diff --git a/DESCRIPTION b/DESCRIPTION
index 8172049..c161b3f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: vcfppR
 Title: Rapid Manipulation of the Variant Call Format (VCF)
-Version: 0.6.1
+Version: 0.6.2
 Authors@R: c(
     person("Zilong", "Li", , "zilong.dk@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-5859-2078")),
diff --git a/configure b/configure
new file mode 100755
--- /dev/null
+++ b/configure
@@ -0,0 +1,26 @@
+#!/bin/sh
+# Build the bundled static HTSlib (libhts.a) with R's own toolchain.
+
+: ${R_HOME=`R RHOME`}
+if test -z "${R_HOME}"; then
+  echo "could not determine R_HOME" >&2
+  exit 1
+fi
+
+# Use the compiler and flags R was configured with; CPICFLAGS makes the
+# objects position-independent so libhts.a can be linked into the
+# package's shared library.
+CC=`"${R_HOME}/bin/R" CMD config CC`
+CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS`
+CPICFLAGS=`"${R_HOME}/bin/R" CMD config CPICFLAGS`
+CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS`
+LDFLAGS=`"${R_HOME}/bin/R" CMD config LDFLAGS`
+MAKE=`"${R_HOME}/bin/R" CMD config MAKE`
+
+HTSLIB_DIR="src/htslib-1.21"
+echo "Configuring HTSlib in $HTSLIB_DIR"
+cd "$HTSLIB_DIR" || exit 1
+./configure CC="$CC" CFLAGS="$CFLAGS $CPICFLAGS" \
+  CPPFLAGS="$CPPFLAGS" LDFLAGS="$LDFLAGS" || exit 1
+# The package links only libhts.a, so skip the tools and test programs.
+${MAKE:-make} libhts.a
diff --git a/configure.win b/configure.win
new file mode 100755
--- /dev/null
+++ b/configure.win
@@ -0,0 +1,26 @@
+#!/bin/sh
+# Build the bundled static HTSlib (libhts.a) with R's own toolchain.
+
+: ${R_HOME=`R RHOME`}
+if test -z "${R_HOME}"; then
+  echo "could not determine R_HOME" >&2
+  exit 1
+fi
+
+# Use the compiler and flags R was configured with; CPICFLAGS makes the
+# objects position-independent so libhts.a can be linked into the
+# package's shared library.
+CC=`"${R_HOME}/bin/R" CMD config CC`
+CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS`
+CPICFLAGS=`"${R_HOME}/bin/R" CMD config CPICFLAGS`
+CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS`
+LDFLAGS=`"${R_HOME}/bin/R" CMD config LDFLAGS`
+MAKE=`"${R_HOME}/bin/R" CMD config MAKE`
+
+HTSLIB_DIR="src/htslib-1.21"
+echo "Configuring HTSlib in $HTSLIB_DIR"
+cd "$HTSLIB_DIR" || exit 1
+./configure CC="$CC" CFLAGS="$CFLAGS $CPICFLAGS" \
+  CPPFLAGS="$CPPFLAGS" LDFLAGS="$LDFLAGS" || exit 1
+# The package links only libhts.a, so skip the tools and test programs.
+${MAKE:-make} libhts.a
diff --git a/src/Makevars b/src/Makevars
index de596aa..e9c52e1 100644
--- a/src/Makevars
+++ b/src/Makevars
@@ -1,25 +1,11 @@
-HTSLIB_DIR='htslib-1.19.1'
+HTSLIB_DIR='htslib-1.21'
 PKG_CPPFLAGS = -I${HTSLIB_DIR} -I../inst/include
 PKG_LIBS = ${HTSLIB_DIR}/libhts.a -fPIC -lz -lm -lbz2 -llzma -lcurl
 
-.PHONY: all clean HTSLIB
+.PHONY: all clean
 
 all : $(SHLIB)
 
-$(SHLIB) : HTSLIB
-
-CC=$(shell "R CMD config CC")
-CXX=$(shell "R CMD config CXX")
-AR=$(shell "R CMD config AR")
-RANLIB=$(shell "R CMD config RANLIB")
-LDFLAGS=$(shell "R CMD config LDFLAGS")
-# CFLAGS=$(shell "R CMD config CFLAGS")
-# CPPFLAGS=$(shell "R CMD config CPPFLAGS")
-
-HTSLIB:
-	(cd ${HTSLIB_DIR} && $(MAKE) -f Makefile.vcfppR libhts.a && cd ..)
- - clean: $(RM) *.o $(RM) *.dll diff --git a/src/Makevars.win b/src/Makevars.win index d96f485..5bdd737 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,4 +1,4 @@ -HTSLIB_DIR=htslib-1.18 +HTSLIB_DIR='htslib-1.21' PKG_CPPFLAGS = -I${HTSLIB_DIR} -I../inst/include -D_FILE_OFFSET_BITS=64 PKG_LIBS=${HTSLIB_DIR}/libhts.a @@ -11,21 +11,10 @@ else PKG_CPPFLAGS += $(shell pkg-config --cflags libcurl) endif -.PHONY: all clean HTSLIB +.PHONY: all clean all : $(SHLIB) -$(SHLIB) : HTSLIB - -# CC=$(shell "R CMD config CC") -# CXX=$(shell "R CMD config CXX") -# CPPFLAGS=$(shell "R CMD config CPPFLAGS") -# LDFLAGS=$(shell "R CMD config LDFLAGS") - -HTSLIB: - cd ${HTSLIB_DIR} && $(MAKE) -f Makefile.win libhts.a && cd .. - - clean: $(RM) *.o $(RM) *.dll diff --git a/src/htslib-1.18/INSTALL b/src/htslib-1.18/INSTALL deleted file mode 100644 index e0fddd9..0000000 --- a/src/htslib-1.18/INSTALL +++ /dev/null @@ -1,309 +0,0 @@ - Building and Installing HTSlib - ============================== - -Requirements -============ - -Building HTSlib requires a few programs and libraries to be present. -See the "System Specific Details" below for guidance on how to install -these. - -At least the following are required: - - GNU make - C compiler (e.g. gcc or clang) - -In addition, building the configure script requires: - - autoheader - autoconf - autoreconf - -Running the configure script uses awk, along with a number of -standard UNIX tools (cat, cp, grep, mv, rm, sed, among others). Almost -all installations will have these already. - -Running the test harness (make test) uses: - - bash - perl - -HTSlib uses the following external libraries. Building requires both the -library itself, and include files needed to compile code that uses functions -from the library. Note that some Linux distributions put include files in -a development ('-dev' or '-devel') package separate from the main library. 
- - zlib (required) - libbz2 (required, unless configured with --disable-bz2) - liblzma (required, unless configured with --disable-lzma) - libcurl (optional, but strongly recommended) - libcrypto (optional for Amazon S3 support; not needed on MacOS) - -Disabling libbzip2 and liblzma will make some CRAM files unreadable, so -is not recommended. - -Using libcurl provides HTSlib with network protocol support, for -example it enables the use of ftp://, http://, and https:// URLs. -It is also required if direct access to Amazon S3 or Google Cloud -Storage is enabled. - -Amazon S3 support requires an HMAC function to calculate a message -authentication code. On MacOS, the CCHmac function from the standard -library is used. Systems that do not have CCHmac will get this from -libcrypto. libcrypto is part of OpenSSL or one of its derivatives (LibreSSL -or BoringSSL). - -On Microsoft Windows we recommend use of Mingw64/Msys2. Whilst the -code may work on Windows with other environments, these have not been -verified. Use of the configure script is a requirement too. - -Update htscodecs submodule -========================== - -Note that this section only applies to git checkouts. If you're building -from a release tar file, you can skip this section. - -Some parts of HTSlib are provided by the external "htscodecs" project. This -is included as a submodule. When building from the git repository, -either clone the project using "git clone --recurse-submodules", or run: - - git submodule update --init --recursive - -to ensure the correct version of the submodule is present. - -It is also possible to link against an external libhtscodecs library -by using the '--with-external-htscodecs' configure option. When -this is used, the submodule files will be ignored. - -Building Configure -================== - -This step is only needed if configure.ac has been changed, or if configure -does not exist (for example, when building from a git clone). 
The -configure script and config.h.in can be built by running: - - autoreconf -i - -Basic Installation -================== - -To build and install HTSlib, 'cd' to the htslib-1.x directory containing -the package's source and type the following commands: - - ./configure - make - make install - -The './configure' command checks your build environment and allows various -optional functionality to be enabled (see Configuration below). If you -don't want to select any optional functionality, you may wish to omit -configure and just type 'make; make install' as for previous versions -of HTSlib. However if the build fails you should run './configure' as -it can diagnose the common reasons for build failures. - -The 'make' command builds the HTSlib library and various useful -utilities: bgzip, htsfile, and tabix. If compilation fails you should -run './configure' as it can diagnose problems with your build environment -that cause build failures. - -The 'make install' command installs the libraries, library header files, -utilities, several manual pages, and a pkgconfig file to /usr/local. -The installation location can be changed by configuring with --prefix=DIR -or via 'make prefix=DIR install' (see Installation Locations below). -Shared library permissions can be set via e.g. 'make install LIB_PERM=755'. - - -Configuration -============= - -By default, './configure' examines your build environment, checking for -requirements such as the zlib development files, and arranges for a plain -HTSlib build. The following configure options can be used to enable -various features and specify further optional external requirements: - ---enable-plugins - Use plugins to implement exotic file access protocols and other - specialised facilities. This enables such facilities to be developed - and packaged outwith HTSlib, and somewhat isolates HTSlib-using programs - from their library dependencies. 
By default (or with --disable-plugins), - any enabled pluggable facilities (such as libcurl file access) are built - directly within HTSlib. - - Programs that are statically linked to a libhts.a with plugins enabled - need to be linked using -rdynamic or a similar linker option. - - The <https://github.com/samtools/htslib-plugins> repository contains - several additional plugins, including the iRODS (<http://irods.org/>) - file access plugin previously distributed with HTSlib. - ---with-plugin-dir=DIR - Specifies the directory into which plugins built while building HTSlib - should be installed; by default, LIBEXECDIR/htslib. - ---with-plugin-path=DIR:DIR:DIR... - Specifies the list of directories that HTSlib will search for plugins. - By default, only the directory specified via --with-plugin-dir will be - searched; you can use --with-plugin-path='DIR:$(plugindir):DIR' and so - on to cause additional directories to be searched. - ---with-external-htscodecs - Build and link against an external copy of the htscodecs library - instead of using the source files in the htscodecs directory. - ---enable-libcurl - Use libcurl (<https://curl.se/libcurl/>) to implement network access to - remote files via FTP, HTTP, HTTPS, etc. By default or with - --enable-libcurl=check, configure will probe for libcurl and include - this functionality if libcurl is available. Use --disable-libcurl - to prevent this. - ---enable-gcs - Implement network access to Google Cloud Storage. By default or with - --enable-gcs=check, this is enabled when libcurl is enabled. - ---enable-s3 - Implement network access to Amazon AWS S3. By default or with - --enable-s3=check, this is enabled when libcurl is enabled. - ---disable-bz2 - Bzip2 is an optional compression codec format for CRAM, included - in HTSlib by default. It can be disabled with --disable-bz2, but - be aware that not all CRAM files may be possible to decode. - ---disable-lzma - LZMA is an optional compression codec for CRAM, included in HTSlib - by default.
It can be disabled with --disable-lzma, but be aware - that not all CRAM files may be possible to decode. - ---with-libdeflate - Libdeflate is a heavily optimized library for DEFLATE-based compression - and decompression. It also includes a fast crc32 implementation. - By default, ./configure will probe for libdeflate and use it if - available. To prevent this, use --without-libdeflate. - -Each --enable-FEATURE/--disable-FEATURE/--with-PACKAGE/--without-PACKAGE -option listed also has an opposite, e.g., --without-external-htscodecs -or --disable-plugins. However, apart from those options for which the -default is to probe for related facilities, using these opposite options -is mostly unnecessary as they just select the default configure behaviour. - -The configure script also accepts the usual options and environment variables -for tuning installation locations and compilers: type './configure --help' -for details. For example, - - ./configure CC=icc --prefix=/opt/icc-compiled - -would specify that HTSlib is to be built with icc and installed into bin, -lib, etc subdirectories under /opt/icc-compiled. - -If dependencies have been installed in non-standard locations (i.e. not on -the normal include and library search paths) then the CPPFLAGS and LDFLAGS -environment variables can be used to set the options needed to find them. -For example, NetBSD users may use: - - ./configure CPPFLAGS=-I/usr/pkg/include \ - LDFLAGS='-L/usr/pkg/lib -Wl,-R/usr/pkg/lib' - -to allow compiling and linking against dependencies installed via the ports -collection. - -Installation Locations -====================== - -By default, 'make install' installs HTSlib libraries under /usr/local/lib, -HTSlib header files under /usr/local/include, utility programs under -/usr/local/bin, etc. (To be precise, the header files are installed within -a fixed 'htslib' subdirectory under the specified .../include location.) 
- -You can specify a different location to install HTSlib by configuring -with --prefix=DIR or specify locations for particular parts of HTSlib by -configuring with --libdir=DIR and so on. Type './configure --help' for -the full list of such install directory options. - -Alternatively you can specify different locations at install time by -typing 'make prefix=DIR install' or 'make libdir=DIR install' and so on. -Consult the list of prefix/exec_prefix/etc variables near the top of the -Makefile for the full list of such variables that can be overridden. - -You can also specify a staging area by typing 'make DESTDIR=DIR install', -possibly in conjunction with other --prefix or prefix=DIR settings. -For example, - - make DESTDIR=/tmp/staging prefix=/opt - -would install into bin, lib, etc subdirectories under /tmp/staging/opt. - - -System Specific Details -======================= - -Installing the prerequisites is system dependent and there is more -than one correct way of satisfying these, including downloading them -from source, compiling and installing them yourself. - -For people with super-user access, we provide an example set of commands -below for installing the dependencies on a variety of operating system -distributions. Note these are not specific recommendations on distribution, -compiler or SSL implementation. It is assumed you already have the core set -of packages for the given distribution - the lists may be incomplete if -this is not the case. - -Debian / Ubuntu ---------------- - -sudo apt-get update # Ensure the package list is up to date -sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev - -Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev. 
- -RedHat / CentOS ---------------- - -sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel - -Note: On some versions perl FindBin will need to be installed to make the tests work. - -sudo yum install perl-FindBin - -Alpine Linux ------------- - -doas apk update # Ensure the package list is up to date -doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev openssl-dev - -Note: some older Alpine versions use libressl-dev rather than openssl-dev. - -OpenSUSE --------- - -sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel - -Windows MSYS2/MINGW64 ---------------------- - -The configure script must be used as without it the compilation will -likely fail. - -Follow MSYS2 installation instructions at -https://www.msys2.org/wiki/MSYS2-installation/ - -Then relaunch to MSYS2 shell using the "MSYS2 MinGW x64" executable. -Once in that environment (check $MSYSTEM equals "MINGW64") install the -compilers using pacman -S and the following package list: - -base-devel mingw-w64-x86_64-toolchain -mingw-w64-x86_64-libdeflate mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 -mingw-w64-x86_64-xz mingw-w64-x86_64-curl mingw-w64-x86_64-autotools -mingw-w64-x86_64-tools-git - -(The last is only needed for building libraries compatible with MSVC.) - -HP-UX ------ - -HP-UX requires that shared libraries have execute permission. The -default for HTSlib is to install with permission 644 (read-write for -owner and read-only for group / other). 
This can be overridden by -setting the LIB_PERM variable at install time with: - - make install LIB_PERM=755 diff --git a/src/htslib-1.18/LICENSE b/src/htslib-1.18/LICENSE deleted file mode 100644 index 925d47b..0000000 --- a/src/htslib-1.18/LICENSE +++ /dev/null @@ -1,69 +0,0 @@ -[Files in this distribution outwith the cram/ subdirectory are distributed -according to the terms of the following MIT/Expat license.] - -The MIT/Expat License - -Copyright (C) 2012-2023 Genome Research Ltd. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - - -[Files within the cram/ subdirectory in this distribution are distributed -according to the terms of the following Modified 3-Clause BSD license.] - -The Modified-BSD License - -Copyright (C) 2012-2023 Genome Research Ltd. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. 
Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the names Genome Research Ltd and Wellcome Trust Sanger Institute - nor the names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR ITS CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -[The use of a range of years within a copyright notice in this distribution -should be interpreted as being equivalent to a list of years including the -first and last year specified and all consecutive years between them. - -For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009, -2011-2012" should be interpreted as being identical to a notice that reads -"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice -that reads "Copyright (C) 2005-2012" should be interpreted as being identical -to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, -2011, 2012".] 
diff --git a/src/htslib-1.18/Makefile b/src/htslib-1.18/Makefile deleted file mode 100644 index eec8164..0000000 --- a/src/htslib-1.18/Makefile +++ /dev/null @@ -1,965 +0,0 @@ -# Makefile for htslib, a C library for high-throughput sequencing data formats. -# -# Copyright (C) 2013-2023 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -CC = gcc -AR = ar -RANLIB = ranlib - -# Default libraries to link if configure is not used -htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl - -CPPFLAGS = -# TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64 -# to CFLAGS manually, here or in config.mk if the latter exists. 
-# TODO: probably update cram code to make it compile cleanly with -Wc++-compat -# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600 -#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600 -CFLAGS = -g -Wall -O2 -fvisibility=hidden -EXTRA_CFLAGS_PIC = -fpic -TARGET_CFLAGS = -LDFLAGS = -fvisibility=hidden -VERSION_SCRIPT_LDFLAGS = -Wl,-version-script,$(srcprefix)htslib.map -LIBS = $(htslib_default_libs) - -prefix = /usr/local -exec_prefix = $(prefix) -bindir = $(exec_prefix)/bin -includedir = $(prefix)/include -libdir = $(exec_prefix)/lib -libexecdir = $(exec_prefix)/libexec -datarootdir = $(prefix)/share -mandir = $(datarootdir)/man -man1dir = $(mandir)/man1 -man5dir = $(mandir)/man5 -man7dir = $(mandir)/man7 -pkgconfigdir= $(libdir)/pkgconfig - -MKDIR_P = mkdir -p -INSTALL = install -p -INSTALL_DATA = $(INSTALL) -m 644 -INSTALL_DIR = $(MKDIR_P) -m 755 -LIB_PERM = 644 -INSTALL_LIB = $(INSTALL) -m $(LIB_PERM) -INSTALL_MAN = $(INSTALL_DATA) -INSTALL_PROGRAM = $(INSTALL) - -# Set by config.mk if plugins are enabled -plugindir = - -BUILT_PROGRAMS = \ - bgzip \ - htsfile \ - tabix - -BUILT_TEST_PROGRAMS = \ - test/hts_endian \ - test/fieldarith \ - test/hfile \ - test/pileup \ - test/pileup_mod \ - test/plugins-dlhts \ - test/sam \ - test/test_bgzf \ - test/test_expr \ - test/test_faidx \ - test/test_kfunc \ - test/test_kstring \ - test/test_mod \ - test/test_realn \ - test/test-regidx \ - test/test_str2int \ - test/test_time_funcs \ - test/test_view \ - test/test_index \ - test/test-vcf-api \ - test/test-vcf-sweep \ - test/test-bcf-sr \ - test/fuzz/hts_open_fuzzer.o \ - test/test-bcf-translate \ - test/test-parse-reg \ - test/test_introspection \ - test/test-bcf_set_variant_type - -BUILT_THRASH_PROGRAMS = \ - test/thrash_threads1 \ - test/thrash_threads2 \ - test/thrash_threads3 \ - test/thrash_threads4 \ - test/thrash_threads5 \ - test/thrash_threads6 \ - test/thrash_threads7 - -all: lib-static lib-shared $(BUILT_PROGRAMS) plugins 
$(BUILT_TEST_PROGRAMS) \ - htslib_static.mk htslib-uninstalled.pc - -ALL_CPPFLAGS = -I. $(CPPFLAGS) - -# Usually htscodecs.mk is generated by running configure or config.status, -# but if those aren't used create a default here. -htscodecs.mk: - echo '# Default htscodecs.mk generated by Makefile' > $@ - echo 'include $$(HTSPREFIX)htscodecs_bundled.mk' >> $@ - $(srcdir)/hts_probe_cc.sh '$(CC)' '$(CFLAGS) $(CPPFLAGS)' '$(LDFLAGS)' >> $@ - -srcdir = . -srcprefix = -HTSPREFIX = - -# Flags for SIMD code -HTS_CFLAGS_AVX2 = -HTS_CFLAGS_AVX512 = -HTS_CFLAGS_SSE4 = - -# Control building of SIMD code. Not used if configure has been run. -HTS_BUILD_AVX2 = -HTS_BUILD_AVX512 = -HTS_BUILD_SSSE3 = -HTS_BUILD_POPCNT = -HTS_BUILD_SSE4_1 = - -include htslib_vars.mk -include htscodecs.mk - -# If not using GNU make, you need to copy the version number from version.sh -# into here. -PACKAGE_VERSION := $(shell $(srcdir)/version.sh) - -LIBHTS_SOVERSION = 3 - -# Version numbers for the Mac dynamic library. Note that the leading 3 -# is not strictly necessary and should be removed the next time -# LIBHTS_SOVERSION is bumped (see #1144 and -# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23) -MACH_O_COMPATIBILITY_VERSION = 3.1.18 -MACH_O_CURRENT_VERSION = 3.1.18 - -# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string -# even if this is a dirty or untagged Git working tree. -NUMERIC_VERSION := $(shell $(srcdir)/version.sh numeric) - -# Force version.h to be remade if $(PACKAGE_VERSION) has changed. 
-version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force)) - -version.h: - echo '#define HTS_VERSION_TEXT "$(PACKAGE_VERSION)"' > $@ - -print-version: - @echo $(PACKAGE_VERSION) - -show-version: - @echo PACKAGE_VERSION = $(PACKAGE_VERSION) - @echo NUMERIC_VERSION = $(NUMERIC_VERSION) - -config_vars.h: override escape=$(subst ',\x27,$(subst ",\",$(subst \,\\,$(1)))) -config_vars.h: override hts_cc_escaped=$(call escape,$(CC)) -config_vars.h: override hts_cppflags_escaped=$(call escape,$(CPPFLAGS)) -config_vars.h: override hts_cflags_escaped=$(call escape,$(CFLAGS)) -config_vars.h: override hts_ldflags_escaped=$(call escape,$(LDFLAGS)) -config_vars.h: override hts_libs_escaped=$(call escape,$(LIBS)) - -config_vars.h: - printf '#define HTS_CC "%s"\n#define HTS_CPPFLAGS "%s"\n#define HTS_CFLAGS "%s"\n#define HTS_LDFLAGS "%s"\n#define HTS_LIBS "%s"\n' \ - '$(hts_cc_escaped)' \ - '$(hts_cppflags_escaped)' \ - '$(hts_cflags_escaped)' \ - '$(hts_ldflags_escaped)' \ - '$(hts_libs_escaped)' > $@ - -.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so - -.c.o: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $< - -.c.pico: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $< - - -LIBHTS_OBJS = \ - kfunc.o \ - kstring.o \ - bcf_sr_sort.o \ - bgzf.o \ - errmod.o \ - faidx.o \ - header.o \ - hfile.o \ - hts.o \ - hts_expr.o \ - hts_os.o\ - md5.o \ - multipart.o \ - probaln.o \ - realn.o \ - regidx.o \ - region.o \ - sam.o \ - sam_mods.o \ - synced_bcf_reader.o \ - vcf_sweep.o \ - tbx.o \ - textutils.o \ - thread_pool.o \ - vcf.o \ - vcfutils.o \ - cram/cram_codecs.o \ - cram/cram_decode.o \ - cram/cram_encode.o \ - cram/cram_external.o \ - cram/cram_index.o \ - cram/cram_io.o \ - cram/cram_stats.o \ - cram/mFILE.o \ - cram/open_trace_file.o \ - cram/pooled_alloc.o \ - cram/string_alloc.o \ - $(HTSCODECS_OBJS) \ - $(NONCONFIGURE_OBJS) - -# Without configure we wish to have a rich set of 
default figures, -# but we still need conditional inclusion as we wish to still -# support ./configure --disable-blah. -NONCONFIGURE_OBJS = hfile_libcurl.o - -PLUGIN_EXT = -PLUGIN_OBJS = - -cram_h = cram/cram.h $(cram_samtools_h) $(header_h) $(cram_structs_h) $(cram_io_h) cram/cram_encode.h cram/cram_decode.h cram/cram_stats.h cram/cram_codecs.h cram/cram_index.h $(htslib_cram_h) -cram_io_h = cram/cram_io.h $(cram_misc_h) -cram_misc_h = cram/misc.h -cram_os_h = cram/os.h $(htslib_hts_endian_h) -cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h) -cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) $(htslib_cram_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h) -cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h -bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h) -header_h = header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h) $(htslib_sam_h) -hfile_internal_h = hfile_internal.h $(htslib_hts_defs_h) $(htslib_hfile_h) $(textutils_internal_h) -hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h) -hts_time_funcs_h = hts_time_funcs.h -sam_internal_h = sam_internal.h $(htslib_sam_h) -textutils_internal_h = textutils_internal.h $(htslib_kstring_h) -thread_pool_internal_h = thread_pool_internal.h $(htslib_thread_pool_h) - -# To be effective, config.mk needs to appear after most Makefile variables are -# set but before most rules appear, so that it can both use previously-set -# variables in its own rules' prerequisites and also update variables for use -# in later rules' prerequisites. - -# If your make doesn't accept -include, change this to 'include' if you are -# using the configure script or just comment the line out if you are not. --include config.mk - -# Usually config.h is generated by running configure or config.status, -# but if those aren't used create a default config.h here. 
-config.h: - echo '/* Default config.h generated by Makefile */' > $@ - echo '#ifndef _XOPEN_SOURCE' >> $@ - echo '#define _XOPEN_SOURCE 600' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_LIBBZ2 1' >> $@ - echo '#define HAVE_LIBLZMA 1' >> $@ - echo '#ifndef __APPLE__' >> $@ - echo '#define HAVE_LZMA_H 1' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_DRAND48 1' >> $@ - echo '#define HAVE_LIBCURL 1' >> $@ - if [ "x$(HTS_BUILD_POPCNT)" != "x" ] && \ - [ "x$(HTS_BUILD_SSE4_1)" != "x" ] && \ - [ "x$(HTS_BUILD_SSSE3)" != "x" ]; then \ - echo '#define HAVE_POPCNT 1' >> $@ ; \ - echo '#define HAVE_SSE4_1 1' >> $@ ; \ - echo '#define HAVE_SSSE3 1' >> $@ ; \ - echo '#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0' >> $@ ; \ - echo '#define UBSAN 1' >> $@ ; \ - echo '#endif' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \ - echo '#define HAVE_AVX2 1' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \ - echo '#define HAVE_AVX512 1' >> $@ ; \ - fi - -# And similarly for htslib.pc.tmp ("pkg-config template"). No dependency -# on htslib.pc.in listed, as if that file is newer the usual way to regenerate -# this target is via configure or config.status rather than this rule. -htslib.pc.tmp: - sed -e '/^static_libs=/s/@static_LIBS@/$(htslib_default_libs)/;s#@[^-][^@]*@##g' $(srcprefix)htslib.pc.in > $@ - -# Create a makefile fragment listing the libraries and LDFLAGS needed for -# static linking. This can be included by projects that want to build -# and link against the htslib source tree instead of an installed library. -htslib_static.mk: htslib.pc.tmp - sed -n '/^static_libs=/s/[^=]*=/HTSLIB_static_LIBS = /p;/^static_ldflags=/s/[^=]*=/HTSLIB_static_LDFLAGS = /p' $< > $@ - - -lib-static: libhts.a - -# $(shell), :=, and ifeq/.../endif are GNU Make-specific. If you don't have -# GNU Make, comment out the parts of these conditionals that don't apply. 
-ifneq "$(origin PLATFORM)" "file" -PLATFORM := $(shell uname -s) -endif -ifeq "$(PLATFORM)" "Darwin" -SHLIB_FLAVOUR = dylib -lib-shared: libhts.dylib -else ifeq "$(findstring CYGWIN,$(PLATFORM))" "CYGWIN" -SHLIB_FLAVOUR = cygdll -lib-shared: cyghts-$(LIBHTS_SOVERSION).dll -else ifeq "$(findstring MSYS,$(PLATFORM))" "MSYS" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else ifeq "$(findstring MINGW,$(PLATFORM))" "MINGW" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else -SHLIB_FLAVOUR = so -lib-shared: libhts.so -endif - -BUILT_PLUGINS = $(PLUGIN_OBJS:.o=$(PLUGIN_EXT)) - -ifneq "$(BUILT_PLUGINS)" "" -plugins: lib-shared -endif -plugins: $(BUILT_PLUGINS) - - -libhts.a: $(LIBHTS_OBJS) - @-rm -f $@ - $(AR) -rc $@ $(LIBHTS_OBJS) - -$(RANLIB) $@ - -print-config: - @echo HTS_CFLAGS_AVX2 = $(HTS_CFLAGS_AVX2) - @echo HTS_CFLAGS_AVX512 = $(HTS_CFLAGS_AVX512) - @echo HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSE4) - @echo HTS_HAVE_NEON = $(HTS_HAVE_NEON) - @echo LDFLAGS = $(LDFLAGS) - @echo LIBHTS_OBJS = $(LIBHTS_OBJS) - @echo LIBS = $(LIBS) - @echo PLATFORM = $(PLATFORM) - -# The target here is libhts.so, as that is the built file that other rules -# depend upon and that is used when -lhts appears in other program's recipes. -# As a byproduct invisible to make, libhts.so.NN is also created, as it is the -# file used at runtime (when $LD_LIBRARY_PATH includes the build directory). - -libhts.so: $(LIBHTS_OBJS:.o=.pico) - $(CC) -shared -Wl,-soname,libhts.so.$(LIBHTS_SOVERSION) $(VERSION_SCRIPT_LDFLAGS) $(LDFLAGS) -o $@ $(LIBHTS_OBJS:.o=.pico) $(LIBS) -lpthread - ln -sf $@ libhts.so.$(LIBHTS_SOVERSION) - -# Similarly this also creates libhts.NN.dylib as a byproduct, so that programs -# when run can find this uninstalled shared library (when $DYLD_LIBRARY_PATH -# includes this project's build directory). 
- -libhts.dylib: $(LIBHTS_OBJS) - $(CC) -dynamiclib -install_name $(libdir)/libhts.$(LIBHTS_SOVERSION).dylib -current_version $(MACH_O_CURRENT_VERSION) -compatibility_version $(MACH_O_COMPATIBILITY_VERSION) $(LDFLAGS) -o $@ $(LIBHTS_OBJS) $(LIBS) - ln -sf $@ libhts.$(LIBHTS_SOVERSION).dylib - -cyghts-$(LIBHTS_SOVERSION).dll libhts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=libhts.dll.a -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).dll hts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=hts.dll.a -Wl,--enable-auto-import -Wl,--exclude-all-symbols $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).def: hts-$(LIBHTS_SOVERSION).dll - gendef hts-$(LIBHTS_SOVERSION).dll - -hts-$(LIBHTS_SOVERSION).lib: hts-$(LIBHTS_SOVERSION).def - dlltool -m i386:x86-64 -d hts-$(LIBHTS_SOVERSION).def -l hts-$(LIBHTS_SOVERSION).lib - -# Bundling libraries, binaries, dll dependencies, and licenses into a -# single directory. NB: This is not needed for end-users, but a test bed -# for maintainers building binary distributions. -# -# NOTE: only tested on the supported MSYS2/MINGW64 environment. 
-dist-windows: DESTDIR= -dist-windows: prefix=dist-windows -dist-windows: install - cp hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib dist-windows/lib - cp `ldd hts-$(LIBHTS_SOVERSION).dll| awk '/mingw64/ {print $$3}'` dist-windows/bin - mkdir -p dist-windows/share/licenses/htslib - -cp -r /mingw64/share/licenses/mingw-w64-libraries \ - /mingw64/share/licenses/brotli \ - /mingw64/share/licenses/bzip2 \ - /mingw64/share/licenses/gcc-libs \ - /mingw64/share/licenses/libdeflate \ - /mingw64/share/licenses/libpsl \ - /mingw64/share/licenses/libtre \ - /mingw64/share/licenses/libwinpthread \ - /mingw64/share/licenses/openssl \ - /mingw64/share/licenses/xz \ - /mingw64/share/licenses/zlib \ - /mingw64/share/licenses/zstd \ - dist-windows/share/licenses/ - -cp -r /usr/share/licenses/curl \ - dist-windows/share/licenses/ - cp LICENSE dist-windows/share/licenses/htslib/ - - -# Target to allow htslib.mk to build all the object files before it -# links the shared and static libraries. -hts-object-files: $(LIBHTS_OBJS) - touch $@ - -# On Unix dlopen("libhts.so.NN", RTLD_LAZY) may default to RTLD_LOCAL. -# Hence plugins need to link to (shared) libhts.so.NN themselves, as they -# may not be able to access libhts symbols via the main program's libhts -# if that was dynamically loaded without an explicit RTLD_GLOBAL. -%.so: %.pico libhts.so - $(CC) -shared -Wl,-E $(LDFLAGS) -o $@ $< libhts.so $(LIBS) -lpthread - -# For programs *statically* linked to libhts.a, on macOS loading a plugin -# linked to a shared libhts.NN.dylib would lead to conflicting duplicate -# symbols. Fortunately macOS dlopen() defaults to RTLD_GLOBAL so there -# is less need for plugins to link back to libhts themselves. 
-%.bundle: %.o - $(CC) -bundle -Wl,-undefined,dynamic_lookup $(LDFLAGS) -o $@ $< $(LIBS) - -%.cygdll: %.o libhts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< libhts.dll.a $(LIBS) - -%.dll: %.o hts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS) - - -bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h) -errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h) -kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h) -header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h) -hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h) -hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h) -hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(hts_time_funcs_h) -hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h) -hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h) -hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c -vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) 
$(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h) -sam.o sam.pico: sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) -sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h) -tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h) -faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h) -bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h $(bcf_sr_sort_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h) -synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(bcf_sr_sort_h) -vcf_sweep.o vcf_sweep.pico: vcf_sweep.c config.h $(htslib_vcf_sweep_h) $(htslib_bgzf_h) -vcfutils.o vcfutils.pico: vcfutils.c config.h $(htslib_vcfutils_h) $(htslib_kbitset_h) -kfunc.o kfunc.pico: kfunc.c config.h $(htslib_kfunc_h) -regidx.o regidx.pico: regidx.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_regidx_h) $(hts_internal_h) -region.o region.pico: region.c config.h $(htslib_hts_h) $(htslib_khash_h) -md5.o md5.pico: md5.c config.h $(htslib_hts_h) $(htslib_hts_endian_h) -multipart.o multipart.pico: multipart.c config.h $(htslib_kstring_h) $(hts_internal_h) $(hfile_internal_h) -plugin.o plugin.pico: plugin.c config.h $(hts_internal_h) $(htslib_kstring_h) -probaln.o probaln.pico: probaln.c config.h $(htslib_hts_h) -realn.o realn.pico: realn.c config.h $(htslib_hts_h) $(htslib_sam_h) -textutils.o textutils.pico: textutils.c config.h $(htslib_hfile_h) $(htslib_kstring_h) $(htslib_sam_h) 
$(hts_internal_h) - -cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h) -cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) -cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(sam_internal_h) $(htslib_hts_h) $(htslib_hts_endian_h) $(textutils_internal_h) -cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htscodecs_rANS_static4x16_h) $(htslib_hfile_h) $(cram_h) -cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) -cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h) -cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h) -cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h -cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(htslib_hts_h) -cram/pooled_alloc.o cram/pooled_alloc.pico: cram/pooled_alloc.c config.h cram/pooled_alloc.h $(cram_misc_h) -cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h -thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h) $(htslib_hts_log_h) - -htscodecs/htscodecs/arith_dynamic.o htscodecs/htscodecs/arith_dynamic.pico: htscodecs/htscodecs/arith_dynamic.c config.h $(htscodecs_arith_dynamic_h) $(htscodecs_varint_h) 
$(htscodecs_pack_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/fqzcomp_qual.o htscodecs/htscodecs/fqzcomp_qual.pico: htscodecs/htscodecs/fqzcomp_qual.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: htscodecs/htscodecs/htscodecs.c $(htscodecs_htscodecs_h) $(htscodecs_version_h) -htscodecs/htscodecs/pack.o htscodecs/htscodecs/pack.pico: htscodecs/htscodecs/pack.c config.h $(htscodecs_pack_h) -htscodecs/htscodecs/rANS_static32x16pr.o htscodecs/htscodecs/rANS_static32x16pr.pico: htscodecs/htscodecs/rANS_static32x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: htscodecs/htscodecs/rANS_static32x16pr_avx2.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_permute_h) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: htscodecs/htscodecs/rANS_static32x16pr_avx512.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_neon.o htscodecs/htscodecs/rANS_static32x16pr_neon.pico: htscodecs/htscodecs/rANS_static32x16pr_neon.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: htscodecs/htscodecs/rANS_static32x16pr_sse4.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) 
-htscodecs/htscodecs/rANS_static4x16pr.o htscodecs/htscodecs/rANS_static4x16pr.pico: htscodecs/htscodecs/rANS_static4x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(htscodecs_utils_h) $(htscodecs_rANS_static32x16pr_h) -htscodecs/htscodecs/rANS_static.o htscodecs/htscodecs/rANS_static.pico: htscodecs/htscodecs/rANS_static.c config.h $(htscodecs_rANS_byte_h) $(htscodecs_utils_h) $(htscodecs_rANS_static_h) -htscodecs/htscodecs/rle.o htscodecs/htscodecs/rle.pico: htscodecs/htscodecs/rle.c config.h $(htscodecs_varint_h) $(htscodecs_rle_h) -htscodecs/htscodecs/tokenise_name3.o htscodecs/htscodecs/tokenise_name3.pico: htscodecs/htscodecs/tokenise_name3.c config.h $(htscodecs_pooled_alloc_h) $(htscodecs_arith_dynamic_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_tokenise_name3_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/utils.o htscodecs/htscodecs/utils.pico: htscodecs/htscodecs/utils.c config.h $(htscodecs_utils_h) - -# Extra CFLAGS for specific files -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX2) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX512) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: TARGET_CFLAGS = $(HTS_CFLAGS_SSE4) - -bgzip: bgzip.o libhts.a - $(CC) $(LDFLAGS) -o $@ bgzip.o libhts.a $(LIBS) -lpthread - -htsfile: htsfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ htsfile.o libhts.a $(LIBS) -lpthread - -tabix: tabix.o libhts.a - $(CC) $(LDFLAGS) -o $@ tabix.o libhts.a $(LIBS) -lpthread - -bgzip.o: bgzip.c config.h $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_hfile_h) -htsfile.o: htsfile.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -tabix.o: tabix.c config.h $(htslib_tbx_h) $(htslib_sam_h) 
$(htslib_vcf_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(htslib_hts_log_h) - -# Runes to check that the htscodecs submodule is present -ifdef HTSCODECS_SOURCES -htscodecs/htscodecs/%.c: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs/%.h: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs: - @if test -e .git ; then \ - printf "\\n\\nError: htscodecs submodule files not present for htslib.\\n\ - Try running: \\n\ - git submodule update --init --recursive\\n\ - in the top-level htslib directory and then re-run make.\\n\\n\\n" ; \ - else \ - printf "\\n\\nError: htscodecs submodule files not present and this is not a git checkout.\\n\ - You have an incomplete distribution. Please try downloading one of the\\n\ - official releases from https://www.htslib.org/\\n" ; \ - fi - @false - -# Build the htscodecs/htscodecs/version.h file if necessary -htscodecs/htscodecs/version.h: force - @if test -e $(srcdir)/htscodecs/.git && test -e $(srcdir)/htscodecs/configure.ac ; then \ - vers=`cd $(srcdir)/htscodecs && git describe --always --dirty --match 'v[0-9]\.[0-9]*'` && \ - case "$$vers" in \ - v*) vers=$${vers#v} ;; \ - *) iv=`awk '/^AC_INIT/ { match($$0, /^AC_INIT\(htscodecs, *([0-9](\.[0-9])*)\)/, m); print substr($$0, m[1, "start"], m[1, "length"]) }' $(srcdir)/htscodecs/configure.ac` ; vers="$$iv$${vers:+-g$$vers}" ;; \ - esac ; \ - if ! 
grep -s -q '"'"$$vers"'"' $@ ; then \ - echo 'Updating $@ : #define HTSCODECS_VERSION_TEXT "'"$$vers"'"' ; \ - echo '#define HTSCODECS_VERSION_TEXT "'"$$vers"'"' > $@ ; \ - fi ; \ - fi -endif - -# Maintainer source code checks -# - copyright boilerplate presence -# - tab and trailing space detection -maintainer-check: - test/maintainer/check_copyright.pl . - test/maintainer/check_spaces.pl . - -# Look for untracked files in the git repository. -check-untracked: - @if test -e .git && git status --porcelain | grep '^\?'; then \ - echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \ - false ; \ - fi - -# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/ -# for brevity in test and install rules, and so that build logs do not have -# ./ sprinkled throughout. -SRC = $(srcprefix) - -# For tests that might use it, set $REF_PATH explicitly to use only reference -# areas within the test suite (or set it to ':' to use no reference areas). -# -# If using MSYS, avoid poor shell expansion via: -# MSYS2_ARG_CONV_EXCL="*" make check -check test: all $(HTSCODECS_TEST_TARGETS) - test/hts_endian - test/test_expr - test/test_kfunc - test/test_kstring - test/test_str2int - test/test_time_funcs - test/fieldarith test/fieldarith.sam - test/hfile - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -g ./libhts.$(SHLIB_FLAVOUR); \ - fi - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. 
test/with-shlib.sh test/plugins-dlhts -l ./libhts.$(SHLIB_FLAVOUR); \ - fi - test/test_bgzf test/bgziptest.txt - test/test-parse-reg -t test/colons.bam - cd test/faidx && ./test-faidx.sh faidx.tst - cd test/sam_filter && ./filter.sh filter.tst - cd test/tabix && ./test-tabix.sh tabix.tst - cd test/mpileup && ./test-pileup.sh mpileup.tst - cd test/fastq && ./test-fastq.sh - cd test/base_mods && ./base-mods.sh base-mods.tst - REF_PATH=: test/sam test/ce.fa test/faidx/faidx.fa test/faidx/fastqs.fq - test/test-regidx - cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-} - -test/hts_endian: test/hts_endian.o - $(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS) - -test/fuzz/hts_open_fuzzer: test/fuzz/hts_open_fuzzer.o - $(CC) $(LDFLAGS) -o $@ test/fuzz/hts_open_fuzzer.o libhts.a $(LIBS) -lpthread - -test/fieldarith: test/fieldarith.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/fieldarith.o libhts.a $(LIBS) -lpthread - -test/hfile: test/hfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/hfile.o libhts.a $(LIBS) -lpthread - -test/pileup: test/pileup.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread - -test/pileup_mod: test/pileup_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread - -test/plugins-dlhts: test/plugins-dlhts.o - $(CC) $(LDFLAGS) -o $@ test/plugins-dlhts.o $(LIBS) - -test/sam: test/sam.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/sam.o libhts.a $(LIBS) -lpthread - -test/test_bgzf: test/test_bgzf.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a -lz $(LIBS) -lpthread - -test/test_expr: test/test_expr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_expr.o libhts.a -lz $(LIBS) -lpthread - -test/test_faidx: test/test_faidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_faidx.o libhts.a -lz $(LIBS) -lpthread - -test/test_kfunc: test/test_kfunc.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a -lz $(LIBS) -lpthread - -test/test_kstring: test/test_kstring.o libhts.a - $(CC) $(LDFLAGS) -o $@ 
test/test_kstring.o libhts.a -lz $(LIBS) -lpthread - -test/test_mod: test/test_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread - -test/test_realn: test/test_realn.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread - -test/test-regidx: test/test-regidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread - -test/test-parse-reg: test/test-parse-reg.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-parse-reg.o libhts.a $(LIBS) -lpthread - -test/test_str2int: test/test_str2int.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_str2int.o libhts.a $(LIBS) -lpthread - -test/test_time_funcs: test/test_time_funcs.o - $(CC) $(LDFLAGS) -o $@ test/test_time_funcs.o - -test/test_view: test/test_view.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LIBS) -lpthread - -test/test_index: test/test_index.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_index.o libhts.a $(LIBS) -lpthread - -test/test-vcf-api: test/test-vcf-api.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-api.o libhts.a $(LIBS) -lpthread - -test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-sweep.o libhts.a $(LIBS) -lpthread - -test/test-bcf-sr: test/test-bcf-sr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-sr.o libhts.a -lz $(LIBS) -lpthread - -test/test-bcf-translate: test/test-bcf-translate.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a -lz $(LIBS) -lpthread - -test/test_introspection: test/test_introspection.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_introspection.o libhts.a $(LIBS) -lpthread - -test/test-bcf_set_variant_type: test/test-bcf_set_variant_type.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf_set_variant_type.o libhts.a $(LIBS) -lpthread - -# Extra tests for bundled htscodecs -test_htscodecs_rans4x8: htscodecs/tests/rans4x8 - cd htscodecs/tests && srcdir=. 
&& export srcdir && ./rans4x8.test - -test_htscodecs_rans4x16: htscodecs/tests/rans4x16pr - cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x16.test - -test_htscodecs_arith: htscodecs/tests/arith_dynamic - cd htscodecs/tests && srcdir=. && export srcdir && ./arith.test - -test_htscodecs_tok3: htscodecs/tests/tokenise_name3 - cd htscodecs/tests && srcdir=. && export srcdir && ./tok3.test - -test_htscodecs_fqzcomp: htscodecs/tests/fqzcomp_qual - cd htscodecs/tests && srcdir=. && export srcdir && ./fqzcomp.test - -test_htscodecs_varint: htscodecs/tests/varint - cd htscodecs/tests && ./varint - -htscodecs/tests/arith_dynamic: htscodecs/tests/arith_dynamic_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/fqzcomp_qual: htscodecs/tests/fqzcomp_qual_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x16pr: htscodecs/tests/rANS_static4x16pr_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x8: htscodecs/tests/rANS_static_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/tokenise_name3: htscodecs/tests/tokenise_name3_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/varint: htscodecs/tests/varint_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/arith_dynamic_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/arith_dynamic_test.o: htscodecs/tests/arith_dynamic_test.c config.h $(htscodecs_arith_dynamic_h) -htscodecs/tests/fqzcomp_qual_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/fqzcomp_qual_test.o: htscodecs/tests/fqzcomp_qual_test.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) -htscodecs/tests/rANS_static4x16pr_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static4x16pr_test.o: htscodecs/tests/rANS_static4x16pr_test.c config.h $(htscodecs_rANS_static4x16_h) 
-htscodecs/tests/rANS_static_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static_test.o: htscodecs/tests/rANS_static_test.c config.h $(htscodecs_rANS_static_h) -htscodecs/tests/tokenise_name3_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/tokenise_name3_test.o: htscodecs/tests/tokenise_name3_test.c config.h $(htscodecs_tokenise_name3_h) -htscodecs/tests/varint_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/varint_test.o: htscodecs/tests/varint_test.c config.h $(htscodecs_varint_h) - -test/hts_endian.o: test/hts_endian.c config.h $(htslib_hts_endian_h) -test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h) -test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h) -test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h) -test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h) -test/plugins-dlhts.o: test/plugins-dlhts.c config.h -test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h) -test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(hfile_internal_h) -test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h) -test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h) -test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h) -test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h) -test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h) -test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) -test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h) -test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h) -test/test_time_funcs.o: 
test/test_time_funcs.c config.h $(hts_time_funcs_h) -test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h) -test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h) -test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h) -test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h) -test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h) -test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_synced_bcf_reader_h) $(htslib_hts_h) $(htslib_vcf_h) -test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h) -test/test_introspection.o: test/test_introspection.c config.h $(htslib_hts_h) $(htslib_hfile_h) -test/test-bcf_set_variant_type.o: test/test-bcf_set_variant_type.c config.h $(htslib_hts_h) vcf.c - - -test/thrash_threads1: test/thrash_threads1.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads1.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads2: test/thrash_threads2.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads2.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads3: test/thrash_threads3.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads3.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads4: test/thrash_threads4.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads4.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads5: test/thrash_threads5.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads5.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads6: test/thrash_threads6.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads6.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads7: test/thrash_threads7.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads7.o libhts.a -lz $(LIBS) -lpthread - -test_thrash: $(BUILT_THRASH_PROGRAMS) - -# Test to ensure the functions in the header files are exported by the shared -# library. 
This currently works by comparing the output from ctags on -# the headers with the list of functions exported by the shared library. -# Note that functions marked as exported in the .c files and not the public -# headers will be missed by this test. -test-shlib-exports: header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - @echo "Checking shared library exports" - @if test ! -s header-exports.txt ; then echo "Error: header-exports.txt empty" ; false ; fi - @if test ! -s shlib-exports-$(SHLIB_FLAVOUR).txt ; then echo "Error: shlib-exports-$(SHLIB_FLAVOUR).txt empty" ; false ; fi - @! comm -23 header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt | grep . || \ - ( echo "Error: Found unexported symbols (listed above)" ; false ) - -# Extract symbols that should be exported from public headers using ctags -# Filter out macros in htslib/hts_defs.h. -header-exports.txt: test/header_syms.pl htslib/*.h - test/header_syms.pl htslib/*.h | sort -u -o $@ - -shlib-exports-so.txt: libhts.so - nm -D -g libhts.so | awk '$$2 == "T" { sub("@.*", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dylib.txt: libhts.dylib - nm -Ug libhts.dylib | awk '$$2 == "T" { sub("^_", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dll.txt: hts.dll.a - nm -g hts.dll.a | awk '$$2 == "T" { print $$3 }' | sort -u -o $@ - -$(srcprefix)htslib.map: libhts.so - LC_ALL=C ; export LC_ALL; \ - curr_vers=`expr 'X$(PACKAGE_VERSION)' : 'X\([0-9]*\.[0-9.]*\)'` ; \ - last_vers=`awk '/^HTSLIB_[0-9](\.[0-9]+)+/ { lv = $$1 } END { print lv }' htslib.map` ; \ - if test "x$$curr_vers" = 'x' || test "x$$last_vers" = 'x' ; then \ - echo "Version check failed : $$curr_vers / $$las_vers" 1>&2 ; \ - exit 1 ; \ - fi && \ - if test "HTSLIB_$$curr_vers" = "$$last_vers" ; then \ - echo "Refusing to update $@ - HTSlib version not changed" 1>&2 ; \ - exit 1 ; \ - fi && \ - nm --with-symbol-versions -D -g libhts.so | awk '$$2 ~ /^[DGRT]$$/ && $$3 ~ /@@Base$$/ && $$3 !~ /^(_init|_fini|_edata)@@/ { sub(/@@Base$$/, 
";", $$3); print " " $$3 }' > $@.tmp && \ - if [ -s $@.tmp ] ; then \ - cat $@ > $@.new.tmp && \ - printf '\n%s {\n' "HTSLIB_$$curr_vers" >> $@.new.tmp && \ - cat $@.tmp >> $@.new.tmp && \ - printf '} %s;\n' "$$last_vers" >> $@.new.tmp && \ - rm -f $@.tmp && \ - mv $@.new.tmp $@ ; \ - fi ; \ - else \ - rm -f $@.tmp ; \ - fi - -install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB_FLAVOUR) install-pkgconfig - $(INSTALL_PROGRAM) $(BUILT_PROGRAMS) $(DESTDIR)$(bindir) - if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi - $(INSTALL_DATA) $(SRC)htslib/*.h $(DESTDIR)$(includedir)/htslib - $(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a - $(INSTALL_MAN) $(SRC)bgzip.1 $(SRC)htsfile.1 $(SRC)tabix.1 $(DESTDIR)$(man1dir) - $(INSTALL_MAN) $(SRC)faidx.5 $(SRC)sam.5 $(SRC)vcf.5 $(DESTDIR)$(man5dir) - $(INSTALL_MAN) $(SRC)htslib-s3-plugin.7 $(DESTDIR)$(man7dir) - -installdirs: - $(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(includedir) $(DESTDIR)$(includedir)/htslib $(DESTDIR)$(libdir) $(DESTDIR)$(man1dir) $(DESTDIR)$(man5dir) $(DESTDIR)$(man7dir) $(DESTDIR)$(pkgconfigdir) - if test -n "$(plugindir)"; then $(INSTALL_DIR) $(DESTDIR)$(plugindir); fi - -# After installation, the real file in $(libdir) will be libhts.so.X.Y.Z, -# with symlinks libhts.so (used via -lhts during linking of client programs) -# and libhts.so.NN (used by client executables at runtime). 
- -install-so: libhts.so installdirs - $(INSTALL_LIB) libhts.so $(DESTDIR)$(libdir)/libhts.so.$(PACKAGE_VERSION) - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so.$(LIBHTS_SOVERSION) - -install-cygdll: cyghts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) cyghts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/cyghts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) libhts.dll.a $(DESTDIR)$(libdir)/libhts.dll.a - -install-dll: hts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) hts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/hts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) hts.dll.a $(DESTDIR)$(libdir)/hts.dll.a - -install-dylib: libhts.dylib installdirs - $(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.$(LIBHTS_SOVERSION).dylib - -# Substitute these pseudo-autoconf variables only at install time -# so that "make install prefix=/prefix/path" etc continue to work. -install-pkgconfig: htslib.pc.tmp installdirs - sed -e 's#@-includedir@#$(includedir)#g;s#@-libdir@#$(libdir)#g;s#@-PACKAGE_VERSION@#$(PACKAGE_VERSION)#g' htslib.pc.tmp > $(DESTDIR)$(pkgconfigdir)/htslib.pc - chmod 644 $(DESTDIR)$(pkgconfigdir)/htslib.pc - -# A pkg-config file (suitable for copying to $PKG_CONFIG_PATH) that provides -# flags for building against the uninstalled library in this build directory. 
-htslib-uninstalled.pc: htslib.pc.tmp - sed -e 's#@-includedir@#'`pwd`'#g;s#@-libdir@#'`pwd`'#g' htslib.pc.tmp > $@ - - -testclean: - -rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* test/faidx/FAIL* \ - test/longrefs/*.tmp.* test/tabix/*.tmp.* test/tabix/FAIL* \ - header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - -rm -rf htscodecs/tests/test.out - -# Only remove this in git checkouts -DEL_HTSCODECS_VERSION := $(if $(wildcard htscodecs/.git),htscodecs/htscodecs/version.h) - -mostlyclean: testclean - -rm -f *.o *.pico cram/*.o cram/*.pico test/*.o test/*.dSYM config_vars.h version.h - -rm -f htscodecs/htscodecs/*.o htscodecs/htscodecs/*.pico $(DEL_HTSCODECS_VERSION) - -rm -f hts-object-files - -rm -f htscodecs/tests/*.o - -clean: mostlyclean clean-$(SHLIB_FLAVOUR) - -rm -f libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) $(BUILT_TEST_PROGRAMS) $(BUILT_THRASH_PROGRAMS) - -rm -f htscodecs/tests/rans4x8 htscodecs/tests/rans4x16pr htscodecs/tests/arith_dynamic htscodecs/tests/tokenise_name3 htscodecs/tests/fqzcomp_qual htscodecs/tests/varint - -distclean maintainer-clean: clean - -rm -f config.cache config.h config.log config.mk config.status - -rm -f TAGS *.pc.tmp *-uninstalled.pc htslib_static.mk htscodecs.mk - -rm -rf autom4te.cache - -clean-so: - -rm -f libhts.so libhts.so.* - -clean-cygdll: - -rm -f cyghts-*.dll libhts.dll.a - -clean-dll: - -rm -f hts-*.dll hts.dll.a - -clean-dylib: - -rm -f libhts.dylib libhts.*.dylib - - -tags TAGS: - ctags -f TAGS *.[ch] cram/*.[ch] htslib/*.h - -# We recommend libhts-using programs be built against a separate htslib -# installation. However if you feel that you must bundle htslib source -# code with your program, this hook enables Automake-style "make dist" -# for this subdirectory. If you do bundle an htslib snapshot, please -# add identifying information to $(PACKAGE_VERSION) as appropriate. 
-# (The wildcards attempt to omit non-exported files (.git*, README.md, -# etc) and other detritus that might be in the top-level directory.) -distdir: - @if [ -z "$(distdir)" ]; then echo "Please supply a distdir=DIR argument."; false; fi - tar -c *.[ch15] [ILMNRchtv]*[ELSbcekmnth] | (cd $(distdir) && tar -x) - +cd $(distdir) && $(MAKE) distclean - -force: - - -.PHONY: all check check-untracked clean distclean distdir force -.PHONY: install install-pkgconfig installdirs lib-shared lib-static -.PHONY: maintainer-check maintainer-clean mostlyclean plugins -.PHONY: print-config print-version show-version tags -.PHONY: test test-shlib-exports test_thrash testclean -.PHONY: clean-so install-so -.PHONY: clean-cygdll install-cygdll -.PHONY: clean-dll install-dll -.PHONY: clean-dylib install-dylib -.PHONY: test_htscodecs_rans4x8 test_htscodecs_rans4x16 test_htscodecs_arith -.PHONY: test_htscodecs_tok3 test_htscodecs_fqzcomp test_htscodecs_varint diff --git a/src/htslib-1.18/Makefile.vcfppR b/src/htslib-1.18/Makefile.vcfppR deleted file mode 100644 index 29d0272..0000000 --- a/src/htslib-1.18/Makefile.vcfppR +++ /dev/null @@ -1,968 +0,0 @@ -# Makefile for htslib, a C library for high-throughput sequencing data formats. -# -# Copyright (C) 2013-2023 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -CC := $(shell ${R_HOME}/bin/R CMD config CC) -AR := $(shell ${R_HOME}/bin/R CMD config AR) -RANLIB := $(shell ${R_HOME}/bin/R CMD config RANLIB) -CFLAGS := $(shell ${R_HOME}/bin/R CMD config CFLAGS) -CPPFLAGS := $(shell ${R_HOME}/bin/R CMD config CPPFLAGS) -LDFLAGS := $(shell ${R_HOME}/bin/R CMD config LDFLAGS) - -# Default libraries to link if configure is not used -htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl - -CPPFLAGS = -# TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64 -# to CFLAGS manually, here or in config.mk if the latter exists. 
-# TODO: probably update cram code to make it compile cleanly with -Wc++-compat -# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600 -#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600 -CFLAGS += -fpic -fvisibility=hidden -Wstrict-prototypes -EXTRA_CFLAGS_PIC = -TARGET_CFLAGS = -LDFLAGS += -fvisibility=hidden -VERSION_SCRIPT_LDFLAGS = -Wl,-version-script,$(srcprefix)htslib.map -LIBS = $(htslib_default_libs) - -prefix = /usr/local -exec_prefix = $(prefix) -bindir = $(exec_prefix)/bin -includedir = $(prefix)/include -libdir = $(exec_prefix)/lib -libexecdir = $(exec_prefix)/libexec -datarootdir = $(prefix)/share -mandir = $(datarootdir)/man -man1dir = $(mandir)/man1 -man5dir = $(mandir)/man5 -man7dir = $(mandir)/man7 -pkgconfigdir= $(libdir)/pkgconfig - -MKDIR_P = mkdir -p -INSTALL = install -p -INSTALL_DATA = $(INSTALL) -m 644 -INSTALL_DIR = $(MKDIR_P) -m 755 -LIB_PERM = 644 -INSTALL_LIB = $(INSTALL) -m $(LIB_PERM) -INSTALL_MAN = $(INSTALL_DATA) -INSTALL_PROGRAM = $(INSTALL) - -# Set by config.mk if plugins are enabled -plugindir = - -BUILT_PROGRAMS = \ - bgzip \ - htsfile \ - tabix - -BUILT_TEST_PROGRAMS = \ - test/hts_endian \ - test/fieldarith \ - test/hfile \ - test/pileup \ - test/pileup_mod \ - test/plugins-dlhts \ - test/sam \ - test/test_bgzf \ - test/test_expr \ - test/test_faidx \ - test/test_kfunc \ - test/test_kstring \ - test/test_mod \ - test/test_realn \ - test/test-regidx \ - test/test_str2int \ - test/test_time_funcs \ - test/test_view \ - test/test_index \ - test/test-vcf-api \ - test/test-vcf-sweep \ - test/test-bcf-sr \ - test/fuzz/hts_open_fuzzer.o \ - test/test-bcf-translate \ - test/test-parse-reg \ - test/test_introspection \ - test/test-bcf_set_variant_type - -BUILT_THRASH_PROGRAMS = \ - test/thrash_threads1 \ - test/thrash_threads2 \ - test/thrash_threads3 \ - test/thrash_threads4 \ - test/thrash_threads5 \ - test/thrash_threads6 \ - test/thrash_threads7 - -all: lib-static lib-shared $(BUILT_PROGRAMS) plugins 
$(BUILT_TEST_PROGRAMS) \ - htslib_static.mk htslib-uninstalled.pc - -ALL_CPPFLAGS = -I. $(CPPFLAGS) - -# Usually htscodecs.mk is generated by running configure or config.status, -# but if those aren't used create a default here. -htscodecs.mk: - echo '# Default htscodecs.mk generated by Makefile' > $@ - echo 'include $$(HTSPREFIX)htscodecs_bundled.mk' >> $@ - $(srcdir)/hts_probe_cc.sh '$(CC)' '$(CFLAGS) $(CPPFLAGS)' '$(LDFLAGS)' >> $@ - -srcdir = . -srcprefix = -HTSPREFIX = - -# Flags for SIMD code -HTS_CFLAGS_AVX2 = -HTS_CFLAGS_AVX512 = -HTS_CFLAGS_SSE4 = - -# Control building of SIMD code. Not used if configure has been run. -HTS_BUILD_AVX2 = -HTS_BUILD_AVX512 = -HTS_BUILD_SSSE3 = -HTS_BUILD_POPCNT = -HTS_BUILD_SSE4_1 = - -include htslib_vars.mk -include htscodecs.mk - -# If not using GNU make, you need to copy the version number from version.sh -# into here. -PACKAGE_VERSION := $(shell $(srcdir)/version.sh) - -LIBHTS_SOVERSION = 3 - -# Version numbers for the Mac dynamic library. Note that the leading 3 -# is not strictly necessary and should be removed the next time -# LIBHTS_SOVERSION is bumped (see #1144 and -# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23) -MACH_O_COMPATIBILITY_VERSION = 3.1.18 -MACH_O_CURRENT_VERSION = 3.1.18 - -# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string -# even if this is a dirty or untagged Git working tree. -NUMERIC_VERSION := $(shell $(srcdir)/version.sh numeric) - -# Force version.h to be remade if $(PACKAGE_VERSION) has changed. 
-version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force)) - -version.h: - echo '#define HTS_VERSION_TEXT "$(PACKAGE_VERSION)"' > $@ - -print-version: - @echo $(PACKAGE_VERSION) - -show-version: - @echo PACKAGE_VERSION = $(PACKAGE_VERSION) - @echo NUMERIC_VERSION = $(NUMERIC_VERSION) - -config_vars.h: override escape=$(subst ',\x27,$(subst ",\",$(subst \,\\,$(1)))) -config_vars.h: override hts_cc_escaped=$(call escape,$(CC)) -config_vars.h: override hts_cppflags_escaped=$(call escape,$(CPPFLAGS)) -config_vars.h: override hts_cflags_escaped=$(call escape,$(CFLAGS)) -config_vars.h: override hts_ldflags_escaped=$(call escape,$(LDFLAGS)) -config_vars.h: override hts_libs_escaped=$(call escape,$(LIBS)) - -config_vars.h: - printf '#define HTS_CC "%s"\n#define HTS_CPPFLAGS "%s"\n#define HTS_CFLAGS "%s"\n#define HTS_LDFLAGS "%s"\n#define HTS_LIBS "%s"\n' \ - '$(hts_cc_escaped)' \ - '$(hts_cppflags_escaped)' \ - '$(hts_cflags_escaped)' \ - '$(hts_ldflags_escaped)' \ - '$(hts_libs_escaped)' > $@ - -.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so - -.c.o: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $< - -.c.pico: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $< - - -LIBHTS_OBJS = \ - kfunc.o \ - kstring.o \ - bcf_sr_sort.o \ - bgzf.o \ - errmod.o \ - faidx.o \ - header.o \ - hfile.o \ - hts.o \ - hts_expr.o \ - hts_os.o\ - md5.o \ - multipart.o \ - probaln.o \ - realn.o \ - regidx.o \ - region.o \ - sam.o \ - sam_mods.o \ - synced_bcf_reader.o \ - vcf_sweep.o \ - tbx.o \ - textutils.o \ - thread_pool.o \ - vcf.o \ - vcfutils.o \ - cram/cram_codecs.o \ - cram/cram_decode.o \ - cram/cram_encode.o \ - cram/cram_external.o \ - cram/cram_index.o \ - cram/cram_io.o \ - cram/cram_stats.o \ - cram/mFILE.o \ - cram/open_trace_file.o \ - cram/pooled_alloc.o \ - cram/string_alloc.o \ - $(HTSCODECS_OBJS) \ - $(NONCONFIGURE_OBJS) - -# Without configure we wish to have a rich set of 
default figures, -# but we still need conditional inclusion as we wish to still -# support ./configure --disable-blah. -NONCONFIGURE_OBJS = hfile_libcurl.o - -PLUGIN_EXT = -PLUGIN_OBJS = - -cram_h = cram/cram.h $(cram_samtools_h) $(header_h) $(cram_structs_h) $(cram_io_h) cram/cram_encode.h cram/cram_decode.h cram/cram_stats.h cram/cram_codecs.h cram/cram_index.h $(htslib_cram_h) -cram_io_h = cram/cram_io.h $(cram_misc_h) -cram_misc_h = cram/misc.h -cram_os_h = cram/os.h $(htslib_hts_endian_h) -cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h) -cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) $(htslib_cram_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h) -cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h -bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h) -header_h = header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h) $(htslib_sam_h) -hfile_internal_h = hfile_internal.h $(htslib_hts_defs_h) $(htslib_hfile_h) $(textutils_internal_h) -hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h) -hts_time_funcs_h = hts_time_funcs.h -sam_internal_h = sam_internal.h $(htslib_sam_h) -textutils_internal_h = textutils_internal.h $(htslib_kstring_h) -thread_pool_internal_h = thread_pool_internal.h $(htslib_thread_pool_h) - -# To be effective, config.mk needs to appear after most Makefile variables are -# set but before most rules appear, so that it can both use previously-set -# variables in its own rules' prerequisites and also update variables for use -# in later rules' prerequisites. - -# If your make doesn't accept -include, change this to 'include' if you are -# using the configure script or just comment the line out if you are not. --include config.mk - -# Usually config.h is generated by running configure or config.status, -# but if those aren't used create a default config.h here. 
-config.h: - echo '/* Default config.h generated by Makefile */' > $@ - echo '#ifndef _XOPEN_SOURCE' >> $@ - echo '#define _XOPEN_SOURCE 600' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_LIBBZ2 1' >> $@ - echo '#define HAVE_LIBLZMA 1' >> $@ - echo '#ifndef __APPLE__' >> $@ - echo '#define HAVE_LZMA_H 1' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_DRAND48 1' >> $@ - echo '#define HAVE_LIBCURL 1' >> $@ - if [ "x$(HTS_BUILD_POPCNT)" != "x" ] && \ - [ "x$(HTS_BUILD_SSE4_1)" != "x" ] && \ - [ "x$(HTS_BUILD_SSSE3)" != "x" ]; then \ - echo '#define HAVE_POPCNT 1' >> $@ ; \ - echo '#define HAVE_SSE4_1 1' >> $@ ; \ - echo '#define HAVE_SSSE3 1' >> $@ ; \ - echo '#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0' >> $@ ; \ - echo '#define UBSAN 1' >> $@ ; \ - echo '#endif' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \ - echo '#define HAVE_AVX2 1' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \ - echo '#define HAVE_AVX512 1' >> $@ ; \ - fi - -# And similarly for htslib.pc.tmp ("pkg-config template"). No dependency -# on htslib.pc.in listed, as if that file is newer the usual way to regenerate -# this target is via configure or config.status rather than this rule. -htslib.pc.tmp: - sed -e '/^static_libs=/s/@static_LIBS@/$(htslib_default_libs)/;s#@[^-][^@]*@##g' $(srcprefix)htslib.pc.in > $@ - -# Create a makefile fragment listing the libraries and LDFLAGS needed for -# static linking. This can be included by projects that want to build -# and link against the htslib source tree instead of an installed library. -htslib_static.mk: htslib.pc.tmp - sed -n '/^static_libs=/s/[^=]*=/HTSLIB_static_LIBS = /p;/^static_ldflags=/s/[^=]*=/HTSLIB_static_LDFLAGS = /p' $< > $@ - - -lib-static: libhts.a - -# $(shell), :=, and ifeq/.../endif are GNU Make-specific. If you don't have -# GNU Make, comment out the parts of these conditionals that don't apply. 
-ifneq "$(origin PLATFORM)" "file" -PLATFORM := $(shell uname -s) -endif -ifeq "$(PLATFORM)" "Darwin" -SHLIB_FLAVOUR = dylib -lib-shared: libhts.dylib -else ifeq "$(findstring CYGWIN,$(PLATFORM))" "CYGWIN" -SHLIB_FLAVOUR = cygdll -lib-shared: cyghts-$(LIBHTS_SOVERSION).dll -else ifeq "$(findstring MSYS,$(PLATFORM))" "MSYS" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else ifeq "$(findstring MINGW,$(PLATFORM))" "MINGW" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else -SHLIB_FLAVOUR = so -lib-shared: libhts.so -endif - -BUILT_PLUGINS = $(PLUGIN_OBJS:.o=$(PLUGIN_EXT)) - -ifneq "$(BUILT_PLUGINS)" "" -plugins: lib-shared -endif -plugins: $(BUILT_PLUGINS) - - -libhts.a: $(LIBHTS_OBJS) - @-rm -f $@ - $(AR) -rc $@ $(LIBHTS_OBJS) - -$(RANLIB) $@ - -print-config: - @echo HTS_CFLAGS_AVX2 = $(HTS_CFLAGS_AVX2) - @echo HTS_CFLAGS_AVX512 = $(HTS_CFLAGS_AVX512) - @echo HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSE4) - @echo HTS_HAVE_NEON = $(HTS_HAVE_NEON) - @echo LDFLAGS = $(LDFLAGS) - @echo LIBHTS_OBJS = $(LIBHTS_OBJS) - @echo LIBS = $(LIBS) - @echo PLATFORM = $(PLATFORM) - -# The target here is libhts.so, as that is the built file that other rules -# depend upon and that is used when -lhts appears in other program's recipes. -# As a byproduct invisible to make, libhts.so.NN is also created, as it is the -# file used at runtime (when $LD_LIBRARY_PATH includes the build directory). - -libhts.so: $(LIBHTS_OBJS:.o=.pico) - $(CC) -shared -Wl,-soname,libhts.so.$(LIBHTS_SOVERSION) $(VERSION_SCRIPT_LDFLAGS) $(LDFLAGS) -o $@ $(LIBHTS_OBJS:.o=.pico) $(LIBS) -lpthread - ln -sf $@ libhts.so.$(LIBHTS_SOVERSION) - -# Similarly this also creates libhts.NN.dylib as a byproduct, so that programs -# when run can find this uninstalled shared library (when $DYLD_LIBRARY_PATH -# includes this project's build directory). 
- -libhts.dylib: $(LIBHTS_OBJS) - $(CC) -dynamiclib -install_name $(libdir)/libhts.$(LIBHTS_SOVERSION).dylib -current_version $(MACH_O_CURRENT_VERSION) -compatibility_version $(MACH_O_COMPATIBILITY_VERSION) $(LDFLAGS) -o $@ $(LIBHTS_OBJS) $(LIBS) - ln -sf $@ libhts.$(LIBHTS_SOVERSION).dylib - -cyghts-$(LIBHTS_SOVERSION).dll libhts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=libhts.dll.a -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).dll hts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=hts.dll.a -Wl,--enable-auto-import -Wl,--exclude-all-symbols $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).def: hts-$(LIBHTS_SOVERSION).dll - gendef hts-$(LIBHTS_SOVERSION).dll - -hts-$(LIBHTS_SOVERSION).lib: hts-$(LIBHTS_SOVERSION).def - dlltool -m i386:x86-64 -d hts-$(LIBHTS_SOVERSION).def -l hts-$(LIBHTS_SOVERSION).lib - -# Bundling libraries, binaries, dll dependencies, and licenses into a -# single directory. NB: This is not needed for end-users, but a test bed -# for maintainers building binary distributions. -# -# NOTE: only tested on the supported MSYS2/MINGW64 environment. 
-dist-windows: DESTDIR= -dist-windows: prefix=dist-windows -dist-windows: install - cp hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib dist-windows/lib - cp `ldd hts-$(LIBHTS_SOVERSION).dll| awk '/mingw64/ {print $$3}'` dist-windows/bin - mkdir -p dist-windows/share/licenses/htslib - -cp -r /mingw64/share/licenses/mingw-w64-libraries \ - /mingw64/share/licenses/brotli \ - /mingw64/share/licenses/bzip2 \ - /mingw64/share/licenses/gcc-libs \ - /mingw64/share/licenses/libdeflate \ - /mingw64/share/licenses/libpsl \ - /mingw64/share/licenses/libtre \ - /mingw64/share/licenses/libwinpthread \ - /mingw64/share/licenses/openssl \ - /mingw64/share/licenses/xz \ - /mingw64/share/licenses/zlib \ - /mingw64/share/licenses/zstd \ - dist-windows/share/licenses/ - -cp -r /usr/share/licenses/curl \ - dist-windows/share/licenses/ - cp LICENSE dist-windows/share/licenses/htslib/ - - -# Target to allow htslib.mk to build all the object files before it -# links the shared and static libraries. -hts-object-files: $(LIBHTS_OBJS) - touch $@ - -# On Unix dlopen("libhts.so.NN", RTLD_LAZY) may default to RTLD_LOCAL. -# Hence plugins need to link to (shared) libhts.so.NN themselves, as they -# may not be able to access libhts symbols via the main program's libhts -# if that was dynamically loaded without an explicit RTLD_GLOBAL. -%.so: %.pico libhts.so - $(CC) -shared -Wl,-E $(LDFLAGS) -o $@ $< libhts.so $(LIBS) -lpthread - -# For programs *statically* linked to libhts.a, on macOS loading a plugin -# linked to a shared libhts.NN.dylib would lead to conflicting duplicate -# symbols. Fortunately macOS dlopen() defaults to RTLD_GLOBAL so there -# is less need for plugins to link back to libhts themselves. 
-%.bundle: %.o - $(CC) -bundle -Wl,-undefined,dynamic_lookup $(LDFLAGS) -o $@ $< $(LIBS) - -%.cygdll: %.o libhts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< libhts.dll.a $(LIBS) - -%.dll: %.o hts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS) - - -bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h) -errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h) -kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h) -header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h) -hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h) -hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h) -hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(hts_time_funcs_h) -hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h) -hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h) -hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c -vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) 
$(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h) -sam.o sam.pico: sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) -sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h) -tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h) -faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h) -bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h $(bcf_sr_sort_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h) -synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(bcf_sr_sort_h) -vcf_sweep.o vcf_sweep.pico: vcf_sweep.c config.h $(htslib_vcf_sweep_h) $(htslib_bgzf_h) -vcfutils.o vcfutils.pico: vcfutils.c config.h $(htslib_vcfutils_h) $(htslib_kbitset_h) -kfunc.o kfunc.pico: kfunc.c config.h $(htslib_kfunc_h) -regidx.o regidx.pico: regidx.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_regidx_h) $(hts_internal_h) -region.o region.pico: region.c config.h $(htslib_hts_h) $(htslib_khash_h) -md5.o md5.pico: md5.c config.h $(htslib_hts_h) $(htslib_hts_endian_h) -multipart.o multipart.pico: multipart.c config.h $(htslib_kstring_h) $(hts_internal_h) $(hfile_internal_h) -plugin.o plugin.pico: plugin.c config.h $(hts_internal_h) $(htslib_kstring_h) -probaln.o probaln.pico: probaln.c config.h $(htslib_hts_h) -realn.o realn.pico: realn.c config.h $(htslib_hts_h) $(htslib_sam_h) -textutils.o textutils.pico: textutils.c config.h $(htslib_hfile_h) $(htslib_kstring_h) $(htslib_sam_h) 
$(hts_internal_h) - -cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h) -cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) -cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(sam_internal_h) $(htslib_hts_h) $(htslib_hts_endian_h) $(textutils_internal_h) -cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htscodecs_rANS_static4x16_h) $(htslib_hfile_h) $(cram_h) -cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) -cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h) -cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h) -cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h -cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(htslib_hts_h) -cram/pooled_alloc.o cram/pooled_alloc.pico: cram/pooled_alloc.c config.h cram/pooled_alloc.h $(cram_misc_h) -cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h -thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h) $(htslib_hts_log_h) - -htscodecs/htscodecs/arith_dynamic.o htscodecs/htscodecs/arith_dynamic.pico: htscodecs/htscodecs/arith_dynamic.c config.h $(htscodecs_arith_dynamic_h) $(htscodecs_varint_h) 
$(htscodecs_pack_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/fqzcomp_qual.o htscodecs/htscodecs/fqzcomp_qual.pico: htscodecs/htscodecs/fqzcomp_qual.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: htscodecs/htscodecs/htscodecs.c $(htscodecs_htscodecs_h) $(htscodecs_version_h) -htscodecs/htscodecs/pack.o htscodecs/htscodecs/pack.pico: htscodecs/htscodecs/pack.c config.h $(htscodecs_pack_h) -htscodecs/htscodecs/rANS_static32x16pr.o htscodecs/htscodecs/rANS_static32x16pr.pico: htscodecs/htscodecs/rANS_static32x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: htscodecs/htscodecs/rANS_static32x16pr_avx2.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_permute_h) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: htscodecs/htscodecs/rANS_static32x16pr_avx512.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_neon.o htscodecs/htscodecs/rANS_static32x16pr_neon.pico: htscodecs/htscodecs/rANS_static32x16pr_neon.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: htscodecs/htscodecs/rANS_static32x16pr_sse4.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) 
-htscodecs/htscodecs/rANS_static4x16pr.o htscodecs/htscodecs/rANS_static4x16pr.pico: htscodecs/htscodecs/rANS_static4x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(htscodecs_utils_h) $(htscodecs_rANS_static32x16pr_h) -htscodecs/htscodecs/rANS_static.o htscodecs/htscodecs/rANS_static.pico: htscodecs/htscodecs/rANS_static.c config.h $(htscodecs_rANS_byte_h) $(htscodecs_utils_h) $(htscodecs_rANS_static_h) -htscodecs/htscodecs/rle.o htscodecs/htscodecs/rle.pico: htscodecs/htscodecs/rle.c config.h $(htscodecs_varint_h) $(htscodecs_rle_h) -htscodecs/htscodecs/tokenise_name3.o htscodecs/htscodecs/tokenise_name3.pico: htscodecs/htscodecs/tokenise_name3.c config.h $(htscodecs_pooled_alloc_h) $(htscodecs_arith_dynamic_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_tokenise_name3_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/utils.o htscodecs/htscodecs/utils.pico: htscodecs/htscodecs/utils.c config.h $(htscodecs_utils_h) - -# Extra CFLAGS for specific files -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX2) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX512) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: TARGET_CFLAGS = $(HTS_CFLAGS_SSE4) - -bgzip: bgzip.o libhts.a - $(CC) $(LDFLAGS) -o $@ bgzip.o libhts.a $(LIBS) -lpthread - -htsfile: htsfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ htsfile.o libhts.a $(LIBS) -lpthread - -tabix: tabix.o libhts.a - $(CC) $(LDFLAGS) -o $@ tabix.o libhts.a $(LIBS) -lpthread - -bgzip.o: bgzip.c config.h $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_hfile_h) -htsfile.o: htsfile.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -tabix.o: tabix.c config.h $(htslib_tbx_h) $(htslib_sam_h) 
$(htslib_vcf_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(htslib_hts_log_h) - -# Runes to check that the htscodecs submodule is present -ifdef HTSCODECS_SOURCES -htscodecs/htscodecs/%.c: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs/%.h: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs: - @if test -e .git ; then \ - printf "\\n\\nError: htscodecs submodule files not present for htslib.\\n\ - Try running: \\n\ - git submodule update --init --recursive\\n\ - in the top-level htslib directory and then re-run make.\\n\\n\\n" ; \ - else \ - printf "\\n\\nError: htscodecs submodule files not present and this is not a git checkout.\\n\ - You have an incomplete distribution. Please try downloading one of the\\n\ - official releases from https://www.htslib.org/\\n" ; \ - fi - @false - -# Build the htscodecs/htscodecs/version.h file if necessary -htscodecs/htscodecs/version.h: force - @if test -e $(srcdir)/htscodecs/.git && test -e $(srcdir)/htscodecs/configure.ac ; then \ - vers=`cd $(srcdir)/htscodecs && git describe --always --dirty --match 'v[0-9]\.[0-9]*'` && \ - case "$$vers" in \ - v*) vers=$${vers#v} ;; \ - *) iv=`awk '/^AC_INIT/ { match($$0, /^AC_INIT\(htscodecs, *([0-9](\.[0-9])*)\)/, m); print substr($$0, m[1, "start"], m[1, "length"]) }' $(srcdir)/htscodecs/configure.ac` ; vers="$$iv$${vers:+-g$$vers}" ;; \ - esac ; \ - if ! 
grep -s -q '"'"$$vers"'"' $@ ; then \ - echo 'Updating $@ : #define HTSCODECS_VERSION_TEXT "'"$$vers"'"' ; \ - echo '#define HTSCODECS_VERSION_TEXT "'"$$vers"'"' > $@ ; \ - fi ; \ - fi -endif - -# Maintainer source code checks -# - copyright boilerplate presence -# - tab and trailing space detection -maintainer-check: - test/maintainer/check_copyright.pl . - test/maintainer/check_spaces.pl . - -# Look for untracked files in the git repository. -check-untracked: - @if test -e .git && git status --porcelain | grep '^\?'; then \ - echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \ - false ; \ - fi - -# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/ -# for brevity in test and install rules, and so that build logs do not have -# ./ sprinkled throughout. -SRC = $(srcprefix) - -# For tests that might use it, set $REF_PATH explicitly to use only reference -# areas within the test suite (or set it to ':' to use no reference areas). -# -# If using MSYS, avoid poor shell expansion via: -# MSYS2_ARG_CONV_EXCL="*" make check -check test: all $(HTSCODECS_TEST_TARGETS) - test/hts_endian - test/test_expr - test/test_kfunc - test/test_kstring - test/test_str2int - test/test_time_funcs - test/fieldarith test/fieldarith.sam - test/hfile - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -g ./libhts.$(SHLIB_FLAVOUR); \ - fi - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. 
test/with-shlib.sh test/plugins-dlhts -l ./libhts.$(SHLIB_FLAVOUR); \ - fi - test/test_bgzf test/bgziptest.txt - test/test-parse-reg -t test/colons.bam - cd test/faidx && ./test-faidx.sh faidx.tst - cd test/sam_filter && ./filter.sh filter.tst - cd test/tabix && ./test-tabix.sh tabix.tst - cd test/mpileup && ./test-pileup.sh mpileup.tst - cd test/fastq && ./test-fastq.sh - cd test/base_mods && ./base-mods.sh base-mods.tst - REF_PATH=: test/sam test/ce.fa test/faidx/faidx.fa test/faidx/fastqs.fq - test/test-regidx - cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-} - -test/hts_endian: test/hts_endian.o - $(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS) - -test/fuzz/hts_open_fuzzer: test/fuzz/hts_open_fuzzer.o - $(CC) $(LDFLAGS) -o $@ test/fuzz/hts_open_fuzzer.o libhts.a $(LIBS) -lpthread - -test/fieldarith: test/fieldarith.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/fieldarith.o libhts.a $(LIBS) -lpthread - -test/hfile: test/hfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/hfile.o libhts.a $(LIBS) -lpthread - -test/pileup: test/pileup.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread - -test/pileup_mod: test/pileup_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread - -test/plugins-dlhts: test/plugins-dlhts.o - $(CC) $(LDFLAGS) -o $@ test/plugins-dlhts.o $(LIBS) - -test/sam: test/sam.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/sam.o libhts.a $(LIBS) -lpthread - -test/test_bgzf: test/test_bgzf.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a -lz $(LIBS) -lpthread - -test/test_expr: test/test_expr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_expr.o libhts.a -lz $(LIBS) -lpthread - -test/test_faidx: test/test_faidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_faidx.o libhts.a -lz $(LIBS) -lpthread - -test/test_kfunc: test/test_kfunc.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a -lz $(LIBS) -lpthread - -test/test_kstring: test/test_kstring.o libhts.a - $(CC) $(LDFLAGS) -o $@ 
test/test_kstring.o libhts.a -lz $(LIBS) -lpthread - -test/test_mod: test/test_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread - -test/test_realn: test/test_realn.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread - -test/test-regidx: test/test-regidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread - -test/test-parse-reg: test/test-parse-reg.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-parse-reg.o libhts.a $(LIBS) -lpthread - -test/test_str2int: test/test_str2int.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_str2int.o libhts.a $(LIBS) -lpthread - -test/test_time_funcs: test/test_time_funcs.o - $(CC) $(LDFLAGS) -o $@ test/test_time_funcs.o - -test/test_view: test/test_view.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LIBS) -lpthread - -test/test_index: test/test_index.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_index.o libhts.a $(LIBS) -lpthread - -test/test-vcf-api: test/test-vcf-api.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-api.o libhts.a $(LIBS) -lpthread - -test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-sweep.o libhts.a $(LIBS) -lpthread - -test/test-bcf-sr: test/test-bcf-sr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-sr.o libhts.a -lz $(LIBS) -lpthread - -test/test-bcf-translate: test/test-bcf-translate.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a -lz $(LIBS) -lpthread - -test/test_introspection: test/test_introspection.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_introspection.o libhts.a $(LIBS) -lpthread - -test/test-bcf_set_variant_type: test/test-bcf_set_variant_type.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf_set_variant_type.o libhts.a $(LIBS) -lpthread - -# Extra tests for bundled htscodecs -test_htscodecs_rans4x8: htscodecs/tests/rans4x8 - cd htscodecs/tests && srcdir=. 
&& export srcdir && ./rans4x8.test - -test_htscodecs_rans4x16: htscodecs/tests/rans4x16pr - cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x16.test - -test_htscodecs_arith: htscodecs/tests/arith_dynamic - cd htscodecs/tests && srcdir=. && export srcdir && ./arith.test - -test_htscodecs_tok3: htscodecs/tests/tokenise_name3 - cd htscodecs/tests && srcdir=. && export srcdir && ./tok3.test - -test_htscodecs_fqzcomp: htscodecs/tests/fqzcomp_qual - cd htscodecs/tests && srcdir=. && export srcdir && ./fqzcomp.test - -test_htscodecs_varint: htscodecs/tests/varint - cd htscodecs/tests && ./varint - -htscodecs/tests/arith_dynamic: htscodecs/tests/arith_dynamic_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/fqzcomp_qual: htscodecs/tests/fqzcomp_qual_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x16pr: htscodecs/tests/rANS_static4x16pr_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x8: htscodecs/tests/rANS_static_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/tokenise_name3: htscodecs/tests/tokenise_name3_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/varint: htscodecs/tests/varint_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/arith_dynamic_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/arith_dynamic_test.o: htscodecs/tests/arith_dynamic_test.c config.h $(htscodecs_arith_dynamic_h) -htscodecs/tests/fqzcomp_qual_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/fqzcomp_qual_test.o: htscodecs/tests/fqzcomp_qual_test.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) -htscodecs/tests/rANS_static4x16pr_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static4x16pr_test.o: htscodecs/tests/rANS_static4x16pr_test.c config.h $(htscodecs_rANS_static4x16_h) 
-htscodecs/tests/rANS_static_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static_test.o: htscodecs/tests/rANS_static_test.c config.h $(htscodecs_rANS_static_h) -htscodecs/tests/tokenise_name3_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/tokenise_name3_test.o: htscodecs/tests/tokenise_name3_test.c config.h $(htscodecs_tokenise_name3_h) -htscodecs/tests/varint_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/varint_test.o: htscodecs/tests/varint_test.c config.h $(htscodecs_varint_h) - -test/hts_endian.o: test/hts_endian.c config.h $(htslib_hts_endian_h) -test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h) -test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h) -test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h) -test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h) -test/plugins-dlhts.o: test/plugins-dlhts.c config.h -test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h) -test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(hfile_internal_h) -test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h) -test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h) -test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h) -test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h) -test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h) -test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) -test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h) -test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h) -test/test_time_funcs.o: 
test/test_time_funcs.c config.h $(hts_time_funcs_h) -test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h) -test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h) -test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h) -test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h) -test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h) -test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_synced_bcf_reader_h) $(htslib_hts_h) $(htslib_vcf_h) -test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h) -test/test_introspection.o: test/test_introspection.c config.h $(htslib_hts_h) $(htslib_hfile_h) -test/test-bcf_set_variant_type.o: test/test-bcf_set_variant_type.c config.h $(htslib_hts_h) vcf.c - - -test/thrash_threads1: test/thrash_threads1.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads1.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads2: test/thrash_threads2.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads2.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads3: test/thrash_threads3.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads3.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads4: test/thrash_threads4.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads4.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads5: test/thrash_threads5.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads5.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads6: test/thrash_threads6.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads6.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads7: test/thrash_threads7.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads7.o libhts.a -lz $(LIBS) -lpthread - -test_thrash: $(BUILT_THRASH_PROGRAMS) - -# Test to ensure the functions in the header files are exported by the shared -# library. 
This currently works by comparing the output from ctags on -# the headers with the list of functions exported by the shared library. -# Note that functions marked as exported in the .c files and not the public -# headers will be missed by this test. -test-shlib-exports: header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - @echo "Checking shared library exports" - @if test ! -s header-exports.txt ; then echo "Error: header-exports.txt empty" ; false ; fi - @if test ! -s shlib-exports-$(SHLIB_FLAVOUR).txt ; then echo "Error: shlib-exports-$(SHLIB_FLAVOUR).txt empty" ; false ; fi - @! comm -23 header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt | grep . || \ - ( echo "Error: Found unexported symbols (listed above)" ; false ) - -# Extract symbols that should be exported from public headers using ctags -# Filter out macros in htslib/hts_defs.h. -header-exports.txt: test/header_syms.pl htslib/*.h - test/header_syms.pl htslib/*.h | sort -u -o $@ - -shlib-exports-so.txt: libhts.so - nm -D -g libhts.so | awk '$$2 == "T" { sub("@.*", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dylib.txt: libhts.dylib - nm -Ug libhts.dylib | awk '$$2 == "T" { sub("^_", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dll.txt: hts.dll.a - nm -g hts.dll.a | awk '$$2 == "T" { print $$3 }' | sort -u -o $@ - -$(srcprefix)htslib.map: libhts.so - LC_ALL=C ; export LC_ALL; \ - curr_vers=`expr 'X$(PACKAGE_VERSION)' : 'X\([0-9]*\.[0-9.]*\)'` ; \ - last_vers=`awk '/^HTSLIB_[0-9](\.[0-9]+)+/ { lv = $$1 } END { print lv }' htslib.map` ; \ - if test "x$$curr_vers" = 'x' || test "x$$last_vers" = 'x' ; then \ - echo "Version check failed : $$curr_vers / $$las_vers" 1>&2 ; \ - exit 1 ; \ - fi && \ - if test "HTSLIB_$$curr_vers" = "$$last_vers" ; then \ - echo "Refusing to update $@ - HTSlib version not changed" 1>&2 ; \ - exit 1 ; \ - fi && \ - nm --with-symbol-versions -D -g libhts.so | awk '$$2 ~ /^[DGRT]$$/ && $$3 ~ /@@Base$$/ && $$3 !~ /^(_init|_fini|_edata)@@/ { sub(/@@Base$$/, 
";", $$3); print " " $$3 }' > $@.tmp && \ - if [ -s $@.tmp ] ; then \ - cat $@ > $@.new.tmp && \ - printf '\n%s {\n' "HTSLIB_$$curr_vers" >> $@.new.tmp && \ - cat $@.tmp >> $@.new.tmp && \ - printf '} %s;\n' "$$last_vers" >> $@.new.tmp && \ - rm -f $@.tmp && \ - mv $@.new.tmp $@ ; \ - fi ; \ - else \ - rm -f $@.tmp ; \ - fi - -install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB_FLAVOUR) install-pkgconfig - $(INSTALL_PROGRAM) $(BUILT_PROGRAMS) $(DESTDIR)$(bindir) - if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi - $(INSTALL_DATA) $(SRC)htslib/*.h $(DESTDIR)$(includedir)/htslib - $(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a - $(INSTALL_MAN) $(SRC)bgzip.1 $(SRC)htsfile.1 $(SRC)tabix.1 $(DESTDIR)$(man1dir) - $(INSTALL_MAN) $(SRC)faidx.5 $(SRC)sam.5 $(SRC)vcf.5 $(DESTDIR)$(man5dir) - $(INSTALL_MAN) $(SRC)htslib-s3-plugin.7 $(DESTDIR)$(man7dir) - -installdirs: - $(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(includedir) $(DESTDIR)$(includedir)/htslib $(DESTDIR)$(libdir) $(DESTDIR)$(man1dir) $(DESTDIR)$(man5dir) $(DESTDIR)$(man7dir) $(DESTDIR)$(pkgconfigdir) - if test -n "$(plugindir)"; then $(INSTALL_DIR) $(DESTDIR)$(plugindir); fi - -# After installation, the real file in $(libdir) will be libhts.so.X.Y.Z, -# with symlinks libhts.so (used via -lhts during linking of client programs) -# and libhts.so.NN (used by client executables at runtime). 
- -install-so: libhts.so installdirs - $(INSTALL_LIB) libhts.so $(DESTDIR)$(libdir)/libhts.so.$(PACKAGE_VERSION) - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so.$(LIBHTS_SOVERSION) - -install-cygdll: cyghts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) cyghts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/cyghts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) libhts.dll.a $(DESTDIR)$(libdir)/libhts.dll.a - -install-dll: hts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) hts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/hts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) hts.dll.a $(DESTDIR)$(libdir)/hts.dll.a - -install-dylib: libhts.dylib installdirs - $(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.$(LIBHTS_SOVERSION).dylib - -# Substitute these pseudo-autoconf variables only at install time -# so that "make install prefix=/prefix/path" etc continue to work. -install-pkgconfig: htslib.pc.tmp installdirs - sed -e 's#@-includedir@#$(includedir)#g;s#@-libdir@#$(libdir)#g;s#@-PACKAGE_VERSION@#$(PACKAGE_VERSION)#g' htslib.pc.tmp > $(DESTDIR)$(pkgconfigdir)/htslib.pc - chmod 644 $(DESTDIR)$(pkgconfigdir)/htslib.pc - -# A pkg-config file (suitable for copying to $PKG_CONFIG_PATH) that provides -# flags for building against the uninstalled library in this build directory. 
-htslib-uninstalled.pc: htslib.pc.tmp - sed -e 's#@-includedir@#'`pwd`'#g;s#@-libdir@#'`pwd`'#g' htslib.pc.tmp > $@ - - -testclean: - -rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* test/faidx/FAIL* \ - test/longrefs/*.tmp.* test/tabix/*.tmp.* test/tabix/FAIL* \ - header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - -rm -rf htscodecs/tests/test.out - -# Only remove this in git checkouts -DEL_HTSCODECS_VERSION := $(if $(wildcard htscodecs/.git),htscodecs/htscodecs/version.h) - -mostlyclean: testclean - -rm -f *.o *.pico cram/*.o cram/*.pico test/*.o test/*.dSYM config_vars.h version.h - -rm -f htscodecs/htscodecs/*.o htscodecs/htscodecs/*.pico $(DEL_HTSCODECS_VERSION) - -rm -f hts-object-files - -rm -f htscodecs/tests/*.o - -clean: mostlyclean clean-$(SHLIB_FLAVOUR) - -rm -f libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) $(BUILT_TEST_PROGRAMS) $(BUILT_THRASH_PROGRAMS) - -rm -f htscodecs/tests/rans4x8 htscodecs/tests/rans4x16pr htscodecs/tests/arith_dynamic htscodecs/tests/tokenise_name3 htscodecs/tests/fqzcomp_qual htscodecs/tests/varint - -distclean maintainer-clean: clean - -rm -f config.cache config.h config.log config.mk config.status - -rm -f TAGS *.pc.tmp *-uninstalled.pc htslib_static.mk htscodecs.mk - -rm -rf autom4te.cache - -clean-so: - -rm -f libhts.so libhts.so.* - -clean-cygdll: - -rm -f cyghts-*.dll libhts.dll.a - -clean-dll: - -rm -f hts-*.dll hts.dll.a - -clean-dylib: - -rm -f libhts.dylib libhts.*.dylib - - -tags TAGS: - ctags -f TAGS *.[ch] cram/*.[ch] htslib/*.h - -# We recommend libhts-using programs be built against a separate htslib -# installation. However if you feel that you must bundle htslib source -# code with your program, this hook enables Automake-style "make dist" -# for this subdirectory. If you do bundle an htslib snapshot, please -# add identifying information to $(PACKAGE_VERSION) as appropriate. 
-# (The wildcards attempt to omit non-exported files (.git*, README.md, -# etc) and other detritus that might be in the top-level directory.) -distdir: - @if [ -z "$(distdir)" ]; then echo "Please supply a distdir=DIR argument."; false; fi - tar -c *.[ch15] [ILMNRchtv]*[ELSbcekmnth] | (cd $(distdir) && tar -x) - +cd $(distdir) && $(MAKE) distclean - -force: - - -.PHONY: all check check-untracked clean distclean distdir force -.PHONY: install install-pkgconfig installdirs lib-shared lib-static -.PHONY: maintainer-check maintainer-clean mostlyclean plugins -.PHONY: print-config print-version show-version tags -.PHONY: test test-shlib-exports test_thrash testclean -.PHONY: clean-so install-so -.PHONY: clean-cygdll install-cygdll -.PHONY: clean-dll install-dll -.PHONY: clean-dylib install-dylib -.PHONY: test_htscodecs_rans4x8 test_htscodecs_rans4x16 test_htscodecs_arith -.PHONY: test_htscodecs_tok3 test_htscodecs_fqzcomp test_htscodecs_varint diff --git a/src/htslib-1.18/Makefile.win b/src/htslib-1.18/Makefile.win deleted file mode 100644 index e82c351..0000000 --- a/src/htslib-1.18/Makefile.win +++ /dev/null @@ -1,970 +0,0 @@ -# Makefile for htslib, a C library for high-throughput sequencing data formats. -# -# Copyright (C) 2013-2023 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# Get CC, AR, RANLIB, CFLAGS, CPPFLAGS, and LDFLAGS values from -# ${R_HOME}/etc/Makeconf or from a customized Makevars file (site-wide -# or user-specified). -CC := $(shell ${R_HOME}/bin/R CMD config CC) -AR := $(shell ${R_HOME}/bin/R CMD config AR) -RANLIB := $(shell ${R_HOME}/bin/R CMD config RANLIB) -CFLAGS := $(shell ${R_HOME}/bin/R CMD config CFLAGS) -CPPFLAGS := $(shell ${R_HOME}/bin/R CMD config CPPFLAGS) -LDFLAGS := $(shell ${R_HOME}/bin/R CMD config LDFLAGS) - -# Default libraries to link if configure is not used -htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl -lbcrypt -lidn2 -lunistring -liconv -lssl -lcrypto -lcrypt32 -lwsock32 -lwldap32 -lssh2 -lgcrypt -lgpg-error -lws2_32 -lzstd -lregex - -CPPFLAGS += -D_FILE_OFFSET_BITS=64 -DCURL_STATICLIB -# TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64 -# to CFLAGS manually, here or in config.mk if the latter exists. 
-# TODO: probably update cram code to make it compile cleanly with -Wc++-compat -# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600 -#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600 -CFLAGS += -fpic -fvisibility=hidden -Wstrict-prototypes -EXTRA_CFLAGS_PIC = -TARGET_CFLAGS = -LDFLAGS = -fvisibility=hidden -VERSION_SCRIPT_LDFLAGS = -Wl,-version-script,$(srcprefix)htslib.map -LIBS = $(htslib_default_libs) - -prefix = /usr/local -exec_prefix = $(prefix) -bindir = $(exec_prefix)/bin -includedir = $(prefix)/include -libdir = $(exec_prefix)/lib -libexecdir = $(exec_prefix)/libexec -datarootdir = $(prefix)/share -mandir = $(datarootdir)/man -man1dir = $(mandir)/man1 -man5dir = $(mandir)/man5 -man7dir = $(mandir)/man7 -pkgconfigdir= $(libdir)/pkgconfig - -MKDIR_P = mkdir -p -INSTALL = install -p -INSTALL_DATA = $(INSTALL) -m 644 -INSTALL_DIR = $(MKDIR_P) -m 755 -LIB_PERM = 644 -INSTALL_LIB = $(INSTALL) -m $(LIB_PERM) -INSTALL_MAN = $(INSTALL_DATA) -INSTALL_PROGRAM = $(INSTALL) - -# Set by config.mk if plugins are enabled -plugindir = - -BUILT_PROGRAMS = \ - bgzip \ - htsfile \ - tabix - -BUILT_TEST_PROGRAMS = \ - test/hts_endian \ - test/fieldarith \ - test/hfile \ - test/pileup \ - test/pileup_mod \ - test/plugins-dlhts \ - test/sam \ - test/test_bgzf \ - test/test_expr \ - test/test_faidx \ - test/test_kfunc \ - test/test_kstring \ - test/test_mod \ - test/test_realn \ - test/test-regidx \ - test/test_str2int \ - test/test_time_funcs \ - test/test_view \ - test/test_index \ - test/test-vcf-api \ - test/test-vcf-sweep \ - test/test-bcf-sr \ - test/fuzz/hts_open_fuzzer.o \ - test/test-bcf-translate \ - test/test-parse-reg \ - test/test_introspection \ - test/test-bcf_set_variant_type - -BUILT_THRASH_PROGRAMS = \ - test/thrash_threads1 \ - test/thrash_threads2 \ - test/thrash_threads3 \ - test/thrash_threads4 \ - test/thrash_threads5 \ - test/thrash_threads6 \ - test/thrash_threads7 - -all: lib-static $(BUILT_PROGRAMS) plugins 
$(BUILT_TEST_PROGRAMS) \ - htslib_static.mk htslib-uninstalled.pc - -ALL_CPPFLAGS = -I. $(CPPFLAGS) - -# Usually htscodecs.mk is generated by running configure or config.status, -# but if those aren't used create a default here. -htscodecs.mk: - echo '# Default htscodecs.mk generated by Makefile' > $@ - echo 'include $$(HTSPREFIX)htscodecs_bundled.mk' >> $@ - $(srcdir)/hts_probe_cc.sh '$(CC)' '$(CFLAGS) $(CPPFLAGS)' '$(LDFLAGS)' >> $@ - -srcdir = . -srcprefix = -HTSPREFIX = - -# Flags for SIMD code -HTS_CFLAGS_AVX2 = -HTS_CFLAGS_AVX512 = -HTS_CFLAGS_SSE4 = - -# Control building of SIMD code. Not used if configure has been run. -HTS_BUILD_AVX2 = -HTS_BUILD_AVX512 = -HTS_BUILD_SSSE3 = -HTS_BUILD_POPCNT = -HTS_BUILD_SSE4_1 = - -include htslib_vars.mk -include htscodecs.mk - -# If not using GNU make, you need to copy the version number from version.sh -# into here. -PACKAGE_VERSION := $(shell $(srcdir)/version.sh) - -LIBHTS_SOVERSION = 3 - -# Version numbers for the Mac dynamic library. Note that the leading 3 -# is not strictly necessary and should be removed the next time -# LIBHTS_SOVERSION is bumped (see #1144 and -# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23) -MACH_O_COMPATIBILITY_VERSION = 3.1.18 -MACH_O_CURRENT_VERSION = 3.1.18 - -# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string -# even if this is a dirty or untagged Git working tree. -NUMERIC_VERSION := $(shell $(srcdir)/version.sh numeric) - -# Force version.h to be remade if $(PACKAGE_VERSION) has changed. 
-version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force)) - -version.h: - echo '#define HTS_VERSION_TEXT "$(PACKAGE_VERSION)"' > $@ - -print-version: - @echo $(PACKAGE_VERSION) - -show-version: - @echo PACKAGE_VERSION = $(PACKAGE_VERSION) - @echo NUMERIC_VERSION = $(NUMERIC_VERSION) - -config_vars.h: override escape=$(subst ',\x27,$(subst ",\",$(subst \,\\,$(1)))) -config_vars.h: override hts_cc_escaped=$(call escape,$(CC)) -config_vars.h: override hts_cppflags_escaped=$(call escape,$(CPPFLAGS)) -config_vars.h: override hts_cflags_escaped=$(call escape,$(CFLAGS)) -config_vars.h: override hts_ldflags_escaped=$(call escape,$(LDFLAGS)) -config_vars.h: override hts_libs_escaped=$(call escape,$(LIBS)) - -config_vars.h: - printf '#define HTS_CC "%s"\n#define HTS_CPPFLAGS "%s"\n#define HTS_CFLAGS "%s"\n#define HTS_LDFLAGS "%s"\n#define HTS_LIBS "%s"\n' \ - '$(hts_cc_escaped)' \ - '$(hts_cppflags_escaped)' \ - '$(hts_cflags_escaped)' \ - '$(hts_ldflags_escaped)' \ - '$(hts_libs_escaped)' > $@ - -.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so - -.c.o: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $< - -.c.pico: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $< - - -LIBHTS_OBJS = \ - kfunc.o \ - kstring.o \ - bcf_sr_sort.o \ - bgzf.o \ - errmod.o \ - faidx.o \ - header.o \ - hfile.o \ - hts.o \ - hts_expr.o \ - hts_os.o\ - md5.o \ - multipart.o \ - probaln.o \ - realn.o \ - regidx.o \ - region.o \ - sam.o \ - sam_mods.o \ - synced_bcf_reader.o \ - vcf_sweep.o \ - tbx.o \ - textutils.o \ - thread_pool.o \ - vcf.o \ - vcfutils.o \ - cram/cram_codecs.o \ - cram/cram_decode.o \ - cram/cram_encode.o \ - cram/cram_external.o \ - cram/cram_index.o \ - cram/cram_io.o \ - cram/cram_stats.o \ - cram/mFILE.o \ - cram/open_trace_file.o \ - cram/pooled_alloc.o \ - cram/string_alloc.o \ - $(HTSCODECS_OBJS) \ - $(NONCONFIGURE_OBJS) - -# Without configure we wish to have a rich set of 
default figures, -# but we still need conditional inclusion as we wish to still -# support ./configure --disable-blah. -NONCONFIGURE_OBJS = hfile_libcurl.o - -PLUGIN_EXT = -PLUGIN_OBJS = - -cram_h = cram/cram.h $(cram_samtools_h) $(header_h) $(cram_structs_h) $(cram_io_h) cram/cram_encode.h cram/cram_decode.h cram/cram_stats.h cram/cram_codecs.h cram/cram_index.h $(htslib_cram_h) -cram_io_h = cram/cram_io.h $(cram_misc_h) -cram_misc_h = cram/misc.h -cram_os_h = cram/os.h $(htslib_hts_endian_h) -cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h) -cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) $(htslib_cram_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h) -cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h -bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h) -header_h = header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h) $(htslib_sam_h) -hfile_internal_h = hfile_internal.h $(htslib_hts_defs_h) $(htslib_hfile_h) $(textutils_internal_h) -hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h) -hts_time_funcs_h = hts_time_funcs.h -sam_internal_h = sam_internal.h $(htslib_sam_h) -textutils_internal_h = textutils_internal.h $(htslib_kstring_h) -thread_pool_internal_h = thread_pool_internal.h $(htslib_thread_pool_h) - -# To be effective, config.mk needs to appear after most Makefile variables are -# set but before most rules appear, so that it can both use previously-set -# variables in its own rules' prerequisites and also update variables for use -# in later rules' prerequisites. - -# If your make doesn't accept -include, change this to 'include' if you are -# using the configure script or just comment the line out if you are not. --include config.mk - -# Usually config.h is generated by running configure or config.status, -# but if those aren't used create a default config.h here. 
-config.h: - echo '/* Default config.h generated by Makefile */' > $@ - echo '#ifndef _XOPEN_SOURCE' >> $@ - echo '#define _XOPEN_SOURCE 600' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_LIBBZ2 1' >> $@ - echo '#define HAVE_LIBLZMA 1' >> $@ - echo '#ifndef __APPLE__' >> $@ - echo '#define HAVE_LZMA_H 1' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_LIBCURL 1' >> $@ - if [ "x$(HTS_BUILD_POPCNT)" != "x" ] && \ - [ "x$(HTS_BUILD_SSE4_1)" != "x" ] && \ - [ "x$(HTS_BUILD_SSSE3)" != "x" ]; then \ - echo '#define HAVE_POPCNT 1' >> $@ ; \ - echo '#define HAVE_SSE4_1 1' >> $@ ; \ - echo '#define HAVE_SSSE3 1' >> $@ ; \ - echo '#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0' >> $@ ; \ - echo '#define UBSAN 1' >> $@ ; \ - echo '#endif' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \ - echo '#define HAVE_AVX2 1' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \ - echo '#define HAVE_AVX512 1' >> $@ ; \ - fi - -# And similarly for htslib.pc.tmp ("pkg-config template"). No dependency -# on htslib.pc.in listed, as if that file is newer the usual way to regenerate -# this target is via configure or config.status rather than this rule. -htslib.pc.tmp: - sed -e '/^static_libs=/s/@static_LIBS@/$(htslib_default_libs)/;s#@[^-][^@]*@##g' $(srcprefix)htslib.pc.in > $@ - -# Create a makefile fragment listing the libraries and LDFLAGS needed for -# static linking. This can be included by projects that want to build -# and link against the htslib source tree instead of an installed library. -htslib_static.mk: htslib.pc.tmp - sed -n '/^static_libs=/s/[^=]*=/HTSLIB_static_LIBS = /p;/^static_ldflags=/s/[^=]*=/HTSLIB_static_LDFLAGS = /p' $< > $@ - - -lib-static: libhts.a - -# $(shell), :=, and ifeq/.../endif are GNU Make-specific. If you don't have -# GNU Make, comment out the parts of these conditionals that don't apply. 
-ifneq "$(origin PLATFORM)" "file" -PLATFORM := $(shell uname -s) -endif -ifeq "$(PLATFORM)" "Darwin" -SHLIB_FLAVOUR = dylib -lib-shared: libhts.dylib -else ifeq "$(findstring CYGWIN,$(PLATFORM))" "CYGWIN" -SHLIB_FLAVOUR = cygdll -lib-shared: cyghts-$(LIBHTS_SOVERSION).dll -else ifeq "$(findstring MSYS,$(PLATFORM))" "MSYS" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else ifeq "$(findstring MINGW,$(PLATFORM))" "MINGW" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else -SHLIB_FLAVOUR = so -lib-shared: libhts.so -endif - -BUILT_PLUGINS = $(PLUGIN_OBJS:.o=$(PLUGIN_EXT)) - -ifneq "$(BUILT_PLUGINS)" "" -plugins: lib-shared -endif -plugins: $(BUILT_PLUGINS) - - -libhts.a: $(LIBHTS_OBJS) - @-rm -f $@ - $(AR) -rc $@ $(LIBHTS_OBJS) - -$(RANLIB) $@ - -print-config: - @echo HTS_CFLAGS_AVX2 = $(HTS_CFLAGS_AVX2) - @echo HTS_CFLAGS_AVX512 = $(HTS_CFLAGS_AVX512) - @echo HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSE4) - @echo HTS_HAVE_NEON = $(HTS_HAVE_NEON) - @echo LDFLAGS = $(LDFLAGS) - @echo LIBHTS_OBJS = $(LIBHTS_OBJS) - @echo LIBS = $(LIBS) - @echo PLATFORM = $(PLATFORM) - -# The target here is libhts.so, as that is the built file that other rules -# depend upon and that is used when -lhts appears in other program's recipes. -# As a byproduct invisible to make, libhts.so.NN is also created, as it is the -# file used at runtime (when $LD_LIBRARY_PATH includes the build directory). - -libhts.so: $(LIBHTS_OBJS:.o=.pico) - $(CC) -shared -Wl,-soname,libhts.so.$(LIBHTS_SOVERSION) $(VERSION_SCRIPT_LDFLAGS) $(LDFLAGS) -o $@ $(LIBHTS_OBJS:.o=.pico) $(LIBS) -lpthread - ln -sf $@ libhts.so.$(LIBHTS_SOVERSION) - -# Similarly this also creates libhts.NN.dylib as a byproduct, so that programs -# when run can find this uninstalled shared library (when $DYLD_LIBRARY_PATH -# includes this project's build directory). 
- -libhts.dylib: $(LIBHTS_OBJS) - $(CC) -dynamiclib -install_name $(libdir)/libhts.$(LIBHTS_SOVERSION).dylib -current_version $(MACH_O_CURRENT_VERSION) -compatibility_version $(MACH_O_COMPATIBILITY_VERSION) $(LDFLAGS) -o $@ $(LIBHTS_OBJS) $(LIBS) - ln -sf $@ libhts.$(LIBHTS_SOVERSION).dylib - -cyghts-$(LIBHTS_SOVERSION).dll libhts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=libhts.dll.a -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).dll hts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=hts.dll.a -Wl,--enable-auto-import -Wl,--exclude-all-symbols $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).def: hts-$(LIBHTS_SOVERSION).dll - gendef hts-$(LIBHTS_SOVERSION).dll - -hts-$(LIBHTS_SOVERSION).lib: hts-$(LIBHTS_SOVERSION).def - dlltool -m i386:x86-64 -d hts-$(LIBHTS_SOVERSION).def -l hts-$(LIBHTS_SOVERSION).lib - -# Bundling libraries, binaries, dll dependencies, and licenses into a -# single directory. NB: This is not needed for end-users, but a test bed -# for maintainers building binary distributions. -# -# NOTE: only tested on the supported MSYS2/MINGW64 environment. 
-dist-windows: DESTDIR= -dist-windows: prefix=dist-windows -dist-windows: install - cp hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib dist-windows/lib - cp `ldd hts-$(LIBHTS_SOVERSION).dll| awk '/mingw64/ {print $$3}'` dist-windows/bin - mkdir -p dist-windows/share/licenses/htslib - -cp -r /mingw64/share/licenses/mingw-w64-libraries \ - /mingw64/share/licenses/brotli \ - /mingw64/share/licenses/bzip2 \ - /mingw64/share/licenses/gcc-libs \ - /mingw64/share/licenses/libdeflate \ - /mingw64/share/licenses/libpsl \ - /mingw64/share/licenses/libtre \ - /mingw64/share/licenses/libwinpthread \ - /mingw64/share/licenses/openssl \ - /mingw64/share/licenses/xz \ - /mingw64/share/licenses/zlib \ - /mingw64/share/licenses/zstd \ - dist-windows/share/licenses/ - -cp -r /usr/share/licenses/curl \ - dist-windows/share/licenses/ - cp LICENSE dist-windows/share/licenses/htslib/ - - -# Target to allow htslib.mk to build all the object files before it -# links the shared and static libraries. -hts-object-files: $(LIBHTS_OBJS) - touch $@ - -# On Unix dlopen("libhts.so.NN", RTLD_LAZY) may default to RTLD_LOCAL. -# Hence plugins need to link to (shared) libhts.so.NN themselves, as they -# may not be able to access libhts symbols via the main program's libhts -# if that was dynamically loaded without an explicit RTLD_GLOBAL. -%.so: %.pico libhts.so - $(CC) -shared -Wl,-E $(LDFLAGS) -o $@ $< libhts.so $(LIBS) -lpthread - -# For programs *statically* linked to libhts.a, on macOS loading a plugin -# linked to a shared libhts.NN.dylib would lead to conflicting duplicate -# symbols. Fortunately macOS dlopen() defaults to RTLD_GLOBAL so there -# is less need for plugins to link back to libhts themselves. 
-%.bundle: %.o - $(CC) -bundle -Wl,-undefined,dynamic_lookup $(LDFLAGS) -o $@ $< $(LIBS) - -%.cygdll: %.o libhts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< libhts.dll.a $(LIBS) - -%.dll: %.o hts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS) - - -bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h) -errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h) -kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h) -header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h) -hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h) -hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h) -hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(hts_time_funcs_h) -hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h) -hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h) -hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c -vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) 
$(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h) -sam.o sam.pico: sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) -sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h) -tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h) -faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h) -bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h $(bcf_sr_sort_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h) -synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(bcf_sr_sort_h) -vcf_sweep.o vcf_sweep.pico: vcf_sweep.c config.h $(htslib_vcf_sweep_h) $(htslib_bgzf_h) -vcfutils.o vcfutils.pico: vcfutils.c config.h $(htslib_vcfutils_h) $(htslib_kbitset_h) -kfunc.o kfunc.pico: kfunc.c config.h $(htslib_kfunc_h) -regidx.o regidx.pico: regidx.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_regidx_h) $(hts_internal_h) -region.o region.pico: region.c config.h $(htslib_hts_h) $(htslib_khash_h) -md5.o md5.pico: md5.c config.h $(htslib_hts_h) $(htslib_hts_endian_h) -multipart.o multipart.pico: multipart.c config.h $(htslib_kstring_h) $(hts_internal_h) $(hfile_internal_h) -plugin.o plugin.pico: plugin.c config.h $(hts_internal_h) $(htslib_kstring_h) -probaln.o probaln.pico: probaln.c config.h $(htslib_hts_h) -realn.o realn.pico: realn.c config.h $(htslib_hts_h) $(htslib_sam_h) -textutils.o textutils.pico: textutils.c config.h $(htslib_hfile_h) $(htslib_kstring_h) $(htslib_sam_h) 
$(hts_internal_h) - -cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h) -cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) -cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(sam_internal_h) $(htslib_hts_h) $(htslib_hts_endian_h) $(textutils_internal_h) -cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htscodecs_rANS_static4x16_h) $(htslib_hfile_h) $(cram_h) -cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) -cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h) -cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h) -cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h -cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(htslib_hts_h) -cram/pooled_alloc.o cram/pooled_alloc.pico: cram/pooled_alloc.c config.h cram/pooled_alloc.h $(cram_misc_h) -cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h -thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h) $(htslib_hts_log_h) - -htscodecs/htscodecs/arith_dynamic.o htscodecs/htscodecs/arith_dynamic.pico: htscodecs/htscodecs/arith_dynamic.c config.h $(htscodecs_arith_dynamic_h) $(htscodecs_varint_h) 
$(htscodecs_pack_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/fqzcomp_qual.o htscodecs/htscodecs/fqzcomp_qual.pico: htscodecs/htscodecs/fqzcomp_qual.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: htscodecs/htscodecs/htscodecs.c $(htscodecs_htscodecs_h) $(htscodecs_version_h) -htscodecs/htscodecs/pack.o htscodecs/htscodecs/pack.pico: htscodecs/htscodecs/pack.c config.h $(htscodecs_pack_h) -htscodecs/htscodecs/rANS_static32x16pr.o htscodecs/htscodecs/rANS_static32x16pr.pico: htscodecs/htscodecs/rANS_static32x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: htscodecs/htscodecs/rANS_static32x16pr_avx2.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_permute_h) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: htscodecs/htscodecs/rANS_static32x16pr_avx512.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_neon.o htscodecs/htscodecs/rANS_static32x16pr_neon.pico: htscodecs/htscodecs/rANS_static32x16pr_neon.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: htscodecs/htscodecs/rANS_static32x16pr_sse4.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) 
-htscodecs/htscodecs/rANS_static4x16pr.o htscodecs/htscodecs/rANS_static4x16pr.pico: htscodecs/htscodecs/rANS_static4x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(htscodecs_utils_h) $(htscodecs_rANS_static32x16pr_h) -htscodecs/htscodecs/rANS_static.o htscodecs/htscodecs/rANS_static.pico: htscodecs/htscodecs/rANS_static.c config.h $(htscodecs_rANS_byte_h) $(htscodecs_utils_h) $(htscodecs_rANS_static_h) -htscodecs/htscodecs/rle.o htscodecs/htscodecs/rle.pico: htscodecs/htscodecs/rle.c config.h $(htscodecs_varint_h) $(htscodecs_rle_h) -htscodecs/htscodecs/tokenise_name3.o htscodecs/htscodecs/tokenise_name3.pico: htscodecs/htscodecs/tokenise_name3.c config.h $(htscodecs_pooled_alloc_h) $(htscodecs_arith_dynamic_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_tokenise_name3_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/utils.o htscodecs/htscodecs/utils.pico: htscodecs/htscodecs/utils.c config.h $(htscodecs_utils_h) - -# Extra CFLAGS for specific files -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX2) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX512) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: TARGET_CFLAGS = $(HTS_CFLAGS_SSE4) - -bgzip: bgzip.o libhts.a - $(CC) $(LDFLAGS) -o $@ bgzip.o libhts.a $(LIBS) -lpthread - -htsfile: htsfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ htsfile.o libhts.a $(LIBS) -lpthread - -tabix: tabix.o libhts.a - $(CC) $(LDFLAGS) -o $@ tabix.o libhts.a $(LIBS) -lpthread - -bgzip.o: bgzip.c config.h $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_hfile_h) -htsfile.o: htsfile.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -tabix.o: tabix.c config.h $(htslib_tbx_h) $(htslib_sam_h) 
$(htslib_vcf_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(htslib_hts_log_h) - -# Runes to check that the htscodecs submodule is present -ifdef HTSCODECS_SOURCES -htscodecs/htscodecs/%.c: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs/%.h: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs: - @if test -e .git ; then \ - printf "\\n\\nError: htscodecs submodule files not present for htslib.\\n\ - Try running: \\n\ - git submodule update --init --recursive\\n\ - in the top-level htslib directory and then re-run make.\\n\\n\\n" ; \ - else \ - printf "\\n\\nError: htscodecs submodule files not present and this is not a git checkout.\\n\ - You have an incomplete distribution. Please try downloading one of the\\n\ - official releases from https://www.htslib.org/\\n" ; \ - fi - @false - -# Build the htscodecs/htscodecs/version.h file if necessary -htscodecs/htscodecs/version.h: force - @if test -e $(srcdir)/htscodecs/.git && test -e $(srcdir)/htscodecs/configure.ac ; then \ - vers=`cd $(srcdir)/htscodecs && git describe --always --dirty --match 'v[0-9]\.[0-9]*'` && \ - case "$$vers" in \ - v*) vers=$${vers#v} ;; \ - *) iv=`awk '/^AC_INIT/ { match($$0, /^AC_INIT\(htscodecs, *([0-9](\.[0-9])*)\)/, m); print substr($$0, m[1, "start"], m[1, "length"]) }' $(srcdir)/htscodecs/configure.ac` ; vers="$$iv$${vers:+-g$$vers}" ;; \ - esac ; \ - if ! 
grep -s -q '"'"$$vers"'"' $@ ; then \ - echo 'Updating $@ : #define HTSCODECS_VERSION_TEXT "'"$$vers"'"' ; \ - echo '#define HTSCODECS_VERSION_TEXT "'"$$vers"'"' > $@ ; \ - fi ; \ - fi -endif - -# Maintainer source code checks -# - copyright boilerplate presence -# - tab and trailing space detection -maintainer-check: - test/maintainer/check_copyright.pl . - test/maintainer/check_spaces.pl . - -# Look for untracked files in the git repository. -check-untracked: - @if test -e .git && git status --porcelain | grep '^\?'; then \ - echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \ - false ; \ - fi - -# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/ -# for brevity in test and install rules, and so that build logs do not have -# ./ sprinkled throughout. -SRC = $(srcprefix) - -# For tests that might use it, set $REF_PATH explicitly to use only reference -# areas within the test suite (or set it to ':' to use no reference areas). -# -# If using MSYS, avoid poor shell expansion via: -# MSYS2_ARG_CONV_EXCL="*" make check -check test: all $(HTSCODECS_TEST_TARGETS) - test/hts_endian - test/test_expr - test/test_kfunc - test/test_kstring - test/test_str2int - test/test_time_funcs - test/fieldarith test/fieldarith.sam - test/hfile - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -g ./libhts.$(SHLIB_FLAVOUR); \ - fi - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. 
test/with-shlib.sh test/plugins-dlhts -l ./libhts.$(SHLIB_FLAVOUR); \ - fi - test/test_bgzf test/bgziptest.txt - test/test-parse-reg -t test/colons.bam - cd test/faidx && ./test-faidx.sh faidx.tst - cd test/sam_filter && ./filter.sh filter.tst - cd test/tabix && ./test-tabix.sh tabix.tst - cd test/mpileup && ./test-pileup.sh mpileup.tst - cd test/fastq && ./test-fastq.sh - cd test/base_mods && ./base-mods.sh base-mods.tst - REF_PATH=: test/sam test/ce.fa test/faidx/faidx.fa test/faidx/fastqs.fq - test/test-regidx - cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-} - -test/hts_endian: test/hts_endian.o - $(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS) - -test/fuzz/hts_open_fuzzer: test/fuzz/hts_open_fuzzer.o - $(CC) $(LDFLAGS) -o $@ test/fuzz/hts_open_fuzzer.o libhts.a $(LIBS) -lpthread - -test/fieldarith: test/fieldarith.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/fieldarith.o libhts.a $(LIBS) -lpthread - -test/hfile: test/hfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/hfile.o libhts.a $(LIBS) -lpthread - -test/pileup: test/pileup.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread - -test/pileup_mod: test/pileup_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread - -test/plugins-dlhts: test/plugins-dlhts.o - $(CC) $(LDFLAGS) -o $@ test/plugins-dlhts.o $(LIBS) - -test/sam: test/sam.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/sam.o libhts.a $(LIBS) -lpthread - -test/test_bgzf: test/test_bgzf.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a -lz $(LIBS) -lpthread - -test/test_expr: test/test_expr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_expr.o libhts.a -lz $(LIBS) -lpthread - -test/test_faidx: test/test_faidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_faidx.o libhts.a -lz $(LIBS) -lpthread - -test/test_kfunc: test/test_kfunc.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a -lz $(LIBS) -lpthread - -test/test_kstring: test/test_kstring.o libhts.a - $(CC) $(LDFLAGS) -o $@ 
test/test_kstring.o libhts.a -lz $(LIBS) -lpthread - -test/test_mod: test/test_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread - -test/test_realn: test/test_realn.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread - -test/test-regidx: test/test-regidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread - -test/test-parse-reg: test/test-parse-reg.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-parse-reg.o libhts.a $(LIBS) -lpthread - -test/test_str2int: test/test_str2int.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_str2int.o libhts.a $(LIBS) -lpthread - -test/test_time_funcs: test/test_time_funcs.o - $(CC) $(LDFLAGS) -o $@ test/test_time_funcs.o - -test/test_view: test/test_view.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LIBS) -lpthread - -test/test_index: test/test_index.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_index.o libhts.a $(LIBS) -lpthread - -test/test-vcf-api: test/test-vcf-api.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-api.o libhts.a $(LIBS) -lpthread - -test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-sweep.o libhts.a $(LIBS) -lpthread - -test/test-bcf-sr: test/test-bcf-sr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-sr.o libhts.a -lz $(LIBS) -lpthread - -test/test-bcf-translate: test/test-bcf-translate.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a -lz $(LIBS) -lpthread - -test/test_introspection: test/test_introspection.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_introspection.o libhts.a $(LIBS) -lpthread - -test/test-bcf_set_variant_type: test/test-bcf_set_variant_type.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf_set_variant_type.o libhts.a $(LIBS) -lpthread - -# Extra tests for bundled htscodecs -test_htscodecs_rans4x8: htscodecs/tests/rans4x8 - cd htscodecs/tests && srcdir=. 
&& export srcdir && ./rans4x8.test - -test_htscodecs_rans4x16: htscodecs/tests/rans4x16pr - cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x16.test - -test_htscodecs_arith: htscodecs/tests/arith_dynamic - cd htscodecs/tests && srcdir=. && export srcdir && ./arith.test - -test_htscodecs_tok3: htscodecs/tests/tokenise_name3 - cd htscodecs/tests && srcdir=. && export srcdir && ./tok3.test - -test_htscodecs_fqzcomp: htscodecs/tests/fqzcomp_qual - cd htscodecs/tests && srcdir=. && export srcdir && ./fqzcomp.test - -test_htscodecs_varint: htscodecs/tests/varint - cd htscodecs/tests && ./varint - -htscodecs/tests/arith_dynamic: htscodecs/tests/arith_dynamic_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/fqzcomp_qual: htscodecs/tests/fqzcomp_qual_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x16pr: htscodecs/tests/rANS_static4x16pr_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x8: htscodecs/tests/rANS_static_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/tokenise_name3: htscodecs/tests/tokenise_name3_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/varint: htscodecs/tests/varint_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/arith_dynamic_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/arith_dynamic_test.o: htscodecs/tests/arith_dynamic_test.c config.h $(htscodecs_arith_dynamic_h) -htscodecs/tests/fqzcomp_qual_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/fqzcomp_qual_test.o: htscodecs/tests/fqzcomp_qual_test.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) -htscodecs/tests/rANS_static4x16pr_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static4x16pr_test.o: htscodecs/tests/rANS_static4x16pr_test.c config.h $(htscodecs_rANS_static4x16_h) 
-htscodecs/tests/rANS_static_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static_test.o: htscodecs/tests/rANS_static_test.c config.h $(htscodecs_rANS_static_h) -htscodecs/tests/tokenise_name3_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/tokenise_name3_test.o: htscodecs/tests/tokenise_name3_test.c config.h $(htscodecs_tokenise_name3_h) -htscodecs/tests/varint_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/varint_test.o: htscodecs/tests/varint_test.c config.h $(htscodecs_varint_h) - -test/hts_endian.o: test/hts_endian.c config.h $(htslib_hts_endian_h) -test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h) -test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h) -test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h) -test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h) -test/plugins-dlhts.o: test/plugins-dlhts.c config.h -test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h) -test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(hfile_internal_h) -test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h) -test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h) -test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h) -test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h) -test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h) -test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) -test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h) -test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h) -test/test_time_funcs.o: 
test/test_time_funcs.c config.h $(hts_time_funcs_h) -test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h) -test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h) -test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h) -test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h) -test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h) -test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_synced_bcf_reader_h) $(htslib_hts_h) $(htslib_vcf_h) -test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h) -test/test_introspection.o: test/test_introspection.c config.h $(htslib_hts_h) $(htslib_hfile_h) -test/test-bcf_set_variant_type.o: test/test-bcf_set_variant_type.c config.h $(htslib_hts_h) vcf.c - - -test/thrash_threads1: test/thrash_threads1.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads1.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads2: test/thrash_threads2.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads2.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads3: test/thrash_threads3.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads3.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads4: test/thrash_threads4.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads4.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads5: test/thrash_threads5.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads5.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads6: test/thrash_threads6.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads6.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads7: test/thrash_threads7.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads7.o libhts.a -lz $(LIBS) -lpthread - -test_thrash: $(BUILT_THRASH_PROGRAMS) - -# Test to ensure the functions in the header files are exported by the shared -# library. 
This currently works by comparing the output from ctags on -# the headers with the list of functions exported by the shared library. -# Note that functions marked as exported in the .c files and not the public -# headers will be missed by this test. -test-shlib-exports: header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - @echo "Checking shared library exports" - @if test ! -s header-exports.txt ; then echo "Error: header-exports.txt empty" ; false ; fi - @if test ! -s shlib-exports-$(SHLIB_FLAVOUR).txt ; then echo "Error: shlib-exports-$(SHLIB_FLAVOUR).txt empty" ; false ; fi - @! comm -23 header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt | grep . || \ - ( echo "Error: Found unexported symbols (listed above)" ; false ) - -# Extract symbols that should be exported from public headers using ctags -# Filter out macros in htslib/hts_defs.h. -header-exports.txt: test/header_syms.pl htslib/*.h - test/header_syms.pl htslib/*.h | sort -u -o $@ - -shlib-exports-so.txt: libhts.so - nm -D -g libhts.so | awk '$$2 == "T" { sub("@.*", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dylib.txt: libhts.dylib - nm -Ug libhts.dylib | awk '$$2 == "T" { sub("^_", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dll.txt: hts.dll.a - nm -g hts.dll.a | awk '$$2 == "T" { print $$3 }' | sort -u -o $@ - -$(srcprefix)htslib.map: libhts.so - LC_ALL=C ; export LC_ALL; \ - curr_vers=`expr 'X$(PACKAGE_VERSION)' : 'X\([0-9]*\.[0-9.]*\)'` ; \ - last_vers=`awk '/^HTSLIB_[0-9](\.[0-9]+)+/ { lv = $$1 } END { print lv }' htslib.map` ; \ - if test "x$$curr_vers" = 'x' || test "x$$last_vers" = 'x' ; then \ - echo "Version check failed : $$curr_vers / $$las_vers" 1>&2 ; \ - exit 1 ; \ - fi && \ - if test "HTSLIB_$$curr_vers" = "$$last_vers" ; then \ - echo "Refusing to update $@ - HTSlib version not changed" 1>&2 ; \ - exit 1 ; \ - fi && \ - nm --with-symbol-versions -D -g libhts.so | awk '$$2 ~ /^[DGRT]$$/ && $$3 ~ /@@Base$$/ && $$3 !~ /^(_init|_fini|_edata)@@/ { sub(/@@Base$$/, 
";", $$3); print " " $$3 }' > $@.tmp && \ - if [ -s $@.tmp ] ; then \ - cat $@ > $@.new.tmp && \ - printf '\n%s {\n' "HTSLIB_$$curr_vers" >> $@.new.tmp && \ - cat $@.tmp >> $@.new.tmp && \ - printf '} %s;\n' "$$last_vers" >> $@.new.tmp && \ - rm -f $@.tmp && \ - mv $@.new.tmp $@ ; \ - fi ; \ - else \ - rm -f $@.tmp ; \ - fi - -install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB_FLAVOUR) install-pkgconfig - $(INSTALL_PROGRAM) $(BUILT_PROGRAMS) $(DESTDIR)$(bindir) - if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi - $(INSTALL_DATA) $(SRC)htslib/*.h $(DESTDIR)$(includedir)/htslib - $(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a - $(INSTALL_MAN) $(SRC)bgzip.1 $(SRC)htsfile.1 $(SRC)tabix.1 $(DESTDIR)$(man1dir) - $(INSTALL_MAN) $(SRC)faidx.5 $(SRC)sam.5 $(SRC)vcf.5 $(DESTDIR)$(man5dir) - $(INSTALL_MAN) $(SRC)htslib-s3-plugin.7 $(DESTDIR)$(man7dir) - -installdirs: - $(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(includedir) $(DESTDIR)$(includedir)/htslib $(DESTDIR)$(libdir) $(DESTDIR)$(man1dir) $(DESTDIR)$(man5dir) $(DESTDIR)$(man7dir) $(DESTDIR)$(pkgconfigdir) - if test -n "$(plugindir)"; then $(INSTALL_DIR) $(DESTDIR)$(plugindir); fi - -# After installation, the real file in $(libdir) will be libhts.so.X.Y.Z, -# with symlinks libhts.so (used via -lhts during linking of client programs) -# and libhts.so.NN (used by client executables at runtime). 
- -install-so: libhts.so installdirs - $(INSTALL_LIB) libhts.so $(DESTDIR)$(libdir)/libhts.so.$(PACKAGE_VERSION) - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so.$(LIBHTS_SOVERSION) - -install-cygdll: cyghts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) cyghts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/cyghts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) libhts.dll.a $(DESTDIR)$(libdir)/libhts.dll.a - -install-dll: hts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) hts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/hts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) hts.dll.a $(DESTDIR)$(libdir)/hts.dll.a - -install-dylib: libhts.dylib installdirs - $(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.$(LIBHTS_SOVERSION).dylib - -# Substitute these pseudo-autoconf variables only at install time -# so that "make install prefix=/prefix/path" etc continue to work. -install-pkgconfig: htslib.pc.tmp installdirs - sed -e 's#@-includedir@#$(includedir)#g;s#@-libdir@#$(libdir)#g;s#@-PACKAGE_VERSION@#$(PACKAGE_VERSION)#g' htslib.pc.tmp > $(DESTDIR)$(pkgconfigdir)/htslib.pc - chmod 644 $(DESTDIR)$(pkgconfigdir)/htslib.pc - -# A pkg-config file (suitable for copying to $PKG_CONFIG_PATH) that provides -# flags for building against the uninstalled library in this build directory. 
-htslib-uninstalled.pc: htslib.pc.tmp - sed -e 's#@-includedir@#'`pwd`'#g;s#@-libdir@#'`pwd`'#g' htslib.pc.tmp > $@ - - -testclean: - -rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* test/faidx/FAIL* \ - test/longrefs/*.tmp.* test/tabix/*.tmp.* test/tabix/FAIL* \ - header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - -rm -rf htscodecs/tests/test.out - -# Only remove this in git checkouts -DEL_HTSCODECS_VERSION := $(if $(wildcard htscodecs/.git),htscodecs/htscodecs/version.h) - -mostlyclean: testclean - -rm -f *.o *.pico cram/*.o cram/*.pico test/*.o test/*.dSYM config_vars.h version.h - -rm -f htscodecs/htscodecs/*.o htscodecs/htscodecs/*.pico $(DEL_HTSCODECS_VERSION) - -rm -f hts-object-files - -rm -f htscodecs/tests/*.o - -clean: mostlyclean clean-$(SHLIB_FLAVOUR) - -rm -f libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) $(BUILT_TEST_PROGRAMS) $(BUILT_THRASH_PROGRAMS) - -rm -f htscodecs/tests/rans4x8 htscodecs/tests/rans4x16pr htscodecs/tests/arith_dynamic htscodecs/tests/tokenise_name3 htscodecs/tests/fqzcomp_qual htscodecs/tests/varint - -distclean maintainer-clean: clean - -rm -f config.cache config.h config.log config.mk config.status - -rm -f TAGS *.pc.tmp *-uninstalled.pc htslib_static.mk htscodecs.mk - -rm -rf autom4te.cache - -clean-so: - -rm -f libhts.so libhts.so.* - -clean-cygdll: - -rm -f cyghts-*.dll libhts.dll.a - -clean-dll: - -rm -f hts-*.dll hts.dll.a - -clean-dylib: - -rm -f libhts.dylib libhts.*.dylib - - -tags TAGS: - ctags -f TAGS *.[ch] cram/*.[ch] htslib/*.h - -# We recommend libhts-using programs be built against a separate htslib -# installation. However if you feel that you must bundle htslib source -# code with your program, this hook enables Automake-style "make dist" -# for this subdirectory. If you do bundle an htslib snapshot, please -# add identifying information to $(PACKAGE_VERSION) as appropriate. 
-# (The wildcards attempt to omit non-exported files (.git*, README.md, -# etc) and other detritus that might be in the top-level directory.) -distdir: - @if [ -z "$(distdir)" ]; then echo "Please supply a distdir=DIR argument."; false; fi - tar -c *.[ch15] [ILMNRchtv]*[ELSbcekmnth] | (cd $(distdir) && tar -x) - +cd $(distdir) && $(MAKE) distclean - -force: - - -.PHONY: all check check-untracked clean distclean distdir force -.PHONY: install install-pkgconfig installdirs lib-shared lib-static -.PHONY: maintainer-check maintainer-clean mostlyclean plugins -.PHONY: print-config print-version show-version tags -.PHONY: test test-shlib-exports test_thrash testclean -.PHONY: clean-so install-so -.PHONY: clean-cygdll install-cygdll -.PHONY: clean-dll install-dll -.PHONY: clean-dylib install-dylib -.PHONY: test_htscodecs_rans4x8 test_htscodecs_rans4x16 test_htscodecs_arith -.PHONY: test_htscodecs_tok3 test_htscodecs_fqzcomp test_htscodecs_varint diff --git a/src/htslib-1.18/NEWS b/src/htslib-1.18/NEWS deleted file mode 100644 index 7ec7729..0000000 --- a/src/htslib-1.18/NEWS +++ /dev/null @@ -1,2150 +0,0 @@ -Noteworthy changes in release 1.18 (25th July 2023) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Updates -------- - -* Using CRAM 3.1 no longer gives a warning about the specification - being draft. Note CRAM 3.0 is still the default output format. - (PR#1583) - -* Replaced use of sprintf with snprintf, to silence potential warnings - from Apple's compilers and those who implement similar checks. - (PR#1594, fixes #1586. Reported by Oleksii Nikolaienko) - -* Fastq output will now generate empty records for reads with no - sequence data (i.e. sequence is "*" in SAM format). (PR#1576, - fixes samtools/samtools#1576. Reported by Nils Homer) - -* CRAM decoding speed-ups. (PR#1580) - -* A new MN aux tag can now be used to verify that MM/ML base modification - data has not been broken by hard clipping. (PR#1590, PR#1612. 
See also - PR samtools/hts-specs#714 and issue samtools/hts-specs#646. - Reported by Jared Simpson) - -* The base modification API has been improved to make it easier for callers - to tell unchecked bases from unmodified ones. (PR#1636, fixes #1550. - Requested by Chris Wright) - -* A new bam_mods_queryi() API has been added to return additional - data about the i-th base modification returned by bam_mods_recorded(). - (PR#1636, fixes #1550 and #1635. Requested by Jared Simpson) - -* Speed up index look-ups for whole-chromosome queries. (PR#1596) - -* Mpileup now merges adjacent (mis)match CIGAR operations, so CIGARs - using the X/= operators give the same results as if the M operator - was used. (PR#1607, fixes #1597. Reported by Marcel Martin) - -* It's now possible to call bcf_sr_set_regions() after adding readers - using bcf_sr_add_reader() (previously this returned an error). Doing so - will discard any unread data, and reset the readers so they iterate over - the new regions. (PR#1624, fixes samtools/bcftools#1918. Reported by - Gregg Thomas) - -* The synced BCF reader can now accept regions with reference names including - colons and hyphens, by enclosing them in curly braces. For example, - {chr_part:1-1001}:10-20 will return bases 10 to 20 from reference - "chr_part:1-1001". (PR#1630, fixes #1620. Reported by Bren) - -* Add a "samples" directory with code demonstrating usage of HTSlib plus - a tutorial document. (PR#1589) - -Build changes -------------- - -* Htscodecs has been updated to 1.5.1 (PR#1654) - -* Htscodecs SIMD code now works with Apple multiarch binaries. - (PR#1587, HTSlib fix for samtools/htscodecs#76. Reported by John Marshall) - -* Improve portability of "expr" usage in version.sh. - (PR#1593, fixes #1592. Reported by John Marshall) - -* Improve portability to *BSD targets by ensuring _XOPEN_SOURCE is defined - correctly and that source files properly include "config.h". 
Perl - scripts also now all use #!/usr/bin/env instead of assuming that - it's in /usr/bin/perl. (PR#1628, fixes #1606. - Reported by Robert Clausecker) - -* Fixed NAME entry in htslib-s3-plugin man page so the whatis and apropos - commands find it. (PR#1634, thanks to Étienne Mollier) - -* Assorted dependency tracking fixes. (PR#1653, thanks to John Marshall) - -Documentation updates ---------------------- - -* Changed Alpine build instructions as they've switched back to using openssl. - (PR#1609) - -* Recommend using -rdynamic when statically linking a libhts.a with - plugins enabled. (PR#1611, thanks to John Marshall. Fixes #1600, - reported by Jack Wimberley) - -* Fixed example in docs for sam_hdr_add_line(). (PR#1618, thanks to kojix2) - -* Improved test harness for base modifications API. (PR#1648) - -Bug fixes ---------- - -* Fix a major bug when searching against a CRAM index where one container - has start and end coordinates entirely contained within the previous - container. This would occasionally miss data, and sometimes return much - more than required. The bug affected versions 1.11 to 1.17, although the - change in 1.11 was bug-fixing multi-threaded index queries. This bug did - not affect index building. There is no need to reindex your CRAM files. - (PR#1574, PR#1640. Fixes #1569, #1639, samtools/samtools#1808, - samtools/samtools#1819. Reported by xuxif, Jens Reeder and Jared Simpson) - -* Prevent CRAM blocks from becoming too big in files with short - sequences but very long aux tags. (PR #1613) - -* Fix bug where the CRAM decoder for CONST_INT and CONST_BYTE - codecs may incorrectly look for extra data in the CORE block. - Note that this bug only affected the experimental CRAM v4.0 decoder. - (PR#1614) - -* Fix crypt4gh redirection so it works in conjunction with non-file - IO, such as using htsget. (PR#1577) - -* Improve error checking for the VCF POS column, when facing invalid - data. 
(PR#1575, replaces #1570 originally reported and fixed - by Colin Nolan.) - -* Improved error checking on VCF indexing to validate the data is BGZF - compressed. (PR#1581) - -* Fix bug where bin number calculation could overflow when making iterators - over regions that go to the end of a chromosome. (PR#1595) - -* Backport attractivechaos/klib#78 (by Pall Melsted) to HTSlib. - Prevents infinite loops in kseq_read() when reading broken gzip files. - (PR#1582, fixes #1579. Reported by Goran Vinterhalter) - -* Backport attractivechaos/klib@384277a (by innoink) to HTSlib. - Fixes the kh_int_hash_func2() macro definition. - (PR#1599, fixes #1598. Reported by fanxinping) - -* Remove a compilation warning on systems with newer libcurl releases. - (PR#1572) - -* Windows: Fixed BGZF EOF check for recent MinGW releases. (PR#1601, - fixes samtools/bcftools#1901) - -* Fixed bug where tabix would not return the correct regions for files - where the column ordering is end, ..., begin instead of begin, ..., end. - (PR#1626, fixes #1622. Reported by Hiruna Samarakoon) - -* sam_format_aux1() now always NUL-terminates Z/H tags. (PR#1631) - -* Ensure base modification iterator is reset when no MM tag is present. - (PR#1631, PR#1647) - -* Fix segfault when attempting to write an uncompressed BAM file opened using - hts_open(name, "wbu"). This was attempting to write BAM data without - wrapping it in BGZF blocks, which is invalid according to the BAM - specification. "wbu" is now internally converted to "wb0" to output - uncompressed data wrapped in BGZF blocks. (PR#1632, fixes #1617. - Reported by Joyjit Daw) - -* Fixed over-strict bounds check in probaln_glocal() which caused it to make - sub-optimal alignments when the requested band width was greater than the - query length. (PR#1616, fixes #1605. Reported by Jared Simpson) - -* Fixed possible double frees when handling errors in bcf_hdr_add_hrec(), - if particular memory allocations fail. 
(PR#1637) - -* Ensure that bcf_hdr_remove() clears up all pointers to the items removed - from dictionaries. Failing to do this could have resulted in a call - requesting a deleted item via bcf_hdr_get_hrec() returning a stale pointer. - (PR#1637) - -* Stop the gzip decompresser from finishing prematurely when an empty - gzip block is followed by more data. (PR#1643, PR#1646) - -Noteworthy changes in release 1.17 (21st February 2023) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* A new API for iterating through a BAM record's aux field. - (PR#1354, addresses #1319. Thanks to John Marshall) - -* Text mode for bgzip. Allows bgzip to compress lines of text with block breaks - at newlines. - (PR#1493, thanks to Mike Lin for the initial version PR#1369) - -* Make tabix support CSI indices with large positions. Unlike SAM and VCF - files, BED files do not set a maximum reference length which hindered CSI - support. This change sets an arbitrary large size of 100G to enable it to - work. - (PR#1506) - -* Add a fai_line_length function. Exposes the internal line-wrap length. - (PR#1516) - -* Check for invalid barcode tags in fastq output. - (PR#1518, fixes samtools#1728. Reported by Poshi) - -* Warn if reference found in a CRAM file is not contained in the specified - reference file. - (PR#1517 and PR#1521, adds diagnostics for #1515. Reported by Wei WeiDeng) - -* Add a faidx_seq_len64 function that can return sequence lengths longer than - INT_MAX. At the same time limit faidx_seq_len to INT_MAX output. Also add a - fai_adjust_region to ensure given ranges do not go beyond the end of the - requested sequence. - (PR#1519) - -* Add a bcf_strerror function to give text descriptions of BCF errors. - (PR#1510) - -* Add CRAM SQ/M5 header checking when specifying a fasta file. This is to - prevent creating a CRAM that cannot be decoded again. - (PR#1522. 
In response to samtools#1748 though not a direct fix) - -* Improve support for very long input lines (> 2Gbyte). This is mostly useful - for tabix which does not do much interpretation of its input. - (PR#1542, a partial fix for #1539) - -* Speed up load_ref_portion. This function has been sped up by about 7x, which - speeds up low-depth CRAM decoding by about 10%. - (PR#1551) - -* Expand CRAM API to cope with new samtools cram_size command. - (PR#1546) - -* Merges neighbouring I and D ops into one op within pileup. This means - 4M1D1D1D3M is reported as 4M3D3M. Fixing this in sam.c means not only is - samtools mpileup now looking better, but any tool using the mpileup API will - be getting consistent results. - (PR#1552, fixes the last remaining part of samtools#139) - -* Update the API documentation for bgzf_mt as it referred to a previous - iteration. - (PR#1556, fixes #1553. Reported by Raghavendra Padmanabhan) - - -Build changes -------------- - -* Use POSIX grep in testing as egrep and fgrep are considered obsolete. - (PR#1509, thanks to David Seifert) - -* Switch to building libdeflate with cmake for Cirrus CI. - (PR#1511) - -* Ensure strings in config_vars.h are escaped correctly. - (PR#1530, fixes #1527. Reported by Lucas Czech) - -* Easier modification of shared library permissions during install. - (PR#1532, fixes #1525. Reported by StephDC) - -* Fix build on ancient compilers. Added -std=gnu90 to build tests so older - C compilers will still be happy. - (PR#1524, fixes #1523. Reported by Martin Jakt) - -* Switch MacOS CI tests to an ARM-based image. - (PR#1536) - -* Cut down the number of embed_ref=2 tests that get run. - (PR#1537) - -* Add symbol versions to libhts.so. This is to aid package developers. - (PR#1560 addresses #1505, thanks to John Marshall. Reported by Stefan Bruens) - -* htscodecs now updated to v1.4.0. - (PR#1563) - -* Cleaned up misleading system error reports in test_bgzf. - (PR#1565) - -Bug fixes ---------- - -* VCF.
Fix n-squared complexity in sample line with many adjacent tabs [fuzz]. - (PR#1503) - -* Improved bcftools detection and reporting of bgzf decode errors. - (PR#1504, thanks to Lilian Janin. PR#1529 thanks to Bergur Ragnarsson, fixes - #1528. PR#1554) - -* Prevent crash when the only FASTA entry has no sequence [fuzz]. - (PR#1507) - -* Fixed typo in sam.h documentation. - (PR#1512, thanks to kojix2) - -* Fix buffer read-overrun in bam_plp_insertion_mod. - (PR#1520) - -* Fix hash keys being left behind by bcf_hdr_remove. - (PR#1535, fixes #1533. Reported by Giulio Genovese in #842) - -* Make bcf_hdr_idinfo_exists more robust by checking id value exists. - (PR#1544, fixes #1538. Reported by Giulio Genovese) - -* CRAM improvements. Fixed crash with multi-threaded CRAM. Fixed a bug in the - codec parameter learning for CRAM 3.1 name tokeniser. Fixed Cram compression - container substitution matrix generation, - (PR#1558, PR#1559 and PR#1562) - -Noteworthy changes in release 1.16 (18th August 2022) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Make hfile_s3 refresh AWS credentials on expiry in order to make HTSlib work - better with AWS IAM credentials, which have a limited lifespan. - (PR#1462 and PR#1474, addresses #344) - -* Allow BAM headers between 2GB and 4GB in size once more. This is not - permitted in the BAM specification but was allowed in an earlier version of - HTSlib. There is now a warning at 2GB and a hard failure at 4GB. - (PR#1421, fixes #1420 and samtools#1613. Reported by John Marshall and - R C Mueller) - -* Improve error message when failing to load an index. - (PR#1468, example of the problem samtools#1637) - -* Permit MM (base modification) tags containing "." and "?" suffixes. These - define implicit vs explicit coordinates. See the SAM tags specification for - details. - (PR#1423 and PR#1426, fixes #1418. PR#1469, fixes #1466. 
Reported - by cjw85) - -* Warn if spaces instead of tabs are detected in a VCF file to prevent - confusion. - (PR#1328, fixes bcftools#1575. Reported by ketkijoshi278) - -* Add an "sclen" filter expression keyword. This is the length of a soft-clip, - both left and right end. It may be combined with qlen (qlen-sclen) to obtain - the number of bases in the query sequence that have been aligned to the genome - ie it provides a way to compare local-alignment vs global-alignment length. - (PR#1441 and PR/samtools#1661, fixes #1436. Requested by Chang Y) - -* Improve error messages for CRAM reference mismatches. If the user specifies - the wrong reference, the CRAM slice header MD5sum checks fail. We now report - the SQ line M5 string too so it is possible to validate against the whole - chr in the ref.fa file. The error message has also been improved to report - the reference name instead of #num. Finally, we now hint at the likely cause, - which counters the misleading samtools supplied error of "truncated or - corrupt" file. - (PR#1427, fixes samtools#1640. Reported by Jian-Guo Zhou) - -* Expose more of the CRAM API and add new functionality to extract the reference - from a CRAM file. - (PR#1429 and PR#1442) - -* Improvements to the implementation of embedded references in CRAM where no - external reference is specified. - (PR#1449, addresses some of the issues in #1445) - -* The CRAM writer now allows alignment records with RG:Z: aux tags that - don't have a corresponding @RG ID in the file header. Previously these - tags would have been silently dropped. HTSlib will complain whenever it - has to add one though, as such tags do not conform to recommended practice - for the SAM, BAM and CRAM formats. - (PR#1480, fixes #1479. Reported by Alex Leonard) - -* Set tab delimiter in man page for tabix GFF3 sort. - (PR#1457. 
Thanks to Colin Diesh) - -* When using libdeflate, the 1...9 scale of BGZF compression levels is - now remapped to the 1...12 range used by libdeflate instead of being - passed directly. In particular, HTSlib levels 8 and 9 now map to - libdeflate levels 10 and 12, so it is possible to select the highest (but - slowest) compression offered by libdeflate. - (PR#1488, fixes #1477. Reported by Gert Hulselmans) - -* The VCF variant API has been extended so that it can return separate flags - for INS and DEL variants as well as the existing INDEL one. These flags - have not been added to the old bcf_get_variant_types() interface as - it could break existing users. To access them, it is necessary to use new - functions bcf_has_variant_type() and bcf_has_variant_types(). - (PR#1467) - -* The missing, but trivial, `le_to_u8()` function has been added to hts_endian. - (PR#1494, Thanks to John Marshall) - -* bcf_format_gt() now works properly on big-endian platforms. - (PR#1495, Thanks to John Marshall) - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* Update htscodecs to version 1.3.0 for new SIMD code + various fixes. - Updates the htscodecs submodule and adds changes necessary to make HTSlib - build the new SIMD codec implementations. - (PR#1438, PR#1489, PR#1500) - -* Fix clang builds under mingw. Under mingw, clang requires dllexport to be - applied to both function declarations and function definitions. - (PR#1435, PR#1497, PR#1498 fixes #1433. Reported by teepean) - -* Fix curl type warning with gcc 12.1 on Windows. - (PR#1443) - -* Detect ARM Neon support and only build appropriate SIMD object files. - (PR#1451, fixes #1450. Thanks to John Marshall) - -* `make print-config` now reports extra CFLAGS that are needed to build the - SIMD parts of htscodecs. These may be of use to third-party build - systems that don't use HTSlib's or htscodecs' build infrastructure. (PR#1485. 
- Thanks to John Marshall) - -* Fixed some Makefile dependency issues for the "check"/"test" targets - and plugins. In particular, "make check" will now build the "all" target, - if not done already, before running the tests. - (PR#1496) - -Bug fixes ---------- - -* Fix bug when reading position -1 in BCF (0 in VCF), which is used to indicate - telomeric regions. The BCF reader was incorrectly assuming the value stored - in the file was unsigned, so a VCF->BCF->VCF round-trip would change it - from 0 to 4294967296. - (PR#1476, fixes #1475 and bcftools#1753. Reported by Rodrigo Martin) - -* Various bugs and quirks have been fixed in the filter expression engine, - mostly related to the handling of absent tags, and the is_true flag. - Note that as a result of these fixes, some filter expressions may give - different results: - - Fixed and-expressions including aux tag values which could give an invalid - true result depending on the order of terms. - - The expression `![NM]` is now true if only `NM` does not exist. In - earlier versions it would also report true for tags like `NM:i:0` which - exist but have a value of zero. - - The expression `[X1] != 0` is now false when `X1` does not exist. Earlier - versions would return true for this comparison when the tag was missing. - - NULL values due to missing tags now propagate through string, bitwise - and mathematical operations. Logical operations always treat them as - false. - (PR#1463, fixes samtools#1670. Reported by Gert Hulselmans; - PR#1478, fixes samtools#1677. Reported by johnsonzcode) - -* Fix buffer overrun in bam_plp_insertion_mod. Memory now grows to the proper - size needed for base modification data. - (PR#1430, fixes samtools#1652. Reported by hd2326) - -* Remove limit of returned size from fai_retrieve(). - (PR#1446, fixes samtools#1660. Reported by Shane McCarthy) - -* Cap hts_getline() return value at INT_MAX. 
Prevents hts_getline() from - returning a negative number (a fail) for very long string length values. - (PR#1448. Thanks to John Marshall) - -* Fix breakend detection and test bcf_set_variant_type(). - (PR#1456, fixes #1455. Thanks to Martin Pollard) - -* Prevent arrays of BCF_BT_NULL values found in BCF files from causing - bcf_fmt_array() to call exit() as the type is unsupported. These are - now tested for and caught by bcf_record_check(), which returns an - error code instead. (PR#1486) - -* Improved detection of fasta and fastq files that have very long comments - following identifiers. (PR#1491, thanks to John Marshall. - Fixes samtools/samtools#1689, reported by cjw85) - -* Fixed a SEGV triggered by giving a SAM file to `samtools import`. - (PR#1492) - -Noteworthy changes in release 1.15.1 (7th April 2022) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Security fix: Fixed broken error reporting in the sam_prob_realn() - function, due to a missing hts_log() parameter. Prior to this fix - (i.e., in HTSlib versions 1.8 to 1.15) it was possible to abuse - the log message format string by passing a specially crafted - alignment record to this function. (PR#1406) - -* HTSlib now uses libhtscodecs release 1.2.2. This fixes a number - of bugs where invalid compressed data could trigger usage of - uninitialised values. (PR#1416) - -* Fixed excessive memory used by multi-threaded SAM output on - long reads. (Part of PR#1384) - -* Fixed a bug where tabix would misinterpret region specifiers - starting at position 0. It will also now warn if the file - being indexed is supposed to be 1-based but has positions - less than or equal to 0. (PR#1411) - -* The VCF header parser will now issue a warning if it finds an - INFO header with Type=Flag but Number not equal to 0. It will - also ignore the incorrect Number so the flag can be used. 
(PR#1415) - -Noteworthy changes in release 1.15 (21st February 2022) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* Bgzip now has a --keep option to not remove the input file after - compressing. (PR#1331) - -* Improved file format detection so some BED files are no longer - detected as FASTQ or FASTA. (PR#1350, thanks to John Marshall) - -* Added xz (lzma), zstd and D4 formats to the file type detection - functions. We don't actively support reading these data types, but - function calls and htsfile can detect them. (PR#1340, thanks to - John Marshall) - -* CRAM now also uses libdeflate for read-names if the libdeflate - version is new enough (1.9 onwards). Previously we used zlib for - this due to poor performance of libdeflate. This gives a slight - speed up and reduction in file size. (PR#1383) - -* The VCF and BCF readers will now issue a warning if contig, INFO - or FORMAT IDs do not match the formats described in the VCFv4.3 - specification. Note that while the invalid names will mostly still - be accepted, future updates will convert the warnings to errors - causing files including invalid names to be rejected. (PR#1389) - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* HTSlib now uses libhtscodecs release 1.2.1. - -* Improved support for compiling and linking against HTSlib with - Microsoft Visual Studio. (PR#1380, #1377, #1375. Thanks to - Aidan Bickford and John Marshall) - -* Various internal CI improvements. - -Bug fixes ---------- - -* Fixed CRAM index queries for HTSJDK output (PR#1388, reported by - Chris Norman). Note this also fixes CRAM writing, to match - the specification (and HTSJDK), from version 3.1 onwards. - -* Fixed CRAM index queries when required-fields settings are selected - to ignore CIGARs (PR#1372, reported by Giulio Genovese). - -* Unmapped but placed (having chr/pos) are now included in the BAM - indices.
(PR#1352, thanks to John Marshall) - -* CRAM now honours the filename##idx##index nomenclature for - specifying non-standard index locations. (PR#1360, reported by - Michael Cariaso) - -* Minor CRAM v1.0 read-group fix (PR#1349, thanks to John Marshall) - -* Permit .fa and .fq file type detection as synonyms for FASTA and - FASTQ. (PR#1386). - -* Empty VCF format fields are now output as ":.:" instead of "::". - (PR#1370) - -* Repeated bcf_sr_seek calls now work. (PR#1363, reported by - Giulio Genovese) - -* Bcf_remove_allele_set now works on unpacked BCF records. (PR#1358, - reported by Brent Pedersen). - -* The hts_parse_decimal() function used to read numbers in region lists - is now better at rejecting non-numeric values. In particular it - now rejects a lone 'G' instead of interpreting it as '0G', i.e. zero. - (PR#1396, PR#1400, reported by SSSimon Yang; thanks to John Marshall). - -* Improve support for GPU issues listed by -Wdouble-promotion. - (PR#1365, reported by David Seifert) - -* Fix example code in header file documentation. (PR#1381, Thanks to - Aidan Bickford) - -Noteworthy changes in release 1.14 (22nd October 2021) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* Added a keep option to bgzip to leave the original file untouched. This - brings bgzip into line with gzip. (PR #1331, thanks to Alex Petty) - -* "endpos" has been added to the filter language, giving the position - of the rightmost mapped base as measured by the CIGAR string. For - unmapped reads it is the same as "pos". (PR #1307, thanks to John Marshall) - -* Interfaces have been added to interpret the new base modification tags - added to the SAMtags document in samtools/hts-specs#418. (PR #1132) - -* New API functions hts_flush()/sam_flush()/bcf_flush() for flushing output - htsFile/samFile/vcfFile streams.
(PR #1326, thanks to John Marshall) - -* The synced_bcf_reader now sorts lines with symbolic alleles by END tag as - well as POS. (PR #1321) - -* Added synced_bcf_reader options BCF_SR_REGIONS_OVERLAP and - BCF_SR_TARGETS_OVERLAP for better control of records that start outside - the desired region but overlap it are handled. Fixes samtools/bcftools#1420 - and samtools/bcftools#1421 raised by John Marshall. (PR #1327) - -* HTSlib will now accept long-cigar CG:B: tags made by htsjdk which don't - quite follow the specification properly (using signed values instead of - unsigned). Thanks to Colin Diesh for reporting an example file. (PR #1317) - -* The warning printed when the BGZF reader finds a file with no EOF block - has been changed to be less alarming. Unfortunately some third-party - BGZF encoders don't write EOF blocks at the end of files. Thanks to - Keiran Raine for reporting an example file. (PR #1323) - -* The FASTA and FASTQ readers get an option to skip over the first item on - the header line, and use the second as the read name. It allows the original - name to be restored on some of the fastq files served from the European - Nucleotide Archive (ENA). (PR #1325) - -* HTSlib is now more strict when parsing the VCF samples line (beginning - #CHROM). It will only accept tabs between the mandatory field names and - sample names must be separated with tabs. (PR #1328) - -* HTSlib will now warn if it looks like the header has been corrupted - by diagnostic messages from the program that made it. This can happen when - using `nohup`, which by default mixes stdout and stderr into the same - stream. (PR#1339, thanks to John Marshall) - -* File format detection will now recognise signatures for XZ, Zstd and D4 - files (note that HTSlib will not read them yet). (PR #1340, thanks to - John Marshall) - -Build changes -------------- - -These are compiler, configuration and makefile based changes. 
- -* Some redundant tests have been removed from the test harness, speeding it up. - (PR #1308) - -* The version.sh script now works better on shallow checkouts. (PR #1324) - -* A check-untracked Makefile target has been added to catch untracked files - (mostly) left by the test harness. (PR #1324) - -Bug fixes ---------- - -* Fixed a case where flushing the thread pool could very occasionally cause - a deadlock. (PR #1309) - -* Fixed a bug where some CRAM files could fail to decode if the required_fields - option was in use. Thanks to Matt Sexton for reporting the issue. - (PR #1314, fixes samtools/samtools#1475) - -* Fixed a regression where the S3 plugin could not read public files unless - you supplied some Amazon credentials. Thanks to Chris Saunders for reporting. - (PR #1332, fixes samtools/samtools#1491) - -* Fixed a possible CRAM thread deadlock discovered by @ryancaicse. - (PR #1330, fixes #1329) - -* Some set-but-unused variables have been removed. (PR #1334) - -* Fixed a bug which prevented "flag.read2" from working in the filter - language unless it was at the end of the expression. Thanks to Vamsi Kodali - for reporting the issue. (PR #1342) - -* Fixed a memory leak that could happen if CRAM fails to inflate a LZMA - block. (PR #1340, thanks to John Marshall) - -Noteworthy changes in release 1.13 (7th July 2021) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* In case a PG header line has multiple ID tags supplied by other applications, - the header API now selects the first one encountered as the identifying tag - and issues a warning when detecting subsequent ID tags. - (#1256; fixed samtools/samtools#1393) - -* VCF header reading function (vcf_hdr_read) no longer tries to download a - remote index file by default. - (#1266; fixes #380) - -* Support reading and writing FASTQ format in the same way as SAM, BAM or CRAM. - Records read from a FASTQ file will be treated as unmapped data. 
- (#1156) - -* Added GCP requester pays bucket access. Thanks to @indraniel. - (#1255) - -* Made mpileup's overlap removal choose which copy to remove at random instead - of always removing the second one. This avoids strand bias in experiments - where the +ve and -ve strand reads always appear in the same order. - (#1273; fixes samtools/bcftools#1459) - -* It is now possible to use platform specific BAQ parameters. This also - selects long-read parameters for read lengths bigger than 1kb, which helps - bcftools mpileup call SNPs on PacBio CCS reads. - (#1275) - -* Improved bcf_remove_allele_set. This fixes a bug that stopped iteration over - alleles prematurely, marks removed alleles as 'missing' and does automatic - lazy unpacking. - (#1288; fixes #1259) - -* Improved compression metrics for unsorted CRAM files. This improves the - choice of codecs when handling unsorted data. - (#1291) - -* Linear index entries for empty intervals are now initialised with the file - offset in the next non-empty interval instead of the previous one. This - may reduce the amount of data iterators have to discard before reaching - the desired region, when the starting location is in a sequence gap. - Thanks to @carsonh for reporting the issue. - (#1286; fixes #486) - -* A new hts_bin_level API function has been added, to compute the level of a - given bin in the binning index. - (#1286) - -* Related to the above, a new API method, hts_idx_nseq, now returns the total - number of contigs from an index. - (#1295 and #1299) - -* Added bracket handling to bcf_hdr_parse_line, for use with ##META lines. - Thanks to Alberto Casas Ortiz. - (#1240) - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* HTSlib now uses libhtscodecs release 1.1.1. - -* Added a curl/curl.h check to configure and improved INSTALL documentation on - build options. Thanks to Melanie Kirsche and John Marshall. 
(#1265; fixes #1261) - -* Some fixes to address GCC 11.1 warnings. - (#1280, #1284, #1285; fixes #1283) - -* Supports building HTSlib in a separate directory. Thanks to John Marshall. - (#1277; fixes #231) - -* Supports building HTSlib on MinGW 32-bit environments. Thanks to - John Marshall. - (#1301) - -Bug fixes ---------- - -* Fixed hts_itr_query() et al region queries: fixed bug introduced in - HTSlib 1.12, which led to iterators producing very few reads for some - queries (especially for larger target regions) when unmapped reads were - present. HTSlib 1.11 had a related problem in which iterators would omit - a few unmapped reads that should have been produced; cf #1142. - Thanks to Daniel Cooke for reporting the issue. - (#1281; fixes #1279) - -* Removed compressBound assertions on opening bgzf files. Thanks to - Gert Hulselmans for reporting the issue. - (#1258; fixed #1257) - -* Duplicate sample name error message for a VCF file now only displays the - duplicated name rather than the entire sample name list. - (#1262; fixes samtools/bcftools#1451) - -* Fix to make samtools cat work on CRAMs again. - (#1276; fixes samtools/samtools#1420) - -* Fix for a double memory free in SAM header creation. Thanks to @ihsineme. - (#1274) - -* Prevent assert in bcf_sr_set_regions. Thanks to Dr K D Murray. - (#1270) - -* Fixed crash in knet_open() etc stubs. Thanks to John Marshall. - (#1289) - -* Fixed filter expression "cigar" on unmapped reads. Stop treating an empty - CIGAR string as an error. Thanks to Chang Y for reporting the issue. - (#1298, fixes samtools/samtools#1445) - -* Bug fixes in the bundled copy of htscodecs: - - - Fixed an uninitialized access in the name tokeniser decoder. - (samtools/htscodecs#23) - - - Fixed a bug with name tokeniser and variable number of names per slice, - causing it to incorrectly report an error on certain valid inputs.
- (samtools/htscodecs#24) - - -Noteworthy changes in release 1.12 (17th March 2021) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* Added experimental CRAM 3.1 and 4.0 support. (#929) - - These should not be used for long term data storage as the - specification still needs to be ratified by GA4GH and may be subject - to changes in format. (This is highly likely for 4.0). However it - may be tested using: - - test/test_view -t ref.fa -C -o version=3.1 in.bam -p out31.cram - - For smaller but slower files, try varying the compression profile - with an additional "-o small". Profile choices are fast, normal, - small and archive, and can be applied to all CRAM versions. - -* Added a general filtering syntax for alignment records in SAM/BAM/CRAM - readers. (#1181, #1203) - - An example to find chromosome spanning read-pairs with high mapping - quality: 'mqual >= 30 && mrname != rname' - - To find significant sized deletions: - 'cigar =~ "[0-9]{2}D"' or 'rlen - qlen > 10'. - - To report duplicates that aren't part of a "proper pair": - 'flag.dup && !flag.proper_pair' - - More details are in the samtools.1 man page under "FILTER EXPRESSIONS". - -* The knet networking code has been removed. It only supported the http - and ftp protocols, and a better and safer alternative using libcurl - has been available since release 1.3. If you need access to ftp:// and - http:// URLs, HTSlib should be built with libcurl support. (#1200) - -* The old htslib/knetfile.h interfaces have been marked as deprecated. Any - code still using them should be updated to use hFILE instead. (#1200) - -* Added an introspection API for checking some of the capabilities provided - by HTSlib. (#1170) Thanks also to John Marshall for contributions. 
(#1222) - - `hfile_list_schemes`: returns the number of schemes found - - `hfile_list_plugins`: returns the number of plugins found - - `hfile_has_plugin`: checks if a specific plugin is available - - `hts_features`: returns a bit mask with all available features - - `hts_test_feature`: test if a feature is available - - `hts_feature_string`: return a string summary of enabled features - -* Made performance improvements to `probaln_glocal` method, which - speeds up mpileup BAQ calculations. (#1188) - - Caching of reused loop variables and removal of loop invariants - - Code reordering to remove instruction latency. - - Other refactoring and tidyups. - -* Added a public method for constructing a BAM record from the - component pieces. Thanks to Anders Kaplan. (#1159, #1164) - -* Added two public methods, `sam_parse_cigar` and `bam_parse_cigar`, as part of - a small CIGAR API (#1169, #1182). Thanks to Daniel Cameron for input. (#1147) - -* HTSlib, and the included htsfile program, will now recognise the old - RAZF compressed file format. Note that while the format is detected, - HTSlib is unable to read it. It is recommended that RAZF files are - uncompressed with `gunzip` before using them with HTSlib. Thanks to - John Marshall (#1244); and Matthew J. Oldach who reported problems - with uncompressing some RAZF files (samtools/samtools#1387). - -* The S3 plugin now has options to force the address style. It will recognise - the addressing_style and host_bucket entries in the respective aws - .credentials and s3cmd .s3cfg files. There is also a new HTS_S3_ADDRESS_STYLE - environment variable. Details are in the htslib-s3-plugin.7 man file (#1249). - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* Added new Makefile targets for the applications that embed HTSlib and - want to run its test suite or clean its generated artefacts. 
(#1230, #1238) - -* The CRAM codecs are now obtained via the htscodecs submodule, hence - when cloning it is now best to use "git clone --recursive". In an - existing clone, you may use "git submodule update --init" to obtain - the htscodecs submodule checkout. - -* Updated CI test configuration to recurse HTSlib submodules. (#1359) - -* Added Cirrus-CI integration as a replacement for Travis, which was - phased out. (#1175; #1212) - -* Updated the Windows image used by Appveyor to 'Visual Studio 2019'. (#1172; - fixed #1166) - -* Fixed a buglet in configure.ac, exposed by the release 2.70 of autoconf. - Thanks to John Marshall. (#1198) - -* Fixed plugin linking on macOS, to prevent symbol conflict when linking - with a static HTSlib. Thanks to John Marshall. (#1184) - -* Fixed a clang++9 error in `cram_io.h`. Thanks to Pjotr Prins. (#1190) - -* Introduced $(ALL_CPPFLAGS) to allow for more flexibility in setting the - compiler flags. Thanks to John Marshall. (#1187) - -* Added 'fall through' comments to prevent warnings issued by Clang on - intentional fall through case statements, when building with - `-Wextra flag`. Thanks to John Marshall. (#1163) - -* Non-configure builds now define _XOPEN_SOURCE=600 to allow them to work - when the `gcc -std=c99` option is used. Thanks to John Marshall. (#1246) - -Bug fixes ---------- - -* Fixed VCF `#CHROM` header parsing to only separate columns at tab characters. - Thanks to Sam Morris for reporting the issue. - (#1237; fixed samtools/bcftools#1408) - -* Fixed a crash reported in `bcf_sr_sort_set`, which expects REF to be present. - (#1204; fixed samtools/bcftools#1361) - -* Fixed a bcf synced reader bug when filtering with a region list, and - the first record for a chromosome had the same position as the last - record for the previous chromosome. (#1254; fixed samtools/bcftools#1441) - -* Fixed a bug in the overlapping logic of mpileup, dealing with iterating over - CIGAR segments. Thanks to `@wulj2` for the analysis. 
(#1202; fixed #1196) - -* Fixed a tabix bug that prevented setting the correct number of lines to be - skipped in a region file. Thanks to Jim Robinson for reporting it. (#1189; - fixed #1186) - -* Made `bam_itr_next` an alias for `sam_itr_next`, to prevent it from crashing - when working with htsFile pointers. Thanks to Torbjörn Klatt for - reporting it. (#1180; fixed #1179) - -* Fixed once per outgoing multi-threaded block `bgzf_idx_flush` assertion, to - accommodate situations when a single record could span multiple blocks. - Thanks to `@lacek`. (#1168; fixed samtools/samtools#1328) - -* Fixed assumption of pthread_t being a non-structure, as permitted by POSIX. - Thanks also to John Marshall and Anders Kaplan. (#1167, #1153, #1153) - -* Fixed the minimum offset of a BAI index bin, to account for unmapped reads. - Thanks to John Marshall for spotting the issue. (#1158; fixed #1142) - -* Fixed the CRLF handling in `sam_parse_worker` method. Thanks to - Anders Kaplan. (#1149; fixed #1148) - -* Included unistd.h and errno.h directly in HTSlib files, as opposed to - including them indirectly, via third party code. Thanks to - Andrew Patterson (#1143) and John Marshall (#1145). - - -Noteworthy changes in release 1.11 (22nd September 2020) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* Support added for remote reference files. fai_path() can take a remote - reference file and will return the corresponding index file. Remote indexes - can be handled by refs_load_fai(). UR tags in @SQ lines can now be set to - remote URIs. (#1017) - -* Added tabix --separate-regions option, which adds header comment lines - separating different regions' output records when multiple target regions - are supplied on the command line. (#1108) - -* Added tabix --cache option to set a BGZF block cache size. Most beneficial - when the -R option is used and the same blocks need to be re-read multiple - times. 
(#1053) - -* Improved error checking in tabix and added a --verbosity option so - it is possible to change the amount of logging when it runs. (#1040) - -* A note about the maximum chromosome length usable with TBI indexes has been - added to the tabix manual page. Thanks to John Marshall. (#1070) - -* New method vcf_open_mode() changes the opening mode of a variant file - based on its file extension. Similar to sam_open_mode(). (#1096) - -* The VCF parser has been made faster and easier to maintain. (#1057) - -* bcf_record_check() has been made faster, giving a 15% speed increase when - reading an uncompressed BCF file. (#1130) - -* The VCF parser now recognises the "<NON_REF>" symbolic allele produced - by GATK. (#1045) - -* Support has been added for simultaneous reading of unindexed VCF/BCF files - when using the synced_bcf_reader interface. Input files must have the - chromosomes in the same order as each other and be consistent with the order - of sequences in the header. (#1089) - -* The VCF and BCF readers will now attempt to fix up invalid INFO/END tags - where the stored END value is less than POS, resulting in an apparently - negative record length. Such files have been generated by programs which - used END incorrectly, and by broken lift-over processes that failed to - update any END tags present. (#1021; fixed samtools/bcftools#1154) - -* The htsFile interface can now detect the crypt4gh encrypted format (see - https://samtools.github.io/hts-specs/crypt4gh.pdf). If HTSlib is - built with external plug-in support, and the hfile_crypt4gh plug-in is - present, the file will be passed to it for decryption. The plug-in - can be obtained from https://github.com/samtools/htslib-crypt4gh. (#1046) - -* hts_srand48() now seeds the same POSIX-standard sequences of pseudo-random - numbers regardless of platform, including on OpenBSD where plain srand48() - produces a different cryptographically-strong non-deterministic sequence. - Thanks to John Marshall. 
(#1002) - -* Iterators now work with 64 bit positions. (#1018) - -* Improved the speed of range queries when using BAI indexes by - making better use of the linear index data included in the file. - The best improvement is on low-coverage data. (#1031) - -* Alignments which consume no reference bases are now considered to have - length 1. This would make such alignments cover 1 reference position in - the same manner as alignments that are unmapped or have no CIGAR strings. - These alignments can now be returned by iterator-based queries. Thanks - to John Marshall. (#1063; fixed samtools/samtools#1240, see also - samtools/hts-specs#521). - -* A bam_set_seqi() function to modify a single base in the BAM structure - has been added. This is a companion function to bam_seqi(). (#1022) - -* Writing SAM format is around 30% faster. (#1035) - -* Added sam_format_aux1() which converts a BAM aux tag to a SAM format string. - (#1134) - -* bam_aux_update_str() no longer requires NUL-terminated strings. It - is also now possible to create tags containing part of a longer string. - (#1088) - -* It is now possible to use external plug-ins in language bindings that - dynamically load HTSlib. Note that a side-effect of this change is that - some plug-ins now link against libhts.so, which means that they have to be - able to find the shared library when they are started up. Thanks to - John Marshall. (#1072) - -* bgzf_close(), and therefore hts_close(), will now return non-zero when - closing a BGZF handle on which errors have been detected. (Part of #1117) - -* Added a special case to the kt_fisher_exact() test for when the table - probability is too small to be represented in a double. This fixes a - bug where it would, for some inputs, fail to correctly determine which - side of the distribution the table was on resulting in swapped p-values - being returned for the left- and right-tailed tests. The two-tailed - test value was not affected by this problem. 
(#1126) - -* Improved error diagnostics in the CRAM decoder (#1042), BGZF (#1049), - the VCF and BCF readers (#1059), and the SAM parser (#1073). - -* ks_resize() now allocates 1.5 times the requested size when it needs - to expand a kstring instead of rounding up to the next power of two. - This has been done mainly to make the inlined function smaller, but it - also reduces the overhead of storing data in kstrings at the expense of - possibly needing a few more reallocations. (#1129) - -CRAM improvements ------------------ - -* Delay CRAM crc32 checks until the data actually needs to be used. With - other changes this leads to a 20x speed up in indexing and other sub-query - based actions. (#988) - -* CRAM now handles the transition from mapped to unmapped data in a better - way, improving compression of the unmapped data. (#961) - -* CRAM can now use libdeflate. (#961) - -* Fixed bug in MD tag generation with "b" read feature codes, causing the - numbers in the tag to be too large. Note that HTSlib never uses this - feature code so it is unlikely that this bug would be seen on real data. - The problem was found when testing against hand-crafted CRAM files. (#1086) - -* Fixed a regression where the CRAM multi-region iterator became much less - efficient when using threads. It now works more like the single iterator - and does not preemptively decode the next container unless it will be used. - (#1061) - -* Set CRAM default quality in lossy quality modes. If lossy quality is enabled - and 'B', 'q' or 'Q' features are used, CRAM starts off with QUAL being all 255 - (as per BAM spec and "*" quality) and then modifies individual qualities as - dictated by the specific features. - - However that then produces ASCII quality " " (space, q=-1) for the unmodified - bases. Instead ASCII quality "?" (q=30) is used, as per HTSJDK. Quality 255 - is still used for sequences with no modifications at all. 
(#1094) - - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* `make all` now also builds htslib_static.mk and htslib-uninstalled.pc. - Thanks to John Marshall. (#1011) - -* Various cppcheck-1.90 warnings have been fixed. (#995, #1011) - -* HTSlib now prefers its own headers when being compiled, fixing build - failures on machines that already had a system-installed HTSlib. Thanks to - John Marshall. (#1078; fixed #347) - -* Define HTSLIB_EXPORT without using a helper macro to reduce the length of - compiler diagnostics that mention exported functions. Thanks to - John Marshall. (#1029) - -* Fix dirty default build by including latest pkg.m4 instead of using - aclocal.m4. Thanks to Damien Zammit. (#1091) - -* Struct tags have been added to htslib/*.h public typedefs. This makes it - possible to forward declare htsFile without including htslib/hts.h. Thanks - to Lucas Czech and John Marshall. (#1115; fixed #1106) - -* Fixed compiler warnings emitted by the latest gcc and clang releases - when compiling HTSlib, along with some -Wextra warnings in the public - include files. Thanks to John Marshall. (#1066, #1063, #1083) - -Bug fixes ---------- - -* Fixed hfile_libcurl breakage when using libcurl 7.69.1 or later. Thanks to - John Marshall for tracking down the exact libcurl change that caused the - incompatibility. (#1105; fixed samtools/samtools#1254 and - samtools/samtools#1284) - -* Fixed overflows kroundup32() and kroundup_size_t() which caused them to - return zero when rounding up values where the most significant bit was - set. When this happens they now return the highest value that can - be stored (#1044). All of the kroundup macro definitions have also been - gathered together into a unified implementation (#1051). - -* Fixed missing return parameter value in idx_test_and_fetch(). Thanks to - Lilian Janin. 
(#1014) - -* Fixed crashes due to inconsistent selection between BGZF and plain (hFILE) - interfaces when reading files. [fuzz] (#1019) - -* Added and/or fixed byte swapping code for big-endian platforms. Thanks - to Jun Aruga, John Marshall, Michael R Crusoe and Gianfranco Costamagna - for their help. (#1023; fixed #119 and #355) - -* Fixed a problem with multi-threaded on-the-fly indexes which would - occasionally write virtual offsets pointing at the end of a BGZF block. - Attempting to read from such an offset caused EOF to be incorrectly - reported. These offsets are now handled correctly, and the indexer - has been updated to avoid generating them. (#1028; fixed - samtools/samtools#1197) - -* In sam_hdr_create(), free newly allocated SN strings when encountering an - error. [fuzz] (#1034) - -* Prevent double free in case of idx_test_and_fetch() failure. Thanks to - @fanwayne for the bug report. (#1047; fixed #1033) - -* In the header, link a new PG line only to valid chains. Prevents an - explosive growth of PG lines on headers where PG lines are already present - but not linked together correctly. (#1062; fixed samtools/samtools#1235) - -* Also in the header, when calling sam_hdr_update_line(), update target arrays - only when the name or length is changed. (#1007) - -* Fixed buffer overflows in CRAM MD5 calculation triggered by - files with invalid compression headers, or files with embedded - references that were one byte too short. [fuzz] (#1024, #1068) - -* Fix mpileup regression between 1.9 and 1.10 where overlap detection - was incorrectly skipped on reads where RNEXT, PNEXT and TLEN were - set to the "unavailable" values ("*", 0, 0 in SAM). (#1097) - -* kputs() now checks for null pointer in source string. [fuzz] (#1087) - -* Fix potential bcf_update_alleles() crash on 0 alleles. Thanks to - John Marshall. (#994) - -* Added bcf_unpack() calls to some bcf_update functions to fix a bug - where updates made after a call to bcf_dup() could be lost. 
(#1032; - fixed #1030) - -* Error message typo "Number=R" instead of "Number=G" fixed in - bcf_remove_allele_set(). Thanks to Ilya Vorontsov. (#1100) - -* Fixed crashes that could occur in BCF files that use IDX= header annotations - to create a sparse set of CHROM, FILTER or FORMAT indexes, and - include records that use one of the missing index values. [fuzz] (#1092) - -* Fixed potential integer overflows in the VCF parser and ensured that - the total length of FORMAT fields cannot go over 2Gbytes. [fuzz] (#1044, - #1104; latter is CVE-2020-36403 affecting all HTSlib versions up to 1.10.2) - -* Download index files atomically in idx_test_and_fetch(). This prevents - corruption when running parallel jobs on S3 files. Thanks to John Marshall. - (#1112; samtools/samtools#1242). - -* The pileup constructor callback is now given the copy of the bam1_t struct - made by pileup instead of the original one passed to bam_plp_push(). This - makes it the same as the one passed to the destructor and ensures that - cached data, for example the location of an aux tag, will remain valid. - (#1127) - -* Fixed possible error in code_sort() on negative CRAM Huffman code - length. (#1008) - -* Fixed possible undefined shift in cram_byte_array_stop_decode_init(). (#1009) - -* Fixed a bug where range queries to the end of a given reference - would return incorrect results on CRAM files. (#1016; - fixed samtools/samtools#1173) - -* Fixed an integer overflow in cram_read_slice(). [fuzz] (#1026) - -* Fixed a memory leak on failure in cram_decode_slice(). [fuzz] (#1054) - -* Fixed a regression which caused cram_transcode_rg() to fail, resulting - in a crash in "samtools cat" on CRAM files. (#1093; - fixed samtools/samtools#1276) - -* Fixed an undersized string reallocation in the threaded SAM reader which - caused it to crash when reading SAM files with very long lines. Numerous - memory allocation checks have also been added. 
(#1117) - - -Noteworthy changes in release 1.10.2 (19th December 2019) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a release fix that corrects minor inconsistencies discovered in -previous deliverables. - - -Noteworthy changes in release 1.10.1 (17th December 2019) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The support for 64-bit coordinates in VCF brought problems for files -not conforming to VCF/BCF specification. While previous versions would -make out-of-range values silently overflow creating nonsense values -but parseable file, the version 1.10 would silently create an invalid BCF. - - -Noteworthy changes in release 1.10 (6th December 2019) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Brief summary -------------- - -There are many changes in this release, so the executive summary is: - -* Addition of support for references longer than 2Gb (NB: SAM and VCF - formats only, not their binary counterparts). This may need changes - in code using HTSlib. See README.large_positions.md for more information. - -* Added a SAM header API. - -* Major speed up to SAM reading and writing. This also now supports - multi-threading. - -* We can now auto-index on-the-fly while writing a file. This also - includes to bgzipped SAM.gz. - -* Overhaul of the S3 interface, which now supports version 4 - signatures. This also makes writing to S3 work. - -These also required some ABI changes. See below for full details. - - -Features / updates ------------------- - -* A new SAM/BAM/CRAM header API has been added to HTSlib, allowing header - data to be updated without having to parse or rewrite large parts of the - header text. See htslib/sam.h for function definitions and - documentation. (#812) - - The header typedef and several pre-existing functions have been renamed - to have a sam_hdr_ prefix: sam_hdr_t, sam_hdr_init(), sam_hdr_destroy(), - and sam_hdr_dup(). 
(The existing bam_hdr_-prefixed names are still - provided for compatibility with existing code.) (#887, thanks to - John Marshall) - -* Changes to hfile_s3, which provides support for the AWS S3 API. (#839) - - - hfile_s3 now uses version 4 signatures by default. Attempting to write to - an S3 bucket will also now work correctly. It is possible to force - version 2 signatures by creating environment variable HTS_S3_V2 (the exact - value does not matter, it just has to exist). Note that writing depends - on features that need version 4 signatures, so forcing version 2 will - disable writes. - - - hfile_s3 will automatically retry requests where the region endpoint - was not specified correctly, either by following the 301 redirect (when - using path-style requests) or reading the 400 response (when using - virtual-hosted style requests and version 4 signatures). The first - region to try can be set by using the AWS_DEFAULT_REGION environment - variable, by setting "region" in ".aws/credentials" or by setting - "bucket_location" in ".s3cfg". - - - hfile_s3 now percent-escapes the path component of s3:// URLs. For - backwards-compatibility it will ignore any paths that have already - been escaped (detected by looking for '%' followed by two hexadecimal - digits.) - - - New environment variables HTS_S3_V2, HTS_S3_HOST, HTS_S3_S3CFG - and HTS_S3_PART_SIZE to force version-2 signatures, control the - S3 server hostname, the configuration file and upload chunk - sizes respectively. - -* Numerous SAM format improvements. - - - Bgzipped SAM files can now be indexed and queried. The library now - recognises sam.gz as a format name to ease this usage. (#718, #916) - - - The SAM reader and writer now supports multi-threading via the - thread-pool. (#916) - - Note that the multi-threaded SAM reader does not currently support seek - operations. Trying to do this (for example with an iterator range request) - will result in the SAM readers dropping back to single-threaded mode. 
- - - Major speed up of SAM decoding and encoding, by around 2x. (#722) - - - SAM format can now handle 64-bit coordinates and references. This - has implications for the ABI too (see below). Note BAM and CRAM - currently cannot handle references longer than 2Gb, however given - the speed and threading improvements SAM.gz is a viable workaround. (#709) - -* We can now automatically build indices on-the-fly while writing - SAM, BAM, CRAM, VCF and BCF files. (Note for SAM and VCF this only - works when bgzipped.) (#718) - -* HTSlib now supports the @SQ-AN header field, which lists alternative names - for reference sequences. This means given "@SQ SN:1 AN:chr1", tools like - samtools can accept requests for "1" or "chr1" equivalently. (#931) - -* Zero-length files are no longer considered to be valid SAM files - (with no header and no alignments). This has been changed so that pipelines - such as `somecmd | samtools ...` with `somecmd` aborting before outputting - anything will now propagate the error to the second command. (#721, thanks - to John Marshall; #261 reported by Adrian Tan) - -* Added support for use of non-standard index names by pasting the - data filename and index filename with ##idx##. For example - "/path1/my_data.bam##idx##/path2/my_index.csi" will open bam file - "/path1/my_data.bam" and index file "/path2/my_index.csi". (#884) - - This affects hts_idx_load() and hts_open() functions. - -* Improved the region parsing code to handle colons in reference - names. Strings can be disambiguated by the use of braces, so for - example when reference sequences called "chr1" and "chr1:100-200" - are both present, the regions "{chr1}:100-200" and "{chr1:100-200}" - unambiguously indicate which reference is being used. (#708) - - A new function hts_parse_region() has been added along with - specialisations for sam_parse_region() and fai_parse_region(). - -* CRAM encoding now has additional checks for MD/NM validity. 
If - they are incorrect, it stores the (incorrect copy) verbatim so - round-trips "work". (#792) - -* Sped up decoding of CRAM by around 10% when the MD tag is being - generated. (#874) - -* CRAM REF_PATH now supports %Ns (where N is a single digit) - expansion in http URLs, similar to how it already supported this - for directories. (#791) - -* BGZF now permits indexing and seeking using virtual offsets in - completely uncompressed streams. (#904, thanks to Adam Novak) - -* bgzip now asks for extra confirmation before decompressing files - that don't have a known compression extension (e.g. .gz). This avoids - `bgzip -d foo.bam.bai` producing a foo.bam file that is very much not - a BAM-formatted file. (#927, thanks to John Marshall) - -* The htsfile utility can now copy files (including to/from URLs using - HTSlib's remote access facilities) with the --copy option, in - addition to its existing uses of identifying file formats and - displaying sequence or variant data. (#756, thanks to John Marshall) - -* Added tabix --min-shift option. (#752, thanks to Garrett Stevens) - -* Tabix now has an -D option to disable storing a local copy of a - remote index. (#870) - -* Improved support for MSYS Windows compiler environment. (#966) - -* External htslib plugins are now supported on Windows. (#966) - - -API additions and improvements ------------------------------- - -* New API functions bam_set_mempolicy() and bam_get_mempolicy() have - been added. These allow more control over the ownership of bam1_t - alignment record data; see documentation in htslib/sam.h for more - information. (#922) - -* Added more HTS_RESULT_USED checks, this time for VCF I/O. (#805) - -* khash can now hash kstrings. This makes it easier to hash - non-NUL-terminated strings. (#713) - -* New haddextension() filename extension API function. (#788, thanks to - John Marshall) - -* New hts_resize() macro, designed to replace uses of hts_expand() - and hts_expand0(). 
(#805) - -* Added way of cleaning up unused jobs in the thread pool via the new - hts_tpool_dispatch3() function. (#830) - -* New API functions hts_reglist_create() and sam_itr_regarray() are added - to create hts_reglist_t region lists from `chr:<from>-<to>` type region - specifiers. (#836) - -* Ksort has been improved to facilitate library use. See KSORT_INIT2 - (adds scope / namespace capabilities) and KSORT_INIT_STATIC interfaces. - (#851, thanks to John Marshall) - -* New kstring functions (#879): - KS_INITIALIZE - Initializer for structure assignment - ks_initialize() - Initializer for pointed-to kstrings - ks_expand() - Increase kstring capacity by a given amount - ks_clear() - Set kstring length to zero - ks_free() - Free the underlying buffer - ks_c_str() - Returns the kstring buffer as a const char *, - or an empty string if the length is zero. - -* New API functions hts_idx_load3(), sam_index_load3(), tbx_index_load3() - and bcf_index_load3() have been added. These allow control of whether - remote indexes should be cached locally, and allow the error message - printed when the index does not exist to be suppressed. (#870) - -* Improved hts_detect_format() so it no longer assumes all text is - SAM unless positively identified otherwise. It also makes a stab - at detecting bzip2 format and identifying BED, FASTA and FASTQ - files. (#721, thanks to John Marshall; #200, #719 both reported by - Torsten Seemann) - -* File format errors now set errno to EFTYPE (BSD, MacOS) when - available instead of ENOEXEC. (#721) - -* New API function bam_set_qname (#942) - -* In addition to the existing hts_version() function, which reflects the - HTSlib version being used at runtime, now also provides - HTS_VERSION, a preprocessor macro reflecting the HTSlib version that - a program is being compiled against. (#951, thanks to John Marshall; #794) - - -ABI changes ------------ - -This release contains a number of things which change the Application -Binary Interface (ABI). 
This means code compiled against an earlier -library will require recompiling. The shared library soversion has -been bumped. - -* On systems that support it, the default symbol visibility has been - changed to hidden and the only exported symbols are ones that form part - of the officially supported ABI. This is to make clear exactly which - symbols are considered parts of the library interface. It also - helps packagers who want to check compatibility between HTSlib versions. - (#946; see for example issues #311, #616, and #695) - -* HTSlib now supports 64 bit reference positions. This means several - structures, function parameters, and return values have been made bigger - to allow larger values to be stored. While most code that uses - HTSlib interfaces should still build after this change, some alterations - may be needed - notably to printf() formats where the values of structure - members are being printed. (#709) - - Due to file format limitations, large positions are only supported - when reading and writing SAM and VCF files. - - See README.large_positions.md for more information. - -* An extra field has been added to the kbitset_t struct so bitsets can - be made smaller (and later enlarged) without involving memory allocation. - (#710, thanks to John Marshall) - -* A new field has been added to the bam_pileup1_t structure to keep track - of which CIGAR operator is being processed. This is used by a new - bam_plp_insertion() function which can be used to return the sequence of - any inserted bases at a given pileup location. If the alignment includes - CIGAR P operators, the returned sequence will include pads. (#699) - -* The hts_itr_t and hts_itr_multi_t structures have been merged and can be - used interchangeably. Extra fields have been added to hts_itr_t to support - this. hts_itr_multi_t is now a typedef for hts_itr_t; sam_itr_multi_next() - is now an alias for sam_itr_next() and hts_itr_multi_destroy() is an alias - for hts_itr_destroy(). 
(#836) - -* An improved regidx interface has been added. To allow this, struct - reg_t has been removed, regitr_t has been modified and various new - API functions have been added to htslib/regidx.h. While parts of - the old regidx API have been retained for backwards compatibility, - it is recommended that all code using regidx should be changed to use - the new interface. (#761) - -* Elements in the hts_reglist_t structure have been reordered slightly - so that they pack together better. (#761) - -* bgzf_utell() and bgzf_useek() now use type off_t instead of long for - the offset. This allows them to work correctly on files longer than - 2G bytes on Windows and 32-bit Linux. (#868) - -* A number of functions that used to return void now return int so that - they can report problems like memory allocation failures. Callers - should take care to check the return values from these functions. (#834) - - The affected functions are: - ksort.h: ks_introsort(), ks_mergesort() - sam.h: bam_mplp_init_overlaps() - synced_bcf_reader.h: bcf_sr_regions_flush() - vcf.h: bcf_format_gt(), bcf_fmt_array(), - bcf_enc_int1(), bcf_enc_size(), - bcf_enc_vchar(), bcf_enc_vfloat(), bcf_enc_vint(), - bcf_hdr_set_version(), bcf_hrec_format() - vcfutils.h: bcf_remove_alleles() - -* bcf_set_variant_type() now outputs VCF_OVERLAP for spanning - deletions (ALT=*). (#726) - -* A new field (hrecs) has been added to the bam_hdr_t structure for - use by the new header API. The old sdict field is now not used and - marked as deprecated. The l_text field has been changed from uint32_t - to size_t, to allow for very large headers in SAM files. The text - and l_text fields have been left for backwards compatibility, but - should not be accessed directly in code that uses the new header API. - To access the header text, the new functions sam_hdr_length() and - sam_hdr_str() should be used instead. (#812) - -* The old cigar_tab field is now marked as deprecated; use the new - bam_cigar_table[] instead. 
(#891, thanks to John Marshall) - -* The bam1_core_t structure's l_qname and l_extranul fields have been - rearranged and enlarged; l_qname still includes the extra NULs. - (Almost all code should use bam_get_qname(), bam_get_cigar(), etc, - and has no need to use these fields directly.) HTSlib now supports - the SAM specification's full 254 QNAME length again. (#900, thanks - to John Marshall; #520) - -* bcf_index_load() no longer tries the '.tbi' suffix when looking for - BCF index files (.tbi indexes are for text files, not binary BCF). (#870) - -* htsFile has a new 'state' member to support SAM multi-threading. (#916) - -* A new field has been added to the bam1_t structure, and others - have been rearranged to remove structure holes. (#709; #922) - - -Bug fixes ---------- - -* Several BGZF format fixes: - - - Support for multi-member gzip files. (#744, thanks to Adam Novak; #742) - - - Fixed error handling code for native gzip formatted files. (64c4927) - - - CRCs checked when threading too (previously only when non-threaded). (#745) - - - Made bgzf_useek function work with threads. (#818) - - - Fixed rare threading deadlocks. (#831) - - - Reading of very short files (<28 bytes) that do not contain an EOF block. - (#910) - -* Fixed some thread pool deadlocks caused by race conditions. (#746, #906) - -* Many additional memory allocation checks in VCF, BCF, SAM and CRAM - code. This also changes the return type of some functions. See ABI - changes above. (#920 amongst others) - -* Replace some sam parsing abort() calls with proper errors. - (#721, thanks to John Marshall; #576) - -* Fixed to permit SAM read names of length 252 to 254 (the maximum - specified by the SAM specification). (#900, thanks to John Marshall) - -* Fixed mpileup overlap detection heuristic to work with BAMs having - long CIGARs (more than 65536 operations). (#802) - -* Security fix: CIGAR strings starting with the "N" operation can no - longer cause underflow on the bam CIGAR structure. 
Similarly CIGAR - strings that are entirely "D" ops could leak the contents of - uninitialised variables. (#699) - -* Fixed bug where alignments starting 0M could cause an invalid - memory access in sam_prob_realn(). (#699) - -* Fixed out of bounds memory access in mpileup when given a reference - with binary characters (top-bit set). (#808, thanks to John Marshall) - -* Fixed crash in mpileup overlap_push() function. (#882; #852 reported - by Pierre Lindenbaum) - -* Fixed various potential CRAM memory leaks when recovering from - error cases. - -* Fixed CRAM index queries for unmapped reads (#911; samtools/samtools#958 - reported by @acorvelo) - -* Fixed the combination of CRAM embedded references and multiple - slices per container. This was incorrectly setting the header - MD5sum. (No impact on default CRAM behaviour.) (b2552fd) - -* Removed unwanted explicit data flushing in CRAM writing, which on - some OSes caused major slowdowns. (#883) - -* Fixed inefficiencies in CRAM encoding when many small references - occur within the middle of large chromosomes. Previously it - switched into multi-ref mode, but not back out of it which caused - the read POS field to be stored poorly. (#896) - -* Fixed CRAM handling of references when the order of sequences in a - supplied fasta file differs to the order of the @SQ headers. (#935) - -* Fixed BAM and CRAM multi-threaded decoding when used in conjunction - with the multi-region iterator. (#830; #577, #822, #926 all reported by - Brent Pedersen) - -* Removed some unaligned memory accesses in CRAM encoder and - undefined behaviour in BCF reading (#867, thanks to David Seifert) - -* Repeated calling of bcf_empty() no longer crashes. (#741) - -* Fixed bug where some 8 or 16-bit negative integers were stored using values - reserved by the BCF specification. These numbers are now promoted to the - next size up, so -121 to -128 are stored using at least 16 bits, and -32761 - to -32768 are stored using 32 bits. 
- - Note that while BCF files affected by this bug are technically incorrect, - it is still possible to read them. When converting to VCF format, - HTSlib (and therefore bcftools) will interpret the values as intended - and write out the correct negative numbers. (#766, thanks to John Marshall; - samtools/bcftools#874) - -* Allow repeated invocations of bcf_update_info() and bcf_update_format_*() - functions. (#856, thanks to John Marshall; #813 reported by Steffen Möller) - -* Memory leak removed in knetfile's kftp_parse_url() function. (#759, thanks - to David Alexander) - -* Fixed various crashes found by libfuzzer (invalid data leading to - errors), mostly but not exclusively in CRAM, VCF and BCF decoding. (#805) - -* Improved robustness of BAI and CSI index creation and loading. (#870; #967) - -* Prevent (invalid) creation of TBI indices for BCF files. - (#837; samtools/bcftools#707) - -* Better parsing of handling of remote URLs with ?param=val - components and their interaction with remote index URLs. (#790; #784 - reported by Mark Ebbert) - -* hts_idx_load() now checks locally for all possible index names before - attempting to download a remote index. It also checks that the remote - file it downloads is actually an index before trying to save and use - it. (#870; samtools/samtools#1045 reported by Albert Vilella) - -* hts_open_format() now honours the compression field, no longer also - requiring an explicit "z" in the mode string. Also fixed a 1 byte - buffer overrun. (#880) - -* Removed duplicate hts_tpool_process_flush prototype. (#816, reported by - James S Blachly) - -* Deleted defunct cram_tell declaration. (66c41e2; #915 reported by - Martin Morgan) - -* Fixed overly aggressive filename suffix checking in bgzip. (#927, thanks to - John Marshall; #129, reported by @hguturu) - -* Tabix and bgzip --help output now goes to standard output. (#754, thanks to - John Marshall) - -* Fixed bgzip index creation when using multiple threads. 
(#817) - -* Made bgzip -b option honour -I (index filename). (#817) - -* Bgzip -d no longer attempts to unlink(NULL) when decompressing stdin. (#718) - - -Miscellaneous other changes ---------------------------- - -* Integration with Google OSS fuzzing for automatic detection of - more bugs. (Thanks to Google for their assistance and the bugs it - has found.) (#796, thanks to Markus Kusano) - -* aclocal.m4 now has the pkg-config macros. (6ec3b94d; #733 reported by - Thomas Hickman) - -* Improved C++ compatibility of some header files. (#772; #771 reported - by @cwrussell) - -* Improved strict C99 compatibility. (#860, thanks to John Marshall) - -* Travis and AppVeyor improvements to aid testing. (#747; #773 thanks to - Lennard Berger; #781; #809; #804; #860; #909) - -* Various minor compiler warnings fixed. (#708; #765; #846, #860, thanks to - John Marshall; #865; #966; #973) - -* Various new and improved error messages. - -* Documentation updates (mostly in the header files). - -* Even more testing with "make check". - -* Corrected many copyright dates. (#979) - -* The default non-configure Makefile now uses libcurl instead of - knet, so it can support https. (#895) - - - - - - -Noteworthy changes in release 1.9 (18th July 2018) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* If `./configure` fails, `make` will stop working until either configure - is re-run successfully, or `make distclean` is used. This makes - configuration failures more obvious. (#711, thanks to John Marshall) - -* The default SAM version has been changed to 1.6. This is in line with the - latest version specification and indicates that HTSlib supports the - CG tag used to store long CIGAR data in BAM format. - -* bgzip integrity check option '--test' (#682, thanks to @sd4B75bJ, @jrayner) - -* Faidx can now index fastq files as well as fasta. The fastq index adds - an extra column to the `.fai` index which gives the offset to the quality - values. 
New interfaces have been added to `htslib/faidx.h` to read the - fastq index and retrieve the quality values. It is possible to open - a fastq index as if fasta (only sequences will be returned), but not - the other way round. (#701) - -* New API interfaces to add or update integer, float and array aux tags. (#694) - -* Add `level=` option to `hts_set_opt()` to allow the compression - level to be set. Setting `level=0` enables uncompressed output. (#715) - -* Improved bgzip error reporting. - -* Better error reporting when CRAM reference files can't be opened. (#706) - -* Fixes to make tests work properly on Windows/MinGW - mainly to handle - line ending differences. (#716) - -* Efficiency improvements: - - - Small speed-up for CRAM indexing. - - - Reduce the number of unnecessary wake-ups in the thread pool. (#703) - - - Avoid some memory copies when writing data, notably for uncompressed - BGZF output. (#703) - -* Bug fixes: - - - Fix multi-region iterator bugs on CRAM files. (#684) - - - Fixed multi-region iterator bug that caused some reads to be skipped - incorrectly when reading BAM files. (#687) - - - Fixed synced_bcf_reader() bug when reading contigs multiple times. (#691, - reported by @freeseek) - - - Fixed bug where bcf_hdr_set_samples() did not update the sample dictionary - when removing samples. (#692, reported by @freeseek) - - - Fixed bug where the VCF record ref length was calculated incorrectly - if an INFO END tag was present. (71b00a) - - - Fixed warnings found when compiling with gcc 8.1.0. (#700) - - - sam_hdr_read() and sam_hdr_write() will now return an error code - if passed a NULL file pointer, instead of crashing. - - - Fixed possible negative array look-up in sam_parse1() that somehow escaped - previous fuzz testing. (CVE-2018-13845, #731, reported by @fCorleone) - - - Fixed bug where cram range queries could incorrectly report an error - when using multiple threads. 
(#734, reported by Brent Pedersen) - - - Fixed very rare rANS normalisation bug that could cause an assertion - failure when writing CRAM files. (#739, reported by @carsonhh) - -Noteworthy changes in release 1.8 (3rd April 2018) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* The URL to get sequences from the EBI reference server has been changed - to https://. This is because the EBI no longer serve sequences via - plain HTTP - requests to the http:// endpoint just get redirected. - HTSlib needs to be linked against libcurl to download https:// URLs, - so CRAM users who want to get references from the EBI will need to - run configure and ensure libcurl support is enabled using the - --enable-libcurl option. - -* Added libdeflate as a build option for alternative faster compression and - decompression. Results vary by CPU but compression should be twice as fast - and decompression faster. - -* It is now possible to set the compression level in bgzip. (#675; thanks - to Nathan Weeks). - -* bgzip now gets its own manual page. - -* CRAM encoding now stored MD and NM tags verbatim where the reference - contains 'N' characters, to work around ambiguities in the SAM - specification (samtools #717/762). - Also added "store_md" and "store_nm" cram-options for forcing these - tags to be stored at all locations. This is best when combined with - a subsequent decode_md=0 option while reading CRAM. - -* Multiple CRAM bug fixes, including a fix to free and the subsequent reuse of - references with `-T ref.fa`. (#654; reported by Chris Saunders) - -* CRAM multi-threading bugs fixed: don't try to call flush on reading; - processing of multiple range queries; problems with multi-slice containers. - -* Fixed crashes caused when decoding some cramtools produced CRAM files. - -* Fixed a couple of minor rANS issues with handling invalid data. 
- -* Fixed bug where probaln_glocal() tried to allocate far more memory than - needed when the query sequence was much longer than the reference. This - caused crashes in samtools and bcftools mpileup when used on data with very - long reads. (#572, problem reported by Felix Bemm via minimap2). - -* sam_prop_realn() now returns -1 (the same value as for unmapped reads) - on reads that do not include at least one 'M', 'X' or '=' CIGAR operator, - and no longer adds BQ or ZQ tags. BAQ adjustments are only made to bases - covered by these operators so there is no point in trying to align - reads that do not have them. (#572) - -Noteworthy changes in release 1.7 (26th January 2018) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* BAM: HTSlib now supports BAMs which include CIGARs with more than - 65535 operations as per HTS-Specs 18th November (dab57f4 and 2f915a8). - -* BCF/VCF: - - Removed the need for long double in pileup calculations. - - Sped up the synced reader in some situations. - - Bug fixing: removed memory leak in bcf_copy. - -* CRAM: - - Added support for HTS_IDX_START in cram iterators. - - Easier to build when lzma header files are absent. - - Bug fixing: a region query with REQUIRED_FIELDS option to - disable sequence retrieval now gives correct results. - - Bug fixing: stop queries to regions starting after the last - read on a chromosome from incorrectly reporting errors - (#651, #653; reported by Imran Haque and @egafni via pysam). - -* Multi-region iterator: The new structure takes a list of regions and - iterates over all, deduplicating reads in the process, and producing a - full list of file offset intervals. This is usually much faster than - repeatedly using the old single-region iterator on a series of regions. - -* Curl improvements: - - Add Bearer token support via HTS_AUTH_LOCATION env (#600). - - Use CURL_CA_BUNDLE environment variable to override the CA (#622; - thanks to Garret Kelly & David Alexander). 
- - Speed up (removal of excessive waiting) for both http(s) and ftp. - - Avoid repeatedly reconnecting by removal of unnecessary seeks. - - Bug fixing: double free when libcurl_open fails. - -* BGZF block caching, if enabled, now performs far better (#629; reported - by Ram Yalamanchili). - -* Added an hFILE layer for in-memory I/O buffers (#590; thanks to Thomas - Hickman). - -* Tidied up the drand48 support (intended for systems that do not - provide this function). - -Noteworthy changes in release 1.6 (28th September 2017) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Fixed bug where iterators on CRAM files did not propagate error return - values to the caller correctly. Thanks go to Chris Saunders. - -* Overhauled Windows builds. Building with msys2/mingw64 now works - correctly and passes all tests. - -* More improvements to logging output (thanks again to Anders Kaplan). - -* Return codes from sam_read1() when reading cram have been made - consistent with those returned when reading sam/bam. Thanks to - Chris Saunders (#575). - -* BGZF CRC32 checksums are now always verified. - -* It's now possible to set nthreads = 1 for cram files. - -* hfile_libcurl has been modified to make it thread-safe. It's also - better at handling web servers that do not honour byte range requests - when attempting to seek - it now sets errno to ESPIPE and keeps - the existing connection open so callers can revert to streaming mode - it they want to. - -* hfile_s3 now recalculates access tokens if they have become stale. This - fixes a reported problem where authentication failed after a file - had been in use for more than 15 minutes. - -* Fixed bug where remote index fetches would fail to notice errors when - writing files. - -* bam_read1() now checks that the query sequence length derived from the - CIGAR alignment matches the sequence length in the BAM record. 
- -Noteworthy changes in release 1.5 (21st June 2017) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Added a new logging API: hts_log(), along with hts_log_error(), - hts_log_warn() etc. convenience macros. Thanks go to Anders Kaplan - for the implementation. (#499, #543, #551) - -* Added a new file I/O option "block_size" (HTS_OPT_BLOCK_SIZE) to - alter the hFILE buffer size. - -* Fixed various bugs, including compilation issues samtools/bcftools#610, - samtools/bcftools#611 and robustness to corrupted data #537, #538, - #541, #546, #548, #549, #554. - - -Noteworthy changes in release 1.4.1 (8th May 2017) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is primarily a security bug fix update. - -* Fixed SECURITY (CVE-2017-1000206) issue with buffer overruns with - malicious data. (#514) - -* S3 support for non Amazon AWS endpoints. (#506) - -* Support for variant breakpoints in bcftools. (#516) - -* Improved handling of BCF NaNs. (#485) - -* Compilation / portability improvements. (#255, #423, #498, #488) - -* Miscellaneous bug fixes (#482, #521, #522, #523, #524). - -* Sanitise headers (#509) - - -Release 1.4 (13 March 2017) - -* Incompatible changes: several functions and data types have been changed - in this release, and the shared library soversion has been bumped to 2. 
- - - bam_pileup1_t has an additional field (which holds user data) - - bam1_core_t has been modified to allow for >64K CIGAR operations - and (along with bam1_t) so that CIGAR entries are aligned in memory - - hopen() has vararg arguments for setting URL scheme-dependent options - - the various tbx_conf_* presets are now const - - auxiliary fields in bam1_t are now always stored in little-endian byte - order (previously this depended on if you read a bam, sam or cram file) - - index metadata (accessible via hts_idx_get_meta()) is now always - stored in little-endian byte order (previously this depended on if - the index was in tbi or csi format) - - bam_aux2i() now returns an int64_t value - - fai_load() will no longer save local copies of remote fasta indexes - - hts_idx_get_meta() now takes a uint32_t * for l_meta (was int32_t *) - -* HTSlib now links against libbz2 and liblzma by default. To remove these - dependencies, run configure with options --disable-bz2 and --disable-lzma, - but note that this may make some CRAM files produced elsewhere unreadable. - -* Added a thread pool interface and replaced the bgzf multi-threading - code to use this pool. BAM and CRAM decoding is now multi-threaded - too, using the pool to automatically balance the number of threads - between decode, encode and any data processing jobs. - -* New errmod_cal(), probaln_glocal(), sam_cap_mapq(), and sam_prob_realn() - functions, previously internal to SAMtools, have been added to HTSlib. - -* Files can now be accessed via Google Cloud Storage using gs: URLs, when - HTSlib is configured to use libcurl for network file access rather than - the included basic knetfile networking. - -* S3 file access now also supports the "host_base" setting in the - $HOME/.s3cfg configuration file. - -* Data URLs ("data:,text") now follow the standard RFC 2397 format and may - be base64-encoded (when written as "data:;base64,text") or may include - percent-encoded characters. 
HTSlib's previous over-simplified "data:text" - format is no longer supported -- you will need to add an initial comma. - -* When plugins are enabled, S3 support is now provided by a separate - hfile_s3 plugin rather than by hfile_libcurl itself as previously. - When --enable-libcurl is used, by default both GCS and S3 support - and plugins will also be built; they can be individually disabled - via --disable-gcs and --disable-s3. - -* The iRODS file access plugin has been moved to a separate repository. - Configure no longer has a --with-irods option; instead build the plugin - found at . - -* APIs to portably read and write (possibly unaligned) data in little-endian - byte order have been added. - -* New functions bam_auxB_len(), bam_auxB2i() and bam_auxB2f() have been - added to make accessing array-type auxiliary data easier. bam_aux2i() - can now return the full range of values that can be stored in an integer - tag (including unsigned 32 bit tags). bam_aux2f() will return the value - of integer tags (as a double) as well as floating-point ones. All of - the bam_aux2 and bam_auxB2 functions will set errno if the requested - conversion is not valid. - -* New functions fai_load3() and fai_build3() allow fasta indexes to be - stored in a different location to the indexed fasta file. - -* New functions bgzf_index_dump_hfile() and bgzf_index_load_hfile() - allow bgzf index files (.gzi) to be written to / read from an existing - hFILE handle. - -* hts_idx_push() will report when trying to add a range to an index that - is beyond the limits that the given index can handle. This means trying - to index chromosomes longer than 2^29 bases with a .bai or .tbi index - will report an error instead of apparently working but creating an invalid - index entry. - -* VCF formatting is now approximately 4x faster. (Whether this is - noticeable depends on what was creating the VCF.) - -* CRAM lossy_names mode now works with TLEN of 0 or TLEN within +/- 1 - of the computed value. 
Note in these situations TLEN will be - generated / fixed during CRAM decode. - -* CRAM now supports bzip2 and lzma codecs. Within htslib these are - disabled by default, but can be enabled by specifying "use_bzip2" or - "use_lzma" in an hts_opt_add() call or via the mode string of the - hts_open_format() function. - -Noteworthy changes in release 1.3.2 (13 September 2016) - -* Corrected bin calculation when converting directly from CRAM to BAM. - Previously a small fraction of converted reads would fail Picard's - validation with "bin field of BAM record does not equal value computed" - (SAMtools issue #574). - -* Plugins can now signal to HTSlib which of RTLD_LOCAL and RTLD_GLOBAL - they wish to be opened with -- previously they were always RTLD_LOCAL. - - -Noteworthy changes in release 1.3.1 (22 April 2016) - -* Improved error checking and reporting, especially of I/O errors when - writing output files (#17, #315, PR #271, PR #317). - -* Build fixes for 32-bit systems; be sure to run configure to enable - large file support and access to 2GiB+ files. - -* Numerous VCF parsing fixes (#321, #322, #323, #324, #325; PR #370). - Particular thanks to Kostya Kortchinsky of the Google Security Team - for testing and numerous input parsing bug reports. - -* HTSlib now prints an informational message when initially creating a - CRAM reference cache in the default location under your $HOME directory. - (No message is printed if you are using $REF_CACHE to specify a location.) - -* Avoided rare race condition when caching downloaded CRAM reference sequence - files, by using distinctive names for temporary files (in addition to O_EXCL, - which has always been used). Occasional corruption would previously occur - when multiple tools were simultaneously caching the same reference sequences - on an NFS filesystem that did not support O_EXCL (PR #320). - -* Prevented race condition in file access plugin loading (PR #341). 
- -* Fixed mpileup memory leak, so no more "[bam_plp_destroy] memory leak [...] - Continue anyway" warning messages (#299). - -* Various minor CRAM fixes. - -* Fixed documentation problems #348 and #358. - - -Noteworthy changes in release 1.3 (15 December 2015) - -* Files can now be accessed via HTTPS and Amazon S3 in addition to HTTP - and FTP, when HTSlib is configured to use libcurl for network file access - rather than the included basic knetfile networking. - -* HTSlib can be built to use remote access hFILE backends (such as iRODS - and libcurl) via a plugin mechanism. This allows other backends to be - easily added and facilitates building tools that use HTSlib, as they - don't need to be linked with the backends' various required libraries. - -* When writing CRAM output, sam_open() etc now default to writing CRAM v3.0 - rather than v2.1. - -* fai_build() and samtools faidx now accept initial whitespace in ">" - headers (e.g., "> chr1 description" is taken to refer to "chr1"). - -* tabix --only-header works again (was broken in 1.2.x; #249). - -* HTSlib's configure script and Makefile now fully support the standard - convention of allowing CC/CPPFLAGS/CFLAGS/LDFLAGS/LIBS to be overridden - as needed. Previously the Makefile listened to $(LDLIBS) instead; if you - were overriding that, you should now override LIBS rather than LDLIBS. - -* Fixed bugs #168, #172, #176, #197, #206, #225, #245, #265, #295, and #296. - - -Noteworthy changes in release 1.2.1 (3 February 2015) - -* Reinstated hts_file_type() and FT_* macros, which were available until 1.1 - but briefly removed in 1.2. This function is deprecated and will be removed - in a future release -- you should use hts_detect_format() etc instead - - -Noteworthy changes in release 1.2 (2 February 2015) - -* HTSlib now has a configure script which checks your build environment - and allows for selection of optional extras. 
See INSTALL for details - -* By default, reference sequences are fetched from the EBI CRAM Reference - Registry and cached in your $HOME cache directory. This behaviour can - be controlled by setting REF_PATH and REF_CACHE environment variables - (see the samtools(1) man page for details) - -* Numerous CRAM improvements: - - Support for CRAM v3.0, an upcoming revision to CRAM supporting - better compression and per-container checksums - - EOF checking for v2.1 and v3.0 (similar to checking BAM EOF blocks) - - Non-standard values for PNEXT and TLEN fields are now preserved - - hts_set_fai_filename() now provides a reference file when encoding - - Generated read names are now numbered from 1, rather than being - labelled 'slice:record-in-slice' - - Multi-threading and speed improvements - -* New htsfile command for identifying file formats, and corresponding - file format detection APIs - -* New tabix --regions FILE, --targets FILE options for filtering via BED files - -* Optional iRODS file access, disabled by default. Configure with --with-irods - to enable accessing iRODS data objects directly via 'irods:DATAOBJ' - -* All occurrences of 2^29 in the source have been eliminated, so indexing - and querying against reference sequences larger than 512Mbp works (when - using CSI indices) - -* Support for plain GZIP compression in various places - -* VCF header editing speed improvements - -* Added seq_nt16_int[] (equivalent to the samtools API's bam_nt16_nt4_table) - -* Reinstated faidx_fetch_nseq(), which was accidentally removed from 1.1. 
- Now faidx_fetch_nseq() and faidx_nseq() are equivalent; eventually - faidx_fetch_nseq() will be deprecated and removed [#156] - -* Fixed bugs #141, #152, #155, #158, #159, and various memory leaks diff --git a/src/htslib-1.18/bgzf.c b/src/htslib-1.18/bgzf.c deleted file mode 100644 index 45f2b11..0000000 --- a/src/htslib-1.18/bgzf.c +++ /dev/null @@ -1,2579 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology - 2011, 2012 Attractive Chaos - Copyright (C) 2009, 2013-2022 Genome Research Ltd - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
-*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_LIBDEFLATE -#include -#endif - -#include "htslib/hts.h" -#include "htslib/bgzf.h" -#include "htslib/hfile.h" -#include "htslib/thread_pool.h" -#include "htslib/hts_endian.h" -#include "cram/pooled_alloc.h" -#include "hts_internal.h" - -#ifndef EFTYPE -#define EFTYPE ENOEXEC -#endif - -#define BGZF_CACHE -#define BGZF_MT - -#define BLOCK_HEADER_LENGTH 18 -#define BLOCK_FOOTER_LENGTH 8 - - -/* BGZF/GZIP header (specialized from RFC 1952; little endian): - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - BGZF extension: - ^ ^ ^ ^ - | | | | - FLG.EXTRA XLEN B C - - BGZF format is compatible with GZIP. It limits the size of each compressed - block to 2^16 bytes and adds and an extra "BC" field in the gzip header which - records the size. - -*/ -static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0"; - -#ifdef BGZF_CACHE -typedef struct { - int size; - uint8_t *block; - int64_t end_offset; -} cache_t; - -#include "htslib/khash.h" -KHASH_MAP_INIT_INT64(cache, cache_t) -#endif - -struct bgzf_cache_t { - khash_t(cache) *h; - khint_t last_pos; -}; - -#ifdef BGZF_MT - -typedef struct bgzf_job { - BGZF *fp; - unsigned char comp_data[BGZF_MAX_BLOCK_SIZE]; - size_t comp_len; - unsigned char uncomp_data[BGZF_MAX_BLOCK_SIZE]; - size_t uncomp_len; - int errcode; - int64_t block_address; - int hit_eof; -} bgzf_job; - -enum mtaux_cmd { - NONE = 0, - SEEK, - SEEK_DONE, - HAS_EOF, - HAS_EOF_DONE, - CLOSE, -}; - -// When multi-threaded bgzf_tell won't work, so we delay the hts_idx_push -// until we've written the last block. 
-typedef struct { - hts_pos_t beg, end; - int tid, is_mapped; // args for hts_idx_push - uint64_t offset, block_number; -} hts_idx_cache_entry; - -typedef struct { - int nentries, mentries; // used and allocated - hts_idx_cache_entry *e; // hts_idx elements -} hts_idx_cache_t; - -typedef struct bgzf_mtaux_t { - // Memory pool for bgzf_job structs, to avoid many malloc/free - pool_alloc_t *job_pool; - bgzf_job *curr_job; - - // Thread pool - int n_threads; - int own_pool; - hts_tpool *pool; - - // Output queue holding completed bgzf_jobs - hts_tpool_process *out_queue; - - // I/O thread. - pthread_t io_task; - pthread_mutex_t job_pool_m; - int jobs_pending; // number of jobs waiting - int flush_pending; - void *free_block; - int hit_eof; // r/w entirely within main thread - - // Message passing to the reader thread; eg seek requests - int errcode; - uint64_t block_address; - int eof; - pthread_mutex_t command_m; // Set whenever fp is being updated - pthread_cond_t command_c; - enum mtaux_cmd command; - - // For multi-threaded on-the-fly indexing. See bgzf_idx_push below. - pthread_mutex_t idx_m; - hts_idx_t *hts_idx; - uint64_t block_number, block_written; - hts_idx_cache_t idx_cache; -} mtaux_t; -#endif - -typedef struct -{ - uint64_t uaddr; // offset w.r.t. uncompressed data - uint64_t caddr; // offset w.r.t. compressed data -} -bgzidx1_t; - -struct bgzidx_t -{ - int noffs, moffs; // the size of the index, n:used, m:allocated - bgzidx1_t *offs; // offsets - uint64_t ublock_addr; // offset of the current block (uncompressed data) -}; - -/* - * Buffers up arguments to hts_idx_push for later use, once we've written all bar - * this block. This is necessary when multiple blocks are in flight (threading) - * and fp->block_address isn't known at the time of call as we have in-flight - * blocks that haven't yet been compressed. - * - * NB: this only matters when we're indexing on the fly (writing). 
- * Normal indexing is threaded reads, but we already know block sizes - * so it's a simpler process - * - * Returns 0 on success, - * -1 on failure - */ -int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped) { - hts_idx_cache_entry *e; - mtaux_t *mt = fp->mt; - - if (!mt) - return hts_idx_push(hidx, tid, beg, end, offset, is_mapped); - - // Early check for out of range positions which would fail in hts_idx_push() - if (hts_idx_check_range(hidx, tid, beg, end) < 0) - return -1; - - pthread_mutex_lock(&mt->idx_m); - - mt->hts_idx = hidx; - hts_idx_cache_t *ic = &mt->idx_cache; - - if (ic->nentries >= ic->mentries) { - int new_sz = ic->mentries ? ic->mentries*2 : 1024; - if (!(e = realloc(ic->e, new_sz * sizeof(*ic->e)))) { - pthread_mutex_unlock(&mt->idx_m); - return -1; - } - ic->e = e; - ic->mentries = new_sz; - } - - e = &ic->e[ic->nentries++]; - e->tid = tid; - e->beg = beg; - e->end = end; - e->is_mapped = is_mapped; - e->offset = offset & 0xffff; - e->block_number = mt->block_number; - - pthread_mutex_unlock(&mt->idx_m); - - return 0; -} - -/* - * bgzf analogue to hts_idx_amend_last. - * - * This is needed when multi-threading and writing indices on the fly. - * At the point of writing a record we know the virtual offset for start - * and end, but that end virtual offset may be the end of the current - * block. In standard indexing our end virtual offset becomes the start - * of the next block. Thus to ensure bit for bit compatibility we - * detect this boundary case and fix it up here. - * - * In theory this has no behavioural change, but it also works around - * a bug elsewhere which causes bgzf_read to return 0 when our offset - * is the end of a block rather than the start of the next. 
- */ -void bgzf_idx_amend_last(BGZF *fp, hts_idx_t *hidx, uint64_t offset) { - mtaux_t *mt = fp->mt; - if (!mt) { - hts_idx_amend_last(hidx, offset); - return; - } - - pthread_mutex_lock(&mt->idx_m); - hts_idx_cache_t *ic = &mt->idx_cache; - if (ic->nentries > 0) { - hts_idx_cache_entry *e = &ic->e[ic->nentries-1]; - if ((offset & 0xffff) == 0 && e->offset != 0) { - // bumped to next block number - e->offset = 0; - e->block_number++; - } - } - pthread_mutex_unlock(&mt->idx_m); -} - -static int bgzf_idx_flush(BGZF *fp) { - mtaux_t *mt = fp->mt; - - if (!mt->idx_cache.e) { - mt->block_written++; - return 0; - } - - pthread_mutex_lock(&mt->idx_m); - - hts_idx_cache_entry *e = mt->idx_cache.e; - int i; - - assert(mt->idx_cache.nentries == 0 || mt->block_written <= e[0].block_number); - - for (i = 0; i < mt->idx_cache.nentries && e[i].block_number == mt->block_written; i++) { - if (hts_idx_push(mt->hts_idx, e[i].tid, e[i].beg, e[i].end, - (mt->block_address << 16) + e[i].offset, - e[i].is_mapped) < 0) { - pthread_mutex_unlock(&mt->idx_m); - return -1; - } - } - - memmove(&e[0], &e[i], (mt->idx_cache.nentries - i) * sizeof(*e)); - mt->idx_cache.nentries -= i; - mt->block_written++; - - pthread_mutex_unlock(&mt->idx_m); - return 0; -} - -void bgzf_index_destroy(BGZF *fp); -int bgzf_index_add_block(BGZF *fp); -static int mt_destroy(mtaux_t *mt); - -static inline void packInt16(uint8_t *buffer, uint16_t value) -{ - buffer[0] = value; - buffer[1] = value >> 8; -} - -static inline int unpackInt16(const uint8_t *buffer) -{ - return buffer[0] | buffer[1] << 8; -} - -static inline void packInt32(uint8_t *buffer, uint32_t value) -{ - buffer[0] = value; - buffer[1] = value >> 8; - buffer[2] = value >> 16; - buffer[3] = value >> 24; -} - -static void razf_info(hFILE *hfp, const char *filename) -{ - uint64_t usize, csize; - off_t sizes_pos; - - if (filename == NULL || strcmp(filename, "-") == 0) filename = "FILE"; - - // RAZF files end with USIZE,CSIZE stored as big-endian uint64_t 
- if ((sizes_pos = hseek(hfp, -16, SEEK_END)) < 0) goto no_sizes; - if (hread(hfp, &usize, 8) != 8 || hread(hfp, &csize, 8) != 8) goto no_sizes; - if (!ed_is_big()) ed_swap_8p(&usize), ed_swap_8p(&csize); - if (csize >= sizes_pos) goto no_sizes; // Very basic validity check - - hts_log_error( -"To decompress this file, use the following commands:\n" -" truncate -s %" PRIu64 " %s\n" -" gunzip %s\n" -"The resulting uncompressed file should be %" PRIu64 " bytes in length.\n" -"If you do not have a truncate command, skip that step (though gunzip will\n" -"likely produce a \"trailing garbage ignored\" message, which can be ignored).", - csize, filename, filename, usize); - return; - -no_sizes: - hts_log_error( -"To decompress this file, use the following command:\n" -" gunzip %s\n" -"This will likely produce a \"trailing garbage ignored\" message, which can\n" -"usually be safely ignored.", filename); -} - -static const char *bgzf_zerr(int errnum, z_stream *zs) -{ - static char buffer[32]; - - /* Return zs->msg if available. - zlib doesn't set this very reliably. Looking at the source suggests - that it may get set to a useful message for deflateInit2, inflateInit2 - and inflate when it returns Z_DATA_ERROR. For inflate with other - return codes, deflate, deflateEnd and inflateEnd it doesn't appear - to be useful. For the likely non-useful cases, the caller should - pass NULL into zs. 
*/ - - if (zs && zs->msg) return zs->msg; - - // gzerror OF((gzFile file, int *errnum) - switch (errnum) { - case Z_ERRNO: - return strerror(errno); - case Z_STREAM_ERROR: - return "invalid parameter/compression level, or inconsistent stream state"; - case Z_DATA_ERROR: - return "invalid or incomplete IO"; - case Z_MEM_ERROR: - return "out of memory"; - case Z_BUF_ERROR: - return "progress temporarily not possible, or in() / out() returned an error"; - case Z_VERSION_ERROR: - return "zlib version mismatch"; - case Z_NEED_DICT: - return "data was compressed using a dictionary"; - case Z_OK: // 0: maybe gzgets error Z_NULL - default: - snprintf(buffer, sizeof(buffer), "[%d] unknown", errnum); - return buffer; // FIXME: Not thread-safe. - } -} - -static BGZF *bgzf_read_init(hFILE *hfpr, const char *filename) -{ - BGZF *fp; - uint8_t magic[18]; - ssize_t n = hpeek(hfpr, magic, 18); - if (n < 0) return NULL; - - fp = (BGZF*)calloc(1, sizeof(BGZF)); - if (fp == NULL) return NULL; - - fp->is_write = 0; - fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); - if (fp->uncompressed_block == NULL) { free(fp); return NULL; } - fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; - fp->is_compressed = (n==18 && magic[0]==0x1f && magic[1]==0x8b); - fp->is_gzip = ( !fp->is_compressed || ((magic[3]&4) && memcmp(&magic[12], "BC\2\0",4)==0) ) ? 
0 : 1; - if (fp->is_compressed && (magic[3]&4) && memcmp(&magic[12], "RAZF", 4)==0) { - hts_log_error("Cannot decompress legacy RAZF format"); - razf_info(hfpr, filename); - free(fp->uncompressed_block); - free(fp); - errno = EFTYPE; - return NULL; - } -#ifdef BGZF_CACHE - if (!(fp->cache = malloc(sizeof(*fp->cache)))) { - free(fp->uncompressed_block); - free(fp); - return NULL; - } - if (!(fp->cache->h = kh_init(cache))) { - free(fp->uncompressed_block); - free(fp->cache); - free(fp); - return NULL; - } - fp->cache->last_pos = 0; -#endif - return fp; -} - -// get the compress level from the mode string: compress_level==-1 for the default level, -2 plain uncompressed -static int mode2level(const char *mode) -{ - int i, compress_level = -1; - for (i = 0; mode[i]; ++i) - if (mode[i] >= '0' && mode[i] <= '9') break; - if (mode[i]) compress_level = (int)mode[i] - '0'; - if (strchr(mode, 'u')) compress_level = -2; - return compress_level; -} -static BGZF *bgzf_write_init(const char *mode) -{ - BGZF *fp; - fp = (BGZF*)calloc(1, sizeof(BGZF)); - if (fp == NULL) goto mem_fail; - fp->is_write = 1; - int compress_level = mode2level(mode); - if ( compress_level==-2 ) - { - fp->is_compressed = 0; - return fp; - } - fp->is_compressed = 1; - - fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); - if (fp->uncompressed_block == NULL) goto mem_fail; - fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; - - fp->compress_level = compress_level < 0? 
Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 - if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; - if ( strchr(mode,'g') ) - { - // gzip output - fp->is_gzip = 1; - fp->gz_stream = (z_stream*)calloc(1,sizeof(z_stream)); - if (fp->gz_stream == NULL) goto mem_fail; - fp->gz_stream->zalloc = NULL; - fp->gz_stream->zfree = NULL; - fp->gz_stream->msg = NULL; - - int ret = deflateInit2(fp->gz_stream, fp->compress_level, Z_DEFLATED, 15|16, 8, Z_DEFAULT_STRATEGY); - if (ret!=Z_OK) { - hts_log_error("Call to deflateInit2 failed: %s", bgzf_zerr(ret, fp->gz_stream)); - goto fail; - } - } - return fp; - -mem_fail: - hts_log_error("%s", strerror(errno)); - -fail: - if (fp != NULL) { - free(fp->uncompressed_block); - free(fp->gz_stream); - free(fp); - } - return NULL; -} - -BGZF *bgzf_open(const char *path, const char *mode) -{ - BGZF *fp = 0; - if (strchr(mode, 'r')) { - hFILE *fpr; - if ((fpr = hopen(path, mode)) == 0) return 0; - fp = bgzf_read_init(fpr, path); - if (fp == 0) { hclose_abruptly(fpr); return NULL; } - fp->fp = fpr; - } else if (strchr(mode, 'w') || strchr(mode, 'a')) { - hFILE *fpw; - if ((fpw = hopen(path, mode)) == 0) return 0; - fp = bgzf_write_init(mode); - if (fp == NULL) return NULL; - fp->fp = fpw; - } - else { errno = EINVAL; return 0; } - - fp->is_be = ed_is_big(); - return fp; -} - -BGZF *bgzf_dopen(int fd, const char *mode) -{ - BGZF *fp = 0; - if (strchr(mode, 'r')) { - hFILE *fpr; - if ((fpr = hdopen(fd, mode)) == 0) return 0; - fp = bgzf_read_init(fpr, NULL); - if (fp == 0) { hclose_abruptly(fpr); return NULL; } // FIXME this closes fd - fp->fp = fpr; - } else if (strchr(mode, 'w') || strchr(mode, 'a')) { - hFILE *fpw; - if ((fpw = hdopen(fd, mode)) == 0) return 0; - fp = bgzf_write_init(mode); - if (fp == NULL) return NULL; - fp->fp = fpw; - } - else { errno = EINVAL; return 0; } - - fp->is_be = ed_is_big(); - return fp; -} - -BGZF *bgzf_hopen(hFILE *hfp, const char *mode) -{ - BGZF *fp = NULL; - if 
(strchr(mode, 'r')) { - fp = bgzf_read_init(hfp, NULL); - if (fp == NULL) return NULL; - } else if (strchr(mode, 'w') || strchr(mode, 'a')) { - fp = bgzf_write_init(mode); - if (fp == NULL) return NULL; - } - else { errno = EINVAL; return 0; } - - fp->fp = hfp; - fp->is_be = ed_is_big(); - return fp; -} - -#ifdef HAVE_LIBDEFLATE -int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) -{ - if (slen == 0) { - // EOF block - if (*dlen < 28) return -1; - memcpy(_dst, "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", 28); - *dlen = 28; - return 0; - } - - uint8_t *dst = (uint8_t*)_dst; - - if (level == 0) { - // Uncompressed data - if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1; - dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 - u16_to_le(slen, &dst[BLOCK_HEADER_LENGTH+1]); // length - u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length - memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen); - *dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - - } else { - level = level > 0 ? level : 6; // libdeflate doesn't honour -1 as default - // NB levels go up to 12 here. 
- int lvl_map[] = {0,1,2,3,5,6,7,8,10,12}; - level = lvl_map[level>9 ?9 :level]; - struct libdeflate_compressor *z = libdeflate_alloc_compressor(level); - if (!z) return -1; - - // Raw deflate - size_t clen = - libdeflate_deflate_compress(z, src, slen, - dst + BLOCK_HEADER_LENGTH, - *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH); - - if (clen <= 0) { - hts_log_error("Call to libdeflate_deflate_compress failed"); - libdeflate_free_compressor(z); - return -1; - } - - *dlen = clen + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - - libdeflate_free_compressor(z); - } - - // write the header - memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block - packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes - - // write the footer - uint32_t crc = libdeflate_crc32(0, src, slen); - packInt32((uint8_t*)&dst[*dlen - 8], crc); - packInt32((uint8_t*)&dst[*dlen - 4], slen); - return 0; -} - -#else - -int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) -{ - uint32_t crc; - z_stream zs; - uint8_t *dst = (uint8_t*)_dst; - - if (level == 0) { - uncomp: - // Uncompressed data - if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1; - dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 - u16_to_le(slen, &dst[BLOCK_HEADER_LENGTH+1]); // length - u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length - memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen); - *dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - } else { - // compress the body - zs.zalloc = NULL; zs.zfree = NULL; - zs.msg = NULL; - zs.next_in = (Bytef*)src; - zs.avail_in = slen; - zs.next_out = dst + BLOCK_HEADER_LENGTH; - zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; - int ret = deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); // -15 to disable zlib header/footer - if (ret!=Z_OK) { - hts_log_error("Call to 
deflateInit2 failed: %s", bgzf_zerr(ret, &zs)); - return -1; - } - if ((ret = deflate(&zs, Z_FINISH)) != Z_STREAM_END) { - if (ret == Z_OK && zs.avail_out == 0) { - deflateEnd(&zs); - goto uncomp; - } else { - hts_log_error("Deflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? &zs : NULL)); - } - return -1; - } - // If we used up the entire output buffer, then we either ran out of - // room or we *just* fitted, but either way we may as well store - // uncompressed for faster decode. - if (zs.avail_out == 0) { - deflateEnd(&zs); - goto uncomp; - } - if ((ret = deflateEnd(&zs)) != Z_OK) { - hts_log_error("Call to deflateEnd failed: %s", bgzf_zerr(ret, NULL)); - return -1; - } - *dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - } - - // write the header - memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block - packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes - // write the footer - crc = crc32(crc32(0L, NULL, 0L), (Bytef*)src, slen); - packInt32((uint8_t*)&dst[*dlen - 8], crc); - packInt32((uint8_t*)&dst[*dlen - 4], slen); - return 0; -} -#endif // HAVE_LIBDEFLATE - -static int bgzf_gzip_compress(BGZF *fp, void *_dst, size_t *dlen, const void *src, size_t slen, int level) -{ - uint8_t *dst = (uint8_t*)_dst; - z_stream *zs = fp->gz_stream; - int flush = slen ? Z_PARTIAL_FLUSH : Z_FINISH; - zs->next_in = (Bytef*)src; - zs->avail_in = slen; - zs->next_out = dst; - zs->avail_out = *dlen; - int ret = deflate(zs, flush); - if (ret == Z_STREAM_ERROR) { - hts_log_error("Deflate operation failed: %s", bgzf_zerr(ret, NULL)); - return -1; - } - if (zs->avail_in != 0) { - hts_log_error("Deflate block too large for output buffer"); - return -1; - } - *dlen = *dlen - zs->avail_out; - return 0; -} - -// Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length. 
-static int deflate_block(BGZF *fp, int block_length) -{ - size_t comp_size = BGZF_MAX_BLOCK_SIZE; - int ret; - if ( !fp->is_gzip ) - ret = bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level); - else - ret = bgzf_gzip_compress(fp, fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level); - - if ( ret != 0 ) - { - hts_log_debug("Compression error %d", ret); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->block_offset = 0; - return comp_size; -} - -#ifdef HAVE_LIBDEFLATE - -static int bgzf_uncompress(uint8_t *dst, size_t *dlen, - const uint8_t *src, size_t slen, - uint32_t expected_crc) { - struct libdeflate_decompressor *z = libdeflate_alloc_decompressor(); - if (!z) { - hts_log_error("Call to libdeflate_alloc_decompressor failed"); - return -1; - } - - int ret = libdeflate_deflate_decompress(z, src, slen, dst, *dlen, dlen); - libdeflate_free_decompressor(z); - - if (ret != LIBDEFLATE_SUCCESS) { - hts_log_error("Inflate operation failed: %d", ret); - return -1; - } - - uint32_t crc = libdeflate_crc32(0, (unsigned char *)dst, *dlen); - if (crc != expected_crc) { - hts_log_error("CRC32 checksum mismatch"); - return -2; - } - - return 0; -} - -#else - -static int bgzf_uncompress(uint8_t *dst, size_t *dlen, - const uint8_t *src, size_t slen, - uint32_t expected_crc) { - z_stream zs = { - .zalloc = NULL, - .zfree = NULL, - .msg = NULL, - .next_in = (Bytef*)src, - .avail_in = slen, - .next_out = (Bytef*)dst, - .avail_out = *dlen - }; - - int ret = inflateInit2(&zs, -15); - if (ret != Z_OK) { - hts_log_error("Call to inflateInit2 failed: %s", bgzf_zerr(ret, &zs)); - return -1; - } - if ((ret = inflate(&zs, Z_FINISH)) != Z_STREAM_END) { - hts_log_error("Inflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? 
&zs : NULL)); - if ((ret = inflateEnd(&zs)) != Z_OK) { - hts_log_warning("Call to inflateEnd failed: %s", bgzf_zerr(ret, NULL)); - } - return -1; - } - if ((ret = inflateEnd(&zs)) != Z_OK) { - hts_log_error("Call to inflateEnd failed: %s", bgzf_zerr(ret, NULL)); - return -1; - } - *dlen = *dlen - zs.avail_out; - - uint32_t crc = crc32(crc32(0L, NULL, 0L), (unsigned char *)dst, *dlen); - if (crc != expected_crc) { - hts_log_error("CRC32 checksum mismatch"); - return -2; - } - - return 0; -} -#endif // HAVE_LIBDEFLATE - -// Inflate the block in fp->compressed_block into fp->uncompressed_block -static int inflate_block(BGZF* fp, int block_length) -{ - size_t dlen = BGZF_MAX_BLOCK_SIZE; - uint32_t crc = le_to_u32((uint8_t *)fp->compressed_block + block_length-8); - int ret = bgzf_uncompress(fp->uncompressed_block, &dlen, - (Bytef*)fp->compressed_block + 18, - block_length - 18, crc); - if (ret < 0) { - if (ret == -2) - fp->errcode |= BGZF_ERR_CRC; - else - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - - return dlen; -} - -// Decompress the next part of a non-blocked GZIP file. -// Return the number of uncompressed bytes read, 0 on EOF, or a negative number on error. -// Will fill the output buffer unless the end of the GZIP file is reached. -static int inflate_gzip_block(BGZF *fp) -{ - // we will set this to true when we detect EOF, so we don't bang against the EOF more than once per call - int input_eof = 0; - - // write to the part of the output buffer after block_offset - fp->gz_stream->next_out = (Bytef*)fp->uncompressed_block + fp->block_offset; - fp->gz_stream->avail_out = BGZF_MAX_BLOCK_SIZE - fp->block_offset; - - while ( fp->gz_stream->avail_out != 0 ) { - // until we fill the output buffer (or hit EOF) - - if ( !input_eof && fp->gz_stream->avail_in == 0 ) { - // we are out of input data in the buffer. Get more. 
- fp->gz_stream->next_in = fp->compressed_block; - int ret = hread(fp->fp, fp->compressed_block, BGZF_BLOCK_SIZE); - if ( ret < 0 ) { - // hread had an error. Pass it on. - return ret; - } - fp->gz_stream->avail_in = ret; - if ( fp->gz_stream->avail_in < BGZF_BLOCK_SIZE ) { - // we have reached EOF but the decompressor hasn't necessarily - input_eof = 1; - } - } - - fp->gz_stream->msg = NULL; - // decompress as much data as we can - int ret = inflate(fp->gz_stream, Z_SYNC_FLUSH); - - if ( (ret < 0 && ret != Z_BUF_ERROR) || ret == Z_NEED_DICT ) { - // an error occurred, other than running out of space - hts_log_error("Inflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? fp->gz_stream : NULL)); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } else if ( ret == Z_STREAM_END ) { - // we finished a GZIP member - - // scratch for peeking to see if the file is over - char c; - if (fp->gz_stream->avail_in > 0 || hpeek(fp->fp, &c, 1) == 1) { - // there is more data; try and read another GZIP member in the remaining data - int reset_ret = inflateReset(fp->gz_stream); - if (reset_ret != Z_OK) { - hts_log_error("Call to inflateReset failed: %s", bgzf_zerr(reset_ret, NULL)); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - } else { - // we consumed all the input data and hit Z_STREAM_END - // so stop looping, even if we never fill the output buffer - break; - } - } else if ( ret == Z_BUF_ERROR && input_eof && fp->gz_stream->avail_out > 0 ) { - // the gzip file has ended prematurely - hts_log_error("Gzip file truncated"); - fp->errcode |= BGZF_ERR_IO; - return -1; - } - } - - // when we get here, the buffer is full or there is an EOF after a complete gzip member - return BGZF_MAX_BLOCK_SIZE - fp->gz_stream->avail_out; -} - -// Returns: 0 on success (BGZF header); -1 on non-BGZF GZIP header; -2 on error -static int check_header(const uint8_t *header) -{ - if ( header[0] != 31 || header[1] != 139 || header[2] != 8 ) return -2; - return ((header[3] & 4) != 0 - && 
unpackInt16((uint8_t*)&header[10]) == 6 - && header[12] == 'B' && header[13] == 'C' - && unpackInt16((uint8_t*)&header[14]) == 2) ? 0 : -1; -} - -#ifdef BGZF_CACHE -static void free_cache(BGZF *fp) -{ - khint_t k; - if (fp->is_write) return; - khash_t(cache) *h = fp->cache->h; - for (k = kh_begin(h); k < kh_end(h); ++k) - if (kh_exist(h, k)) free(kh_val(h, k).block); - kh_destroy(cache, h); - free(fp->cache); -} - -static int load_block_from_cache(BGZF *fp, int64_t block_address) -{ - khint_t k; - cache_t *p; - - khash_t(cache) *h = fp->cache->h; - k = kh_get(cache, h, block_address); - if (k == kh_end(h)) return 0; - p = &kh_val(h, k); - if (fp->block_length != 0) fp->block_offset = 0; - fp->block_address = block_address; - fp->block_length = p->size; - memcpy(fp->uncompressed_block, p->block, p->size); - if ( hseek(fp->fp, p->end_offset, SEEK_SET) < 0 ) - { - // todo: move the error up - hts_log_error("Could not hseek to %" PRId64, p->end_offset); - exit(1); - } - return p->size; -} - -static void cache_block(BGZF *fp, int size) -{ - int ret; - khint_t k, k_orig; - uint8_t *block = NULL; - cache_t *p; - //fprintf(stderr, "Cache block at %llx\n", (int)fp->block_address); - khash_t(cache) *h = fp->cache->h; - if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return; - if (fp->block_length < 0 || fp->block_length > BGZF_MAX_BLOCK_SIZE) return; - if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > (uint32_t)fp->cache_size) { - /* Remove uniformly from any position in the hash by a simple - * round-robin approach. 
An alternative strategy would be to - * remove the least recently accessed block, but the round-robin - * removal is simpler and is not expected to have a big impact - * on performance */ - if (fp->cache->last_pos >= kh_end(h)) fp->cache->last_pos = kh_begin(h); - k_orig = k = fp->cache->last_pos; - if (++k >= kh_end(h)) k = kh_begin(h); - while (k != k_orig) { - if (kh_exist(h, k)) - break; - if (++k == kh_end(h)) - k = kh_begin(h); - } - fp->cache->last_pos = k; - - if (k != k_orig) { - block = kh_val(h, k).block; - kh_del(cache, h, k); - } - } else { - block = (uint8_t*)malloc(BGZF_MAX_BLOCK_SIZE); - } - if (!block) return; - k = kh_put(cache, h, fp->block_address, &ret); - if (ret <= 0) { // kh_put failed, or in there already (shouldn't happen) - free(block); - return; - } - p = &kh_val(h, k); - p->size = fp->block_length; - p->end_offset = fp->block_address + size; - p->block = block; - memcpy(p->block, fp->uncompressed_block, p->size); -} -#else -static void free_cache(BGZF *fp) {} -static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;} -static void cache_block(BGZF *fp, int size) {} -#endif - -/* - * Absolute htell in this compressed file. - * - * Do not confuse with the external bgzf_tell macro which returns the virtual - * offset. - */ -static off_t bgzf_htell(BGZF *fp) { - if (fp->mt) { - pthread_mutex_lock(&fp->mt->job_pool_m); - off_t pos = fp->block_address + fp->block_clength; - pthread_mutex_unlock(&fp->mt->job_pool_m); - return pos; - } else { - return htell(fp->fp); - } -} - -int bgzf_read_block(BGZF *fp) -{ - hts_tpool_result *r; - - if (fp->errcode) return -1; - - if (fp->mt) { - again: - if (fp->mt->hit_eof) { - // Further reading at EOF will always return 0 - fp->block_length = 0; - return 0; - } - r = hts_tpool_next_result_wait(fp->mt->out_queue); - bgzf_job *j = r ? 
(bgzf_job *)hts_tpool_result_data(r) : NULL; - - if (!j || j->errcode == BGZF_ERR_MT) { - if (!fp->mt->free_block) { - fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); - if (fp->uncompressed_block == NULL) return -1; - fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; - } // else it's already allocated with malloc, maybe even in-use. - if (mt_destroy(fp->mt) < 0) { - fp->errcode = BGZF_ERR_IO; - } - fp->mt = NULL; - hts_tpool_delete_result(r, 0); - if (fp->errcode) { - return -1; - } - goto single_threaded; - } - - if (j->errcode) { - fp->errcode = j->errcode; - hts_log_error("BGZF decode jobs returned error %d " - "for block offset %"PRId64, - j->errcode, j->block_address); - hts_tpool_delete_result(r, 0); - return -1; - } - - if (j->hit_eof) { - if (!fp->last_block_eof && !fp->no_eof_block) { - fp->no_eof_block = 1; - hts_log_warning("EOF marker is absent. The input may be truncated"); - } - fp->mt->hit_eof = 1; - } - - // Zero length blocks in the middle of a file are (wrongly) - // considered as EOF by many callers. We work around this by - // trying again to see if we hit a genuine EOF. - if (!j->hit_eof && j->uncomp_len == 0) { - fp->last_block_eof = 1; - hts_tpool_delete_result(r, 0); - goto again; - } - - // block_length=0 and block_offset set by bgzf_seek. - if (fp->block_length != 0) fp->block_offset = 0; - if (!j->hit_eof) fp->block_address = j->block_address; - fp->block_clength = j->comp_len; - fp->block_length = j->uncomp_len; - // bgzf_read() can change fp->block_length - fp->last_block_eof = (fp->block_length == 0); - - if ( j->uncomp_len && j->fp->idx_build_otf ) - { - bgzf_index_add_block(j->fp); - j->fp->idx->ublock_addr += j->uncomp_len; - } - - // Steal the data block as it's quicker than a memcpy. - // We just need to make sure we delay the pool free. 
- if (fp->mt->curr_job) { - pthread_mutex_lock(&fp->mt->job_pool_m); - pool_free(fp->mt->job_pool, fp->mt->curr_job); - pthread_mutex_unlock(&fp->mt->job_pool_m); - } - fp->uncompressed_block = j->uncomp_data; - fp->mt->curr_job = j; - if (fp->mt->free_block) { - free(fp->mt->free_block); // clear up last non-mt block - fp->mt->free_block = NULL; - } - - hts_tpool_delete_result(r, 0); - return 0; - } - - uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; - int count, size, block_length, remaining; - - single_threaded: - size = 0; - - int64_t block_address; - block_address = bgzf_htell(fp); - - // Reading an uncompressed file - if ( !fp->is_compressed ) - { - count = hread(fp->fp, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE); - if (count < 0) // Error - { - hts_log_error("Failed to read uncompressed data " - "at offset %"PRId64"%s%s", - block_address, errno ? ": " : "", strerror(errno)); - fp->errcode |= BGZF_ERR_IO; - return -1; - } - else if (count == 0) // EOF - { - fp->block_length = 0; - return 0; - } - if (fp->block_length != 0) fp->block_offset = 0; - fp->block_address = block_address; - fp->block_length = count; - return 0; - } - - // Reading compressed file - if ( fp->is_gzip && fp->gz_stream ) // is this is an initialized gzip stream? - { - count = inflate_gzip_block(fp); - if ( count<0 ) - { - hts_log_error("Reading GZIP stream failed at offset %"PRId64, - block_address); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->block_length = count; - fp->block_address = block_address; - return 0; - } - if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; - - // loop to skip empty bgzf blocks - while (1) - { - count = hread(fp->fp, header, sizeof(header)); - if (count == 0) { // no data read - if (!fp->last_block_eof && !fp->no_eof_block && !fp->is_gzip) { - fp->no_eof_block = 1; - hts_log_warning("EOF marker is absent. 
The input may be truncated"); - } - fp->block_length = 0; - return 0; - } - int ret = 0; - if ( count != sizeof(header) || (ret=check_header(header))==-2 ) - { - fp->errcode |= BGZF_ERR_HEADER; - hts_log_error("%s BGZF header at offset %"PRId64, - ret ? "Invalid" : "Failed to read", - block_address); - return -1; - } - if ( ret==-1 ) - { - // GZIP, not BGZF - uint8_t *cblock = (uint8_t*)fp->compressed_block; - memcpy(cblock, header, sizeof(header)); - count = hread(fp->fp, cblock+sizeof(header), BGZF_BLOCK_SIZE - sizeof(header)) + sizeof(header); - - fp->is_gzip = 1; - fp->gz_stream = (z_stream*) calloc(1,sizeof(z_stream)); - // Set up zlib, using a window size of 15, and its built-in GZIP header processing (+16). - int ret = inflateInit2(fp->gz_stream, 15 + 16); - if (ret != Z_OK) - { - hts_log_error("Call to inflateInit2 failed: %s", bgzf_zerr(ret, fp->gz_stream)); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->gz_stream->avail_in = count; - fp->gz_stream->next_in = cblock; - count = inflate_gzip_block(fp); - if ( count<0 ) - { - hts_log_error("Reading GZIP stream failed at offset %"PRId64, - block_address); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->block_length = count; - fp->block_address = block_address; - if ( fp->idx_build_otf ) return -1; // cannot build index for gzip - return 0; - } - size = count; - block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" - if (block_length < BLOCK_HEADER_LENGTH) - { - hts_log_error("Invalid BGZF block length at offset %"PRId64, - block_address); - fp->errcode |= BGZF_ERR_HEADER; - return -1; - } - compressed_block = (uint8_t*)fp->compressed_block; - memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); - remaining = block_length - BLOCK_HEADER_LENGTH; - count = hread(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); - if (count != remaining) { - hts_log_error("Failed to read BGZF block data at offset %"PRId64 - " expected %d bytes; hread 
returned %d", - block_address, remaining, count); - fp->errcode |= BGZF_ERR_IO; - return -1; - } - size += count; - if ((count = inflate_block(fp, block_length)) < 0) { - hts_log_debug("Inflate block operation failed for " - "block at offset %"PRId64": %s", - block_address, bgzf_zerr(count, NULL)); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->last_block_eof = (count == 0); - if ( count ) break; // otherwise an empty bgzf block - block_address = bgzf_htell(fp); // update for new block start - } - if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek. - fp->block_address = block_address; - fp->block_length = count; - if ( fp->idx_build_otf ) - { - bgzf_index_add_block(fp); - fp->idx->ublock_addr += count; - } - cache_block(fp, size); - return 0; -} - -ssize_t bgzf_read(BGZF *fp, void *data, size_t length) -{ - ssize_t bytes_read = 0; - uint8_t *output = (uint8_t*)data; - if (length <= 0) return 0; - assert(fp->is_write == 0); - while (bytes_read < length) { - int copy_length, available = fp->block_length - fp->block_offset; - uint8_t *buffer; - if (available <= 0) { - int ret = bgzf_read_block(fp); - if (ret != 0) { - hts_log_error("Read block operation failed with error %d after %zd of %zu bytes", fp->errcode, bytes_read, length); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - available = fp->block_length - fp->block_offset; - if (available == 0) { - if (fp->block_length == 0) - break; // EOF - - // Offset was at end of block (see commit e9863a0) - fp->block_address = bgzf_htell(fp); - fp->block_offset = fp->block_length = 0; - continue; - } else if (available < 0) { - // Block offset was set to an invalid coordinate - hts_log_error("BGZF block offset %d set beyond block size %d", - fp->block_offset, fp->block_length); - fp->errcode |= BGZF_ERR_MISUSE; - return -1; - } - } - copy_length = length - bytes_read < available? 
length - bytes_read : available; - buffer = (uint8_t*)fp->uncompressed_block; - memcpy(output, buffer + fp->block_offset, copy_length); - fp->block_offset += copy_length; - output += copy_length; - bytes_read += copy_length; - - // For raw gzip streams this avoids short reads. - if (fp->block_offset == fp->block_length) { - fp->block_address = bgzf_htell(fp); - fp->block_offset = fp->block_length = 0; - } - } - - fp->uncompressed_address += bytes_read; - - return bytes_read; -} - -// -1 for EOF, -2 for error, 0-255 for byte. -int bgzf_peek(BGZF *fp) { - int available = fp->block_length - fp->block_offset; - if (available <= 0) { - if (bgzf_read_block(fp) < 0) { - hts_log_error("Read block operation failed with error %d", fp->errcode); - fp->errcode = BGZF_ERR_ZLIB; - return -2; - } - } - available = fp->block_length - fp->block_offset; - if (available) - return ((unsigned char *)fp->uncompressed_block)[fp->block_offset]; - - return -1; -} - -ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length) -{ - ssize_t ret = hread(fp->fp, data, length); - if (ret < 0) fp->errcode |= BGZF_ERR_IO; - return ret; -} - -#ifdef BGZF_MT - -/* Function to clean up when jobs are discarded (e.g. during seek) - * This works for results too, as results are the same struct with - * decompressed data stored in it. */ -static void job_cleanup(void *arg) { - bgzf_job *j = (bgzf_job *)arg; - mtaux_t *mt = j->fp->mt; - pthread_mutex_lock(&mt->job_pool_m); - pool_free(mt->job_pool, j); - pthread_mutex_unlock(&mt->job_pool_m); -} - -static void *bgzf_encode_func(void *arg) { - bgzf_job *j = (bgzf_job *)arg; - - j->comp_len = BGZF_MAX_BLOCK_SIZE; - int ret = bgzf_compress(j->comp_data, &j->comp_len, - j->uncomp_data, j->uncomp_len, - j->fp->compress_level); - if (ret != 0) - j->errcode |= BGZF_ERR_ZLIB; - - return arg; -} - -// Optimisation for compression level 0 (uncompressed deflate blocks) -// Avoids memcpy of the data from uncompressed to compressed buffer. 
-static void *bgzf_encode_level0_func(void *arg) { - bgzf_job *j = (bgzf_job *)arg; - uint32_t crc; - j->comp_len = j->uncomp_len + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH + 5; - - // Data will have already been copied in to - // j->comp_data + BLOCK_HEADER_LENGTH + 5 - - // Add preamble - memcpy(j->comp_data, g_magic, BLOCK_HEADER_LENGTH); - u16_to_le(j->comp_len-1, j->comp_data + 16); - - // Deflate uncompressed data header - j->comp_data[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 - u16_to_le(j->uncomp_len, j->comp_data + BLOCK_HEADER_LENGTH + 1); - u16_to_le(~j->uncomp_len, j->comp_data + BLOCK_HEADER_LENGTH + 3); - - // Trailer (CRC, uncompressed length) -#ifdef HAVE_LIBDEFLATE - crc = libdeflate_crc32(0, j->comp_data + BLOCK_HEADER_LENGTH + 5, - j->uncomp_len); -#else - crc = crc32(crc32(0L, NULL, 0L), - (Bytef*)j->comp_data + BLOCK_HEADER_LENGTH + 5, j->uncomp_len); -#endif - u32_to_le(crc, j->comp_data + j->comp_len - 8); - u32_to_le(j->uncomp_len, j->comp_data + j->comp_len - 4); - - return arg; -} - -// Our input block has already been decoded by bgzf_mt_read_block(). -// We need to split that into a fetch block (compressed) and make this -// do the actual decompression step. -static void *bgzf_decode_func(void *arg) { - bgzf_job *j = (bgzf_job *)arg; - - j->uncomp_len = BGZF_MAX_BLOCK_SIZE; - uint32_t crc = le_to_u32((uint8_t *)j->comp_data + j->comp_len-8); - int ret = bgzf_uncompress(j->uncomp_data, &j->uncomp_len, - j->comp_data+18, j->comp_len-18, crc); - if (ret != 0) - j->errcode |= BGZF_ERR_ZLIB; - - return arg; -} - -/* - * Nul function so we can dispatch a job with the correct serial - * to mark failure or to indicate an empty read (EOF). - */ -static void *bgzf_nul_func(void *arg) { return arg; } - -/* - * Takes compressed blocks off the results queue and calls hwrite to - * punt them to the output stream. 
- * - * Returns NULL when no more are left, or -1 on error - */ -static void *bgzf_mt_writer(void *vp) { - BGZF *fp = (BGZF *)vp; - mtaux_t *mt = fp->mt; - hts_tpool_result *r; - - if (fp->idx_build_otf) { - fp->idx->moffs = fp->idx->noffs = 1; - fp->idx->offs = (bgzidx1_t*) calloc(fp->idx->moffs, sizeof(bgzidx1_t)); - if (!fp->idx->offs) goto err; - } - - // Iterates until result queue is shutdown, where it returns NULL. - while ((r = hts_tpool_next_result_wait(mt->out_queue))) { - bgzf_job *j = (bgzf_job *)hts_tpool_result_data(r); - assert(j); - - if (fp->idx_build_otf) { - fp->idx->noffs++; - if ( fp->idx->noffs > fp->idx->moffs ) - { - fp->idx->moffs = fp->idx->noffs; - kroundup32(fp->idx->moffs); - fp->idx->offs = (bgzidx1_t*) realloc(fp->idx->offs, fp->idx->moffs*sizeof(bgzidx1_t)); - if ( !fp->idx->offs ) goto err; - } - fp->idx->offs[ fp->idx->noffs-1 ].uaddr = fp->idx->offs[ fp->idx->noffs-2 ].uaddr + j->uncomp_len; - fp->idx->offs[ fp->idx->noffs-1 ].caddr = fp->idx->offs[ fp->idx->noffs-2 ].caddr + j->comp_len; - } - - // Flush any cached hts_idx_push calls - if (bgzf_idx_flush(fp) < 0) - goto err; - - if (hwrite(fp->fp, j->comp_data, j->comp_len) != j->comp_len) - goto err; - - // Update our local block_address. Cannot be fp->block_address due to no - // locking in bgzf_tell. - pthread_mutex_lock(&mt->idx_m); - mt->block_address += j->comp_len; - pthread_mutex_unlock(&mt->idx_m); - - /* - * Periodically call hflush (which calls fsync when on a file). - * This avoids the fsync being done at the bgzf_close stage, - * which can sometimes cause significant delays. As this is in - * a separate thread, spreading the sync delays throughout the - * program execution seems better. - * Frequency of 1/512 has been chosen by experimentation - * across local XFS, NFS and Lustre tests. 
- */ - if (++mt->flush_pending % 512 == 0) - if (hflush(fp->fp) != 0) - goto err; - - - hts_tpool_delete_result(r, 0); - - // Also updated by main thread - pthread_mutex_lock(&mt->job_pool_m); - pool_free(mt->job_pool, j); - mt->jobs_pending--; - pthread_mutex_unlock(&mt->job_pool_m); - } - - if (hflush(fp->fp) != 0) - goto err; - - hts_tpool_process_destroy(mt->out_queue); - - return NULL; - - err: - hts_tpool_process_destroy(mt->out_queue); - return (void *)-1; -} - - -/* - * Reads a compressed block of data using hread and dispatches it to - * the thread pool for decompression. This is the analogue of the old - * non-threaded bgzf_read_block() function, but without modifying fp - * in any way (except for the read offset). All output goes via the - * supplied bgzf_job struct. - * - * Returns NULL when no more are left, or -1 on error - */ -int bgzf_mt_read_block(BGZF *fp, bgzf_job *j) -{ - uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; - int count, block_length, remaining; - - // NOTE: Guaranteed to be compressed as we block multi-threading in - // uncompressed mode. However it may be gzip compression instead - // of bgzf. 
- - // Reading compressed file - int64_t block_address; - block_address = htell(fp->fp); - - j->block_address = block_address; // in case we exit with j->errcode - - if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; - count = hpeek(fp->fp, header, sizeof(header)); - if (count == 0) // no data read - return -1; - int ret; - if ( count != sizeof(header) || (ret=check_header(header))==-2 ) - { - j->errcode |= BGZF_ERR_HEADER; - return -1; - } - if (ret == -1) { - j->errcode |= BGZF_ERR_MT; - return -1; - } - - count = hread(fp->fp, header, sizeof(header)); - if (count != sizeof(header)) // no data read - return -1; - - block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" - if (block_length < BLOCK_HEADER_LENGTH) { - j->errcode |= BGZF_ERR_HEADER; - return -1; - } - compressed_block = (uint8_t*)j->comp_data; - memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); - remaining = block_length - BLOCK_HEADER_LENGTH; - count = hread(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); - if (count != remaining) { - j->errcode |= BGZF_ERR_IO; - return -1; - } - j->comp_len = block_length; - j->uncomp_len = BGZF_MAX_BLOCK_SIZE; - j->block_address = block_address; - j->fp = fp; - j->errcode = 0; - - return 0; -} - - -static int bgzf_check_EOF_common(BGZF *fp) -{ - uint8_t buf[28]; - off_t offset = htell(fp->fp); - if (hseek(fp->fp, -28, SEEK_END) < 0) { - if (errno == ESPIPE) { hclearerr(fp->fp); return 2; } -#ifdef _WIN32 - if (errno == EINVAL) { hclearerr(fp->fp); return 2; } -#else - // Assume that EINVAL was due to the file being less than 28 bytes - // long, rather than being a random error return from an hfile backend. - // This should be reported as "no EOF block" rather than an error. 
- if (errno == EINVAL) { hclearerr(fp->fp); return 0; } -#endif - return -1; - } - if ( hread(fp->fp, buf, 28) != 28 ) return -1; - if ( hseek(fp->fp, offset, SEEK_SET) < 0 ) return -1; - return (memcmp("\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", buf, 28) == 0)? 1 : 0; -} - -/* - * Checks EOF from the reader thread. - */ -static void bgzf_mt_eof(BGZF *fp) { - mtaux_t *mt = fp->mt; - - pthread_mutex_lock(&mt->job_pool_m); - mt->eof = bgzf_check_EOF_common(fp); - pthread_mutex_unlock(&mt->job_pool_m); - mt->command = HAS_EOF_DONE; - pthread_cond_signal(&mt->command_c); -} - - -/* - * Performs the seek (called by reader thread). - * - * This simply drains the entire queue, throwing away blocks, seeks, - * and starts it up again. Brute force, but maybe sufficient. - */ -static void bgzf_mt_seek(BGZF *fp) { - mtaux_t *mt = fp->mt; - - hts_tpool_process_reset(mt->out_queue, 0); - pthread_mutex_lock(&mt->job_pool_m); - mt->errcode = 0; - - if (hseek(fp->fp, mt->block_address, SEEK_SET) < 0) - mt->errcode = BGZF_ERR_IO; - - pthread_mutex_unlock(&mt->job_pool_m); - mt->command = SEEK_DONE; - pthread_cond_signal(&mt->command_c); -} - -static void *bgzf_mt_reader(void *vp) { - BGZF *fp = (BGZF *)vp; - mtaux_t *mt = fp->mt; - -restart: - pthread_mutex_lock(&mt->job_pool_m); - bgzf_job *j = pool_alloc(mt->job_pool); - pthread_mutex_unlock(&mt->job_pool_m); - if (!j) goto err; - j->errcode = 0; - j->comp_len = 0; - j->uncomp_len = 0; - j->hit_eof = 0; - j->fp = fp; - - while (bgzf_mt_read_block(fp, j) == 0) { - // Dispatch - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_decode_func, j, - job_cleanup, job_cleanup, 0) < 0) { - job_cleanup(j); - goto err; - } - - // Check for command - pthread_mutex_lock(&mt->command_m); - switch (mt->command) { - case SEEK: - bgzf_mt_seek(fp); // Sets mt->command to SEEK_DONE - pthread_mutex_unlock(&mt->command_m); - goto restart; - - case HAS_EOF: - bgzf_mt_eof(fp); // Sets mt->command to HAS_EOF_DONE - 
break; - - case SEEK_DONE: - case HAS_EOF_DONE: - pthread_cond_signal(&mt->command_c); - break; - - case CLOSE: - pthread_cond_signal(&mt->command_c); - pthread_mutex_unlock(&mt->command_m); - hts_tpool_process_destroy(mt->out_queue); - return NULL; - - default: - break; - } - pthread_mutex_unlock(&mt->command_m); - - // Allocate buffer for next block - pthread_mutex_lock(&mt->job_pool_m); - j = pool_alloc(mt->job_pool); - pthread_mutex_unlock(&mt->job_pool_m); - if (!j) { - hts_tpool_process_destroy(mt->out_queue); - return NULL; - } - j->errcode = 0; - j->comp_len = 0; - j->uncomp_len = 0; - j->hit_eof = 0; - j->fp = fp; - } - - if (j->errcode == BGZF_ERR_MT) { - // Attempt to multi-thread decode a raw gzip stream cannot be done. - // We tear down the multi-threaded decoder and revert to the old code. - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_nul_func, j, - job_cleanup, job_cleanup, 0) < 0) { - job_cleanup(j); - hts_tpool_process_destroy(mt->out_queue); - return NULL; - } - hts_tpool_process_ref_decr(mt->out_queue); - return &j->errcode; - } - - // Dispatch an empty block so EOF is spotted. - // We also use this mechanism for returning errors, in which case - // j->errcode is set already. - - j->hit_eof = 1; - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_nul_func, j, - job_cleanup, job_cleanup, 0) < 0) { - job_cleanup(j); - hts_tpool_process_destroy(mt->out_queue); - return NULL; - } - if (j->errcode != 0) { - hts_tpool_process_destroy(mt->out_queue); - return &j->errcode; - } - - // We hit EOF so can stop reading, but we may get a subsequent - // seek request. In this case we need to restart the reader. - // - // To handle this we wait on a condition variable and then - // monitor the command. (This could be either seek or close.) 
- for (;;) { - pthread_mutex_lock(&mt->command_m); - if (mt->command == NONE) - pthread_cond_wait(&mt->command_c, &mt->command_m); - switch(mt->command) { - default: - pthread_mutex_unlock(&mt->command_m); - break; - - case SEEK: - bgzf_mt_seek(fp); - pthread_mutex_unlock(&mt->command_m); - goto restart; - - case HAS_EOF: - bgzf_mt_eof(fp); // Sets mt->command to HAS_EOF_DONE - pthread_mutex_unlock(&mt->command_m); - break; - - case SEEK_DONE: - case HAS_EOF_DONE: - pthread_cond_signal(&mt->command_c); - pthread_mutex_unlock(&mt->command_m); - break; - - case CLOSE: - pthread_cond_signal(&mt->command_c); - pthread_mutex_unlock(&mt->command_m); - hts_tpool_process_destroy(mt->out_queue); - return NULL; - } - } - - err: - pthread_mutex_lock(&mt->command_m); - mt->command = CLOSE; - pthread_cond_signal(&mt->command_c); - pthread_mutex_unlock(&mt->command_m); - hts_tpool_process_destroy(mt->out_queue); - return NULL; -} - -int bgzf_thread_pool(BGZF *fp, hts_tpool *pool, int qsize) { - // No gain from multi-threading when not compressed - if (!fp->is_compressed) - return 0; - - mtaux_t *mt; - mt = (mtaux_t*)calloc(1, sizeof(mtaux_t)); - if (!mt) return -1; - fp->mt = mt; - - mt->pool = pool; - mt->n_threads = hts_tpool_size(pool); - if (!qsize) - qsize = mt->n_threads*2; - if (!(mt->out_queue = hts_tpool_process_init(mt->pool, qsize, 0))) - goto err; - hts_tpool_process_ref_incr(mt->out_queue); - - mt->job_pool = pool_create(sizeof(bgzf_job)); - if (!mt->job_pool) - goto err; - - pthread_mutex_init(&mt->job_pool_m, NULL); - pthread_mutex_init(&mt->command_m, NULL); - pthread_mutex_init(&mt->idx_m, NULL); - pthread_cond_init(&mt->command_c, NULL); - mt->flush_pending = 0; - mt->jobs_pending = 0; - mt->free_block = fp->uncompressed_block; // currently in-use block - mt->block_address = fp->block_address; - pthread_create(&mt->io_task, NULL, - fp->is_write ? 
bgzf_mt_writer : bgzf_mt_reader, fp); - - return 0; - - err: - free(mt); - fp->mt = NULL; - return -1; -} - -int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) -{ - // No gain from multi-threading when not compressed - if (!fp->is_compressed || fp->is_gzip) - return 0; - - if (n_threads < 1) return -1; - hts_tpool *p = hts_tpool_init(n_threads); - if (!p) - return -1; - - if (bgzf_thread_pool(fp, p, 0) != 0) { - hts_tpool_destroy(p); - return -1; - } - - fp->mt->own_pool = 1; - - return 0; -} - -static int mt_destroy(mtaux_t *mt) -{ - int ret = 0; - - // Tell the reader to shut down - pthread_mutex_lock(&mt->command_m); - mt->command = CLOSE; - pthread_cond_signal(&mt->command_c); - hts_tpool_wake_dispatch(mt->out_queue); // unstick the reader - pthread_mutex_unlock(&mt->command_m); - - // Check for thread worker failure, indicated by is_shutdown returning 2 - // It's possible really late errors might be missed, but we can live with - // that. - ret = -(hts_tpool_process_is_shutdown(mt->out_queue) > 1); - // Destroying the queue first forces the writer to exit. - // mt->out_queue is reference counted, so destroy gets called in both - // this and the IO threads. The last to do it will clean up. - hts_tpool_process_destroy(mt->out_queue); - - // IO thread will now exit. Wait for it and perform final clean-up. - // If it returned non-NULL, it was not happy. - void *retval = NULL; - pthread_join(mt->io_task, &retval); - ret = retval != NULL ? 
-1 : ret; - - pthread_mutex_destroy(&mt->job_pool_m); - pthread_mutex_destroy(&mt->command_m); - pthread_mutex_destroy(&mt->idx_m); - pthread_cond_destroy(&mt->command_c); - if (mt->curr_job) - pool_free(mt->job_pool, mt->curr_job); - - if (mt->own_pool) - hts_tpool_destroy(mt->pool); - - pool_destroy(mt->job_pool); - - if (mt->idx_cache.e) - free(mt->idx_cache.e); - - free(mt); - fflush(stderr); - - return ret; -} - -static int mt_queue(BGZF *fp) -{ - mtaux_t *mt = fp->mt; - - mt->block_number++; - - // Also updated by writer thread - pthread_mutex_lock(&mt->job_pool_m); - bgzf_job *j = pool_alloc(mt->job_pool); - if (j) mt->jobs_pending++; - pthread_mutex_unlock(&mt->job_pool_m); - if (!j) return -1; - - j->fp = fp; - j->errcode = 0; - j->uncomp_len = fp->block_offset; - if (fp->compress_level == 0) { - memcpy(j->comp_data + BLOCK_HEADER_LENGTH + 5, fp->uncompressed_block, - j->uncomp_len); - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, - bgzf_encode_level0_func, j, - job_cleanup, job_cleanup, 0) < 0) { - goto fail; - } - } else { - memcpy(j->uncomp_data, fp->uncompressed_block, j->uncomp_len); - - // Need non-block vers & job_pending? - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_encode_func, j, - job_cleanup, job_cleanup, 0) < 0) { - goto fail; - } - } - - fp->block_offset = 0; - return 0; - - fail: - job_cleanup(j); - pthread_mutex_lock(&mt->job_pool_m); - mt->jobs_pending--; - pthread_mutex_unlock(&mt->job_pool_m); - return -1; -} - -static int mt_flush_queue(BGZF *fp) -{ - mtaux_t *mt = fp->mt; - - // Drain the encoder jobs. - // We cannot use hts_tpool_flush here as it can cause deadlock if - // the queue is full up of decoder tasks. The best solution would - // be to have one input queue per type of job, but we don't right now. 
- //hts_tpool_flush(mt->pool); - pthread_mutex_lock(&mt->job_pool_m); - int shutdown = 0; - while (mt->jobs_pending != 0) { - if ((shutdown = hts_tpool_process_is_shutdown(mt->out_queue))) - break; - pthread_mutex_unlock(&mt->job_pool_m); - usleep(10000); // FIXME: replace by condition variable - pthread_mutex_lock(&mt->job_pool_m); - } - pthread_mutex_unlock(&mt->job_pool_m); - - if (shutdown) - return -1; - - // Wait on bgzf_mt_writer to drain the queue - if (hts_tpool_process_flush(mt->out_queue) != 0) - return -1; - - return (fp->errcode == 0)? 0 : -1; -} - -static int lazy_flush(BGZF *fp) -{ - if (fp->mt) - return fp->block_offset ? mt_queue(fp) : 0; - else - return bgzf_flush(fp); -} - -#else // ~ #ifdef BGZF_MT - -int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) -{ - return 0; -} - -static inline int lazy_flush(BGZF *fp) -{ - return bgzf_flush(fp); -} - -#endif // ~ #ifdef BGZF_MT - -int bgzf_flush(BGZF *fp) -{ - if (!fp->is_write) return 0; -#ifdef BGZF_MT - if (fp->mt) { - int ret = 0; - if (fp->block_offset) ret = mt_queue(fp); - if (!ret) ret = mt_flush_queue(fp); - - // We maintain mt->block_address when threading as the - // main code can call bgzf_tell without any locks. - // (The result from tell are wrong, but we only care about the last - // 16-bits worth except for the final flush process. 
- pthread_mutex_lock(&fp->mt->idx_m); - fp->block_address = fp->mt->block_address; - pthread_mutex_unlock(&fp->mt->idx_m); - - return ret; - } -#endif - while (fp->block_offset > 0) { - int block_length; - if ( fp->idx_build_otf ) - { - bgzf_index_add_block(fp); - fp->idx->ublock_addr += fp->block_offset; - } - block_length = deflate_block(fp, fp->block_offset); - if (block_length < 0) { - hts_log_debug("Deflate block operation failed: %s", bgzf_zerr(block_length, NULL)); - return -1; - } - if (hwrite(fp->fp, fp->compressed_block, block_length) != block_length) { - hts_log_error("File write failed (wrong size)"); - fp->errcode |= BGZF_ERR_IO; // possibly truncated file - return -1; - } - fp->block_address += block_length; - } - return 0; -} - -int bgzf_flush_try(BGZF *fp, ssize_t size) -{ - if (fp->block_offset + size > BGZF_BLOCK_SIZE) return lazy_flush(fp); - return 0; -} - -ssize_t bgzf_write(BGZF *fp, const void *data, size_t length) -{ - if ( !fp->is_compressed ) { - size_t push = length + (size_t) fp->block_offset; - fp->block_offset = push % BGZF_MAX_BLOCK_SIZE; - fp->block_address += (push - fp->block_offset); - return hwrite(fp->fp, data, length); - } - - const uint8_t *input = (const uint8_t*)data; - ssize_t remaining = length; - assert(fp->is_write); - while (remaining > 0) { - uint8_t* buffer = (uint8_t*)fp->uncompressed_block; - int copy_length = BGZF_BLOCK_SIZE - fp->block_offset; - if (copy_length > remaining) copy_length = remaining; - memcpy(buffer + fp->block_offset, input, copy_length); - fp->block_offset += copy_length; - input += copy_length; - remaining -= copy_length; - if (fp->block_offset == BGZF_BLOCK_SIZE) { - if (lazy_flush(fp) != 0) return -1; - } - } - return length - remaining; -} - -ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length) -{ - if ( !fp->is_compressed ) { - size_t push = length + (size_t) fp->block_offset; - fp->block_offset = push % BGZF_MAX_BLOCK_SIZE; - fp->block_address += (push - fp->block_offset); - 
return hwrite(fp->fp, data, length); - } - - const uint8_t *input = (const uint8_t*)data; - ssize_t remaining = length; - assert(fp->is_write); - uint64_t current_block; //keep track of current block - uint64_t ublock_size; // amount of uncompressed data to be fed into next block - while (remaining > 0) { - current_block = fp->idx->moffs - fp->idx->noffs; - ublock_size = current_block + 1 < fp->idx->moffs ? fp->idx->offs[current_block+1].uaddr-fp->idx->offs[current_block].uaddr : BGZF_MAX_BLOCK_SIZE; - uint8_t* buffer = (uint8_t*)fp->uncompressed_block; - int copy_length = ublock_size - fp->block_offset; - if (copy_length > remaining) copy_length = remaining; - memcpy(buffer + fp->block_offset, input, copy_length); - fp->block_offset += copy_length; - input += copy_length; - remaining -= copy_length; - if (fp->block_offset == ublock_size) { - if (lazy_flush(fp) != 0) return -1; - if (fp->idx->noffs > 0) - fp->idx->noffs--; // decrement noffs to track the blocks - } - } - return length - remaining; -} - - -ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length) -{ - ssize_t ret = hwrite(fp->fp, data, length); - if (ret < 0) fp->errcode |= BGZF_ERR_IO; - return ret; -} - -// Helper function for tidying up fp->mt and setting errcode -static void bgzf_close_mt(BGZF *fp) { - if (fp->mt) { - if (!fp->mt->free_block) - fp->uncompressed_block = NULL; - if (mt_destroy(fp->mt) < 0) - fp->errcode = BGZF_ERR_IO; - } -} - -int bgzf_close(BGZF* fp) -{ - int ret, block_length; - if (fp == 0) return -1; - if (fp->is_write && fp->is_compressed) { - if (bgzf_flush(fp) != 0) { - bgzf_close_mt(fp); - return -1; - } - fp->compress_level = -1; - block_length = deflate_block(fp, 0); // write an empty block - if (block_length < 0) { - hts_log_debug("Deflate block operation failed: %s", bgzf_zerr(block_length, NULL)); - bgzf_close_mt(fp); - return -1; - } - if (hwrite(fp->fp, fp->compressed_block, block_length) < 0 - || hflush(fp->fp) != 0) { - hts_log_error("File write failed"); 
- fp->errcode |= BGZF_ERR_IO; - return -1; - } - } - - bgzf_close_mt(fp); - - if ( fp->is_gzip ) - { - if (fp->gz_stream == NULL) ret = Z_OK; - else if (!fp->is_write) ret = inflateEnd(fp->gz_stream); - else ret = deflateEnd(fp->gz_stream); - if (ret != Z_OK) { - hts_log_error("Call to inflateEnd/deflateEnd failed: %s", bgzf_zerr(ret, NULL)); - } - free(fp->gz_stream); - } - ret = hclose(fp->fp); - if (ret != 0) return -1; - bgzf_index_destroy(fp); - free(fp->uncompressed_block); - free_cache(fp); - ret = fp->errcode ? -1 : 0; - free(fp); - return ret; -} - -void bgzf_set_cache_size(BGZF *fp, int cache_size) -{ - if (fp && fp->mt) return; // Not appropriate when multi-threading - if (fp && fp->cache) fp->cache_size = cache_size; -} - -int bgzf_check_EOF(BGZF *fp) { - int has_eof; - - if (fp->mt) { - pthread_mutex_lock(&fp->mt->command_m); - // fp->mt->command state transitions should be: - // NONE -> HAS_EOF -> HAS_EOF_DONE -> NONE - // (HAS_EOF -> HAS_EOF_DONE happens in bgzf_mt_reader thread) - if (fp->mt->command != CLOSE) - fp->mt->command = HAS_EOF; - pthread_cond_signal(&fp->mt->command_c); - hts_tpool_wake_dispatch(fp->mt->out_queue); - do { - if (fp->mt->command == CLOSE) { - // possible error in bgzf_mt_reader - pthread_mutex_unlock(&fp->mt->command_m); - return 0; - } - pthread_cond_wait(&fp->mt->command_c, &fp->mt->command_m); - switch (fp->mt->command) { - case HAS_EOF_DONE: break; - case HAS_EOF: - // Resend signal intended for bgzf_mt_reader() - pthread_cond_signal(&fp->mt->command_c); - break; - case CLOSE: - continue; - default: - abort(); // Should not get to any other state - } - } while (fp->mt->command != HAS_EOF_DONE); - fp->mt->command = NONE; - has_eof = fp->mt->eof; - pthread_mutex_unlock(&fp->mt->command_m); - } else { - has_eof = bgzf_check_EOF_common(fp); - } - - fp->no_eof_block = (has_eof == 0); - - return has_eof; -} - -static inline int64_t bgzf_seek_common(BGZF* fp, - int64_t block_address, int block_offset) -{ - if (fp->mt) { - // 
The reader runs asynchronous and does loops of: - // Read block - // Check & process command - // Dispatch decode job - // - // Once at EOF it then switches to loops of - // Wait for command - // Process command (possibly switching back to above loop). - // - // To seek we therefore send the reader thread a SEEK command, - // waking it up if blocked in dispatch and signalling if - // waiting for a command. We then wait for the response so we - // know the seek succeeded. - pthread_mutex_lock(&fp->mt->command_m); - fp->mt->hit_eof = 0; - // fp->mt->command state transitions should be: - // NONE -> SEEK -> SEEK_DONE -> NONE - // (SEEK -> SEEK_DONE happens in bgzf_mt_reader thread) - fp->mt->command = SEEK; - fp->mt->block_address = block_address; - pthread_cond_signal(&fp->mt->command_c); - hts_tpool_wake_dispatch(fp->mt->out_queue); - do { - pthread_cond_wait(&fp->mt->command_c, &fp->mt->command_m); - switch (fp->mt->command) { - case SEEK_DONE: break; - case SEEK: - // Resend signal intended for bgzf_mt_reader() - pthread_cond_signal(&fp->mt->command_c); - break; - default: - abort(); // Should not get to any other state - } - } while (fp->mt->command != SEEK_DONE); - fp->mt->command = NONE; - - fp->block_length = 0; // indicates current block has not been loaded - fp->block_address = block_address; - fp->block_offset = block_offset; - - pthread_mutex_unlock(&fp->mt->command_m); - } else { - if (hseek(fp->fp, block_address, SEEK_SET) < 0) { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - fp->block_length = 0; // indicates current block has not been loaded - fp->block_address = block_address; - fp->block_offset = block_offset; - } - - return 0; -} - -int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) -{ - if (fp->is_write || where != SEEK_SET || fp->is_gzip) { - fp->errcode |= BGZF_ERR_MISUSE; - return -1; - } - - // This is a flag to indicate we've jumped elsewhere in the stream, to act - // as a hint to any other code which is wrapping up bgzf for its own - // 
purposes. We may not be able to tell when seek happens as it can be - // done on our behalf, eg by the iterator. - // - // This is never cleared here. Any tool that needs to handle it is also - // responsible for clearing it. - fp->seeked = pos; - - return bgzf_seek_common(fp, pos >> 16, pos & 0xFFFF); -} - -int bgzf_is_bgzf(const char *fn) -{ - uint8_t buf[16]; - int n; - hFILE *fp; - if ((fp = hopen(fn, "r")) == 0) return 0; - n = hread(fp, buf, 16); - if (hclose(fp) < 0) return 0; - if (n != 16) return 0; - return check_header(buf) == 0? 1 : 0; -} - -int bgzf_compression(BGZF *fp) -{ - return (!fp->is_compressed)? no_compression : (fp->is_gzip)? gzip : bgzf; -} - -int bgzf_getc(BGZF *fp) -{ - if (fp->block_offset+1 < fp->block_length) { - fp->uncompressed_address++; - return ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; - } - - int c; - if (fp->block_offset >= fp->block_length) { - if (bgzf_read_block(fp) != 0) return -2; /* error */ - if (fp->block_length == 0) return -1; /* end-of-file */ - } - c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; - if (fp->block_offset == fp->block_length) { - fp->block_address = bgzf_htell(fp); - fp->block_offset = 0; - fp->block_length = 0; - } - fp->uncompressed_address++; - return c; -} - -int bgzf_getline(BGZF *fp, int delim, kstring_t *str) -{ - int l, state = 0; - str->l = 0; - do { - if (fp->block_offset >= fp->block_length) { - if (bgzf_read_block(fp) != 0) { state = -2; break; } - if (fp->block_length == 0) { state = -1; break; } - } - unsigned char *buf = fp->uncompressed_block; - for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l); - if (l < fp->block_length) state = 1; - l -= fp->block_offset; - if (ks_expand(str, l + 2) < 0) { state = -3; break; } - memcpy(str->s + str->l, buf + fp->block_offset, l); - str->l += l; - fp->block_offset += l + 1; - if (fp->block_offset >= fp->block_length) { - fp->block_address = bgzf_htell(fp); - fp->block_offset = 0; - 
fp->block_length = 0; - } - } while (state == 0); - if (state < -1) return state; - if (str->l == 0 && state < 0) return state; - fp->uncompressed_address += str->l + 1; - if ( delim=='\n' && str->l>0 && str->s[str->l-1]=='\r' ) str->l--; - str->s[str->l] = 0; - return str->l <= INT_MAX ? (int) str->l : INT_MAX; -} - -void bgzf_index_destroy(BGZF *fp) -{ - if ( !fp->idx ) return; - free(fp->idx->offs); - free(fp->idx); - fp->idx = NULL; - fp->idx_build_otf = 0; -} - -int bgzf_index_build_init(BGZF *fp) -{ - bgzf_index_destroy(fp); - fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t)); - if ( !fp->idx ) return -1; - fp->idx_build_otf = 1; // build index on the fly - return 0; -} - -int bgzf_index_add_block(BGZF *fp) -{ - fp->idx->noffs++; - if ( fp->idx->noffs > fp->idx->moffs ) - { - fp->idx->moffs = fp->idx->noffs; - kroundup32(fp->idx->moffs); - fp->idx->offs = (bgzidx1_t*) realloc(fp->idx->offs, fp->idx->moffs*sizeof(bgzidx1_t)); - if ( !fp->idx->offs ) return -1; - } - fp->idx->offs[ fp->idx->noffs-1 ].uaddr = fp->idx->ublock_addr; - fp->idx->offs[ fp->idx->noffs-1 ].caddr = fp->block_address; - return 0; -} - -static inline int hwrite_uint64(uint64_t x, hFILE *f) -{ - if (ed_is_big()) x = ed_swap_8(x); - if (hwrite(f, &x, sizeof(x)) != sizeof(x)) return -1; - return 0; -} - -static char * get_name_suffix(const char *bname, const char *suffix) -{ - size_t len = strlen(bname) + strlen(suffix) + 1; - char *buff = malloc(len); - if (!buff) return NULL; - snprintf(buff, len, "%s%s", bname, suffix); - return buff; -} - -int bgzf_index_dump_hfile(BGZF *fp, struct hFILE *idx, const char *name) -{ - // Note that the index contains one extra record when indexing files opened - // for reading. The terminating record is not present when opened for writing. - // This is not a bug. 
- - int i; - - if (!fp->idx) { - hts_log_error("Called for BGZF handle with no index"); - errno = EINVAL; - return -1; - } - - if (bgzf_flush(fp) != 0) return -1; - - // discard the entry marking the end of the file - if (fp->mt && fp->idx) - fp->idx->noffs--; - - if (hwrite_uint64(fp->idx->noffs - 1, idx) < 0) goto fail; - for (i=1; iidx->noffs; i++) - { - if (hwrite_uint64(fp->idx->offs[i].caddr, idx) < 0) goto fail; - if (hwrite_uint64(fp->idx->offs[i].uaddr, idx) < 0) goto fail; - } - return 0; - - fail: - hts_log_error("Error writing to %s : %s", name ? name : "index", strerror(errno)); - return -1; -} - -int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix) -{ - const char *name = bname, *msg = NULL; - char *tmp = NULL; - hFILE *idx = NULL; - - if (!fp->idx) { - hts_log_error("Called for BGZF handle with no index"); - errno = EINVAL; - return -1; - } - - if ( suffix ) - { - tmp = get_name_suffix(bname, suffix); - if ( !tmp ) return -1; - name = tmp; - } - - idx = hopen(name, "wb"); - if ( !idx ) { - msg = "Error opening"; - goto fail; - } - - if (bgzf_index_dump_hfile(fp, idx, name) != 0) goto fail; - - if (hclose(idx) < 0) - { - idx = NULL; - msg = "Error on closing"; - goto fail; - } - - free(tmp); - return 0; - - fail: - if (msg != NULL) { - hts_log_error("%s %s : %s", msg, name, strerror(errno)); - } - if (idx) hclose_abruptly(idx); - free(tmp); - return -1; -} - -static inline int hread_uint64(uint64_t *xptr, hFILE *f) -{ - if (hread(f, xptr, sizeof(*xptr)) != sizeof(*xptr)) return -1; - if (ed_is_big()) ed_swap_8p(xptr); - return 0; -} - -int bgzf_index_load_hfile(BGZF *fp, struct hFILE *idx, const char *name) -{ - fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t)); - if (fp->idx == NULL) goto fail; - uint64_t x; - if (hread_uint64(&x, idx) < 0) goto fail; - - fp->idx->noffs = fp->idx->moffs = x + 1; - fp->idx->offs = (bgzidx1_t*) malloc(fp->idx->moffs*sizeof(bgzidx1_t)); - if (fp->idx->offs == NULL) goto fail; - fp->idx->offs[0].caddr = 
fp->idx->offs[0].uaddr = 0; - - int i; - for (i=1; iidx->noffs; i++) - { - if (hread_uint64(&fp->idx->offs[i].caddr, idx) < 0) goto fail; - if (hread_uint64(&fp->idx->offs[i].uaddr, idx) < 0) goto fail; - } - - return 0; - - fail: - hts_log_error("Error reading %s : %s", name ? name : "index", strerror(errno)); - if (fp->idx) { - free(fp->idx->offs); - free(fp->idx); - fp->idx = NULL; - } - return -1; -} - -int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix) -{ - const char *name = bname, *msg = NULL; - char *tmp = NULL; - hFILE *idx = NULL; - if ( suffix ) - { - tmp = get_name_suffix(bname, suffix); - if ( !tmp ) return -1; - name = tmp; - } - - idx = hopen(name, "rb"); - if ( !idx ) { - msg = "Error opening"; - goto fail; - } - - if (bgzf_index_load_hfile(fp, idx, name) != 0) goto fail; - - if (hclose(idx) != 0) { - idx = NULL; - msg = "Error closing"; - goto fail; - } - - free(tmp); - return 0; - - fail: - if (msg != NULL) { - hts_log_error("%s %s : %s", msg, name, strerror(errno)); - } - if (idx) hclose_abruptly(idx); - free(tmp); - return -1; -} - -int bgzf_useek(BGZF *fp, off_t uoffset, int where) -{ - if (fp->is_write || where != SEEK_SET || fp->is_gzip) { - fp->errcode |= BGZF_ERR_MISUSE; - return -1; - } - if (uoffset >= fp->uncompressed_address - fp->block_offset && - uoffset < fp->uncompressed_address + fp->block_length - fp->block_offset) { - // Can seek into existing data - fp->block_offset += uoffset - fp->uncompressed_address; - fp->uncompressed_address = uoffset; - return 0; - } - if ( !fp->is_compressed ) - { - if (hseek(fp->fp, uoffset, SEEK_SET) < 0) - { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - fp->block_length = 0; // indicates current block has not been loaded - fp->block_address = uoffset; - fp->block_offset = 0; - if (bgzf_read_block(fp) < 0) { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - fp->uncompressed_address = uoffset; - return 0; - } - - if ( !fp->idx ) - { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - - 
// binary search - int ilo = 0, ihi = fp->idx->noffs - 1; - while ( ilo<=ihi ) - { - int i = (ilo+ihi)*0.5; - if ( uoffset < fp->idx->offs[i].uaddr ) ihi = i - 1; - else if ( uoffset >= fp->idx->offs[i].uaddr ) ilo = i + 1; - else break; - } - int i = ilo-1; - if (bgzf_seek_common(fp, fp->idx->offs[i].caddr, 0) < 0) - return -1; - - if ( bgzf_read_block(fp) < 0 ) { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - if ( uoffset - fp->idx->offs[i].uaddr > 0 ) - { - fp->block_offset = uoffset - fp->idx->offs[i].uaddr; - assert( fp->block_offset <= fp->block_length ); // todo: skipped, unindexed, blocks - } - fp->uncompressed_address = uoffset; - return 0; -} - -off_t bgzf_utell(BGZF *fp) -{ - return fp->uncompressed_address; // currently maintained only when reading -} - -/* prototype is in hfile_internal.h */ -struct hFILE *bgzf_hfile(struct BGZF *fp) { - return fp->fp; -} diff --git a/src/htslib-1.18/bgzip.1 b/src/htslib-1.18/bgzip.1 deleted file mode 100644 index b1950d2..0000000 --- a/src/htslib-1.18/bgzip.1 +++ /dev/null @@ -1,197 +0,0 @@ -.TH bgzip 1 "25 July 2023" "htslib-1.18" "Bioinformatics tools" -.SH NAME -.PP -bgzip \- Block compression/decompression utility -.\" -.\" Copyright (C) 2009-2011 Broad Institute. -.\" Copyright (C) 2018, 2021-2022 Genome Research Limited. -.\" -.\" Author: Heng Li -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. 
-.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -. -.\" For code blocks and examples (cf groff's Ultrix-specific man macros) -.de EX - -. in +\\$1 -. nf -. ft CR -.. -.de EE -. ft -. fi -. in - -.. -.SH SYNOPSIS -.PP -.B bgzip -.RB [ -cdfhikrt ] -.RB [ -b -.IR virtualOffset ] -.RB [ -I -.IR index_name ] -.RB [ -l -.IR compression_level ] -.RB [ -s -.IR size ] -.RB [ -@ -.IR threads ] -.RI [ file ] -.PP -.SH DESCRIPTION -.PP -Bgzip compresses files in a similar manner to, and compatible with, gzip(1). -The file is compressed into a series of small (less than 64K) 'BGZF' blocks. -This allows indexes to be built against the compressed file and used to -retrieve portions of the data without having to decompress the entire file. - -If no files are specified on the command line, bgzip will compress (or -decompress if the -d option is used) standard input to standard output. -If a file is specified, it will be compressed (or decompressed with -d). -If the -c option is used, the result will be written to standard output, -otherwise when compressing bgzip will write to a new file with a .gz -suffix and remove the original. When decompressing the input file must -have a .gz suffix, which will be removed to make the output name. Again -after decompression completes the input file will be removed. - -.SH OPTIONS -.TP 10 -.B "--binary" -Bgzip will attempt to ensure BGZF blocks end on a newline when the -input is a text file. The exception to this is where a single line is -larger than a BGZF block (64Kb). 
This can aid tools that use the -index to perform random access on the compressed stream, as the start -of a block is likely to also be the start of a text record. - -This option processes text files as if they were binary content, -ignoring the location of newlines. This also restores the behaviour -for text files to bgzip version 1.15 and earlier. -.TP -.BI "-b, --offset " INT -Decompress to standard output from virtual file position (0-based uncompressed -offset). -Implies -c and -d. -.TP -.B "-c, --stdout" -Write to standard output, keep original files unchanged. -.TP -.B "-d, --decompress" -Decompress. -.TP -.B "-f, --force" -Overwrite files without asking, or decompress files that don't have a known -compression filename extension (e.g., \fI.gz\fR) without asking. -Use \fB--force\fR twice to do both without asking. -.TP -.B "-g, --rebgzip" -Try to use an existing index to create a compressed file with matching -block offsets. The index must be specified using the \fB-I -\fIfile.gzi\fR option. -Note that this assumes that the same compression library and level are in use -as when making the original file. -Don't use it unless you know what you're doing. -.TP -.B "-h, --help" -Displays a help message. -.TP -.B "-i, --index" -Create a BGZF index while compressing. -Unless the -I option is used, this will have the name of the compressed -file with .gzi appended to it. -.TP -.BI "-I, --index-name " FILE -Index file name. -.TP -.B "-k, --keep" -Do not delete input file during operation. -.TP -.BI "-l, --compress-level " INT -Compression level to use when compressing. -From 0 to 9, or -1 for the default level set by the compression library. [-1] -.TP -.B "-r, --reindex" -Rebuild the index on an existing compressed file. -.TP -.BI "-s, --size " INT -Decompress INT bytes (uncompressed size) to standard output. -Implies -c. -.TP -.B "-t, --test" -Test the intregrity of the compressed file. -.TP -.BI "-@, --threads " INT -Number of threads to use [1]. 
-.PP - -.SH BGZF FORMAT -The BGZF format written by bgzip is described in the SAM format specification -available from http://samtools.github.io/hts-specs/SAMv1.pdf. - -It makes use of a gzip feature which allows compressed files to be -concatenated. -The input data is divided into blocks which are no larger than 64 kilobytes -both before and after compression (including compression headers). -Each block is compressed into a gzip file. -The gzip header includes an extra sub-field with identifier 'BC' and the length -of the compressed block, including all headers. - -.SH GZI FORMAT -The index format is a binary file listing pairs of compressed and -uncompressed offsets in a BGZF file. -Each compressed offset points to the start of a BGZF block. -The uncompressed offset is the corresponding location in the uncompressed -data stream. - -All values are stored as little-endian 64-bit unsigned integers. - -The file contents are: -.EX 4 -uint64_t number_entries -.EE -followed by number_entries pairs of: -.EX 4 -uint64_t compressed_offset -uint64_t uncompressed_offset -.EE - -.SH EXAMPLES -.EX 4 -# Compress stdin to stdout -bgzip < /usr/share/dict/words > /tmp/words.gz - -# Make a .gzi index -bgzip -r /tmp/words.gz - -# Extract part of the data using the index -bgzip -b 367635 -s 4 /tmp/words.gz - -# Uncompress the whole file, removing the compressed copy -bgzip -d /tmp/words.gz -.EE - -.SH AUTHOR -.PP -The BGZF library was originally implemented by Bob Handsaker and modified -by Heng Li for remote file access and in-memory caching. - -.SH SEE ALSO -.IR gzip (1), -.IR tabix (1) diff --git a/src/htslib-1.18/bgzip.c b/src/htslib-1.18/bgzip.c deleted file mode 100644 index 589f79f..0000000 --- a/src/htslib-1.18/bgzip.c +++ /dev/null @@ -1,516 +0,0 @@ -/* bgzip.c -- Block compression/decompression utility. - - Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology - Copyright (C) 2010, 2013-2019, 2021-2022 Genome Research Ltd. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notices and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "htslib/bgzf.h" -#include "htslib/hts.h" -#include "htslib/hfile.h" - -#ifdef _WIN32 -# define WIN32_LEAN_AND_MEAN -# include -#endif - -static const int WINDOW_SIZE = BGZF_BLOCK_SIZE; - -static void error(const char *format, ...) -{ - va_list ap; - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); - exit(EXIT_FAILURE); -} - -static int ask_yn() -{ - char line[1024]; - if (fgets(line, sizeof line, stdin) == NULL) - return 0; - return line[0] == 'Y' || line[0] == 'y'; -} - -static int confirm_overwrite(const char *fn) -{ - int save_errno = errno; - int ret = 0; - - if (isatty(STDIN_FILENO)) { - fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? 
", fn); - if (ask_yn()) ret = 1; - } - - errno = save_errno; - return ret; -} - -static int known_extension(const char *ext) -{ - static const char *known[] = { - "gz", "bgz", "bgzf", - NULL - }; - - const char **p; - for (p = known; *p; p++) - if (strcasecmp(ext, *p) == 0) return 1; - return 0; -} - -static int confirm_filename(int *is_forced, const char *name, const char *ext) -{ - if (*is_forced) { - (*is_forced)--; - return 1; - } - - if (!isatty(STDIN_FILENO)) - return 0; - - fprintf(stderr, "[bgzip] .%s is not a known extension; do you wish to decompress to %s (y or n)? ", ext, name); - return ask_yn(); -} - -static int bgzip_main_usage(FILE *fp, int status) -{ - fprintf(fp, "\n"); - fprintf(fp, "Version: %s\n", hts_version()); - fprintf(fp, "Usage: bgzip [OPTIONS] [FILE] ...\n"); - fprintf(fp, "Options:\n"); - fprintf(fp, " -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)\n"); - fprintf(fp, " -c, --stdout write on standard output, keep original files unchanged\n"); - fprintf(fp, " -d, --decompress decompress\n"); - fprintf(fp, " -f, --force overwrite files without asking\n"); - fprintf(fp, " -g, --rebgzip use an index file to bgzip a file\n"); - fprintf(fp, " -h, --help give this help\n"); - fprintf(fp, " -i, --index compress and create BGZF index\n"); - fprintf(fp, " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\n"); - fprintf(fp, " -k, --keep don't delete input files during operation\n"); - fprintf(fp, " -l, --compress-level INT Compression level to use when compressing; 0 to 9, or -1 for default [-1]\n"); - fprintf(fp, " -r, --reindex (re)index compressed file\n"); - fprintf(fp, " -s, --size INT decompress INT bytes (uncompressed size)\n"); - fprintf(fp, " -t, --test test integrity of compressed file\n"); - fprintf(fp, " --binary Don't align blocks with text lines\n"); - fprintf(fp, " -@, --threads INT number of compression threads to use [1]\n"); - return status; -} - -int main(int argc, char **argv) -{ - 
int c, compress, compress_level = -1, pstdout, is_forced, test, index = 0, rebgzip = 0, reindex = 0, keep, binary; - BGZF *fp; - char *buffer; - long start, end, size; - char *index_fname = NULL; - int threads = 1; - - static const struct option loptions[] = - { - {"help", no_argument, NULL, 'h'}, - {"offset", required_argument, NULL, 'b'}, - {"stdout", no_argument, NULL, 'c'}, - {"decompress", no_argument, NULL, 'd'}, - {"force", no_argument, NULL, 'f'}, - {"index", no_argument, NULL, 'i'}, - {"index-name", required_argument, NULL, 'I'}, - {"compress-level", required_argument, NULL, 'l'}, - {"reindex", no_argument, NULL, 'r'}, - {"rebgzip",no_argument,NULL,'g'}, - {"size", required_argument, NULL, 's'}, - {"threads", required_argument, NULL, '@'}, - {"test", no_argument, NULL, 't'}, - {"version", no_argument, NULL, 1}, - {"keep", no_argument, NULL, 'k'}, - {"binary", no_argument, NULL, 2}, - {NULL, 0, NULL, 0} - }; - - compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; test = 0; keep = 0; binary = 0; - while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:grtk",loptions,NULL)) >= 0){ - switch(c){ - case 'd': compress = 0; break; - case 'c': pstdout = 1; break; - case 'b': start = atol(optarg); compress = 0; pstdout = 1; break; - case 's': size = atol(optarg); pstdout = 1; break; - case 'f': is_forced++; break; - case 'i': index = 1; break; - case 'I': index_fname = optarg; break; - case 'l': compress_level = atol(optarg); break; - case 'g': rebgzip = 1; break; - case 'r': reindex = 1; compress = 0; break; - case '@': threads = atoi(optarg); break; - case 't': test = 1; compress = 0; reindex = 0; break; - case 'k': keep = 1; break; - case 1: - printf( -"bgzip (htslib) %s\n" -"Copyright (C) 2023 Genome Research Ltd.\n", hts_version()); - return EXIT_SUCCESS; - case 2: binary = 1; break; - case 'h': return bgzip_main_usage(stdout, EXIT_SUCCESS); - case '?': return bgzip_main_usage(stderr, EXIT_FAILURE); - } - } - if (size >= 0) end = start + 
size; - if (end >= 0 && end < start) { - fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); - return 1; - } - if (compress == 1) { - hFILE* f_src = NULL; - char out_mode[3] = "w\0"; - char out_mode_exclusive[4] = "wx\0"; - - if (compress_level < -1 || compress_level > 9) { - fprintf(stderr, "[bgzip] Invalid compress-level: %d\n", compress_level); - return 1; - } - if (compress_level >= 0) { - out_mode[1] = compress_level + '0'; - out_mode_exclusive[2] = compress_level + '0'; - } - - if (!(f_src = hopen(argc > optind ? argv[optind] : "-", "r"))) { - fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); - return 1; - } - - if ( argc>optind ) - { - if (pstdout) - fp = bgzf_open("-", out_mode); - else - { - char *name = malloc(strlen(argv[optind]) + 5); - strcpy(name, argv[optind]); - strcat(name, ".gz"); - fp = bgzf_open(name, is_forced? out_mode : out_mode_exclusive); - if (fp == NULL && errno == EEXIST && confirm_overwrite(name)) - fp = bgzf_open(name, out_mode); - if (fp == NULL) { - fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); - free(name); - return 1; - } - free(name); - } - } - else if (!pstdout && isatty(fileno((FILE *)stdout)) ) - return bgzip_main_usage(stderr, EXIT_FAILURE); - else if ( index && !index_fname ) - { - fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); - return 1; - } - else - fp = bgzf_open("-", out_mode); - - if ( index && rebgzip ) - { - fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n"); - return 1; - } - - if ( rebgzip && !index_fname ) - { - fprintf(stderr, "[bgzip] Index file name expected when writing to stdout. 
See -I option.\n"); - return 1; - } - - if ( index ) bgzf_index_build_init(fp); - if (threads > 1) - bgzf_mt(fp, threads, 256); - - buffer = malloc(WINDOW_SIZE); - if (!buffer) - return 1; - if (rebgzip){ - if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]); - - while ((c = hread(f_src, buffer, WINDOW_SIZE)) > 0) - if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); - } - else { - htsFormat fmt; - int textual = 0; - if (!binary - && hts_detect_format(f_src, &fmt) == 0 - && fmt.compression == no_compression) { - switch(fmt.format) { - case text_format: - case sam: - case vcf: - case bed: - case fasta_format: - case fastq_format: - case fai_format: - case fqi_format: - textual = 1; - break; - default: break; // silence clang warnings - } - } - - if (binary || !textual) { - // Binary data, either detected or explicit - while ((c = hread(f_src, buffer, WINDOW_SIZE)) > 0) - if (bgzf_write(fp, buffer, c) < 0) - error("Could not write %d bytes: Error %d\n", - c, fp->errcode); - } else { - /* Text mode, try a flush after a newline */ - int in_header = 1, n = 0, long_line = 0; - while ((c = hread(f_src, buffer+n, WINDOW_SIZE-n)) > 0) { - int c2 = c+n; - int flush = 0; - if (in_header && - (long_line || buffer[0] == '@' || buffer[0] == '#')) { - // Scan forward to find the last header line. - int last_start = 0; - n = 0; - while (n < c2) { - if (buffer[n++] != '\n') - continue; - - last_start = n; - if (n < c2 && - !(buffer[n] == '@' || buffer[n] == '#')) { - in_header = 0; - break; - } - } - if (!last_start) { - n = c2; - long_line = 1; - } else { - n = last_start; - flush = 1; - long_line = 0; - } - } else { - // Scan backwards to find the last newline. 
- n += c; // c read plus previous n overflow - while (--n >= 0 && ((char *)buffer)[n] != '\n') - ; - - if (n >= 0) { - flush = 1; - n++; - } else { - n = c2; - } - } - - // Pos n is either at the end of the buffer with flush==0, - // or the first byte after a newline and a flush point. - if (bgzf_write(fp, buffer, n) < 0) - error("Could not write %d bytes: Error %d\n", - n, fp->errcode); - if (flush) - if (bgzf_flush_try(fp, 65536) < 0) // force - return -1; - - memmove(buffer, buffer+n, c2-n); - n = c2-n; - } - - // Trailing data. - if (bgzf_write(fp, buffer, n) < 0) - error("Could not write %d bytes: Error %d\n", - n, fp->errcode); - } - } - if ( index ) - { - if (index_fname) { - if (bgzf_index_dump(fp, index_fname, NULL) < 0) - error("Could not write index to '%s'\n", index_fname); - } else { - if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0) - error("Could not write index to '%s.gz.gzi'\n", - argv[optind]); - } - } - if (bgzf_close(fp) < 0) - error("Output close failed: Error %d\n", fp->errcode); - if (hclose(f_src) < 0) - error("Input close failed\n"); - if (argc > optind && !pstdout && !keep) unlink(argv[optind]); - free(buffer); - return 0; - } - else if ( reindex ) - { - if ( argc>optind ) - { - fp = bgzf_open(argv[optind], "r"); - if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]); - } - else - { - if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n"); - fp = bgzf_open("-", "r"); - if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno)); - } - - buffer = malloc(BGZF_BLOCK_SIZE); - bgzf_index_build_init(fp); - int ret; - while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ; - free(buffer); - if ( ret<0 ) error("Is the file gzipped or bgzipped? 
The latter is required for indexing.\n"); - - if ( index_fname ) { - if (bgzf_index_dump(fp, index_fname, NULL) < 0) - error("Could not write index to '%s'\n", index_fname); - } else { - if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0) - error("Could not write index to '%s.gzi'\n", argv[optind]); - } - - if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode); - return 0; - } - else - { - int f_dst; - - if ( argc>optind ) - { - fp = bgzf_open(argv[optind], "r"); - if (fp == NULL) { - fprintf(stderr, "[bgzip] Could not open %s: %s\n", argv[optind], strerror(errno)); - return 1; - } - if (bgzf_compression(fp) == no_compression) { - fprintf(stderr, "[bgzip] %s: not a compressed file -- ignored\n", argv[optind]); - bgzf_close(fp); - return 1; - } - - if (pstdout || test) { - f_dst = fileno(stdout); - } - else { - const int wrflags = O_WRONLY | O_CREAT | O_TRUNC; - char *name = argv[optind], *ext; - size_t pos; - for (pos = strlen(name); pos > 0; --pos) - if (name[pos] == '.' || name[pos] == '/') break; - if (pos == 0 || name[pos] != '.') { - fprintf(stderr, "[bgzip] can't remove an extension from %s -- please rename\n", argv[optind]); - bgzf_close(fp); - return 1; - } - name = strdup(argv[optind]); - name[pos] = '\0'; - ext = &name[pos+1]; - if (! (known_extension(ext) || confirm_filename(&is_forced, name, ext))) { - fprintf(stderr, "[bgzip] unknown extension .%s -- declining to decompress to %s\n", ext, name); - bgzf_close(fp); - free(name); - return 1; - } - f_dst = open(name, is_forced? 
wrflags : wrflags|O_EXCL, 0666); - if (f_dst < 0 && errno == EEXIST && confirm_overwrite(name)) - f_dst = open(name, wrflags, 0666); - if (f_dst < 0) { - fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); - free(name); - return 1; - } - free(name); - } - } - else if (!pstdout && isatty(fileno((FILE *)stdin)) ) - return bgzip_main_usage(stderr, EXIT_FAILURE); - else - { - f_dst = fileno(stdout); - fp = bgzf_open("-", "r"); - if (fp == NULL) { - fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); - return 1; - } - if (bgzf_compression(fp) == no_compression) { - fprintf(stderr, "[bgzip] stdin is not compressed -- ignored\n"); - bgzf_close(fp); - return 1; - } - } - - buffer = malloc(WINDOW_SIZE); - if ( start>0 ) - { - if (index_fname) { - if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) - error("Could not load index: %s\n", index_fname); - } else { - if (optind >= argc) { - error("The -b option requires -I when reading from stdin " - "(and stdin must be seekable)\n"); - } - if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) - error("Could not load index: %s.gzi\n", argv[optind]); - } - if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start); - } - - if (threads > 1) - bgzf_mt(fp, threads, 256); - -#ifdef _WIN32 - _setmode(f_dst, O_BINARY); -#endif - while (1) { - if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); - else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? 
WINDOW_SIZE:(end - start)); - if (c == 0) break; - if (c < 0) error("Error %d in block starting at offset %" PRId64 "(%" PRIX64 ")\n", fp->errcode, fp->block_address, fp->block_address); - start += c; - if ( !test && write(f_dst, buffer, c) != c ) { -#ifdef _WIN32 - if (GetLastError() != ERROR_NO_DATA) -#endif - error("Could not write %d bytes\n", c); - } - if (end >= 0 && start >= end) break; - } - free(buffer); - if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode); - if (argc > optind && !pstdout && !test && !keep) unlink(argv[optind]); - return 0; - } -} diff --git a/src/htslib-1.18/config.h.in b/src/htslib-1.18/config.h.in deleted file mode 100644 index 08358aa..0000000 --- a/src/htslib-1.18/config.h.in +++ /dev/null @@ -1,157 +0,0 @@ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* If you use configure, this file provides #defines reflecting your - configuration choices. If you have not run configure, suitable - conservative defaults will be used. - - Autoheader adds a number of items to this template file that are not - used by HTSlib: STDC_HEADERS and most HAVE_*_H header file defines - are immaterial, as we assume standard ISO C headers and facilities; - the PACKAGE_* defines are unused and are overridden by the more - accurate PACKAGE_VERSION as computed by the Makefile. */ - -/* Define if HTSlib should enable GCS support. */ -#undef ENABLE_GCS - -/* Define if HTSlib should enable plugins. */ -#undef ENABLE_PLUGINS - -/* Define if HTSlib should enable S3 support. */ -#undef ENABLE_S3 - -/* Defined to 1 if rANS source using AVX2 can be compiled. */ -#undef HAVE_AVX2 - -/* Defined to 1 if rANS source using AVX512F can be compiled. */ -#undef HAVE_AVX512 - -/* Define if you have the Common Crypto library. */ -#undef HAVE_COMMONCRYPTO - -/* Define to 1 if you have the `drand48' function. 
*/ -#undef HAVE_DRAND48 - -/* Define if using an external libhtscodecs */ -#undef HAVE_EXTERNAL_LIBHTSCODECS - -/* Define to 1 if you have the `fdatasync' function. */ -#undef HAVE_FDATASYNC - -/* Define to 1 if you have the `fsync' function. */ -#undef HAVE_FSYNC - -/* Define to 1 if you have the `getpagesize' function. */ -#undef HAVE_GETPAGESIZE - -/* Define to 1 if you have the `gmtime_r' function. */ -#undef HAVE_GMTIME_R - -/* Define if you have libcrypto-style HMAC(). */ -#undef HAVE_HMAC - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the `bz2' library (-lbz2). */ -#undef HAVE_LIBBZ2 - -/* Define if libcurl file access is enabled. */ -#undef HAVE_LIBCURL - -/* Define if libdeflate is available. */ -#undef HAVE_LIBDEFLATE - -/* Define to 1 if you have the `lzma' library (-llzma). */ -#undef HAVE_LIBLZMA - -/* Define to 1 if you have the `z' library (-lz). */ -#undef HAVE_LIBZ - -/* Define to 1 if you have the header file. */ -#undef HAVE_LZMA_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have a working `mmap' system call. */ -#undef HAVE_MMAP - -/* Defined to 1 if rANS source using popcnt can be compiled. */ -#undef HAVE_POPCNT - -/* Define to 1 if you have the `srand48_deterministic' function. */ -#undef HAVE_SRAND48_DETERMINISTIC - -/* Defined to 1 if rANS source using SSE4.1 can be compiled. */ -#undef HAVE_SSE4_1 - -/* Defined to 1 if rANS source using SSSE3 can be compiled. */ -#undef HAVE_SSSE3 - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_PARAM_H - -/* Define to 1 if you have the header file. 
*/ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the home page for this package. */ -#undef PACKAGE_URL - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* Platform-dependent plugin filename extension. */ -#undef PLUGIN_EXT - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - - -/* Prevent unaligned access in htscodecs SSE4 rANS codec */ -#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0 -#undef UBSAN -#endif - -/* Enable large inode numbers on Mac OS X 10.5. */ -#ifndef _DARWIN_USE_64_BIT_INODE -# define _DARWIN_USE_64_BIT_INODE 1 -#endif - -/* Number of bits in a file offset, on hosts where this is settable. */ -#undef _FILE_OFFSET_BITS - -/* Define for large files, on AIX-style hosts. */ -#undef _LARGE_FILES - -/* Specify X/Open requirements */ -#undef _XOPEN_SOURCE diff --git a/src/htslib-1.18/config.mk.in b/src/htslib-1.18/config.mk.in deleted file mode 100644 index 7341a17..0000000 --- a/src/htslib-1.18/config.mk.in +++ /dev/null @@ -1,121 +0,0 @@ -# Optional configure Makefile overrides for htslib. -# -# Copyright (C) 2015-2017, 2019, 2023 Genome Research Ltd. 
-# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# This is @configure_input@ -# -# If you use configure, this file overrides variables and augments rules -# in the Makefile to reflect your configuration choices. If you don't run -# configure, the main Makefile contains suitable conservative defaults. - -prefix = @prefix@ -exec_prefix = @exec_prefix@ -bindir = @bindir@ -includedir = @includedir@ -libdir = @libdir@ -libexecdir = @libexecdir@ -datarootdir = @datarootdir@ -mandir = @mandir@ - -CC = @CC@ -RANLIB = @RANLIB@ - -CPPFLAGS = @CPPFLAGS@ -CFLAGS = @CFLAGS@ -LDFLAGS = @LDFLAGS@ -VERSION_SCRIPT_LDFLAGS = @VERSION_SCRIPT_LDFLAGS@ -LIBS = @LIBS@ - -PLATFORM = @PLATFORM@ -PLUGIN_EXT = @PLUGIN_EXT@ - -# The default Makefile enables some of the optional files, but we blank -# them so they can be controlled by configure instead. 
-NONCONFIGURE_OBJS = - -# Lowercase here indicates these are "local" to config.mk -plugin_OBJS = -noplugin_LDFLAGS = -noplugin_LIBS = - -# ifeq/.../endif, +=, and target-specific variables are GNU Make-specific. -# If you don't have GNU Make, comment out this conditional and note that -# to enable libcurl you will need to implement the following elsewhere. -ifeq "libcurl-@libcurl@" "libcurl-enabled" - -LIBCURL_LIBS = -lcurl - -plugin_OBJS += hfile_libcurl.o - -hfile_libcurl$(PLUGIN_EXT): LIBS += $(LIBCURL_LIBS) - -noplugin_LIBS += $(LIBCURL_LIBS) - -endif - -ifeq "gcs-@gcs@" "gcs-enabled" -plugin_OBJS += hfile_gcs.o -endif - -ifeq "s3-@s3@" "s3-enabled" -plugin_OBJS += hfile_s3.o -plugin_OBJS += hfile_s3_write.o - -CRYPTO_LIBS = @CRYPTO_LIBS@ -noplugin_LIBS += $(CRYPTO_LIBS) -hfile_s3$(PLUGIN_EXT): LIBS += $(CRYPTO_LIBS) -hfile_s3_write$(PLUGIN_EXT): LIBS += $(CRYPTO_LIBS) $(LIBCURL_LIBS) -endif - -ifeq "plugins-@enable_plugins@" "plugins-yes" - -plugindir = @plugindir@ -pluginpath = @pluginpath@ - -LIBHTS_OBJS += plugin.o -PLUGIN_OBJS += $(plugin_OBJS) - -plugin.o plugin.pico: ALL_CPPFLAGS += -DPLUGINPATH=\"$(pluginpath)\" - -# When built as separate plugins, these record their version themselves. -hfile_gcs.o hfile_gcs.pico: version.h -hfile_libcurl.o hfile_libcurl.pico: version.h -hfile_s3.o hfile_s3.pico: version.h -hfile_s3_write.o hfile_s3_write.pico: version.h - -# Windows DLL plugins depend on the import library, built as a byproduct. 
-$(plugin_OBJS:.o=.cygdll): cyghts-$(LIBHTS_SOVERSION).dll - -else - -LIBHTS_OBJS += $(plugin_OBJS) -LDFLAGS += $(noplugin_LDFLAGS) -LIBS += $(noplugin_LIBS) - -endif - -# Extra CFLAGS for specific files -HTS_CFLAGS_AVX2 = @hts_cflags_avx2@ -HTS_CFLAGS_AVX512 = @hts_cflags_avx512@ -HTS_CFLAGS_SSE4 = @hts_cflags_sse4@ -HTS_HAVE_NEON = @hts_have_neon@ diff --git a/src/htslib-1.18/configure b/src/htslib-1.18/configure deleted file mode 100755 index ac63dee..0000000 --- a/src/htslib-1.18/configure +++ /dev/null @@ -1,7474 +0,0 @@ -#! /bin/sh -# Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for HTSlib 1.18. -# -# Report bugs to . -# -# -# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. -# -# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. -# -# Portions copyright (C) 2020-2023 Genome Research Ltd. -# -# This configure script is free software: you are free to change and -# redistribute it. There is NO WARRANTY, to the extent permitted by law. -## -------------------- ## -## M4sh Initialization. ## -## -------------------- ## - -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac -fi - - -as_nl=' -' -export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. 
-as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. -if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi - -# The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then - PATH_SEPARATOR=: - (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { - (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || - PATH_SEPARATOR=';' - } -fi - - -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - -# Find who we are. Look in the path if we contain no directory separator. -as_myself= -case $0 in #(( - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break - done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as `sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - exit 1 -fi - -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - -# Use a proper internal environment variable to ensure we don't fall - # into an infinite loop, continuously re-executing ourselves. - if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then - _as_can_reexec=no; export _as_can_reexec; - # We cannot yet assume a decent shell, so we have to provide a -# neutralization value for shells without unset; and this also -# works around shells that cannot unset nonexistent variables. -# Preserve -v and -x to the replacement shell. -BASH_ENV=/dev/null -ENV=/dev/null -(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV -case $- in # (((( - *v*x* | *x*v* ) as_opts=-vx ;; - *v* ) as_opts=-v ;; - *x* ) as_opts=-x ;; - * ) as_opts= ;; -esac -exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} -# Admittedly, this is quite paranoid, since all the known shells bail -# out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 -as_fn_exit 255 - fi - # We don't want this to propagate to other subprocesses. 
- { _as_can_reexec=; unset _as_can_reexec;} -if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which - # is contrary to our usage. Disable this feature. - alias -g '\${1+\"\$@\"}'='\"\$@\"' - setopt NO_GLOB_SUBST -else - case \`(set -o) 2>/dev/null\` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac -fi -" - as_required="as_fn_return () { (exit \$1); } -as_fn_success () { as_fn_return 0; } -as_fn_failure () { as_fn_return 1; } -as_fn_ret_success () { return 0; } -as_fn_ret_failure () { return 1; } - -exitcode=0 -as_fn_success || { exitcode=1; echo as_fn_success failed.; } -as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } -as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } -as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } -if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : - -else - exitcode=1; echo positional parameters were not saved. -fi -test x\$exitcode = x0 || exit 1 -test -x / || exit 1" - as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO - as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO - eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && - test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 -test \$(( 1 + 1 )) = 2 || exit 1" - if (eval "$as_required") 2>/dev/null; then : - as_have_required=yes -else - as_have_required=no -fi - if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : - -else - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -as_found=false -for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- as_found=: - case $as_dir in #( - /*) - for as_base in sh bash ksh sh5; do - # Try only shells that exist, to save several forks. - as_shell=$as_dir/$as_base - if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : - CONFIG_SHELL=$as_shell as_have_required=yes - if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : - break 2 -fi -fi - done;; - esac - as_found=false -done -$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : - CONFIG_SHELL=$SHELL as_have_required=yes -fi; } -IFS=$as_save_IFS - - - if test "x$CONFIG_SHELL" != x; then : - export CONFIG_SHELL - # We cannot yet assume a decent shell, so we have to provide a -# neutralization value for shells without unset; and this also -# works around shells that cannot unset nonexistent variables. -# Preserve -v and -x to the replacement shell. -BASH_ENV=/dev/null -ENV=/dev/null -(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV -case $- in # (((( - *v*x* | *x*v* ) as_opts=-vx ;; - *v* ) as_opts=-v ;; - *x* ) as_opts=-x ;; - * ) as_opts= ;; -esac -exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} -# Admittedly, this is quite paranoid, since all the known shells bail -# out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 -exit 255 -fi - - if test x$as_have_required = xno; then : - $as_echo "$0: This script requires a shell more modern than all" - $as_echo "$0: the shells that I found on your system." - if test x${ZSH_VERSION+set} = xset ; then - $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" - $as_echo "$0: be upgraded to zsh 4.3.4 or later." 
- else - $as_echo "$0: Please tell bug-autoconf@gnu.org and -$0: samtools-help@lists.sourceforge.net about your system, -$0: including any error possibly output before this -$0: message. Then install a modern shell, or manually run -$0: the script under such a shell if you do have one." - fi - exit 1 -fi -fi -fi -SHELL=${CONFIG_SHELL-/bin/sh} -export SHELL -# Unset more variables known to interfere with behavior of common tools. -CLICOLOR_FORCE= GREP_OPTIONS= -unset CLICOLOR_FORCE GREP_OPTIONS - -## --------------------- ## -## M4sh Shell Functions. ## -## --------------------- ## -# as_fn_unset VAR -# --------------- -# Portably unset VAR. -as_fn_unset () -{ - { eval $1=; unset $1;} -} -as_unset=as_fn_unset - -# as_fn_set_status STATUS -# ----------------------- -# Set $? to STATUS, without forking. -as_fn_set_status () -{ - return $1 -} # as_fn_set_status - -# as_fn_exit STATUS -# ----------------- -# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. -as_fn_exit () -{ - set +e - as_fn_set_status $1 - exit $1 -} # as_fn_exit - -# as_fn_mkdir_p -# ------------- -# Create "$as_dir" as a directory, including parents if necessary. -as_fn_mkdir_p () -{ - - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || eval $as_mkdir_p || { - as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break - done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || as_fn_error $? 
"cannot create directory $as_dir" - - -} # as_fn_mkdir_p - -# as_fn_executable_p FILE -# ----------------------- -# Test if FILE is an executable regular file. -as_fn_executable_p () -{ - test -f "$1" && test -x "$1" -} # as_fn_executable_p -# as_fn_append VAR VALUE -# ---------------------- -# Append the text in VALUE to the end of the definition contained in VAR. Take -# advantage of any shell optimizations that allow amortized linear growth over -# repeated appends, instead of the typical quadratic growth present in naive -# implementations. -if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : - eval 'as_fn_append () - { - eval $1+=\$2 - }' -else - as_fn_append () - { - eval $1=\$$1\$2 - } -fi # as_fn_append - -# as_fn_arith ARG... -# ------------------ -# Perform arithmetic evaluation on the ARGs, and store the result in the -# global $as_val. Take advantage of shells that can avoid forks. The arguments -# must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : - eval 'as_fn_arith () - { - as_val=$(( $* )) - }' -else - as_fn_arith () - { - as_val=`expr "$@" || test $? -eq 1` - } -fi # as_fn_arith - - -# as_fn_error STATUS ERROR [LINENO LOG_FD] -# ---------------------------------------- -# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are -# provided, also output the error to LOG_FD, referencing LINENO. Then exit the -# script with STATUS, using 1 if that was 0. 
-as_fn_error () -{ - as_status=$1; test $as_status -eq 0 && as_status=1 - if test "$4"; then - as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 - fi - $as_echo "$as_me: error: $2" >&2 - as_fn_exit $as_status -} # as_fn_error - -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# Avoid depending upon Character Ranges. -as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - - - as_lineno_1=$LINENO as_lineno_1a=$LINENO - as_lineno_2=$LINENO as_lineno_2a=$LINENO - eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && - test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { - # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | - sed ' - s/[$]LINENO.*/&-/ - t lineno - b - :lineno - N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ - t loop - s/-\n.*// - ' >$as_me.lineno && - chmod +x "$as_me.lineno" || - { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } - - # If we had to re-execute with $CONFIG_SHELL, we're ensured to have - # already done that, so ensure we don't try to do so again and fall - # in an infinite loop. This has already happened in practice. - _as_can_reexec=no; export _as_can_reexec - # Don't try to exec as it changes $[0], causing all sort of problems - # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" - # Exit status is that of the last command. - exit -} - -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in #((((( --n*) - case `echo 'xy\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - xy) ECHO_C='\c';; - *) echo `echo ksh88 bug on AIX 6.1` > /dev/null - ECHO_T=' ';; - esac;; -*) - ECHO_N='-n';; -esac - -rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir 2>/dev/null -fi -if (echo >conf$$.file) 2>/dev/null; then - if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -pR'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || - as_ln_s='cp -pR' - elif ln conf$$.file conf$$ 2>/dev/null; then - as_ln_s=ln - else - as_ln_s='cp -pR' - fi -else - as_ln_s='cp -pR' -fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null - -if mkdir -p . 
2>/dev/null; then - as_mkdir_p='mkdir -p "$as_dir"' -else - test -d ./-p && rmdir ./-p - as_mkdir_p=false -fi - -as_test_x='test -x' -as_executable_p=as_fn_executable_p - -# Sed expression to map a string onto a valid CPP name. -as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" - -# Sed expression to map a string onto a valid variable name. -as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" - - -test -n "$DJDIR" || exec 7<&0 &1 - -# Name of the host. -# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, -# so uname gets run too. -ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` - -# -# Initializations. -# -ac_default_prefix=/usr/local -ac_clean_files= -ac_config_libobj_dir=. -LIBOBJS= -cross_compiling=no -subdirs= -MFLAGS= -MAKEFLAGS= - -# Identity of this package. -PACKAGE_NAME='HTSlib' -PACKAGE_TARNAME='htslib' -PACKAGE_VERSION='1.18' -PACKAGE_STRING='HTSlib 1.18' -PACKAGE_BUGREPORT='samtools-help@lists.sourceforge.net' -PACKAGE_URL='http://www.htslib.org/' - -ac_unique_file="hts.c" -# Factoring default headers for most tests. 
-ac_includes_default="\ -#include -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#ifdef HAVE_SYS_STAT_H -# include -#endif -#ifdef STDC_HEADERS -# include -# include -#else -# ifdef HAVE_STDLIB_H -# include -# endif -#endif -#ifdef HAVE_STRING_H -# if !defined STDC_HEADERS && defined HAVE_MEMORY_H -# include -# endif -# include -#endif -#ifdef HAVE_STRINGS_H -# include -#endif -#ifdef HAVE_INTTYPES_H -# include -#endif -#ifdef HAVE_STDINT_H -# include -#endif -#ifdef HAVE_UNISTD_H -# include -#endif" - -ac_header_list= -ac_subst_vars='LTLIBOBJS -LIBOBJS -HTSDIRslash_if_relsrcdir -static_LIBS -static_LDFLAGS -private_LIBS -pc_requires -CRYPTO_LIBS -s3 -gcs -libcurl -PLUGIN_EXT -VERSION_SCRIPT_LDFLAGS -PLATFORM -pluginpath -plugindir -with_external_htscodecs -enable_plugins -PKG_CONFIG_LIBDIR -PKG_CONFIG_PATH -PKG_CONFIG -hts_cflags_avx512 -hts_cflags_avx2 -hts_cflags_sse4 -EGREP -CPP -GREP -RANLIB -OBJEXT -EXEEXT -ac_ct_CC -CPPFLAGS -LDFLAGS -CFLAGS -CC -target_alias -host_alias -build_alias -LIBS -ECHO_T -ECHO_N -ECHO_C -DEFS -mandir -localedir -libdir -psdir -pdfdir -dvidir -htmldir -infodir -docdir -oldincludedir -includedir -runstatedir -localstatedir -sharedstatedir -sysconfdir -datadir -datarootdir -libexecdir -sbindir -bindir -program_transform_name -prefix -exec_prefix -PACKAGE_URL -PACKAGE_BUGREPORT -PACKAGE_STRING -PACKAGE_VERSION -PACKAGE_TARNAME -PACKAGE_NAME -PATH_SEPARATOR -SHELL' -ac_subst_files='' -ac_user_opts=' -enable_option_checking -enable_warnings -enable_werror -enable_versioned_symbols -enable_bz2 -enable_gcs -enable_largefile -enable_libcurl -enable_lzma -enable_plugins -with_external_htscodecs -with_libdeflate -with_plugin_dir -with_plugin_path -enable_s3 -' - ac_precious_vars='build_alias -host_alias -target_alias -CC -CFLAGS -LDFLAGS -LIBS -CPPFLAGS -CPP -PKG_CONFIG -PKG_CONFIG_PATH -PKG_CONFIG_LIBDIR' - - -# Initialize some variables set by options. 
-ac_init_help= -ac_init_version=false -ac_unrecognized_opts= -ac_unrecognized_sep= -# The variables have the same names as the options, with -# dashes changed to underlines. -cache_file=/dev/null -exec_prefix=NONE -no_create= -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -verbose= -x_includes=NONE -x_libraries=NONE - -# Installation directory options. -# These are left unexpanded so users can "make install exec_prefix=/foo" -# and all the variables that are supposed to be based on exec_prefix -# by default will actually change. -# Use braces instead of parens because sh, perl, etc. also accept them. -# (The list follows the same order as the GNU Coding Standards.) -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datarootdir='${prefix}/share' -datadir='${datarootdir}' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -runstatedir='${localstatedir}/run' -includedir='${prefix}/include' -oldincludedir='/usr/include' -docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' -infodir='${datarootdir}/info' -htmldir='${docdir}' -dvidir='${docdir}' -pdfdir='${docdir}' -psdir='${docdir}' -libdir='${exec_prefix}/lib' -localedir='${datarootdir}/locale' -mandir='${datarootdir}/man' - -ac_prev= -ac_dashdash= -for ac_option -do - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval $ac_prev=\$ac_option - ac_prev= - continue - fi - - case $ac_option in - *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; - *=) ac_optarg= ;; - *) ac_optarg=yes ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. 
- - case $ac_dashdash$ac_option in - --) - ac_dashdash=yes ;; - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir=$ac_optarg ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build_alias ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build_alias=$ac_optarg ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file=$ac_optarg ;; - - --config-cache | -C) - cache_file=config.cache ;; - - -datadir | --datadir | --datadi | --datad) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=*) - datadir=$ac_optarg ;; - - -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ - | --dataroo | --dataro | --datar) - ac_prev=datarootdir ;; - -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ - | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) - datarootdir=$ac_optarg ;; - - -disable-* | --disable-*) - ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? 
"invalid feature name: $ac_useropt" - ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"enable_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval enable_$ac_useropt=no ;; - - -docdir | --docdir | --docdi | --doc | --do) - ac_prev=docdir ;; - -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) - docdir=$ac_optarg ;; - - -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) - ac_prev=dvidir ;; - -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) - dvidir=$ac_optarg ;; - - -enable-* | --enable-*) - ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" - ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"enable_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval enable_$ac_useropt=\$ac_optarg ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix=$ac_optarg ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. 
- with_gas=yes ;; - - -help | --help | --hel | --he | -h) - ac_init_help=long ;; - -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) - ac_init_help=recursive ;; - -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) - ac_init_help=short ;; - - -host | --host | --hos | --ho) - ac_prev=host_alias ;; - -host=* | --host=* | --hos=* | --ho=*) - host_alias=$ac_optarg ;; - - -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) - ac_prev=htmldir ;; - -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ - | --ht=*) - htmldir=$ac_optarg ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir=$ac_optarg ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir=$ac_optarg ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir=$ac_optarg ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir=$ac_optarg ;; - - -localedir | --localedir | --localedi | --localed | --locale) - ac_prev=localedir ;; - -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) - localedir=$ac_optarg ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | --localst | --locals) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) - localstatedir=$ac_optarg ;; - - 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir=$ac_optarg ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. - with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c | -n) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir=$ac_optarg ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix=$ac_optarg ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix=$ac_optarg ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | --program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix=$ac_optarg ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | 
--program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name=$ac_optarg ;; - - -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) - ac_prev=pdfdir ;; - -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) - pdfdir=$ac_optarg ;; - - -psdir | --psdir | --psdi | --psd | --ps) - ac_prev=psdir ;; - -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) - psdir=$ac_optarg ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -runstatedir | --runstatedir | --runstatedi | --runstated \ - | --runstate | --runstat | --runsta | --runst | --runs \ - | --run | --ru | --r) - ac_prev=runstatedir ;; - -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ - | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ - | --run=* | --ru=* | --r=*) - runstatedir=$ac_optarg ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir=$ac_optarg ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | 
--shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir=$ac_optarg ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site=$ac_optarg ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - srcdir=$ac_optarg ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir=$ac_optarg ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target_alias ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target_alias=$ac_optarg ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers | -V) - ac_init_version=: ;; - - -with-* | --with-*) - ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" - ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"with_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval with_$ac_useropt=\$ac_optarg ;; - - -without-* | --without-*) - ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? 
"invalid package name: $ac_useropt" - ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"with_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval with_$ac_useropt=no ;; - - --x) - # Obsolete; use --with-x. - with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes=$ac_optarg ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries=$ac_optarg ;; - - -*) as_fn_error $? "unrecognized option: \`$ac_option' -Try \`$0 --help' for more information" - ;; - - *=*) - ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` - # Reject names that are not valid shell variable names. - case $ac_envvar in #( - '' | [0-9]* | *[!_$as_cr_alnum]* ) - as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; - esac - eval $ac_envvar=\$ac_optarg - export $ac_envvar ;; - - *) - # FIXME: should be removed in autoconf 3.0. - $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 - expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && - $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 - : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" - ;; - - esac -done - -if test -n "$ac_prev"; then - ac_option=--`echo $ac_prev | sed 's/_/-/g'` - as_fn_error $? 
"missing argument to $ac_option" -fi - -if test -n "$ac_unrecognized_opts"; then - case $enable_option_checking in - no) ;; - fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; - *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; - esac -fi - -# Check all directory arguments for consistency. -for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ - datadir sysconfdir sharedstatedir localstatedir includedir \ - oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir runstatedir -do - eval ac_val=\$$ac_var - # Remove trailing slashes. - case $ac_val in - */ ) - ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` - eval $ac_var=\$ac_val;; - esac - # Be sure to have absolute directory names. - case $ac_val in - [\\/$]* | ?:[\\/]* ) continue;; - NONE | '' ) case $ac_var in *prefix ) continue;; esac;; - esac - as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" -done - -# There might be people who depend on the old broken behavior: `$host' -# used to hold the argument of --host etc. -# FIXME: To remove some day. -build=$build_alias -host=$host_alias -target=$target_alias - -# FIXME: To remove some day. -if test "x$host_alias" != x; then - if test "x$build_alias" = x; then - cross_compiling=maybe - elif test "x$build_alias" != "x$host_alias"; then - cross_compiling=yes - fi -fi - -ac_tool_prefix= -test -n "$host_alias" && ac_tool_prefix=$host_alias- - -test "$silent" = yes && exec 6>/dev/null - - -ac_pwd=`pwd` && test -n "$ac_pwd" && -ac_ls_di=`ls -di .` && -ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || - as_fn_error $? "working directory cannot be determined" -test "X$ac_ls_di" = "X$ac_pwd_ls_di" || - as_fn_error $? "pwd does not report name of working directory" - - -# Find the source files, if location was not specified. 
-if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then the parent directory. - ac_confdir=`$as_dirname -- "$as_myself" || -$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_myself" : 'X\(//\)[^/]' \| \ - X"$as_myself" : 'X\(//\)$' \| \ - X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_myself" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - srcdir=$ac_confdir - if test ! -r "$srcdir/$ac_unique_file"; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r "$srcdir/$ac_unique_file"; then - test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." - as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" -fi -ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" -ac_abs_confdir=`( - cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" - pwd)` -# When building in place, set srcdir=. -if test "$ac_abs_confdir" = "$ac_pwd"; then - srcdir=. -fi -# Remove unnecessary trailing slashes from srcdir. -# Double slashes in file names in object file debugging info -# mess up M-x gdb in Emacs. -case $srcdir in -*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; -esac -for ac_var in $ac_precious_vars; do - eval ac_env_${ac_var}_set=\${${ac_var}+set} - eval ac_env_${ac_var}_value=\$${ac_var} - eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} - eval ac_cv_env_${ac_var}_value=\$${ac_var} -done - -# -# Report the --help message. -# -if test "$ac_init_help" = "long"; then - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat <<_ACEOF -\`configure' configures HTSlib 1.18 to adapt to many kinds of systems. - -Usage: $0 [OPTION]... [VAR=VALUE]... 
- -To assign environment variables (e.g., CC, CFLAGS...), specify them as -VAR=VALUE. See below for descriptions of some of the useful variables. - -Defaults for the options are specified in brackets. - -Configuration: - -h, --help display this help and exit - --help=short display options specific to this package - --help=recursive display the short help of all the included packages - -V, --version display version information and exit - -q, --quiet, --silent do not print \`checking ...' messages - --cache-file=FILE cache test results in FILE [disabled] - -C, --config-cache alias for \`--cache-file=config.cache' - -n, --no-create do not create output files - --srcdir=DIR find the sources in DIR [configure dir or \`..'] - -Installation directories: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [PREFIX] - -By default, \`make install' will install all the files in -\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify -an installation prefix other than \`$ac_default_prefix' using \`--prefix', -for instance \`--prefix=\$HOME'. - -For better control, use the options below. 
- -Fine tuning of the installation directories: - --bindir=DIR user executables [EPREFIX/bin] - --sbindir=DIR system admin executables [EPREFIX/sbin] - --libexecdir=DIR program executables [EPREFIX/libexec] - --sysconfdir=DIR read-only single-machine data [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] - --localstatedir=DIR modifiable single-machine data [PREFIX/var] - --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] - --libdir=DIR object code libraries [EPREFIX/lib] - --includedir=DIR C header files [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc [/usr/include] - --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] - --datadir=DIR read-only architecture-independent data [DATAROOTDIR] - --infodir=DIR info documentation [DATAROOTDIR/info] - --localedir=DIR locale-dependent data [DATAROOTDIR/locale] - --mandir=DIR man documentation [DATAROOTDIR/man] - --docdir=DIR documentation root [DATAROOTDIR/doc/htslib] - --htmldir=DIR html documentation [DOCDIR] - --dvidir=DIR dvi documentation [DOCDIR] - --pdfdir=DIR pdf documentation [DOCDIR] - --psdir=DIR ps documentation [DOCDIR] -_ACEOF - - cat <<\_ACEOF -_ACEOF -fi - -if test -n "$ac_init_help"; then - case $ac_init_help in - short | recursive ) echo "Configuration of HTSlib 1.18:";; - esac - cat <<\_ACEOF - -Optional Features: - --disable-option-checking ignore unrecognized --enable/--with options - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --disable-warnings turn off compiler warnings - --enable-werror change warnings into errors, where supported - --disable-versioned-symbols - disable versioned symbols in shared library - --disable-bz2 omit support for BZ2-compressed CRAM files - --enable-gcs support Google Cloud Storage URLs - --disable-largefile omit support for large files - --enable-libcurl enable libcurl-based support for http/https/etc URLs 
- --disable-lzma omit support for LZMA-compressed CRAM files - --enable-plugins enable separately-compiled plugins for file access - --enable-s3 support Amazon AWS S3 URLs - -Optional Packages: - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --with-external-htscodecs - get htscodecs functions from a shared library - --with-libdeflate use libdeflate for faster crc and deflate algorithms - --with-plugin-dir=DIR plugin installation location [LIBEXECDIR/htslib] - --with-plugin-path=PATH default HTS_PATH plugin search path [PLUGINDIR] - -Some influential environment variables: - CC C compiler command - CFLAGS C compiler flags - LDFLAGS linker flags, e.g. -L if you have libraries in a - nonstandard directory - LIBS libraries to pass to the linker, e.g. -l - CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if - you have headers in a nonstandard directory - CPP C preprocessor - PKG_CONFIG path to pkg-config utility - PKG_CONFIG_PATH - directories to add to pkg-config's search path - PKG_CONFIG_LIBDIR - path overriding pkg-config's built-in search path - -Use these variables to override the choices made by `configure' or to help -it to find libraries and programs with nonstandard names/locations. - -Report bugs to . -HTSlib home page: . -_ACEOF -ac_status=$? -fi - -if test "$ac_init_help" = "recursive"; then - # If there are subdirs, report their specific --help. - for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue - test -d "$ac_dir" || - { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || - continue - ac_builddir=. - -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` - # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. 
ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix - -case $srcdir in - .) # We are building in place. - ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; -esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - cd "$ac_dir" || { ac_status=$?; continue; } - # Check for guested configure. - if test -f "$ac_srcdir/configure.gnu"; then - echo && - $SHELL "$ac_srcdir/configure.gnu" --help=recursive - elif test -f "$ac_srcdir/configure"; then - echo && - $SHELL "$ac_srcdir/configure" --help=recursive - else - $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 - fi || ac_status=$? - cd "$ac_pwd" || { ac_status=$?; break; } - done -fi - -test -n "$ac_init_help" && exit $ac_status -if $ac_init_version; then - cat <<\_ACEOF -HTSlib configure 1.18 -generated by GNU Autoconf 2.69 - -Copyright (C) 2012 Free Software Foundation, Inc. -This configure script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it. - -Portions copyright (C) 2020-2023 Genome Research Ltd. - -This configure script is free software: you are free to change and -redistribute it. There is NO WARRANTY, to the extent permitted by law. -_ACEOF - exit -fi - -## ------------------------ ## -## Autoconf initialization. ## -## ------------------------ ## - -# ac_fn_c_try_compile LINENO -# -------------------------- -# Try to compile conftest.$ac_ext, and return whether this succeeded. 
-ac_fn_c_try_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext - if { { ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then : - ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_compile - -# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES -# --------------------------------------------- -# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR -# accordingly. -ac_fn_c_check_decl () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - as_decl_name=`echo $2|sed 's/ *(.*//'` - as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 -$as_echo_n "checking whether $as_decl_name is declared... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$4 -int -main () -{ -#ifndef $as_decl_name -#ifdef __cplusplus - (void) $as_decl_use; -#else - (void) $as_decl_name; -#endif -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - eval "$3=yes" -else - eval "$3=no" -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_decl - -# ac_fn_c_try_cpp LINENO -# ---------------------- -# Try to preprocess conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_cpp () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if { { ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } > conftest.i && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then : - ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_cpp - -# ac_fn_c_try_run LINENO -# ---------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes -# that executables *can* be run. 
-ac_fn_c_try_run () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : - ac_retval=0 -else - $as_echo "$as_me: program exited with status $ac_status" >&5 - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=$ac_status -fi - rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_run - -# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES -# ------------------------------------------------------- -# Tests whether HEADER exists and can be compiled using the include files in -# INCLUDES, setting the cache variable VAR accordingly. -ac_fn_c_check_header_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - eval "$3=yes" -else - eval "$3=no" -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_compile - -# ac_fn_c_try_link LINENO -# ----------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_link () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest$ac_exeext - if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && { - test "$cross_compiling" = yes || - test -x conftest$ac_exeext - }; then : - ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 -fi - # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information - # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would - # interfere with the next link command; also delete a directory that is - # left behind by Apple's compiler. We do this before executing the actions. 
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_link - -# ac_fn_c_check_func LINENO FUNC VAR -# ---------------------------------- -# Tests whether FUNC exists, setting the cache variable VAR accordingly -ac_fn_c_check_func () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -/* Define $2 to an innocuous variant, in case declares $2. - For example, HP-UX 11i declares gettimeofday. */ -#define $2 innocuous_$2 - -/* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $2 (); below. - Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. */ - -#ifdef __STDC__ -# include -#else -# include -#endif - -#undef $2 - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char $2 (); -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined __stub_$2 || defined __stub___$2 -choke me -#endif - -int -main () -{ -return $2 (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - eval "$3=yes" -else - eval "$3=no" -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_func -cat >config.log <<_ACEOF -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. - -It was created by HTSlib $as_me 1.18, which was -generated by GNU Autoconf 2.69. Invocation command line was - - $ $0 $@ - -_ACEOF -exec 5>>config.log -{ -cat <<_ASUNAME -## --------- ## -## Platform. ## -## --------- ## - -hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` - -/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` -/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` -/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` -/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` - -_ASUNAME - -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- $as_echo "PATH: $as_dir" - done -IFS=$as_save_IFS - -} >&5 - -cat >&5 <<_ACEOF - - -## ----------- ## -## Core tests. ## -## ----------- ## - -_ACEOF - - -# Keep a trace of the command line. -# Strip out --no-create and --no-recursion so they do not pile up. -# Strip out --silent because we don't want to record it for future runs. -# Also quote any args containing shell meta-characters. -# Make two passes to allow for proper duplicate-argument suppression. -ac_configure_args= -ac_configure_args0= -ac_configure_args1= -ac_must_keep_next=false -for ac_pass in 1 2 -do - for ac_arg - do - case $ac_arg in - -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - continue ;; - *\'*) - ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; - esac - case $ac_pass in - 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; - 2) - as_fn_append ac_configure_args1 " '$ac_arg'" - if test $ac_must_keep_next = true; then - ac_must_keep_next=false # Got value, back to normal. - else - case $ac_arg in - *=* | --config-cache | -C | -disable-* | --disable-* \ - | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ - | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ - | -with-* | --with-* | -without-* | --without-* | --x) - case "$ac_configure_args0 " in - "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; - esac - ;; - -* ) ac_must_keep_next=true ;; - esac - fi - as_fn_append ac_configure_args " '$ac_arg'" - ;; - esac - done -done -{ ac_configure_args0=; unset ac_configure_args0;} -{ ac_configure_args1=; unset ac_configure_args1;} - -# When interrupted or exit'd, cleanup temporary files, and complete -# config.log. We remove comments because anyway the quotes in there -# would cause problems or look ugly. -# WARNING: Use '\'' to represent an apostrophe within the trap. -# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. 
-trap 'exit_status=$? - # Save into config.log some information that might help in debugging. - { - echo - - $as_echo "## ---------------- ## -## Cache variables. ## -## ---------------- ##" - echo - # The following way of writing the cache mishandles newlines in values, -( - for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( - *) { eval $ac_var=; unset $ac_var;} ;; - esac ;; - esac - done - (set) 2>&1 | - case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( - *${as_nl}ac_space=\ *) - sed -n \ - "s/'\''/'\''\\\\'\'''\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" - ;; #( - *) - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" - ;; - esac | - sort -) - echo - - $as_echo "## ----------------- ## -## Output variables. ## -## ----------------- ##" - echo - for ac_var in $ac_subst_vars - do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - $as_echo "$ac_var='\''$ac_val'\''" - done | sort - echo - - if test -n "$ac_subst_files"; then - $as_echo "## ------------------- ## -## File substitutions. ## -## ------------------- ##" - echo - for ac_var in $ac_subst_files - do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - $as_echo "$ac_var='\''$ac_val'\''" - done | sort - echo - fi - - if test -s confdefs.h; then - $as_echo "## ----------- ## -## confdefs.h. 
## -## ----------- ##" - echo - cat confdefs.h - echo - fi - test "$ac_signal" != 0 && - $as_echo "$as_me: caught signal $ac_signal" - $as_echo "$as_me: exit $exit_status" - } >&5 - rm -f core *.core core.conftest.* && - rm -f -r conftest* confdefs* conf$$* $ac_clean_files && - exit $exit_status -' 0 -for ac_signal in 1 2 13 15; do - trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal -done -ac_signal=0 - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -f -r conftest* confdefs.h - -$as_echo "/* confdefs.h */" > confdefs.h - -# Predefined preprocessor variables. - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_NAME "$PACKAGE_NAME" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_TARNAME "$PACKAGE_TARNAME" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_VERSION "$PACKAGE_VERSION" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_STRING "$PACKAGE_STRING" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_URL "$PACKAGE_URL" -_ACEOF - - -# Let the site file select an alternate cache file if it wants to. -# Prefer an explicitly selected file to automatically selected ones. -ac_site_file1=NONE -ac_site_file2=NONE -if test -n "$CONFIG_SITE"; then - # We do not want a PATH search for config.site. 
- case $CONFIG_SITE in #(( - -*) ac_site_file1=./$CONFIG_SITE;; - */*) ac_site_file1=$CONFIG_SITE;; - *) ac_site_file1=./$CONFIG_SITE;; - esac -elif test "x$prefix" != xNONE; then - ac_site_file1=$prefix/share/config.site - ac_site_file2=$prefix/etc/config.site -else - ac_site_file1=$ac_default_prefix/share/config.site - ac_site_file2=$ac_default_prefix/etc/config.site -fi -for ac_site_file in "$ac_site_file1" "$ac_site_file2" -do - test "x$ac_site_file" = xNONE && continue - if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 -$as_echo "$as_me: loading site script $ac_site_file" >&6;} - sed 's/^/| /' "$ac_site_file" >&5 - . "$ac_site_file" \ - || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "failed to load site script $ac_site_file -See \`config.log' for more details" "$LINENO" 5; } - fi -done - -if test -r "$cache_file"; then - # Some versions of bash will fail to source /dev/null (special files - # actually), so we avoid doing that. DJGPP emulates it as a regular file. - if test /dev/null != "$cache_file" && test -f "$cache_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 -$as_echo "$as_me: loading cache $cache_file" >&6;} - case $cache_file in - [\\/]* | ?:[\\/]* ) . "$cache_file";; - *) . "./$cache_file";; - esac - fi -else - { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 -$as_echo "$as_me: creating cache $cache_file" >&6;} - >$cache_file -fi - -as_fn_append ac_header_list " stdlib.h" -as_fn_append ac_header_list " unistd.h" -as_fn_append ac_header_list " sys/param.h" -# Check that the precious variables saved in the cache have kept the same -# value. 
-ac_cache_corrupted=false -for ac_var in $ac_precious_vars; do - eval ac_old_set=\$ac_cv_env_${ac_var}_set - eval ac_new_set=\$ac_env_${ac_var}_set - eval ac_old_val=\$ac_cv_env_${ac_var}_value - eval ac_new_val=\$ac_env_${ac_var}_value - case $ac_old_set,$ac_new_set in - set,) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,set) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,);; - *) - if test "x$ac_old_val" != "x$ac_new_val"; then - # differences in whitespace do not lead to failure. - ac_old_val_w=`echo x $ac_old_val` - ac_new_val_w=`echo x $ac_new_val` - if test "$ac_old_val_w" != "$ac_new_val_w"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 -$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} - ac_cache_corrupted=: - else - { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 -$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} - eval $ac_var=\$ac_old_val - fi - { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 -$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 -$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} - fi;; - esac - # Pass precious variables to config.status. - if test "$ac_new_set" = set; then - case $ac_new_val in - *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; - *) ac_arg=$ac_var=$ac_new_val ;; - esac - case " $ac_configure_args " in - *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. - *) as_fn_append ac_configure_args " '$ac_arg'" ;; - esac - fi -done -if $ac_cache_corrupted; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 -$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 -fi -## -------------------- ## -## Main body of script. ## -## -------------------- ## - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - - -ac_config_headers="$ac_config_headers config.h" - - - - -# SYNOPSIS -# -# HTS_PROG_CC_WERROR(FLAGS_VAR) -# -# Set FLAGS_VAR to the flags needed to make the C compiler treat warnings -# as errors. - - -# hts_check_compile_flags_needed.m4 -# -# SYNOPSIS -# -# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) -# -# DESCRIPTION -# -# Check whether the given FLAGS are required to build and link INPUT with -# the current language's compiler. Compilation and linking are first -# tries without FLAGS. If that fails it then tries to compile and -# link again with FLAGS. -# -# FEATURE describes the feature being tested, and is used when printing -# messages and to name the cache entry (along with the tested flags). -# -# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on -# success/failure. In ACTION-SUCCESS, $flags_needed will be set to -# either an empty string or FLAGS depending on the test results. -# -# If EXTRA-FLAGS is defined, it is added to the current language's default -# flags (e.g. CFLAGS) when the check is done. 
The check is thus made with -# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to -# force the compiler to issue an error when a bad flag is given. -# -# If omitted, INPUT defaults to AC_LANG_PROGRAM(), although that probably -# isn't very useful. -# -# NOTE: Implementation based on AX_CHECK_COMPILE_FLAG. -# -# LICENSE -# -# Copyright (c) 2008 Guido U. Draheim -# Copyright (c) 2011 Maarten Bosmans -# Copyright (c) 2023 Robert Davies -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -# AX_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) - - - -# SYNOPSIS -# -# HTS_TEST_CC_C_LD_FLAG(FLAG, FOUND_VAR) -# -# Test if FLAG can be used on both CFLAGS and LDFLAGS. It it works, -# variable FOUND_VAR is set to FLAG. - - - - - -# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- -# serial 12 (pkg-config-0.29.2) - - - - - - - - - - - - - - - - - - - - - - - - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. -set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_CC"; then - ac_ct_CC=$CC - # Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -else - CC="$ac_cv_prog_CC" -fi - -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. -set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - fi -fi -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - ac_prog_rejected=no -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# != 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. 
- shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" - fi -fi -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - for ac_prog in cl.exe - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$CC" && break - done -fi -if test -z "$CC"; then - ac_ct_CC=$CC - for ac_prog in cl.exe -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$ac_ct_CC" && break -done - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -fi - -fi - - -test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "no acceptable C compiler found in \$PATH -See \`config.log' for more details" "$LINENO" 5; } - -# Provide some information about the compiler. -$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 -set X $ac_compile -ac_compiler=$2 -for ac_option in --version -v -V -qversion; do - { { ac_try="$ac_compiler $ac_option >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compiler $ac_option >&5") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - sed '10a\ -... rest of stderr output deleted ... 
- 10q' conftest.err >conftest.er1 - cat conftest.er1 >&5 - fi - rm -f conftest.er1 conftest.err - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } -done - -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" -# Try to create an executable without -o first, disregard a.out. -# It will help us diagnose broken compilers, and finding out an intuition -# of exeext. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -$as_echo_n "checking whether the C compiler works... " >&6; } -ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` - -# The possible output files: -ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" - -ac_rmfiles= -for ac_file in $ac_files -do - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; - * ) ac_rmfiles="$ac_rmfiles $ac_file";; - esac -done -rm -f $ac_rmfiles - -if { { ac_try="$ac_link_default" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link_default") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : - # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. -# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' -# in a Makefile. We should not override ac_cv_exeext if it was cached, -# so that the user can short-circuit this test for compilers unknown to -# Autoconf. 
-for ac_file in $ac_files '' -do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) - ;; - [ab].out ) - # We found the default executable, but exeext='' is most - # certainly right. - break;; - *.* ) - if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; - then :; else - ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - fi - # We set ac_cv_exeext here because the later test for it is not - # safe: cross compilers may not add the suffix if given an `-o' - # argument, so we may need to know it at that point already. - # Even if this section looks crufty: it has the advantage of - # actually working. - break;; - * ) - break;; - esac -done -test "$ac_cv_exeext" = no && ac_cv_exeext= - -else - ac_file='' -fi -if test -z "$ac_file"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -$as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "C compiler cannot create executables -See \`config.log' for more details" "$LINENO" 5; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -$as_echo_n "checking for C compiler default output file name... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -$as_echo "$ac_file" >&6; } -ac_exeext=$ac_cv_exeext - -rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out -ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 -$as_echo_n "checking for suffix of executables... 
" >&6; } -if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : - # If both `conftest.exe' and `conftest' are `present' (well, observable) -# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will -# work properly (i.e., refer to `conftest.exe'), while it won't with -# `rm'. -for ac_file in conftest.exe conftest conftest.*; do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; - *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - break;; - * ) break;; - esac -done -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot compute suffix of executables: cannot compile and link -See \`config.log' for more details" "$LINENO" 5; } -fi -rm -f conftest conftest$ac_cv_exeext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 -$as_echo "$ac_cv_exeext" >&6; } - -rm -f conftest.$ac_ext -EXEEXT=$ac_cv_exeext -ac_exeext=$EXEEXT -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -FILE *f = fopen ("conftest.out", "w"); - return ferror (f) || fclose (f) != 0; - - ; - return 0; -} -_ACEOF -ac_clean_files="$ac_clean_files conftest.out" -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -$as_echo_n "checking whether we are cross compiling... 
" >&6; } -if test "$cross_compiling" != yes; then - { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - if { ac_try='./conftest$ac_cv_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - cross_compiling=no - else - if test "$cross_compiling" = maybe; then - cross_compiling=yes - else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot run C compiled programs. -If you meant to cross compile, use \`--host'. -See \`config.log' for more details" "$LINENO" 5; } - fi - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -$as_echo "$cross_compiling" >&6; } - -rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out -ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 -$as_echo_n "checking for suffix of object files... " >&6; } -if ${ac_cv_objext+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.o conftest.obj -if { { ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>&5 - ac_status=$? 
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : - for ac_file in conftest.o conftest.obj conftest.*; do - test -f "$ac_file" || continue; - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; - *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` - break;; - esac -done -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot compute suffix of object files: cannot compile -See \`config.log' for more details" "$LINENO" 5; } -fi -rm -f conftest.$ac_cv_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 -$as_echo "$ac_cv_objext" >&6; } -OBJEXT=$ac_cv_objext -ac_objext=$OBJEXT -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 -$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if ${ac_cv_c_compiler_gnu+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ -#ifndef __GNUC__ - choke me -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_compiler_gnu=yes -else - ac_compiler_gnu=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -ac_cv_c_compiler_gnu=$ac_compiler_gnu - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 -$as_echo "$ac_cv_c_compiler_gnu" >&6; } -if test $ac_compiler_gnu = yes; then - GCC=yes -else - GCC= -fi -ac_test_CFLAGS=${CFLAGS+set} -ac_save_CFLAGS=$CFLAGS -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 -$as_echo_n "checking whether $CC accepts -g... 
" >&6; } -if ${ac_cv_prog_cc_g+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_g=yes -else - CFLAGS="" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - -else - ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_g=yes -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 -$as_echo "$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then - CFLAGS=$ac_save_CFLAGS -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 -$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if ${ac_cv_prog_cc_c89+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c89=no -ac_save_CC=$CC -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -struct stat; -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; - -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; - -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} -_ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" -do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_c89=$ac_arg -fi -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c89" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC - -fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 -$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c89" != xno; then : - -fi - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. -set dummy ${ac_tool_prefix}ranlib; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_RANLIB+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$RANLIB"; then - ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -RANLIB=$ac_cv_prog_RANLIB -if test -n "$RANLIB"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 -$as_echo "$RANLIB" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_RANLIB"; then - ac_ct_RANLIB=$RANLIB - # Extract the first word of "ranlib", so it can be a program name with args. -set dummy ranlib; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_RANLIB"; then - ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_RANLIB="ranlib" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB -if test -n "$ac_ct_RANLIB"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 -$as_echo "$ac_ct_RANLIB" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - if test "x$ac_ct_RANLIB" = x; then - RANLIB=":" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - RANLIB=$ac_ct_RANLIB - fi -else - RANLIB="$ac_cv_prog_RANLIB" -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 -$as_echo_n "checking for grep that handles long lines and -e... " >&6; } -if ${ac_cv_path_GREP+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -z "$GREP"; then - ac_path_GREP_found=false - # Loop through the user's path and test for each of PROGNAME-LIST - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in grep ggrep; do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" - as_fn_executable_p "$ac_path_GREP" || continue -# Check for GNU ac_path_GREP and select it if it is found. 
- # Check for GNU $ac_path_GREP -case `"$ac_path_GREP" --version 2>&1` in -*GNU*) - ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; -*) - ac_count=0 - $as_echo_n 0123456789 >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - $as_echo 'GREP' >> "conftest.nl" - "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - as_fn_arith $ac_count + 1 && ac_count=$as_val - if test $ac_count -gt ${ac_path_GREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_GREP="$ac_path_GREP" - ac_path_GREP_max=$ac_count - fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - $ac_path_GREP_found && break 3 - done - done - done -IFS=$as_save_IFS - if test -z "$ac_cv_path_GREP"; then - as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 - fi -else - ac_cv_path_GREP=$GREP -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 -$as_echo "$ac_cv_path_GREP" >&6; } - GREP="$ac_cv_path_GREP" - - - - # Check whether --enable-warnings was given. -if test "${enable_warnings+set}" = set; then : - enableval=$enable_warnings; -else - enable_warnings=yes -fi - - - if test "x$enable_warnings" != xno; then : - - - - ansi="" - if test "x$ansi" = "x"; then : - msg="for C compiler warning flags" -else - msg="for C compiler warning and ANSI conformance flags" -fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking $msg" >&5 -$as_echo_n "checking $msg... 
" >&6; } - if ${hts_cv_prog_cc_warnings+:} false; then : - $as_echo_n "(cached) " >&6 -else - hts_cv_prog_cc_warnings="" - if test "x$CC" != "x"; then : - - cat > conftest.c < /dev/null 2>&1 && - test -f conftest.o; then : - if test "x$ansi" = "x"; then : - hts_cv_prog_cc_warnings="-Wall" -else - hts_cv_prog_cc_warnings="-Wall -ansi -pedantic" -fi - -elif # Sun Studio or Solaris C compiler - "$CC" -V 2>&1 | $GREP -i -E "WorkShop|Sun C" > /dev/null 2>&1 && - "$CC" -c -v -Xc conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - if test "x$ansi" = "x"; then : - hts_cv_prog_cc_warnings="-v" -else - hts_cv_prog_cc_warnings="-v -Xc" -fi - -elif # Digital Unix C compiler - "$CC" -V 2>&1 | $GREP -i "Digital UNIX Compiler" > /dev/null 2>&1 && - "$CC" -c -verbose -w0 -warnprotos -std1 conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - if test "x$ansi" = "x"; then : - hts_cv_prog_cc_warnings="-verbose -w0 -warnprotos" -else - hts_cv_prog_cc_warnings="-verbose -w0 -warnprotos -std1" -fi - -elif # C for AIX Compiler - "$CC" 2>&1 | $GREP -i "C for AIX Compiler" > /dev/null 2>&1 && - "$CC" -c -qlanglvl=ansi -qinfo=all conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - if test "x$ansi" = "x"; then : - hts_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd" -else - hts_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd -qlanglvl=ansi" -fi - -elif # IRIX C compiler - "$CC" -version 2>&1 | $GREP -i "MIPSpro Compilers" > /dev/null 2>&1 && - "$CC" -c -fullwarn -ansi -ansiE conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - if test "x$ansi" = "x"; then : - hts_cv_prog_cc_warnings="-fullwarn" -else - hts_cv_prog_cc_warnings="-fullwarn -ansi -ansiE" -fi - -elif # HP-UX C compiler - what "$CC" 2>&1 | $GREP -i "HP C Compiler" > /dev/null 2>&1 && - "$CC" -c -Aa +w1 conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - if test "x$ansi" = "x"; then : - hts_cv_prog_cc_warnings="+w1" -else - hts_cv_prog_cc_warnings="+w1 
-Aa" -fi - -elif # The NEC SX series (Super-UX 10) C compiler - "$CC" -V 2>&1 | $GREP "/SX" > /dev/null 2>&1 && - "$CC" -c -pvctl,fullmsg -Xc conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - - if test "x$ansi" = "x"; then : - hts_cv_prog_cc_warnings="-pvctl,fullmsg" -else - hts_cv_prog_cc_warnings="-pvctl,fullmsg -Xc" -fi - -elif # The Cray C compiler (Unicos) - "$CC" -V 2>&1 | $GREP -i "Cray" > /dev/null 2>&1 && - "$CC" -c -h msglevel_2 conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - if test "x$ansi" = "x"; then : - hts_cv_prog_cc_warnings="-h#msglevel_2" -else - hts_cv_prog_cc_warnings="-h#msglevel_2,conform" -fi - -elif # The Tiny C Compiler - "$CC" -v 2>&1 | $GREP "tcc version" > /dev/null && - "$CC" -Wall -c conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - hts_cv_prog_cc_warnings="-Wall" - -fi - rm -f conftest.* - -fi - -fi - - - if test "x$hts_cv_prog_cc_warnings" != "x"; then : - -ac_arg_result=`echo "$hts_cv_prog_cc_warnings" | tr '#' ' '` -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_arg_result" >&5 -$as_echo "$ac_arg_result" >&6; } - -ac_arg_needed="" -for ac_arg in $hts_cv_prog_cc_warnings -do - ac_arg_sp=`echo "$ac_arg" | tr '#' ' '` - case " $CFLAGS " in #( - *" $ac_arg_sp "*) : - ;; #( - *) : - ac_arg_needed="$ac_arg_all $ac_arg_sp" ;; -esac -done -CFLAGS="$ac_arg_needed $CFLAGS" -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5 -$as_echo "unknown" >&6; } - -fi - -fi - - - # Check whether --enable-werror was given. -if test "${enable_werror+set}" = set; then : - enableval=$enable_werror; -else - enable_werror=no -fi - - - if test "x$enable_werror" != xno; then : - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler flags to error on warnings" >&5 -$as_echo_n "checking for C compiler flags to error on warnings... 
" >&6; } - if ${hts_cv_prog_cc_werror+:} false; then : - $as_echo_n "(cached) " >&6 -else - hts_cv_prog_cc_werror="" - if test "x$CC" != "x"; then : - - cat > conftest.c < /dev/null 2>&1 && - test -f conftest.o; then : - hts_cv_prog_cc_werror="-Werror" -elif # Sun Studio or Solaris C compiler - "$CC" -V 2>&1 | $GREP -i -E "WorkShop|Sun C" > /dev/null 2>&1 && - "$CC" -c -errwarn=%all conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - hts_cv_prog_cc_werror="-errwarn=%all" -elif # The Tiny C Compiler - "$CC" -v 2>&1 | $GREP "tcc version" > /dev/null && - "$CC" -Wall -c conftest.c > /dev/null 2>&1 && - test -f conftest.o; then : - hts_cv_prog_cc_werror="-Werror" - -fi - rm -f conftest.* - -fi - -fi - - if test "x$hts_cv_prog_cc_werror" != x; then : - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_prog_cc_werror" >&5 -$as_echo "$hts_cv_prog_cc_werror" >&6; } - if test "xhts_late_cflags" != x; then : - eval hts_late_cflags="$hts_cv_prog_cc_werror" -fi - -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5 -$as_echo "unknown" >&6; } - -fi - -fi - - -# HTSlib uses X/Open-only facilities (M_SQRT2 etc, drand48() etc), and -# various POSIX functions that are provided by various _POSIX_C_SOURCE values -# or by _XOPEN_SOURCE >= 500. It also uses usleep(), which is removed when -# _XOPEN_SOURCE >= 700. Additionally, some definitions may require -# _XOPEN_SOURCE >= 600 on some platforms (snprintf on MinGW, -# PTHREAD_MUTEX_RECURSIVE on some Linux distributions). Hence we set it to 600. - -# Define _XOPEN_SOURCE unless the user has already done so via $CPPFLAGS etc. - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 -$as_echo_n "checking how to run the C preprocessor... 
" >&6; } -# On Suns, sometimes $CPP names a directory. -if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then - if ${ac_cv_prog_CPP+:} false; then : - $as_echo_n "(cached) " >&6 -else - # Double quotes because CPP needs to be expanded - for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" - do - ac_preproc_ok=false -for ac_c_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif - Syntax error -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - -else - # Broken: fails on valid input. -continue -fi -rm -f conftest.err conftest.i conftest.$ac_ext - - # OK, works on sane cases. Now check whether nonexistent headers - # can be detected and how. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - # Broken: success on invalid input. -continue -else - # Passes both tests. -ac_preproc_ok=: -break -fi -rm -f conftest.err conftest.i conftest.$ac_ext - -done -# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. -rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : - break -fi - - done - ac_cv_prog_CPP=$CPP - -fi - CPP=$ac_cv_prog_CPP -else - ac_cv_prog_CPP=$CPP -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 -$as_echo "$CPP" >&6; } -ac_preproc_ok=false -for ac_c_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. 
- # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif - Syntax error -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - -else - # Broken: fails on valid input. -continue -fi -rm -f conftest.err conftest.i conftest.$ac_ext - - # OK, works on sane cases. Now check whether nonexistent headers - # can be detected and how. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - # Broken: success on invalid input. -continue -else - # Passes both tests. -ac_preproc_ok=: -break -fi -rm -f conftest.err conftest.i conftest.$ac_ext - -done -# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. -rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : - -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "C preprocessor \"$CPP\" fails sanity check -See \`config.log' for more details" "$LINENO" 5; } -fi - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 -$as_echo_n "checking for egrep... " >&6; } -if ${ac_cv_path_EGREP+:} false; then : - $as_echo_n "(cached) " >&6 -else - if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 - then ac_cv_path_EGREP="$GREP -E" - else - if test -z "$EGREP"; then - ac_path_EGREP_found=false - # Loop through the user's path and test for each of PROGNAME-LIST - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_prog in egrep; do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" - as_fn_executable_p "$ac_path_EGREP" || continue -# Check for GNU ac_path_EGREP and select it if it is found. - # Check for GNU $ac_path_EGREP -case `"$ac_path_EGREP" --version 2>&1` in -*GNU*) - ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; -*) - ac_count=0 - $as_echo_n 0123456789 >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - $as_echo 'EGREP' >> "conftest.nl" - "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - as_fn_arith $ac_count + 1 && ac_count=$as_val - if test $ac_count -gt ${ac_path_EGREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_EGREP="$ac_path_EGREP" - ac_path_EGREP_max=$ac_count - fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - $ac_path_EGREP_found && break 3 - done - done - done -IFS=$as_save_IFS - if test -z "$ac_cv_path_EGREP"; then - as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 - fi -else - ac_cv_path_EGREP=$EGREP -fi - - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 -$as_echo "$ac_cv_path_EGREP" >&6; } - EGREP="$ac_cv_path_EGREP" - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 -$as_echo_n "checking for ANSI C header files... " >&6; } -if ${ac_cv_header_stdc+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -#include -#include -#include -#include - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_header_stdc=yes -else - ac_cv_header_stdc=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include - -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "memchr" >/dev/null 2>&1; then : - -else - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include - -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "free" >/dev/null 2>&1; then : - -else - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. - if test "$cross_compiling" = yes; then : - : -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -#if ((' ' & 0x0FF) == 0x020) -# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) -#else -# define ISLOWER(c) \ - (('a' <= (c) && (c) <= 'i') \ - || ('j' <= (c) && (c) <= 'r') \ - || ('s' <= (c) && (c) <= 'z')) -# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) -#endif - -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int -main () -{ - int i; - for (i = 0; i < 256; i++) - if (XOR (islower (i), ISLOWER (i)) - || toupper (i) != TOUPPER (i)) - return 2; - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO"; then : - -else - ac_cv_header_stdc=no -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext -fi - -fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 -$as_echo "$ac_cv_header_stdc" >&6; } -if test $ac_cv_header_stdc = yes; then - -$as_echo "#define STDC_HEADERS 1" >>confdefs.h - -fi - -# On IRIX 5.3, sys/types and inttypes.h are conflicting. -for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ - inttypes.h stdint.h unistd.h -do : - as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` -ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default -" -if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : - cat >>confdefs.h <<_ACEOF -#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF - -fi - -done - - -ac_fn_c_check_decl "$LINENO" "_XOPEN_SOURCE" "ac_cv_have_decl__XOPEN_SOURCE" "$ac_includes_default" -if test "x$ac_cv_have_decl__XOPEN_SOURCE" = xyes; then : - -else - -$as_echo "#define _XOPEN_SOURCE 600" >>confdefs.h - -fi - - - -hts_cflags_sse4="" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for ssse3" >&5 -$as_echo_n "checking C compiler flags needed for ssse3... " >&6; } -if ${hts_cv_check_cflags_needed_ssse3___mssse3+:} false; then : - $as_echo_n "(cached) " >&6 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_shuffle_epi8(a, b); - return *((char *) &c); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_ssse3___mssse3=none -else - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mssse3" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_shuffle_epi8(a, b); - return *((char *) &c); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_ssse3___mssse3=-mssse3 -else - hts_cv_check_cflags_needed_ssse3___mssse3=unsupported -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_ssse3___mssse3" >&5 -$as_echo "$hts_cv_check_cflags_needed_ssse3___mssse3" >&6; } -if test "x$hts_cv_check_cflags_needed_ssse3___mssse3" = xunsupported; then : - - : - -else - - if test "x$hts_cv_check_cflags_needed_ssse3___mssse3" = xnone; then : - flags_needed="" -else - flags_needed="$hts_cv_check_cflags_needed_ssse3___mssse3" -fi - - hts_cflags_sse4="$flags_needed $hts_cflags_sse4" - -$as_echo "#define HAVE_SSSE3 1" >>confdefs.h - - - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for popcnt" >&5 -$as_echo_n "checking C compiler flags needed for popcnt... " >&6; } -if ${hts_cv_check_cflags_needed_popcnt___mpopcnt+:} false; then : - $as_echo_n "(cached) " >&6 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - unsigned int i = _mm_popcnt_u32(1); - return i != 1; - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_popcnt___mpopcnt=none -else - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mpopcnt" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - unsigned int i = _mm_popcnt_u32(1); - return i != 1; - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_popcnt___mpopcnt=-mpopcnt -else - hts_cv_check_cflags_needed_popcnt___mpopcnt=unsupported -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_popcnt___mpopcnt" >&5 -$as_echo "$hts_cv_check_cflags_needed_popcnt___mpopcnt" >&6; } -if test "x$hts_cv_check_cflags_needed_popcnt___mpopcnt" = xunsupported; then : - - : - -else - - if test "x$hts_cv_check_cflags_needed_popcnt___mpopcnt" = xnone; then : - flags_needed="" -else - flags_needed="$hts_cv_check_cflags_needed_popcnt___mpopcnt" -fi - - hts_cflags_sse4="$flags_needed $hts_cflags_sse4" - -$as_echo "#define HAVE_POPCNT 1" >>confdefs.h - - - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for sse4.1" >&5 -$as_echo_n "checking C compiler flags needed for sse4.1... " >&6; } -if ${hts_cv_check_cflags_needed_sse4_1___msse4_1+:} false; then : - $as_echo_n "(cached) " >&6 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_max_epu32(a, b); - return *((char *) &c); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_sse4_1___msse4_1=none -else - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse4.1" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_max_epu32(a, b); - return *((char *) &c); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_sse4_1___msse4_1=-msse4.1 -else - hts_cv_check_cflags_needed_sse4_1___msse4_1=unsupported -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_sse4_1___msse4_1" >&5 -$as_echo "$hts_cv_check_cflags_needed_sse4_1___msse4_1" >&6; } -if test "x$hts_cv_check_cflags_needed_sse4_1___msse4_1" = xunsupported; then : - - : - -else - - if test "x$hts_cv_check_cflags_needed_sse4_1___msse4_1" = xnone; then : - flags_needed="" -else - flags_needed="$hts_cv_check_cflags_needed_sse4_1___msse4_1" -fi - - hts_cflags_sse4="$flags_needed $hts_cflags_sse4" - -$as_echo "#define HAVE_SSE4_1 1" >>confdefs.h - - - $as_echo "#define UBSAN 1" >>confdefs.h - - - -fi - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for avx2" >&5 -$as_echo_n "checking C compiler flags needed for avx2... 
" >&6; } -if ${hts_cv_check_cflags_needed_avx2___mavx2+:} false; then : - $as_echo_n "(cached) " >&6 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); - long long c = _mm256_extract_epi64(b, 0); - return (int) c; - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_avx2___mavx2=none -else - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); - long long c = _mm256_extract_epi64(b, 0); - return (int) c; - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_avx2___mavx2=-mavx2 -else - hts_cv_check_cflags_needed_avx2___mavx2=unsupported -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_avx2___mavx2" >&5 -$as_echo "$hts_cv_check_cflags_needed_avx2___mavx2" >&6; } -if test "x$hts_cv_check_cflags_needed_avx2___mavx2" = xunsupported; then : - - : - -else - - if test "x$hts_cv_check_cflags_needed_avx2___mavx2" = xnone; then : - flags_needed="" -else - flags_needed="$hts_cv_check_cflags_needed_avx2___mavx2" -fi - - hts_cflags_avx2="$flags_needed" - - -$as_echo "#define HAVE_AVX2 1" >>confdefs.h - - - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for avx512f" >&5 -$as_echo_n "checking C compiler flags needed for avx512f... 
" >&6; } -if ${hts_cv_check_cflags_needed_avx512f___mavx512f+:} false; then : - $as_echo_n "(cached) " >&6 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - __m512i a = _mm512_set1_epi32(1); - __m512i b = _mm512_add_epi32(a, a); - return *((char *) &b); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_avx512f___mavx512f=none -else - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512f" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main () -{ - - #ifdef __x86_64__ - __m512i a = _mm512_set1_epi32(1); - __m512i b = _mm512_add_epi32(a, a); - return *((char *) &b); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check_cflags_needed_avx512f___mavx512f=-mavx512f -else - hts_cv_check_cflags_needed_avx512f___mavx512f=unsupported -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_avx512f___mavx512f" >&5 -$as_echo "$hts_cv_check_cflags_needed_avx512f___mavx512f" >&6; } -if test "x$hts_cv_check_cflags_needed_avx512f___mavx512f" = xunsupported; then : - - : - -else - - if test "x$hts_cv_check_cflags_needed_avx512f___mavx512f" = xnone; then : - flags_needed="" -else - flags_needed="$hts_cv_check_cflags_needed_avx512f___mavx512f" -fi - - hts_cflags_avx512="$flags_needed" - - -$as_echo "#define HAVE_AVX512 1" >>confdefs.h - - - -fi - - - - - - - - - -if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. 
-set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_path_PKG_CONFIG+:} false; then : - $as_echo_n "(cached) " >&6 -else - case $PKG_CONFIG in - [\\/]* | ?:[\\/]*) - ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - ;; -esac -fi -PKG_CONFIG=$ac_cv_path_PKG_CONFIG -if test -n "$PKG_CONFIG"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 -$as_echo "$PKG_CONFIG" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$ac_cv_path_PKG_CONFIG"; then - ac_pt_PKG_CONFIG=$PKG_CONFIG - # Extract the first word of "pkg-config", so it can be a program name with args. -set dummy pkg-config; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_path_ac_pt_PKG_CONFIG+:} false; then : - $as_echo_n "(cached) " >&6 -else - case $ac_pt_PKG_CONFIG in - [\\/]* | ?:[\\/]*) - ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - ;; -esac -fi -ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG -if test -n "$ac_pt_PKG_CONFIG"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 -$as_echo "$ac_pt_PKG_CONFIG" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - if test "x$ac_pt_PKG_CONFIG" = x; then - PKG_CONFIG="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - PKG_CONFIG=$ac_pt_PKG_CONFIG - fi -else - PKG_CONFIG="$ac_cv_path_PKG_CONFIG" -fi - -fi -if test -n "$PKG_CONFIG"; then - _pkg_min_version=0.9.0 - { $as_echo "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 -$as_echo_n "checking pkg-config is at least version $_pkg_min_version... " >&6; } - if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - PKG_CONFIG="" - fi -fi - -need_crypto=no -pc_requires= -static_LDFLAGS=$LDFLAGS -static_LIBS='-lpthread -lz -lm' -private_LIBS=$LDFLAGS - -# Check whether --enable-versioned-symbols was given. -if test "${enable_versioned_symbols+set}" = set; then : - enableval=$enable_versioned_symbols; -else - enable_versioned_symbols=yes -fi - - -# Check whether --enable-bz2 was given. 
-if test "${enable_bz2+set}" = set; then : - enableval=$enable_bz2; -else - enable_bz2=yes -fi - - -# Check whether --enable-gcs was given. -if test "${enable_gcs+set}" = set; then : - enableval=$enable_gcs; -else - enable_gcs=check -fi - - -# Check whether --enable-largefile was given. -if test "${enable_largefile+set}" = set; then : - enableval=$enable_largefile; -fi - -if test "$enable_largefile" != no; then - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for special C compiler options needed for large files" >&5 -$as_echo_n "checking for special C compiler options needed for large files... " >&6; } -if ${ac_cv_sys_largefile_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_sys_largefile_CC=no - if test "$GCC" != yes; then - ac_save_CC=$CC - while :; do - # IRIX 6.2 and later do not support large files by default, - # so use the C compiler's -n32 option if that helps. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply define LARGE_OFF_T to be 9223372036854775807, - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 
1 : -1]; -int -main () -{ - - ; - return 0; -} -_ACEOF - if ac_fn_c_try_compile "$LINENO"; then : - break -fi -rm -f core conftest.err conftest.$ac_objext - CC="$CC -n32" - if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_sys_largefile_CC=' -n32'; break -fi -rm -f core conftest.err conftest.$ac_objext - break - done - CC=$ac_save_CC - rm -f conftest.$ac_ext - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_CC" >&5 -$as_echo "$ac_cv_sys_largefile_CC" >&6; } - if test "$ac_cv_sys_largefile_CC" != no; then - CC=$CC$ac_cv_sys_largefile_CC - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _FILE_OFFSET_BITS value needed for large files" >&5 -$as_echo_n "checking for _FILE_OFFSET_BITS value needed for large files... " >&6; } -if ${ac_cv_sys_file_offset_bits+:} false; then : - $as_echo_n "(cached) " >&6 -else - while :; do - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply define LARGE_OFF_T to be 9223372036854775807, - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_sys_file_offset_bits=no; break -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#define _FILE_OFFSET_BITS 64 -#include - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply define LARGE_OFF_T to be 9223372036854775807, - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. 
*/ -#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_sys_file_offset_bits=64; break -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_cv_sys_file_offset_bits=unknown - break -done -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_file_offset_bits" >&5 -$as_echo "$ac_cv_sys_file_offset_bits" >&6; } -case $ac_cv_sys_file_offset_bits in #( - no | unknown) ;; - *) -cat >>confdefs.h <<_ACEOF -#define _FILE_OFFSET_BITS $ac_cv_sys_file_offset_bits -_ACEOF -;; -esac -rm -rf conftest* - if test $ac_cv_sys_file_offset_bits = unknown; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _LARGE_FILES value needed for large files" >&5 -$as_echo_n "checking for _LARGE_FILES value needed for large files... " >&6; } -if ${ac_cv_sys_large_files+:} false; then : - $as_echo_n "(cached) " >&6 -else - while :; do - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply define LARGE_OFF_T to be 9223372036854775807, - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_sys_large_files=no; break -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#define _LARGE_FILES 1 -#include - /* Check that off_t can represent 2**63 - 1 correctly. 
- We can't simply define LARGE_OFF_T to be 9223372036854775807, - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_sys_large_files=1; break -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_cv_sys_large_files=unknown - break -done -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_large_files" >&5 -$as_echo "$ac_cv_sys_large_files" >&6; } -case $ac_cv_sys_large_files in #( - no | unknown) ;; - *) -cat >>confdefs.h <<_ACEOF -#define _LARGE_FILES $ac_cv_sys_large_files -_ACEOF -;; -esac -rm -rf conftest* - fi - - -fi - - -# Check whether --enable-libcurl was given. -if test "${enable_libcurl+set}" = set; then : - enableval=$enable_libcurl; -else - enable_libcurl=check -fi - - -# Check whether --enable-lzma was given. -if test "${enable_lzma+set}" = set; then : - enableval=$enable_lzma; -else - enable_lzma=yes -fi - - -# Check whether --enable-plugins was given. -if test "${enable_plugins+set}" = set; then : - enableval=$enable_plugins; -else - enable_plugins=no -fi - - - - -# Check whether --with-external-htscodecs was given. -if test "${with_external_htscodecs+set}" = set; then : - withval=$with_external_htscodecs; -else - with_external_htscodecs=no -fi - - - - -# Check whether --with-libdeflate was given. -if test "${with_libdeflate+set}" = set; then : - withval=$with_libdeflate; -else - with_libdeflate=check -fi - - - -# Check whether --with-plugin-dir was given. -if test "${with_plugin_dir+set}" = set; then : - withval=$with_plugin_dir; case $withval in - yes|no) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? 
"no directory specified for --with-plugin-dir" "$LINENO" 5 ;; - esac -else - with_plugin_dir='$(libexecdir)/htslib' -fi - -plugindir=$with_plugin_dir - - - -# Check whether --with-plugin-path was given. -if test "${with_plugin_path+set}" = set; then : - withval=$with_plugin_path; case $withval in - yes) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "no path specified for --with-plugin-path" "$LINENO" 5 ;; - no) with_plugin_path= ;; - esac -else - with_plugin_path=$with_plugin_dir -fi - -pluginpath=$with_plugin_path - - -# Check whether --enable-s3 was given. -if test "${enable_s3+set}" = set; then : - enableval=$enable_s3; -else - enable_s3=check -fi - - -basic_host=${host_alias:-unknown-`uname -s`} -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking shared library type for $basic_host" >&5 -$as_echo_n "checking shared library type for $basic_host... " >&6; } -case $basic_host in - *-cygwin* | *-CYGWIN*) - host_result="Cygwin DLL" - PLATFORM=CYGWIN - PLUGIN_EXT=.cygdll - ;; - *-darwin* | *-Darwin*) - host_result="Darwin dylib" - PLATFORM=Darwin - PLUGIN_EXT=.bundle - ;; - *-msys* | *-MSYS* | *-mingw* | *-MINGW*) - host_result="MSYS dll" - PLATFORM=MSYS - PLUGIN_EXT=.dll - # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64, - # %lld and %z printf formats work. It also enforces the snprintf to - # be C99 compliant so it returns the correct values (in kstring.c). - - # Now set by default, so no need to do it here. 
- # CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE=600" - ;; - *) - host_result="plain .so" - PLATFORM=default - PLUGIN_EXT=.so - ;; -esac -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $host_result" >&5 -$as_echo "$host_result" >&6; } - - -if test x"$PLATFORM" = xdefault && test x"$enable_versioned_symbols" = xyes; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the linker supports versioned symbols" >&5 -$as_echo_n "checking whether the linker supports versioned symbols... " >&6; } -if ${hts_cv_have_versioned_symbols+:} false; then : - $as_echo_n "(cached) " >&6 -else - - save_LDFLAGS=$LDFLAGS - LDFLAGS="-Wl,-version-script,$srcdir/htslib.map $LDFLAGS" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_have_versioned_symbols=yes -else - hts_cv_have_versioned_symbols=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - LDFLAGS=$save_LDFLAGS - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_have_versioned_symbols" >&5 -$as_echo "$hts_cv_have_versioned_symbols" >&6; } - if test "x$hts_cv_have_versioned_symbols" = xyes; then : - - VERSION_SCRIPT_LDFLAGS='-Wl,-version-script,$(srcprefix)htslib.map' - - -fi - -fi - - - # Test for flags to set default shared library visibility to hidden - # -fvisibility=hidden : GCC compatible - # -xldscope=hidden : SunStudio - ac_opt_found=no - if test "x$ac_opt_found" = "xno"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -fvisibility=hidden" >&5 -$as_echo_n "checking whether the compiler accepts -fvisibility=hidden... 
" >&6; } -if ${hts_cv_check__fvisibility_hidden+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_save_cflags=$CFLAGS - ac_check_save_ldflags=$LDFLAGS - CFLAGS="$CFLAGS -fvisibility=hidden" - LDFLAGS="$LDFLAGS -fvisibility=hidden" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check__fvisibility_hidden=yes - if test "xac_opt_found" != x; then : - eval ac_opt_found="-fvisibility=hidden" -fi -else - hts_cv_check__fvisibility_hidden=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ac_check_save_cflags - LDFLAGS=$ac_check_save_ldflags -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__fvisibility_hidden" >&5 -$as_echo "$hts_cv_check__fvisibility_hidden" >&6; } - -fi - if test "x$ac_opt_found" = "xno"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -xldscope=hidden" >&5 -$as_echo_n "checking whether the compiler accepts -xldscope=hidden... " >&6; } -if ${hts_cv_check__xldscope_hidden+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_save_cflags=$CFLAGS - ac_check_save_ldflags=$LDFLAGS - CFLAGS="$CFLAGS -xldscope=hidden" - LDFLAGS="$LDFLAGS -xldscope=hidden" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check__xldscope_hidden=yes - if test "xac_opt_found" != x; then : - eval ac_opt_found="-xldscope=hidden" -fi -else - hts_cv_check__xldscope_hidden=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ac_check_save_cflags - LDFLAGS=$ac_check_save_ldflags -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__xldscope_hidden" >&5 -$as_echo "$hts_cv_check__xldscope_hidden" >&6; } - -fi - - if test "x$ac_opt_found" != "xno"; then : - CFLAGS="$CFLAGS $ac_opt_found" - LDFLAGS="$LDFLAGS $ac_opt_found" -fi - - - - - - for ac_header in $ac_header_list -do : - as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` -ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default -" -if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : - cat >>confdefs.h <<_ACEOF -#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF - -fi - -done - - - - - - - - -for ac_func in getpagesize -do : - ac_fn_c_check_func "$LINENO" "getpagesize" "ac_cv_func_getpagesize" -if test "x$ac_cv_func_getpagesize" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_GETPAGESIZE 1 -_ACEOF - -fi -done - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working mmap" >&5 -$as_echo_n "checking for working mmap... " >&6; } -if ${ac_cv_func_mmap_fixed_mapped+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test "$cross_compiling" = yes; then : - ac_cv_func_mmap_fixed_mapped=no -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$ac_includes_default -/* malloc might have been renamed as rpl_malloc. */ -#undef malloc - -/* Thanks to Mike Haertel and Jim Avera for this test. 
- Here is a matrix of mmap possibilities: - mmap private not fixed - mmap private fixed at somewhere currently unmapped - mmap private fixed at somewhere already mapped - mmap shared not fixed - mmap shared fixed at somewhere currently unmapped - mmap shared fixed at somewhere already mapped - For private mappings, we should verify that changes cannot be read() - back from the file, nor mmap's back from the file at a different - address. (There have been systems where private was not correctly - implemented like the infamous i386 svr4.0, and systems where the - VM page cache was not coherent with the file system buffer cache - like early versions of FreeBSD and possibly contemporary NetBSD.) - For shared mappings, we should conversely verify that changes get - propagated back to all the places they're supposed to be. - - Grep wants private fixed already mapped. - The main things grep needs to know about mmap are: - * does it exist and is it safe to write into the mmap'd area - * how to use it (BSD variants) */ - -#include -#include - -#if !defined STDC_HEADERS && !defined HAVE_STDLIB_H -char *malloc (); -#endif - -/* This mess was copied from the GNU getpagesize.h. 
*/ -#ifndef HAVE_GETPAGESIZE -# ifdef _SC_PAGESIZE -# define getpagesize() sysconf(_SC_PAGESIZE) -# else /* no _SC_PAGESIZE */ -# ifdef HAVE_SYS_PARAM_H -# include -# ifdef EXEC_PAGESIZE -# define getpagesize() EXEC_PAGESIZE -# else /* no EXEC_PAGESIZE */ -# ifdef NBPG -# define getpagesize() NBPG * CLSIZE -# ifndef CLSIZE -# define CLSIZE 1 -# endif /* no CLSIZE */ -# else /* no NBPG */ -# ifdef NBPC -# define getpagesize() NBPC -# else /* no NBPC */ -# ifdef PAGESIZE -# define getpagesize() PAGESIZE -# endif /* PAGESIZE */ -# endif /* no NBPC */ -# endif /* no NBPG */ -# endif /* no EXEC_PAGESIZE */ -# else /* no HAVE_SYS_PARAM_H */ -# define getpagesize() 8192 /* punt totally */ -# endif /* no HAVE_SYS_PARAM_H */ -# endif /* no _SC_PAGESIZE */ - -#endif /* no HAVE_GETPAGESIZE */ - -int -main () -{ - char *data, *data2, *data3; - const char *cdata2; - int i, pagesize; - int fd, fd2; - - pagesize = getpagesize (); - - /* First, make a file with some known garbage in it. */ - data = (char *) malloc (pagesize); - if (!data) - return 1; - for (i = 0; i < pagesize; ++i) - *(data + i) = rand (); - umask (0); - fd = creat ("conftest.mmap", 0600); - if (fd < 0) - return 2; - if (write (fd, data, pagesize) != pagesize) - return 3; - close (fd); - - /* Next, check that the tail of a page is zero-filled. File must have - non-zero length, otherwise we risk SIGBUS for entire page. */ - fd2 = open ("conftest.txt", O_RDWR | O_CREAT | O_TRUNC, 0600); - if (fd2 < 0) - return 4; - cdata2 = ""; - if (write (fd2, cdata2, 1) != 1) - return 5; - data2 = (char *) mmap (0, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd2, 0L); - if (data2 == MAP_FAILED) - return 6; - for (i = 0; i < pagesize; ++i) - if (*(data2 + i)) - return 7; - close (fd2); - if (munmap (data2, pagesize)) - return 8; - - /* Next, try to mmap the file at a fixed address which already has - something else allocated at it. If we can, also make sure that - we see the same garbage. 
*/ - fd = open ("conftest.mmap", O_RDWR); - if (fd < 0) - return 9; - if (data2 != mmap (data2, pagesize, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED, fd, 0L)) - return 10; - for (i = 0; i < pagesize; ++i) - if (*(data + i) != *(data2 + i)) - return 11; - - /* Finally, make sure that changes to the mapped area do not - percolate back to the file as seen by read(). (This is a bug on - some variants of i386 svr4.0.) */ - for (i = 0; i < pagesize; ++i) - *(data2 + i) = *(data2 + i) + 1; - data3 = (char *) malloc (pagesize); - if (!data3) - return 12; - if (read (fd, data3, pagesize) != pagesize) - return 13; - for (i = 0; i < pagesize; ++i) - if (*(data + i) != *(data3 + i)) - return 14; - close (fd); - free (data); - free (data3); - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO"; then : - ac_cv_func_mmap_fixed_mapped=yes -else - ac_cv_func_mmap_fixed_mapped=no -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_mmap_fixed_mapped" >&5 -$as_echo "$ac_cv_func_mmap_fixed_mapped" >&6; } -if test $ac_cv_func_mmap_fixed_mapped = yes; then - -$as_echo "#define HAVE_MMAP 1" >>confdefs.h - -fi -rm -f conftest.mmap conftest.txt - -for ac_func in gmtime_r fsync drand48 srand48_deterministic -do : - as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` -ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" -if eval test \"x\$"$as_ac_var"\" = x"yes"; then : - cat >>confdefs.h <<_ACEOF -#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 -_ACEOF - -fi -done - - -# Darwin has a dubious fdatasync() symbol, but no declaration in -as_ac_Symbol=`$as_echo "ac_cv_have_decl_fdatasync(int)" | $as_tr_sh` -ac_fn_c_check_decl "$LINENO" "fdatasync(int)" "$as_ac_Symbol" "$ac_includes_default" -if eval test \"x\$"$as_ac_Symbol"\" = x"yes"; then : - for ac_func in fdatasync -do : - ac_fn_c_check_func "$LINENO" "fdatasync" 
"ac_cv_func_fdatasync" -if test "x$ac_cv_func_fdatasync" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_FDATASYNC 1 -_ACEOF - -fi -done - -fi - - -if test $enable_plugins != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing dlsym" >&5 -$as_echo_n "checking for library containing dlsym... " >&6; } -if ${ac_cv_search_dlsym+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char dlsym (); -int -main () -{ -return dlsym (); - ; - return 0; -} -_ACEOF -for ac_lib in '' dl; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_dlsym=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_dlsym+:} false; then : - break -fi -done -if ${ac_cv_search_dlsym+:} false; then : - -else - ac_cv_search_dlsym=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_dlsym" >&5 -$as_echo "$ac_cv_search_dlsym" >&6; } -ac_res=$ac_cv_search_dlsym -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -else - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "dlsym() not found - -Plugin support requires dynamic linking facilities from the operating system. -Either configure with --disable-plugins or resolve this error to build HTSlib." 
"$LINENO" 5 -fi - - # Check if the compiler understands -rdynamic - # TODO Test whether this is required and/or needs tweaking per-platform - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -rdynamic" >&5 -$as_echo_n "checking whether the compiler accepts -rdynamic... " >&6; } -if ${hts_cv_check__rdynamic+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_save_cflags=$CFLAGS - ac_check_save_ldflags=$LDFLAGS - CFLAGS="$CFLAGS -rdynamic" - LDFLAGS="$LDFLAGS -rdynamic" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - hts_cv_check__rdynamic=yes - if test "xrdynamic_flag" != x; then : - eval rdynamic_flag="-rdynamic" -fi -else - hts_cv_check__rdynamic=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ac_check_save_cflags - LDFLAGS=$ac_check_save_ldflags -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__rdynamic" >&5 -$as_echo "$hts_cv_check__rdynamic" >&6; } - - if test x"$rdynamic_flag" != "xno"; then : - LDFLAGS="$LDFLAGS $rdynamic_flag" - static_LDFLAGS="$static_LDFLAGS $rdynamic_flag" -fi - case "$ac_cv_search_dlsym" in - -l*) static_LIBS="$static_LIBS $ac_cv_search_dlsym" ;; - esac - -$as_echo "#define ENABLE_PLUGINS 1" >>confdefs.h - - - -cat >>confdefs.h <<_ACEOF -#define PLUGIN_EXT "$PLUGIN_EXT" -_ACEOF - -fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing log" >&5 -$as_echo_n "checking for library containing log... " >&6; } -if ${ac_cv_search_log+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char log (); -int -main () -{ -return log (); - ; - return 0; -} -_ACEOF -for ac_lib in '' m; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_log=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_log+:} false; then : - break -fi -done -if ${ac_cv_search_log+:} false; then : - -else - ac_cv_search_log=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_log" >&5 -$as_echo "$ac_cv_search_log" >&6; } -ac_res=$ac_cv_search_log -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -else - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "log() not found - -HTSLIB requires a working floating-point math library. -FAILED. This error must be resolved in order to build HTSlib successfully." "$LINENO" 5 -fi - - -zlib_devel=ok -ac_fn_c_check_header_compile "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "; -" -if test "x$ac_cv_header_zlib_h" = xyes; then : - -else - zlib_devel=missing -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for inflate in -lz" >&5 -$as_echo_n "checking for inflate in -lz... " >&6; } -if ${ac_cv_lib_z_inflate+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lz $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char inflate (); -int -main () -{ -return inflate (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_z_inflate=yes -else - ac_cv_lib_z_inflate=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_inflate" >&5 -$as_echo "$ac_cv_lib_z_inflate" >&6; } -if test "x$ac_cv_lib_z_inflate" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBZ 1 -_ACEOF - - LIBS="-lz $LIBS" - -else - zlib_devel=missing -fi - - -if test $zlib_devel != ok; then - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "zlib development files not found - -HTSlib uses compression routines from the zlib library . -Building HTSlib requires zlib development files to be installed on the build -machine; you may need to ensure a package such as zlib1g-dev (on Debian or -Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions or Cygwin) -is installed. - -FAILED. This error must be resolved in order to build HTSlib successfully." "$LINENO" 5 -fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing recv" >&5 -$as_echo_n "checking for library containing recv... " >&6; } -if ${ac_cv_search_recv+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char recv (); -int -main () -{ -return recv (); - ; - return 0; -} -_ACEOF -for ac_lib in '' socket ws2_32; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_recv=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_recv+:} false; then : - break -fi -done -if ${ac_cv_search_recv+:} false; then : - -else - ac_cv_search_recv=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_recv" >&5 -$as_echo "$ac_cv_search_recv" >&6; } -ac_res=$ac_cv_search_recv -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -if test "$ac_cv_search_recv" != "none required" -then - static_LIBS="$static_LIBS $ac_cv_search_recv" -fi -else - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing recv using declaration" >&5 -$as_echo_n "checking for library containing recv using declaration... " >&6; } - LIBS="-lws2_32 $LIBS" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -recv(0, 0, 0, 0); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: -lws2_32" >&5 -$as_echo "-lws2_32" >&6; } - static_LIBS="$static_LIBS -lws2_32" -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? 
"unable to find the recv() function" "$LINENO" 5 -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi - - -if test "$enable_bz2" != no; then - bz2_devel=ok - ac_fn_c_check_header_compile "$LINENO" "bzlib.h" "ac_cv_header_bzlib_h" "; -" -if test "x$ac_cv_header_bzlib_h" = xyes; then : - -else - bz2_devel=missing -fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for BZ2_bzBuffToBuffCompress in -lbz2" >&5 -$as_echo_n "checking for BZ2_bzBuffToBuffCompress in -lbz2... " >&6; } -if ${ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lbz2 $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char BZ2_bzBuffToBuffCompress (); -int -main () -{ -return BZ2_bzBuffToBuffCompress (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress=yes -else - ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" >&5 -$as_echo "$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" >&6; } -if test "x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBBZ2 1 -_ACEOF - - LIBS="-lbz2 $LIBS" - -else - bz2_devel=missing -fi - - if test $bz2_devel != ok; then - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? 
"libbzip2 development files not found - -The CRAM format may use bzip2 compression, which is implemented in HTSlib -by using compression routines from libbzip2 . - -Building HTSlib requires libbzip2 development files to be installed on the -build machine; you may need to ensure a package such as libbz2-dev (on Debian -or Ubuntu Linux) or bzip2-devel (on RPM-based Linux distributions or Cygwin) -is installed. - -Either configure with --disable-bz2 (which will make some CRAM files -produced elsewhere unreadable) or resolve this error to build HTSlib." "$LINENO" 5 - fi - if test -n "$PKG_CONFIG" && "$PKG_CONFIG" --exists bzip2; then - pc_requires="$pc_requires bzip2" - else - private_LIBS="$private_LIBS -lbz2" - fi - static_LIBS="$static_LIBS -lbz2" -fi - -if test "$enable_lzma" != no; then - lzma_devel=ok - for ac_header in lzma.h -do : - ac_fn_c_check_header_compile "$LINENO" "lzma.h" "ac_cv_header_lzma_h" "; -" -if test "x$ac_cv_header_lzma_h" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LZMA_H 1 -_ACEOF - -else - lzma_devel=header-missing -fi - -done - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for lzma_easy_buffer_encode in -llzma" >&5 -$as_echo_n "checking for lzma_easy_buffer_encode in -llzma... " >&6; } -if ${ac_cv_lib_lzma_lzma_easy_buffer_encode+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-llzma $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char lzma_easy_buffer_encode (); -int -main () -{ -return lzma_easy_buffer_encode (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_lzma_lzma_easy_buffer_encode=yes -else - ac_cv_lib_lzma_lzma_easy_buffer_encode=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lzma_lzma_easy_buffer_encode" >&5 -$as_echo "$ac_cv_lib_lzma_lzma_easy_buffer_encode" >&6; } -if test "x$ac_cv_lib_lzma_lzma_easy_buffer_encode" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBLZMA 1 -_ACEOF - - LIBS="-llzma $LIBS" - -else - lzma_devel=missing -fi - - if test $lzma_devel = missing; then - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "liblzma development files not found - -The CRAM format may use LZMA2 compression, which is implemented in HTSlib -by using compression routines from liblzma . - -Building HTSlib requires liblzma development files to be installed on the -build machine; you may need to ensure a package such as liblzma-dev (on Debian -or Ubuntu Linux), xz-devel (on RPM-based Linux distributions or Cygwin), or -xz (via Homebrew on macOS) is installed; or build XZ Utils from source. - -Either configure with --disable-lzma (which will make some CRAM files -produced elsewhere unreadable) or resolve this error to build HTSlib." 
"$LINENO" 5 - fi - pc_requires="$pc_requires liblzma" - static_LIBS="$static_LIBS -llzma" -fi - -if test "x$with_external_htscodecs" != "xno"; then : - libhtscodecs=ok - ac_fn_c_check_header_compile "$LINENO" "htscodecs/rANS_static4x16.h" "ac_cv_header_htscodecs_rANS_static4x16_h" "; -" -if test "x$ac_cv_header_htscodecs_rANS_static4x16_h" = xyes; then : - -else - libhtscodecs='missing header' -fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for rans_compress_bound_4x16 in -lhtscodecs" >&5 -$as_echo_n "checking for rans_compress_bound_4x16 in -lhtscodecs... " >&6; } -if ${ac_cv_lib_htscodecs_rans_compress_bound_4x16+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lhtscodecs $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char rans_compress_bound_4x16 (); -int -main () -{ -return rans_compress_bound_4x16 (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_htscodecs_rans_compress_bound_4x16=yes -else - ac_cv_lib_htscodecs_rans_compress_bound_4x16=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_htscodecs_rans_compress_bound_4x16" >&5 -$as_echo "$ac_cv_lib_htscodecs_rans_compress_bound_4x16" >&6; } -if test "x$ac_cv_lib_htscodecs_rans_compress_bound_4x16" = xyes; then : - : -else - libhtscodecs='missing library' -fi - - if test "$libhtscodecs" = "ok"; then : - -$as_echo "#define HAVE_EXTERNAL_LIBHTSCODECS 1" >>confdefs.h - - LIBS="-lhtscodecs $LIBS" - private_LIBS="-lhtscodecs $private_LIBS" - static_LIBS="-lhtscodecs $static_LIBS" - selected_htscodecs_mk="htscodecs_external.mk" -else - cat > config.mk 
<<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "libhtscodecs development files not found: $libhtscodecs - -You asked to use an external htscodecs library, but do not have the -required header / library files. You either need to supply these and -if necessary set CPPFLAGS and LDFLAGS so the compiler can find them; -or configure using --without-external-htscodecs to build the required -functions from the htscodecs submodule. -" "$LINENO" 5 -fi -else - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether htscodecs files are present" >&5 -$as_echo_n "checking whether htscodecs files are present... " >&6; } - if test -e "$srcdir/htscodecs/htscodecs/rANS_static4x16.h"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - selected_htscodecs_mk="htscodecs_bundled.mk" -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - if test -e "$srcdir/.git"; then : - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "htscodecs submodule files not present. - -HTSlib uses some functions from the htscodecs project, which is normally -included as a submodule. Try running: - - git submodule update --init --recursive - -in the top-level htslib directory to update it, and then re-run configure. -" "$LINENO" 5 -else - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "htscodecs submodule files not present. - -You have an incomplete distribution. 
Please try downloading one of the -official releases from https://www.htslib.org -" "$LINENO" 5 -fi -fi -fi - -if test "x$with_libdeflate" != "xno"; then : - libdeflate=ok - ac_fn_c_check_header_compile "$LINENO" "libdeflate.h" "ac_cv_header_libdeflate_h" "; -" -if test "x$ac_cv_header_libdeflate_h" = xyes; then : - -else - libdeflate='missing header' -fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libdeflate_deflate_compress in -ldeflate" >&5 -$as_echo_n "checking for libdeflate_deflate_compress in -ldeflate... " >&6; } -if ${ac_cv_lib_deflate_libdeflate_deflate_compress+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-ldeflate $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char libdeflate_deflate_compress (); -int -main () -{ -return libdeflate_deflate_compress (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_deflate_libdeflate_deflate_compress=yes -else - ac_cv_lib_deflate_libdeflate_deflate_compress=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_deflate_libdeflate_deflate_compress" >&5 -$as_echo "$ac_cv_lib_deflate_libdeflate_deflate_compress" >&6; } -if test "x$ac_cv_lib_deflate_libdeflate_deflate_compress" = xyes; then : - : -else - libdeflate='missing library' -fi - - if test "$libdeflate" = "ok"; then : - -$as_echo "#define HAVE_LIBDEFLATE 1" >>confdefs.h - - LIBS="-ldeflate $LIBS" - private_LIBS="$private_LIBS -ldeflate" - static_LIBS="$static_LIBS -ldeflate" -else - if test "x$with_libdeflate" != "xcheck"; then : - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) 
-$(error Resolve configure error first) -endif -EOF - as_fn_error $? "libdeflate development files not found: $libdeflate - -You requested libdeflate, but do not have the required header / library -files. The source for libdeflate is available from -. You may have to adjust -search paths in CPPFLAGS and/or LDFLAGS if the header and library -are not currently on them. - -Either configure with --without-libdeflate or resolve this error to build -HTSlib." "$LINENO" 5 -fi -fi -fi - -libcurl=disabled -if test "$enable_libcurl" != no; then - libcurl_devel=ok - ac_fn_c_check_header_compile "$LINENO" "curl/curl.h" "ac_cv_header_curl_curl_h" "; -" -if test "x$ac_cv_header_curl_curl_h" = xyes; then : - -else - libcurl_devel="headers not found" -fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for curl_easy_pause in -lcurl" >&5 -$as_echo_n "checking for curl_easy_pause in -lcurl... " >&6; } -if ${ac_cv_lib_curl_curl_easy_pause+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lcurl $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char curl_easy_pause (); -int -main () -{ -return curl_easy_pause (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_curl_curl_easy_pause=yes -else - ac_cv_lib_curl_curl_easy_pause=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curl_curl_easy_pause" >&5 -$as_echo "$ac_cv_lib_curl_curl_easy_pause" >&6; } -if test "x$ac_cv_lib_curl_curl_easy_pause" = xyes; then : - : -else - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for curl_easy_init in -lcurl" >&5 -$as_echo_n "checking for curl_easy_init in -lcurl... " >&6; } -if ${ac_cv_lib_curl_curl_easy_init+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lcurl $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char curl_easy_init (); -int -main () -{ -return curl_easy_init (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_curl_curl_easy_init=yes -else - ac_cv_lib_curl_curl_easy_init=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curl_curl_easy_init" >&5 -$as_echo "$ac_cv_lib_curl_curl_easy_init" >&6; } -if test "x$ac_cv_lib_curl_curl_easy_init" = xyes; then : - libcurl_devel="library is too old (7.18+ required)" -else - libcurl_devel="library not found" -fi - -fi - - - if test "$libcurl_devel" = ok; then - -$as_echo "#define HAVE_LIBCURL 1" >>confdefs.h - - libcurl=enabled - elif test "$enable_libcurl" = check; then - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: libcurl not enabled: $libcurl_devel" >&5 -$as_echo "$as_me: WARNING: libcurl not enabled: $libcurl_devel" >&2;} - else - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "libcurl $libcurl_devel - -Support for HTTPS and other SSL-based URLs requires routines from the libcurl -library . Building HTSlib with libcurl enabled -requires libcurl development files to be installed on the build machine; you -may need to ensure a package such as libcurl4-{gnutls,nss,openssl}-dev (on -Debian or Ubuntu Linux) or libcurl-devel (on RPM-based Linux distributions -or Cygwin) is installed. - -Either configure with --disable-libcurl or resolve this error to build HTSlib." 
"$LINENO" 5 - fi - - if test "$libcurl" = enabled ; then - if test "$enable_plugins" != yes ; then - static_LIBS="$static_LIBS -lcurl" - fi - fi -fi - - -gcs=disabled -if test "$enable_gcs" != no; then - if test $libcurl = enabled; then - -$as_echo "#define ENABLE_GCS 1" >>confdefs.h - - gcs=enabled - else - case "$enable_gcs" in - check) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GCS support not enabled: requires libcurl support" >&5 -$as_echo "$as_me: WARNING: GCS support not enabled: requires libcurl support" >&2;} ;; - *) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "GCS support not enabled - -Support for Google Cloud Storage URLs requires libcurl support to be enabled -in HTSlib. Configure with --enable-libcurl in order to use GCS URLs." "$LINENO" 5 - ;; - esac - fi -fi - - -s3=disabled -if test "$enable_s3" != no; then - if test $libcurl = enabled; then - s3=enabled - need_crypto="$enable_s3" - else - case "$enable_s3" in - check) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: S3 support not enabled: requires libcurl support" >&5 -$as_echo "$as_me: WARNING: S3 support not enabled: requires libcurl support" >&2;} ;; - *) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "S3 support not enabled - -Support for Amazon AWS S3 URLs requires libcurl support to be enabled -in HTSlib. Configure with --enable-libcurl in order to use S3 URLs." "$LINENO" 5 - ;; - esac - fi -fi - -CRYPTO_LIBS= -if test $need_crypto != no; then - ac_fn_c_check_func "$LINENO" "CCHmac" "ac_cv_func_CCHmac" -if test "x$ac_cv_func_CCHmac" = xyes; then : - -$as_echo "#define HAVE_COMMONCRYPTO 1" >>confdefs.h - -else - save_LIBS=$LIBS - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing HMAC" >&5 -$as_echo_n "checking for library containing HMAC... 
" >&6; } -if ${ac_cv_search_HMAC+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char HMAC (); -int -main () -{ -return HMAC (); - ; - return 0; -} -_ACEOF -for ac_lib in '' crypto; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_HMAC=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_HMAC+:} false; then : - break -fi -done -if ${ac_cv_search_HMAC+:} false; then : - -else - ac_cv_search_HMAC=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_HMAC" >&5 -$as_echo "$ac_cv_search_HMAC" >&6; } -ac_res=$ac_cv_search_HMAC -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -$as_echo "#define HAVE_HMAC 1" >>confdefs.h - - case "$ac_cv_search_HMAC" in - -l*) CRYPTO_LIBS=$ac_cv_search_HMAC ;; - esac -else - case "$need_crypto" in - check) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: S3 support not enabled: requires SSL development files" >&5 -$as_echo "$as_me: WARNING: S3 support not enabled: requires SSL development files" >&2;} - s3=disabled ;; - *) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "SSL development files not found - -Support for AWS S3 URLs requires routines from an SSL library. 
Building -HTSlib with libcurl enabled requires SSL development files to be installed -on the build machine; you may need to ensure a package such as libgnutls-dev, -libnss3-dev, or libssl-dev (on Debian or Ubuntu Linux, corresponding to the -libcurl4-*-dev package installed), or openssl-devel (on RPM-based Linux -distributions or Cygwin) is installed. - -Either configure with --disable-s3 or resolve this error to build HTSlib." "$LINENO" 5 ;; - esac -fi - - LIBS=$save_LIBS -fi - - if test "$enable_plugins" != yes ; then - static_LIBS="$static_LIBS $CRYPTO_LIBS" - fi -fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing regcomp" >&5 -$as_echo_n "checking for library containing regcomp... " >&6; } -if ${ac_cv_search_regcomp+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char regcomp (); -int -main () -{ -return regcomp (); - ; - return 0; -} -_ACEOF -for ac_lib in '' regex; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_regcomp=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_regcomp+:} false; then : - break -fi -done -if ${ac_cv_search_regcomp+:} false; then : - -else - ac_cv_search_regcomp=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_regcomp" >&5 -$as_echo "$ac_cv_search_regcomp" >&6; } -ac_res=$ac_cv_search_regcomp -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - libregex=needed -fi - - - -if test "$s3" = enabled ; then - -$as_echo "#define ENABLE_S3 1" >>confdefs.h - -fi - -if test "x$hts_late_cflags" != x; then : - CFLAGS="$CFLAGS $hts_late_cflags" -fi - - - - - - - - - -ac_config_files="$ac_config_files config.mk htslib.pc.tmp:htslib.pc.in" - -ac_config_links="$ac_config_links htscodecs.mk:$selected_htscodecs_mk" - - -if test "$srcdir" != .; then - # Set up for a separate build directory. As HTSlib uses a non-recursive - # makefile, we need to create additional build subdirectories explicitly. 
- ac_config_links="$ac_config_links Makefile:Makefile htslib.mk:htslib.mk" - - ac_config_files="$ac_config_files htslib_vars.mk:builddir_vars.mk.in" - - ac_config_commands="$ac_config_commands mkdir" - -fi - -# @HTSDIRslash_if_relsrcdir@ will be empty when $srcdir is absolute -case "$srcdir" in - /*) HTSDIRslash_if_relsrcdir= ;; - *) HTSDIRslash_if_relsrcdir='$(HTSDIR)/' ;; -esac - - -cat >confcache <<\_ACEOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs, see configure's option --config-cache. -# It is not useful on other systems. If it contains results you don't -# want to keep, you may remove or edit it. -# -# config.status only pays attention to the cache file if you give it -# the --recheck option to rerun configure. -# -# `ac_cv_env_foo' variables (set or unset) will be overridden when -# loading this file, other *unset* `ac_cv_foo' will be assigned the -# following values. - -_ACEOF - -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, we kill variables containing newlines. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. 
-( - for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( - *) { eval $ac_var=; unset $ac_var;} ;; - esac ;; - esac - done - - (set) 2>&1 | - case $as_nl`(ac_space=' '; set) 2>&1` in #( - *${as_nl}ac_space=\ *) - # `set' does not quote correctly, so add quotes: double-quote - # substitution turns \\\\ into \\, and sed turns \\ into \. - sed -n \ - "s/'/'\\\\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" - ;; #( - *) - # `set' quotes correctly as required by POSIX, so do not add quotes. - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" - ;; - esac | - sort -) | - sed ' - /^ac_cv_env_/b end - t clear - :clear - s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ - t end - s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ - :end' >>confcache -if diff "$cache_file" confcache >/dev/null 2>&1; then :; else - if test -w "$cache_file"; then - if test "x$cache_file" != "x/dev/null"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 -$as_echo "$as_me: updating cache $cache_file" >&6;} - if test ! -f "$cache_file" || test -h "$cache_file"; then - cat confcache >"$cache_file" - else - case $cache_file in #( - */* | ?:*) - mv -f confcache "$cache_file"$$ && - mv -f "$cache_file"$$ "$cache_file" ;; #( - *) - mv -f confcache "$cache_file" ;; - esac - fi - fi - else - { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 -$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} - fi -fi -rm -f confcache - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. 
-test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -DEFS=-DHAVE_CONFIG_H - -ac_libobjs= -ac_ltlibobjs= -U= -for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue - # 1. Remove the extension, and $U if already installed. - ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`$as_echo "$ac_i" | sed "$ac_script"` - # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR - # will be set to the directory where LIBOBJS objects are built. - as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" - as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' -done -LIBOBJS=$ac_libobjs - -LTLIBOBJS=$ac_ltlibobjs - - - -: "${CONFIG_STATUS=./config.status}" -ac_write_fail=0 -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files $CONFIG_STATUS" -{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 -$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} -as_write_fail=0 -cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 -#! $SHELL -# Generated by $as_me. -# Run this file to recreate the current configuration. -# Compiler output produced by configure, useful for debugging -# configure, is in config.log if it exists. - -debug=false -ac_cs_recheck=false -ac_cs_silent=false - -SHELL=\${CONFIG_SHELL-$SHELL} -export SHELL -_ASEOF -cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 -## -------------------- ## -## M4sh Initialization. ## -## -------------------- ## - -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac -fi - - -as_nl=' -' -export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. 
-as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. -if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi - -# The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then - PATH_SEPARATOR=: - (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { - (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || - PATH_SEPARATOR=';' - } -fi - - -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - -# Find who we are. Look in the path if we contain no directory separator. -as_myself= -case $0 in #(( - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break - done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as `sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - exit 1 -fi - -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - - -# as_fn_error STATUS ERROR [LINENO LOG_FD] -# ---------------------------------------- -# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are -# provided, also output the error to LOG_FD, referencing LINENO. Then exit the -# script with STATUS, using 1 if that was 0. -as_fn_error () -{ - as_status=$1; test $as_status -eq 0 && as_status=1 - if test "$4"; then - as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 - fi - $as_echo "$as_me: error: $2" >&2 - as_fn_exit $as_status -} # as_fn_error - - -# as_fn_set_status STATUS -# ----------------------- -# Set $? to STATUS, without forking. -as_fn_set_status () -{ - return $1 -} # as_fn_set_status - -# as_fn_exit STATUS -# ----------------- -# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. -as_fn_exit () -{ - set +e - as_fn_set_status $1 - exit $1 -} # as_fn_exit - -# as_fn_unset VAR -# --------------- -# Portably unset VAR. 
-as_fn_unset () -{ - { eval $1=; unset $1;} -} -as_unset=as_fn_unset -# as_fn_append VAR VALUE -# ---------------------- -# Append the text in VALUE to the end of the definition contained in VAR. Take -# advantage of any shell optimizations that allow amortized linear growth over -# repeated appends, instead of the typical quadratic growth present in naive -# implementations. -if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : - eval 'as_fn_append () - { - eval $1+=\$2 - }' -else - as_fn_append () - { - eval $1=\$$1\$2 - } -fi # as_fn_append - -# as_fn_arith ARG... -# ------------------ -# Perform arithmetic evaluation on the ARGs, and store the result in the -# global $as_val. Take advantage of shells that can avoid forks. The arguments -# must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : - eval 'as_fn_arith () - { - as_val=$(( $* )) - }' -else - as_fn_arith () - { - as_val=`expr "$@" || test $? -eq 1` - } -fi # as_fn_arith - - -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# Avoid depending upon Character Ranges. 
-as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in #((((( --n*) - case `echo 'xy\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - xy) ECHO_C='\c';; - *) echo `echo ksh88 bug on AIX 6.1` > /dev/null - ECHO_T=' ';; - esac;; -*) - ECHO_N='-n';; -esac - -rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir 2>/dev/null -fi -if (echo >conf$$.file) 2>/dev/null; then - if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -pR'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || - as_ln_s='cp -pR' - elif ln conf$$.file conf$$ 2>/dev/null; then - as_ln_s=ln - else - as_ln_s='cp -pR' - fi -else - as_ln_s='cp -pR' -fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null - - -# as_fn_mkdir_p -# ------------- -# Create "$as_dir" as a directory, including parents if necessary. -as_fn_mkdir_p () -{ - - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || eval $as_mkdir_p || { - as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break - done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" - - -} # as_fn_mkdir_p -if mkdir -p . 2>/dev/null; then - as_mkdir_p='mkdir -p "$as_dir"' -else - test -d ./-p && rmdir ./-p - as_mkdir_p=false -fi - - -# as_fn_executable_p FILE -# ----------------------- -# Test if FILE is an executable regular file. -as_fn_executable_p () -{ - test -f "$1" && test -x "$1" -} # as_fn_executable_p -as_test_x='test -x' -as_executable_p=as_fn_executable_p - -# Sed expression to map a string onto a valid CPP name. -as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" - -# Sed expression to map a string onto a valid variable name. -as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" - - -exec 6>&1 -## ----------------------------------- ## -## Main body of $CONFIG_STATUS script. ## -## ----------------------------------- ## -_ASEOF -test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# Save the log message, to keep $0 and so on meaningful, and to -# report actual input values of CONFIG_FILES etc. instead of their -# values after options handling. -ac_log=" -This file was extended by HTSlib $as_me 1.18, which was -generated by GNU Autoconf 2.69. 
Invocation command line was - - CONFIG_FILES = $CONFIG_FILES - CONFIG_HEADERS = $CONFIG_HEADERS - CONFIG_LINKS = $CONFIG_LINKS - CONFIG_COMMANDS = $CONFIG_COMMANDS - $ $0 $@ - -on `(hostname || uname -n) 2>/dev/null | sed 1q` -" - -_ACEOF - -case $ac_config_files in *" -"*) set x $ac_config_files; shift; ac_config_files=$*;; -esac - -case $ac_config_headers in *" -"*) set x $ac_config_headers; shift; ac_config_headers=$*;; -esac - - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -# Files that config.status was made for. -config_files="$ac_config_files" -config_headers="$ac_config_headers" -config_links="$ac_config_links" -config_commands="$ac_config_commands" - -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -ac_cs_usage="\ -\`$as_me' instantiates files and other configuration actions -from templates according to the current configuration. Unless the files -and actions are specified as TAGs, all are instantiated by default. - -Usage: $0 [OPTION]... [TAG]... - - -h, --help print this help, then exit - -V, --version print version number and configuration settings, then exit - --config print configuration, then exit - -q, --quiet, --silent - do not print progress messages - -d, --debug don't remove temporary files - --recheck update $as_me by reconfiguring in the same conditions - --file=FILE[:TEMPLATE] - instantiate the configuration file FILE - --header=FILE[:TEMPLATE] - instantiate the configuration header FILE - -Configuration files: -$config_files - -Configuration headers: -$config_headers - -Configuration links: -$config_links - -Configuration commands: -$config_commands - -Report bugs to . -HTSlib home page: ." - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" -ac_cs_version="\\ -HTSlib config.status 1.18 -configured by $0, generated by GNU Autoconf 2.69, - with options \\"\$ac_cs_config\\" - -Copyright (C) 2012 Free Software Foundation, Inc. 
-This config.status script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it." - -ac_pwd='$ac_pwd' -srcdir='$srcdir' -test -n "\$AWK" || AWK=awk -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# The default lists apply if the user does not specify any file. -ac_need_defaults=: -while test $# != 0 -do - case $1 in - --*=?*) - ac_option=`expr "X$1" : 'X\([^=]*\)='` - ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` - ac_shift=: - ;; - --*=) - ac_option=`expr "X$1" : 'X\([^=]*\)='` - ac_optarg= - ac_shift=: - ;; - *) - ac_option=$1 - ac_optarg=$2 - ac_shift=shift - ;; - esac - - case $ac_option in - # Handling of the options. - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - ac_cs_recheck=: ;; - --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - $as_echo "$ac_cs_version"; exit ;; - --config | --confi | --conf | --con | --co | --c ) - $as_echo "$ac_cs_config"; exit ;; - --debug | --debu | --deb | --de | --d | -d ) - debug=: ;; - --file | --fil | --fi | --f ) - $ac_shift - case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; - '') as_fn_error $? "missing file argument" ;; - esac - as_fn_append CONFIG_FILES " '$ac_optarg'" - ac_need_defaults=false;; - --header | --heade | --head | --hea ) - $ac_shift - case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; - esac - as_fn_append CONFIG_HEADERS " '$ac_optarg'" - ac_need_defaults=false;; - --he | --h) - # Conflict between --help and --header - as_fn_error $? "ambiguous option: \`$1' -Try \`$0 --help' for more information.";; - --help | --hel | -h ) - $as_echo "$ac_cs_usage"; exit ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil | --si | --s) - ac_cs_silent=: ;; - - # This is an error. - -*) as_fn_error $? "unrecognized option: \`$1' -Try \`$0 --help' for more information." 
;; - - *) as_fn_append ac_config_targets " $1" - ac_need_defaults=false ;; - - esac - shift -done - -ac_configure_extra_args= - -if $ac_cs_silent; then - exec 6>/dev/null - ac_configure_extra_args="$ac_configure_extra_args --silent" -fi - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -if \$ac_cs_recheck; then - set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion - shift - \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 - CONFIG_SHELL='$SHELL' - export CONFIG_SHELL - exec "\$@" -fi - -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -exec 5>>config.log -{ - echo - sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX -## Running $as_me. ## -_ASBOX - $as_echo "$ac_log" -} >&5 - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 - -# Handling of arguments. -for ac_config_target in $ac_config_targets -do - case $ac_config_target in - "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; - "config.mk") CONFIG_FILES="$CONFIG_FILES config.mk" ;; - "htslib.pc.tmp") CONFIG_FILES="$CONFIG_FILES htslib.pc.tmp:htslib.pc.in" ;; - "htscodecs.mk") CONFIG_LINKS="$CONFIG_LINKS htscodecs.mk:$selected_htscodecs_mk" ;; - "Makefile") CONFIG_LINKS="$CONFIG_LINKS Makefile:Makefile" ;; - "htslib.mk") CONFIG_LINKS="$CONFIG_LINKS htslib.mk:htslib.mk" ;; - "htslib_vars.mk") CONFIG_FILES="$CONFIG_FILES htslib_vars.mk:builddir_vars.mk.in" ;; - "mkdir") CONFIG_COMMANDS="$CONFIG_COMMANDS mkdir" ;; - - *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; - esac -done - - -# If the user did not use the arguments to specify the items to instantiate, -# then the envvar interface is used. Set only those that are not. -# We use the long form for the default assignment because of an extremely -# bizarre bug on SunOS 4.1.3. 
-if $ac_need_defaults; then - test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files - test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers - test "${CONFIG_LINKS+set}" = set || CONFIG_LINKS=$config_links - test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands -fi - -# Have a temporary directory for convenience. Make it in the build tree -# simply because there is no reason against having it here, and in addition, -# creating and moving files from /tmp can sometimes cause problems. -# Hook for its removal unless debugging. -# Note that there is a small window in which the directory will not be cleaned: -# after its creation but before its name has been assigned to `$tmp'. -$debug || -{ - tmp= ac_tmp= - trap 'exit_status=$? - : "${ac_tmp:=$tmp}" - { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status -' 0 - trap 'as_fn_exit 1' 1 2 13 15 -} -# Create a (secure) tmp directory for tmp files. - -{ - tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && - test -d "$tmp" -} || -{ - tmp=./conf$$-$RANDOM - (umask 077 && mkdir "$tmp") -} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 -ac_tmp=$tmp - -# Set up the scripts for CONFIG_FILES section. -# No need to generate them if there are no CONFIG_FILES. -# This happens for instance with `./config.status config.h'. -if test -n "$CONFIG_FILES"; then - - -ac_cr=`echo X | tr X '\015'` -# On cygwin, bash can eat \r inside `` if the user requested igncr. -# But we know of no other shell where ac_cr would be empty at this -# point, so we can use a bashism as a fallback. 
-if test "x$ac_cr" = x; then - eval ac_cr=\$\'\\r\' -fi -ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` -if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then - ac_cs_awk_cr='\\r' -else - ac_cs_awk_cr=$ac_cr -fi - -echo 'BEGIN {' >"$ac_tmp/subs1.awk" && -_ACEOF - - -{ - echo "cat >conf$$subs.awk <<_ACEOF" && - echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && - echo "_ACEOF" -} >conf$$subs.sh || - as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 -ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` -ac_delim='%!_!# ' -for ac_last_try in false false false false false :; do - . ./conf$$subs.sh || - as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 - - ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` - if test $ac_delim_n = $ac_delim_num; then - break - elif $ac_last_try; then - as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! " - fi -done -rm -f conf$$subs.sh - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && -_ACEOF -sed -n ' -h -s/^/S["/; s/!.*/"]=/ -p -g -s/^[^!]*!// -:repl -t repl -s/'"$ac_delim"'$// -t delim -:nl -h -s/\(.\{148\}\)..*/\1/ -t more1 -s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ -p -n -b repl -:more1 -s/["\\]/\\&/g; s/^/"/; s/$/"\\/ -p -g -s/.\{148\}// -t nl -:delim -h -s/\(.\{148\}\)..*/\1/ -t more2 -s/["\\]/\\&/g; s/^/"/; s/$/"/ -p -b -:more2 -s/["\\]/\\&/g; s/^/"/; s/$/"\\/ -p -g -s/.\{148\}// -t delim -' >$CONFIG_STATUS || ac_write_fail=1 -rm -f conf$$subs.awk -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -_ACAWK -cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && - for (key in S) S_is_set[key] = 1 - FS = "" - -} -{ - line = $ 0 - nfields = split(line, field, "@") - substed = 0 - len = length(field[1]) - for (i = 2; i < nfields; i++) { - key = field[i] - keylen = length(key) - if (S_is_set[key]) { - value = S[key] - line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) - len += length(value) + 
length(field[++i]) - substed = 1 - } else - len += 1 + keylen - } - - print line -} - -_ACAWK -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then - sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" -else - cat -fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ - || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 -_ACEOF - -# VPATH may cause trouble with some makes, so we remove sole $(srcdir), -# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and -# trailing colons and then remove the whole line if VPATH becomes empty -# (actually we leave an empty line to preserve line numbers). -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ -h -s/// -s/^/:/ -s/[ ]*$/:/ -s/:\$(srcdir):/:/g -s/:\${srcdir}:/:/g -s/:@srcdir@:/:/g -s/^:*// -s/:*$// -x -s/\(=[ ]*\).*/\1/ -G -s/\n// -s/^[^=]*=[ ]*$// -}' -fi - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -fi # test -n "$CONFIG_FILES" - -# Set up the scripts for CONFIG_HEADERS section. -# No need to generate them if there are no CONFIG_HEADERS. -# This happens for instance with `./config.status Makefile'. -if test -n "$CONFIG_HEADERS"; then -cat >"$ac_tmp/defines.awk" <<\_ACAWK || -BEGIN { -_ACEOF - -# Transform confdefs.h into an awk script `defines.awk', embedded as -# here-document in config.status, that substitutes the proper values into -# config.h.in to produce config.h. - -# Create a delimiter string that does not exist in confdefs.h, to ease -# handling of long lines. -ac_delim='%!_!# ' -for ac_last_try in false false :; do - ac_tt=`sed -n "/$ac_delim/p" confdefs.h` - if test -z "$ac_tt"; then - break - elif $ac_last_try; then - as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! " - fi -done - -# For the awk script, D is an array of macro values keyed by name, -# likewise P contains macro parameters if any. 
Preserve backslash -# newline sequences. - -ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* -sed -n ' -s/.\{148\}/&'"$ac_delim"'/g -t rset -:rset -s/^[ ]*#[ ]*define[ ][ ]*/ / -t def -d -:def -s/\\$// -t bsnl -s/["\\]/\\&/g -s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ -D["\1"]=" \3"/p -s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p -d -:bsnl -s/["\\]/\\&/g -s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ -D["\1"]=" \3\\\\\\n"\\/p -t cont -s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p -t cont -d -:cont -n -s/.\{148\}/&'"$ac_delim"'/g -t clear -:clear -s/\\$// -t bsnlc -s/["\\]/\\&/g; s/^/"/; s/$/"/p -d -:bsnlc -s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p -b cont -' >$CONFIG_STATUS || ac_write_fail=1 - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 - for (key in D) D_is_set[key] = 1 - FS = "" -} -/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { - line = \$ 0 - split(line, arg, " ") - if (arg[1] == "#") { - defundef = arg[2] - mac1 = arg[3] - } else { - defundef = substr(arg[1], 2) - mac1 = arg[2] - } - split(mac1, mac2, "(") #) - macro = mac2[1] - prefix = substr(line, 1, index(line, defundef) - 1) - if (D_is_set[macro]) { - # Preserve the white space surrounding the "#". - print prefix "define", macro P[macro] D[macro] - next - } else { - # Replace #undef with comments. This is necessary, for example, - # in the case of _POSIX_SOURCE, which is predefined and required - # on some systems where configure will not decide to define it. - if (defundef == "undef") { - print "/*", prefix defundef, macro, "*/" - next - } - } -} -{ print } -_ACAWK -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 - as_fn_error $? 
"could not setup config headers machinery" "$LINENO" 5 -fi # test -n "$CONFIG_HEADERS" - - -eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :L $CONFIG_LINKS :C $CONFIG_COMMANDS" -shift -for ac_tag -do - case $ac_tag in - :[FHLC]) ac_mode=$ac_tag; continue;; - esac - case $ac_mode$ac_tag in - :[FHL]*:*);; - :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; - :[FH]-) ac_tag=-:-;; - :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; - esac - ac_save_IFS=$IFS - IFS=: - set x $ac_tag - IFS=$ac_save_IFS - shift - ac_file=$1 - shift - - case $ac_mode in - :L) ac_source=$1;; - :[FH]) - ac_file_inputs= - for ac_f - do - case $ac_f in - -) ac_f="$ac_tmp/stdin";; - *) # Look for the file first in the build tree, then in the source tree - # (if the path is not absolute). The absolute path cannot be DOS-style, - # because $ac_f cannot contain `:'. - test -f "$ac_f" || - case $ac_f in - [\\/$]*) false;; - *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; - esac || - as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; - esac - case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac - as_fn_append ac_file_inputs " '$ac_f'" - done - - # Let's still pretend it is `configure' which instantiates (i.e., don't - # use $as_me), people would be surprised to read: - # /* config.h. Generated by config.status. */ - configure_input='Generated from '` - $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' - `' by configure.' - if test x"$ac_file" != x-; then - configure_input="$ac_file. $configure_input" - { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 -$as_echo "$as_me: creating $ac_file" >&6;} - fi - # Neutralize special characters interpreted by sed in replacement strings. - case $configure_input in #( - *\&* | *\|* | *\\* ) - ac_sed_conf_input=`$as_echo "$configure_input" | - sed 's/[\\\\&|]/\\\\&/g'`;; #( - *) ac_sed_conf_input=$configure_input;; - esac - - case $ac_tag in - *:-:* | *:-) cat >"$ac_tmp/stdin" \ - || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 ;; - esac - ;; - esac - - ac_dir=`$as_dirname -- "$ac_file" || -$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$ac_file" : 'X\(//\)[^/]' \| \ - X"$ac_file" : 'X\(//\)$' \| \ - X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$ac_file" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - as_dir="$ac_dir"; as_fn_mkdir_p - ac_builddir=. - -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` - # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix - -case $srcdir in - .) # We are building in place. - ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; -esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - - case $ac_mode in - :F) - # - # CONFIG_FILE - # - -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# If the template does not know about datarootdir, expand it. -# FIXME: This hack should be removed a few years after 2.60. 
-ac_datarootdir_hack=; ac_datarootdir_seen= -ac_sed_dataroot=' -/datarootdir/ { - p - q -} -/@datadir@/p -/@docdir@/p -/@infodir@/p -/@localedir@/p -/@mandir@/p' -case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in -*datarootdir*) ac_datarootdir_seen=yes;; -*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 - ac_datarootdir_hack=' - s&@datadir@&$datadir&g - s&@docdir@&$docdir&g - s&@infodir@&$infodir&g - s&@localedir@&$localedir&g - s&@mandir@&$mandir&g - s&\\\${datarootdir}&$datarootdir&g' ;; -esac -_ACEOF - -# Neutralize VPATH when `$srcdir' = `.'. -# Shell code in configure.ac might set extrasub. -# FIXME: do we really want to maintain this feature? -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_sed_extra="$ac_vpsub -$extrasub -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -:t -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b -s|@configure_input@|$ac_sed_conf_input|;t t -s&@top_builddir@&$ac_top_builddir_sub&;t t -s&@top_build_prefix@&$ac_top_build_prefix&;t t -s&@srcdir@&$ac_srcdir&;t t -s&@abs_srcdir@&$ac_abs_srcdir&;t t -s&@top_srcdir@&$ac_top_srcdir&;t t -s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t -s&@builddir@&$ac_builddir&;t t -s&@abs_builddir@&$ac_abs_builddir&;t t -s&@abs_top_builddir@&$ac_abs_top_builddir&;t t -$ac_datarootdir_hack -" -eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ - >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 - -test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && - { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && - { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ - "$ac_tmp/out"`; test -z "$ac_out"; } && - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined" >&5 -$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined" >&2;} - - rm -f "$ac_tmp/stdin" - case $ac_file in - -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; - *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; - esac \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - ;; - :H) - # - # CONFIG_HEADER - # - if test x"$ac_file" != x-; then - { - $as_echo "/* $configure_input */" \ - && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" - } >"$ac_tmp/config.h" \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then - { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 -$as_echo "$as_me: $ac_file is unchanged" >&6;} - else - rm -f "$ac_file" - mv "$ac_tmp/config.h" "$ac_file" \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - fi - else - $as_echo "/* $configure_input */" \ - && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ - || as_fn_error $? "could not create -" "$LINENO" 5 - fi - ;; - :L) - # - # CONFIG_LINK - # - - if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then - : - else - # Prefer the file from the source tree if names are identical. - if test "$ac_source" = "$ac_file" || test ! -r "$ac_source"; then - ac_source=$srcdir/$ac_source - fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: linking $ac_source to $ac_file" >&5 -$as_echo "$as_me: linking $ac_source to $ac_file" >&6;} - - if test ! 
-r "$ac_source"; then - as_fn_error $? "$ac_source: file not found" "$LINENO" 5 - fi - rm -f "$ac_file" - - # Try a relative symlink, then a hard link, then a copy. - case $ac_source in - [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;; - *) ac_rel_source=$ac_top_build_prefix$ac_source ;; - esac - ln -s "$ac_rel_source" "$ac_file" 2>/dev/null || - ln "$ac_source" "$ac_file" 2>/dev/null || - cp -p "$ac_source" "$ac_file" || - as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5 - fi - ;; - :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 -$as_echo "$as_me: executing $ac_file commands" >&6;} - ;; - esac - - - case $ac_file$ac_mode in - "mkdir":C) as_dir=cram; as_fn_mkdir_p - as_dir=htscodecs/htscodecs; as_fn_mkdir_p - as_dir=htscodecs/tests; as_fn_mkdir_p - as_dir=test/fuzz; as_fn_mkdir_p - as_dir=test/longrefs; as_fn_mkdir_p - as_dir=test/tabix; as_fn_mkdir_p ;; - - esac -done # for ac_tag - - -as_fn_exit 0 -_ACEOF -ac_clean_files=$ac_clean_files_save - -test $ac_write_fail = 0 || - as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 - - -# configure is writing to config.log, and then calls config.status. -# config.status does its own redirection, appending to config.log. -# Unfortunately, on DOS this fails, as config.log is still kept open -# by configure, so config.status won't be able to write to it; its -# output is simply discarded. So we exec the FD to /dev/null, -# effectively closing config.log, so it can be properly (re)opened and -# appended to by config.status. When coming back to configure, we -# need to make the FD available again. -if test "$no_create" != yes; then - ac_cs_success=: - ac_config_status_args= - test "$silent" = yes && - ac_config_status_args="$ac_config_status_args --quiet" - exec 5>/dev/null - $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false - exec 5>>config.log - # Use ||, not &&, to avoid exiting from the if with $? 
= 1, which - # would make configure fail if this is the last instruction. - $ac_cs_success || as_fn_exit 1 -fi -if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 -$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} -fi - diff --git a/src/htslib-1.18/configure.ac b/src/htslib-1.18/configure.ac deleted file mode 100644 index c1afb38..0000000 --- a/src/htslib-1.18/configure.ac +++ /dev/null @@ -1,642 +0,0 @@ -# Configure script for htslib, a C library for high-throughput sequencing data. -# -# Copyright (C) 2015-2023 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -dnl Process this file with autoconf to produce a configure script -AC_INIT([HTSlib], m4_esyscmd_s([./version.sh 2>/dev/null]), - [samtools-help@lists.sourceforge.net], [], [http://www.htslib.org/]) -AC_PREREQ(2.63) dnl This version introduced 4-argument AC_CHECK_HEADER -AC_CONFIG_SRCDIR(hts.c) -AC_CONFIG_HEADERS(config.h) - -m4_include([m4/hts_prog_cc_warnings.m4]) -m4_include([m4/hts_check_compile_flags_needed.m4]) -m4_include([m4/hts_hide_dynamic_syms.m4]) -m4_include([m4/pkg.m4]) - -dnl Copyright notice to be copied into the generated configure script -AC_COPYRIGHT([Portions copyright (C) 2020-2023 Genome Research Ltd. - -This configure script is free software: you are free to change and -redistribute it. There is NO WARRANTY, to the extent permitted by law.]) - -dnl Notes to be copied (by autoheader) into the generated config.h.in -AH_TOP([/* If you use configure, this file provides @%:@defines reflecting your - configuration choices. If you have not run configure, suitable - conservative defaults will be used. - - Autoheader adds a number of items to this template file that are not - used by HTSlib: STDC_HEADERS and most HAVE_*_H header file defines - are immaterial, as we assume standard ISO C headers and facilities; - the PACKAGE_* defines are unused and are overridden by the more - accurate PACKAGE_VERSION as computed by the Makefile. */]) - -dnl Variant of AC_MSG_ERROR that ensures subsequent make(1) invocations fail -dnl until the configuration error is resolved and configure is run again. -AC_DEFUN([MSG_ERROR], - [cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - AC_MSG_ERROR([$1], [$2])]) - -AC_PROG_CC -AC_PROG_RANLIB - -dnl Turn on compiler warnings, if possible -HTS_PROG_CC_WARNINGS -dnl Flags to treat warnings as errors. 
These need to be applied to CFLAGS -dnl later as they can interfere with some of the tests (notably AC_SEARCH_LIBS) -HTS_PROG_CC_WERROR(hts_late_cflags) - -# HTSlib uses X/Open-only facilities (M_SQRT2 etc, drand48() etc), and -# various POSIX functions that are provided by various _POSIX_C_SOURCE values -# or by _XOPEN_SOURCE >= 500. It also uses usleep(), which is removed when -# _XOPEN_SOURCE >= 700. Additionally, some definitions may require -# _XOPEN_SOURCE >= 600 on some platforms (snprintf on MinGW, -# PTHREAD_MUTEX_RECURSIVE on some Linux distributions). Hence we set it to 600. - -# Define _XOPEN_SOURCE unless the user has already done so via $CPPFLAGS etc. -AC_CHECK_DECL([_XOPEN_SOURCE], [], - [AC_DEFINE([_XOPEN_SOURCE], [600], [Specify X/Open requirements])], - []) - - -dnl Check for various compiler flags to enable SIMD features -dnl Options for rANS32x16 sse4.1 version - ssse3 -hts_cflags_sse4="" -HTS_CHECK_COMPILE_FLAGS_NEEDED([ssse3], [-mssse3], [AC_LANG_PROGRAM([[ - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - ]],[[ - #ifdef __x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_shuffle_epi8(a, b); - return *((char *) &c); - #endif - ]])], [ - hts_cflags_sse4="$flags_needed $hts_cflags_sse4" - AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.]) -]) - -dnl Options for rANS32x16 sse4.1 version - popcnt -HTS_CHECK_COMPILE_FLAGS_NEEDED([popcnt], [-mpopcnt], [AC_LANG_PROGRAM([[ - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - ]],[[ - #ifdef __x86_64__ - unsigned int i = _mm_popcnt_u32(1); - return i != 1; - #endif - ]])], [ - hts_cflags_sse4="$flags_needed $hts_cflags_sse4" - AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.]) -]) - -dnl Options for rANS32x16 sse4.1 version - sse4.1 -HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1], [AC_LANG_PROGRAM([[ - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - ]],[[ - #ifdef 
__x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_max_epu32(a, b); - return *((char *) &c); - #endif - ]])], [ - hts_cflags_sse4="$flags_needed $hts_cflags_sse4" - AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if rANS source using SSE4.1 can be compiled. -]) -dnl Propagate HTSlib's unaligned access preference to htscodecs - AH_VERBATIM([UBSAN],[ -/* Prevent unaligned access in htscodecs SSE4 rANS codec */ -#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0 -#undef UBSAN -#endif]) - AC_DEFINE([UBSAN],1,[]) -]) -AC_SUBST([hts_cflags_sse4]) - -dnl Options for rANS32x16 avx2 version -HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2], [AC_LANG_PROGRAM([[ - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - ]],[[ - #ifdef __x86_64__ - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); - long long c = _mm256_extract_epi64(b, 0); - return (int) c; - #endif - ]])], [ - hts_cflags_avx2="$flags_needed" - AC_SUBST([hts_cflags_avx2]) - AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if rANS source using AVX2 can be compiled.]) -]) - -dnl Options for rANS32x16 avx512 version -HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f], [AC_LANG_PROGRAM([[ - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - ]],[[ - #ifdef __x86_64__ - __m512i a = _mm512_set1_epi32(1); - __m512i b = _mm512_add_epi32(a, a); - return *((char *) &b); - #endif - ]])], [ - hts_cflags_avx512="$flags_needed" - AC_SUBST([hts_cflags_avx512]) - AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if rANS source using AVX512F can be compiled.]) -]) - -dnl Avoid chicken-and-egg problem where pkg-config supplies the -dnl PKG_PROG_PKG_CONFIG macro, but we want to use it to check -dnl for pkg-config... 
-m4_ifdef([PKG_PROG_PKG_CONFIG], [PKG_PROG_PKG_CONFIG], [PKG_CONFIG=""]) - -need_crypto=no -pc_requires= -static_LDFLAGS=$LDFLAGS -static_LIBS='-lpthread -lz -lm' -private_LIBS=$LDFLAGS - -AC_ARG_ENABLE([versioned-symbols], - [AS_HELP_STRING([--disable-versioned-symbols], - [disable versioned symbols in shared library])], - [], [enable_versioned_symbols=yes]) - -AC_ARG_ENABLE([bz2], - [AS_HELP_STRING([--disable-bz2], - [omit support for BZ2-compressed CRAM files])], - [], [enable_bz2=yes]) - -AC_ARG_ENABLE([gcs], - [AS_HELP_STRING([--enable-gcs], - [support Google Cloud Storage URLs])], - [], [enable_gcs=check]) - -AC_SYS_LARGEFILE - -AC_ARG_ENABLE([libcurl], - [AS_HELP_STRING([--enable-libcurl], - [enable libcurl-based support for http/https/etc URLs])], - [], [enable_libcurl=check]) - -AC_ARG_ENABLE([lzma], - [AS_HELP_STRING([--disable-lzma], - [omit support for LZMA-compressed CRAM files])], - [], [enable_lzma=yes]) - -AC_ARG_ENABLE([plugins], - [AS_HELP_STRING([--enable-plugins], - [enable separately-compiled plugins for file access])], - [], [enable_plugins=no]) -AC_SUBST(enable_plugins) - -AC_ARG_WITH([external-htscodecs], - [AS_HELP_STRING([--with-external-htscodecs], - [get htscodecs functions from a shared library])], - [], [with_external_htscodecs=no]) -AC_SUBST(with_external_htscodecs) - -AC_ARG_WITH([libdeflate], - [AS_HELP_STRING([--with-libdeflate], - [use libdeflate for faster crc and deflate algorithms])], - [], [with_libdeflate=check]) - -AC_ARG_WITH([plugin-dir], - [AS_HELP_STRING([--with-plugin-dir=DIR], - [plugin installation location [LIBEXECDIR/htslib]])], - [case $withval in - yes|no) MSG_ERROR([no directory specified for --with-plugin-dir]) ;; - esac], - [with_plugin_dir='$(libexecdir)/htslib']) -AC_SUBST([plugindir], $with_plugin_dir) - -AC_ARG_WITH([plugin-path], - [AS_HELP_STRING([--with-plugin-path=PATH], - [default HTS_PATH plugin search path [PLUGINDIR]])], - [case $withval in - yes) MSG_ERROR([no path specified for 
--with-plugin-path]) ;; - no) with_plugin_path= ;; - esac], - [with_plugin_path=$with_plugin_dir]) -AC_SUBST([pluginpath], $with_plugin_path) - -AC_ARG_ENABLE([s3], - [AS_HELP_STRING([--enable-s3], - [support Amazon AWS S3 URLs])], - [], [enable_s3=check]) - -basic_host=${host_alias:-unknown-`uname -s`} -AC_MSG_CHECKING([shared library type for $basic_host]) -case $basic_host in - *-cygwin* | *-CYGWIN*) - host_result="Cygwin DLL" - PLATFORM=CYGWIN - PLUGIN_EXT=.cygdll - ;; - *-darwin* | *-Darwin*) - host_result="Darwin dylib" - PLATFORM=Darwin - PLUGIN_EXT=.bundle - ;; - *-msys* | *-MSYS* | *-mingw* | *-MINGW*) - host_result="MSYS dll" - PLATFORM=MSYS - PLUGIN_EXT=.dll - # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64, - # %lld and %z printf formats work. It also enforces the snprintf to - # be C99 compliant so it returns the correct values (in kstring.c). - - # Now set by default, so no need to do it here. - # CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE=600" - ;; - *) - host_result="plain .so" - PLATFORM=default - PLUGIN_EXT=.so - ;; -esac -AC_MSG_RESULT([$host_result]) -AC_SUBST([PLATFORM]) - -dnl Check for versioned symbol support -dnl Only try for .so shared libraries as other types won't work -AS_IF([test x"$PLATFORM" = xdefault && test x"$enable_versioned_symbols" = xyes], - [AC_CACHE_CHECK([whether the linker supports versioned symbols], - [hts_cv_have_versioned_symbols], [ - save_LDFLAGS=$LDFLAGS - LDFLAGS="-Wl,-version-script,$srcdir/htslib.map $LDFLAGS" - AC_LINK_IFELSE([AC_LANG_PROGRAM()], - [hts_cv_have_versioned_symbols=yes], - [hts_cv_have_versioned_symbols=no]) - LDFLAGS=$save_LDFLAGS - ]) - AS_IF([test "x$hts_cv_have_versioned_symbols" = xyes],[ - VERSION_SCRIPT_LDFLAGS='-Wl,-version-script,$(srcprefix)htslib.map' - AC_SUBST([VERSION_SCRIPT_LDFLAGS]) - ]) -]) - -dnl Try to get more control over which symbols are exported in the shared -dnl library. 
-HTS_HIDE_DYNAMIC_SYMBOLS - -dnl FIXME This pulls in dozens of standard header checks -AC_FUNC_MMAP -AC_CHECK_FUNCS([gmtime_r fsync drand48 srand48_deterministic]) - -# Darwin has a dubious fdatasync() symbol, but no declaration in -AC_CHECK_DECL([fdatasync(int)], [AC_CHECK_FUNCS(fdatasync)]) - -if test $enable_plugins != no; then - AC_SEARCH_LIBS([dlsym], [dl], [], - [MSG_ERROR([dlsym() not found - -Plugin support requires dynamic linking facilities from the operating system. -Either configure with --disable-plugins or resolve this error to build HTSlib.])]) - # Check if the compiler understands -rdynamic - # TODO Test whether this is required and/or needs tweaking per-platform - HTS_TEST_CC_C_LD_FLAG([-rdynamic],[rdynamic_flag]) - AS_IF([test x"$rdynamic_flag" != "xno"], - [LDFLAGS="$LDFLAGS $rdynamic_flag" - static_LDFLAGS="$static_LDFLAGS $rdynamic_flag"]) - case "$ac_cv_search_dlsym" in - -l*) static_LIBS="$static_LIBS $ac_cv_search_dlsym" ;; - esac - AC_DEFINE([ENABLE_PLUGINS], 1, [Define if HTSlib should enable plugins.]) - AC_SUBST([PLUGIN_EXT]) - AC_DEFINE_UNQUOTED([PLUGIN_EXT], ["$PLUGIN_EXT"], - [Platform-dependent plugin filename extension.]) -fi - -AC_SEARCH_LIBS([log], [m], [], - [MSG_ERROR([log() not found - -HTSLIB requires a working floating-point math library. -FAILED. This error must be resolved in order to build HTSlib successfully.])]) - -zlib_devel=ok -dnl Set a trivial non-empty INCLUDES to avoid excess default includes tests -AC_CHECK_HEADER([zlib.h], [], [zlib_devel=missing], [;]) -AC_CHECK_LIB(z, inflate, [], [zlib_devel=missing]) - -if test $zlib_devel != ok; then - MSG_ERROR([zlib development files not found - -HTSlib uses compression routines from the zlib library . -Building HTSlib requires zlib development files to be installed on the build -machine; you may need to ensure a package such as zlib1g-dev (on Debian or -Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions or Cygwin) -is installed. - -FAILED. 
This error must be resolved in order to build HTSlib successfully.]) -fi - -dnl connect() etc. fns are in libc on linux, but libsocket on illumos/Solaris -AC_SEARCH_LIBS([recv], [socket ws2_32], [ -if test "$ac_cv_search_recv" != "none required" -then - static_LIBS="$static_LIBS $ac_cv_search_recv" -fi], - dnl on MinGW-i686, checking recv() linking requires an annotated declaration - [AC_MSG_CHECKING([for library containing recv using declaration]) - LIBS="-lws2_32 $LIBS" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], [[recv(0, 0, 0, 0);]])], - [AC_MSG_RESULT([-lws2_32]) - static_LIBS="$static_LIBS -lws2_32"], - [AC_MSG_RESULT([no]) - MSG_ERROR([unable to find the recv() function])])]) - -if test "$enable_bz2" != no; then - bz2_devel=ok - AC_CHECK_HEADER([bzlib.h], [], [bz2_devel=missing], [;]) - AC_CHECK_LIB([bz2], [BZ2_bzBuffToBuffCompress], [], [bz2_devel=missing]) - if test $bz2_devel != ok; then - MSG_ERROR([libbzip2 development files not found - -The CRAM format may use bzip2 compression, which is implemented in HTSlib -by using compression routines from libbzip2 . - -Building HTSlib requires libbzip2 development files to be installed on the -build machine; you may need to ensure a package such as libbz2-dev (on Debian -or Ubuntu Linux) or bzip2-devel (on RPM-based Linux distributions or Cygwin) -is installed. - -Either configure with --disable-bz2 (which will make some CRAM files -produced elsewhere unreadable) or resolve this error to build HTSlib.]) - fi -dnl Unfortunately the 'bzip2' package-cfg module is not standard. -dnl Redhat/Fedora has it; Debian/Ubuntu does not. 
- if test -n "$PKG_CONFIG" && "$PKG_CONFIG" --exists bzip2; then - pc_requires="$pc_requires bzip2" - else - private_LIBS="$private_LIBS -lbz2" - fi - static_LIBS="$static_LIBS -lbz2" -fi - -if test "$enable_lzma" != no; then - lzma_devel=ok - AC_CHECK_HEADERS([lzma.h], [], [lzma_devel=header-missing], [;]) - AC_CHECK_LIB([lzma], [lzma_easy_buffer_encode], [], [lzma_devel=missing]) - if test $lzma_devel = missing; then - MSG_ERROR([liblzma development files not found - -The CRAM format may use LZMA2 compression, which is implemented in HTSlib -by using compression routines from liblzma . - -Building HTSlib requires liblzma development files to be installed on the -build machine; you may need to ensure a package such as liblzma-dev (on Debian -or Ubuntu Linux), xz-devel (on RPM-based Linux distributions or Cygwin), or -xz (via Homebrew on macOS) is installed; or build XZ Utils from source. - -Either configure with --disable-lzma (which will make some CRAM files -produced elsewhere unreadable) or resolve this error to build HTSlib.]) - fi - pc_requires="$pc_requires liblzma" - static_LIBS="$static_LIBS -llzma" -fi - -AS_IF([test "x$with_external_htscodecs" != "xno"], - [libhtscodecs=ok - AC_CHECK_HEADER([htscodecs/rANS_static4x16.h],[], - [libhtscodecs='missing header'],[;]) - AC_CHECK_LIB([htscodecs],[rans_compress_bound_4x16], - [:],[libhtscodecs='missing library']) - AS_IF([test "$libhtscodecs" = "ok"], - [AC_DEFINE([HAVE_EXTERNAL_LIBHTSCODECS], 1, [Define if using an external libhtscodecs]) - LIBS="-lhtscodecs $LIBS" - private_LIBS="-lhtscodecs $private_LIBS" - static_LIBS="-lhtscodecs $static_LIBS" - selected_htscodecs_mk="htscodecs_external.mk"], - [MSG_ERROR([libhtscodecs development files not found: $libhtscodecs - -You asked to use an external htscodecs library, but do not have the -required header / library files. 
You either need to supply these and -if necessary set CPPFLAGS and LDFLAGS so the compiler can find them; -or configure using --without-external-htscodecs to build the required -functions from the htscodecs submodule. -])])], - [AC_MSG_CHECKING([whether htscodecs files are present]) - AS_IF([test -e "$srcdir/htscodecs/htscodecs/rANS_static4x16.h"], - [AC_MSG_RESULT([yes]) - selected_htscodecs_mk="htscodecs_bundled.mk"], - [AC_MSG_RESULT([no]) - AS_IF([test -e "$srcdir/.git"], - [MSG_ERROR([htscodecs submodule files not present. - -HTSlib uses some functions from the htscodecs project, which is normally -included as a submodule. Try running: - - git submodule update --init --recursive - -in the top-level htslib directory to update it, and then re-run configure. -])], - [MSG_ERROR([htscodecs submodule files not present. - -You have an incomplete distribution. Please try downloading one of the -official releases from https://www.htslib.org -])])])]) - -AS_IF([test "x$with_libdeflate" != "xno"], - [libdeflate=ok - AC_CHECK_HEADER([libdeflate.h],[],[libdeflate='missing header'],[;]) - AC_CHECK_LIB([deflate], [libdeflate_deflate_compress],[:],[libdeflate='missing library']) - AS_IF([test "$libdeflate" = "ok"], - [AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define if libdeflate is available.]) - LIBS="-ldeflate $LIBS" - private_LIBS="$private_LIBS -ldeflate" - static_LIBS="$static_LIBS -ldeflate"], - [AS_IF([test "x$with_libdeflate" != "xcheck"], - [MSG_ERROR([libdeflate development files not found: $libdeflate - -You requested libdeflate, but do not have the required header / library -files. The source for libdeflate is available from -. You may have to adjust -search paths in CPPFLAGS and/or LDFLAGS if the header and library -are not currently on them. 
- -Either configure with --without-libdeflate or resolve this error to build -HTSlib.])])])]) - -libcurl=disabled -if test "$enable_libcurl" != no; then - libcurl_devel=ok - AC_CHECK_HEADER([curl/curl.h], [], [libcurl_devel="headers not found"], [;]) - AC_CHECK_LIB([curl], [curl_easy_pause], [:], - [AC_CHECK_LIB([curl], [curl_easy_init], - [libcurl_devel="library is too old (7.18+ required)"], - [libcurl_devel="library not found"])]) - - if test "$libcurl_devel" = ok; then - AC_DEFINE([HAVE_LIBCURL], 1, [Define if libcurl file access is enabled.]) - libcurl=enabled - elif test "$enable_libcurl" = check; then - AC_MSG_WARN([libcurl not enabled: $libcurl_devel]) - else - MSG_ERROR([libcurl $libcurl_devel - -Support for HTTPS and other SSL-based URLs requires routines from the libcurl -library . Building HTSlib with libcurl enabled -requires libcurl development files to be installed on the build machine; you -may need to ensure a package such as libcurl4-{gnutls,nss,openssl}-dev (on -Debian or Ubuntu Linux) or libcurl-devel (on RPM-based Linux distributions -or Cygwin) is installed. - -Either configure with --disable-libcurl or resolve this error to build HTSlib.]) - fi - -dnl -lcurl is only needed for static linking if hfile_libcurl is not a plugin - if test "$libcurl" = enabled ; then - if test "$enable_plugins" != yes ; then - static_LIBS="$static_LIBS -lcurl" - fi - fi -fi -AC_SUBST([libcurl]) - -gcs=disabled -if test "$enable_gcs" != no; then - if test $libcurl = enabled; then - AC_DEFINE([ENABLE_GCS], 1, [Define if HTSlib should enable GCS support.]) - gcs=enabled - else - case "$enable_gcs" in - check) AC_MSG_WARN([GCS support not enabled: requires libcurl support]) ;; - *) MSG_ERROR([GCS support not enabled - -Support for Google Cloud Storage URLs requires libcurl support to be enabled -in HTSlib. 
Configure with --enable-libcurl in order to use GCS URLs.]) - ;; - esac - fi -fi -AC_SUBST([gcs]) - -s3=disabled -if test "$enable_s3" != no; then - if test $libcurl = enabled; then - s3=enabled - need_crypto="$enable_s3" - else - case "$enable_s3" in - check) AC_MSG_WARN([S3 support not enabled: requires libcurl support]) ;; - *) MSG_ERROR([S3 support not enabled - -Support for Amazon AWS S3 URLs requires libcurl support to be enabled -in HTSlib. Configure with --enable-libcurl in order to use S3 URLs.]) - ;; - esac - fi -fi - -CRYPTO_LIBS= -if test $need_crypto != no; then - AC_CHECK_FUNC([CCHmac], - [AC_DEFINE([HAVE_COMMONCRYPTO], 1, - [Define if you have the Common Crypto library.])], - [save_LIBS=$LIBS - AC_SEARCH_LIBS([HMAC], [crypto], - [AC_DEFINE([HAVE_HMAC], 1, [Define if you have libcrypto-style HMAC().]) - case "$ac_cv_search_HMAC" in - -l*) CRYPTO_LIBS=$ac_cv_search_HMAC ;; - esac], - [case "$need_crypto" in - check) AC_MSG_WARN([S3 support not enabled: requires SSL development files]) - s3=disabled ;; - *) MSG_ERROR([SSL development files not found - -Support for AWS S3 URLs requires routines from an SSL library. Building -HTSlib with libcurl enabled requires SSL development files to be installed -on the build machine; you may need to ensure a package such as libgnutls-dev, -libnss3-dev, or libssl-dev (on Debian or Ubuntu Linux, corresponding to the -libcurl4-*-dev package installed), or openssl-devel (on RPM-based Linux -distributions or Cygwin) is installed. - -Either configure with --disable-s3 or resolve this error to build HTSlib.]) ;; - esac]) - LIBS=$save_LIBS]) -dnl Only need to add to static_LIBS if not building as a plugin - if test "$enable_plugins" != yes ; then - static_LIBS="$static_LIBS $CRYPTO_LIBS" - fi -fi - -dnl Look for regcomp in various libraries (needed on windows/mingw). -AC_SEARCH_LIBS(regcomp, regex, [libregex=needed], []) - -dnl Look for PTHREAD_MUTEX_RECURSIVE. 
-dnl This is normally in pthread.h except on some broken glibc implementations. -dnl Now set by default -dnl AC_CHECK_DECL(PTHREAD_MUTEX_RECURSIVE, [], [AC_DEFINE([_XOPEN_SOURCE],[600], [Needed for PTHREAD_MUTEX_RECURSIVE])], [[#include ]]) - -if test "$s3" = enabled ; then - AC_DEFINE([ENABLE_S3], 1, [Define if HTSlib should enable S3 support.]) -fi - -dnl Apply value from HTS_PROG_CC_WERROR (if set) -AS_IF([test "x$hts_late_cflags" != x],[CFLAGS="$CFLAGS $hts_late_cflags"]) - -AC_SUBST([s3]) -AC_SUBST([CRYPTO_LIBS]) - -AC_SUBST([pc_requires]) -AC_SUBST([private_LIBS]) -AC_SUBST([static_LDFLAGS]) -AC_SUBST([static_LIBS]) - -AC_CONFIG_FILES([config.mk htslib.pc.tmp:htslib.pc.in]) -AC_CONFIG_LINKS([htscodecs.mk:$selected_htscodecs_mk]) - -if test "$srcdir" != .; then - # Set up for a separate build directory. As HTSlib uses a non-recursive - # makefile, we need to create additional build subdirectories explicitly. - AC_CONFIG_LINKS([Makefile:Makefile htslib.mk:htslib.mk]) - AC_CONFIG_FILES([htslib_vars.mk:builddir_vars.mk.in]) - AC_CONFIG_COMMANDS([mkdir], - [AS_MKDIR_P([cram]) - AS_MKDIR_P([htscodecs/htscodecs]) - AS_MKDIR_P([htscodecs/tests]) - AS_MKDIR_P([test/fuzz]) - AS_MKDIR_P([test/longrefs]) - AS_MKDIR_P([test/tabix])]) -fi - -# @HTSDIRslash_if_relsrcdir@ will be empty when $srcdir is absolute -case "$srcdir" in - /*) HTSDIRslash_if_relsrcdir= ;; - *) HTSDIRslash_if_relsrcdir='$(HTSDIR)/' ;; -esac -AC_SUBST([HTSDIRslash_if_relsrcdir]) - -AC_OUTPUT diff --git a/src/htslib-1.18/cram/cram_codecs.c b/src/htslib-1.18/cram/cram_codecs.c deleted file mode 100644 index cc5e52b..0000000 --- a/src/htslib-1.18/cram/cram_codecs.c +++ /dev/null @@ -1,4129 +0,0 @@ -/* -Copyright (c) 2012-2021,2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. 
Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * FIXME: add checking of cram_external_type to return NULL on unsupported - * {codec,type} tuples. 
- */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "../htslib/hts_endian.h" - -#if defined(HAVE_EXTERNAL_LIBHTSCODECS) -#include -#include -#include -#else -#include "../htscodecs/htscodecs/varint.h" -#include "../htscodecs/htscodecs/pack.h" -#include "../htscodecs/htscodecs/rle.h" -#endif - -#include "cram.h" - -/* - * --------------------------------------------------------------------------- - * Block bit-level I/O functions. - * All defined static here to promote easy inlining by the compiler. - */ - -#if 0 -/* Get a single bit, MSB first */ -static signed int get_bit_MSB(cram_block *block) { - unsigned int val; - - if (block->byte > block->alloc) - return -1; - - val = block->data[block->byte] >> block->bit; - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - //printf("(%02X)", block->data[block->byte]); - } - - //printf("-B%d-", val&1); - - return val & 1; -} -#endif - -/* - * Count number of successive 0 and 1 bits - */ -static int get_one_bits_MSB(cram_block *block) { - int n = 0, b; - if (block->byte >= block->uncomp_size) - return -1; - do { - b = block->data[block->byte] >> block->bit; - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - if (block->byte == block->uncomp_size && (b&1)) - return -1; - } - n++; - } while (b&1); - - return n-1; -} - -static int get_zero_bits_MSB(cram_block *block) { - int n = 0, b; - if (block->byte >= block->uncomp_size) - return -1; - do { - b = block->data[block->byte] >> block->bit; - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - if (block->byte == block->uncomp_size && !(b&1)) - return -1; - } - n++; - } while (!(b&1)); - - return n-1; -} - -#if 0 -/* Stores a single bit */ -static void store_bit_MSB(cram_block *block, unsigned int bit) { - if (block->byte >= block->alloc) { - block->alloc = block->alloc ? 
block->alloc*2 : 1024; - block->data = realloc(block->data, block->alloc); - } - - if (bit) - block->data[block->byte] |= (1 << block->bit); - - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - block->data[block->byte] = 0; - } -} -#endif - -#if 0 -/* Rounds to the next whole byte boundary first */ -static void store_bytes_MSB(cram_block *block, char *bytes, int len) { - if (block->bit != 7) { - block->bit = 7; - block->byte++; - } - - while (block->byte + len >= block->alloc) { - block->alloc = block->alloc ? block->alloc*2 : 1024; - block->data = realloc(block->data, block->alloc); - } - - memcpy(&block->data[block->byte], bytes, len); - block->byte += len; -} -#endif - -/* Local optimised copy for inlining */ -static inline int64_t get_bits_MSB(cram_block *block, int nbits) { - uint64_t val = 0; - int i; - -#if 0 - // Fits within the current byte */ - if (nbits <= block->bit+1) { - val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<bit -= nbits) == -1) { - block->bit = 7; - block->byte++; - } - return val; - } - - // partial first byte - val = block->data[block->byte] & ((1<<(block->bit+1))-1); - nbits -= block->bit+1; - block->bit = 7; - block->byte++; - - // whole middle bytes - while (nbits >= 8) { - val = (val << 8) | block->data[block->byte++]; - nbits -= 8; - } - - val <<= nbits; - val |= (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<bit -= nbits; - return val; -#endif - -#if 0 - /* Inefficient implementation! 
*/ - //printf("{"); - for (i = 0; i < nbits; i++) - //val = (val << 1) | get_bit_MSB(block); - GET_BIT_MSB(block, val); -#endif - -#if 1 - /* Combination of 1st two methods */ - if (nbits <= block->bit+1) { - val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<bit -= nbits) == -1) { - block->bit = 7; - block->byte++; - } - return val; - } - - switch(nbits) { -// case 15: GET_BIT_MSB(block, val); // fall through -// case 14: GET_BIT_MSB(block, val); // fall through -// case 13: GET_BIT_MSB(block, val); // fall through -// case 12: GET_BIT_MSB(block, val); // fall through -// case 11: GET_BIT_MSB(block, val); // fall through -// case 10: GET_BIT_MSB(block, val); // fall through -// case 9: GET_BIT_MSB(block, val); // fall through - case 8: GET_BIT_MSB(block, val); // fall through - case 7: GET_BIT_MSB(block, val); // fall through - case 6: GET_BIT_MSB(block, val); // fall through - case 5: GET_BIT_MSB(block, val); // fall through - case 4: GET_BIT_MSB(block, val); // fall through - case 3: GET_BIT_MSB(block, val); // fall through - case 2: GET_BIT_MSB(block, val); // fall through - case 1: GET_BIT_MSB(block, val); - break; - - default: - for (i = 0; i < nbits; i++) - //val = (val << 1) | get_bit_MSB(block); - GET_BIT_MSB(block, val); - } -#endif - - //printf("=0x%x}", val); - - return val; -} - -/* - * Can store up to 24-bits worth of data encoded in an integer value - * Possibly we'd want to have a less optimal store_bits function when dealing - * with nbits > 24, but for now we assume the codes generated are never - * that big. (Given this is only possible with 121392 or more - * characters with exactly the correct frequency distribution we check - * for it elsewhere.) - */ -static int store_bits_MSB(cram_block *block, uint64_t val, int nbits) { - //fprintf(stderr, " store_bits: %02x %d\n", val, nbits); - - /* - * Use slow mode until we tweak the huffman generator to never generate - * codes longer than 24-bits. 
- */ - unsigned int mask; - - if (block->byte+8 >= block->alloc) { - if (block->byte) { - block->alloc *= 2; - block->data = realloc(block->data, block->alloc + 8); - if (!block->data) - return -1; - } else { - block->alloc = 1024; - block->data = realloc(block->data, block->alloc + 8); - if (!block->data) - return -1; - block->data[0] = 0; // initialise first byte of buffer - } - } - - /* fits in current bit-field */ - if (nbits <= block->bit+1) { - block->data[block->byte] |= (val << (block->bit+1-nbits)); - if ((block->bit-=nbits) == -1) { - block->bit = 7; - block->byte++; - block->data[block->byte] = 0; - } - return 0; - } - - block->data[block->byte] |= (val >> (nbits -= block->bit+1)); - block->bit = 7; - block->byte++; - block->data[block->byte] = 0; - - mask = 1<<(nbits-1); - do { - if (val & mask) - block->data[block->byte] |= (1 << block->bit); - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - block->data[block->byte] = 0; - } - mask >>= 1; - } while(--nbits); - - return 0; -} - -/* - * Returns the next 'size' bytes from a block, or NULL if insufficient - * data left.This is just a pointer into the block data and not an - * allocated object, so do not free the result. - */ -static char *cram_extract_block(cram_block *b, int size) { - char *cp = (char *)b->data + b->idx; - b->idx += size; - if (b->idx > b->uncomp_size) - return NULL; - - return cp; -} - -/* - * --------------------------------------------------------------------------- - * EXTERNAL - * - * In CRAM 3.0 and earlier, E_EXTERNAL use the data type to determine the - * size of the object being returned. This type is hard coded in the - * spec document (changing from uint32 to uint64 requires a spec change) - * and there is no data format introspection so implementations have - * to determine which size to use based on version numbers. It also - * doesn't support signed data. 
- * - * With CRAM 4.0 onwards the size and sign of the data is no longer stated - * explicitly in the specification. Instead EXTERNAL is replaced by three - * new encodings, for bytes and signed / unsigned integers which used a - * variable sized encoding. - * - * For simplicity we use the same encode and decode functions for - * bytes (CRAM4) and external (CRAM3). Given we already had code to - * replace codec + type into a function pointer it makes little - * difference how we ended up at that function. However we disallow - * this codec to operate on integer data for CRAM4 onwards. - */ -int cram_external_decode_int(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - char *cp; - cram_block *b; - - /* Find the external block */ - b = cram_get_block_by_id(slice, c->u.external.content_id); - if (!b) - return *out_size?-1:0; - - cp = (char *)b->data + b->idx; - // E_INT and E_LONG are guaranteed single item queries - int err = 0; - *(int32_t *)out = c->vv->varint_get32(&cp, (char *)b->data + b->uncomp_size, &err); - b->idx = cp - (char *)b->data; - *out_size = 1; - - return err ? -1 : 0; -} - -int cram_external_decode_long(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - char *cp; - cram_block *b; - - /* Find the external block */ - b = cram_get_block_by_id(slice, c->u.external.content_id); - if (!b) - return *out_size?-1:0; - - cp = (char *)b->data + b->idx; - // E_INT and E_LONG are guaranteed single item queries - int err = 0; - *(int64_t *)out = c->vv->varint_get64(&cp, (char *)b->data + b->uncomp_size, &err); - b->idx = cp - (char *)b->data; - *out_size = 1; - - return err ? 
-1 : 0; -} - -int cram_external_decode_char(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, - int *out_size) { - char *cp; - cram_block *b; - - /* Find the external block */ - b = cram_get_block_by_id(slice, c->u.external.content_id); - if (!b) - return *out_size?-1:0; - - cp = cram_extract_block(b, *out_size); - if (!cp) - return -1; - - if (out) - memcpy(out, cp, *out_size); - return 0; -} - -static int cram_external_decode_block(cram_slice *slice, cram_codec *c, - cram_block *in, char *out_, - int *out_size) { - char *cp; - cram_block *out = (cram_block *)out_; - cram_block *b = NULL; - - /* Find the external block */ - b = cram_get_block_by_id(slice, c->u.external.content_id); - if (!b) - return *out_size?-1:0; - - cp = cram_extract_block(b, *out_size); - if (!cp) - return -1; - - BLOCK_APPEND(out, cp, *out_size); - return 0; - - block_err: - return -1; -} - -void cram_external_decode_free(cram_codec *c) { - if (c) - free(c); -} - - -int cram_external_decode_size(cram_slice *slice, cram_codec *c) { - cram_block *b; - - /* Find the external block */ - b = cram_get_block_by_id(slice, c->u.external.content_id); - if (!b) - return -1; - - return b->uncomp_size; -} - -cram_block *cram_external_get_block(cram_slice *slice, cram_codec *c) { - return cram_get_block_by_id(slice, c->u.external.content_id); -} - -int cram_external_describe(cram_codec *c, kstring_t *ks) { - return ksprintf(ks, "EXTERNAL(id=%d)", - c->u.external.content_id) < 0 ? -1 : 0; -} - -cram_codec *cram_external_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c = NULL; - char *cp = data; - - if (size < 1) - goto malformed; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_EXTERNAL; - if (CRAM_MAJOR_VERS(version) >= 4) { - // Version 4 does not permit integer data to be encoded as a - // series of bytes. 
This is used purely for bytes, either - // singular or declared as arrays - switch (codec) { - case E_EXTERNAL: - if (option == E_BYTE_ARRAY_BLOCK) - c->decode = cram_external_decode_block; - else if (option == E_BYTE || option == E_BYTE_ARRAY) - c->decode = cram_external_decode_char; - else - return NULL; - break; - default: - return NULL; - } - } else { - // CRAM 3 and earlier encodes integers as EXTERNAL. We need - // use the option field to indicate the input data format so - // we know which serialisation format to use. - if (option == E_INT) - c->decode = cram_external_decode_int; - else if (option == E_LONG) - c->decode = cram_external_decode_long; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->decode = cram_external_decode_char; - else - c->decode = cram_external_decode_block; - } - c->free = cram_external_decode_free; - c->size = cram_external_decode_size; - c->get_block = cram_external_get_block; - c->describe = cram_external_describe; - - c->u.external.content_id = vv->varint_get32(&cp, data+size, NULL); - - if (cp - data != size) - goto malformed; - - c->u.external.type = option; - - return c; - - malformed: - hts_log_error("Malformed external header stream"); - free(c); - return NULL; -} - -int cram_external_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - uint32_t *i32 = (uint32_t *)in; - return c->vv->varint_put32_blk(c->out, *i32) >= 0 ? 0 : -1; -} - -int cram_external_encode_sint(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int32_t *i32 = (int32_t *)in; - return c->vv->varint_put32s_blk(c->out, *i32) >= 0 ? 0 : -1; -} - -int cram_external_encode_long(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - uint64_t *i64 = (uint64_t *)in; - return c->vv->varint_put64_blk(c->out, *i64) >= 0 ? 0 : -1; -} - -int cram_external_encode_slong(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int64_t *i64 = (int64_t *)in; - return c->vv->varint_put64s_blk(c->out, *i64) >= 0 ? 
0 : -1; -} - -int cram_external_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - BLOCK_APPEND(c->out, in, in_size); - return 0; - - block_err: - return -1; -} - -void cram_external_encode_free(cram_codec *c) { - if (!c) - return; - free(c); -} - -int cram_external_encode_store(cram_codec *c, cram_block *b, char *prefix, - int version) { - char tmp[99], *tp = tmp, *tpend = tmp+99; - int len = 0, r = 0, n; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - tp += c->vv->varint_put32(tp, tpend, c->u.e_external.content_id); - len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n; - len += (n = c->vv->varint_put32_blk(b, tp-tmp)); r |= n; - BLOCK_APPEND(b, tmp, tp-tmp); - len += tp-tmp; - - if (r > 0) - return len; - - block_err: - return -1; -} - -cram_codec *cram_external_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_EXTERNAL; - c->free = cram_external_encode_free; - if (CRAM_MAJOR_VERS(version) >= 4) { - // Version 4 does not permit integer data to be encoded as a - // series of bytes. This is used purely for bytes, either - // singular or declared as arrays - switch (codec) { - case E_EXTERNAL: - if (option != E_BYTE && option != E_BYTE_ARRAY) - return NULL; - c->encode = cram_external_encode_char; - break; - default: - return NULL; - } - } else { - // CRAM 3 and earlier encodes integers as EXTERNAL. We need - // use the option field to indicate the input data format so - // we know which serialisation format to use. 
- if (option == E_INT) - c->encode = cram_external_encode_int; - else if (option == E_LONG) - c->encode = cram_external_encode_long; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->encode = cram_external_encode_char; - else - abort(); - } - c->store = cram_external_encode_store; - c->flush = NULL; - - c->u.e_external.content_id = (size_t)dat; - - return c; -} - -/* - * --------------------------------------------------------------------------- - * VARINT - * - * In CRAM 3.0 and earlier, E_EXTERNAL stored both integers in ITF8 - * format as well as bytes. In CRAM 4 EXTERNAL is only for bytes and - * byte arrays, with two dedicated encodings for integers: - * VARINT_SIGNED and VARINT_UNSIGNED. These also differ a little to - * EXTERNAL with the addition of an offset field, meaning we can store - * values in, say, the range -2 to 1 million without needing to use - * a signed zig-zag transformation. - */ -int cram_varint_decode_int(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - char *cp; - cram_block *b; - - /* Find the data block */ - b = cram_get_block_by_id(slice, c->u.varint.content_id); - if (!b) - return *out_size?-1:0; - - cp = (char *)b->data + b->idx; - // E_INT and E_LONG are guaranteed single item queries - int err = 0; - *(int32_t *)out = c->vv->varint_get32(&cp, - (char *)b->data + b->uncomp_size, - &err) + c->u.varint.offset; - b->idx = cp - (char *)b->data; - *out_size = 1; - - return err ? 
-1 : 0; -} - -int cram_varint_decode_sint(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - char *cp; - cram_block *b; - - /* Find the data block */ - b = cram_get_block_by_id(slice, c->u.varint.content_id); - if (!b) - return *out_size?-1:0; - - cp = (char *)b->data + b->idx; - // E_INT and E_LONG are guaranteed single item queries - int err = 0; - *(int32_t *)out = c->vv->varint_get32s(&cp, - (char *)b->data + b->uncomp_size, - &err) + c->u.varint.offset; - b->idx = cp - (char *)b->data; - *out_size = 1; - - return err ? -1 : 0; -} - -int cram_varint_decode_long(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - char *cp; - cram_block *b; - - /* Find the data block */ - b = cram_get_block_by_id(slice, c->u.varint.content_id); - if (!b) - return *out_size?-1:0; - - cp = (char *)b->data + b->idx; - // E_INT and E_LONG are guaranteed single item queries - int err = 0; - *(int64_t *)out = c->vv->varint_get64(&cp, - (char *)b->data + b->uncomp_size, - &err) + c->u.varint.offset; - b->idx = cp - (char *)b->data; - *out_size = 1; - - return err ? -1 : 0; -} - -int cram_varint_decode_slong(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - char *cp; - cram_block *b; - - /* Find the data block */ - b = cram_get_block_by_id(slice, c->u.varint.content_id); - if (!b) - return *out_size?-1:0; - - cp = (char *)b->data + b->idx; - // E_INT and E_LONG are guaranteed single item queries - int err = 0; - *(int64_t *)out = c->vv->varint_get64s(&cp, - (char *)b->data + b->uncomp_size, - &err) + c->u.varint.offset; - b->idx = cp - (char *)b->data; - *out_size = 1; - - return err ? 
-1 : 0; -} - -void cram_varint_decode_free(cram_codec *c) { - if (c) - free(c); -} - -int cram_varint_decode_size(cram_slice *slice, cram_codec *c) { - cram_block *b; - - /* Find the data block */ - b = cram_get_block_by_id(slice, c->u.varint.content_id); - if (!b) - return -1; - - return b->uncomp_size; -} - -cram_block *cram_varint_get_block(cram_slice *slice, cram_codec *c) { - return cram_get_block_by_id(slice, c->u.varint.content_id); -} - -int cram_varint_describe(cram_codec *c, kstring_t *ks) { - return ksprintf(ks, "VARINT(id=%d,offset=%"PRId64",type=%d)", - c->u.varint.content_id, - c->u.varint.offset, - c->u.varint.type) - < 0 ? -1 : 0; -} - -cram_codec *cram_varint_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c; - char *cp = data, *cp_end = data+size; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = codec; - - // Function pointer choice is theoretically by codec type. - // Given we have some vars as int32 and some as int64 we - // use option too for sizing, although on disk format - // does not change. - switch(codec) { - case E_VARINT_UNSIGNED: - c->decode = (option == E_INT) - ? cram_varint_decode_int - : cram_varint_decode_long; - break; - case E_VARINT_SIGNED: - c->decode = (option == E_INT) - ? 
cram_varint_decode_sint - : cram_varint_decode_slong; - break; - default: - return NULL; - } - - c->free = cram_varint_decode_free; - c->size = cram_varint_decode_size; - c->get_block = cram_varint_get_block; - c->describe = cram_varint_describe; - - c->u.varint.content_id = vv->varint_get32 (&cp, cp_end, NULL); - c->u.varint.offset = vv->varint_get64s(&cp, cp_end, NULL); - - if (cp - data != size) { - fprintf(stderr, "Malformed varint header stream\n"); - free(c); - return NULL; - } - - c->u.varint.type = option; - - return c; -} - -int cram_varint_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - uint32_t *i32 = (uint32_t *)in; - return c->vv->varint_put32_blk(c->out, *i32 - c->u.varint.offset) >= 0 - ? 0 : -1; -} - -int cram_varint_encode_sint(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int32_t *i32 = (int32_t *)in; - return c->vv->varint_put32s_blk(c->out, *i32 - c->u.varint.offset) >= 0 - ? 0 : -1; -} - -int cram_varint_encode_long(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - uint64_t *i64 = (uint64_t *)in; - return c->vv->varint_put64_blk(c->out, *i64 - c->u.varint.offset) >= 0 - ? 0 : -1; -} - -int cram_varint_encode_slong(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int64_t *i64 = (int64_t *)in; - return c->vv->varint_put64s_blk(c->out, *i64 - c->u.varint.offset) >= 0 - ? 
0 : -1; -} - -void cram_varint_encode_free(cram_codec *c) { - if (!c) - return; - free(c); -} - -int cram_varint_encode_store(cram_codec *c, cram_block *b, char *prefix, - int version) { - char tmp[99], *tp = tmp; - int len = 0; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - tp += c->vv->varint_put32 (tp, NULL, c->u.e_varint.content_id); - tp += c->vv->varint_put64s(tp, NULL, c->u.e_varint.offset); - len += c->vv->varint_put32_blk(b, c->codec); - len += c->vv->varint_put32_blk(b, tp-tmp); - BLOCK_APPEND(b, tmp, tp-tmp); - len += tp-tmp; - - return len; - - block_err: - return -1; -} - -cram_codec *cram_varint_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->u.e_varint.offset = 0; - if (st) { - // Marginal difference so far! Not worth the hassle? - if (st->min_val < 0 && st->min_val >= -127 - && st->max_val / -st->min_val > 100) { - c->u.e_varint.offset = -st->min_val; - codec = E_VARINT_UNSIGNED; - } else if (st->min_val > 0) { - c->u.e_varint.offset = -st->min_val; - } - } - - c->codec = codec; - c->free = cram_varint_encode_free; - - // Function pointer choice is theoretically by codec type. - // Given we have some vars as int32 and some as int64 we - // use option too for sizing, although on disk format - // does not change. - switch (codec) { - case E_VARINT_UNSIGNED: - c->encode = (option == E_INT) - ? cram_varint_encode_int - : cram_varint_encode_long; - break; - case E_VARINT_SIGNED: - c->encode = (option == E_INT) - ? 
cram_varint_encode_sint - : cram_varint_encode_slong; - break; - default: - return NULL; - } - c->store = cram_varint_encode_store; - c->flush = NULL; - - c->u.e_varint.content_id = (size_t)dat; - - return c; -} -/* - * --------------------------------------------------------------------------- - * CONST_BYTE and CONST_INT - */ -int cram_const_decode_byte(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int i, n; - - for (i = 0, n = *out_size; i < n; i++) - out[i] = c->u.xconst.val; - - return 0; -} - -int cram_const_decode_int(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n; - - for (i = 0, n = *out_size; i < n; i++) - out_i[i] = c->u.xconst.val; - - return 0; -} - -int cram_const_decode_long(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int64_t *out_i = (int64_t *)out; - int i, n; - - for (i = 0, n = *out_size; i < n; i++) - out_i[i] = c->u.xconst.val; - - return 0; -} - -void cram_const_decode_free(cram_codec *c) { - if (c) - free(c); -} - -int cram_const_decode_size(cram_slice *slice, cram_codec *c) { - return 0; -} - -int cram_const_describe(cram_codec *c, kstring_t *ks) { - return ksprintf(ks, "CONST(val=%"PRId64")", - c->u.xconst.val) < 0 ? 
-1 : 0; -} - -cram_codec *cram_const_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c; - char *cp = data; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = codec; - if (codec == E_CONST_BYTE) - c->decode = cram_const_decode_byte; - else if (option == E_INT) - c->decode = cram_const_decode_int; - else - c->decode = cram_const_decode_long; - c->free = cram_const_decode_free; - c->size = cram_const_decode_size; - c->get_block = NULL; - c->describe = cram_const_describe; - - c->u.xconst.val = vv->varint_get64s(&cp, data+size, NULL); - - if (cp - data != size) { - fprintf(stderr, "Malformed const header stream\n"); - free(c); - return NULL; - } - - return c; -} - -int cram_const_encode(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - return 0; -} - -int cram_const_encode_store(cram_codec *c, cram_block *b, char *prefix, - int version) { - char tmp[99], *tp = tmp; - int len = 0; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - tp += c->vv->varint_put64s(tp, NULL, c->u.xconst.val); - len += c->vv->varint_put32_blk(b, c->codec); - len += c->vv->varint_put32_blk(b, tp-tmp); - BLOCK_APPEND(b, tmp, tp-tmp); - len += tp-tmp; - - return len; - - block_err: - return -1; -} - -cram_codec *cram_const_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = codec; - c->free = cram_const_decode_free; // as as decode - c->encode = cram_const_encode; // a nop - c->store = cram_const_encode_store; - c->flush = NULL; - c->u.e_xconst.val = st->min_val; - - return c; -} - -/* - * --------------------------------------------------------------------------- - * BETA - */ -int cram_beta_decode_long(cram_slice *slice, cram_codec 
*c, cram_block *in, char *out, int *out_size) { - int64_t *out_i = (int64_t *)out; - int i, n = *out_size; - - if (c->u.beta.nbits) { - if (cram_not_enough_bits(in, c->u.beta.nbits * n)) - return -1; - - for (i = 0; i < n; i++) - out_i[i] = get_bits_MSB(in, c->u.beta.nbits) - c->u.beta.offset; - } else { - for (i = 0; i < n; i++) - out_i[i] = -c->u.beta.offset; - } - - return 0; -} - -int cram_beta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n = *out_size; - - if (c->u.beta.nbits) { - if (cram_not_enough_bits(in, c->u.beta.nbits * n)) - return -1; - - for (i = 0; i < n; i++) - out_i[i] = get_bits_MSB(in, c->u.beta.nbits) - c->u.beta.offset; - } else { - for (i = 0; i < n; i++) - out_i[i] = -c->u.beta.offset; - } - - return 0; -} - -int cram_beta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int i, n = *out_size; - - - if (c->u.beta.nbits) { - if (cram_not_enough_bits(in, c->u.beta.nbits * n)) - return -1; - - if (out) - for (i = 0; i < n; i++) - out[i] = get_bits_MSB(in, c->u.beta.nbits) - c->u.beta.offset; - else - for (i = 0; i < n; i++) - get_bits_MSB(in, c->u.beta.nbits); - } else { - if (out) - for (i = 0; i < n; i++) - out[i] = -c->u.beta.offset; - } - - return 0; -} - -void cram_beta_decode_free(cram_codec *c) { - if (c) - free(c); -} - -int cram_beta_describe(cram_codec *c, kstring_t *ks) { - return ksprintf(ks, "BETA(offset=%d, nbits=%d)", - c->u.beta.offset, c->u.beta.nbits) - < 0 ? 
-1 : 0; -} - -cram_codec *cram_beta_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c; - char *cp = data; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_BETA; - if (option == E_INT || option == E_SINT) - c->decode = cram_beta_decode_int; - else if (option == E_LONG || option == E_SLONG) - c->decode = cram_beta_decode_long; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->decode = cram_beta_decode_char; - else { - hts_log_error("BYTE_ARRAYs not supported by this codec"); - free(c); - return NULL; - } - c->free = cram_beta_decode_free; - c->describe = cram_beta_describe; - - c->u.beta.nbits = -1; - c->u.beta.offset = vv->varint_get32(&cp, data + size, NULL); - if (cp < data + size) // Ensure test below works - c->u.beta.nbits = vv->varint_get32(&cp, data + size, NULL); - - if (cp - data != size - || c->u.beta.nbits < 0 || c->u.beta.nbits > 8 * sizeof(int)) { - hts_log_error("Malformed beta header stream"); - free(c); - return NULL; - } - - return c; -} - -int cram_beta_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0, r = 0, n; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n; - // codec length - len += (n = c->vv->varint_put32_blk(b, c->vv->varint_size(c->u.e_beta.offset) - + c->vv->varint_size(c->u.e_beta.nbits))); - r |= n; - len += (n = c->vv->varint_put32_blk(b, c->u.e_beta.offset)); r |= n; - len += (n = c->vv->varint_put32_blk(b, c->u.e_beta.nbits)); r |= n; - - if (r > 0) return len; - - block_err: - return -1; -} - -int cram_beta_encode_long(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int64_t *syms = (int64_t *)in; - int i, r = 0; - - for (i = 0; i < in_size; i++) - r |= store_bits_MSB(c->out, syms[i] + c->u.e_beta.offset, - 
c->u.e_beta.nbits); - - return r; -} - -int cram_beta_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int *syms = (int *)in; - int i, r = 0; - - for (i = 0; i < in_size; i++) - r |= store_bits_MSB(c->out, syms[i] + c->u.e_beta.offset, - c->u.e_beta.nbits); - - return r; -} - -int cram_beta_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - unsigned char *syms = (unsigned char *)in; - int i, r = 0; - - for (i = 0; i < in_size; i++) - r |= store_bits_MSB(c->out, syms[i] + c->u.e_beta.offset, - c->u.e_beta.nbits); - - return r; -} - -void cram_beta_encode_free(cram_codec *c) { - if (c) free(c); -} - -cram_codec *cram_beta_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - int min_val, max_val, len = 0; - int64_t range; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_BETA; - c->free = cram_beta_encode_free; - if (option == E_INT || option == E_SINT) - c->encode = cram_beta_encode_int; - else if (option == E_LONG || option == E_SLONG) - c->encode = cram_beta_encode_long; - else - c->encode = cram_beta_encode_char; - c->store = cram_beta_encode_store; - c->flush = NULL; - - if (dat) { - min_val = ((int *)dat)[0]; - max_val = ((int *)dat)[1]; - } else { - min_val = INT_MAX; - max_val = INT_MIN; - int i; - for (i = 0; i < MAX_STAT_VAL; i++) { - if (!st->freqs[i]) - continue; - if (min_val > i) - min_val = i; - max_val = i; - } - if (st->h) { - khint_t k; - - for (k = kh_begin(st->h); k != kh_end(st->h); k++) { - if (!kh_exist(st->h, k)) - continue; - - i = kh_key(st->h, k); - if (min_val > i) - min_val = i; - if (max_val < i) - max_val = i; - } - } - } - - assert(max_val >= min_val); - c->u.e_beta.offset = -min_val; - range = (int64_t) max_val - min_val; - while (range) { - len++; - range >>= 1; - } - c->u.e_beta.nbits = len; - - return c; -} - -/* - * 
--------------------------------------------------------------------------- - * XPACK: Packing multiple values into a single byte. A fast transform that - * reduces time taken by entropy encoder and may also improve compression. - * - * This also has the additional requirement that the data series is not - * interleaved with another, permitting efficient encoding and decoding - * of all elements enmasse instead of needing to only extract the bits - * necessary per item. - */ -int cram_xpack_decode_long(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int64_t *out_i = (int64_t *)out; - int i, n = *out_size; - - if (c->u.xpack.nbits) { - for (i = 0; i < n; i++) - out_i[i] = c->u.xpack.rmap[get_bits_MSB(in, c->u.xpack.nbits)]; - } else { - for (i = 0; i < n; i++) - out_i[i] = c->u.xpack.rmap[0]; - } - - return 0; -} - -int cram_xpack_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n = *out_size; - - if (c->u.xpack.nbits) { - if (cram_not_enough_bits(in, c->u.xpack.nbits * n)) - return -1; - - for (i = 0; i < n; i++) - out_i[i] = c->u.xpack.rmap[get_bits_MSB(in, c->u.xpack.nbits)]; - } else { - for (i = 0; i < n; i++) - out_i[i] = c->u.xpack.rmap[0]; - } - - return 0; -} - -static int cram_xpack_decode_expand_char(cram_slice *slice, cram_codec *c) { - cram_block *b = slice->block_by_id[512 + c->codec_id]; - if (b) - return 0; - - // get sub-codec data. 
- cram_block *sub_b = c->u.xpack.sub_codec->get_block(slice, c->u.xpack.sub_codec); - if (!sub_b) - return -1; - - // Allocate local block to expand into - b = slice->block_by_id[512 + c->codec_id] = cram_new_block(0, 0); - if (!b) - return -1; - int n = sub_b->uncomp_size * 8/c->u.xpack.nbits; - BLOCK_GROW(b, n); - b->uncomp_size = n; - - uint8_t p[256]; - int z; - for (z = 0; z < 256; z++) - p[z] = c->u.xpack.rmap[z]; - hts_unpack(sub_b->data, sub_b->uncomp_size, b->data, b->uncomp_size, - 8 / c->u.xpack.nbits, p); - - return 0; - - block_err: - return -1; -} - -int cram_xpack_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - // FIXME: we need to ban data-series interleaving in the spec for this to work. - - // Remember this may be called when threaded and multi-slice per container. - // Hence one cram_codec instance, multiple slices, multiple blocks. - // We therefore have to cache appropriate block info in slice and not codec. - // b = cram_get_block_by_id(slice, c->external.content_id); - if (c->u.xpack.nval > 1) { - cram_xpack_decode_expand_char(slice, c); - cram_block *b = slice->block_by_id[512 + c->codec_id]; - if (!b) - return -1; - - if (out) - memcpy(out, b->data + b->byte, *out_size); - b->byte += *out_size; - } else { - memset(out, c->u.xpack.rmap[0], *out_size); - } - - return 0; -} - -void cram_xpack_decode_free(cram_codec *c) { - if (!c) return; - - if (c->u.xpack.sub_codec) - c->u.xpack.sub_codec->free(c->u.xpack.sub_codec); - - //free(slice->block_by_id[512 + c->codec_id]); - //slice->block_by_id[512 + c->codec_id] = 0; - - free(c); -} - -int cram_xpack_decode_size(cram_slice *slice, cram_codec *c) { - cram_xpack_decode_expand_char(slice, c); - return slice->block_by_id[512 + c->codec_id]->uncomp_size; -} - -cram_block *cram_xpack_get_block(cram_slice *slice, cram_codec *c) { - cram_xpack_decode_expand_char(slice, c); - return slice->block_by_id[512 + c->codec_id]; -} - -cram_codec 
*cram_xpack_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c; - char *cp = data; - char *endp = data+size; - - if (!(c = calloc(1, sizeof(*c)))) - return NULL; - - c->codec = E_XPACK; - if (option == E_LONG) - c->decode = cram_xpack_decode_long; - else if (option == E_INT) - c->decode = cram_xpack_decode_int; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->decode = cram_xpack_decode_char; - else { - fprintf(stderr, "BYTE_ARRAYs not supported by this codec\n"); - goto malformed; - } - c->free = cram_xpack_decode_free; - c->size = cram_xpack_decode_size; - c->get_block = cram_xpack_get_block; - c->describe = NULL; - - c->u.xpack.nbits = vv->varint_get32(&cp, endp, NULL); - c->u.xpack.nval = vv->varint_get32(&cp, endp, NULL); - if (c->u.xpack.nbits >= 8 || c->u.xpack.nbits < 0 || - c->u.xpack.nval > 256 || c->u.xpack.nval < 0) - goto malformed; - int i; - for (i = 0; i < c->u.xpack.nval; i++) { - uint32_t v = vv->varint_get32(&cp, endp, NULL); - if (v >= 256) - goto malformed; - c->u.xpack.rmap[i] = v; // reverse map: e.g 0-3 to P,A,C,K - } - - int encoding = vv->varint_get32(&cp, endp, NULL); - int sub_size = vv->varint_get32(&cp, endp, NULL); - if (sub_size < 0 || endp - cp < sub_size) - goto malformed; - c->u.xpack.sub_codec = cram_decoder_init(hdr, encoding, cp, sub_size, - option, version, vv); - if (c->u.xpack.sub_codec == NULL) - goto malformed; - cp += sub_size; - - if (cp - data != size - || c->u.xpack.nbits < 0 || c->u.xpack.nbits > 8 * sizeof(int64_t)) { - malformed: - fprintf(stderr, "Malformed xpack header stream\n"); - cram_xpack_decode_free(c); - return NULL; - } - - return c; -} - -int cram_xpack_encode_flush(cram_codec *c) { - // Pack the buffered up data - int meta_len; - uint64_t out_len; - uint8_t out_meta[1024]; - uint8_t *out = hts_pack(BLOCK_DATA(c->out), BLOCK_SIZE(c->out), - out_meta, &meta_len, 
&out_len); - - // We now need to pass this through the next layer of transform - if (c->u.e_xpack.sub_codec->encode(NULL, // also indicates flush incoming - c->u.e_xpack.sub_codec, - (char *)out, out_len)) - return -1; - - int r = 0; - if (c->u.e_xpack.sub_codec->flush) - r = c->u.e_xpack.sub_codec->flush(c->u.e_xpack.sub_codec); - - free(out); - return r; -} - -int cram_xpack_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0, r = 0, n; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - // Store sub-codec - cram_codec *tc = c->u.e_xpack.sub_codec; - cram_block *tb = cram_new_block(0, 0); - if (!tb) - return -1; - int len2 = tc->store(tc, tb, NULL, version); - - len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n; - - // codec length - int len1 = 0, i; - for (i = 0; i < c->u.e_xpack.nval; i++) - len1 += (n = c->vv->varint_size(c->u.e_xpack.rmap[i])), r |= n; - len += (n = c->vv->varint_put32_blk(b, c->vv->varint_size(c->u.e_xpack.nbits) - + c->vv->varint_size(c->u.e_xpack.nval) - + len1 + len2)); r |= n; - - // The map and sub-codec - len += (n = c->vv->varint_put32_blk(b, c->u.e_xpack.nbits)); r |= n; - len += (n = c->vv->varint_put32_blk(b, c->u.e_xpack.nval)); r |= n; - for (i = 0; i < c->u.e_xpack.nval; i++) - len += (n = c->vv->varint_put32_blk(b, c->u.e_xpack.rmap[i])), r |= n; - - BLOCK_APPEND(b, BLOCK_DATA(tb), BLOCK_SIZE(tb)); - - cram_free_block(tb); - - return r > 0 ? 
len + len2 : -1; - - block_err: - return -1; -} - -// Same as cram_beta_encode_long -int cram_xpack_encode_long(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int64_t *syms = (int64_t *)in; - int i, r = 0; - - for (i = 0; i < in_size; i++) - r |= store_bits_MSB(c->out, c->u.e_xpack.map[syms[i]], c->u.e_xpack.nbits); - - return r; -} - -int cram_xpack_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int *syms = (int *)in; - int i, r = 0; - - for (i = 0; i < in_size; i++) - r |= store_bits_MSB(c->out, c->u.e_xpack.map[syms[i]], c->u.e_xpack.nbits); - - return r; -} - -int cram_xpack_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - BLOCK_APPEND(c->out, in, in_size); - return 0; - - block_err: - return -1; -} - -void cram_xpack_encode_free(cram_codec *c) { - if (!c) return; - - if (c->u.e_xpack.sub_codec) - c->u.e_xpack.sub_codec->free(c->u.e_xpack.sub_codec); - - cram_free_block(c->out); - - free(c); -} - -cram_codec *cram_xpack_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_XPACK; - c->free = cram_xpack_encode_free; - if (option == E_LONG) - c->encode = cram_xpack_encode_long; - else if (option == E_INT) - c->encode = cram_xpack_encode_int; - else - c->encode = cram_xpack_encode_char; - c->store = cram_xpack_encode_store; - c->flush = cram_xpack_encode_flush; - - cram_xpack_encoder *e = (cram_xpack_encoder *)dat; - c->u.e_xpack.nbits = e->nbits; - c->u.e_xpack.nval = e->nval; - c->u.e_xpack.sub_codec = cram_encoder_init(e->sub_encoding, NULL, - E_BYTE_ARRAY, e->sub_codec_dat, - version, vv); - - // Initialise fwd and rev maps - memcpy(c->u.e_xpack.map, e->map, sizeof(e->map)); // P,A,C,K to 0,1,2,3 - int i, n; - for (i = n = 0; i < 256; i++) - if (e->map[i] != -1) - c->u.e_xpack.rmap[n++] = i; // 0,1,2,3 to P,A,C,K - if (n != 
e->nval) { - fprintf(stderr, "Incorrectly specified number of map items in PACK\n"); - return NULL; - } - - return c; -} - -/* - * --------------------------------------------------------------------------- - * XDELTA: subtract successive values, zig-zag to turn +/- to + only, - * and then var-int encode the result. - * - * This also has the additional requirement that the data series is not - * interleaved with another, permitting efficient encoding and decoding - * of all elements enmasse instead of needing to only extract the bits - * necessary per item. - */ - -static uint8_t zigzag8 (int8_t x) { return (x << 1) ^ (x >> 7); } -static uint16_t zigzag16(int16_t x) { return (x << 1) ^ (x >> 15); } -static uint32_t zigzag32(int32_t x) { return (x << 1) ^ (x >> 31); } - -//static int8_t unzigzag8 (uint8_t x) { return (x >> 1) ^ -(x & 1); } -static int16_t unzigzag16(uint16_t x) { return (x >> 1) ^ -(x & 1); } -static int32_t unzigzag32(uint32_t x) { return (x >> 1) ^ -(x & 1); } - -int cram_xdelta_decode_long(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - return -1; -} - -int cram_xdelta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - // Slow value-by-value method for now - uint32_t *out32 = (uint32_t *)out; - int i; - for (i = 0; i < *out_size; i++) { - uint32_t v; - int one = 1; - if (c->u.e_xdelta.sub_codec->decode(slice, c->u.e_xdelta.sub_codec, in, - (char *)&v, &one) < 0) - return -1; - uint32_t d = unzigzag32(v); - c->u.xdelta.last = out32[i] = d + c->u.xdelta.last; - } - - return 0; -} - -static int cram_xdelta_decode_expand_char(cram_slice *slice, cram_codec *c) { - return -1; -} - -int cram_xdelta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - return -1; -} - -static inline int16_t le_int2(int16_t i) { - int16_t s; - i16_to_le(i, (uint8_t *)&s); - return s; -} - -int cram_xdelta_decode_block(cram_slice *slice, cram_codec *c, cram_block 
*in, - char *out_, int *out_size) { - cram_block *out = (cram_block *)out_; - cram_block *b = c->u.e_xdelta.sub_codec->get_block(slice, c->u.e_xdelta.sub_codec); - int i = 0; - - const int w = c->u.xdelta.word_size; - uint32_t npad = (w - *out_size%w)%w; - uint32_t out_sz = *out_size + npad; - c->u.xdelta.last = 0; // reset for each new array - - for (i = 0; i < out_sz; i += w) { - uint16_t v; - // Need better interface - char *cp = (char *)b->data + b->byte; - char *cp_end = (char *)b->data + b->uncomp_size; - int err = 0; - v = c->vv->varint_get32(&cp, cp_end, &err); - if (err) - return -1; - b->byte = cp - (char *)b->data; - - switch(w) { - case 2: { - int16_t d = unzigzag16(v), z; - c->u.xdelta.last = d + c->u.xdelta.last; - z = le_int2(c->u.xdelta.last); - BLOCK_APPEND(out, &z, 2-npad); - npad = 0; - break; - } - default: - fprintf(stderr, "Unsupported word size by XDELTA\n"); - return -1; - } - } - - return 0; - - block_err: - return -1; -} - -void cram_xdelta_decode_free(cram_codec *c) { - if (!c) return; - - if (c->u.xdelta.sub_codec) - c->u.xdelta.sub_codec->free(c->u.xdelta.sub_codec); - - free(c); -} - -int cram_xdelta_decode_size(cram_slice *slice, cram_codec *c) { - cram_xdelta_decode_expand_char(slice, c); - return slice->block_by_id[512 + c->codec_id]->uncomp_size; -} - -cram_block *cram_xdelta_get_block(cram_slice *slice, cram_codec *c) { - cram_xdelta_decode_expand_char(slice, c); - return slice->block_by_id[512 + c->codec_id]; -} - -cram_codec *cram_xdelta_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c; - char *cp = data; - char *endp = data+size; - - if (!(c = calloc(1, sizeof(*c)))) - return NULL; - - c->codec = E_XDELTA; - if (option == E_LONG) - c->decode = cram_xdelta_decode_long; - else if (option == E_INT) - c->decode = cram_xdelta_decode_int; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->decode 
= cram_xdelta_decode_char; - else if (option == E_BYTE_ARRAY_BLOCK) { - option = E_BYTE_ARRAY; - c->decode = cram_xdelta_decode_block; - } else { - free(c); - return NULL; - } - c->free = cram_xdelta_decode_free; - c->size = cram_xdelta_decode_size; - c->get_block = cram_xdelta_get_block; - c->describe = NULL; - - c->u.xdelta.word_size = vv->varint_get32(&cp, endp, NULL); - c->u.xdelta.last = 0; - - int encoding = vv->varint_get32(&cp, endp, NULL); - int sub_size = vv->varint_get32(&cp, endp, NULL); - if (sub_size < 0 || endp - cp < sub_size) - goto malformed; - c->u.xdelta.sub_codec = cram_decoder_init(hdr, encoding, cp, sub_size, - option, version, vv); - if (c->u.xdelta.sub_codec == NULL) - goto malformed; - cp += sub_size; - - if (cp - data != size) { - malformed: - fprintf(stderr, "Malformed xdelta header stream\n"); - cram_xdelta_decode_free(c); - return NULL; - } - - return c; -} - -int cram_xdelta_encode_flush(cram_codec *c) { - int r = -1; - cram_block *b = cram_new_block(0, 0); - if (!b) - return -1; - - switch (c->u.e_xdelta.word_size) { - case 2: { - // Delta + zigzag transform. - // Subtracting two 8-bit values has a 9-bit result (-255 to 255). - // However think of it as turning a wheel clockwise or anti-clockwise. - // If it has 256 gradations then a -ve rotation followed by a +ve - // rotation of the same amount reverses it regardless. - // - // Similarly the zig-zag transformation doesn't invent any extra bits, - // so the entire thing can be done in-situ. This may permit faster - // SIMD loops if we break apart the steps. - - // uint16_t last = 0, d; - // for (i = 0; i < n; i++) { - // d = io[i] - last; - // last = io[i]; - // io[i] = zigzag16(vd); - // } - - // --- vs --- - - // for (i = n-1; i >= 1; i--) - // io[i] -= io[i-1]; - // for (i = 0; i < n; i++) - // io[i] = zigzag16(io[i]); - - // varint: need array variant for speed here. 
- // With zig-zag - int i, n = BLOCK_SIZE(c->out)/2;; - uint16_t *dat = (uint16_t *)BLOCK_DATA(c->out), last = 0; - - if (n*2 < BLOCK_SIZE(c->out)) { - // half word - last = *(uint8_t *)dat; - c->vv->varint_put32_blk(b, zigzag16(last)); - dat = (uint16_t *)(((uint8_t *)dat)+1); - } - - for (i = 0; i < n; i++) { - uint16_t d = dat[i] - last; // possibly unaligned - last = dat[i]; - c->vv->varint_put32_blk(b, zigzag16(d)); - } - - break; - } - - case 4: { - int i, n = BLOCK_SIZE(c->out)/4;; - uint32_t *dat = (uint32_t *)BLOCK_DATA(c->out), last = 0; - - for (i = 0; i < n; i++) { - uint32_t d = dat[i] - last; - last = dat[i]; - c->vv->varint_put32_blk(b, zigzag32(d)); - } - - break; - } - - case 1: { - int i, n = BLOCK_SIZE(c->out);; - uint8_t *dat = (uint8_t *)BLOCK_DATA(c->out), last = 0; - - for (i = 0; i < n; i++) { - uint32_t d = dat[i] - last; - last = dat[i]; - c->vv->varint_put32_blk(b, zigzag8(d)); - } - - break; - } - - default: - goto err; - } - - if (c->u.e_xdelta.sub_codec->encode(NULL, c->u.e_xdelta.sub_codec, - (char *)b->data, b->byte)) - goto err; - - r = 0; - - err: - cram_free_block(b); - return r; - -} - -int cram_xdelta_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0, r = 0, n; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - // Store sub-codec - cram_codec *tc = c->u.e_xdelta.sub_codec; - cram_block *tb = cram_new_block(0, 0); - if (!tb) - return -1; - int len2 = tc->store(tc, tb, NULL, version); - - len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n; - - // codec length - len += (n = c->vv->varint_put32_blk(b, c->vv->varint_size(c->u.e_xdelta.word_size) - + len2)); r |= n; - - // This and sub-codec - len += (n = c->vv->varint_put32_blk(b, c->u.e_xdelta.word_size)); r |= n; - BLOCK_APPEND(b, BLOCK_DATA(tb), BLOCK_SIZE(tb)); - - cram_free_block(tb); - - return r > 0 ? 
len + len2 : -1; - - block_err: - return -1; -} - -// Same as cram_beta_encode_long -int cram_xdelta_encode_long(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - return -1; -} - -int cram_xdelta_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - return -1; -} - -int cram_xdelta_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - char *dat = malloc(in_size*5); - if (!dat) - return -1; - char *cp = dat, *cp_end = dat + in_size*5; - - c->u.e_xdelta.last = 0; // reset for each new array - if (c->u.e_xdelta.word_size == 2) { - int i, part; - - part = in_size%2; - if (part) { - uint16_t z = in[0]; - c->u.e_xdelta.last = le_int2(z); - cp += c->vv->varint_put32(cp, cp_end, zigzag16(c->u.e_xdelta.last)); - } - - uint16_t *in16 = (uint16_t *)(in+part); - for (i = 0; i < in_size/2; i++) { - uint16_t d = le_int2(in16[i]) - c->u.e_xdelta.last; - c->u.e_xdelta.last = le_int2(in16[i]); - cp += c->vv->varint_put32(cp, cp_end, zigzag16(d)); - } - } - if (c->u.e_xdelta.sub_codec->encode(slice, c->u.e_xdelta.sub_codec, - (char *)dat, cp-dat)) { - free(dat); - return -1; - } - - free(dat); - return 0; -} - -void cram_xdelta_encode_free(cram_codec *c) { - if (!c) return; - - if (c->u.e_xdelta.sub_codec) - c->u.e_xdelta.sub_codec->free(c->u.e_xdelta.sub_codec); - - cram_free_block(c->out); - - free(c); -} - -cram_codec *cram_xdelta_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_XDELTA; - c->free = cram_xdelta_encode_free; - if (option == E_LONG) - c->encode = cram_xdelta_encode_long; - else if (option == E_INT) - c->encode = cram_xdelta_encode_int; - else - c->encode = cram_xdelta_encode_char; - c->store = cram_xdelta_encode_store; - c->flush = cram_xdelta_encode_flush; - - cram_xdelta_encoder *e = (cram_xdelta_encoder *)dat; - c->u.e_xdelta.word_size = 
e->word_size; - c->u.e_xdelta.last = 0; - c->u.e_xdelta.sub_codec = cram_encoder_init(e->sub_encoding, NULL, - E_BYTE_ARRAY, - e->sub_codec_dat, - version, vv); - - return c; -} - -/* - * --------------------------------------------------------------------------- - * XRLE - * - * This also has the additional requirement that the data series is not - * interleaved with another, permitting efficient encoding and decoding - * of all elements enmasse instead of needing to only extract the bits - * necessary per item. - */ -int cram_xrle_decode_long(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - // TODO if and when needed - return -1; -} - -int cram_xrle_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - // TODO if and when needed - return -1; -} - -// Expands an XRLE transform and caches result in slice->block_by_id[] -static int cram_xrle_decode_expand_char(cram_slice *slice, cram_codec *c) { - cram_block *b = slice->block_by_id[512 + c->codec_id]; - if (b) - return 0; - - b = slice->block_by_id[512 + c->codec_id] = cram_new_block(0, 0); - if (!b) - return -1; - cram_block *lit_b = c->u.xrle.lit_codec->get_block(slice, c->u.xrle.lit_codec); - if (!lit_b) - return -1; - unsigned char *lit_dat = lit_b->data; - unsigned int lit_sz = lit_b->uncomp_size; - unsigned int len_sz = c->u.xrle.len_codec->size(slice, c->u.xrle.len_codec); - - cram_block *len_b = c->u.xrle.len_codec->get_block(slice, c->u.xrle.len_codec); - if (!len_b) - return -1; - unsigned char *len_dat = len_b->data; - - uint8_t rle_syms[256]; - int rle_nsyms = 0; - int i; - for (i = 0; i < 256; i++) { - if (c->u.xrle.rep_score[i] > 0) - rle_syms[rle_nsyms++] = i; - } - - uint64_t out_sz; - int nb = var_get_u64(len_dat, len_dat+len_sz, &out_sz); - if (!(b->data = malloc(out_sz))) - return -1; - hts_rle_decode(lit_dat, lit_sz, - len_dat+nb, len_sz-nb, - rle_syms, rle_nsyms, - b->data, &out_sz); - b->uncomp_size = out_sz; - - return 0; -} - 
-int cram_xrle_decode_size(cram_slice *slice, cram_codec *c) { - cram_xrle_decode_expand_char(slice, c); - return slice->block_by_id[512 + c->codec_id]->uncomp_size; -} - -cram_block *cram_xrle_get_block(cram_slice *slice, cram_codec *c) { - cram_xrle_decode_expand_char(slice, c); - return slice->block_by_id[512 + c->codec_id]; -} - -int cram_xrle_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int n = *out_size; - - cram_xrle_decode_expand_char(slice, c); - cram_block *b = slice->block_by_id[512 + c->codec_id]; - - memcpy(out, b->data + b->idx, n); - b->idx += n; - return 0; - - // Old code when not cached - while (n > 0) { - if (c->u.xrle.cur_len == 0) { - unsigned char lit; - int one = 1; - if (c->u.xrle.lit_codec->decode(slice, c->u.xrle.lit_codec, in, - (char *)&lit, &one) < 0) - return -1; - c->u.xrle.cur_lit = lit; - - if (c->u.xrle.rep_score[lit] > 0) { - if (c->u.xrle.len_codec->decode(slice, c->u.xrle.len_codec, in, - (char *)&c->u.xrle.cur_len, &one) < 0) - return -1; - } // else cur_len still zero - //else fprintf(stderr, "%d\n", lit); - - c->u.xrle.cur_len++; - } - - if (n >= c->u.xrle.cur_len) { - memset(out, c->u.xrle.cur_lit, c->u.xrle.cur_len); - out += c->u.xrle.cur_len; - n -= c->u.xrle.cur_len; - c->u.xrle.cur_len = 0; - } else { - memset(out, c->u.xrle.cur_lit, n); - out += n; - c->u.xrle.cur_len -= n; - n = 0; - } - } - - return 0; -} - -void cram_xrle_decode_free(cram_codec *c) { - if (!c) return; - - if (c->u.xrle.len_codec) - c->u.xrle.len_codec->free(c->u.xrle.len_codec); - - if (c->u.xrle.lit_codec) - c->u.xrle.lit_codec->free(c->u.xrle.lit_codec); - - free(c); -} - -cram_codec *cram_xrle_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c; - char *cp = data; - char *endp = data+size; - int err = 0; - - if (!(c = calloc(1, sizeof(*c)))) - return NULL; - - c->codec = 
E_XRLE; - if (option == E_LONG) - c->decode = cram_xrle_decode_long; - else if (option == E_INT) - c->decode = cram_xrle_decode_int; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->decode = cram_xrle_decode_char; - else { - fprintf(stderr, "BYTE_ARRAYs not supported by this codec\n"); - free(c); - return NULL; - } - c->free = cram_xrle_decode_free; - c->size = cram_xrle_decode_size; - c->get_block = cram_xrle_get_block; - c->describe = NULL; - c->u.xrle.cur_len = 0; - c->u.xrle.cur_lit = -1; - - // RLE map - int i, j, nrle = vv->varint_get32(&cp, endp, &err); - memset(c->u.xrle.rep_score, 0, 256*sizeof(*c->u.xrle.rep_score)); - for (i = 0; i < nrle && i < 256; i++) { - j = vv->varint_get32(&cp, endp, &err); - if (j >= 0 && j < 256) - c->u.xrle.rep_score[j] = 1; - } - - // Length and literal sub encodings - c->u.xrle.len_encoding = vv->varint_get32(&cp, endp, &err); - int sub_size = vv->varint_get32(&cp, endp, &err); - if (sub_size < 0 || endp - cp < sub_size) - goto malformed; - c->u.xrle.len_codec = cram_decoder_init(hdr, c->u.xrle.len_encoding, - cp, sub_size, E_INT, version, vv); - if (c->u.xrle.len_codec == NULL) - goto malformed; - cp += sub_size; - - c->u.xrle.lit_encoding = vv->varint_get32(&cp, endp, &err); - sub_size = vv->varint_get32(&cp, endp, &err); - if (sub_size < 0 || endp - cp < sub_size) - goto malformed; - c->u.xrle.lit_codec = cram_decoder_init(hdr, c->u.xrle.lit_encoding, - cp, sub_size, option, version, vv); - if (c->u.xrle.lit_codec == NULL) - goto malformed; - cp += sub_size; - - if (err) - goto malformed; - - return c; - - malformed: - fprintf(stderr, "Malformed xrle header stream\n"); - cram_xrle_decode_free(c); - return NULL; -} - -int cram_xrle_encode_flush(cram_codec *c) { - uint8_t *out_lit, *out_len; - uint64_t out_lit_size, out_len_size; - uint8_t rle_syms[256]; - int rle_nsyms = 0, i; - - for (i = 0; i < 256; i++) - if (c->u.e_xrle.rep_score[i] > 0) - rle_syms[rle_nsyms++] = i; - - if (!c->u.e_xrle.to_flush) { - 
c->u.e_xrle.to_flush = (char *)BLOCK_DATA(c->out); - c->u.e_xrle.to_flush_size = BLOCK_SIZE(c->out); - } - - out_len = malloc(c->u.e_xrle.to_flush_size+8); - if (!out_len) - return -1; - - int nb = var_put_u64(out_len, NULL, c->u.e_xrle.to_flush_size); - - out_lit = hts_rle_encode((uint8_t *)c->u.e_xrle.to_flush, c->u.e_xrle.to_flush_size, - out_len+nb, &out_len_size, - rle_syms, &rle_nsyms, - NULL, &out_lit_size); - out_len_size += nb; - - - // TODO: can maybe "gift" the sub codec the data block, to remove - // one level of memcpy. - if (c->u.e_xrle.len_codec->encode(NULL, - c->u.e_xrle.len_codec, - (char *)out_len, out_len_size)) - return -1; - - if (c->u.e_xrle.lit_codec->encode(NULL, - c->u.e_xrle.lit_codec, - (char *)out_lit, out_lit_size)) - return -1; - - free(out_len); - free(out_lit); - - return 0; -} - -int cram_xrle_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0, r = 0, n; - cram_codec *tc; - cram_block *b_rle, *b_len, *b_lit; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - // List of symbols to RLE - b_rle = cram_new_block(0, 0); - if (!b_rle) - return -1; - int i, nrle = 0, len1 = 0; - for (i = 0; i < 256; i++) { - if (c->u.e_xrle.rep_score[i] > 0) { - nrle++; - len1 += (n = c->vv->varint_put32_blk(b_rle,i)); r |= n; - } - } - - // Store length and literal sub-codecs to get encoded length - tc = c->u.e_xrle.len_codec; - b_len = cram_new_block(0, 0); - if (!b_len) - return -1; - int len2 = tc->store(tc, b_len, NULL, version); - - tc = c->u.e_xrle.lit_codec; - b_lit = cram_new_block(0, 0); - if (!b_lit) - return -1; - int len3 = tc->store(tc, b_lit, NULL, version); - - len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n; - len += (n = c->vv->varint_put32_blk(b, len1 + len2 + len3 - + c->vv->varint_size(nrle))); r |= n; - len += (n = c->vv->varint_put32_blk(b, nrle)); r |= n; - BLOCK_APPEND(b, BLOCK_DATA(b_rle), BLOCK_SIZE(b_rle)); - BLOCK_APPEND(b, 
BLOCK_DATA(b_len), BLOCK_SIZE(b_len)); - BLOCK_APPEND(b, BLOCK_DATA(b_lit), BLOCK_SIZE(b_lit)); - - cram_free_block(b_rle); - cram_free_block(b_len); - cram_free_block(b_lit); - - if (r > 0) - return len + len1 + len2 + len3; - - block_err: - return -1; -} - -int cram_xrle_encode_long(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - // TODO if and when needed - return -1; -} - -int cram_xrle_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - // TODO if and when needed - return -1; -} - -int cram_xrle_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - if (c->u.e_xrle.to_flush) { - if (!c->out && !(c->out = cram_new_block(0, 0))) - return -1; - BLOCK_APPEND(c->out, c->u.e_xrle.to_flush, c->u.e_xrle.to_flush_size); - c->u.e_xrle.to_flush = NULL; - c->u.e_xrle.to_flush_size = 0; - } - - if (c->out && BLOCK_SIZE(c->out) > 0) { - // Gathering data - BLOCK_APPEND(c->out, in, in_size); - return 0; - } - - // else cache copy of the data we're about to send to flush instead. 
- c->u.e_xrle.to_flush = in; - c->u.e_xrle.to_flush_size = in_size; - return 0; - - block_err: - return -1; -} - -void cram_xrle_encode_free(cram_codec *c) { - if (!c) return; - - if (c->u.e_xrle.len_codec) - c->u.e_xrle.len_codec->free(c->u.e_xrle.len_codec); - if (c->u.e_xrle.lit_codec) - c->u.e_xrle.lit_codec->free(c->u.e_xrle.lit_codec); - - cram_free_block(c->out); - - free(c); -} - -cram_codec *cram_xrle_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_XRLE; - c->free = cram_xrle_encode_free; - if (option == E_LONG) - c->encode = cram_xrle_encode_long; - else if (option == E_INT) - c->encode = cram_xrle_encode_int; - else - c->encode = cram_xrle_encode_char; - c->store = cram_xrle_encode_store; - c->flush = cram_xrle_encode_flush; - - cram_xrle_encoder *e = (cram_xrle_encoder *)dat; - - c->u.e_xrle.len_codec = cram_encoder_init(e->len_encoding, NULL, - E_BYTE, e->len_dat, - version, vv); - c->u.e_xrle.lit_codec = cram_encoder_init(e->lit_encoding, NULL, - E_BYTE, e->lit_dat, - version, vv); - c->u.e_xrle.cur_lit = -1; - c->u.e_xrle.cur_len = -1; - c->u.e_xrle.to_flush = NULL; - c->u.e_xrle.to_flush_size = 0; - - memcpy(c->u.e_xrle.rep_score, e->rep_score, 256*sizeof(*c->u.e_xrle.rep_score)); - - return c; -} - -/* - * --------------------------------------------------------------------------- - * SUBEXP - */ -int cram_subexp_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int n, count; - int k = c->u.subexp.k; - - for (count = 0, n = *out_size; count < n; count++) { - int i = 0, tail; - int val; - - /* Get number of 1s */ - //while (get_bit_MSB(in) == 1) i++; - i = get_one_bits_MSB(in); - if (i < 0 || cram_not_enough_bits(in, i > 0 ? 
i + k - 1 : k)) - return -1; - /* - * Val is - * i > 0: 2^(k+i-1) + k+i-1 bits - * i = 0: k bits - */ - if (i) { - tail = i + k-1; - val = 0; - while (tail) { - //val = val<<1; val |= get_bit_MSB(in); - GET_BIT_MSB(in, val); - tail--; - } - val += 1 << (i + k-1); - } else { - tail = k; - val = 0; - while (tail) { - //val = val<<1; val |= get_bit_MSB(in); - GET_BIT_MSB(in, val); - tail--; - } - } - - out_i[count] = val - c->u.subexp.offset; - } - - return 0; -} - -void cram_subexp_decode_free(cram_codec *c) { - if (c) - free(c); -} - -int cram_subexp_describe(cram_codec *c, kstring_t *ks) { - return ksprintf(ks, "SUBEXP(offset=%d,k=%d)", - c->u.subexp.offset, - c->u.subexp.k) - < 0 ? -1 : 0; -} - -cram_codec *cram_subexp_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c; - char *cp = data; - - if (option != E_INT) { - hts_log_error("This codec only supports INT encodings"); - return NULL; - } - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_SUBEXP; - c->decode = cram_subexp_decode; - c->free = cram_subexp_decode_free; - c->describe = cram_subexp_describe; - c->u.subexp.k = -1; - - c->u.subexp.offset = vv->varint_get32(&cp, data + size, NULL); - c->u.subexp.k = vv->varint_get32(&cp, data + size, NULL); - - if (cp - data != size || c->u.subexp.k < 0) { - hts_log_error("Malformed subexp header stream"); - free(c); - return NULL; - } - - return c; -} - -/* - * --------------------------------------------------------------------------- - * GAMMA - */ -int cram_gamma_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n; - - for (i = 0, n = *out_size; i < n; i++) { - int nz = 0; - int val; - //while (get_bit_MSB(in) == 0) nz++; - nz = get_zero_bits_MSB(in); - if (cram_not_enough_bits(in, nz)) - return -1; - val = 1; - while (nz > 0) { - //val <<= 1; 
val |= get_bit_MSB(in); - GET_BIT_MSB(in, val); - nz--; - } - - out_i[i] = val - c->u.gamma.offset; - } - - return 0; -} - -void cram_gamma_decode_free(cram_codec *c) { - if (c) - free(c); -} - -int cram_gamma_describe(cram_codec *c, kstring_t *ks) { - return ksprintf(ks, "GAMMA(offset=%d)", c->u.subexp.offset) - < 0 ? -1 : 0; -} - -cram_codec *cram_gamma_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c = NULL; - char *cp = data; - - if (option != E_INT) { - hts_log_error("This codec only supports INT encodings"); - return NULL; - } - - if (size < 1) - goto malformed; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_GAMMA; - c->decode = cram_gamma_decode; - c->free = cram_gamma_decode_free; - c->describe = cram_gamma_describe; - - c->u.gamma.offset = vv->varint_get32(&cp, data+size, NULL); - - if (cp - data != size) - goto malformed; - - return c; - - malformed: - hts_log_error("Malformed gamma header stream"); - free(c); - return NULL; -} - -/* - * --------------------------------------------------------------------------- - * HUFFMAN - */ - -static int code_sort(const void *vp1, const void *vp2) { - const cram_huffman_code *c1 = (const cram_huffman_code *)vp1; - const cram_huffman_code *c2 = (const cram_huffman_code *)vp2; - - if (c1->len != c2->len) - return c1->len - c2->len; - else - return c1->symbol < c2->symbol ? -1 : (c1->symbol > c2->symbol ? 
1 : 0); -} - -void cram_huffman_decode_free(cram_codec *c) { - if (!c) - return; - - if (c->u.huffman.codes) - free(c->u.huffman.codes); - free(c); -} - -int cram_huffman_decode_null(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - return -1; -} - -int cram_huffman_decode_char0(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int i, n; - - if (!out) - return 0; - - /* Special case of 0 length codes */ - for (i = 0, n = *out_size; i < n; i++) { - out[i] = c->u.huffman.codes[0].symbol; - } - return 0; -} - -int cram_huffman_decode_char(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int i, n, ncodes = c->u.huffman.ncodes; - const cram_huffman_code * const codes = c->u.huffman.codes; - - for (i = 0, n = *out_size; i < n; i++) { - int idx = 0; - int val = 0, len = 0, last_len = 0; - - for (;;) { - int dlen = codes[idx].len - last_len; - if (cram_not_enough_bits(in, dlen)) - return -1; - - //val <<= dlen; - //val |= get_bits_MSB(in, dlen); - //last_len = (len += dlen); - - last_len = (len += dlen); - for (; dlen; dlen--) GET_BIT_MSB(in, val); - - idx = val - codes[idx].p; - if (idx >= ncodes || idx < 0) - return -1; - - if (codes[idx].code == val && codes[idx].len == len) { - if (out) out[i] = codes[idx].symbol; - break; - } - } - } - - return 0; -} - -int cram_huffman_decode_int0(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n; - const cram_huffman_code * const codes = c->u.huffman.codes; - - /* Special case of 0 length codes */ - for (i = 0, n = *out_size; i < n; i++) { - out_i[i] = codes[0].symbol; - } - return 0; -} - -int cram_huffman_decode_int(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n, ncodes = c->u.huffman.ncodes; - const cram_huffman_code * const codes = c->u.huffman.codes; - - for (i = 0, n = *out_size; 
i < n; i++) { - int idx = 0; - int val = 0, len = 0, last_len = 0; - - // Now one bit at a time for remaining checks - for (;;) { - int dlen = codes[idx].len - last_len; - if (cram_not_enough_bits(in, dlen)) - return -1; - - //val <<= dlen; - //val |= get_bits_MSB(in, dlen); - //last_len = (len += dlen); - - last_len = (len += dlen); - for (; dlen; dlen--) GET_BIT_MSB(in, val); - - idx = val - codes[idx].p; - if (idx >= ncodes || idx < 0) - return -1; - - if (codes[idx].code == val && codes[idx].len == len) { - out_i[i] = codes[idx].symbol; - break; - } - } - } - - return 0; -} - -int cram_huffman_decode_long0(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int64_t *out_i = (int64_t *)out; - int i, n; - const cram_huffman_code * const codes = c->u.huffman.codes; - - /* Special case of 0 length codes */ - for (i = 0, n = *out_size; i < n; i++) { - out_i[i] = codes[0].symbol; - } - return 0; -} - -int cram_huffman_decode_long(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int64_t *out_i = (int64_t *)out; - int i, n, ncodes = c->u.huffman.ncodes; - const cram_huffman_code * const codes = c->u.huffman.codes; - - for (i = 0, n = *out_size; i < n; i++) { - int idx = 0; - int val = 0, len = 0, last_len = 0; - - // Now one bit at a time for remaining checks - for (;;) { - int dlen = codes[idx].len - last_len; - if (cram_not_enough_bits(in, dlen)) - return -1; - - //val <<= dlen; - //val |= get_bits_MSB(in, dlen); - //last_len = (len += dlen); - - last_len = (len += dlen); - for (; dlen; dlen--) GET_BIT_MSB(in, val); - - idx = val - codes[idx].p; - if (idx >= ncodes || idx < 0) - return -1; - - if (codes[idx].code == val && codes[idx].len == len) { - out_i[i] = codes[idx].symbol; - break; - } - } - } - - return 0; -} - -int cram_huffman_describe(cram_codec *c, kstring_t *ks) { - int r = 0, n; - r |= ksprintf(ks, "HUFFMAN(codes={") < 0; - for (n = 0; n < c->u.huffman.ncodes; n++) { - r |= ksprintf(ks, 
"%s%"PRId64, n?",":"", - c->u.huffman.codes[n].symbol); - } - r |= ksprintf(ks, "},lengths={") < 0; - for (n = 0; n < c->u.huffman.ncodes; n++) { - r |= ksprintf(ks, "%s%d", n?",":"", - c->u.huffman.codes[n].len); - } - r |= ksprintf(ks, "})") < 0; - return r; -} - -/* - * Initialises a huffman decoder from an encoding data stream. - */ -cram_codec *cram_huffman_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - int32_t ncodes = 0, i, j; - char *cp = data, *data_end = &data[size]; - cram_codec *h; - cram_huffman_code *codes = NULL; - int32_t val, last_len, max_len = 0; - uint32_t max_val; // needs one more bit than val - const int max_code_bits = sizeof(val) * 8 - 1; - int err = 0; - - if (option == E_BYTE_ARRAY_BLOCK) { - hts_log_error("BYTE_ARRAYs not supported by this codec"); - return NULL; - } - - ncodes = vv->varint_get32(&cp, data_end, &err); - if (ncodes < 0) { - hts_log_error("Invalid number of symbols in huffman stream"); - return NULL; - } - if (ncodes >= SIZE_MAX / sizeof(*codes)) { - errno = ENOMEM; - return NULL; - } - - h = calloc(1, sizeof(*h)); - if (!h) - return NULL; - - h->codec = E_HUFFMAN; - h->free = cram_huffman_decode_free; - - h->u.huffman.ncodes = ncodes; - h->u.huffman.option = option; - if (ncodes) { - codes = h->u.huffman.codes = malloc(ncodes * sizeof(*codes)); - if (!codes) { - free(h); - return NULL; - } - } else { - codes = h->u.huffman.codes = NULL; - } - - /* Read symbols and bit-lengths */ - if (option == E_LONG) { - for (i = 0; i < ncodes; i++) - codes[i].symbol = vv->varint_get64(&cp, data_end, &err); - } else if (option == E_INT || option == E_BYTE) { - for (i = 0; i < ncodes; i++) - codes[i].symbol = vv->varint_get32(&cp, data_end, &err); - } else { - goto malformed; - } - - if (err) - goto malformed; - - i = vv->varint_get32(&cp, data_end, &err); - if (i != ncodes) - goto malformed; - - if (ncodes == 0) { - /* 
NULL huffman stream. Ensure it returns an error if - anything tries to use it. */ - h->decode = cram_huffman_decode_null; - return h; - } - - for (i = 0; i < ncodes; i++) { - codes[i].len = vv->varint_get32(&cp, data_end, &err); - if (err) - break; - if (codes[i].len < 0) { - hts_log_error("Huffman code length (%d) is negative", codes[i].len); - goto malformed; - } - if (max_len < codes[i].len) - max_len = codes[i].len; - } - if (err || cp - data != size || max_len >= ncodes) - goto malformed; - - /* 31 is max. bits available in val */ - if (max_len > max_code_bits) { - hts_log_error("Huffman code length (%d) is greater " - "than maximum supported (%d)", max_len, max_code_bits); - goto malformed; - } - - /* Sort by bit length and then by symbol value */ - qsort(codes, ncodes, sizeof(*codes), code_sort); - - /* Assign canonical codes */ - val = -1, last_len = 0, max_val = 0; - for (i = 0; i < ncodes; i++) { - val++; - if (val > max_val) - goto malformed; - - if (codes[i].len > last_len) { - val <<= (codes[i].len - last_len); - last_len = codes[i].len; - max_val = (1U << codes[i].len) - 1; - } - codes[i].code = val; - } - - /* - * Compute the next starting point, offset by the i'th value. - * For example if codes 10, 11, 12, 13 are 30, 31, 32, 33 then - * codes[10..13].p = 30 - 10. - */ - last_len = 0; - for (i = j = 0; i < ncodes; i++) { - if (codes[i].len > last_len) { - j = codes[i].code - i; - last_len = codes[i].len; - } - codes[i].p = j; - } - - // puts("==HUFF LEN=="); - // for (i = 0; i <= last_len+1; i++) { - // printf("len %d=%d prefix %d\n", i, h->u.huffman.lengths[i], h->u.huffman.prefix[i]); - // } - // puts("===HUFFMAN CODES==="); - // for (i = 0; i < ncodes; i++) { - // int j; - // printf("%d: %d %d %d ", i, codes[i].symbol, codes[i].len, codes[i].code); - // j = codes[i].len; - // while (j) { - // putchar(codes[i].code & (1 << --j) ? 
'1' : '0'); - // } - // printf(" %d\n", codes[i].code); - // } - - if (option == E_BYTE || option == E_BYTE_ARRAY) { - if (h->u.huffman.codes[0].len == 0) - h->decode = cram_huffman_decode_char0; - else - h->decode = cram_huffman_decode_char; - } else if (option == E_LONG || option == E_SLONG) { - if (h->u.huffman.codes[0].len == 0) - h->decode = cram_huffman_decode_long0; - else - h->decode = cram_huffman_decode_long; - } else if (option == E_INT || option == E_SINT || option == E_BYTE) { - if (h->u.huffman.codes[0].len == 0) - h->decode = cram_huffman_decode_int0; - else - h->decode = cram_huffman_decode_int; - } else { - return NULL; - } - h->describe = cram_huffman_describe; - - return (cram_codec *)h; - - malformed: - hts_log_error("Malformed huffman header stream"); - free(codes); - free(h); - return NULL; -} - -int cram_huffman_encode_char0(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - return 0; -} - -int cram_huffman_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int i, code, len, r = 0; - unsigned char *syms = (unsigned char *)in; - - while (in_size--) { - int sym = *syms++; - if (sym >= -1 && sym < MAX_HUFF) { - i = c->u.e_huffman.val2code[sym+1]; - assert(c->u.e_huffman.codes[i].symbol == sym); - code = c->u.e_huffman.codes[i].code; - len = c->u.e_huffman.codes[i].len; - } else { - /* Slow - use a lookup table for when sym < MAX_HUFF? 
*/ - for (i = 0; i < c->u.e_huffman.nvals; i++) { - if (c->u.e_huffman.codes[i].symbol == sym) - break; - } - if (i == c->u.e_huffman.nvals) - return -1; - - code = c->u.e_huffman.codes[i].code; - len = c->u.e_huffman.codes[i].len; - } - - r |= store_bits_MSB(c->out, code, len); - } - - return r; -} - -int cram_huffman_encode_int0(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - return 0; -} - -int cram_huffman_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int i, code, len, r = 0; - int *syms = (int *)in; - - while (in_size--) { - int sym = *syms++; - - if (sym >= -1 && sym < MAX_HUFF) { - i = c->u.e_huffman.val2code[sym+1]; - assert(c->u.e_huffman.codes[i].symbol == sym); - code = c->u.e_huffman.codes[i].code; - len = c->u.e_huffman.codes[i].len; - } else { - /* Slow - use a lookup table for when sym < MAX_HUFFMAN_SYM? */ - for (i = 0; i < c->u.e_huffman.nvals; i++) { - if (c->u.e_huffman.codes[i].symbol == sym) - break; - } - if (i == c->u.e_huffman.nvals) - return -1; - - code = c->u.e_huffman.codes[i].code; - len = c->u.e_huffman.codes[i].len; - } - - r |= store_bits_MSB(c->out, code, len); - } - - return r; -} - -int cram_huffman_encode_long0(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - return 0; -} - -int cram_huffman_encode_long(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int i, code, len, r = 0; - int64_t *syms = (int64_t *)in; - - while (in_size--) { - int sym = *syms++; - - if (sym >= -1 && sym < MAX_HUFF) { - i = c->u.e_huffman.val2code[sym+1]; - assert(c->u.e_huffman.codes[i].symbol == sym); - code = c->u.e_huffman.codes[i].code; - len = c->u.e_huffman.codes[i].len; - } else { - /* Slow - use a lookup table for when sym < MAX_HUFFMAN_SYM? 
*/ - for (i = 0; i < c->u.e_huffman.nvals; i++) { - if (c->u.e_huffman.codes[i].symbol == sym) - break; - } - if (i == c->u.e_huffman.nvals) - return -1; - - code = c->u.e_huffman.codes[i].code; - len = c->u.e_huffman.codes[i].len; - } - - r |= store_bits_MSB(c->out, code, len); - } - - return r; -} - -void cram_huffman_encode_free(cram_codec *c) { - if (!c) - return; - - if (c->u.e_huffman.codes) - free(c->u.e_huffman.codes); - free(c); -} - -/* - * Encodes a huffman tree. - * Returns number of bytes written. - */ -int cram_huffman_encode_store(cram_codec *c, cram_block *b, char *prefix, - int version) { - int i, len = 0, r = 0, n; - cram_huffman_code *codes = c->u.e_huffman.codes; - /* - * Up to code length 127 means 2.5e+26 bytes of data required (worst - * case huffman tree needs symbols with freqs matching the Fibonacci - * series). So guaranteed 1 byte per code. - * - * Symbols themselves could be 5 bytes (eg -1 is 5 bytes in itf8). - * - * Therefore 6*ncodes + 5 + 5 + 1 + 5 is max memory - */ - char *tmp = malloc(6*c->u.e_huffman.nvals+16); - char *tp = tmp, *tpend = tmp+6*c->u.e_huffman.nvals+16; - - if (!tmp) - return -1; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - tp += c->vv->varint_put32(tp, tpend, c->u.e_huffman.nvals); - if (c->u.e_huffman.option == E_LONG) { - for (i = 0; i < c->u.e_huffman.nvals; i++) { - tp += c->vv->varint_put64(tp, tpend, codes[i].symbol); - } - } else if (c->u.e_huffman.option == E_SLONG) { - for (i = 0; i < c->u.e_huffman.nvals; i++) { - tp += c->vv->varint_put64s(tp, tpend, codes[i].symbol); - } - } else if (c->u.e_huffman.option == E_INT || c->u.e_huffman.option == E_BYTE) { - for (i = 0; i < c->u.e_huffman.nvals; i++) { - tp += c->vv->varint_put32(tp, tpend, codes[i].symbol); - } - } else if (c->u.e_huffman.option == E_SINT) { - for (i = 0; i < c->u.e_huffman.nvals; i++) { - tp += c->vv->varint_put32s(tp, tpend, codes[i].symbol); - } - } else { - return -1; - } - - tp += 
c->vv->varint_put32(tp, tpend, c->u.e_huffman.nvals); - for (i = 0; i < c->u.e_huffman.nvals; i++) - tp += c->vv->varint_put32(tp, tpend, codes[i].len); - - len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n; - len += (n = c->vv->varint_put32_blk(b, tp-tmp)); r |= n; - BLOCK_APPEND(b, tmp, tp-tmp); - len += tp-tmp; - - free(tmp); - - if (r > 0) - return len; - - block_err: - return -1; -} - -cram_codec *cram_huffman_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - int *vals = NULL, *freqs = NULL, *lens = NULL, code, len; - int *new_vals, *new_freqs; - int i, max_val = 0, min_val = INT_MAX, k; - size_t nvals, vals_alloc = 0; - cram_codec *c; - cram_huffman_code *codes; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_HUFFMAN; - - /* Count number of unique symbols */ - for (nvals = i = 0; i < MAX_STAT_VAL; i++) { - if (!st->freqs[i]) - continue; - if (nvals >= vals_alloc) { - vals_alloc = vals_alloc ? vals_alloc*2 : 1024; - new_vals = realloc(vals, vals_alloc * sizeof(int)); - if (!new_vals) goto nomem; - vals = new_vals; - new_freqs = realloc(freqs, vals_alloc * sizeof(int)); - if (!new_freqs) goto nomem; - freqs = new_freqs; - } - vals[nvals] = i; - freqs[nvals] = st->freqs[i]; - assert(st->freqs[i] > 0); - if (max_val < i) max_val = i; - if (min_val > i) min_val = i; - nvals++; - } - if (st->h) { - khint_t k; - - for (k = kh_begin(st->h); k != kh_end(st->h); k++) { - if (!kh_exist(st->h, k)) - continue; - if (nvals >= vals_alloc) { - vals_alloc = vals_alloc ? 
vals_alloc*2 : 1024; - new_vals = realloc(vals, vals_alloc * sizeof(int)); - if (!new_vals) goto nomem; - vals = new_vals; - new_freqs = realloc(freqs, vals_alloc * sizeof(int)); - if (!new_freqs) goto nomem; - freqs = new_freqs; - } - vals[nvals]= kh_key(st->h, k); - freqs[nvals] = kh_val(st->h, k); - assert(freqs[nvals] > 0); - if (max_val < i) max_val = i; - if (min_val > i) min_val = i; - nvals++; - } - } - - assert(nvals > 0); - - new_freqs = realloc(freqs, 2*nvals*sizeof(*freqs)); - if (!new_freqs) goto nomem; - freqs = new_freqs; - lens = calloc(2*nvals, sizeof(*lens)); - if (!lens) goto nomem; - - /* Inefficient, use pointers to form chain so we can insert and maintain - * a sorted list? This is currently O(nvals^2) complexity. - */ - for (;;) { - int low1 = INT_MAX, low2 = INT_MAX; - int ind1 = 0, ind2 = 0; - for (i = 0; i < nvals; i++) { - if (freqs[i] < 0) - continue; - if (low1 > freqs[i]) - low2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i; - else if (low2 > freqs[i]) - low2 = freqs[i], ind2 = i; - } - if (low2 == INT_MAX) - break; - - freqs[nvals] = low1 + low2; - lens[ind1] = nvals; - lens[ind2] = nvals; - freqs[ind1] *= -1; - freqs[ind2] *= -1; - nvals++; - } - nvals = nvals/2+1; - - /* Assign lengths */ - for (i = 0; i < nvals; i++) { - int code_len = 0; - for (k = lens[i]; k; k = lens[k]) - code_len++; - lens[i] = code_len; - freqs[i] *= -1; - //fprintf(stderr, "%d / %d => %d\n", vals[i], freqs[i], lens[i]); - } - - - /* Sort, need in a struct */ - if (!(codes = malloc(nvals * sizeof(*codes)))) - goto nomem; - for (i = 0; i < nvals; i++) { - codes[i].symbol = vals[i]; - codes[i].len = lens[i]; - } - qsort(codes, nvals, sizeof(*codes), code_sort); - - /* - * Generate canonical codes from lengths. - * Sort by length. - * Start with 0. - * Every new code of same length is +1. - * Every new code of new length is +1 then <<1 per extra length. 
- * - * /\ - * a/\ - * /\/\ - * bcd/\ - * ef - * - * a 1 0 - * b 3 4 (0+1)<<2 - * c 3 5 - * d 3 6 - * e 4 14 (6+1)<<1 - * f 5 15 - */ - code = 0; len = codes[0].len; - for (i = 0; i < nvals; i++) { - while (len != codes[i].len) { - code<<=1; - len++; - } - codes[i].code = code++; - - if (codes[i].symbol >= -1 && codes[i].symbol < MAX_HUFF) - c->u.e_huffman.val2code[codes[i].symbol+1] = i; - - //fprintf(stderr, "sym %d, code %d, len %d\n", - // codes[i].symbol, codes[i].code, codes[i].len); - } - - free(lens); - free(vals); - free(freqs); - - c->u.e_huffman.codes = codes; - c->u.e_huffman.nvals = nvals; - c->u.e_huffman.option = option; - - c->free = cram_huffman_encode_free; - if (option == E_BYTE || option == E_BYTE_ARRAY) { - if (c->u.e_huffman.codes[0].len == 0) - c->encode = cram_huffman_encode_char0; - else - c->encode = cram_huffman_encode_char; - } else if (option == E_INT || option == E_SINT) { - if (c->u.e_huffman.codes[0].len == 0) - c->encode = cram_huffman_encode_int0; - else - c->encode = cram_huffman_encode_int; - } else if (option == E_LONG || option == E_SLONG) { - if (c->u.e_huffman.codes[0].len == 0) - c->encode = cram_huffman_encode_long0; - else - c->encode = cram_huffman_encode_long; - } else { - return NULL; - } - c->store = cram_huffman_encode_store; - c->flush = NULL; - - return c; - - nomem: - hts_log_error("Out of memory"); - free(vals); - free(freqs); - free(lens); - free(c); - return NULL; -} - -/* - * --------------------------------------------------------------------------- - * BYTE_ARRAY_LEN - */ -int cram_byte_array_len_decode(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, - int *out_size) { - /* Fetch length */ - int32_t len = 0, one = 1; - int r; - - r = c->u.byte_array_len.len_codec->decode(slice, c->u.byte_array_len.len_codec, - in, (char *)&len, &one); - //printf("ByteArray Len=%d\n", len); - - if (!r && c->u.byte_array_len.val_codec && len >= 0) { - r = c->u.byte_array_len.val_codec->decode(slice, - 
c->u.byte_array_len.val_codec, - in, out, &len); - } else { - return -1; - } - - *out_size = len; - - return r; -} - -void cram_byte_array_len_decode_free(cram_codec *c) { - if (!c) return; - - if (c->u.byte_array_len.len_codec) - c->u.byte_array_len.len_codec->free(c->u.byte_array_len.len_codec); - - if (c->u.byte_array_len.val_codec) - c->u.byte_array_len.val_codec->free(c->u.byte_array_len.val_codec); - - free(c); -} - -int cram_byte_array_len_describe(cram_codec *c, kstring_t *ks) { - int r = 0; - r |= ksprintf(ks, "BYTE_ARRAY_LEN(len_codec={") < 0; - cram_byte_array_len_decoder *l = &c->u.byte_array_len; - r |= l->len_codec->describe - ? l->len_codec->describe(l->len_codec, ks) - : (ksprintf(ks, "?")<0); - r |= ksprintf(ks, "},val_codec={") < 0; - r |= l->val_codec->describe - ? l->val_codec->describe(l->val_codec, ks) - : (ksprintf(ks, "?")<0); - r |= ksprintf(ks, "}") < 0; - - return r; -} - -cram_codec *cram_byte_array_len_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c; - char *cp = data; - char *endp = data + size; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_BYTE_ARRAY_LEN; - c->decode = cram_byte_array_len_decode; - c->free = cram_byte_array_len_decode_free; - c->describe = cram_byte_array_len_describe; - c->u.byte_array_len.len_codec = NULL; - c->u.byte_array_len.val_codec = NULL; - - int encoding = vv->varint_get32(&cp, endp, NULL); - int sub_size = vv->varint_get32(&cp, endp, NULL); - if (sub_size < 0 || endp - cp < sub_size) - goto malformed; - c->u.byte_array_len.len_codec = cram_decoder_init(hdr, encoding, cp, sub_size, - E_INT, version, vv); - if (c->u.byte_array_len.len_codec == NULL) - goto no_codec; - cp += sub_size; - - encoding = vv->varint_get32(&cp, endp, NULL); - sub_size = vv->varint_get32(&cp, endp, NULL); - if (sub_size < 0 || endp - cp < sub_size) - goto malformed; - 
c->u.byte_array_len.val_codec = cram_decoder_init(hdr, encoding, cp, sub_size, - option, version, vv); - if (c->u.byte_array_len.val_codec == NULL) - goto no_codec; - cp += sub_size; - - if (cp - data != size) - goto malformed; - - return c; - - malformed: - hts_log_error("Malformed byte_array_len header stream"); - no_codec: - cram_byte_array_len_decode_free(c); - return NULL; -} - -int cram_byte_array_len_encode(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int32_t i32 = in_size; - int r = 0; - - r |= c->u.e_byte_array_len.len_codec->encode(slice, - c->u.e_byte_array_len.len_codec, - (char *)&i32, 1); - r |= c->u.e_byte_array_len.val_codec->encode(slice, - c->u.e_byte_array_len.val_codec, - in, in_size); - return r; -} - -void cram_byte_array_len_encode_free(cram_codec *c) { - if (!c) - return; - - if (c->u.e_byte_array_len.len_codec) - c->u.e_byte_array_len.len_codec->free(c->u.e_byte_array_len.len_codec); - - if (c->u.e_byte_array_len.val_codec) - c->u.e_byte_array_len.val_codec->free(c->u.e_byte_array_len.val_codec); - - free(c); -} - -int cram_byte_array_len_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0, len2, len3, r = 0, n; - cram_codec *tc; - cram_block *b_len = NULL, *b_val = NULL; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - tc = c->u.e_byte_array_len.len_codec; - b_len = cram_new_block(0, 0); - if (!b_len) goto block_err; - len2 = tc->store(tc, b_len, NULL, version); - if (len2 < 0) goto block_err; - - tc = c->u.e_byte_array_len.val_codec; - b_val = cram_new_block(0, 0); - if (!b_val) goto block_err; - len3 = tc->store(tc, b_val, NULL, version); - if (len3 < 0) goto block_err; - - len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n; - len += (n = c->vv->varint_put32_blk(b, len2+len3)); r |= n; - BLOCK_APPEND(b, BLOCK_DATA(b_len), BLOCK_SIZE(b_len)); - BLOCK_APPEND(b, BLOCK_DATA(b_val), BLOCK_SIZE(b_val)); - - cram_free_block(b_len); - 
cram_free_block(b_val); - - if (r > 0) - return len + len2 + len3; - - block_err: - if (b_len) cram_free_block(b_len); - if (b_val) cram_free_block(b_val); - return -1; -} - -cram_codec *cram_byte_array_len_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - cram_byte_array_len_encoder *e = (cram_byte_array_len_encoder *)dat; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_BYTE_ARRAY_LEN; - c->free = cram_byte_array_len_encode_free; - c->encode = cram_byte_array_len_encode; - c->store = cram_byte_array_len_encode_store; - c->flush = NULL; - - c->u.e_byte_array_len.len_codec = cram_encoder_init(e->len_encoding, - st, E_INT, - e->len_dat, - version, vv); - c->u.e_byte_array_len.val_codec = cram_encoder_init(e->val_encoding, - NULL, E_BYTE_ARRAY, - e->val_dat, - version, vv); - - if (!c->u.e_byte_array_len.len_codec || - !c->u.e_byte_array_len.val_codec) { - cram_byte_array_len_encode_free(c); - return NULL; - } - - return c; -} - -/* - * --------------------------------------------------------------------------- - * BYTE_ARRAY_STOP - */ -static int cram_byte_array_stop_decode_char(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, - int *out_size) { - char *cp, ch; - cram_block *b = NULL; - - b = cram_get_block_by_id(slice, c->u.byte_array_stop.content_id); - if (!b) - return *out_size?-1:0; - - if (b->idx >= b->uncomp_size) - return -1; - - cp = (char *)b->data + b->idx; - if (out) { - while ((ch = *cp) != (char)c->u.byte_array_stop.stop) { - if (cp - (char *)b->data >= b->uncomp_size) - return -1; - *out++ = ch; - cp++; - } - } else { - // Consume input, but produce no output - while ((ch = *cp) != (char)c->u.byte_array_stop.stop) { - if (cp - (char *)b->data >= b->uncomp_size) - return -1; - cp++; - } - } - - *out_size = cp - (char *)(b->data + b->idx); - b->idx = cp - (char *)b->data + 1; - - return 0; -} - -int 
cram_byte_array_stop_decode_block(cram_slice *slice, cram_codec *c, - cram_block *in, char *out_, - int *out_size) { - cram_block *b; - cram_block *out = (cram_block *)out_; - unsigned char *cp, *cp_end; - unsigned char stop; - - b = cram_get_block_by_id(slice, c->u.byte_array_stop.content_id); - if (!b) - return *out_size?-1:0; - - if (b->idx >= b->uncomp_size) - return -1; - cp = b->data + b->idx; - cp_end = b->data + b->uncomp_size; - - stop = c->u.byte_array_stop.stop; - if (cp_end - cp < out->alloc - out->byte) { - unsigned char *out_cp = BLOCK_END(out); - while (cp != cp_end && *cp != stop) - *out_cp++ = *cp++; - BLOCK_SIZE(out) = out_cp - BLOCK_DATA(out); - } else { - unsigned char *cp_start; - for (cp_start = cp; cp != cp_end && *cp != stop; cp++) - ; - BLOCK_APPEND(out, cp_start, cp - cp_start); - BLOCK_GROW(out, cp - cp_start); - } - - *out_size = cp - (b->data + b->idx); - b->idx = cp - b->data + 1; - - return 0; - - block_err: - return -1; -} - -void cram_byte_array_stop_decode_free(cram_codec *c) { - if (!c) return; - - free(c); -} - -int cram_byte_array_stop_describe(cram_codec *c, kstring_t *ks) { - return ksprintf(ks, "BYTE_ARRAY_STOP(stop=%d,id=%d)", - c->u.byte_array_stop.stop, - c->u.byte_array_stop.content_id) - < 0 ? -1 : 0; -} - -cram_codec *cram_byte_array_stop_decode_init(cram_block_compression_hdr *hdr, - char *data, int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) { - cram_codec *c = NULL; - unsigned char *cp = (unsigned char *)data; - int err = 0; - - if (size < (CRAM_MAJOR_VERS(version) == 1 ? 
5 : 2)) - goto malformed; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_BYTE_ARRAY_STOP; - switch (option) { - case E_BYTE_ARRAY_BLOCK: - c->decode = cram_byte_array_stop_decode_block; - break; - case E_BYTE_ARRAY: - c->decode = cram_byte_array_stop_decode_char; - break; - default: - hts_log_error("The byte_array_stop codec only supports BYTE_ARRAYs"); - free(c); - return NULL; - } - c->free = cram_byte_array_stop_decode_free; - c->describe = cram_byte_array_stop_describe; - - c->u.byte_array_stop.stop = *cp++; - if (CRAM_MAJOR_VERS(version) == 1) { - c->u.byte_array_stop.content_id = cp[0] + (cp[1]<<8) + (cp[2]<<16) - + ((unsigned int) cp[3]<<24); - cp += 4; - } else { - c->u.byte_array_stop.content_id = vv->varint_get32((char **)&cp, data+size, &err); - } - - if ((char *)cp - data != size || err) - goto malformed; - - return c; - - malformed: - hts_log_error("Malformed byte_array_stop header stream"); - free(c); - return NULL; -} - -int cram_byte_array_stop_encode(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - BLOCK_APPEND(c->out, in, in_size); - BLOCK_APPEND_CHAR(c->out, c->u.e_byte_array_stop.stop); - return 0; - - block_err: - return -1; -} - -void cram_byte_array_stop_encode_free(cram_codec *c) { - if (!c) - return; - free(c); -} - -int cram_byte_array_stop_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0; - char buf[20], *cp = buf; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - cp += c->vv->varint_put32(cp, buf+20, c->codec); - - if (CRAM_MAJOR_VERS(version) == 1) { - cp += c->vv->varint_put32(cp, buf+20, 5); - *cp++ = c->u.e_byte_array_stop.stop; - *cp++ = (c->u.e_byte_array_stop.content_id >> 0) & 0xff; - *cp++ = (c->u.e_byte_array_stop.content_id >> 8) & 0xff; - *cp++ = (c->u.e_byte_array_stop.content_id >> 16) & 0xff; - *cp++ = (c->u.e_byte_array_stop.content_id >> 24) & 0xff; - } else { - cp += c->vv->varint_put32(cp, buf+20, 1 + 
- c->vv->varint_size(c->u.e_byte_array_stop.content_id)); - *cp++ = c->u.e_byte_array_stop.stop; - cp += c->vv->varint_put32(cp, buf+20, c->u.e_byte_array_stop.content_id); - } - - BLOCK_APPEND(b, buf, cp-buf); - len += cp-buf; - - return len; - - block_err: - return -1; -} - -cram_codec *cram_byte_array_stop_encode_init(cram_stats *st, - enum cram_encoding codec, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - cram_codec *c; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_BYTE_ARRAY_STOP; - c->free = cram_byte_array_stop_encode_free; - c->encode = cram_byte_array_stop_encode; - c->store = cram_byte_array_stop_encode_store; - c->flush = NULL; - - c->u.e_byte_array_stop.stop = ((int *)dat)[0]; - c->u.e_byte_array_stop.content_id = ((int *)dat)[1]; - - return c; -} - -/* - * --------------------------------------------------------------------------- - */ - -const char *cram_encoding2str(enum cram_encoding t) { - switch (t) { - case E_NULL: return "NULL"; - case E_EXTERNAL: return "EXTERNAL"; - case E_GOLOMB: return "GOLOMB"; - case E_HUFFMAN: return "HUFFMAN"; - case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN"; - case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP"; - case E_BETA: return "BETA"; - case E_SUBEXP: return "SUBEXP"; - case E_GOLOMB_RICE: return "GOLOMB_RICE"; - case E_GAMMA: return "GAMMA"; - - case E_VARINT_UNSIGNED: return "VARINT_UNSIGNED"; - case E_VARINT_SIGNED: return "VARINT_SIGNED"; - case E_CONST_BYTE: return "CONST_BYTE"; - case E_CONST_INT: return "CONST_INT"; - - case E_NUM_CODECS: - default: return "?"; - } -} - -static cram_codec *(*decode_init[])(cram_block_compression_hdr *hdr, - char *data, - int size, - enum cram_encoding codec, - enum cram_external_type option, - int version, varint_vec *vv) = { - // CRAM 3.0 valid codecs - NULL, // null codec - cram_external_decode_init, - NULL, // golomb - cram_huffman_decode_init, - cram_byte_array_len_decode_init, - cram_byte_array_stop_decode_init, - 
cram_beta_decode_init, - cram_subexp_decode_init, - NULL, // golomb rice - cram_gamma_decode_init, - - // Gap between CRAM 3 and CRAM 4; 9 to 39 inclusive - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - NULL, // was xbyte - cram_varint_decode_init, // varint unsigned - cram_varint_decode_init, // varint signed - cram_const_decode_init, // const byte - cram_const_decode_init, // const int - - // Gap to CRAM 4 transfomrations; 45 to 49 inclusive - NULL, NULL, NULL, NULL, NULL, - - NULL, // xhuffman - cram_xpack_decode_init, - cram_xrle_decode_init, - cram_xdelta_decode_init, -}; - -cram_codec *cram_decoder_init(cram_block_compression_hdr *hdr, - enum cram_encoding codec, - char *data, int size, - enum cram_external_type option, - int version, varint_vec *vv) { - if (codec >= E_NULL && codec < E_NUM_CODECS && decode_init[codec]) { - cram_codec *r = decode_init[codec](hdr, data, size, codec, - option, version, vv); - if (r) { - r->vv = vv; - r->codec_id = hdr->ncodecs++; - } - return r; - } else { - hts_log_error("Unimplemented codec of type %s", cram_encoding2str(codec)); - return NULL; - } -} - -static cram_codec *(*encode_init[])(cram_stats *stx, - enum cram_encoding codec, - enum cram_external_type option, - void *opt, - int version, varint_vec *vv) = { - // CRAM 3.0 valid codecs - NULL, // null codec - cram_external_encode_init, // int/bytes in cram 3, byte only in cram 4 - NULL, // golomb - cram_huffman_encode_init, - cram_byte_array_len_encode_init, - cram_byte_array_stop_encode_init, - cram_beta_encode_init, - NULL, // subexponential (we support decode only) - NULL, // golomb rice - NULL, // gamma (we support decode only) - - // Gap between CRAM 3 and CRAM 4; 9 to 39 inclusive - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, 
NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - NULL, // was xbyte - cram_varint_encode_init, // varint unsigned - cram_varint_encode_init, // varint signed - cram_const_encode_init, // const byte - cram_const_encode_init, // const int - - // Gap to CRAM 4 transfomrations; 45 to 49 inclusive - NULL, NULL, NULL, NULL, NULL, - - NULL, // xhuffman - cram_xpack_encode_init, - cram_xrle_encode_init, - cram_xdelta_encode_init, -}; - -cram_codec *cram_encoder_init(enum cram_encoding codec, - cram_stats *st, - enum cram_external_type option, - void *dat, - int version, varint_vec *vv) { - if (st && !st->nvals) - return NULL; - - // cram_stats_encoding assumes integer data, but if option - // is E_BYTE then tweak the requested encoding. This ought - // to be fixed in cram_stats_encoding instead. - if (option == E_BYTE || option == E_BYTE_ARRAY || - option == E_BYTE_ARRAY_BLOCK) { - if (codec == E_VARINT_SIGNED || codec == E_VARINT_UNSIGNED) - codec = E_EXTERNAL; - else if (codec == E_CONST_INT) - codec = E_CONST_BYTE; - } - - if (encode_init[codec]) { - cram_codec *r; - if ((r = encode_init[codec](st, codec, option, dat, version, vv))) - r->out = NULL; - if (!r) { - hts_log_error("Unable to initialise codec of type %s", cram_encoding2str(codec)); - return NULL; - } - r->vv = vv; - return r; - } else { - hts_log_error("Unimplemented codec of type %s", cram_encoding2str(codec)); - abort(); - } -} - -/* - * Returns the content_id used by this codec, also in id2 if byte_array_len. - * Returns -1 for the CORE block and -2 for unneeded. - * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs. - */ -int cram_codec_to_id(cram_codec *c, int *id2) { - int bnum1, bnum2 = -2; - - switch (c->codec) { - case E_CONST_INT: - case E_CONST_BYTE: - bnum1 = -2; // no blocks used - break; - - case E_HUFFMAN: - bnum1 = c->u.huffman.ncodes == 1 ? 
-2 : -1; - break; - - case E_GOLOMB: - case E_BETA: - case E_SUBEXP: - case E_GOLOMB_RICE: - case E_GAMMA: - // CORE block - bnum1 = -1; - break; - - case E_EXTERNAL: - case E_VARINT_UNSIGNED: - case E_VARINT_SIGNED: - bnum1 = c->u.external.content_id; - break; - - case E_BYTE_ARRAY_LEN: - bnum1 = cram_codec_to_id(c->u.byte_array_len.len_codec, NULL); - bnum2 = cram_codec_to_id(c->u.byte_array_len.val_codec, NULL); - break; - - case E_BYTE_ARRAY_STOP: - bnum1 = c->u.byte_array_stop.content_id; - break; - - case E_NULL: - bnum1 = -2; - break; - - default: - hts_log_error("Unknown codec type %d", c->codec); - bnum1 = -1; - } - - if (id2) - *id2 = bnum2; - return bnum1; -} - - -/* - * cram_codec structures are specialised for decoding or encoding. - * Unfortunately this makes turning a decoder into an encoder (such as - * when transcoding files) problematic. - * - * This function converts a cram decoder codec into an encoder version - * in-place (ie it modifiers the codec itself). - * - * Returns 0 on success; - * -1 on failure. 
- */ -int cram_codec_decoder2encoder(cram_fd *fd, cram_codec *c) { - int j; - - switch (c->codec) { - case E_CONST_INT: - case E_CONST_BYTE: - // shares struct with decode - c->store = cram_const_encode_store; - break; - - case E_EXTERNAL: - // shares struct with decode - c->free = cram_external_encode_free; - c->store = cram_external_encode_store; - if (c->decode == cram_external_decode_int) - c->encode = cram_external_encode_int; - else if (c->decode == cram_external_decode_long) - c->encode = cram_external_encode_long; - else if (c->decode == cram_external_decode_char) - c->encode = cram_external_encode_char; - else if (c->decode == cram_external_decode_block) - c->encode = cram_external_encode_char; - else - return -1; - break; - - case E_VARINT_SIGNED: - case E_VARINT_UNSIGNED: - // shares struct with decode - c->free = cram_varint_encode_free; - c->store = cram_varint_encode_store; - if (c->decode == cram_varint_decode_int) - c->encode = cram_varint_encode_int; - else if (c->decode == cram_varint_decode_sint) - c->encode = cram_varint_encode_sint; - else if (c->decode == cram_varint_decode_long) - c->encode = cram_varint_encode_long; - else if (c->decode == cram_varint_decode_slong) - c->encode = cram_varint_encode_slong; - else - return -1; - break; - - case E_HUFFMAN: { - // New structure, so switch. - // FIXME: we huffman and e_huffman structs amended, we could - // unify this. 
- cram_codec *t = malloc(sizeof(*t)); - if (!t) return -1; - t->vv = c->vv; - t->codec = E_HUFFMAN; - t->free = cram_huffman_encode_free; - t->store = cram_huffman_encode_store; - t->u.e_huffman.codes = c->u.huffman.codes; - t->u.e_huffman.nvals = c->u.huffman.ncodes; - t->u.e_huffman.option = c->u.huffman.option; - for (j = 0; j < t->u.e_huffman.nvals; j++) { - int32_t sym = t->u.e_huffman.codes[j].symbol; - if (sym >= -1 && sym < MAX_HUFF) - t->u.e_huffman.val2code[sym+1] = j; - } - - if (c->decode == cram_huffman_decode_char0) - t->encode = cram_huffman_encode_char0; - else if (c->decode == cram_huffman_decode_char) - t->encode = cram_huffman_encode_char; - else if (c->decode == cram_huffman_decode_int0) - t->encode = cram_huffman_encode_int0; - else if (c->decode == cram_huffman_decode_int) - t->encode = cram_huffman_encode_int; - else if (c->decode == cram_huffman_decode_long0) - t->encode = cram_huffman_encode_long0; - else if (c->decode == cram_huffman_decode_long) - t->encode = cram_huffman_encode_long; - else { - free(t); - return -1; - } - *c = *t; - free(t); - break; - } - - case E_BETA: - // shares struct with decode - c->free = cram_beta_encode_free; - c->store = cram_beta_encode_store; - if (c->decode == cram_beta_decode_int) - c->encode = cram_beta_encode_int; - else if (c->decode == cram_beta_decode_long) - c->encode = cram_beta_encode_long; - else if (c->decode == cram_beta_decode_char) - c->encode = cram_beta_encode_char; - else - return -1; - break; - - case E_XPACK: { - // shares struct with decode - cram_codec t = *c; - t.free = cram_xpack_encode_free; - t.store = cram_xpack_encode_store; - if (t.decode == cram_xpack_decode_long) - t.encode = cram_xpack_encode_long; - else if (t.decode == cram_xpack_decode_int) - t.encode = cram_xpack_encode_int; - else if (t.decode == cram_xpack_decode_char) - t.encode = cram_xpack_encode_char; - else - return -1; - t.u.e_xpack.sub_codec = t.u.xpack.sub_codec; - if (cram_codec_decoder2encoder(fd, 
t.u.e_xpack.sub_codec) == -1) - return -1; - *c = t; - break; - } - - case E_BYTE_ARRAY_LEN: { - cram_codec *t = malloc(sizeof(*t)); - if (!t) return -1; - t->vv = c->vv; - t->codec = E_BYTE_ARRAY_LEN; - t->free = cram_byte_array_len_encode_free; - t->store = cram_byte_array_len_encode_store; - t->encode = cram_byte_array_len_encode; - t->u.e_byte_array_len.len_codec = c->u.byte_array_len.len_codec; - t->u.e_byte_array_len.val_codec = c->u.byte_array_len.val_codec; - if (cram_codec_decoder2encoder(fd, t->u.e_byte_array_len.len_codec) == -1 || - cram_codec_decoder2encoder(fd, t->u.e_byte_array_len.val_codec) == -1) { - t->free(t); - return -1; - } - - // {len,val}_{encoding,dat} are undefined, but unused. - // Leaving them unset here means we can test that assertion. - *c = *t; - free(t); - break; - } - - case E_BYTE_ARRAY_STOP: - // shares struct with decode - c->free = cram_byte_array_stop_encode_free; - c->store = cram_byte_array_stop_encode_store; - c->encode = cram_byte_array_stop_encode; - break; - - default: - return -1; - } - - return 0; -} - -int cram_codec_describe(cram_codec *c, kstring_t *ks) { - if (c && c->describe) - return c->describe(c, ks); - else - return ksprintf(ks, "?"); -} diff --git a/src/htslib-1.18/cram/cram_decode.c b/src/htslib-1.18/cram/cram_decode.c deleted file mode 100644 index 26c7c1f..0000000 --- a/src/htslib-1.18/cram/cram_decode.c +++ /dev/null @@ -1,3587 +0,0 @@ -/* -Copyright (c) 2012-2020, 2022-2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. 
- - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * - In-memory decoding of CRAM data structures. - * - Iterator for reading CRAM record by record. - */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cram.h" -#include "os.h" -#include "../htslib/hts.h" - -//Whether CIGAR has just M or uses = and X to indicate match and mismatch -//#define USE_X - -/* ---------------------------------------------------------------------- - * CRAM compression headers - */ - -/* - * Decodes the Tag Dictionary record in the preservation map - * Updates the cram compression header. 
- * - * Returns number of bytes decoded on success - * -1 on failure - */ -int cram_decode_TD(cram_fd *fd, char *cp, const char *endp, - cram_block_compression_hdr *h) { - char *op = cp; - unsigned char *dat; - cram_block *b; - int32_t blk_size = 0; - int nTL, i, sz, err = 0; - - if (!(b = cram_new_block(0, 0))) - return -1; - - if (h->TD_blk || h->TL) { - hts_log_warning("More than one TD block found in compression header"); - cram_free_block(h->TD_blk); - free(h->TL); - h->TD_blk = NULL; - h->TL = NULL; - } - - /* Decode */ - blk_size = fd->vv.varint_get32(&cp, endp, &err); - if (!blk_size) { - h->nTL = 0; - cram_free_block(b); - return cp - op; - } - - if (err || blk_size < 0 || endp - cp < blk_size) { - cram_free_block(b); - return -1; - } - - BLOCK_APPEND(b, cp, blk_size); - cp += blk_size; - sz = cp - op; - // Force nul termination if missing - if (BLOCK_DATA(b)[BLOCK_SIZE(b)-1]) - BLOCK_APPEND_CHAR(b, '\0'); - - /* Set up TL lookup table */ - dat = BLOCK_DATA(b); - - // Count - for (nTL = i = 0; i < BLOCK_SIZE(b); i++) { - nTL++; - while (dat[i]) - i++; - } - - // Copy - if (!(h->TL = calloc(nTL, sizeof(*h->TL)))) { - cram_free_block(b); - return -1; - } - for (nTL = i = 0; i < BLOCK_SIZE(b); i++) { - h->TL[nTL++] = &dat[i]; - while (dat[i]) - i++; - } - h->TD_blk = b; - h->nTL = nTL; - - return sz; - - block_err: - cram_free_block(b); - return -1; -} - -/* - * Decodes a CRAM block compression header. 
- * Returns header ptr on success - * NULL on failure - */ -cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, - cram_block *b) { - char *cp, *endp, *cp_copy; - cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr)); - int i, err = 0; - int32_t map_size = 0, map_count = 0; - - if (!hdr) - return NULL; - - if (b->method != RAW) { - if (cram_uncompress_block(b)) { - free(hdr); - return NULL; - } - } - - cp = (char *)b->data; - endp = cp + b->uncomp_size; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - hdr->ref_seq_id = fd->vv.varint_get32(&cp, endp, &err); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - hdr->ref_seq_start = fd->vv.varint_get64(&cp, endp, &err); - hdr->ref_seq_span = fd->vv.varint_get64(&cp, endp, &err); - } else { - hdr->ref_seq_start = fd->vv.varint_get32(&cp, endp, &err); - hdr->ref_seq_span = fd->vv.varint_get32(&cp, endp, &err); - } - hdr->num_records = fd->vv.varint_get32(&cp, endp, &err); - hdr->num_landmarks = fd->vv.varint_get32(&cp, endp, &err); - if (hdr->num_landmarks < 0 || - hdr->num_landmarks >= SIZE_MAX / sizeof(int32_t) || - endp - cp < hdr->num_landmarks) { - free(hdr); - return NULL; - } - if (!(hdr->landmark = malloc(hdr->num_landmarks * sizeof(int32_t)))) { - free(hdr); - return NULL; - } - for (i = 0; i < hdr->num_landmarks; i++) - hdr->landmark[i] = fd->vv.varint_get32(&cp, endp, &err);; - } - - hdr->preservation_map = kh_init(map); - - memset(hdr->rec_encoding_map, 0, - CRAM_MAP_HASH * sizeof(hdr->rec_encoding_map[0])); - memset(hdr->tag_encoding_map, 0, - CRAM_MAP_HASH * sizeof(hdr->tag_encoding_map[0])); - - if (!hdr->preservation_map) { - cram_free_compression_header(hdr); - return NULL; - } - - /* Initialise defaults for preservation map */ - hdr->read_names_included = 0; - hdr->AP_delta = 1; - hdr->qs_seq_orient = 1; - memcpy(hdr->substitution_matrix, "CGTNAGTNACTNACGNACGT", 20); - - /* Preservation map */ - map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp; - map_count = 
fd->vv.varint_get32(&cp, endp, &err); - for (i = 0; i < map_count; i++) { - pmap_t hd; - khint_t k; - int r; - - if (endp - cp < 3) { - cram_free_compression_header(hdr); - return NULL; - } - cp += 2; - switch(CRAM_KEY(cp[-2],cp[-1])) { - case CRAM_KEY('M','I'): // was mapped QS included in V1.0 - case CRAM_KEY('U','I'): // was unmapped QS included in V1.0 - case CRAM_KEY('P','I'): // was unmapped placed in V1.0 - hd.i = *cp++; - break; - - case CRAM_KEY('R','N'): - hd.i = *cp++; - k = kh_put(map, hdr->preservation_map, "RN", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - - kh_val(hdr->preservation_map, k) = hd; - hdr->read_names_included = hd.i; - break; - - case CRAM_KEY('A','P'): - hd.i = *cp++; - k = kh_put(map, hdr->preservation_map, "AP", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - - kh_val(hdr->preservation_map, k) = hd; - hdr->AP_delta = hd.i; - break; - - case CRAM_KEY('R','R'): - hd.i = *cp++; - k = kh_put(map, hdr->preservation_map, "RR", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - - kh_val(hdr->preservation_map, k) = hd; - hdr->no_ref = !hd.i; - break; - - case CRAM_KEY('Q','O'): - hd.i = *cp++; - k = kh_put(map, hdr->preservation_map, "QO", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - - kh_val(hdr->preservation_map, k) = hd; - hdr->qs_seq_orient = hd.i; - break; - - case CRAM_KEY('S','M'): - if (endp - cp < 5) { - cram_free_compression_header(hdr); - return NULL; - } - hdr->substitution_matrix[0][(cp[0]>>6)&3] = 'C'; - hdr->substitution_matrix[0][(cp[0]>>4)&3] = 'G'; - hdr->substitution_matrix[0][(cp[0]>>2)&3] = 'T'; - hdr->substitution_matrix[0][(cp[0]>>0)&3] = 'N'; - - hdr->substitution_matrix[1][(cp[1]>>6)&3] = 'A'; - hdr->substitution_matrix[1][(cp[1]>>4)&3] = 'G'; - hdr->substitution_matrix[1][(cp[1]>>2)&3] = 'T'; - hdr->substitution_matrix[1][(cp[1]>>0)&3] = 'N'; - - hdr->substitution_matrix[2][(cp[2]>>6)&3] 
= 'A'; - hdr->substitution_matrix[2][(cp[2]>>4)&3] = 'C'; - hdr->substitution_matrix[2][(cp[2]>>2)&3] = 'T'; - hdr->substitution_matrix[2][(cp[2]>>0)&3] = 'N'; - - hdr->substitution_matrix[3][(cp[3]>>6)&3] = 'A'; - hdr->substitution_matrix[3][(cp[3]>>4)&3] = 'C'; - hdr->substitution_matrix[3][(cp[3]>>2)&3] = 'G'; - hdr->substitution_matrix[3][(cp[3]>>0)&3] = 'N'; - - hdr->substitution_matrix[4][(cp[4]>>6)&3] = 'A'; - hdr->substitution_matrix[4][(cp[4]>>4)&3] = 'C'; - hdr->substitution_matrix[4][(cp[4]>>2)&3] = 'G'; - hdr->substitution_matrix[4][(cp[4]>>0)&3] = 'T'; - - hd.p = cp; - cp += 5; - - k = kh_put(map, hdr->preservation_map, "SM", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - kh_val(hdr->preservation_map, k) = hd; - break; - - case CRAM_KEY('T','D'): { - int sz = cram_decode_TD(fd, cp, endp, hdr); // tag dictionary - if (sz < 0) { - cram_free_compression_header(hdr); - return NULL; - } - - hd.p = cp; - cp += sz; - - k = kh_put(map, hdr->preservation_map, "TD", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - kh_val(hdr->preservation_map, k) = hd; - break; - } - - default: - hts_log_warning("Unrecognised preservation map key %c%c", cp[-2], cp[-1]); - // guess byte; - cp++; - break; - } - } - if (cp - cp_copy != map_size) { - cram_free_compression_header(hdr); - return NULL; - } - - /* Record encoding map */ - map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp; - map_count = fd->vv.varint_get32(&cp, endp, &err); - int is_v4 = CRAM_MAJOR_VERS(fd->version) >= 4 ? 
1 : 0; - for (i = 0; i < map_count; i++) { - char *key = cp; - int32_t encoding = E_NULL; - int32_t size = 0; - ptrdiff_t offset; - cram_map *m; - enum cram_DS_ID ds_id; - enum cram_external_type type; - - if (endp - cp < 4) { - cram_free_compression_header(hdr); - return NULL; - } - - cp += 2; - encoding = fd->vv.varint_get32(&cp, endp, &err); - size = fd->vv.varint_get32(&cp, endp, &err); - - offset = cp - (char *)b->data; - - if (encoding == E_NULL) - continue; - - if (size < 0 || endp - cp < size) { - cram_free_compression_header(hdr); - return NULL; - } - - //printf("%s codes for %.2s\n", cram_encoding2str(encoding), key); - - /* - * For CRAM1.0 CF and BF are Byte and not Int. - * Practically speaking it makes no difference unless we have a - * 1.0 format file that stores these in EXTERNAL as only then - * does Byte vs Int matter. - * - * Neither this C code nor Java reference implementations did this, - * so we gloss over it and treat them as int. - */ - ds_id = DS_CORE; - if (key[0] == 'B' && key[1] == 'F') { - ds_id = DS_BF; type = E_INT; - } else if (key[0] == 'C' && key[1] == 'F') { - ds_id = DS_CF; type = E_INT; - } else if (key[0] == 'R' && key[1] == 'I') { - ds_id = DS_RI; type = E_INT; - } else if (key[0] == 'R' && key[1] == 'L') { - ds_id = DS_RL; type = E_INT; - } else if (key[0] == 'A' && key[1] == 'P') { - ds_id = DS_AP; - type = is_v4 ? E_SLONG : E_INT; - } else if (key[0] == 'R' && key[1] == 'G') { - ds_id = DS_RG; - type = E_INT; - } else if (key[0] == 'M' && key[1] == 'F') { - ds_id = DS_MF; type = E_INT; - } else if (key[0] == 'N' && key[1] == 'S') { - ds_id = DS_NS; type = E_INT; - } else if (key[0] == 'N' && key[1] == 'P') { - ds_id = DS_NP; - type = is_v4 ? E_LONG : E_INT; - } else if (key[0] == 'T' && key[1] == 'S') { - ds_id = DS_TS; - type = is_v4 ? 
E_SLONG : E_INT; - } else if (key[0] == 'N' && key[1] == 'F') { - ds_id = DS_NF; type = E_INT; - } else if (key[0] == 'T' && key[1] == 'C') { - ds_id = DS_TC; type = E_BYTE; - } else if (key[0] == 'T' && key[1] == 'N') { - ds_id = DS_TN; type = E_INT; - } else if (key[0] == 'F' && key[1] == 'N') { - ds_id = DS_FN; type = E_INT; - } else if (key[0] == 'F' && key[1] == 'C') { - ds_id = DS_FC; type = E_BYTE; - } else if (key[0] == 'F' && key[1] == 'P') { - ds_id = DS_FP; type = E_INT; - } else if (key[0] == 'B' && key[1] == 'S') { - ds_id = DS_BS; type = E_BYTE; - } else if (key[0] == 'I' && key[1] == 'N') { - ds_id = DS_IN; type = E_BYTE_ARRAY; - } else if (key[0] == 'S' && key[1] == 'C') { - ds_id = DS_SC; type = E_BYTE_ARRAY; - } else if (key[0] == 'D' && key[1] == 'L') { - ds_id = DS_DL; type = E_INT; - } else if (key[0] == 'B' && key[1] == 'A') { - ds_id = DS_BA; type = E_BYTE; - } else if (key[0] == 'B' && key[1] == 'B') { - ds_id = DS_BB; type = E_BYTE_ARRAY; - } else if (key[0] == 'R' && key[1] == 'S') { - ds_id = DS_RS; type = E_INT; - } else if (key[0] == 'P' && key[1] == 'D') { - ds_id = DS_PD; type = E_INT; - } else if (key[0] == 'H' && key[1] == 'C') { - ds_id = DS_HC; type = E_INT; - } else if (key[0] == 'M' && key[1] == 'Q') { - ds_id = DS_MQ; type = E_INT; - } else if (key[0] == 'R' && key[1] == 'N') { - ds_id = DS_RN; type = E_BYTE_ARRAY_BLOCK; - } else if (key[0] == 'Q' && key[1] == 'S') { - ds_id = DS_QS; type = E_BYTE; - } else if (key[0] == 'Q' && key[1] == 'Q') { - ds_id = DS_QQ; type = E_BYTE_ARRAY; - } else if (key[0] == 'T' && key[1] == 'L') { - ds_id = DS_TL; type = E_INT; - } else if (key[0] == 'T' && key[1] == 'M') { - } else if (key[0] == 'T' && key[1] == 'V') { - } else { - hts_log_warning("Unrecognised key: %.2s", key); - } - - if (ds_id != DS_CORE) { - if (hdr->codecs[ds_id] != NULL) { - hts_log_warning("Codec for key %.2s defined more than once", - key); - hdr->codecs[ds_id]->free(hdr->codecs[ds_id]); - } - hdr->codecs[ds_id] = 
cram_decoder_init(hdr, encoding, cp, size, - type, fd->version, &fd->vv); - if (!hdr->codecs[ds_id]) { - cram_free_compression_header(hdr); - return NULL; - } - } - - cp += size; - - // Fill out cram_map purely for cram_dump to dump out. - m = malloc(sizeof(*m)); - if (!m) { - cram_free_compression_header(hdr); - return NULL; - } - m->key = CRAM_KEY(key[0], key[1]); - m->encoding = encoding; - m->size = size; - m->offset = offset; - m->codec = NULL; - - m->next = hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])]; - hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])] = m; - } - if (cp - cp_copy != map_size) { - cram_free_compression_header(hdr); - return NULL; - } - - /* Tag encoding map */ - map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp; - map_count = fd->vv.varint_get32(&cp, endp, &err); - for (i = 0; i < map_count; i++) { - int32_t encoding = E_NULL; - int32_t size = 0; - cram_map *m = malloc(sizeof(*m)); // FIXME: use pooled_alloc - uint8_t key[3]; - - if (!m || endp - cp < 6) { - free(m); - cram_free_compression_header(hdr); - return NULL; - } - - m->key = fd->vv.varint_get32(&cp, endp, &err); - key[0] = m->key>>16; - key[1] = m->key>>8; - key[2] = m->key; - encoding = fd->vv.varint_get32(&cp, endp, &err); - size = fd->vv.varint_get32(&cp, endp, &err); - - m->encoding = encoding; - m->size = size; - m->offset = cp - (char *)b->data; - if (size < 0 || endp - cp < size || - !(m->codec = cram_decoder_init(hdr, encoding, cp, size, - E_BYTE_ARRAY_BLOCK, fd->version, &fd->vv))) { - cram_free_compression_header(hdr); - free(m); - return NULL; - } - - cp += size; - - m->next = hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])]; - hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])] = m; - } - if (err || cp - cp_copy != map_size) { - cram_free_compression_header(hdr); - return NULL; - } - - return hdr; -} - -/* - * Note we also need to scan through the record encoding map to - * see which data series share the same block, either external or - * CORE. 
For example if we need the BF data series but MQ and CF - * are also encoded in the same block then we need to add those in - * as a dependency in order to correctly decode BF. - * - * Returns 0 on success - * -1 on failure - */ -int cram_dependent_data_series(cram_fd *fd, - cram_block_compression_hdr *hdr, - cram_slice *s) { - int *block_used; - int core_used = 0; - int i; - static int i_to_id[] = { - DS_BF, DS_AP, DS_FP, DS_RL, DS_DL, DS_NF, DS_BA, DS_QS, - DS_FC, DS_FN, DS_BS, DS_IN, DS_RG, DS_MQ, DS_TL, DS_RN, - DS_NS, DS_NP, DS_TS, DS_MF, DS_CF, DS_RI, DS_RS, DS_PD, - DS_HC, DS_SC, DS_BB, DS_QQ, - }; - uint32_t orig_ds; - - /* - * Set the data_series bit field based on fd->required_fields - * contents. - */ - if (fd->required_fields && fd->required_fields != INT_MAX) { - s->data_series = 0; - - if (fd->required_fields & SAM_QNAME) - s->data_series |= CRAM_RN; - - if (fd->required_fields & SAM_FLAG) - s->data_series |= CRAM_BF; - - if (fd->required_fields & SAM_RNAME) - s->data_series |= CRAM_RI | CRAM_BF; - - if (fd->required_fields & SAM_POS) - s->data_series |= CRAM_AP | CRAM_BF; - - if (fd->required_fields & SAM_MAPQ) - s->data_series |= CRAM_MQ; - - if (fd->required_fields & SAM_CIGAR) - s->data_series |= CRAM_CIGAR; - - if (fd->required_fields & SAM_RNEXT) - s->data_series |= CRAM_CF | CRAM_NF | CRAM_RI | CRAM_NS |CRAM_BF; - - if (fd->required_fields & SAM_PNEXT) - s->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_NP | CRAM_BF; - - if (fd->required_fields & SAM_TLEN) - s->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_TS | - CRAM_BF | CRAM_MF | CRAM_RI | CRAM_CIGAR; - - if (fd->required_fields & SAM_SEQ) - s->data_series |= CRAM_SEQ; - - if (!(fd->required_fields & SAM_AUX)) - // No easy way to get MD/NM without other tags at present - s->decode_md = 0; - - if (fd->required_fields & SAM_QUAL) - s->data_series |= CRAM_QUAL; - - if (fd->required_fields & SAM_AUX) - s->data_series |= CRAM_RG | CRAM_TL | CRAM_aux; - - if (fd->required_fields & 
SAM_RGAUX) - s->data_series |= CRAM_RG | CRAM_BF; - - // Always uncompress CORE block - if (cram_uncompress_block(s->block[0])) - return -1; - } else { - s->data_series = CRAM_ALL; - - for (i = 0; i < s->hdr->num_blocks; i++) { - if (cram_uncompress_block(s->block[i])) - return -1; - } - - return 0; - } - - block_used = calloc(s->hdr->num_blocks+1, sizeof(int)); - if (!block_used) - return -1; - - do { - /* - * Also set data_series based on code prerequisites. Eg if we need - * CRAM_QS then we also need to know CRAM_RL so we know how long it - * is, or if we need FC/FP then we also need FN (number of features). - * - * It's not reciprocal though. We may be needing to decode FN - * but have no need to decode FC, FP and cigar ops. - */ - if (s->data_series & CRAM_RS) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_PD) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_HC) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_QS) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_IN) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_SC) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_BS) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_DL) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_BA) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_BB) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_QQ) s->data_series |= CRAM_FC|CRAM_FP; - - // cram_decode_seq() needs seq[] array - if (s->data_series & (CRAM_SEQ|CRAM_CIGAR)) s->data_series |= CRAM_RL; - - if (s->data_series & CRAM_FP) s->data_series |= CRAM_FC; - if (s->data_series & CRAM_FC) s->data_series |= CRAM_FN; - if (s->data_series & CRAM_aux) s->data_series |= CRAM_TL; - if (s->data_series & CRAM_MF) s->data_series |= CRAM_CF; - if (s->data_series & CRAM_MQ) s->data_series |= CRAM_BF; - if (s->data_series & CRAM_BS) s->data_series |= CRAM_RI; - if (s->data_series & (CRAM_MF |CRAM_NS 
|CRAM_NP |CRAM_TS |CRAM_NF)) - s->data_series |= CRAM_CF; - if (!hdr->read_names_included && s->data_series & CRAM_RN) - s->data_series |= CRAM_CF | CRAM_NF; - if (s->data_series & (CRAM_BA | CRAM_QS | CRAM_BB | CRAM_QQ)) - s->data_series |= CRAM_BF | CRAM_CF | CRAM_RL; - if (s->data_series & CRAM_FN) { - // The CRAM_FN loop checks for reference length boundaries, - // which needs a working seq_pos. Some fields are fixed size - // irrespective of if we decode (BS), but others need to know - // the size of the string fetched back (SC, IN, BB). - s->data_series |= CRAM_SC | CRAM_IN | CRAM_BB; - } - - orig_ds = s->data_series; - - // Find which blocks are in use. - for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) { - int bnum1, bnum2, j; - cram_codec *c = hdr->codecs[i_to_id[i]]; - - if (!(s->data_series & (1<hdr->num_blocks; j++) { - if (s->block[j]->content_type == EXTERNAL && - s->block[j]->content_id == bnum1) { - block_used[j] = 1; - if (cram_uncompress_block(s->block[j])) { - free(block_used); - return -1; - } - } - } - break; - } - - if (bnum2 == -2 || bnum1 == bnum2) - break; - - bnum1 = bnum2; // 2nd pass - } - } - - // Tags too - if ((fd->required_fields & SAM_AUX) || - (s->data_series & CRAM_aux)) { - for (i = 0; i < CRAM_MAP_HASH; i++) { - int bnum1, bnum2, j; - cram_map *m = hdr->tag_encoding_map[i]; - - while (m) { - cram_codec *c = m->codec; - if (!c) { - m = m->next; - continue; - } - - bnum1 = cram_codec_to_id(c, &bnum2); - - for (;;) { - switch (bnum1) { - case -2: - break; - - case -1: - core_used = 1; - break; - - default: - for (j = 0; j < s->hdr->num_blocks; j++) { - if (s->block[j]->content_type == EXTERNAL && - s->block[j]->content_id == bnum1) { - block_used[j] = 1; - if (cram_uncompress_block(s->block[j])) { - free(block_used); - return -1; - } - } - } - break; - } - - if (bnum2 == -2 || bnum1 == bnum2) - break; - - bnum1 = bnum2; // 2nd pass - } - - m = m->next; - } - } - } - - // We now know which blocks are in used, so repeat and 
find - // which other data series need to be added. - for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) { - int bnum1, bnum2, j; - cram_codec *c = hdr->codecs[i_to_id[i]]; - - if (!c) - continue; - - bnum1 = cram_codec_to_id(c, &bnum2); - - for (;;) { - switch (bnum1) { - case -2: - break; - - case -1: - if (core_used) { - //printf(" + data series %08x:\n", 1<data_series |= 1<hdr->num_blocks; j++) { - if (s->block[j]->content_type == EXTERNAL && - s->block[j]->content_id == bnum1) { - if (block_used[j]) { - //printf(" + data series %08x:\n", 1<data_series |= 1<tag_encoding_map[i]; - - while (m) { - cram_codec *c = m->codec; - if (!c) { - m = m->next; - continue; - } - - bnum1 = cram_codec_to_id(c, &bnum2); - - for (;;) { - switch (bnum1) { - case -2: - break; - - case -1: - //printf(" + data series %08x:\n", CRAM_aux); - s->data_series |= CRAM_aux; - break; - - default: - for (j = 0; j < s->hdr->num_blocks; j++) { - if (s->block[j]->content_type == EXTERNAL && - s->block[j]->content_id == bnum1) { - if (block_used[j]) { - //printf(" + data series %08x:\n", - // CRAM_aux); - s->data_series |= CRAM_aux; - } - } - } - break; - } - - if (bnum2 == -2 || bnum1 == bnum2) - break; - - bnum1 = bnum2; // 2nd pass - } - - m = m->next; - } - } - } while (orig_ds != s->data_series); - - free(block_used); - return 0; -} - -/* - * Checks whether an external block is used solely by a single data series. - * Returns the codec type if so (EXTERNAL, BYTE_ARRAY_LEN, BYTE_ARRAY_STOP) - * or 0 if not (E_NULL). 
- */ -static int cram_ds_unique(cram_block_compression_hdr *hdr, cram_codec *c, - int id) { - int i, n_id = 0; - enum cram_encoding e_type = 0; - - for (i = 0; i < DS_END; i++) { - cram_codec *c; - int bnum1, bnum2, old_n_id; - - if (!(c = hdr->codecs[i])) - continue; - - bnum1 = cram_codec_to_id(c, &bnum2); - - old_n_id = n_id; - if (bnum1 == id) { - n_id++; - e_type = c->codec; - } - if (bnum2 == id) { - n_id++; - e_type = c->codec; - } - - if (n_id == old_n_id+2) - n_id--; // len/val in same place counts once only. - } - - return n_id == 1 ? e_type : 0; -} - -/* - * Attempts to estimate the size of some blocks so we can preallocate them - * before decoding. Although decoding will automatically grow the blocks, - * it is typically more efficient to preallocate. - */ -void cram_decode_estimate_sizes(cram_block_compression_hdr *hdr, cram_slice *s, - int *qual_size, int *name_size, - int *q_id) { - int bnum1, bnum2; - cram_codec *cd; - - *qual_size = 0; - *name_size = 0; - - /* Qual */ - cd = hdr->codecs[DS_QS]; - if (cd == NULL) return; - bnum1 = cram_codec_to_id(cd, &bnum2); - if (bnum1 < 0 && bnum2 >= 0) bnum1 = bnum2; - if (cram_ds_unique(hdr, cd, bnum1)) { - cram_block *b = cram_get_block_by_id(s, bnum1); - if (b) *qual_size = b->uncomp_size; - if (q_id && cd->codec == E_EXTERNAL) - *q_id = bnum1; - } - - /* Name */ - cd = hdr->codecs[DS_RN]; - if (cd == NULL) return; - bnum1 = cram_codec_to_id(cd, &bnum2); - if (bnum1 < 0 && bnum2 >= 0) bnum1 = bnum2; - if (cram_ds_unique(hdr, cd, bnum1)) { - cram_block *b = cram_get_block_by_id(s, bnum1); - if (b) *name_size = b->uncomp_size; - } -} - - -/* ---------------------------------------------------------------------- - * CRAM slices - */ - -/* - * Decodes a CRAM (un)mapped slice header block. 
- * Returns slice header ptr on success - * NULL on failure - */ -cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b) { - cram_block_slice_hdr *hdr; - unsigned char *cp; - unsigned char *cp_end; - int i, err = 0; - - if (b->method != RAW) { - /* Spec. says slice header should be RAW, but we can future-proof - by trying to decode it if it isn't. */ - if (cram_uncompress_block(b) < 0) - return NULL; - } - cp = (unsigned char *)BLOCK_DATA(b); - cp_end = cp + b->uncomp_size; - - if (b->content_type != MAPPED_SLICE && - b->content_type != UNMAPPED_SLICE) - return NULL; - - if (!(hdr = calloc(1, sizeof(*hdr)))) - return NULL; - - hdr->content_type = b->content_type; - - if (b->content_type == MAPPED_SLICE) { - hdr->ref_seq_id = fd->vv.varint_get32s((char **)&cp, (char *)cp_end, &err); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - hdr->ref_seq_start = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err); - hdr->ref_seq_span = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err); - } else { - hdr->ref_seq_start = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - hdr->ref_seq_span = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - } - if (hdr->ref_seq_start < 0 || hdr->ref_seq_span < 0) { - free(hdr); - hts_log_error("Negative values not permitted for header " - "sequence start or span fields"); - return NULL; - } - } - hdr->num_records = fd->vv.varint_get32((char **)&cp, (char *) cp_end, &err); - hdr->record_counter = 0; - if (CRAM_MAJOR_VERS(fd->version) == 2) { - hdr->record_counter = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - } else if (CRAM_MAJOR_VERS(fd->version) >= 3) { - hdr->record_counter = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err); - } - hdr->num_blocks = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - hdr->num_content_ids = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - if (hdr->num_content_ids < 1 || - hdr->num_content_ids >= SIZE_MAX / sizeof(int32_t)) { - /* 
Slice must have at least one data block, - and malloc'd size shouldn't wrap. */ - free(hdr); - return NULL; - } - hdr->block_content_ids = malloc(hdr->num_content_ids * sizeof(int32_t)); - if (!hdr->block_content_ids) { - free(hdr); - return NULL; - } - - for (i = 0; i < hdr->num_content_ids; i++) - hdr->block_content_ids[i] = fd->vv.varint_get32((char **)&cp, - (char *)cp_end, - &err); - if (err) { - free(hdr->block_content_ids); - free(hdr); - return NULL; - } - - if (b->content_type == MAPPED_SLICE) - hdr->ref_base_id = fd->vv.varint_get32((char **)&cp, (char *) cp_end, &err); - - if (CRAM_MAJOR_VERS(fd->version) != 1) { - if (cp_end - cp < 16) { - free(hdr->block_content_ids); - free(hdr); - return NULL; - } - memcpy(hdr->md5, cp, 16); - } else { - memset(hdr->md5, 0, 16); - } - - if (!err) - return hdr; - - free(hdr->block_content_ids); - free(hdr); - return NULL; -} - - -#if 0 -/* Returns the number of bits set in val; it the highest bit used */ -static int nbits(int v) { - static const int MultiplyDeBruijnBitPosition[32] = { - 1, 10, 2, 11, 14, 22, 3, 30, 12, 15, 17, 19, 23, 26, 4, 31, - 9, 13, 21, 29, 16, 18, 25, 8, 20, 28, 24, 7, 27, 6, 5, 32 - }; - - v |= v >> 1; // first up to set all bits 1 after the first 1 */ - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - - // DeBruijn magic to find top bit - return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27]; -} -#endif - -#if 0 -static int sort_freqs(const void *vp1, const void *vp2) { - const int i1 = *(const int *)vp1; - const int i2 = *(const int *)vp2; - return i1-i2; -} -#endif - -/* ---------------------------------------------------------------------- - * Primary CRAM sequence decoder - */ - -static inline int add_md_char(cram_slice *s, int decode_md, char c, int32_t *md_dist) { - if (decode_md) { - BLOCK_APPEND_UINT(s->aux_blk, *md_dist); - BLOCK_APPEND_CHAR(s->aux_blk, c); - *md_dist = 0; - } - return 0; - - block_err: - return -1; -} - -/* - * Internal part of 
cram_decode_slice(). - * Generates the sequence, quality and cigar components. - */ -static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s, - cram_block *blk, cram_record *cr, sam_hdr_t *sh, - int cf, char *seq, char *qual, - int has_MD, int has_NM) { - int prev_pos = 0, f, r = 0, out_sz = 1; - int seq_pos = 1; - int cig_len = 0; - int64_t ref_pos = cr->apos; - int32_t fn, i32; - enum cigar_op cig_op = BAM_CMATCH; - uint32_t *cigar = s->cigar; - uint32_t ncigar = s->ncigar; - uint32_t cigar_alloc = s->cigar_alloc; - uint32_t nm = 0; - int32_t md_dist = 0; - int orig_aux = 0; - // CRAM < 4.0 decode_md is off/on - // CRAM >= 4.0 decode_md is auto/on (auto=on if MD* present, off otherwise) - int do_md = CRAM_MAJOR_VERS(fd->version) >= 4 - ? (s->decode_md > 0) - : (s->decode_md != 0); - int decode_md = s->ref && cr->ref_id >= 0 && ((do_md && !has_MD) || has_MD < 0); - int decode_nm = s->ref && cr->ref_id >= 0 && ((do_md && !has_NM) || has_NM < 0); - uint32_t ds = s->data_series; - sam_hrecs_t *bfd = sh->hrecs; - - cram_codec **codecs = c->comp_hdr->codecs; - - if ((ds & CRAM_QS) && !(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { - memset(qual, 255, cr->len); - } - - if (cr->cram_flags & CRAM_FLAG_NO_SEQ) - decode_md = decode_nm = 0; - - if (decode_md) { - orig_aux = BLOCK_SIZE(s->aux_blk); - if (has_MD == 0) - BLOCK_APPEND(s->aux_blk, "MDZ", 3); - } - - if (ds & CRAM_FN) { - if (!codecs[DS_FN]) return -1; - r |= codecs[DS_FN]->decode(s,codecs[DS_FN], - blk, (char *)&fn, &out_sz); - if (r) return r; - } else { - fn = 0; - } - - ref_pos--; // count from 0 - cr->cigar = ncigar; - - if (!(ds & (CRAM_FC | CRAM_FP))) - goto skip_cigar; - - if (fn) { - if ((ds & CRAM_FC) && !codecs[DS_FC]) - return -1; - if ((ds & CRAM_FP) && !codecs[DS_FP]) - return -1; - } - - for (f = 0; f < fn; f++) { - int32_t pos = 0; - char op; - - if (ncigar+2 >= cigar_alloc) { - cigar_alloc = cigar_alloc ? 
cigar_alloc*2 : 1024; - if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) - return -1; - s->cigar = cigar; - } - - if (ds & CRAM_FC) { - r |= codecs[DS_FC]->decode(s, - codecs[DS_FC], - blk, - &op, &out_sz); - if (r) return r; - } - - if (!(ds & CRAM_FP)) - continue; - - r |= codecs[DS_FP]->decode(s, - codecs[DS_FP], - blk, - (char *)&pos, &out_sz); - if (r) return r; - pos += prev_pos; - - if (pos <= 0) { - hts_log_error("Feature position %d before start of read", pos); - return -1; - } - - if (pos > seq_pos) { - if (pos > cr->len+1) - return -1; - - if (s->ref && cr->ref_id >= 0) { - if (ref_pos + pos - seq_pos > bfd->ref[cr->ref_id].len) { - static int whinged = 0; - int rlen; - if (!whinged) - hts_log_warning("Ref pos outside of ref sequence boundary"); - whinged = 1; - rlen = bfd->ref[cr->ref_id].len - ref_pos; - // May miss MD/NM cases where both seq/ref are N, but this is a - // malformed cram file anyway. - if (rlen > 0) { - if (ref_pos + rlen > s->ref_end) - goto beyond_slice; - - memcpy(&seq[seq_pos-1], - &s->ref[ref_pos - s->ref_start +1], rlen); - if ((pos - seq_pos) - rlen > 0) - memset(&seq[seq_pos-1+rlen], 'N', - (pos - seq_pos) - rlen); - } else { - memset(&seq[seq_pos-1], 'N', cr->len - seq_pos + 1); - } - if (md_dist >= 0) - md_dist += pos - seq_pos; - } else { - // 'N' in both ref and seq is also mismatch for NM/MD - if (ref_pos + pos-seq_pos > s->ref_end) - goto beyond_slice; - - const char *refp = s->ref + ref_pos - s->ref_start + 1; - const int frag_len = pos - seq_pos; - int do_cpy = 1; - if (decode_md || decode_nm) { - char *N = memchr(refp, 'N', frag_len); - if (N) { - int i; - for (i = 0; i < frag_len; i++) { - char base = refp[i]; - if (base == 'N') { - if (add_md_char(s, decode_md, - 'N', &md_dist) < 0) - return -1; - nm++; - } else { - md_dist++; - } - seq[seq_pos-1+i] = base; - } - do_cpy = 0; - } else { - md_dist += frag_len; - } - } - if (do_cpy) - memcpy(&seq[seq_pos-1], refp, frag_len); - } - } -#ifdef USE_X - if 
(cig_len && cig_op != BAM_CBASE_MATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - cig_op = BAM_CBASE_MATCH; -#else - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - cig_op = BAM_CMATCH; -#endif - cig_len += pos - seq_pos; - ref_pos += pos - seq_pos; - seq_pos = pos; - } - - prev_pos = pos; - - if (!(ds & CRAM_FC)) - goto skip_cigar; - - switch(op) { - case 'S': { // soft clip: IN - int32_t out_sz2 = 1; - int have_sc = 0; - - if (cig_len) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - switch (CRAM_MAJOR_VERS(fd->version)) { - case 1: - if (ds & CRAM_IN) { - r |= codecs[DS_IN] - ? codecs[DS_IN]->decode(s, codecs[DS_IN], - blk, - cr->len ? &seq[pos-1] : NULL, - &out_sz2) - : (seq[pos-1] = 'N', out_sz2 = 1, 0); - have_sc = 1; - } - break; - case 2: - default: - if (ds & CRAM_SC) { - r |= codecs[DS_SC] - ? codecs[DS_SC]->decode(s, codecs[DS_SC], - blk, - cr->len ? &seq[pos-1] : NULL, - &out_sz2) - : (seq[pos-1] = 'N', out_sz2 = 1, 0); - have_sc = 1; - } - break; - - //default: - // r |= codecs[DS_BB] - // ? 
codecs[DS_BB]->decode(s, codecs[DS_BB], - // blk, &seq[pos-1], &out_sz2) - // : (seq[pos-1] = 'N', out_sz2 = 1, 0); - } - if (have_sc) { - if (r) return r; - cigar[ncigar++] = (out_sz2<<4) + BAM_CSOFT_CLIP; - cig_op = BAM_CSOFT_CLIP; - seq_pos += out_sz2; - } - break; - } - - case 'X': { // Substitution; BS - unsigned char base; -#ifdef USE_X - if (cig_len && cig_op != BAM_CBASE_MISMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_BS) { - if (!codecs[DS_BS]) return -1; - r |= codecs[DS_BS]->decode(s, codecs[DS_BS], blk, - (char *)&base, &out_sz); - if (pos-1 < cr->len) - seq[pos-1] = 'N'; // FIXME look up BS=base value - } - cig_op = BAM_CBASE_MISMATCH; -#else - int ref_base; - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_BS) { - if (!codecs[DS_BS]) return -1; - r |= codecs[DS_BS]->decode(s, codecs[DS_BS], blk, - (char *)&base, &out_sz); - if (r) return -1; - if (cr->ref_id < 0 || ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) { - if (pos-1 < cr->len) - seq[pos-1] = c->comp_hdr-> - substitution_matrix[fd->L1['N']][base]; - if (decode_md || decode_nm) { - if (md_dist >= 0 && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - md_dist = -1; - nm--; - } - } else { - unsigned char ref_call = ref_pos < s->ref_end - ? 
(uc)s->ref[ref_pos - s->ref_start +1] - : 'N'; - ref_base = fd->L1[ref_call]; - if (pos-1 < cr->len) - seq[pos-1] = c->comp_hdr-> - substitution_matrix[ref_base][base]; - if (add_md_char(s, decode_md, ref_call, &md_dist) < 0) - return -1; - } - } - cig_op = BAM_CMATCH; -#endif - nm++; - cig_len++; - seq_pos++; - ref_pos++; - break; - } - - case 'D': { // Deletion; DL - if (cig_len && cig_op != BAM_CDEL) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_DL) { - if (!codecs[DS_DL]) return -1; - r |= codecs[DS_DL]->decode(s, codecs[DS_DL], blk, - (char *)&i32, &out_sz); - if (r) return r; - if (decode_md || decode_nm) { - if (ref_pos + i32 > s->ref_end) - goto beyond_slice; - if (md_dist >= 0 && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - if (ref_pos + i32 <= bfd->ref[cr->ref_id].len) { - if (decode_md) { - BLOCK_APPEND_CHAR(s->aux_blk, '^'); - BLOCK_APPEND(s->aux_blk, - &s->ref[ref_pos - s->ref_start +1], - i32); - md_dist = 0; - } - nm += i32; - } else { - uint32_t dlen; - if (bfd->ref[cr->ref_id].len >= ref_pos) { - if (decode_md) { - BLOCK_APPEND_CHAR(s->aux_blk, '^'); - BLOCK_APPEND(s->aux_blk, - &s->ref[ref_pos - s->ref_start+1], - bfd->ref[cr->ref_id].len-ref_pos); - BLOCK_APPEND_UINT(s->aux_blk, 0); - } - dlen = i32 - (bfd->ref[cr->ref_id].len - ref_pos); - nm += i32 - dlen; - } else { - dlen = i32; - } - - md_dist = -1; - } - } - cig_op = BAM_CDEL; - cig_len += i32; - ref_pos += i32; - //printf(" %d: DL = %d (ret %d)\n", f, i32, r); - } - break; - } - - case 'I': { // Insertion (several bases); IN - int32_t out_sz2 = 1; - - if (cig_len && cig_op != BAM_CINS) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - - if (ds & CRAM_IN) { - if (!codecs[DS_IN]) return -1; - r |= codecs[DS_IN]->decode(s, codecs[DS_IN], blk, - cr->len ? 
&seq[pos-1] : NULL, - &out_sz2); - if (r) return r; - cig_op = BAM_CINS; - cig_len += out_sz2; - seq_pos += out_sz2; - nm += out_sz2; - //printf(" %d: IN(I) = %.*s (ret %d, out_sz %d)\n", f, out_sz2, dat, r, out_sz2); - } - break; - } - - case 'i': { // Insertion (single base); BA - if (cig_len && cig_op != BAM_CINS) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_BA) { - if (!codecs[DS_BA]) return -1; - r |= codecs[DS_BA]->decode(s, codecs[DS_BA], blk, - cr->len ? &seq[pos-1] : NULL, - &out_sz); - if (r) return r; - } - cig_op = BAM_CINS; - cig_len++; - seq_pos++; - nm++; - break; - } - - case 'b': { // Several bases - int32_t len = 1; - - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - - if (ds & CRAM_BB) { - if (!codecs[DS_BB]) return -1; - r |= codecs[DS_BB]->decode(s, codecs[DS_BB], blk, - cr->len ? &seq[pos-1] : NULL, - &len); - if (r) return r; - - if (decode_md || decode_nm) { - int x; - if (md_dist >= 0 && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - - for (x = 0; x < len; x++) { - if (x && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, 0); - if (ref_pos+x >= bfd->ref[cr->ref_id].len || !s->ref) { - md_dist = -1; - break; - } else { - if (decode_md) { - if (ref_pos + x > s->ref_end) - goto beyond_slice; - char r = s->ref[ref_pos+x-s->ref_start +1]; - BLOCK_APPEND_CHAR(s->aux_blk, r); - } - } - } - - nm += x; - md_dist = 0; - } - } - - cig_op = BAM_CMATCH; - - cig_len+=len; - seq_pos+=len; - ref_pos+=len; - //prev_pos+=len; - break; - } - - case 'q': { // Several quality values - int32_t len = 1; - - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - - if (ds & CRAM_QQ) { - if (!codecs[DS_QQ]) return -1; - if ((ds & CRAM_QS) && !(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) - && (unsigned char)*qual == 255) - memset(qual, 30, cr->len); // ? 
- r |= codecs[DS_QQ]->decode(s, codecs[DS_QQ], blk, - (char *)&qual[pos-1], &len); - if (r) return r; - } - - cig_op = BAM_CMATCH; - - //prev_pos+=len; - break; - } - - case 'B': { // Read base; BA, QS -#ifdef USE_X - if (cig_len && cig_op != BAM_CBASE_MISMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } -#else - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } -#endif - if (ds & CRAM_BA) { - if (!codecs[DS_BA]) return -1; - r |= codecs[DS_BA]->decode(s, codecs[DS_BA], blk, - cr->len ? &seq[pos-1] : NULL, - &out_sz); - - if (decode_md || decode_nm) { - if (md_dist >= 0 && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - if (ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) { - md_dist = -1; - } else { - if (decode_md) { - if (ref_pos > s->ref_end) - goto beyond_slice; - BLOCK_APPEND_CHAR(s->aux_blk, - s->ref[ref_pos-s->ref_start +1]); - } - nm++; - md_dist = 0; - } - } - } - if (ds & CRAM_QS) { - if (!codecs[DS_QS]) return -1; - if (!(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) - && (unsigned char)*qual == 255) - memset(qual, 30, cr->len); // ASCII ?. Same as htsjdk - r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, - (char *)&qual[pos-1], &out_sz); - } -#ifdef USE_X - cig_op = BAM_CBASE_MISMATCH; -#else - cig_op = BAM_CMATCH; -#endif - cig_len++; - seq_pos++; - ref_pos++; - //printf(" %d: BA/QS(B) = %c/%d (ret %d)\n", f, i32, qc, r); - break; - } - - case 'Q': { // Quality score; QS - if (ds & CRAM_QS) { - if (!codecs[DS_QS]) return -1; - if (!(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) && - (unsigned char)*qual == 255) - memset(qual, 30, cr->len); // ? 
- r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, - (char *)&qual[pos-1], &out_sz); - //printf(" %d: QS = %d (ret %d)\n", f, qc, r); - } - break; - } - - case 'H': { // hard clip; HC - if (cig_len && cig_op != BAM_CHARD_CLIP) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_HC) { - if (!codecs[DS_HC]) return -1; - r |= codecs[DS_HC]->decode(s, codecs[DS_HC], blk, - (char *)&i32, &out_sz); - if (r) return r; - cig_op = BAM_CHARD_CLIP; - cig_len += i32; - } - break; - } - - case 'P': { // padding; PD - if (cig_len && cig_op != BAM_CPAD) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_PD) { - if (!codecs[DS_PD]) return -1; - r |= codecs[DS_PD]->decode(s, codecs[DS_PD], blk, - (char *)&i32, &out_sz); - if (r) return r; - cig_op = BAM_CPAD; - cig_len += i32; - } - break; - } - - case 'N': { // Ref skip; RS - if (cig_len && cig_op != BAM_CREF_SKIP) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_RS) { - if (!codecs[DS_RS]) return -1; - r |= codecs[DS_RS]->decode(s, codecs[DS_RS], blk, - (char *)&i32, &out_sz); - if (r) return r; - cig_op = BAM_CREF_SKIP; - cig_len += i32; - ref_pos += i32; - } - break; - } - - default: - hts_log_error("Unknown feature code '%c'", op); - return -1; - } - } - - if (!(ds & CRAM_FC)) - goto skip_cigar; - - /* An implicit match op for any unaccounted for bases */ - if ((ds & CRAM_FN) && cr->len >= seq_pos) { - if (s->ref && cr->ref_id >= 0) { - if (ref_pos + cr->len - seq_pos + 1 > bfd->ref[cr->ref_id].len) { - static int whinged = 0; - int rlen; - if (!whinged) - hts_log_warning("Ref pos outside of ref sequence boundary"); - whinged = 1; - rlen = bfd->ref[cr->ref_id].len - ref_pos; - // May miss MD/NM cases where both seq/ref are N, but this is a - // malformed cram file anyway. 
- if (rlen > 0) { - if (seq_pos-1 + rlen < cr->len) - memcpy(&seq[seq_pos-1], - &s->ref[ref_pos - s->ref_start +1], rlen); - if ((cr->len - seq_pos + 1) - rlen > 0) - memset(&seq[seq_pos-1+rlen], 'N', - (cr->len - seq_pos + 1) - rlen); - } else { - if (cr->len - seq_pos + 1 > 0) - memset(&seq[seq_pos-1], 'N', cr->len - seq_pos + 1); - } - if (md_dist >= 0) - md_dist += cr->len - seq_pos + 1; - } else { - if (cr->len - seq_pos + 1 > 0) { - if (ref_pos + cr->len-seq_pos +1 > s->ref_end) - goto beyond_slice; - int remainder = cr->len - (seq_pos-1); - int j = ref_pos - s->ref_start + 1; - if (decode_md || decode_nm) { - int i; - char *N = memchr(&s->ref[j], 'N', remainder); - if (!N) { - // short cut the common case - md_dist += cr->len - (seq_pos-1); - } else { - char *refp = &s->ref[j-(seq_pos-1)]; - md_dist += N-&s->ref[j]; - int i_start = seq_pos-1 + (N - &s->ref[j]); - for (i = i_start; i < cr->len; i++) { - char base = refp[i]; - if (base == 'N') { - if (add_md_char(s, decode_md, 'N', - &md_dist) < 0) - return -1; - nm++; - } else { - md_dist++; - } - } - } - } - memcpy(&seq[seq_pos-1], &s->ref[j], remainder); - } - ref_pos += cr->len - seq_pos + 1; - } - } else if (cr->ref_id >= 0) { - // So alignment end can be computed even when not decoding sequence - ref_pos += cr->len - seq_pos + 1; - } - - if (ncigar+1 >= cigar_alloc) { - cigar_alloc = cigar_alloc ? 
cigar_alloc*2 : 1024; - if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) - return -1; - s->cigar = cigar; - } -#ifdef USE_X - if (cig_len && cig_op != BAM_CBASE_MATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - cig_op = BAM_CBASE_MATCH; -#else - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - cig_op = BAM_CMATCH; -#endif - cig_len += cr->len - seq_pos+1; - } - - skip_cigar: - - if ((ds & CRAM_FN) && decode_md) { - if (md_dist >= 0) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - } - - if (cig_len) { - if (ncigar >= cigar_alloc) { - cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024; - if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) - return -1; - s->cigar = cigar; - } - - cigar[ncigar++] = (cig_len<<4) + cig_op; - } - - cr->ncigar = ncigar - cr->cigar; - cr->aend = ref_pos > cr->apos ? ref_pos : cr->apos; - - //printf("2: %.*s %d .. %d\n", cr->name_len, DSTRING_STR(name_ds) + cr->name, cr->apos, ref_pos); - - if (ds & CRAM_MQ) { - if (!codecs[DS_MQ]) return -1; - r |= codecs[DS_MQ]->decode(s, codecs[DS_MQ], blk, - (char *)&cr->mqual, &out_sz); - } else { - cr->mqual = 40; - } - - if ((ds & CRAM_QS) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { - int32_t out_sz2 = cr->len; - - if (!codecs[DS_QS]) return -1; - r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, - qual, &out_sz2); - } - - s->cigar = cigar; - s->cigar_alloc = cigar_alloc; - s->ncigar = ncigar; - - if (cr->cram_flags & CRAM_FLAG_NO_SEQ) - cr->len = 0; - - if (decode_md) { - BLOCK_APPEND_CHAR(s->aux_blk, '\0'); // null terminate MD:Z: - size_t sz = BLOCK_SIZE(s->aux_blk) - orig_aux; - if (has_MD < 0) { - // has_MD < 0; already have MDZ allocated in aux at -has_MD, - // but wrote MD to end of aux (at orig_aux). - // We need some memmoves to shuffle it around. 
- char tmp_MD_[1024], *tmp_MD = tmp_MD_; - unsigned char *orig_aux_p = BLOCK_DATA(s->aux_blk) + orig_aux; - if (sz > 1024) { - tmp_MD = malloc(sz); - if (!tmp_MD) - return -1; - } - memcpy(tmp_MD, orig_aux_p, sz); - memmove(&BLOCK_DATA(s->aux_blk)[-has_MD] + sz, - &BLOCK_DATA(s->aux_blk)[-has_MD], - orig_aux_p - &BLOCK_DATA(s->aux_blk)[-has_MD]); - memcpy(&BLOCK_DATA(s->aux_blk)[-has_MD], tmp_MD, sz); - if (tmp_MD != tmp_MD_) - free(tmp_MD); - - if (-has_NM > -has_MD) - // we inserted before NM, so move it up a bit - has_NM -= sz; - } - // else has_MD == 0 and we've already appended MD to the end. - - cr->aux_size += sz; - } - - if (decode_nm) { - if (has_NM == 0) { - char buf[7]; - size_t buf_size; - buf[0] = 'N'; buf[1] = 'M'; - if (nm <= UINT8_MAX) { - buf_size = 4; - buf[2] = 'C'; - buf[3] = (nm>> 0) & 0xff; - } else if (nm <= UINT16_MAX) { - buf_size = 5; - buf[2] = 'S'; - buf[3] = (nm>> 0) & 0xff; - buf[4] = (nm>> 8) & 0xff; - } else { - buf_size = 7; - buf[2] = 'I'; - buf[3] = (nm>> 0) & 0xff; - buf[4] = (nm>> 8) & 0xff; - buf[5] = (nm>>16) & 0xff; - buf[6] = (nm>>24) & 0xff; - } - BLOCK_APPEND(s->aux_blk, buf, buf_size); - cr->aux_size += buf_size; - } else { - // Preallocated space for NM at -has_NM into aux block - unsigned char *buf = BLOCK_DATA(s->aux_blk) + -has_NM; - buf[0] = (nm>> 0) & 0xff; - buf[1] = (nm>> 8) & 0xff; - buf[2] = (nm>>16) & 0xff; - buf[3] = (nm>>24) & 0xff; - } - } - - return r; - - beyond_slice: - // Cramtools can create CRAMs that have sequence features outside the - // stated range of the container & slice reference extents (start + span). - // We have to check for these in many places, but for brevity have the - // error reporting in only one. 
- hts_log_error("CRAM CIGAR extends beyond slice reference extents"); - return -1; - - block_err: - return -1; -} - -/* - * Quick and simple hash lookup for cram_map arrays - */ -static cram_map *map_find(cram_map **map, unsigned char *key, int id) { - cram_map *m; - - m = map[CRAM_MAP(key[0],key[1])]; - while (m && m->key != id) - m= m->next; - - return m; -} - -//#define map_find(M,K,I) M[CRAM_MAP(K[0],K[1])];while (m && m->key != I);m= m->next - - -static int cram_decode_aux_1_0(cram_container *c, cram_slice *s, - cram_block *blk, cram_record *cr) { - int i, r = 0, out_sz = 1; - unsigned char ntags; - - if (!c->comp_hdr->codecs[DS_TC]) return -1; - r |= c->comp_hdr->codecs[DS_TC]->decode(s, c->comp_hdr->codecs[DS_TC], blk, - (char *)&ntags, &out_sz); - cr->ntags = ntags; - - //printf("TC=%d\n", cr->ntags); - cr->aux_size = 0; - cr->aux = BLOCK_SIZE(s->aux_blk); - - for (i = 0; i < cr->ntags; i++) { - int32_t id, out_sz = 1; - unsigned char tag_data[3]; - cram_map *m; - - //printf("Tag %d/%d\n", i+1, cr->ntags); - if (!c->comp_hdr->codecs[DS_TN]) return -1; - r |= c->comp_hdr->codecs[DS_TN]->decode(s, c->comp_hdr->codecs[DS_TN], - blk, (char *)&id, &out_sz); - if (out_sz == 3) { - // Tag name stored as 3 chars instead of an int? - memcpy(tag_data, &id, 3); - } else { - tag_data[0] = (id>>16) & 0xff; - tag_data[1] = (id>>8) & 0xff; - tag_data[2] = id & 0xff; - } - - m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id); - if (!m) - return -1; - BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3); - - if (!m->codec) return -1; - r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz); - - cr->aux_size += out_sz + 3; - } - - return r; - - block_err: - return -1; -} - -// has_MD and has_NM are filled out with 0 for none present, -// 1 for present and verbatim, and -pos for present as placeholder -// (MD*, NM*) to be generated and filled out at offset +pos. 
-static int cram_decode_aux(cram_fd *fd, - cram_container *c, cram_slice *s, - cram_block *blk, cram_record *cr, - int *has_MD, int *has_NM) { - int i, r = 0, out_sz = 1; - int32_t TL = 0; - unsigned char *TN; - uint32_t ds = s->data_series; - - if (!(ds & (CRAM_TL|CRAM_aux))) { - cr->aux = 0; - cr->aux_size = 0; - return 0; - } - - if (!c->comp_hdr->codecs[DS_TL]) return -1; - r |= c->comp_hdr->codecs[DS_TL]->decode(s, c->comp_hdr->codecs[DS_TL], blk, - (char *)&TL, &out_sz); - if (r || TL < 0 || TL >= c->comp_hdr->nTL) - return -1; - - TN = c->comp_hdr->TL[TL]; - cr->ntags = strlen((char *)TN)/3; // optimise to remove strlen - - //printf("TC=%d\n", cr->ntags); - cr->aux_size = 0; - cr->aux = BLOCK_SIZE(s->aux_blk); - - if (!(ds & CRAM_aux)) - return 0; - - for (i = 0; i < cr->ntags; i++) { - int32_t id, out_sz = 1; - unsigned char tag_data[7]; - cram_map *m; - - if (TN[0] == 'M' && TN[1] == 'D' && has_MD) - *has_MD = (BLOCK_SIZE(s->aux_blk)+3) * (TN[2] == '*' ? -1 : 1); - if (TN[0] == 'N' && TN[1] == 'M' && has_NM) - *has_NM = (BLOCK_SIZE(s->aux_blk)+3) * (TN[2] == '*' ? -1 : 1);; - - //printf("Tag %d/%d\n", i+1, cr->ntags); - tag_data[0] = TN[0]; - tag_data[1] = TN[1]; - tag_data[2] = TN[2]; - id = (tag_data[0]<<16) | (tag_data[1]<<8) | tag_data[2]; - - if (CRAM_MAJOR_VERS(fd->version) >= 4 && TN[2] == '*') { - // Place holder, fill out contents later. - int tag_data_size; - if (TN[0] == 'N' && TN[1] == 'M') { - // Use a fixed size, so we can allocate room for it now. - memcpy(&tag_data[2], "I\0\0\0\0", 5); - tag_data_size = 7; - } else if (TN[0] == 'R' && TN[1] == 'G') { - // RG is variable size, but known already. Insert now - TN += 3; - // Equiv to fd->header->hrecs->rg[cr->rg], but this is the - // new header API equivalent. 
- const char *rg = sam_hdr_line_name(fd->header, "RG", cr->rg); - if (!rg) - continue; - - size_t rg_len = strlen(rg); - tag_data[2] = 'Z'; - BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3); - BLOCK_APPEND(s->aux_blk, rg, rg_len); - BLOCK_APPEND_CHAR(s->aux_blk, '\0'); - cr->aux_size += 3 + rg_len + 1; - cr->rg = -1; // prevents auto-add later - continue; - } else { - // Unknown size. We'll insert MD into stream later. - tag_data[2] = 'Z'; - tag_data_size = 3; - } - BLOCK_APPEND(s->aux_blk, (char *)tag_data, tag_data_size); - cr->aux_size += tag_data_size; - TN += 3; - } else { - TN += 3; - m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id); - if (!m) - return -1; - - BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3); - - if (!m->codec) return -1; - r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz); - if (r) break; - cr->aux_size += out_sz + 3; - - // cF CRAM flags. - if (TN[-3]=='c' && TN[-2]=='F' && TN[-1]=='C' && out_sz == 1) { - // Remove cF tag - uint8_t cF = BLOCK_END(s->aux_blk)[-1]; - BLOCK_SIZE(s->aux_blk) -= out_sz+3; - cr->aux_size -= out_sz+3; - - // bit 1 => don't auto-decode MD. - // Pretend MD is present verbatim, so we don't auto-generate - if ((cF & 1) && has_MD && *has_MD == 0) - *has_MD = 1; - - // bit 1 => don't auto-decode NM - if ((cF & 2) && has_NM && *has_NM == 0) - *has_NM = 1; - } - } - - // We could go to 2^32 fine, but we shouldn't be hitting this anyway, - // and it's protecting against memory hogs too. 
- if (BLOCK_SIZE(s->aux_blk) > (1u<<31)) { - hts_log_error("CRAM->BAM aux block size overflow"); - goto block_err; - } - } - - return r; - - block_err: - return -1; -} - -/* Resolve mate pair cross-references between recs within this slice */ -static int cram_decode_slice_xref(cram_slice *s, int required_fields) { - int rec; - - if (!(required_fields & (SAM_RNEXT | SAM_PNEXT | SAM_TLEN))) { - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - - cr->tlen = 0; - cr->mate_pos = 0; - cr->mate_ref_id = -1; - } - - return 0; - } - - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - - if (cr->mate_line >= 0) { - if (cr->mate_line < s->hdr->num_records) { - /* - * On the first read, loop through computing lengths. - * It's not perfect as we have one slice per reference so we - * cannot detect when TLEN should be zero due to seqs that - * map to multiple references. - * - * We also cannot set tlen correct when it spans a slice for - * other reasons. This may make tlen too small. Should we - * fix this by forcing TLEN to be stored verbatim in such cases? - * - * Or do we just admit defeat and output 0 for tlen? It's the - * safe option... - */ - if (cr->tlen == INT64_MIN) { - int id1 = rec, id2 = rec; - int64_t aleft = cr->apos, aright = cr->aend; - int64_t tlen; - int ref = cr->ref_id; - - // number of segments starting at the same point. 
- int left_cnt = 0; - - do { - if (aleft > s->crecs[id2].apos) - aleft = s->crecs[id2].apos, left_cnt = 1; - else if (aleft == s->crecs[id2].apos) - left_cnt++; - if (aright < s->crecs[id2].aend) - aright = s->crecs[id2].aend; - if (s->crecs[id2].mate_line == -1) { - s->crecs[id2].mate_line = rec; - break; - } - if (s->crecs[id2].mate_line <= id2 || - s->crecs[id2].mate_line >= s->hdr->num_records) - return -1; - id2 = s->crecs[id2].mate_line; - - if (s->crecs[id2].ref_id != ref) - ref = -1; - } while (id2 != id1); - - if (ref != -1) { - tlen = aright - aleft + 1; - id1 = id2 = rec; - - /* - * When we have two seqs with identical start and - * end coordinates, set +/- tlen based on 1st/last - * bit flags instead, as a tie breaker. - */ - if (s->crecs[id2].apos == aleft) { - if (left_cnt == 1 || - (s->crecs[id2].flags & BAM_FREAD1)) - s->crecs[id2].tlen = tlen; - else - s->crecs[id2].tlen = -tlen; - } else { - s->crecs[id2].tlen = -tlen; - } - - id2 = s->crecs[id2].mate_line; - while (id2 != id1) { - if (s->crecs[id2].apos == aleft) { - if (left_cnt == 1 || - (s->crecs[id2].flags & BAM_FREAD1)) - s->crecs[id2].tlen = tlen; - else - s->crecs[id2].tlen = -tlen; - } else { - s->crecs[id2].tlen = -tlen; - } - id2 = s->crecs[id2].mate_line; - } - } else { - id1 = id2 = rec; - - s->crecs[id2].tlen = 0; - id2 = s->crecs[id2].mate_line; - while (id2 != id1) { - s->crecs[id2].tlen = 0; - id2 = s->crecs[id2].mate_line; - } - } - } - - cr->mate_pos = s->crecs[cr->mate_line].apos; - cr->mate_ref_id = s->crecs[cr->mate_line].ref_id; - - // paired - cr->flags |= BAM_FPAIRED; - - // set mate unmapped if needed - if (s->crecs[cr->mate_line].flags & BAM_FUNMAP) { - cr->flags |= BAM_FMUNMAP; - cr->tlen = 0; - } - if (cr->flags & BAM_FUNMAP) { - cr->tlen = 0; - } - - // set mate reversed if needed - if (s->crecs[cr->mate_line].flags & BAM_FREVERSE) - cr->flags |= BAM_FMREVERSE; - } else { - hts_log_error("Mate line out of bounds: %d vs [0, %d]", - cr->mate_line, 
s->hdr->num_records-1); - } - - /* FIXME: construct read names here too if needed */ - } else { - if (cr->mate_flags & CRAM_M_REVERSE) { - cr->flags |= BAM_FPAIRED | BAM_FMREVERSE; - } - if (cr->mate_flags & CRAM_M_UNMAP) { - cr->flags |= BAM_FMUNMAP; - //cr->mate_ref_id = -1; - } - if (!(cr->flags & BAM_FPAIRED)) - cr->mate_ref_id = -1; - } - - if (cr->tlen == INT64_MIN) - cr->tlen = 0; // Just incase - } - - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - if (cr->explicit_tlen != INT64_MIN) - cr->tlen = cr->explicit_tlen; - } - - return 0; -} - -static char *md5_print(unsigned char *md5, char *out) { - int i; - for (i = 0; i < 16; i++) { - out[i*2+0] = "0123456789abcdef"[md5[i]>>4]; - out[i*2+1] = "0123456789abcdef"[md5[i]&15]; - } - out[32] = 0; - - return out; -} - -/* - * Utility function to decode tlen (ISIZE), as it's called - * in multiple places. - * - * Returns codec return value (0 on success). - */ -static int cram_decode_tlen(cram_fd *fd, cram_container *c, cram_slice *s, - cram_block *blk, int64_t *tlen) { - int out_sz = 1, r = 0; - - if (!c->comp_hdr->codecs[DS_TS]) return -1; - if (CRAM_MAJOR_VERS(fd->version) < 4) { - int32_t i32; - r |= c->comp_hdr->codecs[DS_TS] - ->decode(s, c->comp_hdr->codecs[DS_TS], blk, - (char *)&i32, &out_sz); - *tlen = i32; - } else { - r |= c->comp_hdr->codecs[DS_TS] - ->decode(s, c->comp_hdr->codecs[DS_TS], blk, - (char *)tlen, &out_sz); - } - return r; -} - -/* - * Decode an entire slice from container blocks. Fills out s->crecs[] array. 
- * Returns 0 on success - * -1 on failure - */ -int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s, - sam_hdr_t *sh) { - cram_block *blk = s->block[0]; - int32_t bf, ref_id; - unsigned char cf; - int out_sz, r = 0; - int rec; - char *seq = NULL, *qual = NULL; - int unknown_rg = -1; - int embed_ref; - char **refs = NULL; - uint32_t ds; - sam_hrecs_t *bfd = sh->hrecs; - - if (cram_dependent_data_series(fd, c->comp_hdr, s) != 0) - return -1; - - ds = s->data_series; - - blk->bit = 7; // MSB first - - // Study the blocks and estimate approx sizes to preallocate. - // This looks to speed up decoding by around 8-9%. - // We can always shrink back down at the end if we overestimated. - // However it's likely that this also saves memory as own growth - // factor (*=1.5) is never applied. - { - int qsize, nsize, q_id; - cram_decode_estimate_sizes(c->comp_hdr, s, &qsize, &nsize, &q_id); - //fprintf(stderr, "qsize=%d nsize=%d\n", qsize, nsize); - - if (qsize && (ds & CRAM_RL)) BLOCK_RESIZE_EXACT(s->seqs_blk, qsize+1); - if (qsize && (ds & CRAM_RL)) BLOCK_RESIZE_EXACT(s->qual_blk, qsize+1); - if (nsize && (ds & CRAM_NS)) BLOCK_RESIZE_EXACT(s->name_blk, nsize+1); - - // To do - consider using q_id here to usurp the quality block and - // avoid a memcpy during decode. - // Specifically when quality is an external block uniquely used by - // DS_QS only, then we can set s->qual_blk directly to this - // block and save the codec->decode() calls. (Approx 3% cpu saving) - } - - /* Look for unknown RG, added as last by Java CRAM? */ - if (bfd->nrg > 0 && - bfd->rg[bfd->nrg-1].name != NULL && - !strcmp(bfd->rg[bfd->nrg-1].name, "UNKNOWN")) - unknown_rg = bfd->nrg-1; - - if (blk->content_type != CORE) - return -1; - - if (s->crecs) - free(s->crecs); - if (!(s->crecs = malloc(s->hdr->num_records * sizeof(*s->crecs)))) - return -1; - - ref_id = s->hdr->ref_seq_id; - if (CRAM_MAJOR_VERS(fd->version) < 4) - embed_ref = s->hdr->ref_base_id >= 0 ? 
1 : 0; - else - embed_ref = s->hdr->ref_base_id > 0 ? 1 : 0; - - if (ref_id >= 0) { - if (embed_ref) { - cram_block *b; - if (s->hdr->ref_base_id < 0) { - hts_log_error("No reference specified and no embedded reference is available" - " at #%d:%"PRId64"-%"PRId64, ref_id, s->hdr->ref_seq_start, - s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); - return -1; - } - b = cram_get_block_by_id(s, s->hdr->ref_base_id); - if (!b) - return -1; - if (cram_uncompress_block(b) != 0) - return -1; - s->ref = (char *)BLOCK_DATA(b); - s->ref_start = s->hdr->ref_seq_start; - s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1; - if (s->hdr->ref_seq_span > b->uncomp_size) { - hts_log_error("Embedded reference is too small at #%d:%d-%d", - ref_id, s->ref_start, s->ref_end); - return -1; - } - } else if (!c->comp_hdr->no_ref) { - //// Avoid Java cramtools bug by loading entire reference seq - //s->ref = cram_get_ref(fd, s->hdr->ref_seq_id, 1, 0); - //s->ref_start = 1; - - if (fd->required_fields & SAM_SEQ) { - s->ref = - cram_get_ref(fd, s->hdr->ref_seq_id, - s->hdr->ref_seq_start, - s->hdr->ref_seq_start + s->hdr->ref_seq_span -1); - } - s->ref_start = s->hdr->ref_seq_start; - s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1; - - /* Sanity check */ - if (s->ref_start < 0) { - hts_log_warning("Slice starts before base 1" - " at #%d:%"PRId64"-%"PRId64, ref_id, s->hdr->ref_seq_start, - s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); - s->ref_start = 0; - } - pthread_mutex_lock(&fd->ref_lock); - pthread_mutex_lock(&fd->refs->lock); - if ((fd->required_fields & SAM_SEQ) && - ref_id < fd->refs->nref && fd->refs->ref_id && - s->ref_end > fd->refs->ref_id[ref_id]->length) { - s->ref_end = fd->refs->ref_id[ref_id]->length; - } - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - } - } - - if ((fd->required_fields & SAM_SEQ) && - s->ref == NULL && s->hdr->ref_seq_id >= 0 && !c->comp_hdr->no_ref) { - hts_log_error("Unable to fetch reference 
#%d:%"PRId64"-%"PRId64"\n", - ref_id, s->hdr->ref_seq_start, - s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); - return -1; - } - - if (CRAM_MAJOR_VERS(fd->version) != 1 - && (fd->required_fields & SAM_SEQ) - && s->hdr->ref_seq_id >= 0 - && !fd->ignore_md5 - && memcmp(s->hdr->md5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) { - hts_md5_context *md5; - unsigned char digest[16]; - - if (s->ref && s->hdr->ref_seq_id >= 0) { - int start, len; - - if (s->hdr->ref_seq_start >= s->ref_start) { - start = s->hdr->ref_seq_start - s->ref_start; - } else { - hts_log_warning("Slice starts before base 1 at #%d:%d-%d", - ref_id, s->ref_start, s->ref_end); - start = 0; - } - - if (s->hdr->ref_seq_span <= s->ref_end - s->ref_start + 1) { - len = s->hdr->ref_seq_span; - } else { - hts_log_warning("Slice ends beyond reference end at #%d:%d-%d", - ref_id, s->ref_start, s->ref_end); - len = s->ref_end - s->ref_start + 1; - } - - if (!(md5 = hts_md5_init())) - return -1; - if (start + len > s->ref_end - s->ref_start + 1) - len = s->ref_end - s->ref_start + 1 - start; - if (len >= 0) - hts_md5_update(md5, s->ref + start, len); - hts_md5_final(digest, md5); - hts_md5_destroy(md5); - } else if (!s->ref && s->hdr->ref_base_id >= 0) { - cram_block *b = cram_get_block_by_id(s, s->hdr->ref_base_id); - if (b) { - if (!(md5 = hts_md5_init())) - return -1; - hts_md5_update(md5, b->data, b->uncomp_size); - hts_md5_final(digest, md5); - hts_md5_destroy(md5); - } - } - - if (!c->comp_hdr->no_ref && - ((!s->ref && s->hdr->ref_base_id < 0) - || memcmp(digest, s->hdr->md5, 16) != 0)) { - char M[33]; - const char *rname = sam_hdr_tid2name(sh, ref_id); - if (!rname) rname="?"; // cannot happen normally - hts_log_error("MD5 checksum reference mismatch at %s:%d-%d", - rname, s->ref_start, s->ref_end); - hts_log_error("CRAM : %s", md5_print(s->hdr->md5, M)); - hts_log_error("Ref : %s", md5_print(digest, M)); - kstring_t ks = KS_INITIALIZE; - if (sam_hdr_find_tag_id(sh, "SQ", "SN", rname, "M5", &ks) == 0) - 
hts_log_error("@SQ M5: %s", ks.s); - hts_log_error("Please check the reference given is correct"); - ks_free(&ks); - return -1; - } - } - - if (ref_id == -2) { - pthread_mutex_lock(&fd->ref_lock); - pthread_mutex_lock(&fd->refs->lock); - refs = calloc(fd->refs->nref, sizeof(char *)); - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - if (!refs) - return -1; - } - - int last_ref_id = -9; // Arbitrary -ve marker for not-yet-set - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - int has_MD, has_NM; - - //fprintf(stderr, "Decode seq %d, %d/%d\n", rec, blk->byte, blk->bit); - - cr->s = s; - - out_sz = 1; /* decode 1 item */ - if (ds & CRAM_BF) { - if (!c->comp_hdr->codecs[DS_BF]) goto block_err; - r |= c->comp_hdr->codecs[DS_BF] - ->decode(s, c->comp_hdr->codecs[DS_BF], blk, - (char *)&bf, &out_sz); - if (r || bf < 0 || - bf >= sizeof(fd->bam_flag_swap)/sizeof(*fd->bam_flag_swap)) - goto block_err; - bf = fd->bam_flag_swap[bf]; - cr->flags = bf; - } else { - cr->flags = bf = 0x4; // unmapped - } - - if (ds & CRAM_CF) { - if (CRAM_MAJOR_VERS(fd->version) == 1) { - /* CF is byte in 1.0, int32 in 2.0 */ - if (!c->comp_hdr->codecs[DS_CF]) goto block_err; - r |= c->comp_hdr->codecs[DS_CF] - ->decode(s, c->comp_hdr->codecs[DS_CF], blk, - (char *)&cf, &out_sz); - if (r) goto block_err; - cr->cram_flags = cf; - } else { - if (!c->comp_hdr->codecs[DS_CF]) goto block_err; - r |= c->comp_hdr->codecs[DS_CF] - ->decode(s, c->comp_hdr->codecs[DS_CF], blk, - (char *)&cr->cram_flags, &out_sz); - if (r) goto block_err; - cf = cr->cram_flags; - } - } else { - cf = cr->cram_flags = 0; - } - - if (CRAM_MAJOR_VERS(fd->version) != 1 && ref_id == -2) { - if (ds & CRAM_RI) { - if (!c->comp_hdr->codecs[DS_RI]) goto block_err; - r |= c->comp_hdr->codecs[DS_RI] - ->decode(s, c->comp_hdr->codecs[DS_RI], blk, - (char *)&cr->ref_id, &out_sz); - if (r) goto block_err; - if ((fd->required_fields & (SAM_SEQ|SAM_TLEN)) - && cr->ref_id 
>= 0 - && cr->ref_id != last_ref_id) { - if (!c->comp_hdr->no_ref) { - // Range(fd): seq >= 0, unmapped -1, unspecified -2 - // Slice(s): seq >= 0, unmapped -1, multiple refs -2 - // Record(cr): seq >= 0, unmapped -1 - pthread_mutex_lock(&fd->range_lock); - int need_ref = (fd->range.refid == -2 || cr->ref_id == fd->range.refid); - pthread_mutex_unlock(&fd->range_lock); - if (need_ref) { - if (!refs[cr->ref_id]) - refs[cr->ref_id] = cram_get_ref(fd, cr->ref_id, 1, 0); - if (!(s->ref = refs[cr->ref_id])) - goto block_err; - } else { - // For multi-ref containers, we don't need to fetch all - // refs if we're only querying one. - s->ref = NULL; - } - - pthread_mutex_lock(&fd->range_lock); - int discard_last_ref = (last_ref_id >= 0 && - refs[last_ref_id] && - (fd->range.refid == -2 || - last_ref_id == fd->range.refid)); - pthread_mutex_unlock(&fd->range_lock); - if (discard_last_ref) { - pthread_mutex_lock(&fd->ref_lock); - discard_last_ref = !fd->unsorted; - pthread_mutex_unlock(&fd->ref_lock); - } - if (discard_last_ref) { - cram_ref_decr(fd->refs, last_ref_id); - refs[last_ref_id] = NULL; - } - } - s->ref_start = 1; - pthread_mutex_lock(&fd->ref_lock); - pthread_mutex_lock(&fd->refs->lock); - s->ref_end = fd->refs->ref_id[cr->ref_id]->length; - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - - last_ref_id = cr->ref_id; - } - } else { - cr->ref_id = -1; - } - } else { - cr->ref_id = ref_id; // Forced constant in CRAM 1.0 - } - if (cr->ref_id < -1 || cr->ref_id >= bfd->nref) { - hts_log_error("Requested unknown reference ID %d", cr->ref_id); - goto block_err; - } - - if (ds & CRAM_RL) { - if (!c->comp_hdr->codecs[DS_RL]) goto block_err; - r |= c->comp_hdr->codecs[DS_RL] - ->decode(s, c->comp_hdr->codecs[DS_RL], blk, - (char *)&cr->len, &out_sz); - if (r) goto block_err; - if (cr->len < 0) { - hts_log_error("Read has negative length"); - goto block_err; - } - } - - if (ds & CRAM_AP) { - if (!c->comp_hdr->codecs[DS_AP]) goto block_err; - 
if (CRAM_MAJOR_VERS(fd->version) >= 4) { - r |= c->comp_hdr->codecs[DS_AP] - ->decode(s, c->comp_hdr->codecs[DS_AP], blk, - (char *)&cr->apos, &out_sz); - } else { - int32_t i32; - r |= c->comp_hdr->codecs[DS_AP] - ->decode(s, c->comp_hdr->codecs[DS_AP], blk, - (char *)&i32, &out_sz); - cr->apos = i32; - } - if (r) goto block_err;; - if (c->comp_hdr->AP_delta) { - if (cr->apos < 0 && c->unsorted == 0) { - // cache locally in c->unsorted so we don't have an - // excessive number of locks - pthread_mutex_lock(&fd->ref_lock); - c->unsorted = fd->unsorted = 1; - pthread_mutex_unlock(&fd->ref_lock); - } - cr->apos += s->last_apos; - } - s->last_apos= cr->apos; - } else { - cr->apos = c->ref_seq_start; - } - - if (ds & CRAM_RG) { - if (!c->comp_hdr->codecs[DS_RG]) goto block_err; - r |= c->comp_hdr->codecs[DS_RG] - ->decode(s, c->comp_hdr->codecs[DS_RG], blk, - (char *)&cr->rg, &out_sz); - if (r) goto block_err; - if (cr->rg == unknown_rg) - cr->rg = -1; - } else { - cr->rg = -1; - } - - cr->name_len = 0; - - if (c->comp_hdr->read_names_included) { - int32_t out_sz2 = 1; - - // Read directly into name cram_block - cr->name = BLOCK_SIZE(s->name_blk); - if (ds & CRAM_RN) { - if (!c->comp_hdr->codecs[DS_RN]) goto block_err; - r |= c->comp_hdr->codecs[DS_RN] - ->decode(s, c->comp_hdr->codecs[DS_RN], blk, - (char *)s->name_blk, &out_sz2); - if (r) goto block_err; - cr->name_len = out_sz2; - } - } - - cr->mate_pos = 0; - cr->mate_line = -1; - cr->mate_ref_id = -1; - cr->explicit_tlen = INT64_MIN; - if ((ds & CRAM_CF) && (cf & CRAM_FLAG_DETACHED)) { - if (ds & CRAM_MF) { - if (CRAM_MAJOR_VERS(fd->version) == 1) { - /* MF is byte in 1.0, int32 in 2.0 */ - unsigned char mf; - if (!c->comp_hdr->codecs[DS_MF]) goto block_err; - r |= c->comp_hdr->codecs[DS_MF] - ->decode(s, c->comp_hdr->codecs[DS_MF], - blk, (char *)&mf, &out_sz); - if (r) goto block_err; - cr->mate_flags = mf; - } else { - if (!c->comp_hdr->codecs[DS_MF]) goto block_err; - r |= c->comp_hdr->codecs[DS_MF] - 
->decode(s, c->comp_hdr->codecs[DS_MF], - blk, - (char *)&cr->mate_flags, - &out_sz); - if (r) goto block_err; - } - } else { - cr->mate_flags = 0; - } - - if (!c->comp_hdr->read_names_included) { - int32_t out_sz2 = 1; - - // Read directly into name cram_block - cr->name = BLOCK_SIZE(s->name_blk); - if (ds & CRAM_RN) { - if (!c->comp_hdr->codecs[DS_RN]) goto block_err; - r |= c->comp_hdr->codecs[DS_RN] - ->decode(s, c->comp_hdr->codecs[DS_RN], - blk, (char *)s->name_blk, - &out_sz2); - if (r) goto block_err; - cr->name_len = out_sz2; - } - } - - if (ds & CRAM_NS) { - if (!c->comp_hdr->codecs[DS_NS]) goto block_err; - r |= c->comp_hdr->codecs[DS_NS] - ->decode(s, c->comp_hdr->codecs[DS_NS], blk, - (char *)&cr->mate_ref_id, &out_sz); - if (r) goto block_err; - } - - // Skip as mate_ref of "*" is legit. It doesn't mean unmapped, just unknown. - // if (cr->mate_ref_id == -1 && cr->flags & 0x01) { - // /* Paired, but unmapped */ - // cr->flags |= BAM_FMUNMAP; - // } - - if (ds & CRAM_NP) { - if (!c->comp_hdr->codecs[DS_NP]) goto block_err;; - if (CRAM_MAJOR_VERS(fd->version) < 4) { - int32_t i32; - r |= c->comp_hdr->codecs[DS_NP] - ->decode(s, c->comp_hdr->codecs[DS_NP], blk, - (char *)&i32, &out_sz); - cr->mate_pos = i32; - } else { - r |= c->comp_hdr->codecs[DS_NP] - ->decode(s, c->comp_hdr->codecs[DS_NP], blk, - (char *)&cr->mate_pos, &out_sz); - } - if (r) goto block_err; - } - - if (ds & CRAM_TS) { - if (!c->comp_hdr->codecs[DS_TS]) goto block_err; - r = cram_decode_tlen(fd, c, s, blk, &cr->tlen); - if (r) goto block_err; - } else { - cr->tlen = INT64_MIN; - } - } else if ((ds & CRAM_CF) && (cf & CRAM_FLAG_MATE_DOWNSTREAM)) { - // else not detached - if (ds & CRAM_NF) { - if (!c->comp_hdr->codecs[DS_NF]) goto block_err; - r |= c->comp_hdr->codecs[DS_NF] - ->decode(s, c->comp_hdr->codecs[DS_NF], blk, - (char *)&cr->mate_line, &out_sz); - if (r) goto block_err; - cr->mate_line += rec + 1; - - //cr->name_len = sprintf(name, "%d", name_id++); - //cr->name = 
DSTRING_LEN(name_ds); - //dstring_nappend(name_ds, name, cr->name_len); - - cr->mate_ref_id = -1; - cr->tlen = INT64_MIN; - cr->mate_pos = 0; - } else { - cr->mate_flags = 0; - cr->tlen = INT64_MIN; - } - if ((ds & CRAM_CF) && (cf & CRAM_FLAG_EXPLICIT_TLEN)) { - if (ds & CRAM_TS) { - r = cram_decode_tlen(fd, c, s, blk, &cr->explicit_tlen); - if (r) return r; - } else { - cr->mate_flags = 0; - cr->tlen = INT64_MIN; - } - } - } else if ((ds & CRAM_CF) && (cf & CRAM_FLAG_EXPLICIT_TLEN)) { - if (ds & CRAM_TS) { - r = cram_decode_tlen(fd, c, s, blk, &cr->explicit_tlen); - if (r) return r; - } else { - cr->mate_flags = 0; - cr->tlen = INT64_MIN; - } - } else { - cr->mate_flags = 0; - cr->tlen = INT64_MIN; - } - /* - else if (!name[0]) { - //name[0] = '?'; name[1] = 0; - //cr->name_len = 1; - //cr->name= DSTRING_LEN(s->name_ds); - //dstring_nappend(s->name_ds, "?", 1); - - cr->mate_ref_id = -1; - cr->tlen = 0; - cr->mate_pos = 0; - } - */ - - /* Auxiliary tags */ - has_MD = has_NM = 0; - if (CRAM_MAJOR_VERS(fd->version) == 1) - r |= cram_decode_aux_1_0(c, s, blk, cr); - else - r |= cram_decode_aux(fd, c, s, blk, cr, &has_MD, &has_NM); - if (r) goto block_err; - - /* Fake up dynamic string growth and appending */ - if (ds & CRAM_RL) { - cr->seq = BLOCK_SIZE(s->seqs_blk); - BLOCK_GROW(s->seqs_blk, cr->len); - seq = (char *)BLOCK_END(s->seqs_blk); - BLOCK_SIZE(s->seqs_blk) += cr->len; - - if (!seq) - goto block_err; - - cr->qual = BLOCK_SIZE(s->qual_blk); - BLOCK_GROW(s->qual_blk, cr->len); - qual = (char *)BLOCK_END(s->qual_blk); - BLOCK_SIZE(s->qual_blk) += cr->len; - - if (!s->ref) - memset(seq, '=', cr->len); - } - - if (!(bf & BAM_FUNMAP)) { - if ((ds & CRAM_AP) && cr->apos <= 0) { - hts_log_error("Read has alignment position %"PRId64 - " but no unmapped flag", - cr->apos); - goto block_err; - } - /* Decode sequence and generate CIGAR */ - if (ds & (CRAM_SEQ | CRAM_MQ)) { - r |= cram_decode_seq(fd, c, s, blk, cr, sh, cf, seq, qual, - has_MD, has_NM); - if (r) goto 
block_err; - } else { - cr->cigar = 0; - cr->ncigar = 0; - cr->aend = cr->apos; - cr->mqual = 0; - } - } else { - int out_sz2 = cr->len; - - //puts("Unmapped"); - cr->cigar = 0; - cr->ncigar = 0; - cr->aend = cr->apos; - cr->mqual = 0; - - if (ds & CRAM_BA && cr->len) { - if (!c->comp_hdr->codecs[DS_BA]) goto block_err; - r |= c->comp_hdr->codecs[DS_BA] - ->decode(s, c->comp_hdr->codecs[DS_BA], blk, - (char *)seq, &out_sz2); - if (r) goto block_err; - } - - if ((ds & CRAM_CF) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { - out_sz2 = cr->len; - if (ds & CRAM_QS && cr->len >= 0) { - if (!c->comp_hdr->codecs[DS_QS]) goto block_err; - r |= c->comp_hdr->codecs[DS_QS] - ->decode(s, c->comp_hdr->codecs[DS_QS], - blk, qual, &out_sz2); - if (r) goto block_err; - } - } else { - if (ds & CRAM_RL) - memset(qual, 255, cr->len); - } - } - - if (!c->comp_hdr->qs_seq_orient && (ds & CRAM_QS) && (cr->flags & BAM_FREVERSE)) { - int i, j; - for (i = 0, j = cr->len-1; i < j; i++, j--) { - unsigned char c; - c = qual[i]; - qual[i] = qual[j]; - qual[j] = c; - } - } - } - - pthread_mutex_lock(&fd->ref_lock); - if (refs) { - int i; - for (i = 0; i < fd->refs->nref; i++) { - if (refs[i]) - cram_ref_decr(fd->refs, i); - } - free(refs); - refs = NULL; - } else if (ref_id >= 0 && s->ref != fd->ref_free && !embed_ref) { - cram_ref_decr(fd->refs, ref_id); - } - pthread_mutex_unlock(&fd->ref_lock); - - /* Resolve mate pair cross-references between recs within this slice */ - r |= cram_decode_slice_xref(s, fd->required_fields); - - // Free the original blocks as we no longer need these. - { - int i; - for (i = 0; i < s->hdr->num_blocks; i++) { - cram_block *b = s->block[i]; - cram_free_block(b); - s->block[i] = NULL; - } - } - - // Also see initial BLOCK_RESIZE_EXACT at top of function. - // As we grow blocks we overallocate by up to 50%. So shrink - // back to their final sizes here. 
- // - //fprintf(stderr, "%d %d // %d %d // %d %d // %d %d\n", - // (int)s->seqs_blk->byte, (int)s->seqs_blk->alloc, - // (int)s->qual_blk->byte, (int)s->qual_blk->alloc, - // (int)s->name_blk->byte, (int)s->name_blk->alloc, - // (int)s->aux_blk->byte, (int)s->aux_blk->alloc); - BLOCK_RESIZE_EXACT(s->seqs_blk, BLOCK_SIZE(s->seqs_blk)+1); - BLOCK_RESIZE_EXACT(s->qual_blk, BLOCK_SIZE(s->qual_blk)+1); - BLOCK_RESIZE_EXACT(s->name_blk, BLOCK_SIZE(s->name_blk)+1); - BLOCK_RESIZE_EXACT(s->aux_blk, BLOCK_SIZE(s->aux_blk)+1); - - return r; - - block_err: - if (refs) { - int i; - pthread_mutex_lock(&fd->ref_lock); - for (i = 0; i < fd->refs->nref; i++) { - if (refs[i]) - cram_ref_decr(fd->refs, i); - } - free(refs); - pthread_mutex_unlock(&fd->ref_lock); - } - - return -1; -} - -typedef struct { - cram_fd *fd; - cram_container *c; - cram_slice *s; - sam_hdr_t *h; - int exit_code; -} cram_decode_job; - -void *cram_decode_slice_thread(void *arg) { - cram_decode_job *j = (cram_decode_job *)arg; - - j->exit_code = cram_decode_slice(j->fd, j->c, j->s, j->h); - - return j; -} - -/* - * Spawn a multi-threaded version of cram_decode_slice(). - */ -int cram_decode_slice_mt(cram_fd *fd, cram_container *c, cram_slice *s, - sam_hdr_t *bfd) { - cram_decode_job *j; - int nonblock; - - if (!fd->pool) - return cram_decode_slice(fd, c, s, bfd); - - if (!(j = malloc(sizeof(*j)))) - return -1; - - j->fd = fd; - j->c = c; - j->s = s; - j->h = bfd; - - nonblock = hts_tpool_process_sz(fd->rqueue) ? 1 : 0; - - int saved_errno = errno; - errno = 0; - if (-1 == hts_tpool_dispatch2(fd->pool, fd->rqueue, cram_decode_slice_thread, - j, nonblock)) { - /* Would block */ - if (errno != EAGAIN) - return -1; - fd->job_pending = j; - } else { - fd->job_pending = NULL; - } - errno = saved_errno; - - // flush too - return 0; -} - - -/* ---------------------------------------------------------------------- - * CRAM sequence iterators. - */ - -/* - * Converts a cram in-memory record into a bam in-memory record. 
We - * pass a pointer to a bam_seq_t pointer along with the a pointer to - * the allocated size. These can initially be pointers to NULL and zero. - * - * This function will reallocate the bam buffer as required and update - * (*bam)->alloc accordingly, allowing it to be used within a loop - * efficiently without needing to allocate new bam objects over and - * over again. - * - * Returns the used size of the bam record on success - * -1 on failure. - */ -static int cram_to_bam(sam_hdr_t *sh, cram_fd *fd, cram_slice *s, - cram_record *cr, int rec, bam_seq_t **bam) { - int ret, rg_len; - char name_a[1024], *name; - int name_len; - char *aux; - char *seq, *qual; - sam_hrecs_t *bfd = sh->hrecs; - - /* Assign names if not explicitly set */ - if (fd->required_fields & SAM_QNAME) { - if (cr->name_len) { - name = (char *)BLOCK_DATA(s->name_blk) + cr->name; - name_len = cr->name_len; - } else { - name = name_a; - if (cr->mate_line >= 0 && cr->mate_line < s->max_rec && - s->crecs[cr->mate_line].name_len > 0) { - // Copy our mate if non-zero. - memcpy(name_a, BLOCK_DATA(s->name_blk)+s->crecs[cr->mate_line].name, - s->crecs[cr->mate_line].name_len); - name = name_a + s->crecs[cr->mate_line].name_len; - } else { - // Otherwise generate a name based on prefix - name_len = strlen(fd->prefix); - memcpy(name, fd->prefix, name_len); - name += name_len; - *name++ = ':'; - if (cr->mate_line >= 0 && cr->mate_line < rec) { - name = (char *)append_uint64((unsigned char *)name, - s->hdr->record_counter + - cr->mate_line + 1); - } else { - name = (char *)append_uint64((unsigned char *)name, - s->hdr->record_counter + - rec + 1); - } - } - name_len = name - name_a; - name = name_a; - } - } else { - name = "?"; - name_len = 1; - } - - /* Generate BAM record */ - if (cr->rg < -1 || cr->rg >= bfd->nrg) - return -1; - rg_len = (cr->rg != -1) ? 
bfd->rg[cr->rg].name_len + 4 : 0; - - if (fd->required_fields & (SAM_SEQ | SAM_QUAL)) { - if (!BLOCK_DATA(s->seqs_blk)) - return -1; - seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq; - } else { - seq = "*"; - cr->len = 0; - } - - if (fd->required_fields & SAM_QUAL) { - if (!BLOCK_DATA(s->qual_blk)) - return -1; - qual = (char *)BLOCK_DATA(s->qual_blk) + cr->qual; - } else { - qual = NULL; - } - - ret = bam_set1(*bam, - name_len, name, - cr->flags, cr->ref_id, cr->apos - 1, cr->mqual, - cr->ncigar, &s->cigar[cr->cigar], - cr->mate_ref_id, cr->mate_pos - 1, cr->tlen, - cr->len, seq, qual, - cr->aux_size + rg_len); - if (ret < 0) { - return ret; - } - - aux = (char *)bam_aux(*bam); - - /* Auxiliary strings */ - if (cr->aux_size != 0) { - memcpy(aux, BLOCK_DATA(s->aux_blk) + cr->aux, cr->aux_size); - aux += cr->aux_size; - (*bam)->l_data += cr->aux_size; - } - - /* RG:Z: */ - if (rg_len > 0) { - *aux++ = 'R'; *aux++ = 'G'; *aux++ = 'Z'; - int len = bfd->rg[cr->rg].name_len; - memcpy(aux, bfd->rg[cr->rg].name, len); - aux += len; - *aux++ = 0; - (*bam)->l_data += rg_len; - } - - return (*bam)->l_data; -} - -/* - * Here be dragons! The multi-threading code in this is crufty beyond belief. - */ - -/* - * Load first container. - * Called when fd->ctr is NULL> - * - * Returns container on success - * NULL on failure. - */ -static cram_container *cram_first_slice(cram_fd *fd) { - cram_container *c; - - do { - if (fd->ctr) - cram_free_container(fd->ctr); - - if (!(c = fd->ctr = cram_read_container(fd))) - return NULL; - c->curr_slice_mt = c->curr_slice; - } while (c->length == 0); - - /* - * The first container may be a result of a sub-range query. - * In which case it may still not be the optimal starting point - * due to skipped containers/slices in the index. - */ - // No need for locks here as we're in the main thread. 
- if (fd->range.refid != -2) { - while (c->ref_seq_id != -2 && - (c->ref_seq_id < fd->range.refid || - (fd->range.refid >= 0 && c->ref_seq_id == fd->range.refid - && c->ref_seq_start + c->ref_seq_span-1 < fd->range.start))) { - if (0 != cram_seek(fd, c->length, SEEK_CUR)) - return NULL; - cram_free_container(fd->ctr); - do { - if (!(c = fd->ctr = cram_read_container(fd))) - return NULL; - } while (c->length == 0); - } - - if (c->ref_seq_id != -2 && c->ref_seq_id != fd->range.refid) { - fd->eof = 1; - return NULL; - } - } - - if (!(c->comp_hdr_block = cram_read_block(fd))) - return NULL; - if (c->comp_hdr_block->content_type != COMPRESSION_HEADER) - return NULL; - - c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block); - if (!c->comp_hdr) - return NULL; - if (!c->comp_hdr->AP_delta && - sam_hrecs_sort_order(fd->header->hrecs) != ORDER_COORD) { - pthread_mutex_lock(&fd->ref_lock); - fd->unsorted = 1; - pthread_mutex_unlock(&fd->ref_lock); - } - - return c; -} - -static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) { - cram_container *c_curr; // container being consumed via cram_get_seq() - cram_slice *s_curr = NULL; - - // Populate the first container if unknown. - if (!(c_curr = fd->ctr)) { - if (!(c_curr = cram_first_slice(fd))) - return NULL; - } - - // Discard previous slice - if ((s_curr = c_curr->slice)) { - c_curr->slice = NULL; - cram_free_slice(s_curr); - s_curr = NULL; - } - - // If we've consumed all slices in this container, also discard - // the container too. - if (c_curr->curr_slice == c_curr->max_slice) { - if (fd->ctr == c_curr) - fd->ctr = NULL; - if (fd->ctr_mt == c_curr) - fd->ctr_mt = NULL; - cram_free_container(c_curr); - c_curr = NULL; - } - - if (!fd->ctr_mt) - fd->ctr_mt = c_curr; - - // Fetch the next slice (and the container if necessary). - // - // If single threaded this loop bails out as soon as it finds - // a slice in range. In this case c_next and c_curr end up being - // the same thing. 
- // - // If multi-threaded, we loop until we have filled out - // thread pool input queue. Here c_next and c_curr *may* differ, as - // can fd->ctr and fd->ctr_mt. - for (;;) { - cram_container *c_next = fd->ctr_mt; - cram_slice *s_next = NULL; - - // Next slice; either from the last job we failed to push - // to the input queue or via more I/O. - if (fd->job_pending) { - cram_decode_job *j = (cram_decode_job *)fd->job_pending; - c_next = j->c; - s_next = j->s; - free(fd->job_pending); - fd->job_pending = NULL; - } else if (!fd->ooc) { - empty_container: - if (!c_next || c_next->curr_slice_mt == c_next->max_slice) { - // new container - for(;;) { - if (!(c_next = cram_read_container(fd))) { - if (fd->pool) { - fd->ooc = 1; - break; - } - - return NULL; - } - c_next->curr_slice_mt = c_next->curr_slice; - - if (c_next->length != 0) - break; - - cram_free_container(c_next); - } - if (fd->ooc) - break; - - /* Skip containers not yet spanning our range */ - if (fd->range.refid != -2 && c_next->ref_seq_id != -2) { - // ref_id beyond end of range; bail out - if (c_next->ref_seq_id != fd->range.refid) { - cram_free_container(c_next); - fd->ctr_mt = NULL; - fd->ooc = 1; - break; - } - - // position beyond end of range; bail out - if (fd->range.refid != -1 && - c_next->ref_seq_start > fd->range.end) { - cram_free_container(c_next); - fd->ctr_mt = NULL; - fd->ooc = 1; - break; - } - - // before start of range; skip to next container - if (fd->range.refid != -1 && - c_next->ref_seq_start + c_next->ref_seq_span-1 < - fd->range.start) { - c_next->curr_slice_mt = c_next->max_slice; - cram_seek(fd, c_next->length, SEEK_CUR); - cram_free_container(c_next); - c_next = NULL; - continue; - } - } - - // Container is valid range, so remember it for restarting - // this function. 
- fd->ctr_mt = c_next; - - if (!(c_next->comp_hdr_block = cram_read_block(fd))) - return NULL; - if (c_next->comp_hdr_block->content_type != COMPRESSION_HEADER) - return NULL; - - c_next->comp_hdr = - cram_decode_compression_header(fd, c_next->comp_hdr_block); - if (!c_next->comp_hdr) - return NULL; - - if (!c_next->comp_hdr->AP_delta && - sam_hrecs_sort_order(fd->header->hrecs) != ORDER_COORD) { - pthread_mutex_lock(&fd->ref_lock); - fd->unsorted = 1; - pthread_mutex_unlock(&fd->ref_lock); - } - } - - if (c_next->num_records == 0) { - if (fd->ctr == c_next) - fd->ctr = NULL; - if (c_curr == c_next) - c_curr = NULL; - if (fd->ctr_mt == c_next) - fd->ctr_mt = NULL; - cram_free_container(c_next); - c_next = NULL; - goto empty_container; - } - - if (!(s_next = c_next->slice = cram_read_slice(fd))) - return NULL; - - s_next->slice_num = ++c_next->curr_slice_mt; - s_next->curr_rec = 0; - s_next->max_rec = s_next->hdr->num_records; - - s_next->last_apos = s_next->hdr->ref_seq_start; - - // We know the container overlaps our range, but with multi-slice - // containers we may have slices that do not. Skip these also. 
- if (fd->range.refid != -2 && s_next->hdr->ref_seq_id != -2) { - // ref_id beyond end of range; bail out - if (s_next->hdr->ref_seq_id != fd->range.refid) { - fd->ooc = 1; - cram_free_slice(s_next); - c_next->slice = s_next = NULL; - break; - } - - // position beyond end of range; bail out - if (fd->range.refid != -1 && - s_next->hdr->ref_seq_start > fd->range.end) { - fd->ooc = 1; - cram_free_slice(s_next); - c_next->slice = s_next = NULL; - break; - } - - // before start of range; skip to next slice - if (fd->range.refid != -1 && - s_next->hdr->ref_seq_start + s_next->hdr->ref_seq_span-1 < - fd->range.start) { - cram_free_slice(s_next); - c_next->slice = s_next = NULL; - continue; - } - } - } // end: if (!fd->ooc) - - if (!c_next || !s_next) - break; - - // Decode the slice, either right now (non-threaded) or by pushing - // it to the a decode queue (threaded). - if (cram_decode_slice_mt(fd, c_next, s_next, fd->header) != 0) { - hts_log_error("Failure to decode slice"); - cram_free_slice(s_next); - c_next->slice = NULL; - return NULL; - } - - // No thread pool, so don't loop again - if (!fd->pool) { - c_curr = c_next; - s_curr = s_next; - break; - } - - // With thread pool, but we have a job pending so our decode queue - // is full. - if (fd->job_pending) - break; - - // Otherwise we're threaded with room in the decode input queue, so - // keep reading slices for decode. - // Push it a bit far, to qsize in queue rather than pending arrival, - // as cram tends to be a bit bursty in decode timings. - if (hts_tpool_process_len(fd->rqueue) > - hts_tpool_process_qsize(fd->rqueue)) - break; - } // end of for(;;) - - - // When not threaded we've already have c_curr and s_curr. - // Otherwise we need get them by pulling off the decode output queue. 
- if (fd->pool) { - hts_tpool_result *res; - cram_decode_job *j; - - if (fd->ooc && hts_tpool_process_empty(fd->rqueue)) { - fd->eof = 1; - return NULL; - } - - res = hts_tpool_next_result_wait(fd->rqueue); - - if (!res || !hts_tpool_result_data(res)) { - hts_log_error("Call to hts_tpool_next_result failed"); - return NULL; - } - - j = (cram_decode_job *)hts_tpool_result_data(res); - c_curr = j->c; - s_curr = j->s; - - if (j->exit_code != 0) { - hts_log_error("Slice decode failure"); - fd->eof = 0; - hts_tpool_delete_result(res, 1); - return NULL; - } - - hts_tpool_delete_result(res, 1); - } - - *cp = c_curr; - - // Update current slice being processed (as opposed to current - // slice in the multi-threaded reahead. - fd->ctr = c_curr; - if (c_curr) { - c_curr->slice = s_curr; - if (s_curr) - c_curr->curr_slice = s_curr->slice_num; - } - if (s_curr) - s_curr->curr_rec = 0; - else - fd->eof = 1; - - return s_curr; -} - -/* - * Read the next cram record and return it. - * Note that to decode cram_record the caller will need to look up some data - * in the current slice, pointed to by fd->ctr->slice. This is valid until - * the next call to cram_get_seq (which may invalidate it). - * - * Returns record pointer on success (do not free) - * NULL on failure - */ -cram_record *cram_get_seq(cram_fd *fd) { - cram_container *c; - cram_slice *s; - - for (;;) { - c = fd->ctr; - if (c && c->slice && c->slice->curr_rec < c->slice->max_rec) { - s = c->slice; - } else { - if (!(s = cram_next_slice(fd, &c))) - return NULL; - continue; /* In case slice contains no records */ - } - - // No need to lock here as get_seq is running in the main thread, - // which is also the same one that does the range modifications. - if (fd->range.refid != -2) { - if (fd->range.refid == -1 && s->crecs[s->curr_rec].ref_id != -1) { - // Special case when looking for unmapped blocks at end. 
- // If these are mixed in with mapped data (c->ref_id == -2) - // then we need skip until we find the unmapped data, if at all - s->curr_rec++; - continue; - } - if (s->crecs[s->curr_rec].ref_id < fd->range.refid && - s->crecs[s->curr_rec].ref_id != -1) { - // Looking for a mapped read, but not there yet. Special case - // as -1 (unmapped) shouldn't be considered < refid. - s->curr_rec++; - continue; - } - - if (s->crecs[s->curr_rec].ref_id != fd->range.refid) { - fd->eof = 1; - cram_free_slice(s); - c->slice = NULL; - return NULL; - } - - if (fd->range.refid != -1 && s->crecs[s->curr_rec].apos > fd->range.end) { - fd->eof = 1; - cram_free_slice(s); - c->slice = NULL; - return NULL; - } - - if (fd->range.refid != -1 && s->crecs[s->curr_rec].aend < fd->range.start) { - s->curr_rec++; - continue; - } - } - - break; - } - - fd->ctr = c; - c->slice = s; - return &s->crecs[s->curr_rec++]; -} - -/* - * Read the next cram record and convert it to a bam_seq_t struct. - * - * Returns >= 0 success (number of bytes written to *bam) - * -1 on EOF or failure (check fd->err) - */ -int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam) { - cram_record *cr; - cram_container *c; - cram_slice *s; - - if (!(cr = cram_get_seq(fd))) - return -1; - - c = fd->ctr; - s = c->slice; - - return cram_to_bam(fd->header, fd, s, cr, s->curr_rec-1, bam); -} - -/* - * Drains and frees the decode read-queue for a multi-threaded reader. 
- */ -void cram_drain_rqueue(cram_fd *fd) { - cram_container *lc = NULL; - - if (!fd->pool || !fd->rqueue) - return; - - // drain queue of any in-flight decode jobs - while (!hts_tpool_process_empty(fd->rqueue)) { - hts_tpool_result *r = hts_tpool_next_result_wait(fd->rqueue); - if (!r) - break; - cram_decode_job *j = (cram_decode_job *)hts_tpool_result_data(r); - if (j->c->slice == j->s) - j->c->slice = NULL; - if (j->c != lc) { - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } - lc = j->c; - } - cram_free_slice(j->s); - hts_tpool_delete_result(r, 1); - } - - // Also tidy up any pending decode job that we didn't submit to the workers - // due to the input queue being full. - if (fd->job_pending) { - cram_decode_job *j = (cram_decode_job *)fd->job_pending; - if (j->c->slice == j->s) - j->c->slice = NULL; - if (j->c != lc) { - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } - lc = j->c; - } - cram_free_slice(j->s); - free(j); - fd->job_pending = NULL; - } - - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } -} diff --git a/src/htslib-1.18/cram/cram_decode.h b/src/htslib-1.18/cram/cram_decode.h deleted file mode 100644 index 400eb6b..0000000 --- a/src/htslib-1.18/cram/cram_decode.h +++ /dev/null @@ -1,117 +0,0 @@ -/* -Copyright (c) 2012-2013, 2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * Include cram.h instead. - * - * This is an internal part of the CRAM system and is automatically included - * when you #include cram.h. - * - * Implements the decoding portion of CRAM I/O. Also see - * cram_codecs.[ch] for the actual encoding functions themselves. - */ - -#ifndef CRAM_DECODE_H -#define CRAM_DECODE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* ---------------------------------------------------------------------- - * CRAM sequence iterators. - */ - -/*! Read the next cram record and return it as a cram_record. - * - * Note that to decode cram_record the caller will need to look up some data - * in the current slice, pointed to by fd->ctr->slice. This is valid until - * the next call to cram_get_seq (which may invalidate it). 
- * - * @return - * Returns record pointer on success (do not free); - * NULL on failure - */ -cram_record *cram_get_seq(cram_fd *fd); - -/*! Read the next cram record and convert it to a bam_seq_t struct. - * - * @return - * Returns 0 on success; - * -1 on EOF or failure (check fd->err) - */ -int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam); - - -/* ---------------------------------------------------------------------- - * Internal functions - */ - -/*! INTERNAL: - * Decodes a CRAM block compression header. - * - * @return - * Returns header ptr on success; - * NULL on failure - */ -cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, - cram_block *b); - -/*! INTERNAL: - * Decodes a CRAM (un)mapped slice header block. - * - * @return - * Returns slice header ptr on success; - * NULL on failure - */ -cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b); - - -/*! INTERNAL: - * Decode an entire slice from container blocks. Fills out s->crecs[] array. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s, - sam_hdr_t *hdr); - - -/* - * Drains and frees the decode read-queue for a multi-threaded reader. - */ -void cram_drain_rqueue(cram_fd *fd); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.18/cram/cram_encode.c b/src/htslib-1.18/cram/cram_encode.c deleted file mode 100644 index 9797fa7..0000000 --- a/src/htslib-1.18/cram/cram_encode.c +++ /dev/null @@ -1,4005 +0,0 @@ -/* -Copyright (c) 2012-2020, 2022-2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cram.h" -#include "os.h" -#include "../sam_internal.h" // for nibble2base -#include "../htslib/hts.h" -#include "../htslib/hts_endian.h" -#include "../textutils_internal.h" - -KHASH_MAP_INIT_STR(m_s2u64, uint64_t) - -#define Z_CRAM_STRAT Z_FILTERED -//#define Z_CRAM_STRAT Z_RLE -//#define Z_CRAM_STRAT Z_HUFFMAN_ONLY -//#define Z_CRAM_STRAT Z_DEFAULT_STRATEGY - -static int process_one_read(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *cr, - bam_seq_t *b, int rnum, kstring_t *MD, - int embed_ref, int no_ref); - -/* - * Returns index of val into key. 
- * Basically strchr(key, val)-key; - */ -static int sub_idx(char *key, char val) { - int i; - - for (i = 0; i < 4 && *key++ != val; i++); - return i; -} - -/* - * Encodes a compression header block into a generic cram_block structure. - * - * Returns cram_block ptr on success - * NULL on failure - */ -cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c, - cram_block_compression_hdr *h, - int embed_ref) { - cram_block *cb = cram_new_block(COMPRESSION_HEADER, 0); - cram_block *map = cram_new_block(COMPRESSION_HEADER, 0); - int i, mc, r = 0; - - int no_ref = c->no_ref; - - if (!cb || !map) - return NULL; - - /* - * This is a concatenation of several blocks of data: - * header + landmarks, preservation map, read encoding map, and the tag - * encoding map. - * All 4 are variable sized and we need to know how large these are - * before creating the compression header itself as this starts with - * the total size (stored as a variable length string). - */ - - // Duplicated from container itself, and removed in 1.1 - if (CRAM_MAJOR_VERS(fd->version) == 1) { - r |= itf8_put_blk(cb, h->ref_seq_id); - r |= itf8_put_blk(cb, h->ref_seq_start); - r |= itf8_put_blk(cb, h->ref_seq_span); - r |= itf8_put_blk(cb, h->num_records); - r |= itf8_put_blk(cb, h->num_landmarks); - for (i = 0; i < h->num_landmarks; i++) { - r |= itf8_put_blk(cb, h->landmark[i]); - } - } - - if (h->preservation_map) { - kh_destroy(map, h->preservation_map); - h->preservation_map = NULL; - } - - /* Create in-memory preservation map */ - /* FIXME: should create this when we create the container */ - if (c->num_records > 0) { - khint_t k; - int r; - - if (!(h->preservation_map = kh_init(map))) - return NULL; - - k = kh_put(map, h->preservation_map, "RN", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = !fd->lossy_read_names; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - k = kh_put(map, h->preservation_map, "PI", &r); - if (-1 == r) return NULL; - 
kh_val(h->preservation_map, k).i = 0; - - k = kh_put(map, h->preservation_map, "UI", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 1; - - k = kh_put(map, h->preservation_map, "MI", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 1; - - } else { - // Technically SM was in 1.0, but wasn't in Java impl. - k = kh_put(map, h->preservation_map, "SM", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 0; - - k = kh_put(map, h->preservation_map, "TD", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 0; - - k = kh_put(map, h->preservation_map, "AP", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = h->AP_delta; - - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - k = kh_put(map, h->preservation_map, "QO", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = h->qs_seq_orient; - } - - if (no_ref || embed_ref>0) { - // Reference Required == No - k = kh_put(map, h->preservation_map, "RR", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 0; - } - } - } - - /* Encode preservation map; could collapse this and above into one */ - mc = 0; - BLOCK_SIZE(map) = 0; - if (h->preservation_map) { - khint_t k; - - for (k = kh_begin(h->preservation_map); - k != kh_end(h->preservation_map); - k++) { - const char *key; - khash_t(map) *pmap = h->preservation_map; - - - if (!kh_exist(pmap, k)) - continue; - - key = kh_key(pmap, k); - BLOCK_APPEND(map, key, 2); - - switch(CRAM_KEY(key[0], key[1])) { - case CRAM_KEY('M','I'): - case CRAM_KEY('U','I'): - case CRAM_KEY('P','I'): - case CRAM_KEY('A','P'): - case CRAM_KEY('R','N'): - case CRAM_KEY('R','R'): - case CRAM_KEY('Q','O'): - BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i); - break; - - case CRAM_KEY('S','M'): { - char smat[5], *mp = smat; - // Output format is for order ACGTN (minus ref base) - // to store the code value 0-3 for each symbol. 
- // - // Note this is different to storing the symbols in order - // that the codes occur from 0-3, which is what we used to - // do. (It didn't matter as we always had a fixed table in - // the order.) - *mp++ = - (sub_idx(h->substitution_matrix[0], 'C') << 6) | - (sub_idx(h->substitution_matrix[0], 'G') << 4) | - (sub_idx(h->substitution_matrix[0], 'T') << 2) | - (sub_idx(h->substitution_matrix[0], 'N') << 0); - *mp++ = - (sub_idx(h->substitution_matrix[1], 'A') << 6) | - (sub_idx(h->substitution_matrix[1], 'G') << 4) | - (sub_idx(h->substitution_matrix[1], 'T') << 2) | - (sub_idx(h->substitution_matrix[1], 'N') << 0); - *mp++ = - (sub_idx(h->substitution_matrix[2], 'A') << 6) | - (sub_idx(h->substitution_matrix[2], 'C') << 4) | - (sub_idx(h->substitution_matrix[2], 'T') << 2) | - (sub_idx(h->substitution_matrix[2], 'N') << 0); - *mp++ = - (sub_idx(h->substitution_matrix[3], 'A') << 6) | - (sub_idx(h->substitution_matrix[3], 'C') << 4) | - (sub_idx(h->substitution_matrix[3], 'G') << 2) | - (sub_idx(h->substitution_matrix[3], 'N') << 0); - *mp++ = - (sub_idx(h->substitution_matrix[4], 'A') << 6) | - (sub_idx(h->substitution_matrix[4], 'C') << 4) | - (sub_idx(h->substitution_matrix[4], 'G') << 2) | - (sub_idx(h->substitution_matrix[4], 'T') << 0); - BLOCK_APPEND(map, smat, 5); - break; - } - - case CRAM_KEY('T','D'): { - r |= (fd->vv.varint_put32_blk(map, BLOCK_SIZE(h->TD_blk)) <= 0); - BLOCK_APPEND(map, - BLOCK_DATA(h->TD_blk), - BLOCK_SIZE(h->TD_blk)); - break; - } - - default: - hts_log_warning("Unknown preservation key '%.2s'", key); - break; - } - - mc++; - } - } - r |= (fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); - r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); - BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); - - /* rec encoding map */ - mc = 0; - BLOCK_SIZE(map) = 0; - if (h->codecs[DS_BF]) { - if (-1 == h->codecs[DS_BF]->store(h->codecs[DS_BF], map, "BF", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_CF]) 
{ - if (-1 == h->codecs[DS_CF]->store(h->codecs[DS_CF], map, "CF", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RL]) { - if (-1 == h->codecs[DS_RL]->store(h->codecs[DS_RL], map, "RL", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_AP]) { - if (-1 == h->codecs[DS_AP]->store(h->codecs[DS_AP], map, "AP", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RG]) { - if (-1 == h->codecs[DS_RG]->store(h->codecs[DS_RG], map, "RG", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_MF]) { - if (-1 == h->codecs[DS_MF]->store(h->codecs[DS_MF], map, "MF", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_NS]) { - if (-1 == h->codecs[DS_NS]->store(h->codecs[DS_NS], map, "NS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_NP]) { - if (-1 == h->codecs[DS_NP]->store(h->codecs[DS_NP], map, "NP", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TS]) { - if (-1 == h->codecs[DS_TS]->store(h->codecs[DS_TS], map, "TS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_NF]) { - if (-1 == h->codecs[DS_NF]->store(h->codecs[DS_NF], map, "NF", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TC]) { - if (-1 == h->codecs[DS_TC]->store(h->codecs[DS_TC], map, "TC", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TN]) { - if (-1 == h->codecs[DS_TN]->store(h->codecs[DS_TN], map, "TN", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TL]) { - if (-1 == h->codecs[DS_TL]->store(h->codecs[DS_TL], map, "TL", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_FN]) { - if (-1 == h->codecs[DS_FN]->store(h->codecs[DS_FN], map, "FN", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_FC]) { - if (-1 == h->codecs[DS_FC]->store(h->codecs[DS_FC], map, "FC", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_FP]) { - if (-1 == h->codecs[DS_FP]->store(h->codecs[DS_FP], map, "FP", - fd->version)) - return NULL; - mc++; - } - if 
(h->codecs[DS_BS]) { - if (-1 == h->codecs[DS_BS]->store(h->codecs[DS_BS], map, "BS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_IN]) { - if (-1 == h->codecs[DS_IN]->store(h->codecs[DS_IN], map, "IN", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_DL]) { - if (-1 == h->codecs[DS_DL]->store(h->codecs[DS_DL], map, "DL", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_BA]) { - if (-1 == h->codecs[DS_BA]->store(h->codecs[DS_BA], map, "BA", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_BB]) { - if (-1 == h->codecs[DS_BB]->store(h->codecs[DS_BB], map, "BB", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_MQ]) { - if (-1 == h->codecs[DS_MQ]->store(h->codecs[DS_MQ], map, "MQ", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RN]) { - if (-1 == h->codecs[DS_RN]->store(h->codecs[DS_RN], map, "RN", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_QS]) { - if (-1 == h->codecs[DS_QS]->store(h->codecs[DS_QS], map, "QS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_QQ]) { - if (-1 == h->codecs[DS_QQ]->store(h->codecs[DS_QQ], map, "QQ", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RI]) { - if (-1 == h->codecs[DS_RI]->store(h->codecs[DS_RI], map, "RI", - fd->version)) - return NULL; - mc++; - } - if (CRAM_MAJOR_VERS(fd->version) != 1) { - if (h->codecs[DS_SC]) { - if (-1 == h->codecs[DS_SC]->store(h->codecs[DS_SC], map, "SC", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RS]) { - if (-1 == h->codecs[DS_RS]->store(h->codecs[DS_RS], map, "RS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_PD]) { - if (-1 == h->codecs[DS_PD]->store(h->codecs[DS_PD], map, "PD", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_HC]) { - if (-1 == h->codecs[DS_HC]->store(h->codecs[DS_HC], map, "HC", - fd->version)) - return NULL; - mc++; - } - } - if (h->codecs[DS_TM]) { - if (-1 == h->codecs[DS_TM]->store(h->codecs[DS_TM], 
map, "TM", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TV]) { - if (-1 == h->codecs[DS_TV]->store(h->codecs[DS_TV], map, "TV", - fd->version)) - return NULL; - mc++; - } - r |= (fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); - r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); - BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); - - /* tag encoding map */ - mc = 0; - BLOCK_SIZE(map) = 0; - if (c->tags_used) { - khint_t k; - - for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { - int key; - if (!kh_exist(c->tags_used, k)) - continue; - - key = kh_key(c->tags_used, k); - cram_codec *cd = kh_val(c->tags_used, k)->codec; - - r |= (fd->vv.varint_put32_blk(map, key) <= 0); - if (-1 == cd->store(cd, map, NULL, fd->version)) - return NULL; - - mc++; - } - } - - r |= (fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); - r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); - BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); - - hts_log_info("Wrote compression block header in %d bytes", (int)BLOCK_SIZE(cb)); - - BLOCK_UPLEN(cb); - - cram_free_block(map); - - if (r >= 0) - return cb; - - block_err: - return NULL; -} - - -/* - * Encodes a slice compression header. 
- * - * Returns cram_block on success - * NULL on failure - */ -cram_block *cram_encode_slice_header(cram_fd *fd, cram_slice *s) { - char *buf; - char *cp; - cram_block *b = cram_new_block(MAPPED_SLICE, 0); - int j; - - if (!b) - return NULL; - - cp = buf = malloc(22+16+5*(8+s->hdr->num_blocks)); - if (NULL == buf) { - cram_free_block(b); - return NULL; - } - - cp += fd->vv.varint_put32s(cp, NULL, s->hdr->ref_seq_id); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - cp += fd->vv.varint_put64(cp, NULL, s->hdr->ref_seq_start); - cp += fd->vv.varint_put64(cp, NULL, s->hdr->ref_seq_span); - } else { - cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_seq_start); - cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_seq_span); - } - cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_records); - if (CRAM_MAJOR_VERS(fd->version) == 2) - cp += fd->vv.varint_put32(cp, NULL, s->hdr->record_counter); - else if (CRAM_MAJOR_VERS(fd->version) >= 3) - cp += fd->vv.varint_put64(cp, NULL, s->hdr->record_counter); - cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_blocks); - cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_content_ids); - for (j = 0; j < s->hdr->num_content_ids; j++) { - cp += fd->vv.varint_put32(cp, NULL, s->hdr->block_content_ids[j]); - } - if (s->hdr->content_type == MAPPED_SLICE) - cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_base_id); - - if (CRAM_MAJOR_VERS(fd->version) != 1) { - memcpy(cp, s->hdr->md5, 16); cp += 16; - } - - assert(cp-buf <= 22+16+5*(8+s->hdr->num_blocks)); - - b->data = (unsigned char *)buf; - b->comp_size = b->uncomp_size = cp-buf; - - return b; -} - - -/* - * Encodes a single read. 
- * - * Returns 0 on success - * -1 on failure - */ -static int cram_encode_slice_read(cram_fd *fd, - cram_container *c, - cram_block_compression_hdr *h, - cram_slice *s, - cram_record *cr, - int64_t *last_pos) { - int r = 0; - int32_t i32; - int64_t i64; - unsigned char uc; - - //fprintf(stderr, "Encode seq %d, %d/%d FN=%d, %s\n", rec, core->byte, core->bit, cr->nfeature, s->name_ds->str + cr->name); - - //printf("BF=0x%x\n", cr->flags); - // bf = cram_flag_swap[cr->flags]; - i32 = fd->cram_flag_swap[cr->flags & 0xfff]; - r |= h->codecs[DS_BF]->encode(s, h->codecs[DS_BF], (char *)&i32, 1); - - i32 = cr->cram_flags & CRAM_FLAG_MASK; - r |= h->codecs[DS_CF]->encode(s, h->codecs[DS_CF], (char *)&i32, 1); - - if (CRAM_MAJOR_VERS(fd->version) != 1 && s->hdr->ref_seq_id == -2) - r |= h->codecs[DS_RI]->encode(s, h->codecs[DS_RI], (char *)&cr->ref_id, 1); - - r |= h->codecs[DS_RL]->encode(s, h->codecs[DS_RL], (char *)&cr->len, 1); - - if (c->pos_sorted) { - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - i64 = cr->apos - *last_pos; - r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i64, 1); - } else { - i32 = cr->apos - *last_pos; - r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1); - } - *last_pos = cr->apos; - } else { - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - i64 = cr->apos; - r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i64, 1); - } else { - i32 = cr->apos; - r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1); - } - } - - r |= h->codecs[DS_RG]->encode(s, h->codecs[DS_RG], (char *)&cr->rg, 1); - - if (cr->cram_flags & CRAM_FLAG_DETACHED) { - i32 = cr->mate_flags; - r |= h->codecs[DS_MF]->encode(s, h->codecs[DS_MF], (char *)&i32, 1); - - r |= h->codecs[DS_NS]->encode(s, h->codecs[DS_NS], - (char *)&cr->mate_ref_id, 1); - - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - r |= h->codecs[DS_NP]->encode(s, h->codecs[DS_NP], - (char *)&cr->mate_pos, 1); - r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], - (char 
*)&cr->tlen, 1); - } else { - i32 = cr->mate_pos; - r |= h->codecs[DS_NP]->encode(s, h->codecs[DS_NP], - (char *)&i32, 1); - i32 = cr->tlen; - r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], - (char *)&i32, 1); - } - } else { - if (cr->cram_flags & CRAM_FLAG_MATE_DOWNSTREAM) { - r |= h->codecs[DS_NF]->encode(s, h->codecs[DS_NF], - (char *)&cr->mate_line, 1); - } - if (cr->cram_flags & CRAM_FLAG_EXPLICIT_TLEN) { - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], - (char *)&cr->tlen, 1); - } - } - } - - /* Aux tags */ - if (CRAM_MAJOR_VERS(fd->version) == 1) { - int j; - uc = cr->ntags; - r |= h->codecs[DS_TC]->encode(s, h->codecs[DS_TC], (char *)&uc, 1); - - for (j = 0; j < cr->ntags; j++) { - uint32_t i32 = s->TN[cr->TN_idx + j]; // id - r |= h->codecs[DS_TN]->encode(s, h->codecs[DS_TN], (char *)&i32, 1); - } - } else { - r |= h->codecs[DS_TL]->encode(s, h->codecs[DS_TL], (char *)&cr->TL, 1); - } - - // qual - // QS codec : Already stored in block[2]. 
- - // features (diffs) - if (!(cr->flags & BAM_FUNMAP)) { - int prev_pos = 0, j; - - r |= h->codecs[DS_FN]->encode(s, h->codecs[DS_FN], - (char *)&cr->nfeature, 1); - for (j = 0; j < cr->nfeature; j++) { - cram_feature *f = &s->features[cr->feature + j]; - - uc = f->X.code; - r |= h->codecs[DS_FC]->encode(s, h->codecs[DS_FC], (char *)&uc, 1); - i32 = f->X.pos - prev_pos; - r |= h->codecs[DS_FP]->encode(s, h->codecs[DS_FP], (char *)&i32, 1); - prev_pos = f->X.pos; - - switch(f->X.code) { - //char *seq; - - case 'X': - //fprintf(stderr, " FC=%c FP=%d base=%d\n", f->X.code, i32, f->X.base); - - uc = f->X.base; - r |= h->codecs[DS_BS]->encode(s, h->codecs[DS_BS], - (char *)&uc, 1); - break; - case 'S': - // Already done - //r |= h->codecs[DS_SC]->encode(s, h->codecs[DS_SC], - // BLOCK_DATA(s->soft_blk) + f->S.seq_idx, - // f->S.len); - - //if (CRAM_MAJOR_VERS(fd->version) >= 3) { - // r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], - // BLOCK_DATA(s->seqs_blk) + f->S.seq_idx, - // f->S.len); - //} - break; - case 'I': - //seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx; - //r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN], - // seq, f->S.len); - //if (CRAM_MAJOR_VERS(fd->version) >= 3) { - // r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], - // BLOCK_DATA(s->seqs_blk) + f->I.seq_idx, - // f->I.len); - //} - break; - case 'i': - uc = f->i.base; - r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], - (char *)&uc, 1); - //seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx; - //r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN], - // seq, 1); - break; - case 'D': - i32 = f->D.len; - r |= h->codecs[DS_DL]->encode(s, h->codecs[DS_DL], - (char *)&i32, 1); - break; - - case 'B': - // // Used when we try to store a non ACGTN base or an N - // // that aligns against a non ACGTN reference - - uc = f->B.base; - r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], - (char *)&uc, 1); - - // Already added - // uc = f->B.qual; - // r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS], - // 
(char *)&uc, 1); - break; - - case 'b': - // string of bases - r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], - (char *)BLOCK_DATA(s->seqs_blk) - + f->b.seq_idx, - f->b.len); - break; - - case 'Q': - // Already added - // uc = f->B.qual; - // r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS], - // (char *)&uc, 1); - break; - - case 'N': - i32 = f->N.len; - r |= h->codecs[DS_RS]->encode(s, h->codecs[DS_RS], - (char *)&i32, 1); - break; - - case 'P': - i32 = f->P.len; - r |= h->codecs[DS_PD]->encode(s, h->codecs[DS_PD], - (char *)&i32, 1); - break; - - case 'H': - i32 = f->H.len; - r |= h->codecs[DS_HC]->encode(s, h->codecs[DS_HC], - (char *)&i32, 1); - break; - - - default: - hts_log_error("Unhandled feature code %c", f->X.code); - return -1; - } - } - - r |= h->codecs[DS_MQ]->encode(s, h->codecs[DS_MQ], - (char *)&cr->mqual, 1); - } else { - char *seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq; - if (cr->len) - r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], seq, cr->len); - } - - return r ? -1 : 0; -} - - -/* - * Applies various compression methods to specific blocks, depending on - * known observations of how data series compress. - * - * Returns 0 on success - * -1 on failure - */ -static int cram_compress_slice(cram_fd *fd, cram_container *c, cram_slice *s) { - int level = fd->level, i; - int method = 1<version >= (3<<8)+1); - - /* Compress the CORE Block too, with minimal zlib level */ - if (level > 5 && s->block[0]->uncomp_size > 500) - cram_compress_block2(fd, s, s->block[0], NULL, 1<use_bz2) - method |= 1<use_rans) { - method_ranspr = (1< 1) - method_ranspr |= - (1< 5) - method_ranspr |= (1<use_rans) { - methodF |= v31_or_above ? method_ranspr : method_rans; - method |= v31_or_above ? 
method_ranspr : method_rans; - } - - int method_arith = 0; - if (fd->use_arith) { - method_arith = (1< 1) - method_arith |= - (1<use_arith && v31_or_above) { - methodF |= method_arith; - method |= method_arith; - } - - if (fd->use_lzma) - method |= (1<= 5) { - method |= 1<use_fqz) { - qmethod |= 1<level > 4) { - qmethod |= 1<level > 6) { - qmethod |= (1<metrics_lock); - for (i = 0; i < DS_END; i++) - if (c->stats[i] && c->stats[i]->nvals > 16) - fd->m[i]->unpackable = 1; - pthread_mutex_unlock(&fd->metrics_lock); - - /* Specific compression methods for certain block types */ - if (cram_compress_block2(fd, s, s->block[DS_IN], fd->m[DS_IN], //IN (seq) - method, level)) - return -1; - - if (fd->level == 0) { - /* Do nothing */ - } else if (fd->level == 1) { - if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], - qmethodF, 1)) - return -1; - for (i = DS_aux; i <= DS_aux_oz; i++) { - if (s->block[i]) - if (cram_compress_block2(fd, s, s->block[i], fd->m[i], - method, 1)) - return -1; - } - } else if (fd->level < 3) { - if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], - qmethod, 1)) - return -1; - if (cram_compress_block2(fd, s, s->block[DS_BA], fd->m[DS_BA], - method, 1)) - return -1; - if (s->block[DS_BB]) - if (cram_compress_block2(fd, s, s->block[DS_BB], fd->m[DS_BB], - method, 1)) - return -1; - for (i = DS_aux; i <= DS_aux_oz; i++) { - if (s->block[i]) - if (cram_compress_block2(fd, s, s->block[i], fd->m[i], - method, level)) - return -1; - } - } else { - if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], - qmethod, level)) - return -1; - if (cram_compress_block2(fd, s, s->block[DS_BA], fd->m[DS_BA], - method, level)) - return -1; - if (s->block[DS_BB]) - if (cram_compress_block2(fd, s, s->block[DS_BB], fd->m[DS_BB], - method, level)) - return -1; - for (i = DS_aux; i <= DS_aux_oz; i++) { - if (s->block[i]) - if (cram_compress_block2(fd, s, s->block[i], fd->m[i], - method, level)) - return -1; - } - } - - // NAME: best is 
generally xz, bzip2, zlib then rans1 - int method_rn = method & ~(method_rans | method_ranspr | 1<version >= (3<<8)+1 && fd->use_tok) - method_rn |= fd->use_arith ? (1<block[DS_RN], fd->m[DS_RN], - method_rn, level)) - return -1; - - // NS shows strong local correlation as rearrangements are localised - if (s->block[DS_NS] && s->block[DS_NS] != s->block[0]) - if (cram_compress_block2(fd, s, s->block[DS_NS], fd->m[DS_NS], - method, level)) - return -1; - - - /* - * Compress any auxiliary tags with their own per-tag metrics - */ - { - int i; - for (i = 0; i < s->naux_block; i++) { - if (!s->aux_block[i] || s->aux_block[i] == s->block[0]) - continue; - - if (s->aux_block[i]->method != RAW) - continue; - - if (cram_compress_block2(fd, s, s->aux_block[i], s->aux_block[i]->m, - method, level)) - return -1; - } - } - - /* - * Minimal compression of any block still uncompressed, bar CORE - */ - { - int i; - for (i = 1; i < s->hdr->num_blocks && i < DS_END; i++) { - if (!s->block[i] || s->block[i] == s->block[0]) - continue; - - if (s->block[i]->method != RAW) - continue; - - if (cram_compress_block2(fd, s, s->block[i], fd->m[i], - methodF, level)) - return -1; - } - } - - return 0; -} - -/* - * Allocates a block associated with the cram codec associated with - * data series ds_id or the internal codec_id (depending on codec - * type). - * - * The ds_ids are what end up written to disk as an external block. - * The c_ids are internal and used when daisy-chaining transforms - * such as MAP and RLE. These blocks are also allocated, but - * are ephemeral in nature. (The codecs themselves cannot allocate - * these as the same codec pointer may be operating on multiple slices - * if we're using a multi-slice container.) 
- * - * Returns 0 on success - * -1 on failure - */ -static int cram_allocate_block(cram_codec *codec, cram_slice *s, int ds_id) { - if (!codec) - return 0; - - switch(codec->codec) { - // Codecs which are hard-coded to use the CORE block - case E_GOLOMB: - case E_HUFFMAN: - case E_BETA: - case E_SUBEXP: - case E_GOLOMB_RICE: - case E_GAMMA: - codec->out = s->block[0]; - break; - - // Codecs which don't use external blocks - case E_CONST_BYTE: - case E_CONST_INT: - codec->out = NULL; - break; - - // Codecs that emit directly to external blocks - case E_EXTERNAL: - case E_VARINT_UNSIGNED: - case E_VARINT_SIGNED: - if (!(s->block[ds_id] = cram_new_block(EXTERNAL, ds_id))) - return -1; - codec->u.external.content_id = ds_id; - codec->out = s->block[ds_id]; - break; - - case E_BYTE_ARRAY_STOP: // Why no sub-codec? - if (!(s->block[ds_id] = cram_new_block(EXTERNAL, ds_id))) - return -1; - codec->u.byte_array_stop.content_id = ds_id; - codec->out = s->block[ds_id]; - break; - - - // Codecs that contain sub-codecs which may in turn emit to external blocks - case E_BYTE_ARRAY_LEN: { - cram_codec *bal = codec->u.e_byte_array_len.len_codec; - if (cram_allocate_block(bal, s, bal->u.external.content_id)) - return -1; - bal = codec->u.e_byte_array_len.val_codec; - if (cram_allocate_block(bal, s, bal->u.external.content_id)) - return -1; - - break; - } - - case E_XRLE: - if (cram_allocate_block(codec->u.e_xrle.len_codec, s, ds_id)) - //ds_id == DS_QS ? 
DS_QS_len : ds_id)) - return -1; - if (cram_allocate_block(codec->u.e_xrle.lit_codec, s, ds_id)) - return -1; - - break; - - case E_XPACK: - if (cram_allocate_block(codec->u.e_xpack.sub_codec, s, ds_id)) - return -1; - codec->out = cram_new_block(0, 0); // ephemeral - if (!codec->out) - return -1; - - break; - - case E_XDELTA: - if (cram_allocate_block(codec->u.e_xdelta.sub_codec, s, ds_id)) - return -1; - codec->out = cram_new_block(0, 0); // ephemeral - if (!codec->out) - return -1; - - break; - - default: - break; - } - - return 0; -} - -/* - * Encodes a single slice from a container - * - * Returns 0 on success - * -1 on failure - */ -static int cram_encode_slice(cram_fd *fd, cram_container *c, - cram_block_compression_hdr *h, cram_slice *s, - int embed_ref) { - int rec, r = 0; - int64_t last_pos; - enum cram_DS_ID id; - - /* - * Slice external blocks: - * ID 0 => base calls (insertions, soft-clip) - * ID 1 => qualities - * ID 2 => names - * ID 3 => TS (insert size), NP (next frag) - * ID 4 => tag values - * ID 6 => tag IDs (TN), if CRAM_V1.0 - * ID 7 => TD tag dictionary, if !CRAM_V1.0 - */ - - /* Create cram slice header */ - s->hdr->ref_base_id = embed_ref>0 && s->hdr->ref_seq_span > 0 - ? DS_ref - : (CRAM_MAJOR_VERS(fd->version) >= 4 ? 0 : -1); - s->hdr->record_counter = c->num_records + c->record_counter; - c->num_records += s->hdr->num_records; - - int ntags = c->tags_used ? c->tags_used->n_occupied : 0; - s->block = calloc(DS_END + ntags*2, sizeof(s->block[0])); - s->hdr->block_content_ids = malloc(DS_END * sizeof(int32_t)); - if (!s->block || !s->hdr->block_content_ids) - return -1; - - // Create first fixed blocks, always external. 
- // CORE - if (!(s->block[0] = cram_new_block(CORE, 0))) - return -1; - - // TN block for CRAM v1 - if (CRAM_MAJOR_VERS(fd->version) == 1) { - if (h->codecs[DS_TN]->codec == E_EXTERNAL) { - if (!(s->block[DS_TN] = cram_new_block(EXTERNAL,DS_TN))) return -1; - h->codecs[DS_TN]->u.external.content_id = DS_TN; - } else { - s->block[DS_TN] = s->block[0]; - } - } - - // Embedded reference - if (embed_ref>0) { - if (!(s->block[DS_ref] = cram_new_block(EXTERNAL, DS_ref))) - return -1; - s->ref_id = DS_ref; // needed? - BLOCK_APPEND(s->block[DS_ref], - c->ref + s->hdr->ref_seq_start - c->ref_start, - s->hdr->ref_seq_span); - } - - /* - * All the data-series blocks if appropriate. - */ - for (id = DS_QS; id < DS_TN; id++) { - if (cram_allocate_block(h->codecs[id], s, id) < 0) - return -1; - } - - /* - * Add in the external tag blocks too. - */ - if (c->tags_used) { - int n; - s->hdr->num_blocks = DS_END; - for (n = 0; n < s->naux_block; n++) - s->block[s->hdr->num_blocks++] = s->aux_block[n]; - } - - /* Encode reads */ - last_pos = s->hdr->ref_seq_start; - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - if (cram_encode_slice_read(fd, c, h, s, cr, &last_pos) == -1) - return -1; - } - - s->block[0]->uncomp_size = s->block[0]->byte + (s->block[0]->bit < 7); - s->block[0]->comp_size = s->block[0]->uncomp_size; - - // Make sure the fixed blocks point to the correct sources - if (s->block[DS_IN]) cram_free_block(s->block[DS_IN]); - s->block[DS_IN] = s->base_blk; s->base_blk = NULL; - if (s->block[DS_QS]) cram_free_block(s->block[DS_QS]); - s->block[DS_QS] = s->qual_blk; s->qual_blk = NULL; - if (s->block[DS_RN]) cram_free_block(s->block[DS_RN]); - s->block[DS_RN] = s->name_blk; s->name_blk = NULL; - if (s->block[DS_SC]) cram_free_block(s->block[DS_SC]); - s->block[DS_SC] = s->soft_blk; s->soft_blk = NULL; - - // Finalise any data transforms. 
- for (id = DS_QS; id < DS_TN; id++) { - if (h->codecs[id] && h->codecs[id]->flush) - h->codecs[id]->flush(h->codecs[id]); - } - - // Ensure block sizes are up to date. - for (id = 1; id < s->hdr->num_blocks; id++) { - if (!s->block[id] || s->block[id] == s->block[0]) - continue; - - if (s->block[id]->uncomp_size == 0) - BLOCK_UPLEN(s->block[id]); - } - - // Compress it all - if (cram_compress_slice(fd, c, s) == -1) - return -1; - - // Collapse empty blocks and create hdr_block - { - int i, j; - - s->hdr->block_content_ids = realloc(s->hdr->block_content_ids, - s->hdr->num_blocks * sizeof(int32_t)); - if (!s->hdr->block_content_ids) - return -1; - - for (i = j = 1; i < s->hdr->num_blocks; i++) { - if (!s->block[i] || s->block[i] == s->block[0]) - continue; - if (s->block[i]->uncomp_size == 0) { - cram_free_block(s->block[i]); - s->block[i] = NULL; - continue; - } - s->block[j] = s->block[i]; - s->hdr->block_content_ids[j-1] = s->block[i]->content_id; - j++; - } - s->hdr->num_content_ids = j-1; - s->hdr->num_blocks = j; - - if (!(s->hdr_block = cram_encode_slice_header(fd, s))) - return -1; - } - - return r ? -1 : 0; - - block_err: - return -1; -} - -/* - * Returns the number of expected read names for this record. - */ -static int expected_template_count(bam_seq_t *b) { - int expected = bam_flag(b) & BAM_FPAIRED ? 2 : 1; - - uint8_t *TC = (uint8_t *)bam_aux_get(b, "TC"); - if (TC) { - int n = bam_aux2i(TC); - if (expected < n) - expected = n; - } - - if (!TC && bam_aux_get(b, "SA")) { - // We could count the semicolons, but we'd have to do this for - // read1, read2 and read(not-1-or-2) combining the results - // together. This is a cheap and safe alternative for now. - expected = INT_MAX; - } - - return expected; -} - -/* - * Lossily reject read names. - * - * The rule here is that if all reads for this template reside in the - * same slice then we can lose the name. Otherwise we keep them as we - * do not know when (or if) the other reads will turn up. 
- * - * Note there may be only 1 read (non-paired library) or more than 2 - * reads (paired library with supplementary reads), or other weird - * setups. We need to know how many are expected. Ways to guess: - * - * - Flags (0x1 - has > 1 read) - * - TC aux field (not mandatory) - * - SA tags (count semicolons, NB per fragment so sum - hard) - * - RNEXT/PNEXT uniqueness count. (not implemented, tricky) - * - * Returns 0 on success - * -1 on failure - */ -static int lossy_read_names(cram_fd *fd, cram_container *c, cram_slice *s, - int bam_start) { - int r1, r2, ret = -1; - - // Initialise cram_flags - for (r2 = 0; r2 < s->hdr->num_records; r2++) - s->crecs[r2].cram_flags = 0; - - if (!fd->lossy_read_names) - return 0; - - khash_t(m_s2u64) *names = kh_init(m_s2u64); - if (!names) - goto fail; - - // 1: Iterate through names to count frequency - for (r1 = bam_start, r2 = 0; r2 < s->hdr->num_records; r1++, r2++) { - //cram_record *cr = &s->crecs[r2]; - bam_seq_t *b = c->bams[r1]; - khint_t k; - int n; - uint64_t e; - union { - uint64_t i64; - struct { - int32_t e,c; // expected & observed counts. - } counts; - } u; - - e = expected_template_count(b); - u.counts.e = e; u.counts.c = 1; - - k = kh_put(m_s2u64, names, bam_name(b), &n); - if (n == -1) - goto fail; - - if (n == 0) { - // not a new name - u.i64 = kh_val(names, k); - if (u.counts.e != e) { - // different expectation or already hit the max - //fprintf(stderr, "Err computing no. %s recs\n", bam_name(b)); - kh_val(names, k) = 0; - } else { - u.counts.c++; - if (u.counts.e == u.counts.c) { - // Reached expected count. 
- kh_val(names, k) = -1; - } else { - kh_val(names, k) = u.i64; - } - } - } else { - // new name - kh_val(names, k) = u.i64; - } - } - - // 2: Remove names if all present (hd.i == -1) - for (r1 = bam_start, r2 = 0; r2 < s->hdr->num_records; r1++, r2++) { - cram_record *cr = &s->crecs[r2]; - bam_seq_t *b = c->bams[r1]; - khint_t k; - - k = kh_get(m_s2u64, names, bam_name(b)); - - if (k == kh_end(names)) - goto fail; - - if (kh_val(names, k) == -1) - cr->cram_flags = CRAM_FLAG_DISCARD_NAME; - } - - ret = 0; - fail: // ret==-1 - - if (names) - kh_destroy(m_s2u64, names); - - return ret; -} - -/* - * Adds the reading names. We do this here as a separate pass rather - * than per record in the process_one_read calls as that function can - * go back and change the CRAM_FLAG_DETACHED status of a previously - * processed read if it subsequently determines the TLEN field is - * incorrect. Given DETACHED reads always try to decode read names, - * we need to know their status before generating the read-name block. - * - * Output is an update s->name_blk, and cr->name / cr->name_len - * fields. 
- */ -static int add_read_names(cram_fd *fd, cram_container *c, cram_slice *s, - int bam_start) { - int r1, r2; - int keep_names = !fd->lossy_read_names; - - for (r1 = bam_start, r2 = 0; - r1 < c->curr_c_rec && r2 < s->hdr->num_records; - r1++, r2++) { - cram_record *cr = &s->crecs[r2]; - bam_seq_t *b = c->bams[r1]; - - cr->name = BLOCK_SIZE(s->name_blk); - if ((cr->cram_flags & CRAM_FLAG_DETACHED) || keep_names) { - if (CRAM_MAJOR_VERS(fd->version) >= 4 - && (cr->cram_flags & CRAM_FLAG_MATE_DOWNSTREAM) - && cr->mate_line) { - // Dedup read names in V4 - BLOCK_APPEND(s->name_blk, "\0", 1); - cr->name_len = 1; - } else { - BLOCK_APPEND(s->name_blk, bam_name(b), bam_name_len(b)); - cr->name_len = bam_name_len(b); - } - } else { - // Can only discard duplicate names if not detached - cr->name_len = 0; - } - - if (cram_stats_add(c->stats[DS_RN], cr->name_len) < 0) - goto block_err; - } - - return 0; - - block_err: - return -1; -} - -// CRAM version >= 3.1 -#define CRAM_ge31(v) ((v) >= 0x301) - -// Returns the next cigar op code: one of the BAM_C* codes, -// or -1 if no more are present. -static inline -int next_cigar_op(uint32_t *cigar, uint32_t ncigar, int *skip, int *spos, - uint32_t *cig_ind, uint32_t *cig_op, uint32_t *cig_len) { - for(;;) { - while (*cig_len == 0) { - if (*cig_ind < ncigar) { - *cig_op = cigar[*cig_ind] & BAM_CIGAR_MASK; - *cig_len = cigar[*cig_ind] >> BAM_CIGAR_SHIFT; - (*cig_ind)++; - } else { - return -1; - } - } - - if (skip[*cig_op]) { - *spos += (bam_cigar_type(*cig_op)&1) * *cig_len; - *cig_len = 0; - continue; - } - - (*cig_len)--; - break; - } - - return *cig_op; -} - -// Ensure ref and hist are large enough. -static inline int extend_ref(char **ref, uint32_t (**hist)[5], hts_pos_t pos, - hts_pos_t ref_start, hts_pos_t *ref_end) { - if (pos < ref_start) - return -1; - if (pos < *ref_end) - return 0; - - // realloc - hts_pos_t old_end = *ref_end ? 
*ref_end : ref_start; - hts_pos_t new_end = *ref_end = ref_start + 1000 + (pos-ref_start)*1.5; - - char *tmp = realloc(*ref, *ref_end-ref_start); - if (!tmp) - return -1; - *ref = tmp; - - uint32_t (*tmp5)[5] = realloc(**hist, - (*ref_end - ref_start)*sizeof(**hist)); - if (!tmp5) - return -1; - *hist = tmp5; - *ref_end = new_end; - - // initialise - old_end -= ref_start; - new_end -= ref_start; - memset(&(*ref)[old_end], 0, new_end-old_end); - memset(&(*hist)[old_end], 0, (new_end-old_end)*sizeof(**hist)); - - return 0; -} - -// Walk through MD + seq to generate ref -static int cram_add_to_ref_MD(bam1_t *b, char **ref, uint32_t (**hist)[5], - hts_pos_t ref_start, hts_pos_t *ref_end, - const uint8_t *MD) { - uint8_t *seq = bam_get_seq(b); - uint32_t *cigar = bam_get_cigar(b); - uint32_t ncigar = b->core.n_cigar; - uint32_t cig_op = 0, cig_len = 0, cig_ind = 0; - - int iseq = 0, next_op; - hts_pos_t iref = b->core.pos - ref_start; - - // Skip INS, REF_SKIP, *CLIP, PAD. and BACK. - static int cig_skip[16] = {0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1}; - while (iseq < b->core.l_qseq && *MD) { - if (isdigit(*MD)) { - // match - int overflow = 0; - int len = hts_str2uint((char *)MD, (char **)&MD, 31, &overflow); - if (overflow || - extend_ref(ref, hist, iref+ref_start + len, - ref_start, ref_end) < 0) - return -1; - while (iseq < b->core.l_qseq && len) { - // rewrite to have internal loops? 
- if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, - &iseq, &cig_ind, &cig_op, - &cig_len)) < 0) - return -1; - - if (next_op != BAM_CMATCH && - next_op != BAM_CEQUAL) { - hts_log_info("MD:Z and CIGAR are incompatible for " - "record %s", bam_get_qname(b)); - return -1; - } - - // Short-cut loop over same cigar op for efficiency - cig_len++; - do { - cig_len--; - (*ref)[iref++] = seq_nt16_str[bam_seqi(seq, iseq)]; - iseq++; - len--; - } while (cig_len && iseq < b->core.l_qseq && len); - } - if (len > 0) - return -1; // MD is longer than seq - } else if (*MD == '^') { - // deletion - MD++; - while (isalpha(*MD)) { - if (extend_ref(ref, hist, iref+ref_start, ref_start, - ref_end) < 0) - return -1; - if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, - &iseq, &cig_ind, &cig_op, - &cig_len)) < 0) - return -1; - - if (next_op != BAM_CDEL) { - hts_log_info("MD:Z and CIGAR are incompatible"); - return -1; - } - - (*ref)[iref++] = *MD++ & ~0x20; - } - } else { - // substitution - if (extend_ref(ref, hist, iref+ref_start, ref_start, ref_end) < 0) - return -1; - if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, - &iseq, &cig_ind, &cig_op, - &cig_len)) < 0) - return -1; - - if (next_op != BAM_CMATCH && next_op != BAM_CDIFF) { - hts_log_info("MD:Z and CIGAR are incompatible"); - return -1; - } - - (*ref)[iref++] = *MD++ & ~0x20; - iseq++; - } - } - - return 1; -} - -// Append a sequence to a ref/consensus structure. -// We maintain both an absolute refefence (ACGTN where MD:Z is -// present) and a 5-way frequency array for when no MD:Z is known. -// We then subsequently convert the 5-way frequencies to a consensus -// ref in a second pass. 
-// -// Returns >=0 on success, -// -1 on failure (eg inconsistent data) -static int cram_add_to_ref(bam1_t *b, char **ref, uint32_t (**hist)[5], - hts_pos_t ref_start, hts_pos_t *ref_end) { - const uint8_t *MD = bam_aux_get(b, "MD"); - int ret = 0; - if (MD && *MD == 'Z') { - // We can use MD to directly compute the reference - int ret = cram_add_to_ref_MD(b, ref, hist, ref_start, ref_end, MD+1); - - if (ret > 0) - return ret; - } - - // Otherwise we just use SEQ+CIGAR and build a consensus which we later - // turn into a fake reference - uint32_t *cigar = bam_get_cigar(b); - uint32_t ncigar = b->core.n_cigar; - uint32_t i, j; - hts_pos_t iseq = 0, iref = b->core.pos - ref_start; - uint8_t *seq = bam_get_seq(b); - for (i = 0; i < ncigar; i++) { - switch (bam_cigar_op(cigar[i])) { - case BAM_CSOFT_CLIP: - case BAM_CINS: - iseq += bam_cigar_oplen(cigar[i]); - break; - - case BAM_CMATCH: - case BAM_CEQUAL: - case BAM_CDIFF: { - int len = bam_cigar_oplen(cigar[i]); - // Maps an nt16 (A=1 C=2 G=4 T=8 bits) to 0123 plus N=4 - static uint8_t L16[16] = {4,0,1,4, 2,4,4,4, 3,4,4,4, 4,4,4,4}; - - if (extend_ref(ref, hist, iref+ref_start + len, - ref_start, ref_end) < 0) - return -1; - if (iseq + len <= b->core.l_qseq) { - // Nullify failed MD:Z if appropriate - if (ret < 0) - memset(&(*ref)[iref], 0, len); - - for (j = 0; j < len; j++, iref++, iseq++) - (*hist)[iref][L16[bam_seqi(seq, iseq)]]++; - } else { - // Probably a 2ndary read with seq "*" - iseq += len; - iref += len; - } - break; - } - - case BAM_CDEL: - case BAM_CREF_SKIP: - iref += bam_cigar_oplen(cigar[i]); - } - } - - return 1; -} - -// Automatically generates the reference and stashed it in c->ref, also -// setting c->ref_start and c->ref_end. -// -// If we have MD:Z tags then we use them to directly infer the reference, -// along with SEQ + CIGAR. Otherwise we use SEQ/CIGAR only to build up -// a consensus and then assume the reference as the majority rule. 
-// -// In this latter scenario we need to be wary of auto-generating MD and NM -// during decode, but that's handled elsewhere via an additional aux tag. -// -// Returns 0 on success, -// -1 on failure -static int cram_generate_reference(cram_container *c, cram_slice *s, int r1) { - // TODO: if we can find an external reference then use it, even if the - // user told us to do embed_ref=2. - char *ref = NULL; - uint32_t (*hist)[5] = NULL; - hts_pos_t ref_start = c->bams[r1]->core.pos, ref_end = 0; - - // initial allocation - if (extend_ref(&ref, &hist, - c->bams[r1 + s->hdr->num_records-1]->core.pos + - c->bams[r1 + s->hdr->num_records-1]->core.l_qseq, - ref_start, &ref_end) < 0) - return -1; - - // Add each bam file to the reference/consensus arrays - int r2; - hts_pos_t last_pos = -1; - for (r2 = 0; r1 < c->curr_c_rec && r2 < s->hdr->num_records; r1++, r2++) { - if (c->bams[r1]->core.pos < last_pos) { - hts_log_error("Cannot build reference with unsorted data"); - goto err; - } - last_pos = c->bams[r1]->core.pos; - if (cram_add_to_ref(c->bams[r1], &ref, &hist, ref_start, &ref_end) < 0) - goto err; - } - - // Compute the consensus - hts_pos_t i; - for (i = 0; i < ref_end-ref_start; i++) { - if (!ref[i]) { - int max_v = 0, max_j = 4, j; - for (j = 0; j < 4; j++) - // don't call N (j==4) unless no coverage - if (max_v < hist[i][j]) - max_v = hist[i][j], max_j = j; - ref[i] = "ACGTN"[max_j]; - } - } - free(hist); - - // Put the reference in place so it appears to be an external - // ref file. - c->ref = ref; - c->ref_start = ref_start+1; - c->ref_end = ref_end+1; - - return 0; - - err: - free(ref); - free(hist); - return -1; -} - -// Check if the SQ M5 tag matches the reference we've loaded. -static int validate_md5(cram_fd *fd, int ref_id) { - if (fd->ignore_md5 || ref_id < 0 || ref_id >= fd->refs->nref) - return 0; - - // Have we already checked this ref? - if (fd->refs->ref_id[ref_id]->validated_md5) - return 0; - - // Check if we have the MD5 known. 
- // We should, but maybe we're using embedded references? - sam_hrecs_t *hrecs = fd->header->hrecs; - sam_hrec_type_t *ty = sam_hrecs_find_type_id(hrecs, "SQ", "SN", - hrecs->ref[ref_id].name); - if (!ty) - return 0; - - sam_hrec_tag_t *m5tag = sam_hrecs_find_key(ty, "M5", NULL); - if (!m5tag) - return 0; - - // It's known, so compute md5 on the loaded reference sequence. - char *ref = fd->refs->ref_id[ref_id]->seq; - int64_t len = fd->refs->ref_id[ref_id]->length; - hts_md5_context *md5; - char unsigned buf[16]; - char buf2[33]; - - if (!(md5 = hts_md5_init())) - return -1; - hts_md5_update(md5, ref, len); - hts_md5_final(buf, md5); - hts_md5_destroy(md5); - hts_md5_hex(buf2, buf); - - // Compare it to header @SQ M5 tag - if (strcmp(m5tag->str+3, buf2)) { - hts_log_error("SQ header M5 tag discrepancy for reference '%s'", - hrecs->ref[ref_id].name); - hts_log_error("Please use the correct reference, or " - "consider using embed_ref=2"); - return -1; - } - fd->refs->ref_id[ref_id]->validated_md5 = 1; - - return 0; -} - -/* - * Encodes all slices in a container into blocks. 
- * Returns 0 on success - * -1 on failure - */ -int cram_encode_container(cram_fd *fd, cram_container *c) { - int i, j, slice_offset; - cram_block_compression_hdr *h = c->comp_hdr; - cram_block *c_hdr; - int multi_ref = 0; - int r1, r2, sn, nref, embed_ref, no_ref; - spare_bams *spares; - - if (CRAM_MAJOR_VERS(fd->version) == 1) - goto err; - -//#define goto_err {fprintf(stderr, "ERR at %s:%d\n", __FILE__, __LINE__);goto err;} -#define goto_err goto err - - restart: - /* Cache references up-front if we have unsorted access patterns */ - pthread_mutex_lock(&fd->ref_lock); - nref = fd->refs->nref; - pthread_mutex_unlock(&fd->ref_lock); - embed_ref = c->embed_ref; - no_ref = c->no_ref; - - /* To create M5 strings */ - /* Fetch reference sequence */ - if (!no_ref) { - if (!c->bams || !c->bams[0]) - goto_err; - bam_seq_t *b = c->bams[0]; - - if (embed_ref <= 1) { - char *ref = cram_get_ref(fd, bam_ref(b), 1, 0); - if (!ref && bam_ref(b) >= 0) { - if (!c->pos_sorted) { - // TODO: maybe also check fd->no_ref? 
- hts_log_warning("Failed to load reference #%d", - bam_ref(b)); - hts_log_warning("Switching to non-ref mode"); - - pthread_mutex_lock(&fd->ref_lock); - c->embed_ref = fd->embed_ref = 0; - c->no_ref = fd->no_ref = 1; - pthread_mutex_unlock(&fd->ref_lock); - goto restart; - } - - if (c->multi_seq || embed_ref == 0) { - hts_log_error("Failed to load reference #%d", bam_ref(b)); - return -1; - } - hts_log_warning("Failed to load reference #%d", bam_ref(b)); - hts_log_warning("Enabling embed_ref=2 mode to auto-generate" - " reference"); - if (embed_ref <= 0) - hts_log_warning("NOTE: the CRAM file will be bigger than" - " using an external reference"); - pthread_mutex_lock(&fd->ref_lock); - embed_ref = c->embed_ref = fd->embed_ref = 2; - pthread_mutex_unlock(&fd->ref_lock); - goto auto_ref; - } else if (ref) { - if (validate_md5(fd, c->ref_seq_id) < 0) - goto_err; - } - if ((c->ref_id = bam_ref(b)) >= 0) { - c->ref_seq_id = c->ref_id; - c->ref = fd->refs->ref_id[c->ref_seq_id]->seq; - c->ref_start = 1; - c->ref_end = fd->refs->ref_id[c->ref_seq_id]->length; - } - } else { - auto_ref: - // Auto-embed ref. - // This starts as 'N' and is amended on-the-fly as we go - // based on MD:Z tags. 
- if ((c->ref_id = bam_ref(b)) >= 0) { - c->ref_free = 1; - c->ref = NULL; - } - } - c->ref_seq_id = c->ref_id; - } else { - c->ref_id = bam_ref(c->bams[0]); - cram_ref_incr(fd->refs, c->ref_id); - c->ref_seq_id = c->ref_id; - } - - if (!no_ref && c->refs_used) { - for (i = 0; i < nref; i++) { - if (c->refs_used[i]) { - if (cram_get_ref(fd, i, 1, 0)) { - if (validate_md5(fd, i) < 0) - goto_err; - } else { - hts_log_warning("Failed to find reference, " - "switching to non-ref mode"); - no_ref = c->no_ref = 1; - } - } - } - } - - /* Turn bams into cram_records and gather basic stats */ - for (r1 = sn = 0; r1 < c->curr_c_rec; sn++) { - cram_slice *s = c->slices[sn]; - int64_t first_base = INT64_MAX, last_base = INT64_MIN; - - int r1_start = r1; - - assert(sn < c->curr_slice); - - // Discover which read names *may* be safely removed. - // Ie which ones have all their records in this slice. - if (lossy_read_names(fd, c, s, r1_start) != 0) - return -1; - - // Tracking of MD tags so we can spot when the auto-generated values - // will differ from the current stored ones. The kstring here is - // simply to avoid excessive malloc and free calls. All initialisation - // is done within process_one_read(). - kstring_t MD = {0}; - - // Embed consensus / MD-generated ref - if (embed_ref == 2) { - if (cram_generate_reference(c, s, r1) < 0) { - // Should this be a permanent thing via fd->no_ref? - // Doing so means we cannot easily switch back again should - // things fix themselves later on. This is likely not a - // concern though as failure to generate a reference implies - // unsorted data which is rarely recovered from. - - // Only if sn == 0. We're hosed if we're on the 2nd slice and - // the first worked, as no-ref is a container global param. 
- if (sn > 0) { - hts_log_error("Failed to build reference, " - "switching to non-ref mode"); - return -1; - } else { - hts_log_warning("Failed to build reference, " - "switching to non-ref mode"); - } - pthread_mutex_lock(&fd->ref_lock); - c->embed_ref = fd->embed_ref = 0; - c->no_ref = fd->no_ref = 1; - pthread_mutex_unlock(&fd->ref_lock); - goto restart; - } - } - - // Iterate through records creating the cram blocks for some - // fields and just gathering stats for others. - for (r2 = 0; r1 < c->curr_c_rec && r2 < s->hdr->num_records; r1++, r2++) { - cram_record *cr = &s->crecs[r2]; - bam_seq_t *b = c->bams[r1]; - - /* If multi-ref we need to cope with changing reference per seq */ - if (c->multi_seq && !no_ref) { - if (bam_ref(b) != c->ref_seq_id && bam_ref(b) >= 0) { - if (c->ref_seq_id >= 0) - cram_ref_decr(fd->refs, c->ref_seq_id); - - if (!cram_get_ref(fd, bam_ref(b), 1, 0)) { - hts_log_error("Failed to load reference #%d", bam_ref(b)); - free(MD.s); - return -1; - } - if (validate_md5(fd, bam_ref(b)) < 0) - return -1; - - c->ref_seq_id = bam_ref(b); // overwritten later by -2 - if (!fd->refs->ref_id[c->ref_seq_id]->seq) - return -1; - c->ref = fd->refs->ref_id[c->ref_seq_id]->seq; - c->ref_start = 1; - c->ref_end = fd->refs->ref_id[c->ref_seq_id]->length; - } - } - - if (process_one_read(fd, c, s, cr, b, r2, &MD, embed_ref, - no_ref) != 0) { - free(MD.s); - return -1; - } - - if (first_base > cr->apos) - first_base = cr->apos; - - if (last_base < cr->aend) - last_base = cr->aend; - } - - free(MD.s); - - // Process_one_read doesn't add read names as it can change - // its mind during the loop on the CRAM_FLAG_DETACHED setting - // of earlier records (if it detects the auto-generation of - // TLEN is incorrect). This affects which read-names can be - // lossily compressed, so we do these in another pass. 
- if (add_read_names(fd, c, s, r1_start) < 0) - return -1; - - if (c->multi_seq) { - s->hdr->ref_seq_id = -2; - s->hdr->ref_seq_start = 0; - s->hdr->ref_seq_span = 0; - } else if (c->ref_id == -1 && CRAM_ge31(fd->version)) { - // Spec states span=0, but it broke our range queries. - // See commit message for this and prior. - s->hdr->ref_seq_id = -1; - s->hdr->ref_seq_start = 0; - s->hdr->ref_seq_span = 0; - } else { - s->hdr->ref_seq_id = c->ref_id; - s->hdr->ref_seq_start = first_base; - s->hdr->ref_seq_span = MAX(0, last_base - first_base + 1); - } - s->hdr->num_records = r2; - - // Processed a slice, now stash the aux blocks so the next - // slice can start aggregating them from the start again. - if (c->tags_used->n_occupied) { - int ntags = c->tags_used->n_occupied; - s->aux_block = calloc(ntags*2, sizeof(*s->aux_block)); - if (!s->aux_block) - return -1; - - khint_t k; - - s->naux_block = 0; - for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { - if (!kh_exist(c->tags_used, k)) - continue; - - cram_tag_map *tm = kh_val(c->tags_used, k); - if (!tm) goto_err; - if (!tm->blk) continue; - s->aux_block[s->naux_block++] = tm->blk; - tm->blk = NULL; - if (!tm->blk2) continue; - s->aux_block[s->naux_block++] = tm->blk2; - tm->blk2 = NULL; - } - assert(s->naux_block <= 2*c->tags_used->n_occupied); - } - } - - if (c->multi_seq && !no_ref) { - if (c->ref_seq_id >= 0) - cram_ref_decr(fd->refs, c->ref_seq_id); - } - - /* Link our bams[] array onto the spare bam list for reuse */ - spares = malloc(sizeof(*spares)); - if (!spares) goto_err; - pthread_mutex_lock(&fd->bam_list_lock); - spares->bams = c->bams; - spares->next = fd->bl; - fd->bl = spares; - pthread_mutex_unlock(&fd->bam_list_lock); - c->bams = NULL; - - /* Detect if a multi-seq container */ - cram_stats_encoding(fd, c->stats[DS_RI]); - multi_ref = c->stats[DS_RI]->nvals > 1; - pthread_mutex_lock(&fd->metrics_lock); - fd->last_RI_count = c->stats[DS_RI]->nvals; - 
pthread_mutex_unlock(&fd->metrics_lock); - - - if (multi_ref) { - hts_log_info("Multi-ref container"); - c->ref_seq_id = -2; - c->ref_seq_start = 0; - c->ref_seq_span = 0; - } - - - /* Compute MD5s */ - no_ref = c->no_ref; - int is_v4 = CRAM_MAJOR_VERS(fd->version) >= 4 ? 1 : 0; - - for (i = 0; i < c->curr_slice; i++) { - cram_slice *s = c->slices[i]; - - if (CRAM_MAJOR_VERS(fd->version) != 1) { - if (s->hdr->ref_seq_id >= 0 && c->multi_seq == 0 && !no_ref) { - hts_md5_context *md5 = hts_md5_init(); - if (!md5) - return -1; - hts_md5_update(md5, - c->ref + s->hdr->ref_seq_start - c->ref_start, - s->hdr->ref_seq_span); - hts_md5_final(s->hdr->md5, md5); - hts_md5_destroy(md5); - } else { - memset(s->hdr->md5, 0, 16); - } - } - } - - c->num_records = 0; - c->num_blocks = 1; // cram_block_compression_hdr - c->length = 0; - - //fprintf(stderr, "=== BF ===\n"); - h->codecs[DS_BF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BF]), - c->stats[DS_BF], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_BF]->nvals && !h->codecs[DS_BF]) goto_err; - - //fprintf(stderr, "=== CF ===\n"); - h->codecs[DS_CF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_CF]), - c->stats[DS_CF], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_CF]->nvals && !h->codecs[DS_CF]) goto_err; - - //fprintf(stderr, "=== RN ===\n"); - //h->codecs[DS_RN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RN]), - // c->stats[DS_RN], E_BYTE_ARRAY, NULL, - // fd->version); - - //fprintf(stderr, "=== AP ===\n"); - if (c->pos_sorted || CRAM_MAJOR_VERS(fd->version) >= 4) { - if (c->pos_sorted) - h->codecs[DS_AP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_AP]), - c->stats[DS_AP], - is_v4 ? E_LONG : E_INT, - NULL, fd->version, &fd->vv); - else - // Unsorted data has no stats, but hard-code VARINT_SIGNED / EXT. - h->codecs[DS_AP] = cram_encoder_init(is_v4 ? E_VARINT_SIGNED - : E_EXTERNAL, - NULL, - is_v4 ? 
E_LONG : E_INT, - NULL, fd->version, &fd->vv); - } else { - // Removed BETA in v4.0. - // Should we consider dropping use of it for 3.0 too? - int p[2] = {0, c->max_apos}; - h->codecs[DS_AP] = cram_encoder_init(E_BETA, NULL, - is_v4 ? E_LONG : E_INT, - p, fd->version, &fd->vv); -// cram_xdelta_encoder e; -// e.word_size = is_v4 ? 8 : 4; -// e.sub_encoding = E_EXTERNAL; -// e.sub_codec_dat = (void *)DS_AP; -// -// h->codecs[DS_AP] = cram_encoder_init(E_XDELTA, NULL, -// is_v4 ? E_LONG : E_INT, -// &e, fd->version, &fd->vv); - } - if (!h->codecs[DS_AP]) goto_err; - - //fprintf(stderr, "=== RG ===\n"); - h->codecs[DS_RG] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RG]), - c->stats[DS_RG], - E_INT, - NULL, - fd->version, &fd->vv); - if (c->stats[DS_RG]->nvals && !h->codecs[DS_RG]) goto_err; - - //fprintf(stderr, "=== MQ ===\n"); - h->codecs[DS_MQ] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MQ]), - c->stats[DS_MQ], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_MQ]->nvals && !h->codecs[DS_MQ]) goto_err; - - //fprintf(stderr, "=== NS ===\n"); - h->codecs[DS_NS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NS]), - c->stats[DS_NS], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_NS]->nvals && !h->codecs[DS_NS]) goto_err; - - //fprintf(stderr, "=== MF ===\n"); - h->codecs[DS_MF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MF]), - c->stats[DS_MF], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_MF]->nvals && !h->codecs[DS_MF]) goto_err; - - //fprintf(stderr, "=== TS ===\n"); - h->codecs[DS_TS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TS]), - c->stats[DS_TS], - is_v4 ? E_LONG : E_INT, - NULL, fd->version, &fd->vv); - if (c->stats[DS_TS]->nvals && !h->codecs[DS_TS]) goto_err; - - //fprintf(stderr, "=== NP ===\n"); - h->codecs[DS_NP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NP]), - c->stats[DS_NP], - is_v4 ? 
E_LONG : E_INT, - NULL, fd->version, &fd->vv); - if (c->stats[DS_NP]->nvals && !h->codecs[DS_NP]) goto_err; - - //fprintf(stderr, "=== NF ===\n"); - h->codecs[DS_NF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NF]), - c->stats[DS_NF], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_NF]->nvals && !h->codecs[DS_NF]) goto_err; - - //fprintf(stderr, "=== RL ===\n"); - h->codecs[DS_RL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RL]), - c->stats[DS_RL], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_RL]->nvals && !h->codecs[DS_RL]) goto_err; - - //fprintf(stderr, "=== FN ===\n"); - h->codecs[DS_FN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FN]), - c->stats[DS_FN], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_FN]->nvals && !h->codecs[DS_FN]) goto_err; - - //fprintf(stderr, "=== FC ===\n"); - h->codecs[DS_FC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FC]), - c->stats[DS_FC], E_BYTE, NULL, - fd->version, &fd->vv); - if (c->stats[DS_FC]->nvals && !h->codecs[DS_FC]) goto_err; - - //fprintf(stderr, "=== FP ===\n"); - h->codecs[DS_FP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FP]), - c->stats[DS_FP], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_FP]->nvals && !h->codecs[DS_FP]) goto_err; - - //fprintf(stderr, "=== DL ===\n"); - h->codecs[DS_DL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_DL]), - c->stats[DS_DL], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_DL]->nvals && !h->codecs[DS_DL]) goto_err; - - //fprintf(stderr, "=== BA ===\n"); - h->codecs[DS_BA] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BA]), - c->stats[DS_BA], E_BYTE, NULL, - fd->version, &fd->vv); - if (c->stats[DS_BA]->nvals && !h->codecs[DS_BA]) goto_err; - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - cram_byte_array_len_encoder e; - - e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 - ? 
E_VARINT_UNSIGNED - : E_EXTERNAL; - e.len_dat = (void *)DS_BB_len; - //e.len_dat = (void *)DS_BB; - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)DS_BB; - - h->codecs[DS_BB] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - if (!h->codecs[DS_BB]) goto_err; - } else { - h->codecs[DS_BB] = NULL; - } - - //fprintf(stderr, "=== BS ===\n"); - h->codecs[DS_BS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BS]), - c->stats[DS_BS], E_BYTE, NULL, - fd->version, &fd->vv); - if (c->stats[DS_BS]->nvals && !h->codecs[DS_BS]) goto_err; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - h->codecs[DS_TL] = NULL; - h->codecs[DS_RI] = NULL; - h->codecs[DS_RS] = NULL; - h->codecs[DS_PD] = NULL; - h->codecs[DS_HC] = NULL; - h->codecs[DS_SC] = NULL; - - //fprintf(stderr, "=== TC ===\n"); - h->codecs[DS_TC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TC]), - c->stats[DS_TC], E_BYTE, NULL, - fd->version, &fd->vv); - if (c->stats[DS_TC]->nvals && !h->codecs[DS_TC]) goto_err; - - //fprintf(stderr, "=== TN ===\n"); - h->codecs[DS_TN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TN]), - c->stats[DS_TN], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_TN]->nvals && !h->codecs[DS_TN]) goto_err; - } else { - h->codecs[DS_TC] = NULL; - h->codecs[DS_TN] = NULL; - - //fprintf(stderr, "=== TL ===\n"); - h->codecs[DS_TL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TL]), - c->stats[DS_TL], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_TL]->nvals && !h->codecs[DS_TL]) goto_err; - - - //fprintf(stderr, "=== RI ===\n"); - h->codecs[DS_RI] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RI]), - c->stats[DS_RI], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_RI]->nvals && !h->codecs[DS_RI]) goto_err; - - //fprintf(stderr, "=== RS ===\n"); - h->codecs[DS_RS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RS]), - c->stats[DS_RS], E_INT, NULL, - fd->version, &fd->vv); - 
if (c->stats[DS_RS]->nvals && !h->codecs[DS_RS]) goto_err; - - //fprintf(stderr, "=== PD ===\n"); - h->codecs[DS_PD] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_PD]), - c->stats[DS_PD], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_PD]->nvals && !h->codecs[DS_PD]) goto_err; - - //fprintf(stderr, "=== HC ===\n"); - h->codecs[DS_HC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_HC]), - c->stats[DS_HC], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_HC]->nvals && !h->codecs[DS_HC]) goto_err; - - //fprintf(stderr, "=== SC ===\n"); - if (1) { - int i2[2] = {0, DS_SC}; - - h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, - E_BYTE_ARRAY, (void *)i2, - fd->version, &fd->vv); - } else { - // Appears to be no practical benefit to using this method, - // but it may work better if we start mixing SC, IN and BB - // elements into the same external block. - cram_byte_array_len_encoder e; - - e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 - ? E_VARINT_UNSIGNED - : E_EXTERNAL; - e.len_dat = (void *)DS_SC_len; - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)DS_SC; - - h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - } - if (!h->codecs[DS_SC]) goto_err; - } - - //fprintf(stderr, "=== IN ===\n"); - { - int i2[2] = {0, DS_IN}; - h->codecs[DS_IN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, - E_BYTE_ARRAY, (void *)i2, - fd->version, &fd->vv); - if (!h->codecs[DS_IN]) goto_err; - } - - h->codecs[DS_QS] = cram_encoder_init(E_EXTERNAL, NULL, E_BYTE, - (void *)DS_QS, - fd->version, &fd->vv); - if (!h->codecs[DS_QS]) goto_err; - { - int i2[2] = {0, DS_RN}; - h->codecs[DS_RN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, - E_BYTE_ARRAY, (void *)i2, - fd->version, &fd->vv); - if (!h->codecs[DS_RN]) goto_err; - } - - - /* Encode slices */ - for (i = 0; i < c->curr_slice; i++) { - hts_log_info("Encode slice %d", i); - - int local_embed_ref = - embed_ref>0 && 
c->slices[i]->hdr->ref_seq_id != -1 ? 1 : 0; - if (cram_encode_slice(fd, c, h, c->slices[i], local_embed_ref) != 0) - return -1; - } - - /* Create compression header */ - { - h->ref_seq_id = c->ref_seq_id; - h->ref_seq_start = c->ref_seq_start; - h->ref_seq_span = c->ref_seq_span; - h->num_records = c->num_records; - h->qs_seq_orient = c->qs_seq_orient; - // slight misnomer - sorted or treat as-if sorted (ap_delta force to 1) - h->AP_delta = c->pos_sorted; - memcpy(h->substitution_matrix, CRAM_SUBST_MATRIX, 20); - - if (!(c_hdr = cram_encode_compression_header(fd, c, h, embed_ref))) - return -1; - } - - /* Compute landmarks */ - /* Fill out slice landmarks */ - c->num_landmarks = c->curr_slice; - c->landmark = malloc(c->num_landmarks * sizeof(*c->landmark)); - if (!c->landmark) - return -1; - - /* - * Slice offset starts after the first block, so we need to simulate - * writing it to work out the correct offset - */ - { - slice_offset = c_hdr->method == RAW - ? c_hdr->uncomp_size - : c_hdr->comp_size; - slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(c_hdr->content_id) + - fd->vv.varint_size(c_hdr->comp_size) + - fd->vv.varint_size(c_hdr->uncomp_size); - } - - c->ref_seq_id = c->slices[0]->hdr->ref_seq_id; - if (c->ref_seq_id == -1 && CRAM_ge31(fd->version)) { - // Spec states span=0, but it broke our range queries. - // See commit message for this and prior. - c->ref_seq_start = 0; - c->ref_seq_span = 0; - } else { - c->ref_seq_start = c->slices[0]->hdr->ref_seq_start; - c->ref_seq_span = c->slices[0]->hdr->ref_seq_span; - } - for (i = 0; i < c->curr_slice; i++) { - cram_slice *s = c->slices[i]; - - c->num_blocks += s->hdr->num_blocks + 1; // slice header - c->landmark[i] = slice_offset; - - if (s->hdr->ref_seq_start + s->hdr->ref_seq_span > - c->ref_seq_start + c->ref_seq_span) { - c->ref_seq_span = s->hdr->ref_seq_start + s->hdr->ref_seq_span - - c->ref_seq_start; - } - - slice_offset += s->hdr_block->method == RAW - ? 
s->hdr_block->uncomp_size - : s->hdr_block->comp_size; - - slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(s->hdr_block->content_id) + - fd->vv.varint_size(s->hdr_block->comp_size) + - fd->vv.varint_size(s->hdr_block->uncomp_size); - - for (j = 0; j < s->hdr->num_blocks; j++) { - slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(s->block[j]->content_id) + - fd->vv.varint_size(s->block[j]->comp_size) + - fd->vv.varint_size(s->block[j]->uncomp_size); - - slice_offset += s->block[j]->method == RAW - ? s->block[j]->uncomp_size - : s->block[j]->comp_size; - } - } - c->length += slice_offset; // just past the final slice - - c->comp_hdr_block = c_hdr; - - if (c->ref_seq_id >= 0) { - cram_ref_decr(fd->refs, c->ref_seq_id); - } - - /* Cache references up-front if we have unsorted access patterns */ - if (!no_ref && c->refs_used) { - for (i = 0; i < fd->refs->nref; i++) { - if (c->refs_used[i]) - cram_ref_decr(fd->refs, i); - } - } - - return 0; - - err: - return -1; -} - - -/* - * Adds a feature code to a read within a slice. For purposes of minimising - * memory allocations and fragmentation we have one array of features for all - * reads within the slice. We return the index into this array for this new - * feature. - * - * Returns feature index on success - * -1 on failure. - */ -static int cram_add_feature(cram_container *c, cram_slice *s, - cram_record *r, cram_feature *f) { - if (s->nfeatures >= s->afeatures) { - s->afeatures = s->afeatures ? 
s->afeatures*2 : 1024; - s->features = realloc(s->features, s->afeatures*sizeof(*s->features)); - if (!s->features) - return -1; - } - - if (!r->nfeature++) { - r->feature = s->nfeatures; - if (cram_stats_add(c->stats[DS_FP], f->X.pos) < 0) - return -1; - } else { - if (cram_stats_add(c->stats[DS_FP], - f->X.pos - s->features[r->feature + r->nfeature-2].X.pos) < 0) - return -1; - - } - if (cram_stats_add(c->stats[DS_FC], f->X.code) < 0) - return -1; - - s->features[s->nfeatures++] = *f; - - return 0; -} - -static int cram_add_substitution(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *r, - int pos, char base, char qual, char ref) { - cram_feature f; - - // seq=ACGTN vs ref=ACGT or seq=ACGT vs ref=ACGTN - if (fd->L2[(uc)base]<4 || (fd->L2[(uc)base]<5 && fd->L2[(uc)ref]<4)) { - f.X.pos = pos+1; - f.X.code = 'X'; - f.X.base = fd->cram_sub_matrix[ref&0x1f][base&0x1f]; - if (cram_stats_add(c->stats[DS_BS], f.X.base) < 0) - return -1; - } else { - f.B.pos = pos+1; - f.B.code = 'B'; - f.B.base = base; - f.B.qual = qual; - if (cram_stats_add(c->stats[DS_BA], f.B.base) < 0) return -1; - if (cram_stats_add(c->stats[DS_QS], f.B.qual) < 0) return -1; - BLOCK_APPEND_CHAR(s->qual_blk, qual); - } - return cram_add_feature(c, s, r, &f); - - block_err: - return -1; -} - -static int cram_add_bases(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - - f.b.pos = pos+1; - f.b.code = 'b'; - f.b.seq_idx = base - (char *)BLOCK_DATA(s->seqs_blk); - f.b.len = len; - - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_base(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *r, - int pos, char base, char qual) { - cram_feature f; - f.B.pos = pos+1; - f.B.code = 'B'; - f.B.base = base; - f.B.qual = qual; - if (cram_stats_add(c->stats[DS_BA], base) < 0) return -1; - if (cram_stats_add(c->stats[DS_QS], qual) < 0) return -1; - BLOCK_APPEND_CHAR(s->qual_blk, qual); - return cram_add_feature(c, 
s, r, &f); - - block_err: - return -1; -} - -static int cram_add_quality(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *r, - int pos, char qual) { - cram_feature f; - f.Q.pos = pos+1; - f.Q.code = 'Q'; - f.Q.qual = qual; - if (cram_stats_add(c->stats[DS_QS], qual) < 0) return -1; - BLOCK_APPEND_CHAR(s->qual_blk, qual); - return cram_add_feature(c, s, r, &f); - - block_err: - return -1; -} - -static int cram_add_deletion(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.D.pos = pos+1; - f.D.code = 'D'; - f.D.len = len; - if (cram_stats_add(c->stats[DS_DL], len) < 0) return -1; - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_softclip(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base, int version) { - cram_feature f; - f.S.pos = pos+1; - f.S.code = 'S'; - f.S.len = len; - switch (CRAM_MAJOR_VERS(version)) { - case 1: - f.S.seq_idx = BLOCK_SIZE(s->base_blk); - BLOCK_APPEND(s->base_blk, base, len); - BLOCK_APPEND_CHAR(s->base_blk, '\0'); - break; - - case 2: - default: - f.S.seq_idx = BLOCK_SIZE(s->soft_blk); - if (base) { - BLOCK_APPEND(s->soft_blk, base, len); - } else { - int i; - for (i = 0; i < len; i++) - BLOCK_APPEND_CHAR(s->soft_blk, 'N'); - } - BLOCK_APPEND_CHAR(s->soft_blk, '\0'); - break; - - //default: - // // v3.0 onwards uses BB data-series - // f.S.seq_idx = BLOCK_SIZE(s->soft_blk); - } - return cram_add_feature(c, s, r, &f); - - block_err: - return -1; -} - -static int cram_add_hardclip(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.S.pos = pos+1; - f.S.code = 'H'; - f.S.len = len; - if (cram_stats_add(c->stats[DS_HC], len) < 0) return -1; - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_skip(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.S.pos = pos+1; - f.S.code = 'N'; - f.S.len = len; - if 
(cram_stats_add(c->stats[DS_RS], len) < 0) return -1; - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_pad(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.S.pos = pos+1; - f.S.code = 'P'; - f.S.len = len; - if (cram_stats_add(c->stats[DS_PD], len) < 0) return -1; - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_insertion(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.I.pos = pos+1; - if (len == 1) { - char b = base ? *base : 'N'; - f.i.code = 'i'; - f.i.base = b; - if (cram_stats_add(c->stats[DS_BA], b) < 0) return -1; - } else { - f.I.code = 'I'; - f.I.len = len; - f.S.seq_idx = BLOCK_SIZE(s->base_blk); - if (base) { - BLOCK_APPEND(s->base_blk, base, len); - } else { - int i; - for (i = 0; i < len; i++) - BLOCK_APPEND_CHAR(s->base_blk, 'N'); - } - BLOCK_APPEND_CHAR(s->base_blk, '\0'); - } - return cram_add_feature(c, s, r, &f); - - block_err: - return -1; -} - -/* - * Encodes auxiliary data. Largely duplicated from above, but done so to - * keep it simple and avoid a myriad of version ifs. - * - * Returns the RG header line pointed to by the BAM aux fields on success, - * NULL on failure or no rg present, also sets "*err" to non-zero - */ -static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, - cram_container *c, - cram_slice *s, cram_record *cr, - int verbatim_NM, int verbatim_MD, - int NM, kstring_t *MD, int cf_tag, - int no_ref, int *err) { - char *aux, *orig; - sam_hrec_rg_t *brg = NULL; - int aux_size = bam_get_l_aux(b); - cram_block *td_b = c->comp_hdr->TD_blk; - int TD_blk_size = BLOCK_SIZE(td_b), new; - char *key; - khint_t k; - - if (err) *err = 1; - - orig = aux = (char *)bam_aux(b); - - - // cF:i => Extra CRAM bit flags. 
- // 1: Don't auto-decode MD (may be invalid) - // 2: Don't auto-decode NM (may be invalid) - if (cf_tag && CRAM_MAJOR_VERS(fd->version) < 4) { - // Temporary copy of aux so we can ammend it. - aux = malloc(aux_size+4); - if (!aux) - return NULL; - - memcpy(aux, orig, aux_size); - aux[aux_size++] = 'c'; - aux[aux_size++] = 'F'; - aux[aux_size++] = 'C'; - aux[aux_size++] = cf_tag; - orig = aux; - } - - // Copy aux keys to td_b and aux values to slice aux blocks - while (aux - orig < aux_size && aux[0] != 0) { - int r; - - // RG:Z - if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') { - char *rg = &aux[3]; - brg = sam_hrecs_find_rg(fd->header->hrecs, rg); - if (brg) { - while (*aux++); - if (CRAM_MAJOR_VERS(fd->version) >= 4) - BLOCK_APPEND(td_b, "RG*", 3); - continue; - } else { - // RG:Z tag will be stored verbatim - hts_log_warning("Missing @RG header for RG \"%s\"", rg); - } - } - - // MD:Z - if (aux[0] == 'M' && aux[1] == 'D' && aux[2] == 'Z') { - if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_MD) { - if (MD && MD->s && strncasecmp(MD->s, aux+3, orig + aux_size - (aux+3)) == 0) { - while (*aux++); - if (CRAM_MAJOR_VERS(fd->version) >= 4) - BLOCK_APPEND(td_b, "MD*", 3); - continue; - } - } - } - - // NM:i - if (aux[0] == 'N' && aux[1] == 'M') { - if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_NM) { - int NM_ = bam_aux2i((uint8_t *)aux+2); - if (NM_ == NM) { - switch(aux[2]) { - case 'A': case 'C': case 'c': aux+=4; break; - case 'S': case 's': aux+=5; break; - case 'I': case 'i': case 'f': aux+=7; break; - default: - hts_log_error("Unhandled type code for NM tag"); - return NULL; - } - if (CRAM_MAJOR_VERS(fd->version) >= 4) - BLOCK_APPEND(td_b, "NM*", 3); - continue; - } - } - } - - BLOCK_APPEND(td_b, aux, 3); - - // Container level tags_used, for TD series - // Maps integer key ('X0i') to cram_tag_map struct. 
- int key = (aux[0]<<16)|(aux[1]<<8)|aux[2]; - k = kh_put(m_tagmap, c->tags_used, key, &r); - if (-1 == r) - return NULL; - else if (r != 0) - kh_val(c->tags_used, k) = NULL; - - if (r == 1) { - khint_t k_global; - - // Global tags_used for cram_metrics support - pthread_mutex_lock(&fd->metrics_lock); - k_global = kh_put(m_metrics, fd->tags_used, key, &r); - if (-1 == r) { - pthread_mutex_unlock(&fd->metrics_lock); - return NULL; - } - if (r >= 1) { - kh_val(fd->tags_used, k_global) = cram_new_metrics(); - if (!kh_val(fd->tags_used, k_global)) { - kh_del(m_metrics, fd->tags_used, k_global); - pthread_mutex_unlock(&fd->metrics_lock); - goto err; - } - } - - pthread_mutex_unlock(&fd->metrics_lock); - - int i2[2] = {'\t',key}; - size_t sk = key; - cram_tag_map *m = calloc(1, sizeof(*m)); - if (!m) - goto_err; - kh_val(c->tags_used, k) = m; - - cram_codec *c; - - // Use a block content id based on the tag id. - // Codec type depends on tag data type. - switch(aux[2]) { - case 'Z': case 'H': - // string as byte_array_stop - c = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, - E_BYTE_ARRAY, (void *)i2, - fd->version, &fd->vv); - break; - - case 'A': case 'c': case 'C': { - // byte array len, 1 byte - cram_byte_array_len_encoder e; - cram_stats st; - - if (CRAM_MAJOR_VERS(fd->version) <= 3) { - e.len_encoding = E_HUFFMAN; - e.len_dat = NULL; // will get codes from st - } else { - e.len_encoding = E_CONST_INT; - e.len_dat = NULL; // will get codes from st - } - memset(&st, 0, sizeof(st)); - if (cram_stats_add(&st, 1) < 0) goto block_err; - cram_stats_encoding(fd, &st); - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)sk; - - c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - break; - } - - case 's': case 'S': { - // byte array len, 2 byte - cram_byte_array_len_encoder e; - cram_stats st; - - if (CRAM_MAJOR_VERS(fd->version) <= 3) { - e.len_encoding = E_HUFFMAN; - e.len_dat = NULL; // will get codes from st - } else { - 
e.len_encoding = E_CONST_INT; - e.len_dat = NULL; // will get codes from st - } - memset(&st, 0, sizeof(st)); - if (cram_stats_add(&st, 2) < 0) goto block_err; - cram_stats_encoding(fd, &st); - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)sk; - - c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - break; - } - case 'i': case 'I': case 'f': { - // byte array len, 4 byte - cram_byte_array_len_encoder e; - cram_stats st; - - if (CRAM_MAJOR_VERS(fd->version) <= 3) { - e.len_encoding = E_HUFFMAN; - e.len_dat = NULL; // will get codes from st - } else { - e.len_encoding = E_CONST_INT; - e.len_dat = NULL; // will get codes from st - } - memset(&st, 0, sizeof(st)); - if (cram_stats_add(&st, 4) < 0) goto block_err; - cram_stats_encoding(fd, &st); - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)sk; - - c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - break; - } - - case 'B': { - // Byte array of variable size, but we generate our tag - // byte stream at the wrong stage (during reading and not - // after slice header construction). So we use - // BYTE_ARRAY_LEN with the length codec being external - // too. - cram_byte_array_len_encoder e; - - e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 - ? E_VARINT_UNSIGNED - : E_EXTERNAL; - e.len_dat = (void *)sk; // or key+128 for len? - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)sk; - - c = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - break; - } - - default: - hts_log_error("Unsupported SAM aux type '%c'", aux[2]); - c = NULL; - } - - if (!c) - goto_err; - - m->codec = c; - - // Link to fd-global tag metrics - pthread_mutex_lock(&fd->metrics_lock); - m->m = k_global ? 
(cram_metrics *)kh_val(fd->tags_used, k_global) : NULL; - pthread_mutex_unlock(&fd->metrics_lock); - } - - cram_tag_map *tm = (cram_tag_map *)kh_val(c->tags_used, k); - if (!tm) goto_err; - cram_codec *codec = tm->codec; - if (!tm->codec) goto_err; - - switch(aux[2]) { - case 'A': case 'C': case 'c': - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - return NULL; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - - aux+=3; - //codec->encode(s, codec, aux, 1); - // Functionally equivalent, but less code. - BLOCK_APPEND_CHAR(tm->blk, *aux); - aux++; - break; - - case 'S': case 's': - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - return NULL; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - - aux+=3; - //codec->encode(s, codec, aux, 2); - BLOCK_APPEND(tm->blk, aux, 2); - aux+=2; - break; - - case 'I': case 'i': case 'f': - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - return NULL; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - - aux+=3; - //codec->encode(s, codec, aux, 4); - BLOCK_APPEND(tm->blk, aux, 4); - aux+=4; - break; - - case 'd': - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - return NULL; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - - aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; - //codec->encode(s, codec, aux, 8); - BLOCK_APPEND(tm->blk, aux, 8); - aux+=8; - break; - - case 'Z': case 'H': - { - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - return NULL; - codec->out = tm->blk; - } - - char *aux_s; - aux += 3; - aux_s = aux; - while (*aux++); - if (codec->encode(s, codec, aux_s, aux - aux_s) < 0) - return NULL; - } - break; - - case 'B': { - int type = aux[3], blen; - uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) + - (((unsigned char *)aux)[5]<< 8) + - (((unsigned char *)aux)[6]<<16) + - (((unsigned char *)aux)[7]<<24)); - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, 
key))) - return NULL; - if (codec->u.e_byte_array_len.val_codec->codec == E_XDELTA) { - if (!(tm->blk2 = cram_new_block(EXTERNAL, key+128))) - return NULL; - codec->u.e_byte_array_len.len_codec->out = tm->blk2; - codec->u.e_byte_array_len.val_codec->u.e_xdelta.sub_codec->out = tm->blk; - } else { - codec->u.e_byte_array_len.len_codec->out = tm->blk; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - } - - // skip TN field - aux+=3; - - // We use BYTE_ARRAY_LEN with external length, so store that first - switch (type) { - case 'c': case 'C': - blen = count; - break; - case 's': case 'S': - blen = 2*count; - break; - case 'i': case 'I': case 'f': - blen = 4*count; - break; - default: - hts_log_error("Unknown sub-type '%c' for aux type 'B'", type); - return NULL; - } - - blen += 5; // sub-type & length - - if (codec->encode(s, codec, aux, blen) < 0) - return NULL; - aux += blen; - break; - } - default: - hts_log_error("Unknown aux type '%c'", aux[2]); - return NULL; - } - tm->blk->m = tm->m; - } - - // FIXME: sort BLOCK_DATA(td_b) by char[3] triples - - // And and increment TD hash entry - BLOCK_APPEND_CHAR(td_b, 0); - - // Duplicate key as BLOCK_DATA() can be realloced to a new pointer. 
- key = string_ndup(c->comp_hdr->TD_keys, - (char *)BLOCK_DATA(td_b) + TD_blk_size, - BLOCK_SIZE(td_b) - TD_blk_size); - if (!key) - goto block_err; - k = kh_put(m_s2i, c->comp_hdr->TD_hash, key, &new); - if (new < 0) { - return NULL; - } else if (new == 0) { - BLOCK_SIZE(td_b) = TD_blk_size; - } else { - kh_val(c->comp_hdr->TD_hash, k) = c->comp_hdr->nTL; - c->comp_hdr->nTL++; - } - - cr->TL = kh_val(c->comp_hdr->TD_hash, k); - if (cram_stats_add(c->stats[DS_TL], cr->TL) < 0) - goto block_err; - - if (orig != (char *)bam_aux(b)) - free(orig); - - if (err) *err = 0; - - return brg; - - err: - block_err: - if (orig != (char *)bam_aux(b)) - free(orig); - return NULL; -} - -/* - * During cram_next_container or before the final flush at end of - * file, we update the current slice headers and increment the slice - * number to the next slice. - * - * See cram_next_container() and cram_close(). - */ -void cram_update_curr_slice(cram_container *c, int version) { - cram_slice *s = c->slice; - if (c->multi_seq) { - s->hdr->ref_seq_id = -2; - s->hdr->ref_seq_start = 0; - s->hdr->ref_seq_span = 0; - } else if (c->curr_ref == -1 && CRAM_ge31(version)) { - // Spec states span=0, but it broke our range queries. - // See commit message for this and prior. - s->hdr->ref_seq_id = -1; - s->hdr->ref_seq_start = 0; - s->hdr->ref_seq_span = 0; - } else { - s->hdr->ref_seq_id = c->curr_ref; - s->hdr->ref_seq_start = c->first_base; - s->hdr->ref_seq_span = MAX(0, c->last_base - c->first_base + 1); - } - s->hdr->num_records = c->curr_rec; - - if (c->curr_slice == 0) { - if (c->ref_seq_id != s->hdr->ref_seq_id) - c->ref_seq_id = s->hdr->ref_seq_id; - c->ref_seq_start = c->first_base; - } - - c->curr_slice++; -} - -/* - * Handles creation of a new container or new slice, flushing any - * existing containers when appropriate. - * - * Really this is next slice, which may or may not lead to a new container. - * - * Returns cram_container pointer on success - * NULL on failure. 
- */ -static cram_container *cram_next_container(cram_fd *fd, bam_seq_t *b) { - cram_container *c = fd->ctr; - int i; - - /* First occurrence */ - if (c->curr_ref == -2) - c->curr_ref = bam_ref(b); - - if (c->slice) - cram_update_curr_slice(c, fd->version); - - /* Flush container */ - if (c->curr_slice == c->max_slice || - (bam_ref(b) != c->curr_ref && !c->multi_seq)) { - c->ref_seq_span = fd->last_base - c->ref_seq_start + 1; - hts_log_info("Flush container %d/%"PRId64"..%"PRId64, - c->ref_seq_id, c->ref_seq_start, - c->ref_seq_start + c->ref_seq_span -1); - - /* Encode slices */ - if (-1 == cram_flush_container_mt(fd, c)) - return NULL; - if (!fd->pool) { - // Move to sep func, as we need cram_flush_container for - // the closing phase to flush the partial container. - for (i = 0; i < c->max_slice; i++) { - cram_free_slice(c->slices[i]); - c->slices[i] = NULL; - } - - c->slice = NULL; - c->curr_slice = 0; - - /* Easy approach for purposes of freeing stats */ - cram_free_container(c); - } - - c = fd->ctr = cram_new_container(fd->seqs_per_slice, - fd->slices_per_container); - if (!c) - return NULL; - - pthread_mutex_lock(&fd->ref_lock); - c->no_ref = fd->no_ref; - c->embed_ref = fd->embed_ref; - c->record_counter = fd->record_counter; - pthread_mutex_unlock(&fd->ref_lock); - c->curr_ref = bam_ref(b); - } - - c->last_pos = c->first_base = c->last_base = bam_pos(b)+1; - - /* New slice */ - c->slice = c->slices[c->curr_slice] = - cram_new_slice(MAPPED_SLICE, c->max_rec); - if (!c->slice) - return NULL; - - if (c->multi_seq) { - c->slice->hdr->ref_seq_id = -2; - c->slice->hdr->ref_seq_start = 0; - c->slice->last_apos = 1; - } else { - c->slice->hdr->ref_seq_id = bam_ref(b); - // wrong for unsorted data, will fix during encoding. 
- c->slice->hdr->ref_seq_start = bam_pos(b)+1; - c->slice->last_apos = bam_pos(b)+1; - } - - c->curr_rec = 0; - c->s_num_bases = 0; - c->n_mapped = 0; - - // QO field: 0 implies original orientation, 1 implies sequence orientation - // 1 is often preferable for NovaSeq, but impact is slight. ~0.5% diff. - // Conversely other data sets it's often better than 1% saving for 0. - // Short of trying both and learning, for now we use use 0 for V4, 1 for V3. - c->qs_seq_orient = CRAM_MAJOR_VERS(fd->version) >= 4 ? 0 : 1; - - return c; -} - - -/* - * Converts a single bam record into a cram record. - * Possibly used within a thread. - * - * Returns 0 on success; - * -1 on failure - */ -static int process_one_read(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *cr, - bam_seq_t *b, int rnum, kstring_t *MD, - int embed_ref, int no_ref) { - int i, fake_qual = -1, NM = 0; - char *cp; - char *ref, *seq, *qual; - - // Any places with N in seq and/or reference can lead to ambiguous - // interpretation of the SAM NM:i tag. So we store these verbatim - // to ensure valid data round-trips the same regardless of who - // defines it as valid. - // Similarly when alignments go beyond end of the reference. - int verbatim_NM = fd->store_nm; - int verbatim_MD = fd->store_md; - - // FIXME: multi-ref containers - - cr->flags = bam_flag(b); - cr->len = bam_seq_len(b); - uint8_t *md; - if (!(md = bam_aux_get(b, "MD"))) - MD = NULL; - else - MD->l = 0; - - int cf_tag = 0; - - if (embed_ref == 2) { - cf_tag = MD ? 0 : 1; // No MD - cf_tag |= bam_aux_get(b, "NM") ? 0 : 2; // No NM - } - - //fprintf(stderr, "%s => %d\n", rg ? rg : "\"\"", cr->rg); - - ref = c->ref ? 
c->ref - (c->ref_start-1) : NULL; - cr->ref_id = bam_ref(b); - if (cram_stats_add(c->stats[DS_RI], cr->ref_id) < 0) - goto block_err; - if (cram_stats_add(c->stats[DS_BF], fd->cram_flag_swap[cr->flags & 0xfff]) < 0) - goto block_err; - - // Non reference based encoding means storing the bases verbatim as features, which in - // turn means every base also has a quality already stored. - if (!no_ref || CRAM_MAJOR_VERS(fd->version) >= 3) - cr->cram_flags |= CRAM_FLAG_PRESERVE_QUAL_SCORES; - - if (cr->len <= 0 && CRAM_MAJOR_VERS(fd->version) >= 3) - cr->cram_flags |= CRAM_FLAG_NO_SEQ; - //cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK); - - c->num_bases += cr->len; - cr->apos = bam_pos(b)+1; - if (c->pos_sorted) { - if (cr->apos < s->last_apos && !fd->ap_delta) { - c->pos_sorted = 0; - } else { - if (cram_stats_add(c->stats[DS_AP], cr->apos - s->last_apos) < 0) - goto block_err; - s->last_apos = cr->apos; - } - } else { - //cram_stats_add(c->stats[DS_AP], cr->apos); - } - c->max_apos += (cr->apos > c->max_apos) * (cr->apos - c->max_apos); - - /* - * This seqs_ds is largely pointless and it could reuse the same memory - * over and over. - * s->base_blk is what we need for encoding. - */ - cr->seq = BLOCK_SIZE(s->seqs_blk); - cr->qual = BLOCK_SIZE(s->qual_blk); - BLOCK_GROW(s->seqs_blk, cr->len+1); - BLOCK_GROW(s->qual_blk, cr->len); - - // Convert BAM nibble encoded sequence to string of base pairs - seq = cp = (char *)BLOCK_END(s->seqs_blk); - *seq = 0; - nibble2base(bam_seq(b), cp, cr->len); - BLOCK_SIZE(s->seqs_blk) += cr->len; - - qual = cp = (char *)bam_qual(b); - - - /* Copy and parse */ - if (!(cr->flags & BAM_FUNMAP)) { - uint32_t *cig_to, *cig_from; - int64_t apos = cr->apos-1, spos = 0; - int64_t MD_last = apos; // last position of edit in MD tag - - cr->cigar = s->ncigar; - cr->ncigar = bam_cigar_len(b); - while (cr->cigar + cr->ncigar >= s->cigar_alloc) { - s->cigar_alloc = s->cigar_alloc ? 
s->cigar_alloc*2 : 1024; - s->cigar = realloc(s->cigar, s->cigar_alloc * sizeof(*s->cigar)); - if (!s->cigar) - return -1; - } - - cig_to = (uint32_t *)s->cigar; - cig_from = (uint32_t *)bam_cigar(b); - - cr->feature = 0; - cr->nfeature = 0; - for (i = 0; i < cr->ncigar; i++) { - enum cigar_op cig_op = cig_from[i] & BAM_CIGAR_MASK; - uint32_t cig_len = cig_from[i] >> BAM_CIGAR_SHIFT; - cig_to[i] = cig_from[i]; - - /* Can also generate events from here for CRAM diffs */ - - switch (cig_op) { - int l; - - // Don't trust = and X ops to be correct. - case BAM_CMATCH: - case BAM_CBASE_MATCH: - case BAM_CBASE_MISMATCH: - //fprintf(stderr, "\nBAM_CMATCH\nR: %.*s\nS: %.*s\n", - // cig_len, &ref[apos], cig_len, &seq[spos]); - l = 0; - if (!no_ref && cr->len) { - int end = cig_len+apos < c->ref_end - ? cig_len : c->ref_end - apos; - char *sp = &seq[spos]; - char *rp = &ref[apos]; - char *qp = &qual[spos]; - if (end > cr->len) { - hts_log_error("CIGAR and query sequence are of different length"); - return -1; - } - for (l = 0; l < end; l++) { - // This case is just too disputed and different tools - // interpret these in different ways. We give up and - // store verbatim. - if (rp[l] == 'N' && sp[l] == 'N') - verbatim_NM = verbatim_MD = 1; - if (rp[l] != sp[l]) { - // Build our own MD tag if one is on the sequence, so - // we can ensure it matches and thus can be discarded. - if (MD && ref) { - if (kputuw(apos+l - MD_last, MD) < 0) goto err; - if (kputc(rp[l], MD) < 0) goto err; - MD_last = apos+l+1; - } - NM++; - if (!sp[l]) - break; - if (0 && CRAM_MAJOR_VERS(fd->version) >= 3) { -#if 0 - // Disabled for the time being as it doesn't - // seem to gain us much. 
- int ol=l; - while (l 1) { - if (cram_add_bases(fd, c, s, cr, spos+ol, - l-ol, &seq[spos+ol])) - return -1; - l--; - } else { - l = ol; - if (cram_add_substitution(fd, c, s, cr, - spos+l, sp[l], - qp[l], rp[l])) - return -1; - } -#else - // With urmap pushed to the limit and lots - // of unaligned data (should be soft-clipped) - // this saves ~2-7%. Worth it? - int nl = l; - int max_end = nl, max_score = 0, score = 0; - while (nl < end) { - if (rp[nl] != sp[nl]) { - score += 3; - if (max_score < score) { - max_score = score; - max_end = nl; - } - } else { - score--; - if (score < -2 || - max_score - score > 7) - break; - } - nl++; - } - if (max_score > 20) { - cram_add_bases(fd, c, s, cr, spos+l, - max_end-l, &seq[spos+l]); - l = max_end-1; - } else { - while (l < nl) { - if (rp[l] != sp[l]) - cram_add_substitution(fd, c, s, - cr, spos+l, - sp[l], qp[l], - rp[l]); - l++; - } - l--; - } -#endif - } else { - if (cram_add_substitution(fd, c, s, cr, spos+l, - sp[l], qp[l], rp[l])) - return -1; - } - } - } - spos += l; - apos += l; - } - - if (l < cig_len && cr->len) { - if (no_ref) { - if (CRAM_MAJOR_VERS(fd->version) == 3) { - if (cram_add_bases(fd, c, s, cr, spos, - cig_len-l, &seq[spos])) - return -1; - spos += cig_len-l; - } else { - for (; l < cig_len && seq[spos]; l++, spos++) { - if (cram_add_base(fd, c, s, cr, spos, - seq[spos], qual[spos])) - return -1; - } - } - } else { - /* off end of sequence or non-ref based output */ - verbatim_NM = verbatim_MD = 1; - for (; l < cig_len && seq[spos]; l++, spos++) { - if (cram_add_base(fd, c, s, cr, spos, - seq[spos], qual[spos])) - return -1; - } - } - apos += cig_len; - } else if (!cr->len) { - /* Seq "*" */ - verbatim_NM = verbatim_MD = 1; - apos += cig_len; - spos += cig_len; - } - break; - - case BAM_CDEL: - if (MD && ref) { - if (kputuw(apos - MD_last, MD) < 0) goto err; - if (apos < c->ref_end) { - if (kputc_('^', MD) < 0) goto err; - if (kputsn(&ref[apos], MIN(c->ref_end - apos, cig_len), MD) < 0) - goto err; - } 
- } - NM += cig_len; - - if (cram_add_deletion(c, s, cr, spos, cig_len, &seq[spos])) - return -1; - apos += cig_len; - MD_last = apos; - break; - - case BAM_CREF_SKIP: - if (cram_add_skip(c, s, cr, spos, cig_len, &seq[spos])) - return -1; - apos += cig_len; - MD_last += cig_len; - break; - - case BAM_CINS: - if (cram_add_insertion(c, s, cr, spos, cig_len, - cr->len ? &seq[spos] : NULL)) - return -1; - if (no_ref && cr->len) { - for (l = 0; l < cig_len; l++, spos++) { - cram_add_quality(fd, c, s, cr, spos, qual[spos]); - } - } else { - spos += cig_len; - } - NM += cig_len; - break; - - case BAM_CSOFT_CLIP: - if (cram_add_softclip(c, s, cr, spos, cig_len, - cr->len ? &seq[spos] : NULL, - fd->version)) - return -1; - - if (no_ref && - !(cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { - if (cr->len) { - for (l = 0; l < cig_len; l++, spos++) { - cram_add_quality(fd, c, s, cr, spos, qual[spos]); - } - } else { - for (l = 0; l < cig_len; l++, spos++) { - cram_add_quality(fd, c, s, cr, spos, -1); - } - } - } else { - spos += cig_len; - } - break; - - case BAM_CHARD_CLIP: - if (cram_add_hardclip(c, s, cr, spos, cig_len, &seq[spos])) - return -1; - break; - - case BAM_CPAD: - if (cram_add_pad(c, s, cr, spos, cig_len, &seq[spos])) - return -1; - break; - - default: - hts_log_error("Unknown CIGAR op code %d", cig_op); - return -1; - } - } - if (cr->len && spos != cr->len) { - hts_log_error("CIGAR and query sequence are of different length"); - return -1; - } - fake_qual = spos; - cr->aend = no_ref ? 
apos : MIN(apos, c->ref_end); - if (cram_stats_add(c->stats[DS_FN], cr->nfeature) < 0) - goto block_err; - - if (MD && ref) - if (kputuw(apos - MD_last, MD) < 0) goto err; - } else { - // Unmapped - cr->cram_flags |= CRAM_FLAG_PRESERVE_QUAL_SCORES; - cr->cigar = 0; - cr->ncigar = 0; - cr->nfeature = 0; - cr->aend = cr->apos; - for (i = 0; i < cr->len; i++) - if (cram_stats_add(c->stats[DS_BA], seq[i]) < 0) - goto block_err; - fake_qual = 0; - } - - cr->ntags = 0; //cram_stats_add(c->stats[DS_TC], cr->ntags); - int err = 0; - sam_hrec_rg_t *brg = - cram_encode_aux(fd, b, c, s, cr, verbatim_NM, verbatim_MD, NM, MD, - cf_tag, no_ref, &err); - if (err) - goto block_err; - - /* Read group, identified earlier */ - if (brg) { - cr->rg = brg->id; - } else if (CRAM_MAJOR_VERS(fd->version) == 1) { - sam_hrec_rg_t *brg = sam_hrecs_find_rg(fd->header->hrecs, "UNKNOWN"); - if (!brg) goto block_err; - cr->rg = brg->id; - } else { - cr->rg = -1; - } - if (cram_stats_add(c->stats[DS_RG], cr->rg) < 0) - goto block_err; - - /* - * Append to the qual block now. We do this here as - * cram_add_substitution() can generate BA/QS events which need to - * be in the qual block before we append the rest of the data. - */ - if (cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES) { - /* Special case of seq "*" */ - if (cr->len == 0) { - cr->len = fake_qual; - BLOCK_GROW(s->qual_blk, cr->len); - cp = (char *)BLOCK_END(s->qual_blk); - memset(cp, 255, cr->len); - } else { - BLOCK_GROW(s->qual_blk, cr->len); - cp = (char *)BLOCK_END(s->qual_blk); - char *from = (char *)&bam_qual(b)[0]; - char *to = &cp[0]; - memcpy(to, from, cr->len); - - // Store quality in original orientation for better compression. - if (!c->qs_seq_orient) { - if (cr->flags & BAM_FREVERSE) { - int i, j; - for (i = 0, j = cr->len-1; i < j; i++, j--) { - unsigned char c; - c = to[i]; - to[i] = to[j]; - to[j] = c; - } - } - } - } - BLOCK_SIZE(s->qual_blk) += cr->len; - } else { - if (cr->len == 0) - cr->len = fake_qual >= 0 ? 
fake_qual : cr->aend - cr->apos + 1; - } - - if (cram_stats_add(c->stats[DS_RL], cr->len) < 0) - goto block_err; - - /* Now we know apos and aend both, update mate-pair information */ - { - int new; - khint_t k; - int sec = (cr->flags & BAM_FSECONDARY) ? 1 : 0; - - //fprintf(stderr, "Checking %"PRId64"/%.*s\t", rnum, - // cr->name_len, DSTRING_STR(s->name_ds)+cr->name); - if (cr->flags & BAM_FPAIRED) { - char *key = string_ndup(s->pair_keys, bam_name(b), bam_name_len(b)); - if (!key) - return -1; - - k = kh_put(m_s2i, s->pair[sec], key, &new); - if (-1 == new) - return -1; - else if (new > 0) - kh_val(s->pair[sec], k) = rnum; - } else { - new = 1; - k = 0; // Prevents false-positive warning from gcc -Og - } - - if (new == 0) { - cram_record *p = &s->crecs[kh_val(s->pair[sec], k)]; - int64_t aleft, aright; - int sign; - - aleft = MIN(cr->apos, p->apos); - aright = MAX(cr->aend, p->aend); - if (cr->apos < p->apos) { - sign = 1; - } else if (cr->apos > p->apos) { - sign = -1; - } else if (cr->flags & BAM_FREAD1) { - sign = 1; - } else { - sign = -1; - } - - // This vs p: tlen, matepos, flags. Permit TLEN 0 and/or TLEN +/- - // a small amount, if appropriate options set. 
- if ((!fd->tlen_zero && MAX(bam_mate_pos(b)+1, 0) != p->apos) && - !(fd->tlen_zero && bam_mate_pos(b) == 0)) - goto detached; - - if (((bam_flag(b) & BAM_FMUNMAP) != 0) != - ((p->flags & BAM_FUNMAP) != 0)) - goto detached; - - if (((bam_flag(b) & BAM_FMREVERSE) != 0) != - ((p->flags & BAM_FREVERSE) != 0)) - goto detached; - - - // p vs this: tlen, matepos, flags - if (p->ref_id != cr->ref_id && - !(fd->tlen_zero && p->ref_id == -1)) - goto detached; - - if (p->mate_pos != cr->apos && - !(fd->tlen_zero && p->mate_pos == 0)) - goto detached; - - if (((p->flags & BAM_FMUNMAP) != 0) != - ((p->mate_flags & CRAM_M_UNMAP) != 0)) - goto detached; - - if (((p->flags & BAM_FMREVERSE) != 0) != - ((p->mate_flags & CRAM_M_REVERSE) != 0)) - goto detached; - - // Supplementary reads are just too ill defined - if ((cr->flags & BAM_FSUPPLEMENTARY) || - (p->flags & BAM_FSUPPLEMENTARY)) - goto detached; - - // When in lossy name mode, if a read isn't detached we - // cannot store the name. The corollary is that when we - // must store the name, it must be detached (inefficient). - if (fd->lossy_read_names && - (!(cr->cram_flags & CRAM_FLAG_DISCARD_NAME) || - !((p->cram_flags & CRAM_FLAG_DISCARD_NAME)))) - goto detached; - - // Now check TLEN. We do this last as sometimes it's the - // only thing that differs. In CRAM4 we have a better way - // of handling this that doesn't break detached status - int explicit_tlen = 0; - int tflag1 = ((bam_ins_size(b) && - llabs(bam_ins_size(b) - sign*(aright-aleft+1)) - > fd->tlen_approx) - || (!bam_ins_size(b) && !fd->tlen_zero)); - - int tflag2 = ((p->tlen && llabs(p->tlen - -sign*(aright-aleft+1)) - > fd->tlen_approx) - || (!p->tlen && !fd->tlen_zero)); - - if (tflag1 || tflag2) { - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - explicit_tlen = CRAM_FLAG_EXPLICIT_TLEN; - } else { - // Stil do detached for unmapped data in CRAM4 as this - // also impacts RNEXT calculation. 
- goto detached; - } - } - - /* - * The fields below are unused when encoding this read as it is - * no longer detached. In theory they may get referred to when - * processing a 3rd or 4th read in this template?, so we set them - * here just to be sure. - * - * They do not need cram_stats_add() calls those as they are - * not emitted. - */ - cr->mate_pos = p->apos; - cram_stats_add(c->stats[DS_NP], cr->mate_pos); - cr->tlen = explicit_tlen ? bam_ins_size(b) : sign*(aright-aleft+1); - cram_stats_add(c->stats[DS_TS], cr->tlen); - cr->mate_flags = - ((p->flags & BAM_FMUNMAP) == BAM_FMUNMAP) * CRAM_M_UNMAP + - ((p->flags & BAM_FMREVERSE) == BAM_FMREVERSE) * CRAM_M_REVERSE; - - // Decrement statistics aggregated earlier - if (p->cram_flags & CRAM_FLAG_STATS_ADDED) { - cram_stats_del(c->stats[DS_NP], p->mate_pos); - cram_stats_del(c->stats[DS_MF], p->mate_flags); - if (!(p->cram_flags & CRAM_FLAG_EXPLICIT_TLEN)) - cram_stats_del(c->stats[DS_TS], p->tlen); - cram_stats_del(c->stats[DS_NS], p->mate_ref_id); - } - - /* Similarly we could correct the p-> values too, but these will no - * longer have any code that refers back to them as the new 'p' - * for this template is our current 'cr'. 
- */ - //p->mate_pos = cr->apos; - //p->mate_flags = - // ((cr->flags & BAM_FMUNMAP) == BAM_FMUNMAP) * CRAM_M_UNMAP + - // ((cr->flags & BAM_FMREVERSE) == BAM_FMREVERSE)* CRAM_M_REVERSE; - //p->tlen = p->apos - cr->aend; - - // Clear detached from cr flags - cr->cram_flags &= ~CRAM_FLAG_DETACHED; - cr->cram_flags |= explicit_tlen; - if (cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK) < 0) - goto block_err; - - // Clear detached from p flags and set downstream - if (p->cram_flags & CRAM_FLAG_STATS_ADDED) { - cram_stats_del(c->stats[DS_CF], p->cram_flags & CRAM_FLAG_MASK); - p->cram_flags &= ~CRAM_FLAG_STATS_ADDED; - } - - p->cram_flags &= ~CRAM_FLAG_DETACHED; - p->cram_flags |= CRAM_FLAG_MATE_DOWNSTREAM | explicit_tlen;; - if (cram_stats_add(c->stats[DS_CF], p->cram_flags & CRAM_FLAG_MASK) < 0) - goto block_err; - - p->mate_line = rnum - (kh_val(s->pair[sec], k) + 1); - if (cram_stats_add(c->stats[DS_NF], p->mate_line) < 0) - goto block_err; - - kh_val(s->pair[sec], k) = rnum; - } else { - detached: - //fprintf(stderr, "unpaired\n"); - - /* Derive mate flags from this flag */ - cr->mate_flags = 0; - if (bam_flag(b) & BAM_FMUNMAP) - cr->mate_flags |= CRAM_M_UNMAP; - if (bam_flag(b) & BAM_FMREVERSE) - cr->mate_flags |= CRAM_M_REVERSE; - - if (cram_stats_add(c->stats[DS_MF], cr->mate_flags) < 0) - goto block_err; - - cr->mate_pos = MAX(bam_mate_pos(b)+1, 0); - if (cram_stats_add(c->stats[DS_NP], cr->mate_pos) < 0) - goto block_err; - - cr->tlen = bam_ins_size(b); - if (cram_stats_add(c->stats[DS_TS], cr->tlen) < 0) - goto block_err; - - cr->cram_flags |= CRAM_FLAG_DETACHED; - if (cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK) < 0) - goto block_err; - if (cram_stats_add(c->stats[DS_NS], bam_mate_ref(b)) < 0) - goto block_err; - - cr->cram_flags |= CRAM_FLAG_STATS_ADDED; - } - } - - cr->mqual = bam_map_qual(b); - if (cram_stats_add(c->stats[DS_MQ], cr->mqual) < 0) - goto block_err; - - cr->mate_ref_id = bam_mate_ref(b); - - if 
(!(bam_flag(b) & BAM_FUNMAP)) { - if (c->first_base > cr->apos) - c->first_base = cr->apos; - - if (c->last_base < cr->aend) - c->last_base = cr->aend; - } - - return 0; - - block_err: - err: - return -1; -} - -/* - * Write iterator: put BAM format sequences into a CRAM file. - * We buffer up a containers worth of data at a time. - * - * Returns 0 on success - * -1 on failure - */ -int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) { - cram_container *c; - - if (!fd->ctr) { - fd->ctr = cram_new_container(fd->seqs_per_slice, - fd->slices_per_container); - if (!fd->ctr) - return -1; - fd->ctr->record_counter = fd->record_counter; - - pthread_mutex_lock(&fd->ref_lock); - fd->ctr->no_ref = fd->no_ref; - fd->ctr->embed_ref = fd->embed_ref; - pthread_mutex_unlock(&fd->ref_lock); - } - c = fd->ctr; - - int embed_ref = c->embed_ref; - - if (!c->slice || c->curr_rec == c->max_rec || - (bam_ref(b) != c->curr_ref && c->curr_ref >= -1) || - (c->s_num_bases + c->s_aux_bytes >= fd->bases_per_slice)) { - int slice_rec, curr_rec, multi_seq = fd->multi_seq == 1; - int curr_ref = c->slice ? c->curr_ref : bam_ref(b); - - /* - * Start packing slices when we routinely have under 1/4tr full. - * - * This option isn't available if we choose to embed references - * since we can only have one per slice. - * - * The multi_seq var here refers to our intention for the next slice. - * This slice has already been encoded so we output as-is. 
- */ - if (fd->multi_seq == -1 && c->curr_rec < c->max_rec/4+10 && - fd->last_slice && fd->last_slice < c->max_rec/4+10 && - embed_ref<=0) { - if (!c->multi_seq) - hts_log_info("Multi-ref enabled for next container"); - multi_seq = 1; - } else if (fd->multi_seq == 1) { - pthread_mutex_lock(&fd->metrics_lock); - if (fd->last_RI_count <= c->max_slice && fd->multi_seq_user != 1) { - multi_seq = 0; - hts_log_info("Multi-ref disabled for next container"); - } - pthread_mutex_unlock(&fd->metrics_lock); - } - - slice_rec = c->slice_rec; - curr_rec = c->curr_rec; - - if (CRAM_MAJOR_VERS(fd->version) == 1 || - c->curr_rec == c->max_rec || fd->multi_seq != 1 || !c->slice || - c->s_num_bases + c->s_aux_bytes >= fd->bases_per_slice) { - if (NULL == (c = cram_next_container(fd, b))) { - if (fd->ctr) { - // prevent cram_close attempting to flush - fd->ctr_mt = fd->ctr; // delay free when threading - fd->ctr = NULL; - } - return -1; - } - } - - /* - * Due to our processing order, some things we've already done we - * cannot easily undo. So when we first notice we should be packing - * multiple sequences per container we emit the small partial - * container as-is and then start a fresh one in a different mode. - */ - if (multi_seq == 0 && fd->multi_seq == 1 && fd->multi_seq_user == -1) { - // User selected auto-mode, we're currently using multi-seq, but - // have detected we don't need to. Switch back to auto. - fd->multi_seq = -1; - } else if (multi_seq) { - // We detected we need multi-seq - fd->multi_seq = 1; - c->multi_seq = 1; - c->pos_sorted = 0; - - // Cram_next_container may end up flushing an existing one and - // triggering fd->embed_ref=2 if no reference is found. - // Embedded refs are incompatible with multi-seq, so we bail - // out and switch to no_ref in this scenario. We do this - // within the container only, as multi_seq may be temporary - // and we switch back away from it again. 
- pthread_mutex_lock(&fd->ref_lock); - if (fd->embed_ref > 0 && c->curr_rec == 0 && c->curr_slice == 0) { - hts_log_warning("Changing from embed_ref to no_ref mode"); - // Should we update fd->embed_ref and no_ref here too? - // Doing so means if we go into multi-seq and back out - // again, eg due a cluster of tiny refs in the middle of - // much larger ones, then we bake in no-ref mode. - // - // However for unsorted data we're realistically not - // going to switch back. - c->embed_ref = fd->embed_ref = 0; // or -1 for auto? - c->no_ref = fd->no_ref = 1; - } - pthread_mutex_unlock(&fd->ref_lock); - - if (!c->refs_used) { - pthread_mutex_lock(&fd->ref_lock); - c->refs_used = calloc(fd->refs->nref, sizeof(int)); - pthread_mutex_unlock(&fd->ref_lock); - if (!c->refs_used) - return -1; - } - } - - fd->last_slice = curr_rec - slice_rec; - c->slice_rec = c->curr_rec; - - // Have we seen this reference before? - if (bam_ref(b) >= 0 && curr_ref >= 0 && bam_ref(b) != curr_ref && - embed_ref<=0 && !fd->unsorted && multi_seq) { - - if (!c->refs_used) { - pthread_mutex_lock(&fd->ref_lock); - c->refs_used = calloc(fd->refs->nref, sizeof(int)); - pthread_mutex_unlock(&fd->ref_lock); - if (!c->refs_used) - return -1; - } else if (c->refs_used && c->refs_used[bam_ref(b)]) { - pthread_mutex_lock(&fd->ref_lock); - fd->unsorted = 1; - fd->multi_seq = 1; - pthread_mutex_unlock(&fd->ref_lock); - } - } - - c->curr_ref = bam_ref(b); - if (c->refs_used && c->curr_ref >= 0) c->refs_used[c->curr_ref]++; - } - - if (!c->bams) { - /* First time through, allocate a set of bam pointers */ - pthread_mutex_lock(&fd->bam_list_lock); - if (fd->bl) { - spare_bams *spare = fd->bl; - c->bams = spare->bams; - fd->bl = spare->next; - free(spare); - } else { - c->bams = calloc(c->max_c_rec, sizeof(bam_seq_t *)); - if (!c->bams) { - pthread_mutex_unlock(&fd->bam_list_lock); - return -1; - } - } - pthread_mutex_unlock(&fd->bam_list_lock); - } - - /* Copy or alloc+copy the bam record, for later encoding 
*/ - if (c->bams[c->curr_c_rec]) { - if (bam_copy1(c->bams[c->curr_c_rec], b) == NULL) - return -1; - } else { - c->bams[c->curr_c_rec] = bam_dup1(b); - if (c->bams[c->curr_c_rec] == NULL) - return -1; - } - c->curr_rec++; - c->curr_c_rec++; - c->s_num_bases += bam_seq_len(b); - c->s_aux_bytes += bam_get_l_aux(b); - c->n_mapped += (bam_flag(b) & BAM_FUNMAP) ? 0 : 1; - fd->record_counter++; - - return 0; -} diff --git a/src/htslib-1.18/cram/cram_external.c b/src/htslib-1.18/cram/cram_external.c deleted file mode 100644 index 7455185..0000000 --- a/src/htslib-1.18/cram/cram_external.c +++ /dev/null @@ -1,836 +0,0 @@ -/* -Copyright (c) 2015, 2018-2020, 2022-2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * External CRAM interface. - * - * Internally we're happy to use macros and to grub around in the cram - * structures. This isn't very sustainable for an externally usable - * ABI though, so we have anonymous structs and accessor functions too - * to permit software such as samtools reheader to manipulate cram - * containers and blocks in a robust manner. - */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include -#include - -#if defined(HAVE_EXTERNAL_LIBHTSCODECS) -#include -#else -#include "../htscodecs/htscodecs/rANS_static4x16.h" -#endif - -#include "../htslib/hfile.h" -#include "cram.h" - -/* - *----------------------------------------------------------------------------- - * cram_fd - */ -sam_hdr_t *cram_fd_get_header(cram_fd *fd) { return fd->header; } -void cram_fd_set_header(cram_fd *fd, sam_hdr_t *hdr) { fd->header = hdr; } - -int cram_fd_get_version(cram_fd *fd) { return fd->version; } -void cram_fd_set_version(cram_fd *fd, int vers) { fd->version = vers; } - -int cram_major_vers(cram_fd *fd) { return CRAM_MAJOR_VERS(fd->version); } -int cram_minor_vers(cram_fd *fd) { return CRAM_MINOR_VERS(fd->version); } - -hFILE *cram_fd_get_fp(cram_fd *fd) { return fd->fp; } -void cram_fd_set_fp(cram_fd *fd, hFILE *fp) { fd->fp = fp; } - - -/* - *----------------------------------------------------------------------------- - * cram_container - */ -int32_t cram_container_get_length(cram_container *c) { - 
return c->length; -} - -void cram_container_set_length(cram_container *c, int32_t length) { - c->length = length; -} - - -int32_t cram_container_get_num_blocks(cram_container *c) { - return c->num_blocks; -} - -void cram_container_set_num_blocks(cram_container *c, int32_t num_blocks) { - c->num_blocks = num_blocks; -} - -int32_t cram_container_get_num_records(cram_container *c) { - return c->num_records; -} - -int64_t cram_container_get_num_bases(cram_container *c) { - return c->num_bases; -} - - -/* Returns the landmarks[] array and the number of elements - * in num_landmarks. - */ -int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks) { - *num_landmarks = c->num_landmarks; - return c->landmark; -} - -/* Sets the landmarks[] array (pointer copy, not a memory dup) and - * num_landmarks value. - */ -void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks, - int32_t *landmarks) { - c->num_landmarks = num_landmarks; - c->landmark = landmarks; -} - - -/* Returns true if the container is empty (EOF marker) */ -int cram_container_is_empty(cram_fd *fd) { - return fd->empty_container; -} - - -/* - *----------------------------------------------------------------------------- - * cram_block_compression_hdr - */ - -/* - * Utility function to edit an RG id. - * This is only possible if there is one single RG value used and it - * is in the container compression header using HUFFMAN or BETA - * codec. In this case it is essentially hard coded and needs no - * editing of external (or worse, CORE) blocks. - * - * Returns 0 on success - * -1 on failure - */ -// Or arbitrary set compression header constant? 
- -static int cram_block_compression_hdr_set_DS(cram_block_compression_hdr *ch, - int ds, int new_rg) { - if (!ch || !ch->codecs[ds]) - return -1; - - switch (ch->codecs[ds]->codec) { - case E_HUFFMAN: - if (ch->codecs[ds]->u.huffman.ncodes != 1) - return -1; - ch->codecs[ds]->u.huffman.codes[0].symbol = new_rg; - return 0; - - case E_BETA: - if (ch->codecs[ds]->u.beta.nbits != 0) - return -1; - ch->codecs[ds]->u.beta.offset = -new_rg; - return 0; - - default: - break; - } - - return -1; -} - -int cram_block_compression_hdr_set_rg(cram_block_compression_hdr *ch, int new_rg) { - return cram_block_compression_hdr_set_DS(ch, DS_RG, new_rg); -} - -/* - * Converts a cram_block_compression_hdr struct used for decoding to - * one used for encoding. Maybe this should be a transparent - * operation applied on-demand. - * - * Returns 0 on success - * -1 on failure - */ -int cram_block_compression_hdr_decoder2encoder(cram_fd *fd, - cram_block_compression_hdr *ch) { - int i; - - if (!ch) - return -1; - - for (i = 0; i < DS_END; i++) { - cram_codec *co = ch->codecs[i]; - if (!co) - continue; - - if (-1 == cram_codec_decoder2encoder(fd, co)) - return -1; - } - - return 0; -} - -typedef struct { - cram_block_compression_hdr *hdr; - cram_map *curr_map; - int idx; - int is_tag; // phase 2 using tag_encoding_map -} cram_codec_iter; - -static void cram_codec_iter_init(cram_block_compression_hdr *hdr, - cram_codec_iter *iter) { - iter->hdr = hdr; - iter->curr_map = NULL; - iter->idx = 0; - iter->is_tag = 0; -} - -// See enum cram_DS_ID in cram/cram_structs -static int cram_ds_to_key(enum cram_DS_ID ds) { - switch(ds) { - case DS_RN: return 256*'R'+'N'; - case DS_QS: return 256*'Q'+'S'; - case DS_IN: return 256*'I'+'N'; - case DS_SC: return 256*'S'+'C'; - case DS_BF: return 256*'B'+'F'; - case DS_CF: return 256*'C'+'F'; - case DS_AP: return 256*'A'+'P'; - case DS_RG: return 256*'R'+'G'; - case DS_MQ: return 256*'M'+'Q'; - case DS_NS: return 256*'N'+'S'; - case DS_MF: return 
256*'M'+'F'; - case DS_TS: return 256*'T'+'S'; - case DS_NP: return 256*'N'+'P'; - case DS_NF: return 256*'N'+'F'; - case DS_RL: return 256*'R'+'L'; - case DS_FN: return 256*'F'+'N'; - case DS_FC: return 256*'F'+'C'; - case DS_FP: return 256*'F'+'P'; - case DS_DL: return 256*'D'+'L'; - case DS_BA: return 256*'B'+'A'; - case DS_BS: return 256*'B'+'S'; - case DS_TL: return 256*'T'+'L'; - case DS_RI: return 256*'R'+'I'; - case DS_RS: return 256*'R'+'S'; - case DS_PD: return 256*'P'+'D'; - case DS_HC: return 256*'H'+'C'; - case DS_BB: return 256*'B'+'B'; - case DS_QQ: return 256*'Q'+'Q'; - case DS_TN: return 256*'T'+'N'; - case DS_TC: return 256*'T'+'C'; - case DS_TM: return 256*'T'+'M'; - case DS_TV: return 256*'T'+'V'; - default: break; - } - - return -1; // unknown -} - -static cram_codec *cram_codec_iter_next(cram_codec_iter *iter, - int *key) { - cram_codec *cc = NULL; - cram_block_compression_hdr *hdr = iter->hdr; - - if (!iter->is_tag) { - // 1: Iterating through main data-series - do { - cc = hdr->codecs[iter->idx++]; - } while(!cc && iter->idx < DS_END); - if (cc) { - *key = cram_ds_to_key(iter->idx-1); - return cc; - } - - // Reset index for phase 2 - iter->idx = 0; - iter->is_tag = 1; - } - - do { - if (!iter->curr_map) - iter->curr_map = hdr->tag_encoding_map[iter->idx++]; - - cc = iter->curr_map ? iter->curr_map->codec : NULL; - if (cc) { - *key = iter->curr_map->key; - iter->curr_map = iter->curr_map->next; - return cc; - } - } while (iter->idx <= CRAM_MAP_HASH); - - // End of codecs - return NULL; -} - -/* - * A list of data-series, used to create a linked list threaded through - * a single array. - */ -typedef struct ds_list { - int data_series; - int next; -} ds_list; - -KHASH_MAP_INIT_INT(cid, int64_t) - -// Opaque struct for the CRAM block content-id -> data-series map. 
-struct cram_cid2ds_t { - ds_list *ds; // array of data-series with linked lists threading through it - int ds_size; - int ds_idx; - khash_t(cid) *hash; // key=content_id, value=index to ds array - int *ds_a; // serialised array of data-series returned by queries. -}; - -void cram_cid2ds_free(cram_cid2ds_t *cid2ds) { - if (cid2ds) { - if (cid2ds->hash) - kh_destroy(cid, cid2ds->hash); - free(cid2ds->ds); - free(cid2ds->ds_a); - free(cid2ds); - } -} - -/* - * Map cram block numbers to data-series. It's normally a 1:1 mapping, - * but in rare cases it can be 1:many (or even many:many). - * The key is the block number and the value is an index into the data-series - * array, which we iterate over until reaching a negative value. - * - * Provide cid2ds as NULL to allocate a new map or pass in an existing one - * to append to this map. The new (or existing) map is returned. - * - * Returns the cid2ds (newly allocated or as provided) on success, - * NULL on failure. - */ -cram_cid2ds_t *cram_update_cid2ds_map(cram_block_compression_hdr *hdr, - cram_cid2ds_t *cid2ds) { - cram_cid2ds_t *c2d = cid2ds; - if (!c2d) { - c2d = calloc(1, sizeof(*c2d)); - if (!c2d) - return NULL; - - c2d->hash = kh_init(cid); - if (!c2d->hash) - goto err; - } - - // Iterate through codecs. Initially primary two-left ones in - // rec_encoding_map, and then the three letter in tag_encoding_map. - cram_codec_iter citer; - cram_codec_iter_init(hdr, &citer); - cram_codec *codec; - int key; - - while ((codec = cram_codec_iter_next(&citer, &key))) { - // Having got a codec, we can then use cram_codec_to_id to get - // the block IDs utilised by that codec. This is then our - // map for allocating data blocks to data series, but for shared - // blocks we can't separate out how much is used by each DS. 
- int bnum[2]; - cram_codec_get_content_ids(codec, bnum); - - khiter_t k; - int ret, i; - for (i = 0; i < 2; i++) { - if (bnum[i] > -2) { - k = kh_put(cid, c2d->hash, bnum[i], &ret); - if (ret < 0) - goto err; - - if (c2d->ds_idx >= c2d->ds_size) { - c2d->ds_size += 100; - c2d->ds_size *= 2; - ds_list *ds_new = realloc(c2d->ds, - c2d->ds_size * sizeof(*ds_new)); - if (!ds_new) - goto err; - c2d->ds = ds_new; - } - - if (ret == 0) { - // Shared content_id, so add to list of DS - - // Maybe data-series should be part of the hash key? - // - // So top-32 bit is content-id, bot-32 bit is key. - // Sort hash by key and then can group all the data-series - // known together. ?? - // - // Brute force for now, scan to see if recorded. - // Typically this is minimal effort as we almost always - // have 1 data-series per block content-id, so the list to - // search is of size 1. - int dsi = kh_value(c2d->hash, k); - while (dsi >= 0) { - if (c2d->ds[dsi].data_series == key) - break; - dsi = c2d->ds[dsi].next; - } - - if (dsi == -1) { - // Block content_id seen before, but not with this DS - c2d->ds[c2d->ds_idx].data_series = key; - c2d->ds[c2d->ds_idx].next = kh_value(c2d->hash, k); - kh_value(c2d->hash, k) = c2d->ds_idx; - c2d->ds_idx++; - } - } else { - // First time this content id has been used - c2d->ds[c2d->ds_idx].data_series = key; - c2d->ds[c2d->ds_idx].next = -1; - kh_value(c2d->hash, k) = c2d->ds_idx; - c2d->ds_idx++; - } - } - } - } - - return c2d; - - err: - if (c2d != cid2ds) - cram_cid2ds_free(c2d); - return NULL; -} - -/* - * Return a list of data series observed as belonging to a block with - * the specified content_id. *n is the number of data series - * returned, or 0 if block is unused. - * Block content_id of -1 is used to indicate the CORE block. - * - * The pointer returned is owned by the cram_cid2ds state and should - * not be freed by the caller. 
- */ -int *cram_cid2ds_query(cram_cid2ds_t *c2d, int content_id, int *n) { - *n = 0; - if (!c2d || !c2d->hash) - return NULL; - - khiter_t k = kh_get(cid, c2d->hash, content_id); - if (k == kh_end(c2d->hash)) - return NULL; - - if (!c2d->ds_a) { - c2d->ds_a = malloc(c2d->ds_idx * sizeof(int)); - if (!c2d->ds_a) - return NULL; - } - - int dsi = kh_value(c2d->hash, k); // initial ds array index from hash - int idx = 0; - while (dsi >= 0) { - c2d->ds_a[idx++] = c2d->ds[dsi].data_series; - dsi = c2d->ds[dsi].next; // iterate over list within ds array - } - - *n = idx; - return c2d->ds_a; -} - -/* - * Produces a description of the record and tag encodings held within - * a compression header and appends to 'ks'. - * - * Returns 0 on success, - * <0 on failure. - */ -int cram_describe_encodings(cram_block_compression_hdr *hdr, kstring_t *ks) { - cram_codec_iter citer; - cram_codec_iter_init(hdr, &citer); - cram_codec *codec; - int key, r = 0; - - while ((codec = cram_codec_iter_next(&citer, &key))) { - char key_s[4] = {0}; - int key_i = 0; - if (key>>16) key_s[key_i++] = key>>16; - key_s[key_i++] = (key>>8)&0xff; - key_s[key_i++] = key&0xff; - r |= ksprintf(ks, "\t%s\t", key_s) < 0; - r |= cram_codec_describe(codec, ks) < 0; - r |= kputc('\n', ks) < 0; - } - - return r ? 
-1 : 0; -} - -/* - *----------------------------------------------------------------------------- - * cram_slice - */ -int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr) { - return hdr->num_blocks; -} - -int cram_slice_hdr_get_embed_ref_id(cram_block_slice_hdr *h) { - return h->ref_base_id; -} - -void cram_slice_hdr_get_coords(cram_block_slice_hdr *h, - int *refid, hts_pos_t *start, hts_pos_t *span) { - if (refid) - *refid = h->ref_seq_id; - if (start) - *start = h->ref_seq_start; - if (span) - *span = h->ref_seq_span; -} - -/* - *----------------------------------------------------------------------------- - * cram_block - */ -int32_t cram_block_get_content_id(cram_block *b) { - return b->content_type == CORE ? -1 : b->content_id; -} -int32_t cram_block_get_comp_size(cram_block *b) { return b->comp_size; } -int32_t cram_block_get_uncomp_size(cram_block *b) { return b->uncomp_size; } -int32_t cram_block_get_crc32(cram_block *b) { return b->crc32; } -void * cram_block_get_data(cram_block *b) { return BLOCK_DATA(b); } -int32_t cram_block_get_size(cram_block *b) { return BLOCK_SIZE(b); } -enum cram_block_method cram_block_get_method(cram_block *b) { - return (enum cram_block_method)b->orig_method; -} -enum cram_content_type cram_block_get_content_type(cram_block *b) { - return b->content_type; -} - -void cram_block_set_content_id(cram_block *b, int32_t id) { b->content_id = id; } -void cram_block_set_comp_size(cram_block *b, int32_t size) { b->comp_size = size; } -void cram_block_set_uncomp_size(cram_block *b, int32_t size) { b->uncomp_size = size; } -void cram_block_set_crc32(cram_block *b, int32_t crc) { b->crc32 = crc; } -void cram_block_set_data(cram_block *b, void *data) { BLOCK_DATA(b) = data; } -void cram_block_set_size(cram_block *b, int32_t size) { BLOCK_SIZE(b) = size; } - -int cram_block_append(cram_block *b, const void *data, int size) { - BLOCK_APPEND(b, data, size); - return 0; - - block_err: - return -1; -} -void 
cram_block_update_size(cram_block *b) { BLOCK_UPLEN(b); } - -// Offset is known as "size" internally, but it can be confusing. -size_t cram_block_get_offset(cram_block *b) { return BLOCK_SIZE(b); } -void cram_block_set_offset(cram_block *b, size_t offset) { BLOCK_SIZE(b) = offset; } - -/* - * Given a compressed block of data in a specified compression method, - * fill out the 'cm' field with meta-data gleaned from the compressed - * block. - * - * If comp is CRAM_COMP_UNKNOWN, we attempt to auto-detect the compression - * format, but this doesn't work for all methods. - * - * Retuns the detected or specified comp method, and fills out *cm - * if non-NULL. - */ -cram_method_details *cram_expand_method(uint8_t *data, int32_t size, - enum cram_block_method comp) { - cram_method_details *cm = calloc(1, sizeof(*cm)); - if (!cm) - return NULL; - - const char *xz_header = "\xFD""7zXZ"; // including nul - - if (comp == CRAM_COMP_UNKNOWN) { - // Auto-detect - if (size > 1 && data[0] == 0x1f && data[1] == 0x8b) - comp = CRAM_COMP_GZIP; - else if (size > 3 && data[1] == 'B' && data[2] == 'Z' - && data[3] == 'h') - comp = CRAM_COMP_BZIP2; - else if (size > 6 && memcmp(xz_header, data, 6) == 0) - comp = CRAM_COMP_LZMA; - else - comp = CRAM_COMP_UNKNOWN; - } - cm->method = comp; - - // Interrogate the compressed data stream to fill out additional fields. - switch (comp) { - case CRAM_COMP_GZIP: - if (size > 8) { - if (data[8] == 4) - cm->level = 1; - else if (data[8] == 2) - cm->level = 9; - else - cm->level = 5; - } - break; - - case CRAM_COMP_BZIP2: - if (size > 3 && data[3] >= '1' && data[3] <= '9') - cm->level = data[3]-'0'; - break; - - case CRAM_COMP_RANS4x8: - cm->Nway = 4; - if (size > 0 && data[0] == 1) - cm->order = 1; - else - cm->order = 0; - break; - - case CRAM_COMP_RANSNx16: - if (size > 0) { - cm->order = data[0] & 1; - cm->Nway = data[0] & RANS_ORDER_X32 ? 32 : 4; - cm->rle = data[0] & RANS_ORDER_RLE ? 1 : 0; - cm->pack = data[0] & RANS_ORDER_PACK ? 
1 : 0; - cm->cat = data[0] & RANS_ORDER_CAT ? 1 : 0; - cm->stripe = data[0] & RANS_ORDER_STRIPE ? 1 : 0; - cm->nosz = data[0] & RANS_ORDER_NOSZ ? 1 : 0; - } - break; - - case CRAM_COMP_ARITH: - if (size > 0) { - // Not in a public header, but the same transforms as rANSNx16 - cm->order = data[0] & 3; - cm->rle = data[0] & RANS_ORDER_RLE ? 1 : 0; - cm->pack = data[0] & RANS_ORDER_PACK ? 1 : 0; - cm->cat = data[0] & RANS_ORDER_CAT ? 1 : 0; - cm->stripe = data[0] & RANS_ORDER_STRIPE ? 1 : 0; - cm->nosz = data[0] & RANS_ORDER_NOSZ ? 1 : 0; - cm->ext = data[0] & 4 /*external*/ ? 1 : 0; - } - break; - - case CRAM_COMP_TOK3: - if (size > 8) { - if (data[8] == 1) - cm->level = 11; - else if (data[8] == 0) - cm->level = 1; - } - break; - - default: - break; - } - - return cm; -} - -/* - *----------------------------------------------------------------------------- - * cram_codecs - */ - -// -2 is unused. -// -1 is CORE -// >= 0 is the block with that Content ID -void cram_codec_get_content_ids(cram_codec *c, int ids[2]) { - ids[0] = cram_codec_to_id(c, &ids[1]); -} - -/* - *----------------------------------------------------------------------------- - * Utility functions - */ - -/* - * Copies the blocks representing the next num_slice slices from a - * container from 'in' to 'out'. It is expected that the file pointer - * is just after the read of the cram_container and cram compression - * header. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice) { - int32_t i, j; - - for (i = 0; i < num_slice; i++) { - cram_block *blk; - cram_block_slice_hdr *hdr; - - if (!(blk = cram_read_block(in))) - return -1; - if (!(hdr = cram_decode_slice_header(in, blk))) { - cram_free_block(blk); - return -1; - } - if (cram_write_block(out, blk) != 0) { - cram_free_block(blk); - return -1; - } - cram_free_block(blk); - - int num_blocks = cram_slice_hdr_get_num_blocks(hdr); - for (j = 0; j < num_blocks; j++) { - blk = cram_read_block(in); - if (!blk || cram_write_block(out, blk) != 0) { - if (blk) cram_free_block(blk); - return -1; - } - cram_free_block(blk); - } - cram_free_slice_header(hdr); - } - - return 0; -} - -/* - * Renumbers RG numbers in a cram compression header. - * - * CRAM stores RG as the Nth number in the header, rather than a - * string holding the ID: tag. This is smaller in space, but means - * "samtools cat" to join files together that contain single but - * different RG lines needs a way of renumbering them. - * - * The file descriptor is expected to be immediately after the - * cram_container structure (ie before the cram compression header). - * Due to the nature of the CRAM format, this needs to read and write - * the blocks itself. Note that there may be multiple slices within - * the container, meaning multiple compression headers to manipulate. - * Changing RG may change the size of the compression header and - * therefore the length field in the container. Hence we rewrite all - * blocks just in case and also emit the adjusted container. - * - * The current implementation can only cope with renumbering a single - * RG (and only then if it is using HUFFMAN or BETA codecs). In - * theory it *may* be possible to renumber multiple RGs if they use - * HUFFMAN to the CORE block or use an external block unshared by any - * other data series. 
So we have an API that can be upgraded to - * support this, but do not implement it for now. An example - * implementation of RG as an EXTERNAL block would be to find that - * block and rewrite it, returning the number of blocks consumed. - * - * Returns 0 on success; - * -1 if unable to edit; - * -2 on other errors (eg I/O). - */ -int cram_transcode_rg(cram_fd *in, cram_fd *out, - cram_container *c, - int nrg, int *in_rg, int *out_rg) { - int new_rg = *out_rg, old_size, new_size; - cram_block *o_blk, *n_blk; - cram_block_compression_hdr *ch; - - if (nrg != 1) { - hts_log_error("CRAM transcode supports only a single RG"); - return -2; - } - - // Produce a new block holding the updated compression header, - // with RG transcoded to a new value. (Single only supported.) - o_blk = cram_read_block(in); - old_size = cram_block_size(o_blk); - ch = cram_decode_compression_header(in, o_blk); - if (cram_block_compression_hdr_set_rg(ch, new_rg) != 0) - return -1; - if (cram_block_compression_hdr_decoder2encoder(in, ch) != 0) - return -1; - n_blk = cram_encode_compression_header(in, c, ch, in->embed_ref); - cram_free_compression_header(ch); - - /* - * Warning: this has internal knowledge of the cram compression - * header format. - * - * The decoder doesn't set c->tags_used, so the encoder puts a two - * byte blank segment. This means n_blk is too short. We skip - * through the decoded old block (o_blk) and copy from there. 
- */ - char *cp = cram_block_get_data(o_blk); - char *op = cp; - char *endp = cp + cram_block_get_uncomp_size(o_blk); - //fprintf(stderr, "sz = %d\n", (int)(endp-cp)); - int32_t i32, err = 0; - - i32 = in->vv.varint_get32(&cp, endp, &err); - cp += i32; - i32 = in->vv.varint_get32(&cp, endp, &err); - cp += i32; - op = cp; - i32 = in->vv.varint_get32(&cp, endp, &err); - i32 += (cp-op); - if (err) - return -2; - - //fprintf(stderr, "remaining %d bytes\n", i32); - cram_block_set_size(n_blk, cram_block_get_size(n_blk)-2); - cram_block_append(n_blk, op, i32); - cram_block_update_size(n_blk); - - new_size = cram_block_size(n_blk); - - //fprintf(stderr, "size %d -> %d\n", old_size, new_size); - - // Now we've constructedthe updated compression header, - // amend the container too (it may have changed size). - int32_t *landmarks, num_landmarks; - landmarks = cram_container_get_landmarks(c, &num_landmarks); - - if (old_size != new_size) { - int diff = new_size - old_size, j; - - for (j = 0; j < num_landmarks; j++) - landmarks[j] += diff; - //cram_container_set_landmarks(c, num_landmarks, landmarks); - cram_container_set_length(c, cram_container_get_length(c) + diff); - } - - // Finally write it all out; container, compression header, - // and then all the remaining slice blocks. - if (cram_write_container(out, c) != 0) - return -2; - - cram_write_block(out, n_blk); - cram_free_block(o_blk); - cram_free_block(n_blk); - - // Container num_blocks can be invalid, due to a bug. - // Instead we iterate in slice context instead. - return cram_copy_slice(in, out, num_landmarks); -} - - -/*! - * Returns the refs_t structure used by a cram file handle. - * - * This may be used in conjunction with option CRAM_OPT_SHARED_REF to - * share reference memory between multiple file handles. - * - * @return - * Returns NULL if none exists or the file handle is not a CRAM file. - */ -refs_t *cram_get_refs(htsFile *fd) { - return fd->format.format == cram - ? 
fd->fp.cram->refs - : NULL; -} diff --git a/src/htslib-1.18/cram/cram_index.c b/src/htslib-1.18/cram/cram_index.c deleted file mode 100644 index b775e94..0000000 --- a/src/htslib-1.18/cram/cram_index.c +++ /dev/null @@ -1,840 +0,0 @@ -/* -Copyright (c) 2013-2020, 2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * The index is a gzipped tab-delimited text file with one line per slice. 
- * The columns are: - * 1: reference number (0 to N-1, as per BAM ref_id) - * 2: reference position of 1st read in slice (1..?) - * 3: number of reads in slice - * 4: offset of container start (relative to end of SAM header, so 1st - * container is offset 0). - * 5: slice number within container (ie which landmark). - * - * In memory, we hold this in a nested containment list. Each list element is - * a cram_index struct. Each element in turn can contain its own list of - * cram_index structs. - * - * Any start..end range which is entirely contained within another (and - * earlier as it is sorted) range will be held within it. This ensures that - * the outer list will never have containments and we can safely do a - * binary search to find the first range which overlaps any given coordinate. - */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../htslib/bgzf.h" -#include "../htslib/hfile.h" -#include "../hts_internal.h" -#include "cram.h" -#include "os.h" - -#if 0 -static void dump_index_(cram_index *e, int level) { - int i, n; - n = printf("%*s%d / %d .. %d, ", level*4, "", e->refid, e->start, e->end); - printf("%*soffset %"PRId64" %p %p\n", MAX(0,50-n), "", e->offset, e, e->e_next); - for (i = 0; i < e->nslice; i++) { - dump_index_(&e->e[i], level+1); - } -} - -static void dump_index(cram_fd *fd) { - int i; - for (i = 0; i < fd->index_sz; i++) { - dump_index_(&fd->index[i], 0); - } -} -#endif - -// Thread a linked list through the nested containment list. -// This makes navigating it and finding the "next" index entry -// trivial. -static cram_index *link_index_(cram_index *e, cram_index *e_last) { - int i; - if (e_last) - e_last->e_next = e; - - // We don't want to link in the top-level cram_index with - // offset=0 and start/end = INT_MIN/INT_MAX. 
- if (e->offset) - e_last = e; - - for (i = 0; i < e->nslice; i++) - e_last = link_index_(&e->e[i], e_last); - - return e_last; -} - -static void link_index(cram_fd *fd) { - int i; - cram_index *e_last = NULL; - - for (i = 0; i < fd->index_sz; i++) { - e_last = link_index_(&fd->index[i], e_last); - } - - if (e_last) - e_last->e_next = NULL; -} - -static int kget_int32(kstring_t *k, size_t *pos, int32_t *val_p) { - int sign = 1; - int32_t val = 0; - size_t p = *pos; - - while (p < k->l && (k->s[p] == ' ' || k->s[p] == '\t')) - p++; - - if (p < k->l && k->s[p] == '-') - sign = -1, p++; - - if (p >= k->l || !(k->s[p] >= '0' && k->s[p] <= '9')) - return -1; - - while (p < k->l && k->s[p] >= '0' && k->s[p] <= '9') { - int digit = k->s[p++]-'0'; - val = val*10 + digit; - } - - *pos = p; - *val_p = sign*val; - - return 0; -} - -static int kget_int64(kstring_t *k, size_t *pos, int64_t *val_p) { - int sign = 1; - int64_t val = 0; - size_t p = *pos; - - while (p < k->l && (k->s[p] == ' ' || k->s[p] == '\t')) - p++; - - if (p < k->l && k->s[p] == '-') - sign = -1, p++; - - if (p >= k->l || !(k->s[p] >= '0' && k->s[p] <= '9')) - return -1; - - while (p < k->l && k->s[p] >= '0' && k->s[p] <= '9') { - int digit = k->s[p++]-'0'; - val = val*10 + digit; - } - - *pos = p; - *val_p = sign*val; - - return 0; -} - -/* - * Loads a CRAM .crai index into memory. 
- * - * Returns 0 for success - * -1 for failure - */ -int cram_index_load(cram_fd *fd, const char *fn, const char *fn_idx) { - - char *tfn_idx = NULL; - char buf[65536]; - ssize_t len; - kstring_t kstr = {0}; - hFILE *fp; - cram_index *idx; - cram_index **idx_stack = NULL, *ep, e; - int idx_stack_alloc = 0, idx_stack_ptr = 0; - size_t pos = 0; - - /* Check if already loaded */ - if (fd->index) - return 0; - - fd->index = calloc((fd->index_sz = 1), sizeof(*fd->index)); - if (!fd->index) - return -1; - - idx = &fd->index[0]; - idx->refid = -1; - idx->start = INT_MIN; - idx->end = INT_MAX; - - idx_stack = calloc(++idx_stack_alloc, sizeof(*idx_stack)); - if (!idx_stack) - goto fail; - - idx_stack[idx_stack_ptr] = idx; - - // Support pathX.cram##idx##pathY.crai - const char *fn_delim = strstr(fn, HTS_IDX_DELIM); - if (fn_delim && !fn_idx) - fn_idx = fn_delim + strlen(HTS_IDX_DELIM); - - if (!fn_idx) { - if (hts_idx_check_local(fn, HTS_FMT_CRAI, &tfn_idx) == 0 && hisremote(fn)) - tfn_idx = hts_idx_getfn(fn, ".crai"); - - if (!tfn_idx) { - hts_log_error("Could not retrieve index file for '%s'", fn); - goto fail; - } - fn_idx = tfn_idx; - } - - if (!(fp = hopen(fn_idx, "r"))) { - hts_log_error("Could not open index file '%s'", fn_idx); - goto fail; - } - - // Load the file into memory - while ((len = hread(fp, buf, sizeof(buf))) > 0) { - if (kputsn(buf, len, &kstr) < 0) - goto fail; - } - - if (len < 0 || kstr.l < 2) - goto fail; - - if (hclose(fp) < 0) - goto fail; - - // Uncompress if required - if (kstr.s[0] == 31 && (uc)kstr.s[1] == 139) { - size_t l = 0; - char *s = zlib_mem_inflate(kstr.s, kstr.l, &l); - if (!s) - goto fail; - - free(kstr.s); - kstr.s = s; - kstr.l = l; - kstr.m = l; // conservative estimate of the size allocated - if (kputsn("", 0, &kstr) < 0) // ensure kstr.s is NUL-terminated - goto fail; - } - - - // Parse it line at a time - while (pos < kstr.l) { - /* 1.1 layout */ - if (kget_int32(&kstr, &pos, &e.refid) == -1) - goto fail; - - if 
(kget_int32(&kstr, &pos, &e.start) == -1) - goto fail; - - if (kget_int32(&kstr, &pos, &e.end) == -1) - goto fail; - - if (kget_int64(&kstr, &pos, &e.offset) == -1) - goto fail; - - if (kget_int32(&kstr, &pos, &e.slice) == -1) - goto fail; - - if (kget_int32(&kstr, &pos, &e.len) == -1) - goto fail; - - e.end += e.start-1; - //printf("%d/%d..%d-offset=%" PRIu64 ",len=%d,slice=%d\n", e.refid, e.start, e.end, e.offset, e.len, e.slice); - - if (e.refid < -1) { - hts_log_error("Malformed index file, refid %d", e.refid); - goto fail; - } - - if (e.refid != idx->refid) { - if (fd->index_sz < e.refid+2) { - cram_index *new_idx; - int new_sz = e.refid+2; - size_t index_end = fd->index_sz * sizeof(*fd->index); - new_idx = realloc(fd->index, - new_sz * sizeof(*fd->index)); - if (!new_idx) - goto fail; - - fd->index = new_idx; - fd->index_sz = new_sz; - memset(((char *)fd->index) + index_end, 0, - fd->index_sz * sizeof(*fd->index) - index_end); - } - idx = &fd->index[e.refid+1]; - idx->refid = e.refid; - idx->start = INT_MIN; - idx->end = INT_MAX; - idx->nslice = idx->nalloc = 0; - idx->e = NULL; - idx_stack[(idx_stack_ptr = 0)] = idx; - } - - while (!(e.start >= idx->start && e.end <= idx->end) || - (idx->start == 0 && idx->refid == -1)) { - idx = idx_stack[--idx_stack_ptr]; - } - - // Now contains, so append - if (idx->nslice+1 >= idx->nalloc) { - cram_index *new_e; - idx->nalloc = idx->nalloc ? 
idx->nalloc*2 : 16; - new_e = realloc(idx->e, idx->nalloc * sizeof(*idx->e)); - if (!new_e) - goto fail; - - idx->e = new_e; - } - - e.nalloc = e.nslice = 0; e.e = NULL; - *(ep = &idx->e[idx->nslice++]) = e; - idx = ep; - - if (++idx_stack_ptr >= idx_stack_alloc) { - cram_index **new_stack; - idx_stack_alloc *= 2; - new_stack = realloc(idx_stack, idx_stack_alloc*sizeof(*idx_stack)); - if (!new_stack) - goto fail; - idx_stack = new_stack; - } - idx_stack[idx_stack_ptr] = idx; - - while (pos < kstr.l && kstr.s[pos] != '\n') - pos++; - pos++; - } - - free(idx_stack); - free(kstr.s); - free(tfn_idx); - - // Convert NCList to linear linked list - link_index(fd); - - //dump_index(fd); - - return 0; - - fail: - free(kstr.s); - free(idx_stack); - free(tfn_idx); - cram_index_free(fd); // Also sets fd->index = NULL - return -1; -} - -static void cram_index_free_recurse(cram_index *e) { - if (e->e) { - int i; - for (i = 0; i < e->nslice; i++) { - cram_index_free_recurse(&e->e[i]); - } - free(e->e); - } -} - -void cram_index_free(cram_fd *fd) { - int i; - - if (!fd->index) - return; - - for (i = 0; i < fd->index_sz; i++) { - cram_index_free_recurse(&fd->index[i]); - } - free(fd->index); - - fd->index = NULL; -} - -/* - * Searches the index for the first slice overlapping a reference ID - * and position, or one immediately preceding it if none is found in - * the index to overlap this position. (Our index may have missing - * entries, but we require at least one per reference.) - * - * If the index finds multiple slices overlapping this position we - * return the first one only. Subsequent calls should specify - * "from" as the last slice we checked to find the next one. Otherwise - * set "from" to be NULL to find the first one. - * - * Refid can also be any of the special HTS_IDX_ values. - * For backwards compatibility, refid -1 is equivalent to HTS_IDX_NOCOOR. 
- * - * Returns the cram_index pointer on success - * NULL on failure - */ -cram_index *cram_index_query(cram_fd *fd, int refid, hts_pos_t pos, - cram_index *from) { - int i, j, k; - cram_index *e; - - if (from) { - // Continue from a previous search. - // We switch to just scanning the linked list, as the nested - // lists are typically short. - e = from->e_next; - if (e && e->refid == refid && e->start <= pos) - return e; - else - return NULL; - } - - switch(refid) { - case HTS_IDX_NONE: - case HTS_IDX_REST: - // fail, or already there, dealt with elsewhere. - return NULL; - - case HTS_IDX_NOCOOR: - refid = -1; - pos = 0; - break; - - case HTS_IDX_START: { - int64_t min_idx = INT64_MAX; - for (i = 0, j = -1; i < fd->index_sz; i++) { - if (fd->index[i].e && fd->index[i].e[0].offset < min_idx) { - min_idx = fd->index[i].e[0].offset; - j = i; - } - } - if (j < 0) - return NULL; - return fd->index[j].e; - } - - default: - if (refid < HTS_IDX_NONE || refid+1 >= fd->index_sz) - return NULL; - } - - from = &fd->index[refid+1]; - - // Ref with nothing aligned against it. - if (!from->e) - return NULL; - - // This sequence is covered by the index, so binary search to find - // the optimal starting block. - i = 0, j = fd->index[refid+1].nslice-1; - for (k = j/2; k != i; k = (j-i)/2 + i) { - if (from->e[k].refid > refid) { - j = k; - continue; - } - - if (from->e[k].refid < refid) { - i = k; - continue; - } - - if (from->e[k].start >= pos) { - j = k; - continue; - } - - if (from->e[k].start < pos) { - i = k; - continue; - } - } - // i==j or i==j-1. Check if j is better. 
- if (j >= 0 && from->e[j].start < pos && from->e[j].refid == refid) - i = j; - - /* The above found *a* bin overlapping, but not necessarily the first */ - while (i > 0 && from->e[i-1].end >= pos) - i--; - - /* We may be one bin before the optimum, so check */ - while (i+1 < from->nslice && - (from->e[i].refid < refid || - from->e[i].end < pos)) - i++; - - e = &from->e[i]; - - return e; -} - -// Return the index entry for last slice on a specific reference. -cram_index *cram_index_last(cram_fd *fd, int refid, cram_index *from) { - int slice; - - if (refid+1 < 0 || refid+1 >= fd->index_sz) - return NULL; - - if (!from) - from = &fd->index[refid+1]; - - // Ref with nothing aligned against it. - if (!from->e) - return NULL; - - slice = fd->index[refid+1].nslice - 1; - - // e is the last entry in the nested containment list, but it may - // contain further slices within it. - cram_index *e = &from->e[slice]; - while (e->e_next) - e = e->e_next; - - return e; -} - -/* - * Find the last container overlapping pos 'end', and the file offset of - * its end (equivalent to the start offset of the container following it). - */ -cram_index *cram_index_query_last(cram_fd *fd, int refid, hts_pos_t end) { - cram_index *e = NULL, *prev_e; - do { - prev_e = e; - e = cram_index_query(fd, refid, end, prev_e); - } while (e); - - if (!prev_e) - return NULL; - e = prev_e; - - // Note: offset of e and e->e_next may be the same if we're using a - // multi-ref container where a single container generates multiple - // index entries. - // - // We need to keep iterating until offset differs in order to find - // the genuine file offset for the end of container. - do { - prev_e = e; - e = e->e_next; - } while (e && e->offset == prev_e->offset); - - return prev_e; -} - -/* - * Skips to a container overlapping the start coordinate listed in - * cram_range. - * - * In theory we call cram_index_query multiple times, once per slice - * overlapping the range. 
However slices may be absent from the index - * which makes this problematic. Instead we find the left-most slice - * and then read from then on, skipping decoding of slices and/or - * whole containers when they don't overlap the specified cram_range. - * - * This function also updates the cram_fd range field. - * - * Returns 0 on success - * -1 on general failure - * -2 on no-data (empty chromosome) - */ -int cram_seek_to_refpos(cram_fd *fd, cram_range *r) { - int ret = 0; - cram_index *e; - - if (r->refid == HTS_IDX_NONE) { - ret = -2; goto err; - } - - // Ideally use an index, so see if we have one. - if ((e = cram_index_query(fd, r->refid, r->start, NULL))) { - if (0 != cram_seek(fd, e->offset, SEEK_SET)) { - if (0 != cram_seek(fd, e->offset - fd->first_container, SEEK_CUR)) { - ret = -1; goto err; - } - } - } else { - // Absent from index, but this most likely means it simply has no data. - ret = -2; goto err; - } - - pthread_mutex_lock(&fd->range_lock); - fd->range = *r; - if (r->refid == HTS_IDX_NOCOOR) { - fd->range.refid = -1; - fd->range.start = 0; - } else if (r->refid == HTS_IDX_START || r->refid == HTS_IDX_REST) { - fd->range.refid = -2; // special case in cram_next_slice - } - pthread_mutex_unlock(&fd->range_lock); - - if (fd->ctr) { - cram_free_container(fd->ctr); - if (fd->ctr_mt && fd->ctr_mt != fd->ctr) - cram_free_container(fd->ctr_mt); - fd->ctr = NULL; - fd->ctr_mt = NULL; - fd->ooc = 0; - fd->eof = 0; - } - - return 0; - - err: - // It's unlikely fd->range will be accessed after EOF or error, - // but this maintains identical behaviour to the previous code. - pthread_mutex_lock(&fd->range_lock); - fd->range = *r; - pthread_mutex_unlock(&fd->range_lock); - return ret; -} - - -/* - * A specialised form of cram_index_build (below) that deals with slices - * having multiple references in this (ref_id -2). In this scenario we - * decode the slice to look at the RI data series instead. 
- * - * Returns 0 on success - * -1 on read failure - * -2 on wrong sort order - * -4 on write failure - */ -static int cram_index_build_multiref(cram_fd *fd, - cram_container *c, - cram_slice *s, - BGZF *fp, - off_t cpos, - int32_t landmark, - int sz) { - int i, ref = -2; - int64_t ref_start = 0, ref_end; - char buf[1024]; - - if (fd->mode != 'w') { - if (0 != cram_decode_slice(fd, c, s, fd->header)) - return -1; - } - - ref_end = INT_MIN; - - int32_t last_ref = -9; - int32_t last_pos = -9; - for (i = 0; i < s->hdr->num_records; i++) { - if (s->crecs[i].ref_id == last_ref && s->crecs[i].apos < last_pos) { - hts_log_error("CRAM file is not sorted by chromosome / position"); - return -2; - } - last_ref = s->crecs[i].ref_id; - last_pos = s->crecs[i].apos; - - if (s->crecs[i].ref_id == ref) { - if (ref_end < s->crecs[i].aend) - ref_end = s->crecs[i].aend; - continue; - } - - if (ref != -2) { - snprintf(buf, sizeof(buf), - "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", - ref, ref_start, ref_end - ref_start + 1, - (int64_t)cpos, landmark, sz); - if (bgzf_write(fp, buf, strlen(buf)) < 0) - return -4; - } - - ref = s->crecs[i].ref_id; - ref_start = s->crecs[i].apos; - ref_end = s->crecs[i].aend; - } - - if (ref != -2) { - snprintf(buf, sizeof(buf), - "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", - ref, ref_start, ref_end - ref_start + 1, - (int64_t)cpos, landmark, sz); - if (bgzf_write(fp, buf, strlen(buf)) < 0) - return -4; - } - - return 0; -} - -/* - * Adds a single slice to the index. 
- */ -int cram_index_slice(cram_fd *fd, - cram_container *c, - cram_slice *s, - BGZF *fp, - off_t cpos, - off_t spos, // relative to cpos - off_t sz) { - int ret; - char buf[1024]; - - if (sz > INT_MAX) { - hts_log_error("CRAM slice is too big (%"PRId64" bytes)", - (int64_t) sz); - return -1; - } - - if (s->hdr->ref_seq_id == -2) { - ret = cram_index_build_multiref(fd, c, s, fp, cpos, spos, sz); - } else { - snprintf(buf, sizeof(buf), - "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", - s->hdr->ref_seq_id, s->hdr->ref_seq_start, - s->hdr->ref_seq_span, (int64_t)cpos, (int)spos, (int)sz); - ret = (bgzf_write(fp, buf, strlen(buf)) >= 0)? 0 : -4; - } - - return ret; -} - -/* - * Adds a single container to the index. - */ -static -int cram_index_container(cram_fd *fd, - cram_container *c, - BGZF *fp, - off_t cpos) { - int j; - off_t spos; - - // 2.0 format - for (j = 0; j < c->num_landmarks; j++) { - cram_slice *s; - off_t sz; - int ret; - - spos = htell(fd->fp); - if (spos - cpos - c->offset != c->landmark[j]) { - hts_log_error("CRAM slice offset %"PRId64" does not match" - " landmark %d in container header (%d)", - spos - cpos - c->offset, j, c->landmark[j]); - return -1; - } - - if (!(s = cram_read_slice(fd))) { - return -1; - } - - sz = htell(fd->fp) - spos; - ret = cram_index_slice(fd, c, s, fp, cpos, c->landmark[j], sz); - - cram_free_slice(s); - - if (ret < 0) { - return ret; - } - } - - return 0; -} - - -/* - * Builds an index file. - * - * fd is a newly opened cram file that we wish to index. - * fn_base is the filename of the associated CRAM file. - * fn_idx is the filename of the index file to be written; - * if NULL, we add ".crai" to fn_base to get the index filename. 
- * - * Returns 0 on success, - * negative on failure (-1 for read failure, -4 for write failure) - */ -int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx) { - cram_container *c; - off_t cpos, hpos; - BGZF *fp; - kstring_t fn_idx_str = {0}; - int64_t last_ref = -9, last_start = -9; - - // Useful for cram_index_build_multiref - cram_set_option(fd, CRAM_OPT_REQUIRED_FIELDS, SAM_RNAME | SAM_POS | SAM_CIGAR); - - if (! fn_idx) { - kputs(fn_base, &fn_idx_str); - kputs(".crai", &fn_idx_str); - fn_idx = fn_idx_str.s; - } - - if (!(fp = bgzf_open(fn_idx, "wg"))) { - perror(fn_idx); - free(fn_idx_str.s); - return -4; - } - - free(fn_idx_str.s); - - cpos = htell(fd->fp); - while ((c = cram_read_container(fd))) { - if (fd->err) { - perror("Cram container read"); - return -1; - } - - hpos = htell(fd->fp); - - if (!(c->comp_hdr_block = cram_read_block(fd))) - return -1; - assert(c->comp_hdr_block->content_type == COMPRESSION_HEADER); - - c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block); - if (!c->comp_hdr) - return -1; - - if (c->ref_seq_id == last_ref && c->ref_seq_start < last_start) { - hts_log_error("CRAM file is not sorted by chromosome / position"); - return -2; - } - last_ref = c->ref_seq_id; - last_start = c->ref_seq_start; - - if (cram_index_container(fd, c, fp, cpos) < 0) { - bgzf_close(fp); - return -1; - } - - cpos = htell(fd->fp); - assert(cpos == hpos + c->length); - - cram_free_container(c); - } - if (fd->err) { - bgzf_close(fp); - return -1; - } - - return (bgzf_close(fp) >= 0)? 0 : -4; -} diff --git a/src/htslib-1.18/cram/cram_io.c b/src/htslib-1.18/cram/cram_io.c deleted file mode 100644 index ca226e2..0000000 --- a/src/htslib-1.18/cram/cram_io.c +++ /dev/null @@ -1,5951 +0,0 @@ -/* -Copyright (c) 2012-2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. 
Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * CRAM I/O primitives. - * - * - ITF8 encoding and decoding. 
- * - Block based I/O - * - Zlib inflating and deflating (memory) - * - CRAM basic data structure reading and writing - * - File opening / closing - * - Reference sequence handling - */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_LIBBZ2 -#include -#endif -#ifdef HAVE_LIBLZMA -#ifdef HAVE_LZMA_H -#include -#else -#include "../os/lzma_stub.h" -#endif -#endif -#include -#include -#include -#include - -#ifdef HAVE_LIBDEFLATE -#include -#define crc32(a,b,c) libdeflate_crc32((a),(b),(c)) -#endif - -#include "cram.h" -#include "os.h" -#include "../htslib/hts.h" -#include "open_trace_file.h" - -#if defined(HAVE_EXTERNAL_LIBHTSCODECS) -#include -#include -#include -#include -#include -#include // CRAM v4.0 variable-size integers -#else -#include "../htscodecs/htscodecs/rANS_static.h" -#include "../htscodecs/htscodecs/rANS_static4x16.h" -#include "../htscodecs/htscodecs/arith_dynamic.h" -#include "../htscodecs/htscodecs/tokenise_name3.h" -#include "../htscodecs/htscodecs/fqzcomp_qual.h" -#include "../htscodecs/htscodecs/varint.h" -#endif - -//#define REF_DEBUG - -#ifdef REF_DEBUG -#include -#define gettid() (int)syscall(SYS_gettid) - -#define RP(...) fprintf (stderr, __VA_ARGS__) -#else -#define RP(...) -#endif - -#include "../htslib/hfile.h" -#include "../htslib/bgzf.h" -#include "../htslib/faidx.h" -#include "../hts_internal.h" - -#ifndef PATH_MAX -#define PATH_MAX FILENAME_MAX -#endif - -#define TRIAL_SPAN 70 -#define NTRIALS 3 - -#define CRAM_DEFAULT_LEVEL 5 - -/* ---------------------------------------------------------------------- - * ITF8 encoding and decoding. - * - * Also see the itf8_get and itf8_put macros in cram_io.h - */ - -/* - * LEGACY: consider using itf8_decode_crc. - * - * Reads an integer in ITF-8 encoding from 'cp' and stores it in - * *val. 
- * - * Returns the number of bytes read on success - * -1 on failure - */ -int itf8_decode(cram_fd *fd, int32_t *val_p) { - static int nbytes[16] = { - 0,0,0,0, 0,0,0,0, // 0000xxxx - 0111xxxx - 1,1,1,1, // 1000xxxx - 1011xxxx - 2,2, // 1100xxxx - 1101xxxx - 3, // 1110xxxx - 4, // 1111xxxx - }; - - static int nbits[16] = { - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx - 0x3f, 0x3f, 0x3f, 0x3f, // 1000xxxx - 1011xxxx - 0x1f, 0x1f, // 1100xxxx - 1101xxxx - 0x0f, // 1110xxxx - 0x0f, // 1111xxxx - }; - - int32_t val = hgetc(fd->fp); - if (val == -1) - return -1; - - int i = nbytes[val>>4]; - val &= nbits[val>>4]; - - switch(i) { - case 0: - *val_p = val; - return 1; - - case 1: - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val; - return 2; - - case 2: - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val; - return 3; - - case 3: - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val; - return 4; - - case 4: // really 3.5 more, why make it different? 
- val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<4) | (((unsigned char)hgetc(fd->fp)) & 0x0f); - *val_p = val; - } - - return 5; -} - -int itf8_decode_crc(cram_fd *fd, int32_t *val_p, uint32_t *crc) { - static int nbytes[16] = { - 0,0,0,0, 0,0,0,0, // 0000xxxx - 0111xxxx - 1,1,1,1, // 1000xxxx - 1011xxxx - 2,2, // 1100xxxx - 1101xxxx - 3, // 1110xxxx - 4, // 1111xxxx - }; - - static int nbits[16] = { - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx - 0x3f, 0x3f, 0x3f, 0x3f, // 1000xxxx - 1011xxxx - 0x1f, 0x1f, // 1100xxxx - 1101xxxx - 0x0f, // 1110xxxx - 0x0f, // 1111xxxx - }; - unsigned char c[5]; - - int32_t val = hgetc(fd->fp); - if (val == -1) - return -1; - - c[0]=val; - - int i = nbytes[val>>4]; - val &= nbits[val>>4]; - - if (i > 0) { - if (hread(fd->fp, &c[1], i) < i) - return -1; - } - - switch(i) { - case 0: - *val_p = val; - *crc = crc32(*crc, c, 1); - return 1; - - case 1: - val = (val<<8) | c[1]; - *val_p = val; - *crc = crc32(*crc, c, 2); - return 2; - - case 2: - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - *val_p = val; - *crc = crc32(*crc, c, 3); - return 3; - - case 3: - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - *val_p = val; - *crc = crc32(*crc, c, 4); - return 4; - - case 4: // really 3.5 more, why make it different? - { - uint32_t uv = val; - uv = (uv<<8) | c[1]; - uv = (uv<<8) | c[2]; - uv = (uv<<8) | c[3]; - uv = (uv<<4) | (c[4] & 0x0f); - // Avoid implementation-defined behaviour on negative values - *val_p = uv < 0x80000000UL ? (int32_t) uv : -((int32_t) (0xffffffffUL - uv)) - 1; - *crc = crc32(*crc, c, 5); - } - } - - return 5; -} - -/* - * Stores a value to memory in ITF-8 format. - * - * Returns the number of bytes required to store the number. - * This is a maximum of 5 bytes. 
- */ -static inline int itf8_put(char *cp, int32_t val) { - unsigned char *up = (unsigned char *)cp; - if (!(val & ~0x00000007f)) { // 1 byte - *up = val; - return 1; - } else if (!(val & ~0x00003fff)) { // 2 byte - *up++ = (val >> 8 ) | 0x80; - *up = val & 0xff; - return 2; - } else if (!(val & ~0x01fffff)) { // 3 byte - *up++ = (val >> 16) | 0xc0; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 3; - } else if (!(val & ~0x0fffffff)) { // 4 byte - *up++ = (val >> 24) | 0xe0; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 4; - } else { // 5 byte - *up++ = 0xf0 | ((val>>28) & 0xff); - *up++ = (val >> 20) & 0xff; - *up++ = (val >> 12) & 0xff; - *up++ = (val >> 4 ) & 0xff; - *up = val & 0x0f; - return 5; - } -} - - -/* 64-bit itf8 variant */ -static inline int ltf8_put(char *cp, int64_t val) { - unsigned char *up = (unsigned char *)cp; - if (!(val & ~((1LL<<7)-1))) { - *up = val; - return 1; - } else if (!(val & ~((1LL<<(6+8))-1))) { - *up++ = (val >> 8 ) | 0x80; - *up = val & 0xff; - return 2; - } else if (!(val & ~((1LL<<(5+2*8))-1))) { - *up++ = (val >> 16) | 0xc0; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 3; - } else if (!(val & ~((1LL<<(4+3*8))-1))) { - *up++ = (val >> 24) | 0xe0; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 4; - } else if (!(val & ~((1LL<<(3+4*8))-1))) { - *up++ = (val >> 32) | 0xf0; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 5; - } else if (!(val & ~((1LL<<(2+5*8))-1))) { - *up++ = (val >> 40) | 0xf8; - *up++ = (val >> 32) & 0xff; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 6; - } else if (!(val & ~((1LL<<(1+6*8))-1))) { - *up++ = (val >> 48) | 0xfc; - *up++ = (val >> 40) & 0xff; - *up++ = (val >> 32) & 0xff; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 
8 ) & 0xff; - *up = val & 0xff; - return 7; - } else if (!(val & ~((1LL<<(7*8))-1))) { - *up++ = (val >> 56) | 0xfe; - *up++ = (val >> 48) & 0xff; - *up++ = (val >> 40) & 0xff; - *up++ = (val >> 32) & 0xff; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 8; - } else { - *up++ = 0xff; - *up++ = (val >> 56) & 0xff; - *up++ = (val >> 48) & 0xff; - *up++ = (val >> 40) & 0xff; - *up++ = (val >> 32) & 0xff; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 9; - } -} - -/* - * Encodes and writes a single integer in ITF-8 format. - * Returns 0 on success - * -1 on failure - */ -int itf8_encode(cram_fd *fd, int32_t val) { - char buf[5]; - int len = itf8_put(buf, val); - return hwrite(fd->fp, buf, len) == len ? 0 : -1; -} - -const int itf8_bytes[16] = { - 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 3, 3, 4, 5 -}; - -const int ltf8_bytes[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - - 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9 -}; - -/* - * LEGACY: consider using ltf8_decode_crc. 
- */ -int ltf8_decode(cram_fd *fd, int64_t *val_p) { - int c = hgetc(fd->fp); - int64_t val = (unsigned char)c; - if (c == -1) - return -1; - - if (val < 0x80) { - *val_p = val; - return 1; - - } else if (val < 0xc0) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & (((1LL<<(6+8)))-1); - return 2; - - } else if (val < 0xe0) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(5+2*8))-1); - return 3; - - } else if (val < 0xf0) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(4+3*8))-1); - return 4; - - } else if (val < 0xf8) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(3+4*8))-1); - return 5; - - } else if (val < 0xfc) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(2+5*8))-1); - return 6; - - } else if (val < 0xfe) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(1+6*8))-1); - return 7; - - } else if (val < 0xff) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned 
char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(7*8))-1); - return 8; - - } else { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val; - } - - return 9; -} - -int ltf8_decode_crc(cram_fd *fd, int64_t *val_p, uint32_t *crc) { - unsigned char c[9]; - int64_t val = hgetc(fd->fp); - if (val < 0) - return -1; - - c[0] = val; - - if (val < 0x80) { - *val_p = val; - *crc = crc32(*crc, c, 1); - return 1; - - } else if (val < 0xc0) { - int v = hgetc(fd->fp); - if (v < 0) - return -1; - val = (val<<8) | (c[1]=v); - *val_p = val & (((1LL<<(6+8)))-1); - *crc = crc32(*crc, c, 2); - return 2; - - } else if (val < 0xe0) { - if (hread(fd->fp, &c[1], 2) < 2) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - *val_p = val & ((1LL<<(5+2*8))-1); - *crc = crc32(*crc, c, 3); - return 3; - - } else if (val < 0xf0) { - if (hread(fd->fp, &c[1], 3) < 3) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - *val_p = val & ((1LL<<(4+3*8))-1); - *crc = crc32(*crc, c, 4); - return 4; - - } else if (val < 0xf8) { - if (hread(fd->fp, &c[1], 4) < 4) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - val = (val<<8) | c[4]; - *val_p = val & ((1LL<<(3+4*8))-1); - *crc = crc32(*crc, c, 5); - return 5; - - } else if (val < 0xfc) { - if (hread(fd->fp, &c[1], 5) < 5) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - val = (val<<8) | c[4]; - val = (val<<8) | c[5]; - *val_p = val & ((1LL<<(2+5*8))-1); - *crc = crc32(*crc, c, 6); - return 6; - - } else if (val < 0xfe) { - if 
(hread(fd->fp, &c[1], 6) < 6) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - val = (val<<8) | c[4]; - val = (val<<8) | c[5]; - val = (val<<8) | c[6]; - *val_p = val & ((1LL<<(1+6*8))-1); - *crc = crc32(*crc, c, 7); - return 7; - - } else if (val < 0xff) { - uint64_t uval = val; - if (hread(fd->fp, &c[1], 7) < 7) - return -1; - uval = (uval<<8) | c[1]; - uval = (uval<<8) | c[2]; - uval = (uval<<8) | c[3]; - uval = (uval<<8) | c[4]; - uval = (uval<<8) | c[5]; - uval = (uval<<8) | c[6]; - uval = (uval<<8) | c[7]; - *val_p = uval & ((1ULL<<(7*8))-1); - *crc = crc32(*crc, c, 8); - return 8; - - } else { - uint64_t uval; - if (hread(fd->fp, &c[1], 8) < 8) - return -1; - uval = c[1]; - uval = (uval<<8) | c[2]; - uval = (uval<<8) | c[3]; - uval = (uval<<8) | c[4]; - uval = (uval<<8) | c[5]; - uval = (uval<<8) | c[6]; - uval = (uval<<8) | c[7]; - uval = (uval<<8) | c[8]; - *crc = crc32(*crc, c, 9); - // Avoid implementation-defined behaviour on negative values - *val_p = c[1] < 0x80 ? (int64_t) uval : -((int64_t) (0xffffffffffffffffULL - uval)) - 1; - } - - return 9; -} - -/* - * Pushes a value in ITF8 format onto the end of a block. - * This shouldn't be used for high-volume data as it is not the fastest - * method. 
- * - * Returns the number of bytes written - */ -int itf8_put_blk(cram_block *blk, int32_t val) { - char buf[5]; - int sz; - - sz = itf8_put(buf, val); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -int ltf8_put_blk(cram_block *blk, int64_t val) { - char buf[9]; - int sz; - - sz = ltf8_put(buf, val); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -static int64_t safe_itf8_get(char **cp, const char *endp, int *err) { - const unsigned char *up = (unsigned char *)*cp; - - if (endp && endp - *cp < 5 && - (*cp >= endp || endp - *cp < itf8_bytes[up[0]>>4])) { - if (err) *err = 1; - return 0; - } - - if (up[0] < 0x80) { - (*cp)++; - return up[0]; - } else if (up[0] < 0xc0) { - (*cp)+=2; - return ((up[0] <<8) | up[1]) & 0x3fff; - } else if (up[0] < 0xe0) { - (*cp)+=3; - return ((up[0]<<16) | (up[1]<< 8) | up[2]) & 0x1fffff; - } else if (up[0] < 0xf0) { - (*cp)+=4; - uint32_t uv = (((uint32_t)up[0]<<24) | (up[1]<<16) | (up[2]<<8) | up[3]) & 0x0fffffff; - return (int32_t)uv; - } else { - (*cp)+=5; - uint32_t uv = (((uint32_t)up[0] & 0x0f)<<28) | (up[1]<<20) | (up[2]<<12) | (up[3]<<4) | (up[4] & 0x0f); - return (int32_t)uv; - } -} - -static int64_t safe_ltf8_get(char **cp, const char *endp, int *err) { - unsigned char *up = (unsigned char *)*cp; - - if (endp && endp - *cp < 9 && - (*cp >= endp || endp - *cp < ltf8_bytes[up[0]])) { - if (err) *err = 1; - return 0; - } - - if (up[0] < 0x80) { - (*cp)++; - return up[0]; - } else if (up[0] < 0xc0) { - (*cp)+=2; - return (((uint64_t)up[0]<< 8) | - (uint64_t)up[1]) & (((1LL<<(6+8)))-1); - } else if (up[0] < 0xe0) { - (*cp)+=3; - return (((uint64_t)up[0]<<16) | - ((uint64_t)up[1]<< 8) | - (uint64_t)up[2]) & ((1LL<<(5+2*8))-1); - } else if (up[0] < 0xf0) { - (*cp)+=4; - return (((uint64_t)up[0]<<24) | - ((uint64_t)up[1]<<16) | - ((uint64_t)up[2]<< 8) | - (uint64_t)up[3]) & ((1LL<<(4+3*8))-1); - } else if (up[0] < 0xf8) { - (*cp)+=5; - return (((uint64_t)up[0]<<32) | - 
((uint64_t)up[1]<<24) | - ((uint64_t)up[2]<<16) | - ((uint64_t)up[3]<< 8) | - (uint64_t)up[4]) & ((1LL<<(3+4*8))-1); - } else if (up[0] < 0xfc) { - (*cp)+=6; - return (((uint64_t)up[0]<<40) | - ((uint64_t)up[1]<<32) | - ((uint64_t)up[2]<<24) | - ((uint64_t)up[3]<<16) | - ((uint64_t)up[4]<< 8) | - (uint64_t)up[5]) & ((1LL<<(2+5*8))-1); - } else if (up[0] < 0xfe) { - (*cp)+=7; - return (((uint64_t)up[0]<<48) | - ((uint64_t)up[1]<<40) | - ((uint64_t)up[2]<<32) | - ((uint64_t)up[3]<<24) | - ((uint64_t)up[4]<<16) | - ((uint64_t)up[5]<< 8) | - (uint64_t)up[6]) & ((1LL<<(1+6*8))-1); - } else if (up[0] < 0xff) { - (*cp)+=8; - return (((uint64_t)up[1]<<48) | - ((uint64_t)up[2]<<40) | - ((uint64_t)up[3]<<32) | - ((uint64_t)up[4]<<24) | - ((uint64_t)up[5]<<16) | - ((uint64_t)up[6]<< 8) | - (uint64_t)up[7]) & ((1LL<<(7*8))-1); - } else { - (*cp)+=9; - return (((uint64_t)up[1]<<56) | - ((uint64_t)up[2]<<48) | - ((uint64_t)up[3]<<40) | - ((uint64_t)up[4]<<32) | - ((uint64_t)up[5]<<24) | - ((uint64_t)up[6]<<16) | - ((uint64_t)up[7]<< 8) | - (uint64_t)up[8]); - } -} - -// Wrapper for now -static int safe_itf8_put(char *cp, char *cp_end, int32_t val) { - return itf8_put(cp, val); -} - -static int safe_ltf8_put(char *cp, char *cp_end, int64_t val) { - return ltf8_put(cp, val); -} - -static int itf8_size(int64_t v) { - return ((!((v)&~0x7f))?1:(!((v)&~0x3fff))?2:(!((v)&~0x1fffff))?3:(!((v)&~0xfffffff))?4:5); -} - -//----------------------------------------------------------------------------- - -// CRAM v4.0 onwards uses a different variable sized integer encoding -// that is size agnostic. - -// Local interface to varint.h inline version, so we can use in func ptr. -// Note a lot of these use the unsigned interface but take signed int64_t. -// This is because the old CRAM ITF8 inteface had signed -1 as unsigned -// 0xffffffff. 
-static int uint7_size(int64_t v) { - return var_size_u64(v); -} - -static int64_t uint7_get_32(char **cp, const char *endp, int *err) { - uint32_t val; - int nb = var_get_u32((uint8_t *)(*cp), (const uint8_t *)endp, &val); - (*cp) += nb; - if (!nb && err) *err = 1; - return val; -} - -static int64_t sint7_get_32(char **cp, const char *endp, int *err) { - int32_t val; - int nb = var_get_s32((uint8_t *)(*cp), (const uint8_t *)endp, &val); - (*cp) += nb; - if (!nb && err) *err = 1; - return val; -} - -static int64_t uint7_get_64(char **cp, const char *endp, int *err) { - uint64_t val; - int nb = var_get_u64((uint8_t *)(*cp), (const uint8_t *)endp, &val); - (*cp) += nb; - if (!nb && err) *err = 1; - return val; -} - -static int64_t sint7_get_64(char **cp, const char *endp, int *err) { - int64_t val; - int nb = var_get_s64((uint8_t *)(*cp), (const uint8_t *)endp, &val); - (*cp) += nb; - if (!nb && err) *err = 1; - return val; -} - -static int uint7_put_32(char *cp, char *endp, int32_t val) { - return var_put_u32((uint8_t *)cp, (uint8_t *)endp, val); -} - -static int sint7_put_32(char *cp, char *endp, int32_t val) { - return var_put_s32((uint8_t *)cp, (uint8_t *)endp, val); -} - -static int uint7_put_64(char *cp, char *endp, int64_t val) { - return var_put_u64((uint8_t *)cp, (uint8_t *)endp, val); -} - -static int sint7_put_64(char *cp, char *endp, int64_t val) { - return var_put_s64((uint8_t *)cp, (uint8_t *)endp, val); -} - -// Put direct to to cram_block -static int uint7_put_blk_32(cram_block *blk, int32_t v) { - uint8_t buf[10]; - int sz = var_put_u32(buf, buf+10, v); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -static int sint7_put_blk_32(cram_block *blk, int32_t v) { - uint8_t buf[10]; - int sz = var_put_s32(buf, buf+10, v); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -static int uint7_put_blk_64(cram_block *blk, int64_t v) { - uint8_t buf[10]; - int sz = var_put_u64(buf, buf+10, v); - 
BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -static int sint7_put_blk_64(cram_block *blk, int64_t v) { - uint8_t buf[10]; - int sz = var_put_s64(buf, buf+10, v); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -// Decode 32-bits with CRC update from cram_fd -static int uint7_decode_crc32(cram_fd *fd, int32_t *val_p, uint32_t *crc) { - uint8_t b[5], i = 0; - int c; - uint32_t v = 0; - -#ifdef VARINT2 - b[0] = hgetc(fd->fp); - if (b[0] < 177) { - } else if (b[0] < 241) { - b[1] = hgetc(fd->fp); - } else if (b[0] < 249) { - b[1] = hgetc(fd->fp); - b[2] = hgetc(fd->fp); - } else { - int n = b[0]+2, z = 1; - while (n-- >= 249) - b[z++] = hgetc(fd->fp); - } - i = var_get_u32(b, NULL, &v); -#else -// // Little endian -// int s = 0; -// do { -// b[i++] = c = hgetc(fd->fp); -// if (c < 0) -// return -1; -// v |= (c & 0x7f) << s; -// s += 7; -// } while (i < 5 && (c & 0x80)); - - // Big endian, see also htscodecs/varint.h - do { - b[i++] = c = hgetc(fd->fp); - if (c < 0) - return -1; - v = (v<<7) | (c & 0x7f); - } while (i < 5 && (c & 0x80)); -#endif - *crc = crc32(*crc, b, i); - - *val_p = v; - return i; -} - -// Decode 32-bits with CRC update from cram_fd -static int sint7_decode_crc32(cram_fd *fd, int32_t *val_p, uint32_t *crc) { - uint8_t b[5], i = 0; - int c; - uint32_t v = 0; - -#ifdef VARINT2 - b[0] = hgetc(fd->fp); - if (b[0] < 177) { - } else if (b[0] < 241) { - b[1] = hgetc(fd->fp); - } else if (b[0] < 249) { - b[1] = hgetc(fd->fp); - b[2] = hgetc(fd->fp); - } else { - int n = b[0]+2, z = 1; - while (n-- >= 249) - b[z++] = hgetc(fd->fp); - } - i = var_get_u32(b, NULL, &v); -#else -// // Little endian -// int s = 0; -// do { -// b[i++] = c = hgetc(fd->fp); -// if (c < 0) -// return -1; -// v |= (c & 0x7f) << s; -// s += 7; -// } while (i < 5 && (c & 0x80)); - - // Big endian, see also htscodecs/varint.h - do { - b[i++] = c = hgetc(fd->fp); - if (c < 0) - return -1; - v = (v<<7) | (c & 0x7f); - } while (i < 5 
&& (c & 0x80)); -#endif - *crc = crc32(*crc, b, i); - - *val_p = (v>>1) ^ -(v&1); - return i; -} - - -// Decode 64-bits with CRC update from cram_fd -static int uint7_decode_crc64(cram_fd *fd, int64_t *val_p, uint32_t *crc) { - uint8_t b[10], i = 0; - int c; - uint64_t v = 0; - -#ifdef VARINT2 - b[0] = hgetc(fd->fp); - if (b[0] < 177) { - } else if (b[0] < 241) { - b[1] = hgetc(fd->fp); - } else if (b[0] < 249) { - b[1] = hgetc(fd->fp); - b[2] = hgetc(fd->fp); - } else { - int n = b[0]+2, z = 1; - while (n-- >= 249) - b[z++] = hgetc(fd->fp); - } - i = var_get_u64(b, NULL, &v); -#else -// // Little endian -// int s = 0; -// do { -// b[i++] = c = hgetc(fd->fp); -// if (c < 0) -// return -1; -// v |= (c & 0x7f) << s; -// s += 7; -// } while (i < 10 && (c & 0x80)); - - // Big endian, see also htscodecs/varint.h - do { - b[i++] = c = hgetc(fd->fp); - if (c < 0) - return -1; - v = (v<<7) | (c & 0x7f); - } while (i < 5 && (c & 0x80)); -#endif - *crc = crc32(*crc, b, i); - - *val_p = v; - return i; -} - -//----------------------------------------------------------------------------- - -/* - * Decodes a 32-bit little endian value from fd and stores in val. - * - * Returns the number of bytes read on success - * -1 on failure - */ -static int int32_decode(cram_fd *fd, int32_t *val) { - int32_t i; - if (4 != hread(fd->fp, &i, 4)) - return -1; - - *val = le_int4(i); - return 4; -} - -/* - * Encodes a 32-bit little endian value 'val' and writes to fd. 
- * - * Returns the number of bytes written on success - * -1 on failure - */ -static int int32_encode(cram_fd *fd, int32_t val) { - uint32_t v = le_int4(val); - if (4 != hwrite(fd->fp, &v, 4)) - return -1; - - return 4; -} - -/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ -int int32_get_blk(cram_block *b, int32_t *val) { - if (b->uncomp_size - BLOCK_SIZE(b) < 4) - return -1; - - uint32_t v = - ((uint32_t) b->data[b->byte ]) | - (((uint32_t) b->data[b->byte+1]) << 8) | - (((uint32_t) b->data[b->byte+2]) << 16) | - (((uint32_t) b->data[b->byte+3]) << 24); - // Avoid implementation-defined behaviour on negative values - *val = v < 0x80000000U ? (int32_t) v : -((int32_t) (0xffffffffU - v)) - 1; - BLOCK_SIZE(b) += 4; - return 4; -} - -/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ -int int32_put_blk(cram_block *b, int32_t val) { - unsigned char cp[4]; - uint32_t v = val; - cp[0] = ( v & 0xff); - cp[1] = ((v>>8) & 0xff); - cp[2] = ((v>>16) & 0xff); - cp[3] = ((v>>24) & 0xff); - - BLOCK_APPEND(b, cp, 4); - return 0; - - block_err: - return -1; -} - -#ifdef HAVE_LIBDEFLATE -/* ---------------------------------------------------------------------- - * libdeflate compression code, with interface to match - * zlib_mem_{in,de}flate for simplicity elsewhere. - */ - -// Named the same as the version that uses zlib as we always use libdeflate for -// decompression when available. 
-char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) { - struct libdeflate_decompressor *z = libdeflate_alloc_decompressor(); - if (!z) { - hts_log_error("Call to libdeflate_alloc_decompressor failed"); - return NULL; - } - - uint8_t *data = NULL, *new_data; - if (!*size) - *size = csize*2; - for(;;) { - new_data = realloc(data, *size); - if (!new_data) { - hts_log_error("Memory allocation failure"); - goto fail; - } - data = new_data; - - int ret = libdeflate_gzip_decompress(z, cdata, csize, data, *size, size); - - // Auto grow output buffer size if needed and try again. - // Fortunately for all bar one call of this we know the size already. - if (ret == LIBDEFLATE_INSUFFICIENT_SPACE) { - (*size) *= 1.5; - continue; - } - - if (ret != LIBDEFLATE_SUCCESS) { - hts_log_error("Inflate operation failed: %d", ret); - goto fail; - } else { - break; - } - } - - libdeflate_free_decompressor(z); - return (char *)data; - - fail: - libdeflate_free_decompressor(z); - free(data); - return NULL; -} - -// Named differently as we use both zlib/libdeflate for compression. -static char *libdeflate_deflate(char *data, size_t size, size_t *cdata_size, - int level, int strat) { - level = level > 0 ? 
level : 6; // libdeflate doesn't honour -1 as default - level *= 1.23; // NB levels go up to 12 here; 5 onwards is +1 - level += level>=8; // 5,6,7->6,7,8 8->10 9->12 - if (level > 12) level = 12; - - if (strat == Z_RLE) // not supported by libdeflate - level = 1; - - struct libdeflate_compressor *z = libdeflate_alloc_compressor(level); - if (!z) { - hts_log_error("Call to libdeflate_alloc_compressor failed"); - return NULL; - } - - unsigned char *cdata = NULL; /* Compressed output */ - size_t cdata_alloc; - cdata = malloc(cdata_alloc = size*1.05+100); - if (!cdata) { - hts_log_error("Memory allocation failure"); - libdeflate_free_compressor(z); - return NULL; - } - - *cdata_size = libdeflate_gzip_compress(z, data, size, cdata, cdata_alloc); - libdeflate_free_compressor(z); - - if (*cdata_size == 0) { - hts_log_error("Call to libdeflate_gzip_compress failed"); - free(cdata); - return NULL; - } - - return (char *)cdata; -} - -#else - -/* ---------------------------------------------------------------------- - * zlib compression code - from Gap5's tg_iface_g.c - * They're static here as they're only used within the cram_compress_block - * and cram_uncompress_block functions, which are the external interface. 
- */ -char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) { - z_stream s; - unsigned char *data = NULL; /* Uncompressed output */ - int data_alloc = 0; - int err; - - /* Starting point at uncompressed size, and scale after that */ - data = malloc(data_alloc = csize*1.2+100); - if (!data) - return NULL; - - /* Initialise zlib stream */ - s.zalloc = Z_NULL; /* use default allocation functions */ - s.zfree = Z_NULL; - s.opaque = Z_NULL; - s.next_in = (unsigned char *)cdata; - s.avail_in = csize; - s.total_in = 0; - s.next_out = data; - s.avail_out = data_alloc; - s.total_out = 0; - - //err = inflateInit(&s); - err = inflateInit2(&s, 15 + 32); - if (err != Z_OK) { - hts_log_error("Call to zlib inflateInit failed: %s", s.msg); - free(data); - return NULL; - } - - /* Decode to 'data' array */ - for (;s.avail_in;) { - unsigned char *data_tmp; - int alloc_inc; - - s.next_out = &data[s.total_out]; - err = inflate(&s, Z_NO_FLUSH); - if (err == Z_STREAM_END) - break; - - if (err != Z_OK) { - hts_log_error("Call to zlib inflate failed: %s", s.msg); - free(data); - inflateEnd(&s); - return NULL; - } - - /* More to come, so realloc based on growth so far */ - alloc_inc = (double)s.avail_in/s.total_in * s.total_out + 100; - data = realloc((data_tmp = data), data_alloc += alloc_inc); - if (!data) { - free(data_tmp); - inflateEnd(&s); - return NULL; - } - s.avail_out += alloc_inc; - } - inflateEnd(&s); - - *size = s.total_out; - return (char *)data; -} -#endif - -#if !defined(HAVE_LIBDEFLATE) || LIBDEFLATE_VERSION_MAJOR < 1 || (LIBDEFLATE_VERSION_MAJOR == 1 && LIBDEFLATE_VERSION_MINOR <= 8) -static char *zlib_mem_deflate(char *data, size_t size, size_t *cdata_size, - int level, int strat) { - z_stream s; - unsigned char *cdata = NULL; /* Compressed output */ - int cdata_alloc = 0; - int cdata_pos = 0; - int err; - - cdata = malloc(cdata_alloc = size*1.05+100); - if (!cdata) - return NULL; - cdata_pos = 0; - - /* Initialise zlib stream */ - s.zalloc = Z_NULL; /* use 
default allocation functions */ - s.zfree = Z_NULL; - s.opaque = Z_NULL; - s.next_in = (unsigned char *)data; - s.avail_in = size; - s.total_in = 0; - s.next_out = cdata; - s.avail_out = cdata_alloc; - s.total_out = 0; - s.data_type = Z_BINARY; - - err = deflateInit2(&s, level, Z_DEFLATED, 15|16, 9, strat); - if (err != Z_OK) { - hts_log_error("Call to zlib deflateInit2 failed: %s", s.msg); - return NULL; - } - - /* Encode to 'cdata' array */ - for (;s.avail_in;) { - s.next_out = &cdata[cdata_pos]; - s.avail_out = cdata_alloc - cdata_pos; - if (cdata_alloc - cdata_pos <= 0) { - hts_log_error("Deflate produced larger output than expected"); - return NULL; - } - err = deflate(&s, Z_NO_FLUSH); - cdata_pos = cdata_alloc - s.avail_out; - if (err != Z_OK) { - hts_log_error("Call to zlib deflate failed: %s", s.msg); - break; - } - } - if (deflate(&s, Z_FINISH) != Z_STREAM_END) { - hts_log_error("Call to zlib deflate failed: %s", s.msg); - } - *cdata_size = s.total_out; - - if (deflateEnd(&s) != Z_OK) { - hts_log_error("Call to zlib deflate failed: %s", s.msg); - } - return (char *)cdata; -} -#endif - -#ifdef HAVE_LIBLZMA -/* ------------------------------------------------------------------------ */ -/* - * Data compression routines using liblzma (xz) - * - * On a test set this shrunk the main db from 136157104 bytes to 114796168, but - * caused tg_index to grow from 2m43.707s to 15m3.961s. Exporting as bfastq - * went from 18.3s to 36.3s. So decompression suffers too, but not as bad - * as compression times. - * - * For now we disable this functionality. If it's to be reenabled make sure you - * improve the mem_inflate implementation as it's just a test hack at the - * moment. 
- */ - -static char *lzma_mem_deflate(char *data, size_t size, size_t *cdata_size, - int level) { - char *out; - size_t out_size = lzma_stream_buffer_bound(size); - *cdata_size = 0; - - out = malloc(out_size); - - /* Single call compression */ - if (LZMA_OK != lzma_easy_buffer_encode(level, LZMA_CHECK_CRC32, NULL, - (uint8_t *)data, size, - (uint8_t *)out, cdata_size, - out_size)) - return NULL; - - return out; -} - -static char *lzma_mem_inflate(char *cdata, size_t csize, size_t *size) { - lzma_stream strm = LZMA_STREAM_INIT; - size_t out_size = 0, out_pos = 0; - char *out = NULL, *new_out; - int r; - - /* Initiate the decoder */ - if (LZMA_OK != lzma_stream_decoder(&strm, lzma_easy_decoder_memusage(9), 0)) - return NULL; - - /* Decode loop */ - strm.avail_in = csize; - strm.next_in = (uint8_t *)cdata; - - for (;strm.avail_in;) { - if (strm.avail_in > out_size - out_pos) { - out_size += strm.avail_in * 4 + 32768; - new_out = realloc(out, out_size); - if (!new_out) - goto fail; - out = new_out; - } - strm.avail_out = out_size - out_pos; - strm.next_out = (uint8_t *)&out[out_pos]; - - r = lzma_code(&strm, LZMA_RUN); - if (LZMA_OK != r && LZMA_STREAM_END != r) { - hts_log_error("LZMA decode failure (error %d)", r); - goto fail; - } - - out_pos = strm.total_out; - - if (r == LZMA_STREAM_END) - break; - } - - /* finish up any unflushed data; necessary? */ - r = lzma_code(&strm, LZMA_FINISH); - if (r != LZMA_OK && r != LZMA_STREAM_END) { - hts_log_error("Call to lzma_code failed with error %d", r); - goto fail; - } - - new_out = realloc(out, strm.total_out > 0 ? strm.total_out : 1); - if (new_out) - out = new_out; - *size = strm.total_out; - - lzma_end(&strm); - - return out; - - fail: - lzma_end(&strm); - free(out); - return NULL; -} -#endif - -/* ---------------------------------------------------------------------- - * CRAM blocks - the dynamically growable data block. We have code to - * create, update, (un)compress and read/write. 
- * - * These are derived from the deflate_interlaced.c blocks, but with the - * CRAM extension of content types and IDs. - */ - -/* - * Allocates a new cram_block structure with a specified content_type and - * id. - * - * Returns block pointer on success - * NULL on failure - */ -cram_block *cram_new_block(enum cram_content_type content_type, - int content_id) { - cram_block *b = malloc(sizeof(*b)); - if (!b) - return NULL; - b->method = b->orig_method = RAW; - b->content_type = content_type; - b->content_id = content_id; - b->comp_size = 0; - b->uncomp_size = 0; - b->data = NULL; - b->alloc = 0; - b->byte = 0; - b->bit = 7; // MSB - b->crc32 = 0; - b->idx = 0; - b->m = NULL; - - return b; -} - -/* - * Reads a block from a cram file. - * Returns cram_block pointer on success. - * NULL on failure - */ -cram_block *cram_read_block(cram_fd *fd) { - cram_block *b = malloc(sizeof(*b)); - unsigned char c; - uint32_t crc = 0; - if (!b) - return NULL; - - //fprintf(stderr, "Block at %d\n", (int)ftell(fd->fp)); - - if (-1 == (b->method = hgetc(fd->fp))) { free(b); return NULL; } - c = b->method; crc = crc32(crc, &c, 1); - if (-1 == (b->content_type= hgetc(fd->fp))) { free(b); return NULL; } - c = b->content_type; crc = crc32(crc, &c, 1); - if (-1 == fd->vv.varint_decode32_crc(fd, &b->content_id, &crc)) { free(b); return NULL; } - if (-1 == fd->vv.varint_decode32_crc(fd, &b->comp_size, &crc)) { free(b); return NULL; } - if (-1 == fd->vv.varint_decode32_crc(fd, &b->uncomp_size, &crc)) { free(b); return NULL; } - - //fprintf(stderr, " method %d, ctype %d, cid %d, csize %d, ucsize %d\n", - // b->method, b->content_type, b->content_id, b->comp_size, b->uncomp_size); - - if (b->method == RAW) { - if (b->uncomp_size < 0 || b->comp_size != b->uncomp_size) { - free(b); - return NULL; - } - b->alloc = b->uncomp_size; - if (!(b->data = malloc(b->uncomp_size))){ free(b); return NULL; } - if (b->uncomp_size != hread(fd->fp, b->data, b->uncomp_size)) { - free(b->data); - free(b); - 
return NULL; - } - } else { - if (b->comp_size < 0 || b->uncomp_size < 0) { - free(b); - return NULL; - } - b->alloc = b->comp_size; - if (!(b->data = malloc(b->comp_size))) { free(b); return NULL; } - if (b->comp_size != hread(fd->fp, b->data, b->comp_size)) { - free(b->data); - free(b); - return NULL; - } - } - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - if (-1 == int32_decode(fd, (int32_t *)&b->crc32)) { - free(b->data); - free(b); - return NULL; - } - - b->crc32_checked = fd->ignore_md5; - b->crc_part = crc; - } else { - b->crc32_checked = 1; // CRC not present - } - - b->orig_method = b->method; - b->idx = 0; - b->byte = 0; - b->bit = 7; // MSB - - return b; -} - - -/* - * Computes the size of a cram block, including the block - * header itself. - */ -uint32_t cram_block_size(cram_block *b) { - unsigned char dat[100], *cp = dat;; - uint32_t sz; - - *cp++ = b->method; - *cp++ = b->content_type; - cp += itf8_put((char*)cp, b->content_id); - cp += itf8_put((char*)cp, b->comp_size); - cp += itf8_put((char*)cp, b->uncomp_size); - - sz = cp-dat + 4; - sz += b->method == RAW ? b->uncomp_size : b->comp_size; - - return sz; -} - -/* - * Writes a CRAM block. 
- * Returns 0 on success - * -1 on failure - */ -int cram_write_block(cram_fd *fd, cram_block *b) { - char vardata[100]; - int vardata_o = 0; - - assert(b->method != RAW || (b->comp_size == b->uncomp_size)); - - if (hputc(b->method, fd->fp) == EOF) return -1; - if (hputc(b->content_type, fd->fp) == EOF) return -1; - vardata_o += fd->vv.varint_put32(vardata , vardata+100, b->content_id); - vardata_o += fd->vv.varint_put32(vardata+vardata_o, vardata+100, b->comp_size); - vardata_o += fd->vv.varint_put32(vardata+vardata_o, vardata+100, b->uncomp_size); - if (vardata_o != hwrite(fd->fp, vardata, vardata_o)) - return -1; - - if (b->data) { - if (b->method == RAW) { - if (b->uncomp_size != hwrite(fd->fp, b->data, b->uncomp_size)) - return -1; - } else { - if (b->comp_size != hwrite(fd->fp, b->data, b->comp_size)) - return -1; - } - } else { - // Absent blocks should be size 0 - assert(b->method == RAW && b->uncomp_size == 0); - } - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - char dat[100], *cp = (char *)dat; - uint32_t crc; - - *cp++ = b->method; - *cp++ = b->content_type; - cp += fd->vv.varint_put32(cp, dat+100, b->content_id); - cp += fd->vv.varint_put32(cp, dat+100, b->comp_size); - cp += fd->vv.varint_put32(cp, dat+100, b->uncomp_size); - crc = crc32(0L, (uc *)dat, cp-dat); - - if (b->method == RAW) { - b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->uncomp_size); - } else { - b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->comp_size); - } - - if (-1 == int32_encode(fd, b->crc32)) - return -1; - } - - return 0; -} - -/* - * Frees a CRAM block, deallocating internal data too. - */ -void cram_free_block(cram_block *b) { - if (!b) - return; - if (b->data) - free(b->data); - free(b); -} - -/* - * Uncompresses a CRAM block, if compressed. - */ -int cram_uncompress_block(cram_block *b) { - char *uncomp; - size_t uncomp_size = 0; - - if (b->crc32_checked == 0) { - uint32_t crc = crc32(b->crc_part, b->data ? 
b->data : (uc *)"", b->alloc); - b->crc32_checked = 1; - if (crc != b->crc32) { - hts_log_error("Block CRC32 failure"); - return -1; - } - } - - if (b->uncomp_size == 0) { - // blank block - b->method = RAW; - return 0; - } - assert(b->uncomp_size >= 0); // cram_read_block should ensure this - - switch (b->method) { - case RAW: - return 0; - - case GZIP: - uncomp_size = b->uncomp_size; - uncomp = zlib_mem_inflate((char *)b->data, b->comp_size, &uncomp_size); - - if (!uncomp) - return -1; - if (uncomp_size != b->uncomp_size) { - free(uncomp); - return -1; - } - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = uncomp_size; - b->method = RAW; - break; - -#ifdef HAVE_LIBBZ2 - case BZIP2: { - unsigned int usize = b->uncomp_size; - if (!(uncomp = malloc(usize))) - return -1; - if (BZ_OK != BZ2_bzBuffToBuffDecompress(uncomp, &usize, - (char *)b->data, b->comp_size, - 0, 0)) { - free(uncomp); - return -1; - } - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = usize; - b->method = RAW; - b->uncomp_size = usize; // Just in case it differs - break; - } -#else - case BZIP2: - hts_log_error("Bzip2 compression is not compiled into this version. Please rebuild and try again"); - return -1; -#endif - -#ifdef HAVE_LIBLZMA - case LZMA: - uncomp = lzma_mem_inflate((char *)b->data, b->comp_size, &uncomp_size); - if (!uncomp) - return -1; - if (uncomp_size != b->uncomp_size) { - free(uncomp); - return -1; - } - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = uncomp_size; - b->method = RAW; - break; -#else - case LZMA: - hts_log_error("Lzma compression is not compiled into this version. 
Please rebuild and try again"); - return -1; - break; -#endif - - case RANS: { - unsigned int usize = b->uncomp_size, usize2; - uncomp = (char *)rans_uncompress(b->data, b->comp_size, &usize2); - if (!uncomp) - return -1; - if (usize != usize2) { - free(uncomp); - return -1; - } - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = usize2; - b->method = RAW; - b->uncomp_size = usize2; // Just in case it differs - //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); - break; - } - - case FQZ: { - uncomp_size = b->uncomp_size; - uncomp = fqz_decompress((char *)b->data, b->comp_size, &uncomp_size, NULL, 0); - if (!uncomp) - return -1; - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = uncomp_size; - b->method = RAW; - b->uncomp_size = uncomp_size; - break; - } - - case RANS_PR0: { - unsigned int usize = b->uncomp_size, usize2; - uncomp = (char *)rans_uncompress_4x16(b->data, b->comp_size, &usize2); - if (!uncomp) - return -1; - if (usize != usize2) { - free(uncomp); - return -1; - } - b->orig_method = RANS_PR0 + (b->data[0]&1) - + 2*((b->data[0]&0x40)>0) + 4*((b->data[0]&0x80)>0); - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = usize2; - b->method = RAW; - b->uncomp_size = usize2; // Just incase it differs - //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); - break; - } - - case ARITH_PR0: { - unsigned int usize = b->uncomp_size, usize2; - uncomp = (char *)arith_uncompress_to(b->data, b->comp_size, NULL, &usize2); - if (!uncomp) - return -1; - if (usize != usize2) { - free(uncomp); - return -1; - } - b->orig_method = ARITH_PR0 + (b->data[0]&1) - + 2*((b->data[0]&0x40)>0) + 4*((b->data[0]&0x80)>0); - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = usize2; - b->method = RAW; - b->uncomp_size = usize2; // Just incase it differs - //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); - break; - } - - case TOK3: { - uint32_t out_len; - uint8_t *cp = 
tok3_decode_names(b->data, b->comp_size, &out_len); - if (!cp) - return -1; - b->orig_method = TOK3; - b->method = RAW; - free(b->data); - b->data = cp; - b->alloc = out_len; - b->uncomp_size = out_len; - break; - } - - default: - return -1; - } - - return 0; -} - -static char *cram_compress_by_method(cram_slice *s, char *in, size_t in_size, - int content_id, size_t *out_size, - enum cram_block_method_int method, - int level, int strat) { - switch (method) { - case GZIP: - case GZIP_RLE: - case GZIP_1: - // Read names bizarrely benefit from zlib over libdeflate for - // mid-range compression levels. Focusing purely of ratio or - // speed, libdeflate still wins. It also seems to win for - // other data series too. - // - // Eg RN at level 5; libdeflate=55.9MB zlib=51.6MB -#ifdef HAVE_LIBDEFLATE -# if (LIBDEFLATE_VERSION_MAJOR < 1 || (LIBDEFLATE_VERSION_MAJOR == 1 && LIBDEFLATE_VERSION_MINOR <= 8)) - if (content_id == DS_RN && level >= 4 && level <= 7) - return zlib_mem_deflate(in, in_size, out_size, level, strat); - else -# endif - return libdeflate_deflate(in, in_size, out_size, level, strat); -#else - return zlib_mem_deflate(in, in_size, out_size, level, strat); -#endif - - case BZIP2: { -#ifdef HAVE_LIBBZ2 - unsigned int comp_size = in_size*1.01 + 600; - char *comp = malloc(comp_size); - if (!comp) - return NULL; - - if (BZ_OK != BZ2_bzBuffToBuffCompress(comp, &comp_size, - in, in_size, - level, 0, 30)) { - free(comp); - return NULL; - } - *out_size = comp_size; - return comp; -#else - return NULL; -#endif - } - - case FQZ: - case FQZ_b: - case FQZ_c: - case FQZ_d: { - // Extract the necessary portion of the slice into an fqz_slice struct. - // These previously were the same thing, but this permits us to detach - // the codec from the rest of this CRAM implementation. 
- fqz_slice *f = malloc(2*s->hdr->num_records * sizeof(uint32_t) + sizeof(fqz_slice)); - if (!f) - return NULL; - f->num_records = s->hdr->num_records; - f->len = (uint32_t *)(((char *)f) + sizeof(fqz_slice)); - f->flags = f->len + s->hdr->num_records; - int i; - for (i = 0; i < s->hdr->num_records; i++) { - f->flags[i] = s->crecs[i].flags; - f->len[i] = (i+1 < s->hdr->num_records - ? s->crecs[i+1].qual - s->crecs[i].qual - : s->block[DS_QS]->uncomp_size - s->crecs[i].qual); - } - char *comp = fqz_compress(strat & 0xff /* cram vers */, f, - in, in_size, out_size, strat >> 8, NULL); - free(f); - return comp; - } - - case LZMA: -#ifdef HAVE_LIBLZMA - return lzma_mem_deflate(in, in_size, out_size, level); -#else - return NULL; -#endif - - case RANS0: - case RANS1: { - unsigned int out_size_i; - unsigned char *cp; - cp = rans_compress((unsigned char *)in, in_size, &out_size_i, - method == RANS0 ? 0 : 1); - *out_size = out_size_i; - return (char *)cp; - } - - case RANS_PR0: - case RANS_PR1: - case RANS_PR64: - case RANS_PR9: - case RANS_PR128: - case RANS_PR129: - case RANS_PR192: - case RANS_PR193: { - unsigned int out_size_i; - unsigned char *cp; - - // see enum cram_block. We map RANS_* methods to order bit-fields - static int methmap[] = { 1, 64,9, 128,129, 192,193 }; - - cp = rans_compress_4x16((unsigned char *)in, in_size, &out_size_i, - method == RANS_PR0 ? 0 : methmap[method - RANS_PR1]); - *out_size = out_size_i; - return (char *)cp; - } - - case ARITH_PR0: - case ARITH_PR1: - case ARITH_PR64: - case ARITH_PR9: - case ARITH_PR128: - case ARITH_PR129: - case ARITH_PR192: - case ARITH_PR193: { - unsigned int out_size_i; - unsigned char *cp; - - // see enum cram_block. We map ARITH_* methods to order bit-fields - static int methmap[] = { 1, 64,9, 128,129, 192,193 }; - - cp = arith_compress_to((unsigned char *)in, in_size, NULL, &out_size_i, - method == ARITH_PR0 ? 
0 : methmap[method - ARITH_PR1]); - *out_size = out_size_i; - return (char *)cp; - } - - case TOK3: - case TOKA: { - int out_len; - int lev = level; - if (method == TOK3 && lev > 3) - lev = 3; - uint8_t *cp = tok3_encode_names(in, in_size, lev, strat, &out_len, NULL); - *out_size = out_len; - return (char *)cp; - } - - case RAW: - break; - - default: - return NULL; - } - - return NULL; -} - - -/* - * Compresses a block using one of two different zlib strategies. If we only - * want one choice set strat2 to be -1. - * - * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED - * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is - * significantly faster. - * - * Method and level -1 implies defaults, as specified in cram_fd. - */ -int cram_compress_block2(cram_fd *fd, cram_slice *s, - cram_block *b, cram_metrics *metrics, - int method, int level) { - - if (!b) - return 0; - - char *comp = NULL; - size_t comp_size = 0; - int strat; - - // Internally we have parameterised methods that externally map - // to the same CRAM method value. - // See enum_cram_block_method_int in cram_structs.h. - int methmap[] = { - // Externally defined values - RAW, GZIP, BZIP2, LZMA, RANS, RANSPR, ARITH, FQZ, TOK3, - - // Reserved for possible expansion - 0, 0, - - // Internally parameterised versions matching back to above - // external values - GZIP, GZIP, - FQZ, FQZ, FQZ, - RANS, - RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, - TOK3, - ARITH, ARITH, ARITH, ARITH, ARITH, ARITH, ARITH, - }; - - if (b->method != RAW) { - // Maybe already compressed if s->block[0] was compressed and - // we have e.g. s->block[DS_BA] set to s->block[0] due to only - // one base type present and hence using E_HUFFMAN on block 0. - // A second explicit attempt to compress the same block then - // occurs. 
- return 0; - } - - if (method == -1) { - method = 1<use_bz2) - method |= 1<use_lzma) - method |= 1<level; - - //fprintf(stderr, "IN: block %d, sz %d\n", b->content_id, b->uncomp_size); - - if (method == RAW || level == 0 || b->uncomp_size == 0) { - b->method = RAW; - b->comp_size = b->uncomp_size; - //fprintf(stderr, "Skip block id %d\n", b->content_id); - return 0; - } - -#ifndef ABS -# define ABS(a) ((a)>=0?(a):-(a)) -#endif - - if (metrics) { - pthread_mutex_lock(&fd->metrics_lock); - // Sudden changes in size trigger a retrial. These are mainly - // triggered when switching to sorted / unsorted, where the number - // of elements in a slice radically changes. - // - // We also get large fluctuations based on genome coordinate for - // e.g. SA:Z and SC series, but we consider the typical scale of - // delta between blocks and use this to look for abnormality. - if (metrics->input_avg_sz && - (b->uncomp_size + 1000 > 4*(metrics->input_avg_sz+1000) || - b->uncomp_size + 1000 < (metrics->input_avg_sz+1000)/4) && - ABS(b->uncomp_size-metrics->input_avg_sz) - > 10*metrics->input_avg_delta) { - metrics->next_trial = 0; - } - - if (metrics->trial > 0 || --metrics->next_trial <= 0) { - int m, unpackable = metrics->unpackable; - size_t sz_best = b->uncomp_size; - size_t sz[CRAM_MAX_METHOD] = {0}; - int method_best = 0; // RAW - char *c_best = NULL, *c = NULL; - - metrics->input_avg_delta = - 0.9 * (metrics->input_avg_delta + - ABS(b->uncomp_size - metrics->input_avg_sz)); - - metrics->input_avg_sz += b->uncomp_size*.2; - metrics->input_avg_sz *= 0.8; - - if (metrics->revised_method) - method = metrics->revised_method; - else - metrics->revised_method = method; - - if (metrics->next_trial <= 0) { - metrics->next_trial = TRIAL_SPAN; - metrics->trial = NTRIALS; - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] /= 2; - metrics->unpackable = 0; - } - - // Compress this block using the best method - if (unpackable && CRAM_MAJOR_VERS(fd->version) > 3) { - // No point 
trying bit-pack if 17+ symbols. - if (method & (1<metrics_lock); - - for (m = 0; m < CRAM_MAX_METHOD; m++) { - if (method & (1u<version); break; - case FQZ_b: strat = CRAM_MAJOR_VERS(fd->version)+256; break; - case FQZ_c: strat = CRAM_MAJOR_VERS(fd->version)+2*256; break; - case FQZ_d: strat = CRAM_MAJOR_VERS(fd->version)+3*256; break; - case TOK3: strat = 0; break; - case TOKA: strat = 1; break; - default: strat = 0; - } - - c = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, - b->content_id, &sz[m], m, lvl, strat); - - if (c && sz_best > sz[m]) { - sz_best = sz[m]; - method_best = m; - if (c_best) - free(c_best); - c_best = c; - } else if (c) { - free(c); - } else { - sz[m] = b->uncomp_size*2+1000; // arbitrarily worse than raw - } - } else { - sz[m] = b->uncomp_size*2+1000; // arbitrarily worse than raw - } - } - - if (c_best) { - free(b->data); - b->data = (unsigned char *)c_best; - b->method = method_best; // adjusted to methmap[method_best] later - b->comp_size = sz_best; - } - - // Accumulate stats for all methods tried - pthread_mutex_lock(&fd->metrics_lock); - for (m = 0; m < CRAM_MAX_METHOD; m++) - // don't be overly sure on small blocks. - // +2000 means eg bzip2 vs gzip (1.07 to 1.04) or gz vs rans1 - // needs to be at least 60 bytes smaller to overcome the - // fixed size addition. - metrics->sz[m] += sz[m]+2000; - - // When enough trials performed, find the best on average - if (--metrics->trial == 0) { - int best_method = RAW; - int best_sz = INT_MAX; - - // Relative costs of methods. 
See enum_cram_block_method_int - // and methmap - double meth_cost[32] = { - // Externally defined methods - 1, // 0 raw - 1.04, // 1 gzip (Z_FILTERED) - 1.07, // 2 bzip2 - 1.08, // 3 lzma - 1.00, // 4 rans (O0) - 1.00, // 5 ranspr (O0) - 1.04, // 6 arithpr (O0) - 1.05, // 7 fqz - 1.05, // 8 tok3 (rans) - 1.00, 1.00, // 9,10 reserved - - // Paramterised versions of above - 1.01, // gzip rle - 1.01, // gzip -1 - - 1.05, 1.05, 1.05, // FQZ_b,c,d - - 1.01, // rans O1 - - 1.01, // rans_pr1 - 1.00, // rans_pr64; if smaller, usually fast - 1.03, // rans_pr65/9 - 1.00, // rans_pr128 - 1.01, // rans_pr129 - 1.00, // rans_pr192 - 1.01, // rans_pr193 - - 1.07, // tok3 arith - - 1.04, // arith_pr1 - 1.04, // arith_pr64 - 1.04, // arith_pr9 - 1.03, // arith_pr128 - 1.04, // arith_pr129 - 1.04, // arith_pr192 - 1.04, // arith_pr193 - }; - - // Scale methods by cost based on compression level - if (fd->level <= 1) { - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] *= 1+(meth_cost[m]-1)*4; - } else if (fd->level <= 3) { - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] *= 1+(meth_cost[m]-1); - } else if (fd->level <= 6) { - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] *= 1+(meth_cost[m]-1)/2; - } else if (fd->level <= 7) { - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] *= 1+(meth_cost[m]-1)/3; - } // else cost is ignored - - // Ensure these are never used; BSC and ZSTD - metrics->sz[9] = metrics->sz[10] = INT_MAX; - - for (m = 0; m < CRAM_MAX_METHOD; m++) { - if ((!metrics->sz[m]) || (!(method & (1u< metrics->sz[m]) - best_sz = metrics->sz[m], best_method = m; - } - - if (best_method != metrics->method) { - //metrics->trial = (NTRIALS+1)/2; // be sure - //metrics->next_trial /= 1.5; - metrics->consistency = 0; - } else { - metrics->next_trial *= MIN(2, 1+metrics->consistency/4.0); - metrics->consistency++; - } - - metrics->method = best_method; - switch (best_method) { - case GZIP: strat = Z_FILTERED; break; - case GZIP_1: strat = 
Z_DEFAULT_STRATEGY; break; - case GZIP_RLE: strat = Z_RLE; break; - case FQZ: strat = CRAM_MAJOR_VERS(fd->version); break; - case FQZ_b: strat = CRAM_MAJOR_VERS(fd->version)+256; break; - case FQZ_c: strat = CRAM_MAJOR_VERS(fd->version)+2*256; break; - case FQZ_d: strat = CRAM_MAJOR_VERS(fd->version)+3*256; break; - case TOK3: strat = 0; break; - case TOKA: strat = 1; break; - default: strat = 0; - } - metrics->strat = strat; - - // If we see at least MAXFAIL trials in a row for a specific - // compression method with more than MAXDELTA aggregate - // size then we drop this from the list of methods used - // for this block type. -#define MAXDELTA 0.20 -#define MAXFAILS 4 - for (m = 0; m < CRAM_MAX_METHOD; m++) { - if (best_method == m) { - metrics->cnt[m] = 0; - metrics->extra[m] = 0; - } else if (best_sz < metrics->sz[m]) { - double r = (double)metrics->sz[m] / best_sz - 1; - int mul = 1+(fd->level>=7); - if (++metrics->cnt[m] >= MAXFAILS*mul && - (metrics->extra[m] += r) >= MAXDELTA*mul) - method &= ~(1u<sz[m] > best_sz) - method &= ~(1u<verbose > 1 && method != metrics->revised_method) - // fprintf(stderr, "%d: revising method from %x to %x\n", - // b->content_id, metrics->revised_method, method); - metrics->revised_method = method; - } - pthread_mutex_unlock(&fd->metrics_lock); - } else { - metrics->input_avg_delta = - 0.9 * (metrics->input_avg_delta + - ABS(b->uncomp_size - metrics->input_avg_sz)); - - metrics->input_avg_sz += b->uncomp_size*.2; - metrics->input_avg_sz *= 0.8; - - strat = metrics->strat; - method = metrics->method; - - pthread_mutex_unlock(&fd->metrics_lock); - comp = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, - b->content_id, &comp_size, method, - method == GZIP_1 ? 
1 : level, - strat); - if (!comp) - return -1; - - if (comp_size < b->uncomp_size) { - free(b->data); - b->data = (unsigned char *)comp; - b->comp_size = comp_size; - b->method = method; - } else { - free(comp); - } - } - - } else { - // no cached metrics, so just do zlib? - comp = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, - b->content_id, &comp_size, GZIP, level, Z_FILTERED); - if (!comp) { - hts_log_error("Compression failed!"); - return -1; - } - - if (comp_size < b->uncomp_size) { - free(b->data); - b->data = (unsigned char *)comp; - b->comp_size = comp_size; - b->method = GZIP; - } else { - free(comp); - } - strat = Z_FILTERED; - } - - hts_log_info("Compressed block ID %d from %d to %d by method %s", - b->content_id, b->uncomp_size, b->comp_size, - cram_block_method2str(b->method)); - - b->method = methmap[b->method]; - - return 0; -} -int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, - int method, int level) { - return cram_compress_block2(fd, NULL, b, metrics, method, level); -} - -cram_metrics *cram_new_metrics(void) { - cram_metrics *m = calloc(1, sizeof(*m)); - if (!m) - return NULL; - m->trial = NTRIALS-1; - m->next_trial = TRIAL_SPAN/2; // learn quicker at start - m->method = RAW; - m->strat = 0; - m->revised_method = 0; - m->unpackable = 0; - - return m; -} - -char *cram_block_method2str(enum cram_block_method_int m) { - switch(m) { - case RAW: return "RAW"; - case GZIP: return "GZIP"; - case BZIP2: return "BZIP2"; - case LZMA: return "LZMA"; - case RANS0: return "RANS0"; - case RANS1: return "RANS1"; - case GZIP_RLE: return "GZIP_RLE"; - case GZIP_1: return "GZIP_1"; - case FQZ: return "FQZ"; - case FQZ_b: return "FQZ_b"; - case FQZ_c: return "FQZ_c"; - case FQZ_d: return "FQZ_d"; - case RANS_PR0: return "RANS_PR0"; - case RANS_PR1: return "RANS_PR1"; - case RANS_PR64: return "RANS_PR64"; - case RANS_PR9: return "RANS_PR9"; - case RANS_PR128: return "RANS_PR128"; - case RANS_PR129: return "RANS_PR129"; - case 
RANS_PR192: return "RANS_PR192"; - case RANS_PR193: return "RANS_PR193"; - case TOK3: return "TOK3_R"; - case TOKA: return "TOK3_A"; - case ARITH_PR0: return "ARITH_PR0"; - case ARITH_PR1: return "ARITH_PR1"; - case ARITH_PR64: return "ARITH_PR64"; - case ARITH_PR9: return "ARITH_PR9"; - case ARITH_PR128: return "ARITH_PR128"; - case ARITH_PR129: return "ARITH_PR129"; - case ARITH_PR192: return "ARITH_PR192"; - case ARITH_PR193: return "ARITH_PR193"; - case BM_ERROR: break; - } - return "?"; -} - -char *cram_content_type2str(enum cram_content_type t) { - switch (t) { - case FILE_HEADER: return "FILE_HEADER"; - case COMPRESSION_HEADER: return "COMPRESSION_HEADER"; - case MAPPED_SLICE: return "MAPPED_SLICE"; - case UNMAPPED_SLICE: return "UNMAPPED_SLICE"; - case EXTERNAL: return "EXTERNAL"; - case CORE: return "CORE"; - case CT_ERROR: break; - } - return "?"; -} - -/* ---------------------------------------------------------------------- - * Reference sequence handling - * - * These revolve around the refs_t structure, which may potentially be - * shared between multiple cram_fd. - * - * We start with refs_create() to allocate an empty refs_t and then - * populate it with @SQ line data using refs_from_header(). This is done on - * cram_open(). Also at start up we can call cram_load_reference() which - * is used with "scramble -r foo.fa". This replaces the fd->refs with the - * new one specified. In either case refs2id() is then called which - * maps ref_entry names to @SQ ids (refs_t->ref_id[]). - * - * Later, possibly within a thread, we will want to know the actual ref - * seq itself, obtained by calling cram_get_ref(). This may use the - * UR: or M5: fields or the filename specified in the original - * cram_load_reference() call. - * - * Given the potential for multi-threaded reference usage, we have - * reference counting (sorry for the confusing double use of "ref") to - * track the number of callers interested in any specific reference. 
- */ - -/* - * Frees/unmaps a reference sequence and associated file handles. - */ -static void ref_entry_free_seq(ref_entry *e) { - if (e->mf) - mfclose(e->mf); - if (e->seq && !e->mf) - free(e->seq); - - e->seq = NULL; - e->mf = NULL; -} - -void refs_free(refs_t *r) { - RP("refs_free()\n"); - - if (--r->count > 0) - return; - - if (!r) - return; - - if (r->pool) - string_pool_destroy(r->pool); - - if (r->h_meta) { - khint_t k; - - for (k = kh_begin(r->h_meta); k != kh_end(r->h_meta); k++) { - ref_entry *e; - - if (!kh_exist(r->h_meta, k)) - continue; - if (!(e = kh_val(r->h_meta, k))) - continue; - ref_entry_free_seq(e); - free(e); - } - - kh_destroy(refs, r->h_meta); - } - - if (r->ref_id) - free(r->ref_id); - - if (r->fp) - bgzf_close(r->fp); - - pthread_mutex_destroy(&r->lock); - - free(r); -} - -static refs_t *refs_create(void) { - refs_t *r = calloc(1, sizeof(*r)); - - RP("refs_create()\n"); - - if (!r) - return NULL; - - if (!(r->pool = string_pool_create(8192))) - goto err; - - r->ref_id = NULL; // see refs2id() to populate. - r->count = 1; - r->last = NULL; - r->last_id = -1; - - if (!(r->h_meta = kh_init(refs))) - goto err; - - pthread_mutex_init(&r->lock, NULL); - - return r; - - err: - refs_free(r); - return NULL; -} - -/* - * Opens a reference fasta file as a BGZF stream, allowing for - * compressed files. It automatically builds a .fai file if - * required and if compressed a .gzi bgzf index too. - * - * Returns a BGZF handle on success; - * NULL on failure. 
- */ -static BGZF *bgzf_open_ref(char *fn, char *mode, int is_md5) { - BGZF *fp; - - if (!is_md5 && !hisremote(fn)) { - char fai_file[PATH_MAX]; - - snprintf(fai_file, PATH_MAX, "%s.fai", fn); - if (access(fai_file, R_OK) != 0) - if (fai_build(fn) != 0) - return NULL; - } - - if (!(fp = bgzf_open(fn, mode))) { - perror(fn); - return NULL; - } - - if (fp->is_compressed == 1 && bgzf_index_load(fp, fn, ".gzi") < 0) { - hts_log_error("Unable to load .gzi index '%s.gzi'", fn); - bgzf_close(fp); - return NULL; - } - - return fp; -} - -/* - * Loads a FAI file for a reference.fasta. - * "is_err" indicates whether failure to load is worthy of emitting an - * error message. In some cases (eg with embedded references) we - * speculatively load, just in case, and silently ignore errors. - * - * Returns the refs_t struct on success (maybe newly allocated); - * NULL on failure - */ -static refs_t *refs_load_fai(refs_t *r_orig, const char *fn, int is_err) { - hFILE *fp = NULL; - char fai_fn[PATH_MAX]; - char line[8192]; - refs_t *r = r_orig; - size_t fn_l = strlen(fn); - int id = 0, id_alloc = 0; - - RP("refs_load_fai %s\n", fn); - - if (!r) - if (!(r = refs_create())) - goto err; - - if (r->fp) - if (bgzf_close(r->fp) != 0) - goto err; - r->fp = NULL; - - /* Look for a FASTA##idx##FAI format */ - char *fn_delim = strstr(fn, HTS_IDX_DELIM); - if (fn_delim) { - if (!(r->fn = string_ndup(r->pool, fn, fn_delim - fn))) - goto err; - fn_delim += strlen(HTS_IDX_DELIM); - snprintf(fai_fn, PATH_MAX, "%s", fn_delim); - } else { - /* An index file was provided, instead of the actual reference file */ - if (fn_l > 4 && strcmp(&fn[fn_l-4], ".fai") == 0) { - if (!r->fn) { - if (!(r->fn = string_ndup(r->pool, fn, fn_l-4))) - goto err; - } - snprintf(fai_fn, PATH_MAX, "%s", fn); - } else { - /* Only the reference file provided. 
Get the index file name from it */ - if (!(r->fn = string_dup(r->pool, fn))) - goto err; - snprintf(fai_fn, PATH_MAX, "%.*s.fai", PATH_MAX-5, fn); - } - } - - if (!(r->fp = bgzf_open_ref(r->fn, "r", 0))) { - hts_log_error("Failed to open reference file '%s'", r->fn); - goto err; - } - - if (!(fp = hopen(fai_fn, "r"))) { - hts_log_error("Failed to open index file '%s'", fai_fn); - if (is_err) - perror(fai_fn); - goto err; - } - while (hgets(line, 8192, fp) != NULL) { - ref_entry *e = malloc(sizeof(*e)); - char *cp; - int n; - khint_t k; - - if (!e) - return NULL; - - // id - for (cp = line; *cp && !isspace_c(*cp); cp++) - ; - *cp++ = 0; - e->name = string_dup(r->pool, line); - - // length - while (*cp && isspace_c(*cp)) - cp++; - e->length = strtoll(cp, &cp, 10); - - // offset - while (*cp && isspace_c(*cp)) - cp++; - e->offset = strtoll(cp, &cp, 10); - - // bases per line - while (*cp && isspace_c(*cp)) - cp++; - e->bases_per_line = strtol(cp, &cp, 10); - - // line length - while (*cp && isspace_c(*cp)) - cp++; - e->line_length = strtol(cp, &cp, 10); - - // filename - e->fn = r->fn; - - e->count = 0; - e->seq = NULL; - e->mf = NULL; - e->is_md5 = 0; - e->validated_md5 = 0; - - k = kh_put(refs, r->h_meta, e->name, &n); - if (-1 == n) { - free(e); - return NULL; - } - - if (n) { - kh_val(r->h_meta, k) = e; - } else { - ref_entry *re = kh_val(r->h_meta, k); - if (re && (re->count != 0 || re->length != 0)) { - /* Keep old */ - free(e); - } else { - /* Replace old */ - if (re) - free(re); - kh_val(r->h_meta, k) = e; - } - } - - if (id >= id_alloc) { - ref_entry **new_refs; - int x; - - id_alloc = id_alloc ?id_alloc*2 : 16; - new_refs = realloc(r->ref_id, id_alloc * sizeof(*r->ref_id)); - if (!new_refs) - goto err; - r->ref_id = new_refs; - - for (x = id; x < id_alloc; x++) - r->ref_id[x] = NULL; - } - r->ref_id[id] = e; - r->nref = ++id; - } - - if(hclose(fp) < 0) - goto err; - return r; - - err: - if (fp) - hclose_abruptly(fp); - - if (!r_orig) - refs_free(r); - - 
return NULL; -} - -/* - * Verifies that the CRAM @SQ lines and .fai files match. - */ -static void sanitise_SQ_lines(cram_fd *fd) { - int i; - - if (!fd->header || !fd->header->hrecs) - return; - - if (!fd->refs || !fd->refs->h_meta) - return; - - for (i = 0; i < fd->header->hrecs->nref; i++) { - const char *name = fd->header->hrecs->ref[i].name; - khint_t k = kh_get(refs, fd->refs->h_meta, name); - ref_entry *r; - - // We may have @SQ lines which have no known .fai, but do not - // in themselves pose a problem because they are unused in the file. - if (k == kh_end(fd->refs->h_meta)) - continue; - - if (!(r = (ref_entry *)kh_val(fd->refs->h_meta, k))) - continue; - - if (r->length && r->length != fd->header->hrecs->ref[i].len) { - assert(strcmp(r->name, fd->header->hrecs->ref[i].name) == 0); - - // Should we also check MD5sums here to ensure the correct - // reference was given? - hts_log_warning("Header @SQ length mismatch for ref %s, %"PRIhts_pos" vs %d", - r->name, fd->header->hrecs->ref[i].len, (int)r->length); - - // Fixing the parsed @SQ header will make MD:Z: strings work - // and also stop it producing N for the sequence. - fd->header->hrecs->ref[i].len = r->length; - } - } -} - -/* - * Indexes references by the order they appear in a BAM file. This may not - * necessarily be the same order they appear in the fasta reference file. 
- * - * Returns 0 on success - * -1 on failure - */ -int refs2id(refs_t *r, sam_hdr_t *hdr) { - int i; - sam_hrecs_t *h = hdr->hrecs; - - if (r->ref_id) - free(r->ref_id); - if (r->last) - r->last = NULL; - - r->ref_id = calloc(h->nref, sizeof(*r->ref_id)); - if (!r->ref_id) - return -1; - - r->nref = h->nref; - for (i = 0; i < h->nref; i++) { - khint_t k = kh_get(refs, r->h_meta, h->ref[i].name); - if (k != kh_end(r->h_meta)) { - r->ref_id[i] = kh_val(r->h_meta, k); - } else { - hts_log_warning("Unable to find ref name '%s'", h->ref[i].name); - } - } - - return 0; -} - -/* - * Generates refs_t entries based on @SQ lines in the header. - * Returns 0 on success - * -1 on failure - */ -static int refs_from_header(cram_fd *fd) { - if (!fd) - return -1; - - refs_t *r = fd->refs; - if (!r) - return -1; - - sam_hdr_t *h = fd->header; - if (!h) - return 0; - - if (!h->hrecs) { - if (-1 == sam_hdr_fill_hrecs(h)) - return -1; - } - - if (h->hrecs->nref == 0) - return 0; - - //fprintf(stderr, "refs_from_header for %p mode %c\n", fd, fd->mode); - - /* Existing refs are fine, as long as they're compatible with the hdr. 
*/ - ref_entry **new_ref_id = realloc(r->ref_id, (r->nref + h->hrecs->nref) * sizeof(*r->ref_id)); - if (!new_ref_id) - return -1; - r->ref_id = new_ref_id; - - int i, j; - /* Copy info from h->ref[i] over to r */ - for (i = 0, j = r->nref; i < h->hrecs->nref; i++) { - sam_hrec_type_t *ty; - sam_hrec_tag_t *tag; - khint_t k; - int n; - - k = kh_get(refs, r->h_meta, h->hrecs->ref[i].name); - if (k != kh_end(r->h_meta)) - // Ref already known about - continue; - - if (!(r->ref_id[j] = calloc(1, sizeof(ref_entry)))) - return -1; - - if (!h->hrecs->ref[i].name) - return -1; - - r->ref_id[j]->name = string_dup(r->pool, h->hrecs->ref[i].name); - if (!r->ref_id[j]->name) return -1; - r->ref_id[j]->length = 0; // marker for not yet loaded - - /* Initialise likely filename if known */ - if ((ty = sam_hrecs_find_type_id(h->hrecs, "SQ", "SN", h->hrecs->ref[i].name))) { - if ((tag = sam_hrecs_find_key(ty, "M5", NULL))) { - r->ref_id[j]->fn = string_dup(r->pool, tag->str+3); - //fprintf(stderr, "Tagging @SQ %s / %s\n", r->ref_id[h]->name, r->ref_id[h]->fn); - } - } - - k = kh_put(refs, r->h_meta, r->ref_id[j]->name, &n); - if (n <= 0) // already exists or error - return -1; - kh_val(r->h_meta, k) = r->ref_id[j]; - - j++; - } - r->nref = j; - - return 0; -} - -/* - * Attaches a header to a cram_fd. - * - * This should be used when creating a new cram_fd for writing where - * we have a header already constructed (eg from a file we've read - * in). - */ -int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr) { - if (!fd || !hdr ) - return -1; - - if (fd->header != hdr) { - if (fd->header) - sam_hdr_destroy(fd->header); - fd->header = sam_hdr_dup(hdr); - if (!fd->header) - return -1; - } - return refs_from_header(fd); -} - -int cram_set_header(cram_fd *fd, sam_hdr_t *hdr) { - return cram_set_header2(fd, hdr); -} - -/* - * Returns whether the path refers to a directory. 
- */ -static int is_directory(char *fn) { - struct stat buf; - if ( stat(fn,&buf) ) return 0; - return S_ISDIR(buf.st_mode); -} - -/* - * Converts a directory and a filename into an expanded path, replacing %s - * in directory with the filename and %[0-9]+s with portions of the filename - * Any remaining parts of filename are added to the end with /%s. - */ -static int expand_cache_path(char *path, char *dir, const char *fn) { - char *cp, *start = path; - size_t len; - size_t sz = PATH_MAX; - - while ((cp = strchr(dir, '%'))) { - if (cp-dir >= sz) return -1; - strncpy(path, dir, cp-dir); - path += cp-dir; - sz -= cp-dir; - - if (*++cp == 's') { - len = strlen(fn); - if (len >= sz) return -1; - strcpy(path, fn); - path += len; - sz -= len; - fn += len; - cp++; - } else if (*cp >= '0' && *cp <= '9') { - char *endp; - long l; - - l = strtol(cp, &endp, 10); - l = MIN(l, strlen(fn)); - if (*endp == 's') { - if (l >= sz) return -1; - strncpy(path, fn, l); - path += l; - fn += l; - sz -= l; - *path = 0; - cp = endp+1; - } else { - if (sz < 3) return -1; - *path++ = '%'; - *path++ = *cp++; - } - } else { - if (sz < 3) return -1; - *path++ = '%'; - *path++ = *cp++; - } - dir = cp; - } - - len = strlen(dir); - if (len >= sz) return -1; - strcpy(path, dir); - path += len; - sz -= len; - - len = strlen(fn) + ((*fn && path > start && path[-1] != '/') ? 1 : 0); - if (len >= sz) return -1; - if (*fn && path > start && path[-1] != '/') - *path++ = '/'; - strcpy(path, fn); - return 0; -} - -/* - * Make the directory containing path and any prefix directories. 
- */ -static void mkdir_prefix(char *path, int mode) { - char *cp = strrchr(path, '/'); - if (!cp) - return; - - *cp = 0; - if (is_directory(path)) { - *cp = '/'; - return; - } - - if (mkdir(path, mode) == 0) { - chmod(path, mode); - *cp = '/'; - return; - } - - mkdir_prefix(path, mode); - mkdir(path, mode); - chmod(path, mode); - *cp = '/'; -} - -/* - * Return the cache directory to use, based on the first of these - * environment variables to be set to a non-empty value. - */ -static const char *get_cache_basedir(const char **extra) { - char *base; - - *extra = ""; - - base = getenv("XDG_CACHE_HOME"); - if (base && *base) return base; - - base = getenv("HOME"); - if (base && *base) { *extra = "/.cache"; return base; } - - base = getenv("TMPDIR"); - if (base && *base) return base; - - base = getenv("TEMP"); - if (base && *base) return base; - - return "/tmp"; -} - -/* - * Queries the M5 string from the header and attempts to populate the - * reference from this using the REF_PATH environment. - * - * Returns 0 on success - * -1 on failure - */ -static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) { - char *ref_path = getenv("REF_PATH"); - sam_hrec_type_t *ty; - sam_hrec_tag_t *tag; - char path[PATH_MAX]; - kstring_t path_tmp = KS_INITIALIZE; - char cache[PATH_MAX], cache_root[PATH_MAX]; - char *local_cache = getenv("REF_CACHE"); - mFILE *mf; - int local_path = 0; - - hts_log_info("Running cram_populate_ref on fd %p, id %d", (void *)fd, id); - - cache_root[0] = '\0'; - - if (!ref_path || *ref_path == '\0') { - /* - * If we have no ref path, we use the EBI server. - * However to avoid spamming it we require a local ref cache too. 
- */ - ref_path = "https://www.ebi.ac.uk/ena/cram/md5/%s"; - if (!local_cache || *local_cache == '\0') { - const char *extra; - const char *base = get_cache_basedir(&extra); - snprintf(cache_root, PATH_MAX, "%s%s/hts-ref", base, extra); - snprintf(cache,PATH_MAX, "%s%s/hts-ref/%%2s/%%2s/%%s", base, extra); - local_cache = cache; - hts_log_info("Populating local cache: %s", local_cache); - } - } - - if (!r->name) - return -1; - - if (!(ty = sam_hrecs_find_type_id(fd->header->hrecs, "SQ", "SN", r->name))) - return -1; - - if (!(tag = sam_hrecs_find_key(ty, "M5", NULL))) - goto no_M5; - - hts_log_info("Querying ref %s", tag->str+3); - - /* Use cache if available */ - if (local_cache && *local_cache) { - if (expand_cache_path(path, local_cache, tag->str+3) == 0) - local_path = 1; - } - -#ifndef HAVE_MMAP - char *path2; - /* Search local files in REF_PATH; we can open them and return as above */ - if (!local_path && (path2 = find_path(tag->str+3, ref_path))) { - int len = snprintf(path, PATH_MAX, "%s", path2); - free(path2); - if (len > 0 && len < PATH_MAX) // in case it's too long - local_path = 1; - } -#endif - - /* Found via REF_CACHE or local REF_PATH file */ - if (local_path) { - struct stat sb; - BGZF *fp; - - if (0 == stat(path, &sb) - && S_ISREG(sb.st_mode) - && (fp = bgzf_open(path, "r"))) { - r->length = sb.st_size; - r->offset = r->line_length = r->bases_per_line = 0; - - r->fn = string_dup(fd->refs->pool, path); - - if (fd->refs->fp) - if (bgzf_close(fd->refs->fp) != 0) - return -1; - fd->refs->fp = fp; - fd->refs->fn = r->fn; - r->is_md5 = 1; - r->validated_md5 = 1; - - // Fall back to cram_get_ref() where it'll do the actual - // reading of the file. 
- return 0; - } - } - - - /* Otherwise search full REF_PATH; slower as loads entire file */ - if ((mf = open_path_mfile(tag->str+3, ref_path, NULL))) { - size_t sz; - r->seq = mfsteal(mf, &sz); - if (r->seq) { - r->mf = NULL; - } else { - // keep mf around as we couldn't detach - r->seq = mf->data; - r->mf = mf; - } - r->length = sz; - r->is_md5 = 1; - r->validated_md5 = 1; - } else { - refs_t *refs; - const char *fn; - - no_M5: - /* Failed to find in search path or M5 cache, see if @SQ UR: tag? */ - if (!(tag = sam_hrecs_find_key(ty, "UR", NULL))) - return -1; - - fn = (strncmp(tag->str+3, "file:", 5) == 0) - ? tag->str+8 - : tag->str+3; - - if (fd->refs->fp) { - if (bgzf_close(fd->refs->fp) != 0) - return -1; - fd->refs->fp = NULL; - } - if (!(refs = refs_load_fai(fd->refs, fn, 0))) - return -1; - sanitise_SQ_lines(fd); - - fd->refs = refs; - if (fd->refs->fp) { - if (bgzf_close(fd->refs->fp) != 0) - return -1; - fd->refs->fp = NULL; - } - - if (!fd->refs->fn) - return -1; - - if (-1 == refs2id(fd->refs, fd->header)) - return -1; - if (!fd->refs->ref_id || !fd->refs->ref_id[id]) - return -1; - - // Local copy already, so fall back to cram_get_ref(). - return 0; - } - - /* Populate the local disk cache if required */ - if (local_cache && *local_cache) { - hFILE *fp; - - if (*cache_root && !is_directory(cache_root)) { - hts_log_warning("Creating reference cache directory %s\n" - "This may become large; see the samtools(1) manual page REF_CACHE discussion", - cache_root); - } - - if (expand_cache_path(path, local_cache, tag->str+3) < 0) { - return 0; // Not fatal - we have the data already so keep going. - } - hts_log_info("Writing cache file '%s'", path); - mkdir_prefix(path, 01777); - - fp = hts_open_tmpfile(path, "wx", &path_tmp); - if (!fp) { - perror(path_tmp.s); - free(path_tmp.s); - - // Not fatal - we have the data already so keep going. 
- return 0; - } - - // Check md5sum - hts_md5_context *md5; - char unsigned md5_buf1[16]; - char md5_buf2[33]; - - if (!(md5 = hts_md5_init())) { - hclose_abruptly(fp); - unlink(path_tmp.s); - free(path_tmp.s); - return -1; - } - hts_md5_update(md5, r->seq, r->length); - hts_md5_final(md5_buf1, md5); - hts_md5_destroy(md5); - hts_md5_hex(md5_buf2, md5_buf1); - - if (strncmp(tag->str+3, md5_buf2, 32) != 0) { - hts_log_error("Mismatching md5sum for downloaded reference"); - hclose_abruptly(fp); - unlink(path_tmp.s); - free(path_tmp.s); - return -1; - } - - ssize_t length_written = hwrite(fp, r->seq, r->length); - if (hclose(fp) < 0 || length_written != r->length || - chmod(path_tmp.s, 0444) < 0 || - rename(path_tmp.s, path) < 0) { - hts_log_error("Creating reference at %s failed: %s", - path, strerror(errno)); - unlink(path_tmp.s); - } - } - - free(path_tmp.s); - return 0; -} - -static void cram_ref_incr_locked(refs_t *r, int id) { - RP("%d INC REF %d, %d %p\n", gettid(), id, - (int)(id>=0 && r->ref_id[id]?r->ref_id[id]->count+1:-999), - id>=0 && r->ref_id[id]?r->ref_id[id]->seq:(char *)1); - - if (id < 0 || !r->ref_id[id] || !r->ref_id[id]->seq) - return; - - if (r->last_id == id) - r->last_id = -1; - - ++r->ref_id[id]->count; -} - -void cram_ref_incr(refs_t *r, int id) { - pthread_mutex_lock(&r->lock); - cram_ref_incr_locked(r, id); - pthread_mutex_unlock(&r->lock); -} - -static void cram_ref_decr_locked(refs_t *r, int id) { - RP("%d DEC REF %d, %d %p\n", gettid(), id, - (int)(id>=0 && r->ref_id[id]?r->ref_id[id]->count-1:-999), - id>=0 && r->ref_id[id]?r->ref_id[id]->seq:(char *)1); - - if (id < 0 || !r->ref_id[id] || !r->ref_id[id]->seq) { - return; - } - - if (--r->ref_id[id]->count <= 0) { - assert(r->ref_id[id]->count == 0); - if (r->last_id >= 0) { - if (r->ref_id[r->last_id]->count <= 0 && - r->ref_id[r->last_id]->seq) { - RP("%d FREE REF %d (%p)\n", gettid(), - r->last_id, r->ref_id[r->last_id]->seq); - ref_entry_free_seq(r->ref_id[r->last_id]); - if 
(r->ref_id[r->last_id]->is_md5) r->ref_id[r->last_id]->length = 0; - } - } - r->last_id = id; - } -} - -void cram_ref_decr(refs_t *r, int id) { - pthread_mutex_lock(&r->lock); - cram_ref_decr_locked(r, id); - pthread_mutex_unlock(&r->lock); -} - -/* - * Used by cram_ref_load and cram_get_ref. The file handle will have - * already been opened, so we can catch it. The ref_entry *e informs us - * of whether this is a multi-line fasta file or a raw MD5 style file. - * Either way we create a single contiguous sequence. - * - * Returns all or part of a reference sequence on success (malloced); - * NULL on failure. - */ -static char *load_ref_portion(BGZF *fp, ref_entry *e, int start, int end) { - off_t offset, len; - char *seq; - - if (end < start) - end = start; - - /* - * Compute locations in file. This is trivial for the MD5 files, but - * is still necessary for the fasta variants. - * - * Note the offset here, as with faidx, has the assumption that white- - * space (the diff between line_length and bases_per_line) only occurs - * at the end of a line of text. - */ - offset = e->line_length - ? e->offset + (start-1)/e->bases_per_line * e->line_length + - (start-1) % e->bases_per_line - : start-1; - - len = (e->line_length - ? e->offset + (end-1)/e->bases_per_line * e->line_length + - (end-1) % e->bases_per_line - : end-1) - offset + 1; - - if (bgzf_useek(fp, offset, SEEK_SET) < 0) { - perror("bgzf_useek() on reference file"); - return NULL; - } - - if (len == 0 || !(seq = malloc(len))) { - return NULL; - } - - if (len != bgzf_read(fp, seq, len)) { - perror("bgzf_read() on reference file"); - free(seq); - return NULL; - } - - /* Strip white-space if required. */ - if (len != end-start+1) { - hts_pos_t i, j; - char *cp = seq; - char *cp_to; - - // Copy up to the first white-space, and then repeatedly just copy - // bases_per_line verbatim, and use the slow method to end again. 
- // - // This may seem excessive, but this code can be a significant - // portion of total CRAM decode CPU time for shallow data sets. - for (i = j = 0; i < len; i++) { - if (!isspace_c(cp[i])) - cp[j++] = cp[i] & ~0x20; - else - break; - } - while (i < len && isspace_c(cp[i])) - i++; - while (i < len - e->line_length) { - hts_pos_t j_end = j + e->bases_per_line; - while (j < j_end) - cp[j++] = cp[i++] & ~0x20; // toupper equiv - i += e->line_length - e->bases_per_line; - } - for (; i < len; i++) { - if (!isspace_c(cp[i])) - cp[j++] = cp[i] & ~0x20; - } - - cp_to = cp+j; - - if (cp_to - seq != end-start+1) { - hts_log_error("Malformed reference file"); - free(seq); - return NULL; - } - } else { - int i; - for (i = 0; i < len; i++) { - seq[i] = toupper_c(seq[i]); - } - } - - return seq; -} - -/* - * Load the entire reference 'id'. - * This also increments the reference count by 1. - * - * Returns ref_entry on success; - * NULL on failure - */ -ref_entry *cram_ref_load(refs_t *r, int id, int is_md5) { - ref_entry *e = r->ref_id[id]; - int start = 1, end = e->length; - char *seq; - - if (e->seq) { - return e; - } - - assert(e->count == 0); - - if (r->last) { -#ifdef REF_DEBUG - int idx = 0; - for (idx = 0; idx < r->nref; idx++) - if (r->last == r->ref_id[idx]) - break; - RP("%d cram_ref_load DECR %d\n", gettid(), idx); -#endif - assert(r->last->count > 0); - if (--r->last->count <= 0) { - RP("%d FREE REF %d (%p)\n", gettid(), id, r->ref_id[id]->seq); - if (r->last->seq) - ref_entry_free_seq(r->last); - } - } - - if (!r->fn) - return NULL; - - /* Open file if it's not already the current open reference */ - if (strcmp(r->fn, e->fn) || r->fp == NULL) { - if (r->fp) - if (bgzf_close(r->fp) != 0) - return NULL; - r->fn = e->fn; - if (!(r->fp = bgzf_open_ref(r->fn, "r", is_md5))) - return NULL; - } - - RP("%d Loading ref %d (%d..%d)\n", gettid(), id, start, end); - - if (!(seq = load_ref_portion(r->fp, e, start, end))) { - return NULL; - } - - RP("%d Loaded ref %d 
(%d..%d) = %p\n", gettid(), id, start, end, seq); - - RP("%d INC REF %d, %"PRId64"\n", gettid(), id, (e->count+1)); - e->seq = seq; - e->mf = NULL; - e->count++; - - /* - * Also keep track of last used ref so incr/decr loops on the same - * sequence don't cause load/free loops. - */ - RP("%d cram_ref_load INCR %d => %"PRId64"\n", gettid(), id, e->count+1); - r->last = e; - e->count++; - - return e; -} - -/* - * Returns a portion of a reference sequence from start to end inclusive. - * The returned pointer is owned by either the cram_file fd or by the - * internal refs_t structure and should not be freed by the caller. - * - * The difference is whether or not this refs_t is in use by just the one - * cram_fd or by multiples, or whether we have multiple threads accessing - * references. In either case fd->shared will be true and we start using - * reference counting to track the number of users of a specific reference - * sequence. - * - * Otherwise the ref seq returned is allocated as part of cram_fd itself - * and will be freed up on the next call to cram_get_ref or cram_close. - * - * To return the entire reference sequence, specify start as 1 and end - * as 0. - * - * To cease using a reference, call cram_ref_decr(). - * - * Returns reference on success, - * NULL on failure - */ -char *cram_get_ref(cram_fd *fd, int id, int start, int end) { - ref_entry *r; - char *seq; - int ostart = start; - - if (id == -1 || start < 1) - return NULL; - - /* FIXME: axiomatic query of r->seq being true? - * Or shortcut for unsorted data where we load once and never free? - */ - - //fd->shared_ref = 1; // hard code for now to simplify things - - pthread_mutex_lock(&fd->ref_lock); - - RP("%d cram_get_ref on fd %p, id %d, range %d..%d\n", gettid(), fd, id, start, end); - - /* - * Unsorted data implies we want to fetch an entire reference at a time. - * We just deal with this at the moment by claiming we're sharing - * references instead, which has the same requirement. 
- */ - if (fd->unsorted) - fd->shared_ref = 1; - - - /* Sanity checking: does this ID exist? */ - if (id >= fd->refs->nref) { - hts_log_error("No reference found for id %d", id); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - if (!fd->refs || !fd->refs->ref_id[id]) { - hts_log_error("No reference found for id %d", id); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - if (!(r = fd->refs->ref_id[id])) { - hts_log_error("No reference found for id %d", id); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - - /* - * It has an entry, but may not have been populated yet. - * Any manually loaded .fai files have their lengths known. - * A ref entry computed from @SQ lines (M5 or UR field) will have - * r->length == 0 unless it's been loaded once and verified that we have - * an on-disk filename for it. - * - * 19 Sep 2013: Moved the lock here as the cram_populate_ref code calls - * open_path_mfile and libcurl, which isn't multi-thread safe unless I - * rewrite my code to have one curl handle per thread. - */ - pthread_mutex_lock(&fd->refs->lock); - if (r->length == 0) { - if (fd->ref_fn) - hts_log_warning("Reference file given, but ref '%s' not present", - r->name); - if (cram_populate_ref(fd, id, r) == -1) { - hts_log_warning("Failed to populate reference for id %d", id); - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - r = fd->refs->ref_id[id]; - if (fd->unsorted) - cram_ref_incr_locked(fd->refs, id); - } - - - /* - * We now know that we the filename containing the reference, so check - * for limits. If it's over half the reference we'll load all of it in - * memory as this will speed up subsequent calls. - */ - if (end < 1) - end = r->length; - if (end >= r->length) - end = r->length; - - if (end - start >= 0.5*r->length || fd->shared_ref) { - start = 1; - end = r->length; - } - - /* - * Maybe we have it cached already? If so use it. 
- * - * Alternatively if we don't have the sequence but we're sharing - * references and/or are asking for the entire length of it, then - * load the full reference into the refs structure and return - * a pointer to that one instead. - */ - if (fd->shared_ref || r->seq || (start == 1 && end == r->length)) { - char *cp; - - if (id >= 0) { - if (r->seq) { - cram_ref_incr_locked(fd->refs, id); - } else { - ref_entry *e; - if (!(e = cram_ref_load(fd->refs, id, r->is_md5))) { - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - /* unsorted data implies cache ref indefinitely, to avoid - * continually loading and unloading. - */ - if (fd->unsorted) - cram_ref_incr_locked(fd->refs, id); - } - - fd->ref = NULL; /* We never access it directly */ - fd->ref_start = 1; - fd->ref_end = r->length; - fd->ref_id = id; - - cp = fd->refs->ref_id[id]->seq + ostart-1; - } else { - fd->ref = NULL; - cp = NULL; - } - - RP("%d cram_get_ref returning for id %d, count %d\n", gettid(), id, (int)r->count); - - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return cp; - } - - /* - * Otherwise we're not sharing, we don't have a copy of it already and - * we're only asking for a small portion of it. - * - * In this case load up just that segment ourselves, freeing any old - * small segments in the process. 
- */ - - /* Unmapped ref ID */ - if (id < 0 || !fd->refs->fn) { - if (fd->ref_free) { - free(fd->ref_free); - fd->ref_free = NULL; - } - fd->ref = NULL; - fd->ref_id = id; - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - /* Open file if it's not already the current open reference */ - if (strcmp(fd->refs->fn, r->fn) || fd->refs->fp == NULL) { - if (fd->refs->fp) - if (bgzf_close(fd->refs->fp) != 0) - return NULL; - fd->refs->fn = r->fn; - if (!(fd->refs->fp = bgzf_open_ref(fd->refs->fn, "r", r->is_md5))) { - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - } - - if (!(fd->ref = load_ref_portion(fd->refs->fp, r, start, end))) { - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - if (fd->ref_free) - free(fd->ref_free); - - fd->ref_id = id; - fd->ref_start = start; - fd->ref_end = end; - fd->ref_free = fd->ref; - seq = fd->ref; - - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - - return seq ? seq + ostart - start : NULL; -} - -/* - * If fd has been opened for reading, it may be permitted to specify 'fn' - * as NULL and let the code auto-detect the reference by parsing the - * SAM header @SQ lines. - */ -int cram_load_reference(cram_fd *fd, char *fn) { - int ret = 0; - - if (fn) { - fd->refs = refs_load_fai(fd->refs, fn, - !(fd->embed_ref>0 && fd->mode == 'r')); - fn = fd->refs ? 
fd->refs->fn : NULL; - if (!fn) - ret = -1; - sanitise_SQ_lines(fd); - } - fd->ref_fn = fn; - - if ((!fd->refs || (fd->refs->nref == 0 && !fn)) && fd->header) { - if (fd->refs) - refs_free(fd->refs); - if (!(fd->refs = refs_create())) - return -1; - if (-1 == refs_from_header(fd)) - return -1; - } - - if (fd->header) - if (-1 == refs2id(fd->refs, fd->header)) - return -1; - - return ret; -} - -/* ---------------------------------------------------------------------- - * Containers - */ - -/* - * Creates a new container, specifying the maximum number of slices - * and records permitted. - * - * Returns cram_container ptr on success - * NULL on failure - */ -cram_container *cram_new_container(int nrec, int nslice) { - cram_container *c = calloc(1, sizeof(*c)); - enum cram_DS_ID id; - - if (!c) - return NULL; - - c->curr_ref = -2; - - c->max_c_rec = nrec * nslice; - c->curr_c_rec = 0; - - c->max_rec = nrec; - c->record_counter = 0; - c->num_bases = 0; - c->s_num_bases = 0; - - c->max_slice = nslice; - c->curr_slice = 0; - - c->pos_sorted = 1; - c->max_apos = 0; - c->multi_seq = 0; - c->qs_seq_orient = 1; - c->no_ref = 0; - c->embed_ref = -1; // automatic selection - - c->bams = NULL; - - if (!(c->slices = calloc(nslice != 0 ? 
nslice : 1, sizeof(cram_slice *)))) - goto err; - c->slice = NULL; - - if (!(c->comp_hdr = cram_new_compression_header())) - goto err; - c->comp_hdr_block = NULL; - - for (id = DS_RN; id < DS_TN; id++) - if (!(c->stats[id] = cram_stats_create())) goto err; - - //c->aux_B_stats = cram_stats_create(); - - if (!(c->tags_used = kh_init(m_tagmap))) - goto err; - c->refs_used = 0; - c->ref_free = 0; - - return c; - - err: - if (c) { - if (c->slices) - free(c->slices); - free(c); - } - return NULL; -} - -void cram_free_container(cram_container *c) { - enum cram_DS_ID id; - int i; - - if (!c) - return; - - if (c->refs_used) - free(c->refs_used); - - if (c->landmark) - free(c->landmark); - - if (c->comp_hdr) - cram_free_compression_header(c->comp_hdr); - - if (c->comp_hdr_block) - cram_free_block(c->comp_hdr_block); - - // Free the slices; filled out by encoder only - if (c->slices) { - for (i = 0; i < c->max_slice; i++) { - if (c->slices[i]) - cram_free_slice(c->slices[i]); - if (c->slices[i] == c->slice) - c->slice = NULL; - } - free(c->slices); - } - - // Free the current slice; set by both encoder & decoder - if (c->slice) { - cram_free_slice(c->slice); - c->slice = NULL; - } - - for (id = DS_RN; id < DS_TN; id++) - if (c->stats[id]) cram_stats_free(c->stats[id]); - - //if (c->aux_B_stats) cram_stats_free(c->aux_B_stats); - - if (c->tags_used) { - khint_t k; - - for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { - if (!kh_exist(c->tags_used, k)) - continue; - - cram_tag_map *tm = (cram_tag_map *)kh_val(c->tags_used, k); - if (tm) { - cram_codec *c = tm->codec; - - if (c) c->free(c); - free(tm); - } - } - - kh_destroy(m_tagmap, c->tags_used); - } - - if (c->ref_free) - free(c->ref); - - free(c); -} - -/* - * Reads a container header. - * - * Returns cram_container on success - * NULL on failure or no container left (fd->err == 0). 
- */ -cram_container *cram_read_container(cram_fd *fd) { - cram_container c2, *c; - int i, s; - size_t rd = 0; - uint32_t crc = 0; - - fd->err = 0; - fd->eof = 0; - - memset(&c2, 0, sizeof(c2)); - if (CRAM_MAJOR_VERS(fd->version) == 1) { - if ((s = fd->vv.varint_decode32_crc(fd, &c2.length, &crc)) == -1) { - fd->eof = fd->empty_container ? 1 : 2; - return NULL; - } else { - rd+=s; - } - } else if (CRAM_MAJOR_VERS(fd->version) < 4) { - uint32_t len; - if ((s = int32_decode(fd, &c2.length)) == -1) { - if (CRAM_MAJOR_VERS(fd->version) == 2 && - CRAM_MINOR_VERS(fd->version) == 0) - fd->eof = 1; // EOF blocks arrived in v2.1 - else - fd->eof = fd->empty_container ? 1 : 2; - return NULL; - } else { - rd+=s; - } - len = le_int4(c2.length); - crc = crc32(0L, (unsigned char *)&len, 4); - } else { - if ((s = fd->vv.varint_decode32_crc(fd, &c2.length, &crc)) == -1) { - fd->eof = fd->empty_container ? 1 : 2; - return NULL; - } else { - rd+=s; - } - } - if ((s = fd->vv.varint_decode32s_crc(fd, &c2.ref_seq_id, &crc)) == -1) return NULL; else rd+=s; - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - int64_t i64; - if ((s = fd->vv.varint_decode64_crc(fd, &i64, &crc))== -1) return NULL; else rd+=s; - c2.ref_seq_start = i64; - if ((s = fd->vv.varint_decode64_crc(fd, &i64, &crc)) == -1) return NULL; else rd+=s; - c2.ref_seq_span = i64; - } else { - int32_t i32; - if ((s = fd->vv.varint_decode32_crc(fd, &i32, &crc))== -1) return NULL; else rd+=s; - c2.ref_seq_start = i32; - if ((s = fd->vv.varint_decode32_crc(fd, &i32, &crc)) == -1) return NULL; else rd+=s; - c2.ref_seq_span = i32; - } - if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_records, &crc)) == -1) return NULL; else rd+=s; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - c2.record_counter = 0; - c2.num_bases = 0; - } else { - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - if ((s = fd->vv.varint_decode64_crc(fd, &c2.record_counter, &crc)) == -1) - return NULL; - else - rd += s; - } else { - int32_t i32; - if ((s = 
fd->vv.varint_decode32_crc(fd, &i32, &crc)) == -1) - return NULL; - else - rd += s; - c2.record_counter = i32; - } - - if ((s = fd->vv.varint_decode64_crc(fd, &c2.num_bases, &crc))== -1) - return NULL; - else - rd += s; - } - if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_blocks, &crc)) == -1) - return NULL; - else - rd+=s; - if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_landmarks, &crc))== -1) - return NULL; - else - rd+=s; - - if (c2.num_landmarks < 0 || c2.num_landmarks >= SIZE_MAX / sizeof(int32_t)) - return NULL; - - if (!(c = calloc(1, sizeof(*c)))) - return NULL; - - *c = c2; - - if (c->num_landmarks && !(c->landmark = malloc(c->num_landmarks * sizeof(int32_t)))) { - fd->err = errno; - cram_free_container(c); - return NULL; - } - for (i = 0; i < c->num_landmarks; i++) { - if ((s = fd->vv.varint_decode32_crc(fd, &c->landmark[i], &crc)) == -1) { - cram_free_container(c); - return NULL; - } else { - rd += s; - } - } - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - if (-1 == int32_decode(fd, (int32_t *)&c->crc32)) { - cram_free_container(c); - return NULL; - } else { - rd+=4; - } - - if (crc != c->crc32) { - hts_log_error("Container header CRC32 failure"); - cram_free_container(c); - return NULL; - } - } - - c->offset = rd; - c->slices = NULL; - c->slice = NULL; - c->curr_slice = 0; - c->max_slice = c->num_landmarks; - c->slice_rec = 0; - c->curr_rec = 0; - c->max_rec = 0; - - if (c->ref_seq_id == -2) { - c->multi_seq = 1; - fd->multi_seq = 1; - } - - fd->empty_container = - (c->num_records == 0 && - c->ref_seq_id == -1 && - c->ref_seq_start == 0x454f46 /* EOF */) ? 1 : 0; - - return c; -} - - -/* MAXIMUM storage size needed for the container. */ -int cram_container_size(cram_container *c) { - return 55 + 5*c->num_landmarks; -} - - -/* - * Stores the container structure in dat and returns *size as the - * number of bytes written to dat[]. The input size of dat is also - * held in *size and should be initialised to cram_container_size(c). 
- * - * Returns 0 on success; - * -1 on failure - */ -int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size) -{ - char *cp = (char *)dat; - int i; - - // Check the input buffer is large enough according to our stated - // requirements. (NOTE: it may actually take less.) - if (cram_container_size(c) > *size) - return -1; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - cp += itf8_put(cp, c->length); - } else { - *(int32_t *)cp = le_int4(c->length); - cp += 4; - } - if (c->multi_seq) { - cp += fd->vv.varint_put32(cp, NULL, -2); - cp += fd->vv.varint_put32(cp, NULL, 0); - cp += fd->vv.varint_put32(cp, NULL, 0); - } else { - cp += fd->vv.varint_put32s(cp, NULL, c->ref_seq_id); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_start); - cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_span); - } else { - cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_start); - cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_span); - } - } - cp += fd->vv.varint_put32(cp, NULL, c->num_records); - if (CRAM_MAJOR_VERS(fd->version) == 2) { - cp += fd->vv.varint_put64(cp, NULL, c->record_counter); - } else if (CRAM_MAJOR_VERS(fd->version) >= 3) { - cp += fd->vv.varint_put32(cp, NULL, c->record_counter); - } - cp += fd->vv.varint_put64(cp, NULL, c->num_bases); - cp += fd->vv.varint_put32(cp, NULL, c->num_blocks); - cp += fd->vv.varint_put32(cp, NULL, c->num_landmarks); - for (i = 0; i < c->num_landmarks; i++) - cp += fd->vv.varint_put32(cp, NULL, c->landmark[i]); - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - c->crc32 = crc32(0L, (uc *)dat, cp-dat); - cp[0] = c->crc32 & 0xff; - cp[1] = (c->crc32 >> 8) & 0xff; - cp[2] = (c->crc32 >> 16) & 0xff; - cp[3] = (c->crc32 >> 24) & 0xff; - cp += 4; - } - - *size = cp-dat; // actual used size - - return 0; -} - - -/* - * Writes a container structure. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_write_container(cram_fd *fd, cram_container *c) { - char buf_a[1024], *buf = buf_a, *cp; - int i; - - if (61 + c->num_landmarks * 10 >= 1024) { - buf = malloc(61 + c->num_landmarks * 10); - if (!buf) - return -1; - } - cp = buf; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - cp += itf8_put(cp, c->length); - } else if (CRAM_MAJOR_VERS(fd->version) <= 3) { - *(int32_t *)cp = le_int4(c->length); - cp += 4; - } else { - cp += fd->vv.varint_put32(cp, NULL, c->length); - } - if (c->multi_seq) { - cp += fd->vv.varint_put32(cp, NULL, (uint32_t)-2); - cp += fd->vv.varint_put32(cp, NULL, 0); - cp += fd->vv.varint_put32(cp, NULL, 0); - } else { - cp += fd->vv.varint_put32s(cp, NULL, c->ref_seq_id); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_start); - cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_span); - } else { - cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_start); - cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_span); - } - } - cp += fd->vv.varint_put32(cp, NULL, c->num_records); - if (CRAM_MAJOR_VERS(fd->version) >= 3) - cp += fd->vv.varint_put64(cp, NULL, c->record_counter); - else - cp += fd->vv.varint_put32(cp, NULL, c->record_counter); - cp += fd->vv.varint_put64(cp, NULL, c->num_bases); - cp += fd->vv.varint_put32(cp, NULL, c->num_blocks); - cp += fd->vv.varint_put32(cp, NULL, c->num_landmarks); - for (i = 0; i < c->num_landmarks; i++) - cp += fd->vv.varint_put32(cp, NULL, c->landmark[i]); - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - c->crc32 = crc32(0L, (uc *)buf, cp-buf); - cp[0] = c->crc32 & 0xff; - cp[1] = (c->crc32 >> 8) & 0xff; - cp[2] = (c->crc32 >> 16) & 0xff; - cp[3] = (c->crc32 >> 24) & 0xff; - cp += 4; - } - - if (cp-buf != hwrite(fd->fp, buf, cp-buf)) { - if (buf != buf_a) - free(buf); - return -1; - } - - if (buf != buf_a) - free(buf); - - return 0; -} - -// common component shared by cram_flush_container{,_mt} -static int 
cram_flush_container2(cram_fd *fd, cram_container *c) { - int i, j; - - if (c->curr_slice > 0 && !c->slices) - return -1; - - //fprintf(stderr, "Writing container %d, sum %u\n", c->record_counter, sum); - - off_t c_offset = htell(fd->fp); // File offset of container - - /* Write the container struct itself */ - if (0 != cram_write_container(fd, c)) - return -1; - - off_t hdr_size = htell(fd->fp) - c_offset; - - /* And the compression header */ - if (0 != cram_write_block(fd, c->comp_hdr_block)) - return -1; - - /* Followed by the slice blocks */ - off_t file_offset = htell(fd->fp); - for (i = 0; i < c->curr_slice; i++) { - cram_slice *s = c->slices[i]; - off_t spos = file_offset - c_offset - hdr_size; - - if (0 != cram_write_block(fd, s->hdr_block)) - return -1; - - for (j = 0; j < s->hdr->num_blocks; j++) { - if (0 != cram_write_block(fd, s->block[j])) - return -1; - } - - file_offset = htell(fd->fp); - off_t sz = file_offset - c_offset - hdr_size - spos; - - if (fd->idxfp) { - if (cram_index_slice(fd, c, s, fd->idxfp, c_offset, spos, sz) < 0) - return -1; - } - } - - return 0; -} - -/* - * Flushes a completely or partially full container to disk, writing - * container structure, header and blocks. This also calls the encoder - * functions. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_flush_container(cram_fd *fd, cram_container *c) { - /* Encode the container blocks and generate compression header */ - if (0 != cram_encode_container(fd, c)) - return -1; - - return cram_flush_container2(fd, c); -} - -typedef struct { - cram_fd *fd; - cram_container *c; -} cram_job; - -void *cram_flush_thread(void *arg) { - cram_job *j = (cram_job *)arg; - - /* Encode the container blocks and generate compression header */ - if (0 != cram_encode_container(j->fd, j->c)) { - hts_log_error("Call to cram_encode_container failed"); - return NULL; - } - - return arg; -} - -static int cram_flush_result(cram_fd *fd) { - int i, ret = 0; - hts_tpool_result *r; - cram_container *lc = NULL; - - // NB: we can have one result per slice, not per container, - // so we need to free the container only after all slices - // within it have been freed. (Automatic via reference counting.) - while ((r = hts_tpool_next_result(fd->rqueue))) { - cram_job *j = (cram_job *)hts_tpool_result_data(r); - cram_container *c; - - if (!j) { - hts_tpool_delete_result(r, 0); - return -1; - } - - fd = j->fd; - c = j->c; - - if (fd->mode == 'w') - if (0 != cram_flush_container2(fd, c)) - return -1; - - // Free the slices; filled out by encoder only - if (c->slices) { - for (i = 0; i < c->max_slice; i++) { - if (c->slices[i]) - cram_free_slice(c->slices[i]); - if (c->slices[i] == c->slice) - c->slice = NULL; - c->slices[i] = NULL; - } - } - - // Free the current slice; set by both encoder & decoder - if (c->slice) { - cram_free_slice(c->slice); - c->slice = NULL; - } - c->curr_slice = 0; - - // Our jobs will be in order, so we free the last - // container when our job has switched to a new one. 
- if (c != lc) { - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } - lc = c; - } - - hts_tpool_delete_result(r, 1); - } - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } - - return ret; -} - -// Note: called while metrics_lock is held. -// Will be left in this state too, but may temporarily unlock. -void reset_metrics(cram_fd *fd) { - int i; - - if (fd->pool) { - // If multi-threaded we have multiple blocks being - // compressed already and several on the to-do list - // (fd->rqueue->pending). It's tricky to reset the - // metrics exactly the correct point, so instead we - // just flush the pool, reset, and then continue again. - - // Don't bother starting a new trial before then though. - for (i = 0; i < DS_END; i++) { - cram_metrics *m = fd->m[i]; - if (!m) - continue; - m->next_trial = 999; - } - - pthread_mutex_unlock(&fd->metrics_lock); - hts_tpool_process_flush(fd->rqueue); - pthread_mutex_lock(&fd->metrics_lock); - } - - for (i = 0; i < DS_END; i++) { - cram_metrics *m = fd->m[i]; - if (!m) - continue; - - m->trial = NTRIALS; - m->next_trial = TRIAL_SPAN; - m->revised_method = 0; - m->unpackable = 0; - - memset(m->sz, 0, sizeof(m->sz)); - } -} - -int cram_flush_container_mt(cram_fd *fd, cram_container *c) { - cram_job *j; - - // At the junction of mapped to unmapped data the compression - // methods may need to change due to very different statistical - // properties; particularly BA if minhash sorted. - // - // However with threading we'll have several in-flight blocks - // arriving out of order. - // - // So we do one trial reset of NThreads to last for NThreads - // duration to get us over this transition period, followed - // by another retrial of the usual ntrials & trial span. 
- pthread_mutex_lock(&fd->metrics_lock); - if (c->n_mapped < 0.3*c->curr_rec && - fd->last_mapped > 0.7*c->max_rec) { - reset_metrics(fd); - } - fd->last_mapped = c->n_mapped * (c->max_rec+1)/(c->curr_rec+1) ; - pthread_mutex_unlock(&fd->metrics_lock); - - if (!fd->pool) - return cram_flush_container(fd, c); - - if (!(j = malloc(sizeof(*j)))) - return -1; - j->fd = fd; - j->c = c; - - // Flush the job. Note our encoder queue may be full, so we - // either have to keep trying in non-blocking mode (what we do) or - // use a dedicated separate thread for draining the queue. - for (;;) { - errno = 0; - hts_tpool_dispatch2(fd->pool, fd->rqueue, cram_flush_thread, j, 1); - int pending = (errno == EAGAIN); - if (cram_flush_result(fd) != 0) - return -1; - if (!pending) - break; - - usleep(1000); - } - - return 0; -} - -/* ---------------------------------------------------------------------- - * Compression headers; the first part of the container - */ - -/* - * Creates a new blank container compression header - * - * Returns header ptr on success - * NULL on failure - */ -cram_block_compression_hdr *cram_new_compression_header(void) { - cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr)); - if (!hdr) - return NULL; - - if (!(hdr->TD_blk = cram_new_block(CORE, 0))) { - free(hdr); - return NULL; - } - - if (!(hdr->TD_hash = kh_init(m_s2i))) { - cram_free_block(hdr->TD_blk); - free(hdr); - return NULL; - } - - if (!(hdr->TD_keys = string_pool_create(8192))) { - kh_destroy(m_s2i, hdr->TD_hash); - cram_free_block(hdr->TD_blk); - free(hdr); - return NULL; - } - - return hdr; -} - -void cram_free_compression_header(cram_block_compression_hdr *hdr) { - int i; - - if (hdr->landmark) - free(hdr->landmark); - - if (hdr->preservation_map) - kh_destroy(map, hdr->preservation_map); - - for (i = 0; i < CRAM_MAP_HASH; i++) { - cram_map *m, *m2; - for (m = hdr->rec_encoding_map[i]; m; m = m2) { - m2 = m->next; - if (m->codec) - m->codec->free(m->codec); - free(m); - } - } - - for (i 
= 0; i < CRAM_MAP_HASH; i++) { - cram_map *m, *m2; - for (m = hdr->tag_encoding_map[i]; m; m = m2) { - m2 = m->next; - if (m->codec) - m->codec->free(m->codec); - free(m); - } - } - - for (i = 0; i < DS_END; i++) { - if (hdr->codecs[i]) - hdr->codecs[i]->free(hdr->codecs[i]); - } - - if (hdr->TL) - free(hdr->TL); - if (hdr->TD_blk) - cram_free_block(hdr->TD_blk); - if (hdr->TD_hash) - kh_destroy(m_s2i, hdr->TD_hash); - if (hdr->TD_keys) - string_pool_destroy(hdr->TD_keys); - - free(hdr); -} - - -/* ---------------------------------------------------------------------- - * Slices and slice headers - */ - -void cram_free_slice_header(cram_block_slice_hdr *hdr) { - if (!hdr) - return; - - if (hdr->block_content_ids) - free(hdr->block_content_ids); - - free(hdr); - - return; -} - -void cram_free_slice(cram_slice *s) { - if (!s) - return; - - if (s->hdr_block) - cram_free_block(s->hdr_block); - - if (s->block) { - int i; - - if (s->hdr) { - for (i = 0; i < s->hdr->num_blocks; i++) { - if (i > 0 && s->block[i] == s->block[0]) - continue; - cram_free_block(s->block[i]); - } - } - free(s->block); - } - - if (s->block_by_id) - free(s->block_by_id); - - if (s->hdr) - cram_free_slice_header(s->hdr); - - if (s->seqs_blk) - cram_free_block(s->seqs_blk); - - if (s->qual_blk) - cram_free_block(s->qual_blk); - - if (s->name_blk) - cram_free_block(s->name_blk); - - if (s->aux_blk) - cram_free_block(s->aux_blk); - - if (s->base_blk) - cram_free_block(s->base_blk); - - if (s->soft_blk) - cram_free_block(s->soft_blk); - - if (s->cigar) - free(s->cigar); - - if (s->crecs) - free(s->crecs); - - if (s->features) - free(s->features); - - if (s->TN) - free(s->TN); - - if (s->pair_keys) - string_pool_destroy(s->pair_keys); - - if (s->pair[0]) - kh_destroy(m_s2i, s->pair[0]); - if (s->pair[1]) - kh_destroy(m_s2i, s->pair[1]); - - if (s->aux_block) - free(s->aux_block); - - free(s); -} - -/* - * Creates a new empty slice in memory, for subsequent writing to - * disk. 
- * - * Returns cram_slice ptr on success - * NULL on failure - */ -cram_slice *cram_new_slice(enum cram_content_type type, int nrecs) { - cram_slice *s = calloc(1, sizeof(*s)); - if (!s) - return NULL; - - if (!(s->hdr = (cram_block_slice_hdr *)calloc(1, sizeof(*s->hdr)))) - goto err; - s->hdr->content_type = type; - - s->hdr_block = NULL; - s->block = NULL; - s->block_by_id = NULL; - s->last_apos = 0; - if (!(s->crecs = malloc(nrecs * sizeof(cram_record)))) goto err; - s->cigar_alloc = 1024; - if (!(s->cigar = malloc(s->cigar_alloc * sizeof(*s->cigar)))) goto err; - s->ncigar = 0; - - if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0))) goto err; - if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS))) goto err; - if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN))) goto err; - if (!(s->aux_blk = cram_new_block(EXTERNAL, DS_aux))) goto err; - if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN))) goto err; - if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC))) goto err; - - s->features = NULL; - s->nfeatures = s->afeatures = 0; - -#ifndef TN_external - s->TN = NULL; - s->nTN = s->aTN = 0; -#endif - - // Volatile keys as we do realloc in dstring - if (!(s->pair_keys = string_pool_create(8192))) goto err; - if (!(s->pair[0] = kh_init(m_s2i))) goto err; - if (!(s->pair[1] = kh_init(m_s2i))) goto err; - -#ifdef BA_external - s->BA_len = 0; -#endif - - return s; - - err: - if (s) - cram_free_slice(s); - - return NULL; -} - -/* - * Loads an entire slice. - * FIXME: In 1.0 the native unit of slices within CRAM is broken - * as slices contain references to objects in other slices. - * To work around this while keeping the slice oriented outer loop - * we read all slices and stitch them together into a fake large - * slice instead. 
- * - * Returns cram_slice ptr on success - * NULL on failure - */ -cram_slice *cram_read_slice(cram_fd *fd) { - cram_block *b = cram_read_block(fd); - cram_slice *s = calloc(1, sizeof(*s)); - int i, n, max_id, min_id; - - if (!b || !s) - goto err; - - s->hdr_block = b; - switch (b->content_type) { - case MAPPED_SLICE: - case UNMAPPED_SLICE: - if (!(s->hdr = cram_decode_slice_header(fd, b))) - goto err; - break; - - default: - hts_log_error("Unexpected block of type %s", - cram_content_type2str(b->content_type)); - goto err; - } - - if (s->hdr->num_blocks < 1) { - hts_log_error("Slice does not include any data blocks"); - goto err; - } - - s->block = calloc(n = s->hdr->num_blocks, sizeof(*s->block)); - if (!s->block) - goto err; - - for (max_id = i = 0, min_id = INT_MAX; i < n; i++) { - if (!(s->block[i] = cram_read_block(fd))) - goto err; - - if (s->block[i]->content_type == EXTERNAL) { - if (max_id < s->block[i]->content_id) - max_id = s->block[i]->content_id; - if (min_id > s->block[i]->content_id) - min_id = s->block[i]->content_id; - } - } - - if (!(s->block_by_id = calloc(512, sizeof(s->block[0])))) - goto err; - - for (i = 0; i < n; i++) { - if (s->block[i]->content_type != EXTERNAL) - continue; - uint32_t v = s->block[i]->content_id; - if (v >= 256) - v = 256 + v % 251; - s->block_by_id[v] = s->block[i]; - } - - /* Initialise encoding/decoding tables */ - s->cigar_alloc = 1024; - if (!(s->cigar = malloc(s->cigar_alloc * sizeof(*s->cigar)))) goto err; - s->ncigar = 0; - - if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0))) goto err; - if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS))) goto err; - if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN))) goto err; - if (!(s->aux_blk = cram_new_block(EXTERNAL, DS_aux))) goto err; - if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN))) goto err; - if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC))) goto err; - - s->crecs = NULL; - - s->last_apos = s->hdr->ref_seq_start; - s->decode_md = fd->decode_md; - - return 
s; - - err: - if (b) - cram_free_block(b); - if (s) { - s->hdr_block = NULL; - cram_free_slice(s); - } - return NULL; -} - - -/* ---------------------------------------------------------------------- - * CRAM file definition (header) - */ - -/* - * Reads a CRAM file definition structure. - * Returns file_def ptr on success - * NULL on failure - */ -cram_file_def *cram_read_file_def(cram_fd *fd) { - cram_file_def *def = malloc(sizeof(*def)); - if (!def) - return NULL; - - if (26 != hread(fd->fp, &def->magic[0], 26)) { - free(def); - return NULL; - } - - if (memcmp(def->magic, "CRAM", 4) != 0) { - free(def); - return NULL; - } - - if (def->major_version > 4) { - hts_log_error("CRAM version number mismatch. Expected 1.x, 2.x, 3.x or 4.x, got %d.%d", - def->major_version, def->minor_version); - free(def); - return NULL; - } - - fd->first_container += 26; - fd->curr_position = fd->first_container; - fd->last_slice = 0; - - return def; -} - -/* - * Writes a cram_file_def structure to cram_fd. - * Returns 0 on success - * -1 on failure - */ -int cram_write_file_def(cram_fd *fd, cram_file_def *def) { - return (hwrite(fd->fp, &def->magic[0], 26) == 26) ? 0 : -1; -} - -void cram_free_file_def(cram_file_def *def) { - if (def) free(def); -} - -/* ---------------------------------------------------------------------- - * SAM header I/O - */ - - -/* - * Reads the SAM header from the first CRAM data block. - * Also performs minimal parsing to extract read-group - * and sample information. 
- - * Returns SAM hdr ptr on success - * NULL on failure - */ -sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd) { - int32_t header_len; - char *header; - sam_hdr_t *hdr; - - /* 1.1 onwards stores the header in the first block of a container */ - if (CRAM_MAJOR_VERS(fd->version) == 1) { - /* Length */ - if (-1 == int32_decode(fd, &header_len)) - return NULL; - - /* Alloc and read */ - if (header_len < 0 || NULL == (header = malloc((size_t) header_len+1))) - return NULL; - - if (header_len != hread(fd->fp, header, header_len)) { - free(header); - return NULL; - } - header[header_len] = '\0'; - - fd->first_container += 4 + header_len; - } else { - cram_container *c = cram_read_container(fd); - cram_block *b; - int i; - int64_t len; - - if (!c) - return NULL; - - fd->first_container += c->length + c->offset; - fd->curr_position = fd->first_container; - - if (c->num_blocks < 1) { - cram_free_container(c); - return NULL; - } - - if (!(b = cram_read_block(fd))) { - cram_free_container(c); - return NULL; - } - if (cram_uncompress_block(b) != 0) { - cram_free_container(c); - cram_free_block(b); - return NULL; - } - - len = b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(b->uncomp_size) + - fd->vv.varint_size(b->comp_size); - - /* Extract header from 1st block */ - if (-1 == int32_get_blk(b, &header_len) || - header_len < 0 || /* Spec. says signed... why? 
*/ - b->uncomp_size - 4 < header_len) { - cram_free_container(c); - cram_free_block(b); - return NULL; - } - if (NULL == (header = malloc((size_t) header_len+1))) { - cram_free_container(c); - cram_free_block(b); - return NULL; - } - memcpy(header, BLOCK_END(b), header_len); - header[header_len] = '\0'; - cram_free_block(b); - - /* Consume any remaining blocks */ - for (i = 1; i < c->num_blocks; i++) { - if (!(b = cram_read_block(fd))) { - cram_free_container(c); - free(header); - return NULL; - } - len += b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(b->uncomp_size) + - fd->vv.varint_size(b->comp_size); - cram_free_block(b); - } - - if (c->length > 0 && len > 0 && c->length > len) { - // Consume padding - char *pads = malloc(c->length - len); - if (!pads) { - cram_free_container(c); - free(header); - return NULL; - } - - if (c->length - len != hread(fd->fp, pads, c->length - len)) { - cram_free_container(c); - free(header); - free(pads); - return NULL; - } - free(pads); - } - - cram_free_container(c); - } - - /* Parse */ - hdr = sam_hdr_init(); - if (!hdr) { - free(header); - return NULL; - } - - if (-1 == sam_hdr_add_lines(hdr, header, header_len)) { - free(header); - sam_hdr_destroy(hdr); - return NULL; - } - - hdr->l_text = header_len; - hdr->text = header; - - return hdr; - -} - -/* - * Converts 'in' to a full pathname to store in out. - * Out must be at least PATH_MAX bytes long. 
- */ -static void full_path(char *out, char *in) { - size_t in_l = strlen(in); - if (hisremote(in)) { - if (in_l > PATH_MAX) { - hts_log_error("Reference path is longer than %d", PATH_MAX); - return; - } - strncpy(out, in, PATH_MAX-1); - out[PATH_MAX-1] = 0; - return; - } - if (*in == '/' || - // Windows paths - (in_l > 3 && toupper_c(*in) >= 'A' && toupper_c(*in) <= 'Z' && - in[1] == ':' && (in[2] == '/' || in[2] == '\\'))) { - strncpy(out, in, PATH_MAX-1); - out[PATH_MAX-1] = 0; - } else { - size_t len; - - // unable to get dir or out+in is too long - if (!getcwd(out, PATH_MAX) || - (len = strlen(out))+1+strlen(in) >= PATH_MAX) { - strncpy(out, in, PATH_MAX-1); - out[PATH_MAX-1] = 0; - return; - } - - snprintf(out+len, PATH_MAX - len, "/%s", in); - - // FIXME: cope with `pwd`/../../../foo.fa ? - } -} - -/* - * Writes a CRAM SAM header. - * Returns 0 on success - * -1 on failure - */ -int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr) { - size_t header_len; - int blank_block = (CRAM_MAJOR_VERS(fd->version) >= 3); - - /* Write CRAM MAGIC if not yet written. 
*/ - if (fd->file_def->major_version == 0) { - fd->file_def->major_version = CRAM_MAJOR_VERS(fd->version); - fd->file_def->minor_version = CRAM_MINOR_VERS(fd->version); - if (0 != cram_write_file_def(fd, fd->file_def)) - return -1; - } - - /* 1.0 requires an UNKNOWN read-group */ - if (CRAM_MAJOR_VERS(fd->version) == 1) { - if (!sam_hrecs_find_rg(hdr->hrecs, "UNKNOWN")) - if (sam_hdr_add_line(hdr, "RG", - "ID", "UNKNOWN", "SM", "UNKNOWN", NULL)) - return -1; - } - - if (-1 == refs_from_header(fd)) - return -1; - if (-1 == refs2id(fd->refs, fd->header)) - return -1; - - /* Fix M5 strings */ - if (fd->refs && !fd->no_ref && fd->embed_ref <= 1) { - int i; - for (i = 0; i < hdr->hrecs->nref; i++) { - sam_hrec_type_t *ty; - char *ref; - - if (!(ty = sam_hrecs_find_type_id(hdr->hrecs, "SQ", "SN", hdr->hrecs->ref[i].name))) - return -1; - - if (!sam_hrecs_find_key(ty, "M5", NULL)) { - char unsigned buf[16]; - char buf2[33]; - int rlen; - hts_md5_context *md5; - - if (!fd->refs || - !fd->refs->ref_id || - !fd->refs->ref_id[i]) { - return -1; - } - rlen = fd->refs->ref_id[i]->length; - ref = cram_get_ref(fd, i, 1, rlen); - if (NULL == ref) { - if (fd->embed_ref == -1) { - // auto embed-ref - hts_log_warning("No M5 tags present and could not " - "find reference"); - hts_log_warning("Enabling embed_ref=2 option"); - hts_log_warning("NOTE: the CRAM file will be bigger " - "than using an external reference"); - pthread_mutex_lock(&fd->ref_lock); - fd->embed_ref = 2; - pthread_mutex_unlock(&fd->ref_lock); - break; - } - return -1; - } - rlen = fd->refs->ref_id[i]->length; /* In case it just loaded */ - if (!(md5 = hts_md5_init())) - return -1; - hts_md5_update(md5, ref, rlen); - hts_md5_final(buf, md5); - hts_md5_destroy(md5); - cram_ref_decr(fd->refs, i); - - hts_md5_hex(buf2, buf); - fd->refs->ref_id[i]->validated_md5 = 1; - if (sam_hdr_update_line(hdr, "SQ", "SN", hdr->hrecs->ref[i].name, "M5", buf2, NULL)) - return -1; - } - - if (fd->ref_fn) { - char ref_fn[PATH_MAX]; - 
full_path(ref_fn, fd->ref_fn); - if (sam_hdr_update_line(hdr, "SQ", "SN", hdr->hrecs->ref[i].name, "UR", ref_fn, NULL)) - return -1; - } - } - } - - /* Length */ - header_len = sam_hdr_length(hdr); - if (header_len > INT32_MAX) { - hts_log_error("Header is too long for CRAM format"); - return -1; - } - if (CRAM_MAJOR_VERS(fd->version) == 1) { - if (-1 == int32_encode(fd, header_len)) - return -1; - - /* Text data */ - if (header_len != hwrite(fd->fp, sam_hdr_str(hdr), header_len)) - return -1; - } else { - /* Create block(s) inside a container */ - cram_block *b = cram_new_block(FILE_HEADER, 0); - cram_container *c = cram_new_container(0, 0); - int padded_length; - char *pads; - int is_cram_3 = (CRAM_MAJOR_VERS(fd->version) >= 3); - - if (!b || !c) { - if (b) cram_free_block(b); - if (c) cram_free_container(c); - return -1; - } - - if (int32_put_blk(b, header_len) < 0) - return -1; - if (header_len) - BLOCK_APPEND(b, sam_hdr_str(hdr), header_len); - BLOCK_UPLEN(b); - - // Compress header block if V3.0 and above - if (CRAM_MAJOR_VERS(fd->version) >= 3) - if (cram_compress_block(fd, b, NULL, -1, -1) < 0) - return -1; - - if (blank_block) { - c->length = b->comp_size + 2 + 4*is_cram_3 + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(b->uncomp_size) + - fd->vv.varint_size(b->comp_size); - - c->num_blocks = 2; - c->num_landmarks = 2; - if (!(c->landmark = malloc(2*sizeof(*c->landmark)))) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - c->landmark[0] = 0; - c->landmark[1] = c->length; - - // Plus extra storage for uncompressed secondary blank block - padded_length = MIN(c->length*.5, 10000); - c->length += padded_length + 2 + 4*is_cram_3 + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(padded_length)*2; - } else { - // Pad the block instead. 
- c->num_blocks = 1; - c->num_landmarks = 1; - if (!(c->landmark = malloc(sizeof(*c->landmark)))) - return -1; - c->landmark[0] = 0; - - padded_length = MAX(c->length*1.5, 10000) - c->length; - - c->length = b->comp_size + padded_length + - 2 + 4*is_cram_3 + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(b->uncomp_size) + - fd->vv.varint_size(b->comp_size); - - if (NULL == (pads = calloc(1, padded_length))) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - BLOCK_APPEND(b, pads, padded_length); - BLOCK_UPLEN(b); - free(pads); - } - - if (-1 == cram_write_container(fd, c)) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - - if (-1 == cram_write_block(fd, b)) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - - if (blank_block) { - BLOCK_RESIZE(b, padded_length); - memset(BLOCK_DATA(b), 0, padded_length); - BLOCK_SIZE(b) = padded_length; - BLOCK_UPLEN(b); - b->method = RAW; - if (-1 == cram_write_block(fd, b)) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - } - - cram_free_block(b); - cram_free_container(c); - } - - if (0 != hflush(fd->fp)) - return -1; - - RP("=== Finishing saving header ===\n"); - - return 0; - - block_err: - return -1; -} - -/* ---------------------------------------------------------------------- - * The top-level cram opening, closing and option handling - */ - -/* - * Sets CRAM variable sized integer decode function tables. - * CRAM 1, 2, and 3.x all used ITF8 for uint32 and UTF8 for uint64. - * CRAM 4.x uses the same encoding mechanism for 32-bit and 64-bit - * (or anything inbetween), but also now supports signed values. - * - * Version is the CRAM major version number. 
- * vv is the vector table (probably &cram_fd->vv) - */ -static void cram_init_varint(varint_vec *vv, int version) { - if (version >= 4) { - vv->varint_get32 = uint7_get_32; // FIXME: varint.h API should be size agnostic - vv->varint_get32s = sint7_get_32; - vv->varint_get64 = uint7_get_64; - vv->varint_get64s = sint7_get_64; - vv->varint_put32 = uint7_put_32; - vv->varint_put32s = sint7_put_32; - vv->varint_put64 = uint7_put_64; - vv->varint_put64s = sint7_put_64; - vv->varint_put32_blk = uint7_put_blk_32; - vv->varint_put32s_blk = sint7_put_blk_32; - vv->varint_put64_blk = uint7_put_blk_64; - vv->varint_put64s_blk = sint7_put_blk_64; - vv->varint_size = uint7_size; - vv->varint_decode32_crc = uint7_decode_crc32; - vv->varint_decode32s_crc = sint7_decode_crc32; - vv->varint_decode64_crc = uint7_decode_crc64; - } else { - vv->varint_get32 = safe_itf8_get; - vv->varint_get32s = safe_itf8_get; - vv->varint_get64 = safe_ltf8_get; - vv->varint_get64s = safe_ltf8_get; - vv->varint_put32 = safe_itf8_put; - vv->varint_put32s = safe_itf8_put; - vv->varint_put64 = safe_ltf8_put; - vv->varint_put64s = safe_ltf8_put; - vv->varint_put32_blk = itf8_put_blk; - vv->varint_put32s_blk = itf8_put_blk; - vv->varint_put64_blk = ltf8_put_blk; - vv->varint_put64s_blk = ltf8_put_blk; - vv->varint_size = itf8_size; - vv->varint_decode32_crc = itf8_decode_crc; - vv->varint_decode32s_crc = itf8_decode_crc; - vv->varint_decode64_crc = ltf8_decode_crc; - } -} - -/* - * Initialises the lookup tables. These could be global statics, but they're - * clumsy to setup in a multi-threaded environment unless we generate - * verbatim code and include that. 
- */ -static void cram_init_tables(cram_fd *fd) { - int i; - - memset(fd->L1, 4, 256); - fd->L1['A'] = 0; fd->L1['a'] = 0; - fd->L1['C'] = 1; fd->L1['c'] = 1; - fd->L1['G'] = 2; fd->L1['g'] = 2; - fd->L1['T'] = 3; fd->L1['t'] = 3; - - memset(fd->L2, 5, 256); - fd->L2['A'] = 0; fd->L2['a'] = 0; - fd->L2['C'] = 1; fd->L2['c'] = 1; - fd->L2['G'] = 2; fd->L2['g'] = 2; - fd->L2['T'] = 3; fd->L2['t'] = 3; - fd->L2['N'] = 4; fd->L2['n'] = 4; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - for (i = 0; i < 0x200; i++) { - int f = 0; - - if (i & CRAM_FPAIRED) f |= BAM_FPAIRED; - if (i & CRAM_FPROPER_PAIR) f |= BAM_FPROPER_PAIR; - if (i & CRAM_FUNMAP) f |= BAM_FUNMAP; - if (i & CRAM_FREVERSE) f |= BAM_FREVERSE; - if (i & CRAM_FREAD1) f |= BAM_FREAD1; - if (i & CRAM_FREAD2) f |= BAM_FREAD2; - if (i & CRAM_FSECONDARY) f |= BAM_FSECONDARY; - if (i & CRAM_FQCFAIL) f |= BAM_FQCFAIL; - if (i & CRAM_FDUP) f |= BAM_FDUP; - - fd->bam_flag_swap[i] = f; - } - - for (i = 0; i < 0x1000; i++) { - int g = 0; - - if (i & BAM_FPAIRED) g |= CRAM_FPAIRED; - if (i & BAM_FPROPER_PAIR) g |= CRAM_FPROPER_PAIR; - if (i & BAM_FUNMAP) g |= CRAM_FUNMAP; - if (i & BAM_FREVERSE) g |= CRAM_FREVERSE; - if (i & BAM_FREAD1) g |= CRAM_FREAD1; - if (i & BAM_FREAD2) g |= CRAM_FREAD2; - if (i & BAM_FSECONDARY) g |= CRAM_FSECONDARY; - if (i & BAM_FQCFAIL) g |= CRAM_FQCFAIL; - if (i & BAM_FDUP) g |= CRAM_FDUP; - - fd->cram_flag_swap[i] = g; - } - } else { - /* NOP */ - for (i = 0; i < 0x1000; i++) - fd->bam_flag_swap[i] = i; - for (i = 0; i < 0x1000; i++) - fd->cram_flag_swap[i] = i; - } - - memset(fd->cram_sub_matrix, 4, 32*32); - for (i = 0; i < 32; i++) { - fd->cram_sub_matrix[i]['A'&0x1f]=0; - fd->cram_sub_matrix[i]['C'&0x1f]=1; - fd->cram_sub_matrix[i]['G'&0x1f]=2; - fd->cram_sub_matrix[i]['T'&0x1f]=3; - fd->cram_sub_matrix[i]['N'&0x1f]=4; - } - for (i = 0; i < 20; i+=4) { - int j; - for (j = 0; j < 20; j++) { - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; - 
fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; - } - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+0]&0x1f]=0; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+1]&0x1f]=1; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+2]&0x1f]=2; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+3]&0x1f]=3; - } - - cram_init_varint(&fd->vv, CRAM_MAJOR_VERS(fd->version)); -} - -// Default version numbers for CRAM -static int major_version = 3; -static int minor_version = 0; - -/* - * Opens a CRAM file for read (mode "rb") or write ("wb"). - * The filename may be "-" to indicate stdin or stdout. - * - * Returns file handle on success - * NULL on failure. - */ -cram_fd *cram_open(const char *filename, const char *mode) { - hFILE *fp; - cram_fd *fd; - char fmode[3]= { mode[0], '\0', '\0' }; - - if (strlen(mode) > 1 && (mode[1] == 'b' || mode[1] == 'c')) { - fmode[1] = 'b'; - } - - fp = hopen(filename, fmode); - if (!fp) - return NULL; - - fd = cram_dopen(fp, filename, mode); - if (!fd) - hclose_abruptly(fp); - - return fd; -} - -/* Opens an existing stream for reading or writing. - * - * Returns file handle on success; - * NULL on failure. 
- */ -cram_fd *cram_dopen(hFILE *fp, const char *filename, const char *mode) { - int i; - char *cp; - cram_fd *fd = calloc(1, sizeof(*fd)); - if (!fd) - return NULL; - - fd->level = CRAM_DEFAULT_LEVEL; - for (i = 0; mode[i]; i++) { - if (mode[i] >= '0' && mode[i] <= '9') { - fd->level = mode[i] - '0'; - break; - } - } - - fd->fp = fp; - fd->mode = *mode; - fd->first_container = 0; - fd->curr_position = 0; - - if (fd->mode == 'r') { - /* Reader */ - - if (!(fd->file_def = cram_read_file_def(fd))) - goto err; - - fd->version = fd->file_def->major_version * 256 + - fd->file_def->minor_version; - - cram_init_tables(fd); - - if (!(fd->header = cram_read_SAM_hdr(fd))) { - cram_free_file_def(fd->file_def); - goto err; - } - - } else { - /* Writer */ - cram_file_def *def = calloc(1, sizeof(*def)); - if (!def) - return NULL; - - fd->file_def = def; - - def->magic[0] = 'C'; - def->magic[1] = 'R'; - def->magic[2] = 'A'; - def->magic[3] = 'M'; - def->major_version = 0; // Indicator to write file def later. - def->minor_version = 0; - memset(def->file_id, 0, 20); - strncpy(def->file_id, filename, 20); - - fd->version = major_version * 256 + minor_version; - cram_init_tables(fd); - - /* SAM header written later along with this file_def */ - } - - fd->prefix = strdup((cp = strrchr(filename, '/')) ? 
cp+1 : filename); - if (!fd->prefix) - goto err; - fd->first_base = fd->last_base = -1; - fd->record_counter = 0; - - fd->ctr = NULL; - fd->ctr_mt = NULL; - fd->refs = refs_create(); - if (!fd->refs) - goto err; - fd->ref_id = -2; - fd->ref = NULL; - - fd->decode_md = 0; - fd->seqs_per_slice = SEQS_PER_SLICE; - fd->bases_per_slice = BASES_PER_SLICE; - fd->slices_per_container = SLICE_PER_CNT; - fd->embed_ref = -1; // automatic selection - fd->no_ref = 0; - fd->ap_delta = 0; - fd->ignore_md5 = 0; - fd->lossy_read_names = 0; - fd->use_bz2 = 0; - fd->use_rans = (CRAM_MAJOR_VERS(fd->version) >= 3); - fd->use_tok = (CRAM_MAJOR_VERS(fd->version) >= 3) && (CRAM_MINOR_VERS(fd->version) >= 1); - fd->use_lzma = 0; - fd->multi_seq = -1; - fd->multi_seq_user = -1; - fd->unsorted = 0; - fd->shared_ref = 0; - fd->store_md = 0; - fd->store_nm = 0; - fd->last_RI_count = 0; - - fd->index = NULL; - fd->own_pool = 0; - fd->pool = NULL; - fd->rqueue = NULL; - fd->job_pending = NULL; - fd->ooc = 0; - fd->required_fields = INT_MAX; - - for (i = 0; i < DS_END; i++) { - fd->m[i] = cram_new_metrics(); - if (!fd->m[i]) - goto err; - } - - if (!(fd->tags_used = kh_init(m_metrics))) - goto err; - - fd->range.refid = -2; // no ref. - fd->eof = 1; // See samtools issue #150 - fd->ref_fn = NULL; - - fd->bl = NULL; - - /* Initialise dummy refs from the @SQ headers */ - if (-1 == refs_from_header(fd)) - goto err; - - return fd; - - err: - if (fd) - free(fd); - - return NULL; -} - -/* - * Seek within a CRAM file. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_seek(cram_fd *fd, off_t offset, int whence) { - char buf[65536]; - - fd->ooc = 0; - - cram_drain_rqueue(fd); - - if (hseek(fd->fp, offset, whence) >= 0) { - return 0; - } - - if (!(whence == SEEK_CUR && offset >= 0)) - return -1; - - /* Couldn't fseek, but we're in SEEK_CUR mode so read instead */ - while (offset > 0) { - int len = MIN(65536, offset); - if (len != hread(fd->fp, buf, len)) - return -1; - offset -= len; - } - - return 0; -} - -/* - * Flushes a CRAM file. - * Useful for when writing to stdout without wishing to close the stream. - * - * Returns 0 on success - * -1 on failure - */ -int cram_flush(cram_fd *fd) { - if (!fd) - return -1; - - if (fd->mode == 'w' && fd->ctr) { - if(fd->ctr->slice) - cram_update_curr_slice(fd->ctr, fd->version); - - if (-1 == cram_flush_container_mt(fd, fd->ctr)) - return -1; - } - - return 0; -} - -/* - * Writes an EOF block to a CRAM file. - * - * Returns 0 on success - * -1 on failure - */ -int cram_write_eof_block(cram_fd *fd) { - // EOF block is a container with special values to aid detection - if (CRAM_MAJOR_VERS(fd->version) >= 2) { - // Empty container with - // ref_seq_id -1 - // start pos 0x454f46 ("EOF") - // span 0 - // nrec 0 - // counter 0 - // nbases 0 - // 1 block (landmark 0) - // (CRC32) - cram_container c; - memset(&c, 0, sizeof(c)); - c.ref_seq_id = -1; - c.ref_seq_start = 0x454f46; // "EOF" - c.ref_seq_span = 0; - c.record_counter = 0; - c.num_bases = 0; - c.num_blocks = 1; - int32_t land[1] = {0}; - c.landmark = land; - - // An empty compression header block with - // method raw (0) - // type comp header (1) - // content id 0 - // block contents size 6 - // raw size 6 - // empty preservation map (01 00) - // empty data series map (01 00) - // empty tag map (01 00) - // block CRC - cram_block_compression_hdr ch; - memset(&ch, 0, sizeof(ch)); - c.comp_hdr_block = cram_encode_compression_header(fd, &c, &ch, 0); - - c.length = 
c.comp_hdr_block->byte // Landmark[0] - + 5 // block struct - + 4*(CRAM_MAJOR_VERS(fd->version) >= 3); // CRC - if (cram_write_container(fd, &c) < 0 || - cram_write_block(fd, c.comp_hdr_block) < 0) { - cram_close(fd); - cram_free_block(c.comp_hdr_block); - return -1; - } - if (ch.preservation_map) - kh_destroy(map, ch.preservation_map); - cram_free_block(c.comp_hdr_block); - - // V2.1 bytes - // 0b 00 00 00 ff ff ff ff 0f // Cont HDR: size, ref seq id - // e0 45 4f 46 00 00 00 // Cont HDR: pos, span, nrec, counter - // 00 01 00 // Cont HDR: nbase, nblk, landmark - // 00 01 00 06 06 // Comp.HDR blk - // 01 00 01 00 01 00 // Comp.HDR blk - - // V3.0 bytes: - // 0f 00 00 00 ff ff ff ff 0f // Cont HDR: size, ref seq id - // e0 45 4f 46 00 00 00 // Cont HDR: pos, span, nrec, counter - // 00 01 00 // Cont HDR: nbase, nblk, landmark - // 05 bd d9 4f // CRC32 - // 00 01 00 06 06 // Comp.HDR blk - // 01 00 01 00 01 00 // Comp.HDR blk - // ee 63 01 4b // CRC32 - - // V4.0 bytes: - // 0f 00 00 00 8f ff ff ff // Cont HDR: size, ref seq id - // 82 95 9e 46 00 00 00 // Cont HDR: pos, span, nrec, counter - // 00 01 00 // Cont HDR: nbase, nblk, landmark - // ac d6 05 bc // CRC32 - // 00 01 00 06 06 // Comp.HDR blk - // 01 00 01 00 01 00 // Comp.HDR blk - // ee 63 01 4b // CRC32 - } - - return 0; -} -/* - * Closes a CRAM file. 
- * Returns 0 on success - * -1 on failure - */ -int cram_close(cram_fd *fd) { - spare_bams *bl, *next; - int i; - - if (!fd) - return -1; - - if (fd->mode == 'w' && fd->ctr) { - if(fd->ctr->slice) - cram_update_curr_slice(fd->ctr, fd->version); - - if (-1 == cram_flush_container_mt(fd, fd->ctr)) - return -1; - } - - if (fd->mode != 'w') - cram_drain_rqueue(fd); - - if (fd->pool && fd->eof >= 0 && fd->rqueue) { - hts_tpool_process_flush(fd->rqueue); - - if (0 != cram_flush_result(fd)) - return -1; - - if (fd->mode == 'w') - fd->ctr = NULL; // prevent double freeing - - pthread_mutex_destroy(&fd->metrics_lock); - pthread_mutex_destroy(&fd->ref_lock); - pthread_mutex_destroy(&fd->bam_list_lock); - - //fprintf(stderr, "CRAM: destroy queue %p\n", fd->rqueue); - - hts_tpool_process_destroy(fd->rqueue); - } - - if (fd->mode == 'w') { - /* Write EOF block */ - if (0 != cram_write_eof_block(fd)) - return -1; - } - - for (bl = fd->bl; bl; bl = next) { - int i, max_rec = fd->seqs_per_slice * fd->slices_per_container; - - next = bl->next; - for (i = 0; i < max_rec; i++) { - if (bl->bams[i]) - bam_free(bl->bams[i]); - } - free(bl->bams); - free(bl); - } - - if (hclose(fd->fp) != 0) - return -1; - - if (fd->file_def) - cram_free_file_def(fd->file_def); - - if (fd->header) - sam_hdr_destroy(fd->header); - - free(fd->prefix); - - if (fd->ctr) - cram_free_container(fd->ctr); - - if (fd->ctr_mt && fd->ctr_mt != fd->ctr) - cram_free_container(fd->ctr_mt); - - if (fd->refs) - refs_free(fd->refs); - if (fd->ref_free) - free(fd->ref_free); - - for (i = 0; i < DS_END; i++) - if (fd->m[i]) - free(fd->m[i]); - - if (fd->tags_used) { - khint_t k; - - for (k = kh_begin(fd->tags_used); k != kh_end(fd->tags_used); k++) { - if (kh_exist(fd->tags_used, k)) - free(kh_val(fd->tags_used, k)); - } - - kh_destroy(m_metrics, fd->tags_used); - } - - if (fd->index) - cram_index_free(fd); - - if (fd->own_pool && fd->pool) - hts_tpool_destroy(fd->pool); - - if (fd->idxfp) - if (bgzf_close(fd->idxfp) < 0) 
- return -1; - - free(fd); - return 0; -} - -/* - * Returns 1 if we hit an EOF while reading. - */ -int cram_eof(cram_fd *fd) { - return fd->eof; -} - - -/* - * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h. - * Use this immediately after opening. - * - * Returns 0 on success - * -1 on failure - */ -int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...) { - int r; - va_list args; - - va_start(args, opt); - r = cram_set_voption(fd, opt, args); - va_end(args); - - return r; -} - -/* - * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h. - * Use this immediately after opening. - * - * Returns 0 on success - * -1 on failure - */ -int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args) { - refs_t *refs; - - if (!fd) { - errno = EBADF; - return -1; - } - - switch (opt) { - case CRAM_OPT_DECODE_MD: - fd->decode_md = va_arg(args, int); - break; - - case CRAM_OPT_PREFIX: - if (fd->prefix) - free(fd->prefix); - if (!(fd->prefix = strdup(va_arg(args, char *)))) - return -1; - break; - - case CRAM_OPT_VERBOSITY: - break; - - case CRAM_OPT_SEQS_PER_SLICE: - fd->seqs_per_slice = va_arg(args, int); - if (fd->bases_per_slice == BASES_PER_SLICE) - fd->bases_per_slice = fd->seqs_per_slice * 500; - break; - - case CRAM_OPT_BASES_PER_SLICE: - fd->bases_per_slice = va_arg(args, int); - break; - - case CRAM_OPT_SLICES_PER_CONTAINER: - fd->slices_per_container = va_arg(args, int); - break; - - case CRAM_OPT_EMBED_REF: - fd->embed_ref = va_arg(args, int); - break; - - case CRAM_OPT_NO_REF: - fd->no_ref = va_arg(args, int); - break; - - case CRAM_OPT_POS_DELTA: - fd->ap_delta = va_arg(args, int); - break; - - case CRAM_OPT_IGNORE_MD5: - fd->ignore_md5 = va_arg(args, int); - break; - - case CRAM_OPT_LOSSY_NAMES: - fd->lossy_read_names = va_arg(args, int); - // Currently lossy read names required paired (attached) reads. 
- // TLEN 0 or being 1 out causes read pairs to be detached, breaking - // the lossy read name compression, so we have extra options to - // slacken the exact TLEN round-trip checks. - fd->tlen_approx = fd->lossy_read_names; - fd->tlen_zero = fd->lossy_read_names; - break; - - case CRAM_OPT_USE_BZIP2: - fd->use_bz2 = va_arg(args, int); - break; - - case CRAM_OPT_USE_RANS: - fd->use_rans = va_arg(args, int); - break; - - case CRAM_OPT_USE_TOK: - fd->use_tok = va_arg(args, int); - break; - - case CRAM_OPT_USE_FQZ: - fd->use_fqz = va_arg(args, int); - break; - - case CRAM_OPT_USE_ARITH: - fd->use_arith = va_arg(args, int); - break; - - case CRAM_OPT_USE_LZMA: - fd->use_lzma = va_arg(args, int); - break; - - case CRAM_OPT_SHARED_REF: - fd->shared_ref = 1; - refs = va_arg(args, refs_t *); - if (refs != fd->refs) { - if (fd->refs) - refs_free(fd->refs); - fd->refs = refs; - fd->refs->count++; - } - break; - - case CRAM_OPT_RANGE: { - int r = cram_seek_to_refpos(fd, va_arg(args, cram_range *)); - pthread_mutex_lock(&fd->range_lock); - if (fd->range.refid != -2) - fd->required_fields |= SAM_POS; - pthread_mutex_unlock(&fd->range_lock); - return r; - } - - case CRAM_OPT_RANGE_NOSEEK: { - // As per CRAM_OPT_RANGE, but no seeking - pthread_mutex_lock(&fd->range_lock); - cram_range *r = va_arg(args, cram_range *); - fd->range = *r; - if (r->refid == HTS_IDX_NOCOOR) { - fd->range.refid = -1; - fd->range.start = 0; - } else if (r->refid == HTS_IDX_START || r->refid == HTS_IDX_REST) { - fd->range.refid = -2; // special case in cram_next_slice - } - if (fd->range.refid != -2) - fd->required_fields |= SAM_POS; - fd->ooc = 0; - fd->eof = 0; - pthread_mutex_unlock(&fd->range_lock); - return 0; - } - - case CRAM_OPT_REFERENCE: - return cram_load_reference(fd, va_arg(args, char *)); - - case CRAM_OPT_VERSION: { - int major, minor; - char *s = va_arg(args, char *); - if (2 != sscanf(s, "%d.%d", &major, &minor)) { - hts_log_error("Malformed version string %s", s); - return -1; - } - if 
(!((major == 1 && minor == 0) || - (major == 2 && (minor == 0 || minor == 1)) || - (major == 3 && (minor == 0 || minor == 1)) || - (major == 4 && minor == 0))) { - hts_log_error("Unknown version string; use 1.0, 2.0, 2.1, 3.0, 3.1 or 4.0"); - errno = EINVAL; - return -1; - } - - if (major > 3 || (major == 3 && minor > 1)) { - hts_log_warning( - "CRAM version %s is still a draft and subject to change.\n" - "This is a technology demonstration that should not be " - "used for archival data.", s); - } - - fd->version = major*256 + minor; - - fd->use_rans = (CRAM_MAJOR_VERS(fd->version) >= 3) ? 1 : 0; - - fd->use_tok = ((CRAM_MAJOR_VERS(fd->version) == 3 && - CRAM_MINOR_VERS(fd->version) >= 1) || - CRAM_MAJOR_VERS(fd->version) >= 4) ? 1 : 0; - cram_init_tables(fd); - - break; - } - - case CRAM_OPT_MULTI_SEQ_PER_SLICE: - fd->multi_seq_user = fd->multi_seq = va_arg(args, int); - break; - - case CRAM_OPT_NTHREADS: { - int nthreads = va_arg(args, int); - if (nthreads >= 1) { - if (!(fd->pool = hts_tpool_init(nthreads))) - return -1; - - fd->rqueue = hts_tpool_process_init(fd->pool, nthreads*2, 0); - pthread_mutex_init(&fd->metrics_lock, NULL); - pthread_mutex_init(&fd->ref_lock, NULL); - pthread_mutex_init(&fd->range_lock, NULL); - pthread_mutex_init(&fd->bam_list_lock, NULL); - fd->shared_ref = 1; - fd->own_pool = 1; - } - break; - } - - case CRAM_OPT_THREAD_POOL: { - htsThreadPool *p = va_arg(args, htsThreadPool *); - fd->pool = p ? p->pool : NULL; - if (fd->pool) { - fd->rqueue = hts_tpool_process_init(fd->pool, - p->qsize ? 
p->qsize : hts_tpool_size(fd->pool)*2, - 0); - pthread_mutex_init(&fd->metrics_lock, NULL); - pthread_mutex_init(&fd->ref_lock, NULL); - pthread_mutex_init(&fd->range_lock, NULL); - pthread_mutex_init(&fd->bam_list_lock, NULL); - } - fd->shared_ref = 1; // Needed to avoid clobbering ref between threads - fd->own_pool = 0; - - //fd->qsize = 1; - //fd->decoded = calloc(fd->qsize, sizeof(cram_container *)); - //hts_tpool_dispatch(fd->pool, cram_decoder_thread, fd); - break; - } - - case CRAM_OPT_REQUIRED_FIELDS: - fd->required_fields = va_arg(args, int); - if (fd->range.refid != -2) - fd->required_fields |= SAM_POS; - break; - - case CRAM_OPT_STORE_MD: - fd->store_md = va_arg(args, int); - break; - - case CRAM_OPT_STORE_NM: - fd->store_nm = va_arg(args, int); - break; - - case HTS_OPT_COMPRESSION_LEVEL: - fd->level = va_arg(args, int); - break; - - case HTS_OPT_PROFILE: { - enum hts_profile_option prof = va_arg(args, int); - switch (prof) { - case HTS_PROFILE_FAST: - if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 1; - fd->use_tok = 0; - fd->seqs_per_slice = 10000; - break; - - case HTS_PROFILE_NORMAL: - break; - - case HTS_PROFILE_SMALL: - if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 6; - fd->use_bz2 = 1; - fd->use_fqz = 1; - fd->seqs_per_slice = 25000; - break; - - case HTS_PROFILE_ARCHIVE: - if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 7; - fd->use_bz2 = 1; - fd->use_fqz = 1; - fd->use_arith = 1; - if (fd->level > 7) - fd->use_lzma = 1; - fd->seqs_per_slice = 100000; - break; - } - - if (fd->bases_per_slice == BASES_PER_SLICE) - fd->bases_per_slice = fd->seqs_per_slice * 500; - break; - } - - default: - hts_log_error("Unknown CRAM option code %d", opt); - errno = EINVAL; - return -1; - } - - return 0; -} - -int cram_check_EOF(cram_fd *fd) -{ - // Byte 9 in these templates is & with 0x0f to resolve differences - // between ITF-8 interpretations between early Java and C - // implementations of CRAM - static const unsigned char TEMPLATE_2_1[30] = { - 0x0b, 
0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xe0, - 0x45, 0x4f, 0x46, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, - 0x01, 0x00, 0x06, 0x06, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00 - }; - static const unsigned char TEMPLATE_3[38] = { - 0x0f, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xe0, - 0x45, 0x4f, 0x46, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, - 0xbd, 0xd9, 0x4f, 0x00, 0x01, 0x00, 0x06, 0x06, 0x01, 0x00, - 0x01, 0x00, 0x01, 0x00, 0xee, 0x63, 0x01, 0x4b - }; - - unsigned char buf[38]; // max(sizeof TEMPLATE_*) - - uint8_t major = CRAM_MAJOR_VERS(fd->version); - uint8_t minor = CRAM_MINOR_VERS(fd->version); - - const unsigned char *template; - ssize_t template_len; - if ((major < 2) || - (major == 2 && minor == 0)) { - return 3; // No EOF support in cram versions less than 2.1 - } else if (major == 2 && minor == 1) { - template = TEMPLATE_2_1; - template_len = sizeof TEMPLATE_2_1; - } else { - template = TEMPLATE_3; - template_len = sizeof TEMPLATE_3; - } - - off_t offset = htell(fd->fp); - if (hseek(fd->fp, -template_len, SEEK_END) < 0) { - if (errno == ESPIPE) { - hclearerr(fd->fp); - return 2; - } - else { - return -1; - } - } - if (hread(fd->fp, buf, template_len) != template_len) return -1; - if (hseek(fd->fp, offset, SEEK_SET) < 0) return -1; - buf[8] &= 0x0f; - return (memcmp(template, buf, template_len) == 0)? 1 : 0; -} diff --git a/src/htslib-1.18/cram/cram_io.h b/src/htslib-1.18/cram/cram_io.h deleted file mode 100644 index 53ae30f..0000000 --- a/src/htslib-1.18/cram/cram_io.h +++ /dev/null @@ -1,650 +0,0 @@ -/* -Copyright (c) 2012-2020 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * Include cram.h instead. - * - * This is an internal part of the CRAM system and is automatically included - * when you #include cram.h. - * - * Implements the low level CRAM I/O primitives. - * This includes basic data types such as byte, int, ITF-8, - * maps, bitwise I/O, etc. - */ - -#ifndef CRAM_IO_H -#define CRAM_IO_H - -#include - -#include "misc.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/**@{ ---------------------------------------------------------------------- - * ITF8 encoding and decoding. - * - * Also see the itf8_get and itf8_put macros. - */ - -/*! INTERNAL: Converts two characters into an integer for use in switch{} */ -#define CRAM_KEY(a,b) ((((unsigned char) a)<<8)|(((unsigned char) b))) - -/*! 
Reads an integer in ITF-8 encoding from 'fd' and stores it in - * *val. - * - * @return - * Returns the number of bytes read on success; - * -1 on failure - */ -int itf8_decode(cram_fd *fd, int32_t *val); - -extern const int itf8_bytes[16]; -extern const int ltf8_bytes[256]; - -/*! Pushes a value in ITF8 format onto the end of a block. - * - * This shouldn't be used for high-volume data as it is not the fastest - * method. - * - * @return - * Returns the number of bytes written - */ -int itf8_put_blk(cram_block *blk, int32_t val); -int ltf8_put_blk(cram_block *blk, int64_t val); - -/*! Pulls a literal 32-bit value from a block. - * - * @returns the number of bytes decoded; - * -1 on failure. - */ -int int32_get_blk(cram_block *b, int32_t *val); - -/*! Pushes a literal 32-bit value onto the end of a block. - * - * @return - * Returns 0 on success; - * -1 on failure. - */ -int int32_put_blk(cram_block *blk, int32_t val); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * CRAM blocks - the dynamically growable data block. We have code to - * create, update, (un)compress and read/write. - * - * These are derived from the deflate_interlaced.c blocks, but with the - * CRAM extension of content types and IDs. - */ - -/*! Allocates a new cram_block structure with a specified content_type and - * id. - * - * @return - * Returns block pointer on success; - * NULL on failure - */ -cram_block *cram_new_block(enum cram_content_type content_type, - int content_id); - -/*! Reads a block from a cram file. - * - * @return - * Returns cram_block pointer on success; - * NULL on failure - */ -cram_block *cram_read_block(cram_fd *fd); - -/*! Writes a CRAM block. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_write_block(cram_fd *fd, cram_block *b); - -/*! Frees a CRAM block, deallocating internal data too. - */ -void cram_free_block(cram_block *b); - -/*! Uncompress a memory block using Zlib. 
- * - * @return - * Returns 0 on success; - * -1 on failure - */ -char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size); - -/*! Uncompresses a CRAM block, if compressed. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_uncompress_block(cram_block *b); - -/*! Compresses a block. - * - * Compresses a block using one of two different zlib strategies. If we only - * want one choice set strat2 to be -1. - * - * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED - * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is - * significantly faster. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, - int method, int level); -int cram_compress_block2(cram_fd *fd, cram_slice *s, - cram_block *b, cram_metrics *metrics, - int method, int level); - -cram_metrics *cram_new_metrics(void); -char *cram_block_method2str(enum cram_block_method_int m); -char *cram_content_type2str(enum cram_content_type t); - -/* - * Find an external block by its content_id - */ - -static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) { - //fprintf(stderr, "%d\t%p\n", id, slice->block_by_id); - uint32_t v = id; - if (slice->block_by_id && v < 256) { - return slice->block_by_id[v]; - } else { - v = 256 + v % 251; - if (slice->block_by_id && - slice->block_by_id[v] && - slice->block_by_id[v]->content_id == id) - return slice->block_by_id[v]; - - // Otherwise a linear search in case of collision - int i; - for (i = 0; i < slice->hdr->num_blocks; i++) { - cram_block *b = slice->block[i]; - if (b && b->content_type == EXTERNAL && b->content_id == id) - return b; - } - } - return NULL; -} - -/* --- Accessor macros for manipulating blocks on a byte by byte basis --- */ - -/* Block size and data pointer. 
*/ -#define BLOCK_SIZE(b) ((b)->byte) -#define BLOCK_DATA(b) ((b)->data) - -/* Returns the address one past the end of the block */ -#define BLOCK_END(b) (&(b)->data[(b)->byte]) - -/* Make block exactly 'l' bytes long */ -static inline int block_resize_exact(cram_block *b, size_t len) { - unsigned char *tmp = realloc(b->data, len); - if (!tmp) - return -1; - b->alloc = len; - b->data = tmp; - return 0; -} - -/* Request block to be at least 'l' bytes long */ -static inline int block_resize(cram_block *b, size_t len) { - if (b->alloc > len) - return 0; - - size_t alloc = b->alloc; - while (alloc <= len) - alloc = alloc ? alloc + (alloc>>2) : 1024; - - return block_resize_exact(b, alloc); -} - - -/* Ensure the block can hold at least another 'l' bytes */ -static inline int block_grow(cram_block *b, size_t len) { - return block_resize(b, BLOCK_SIZE(b) + len); -} - -/* Append string 's' of length 'l'. */ -static inline int block_append(cram_block *b, const void *s, size_t len) { - if (block_grow(b, len) < 0) - return -1; - - if (len) { - memcpy(BLOCK_END(b), s, len); - BLOCK_SIZE(b) += len; - } - - return 0; -} - -/* Append as single character 'c' */ -static inline int block_append_char(cram_block *b, char c) { - if (block_grow(b, 1) < 0) - return -1; - - b->data[b->byte++] = c; - return 0; -} - -/* Append a single unsigned integer */ -static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i); -static inline int block_append_uint(cram_block *b, unsigned int i) { - if (block_grow(b, 11) < 0) - return -1; - - unsigned char *cp = &b->data[b->byte]; - b->byte += append_uint32(cp, i) - cp; - return 0; -} - -// Versions of above with built in goto block_err calls. 
-#define BLOCK_RESIZE_EXACT(b,l) if (block_resize_exact((b),(l))<0) goto block_err -#define BLOCK_RESIZE(b,l) if (block_resize((b),(l)) <0) goto block_err -#define BLOCK_GROW(b,l) if (block_grow((b),(l)) <0) goto block_err -#define BLOCK_APPEND(b,s,l) if (block_append((b),(s),(l)) <0) goto block_err -#define BLOCK_APPEND_CHAR(b,c) if (block_append_char((b),(c)) <0) goto block_err -#define BLOCK_APPEND_UINT(b,i) if (block_append_uint((b),(i)) <0) goto block_err - -static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i) { - uint32_t j; - - if (i == 0) { - *cp++ = '0'; - return cp; - } - - if (i < 100) goto b1; - if (i < 10000) goto b3; - if (i < 1000000) goto b5; - if (i < 100000000) goto b7; - - if ((j = i / 1000000000)) {*cp++ = j + '0'; i -= j*1000000000; goto x8;} - if ((j = i / 100000000)) {*cp++ = j + '0'; i -= j*100000000; goto x7;} - b7:if ((j = i / 10000000)) {*cp++ = j + '0'; i -= j*10000000; goto x6;} - if ((j = i / 1000000)) {*cp++ = j + '0', i -= j*1000000; goto x5;} - b5:if ((j = i / 100000)) {*cp++ = j + '0', i -= j*100000; goto x4;} - if ((j = i / 10000)) {*cp++ = j + '0', i -= j*10000; goto x3;} - b3:if ((j = i / 1000)) {*cp++ = j + '0', i -= j*1000; goto x2;} - if ((j = i / 100)) {*cp++ = j + '0', i -= j*100; goto x1;} - b1:if ((j = i / 10)) {*cp++ = j + '0', i -= j*10; goto x0;} - if (i) *cp++ = i + '0'; - return cp; - - x8: *cp++ = i / 100000000 + '0', i %= 100000000; - x7: *cp++ = i / 10000000 + '0', i %= 10000000; - x6: *cp++ = i / 1000000 + '0', i %= 1000000; - x5: *cp++ = i / 100000 + '0', i %= 100000; - x4: *cp++ = i / 10000 + '0', i %= 10000; - x3: *cp++ = i / 1000 + '0', i %= 1000; - x2: *cp++ = i / 100 + '0', i %= 100; - x1: *cp++ = i / 10 + '0', i %= 10; - x0: *cp++ = i + '0'; - - return cp; -} - -static inline unsigned char *append_sub32(unsigned char *cp, uint32_t i) { - *cp++ = i / 100000000 + '0', i %= 100000000; - *cp++ = i / 10000000 + '0', i %= 10000000; - *cp++ = i / 1000000 + '0', i %= 1000000; - *cp++ = i / 
100000 + '0', i %= 100000; - *cp++ = i / 10000 + '0', i %= 10000; - *cp++ = i / 1000 + '0', i %= 1000; - *cp++ = i / 100 + '0', i %= 100; - *cp++ = i / 10 + '0', i %= 10; - *cp++ = i + '0'; - - return cp; -} - -static inline unsigned char *append_uint64(unsigned char *cp, uint64_t i) { - uint64_t j; - - if (i <= 0xffffffff) - return append_uint32(cp, i); - - if ((j = i/1000000000) > 1000000000) { - cp = append_uint32(cp, j/1000000000); - j %= 1000000000; - cp = append_sub32(cp, j); - } else { - cp = append_uint32(cp, i / 1000000000); - } - cp = append_sub32(cp, i % 1000000000); - - return cp; -} - -#define BLOCK_UPLEN(b) \ - (b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b)) - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Reference sequence handling - */ - -/*! Loads a reference set from fn and stores in the cram_fd. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_load_reference(cram_fd *fd, char *fn); - -/*! Generates a lookup table in refs based on the SQ headers in sam_hdr_t. - * - * Indexes references by the order they appear in a BAM file. This may not - * necessarily be the same order they appear in the fasta reference file. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int refs2id(refs_t *r, sam_hdr_t *hdr); - -void refs_free(refs_t *r); - -/*! Returns a portion of a reference sequence from start to end inclusive. - * - * The returned pointer is owned by the cram_file fd and should not be freed - * by the caller. It is valid only until the next cram_get_ref is called - * with the same fd parameter (so is thread-safe if given multiple files). - * - * To return the entire reference sequence, specify start as 1 and end - * as 0. 
- * - * @return - * Returns reference on success; - * NULL on failure - */ -char *cram_get_ref(cram_fd *fd, int id, int start, int end); -void cram_ref_incr(refs_t *r, int id); -void cram_ref_decr(refs_t *r, int id); -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Containers - */ - -/*! Creates a new container, specifying the maximum number of slices - * and records permitted. - * - * @return - * Returns cram_container ptr on success; - * NULL on failure - */ -cram_container *cram_new_container(int nrec, int nslice); -void cram_free_container(cram_container *c); - -/*! Reads a container header. - * - * @return - * Returns cram_container on success; - * NULL on failure or no container left (fd->err == 0). - */ -cram_container *cram_read_container(cram_fd *fd); - -/*! Writes a container structure. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_write_container(cram_fd *fd, cram_container *h); - -/*! Flushes a container to disk. - * - * Flushes a completely or partially full container to disk, writing - * container structure, header and blocks. This also calls the encoder - * functions. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_flush_container(cram_fd *fd, cram_container *c); -int cram_flush_container_mt(cram_fd *fd, cram_container *c); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Compression headers; the first part of the container - */ - -/*! Creates a new blank container compression header - * - * @return - * Returns header ptr on success; - * NULL on failure - */ -cram_block_compression_hdr *cram_new_compression_header(void); - -/*! Frees a cram_block_compression_hdr */ -void cram_free_compression_header(cram_block_compression_hdr *hdr); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Slices and slice headers - */ - -/*! 
Frees a slice header */ -void cram_free_slice_header(cram_block_slice_hdr *hdr); - -/*! Frees a slice */ -void cram_free_slice(cram_slice *s); - -/*! Creates a new empty slice in memory, for subsequent writing to - * disk. - * - * @return - * Returns cram_slice ptr on success; - * NULL on failure - */ -cram_slice *cram_new_slice(enum cram_content_type type, int nrecs); - -/*! Loads an entire slice. - * - * FIXME: In 1.0 the native unit of slices within CRAM is broken - * as slices contain references to objects in other slices. - * To work around this while keeping the slice oriented outer loop - * we read all slices and stitch them together into a fake large - * slice instead. - * - * @return - * Returns cram_slice ptr on success; - * NULL on failure - */ -cram_slice *cram_read_slice(cram_fd *fd); - - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * CRAM file definition (header) - */ - -/*! Reads a CRAM file definition structure. - * - * @return - * Returns file_def ptr on success; - * NULL on failure - */ -cram_file_def *cram_read_file_def(cram_fd *fd); - -/*! Writes a cram_file_def structure to cram_fd. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_write_file_def(cram_fd *fd, cram_file_def *def); - -/*! Frees a cram_file_def structure. */ -void cram_free_file_def(cram_file_def *def); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * SAM header I/O - */ - -/*! Reads the SAM header from the first CRAM data block. - * - * Also performs minimal parsing to extract read-group - * and sample information. - * - * @return - * Returns SAM hdr ptr on success; - * NULL on failure - */ -sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd); - -/*! Writes a CRAM SAM header. 
- * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * The top-level cram opening, closing and option handling - */ - -/*! Opens a CRAM file for read (mode "rb") or write ("wb"). - * - * The filename may be "-" to indicate stdin or stdout. - * - * @return - * Returns file handle on success; - * NULL on failure. - */ -cram_fd *cram_open(const char *filename, const char *mode); - -/*! Opens an existing stream for reading or writing. - * - * @return - * Returns file handle on success; - * NULL on failure. - */ -cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode); - -/*! Closes a CRAM file. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_close(cram_fd *fd); - -/* - * Seek within a CRAM file. - * - * Returns 0 on success - * -1 on failure - */ -int cram_seek(cram_fd *fd, off_t offset, int whence); - -/* - * Flushes a CRAM file. - * Useful for when writing to stdout without wishing to close the stream. - * - * Returns 0 on success - * -1 on failure - */ -int cram_flush(cram_fd *fd); - -/*! Checks for end of file on a cram_fd stream. - * - * @return - * Returns 0 if not at end of file - * 1 if we hit an expected EOF (end of range or EOF block) - * 2 for other EOF (end of stream without EOF block) - */ -int cram_eof(cram_fd *fd); - -/*! Sets options on the cram_fd. - * - * See CRAM_OPT_* definitions in cram_structs.h. - * Use this immediately after opening. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...); - -/*! Sets options on the cram_fd. - * - * See CRAM_OPT_* definitions in cram_structs.h. - * Use this immediately after opening. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args); - -/*! 
- * Attaches a header to a cram_fd. - * - * This should be used when creating a new cram_fd for writing where - * we have an sam_hdr_t already constructed (eg from a file we've read - * in). - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr); - -/*! - * Returns the hFILE connected to a cram_fd. - */ -static inline struct hFILE *cram_hfile(cram_fd *fd) { - return fd->fp; -} - -#ifdef __cplusplus -} -#endif - -#endif /* CRAM_IO_H */ diff --git a/src/htslib-1.18/cram/cram_stats.c b/src/htslib-1.18/cram/cram_stats.c deleted file mode 100644 index 3ceda0d..0000000 --- a/src/htslib-1.18/cram/cram_stats.c +++ /dev/null @@ -1,226 +0,0 @@ -/* -Copyright (c) 2012-2014, 2016, 2018, 2020 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cram.h" -#include "os.h" - -cram_stats *cram_stats_create(void) { - return calloc(1, sizeof(cram_stats)); -} - -int cram_stats_add(cram_stats *st, int64_t val) { - st->nsamp++; - - //assert(val >= 0); - - if (val < MAX_STAT_VAL && val >= 0) { - st->freqs[val]++; - } else { - khint_t k; - int r; - - if (!st->h) { - st->h = kh_init(m_i2i); - if (!st->h) - return -1; - } - - k = kh_put(m_i2i, st->h, val, &r); - if (r == 0) - kh_val(st->h, k)++; - else if (r != -1) - kh_val(st->h, k) = 1; - else - return -1; - } - return 0; -} - -void cram_stats_del(cram_stats *st, int64_t val) { - st->nsamp--; - - //assert(val >= 0); - - if (val < MAX_STAT_VAL && val >= 0) { - st->freqs[val]--; - assert(st->freqs[val] >= 0); - } else if (st->h) { - khint_t k = kh_get(m_i2i, st->h, val); - - if (k != kh_end(st->h)) { - if (--kh_val(st->h, k) == 0) - kh_del(m_i2i, st->h, k); - } else { - hts_log_warning("Failed to remove val %"PRId64" from cram_stats", val); - st->nsamp++; - } - } else { - hts_log_warning("Failed to remove val %"PRId64" from cram_stats", val); - st->nsamp++; - } -} - -#if DEBUG_CRAM_STATS -void cram_stats_dump(cram_stats *st) { - int i; - fprintf(stderr, "cram_stats:\n"); - for (i = 0; i < MAX_STAT_VAL; i++) { - if (!st->freqs[i]) - continue; - 
fprintf(stderr, "\t%d\t%d\n", i, st->freqs[i]); - } - if (st->h) { - khint_t k; - for (k = kh_begin(st->h); k != kh_end(st->h); k++) { - if (!kh_exist(st->h, k)) - continue; - - fprintf(stderr, "\t%d\t%d\n", kh_key(st->h, k), kh_val(st->h, k)); - } - } -} -#endif - -/* - * Computes entropy from integer frequencies for various encoding methods and - * picks the best encoding. - * - * FIXME: we could reuse some of the code here for the actual encoding - * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman. - * - * Returns the best codec to use. - */ -enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st) { - int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX; - int *vals = NULL, *freqs = NULL, vals_alloc = 0; - -#if DEBUG_CRAM_STATS - cram_stats_dump(st); -#endif - - /* Count number of unique symbols */ - for (nvals = i = 0; i < MAX_STAT_VAL; i++) { - if (!st->freqs[i]) - continue; - if (nvals >= vals_alloc) { - vals_alloc = vals_alloc ? vals_alloc*2 : 1024; - int *vals_tmp = realloc(vals, vals_alloc * sizeof(int)); - int *freqs_tmp = realloc(freqs, vals_alloc * sizeof(int)); - if (!vals_tmp || !freqs_tmp) { - free(vals_tmp ? vals_tmp : vals); - free(freqs_tmp ? freqs_tmp : freqs); - return E_HUFFMAN; // Cannot do much else atm - } - vals = vals_tmp; - freqs = freqs_tmp; - } - vals[nvals] = i; - freqs[nvals] = st->freqs[i]; - ntot += freqs[nvals]; - if (max_val < i) max_val = i; - if (min_val > i) min_val = i; - nvals++; - } - if (st->h) { - khint_t k; - int i; - - for (k = kh_begin(st->h); k != kh_end(st->h); k++) { - if (!kh_exist(st->h, k)) - continue; - - if (nvals >= vals_alloc) { - vals_alloc = vals_alloc ? vals_alloc*2 : 1024; - int *vals_tmp = realloc(vals, vals_alloc * sizeof(int)); - int *freqs_tmp = realloc(freqs, vals_alloc * sizeof(int)); - if (!vals_tmp || !freqs_tmp) { - free(vals_tmp ? vals_tmp : vals); - free(freqs_tmp ? 
freqs_tmp : freqs); - return E_HUFFMAN; // Cannot do much else atm - } - vals = vals_tmp; - freqs = freqs_tmp; - } - i = kh_key(st->h, k); - vals[nvals]=i; - freqs[nvals] = kh_val(st->h, k); - ntot += freqs[nvals]; - if (max_val < i) max_val = i; - if (min_val > i) min_val = i; - nvals++; - } - } - - st->nvals = nvals; - st->min_val = min_val; - st->max_val = max_val; - assert(ntot == st->nsamp); - - free(vals); - free(freqs); - - /* - * Simple policy that everything is external unless it can be - * encoded using zero bits as a unary item huffman table. - */ - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - // Note, we're assuming integer data here as we don't have the - // type passed in. Cram_encoder_init does know the type and - // will convert to E_CONST_BYTE or E_EXTERNAL as appropriate. - if (nvals == 1) - return E_CONST_INT; - else if (nvals == 0 || min_val < 0) - return E_VARINT_SIGNED; - else - return E_VARINT_UNSIGNED; - } else { - return nvals <= 1 ? E_HUFFMAN : E_EXTERNAL; - } -} - -void cram_stats_free(cram_stats *st) { - if (st->h) - kh_destroy(m_i2i, st->h); - free(st); -} diff --git a/src/htslib-1.18/cram/cram_structs.h b/src/htslib-1.18/cram/cram_structs.h deleted file mode 100644 index 1606633..0000000 --- a/src/htslib-1.18/cram/cram_structs.h +++ /dev/null @@ -1,976 +0,0 @@ -/* -Copyright (c) 2012-2016, 2018-2020, 2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef HTSLIB_CRAM_STRUCTS_H -#define HTSLIB_CRAM_STRUCTS_H - -/* - * Defines in-memory structs for the basic file-format objects in the - * CRAM format. - * - * The basic file format is: - * File-def SAM-hdr Container Container ... - * - * Container: - * Service-block data-block data-block ... - * - * Multiple blocks in a container are grouped together as slices, - * also sometimes referred to as landmarks in the spec. - */ - - -#include -#include -#include - -#include "../htslib/thread_pool.h" -#include "../htslib/cram.h" -#include "string_alloc.h" -#include "mFILE.h" -#include "../htslib/khash.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Generic hash-map integer -> integer -KHASH_MAP_INIT_INT64(m_i2i, int) - -// Generic hash-set integer -> (existence) -KHASH_SET_INIT_INT(s_i2i) - -// For brevity -typedef unsigned char uc; - -/* - * A union for the preservation map. Required for khash. 
- */ -typedef union { - int i; - char *p; -} pmap_t; - -// Generates static functions here which isn't ideal, but we have no way -// currently to declare the kh_map_t structure here without also declaring a -// duplicate in the .c files due to the nature of the KHASH macros. -KHASH_MAP_INIT_STR(map, pmap_t) - -struct hFILE; - -#define SEQS_PER_SLICE 10000 -#define BASES_PER_SLICE (SEQS_PER_SLICE*500) -#define SLICE_PER_CNT 1 - -#define CRAM_SUBST_MATRIX "CGTNGTANCATNGCANACGT" - -#define MAX_STAT_VAL 1024 -//#define MAX_STAT_VAL 16 -typedef struct cram_stats { - int freqs[MAX_STAT_VAL]; - khash_t(m_i2i) *h; - int nsamp; // total number of values added - int nvals; // total number of unique values added - int64_t min_val, max_val; -} cram_stats; - -/* NB: matches java impl, not the spec */ -enum cram_encoding { - E_NULL = 0, - E_EXTERNAL = 1, // Only for BYTE type in CRAM 4 - E_GOLOMB = 2, // Not in CRAM 4 - E_HUFFMAN = 3, // Not in CRAM 4 - E_BYTE_ARRAY_LEN = 4, - E_BYTE_ARRAY_STOP = 5, - E_BETA = 6, // Not in CRAM 4 - E_SUBEXP = 7, // Not in CRAM 4 - E_GOLOMB_RICE = 8, // Not in CRAM 4 - E_GAMMA = 9, // Not in CRAM 4 - - // CRAM 4 specific codecs - E_VARINT_UNSIGNED = 41, // Specialisation of EXTERNAL - E_VARINT_SIGNED = 42, // Specialisation of EXTERNAL - E_CONST_BYTE = 43, // Alternative to HUFFMAN with 1 symbol - E_CONST_INT = 44, // Alternative to HUFFMAN with 1 symbol - - // More experimental ideas, not documented in spec yet - E_XHUFFMAN = 50, // To external block - E_XPACK = 51, // Transform to sub-codec - E_XRLE = 52, // Transform to sub-codec - E_XDELTA = 53, // Transform to sub-codec - - // Total number of codecs, not a real one. 
- E_NUM_CODECS, -}; - -enum cram_external_type { - E_INT = 1, - E_LONG = 2, - E_BYTE = 3, - E_BYTE_ARRAY = 4, - E_BYTE_ARRAY_BLOCK = 5, - E_SINT = 6, // signed INT - E_SLONG = 7, // signed LONG -}; - -/* External IDs used by this implementation (only assumed during writing) */ -enum cram_DS_ID { - DS_CORE = 0, - DS_aux = 1, // aux_blk - DS_aux_OQ = 2, - DS_aux_BQ = 3, - DS_aux_BD = 4, - DS_aux_BI = 5, - DS_aux_FZ = 6, // also ZM:B - DS_aux_oq = 7, // other qualities - DS_aux_os = 8, // other sequences - DS_aux_oz = 9, // other strings - DS_ref, - DS_RN, // name_blk - DS_QS, // qual_blk - DS_IN, // base_blk - DS_SC, // soft_blk - - DS_BF, // start loop - DS_CF, - DS_AP, - DS_RG, - DS_MQ, - DS_NS, - DS_MF, - DS_TS, - DS_NP, - DS_NF, - DS_RL, - DS_FN, - DS_FC, - DS_FP, - DS_DL, - DS_BA, - DS_BS, - DS_TL, - DS_RI, - DS_RS, - DS_PD, - DS_HC, - DS_BB, - DS_QQ, - - DS_TN, // end loop - - DS_RN_len, - DS_SC_len, - DS_BB_len, - DS_QQ_len, - - DS_TC, // CRAM v1.0 tags - DS_TM, // test - DS_TV, // test - - DS_END, -}; - -/* "File Definition Structure" */ -struct cram_file_def { - char magic[4]; - uint8_t major_version; - uint8_t minor_version; - char file_id[20] HTS_NONSTRING; // Filename or SHA1 checksum -}; - -#define CRAM_MAJOR_VERS(v) ((v) >> 8) -#define CRAM_MINOR_VERS(v) ((v) & 0xff) - -struct cram_slice; - -// Internal version of htslib/cram.h enum. -// Note these have to match the laout of methmap and methcost in -// cram_io.c:cram_compress_block2 -enum cram_block_method_int { - // Public methods as defined in the CRAM spec. - BM_ERROR = -1, - - // CRAM 2.x and 3.0 - RAW = 0, - GZIP = 1, - BZIP2 = 2, - LZMA = 3, - RANS = 4, RANS0 = RANS, - - // CRAM 3.1 onwards - RANSPR = 5, RANS_PR0 = RANSPR, - ARITH = 6, ARITH_PR0 = ARITH, - FQZ = 7, - TOK3 = 8, - // BSC = 9, ZSTD = 10 - - // Methods not externalised, but used in metrics. - // Externally they become one of the above methods. 
- GZIP_RLE = 11, - GZIP_1, // Z_DEFAULT_STRATEGY level 1, NB: not externalised in CRAM - - FQZ_b, FQZ_c, FQZ_d, // Various preset FQZ methods - - //RANS0, // Order 0 - RANS1, - - //RANS_PR0, // Order 0 - RANS_PR1, // Order 1 - RANS_PR64, // O0 + RLE - RANS_PR9, // O1 + X4 - RANS_PR128, // O0 + Pack - RANS_PR129, // O1 + Pack - RANS_PR192, // O0 + RLE + pack - RANS_PR193, // O1 + RLE + pack - - //TOK3, // tok+rans - TOKA, // tok+arith - - //ARITH_PR0, // Order 0 - ARITH_PR1, // Order 1 - ARITH_PR64, // O0 + RLE - ARITH_PR9, // O1 + X4 - ARITH_PR128, // O0 + Pack - ARITH_PR129, // O1 + Pack - ARITH_PR192, // O0 + RLE + pack - ARITH_PR193, // O1 + RLE + pack - - // NB: must end on no more than 31 unless we change to a - // 64-bit method type. -}; - -/* Now in htslib/cram.h -enum cram_content_type { - CT_ERROR = -1, - FILE_HEADER = 0, - COMPRESSION_HEADER = 1, - MAPPED_SLICE = 2, - UNMAPPED_SLICE = 3, // CRAM V1.0 only - EXTERNAL = 4, - CORE = 5, -}; -*/ - -/* Maximum simultaneous codecs allowed, 1 per bit */ -#define CRAM_MAX_METHOD 32 - -/* Compression metrics */ -struct cram_metrics { - // number of trials and time to next trial - int trial; - int next_trial; - int consistency; - - // aggregate sizes during trials - int sz[CRAM_MAX_METHOD]; - int input_avg_sz, input_avg_delta; - - // resultant method from trials - int method, revised_method; - int strat; - - // Revisions of method, to allow culling of continually failing ones. 
- int cnt[CRAM_MAX_METHOD]; - - double extra[CRAM_MAX_METHOD]; - - // Not amenable to rANS bit-packing techniques; cardinality > 16 - int unpackable; -}; - -// Hash aux key (XX:i) to cram_metrics -KHASH_MAP_INIT_INT(m_metrics, cram_metrics*) - - -/* Block */ -struct cram_block { - enum cram_block_method_int method, orig_method; - enum cram_content_type content_type; - int32_t content_id; - int32_t comp_size; - int32_t uncomp_size; - uint32_t crc32; - int32_t idx; /* offset into data */ - unsigned char *data; - - // For bit I/O - size_t alloc; - size_t byte; - int bit; - - // To aid compression - cram_metrics *m; // used to track aux block compression only - - int crc32_checked; - uint32_t crc_part; -}; - -struct cram_codec; /* defined in cram_codecs.h */ -struct cram_map; - -#define CRAM_MAP_HASH 32 -#define CRAM_MAP(a,b) (((a)*3+(b))&(CRAM_MAP_HASH-1)) - -/* Compression header block */ -struct cram_block_compression_hdr { - int32_t ref_seq_id; - int64_t ref_seq_start; - int64_t ref_seq_span; - int32_t num_records; - int32_t num_landmarks; - int32_t *landmark; - - /* Flags from preservation map */ - int read_names_included; - int AP_delta; - // indexed by ref-base and subst. code - char substitution_matrix[5][4]; - int no_ref; - int qs_seq_orient; // 1 => same as seq. 0 => original orientation - - // TD Dictionary as a concatenated block - cram_block *TD_blk; // Tag Dictionary - int nTL; // number of TL entries in TD - unsigned char **TL; // array of size nTL, pointer into TD_blk. - khash_t(m_s2i) *TD_hash; // Keyed on TD strings, map to TL[] indices - string_alloc_t *TD_keys; // Pooled keys for TD hash. 
- - khash_t(map) *preservation_map; - struct cram_map *rec_encoding_map[CRAM_MAP_HASH]; - struct cram_map *tag_encoding_map[CRAM_MAP_HASH]; - - struct cram_codec *codecs[DS_END]; - - char *uncomp; // A single block of uncompressed data - size_t uncomp_size, uncomp_alloc; - - // Total codec count, used for index to block_by_id for transforms - int ncodecs; -}; - -typedef struct cram_map { - int key; /* 0xe0 + 3 bytes */ - enum cram_encoding encoding; - int offset; /* Offset into a single block of memory */ - int size; /* Size */ - struct cram_codec *codec; - struct cram_map *next; // for noddy internal hash -} cram_map; - -typedef struct cram_tag_map { - struct cram_codec *codec; - cram_block *blk; - cram_block *blk2; - cram_metrics *m; -} cram_tag_map; - -// Hash aux key (XX:i) to cram_tag_map -KHASH_MAP_INIT_INT(m_tagmap, cram_tag_map*) - -/* Mapped or unmapped slice header block */ -struct cram_block_slice_hdr { - enum cram_content_type content_type; - int32_t ref_seq_id; /* if content_type == MAPPED_SLICE */ - int64_t ref_seq_start; /* if content_type == MAPPED_SLICE */ - int64_t ref_seq_span; /* if content_type == MAPPED_SLICE */ - int32_t num_records; - int64_t record_counter; - int32_t num_blocks; - int32_t num_content_ids; - int32_t *block_content_ids; - int32_t ref_base_id; /* if content_type == MAPPED_SLICE */ - unsigned char md5[16]; -}; - -struct ref_entry; - -/* - * Container. - * - * Conceptually a container is split into slices, and slices into blocks. - * However on disk it's just a list of blocks and we need to query the - * block types to identify the start/end points of the slices. - * - * OR... are landmarks the start/end points of slices? 
- */ -struct cram_container { - int32_t length; - int32_t ref_seq_id; - int64_t ref_seq_start; - int64_t ref_seq_span; - int64_t record_counter; - int64_t num_bases; - int32_t num_records; - int32_t num_blocks; - int32_t num_landmarks; - int32_t *landmark; - - /* Size of container header above */ - size_t offset; - - /* Compression header is always the first block? */ - cram_block_compression_hdr *comp_hdr; - cram_block *comp_hdr_block; - - /* For construction purposes */ - int max_slice, curr_slice; // maximum number of slices - int curr_slice_mt; // Curr_slice when reading ahead (via threads) - int max_rec, curr_rec; // current and max recs per slice - int max_c_rec, curr_c_rec; // current and max recs per container - int slice_rec; // rec no. for start of this slice - int curr_ref; // current ref ID. -2 for no previous - int64_t last_pos; // last record position - struct cram_slice **slices, *slice; - int pos_sorted; // boolean, 1=>position sorted data - int64_t max_apos; // maximum position, used if pos_sorted==0 - int last_slice; // number of reads in last slice (0 for 1st) - int multi_seq; // true if packing multi seqs per cont/slice - int unsorted; // true is AP_delta is 0. - int qs_seq_orient; // 1 => same as seq. 
0 => original orientation - - /* Copied from fd before encoding, to allow multi-threading */ - int ref_start, first_base, last_base, ref_id, ref_end; - char *ref; - int embed_ref; // 1 if embedding ref, 2 if embedding cons - int no_ref; // true if referenceless - //struct ref_entry *ref; - - /* For multi-threading */ - bam_seq_t **bams; - - /* Statistics for encoding */ - cram_stats *stats[DS_END]; - - khash_t(m_tagmap) *tags_used; // set of tag types in use, for tag encoding map - int *refs_used; // array of frequency of ref seq IDs - - uint32_t crc32; // CRC32 - - uint64_t s_num_bases; // number of bases in this slice - uint64_t s_aux_bytes; // number of bytes of aux in BAM - - uint32_t n_mapped; // Number of mapped reads - int ref_free; // whether 'ref' is owned by us and must be freed. -}; - -/* - * A single cram record - */ -typedef struct cram_record { - struct cram_slice *s; // Filled out by cram_decode only - - int32_t ref_id; // fixed for all recs in slice? - int32_t flags; // BF - int32_t cram_flags; // CF - int32_t len; // RL - int64_t apos; // AP - int32_t rg; // RG - int32_t name; // RN; idx to s->names_blk - int32_t name_len; - int32_t mate_line; // index to another cram_record - int32_t mate_ref_id; - int64_t mate_pos; // NP - int64_t tlen; // TS - int64_t explicit_tlen;// TS, but PNEXT/RNEXT still need auto-computing - - // Auxiliary data - int32_t ntags; // TC - uint32_t aux; // idx to s->aux_blk - uint32_t aux_size; // total size of packed ntags in aux_blk -#ifndef TN_external - int32_t TN_idx; // TN; idx to s->TN; -#else - int32_t tn; // idx to s->tn_blk -#endif - int TL; - - uint32_t seq; // idx to s->seqs_blk - uint32_t qual; // idx to s->qual_blk - uint32_t cigar; // idx to s->cigar - int32_t ncigar; - int64_t aend; // alignment end - int32_t mqual; // MQ - - uint32_t feature; // idx to s->feature - uint32_t nfeature; // number of features - int32_t mate_flags; // MF -} cram_record; - -// Accessor macros as an analogue of the bam ones -#define 
cram_qname(c) (&(c)->s->name_blk->data[(c)->name]) -#define cram_seq(c) (&(c)->s->seqs_blk->data[(c)->seq]) -#define cram_qual(c) (&(c)->s->qual_blk->data[(c)->qual]) -#define cram_aux(c) (&(c)->s->aux_blk->data[(c)->aux]) -#define cram_seqi(c,i) (cram_seq((c))[(i)]) -#define cram_name_len(c) ((c)->name_len) -#define cram_strand(c) (((c)->flags & BAM_FREVERSE) != 0) -#define cram_mstrand(c) (((c)->flags & BAM_FMREVERSE) != 0) -#define cram_cigar(c) (&((cr)->s->cigar)[(c)->cigar]) - -/* - * A feature is a base difference, used for the sequence reference encoding. - * (We generate these internally when writing CRAM.) - */ -typedef union cram_feature { - struct { - int pos; - int code; - int base; // substitution code - } X; - struct { - int pos; - int code; - int base; // actual base & qual - int qual; - } B; - struct { - int pos; - int code; - int seq_idx; // index to s->seqs_blk - int len; - } b; - struct { - int pos; - int code; - int qual; - } Q; - struct { - int pos; - int code; - int len; - int seq_idx; // soft-clip multiple bases - } S; - struct { - int pos; - int code; - int len; - int seq_idx; // insertion multiple bases - } I; - struct { - int pos; - int code; - int base; // insertion single base - } i; - struct { - int pos; - int code; - int len; - } D; - struct { - int pos; - int code; - int len; - } N; - struct { - int pos; - int code; - int len; - } P; - struct { - int pos; - int code; - int len; - } H; -} cram_feature; - -/* - * A slice is really just a set of blocks, but it - * is the logical unit for decoding a number of - * sequences. - */ -struct cram_slice { - cram_block_slice_hdr *hdr; - cram_block *hdr_block; - cram_block **block; - cram_block **block_by_id; - - /* State used during encoding/decoding */ - int64_t last_apos, max_apos; - - /* Array of decoded cram records */ - cram_record *crecs; - - /* An dynamically growing buffers for data pointed - * to by crecs[] array. 
- */ - uint32_t *cigar; - uint32_t cigar_alloc; - uint32_t ncigar; - - cram_feature *features; - uint32_t nfeatures; - uint32_t afeatures; // allocated size of features - -#ifndef TN_external - // TN field (Tag Name) - uint32_t *TN; - int nTN, aTN; // used and allocated size for TN[] -#else - cram_block *tn_blk; - int tn_id; -#endif - - // For variable sized elements which are always external blocks. - cram_block *name_blk; - cram_block *seqs_blk; - cram_block *qual_blk; - cram_block *base_blk; - cram_block *soft_blk; - cram_block *aux_blk; // BAM aux block, created while decoding CRAM - - string_alloc_t *pair_keys; // Pooled keys for pair hash. - khash_t(m_s2i) *pair[2]; // for identifying read-pairs in this slice. - - char *ref; // slice of current reference - int ref_start; // start position of current reference; - int ref_end; // end position of current reference; - int ref_id; - - // For going from BAM to CRAM; an array of auxiliary blocks per type - int naux_block; - cram_block **aux_block; - - unsigned int data_series; // See cram_fields enum - int decode_md; - - int max_rec, curr_rec; // current and max recs per slice - int slice_num; // To be copied into c->curr_slice in decode -}; - -/*----------------------------------------------------------------------------- - * Consider moving reference handling to cram_refs.[ch] - */ -// from fa.fai / samtools faidx files -typedef struct ref_entry { - char *name; - char *fn; - int64_t length; - int64_t offset; - int bases_per_line; - int line_length; - int64_t count; // for shared references so we know to dealloc seq - char *seq; - mFILE *mf; - int is_md5; // Reference comes from a raw seq found by MD5 - int validated_md5; -} ref_entry; - -KHASH_MAP_INIT_STR(refs, ref_entry*) - -// References structure. 
-struct refs_t { - string_alloc_t *pool; // String pool for holding filenames and SN vals - - khash_t(refs) *h_meta; // ref_entry*, index by name - ref_entry **ref_id; // ref_entry*, index by ID - int nref; // number of ref_entry - - char *fn; // current file opened - BGZF *fp; // and the hFILE* to go with it. - - int count; // how many cram_fd sharing this refs struct - - pthread_mutex_t lock; // Mutex for multi-threaded updating - ref_entry *last; // Last queried sequence - int last_id; // Used in cram_ref_decr_locked to delay free -}; - -/*----------------------------------------------------------------------------- - * CRAM index - * - * Detect format by number of entries per line. - * 5 => 1.0 (refid, start, nseq, C offset, slice) - * 6 => 1.1 (refid, start, span, C offset, S offset, S size) - * - * Indices are stored in a nested containment list, which is trivial to set - * up as the indices are on sorted data so we're appending to the nclist - * in sorted order. Basically if a slice entirely fits within a previous - * slice then we append to that slices list. This is done recursively. - * - * Lists are sorted on two dimensions: ref id + slice coords. - */ -typedef struct cram_index { - int nslice, nalloc; // total number of slices - struct cram_index *e; // array of size nslice - - int refid; // 1.0 1.1 - int start; // 1.0 1.1 - int end; // 1.1 - int nseq; // 1.0 - undocumented - int slice; // 1.0 landmark index, 1.1 landmark value - int len; // 1.1 - size of slice in bytes - int64_t offset; // 1.0 1.1 - - // Linked list of cram_index entries. Used to convert recursive - // NCList back to a linear list. 
- struct cram_index *e_next; -} cram_index; - -typedef struct { - int refid; - int64_t start; - int64_t end; -} cram_range; - -/*----------------------------------------------------------------------------- - */ -/* CRAM File handle */ - -typedef struct spare_bams { - bam_seq_t **bams; - struct spare_bams *next; -} spare_bams; - -struct cram_fd; -typedef struct varint_vec { - // Returns number of bytes decoded from fd, 0 on error - int (*varint_decode32_crc)(struct cram_fd *fd, int32_t *val_p, uint32_t *crc); - int (*varint_decode32s_crc)(struct cram_fd *fd, int32_t *val_p, uint32_t *crc); - int (*varint_decode64_crc)(struct cram_fd *fd, int64_t *val_p, uint32_t *crc); - - // Returns the value and increments *cp. Sets err to 1 iff an error occurs. - // NOTE: Does not set err to 0 on success. - int64_t (*varint_get32) (char **cp, const char *endp, int *err); - int64_t (*varint_get32s)(char **cp, const char *endp, int *err); - int64_t (*varint_get64) (char **cp, const char *endp, int *err); - int64_t (*varint_get64s)(char **cp, const char *endp, int *err); - - // Returns the number of bytes written, <= 0 on error. - int (*varint_put32) (char *cp, char *endp, int32_t val_p); - int (*varint_put32s)(char *cp, char *endp, int32_t val_p); - int (*varint_put64) (char *cp, char *endp, int64_t val_p); - int (*varint_put64s)(char *cp, char *endp, int64_t val_p); - - // Returns the number of bytes written, <= 0 on error. 
- int (*varint_put32_blk) (cram_block *blk, int32_t val_p); - int (*varint_put32s_blk)(cram_block *blk, int32_t val_p); - int (*varint_put64_blk) (cram_block *blk, int64_t val_p); - int (*varint_put64s_blk)(cram_block *blk, int64_t val_p); - - // Returns number of bytes needed to encode 'val' - int (*varint_size)(int64_t val); -} varint_vec; - -struct cram_fd { - struct hFILE *fp; - int mode; // 'r' or 'w' - int version; - cram_file_def *file_def; - sam_hdr_t *header; - - char *prefix; - int64_t record_counter; - int err; - - // Most recent compression header decoded - //cram_block_compression_hdr *comp_hdr; - //cram_block_slice_hdr *slice_hdr; - - // Current container being processed - cram_container *ctr; - - // Current container used for decoder threads - cram_container *ctr_mt; - - // positions for encoding or decoding - int first_base, last_base; // copied to container - - // cached reference portion - refs_t *refs; // ref meta-data structure - char *ref, *ref_free; // current portion held in memory - int ref_id; // copied to container - int ref_start; // copied to container - int ref_end; // copied to container - char *ref_fn; // reference fasta filename - - // compression level and metrics - int level; - cram_metrics *m[DS_END]; - khash_t(m_metrics) *tags_used; // cram_metrics[], per tag types in use. 
- - // options - int decode_md; // Whether to export MD and NM tags - int seqs_per_slice; - int bases_per_slice; - int slices_per_container; - int embed_ref; // copied to container - int no_ref; // copied to container - int ignore_md5; - int use_bz2; - int use_rans; - int use_lzma; - int use_fqz; - int use_tok; - int use_arith; - int shared_ref; - unsigned int required_fields; - int store_md; - int store_nm; - cram_range range; - - // lookup tables, stored here so we can be trivially multi-threaded - unsigned int bam_flag_swap[0x1000]; // cram -> bam flags - unsigned int cram_flag_swap[0x1000];// bam -> cram flags - unsigned char L1[256]; // ACGT{*} ->0123{4} - unsigned char L2[256]; // ACGTN{*}->01234{5} - char cram_sub_matrix[32][32]; // base substitution codes - - int index_sz; - cram_index *index; // array, sizeof index_sz - off_t first_container; - off_t curr_position; - int eof; - int last_slice; // number of recs encoded in last slice - int last_RI_count; // number of references encoded in last container - int multi_seq; // -1 is auto, 0 is one ref per container, 1 is multi... - int multi_seq_user; // Original user setting (CRAM_OPT_MULTI_SEQ_PER_SLICE) - int unsorted; - int last_mapped; // number of mapped reads in last container - int empty_container; // Marker for EOF block - - // thread pool - int own_pool; - hts_tpool *pool; - hts_tpool_process *rqueue; - pthread_mutex_t metrics_lock; - pthread_mutex_t ref_lock; - pthread_mutex_t range_lock; - spare_bams *bl; - pthread_mutex_t bam_list_lock; - void *job_pending; - int ooc; // out of containers. - - int lossy_read_names; // boolean - int tlen_approx; // max TLEN calculation offset. - int tlen_zero; // If true, permit tlen 0 (=> tlen calculated) - - BGZF *idxfp; // File pointer for on-the-fly index creation - - // variable integer decoding callbacks. - // This changed in CRAM4.0 to a data-size agnostic encoding. - varint_vec vv; - - // Force AP delta even on non positional sorted data. 
- // This can be beneficial for pairs where pairs are nearby each other. - // We suffer with delta to unrelated things (previous pair), but gain - // in delta between them. (Ideal would be a per read setting.) - int ap_delta; -}; - -// Translation of required fields to cram data series -enum cram_fields { - CRAM_BF = 0x00000001, - CRAM_AP = 0x00000002, - CRAM_FP = 0x00000004, - CRAM_RL = 0x00000008, - CRAM_DL = 0x00000010, - CRAM_NF = 0x00000020, - CRAM_BA = 0x00000040, - CRAM_QS = 0x00000080, - CRAM_FC = 0x00000100, - CRAM_FN = 0x00000200, - CRAM_BS = 0x00000400, - CRAM_IN = 0x00000800, - CRAM_RG = 0x00001000, - CRAM_MQ = 0x00002000, - CRAM_TL = 0x00004000, - CRAM_RN = 0x00008000, - CRAM_NS = 0x00010000, - CRAM_NP = 0x00020000, - CRAM_TS = 0x00040000, - CRAM_MF = 0x00080000, - CRAM_CF = 0x00100000, - CRAM_RI = 0x00200000, - CRAM_RS = 0x00400000, - CRAM_PD = 0x00800000, - CRAM_HC = 0x01000000, - CRAM_SC = 0x02000000, - CRAM_BB = 0x04000000, - CRAM_BB_len = 0x08000000, - CRAM_QQ = 0x10000000, - CRAM_QQ_len = 0x20000000, - CRAM_aux= 0x40000000, - CRAM_ALL= 0x7fffffff, -}; - -// A CIGAR opcode, but not necessarily the implications of it. Eg FC/FP may -// encode a base difference, but we don't need to know what it is for CIGAR. -// If we have a soft-clip or insertion, we do need SC/IN though to know how -// long that array is. -#define CRAM_CIGAR (CRAM_FN | CRAM_FP | CRAM_FC | CRAM_DL | CRAM_IN | \ - CRAM_SC | CRAM_HC | CRAM_PD | CRAM_RS | CRAM_RL | CRAM_BF) - -#define CRAM_SEQ (CRAM_CIGAR | CRAM_BA | CRAM_BS | \ - CRAM_RL | CRAM_AP | CRAM_BB) - -#define CRAM_QUAL (CRAM_CIGAR | CRAM_RL | CRAM_AP | CRAM_QS | CRAM_QQ) - -/* BF bitfields */ -/* Corrected in 1.1. 
Use bam_flag_swap[bf] and BAM_* macros for 1.0 & 1.1 */ -#define CRAM_FPAIRED 256 -#define CRAM_FPROPER_PAIR 128 -#define CRAM_FUNMAP 64 -#define CRAM_FREVERSE 32 -#define CRAM_FREAD1 16 -#define CRAM_FREAD2 8 -#define CRAM_FSECONDARY 4 -#define CRAM_FQCFAIL 2 -#define CRAM_FDUP 1 - -#define DS_aux_S "\001" -#define DS_aux_OQ_S "\002" -#define DS_aux_BQ_S "\003" -#define DS_aux_BD_S "\004" -#define DS_aux_BI_S "\005" -#define DS_aux_FZ_S "\006" -#define DS_aux_oq_S "\007" -#define DS_aux_os_S "\010" -#define DS_aux_oz_S "\011" - -#define CRAM_M_REVERSE 1 -#define CRAM_M_UNMAP 2 - - -/* CF bitfields */ -#define CRAM_FLAG_PRESERVE_QUAL_SCORES (1<<0) -#define CRAM_FLAG_DETACHED (1<<1) -#define CRAM_FLAG_MATE_DOWNSTREAM (1<<2) -#define CRAM_FLAG_NO_SEQ (1<<3) -#define CRAM_FLAG_EXPLICIT_TLEN (1<<4) -#define CRAM_FLAG_MASK ((1<<5)-1) - -/* Internal only */ -#define CRAM_FLAG_STATS_ADDED (1<<30) -#define CRAM_FLAG_DISCARD_NAME (1U<<31) - -#ifdef __cplusplus -} -#endif - -#endif /* HTSLIB_CRAM_STRUCTS_H */ diff --git a/src/htslib-1.18/faidx.c b/src/htslib-1.18/faidx.c deleted file mode 100644 index 5dd4bf1..0000000 --- a/src/htslib-1.18/faidx.c +++ /dev/null @@ -1,1012 +0,0 @@ -/* faidx.c -- FASTA and FASTQ random access. - - Copyright (C) 2008, 2009, 2013-2020, 2022 Genome Research Ltd. - Portions copyright (C) 2011 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htslib/bgzf.h" -#include "htslib/faidx.h" -#include "htslib/hfile.h" -#include "htslib/khash.h" -#include "htslib/kstring.h" -#include "hts_internal.h" - -typedef struct { - int id; // faidx_t->name[id] is for this struct. - uint32_t line_len, line_blen; - uint64_t len; - uint64_t seq_offset; - uint64_t qual_offset; -} faidx1_t; -KHASH_MAP_INIT_STR(s, faidx1_t) - -struct faidx_t { - BGZF *bgzf; - int n, m; - char **name; - khash_t(s) *hash; - enum fai_format_options format; -}; - -static int fai_name2id(void *v, const char *ref) -{ - faidx_t *fai = (faidx_t *)v; - khint_t k = kh_get(s, fai->hash, ref); - return k == kh_end(fai->hash) ? -1 : kh_val(fai->hash, k).id; -} - -static inline int fai_insert_index(faidx_t *idx, const char *name, uint64_t len, uint32_t line_len, uint32_t line_blen, uint64_t seq_offset, uint64_t qual_offset) -{ - if (!name) { - hts_log_error("Malformed line"); - return -1; - } - - char *name_key = strdup(name); - int absent; - khint_t k = kh_put(s, idx->hash, name_key, &absent); - faidx1_t *v = &kh_value(idx->hash, k); - - if (! absent) { - hts_log_warning("Ignoring duplicate sequence \"%s\" at byte offset %" PRIu64, name, seq_offset); - free(name_key); - return 0; - } - - if (idx->n == idx->m) { - char **tmp; - idx->m = idx->m? 
idx->m<<1 : 16; - if (!(tmp = (char**)realloc(idx->name, sizeof(char*) * idx->m))) { - hts_log_error("Out of memory"); - return -1; - } - idx->name = tmp; - } - v->id = idx->n; - idx->name[idx->n++] = name_key; - v->len = len; - v->line_len = line_len; - v->line_blen = line_blen; - v->seq_offset = seq_offset; - v->qual_offset = qual_offset; - - return 0; -} - - -static faidx_t *fai_build_core(BGZF *bgzf) { - kstring_t name = { 0, 0, NULL }; - int c, read_done, line_num; - faidx_t *idx; - uint64_t seq_offset, qual_offset; - uint64_t seq_len, qual_len; - uint64_t char_len, cl, line_len, ll; - enum read_state {OUT_READ, IN_NAME, IN_SEQ, SEQ_END, IN_QUAL} state; - - idx = (faidx_t*)calloc(1, sizeof(faidx_t)); - idx->hash = kh_init(s); - idx->format = FAI_NONE; - - state = OUT_READ, read_done = 0, line_num = 1; - seq_offset = qual_offset = seq_len = qual_len = char_len = cl = line_len = ll = 0; - - while ((c = bgzf_getc(bgzf)) >= 0) { - switch (state) { - case OUT_READ: - switch (c) { - case '>': - if (idx->format == FAI_FASTQ) { - hts_log_error("Found '>' in a FASTQ file, error at line %d", line_num); - goto fail; - } - - idx->format = FAI_FASTA; - state = IN_NAME; - break; - - case '@': - if (idx->format == FAI_FASTA) { - hts_log_error("Found '@' in a FASTA file, error at line %d", line_num); - goto fail; - } - - idx->format = FAI_FASTQ; - state = IN_NAME; - break; - - case '\r': - // Blank line with cr-lf ending? - if ((c = bgzf_getc(bgzf)) == '\n') { - line_num++; - } else { - hts_log_error("Format error, carriage return not followed by new line at line %d", line_num); - goto fail; - } - break; - - case '\n': - // just move onto the next line - line_num++; - break; - - default: { - char s[4] = { '"', c, '"', '\0' }; - hts_log_error("Format error, unexpected %s at line %d", isprint(c) ? 
s : "character", line_num); - goto fail; - } - } - break; - - case IN_NAME: - if (read_done) { - if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) - goto fail; - - read_done = 0; - } - - name.l = 0; - - do { - if (!isspace(c)) { - kputc(c, &name); - } else if (name.l > 0 || c == '\n') { - break; - } - } while ((c = bgzf_getc(bgzf)) >= 0); - - kputsn("", 0, &name); - - if (c < 0) { - hts_log_error("The last entry '%s' has no sequence", name.s); - goto fail; - } - - // read the rest of the line if necessary - if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - - state = IN_SEQ; seq_len = qual_len = char_len = line_len = 0; - seq_offset = bgzf_utell(bgzf); - line_num++; - break; - - case IN_SEQ: - if (idx->format == FAI_FASTA) { - if (c == '\n') { - state = OUT_READ; - line_num++; - continue; - } else if (c == '>') { - state = IN_NAME; - continue; - } - } else if (idx->format == FAI_FASTQ) { - if (c == '+') { - state = IN_QUAL; - if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - qual_offset = bgzf_utell(bgzf); - line_num++; - continue; - } else if (c == '\n') { - hts_log_error("Inlined empty line is not allowed in sequence '%s' at line %d", name.s, line_num); - goto fail; - } - } - - ll = cl = 0; - - if (idx->format == FAI_FASTA) read_done = 1; - - do { - ll++; - if (isgraph(c)) cl++; - } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - - ll++; seq_len += cl; - - if (line_len == 0) { - line_len = ll; - char_len = cl; - } else if (line_len > ll) { - - if (idx->format == FAI_FASTA) - state = OUT_READ; - else - state = SEQ_END; - - } else if (line_len < ll) { - hts_log_error("Different line length in sequence '%s'", name.s); - goto fail; - } - - line_num++; - break; - - case SEQ_END: - if (c == '+') { - state = IN_QUAL; - while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - qual_offset = bgzf_utell(bgzf); - line_num++; - } else { - hts_log_error("Format error, expecting '+', got '%c' at line %d", 
c, line_num); - goto fail; - } - break; - - case IN_QUAL: - if (c == '\n') { - if (!read_done) { - hts_log_error("Inlined empty line is not allowed in quality of sequence '%s'", name.s); - goto fail; - } - - state = OUT_READ; - line_num++; - continue; - } else if (c == '@' && read_done) { - state = IN_NAME; - continue; - } - - ll = cl = 0; - - do { - ll++; - if (isgraph(c)) cl++; - } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - - ll++; qual_len += cl; - - if (line_len < ll) { - hts_log_error("Quality line length too long in '%s' at line %d", name.s, line_num); - goto fail; - } else if (qual_len == seq_len) { - read_done = 1; - } else if (qual_len > seq_len) { - hts_log_error("Quality length longer than sequence in '%s' at line %d", name.s, line_num); - goto fail; - } else if (line_len > ll) { - hts_log_error("Quality line length too short in '%s' at line %d", name.s, line_num); - goto fail; - } - - line_num++; - break; - } - } - - if (read_done) { - if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) - goto fail; - } else { - goto fail; - } - - free(name.s); - return idx; - -fail: - free(name.s); - fai_destroy(idx); - return NULL; -} - - -static int fai_save(const faidx_t *fai, hFILE *fp) { - khint_t k; - int i; - char buf[96]; // Must be big enough for format below. 
- - for (i = 0; i < fai->n; ++i) { - faidx1_t x; - k = kh_get(s, fai->hash, fai->name[i]); - assert(k < kh_end(fai->hash)); - x = kh_value(fai->hash, k); - - if (fai->format == FAI_FASTA) { - snprintf(buf, sizeof(buf), - "\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\n", - x.len, x.seq_offset, x.line_blen, x.line_len); - } else { - snprintf(buf, sizeof(buf), - "\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\t%"PRIu64"\n", - x.len, x.seq_offset, x.line_blen, x.line_len, x.qual_offset); - } - - if (hputs(fai->name[i], fp) != 0) return -1; - if (hputs(buf, fp) != 0) return -1; - } - return 0; -} - - -static faidx_t *fai_read(hFILE *fp, const char *fname, int format) -{ - faidx_t *fai; - char *buf = NULL, *p; - ssize_t l, lnum = 1; - - fai = (faidx_t*)calloc(1, sizeof(faidx_t)); - if (!fai) return NULL; - - fai->hash = kh_init(s); - if (!fai->hash) goto fail; - - buf = (char*)calloc(0x10000, 1); - if (!buf) goto fail; - - while ((l = hgetln(buf, 0x10000, fp)) > 0) { - uint32_t line_len, line_blen, n; - uint64_t len; - uint64_t seq_offset; - uint64_t qual_offset = 0; - - for (p = buf; *p && !isspace_c(*p); ++p); - - if (p - buf < l) { - *p = 0; ++p; - } - - if (format == FAI_FASTA) { - n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32, &len, &seq_offset, &line_blen, &line_len); - - if (n != 4) { - hts_log_error("Could not understand FASTA index %s line %zd", fname, lnum); - goto fail; - } - } else { - n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32"%"SCNu64, &len, &seq_offset, &line_blen, &line_len, &qual_offset); - - if (n != 5) { - if (n == 4) { - hts_log_error("Possibly this is a FASTA index, try using faidx. 
Problem in %s line %zd", fname, lnum); - } else { - hts_log_error("Could not understand FASTQ index %s line %zd", fname, lnum); - } - - goto fail; - } - } - - if (fai_insert_index(fai, buf, len, line_len, line_blen, seq_offset, qual_offset) != 0) { - goto fail; - } - - if (buf[l - 1] == '\n') ++lnum; - } - - if (l < 0) { - hts_log_error("Error while reading %s: %s", fname, strerror(errno)); - goto fail; - } - free(buf); - return fai; - - fail: - free(buf); - fai_destroy(fai); - return NULL; -} - -void fai_destroy(faidx_t *fai) -{ - int i; - if (!fai) return; - for (i = 0; i < fai->n; ++i) free(fai->name[i]); - free(fai->name); - kh_destroy(s, fai->hash); - if (fai->bgzf) bgzf_close(fai->bgzf); - free(fai); -} - - -static int fai_build3_core(const char *fn, const char *fnfai, const char *fngzi) -{ - kstring_t fai_kstr = { 0, 0, NULL }; - kstring_t gzi_kstr = { 0, 0, NULL }; - BGZF *bgzf = NULL; - hFILE *fp = NULL; - faidx_t *fai = NULL; - int save_errno, res; - char *file_type; - - bgzf = bgzf_open(fn, "r"); - - if ( !bgzf ) { - hts_log_error("Failed to open the file %s", fn); - goto fail; - } - - if ( bgzf->is_compressed ) { - if (bgzf_index_build_init(bgzf) != 0) { - hts_log_error("Failed to allocate bgzf index"); - goto fail; - } - } - - fai = fai_build_core(bgzf); - - if ( !fai ) { - if (bgzf->is_compressed && bgzf->is_gzip) { - hts_log_error("Cannot index files compressed with gzip, please use bgzip"); - } - goto fail; - } - - if (fai->format == FAI_FASTA) { - file_type = "FASTA"; - } else { - file_type = "FASTQ"; - } - - if (!fnfai) { - if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; - fnfai = fai_kstr.s; - } - - if (!fngzi) { - if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; - fngzi = gzi_kstr.s; - } - - if ( bgzf->is_compressed ) { - if (bgzf_index_dump(bgzf, fngzi, NULL) < 0) { - hts_log_error("Failed to make bgzf index %s", fngzi); - goto fail; - } - } - - res = bgzf_close(bgzf); - bgzf = NULL; - - if (res < 0) { - hts_log_error("Error on 
closing %s : %s", fn, strerror(errno)); - goto fail; - } - - fp = hopen(fnfai, "wb"); - - if ( !fp ) { - hts_log_error("Failed to open %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - if (fai_save(fai, fp) != 0) { - hts_log_error("Failed to write %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - if (hclose(fp) != 0) { - hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - free(fai_kstr.s); - free(gzi_kstr.s); - fai_destroy(fai); - return 0; - - fail: - save_errno = errno; - free(fai_kstr.s); - free(gzi_kstr.s); - bgzf_close(bgzf); - fai_destroy(fai); - errno = save_errno; - return -1; -} - - -int fai_build3(const char *fn, const char *fnfai, const char *fngzi) { - return fai_build3_core(fn, fnfai, fngzi); -} - - -int fai_build(const char *fn) { - return fai_build3(fn, NULL, NULL); -} - - -static faidx_t *fai_load3_core(const char *fn, const char *fnfai, const char *fngzi, - int flags, int format) -{ - kstring_t fai_kstr = { 0, 0, NULL }; - kstring_t gzi_kstr = { 0, 0, NULL }; - hFILE *fp = NULL; - faidx_t *fai = NULL; - int res, gzi_index_needed = 0; - char *file_type; - - if (format == FAI_FASTA) { - file_type = "FASTA"; - } else { - file_type = "FASTQ"; - } - - if (fn == NULL) - return NULL; - - if (fnfai == NULL) { - if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; - fnfai = fai_kstr.s; - } - if (fngzi == NULL) { - if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; - fngzi = gzi_kstr.s; - } - - fp = hopen(fnfai, "rb"); - - if (fp) { - // index file present, check if a compressed index is needed - hFILE *gz = NULL; - BGZF *bgzf = bgzf_open(fn, "rb"); - - if (bgzf == 0) { - hts_log_error("Failed to open %s file %s", file_type, fn); - goto fail; - } - - if (bgzf_compression(bgzf) == 2) { // BGZF compression - if ((gz = hopen(fngzi, "rb")) == 0) { - - if (!(flags & FAI_CREATE) || errno != ENOENT) { - hts_log_error("Failed to open %s index %s: %s", 
file_type, fngzi, strerror(errno)); - bgzf_close(bgzf); - goto fail; - } - - gzi_index_needed = 1; - res = hclose(fp); // closed as going to be re-indexed - - if (res < 0) { - hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - } else { - res = hclose(gz); - - if (res < 0) { - hts_log_error("Failed on closing %s index %s : %s", file_type, fngzi, strerror(errno)); - goto fail; - } - } - } - - bgzf_close(bgzf); - } - - if (fp == 0 || gzi_index_needed) { - if (!(flags & FAI_CREATE) || errno != ENOENT) { - hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - hts_log_info("Build %s index", file_type); - - if (fai_build3_core(fn, fnfai, fngzi) < 0) { - goto fail; - } - - fp = hopen(fnfai, "rb"); - if (fp == 0) { - hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); - goto fail; - } - } - - fai = fai_read(fp, fnfai, format); - if (fai == NULL) { - hts_log_error("Failed to read %s index %s", file_type, fnfai); - goto fail; - } - - res = hclose(fp); - fp = NULL; - if (res < 0) { - hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - fai->bgzf = bgzf_open(fn, "rb"); - if (fai->bgzf == 0) { - hts_log_error("Failed to open %s file %s", file_type, fn); - goto fail; - } - - if ( fai->bgzf->is_compressed==1 ) { - if ( bgzf_index_load(fai->bgzf, fngzi, NULL) < 0 ) { - hts_log_error("Failed to load .gzi index: %s", fngzi); - goto fail; - } - } - free(fai_kstr.s); - free(gzi_kstr.s); - return fai; - - fail: - if (fai) fai_destroy(fai); - if (fp) hclose_abruptly(fp); - free(fai_kstr.s); - free(gzi_kstr.s); - return NULL; -} - - -faidx_t *fai_load3(const char *fn, const char *fnfai, const char *fngzi, - int flags) { - return fai_load3_core(fn, fnfai, fngzi, flags, FAI_FASTA); -} - - -faidx_t *fai_load(const char *fn) -{ - return fai_load3(fn, NULL, NULL, FAI_CREATE); -} - - -faidx_t 
*fai_load3_format(const char *fn, const char *fnfai, const char *fngzi, - int flags, enum fai_format_options format) { - return fai_load3_core(fn, fnfai, fngzi, flags, format); -} - - -faidx_t *fai_load_format(const char *fn, enum fai_format_options format) { - return fai_load3_format(fn, NULL, NULL, FAI_CREATE, format); -} - - -static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val, - uint64_t offset, hts_pos_t beg, hts_pos_t end, hts_pos_t *len) { - char *s; - size_t l; - int c = 0; - int ret; - - if ((uint64_t) end - (uint64_t) beg >= SIZE_MAX - 2) { - hts_log_error("Range %"PRId64"..%"PRId64" too big", beg, end); - *len = -1; - return NULL; - } - - if (val->line_blen <= 0) { - hts_log_error("Invalid line length in index: %d", val->line_blen); - *len = -1; - return NULL; - } - - ret = bgzf_useek(fai->bgzf, - offset - + beg / val->line_blen * val->line_len - + beg % val->line_blen, SEEK_SET); - - if (ret < 0) { - *len = -1; - hts_log_error("Failed to retrieve block. (Seeking in a compressed, .gzi unindexed, file?)"); - return NULL; - } - - l = 0; - s = (char*)malloc((size_t) end - beg + 2); - if (!s) { - *len = -1; - return NULL; - } - - while ( l < end - beg && (c=bgzf_getc(fai->bgzf))>=0 ) - if (isgraph(c)) s[l++] = c; - if (c < 0) { - hts_log_error("Failed to retrieve block: %s", - c == -1 ? 
"unexpected end of file" : "error reading file"); - free(s); - *len = -1; - return NULL; - } - - s[l] = '\0'; - *len = l; - return s; -} - -static int fai_get_val(const faidx_t *fai, const char *str, - hts_pos_t *len, faidx1_t *val, hts_pos_t *fbeg, hts_pos_t *fend) { - khiter_t iter; - khash_t(s) *h; - int id; - hts_pos_t beg, end; - - if (!fai_parse_region(fai, str, &id, &beg, &end, 0)) { - hts_log_warning("Reference %s not found in FASTA file, returning empty sequence", str); - *len = -2; - return 1; - } - - h = fai->hash; - iter = kh_get(s, h, faidx_iseq(fai, id)); - if (iter >= kh_end(h)) { - // should have already been caught above - abort(); - } - *val = kh_value(h, iter); - - if (beg >= val->len) beg = val->len; - if (end >= val->len) end = val->len; - if (beg > end) beg = end; - - *fbeg = beg; - *fend = end; - - return 0; -} - -/* - * The internal still has line_blen as uint32_t, but our references - * can be longer, so for future proofing we use hts_pos_t. We also needed - * a signed value so we can return negatives as an error. - */ -hts_pos_t fai_line_length(const faidx_t *fai, const char *str) -{ - faidx1_t val; - int64_t beg, end; - hts_pos_t len; - - if (fai_get_val(fai, str, &len, &val, &beg, &end)) - return -1; - else - return val.line_blen; -} - -char *fai_fetch64(const faidx_t *fai, const char *str, hts_pos_t *len) -{ - faidx1_t val; - int64_t beg, end; - - if (fai_get_val(fai, str, len, &val, &beg, &end)) { - return NULL; - } - - // now retrieve the sequence - return fai_retrieve(fai, &val, val.seq_offset, beg, end, len); -} - -char *fai_fetch(const faidx_t *fai, const char *str, int *len) -{ - hts_pos_t len64; - char *ret = fai_fetch64(fai, str, &len64); - *len = len64 < INT_MAX ? 
len64 : INT_MAX; // trunc - return ret; -} - -char *fai_fetchqual64(const faidx_t *fai, const char *str, hts_pos_t *len) { - faidx1_t val; - int64_t beg, end; - - if (fai_get_val(fai, str, len, &val, &beg, &end)) { - return NULL; - } - - // now retrieve the sequence - return fai_retrieve(fai, &val, val.qual_offset, beg, end, len); -} - -char *fai_fetchqual(const faidx_t *fai, const char *str, int *len) { - hts_pos_t len64; - char *ret = fai_fetchqual64(fai, str, &len64); - *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc - return ret; -} - -int faidx_fetch_nseq(const faidx_t *fai) -{ - return fai->n; -} - -int faidx_nseq(const faidx_t *fai) -{ - return fai->n; -} - -const char *faidx_iseq(const faidx_t *fai, int i) -{ - return fai->name[i]; -} - -hts_pos_t faidx_seq_len64(const faidx_t *fai, const char *seq) -{ - khint_t k = kh_get(s, fai->hash, seq); - if ( k == kh_end(fai->hash) ) return -1; - return kh_val(fai->hash, k).len; -} - -int faidx_seq_len(const faidx_t *fai, const char *seq) -{ - hts_pos_t len = faidx_seq_len64(fai, seq); - return len < INT_MAX ? 
len : INT_MAX; -} - -static int faidx_adjust_position(const faidx_t *fai, int end_adjust, - faidx1_t *val_out, const char *c_name, - hts_pos_t *p_beg_i, hts_pos_t *p_end_i, - hts_pos_t *len) { - khiter_t iter; - faidx1_t *val; - - // Adjust position - iter = kh_get(s, fai->hash, c_name); - - if (iter == kh_end(fai->hash)) { - if (len) - *len = -2; - hts_log_error("The sequence \"%s\" was not found", c_name); - return 1; - } - - val = &kh_value(fai->hash, iter); - - if (val_out) - *val_out = *val; - - if(*p_end_i < *p_beg_i) - *p_beg_i = *p_end_i; - - if(*p_beg_i < 0) - *p_beg_i = 0; - else if(val->len <= *p_beg_i) - *p_beg_i = val->len; - - if(*p_end_i < 0) - *p_end_i = 0; - else if(val->len <= *p_end_i) - *p_end_i = val->len - end_adjust; - - return 0; -} - -int fai_adjust_region(const faidx_t *fai, int tid, - hts_pos_t *beg, hts_pos_t *end) -{ - hts_pos_t orig_beg, orig_end; - - if (!fai || !beg || !end || tid < 0 || tid >= fai->n) - return -1; - - orig_beg = *beg; - orig_end = *end; - if (faidx_adjust_position(fai, 0, NULL, fai->name[tid], beg, end, NULL) != 0) { - hts_log_error("Inconsistent faidx internal state - couldn't find \"%s\"", - fai->name[tid]); - return -1; - } - - return ((orig_beg != *beg ? 1 : 0) | - (orig_end != *end && orig_end < HTS_POS_MAX ? 2 : 0)); -} - -char *faidx_fetch_seq64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) -{ - faidx1_t val; - - // Adjust position - if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { - return NULL; - } - - // Now retrieve the sequence - return fai_retrieve(fai, &val, val.seq_offset, p_beg_i, p_end_i + 1, len); -} - -char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) -{ - hts_pos_t len64; - char *ret = faidx_fetch_seq64(fai, c_name, p_beg_i, p_end_i, &len64); - *len = len64 < INT_MAX ? 
len64 : INT_MAX; // trunc - return ret; -} - -char *faidx_fetch_qual64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) -{ - faidx1_t val; - - // Adjust position - if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { - return NULL; - } - - // Now retrieve the sequence - return fai_retrieve(fai, &val, val.qual_offset, p_beg_i, p_end_i + 1, len); -} - -char *faidx_fetch_qual(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) -{ - hts_pos_t len64; - char *ret = faidx_fetch_qual64(fai, c_name, p_beg_i, p_end_i, &len64); - *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc - return ret; -} - -int faidx_has_seq(const faidx_t *fai, const char *seq) -{ - khiter_t iter = kh_get(s, fai->hash, seq); - if (iter == kh_end(fai->hash)) return 0; - return 1; -} - -const char *fai_parse_region(const faidx_t *fai, const char *s, - int *tid, hts_pos_t *beg, hts_pos_t *end, - int flags) -{ - return hts_parse_region(s, tid, beg, end, (hts_name2id_f)fai_name2id, (void *)fai, flags); -} - -void fai_set_cache_size(faidx_t *fai, int cache_size) { - bgzf_set_cache_size(fai->bgzf, cache_size); -} - -// Adds a thread pool to the underlying BGZF layer. 
-int fai_thread_pool(faidx_t *fai, struct hts_tpool *pool, int qsize) { - return bgzf_thread_pool(fai->bgzf, pool, qsize); -} - -char *fai_path(const char *fa) { - char *fai = NULL; - if (!fa) { - hts_log_error("No reference file specified"); - } else { - char *fai_tmp = strstr(fa, HTS_IDX_DELIM); - if (fai_tmp) { - fai_tmp += strlen(HTS_IDX_DELIM); - fai = strdup(fai_tmp); - if (!fai) - hts_log_error("Failed to allocate memory"); - } else { - if (hisremote(fa)) { - fai = hts_idx_locatefn(fa, ".fai"); // get the remote fai file name, if any, but do not download the file - if (!fai) - hts_log_error("Failed to locate index file for remote reference file '%s'", fa); - } else{ - if (hts_idx_check_local(fa, HTS_FMT_FAI, &fai) == 0 && fai) { - if (fai_build3(fa, fai, NULL) == -1) { // create local fai file by indexing local fasta - hts_log_error("Failed to build index file for reference file '%s'", fa); - free(fai); - fai = NULL; - } - } - } - } - } - - return fai; -} diff --git a/src/htslib-1.18/header.c b/src/htslib-1.18/header.c deleted file mode 100644 index 6d3a02e..0000000 --- a/src/htslib-1.18/header.c +++ /dev/null @@ -1,2740 +0,0 @@ -/* -Copyright (c) 2018-2020 Genome Research Ltd. -Authors: James Bonfield , Valeriu Ohan - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include "textutils_internal.h" -#include "header.h" - -// Hash table for removing multiple lines from the header -KHASH_SET_INIT_STR(rm) -// Used for long refs in SAM files -KHASH_DECLARE(s2i, kh_cstr_t, int64_t) - -typedef khash_t(rm) rmhash_t; - -static int sam_hdr_link_pg(sam_hdr_t *bh); - -static int sam_hrecs_vupdate(sam_hrecs_t *hrecs, sam_hrec_type_t *type, va_list ap); -static int sam_hrecs_update(sam_hrecs_t *hrecs, sam_hrec_type_t *type, ...); - - -#define MAX_ERROR_QUOTE 320 // Prevent over-long error messages -static void sam_hrecs_error(const char *msg, const char *line, size_t len, size_t lno) { - int j; - - if (len > MAX_ERROR_QUOTE) - len = MAX_ERROR_QUOTE; - for (j = 0; j < len && line[j] != '\n'; j++) - ; - hts_log_error("%s at line %zd: \"%.*s\"", msg, lno, j, line); -} - -/* ==== Static methods ==== */ - -static int sam_hrecs_init_type_order(sam_hrecs_t *hrecs, char *type_list) { - if (!hrecs) - return -1; - - if (!type_list) { - hrecs->type_count = 5; - hrecs->type_order = calloc(hrecs->type_count, 3); - if (!hrecs->type_order) - return -1; - 
memcpy(hrecs->type_order[0], "HD", 2); - memcpy(hrecs->type_order[1], "SQ", 2); - memcpy(hrecs->type_order[2], "RG", 2); - memcpy(hrecs->type_order[3], "PG", 2); - memcpy(hrecs->type_order[4], "CO", 2); - } - - return 0; -} - -static int sam_hrecs_add_ref_altnames(sam_hrecs_t *hrecs, int nref, const char *list) { - const char *token; - ks_tokaux_t aux; - - if (!list) - return 0; - - for (token = kstrtok(list, ",", &aux); token; token = kstrtok(NULL, NULL, &aux)) { - if (aux.p == token) - continue; - - char *name = string_ndup(hrecs->str_pool, token, aux.p - token); - if (!name) - return -1; - int r; - khint_t k = kh_put(m_s2i, hrecs->ref_hash, name, &r); - if (r < 0) return -1; - - if (r > 0) - kh_val(hrecs->ref_hash, k) = nref; - else if (kh_val(hrecs->ref_hash, k) != nref) - hts_log_warning("Duplicate entry AN:\"%s\" in sam header", name); - } - - return 0; -} - -static void sam_hrecs_remove_ref_altnames(sam_hrecs_t *hrecs, int expected, const char *list) { - const char *token, *sn; - ks_tokaux_t aux; - kstring_t str = KS_INITIALIZE; - - if (expected < 0 || expected >= hrecs->nref) - return; - sn = hrecs->ref[expected].name; - - for (token = kstrtok(list, ",", &aux); token; token = kstrtok(NULL, NULL, &aux)) { - kputsn(token, aux.p - token, ks_clear(&str)); - khint_t k = kh_get(m_s2i, hrecs->ref_hash, str.s); - if (k != kh_end(hrecs->ref_hash) - && kh_val(hrecs->ref_hash, k) == expected - && strcmp(sn, str.s) != 0) - kh_del(m_s2i, hrecs->ref_hash, k); - } - - free(str.s); -} - -/* Updates the hash tables in the sam_hrecs_t structure. - * - * Returns 0 on success; - * -1 on failure - */ -static int sam_hrecs_update_hashes(sam_hrecs_t *hrecs, - khint32_t type, - sam_hrec_type_t *h_type) { - /* Add to reference hash? 
*/ - if (type == TYPEKEY("SQ")) { - sam_hrec_tag_t *tag = h_type->tag; - int nref = hrecs->nref; - const char *name = NULL; - const char *altnames = NULL; - hts_pos_t len = -1; - int r; - khint_t k; - - while (tag) { - if (tag->str[0] == 'S' && tag->str[1] == 'N') { - assert(tag->len >= 3); - name = tag->str+3; - } else if (tag->str[0] == 'L' && tag->str[1] == 'N') { - assert(tag->len >= 3); - len = strtoll(tag->str+3, NULL, 10); - } else if (tag->str[0] == 'A' && tag->str[1] == 'N') { - assert(tag->len >= 3); - altnames = tag->str+3; - } - tag = tag->next; - } - - if (!name) { - hts_log_error("Header includes @SQ line with no SN: tag"); - return -1; // SN should be present, according to spec. - } - - if (len == -1) { - hts_log_error("Header includes @SQ line \"%s\" with no LN: tag", - name); - return -1; // LN should be present, according to spec. - } - - // Seen already? - k = kh_get(m_s2i, hrecs->ref_hash, name); - if (k < kh_end(hrecs->ref_hash)) { - nref = kh_val(hrecs->ref_hash, k); - int ref_changed_flag = 0; - - // Check for hash entry added by sam_hrecs_refs_from_targets_array() - if (hrecs->ref[nref].ty == NULL) { - // Attach header line to existing stub entry. - hrecs->ref[nref].ty = h_type; - // Check lengths match; correct if not. 
- if (len != hrecs->ref[nref].len) { - char tmp[32]; - snprintf(tmp, sizeof(tmp), "%" PRIhts_pos, - hrecs->ref[nref].len); - if (sam_hrecs_update(hrecs, h_type, "LN", tmp, NULL) < 0) - return -1; - ref_changed_flag = 1; - } - if (sam_hrecs_add_ref_altnames(hrecs, nref, altnames) < 0) - return -1; - - if (ref_changed_flag && (hrecs->refs_changed < 0 || hrecs->refs_changed > nref)) - hrecs->refs_changed = nref; - return 0; - } - - // Check to see if an existing entry is being updated - if (hrecs->ref[nref].ty == h_type) { - if (hrecs->ref[nref].len != len) { - hrecs->ref[nref].len = len; - ref_changed_flag = 1; - } - if (!hrecs->ref[nref].name || strcmp(hrecs->ref[nref].name, name)) { - hrecs->ref[nref].name = name; - ref_changed_flag = 1; - } - if (sam_hrecs_add_ref_altnames(hrecs, nref, altnames) < 0) - return -1; - - if (ref_changed_flag && (hrecs->refs_changed < 0 || hrecs->refs_changed > nref)) - hrecs->refs_changed = nref; - return 0; - } - - // If here, the name is a duplicate. - // Check to see if it matches the SN: tag from the earlier record. - if (strcmp(hrecs->ref[nref].name, name) == 0) { - hts_log_error("Duplicate entry \"%s\" in sam header", - name); - return -1; - } - - // Clash with an already-seen altname - // As SN: should be preferred to AN: add this as a new - // record and update the hash entry to point to it. - hts_log_warning("Ref name SN:\"%s\" is a duplicate of an existing AN key", name); - nref = hrecs->nref; - } - - if (nref == hrecs->ref_sz) { - size_t new_sz = hrecs->ref_sz >= 4 ? 
hrecs->ref_sz + (hrecs->ref_sz / 4) : 32; - sam_hrec_sq_t *new_ref = realloc(hrecs->ref, sizeof(*hrecs->ref) * new_sz); - if (!new_ref) - return -1; - hrecs->ref = new_ref; - hrecs->ref_sz = new_sz; - } - - hrecs->ref[nref].name = name; - hrecs->ref[nref].len = len; - hrecs->ref[nref].ty = h_type; - - k = kh_put(m_s2i, hrecs->ref_hash, hrecs->ref[nref].name, &r); - if (-1 == r) return -1; - kh_val(hrecs->ref_hash, k) = nref; - - if (sam_hrecs_add_ref_altnames(hrecs, nref, altnames) < 0) - return -1; - - if (hrecs->refs_changed < 0 || hrecs->refs_changed > hrecs->nref) - hrecs->refs_changed = hrecs->nref; - hrecs->nref++; - } - - /* Add to read-group hash? */ - if (type == TYPEKEY("RG")) { - sam_hrec_tag_t *tag = sam_hrecs_find_key(h_type, "ID", NULL); - int nrg = hrecs->nrg, r; - khint_t k; - - if (!tag) { - hts_log_error("Header includes @RG line with no ID: tag"); - return -1; // ID should be present, according to spec. - } - assert(tag->str && tag->len >= 3); - - // Seen already? - k = kh_get(m_s2i, hrecs->rg_hash, tag->str + 3); - if (k < kh_end(hrecs->rg_hash)) { - nrg = kh_val(hrecs->rg_hash, k); - assert(hrecs->rg[nrg].ty != NULL); - if (hrecs->rg[nrg].ty != h_type) { - hts_log_warning("Duplicate entry \"%s\" in sam header", - tag->str + 3); - } else { - hrecs->rg[nrg].name = tag->str + 3; - hrecs->rg[nrg].name_len = tag->len - 3; - } - return 0; - } - - if (nrg == hrecs->rg_sz) { - size_t new_sz = hrecs->rg_sz >= 4 ? hrecs->rg_sz + hrecs->rg_sz / 4 : 4; - sam_hrec_rg_t *new_rg = realloc(hrecs->rg, sizeof(*hrecs->rg) * new_sz); - if (!new_rg) - return -1; - hrecs->rg = new_rg; - hrecs->rg_sz = new_sz; - } - - hrecs->rg[nrg].name = tag->str + 3; - hrecs->rg[nrg].name_len = tag->len - 3; - hrecs->rg[nrg].ty = h_type; - hrecs->rg[nrg].id = nrg; - - k = kh_put(m_s2i, hrecs->rg_hash, hrecs->rg[nrg].name, &r); - if (-1 == r) return -1; - kh_val(hrecs->rg_hash, k) = nrg; - - hrecs->nrg++; - } - - /* Add to program hash? 
*/ - if (type == TYPEKEY("PG")) { - sam_hrec_tag_t *tag; - sam_hrec_pg_t *new_pg; - int npg = hrecs->npg; - - if (npg == hrecs->pg_sz) { - size_t new_sz = hrecs->pg_sz >= 4 ? hrecs->pg_sz + hrecs->pg_sz / 4 : 4; - new_pg = realloc(hrecs->pg, sizeof(*hrecs->pg) * new_sz); - if (!new_pg) - return -1; - hrecs->pg = new_pg; - hrecs->pg_sz = new_sz; - } - - tag = h_type->tag; - hrecs->pg[npg].name = NULL; - hrecs->pg[npg].name_len = 0; - hrecs->pg[npg].ty = h_type; - hrecs->pg[npg].id = npg; - hrecs->pg[npg].prev_id = -1; - - while (tag) { - if (tag->str[0] == 'I' && tag->str[1] == 'D') { - /* Avoid duplicate ID tags coming from other applications */ - if (!hrecs->pg[npg].name) { - assert(tag->len >= 3); - hrecs->pg[npg].name = tag->str + 3; - hrecs->pg[npg].name_len = tag->len - 3; - } else { - hts_log_warning("PG line with multiple ID tags. The first encountered was preferred - ID:%s", hrecs->pg[npg].name); - } - } else if (tag->str[0] == 'P' && tag->str[1] == 'P') { - // Resolve later if needed - khint_t k; - k = kh_get(m_s2i, hrecs->pg_hash, tag->str+3); - - if (k != kh_end(hrecs->pg_hash)) { - int p_id = kh_val(hrecs->pg_hash, k); - hrecs->pg[npg].prev_id = hrecs->pg[p_id].id; - - /* Unmark previous entry as a PG termination */ - if (hrecs->npg_end > 0 && - hrecs->pg_end[hrecs->npg_end-1] == p_id) { - hrecs->npg_end--; - } else { - int i; - for (i = 0; i < hrecs->npg_end; i++) { - if (hrecs->pg_end[i] == p_id) { - memmove(&hrecs->pg_end[i], &hrecs->pg_end[i+1], - (hrecs->npg_end-i-1)*sizeof(*hrecs->pg_end)); - hrecs->npg_end--; - } - } - } - } else { - hrecs->pg[npg].prev_id = -1; - } - } - tag = tag->next; - } - - if (hrecs->pg[npg].name) { - khint_t k; - int r; - k = kh_put(m_s2i, hrecs->pg_hash, hrecs->pg[npg].name, &r); - if (-1 == r) return -1; - kh_val(hrecs->pg_hash, k) = npg; - } else { - return -1; // ID should be present, according to spec. - } - - /* Add to npg_end[] array. 
Remove later if we find a PP line */ - if (hrecs->npg_end >= hrecs->npg_end_alloc) { - int *new_pg_end; - int new_alloc = hrecs->npg_end_alloc ? hrecs->npg_end_alloc*2 : 4; - - new_pg_end = realloc(hrecs->pg_end, new_alloc * sizeof(int)); - if (!new_pg_end) - return -1; - hrecs->npg_end_alloc = new_alloc; - hrecs->pg_end = new_pg_end; - } - hrecs->pg_end[hrecs->npg_end++] = npg; - - hrecs->npg++; - } - - return 0; -} - -static int sam_hrecs_remove_hash_entry(sam_hrecs_t *hrecs, khint32_t type, sam_hrec_type_t *h_type) { - if (!hrecs || !h_type) - return -1; - - sam_hrec_tag_t *tag; - const char *key = NULL; - khint_t k; - - /* Remove name and any alternative names from reference hash */ - if (type == TYPEKEY("SQ")) { - const char *altnames = NULL; - - tag = h_type->tag; - - while (tag) { - if (tag->str[0] == 'S' && tag->str[1] == 'N') { - assert(tag->len >= 3); - key = tag->str + 3; - } else if (tag->str[0] == 'A' && tag->str[1] == 'N') { - assert(tag->len >= 3); - altnames = tag->str + 3; - } - tag = tag->next; - } - - if (key) { - k = kh_get(m_s2i, hrecs->ref_hash, key); - if (k != kh_end(hrecs->ref_hash)) { - int idx = kh_val(hrecs->ref_hash, k); - if (idx + 1 < hrecs->nref) - memmove(&hrecs->ref[idx], &hrecs->ref[idx+1], - sizeof(sam_hrec_sq_t)*(hrecs->nref - idx - 1)); - if (altnames) - sam_hrecs_remove_ref_altnames(hrecs, idx, altnames); - kh_del(m_s2i, hrecs->ref_hash, k); - hrecs->nref--; - if (hrecs->refs_changed < 0 || hrecs->refs_changed > idx) - hrecs->refs_changed = idx; - for (k = 0; k < kh_end(hrecs->ref_hash); k++) { - if (kh_exist(hrecs->ref_hash, k) - && kh_value(hrecs->ref_hash, k) > idx) { - kh_value(hrecs->ref_hash, k)--; - } - } - } - } - } - - /* Remove from read-group hash */ - if (type == TYPEKEY("RG")) { - tag = h_type->tag; - - while (tag) { - if (tag->str[0] == 'I' && tag->str[1] == 'D') { - assert(tag->len >= 3); - key = tag->str + 3; - k = kh_get(m_s2i, hrecs->rg_hash, key); - if (k != kh_end(hrecs->rg_hash)) { - int idx = 
kh_val(hrecs->rg_hash, k); - if (idx + 1 < hrecs->nrg) - memmove(&hrecs->rg[idx], &hrecs->rg[idx+1], sizeof(sam_hrec_rg_t)*(hrecs->nrg - idx - 1)); - kh_del(m_s2i, hrecs->rg_hash, k); - hrecs->nrg--; - for (k = 0; k < kh_end(hrecs->rg_hash); k++) { - if (kh_exist(hrecs->rg_hash, k) - && kh_value(hrecs->rg_hash, k) > idx) { - kh_value(hrecs->rg_hash, k)--; - } - } - } - break; - } - tag = tag->next; - } - } - - return 0; -} - -/** Add a header record to the global line ordering - * - * If @p after is not NULL, the new record will be inserted after this one, - * otherwise it will go at the end. - * - * An exception is an HD record, which will always be put first unless - * one is already present. - */ -static void sam_hrecs_global_list_add(sam_hrecs_t *hrecs, - sam_hrec_type_t *h_type, - sam_hrec_type_t *after) { - const khint32_t hd_type = TYPEKEY("HD"); - int update_first_line = 0; - - // First line seen - if (!hrecs->first_line) { - hrecs->first_line = h_type->global_next = h_type->global_prev = h_type; - return; - } - - // @HD goes at the top (unless there's one already) - if (h_type->type == hd_type && hrecs->first_line->type != hd_type) { - after = hrecs->first_line->global_prev; - update_first_line = 1; - } - - // If no instructions given, put it at the end - if (!after) - after = hrecs->first_line->global_prev; - - h_type->global_prev = after; - h_type->global_next = after->global_next; - h_type->global_prev->global_next = h_type; - h_type->global_next->global_prev = h_type; - - if (update_first_line) - hrecs->first_line = h_type; -} - -/*! Add header record with a va_list interface. - * - * Adds a single record to a SAM header. - * - * This takes a header record type, a va_list argument and one or more - * key,value pairs, ending with the NULL key. - * - * Eg. sam_hrecs_vadd(h, "SQ", args, "ID", "foo", "LN", "100", NULL). 
- * - * The purpose of the additional va_list parameter is to permit other - * varargs functions to call this while including their own additional - * parameters; an example is in sam_hdr_add_pg(). - * - * Note: this function invokes va_arg at least once, making the value - * of ap indeterminate after the return. The caller should call - * va_start/va_end before/after calling this function or use va_copy. - * - * @return - * Returns >= 0 on success; - * -1 on failure - */ -static int sam_hrecs_vadd(sam_hrecs_t *hrecs, const char *type, va_list ap, ...) { - va_list args; - sam_hrec_type_t *h_type; - sam_hrec_tag_t *h_tag, *last=NULL; - int new; - khint32_t type_i = TYPEKEY(type), k; - - if (!strncmp(type, "HD", 2) && (h_type = sam_hrecs_find_type_id(hrecs, "HD", NULL, NULL))) - return sam_hrecs_vupdate(hrecs, h_type, ap); - - if (!(h_type = pool_alloc(hrecs->type_pool))) - return -1; - k = kh_put(sam_hrecs_t, hrecs->h, type_i, &new); - if (new < 0) - return -1; - - h_type->type = type_i; - - // Form the ring, either with self or other lines of this type - if (!new) { - sam_hrec_type_t *t = kh_val(hrecs->h, k), *p; - p = t->prev; - - assert(p->next == t); - p->next = h_type; - h_type->prev = p; - - t->prev = h_type; - h_type->next = t; - } else { - kh_val(hrecs->h, k) = h_type; - h_type->prev = h_type->next = h_type; - } - h_type->tag = NULL; - - // Add to global line ordering after any existing line of the same type, - // or at the end if no line of this type exists yet. - sam_hrecs_global_list_add(hrecs, h_type, !new ? h_type->prev : NULL); - - // Check linked-list invariants - assert(h_type->prev->next == h_type); - assert(h_type->next->prev == h_type); - assert(h_type->global_prev->global_next == h_type); - assert(h_type->global_next->global_prev == h_type); - - // Any ... 
varargs - va_start(args, ap); - for (;;) { - char *key, *val = NULL, *str; - - if (!(key = (char *)va_arg(args, char *))) - break; - if (strncmp(type, "CO", 2) && !(val = (char *)va_arg(args, char *))) - break; - if (*val == '\0') - continue; - - if (!(h_tag = pool_alloc(hrecs->tag_pool))) - return -1; - - if (strncmp(type, "CO", 2)) { - h_tag->len = 3 + strlen(val); - str = string_alloc(hrecs->str_pool, h_tag->len+1); - if (!str || snprintf(str, h_tag->len+1, "%2.2s:%s", key, val) < 0) - return -1; - h_tag->str = str; - } else { - h_tag->len = strlen(key); - h_tag->str = string_ndup(hrecs->str_pool, key, h_tag->len); - if (!h_tag->str) - return -1; - } - - h_tag->next = NULL; - if (last) - last->next = h_tag; - else - h_type->tag = h_tag; - - last = h_tag; - } - va_end(args); - - // Plus the specified va_list params - for (;;) { - char *key, *val = NULL, *str; - - if (!(key = (char *)va_arg(ap, char *))) - break; - if (strncmp(type, "CO", 2) && !(val = (char *)va_arg(ap, char *))) - break; - - if (!(h_tag = pool_alloc(hrecs->tag_pool))) - return -1; - - if (strncmp(type, "CO", 2)) { - h_tag->len = 3 + strlen(val); - str = string_alloc(hrecs->str_pool, h_tag->len+1); - if (!str || snprintf(str, h_tag->len+1, "%2.2s:%s", key, val) < 0) - return -1; - h_tag->str = str; - } else { - h_tag->len = strlen(key); - h_tag->str = string_ndup(hrecs->str_pool, key, h_tag->len); - if (!h_tag->str) - return -1; - } - - h_tag->next = NULL; - if (last) - last->next = h_tag; - else - h_type->tag = h_tag; - - last = h_tag; - } - - if (-1 == sam_hrecs_update_hashes(hrecs, TYPEKEY(type), h_type)) - return -1; - - if (!strncmp(type, "PG", 2)) - hrecs->pgs_changed = 1; - - hrecs->dirty = 1; - - return 0; -} - -// As sam_hrecs_vadd(), but without the extra va_list parameter -static int sam_hrecs_add(sam_hrecs_t *hrecs, const char *type, ...) 
{ - va_list args; - int res; - va_start(args, type); - res = sam_hrecs_vadd(hrecs, type, args, NULL); - va_end(args); - return res; -} - -/* - * Function for deallocating a list of tags - */ - -static void sam_hrecs_free_tags(sam_hrecs_t *hrecs, sam_hrec_tag_t *tag) { - if (!hrecs || !tag) - return; - if (tag->next) - sam_hrecs_free_tags(hrecs, tag->next); - - pool_free(hrecs->tag_pool, tag); -} - -static int sam_hrecs_remove_line(sam_hrecs_t *hrecs, const char *type_name, sam_hrec_type_t *type_found) { - if (!hrecs || !type_name || !type_found) - return -1; - - khint32_t itype = TYPEKEY(type_name); - khint_t k = kh_get(sam_hrecs_t, hrecs->h, itype); - if (k == kh_end(hrecs->h)) - return -1; - - // Remove from global list (remembering it could be the only line) - if (hrecs->first_line == type_found) { - hrecs->first_line = (type_found->global_next != type_found - ? type_found->global_next : NULL); - } - type_found->global_next->global_prev = type_found->global_prev; - type_found->global_prev->global_next = type_found->global_next; - - /* single element in the list */ - if (type_found->prev == type_found || type_found->next == type_found) { - kh_del(sam_hrecs_t, hrecs->h, k); - } else { - type_found->prev->next = type_found->next; - type_found->next->prev = type_found->prev; - if (kh_val(hrecs->h, k) == type_found) { //first element - kh_val(hrecs->h, k) = type_found->next; - } - } - - if (!strncmp(type_name, "SQ", 2) || !strncmp(type_name, "RG", 2)) - sam_hrecs_remove_hash_entry(hrecs, itype, type_found); - - sam_hrecs_free_tags(hrecs, type_found->tag); - pool_free(hrecs->type_pool, type_found); - - hrecs->dirty = 1; - - return 0; -} - -// Paste together a line from the parsed data structures -static int build_header_line(const sam_hrec_type_t *ty, kstring_t *ks) { - sam_hrec_tag_t *tag; - int r = 0; - char c[2]= { ty->type >> 8, ty->type & 0xff }; - - r |= (kputc_('@', ks) == EOF); - r |= (kputsn(c, 2, ks) == EOF); - for (tag = ty->tag; tag; tag = tag->next) { - r 
|= (kputc_('\t', ks) == EOF); - r |= (kputsn(tag->str, tag->len, ks) == EOF); - } - - return r; -} - -static int sam_hrecs_rebuild_lines(const sam_hrecs_t *hrecs, kstring_t *ks) { - const sam_hrec_type_t *t1, *t2; - - if (!hrecs->first_line) - return kputsn("", 0, ks) >= 0 ? 0 : -1; - - t1 = t2 = hrecs->first_line; - do { - if (build_header_line(t1, ks) != 0) - return -1; - if (kputc('\n', ks) < 0) - return -1; - - t1 = t1->global_next; - } while (t1 != t2); - - return 0; -} - -static int sam_hrecs_parse_lines(sam_hrecs_t *hrecs, const char *hdr, size_t len) { - size_t i, lno; - - if (!hrecs || len > SSIZE_MAX) - return -1; - - if (!len) - len = strlen(hdr); - - if (len < 3) { - if (len == 0 || *hdr == '\0') return 0; - sam_hrecs_error("Header line too short", hdr, len, 1); - return -1; - } - - for (i = 0, lno = 1; i < len - 3 && hdr[i] != '\0'; i++, lno++) { - khint32_t type; - khint_t k; - - int l_start = i, new; - sam_hrec_type_t *h_type; - sam_hrec_tag_t *h_tag, *last; - - if (hdr[i] != '@') { - sam_hrecs_error("Header line does not start with '@'", - &hdr[l_start], len - l_start, lno); - return -1; - } - - if (!isalpha_c(hdr[i+1]) || !isalpha_c(hdr[i+2])) { - sam_hrecs_error("Header line does not have a two character key", - &hdr[l_start], len - l_start, lno); - return -1; - } - type = TYPEKEY(&hdr[i+1]); - - i += 3; - if (i == len || hdr[i] == '\n') - continue; - - // Add the header line type - if (!(h_type = pool_alloc(hrecs->type_pool))) - return -1; - k = kh_put(sam_hrecs_t, hrecs->h, type, &new); - if (new < 0) - return -1; - - h_type->type = type; - - // Add to end of global list - sam_hrecs_global_list_add(hrecs, h_type, NULL); - - // Form the ring, either with self or other lines of this type - if (!new) { - sam_hrec_type_t *t = kh_val(hrecs->h, k), *p; - p = t->prev; - - assert(p->next == t); - p->next = h_type; - h_type->prev = p; - - t->prev = h_type; - h_type->next = t; - } else { - kh_val(hrecs->h, k) = h_type; - h_type->prev = h_type->next = 
h_type; - } - - // Parse the tags on this line - last = NULL; - if (type == TYPEKEY("CO")) { - size_t j; - - if (i == len || hdr[i] != '\t') { - sam_hrecs_error("Missing tab", - &hdr[l_start], len - l_start, lno); - return -1; - } - - for (j = ++i; j < len && hdr[j] != '\0' && hdr[j] != '\n'; j++) - ; - - if (!(h_type->tag = h_tag = pool_alloc(hrecs->tag_pool))) - return -1; - h_tag->str = string_ndup(hrecs->str_pool, &hdr[i], j-i); - h_tag->len = j-i; - h_tag->next = NULL; - if (!h_tag->str) - return -1; - - i = j; - - } else { - do { - size_t j; - - if (i == len || hdr[i] != '\t') { - sam_hrecs_error("Missing tab", - &hdr[l_start], len - l_start, lno); - return -1; - } - - for (j = ++i; j < len && hdr[j] != '\0' && hdr[j] != '\n' && hdr[j] != '\t'; j++) - ; - - if (j - i < 3 || hdr[i + 2] != ':') { - sam_hrecs_error("Malformed key:value pair", - &hdr[l_start], len - l_start, lno); - return -1; - } - - if (!(h_tag = pool_alloc(hrecs->tag_pool))) - return -1; - h_tag->str = string_ndup(hrecs->str_pool, &hdr[i], j-i); - h_tag->len = j-i; - h_tag->next = NULL; - if (!h_tag->str) - return -1; - - if (last) - last->next = h_tag; - else - h_type->tag = h_tag; - - last = h_tag; - i = j; - } while (i < len && hdr[i] != '\0' && hdr[i] != '\n'); - } - - /* Update RG/SQ hashes */ - if (-1 == sam_hrecs_update_hashes(hrecs, type, h_type)) - return -1; - } - - return 0; -} - -/*! 
Update sam_hdr_t target_name and target_len arrays - * - * @return 0 on success; -1 on failure - */ -int sam_hdr_update_target_arrays(sam_hdr_t *bh, const sam_hrecs_t *hrecs, - int refs_changed) { - if (!bh || !hrecs) - return -1; - - if (refs_changed < 0) - return 0; - - // Grow arrays if necessary - if (bh->n_targets < hrecs->nref) { - char **new_names = realloc(bh->target_name, - hrecs->nref * sizeof(*new_names)); - if (!new_names) - return -1; - bh->target_name = new_names; - uint32_t *new_lens = realloc(bh->target_len, - hrecs->nref * sizeof(*new_lens)); - if (!new_lens) - return -1; - bh->target_len = new_lens; - } - - // Update names and lengths where changed - // hrecs->refs_changed is the first ref that has been updated, so ones - // before that can be skipped. - int i; - khint_t k; - khash_t(s2i) *long_refs = (khash_t(s2i) *) bh->sdict; - for (i = refs_changed; i < hrecs->nref; i++) { - if (i >= bh->n_targets - || strcmp(bh->target_name[i], hrecs->ref[i].name) != 0) { - if (i < bh->n_targets) - free(bh->target_name[i]); - bh->target_name[i] = strdup(hrecs->ref[i].name); - if (!bh->target_name[i]) - return -1; - } - if (hrecs->ref[i].len < UINT32_MAX) { - bh->target_len[i] = hrecs->ref[i].len; - - if (!long_refs) - continue; - - // Check if we have an old length, if so remove it. 
- k = kh_get(s2i, long_refs, bh->target_name[i]); - if (k < kh_end(long_refs)) - kh_del(s2i, long_refs, k); - } else { - bh->target_len[i] = UINT32_MAX; - if (bh->hrecs != hrecs) { - // Called from sam_hdr_dup; need to add sdict entries - if (!long_refs) { - if (!(bh->sdict = long_refs = kh_init(s2i))) - return -1; - } - - // Add / update length - int absent; - k = kh_put(s2i, long_refs, bh->target_name[i], &absent); - if (absent < 0) - return -1; - kh_val(long_refs, k) = hrecs->ref[i].len; - } - } - } - - // Free up any names that have been removed - for (; i < bh->n_targets; i++) { - if (long_refs) { - k = kh_get(s2i, long_refs, bh->target_name[i]); - if (k < kh_end(long_refs)) - kh_del(s2i, long_refs, k); - } - free(bh->target_name[i]); - } - - bh->n_targets = hrecs->nref; - return 0; -} - -static int rebuild_target_arrays(sam_hdr_t *bh) { - if (!bh || !bh->hrecs) - return -1; - - sam_hrecs_t *hrecs = bh->hrecs; - if (hrecs->refs_changed < 0) - return 0; - - if (sam_hdr_update_target_arrays(bh, hrecs, hrecs->refs_changed) != 0) - return -1; - - hrecs->refs_changed = -1; - return 0; -} - -/// Populate hrecs refs array from header target_name, target_len arrays -/** - * @return 0 on success; -1 on failure - * - * Pre-fills the refs hash from the target arrays. For BAM files this - * will ensure that they are in the correct order as the target arrays - * are the canonical source for converting target ids to names and lengths. - * - * The added entries do not link to a header line. sam_hrecs_update_hashes() - * will add the links later for lines found in the text header. - * - * This should be called before the text header is parsed. - */ -static int sam_hrecs_refs_from_targets_array(sam_hrecs_t *hrecs, - const sam_hdr_t *bh) { - int32_t tid = 0; - - if (!hrecs || !bh) - return -1; - - // This should always be called before parsing the text header - // so the ref array should start off empty, and we don't have to try - // to reconcile any existing data. 
- if (hrecs->nref > 0) { - hts_log_error("Called with non-empty ref array"); - return -1; - } - - if (hrecs->ref_sz < bh->n_targets) { - sam_hrec_sq_t *new_ref = realloc(hrecs->ref, - bh->n_targets * sizeof(*new_ref)); - if (!new_ref) - return -1; - - hrecs->ref = new_ref; - hrecs->ref_sz = bh->n_targets; - } - - for (tid = 0; tid < bh->n_targets; tid++) { - khint_t k; - int r; - hrecs->ref[tid].name = string_dup(hrecs->str_pool, bh->target_name[tid]); - if (!hrecs->ref[tid].name) goto fail; - if (bh->target_len[tid] < UINT32_MAX || !bh->sdict) { - hrecs->ref[tid].len = bh->target_len[tid]; - } else { - khash_t(s2i) *long_refs = (khash_t(s2i) *) bh->sdict; - k = kh_get(s2i, long_refs, hrecs->ref[tid].name); - if (k < kh_end(long_refs)) { - hrecs->ref[tid].len = kh_val(long_refs, k); - } else { - hrecs->ref[tid].len = UINT32_MAX; - } - } - hrecs->ref[tid].ty = NULL; - k = kh_put(m_s2i, hrecs->ref_hash, hrecs->ref[tid].name, &r); - if (r < 0) goto fail; - if (r == 0) { - hts_log_error("Duplicate entry \"%s\" in target list", - hrecs->ref[tid].name); - return -1; - } else { - kh_val(hrecs->ref_hash, k) = tid; - } - } - hrecs->nref = bh->n_targets; - return 0; - - fail: { - int32_t i; - hts_log_error("%s", strerror(errno)); - for (i = 0; i < tid; i++) { - khint_t k; - if (!hrecs->ref[i].name) continue; - k = kh_get(m_s2i, hrecs->ref_hash, hrecs->ref[tid].name); - if (k < kh_end(hrecs->ref_hash)) kh_del(m_s2i, hrecs->ref_hash, k); - } - hrecs->nref = 0; - return -1; - } -} - -/* - * Add SQ header records for any references in the hrecs->ref array that - * were added by sam_hrecs_refs_from_targets_array() but have not - * been linked to an @SQ line by sam_hrecs_update_hashes() yet. - * - * This may be needed either because: - * - * - A bam file was read that had entries in its refs list with no - * corresponding @SQ line. - * - * - A program constructed a sam_hdr_t which has target_name and target_len - * array entries with no corresponding @SQ line in text. 
- */ -static int add_stub_ref_sq_lines(sam_hrecs_t *hrecs) { - int tid; - char len[32]; - - for (tid = 0; tid < hrecs->nref; tid++) { - if (hrecs->ref[tid].ty == NULL) { - snprintf(len, sizeof(len), "%"PRIhts_pos, hrecs->ref[tid].len); - if (sam_hrecs_add(hrecs, "SQ", - "SN", hrecs->ref[tid].name, - "LN", len, NULL) != 0) - return -1; - - // Check that the stub has actually been filled - if(hrecs->ref[tid].ty == NULL) { - hts_log_error("Reference stub with tid=%d, name=\"%s\", len=%"PRIhts_pos" could not be filled", - tid, hrecs->ref[tid].name, hrecs->ref[tid].len); - return -1; - } - } - } - return 0; -} - -int sam_hdr_fill_hrecs(sam_hdr_t *bh) { - sam_hrecs_t *hrecs = sam_hrecs_new(); - - if (!hrecs) - return -1; - - if (bh->target_name && bh->target_len && bh->n_targets > 0) { - if (sam_hrecs_refs_from_targets_array(hrecs, bh) != 0) { - sam_hrecs_free(hrecs); - return -1; - } - } - - // Parse existing header text - if (bh->text && bh->l_text > 0) { - if (sam_hrecs_parse_lines(hrecs, bh->text, bh->l_text) != 0) { - sam_hrecs_free(hrecs); - return -1; - } - } - - if (add_stub_ref_sq_lines(hrecs) < 0) { - sam_hrecs_free(hrecs); - return -1; - } - - bh->hrecs = hrecs; - - if (hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) - return -1; - - return 0; -} - -/** Remove outdated header text - - @param bh BAM header - - This is called when API functions have changed the header so that the - text version is no longer valid. - */ -static void redact_header_text(sam_hdr_t *bh) { - assert(bh->hrecs && bh->hrecs->dirty); - bh->l_text = 0; - free(bh->text); - bh->text = NULL; -} - -/** Find nth header record of a given type - - @param type Header type (SQ, RG etc.) 
- @param idx 0-based index - - @return sam_hrec_type_t pointer to the record on success - NULL if no record exists with the given type and index - */ - -static sam_hrec_type_t *sam_hrecs_find_type_pos(sam_hrecs_t *hrecs, - const char *type, int idx) { - sam_hrec_type_t *first, *itr; - - if (idx < 0) - return NULL; - - if (type[0] == 'S' && type[1] == 'Q') - return idx < hrecs->nref ? hrecs->ref[idx].ty : NULL; - - if (type[0] == 'R' && type[1] == 'G') - return idx < hrecs->nrg ? hrecs->rg[idx].ty : NULL; - - if (type[0] == 'P' && type[1] == 'G') - return idx < hrecs->npg ? hrecs->pg[idx].ty : NULL; - - first = itr = sam_hrecs_find_type_id(hrecs, type, NULL, NULL); - if (!first) - return NULL; - - while (idx > 0) { - itr = itr->next; - if (itr == first) - break; - --idx; - } - - return idx == 0 ? itr : NULL; -} - -/* ==== Public methods ==== */ - -size_t sam_hdr_length(sam_hdr_t *bh) { - if (!bh || -1 == sam_hdr_rebuild(bh)) - return SIZE_MAX; - - return bh->l_text; -} - -const char *sam_hdr_str(sam_hdr_t *bh) { - if (!bh || -1 == sam_hdr_rebuild(bh)) - return NULL; - - return bh->text; -} - -int sam_hdr_nref(const sam_hdr_t *bh) { - if (!bh) - return -1; - - return bh->hrecs ? bh->hrecs->nref : bh->n_targets; -} - -/* - * Reconstructs the text representation from the header hash table. - * Returns 0 on success - * -1 on failure - */ -int sam_hdr_rebuild(sam_hdr_t *bh) { - sam_hrecs_t *hrecs; - if (!bh) - return -1; - - if (!(hrecs = bh->hrecs)) - return bh->text ? 0 : -1; - - if (hrecs->refs_changed >= 0) { - if (rebuild_target_arrays(bh) < 0) { - hts_log_error("Header target array rebuild has failed"); - return -1; - } - } - - /* If header text wasn't changed or header is empty, don't rebuild it. 
*/ - if (!hrecs->dirty) - return 0; - - if (hrecs->pgs_changed && sam_hdr_link_pg(bh) < 0) { - hts_log_error("Linking @PG lines has failed"); - return -1; - } - - kstring_t ks = KS_INITIALIZE; - if (sam_hrecs_rebuild_text(hrecs, &ks) != 0) { - ks_free(&ks); - hts_log_error("Header text rebuild has failed"); - return -1; - } - - hrecs->dirty = 0; - - /* Sync */ - free(bh->text); - bh->l_text = ks_len(&ks); - bh->text = ks_release(&ks); - - return 0; -} - -/* - * Appends a formatted line to an existing SAM header. - * Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with - * optional new-line. If it contains more than 1 line then multiple lines - * will be added in order. - * - * Input text is of maximum length len or as terminated earlier by a NUL. - * len may be 0 if unknown, in which case lines must be NUL-terminated. - * - * Returns 0 on success - * -1 on failure - */ -int sam_hdr_add_lines(sam_hdr_t *bh, const char *lines, size_t len) { - sam_hrecs_t *hrecs; - - if (!bh || !lines) - return -1; - - if (len == 0 && *lines == '\0') - return 0; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - if (sam_hrecs_parse_lines(hrecs, lines, len) != 0) - return -1; - - if (hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) - return -1; - - hrecs->dirty = 1; - redact_header_text(bh); - - return 0; -} - -/* - * Adds a single line to a SAM header. - * Specify type and one or more key,value pairs, ending with the NULL key. - * Eg. sam_hdr_add_line(h, "SQ", "ID", "foo", "LN", "100", NULL). - * - * Returns 0 on success - * -1 on failure - */ -int sam_hdr_add_line(sam_hdr_t *bh, const char *type, ...) 
{ - va_list args; - sam_hrecs_t *hrecs; - - if (!bh || !type) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - va_start(args, type); - int ret = sam_hrecs_vadd(hrecs, type, args, NULL); - va_end(args); - - if (ret == 0) { - if (hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) - return -1; - - if (hrecs->dirty) - redact_header_text(bh); - } - - return ret; -} - -/* - * Returns a complete line of formatted text for a specific head type/ID - * combination. If ID_key is NULL then it returns the first line of the specified - * type. - */ -int sam_hdr_find_line_id(sam_hdr_t *bh, const char *type, - const char *ID_key, const char *ID_val, kstring_t *ks) { - sam_hrecs_t *hrecs; - if (!bh || !type) - return -2; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -2; - hrecs = bh->hrecs; - } - - sam_hrec_type_t *ty = sam_hrecs_find_type_id(hrecs, type, ID_key, ID_val); - if (!ty) - return -1; - - ks->l = 0; - if (build_header_line(ty, ks) < 0) { - return -2; - } - - return 0; -} - -int sam_hdr_find_line_pos(sam_hdr_t *bh, const char *type, - int pos, kstring_t *ks) { - sam_hrecs_t *hrecs; - if (!bh || !type) - return -2; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -2; - hrecs = bh->hrecs; - } - - sam_hrec_type_t *ty = sam_hrecs_find_type_pos(hrecs, type, pos); - if (!ty) - return -1; - - ks->l = 0; - if (build_header_line(ty, ks) < 0) { - return -2; - } - - return 0; -} - -/* - * Remove a line from the header by specifying a tag:value that uniquely - * identifies a line, i.e. the @SQ line containing "SN:ref1". - * @SQ line is uniquely identified by SN tag. - * @RG line is uniquely identified by ID tag. - * @PG line is uniquely identified by ID tag. 
- * - * Returns 0 on success and -1 on error - */ - -int sam_hdr_remove_line_id(sam_hdr_t *bh, const char *type, const char *ID_key, const char *ID_value) { - sam_hrecs_t *hrecs; - if (!bh || !type) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - if (!strncmp(type, "PG", 2)) { - hts_log_warning("Removing PG lines is not supported!"); - return -1; - } - - sam_hrec_type_t *type_found = sam_hrecs_find_type_id(hrecs, type, ID_key, ID_value); - if (!type_found) - return 0; - - int ret = sam_hrecs_remove_line(hrecs, type, type_found); - if (ret == 0) { - if (hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) - return -1; - - if (hrecs->dirty) - redact_header_text(bh); - } - - return ret; -} - -/* - * Remove a line from the header by specifying the position in the type - * group, i.e. 3rd @SQ line. - * - * Returns 0 on success and -1 on error - */ - -int sam_hdr_remove_line_pos(sam_hdr_t *bh, const char *type, int position) { - sam_hrecs_t *hrecs; - if (!bh || !type || position <= 0) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - if (!strncmp(type, "PG", 2)) { - hts_log_warning("Removing PG lines is not supported!"); - return -1; - } - - sam_hrec_type_t *type_found = sam_hrecs_find_type_pos(hrecs, type, - position); - if (!type_found) - return -1; - - int ret = sam_hrecs_remove_line(hrecs, type, type_found); - if (ret == 0) { - if (hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) - return -1; - - if (hrecs->dirty) - redact_header_text(bh); - } - - return ret; -} - -/* - * Check if sam_hdr_update_line() is being used to change the name of - * a record, and if the new name is going to clash with an existing one. - * - * If ap includes repeated keys, we go with the last one as sam_hrecs_vupdate() - * will go through them all and leave the final one in place. 
- * - * Returns 0 if the name does not change - * 1 if the name changes but does not clash - * -1 if the name changes and the new one is already in use - */ -static int check_for_name_update(sam_hrecs_t *hrecs, sam_hrec_type_t *rec, - va_list ap, const char **old_name, - const char **new_name, - char id_tag_out[3], - khash_t(m_s2i) **hash_out) { - char *key, *val; - const char *id_tag; - sam_hrec_tag_t *tag, *prev; - khash_t(m_s2i) *hash; - khint_t k; - int ret = 0; - - if (rec->type == TYPEKEY("SQ")) { - id_tag = "SN"; hash = hrecs->ref_hash; - } else if (rec->type == TYPEKEY("RG")) { - id_tag = "ID"; hash = hrecs->rg_hash; - } else if (rec->type == TYPEKEY("PG")) { - id_tag = "ID"; hash = hrecs->pg_hash; - } else { - return 0; - } - - memcpy(id_tag_out, id_tag, 3); - *hash_out = hash; - - tag = sam_hrecs_find_key(rec, id_tag, &prev); - if (!tag) - return 0; - assert(tag->len >= 3); - *old_name = tag->str + 3; - - while ((key = va_arg(ap, char *)) != NULL) { - val = va_arg(ap, char *); - if (!val) val = ""; - if (strcmp(key, id_tag) != 0) continue; - if (strcmp(val, tag->str + 3) == 0) { ret = 0; continue; } - k = kh_get(m_s2i, hash, val); - ret = k < kh_end(hash) ? -1 : 1; - *new_name = val; - } - return ret; -} - -int sam_hdr_update_line(sam_hdr_t *bh, const char *type, - const char *ID_key, const char *ID_value, ...) 
{ - sam_hrecs_t *hrecs; - if (!bh) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - int ret, rename; - sam_hrec_type_t *ty = sam_hrecs_find_type_id(hrecs, type, ID_key, ID_value); - if (!ty) - return -1; - - va_list args; - const char *old_name = "?", *new_name = "?"; - char id_tag[3]; - khash_t(m_s2i) *hash = NULL; - va_start(args, ID_value); - rename = check_for_name_update(hrecs, ty, args, - &old_name, &new_name, id_tag, &hash); - va_end(args); - if (rename < 0) { - hts_log_error("Cannot rename @%s \"%s\" to \"%s\" : already exists", - type, old_name, new_name); - return -1; - } - if (rename > 0 && TYPEKEY(type) == TYPEKEY("PG")) { - // This is just too complicated - hts_log_error("Renaming @PG records is not supported"); - return -1; - } - va_start(args, ID_value); - ret = sam_hrecs_vupdate(hrecs, ty, args); - va_end(args); - - if (ret) - return ret; - - // TODO Account for @SQ-AN altnames - - if (rename) { - // Adjust the hash table to point to the new name - // sam_hrecs_update_hashes() should sort out everything else - khint_t k = kh_get(m_s2i, hash, old_name); - sam_hrec_tag_t *new_tag = sam_hrecs_find_key(ty, id_tag, NULL); - int r, pos; - assert(k < kh_end(hash)); // Or we wouldn't have found it earlier - assert(new_tag && new_tag->str); // id_tag should exist - assert(new_tag->len > 3); - pos = kh_val(hash, k); - kh_del(m_s2i, hash, k); - k = kh_put(m_s2i, hash, new_tag->str + 3, &r); - if (r < 1) { - hts_log_error("Failed to rename item in hash table"); - return -1; - } - kh_val(hash, k) = pos; - } - - ret = sam_hrecs_update_hashes(hrecs, TYPEKEY(type), ty); - - if (!ret && hrecs->refs_changed >= 0) - ret = rebuild_target_arrays(bh); - - if (!ret && hrecs->dirty) - redact_header_text(bh); - - return ret; -} - -int sam_hdr_remove_except(sam_hdr_t *bh, const char *type, const char *ID_key, const char *ID_value) { - sam_hrecs_t *hrecs; - if (!bh || !type) - return -1; - - if (!(hrecs 
= bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - sam_hrec_type_t *step; - int ret = 1, remove_all = (ID_key == NULL); - - if (!strncmp(type, "PG", 2) || !strncmp(type, "CO", 2)) { - hts_log_warning("Removing PG or CO lines is not supported!"); - return -1; - } - - sam_hrec_type_t *type_found = sam_hrecs_find_type_id(hrecs, type, ID_key, ID_value); - if (!type_found) { // remove all line of this type - khint_t k = kh_get(sam_hrecs_t, hrecs->h, TYPEKEY(type)); - if (k == kh_end(hrecs->h)) - return 0; - type_found = kh_val(hrecs->h, k); - if (!type_found) - return 0; - remove_all = 1; - } - - step = type_found->next; - while (step != type_found) { - sam_hrec_type_t *to_remove = step; - step = step->next; - ret &= sam_hrecs_remove_line(hrecs, type, to_remove); - } - - if (remove_all) - ret &= sam_hrecs_remove_line(hrecs, type, type_found); - - if (!ret && hrecs->dirty) - redact_header_text(bh); - - return 0; -} - -int sam_hdr_remove_lines(sam_hdr_t *bh, const char *type, const char *id, void *vrh) { - sam_hrecs_t *hrecs; - rmhash_t *rh = (rmhash_t *)vrh; - - if (!bh || !type) - return -1; - if (!rh) // remove all lines - return sam_hdr_remove_except(bh, type, NULL, NULL); - if (!id) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - khint_t k = kh_get(sam_hrecs_t, hrecs->h, TYPEKEY(type)); - if (k == kh_end(hrecs->h)) // nothing to remove from - return 0; - - sam_hrec_type_t *head = kh_val(hrecs->h, k); - if (!head) { - hts_log_error("Header inconsistency"); - return -1; - } - - int ret = 0; - sam_hrec_type_t *step = head->next; - while (step != head) { - sam_hrec_tag_t *tag = sam_hrecs_find_key(step, id, NULL); - if (tag && tag->str && tag->len >= 3) { - k = kh_get(rm, rh, tag->str+3); - if (k == kh_end(rh)) { // value is not in the hash table, so remove - sam_hrec_type_t *to_remove = step; - step = step->next; - ret |= sam_hrecs_remove_line(hrecs, type, 
to_remove); - } else { - step = step->next; - } - } else { // tag is not on the line, so skip to next line - step = step->next; - } - } - - // process the first line - sam_hrec_tag_t * tag = sam_hrecs_find_key(head, id, NULL); - if (tag && tag->str && tag->len >= 3) { - k = kh_get(rm, rh, tag->str+3); - if (k == kh_end(rh)) { // value is not in the hash table, so remove - sam_hrec_type_t *to_remove = head; - head = head->next; - ret |= sam_hrecs_remove_line(hrecs, type, to_remove); - } - } - - if (!ret && hrecs->dirty) - redact_header_text(bh); - - return ret; -} - -int sam_hdr_count_lines(sam_hdr_t *bh, const char *type) { - int count; - sam_hrec_type_t *first_ty, *itr_ty; - - if (!bh || !type) - return -1; - - if (!bh->hrecs) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - } - - // Deal with types that have counts - switch (type[0]) { - case 'S': - if (type[1] == 'Q') - return bh->hrecs->nref; - break; - case 'R': - if (type[1] == 'G') - return bh->hrecs->nrg; - break; - case 'P': - if (type[1] == 'G') - return bh->hrecs->npg; - break; - default: - break; - } - - first_ty = sam_hrecs_find_type_id(bh->hrecs, type, NULL, NULL); - if (!first_ty) - return 0; - - count = 1; - for (itr_ty = first_ty->next; - itr_ty && itr_ty != first_ty; itr_ty = itr_ty->next) { - count++; - } - - return count; -} - -int sam_hdr_line_index(sam_hdr_t *bh, - const char *type, - const char *key) { - sam_hrecs_t *hrecs; - if (!bh || !type || !key) - return -2; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -2; - hrecs = bh->hrecs; - } - - khint_t k; - int idx = -1; - switch (type[0]) { - case 'S': - if (type[1] == 'Q') { - k = kh_get(m_s2i, hrecs->ref_hash, key); - if (k != kh_end(hrecs->ref_hash)) - idx = kh_val(hrecs->ref_hash, k); - } else { - hts_log_warning("Type '%s' not supported. 
Only @SQ, @RG and @PG lines are indexed", type); - } - break; - case 'R': - if (type[1] == 'G') { - k = kh_get(m_s2i, hrecs->rg_hash, key); - if (k != kh_end(hrecs->rg_hash)) - idx = kh_val(hrecs->rg_hash, k); - } else { - hts_log_warning("Type '%s' not supported. Only @SQ, @RG and @PG lines are indexed", type); - } - break; - case 'P': - if (type[1] == 'G') { - k = kh_get(m_s2i, hrecs->pg_hash, key); - if (k != kh_end(hrecs->pg_hash)) - idx = kh_val(hrecs->pg_hash, k); - } else { - hts_log_warning("Type '%s' not supported. Only @SQ, @RG and @PG lines are indexed", type); - } - break; - default: - hts_log_warning("Type '%s' not supported. Only @SQ, @RG and @PG lines are indexed", type); - } - - return idx; -} - -const char *sam_hdr_line_name(sam_hdr_t *bh, - const char *type, - int pos) { - sam_hrecs_t *hrecs; - if (!bh || !type || pos < 0) - return NULL; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return NULL; - hrecs = bh->hrecs; - } - - switch (type[0]) { - case 'S': - if (type[1] == 'Q') { - if (pos < hrecs->nref) - return hrecs->ref[pos].name; - } else { - hts_log_warning("Type '%s' not supported. Only @SQ, @RG and @PG lines are indexed", type); - } - break; - case 'R': - if (type[1] == 'G') { - if (pos < hrecs->nrg) - return hrecs->rg[pos].name; - } else { - hts_log_warning("Type '%s' not supported. Only @SQ, @RG and @PG lines are indexed", type); - } - break; - case 'P': - if (type[1] == 'G') { - if (pos < hrecs->npg) - return hrecs->pg[pos].name; - } else { - hts_log_warning("Type '%s' not supported. Only @SQ, @RG and @PG lines are indexed", type); - } - break; - default: - hts_log_warning("Type '%s' not supported. 
Only @SQ, @RG and @PG lines are indexed", type); - } - - return NULL; -} - -/* ==== Key:val level methods ==== */ - -int sam_hdr_find_tag_id(sam_hdr_t *bh, - const char *type, - const char *ID_key, - const char *ID_value, - const char *key, - kstring_t *ks) { - sam_hrecs_t *hrecs; - if (!bh || !type || !key) - return -2; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -2; - hrecs = bh->hrecs; - } - - sam_hrec_type_t *ty = sam_hrecs_find_type_id(hrecs, type, ID_key, ID_value); - if (!ty) - return -1; - - sam_hrec_tag_t *tag = sam_hrecs_find_key(ty, key, NULL); - if (!tag || !tag->str || tag->len < 4) - return -1; - - ks->l = 0; - if (kputsn(tag->str+3, tag->len-3, ks) == EOF) { - return -2; - } - - return 0; -} - -int sam_hdr_find_tag_pos(sam_hdr_t *bh, - const char *type, - int pos, - const char *key, - kstring_t *ks) { - sam_hrecs_t *hrecs; - if (!bh || !type || !key) - return -2; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -2; - hrecs = bh->hrecs; - } - - sam_hrec_type_t *ty = sam_hrecs_find_type_pos(hrecs, type, pos); - if (!ty) - return -1; - - sam_hrec_tag_t *tag = sam_hrecs_find_key(ty, key, NULL); - if (!tag || !tag->str || tag->len < 4) - return -1; - - ks->l = 0; - if (kputsn(tag->str+3, tag->len-3, ks) == EOF) { - return -2; - } - - return 0; -} - -int sam_hdr_remove_tag_id(sam_hdr_t *bh, - const char *type, - const char *ID_key, - const char *ID_value, - const char *key) { - sam_hrecs_t *hrecs; - if (!bh || !type || !key) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - sam_hrec_type_t *ty = sam_hrecs_find_type_id(hrecs, type, ID_key, ID_value); - if (!ty) - return -1; - - int ret = sam_hrecs_remove_key(hrecs, ty, key); - if (!ret && hrecs->dirty) - redact_header_text(bh); - - return ret; -} - -/* - * Reconstructs a kstring from the header hash table. 
- * Returns 0 on success - * -1 on failure - */ -int sam_hrecs_rebuild_text(const sam_hrecs_t *hrecs, kstring_t *ks) { - ks->l = 0; - - if (!hrecs->h || !hrecs->h->size) { - return kputsn("", 0, ks) >= 0 ? 0 : -1; - } - if (sam_hrecs_rebuild_lines(hrecs, ks) != 0) - return -1; - - return 0; -} - -/* - * Looks up a reference sequence by name and returns the numerical ID. - * Returns -1 if unknown reference; -2 if header could not be parsed. - */ -int sam_hdr_name2tid(sam_hdr_t *bh, const char *ref) { - sam_hrecs_t *hrecs; - khint_t k; - - if (!bh) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -2; - hrecs = bh->hrecs; - } - - if (!hrecs->ref_hash) - return -1; - - k = kh_get(m_s2i, hrecs->ref_hash, ref); - return k == kh_end(hrecs->ref_hash) ? -1 : kh_val(hrecs->ref_hash, k); -} - -const char *sam_hdr_tid2name(const sam_hdr_t *h, int tid) { - sam_hrecs_t *hrecs; - - if (!h || tid < 0) - return NULL; - - if ((hrecs = h->hrecs) != NULL && tid < hrecs->nref) { - return hrecs->ref[tid].name; - } else { - if (tid < h->n_targets) - return h->target_name[tid]; - } - - return NULL; -} - -hts_pos_t sam_hdr_tid2len(const sam_hdr_t *h, int tid) { - sam_hrecs_t *hrecs; - - if (!h || tid < 0) - return 0; - - if ((hrecs = h->hrecs) != NULL && tid < hrecs->nref) { - return hrecs->ref[tid].len; - } else { - if (tid < h->n_targets) { - if (h->target_len[tid] < UINT32_MAX || !h->sdict) { - return h->target_len[tid]; - } else { - khash_t(s2i) *long_refs = (khash_t(s2i) *) h->sdict; - khint_t k = kh_get(s2i, long_refs, h->target_name[tid]); - if (k < kh_end(long_refs)) { - return kh_val(long_refs, k); - } else { - return UINT32_MAX; - } - } - } - } - - return 0; -} - -/* - * Fixes any PP links in @PG headers. - * If the entries are in order then this doesn't need doing, but in case - * our header is out of order this goes through the hrecs->pg[] array - * setting the prev_id field. - * - * Note we can have multiple complete chains. 
This code should identify the - * tails of these chains as these are the entries we have to link to in - * subsequent PP records. - * - * Returns 0 on success - * -1 on failure (indicating broken PG/PP records) - */ -static int sam_hdr_link_pg(sam_hdr_t *bh) { - sam_hrecs_t *hrecs; - int i, j, ret = 0, *new_pg_end; - - if (!bh) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - if (!hrecs->pgs_changed || !hrecs->npg) - return 0; - - hrecs->npg_end_alloc = hrecs->npg; - new_pg_end = realloc(hrecs->pg_end, hrecs->npg * sizeof(*new_pg_end)); - if (!new_pg_end) - return -1; - hrecs->pg_end = new_pg_end; - int *chain_size = calloc(hrecs->npg, sizeof(int)); - if (!chain_size) - return -1; - - for (i = 0; i < hrecs->npg; i++) - hrecs->pg_end[i] = i; - - for (i = 0; i < hrecs->npg; i++) { - khint_t k; - sam_hrec_tag_t *tag; - - assert(hrecs->pg[i].ty != NULL); - for (tag = hrecs->pg[i].ty->tag; tag; tag = tag->next) { - if (tag->str[0] == 'P' && tag->str[1] == 'P') - break; - } - if (!tag) { - /* Chain start points */ - continue; - } - - k = kh_get(m_s2i, hrecs->pg_hash, tag->str+3); - - if (k == kh_end(hrecs->pg_hash)) { - hts_log_warning("PG line with PN:%s has a PP link to missing program '%s'", - hrecs->pg[i].name, tag->str+3); - continue; - } - - hrecs->pg[i].prev_id = hrecs->pg[kh_val(hrecs->pg_hash, k)].id; - hrecs->pg_end[kh_val(hrecs->pg_hash, k)] = -1; - chain_size[i] = chain_size[kh_val(hrecs->pg_hash, k)]+1; - } - - for (i = j = 0; i < hrecs->npg; i++) { - if (hrecs->pg_end[i] != -1 && chain_size[i] > 0) - hrecs->pg_end[j++] = hrecs->pg_end[i]; - } - /* Only leafs? Choose the last one! 
*/ - if (!j && hrecs->npg_end > 0) { - hrecs->pg_end[0] = hrecs->pg_end[hrecs->npg_end-1]; - j = 1; - } - - hrecs->npg_end = j; - hrecs->pgs_changed = 0; - - /* mark as dirty or empty for rebuild */ - hrecs->dirty = 1; - redact_header_text(bh); - free(chain_size); - - return ret; -} - -/* - * Returns a unique ID from a base name. - * - * The value returned is valid until the next call to - * this function. - */ -const char *sam_hdr_pg_id(sam_hdr_t *bh, const char *name) { - sam_hrecs_t *hrecs; - size_t name_len; - const size_t name_extra = 17; - if (!bh || !name) - return NULL; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return NULL; - hrecs = bh->hrecs; - } - - khint_t k = kh_get(m_s2i, hrecs->pg_hash, name); - if (k == kh_end(hrecs->pg_hash)) - return name; - - name_len = strlen(name); - if (name_len > 1000) name_len = 1000; - if (hrecs->ID_buf_sz < name_len + name_extra) { - char *new_ID_buf = realloc(hrecs->ID_buf, name_len + name_extra); - if (new_ID_buf == NULL) - return NULL; - hrecs->ID_buf = new_ID_buf; - hrecs->ID_buf_sz = name_len + name_extra; - } - - do { - snprintf(hrecs->ID_buf, hrecs->ID_buf_sz, "%.1000s.%d", name, hrecs->ID_cnt++); - k = kh_get(m_s2i, hrecs->pg_hash, hrecs->ID_buf); - } while (k != kh_end(hrecs->pg_hash)); - - return hrecs->ID_buf; -} - -/* - * Add an @PG line. - * - * If we wish complete control over this use sam_hdr_add_line() directly. This - * function uses that, but attempts to do a lot of tedious house work for - * you too. - * - * - It will generate a suitable ID if the supplied one clashes. - * - It will generate multiple @PG records if we have multiple PG chains. - * - * Call it as per sam_hdr_add_line() with a series of key,value pairs ending - * in NULL. - * - * Returns 0 on success - * -1 on failure - */ -int sam_hdr_add_pg(sam_hdr_t *bh, const char *name, ...) 
{ - sam_hrecs_t *hrecs; - const char *specified_id = NULL, *specified_pn = NULL, *specified_pp = NULL; - const char *key, *val; - if (!bh) - return -1; - - if (!(hrecs = bh->hrecs)) { - if (sam_hdr_fill_hrecs(bh) != 0) - return -1; - hrecs = bh->hrecs; - } - - bh->hrecs->pgs_changed = 1; - if (sam_hdr_link_pg(bh) < 0) { - hts_log_error("Error linking @PG lines"); - return -1; - } - - va_list args; - // Check for ID / PN / PP tags in varargs list - va_start(args, name); - while ((key = va_arg(args, const char *)) != NULL) { - val = va_arg(args, const char *); - if (!val) break; - if (strcmp(key, "PN") == 0 && *val != '\0') - specified_pn = val; - else if (strcmp(key, "PP") == 0 && *val != '\0') - specified_pp = val; - else if (strcmp(key, "ID") == 0 && *val != '\0') - specified_id = val; - } - va_end(args); - - if (specified_id && hrecs->pg_hash) { - khint_t k = kh_get(m_s2i, hrecs->pg_hash, specified_id); - if (k != kh_end(hrecs->pg_hash)) { - hts_log_error("Header @PG ID:%s already present", specified_id); - return -1; - } - } - - if (specified_pp && hrecs->pg_hash) { - khint_t k = kh_get(m_s2i, hrecs->pg_hash, specified_pp); - if (k == kh_end(hrecs->pg_hash)) { - hts_log_error("Header @PG ID:%s referred to by PP tag not present", - specified_pp); - return -1; - } - } - - if (!specified_pp && hrecs->npg_end) { - /* Copy ends array to avoid us looping while modifying it */ - int *end = malloc(hrecs->npg_end * sizeof(int)); - int i, nends = hrecs->npg_end; - - if (!end) - return -1; - - memcpy(end, hrecs->pg_end, nends * sizeof(*end)); - - for (i = 0; i < nends; i++) { - const char *id = !specified_id ? sam_hdr_pg_id(bh, name) : ""; - if (!id) { - free(end); - return -1; - } - va_start(args, name); - if (-1 == sam_hrecs_vadd(hrecs, "PG", args, - "ID", id, - "PN", !specified_pn ? name : "", - "PP", hrecs->pg[end[i]].name, - NULL)) { - free(end); - return -1; - } - va_end(args); - } - - free(end); - } else { - const char *id = !specified_id ? 
sam_hdr_pg_id(bh, name) : ""; - if (!id) - return -1; - va_start(args, name); - if (-1 == sam_hrecs_vadd(hrecs, "PG", args, - "ID", id, - "PN", !specified_pn ? name : "", - NULL)) - return -1; - va_end(args); - } - - hrecs->dirty = 1; - redact_header_text(bh); - - return 0; -} - -/*! Increments a reference count on bh. - * - * This permits multiple files to share the same header, all calling - * sam_hdr_destroy when done, without causing errors for other open files. - */ -void sam_hdr_incr_ref(sam_hdr_t *bh) { - if (!bh) - return; - bh->ref_count++; -} - -/* ==== Internal methods ==== */ - -/* - * Creates an empty SAM header. Allocates space for the SAM header - * structures (hash tables) ready to be populated. - * - * Returns a sam_hrecs_t struct on success (free with sam_hrecs_free()) - * NULL on failure - */ -sam_hrecs_t *sam_hrecs_new(void) { - sam_hrecs_t *hrecs = calloc(1, sizeof(*hrecs)); - - if (!hrecs) - return NULL; - - hrecs->h = kh_init(sam_hrecs_t); - if (!hrecs->h) - goto err; - - hrecs->ID_cnt = 1; - - hrecs->nref = 0; - hrecs->ref_sz = 0; - hrecs->ref = NULL; - if (!(hrecs->ref_hash = kh_init(m_s2i))) - goto err; - hrecs->refs_changed = -1; - - hrecs->nrg = 0; - hrecs->rg_sz = 0; - hrecs->rg = NULL; - if (!(hrecs->rg_hash = kh_init(m_s2i))) - goto err; - - hrecs->npg = 0; - hrecs->pg_sz = 0; - hrecs->pg = NULL; - hrecs->npg_end = hrecs->npg_end_alloc = 0; - hrecs->pg_end = NULL; - if (!(hrecs->pg_hash = kh_init(m_s2i))) - goto err; - - if (!(hrecs->tag_pool = pool_create(sizeof(sam_hrec_tag_t)))) - goto err; - - if (!(hrecs->type_pool = pool_create(sizeof(sam_hrec_type_t)))) - goto err; - - if (!(hrecs->str_pool = string_pool_create(65536))) - goto err; - - if (sam_hrecs_init_type_order(hrecs, NULL)) - goto err; - - return hrecs; - -err: - if (hrecs->h) - kh_destroy(sam_hrecs_t, hrecs->h); - - if (hrecs->tag_pool) - pool_destroy(hrecs->tag_pool); - - if (hrecs->type_pool) - pool_destroy(hrecs->type_pool); - - if (hrecs->str_pool) - 
string_pool_destroy(hrecs->str_pool); - - free(hrecs); - - return NULL; -} -#if 0 -/* - * Produces a duplicate copy of source and returns it. - * Returns NULL on failure - */ -sam_hrecs_t *sam_hrecs_dup(sam_hrecs_t *source) { - return NULL; -} -#endif -/*! Deallocates all storage used by a sam_hrecs_t struct. - * - * This also decrements the header reference count. If after decrementing - * it is still non-zero then the header is assumed to be in use by another - * caller and the free is not done. - * - */ -void sam_hrecs_free(sam_hrecs_t *hrecs) { - if (!hrecs) - return; - - if (hrecs->h) - kh_destroy(sam_hrecs_t, hrecs->h); - - if (hrecs->ref_hash) - kh_destroy(m_s2i, hrecs->ref_hash); - - if (hrecs->ref) - free(hrecs->ref); - - if (hrecs->rg_hash) - kh_destroy(m_s2i, hrecs->rg_hash); - - if (hrecs->rg) - free(hrecs->rg); - - if (hrecs->pg_hash) - kh_destroy(m_s2i, hrecs->pg_hash); - - if (hrecs->pg) - free(hrecs->pg); - - if (hrecs->pg_end) - free(hrecs->pg_end); - - if (hrecs->type_pool) - pool_destroy(hrecs->type_pool); - - if (hrecs->tag_pool) - pool_destroy(hrecs->tag_pool); - - if (hrecs->str_pool) - string_pool_destroy(hrecs->str_pool); - - if (hrecs->type_order) - free(hrecs->type_order); - - if (hrecs->ID_buf) - free(hrecs->ID_buf); - - free(hrecs); -} - -/* - * Internal method already used by the CRAM code - * Returns the first header item matching 'type'. If ID is non-NULL it checks - * for the tag ID: and compares against the specified ID. 
- * - * Returns NULL if no type/ID is found - */ -sam_hrec_type_t *sam_hrecs_find_type_id(sam_hrecs_t *hrecs, const char *type, - const char *ID_key, const char *ID_value) { - if (!hrecs || !type) - return NULL; - sam_hrec_type_t *t1, *t2; - khint_t k; - - /* Special case for types we have prebuilt hashes on */ - if (ID_key) { - if (!ID_value) - return NULL; - - if (type[0] == 'S' && type[1] == 'Q' && - ID_key[0] == 'S' && ID_key[1] == 'N') { - k = kh_get(m_s2i, hrecs->ref_hash, ID_value); - return k != kh_end(hrecs->ref_hash) - ? hrecs->ref[kh_val(hrecs->ref_hash, k)].ty - : NULL; - } - - if (type[0] == 'R' && type[1] == 'G' && - ID_key[0] == 'I' && ID_key[1] == 'D') { - k = kh_get(m_s2i, hrecs->rg_hash, ID_value); - return k != kh_end(hrecs->rg_hash) - ? hrecs->rg[kh_val(hrecs->rg_hash, k)].ty - : NULL; - } - - if (type[0] == 'P' && type[1] == 'G' && - ID_key[0] == 'I' && ID_key[1] == 'D') { - k = kh_get(m_s2i, hrecs->pg_hash, ID_value); - return k != kh_end(hrecs->pg_hash) - ? hrecs->pg[kh_val(hrecs->pg_hash, k)].ty - : NULL; - } - } - - k = kh_get(sam_hrecs_t, hrecs->h, TYPEKEY(type)); - if (k == kh_end(hrecs->h)) - return NULL; - - if (!ID_key) - return kh_val(hrecs->h, k); - - t1 = t2 = kh_val(hrecs->h, k); - do { - sam_hrec_tag_t *tag; - for (tag = t1->tag; tag; tag = tag->next) { - if (tag->str[0] == ID_key[0] && tag->str[1] == ID_key[1]) { - const char *cp1 = tag->str+3; - const char *cp2 = ID_value; - while (*cp1 && *cp1 == *cp2) - cp1++, cp2++; - if (*cp2 || *cp1) - continue; - return t1; - } - } - t1 = t1->next; - } while (t1 != t2); - - return NULL; -} - -/* - * Adds or updates tag key,value pairs in a header line. - * Eg for adding M5 tags to @SQ lines or updating sort order for the - * @HD line. - * - * va_list contains multiple key,value pairs ending in NULL. 
- * - * Returns 0 on success - * -1 on failure - */ -int sam_hrecs_vupdate(sam_hrecs_t *hrecs, sam_hrec_type_t *type, va_list ap) { - if (!hrecs) - return -1; - - for (;;) { - char *k, *v, *str; - sam_hrec_tag_t *tag, *prev = NULL; - - if (!(k = (char *)va_arg(ap, char *))) - break; - if (!(v = va_arg(ap, char *))) - v = ""; - - tag = sam_hrecs_find_key(type, k, &prev); - if (!tag) { - if (!(tag = pool_alloc(hrecs->tag_pool))) - return -1; - if (prev) - prev->next = tag; - else - type->tag = tag; - - tag->next = NULL; - } - - tag->len = 3 + strlen(v); - str = string_alloc(hrecs->str_pool, tag->len+1); - if (!str) - return -1; - - if (snprintf(str, tag->len+1, "%2.2s:%s", k, v) < 0) - return -1; - - tag->str = str; - } - - hrecs->dirty = 1; //mark text as dirty and force a rebuild - - return 0; -} - -/* - * Adds or updates tag key,value pairs in a header line. - * Eg for adding M5 tags to @SQ lines or updating sort order for the - * @HD line. - * - * Specify multiple key,value pairs ending in NULL. - * - * Returns 0 on success - * -1 on failure - */ -static int sam_hrecs_update(sam_hrecs_t *hrecs, sam_hrec_type_t *type, ...) { - va_list args; - int res; - va_start(args, type); - res = sam_hrecs_vupdate(hrecs, type, args); - va_end(args); - return res; -} - -/* - * Looks for a specific key in a single sam header line identified by *type. - * If prev is non-NULL it also fills this out with the previous tag, to - * permit use in key removal. *prev is set to NULL when the tag is the first - * key in the list. When a tag isn't found, prev (if non NULL) will be the last - * tag in the existing list. 
- * - * Returns the tag pointer on success - * NULL on failure - */ -sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type, - const char *key, - sam_hrec_tag_t **prev) { - sam_hrec_tag_t *tag, *p = NULL; - if (!type) - return NULL; - - for (tag = type->tag; tag; p = tag, tag = tag->next) { - if (tag->str[0] == key[0] && tag->str[1] == key[1]) { - if (prev) - *prev = p; - return tag; - } - } - - if (prev) - *prev = p; - - return NULL; -} - -int sam_hrecs_remove_key(sam_hrecs_t *hrecs, - sam_hrec_type_t *type, - const char *key) { - sam_hrec_tag_t *tag, *prev; - if (!hrecs) - return -1; - tag = sam_hrecs_find_key(type, key, &prev); - if (!tag) - return 0; // Not there anyway - - if (type->type == TYPEKEY("SQ") && tag->str[0] == 'A' && tag->str[1] == 'N') { - assert(tag->len >= 3); - sam_hrec_tag_t *sn_tag = sam_hrecs_find_key(type, "SN", NULL); - if (sn_tag) { - assert(sn_tag->len >= 3); - khint_t k = kh_get(m_s2i, hrecs->ref_hash, sn_tag->str + 3); - if (k != kh_end(hrecs->ref_hash)) - sam_hrecs_remove_ref_altnames(hrecs, kh_val(hrecs->ref_hash, k), tag->str + 3); - } - } - - if (!prev) { //first tag - type->tag = tag->next; - } else { - prev->next = tag->next; - } - pool_free(hrecs->tag_pool, tag); - hrecs->dirty = 1; //mark text as dirty and force a rebuild - - return 1; -} - -/* - * Looks up a read-group by name and returns a pointer to the start of the - * associated tag list. - * - * Returns NULL on failure - */ -sam_hrec_rg_t *sam_hrecs_find_rg(sam_hrecs_t *hrecs, const char *rg) { - khint_t k = kh_get(m_s2i, hrecs->rg_hash, rg); - return k == kh_end(hrecs->rg_hash) - ? 
NULL - : &hrecs->rg[kh_val(hrecs->rg_hash, k)]; -} - -#if DEBUG_HEADER -void sam_hrecs_dump(sam_hrecs_t *hrecs) { - khint_t k; - int i; - - printf("===DUMP===\n"); - for (k = kh_begin(hrecs->h); k != kh_end(hrecs->h); k++) { - sam_hrec_type_t *t1, *t2; - char c[2]; - int idx = 0; - - if (!kh_exist(hrecs->h, k)) - continue; - - t1 = t2 = kh_val(hrecs->h, k); - c[0] = kh_key(hrecs->h, k)>>8; - c[1] = kh_key(hrecs->h, k)&0xff; - printf("Type %.2s\n", c); - - do { - sam_hrec_tag_t *tag; - printf(">>>%d ", idx++); - for (tag = t1->tag; tag; tag=tag->next) { - if (strncmp(c, "CO", 2)) - printf("\"%.2s\":\"%.*s\"\t", tag->str, tag->len-3, tag->str+3); - else - printf("%s", tag->str); - } - putchar('\n'); - t1 = t1->next; - } while (t1 != t2); - } - - /* Dump out PG chains */ - printf("\n@PG chains:\n"); - for (i = 0; i < hrecs->npg_end; i++) { - int j; - printf(" %d:", i); - for (j = hrecs->pg_end[i]; j != -1; j = hrecs->pg[j].prev_id) { - printf("%s%d(%.*s)", - j == hrecs->pg_end[i] ? " " : "->", - j, hrecs->pg[j].name_len, hrecs->pg[j].name); - } - printf("\n"); - } - - puts("===END DUMP==="); -} -#endif - -/* - * Returns the sort order: - */ -enum sam_sort_order sam_hrecs_sort_order(sam_hrecs_t *hrecs) { - khint_t k; - enum sam_sort_order so; - - so = ORDER_UNKNOWN; - k = kh_get(sam_hrecs_t, hrecs->h, TYPEKEY("HD")); - if (k != kh_end(hrecs->h)) { - sam_hrec_type_t *ty = kh_val(hrecs->h, k); - sam_hrec_tag_t *tag; - for (tag = ty->tag; tag; tag = tag->next) { - if (tag->str[0] == 'S' && tag->str[1] == 'O') { - if (strcmp(tag->str+3, "unsorted") == 0) - so = ORDER_UNSORTED; - else if (strcmp(tag->str+3, "queryname") == 0) - so = ORDER_NAME; - else if (strcmp(tag->str+3, "coordinate") == 0) - so = ORDER_COORD; - else if (strcmp(tag->str+3, "unknown") != 0) - hts_log_error("Unknown sort order field: %s", tag->str+3); - } - } - } - - return so; -} - -enum sam_group_order sam_hrecs_group_order(sam_hrecs_t *hrecs) { - khint_t k; - enum sam_group_order go; - - go = 
ORDER_NONE; - k = kh_get(sam_hrecs_t, hrecs->h, TYPEKEY("HD")); - if (k != kh_end(hrecs->h)) { - sam_hrec_type_t *ty = kh_val(hrecs->h, k); - sam_hrec_tag_t *tag; - for (tag = ty->tag; tag; tag = tag->next) { - if (tag->str[0] == 'G' && tag->str[1] == 'O') { - if (strcmp(tag->str+3, "query") == 0) - go = ORDER_QUERY; - else if (strcmp(tag->str+3, "reference") == 0) - go = ORDER_REFERENCE; - } - } - } - - return go; -} diff --git a/src/htslib-1.18/hfile.c b/src/htslib-1.18/hfile.c deleted file mode 100644 index 7b17437..0000000 --- a/src/htslib-1.18/hfile.c +++ /dev/null @@ -1,1415 +0,0 @@ -/* hfile.c -- buffered low-level input/output streams. - - Copyright (C) 2013-2021 Genome Research Ltd. - - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include - -#include - -#ifdef ENABLE_PLUGINS -#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MSYS__) -#define USING_WINDOWS_PLUGIN_DLLS -#include -#endif -#endif - -#include "htslib/hfile.h" -#include "hfile_internal.h" -#include "htslib/kstring.h" - -#ifndef ENOTSUP -#define ENOTSUP EINVAL -#endif -#ifndef EOVERFLOW -#define EOVERFLOW ERANGE -#endif -#ifndef EPROTONOSUPPORT -#define EPROTONOSUPPORT ENOSYS -#endif - -#ifndef SSIZE_MAX /* SSIZE_MAX is POSIX 1 */ -#define SSIZE_MAX LONG_MAX -#endif - -/* hFILE fields are used as follows: - - char *buffer; // Pointer to the start of the I/O buffer - char *begin; // First not-yet-read character / unused position - char *end; // First unfilled/unfillable position - char *limit; // Pointer to the first position past the buffer - - const hFILE_backend *backend; // Methods to refill/flush I/O buffer - - off_t offset; // Offset within the stream of buffer position 0 - unsigned at_eof:1;// For reading, whether EOF has been seen - unsigned mobile:1;// Buffer is a mobile window or fixed full contents - unsigned readonly:1;// Whether opened as "r" rather than "r+"/"w"/"a" - int has_errno; // Error number from the last failure on this stream - -For reading, begin is the first unread character in the buffer and end is the -first unfilled position: - - -----------ABCDEFGHIJKLMNO--------------- - ^buffer ^begin ^end ^limit - -For writing, begin is the first unused position and end is unused so remains -equal to buffer: - - ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------- - ^buffer ^begin ^limit - ^end - -Thus if begin > end then there is a non-empty write buffer, if begin < end -then there is a non-empty read buffer, and if begin == end then both buffers -are empty. In all cases, the stream's file position indicator corresponds -to the position pointed to by begin. 
- -The above is the normal scenario of a mobile window. For in-memory -streams (eg via hfile_init_fixed) the buffer can be used as the full -contents without any separate backend behind it. These always have at_eof -set, offset set to 0, need no read() method, and should just return EINVAL -for seek(): - - abcdefghijkLMNOPQRSTUVWXYZ------ - ^buffer ^begin ^end ^limit -*/ -HTSLIB_EXPORT -hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity) -{ - hFILE *fp = (hFILE *) malloc(struct_size); - if (fp == NULL) goto error; - - if (capacity == 0) capacity = 32768; - // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory - if (strchr(mode, 'r') && capacity > 32768) capacity = 32768; - - fp->buffer = (char *) malloc(capacity); - if (fp->buffer == NULL) goto error; - - fp->begin = fp->end = fp->buffer; - fp->limit = &fp->buffer[capacity]; - - fp->offset = 0; - fp->at_eof = 0; - fp->mobile = 1; - fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+')); - fp->has_errno = 0; - return fp; - -error: - hfile_destroy(fp); - return NULL; -} - -hFILE *hfile_init_fixed(size_t struct_size, const char *mode, - char *buffer, size_t buf_filled, size_t buf_size) -{ - hFILE *fp = (hFILE *) malloc(struct_size); - if (fp == NULL) return NULL; - - fp->buffer = fp->begin = buffer; - fp->end = &fp->buffer[buf_filled]; - fp->limit = &fp->buffer[buf_size]; - - fp->offset = 0; - fp->at_eof = 1; - fp->mobile = 0; - fp->readonly = (strchr(mode, 'r') && ! 
strchr(mode, '+')); - fp->has_errno = 0; - return fp; -} - -static const struct hFILE_backend mem_backend; - -HTSLIB_EXPORT -void hfile_destroy(hFILE *fp) -{ - int save = errno; - if (fp) free(fp->buffer); - free(fp); - errno = save; -} - -static inline int writebuffer_is_nonempty(hFILE *fp) -{ - return fp->begin > fp->end; -} - -/* Refills the read buffer from the backend (once, so may only partially - fill the buffer), returning the number of additional characters read - (which might be 0), or negative when an error occurred. */ -static ssize_t refill_buffer(hFILE *fp) -{ - ssize_t n; - - // Move any unread characters to the start of the buffer - if (fp->mobile && fp->begin > fp->buffer) { - fp->offset += fp->begin - fp->buffer; - memmove(fp->buffer, fp->begin, fp->end - fp->begin); - fp->end = &fp->buffer[fp->end - fp->begin]; - fp->begin = fp->buffer; - } - - // Read into the available buffer space at fp->[end,limit) - if (fp->at_eof || fp->end == fp->limit) n = 0; - else { - n = fp->backend->read(fp, fp->end, fp->limit - fp->end); - if (n < 0) { fp->has_errno = errno; return n; } - else if (n == 0) fp->at_eof = 1; - } - - fp->end += n; - return n; -} - -/* - * Changes the buffer size for an hFILE. Ideally this is done - * immediately after opening. If performed later, this function may - * fail if we are reducing the buffer size and the current offset into - * the buffer is beyond the new capacity. - * - * Returns 0 on success; - * -1 on failure. - */ -HTSLIB_EXPORT -int hfile_set_blksize(hFILE *fp, size_t bufsiz) { - char *buffer; - ptrdiff_t curr_used; - if (!fp) return -1; - curr_used = (fp->begin > fp->end ? 
fp->begin : fp->end) - fp->buffer; - if (bufsiz == 0) bufsiz = 32768; - - // Ensure buffer resize will not erase live data - if (bufsiz < curr_used) - return -1; - - if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1; - - fp->begin = buffer + (fp->begin - fp->buffer); - fp->end = buffer + (fp->end - fp->buffer); - fp->buffer = buffer; - fp->limit = &fp->buffer[bufsiz]; - - return 0; -} - -/* Called only from hgetc(), when our buffer is empty. */ -HTSLIB_EXPORT -int hgetc2(hFILE *fp) -{ - return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF; -} - -ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp) -{ - char *found; - size_t n, copied = 0; - ssize_t got; - - if (size < 1 || size > SSIZE_MAX) { - fp->has_errno = errno = EINVAL; - return -1; - } - if (writebuffer_is_nonempty(fp)) { - fp->has_errno = errno = EBADF; - return -1; - } - - --size; /* to allow space for the NUL terminator */ - - do { - n = fp->end - fp->begin; - if (n > size - copied) n = size - copied; - - /* Look in the hFILE buffer for the delimiter */ - found = memchr(fp->begin, delim, n); - if (found != NULL) { - n = found - fp->begin + 1; - memcpy(buffer + copied, fp->begin, n); - buffer[n + copied] = '\0'; - fp->begin += n; - return n + copied; - } - - /* No delimiter yet, copy as much as we can and refill if necessary */ - memcpy(buffer + copied, fp->begin, n); - fp->begin += n; - copied += n; - - if (copied == size) { /* Output buffer full */ - buffer[copied] = '\0'; - return copied; - } - - got = refill_buffer(fp); - } while (got > 0); - - if (got < 0) return -1; /* Error on refill. */ - - buffer[copied] = '\0'; /* EOF, return anything that was copied. */ - return copied; -} - -char *hgets(char *buffer, int size, hFILE *fp) -{ - if (size < 1) { - fp->has_errno = errno = EINVAL; - return NULL; - } - return hgetln(buffer, size, fp) > 0 ? 
buffer : NULL; -} - -ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) -{ - size_t n = fp->end - fp->begin; - while (n < nbytes) { - ssize_t ret = refill_buffer(fp); - if (ret < 0) return ret; - else if (ret == 0) break; - else n += ret; - } - - if (n > nbytes) n = nbytes; - memcpy(buffer, fp->begin, n); - return n; -} - -/* Called only from hread(); when called, our buffer is empty and nread bytes - have already been placed in the destination buffer. */ -HTSLIB_EXPORT -ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread) -{ - const size_t capacity = fp->limit - fp->buffer; - int buffer_invalidated = 0; - char *dest = (char *) destv; - dest += nread, nbytes -= nread; - - // Read large requests directly into the destination buffer - while (nbytes * 2 >= capacity && !fp->at_eof) { - ssize_t n = fp->backend->read(fp, dest, nbytes); - if (n < 0) { fp->has_errno = errno; return n; } - else if (n == 0) fp->at_eof = 1; - else buffer_invalidated = 1; - fp->offset += n; - dest += n, nbytes -= n; - nread += n; - } - - if (buffer_invalidated) { - // Our unread buffer is empty, so begin == end, but our already-read - // buffer [buffer,begin) is likely non-empty and is no longer valid as - // its contents are no longer adjacent to the file position indicator. - // Discard it so that hseek() can't try to take advantage of it. - fp->offset += fp->begin - fp->buffer; - fp->begin = fp->end = fp->buffer; - } - - while (nbytes > 0 && !fp->at_eof) { - size_t n; - ssize_t ret = refill_buffer(fp); - if (ret < 0) return ret; - - n = fp->end - fp->begin; - if (n > nbytes) n = nbytes; - memcpy(dest, fp->begin, n); - fp->begin += n; - dest += n, nbytes -= n; - nread += n; - } - - return nread; -} - -/* Flushes the write buffer, fp->[buffer,begin), out through the backend - returning 0 on success or negative if an error occurred. 
*/ -static ssize_t flush_buffer(hFILE *fp) -{ - const char *buffer = fp->buffer; - while (buffer < fp->begin) { - ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer); - if (n < 0) { fp->has_errno = errno; return n; } - buffer += n; - fp->offset += n; - } - - fp->begin = fp->buffer; // Leave the buffer empty - return 0; -} - -int hflush(hFILE *fp) -{ - if (flush_buffer(fp) < 0) return EOF; - if (fp->backend->flush) { - if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; } - } - return 0; -} - -/* Called only from hputc(), when our buffer is already full. */ -HTSLIB_EXPORT -int hputc2(int c, hFILE *fp) -{ - if (flush_buffer(fp) < 0) return EOF; - *(fp->begin++) = c; - return c; -} - -/* Called only from hwrite() and hputs2(); when called, our buffer is either - full and ncopied bytes from the source have already been copied to our - buffer; or completely empty, ncopied is zero and totalbytes is greater than - the buffer size. */ -HTSLIB_EXPORT -ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied) -{ - const char *src = (const char *) srcv; - ssize_t ret; - const size_t capacity = fp->limit - fp->buffer; - size_t remaining = totalbytes - ncopied; - src += ncopied; - - ret = flush_buffer(fp); - if (ret < 0) return ret; - - // Write large blocks out directly from the source buffer - while (remaining * 2 >= capacity) { - ssize_t n = fp->backend->write(fp, src, remaining); - if (n < 0) { fp->has_errno = errno; return n; } - fp->offset += n; - src += n, remaining -= n; - } - - // Just buffer any remaining characters - memcpy(fp->begin, src, remaining); - fp->begin += remaining; - - return totalbytes; -} - -/* Called only from hputs(), when our buffer is already full. */ -HTSLIB_EXPORT -int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp) -{ - return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 
0 : EOF; -} - -off_t hseek(hFILE *fp, off_t offset, int whence) -{ - off_t curpos, pos; - - if (writebuffer_is_nonempty(fp) && fp->mobile) { - int ret = flush_buffer(fp); - if (ret < 0) return ret; - } - - curpos = htell(fp); - - // Relative offsets are given relative to the hFILE's stream position, - // which may differ from the backend's physical position due to buffering - // read-ahead. Correct for this by converting to an absolute position. - if (whence == SEEK_CUR) { - if (curpos + offset < 0) { - // Either a negative offset resulted in a position before the - // start of the file, or we overflowed when given a positive offset - fp->has_errno = errno = (offset < 0)? EINVAL : EOVERFLOW; - return -1; - } - - whence = SEEK_SET; - offset = curpos + offset; - } - // For fixed immobile buffers, convert everything else to SEEK_SET too - // so that seeking can be avoided for all (within range) requests. - else if (! fp->mobile && whence == SEEK_END) { - size_t length = fp->end - fp->buffer; - if (offset > 0 || -offset > length) { - fp->has_errno = errno = EINVAL; - return -1; - } - - whence = SEEK_SET; - offset = length + offset; - } - - // Avoid seeking if the desired position is within our read buffer. - // (But not when the next operation may be a write on a mobile buffer.) - if (whence == SEEK_SET && (! 
fp->mobile || fp->readonly) && - offset >= fp->offset && offset - fp->offset <= fp->end - fp->buffer) { - fp->begin = &fp->buffer[offset - fp->offset]; - return offset; - } - - pos = fp->backend->seek(fp, offset, whence); - if (pos < 0) { fp->has_errno = errno; return pos; } - - // Seeking succeeded, so discard any non-empty read buffer - fp->begin = fp->end = fp->buffer; - fp->at_eof = 0; - - fp->offset = pos; - return pos; -} - -int hclose(hFILE *fp) -{ - int err = fp->has_errno; - - if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno; - if (fp->backend->close(fp) < 0) err = errno; - hfile_destroy(fp); - - if (err) { - errno = err; - return EOF; - } - else return 0; -} - -void hclose_abruptly(hFILE *fp) -{ - int save = errno; - if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ } - hfile_destroy(fp); - errno = save; -} - - -/*************************** - * File descriptor backend * - ***************************/ - -#ifndef _WIN32 -#include -#include -#define HAVE_STRUCT_STAT_ST_BLKSIZE -#else -#include -#define HAVE_CLOSESOCKET -#define HAVE_SETMODE -#endif -#include -#include - -/* For Unix, it doesn't matter whether a file descriptor is a socket. - However Windows insists on send()/recv() and its own closesocket() - being used when fd happens to be a socket. */ - -typedef struct { - hFILE base; - int fd; - unsigned is_socket:1; -} hFILE_fd; - -static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes) -{ - hFILE_fd *fp = (hFILE_fd *) fpv; - ssize_t n; - do { - n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0) - : read(fp->fd, buffer, nbytes); - } while (n < 0 && errno == EINTR); - return n; -} - -static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes) -{ - hFILE_fd *fp = (hFILE_fd *) fpv; - ssize_t n; - do { - n = fp->is_socket? send(fp->fd, buffer, nbytes, 0) - : write(fp->fd, buffer, nbytes); - } while (n < 0 && errno == EINTR); -#ifdef _WIN32 - // On windows we have no SIGPIPE. 
Instead write returns - // EINVAL. We check for this and our fd being a pipe. - // If so, we raise SIGTERM instead of SIGPIPE. It's not - // ideal, but I think the only alternative is extra checking - // in every single piece of code. - if (n < 0 && errno == EINVAL && - GetLastError() == ERROR_NO_DATA && - GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) { - raise(SIGTERM); - } -#endif - return n; -} - -static off_t fd_seek(hFILE *fpv, off_t offset, int whence) -{ - hFILE_fd *fp = (hFILE_fd *) fpv; -#ifdef _WIN32 - // On windows lseek can return non-zero values even on a pipe. Instead - // it's likely to seek somewhere within the pipe memory buffer. - // This breaks bgzf_check_EOF among other things. - if (GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) { - errno = ESPIPE; - return -1; - } -#endif - - return lseek(fp->fd, offset, whence); -} - -static int fd_flush(hFILE *fpv) -{ - int ret = 0; - do { -#ifdef HAVE_FDATASYNC - hFILE_fd *fp = (hFILE_fd *) fpv; - ret = fdatasync(fp->fd); -#elif defined(HAVE_FSYNC) - hFILE_fd *fp = (hFILE_fd *) fpv; - ret = fsync(fp->fd); -#endif - // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe, - // and operation-not-supported errors (Mac OS X) - if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0; - } while (ret < 0 && errno == EINTR); - return ret; -} - -static int fd_close(hFILE *fpv) -{ - hFILE_fd *fp = (hFILE_fd *) fpv; - int ret; - do { -#ifdef HAVE_CLOSESOCKET - ret = fp->is_socket? 
closesocket(fp->fd) : close(fp->fd); -#else - ret = close(fp->fd); -#endif - } while (ret < 0 && errno == EINTR); - return ret; -} - -static const struct hFILE_backend fd_backend = -{ - fd_read, fd_write, fd_seek, fd_flush, fd_close -}; - -static size_t blksize(int fd) -{ -#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE - struct stat sbuf; - if (fstat(fd, &sbuf) != 0) return 0; - return sbuf.st_blksize; -#else - return 0; -#endif -} - -static hFILE *hopen_fd(const char *filename, const char *mode) -{ - hFILE_fd *fp = NULL; - int fd = open(filename, hfile_oflags(mode), 0666); - if (fd < 0) goto error; - - fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); - if (fp == NULL) goto error; - - fp->fd = fd; - fp->is_socket = 0; - fp->base.backend = &fd_backend; - return &fp->base; - -error: - if (fd >= 0) { int save = errno; (void) close(fd); errno = save; } - hfile_destroy((hFILE *) fp); - return NULL; -} - -// Loads the contents of filename to produced a read-only, in memory, -// immobile hfile. fp is the already opened file. We always close this -// input fp, irrespective of whether we error or whether we return a new -// immobile hfile. 
-static hFILE *hpreload(hFILE *fp) { - hFILE *mem_fp; - char *buf = NULL; - off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len; - - for (;;) { - if (buf_a - buf_sz < 5000) { - buf_a += buf_inc; - char *t = realloc(buf, buf_a); - if (!t) goto err; - buf = t; - if (buf_inc < 1000000) buf_inc *= 1.3; - } - len = hread(fp, buf+buf_sz, buf_a-buf_sz); - if (len > 0) - buf_sz += len; - else - break; - } - - if (len < 0) goto err; - mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a); - if (!mem_fp) goto err; - mem_fp->backend = &mem_backend; - - if (hclose(fp) < 0) { - hclose_abruptly(mem_fp); - goto err; - } - return mem_fp; - - err: - free(buf); - hclose_abruptly(fp); - return NULL; -} - -static int is_preload_url_remote(const char *url){ - return hisremote(url + 8); // len("preload:") = 8 -} - -static hFILE *hopen_preload(const char *url, const char *mode){ - hFILE* fp = hopen(url + 8, mode); - return hpreload(fp); -} - -hFILE *hdopen(int fd, const char *mode) -{ - hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); - if (fp == NULL) return NULL; - - fp->fd = fd; - fp->is_socket = (strchr(mode, 's') != NULL); - fp->base.backend = &fd_backend; - return &fp->base; -} - -static hFILE *hopen_fd_fileuri(const char *url, const char *mode) -{ - if (strncmp(url, "file://localhost/", 17) == 0) url += 16; - else if (strncmp(url, "file:///", 8) == 0) url += 7; - else { errno = EPROTONOSUPPORT; return NULL; } - -#if defined(_WIN32) || defined(__MSYS__) - // For cases like C:/foo - if (url[0] == '/' && url[1] && url[2] == ':' && url[3] == '/') url++; -#endif - - return hopen_fd(url, mode); -} - -static hFILE *hopen_fd_stdinout(const char *mode) -{ - int fd = (strchr(mode, 'r') != NULL)? 
STDIN_FILENO : STDOUT_FILENO; -#if defined HAVE_SETMODE && defined O_BINARY - if (setmode(fd, O_BINARY) < 0) return NULL; -#endif - return hdopen(fd, mode); -} - -HTSLIB_EXPORT -int hfile_oflags(const char *mode) -{ - int rdwr = 0, flags = 0; - const char *s; - for (s = mode; *s; s++) - switch (*s) { - case 'r': rdwr = O_RDONLY; break; - case 'w': rdwr = O_WRONLY; flags |= O_CREAT | O_TRUNC; break; - case 'a': rdwr = O_WRONLY; flags |= O_CREAT | O_APPEND; break; - case '+': rdwr = O_RDWR; break; -#ifdef O_CLOEXEC - case 'e': flags |= O_CLOEXEC; break; -#endif -#ifdef O_EXCL - case 'x': flags |= O_EXCL; break; -#endif - default: break; - } - -#ifdef O_BINARY - flags |= O_BINARY; -#endif - - return rdwr | flags; -} - - -/********************* - * In-memory backend * - *********************/ - -#include "hts_internal.h" - -typedef struct { - hFILE base; -} hFILE_mem; - -static off_t mem_seek(hFILE *fpv, off_t offset, int whence) -{ - errno = EINVAL; - return -1; -} - -static int mem_close(hFILE *fpv) -{ - return 0; -} - -static const struct hFILE_backend mem_backend = -{ - NULL, NULL, mem_seek, NULL, mem_close -}; - -static int cmp_prefix(const char *key, const char *s) -{ - while (*key) - if (tolower_c(*s) != *key) return +1; - else s++, key++; - - return 0; -} - -static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size) -{ - hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size); - if (fp == NULL) - return NULL; - - fp->base.backend = &mem_backend; - return &fp->base; -} - -static hFILE *hopen_mem(const char *url, const char *mode) -{ - size_t length, size; - char *buffer; - const char *data, *comma = strchr(url, ','); - if (comma == NULL) { errno = EINVAL; return NULL; } - data = comma+1; - - // TODO Implement write modes - if (strchr(mode, 'r') == NULL) { errno = EROFS; return NULL; } - - if (comma - url >= 7 && cmp_prefix(";base64", &comma[-7]) == 0) { - size = 
hts_base64_decoded_length(strlen(data)); - buffer = malloc(size); - if (buffer == NULL) return NULL; - hts_decode_base64(buffer, &length, data); - } - else { - size = strlen(data) + 1; - buffer = malloc(size); - if (buffer == NULL) return NULL; - hts_decode_percent(buffer, &length, data); - } - hFILE* hf; - - if(!(hf = create_hfile_mem(buffer, mode, length, size))){ - free(buffer); - return NULL; - } - - return hf; -} - -static hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) -{ - char* buffer = va_arg(args, char*); - size_t sz = va_arg(args, size_t); - va_end(args); - - hFILE* hf; - - if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){ - free(buffer); - return NULL; - } - - return hf; -} - -char *hfile_mem_get_buffer(hFILE *file, size_t *length) { - if (file->backend != &mem_backend) { - errno = EINVAL; - return NULL; - } - - if (length) - *length = file->buffer - file->limit; - - return file->buffer; -} - -char *hfile_mem_steal_buffer(hFILE *file, size_t *length) { - char *buf = hfile_mem_get_buffer(file, length); - if (buf) - file->buffer = NULL; - return buf; -} - -int hfile_plugin_init_mem(struct hFILE_plugin *self) -{ - // mem files are declared remote so they work with a tabix index - static const struct hFILE_scheme_handler handler = - {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem}; - self->name = "mem"; - hfile_add_scheme_handler("mem", &handler); - return 0; -} - -/********************************************************************** - * Dummy crypt4gh plug-in. Does nothing apart from advise how to get * - * the real one. It will be overridden by the actual plug-in. * - **********************************************************************/ - -static hFILE *crypt4gh_needed(const char *url, const char *mode) -{ - const char *u = strncmp(url, "crypt4gh:", 9) == 0 ? 
url + 9 : url; -#if defined(ENABLE_PLUGINS) - const char *enable_plugins = ""; -#else - const char *enable_plugins = "You also need to rebuild HTSlib with plug-ins enabled.\n"; -#endif - - hts_log_error("Accessing \"%s\" needs the crypt4gh plug-in.\n" - "It can be found at " - "https://github.com/samtools/htslib-crypt4gh\n" - "%s" - "If you have the plug-in, please ensure it can be " - "found on your HTS_PATH.", - u, enable_plugins); - - errno = EPROTONOSUPPORT; - return NULL; -} - -int hfile_plugin_init_crypt4gh_needed(struct hFILE_plugin *self) -{ - static const struct hFILE_scheme_handler handler = - { crypt4gh_needed, NULL, "crypt4gh-needed", 0, NULL }; - self->name = "crypt4gh-needed"; - hfile_add_scheme_handler("crypt4gh", &handler); - return 0; -} - - -/***************************************** - * Plugin and hopen() backend dispatcher * - *****************************************/ - -#include "htslib/khash.h" - -KHASH_MAP_INIT_STR(scheme_string, const struct hFILE_scheme_handler *) -static khash_t(scheme_string) *schemes = NULL; - -struct hFILE_plugin_list { - struct hFILE_plugin plugin; - struct hFILE_plugin_list *next; -}; - -static struct hFILE_plugin_list *plugins = NULL; -static pthread_mutex_t plugins_lock = PTHREAD_MUTEX_INITIALIZER; - -void hfile_shutdown(int do_close_plugin) -{ - pthread_mutex_lock(&plugins_lock); - - if (schemes) { - kh_destroy(scheme_string, schemes); - schemes = NULL; - } - - while (plugins != NULL) { - struct hFILE_plugin_list *p = plugins; - if (p->plugin.destroy) p->plugin.destroy(); -#ifdef ENABLE_PLUGINS - if (p->plugin.obj && do_close_plugin) close_plugin(p->plugin.obj); -#endif - plugins = p->next; - free(p); - } - - pthread_mutex_unlock(&plugins_lock); -} - -static void hfile_exit(void) -{ - hfile_shutdown(0); - pthread_mutex_destroy(&plugins_lock); -} - -static inline int priority(const struct hFILE_scheme_handler *handler) -{ - return handler->priority % 1000; -} - -#ifdef USING_WINDOWS_PLUGIN_DLLS -/* - * Work-around 
for Windows plug-in dlls where the plug-in could be - * using a different HTSlib library to the executable (for example - * because the latter was build against a static libhts.a). When this - * happens, the plug-in can call the wrong copy of hfile_add_scheme_handler(). - * If this is detected, it calls this function which attempts to fix the - * problem by redirecting to the hfile_add_scheme_handler() in the main - * executable. - */ -static int try_exe_add_scheme_handler(const char *scheme, - const struct hFILE_scheme_handler *handler) -{ - static void (*add_scheme_handler)(const char *scheme, - const struct hFILE_scheme_handler *handler); - if (!add_scheme_handler) { - // dlopen the main executable and resolve hfile_add_scheme_handler - void *exe_handle = dlopen(NULL, RTLD_LAZY); - if (!exe_handle) return -1; - *(void **) (&add_scheme_handler) = dlsym(exe_handle, "hfile_add_scheme_handler"); - dlclose(exe_handle); - } - // Check that the symbol was obtained and isn't the one in this copy - // of the library (to avoid infinite recursion) - if (!add_scheme_handler || add_scheme_handler == hfile_add_scheme_handler) - return -1; - add_scheme_handler(scheme, handler); - return 0; -} -#else -static int try_exe_add_scheme_handler(const char *scheme, - const struct hFILE_scheme_handler *handler) -{ - return -1; -} -#endif - -HTSLIB_EXPORT -void hfile_add_scheme_handler(const char *scheme, - const struct hFILE_scheme_handler *handler) -{ - int absent; - if (!schemes) { - if (try_exe_add_scheme_handler(scheme, handler) != 0) { - hts_log_warning("Couldn't register scheme handler for %s", scheme); - } - return; - } - khint_t k = kh_put(scheme_string, schemes, scheme, &absent); - if (absent < 0) { - hts_log_warning("Couldn't register scheme handler for %s : %s", - scheme, strerror(errno)); - return; - } - if (absent || priority(handler) > priority(kh_value(schemes, k))) { - kh_value(schemes, k) = handler; - } -} - -static int init_add_plugin(void *obj, int (*init)(struct 
hFILE_plugin *), - const char *pluginname) -{ - struct hFILE_plugin_list *p = malloc (sizeof (struct hFILE_plugin_list)); - if (p == NULL) { - hts_log_debug("Failed to allocate memory for plugin \"%s\"", pluginname); - return -1; - } - - p->plugin.api_version = 1; - p->plugin.obj = obj; - p->plugin.name = NULL; - p->plugin.destroy = NULL; - - int ret = (*init)(&p->plugin); - - if (ret != 0) { - hts_log_debug("Initialisation failed for plugin \"%s\": %d", pluginname, ret); - free(p); - return ret; - } - - hts_log_debug("Loaded \"%s\"", pluginname); - - p->next = plugins, plugins = p; - return 0; -} - -/* - * Returns 0 on success, - * <0 on failure - */ -static int load_hfile_plugins(void) -{ - static const struct hFILE_scheme_handler - data = { hopen_mem, hfile_always_local, "built-in", 80 }, - file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 }, - preload = { hopen_preload, is_preload_url_remote, "built-in", 80 }; - - schemes = kh_init(scheme_string); - if (schemes == NULL) - return -1; - - hfile_add_scheme_handler("data", &data); - hfile_add_scheme_handler("file", &file); - hfile_add_scheme_handler("preload", &preload); - init_add_plugin(NULL, hfile_plugin_init_mem, "mem"); - init_add_plugin(NULL, hfile_plugin_init_crypt4gh_needed, "crypt4gh-needed"); - -#ifdef ENABLE_PLUGINS - struct hts_path_itr path; - const char *pluginname; - hts_path_itr_setup(&path, NULL, NULL, "hfile_", 6, NULL, 0); - while ((pluginname = hts_path_itr_next(&path)) != NULL) { - void *obj; - int (*init)(struct hFILE_plugin *) = (int (*)(struct hFILE_plugin *)) - load_plugin(&obj, pluginname, "hfile_plugin_init"); - - if (init) { - if (init_add_plugin(obj, init, pluginname) != 0) - close_plugin(obj); - } - } -#else - -#ifdef HAVE_LIBCURL - init_add_plugin(NULL, hfile_plugin_init_libcurl, "libcurl"); -#endif -#ifdef ENABLE_GCS - init_add_plugin(NULL, hfile_plugin_init_gcs, "gcs"); -#endif -#ifdef ENABLE_S3 - init_add_plugin(NULL, hfile_plugin_init_s3, "s3"); - init_add_plugin(NULL, 
hfile_plugin_init_s3_write, "s3w"); -#endif - -#endif - - // In the unlikely event atexit() fails, it's better to succeed here and - // carry on; then eventually when the program exits, we'll merely close - // down the plugins uncleanly, as if we had aborted. - (void) atexit(hfile_exit); - - return 0; -} - -/* A filename like "foo:bar" in which we don't recognise the scheme is - either an ordinary file or an indication of a missing or broken plugin. - Try to open it as an ordinary file; but if there's no such file, set - errno distinctively to make the plugin issue apparent. */ -static hFILE *hopen_unknown_scheme(const char *fname, const char *mode) -{ - hFILE *fp = hopen_fd(fname, mode); - if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT; - return fp; -} - -/* Returns the appropriate handler, or NULL if the string isn't an URL. */ -static const struct hFILE_scheme_handler *find_scheme_handler(const char *s) -{ - static const struct hFILE_scheme_handler unknown_scheme = - { hopen_unknown_scheme, hfile_always_local, "built-in", 0 }; - - char scheme[12]; - int i; - - for (i = 0; i < sizeof scheme; i++) - if (isalnum_c(s[i]) || s[i] == '+' || s[i] == '-' || s[i] == '.') - scheme[i] = tolower_c(s[i]); - else if (s[i] == ':') break; - else return NULL; - - // 1 byte schemes are likely windows C:/foo pathnames - if (i <= 1 || i >= sizeof scheme) return NULL; - scheme[i] = '\0'; - - pthread_mutex_lock(&plugins_lock); - if (!schemes && load_hfile_plugins() < 0) { - pthread_mutex_unlock(&plugins_lock); - return NULL; - } - pthread_mutex_unlock(&plugins_lock); - - khint_t k = kh_get(scheme_string, schemes, scheme); - return (k != kh_end(schemes))? kh_value(schemes, k) : &unknown_scheme; -} - - -/*************************** - * Library introspection functions - ***************************/ - -/* - * Fills out sc_list[] with the list of known URL schemes. - * This can be restricted to just ones from a specific plugin, - * or all (plugin == NULL). 
- * - * Returns number of schemes found on success; - * -1 on failure. - */ -HTSLIB_EXPORT -int hfile_list_schemes(const char *plugin, const char *sc_list[], int *nschemes) -{ - pthread_mutex_lock(&plugins_lock); - if (!schemes && load_hfile_plugins() < 0) { - pthread_mutex_unlock(&plugins_lock); - return -1; - } - pthread_mutex_unlock(&plugins_lock); - - khiter_t k; - int ns = 0; - - for (k = kh_begin(schemes); k != kh_end(schemes); k++) { - if (!kh_exist(schemes, k)) - continue; - - const struct hFILE_scheme_handler *s = kh_value(schemes, k); - if (plugin && strcmp(s->provider, plugin) != 0) - continue; - - if (ns < *nschemes) - sc_list[ns] = kh_key(schemes, k); - ns++; - } - - if (*nschemes > ns) - *nschemes = ns; - - return ns; -} - - -/* - * Fills out plist[] with the list of known hFILE plugins. - * - * Returns number of schemes found on success; - * -1 on failure - */ -HTSLIB_EXPORT -int hfile_list_plugins(const char *plist[], int *nplugins) -{ - pthread_mutex_lock(&plugins_lock); - if (!schemes && load_hfile_plugins() < 0) { - pthread_mutex_unlock(&plugins_lock); - return -1; - } - pthread_mutex_unlock(&plugins_lock); - - int np = 0; - if (*nplugins) - plist[np++] = "built-in"; - - struct hFILE_plugin_list *p = plugins; - while (p) { - if (np < *nplugins) - plist[np] = p->plugin.name; - - p = p->next; - np++; - } - - if (*nplugins > np) - *nplugins = np; - - return np; -} - - -/* - * Tests for the presence of a specific hFILE plugin. 
- * - * Returns 1 if true - * 0 otherwise - */ -HTSLIB_EXPORT -int hfile_has_plugin(const char *name) -{ - pthread_mutex_lock(&plugins_lock); - if (!schemes && load_hfile_plugins() < 0) { - pthread_mutex_unlock(&plugins_lock); - return -1; - } - pthread_mutex_unlock(&plugins_lock); - - struct hFILE_plugin_list *p = plugins; - while (p) { - if (strcmp(p->plugin.name, name) == 0) - return 1; - p = p->next; - } - - return 0; -} - -/*************************** - * hFILE interface proper - ***************************/ - -hFILE *hopen(const char *fname, const char *mode, ...) -{ - const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); - if (handler) { - if (strchr(mode, ':') == NULL - || handler->priority < 2000 - || handler->vopen == NULL) { - return handler->open(fname, mode); - } - else { - hFILE *fp; - va_list arg; - va_start(arg, mode); - fp = handler->vopen(fname, mode, arg); - va_end(arg); - return fp; - } - } - else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode); - else return hopen_fd(fname, mode); -} - -HTSLIB_EXPORT -int hfile_always_local (const char *fname) { return 0; } - -HTSLIB_EXPORT -int hfile_always_remote(const char *fname) { return 1; } - -int hisremote(const char *fname) -{ - const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); - return handler? handler->isremote(fname) : 0; -} - -// Remove an extension, if any, from the basename part of [start,limit). -// Note: Doesn't notice percent-encoded '.' and '/' characters. Don't do that. 
-static const char *strip_extension(const char *start, const char *limit) -{ - const char *s = limit; - while (s > start) { - --s; - if (*s == '.') return s; - else if (*s == '/') break; - } - return limit; -} - -char *haddextension(struct kstring_t *buffer, const char *filename, - int replace, const char *new_extension) -{ - const char *trailing, *end; - - if (find_scheme_handler(filename)) { - // URL, so alter extensions before any trailing query or fragment parts - // Allow # symbols in s3 URLs - trailing = filename + ((strncmp(filename, "s3://", 5) && strncmp(filename, "s3+http://", 10) && strncmp(filename, "s3+https://", 11)) ? strcspn(filename, "?#") : strcspn(filename, "?")); - } - else { - // Local path, so alter extensions at the end of the filename - trailing = strchr(filename, '\0'); - } - - end = replace? strip_extension(filename, trailing) : trailing; - - buffer->l = 0; - if (kputsn(filename, end - filename, buffer) >= 0 && - kputs(new_extension, buffer) >= 0 && - kputs(trailing, buffer) >= 0) return buffer->s; - else return NULL; -} - - -/* - * ---------------------------------------------------------------------- - * Minimal stub functions for knet, added after the removal of - * hfile_net.c and knetfile.c. - * - * They exist purely for ABI compatibility, but are simply wrappers to - * hFILE. API should be compatible except knet_fileno (unused?). - * - * CULL THESE and knetfile.h at the next .so version bump. - */ -typedef struct knetFile_s { - // As per htslib/knetfile.h. Duplicated here as we don't wish to - // have any dependence on the deprecated knetfile.h interface, plus - // it's hopefully only temporary. 
- int type, fd; - int64_t offset; - char *host, *port; - int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; - char *response, *retr, *size_cmd; - int64_t seek_offset; - int64_t file_size; - char *path, *http_host; - - // Our local addition - hFILE *hf; -} knetFile; - -HTSLIB_EXPORT -knetFile *knet_open(const char *fn, const char *mode) { - knetFile *fp = calloc(1, sizeof(*fp)); - if (!fp) return NULL; - if (!(fp->hf = hopen(fn, mode))) { - free(fp); - return NULL; - } - - // FD backend is the only one implementing knet_fileno - fp->fd = fp->hf->backend == &fd_backend - ? ((hFILE_fd *)fp->hf)->fd - : -1; - - return fp; -} - -HTSLIB_EXPORT -knetFile *knet_dopen(int fd, const char *mode) { - knetFile *fp = calloc(1, sizeof(*fp)); - if (!fp) return NULL; - if (!(fp->hf = hdopen(fd, mode))) { - free(fp); - return NULL; - } - fp->fd = fd; - return fp; -} - -HTSLIB_EXPORT -ssize_t knet_read(knetFile *fp, void *buf, size_t len) { - ssize_t r = hread(fp->hf, buf, len); - fp->offset += r>0?r:0; - return r; -} - -HTSLIB_EXPORT -off_t knet_seek(knetFile *fp, off_t off, int whence) { - off_t r = hseek(fp->hf, off, whence); - if (r >= 0) - fp->offset = r; - return r; -} - -HTSLIB_EXPORT -int knet_close(knetFile *fp) { - int r = hclose(fp->hf); - free(fp); - return r; -} diff --git a/src/htslib-1.18/hfile_libcurl.c b/src/htslib-1.18/hfile_libcurl.c deleted file mode 100644 index 04d222b..0000000 --- a/src/htslib-1.18/hfile_libcurl.c +++ /dev/null @@ -1,1563 +0,0 @@ -/* hfile_libcurl.c -- libcurl backend for low-level file streams. - - Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#ifndef _WIN32 -# include -#endif -#include - -#include "hfile_internal.h" -#ifdef ENABLE_PLUGINS -#include "version.h" -#endif -#include "htslib/hts.h" // for hts_version() and hts_verbose -#include "htslib/kstring.h" -#include "htslib/khash.h" - -#include - -// Number of seconds to take off auth_token expiry, to allow for clock skew -// and slow servers -#define AUTH_REFRESH_EARLY_SECS 60 - -// Minimum number of bytes to skip when seeking forward. Seeks less than -// this will just read the data and throw it away. The optimal value -// depends on how long it takes to make a new connection compared -// to how fast the data arrives. 
-#define MIN_SEEK_FORWARD 1000000 - -typedef struct { - char *path; - char *token; - time_t expiry; - int failed; - pthread_mutex_t lock; -} auth_token; - -// For the authorization header cache -KHASH_MAP_INIT_STR(auth_map, auth_token *) - -// Curl-compatible header linked list -typedef struct { - struct curl_slist *list; - unsigned int num; - unsigned int size; -} hdrlist; - -typedef struct { - hdrlist fixed; // List of headers supplied at hopen() - hdrlist extra; // List of headers from callback - hts_httphdr_callback callback; // Callback to get more headers - void *callback_data; // Data to pass to httphdr callback - auth_token *auth; // Authentication token - int auth_hdr_num; // Location of auth_token in hdrlist extra - // If -1, Authorization header is in fixed - // -2, it came from the callback - // -3, "auth_token_enabled", "false" - // passed to hopen() - redirect_callback redirect; // Callback to handle 3xx redirects - void *redirect_data; // Data to pass to redirect_callback - long *http_response_ptr; // Location to store http response code. 
- int fail_on_error; // Open fails on >400 response code - // (default true) -} http_headers; - -typedef struct { - hFILE base; - CURL *easy; - CURLM *multi; - off_t file_size; - struct { - union { char *rd; const char *wr; } ptr; - size_t len; - } buffer; - CURLcode final_result; // easy result code for finished transfers - // Flags for communicating with libcurl callbacks: - unsigned paused : 1; // callback tells us that it has paused transfer - unsigned closing : 1; // informs callback that hclose() has been invoked - unsigned finished : 1; // wait_perform() tells us transfer is complete - unsigned perform_again : 1; - unsigned is_read : 1; // Opened in read mode - unsigned can_seek : 1; // Can (attempt to) seek on this handle - unsigned is_recursive:1; // Opened by hfile_libcurl itself - unsigned tried_seek : 1; // At least one seek has been attempted - int nrunning; - http_headers headers; - - off_t delayed_seek; // Location to seek to before reading - off_t last_offset; // Location we're seeking from - char *preserved; // Preserved buffer content on seek - size_t preserved_bytes; // Number of preserved bytes - size_t preserved_size; // Size of preserved buffer -} hFILE_libcurl; - -static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence); -static int restart_from_position(hFILE_libcurl *fp, off_t pos); - -static int http_status_errno(int status) -{ - if (status >= 500) - switch (status) { - case 501: return ENOSYS; - case 503: return EBUSY; - case 504: return ETIMEDOUT; - default: return EIO; - } - else if (status >= 400) - switch (status) { - case 401: return EPERM; - case 403: return EACCES; - case 404: return ENOENT; - case 405: return EROFS; - case 407: return EPERM; - case 408: return ETIMEDOUT; - case 410: return ENOENT; - default: return EINVAL; - } - else return 0; -} - -static int easy_errno(CURL *easy, CURLcode err) -{ - long lval; - - switch (err) { - case CURLE_OK: - return 0; - - case CURLE_UNSUPPORTED_PROTOCOL: - case CURLE_URL_MALFORMAT: 
- return EINVAL; - -#if LIBCURL_VERSION_NUM >= 0x071505 - case CURLE_NOT_BUILT_IN: - return ENOSYS; -#endif - - case CURLE_COULDNT_RESOLVE_PROXY: - case CURLE_COULDNT_RESOLVE_HOST: - case CURLE_FTP_CANT_GET_HOST: - return EDESTADDRREQ; // Lookup failure - - case CURLE_COULDNT_CONNECT: - case CURLE_SEND_ERROR: - case CURLE_RECV_ERROR: - if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK) - return lval; - else - return ECONNABORTED; - - case CURLE_REMOTE_ACCESS_DENIED: - case CURLE_LOGIN_DENIED: - case CURLE_TFTP_PERM: - return EACCES; - - case CURLE_PARTIAL_FILE: - return EPIPE; - - case CURLE_HTTP_RETURNED_ERROR: - if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK) - return http_status_errno(lval); - else - return EIO; - - case CURLE_OUT_OF_MEMORY: - return ENOMEM; - - case CURLE_OPERATION_TIMEDOUT: - return ETIMEDOUT; - - case CURLE_RANGE_ERROR: - return ESPIPE; - - case CURLE_SSL_CONNECT_ERROR: - // TODO return SSL error buffer messages - return ECONNABORTED; - - case CURLE_FILE_COULDNT_READ_FILE: - case CURLE_TFTP_NOTFOUND: - return ENOENT; - - case CURLE_TOO_MANY_REDIRECTS: - return ELOOP; - - case CURLE_FILESIZE_EXCEEDED: - return EFBIG; - - case CURLE_REMOTE_DISK_FULL: - return ENOSPC; - - case CURLE_REMOTE_FILE_EXISTS: - return EEXIST; - - default: - hts_log_error("Libcurl reported error %d (%s)", (int) err, - curl_easy_strerror(err)); - return EIO; - } -} - -static int multi_errno(CURLMcode errm) -{ - switch (errm) { - case CURLM_CALL_MULTI_PERFORM: - case CURLM_OK: - return 0; - - case CURLM_BAD_HANDLE: - case CURLM_BAD_EASY_HANDLE: - case CURLM_BAD_SOCKET: - return EBADF; - - case CURLM_OUT_OF_MEMORY: - return ENOMEM; - - default: - hts_log_error("Libcurl reported error %d (%s)", (int) errm, - curl_multi_strerror(errm)); - return EIO; - } -} - -static struct { - kstring_t useragent; - CURLSH *share; - char *auth_path; - khash_t(auth_map) *auth_map; - int allow_unencrypted_auth_header; - pthread_mutex_t auth_lock; - 
pthread_mutex_t share_lock; -} curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER, - PTHREAD_MUTEX_INITIALIZER }; - -static void share_lock(CURL *handle, curl_lock_data data, - curl_lock_access access, void *userptr) { - pthread_mutex_lock(&curl.share_lock); -} - -static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) { - pthread_mutex_unlock(&curl.share_lock); -} - -static void free_auth(auth_token *tok) { - if (!tok) return; - if (pthread_mutex_destroy(&tok->lock)) abort(); - free(tok->path); - free(tok->token); - free(tok); -} - -static void libcurl_exit(void) -{ - if (curl_share_cleanup(curl.share) == CURLSHE_OK) - curl.share = NULL; - - free(curl.useragent.s); - curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL; - - free(curl.auth_path); - curl.auth_path = NULL; - - if (curl.auth_map) { - khiter_t i; - for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) { - if (kh_exist(curl.auth_map, i)) { - free_auth(kh_value(curl.auth_map, i)); - kh_key(curl.auth_map, i) = NULL; - kh_value(curl.auth_map, i) = NULL; - } - } - kh_destroy(auth_map, curl.auth_map); - curl.auth_map = NULL; - } - - curl_global_cleanup(); -} - -static int append_header(hdrlist *hdrs, const char *data, int dup) { - if (hdrs->num == hdrs->size) { - unsigned int new_sz = hdrs->size ? hdrs->size * 2 : 4, i; - struct curl_slist *new_list = realloc(hdrs->list, - new_sz * sizeof(*new_list)); - if (!new_list) return -1; - hdrs->size = new_sz; - hdrs->list = new_list; - for (i = 1; i < hdrs->num; i++) hdrs->list[i-1].next = &hdrs->list[i]; - } - // Annoyingly, libcurl doesn't declare the char * as const... - hdrs->list[hdrs->num].data = dup ? 
strdup(data) : (char *) data; - if (!hdrs->list[hdrs->num].data) return -1; - if (hdrs->num > 0) hdrs->list[hdrs->num - 1].next = &hdrs->list[hdrs->num]; - hdrs->list[hdrs->num].next = NULL; - hdrs->num++; - return 0; -} - -static void free_headers(hdrlist *hdrs, int completely) { - unsigned int i; - for (i = 0; i < hdrs->num; i++) { - free(hdrs->list[i].data); - hdrs->list[i].data = NULL; - hdrs->list[i].next = NULL; - } - hdrs->num = 0; - if (completely) { - free(hdrs->list); - hdrs->size = 0; - hdrs->list = NULL; - } -} - -static struct curl_slist * get_header_list(hFILE_libcurl *fp) { - if (fp->headers.fixed.num > 0) - return &fp->headers.fixed.list[0]; - if (fp->headers.extra.num > 0) - return &fp->headers.extra.list[0]; - return 0; -} - -static inline int is_authorization(const char *hdr) { - return (strncasecmp("authorization:", hdr, 14) == 0); -} - -static int add_callback_headers(hFILE_libcurl *fp) { - char **hdrs = NULL, **hdr; - - if (!fp->headers.callback) - return 0; - - // Get the headers from the callback - if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) { - return -1; - } - - if (!hdrs) // No change - return 0; - - // Remove any old callback headers - if (fp->headers.fixed.num > 0) { - // Unlink lists - fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL; - } - free_headers(&fp->headers.extra, 0); - - if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2) - fp->headers.auth_hdr_num = 0; // Just removed it... 
- - // Convert to libcurl-suitable form - for (hdr = hdrs; *hdr; hdr++) { - if (append_header(&fp->headers.extra, *hdr, 0) < 0) { - goto cleanup; - } - if (is_authorization(*hdr) && !fp->headers.auth_hdr_num) - fp->headers.auth_hdr_num = -2; - } - for (hdr = hdrs; *hdr; hdr++) *hdr = NULL; - - if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) { - // Relink lists - fp->headers.fixed.list[fp->headers.fixed.num - 1].next - = &fp->headers.extra.list[0]; - } - return 0; - - cleanup: - while (hdr && *hdr) { - free(*hdr); - *hdr = NULL; - } - return -1; -} - -/* - * Read an OAUTH2-style Bearer access token (see - * https://tools.ietf.org/html/rfc6750#section-4). - * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort); - * '?' for a JSON parse error; 'm' if it runs out of memory. - */ -static int read_auth_json(auth_token *tok, hFILE *auth_fp) { - hts_json_token *t = hts_json_alloc_token(); - kstring_t str = {0, 0, NULL}; - char *token = NULL, *type = NULL, *expiry = NULL; - int ret = 'i'; - - if (!t) goto error; - - if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error; - while (hts_json_fnext(auth_fp, t, &str) != '}') { - char *key; - if (hts_json_token_type(t) != 's') { - ret = '?'; - goto error; - } - key = hts_json_token_str(t); - if (!key) goto error; - if (strcmp(key, "access_token") == 0) { - if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error; - token = ks_release(&str); - } else if (strcmp(key, "token_type") == 0) { - if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error; - type = ks_release(&str); - } else if (strcmp(key, "expires_in") == 0) { - if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error; - expiry = ks_release(&str); - } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') { - ret = '?'; - goto error; - } - } - - if (!token || (type && strcmp(type, "Bearer") != 0)) { - ret = 'i'; - goto error; - } - - ret = 'm'; - str.l = 0; - if (kputs("Authorization: Bearer ", &str) < 0) goto 
error; - if (kputs(token, &str) < 0) goto error; - free(tok->token); - tok->token = ks_release(&str); - if (expiry) { - long exp = strtol(expiry, NULL, 10); - if (exp < 0) exp = 0; - tok->expiry = time(NULL) + exp; - } else { - tok->expiry = 0; - } - ret = 'v'; - - error: - free(token); - free(type); - free(expiry); - free(str.s); - hts_json_free_token(t); - return ret; -} - -static int read_auth_plain(auth_token *tok, hFILE *auth_fp) { - kstring_t line = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - const char *start, *end; - - if (kgetline(&line, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0) goto error; - if (kputc('\0', &line) < 0) goto error; - - for (start = line.s; *start && isspace_c(*start); start++) {} - for (end = start; *end && !isspace_c(*end); end++) {} - - if (end > start) { - if (kputs("Authorization: Bearer ", &token) < 0) goto error; - if (kputsn(start, end - start, &token) < 0) goto error; - } - - free(tok->token); - tok->token = ks_release(&token); - tok->expiry = 0; - free(line.s); - return 0; - - error: - free(line.s); - free(token.s); - return -1; -} - -static int renew_auth_token(auth_token *tok, int *changed) { - hFILE *auth_fp = NULL; - char buffer[16]; - ssize_t len; - - *changed = 0; - if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry) - return 0; // Still valid - - if (tok->failed) - return -1; - - *changed = 1; - auth_fp = hopen(tok->path, "rR"); - if (!auth_fp) { - // Not worried about missing files; other errors are bad. - if (errno != ENOENT) - goto fail; - - tok->expiry = 0; // Prevent retry - free(tok->token); // Just in case it was set - return 0; - } - - len = hpeek(auth_fp, buffer, sizeof(buffer)); - if (len < 0) - goto fail; - - if (memchr(buffer, '{', len) != NULL) { - if (read_auth_json(tok, auth_fp) != 'v') - goto fail; - } else { - if (read_auth_plain(tok, auth_fp) < 0) - goto fail; - } - - return hclose(auth_fp) < 0 ? 
-1 : 0; - - fail: - tok->failed = 1; - if (auth_fp) hclose_abruptly(auth_fp); - return -1; -} - -static int add_auth_header(hFILE_libcurl *fp) { - int changed = 0; - - if (fp->headers.auth_hdr_num < 0) - return 0; // Have an Authorization header from open or header callback - - if (!fp->headers.auth) - return 0; // Nothing to add - - pthread_mutex_lock(&fp->headers.auth->lock); - if (renew_auth_token(fp->headers.auth, &changed) < 0) - goto unlock_fail; - - if (!changed && fp->headers.auth_hdr_num > 0) { - pthread_mutex_unlock(&fp->headers.auth->lock); - return 0; - } - - if (fp->headers.auth_hdr_num > 0) { - // Had a previous header, so swap in the new one - char *header = fp->headers.auth->token; - char *header_copy = header ? strdup(header) : NULL; - int idx = fp->headers.auth_hdr_num - 1; - if (header && !header_copy) - goto unlock_fail; - - if (header_copy) { - free(fp->headers.extra.list[idx].data); - fp->headers.extra.list[idx].data = header_copy; - } else { - unsigned int j; - // More complicated case - need to get rid of the old header - // and tidy up linked lists - free(fp->headers.extra.list[idx].data); - for (j = idx + 1; j < fp->headers.extra.num; j++) { - fp->headers.extra.list[j - 1] = fp->headers.extra.list[j]; - fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j]; - } - fp->headers.extra.num--; - if (fp->headers.extra.num > 0) { - fp->headers.extra.list[fp->headers.extra.num-1].next = NULL; - } else if (fp->headers.fixed.num > 0) { - fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL; - } - fp->headers.auth_hdr_num = 0; - } - } else if (fp->headers.auth->token) { - // Add new header and remember where it is - if (append_header(&fp->headers.extra, - fp->headers.auth->token, 1) < 0) { - goto unlock_fail; - } - fp->headers.auth_hdr_num = fp->headers.extra.num; - } - - pthread_mutex_unlock(&fp->headers.auth->lock); - return 0; - - unlock_fail: - pthread_mutex_unlock(&fp->headers.auth->lock); - return -1; -} - -static int 
get_auth_token(hFILE_libcurl *fp, const char *url) { - const char *host = NULL, *p, *q; - kstring_t name = {0, 0, NULL}; - size_t host_len = 0; - khiter_t idx; - auth_token *tok = NULL; - - // Nothing to do if: - // curl.auth_path has not been set - // fp was made by hfile_libcurl (e.g. auth_path is a http:// url) - // we already have an Authorization header - if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0) - return 0; - - // Insist on having a secure connection unless the user insists harder - if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0) - return 0; - - host = strstr(url, "://"); - if (host) { - host += 3; - host_len = strcspn(host, "/"); - } - - p = curl.auth_path; - while ((q = strstr(p, "%h")) != NULL) { - if (q - p > INT_MAX || host_len > INT_MAX) goto error; - if (kputsn_(p, q - p, &name) < 0) goto error; - if (kputsn_(host, host_len, &name) < 0) goto error; - p = q + 2; - } - if (kputs(p, &name) < 0) goto error; - - pthread_mutex_lock(&curl.auth_lock); - idx = kh_get(auth_map, curl.auth_map, name.s); - if (idx < kh_end(curl.auth_map)) { - tok = kh_value(curl.auth_map, idx); - } else { - tok = calloc(1, sizeof(*tok)); - if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) { - free(tok); - tok = NULL; - } - if (tok) { - int ret = -1; - tok->path = ks_release(&name); - tok->token = NULL; - tok->expiry = 1; // Force refresh - idx = kh_put(auth_map, curl.auth_map, tok->path, &ret); - if (ret < 0) { - free_auth(tok); - tok = NULL; - } - kh_value(curl.auth_map, idx) = tok; - } - } - pthread_mutex_unlock(&curl.auth_lock); - - fp->headers.auth = tok; - free(name.s); - - return add_auth_header(fp); - - error: - free(name.s); - return -1; -} - -static void process_messages(hFILE_libcurl *fp) -{ - CURLMsg *msg; - int remaining; - - while ((msg = curl_multi_info_read(fp->multi, &remaining)) != NULL) { - switch (msg->msg) { - case CURLMSG_DONE: - fp->finished = 1; - fp->final_result = msg->data.result; - break; - 
- default: - break; - } - } -} - -static int wait_perform(hFILE_libcurl *fp) -{ - fd_set rd, wr, ex; - int maxfd, nrunning; - long timeout; - CURLMcode errm; - - if (!fp->perform_again) { - FD_ZERO(&rd); - FD_ZERO(&wr); - FD_ZERO(&ex); - if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK) - maxfd = -1, timeout = 1000; - else { - if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) - timeout = 1000; - else if (timeout < 0) { - timeout = 10000; // as recommended by curl_multi_timeout(3) - } - } - if (maxfd < 0) { - if (timeout > 100) - timeout = 100; // as recommended by curl_multi_fdset(3) -#ifdef _WIN32 - /* Windows ignores the first argument of select, so calling select - * with maxfd=-1 does not give the expected result of sleeping for - * timeout milliseconds in the conditional block below. - * So sleep here and skip the next block. - */ - Sleep(timeout); - timeout = 0; -#endif - } - - if (timeout > 0) { - struct timeval tval; - tval.tv_sec = (timeout / 1000); - tval.tv_usec = (timeout % 1000) * 1000; - - if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0) return -1; - } - } - - errm = curl_multi_perform(fp->multi, &nrunning); - fp->perform_again = 0; - if (errm == CURLM_CALL_MULTI_PERFORM) fp->perform_again = 1; - else if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; } - - if (nrunning < fp->nrunning) process_messages(fp); - return 0; -} - - -static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv) -{ - hFILE_libcurl *fp = (hFILE_libcurl *) fpv; - size_t n = size * nmemb; - - if (n > fp->buffer.len) { - fp->paused = 1; - return CURL_WRITEFUNC_PAUSE; - } - else if (n == 0) return 0; - - memcpy(fp->buffer.ptr.rd, ptr, n); - fp->buffer.ptr.rd += n; - fp->buffer.len -= n; - return n; -} - - -static size_t header_callback(void *contents, size_t size, size_t nmemb, - void *userp) -{ - size_t realsize = size * nmemb; - kstring_t *resp = (kstring_t *)userp; - - if (kputsn((const char *)contents, realsize, resp) == EOF) 
{ - return 0; - } - - return realsize; -} - - -static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes) -{ - hFILE_libcurl *fp = (hFILE_libcurl *) fpv; - char *buffer = (char *) bufferv; - off_t to_skip = -1; - ssize_t got = 0; - CURLcode err; - - if (fp->delayed_seek >= 0) { - assert(fp->base.offset == fp->delayed_seek); - - if (fp->preserved - && fp->last_offset > fp->delayed_seek - && fp->last_offset - fp->preserved_bytes <= fp->delayed_seek) { - // Can use buffer contents copied when seeking started, to - // avoid having to re-read data discarded by hseek(). - // Note fp->last_offset is the offset of the *end* of the - // preserved buffer. - size_t n = fp->last_offset - fp->delayed_seek; - char *start = fp->preserved + (fp->preserved_bytes - n); - size_t bytes = n <= nbytes ? n : nbytes; - memcpy(buffer, start, bytes); - if (bytes < n) { // Part of the preserved buffer still left - fp->delayed_seek += bytes; - } else { - fp->last_offset = fp->delayed_seek = -1; - } - return bytes; - } - - if (fp->last_offset >= 0 - && fp->delayed_seek > fp->last_offset - && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) { - // If not seeking far, just read the data and throw it away. This - // is likely to be quicker than opening a new stream - to_skip = fp->delayed_seek - fp->last_offset; - } else { - if (restart_from_position(fp, fp->delayed_seek) < 0) { - return -1; - } - } - fp->delayed_seek = -1; - fp->last_offset = -1; - fp->preserved_bytes = 0; - } - - do { - fp->buffer.ptr.rd = buffer; - fp->buffer.len = nbytes; - fp->paused = 0; - if (!fp->finished) { - err = curl_easy_pause(fp->easy, CURLPAUSE_CONT); - if (err != CURLE_OK) { - errno = easy_errno(fp->easy, err); - return -1; - } - } - - while (! fp->paused && ! 
fp->finished) { - if (wait_perform(fp) < 0) return -1; - } - - got = fp->buffer.ptr.rd - buffer; - - if (to_skip >= 0) { // Skipping over a small seek - if (got < to_skip) { // Need to skip more data - to_skip -= got; - } else { - got -= to_skip; - if (got > 0) { // If enough was skipped, return the rest - memmove(buffer, buffer + to_skip, got); - to_skip = -1; - } - } - } - } while (to_skip >= 0 && ! fp->finished); - fp->buffer.ptr.rd = NULL; - fp->buffer.len = 0; - - if (fp->finished && fp->final_result != CURLE_OK) { - errno = easy_errno(fp->easy, fp->final_result); - return -1; - } - - return got; -} - -static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv) -{ - hFILE_libcurl *fp = (hFILE_libcurl *) fpv; - size_t n = size * nmemb; - - if (fp->buffer.len == 0) { - // Send buffer is empty; normally pause, or signal EOF if we're closing - if (fp->closing) return 0; - else { fp->paused = 1; return CURL_READFUNC_PAUSE; } - } - - if (n > fp->buffer.len) n = fp->buffer.len; - memcpy(ptr, fp->buffer.ptr.wr, n); - fp->buffer.ptr.wr += n; - fp->buffer.len -= n; - return n; -} - -static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes) -{ - hFILE_libcurl *fp = (hFILE_libcurl *) fpv; - const char *buffer = (const char *) bufferv; - CURLcode err; - - fp->buffer.ptr.wr = buffer; - fp->buffer.len = nbytes; - fp->paused = 0; - err = curl_easy_pause(fp->easy, CURLPAUSE_CONT); - if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; } - - while (! fp->paused && ! 
fp->finished) - if (wait_perform(fp) < 0) return -1; - - nbytes = fp->buffer.ptr.wr - buffer; - fp->buffer.ptr.wr = NULL; - fp->buffer.len = 0; - - if (fp->finished && fp->final_result != CURLE_OK) { - errno = easy_errno(fp->easy, fp->final_result); - return -1; - } - - return nbytes; -} - -static void preserve_buffer_content(hFILE_libcurl *fp) -{ - if (fp->base.begin == fp->base.end) { - fp->preserved_bytes = 0; - return; - } - if (!fp->preserved - || fp->preserved_size < fp->base.limit - fp->base.buffer) { - fp->preserved = malloc(fp->base.limit - fp->base.buffer); - if (!fp->preserved) return; - fp->preserved_size = fp->base.limit - fp->base.buffer; - } - - assert(fp->base.end - fp->base.begin <= fp->preserved_size); - - memcpy(fp->preserved, fp->base.begin, fp->base.end - fp->base.begin); - fp->preserved_bytes = fp->base.end - fp->base.begin; - return; -} - -static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence) -{ - hFILE_libcurl *fp = (hFILE_libcurl *) fpv; - off_t origin, pos; - - if (!fp->is_read || !fp->can_seek) { - // Cowardly refuse to seek when writing or a previous seek failed. - errno = ESPIPE; - return -1; - } - - switch (whence) { - case SEEK_SET: - origin = 0; - break; - case SEEK_CUR: - errno = ENOSYS; - return -1; - case SEEK_END: - if (fp->file_size < 0) { errno = ESPIPE; return -1; } - origin = fp->file_size; - break; - default: - errno = EINVAL; - return -1; - } - - // Check 0 <= origin+offset < fp->file_size carefully, avoiding overflow - if ((offset < 0)? origin + offset < 0 - : (fp->file_size >= 0 && offset > fp->file_size - origin)) { - errno = EINVAL; - return -1; - } - - pos = origin + offset; - - if (fp->tried_seek) { - /* Seeking has worked at least once, so now we can delay doing - the actual work until the next read. This avoids lots of pointless - http or ftp reconnections if the caller does lots of seeks - without any intervening reads. 
*/ - if (fp->delayed_seek < 0) { - fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer); - // Stash the current hFILE buffer content in case it's useful later - preserve_buffer_content(fp); - } - fp->delayed_seek = pos; - return pos; - } - - if (restart_from_position(fp, pos) < 0) { - /* This value for errno may not be entirely true, but the caller may be - able to carry on with the existing handle. */ - errno = ESPIPE; - return -1; - } - - fp->tried_seek = 1; - return pos; -} - -static int restart_from_position(hFILE_libcurl *fp, off_t pos) { - hFILE_libcurl temp_fp; - CURLcode err; - CURLMcode errm; - int update_headers = 0; - int save_errno = 0; - - // TODO If we seem to be doing random access, use CURLOPT_RANGE to do - // limited reads (e.g. about a BAM block!) so seeking can reuse the - // existing connection more often. - - // Get new headers from the callback (if defined). This changes the - // headers in fp before it gets duplicated, but they should be have been - // sent by now. - - if (fp->headers.callback) { - if (add_callback_headers(fp) != 0) - return -1; - update_headers = 1; - } - if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) { - if (add_auth_header(fp) != 0) - return -1; - update_headers = 1; - } - if (update_headers) { - struct curl_slist *list = get_header_list(fp); - if (list) { - err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list); - if (err != CURLE_OK) { - errno = easy_errno(fp->easy,err); - return -1; - } - } - } - - /* - Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open - a new request to the server, reading from the location that we want - to seek to. If the new request works and returns the correct data, - the original easy handle in *fp is closed and replaced with the new - one. If not, we close the new handle and leave *fp unchanged. 
- */ - - memcpy(&temp_fp, fp, sizeof(temp_fp)); - temp_fp.buffer.len = 0; - temp_fp.buffer.ptr.rd = NULL; - temp_fp.easy = curl_easy_duphandle(fp->easy); - if (!temp_fp.easy) - goto early_error; - - err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos); - err |= curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp); - err |= curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp); - if (err != CURLE_OK) { - save_errno = easy_errno(temp_fp.easy, err); - goto error; - } - - temp_fp.buffer.len = 0; // Ensures we only read the response headers - temp_fp.paused = temp_fp.finished = 0; - - // fp->multi and temp_fp.multi are the same. - errm = curl_multi_add_handle(fp->multi, temp_fp.easy); - if (errm != CURLM_OK) { - save_errno = multi_errno(errm); - goto error; - } - temp_fp.nrunning = ++fp->nrunning; - - while (! temp_fp.paused && ! temp_fp.finished) - if (wait_perform(&temp_fp) < 0) { - save_errno = errno; - goto error_remove; - } - - if (temp_fp.finished && temp_fp.final_result != CURLE_OK) { - save_errno = easy_errno(temp_fp.easy, temp_fp.final_result); - goto error_remove; - } - - // We've got a good response, close the original connection and - // replace it with the new one. 
- - errm = curl_multi_remove_handle(fp->multi, fp->easy); - if (errm != CURLM_OK) { - // Clean up as much as possible - curl_easy_reset(temp_fp.easy); - if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) { - fp->nrunning--; - curl_easy_cleanup(temp_fp.easy); - } - save_errno = multi_errno(errm); - goto early_error; - } - fp->nrunning--; - - curl_easy_cleanup(fp->easy); - fp->easy = temp_fp.easy; - err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp); - err |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp); - if (err != CURLE_OK) { - save_errno = easy_errno(fp->easy, err); - curl_easy_reset(fp->easy); - errno = save_errno; - return -1; - } - fp->buffer.len = 0; - fp->paused = temp_fp.paused; - fp->finished = temp_fp.finished; - fp->perform_again = temp_fp.perform_again; - fp->final_result = temp_fp.final_result; - - return 0; - - error_remove: - curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp - errm = curl_multi_remove_handle(fp->multi, temp_fp.easy); - if (errm != CURLM_OK) { - errno = multi_errno(errm); - return -1; - } - fp->nrunning--; - error: - curl_easy_cleanup(temp_fp.easy); - early_error: - fp->can_seek = 0; // Don't try to seek again - if (save_errno) - errno = save_errno; - return -1; -} - -static int libcurl_close(hFILE *fpv) -{ - hFILE_libcurl *fp = (hFILE_libcurl *) fpv; - CURLcode err; - CURLMcode errm; - int save_errno = 0; - - // Before closing the file, unpause it and perform on it so that uploads - // have the opportunity to signal EOF to the server -- see send_callback(). - - fp->buffer.len = 0; - fp->closing = 1; - fp->paused = 0; - if (!fp->finished) { - err = curl_easy_pause(fp->easy, CURLPAUSE_CONT); - if (err != CURLE_OK) save_errno = easy_errno(fp->easy, err); - } - - while (save_errno == 0 && ! fp->paused && ! 
fp->finished) - if (wait_perform(fp) < 0) save_errno = errno; - - if (fp->finished && fp->final_result != CURLE_OK) - save_errno = easy_errno(fp->easy, fp->final_result); - - errm = curl_multi_remove_handle(fp->multi, fp->easy); - if (errm != CURLM_OK && save_errno == 0) save_errno = multi_errno(errm); - fp->nrunning--; - - curl_easy_cleanup(fp->easy); - curl_multi_cleanup(fp->multi); - - if (fp->headers.callback) // Tell callback to free any data it needs to - fp->headers.callback(fp->headers.callback_data, NULL); - free_headers(&fp->headers.fixed, 1); - free_headers(&fp->headers.extra, 1); - - free(fp->preserved); - - if (save_errno) { errno = save_errno; return -1; } - else return 0; -} - -static const struct hFILE_backend libcurl_backend = -{ - libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close -}; - -static hFILE * -libcurl_open(const char *url, const char *modes, http_headers *headers) -{ - hFILE_libcurl *fp; - struct curl_slist *list; - char mode; - const char *s; - CURLcode err; - CURLMcode errm; - int save, is_recursive; - kstring_t in_header = {0, 0, NULL}; - long response; - - is_recursive = strchr(modes, 'R') != NULL; - - if ((s = strpbrk(modes, "rwa+")) != NULL) { - mode = *s; - if (strpbrk(&s[1], "rwa+")) mode = 'e'; - } - else mode = '\0'; - - if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; } - - fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0); - if (fp == NULL) goto early_error; - - if (headers) { - fp->headers = *headers; - } else { - memset(&fp->headers, 0, sizeof(fp->headers)); - fp->headers.fail_on_error = 1; - } - - fp->file_size = -1; - fp->buffer.ptr.rd = NULL; - fp->buffer.len = 0; - fp->final_result = (CURLcode) -1; - fp->paused = fp->closing = fp->finished = fp->perform_again = 0; - fp->can_seek = 1; - fp->tried_seek = 0; - fp->delayed_seek = fp->last_offset = -1; - fp->preserved = NULL; - fp->preserved_bytes = fp->preserved_size = 0; - fp->is_recursive = is_recursive; - fp->nrunning = 0; 
- fp->easy = NULL; - - fp->multi = curl_multi_init(); - if (fp->multi == NULL) { errno = ENOMEM; goto error; } - - fp->easy = curl_easy_init(); - if (fp->easy == NULL) { errno = ENOMEM; goto error; } - - // Make a route to the hFILE_libcurl* given just a CURL* easy handle - err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp); - - // Avoid many repeated CWD calls with FTP, instead requesting the filename - // by full path (but not strictly compliant with RFC1738). - err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_NOCWD); - - if (mode == 'r') { - err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback); - err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp); - fp->is_read = 1; - } - else { - err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback); - err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp); - err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L); - if (append_header(&fp->headers.fixed, - "Transfer-Encoding: chunked", 1) < 0) - goto error; - fp->is_read = 0; - } - - err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share); - err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url); - { - char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE"); - if (env_curl_ca_bundle) { - err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle); - } - } - err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s); - if (fp->headers.callback) { - if (add_callback_headers(fp) != 0) goto error; - } - if (get_auth_token(fp, url) < 0) - goto error; - if ((list = get_header_list(fp)) != NULL) - err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list); - - if (hts_verbose <= 8 && fp->headers.fail_on_error) - err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L); - if (hts_verbose >= 8) - err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L); - - if (fp->headers.redirect) { - err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, header_callback); - err |= curl_easy_setopt(fp->easy, 
CURLOPT_HEADERDATA, (void *)&in_header); - } else { - err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L); - } - - if (err != 0) { errno = ENOSYS; goto error; } - - errm = curl_multi_add_handle(fp->multi, fp->easy); - if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; } - fp->nrunning++; - - while (! fp->paused && ! fp->finished) { - if (wait_perform(fp) < 0) goto error_remove; - } - - curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, &response); - if (fp->headers.http_response_ptr) { - *fp->headers.http_response_ptr = response; - } - - if (fp->finished && fp->final_result != CURLE_OK) { - errno = easy_errno(fp->easy, fp->final_result); - goto error_remove; - } - - if (fp->headers.redirect) { - if (response >= 300 && response < 400) { // redirection - kstring_t new_url = {0, 0, NULL}; - - if (fp->headers.redirect(fp->headers.redirect_data, response, - &in_header, &new_url)) { - errno = ENOSYS; - goto error; - } - - err |= curl_easy_setopt(fp->easy, CURLOPT_URL, new_url.s); - err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL); - err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL); - free(ks_release(&in_header)); - - if (err != 0) { errno = ENOSYS; goto error; } - free(ks_release(&new_url)); - - if (restart_from_position(fp, 0) < 0) { - goto error_remove; - } - - if (fp->headers.http_response_ptr) { - curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, - fp->headers.http_response_ptr); - } - - if (fp->finished && fp->final_result != CURLE_OK) { - errno = easy_errno(fp->easy, fp->final_result); - goto error_remove; - } - } else { - // we no longer need to look at the headers - err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL); - err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL); - free(ks_release(&in_header)); - - if (err != 0) { errno = ENOSYS; goto error; } - } - } - - if (mode == 'r') { -#if LIBCURL_VERSION_NUM >= 0x073700 // 7.55.0 - curl_off_t offset; - - if (curl_easy_getinfo(fp->easy, 
CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, - &offset) == CURLE_OK && offset > 0) - fp->file_size = (off_t) offset; -#else - double dval; - - if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD, - &dval) == CURLE_OK && dval >= 0.0) - fp->file_size = (off_t) (dval + 0.1); -#endif - } - fp->base.backend = &libcurl_backend; - return &fp->base; - -error_remove: - save = errno; - (void) curl_multi_remove_handle(fp->multi, fp->easy); - fp->nrunning--; - errno = save; - -error: - if (fp->headers.redirect) free(in_header.s); - save = errno; - if (fp->easy) curl_easy_cleanup(fp->easy); - if (fp->multi) curl_multi_cleanup(fp->multi); - free_headers(&fp->headers.extra, 1); - hfile_destroy((hFILE *) fp); - errno = save; - return NULL; - -early_error: - return NULL; -} - -static hFILE *hopen_libcurl(const char *url, const char *modes) -{ - return libcurl_open(url, modes, NULL); -} - -static int parse_va_list(http_headers *headers, va_list args) -{ - const char *argtype; - - while ((argtype = va_arg(args, const char *)) != NULL) - if (strcmp(argtype, "httphdr:v") == 0) { - const char **hdr; - for (hdr = va_arg(args, const char **); *hdr; hdr++) { - if (append_header(&headers->fixed, *hdr, 1) < 0) - return -1; - if (is_authorization(*hdr)) - headers->auth_hdr_num = -1; - } - } - else if (strcmp(argtype, "httphdr:l") == 0) { - const char *hdr; - while ((hdr = va_arg(args, const char *)) != NULL) { - if (append_header(&headers->fixed, hdr, 1) < 0) - return -1; - if (is_authorization(hdr)) - headers->auth_hdr_num = -1; - } - } - else if (strcmp(argtype, "httphdr") == 0) { - const char *hdr = va_arg(args, const char *); - if (hdr) { - if (append_header(&headers->fixed, hdr, 1) < 0) - return -1; - if (is_authorization(hdr)) - headers->auth_hdr_num = -1; - } - } - else if (strcmp(argtype, "httphdr_callback") == 0) { - headers->callback = va_arg(args, const hts_httphdr_callback); - } - else if (strcmp(argtype, "httphdr_callback_data") == 0) { - headers->callback_data = va_arg(args, 
void *); - } - else if (strcmp(argtype, "va_list") == 0) { - va_list *args2 = va_arg(args, va_list *); - if (args2) { - if (parse_va_list(headers, *args2) < 0) return -1; - } - } - else if (strcmp(argtype, "auth_token_enabled") == 0) { - const char *flag = va_arg(args, const char *); - if (strcmp(flag, "false") == 0) - headers->auth_hdr_num = -3; - } - else if (strcmp(argtype, "redirect_callback") == 0) { - headers->redirect = va_arg(args, const redirect_callback); - } - else if (strcmp(argtype, "redirect_callback_data") == 0) { - headers->redirect_data = va_arg(args, void *); - } - else if (strcmp(argtype, "http_response_ptr") == 0) { - headers->http_response_ptr = va_arg(args, long *); - } - else if (strcmp(argtype, "fail_on_error") == 0) { - headers->fail_on_error = va_arg(args, int); - } - else { errno = EINVAL; return -1; } - - return 0; -} - -/* - HTTP headers to be added to the request can be passed in as extra - arguments to hopen(). The headers can be specified as follows: - - * Single header: - hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL); - - * Multiple headers in the argument list: - hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL); - - * Multiple headers in a char* array: - hopen(url, mode, "httphdr:v", hdrs, NULL); - where `hdrs` is a char **. The list ends with a NULL pointer. - - * A callback function - hopen(url, mode, "httphdr_callback", func, - "httphdr_callback_data", arg, NULL); - `func` has type - int (* hts_httphdr_callback) (void *cb_data, char ***hdrs); - `arg` is passed to the callback as a void *. - - The function is called at file open, and when attempting to seek (which - opens a new HTTP request). This allows, for example, access tokens - that may have gone stale to be regenerated. The function is also - called (with `hdrs` == NULL) on file close so that the callback can - free any memory that it needs to. - - The callback should return 0 on success, non-zero on failure. 
It should - return in *hdrs a list of strings containing the new headers (terminated - with a NULL pointer). These will replace any headers previously supplied - by the callback. If no changes are necessary, it can return NULL - in *hdrs, in which case the previous headers will be left unchanged. - - Ownership of the strings in the header list passes to hfile_libcurl, - so the callback should not attempt to use or free them itself. The memory - containing the array belongs to the callback and will not be freed by - hfile_libcurl. - - Headers supplied by the callback are appended after any specified - using the "httphdr", "httphdr:l" or "httphdr:v" methods. No attempt - is made to replace these headers (even if a key is repeated) so anything - that is expected to vary needs to come from the callback. - */ - -static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args) -{ - hFILE *fp = NULL; - http_headers headers = { .fail_on_error = 1 }; - - if (parse_va_list(&headers, args) == 0) { - fp = libcurl_open(url, modes, &headers); - } - - if (!fp) { - free_headers(&headers.fixed, 1); - } - return fp; -} - -int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self) -{ - static const struct hFILE_scheme_handler handler = - { hopen_libcurl, hfile_always_remote, "libcurl", - 2000 + 50, - vhopen_libcurl }; - -#ifdef ENABLE_PLUGINS - // Embed version string for examination via strings(1) or what(1) - static const char id[] = - "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION_TEXT; - const char *version = strchr(id, '\t')+1; -#else - const char *version = hts_version(); -#endif - const curl_version_info_data *info; - const char * const *protocol; - const char *auth; - CURLcode err; - CURLSHcode errsh; - - err = curl_global_init(CURL_GLOBAL_ALL); - if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; } - - curl.share = curl_share_init(); - if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; } - errsh = 
curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock); - errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock); - errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS); - if (errsh != 0) { - curl_share_cleanup(curl.share); - curl_global_cleanup(); - errno = EIO; - return -1; - } - - if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) { - curl.auth_path = strdup(auth); - curl.auth_map = kh_init(auth_map); - if (!curl.auth_path || !curl.auth_map) { - int save_errno = errno; - free(curl.auth_path); - kh_destroy(auth_map, curl.auth_map); - curl_share_cleanup(curl.share); - curl_global_cleanup(); - errno = save_errno; - return -1; - } - } - if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL - && strcmp(auth, "I understand the risks") == 0) { - curl.allow_unencrypted_auth_header = 1; - } - - info = curl_version_info(CURLVERSION_NOW); - ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version); - - self->name = "libcurl"; - self->destroy = libcurl_exit; - - for (protocol = info->protocols; *protocol; protocol++) - hfile_add_scheme_handler(*protocol, &handler); - return 0; -} diff --git a/src/htslib-1.18/hfile_s3.c b/src/htslib-1.18/hfile_s3.c deleted file mode 100644 index e2718f6..0000000 --- a/src/htslib-1.18/hfile_s3.c +++ /dev/null @@ -1,1408 +0,0 @@ -/* hfile_s3.c -- Amazon S3 backend for low-level file streams. - - Copyright (C) 2015-2017, 2019-2023 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include "hfile_internal.h" -#ifdef ENABLE_PLUGINS -#include "version.h" -#endif -#include "htslib/hts.h" // for hts_version() and hts_verbose -#include "htslib/kstring.h" -#include "hts_time_funcs.h" - -typedef struct s3_auth_data { - kstring_t id; - kstring_t token; - kstring_t secret; - kstring_t region; - kstring_t canonical_query_string; - kstring_t user_query_string; - kstring_t host; - kstring_t profile; - time_t creds_expiry_time; - char *bucket; - kstring_t auth_hdr; - time_t auth_time; - char date[40]; - char date_long[17]; - char date_short[9]; - kstring_t date_html; - char mode; - char *headers[5]; - int refcount; -} s3_auth_data; - -#define AUTH_LIFETIME 60 // Regenerate auth headers if older than this -#define CREDENTIAL_LIFETIME 60 // Seconds before expiry to reread credentials - -#if defined HAVE_COMMONCRYPTO - -#include - -#define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH -#define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH -#define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 - -static size_t -s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message) -{ - CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest); - return CC_SHA1_DIGEST_LENGTH; -} - - -static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { - CC_SHA256(in, length, out); -} - - -static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { - CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md); - *md_len = CC_SHA256_DIGEST_LENGTH; -} - - -#elif defined HAVE_HMAC - -#include -#include - -#define DIGEST_BUFSIZ EVP_MAX_MD_SIZE -#define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH -#define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 - -static size_t -s3_sign(unsigned char *digest, kstring_t *key, 
kstring_t *message) -{ - unsigned int len; - HMAC(EVP_sha1(), key->s, key->l, - (unsigned char *) message->s, message->l, digest, &len); - return len; -} - - -static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { - SHA256(in, length, out); -} - - -static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { - HMAC(EVP_sha256(), key, key_len, d, n, md, md_len); -} - -#else -#error No HMAC() routine found by configure -#endif - -static void -urldecode_kput(const char *s, int len, kstring_t *str) -{ - char buf[3]; - int i = 0; - - while (i < len) - if (s[i] == '%' && i+2 < len) { - buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0'; - kputc(strtol(buf, NULL, 16), str); - i += 3; - } - else kputc(s[i++], str); -} - -static void base64_kput(const unsigned char *data, size_t len, kstring_t *str) -{ - static const char base64[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - size_t i = 0; - unsigned x = 0; - int bits = 0, pad = 0; - - while (bits || i < len) { - if (bits < 6) { - x <<= 8, bits += 8; - if (i < len) x |= data[i++]; - else pad++; - } - - bits -= 6; - kputc(base64[(x >> bits) & 63], str); - } - - str->l -= pad; - kputsn("==", pad, str); -} - -static int is_dns_compliant(const char *s0, const char *slim, int is_https) -{ - int has_nondigit = 0, len = 0; - const char *s; - - for (s = s0; s < slim; len++, s++) - if (islower_c(*s)) - has_nondigit = 1; - else if (*s == '-') { - has_nondigit = 1; - if (s == s0 || s+1 == slim) return 0; - } - else if (isdigit_c(*s)) - ; - else if (*s == '.') { - if (is_https) return 0; - if (s == s0 || ! isalnum_c(s[-1])) return 0; - if (s+1 == slim || ! 
isalnum_c(s[1])) return 0; - } - else return 0; - - return has_nondigit && len >= 3 && len <= 63; -} - -static FILE *expand_tilde_open(const char *fname, const char *mode) -{ - FILE *fp; - - if (strncmp(fname, "~/", 2) == 0) { - kstring_t full_fname = { 0, 0, NULL }; - const char *home = getenv("HOME"); - if (! home) return NULL; - - kputs(home, &full_fname); - kputs(&fname[1], &full_fname); - - fp = fopen(full_fname.s, mode); - free(full_fname.s); - } - else - fp = fopen(fname, mode); - - return fp; -} - -static void parse_ini(const char *fname, const char *section, ...) -{ - kstring_t line = { 0, 0, NULL }; - int active = 1; // Start active, so global properties are accepted - char *s; - - FILE *fp = expand_tilde_open(fname, "r"); - if (fp == NULL) return; - - while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0) - if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) { - *s = '\0'; - active = (strcmp(&line.s[1], section) == 0); - } - else if (active && (s = strpbrk(line.s, ":=")) != NULL) { - const char *key = line.s, *value = &s[1], *akey; - va_list args; - - while (isspace_c(*key)) key++; - while (s > key && isspace_c(s[-1])) s--; - *s = '\0'; - - while (isspace_c(*value)) value++; - while (line.l > 0 && isspace_c(line.s[line.l-1])) - line.s[--line.l] = '\0'; - - va_start(args, section); - while ((akey = va_arg(args, const char *)) != NULL) { - kstring_t *avar = va_arg(args, kstring_t *); - if (strcmp(key, akey) == 0) { - avar->l = 0; - kputs(value, avar); - break; } - } - va_end(args); - } - - fclose(fp); - free(line.s); -} - -static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret) -{ - kstring_t text = { 0, 0, NULL }; - char *s; - size_t len; - - FILE *fp = expand_tilde_open(fname, "r"); - if (fp == NULL) return; - - while (kgetline(&text, (kgets_func *) fgets, fp) >= 0) - kputc(' ', &text); - fclose(fp); - - s = text.s; - while (isspace_c(*s)) s++; - kputsn(s, len = strcspn(s, " \t"), id); - - s += len; - while 
(isspace_c(*s)) s++; - kputsn(s, strcspn(s, " \t"), secret); - - free(text.s); -} - -static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) { - char **hdr = &ad->headers[0]; - int idx = 0; - *hdrs = hdr; - - hdr[idx] = strdup(ad->date); - if (!hdr[idx]) return -1; - idx++; - - if (ad->token.l) { - kstring_t token_hdr = KS_INITIALIZE; - kputs("X-Amz-Security-Token: ", &token_hdr); - kputs(ad->token.s, &token_hdr); - if (token_hdr.s) { - hdr[idx++] = token_hdr.s; - } else { - goto fail; - } - } - - if (ad->auth_hdr.l) { - hdr[idx] = strdup(ad->auth_hdr.s); - if (!hdr[idx]) goto fail; - idx++; - } - - hdr[idx] = NULL; - return 0; - - fail: - for (--idx; idx >= 0; --idx) - free(hdr[idx]); - return -1; -} - -static void free_auth_data(s3_auth_data *ad) { - if (ad->refcount > 0) { - --ad->refcount; - return; - } - free(ad->profile.s); - free(ad->id.s); - free(ad->token.s); - free(ad->secret.s); - free(ad->region.s); - free(ad->canonical_query_string.s); - free(ad->user_query_string.s); - free(ad->host.s); - free(ad->bucket); - free(ad->auth_hdr.s); - free(ad->date_html.s); - free(ad); -} - -static time_t parse_rfc3339_date(kstring_t *datetime) -{ - int offset = 0; - time_t when; - int num; - char should_be_t = '\0', timezone[10] = { '\0' }; - unsigned int year, mon, day, hour, min, sec; - - if (!datetime->s) - return 0; - - // It should be possible to do this with strptime(), but it seems - // to not get on with our feature definitions. 
- num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s", - &year, &mon, &day, &should_be_t, &hour, &min, &sec, timezone); - if (num < 8) - return 0; - if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ') - return 0; - struct tm parsed = { sec, min, hour, day, mon - 1, year - 1900, 0, 0, 0 }; - - switch (timezone[0]) { - case 'Z': - case 'z': - case '\0': - break; - case '+': - case '-': { - unsigned hr_off, min_off; - if (sscanf(timezone + 1, "%2u:%2u", &hr_off, &min_off)) { - if (hr_off < 24 && min_off <= 60) { - offset = ((hr_off * 60 + min_off) - * (timezone[0] == '+' ? -60 : 60)); - } - } - break; - } - default: - return 0; - } - - when = hts_time_gm(&parsed); - return when >= 0 ? when + offset : 0; -} - -static void refresh_auth_data(s3_auth_data *ad) { - // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of - // setup_auth_data(), but this only reads the authorisation parts. - const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); - kstring_t expiry_time = KS_INITIALIZE; - parse_ini(v? 
v : "~/.aws/credentials", ad->profile.s, - "aws_access_key_id", &ad->id, - "aws_secret_access_key", &ad->secret, - "aws_session_token", &ad->token, - "expiry_time", &expiry_time); - if (expiry_time.l) { - ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); - } - ks_free(&expiry_time); -} - -static int auth_header_callback(void *ctx, char ***hdrs) { - s3_auth_data *ad = (s3_auth_data *) ctx; - - time_t now = time(NULL); -#ifdef HAVE_GMTIME_R - struct tm tm_buffer; - struct tm *tm = gmtime_r(&now, &tm_buffer); -#else - struct tm *tm = gmtime(&now); -#endif - kstring_t message = { 0, 0, NULL }; - unsigned char digest[DIGEST_BUFSIZ]; - size_t digest_len; - - if (!hdrs) { // Closing connection - free_auth_data(ad); - return 0; - } - - if (ad->creds_expiry_time > 0 - && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { - refresh_auth_data(ad); - } else if (now - ad->auth_time < AUTH_LIFETIME) { - // Last auth string should still be valid - *hdrs = NULL; - return 0; - } - - strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm); - if (!ad->id.l || !ad->secret.l) { - ad->auth_time = now; - return copy_auth_headers(ad, hdrs); - } - - if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s", - ad->mode == 'r' ? "GET" : "PUT", ad->date + 6, - ad->token.l ? "x-amz-security-token:" : "", - ad->token.l ? ad->token.s : "", - ad->token.l ? 
"\n" : "", - ad->bucket) < 0) { - return -1; - } - - digest_len = s3_sign(digest, &ad->secret, &message); - ad->auth_hdr.l = 0; - if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0) - goto fail; - base64_kput(digest, digest_len, &ad->auth_hdr); - - free(message.s); - ad->auth_time = now; - return copy_auth_headers(ad, hdrs); - - fail: - free(message.s); - return -1; -} - - -/* like a escape path but for query strings '=' and '&' are untouched */ -static char *escape_query(const char *qs) { - size_t i, j = 0, length, alloced; - char *escaped; - - length = strlen(qs); - alloced = length * 3 + 1; - if ((escaped = malloc(alloced)) == NULL) { - return NULL; - } - - for (i = 0; i < length; i++) { - int c = qs[i]; - - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || - c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') { - escaped[j++] = c; - } else { - snprintf(escaped + j, alloced - j, "%%%02X", c); - j += 3; - } - } - - escaped[j] = '\0'; - - return escaped; -} - - -static char *escape_path(const char *path) { - size_t i, j = 0, length, alloced; - char *escaped; - - length = strlen(path); - alloced = length * 3 + 1; - - if ((escaped = malloc(alloced)) == NULL) { - return NULL; - } - - for (i = 0; i < length; i++) { - int c = path[i]; - - if (c == '?') break; // don't escape ? or beyond - - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || - c == '_' || c == '-' || c == '~' || c == '.' || c == '/') { - escaped[j++] = c; - } else { - snprintf(escaped + j, alloced - j, "%%%02X", c); - j += 3; - } - } - - if (i != length) { - // in the case of a '?' 
copy the rest of the path across unchanged - strcpy(escaped + j, path + i); - } else { - escaped[j] = '\0'; - } - - return escaped; -} - - -static int is_escaped(const char *str) { - const char *c = str; - int escaped = 0; - int needs_escape = 0; - - while (*c != '\0') { - if (*c == '%' && c[1] != '\0' && c[2] != '\0') { - if (isxdigit_c(c[1]) && isxdigit_c(c[2])) { - escaped = 1; - c += 3; - continue; - } else { - // only escaped if all % signs are escaped - escaped = 0; - } - } - if (!((*c >= '0' && *c <= '9') || (*c >= 'A' && *c <= 'Z') - || (*c >= 'a' && *c <= 'z') || - *c == '_' || *c == '-' || *c == '~' || *c == '.' || *c == '/')) { - needs_escape = 1; - } - c++; - } - - return escaped || !needs_escape; -} - -static int redirect_endpoint_callback(void *auth, long response, - kstring_t *header, kstring_t *url) { - s3_auth_data *ad = (s3_auth_data *)auth; - char *new_region; - char *end; - int ret = -1; - - // get the new region from the reply header - if ((new_region = strstr(header->s, "x-amz-bucket-region: "))) { - - new_region += strlen("x-amz-bucket-region: "); - end = new_region; - - while (isalnum_c(*end) || ispunct_c(*end)) end++; - - *end = 0; - - if (strstr(ad->host.s, "amazonaws.com")) { - ad->region.l = 0; - kputs(new_region, &ad->region); - - ad->host.l = 0; - ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region); - - if (ad->region.l && ad->host.l) { - url->l = 0; - kputs(ad->host.s, url); - kputsn(ad->bucket, strlen(ad->bucket), url); - if (ad->user_query_string.l) { - kputc('?', url); - kputsn(ad->user_query_string.s, ad->user_query_string.l, url); - } - ret = 0; - } - } - } - - return ret; -} - -static s3_auth_data * setup_auth_data(const char *s3url, const char *mode, - int sigver, kstring_t *url) -{ - s3_auth_data *ad = calloc(1, sizeof(*ad)); - const char *bucket, *path; - char *escaped = NULL; - size_t url_path_pos; - ptrdiff_t bucket_len; - int is_https = 1, dns_compliant; - char *query_start; - enum {s3_auto, s3_virtual, s3_path} 
address_style = s3_auto; - - if (!ad) - return NULL; - ad->mode = strchr(mode, 'r') ? 'r' : 'w'; - - // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH - - if (s3url[2] == '+') { - bucket = strchr(s3url, ':') + 1; - if (bucket == NULL) { - free(ad); - return NULL; - } - kputsn(&s3url[3], bucket - &s3url[3], url); - is_https = strncmp(url->s, "https:", 6) == 0; - } - else { - kputs("https:", url); - bucket = &s3url[3]; - } - while (*bucket == '/') kputc(*bucket++, url); - - path = bucket + strcspn(bucket, "/?#@"); - - if (*path == '@') { - const char *colon = strpbrk(bucket, ":@"); - if (*colon != ':') { - urldecode_kput(bucket, colon - bucket, &ad->profile); - } - else { - const char *colon2 = strpbrk(&colon[1], ":@"); - urldecode_kput(bucket, colon - bucket, &ad->id); - urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret); - if (*colon2 == ':') - urldecode_kput(&colon2[1], path - &colon2[1], &ad->token); - } - - bucket = &path[1]; - path = bucket + strcspn(bucket, "/?#"); - } - else { - // If the URL has no ID[:SECRET]@, consider environment variables. 
- const char *v; - if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id); - if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret); - if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token); - if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region); - if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host); - - if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile); - else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile); - else kputs("default", &ad->profile); - - if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) { - if (strcasecmp(v, "virtual") == 0) { - address_style = s3_virtual; - } else if (strcasecmp(v, "path") == 0) { - address_style = s3_path; - } - } - } - - if (ad->id.l == 0) { - kstring_t url_style = KS_INITIALIZE; - kstring_t expiry_time = KS_INITIALIZE; - const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); - parse_ini(v? v : "~/.aws/credentials", ad->profile.s, - "aws_access_key_id", &ad->id, - "aws_secret_access_key", &ad->secret, - "aws_session_token", &ad->token, - "region", &ad->region, - "addressing_style", &url_style, - "expiry_time", &expiry_time, - NULL); - - if (url_style.l) { - if (strcmp(url_style.s, "virtual") == 0) { - address_style = s3_virtual; - } else if (strcmp(url_style.s, "path") == 0) { - address_style = s3_path; - } else { - address_style = s3_auto; - } - } - if (expiry_time.l) { - // Not a real part of the AWS configuration file, but it allows - // support for short-term credentials like those for the IAM - // service. The botocore library uses the key "expiry_time" - // internally for this purpose. - // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py - ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); - } - - ks_free(&url_style); - ks_free(&expiry_time); - } - - if (ad->id.l == 0) { - kstring_t url_style = KS_INITIALIZE; - const char *v = getenv("HTS_S3_S3CFG"); - parse_ini(v? 
v : "~/.s3cfg", ad->profile.s, "access_key", &ad->id, - "secret_key", &ad->secret, "access_token", &ad->token, - "host_base", &ad->host, - "bucket_location", &ad->region, - "host_bucket", &url_style, - NULL); - - if (url_style.l) { - // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string - // indicates use of path style adressing. - if (strstr(url_style.s, "%(bucket)s") == NULL) { - address_style = s3_path; - } else { - address_style = s3_auto; - } - } - - ks_free(&url_style); - } - - if (ad->id.l == 0) - parse_simple("~/.awssecret", &ad->id, &ad->secret); - - - // if address_style is set, force the dns_compliant setting - if (address_style == s3_virtual) { - dns_compliant = 1; - } else if (address_style == s3_path) { - dns_compliant = 0; - } else { - dns_compliant = is_dns_compliant(bucket, path, is_https); - } - - if (ad->host.l == 0) - kputs("s3.amazonaws.com", &ad->host); - - if (!dns_compliant && ad->region.l > 0 - && strcmp(ad->host.s, "s3.amazonaws.com") == 0) { - // Can avoid a redirection by including the region in the host name - // (assuming the right one has been specified) - ad->host.l = 0; - ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s); - } - - if (ad->region.l == 0) - kputs("us-east-1", &ad->region); - - if (!is_escaped(path)) { - escaped = escape_path(path); - if (escaped == NULL) { - goto error; - } - } - - bucket_len = path - bucket; - - // Use virtual hosted-style access if possible, otherwise path-style. 
- if (dns_compliant) { - size_t url_host_pos = url->l; - // Append "bucket.host" to url - kputsn_(bucket, bucket_len, url); - kputc('.', url); - kputsn(ad->host.s, ad->host.l, url); - url_path_pos = url->l; - - if (sigver == 4) { - // Copy back to ad->host to use when making the signature - ad->host.l = 0; - kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host); - } - } - else { - // Append "host/bucket" to url - kputsn(ad->host.s, ad->host.l, url); - url_path_pos = url->l; - kputc('/', url); - kputsn(bucket, bucket_len, url); - } - - kputs(escaped == NULL ? path : escaped, url); - - if (sigver == 4 || !dns_compliant) { - ad->bucket = malloc(url->l - url_path_pos + 1); - if (ad->bucket == NULL) { - goto error; - } - memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1); - } - else { - ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2); - if (ad->bucket == NULL) { - goto error; - } - ad->bucket[0] = '/'; - memcpy(ad->bucket + 1, bucket, bucket_len); - memcpy(ad->bucket + bucket_len + 1, - url->s + url_path_pos, url->l - url_path_pos + 1); - } - - // write any query strings to its own place to use later - if ((query_start = strchr(ad->bucket, '?'))) { - kputs(query_start + 1, &ad->user_query_string); - *query_start = 0; - } - - free(escaped); - - return ad; - - error: - free(escaped); - free_auth_data(ad); - return NULL; -} - -static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp) -{ - kstring_t url = { 0, 0, NULL }; - s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url); - - if (!ad) - return NULL; - - hFILE *fp = hopen(url.s, mode, "va_list", argsp, - "httphdr_callback", auth_header_callback, - "httphdr_callback_data", ad, - "redirect_callback", redirect_endpoint_callback, - "redirect_callback_data", ad, - NULL); - if (!fp) goto fail; - - free(url.s); - return fp; - - fail: - free(url.s); - free_auth_data(ad); - return NULL; -} - -/*************************************************************** - -AWS S3 
sig version 4 writing code - -****************************************************************/ - -static void hash_string(char *in, size_t length, char *out, size_t out_len) { - unsigned char hashed[SHA256_DIGEST_BUFSIZE]; - int i, j; - - s3_sha256((const unsigned char *)in, length, hashed); - - for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) { - snprintf(out + j, out_len - j, "%02x", hashed[i]); - } -} - -static void ksinit(kstring_t *s) { - s->l = 0; - s->m = 0; - s->s = NULL; -} - - -static void ksfree(kstring_t *s) { - free(s->s); - ksinit(s); -} - - -static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) { - unsigned char date_key[SHA256_DIGEST_BUFSIZE]; - unsigned char date_region_key[SHA256_DIGEST_BUFSIZE]; - unsigned char date_region_service_key[SHA256_DIGEST_BUFSIZE]; - unsigned char signing_key[SHA256_DIGEST_BUFSIZE]; - unsigned char signature[SHA256_DIGEST_BUFSIZE]; - - const unsigned char service[] = "s3"; - const unsigned char request[] = "aws4_request"; - - kstring_t secret_access_key = {0, 0, NULL}; - unsigned int len; - unsigned int i, j; - - ksprintf(&secret_access_key, "AWS4%s", ad->secret.s); - - if (secret_access_key.l == 0) { - return -1; - } - - s3_sign_sha256(secret_access_key.s, secret_access_key.l, (const unsigned char *)ad->date_short, strlen(ad->date_short), date_key, &len); - s3_sign_sha256(date_key, len, (const unsigned char *)ad->region.s, ad->region.l, date_region_key, &len); - s3_sign_sha256(date_region_key, len, service, 2, date_region_service_key, &len); - s3_sign_sha256(date_region_service_key, len, request, 12, signing_key, &len); - s3_sign_sha256(signing_key, len, (const unsigned char *)string_to_sign->s, string_to_sign->l, signature, &len); - - for (i = 0, j = 0; i < len; i++, j+= 2) { - snprintf(signature_string + j, sig_string_len - j, "%02x", signature[i]); - } - - ksfree(&secret_access_key); - - return 0; -} - - -static int 
make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) { - kstring_t signed_headers = {0, 0, NULL}; - kstring_t canonical_headers = {0, 0, NULL}; - kstring_t canonical_request = {0, 0, NULL}; - kstring_t scope = {0, 0, NULL}; - kstring_t string_to_sign = {0, 0, NULL}; - char cr_hash[HASH_LENGTH_SHA256]; - char signature_string[HASH_LENGTH_SHA256]; - int ret = -1; - - - if (!ad->token.l) { - kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers); - } else { - kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers); - } - - if (signed_headers.l == 0) { - return -1; - } - - - if (!ad->token.l) { - ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n", - ad->host.s, content, ad->date_long); - } else { - ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n", - ad->host.s, content, ad->date_long, ad->token.s); - } - - if (canonical_headers.l == 0) { - goto cleanup; - } - - // bucket == canonical_uri - ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s", - http_request, ad->bucket, ad->canonical_query_string.s, - canonical_headers.s, signed_headers.s, content); - - if (canonical_request.l == 0) { - goto cleanup; - } - - hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash)); - - ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s); - - if (scope.l == 0) { - goto cleanup; - } - - ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash); - - if (string_to_sign.l == 0) { - goto cleanup; - } - - if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string))) { - goto cleanup; - } - - ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s", - ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string); - - if (auth->l == 0) { - goto cleanup; - } - - ret = 0; 
- - cleanup: - ksfree(&signed_headers); - ksfree(&canonical_headers); - ksfree(&canonical_request); - ksfree(&scope); - ksfree(&string_to_sign); - - return ret; -} - - -static int update_time(s3_auth_data *ad, time_t now) { - int ret = -1; -#ifdef HAVE_GMTIME_R - struct tm tm_buffer; - struct tm *tm = gmtime_r(&now, &tm_buffer); -#else - struct tm *tm = gmtime(&now); -#endif - - if (now - ad->auth_time > AUTH_LIFETIME) { - // update timestamp - ad->auth_time = now; - - if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) { - return -1; - } - - if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) { - return -1;; - } - - ad->date_html.l = 0; - ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long); - } - - if (ad->date_html.l) ret = 0; - - return ret; -} - - -static int query_cmp(const void *p1, const void *p2) { - char **q1 = (char **)p1; - char **q2 = (char **)p2; - - return strcmp(*q1, *q2); -} - - -/* Query strings must be in alphabetical order for authorisation */ - -static int order_query_string(kstring_t *qs) { - int *query_offset = NULL; - int num_queries, i; - char **queries = NULL; - kstring_t ordered = {0, 0, NULL}; - char *escaped = NULL; - int ret = -1; - - if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) { - return -1; - } - - if ((queries = malloc(num_queries * sizeof(char*))) == NULL) - goto err; - - for (i = 0; i < num_queries; i++) { - queries[i] = qs->s + query_offset[i]; - } - - qsort(queries, num_queries, sizeof(char *), query_cmp); - - for (i = 0; i < num_queries; i++) { - if (i) { - kputs("&", &ordered); - } - - kputs(queries[i], &ordered); - } - - if ((escaped = escape_query(ordered.s)) == NULL) - goto err; - - qs->l = 0; - kputs(escaped, qs); - - ret = 0; - err: - free(ordered.s); - free(queries); - free(query_offset); - free(escaped); - - return ret; -} - - -static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs, - kstring_t *hash, kstring_t *auth_str, kstring_t *date, - 
kstring_t *token, int uqs) { - s3_auth_data *ad = (s3_auth_data *)auth; - char content_hash[HASH_LENGTH_SHA256]; - time_t now; - - if (request == NULL) { - // signal to free auth data - free_auth_data(ad); - return 0; - } - - now = time(NULL); - - if (update_time(ad, now)) { - return -1; - } - if (ad->creds_expiry_time > 0 - && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { - refresh_auth_data(ad); - } - - if (content) { - hash_string(content->s, content->l, content_hash, sizeof(content_hash)); - } else { - // empty hash - hash_string("", 0, content_hash, sizeof(content_hash)); - } - - ad->canonical_query_string.l = 0; - kputs(cqs, &ad->canonical_query_string); - - if (ad->canonical_query_string.l == 0) { - return -1; - } - - /* add a user provided query string, normally only useful on upload initiation */ - if (uqs) { - kputs("&", &ad->canonical_query_string); - kputs(ad->user_query_string.s, &ad->canonical_query_string); - - if (order_query_string(&ad->canonical_query_string)) { - return -1; - } - } - - if (make_authorisation(ad, request, content_hash, auth_str)) { - return -1; - } - - kputs(ad->date_html.s, date); - kputsn(content_hash, HASH_LENGTH_SHA256, hash); - - if (date->l == 0 || hash->l == 0) { - return -1; - } - - if (ad->token.l) { - ksprintf(token, "x-amz-security-token: %s", ad->token.s); - } - - return 0; -} - - -static int v4_auth_header_callback(void *ctx, char ***hdrs) { - s3_auth_data *ad = (s3_auth_data *) ctx; - char content_hash[HASH_LENGTH_SHA256]; - kstring_t content = KS_INITIALIZE; - kstring_t authorisation = KS_INITIALIZE; - kstring_t token_hdr = KS_INITIALIZE; - char *date_html = NULL; - time_t now; - int idx; - - if (!hdrs) { // Closing connection - free_auth_data(ad); - return 0; - } - - now = time(NULL); - - if (update_time(ad, now)) { - return -1; - } - - if (ad->creds_expiry_time > 0 - && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { - refresh_auth_data(ad); - } - - if (!ad->id.l || !ad->secret.l) { - return 
copy_auth_headers(ad, hdrs); - } - - hash_string("", 0, content_hash, sizeof(content_hash)); // empty hash - - ad->canonical_query_string.l = 0; - - if (ad->user_query_string.l > 0) { - kputs(ad->user_query_string.s, &ad->canonical_query_string); - - if (order_query_string(&ad->canonical_query_string)) { - return -1; - } - } else { - kputs("", &ad->canonical_query_string); - } - - if (make_authorisation(ad, "GET", content_hash, &authorisation)) { - return -1; - } - - ksprintf(&content, "x-amz-content-sha256: %s", content_hash); - date_html = strdup(ad->date_html.s); - - if (ad->token.l > 0) { - kputs("X-Amz-Security-Token: ", &token_hdr); - kputs(ad->token.s, &token_hdr); - } - - if (content.l == 0 || date_html == NULL) { - ksfree(&authorisation); - ksfree(&content); - ksfree(&token_hdr); - free(date_html); - return -1; - } - - *hdrs = &ad->headers[0]; - idx = 0; - ad->headers[idx++] = ks_release(&authorisation); - ad->headers[idx++] = date_html; - ad->headers[idx++] = ks_release(&content); - if (token_hdr.s) - ad->headers[idx++] = ks_release(&token_hdr); - ad->headers[idx++] = NULL; - - return 0; -} - -static int handle_400_response(hFILE *fp, s3_auth_data *ad) { - // v4 signatures in virtual hosted mode return 400 Bad Request if the - // wrong region is used to make the signature. The response is an xml - // document which includes the name of the correct region. This can - // be extracted and used to generate a corrected signature. - // As the xml is fairly simple, go with something "good enough" instead - // of trying to parse it properly. 
- - char buffer[1024], *region, *reg_end; - ssize_t bytes; - - bytes = hread(fp, buffer, sizeof(buffer) - 1); - if (bytes < 0) { - return -1; - } - buffer[bytes] = '\0'; - region = strstr(buffer, ""); - if (region == NULL) { - return -1; - } - region += 8; - while (isspace((unsigned char) *region)) ++region; - reg_end = strchr(region, '<'); - if (reg_end == NULL || strncmp(reg_end + 1, "/Region>", 8) != 0) { - return -1; - } - while (reg_end > region && isspace((unsigned char) reg_end[-1])) --reg_end; - ad->region.l = 0; - kputsn(region, reg_end - region, &ad->region); - if (ad->region.l == 0) { - return -1; - } - - return 0; -} - -static int set_region(void *adv, kstring_t *region) { - s3_auth_data *ad = (s3_auth_data *) adv; - - ad->region.l = 0; - return kputsn(region->s, region->l, &ad->region) < 0; -} - -static int http_status_errno(int status) -{ - if (status >= 500) - switch (status) { - case 501: return ENOSYS; - case 503: return EBUSY; - case 504: return ETIMEDOUT; - default: return EIO; - } - else if (status >= 400) - switch (status) { - case 401: return EPERM; - case 403: return EACCES; - case 404: return ENOENT; - case 405: return EROFS; - case 407: return EPERM; - case 408: return ETIMEDOUT; - case 410: return ENOENT; - default: return EINVAL; - } - else return 0; -} - -static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) { - kstring_t url = { 0, 0, NULL }; - - s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url); - hFILE *fp = NULL; - - if (ad == NULL) { - return NULL; - } - - if (ad->mode == 'r') { - long http_response = 0; - - fp = hopen(url.s, mode, "va_list", argsp, - "httphdr_callback", v4_auth_header_callback, - "httphdr_callback_data", ad, - "redirect_callback", redirect_endpoint_callback, - "redirect_callback_data", ad, - "http_response_ptr", &http_response, - "fail_on_error", 0, - NULL); - - if (fp == NULL) goto error; - - if (http_response == 400) { - ad->refcount = 1; - if (handle_400_response(fp, ad) != 0) { - 
goto error; - } - hclose_abruptly(fp); - fp = hopen(url.s, mode, "va_list", argsp, - "httphdr_callback", v4_auth_header_callback, - "httphdr_callback_data", ad, - "redirect_callback", redirect_endpoint_callback, - "redirect_callback_data", ad, - NULL); - } else if (http_response > 400) { - ad->refcount = 1; - errno = http_status_errno(http_response); - goto error; - } - - if (fp == NULL) goto error; - } else { - kstring_t final_url = {0, 0, NULL}; - - // add the scheme marker - ksprintf(&final_url, "s3w+%s", url.s); - - if(final_url.l == 0) goto error; - - fp = hopen(final_url.s, mode, "va_list", argsp, - "s3_auth_callback", write_authorisation_callback, - "s3_auth_callback_data", ad, - "redirect_callback", redirect_endpoint_callback, - "set_region_callback", set_region, - NULL); - free(final_url.s); - - if (fp == NULL) goto error; - } - - free(url.s); - - return fp; - - error: - - if (fp) hclose_abruptly(fp); - free(url.s); - free_auth_data(ad); - - return NULL; -} - - -static hFILE *s3_open(const char *url, const char *mode) -{ - hFILE *fp; - - kstring_t mode_colon = { 0, 0, NULL }; - kputs(mode, &mode_colon); - kputc(':', &mode_colon); - - if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code - fp = s3_open_v4(url, mode_colon.s, NULL); - } else { - fp = s3_rewrite(url, mode_colon.s, NULL); - } - - free(mode_colon.s); - - return fp; -} - -static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0) -{ - hFILE *fp; - // Need to use va_copy() as we can only take the address of an actual - // va_list object, not that of a parameter whose type may have decayed. 
- va_list args; - va_copy(args, args0); - - if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code - fp = s3_open_v4(url, mode_colon, &args); - } else { - fp = s3_rewrite(url, mode_colon, &args); - } - - va_end(args); - return fp; -} - -int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self) -{ - static const struct hFILE_scheme_handler handler = - { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen - }; - -#ifdef ENABLE_PLUGINS - // Embed version string for examination via strings(1) or what(1) - static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT; - if (hts_verbose >= 9) - fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1); -#endif - - self->name = "Amazon S3"; - hfile_add_scheme_handler("s3", &handler); - hfile_add_scheme_handler("s3+http", &handler); - hfile_add_scheme_handler("s3+https", &handler); - return 0; -} diff --git a/src/htslib-1.18/hfile_s3_write.c b/src/htslib-1.18/hfile_s3_write.c deleted file mode 100644 index d549458..0000000 --- a/src/htslib-1.18/hfile_s3_write.c +++ /dev/null @@ -1,896 +0,0 @@ -/* - hfile_s3_write.c - Code to handle multipart uploading to S3. - - Copyright (C) 2019 Genome Research Ltd. - - Author: Andrew Whitwham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - - -S3 Multipart Upload -------------------- - -There are several steps in the Mulitipart upload. - - -1) Initiate Upload ------------------- - -Initiate the upload and get an upload ID. This ID is used in all other steps. - - -2) Upload Part --------------- - -Upload a part of the data. 5Mb minimum part size (except for the last part). -Each part is numbered and a successful upload returns an Etag header value that -needs to used for the completion step. - -Step repeated till all data is uploaded. - - -3) Completion -------------- - -Complete the upload by sending all the part numbers along with their associated -Etag values. - - -Optional - Abort ----------------- - -If something goes wrong this instructs the server to delete all the partial -uploads and abandon the upload process. - - -Andrew Whitwham, January 2019 -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#ifdef __MSYS__ -#include -#endif -#include -#include - -#include "hfile_internal.h" -#ifdef ENABLE_PLUGINS -#include "version.h" -#endif -#include "htslib/hts.h" -#include "htslib/kstring.h" -#include "htslib/khash.h" - -#include - -#define MINIMUM_S3_WRITE_SIZE 5242880 -#define S3_MOVED_PERMANENTLY 301 -#define S3_BAD_REQUEST 400 - -// Lets the part memory size grow to about 1Gb giving a 2.5Tb max file size. -// Max. 
parts allowed by AWS is 10000, so use ceil(10000.0/9.0) -#define EXPAND_ON 1112 - -static struct { - kstring_t useragent; - CURLSH *share; - pthread_mutex_t share_lock; -} curl = { { 0, 0, NULL }, NULL, PTHREAD_MUTEX_INITIALIZER }; - -static void share_lock(CURL *handle, curl_lock_data data, - curl_lock_access access, void *userptr) { - pthread_mutex_lock(&curl.share_lock); -} - -static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) { - pthread_mutex_unlock(&curl.share_lock); -} - -typedef int (*s3_auth_callback) (void *auth_data, char *, kstring_t*, char*, kstring_t*, kstring_t*, kstring_t*, kstring_t*, int); - -typedef int (*set_region_callback) (void *auth_data, kstring_t *region); - -typedef struct { - s3_auth_callback callback; - redirect_callback redirect_callback; - set_region_callback set_region_callback; - void *callback_data; -} s3_authorisation; - -typedef struct { - hFILE base; - CURL *curl; - CURLcode ret; - s3_authorisation *au; - kstring_t buffer; - kstring_t url; - kstring_t upload_id; - kstring_t completion_message; - int part_no; - int aborted; - size_t index; - long verbose; - int part_size; - int expand; -} hFILE_s3_write; - - -static void ksinit(kstring_t *s) { - s->l = 0; - s->m = 0; - s->s = NULL; -} - - -static void ksfree(kstring_t *s) { - free(s->s); - ksinit(s); -} - - -static size_t response_callback(void *contents, size_t size, size_t nmemb, void *userp) { - size_t realsize = size * nmemb; - kstring_t *resp = (kstring_t *)userp; - - if (kputsn((const char *)contents, realsize, resp) == EOF) { - return 0; - } - - return realsize; -} - - -static int get_entry(char *in, char *start_tag, char *end_tag, kstring_t *out) { - char *start; - char *end; - - if (!in) { - return EOF; - } - - start = strstr(in, start_tag); - if (!start) return EOF; - - start += strlen(start_tag); - end = strstr(start, end_tag); - - if (!end) return EOF; - - return kputsn(start, end - start, out); -} - - -static void cleanup_local(hFILE_s3_write 
*fp) { - ksfree(&fp->buffer); - ksfree(&fp->url); - ksfree(&fp->upload_id); - ksfree(&fp->completion_message); - curl_easy_cleanup(fp->curl); - free(fp->au); - -} - - -static void cleanup(hFILE_s3_write *fp) { - // free up authorisation data - fp->au->callback(fp->au->callback_data, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0); - cleanup_local(fp); -} - - -static struct curl_slist *set_html_headers(hFILE_s3_write *fp, kstring_t *auth, kstring_t *date, kstring_t *content, kstring_t *token) { - struct curl_slist *headers = NULL; - - headers = curl_slist_append(headers, "Content-Type:"); // get rid of this - headers = curl_slist_append(headers, "Expect:"); // and this - headers = curl_slist_append(headers, auth->s); - headers = curl_slist_append(headers, date->s); - headers = curl_slist_append(headers, content->s); - - if (token->l) { - headers = curl_slist_append(headers, token->s); - } - - curl_easy_setopt(fp->curl, CURLOPT_HTTPHEADER, headers); - - return headers; -} - - -/* - The partially uploaded file will hang around unless the delete command is sent. 
-*/ -static int abort_upload(hFILE_s3_write *fp) { - kstring_t content_hash = {0, 0, NULL}; - kstring_t authorisation = {0, 0, NULL}; - kstring_t url = {0, 0, NULL}; - kstring_t content = {0, 0, NULL}; - kstring_t canonical_query_string = {0, 0, NULL}; - kstring_t date = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - int ret = -1; - struct curl_slist *headers = NULL; - char http_request[] = "DELETE"; - - if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) { - goto out; - } - - if (fp->au->callback(fp->au->callback_data, http_request, NULL, - canonical_query_string.s, &content_hash, - &authorisation, &date, &token, 0) != 0) { - goto out; - } - - if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { - goto out; - } - - if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { - goto out; - } - - curl_easy_reset(fp->curl); - curl_easy_setopt(fp->curl, CURLOPT_CUSTOMREQUEST, http_request); - curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); - curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); - - curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); - - headers = set_html_headers(fp, &authorisation, &date, &content, &token); - fp->ret = curl_easy_perform(fp->curl); - - if (fp->ret == CURLE_OK) { - ret = 0; - } - - out: - ksfree(&authorisation); - ksfree(&content); - ksfree(&content_hash); - ksfree(&url); - ksfree(&date); - ksfree(&canonical_query_string); - ksfree(&token); - curl_slist_free_all(headers); - - fp->aborted = 1; - cleanup(fp); - - return ret; -} - - -static int complete_upload(hFILE_s3_write *fp, kstring_t *resp) { - kstring_t content_hash = {0, 0, NULL}; - kstring_t authorisation = {0, 0, NULL}; - kstring_t url = {0, 0, NULL}; - kstring_t content = {0, 0, NULL}; - kstring_t canonical_query_string = {0, 0, NULL}; - kstring_t date = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - int ret = -1; - struct curl_slist *headers = NULL; - char http_request[] = "POST"; - - if 
(ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) { - return -1; - } - - // finish off the completion reply - if (kputs("\n", &fp->completion_message) < 0) { - goto out; - } - - if (fp->au->callback(fp->au->callback_data, http_request, - &fp->completion_message, canonical_query_string.s, - &content_hash, &authorisation, &date, &token, 0) != 0) { - goto out; - } - - if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { - goto out; - } - - if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { - goto out; - } - - curl_easy_reset(fp->curl); - curl_easy_setopt(fp->curl, CURLOPT_POST, 1L); - curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, fp->completion_message.s); - curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDSIZE, (long) fp->completion_message.l); - curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback); - curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp); - curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); - curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); - - curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); - - headers = set_html_headers(fp, &authorisation, &date, &content, &token); - fp->ret = curl_easy_perform(fp->curl); - - if (fp->ret == CURLE_OK) { - ret = 0; - } - - out: - ksfree(&authorisation); - ksfree(&content); - ksfree(&content_hash); - ksfree(&url); - ksfree(&date); - ksfree(&token); - ksfree(&canonical_query_string); - curl_slist_free_all(headers); - - return ret; -} - - -static size_t upload_callback(void *ptr, size_t size, size_t nmemb, void *stream) { - size_t realsize = size * nmemb; - hFILE_s3_write *fp = (hFILE_s3_write *)stream; - size_t read_length; - - if (realsize > (fp->buffer.l - fp->index)) { - read_length = fp->buffer.l - fp->index; - } else { - read_length = realsize; - } - - memcpy(ptr, fp->buffer.s + fp->index, read_length); - fp->index += read_length; - - return read_length; -} - - -static int upload_part(hFILE_s3_write *fp, 
kstring_t *resp) { - kstring_t content_hash = {0, 0, NULL}; - kstring_t authorisation = {0, 0, NULL}; - kstring_t url = {0, 0, NULL}; - kstring_t content = {0, 0, NULL}; - kstring_t canonical_query_string = {0, 0, NULL}; - kstring_t date = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - int ret = -1; - struct curl_slist *headers = NULL; - char http_request[] = "PUT"; - - if (ksprintf(&canonical_query_string, "partNumber=%d&uploadId=%s", fp->part_no, fp->upload_id.s) < 0) { - return -1; - } - - if (fp->au->callback(fp->au->callback_data, http_request, &fp->buffer, - canonical_query_string.s, &content_hash, - &authorisation, &date, &token, 0) != 0) { - goto out; - } - - if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { - goto out; - } - - fp->index = 0; - if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { - goto out; - } - - curl_easy_reset(fp->curl); - - curl_easy_setopt(fp->curl, CURLOPT_UPLOAD, 1L); - curl_easy_setopt(fp->curl, CURLOPT_READFUNCTION, upload_callback); - curl_easy_setopt(fp->curl, CURLOPT_READDATA, fp); - curl_easy_setopt(fp->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fp->buffer.l); - curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback); - curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp); - curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); - curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); - - curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); - - headers = set_html_headers(fp, &authorisation, &date, &content, &token); - fp->ret = curl_easy_perform(fp->curl); - - if (fp->ret == CURLE_OK) { - ret = 0; - } - - out: - ksfree(&authorisation); - ksfree(&content); - ksfree(&content_hash); - ksfree(&url); - ksfree(&date); - ksfree(&token); - ksfree(&canonical_query_string); - curl_slist_free_all(headers); - - return ret; -} - - -static ssize_t s3_write(hFILE *fpv, const void *bufferv, size_t nbytes) { - hFILE_s3_write *fp = (hFILE_s3_write *)fpv; - const char 
*buffer = (const char *)bufferv; - - if (kputsn(buffer, nbytes, &fp->buffer) == EOF) { - return -1; - } - - if (fp->buffer.l > fp->part_size) { - // time to write out our data - kstring_t response = {0, 0, NULL}; - int ret; - - ret = upload_part(fp, &response); - - if (!ret) { - long response_code; - kstring_t etag = {0, 0, NULL}; - - curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); - - if (response_code > 200) { - ret = -1; - } else { - if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) { - ret = -1; - } else { - ksprintf(&fp->completion_message, "\t\n\t\t%d\n\t\t%s\n\t\n", - fp->part_no, etag.s); - - ksfree(&etag); - } - } - } - - ksfree(&response); - - if (ret) { - abort_upload(fp); - return -1; - } - - fp->part_no++; - fp->buffer.l = 0; - - if (fp->expand && (fp->part_no % EXPAND_ON == 0)) { - fp->part_size *= 2; - } - } - - return nbytes; -} - - -static int s3_close(hFILE *fpv) { - hFILE_s3_write *fp = (hFILE_s3_write *)fpv; - kstring_t response = {0, 0, NULL}; - int ret = 0; - - if (!fp->aborted) { - - if (fp->buffer.l) { - // write the last part - - ret = upload_part(fp, &response); - - if (!ret) { - long response_code; - kstring_t etag = {0, 0, NULL}; - - curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); - - if (response_code > 200) { - ret = -1; - } else { - if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) { - ret = -1; - } else { - ksprintf(&fp->completion_message, "\t\n\t\t%d\n\t\t%s\n\t\n", - fp->part_no, etag.s); - - ksfree(&etag); - } - } - } - - ksfree(&response); - - if (ret) { - abort_upload(fp); - return -1; - } - - fp->part_no++; - } - - if (fp->part_no > 1) { - ret = complete_upload(fp, &response); - - if (!ret) { - if (strstr(response.s, "CompleteMultipartUploadResult") == NULL) { - ret = -1; - } - } - } else { - ret = -1; - } - - if (ret) { - abort_upload(fp); - } else { - cleanup(fp); - } - } - - ksfree(&response); - - return ret; -} - - -static int redirect_endpoint(hFILE_s3_write *fp, 
kstring_t *head) { - int ret = -1; - - if (fp->au->redirect_callback) { - ret = fp->au->redirect_callback(fp->au->callback_data, 301, head, &fp->url); - } - - return ret; -} - -static int handle_bad_request(hFILE_s3_write *fp, kstring_t *resp) { - kstring_t region = {0, 0, NULL}; - int ret = -1; - - if (fp->au->set_region_callback) { - if (get_entry(resp->s, "", "", ®ion) == EOF) { - return -1; - } - - ret = fp->au->set_region_callback(fp->au->callback_data, ®ion); - - ksfree(®ion); - } - - return ret; -} - -static int initialise_upload(hFILE_s3_write *fp, kstring_t *head, kstring_t *resp, int user_query) { - kstring_t content_hash = {0, 0, NULL}; - kstring_t authorisation = {0, 0, NULL}; - kstring_t url = {0, 0, NULL}; - kstring_t content = {0, 0, NULL}; - kstring_t date = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - int ret = -1; - struct curl_slist *headers = NULL; - char http_request[] = "POST"; - char delimiter = '?'; - - if (user_query) { - delimiter = '&'; - } - - if (fp->au->callback(fp->au->callback_data, http_request, NULL, "uploads=", - &content_hash, &authorisation, &date, &token, user_query) != 0) { - goto out; - } - - if (ksprintf(&url, "%s%cuploads", fp->url.s, delimiter) < 0) { - goto out; - } - - if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { - goto out; - } - - curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); - curl_easy_setopt(fp->curl, CURLOPT_POST, 1L); - curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, ""); // send no data - curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback); - curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp); - curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback); - curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)head); - curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); - - curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); - - headers = set_html_headers(fp, &authorisation, &date, &content, &token); - fp->ret = 
curl_easy_perform(fp->curl); - - if (fp->ret == CURLE_OK) { - ret = 0; - } - - out: - ksfree(&authorisation); - ksfree(&content); - ksfree(&content_hash); - ksfree(&url); - ksfree(&date); - ksfree(&token); - curl_slist_free_all(headers); - - return ret; -} - - -static int get_upload_id(hFILE_s3_write *fp, kstring_t *resp) { - int ret = 0; - - ksinit(&fp->upload_id); - - if (get_entry(resp->s, "", "", &fp->upload_id) == EOF) { - ret = -1; - } - - return ret; -} - - -static const struct hFILE_backend s3_write_backend = { - NULL, s3_write, NULL, NULL, s3_close -}; - - -static hFILE *s3_write_open(const char *url, s3_authorisation *auth) { - hFILE_s3_write *fp; - kstring_t response = {0, 0, NULL}; - kstring_t header = {0, 0, NULL}; - int ret, has_user_query = 0; - char *query_start; - const char *env; - - - if (!auth || !auth->callback || !auth->callback_data) { - return NULL; - } - - fp = (hFILE_s3_write *)hfile_init(sizeof(hFILE_s3_write), "w", 0); - - if (fp == NULL) { - return NULL; - } - - if ((fp->curl = curl_easy_init()) == NULL) { - errno = ENOMEM; - goto error; - } - - if ((fp->au = calloc(1, sizeof(s3_authorisation))) == NULL) { - goto error; - } - - memcpy(fp->au, auth, sizeof(s3_authorisation)); - - ksinit(&fp->buffer); - ksinit(&fp->url); - ksinit(&fp->completion_message); - fp->aborted = 0; - - fp->part_size = MINIMUM_S3_WRITE_SIZE; - fp->expand = 1; - - if ((env = getenv("HTS_S3_PART_SIZE")) != NULL) { - int part_size = atoi(env) * 1024 * 1024; - - if (part_size > fp->part_size) - fp->part_size = part_size; - - fp->expand = 0; - } - - if (hts_verbose >= 8) { - fp->verbose = 1L; - } else { - fp->verbose = 0L; - } - - kputs(url + 4, &fp->url); - - if ((query_start = strchr(fp->url.s, '?'))) { - has_user_query = 1;; - } - - ret = initialise_upload(fp, &header, &response, has_user_query); - - if (ret == 0) { - long response_code; - - curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); - - if (response_code == S3_MOVED_PERMANENTLY) { - if 
(redirect_endpoint(fp, &header) == 0) { - ksfree(&response); - ksfree(&header); - - ret = initialise_upload(fp, &header, &response, has_user_query); - } - } else if (response_code == S3_BAD_REQUEST) { - if (handle_bad_request(fp, &response) == 0) { - ksfree(&response); - ksfree(&header); - - ret = initialise_upload(fp, &header, &response, has_user_query); - } - } - - ksfree(&header); // no longer needed - } - - if (ret) goto error; - - if (get_upload_id(fp, &response)) goto error; - - // start the completion message (a formatted list of parts) - ksinit(&fp->completion_message); - - if (kputs("\n", &fp->completion_message) == EOF) { - goto error; - } - - fp->part_no = 1; - - // user query string no longer a useful part of the URL - if (query_start) - *query_start = '\0'; - - fp->base.backend = &s3_write_backend; - ksfree(&response); - - return &fp->base; - -error: - ksfree(&response); - cleanup_local(fp); - hfile_destroy((hFILE *)fp); - return NULL; -} - - -static hFILE *hopen_s3_write(const char *url, const char *mode) { - if (hts_verbose >= 1) { - fprintf(stderr, "[E::%s] s3w:// URLs should not be used directly; use s3:// instead.\n", __func__); - } - return NULL; -} - - -static int parse_va_list(s3_authorisation *auth, va_list args) { - const char *argtype; - - while ((argtype = va_arg(args, const char *)) != NULL) { - if (strcmp(argtype, "s3_auth_callback") == 0) { - auth->callback = va_arg(args, s3_auth_callback); - } else if (strcmp(argtype, "s3_auth_callback_data") == 0) { - auth->callback_data = va_arg(args, void *); - } else if (strcmp(argtype, "redirect_callback") == 0) { - auth->redirect_callback = va_arg(args, redirect_callback); - } else if (strcmp(argtype, "set_region_callback") == 0) { - auth->set_region_callback = va_arg(args, set_region_callback); - } else if (strcmp(argtype, "va_list") == 0) { - va_list *args2 = va_arg(args, va_list *); - - if (args2) { - if (parse_va_list(auth, *args2) < 0) return -1; - } - } else { - errno = EINVAL; - return -1; 
- } - } - - return 0; -} - - -static hFILE *vhopen_s3_write(const char *url, const char *mode, va_list args) { - hFILE *fp = NULL; - s3_authorisation auth = {NULL, NULL, NULL}; - - if (parse_va_list(&auth, args) == 0) { - fp = s3_write_open(url, &auth); - } - - return fp; -} - - -static void s3_write_exit() { - if (curl_share_cleanup(curl.share) == CURLSHE_OK) - curl.share = NULL; - - free(curl.useragent.s); - curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL; - curl_global_cleanup(); -} - - -int PLUGIN_GLOBAL(hfile_plugin_init,_s3_write)(struct hFILE_plugin *self) { - - static const struct hFILE_scheme_handler handler = - { hopen_s3_write, hfile_always_remote, "S3 Multipart Upload", - 2000 + 50, vhopen_s3_write - }; - -#ifdef ENABLE_PLUGINS - // Embed version string for examination via strings(1) or what(1) - static const char id[] = - "@(#)hfile_s3_write plugin (htslib)\t" HTS_VERSION_TEXT; - const char *version = strchr(id, '\t') + 1; - - if (hts_verbose >= 9) - fprintf(stderr, "[M::hfile_s3_write.init] version %s\n", - version); -#else - const char *version = hts_version(); -#endif - - const curl_version_info_data *info; - CURLcode err; - CURLSHcode errsh; - - err = curl_global_init(CURL_GLOBAL_ALL); - - if (err != CURLE_OK) { - // look at putting in an errno here - return -1; - } - - curl.share = curl_share_init(); - - if (curl.share == NULL) { - curl_global_cleanup(); - errno = EIO; - return -1; - } - - errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock); - errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock); - errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS); - - if (errsh != 0) { - curl_share_cleanup(curl.share); - curl_global_cleanup(); - errno = EIO; - return -1; - } - - info = curl_version_info(CURLVERSION_NOW); - ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version); - - self->name = "S3 Multipart Upload"; - self->destroy = s3_write_exit; - - 
hfile_add_scheme_handler("s3w", &handler); - hfile_add_scheme_handler("s3w+http", &handler); - hfile_add_scheme_handler("s3w+https", &handler); - - return 0; -} diff --git a/src/htslib-1.18/hts.c b/src/htslib-1.18/hts.c deleted file mode 100644 index ef2a0b3..0000000 --- a/src/htslib-1.18/hts.c +++ /dev/null @@ -1,4957 +0,0 @@ -/* hts.c -- format-neutral I/O, indexing, and iterator API functions. - - Copyright (C) 2008, 2009, 2012-2023 Genome Research Ltd. - Copyright (C) 2012, 2013 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_LIBLZMA -#ifdef HAVE_LZMA_H -#include -#else -#include "os/lzma_stub.h" -#endif -#endif - -#include "htslib/hts.h" -#include "htslib/bgzf.h" -#include "cram/cram.h" -#include "htslib/hfile.h" -#include "htslib/hts_endian.h" -#include "version.h" -#include "config_vars.h" -#include "hts_internal.h" -#include "hfile_internal.h" -#include "sam_internal.h" -#include "htslib/hts_expr.h" -#include "htslib/hts_os.h" // drand48 - -#include "htslib/khash.h" -#include "htslib/kseq.h" -#include "htslib/ksort.h" -#include "htslib/tbx.h" -#if defined(HAVE_EXTERNAL_LIBHTSCODECS) -#include -#else -#include "htscodecs/htscodecs/htscodecs.h" -#endif - -#ifndef EFTYPE -#define EFTYPE ENOEXEC -#endif - -KHASH_INIT2(s2i,, kh_cstr_t, int64_t, 1, kh_str_hash_func, kh_str_hash_equal) - -HTSLIB_EXPORT -int hts_verbose = HTS_LOG_WARNING; - -const char *hts_version(void) -{ - return HTS_VERSION_TEXT; -} - -unsigned int hts_features(void) { - unsigned int feat = HTS_FEATURE_HTSCODECS; // Always present - -#ifdef PACKAGE_URL - feat |= HTS_FEATURE_CONFIGURE; -#endif - -#ifdef ENABLE_PLUGINS - feat |= HTS_FEATURE_PLUGINS; -#endif - -#ifdef HAVE_LIBCURL - feat |= HTS_FEATURE_LIBCURL; -#endif - -#ifdef ENABLE_S3 - feat |= HTS_FEATURE_S3; -#endif - -#ifdef ENABLE_GCS - feat |= HTS_FEATURE_GCS; -#endif - -#ifdef HAVE_LIBDEFLATE - feat |= HTS_FEATURE_LIBDEFLATE; -#endif - -#ifdef HAVE_LIBLZMA - feat |= HTS_FEATURE_LZMA; -#endif - -#ifdef HAVE_LIBBZ2 - feat |= HTS_FEATURE_BZIP2; -#endif - - return feat; -} - -const char *hts_test_feature(unsigned int id) { - unsigned int feat = hts_features(); - - switch (id) { - case HTS_FEATURE_CONFIGURE: - return feat & HTS_FEATURE_CONFIGURE ? 
"yes" : NULL; - case HTS_FEATURE_PLUGINS: - return feat & HTS_FEATURE_PLUGINS ? "yes" : NULL; - case HTS_FEATURE_LIBCURL: - return feat & HTS_FEATURE_LIBCURL ? "yes" : NULL; - case HTS_FEATURE_S3: - return feat & HTS_FEATURE_S3 ? "yes" : NULL; - case HTS_FEATURE_GCS: - return feat & HTS_FEATURE_GCS ? "yes" : NULL; - case HTS_FEATURE_LIBDEFLATE: - return feat & HTS_FEATURE_LIBDEFLATE ? "yes" : NULL; - case HTS_FEATURE_BZIP2: - return feat & HTS_FEATURE_BZIP2 ? "yes" : NULL; - case HTS_FEATURE_LZMA: - return feat & HTS_FEATURE_LZMA ? "yes" : NULL; - - case HTS_FEATURE_HTSCODECS: - return htscodecs_version(); - - case HTS_FEATURE_CC: - return HTS_CC; - case HTS_FEATURE_CFLAGS: - return HTS_CFLAGS; - case HTS_FEATURE_LDFLAGS: - return HTS_LDFLAGS; - case HTS_FEATURE_CPPFLAGS: - return HTS_CPPFLAGS; - - default: - fprintf(stderr, "Unknown feature code: %u\n", id); - } - - return NULL; -} - -// Note this implementation also means we can just "strings" the library -// to find the configuration parameters. 
-const char *hts_feature_string(void) { - static char config[1200]; - const char *flags= - -#ifdef PACKAGE_URL - "build=configure " -#else - "build=Makefile " -#endif - -#ifdef HAVE_LIBCURL - "libcurl=yes " -#else - "libcurl=no " -#endif - -#ifdef ENABLE_S3 - "S3=yes " -#else - "S3=no " -#endif - -#ifdef ENABLE_GCS - "GCS=yes " -#else - "GCS=no " -#endif - -#ifdef HAVE_LIBDEFLATE - "libdeflate=yes " -#else - "libdeflate=no " -#endif - -#ifdef HAVE_LIBLZMA - "lzma=yes " -#else - "lzma=no " -#endif - -#ifdef HAVE_LIBBZ2 - "bzip2=yes " -#else - "bzip2=no " -#endif - -// "plugins=" must stay at the end as it is followed by "plugin-path=" -#ifdef ENABLE_PLUGINS - "plugins=yes"; -#else - "plugins=no"; -#endif - -#ifdef ENABLE_PLUGINS - snprintf(config, sizeof(config), - "%s plugin-path=%.1000s htscodecs=%.40s", - flags, hts_plugin_path(), htscodecs_version()); -#else - snprintf(config, sizeof(config), - "%s htscodecs=%.40s", - flags, htscodecs_version()); -#endif - return config; -} - - -HTSLIB_EXPORT -const unsigned char seq_nt16_table[256] = { - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15, - 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, - 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, - 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, - 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, - - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15 -}; - -HTSLIB_EXPORT -const char seq_nt16_str[] = "=ACMGRSVTWYHKDBN"; - 
-HTSLIB_EXPORT -const int seq_nt16_int[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; - -/********************** - *** Basic file I/O *** - **********************/ - -static enum htsFormatCategory format_category(enum htsExactFormat fmt) -{ - switch (fmt) { - case bam: - case sam: - case cram: - case fastq_format: - case fasta_format: - return sequence_data; - - case vcf: - case bcf: - return variant_data; - - case bai: - case crai: - case csi: - case fai_format: - case fqi_format: - case gzi: - case tbi: - return index_file; - - case bed: - case d4_format: - return region_list; - - case htsget: - case hts_crypt4gh_format: - return unknown_category; - - case unknown_format: - case binary_format: - case text_format: - case empty_format: - case format_maximum: - break; - } - - return unknown_category; -} - -// Decompress several hundred bytes by peeking at the file, which must be -// positioned at the start of a GZIP block. -static ssize_t -decompress_peek_gz(hFILE *fp, unsigned char *dest, size_t destsize) -{ - unsigned char buffer[2048]; - z_stream zs; - ssize_t npeek = hpeek(fp, buffer, sizeof buffer); - - if (npeek < 0) return -1; - - zs.zalloc = NULL; - zs.zfree = NULL; - zs.next_in = buffer; - zs.avail_in = npeek; - zs.next_out = dest; - zs.avail_out = destsize; - if (inflateInit2(&zs, 31) != Z_OK) return -1; - - int ret; - const unsigned char *last_in = buffer; - while (zs.avail_out > 0) { - ret = inflate(&zs, Z_SYNC_FLUSH); - if (ret == Z_STREAM_END) { - if (last_in == zs.next_in) - break; // Paranoia to avoid potential looping. 
Shouldn't happen - else - last_in = zs.next_in; - inflateReset(&zs); - } else if (ret != Z_OK) { - // eg Z_BUF_ERROR due to avail_in/out becoming zero - break; - } - } - - // NB: zs.total_out is changed by inflateReset, so use pointer diff instead - destsize = zs.next_out - dest; - inflateEnd(&zs); - - return destsize; -} - -#ifdef HAVE_LIBLZMA -// Similarly decompress a portion by peeking at the file, which must be -// positioned at the start of the file. -static ssize_t -decompress_peek_xz(hFILE *fp, unsigned char *dest, size_t destsize) -{ - unsigned char buffer[2048]; - ssize_t npeek = hpeek(fp, buffer, sizeof buffer); - if (npeek < 0) return -1; - - lzma_stream ls = LZMA_STREAM_INIT; - if (lzma_stream_decoder(&ls, lzma_easy_decoder_memusage(9), 0) != LZMA_OK) - return -1; - - ls.next_in = buffer; - ls.avail_in = npeek; - ls.next_out = dest; - ls.avail_out = destsize; - - int r = lzma_code(&ls, LZMA_RUN); - if (! (r == LZMA_OK || r == LZMA_STREAM_END)) { - lzma_end(&ls); - return -1; - } - - destsize = ls.total_out; - lzma_end(&ls); - - return destsize; -} -#endif - -// Parse "x.y" text, taking care because the string is not NUL-terminated -// and filling in major/minor only when the digits are followed by a delimiter, -// so we don't misread "1.10" as "1.1" due to reaching the end of the buffer. 
-static void -parse_version(htsFormat *fmt, const unsigned char *u, const unsigned char *ulim) -{ - const char *s = (const char *) u; - const char *slim = (const char *) ulim; - short v; - - fmt->version.major = fmt->version.minor = -1; - - for (v = 0; s < slim && isdigit_c(*s); s++) - v = 10 * v + *s - '0'; - - if (s < slim) { - fmt->version.major = v; - if (*s == '.') { - s++; - for (v = 0; s < slim && isdigit_c(*s); s++) - v = 10 * v + *s - '0'; - if (s < slim) - fmt->version.minor = v; - } - else - fmt->version.minor = 0; - } -} - -static int -cmp_nonblank(const char *key, const unsigned char *u, const unsigned char *ulim) -{ - const unsigned char *ukey = (const unsigned char *) key; - - while (*ukey) - if (u >= ulim) return +1; - else if (isspace_c(*u)) u++; - else if (*u != *ukey) return (*ukey < *u)? -1 : +1; - else u++, ukey++; - - return 0; -} - -static int is_text_only(const unsigned char *u, const unsigned char *ulim) -{ - for (; u < ulim; u++) - if (! (*u >= ' ' || *u == '\t' || *u == '\r' || *u == '\n')) - return 0; - - return 1; -} - -static int is_fastaq(const unsigned char *u, const unsigned char *ulim) -{ - const unsigned char *eol = memchr(u, '\n', ulim - u); - - // Check that the first line is entirely textual - if (! is_text_only(u, eol? eol : ulim)) return 0; - - // If the first line is very long, consider the file to indeed be FASTA/Q - if (eol == NULL) return 1; - - u = eol+1; // Now points to the first character of the second line - - // Scan over all base-encoding letters (including 'N' but not SEQ's '=') - while (u < ulim && (seq_nt16_table[*u] != 15 || toupper(*u) == 'N')) { - if (*u == '=') return 0; - u++; - } - - return (u == ulim || *u == '\r' || *u == '\n')? 1 : 0; -} - -// Parse tab-delimited text, filling in a string of column types and returning -// the number of columns spotted (within [u,ulim), and up to column_len) or -1 -// if non-printable characters were seen. 
Column types: -// i: integer, s: strand sign, C: CIGAR, O: SAM optional field, Z: anything -static int -parse_tabbed_text(char *columns, int column_len, - const unsigned char *u, const unsigned char *ulim, - int *complete) -{ - const char *str = (const char *) u; - const char *slim = (const char *) ulim; - const char *s; - int ncolumns = 0; - - enum { digit = 1, leading_sign = 2, cigar_operator = 4, other = 8 }; - unsigned seen = 0; - *complete = 0; - - for (s = str; s < slim; s++) - if (*s >= ' ') { - if (isdigit_c(*s)) - seen |= digit; - else if ((*s == '+' || *s == '-') && s == str) - seen |= leading_sign; - else if (strchr(BAM_CIGAR_STR, *s) && s > str && isdigit_c(s[-1])) - seen |= cigar_operator; - else - seen |= other; - } - else if (*s == '\t' || *s == '\r' || *s == '\n') { - size_t len = s - str; - char type; - - if (seen == digit || seen == (leading_sign|digit)) type = 'i'; - else if (seen == (digit|cigar_operator)) type = 'C'; - else if (len == 1) - switch (str[0]) { - case '*': type = 'C'; break; - case '+': case '-': case '.': type = 's'; break; - default: type = 'Z'; break; - } - else if (len >= 5 && str[2] == ':' && str[4] == ':') type = 'O'; - else type = 'Z'; - - columns[ncolumns++] = type; - if (*s != '\t' || ncolumns >= column_len - 1) { - *complete = 1; // finished the line or more columns than needed - break; - } - - str = s + 1; - seen = 0; - } - else return -1; - - columns[ncolumns] = '\0'; - return ncolumns; -} - -// Match COLUMNS as a prefix against PATTERN (so COLUMNS may run out first). -// Returns len(COLUMNS) (modulo '+'), or 0 if there is a mismatched entry. -static int colmatch(const char *columns, const char *pattern) -{ - int i; - for (i = 0; columns[i] != '\0'; i++) { - if (pattern[i] == '+') return i; - if (! 
(columns[i] == pattern[i] || pattern[i] == 'Z')) return 0; - } - - return i; -} - -int hts_detect_format(hFILE *hfile, htsFormat *fmt) -{ - return hts_detect_format2(hfile, NULL, fmt); -} - -int hts_detect_format2(hFILE *hfile, const char *fname, htsFormat *fmt) -{ - char extension[HTS_MAX_EXT_LEN], columns[24]; - unsigned char s[1024]; - int complete = 0; - ssize_t len = hpeek(hfile, s, 18); - if (len < 0) return -1; - - fmt->category = unknown_category; - fmt->format = unknown_format; - fmt->version.major = fmt->version.minor = -1; - fmt->compression = no_compression; - fmt->compression_level = -1; - fmt->specific = NULL; - - if (len >= 2 && s[0] == 0x1f && s[1] == 0x8b) { - // The stream is either gzip-compressed or BGZF-compressed. - // Determine which, and decompress the first few records or lines. - fmt->compression = gzip; - if (len >= 18 && (s[3] & 4)) { - if (memcmp(&s[12], "BC\2\0", 4) == 0) - fmt->compression = bgzf; - else if (memcmp(&s[12], "RAZF", 4) == 0) - fmt->compression = razf_compression; - } - if (len >= 9 && s[2] == 8) - fmt->compression_level = (s[8] == 2)? 9 : (s[8] == 4)? 1 : -1; - - len = decompress_peek_gz(hfile, s, sizeof s); - } - else if (len >= 10 && memcmp(s, "BZh", 3) == 0 && - (memcmp(&s[4], "\x31\x41\x59\x26\x53\x59", 6) == 0 || - memcmp(&s[4], "\x17\x72\x45\x38\x50\x90", 6) == 0)) { - fmt->compression = bzip2_compression; - fmt->compression_level = s[3] - '0'; - // Decompressing via libbz2 produces no output until it has a whole - // block (of size 100Kb x level), which is too large for peeking. - // So unfortunately we can recognise bzip2 but not the contents, - // except that \x1772... magic indicates the stream is empty. - if (s[4] == '\x31') return 0; - else len = 0; - } - else if (len >= 6 && memcmp(s, "\xfd""7zXZ\0", 6) == 0) { - fmt->compression = xz_compression; -#ifdef HAVE_LIBLZMA - len = decompress_peek_xz(hfile, s, sizeof s); -#else - // Without liblzma, we can't recognise the decompressed contents. 
- return 0; -#endif - } - else if (len >= 4 && memcmp(s, "\x28\xb5\x2f\xfd", 4) == 0) { - fmt->compression = zstd_compression; - return 0; - } - else { - len = hpeek(hfile, s, sizeof s); - } - if (len < 0) return -1; - - if (len == 0) { - fmt->format = empty_format; - return 0; - } - - // We avoid using filename extensions wherever possible (as filenames are - // not always available), but in a few cases they must be considered: - // - FASTA/Q indexes are simply tab-separated text; files that match these - // patterns but not the fai/fqi extension are usually generic BED files - // - GZI indexes have no magic numbers so can only be detected by filename - if (fname && strcmp(fname, "-") != 0) { - char *s; - if (find_file_extension(fname, extension) < 0) extension[0] = '\0'; - for (s = extension; *s; s++) *s = tolower_c(*s); - } - else extension[0] = '\0'; - - if (len >= 6 && memcmp(s,"CRAM",4) == 0 && s[4]>=1 && s[4]<=7 && s[5]<=7) { - fmt->category = sequence_data; - fmt->format = cram; - fmt->version.major = s[4], fmt->version.minor = s[5]; - fmt->compression = custom; - return 0; - } - else if (len >= 4 && s[3] <= '\4') { - if (memcmp(s, "BAM\1", 4) == 0) { - fmt->category = sequence_data; - fmt->format = bam; - // TODO Decompress enough to pick version from @HD-VN header - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (memcmp(s, "BAI\1", 4) == 0) { - fmt->category = index_file; - fmt->format = bai; - fmt->version.major = -1, fmt->version.minor = -1; - return 0; - } - else if (memcmp(s, "BCF\4", 4) == 0) { - fmt->category = variant_data; - fmt->format = bcf; - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (memcmp(s, "BCF\2", 4) == 0) { - fmt->category = variant_data; - fmt->format = bcf; - fmt->version.major = s[3]; - fmt->version.minor = (len >= 5 && s[4] <= 2)? 
s[4] : 0; - return 0; - } - else if (memcmp(s, "CSI\1", 4) == 0) { - fmt->category = index_file; - fmt->format = csi; - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (memcmp(s, "TBI\1", 4) == 0) { - fmt->category = index_file; - fmt->format = tbi; - return 0; - } - // GZI indexes have no magic numbers, so must be recognised solely by - // filename extension. - else if (strcmp(extension, "gzi") == 0) { - fmt->category = index_file; - fmt->format = gzi; - return 0; - } - } - else if (len >= 16 && memcmp(s, "##fileformat=VCF", 16) == 0) { - fmt->category = variant_data; - fmt->format = vcf; - if (len >= 21 && s[16] == 'v') - parse_version(fmt, &s[17], &s[len]); - return 0; - } - else if (len >= 4 && s[0] == '@' && - (memcmp(s, "@HD\t", 4) == 0 || memcmp(s, "@SQ\t", 4) == 0 || - memcmp(s, "@RG\t", 4) == 0 || memcmp(s, "@PG\t", 4) == 0 || - memcmp(s, "@CO\t", 4) == 0)) { - fmt->category = sequence_data; - fmt->format = sam; - // @HD-VN is not guaranteed to be the first tag, but then @HD is - // not guaranteed to be present at all... - if (len >= 9 && memcmp(s, "@HD\tVN:", 7) == 0) - parse_version(fmt, &s[7], &s[len]); - else - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (len >= 8 && memcmp(s, "d4\xdd\xdd", 4) == 0) { - fmt->category = region_list; - fmt->format = d4_format; - // How to decode the D4 Format Version bytes is not yet specified - // so we don't try to set fmt->version.{major,minor}. 
- return 0; - } - else if (cmp_nonblank("{\"htsget\":", s, &s[len]) == 0) { - fmt->category = unknown_category; - fmt->format = htsget; - return 0; - } - else if (len > 8 && memcmp(s, "crypt4gh", 8) == 0) { - fmt->category = unknown_category; - fmt->format = hts_crypt4gh_format; - return 0; - } - else if (len >= 1 && s[0] == '>' && is_fastaq(s, &s[len])) { - fmt->category = sequence_data; - fmt->format = fasta_format; - return 0; - } - else if (len >= 1 && s[0] == '@' && is_fastaq(s, &s[len])) { - fmt->category = sequence_data; - fmt->format = fastq_format; - return 0; - } - else if (parse_tabbed_text(columns, sizeof columns, s, - &s[len], &complete) > 0) { - // A complete SAM line is at least 11 columns. On unmapped long reads may - // be missing two. (On mapped long reads we must have an @ header so long - // CIGAR is irrelevant.) - if (colmatch(columns, "ZiZiiCZiiZZOOOOOOOOOOOOOOOOOOOO+") - >= 9 + 2*complete) { - fmt->category = sequence_data; - fmt->format = sam; - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (fmt->compression == gzip && colmatch(columns, "iiiiii") == 6) { - fmt->category = index_file; - fmt->format = crai; - return 0; - } - else if (strstr(extension, "fqi") && colmatch(columns, "Ziiiii") == 6) { - fmt->category = index_file; - fmt->format = fqi_format; - return 0; - } - else if (strstr(extension, "fai") && colmatch(columns, "Ziiii") == 5) { - fmt->category = index_file; - fmt->format = fai_format; - return 0; - } - else if (colmatch(columns, "Zii+") >= 3) { - fmt->category = region_list; - fmt->format = bed; - return 0; - } - } - - // Arbitrary text files can be read using hts_getline(). - if (is_text_only(s, &s[len])) fmt->format = text_format; - - // Nothing recognised: leave unset fmt-> fields as unknown. 
- return 0; -} - -char *hts_format_description(const htsFormat *format) -{ - kstring_t str = { 0, 0, NULL }; - - switch (format->format) { - case sam: kputs("SAM", &str); break; - case bam: kputs("BAM", &str); break; - case cram: kputs("CRAM", &str); break; - case fasta_format: kputs("FASTA", &str); break; - case fastq_format: kputs("FASTQ", &str); break; - case vcf: kputs("VCF", &str); break; - case bcf: - if (format->version.major == 1) kputs("Legacy BCF", &str); - else kputs("BCF", &str); - break; - case bai: kputs("BAI", &str); break; - case crai: kputs("CRAI", &str); break; - case csi: kputs("CSI", &str); break; - case fai_format: kputs("FASTA-IDX", &str); break; - case fqi_format: kputs("FASTQ-IDX", &str); break; - case gzi: kputs("GZI", &str); break; - case tbi: kputs("Tabix", &str); break; - case bed: kputs("BED", &str); break; - case d4_format: kputs("D4", &str); break; - case htsget: kputs("htsget", &str); break; - case hts_crypt4gh_format: kputs("crypt4gh", &str); break; - case empty_format: kputs("empty", &str); break; - default: kputs("unknown", &str); break; - } - - if (format->version.major >= 0) { - kputs(" version ", &str); - kputw(format->version.major, &str); - if (format->version.minor >= 0) { - kputc('.', &str); - kputw(format->version.minor, &str); - } - } - - switch (format->compression) { - case bzip2_compression: kputs(" bzip2-compressed", &str); break; - case razf_compression: kputs(" legacy-RAZF-compressed", &str); break; - case xz_compression: kputs(" XZ-compressed", &str); break; - case zstd_compression: kputs(" Zstandard-compressed", &str); break; - case custom: kputs(" compressed", &str); break; - case gzip: kputs(" gzip-compressed", &str); break; - case bgzf: - switch (format->format) { - case bam: - case bcf: - case csi: - case tbi: - // These are by definition BGZF, so just use the generic term - kputs(" compressed", &str); - break; - default: - kputs(" BGZF-compressed", &str); - break; - } - break; - default: break; - } - - switch 
(format->category) { - case sequence_data: kputs(" sequence", &str); break; - case variant_data: kputs(" variant calling", &str); break; - case index_file: kputs(" index", &str); break; - case region_list: kputs(" genomic region", &str); break; - default: break; - } - - if (format->compression == no_compression) - switch (format->format) { - case text_format: - case sam: - case crai: - case vcf: - case bed: - case fai_format: - case fqi_format: - case fasta_format: - case fastq_format: - case htsget: - kputs(" text", &str); - break; - - case empty_format: - break; - - default: - kputs(" data", &str); - break; - } - else - kputs(" data", &str); - - return ks_release(&str); -} - -htsFile *hts_open_format(const char *fn, const char *mode, const htsFormat *fmt) -{ - char smode[101], *cp, *cp2, *mode_c, *uncomp = NULL; - htsFile *fp = NULL; - hFILE *hfile = NULL; - char fmt_code = '\0'; - // see enum htsExactFormat in htslib/hts.h - const char format_to_mode[] = "\0g\0\0b\0c\0\0b\0g\0\0\0\0\0Ff\0\0"; - - strncpy(smode, mode, 99); - smode[99]=0; - if ((cp = strchr(smode, ','))) - *cp = '\0'; - - // Migrate format code (b or c) to the end of the smode buffer. - for (cp2 = cp = smode; *cp; cp++) { - if (*cp == 'b') - fmt_code = 'b'; - else if (*cp == 'c') - fmt_code = 'c'; - else { - *cp2++ = *cp; - // Cache the uncompress flag 'u' pos if present - if (!uncomp && (*cp == 'u')) { - uncomp = cp2 - 1; - } - } - } - mode_c = cp2; - *cp2++ = fmt_code; - *cp2++ = 0; - - // Set or reset the format code if opts->format is used - if (fmt && fmt->format > unknown_format - && fmt->format < sizeof(format_to_mode)) { - *mode_c = format_to_mode[fmt->format]; - } - - // Uncompressed bam/bcf is not supported, change 'u' to '0' on write - if (uncomp && *mode_c == 'b' && (strchr(smode, 'w') || strchr(smode, 'a'))) { - *uncomp = '0'; - } - - // If we really asked for a compressed text format then mode_c above will - // point to nul. We set to 'z' to enable bgzf. 
- if (strchr(mode, 'w') && fmt && fmt->compression == bgzf) { - if (fmt->format == sam || fmt->format == vcf || fmt->format == text_format) - *mode_c = 'z'; - } - - char *rmme = NULL, *fnidx = strstr(fn, HTS_IDX_DELIM); - if ( fnidx ) { - rmme = strdup(fn); - if ( !rmme ) goto error; - rmme[fnidx-fn] = 0; - fn = rmme; - } - - hfile = hopen(fn, smode); - if (hfile == NULL) goto error; - - fp = hts_hopen(hfile, fn, smode); - if (fp == NULL) goto error; - - // Compensate for the loss of exactness in htsExactFormat. - // hts_hopen returns generics such as binary or text, but we - // have been given something explicit here so use that instead. - if (fp->is_write && fmt && - (fmt->format == bam || fmt->format == sam || - fmt->format == vcf || fmt->format == bcf || - fmt->format == bed || fmt->format == fasta_format || - fmt->format == fastq_format)) - fp->format.format = fmt->format; - - if (fmt && fmt->specific) - if (hts_opt_apply(fp, fmt->specific) != 0) - goto error; - - if ( rmme ) free(rmme); - return fp; - -error: - hts_log_error("Failed to open file \"%s\"%s%s", fn, - errno ? " : " : "", errno ? strerror(errno) : ""); - if ( rmme ) free(rmme); - - if (hfile) - hclose_abruptly(hfile); - - return NULL; -} - -htsFile *hts_open(const char *fn, const char *mode) { - return hts_open_format(fn, mode, NULL); -} - -/* - * Splits str into a prefix, delimiter ('\0' or delim), and suffix, writing - * the prefix in lowercase into buf and returning a pointer to the suffix. - * On return, buf is always NUL-terminated; thus assumes that the "keyword" - * prefix should be one of several known values of maximum length buflen-2. - * (If delim is not found, returns a pointer to the '\0'.) - */ -static const char * -scan_keyword(const char *str, char delim, char *buf, size_t buflen) -{ - size_t i = 0; - while (*str && *str != delim) { - if (i < buflen-1) buf[i++] = tolower_c(*str); - str++; - } - - buf[i] = '\0'; - return *str? 
str+1 : str; -} - -/* - * Parses arg and appends it to the option list. - * - * Returns 0 on success; - * -1 on failure. - */ -int hts_opt_add(hts_opt **opts, const char *c_arg) { - hts_opt *o, *t; - char *val; - - /* - * IMPORTANT!!! - * If you add another string option here, don't forget to also add - * it to the case statement in hts_opt_apply. - */ - - if (!c_arg) - return -1; - - if (!(o = malloc(sizeof(*o)))) - return -1; - - if (!(o->arg = strdup(c_arg))) { - free(o); - return -1; - } - - if (!(val = strchr(o->arg, '='))) - val = "1"; // assume boolean - else - *val++ = '\0'; - - if (strcmp(o->arg, "decode_md") == 0 || - strcmp(o->arg, "DECODE_MD") == 0) - o->opt = CRAM_OPT_DECODE_MD, o->val.i = atoi(val); - - else if (strcmp(o->arg, "verbosity") == 0 || - strcmp(o->arg, "VERBOSITY") == 0) - o->opt = CRAM_OPT_VERBOSITY, o->val.i = atoi(val); - - else if (strcmp(o->arg, "seqs_per_slice") == 0 || - strcmp(o->arg, "SEQS_PER_SLICE") == 0) - o->opt = CRAM_OPT_SEQS_PER_SLICE, o->val.i = atoi(val); - - else if (strcmp(o->arg, "bases_per_slice") == 0 || - strcmp(o->arg, "BASES_PER_SLICE") == 0) - o->opt = CRAM_OPT_BASES_PER_SLICE, o->val.i = atoi(val); - - else if (strcmp(o->arg, "slices_per_container") == 0 || - strcmp(o->arg, "SLICES_PER_CONTAINER") == 0) - o->opt = CRAM_OPT_SLICES_PER_CONTAINER, o->val.i = atoi(val); - - else if (strcmp(o->arg, "embed_ref") == 0 || - strcmp(o->arg, "EMBED_REF") == 0) - o->opt = CRAM_OPT_EMBED_REF, o->val.i = atoi(val); - - else if (strcmp(o->arg, "no_ref") == 0 || - strcmp(o->arg, "NO_REF") == 0) - o->opt = CRAM_OPT_NO_REF, o->val.i = atoi(val); - - else if (strcmp(o->arg, "pos_delta") == 0 || - strcmp(o->arg, "POS_DELTA") == 0) - o->opt = CRAM_OPT_POS_DELTA, o->val.i = atoi(val); - - else if (strcmp(o->arg, "ignore_md5") == 0 || - strcmp(o->arg, "IGNORE_MD5") == 0) - o->opt = CRAM_OPT_IGNORE_MD5, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_bzip2") == 0 || - strcmp(o->arg, "USE_BZIP2") == 0) - o->opt = 
CRAM_OPT_USE_BZIP2, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_rans") == 0 || - strcmp(o->arg, "USE_RANS") == 0) - o->opt = CRAM_OPT_USE_RANS, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_lzma") == 0 || - strcmp(o->arg, "USE_LZMA") == 0) - o->opt = CRAM_OPT_USE_LZMA, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_tok") == 0 || - strcmp(o->arg, "USE_TOK") == 0) - o->opt = CRAM_OPT_USE_TOK, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_fqz") == 0 || - strcmp(o->arg, "USE_FQZ") == 0) - o->opt = CRAM_OPT_USE_FQZ, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_arith") == 0 || - strcmp(o->arg, "USE_ARITH") == 0) - o->opt = CRAM_OPT_USE_ARITH, o->val.i = atoi(val); - - else if (strcmp(o->arg, "fast") == 0 || - strcmp(o->arg, "FAST") == 0) - o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_FAST; - - else if (strcmp(o->arg, "normal") == 0 || - strcmp(o->arg, "NORMAL") == 0) - o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_NORMAL; - - else if (strcmp(o->arg, "small") == 0 || - strcmp(o->arg, "SMALL") == 0) - o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_SMALL; - - else if (strcmp(o->arg, "archive") == 0 || - strcmp(o->arg, "ARCHIVE") == 0) - o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_ARCHIVE; - - else if (strcmp(o->arg, "reference") == 0 || - strcmp(o->arg, "REFERENCE") == 0) - o->opt = CRAM_OPT_REFERENCE, o->val.s = val; - - else if (strcmp(o->arg, "version") == 0 || - strcmp(o->arg, "VERSION") == 0) - o->opt = CRAM_OPT_VERSION, o->val.s =val; - - else if (strcmp(o->arg, "multi_seq_per_slice") == 0 || - strcmp(o->arg, "MULTI_SEQ_PER_SLICE") == 0) - o->opt = CRAM_OPT_MULTI_SEQ_PER_SLICE, o->val.i = atoi(val); - - else if (strcmp(o->arg, "nthreads") == 0 || - strcmp(o->arg, "NTHREADS") == 0) - o->opt = HTS_OPT_NTHREADS, o->val.i = atoi(val); - - else if (strcmp(o->arg, "cache_size") == 0 || - strcmp(o->arg, "CACHE_SIZE") == 0) { - char *endp; - o->opt = HTS_OPT_CACHE_SIZE; - o->val.i = strtol(val, &endp, 0); - // NB: 
Doesn't support floats, eg 1.5g - // TODO: extend hts_parse_decimal? See also samtools sort. - switch (*endp) { - case 'g': case 'G': o->val.i *= 1024; // fall through - case 'm': case 'M': o->val.i *= 1024; // fall through - case 'k': case 'K': o->val.i *= 1024; break; - case '\0': break; - default: - hts_log_error("Unrecognised cache size suffix '%c'", *endp); - free(o->arg); - free(o); - return -1; - } - } - - else if (strcmp(o->arg, "required_fields") == 0 || - strcmp(o->arg, "REQUIRED_FIELDS") == 0) - o->opt = CRAM_OPT_REQUIRED_FIELDS, o->val.i = strtol(val, NULL, 0); - - else if (strcmp(o->arg, "lossy_names") == 0 || - strcmp(o->arg, "LOSSY_NAMES") == 0) - o->opt = CRAM_OPT_LOSSY_NAMES, o->val.i = strtol(val, NULL, 0); - - else if (strcmp(o->arg, "name_prefix") == 0 || - strcmp(o->arg, "NAME_PREFIX") == 0) - o->opt = CRAM_OPT_PREFIX, o->val.s = val; - - else if (strcmp(o->arg, "store_md") == 0 || - strcmp(o->arg, "store_md") == 0) - o->opt = CRAM_OPT_STORE_MD, o->val.i = atoi(val); - - else if (strcmp(o->arg, "store_nm") == 0 || - strcmp(o->arg, "store_nm") == 0) - o->opt = CRAM_OPT_STORE_NM, o->val.i = atoi(val); - - else if (strcmp(o->arg, "block_size") == 0 || - strcmp(o->arg, "BLOCK_SIZE") == 0) - o->opt = HTS_OPT_BLOCK_SIZE, o->val.i = strtol(val, NULL, 0); - - else if (strcmp(o->arg, "level") == 0 || - strcmp(o->arg, "LEVEL") == 0) - o->opt = HTS_OPT_COMPRESSION_LEVEL, o->val.i = strtol(val, NULL, 0); - - else if (strcmp(o->arg, "filter") == 0 || - strcmp(o->arg, "FILTER") == 0) - o->opt = HTS_OPT_FILTER, o->val.s = val; - - else if (strcmp(o->arg, "fastq_aux") == 0 || - strcmp(o->arg, "FASTQ_AUX") == 0) - o->opt = FASTQ_OPT_AUX, o->val.s = val; - - else if (strcmp(o->arg, "fastq_barcode") == 0 || - strcmp(o->arg, "FASTQ_BARCODE") == 0) - o->opt = FASTQ_OPT_BARCODE, o->val.s = val; - - else if (strcmp(o->arg, "fastq_rnum") == 0 || - strcmp(o->arg, "FASTQ_RNUM") == 0) - o->opt = FASTQ_OPT_RNUM, o->val.i = 1; - - else if (strcmp(o->arg, "fastq_casava") == 
0 || - strcmp(o->arg, "FASTQ_CASAVA") == 0) - o->opt = FASTQ_OPT_CASAVA, o->val.i = 1; - - else if (strcmp(o->arg, "fastq_name2") == 0 || - strcmp(o->arg, "FASTQ_NAME2") == 0) - o->opt = FASTQ_OPT_NAME2, o->val.i = 1; - - else { - hts_log_error("Unknown option '%s'", o->arg); - free(o->arg); - free(o); - return -1; - } - - o->next = NULL; - - // Append; assumes small list. - if (*opts) { - t = *opts; - while (t->next) - t = t->next; - t->next = o; - } else { - *opts = o; - } - - return 0; -} - -/* - * Applies an hts_opt option list to a given htsFile. - * - * Returns 0 on success - * -1 on failure - */ -int hts_opt_apply(htsFile *fp, hts_opt *opts) { - hts_opt *last = NULL; - - for (; opts; opts = (last=opts)->next) { - switch (opts->opt) { - case CRAM_OPT_REFERENCE: - if (!(fp->fn_aux = strdup(opts->val.s))) - return -1; - // fall through - case CRAM_OPT_VERSION: - case CRAM_OPT_PREFIX: - case HTS_OPT_FILTER: - case FASTQ_OPT_AUX: - case FASTQ_OPT_BARCODE: - if (hts_set_opt(fp, opts->opt, opts->val.s) != 0) - return -1; - break; - default: - if (hts_set_opt(fp, opts->opt, opts->val.i) != 0) - return -1; - break; - } - } - - return 0; -} - -/* - * Frees an hts_opt list. - */ -void hts_opt_free(hts_opt *opts) { - hts_opt *last = NULL; - while (opts) { - opts = (last=opts)->next; - free(last->arg); - free(last); - } -} - - -/* - * Tokenise options as (key(=value)?,)*(key(=value)?)? - * NB: No provision for ',' appearing in the value! - * Add backslashing rules? - * - * This could be used as part of a general command line option parser or - * as a string concatenated onto the file open mode. - * - * Returns 0 on success - * -1 on failure. 
- */ -int hts_parse_opt_list(htsFormat *fmt, const char *str) { - while (str && *str) { - const char *str_start; - int len; - char arg[8001]; - - while (*str && *str == ',') - str++; - - for (str_start = str; *str && *str != ','; str++); - len = str - str_start; - - // Produce a nul terminated copy of the option - strncpy(arg, str_start, len < 8000 ? len : 8000); - arg[len < 8000 ? len : 8000] = '\0'; - - if (hts_opt_add((hts_opt **)&fmt->specific, arg) != 0) - return -1; - - if (*str) - str++; - } - - return 0; -} - -/* - * Accepts a string file format (sam, bam, cram, vcf, bam) optionally - * followed by a comma separated list of key=value options and splits - * these up into the fields of htsFormat struct. - * - * format is assumed to be already initialised, either to blank - * "unknown" values or via previous hts_opt_add calls. - * - * Returns 0 on success - * -1 on failure. - */ -int hts_parse_format(htsFormat *format, const char *str) { - char fmt[8]; - const char *cp = scan_keyword(str, ',', fmt, sizeof fmt); - - format->version.minor = 0; // unknown - format->version.major = 0; // unknown - - if (strcmp(fmt, "sam") == 0) { - format->category = sequence_data; - format->format = sam; - format->compression = no_compression; - format->compression_level = 0; - } else if (strcmp(fmt, "sam.gz") == 0) { - format->category = sequence_data; - format->format = sam; - format->compression = bgzf; - format->compression_level = -1; - } else if (strcmp(fmt, "bam") == 0) { - format->category = sequence_data; - format->format = bam; - format->compression = bgzf; - format->compression_level = -1; - } else if (strcmp(fmt, "cram") == 0) { - format->category = sequence_data; - format->format = cram; - format->compression = custom; - format->compression_level = -1; - } else if (strcmp(fmt, "vcf") == 0) { - format->category = variant_data; - format->format = vcf; - format->compression = no_compression; - format->compression_level = 0; - } else if (strcmp(fmt, "bcf") == 0) { - 
format->category = variant_data; - format->format = bcf; - format->compression = bgzf; - format->compression_level = -1; - } else if (strcmp(fmt, "fastq") == 0 || strcmp(fmt, "fq") == 0) { - format->category = sequence_data; - format->format = fastq_format; - format->compression = no_compression; - format->compression_level = 0; - } else if (strcmp(fmt, "fastq.gz") == 0 || strcmp(fmt, "fq.gz") == 0) { - format->category = sequence_data; - format->format = fastq_format; - format->compression = bgzf; - format->compression_level = 0; - } else if (strcmp(fmt, "fasta") == 0 || strcmp(fmt, "fa") == 0) { - format->category = sequence_data; - format->format = fasta_format; - format->compression = no_compression; - format->compression_level = 0; - } else if (strcmp(fmt, "fasta.gz") == 0 || strcmp(fmt, "fa.gz") == 0) { - format->category = sequence_data; - format->format = fasta_format; - format->compression = bgzf; - format->compression_level = 0; - } else { - return -1; - } - - return hts_parse_opt_list(format, cp); -} - - -/* - * Tokenise options as (key(=value)?,)*(key(=value)?)? - * NB: No provision for ',' appearing in the value! - * Add backslashing rules? - * - * This could be used as part of a general command line option parser or - * as a string concatenated onto the file open mode. - * - * Returns 0 on success - * -1 on failure. 
- */ -static int hts_process_opts(htsFile *fp, const char *opts) { - htsFormat fmt; - - fmt.specific = NULL; - if (hts_parse_opt_list(&fmt, opts) != 0) - return -1; - - if (hts_opt_apply(fp, fmt.specific) != 0) { - hts_opt_free(fmt.specific); - return -1; - } - - hts_opt_free(fmt.specific); - - return 0; -} - -static int hts_crypt4gh_redirect(const char *fn, const char *mode, - hFILE **hfile_ptr, htsFile *fp) { - hFILE *hfile1 = *hfile_ptr; - hFILE *hfile2 = NULL; - char fn_buf[512], *fn2 = fn_buf; - char mode2[102]; // Size set by sizeof(simple_mode) in hts_hopen() - const char *prefix = "crypt4gh:"; - size_t fn2_len = strlen(prefix) + strlen(fn) + 1; - int ret = -1; - - if (fn2_len > sizeof(fn_buf)) { - if (fn2_len >= INT_MAX) // Silence gcc format-truncation warning - return -1; - fn2 = malloc(fn2_len); - if (!fn2) return -1; - } - - // Reopen fn using the crypt4gh plug-in (if available) - snprintf(fn2, fn2_len, "%s%s", prefix, fn); - snprintf(mode2, sizeof(mode2), "%s%s", mode, strchr(mode, ':') ? "" : ":"); - hfile2 = hopen(fn2, mode2, "parent", hfile1, NULL); - if (hfile2) { - // Replace original hfile with the new one. The original is now - // enclosed within hfile2 - *hfile_ptr = hfile2; - ret = 0; - } - - if (fn2 != fn_buf) - free(fn2); - return ret; -} - -htsFile *hts_hopen(hFILE *hfile, const char *fn, const char *mode) -{ - hFILE *hfile_orig = hfile; - htsFile *fp = (htsFile*)calloc(1, sizeof(htsFile)); - char simple_mode[101], *cp, *opts; - simple_mode[100] = '\0'; - - if (fp == NULL) goto error; - - fp->fn = strdup(fn); - fp->is_be = ed_is_big(); - - // Split mode into simple_mode,opts strings - if ((cp = strchr(mode, ','))) { - strncpy(simple_mode, mode, cp-mode <= 100 ? 
cp-mode : 100); - simple_mode[cp-mode] = '\0'; - opts = cp+1; - } else { - strncpy(simple_mode, mode, 100); - opts = NULL; - } - - if (strchr(simple_mode, 'r')) { - const int max_loops = 5; // Should be plenty - int loops = 0; - if (hts_detect_format2(hfile, fn, &fp->format) < 0) goto error; - - // Deal with formats that re-direct an underlying file via a plug-in. - // Loops as we may have crypt4gh served via htsget, or - // crypt4gh-in-crypt4gh. - while (fp->format.format == htsget || - fp->format.format == hts_crypt4gh_format) { - // Ensure we don't get stuck in an endless redirect loop - if (++loops > max_loops) { - errno = ELOOP; - goto error; - } - - if (fp->format.format == htsget) { - hFILE *hfile2 = hopen_htsget_redirect(hfile, simple_mode); - if (hfile2 == NULL) goto error; - - hfile = hfile2; - } - else if (fp->format.format == hts_crypt4gh_format) { - if (hts_crypt4gh_redirect(fn, simple_mode, &hfile, fp) < 0) - goto error; - } - - // Re-detect format against the result of the redirection - if (hts_detect_format2(hfile, fn, &fp->format) < 0) goto error; - } - } - else if (strchr(simple_mode, 'w') || strchr(simple_mode, 'a')) { - htsFormat *fmt = &fp->format; - fp->is_write = 1; - - if (strchr(simple_mode, 'b')) fmt->format = binary_format; - else if (strchr(simple_mode, 'c')) fmt->format = cram; - else if (strchr(simple_mode, 'f')) fmt->format = fastq_format; - else if (strchr(simple_mode, 'F')) fmt->format = fasta_format; - else fmt->format = text_format; - - if (strchr(simple_mode, 'z')) fmt->compression = bgzf; - else if (strchr(simple_mode, 'g')) fmt->compression = gzip; - else if (strchr(simple_mode, 'u')) fmt->compression = no_compression; - else { - // No compression mode specified, set to the default for the format - switch (fmt->format) { - case binary_format: fmt->compression = bgzf; break; - case cram: fmt->compression = custom; break; - case fastq_format: fmt->compression = no_compression; break; - case fasta_format: fmt->compression = 
no_compression; break; - case text_format: fmt->compression = no_compression; break; - default: abort(); - } - } - - // Fill in category (if determinable; e.g. 'b' could be BAM or BCF) - fmt->category = format_category(fmt->format); - - fmt->version.major = fmt->version.minor = -1; - fmt->compression_level = -1; - fmt->specific = NULL; - } - else { errno = EINVAL; goto error; } - - switch (fp->format.format) { - case binary_format: - case bam: - case bcf: - fp->fp.bgzf = bgzf_hopen(hfile, simple_mode); - if (fp->fp.bgzf == NULL) goto error; - fp->is_bin = fp->is_bgzf = 1; - break; - - case cram: - fp->fp.cram = cram_dopen(hfile, fn, simple_mode); - if (fp->fp.cram == NULL) goto error; - if (!fp->is_write) - cram_set_option(fp->fp.cram, CRAM_OPT_DECODE_MD, -1); // auto - fp->is_cram = 1; - break; - - case empty_format: - case text_format: - case bed: - case fasta_format: - case fastq_format: - case sam: - case vcf: - if (fp->format.compression != no_compression) { - fp->fp.bgzf = bgzf_hopen(hfile, simple_mode); - if (fp->fp.bgzf == NULL) goto error; - fp->is_bgzf = 1; - } - else - fp->fp.hfile = hfile; - break; - - default: - errno = EFTYPE; - goto error; - } - - if (opts) - hts_process_opts(fp, opts); - - // If redirecting, close the original hFILE now (pedantically we would - // instead close it in hts_close(), but this a simplifying optimisation) - if (hfile != hfile_orig) hclose_abruptly(hfile_orig); - - return fp; - -error: - hts_log_error("Failed to open file %s", fn); - - // If redirecting, close the failed redirection hFILE that we have opened - if (hfile != hfile_orig) hclose_abruptly(hfile); - - if (fp) { - free(fp->fn); - free(fp->fn_aux); - free(fp); - } - return NULL; -} - -int hts_close(htsFile *fp) -{ - int ret = 0, save; - - switch (fp->format.format) { - case binary_format: - case bam: - case bcf: - ret = bgzf_close(fp->fp.bgzf); - break; - - case cram: - if (!fp->is_write) { - switch (cram_eof(fp->fp.cram)) { - case 2: - hts_log_warning("EOF marker 
is absent. The input is probably truncated"); - break; - case 0: /* not at EOF, but may not have wanted all seqs */ - default: /* case 1, expected EOF */ - break; - } - } - ret = cram_close(fp->fp.cram); - break; - - case empty_format: - case text_format: - case bed: - case fasta_format: - case fastq_format: - case sam: - case vcf: - if (fp->format.format == sam) - ret = sam_state_destroy(fp); - else if (fp->format.format == fastq_format || - fp->format.format == fasta_format) - fastq_state_destroy(fp); - - if (fp->format.compression != no_compression) - ret |= bgzf_close(fp->fp.bgzf); - else - ret |= hclose(fp->fp.hfile); - break; - - default: - ret = -1; - break; - } - - save = errno; - sam_hdr_destroy(fp->bam_header); - hts_idx_destroy(fp->idx); - hts_filter_free(fp->filter); - free(fp->fn); - free(fp->fn_aux); - free(fp->line.s); - free(fp); - errno = save; - return ret; -} - -int hts_flush(htsFile *fp) -{ - if (fp == NULL) return 0; - - switch (fp->format.format) { - case binary_format: - case bam: - case bcf: - return bgzf_flush(fp->fp.bgzf); - - case cram: - return cram_flush(fp->fp.cram); - - case empty_format: - case text_format: - case bed: - case fasta_format: - case fastq_format: - case sam: - case vcf: - if (fp->format.compression != no_compression) - return bgzf_flush(fp->fp.bgzf); - else - return hflush(fp->fp.hfile); - - default: - break; - } - - return 0; -} - -const htsFormat *hts_get_format(htsFile *fp) -{ - return fp? 
&fp->format : NULL; -} - -const char *hts_format_file_extension(const htsFormat *format) { - if (!format) - return "?"; - - switch (format->format) { - case sam: return "sam"; - case bam: return "bam"; - case bai: return "bai"; - case cram: return "cram"; - case crai: return "crai"; - case vcf: return "vcf"; - case bcf: return "bcf"; - case csi: return "csi"; - case fai_format: return "fai"; - case fqi_format: return "fqi"; - case gzi: return "gzi"; - case tbi: return "tbi"; - case bed: return "bed"; - case d4_format: return "d4"; - case fasta_format: return "fa"; - case fastq_format: return "fq"; - default: return "?"; - } -} - -static hFILE *hts_hfile(htsFile *fp) { - switch (fp->format.format) { - case binary_format:// fall through - case bcf: // fall through - case bam: return bgzf_hfile(fp->fp.bgzf); - case cram: return cram_hfile(fp->fp.cram); - case text_format: return fp->fp.hfile; - case vcf: // fall through - case fastq_format: // fall through - case fasta_format: // fall through - case sam: return fp->format.compression != no_compression - ? bgzf_hfile(fp->fp.bgzf) - : fp->fp.hfile; - default: return NULL; - } -} - -int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...) { - int r; - va_list args; - - switch (opt) { - case HTS_OPT_NTHREADS: { - va_start(args, opt); - int nthreads = va_arg(args, int); - va_end(args); - return hts_set_threads(fp, nthreads); - } - - case HTS_OPT_BLOCK_SIZE: { - hFILE *hf = hts_hfile(fp); - - if (hf) { - va_start(args, opt); - if (hfile_set_blksize(hf, va_arg(args, int)) != 0) - hts_log_warning("Failed to change block size"); - va_end(args); - } - else { - // To do - implement for vcf/bcf. 
- hts_log_warning("Cannot change block size for this format"); - } - - return 0; - } - - case HTS_OPT_THREAD_POOL: { - va_start(args, opt); - htsThreadPool *p = va_arg(args, htsThreadPool *); - va_end(args); - return hts_set_thread_pool(fp, p); - } - - case HTS_OPT_CACHE_SIZE: { - va_start(args, opt); - int cache_size = va_arg(args, int); - va_end(args); - hts_set_cache_size(fp, cache_size); - return 0; - } - - case FASTQ_OPT_CASAVA: - case FASTQ_OPT_RNUM: - case FASTQ_OPT_NAME2: - if (fp->format.format == fastq_format || - fp->format.format == fasta_format) - return fastq_state_set(fp, opt); - return 0; - - case FASTQ_OPT_AUX: - if (fp->format.format == fastq_format || - fp->format.format == fasta_format) { - va_start(args, opt); - char *list = va_arg(args, char *); - va_end(args); - return fastq_state_set(fp, opt, list); - } - return 0; - - case FASTQ_OPT_BARCODE: - if (fp->format.format == fastq_format || - fp->format.format == fasta_format) { - va_start(args, opt); - char *bc = va_arg(args, char *); - va_end(args); - return fastq_state_set(fp, opt, bc); - } - return 0; - - // Options below here flow through to cram_set_voption - case HTS_OPT_COMPRESSION_LEVEL: { - va_start(args, opt); - int level = va_arg(args, int); - va_end(args); - if (fp->is_bgzf) - fp->fp.bgzf->compress_level = level; - else if (fp->format.format == cram) - return cram_set_option(fp->fp.cram, opt, level); - return 0; - } - - case HTS_OPT_FILTER: { - va_start(args, opt); - char *expr = va_arg(args, char *); - va_end(args); - return hts_set_filter_expression(fp, expr); - } - - case HTS_OPT_PROFILE: { - va_start(args, opt); - enum hts_profile_option prof = va_arg(args, int); - va_end(args); - if (fp->is_bgzf) { - switch (prof) { -#ifdef HAVE_LIBDEFLATE - case HTS_PROFILE_FAST: fp->fp.bgzf->compress_level = 2; break; - case HTS_PROFILE_NORMAL: fp->fp.bgzf->compress_level = -1; break; - case HTS_PROFILE_SMALL: fp->fp.bgzf->compress_level = 10; break; - case HTS_PROFILE_ARCHIVE: 
fp->fp.bgzf->compress_level = 12; break; -#else - case HTS_PROFILE_FAST: fp->fp.bgzf->compress_level = 1; break; - case HTS_PROFILE_NORMAL: fp->fp.bgzf->compress_level = -1; break; - case HTS_PROFILE_SMALL: fp->fp.bgzf->compress_level = 8; break; - case HTS_PROFILE_ARCHIVE: fp->fp.bgzf->compress_level = 9; break; -#endif - } - } // else CRAM manages this in its own way - break; - } - - default: - break; - } - - if (fp->format.format != cram) - return 0; - - va_start(args, opt); - r = cram_set_voption(fp->fp.cram, opt, args); - va_end(args); - - return r; -} - -BGZF *hts_get_bgzfp(htsFile *fp); - -int hts_set_threads(htsFile *fp, int n) -{ - if (fp->format.format == sam) { - return sam_set_threads(fp, n); - } else if (fp->format.compression == bgzf) { - return bgzf_mt(hts_get_bgzfp(fp), n, 256/*unused*/); - } else if (fp->format.format == cram) { - return hts_set_opt(fp, CRAM_OPT_NTHREADS, n); - } - else return 0; -} - -int hts_set_thread_pool(htsFile *fp, htsThreadPool *p) { - if (fp->format.format == sam || fp->format.format == text_format) { - return sam_set_thread_pool(fp, p); - } else if (fp->format.compression == bgzf) { - return bgzf_thread_pool(hts_get_bgzfp(fp), p->pool, p->qsize); - } else if (fp->format.format == cram) { - return hts_set_opt(fp, CRAM_OPT_THREAD_POOL, p); - } - else return 0; -} - -void hts_set_cache_size(htsFile *fp, int n) -{ - if (fp->format.compression == bgzf) - bgzf_set_cache_size(hts_get_bgzfp(fp), n); -} - -int hts_set_fai_filename(htsFile *fp, const char *fn_aux) -{ - free(fp->fn_aux); - if (fn_aux) { - fp->fn_aux = strdup(fn_aux); - if (fp->fn_aux == NULL) return -1; - } - else fp->fn_aux = NULL; - - if (fp->format.format == cram) - if (cram_set_option(fp->fp.cram, CRAM_OPT_REFERENCE, fp->fn_aux)) - return -1; - - return 0; -} - -int hts_set_filter_expression(htsFile *fp, const char *expr) -{ - if (fp->filter) - hts_filter_free(fp->filter); - - if (!expr) - return 0; - - return (fp->filter = hts_filter_init(expr)) - ? 
0 : -1; -} - -hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname) -{ - int pid = (int) getpid(); - unsigned ptr = (uintptr_t) tmpname; - int n = 0; - hFILE *fp = NULL; - - do { - // Attempt to further uniquify the temporary filename - unsigned t = ((unsigned) time(NULL)) ^ ((unsigned) clock()) ^ ptr; - n++; - - ks_clear(tmpname); - if (ksprintf(tmpname, "%s.tmp_%d_%d_%u", fname, pid, n, t) < 0) break; - - fp = hopen(tmpname->s, mode); - } while (fp == NULL && errno == EEXIST && n < 100); - - return fp; -} - -// For VCF/BCF backward sweeper. Not exposing these functions because their -// future is uncertain. Things will probably have to change with hFILE... -BGZF *hts_get_bgzfp(htsFile *fp) -{ - if (fp->is_bgzf) - return fp->fp.bgzf; - else - return NULL; -} -int hts_useek(htsFile *fp, off_t uoffset, int where) -{ - if (fp->is_bgzf) - return bgzf_useek(fp->fp.bgzf, uoffset, where); - else - return (hseek(fp->fp.hfile, uoffset, SEEK_SET) >= 0)? 0 : -1; -} -off_t hts_utell(htsFile *fp) -{ - if (fp->is_bgzf) - return bgzf_utell(fp->fp.bgzf); - else - return htell(fp->fp.hfile); -} - -int hts_getline(htsFile *fp, int delimiter, kstring_t *str) -{ - int ret; - if (! (delimiter == KS_SEP_LINE || delimiter == '\n')) { - hts_log_error("Unexpected delimiter %d", delimiter); - abort(); - } - - switch (fp->format.compression) { - case no_compression: - str->l = 0; - ret = kgetline2(str, (kgets_func2 *) hgetln, fp->fp.hfile); - if (ret >= 0) ret = (str->l <= INT_MAX)? 
(int) str->l : INT_MAX; - else if (herrno(fp->fp.hfile)) ret = -2, errno = herrno(fp->fp.hfile); - else ret = -1; - break; - - case gzip: - case bgzf: - ret = bgzf_getline(fp->fp.bgzf, '\n', str); - break; - - default: - abort(); - } - - ++fp->lineno; - return ret; -} - -char **hts_readlist(const char *string, int is_file, int *_n) -{ - unsigned int m = 0, n = 0; - char **s = 0, **s_new; - if ( is_file ) - { - BGZF *fp = bgzf_open(string, "r"); - if ( !fp ) return NULL; - - kstring_t str; - int ret; - str.s = 0; str.l = str.m = 0; - while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) - { - if (str.l == 0) continue; - if (hts_resize(char*, n + 1, &m, &s, 0) < 0) - goto err; - s[n] = strdup(str.s); - if (!s[n]) - goto err; - n++; - } - if (ret < -1) // Read error - goto err; - bgzf_close(fp); - free(str.s); - } - else - { - const char *q = string, *p = string; - while ( 1 ) - { - if (*p == ',' || *p == 0) - { - if (hts_resize(char*, n + 1, &m, &s, 0) < 0) - goto err; - s[n] = (char*)calloc(p - q + 1, 1); - if (!s[n]) - goto err; - strncpy(s[n++], q, p - q); - q = p + 1; - } - if ( !*p ) break; - p++; - } - } - // Try to shrink s to the minimum size needed - s_new = (char**)realloc(s, n * sizeof(char*)); - if (!s_new) - goto err; - - s = s_new; - assert(n < INT_MAX); // hts_resize() should ensure this - *_n = n; - return s; - - err: - for (m = 0; m < n; m++) - free(s[m]); - free(s); - return NULL; -} - -char **hts_readlines(const char *fn, int *_n) -{ - unsigned int m = 0, n = 0; - char **s = 0, **s_new; - BGZF *fp = bgzf_open(fn, "r"); - if ( fp ) { // read from file - kstring_t str; - int ret; - str.s = 0; str.l = str.m = 0; - while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) { - if (str.l == 0) continue; - if (hts_resize(char *, n + 1, &m, &s, 0) < 0) - goto err; - s[n] = strdup(str.s); - if (!s[n]) - goto err; - n++; - } - if (ret < -1) // Read error - goto err; - bgzf_close(fp); - free(str.s); - } else if (*fn == ':') { // read from string - const char *q, *p; - 
for (q = p = fn + 1;; ++p) - if (*p == ',' || *p == 0) { - if (hts_resize(char *, n + 1, &m, &s, 0) < 0) - goto err; - s[n] = (char*)calloc(p - q + 1, 1); - if (!s[n]) - goto err; - strncpy(s[n++], q, p - q); - q = p + 1; - if (*p == 0) break; - } - } else return 0; - // Try to shrink s to the minimum size needed - s_new = (char**)realloc(s, n * sizeof(char*)); - if (!s_new) - goto err; - - s = s_new; - assert(n < INT_MAX); // hts_resize() should ensure this - *_n = n; - return s; - - err: - for (m = 0; m < n; m++) - free(s[m]); - free(s); - return NULL; -} - -// DEPRECATED: To be removed in a future HTSlib release -int hts_file_type(const char *fname) -{ - int len = strlen(fname); - if ( !strcasecmp(".vcf.gz",fname+len-7) ) return FT_VCF_GZ; - if ( !strcasecmp(".vcf",fname+len-4) ) return FT_VCF; - if ( !strcasecmp(".bcf",fname+len-4) ) return FT_BCF_GZ; - if ( !strcmp("-",fname) ) return FT_STDIN; - - hFILE *f = hopen(fname, "r"); - if (f == NULL) return 0; - - htsFormat fmt; - if (hts_detect_format2(f, fname, &fmt) < 0) { hclose_abruptly(f); return 0; } - if (hclose(f) < 0) return 0; - - switch (fmt.format) { - case vcf: return (fmt.compression == no_compression)? FT_VCF : FT_VCF_GZ; - case bcf: return (fmt.compression == no_compression)? 
FT_BCF : FT_BCF_GZ; - default: return 0; - } -} - -int hts_check_EOF(htsFile *fp) -{ - if (fp->format.compression == bgzf) - return bgzf_check_EOF(hts_get_bgzfp(fp)); - else if (fp->format.format == cram) - return cram_check_EOF(fp->fp.cram); - else - return 3; -} - - -/**************** - *** Indexing *** - ****************/ - -#define HTS_MIN_MARKER_DIST 0x10000 - -// Finds the special meta bin -// ((1<<(3 * n_lvls + 3)) - 1) / 7 + 1 -#define META_BIN(idx) ((idx)->n_bins + 1) - -#define pair64_lt(a,b) ((a).u < (b).u) -#define pair64max_lt(a,b) ((a).u < (b).u || \ - ((a).u == (b).u && (a).max < (b).max)) - -KSORT_INIT_STATIC(_off, hts_pair64_t, pair64_lt) -KSORT_INIT_STATIC(_off_max, hts_pair64_max_t, pair64max_lt) - -typedef struct { - int32_t m, n; - uint64_t loff; - hts_pair64_t *list; -} bins_t; - -KHASH_MAP_INIT_INT(bin, bins_t) -typedef khash_t(bin) bidx_t; - -typedef struct { - hts_pos_t n, m; - uint64_t *offset; -} lidx_t; - -struct hts_idx_t { - int fmt, min_shift, n_lvls, n_bins; - uint32_t l_meta; - int32_t n, m; - uint64_t n_no_coor; - bidx_t **bidx; - lidx_t *lidx; - uint8_t *meta; // MUST have a terminating NUL on the end - int tbi_n, last_tbi_tid; - struct { - uint32_t last_bin, save_bin; - hts_pos_t last_coor; - int last_tid, save_tid, finished; - uint64_t last_off, save_off; - uint64_t off_beg, off_end; - uint64_t n_mapped, n_unmapped; - } z; // keep internal states -}; - -static char * idx_format_name(int fmt) { - switch (fmt) { - case HTS_FMT_CSI: return "csi"; - case HTS_FMT_BAI: return "bai"; - case HTS_FMT_TBI: return "tbi"; - case HTS_FMT_CRAI: return "crai"; - default: return "unknown"; - } -} - -#ifdef DEBUG_INDEX -static void idx_dump(const hts_idx_t *idx) { - int i; - int64_t j; - - if (!idx) fprintf(stderr, "Null index\n"); - - fprintf(stderr, "format='%s', min_shift=%d, n_lvls=%d, n_bins=%d, l_meta=%u ", - idx_format_name(idx->fmt), idx->min_shift, idx->n_lvls, idx->n_bins, idx->l_meta); - fprintf(stderr, "n=%d, m=%d, 
n_no_coor=%"PRIu64"\n", idx->n, idx->m, idx->n_no_coor); - for (i = 0; i < idx->n; i++) { - bidx_t *bidx = idx->bidx[i]; - lidx_t *lidx = &idx->lidx[i]; - if (bidx) { - fprintf(stderr, "======== BIN Index - tid=%d, n_buckets=%d, size=%d\n", i, bidx->n_buckets, bidx->size); - int b; - for (b = 0; b < META_BIN(idx); b++) { - khint_t k; - if ((k = kh_get(bin, bidx, b)) != kh_end(bidx)) { - bins_t *entries = &kh_value(bidx, k); - int l = hts_bin_level(b); - int64_t bin_width = 1LL << ((idx->n_lvls - l) * 3 + idx->min_shift); - fprintf(stderr, "\tbin=%d, level=%d, parent=%d, n_chunks=%d, loff=%"PRIu64", interval=[%"PRId64" - %"PRId64"]\n", - b, l, hts_bin_parent(b), entries->n, entries->loff, (b-hts_bin_first(l))*bin_width+1, (b+1-hts_bin_first(l))*bin_width); - for (j = 0; j < entries->n; j++) - fprintf(stderr, "\t\tchunk=%"PRId64", u=%"PRIu64", v=%"PRIu64"\n", j, entries->list[j].u, entries->list[j].v); - } - } - } - if (lidx) { - fprintf(stderr, "======== LINEAR Index - tid=%d, n_values=%"PRId64"\n", i, lidx->n); - for (j = 0; j < lidx->n; j++) { - fprintf(stderr, "\t\tentry=%"PRId64", offset=%"PRIu64", interval=[%"PRId64" - %"PRId64"]\n", - j, lidx->offset[j], j*(1<min_shift)+1, (j+1)*(1<min_shift)); - } - } - } -} -#endif - -static inline int insert_to_b(bidx_t *b, int bin, uint64_t beg, uint64_t end) -{ - khint_t k; - bins_t *l; - int absent; - k = kh_put(bin, b, bin, &absent); - if (absent < 0) return -1; // Out of memory - l = &kh_value(b, k); - if (absent) { - l->m = 1; l->n = 0; - l->list = (hts_pair64_t*)calloc(l->m, sizeof(hts_pair64_t)); - if (!l->list) { - kh_del(bin, b, k); - return -1; - } - } else if (l->n == l->m) { - uint32_t new_m = l->m ? 
l->m << 1 : 1; - hts_pair64_t *new_list = realloc(l->list, new_m * sizeof(hts_pair64_t)); - if (!new_list) return -1; - l->list = new_list; - l->m = new_m; - } - l->list[l->n].u = beg; - l->list[l->n++].v = end; - return 0; -} - -static inline int insert_to_l(lidx_t *l, int64_t _beg, int64_t _end, uint64_t offset, int min_shift) -{ - int i; - hts_pos_t beg, end; - beg = _beg >> min_shift; - end = (_end - 1) >> min_shift; - if (l->m < end + 1) { - size_t new_m = l->m * 2 > end + 1 ? l->m * 2 : end + 1; - uint64_t *new_offset; - - new_offset = (uint64_t*)realloc(l->offset, new_m * sizeof(uint64_t)); - if (!new_offset) return -1; - - // fill unused memory with (uint64_t)-1 - memset(new_offset + l->m, 0xff, sizeof(uint64_t) * (new_m - l->m)); - l->m = new_m; - l->offset = new_offset; - } - for (i = beg; i <= end; ++i) { - if (l->offset[i] == (uint64_t)-1) l->offset[i] = offset; - } - if (l->n < end + 1) l->n = end + 1; - return 0; -} - -hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls) -{ - hts_idx_t *idx; - idx = (hts_idx_t*)calloc(1, sizeof(hts_idx_t)); - if (idx == NULL) return NULL; - idx->fmt = fmt; - idx->min_shift = min_shift; - idx->n_lvls = n_lvls; - idx->n_bins = ((1<<(3 * n_lvls + 3)) - 1) / 7; - idx->z.save_tid = idx->z.last_tid = -1; - idx->z.save_bin = idx->z.last_bin = 0xffffffffu; - idx->z.save_off = idx->z.last_off = idx->z.off_beg = idx->z.off_end = offset0; - idx->z.last_coor = 0xffffffffu; - if (n) { - idx->n = idx->m = n; - idx->bidx = (bidx_t**)calloc(n, sizeof(bidx_t*)); - if (idx->bidx == NULL) { free(idx); return NULL; } - idx->lidx = (lidx_t*) calloc(n, sizeof(lidx_t)); - if (idx->lidx == NULL) { free(idx->bidx); free(idx); return NULL; } - } - idx->tbi_n = -1; - idx->last_tbi_tid = -1; - return idx; -} - -static void update_loff(hts_idx_t *idx, int i, int free_lidx) -{ - bidx_t *bidx = idx->bidx[i]; - lidx_t *lidx = &idx->lidx[i]; - khint_t k; - int l; - // the last entry is always valid - for (l=lidx->n-2; 
l >= 0; l--) { - if (lidx->offset[l] == (uint64_t)-1) - lidx->offset[l] = lidx->offset[l+1]; - } - if (bidx == 0) return; - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) // set loff - if (kh_exist(bidx, k)) - { - if ( kh_key(bidx, k) < idx->n_bins ) - { - int bot_bin = hts_bin_bot(kh_key(bidx, k), idx->n_lvls); - // disable linear index if bot_bin out of bounds - kh_val(bidx, k).loff = bot_bin < lidx->n ? lidx->offset[bot_bin] : 0; - } - else - kh_val(bidx, k).loff = 0; - } - if (free_lidx) { - free(lidx->offset); - lidx->m = lidx->n = 0; - lidx->offset = 0; - } -} - -static int compress_binning(hts_idx_t *idx, int i) -{ - bidx_t *bidx = idx->bidx[i]; - khint_t k; - int l, m; - if (bidx == 0) return 0; - // merge a bin to its parent if the bin is too small - for (l = idx->n_lvls; l > 0; --l) { - unsigned start = hts_bin_first(l); - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) { - bins_t *p, *q; - if (!kh_exist(bidx, k) || kh_key(bidx, k) >= idx->n_bins || kh_key(bidx, k) < start) continue; - p = &kh_value(bidx, k); - if (l < idx->n_lvls && p->n > 1) ks_introsort(_off, p->n, p->list); - if ((p->list[p->n - 1].v>>16) - (p->list[0].u>>16) < HTS_MIN_MARKER_DIST) { - khint_t kp; - kp = kh_get(bin, bidx, hts_bin_parent(kh_key(bidx, k))); - if (kp == kh_end(bidx)) continue; - q = &kh_val(bidx, kp); - if (q->n + p->n > q->m) { - uint32_t new_m = q->n + p->n; - hts_pair64_t *new_list; - kroundup32(new_m); - if (new_m > INT32_MAX) return -1; // Limited by index format - new_list = realloc(q->list, new_m * sizeof(*new_list)); - if (!new_list) return -1; - q->m = new_m; - q->list = new_list; - } - memcpy(q->list + q->n, p->list, p->n * sizeof(hts_pair64_t)); - q->n += p->n; - free(p->list); - kh_del(bin, bidx, k); - } - } - } - k = kh_get(bin, bidx, 0); - if (k != kh_end(bidx)) ks_introsort(_off, kh_val(bidx, k).n, kh_val(bidx, k).list); - // merge adjacent chunks that start from the same BGZF block - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) { - bins_t *p; - if 
(!kh_exist(bidx, k) || kh_key(bidx, k) >= idx->n_bins) continue; - p = &kh_value(bidx, k); - for (l = 1, m = 0; l < p->n; ++l) { - if (p->list[m].v>>16 >= p->list[l].u>>16) { - if (p->list[m].v < p->list[l].v) p->list[m].v = p->list[l].v; - } else p->list[++m] = p->list[l]; - } - p->n = m + 1; - } - return 0; -} - -int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset) -{ - int i, ret = 0; - if (idx == NULL || idx->z.finished) return 0; // do not run this function on an empty index or multiple times - if (idx->z.save_tid >= 0) { - ret |= insert_to_b(idx->bidx[idx->z.save_tid], idx->z.save_bin, idx->z.save_off, final_offset); - ret |= insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), idx->z.off_beg, final_offset); - ret |= insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), idx->z.n_mapped, idx->z.n_unmapped); - } - for (i = 0; i < idx->n; ++i) { - update_loff(idx, i, (idx->fmt == HTS_FMT_CSI)); - ret |= compress_binning(idx, i); - } - idx->z.finished = 1; - - return ret; -} - -int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end) -{ - int64_t maxpos = (int64_t) 1 << (idx->min_shift + idx->n_lvls * 3); - if (tid < 0 || (beg <= maxpos && end <= maxpos)) - return 0; - - if (idx->fmt == HTS_FMT_CSI) { - hts_log_error("Region %"PRIhts_pos"..%"PRIhts_pos" " - "cannot be stored in a csi index with these parameters. " - "Please use a larger min_shift or depth", - beg, end); - } else { - hts_log_error("Region %"PRIhts_pos"..%"PRIhts_pos - " cannot be stored in a %s index. Try using a csi index", - beg, end, idx_format_name(idx->fmt)); - } - errno = ERANGE; - return -1; -} - -int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped) -{ - int bin; - if (tid<0) beg = -1, end = 0; - if (hts_idx_check_range(idx, tid, beg, end) < 0) - return -1; - if (tid >= idx->m) { // enlarge the index - uint32_t new_m = idx->m * 2 > tid + 1 ? 
idx->m * 2 : tid + 1; - bidx_t **new_bidx; - lidx_t *new_lidx; - - new_bidx = (bidx_t**)realloc(idx->bidx, new_m * sizeof(bidx_t*)); - if (!new_bidx) return -1; - idx->bidx = new_bidx; - - new_lidx = (lidx_t*) realloc(idx->lidx, new_m * sizeof(lidx_t)); - if (!new_lidx) return -1; - idx->lidx = new_lidx; - - memset(&idx->bidx[idx->m], 0, (new_m - idx->m) * sizeof(bidx_t*)); - memset(&idx->lidx[idx->m], 0, (new_m - idx->m) * sizeof(lidx_t)); - idx->m = new_m; - } - if (idx->n < tid + 1) idx->n = tid + 1; - if (idx->z.finished) return 0; - if (idx->z.last_tid != tid || (idx->z.last_tid >= 0 && tid < 0)) { // change of chromosome - if ( tid>=0 && idx->n_no_coor ) - { - hts_log_error("NO_COOR reads not in a single block at the end %d %d", tid, idx->z.last_tid); - return -1; - } - if (tid>=0 && idx->bidx[tid] != 0) - { - hts_log_error("Chromosome blocks not continuous"); - return -1; - } - idx->z.last_tid = tid; - idx->z.last_bin = 0xffffffffu; - } else if (tid >= 0 && idx->z.last_coor > beg) { // test if positions are out of order - hts_log_error("Unsorted positions on sequence #%d: %"PRIhts_pos" followed by %"PRIhts_pos, tid+1, idx->z.last_coor+1, beg+1); - return -1; - } - if (end < beg) { - // Malformed ranges are errors. (Empty ranges (beg==end) are unusual but acceptable.) 
- hts_log_error("Invalid record on sequence #%d: end %"PRId64" < begin %"PRId64, tid+1, end, beg+1); - return -1; - } - if ( tid>=0 ) - { - if (idx->bidx[tid] == 0) idx->bidx[tid] = kh_init(bin); - // shoehorn [-1,0) (VCF POS=0) into the leftmost bottom-level bin - if (beg < 0) beg = 0; - if (end <= 0) end = 1; - // idx->z.last_off points to the start of the current record - if (insert_to_l(&idx->lidx[tid], beg, end, - idx->z.last_off, idx->min_shift) < 0) return -1; - } - else idx->n_no_coor++; - bin = hts_reg2bin(beg, end, idx->min_shift, idx->n_lvls); - if ((int)idx->z.last_bin != bin) { // then possibly write the binning index - if (idx->z.save_bin != 0xffffffffu) { // save_bin==0xffffffffu only happens to the first record - if (insert_to_b(idx->bidx[idx->z.save_tid], idx->z.save_bin, - idx->z.save_off, idx->z.last_off) < 0) return -1; - } - if (idx->z.last_bin == 0xffffffffu && idx->z.save_bin != 0xffffffffu) { // change of chr; keep meta information - idx->z.off_end = idx->z.last_off; - if (insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), - idx->z.off_beg, idx->z.off_end) < 0) return -1; - if (insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), - idx->z.n_mapped, idx->z.n_unmapped) < 0) return -1; - idx->z.n_mapped = idx->z.n_unmapped = 0; - idx->z.off_beg = idx->z.off_end; - } - idx->z.save_off = idx->z.last_off; - idx->z.save_bin = idx->z.last_bin = bin; - idx->z.save_tid = tid; - } - if (is_mapped) ++idx->z.n_mapped; - else ++idx->z.n_unmapped; - idx->z.last_off = offset; - idx->z.last_coor = beg; - return 0; -} - -// Needed for TBI only. Ensure 'tid' with 'name' is in the index meta data. -// idx->meta needs to have been initialised first with an appropriate Tabix -// configuration via hts_idx_set_meta. -// -// NB number of references (first 4 bytes of tabix header) aren't in -// idx->meta, but held in idx->n instead. 
-int hts_idx_tbi_name(hts_idx_t *idx, int tid, const char *name) { - // Horrid - we have to map incoming tid to a tbi alternative tid. - // This is because TBI counts tids by "covered" refs while everything - // else counts by Nth SQ/contig record in header. - if (tid == idx->last_tbi_tid || tid < 0 || !name) - return idx->tbi_n; - - uint32_t len = strlen(name)+1; - uint8_t *tmp = (uint8_t *)realloc(idx->meta, idx->l_meta + len); - if (!tmp) - return -1; - - // Append name - idx->meta = tmp; - strcpy((char *)idx->meta + idx->l_meta, name); - idx->l_meta += len; - - // Update seq length - u32_to_le(le_to_u32(idx->meta+24)+len, idx->meta+24); - - idx->last_tbi_tid = tid; - return ++idx->tbi_n; -} - -// When doing samtools index we have a read_bam / hts_idx_push(bgzf_tell()) -// loop. idx->z.last_off is the previous bzgf_tell location, so we know -// the location the current bam record started at as well as where it ends. -// -// When building an index on the fly via a write_bam / hts_idx_push loop, -// this isn't quite identical as we may amend the virtual coord returned -// by bgzf_tell to the start of a new block if the next bam struct doesn't -// fit. It's essentially the same thing, but for bit-identical indices -// we need to amend the idx->z.last_off when we know we're starting a new -// block. 
-void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset) -{ - idx->z.last_off = offset; -} - -void hts_idx_destroy(hts_idx_t *idx) -{ - khint_t k; - int i; - if (idx == 0) return; - - // For HTS_FMT_CRAI, idx actually points to a different type -- see sam.c - if (idx->fmt == HTS_FMT_CRAI) { - hts_cram_idx_t *cidx = (hts_cram_idx_t *) idx; - cram_index_free(cidx->cram); - free(cidx); - return; - } - - for (i = 0; i < idx->m; ++i) { - bidx_t *bidx = idx->bidx[i]; - free(idx->lidx[i].offset); - if (bidx == 0) continue; - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) - if (kh_exist(bidx, k)) - free(kh_value(bidx, k).list); - kh_destroy(bin, bidx); - } - free(idx->bidx); free(idx->lidx); free(idx->meta); - free(idx); -} - -int hts_idx_fmt(hts_idx_t *idx) { - return idx->fmt; -} - -// The optimizer eliminates these ed_is_big() calls; still it would be good to -// TODO Determine endianness at configure- or compile-time - -static inline ssize_t HTS_RESULT_USED idx_write_int32(BGZF *fp, int32_t x) -{ - if (ed_is_big()) x = ed_swap_4(x); - return bgzf_write(fp, &x, sizeof x); -} - -static inline ssize_t HTS_RESULT_USED idx_write_uint32(BGZF *fp, uint32_t x) -{ - if (ed_is_big()) x = ed_swap_4(x); - return bgzf_write(fp, &x, sizeof x); -} - -static inline ssize_t HTS_RESULT_USED idx_write_uint64(BGZF *fp, uint64_t x) -{ - if (ed_is_big()) x = ed_swap_8(x); - return bgzf_write(fp, &x, sizeof x); -} - -static inline void swap_bins(bins_t *p) -{ - int i; - for (i = 0; i < p->n; ++i) { - ed_swap_8p(&p->list[i].u); - ed_swap_8p(&p->list[i].v); - } -} - -static int idx_save_core(const hts_idx_t *idx, BGZF *fp, int fmt) -{ - int32_t i, j; - - #define check(ret) if ((ret) < 0) return -1 - - // VCF TBI/CSI only writes IDs for non-empty bins (ie covered references) - // - // NOTE: CSI meta is undefined in spec, so this code has an assumption - // that we're only using it for Tabix data. 
- int nids = idx->n; - if (idx->meta && idx->l_meta >= 4 && le_to_u32(idx->meta) == TBX_VCF) { - for (i = nids = 0; i < idx->n; ++i) { - if (idx->bidx[i]) - nids++; - } - } - check(idx_write_int32(fp, nids)); - if (fmt == HTS_FMT_TBI && idx->l_meta) - check(bgzf_write(fp, idx->meta, idx->l_meta)); - - for (i = 0; i < idx->n; ++i) { - khint_t k; - bidx_t *bidx = idx->bidx[i]; - lidx_t *lidx = &idx->lidx[i]; - - // write binning index - if (nids == idx->n || bidx) - check(idx_write_int32(fp, bidx? kh_size(bidx) : 0)); - if (bidx) - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) - if (kh_exist(bidx, k)) { - bins_t *p = &kh_value(bidx, k); - check(idx_write_uint32(fp, kh_key(bidx, k))); - if (fmt == HTS_FMT_CSI) check(idx_write_uint64(fp, p->loff)); - //int j;for(j=0;jn;++j)fprintf(stderr,"%d,%llx,%d,%llx:%llx\n",kh_key(bidx,k),kh_val(bidx, k).loff,j,p->list[j].u,p->list[j].v); - check(idx_write_int32(fp, p->n)); - for (j = 0; j < p->n; ++j) { - //fprintf(stderr, "\t%ld\t%ld\n", p->list[j].u, p->list[j].v); - check(idx_write_uint64(fp, p->list[j].u)); - check(idx_write_uint64(fp, p->list[j].v)); - } - } - - // write linear index - if (fmt != HTS_FMT_CSI) { - check(idx_write_int32(fp, lidx->n)); - for (j = 0; j < lidx->n; ++j) - check(idx_write_uint64(fp, lidx->offset[j])); - } - } - - check(idx_write_uint64(fp, idx->n_no_coor)); -#ifdef DEBUG_INDEX - idx_dump(idx); -#endif - - return 0; - #undef check -} - -int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt) -{ - int ret, save; - if (idx == NULL || fn == NULL) { errno = EINVAL; return -1; } - char *fnidx = (char*)calloc(1, strlen(fn) + 5); - if (fnidx == NULL) return -1; - - strcpy(fnidx, fn); - switch (fmt) { - case HTS_FMT_BAI: strcat(fnidx, ".bai"); break; - case HTS_FMT_CSI: strcat(fnidx, ".csi"); break; - case HTS_FMT_TBI: strcat(fnidx, ".tbi"); break; - default: abort(); - } - - ret = hts_idx_save_as(idx, fn, fnidx, fmt); - save = errno; - free(fnidx); - errno = save; - return ret; -} - -int 
hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt) -{ - BGZF *fp; - - #define check(ret) if ((ret) < 0) goto fail - - if (fnidx == NULL) return hts_idx_save(idx, fn, fmt); - - fp = bgzf_open(fnidx, (fmt == HTS_FMT_BAI)? "wu" : "w"); - if (fp == NULL) return -1; - - if (fmt == HTS_FMT_CSI) { - check(bgzf_write(fp, "CSI\1", 4)); - check(idx_write_int32(fp, idx->min_shift)); - check(idx_write_int32(fp, idx->n_lvls)); - check(idx_write_uint32(fp, idx->l_meta)); - if (idx->l_meta) check(bgzf_write(fp, idx->meta, idx->l_meta)); - } else if (fmt == HTS_FMT_TBI) { - check(bgzf_write(fp, "TBI\1", 4)); - } else if (fmt == HTS_FMT_BAI) { - check(bgzf_write(fp, "BAI\1", 4)); - } else abort(); - - check(idx_save_core(idx, fp, fmt)); - - return bgzf_close(fp); - #undef check - -fail: - bgzf_close(fp); - return -1; -} - -static int idx_read_core(hts_idx_t *idx, BGZF *fp, int fmt) -{ - int32_t i, n, is_be; - is_be = ed_is_big(); - if (idx == NULL) return -4; - for (i = 0; i < idx->n; ++i) { - bidx_t *h; - lidx_t *l = &idx->lidx[i]; - uint32_t key; - int j, absent; - bins_t *p; - h = idx->bidx[i] = kh_init(bin); - if (bgzf_read(fp, &n, 4) != 4) return -1; - if (is_be) ed_swap_4p(&n); - if (n < 0) return -3; - for (j = 0; j < n; ++j) { - khint_t k; - if (bgzf_read(fp, &key, 4) != 4) return -1; - if (is_be) ed_swap_4p(&key); - k = kh_put(bin, h, key, &absent); - if (absent < 0) return -2; // No memory - if (absent == 0) return -3; // Duplicate bin number - p = &kh_val(h, k); - if (fmt == HTS_FMT_CSI) { - if (bgzf_read(fp, &p->loff, 8) != 8) return -1; - if (is_be) ed_swap_8p(&p->loff); - } else p->loff = 0; - if (bgzf_read(fp, &p->n, 4) != 4) return -1; - if (is_be) ed_swap_4p(&p->n); - if (p->n < 0) return -3; - if ((size_t) p->n > SIZE_MAX / sizeof(hts_pair64_t)) return -2; - p->m = p->n; - p->list = (hts_pair64_t*)malloc(p->m * sizeof(hts_pair64_t)); - if (p->list == NULL) return -2; - if (bgzf_read(fp, p->list, ((size_t) p->n)<<4) != ((size_t) 
p->n)<<4) return -1; - if (is_be) swap_bins(p); - } - if (fmt != HTS_FMT_CSI) { // load linear index - int j, k; - uint32_t x; - if (bgzf_read(fp, &x, 4) != 4) return -1; - if (is_be) ed_swap_4p(&x); - l->n = x; - if (l->n < 0) return -3; - if ((size_t) l->n > SIZE_MAX / sizeof(uint64_t)) return -2; - l->m = l->n; - l->offset = (uint64_t*)malloc(l->n * sizeof(uint64_t)); - if (l->offset == NULL) return -2; - if (bgzf_read(fp, l->offset, l->n << 3) != l->n << 3) return -1; - if (is_be) for (j = 0; j < l->n; ++j) ed_swap_8p(&l->offset[j]); - for (k = j = 0; j < l->n && l->offset[j] == 0; k = ++j); // stop at the first non-zero entry - for (j = l->n-1; j > k; j--) // fill missing values; may happen given older samtools and tabix - if (l->offset[j-1] == 0) l->offset[j-1] = l->offset[j]; - update_loff(idx, i, 0); - } - } - if (bgzf_read(fp, &idx->n_no_coor, 8) != 8) idx->n_no_coor = 0; - if (is_be) ed_swap_8p(&idx->n_no_coor); -#ifdef DEBUG_INDEX - idx_dump(idx); -#endif - - return 0; -} - -static hts_idx_t *idx_read(const char *fn) -{ - uint8_t magic[4]; - int i, is_be; - hts_idx_t *idx = NULL; - uint8_t *meta = NULL; - BGZF *fp = bgzf_open(fn, "r"); - if (fp == NULL) return NULL; - is_be = ed_is_big(); - if (bgzf_read(fp, magic, 4) != 4) goto fail; - - if (memcmp(magic, "CSI\1", 4) == 0) { - uint32_t x[3], n; - if (bgzf_read(fp, x, 12) != 12) goto fail; - if (is_be) for (i = 0; i < 3; ++i) ed_swap_4p(&x[i]); - if (x[2]) { - if (SIZE_MAX - x[2] < 1) goto fail; // Prevent possible overflow - if ((meta = (uint8_t*)malloc((size_t) x[2] + 1)) == NULL) goto fail; - if (bgzf_read(fp, meta, x[2]) != x[2]) goto fail; - // Prevent possible strlen past the end in tbx_index_load2 - meta[x[2]] = '\0'; - } - if (bgzf_read(fp, &n, 4) != 4) goto fail; - if (is_be) ed_swap_4p(&n); - if (n > INT32_MAX) goto fail; - if ((idx = hts_idx_init(n, HTS_FMT_CSI, 0, x[0], x[1])) == NULL) goto fail; - idx->l_meta = x[2]; - idx->meta = meta; - meta = NULL; - if (idx_read_core(idx, fp, 
HTS_FMT_CSI) < 0) goto fail; - } - else if (memcmp(magic, "TBI\1", 4) == 0) { - uint8_t x[8 * 4]; - uint32_t n; - // Read file header - if (bgzf_read(fp, x, sizeof(x)) != sizeof(x)) goto fail; - n = le_to_u32(&x[0]); // location of n_ref - if (n > INT32_MAX) goto fail; - if ((idx = hts_idx_init(n, HTS_FMT_TBI, 0, 14, 5)) == NULL) goto fail; - n = le_to_u32(&x[7*4]); // location of l_nm - if (n > UINT32_MAX - 29) goto fail; // Prevent possible overflow - idx->l_meta = 28 + n; - if ((idx->meta = (uint8_t*)malloc(idx->l_meta + 1)) == NULL) goto fail; - // copy format, col_seq, col_beg, col_end, meta, skip, l_nm - // N.B. left in little-endian byte order. - memcpy(idx->meta, &x[1*4], 28); - // Read in sequence names. - if (bgzf_read(fp, idx->meta + 28, n) != n) goto fail; - // Prevent possible strlen past the end in tbx_index_load2 - idx->meta[idx->l_meta] = '\0'; - if (idx_read_core(idx, fp, HTS_FMT_TBI) < 0) goto fail; - } - else if (memcmp(magic, "BAI\1", 4) == 0) { - uint32_t n; - if (bgzf_read(fp, &n, 4) != 4) goto fail; - if (is_be) ed_swap_4p(&n); - if (n > INT32_MAX) goto fail; - if ((idx = hts_idx_init(n, HTS_FMT_BAI, 0, 14, 5)) == NULL) goto fail; - if (idx_read_core(idx, fp, HTS_FMT_BAI) < 0) goto fail; - } - else { errno = EINVAL; goto fail; } - - bgzf_close(fp); - return idx; - -fail: - bgzf_close(fp); - hts_idx_destroy(idx); - free(meta); - return NULL; -} - -int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, - int is_copy) -{ - uint8_t *new_meta = meta; - if (is_copy) { - size_t l = l_meta; - if (l > SIZE_MAX - 1) { - errno = ENOMEM; - return -1; - } - new_meta = malloc(l + 1); - if (!new_meta) return -1; - memcpy(new_meta, meta, l); - // Prevent possible strlen past the end in tbx_index_load2 - new_meta[l] = '\0'; - } - if (idx->meta) free(idx->meta); - idx->l_meta = l_meta; - idx->meta = new_meta; - return 0; -} - -uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta) -{ - *l_meta = idx->l_meta; - return idx->meta; -} - -const 
char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr) -{ - if ( !idx || !idx->n ) - { - *n = 0; - return NULL; - } - - int tid = 0, i; - const char **names = (const char**) calloc(idx->n,sizeof(const char*)); - for (i=0; in; i++) - { - bidx_t *bidx = idx->bidx[i]; - if ( !bidx ) continue; - names[tid++] = getid(hdr,i); - } - *n = tid; - return names; -} - -int hts_idx_nseq(const hts_idx_t *idx) { - if (!idx) return -1; - return idx->n; -} - -int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped) -{ - if (!idx) return -1; - if ( idx->fmt == HTS_FMT_CRAI ) { - *mapped = 0; *unmapped = 0; - return -1; - } - - bidx_t *h = idx->bidx[tid]; - if (!h) return -1; - khint_t k = kh_get(bin, h, META_BIN(idx)); - if (k != kh_end(h)) { - *mapped = kh_val(h, k).list[1].u; - *unmapped = kh_val(h, k).list[1].v; - return 0; - } else { - *mapped = 0; *unmapped = 0; - return -1; - } -} - -uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx) -{ - if (idx->fmt == HTS_FMT_CRAI) return 0; - return idx->n_no_coor; -} - -/**************** - *** Iterator *** - ****************/ - -// Note: even with 32-bit hts_pos_t, end needs to be 64-bit here due to 1LL<>s); e = t + (end>>s); - for (i = b; i <= e; ++i) { - if (kh_get(bin, bidx, i) != kh_end(bidx)) { - assert(itr->bins.n < itr->bins.m); - itr->bins.a[itr->bins.n++] = i; - } - } - } - return itr->bins.n; -} - -static inline int reg2bins_wide(int64_t beg, int64_t end, hts_itr_t *itr, int min_shift, int n_lvls, bidx_t *bidx) -{ - khint_t i; - hts_pos_t max_shift = 3 * n_lvls + min_shift; - --end; - if (beg < 0) beg = 0; - for (i = kh_begin(bidx); i != kh_end(bidx); i++) { - if (!kh_exist(bidx, i)) continue; - hts_pos_t bin = (hts_pos_t) kh_key(bidx, i); - int level = hts_bin_level(bin); - if (level > n_lvls) continue; // Dodgy index? 
- hts_pos_t first = hts_bin_first(level); - hts_pos_t beg_at_level = first + (beg >> (max_shift - 3 * level)); - hts_pos_t end_at_level = first + (end >> (max_shift - 3 * level)); - if (beg_at_level <= bin && bin <= end_at_level) { - assert(itr->bins.n < itr->bins.m); - itr->bins.a[itr->bins.n++] = bin; - } - } - return itr->bins.n; -} - -static inline int reg2bins(int64_t beg, int64_t end, hts_itr_t *itr, int min_shift, int n_lvls, bidx_t *bidx) -{ - int l, t, s = min_shift + (n_lvls<<1) + n_lvls; - size_t reg_bin_count = 0, hash_bin_count = kh_n_buckets(bidx), max_bins; - hts_pos_t end1; - if (end >= 1LL<= end) return 0; - end1 = end - 1; - - // Count bins to see if it's faster to iterate through the hash table - // or the set of bins covering the region - for (l = 0, t = 0; l <= n_lvls; s -= 3, t += 1<<((l<<1)+l), ++l) { - reg_bin_count += (end1 >> s) - (beg >> s) + 1; - } - max_bins = reg_bin_count < kh_size(bidx) ? reg_bin_count : kh_size(bidx); - if (itr->bins.m - itr->bins.n < max_bins) { - // Worst-case memory usage. May be wasteful on very sparse - // data, but the bin list usually won't be too big anyway. 
- size_t new_m = max_bins + itr->bins.n; - if (new_m > INT_MAX || new_m > SIZE_MAX / sizeof(int)) { - errno = ENOMEM; - return -1; - } - int *new_a = realloc(itr->bins.a, new_m * sizeof(*new_a)); - if (!new_a) return -1; - itr->bins.a = new_a; - itr->bins.m = new_m; - } - if (reg_bin_count < hash_bin_count) { - return reg2bins_narrow(beg, end, itr, min_shift, n_lvls, bidx); - } else { - return reg2bins_wide(beg, end, itr, min_shift, n_lvls, bidx); - } -} - -static inline int add_to_interval(hts_itr_t *iter, bins_t *bin, - int tid, uint32_t interval, - uint64_t min_off, uint64_t max_off) -{ - hts_pair64_max_t *off; - int j; - - if (!bin->n) - return 0; - off = realloc(iter->off, (iter->n_off + bin->n) * sizeof(*off)); - if (!off) - return -2; - - iter->off = off; - for (j = 0; j < bin->n; ++j) { - if (bin->list[j].v > min_off && bin->list[j].u < max_off) { - iter->off[iter->n_off].u = min_off > bin->list[j].u - ? min_off : bin->list[j].u; - iter->off[iter->n_off].v = max_off < bin->list[j].v - ? max_off : bin->list[j].v; - // hts_pair64_max_t::max is now used to link - // file offsets to region list entries. - // The iterator can use this to decide if it - // can skip some file regions. 
- iter->off[iter->n_off].max = ((uint64_t) tid << 32) | interval; - iter->n_off++; - } - } - return 0; -} - -static inline int reg2intervals_narrow(hts_itr_t *iter, const bidx_t *bidx, - int tid, int64_t beg, int64_t end, - uint32_t interval, - uint64_t min_off, uint64_t max_off, - int min_shift, int n_lvls) -{ - int l, t, s = min_shift + n_lvls * 3; - hts_pos_t b, e, i; - - for (--end, l = 0, t = 0; l <= n_lvls; s -= 3, t += 1<<((l<<1)+l), ++l) { - b = t + (beg>>s); e = t + (end>>s); - for (i = b; i <= e; ++i) { - khint_t k = kh_get(bin, bidx, i); - if (k != kh_end(bidx)) { - bins_t *bin = &kh_value(bidx, k); - int res = add_to_interval(iter, bin, tid, interval, min_off, max_off); - if (res < 0) - return res; - } - } - } - return 0; -} - -static inline int reg2intervals_wide(hts_itr_t *iter, const bidx_t *bidx, - int tid, int64_t beg, int64_t end, - uint32_t interval, - uint64_t min_off, uint64_t max_off, - int min_shift, int n_lvls) -{ - khint_t i; - hts_pos_t max_shift = 3 * n_lvls + min_shift; - --end; - if (beg < 0) beg = 0; - for (i = kh_begin(bidx); i != kh_end(bidx); i++) { - if (!kh_exist(bidx, i)) continue; - hts_pos_t bin = (hts_pos_t) kh_key(bidx, i); - int level = hts_bin_level(bin); - if (level > n_lvls) continue; // Dodgy index? 
- hts_pos_t first = hts_bin_first(level); - hts_pos_t beg_at_level = first + (beg >> (max_shift - 3 * level)); - hts_pos_t end_at_level = first + (end >> (max_shift - 3 * level)); - if (beg_at_level <= bin && bin <= end_at_level) { - bins_t *bin = &kh_value(bidx, i); - int res = add_to_interval(iter, bin, tid, interval, min_off, max_off); - if (res < 0) - return res; - } - } - return 0; -} - -static inline int reg2intervals(hts_itr_t *iter, const hts_idx_t *idx, int tid, int64_t beg, int64_t end, uint32_t interval, uint64_t min_off, uint64_t max_off, int min_shift, int n_lvls) -{ - int l, t, s; - int i, j; - hts_pos_t end1; - bidx_t *bidx; - int start_n_off; - size_t reg_bin_count = 0, hash_bin_count; - int res; - - if (!iter || !idx || (bidx = idx->bidx[tid]) == NULL || beg >= end) - return -1; - - hash_bin_count = kh_n_buckets(bidx); - - s = min_shift + (n_lvls<<1) + n_lvls; - if (end >= 1LL<> s) - (beg >> s) + 1; - } - - start_n_off = iter->n_off; - - // Populate iter->off with the intervals for this region - if (reg_bin_count < hash_bin_count) { - res = reg2intervals_narrow(iter, bidx, tid, beg, end, interval, - min_off, max_off, min_shift, n_lvls); - } else { - res = reg2intervals_wide(iter, bidx, tid, beg, end, interval, - min_off, max_off, min_shift, n_lvls); - } - if (res < 0) - return res; - - if (iter->n_off - start_n_off > 1) { - ks_introsort(_off_max, iter->n_off - start_n_off, iter->off + start_n_off); - for (i = start_n_off, j = start_n_off + 1; j < iter->n_off; j++) { - if (iter->off[i].v >= iter->off[j].u) { - if (iter->off[i].v < iter->off[j].v) - iter->off[i].v = iter->off[j].v; - } else { - i++; - if (i < j) - iter->off[i] = iter->off[j]; - } - } - iter->n_off = i + 1; - } - - return iter->n_off; -} - -static int compare_regions(const void *r1, const void *r2) { - hts_reglist_t *reg1 = (hts_reglist_t *)r1; - hts_reglist_t *reg2 = (hts_reglist_t *)r2; - - if (reg1->tid < 0 && reg2->tid >= 0) - return 1; - else if (reg1->tid >= 0 && reg2->tid < 0) 
- return -1; - else - return reg1->tid - reg2->tid; -} - -uint64_t hts_itr_off(const hts_idx_t* idx, int tid) { - - int i; - bidx_t* bidx; - uint64_t off0 = (uint64_t) -1; - khint_t k; - switch (tid) { - case HTS_IDX_START: - // Find the smallest offset, note that sequence ids may not be ordered sequentially - for (i = 0; i < idx->n; i++) { - bidx = idx->bidx[i]; - k = kh_get(bin, bidx, META_BIN(idx)); - if (k == kh_end(bidx)) - continue; - - if (off0 > kh_val(bidx, k).list[0].u) - off0 = kh_val(bidx, k).list[0].u; - } - if (off0 == (uint64_t) -1 && idx->n_no_coor) - off0 = 0; - // only no-coor reads in this bam - break; - case HTS_IDX_NOCOOR: - /* No-coor reads sort after all of the mapped reads. The position - is not stored in the index itself, so need to find the end - offset for the last mapped read. A loop is needed here in - case references at the end of the file have no mapped reads, - or sequence ids are not ordered sequentially. - See issue samtools#568 and commits b2aab8, 60c22d and cc207d. 
*/ - for (i = 0; i < idx->n; i++) { - bidx = idx->bidx[i]; - k = kh_get(bin, bidx, META_BIN(idx)); - if (k != kh_end(bidx)) { - if (off0 == (uint64_t) -1 || off0 < kh_val(bidx, k).list[0].v) { - off0 = kh_val(bidx, k).list[0].v; - } - } - } - if (off0 == (uint64_t) -1 && idx->n_no_coor) - off0 = 0; - // only no-coor reads in this bam - break; - case HTS_IDX_REST: - off0 = 0; - break; - case HTS_IDX_NONE: - off0 = 0; - break; - } - - return off0; -} - -hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec) -{ - int i, n_off, l, bin; - hts_pair64_max_t *off; - khint_t k; - bidx_t *bidx; - uint64_t min_off, max_off; - hts_itr_t *iter; - uint32_t unmapped = 0, rel_off; - - // It's possible to call this function with NULL idx iff - // tid is one of the special values HTS_IDX_REST or HTS_IDX_NONE - if (!idx && !(tid == HTS_IDX_REST || tid == HTS_IDX_NONE)) { - errno = EINVAL; - return NULL; - } - - iter = (hts_itr_t*)calloc(1, sizeof(hts_itr_t)); - if (iter) { - if (tid < 0) { - uint64_t off = hts_itr_off(idx, tid); - if (off != (uint64_t) -1) { - iter->read_rest = 1; - iter->curr_off = off; - iter->readrec = readrec; - if (tid == HTS_IDX_NONE) - iter->finished = 1; - } else { - free(iter); - iter = NULL; - } - } else if (tid >= idx->n || (bidx = idx->bidx[tid]) == NULL) { - iter->finished = 1; - } else { - if (beg < 0) beg = 0; - if (end < beg) { - free(iter); - return NULL; - } - - k = kh_get(bin, bidx, META_BIN(idx)); - if (k != kh_end(bidx)) - unmapped = kh_val(bidx, k).list[1].v; - else - unmapped = 1; - - iter->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1; - iter->readrec = readrec; - - if ( !kh_size(bidx) ) { iter->finished = 1; return iter; } - - rel_off = beg>>idx->min_shift; - // compute min_off - bin = hts_bin_first(idx->n_lvls) + rel_off; - do { - int first; - k = kh_get(bin, bidx, bin); - if (k != kh_end(bidx)) break; - first = (hts_bin_parent(bin)<<3) + 1; - if (bin > first) --bin; - 
else bin = hts_bin_parent(bin); - } while (bin); - if (bin == 0) k = kh_get(bin, bidx, bin); - min_off = k != kh_end(bidx)? kh_val(bidx, k).loff : 0; - // min_off can be calculated more accurately if the - // linear index is available - if (idx->lidx[tid].offset - && rel_off < idx->lidx[tid].n) { - if (min_off < idx->lidx[tid].offset[rel_off]) - min_off = idx->lidx[tid].offset[rel_off]; - if (unmapped) { - // unmapped reads are not covered by the linear index, - // so search backwards for a smaller offset - int tmp_off; - for (tmp_off = rel_off-1; tmp_off >= 0; tmp_off--) { - if (idx->lidx[tid].offset[tmp_off] < min_off) { - min_off = idx->lidx[tid].offset[tmp_off]; - break; - } - } - // if the search went too far back or no satisfactory entry - // was found, revert to the bin index loff value - if (k != kh_end(bidx) && (min_off < kh_val(bidx, k).loff || tmp_off < 0)) - min_off = kh_val(bidx, k).loff; - } - } else if (unmapped) { //CSI index - if (k != kh_end(bidx)) - min_off = kh_val(bidx, k).loff; - } - - // compute max_off: a virtual offset from a bin to the right of end - // First check if end lies within the range of the index (it won't - // if it's HTS_POS_MAX) - if (end < 1LL << (idx->min_shift + 3 * idx->n_lvls)) { - bin = hts_bin_first(idx->n_lvls) + ((end-1) >> idx->min_shift) + 1; - if (bin >= idx->n_bins) bin = 0; - while (1) { - // search for an extant bin by moving right, but moving up to the - // parent whenever we get to a first child (which also covers falling - // off the RHS, which wraps around and immediately goes up to bin 0) - while (bin % 8 == 1) bin = hts_bin_parent(bin); - if (bin == 0) { max_off = UINT64_MAX; break; } - k = kh_get(bin, bidx, bin); - if (k != kh_end(bidx) && kh_val(bidx, k).n > 0) { max_off = kh_val(bidx, k).list[0].u; break; } - bin++; - } - } else { - // Searching to end of reference - max_off = UINT64_MAX; - } - - // retrieve bins - if (reg2bins(beg, end, iter, idx->min_shift, idx->n_lvls, bidx) < 0) { - 
hts_itr_destroy(iter); - return NULL; - } - - for (i = n_off = 0; i < iter->bins.n; ++i) - if ((k = kh_get(bin, bidx, iter->bins.a[i])) != kh_end(bidx)) - n_off += kh_value(bidx, k).n; - if (n_off == 0) { - // No overlapping bins means the iterator has already finished. - iter->finished = 1; - return iter; - } - off = calloc(n_off, sizeof(*off)); - for (i = n_off = 0; i < iter->bins.n; ++i) { - if ((k = kh_get(bin, bidx, iter->bins.a[i])) != kh_end(bidx)) { - int j; - bins_t *p = &kh_value(bidx, k); - for (j = 0; j < p->n; ++j) - if (p->list[j].v > min_off && p->list[j].u < max_off) { - off[n_off].u = min_off > p->list[j].u - ? min_off : p->list[j].u; - off[n_off].v = max_off < p->list[j].v - ? max_off : p->list[j].v; - // hts_pair64_max_t::max is now used to link - // file offsets to region list entries. - // The iterator can use this to decide if it - // can skip some file regions. - off[n_off].max = ((uint64_t) tid << 32) | j; - n_off++; - } - } - } - - if (n_off == 0) { - free(off); - iter->finished = 1; - return iter; - } - ks_introsort(_off_max, n_off, off); - // resolve completely contained adjacent blocks - for (i = 1, l = 0; i < n_off; ++i) - if (off[l].v < off[i].v) off[++l] = off[i]; - n_off = l + 1; - // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing - for (i = 1; i < n_off; ++i) - if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u; - // merge adjacent blocks - for (i = 1, l = 0; i < n_off; ++i) { - if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v; - else off[++l] = off[i]; - } - n_off = l + 1; - iter->n_off = n_off; iter->off = off; - } - } - - return iter; -} - -int hts_itr_multi_bam(const hts_idx_t *idx, hts_itr_t *iter) -{ - int i, j, bin; - khint_t k; - bidx_t *bidx; - uint64_t min_off, max_off, t_off = (uint64_t)-1; - int tid; - hts_pos_t beg, end; - hts_reglist_t *curr_reg; - uint32_t unmapped = 0, rel_off; - - if (!idx || !iter || !iter->multi) - return -1; - - iter->i = -1; - for (i=0; in_reg; i++) { 
- - curr_reg = &iter->reg_list[i]; - tid = curr_reg->tid; - - if (tid < 0) { - t_off = hts_itr_off(idx, tid); - if (t_off != (uint64_t)-1) { - switch (tid) { - case HTS_IDX_NONE: - iter->finished = 1; - // fall through - case HTS_IDX_START: - case HTS_IDX_REST: - iter->curr_off = t_off; - iter->n_reg = 0; - iter->reg_list = NULL; - iter->read_rest = 1; - return 0; - case HTS_IDX_NOCOOR: - iter->nocoor = 1; - iter->nocoor_off = t_off; - } - } - } else { - if (tid >= idx->n || (bidx = idx->bidx[tid]) == NULL || !kh_size(bidx)) - continue; - - k = kh_get(bin, bidx, META_BIN(idx)); - if (k != kh_end(bidx)) - unmapped = kh_val(bidx, k).list[1].v; - else - unmapped = 1; - - for(j=0; jcount; j++) { - hts_pair32_t *curr_intv = &curr_reg->intervals[j]; - if (curr_intv->end < curr_intv->beg) - continue; - - beg = curr_intv->beg; - end = curr_intv->end; - rel_off = beg>>idx->min_shift; - - /* Compute 'min_off' by searching the lowest level bin containing 'beg'. - If the computed bin is not in the index, try the next bin to the - left, belonging to the same parent. If it is the first sibling bin, - try the parent bin. */ - bin = hts_bin_first(idx->n_lvls) + rel_off; - do { - int first; - k = kh_get(bin, bidx, bin); - if (k != kh_end(bidx)) break; - first = (hts_bin_parent(bin)<<3) + 1; - if (bin > first) --bin; - else bin = hts_bin_parent(bin); - } while (bin); - if (bin == 0) - k = kh_get(bin, bidx, bin); - min_off = k != kh_end(bidx)? 
kh_val(bidx, k).loff : 0; - // min_off can be calculated more accurately if the - // linear index is available - if (idx->lidx[tid].offset - && rel_off < idx->lidx[tid].n) { - if (min_off < idx->lidx[tid].offset[rel_off]) - min_off = idx->lidx[tid].offset[rel_off]; - if (unmapped) { - int tmp_off; - for (tmp_off = rel_off-1; tmp_off >= 0; tmp_off--) { - if (idx->lidx[tid].offset[tmp_off] < min_off) { - min_off = idx->lidx[tid].offset[tmp_off]; - break; - } - } - - if (k != kh_end(bidx) && (min_off < kh_val(bidx, k).loff || tmp_off < 0)) - min_off = kh_val(bidx, k).loff; - } - } else if (unmapped) { //CSI index - if (k != kh_end(bidx)) - min_off = kh_val(bidx, k).loff; - } - - // compute max_off: a virtual offset from a bin to the right of end - // First check if end lies within the range of the index (it - // won't if it's HTS_POS_MAX) - if (end < 1LL << (idx->min_shift + 3 * idx->n_lvls)) { - bin = hts_bin_first(idx->n_lvls) + ((end-1) >> idx->min_shift) + 1; - if (bin >= idx->n_bins) bin = 0; - while (1) { - // search for an extant bin by moving right, but moving up to the - // parent whenever we get to a first child (which also covers falling - // off the RHS, which wraps around and immediately goes up to bin 0) - while (bin % 8 == 1) bin = hts_bin_parent(bin); - if (bin == 0) { max_off = UINT64_MAX; break; } - k = kh_get(bin, bidx, bin); - if (k != kh_end(bidx) && kh_val(bidx, k).n > 0) { - max_off = kh_val(bidx, k).list[0].u; - break; - } - bin++; - } - } else { - // Searching to end of reference - max_off = UINT64_MAX; - } - - //convert coordinates to file offsets - if (reg2intervals(iter, idx, tid, beg, end, j, - min_off, max_off, - idx->min_shift, idx->n_lvls) < 0) { - return -1; - } - } - } - } - - if (iter->n_off > 1) - ks_introsort(_off_max, iter->n_off, iter->off); - - if(!iter->n_off && !iter->nocoor) - iter->finished = 1; - - return 0; -} - -int hts_itr_multi_cram(const hts_idx_t *idx, hts_itr_t *iter) -{ - const hts_cram_idx_t *cidx = (const 
hts_cram_idx_t *) idx; - int tid, i, n_off = 0; - uint32_t j; - hts_pos_t beg, end; - hts_reglist_t *curr_reg; - hts_pair32_t *curr_intv; - hts_pair64_max_t *off = NULL, *tmp; - cram_index *e = NULL; - - if (!cidx || !iter || !iter->multi) - return -1; - - iter->is_cram = 1; - iter->read_rest = 0; - iter->off = NULL; - iter->n_off = 0; - iter->curr_off = 0; - iter->i = -1; - - for (i=0; in_reg; i++) { - - curr_reg = &iter->reg_list[i]; - tid = curr_reg->tid; - - if (tid >= 0) { - tmp = realloc(off, (n_off + curr_reg->count) * sizeof(*off)); - if (!tmp) - goto err; - off = tmp; - - for (j=0; j < curr_reg->count; j++) { - curr_intv = &curr_reg->intervals[j]; - if (curr_intv->end < curr_intv->beg) - continue; - - beg = curr_intv->beg; - end = curr_intv->end; - -/* First, fetch the container overlapping 'beg' and assign its file offset to u, then - * find the container overlapping 'end' and assign the relative end of the slice to v. - * The cram_ptell function will adjust with the container offset, which is not stored - * in the index. - */ - e = cram_index_query(cidx->cram, tid, beg+1, NULL); - if (e) { - off[n_off].u = e->offset; - // hts_pair64_max_t::max is now used to link - // file offsets to region list entries. - // The iterator can use this to decide if it - // can skip some file regions. - off[n_off].max = ((uint64_t) tid << 32) | j; - - if (end >= HTS_POS_MAX) { - e = cram_index_last(cidx->cram, tid, NULL); - } else { - e = cram_index_query_last(cidx->cram, tid, end+1); - } - - if (e) { - off[n_off++].v = e->e_next - ? e->e_next->offset - : e->offset + e->slice + e->len; - } else { - hts_log_warning("Could not set offset end for region %d:%"PRIhts_pos"-%"PRIhts_pos". 
Skipping", tid, beg, end); - } - } - } - } else { - switch (tid) { - case HTS_IDX_NOCOOR: - e = cram_index_query(cidx->cram, tid, 1, NULL); - if (e) { - iter->nocoor = 1; - iter->nocoor_off = e->offset; - } else { - hts_log_warning("No index entry for NOCOOR region"); - } - break; - case HTS_IDX_START: - e = cram_index_query(cidx->cram, tid, 1, NULL); - if (e) { - iter->read_rest = 1; - tmp = realloc(off, sizeof(*off)); - if (!tmp) - goto err; - off = tmp; - off[0].u = e->offset; - off[0].v = 0; - n_off=1; - } else { - hts_log_warning("No index entries"); - } - break; - case HTS_IDX_REST: - break; - case HTS_IDX_NONE: - iter->finished = 1; - break; - default: - hts_log_error("Query with tid=%d not implemented for CRAM files", tid); - } - } - } - - if (n_off) { - ks_introsort(_off_max, n_off, off); - iter->n_off = n_off; iter->off = off; - } - - if(!n_off && !iter->nocoor) - iter->finished = 1; - - return 0; - - err: - free(off); - return -1; -} - -void hts_itr_destroy(hts_itr_t *iter) -{ - if (iter) { - if (iter->multi) { - hts_reglist_free(iter->reg_list, iter->n_reg); - } else { - free(iter->bins.a); - } - - if (iter->off) - free(iter->off); - free(iter); - } -} - -static inline long long push_digit(long long i, char c) -{ - // ensure subtraction occurs first, avoiding overflow for >= MAX-48 or so - int digit = c - '0'; - return 10 * i + digit; -} - -long long hts_parse_decimal(const char *str, char **strend, int flags) -{ - long long n = 0; - int digits = 0, decimals = 0, e = 0, lost = 0; - char sign = '+', esign = '+'; - const char *s, *str_orig = str; - - while (isspace_c(*str)) str++; - s = str; - - if (*s == '+' || *s == '-') sign = *s++; - while (*s) - if (isdigit_c(*s)) digits++, n = push_digit(n, *s++); - else if (*s == ',' && (flags & HTS_PARSE_THOUSANDS_SEP)) s++; - else break; - - if (*s == '.') { - s++; - while (isdigit_c(*s)) decimals++, digits++, n = push_digit(n, *s++); - } - - switch (*s) { - case 'e': case 'E': - s++; - if (*s == '+' || *s == 
'-') esign = *s++; - while (isdigit_c(*s)) e = push_digit(e, *s++); - if (esign == '-') e = -e; - break; - - case 'k': case 'K': e += 3; s++; break; - case 'm': case 'M': e += 6; s++; break; - case 'g': case 'G': e += 9; s++; break; - } - - e -= decimals; - while (e > 0) n *= 10, e--; - while (e < 0) lost += n % 10, n /= 10, e++; - - if (lost > 0) { - hts_log_warning("Discarding fractional part of %.*s", (int)(s - str), str); - } - - if (strend) { - // Set to the original input str pointer if not valid number syntax - *strend = (digits > 0)? (char *)s : (char *)str_orig; - } else if (digits == 0) { - hts_log_warning("Invalid numeric value %.8s[truncated]", str); - } else if (*s) { - if ((flags & HTS_PARSE_THOUSANDS_SEP) || (!(flags & HTS_PARSE_THOUSANDS_SEP) && *s != ',')) - hts_log_warning("Ignoring unknown characters after %.*s[%s]", (int)(s - str), str, s); - } - - return (sign == '+')? n : -n; -} - -static void *hts_memrchr(const void *s, int c, size_t n) { - size_t i; - unsigned char *u = (unsigned char *)s; - for (i = n; i > 0; i--) { - if (u[i-1] == c) - return u+i-1; - } - - return NULL; -} - -/* - * A variant of hts_parse_reg which is reference-id aware. It uses - * the iterator name2id callbacks to validate the region tokenisation works. - * - * This is necessary due to GRCh38 HLA additions which have reference names - * like "HLA-DRB1*12:17". - * - * All parameters are mandatory. - * - * To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" - * are reference names, we may quote using curly braces. - * Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. - * - * Flags are used to control how parsing works, and can be one of the below. - * - * HTS_PARSE_LIST: - * If present, the region is assmed to be a comma separated list and - * position parsing will not contain commas (this implicitly - * clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal). 
- * On success the return pointer will be the start of the next region, ie - * the character after the comma. (If *ret != '\0' then the caller can - * assume another region is present in the list.) - * - * If not set then positions may contain commas. In this case the return - * value should point to the end of the string, or NULL on failure. - * - * HTS_PARSE_ONE_COORD: - * If present, X:100 is treated as the single base pair region X:100-100. - * In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-. - * (This is the standard bcftools region convention.) - * - * When not set X:100 is considered to be X:100- where is - * the end of chromosome X (set to HTS_POS_MAX here). X:100- and X:-100 - * are invalid. - * (This is the standard samtools region convention.) - * - * Note the supplied string expects 1 based inclusive coordinates, but the - * returned coordinates start from 0 and are half open, so pos0 is valid - * for use in e.g. "for (pos0 = beg; pos0 < end; pos0++) {...}" - * - * On success a pointer to the byte after the end of the entire region - * specifier is returned (plus any trailing comma), and tid, - * beg & end will be set. - * On failure NULL is returned. - */ -const char *hts_parse_region(const char *s, int *tid, hts_pos_t *beg, - hts_pos_t *end, hts_name2id_f getid, void *hdr, - int flags) -{ - if (!s || !tid || !beg || !end || !getid) - return NULL; - - size_t s_len = strlen(s); - kstring_t ks = { 0, 0, NULL }; - - const char *colon = NULL, *comma = NULL; - int quoted = 0; - - if (flags & HTS_PARSE_LIST) - flags &= ~HTS_PARSE_THOUSANDS_SEP; - else - flags |= HTS_PARSE_THOUSANDS_SEP; - - const char *s_end = s + s_len; - - // Braced quoting of references is permitted to resolve ambiguities. 
- if (*s == '{') { - const char *close = memchr(s, '}', s_len); - if (!close) { - hts_log_error("Mismatching braces in \"%s\"", s); - *tid = -1; - return NULL; - } - s++; - s_len--; - if (close[1] == ':') - colon = close+1; - quoted = 1; // number of trailing characters to trim - - // Truncate to this item only, if appropriate. - if (flags & HTS_PARSE_LIST) { - comma = strchr(close, ','); - if (comma) { - s_len = comma-s; - s_end = comma+1; - } - } - } else { - // Truncate to this item only, if appropriate. - if (flags & HTS_PARSE_LIST) { - comma = strchr(s, ','); - if (comma) { - s_len = comma-s; - s_end = comma+1; - } - } - - colon = hts_memrchr(s, ':', s_len); - } - - // No colon is simplest case; just check and return. - if (colon == NULL) { - *beg = 0; *end = HTS_POS_MAX; - kputsn(s, s_len-quoted, &ks); // convert to nul terminated string - if (!ks.s) { - *tid = -2; - return NULL; - } - - *tid = getid(hdr, ks.s); - free(ks.s); - - return *tid >= 0 ? s_end : NULL; - } - - // Has a colon, but check whole name first. - if (!quoted) { - *beg = 0; *end = HTS_POS_MAX; - kputsn(s, s_len, &ks); // convert to nul terminated string - if (!ks.s) { - *tid = -2; - return NULL; - } - if ((*tid = getid(hdr, ks.s)) >= 0) { - // Entire name matches, but also check this isn't - // ambiguous. eg we have ref chr1 and ref chr1:100-200 - // both present. - ks.l = 0; - kputsn(s, colon-s, &ks); // convert to nul terminated string - if (!ks.s) { - *tid = -2; - return NULL; - } - if (getid(hdr, ks.s) >= 0) { - free(ks.s); - *tid = -1; - hts_log_error("Range is ambiguous. " - "Use {%s} or {%.*s}%s instead", - s, (int)(colon-s), s, colon); - return NULL; - } - free(ks.s); - - return s_end; - } - if (*tid < -1) // Failed to parse header - return NULL; - } - - // Quoted, or unquoted and whole string isn't a name. - // Check the pre-colon part is valid. 
- ks.l = 0; - kputsn(s, colon-s-quoted, &ks); // convert to nul terminated string - if (!ks.s) { - *tid = -2; - return NULL; - } - *tid = getid(hdr, ks.s); - free(ks.s); - if (*tid < 0) - return NULL; - - // Finally parse the post-colon coordinates - char *hyphen; - *beg = hts_parse_decimal(colon+1, &hyphen, flags) - 1; - if (*beg < 0) { - if (*beg != -1 && *hyphen == '-' && colon[1] != '\0') { - // User specified zero, but we're 1-based. - hts_log_error("Coordinates must be > 0"); - return NULL; - } - if (isdigit_c(*hyphen) || *hyphen == '\0' || *hyphen == ',') { - // interpret chr:-100 as chr:1-100 - *end = *beg==-1 ? HTS_POS_MAX : -(*beg+1); - *beg = 0; - return s_end; - } else if (*beg < -1) { - hts_log_error("Unexpected string \"%s\" after region", hyphen); - return NULL; - } - } - - if (*hyphen == '\0' || ((flags & HTS_PARSE_LIST) && *hyphen == ',')) { - *end = flags & HTS_PARSE_ONE_COORD ? *beg+1 : HTS_POS_MAX; - } else if (*hyphen == '-') { - *end = hts_parse_decimal(hyphen+1, &hyphen, flags); - if (*hyphen != '\0' && *hyphen != ',') { - hts_log_error("Unexpected string \"%s\" after region", hyphen); - return NULL; - } - } else { - hts_log_error("Unexpected string \"%s\" after region", hyphen); - return NULL; - } - - if (*end == 0) - *end = HTS_POS_MAX; // interpret chr:100- as chr:100- - - if (*beg >= *end) return NULL; - - return s_end; -} - -// Next release we should mark this as deprecated? -// Use hts_parse_region above instead. 
-const char *hts_parse_reg64(const char *s, hts_pos_t *beg, hts_pos_t *end) -{ - char *hyphen; - const char *colon = strrchr(s, ':'); - if (colon == NULL) { - *beg = 0; *end = HTS_POS_MAX; - return s + strlen(s); - } - - *beg = hts_parse_decimal(colon+1, &hyphen, HTS_PARSE_THOUSANDS_SEP) - 1; - if (*beg < 0) *beg = 0; - - if (*hyphen == '\0') *end = HTS_POS_MAX; - else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, NULL, HTS_PARSE_THOUSANDS_SEP); - else return NULL; - - if (*beg >= *end) return NULL; - return colon; -} - -const char *hts_parse_reg(const char *s, int *beg, int *end) -{ - hts_pos_t beg64 = 0, end64 = 0; - const char *colon = hts_parse_reg64(s, &beg64, &end64); - if (beg64 > INT_MAX) { - hts_log_error("Position %"PRId64" too large", beg64); - return NULL; - } - if (end64 > INT_MAX) { - if (end64 == HTS_POS_MAX) { - end64 = INT_MAX; - } else { - hts_log_error("Position %"PRId64" too large", end64); - return NULL; - } - } - *beg = beg64; - *end = end64; - return colon; -} - -hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec) -{ - int tid; - hts_pos_t beg, end; - - if (strcmp(reg, ".") == 0) - return itr_query(idx, HTS_IDX_START, 0, 0, readrec); - else if (strcmp(reg, "*") == 0) - return itr_query(idx, HTS_IDX_NOCOOR, 0, 0, readrec); - - if (!hts_parse_region(reg, &tid, &beg, &end, getid, hdr, HTS_PARSE_THOUSANDS_SEP)) - return NULL; - - return itr_query(idx, tid, beg, end, readrec); -} - -hts_itr_t *hts_itr_regions(const hts_idx_t *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, hts_itr_multi_query_func *itr_specific, hts_readrec_func *readrec, hts_seek_func *seek, hts_tell_func *tell) { - - int i; - - if (!reglist) - return NULL; - - hts_itr_t *itr = (hts_itr_t*)calloc(1, sizeof(hts_itr_t)); - if (itr) { - itr->n_reg = count; - itr->readrec = readrec; - itr->seek = seek; - itr->tell = tell; - itr->reg_list = reglist; - 
itr->finished = 0; - itr->nocoor = 0; - itr->multi = 1; - - for (i = 0; i < itr->n_reg; i++) { - if (itr->reg_list[i].reg) { - if (!strcmp(itr->reg_list[i].reg, ".")) { - itr->reg_list[i].tid = HTS_IDX_START; - continue; - } - - if (!strcmp(itr->reg_list[i].reg, "*")) { - itr->reg_list[i].tid = HTS_IDX_NOCOOR; - continue; - } - - itr->reg_list[i].tid = getid(hdr, reglist[i].reg); - if (itr->reg_list[i].tid < 0) { - if (itr->reg_list[i].tid < -1) { - hts_log_error("Failed to parse header"); - hts_itr_destroy(itr); - return NULL; - } else { - hts_log_warning("Region '%s' specifies an unknown reference name. Continue anyway", reglist[i].reg); - } - } - } - } - - qsort(itr->reg_list, itr->n_reg, sizeof(hts_reglist_t), compare_regions); - if (itr_specific(idx, itr) != 0) { - hts_log_error("Failed to create the multi-region iterator!"); - hts_itr_destroy(itr); - itr = NULL; - } - } - - return itr; -} - -int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) -{ - int ret, tid; - hts_pos_t beg, end; - if (iter == NULL || iter->finished) return -1; - if (iter->read_rest) { - if (iter->curr_off) { // seek to the start - if (bgzf_seek(fp, iter->curr_off, SEEK_SET) < 0) { - hts_log_error("Failed to seek to offset %"PRIu64"%s%s", - iter->curr_off, - errno ? ": " : "", strerror(errno)); - return -2; - } - iter->curr_off = 0; // only seek once - } - ret = iter->readrec(fp, data, r, &tid, &beg, &end); - if (ret < 0) iter->finished = 1; - iter->curr_tid = tid; - iter->curr_beg = beg; - iter->curr_end = end; - return ret; - } - // A NULL iter->off should always be accompanied by iter->finished. 
- assert(iter->off != NULL); - for (;;) { - if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk - if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks - if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek - if (bgzf_seek(fp, iter->off[iter->i+1].u, SEEK_SET) < 0) { - hts_log_error("Failed to seek to offset %"PRIu64"%s%s", - iter->off[iter->i+1].u, - errno ? ": " : "", strerror(errno)); - return -2; - } - iter->curr_off = bgzf_tell(fp); - } - ++iter->i; - } - if ((ret = iter->readrec(fp, data, r, &tid, &beg, &end)) >= 0) { - iter->curr_off = bgzf_tell(fp); - if (tid != iter->tid || beg >= iter->end) { // no need to proceed - ret = -1; break; - } else if (end > iter->beg && iter->end > beg) { - iter->curr_tid = tid; - iter->curr_beg = beg; - iter->curr_end = end; - return ret; - } - } else break; // end of file or error - } - iter->finished = 1; - return ret; -} - -int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r) -{ - void *fp; - int ret, tid, i, cr, ci; - hts_pos_t beg, end; - hts_reglist_t *found_reg; - - if (iter == NULL || iter->finished) return -1; - - if (iter->is_cram) { - fp = fd->fp.cram; - } else { - fp = fd->fp.bgzf; - } - - if (iter->read_rest) { - if (iter->curr_off) { // seek to the start - if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { - hts_log_error("Seek at offset %" PRIu64 " failed.", iter->curr_off); - return -1; - } - iter->curr_off = 0; // only seek once - } - - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); - if (ret < 0) - iter->finished = 1; - - iter->curr_tid = tid; - iter->curr_beg = beg; - iter->curr_end = end; - - return ret; - } - // A NULL iter->off should always be accompanied by iter->finished. 
- assert(iter->off != NULL || iter->nocoor != 0); - - int next_range = 0; - for (;;) { - // Note that due to the way bam indexing works, iter->off may contain - // file chunks that are not actually needed as they contain data - // beyond the end of the requested region. These are filtered out - // by comparing the tid and index into hts_reglist_t::intervals - // (packed for reasons of convenience into iter->off[iter->i].max) - // associated with the file region with iter->curr_tid and - // iter->curr_intv. - - if (next_range - || iter->curr_off == 0 - || iter->i >= iter->n_off - || iter->curr_off >= iter->off[iter->i].v - || (iter->off[iter->i].max >> 32 == iter->curr_tid - && (iter->off[iter->i].max & 0xffffffff) < iter->curr_intv)) { - - // Jump to the next chunk. It may be necessary to skip more - // than one as the iter->off list can include overlapping entries. - do { - iter->i++; - } while (iter->i < iter->n_off - && (iter->curr_off >= iter->off[iter->i].v - || (iter->off[iter->i].max >> 32 == iter->curr_tid - && (iter->off[iter->i].max & 0xffffffff) < iter->curr_intv))); - - if (iter->is_cram && iter->i < iter->n_off) { - // Ensure iter->curr_reg is correct. - // - // We need this for CRAM as we shortcut some of the later - // logic by getting an end-of-range and continuing to the - // next offset. - // - // We cannot do this for BAM (and fortunately do not need to - // either) because in BAM world a query to genomic positions - // GX and GY leading to a seek offsets PX and PY may have - // GX > GY and PX < PY. (This is due to the R-tree and falling - // between intervals, bumping up to a higher bin.) - // CRAM strictly follows PX >= PY if GX >= GY, so this logic - // works. 
- int want_tid = iter->off[iter->i].max >> 32; - if (!(iter->curr_reg < iter->n_reg && - iter->reg_list[iter->curr_reg].tid == want_tid)) { - int j; - for (j = 0; j < iter->n_reg; j++) - if (iter->reg_list[j].tid == want_tid) - break; - if (j == iter->n_reg) - return -1; - iter->curr_reg = j; - iter->curr_tid = iter->reg_list[iter->curr_reg].tid; - }; - iter->curr_intv = iter->off[iter->i].max & 0xffffffff; - } - - if (iter->i >= iter->n_off) { // no more chunks, except NOCOORs - if (iter->nocoor) { - next_range = 0; - if (iter->seek(fp, iter->nocoor_off, SEEK_SET) < 0) { - hts_log_error("Seek at offset %" PRIu64 " failed.", iter->nocoor_off); - return -1; - } - if (iter->is_cram) { - cram_range r = { HTS_IDX_NOCOOR }; - cram_set_option(fp, CRAM_OPT_RANGE_NOSEEK, &r); - } - - // The first slice covering the unmapped reads might - // contain a few mapped reads, so scroll - // forward until finding the first unmapped read. - do { - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); - } while (tid >= 0 && ret >=0); - - if (ret < 0) - iter->finished = 1; - else - iter->read_rest = 1; - - iter->curr_off = 0; // don't seek any more - iter->curr_tid = tid; - iter->curr_beg = beg; - iter->curr_end = end; - - return ret; - } else { - ret = -1; break; - } - } else if (iter->i < iter->n_off) { - // New chunk may overlap the last one, so ensure we - // only seek forwards. - if (iter->curr_off < iter->off[iter->i].u || next_range) { - iter->curr_off = iter->off[iter->i].u; - - // CRAM has the capability of setting an end location. - // This means multi-threaded decodes can stop once they - // reach that point, rather than pointlessly decoding - // more slices than we'll be using. - // - // We have to be careful here. Whenever we set the cram - // range we need a corresponding seek in order to ensure - // we can safely decode at that offset. We use next_range - // var to ensure this is always true; this is set on - // end-of-range condition. It's never modified for BAM. 
- if (iter->is_cram) { - // Next offset.[uv] tuple, but it's already been - // included in our cram range, so don't seek and don't - // reset range so we can efficiently multi-thread. - if (next_range || iter->curr_off >= iter->end) { - if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { - hts_log_error("Seek at offset %" PRIu64 - " failed.", iter->curr_off); - return -1; - } - - // Find the genomic range matching this interval. - int j; - hts_reglist_t *rl = &iter->reg_list[iter->curr_reg]; - cram_range r = { - rl->tid, - rl->intervals[iter->curr_intv].beg, - rl->intervals[iter->curr_intv].end - }; - - // Expand it up to cover neighbouring intervals. - // Note we can only have a single chromosome in a - // range, so if we detect our blocks span chromosomes - // or we have a multi-ref mode slice, we just use - // HTS_IDX_START refid instead. This doesn't actually - // seek (due to CRAM_OPT_RANGE_NOSEEK) and is simply - // and indicator of decoding with no end limit. - // - // That isn't as efficient as it could be, but it's - // no poorer than before and it works. - int tid = r.refid; - int64_t end = r.end; - int64_t v = iter->off[iter->i].v; - j = iter->i+1; - while (j < iter->n_off) { - if (iter->off[j].u > v) - break; - - uint64_t max = iter->off[j].max; - if ((max>>32) != tid) - tid = HTS_IDX_START; // => no range limit - - if (end < rl->intervals[max & 0xffffffff].end) - end = rl->intervals[max & 0xffffffff].end; - if (v < iter->off[j].v) - v = iter->off[j].v; - j++; - } - r.refid = tid; - r.end = end; - - // Remember maximum 'v' here so we don't do - // unnecessary subsequent seeks for the next - // regions. We can't change curr_off, but - // beg/end are used only by single region iterator so - // we cache it there to avoid changing the struct. 
- iter->end = v; - - cram_set_option(fp, CRAM_OPT_RANGE_NOSEEK, &r); - next_range = 0; - } - } else { // Not CRAM - if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { - hts_log_error("Seek at offset %" PRIu64 " failed.", - iter->curr_off); - return -1; - } - } - } - } - } - - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); - if (ret < 0) { - if (iter->is_cram && cram_eof(fp)) { - // Skip to end of range - // - // We should never be adjusting curr_off manually unless - // we also can guarantee we'll be doing a seek after to - // a new location. Otherwise we'll be reading wrong offset - // for the next container. - // - // We ensure this by adjusting our CRAM_OPT_RANGE - // accordingly above, but to double check we also - // set the skipped_block flag to enforce a seek also. - iter->curr_off = iter->off[iter->i].v; - next_range = 1; - - // Next region - if (++iter->curr_intv >= iter->reg_list[iter->curr_reg].count){ - if (++iter->curr_reg >= iter->n_reg) - break; - iter->curr_intv = 0; - iter->curr_tid = iter->reg_list[iter->curr_reg].tid; - } - continue; - } else { - break; - } - } - - iter->curr_off = iter->tell(fp); - - if (tid != iter->curr_tid) { - hts_reglist_t key; - key.tid = tid; - - found_reg = (hts_reglist_t *)bsearch(&key, iter->reg_list, - iter->n_reg, - sizeof(hts_reglist_t), - compare_regions); - if (!found_reg) - continue; - - iter->curr_reg = (found_reg - iter->reg_list); - iter->curr_tid = tid; - iter->curr_intv = 0; - } - - cr = iter->curr_reg; - ci = iter->curr_intv; - - for (i = ci; i < iter->reg_list[cr].count; i++) { - if (end > iter->reg_list[cr].intervals[i].beg && - iter->reg_list[cr].intervals[i].end > beg) { - iter->curr_beg = beg; - iter->curr_end = end; - iter->curr_intv = i; - - return ret; - } - - // Check if the read starts beyond intervals[i].end - // If so, the interval is finished so move on to the next. 
- if (beg > iter->reg_list[cr].intervals[i].end) - iter->curr_intv = i + 1; - - // No need to keep searching if the read ends before intervals[i].beg - if (end < iter->reg_list[cr].intervals[i].beg) - break; - } - } - iter->finished = 1; - - return ret; -} - -/********************** - *** Retrieve index *** - **********************/ -// Local_fn and local_len will return a sub-region of 'fn'. -// Eg http://elsewhere/dir/foo.bam.bai?a=b may return -// foo.bam.bai via local_fn and local_len. -// -// Returns -1 if index couldn't be opened. -// -2 on other errors -static int idx_test_and_fetch(const char *fn, const char **local_fn, int *local_len, int download) -{ - hFILE *remote_hfp = NULL; - hFILE *local_fp = NULL; - int save_errno; - htsFormat fmt; - kstring_t s = KS_INITIALIZE; - kstring_t tmps = KS_INITIALIZE; - - if (hisremote(fn)) { - const int buf_size = 1 * 1024 * 1024; - int l; - const char *p, *e; - // Ignore ?# params: eg any file.fmt?param=val, except for S3 URLs - e = fn + ((strncmp(fn, "s3://", 5) && strncmp(fn, "s3+http://", 10) && strncmp(fn, "s3+https://", 11)) ? strcspn(fn, "?#") : strcspn(fn, "?")); - // Find the previous slash from there. - p = e; - while (p > fn && *p != '/') p--; - if (*p == '/') p++; - - // Attempt to open local file first - kputsn(p, e-p, &s); - if (access(s.s, R_OK) == 0) - { - free(s.s); - *local_fn = p; - *local_len = e-p; - return 0; - } - - // Attempt to open remote file. Stay quiet on failure, it is OK to fail when trying first .csi then .bai or .tbi index. 
- if ((remote_hfp = hopen(fn, "r")) == 0) { - hts_log_info("Failed to open index file '%s'", fn); - free(s.s); - return -1; - } - if (hts_detect_format2(remote_hfp, fn, &fmt)) { - hts_log_error("Failed to detect format of index file '%s'", fn); - goto fail; - } - if (fmt.category != index_file || (fmt.format != bai && fmt.format != csi && fmt.format != tbi - && fmt.format != crai && fmt.format != fai_format)) { - hts_log_error("Format of index file '%s' is not supported", fn); - goto fail; - } - - if (download) { - if ((local_fp = hts_open_tmpfile(s.s, "wx", &tmps)) == NULL) { - hts_log_error("Failed to create file %s in the working directory", p); - goto fail; - } - hts_log_info("Downloading file %s to local directory", fn); - uint8_t *buf = (uint8_t*)calloc(buf_size, 1); - if (!buf) { - hts_log_error("%s", strerror(errno)); - goto fail; - } - while ((l = hread(remote_hfp, buf, buf_size)) > 0) { - if (hwrite(local_fp, buf, l) != l) { - hts_log_error("Failed to write data to %s : %s", - fn, strerror(errno)); - free(buf); - goto fail; - } - } - free(buf); - if (l < 0) { - hts_log_error("Error reading \"%s\"", fn); - goto fail; - } - if (hclose(local_fp) < 0) { - hts_log_error("Error closing %s : %s", fn, strerror(errno)); - local_fp = NULL; - goto fail; - } - local_fp = NULL; - if (rename(tmps.s, s.s) < 0) { - hts_log_error("Error renaming %s : %s", tmps.s, strerror(errno)); - goto fail; - } - ks_clear(&tmps); - - *local_fn = p; - *local_len = e-p; - } else { - *local_fn = fn; - *local_len = e-fn; - } - - if (hclose(remote_hfp) != 0) { - hts_log_error("Failed to close remote file %s", fn); - } - - free(tmps.s); - free(s.s); - return 0; - } else { - hFILE *local_hfp; - if ((local_hfp = hopen(fn, "r")) == 0) return -1; - hclose_abruptly(local_hfp); - *local_fn = fn; - *local_len = strlen(fn); - return 0; - } - - fail: - save_errno = errno; - if (remote_hfp) hclose_abruptly(remote_hfp); - if (local_fp) hclose_abruptly(local_fp); - if (tmps.l > 0) unlink(tmps.s); - 
free(tmps.s); - free(s.s); - errno = save_errno; - return -2; -} - -/* - * Check the existence of a local index file using part of the alignment file name. - * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai - * @param fn - pointer to the file name - * @param fnidx - pointer to the index file name placeholder - * @return 1 for success, 0 for failure - */ -int hts_idx_check_local(const char *fn, int fmt, char **fnidx) { - int i, l_fn, l_ext; - const char *fn_tmp = NULL; - char *fnidx_tmp; - char *csi_ext = ".csi"; - char *bai_ext = ".bai"; - char *tbi_ext = ".tbi"; - char *crai_ext = ".crai"; - char *fai_ext = ".fai"; - - if (!fn) - return 0; - - if (hisremote(fn)) { - for (i = strlen(fn) - 1; i >= 0; --i) - if (fn[i] == '/') { - fn_tmp = (char *)&fn[i+1]; - break; - } - } else { - // Borrowed from hopen_fd_fileuri() - if (strncmp(fn, "file://localhost/", 17) == 0) fn_tmp = fn + 16; - else if (strncmp(fn, "file:///", 8) == 0) fn_tmp = fn + 7; - else fn_tmp = fn; -#if defined(_WIN32) || defined(__MSYS__) - // For cases like C:/foo - if (fn_tmp[0] == '/' && fn_tmp[1] && fn_tmp[2] == ':' && fn_tmp[3] == '/') - fn_tmp++; -#endif - } - - if (!fn_tmp) return 0; - hts_log_info("Using alignment file '%s'", fn_tmp); - l_fn = strlen(fn_tmp); l_ext = 5; - fnidx_tmp = (char*)calloc(l_fn + l_ext + 1, 1); - if (!fnidx_tmp) return 0; - - struct stat sbuf; - - // Try alignment.bam.csi first - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, csi_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } else { // Then try alignment.csi - for (i = l_fn - 1; i > 0; --i) - if (fnidx_tmp[i] == '.') { - strcpy(fnidx_tmp + i, csi_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } - break; - } - } - if (fmt == HTS_FMT_BAI) { - // Next, try alignment.bam.bai - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, bai_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } else { // And 
finally, try alignment.bai - for (i = l_fn - 1; i > 0; --i) - if (fnidx_tmp[i] == '.') { - strcpy(fnidx_tmp + i, bai_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } - break; - } - } - } else if (fmt == HTS_FMT_TBI) { // Or .tbi - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, tbi_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } else { - for (i = l_fn - 1; i > 0; --i) - if (fnidx_tmp[i] == '.') { - strcpy(fnidx_tmp + i, tbi_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } - break; - } - } - } else if (fmt == HTS_FMT_CRAI) { // Or .crai - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, crai_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } else { - for (i = l_fn - 1; i > 0; --i) - if (fnidx_tmp[i] == '.') { - strcpy(fnidx_tmp + i, crai_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } - break; - } - } - } else if (fmt == HTS_FMT_FAI) { // Or .fai - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, fai_ext); - *fnidx = fnidx_tmp; - if(stat(fnidx_tmp, &sbuf) == 0) - return 1; - else - return 0; - } - - free(fnidx_tmp); - return 0; -} - -static char *idx_filename(const char *fn, const char *ext, int download) { - int ret, local_len; - char *fnidx; - const char *local_fn = NULL; - kstring_t buffer = KS_INITIALIZE; - - // First try : append `ext` to `fn` - if (!(fnidx = haddextension(&buffer, fn, 0, ext))) { - free(buffer.s); - return NULL; - } - if ((ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, download)) == -1) { - // Second try : replace suffix of `fn` with `ext` - if (!(fnidx = haddextension(&buffer, fn, 1, ext))) { - free(buffer.s); - return NULL; - } - ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, download); - } - - if (ret < 0) { - free(buffer.s); - return NULL; - } - - memmove(fnidx, local_fn, local_len); - fnidx[local_len] = 0; - return fnidx; -} - -char *hts_idx_getfn(const char 
*fn, const char *ext) -{ - return idx_filename(fn, ext, HTS_IDX_SAVE_REMOTE); -} - -char *hts_idx_locatefn(const char *fn, const char *ext) -{ - return idx_filename(fn, ext, 0); -} - -static hts_idx_t *idx_find_and_load(const char *fn, int fmt, int flags) -{ - char *fnidx = strstr(fn, HTS_IDX_DELIM); - hts_idx_t *idx; - - if ( fnidx ) { - char *fn2 = strdup(fn); - if (!fn2) { - hts_log_error("%s", strerror(errno)); - return NULL; - } - fn2[fnidx - fn] = '\0'; - fnidx += strlen(HTS_IDX_DELIM); - idx = hts_idx_load3(fn2, fnidx, fmt, flags); - free(fn2); - return idx; - } - - if (hts_idx_check_local(fn, fmt, &fnidx) == 0 && hisremote(fn)) { - if (flags & HTS_IDX_SAVE_REMOTE) { - fnidx = idx_filename(fn, ".csi", HTS_IDX_SAVE_REMOTE); - if (!fnidx) { - switch (fmt) { - case HTS_FMT_BAI: fnidx = idx_filename(fn, ".bai", HTS_IDX_SAVE_REMOTE); break; - case HTS_FMT_TBI: fnidx = idx_filename(fn, ".tbi", HTS_IDX_SAVE_REMOTE); break; - default: break; - } - } - } else { - fnidx = idx_filename(fn, ".csi", 0); - if (!fnidx) { - switch (fmt) { - case HTS_FMT_BAI: fnidx = idx_filename(fn, ".bai", 0); break; - case HTS_FMT_TBI: fnidx = idx_filename(fn, ".tbi", 0); break; - default: break; - } - } - } - } - if (!fnidx) { - if (!(flags & HTS_IDX_SILENT_FAIL)) - hts_log_error("Could not retrieve index file for '%s'", fn); - return 0; - } - - if (flags & HTS_IDX_SAVE_REMOTE) - idx = hts_idx_load3(fn, fnidx, fmt, flags); - else - idx = idx_read(fnidx); - free(fnidx); - return idx; -} - -hts_idx_t *hts_idx_load(const char *fn, int fmt) { - return idx_find_and_load(fn, fmt, 1); -} - -hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx) -{ - return hts_idx_load3(fn, fnidx, 0, 0); -} - -hts_idx_t *hts_idx_load3(const char *fn, const char *fnidx, int fmt, int flags) -{ - const char *local_fn = NULL; - char *local_fnidx = NULL; - int local_len; - if (!fnidx) - return idx_find_and_load(fn, fmt, flags); - - // Check that the index file is up to date, the main file might have changed - 
struct stat stat_idx,stat_main; - int remote_fn = hisremote(fn), remote_fnidx = hisremote(fnidx); - if ( !remote_fn && !remote_fnidx - && !stat(fn, &stat_main) && !stat(fnidx, &stat_idx) ) - { - if ( stat_idx.st_mtime < stat_main.st_mtime ) - hts_log_warning("The index file is older than the data file: %s", fnidx); - } - - if (remote_fnidx && (flags & HTS_IDX_SAVE_REMOTE)) - { - int ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, 1); - if (ret == 0) { - local_fnidx = strdup(local_fn); - if (local_fnidx) { - local_fnidx[local_len] = '\0'; - fnidx = local_fnidx; - } - } - } - - hts_idx_t *idx = idx_read(fnidx); - if (!idx && !(flags & HTS_IDX_SILENT_FAIL)) - hts_log_error("Could not load local index file '%s'%s%s", fnidx, - errno ? " : " : "", errno ? strerror(errno) : ""); - - - free(local_fnidx); - - return idx; -} - - - -/********************** - *** Memory *** - **********************/ - -/* For use with hts_expand macros *only* */ -HTSLIB_EXPORT -size_t hts_realloc_or_die(size_t n, size_t m, size_t m_sz, size_t size, - int clear, void **ptr, const char *func) { - /* If new_m and size are both below this limit, multiplying them - together can't overflow */ - const size_t safe = (size_t) 1 << (sizeof(size_t) * 4); - void *new_ptr; - size_t bytes, new_m; - - new_m = n; - kroundup_size_t(new_m); - - bytes = size * new_m; - - /* Check for overflow. Both ensure that new_m will fit in m (we make the - pessimistic assumption that m is signed), and that bytes has not - wrapped around. 
*/ - if (new_m > (((size_t) 1 << (m_sz * 8 - 1)) - 1) - || ((size > safe || new_m > safe) - && bytes / new_m != size)) { - errno = ENOMEM; - goto die; - } - - new_ptr = realloc(*ptr, bytes); - if (new_ptr == NULL) goto die; - - if (clear) { - if (new_m > m) { - memset((char *) new_ptr + m * size, 0, (new_m - m) * size); - } - } - - *ptr = new_ptr; - - return new_m; - - die: - hts_log_error("%s", strerror(errno)); - exit(1); -} - -/* - * Companion to hts_resize() macro that does the actual allocation. - * - * Somewhat complicated as hts_resize() needs to write the new allocated - * size back into *size_in_out, and the value pointed to may either be - * int32_t, uint32_t or size_t depending on which array is being resized. - * This is solved by making `size_in_out` a void pointer, getting the macro - * to pass in the size of the item pointed to (in `size_sz`) and then using - * an appropriate cast (based on the value of size_sz). The function - * ensures that the maximum size will be storable in a signed type of - * the given size so storing to an int32_t should work correctly. - * - * Assumes that sizeof(uint32_t) and sizeof(int32_t) is 4, - * sizeof(uint64_t) and sizeof(int64_t) is 8 and sizeof(size_t) is - * either 4 or 8. It also assumes casting from unsigned to signed will - * work as long as the top bit isn't set. - */ - -int hts_resize_array_(size_t item_size, size_t num, size_t size_sz, - void *size_in_out, void **ptr_in_out, int flags, - const char *func) { - /* If new_size and item_size are both below this limit, multiplying them - together can't overflow */ - const size_t safe = (size_t) 1 << (sizeof(size_t) * 4); - void *new_ptr; - size_t bytes, new_size; - - new_size = num; - kroundup_size_t(new_size); - bytes = item_size * new_size; - - /* Check for overflow. Both ensure that alloc will fit in alloc_in_out (we - make the pessimistic assumption that *alloc_in_out is signed), and that - bytes has not wrapped around. 
*/ - - if ((new_size > (((size_t) 1 << (size_sz * 8 - 1)) - 1)) - || (((item_size > safe) || (new_size > safe)) - && bytes / new_size != item_size)) { - hts_log(HTS_LOG_ERROR, func, "Memory allocation too large"); - errno = ENOMEM; - return -1; - } - - new_ptr = realloc(*ptr_in_out, bytes); - if (new_ptr == NULL) { - int save_errno = errno; - hts_log(HTS_LOG_ERROR, func, "%s", strerror(errno)); - errno = save_errno; - return -1; - } - - if (flags & HTS_RESIZE_CLEAR) { - size_t old_size; - switch (size_sz) { - case 4: old_size = *((uint32_t *) size_in_out); break; - case 8: old_size = *((uint64_t *) size_in_out); break; - default: abort(); - } - if (new_size > old_size) { - memset((char *) new_ptr + old_size * item_size, 0, - (new_size - old_size) * item_size); - } - } - - switch (size_sz) { - case 4: *((uint32_t *) size_in_out) = new_size; break; - case 8: *((uint64_t *) size_in_out) = new_size; break; - default: abort(); - } - - *ptr_in_out = new_ptr; - return 0; -} - -void hts_lib_shutdown(void) -{ - hfile_shutdown(1); -} - -void hts_free(void *ptr) { - free(ptr); -} - -void hts_set_log_level(enum htsLogLevel level) -{ - hts_verbose = level; -} - -enum htsLogLevel hts_get_log_level(void) -{ - return hts_verbose; -} - -static char get_severity_tag(enum htsLogLevel severity) -{ - switch (severity) { - case HTS_LOG_ERROR: - return 'E'; - case HTS_LOG_WARNING: - return 'W'; - case HTS_LOG_INFO: - return 'I'; - case HTS_LOG_DEBUG: - return 'D'; - case HTS_LOG_TRACE: - return 'T'; - default: - break; - } - - return '*'; -} - -void hts_log(enum htsLogLevel severity, const char *context, const char *format, ...) 
-{ - int save_errno = errno; - if (severity <= hts_verbose) { - va_list argptr; - - fprintf(stderr, "[%c::%s] ", get_severity_tag(severity), context); - - va_start(argptr, format); - vfprintf(stderr, format, argptr); - va_end(argptr); - - fprintf(stderr, "\n"); - } - errno = save_errno; -} diff --git a/src/htslib-1.18/hts_expr.c b/src/htslib-1.18/hts_expr.c deleted file mode 100644 index 5e5a132..0000000 --- a/src/htslib-1.18/hts_expr.c +++ /dev/null @@ -1,921 +0,0 @@ -/* hts_expr.c -- filter expression parsing and processing. - - Copyright (C) 2020-2022 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notices and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -// TODO: -// - ?: operator for conditionals? 
- -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htslib/hts_expr.h" -#include "htslib/hts_log.h" -#include "textutils_internal.h" - -// Could also cache hts_expr_val_t stack here for kstring reuse? -#define MAX_REGEX 10 -struct hts_filter_t { - char *str; - int parsed; - int curr_regex, max_regex; - regex_t preg[MAX_REGEX]; -}; - -/* - * This is designed to be mostly C like with mostly same the precedence rules, - * with the exception of bit operators (widely considered as a mistake in C). - * It's not full C (eg no bit-shifting), but good enough for our purposes. - * - * Supported syntax, in order of precedence: - * - * Grouping: (, ), eg "(1+2)*3" - * Values: integers, floats, strings or variables - * Unary ops: +, -, !, ~ eg -10 +10, !10 (0), ~5 (bitwise not) - * Math ops: *, /, % [TODO: add // for floor division?] - * Math ops: +, - - * Bit-wise: &, ^, | [NB as 3 precedence levels, in that order] - * Conditionals: >, >=, <, <=, - * Equality: ==, !=, =~, !~ - * Boolean: &&, || - */ - -// Skip to start of term -static char *ws(char *str) { - while (*str && (*str == ' ' || *str == '\t')) - str++; - return str; -} - -static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res); - -/* - * Simple functions operating on strings only. - * length, min, max, avg. - * - * All return 0 on success, - * -1 on failure - */ -static int expr_func_length(hts_expr_val_t *res) { - if (!res->is_str) - return -1; - - res->is_str = 0; - res->d = res->s.l; - return 0; -} - -static int expr_func_min(hts_expr_val_t *res) { - if (!res->is_str) - return -1; - - size_t l = res->s.l; - int v = INT_MAX; - const uint8_t *x = (uint8_t *)res->s.s; - for (l = 0; l < res->s.l; l++) - if (v > x[l]) - v = x[l]; - - res->is_str = 0; - res->d = v == INT_MAX ? 
NAN : v; - - return 0; -} - -static int expr_func_max(hts_expr_val_t *res) { - if (!res->is_str) - return -1; - - size_t l = res->s.l; - int v = INT_MIN; - const uint8_t *x = (uint8_t *)res->s.s; - for (l = 0; l < res->s.l; l++) - if (v < x[l]) - v = x[l]; - - res->is_str = 0; - res->d = v == INT_MIN ? NAN : v; - - return 0; -} - -static int expr_func_avg(hts_expr_val_t *res) { - if (!res->is_str) - return -1; - - size_t l = res->s.l; - double v = 0; - const uint8_t *x = (uint8_t *)res->s.s; - for (l = 0; l < res->s.l; l++) - v += x[l]; - if (l) - v /= l; - - res->is_str = 0; - res->d = v; - - return 0; -} - -/* - * functions: FUNC(expr). - * Note for simplicity of parsing, the "(" must immediately follow FUNC, - * so "FUNC (x)" is invalid. - */ -static int func_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - int func_ok = -1; - switch (*str) { - case 'a': - if (strncmp(str, "avg(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = expr_func_avg(res); - } - break; - - case 'd': - if (strncmp(str, "default(", 8) == 0) { - if (expression(filt, data, fn, str+8, end, res)) return -1; - if (**end != ',') - return -1; - (*end)++; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - if (expression(filt, data, fn, ws(*end), end, &val)) return -1; - func_ok = 1; - if (!hts_expr_val_existsT(res)) { - kstring_t swap = res->s; - *res = val; - val.s = swap; - hts_expr_val_free(&val); - } - } - break; - - case 'e': - if (strncmp(str, "exists(", 7) == 0) { - if (expression(filt, data, fn, str+7, end, res)) return -1; - func_ok = 1; - res->is_true = res->d = hts_expr_val_existsT(res); - res->is_str = 0; - } else if (strncmp(str, "exp(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = 1; - res->d = exp(res->d); - res->is_str = 0; - if (isnan(res->d)) - hts_expr_val_undef(res); - } - - break; - - case 'l': - if (strncmp(str, "length(", 7) == 0) { - if 
(expression(filt, data, fn, str+7, end, res)) return -1; - func_ok = expr_func_length(res); - } else if (strncmp(str, "log(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = 1; - res->d = log(res->d); - res->is_str = 0; - if (isnan(res->d)) - hts_expr_val_undef(res); - } - break; - - case 'm': - if (strncmp(str, "min(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = expr_func_min(res); - } else if (strncmp(str, "max(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = expr_func_max(res); - } - break; - - case 'p': - if (strncmp(str, "pow(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = 1; - - if (**end != ',') - return -1; - (*end)++; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - if (expression(filt, data, fn, ws(*end), end, &val)) return -1; - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else if (res->is_str || val.is_str) { - hts_expr_val_free(&val); // arith on strings - return -1; - } else { - func_ok = 1; - res->d = pow(res->d, val.d); - hts_expr_val_free(&val); - res->is_str = 0; - } - - if (isnan(res->d)) - hts_expr_val_undef(res); - } - break; - - case 's': - if (strncmp(str, "sqrt(", 5) == 0) { - if (expression(filt, data, fn, str+5, end, res)) return -1; - func_ok = 1; - res->d = sqrt(res->d); - res->is_str = 0; - if (isnan(res->d)) - hts_expr_val_undef(res); - } - break; - } - - if (func_ok < 0) - return -1; - - str = ws(*end); - if (*str != ')') { - fprintf(stderr, "Missing ')'\n"); - return -1; - } - *end = str+1; - - return 0; -} - -/* - * simple_expr - * : identifier - * | constant - * | string - * | func_expr - * | '(' expression ')' -*/ -static int simple_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - // Main recursion step - str = ws(str); - if (*str == '(') { - if (expression(filt, data, fn, 
str+1, end, res)) return -1; - str = ws(*end); - if (*str != ')') { - fprintf(stderr, "Missing ')'\n"); - return -1; - } - *end = str+1; - - return 0; - } - - // Otherwise a basic element. - int fail = 0; - double d = hts_str2dbl(str, end, &fail); - if (str != *end) { - res->is_str = 0; - res->d = d; - } else { - // Not valid floating point syntax. - // TODO: add function call names in here; len(), sqrt(), pow(), etc - if (*str == '"') { - res->is_str = 1; - char *e = str+1; - int backslash = 0; - while (*e && *e != '"') { - if (*e == '\\') - backslash=1, e+=1+(e[1]!='\0'); - else - e++; - } - - kputsn(str+1, e-(str+1), ks_clear(&res->s)); - if (backslash) { - size_t i, j; - for (i = j = 0; i < res->s.l; i++) { - res->s.s[j++] = res->s.s[i]; - if (res->s.s[i] == '\\') { - switch (res->s.s[++i]) { - case '"': res->s.s[j-1] = '"'; break; - case '\\':res->s.s[j-1] = '\\'; break; - case 't': res->s.s[j-1] = '\t'; break; - case 'n': res->s.s[j-1] = '\n'; break; - case 'r': res->s.s[j-1] = '\r'; break; - default: res->s.s[j++] = res->s.s[i]; - } - } - } - res->s.s[j] = 0; - res->s.l = j; - } - if (*e != '"') - return -1; - *end = e+1; - } else if (fn) { - // Try lookup as variable, if not as function - if (fn(data, str, end, res) == 0) - return 0; - else - return func_expr(filt, data, fn, str, end, res); - } else { - return -1; - } - } - - return 0; -} - -/* - * unary_expr - * : simple_expr - * | '+' simple_expr - * | '-' simple_expr - * | '!' 
unary_expr // higher precedence - * | '~' unary_expr // higher precedence - */ -static int unary_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - int err; - str = ws(str); - if (*str == '+' || *str == '-') { - err = simple_expr(filt, data, fn, str+1, end, res); - if (!hts_expr_val_exists(res)) { - hts_expr_val_undef(res); - } else { - err |= res->is_str; - if (*str == '-') - res->d = -res->d; - res->is_true = res->d != 0; - } - } else if (*str == '!') { - err = unary_expr(filt, data, fn, str+1, end, res); - if (res->is_true) { - // Any explicitly true value becomes false - res->d = res->is_true = 0; - } else if (!hts_expr_val_exists(res)) { - // We can also still negate undef values by toggling the - // is_true override value. - res->d = res->is_true = !res->is_true; - } else if (res->is_str) { - // !null = true, !"foo" = false, NOTE: !"" = false also - res->d = res->is_true = (res->s.s == NULL); - } else { - res->d = !(int64_t)res->d; - res->is_true = res->d != 0; - } - res->is_str = 0; - } else if (*str == '~') { - err = unary_expr(filt, data, fn, str+1, end, res); - if (!hts_expr_val_exists(res)) { - hts_expr_val_undef(res); - } else { - err |= res->is_str; - if (!hts_expr_val_exists(res)) { - hts_expr_val_undef(res); - } else { - res->d = ~(int64_t)res->d; - res->is_true = res->d != 0; - } - } - } else { - err = simple_expr(filt, data, fn, str, end, res); - } - return err ? 
-1 : 0; -} - - -/* - * mul_expr - * : unary_expr ( - * '*' unary_expr - * | '/' unary_expr - * | '%' unary_expr - * )* - */ -static int mul_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (unary_expr(filt, data, fn, str, end, res)) - return -1; - - str = *end; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - while (*str) { - str = ws(str); - if (*str == '*' || *str == '/' || *str == '%') { - if (unary_expr(filt, data, fn, str+1, end, &val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - hts_expr_val_undef(res); - } else if (val.is_str || res->is_str) { - hts_expr_val_free(&val); - return -1; // arith on strings - } - } - - if (*str == '*') - res->d *= val.d; - else if (*str == '/') - res->d /= val.d; - else if (*str == '%') { - if (val.d) - res->d = (int64_t)res->d % (int64_t)val.d; - else - hts_expr_val_undef(res); - } else - break; - - res->is_true = hts_expr_val_exists(res) && (res->d != 0); - str = *end; - } - - hts_expr_val_free(&val); - - return 0; -} - -/* - * add_expr - * : mul_expr ( - * '+' mul_expr - * | '-' mul_expr - * )* - */ -static int add_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (mul_expr(filt, data, fn, str, end, res)) - return -1; - - str = *end; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - while (*str) { - str = ws(str); - int undef = 0; - if (*str == '+' || *str == '-') { - if (mul_expr(filt, data, fn, str+1, end, &val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - undef = 1; - } else if (val.is_str || res->is_str) { - hts_expr_val_free(&val); - return -1; // arith on strings - } - } - - if (*str == '+') - res->d += val.d; - else if (*str == '-') - res->d -= val.d; - else - break; - - if (undef) - hts_expr_val_undef(res); - else - res->is_true = res->d != 0; - - str = *end; - } - - hts_expr_val_free(&val); - - return 0; -} - -/* - * bitand_expr - * : 
add_expr - * | bitand_expr '&' add_expr - */ -static int bitand_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (add_expr(filt, data, fn, str, end, res)) return -1; - - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - int undef = 0; - for (;;) { - str = ws(*end); - if (*str == '&' && str[1] != '&') { - if (add_expr(filt, data, fn, str+1, end, &val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - undef = 1; - } else if (res->is_str || val.is_str) { - hts_expr_val_free(&val); - return -1; - } - res->is_true = (res->d = ((int64_t)res->d & (int64_t)val.d)) != 0; - } else { - break; - } - } - hts_expr_val_free(&val); - if (undef) - hts_expr_val_undef(res); - - return 0; -} - -/* - * bitxor_expr - * : bitand_expr - * | bitxor_expr '^' bitand_expr - */ -static int bitxor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (bitand_expr(filt, data, fn, str, end, res)) return -1; - - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - int undef = 0; - for (;;) { - str = ws(*end); - if (*str == '^') { - if (bitand_expr(filt, data, fn, str+1, end, &val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - undef = 1; - } else if (res->is_str || val.is_str) { - hts_expr_val_free(&val); - return -1; - } - res->is_true = (res->d = ((int64_t)res->d ^ (int64_t)val.d)) != 0; - } else { - break; - } - } - hts_expr_val_free(&val); - if (undef) - hts_expr_val_undef(res); - - return 0; -} - -/* - * bitor_expr - * : bitxor_expr - * | bitor_expr '|' bitxor_expr - */ -static int bitor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (bitxor_expr(filt, data, fn, str, end, res)) return -1; - - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - int undef = 0; - for (;;) { - str = ws(*end); - if (*str == '|' && str[1] != '|') { - if (bitxor_expr(filt, data, fn, str+1, end, 
&val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - undef = 1; - } else if (res->is_str || val.is_str) { - hts_expr_val_free(&val); - return -1; - } - res->is_true = (res->d = ((int64_t)res->d | (int64_t)val.d)) != 0; - } else { - break; - } - } - hts_expr_val_free(&val); - if (undef) - hts_expr_val_undef(res); - - return 0; -} - -/* - * cmp_expr - * : bitor_expr - * | cmp_expr '<=' bitor_expr - * | cmp_expr '<' bitor_expr - * | cmp_expr '>=' bitor_expr - * | cmp_expr '>' bitor_expr - */ -static int cmp_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (bitor_expr(filt, data, fn, str, end, res)) return -1; - - str = ws(*end); - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - int err = 0, cmp_done = 0; - - if (*str == '>' && str[1] == '=') { - cmp_done = 1; - err = cmp_expr(filt, data, fn, str+2, end, &val); - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true=res->d - = res->is_str && res->s.s && val.is_str && val.s.s - ? strcmp(res->s.s, val.s.s) >= 0 - : !res->is_str && !val.is_str && res->d >= val.d; - res->is_str = 0; - } - } else if (*str == '>') { - cmp_done = 1; - err = cmp_expr(filt, data, fn, str+1, end, &val); - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true=res->d - = res->is_str && res->s.s && val.is_str && val.s.s - ? strcmp(res->s.s, val.s.s) > 0 - : !res->is_str && !val.is_str && res->d > val.d; - res->is_str = 0; - } - } else if (*str == '<' && str[1] == '=') { - cmp_done = 1; - err = cmp_expr(filt, data, fn, str+2, end, &val); - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true=res->d - = res->is_str && res->s.s && val.is_str && val.s.s - ? 
strcmp(res->s.s, val.s.s) <= 0 - : !res->is_str && !val.is_str && res->d <= val.d; - res->is_str = 0; - } - } else if (*str == '<') { - cmp_done = 1; - err = cmp_expr(filt, data, fn, str+1, end, &val); - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true=res->d - = res->is_str && res->s.s && val.is_str && val.s.s - ? strcmp(res->s.s, val.s.s) < 0 - : !res->is_str && !val.is_str && res->d < val.d; - res->is_str = 0; - } - } - - if (cmp_done && (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res))) - hts_expr_val_undef(res); - hts_expr_val_free(&val); - - return err ? -1 : 0; -} - -/* - * eq_expr - * : cmp_expr - * | eq_expr '==' cmp_expr - * | eq_expr '!=' cmp_expr - * | eq_expr '=~' cmp_expr - * | eq_expr '!~' cmp_expr - */ -static int eq_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (cmp_expr(filt, data, fn, str, end, res)) return -1; - - str = ws(*end); - - int err = 0, eq_done = 0; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - - // numeric vs numeric comparison is as expected - // string vs string comparison is as expected - // numeric vs string is false - if (str[0] == '=' && str[1] == '=') { - eq_done = 1; - if ((err = eq_expr(filt, data, fn, str+2, end, &val))) { - res->is_true = res->d = 0; - } else { - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true = res->d = res->is_str - ? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s)==0 :0) - : !res->is_str && !val.is_str && res->d == val.d; - } - } - res->is_str = 0; - - } else if (str[0] == '!' && str[1] == '=') { - eq_done = 1; - if ((err = eq_expr(filt, data, fn, str+2, end, &val))) { - res->is_true = res->d = 0; - } else { - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true = res->d = res->is_str - ? 
(res->s.s && val.s.s ?strcmp(res->s.s, val.s.s) != 0 :1) - : res->is_str != val.is_str || res->d != val.d; - } - } - res->is_str = 0; - - } else if ((str[0] == '=' && str[1] == '~') || - (str[0] == '!' && str[1] == '~')) { - eq_done = 1; - err = eq_expr(filt, data, fn, str+2, end, &val); - if (!val.is_str || !res->is_str) { - hts_expr_val_free(&val); - return -1; - } - if (val.s.s && res->s.s && val.is_true >= 0 && res->is_true >= 0) { - regex_t preg_, *preg; - if (filt->curr_regex >= filt->max_regex) { - // Compile regex if not seen before - if (filt->curr_regex >= MAX_REGEX) { - preg = &preg_; - } else { - preg = &filt->preg[filt->curr_regex]; - filt->max_regex++; - } - - int ec = regcomp(preg, val.s.s, REG_EXTENDED | REG_NOSUB); - if (ec != 0) { - char errbuf[1024]; - regerror(ec, preg, errbuf, 1024); - fprintf(stderr, "Failed regex: %.1024s\n", errbuf); - hts_expr_val_free(&val); - return -1; - } - } else { - preg = &filt->preg[filt->curr_regex]; - } - res->is_true = res->d = regexec(preg, res->s.s, 0, NULL, 0) == 0 - ? *str == '=' // matcn - : *str == '!'; // no-match - if (preg == &preg_) - regfree(preg); - - filt->curr_regex++; - } else { - // nul regexp or input is considered false - res->is_true = 0; - } - res->is_str = 0; - } - - if (eq_done && ((!hts_expr_val_exists(&val)) || !hts_expr_val_exists(res))) - hts_expr_val_undef(res); - hts_expr_val_free(&val); - - return err ? 
-1 : 0; -} - -/* - * and_expr - * : eq_expr - * | and_expr 'and' eq_expr - * | and_expr 'or' eq_expr - */ -static int and_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (eq_expr(filt, data, fn, str, end, res)) return -1; - - for (;;) { - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - str = ws(*end); - if (str[0] == '&' && str[1] == '&') { - if (eq_expr(filt, data, fn, str+2, end, &val)) return -1; - if (!hts_expr_val_existsT(res) || !hts_expr_val_existsT(&val)) { - hts_expr_val_undef(res); - res->d = 0; - } else { - res->is_true = res->d = - (res->is_true || (res->is_str && res->s.s) || res->d) && - (val.is_true || (val.is_str && val.s.s) || val.d); - res->is_str = 0; - } - } else if (str[0] == '|' && str[1] == '|') { - if (eq_expr(filt, data, fn, str+2, end, &val)) return -1; - if (!hts_expr_val_existsT(res) && !hts_expr_val_existsT(&val)) { - // neither defined - hts_expr_val_undef(res); - res->d = 0; - } else if (!hts_expr_val_existsT(res) && - !(val.is_true || (val.is_str && val.s.s ) || val.d)) { - // LHS undef and RHS false - hts_expr_val_undef(res); - res->d = 0; - } else if (!hts_expr_val_existsT(&val) && - !(res->is_true || (res->is_str && res->s.s) || res->d)){ - // RHS undef and LHS false - hts_expr_val_undef(res); - res->d = 0; - } else { - res->is_true = res->d = - res->is_true || (res->is_str && res->s.s) || res->d || - val.is_true || (val.is_str && val.s.s ) || val.d; - res->is_str = 0; - } - } else { - break; - } - hts_expr_val_free(&val); - } - - return 0; -} - -static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - return and_expr(filt, data, fn, str, end, res); -} - -hts_filter_t *hts_filter_init(const char *str) { - hts_filter_t *f = calloc(1, sizeof(*f)); - if (!f) return NULL; - - // Oversize to permit faster comparisons with memcmp over strcmp - size_t len = strlen(str)+100; - if (!(f->str = malloc(len))) { - 
free(f); - return NULL; - } - strcpy(f->str, str); - return f; -} - -void hts_filter_free(hts_filter_t *filt) { - if (!filt) - return; - - int i; - for (i = 0; i < filt->max_regex; i++) - regfree(&filt->preg[i]); - - free(filt->str); - free(filt); -} - -static int hts_filter_eval_(hts_filter_t *filt, - void *data, hts_expr_sym_func *fn, - hts_expr_val_t *res) { - char *end = NULL; - - filt->curr_regex = 0; - if (expression(filt, data, fn, filt->str, &end, res)) - return -1; - - if (end && *ws(end)) { - fprintf(stderr, "Unable to parse expression at %s\n", filt->str); - return -1; - } - - // Strings evaluate to true. An empty string is also true, but an - // absent (null) string is false, unless overriden by is_true. An - // empty string has kstring length of zero, but a pointer as it's - // nul-terminated. - if (res->is_str) { - res->is_true |= res->s.s != NULL; - res->d = res->is_true; - } else if (hts_expr_val_exists(res)) { - res->is_true |= res->d != 0; - } - - return 0; -} - -int hts_filter_eval(hts_filter_t *filt, - void *data, hts_expr_sym_func *fn, - hts_expr_val_t *res) { - if (res->s.l != 0 || res->s.m != 0 || res->s.s != NULL) { - // As *res is cleared below, it's not safe to call this function - // with res->s.s set, as memory would be leaked. It's also not - // possible to know is res was initialised correctly, so in - // either case we fail. 
- hts_log_error("Results structure must be cleared before calling this function"); - return -1; - } - - memset(res, 0, sizeof(*res)); - - return hts_filter_eval_(filt, data, fn, res); -} - -int hts_filter_eval2(hts_filter_t *filt, - void *data, hts_expr_sym_func *fn, - hts_expr_val_t *res) { - ks_free(&res->s); - memset(res, 0, sizeof(*res)); - - return hts_filter_eval_(filt, data, fn, res); -} diff --git a/src/htslib-1.18/hts_internal.h b/src/htslib-1.18/hts_internal.h deleted file mode 100644 index 61956da..0000000 --- a/src/htslib-1.18/hts_internal.h +++ /dev/null @@ -1,153 +0,0 @@ -/* hts_internal.h -- internal functions; not part of the public API. - - Copyright (C) 2015-2016, 2018-2020 Genome Research Ltd. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef HTSLIB_HTS_INTERNAL_H -#define HTSLIB_HTS_INTERNAL_H - -#include -#include - -#include "htslib/hts.h" -#include "textutils_internal.h" - -#define HTS_MAX_EXT_LEN 9 - -#ifdef __cplusplus -extern "C" { -#endif - -struct hFILE; - -struct hts_json_token { - char type; ///< Token type - char *str; ///< Value as a C string (filled in for all token types) - // TODO Add other fields to fill in for particular data types, e.g. - // int inum; - // float fnum; -}; - -struct cram_fd; - -/* - * Check the existence of a local index file using part of the alignment file name. - * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai - * @param fn - pointer to the file name - * @param fnidx - pointer to the index file name placeholder - * @return 1 for success, 0 for failure - */ -int hts_idx_check_local(const char *fn, int fmt, char **fnidx); - -// Retrieve the name of the index file and also download it, if it is remote -char *hts_idx_getfn(const char *fn, const char *ext); - -// Retrieve the name of the index file, but do not download it, if it is remote -char *hts_idx_locatefn(const char *fn, const char *ext); - -// Used for on-the-fly indexing. See the comments in hts.c. -void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset); - -int hts_idx_fmt(hts_idx_t *idx); - -// Construct a unique filename based on fname and open it. -struct hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname); - -// Check that index is capable of storing items in range beg..end -int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end); - -// The CRAM implementation stores the loaded index within the cram_fd rather -// than separately as is done elsewhere in htslib. So if p is a pointer to -// an hts_idx_t with p->fmt == HTS_FMT_CRAI, then it actually points to an -// hts_cram_idx_t and should be cast accordingly. 
-typedef struct hts_cram_idx_t { - int fmt; - struct cram_fd *cram; -} hts_cram_idx_t; - - -// Entry point to hFILE_multipart backend. -struct hFILE *hopen_htsget_redirect(struct hFILE *hfile, const char *mode); - -struct hts_path_itr { - kstring_t path, entry; - void *dirv; // DIR * privately - const char *pathdir, *prefix, *suffix; - size_t prefix_len, suffix_len, entry_dir_l; -}; - -void hts_path_itr_setup(struct hts_path_itr *itr, const char *path, - const char *builtin_path, const char *prefix, size_t prefix_len, - const char *suffix, size_t suffix_len); - -const char *hts_path_itr_next(struct hts_path_itr *itr); - -typedef void plugin_void_func(void); -plugin_void_func *load_plugin(void **pluginp, const char *filename, const char *symbol); -void *plugin_sym(void *plugin, const char *name, const char **errmsg); -plugin_void_func *plugin_func(void *plugin, const char *name, const char **errmsg); -void close_plugin(void *plugin); -const char *hts_plugin_path(void); - -/* - * Buffers up arguments to hts_idx_push for later use, once we've written all bar - * this block. This is necessary when multiple blocks are in flight (threading). - * - * Returns 0 on success, - * -1 on failure - */ -int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); - -/* - * bgzf analogue to hts_idx_amend_last. - * - * This is needed when multi-threading and writing indices on the fly. - * At the point of writing a record we know the virtual offset for start - * and end, but that end virtual offset may be the end of the current - * block. In standard indexing our end virtual offset becomes the start - * of the next block. Thus to ensure bit for bit compatibility we - * detect this boundary case and fix it up here. - */ -void bgzf_idx_amend_last(BGZF *fp, hts_idx_t *hidx, uint64_t offset); - -static inline int find_file_extension(const char *fn, char ext_out[static HTS_MAX_EXT_LEN]) -{ - const char *delim = fn ? 
strstr(fn, HTS_IDX_DELIM) : NULL, *ext; - if (!fn) return -1; - if (!delim) delim = fn + strlen(fn); - for (ext = delim; ext > fn && *ext != '.' && *ext != '/'; --ext) {} - if (*ext == '.' && - ((delim - ext == 3 && ext[1] == 'g' && ext[2] == 'z') || // permit .sam.gz as a valid file extension - (delim - ext == 4 && ext[1] == 'b' && ext[2] == 'g' && ext[3] == 'z'))) // permit .vcf.bgz as a valid file extension - { - for (ext--; ext > fn && *ext != '.' && *ext != '/'; --ext) {} - } - if (*ext != '.' || delim - ext > HTS_MAX_EXT_LEN || delim - ext < 3) - return -1; - memcpy(ext_out, ext + 1, delim - ext - 1); - ext_out[delim - ext - 1] = '\0'; - return 0; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.18/hts_os.c b/src/htslib-1.18/hts_os.c deleted file mode 100644 index d098872..0000000 --- a/src/htslib-1.18/hts_os.c +++ /dev/null @@ -1,60 +0,0 @@ -/// @file hts_os.c -/// Operating System specific tweaks, for compatibility with POSIX. -/* - Copyright (C) 2017, 2019-2020 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include -#include "htslib/hts_defs.h" -#include "htslib/hts_os.h" - -// Windows (maybe more) lack a drand48 implementation. -#ifndef HAVE_DRAND48 -#include "os/rand.c" -#else -#include -HTSLIB_EXPORT -void hts_srand48(long seed) -{ -#ifdef HAVE_SRAND48_DETERMINISTIC - srand48_deterministic(seed); -#else - srand48(seed); -#endif -} - -HTSLIB_EXPORT -double hts_erand48(unsigned short xseed[3]) { return erand48(xseed); } - -HTSLIB_EXPORT -double hts_drand48(void) { return drand48(); } - -HTSLIB_EXPORT -long hts_lrand48(void) { return lrand48(); } -#endif - -// // On Windows when using the MSYS or Cygwin terminals, isatty fails -// #ifdef _WIN32 -// #define USE_FILEEXTD -// #include "os/iscygpty.c" -// #endif diff --git a/src/htslib-1.18/hts_probe_cc.sh b/src/htslib-1.18/hts_probe_cc.sh deleted file mode 100755 index 71c6f5d..0000000 --- a/src/htslib-1.18/hts_probe_cc.sh +++ /dev/null @@ -1,143 +0,0 @@ -#!/bin/sh - -# Check compiler options for non-configure builds and create Makefile fragment -# -# Copyright (C) 2022-2023 Genome Research Ltd. 
-# -# Author: Rob Davies -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# Arguments are: -# 1. C compiler command -# 2. Initial CFLAGS -# 3. LDFLAGS - -CC=$1 -CFLAGS=$2 -LDFLAGS=$3 - -# Try running the compiler. Uses the same contest.* names as -# configure for temporary files. -run_compiler () -{ - "$CC" $CFLAGS $1 $LDFLAGS -o conftest conftest.c 2> conftest.err - retval=$? - rm -f conftest.err conftest - return $retval -} - -# Run a test. $1 is the flag to try, $2 is the Makefile variable to set -# with the flag probe result, $3 is a Makefile variable which will be -# set to 1 if the code was built successfully. The code to test should -# be passed in via fd 0. -# First try compiling conftest.c without the flag. If that fails, try -# again with it to see if the flag is needed. 
-run_test () -{ - rm -f conftest conftest.err conftest.c - cat - > conftest.c - if run_compiler ; then - echo "$2 =" - echo "$3 = 1" - elif run_compiler "$1" ; then - echo "$2 = $1" - echo "$3 = 1" - else - echo "$3 =" - fi -} - -echo "# Compiler probe results, generated by $0" - -# Check for ssse3 -run_test "-mssse3" HTS_CFLAGS_SSSE3 HTS_BUILD_SSSE3 <<'EOF' -#ifdef __x86_64__ -#include "x86intrin.h" -int main(int argc, char **argv) { - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_shuffle_epi8(a, b); - return *((char *) &c); -} -#else -int main(int argc, char **argv) { return 0; } -#endif -EOF - -# Check for popcnt -run_test "-mpopcnt" HTS_CFLAGS_POPCNT HTS_BUILD_POPCNT <<'EOF' -#ifdef __x86_64__ -#include "x86intrin.h" -int main(int argc, char **argv) { - unsigned int i = _mm_popcnt_u32(1); - return i != 1; -} -#else -int main(int argc, char **argv) { return 0; } -#endif -EOF - -# Check for sse4.1 etc. support -run_test "-msse4.1" HTS_CFLAGS_SSE4_1 HTS_BUILD_SSE4_1 <<'EOF' -#ifdef __x86_64__ -#include "x86intrin.h" -int main(int argc, char **argv) { - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_max_epu32(a, b); - return *((char *) &c); -} -#else -int main(int argc, char **argv) { return 0; } -#endif -EOF - -echo 'HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSSE3) $(HTS_CFLAGS_POPCNT) $(HTS_CFLAGS_SSE4_1)' - -# Check for avx2 - -run_test -mavx2 HTS_CFLAGS_AVX2 HTS_BUILD_AVX2 <<'EOF' -#ifdef __x86_64__ -#include "x86intrin.h" -int main(int argc, char **argv) { - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); - long long c = _mm256_extract_epi64(b, 0); - return (int) c; -} -#else -int main(int argc, char **argv) { return 0; } -#endif -EOF - -# Check for avx512 - -run_test -mavx512f HTS_CFLAGS_AVX512 HTS_BUILD_AVX512 <<'EOF' -#ifdef __x86_64__ -#include "x86intrin.h" -int main(int argc, char **argv) { - __m512i a = _mm512_set1_epi32(1); - __m512i b = 
_mm512_add_epi32(a, a); - return *((char *) &b); -} -#else -int main(int argc, char **argv) { return 0; } -#endif -EOF - -rm -f conftest.c diff --git a/src/htslib-1.18/htscodecs.mk b/src/htslib-1.18/htscodecs.mk deleted file mode 120000 index 2a91c26..0000000 --- a/src/htslib-1.18/htscodecs.mk +++ /dev/null @@ -1 +0,0 @@ -htscodecs_bundled.mk \ No newline at end of file diff --git a/src/htslib-1.18/htscodecs/NEWS.md b/src/htslib-1.18/htscodecs/NEWS.md deleted file mode 100644 index 1a6f082..0000000 --- a/src/htslib-1.18/htscodecs/NEWS.md +++ /dev/null @@ -1,296 +0,0 @@ -Release 1.5.1: 19th July 2023 ------------------------------ - -This release is mainly small updates and bug fixes focusing on -specific platforms, with no new features added. - -Changes - -- Be more selective in use of AVX512 on AMD Zen4 processors. This can - be faster (e.g. with 64-way unrolling), but in the current rANS codec - implementations AVX2 is faster for certain operations (PR#85). - -- Add config.h to test programs to help them pick up definitions such - as XOPEN_SOURCE (PR#84) - -- Add FreeBSD to CI testing (PR#83) - -Bug fixes - -- Trivial bug fix to the rans4x16pr test harness when given - incompressible data (PR#86). - -- Make ARM NEON checks specific to AArch64 and exclude AArch32 systems. - (PR#82 to fix issue#81, reported by Robert Clausecker) - - -Release 1.5.0: 14th April 2023 ------------------------------- - -Changes - -- Significant speed ups to the fqzcomp codec via code restructuring - and use of memory prefetch instructions. Encode is 30-40% faster - and decode 5-8% faster. (PR#75 James Bonfield) - -- Improve multiarch builds on MacOS, fixing issues with getting the - various SIMD implementations integrated. (Issue#76 John Marshall, - PR#77/#78 Rob Davies) - -- Remove unused ax_with_libdeflate.m4 file from build system. - - -Release 1.4.0: Februrary 2023 ------------------------------ - -This is almost entirely minor bug fixing with a few small updates. 
- -Changes - -- Optimise compression / speed of the name tokeniser. - - In arithmetic coding mode, it can now utilise bzip2 at higher levels. - - For both rans / arith entropy encoders, the choice of method / order - is now optimised per token type, giving faster compression. - - Culled a pointless zlib check in the configure script. - - Made lack of bzip2 a hard failure in configure, unless an explicit - --disable-bz2 option is given. - (#72, #73) - -- Switch CI to use ARM for MacOS builds - (#69, thanks to Rob Davies) - - -Bug fixes - -- Remove some newer compiler warnings (#61) - -- Improvements for Intel -m32 builds, including better AVX2 validation - (m32 misses _mm256_extract_epi64) and improved data alignment. - (#62. See also samtools/htslib#1500) - -- Detect Neon capability at runtime via operating system APIs. - (#63, thanks to John Marshall) - -- Improve FreeBSD diagnostics when neglecting to use -lpthread / -lthr. - Plus additional extra error checking too. - (#68, #64, thanks to John Marshall) - -- Update hts_pack to operate in line with CRAMcodecs spec, where the - number of symbols > 16. - (#65/#66, reported by Michael Macias) - -- Fixed too-stringent buffer overflow checking in O1 rans decoder. - (#71, reported by Divon Lan) - - -Release 1.3.0: 9th August 2022 ------------------------------- - -The primary change in this release is a new SIMD enabled rANS codec. - -Changes - -- There is a 32-way unrolled rANS implementation. This is accessed - using the existing rans 4x16 API with the RANS_ORDER_X32 bit set. - Implementations exist for SSE4.1, AVX2, AVX512 and ARM Neon, as - well as traditional non-SIMD scalar code in C and JavaScript. See - the commit logs for benchmarks. - -- Improved memory allocation via a new htscodecs_tls_alloc function. - This uses Thread Local Storage (TLS) to avoid multiple malloc/free - calls, reducing system CPU time. 
- -- Some external functions have been renamed, with the old ones still - existing in a deprecated fashion. Every symbol should now start - hts_, rans_, arith_, fqz_ or tok3_*. - -- Improved test framework with an "entropy" tool that iterates over - all entropy encoders. - -- Updated the Appveyor CI image to user a newer gcc. Also added ARM - to the list of processors to test on. - -- Tab vs space code changes. Use "git diff -w" to see through these. - -- Reworked fuzzing infrastructure. - -- Small speed improvements to various rANS encoders and decoders. - These were tested on a broad range of compilers, versions and - systems. The new code may be slightly slower with some combinations, - but is faster overall and removes a few outliers with considerably - degraded performance. - -- Substantial memory reduction to the name tokeniser (tok3). - -Bug fixes - -- Fixed undefined behaviour in our use of _builtin_clz(). - -- Fixed a few redundant #includes. - -- Work around strict aliasing bugs, uncovered with gcc -O2. - -- Fixed an issue with encoding data blocks close to 2GB in size. - (Additionally blocks above 2GB now error, rather than crashing or - returning incorrect results.) - -- Fix encode error with large blocks using RANS_ORDER_STRIPE. - - -Release 1.2.2: 1st April 2022 ------------------------------ - -This release contains some fixes found during fuzzing with Clang's -memory-sanitizer. None of these are involving writing memory so there -is no possibility for code execution vulnerabilities. However some do -could access uninitialised elements in locally allocated memory, which -could leak private data if the library was used in conjunction with -other tools which don't zero sensitive data before freeing. - -Bug fixes: - -- The name tokeniser now validates the stored length in the data - stream matches the actual decoded length. Discovered by Taotao Gu. - -- Fixed an endless loop in arith_dynamic and rans4x16pr involving - X_STRIPE with 0 stripes. 
- -- Avoid a harmless (and wrong?) undefined behaviour sanitizer error - when calling memcpy(ptr, NULL, 0) in the name tokeniser. - -- Fixed possible uninitialised memory access in - rans_uncompress_O1_4x16. If the frequency table didn't add up to - the correct amount, parts of the "fb" table were left unpopulated. - It was then possible to use these array elements in some of the rANS - calculations. - -- Similarly rans_uncompress_O0 could access an uninitialised element - 4095 of the decoder tables if the frequencies summed to 4095 instead - of the expected 4096. - -- Improved error detection from fqzcomp's read_array function. - -- Reject fqzcomp parameters with inconsistent "sel" parameters, which - could lead to uninitialised access to the model.sel range coder. - - -Release 1.2.1: 15th February 2022 ---------------------------------- - -The only change in this release is a minor adjustment to the histogram -code so it works on systems with small stacks. This was detected on -Windows Mingw builds. - - -Release 1.2: 10th February 2022 -------------------------------- - -This release contains the following minor changes. -Please see the "git log" for the full details. - -Improvements / changes: - -- Speed up of rANS4x16 order-0. We now use a branchless encoder - renormalisation step. For complex data it's between 13 and 50% - speed up depending on compiler. - -- Improve rANS4x16 compute_shift estimates. The entropy calculation - is now more accurate. This leads to more frequent use of the 10-bit - frequency mode, at an expense of up to 1% size growth. - -- Speed improvements to the striped rANS mode, both encoding and - decoding. Encoder gains ~8% and decoder ~5%, but varies - considerably by compiler and data. - -- Added new var_put_u64_safe and var_put_u32_safe interfaces. - These are automatically used by var_put_u64 and var_put_u32 when - near the end of the buffer, but may also be called directly. - -- Small speed ups to the hist8 and hist1_4 functions. 
- -- Minor speed up to RLE decoding. - -Bug fixes: - -- Work around an icc-2021 compiler bug, but also speed up the varint - encoding too (#29). - -- Fix an off-by-one error in the initial size check in arith_dynamic. - This meant the very smallest of blocks could fail to decode. - Reported by Divon Lan. - -- Fixed hist1_4 to also count the last byte when computing T0[]. - -- Fixed overly harsh bounds checking in the fqzcomp read_array - function, which meant it failed to decode some configurations. - - -Release 1.1.1: 6th July 2021 ----------------------------- - -This release contains the following minor changes. -Please see the "git log" for the full details. - -Improvements / changes: - -- Modernised autoconf usage to avoid warnings with newer versions. - (John Marshall) - -- Avoid using awk with large records, due to some systems - (e.g. Solaris / OpenIndiana) with line length limits . - (John Marshall) - -- Applied Debian patch to make the library link against -lm. - -Bug fixes: - -- Fixed an issue with the name tokeniser when a slice (name_context) - has exactly 1 more name than the previous call. (James Bonfield) - -- Removed access to an uninitialised variable in the name tokeniser - decode when given malformed data. This occurs when we use delta - encoding for the very first name. (James Bonfield, OSS-Fuzz) - -- Minor fixes to distcheck and distclean targets - - -Release 1.0: 23rd Feb 2021 --------------------------- - -This marks the first non-beta release of htscodecs, following a -perioid of integration with Htslib and automated fuzzing by Google's -OSS-Fuzz program. - -[Note this testing only applies to the C implementation. The -JavaScript code should still be considered as examples of the codecs, -more for purposes of understanding and clarity than as a fully -optimised and tested release.] 
- -Since the last release (0.5) the key changes are: - -- Improved support for big endian platforms - -- Speed improvements to CRAM 3.0 4x8 rANS order-1 encoding. - It's between 10 and 50% faster at encoding, based on input data. - -- Improved autoconf bzip2 checks and tidy up "make test" output. - -- Added some more files into "make install", so that "make distcheck" - now passes. - -- Replaced Travis with Cirrus-CI testing. - -- Removed various C undefined behaviour, such as left shifting of - negative values and integer overflows. As far as we know these were - currently harmless on the supported platforms, but may break future - compiler optimisations. - -- Fixed numerous OSS-Fuzz identified flaws. Some of these were - potential security issues such as small buffer overruns. - -- Tidied up some code to prevent warnings. - -- The name tokeniser now has a limit on the size of data it can encode - (10 million records). This may still be too high given the memory - it will require, so it may be reduced again. - diff --git a/src/htslib-1.18/htscodecs/htscodecs/arith_dynamic.c b/src/htslib-1.18/htscodecs/htscodecs/arith_dynamic.c deleted file mode 100644 index 93b4eeb..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/arith_dynamic.c +++ /dev/null @@ -1,1182 +0,0 @@ -/* - * Copyright (c) 2019-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// As per standard rANS_static but using optional RLE or bit-packing -// techniques prior to entropy encoding. This is a significant -// reduction in some data sets. - -// top bits in order byte -#define X_PACK 0x80 // Pack 2,4,8 or infinite symbols into a byte. -#define X_RLE 0x40 // Run length encoding with runs & lits encoded separately -#define X_CAT 0x20 // Nop; for tiny segments where rANS overhead is too big -#define X_NOSZ 0x10 // Don't store the original size; used by STRIPE mode -#define X_STRIPE 0x08 // For 4-byte integer data; rotate & encode 4 streams. 
-#define X_EXT 0x04 // External compression codec via magic num (gz, xz, bz2) -#define X_ORDER 0x03 // Mask to obtain order - -#include "config.h" - -#ifdef HAVE_LIBBZ2 -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "arith_dynamic.h" -#include "varint.h" -#include "pack.h" -#include "utils.h" - -#define MIN(a,b) ((a)<(b)?(a):(b)) - -/*----------------------------------------------------------------------------- - * Memory to memory compression functions. - * - * These are original versions without any manual loop unrolling. They - * are easier to understand, but can be up to 2x slower. - */ -#define MAGIC 8 - -unsigned int arith_compress_bound(unsigned int size, int order) { - int N = (order>>8) & 0xff; - if (!N) N=4; - return (order == 0 - ? 1.05*size + 257*3 + 4 - : 1.05*size + 257*257*3 + 4 + 257*3+4) + 5 + - ((order & X_PACK) ? 1 : 0) + - ((order & X_RLE) ? 1 + 257*3+4: 0) + - ((order & X_STRIPE) ? 7 + 5*N: 0); -} - -#ifndef MODEL_256 // see fqzcomp_qual_fuzz.c -#define NSYM 256 -#include "c_simple_model.h" -#endif - -// Compresses in_size bytes from 'in' to *out_size bytes in 'out'. -// -// NB: The output buffer does not hold the original size, so it is up to -// the caller to store this. 
-static -unsigned char *arith_compress_O0(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - int i, bound = arith_compress_bound(in_size,0)-5; // -5 for order/size - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - unsigned int m = 0; - for (i = 0; i < in_size; i++) - if (m < in[i]) - m = in[i]; - m++; - *out = m; - - SIMPLE_MODEL(256,_) byte_model; - SIMPLE_MODEL(256,_init)(&byte_model, m); - - RangeCoder rc; - RC_SetOutput(&rc, (char *)out+1); - RC_StartEncode(&rc); - - for (i = 0; i < in_size; i++) - SIMPLE_MODEL(256, _encodeSymbol)(&byte_model, &rc, in[i]); - - RC_FinishEncode(&rc); - - // Finalise block size and return it - *out_size = RC_OutSize(&rc)+1; - - return out; -} - -static -unsigned char *arith_uncompress_O0(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - RangeCoder rc; - int i; - unsigned int m = in[0] ? in[0] : 256; - - SIMPLE_MODEL(256,_) byte_model; - SIMPLE_MODEL(256,_init)(&byte_model, m); - - if (!out) - out = malloc(out_sz); - if (!out) - return NULL; - - RC_SetInput(&rc, (char *)in+1, (char *)in+in_size); - RC_StartDecode(&rc); - - for (i = 0; i < out_sz; i++) - out[i] = SIMPLE_MODEL(256, _decodeSymbol)(&byte_model, &rc); - - RC_FinishDecode(&rc); - - return out; -} - - -//----------------------------------------------------------------------------- -static -unsigned char *arith_compress_O1(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - int i, bound = arith_compress_bound(in_size,0)-5; // -5 for order/size - unsigned char *out_free = NULL; - - if (!out) { - *out_size = bound; - out_free = out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - SIMPLE_MODEL(256,_) *byte_model = - htscodecs_tls_alloc(256 * sizeof(*byte_model)); - if (!byte_model) { - free(out_free); - return NULL; - } - unsigned int m = 0; - if (1 || in_size > 
1000) { - for (i = 0; i < in_size; i++) - if (m < in[i]) - m = in[i]; - //fprintf(stderr, "%d max %d\n", in_size, m); - m++; - } - *out = m; - for (i = 0; i < 256; i++) - SIMPLE_MODEL(256,_init)(&byte_model[i], m); - - RangeCoder rc; - RC_SetOutput(&rc, (char *)out+1); - RC_StartEncode(&rc); - - uint8_t last = 0; - for (i = 0; i < in_size; i++) { - SIMPLE_MODEL(256, _encodeSymbol)(&byte_model[last], &rc, in[i]); - last = in[i]; - } - - RC_FinishEncode(&rc); - - // Finalise block size and return it - *out_size = RC_OutSize(&rc)+1; - - htscodecs_tls_free(byte_model); - return out; -} - -static -unsigned char *arith_uncompress_O1(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - RangeCoder rc; - unsigned char *out_free = NULL; - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - - SIMPLE_MODEL(256,_) *byte_model = - htscodecs_tls_alloc(256 * sizeof(*byte_model)); - if (!byte_model) { - free(out_free); - return NULL; - } - - unsigned int m = in[0] ? 
in[0] : 256, i; - for (i = 0; i < 256; i++) - SIMPLE_MODEL(256,_init)(&byte_model[i], m); - - RC_SetInput(&rc, (char *)in+1, (char *)in+in_size); - RC_StartDecode(&rc); - - unsigned char last = 0; - for (i = 0; i < out_sz; i++) { - out[i] = SIMPLE_MODEL(256, _decodeSymbol)(&byte_model[last], &rc); - last = out[i]; - } - - RC_FinishDecode(&rc); - - htscodecs_tls_free(byte_model); - return out; -} - -//----------------------------------------------------------------------------- - -// Disable O2 for now -#if 0 - -#if 0 -unsigned char *arith_compress_O2(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - fprintf(stderr, "WARNING: using undocumented O2 arith\n"); - - int i, j; - int bound = arith_compress_bound(in_size,0)-5; // -5 for order/size - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - unsigned int m = 0; - if (1 || in_size > 1000) { - for (i = 0; i < in_size; i++) - if (m < in[i]) - m = in[i]; - //fprintf(stderr, "%d max %d\n", in_size, m); - m++; - } - *out = m; - - SIMPLE_MODEL(256,_) *byte_model; - byte_model = malloc(256*256*sizeof(*byte_model)); - for (i = 0; i < 256; i++) - for (j = 0; j < 256; j++) - SIMPLE_MODEL(256,_init)(&byte_model[i*256+j], m); - - RangeCoder rc; - RC_SetOutput(&rc, (char *)out+1); - RC_StartEncode(&rc); - - unsigned char last1 = 0, last2 = 0; - for (i = 0; i < in_size; i++) { - SIMPLE_MODEL(256, _encodeSymbol)(&byte_model[last1*256 + last2], &rc, in[i]); - last2 = last1; - last1 = in[i]; - } - - free(byte_model); - RC_FinishEncode(&rc); - - // Finalise block size and return it - *out_size = RC_OutSize(&rc)+1; - - return out; -} -#else -unsigned char *arith_compress_O2(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - fprintf(stderr, "WARNING: using undocumented O2 arith\n"); - - int i, j; - int bound = arith_compress_bound(in_size,0)-5; // -5 for order/size - - if (!out) { - 
*out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - unsigned int m = 0; - if (1 || in_size > 1000) { - for (i = 0; i < in_size; i++) - if (m < in[i]) - m = in[i]; - //fprintf(stderr, "%d max %d\n", in_size, m); - m++; - } - *out = m; - - SIMPLE_MODEL(256,_) *byte_model; - byte_model = malloc(256*256*sizeof(*byte_model)); - for (i = 0; i < 256; i++) - for (j = 0; j < 256; j++) - SIMPLE_MODEL(256,_init)(&byte_model[i*256+j], m); - SIMPLE_MODEL(256,_) byte_model1[256]; - for (i = 0; i < 256; i++) - SIMPLE_MODEL(256,_init)(&byte_model1[i], m); - - RangeCoder rc; - RC_SetOutput(&rc, (char *)out+1); - RC_StartEncode(&rc); - - unsigned char last1 = 0, last2 = 0; - for (i = 0; i < in_size; i++) { - // Use Order-1 is order-2 isn't sufficiently advanced yet (75+ symbols) - if (byte_model[last1*256+last2].TotFreq <= m+75*16) { - SIMPLE_MODEL(256, _encodeSymbol)(&byte_model1[last1], &rc, in[i]); - SIMPLE_MODEL(256, _updateSymbol)(&byte_model[last1*256 + last2], &rc, in[i]); - } else { - SIMPLE_MODEL(256, _encodeSymbol)(&byte_model[last1*256 + last2], &rc, in[i]); - //SIMPLE_MODEL(256, _updateSymbol)(&byte_model1[last1], &rc, in[i]); - } - last2 = last1; - last1 = in[i]; - } - - free(byte_model); - RC_FinishEncode(&rc); - - // Finalise block size and return it - *out_size = RC_OutSize(&rc)+1; - - return out; -} -#endif - -unsigned char *arith_uncompress_O2(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - RangeCoder rc; - - SIMPLE_MODEL(256,_) *byte_model; - byte_model = malloc(256*256*sizeof(*byte_model)); - unsigned int m = in[0] ? 
in[0] : 256, i, j; - for (i = 0; i < 256; i++) - for (j = 0; j < 256; j++) - SIMPLE_MODEL(256,_init)(&byte_model[i*256+j], m); - - if (!out) - out = malloc(out_sz); - if (!out) - return NULL; - - RC_SetInput(&rc, (char *)in+1, (char *)in+in_size); - RC_StartDecode(&rc); - - unsigned char last1 = 0, last2 = 0; - for (i = 0; i < out_sz; i++) { - out[i] = SIMPLE_MODEL(256, _decodeSymbol)(&byte_model[last1*256 + last2], &rc); - last2 = last1; - last1 = out[i]; - } - - free(byte_model); - RC_FinishDecode(&rc); - - return out; -} - -#endif // Disable O2 -/*----------------------------------------------------------------------------- - */ - -#undef NSYM -#define NSYM 258 -#include "c_simple_model.h" -#define MAX_RUN 4 - -static -unsigned char *arith_compress_O0_RLE(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - int i, bound = arith_compress_bound(in_size,0)-5; // -5 for order/size - unsigned char *out_free = NULL; - - if (!out) { - *out_size = bound; - out_free = out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - unsigned int m = 0; - for (i = 0; i < in_size; i++) - if (m < in[i]) - m = in[i]; - m++; - *out = m; - - SIMPLE_MODEL(256,_) byte_model; - SIMPLE_MODEL(256,_init)(&byte_model, m); - - SIMPLE_MODEL(NSYM,_) *run_model = - htscodecs_tls_alloc(NSYM * sizeof(*run_model)); - if (!run_model) { - free(out_free); - return NULL; - } - - for (i = 0; i < NSYM; i++) - SIMPLE_MODEL(NSYM,_init)(&run_model[i], MAX_RUN); - - RangeCoder rc; - RC_SetOutput(&rc, (char *)out+1); - RC_StartEncode(&rc); - - unsigned char last = 0; - for (i = 0; i < in_size;) { - //SIMPLE_MODEL(256, _encodeSymbol)(&byte_model, &rc, in[i]); - SIMPLE_MODEL(256, _encodeSymbol)(&byte_model, &rc, in[i]); - //fprintf(stderr, "lit %c (ctx %c)\n", in[i], last); - int run = 0; - last = in[i++]; - while (i < in_size && in[i] == last/* && run < MAX_RUN-1*/) - run++, i++; - int rctx = last; - do { - int c = run < MAX_RUN ? 
run : MAX_RUN-1; - SIMPLE_MODEL(NSYM, _encodeSymbol)(&run_model[rctx], &rc, c); - run -= c; - - if (rctx == last) - rctx = 256; - else - rctx += (rctx < NSYM-1); - if (c == MAX_RUN-1 && run == 0) - SIMPLE_MODEL(NSYM, _encodeSymbol)(&run_model[rctx], &rc, 0); - } while (run); - } - - RC_FinishEncode(&rc); - - // Finalise block size and return it - *out_size = RC_OutSize(&rc)+1; - - //fprintf(stderr, "RLE %d to %d\n", in_size, *out_size); - - htscodecs_tls_free(run_model); - return out; -} - -static -unsigned char *arith_uncompress_O0_RLE(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - RangeCoder rc; - int i; - unsigned int m = in[0] ? in[0] : 256; - unsigned char *out_free = NULL; - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - SIMPLE_MODEL(256,_) byte_model; - SIMPLE_MODEL(256,_init)(&byte_model, m); - - SIMPLE_MODEL(NSYM,_) *run_model = - htscodecs_tls_alloc(NSYM * sizeof(*run_model)); - if (!run_model) { - free(out_free); - return NULL; - } - - for (i = 0; i < NSYM; i++) - SIMPLE_MODEL(NSYM,_init)(&run_model[i], MAX_RUN); - - RC_SetInput(&rc, (char *)in+1, (char *)in+in_size); - RC_StartDecode(&rc); - - for (i = 0; i < out_sz; i++) { - unsigned char last; - last = out[i] = SIMPLE_MODEL(256, _decodeSymbol)(&byte_model, &rc); - //fprintf(stderr, "lit %c\n", last); - int run = 0, r = 0, rctx = out[i]; - do { - r = SIMPLE_MODEL(NSYM, _decodeSymbol)(&run_model[rctx], &rc); - if (rctx == last) - rctx = 256; - else - rctx += (rctx < NSYM-1); - //fprintf(stderr, "run %d (ctx %d, %d)\n", r, last, l); - run += r; - } while (r == MAX_RUN-1 && run < out_sz); - while (run-- && i+1 < out_sz) - out[++i] = last; - } - - RC_FinishDecode(&rc); - - htscodecs_tls_free(run_model); - return out; -} - -static -unsigned char *arith_compress_O1_RLE(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - int i, bound = arith_compress_bound(in_size,0)-5; // -5 for order/size - 
unsigned char *out_free = NULL; - - if (!out) { - *out_size = bound; - out_free = out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - unsigned int m = 0; - for (i = 0; i < in_size; i++) - if (m < in[i]) - m = in[i]; - m++; - *out = m; - - SIMPLE_MODEL(256,_) *byte_model = - htscodecs_tls_alloc(256 * sizeof(*byte_model)); - if (!byte_model) { - free(out_free); - return NULL; - } - for (i = 0; i < 256; i++) - SIMPLE_MODEL(256,_init)(&byte_model[i], m); - - SIMPLE_MODEL(NSYM,_) *run_model = - htscodecs_tls_alloc(NSYM * sizeof(*run_model)); - if (!run_model) { - htscodecs_tls_free(byte_model); - free(out_free); - return NULL; - } - for (i = 0; i < NSYM; i++) - SIMPLE_MODEL(NSYM,_init)(&run_model[i], MAX_RUN); - - RangeCoder rc; - RC_SetOutput(&rc, (char *)out+1); - RC_StartEncode(&rc); - - unsigned char last = 0; - for (i = 0; i < in_size;) { - //SIMPLE_MODEL(256, _encodeSymbol)(&byte_model, &rc, in[i]); - SIMPLE_MODEL(256, _encodeSymbol)(&byte_model[last], &rc, in[i]); - //fprintf(stderr, "lit %c (ctx %c)\n", in[i], last); - int run = 0; - last = in[i++]; - while (i < in_size && in[i] == last/* && run < MAX_RUN-1*/) - run++, i++; - int rctx = last; - do { - int c = run < MAX_RUN ? run : MAX_RUN-1; - SIMPLE_MODEL(NSYM, _encodeSymbol)(&run_model[rctx], &rc, c); - run -= c; - - if (rctx == last) - rctx = 256; - else - rctx += (rctx < NSYM-1); - if (c == MAX_RUN-1 && run == 0) - SIMPLE_MODEL(NSYM, _encodeSymbol)(&run_model[rctx], &rc, 0); - } while (run); - } - - RC_FinishEncode(&rc); - - // Finalise block size and return it - *out_size = RC_OutSize(&rc)+1; - - //fprintf(stderr, "RLE %d to %d\n", in_size, *out_size); - - htscodecs_tls_free(byte_model); - htscodecs_tls_free(run_model); - return out; -} - -static -unsigned char *arith_uncompress_O1_RLE(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - RangeCoder rc; - int i; - unsigned int m = in[0] ? 
in[0] : 256; - unsigned char *out_free = NULL; - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - SIMPLE_MODEL(256,_) *byte_model = - htscodecs_tls_alloc(256 * sizeof(*byte_model)); - if (!byte_model) { - free(out_free); - return NULL; - } - for (i = 0; i < 256; i++) - SIMPLE_MODEL(256,_init)(&byte_model[i], m); - - SIMPLE_MODEL(NSYM,_) *run_model = - htscodecs_tls_alloc(NSYM * sizeof(*run_model)); - if (!run_model) { - htscodecs_tls_free(byte_model); - free(out_free); - return NULL; - } - for (i = 0; i < NSYM; i++) - SIMPLE_MODEL(NSYM,_init)(&run_model[i], MAX_RUN); - - RC_SetInput(&rc, (char *)in+1, (char *)in+in_size); - RC_StartDecode(&rc); - - unsigned char last = 0; - for (i = 0; i < out_sz; i++) { - out[i] = SIMPLE_MODEL(256, _decodeSymbol)(&byte_model[last], &rc); - //fprintf(stderr, "lit %c (ctx %c)\n", out[i], last); - last = out[i]; - int run = 0, r = 0, rctx = last; - - do { - r = SIMPLE_MODEL(NSYM, _decodeSymbol)(&run_model[rctx], &rc); - if (rctx == last) - rctx = 256; - else - rctx += (rctx < NSYM-1); - run += r; - } while (r == MAX_RUN-1 && run < out_sz); - while (run-- && i+1 < out_sz) - out[++i] = last; - } - - RC_FinishDecode(&rc); - - htscodecs_tls_free(byte_model); - htscodecs_tls_free(run_model); - return out; -} - -/*----------------------------------------------------------------------------- - * Simple interface to the order-0 vs order-1 encoders and decoders. - * - * Smallest is method, , so worst case 2 bytes longer. 
- */ -unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size, - int order) { - unsigned int c_meta_len; - uint8_t *rle = NULL, *packed = NULL; - - if (in_size > INT_MAX) { - *out_size = 0; - return NULL; - } - - if (!out) { - *out_size = arith_compress_bound(in_size, order); - if (!(out = malloc(*out_size))) - return NULL; - } - unsigned char *out_end = out + *out_size; - - if (in_size <= 20) - order &= ~X_STRIPE; - - if (order & X_CAT) { - out[0] = X_CAT; - c_meta_len = 1 + var_put_u32(&out[1], out_end, in_size); - memcpy(out+c_meta_len, in, in_size); - *out_size = in_size+c_meta_len; - } - - if (order & X_STRIPE) { - int N = (order>>8); - if (N == 0) N = 4; // default for compatibility with old tests - - if (N > 255) - return NULL; - - unsigned char *transposed = malloc(in_size); - unsigned int part_len[256]; - unsigned int idx[256]; - if (!transposed) - return NULL; - int i, j, x; - - for (i = 0; i < N; i++) { - part_len[i] = in_size / N + ((in_size % N) > i); - idx[i] = i ? idx[i-1] + part_len[i-1] : 0; // cumulative index - } - - for (i = x = 0; i < in_size-N; i += N, x++) { - for (j = 0; j < N; j++) - transposed[idx[j]+x] = in[i+j]; - } - for (; i < in_size; i += N, x++) { - for (j = 0; i+j < in_size; j++) - transposed[idx[j]+x] = in[i+j]; - } - - unsigned int olen2; - unsigned char *out2, *out2_start; - c_meta_len = 1; - *out = order & ~X_NOSZ; - c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size); - out[c_meta_len++] = N; - - out2_start = out2 = out+7+5*N; // shares a buffer with c_meta - for (i = 0; i < N; i++) { - // Brute force try all methods. - // FIXME: optimise this bit. Maybe learn over time? - int j, best_j = 0, best_sz = INT_MAX; - - // Works OK with read names. The first byte is the most important, - // as it has most variability (little-endian). After that it's - // often quite predictable. - // - // Do we gain in any other context in CRAM? Aux tags maybe? 
- int m[][4] = {{3, 1,64,0}, - {2, 1,0}, - {2, 1,128}, - {2, 1,128}}; - -// int m[][6] = {{4, 1,64,2,0}, //test of adding in an order-2 codec -// {3, 1,2,0}, -// {3, 1,2,128}, -// {3, 1,2,128}}; - -// Other possibilities for methods to try. -// int m[][10] = {{8, 1,128,129,64,65,192,193,4,0}, -// {8, 1,128,129,64,65,192,193,4,0}, -// {8, 1,128,129,64,65,192,193,4,0}, -// {8, 1,128,129,64,65,192,193,4,0}}; - -// int m[][9] = {{5, 1,128,64,65,0}, -// {5, 1,128,64,65,0}, -// {5, 1,128,64,65,0}, -// {5, 1,128,64,65,0}}; - -// int m[][6] = {{4, 0,1,128,64}, -// {5, 0,1,128,65,193}, -// {3, 0,1,128}, -// {3, 0,1,128}}; - -// int m[][6] = {{4, 1,128,64,0}, -// {4, 1,128,65,0}, -// {2, 128,0}, -// {2, 128,0}}; - -// int m[][6] = {{2, 64,0}, -// {1, 0}, -// {1, 128}, -// {1, 128}}; - -// int m[][6] = {{1, 0}, -// {2, 128,0}, -// {1, 128}, -// {1, 128}}; - - for (j = 1; j <= m[MIN(i,3)][0]; j++) { - olen2 = *out_size - (out2 - out); - //fprintf(stderr, "order=%d m=%d\n", order&3, m[MIN(i,4)][j]); - if ((order&3) == 0 && (m[MIN(i,3)][j]&1)) - continue; - - arith_compress_to(transposed+idx[i], part_len[i], - out2, &olen2, m[MIN(i,3)][j] | X_NOSZ); - if (best_sz > olen2) { - best_sz = olen2; - best_j = j; - } - } -// if (best_j == 0) // none desireable -// return NULL; - if (best_j != j-1) { - olen2 = *out_size - (out2 - out); - arith_compress_to(transposed+idx[i], part_len[i], - out2, &olen2, m[MIN(i,3)][best_j] | X_NOSZ); - } - out2 += olen2; - c_meta_len += var_put_u32(out+c_meta_len, out_end, olen2); - } - memmove(out+c_meta_len, out2_start, out2-out2_start); - free(transposed); - *out_size = c_meta_len + out2-out2_start; - return out; - } - - int do_pack = order & X_PACK; - int do_rle = order & X_RLE; - int no_size = order & X_NOSZ; - int do_ext = order & X_EXT; - - out[0] = order; - c_meta_len = 1; - - if (!no_size) - c_meta_len += var_put_u32(&out[1], out_end, in_size); - - order &= 0x3; - - // Format is compressed meta-data, compressed data. 
- // Meta-data can be empty, pack, rle lengths, or pack + rle lengths. - // Data is either the original data, bit-packed packed, rle literals or - // packed + rle literals. - - if (do_pack && in_size) { - // PACK 2, 4 or 8 symbols into one byte. - int pmeta_len; - uint64_t packed_len; - packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len); - if (!packed) { - out[0] &= ~X_PACK; - do_pack = 0; - free(packed); - packed = NULL; - } else { - in = packed; - in_size = packed_len; - c_meta_len += pmeta_len; - - // Could derive this rather than storing verbatim. - // Orig size * 8/nbits (+1 if not multiple of 8/n) - int sz = var_put_u32(out+c_meta_len, out_end, in_size); - c_meta_len += sz; - *out_size -= sz; - } - } else if (do_pack) { - out[0] &= ~X_PACK; - } - - if (do_rle && !in_size) { - out[0] &= ~X_RLE; - } - - *out_size -= c_meta_len; - if (order && in_size < 8) { - out[0] &= ~3; - order &= ~3; - } - - if (do_ext) { - // Use an external compression library instead. - // For now, bzip2 -#ifdef HAVE_LIBBZ2 - if (BZ_OK != BZ2_bzBuffToBuffCompress((char *)out+c_meta_len, out_size, - (char *)in, in_size, 9, 0, 30)) - *out_size = in_size; // Didn't fit with bz2; force X_CAT below instead -#else - fprintf(stderr, "Htscodecs has been compiled without libbz2 support\n"); - free(out); - return NULL; -#endif - -// // lzma doesn't help generally, at least not for the name tokeniser -// size_t lzma_size = 0; -// lzma_easy_buffer_encode(9, LZMA_CHECK_CRC32, NULL, -// in, in_size, out+c_meta_len, &lzma_size, -// *out_size); -// *out_size = lzma_size; - - } else { - if (do_rle) { - if (order == 0) - arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size); - else - arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size); - } else { - //if (order == 2) - // arith_compress_O2(in, in_size, out+c_meta_len, out_size); - //else - if (order == 1) - arith_compress_O1(in, in_size, out+c_meta_len, out_size); - else - arith_compress_O0(in, in_size, out+c_meta_len, 
out_size); - } - } - - if (*out_size >= in_size) { - out[0] &= ~(3|X_EXT); // no entropy encoding, but keep e.g. PACK - out[0] |= X_CAT | no_size; - memcpy(out+c_meta_len, in, in_size); - *out_size = in_size; - } - - free(rle); - free(packed); - - *out_size += c_meta_len; - - return out; -} - -unsigned char *arith_compress(unsigned char *in, unsigned int in_size, - unsigned int *out_size, int order) { - return arith_compress_to(in, in_size, NULL, out_size, order); -} - -unsigned char *arith_uncompress_to(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - unsigned char *in_end = in + in_size; - unsigned char *out_free = NULL; - unsigned char *tmp_free = NULL; - - if (in_size == 0) - return NULL; - - if (*in & X_STRIPE) { - unsigned int ulen, olen, c_meta_len = 1; - int i; - uint64_t clen_tot = 0; - - // Decode lengths - c_meta_len += var_get_u32(in+c_meta_len, in_end, &ulen); - if (c_meta_len >= in_size) - return NULL; - unsigned int N = in[c_meta_len++]; - if (N < 1) // Must be at least one stripe - return NULL; - unsigned int clenN[256], ulenN[256], idxN[256]; - if (!out) { - if (ulen >= INT_MAX) - return NULL; - if (!(out_free = out = malloc(ulen))) { - return NULL; - } - *out_size = ulen; - } - if (ulen != *out_size) { - free(out_free); - return NULL; - } - - for (i = 0; i < N; i++) { - ulenN[i] = ulen / N + ((ulen % N) > i); - idxN[i] = i ? idxN[i-1] + ulenN[i-1] : 0; - c_meta_len += var_get_u32(in+c_meta_len, in_end, &clenN[i]); - clen_tot += clenN[i]; - if (c_meta_len > in_size || clenN[i] > in_size || clenN[i] < 1) { - free(out_free); - return NULL; - } - } - - // We can call this with a larger buffer, but once we've determined - // how much we really use we limit it so the recursion becomes easier - // to limit. 
- if (c_meta_len + clen_tot > in_size) { - free(out_free); - return NULL; - } - in_size = c_meta_len + clen_tot; - - //fprintf(stderr, " stripe meta %d\n", c_meta_len); //c-size - - // Uncompress the N streams - unsigned char *outN = malloc(ulen); - if (!outN) { - free(out_free); - return NULL; - } - for (i = 0; i < N; i++) { - olen = ulenN[i]; - if (in_size < c_meta_len) { - free(out_free); - free(outN); - return NULL; - } - if (!arith_uncompress_to(in+c_meta_len, in_size-c_meta_len, outN + idxN[i], &olen) - || olen != ulenN[i]) { - free(out_free); - free(outN); - return NULL; - } - c_meta_len += clenN[i]; - } - - unstripe(out, outN, ulen, N, idxN); - - free(outN); - *out_size = ulen; - return out; - } - - int order = *in++; in_size--; - int do_pack = order & X_PACK; - int do_rle = order & X_RLE; - int do_cat = order & X_CAT; - int no_size = order & X_NOSZ; - int do_ext = order & X_EXT; - order &= 3; - - int sz = 0; - unsigned int osz; - if (!no_size) - sz = var_get_u32(in, in_end, &osz); - else - sz = 0, osz = *out_size; - in += sz; - in_size -= sz; - - if (osz >= INT_MAX) - return NULL; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Limit maximum size to get fast turnaround on fuzzing test cases - if (osz > 100000) - goto err; -#endif - - if (no_size && !out) - return NULL; // Need one or the other - - if (!out) { - *out_size = osz; - if (!(out_free = out = malloc(*out_size))) - return NULL; - } else { - if (*out_size < osz) - return NULL; - *out_size = osz; - } - - uint32_t c_meta_size = 0; - unsigned int tmp1_size = *out_size; - unsigned int tmp2_size = *out_size; - unsigned char *tmp1 = NULL, *tmp2 = NULL, *tmp = NULL; - - // Need In, Out and Tmp buffers with temporary buffer of the same size - // as output. Our entropy decode is either arithmetic (with/without RLE) - // or external (bz2, gzip, lzma) but with an optional unPACK transform - // at the end. 
- // - // To avoid pointless memcpy when unpacking we switch around which - // buffers we're writing to accordingly. - - // Format is pack meta data if present, followed by compressed data. - if (do_pack) { - if (!(tmp_free = tmp = malloc(*out_size))) - goto err; - tmp1 = tmp; // uncompress - tmp2 = out; // unpack - } else { - // no pack - tmp = NULL; - tmp1 = out; // uncompress - tmp2 = out; // NOP - } - - - // Decode the bit-packing map. - uint8_t map[16] = {0}; - int npacked_sym = 0; - uint64_t unpacked_sz = 0; // FIXME: rename to packed_per_byte - if (do_pack) { - c_meta_size = hts_unpack_meta(in, in_size, *out_size, map, &npacked_sym); - if (c_meta_size == 0) - goto err; - - unpacked_sz = osz; - in += c_meta_size; - in_size -= c_meta_size; - - // New unpacked size. We could derive this bit from *out_size - // and npacked_sym. - unsigned int osz; - sz = var_get_u32(in, in_end, &osz); - in += sz; - in_size -= sz; - if (osz > tmp1_size) - goto err; - tmp1_size = osz; - } - - //fprintf(stderr, " meta_size %d bytes\n", (int)(in - orig_in)); //c-size - - // uncompress RLE data. in -> tmp1 - if (in_size) { -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Limit maximum size to get fast turnaround on fuzzing test cases - if (tmp1_size > 100000) - goto err; -#endif - if (do_cat) { - //fprintf(stderr, " CAT %d\n", tmp1_size); //c-size - if (tmp1_size > in_size) - goto err; - if (tmp1_size > *out_size) - goto err; - memcpy(tmp1, in, tmp1_size); - } else if (do_ext) { -#ifdef HAVE_LIBBZ2 - if (BZ_OK != BZ2_bzBuffToBuffDecompress((char *)tmp1, &tmp1_size, - (char *)in, in_size, 0, 0)) - goto err; -#else - fprintf(stderr, "Htscodecs has been compiled without libbz2 support\n"); - goto err; -#endif - } else { - // in -> tmp1 - if (do_rle) { - tmp1 = order == 1 - ? 
arith_uncompress_O1_RLE(in, in_size, tmp1, tmp1_size) - : arith_uncompress_O0_RLE(in, in_size, tmp1, tmp1_size); - } else { - //if (order == 2) - // tmp1 = arith_uncompress_O2(in, in_size, tmp1, tmp1_size) - //else - tmp1 = order == 1 - ? arith_uncompress_O1(in, in_size, tmp1, tmp1_size) - : arith_uncompress_O0(in, in_size, tmp1, tmp1_size); - } - if (!tmp1) - goto err; - } - } else { - tmp1_size = 0; - } - - if (do_pack) { - // Unpack bits via pack-map. tmp1 -> tmp2 - if (npacked_sym == 1) - unpacked_sz = tmp1_size; - //uint8_t *porig = unpack(tmp2, tmp2_size, unpacked_sz, npacked_sym, map); - //memcpy(tmp3, porig, unpacked_sz); - if (!hts_unpack(tmp1, tmp1_size, tmp2, unpacked_sz, npacked_sym, map)) - goto err; - tmp2_size = unpacked_sz; - } else { - tmp2_size = tmp1_size; - } - - if (tmp) - free(tmp); - - *out_size = tmp2_size; - return tmp2; - - err: - free(tmp_free); - free(out_free); - return NULL; -} - -unsigned char *arith_uncompress(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - return arith_uncompress_to(in, in_size, NULL, out_size); -} diff --git a/src/htslib-1.18/htscodecs/htscodecs/c_range_coder.h b/src/htslib-1.18/htscodecs/htscodecs/c_range_coder.h deleted file mode 100644 index 3ee3977..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/c_range_coder.h +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright Eugene Shelwien. -// Release into public domain. - -// Modifications by James Bonfield (2019) - - -/* - * Note it is up to the calling code to ensure that no overruns on input and - * output buffers occur. - * - * Call the input() and output() functions to set and query the current - * buffer locations. 
- * - - */ - -#ifndef C_RANGER_CODER_H -#define C_RANGER_CODER_H - -#define DO(n) int _;for (_=0; _out_buf = rc->in_buf = (uc *)in; - rc->in_end = (uc *)in_end; -} -static inline void RC_SetOutput(RangeCoder *rc, char *out) { rc->in_buf = rc->out_buf = (uc *)out; } -static inline char *RC_GetInput(RangeCoder *rc) { return (char *)rc->in_buf; } -static inline char *RC_GetOutput(RangeCoder *rc) { return (char *)rc->out_buf; } -static inline size_t RC_OutSize(RangeCoder *rc) { return rc->out_buf - rc->in_buf; } -static inline size_t RC_InSize(RangeCoder *rc) { return rc->in_buf - rc->out_buf; } - -static inline void RC_StartEncode(RangeCoder *rc) -{ - rc->range = 0xFFFFFFFF; - rc->low = 0; - rc->FFNum = 0; - rc->Carry = 0; - rc->Cache = 0; - rc->code = 0; -} - -static inline void RC_StartDecode(RangeCoder *rc) -{ - rc->range = 0xFFFFFFFF; - rc->low = 0; - rc->FFNum = 0; - rc->Carry = 0; - rc->Cache = 0; - rc->code = 0; - if (rc->in_buf+5 > rc->in_end) { - rc->in_buf = rc->in_end; // prevent decode - return; - } - DO(5) rc->code = (rc->code<<8) | *rc->in_buf++; -} - -static inline void RC_ShiftLow(RangeCoder *rc) { - if (rc->low < Thres || rc->Carry) { - *rc->out_buf++ = rc->Cache + rc->Carry; - - // Flush any stored FFs - while (rc->FFNum) { - *rc->out_buf++ = rc->Carry-1; // (Carry-1)&255; - rc->FFNum--; - } - - // Take copy of top byte ready for next flush - rc->Cache = rc->low >> 24; - rc->Carry = 0; - } else { - // Low if FFxx xxxx. 
Bump FF count and shift in as before - rc->FFNum++; - } - rc->low = rc->low<<8; -} - -static inline void RC_FinishEncode(RangeCoder *rc) -{ - DO(5) RC_ShiftLow(rc); -} - -static inline void RC_FinishDecode(RangeCoder *rc) {} - -static inline void RC_Encode (RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t totFreq) -{ - uint32_t tmp = rc->low; - rc->low += cumFreq * (rc->range/= totFreq); - rc->range*= freq; - - rc->Carry += rc->lowrange < TOP) { - rc->range <<= 8; - RC_ShiftLow(rc); - } -} - -static inline uint32_t RC_GetFreq (RangeCoder *rc, uint32_t totFreq) { - //return rc->code/(rc->range/=totFreq); - return (totFreq && rc->range >= totFreq) ? rc->code/(rc->range/=totFreq) : 0; -} - -static inline void RC_Decode (RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t totFreq) -{ - rc->code -= cumFreq * rc->range; - rc->range *= freq; - while (rc->range < TOP) { - if (rc->in_buf >= rc->in_end) - return; // FIXME: could signal error, instead of caller just generating nonsense - rc->code = (rc->code<<8) + *rc->in_buf++; - rc->range <<= 8; - } -} - -#endif /* C_RANGER_CODER_H */ diff --git a/src/htslib-1.18/htscodecs/htscodecs/fqzcomp_qual.c b/src/htslib-1.18/htscodecs/htscodecs/fqzcomp_qual.c deleted file mode 100644 index c1d40ad..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/fqzcomp_qual.c +++ /dev/null @@ -1,1590 +0,0 @@ -/* - * Copyright (c) 2011-2013, 2018-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// We use generic maps to turn 0-M into 0-N where N <= M -// before adding these into the context. These are used -// for positions, running-diffs and quality values. -// -// This can be used as a simple divisor, eg pos/24 to get -// 2 bits of positional data for each quarter along a 100bp -// read, or it can be tailored for specific such as noting -// the first 5 cycles are poor, then we have stability and -// a gradual drop off in the last 20 or so. Perhaps we then -// map pos 0-4=0, 5-79=1, 80-89=2, 90-99=3. -// -// We don't need to specify how many bits of data we are -// using (2 in the above example), as that is just implicit -// in the values in the map. Specify not to use a map simply -// disables that context type (our map is essentially 0-M -> 0). 
- -// Example of command line usage: -// -// f=~/scratch/data/q4 -// cc -Wall -DTEST_MAIN -O3 -g fqzcomp_qual2.c -lm -// ./a.out $f > /tmp/_ && ./a.out -d < /tmp/_ > /tmp/__ && cmp /tmp/__ $f - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "fqzcomp_qual.h" -#include "varint.h" -#include "utils.h" - -#define CTX_BITS 16 -#define CTX_SIZE (1<(b)?(a):(b)) -#endif - -#define QMAX 256 -#define QBITS 12 -#define QSIZE (1< 255 therefore means we need to repeatedly read to find -// the actual run length. -// Alternatively we could bit-encode instead of byte encode, eg BETA. -static int store_array(unsigned char *out, unsigned int *array, int size) { - unsigned char tmp[2048]; - - int i, j, k; - for (i = j = k = 0; i < size; j++) { - int run_len = i; - while (i < size && array[i] == j) - i++; - run_len = i-run_len; - - int r; - do { - r = MIN(255, run_len); - tmp[k++] = r; - run_len -= r; - } while (r == 255); - } - while (i < size) - tmp[k++] = 0, j++; - - // RLE on out. - // 1 2 3 3 3 3 3 4 4 5 - // => 1 2 3 3 +3... 
4 4 +0 5 - int last = -1; - for (i = j = 0; j < k; i++) { - out[i] = tmp[j++]; - if (out[i] == last) { - int n = j; - while (j < k && tmp[j] == last) - j++; - out[++i] = j-n; - } else { - last = out[i]; - } - } - k = i; - -// fprintf(stderr, "Store_array %d => %d {", size, k); -// for (i = 0; i < k; i++) -// fprintf(stderr, "%d,", out[i]); -// fprintf(stderr, "}\n"); - return k; -} - -static int read_array(unsigned char *in, size_t in_size, unsigned int *array, int size) { - unsigned char R[1024]; - int i, j, z, last = -1, nb = 0; - - size = MIN(1024, size); - - // Remove level one of run-len encoding - for (i = j = z = 0; z < size && i < in_size; i++) { - int run = in[i]; - R[j++] = run; - z += run; - if (run == last) { - if (i+1 >= in_size) - return -1; - int copy = in[++i]; - z += run * copy; - while (copy-- && z <= size && j < 1024) - R[j++] = run; - } - if (j >= 1024) - return -1; - last = run; - } - nb = i; - - // Now expand inner level of run-length encoding - int R_max = j; - for (i = j = z = 0; j < size; i++) { - int run_len = 0; - int run_part; - if (z >= R_max) - return -1; - do { - run_part = R[z++]; - run_len += run_part; - } while (run_part == 255 && z < R_max); - if (run_part == 255) - return -1; - - while (run_len && j < size) - run_len--, array[j++] = i; - } - - return nb; -} - -// FIXME: how to auto-tune these rather than trial and error? -// r2 = READ2 -// qa = qual avg (0, 2, 4) -static int strat_opts[][12] = { -// qb qs pb ps db ds ql sl pl dl r2 qa - {10, 5, 4,-1, 2, 1, 0, 14, 10, 14, 0,-1}, // basic options (level < 7) - {8, 5, 7, 0, 0, 0, 0, 14, 8, 14, 1,-1}, // e.g. HiSeq 2000 - {12, 6, 2, 0, 2, 3, 0, 9, 12, 14, 0, 0}, // e.g. MiSeq - {12, 6, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0}, // e.g. 
IonTorrent; adaptive O1 - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // custom -}; -static int nstrats = sizeof(strat_opts) / sizeof(*strat_opts); - -#ifdef HAVE_BUILTIN_PREFETCH -static inline void mm_prefetch(void *x) { - __builtin_prefetch(x); -} -#else -static inline void mm_prefetch(void *x) { - // Fetch and discard is quite close to a genuine prefetch - *(volatile int *)x; -} -#endif - -typedef struct { - unsigned int qctx; // quality sub-context - unsigned int p; // pos (bytes remaining) - unsigned int delta; // delta running total - unsigned int prevq; // previous quality - unsigned int s; // selector - unsigned int qtot, qlen; - unsigned int first_len; - unsigned int last_len; - ssize_t rec; - unsigned int ctx; -} fqz_state; - -static void dump_table(unsigned int *tab, int size, char *name) { - int i, last = -99, run = 0; - fprintf(stderr, "\t%s\t{", name); - for (i = 0; i < size; i++) { - if (tab[i] == last) { - run++; - } else if (run == 1 && tab[i] == last+1) { - int first = last; - do { - last = tab[i]; - i++; - } while (i < size && tab[i] == last+1); - i--; - - // Want 0,1,2,3,3,3 as 0..2 3x3, not 0..3 3x2 - if (tab[i] == tab[i+1]) - i--; - if (tab[i] != first) - fprintf(stderr, "..%d", tab[i]); - run = 1; - last = -99; - } else { - if (run > 1) - fprintf(stderr, " x %d%s%d", run, i?", ":"", tab[i]); - else - fprintf(stderr, "%s%d", i?", ":"", tab[i]); - run = 1; - last = tab[i]; - } - } - if (run > 1) - fprintf(stderr, " x %d", run); - fprintf(stderr, "}\n"); -} - -static void dump_map(unsigned int *map, int size, char *name) { - int i, c = 0; - fprintf(stderr, "\t%s\t{", name); - for (i = 0; i < size; i++) - if (map[i] != INT_MAX) - fprintf(stderr, "%s%d=%d", c++?", ":"", i, map[i]); - fprintf(stderr, "}\n"); -} - -/* #pragma GCC diagnostic ignored "-Wunused-function" */ -static void dump_params(fqz_gparams *gp) { - fprintf(stderr, "Global params = {\n"); - fprintf(stderr, "\tvers\t%d\n", gp->vers); - fprintf(stderr, "\tgflags\t0x%02x\n", gp->gflags); - 
fprintf(stderr, "\tnparam\t%d\n", gp->nparam); - fprintf(stderr, "\tmax_sel\t%d\n", gp->max_sel); - fprintf(stderr, "\tmax_sym\t%d\n", gp->max_sym); - if (gp->gflags & GFLAG_HAVE_STAB) - dump_table(gp->stab, 256, "stab"); - fprintf(stderr, "}\n"); - - int i; - for (i = 0; i < gp->nparam; i++) { - fqz_param *pm = &gp->p[i]; - fprintf(stderr, "\nParam[%d] = {\n", i); - fprintf(stderr, "\tcontext\t0x%04x\n", pm->context); - fprintf(stderr, "\tpflags\t0x%02x\n", pm->pflags); - fprintf(stderr, "\tmax_sym\t%d\n", pm->max_sym); - fprintf(stderr, "\tqbits\t%d\n", pm->qbits); - fprintf(stderr, "\tqshift\t%d\n", pm->qshift); - fprintf(stderr, "\tqloc\t%d\n", pm->qloc); - fprintf(stderr, "\tsloc\t%d\n", pm->sloc); - fprintf(stderr, "\tploc\t%d\n", pm->ploc); - fprintf(stderr, "\tdloc\t%d\n", pm->dloc); - - if (pm->pflags & PFLAG_HAVE_QMAP) - dump_map(pm->qmap, 256, "qmap"); - - if (pm->pflags & PFLAG_HAVE_QTAB) - dump_table(pm->qtab, 256, "qtab"); - if (pm->pflags & PFLAG_HAVE_PTAB) - dump_table(pm->ptab, 1024, "ptab"); - if (pm->pflags & PFLAG_HAVE_DTAB) - dump_table(pm->dtab, 256, "dtab"); - fprintf(stderr, "}\n"); - } -} - -typedef struct { - SIMPLE_MODEL(QMAX,_) *qual; - SIMPLE_MODEL(256,_) len[4]; - SIMPLE_MODEL(2,_) revcomp; - SIMPLE_MODEL(256,_) sel; - SIMPLE_MODEL(2,_) dup; -} fqz_model; - -static int fqz_create_models(fqz_model *m, fqz_gparams *gp) { - int i; - - if (!(m->qual = htscodecs_tls_alloc(sizeof(*m->qual) * CTX_SIZE))) - return -1; - - for (i = 0; i < CTX_SIZE; i++) - SIMPLE_MODEL(QMAX,_init)(&m->qual[i], gp->max_sym+1); - - for (i = 0; i < 4; i++) - SIMPLE_MODEL(256,_init)(&m->len[i],256); - - SIMPLE_MODEL(2,_init)(&m->revcomp,2); - SIMPLE_MODEL(2,_init)(&m->dup,2); - if (gp->max_sel > 0) - SIMPLE_MODEL(256,_init)(&m->sel, gp->max_sel+1); - - return 0; -} - -static void fqz_destroy_models(fqz_model *m) { - htscodecs_tls_free(m->qual); -} - -static inline unsigned int fqz_update_ctx(fqz_param *pm, fqz_state *state, int q) { - unsigned int last = 0; // 
pm->context - state->qctx = (state->qctx << pm->qshift) + pm->qtab[q]; - last += (state->qctx & pm->qmask) << pm->qloc; - - // The final shifts have been factored into the tables already. - last += pm->ptab[MIN(1023, state->p)]; // << pm->ploc - last += pm->dtab[MIN(255, state->delta)]; // << pm->dloc - last += state->s << pm->sloc; - - // On the fly average is slow work. - // However it can be slightly better than using a selector bit - // as it's something we can compute on the fly and thus doesn't - // consume output bits for storing the selector itself. - // - // Q4 (novaseq.bam) - // qtot+=q*q -DQ1=8.84 -DQ2=8.51 -DQ3=7.70; 7203598 (-0.7%) - // qtot+=q -DQ1=2.96 -DQ2=2.85 -DQ3=2.69; 7207315 - // vs old delta; 7255614 (default params) - // vs 2 bit selector (no delta) 7203006 (-x 0x8261000e80) - // vs 2 bit selector (no delta) 7199153 (-x 0x7270000e70) -0.8% - // vs 2 bit selector (no delta) 7219668 (-x 0xa243000ea0) - //{ - // double qa = state->qtot / (state->qlen+.01); - // //fprintf(stderr, "%f\n", qa); - // int x = 0; - // if (qa>=Q1) x=3; - // else if (qa>=Q2) x=2; - // else if (qa>=Q3) x=1; - // else x=0; - // last += x << pm->dloc; // tmp reuse of delta pos - // state->qtot += q*q; - // state->qlen++; - //} - - // Only update delta after 1st base. - state->delta += (state->prevq != q); - state->prevq = q; - - state->p--; - - return last & (CTX_SIZE-1); -} - -// Build quality stats for qhist and set nsym, do_dedup and do_sel params. -// One_param is -1 to gather stats on all data, or >= 0 to gather data -// on one specific selector parameter. Used only in TEST_MAIN via -// fqz_manual_parameters at the moment. 
-void fqz_qual_stats(fqz_slice *s, - unsigned char *in, size_t in_size, - fqz_param *pm, - uint32_t qhist[256], - int one_param) { -#define NP 32 - uint32_t qhistb[NP][256] = {{0}}; // both - uint32_t qhist1[NP][256] = {{0}}; // READ1 only - uint32_t qhist2[NP][256] = {{0}}; // READ2 only - uint64_t t1[NP] = {0}; // Count for READ1 - uint64_t t2[NP] = {0}; // COUNT for READ2 - uint32_t avg[2560] = {0}; // Avg qual *and later* avg-to-selector map. - - int dir = 0; - int last_len = 0; - int do_dedup = 0; - size_t rec; - size_t i, j; - int num_rec = 0; - - // See what info we've been given. - // Do we have READ1 / READ2? - // Do we have selector hidden in the top bits of flag? - int max_sel = 0; - int has_r2 = 0; - for (rec = 0; rec < s->num_records; rec++) { - if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) - continue; - num_rec++; - if (max_sel < (s->flags[rec] >> 16)) - max_sel = (s->flags[rec] >> 16); - if (s->flags[rec] & FQZ_FREAD2) - has_r2 = 1; - } - - // Dedup detection and histogram stats gathering - int *avg_qual = calloc((s->num_records+1), sizeof(int)); - if (!avg_qual) - return; - - rec = i = j = 0; - while (i < in_size) { - if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) { - avg_qual[rec] = 0; - i += s->len[rec++]; - continue; - } - if (rec < s->num_records) { - j = s->len[rec]; - dir = s->flags[rec] & FQZ_FREAD2 ? 1 : 0; - if (i > 0 && j == last_len - && !memcmp(in+i-last_len, in+i, j)) - do_dedup++; // cache which records are dup? - } else { - j = in_size - i; - dir = 0; - } - last_len = j; - - uint32_t (*qh)[256] = dir ? qhist2 : qhist1; - uint64_t *th = dir ? t2 : t1; - - uint32_t tot = 0; - for (; i < in_size && j > 0; i++, j--) { - tot += in[i]; - qhist[in[i]]++; - qhistb[j & (NP-1)][in[i]]++; - qh[j & (NP-1)][in[i]]++; - th[j & (NP-1)]++; - } - tot = last_len ? 
(tot*10.0)/last_len+.5 : 0; - - avg_qual[rec] = tot; - avg[MIN(2559, tot)]++; - - rec++; - } - pm->do_dedup = ((rec+1)/(do_dedup+1) < 500); - - last_len = 0; - - // Unique symbol count - for (i = pm->max_sym = pm->nsym = 0; i < 256; i++) { - if (qhist[i]) - pm->max_sym = i, pm->nsym++; - } - - - // Auto tune: does average quality helps us? - if (pm->do_qa != 0) { - // Histogram of average qual in avg[] - // NB: we convert avg[] from count to selector index - - // Few symbols means high compression which means - // selector bits become more significant fraction. - // Reduce selector bits by skewing the distribution - // to not be even binning. - double qf0 = pm->nsym > 8 ? 0.2 : 0.05; - double qf1 = pm->nsym > 8 ? 0.5 : 0.22; - double qf2 = pm->nsym > 8 ? 0.8 : 0.60; - - int total = 0; - i = 0; - while (i < 2560) { - total += avg[i]; - if (total > qf0 * num_rec) { - //fprintf(stderr, "Q1=%d\n", (int)i); - break; - } - avg[i++] = 0; - } - while (i < 2560) { - total += avg[i]; - if (total > qf1 * num_rec) { - //fprintf(stderr, "Q2=%d\n", (int)i); - break; - } - avg[i++] = 1; - } - while (i < 2560) { - total += avg[i]; - if (total > qf2 * num_rec) { - //fprintf(stderr, "Q3=%d\n", (int)i); - break; - } - avg[i++] = 2; - } - while (i < 2560) - avg[i++] = 3; - - // Compute simple entropy of merged signal vs split signal. 
- i = 0; - rec = 0; - - int qbin4[4][NP][256] = {{{0}}}; - int qbin2[2][NP][256] = {{{0}}}; - int qbin1 [NP][256] = {{0}}; - int qcnt4[4][NP] = {{0}}; - int qcnt2[4][NP] = {{0}}; - int qcnt1 [NP] = {0}; - while (i < in_size) { - if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) { - i += s->len[rec++]; - continue; - } - if ((rec & 7) && rec < s->num_records) { - // subsample for speed - i += s->len[rec++]; - continue; - } - if (rec < s->num_records) - j = s->len[rec]; - else - j = in_size - i; - last_len = j; - - uint32_t tot = avg_qual[rec]; - int qb4 = avg[MIN(2559, tot)]; - int qb2 = qb4/2; - - for (; i < in_size && j > 0; i++, j--) { - int x = j & (NP-1); - qbin4[qb4][x][in[i]]++; qcnt4[qb4][x]++; - qbin2[qb2][x][in[i]]++; qcnt2[qb2][x]++; - qbin1 [x][in[i]]++; qcnt1 [x]++; - } - rec++; - } - - double e1 = 0, e2 = 0, e4 = 0; - for (j = 0; j < NP; j++) { - for (i = 0; i < 256; i++) { - if (qbin1 [j][i]) e1 += qbin1 [j][i] * fast_log(qbin1 [j][i] / (double)qcnt1 [j]); - if (qbin2[0][j][i]) e2 += qbin2[0][j][i] * fast_log(qbin2[0][j][i] / (double)qcnt2[0][j]); - if (qbin2[1][j][i]) e2 += qbin2[1][j][i] * fast_log(qbin2[1][j][i] / (double)qcnt2[1][j]); - if (qbin4[0][j][i]) e4 += qbin4[0][j][i] * fast_log(qbin4[0][j][i] / (double)qcnt4[0][j]); - if (qbin4[1][j][i]) e4 += qbin4[1][j][i] * fast_log(qbin4[1][j][i] / (double)qcnt4[1][j]); - if (qbin4[2][j][i]) e4 += qbin4[2][j][i] * fast_log(qbin4[2][j][i] / (double)qcnt4[2][j]); - if (qbin4[3][j][i]) e4 += qbin4[3][j][i] * fast_log(qbin4[3][j][i] / (double)qcnt4[3][j]); - } - } - e1 /= -log(2)/8; - e2 /= -log(2)/8; - e4 /= -log(2)/8; - //fprintf(stderr, "E1=%f E2=%f E4=%f %f\n", e1, e2+s->num_records/8, e4+s->num_records/4, (e4+s->num_records/4)/(e2+s->num_records/8)); - - // Note by using the selector we're robbing bits from elsewhere in - // the context, which may reduce compression better. - // We don't know how much by, so this is basically a guess! - // For now we just say need 5% saving here. 
- double qm = pm->do_qa > 0 ? 1 : 0.98; - if ((pm->do_qa == -1 || pm->do_qa >= 4) && - e4 + s->num_records/4 < e2*qm + s->num_records/8 && - e4 + s->num_records/4 < e1*qm) { - //fprintf(stderr, "do q4\n"); - for (i = 0; i < s->num_records; i++) { - //fprintf(stderr, "%d -> %d -> %d, %d\n", (int)i, avg_qual[i], avg[MIN(2559, avg_qual[i])], s->flags[i]>>16); - s->flags[i] |= avg[MIN(2559, avg_qual[i])] <<16; - } - pm->do_sel = 1; - max_sel = 3; - } else if ((pm->do_qa == -1 || pm->do_qa >= 2) && e2 + s->num_records/8 < e1*qm) { - //fprintf(stderr, "do q2\n"); - for (i = 0; i < s->num_records; i++) - s->flags[i] |= (avg[MIN(2559, avg_qual[i])]>>1) <<16; - pm->do_sel = 1; - max_sel = 1; - } - - if (pm->do_qa == -1) { - // assume qual, pos, delta in that order. - if (pm->pbits > 0 && pm->dbits > 0) { - // 1 from pos/delta - pm->sloc = pm->dloc-1; - pm->pbits--; - pm->dbits--; - pm->dloc++; - } else if (pm->dbits >= 2) { - // 2 from delta - pm->sloc = pm->dloc; - pm->dbits -= 2; - pm->dloc += 2; - } else if (pm->qbits >= 2) { - pm->qbits -= 2; - pm->ploc -= 2; - pm->sloc = 16-2 - pm->do_r2; - if (pm->qbits == 6 && pm->qshift == 5) - pm->qbits--; - } - pm->do_qa = 4; - } - } - - // Auto tune: does splitting up READ1 and READ2 help us? - if (has_r2 || pm->do_r2) { // FIXME: && but debug for now - double e1 = 0, e2 = 0; // entropy sum - - for (j = 0; j < NP; j++) { - if (!t1[j] || !t2[j]) continue; - for (i = 0; i < 256; i++) { - if (!qhistb[j][i]) continue; - e1 -= (qhistb[j][i])*log(qhistb[j][i] / (double)(t1[j]+t2[j])); - if (qhist1[j][i]) - e2 -= qhist1[j][i] * log(qhist1[j][i] / (double)t1[j]); - if (qhist2[j][i]) - e2 -= qhist2[j][i] * log(qhist2[j][i] / (double)t2[j]); - } - } - e1 /= log(2)*8; // bytes - e2 /= log(2)*8; - - //fprintf(stderr, "read1/2 entropy merge %f split %f\n", e1, e2); - - // Note by using the selector we're robbing bits from elsewhere in - // the context, which may reduce compression better. 
- // We don't know how much by, so this is basically a guess! - // For now we just say need 5% saving here. - double qm = pm->do_r2 > 0 ? 1 : 0.95; - if (e2 + (8+s->num_records/8) < e1*qm) { - for (rec = 0; rec < s->num_records; rec++) { - if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) - continue; - int sel = s->flags[rec] >> 16; - s->flags[rec] = (s->flags[rec] & 0xffff) - | ((s->flags[rec] & FQZ_FREAD2) - ? ((sel*2)+1) << 16 - : ((sel*2)+0) << 16); - if (max_sel < (s->flags[rec]>>16)) - max_sel = (s->flags[rec]>>16); - } - } - } - - // We provided explicit selector data or auto-tuned it - if (max_sel > 0) { - pm->do_sel = 1; - pm->max_sel = max_sel; - } - - free(avg_qual); -} - -static inline -int fqz_store_parameters1(fqz_param *pm, unsigned char *comp) { - int comp_idx = 0, i, j; - - // Starting context - comp[comp_idx++] = pm->context; - comp[comp_idx++] = pm->context >> 8; - - comp[comp_idx++] = pm->pflags; - comp[comp_idx++] = pm->max_sym; - - comp[comp_idx++] = (pm->qbits<<4)|pm->qshift; - comp[comp_idx++] = (pm->qloc<<4)|pm->sloc; - comp[comp_idx++] = (pm->ploc<<4)|pm->dloc; - - if (pm->store_qmap) { - for (i = j = 0; i < 256; i++) - if (pm->qmap[i] != INT_MAX) - comp[comp_idx++] = i; - } - - if (pm->qbits && pm->use_qtab) - // custom qtab - comp_idx += store_array(comp+comp_idx, pm->qtab, 256); - - if (pm->pbits && pm->use_ptab) - // custom ptab - comp_idx += store_array(comp+comp_idx, pm->ptab, 1024); - - if (pm->dbits && pm->use_dtab) - // custom dtab - comp_idx += store_array(comp+comp_idx, pm->dtab, 256); - - return comp_idx; -} - -static -int fqz_store_parameters(fqz_gparams *gp, unsigned char *comp) { - int comp_idx = 0; - comp[comp_idx++] = gp->vers; // Format number - - comp[comp_idx++] = gp->gflags; - - if (gp->gflags & GFLAG_MULTI_PARAM) - comp[comp_idx++] = gp->nparam; - - if (gp->gflags & GFLAG_HAVE_STAB) { - comp[comp_idx++] = gp->max_sel; - comp_idx += store_array(comp+comp_idx, gp->stab, 256); - } - - int i; - for (i = 0; i < 
gp->nparam; i++) - comp_idx += fqz_store_parameters1(&gp->p[i], comp+comp_idx); - - //fprintf(stderr, "Encoded %d bytes of param\n", comp_idx); - return comp_idx; -} - -// Choose a set of parameters based on quality statistics and -// some predefined options (selected via "strat"). -static inline -int fqz_pick_parameters(fqz_gparams *gp, - int vers, - int strat, - fqz_slice *s, - unsigned char *in, - size_t in_size) { - //approx sqrt(delta), must be sequential - int dsqr[] = { - 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - }; - uint32_t qhist[256] = {0}; - - if (strat >= nstrats) strat = nstrats-1; - - // Start with 1 set of parameters. - // FIXME: add support for multiple params later. - memset(gp, 0, sizeof(*gp)); - gp->vers = FQZ_VERS; - - if (!(gp->p = calloc(1, sizeof(fqz_param)))) - return -1; - gp->nparam = 1; - gp->max_sel = 0; - - if (vers == 3) // V3.0 doesn't store qual in original orientation - gp->gflags |= GFLAG_DO_REV; - - fqz_param *pm = gp->p; - - // Programmed strategies, which we then amend based on our - // statistical analysis of the quality stream. - pm->qbits = strat_opts[strat][0]; - pm->qshift = strat_opts[strat][1]; - pm->pbits = strat_opts[strat][2]; - pm->pshift = strat_opts[strat][3]; - pm->dbits = strat_opts[strat][4]; - pm->dshift = strat_opts[strat][5]; - pm->qloc = strat_opts[strat][6]; - pm->sloc = strat_opts[strat][7]; - pm->ploc = strat_opts[strat][8]; - pm->dloc = strat_opts[strat][9]; - - // Params for controlling behaviour here. 
- pm->do_r2 = strat_opts[strat][10]; - pm->do_qa = strat_opts[strat][11]; - - // Validity check input lengths and buffer size - size_t tlen = 0, i; - for (i = 0; i < s->num_records; i++) { - if (tlen + s->len[i] > in_size) - // Oversized buffer - s->len[i] = in_size - tlen; - tlen += s->len[i]; - } - if (s->num_records > 0 && tlen < in_size) - // Undersized buffer - s->len[s->num_records-1] += in_size - tlen; - - // Quality metrics, for all recs - fqz_qual_stats(s, in, in_size, pm, qhist, -1); - - pm->store_qmap = (pm->nsym <= 8 && pm->nsym*2 < pm->max_sym); - - // Check for fixed length. - uint32_t first_len = s->len[0]; - for (i = 1; i < s->num_records; i++) { - if (s->len[i] != first_len) - break; - } - pm->fixed_len = (i == s->num_records); - pm->use_qtab = 0; // unused by current encoder - - if (strat >= nstrats-1) - goto manually_set; // used in TEST_MAIN for debugging - - if (pm->pshift < 0) - pm->pshift = MAX(0, log((double)s->len[0]/(1<pbits))/log(2)+.5); - - if (pm->nsym <= 4) { - // NovaSeq - pm->qshift = 2; // qmax 64, although we can store up to 256 if needed - if (in_size < 5000000) { - pm->pbits =2; - pm->pshift=5; - } - } else if (pm->nsym <= 8) { - // HiSeqX - pm->qbits =MIN(pm->qbits,9); - pm->qshift=3; - if (in_size < 5000000) - pm->qbits =6; - } - - if (in_size < 300000) { - pm->qbits=pm->qshift; - pm->dbits=2; - } - - manually_set: -// fprintf(stderr, "-x 0x%x%x%x%x%x%x%x%x%x%x%x%x\n", -// pm->qbits, pm->qshift, -// pm->pbits, pm->pshift, -// pm->dbits, pm->dshift, -// pm->qloc, pm->sloc, pm->ploc, pm->dloc, -// pm->do_r2, pm->do_qa); - - for (i = 0; i < sizeof(dsqr)/sizeof(*dsqr); i++) - if (dsqr[i] > (1<dbits)-1) - dsqr[i] = (1<dbits)-1; - - if (pm->store_qmap) { - int j; - for (i = j = 0; i < 256; i++) - if (qhist[i]) - pm->qmap[i] = j++; - else - pm->qmap[i] = INT_MAX; - pm->max_sym = pm->nsym; - } else { - pm->nsym = 255; - for (i = 0; i < 256; i++) - pm->qmap[i] = i; - } - if (gp->max_sym < pm->max_sym) - gp->max_sym = pm->max_sym; - - // 
Produce ptab from pshift. - if (pm->qbits) { - for (i = 0; i < 256; i++) { - pm->qtab[i] = i; // 1:1 - - // Alternative mappings: - //qtab[i] = i > 30 ? MIN(max_sym,i)-15 : i/2; // eg for 9827 BAM - } - - } - pm->qmask = (1<qbits)-1; - - if (pm->pbits) { - for (i = 0; i < 1024; i++) - pm->ptab[i] = MIN((1<pbits)-1, i>>pm->pshift); - - // Alternatively via analysis of quality distributions we - // may select a bunch of positions that are special and - // have a non-uniform ptab[]. - // Manual experimentation on a NovaSeq run saved 2.8% here. - } - - if (pm->dbits) { - for (i = 0; i < 256; i++) - pm->dtab[i] = dsqr[MIN(sizeof(dsqr)/sizeof(*dsqr)-1, i>>pm->dshift)]; - } - - pm->use_ptab = (pm->pbits > 0); - pm->use_dtab = (pm->dbits > 0); - - pm->pflags = - (pm->use_qtab ?PFLAG_HAVE_QTAB :0)| - (pm->use_dtab ?PFLAG_HAVE_DTAB :0)| - (pm->use_ptab ?PFLAG_HAVE_PTAB :0)| - (pm->do_sel ?PFLAG_DO_SEL :0)| - (pm->fixed_len ?PFLAG_DO_LEN :0)| - (pm->do_dedup ?PFLAG_DO_DEDUP :0)| - (pm->store_qmap ?PFLAG_HAVE_QMAP :0); - - gp->max_sel = 0; - if (pm->do_sel) { - // 2 selectors values, but 1 parameter block. - // We'll use the sloc instead to encode the selector bits into - // the context. - gp->max_sel = 1; // indicator to check recs - gp->gflags |= GFLAG_HAVE_STAB; - // NB: stab is already all zero - } - - if (gp->max_sel) { - int max = 0; - for (i = 0; i < s->num_records; i++) { - if (max < (s->flags[i] >> 16)) - max = (s->flags[i] >> 16); - } - gp->max_sel = max; - } - - return 0; -} - -static void fqz_free_parameters(fqz_gparams *gp) { - if (gp && gp->p) free(gp->p); -} - -static int compress_new_read(fqz_slice *s, - fqz_state *state, - fqz_gparams *gp, - fqz_param *pm, - fqz_model *model, - RangeCoder *rc, - unsigned char *in, - size_t *in_i, // in[in_i], - unsigned int *last) { - ssize_t rec = state->rec; - size_t i = *in_i; - if (pm->do_sel || (gp->gflags & GFLAG_MULTI_PARAM)) { - state->s = rec < s->num_records - ? 
s->flags[rec] >> 16 // reuse spare bits - : 0; - SIMPLE_MODEL(256,_encodeSymbol)(&model->sel, rc, state->s); - } else { - state->s = 0; - } - int x = (gp->gflags & GFLAG_HAVE_STAB) ? gp->stab[state->s] : state->s; - pm = &gp->p[x]; - - int len = s->len[rec]; - if (!pm->fixed_len || state->first_len) { - SIMPLE_MODEL(256,_encodeSymbol)(&model->len[0], rc, (len>> 0) & 0xff); - SIMPLE_MODEL(256,_encodeSymbol)(&model->len[1], rc, (len>> 8) & 0xff); - SIMPLE_MODEL(256,_encodeSymbol)(&model->len[2], rc, (len>>16) & 0xff); - SIMPLE_MODEL(256,_encodeSymbol)(&model->len[3], rc, (len>>24) & 0xff); - state->first_len = 0; - } - - if (gp->gflags & GFLAG_DO_REV) { - // no need to reverse complement for V4.0 as the core format - // already has this feature. - if (s->flags[rec] & FQZ_FREVERSE) - SIMPLE_MODEL(2,_encodeSymbol)(&model->revcomp, rc, 1); - else - SIMPLE_MODEL(2,_encodeSymbol)(&model->revcomp, rc, 0); - } - - state->rec++; - - state->qtot = 0; - state->qlen = 0; - - state->p = len; - state->delta = 0; - state->qctx = 0; - state->prevq = 0; - - *last = pm->context; - - if (pm->do_dedup) { - // Possible dup of previous read? 
- if (i && len == state->last_len && - !memcmp(in+i-state->last_len, in+i, len)) { - SIMPLE_MODEL(2,_encodeSymbol)(&model->dup, rc, 1); - i += len-1; - state->p = 0; - *in_i = i; - return 1; // is a dup - } else { - SIMPLE_MODEL(2,_encodeSymbol)(&model->dup, rc, 0); - } - - state->last_len = len; - } - - *in_i = i; - - return 0; // not dup -} - -static -unsigned char *compress_block_fqz2f(int vers, - int strat, - fqz_slice *s, - unsigned char *in, - size_t in_size, - size_t *out_size, - fqz_gparams *gp) { - fqz_gparams local_gp; - int free_params = 0; - - unsigned int last = 0; - size_t i, j; - ssize_t rec = 0; - - int comp_idx = 0; - RangeCoder rc; - - unsigned char *comp = (unsigned char *)malloc(in_size*1.1+100000); - unsigned char *compe = comp + (size_t)(in_size*1.1+100000); - if (!comp) - return NULL; - - // Pick and store params - if (!gp) { - gp = &local_gp; - if (fqz_pick_parameters(gp, vers, strat, s, in, in_size) < 0) - return NULL; - free_params = 1; - } - - //dump_params(gp); - comp_idx = var_put_u32(comp, compe, in_size); - comp_idx += fqz_store_parameters(gp, comp+comp_idx); - - fqz_param *pm; - - // Optimise tables to remove shifts in loop (NB: cannot do this in next vers) - for (j = 0; j < gp->nparam; j++) { - pm = &gp->p[j]; - - for (i = 0; i < 1024; i++) - pm->ptab[i] <<= pm->ploc; - - for (i = 0; i < 256; i++) - pm->dtab[i] <<= pm->dloc; - } - - // Create models and initialise range coder - fqz_model model; - if (fqz_create_models(&model, gp) < 0) - return NULL; - - RC_SetOutput(&rc, (char *)comp+comp_idx); - RC_StartEncode(&rc); - - // For CRAM3.1, reverse upfront if needed - pm = &gp->p[0]; - if (gp->gflags & GFLAG_DO_REV) { - i = rec = j = 0; - while (i < in_size) { - int len = rec < s->num_records-1 - ? 
s->len[rec] : in_size - i; - - if (s->flags[rec] & FQZ_FREVERSE) { - // Reverse complement sequence - note: modifies buffer - int I,J; - unsigned char *cp = in+i; - for (I = 0, J = len-1; I < J; I++, J--) { - unsigned char c; - c = cp[I]; - cp[I] = cp[J]; - cp[J] = c; - } - } - - i += len; - rec++; - } - rec = 0; - } - - fqz_state state = {0}; - pm = &gp->p[0]; - state.p = 0; - state.first_len = 1; - state.last_len = 0; - state.rec = rec; - - for (i = 0; i < in_size; i++) { - if (state.p == 0) { - if (compress_new_read(s, &state, gp, pm, &model, &rc, - in, &i, /*&rec,*/ &last)) - continue; - } - -#if 0 - // fqz_qual_stats imp. - // q40 6.876 6.852 5.96 - // q4 6.566 5.07 - // _Q 1.383 1.11 - unsigned char q = in[i]; - unsigned char qm = pm->qmap[q]; - - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[last], &rc, qm); - last = fqz_update_ctx(pm, &state, qm); -#else - // gcc clang gcc+fqz_qual_stats imp. - // q40 5.033 5.026 -27% 4.137 -38% - // q4 5.595 -15% 4.011 -36% - // _Q 1.225 -11% 0.956 - int j = -1; - - while (state.p >= 4 && i+j+4 < in_size) { - int l1 = last, l2, l3, l4; - // Model has symbols sorted by frequency, so most common are at - // start. So while model is approx 1Kb, the first cache line is - // a big win. 
- mm_prefetch(&model.qual[l1]); - unsigned char qm1 = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm1); l2 = last; - - mm_prefetch(&model.qual[l2]); - unsigned char qm2 = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm2); l3 = last; - - mm_prefetch(&model.qual[l3]); - unsigned char qm3 = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm3); l4 = last; - - mm_prefetch(&model.qual[l4]); - unsigned char qm4 = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm4); - - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l1], &rc, qm1); - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l2], &rc, qm2); - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l3], &rc, qm3); - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l4], &rc, qm4); - } - - while (state.p > 0) { - int l2 = last; - mm_prefetch(&model.qual[last]); - unsigned char qm = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm); - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l2], &rc, qm); - } - i += j; -#endif - } - - RC_FinishEncode(&rc); - - // For CRAM3.1, undo our earlier reversal step - rec = state.rec; - if (gp->gflags & GFLAG_DO_REV) { - i = rec = j = 0; - while (i < in_size) { - int len = rec < s->num_records-1 - ? s->len[rec] - : in_size - i; - - if (s->flags[rec] & FQZ_FREVERSE) { - // Reverse complement sequence - note: modifies buffer - int I,J; - unsigned char *cp = in+i; - for (I = 0, J = len-1; I < J; I++, J--) { - unsigned char c; - c = cp[I]; - cp[I] = cp[J]; - cp[J] = c; - } - } - - i += len; - rec++; - } - } - - // Clear selector abuse of flags - for (rec = 0; rec < s->num_records; rec++) - s->flags[rec] &= 0xffff; - - *out_size = comp_idx + RC_OutSize(&rc); - //fprintf(stderr, "%d -> %d\n", (int)in_size, (int)*out_size); - - fqz_destroy_models(&model); - if (free_params) - fqz_free_parameters(gp); - - return comp; -} - -// Read fqz paramaters. -// -// FIXME: pass in and check in_size. -// -// Returns number of bytes read on success, -// -1 on failure. 
-static inline -int fqz_read_parameters1(fqz_param *pm, unsigned char *in, size_t in_size) { - int in_idx = 0; - size_t i; - - if (in_size < 7) - return -1; - - // Starting context - pm->context = in[in_idx] + (in[in_idx+1]<<8); - in_idx += 2; - - // Bit flags - pm->pflags = in[in_idx++]; - pm->use_qtab = pm->pflags & PFLAG_HAVE_QTAB; - pm->use_dtab = pm->pflags & PFLAG_HAVE_DTAB; - pm->use_ptab = pm->pflags & PFLAG_HAVE_PTAB; - pm->do_sel = pm->pflags & PFLAG_DO_SEL; - pm->fixed_len = pm->pflags & PFLAG_DO_LEN; - pm->do_dedup = pm->pflags & PFLAG_DO_DEDUP; - pm->store_qmap = pm->pflags & PFLAG_HAVE_QMAP; - pm->max_sym = in[in_idx++]; - - // Sub-context sizes and locations - pm->qbits = in[in_idx]>>4; - pm->qmask = (1<qbits)-1; - pm->qshift = in[in_idx++]&15; - pm->qloc = in[in_idx]>>4; - pm->sloc = in[in_idx++]&15; - pm->ploc = in[in_idx]>>4; - pm->dloc = in[in_idx++]&15; - - // Maps and tables - if (pm->store_qmap) { - for (i = 0; i < 256; i++) pm->qmap[i] = INT_MAX; // so dump_map works - if (in_idx + pm->max_sym > in_size) - return -1; - for (i = 0; i < pm->max_sym; i++) - pm->qmap[i] = in[in_idx++]; - } else { - for (i = 0; i < 256; i++) - pm->qmap[i] = i; - } - - if (pm->qbits) { - if (pm->use_qtab) { - int used = read_array(in+in_idx, in_size-in_idx, pm->qtab, 256); - if (used < 0) - return -1; - in_idx += used; - } else { - for (i = 0; i < 256; i++) - pm->qtab[i] = i; - } - } - - if (pm->use_ptab) { - int used = read_array(in+in_idx, in_size-in_idx, pm->ptab, 1024); - if (used < 0) - return -1; - in_idx += used; - } else { - for (i = 0; i < 1024; i++) - pm->ptab[i] = 0; - } - - if (pm->use_dtab) { - int used = read_array(in+in_idx, in_size-in_idx, pm->dtab, 256); - if (used < 0) - return -1; - in_idx += used; - } else { - for (i = 0; i < 256; i++) - pm->dtab[i] = 0; - } - - return in_idx; -} - -static -int fqz_read_parameters(fqz_gparams *gp, unsigned char *in, size_t in_size) { - int in_idx = 0; - int i; - - if (in_size < 10) - return -1; - - // Format 
version - gp->vers = in[in_idx++]; - if (gp->vers != FQZ_VERS) - return -1; - - // Global glags - gp->gflags = in[in_idx++]; - - // Number of param blocks and param selector details - gp->nparam = (gp->gflags & GFLAG_MULTI_PARAM) ? in[in_idx++] : 1; - if (gp->nparam <= 0) - return -1; - gp->max_sel = gp->nparam > 1 ? gp->nparam : 0; - - if (gp->gflags & GFLAG_HAVE_STAB) { - gp->max_sel = in[in_idx++]; - int used = read_array(in+in_idx, in_size-in_idx, gp->stab, 256); - if (used < 0) - goto err; - in_idx += used; - } else { - for (i = 0; i < gp->nparam; i++) - gp->stab[i] = i; - for (; i < 256; i++) - gp->stab[i] = gp->nparam-1; - } - - // Load the individual parameter locks - if (!(gp->p = malloc(gp->nparam * sizeof(*gp->p)))) - return -1; - - gp->max_sym = 0; - for (i = 0; i < gp->nparam; i++) { - int e = fqz_read_parameters1(&gp->p[i], in + in_idx, in_size-in_idx); - if (e < 0) - goto err; - if (gp->p[i].do_sel && gp->max_sel == 0) - goto err; // Inconsistent - in_idx += e; - - if (gp->max_sym < gp->p[i].max_sym) - gp->max_sym = gp->p[i].max_sym; - } - - //fprintf(stderr, "Decoded %d bytes of param\n", in_idx); - return in_idx; - - err: - fqz_free_parameters(gp); - gp->nparam = 0; - return -1; -} - -// Handles the state.p==0 section of uncompress_block_fqz2f -static int decompress_new_read(fqz_slice *s, - fqz_state *state, - fqz_gparams *gp, - fqz_param *pm, - fqz_model *model, - RangeCoder *rc, - unsigned char *in, ssize_t *in_i, // in[in_i], - unsigned char *uncomp, size_t *out_size, - int *rev, char *rev_a, int *len_a, - int *lengths, int nlengths) { - size_t i = *in_i; - ssize_t rec = state->rec; - - if (pm->do_sel) { - state->s = SIMPLE_MODEL(256,_decodeSymbol)(&model->sel, rc); - } else { - state->s = 0; - } - - int x = (gp->gflags & GFLAG_HAVE_STAB) - ? 
gp->stab[MIN(255, state->s)] - : state->s; - if (x >= gp->nparam) - return -1; - pm = &gp->p[x]; - - unsigned int len = state->last_len; - if (!pm->fixed_len || state->first_len) { - len = SIMPLE_MODEL(256,_decodeSymbol)(&model->len[0], rc); - len |= SIMPLE_MODEL(256,_decodeSymbol)(&model->len[1], rc)<<8; - len |= SIMPLE_MODEL(256,_decodeSymbol)(&model->len[2], rc)<<16; - len |= ((unsigned)SIMPLE_MODEL(256,_decodeSymbol)(&model->len[3], rc))<<24; - state->first_len = 0; - state->last_len = len; - } - if (len > *out_size-i || len <= 0) - return -1; - - if (lengths && rec < nlengths) - lengths[rec] = len; - - if (gp->gflags & GFLAG_DO_REV) { - *rev = SIMPLE_MODEL(2,_decodeSymbol)(&model->revcomp, rc); - rev_a[rec] = *rev; - len_a[rec] = len; - } - - if (pm->do_dedup) { - if (SIMPLE_MODEL(2,_decodeSymbol)(&model->dup, rc)) { - // Dup of last line - if (len > i) - return -1; - memcpy(uncomp+i, uncomp+i-len, len); - i += len; - state->p = 0; - state->rec++; - *in_i = i; - return 1; // dup => continue - } - } - - state->rec++; - state->p = len; - state->delta = 0; - state->prevq = 0; - state->qctx = 0; - state->ctx = pm->context; - - *in_i = i; - - return 0; -} - - -static -unsigned char *uncompress_block_fqz2f(fqz_slice *s, - unsigned char *in, - size_t in_size, - size_t *out_size, - int *lengths, - int nlengths) { - fqz_gparams gp; - fqz_param *pm; - char *rev_a = NULL; - int *len_a = NULL; - memset(&gp, 0, sizeof(gp)); - - uint32_t len; - ssize_t i, rec = 0, in_idx; - in_idx = var_get_u32(in, in+in_size, &len); - *out_size = len; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (len > 100000) - return NULL; -#endif - - unsigned char *uncomp = NULL; - RangeCoder rc; - unsigned int last = 0; - - // Decode parameter blocks - if ((i = fqz_read_parameters(&gp, in+in_idx, in_size-in_idx)) < 0) - return NULL; - //dump_params(&gp); - in_idx += i; - - // Optimisations to remove shifts from main loop - for (i = 0; i < gp.nparam; i++) { - int j; - pm = &gp.p[i]; - for (j 
= 0; j < 1024; j++) - pm->ptab[j] <<= pm->ploc; - for (j = 0; j < 256; j++) - pm->dtab[j] <<= pm->dloc; - } - - // Initialise models and entropy coder - fqz_model model; - if (fqz_create_models(&model, &gp) < 0) - return NULL; - - RC_SetInput(&rc, (char *)in+in_idx, (char *)in+in_size); - RC_StartDecode(&rc); - - - // Allocate buffers - uncomp = (unsigned char *)malloc(*out_size); - if (!uncomp) - goto err; - - int nrec = 1000; - rev_a = malloc(nrec); - len_a = malloc(nrec * sizeof(int)); - if (!rev_a || !len_a) - goto err; - - // Main decode loop - fqz_state state; - state.delta = 0; - state.prevq = 0; - state.qctx = 0; - state.p = 0; - state.s = 0; - state.first_len = 1; - state.last_len = 0; - state.rec = 0; - state.ctx = last; - - int rev = 0; - int x = 0; - pm = &gp.p[x]; - for (i = 0; i < len; ) { - if (state.rec >= nrec) { - nrec *= 2; - rev_a = realloc(rev_a, nrec); - len_a = realloc(len_a, nrec*sizeof(int)); - if (!rev_a || !len_a) - goto err; - } - - if (state.p == 0) { - int r = decompress_new_read(s, &state, &gp, pm, &model, &rc, - in, &i, uncomp, out_size, - &rev, rev_a, len_a, - lengths, nlengths); - if (r < 0) - goto err; - if (r > 0) - continue; - last = state.ctx; - } - - // Decode and update context - do { - unsigned char Q = SIMPLE_MODEL(QMAX,_decodeSymbol) - (&model.qual[last], &rc); - - last = fqz_update_ctx(pm, &state, Q); - uncomp[i++] = pm->qmap[Q]; - } while (state.p != 0 && i < len); - } - - rec = state.rec; - if (rec >= nrec) { - nrec *= 2; - rev_a = realloc(rev_a, nrec); - len_a = realloc(len_a, nrec*sizeof(int)); - if (!rev_a || !len_a) - goto err; - } - rev_a[rec] = rev; - len_a[rec] = len; - - if (gp.gflags & GFLAG_DO_REV) { - for (i = rec = 0; i < len && rec < nrec; i += len_a[rec++]) { - if (!rev_a[rec]) - continue; - - int I, J; - unsigned char *cp = uncomp+i; - for (I = 0, J = len_a[rec]-1; I < J; I++, J--) { - unsigned char c; - c = cp[I]; - cp[I] = cp[J]; - cp[J] = c; - } - } - } - - RC_FinishDecode(&rc); - 
fqz_destroy_models(&model); - free(rev_a); - free(len_a); - fqz_free_parameters(&gp); - -#ifdef TEST_MAIN - s->num_records = rec; -#endif - - return uncomp; - - err: - fqz_destroy_models(&model); - free(rev_a); - free(len_a); - fqz_free_parameters(&gp); - free(uncomp); - - return NULL; -} - -char *fqz_compress(int vers, fqz_slice *s, char *in, size_t uncomp_size, - size_t *comp_size, int strat, fqz_gparams *gp) { - if (uncomp_size > INT_MAX) { - *comp_size = 0; - return NULL; - } - - return (char *)compress_block_fqz2f(vers, strat, s, (unsigned char *)in, - uncomp_size, comp_size, gp); -} - -char *fqz_decompress(char *in, size_t comp_size, size_t *uncomp_size, - int *lengths, int nlengths) { - return (char *)uncompress_block_fqz2f(NULL, (unsigned char *)in, - comp_size, uncomp_size, lengths, nlengths); -} diff --git a/src/htslib-1.18/htscodecs/htscodecs/htscodecs.h b/src/htslib-1.18/htscodecs/htscodecs/htscodecs.h deleted file mode 100644 index 1dcd442..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/htscodecs.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2021-2023 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef HTSCODECS_H -#define HTSCODECS_H - -/* - * Version X.Y.Z encoded as XYYYZZ. - * We mainly increment X and Y. Z *may* get bumped in between official - * releases in order to distinguish untagged github checkouts from - * official release tarballs. - * - * Note currently this needs manually editing as it isn't automatically - * updated by autoconf. - */ -#define HTSCODECS_VERSION 100501 - -/* - * A const string form of the HTSCODECS_VERSION define. - * NB: This is obtained from the auto-generated version.h, so - * we can include release number and git hash. - */ -const char *htscodecs_version(void); - -#endif /* HTSCODECS_H */ diff --git a/src/htslib-1.18/htscodecs/htscodecs/pack.c b/src/htslib-1.18/htscodecs/htscodecs/pack.c deleted file mode 100644 index 6b73bbc..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/pack.c +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright (c) 2019-2020, 2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include - -#include "pack.h" - -//----------------------------------------------------------------------------- - -/* - * Packs multiple symbols into a single byte if the total alphabet of symbols - * used is <= 16. Each new symbol takes up 1, 2, 4 or 8 bits, or 0 if the - * alphabet used is 1 (constant). - * - * If successful, out_meta/out_meta_len are set to hold the mapping table - * to be used during decompression. 
- * - * Returns the packed buffer on success with new length in out_len, - * NULL of failure - */ -uint8_t *hts_pack(uint8_t *data, int64_t len, - uint8_t *out_meta, int *out_meta_len, uint64_t *out_len) { - int p[256] = {0}, n; - uint64_t i, j; - - // count syms - for (i = 0; i < len; i++) - p[data[i]]=1; - - for (i = n = 0; i < 256; i++) { - if (p[i]) { - p[i] = n++; // p[i] is now the code number - out_meta[n] = i; - } - } - out_meta[0] = n; // 256 wraps to 0 - j = n+1; - - // 1 value per byte - if (n > 16) - return NULL; - - uint8_t *out = malloc(len+1); - if (!out) - return NULL; - - // Work out how many values per byte to encode. - int val_per_byte; - if (n > 4) - val_per_byte = 2; - else if (n > 2) - val_per_byte = 4; - else if (n > 1) - val_per_byte = 8; - else - val_per_byte = 0; // infinite - - *out_meta_len = j; - j = 0; - - switch (val_per_byte) { - case 2: - for (i = 0; i < (len & ~1); i+=2) - out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<4); - switch (len-i) { - case 1: out[j++] = p[data[i]]; - } - *out_len = j; - return out; - - case 4: { - for (i = 0; i < (len & ~3); i+=4) - out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<2) | (p[data[i+2]]<<4) | (p[data[i+3]]<<6); - out[j] = 0; - int s = len-i, x = 0; - switch (s) { - case 3: out[j] |= p[data[i++]] << x; x+=2; - case 2: out[j] |= p[data[i++]] << x; x+=2; - case 1: out[j] |= p[data[i++]] << x; x+=2; - j++; - } - *out_len = j; - return out; - } - - case 8: { - for (i = 0; i < (len & ~7); i+=8) - out[j++] = (p[data[i+0]]<<0) | (p[data[i+1]]<<1) | (p[data[i+2]]<<2) | (p[data[i+3]]<<3) - | (p[data[i+4]]<<4) | (p[data[i+5]]<<5) | (p[data[i+6]]<<6) | (p[data[i+7]]<<7); - out[j] = 0; - int s = len-i, x = 0; - switch (s) { - case 7: out[j] |= p[data[i++]] << x++; - case 6: out[j] |= p[data[i++]] << x++; - case 5: out[j] |= p[data[i++]] << x++; - case 4: out[j] |= p[data[i++]] << x++; - case 3: out[j] |= p[data[i++]] << x++; - case 2: out[j] |= p[data[i++]] << x++; - case 1: out[j] |= p[data[i++]] << x++; - j++; - 
} - *out_len = j; - return out; - } - - case 0: - *out_len = j; - return out; - } - - return NULL; -} - - -/* - * Unpacks the meta-data portions of the hts_pack algorithm. - * This consists of the count of symbols and their values. - * - * The "map" array is filled out with the used symbols. - * "nsym" is set to contain the number of symbols per byte; - * 0, 1, 2, 4 or 8. - * - * Returns number of bytes of data[] consumed on success, - * zero on failure. - */ -uint8_t hts_unpack_meta(uint8_t *data, uint32_t data_len, - uint64_t udata_len, uint8_t *map, int *nsym) { - if (data_len == 0) - return 0; - - // Number of symbols used - unsigned int n = data[0]; - if (n == 0) - n = 256; - - // Symbols per byte - if (n <= 1) - *nsym = 0; - else if (n <= 2) - *nsym = 8; - else if (n <= 4) - *nsym = 4; - else if (n <= 16) - *nsym = 2; - else { - *nsym = 1; // no packing - return 1; - } - - if (data_len <= 1) - return 0; - - int j = 1, c = 0; - do { - map[c++] = data[j++]; - } while (c < n && j < data_len); - - return c < n ? 0 : j; -} - -/* - * Unpacks a packed data steam (given the unpacked meta-data). - * - * "map" is the pack map, mapping 0->n to the expanded symbols. - * The "out" buffer must be preallocated by the caller to be the correct - * size. For error checking purposes, out_len is set to the size of - * this buffer. - * - * Returns uncompressed data (out) on success, - * NULL on failure. 
- */ -uint8_t *hts_unpack(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) { - //uint8_t *out; - uint8_t c = 0; - int64_t i, j = 0, olen; - - if (nsym == 1) { - // raw data; FIXME: shortcut the need for malloc & memcpy here - memcpy(out, data, len); - return out; - } - - switch(nsym) { - case 8: { - union { - uint64_t w; - uint8_t c[8]; - } map[256]; - int x; - for (x = 0; x < 256; x++) { - map[x].c[0] = p[x>>0&1]; - map[x].c[1] = p[x>>1&1]; - map[x].c[2] = p[x>>2&1]; - map[x].c[3] = p[x>>3&1]; - map[x].c[4] = p[x>>4&1]; - map[x].c[5] = p[x>>5&1]; - map[x].c[6] = p[x>>6&1]; - map[x].c[7] = p[x>>7&1]; - } - if ((out_len+7)/8 > len) - return NULL; - olen = out_len & ~7; - - for (i = 0; i < olen; i+=8) - memcpy(&out[i], &map[data[j++]].w, 8); - - if (out_len != olen) { - c = data[j++]; - while (i < out_len) { - out[i++] = p[c & 1]; - c >>= 1; - } - } - break; - } - - case 4: { - union { - uint32_t w; - uint8_t c[4]; - } map[256]; - - int x, y, z, _, P=0; - for (x = 0; x < 4; x++) - for (y = 0; y < 4; y++) - for (z = 0; z < 4; z++) - for (_ = 0; _ < 4; _++, P++) { - map[P].c[0] = p[_]; - map[P].c[1] = p[z]; - map[P].c[2] = p[y]; - map[P].c[3] = p[x]; - } - - if ((out_len+3)/4 > len) - return NULL; - olen = out_len & ~3; - - for (i = 0; i < olen-12; i+=16) { - uint32_t w[] = { - map[data[j+0]].w, - map[data[j+1]].w, - map[data[j+2]].w, - map[data[j+3]].w - }; - j += 4; - memcpy(&out[i], &w, 16); - } - - for (; i < olen; i+=4) - memcpy(&out[i], &map[data[j++]].w, 4); - - if (out_len != olen) { - c = data[j++]; - while (i < out_len) { - out[i++] = p[c & 3]; - c >>= 2; - } - } - break; - } - - case 2: { - union { - uint16_t w; - uint8_t c[2]; - } map[256]; - - int x, y; - for (x = 0; x < 16; x++) { - for (y = 0; y < 16; y++) { - map[x*16+y].c[0] = p[y]; - map[x*16+y].c[1] = p[x]; - } - } - - if ((out_len+1)/2 > len) - return NULL; - olen = out_len & ~1; - - for (i = j = 0; i+2 < olen; i+=4) { - uint16_t w[] = { - map[data[j+0]].w, - 
map[data[j+1]].w - }; - memcpy(&out[i], &w, 4); - - j += 2; - } - - for (; i < olen; i+=2) - memcpy(&out[i], &map[data[j++]].w, 2); - - if (out_len != olen) { - c = data[j++]; - out[i+0] = p[c&15]; - } - break; - } - - case 0: - memset(out, p[0], out_len); - break; - - default: - return NULL; - } - - return out; -} - - -uint8_t *hts_unpack_(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) { - //uint8_t *out; - uint8_t c = 0; - int64_t i, j = 0, olen; - - if (nsym == 1) { - // raw data; FIXME: shortcut the need for malloc & memcpy here - memcpy(out, data, len); - return out; - } - - switch(nsym) { - case 2: { - uint16_t map[256], x, y; - for (x = 0; x < 16; x++) - for (y = 0; y < 16; y++) - map[x*16+y] = p[x]*256+p[y]; - - if ((out_len+1)/2 > len) - return NULL; - olen = out_len & ~1; - - uint16_t *o16 = (uint16_t *)out; - for (i = 0; i+4 < olen/2; i+=4) { - int k; - for (k = 0; k < 4; k++) - o16[i+k] = map[data[i+k]]; - } - j = i; i *= 2; - - for (; i < olen; i+=2) { - uint16_t w1 = map[data[j++]]; - *(uint16_t *)&out[i] = w1; - } - - if (out_len != olen) { - c = data[j++]; - out[i+0] = p[c&15]; - } - break; - } - - default: - return NULL; - } - - return out; -} diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_static.c b/src/htslib-1.18/htscodecs/htscodecs/rANS_static.c deleted file mode 100644 index e629cb9..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/rANS_static.c +++ /dev/null @@ -1,844 +0,0 @@ -/* - * Copyright (c) 2014-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -// Use 11 for order-1? -#define TF_SHIFT 12 -#define TOTFREQ (1< -#include -#include -#include -#include -#include -#include -#include -#ifndef NO_THREADS -#include -#endif - -#include "rANS_static.h" - -#define ABS(a) ((a)>0?(a):-(a)) - -/*----------------------------------------------------------------------------- - * Memory to memory compression functions. - * - * These are original versions without any manual loop unrolling. They - * are easier to understand, but can be up to 2x slower. 
- */ - -static -unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9); - unsigned char *cp, *out_end; - RansEncSymbol syms[256]; - RansState rans0; - RansState rans2; - RansState rans1; - RansState rans3; - uint8_t* ptr; - int F[256+MAGIC] = {0}, i, j, tab_size, rle, x, fsum = 0; - int m = 0, M = 0; - uint64_t tr; - - if (!out_buf) - return NULL; - - ptr = out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9; - - // Compute statistics - if (hist8(in, in_size, (uint32_t *)F) < 0) { - free(out_buf); - return NULL; - } - tr = ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size; - - normalise_harder: - // Normalise so T[i] == TOTFREQ - for (fsum = m = M = j = 0; j < 256; j++) { - if (!F[j]) - continue; - - if (m < F[j]) - m = F[j], M = j; - - if ((F[j] = (F[j]*tr)>>31) == 0) - F[j] = 1; - fsum += F[j]; - } - - fsum++; - if (fsum < TOTFREQ) { - F[M] += TOTFREQ-fsum; - } else if (fsum-TOTFREQ > F[M]/2) { - // Corner case to avoid excessive frequency reduction - tr = 2104533975; goto normalise_harder; // equiv to *0.98. - } else { - F[M] -= fsum-TOTFREQ; - } - - //printf("F[%d]=%d\n", M, F[M]); - assert(F[M]>0); - - // Encode statistics. 
- cp = out_buf+9; - - for (x = rle = j = 0; j < 256; j++) { - if (F[j]) { - // j - if (rle) { - rle--; - } else { - *cp++ = j; - if (!rle && j && F[j-1]) { - for(rle=j+1; rle<256 && F[rle]; rle++) - ; - rle -= j+1; - *cp++ = rle; - } - //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]); - } - - // F[j] - if (F[j]<128) { - *cp++ = F[j]; - } else { - *cp++ = 128 | (F[j]>>8); - *cp++ = F[j]&0xff; - } - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - x += F[j]; - } - } - *cp++ = 0; - - //write(2, out_buf+4, cp-(out_buf+4)); - tab_size = cp-out_buf; - - RansEncInit(&rans0); - RansEncInit(&rans1); - RansEncInit(&rans2); - RansEncInit(&rans3); - - switch (i=(in_size&3)) { - case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); - case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); - case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); - case 0: - break; - } - for (i=(in_size &~3); likely(i>0); i-=4) { - RansEncSymbol *s3 = &syms[in[i-1]]; - RansEncSymbol *s2 = &syms[in[i-2]]; - RansEncSymbol *s1 = &syms[in[i-3]]; - RansEncSymbol *s0 = &syms[in[i-4]]; - - RansEncPutSymbol(&rans3, &ptr, s3); - RansEncPutSymbol(&rans2, &ptr, s2); - RansEncPutSymbol(&rans1, &ptr, s1); - RansEncPutSymbol(&rans0, &ptr, s0); - } - - RansEncFlush(&rans3, &ptr); - RansEncFlush(&rans2, &ptr); - RansEncFlush(&rans1, &ptr); - RansEncFlush(&rans0, &ptr); - - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - - cp = out_buf; - - *cp++ = 0; // order - *cp++ = ((*out_size-9)>> 0) & 0xff; - *cp++ = ((*out_size-9)>> 8) & 0xff; - *cp++ = ((*out_size-9)>>16) & 0xff; - *cp++ = ((*out_size-9)>>24) & 0xff; - - *cp++ = (in_size>> 0) & 0xff; - *cp++ = (in_size>> 8) & 0xff; - *cp++ = (in_size>>16) & 0xff; - *cp++ = (in_size>>24) & 0xff; - - memmove(out_buf + tab_size, ptr, out_end-ptr); - - return out_buf; -} - -typedef struct { - unsigned char R[TOTFREQ]; -} ari_decoder; - -static -unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int 
in_size, - unsigned int *out_size) { - /* Load in the static tables */ - unsigned char *cp = in + 9; - unsigned char *cp_end = in + in_size; - const uint32_t mask = (1u << TF_SHIFT)-1; - int i, j, rle; - unsigned int x, y; - unsigned int out_sz, in_sz; - char *out_buf; - RansState R[4]; - RansState m[4]; - uint16_t sfreq[TOTFREQ+32]; - uint16_t ssym [TOTFREQ+32]; // faster, but only needs uint8_t - uint32_t sbase[TOTFREQ+16]; // faster, but only needs uint16_t - - if (in_size < 26) // Need at least this many bytes just to start - return NULL; - - if (*in++ != 0) // Order-0 check - return NULL; - - in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24); - out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24); - if (in_sz != in_size-9) - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - - // For speeding up the fuzzer only. - // Small input can lead to large uncompressed data. - // We reject this as it just slows things up instead of testing more code - // paths (once we've verified a few times for large data). -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - out_buf = malloc(out_sz); - if (!out_buf) - return NULL; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // Precompute reverse lookup of frequency. 
- rle = x = y = 0; - j = *cp++; - do { - int F, C; - if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left - if ((F = *cp++) >= 128) { - F &= ~128; - F = ((F & 127) << 8) | *cp++; - } - C = x; - - if (x + F > TOTFREQ) - goto cleanup; - - for (y = 0; y < F; y++) { - ssym [y + C] = j; - sfreq[y + C] = F; - sbase[y + C] = y; - } - x += F; - - if (!rle && j+1 == *cp) { - j = *cp++; - rle = *cp++; - } else if (rle) { - rle--; - j++; - if (j > 255) - goto cleanup; - } else { - j = *cp++; - } - } while(j); - - if (x < TOTFREQ-1 || x > TOTFREQ) - goto cleanup; - if (x != TOTFREQ) { - // Protection against accessing uninitialised memory in the case - // where SUM(freqs) == 4095 and not 4096. - ssym [x] = ssym [x-1]; - sfreq[x] = sfreq[x-1]; - sbase[x] = sbase[x-1]+1; - } - - // 16 bytes of cp here. Also why cp - 16 in above loop. - if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left - - RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto cleanup; - RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto cleanup; - RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto cleanup; - RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto cleanup; - - int out_end = (out_sz&~3); - cp_end -= 8; // within 8 for simplicity of loop below - // 2 x likely() here harms gcc 7.5 by about 8% rate drop, but only in O2 - for (i=0; likely(i < out_end); i+=4) { - // /curr code - // gcc7 O2 513/497 562/556++ 556/547 ok - // gcc7 O3 566/552 569/553 581/563+ - // gcc10 O2 544/538 563/547 541/537-? 
- // gcc10 O3 531/519 546/530 575/546+ - // gcc11 O2 512/490 588/540 540/535 mid - // gcc11 O3 482/471 553/541 549/535 - // gcc12 O2 533/526 544/534 539/535 - // gcc12 O3 548/533 502/497-- 553/527 ok - // clang10 555/542 564/549 560/541 - // clang13 560/553 572/559 556/559 - m[0] = R[0] & mask; - R[0] = sfreq[m[0]] * (R[0] >> TF_SHIFT) + sbase[m[0]]; - - m[1] = R[1] & mask; - R[1] = sfreq[m[1]] * (R[1] >> TF_SHIFT) + sbase[m[1]]; - - m[2] = R[2] & mask; - R[2] = sfreq[m[2]] * (R[2] >> TF_SHIFT) + sbase[m[2]]; - - m[3] = R[3] & mask; - R[3] = sfreq[m[3]] * (R[3] >> TF_SHIFT) + sbase[m[3]]; - - // likely() here harms gcc12 -O3 - if (cp>2)]]++; - F[0][in[2*(in_size>>2)]]++; - F[0][in[3*(in_size>>2)]]++; - T[0]+=3; - - - // Normalise so T[i] == TOTFREQ - for (rle_i = i = 0; i < 256; i++) { - int t2, m, M; - unsigned int x; - - if (T[i] == 0) - continue; - - //uint64_t p = (TOTFREQ * TOTFREQ) / t; - double p = ((double)TOTFREQ)/T[i]; - normalise_harder: - for (t2 = m = M = j = 0; j < 256; j++) { - if (!F[i][j]) - continue; - - if (m < F[i][j]) - m = F[i][j], M = j; - - //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0) - if ((F[i][j] *= p) == 0) - F[i][j] = 1; - t2 += F[i][j]; - } - - t2++; - if (t2 < TOTFREQ) { - F[i][M] += TOTFREQ-t2; - } else if (t2-TOTFREQ >= F[i][M]/2) { - // Corner case to avoid excessive frequency reduction - p = .98; goto normalise_harder; - } else { - F[i][M] -= t2-TOTFREQ; - } - - // Store frequency table - // i - if (rle_i) { - rle_i--; - } else { - *cp++ = i; - // FIXME: could use order-0 statistics to observe which alphabet - // symbols are present and base RLE on that ordering instead. 
- if (i && T[i-1]) { - for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++) - ; - rle_i -= i+1; - *cp++ = rle_i; - } - } - - int *F_i_ = F[i]; - x = 0; - rle_j = 0; - for (j = 0; j < 256; j++) { - if (F_i_[j]) { - //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x); - - // j - if (rle_j) { - rle_j--; - } else { - *cp++ = j; - if (!rle_j && j && F_i_[j-1]) { - for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++) - ; - rle_j -= j+1; - *cp++ = rle_j; - } - } - - // F_i_[j] - if (F_i_[j]<128) { - *cp++ = F_i_[j]; - } else { - *cp++ = 128 | (F_i_[j]>>8); - *cp++ = F_i_[j]&0xff; - } - - RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT); - x += F_i_[j]; - } - } - *cp++ = 0; - } - *cp++ = 0; - - //write(2, out_buf+4, cp-(out_buf+4)); - tab_size = cp - out_buf; - assert(tab_size < 257*257*3); - - RansState rans0, rans1, rans2, rans3; - RansEncInit(&rans0); - RansEncInit(&rans1); - RansEncInit(&rans2); - RansEncInit(&rans3); - - uint8_t* ptr = out_end; - - int isz4 = in_size>>2; - int i0 = 1*isz4-2; - int i1 = 2*isz4-2; - int i2 = 3*isz4-2; - int i3 = 4*isz4-2; - - unsigned char l0 = in[i0+1]; - unsigned char l1 = in[i1+1]; - unsigned char l2 = in[i2+1]; - unsigned char l3 = in[i3+1]; - - // Deal with the remainder - l3 = in[in_size-1]; - for (i3 = in_size-2; i3 > 4*isz4-2; i3--) { - unsigned char c3 = in[i3]; - RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]); - l3 = c3; - } - - for (; likely(i0 >= 0); i0--, i1--, i2--, i3--) { - unsigned char c3 = in[i3]; - unsigned char c2 = in[i2]; - unsigned char c1 = in[i1]; - unsigned char c0 = in[i0]; - - RansEncSymbol *s3 = &syms[c3][l3]; - RansEncSymbol *s2 = &syms[c2][l2]; - RansEncSymbol *s1 = &syms[c1][l1]; - RansEncSymbol *s0 = &syms[c0][l0]; - - RansEncPutSymbol4(&rans3, &rans2, &rans1, &rans0, &ptr, - s3, s2, s1, s0); - - l3 = c3; - l2 = c2; - l1 = c1; - l0 = c0; - } - - RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]); - RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]); - RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]); - 
RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]); - - RansEncFlush(&rans3, &ptr); - RansEncFlush(&rans2, &ptr); - RansEncFlush(&rans1, &ptr); - RansEncFlush(&rans0, &ptr); - - *out_size = (out_end - ptr) + tab_size; - - cp = out_buf; - *cp++ = 1; // order - - *cp++ = ((*out_size-9)>> 0) & 0xff; - *cp++ = ((*out_size-9)>> 8) & 0xff; - *cp++ = ((*out_size-9)>>16) & 0xff; - *cp++ = ((*out_size-9)>>24) & 0xff; - - *cp++ = (in_size>> 0) & 0xff; - *cp++ = (in_size>> 8) & 0xff; - *cp++ = (in_size>>16) & 0xff; - *cp++ = (in_size>>24) & 0xff; - - memmove(out_buf + tab_size, ptr, out_end-ptr); - - cleanup: - htscodecs_tls_free(syms); - - return out_buf; -} - -static -unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - /* Load in the static tables */ - unsigned char *cp = in + 9; - unsigned char *ptr_end = in + in_size; - int i, j = -999, rle_i, rle_j; - unsigned int x; - unsigned int out_sz, in_sz; - char *out_buf = NULL; - - // Sanity checking - if (in_size < 27) // Need at least this many bytes to start - return NULL; - - if (*in++ != 1) // Order-1 check - return NULL; - - in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24); - out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24); - if (in_sz != in_size-9) - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - - // For speeding up the fuzzer only. - // Small input can lead to large uncompressed data. - // We reject this as it just slows things up instead of testing more code - // paths (once we've verified a few times for large data). 
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - // Allocate decoding lookup tables - RansDecSymbol32 (*syms)[256]; - uint8_t *mem = htscodecs_tls_calloc(256, sizeof(ari_decoder) - + sizeof(*syms)); - if (!mem) - return NULL; - ari_decoder *const D = (ari_decoder *)mem; - syms = (RansDecSymbol32 (*)[256])(mem + 256*sizeof(ari_decoder)); - int16_t map[256], map_i = 0; - - memset(map, -1, 256*sizeof(*map)); - - if (!D) goto cleanup; - /* These memsets prevent illegal memory access in syms due to - broken compressed data. As D is calloc'd, all illegal transitions - will end up in either row or column 0 of syms. */ - memset(&syms[0], 0, sizeof(syms[0])); - for (i = 0; i < 256; i++) - memset(&syms[i][0], 0, sizeof(syms[0][0])); - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - //i = *cp++; - rle_i = 0; - i = *cp++; - do { - // Map arbitrary a,b,c to 0,1,2 to improve cache locality. - if (map[i] == -1) - map[i] = map_i++; - int m_i = map[i]; - - rle_j = x = 0; - j = *cp++; - do { - if (map[j] == -1) - map[j] = map_i++; - - int F, C; - if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left - if ((F = *cp++) >= 128) { - F &= ~128; - F = ((F & 127) << 8) | *cp++; - } - C = x; - - //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C); - - if (unlikely(!F)) - F = TOTFREQ; - - RansDecSymbolInit32(&syms[m_i][j], C, F); - - /* Build reverse lookup table */ - //if (!D[i].R) D[i].R = (unsigned char *)malloc(TOTFREQ); - if (x + F > TOTFREQ) - goto cleanup; - - memset(&D[m_i].R[x], j, F); - x += F; - - if (!rle_j && j+1 == *cp) { - j = *cp++; - rle_j = *cp++; - } else if (rle_j) { - rle_j--; - j++; - if (j > 255) - goto cleanup; - } else { - j = *cp++; - } - } while(j); - - if (x < TOTFREQ-1 || x > TOTFREQ) - goto cleanup; - if (x < TOTFREQ) // historically we fill 4095, not 4096 - D[i].R[x] = D[i].R[x-1]; - - if (!rle_i && i+1 == *cp) { - i = *cp++; - rle_i = *cp++; - } else if (rle_i) { - rle_i--; - i++; - if (i > 
255) - goto cleanup; - } else { - i = *cp++; - } - } while (i); - for (i = 0; i < 256; i++) - if (map[i] == -1) - map[i] = 0; - - RansState rans0, rans1, rans2, rans3; - uint8_t *ptr = cp; - if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left - RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup; - RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup; - RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup; - RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup; - - RansState R[4]; - R[0] = rans0; - R[1] = rans1; - R[2] = rans2; - R[3] = rans3; - - unsigned int isz4 = out_sz>>2; - uint32_t l0 = 0; - uint32_t l1 = 0; - uint32_t l2 = 0; - uint32_t l3 = 0; - - unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4}; - - /* Allocate output buffer */ - out_buf = malloc(out_sz); - if (!out_buf) goto cleanup; - - uint8_t cc0 = D[map[l0]].R[R[0] & ((1u << TF_SHIFT)-1)]; - uint8_t cc1 = D[map[l1]].R[R[1] & ((1u << TF_SHIFT)-1)]; - uint8_t cc2 = D[map[l2]].R[R[2] & ((1u << TF_SHIFT)-1)]; - uint8_t cc3 = D[map[l3]].R[R[3] & ((1u << TF_SHIFT)-1)]; - - ptr_end -= 8; - for (; likely(i4[0] < isz4); i4[0]++, i4[1]++, i4[2]++, i4[3]++) { - // seq4-head2: file q40b - // O3 O2 - // gcc7 296/291 290/260 - // gcc10 292/292 290/261 - // gcc11 293/293 290/265 - // gcc12 293/290 291/266 - // clang10 293/290 296/272 - // clang13 300/290 290/266 - out_buf[i4[0]] = cc0; - out_buf[i4[1]] = cc1; - out_buf[i4[2]] = cc2; - out_buf[i4[3]] = cc3; - - RansDecSymbol32 s[4] = { - syms[l0][cc0], - syms[l1][cc1], - syms[l2][cc2], - syms[l3][cc3], - }; - RansDecAdvanceStep(&R[0], s[0].start, s[0].freq, TF_SHIFT); - RansDecAdvanceStep(&R[1], s[1].start, s[1].freq, TF_SHIFT); - RansDecAdvanceStep(&R[2], s[2].start, s[2].freq, TF_SHIFT); - RansDecAdvanceStep(&R[3], s[3].start, s[3].freq, TF_SHIFT); - - // Likely here helps speed of high-entropy data by 10-11%, - // but harms low entropy-data speed by 3-4%. 
- if ((ptr < ptr_end)) { - RansDecRenorm2(&R[0], &R[1], &ptr); - RansDecRenorm2(&R[2], &R[3], &ptr); - } else { - RansDecRenormSafe(&R[0], &ptr, ptr_end+8); - RansDecRenormSafe(&R[1], &ptr, ptr_end+8); - RansDecRenormSafe(&R[2], &ptr, ptr_end+8); - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - } - - l0 = map[cc0]; - l1 = map[cc1]; - l2 = map[cc2]; - l3 = map[cc3]; - - cc0 = D[l0].R[R[0] & ((1u << TF_SHIFT)-1)]; - cc1 = D[l1].R[R[1] & ((1u << TF_SHIFT)-1)]; - cc2 = D[l2].R[R[2] & ((1u << TF_SHIFT)-1)]; - cc3 = D[l3].R[R[3] & ((1u << TF_SHIFT)-1)]; - } - - // Remainder - for (; i4[3] < out_sz; i4[3]++) { - unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)]; - out_buf[i4[3]] = c3; - - uint32_t m = R[3] & ((1u << TF_SHIFT)-1); - R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start; - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - l3 = map[c3]; - } - - *out_size = out_sz; - - cleanup: - htscodecs_tls_free(D); - - return (unsigned char *)out_buf; -} - -/*----------------------------------------------------------------------------- - * Simple interface to the order-0 vs order-1 encoders and decoders. - */ -unsigned char *rans_compress(unsigned char *in, unsigned int in_size, - unsigned int *out_size, int order) { - if (in_size > INT_MAX) { - *out_size = 0; - return NULL; - } - - return order - ? rans_compress_O1(in, in_size, out_size) - : rans_compress_O0(in, in_size, out_size); -} - -unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - /* Both rans_uncompress functions need to be able to read at least 9 - bytes. */ - if (in_size < 9) - return NULL; - return in[0] - ? 
rans_uncompress_O1(in, in_size, out_size) - : rans_uncompress_O0(in, in_size, out_size); -} diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_static16_int.h b/src/htslib-1.18/htscodecs/htscodecs/rANS_static16_int.h deleted file mode 100644 index 96dc848..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/rANS_static16_int.h +++ /dev/null @@ -1,643 +0,0 @@ -#ifndef RANS_INTERNAL_H -#define RANS_INTERNAL_H - -#include "config.h" -#include "varint.h" -#include "utils.h" - -/* - * Copyright (c) 2017-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// Internal: common parts to all the rANSNx16pr implementations. - -// As per standard rANS_static but using optional RLE or bit-packing -// techniques prior to entropy encoding. This is a significant -// reduction in some data sets. - -// top bits in order byte -#define X_PACK 0x80 // Pack 2,4,8 or infinite symbols into a byte. -#define X_RLE 0x40 // Run length encoding with runs & lits encoded separately -#define X_CAT 0x20 // Nop; for tiny segments where rANS overhead is too big -#define X_NOSZ 0x10 // Don't store the original size; used by STRIPE mode -#define X_STRIPE 0x08 // For N-byte integer data; rotate & encode N streams. 
-#define X_32 0x04 // 32-way unrolling instead of 4-way - -// Not part of the file format, but used to direct the encoder -#define X_SIMD_AUTO 0x100 // automatically enable X_32 if we deem it worthy -#define X_SW32_ENC 0x200 // forcibly use the software version of X_32 -#define X_SW32_DEC 0x400 // forcibly use the software version of X_32 -#define X_NO_AVX512 0x800 // turn off avx512, but permits AVX2 - -#define TF_SHIFT 12 -#define TOTFREQ (1<> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -static inline int normalise_freq(uint32_t *F, int size, uint32_t tot) { - int m, M, j, loop = 0; - uint64_t tr; - if (!size) - return 0; - - again: - tr = ((uint64_t)tot<<31)/size + (1<<30)/size; - - for (size = m = M = j = 0; j < 256; j++) { - if (!F[j]) - continue; - - if (m < F[j]) - m = F[j], M = j; - - if ((F[j] = (F[j]*tr)>>31) == 0) - F[j] = 1; - size += F[j]; -// if (F[j] == tot) -// F[j]--; - } - - int adjust = tot - size; - if (adjust > 0) { - F[M] += adjust; - } else if (adjust < 0) { - if (F[M] > -adjust && (loop == 1 || F[M]/2 >= -adjust)) { - F[M] += adjust; - } else { - if (loop < 1) { - loop++; - goto again; - } - adjust += F[M]-1; - F[M] = 1; - for (j = 0; adjust && j < 256; j++) { - if (F[j] < 2) continue; - - int d = F[j] > -adjust; - int m = d ? adjust : 1-F[j]; - F[j] += m; - adjust -= m; - } - } - } - - //printf("F[%d]=%d\n", M, F[M]); - return F[M]>0 ? 0 : -1; -} - -// A specialised version of normalise_freq_shift where the input size -// is already normalised to a power of 2, meaning we can just perform -// shifts instead of hard to define multiplications and adjustments. 
-static inline void normalise_freq_shift(uint32_t *F, uint32_t size, - uint32_t max_tot) { - if (size == 0 || size == max_tot) - return; - - int shift = 0, i; - while (size < max_tot) - size*=2, shift++; - - for (i = 0; i < 256; i++) - F[i] <<= shift; -} - -// symbols only -static inline int encode_alphabet(uint8_t *cp, uint32_t *F) { - uint8_t *op = cp; - int rle, j; - - for (rle = j = 0; j < 256; j++) { - if (F[j]) { - // j - if (rle) { - rle--; - } else { - *cp++ = j; - if (!rle && j && F[j-1]) { - for(rle=j+1; rle<256 && F[rle]; rle++) - ; - rle -= j+1; - *cp++ = rle; - } - //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]); - } - } - } - *cp++ = 0; - - return cp - op; -} - -static inline int decode_alphabet(uint8_t *cp, uint8_t *cp_end, uint32_t *F) { - if (cp == cp_end) - return 0; - - uint8_t *op = cp; - int rle = 0; - int j = *cp++; - if (cp+2 >= cp_end) - goto carefully; - - do { - F[j] = 1; - if (!rle && j+1 == *cp) { - j = *cp++; - rle = *cp++; - } else if (rle) { - rle--; - j++; - if (j > 255) - return 0; - } else { - j = *cp++; - } - } while(j && cp+2 < cp_end); - - carefully: - if (j) { - do { - F[j] = 1; - if(cp >= cp_end) return 0; - if (!rle && j+1 == *cp) { - if (cp+1 >= cp_end) return 0; - j = *cp++; - rle = *cp++; - } else if (rle) { - rle--; - j++; - if (j > 255) - return 0; - } else { - if (cp >= cp_end) return 0; - j = *cp++; - } - } while(j && cp < cp_end); - } - - return cp - op; -} - -static inline int encode_freq(uint8_t *cp, uint32_t *F) { - uint8_t *op = cp; - int j; - - cp += encode_alphabet(cp, F); - - for (j = 0; j < 256; j++) { - if (F[j]) - cp += var_put_u32(cp, NULL, F[j]); - } - - return cp - op; -} - -static inline int decode_freq(uint8_t *cp, uint8_t *cp_end, uint32_t *F, - uint32_t *fsum) { - if (cp == cp_end) - return 0; - - uint8_t *op = cp; - cp += decode_alphabet(cp, cp_end, F); - - int j, tot = 0; - for (j = 0; j < 256; j++) { - if (F[j]) { - cp += var_get_u32(cp, cp_end, (unsigned int *)&F[j]); - tot += F[j]; - } - } - - 
*fsum = tot; - return cp - op; -} - - -// Use the order-0 freqs in F0 to encode the order-1 stats in F. -// All symbols present in F are present in F0, but some in F0 will -// be empty in F. Thus we run-length encode the 0 frequencies. -static inline int encode_freq_d(uint8_t *cp, uint32_t *F0, uint32_t *F) { - uint8_t *op = cp; - int j, dz; - - for (dz = j = 0; j < 256; j++) { - if (F0[j]) { - if (F[j] != 0) { - if (dz) { - // Replace dz zeros with zero + dz-1 run length - cp -= dz-1; - *cp++ = dz-1; - } - dz = 0; - cp += var_put_u32(cp, NULL, F[j]); - } else { - //fprintf(stderr, "2: j=%d F0[j]=%d, F[j]=%d, dz=%d\n", j, F0[j], F[j], dz); - dz++; - *cp++ = 0; - } - } else { - assert(F[j] == 0); - } - } - - if (dz) { - cp -= dz-1; - *cp++ = dz-1; - } - - return cp - op; -} - -// Normalise frequency total T[i] to match TOTFREQ_O1 and encode. -// Also initialises the RansEncSymbol structs. -// -// Returns the desired TF_SHIFT; 10 or 12 bit, or -1 on error. -static inline int encode_freq1(uint8_t *in, uint32_t in_size, int Nway, - RansEncSymbol syms[256][256], uint8_t **cp_p) { - int tab_size = 0, i, j, z; - uint8_t *out = *cp_p, *cp = out; - - // Compute O1 frequency statistics - uint32_t (*F)[256] = htscodecs_tls_calloc(256, (sizeof(*F))); - if (!F) - return -1; - uint32_t T[256+MAGIC] = {0}; - int isz4 = in_size/Nway; - if (hist1_4(in, in_size, F, T) < 0) - goto err; - for (z = 1; z < Nway; z++) - F[0][in[z*isz4]]++; - T[0]+=Nway-1; - - // Potential fix for the wrap-around bug in AVX2 O1 encoder with shift=12. - // This occurs when we have one single symbol, giving freq=4096. - // We fix it elsewhere for now by looking for the wrap-around. - // See "if (1)" statements in the AVX2 code, which is an alternative - // to the "if (0)" here. 
-// if (0) { -// int x = -1, y = -1; -// int n1, n2; -// for (x = 0; x < 256; x++) { -// n1 = n2 = -1; -// for (y = 0; y < 256; y++) { -// if (F[x][y]) -// n2 = n1, n1 = y; -// } -// if (n2!=-1 || n1 == -1) -// continue; -// -// for (y = 0; y < 256; y++) -// if (!F[x][y]) -// break; -// assert(y<256); -// F[x][y]++; -// F[0][y]++; T[y]++; F0[y]=1; -// F[0][x]++; T[x]++; F0[x]=1; -// } -// } - - // Encode the order-0 stats - int tmp_T0 = T[0]; - T[0] = 1; - *cp++ = 0; // marker for uncompressed (may change) - cp += encode_alphabet(cp, T); - T[0] = tmp_T0; - - // Decide between 10-bit and 12-bit freqs. - // Fills out S[] to hold the new scaled maximum value. - uint32_t S[256] = {0}; - int shift = rans_compute_shift(T, F, T, S); - - // Normalise so T[i] == TOTFREQ_O1 - for (i = 0; i < 256; i++) { - unsigned int x; - - if (T[i] == 0) - continue; - - uint32_t max_val = S[i]; - if (shift == TF_SHIFT_O1_FAST && max_val > TOTFREQ_O1_FAST) - max_val = TOTFREQ_O1_FAST; - - if (normalise_freq(F[i], T[i], max_val) < 0) - goto err; - T[i]=max_val; - - // Encode our frequency array - cp += encode_freq_d(cp, T, F[i]); - - normalise_freq_shift(F[i], T[i], 1< 1000) { - uint8_t *op = out; - // try rans0 compression of header - unsigned int u_freq_sz = cp-(op+1); - unsigned int c_freq_sz; - unsigned char *c_freq = rans_compress_O0_4x16(op+1, u_freq_sz, NULL, - &c_freq_sz); - if (c_freq && c_freq_sz + 6 < cp-op) { - *op++ |= 1; // compressed - op += var_put_u32(op, NULL, u_freq_sz); - op += var_put_u32(op, NULL, c_freq_sz); - memcpy(op, c_freq, c_freq_sz); - cp = op+c_freq_sz; - } - free(c_freq); - } - - tab_size = cp - out; - assert(tab_size < 257*257*3); - - *cp_p = cp; - htscodecs_tls_free(F); - return shift; - - err: - htscodecs_tls_free(F); - return -1; -} - -// Part of decode_freq1 below. This decodes an order-1 frequency table -// using an order-0 table to determine which stats may be stored. 
-static inline int decode_freq_d(uint8_t *cp, uint8_t *cp_end, uint32_t *F0, - uint32_t *F, uint32_t *total) { - if (cp == cp_end) - return 0; - - uint8_t *op = cp; - int j, dz, T = 0; - - for (j = dz = 0; j < 256 && cp < cp_end; j++) { - //if (F0[j]) fprintf(stderr, "F0[%d]=%d\n", j, F0[j]); - if (!F0[j]) - continue; - - uint32_t f; - if (dz) { - f = 0; - dz--; - } else { - if (cp >= cp_end) return 0; - cp += var_get_u32(cp, cp_end, &f); - if (f == 0) { - if (cp >= cp_end) return 0; - dz = *cp++; - } - } - F[j] = f; - T += f; - } - - if (total) *total = T; - return cp - op; -} - -typedef struct { - uint16_t f; - uint16_t b; -} fb_t; - -// Decode order-1 frequency table, filling out various lookup tables -// in the process. (Which will depend on shift and which values have -// been passed in.) -// -// Returns the number of bytes decoded. -static inline int decode_freq1(uint8_t *cp, uint8_t *cp_end, int shift, - uint32_t s3 [256][TOTFREQ_O1], - uint32_t s3F[256][TOTFREQ_O1_FAST], - uint8_t *sfb[256], fb_t fb[256][256]) { - uint8_t *cp_start = cp; - int i, j, x; - uint32_t F0[256] = {0}; - int fsz = decode_alphabet(cp, cp_end, F0); - if (!fsz) - goto err; - cp += fsz; - - if (cp >= cp_end) - goto err; - - // silence false gcc warnings - if (fb) {fb [0][0].b= 0;} - if (s3) {s3 [0][0] = 0;} - if (s3F){s3F[0][0] = 0;} - - for (i = 0; i < 256; i++) { - if (F0[i] == 0) - continue; - - uint32_t F[256] = {0}, T = 0; - fsz = decode_freq_d(cp, cp_end, F0, F, &T); - if (!fsz) - goto err; - cp += fsz; - - if (!T) { - //fprintf(stderr, "No freq for F_%d\n", i); - continue; - } - - normalise_freq_shift(F, T, 1< (1< (1< - -// Our own implementation of _mm256_set_m128i as it's not there on older -// gcc implementations. This is basically the same thing. 
-static inline __m256i _mm256_set_m128ix(__m128i H, __m128i L) { - return _mm256_insertf128_si256(_mm256_castsi128_si256(L), H, 1); -} - -static inline void rot32_simd(uint8_t t[32][32], uint8_t *out, int iN[32]) { - int z; - - __m256i lh8[32]; - for (z = 0; z < 32/2; z+=2) { - __m256i a, b, c, d; - a = _mm256_loadu_si256((__m256i *)&t[z*2+0]); - b = _mm256_loadu_si256((__m256i *)&t[z*2+1]); - c = _mm256_loadu_si256((__m256i *)&t[z*2+2]); - d = _mm256_loadu_si256((__m256i *)&t[z*2+3]); - - lh8[z+0] = _mm256_unpacklo_epi8(a, b); - lh8[z+16] = _mm256_unpackhi_epi8(a, b); - lh8[z+1] = _mm256_unpacklo_epi8(c, d); - lh8[z+17] = _mm256_unpackhi_epi8(c, d); - } - - __m256i lh32[32]; - for (z = 0; z < 32/4; z+=2) { - __m256i a, b, c, d; - a = _mm256_unpacklo_epi16(lh8[z*4+0], lh8[z*4+1]); - b = _mm256_unpacklo_epi16(lh8[z*4+2], lh8[z*4+3]); - c = _mm256_unpackhi_epi16(lh8[z*4+0], lh8[z*4+1]); - d = _mm256_unpackhi_epi16(lh8[z*4+2], lh8[z*4+3]); - - __m256i e, f, g, h; - e = _mm256_unpacklo_epi16(lh8[(z+1)*4+0], lh8[(z+1)*4+1]); - f = _mm256_unpacklo_epi16(lh8[(z+1)*4+2], lh8[(z+1)*4+3]); - g = _mm256_unpackhi_epi16(lh8[(z+1)*4+0], lh8[(z+1)*4+1]); - h = _mm256_unpackhi_epi16(lh8[(z+1)*4+2], lh8[(z+1)*4+3]); - - lh32[z+0] = _mm256_unpacklo_epi32(a,b); - lh32[z+8] = _mm256_unpacklo_epi32(c,d); - lh32[z+16] = _mm256_unpackhi_epi32(a,b); - lh32[z+24] = _mm256_unpackhi_epi32(c,d); - - lh32[z+1+0] = _mm256_unpacklo_epi32(e,f); - lh32[z+1+8] = _mm256_unpacklo_epi32(g,h); - lh32[z+1+16] = _mm256_unpackhi_epi32(e,f); - lh32[z+1+24] = _mm256_unpackhi_epi32(g,h); - } - - // Final unpack 64 and store - int idx[] = {0, 8, 4, 12, 2, 10, 6, 14}; - for (z = 0; z < 8; z++) { - int i = idx[z]; - - // Putting this here doesn't soeed things up - __m256i a = _mm256_unpacklo_epi64(lh32[i*2+0], lh32[i*2+1]); - __m256i b = _mm256_unpacklo_epi64(lh32[i*2+2], lh32[i*2+3]); - __m256i c = _mm256_unpackhi_epi64(lh32[i*2+0], lh32[i*2+1]); - __m256i d = _mm256_unpackhi_epi64(lh32[i*2+2], lh32[i*2+3]); - 
- __m256i p = _mm256_set_m128ix(_mm256_extracti128_si256(b,0), - _mm256_extracti128_si256(a,0)); - __m256i q = _mm256_set_m128ix(_mm256_extracti128_si256(d,0), - _mm256_extracti128_si256(c,0)); - __m256i r = _mm256_set_m128ix(_mm256_extracti128_si256(b,1), - _mm256_extracti128_si256(a,1)); - __m256i s = _mm256_set_m128ix(_mm256_extracti128_si256(d,1), - _mm256_extracti128_si256(c,1)); - - _mm256_storeu_si256((__m256i *)(&out[iN[z*2+0]]), p); - _mm256_storeu_si256((__m256i *)(&out[iN[z*2+1]]), q); - _mm256_storeu_si256((__m256i *)(&out[iN[z*2+16]]), r); - _mm256_storeu_si256((__m256i *)(&out[iN[z*2+17]]), s); - } - - // Store - for (z = 0; z < 32; z++) - iN[z] += 32; -} -#endif - -#endif // RANS_INTERNAL_H diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr_avx2.c b/src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr_avx2.c deleted file mode 100644 index bb4eae1..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr_avx2.c +++ /dev/null @@ -1,1650 +0,0 @@ -/* - * Copyright (c) 2017-2023 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#if defined(__x86_64__) && defined(HAVE_AVX2) - -#include -#include -#include -#include -#include -#include - -#include "rANS_word.h" -#include "rANS_static4x16.h" -#define ROT32_SIMD -#include "rANS_static16_int.h" -#include "varint.h" -#include "utils.h" -#include "permute.h" - -#define NX 32 - -#define LOAD1(a,b) __m256i a##1 = _mm256_load_si256((__m256i *)&b[0]); -#define LOAD2(a,b) __m256i a##2 = _mm256_load_si256((__m256i *)&b[8]); -#define LOAD3(a,b) __m256i a##3 = _mm256_load_si256((__m256i *)&b[16]); -#define LOAD4(a,b) __m256i a##4 = _mm256_load_si256((__m256i *)&b[24]); -#define LOAD(a,b) LOAD1(a,b);LOAD2(a,b);LOAD3(a,b);LOAD4(a,b) - -#define STORE1(a,b) _mm256_store_si256((__m256i *)&b[0], a##1); -#define STORE2(a,b) _mm256_store_si256((__m256i *)&b[8], a##2); -#define STORE3(a,b) _mm256_store_si256((__m256i *)&b[16], a##3); -#define STORE4(a,b) _mm256_store_si256((__m256i *)&b[24], a##4); -#define STORE(a,b) STORE1(a,b);STORE2(a,b);STORE3(a,b);STORE4(a,b) - -// _mm256__mul_epu32 is: -// -b -d -f -h -//* -q -s -u -w -//= BQ DS FU HW where BQ=b*q etc -// -// We want -// abcd efgh (a) -// *pqrs tuvw (b) -// =ABCD EFGH -// -// a mul b => BQ 
DS FU HW -// >>= 8 => -B QD SF UH -// & => -B -D -F -H (1) -// a>>8 mul b>>8 => AP CR ET GV -// & => A- C- E- G- -// | with (1) => AB CD EF GH -#if 0 -static __m256i _mm256_mulhi_epu32(__m256i a, __m256i b) { - __m256i ab_lm = _mm256_mul_epu32(a, b); - ab_lm = _mm256_srli_epi64(ab_lm, 32); - a = _mm256_srli_epi64(a, 32); - - ab_lm = _mm256_and_si256(ab_lm, _mm256_set1_epi64x(0xffffffff)); - b = _mm256_srli_epi64(b, 32); - - __m256i ab_hm = _mm256_mul_epu32(a, b); - - //return _mm256_blend_epi32(ab_lm, ab_hm, 0xaa); - ab_hm = _mm256_and_si256(ab_hm, - _mm256_set1_epi64x((uint64_t)0xffffffff00000000)); - ab_hm = _mm256_or_si256(ab_hm, ab_lm); - - return ab_hm; -} -#else -static inline __m256i _mm256_mulhi_epu32(__m256i a, __m256i b) { - // Multiply bottom 4 items and top 4 items together. - __m256i ab_hm = _mm256_mul_epu32(_mm256_srli_epi64(a, 32), - _mm256_srli_epi64(b, 32)); - __m256i ab_lm = _mm256_srli_epi64(_mm256_mul_epu32(a, b), 32); - - return _mm256_blend_epi32(ab_lm, ab_hm, 0xaa); -// -// // Shift to get hi 32-bit of each 64-bit product -// ab_hm = _mm256_and_si256(ab_hm, -// _mm256_set1_epi64x((uint64_t)0xffffffff00000000)); -// -// return _mm256_or_si256(ab_lm, ab_hm); -} -#endif - -#if 0 -// Simulated gather. This is sometimes faster as it can run on other ports. 
-static inline __m256i _mm256_i32gather_epi32x(int *b, __m256i idx, int size) { - int c[8] __attribute__((aligned(32))); - _mm256_store_si256((__m256i *)c, idx); - return _mm256_set_epi32(b[c[7]], b[c[6]], b[c[5]], b[c[4]], - b[c[3]], b[c[2]], b[c[1]], b[c[0]]); -} -#else -#define _mm256_i32gather_epi32x _mm256_i32gather_epi32 -#endif - -unsigned char *rans_compress_O0_32x16_avx2(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - unsigned char *cp, *out_end; - RansEncSymbol syms[256]; - RansState ransN[NX] __attribute__((aligned(32))); - uint8_t* ptr; - uint32_t F[256+MAGIC] = {0}; - int i, j, tab_size = 0, x, z; - // -20 for order/size/meta - uint32_t bound = rans_compress_bound_4x16(in_size,0)-20; - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - // If "out" isn't word aligned, tweak out_end/ptr to ensure it is. - // We already added more round in bound to allow for this. - if (((size_t)out)&1) - bound--; - ptr = out_end = out + bound; - - if (in_size == 0) - goto empty; - - // Compute statistics - if (hist8(in, in_size, F) < 0) - return NULL; - - // Normalise so frequences sum to power of 2 - uint32_t fsum = in_size; - uint32_t max_val = round2(fsum); - if (max_val > TOTFREQ) - max_val = TOTFREQ; - - if (normalise_freq(F, fsum, max_val) < 0) - return NULL; - fsum=max_val; - - cp = out; - cp += encode_freq(cp, F); - tab_size = cp-out; - //write(2, out+4, cp-(out+4)); - - if (normalise_freq(F, fsum, TOTFREQ) < 0) - return NULL; - - // Encode statistics. 
- for (x = j = 0; j < 256; j++) { - if (F[j]) { - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - x += F[j]; - } - } - - for (z = 0; z < NX; z++) - RansEncInit(&ransN[z]); - - z = i = in_size&(NX-1); - while (z-- > 0) - RansEncPutSymbol(&ransN[z], &ptr, &syms[in[in_size-(i-z)]]); - - uint16_t *ptr16 = (uint16_t *)ptr; - - LOAD(Rv, ransN); - - for (i=(in_size &~(NX-1)); i>0; i-=NX) { - // We need to gather sym[curr_char][last_char] structs. - // These hold 4 32-bit values, so are 128 bit each, and - // are loaded from 32 distinct addresses. - // - // We load them into 32 128-bit lanes and then combine to get - // 16 avx-256 registers. - // These are now ABCD ABCD ABCD ABCD... orientation - // We can then "gather" from these registers via a combination - // of shuffle / permutes / and / or operations. This is less - // IO than repeating 4 sets of gathers/loads 32-times over. - - // DCBA holding 4 elements in a syms[] array - // -> 4-way rotate via shuffles - // [0] DCBA 11 10 01 00 E4 - // [1] CBAD 10 01 00 11 93 - // [2] BADC 01 00 11 10 4E - // [3] ADCB 00 11 10 01 39 - // - // Then AND to select relevant lanes and OR - // [0] ......A0 - // [1] ....A1.. - // [2] ..A2.... - // [3] A3...... OR to get A3A2A1A0 - // - // or: - // [0] ....B0.. - // [1] ..B1.... - // [2] B2...... - // [3] ......B3 OR to get B2B1B0B3 and shuffle to B3B2B1B0 - - __m256i sh[16]; - for (z = 0; z < 16; z+=4) { - int Z = i - NX + z*2; - -#define m128_to_256 _mm256_castsi128_si256 - __m256i t0, t1, t2, t3; - __m128i *s0, *s1, *s2, *s3; - s0 = (__m128i *)(&syms[in[Z+0]]); - s1 = (__m128i *)(&syms[in[Z+4]]); - s2 = (__m128i *)(&syms[in[Z+1]]); - s3 = (__m128i *)(&syms[in[Z+5]]); - - // FIXME: try load instead of loadu, as 128-bit aligned. 
- t0 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s0)), 0xE4); - t1 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s1)), 0xE4); - t2 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s2)), 0x93); - t3 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s3)), 0x93); - - sh[z+0] = _mm256_permute2x128_si256(t0, t1, 0x20); - sh[z+1] = _mm256_permute2x128_si256(t2, t3, 0x20); - - s0 = (__m128i *)(&syms[in[Z+2]]); - s1 = (__m128i *)(&syms[in[Z+6]]); - s2 = (__m128i *)(&syms[in[Z+3]]); - s3 = (__m128i *)(&syms[in[Z+7]]); - - t0 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s0)), 0x4E); - t1 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s1)), 0x4E); - t2 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s2)), 0x39); - t3 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s3)), 0x39); - - sh[z+2] = _mm256_permute2x128_si256(t0, t1, 0x20); - sh[z+3] = _mm256_permute2x128_si256(t2, t3, 0x20); - - // potential to set xmax, rf, bias, and SD in-situ here, removing - // the need to hold sh[] in regs. Doing so doesn't seem to speed - // things up though. 
- } - - __m256i xA = _mm256_set_epi32(0,0,0,-1, 0,0,0,-1); - __m256i xB = _mm256_set_epi32(0,0,-1,0, 0,0,-1,0); - __m256i xC = _mm256_set_epi32(0,-1,0,0, 0,-1,0,0); - __m256i xD = _mm256_set_epi32(-1,0,0,0, -1,0,0,0); - -#define SYM_LOAD(x, A, B, C, D) \ - _mm256_or_si256(_mm256_or_si256(_mm256_and_si256(sh[x+0], A), \ - _mm256_and_si256(sh[x+1], B)), \ - _mm256_or_si256(_mm256_and_si256(sh[x+2], C), \ - _mm256_and_si256(sh[x+3], D))) - - // Renorm: - // if (x > x_max) {*--ptr16 = x & 0xffff; x >>= 16;} - __m256i xmax1 = SYM_LOAD( 0, xA, xB, xC, xD); - __m256i xmax2 = SYM_LOAD( 4, xA, xB, xC, xD); - __m256i xmax3 = SYM_LOAD( 8, xA, xB, xC, xD); - __m256i xmax4 = SYM_LOAD(12, xA, xB, xC, xD); - - __m256i cv1 = _mm256_cmpgt_epi32(Rv1, xmax1); - __m256i cv2 = _mm256_cmpgt_epi32(Rv2, xmax2); - __m256i cv3 = _mm256_cmpgt_epi32(Rv3, xmax3); - __m256i cv4 = _mm256_cmpgt_epi32(Rv4, xmax4); - - // Store bottom 16-bits at ptr16 - unsigned int imask1 = _mm256_movemask_ps((__m256)cv1); - unsigned int imask2 = _mm256_movemask_ps((__m256)cv2); - unsigned int imask3 = _mm256_movemask_ps((__m256)cv3); - unsigned int imask4 = _mm256_movemask_ps((__m256)cv4); - - __m256i idx1 = _mm256_load_si256((const __m256i*)permutec[imask1]); - __m256i idx2 = _mm256_load_si256((const __m256i*)permutec[imask2]); - __m256i idx3 = _mm256_load_si256((const __m256i*)permutec[imask3]); - __m256i idx4 = _mm256_load_si256((const __m256i*)permutec[imask4]); - - // Permute; to gather together the rans states that need flushing - __m256i V1, V2, V3, V4; - V1 = _mm256_permutevar8x32_epi32(_mm256_and_si256(Rv1, cv1), idx1); - V2 = _mm256_permutevar8x32_epi32(_mm256_and_si256(Rv2, cv2), idx2); - V3 = _mm256_permutevar8x32_epi32(_mm256_and_si256(Rv3, cv3), idx3); - V4 = _mm256_permutevar8x32_epi32(_mm256_and_si256(Rv4, cv4), idx4); - - // We only flush bottom 16 bits, to squash 32-bit states into 16 bit. 
- V1 = _mm256_and_si256(V1, _mm256_set1_epi32(0xffff)); - V2 = _mm256_and_si256(V2, _mm256_set1_epi32(0xffff)); - V3 = _mm256_and_si256(V3, _mm256_set1_epi32(0xffff)); - V4 = _mm256_and_si256(V4, _mm256_set1_epi32(0xffff)); - __m256i V12 = _mm256_packus_epi32(V1, V2); - __m256i V34 = _mm256_packus_epi32(V3, V4); - - // It's BAba order, want BbAa so shuffle. - V12 = _mm256_permute4x64_epi64(V12, 0xd8); - V34 = _mm256_permute4x64_epi64(V34, 0xd8); - - // Now we have bottom N 16-bit values in each V12/V34 to flush - __m128i f = _mm256_extractf128_si256(V34, 1); - _mm_storeu_si128((__m128i *)(ptr16-8), f); - ptr16 -= _mm_popcnt_u32(imask4); - - f = _mm256_extractf128_si256(V34, 0); - _mm_storeu_si128((__m128i *)(ptr16-8), f); - ptr16 -= _mm_popcnt_u32(imask3); - - f = _mm256_extractf128_si256(V12, 1); - _mm_storeu_si128((__m128i *)(ptr16-8), f); - ptr16 -= _mm_popcnt_u32(imask2); - - f = _mm256_extractf128_si256(V12, 0); - _mm_storeu_si128((__m128i *)(ptr16-8), f); - ptr16 -= _mm_popcnt_u32(imask1); - - __m256i Rs; - Rs = _mm256_srli_epi32(Rv1,16); Rv1 = _mm256_blendv_epi8(Rv1, Rs, cv1); - Rs = _mm256_srli_epi32(Rv2,16); Rv2 = _mm256_blendv_epi8(Rv2, Rs, cv2); - Rs = _mm256_srli_epi32(Rv3,16); Rv3 = _mm256_blendv_epi8(Rv3, Rs, cv3); - Rs = _mm256_srli_epi32(Rv4,16); Rv4 = _mm256_blendv_epi8(Rv4, Rs, cv4); - - // Cannot trivially replace the multiply as mulhi_epu32 doesn't - // exist (only mullo). - // However we can use _mm256_mul_epu32 twice to get 64bit results - // (half our lanes) and shift/or to get the answer. - // - // (AVX512 allows us to hold it all in 64-bit lanes and use mullo_epi64 - // plus a shift. KNC has mulhi_epi32, but not sure if this is - // available.) 
- __m256i rfv1 = _mm256_shuffle_epi32(SYM_LOAD( 0, xB, xC, xD, xA),0x39); - __m256i rfv2 = _mm256_shuffle_epi32(SYM_LOAD( 4, xB, xC, xD, xA),0x39); - __m256i rfv3 = _mm256_shuffle_epi32(SYM_LOAD( 8, xB, xC, xD, xA),0x39); - __m256i rfv4 = _mm256_shuffle_epi32(SYM_LOAD(12, xB, xC, xD, xA),0x39); - - rfv1 = _mm256_mulhi_epu32(Rv1, rfv1); - rfv2 = _mm256_mulhi_epu32(Rv2, rfv2); - rfv3 = _mm256_mulhi_epu32(Rv3, rfv3); - rfv4 = _mm256_mulhi_epu32(Rv4, rfv4); - - __m256i SDv1 = _mm256_shuffle_epi32(SYM_LOAD( 0, xD, xA, xB, xC),0x93); - __m256i SDv2 = _mm256_shuffle_epi32(SYM_LOAD( 4, xD, xA, xB, xC),0x93); - __m256i SDv3 = _mm256_shuffle_epi32(SYM_LOAD( 8, xD, xA, xB, xC),0x93); - __m256i SDv4 = _mm256_shuffle_epi32(SYM_LOAD(12, xD, xA, xB, xC),0x93); - - __m256i shiftv1 = _mm256_srli_epi32(SDv1, 16); - __m256i shiftv2 = _mm256_srli_epi32(SDv2, 16); - __m256i shiftv3 = _mm256_srli_epi32(SDv3, 16); - __m256i shiftv4 = _mm256_srli_epi32(SDv4, 16); - - shiftv1 = _mm256_sub_epi32(shiftv1, _mm256_set1_epi32(32)); - shiftv2 = _mm256_sub_epi32(shiftv2, _mm256_set1_epi32(32)); - shiftv3 = _mm256_sub_epi32(shiftv3, _mm256_set1_epi32(32)); - shiftv4 = _mm256_sub_epi32(shiftv4, _mm256_set1_epi32(32)); - - __m256i qv1 = _mm256_srlv_epi32(rfv1, shiftv1); - __m256i qv2 = _mm256_srlv_epi32(rfv2, shiftv2); - - __m256i freqv1 = _mm256_and_si256(SDv1, _mm256_set1_epi32(0xffff)); - __m256i freqv2 = _mm256_and_si256(SDv2, _mm256_set1_epi32(0xffff)); - qv1 = _mm256_mullo_epi32(qv1, freqv1); - qv2 = _mm256_mullo_epi32(qv2, freqv2); - - __m256i qv3 = _mm256_srlv_epi32(rfv3, shiftv3); - __m256i qv4 = _mm256_srlv_epi32(rfv4, shiftv4); - - __m256i freqv3 = _mm256_and_si256(SDv3, _mm256_set1_epi32(0xffff)); - __m256i freqv4 = _mm256_and_si256(SDv4, _mm256_set1_epi32(0xffff)); - qv3 = _mm256_mullo_epi32(qv3, freqv3); - qv4 = _mm256_mullo_epi32(qv4, freqv4); - - __m256i biasv1=_mm256_shuffle_epi32(SYM_LOAD( 0, xC, xD, xA, xB),0x4E); - __m256i biasv2=_mm256_shuffle_epi32(SYM_LOAD( 4, xC, xD, xA, 
xB),0x4E); - __m256i biasv3=_mm256_shuffle_epi32(SYM_LOAD( 8, xC, xD, xA, xB),0x4E); - __m256i biasv4=_mm256_shuffle_epi32(SYM_LOAD(12, xC, xD, xA, xB),0x4E); - - qv1 = _mm256_add_epi32(qv1, biasv1); - qv2 = _mm256_add_epi32(qv2, biasv2); - qv3 = _mm256_add_epi32(qv3, biasv3); - qv4 = _mm256_add_epi32(qv4, biasv4); - - Rv1 = _mm256_add_epi32(Rv1, qv1); - Rv2 = _mm256_add_epi32(Rv2, qv2); - Rv3 = _mm256_add_epi32(Rv3, qv3); - Rv4 = _mm256_add_epi32(Rv4, qv4); - } - - STORE(Rv, ransN); - - ptr = (uint8_t *)ptr16; - for (z = NX-1; z >= 0; z--) - RansEncFlush(&ransN[z], &ptr); - - empty: - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - -// cp = out; -// *cp++ = (in_size>> 0) & 0xff; -// *cp++ = (in_size>> 8) & 0xff; -// *cp++ = (in_size>>16) & 0xff; -// *cp++ = (in_size>>24) & 0xff; - - memmove(out + tab_size, ptr, out_end-ptr); - - return out; -} - -unsigned char *rans_uncompress_O0_32x16_avx2(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < 16) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *out_free = NULL; - unsigned char *cp_end = in + in_size; - int i; - uint32_t s3[TOTFREQ] __attribute__((aligned(32))); // For TF_SHIFT <= 12 - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - // Precompute reverse lookup of frequency. 
- uint32_t F[256] = {0}, fsum; - int fsz = decode_freq(cp, cp_end, F, &fsum); - if (!fsz) - goto err; - cp += fsz; - - normalise_freq_shift(F, fsum, TOTFREQ); - - // Build symbols; fixme, do as part of decode, see the _d variant - if (rans_F_to_s3(F, TF_SHIFT, s3)) - goto err; - - if (cp_end - cp < NX * 4) - goto err; - - int z; - RansState R[NX] __attribute__((aligned(32))); - for (z = 0; z < NX; z++) { - RansDecInit(&R[z], &cp); - if (R[z] < RANS_BYTE_L) - goto err; - } - - uint16_t *sp = (uint16_t *)cp; - uint8_t overflow[64+64] = {0}; - cp_end -= 64; - - // Protect against running off the end of in buffer. - // We copy it to a worst-case local buffer when near the end. - if ((uint8_t *)sp > cp_end) { - memmove(overflow, sp, cp_end+64 - (uint8_t *)sp); - sp = (uint16_t *)overflow; - cp_end = overflow + sizeof(overflow) - 64; - } - - int out_end = (out_sz&~(NX-1)); - const uint32_t mask = (1u << TF_SHIFT)-1; - - __m256i maskv = _mm256_set1_epi32(mask); // set mask in all lanes - LOAD(Rv, R); - - for (i=0; i < out_end; i+=NX) { - //for (z = 0; z < NX; z++) - // m[z] = R[z] & mask; - __m256i masked1 = _mm256_and_si256(Rv1, maskv); - __m256i masked2 = _mm256_and_si256(Rv2, maskv); - - // S[z] = s3[m[z]]; - __m256i Sv1 = _mm256_i32gather_epi32x((int *)s3, masked1, sizeof(*s3)); - __m256i Sv2 = _mm256_i32gather_epi32x((int *)s3, masked2, sizeof(*s3)); - - // f[z] = S[z]>>(TF_SHIFT+8); - __m256i fv1 = _mm256_srli_epi32(Sv1, TF_SHIFT+8); - __m256i fv2 = _mm256_srli_epi32(Sv2, TF_SHIFT+8); - - // b[z] = (S[z]>>8) & mask; - __m256i bv1 = _mm256_and_si256(_mm256_srli_epi32(Sv1, 8), maskv); - __m256i bv2 = _mm256_and_si256(_mm256_srli_epi32(Sv2, 8), maskv); - - // s[z] = S[z] & 0xff; - __m256i sv1 = _mm256_and_si256(Sv1, _mm256_set1_epi32(0xff)); - __m256i sv2 = _mm256_and_si256(Sv2, _mm256_set1_epi32(0xff)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT) + b[z]; - Rv1 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv1,TF_SHIFT), fv1), bv1); - Rv2 = 
_mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv2,TF_SHIFT), fv2), bv2); - -#ifdef __clang__ - // Protect against running off the end of in buffer. - // We copy it to a worst-case local buffer when near the end. - if ((uint8_t *)sp > cp_end) { - memmove(overflow, sp, cp_end+64 - (uint8_t *)sp); - sp = (uint16_t *)overflow; - cp_end = overflow + sizeof(overflow) - 64; - } -#endif - // Tricky one: out[i+z] = s[z]; - // ---h---g ---f---e ---d---c ---b---a - // ---p---o ---n---m ---l---k ---j---i - // packs_epi32 -p-o-n-m -h-g-f-e -l-k-j-i -d-c-b-a - // permute4x64 -p-o-n-m -l-k-j-i -h-g-f-e -d-c-b-a - // packs_epi16 ponmlkji ponmlkji hgfedcba hgfedcba - sv1 = _mm256_packus_epi32(sv1, sv2); - sv1 = _mm256_permute4x64_epi64(sv1, 0xd8); - __m256i Vv1 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - sv1 = _mm256_packus_epi16(sv1, sv1); - - // c = R[z] < RANS_BYTE_L; - -// The lack of unsigned comparisons means we have to jump through hoops. -// in AVX2 land the second version comes out best (and first in SSE land). 
- -//#define _mm256_cmplt_epu32_imm(a,b) _mm256_andnot_si256(_mm256_cmpeq_epi32(_mm256_max_epu32((a),_mm256_set1_epi32(b)), (a)), _mm256_set1_epi32(-1)); - -#define _mm256_cmplt_epu32_imm(a,b) _mm256_cmpgt_epi32(_mm256_set1_epi32((b)-0x80000000), _mm256_xor_si256((a), _mm256_set1_epi32(0x80000000))) - - __m256i renorm_mask1, renorm_mask2; - renorm_mask1 = _mm256_cmplt_epu32_imm(Rv1, RANS_BYTE_L); - renorm_mask2 = _mm256_cmplt_epu32_imm(Rv2, RANS_BYTE_L); - - // y = (R[z] << 16) | V[z]; - unsigned int imask1 = _mm256_movemask_ps((__m256)renorm_mask1); - __m256i idx1 = _mm256_load_si256((const __m256i*)permute[imask1]); - __m256i Yv1 = _mm256_slli_epi32(Rv1, 16); - Vv1 = _mm256_permutevar8x32_epi32(Vv1, idx1); - __m256i Yv2 = _mm256_slli_epi32(Rv2, 16); - - // Shuffle the renorm values to correct lanes and incr sp pointer - unsigned int imask2 = _mm256_movemask_ps((__m256)renorm_mask2); - sp += _mm_popcnt_u32(imask1); - - __m256i idx2 = _mm256_load_si256((const __m256i*)permute[imask2]); - __m256i Vv2 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - sp += _mm_popcnt_u32(imask2); - - Yv1 = _mm256_or_si256(Yv1, Vv1); - Vv2 = _mm256_permutevar8x32_epi32(Vv2, idx2); - Yv2 = _mm256_or_si256(Yv2, Vv2); - - // R[z] = c ? 
Y[z] : R[z]; - Rv1 = _mm256_blendv_epi8(Rv1, Yv1, renorm_mask1); - Rv2 = _mm256_blendv_epi8(Rv2, Yv2, renorm_mask2); - - // ------------------------------------------------------------ - - // m[z] = R[z] & mask; - // S[z] = s3[m[z]]; - __m256i masked3 = _mm256_and_si256(Rv3, maskv); - __m256i Sv3 = _mm256_i32gather_epi32x((int *)s3, masked3, sizeof(*s3)); - - *(uint64_t *)&out[i+0] = _mm256_extract_epi64(sv1, 0); - *(uint64_t *)&out[i+8] = _mm256_extract_epi64(sv1, 2); - - __m256i masked4 = _mm256_and_si256(Rv4, maskv); - __m256i Sv4 = _mm256_i32gather_epi32x((int *)s3, masked4, sizeof(*s3)); - - // f[z] = S[z]>>(TF_SHIFT+8); - __m256i fv3 = _mm256_srli_epi32(Sv3, TF_SHIFT+8); - __m256i fv4 = _mm256_srli_epi32(Sv4, TF_SHIFT+8); - - // b[z] = (S[z]>>8) & mask; - __m256i bv3 = _mm256_and_si256(_mm256_srli_epi32(Sv3, 8), maskv); - __m256i bv4 = _mm256_and_si256(_mm256_srli_epi32(Sv4, 8), maskv); - - // s[z] = S[z] & 0xff; - __m256i sv3 = _mm256_and_si256(Sv3, _mm256_set1_epi32(0xff)); - __m256i sv4 = _mm256_and_si256(Sv4, _mm256_set1_epi32(0xff)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT) + b[z]; - Rv3 = _mm256_add_epi32(_mm256_mullo_epi32(_mm256_srli_epi32(Rv3,TF_SHIFT),fv3),bv3); - Rv4 = _mm256_add_epi32(_mm256_mullo_epi32(_mm256_srli_epi32(Rv4,TF_SHIFT),fv4),bv4); - - // Tricky one: out[i+z] = s[z]; - // ---h---g ---f---e ---d---c ---b---a - // ---p---o ---n---m ---l---k ---j---i - // packs_epi32 -p-o-n-m -h-g-f-e -l-k-j-i -d-c-b-a - // permute4x64 -p-o-n-m -l-k-j-i -h-g-f-e -d-c-b-a - // packs_epi16 ponmlkji ponmlkji hgfedcba hgfedcba - sv3 = _mm256_packus_epi32(sv3, sv4); - sv3 = _mm256_permute4x64_epi64(sv3, 0xd8); - - // c = R[z] < RANS_BYTE_L; - __m256i renorm_mask3, renorm_mask4; - renorm_mask3 = _mm256_cmplt_epu32_imm(Rv3, RANS_BYTE_L); - sv3 = _mm256_packus_epi16(sv3, sv3); - renorm_mask4 = _mm256_cmplt_epu32_imm(Rv4, RANS_BYTE_L); - - *(uint64_t *)&out[i+16] = _mm256_extract_epi64(sv3, 0); - *(uint64_t *)&out[i+24] = _mm256_extract_epi64(sv3, 2); - - // y = 
(R[z] << 16) | V[z]; - __m256i Vv3 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - __m256i Yv3 = _mm256_slli_epi32(Rv3, 16); - unsigned int imask3 = _mm256_movemask_ps((__m256)renorm_mask3); - __m256i idx3 = _mm256_load_si256((const __m256i*)permute[imask3]); - - // Shuffle the renorm values to correct lanes and incr sp pointer - Vv3 = _mm256_permutevar8x32_epi32(Vv3, idx3); - __m256i Yv4 = _mm256_slli_epi32(Rv4, 16); - unsigned int imask4 = _mm256_movemask_ps((__m256)renorm_mask4); - sp += _mm_popcnt_u32(imask3); - - __m256i idx4 = _mm256_load_si256((const __m256i*)permute[imask4]); - __m256i Vv4 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - - //Vv = _mm256_and_si256(Vv, renorm_mask); (blend does the AND anyway) - Yv3 = _mm256_or_si256(Yv3, Vv3); - Vv4 = _mm256_permutevar8x32_epi32(Vv4, idx4); - Yv4 = _mm256_or_si256(Yv4, Vv4); - -#ifndef __clang__ - // 26% faster here than above for gcc10, but former location is - // better on clang. - - // Protect against running off the end of in buffer. - // We copy it to a worst-case local buffer when near the end. - if ((uint8_t *)sp > cp_end) { - memmove(overflow, sp, cp_end+64 - (uint8_t *)sp); - sp = (uint16_t *)overflow; - cp_end = overflow + sizeof(overflow) - 64; - } -#endif - - sp += _mm_popcnt_u32(imask4); - - // R[z] = c ? 
Y[z] : R[z]; - Rv3 = _mm256_blendv_epi8(Rv3, Yv3, renorm_mask3); - Rv4 = _mm256_blendv_epi8(Rv4, Yv4, renorm_mask4); - } - - STORE(Rv, R); - //_mm256_store_si256((__m256i *)&R[0], Rv1); - //_mm256_store_si256((__m256i *)&R[8], Rv2); - //_mm256_store_si256((__m256i *)&R[16], Rv3); - //_mm256_store_si256((__m256i *)&R[24], Rv4); - - for (z = out_sz & (NX-1); z-- > 0; ) - out[out_end + z] = s3[R[z] & mask]; - - //fprintf(stderr, " 0 Decoded %d bytes\n", (int)(cp-in)); //c-size - - return out; - - err: - free(out_free); - return NULL; -} - -//----------------------------------------------------------------------------- - -unsigned char *rans_compress_O1_32x16_avx2(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - unsigned char *cp, *out_end, *out_free = NULL; - unsigned int tab_size; - uint32_t bound = rans_compress_bound_4x16(in_size,1)-20; - int z; - RansState ransN[NX] __attribute__((aligned(32))); - - if (in_size < NX) // force O0 instead - return NULL; - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - if (((size_t)out)&1) - bound--; - out_end = out + bound; - - RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms))); - if (!syms) { - free(out_free); - return NULL; - } - - cp = out; - int shift = encode_freq1(in, in_size, 32, syms, &cp); - if (shift < 0) { - free(out_free); - htscodecs_tls_free(syms); - return NULL; - } - tab_size = cp - out; - - for (z = 0; z < NX; z++) - RansEncInit(&ransN[z]); - - uint8_t* ptr = out_end; - - int iN[NX], isz4 = in_size/NX; - for (z = 0; z < NX; z++) - iN[z] = (z+1)*isz4-2; - - unsigned char lN[NX]; - for (z = 0; z < NX; z++) - lN[z] = in[iN[z]+1]; - - // Deal with the remainder - z = NX-1; - lN[z] = in[in_size-1]; - for (iN[z] = in_size-2; iN[z] > NX*isz4-2; iN[z]--) { - unsigned char c = in[iN[z]]; - RansEncPutSymbol(&ransN[z], &ptr, &syms[c][lN[z]]); - lN[z] = c; - } - - uint16_t *ptr16 = (uint16_t 
*)ptr; - - LOAD(Rv, ransN); - - for (; iN[0] >= 0; ) { - // We need to gather sym[curr_char][last_char] structs. - // These hold 4 32-bit values, so are 128 bit each, and - // are loaded from 32 distinct addresses. - // - // We load them into 32 128-bit lanes and then combine to get - // 16 avx-256 registers. - // These are now ABCD ABCD ABCD ABCD... orientation - // Code we can then "gather" from these registers via a combination - // of shuffle / permutes / and / or operations. This is less - // IO than repeating 4 sets of gathers/loads 32-times over. - - // DCBA holding 4 elements in a syms[] array - // -> 4-way rotate via shuffles - // [0] DCBA 11 10 01 00 E4 - // [1] CBAD 10 01 00 11 93 - // [2] BADC 01 00 11 10 4E - // [3] ADCB 00 11 10 01 39 - // - // Then AND to select relevant lanes and OR - // [0] ......A0 - // [1] ....A1.. - // [2] ..A2.... - // [3] A3...... OR to get A3A2A1A0 - // - // or: - // [0] ....B0.. - // [1] ..B1.... - // [2] B2...... - // [3] ......B3 OR to get B2B1B0B3 and shuffle to B3B2B1B0 - - __m256i sh[16]; - for (z = 0; z < 16; z+=4) { - int Z = z*2; - -#define m128_to_256 _mm256_castsi128_si256 - __m256i t0, t1, t2, t3; - __m128i *s0, *s1, *s2, *s3; - s0 = (__m128i *)(&syms[in[iN[Z+0]]][lN[Z+0]]); - s1 = (__m128i *)(&syms[in[iN[Z+4]]][lN[Z+4]]); - s2 = (__m128i *)(&syms[in[iN[Z+1]]][lN[Z+1]]); - s3 = (__m128i *)(&syms[in[iN[Z+5]]][lN[Z+5]]); - - t0 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s0)), 0xE4); - t1 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s1)), 0xE4); - t2 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s2)), 0x93); - t3 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s3)), 0x93); - - lN[Z+0] = in[iN[Z+0]]; - lN[Z+4] = in[iN[Z+4]]; - lN[Z+1] = in[iN[Z+1]]; - lN[Z+5] = in[iN[Z+5]]; - - sh[z+0] = _mm256_permute2x128_si256(t0, t1, 0x20); - sh[z+1] = _mm256_permute2x128_si256(t2, t3, 0x20); - - s0 = (__m128i *)(&syms[in[iN[Z+2]]][lN[Z+2]]); - s1 = (__m128i *)(&syms[in[iN[Z+6]]][lN[Z+6]]); - s2 = (__m128i 
*)(&syms[in[iN[Z+3]]][lN[Z+3]]); - s3 = (__m128i *)(&syms[in[iN[Z+7]]][lN[Z+7]]); - - t0 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s0)), 0x4E); - t1 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s1)), 0x4E); - t2 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s2)), 0x39); - t3 = _mm256_shuffle_epi32(m128_to_256(_mm_loadu_si128(s3)), 0x39); - - lN[Z+2] = in[iN[Z+2]]; - lN[Z+6] = in[iN[Z+6]]; - lN[Z+3] = in[iN[Z+3]]; - lN[Z+7] = in[iN[Z+7]]; - - sh[z+2] = _mm256_permute2x128_si256(t0, t1, 0x20); - sh[z+3] = _mm256_permute2x128_si256(t2, t3, 0x20); - - // potential to set xmax, rf, bias, and SD in-situ here, removing - // the need to hold sh[] in regs. Doing so doesn't seem to speed - // things up though. - } - - __m256i xA = _mm256_set_epi32(0,0,0,-1, 0,0,0,-1); - __m256i xB = _mm256_set_epi32(0,0,-1,0, 0,0,-1,0); - __m256i xC = _mm256_set_epi32(0,-1,0,0, 0,-1,0,0); - __m256i xD = _mm256_set_epi32(-1,0,0,0, -1,0,0,0); - - // Extract 32-bit xmax elements from syms[] data (in sh vec array) -/* -#define SYM_LOAD(x, A, B, C, D) \ - _mm256_or_si256(_mm256_or_si256(_mm256_and_si256(sh[x+0], A), \ - _mm256_and_si256(sh[x+1], B)), \ - _mm256_or_si256(_mm256_and_si256(sh[x+2], C), \ - _mm256_and_si256(sh[x+3], D))) -*/ - __m256i xmax1 = SYM_LOAD( 0, xA, xB, xC, xD); - __m256i xmax2 = SYM_LOAD( 4, xA, xB, xC, xD); - __m256i xmax3 = SYM_LOAD( 8, xA, xB, xC, xD); - __m256i xmax4 = SYM_LOAD(12, xA, xB, xC, xD); - - // ------------------------------------------------------------ - // for (z = NX-1; z >= 0; z--) { - // if (ransN[z] >= x_max[z]) { - // *--ptr16 = ransN[z] & 0xffff; - // ransN[z] >>= 16; - // } - // } - __m256i cv1 = _mm256_cmpgt_epi32(Rv1, xmax1); - __m256i cv2 = _mm256_cmpgt_epi32(Rv2, xmax2); - __m256i cv3 = _mm256_cmpgt_epi32(Rv3, xmax3); - __m256i cv4 = _mm256_cmpgt_epi32(Rv4, xmax4); - - // Store bottom 16-bits at ptr16 - // - // for (z = NX-1; z >= 0; z--) { - // if (cond[z]) *--ptr16 = (uint16_t )(ransN[z] & 0xffff); - // } - unsigned int 
imask1 = _mm256_movemask_ps((__m256)cv1); - unsigned int imask2 = _mm256_movemask_ps((__m256)cv2); - unsigned int imask3 = _mm256_movemask_ps((__m256)cv3); - unsigned int imask4 = _mm256_movemask_ps((__m256)cv4); - - __m256i idx1 = _mm256_load_si256((const __m256i*)permutec[imask1]); - __m256i idx2 = _mm256_load_si256((const __m256i*)permutec[imask2]); - __m256i idx3 = _mm256_load_si256((const __m256i*)permutec[imask3]); - __m256i idx4 = _mm256_load_si256((const __m256i*)permutec[imask4]); - - // Permute; to gather together the rans states that need flushing - __m256i V1, V2, V3, V4; - V1 = _mm256_permutevar8x32_epi32(_mm256_and_si256(Rv1, cv1), idx1); - V2 = _mm256_permutevar8x32_epi32(_mm256_and_si256(Rv2, cv2), idx2); - V3 = _mm256_permutevar8x32_epi32(_mm256_and_si256(Rv3, cv3), idx3); - V4 = _mm256_permutevar8x32_epi32(_mm256_and_si256(Rv4, cv4), idx4); - - // We only flush bottom 16 bits, to squash 32-bit states into 16 bit. - V1 = _mm256_and_si256(V1, _mm256_set1_epi32(0xffff)); - V2 = _mm256_and_si256(V2, _mm256_set1_epi32(0xffff)); - V3 = _mm256_and_si256(V3, _mm256_set1_epi32(0xffff)); - V4 = _mm256_and_si256(V4, _mm256_set1_epi32(0xffff)); - __m256i V12 = _mm256_packus_epi32(V1, V2); - __m256i V34 = _mm256_packus_epi32(V3, V4); - - // It's BAba order, want BbAa so shuffle. 
- V12 = _mm256_permute4x64_epi64(V12, 0xd8); - V34 = _mm256_permute4x64_epi64(V34, 0xd8); - - // Load rcp_freq ready for later - __m256i rfv1 = _mm256_shuffle_epi32(SYM_LOAD( 0, xB, xC, xD, xA),0x39); - __m256i rfv2 = _mm256_shuffle_epi32(SYM_LOAD( 4, xB, xC, xD, xA),0x39); - - // Now we have bottom N 16-bit values in each V12/V34 to flush - __m128i f = _mm256_extractf128_si256(V34, 1); - _mm_storeu_si128((__m128i *)(ptr16-8), f); - ptr16 -= _mm_popcnt_u32(imask4); - - f = _mm256_extractf128_si256(V34, 0); - _mm_storeu_si128((__m128i *)(ptr16-8), f); - ptr16 -= _mm_popcnt_u32(imask3); - - f = _mm256_extractf128_si256(V12, 1); - _mm_storeu_si128((__m128i *)(ptr16-8), f); - ptr16 -= _mm_popcnt_u32(imask2); - - f = _mm256_extractf128_si256(V12, 0); - _mm_storeu_si128((__m128i *)(ptr16-8), f); - ptr16 -= _mm_popcnt_u32(imask1); - - __m256i rfv3 = _mm256_shuffle_epi32(SYM_LOAD( 8, xB, xC, xD, xA),0x39); - __m256i rfv4 = _mm256_shuffle_epi32(SYM_LOAD(12, xB, xC, xD, xA),0x39); - - __m256i Rs1, Rs2, Rs3, Rs4; - Rs1 = _mm256_srli_epi32(Rv1,16); - Rs2 = _mm256_srli_epi32(Rv2,16); - Rs3 = _mm256_srli_epi32(Rv3,16); - Rs4 = _mm256_srli_epi32(Rv4,16); - - Rv1 = _mm256_blendv_epi8(Rv1, Rs1, cv1); - Rv2 = _mm256_blendv_epi8(Rv2, Rs2, cv2); - Rv3 = _mm256_blendv_epi8(Rv3, Rs3, cv3); - Rv4 = _mm256_blendv_epi8(Rv4, Rs4, cv4); - - // ------------------------------------------------------------ - // uint32_t q = (uint32_t) (((uint64_t)ransN[z] * - // rcp_freq[z]) >> rcp_shift[z]); - // ransN[z] = ransN[z] + bias[z] + q * cmpl_freq[z]; - - // Cannot trivially replace the multiply as mulhi_epu32 doesn't exist - // (only mullo). However we can use _mm256_mul_epu32 twice to get - // 64bit results (half our lanes) and shift/or to get the answer. - // - // (AVX512 allows us to hold it all in 64-bit lanes and use mullo_epi64 - // plus a shift. KNC has mulhi_epi32, but not sure if this is - // available.) 
- rfv1 = _mm256_mulhi_epu32(Rv1, rfv1); - rfv2 = _mm256_mulhi_epu32(Rv2, rfv2); - rfv3 = _mm256_mulhi_epu32(Rv3, rfv3); - rfv4 = _mm256_mulhi_epu32(Rv4, rfv4); - - // Load cmpl_freq / rcp_shift from syms - __m256i SDv1 = _mm256_shuffle_epi32(SYM_LOAD( 0, xD, xA, xB, xC),0x93); - __m256i SDv2 = _mm256_shuffle_epi32(SYM_LOAD( 4, xD, xA, xB, xC),0x93); - // Load bias from syms[] - __m256i biasv1=_mm256_shuffle_epi32(SYM_LOAD( 0, xC, xD, xA, xB),0x4E); - __m256i biasv2=_mm256_shuffle_epi32(SYM_LOAD( 4, xC, xD, xA, xB),0x4E); - - __m256i shiftv1 = _mm256_srli_epi32(SDv1, 16); - __m256i shiftv2 = _mm256_srli_epi32(SDv2, 16); - - __m256i SDv3 = _mm256_shuffle_epi32(SYM_LOAD( 8, xD, xA, xB, xC),0x93); - __m256i SDv4 = _mm256_shuffle_epi32(SYM_LOAD(12, xD, xA, xB, xC),0x93); - __m256i biasv3=_mm256_shuffle_epi32(SYM_LOAD( 8, xC, xD, xA, xB),0x4E); - __m256i biasv4=_mm256_shuffle_epi32(SYM_LOAD(12, xC, xD, xA, xB),0x4E); - - __m256i shiftv3 = _mm256_srli_epi32(SDv3, 16); - __m256i shiftv4 = _mm256_srli_epi32(SDv4, 16); - - shiftv1 = _mm256_sub_epi32(shiftv1, _mm256_set1_epi32(32)); - shiftv2 = _mm256_sub_epi32(shiftv2, _mm256_set1_epi32(32)); - shiftv3 = _mm256_sub_epi32(shiftv3, _mm256_set1_epi32(32)); - shiftv4 = _mm256_sub_epi32(shiftv4, _mm256_set1_epi32(32)); - - __m256i qv1 = _mm256_srlv_epi32(rfv1, shiftv1); - __m256i qv2 = _mm256_srlv_epi32(rfv2, shiftv2); - - __m256i freqv1 = _mm256_and_si256(SDv1, _mm256_set1_epi32(0xffff)); - __m256i freqv2 = _mm256_and_si256(SDv2, _mm256_set1_epi32(0xffff)); - qv1 = _mm256_mullo_epi32(qv1, freqv1); - qv2 = _mm256_mullo_epi32(qv2, freqv2); - - __m256i qv3 = _mm256_srlv_epi32(rfv3, shiftv3); - __m256i qv4 = _mm256_srlv_epi32(rfv4, shiftv4); - - __m256i freqv3 = _mm256_and_si256(SDv3, _mm256_set1_epi32(0xffff)); - __m256i freqv4 = _mm256_and_si256(SDv4, _mm256_set1_epi32(0xffff)); - qv3 = _mm256_mullo_epi32(qv3, freqv3); - qv4 = _mm256_mullo_epi32(qv4, freqv4); - - qv1 = _mm256_add_epi32(qv1, biasv1); - qv2 = _mm256_add_epi32(qv2, 
biasv2); - qv3 = _mm256_add_epi32(qv3, biasv3); - qv4 = _mm256_add_epi32(qv4, biasv4); - - for (z = 0; z < NX; z++) - iN[z]--; - - Rv1 = _mm256_add_epi32(Rv1, qv1); - Rv2 = _mm256_add_epi32(Rv2, qv2); - Rv3 = _mm256_add_epi32(Rv3, qv3); - Rv4 = _mm256_add_epi32(Rv4, qv4); - } - - STORE(Rv, ransN); - - ptr = (uint8_t *)ptr16; - - for (z = NX-1; z>=0; z--) - RansEncPutSymbol(&ransN[z], &ptr, &syms[0][lN[z]]); - - for (z = NX-1; z>=0; z--) - RansEncFlush(&ransN[z], &ptr); - - *out_size = (out_end - ptr) + tab_size; - - cp = out; - memmove(out + tab_size, ptr, out_end-ptr); - - htscodecs_tls_free(syms); - return out; -} - -/* - * A 32 x 32 matrix transpose and serialise from t[][] to out. - * Storing in the other orientation speeds up the decoder, and we - * can then flush to out in 1KB blocks. - */ -static inline void transpose_and_copy(uint8_t *out, int iN[32], - uint8_t t[32][32]) { -// int z; -// for (z = 0; z < NX; z++) { -// int k; -// for (k = 0; k < 32; k++) -// out[iN[z]+k] = t[k][z]; -// iN[z] += 32; -// } - - rot32_simd(t, out, iN); -} - -unsigned char *rans_uncompress_O1_32x16_avx2(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < NX*4) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL; - unsigned char *c_freq = NULL; - - uint32_t (*s3)[TOTFREQ_O1] = htscodecs_tls_alloc(256*TOTFREQ_O1*4); - if (!s3) - return NULL; - //uint32_t s3[256][TOTFREQ_O1] __attribute__((aligned(32))); - uint32_t (*s3F)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])s3; - - if (!out) - out_free = out = malloc(out_sz); - - if (!out) - goto err; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // compressed header? 
If so uncompress it - unsigned char *tab_end = NULL; - unsigned char *c_freq_end = cp_end; - unsigned int shift = *cp >> 4; - if (*cp++ & 1) { - uint32_t u_freq_sz, c_freq_sz; - cp += var_get_u32(cp, cp_end, &u_freq_sz); - cp += var_get_u32(cp, cp_end, &c_freq_sz); - if (c_freq_sz > cp_end - cp) - goto err; - tab_end = cp + c_freq_sz; - if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL, - u_freq_sz))) - goto err; - cp = c_freq; - c_freq_end = c_freq + u_freq_sz; - } - - // Decode order-0 symbol list; avoids needing in order-1 tables - cp += decode_freq1(cp, c_freq_end, shift, s3, s3F, NULL, NULL); - - if (tab_end) - cp = tab_end; - free(c_freq); - c_freq = NULL; - - if (cp_end - cp < NX * 4) - goto err; - - RansState R[NX] __attribute__((aligned(32))); - uint8_t *ptr = cp, *ptr_end = in + in_size; - int z; - for (z = 0; z < NX; z++) { - RansDecInit(&R[z], &ptr); - if (R[z] < RANS_BYTE_L) - goto err; - } - - int isz4 = out_sz/NX; - int iN[NX], lN[NX] __attribute__((aligned(32))) = {0}; - for (z = 0; z < NX; z++) - iN[z] = z*isz4; - - uint16_t *sp = (uint16_t *)ptr; - const uint32_t mask = (1u << shift)-1; - - __m256i maskv = _mm256_set1_epi32(mask); - LOAD(Rv, R); - LOAD(Lv, lN); - - union { - unsigned char tbuf[32][32]; - uint64_t tbuf64[32][4]; - } u __attribute__((aligned(32))); - unsigned int tidx = 0; - - if (0) { - int z; - for (z = 0; z < 32; z++) - iN[z] = iN[z] & ~31; - } - - if (shift == TF_SHIFT_O1) { - isz4 -= 64; - for (; iN[0] < isz4 && (uint8_t *)sp+64 < ptr_end; ) { - // m[z] = R[z] & mask; - __m256i masked1 = _mm256_and_si256(Rv1, maskv); - __m256i masked2 = _mm256_and_si256(Rv2, maskv); - - // S[z] = s3[lN[z]][m[z]]; - Lv1 = _mm256_slli_epi32(Lv1, TF_SHIFT_O1); - masked1 = _mm256_add_epi32(masked1, Lv1); - - Lv2 = _mm256_slli_epi32(Lv2, TF_SHIFT_O1); - masked2 = _mm256_add_epi32(masked2, Lv2); - - __m256i masked3 = _mm256_and_si256(Rv3, maskv); - __m256i masked4 = _mm256_and_si256(Rv4, maskv); - - Lv3 = _mm256_slli_epi32(Lv3, TF_SHIFT_O1); 
- masked3 = _mm256_add_epi32(masked3, Lv3); - - Lv4 = _mm256_slli_epi32(Lv4, TF_SHIFT_O1); - masked4 = _mm256_add_epi32(masked4, Lv4); - - __m256i Sv1 = _mm256_i32gather_epi32x((int *)&s3[0][0], masked1, - sizeof(s3[0][0])); - __m256i Sv2 = _mm256_i32gather_epi32x((int *)&s3[0][0], masked2, - sizeof(s3[0][0])); - - // f[z] = S[z]>>(TF_SHIFT_O1+8); - __m256i fv1 = _mm256_srli_epi32(Sv1, TF_SHIFT_O1+8); - __m256i fv2 = _mm256_srli_epi32(Sv2, TF_SHIFT_O1+8); - - __m256i Sv3 = _mm256_i32gather_epi32x((int *)&s3[0][0], masked3, - sizeof(s3[0][0])); - __m256i Sv4 = _mm256_i32gather_epi32x((int *)&s3[0][0], masked4, - sizeof(s3[0][0])); - - __m256i fv3 = _mm256_srli_epi32(Sv3, TF_SHIFT_O1+8); - __m256i fv4 = _mm256_srli_epi32(Sv4, TF_SHIFT_O1+8); - - // b[z] = (S[z]>>8) & mask; - __m256i bv1 = _mm256_and_si256(_mm256_srli_epi32(Sv1, 8), maskv); - __m256i bv2 = _mm256_and_si256(_mm256_srli_epi32(Sv2, 8), maskv); - __m256i bv3 = _mm256_and_si256(_mm256_srli_epi32(Sv3, 8), maskv); - __m256i bv4 = _mm256_and_si256(_mm256_srli_epi32(Sv4, 8), maskv); - - // s[z] = S[z] & 0xff; - __m256i sv1 = _mm256_and_si256(Sv1, _mm256_set1_epi32(0xff)); - __m256i sv2 = _mm256_and_si256(Sv2, _mm256_set1_epi32(0xff)); - __m256i sv3 = _mm256_and_si256(Sv3, _mm256_set1_epi32(0xff)); - __m256i sv4 = _mm256_and_si256(Sv4, _mm256_set1_epi32(0xff)); - - if (1) { - // A maximum frequency of 4096 doesn't fit in our s3 array. - // as it's 12 bit + 12 bit + 8 bit. It wraps around to zero. - // (We don't have this issue for TOTFREQ_O1_FAST.) - // - // Solution 1 is to change to spec to forbid freq of 4096. - // Easy hack is to add an extra symbol so it sums correctly. - // => 572 MB/s on q40 (deskpro). - // - // Solution 2 implemented here is to look for the wrap around - // and fix it. - // => 556 MB/s on q40 - // cope with max freq of 4096. 
Only 3% hit - __m256i max_freq = _mm256_set1_epi32(TOTFREQ_O1); - __m256i zero = _mm256_setzero_si256(); - __m256i cmp1 = _mm256_cmpeq_epi32(fv1, zero); - fv1 = _mm256_blendv_epi8(fv1, max_freq, cmp1); - __m256i cmp2 = _mm256_cmpeq_epi32(fv2, zero); - fv2 = _mm256_blendv_epi8(fv2, max_freq, cmp2); - } - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1) + b[z]; - Rv1 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv1,TF_SHIFT_O1), fv1), bv1); - Rv2 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv2,TF_SHIFT_O1), fv2), bv2); - - - //for (z = 0; z < NX; z++) lN[z] = c[z]; - Lv1 = sv1; - Lv2 = sv2; - - sv1 = _mm256_packus_epi32(sv1, sv2); - sv1 = _mm256_permute4x64_epi64(sv1, 0xd8); - - // Start loading next batch of normalised states - __m256i Vv1 = _mm256_cvtepu16_epi32( - _mm_loadu_si128((__m128i *)sp)); - - sv1 = _mm256_packus_epi16(sv1, sv1); - - // out[iN[z]] = c[z]; // simulate scatter - // RansDecRenorm(&R[z], &ptr); - __m256i renorm_mask1, renorm_mask2; - renorm_mask1 = _mm256_xor_si256(Rv1,_mm256_set1_epi32(0x80000000)); - renorm_mask2 = _mm256_xor_si256(Rv2,_mm256_set1_epi32(0x80000000)); - - renorm_mask1 = _mm256_cmpgt_epi32( - _mm256_set1_epi32(RANS_BYTE_L-0x80000000), - renorm_mask1); - renorm_mask2 = _mm256_cmpgt_epi32( - _mm256_set1_epi32(RANS_BYTE_L-0x80000000), - renorm_mask2); - - unsigned int imask1 = _mm256_movemask_ps((__m256)renorm_mask1); - __m256i idx1 = _mm256_load_si256((const __m256i*)permute[imask1]); - __m256i Yv1 = _mm256_slli_epi32(Rv1, 16); - __m256i Yv2 = _mm256_slli_epi32(Rv2, 16); - - unsigned int imask2 = _mm256_movemask_ps((__m256)renorm_mask2); - Vv1 = _mm256_permutevar8x32_epi32(Vv1, idx1); - sp += _mm_popcnt_u32(imask1); - - __m256i idx2 = _mm256_load_si256((const __m256i*)permute[imask2]); - __m256i Vv2 = _mm256_cvtepu16_epi32( - _mm_loadu_si128((__m128i *)sp)); - sp += _mm_popcnt_u32(imask2); - Vv2 = _mm256_permutevar8x32_epi32(Vv2, idx2); - - Yv1 = _mm256_or_si256(Yv1, Vv1); - Yv2 = _mm256_or_si256(Yv2, 
Vv2); - - Rv1 = _mm256_blendv_epi8(Rv1, Yv1, renorm_mask1); - Rv2 = _mm256_blendv_epi8(Rv2, Yv2, renorm_mask2); - - ////////////////////////////////////////////////////////////////// - // Start loading next batch of normalised states - __m256i Vv3 = _mm256_cvtepu16_epi32( - _mm_loadu_si128((__m128i *)sp)); - - if (1) { - // cope with max freq of 4096 - __m256i max_freq = _mm256_set1_epi32(TOTFREQ_O1); - __m256i zero = _mm256_setzero_si256(); - __m256i cmp3 = _mm256_cmpeq_epi32(fv3, zero); - fv3 = _mm256_blendv_epi8(fv3, max_freq, cmp3); - __m256i cmp4 = _mm256_cmpeq_epi32(fv4, zero); - fv4 = _mm256_blendv_epi8(fv4, max_freq, cmp4); - } - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1) + b[z]; - Rv3 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv3,TF_SHIFT_O1), fv3), bv3); - Rv4 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv4,TF_SHIFT_O1), fv4), bv4); - - //for (z = 0; z < NX; z++) lN[z] = c[z]; - Lv3 = sv3; - Lv4 = sv4; - - // out[iN[z]] = c[z]; // simulate scatter - // RansDecRenorm(&R[z], &ptr); - __m256i renorm_mask3, renorm_mask4; - renorm_mask3 = _mm256_xor_si256(Rv3,_mm256_set1_epi32(0x80000000)); - renorm_mask4 = _mm256_xor_si256(Rv4,_mm256_set1_epi32(0x80000000)); - - renorm_mask3 = _mm256_cmpgt_epi32( - _mm256_set1_epi32(RANS_BYTE_L-0x80000000), - renorm_mask3); - renorm_mask4 = _mm256_cmpgt_epi32( - _mm256_set1_epi32(RANS_BYTE_L-0x80000000), - renorm_mask4); - - __m256i Yv3 = _mm256_slli_epi32(Rv3, 16); - __m256i Yv4 = _mm256_slli_epi32(Rv4, 16); - - unsigned int imask3 = _mm256_movemask_ps((__m256)renorm_mask3); - unsigned int imask4 = _mm256_movemask_ps((__m256)renorm_mask4); - __m256i idx3 = _mm256_load_si256((const __m256i*)permute[imask3]); - sp += _mm_popcnt_u32(imask3); - Vv3 = _mm256_permutevar8x32_epi32(Vv3, idx3); - - sv3 = _mm256_packus_epi32(sv3, sv4); - sv3 = _mm256_permute4x64_epi64(sv3, 0xd8); - sv3 = _mm256_packus_epi16(sv3, sv3); - - u.tbuf64[tidx][0] = _mm256_extract_epi64(sv1, 0); - u.tbuf64[tidx][1] = 
_mm256_extract_epi64(sv1, 2); - u.tbuf64[tidx][2] = _mm256_extract_epi64(sv3, 0); - u.tbuf64[tidx][3] = _mm256_extract_epi64(sv3, 2); - - iN[0]++; - if (++tidx == 32) { - iN[0]-=32; - - transpose_and_copy(out, iN, u.tbuf); - tidx = 0; - } - - __m256i idx4 = _mm256_load_si256((const __m256i*)permute[imask4]); - __m256i Vv4 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - - //Vv = _mm256_and_si256(Vv, renorm_mask); (blend does the AND anyway) - Yv3 = _mm256_or_si256(Yv3, Vv3); - Vv4 = _mm256_permutevar8x32_epi32(Vv4, idx4); - Yv4 = _mm256_or_si256(Yv4, Vv4); - - sp += _mm_popcnt_u32(imask4); - - Rv3 = _mm256_blendv_epi8(Rv3, Yv3, renorm_mask3); - Rv4 = _mm256_blendv_epi8(Rv4, Yv4, renorm_mask4); - - } - isz4 += 64; - - STORE(Rv, R); - STORE(Lv, lN); - ptr = (uint8_t *)sp; - - if (1) { - iN[0]-=tidx; - int T; - for (z = 0; z < NX; z++) - for (T = 0; T < tidx; T++) - out[iN[z]++] = u.tbuf[T][z]; - } - - // Scalar version for close to the end of in[] array so we don't - // do SIMD loads beyond the end of the buffer - for (; iN[0] < isz4;) { - for (z = 0; z < NX; z++) { - uint32_t m = R[z] & ((1u<>(TF_SHIFT_O1+8); - R[z] = (F?F:4096) * (R[z]>>TF_SHIFT_O1) + - ((S>>8) & ((1u<>(TF_SHIFT_O1+8); - R[z] = (F?F:4096) * (R[z]>>TF_SHIFT_O1) + - ((S>>8) & ((1u<>(TF_SHIFT_O1+8); - __m256i fv1 = _mm256_srli_epi32(Sv1, TF_SHIFT_O1_FAST+8); - __m256i fv2 = _mm256_srli_epi32(Sv2, TF_SHIFT_O1_FAST+8); - - __m256i Sv3 = _mm256_i32gather_epi32x((int *)&s3F[0][0], masked3, - sizeof(s3F[0][0])); - __m256i Sv4 = _mm256_i32gather_epi32x((int *)&s3F[0][0], masked4, - sizeof(s3F[0][0])); - - __m256i fv3 = _mm256_srli_epi32(Sv3, TF_SHIFT_O1_FAST+8); - __m256i fv4 = _mm256_srli_epi32(Sv4, TF_SHIFT_O1_FAST+8); - - // b[z] = (S[z]>>8) & mask; - __m256i bv1 = _mm256_and_si256(_mm256_srli_epi32(Sv1, 8), maskv); - __m256i bv2 = _mm256_and_si256(_mm256_srli_epi32(Sv2, 8), maskv); - __m256i bv3 = _mm256_and_si256(_mm256_srli_epi32(Sv3, 8), maskv); - __m256i bv4 = 
_mm256_and_si256(_mm256_srli_epi32(Sv4, 8), maskv); - - // s[z] = S[z] & 0xff; - __m256i sv1 = _mm256_and_si256(Sv1, _mm256_set1_epi32(0xff)); - __m256i sv2 = _mm256_and_si256(Sv2, _mm256_set1_epi32(0xff)); - __m256i sv3 = _mm256_and_si256(Sv3, _mm256_set1_epi32(0xff)); - __m256i sv4 = _mm256_and_si256(Sv4, _mm256_set1_epi32(0xff)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1) + b[z]; - Rv1 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv1,TF_SHIFT_O1_FAST), fv1), bv1); - Rv2 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv2,TF_SHIFT_O1_FAST), fv2), bv2); - - - //for (z = 0; z < NX; z++) lN[z] = c[z]; - Lv1 = sv1; - Lv2 = sv2; - - sv1 = _mm256_packus_epi32(sv1, sv2); - sv1 = _mm256_permute4x64_epi64(sv1, 0xd8); - - // Start loading next batch of normalised states - __m256i Vv1 = _mm256_cvtepu16_epi32( - _mm_loadu_si128((__m128i *)sp)); - - sv1 = _mm256_packus_epi16(sv1, sv1); - - // out[iN[z]] = c[z]; // simulate scatter - // RansDecRenorm(&R[z], &ptr); - __m256i renorm_mask1, renorm_mask2; - renorm_mask1 = _mm256_xor_si256(Rv1,_mm256_set1_epi32(0x80000000)); - renorm_mask2 = _mm256_xor_si256(Rv2,_mm256_set1_epi32(0x80000000)); - - renorm_mask1 = _mm256_cmpgt_epi32( - _mm256_set1_epi32(RANS_BYTE_L-0x80000000), - renorm_mask1); - renorm_mask2 = _mm256_cmpgt_epi32( - _mm256_set1_epi32(RANS_BYTE_L-0x80000000), - renorm_mask2); - - unsigned int imask1 = _mm256_movemask_ps((__m256)renorm_mask1); - __m256i idx1 = _mm256_load_si256((const __m256i*)permute[imask1]); - __m256i Yv1 = _mm256_slli_epi32(Rv1, 16); - __m256i Yv2 = _mm256_slli_epi32(Rv2, 16); - - unsigned int imask2 = _mm256_movemask_ps((__m256)renorm_mask2); - Vv1 = _mm256_permutevar8x32_epi32(Vv1, idx1); - sp += _mm_popcnt_u32(imask1); - - __m256i idx2 = _mm256_load_si256((const __m256i*)permute[imask2]); - __m256i Vv2 = _mm256_cvtepu16_epi32( - _mm_loadu_si128((__m128i *)sp)); - sp += _mm_popcnt_u32(imask2); - Vv2 = _mm256_permutevar8x32_epi32(Vv2, idx2); - - Yv1 = 
_mm256_or_si256(Yv1, Vv1); - Yv2 = _mm256_or_si256(Yv2, Vv2); - - Rv1 = _mm256_blendv_epi8(Rv1, Yv1, renorm_mask1); - Rv2 = _mm256_blendv_epi8(Rv2, Yv2, renorm_mask2); - - ///////////////////////////////////////////////////////////////// - // Start loading next batch of normalised states - __m256i Vv3 = _mm256_cvtepu16_epi32( - _mm_loadu_si128((__m128i *)sp)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1) + b[z]; - Rv3 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv3,TF_SHIFT_O1_FAST), fv3), bv3); - Rv4 = _mm256_add_epi32( - _mm256_mullo_epi32( - _mm256_srli_epi32(Rv4,TF_SHIFT_O1_FAST), fv4), bv4); - - //for (z = 0; z < NX; z++) lN[z] = c[z]; - Lv3 = sv3; - Lv4 = sv4; - - // out[iN[z]] = c[z]; // simulate scatter - // RansDecRenorm(&R[z], &ptr); - __m256i renorm_mask3, renorm_mask4; - renorm_mask3 = _mm256_xor_si256(Rv3,_mm256_set1_epi32(0x80000000)); - renorm_mask4 = _mm256_xor_si256(Rv4,_mm256_set1_epi32(0x80000000)); - - renorm_mask3 = _mm256_cmpgt_epi32( - _mm256_set1_epi32(RANS_BYTE_L-0x80000000), - renorm_mask3); - renorm_mask4 = _mm256_cmpgt_epi32( - _mm256_set1_epi32(RANS_BYTE_L-0x80000000), - renorm_mask4); - - __m256i Yv3 = _mm256_slli_epi32(Rv3, 16); - __m256i Yv4 = _mm256_slli_epi32(Rv4, 16); - - unsigned int imask3 = _mm256_movemask_ps((__m256)renorm_mask3); - unsigned int imask4 = _mm256_movemask_ps((__m256)renorm_mask4); - __m256i idx3 = _mm256_load_si256((const __m256i*)permute[imask3]); - sp += _mm_popcnt_u32(imask3); - Vv3 = _mm256_permutevar8x32_epi32(Vv3, idx3); - - // sv3 sv4 are 32-bit ints with lowest bit being char - sv3 = _mm256_packus_epi32(sv3, sv4); // 32 to 16; ABab - sv3 = _mm256_permute4x64_epi64(sv3, 0xd8); // shuffle; AaBb - sv3 = _mm256_packus_epi16(sv3, sv3); // 16 to 8 - - // Method 1 - u.tbuf64[tidx][0] = _mm256_extract_epi64(sv1, 0); - u.tbuf64[tidx][1] = _mm256_extract_epi64(sv1, 2); - u.tbuf64[tidx][2] = _mm256_extract_epi64(sv3, 0); - u.tbuf64[tidx][3] = _mm256_extract_epi64(sv3, 2); - -// // Method 2 -// sv1 = 
_mm256_permute4x64_epi64(sv1, 8); // x x 10 00 -// _mm_storeu_si128((__m128i *)&u.tbuf64[tidx][0], -// _mm256_extractf128_si256(sv1, 0)); -// sv3 = _mm256_permute4x64_epi64(sv3, 8); // x x 10 00 -// _mm_storeu_si128((__m128i *)&u.tbuf64[tidx][2], -// _mm256_extractf128_si256(sv3, 0)); - -// // Method 3 -// sv1 = _mm256_and_si256(sv1, _mm256_set_epi64x(0,-1,0,-1)); // AxBx -// sv3 = _mm256_and_si256(sv3, _mm256_set_epi64x(-1,0,-1,0)); // xCxD -// sv1 = _mm256_or_si256(sv1, sv3); // ACBD -// sv1 = _mm256_permute4x64_epi64(sv1, 0xD8); //rev 00 10 01 11; ABCD -// _mm256_storeu_si256((__m256i *)u.tbuf64[tidx], sv1); - - iN[0]++; - if (++tidx == 32) { - iN[0]-=32; - - // We have tidx[x][y] which we want to store in - // memory in out[y][z] instead. This is an unrolled - // transposition. - // - // A straight memcpy (obviously wrong) decodes my test - // data in around 1030MB/s vs 930MB/s for this transpose, - // giving an idea of the time spent in this portion. - transpose_and_copy(out, iN, u.tbuf); - - tidx = 0; - } - - __m256i idx4 = _mm256_load_si256((const __m256i*)permute[imask4]); - __m256i Vv4 = _mm256_cvtepu16_epi32( - _mm_loadu_si128((__m128i *)sp)); - - Yv3 = _mm256_or_si256(Yv3, Vv3); - Vv4 = _mm256_permutevar8x32_epi32(Vv4, idx4); - Yv4 = _mm256_or_si256(Yv4, Vv4); - - sp += _mm_popcnt_u32(imask4); - - Rv3 = _mm256_blendv_epi8(Rv3, Yv3, renorm_mask3); - Rv4 = _mm256_blendv_epi8(Rv4, Yv4, renorm_mask4); - } - isz4 += 64; - - STORE(Rv, R); - STORE(Lv, lN); - ptr = (uint8_t *)sp; - - if (1) { - iN[0]-=tidx; - int T; - for (z = 0; z < NX; z++) - for (T = 0; T < tidx; T++) - out[iN[z]++] = u.tbuf[T][z]; - } - - // Scalar version for close to the end of in[] array so we don't - // do SIMD loads beyond the end of the buffer - for (; iN[0] < isz4;) { - for (z = 0; z < NX; z++) { - uint32_t m = R[z] & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[z]>>TF_SHIFT_O1_FAST) + - ((S>>8) & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[z]>>TF_SHIFT_O1_FAST) + - ((S>>8) & ((1u< -#include -#include 
-#include -#include -#include - -#include "rANS_word.h" -#include "rANS_static4x16.h" -#define ROT32_SIMD -#include "rANS_static16_int.h" -#include "varint.h" -#include "utils.h" - -unsigned char *rans_compress_O0_32x16_avx512(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - unsigned char *cp, *out_end; - RansEncSymbol syms[256]; - RansState ransN[32] __attribute__((aligned(64))); - uint8_t* ptr; - uint32_t F[256+MAGIC] = {0}; - int i, j, tab_size = 0, x, z; - // -20 for order/size/meta - uint32_t bound = rans_compress_bound_4x16(in_size,0)-20; - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - // If "out" isn't word aligned, tweak out_end/ptr to ensure it is. - // We already added more round in bound to allow for this. - if (((size_t)out)&1) - bound--; - ptr = out_end = out + bound; - - if (in_size == 0) - goto empty; - - // Compute statistics - if (hist8(in, in_size, F) < 0) - return NULL; - - // Normalise so frequences sum to power of 2 - uint32_t fsum = in_size; - uint32_t max_val = round2(fsum); - if (max_val > TOTFREQ) - max_val = TOTFREQ; - - if (normalise_freq(F, fsum, max_val) < 0) - return NULL; - fsum=max_val; - - cp = out; - cp += encode_freq(cp, F); - tab_size = cp-out; - //write(2, out+4, cp-(out+4)); - - if (normalise_freq(F, fsum, TOTFREQ) < 0) - return NULL; - - // Encode statistics and build lookup tables for SIMD encoding. 
- uint32_t SB[256], SA[256], SD[256], SC[256]; - for (x = j = 0; j < 256; j++) { - if (F[j]) { - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - SB[j] = syms[j].x_max; - SA[j] = syms[j].rcp_freq; - SD[j] = (syms[j].cmpl_freq<<0) | ((syms[j].rcp_shift-32)<<16); - SC[j] = syms[j].bias; - x += F[j]; - } - } - - for (z = 0; z < 32; z++) - RansEncInit(&ransN[z]); - - z = i = in_size&(32-1); - while (z-- > 0) - RansEncPutSymbol(&ransN[z], &ptr, &syms[in[in_size-(i-z)]]); - -#define LOAD512(a,b) \ - __m512i a##1 = _mm512_load_si512((__m512i *)&b[0]); \ - __m512i a##2 = _mm512_load_si512((__m512i *)&b[16]); - -#define STORE512(a,b) \ - _mm512_store_si512((__m256i *)&b[0], a##1); \ - _mm512_store_si512((__m256i *)&b[16], a##2); - - LOAD512(Rv, ransN); - - uint16_t *ptr16 = (uint16_t *)ptr; - for (i=(in_size &~(32-1)); i>0; i-=32) { - uint8_t *c = &in[i-32]; - - // GATHER versions - // Much faster now we have an efficient loadu mechanism in place, - // BUT... - // Try this for avx2 variant too? Better way to populate the mm256 - // regs for mix of avx2 and avx512 opcodes. 
- __m256i c12 = _mm256_loadu_si256((__m256i const *)c); - __m512i c1 = _mm512_cvtepu8_epi32(_mm256_extracti128_si256(c12,0)); - __m512i c2 = _mm512_cvtepu8_epi32(_mm256_extracti128_si256(c12,1)); -#define SET512(a,b) \ - __m512i a##1 = _mm512_i32gather_epi32(c1, b, 4); \ - __m512i a##2 = _mm512_i32gather_epi32(c2, b, 4) - - SET512(xmax, SB); - - uint16_t gt_mask1 = _mm512_cmpgt_epi32_mask(Rv1, xmax1); - int pc1 = _mm_popcnt_u32(gt_mask1); - __m512i Rp1 = _mm512_and_si512(Rv1, _mm512_set1_epi32(0xffff)); - __m512i Rp2 = _mm512_and_si512(Rv2, _mm512_set1_epi32(0xffff)); - uint16_t gt_mask2 = _mm512_cmpgt_epi32_mask(Rv2, xmax2); - SET512(SDv, SD); - int pc2 = _mm_popcnt_u32(gt_mask2); - - //Rp1 = _mm512_maskz_compress_epi32(gt_mask1, Rp1); - Rp1 = _mm512_maskz_compress_epi32(gt_mask1, Rp1); - Rp2 = _mm512_maskz_compress_epi32(gt_mask2, Rp2); - - _mm512_mask_cvtepi32_storeu_epi16(ptr16-pc2, (1<= 0; z--) - RansEncFlush(&ransN[z], &ptr); - - empty: - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - - memmove(out + tab_size, ptr, out_end-ptr); - - return out; -} - -unsigned char *rans_uncompress_O0_32x16_avx512(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < 32*4) // 32-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - - /* Load in the static tables */ - unsigned char *cp = in, *out_free = NULL; - unsigned char *cp_end = in + in_size; - int i; - uint32_t s3[TOTFREQ] __attribute__((aligned(64))); // For TF_SHIFT <= 12 - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - // Precompute reverse lookup of frequency. 
- uint32_t F[256] = {0}, fsum; - int fsz = decode_freq(cp, cp_end, F, &fsum); - if (!fsz) - goto err; - cp += fsz; - - normalise_freq_shift(F, fsum, TOTFREQ); - - // Build symbols; fixme, do as part of decode, see the _d variant - if (rans_F_to_s3(F, TF_SHIFT, s3)) - goto err; - - if (cp_end - cp < 32 * 4) - goto err; - - int z; - RansState Rv[32] __attribute__((aligned(64))); - for (z = 0; z < 32; z++) { - RansDecInit(&Rv[z], &cp); - if (Rv[z] < RANS_BYTE_L) - goto err; - } - - uint16_t *sp = (uint16_t *)cp; - - int out_end = (out_sz&~(32-1)); - const uint32_t mask = (1u << TF_SHIFT)-1; - - __m512i maskv = _mm512_set1_epi32(mask); // set mask in all lanes - __m512i R1 = _mm512_load_epi32(&Rv[0]); - __m512i R2 = _mm512_load_epi32(&Rv[16]); - - // Start of the first loop iteration, which we do move to the end of the - // loop for the next cycle so we can remove some of the instr. latency. - __m512i masked1 = _mm512_and_epi32(R1, maskv); - __m512i masked2 = _mm512_and_epi32(R2, maskv); - __m512i S1 = _mm512_i32gather_epi32(masked1, (int *)s3, sizeof(*s3)); - __m512i S2 = _mm512_i32gather_epi32(masked2, (int *)s3, sizeof(*s3)); - - uint8_t overflow[64+64] = {0}; - for (i=0; i < out_end; i+=32) { - //for (z = 0; z < 16; z++) { - - // Protect against running off the end of in buffer. - // We copy it to a worst-case local buffer when near the end. 
- if ((uint8_t *)sp+64 > cp_end) { - memmove(overflow, sp, cp_end - (uint8_t *)sp); - sp = (uint16_t *)overflow; - cp_end = overflow + sizeof(overflow); - } - - //uint32_t S = s3[R[z] & mask]; - __m512i renorm_words1 = _mm512_cvtepu16_epi32(_mm256_loadu_si256((const __m256i *)sp)); // next 16 words - - //uint16_t f = S>>(TF_SHIFT+8), b = (S>>8) & mask; - __m512i f1 = _mm512_srli_epi32(S1, TF_SHIFT+8); - __m512i f2 = _mm512_srli_epi32(S2, TF_SHIFT+8); - __m512i b1 = _mm512_and_epi32(_mm512_srli_epi32(S1, 8), maskv); - __m512i b2 = _mm512_and_epi32(_mm512_srli_epi32(S2, 8), maskv); - - //R[z] = f * (R[z] >> TF_SHIFT) + b; - // approx 10 cycle latency on mullo. - R1 = _mm512_add_epi32( - _mm512_mullo_epi32( - _mm512_srli_epi32(R1, TF_SHIFT), f1), b1); - R2 = _mm512_add_epi32( - _mm512_mullo_epi32( - _mm512_srli_epi32(R2, TF_SHIFT), f2), b2); - - // renorm. this is the interesting part: - __mmask16 renorm_mask1, renorm_mask2; - renorm_mask1=_mm512_cmplt_epu32_mask(R1, _mm512_set1_epi32(RANS_BYTE_L)); - renorm_mask2=_mm512_cmplt_epu32_mask(R2, _mm512_set1_epi32(RANS_BYTE_L)); - // advance by however many words we actually read - sp += _mm_popcnt_u32(renorm_mask1); - __m512i renorm_words2 = _mm512_cvtepu16_epi32(_mm256_loadu_si256( - (const __m256i *)sp)); - - // select masked only - __m512i renorm_vals1, renorm_vals2; - renorm_vals1 = _mm512_maskz_expand_epi32(renorm_mask1, renorm_words1); - renorm_vals2 = _mm512_maskz_expand_epi32(renorm_mask2, renorm_words2); - // shift & add selected words - R1 = _mm512_mask_slli_epi32(R1, renorm_mask1, R1, 16); - R2 = _mm512_mask_slli_epi32(R2, renorm_mask2, R2, 16); - R1 = _mm512_add_epi32(R1, renorm_vals1); - R2 = _mm512_add_epi32(R2, renorm_vals2); - - // For start of next loop iteration. This has been moved here - // (and duplicated to before the loop starts) so we can do something - // with the latency period of gather, such as finishing up the - // renorm offset and writing the results. 
- __m512i S1_ = S1; // temporary copy for use in out[]=S later - __m512i S2_ = S2; - - masked1 = _mm512_and_epi32(R1, maskv); - masked2 = _mm512_and_epi32(R2, maskv); - // Gather is slow bit (half total time) - 30 cycle latency. - S1 = _mm512_i32gather_epi32(masked1, (int *)s3, sizeof(*s3)); - S2 = _mm512_i32gather_epi32(masked2, (int *)s3, sizeof(*s3)); - - // advance by however many words we actually read - sp += _mm_popcnt_u32(renorm_mask2); - - //out[i+z] = S; - _mm_storeu_si128((__m128i *)(out+i), _mm512_cvtepi32_epi8(S1_)); - _mm_storeu_si128((__m128i *)(out+i+16), _mm512_cvtepi32_epi8(S2_)); - } - - _mm512_store_epi32(&Rv[ 0], R1); - _mm512_store_epi32(&Rv[16], R2); - - for (z = out_sz & (32-1); z-- > 0; ) - out[out_end + z] = s3[Rv[z] & mask]; - - return out; - - err: - free(out_free); - return NULL; -} - -#define TBUF8 -#ifdef TBUF8 -// 15% quicker overall O1 decode now due to rot32_simd below. - -// NB: This uses AVX2 though and we could rewrite using AVX512 for -// further speed gains. -static inline void transpose_and_copy(uint8_t *out, int iN[32], - uint8_t t[32][32]) { -// int z; -// for (z = 0; z < 32; z++) { -// int k; -// for (k = 0; k < 32; k++) -// out[iN[z]+k] = t[k][z]; -// iN[z] += 32; -// } - - rot32_simd(t, out, iN); -} - -#else -// Implemented using AVX512 gathers. -// This is faster than a naive scalar implementation, but doesn't beat the -// AVX2 vectorised 32x32 transpose function. 
-static inline void transpose_and_copy_avx512(uint8_t *out, int iN[32], - uint32_t t32[32][32]) { - int z; -// for (z = 0; z < 32; z++) { -// int k; -// for (k = 0; k < 32; k++) -// out[iN[z]+k] = t32[k][z]; -// iN[z] += 32; -// } - - - __m512i v1 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0); - v1 = _mm512_slli_epi32(v1, 5); - - for (z = 0; z < 32; z++) { - __m512i t1 = _mm512_i32gather_epi32(v1, &t32[ 0][z], 4); - __m512i t2 = _mm512_i32gather_epi32(v1, &t32[16][z], 4); - _mm_storeu_si128((__m128i*)(&out[iN[z] ]), _mm512_cvtepi32_epi8(t1)); - _mm_storeu_si128((__m128i*)(&out[iN[z]+16]), _mm512_cvtepi32_epi8(t2)); - iN[z] += 32; - } -} -#endif // TBUF - -unsigned char *rans_compress_O1_32x16_avx512(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - unsigned char *cp, *out_end, *out_free = NULL; - unsigned int tab_size; - uint32_t bound = rans_compress_bound_4x16(in_size,1)-20; - int z; - RansState ransN[32] __attribute__((aligned(64))); - - if (in_size < 32) // force O0 instead - return NULL; - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - if (((size_t)out)&1) - bound--; - out_end = out + bound; - - RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms))); - if (!syms) { - free(out_free); - return NULL; - } - - cp = out; - int shift = encode_freq1(in, in_size, 32, syms, &cp); - if (shift < 0) { - free(out_free); - htscodecs_tls_free(syms); - return NULL; - } - tab_size = cp - out; - - for (z = 0; z < 32; z++) - RansEncInit(&ransN[z]); - - uint8_t* ptr = out_end; - - int iN[32], isz4 = in_size/32; - for (z = 0; z < 32; z++) - iN[z] = (z+1)*isz4-2; - - uint32_t lN[32] __attribute__((aligned(64))); - for (z = 0; z < 32; z++) - lN[z] = in[iN[z]+1]; - - // Deal with the remainder - z = 32-1; - lN[z] = in[in_size-1]; - for (iN[z] = in_size-2; iN[z] > 32*isz4-2; iN[z]--) { - unsigned char c = in[iN[z]]; - 
RansEncPutSymbol(&ransN[z], &ptr, &syms[c][lN[z]]); - lN[z] = c; - } - - LOAD512(Rv, ransN); - - uint16_t *ptr16 = (uint16_t *)ptr; - __m512i last2 = _mm512_set_epi32(lN[31], lN[30], lN[29], lN[28], - lN[27], lN[26], lN[25], lN[24], - lN[23], lN[22], lN[21], lN[20], - lN[19], lN[18], lN[17], lN[16]); - __m512i last1 = _mm512_set_epi32(lN[15], lN[14], lN[13], lN[12], - lN[11], lN[10], lN[ 9], lN[ 8], - lN[ 7], lN[ 6], lN[ 5], lN[ 4], - lN[ 3], lN[ 2], lN[ 1], lN[ 0]); - - __m512i iN2 = _mm512_set_epi32(iN[31], iN[30], iN[29], iN[28], - iN[27], iN[26], iN[25], iN[24], - iN[23], iN[22], iN[21], iN[20], - iN[19], iN[18], iN[17], iN[16]); - __m512i iN1 = _mm512_set_epi32(iN[15], iN[14], iN[13], iN[12], - iN[11], iN[10], iN[ 9], iN[ 8], - iN[ 7], iN[ 6], iN[ 5], iN[ 4], - iN[ 3], iN[ 2], iN[ 1], iN[ 0]); - - __m512i c1 = _mm512_i32gather_epi32(iN1, in, 1); - __m512i c2 = _mm512_i32gather_epi32(iN2, in, 1); - - for (; iN[0] >= 0; iN[0]--) { - // Note, consider doing the same approach for the AVX2 encoder. - // Maybe we can also get gather working well there? - // Gather here is still a major latency bottleneck, consuming - // around 40% of CPU cycles overall. - - // FIXME: maybe we need to cope with in[31] read over-flow - // on loop cycles 0, 1, 2 where gather reads 32-bits instead of - // 8 bits. Use set instead there on c2? 
- c1 = _mm512_and_si512(c1, _mm512_set1_epi32(0xff)); - c2 = _mm512_and_si512(c2, _mm512_set1_epi32(0xff)); - - // index into syms[0][0] array, used for x_max, rcp_freq, and bias - __m512i vidx1 = _mm512_slli_epi32(c1, 8); - __m512i vidx2 = _mm512_slli_epi32(c2, 8); - vidx1 = _mm512_add_epi32(vidx1, last1); - vidx2 = _mm512_add_epi32(vidx2, last2); - vidx1 = _mm512_slli_epi32(vidx1, 2); - vidx2 = _mm512_slli_epi32(vidx2, 2); - - // ------------------------------------------------------------ - // for (z = NX-1; z >= 0; z--) { - // if (ransN[z] >= x_max[z]) { - // *--ptr16 = ransN[z] & 0xffff; - // ransN[z] >>= 16; - // } - // } - -#define SET512x(a,x) \ - __m512i a##1 = _mm512_i32gather_epi32(vidx1, &syms[0][0].x, 4); \ - __m512i a##2 = _mm512_i32gather_epi32(vidx2, &syms[0][0].x, 4) - - // Start of next loop, moved here to remove latency. - // last[z] = c[z] - // iN[z]-- - // c[z] = in[iN[z]] - last1 = c1; - last2 = c2; - iN1 = _mm512_sub_epi32(iN1, _mm512_set1_epi32(1)); - iN2 = _mm512_sub_epi32(iN2, _mm512_set1_epi32(1)); - c1 = _mm512_i32gather_epi32(iN1, in, 1); - c2 = _mm512_i32gather_epi32(iN2, in, 1); - - SET512x(xmax, x_max); // high latency - - uint16_t gt_mask1 = _mm512_cmpgt_epi32_mask(Rv1, xmax1); - int pc1 = _mm_popcnt_u32(gt_mask1); - __m512i Rp1 = _mm512_and_si512(Rv1, _mm512_set1_epi32(0xffff)); - __m512i Rp2 = _mm512_and_si512(Rv2, _mm512_set1_epi32(0xffff)); - uint16_t gt_mask2 = _mm512_cmpgt_epi32_mask(Rv2, xmax2); - SET512x(SDv, cmpl_freq); // good - int pc2 = _mm_popcnt_u32(gt_mask2); - - Rp1 = _mm512_maskz_compress_epi32(gt_mask1, Rp1); - Rp2 = _mm512_maskz_compress_epi32(gt_mask2, Rp2); - - _mm512_mask_cvtepi32_storeu_epi16(ptr16-pc2, (1<> rcp_shift[z]); - // ransN[z] = ransN[z] + bias[z] + q * cmpl_freq[z]; - SET512x(rfv, rcp_freq); // good-ish - - __m512i rf1_hm = _mm512_mul_epu32(_mm512_srli_epi64(Rv1, 32), - _mm512_srli_epi64(rfv1, 32)); - __m512i rf2_hm = _mm512_mul_epu32(_mm512_srli_epi64(Rv2, 32), - _mm512_srli_epi64(rfv2, 32)); - 
__m512i rf1_lm = _mm512_srli_epi64(_mm512_mul_epu32(Rv1, rfv1), 32); - __m512i rf2_lm = _mm512_srli_epi64(_mm512_mul_epu32(Rv2, rfv2), 32); - - const __m512i top32 = _mm512_set1_epi64((uint64_t)0xffffffff00000000); - rf1_hm = _mm512_and_epi32(rf1_hm, top32); - rf2_hm = _mm512_and_epi32(rf2_hm, top32); - rfv1 = _mm512_or_epi32(rf1_lm, rf1_hm); - rfv2 = _mm512_or_epi32(rf2_lm, rf2_hm); - - SET512x(biasv, bias); // good - __m512i shiftv1 = _mm512_srli_epi32(SDv1, 16); - __m512i shiftv2 = _mm512_srli_epi32(SDv2, 16); - - shiftv1 = _mm512_sub_epi32(shiftv1, _mm512_set1_epi32(32)); - shiftv2 = _mm512_sub_epi32(shiftv2, _mm512_set1_epi32(32)); - - __m512i qv1 = _mm512_srlv_epi32(rfv1, shiftv1); - __m512i qv2 = _mm512_srlv_epi32(rfv2, shiftv2); - - const __m512i bot16 = _mm512_set1_epi32(0xffff); - qv1 = _mm512_mullo_epi32(qv1, _mm512_and_si512(SDv1, bot16)); - qv2 = _mm512_mullo_epi32(qv2, _mm512_and_si512(SDv2, bot16)); - - qv1 = _mm512_add_epi32(qv1, biasv1); - Rv1 = _mm512_add_epi32(Rv1, qv1); - - qv2 = _mm512_add_epi32(qv2, biasv2); - Rv2 = _mm512_add_epi32(Rv2, qv2); - } - - STORE512(Rv, ransN); - STORE512(last, lN); - - ptr = (uint8_t *)ptr16; - - for (z = 32-1; z>=0; z--) - RansEncPutSymbol(&ransN[z], &ptr, &syms[0][lN[z]]); - - for (z = 32-1; z >= 0; z--) - RansEncFlush(&ransN[z], &ptr); - - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - -// cp = out; -// *cp++ = (in_size>> 0) & 0xff; -// *cp++ = (in_size>> 8) & 0xff; -// *cp++ = (in_size>>16) & 0xff; -// *cp++ = (in_size>>24) & 0xff; - - memmove(out + tab_size, ptr, out_end-ptr); - - htscodecs_tls_free(syms); - return out; -} - -#define NX 32 -unsigned char *rans_uncompress_O1_32x16_avx512(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < NX*4) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - - /* Load in the static tables */ - unsigned char *cp = in, 
*cp_end = in+in_size, *out_free = NULL; - unsigned char *c_freq = NULL; - - uint32_t (*s3)[TOTFREQ_O1] = htscodecs_tls_alloc(256*TOTFREQ_O1*4); - if (!s3) - return NULL; - uint32_t (*s3F)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])s3; - - if (!out) - out_free = out = malloc(out_sz); - - if (!out) - goto err; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // compressed header? If so uncompress it - unsigned char *tab_end = NULL; - unsigned char *c_freq_end = cp_end; - unsigned int shift = *cp >> 4; - if (*cp++ & 1) { - uint32_t u_freq_sz, c_freq_sz; - cp += var_get_u32(cp, cp_end, &u_freq_sz); - cp += var_get_u32(cp, cp_end, &c_freq_sz); - if (c_freq_sz > cp_end - cp) - goto err; - tab_end = cp + c_freq_sz; - if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL, - u_freq_sz))) - goto err; - cp = c_freq; - c_freq_end = c_freq + u_freq_sz; - } - - // Decode order-0 symbol list; avoids needing in order-1 tables - cp += decode_freq1(cp, c_freq_end, shift, s3, s3F, NULL, NULL); - - if (tab_end) - cp = tab_end; - free(c_freq); - c_freq = NULL; - - if (cp_end - cp < NX * 4) - goto err; - - RansState R[NX] __attribute__((aligned(64))); - uint8_t *ptr = cp, *ptr_end = in + in_size; - int z; - for (z = 0; z < NX; z++) { - RansDecInit(&R[z], &ptr); - if (R[z] < RANS_BYTE_L) - goto err; - } - - int isz4 = out_sz/NX; - int iN[NX], lN[NX] __attribute__((aligned(64))) = {0}; - for (z = 0; z < NX; z++) - iN[z] = z*isz4; - - uint16_t *sp = (uint16_t *)ptr; - const uint32_t mask = (1u << shift)-1; - - __m512i _maskv = _mm512_set1_epi32(mask); - LOAD512(_Rv, R); - LOAD512(_Lv, lN); - -#ifdef TBUF8 - union { - unsigned char tbuf[32][32]; - uint64_t tbuf64[32][4]; - } u __attribute__((aligned(32))); -#else - uint32_t tbuf[32][32]; -#endif - - unsigned int tidx = 0; - - if (shift == TF_SHIFT_O1) { - isz4 -= 64; - for (; iN[0] < isz4 && (uint8_t *)sp+64 < ptr_end; ) { - // m[z] = R[z] & mask; - __m512i _masked1 = _mm512_and_si512(_Rv1, _maskv); - __m512i _masked2 = 
_mm512_and_si512(_Rv2, _maskv); - - // S[z] = s3[lN[z]][m[z]]; - _Lv1 = _mm512_slli_epi32(_Lv1, TF_SHIFT_O1); - _Lv2 = _mm512_slli_epi32(_Lv2, TF_SHIFT_O1); - - _masked1 = _mm512_add_epi32(_masked1, _Lv1); - _masked2 = _mm512_add_epi32(_masked2, _Lv2); - - // This is the biggest bottleneck - __m512i _Sv1 = _mm512_i32gather_epi32(_masked1, (int *)&s3F[0][0], - sizeof(s3F[0][0])); - __m512i _Sv2 = _mm512_i32gather_epi32(_masked2, (int *)&s3F[0][0], - sizeof(s3F[0][0])); - - // f[z] = S[z]>>(TF_SHIFT_O1+8); - __m512i _fv1 = _mm512_srli_epi32(_Sv1, TF_SHIFT_O1+8); - __m512i _fv2 = _mm512_srli_epi32(_Sv2, TF_SHIFT_O1+8); - - // b[z] = (S[z]>>8) & mask; - __m512i _bv1 = _mm512_and_si512(_mm512_srli_epi32(_Sv1,8), _maskv); - __m512i _bv2 = _mm512_and_si512(_mm512_srli_epi32(_Sv2,8), _maskv); - - // s[z] = S[z] & 0xff; - __m512i _sv1 = _mm512_and_si512(_Sv1, _mm512_set1_epi32(0xff)); - __m512i _sv2 = _mm512_and_si512(_Sv2, _mm512_set1_epi32(0xff)); - - // A maximum frequency of 4096 doesn't fit in our s3 array. - // as it's 12 bit + 12 bit + 8 bit. It wraps around to zero. - // (We don't have this issue for TOTFREQ_O1_FAST.) - // - // Solution 1 is to change to spec to forbid freq of 4096. - // Easy hack is to add an extra symbol so it sums correctly. - // => 572 MB/s on q40 (deskpro). - // - // Solution 2 implemented here is to look for the wrap around - // and fix it. - // => 556 MB/s on q40 - // cope with max freq of 4096. 
Only 3% hit - __m512i max_freq = _mm512_set1_epi32(TOTFREQ_O1); - __m512i zero = _mm512_setzero_si512(); - __mmask16 cmp1 = _mm512_cmpeq_epi32_mask(_fv1, zero); - __mmask16 cmp2 = _mm512_cmpeq_epi32_mask(_fv2, zero); - _fv1 = _mm512_mask_blend_epi32(cmp1, _fv1, max_freq); - _fv2 = _mm512_mask_blend_epi32(cmp2, _fv2, max_freq); - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1) + b[z]; - _Rv1 = _mm512_add_epi32( - _mm512_mullo_epi32( - _mm512_srli_epi32(_Rv1,TF_SHIFT_O1), _fv1), _bv1); - _Rv2 = _mm512_add_epi32( - _mm512_mullo_epi32( - _mm512_srli_epi32(_Rv2,TF_SHIFT_O1), _fv2), _bv2); - - //for (z = 0; z < NX; z++) lN[z] = c[z]; - _Lv1 = _sv1; - _Lv2 = _sv2; - - // RansDecRenorm(&R[z], &ptr); - __m512i _renorm_mask1 = _mm512_xor_si512(_Rv1, - _mm512_set1_epi32(0x80000000)); - __m512i _renorm_mask2 = _mm512_xor_si512(_Rv2, - _mm512_set1_epi32(0x80000000)); - - int _imask1 =_mm512_cmpgt_epi32_mask - (_mm512_set1_epi32(RANS_BYTE_L-0x80000000), _renorm_mask1); - int _imask2 = _mm512_cmpgt_epi32_mask - (_mm512_set1_epi32(RANS_BYTE_L-0x80000000), _renorm_mask2); - - __m512i renorm_words1 = _mm512_cvtepu16_epi32 - (_mm256_loadu_si256((const __m256i *)sp)); - sp += _mm_popcnt_u32(_imask1); - - __m512i renorm_words2 = _mm512_cvtepu16_epi32 - (_mm256_loadu_si256((const __m256i *)sp)); - sp += _mm_popcnt_u32(_imask2); - - __m512i _renorm_vals1 = - _mm512_maskz_expand_epi32(_imask1, renorm_words1); - __m512i _renorm_vals2 = - _mm512_maskz_expand_epi32(_imask2, renorm_words2); - - _Rv1 = _mm512_mask_slli_epi32(_Rv1, _imask1, _Rv1, 16); - _Rv2 = _mm512_mask_slli_epi32(_Rv2, _imask2, _Rv2, 16); - - _Rv1 = _mm512_add_epi32(_Rv1, _renorm_vals1); - _Rv2 = _mm512_add_epi32(_Rv2, _renorm_vals2); - -#ifdef TBUF8 - _mm_storeu_si128((__m128i *)(&u.tbuf64[tidx][0]), - _mm512_cvtepi32_epi8(_Sv1)); // or _sv1? 
- _mm_storeu_si128((__m128i *)(&u.tbuf64[tidx][2]), - _mm512_cvtepi32_epi8(_Sv2)); -#else - _mm512_storeu_si512((__m512i *)(&tbuf[tidx][ 0]), _sv1); - _mm512_storeu_si512((__m512i *)(&tbuf[tidx][16]), _sv2); -#endif - - iN[0]++; - if (++tidx == 32) { - iN[0]-=32; - - // We have tidx[x][y] which we want to store in - // memory in out[y][z] instead. This is an unrolled - // transposition. -#ifdef TBUF8 - transpose_and_copy(out, iN, u.tbuf); -#else - transpose_and_copy_avx512(out, iN, tbuf); -#endif - tidx = 0; - } - } - isz4 += 64; - - STORE512(_Rv, R); - STORE512(_Lv, lN); - ptr = (uint8_t *)sp; - - if (1) { - iN[0]-=tidx; - int T; - for (z = 0; z < NX; z++) - for (T = 0; T < tidx; T++) -#ifdef TBUF8 - out[iN[z]++] = u.tbuf[T][z]; -#else - out[iN[z]++] = tbuf[T][z]; -#endif - } - - // Scalar version for close to the end of in[] array so we don't - // do SIMD loads beyond the end of the buffer - for (; iN[0] < isz4;) { - for (z = 0; z < NX; z++) { - uint32_t m = R[z] & ((1u<>(TF_SHIFT_O1+8); - R[z] = (F?F:4096) * (R[z]>>TF_SHIFT_O1) + - ((S>>8) & ((1u<>(TF_SHIFT_O1+8); - R[z] = (F?F:4096) * (R[z]>>TF_SHIFT_O1) + - ((S>>8) & ((1u<>(TF_SHIFT_O1+8); - __m512i _fv1 = _mm512_srli_epi32(_Sv1, TF_SHIFT_O1_FAST+8); - __m512i _fv2 = _mm512_srli_epi32(_Sv2, TF_SHIFT_O1_FAST+8); - - // b[z] = (S[z]>>8) & mask; - __m512i _bv1 = _mm512_and_si512(_mm512_srli_epi32(_Sv1,8), _maskv); - __m512i _bv2 = _mm512_and_si512(_mm512_srli_epi32(_Sv2,8), _maskv); - - // s[z] = S[z] & 0xff; - __m512i _sv1 = _mm512_and_si512(_Sv1, _mm512_set1_epi32(0xff)); - __m512i _sv2 = _mm512_and_si512(_Sv2, _mm512_set1_epi32(0xff)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1) + b[z]; - _Rv1 = _mm512_add_epi32( - _mm512_mullo_epi32( - _mm512_srli_epi32(_Rv1,TF_SHIFT_O1_FAST), - _fv1), _bv1); - _Rv2 = _mm512_add_epi32( - _mm512_mullo_epi32( - _mm512_srli_epi32(_Rv2,TF_SHIFT_O1_FAST), - _fv2), _bv2); - - //for (z = 0; z < NX; z++) lN[z] = c[z]; - _Lv1 = _sv1; - _Lv2 = _sv2; - - // RansDecRenorm(&R[z], &ptr); - 
__m512i _renorm_mask1 = _mm512_xor_si512(_Rv1, - _mm512_set1_epi32(0x80000000)); - __m512i _renorm_mask2 = _mm512_xor_si512(_Rv2, - _mm512_set1_epi32(0x80000000)); - - int _imask1 =_mm512_cmpgt_epi32_mask - (_mm512_set1_epi32(RANS_BYTE_L-0x80000000), _renorm_mask1); - int _imask2 = _mm512_cmpgt_epi32_mask - (_mm512_set1_epi32(RANS_BYTE_L-0x80000000), _renorm_mask2); - - __m512i renorm_words1 = _mm512_cvtepu16_epi32 - (_mm256_loadu_si256((const __m256i *)sp)); - sp += _mm_popcnt_u32(_imask1); - - __m512i renorm_words2 = _mm512_cvtepu16_epi32 - (_mm256_loadu_si256((const __m256i *)sp)); - sp += _mm_popcnt_u32(_imask2); - - __m512i _renorm_vals1 = - _mm512_maskz_expand_epi32(_imask1, renorm_words1); - __m512i _renorm_vals2 = - _mm512_maskz_expand_epi32(_imask2, renorm_words2); - - _Rv1 = _mm512_mask_slli_epi32(_Rv1, _imask1, _Rv1, 16); - _Rv2 = _mm512_mask_slli_epi32(_Rv2, _imask2, _Rv2, 16); - - _Rv1 = _mm512_add_epi32(_Rv1, _renorm_vals1); - _Rv2 = _mm512_add_epi32(_Rv2, _renorm_vals2); - -#ifdef TBUF8 - _mm_storeu_si128((__m128i *)(&u.tbuf64[tidx][0]), - _mm512_cvtepi32_epi8(_Sv1)); // or _sv1? 
- _mm_storeu_si128((__m128i *)(&u.tbuf64[tidx][2]), - _mm512_cvtepi32_epi8(_Sv2)); -#else - _mm512_storeu_si512((__m512i *)(&tbuf[tidx][ 0]), _sv1); - _mm512_storeu_si512((__m512i *)(&tbuf[tidx][16]), _sv2); -#endif - - iN[0]++; - if (++tidx == 32) { - iN[0]-=32; -#ifdef TBUF8 - transpose_and_copy(out, iN, u.tbuf); -#else - transpose_and_copy_avx512(out, iN, tbuf); -#endif - tidx = 0; - } - } - isz4 += 64; - - STORE512(_Rv, R); - STORE512(_Lv, lN); - ptr = (uint8_t *)sp; - - if (1) { - iN[0]-=tidx; - int T; - for (z = 0; z < NX; z++) - for (T = 0; T < tidx; T++) -#ifdef TBUF8 - out[iN[z]++] = u.tbuf[T][z]; -#else - out[iN[z]++] = tbuf[T][z]; -#endif - } - - // Scalar version for close to the end of in[] array so we don't - // do SIMD loads beyond the end of the buffer - for (; iN[0] < isz4;) { - for (z = 0; z < NX; z++) { - uint32_t m = R[z] & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[z]>>TF_SHIFT_O1_FAST) + - ((S>>8) & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[z]>>TF_SHIFT_O1_FAST) + - ((S>>8) & ((1u< -#include -#include -#include -#include -#include - -#include "rANS_word.h" -#include "rANS_static4x16.h" -#include "rANS_static16_int.h" -#include "varint.h" -#include "utils.h" - -/* Uses: SSE, SSE2, SSSE3, SSE4.1 and POPCNT -SSE: -_mm_movemask_ps - -SSE2: - _mm_load_si128 _mm_store_si128 - _mm_set_epi32 _mm_set1_epi32 - _mm_and_si128 _mm_or_si128 - _mm_srli_epi32 _mm_slli_epi32 _mm_srli_epi64 _mm_set1_epi64x - _mm_add_epi32 - _mm_packus_epi32 - _mm_andnot_si128 - _mm_cmpeq_epi32 - _mm_mul_epu32 - _mm_shuffle_epi32 - -SSSE3: - _mm_shuffle_epi8 - -SSE4.1: - _mm_mullo_epi32 - _mm_packus_epi32 - _mm_max_epu32 - _mm_cvtepu16_epi32 - _mm_blendv_epi8 - -POPCNT: - _mm_popcnt_u32 - */ - - -#define NX 32 - -#define LOAD128(a,b) \ - __m128i a##1 = _mm_load_si128((__m128i *)&b[0]); \ - __m128i a##2 = _mm_load_si128((__m128i *)&b[4]); \ - __m128i a##3 = _mm_load_si128((__m128i *)&b[8]); \ - __m128i a##4 = _mm_load_si128((__m128i *)&b[12]); \ - __m128i a##5 = _mm_load_si128((__m128i *)&b[16]); \ 
- __m128i a##6 = _mm_load_si128((__m128i *)&b[20]); \ - __m128i a##7 = _mm_load_si128((__m128i *)&b[24]); \ - __m128i a##8 = _mm_load_si128((__m128i *)&b[28]); - -#define STORE128(a,b) \ - _mm_store_si128((__m128i *)&b[ 0], a##1); \ - _mm_store_si128((__m128i *)&b[ 4], a##2); \ - _mm_store_si128((__m128i *)&b[ 8], a##3); \ - _mm_store_si128((__m128i *)&b[12], a##4); \ - _mm_store_si128((__m128i *)&b[16], a##5); \ - _mm_store_si128((__m128i *)&b[20], a##6); \ - _mm_store_si128((__m128i *)&b[24], a##7); \ - _mm_store_si128((__m128i *)&b[28], a##8); - -static inline __m128i _mm_i32gather_epi32x(int *b, __m128i idx, int size) { - int c[4] __attribute__((aligned(32))); - _mm_store_si128((__m128i *)c, idx); - return _mm_set_epi32(b[c[3]], b[c[2]], b[c[1]], b[c[0]]); -} - -// SSE4 implementation of the Order-0 encoder is poorly performing. -// Disabled for now. -#if 0 -#define LOAD128v(a,b) \ - __m128i a[8]; \ - a[0] = _mm_load_si128((__m128i *)&b[0]); \ - a[1] = _mm_load_si128((__m128i *)&b[4]); \ - a[2] = _mm_load_si128((__m128i *)&b[8]); \ - a[3] = _mm_load_si128((__m128i *)&b[12]); \ - a[4] = _mm_load_si128((__m128i *)&b[16]); \ - a[5] = _mm_load_si128((__m128i *)&b[20]); \ - a[6] = _mm_load_si128((__m128i *)&b[24]); \ - a[7] = _mm_load_si128((__m128i *)&b[28]); - -#define STORE128v(a,b) \ - _mm_store_si128((__m128i *)&b[ 0], a[0]); \ - _mm_store_si128((__m128i *)&b[ 4], a[1]); \ - _mm_store_si128((__m128i *)&b[ 8], a[2]); \ - _mm_store_si128((__m128i *)&b[12], a[3]); \ - _mm_store_si128((__m128i *)&b[16], a[4]); \ - _mm_store_si128((__m128i *)&b[20], a[5]); \ - _mm_store_si128((__m128i *)&b[24], a[6]); \ - _mm_store_si128((__m128i *)&b[28], a[7]); - -static inline __m128i _mm_mulhi_epu32(__m128i a, __m128i b) { - // Multiply bottom 4 items and top 4 items together. - __m128i ab_hm = _mm_mul_epu32(_mm_srli_epi64(a, 32),_mm_srli_epi64(b, 32)); - __m128i ab_lm = _mm_srli_epi64(_mm_mul_epu32(a, b), 32); - - // Blend or and/or seems to make no difference. 
- return _mm_blend_epi16(ab_lm, ab_hm, 0xcc); - -// // Shift to get hi 32-bit of each 64-bit product -// ab_hm = _mm_and_si128(ab_hm,_mm_set1_epi64x((uint64_t)0xffffffff00000000)); -// -// return _mm_or_si128(ab_lm, ab_hm); -} - -// Shift A>>B for non-constant B exists in AVX2, but not SSE world. -// We simulate this for now by store, shift, and load. Ugly! -static inline __m128i _mm_srlv_epi32x(__m128i a, __m128i b) { -// Extract and inline shift. Slowest clang, joint fastest gcc -// return _mm_set_epi32(_mm_extract_epi32(a,3)>>_mm_extract_epi32(b,3), -// _mm_extract_epi32(a,2)>>_mm_extract_epi32(b,2), -// _mm_extract_epi32(a,1)>>_mm_extract_epi32(b,1), -// _mm_extract_epi32(a,0)>>_mm_extract_epi32(b,0)); - -// Half store and inline shift; Fastest gcc, comparable to others below clang -// uint32_t A[4]; -// _mm_storeu_si128((__m128i *)&A, a); -// -// return _mm_set_epi32(A[3]>>_mm_extract_epi32(b,3), -// A[2]>>_mm_extract_epi32(b,2), -// A[1]>>_mm_extract_epi32(b,1), -// A[0]>>_mm_extract_epi32(b,0)); - -// Other half - uint32_t B[4]; - _mm_storeu_si128((__m128i *)&B, b); - return _mm_set_epi32(_mm_extract_epi32(a,3)>>B[3], - _mm_extract_epi32(a,2)>>B[2], - _mm_extract_epi32(a,1)>>B[1], - _mm_extract_epi32(a,0)>>B[0]); - -// Check if all b[] match, and constant shift if so. -// Too costly, even on q4 where it's common for all shift to be identical. 
-// __m128i cmp = _mm_cmpeq_epi32(b, _mm_shuffle_epi32(b, 0x39)); -// if (_mm_movemask_ps((__m128)cmp) == 15) { -// return _mm_srl_epi32(a, _mm_set1_epi64x(_mm_extract_epi32(b,0))); -// //_mm_storeu_si128((__m128i *)&B,_mm_set1_epi32(_mm_extract_epi32(b,0))); -// } else { -// uint32_t B[4]; -// _mm_storeu_si128((__m128i *)&B, b); -// return _mm_set_epi32(_mm_extract_epi32(a,3)>>B[3], -// _mm_extract_epi32(a,2)>>B[2], -// _mm_extract_epi32(a,1)>>B[1], -// _mm_extract_epi32(a,0)>>B[0]); -// } - - -// Full store and inline shift -// uint32_t A[4], B[4] __attribute__((aligned(16))); -// _mm_storeu_si128((__m128i *)&A, a); -// _mm_storeu_si128((__m128i *)&B, b); -// -// return _mm_set_epi32(A[3]>>B[3], A[2]>>B[2], A[1]>>B[1], A[0]>>B[0]); - -// Full store, shift and load -// uint32_t A[4], B[4] __attribute__((aligned(16))); -// _mm_storeu_si128((__m128i *)&A, a); -// _mm_storeu_si128((__m128i *)&B, b); -// A[0]>>=B[0]; -// A[1]>>=B[1]; -// A[2]>>=B[2]; -// A[3]>>=B[3]; -// return _mm_loadu_si128((__m128i *)A); -} - -unsigned char *rans_compress_O0_32x16_sse4(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - unsigned char *cp, *out_end; - RansEncSymbol syms[256]; - RansState ransN[NX]; - uint8_t* ptr; - uint32_t F[256+MAGIC] = {0}; - int i, j, tab_size = 0, x, z; - // -20 for order/size/meta - uint32_t bound = rans_compress_bound_4x16(in_size,0)-20; - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - // If "out" isn't word aligned, tweak out_end/ptr to ensure it is. - // We already added more round in bound to allow for this. 
- if (((size_t)out)&1) - bound--; - ptr = out_end = out + bound; - - if (in_size == 0) - goto empty; - - // Compute statistics - if (hist8(in, in_size, F) < 0) - return NULL; - - // Normalise so frequences sum to power of 2 - uint32_t fsum = in_size; - uint32_t max_val = round2(fsum); - if (max_val > TOTFREQ) - max_val = TOTFREQ; - - if (normalise_freq(F, fsum, max_val) < 0) - return NULL; - fsum=max_val; - - cp = out; - cp += encode_freq(cp, F); - tab_size = cp-out; - //write(2, out+4, cp-(out+4)); - - if (normalise_freq(F, fsum, TOTFREQ) < 0) - return NULL; - - // Encode statistics. - for (x = j = 0; j < 256; j++) { - if (F[j]) { - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - x += F[j]; - } - } - - for (z = 0; z < NX; z++) - RansEncInit(&ransN[z]); - - z = i = in_size&(NX-1); - while (z-- > 0) - RansEncPutSymbol(&ransN[z], &ptr, &syms[in[in_size-(i-z)]]); - - uint32_t SB[256], SA[256], SD[256], SC[256]; - - // Build lookup tables for SIMD encoding - uint16_t *ptr16 = (uint16_t *)ptr; - for (i = 0; i < 256; i++) { - SB[i] = syms[i].x_max; - SA[i] = syms[i].rcp_freq; - SD[i] = (syms[i].cmpl_freq<<0) | (syms[i].rcp_shift<<16); - SC[i] = syms[i].bias; - } - - LOAD128v(Rv, ransN); - - const __m128i shuf = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, - 0x0d, 0x0c, 0x09, 0x08, - 0x05, 0x04, 0x01, 0x00); - - // FIXME: slower! - // q40: 340 (scalar) vs 300 (this) - // q4: 430 (scalar) vs 304 (this) - for (i=(in_size &~(NX-1)); i>0; i-=NX) { - uint8_t *c = &in[i-NX]; - - int h; - // Slightly better in 4x8 instead of 2x16 cycles with clang, - // but the reverse with gcc. 
- //for (h=24; h >= 0; h -= 8) { - for (h=16; h >= 0; h -= 16) { - int H = h/4; // rans index - uint8_t *C = &in[i-NX+h]; - -#define SET(i,a) _mm_set_epi32(a[C[i+3]],a[C[i+2]],a[C[i+1]],a[C[i+0]]) - __m128i xmax8 = SET(12, SB); - __m128i xmax7 = SET( 8, SB); - __m128i xmax6 = SET( 4, SB); - __m128i xmax5 = SET( 0, SB); - - __m128i cv8 = _mm_cmpgt_epi32(Rv[H+3], xmax8); - __m128i cv7 = _mm_cmpgt_epi32(Rv[H+2], xmax7); - __m128i cv6 = _mm_cmpgt_epi32(Rv[H+1], xmax6); - __m128i cv5 = _mm_cmpgt_epi32(Rv[H+0], xmax5); - - // Store bottom 16-bits at ptr16 - unsigned int imask8 = _mm_movemask_ps((__m128)cv8); - unsigned int imask7 = _mm_movemask_ps((__m128)cv7); - unsigned int imask6 = _mm_movemask_ps((__m128)cv6); - unsigned int imask5 = _mm_movemask_ps((__m128)cv5); - -#define X(A) 4*A,4*A+1,0x80,0x80 -#define _ 0x80,0x80,0x80,0x80 - uint8_t permutec[16][16] __attribute__((aligned(16))) = { - { _ , _ , _ , _ }, - { _ , _ , _ ,X(0)}, - { _ , _ , _ ,X(1)}, - { _ , _ ,X(0),X(1)}, - - { _ , _ , _ ,X(2)}, - { _ , _ ,X(0),X(2)}, - { _ , _ ,X(1),X(2)}, - { _ ,X(0),X(1),X(2)}, - - { _ , _ , _ ,X(3)}, - { _ , _ ,X(0),X(3)}, - { _ , _ ,X(1),X(3)}, - { _ ,X(0),X(1),X(3)}, - - { _ , _ ,X(2),X(3)}, - { _ ,X(0),X(2),X(3)}, - { _ ,X(1),X(2),X(3)}, - {X(0),X(1),X(2),X(3)}, - }; -#undef X -#undef _ - - __m128i idx8 = _mm_load_si128((__m128i *)permutec[imask8]); - __m128i idx7 = _mm_load_si128((__m128i *)permutec[imask7]); - __m128i idx6 = _mm_load_si128((__m128i *)permutec[imask6]); - __m128i idx5 = _mm_load_si128((__m128i *)permutec[imask5]); - - // Permute; to gather together the rans states that need flushing - __m128i V1, V2, V3, V4, V5, V6, V7, V8; - V8 = _mm_shuffle_epi8(_mm_and_si128(Rv[H+3], cv8), idx8); - V7 = _mm_shuffle_epi8(_mm_and_si128(Rv[H+2], cv7), idx7); - V6 = _mm_shuffle_epi8(_mm_and_si128(Rv[H+1], cv6), idx6); - V5 = _mm_shuffle_epi8(_mm_and_si128(Rv[H+0], cv5), idx5); - - // Shuffle alternating shorts together to collect low 16-bit - // elements together. ... 
9 8 5 4 1 0. - // Or as with avx2 code use packus instead. - V8 = _mm_shuffle_epi8(V8, shuf); - V7 = _mm_shuffle_epi8(V7, shuf); - V6 = _mm_shuffle_epi8(V6, shuf); - V5 = _mm_shuffle_epi8(V5, shuf); - - _mm_storeu_si64(ptr16-4, V8); ptr16 -= _mm_popcnt_u32(imask8); - _mm_storeu_si64(ptr16-4, V7); ptr16 -= _mm_popcnt_u32(imask7); - _mm_storeu_si64(ptr16-4, V6); ptr16 -= _mm_popcnt_u32(imask6); - _mm_storeu_si64(ptr16-4, V5); ptr16 -= _mm_popcnt_u32(imask5); - - Rv[H+3] = _mm_blendv_epi8(Rv[H+3], _mm_srli_epi32(Rv[H+3], 16), cv8); - Rv[H+2] = _mm_blendv_epi8(Rv[H+2], _mm_srli_epi32(Rv[H+2], 16), cv7); - Rv[H+1] = _mm_blendv_epi8(Rv[H+1], _mm_srli_epi32(Rv[H+1], 16), cv6); - Rv[H+0] = _mm_blendv_epi8(Rv[H+0], _mm_srli_epi32(Rv[H+0], 16), cv5); - - // Cannot trivially replace the multiply as mulhi_epu32 doesn't - // exist (only mullo). - // However we can use _mm_mul_epu32 twice to get 64bit results - // (h our lanes) and shift/or to get the answer. - // - // (AVX512 allows us to hold it all in 64-bit lanes and use mullo_epi64 - // plus a shift. KNC has mulhi_epi32, but not sure if this is - // available.) 
- __m128i rfv8 = SET(12, SA); - __m128i rfv7 = SET( 8, SA); - __m128i rfv6 = SET( 4, SA); - __m128i rfv5 = SET( 0, SA); - - rfv8 = _mm_mulhi_epu32(Rv[H+3], rfv8); - rfv7 = _mm_mulhi_epu32(Rv[H+2], rfv7); - rfv6 = _mm_mulhi_epu32(Rv[H+1], rfv6); - rfv5 = _mm_mulhi_epu32(Rv[H+0], rfv5); - - __m128i SDv8 = SET(12, SD); - __m128i SDv7 = SET( 8, SD); - __m128i SDv6 = SET( 4, SD); - __m128i SDv5 = SET( 0, SD); - - __m128i shiftv8 = _mm_srli_epi32(SDv8, 16); - __m128i shiftv7 = _mm_srli_epi32(SDv7, 16); - __m128i shiftv6 = _mm_srli_epi32(SDv6, 16); - __m128i shiftv5 = _mm_srli_epi32(SDv5, 16); - - __m128i freqv8 = _mm_and_si128(SDv8, _mm_set1_epi32(0xffff)); - __m128i freqv7 = _mm_and_si128(SDv7, _mm_set1_epi32(0xffff)); - __m128i freqv6 = _mm_and_si128(SDv6, _mm_set1_epi32(0xffff)); - __m128i freqv5 = _mm_and_si128(SDv5, _mm_set1_epi32(0xffff)); - - // Bake this into the tabel to start with? - shiftv8 = _mm_sub_epi32(shiftv8, _mm_set1_epi32(32)); - shiftv7 = _mm_sub_epi32(shiftv7, _mm_set1_epi32(32)); - shiftv6 = _mm_sub_epi32(shiftv6, _mm_set1_epi32(32)); - shiftv5 = _mm_sub_epi32(shiftv5, _mm_set1_epi32(32)); - - // No way to shift by varying amounts. Store, shift, load? 
Simulated - __m128i qv8 = _mm_srlv_epi32x(rfv8, shiftv8); - __m128i qv7 = _mm_srlv_epi32x(rfv7, shiftv7); - __m128i qv6 = _mm_srlv_epi32x(rfv6, shiftv6); - __m128i qv5 = _mm_srlv_epi32x(rfv5, shiftv5); - - qv8 = _mm_mullo_epi32(qv8, freqv8); - qv7 = _mm_mullo_epi32(qv7, freqv7); - qv6 = _mm_mullo_epi32(qv6, freqv6); - qv5 = _mm_mullo_epi32(qv5, freqv5); - - qv8 = _mm_add_epi32(qv8, SET(12, SC)); - qv7 = _mm_add_epi32(qv7, SET( 8, SC)); - qv6 = _mm_add_epi32(qv6, SET( 4, SC)); - qv5 = _mm_add_epi32(qv5, SET( 0, SC)); - - Rv[H+3] = _mm_add_epi32(Rv[H+3], qv8); - Rv[H+2] = _mm_add_epi32(Rv[H+2], qv7); - Rv[H+1] = _mm_add_epi32(Rv[H+1], qv6); - Rv[H+0] = _mm_add_epi32(Rv[H+0], qv5); - } - } - - STORE128v(Rv, ransN); - - ptr = (uint8_t *)ptr16; - - for (z = NX-1; z >= 0; z--) - RansEncFlush(&ransN[z], &ptr); - - empty: - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - -// cp = out; -// *cp++ = (in_size>> 0) & 0xff; -// *cp++ = (in_size>> 8) & 0xff; -// *cp++ = (in_size>>16) & 0xff; -// *cp++ = (in_size>>24) & 0xff; - - memmove(out + tab_size, ptr, out_end-ptr); - - return out; -} -#endif // disable SSE4 encoder - -unsigned char *rans_uncompress_O0_32x16_sse4(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < 16) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *out_free = NULL; - unsigned char *cp_end = in + in_size; - int i; - uint32_t s3[TOTFREQ] __attribute__((aligned(32))); // For TF_SHIFT <= 12 - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - // Precompute reverse lookup of frequency. 
- uint32_t F[256] = {0}, fsum; - int fsz = decode_freq(cp, cp_end, F, &fsum); - if (!fsz) - goto err; - cp += fsz; - - normalise_freq_shift(F, fsum, TOTFREQ); - - // Build symbols; fixme, do as part of decode, see the _d variant - if (rans_F_to_s3(F, TF_SHIFT, s3)) - goto err; - - if (cp_end - cp < NX * 4) - goto err; - - int z; - RansState R[NX] __attribute__((aligned(32))); - for (z = 0; z < NX; z++) { - RansDecInit(&R[z], &cp); - if (R[z] < RANS_BYTE_L) - goto err; - } - - uint16_t *sp = (uint16_t *)cp; - - int out_end = (out_sz&~(NX-1)); - const uint32_t mask = (1u << TF_SHIFT)-1; - - __m128i maskv = _mm_set1_epi32(mask); // set mask in all lanes - LOAD128(Rv, R); - - uint8_t overflow[72+64] = {0}; - for (i=0; i < out_end; i+=NX) { - //for (z = 0; z < NX; z++) - // m[z] = R[z] & mask; - __m128i masked1 = _mm_and_si128(Rv1, maskv); - __m128i masked2 = _mm_and_si128(Rv2, maskv); - __m128i masked3 = _mm_and_si128(Rv3, maskv); - __m128i masked4 = _mm_and_si128(Rv4, maskv); - - // S[z] = s3[m[z]]; - __m128i Sv1 = _mm_i32gather_epi32x((int *)s3, masked1, sizeof(*s3)); - __m128i Sv2 = _mm_i32gather_epi32x((int *)s3, masked2, sizeof(*s3)); - __m128i Sv3 = _mm_i32gather_epi32x((int *)s3, masked3, sizeof(*s3)); - __m128i Sv4 = _mm_i32gather_epi32x((int *)s3, masked4, sizeof(*s3)); - - // f[z] = S[z]>>(TF_SHIFT+8); - __m128i fv1 = _mm_srli_epi32(Sv1, TF_SHIFT+8); - __m128i fv2 = _mm_srli_epi32(Sv2, TF_SHIFT+8); - __m128i fv3 = _mm_srli_epi32(Sv3, TF_SHIFT+8); - __m128i fv4 = _mm_srli_epi32(Sv4, TF_SHIFT+8); - - // b[z] = (S[z]>>8) & mask; - __m128i bv1 = _mm_and_si128(_mm_srli_epi32(Sv1, 8), maskv); - __m128i bv2 = _mm_and_si128(_mm_srli_epi32(Sv2, 8), maskv); - __m128i bv3 = _mm_and_si128(_mm_srli_epi32(Sv3, 8), maskv); - __m128i bv4 = _mm_and_si128(_mm_srli_epi32(Sv4, 8), maskv); - - // s[z] = S[z] & 0xff; - __m128i sv1 = _mm_and_si128(Sv1, _mm_set1_epi32(0xff)); - __m128i sv2 = _mm_and_si128(Sv2, _mm_set1_epi32(0xff)); - __m128i sv3 = _mm_and_si128(Sv3, 
_mm_set1_epi32(0xff)); - __m128i sv4 = _mm_and_si128(Sv4, _mm_set1_epi32(0xff)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT) + b[z]; - Rv1 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv1,TF_SHIFT), fv1), bv1); - Rv2 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv2,TF_SHIFT), fv2), bv2); - Rv3 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv3,TF_SHIFT), fv3), bv3); - Rv4 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv4,TF_SHIFT), fv4), bv4); - - // Tricky one: out[i+z] = s[z]; - // ---d---c ---b---a sv1 - // ---h---g ---f---e sv2 - // packs_epi32 -h-g-f-e -d-c-b-a sv1(2) - // packs_epi16 ponmlkji hgfedcba sv1(2) / sv3(4) - sv1 = _mm_packus_epi32(sv1, sv2); - sv3 = _mm_packus_epi32(sv3, sv4); - sv1 = _mm_packus_epi16(sv1, sv3); - - // c = R[z] < RANS_BYTE_L; - // A little tricky as we only have signed comparisons. - // See https://stackoverflow.com/questions/32945410/sse2-intrinsics-comparing-unsigned-integers - -#define _mm_cmplt_epu32_imm(a,b) _mm_andnot_si128(_mm_cmpeq_epi32(_mm_max_epu32((a),_mm_set1_epi32(b)), (a)), _mm_set1_epi32(-1)); - -//#define _mm_cmplt_epu32_imm(a,b) _mm_cmpgt_epi32(_mm_set1_epi32((b)-0x80000000), _mm_xor_si128((a), _mm_set1_epi32(0x80000000))) - - __m128i renorm_mask1, renorm_mask2, renorm_mask3, renorm_mask4; - renorm_mask1 = _mm_cmplt_epu32_imm(Rv1, RANS_BYTE_L); - renorm_mask2 = _mm_cmplt_epu32_imm(Rv2, RANS_BYTE_L); - renorm_mask3 = _mm_cmplt_epu32_imm(Rv3, RANS_BYTE_L); - renorm_mask4 = _mm_cmplt_epu32_imm(Rv4, RANS_BYTE_L); - -//#define P(A,B,C,D) ((A)+((B)<<2) + ((C)<<4) + ((D)<<6)) -#define P(A,B,C,D) \ - { A+0,A+1,A+2,A+3, \ - B+0,B+1,B+2,B+3, \ - C+0,C+1,C+2,C+3, \ - D+0,D+1,D+2,D+3} -#ifdef _ -#undef _ -#endif -#define _ 0x80 - uint8_t pidx[16][16] = { - P(_,_,_,_), - P(0,_,_,_), - P(_,0,_,_), - P(0,4,_,_), - - P(_,_,0,_), - P(0,_,4,_), - P(_,0,4,_), - P(0,4,8,_), - - P(_,_,_,0), - P(0,_,_,4), - P(_,0,_,4), - P(0,4,_,8), - - P(_,_,0,4), - P(0,_,4,8), - P(_,0,4,8), - P(0,4,8,12), - }; -#undef _ - - 
// Protect against running off the end of in buffer. - // We copy it to a worst-case local buffer when near the end. - // 72 = 7*8(imask1..7) + 16; worse case for 8th _mm_loadu_si128 call. - // An extra 64 bytes is to avoid triggering this multiple times - // after we swap sp/cp_end over. - if ((uint8_t *)sp+72 > cp_end) { - memmove(overflow, sp, cp_end - (uint8_t *)sp); - sp = (uint16_t *)overflow; - cp_end = (uint8_t *)overflow + sizeof(overflow); - } - - // Shuffle the renorm values to correct lanes and incr sp pointer - __m128i Vv1 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask1 = _mm_movemask_ps((__m128)renorm_mask1); - Vv1 = _mm_shuffle_epi8(Vv1, _mm_load_si128((__m128i*)pidx[imask1])); - sp += _mm_popcnt_u32(imask1); - - __m128i Vv2 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask2 = _mm_movemask_ps((__m128)renorm_mask2); - sp += _mm_popcnt_u32(imask2); - Vv2 = _mm_shuffle_epi8(Vv2, _mm_load_si128((__m128i*)pidx[imask2])); - - __m128i Vv3 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask3 = _mm_movemask_ps((__m128)renorm_mask3); - Vv3 = _mm_shuffle_epi8(Vv3, _mm_load_si128((__m128i*)pidx[imask3])); - sp += _mm_popcnt_u32(imask3); - - __m128i Vv4 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask4 = _mm_movemask_ps((__m128)renorm_mask4); - sp += _mm_popcnt_u32(imask4); - Vv4 = _mm_shuffle_epi8(Vv4, _mm_load_si128((__m128i*)pidx[imask4])); - - __m128i Yv1 = _mm_slli_epi32(Rv1, 16); - __m128i Yv2 = _mm_slli_epi32(Rv2, 16); - __m128i Yv3 = _mm_slli_epi32(Rv3, 16); - __m128i Yv4 = _mm_slli_epi32(Rv4, 16); - - // y = (R[z] << 16) | V[z]; - Yv1 = _mm_or_si128(Yv1, Vv1); - Yv2 = _mm_or_si128(Yv2, Vv2); - Yv3 = _mm_or_si128(Yv3, Vv3); - Yv4 = _mm_or_si128(Yv4, Vv4); - - // R[z] = c ? 
Y[z] : R[z]; - Rv1 = _mm_blendv_epi8(Rv1, Yv1, renorm_mask1); - Rv2 = _mm_blendv_epi8(Rv2, Yv2, renorm_mask2); - Rv3 = _mm_blendv_epi8(Rv3, Yv3, renorm_mask3); - Rv4 = _mm_blendv_epi8(Rv4, Yv4, renorm_mask4); - - // ------------------------------------------------------------ - - // m[z] = R[z] & mask; - __m128i masked5 = _mm_and_si128(Rv5, maskv); - __m128i masked6 = _mm_and_si128(Rv6, maskv); - __m128i masked7 = _mm_and_si128(Rv7, maskv); - __m128i masked8 = _mm_and_si128(Rv8, maskv); - - // S[z] = s3[m[z]]; - __m128i Sv5 = _mm_i32gather_epi32x((int *)s3, masked5, sizeof(*s3)); - __m128i Sv6 = _mm_i32gather_epi32x((int *)s3, masked6, sizeof(*s3)); - __m128i Sv7 = _mm_i32gather_epi32x((int *)s3, masked7, sizeof(*s3)); - __m128i Sv8 = _mm_i32gather_epi32x((int *)s3, masked8, sizeof(*s3)); - - // f[z] = S[z]>>(TF_SHIFT+8); - __m128i fv5 = _mm_srli_epi32(Sv5, TF_SHIFT+8); - __m128i fv6 = _mm_srli_epi32(Sv6, TF_SHIFT+8); - __m128i fv7 = _mm_srli_epi32(Sv7, TF_SHIFT+8); - __m128i fv8 = _mm_srli_epi32(Sv8, TF_SHIFT+8); - - // b[z] = (S[z]>>8) & mask; - __m128i bv5 = _mm_and_si128(_mm_srli_epi32(Sv5, 8), maskv); - __m128i bv6 = _mm_and_si128(_mm_srli_epi32(Sv6, 8), maskv); - __m128i bv7 = _mm_and_si128(_mm_srli_epi32(Sv7, 8), maskv); - __m128i bv8 = _mm_and_si128(_mm_srli_epi32(Sv8, 8), maskv); - - // s[z] = S[z] & 0xff; - __m128i sv5 = _mm_and_si128(Sv5, _mm_set1_epi32(0xff)); - __m128i sv6 = _mm_and_si128(Sv6, _mm_set1_epi32(0xff)); - __m128i sv7 = _mm_and_si128(Sv7, _mm_set1_epi32(0xff)); - __m128i sv8 = _mm_and_si128(Sv8, _mm_set1_epi32(0xff)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT) + b[z]; - Rv5 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv5,TF_SHIFT), fv5), bv5); - Rv6 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv6,TF_SHIFT), fv6), bv6); - Rv7 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv7,TF_SHIFT), fv7), bv7); - Rv8 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv8,TF_SHIFT), fv8), bv8); - - // Tricky one: out[i+z] = s[z]; - 
// ---d---c ---b---a sv1 - // ---h---g ---f---e sv2 - // packs_epi32 -h-g-f-e -d-c-b-a sv1(2) - // packs_epi16 ponmlkji hgfedcba sv1(2) / sv3(4) - sv5 = _mm_packus_epi32(sv5, sv6); - sv7 = _mm_packus_epi32(sv7, sv8); - sv5 = _mm_packus_epi16(sv5, sv7); - - // c = R[z] < RANS_BYTE_L; - __m128i renorm_mask5, renorm_mask6, renorm_mask7, renorm_mask8; - renorm_mask5 = _mm_cmplt_epu32_imm(Rv5, RANS_BYTE_L); - renorm_mask6 = _mm_cmplt_epu32_imm(Rv6, RANS_BYTE_L); - renorm_mask7 = _mm_cmplt_epu32_imm(Rv7, RANS_BYTE_L); - renorm_mask8 = _mm_cmplt_epu32_imm(Rv8, RANS_BYTE_L); - - // Shuffle the renorm values to correct lanes and incr sp pointer - __m128i Vv5 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask5 = _mm_movemask_ps((__m128)renorm_mask5); - Vv5 = _mm_shuffle_epi8(Vv5, _mm_load_si128((__m128i*)pidx[imask5])); - sp += _mm_popcnt_u32(imask5); - - __m128i Vv6 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask6 = _mm_movemask_ps((__m128)renorm_mask6); - sp += _mm_popcnt_u32(imask6); - Vv6 = _mm_shuffle_epi8(Vv6, _mm_load_si128((__m128i*)pidx[imask6])); - - __m128i Vv7 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask7 = _mm_movemask_ps((__m128)renorm_mask7); - Vv7 = _mm_shuffle_epi8(Vv7, _mm_load_si128((__m128i*)pidx[imask7])); - sp += _mm_popcnt_u32(imask7); - - __m128i Vv8 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask8 = _mm_movemask_ps((__m128)renorm_mask8); - sp += _mm_popcnt_u32(imask8); - Vv8 = _mm_shuffle_epi8(Vv8, _mm_load_si128((__m128i*)pidx[imask8])); - - __m128i Yv5 = _mm_slli_epi32(Rv5, 16); - __m128i Yv6 = _mm_slli_epi32(Rv6, 16); - __m128i Yv7 = _mm_slli_epi32(Rv7, 16); - __m128i Yv8 = _mm_slli_epi32(Rv8, 16); - - // y = (R[z] << 16) | V[z]; - Yv5 = _mm_or_si128(Yv5, Vv5); - Yv6 = _mm_or_si128(Yv6, Vv6); - Yv7 = _mm_or_si128(Yv7, Vv7); - Yv8 = _mm_or_si128(Yv8, Vv8); - - // R[z] = c ? 
Y[z] : R[z]; - Rv5 = _mm_blendv_epi8(Rv5, Yv5, renorm_mask5); - Rv6 = _mm_blendv_epi8(Rv6, Yv6, renorm_mask6); - Rv7 = _mm_blendv_epi8(Rv7, Yv7, renorm_mask7); - Rv8 = _mm_blendv_epi8(Rv8, Yv8, renorm_mask8); - - // Maybe just a store128 instead? - _mm_storeu_si128((__m128i *)&out[i+ 0], sv1); - _mm_storeu_si128((__m128i *)&out[i+16], sv5); -// *(uint64_t *)&out[i+ 0] = _mm_extract_epi64(sv1, 0); -// *(uint64_t *)&out[i+ 8] = _mm_extract_epi64(sv1, 1); -// *(uint64_t *)&out[i+16] = _mm_extract_epi64(sv5, 0); -// *(uint64_t *)&out[i+24] = _mm_extract_epi64(sv5, 1); - } - - STORE128(Rv, R); - - for (z = out_sz & (NX-1); z-- > 0; ) - out[out_end + z] = s3[R[z] & mask]; - - //fprintf(stderr, " 0 Decoded %d bytes\n", (int)(cp-in)); //c-size - - return out; - - err: - free(out_free); - return NULL; -} - -//#define MAGIC2 111 -#define MAGIC2 179 -//#define MAGIC2 0 - -/* - * A 32 x 32 matrix transpose and serialise from t[][] to out. - * Storing in the other orientation speeds up the decoder, and we - * can then flush to out in 1KB blocks. - */ -static inline void transpose_and_copy(uint8_t *out, int iN[32], - uint8_t t[32][32]) { - int z; -#ifdef UBSAN - // Simplified version to avoid undefined behaviour sanitiser warnings. - for (z = 0; z < NX; z++) { - int k; - for (k = 0; k < 32; k++) - out[iN[z]+k] = t[k][z]; - iN[z] += 32; - } -#else - // Unaligned access. We know we can get away with this as this - // code is only ever executed on x86 platforms which permit this. 
- for (z = 0; z < NX; z+=4) { - *(uint64_t *)&out[iN[z]] = - ((uint64_t)(t[0][z])<< 0) + - ((uint64_t)(t[1][z])<< 8) + - ((uint64_t)(t[2][z])<<16) + - ((uint64_t)(t[3][z])<<24) + - ((uint64_t)(t[4][z])<<32) + - ((uint64_t)(t[5][z])<<40) + - ((uint64_t)(t[6][z])<<48) + - ((uint64_t)(t[7][z])<<56); - *(uint64_t *)&out[iN[z+1]] = - ((uint64_t)(t[0][z+1])<< 0) + - ((uint64_t)(t[1][z+1])<< 8) + - ((uint64_t)(t[2][z+1])<<16) + - ((uint64_t)(t[3][z+1])<<24) + - ((uint64_t)(t[4][z+1])<<32) + - ((uint64_t)(t[5][z+1])<<40) + - ((uint64_t)(t[6][z+1])<<48) + - ((uint64_t)(t[7][z+1])<<56); - *(uint64_t *)&out[iN[z+2]] = - ((uint64_t)(t[0][z+2])<< 0) + - ((uint64_t)(t[1][z+2])<< 8) + - ((uint64_t)(t[2][z+2])<<16) + - ((uint64_t)(t[3][z+2])<<24) + - ((uint64_t)(t[4][z+2])<<32) + - ((uint64_t)(t[5][z+2])<<40) + - ((uint64_t)(t[6][z+2])<<48) + - ((uint64_t)(t[7][z+2])<<56); - *(uint64_t *)&out[iN[z+3]] = - ((uint64_t)(t[0][z+3])<< 0) + - ((uint64_t)(t[1][z+3])<< 8) + - ((uint64_t)(t[2][z+3])<<16) + - ((uint64_t)(t[3][z+3])<<24) + - ((uint64_t)(t[4][z+3])<<32) + - ((uint64_t)(t[5][z+3])<<40) + - ((uint64_t)(t[6][z+3])<<48) + - ((uint64_t)(t[7][z+3])<<56); - - *(uint64_t *)&out[iN[z]+8] = - ((uint64_t)(t[8+0][z])<< 0) + - ((uint64_t)(t[8+1][z])<< 8) + - ((uint64_t)(t[8+2][z])<<16) + - ((uint64_t)(t[8+3][z])<<24) + - ((uint64_t)(t[8+4][z])<<32) + - ((uint64_t)(t[8+5][z])<<40) + - ((uint64_t)(t[8+6][z])<<48) + - ((uint64_t)(t[8+7][z])<<56); - *(uint64_t *)&out[iN[z+1]+8] = - ((uint64_t)(t[8+0][z+1])<< 0) + - ((uint64_t)(t[8+1][z+1])<< 8) + - ((uint64_t)(t[8+2][z+1])<<16) + - ((uint64_t)(t[8+3][z+1])<<24) + - ((uint64_t)(t[8+4][z+1])<<32) + - ((uint64_t)(t[8+5][z+1])<<40) + - ((uint64_t)(t[8+6][z+1])<<48) + - ((uint64_t)(t[8+7][z+1])<<56); - *(uint64_t *)&out[iN[z+2]+8] = - ((uint64_t)(t[8+0][z+2])<< 0) + - ((uint64_t)(t[8+1][z+2])<< 8) + - ((uint64_t)(t[8+2][z+2])<<16) + - ((uint64_t)(t[8+3][z+2])<<24) + - ((uint64_t)(t[8+4][z+2])<<32) + - ((uint64_t)(t[8+5][z+2])<<40) + - 
((uint64_t)(t[8+6][z+2])<<48) + - ((uint64_t)(t[8+7][z+2])<<56); - *(uint64_t *)&out[iN[z+3]+8] = - ((uint64_t)(t[8+0][z+3])<< 0) + - ((uint64_t)(t[8+1][z+3])<< 8) + - ((uint64_t)(t[8+2][z+3])<<16) + - ((uint64_t)(t[8+3][z+3])<<24) + - ((uint64_t)(t[8+4][z+3])<<32) + - ((uint64_t)(t[8+5][z+3])<<40) + - ((uint64_t)(t[8+6][z+3])<<48) + - ((uint64_t)(t[8+7][z+3])<<56); - - *(uint64_t *)&out[iN[z]+16] = - ((uint64_t)(t[16+0][z])<< 0) + - ((uint64_t)(t[16+1][z])<< 8) + - ((uint64_t)(t[16+2][z])<<16) + - ((uint64_t)(t[16+3][z])<<24) + - ((uint64_t)(t[16+4][z])<<32) + - ((uint64_t)(t[16+5][z])<<40) + - ((uint64_t)(t[16+6][z])<<48) + - ((uint64_t)(t[16+7][z])<<56); - *(uint64_t *)&out[iN[z+1]+16] = - ((uint64_t)(t[16+0][z+1])<< 0) + - ((uint64_t)(t[16+1][z+1])<< 8) + - ((uint64_t)(t[16+2][z+1])<<16) + - ((uint64_t)(t[16+3][z+1])<<24) + - ((uint64_t)(t[16+4][z+1])<<32) + - ((uint64_t)(t[16+5][z+1])<<40) + - ((uint64_t)(t[16+6][z+1])<<48) + - ((uint64_t)(t[16+7][z+1])<<56); - *(uint64_t *)&out[iN[z+2]+16] = - ((uint64_t)(t[16+0][z+2])<< 0) + - ((uint64_t)(t[16+1][z+2])<< 8) + - ((uint64_t)(t[16+2][z+2])<<16) + - ((uint64_t)(t[16+3][z+2])<<24) + - ((uint64_t)(t[16+4][z+2])<<32) + - ((uint64_t)(t[16+5][z+2])<<40) + - ((uint64_t)(t[16+6][z+2])<<48) + - ((uint64_t)(t[16+7][z+2])<<56); - *(uint64_t *)&out[iN[z+3]+16] = - ((uint64_t)(t[16+0][z+3])<< 0) + - ((uint64_t)(t[16+1][z+3])<< 8) + - ((uint64_t)(t[16+2][z+3])<<16) + - ((uint64_t)(t[16+3][z+3])<<24) + - ((uint64_t)(t[16+4][z+3])<<32) + - ((uint64_t)(t[16+5][z+3])<<40) + - ((uint64_t)(t[16+6][z+3])<<48) + - ((uint64_t)(t[16+7][z+3])<<56); - - *(uint64_t *)&out[iN[z]+24] = - ((uint64_t)(t[24+0][z])<< 0) + - ((uint64_t)(t[24+1][z])<< 8) + - ((uint64_t)(t[24+2][z])<<16) + - ((uint64_t)(t[24+3][z])<<24) + - ((uint64_t)(t[24+4][z])<<32) + - ((uint64_t)(t[24+5][z])<<40) + - ((uint64_t)(t[24+6][z])<<48) + - ((uint64_t)(t[24+7][z])<<56); - *(uint64_t *)&out[iN[z+1]+24] = - ((uint64_t)(t[24+0][z+1])<< 0) + - 
((uint64_t)(t[24+1][z+1])<< 8) + - ((uint64_t)(t[24+2][z+1])<<16) + - ((uint64_t)(t[24+3][z+1])<<24) + - ((uint64_t)(t[24+4][z+1])<<32) + - ((uint64_t)(t[24+5][z+1])<<40) + - ((uint64_t)(t[24+6][z+1])<<48) + - ((uint64_t)(t[24+7][z+1])<<56); - *(uint64_t *)&out[iN[z+2]+24] = - ((uint64_t)(t[24+0][z+2])<< 0) + - ((uint64_t)(t[24+1][z+2])<< 8) + - ((uint64_t)(t[24+2][z+2])<<16) + - ((uint64_t)(t[24+3][z+2])<<24) + - ((uint64_t)(t[24+4][z+2])<<32) + - ((uint64_t)(t[24+5][z+2])<<40) + - ((uint64_t)(t[24+6][z+2])<<48) + - ((uint64_t)(t[24+7][z+2])<<56); - *(uint64_t *)&out[iN[z+3]+24] = - ((uint64_t)(t[24+0][z+3])<< 0) + - ((uint64_t)(t[24+1][z+3])<< 8) + - ((uint64_t)(t[24+2][z+3])<<16) + - ((uint64_t)(t[24+3][z+3])<<24) + - ((uint64_t)(t[24+4][z+3])<<32) + - ((uint64_t)(t[24+5][z+3])<<40) + - ((uint64_t)(t[24+6][z+3])<<48) + - ((uint64_t)(t[24+7][z+3])<<56); - - iN[z+0] += 32; - iN[z+1] += 32; - iN[z+2] += 32; - iN[z+3] += 32; - } -#endif -} - -unsigned char *rans_uncompress_O1_32x16_sse4(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < NX*4) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL; - unsigned char *c_freq = NULL; - - uint32_t (*s3)[TOTFREQ_O1] = htscodecs_tls_alloc(256*TOTFREQ_O1*4); - if (!s3) - return NULL; - uint32_t (*s3F)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])s3; - - if (!out) - out_free = out = malloc(out_sz); - - if (!out) - goto err; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // compressed header? 
If so uncompress it - unsigned char *tab_end = NULL; - unsigned char *c_freq_end = cp_end; - unsigned int shift = *cp >> 4; - if (*cp++ & 1) { - uint32_t u_freq_sz, c_freq_sz; - cp += var_get_u32(cp, cp_end, &u_freq_sz); - cp += var_get_u32(cp, cp_end, &c_freq_sz); - if (c_freq_sz > cp_end - cp) - goto err; - tab_end = cp + c_freq_sz; - if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL,u_freq_sz))) - goto err; - cp = c_freq; - c_freq_end = c_freq + u_freq_sz; - } - - // Decode order-0 symbol list; avoids needing in order-1 tables - cp += decode_freq1(cp, c_freq_end, shift, s3, s3F, NULL, NULL); - - if (tab_end) - cp = tab_end; - free(c_freq); - c_freq = NULL; - - if (cp_end - cp < NX * 4) - goto err; - - RansState R[NX]; - uint8_t *ptr = cp, *ptr_end = in + in_size; - int z; - for (z = 0; z < NX; z++) { - RansDecInit(&R[z], &ptr); - if (R[z] < RANS_BYTE_L) - goto err; - } - - int isz4 = out_sz/NX; - int i4[NX], l[NX] = {0}; - for (z = 0; z < NX; z++) - i4[z] = z*isz4; - - // Around 15% faster to specialise for 10/12 than to have one - // loop with shift as a variable. 
- if (shift == TF_SHIFT_O1) { - // TF_SHIFT_O1 = 12 - uint16_t *sp = (uint16_t *)ptr; - const uint32_t mask = ((1u << TF_SHIFT_O1)-1); - __m128i maskv = _mm_set1_epi32(mask); // set mask in all lanes - uint8_t tbuf[32][32]; - int tidx = 0; - LOAD128(Rv, R); - LOAD128(Lv, l); - - isz4 -= 64; - for (; i4[0] < isz4 && (uint8_t *)sp+72 < ptr_end; ) { - //for (z = 0; z < NX; z++) - // m[z] = R[z] & mask; - __m128i masked1 = _mm_and_si128(Rv1, maskv); - __m128i masked2 = _mm_and_si128(Rv2, maskv); - __m128i masked3 = _mm_and_si128(Rv3, maskv); - __m128i masked4 = _mm_and_si128(Rv4, maskv); - - Lv1 = _mm_slli_epi32(Lv1, TF_SHIFT_O1); - Lv2 = _mm_slli_epi32(Lv2, TF_SHIFT_O1); - Lv3 = _mm_slli_epi32(Lv3, TF_SHIFT_O1); - Lv4 = _mm_slli_epi32(Lv4, TF_SHIFT_O1); - masked1 = _mm_add_epi32(masked1, Lv1); - masked2 = _mm_add_epi32(masked2, Lv2); - masked3 = _mm_add_epi32(masked3, Lv3); - masked4 = _mm_add_epi32(masked4, Lv4); - - // S[z] = s3[l[z]][m[z]]; - __m128i Sv1 = _mm_i32gather_epi32x((int *)s3, masked1, sizeof(*s3)); - __m128i Sv2 = _mm_i32gather_epi32x((int *)s3, masked2, sizeof(*s3)); - __m128i Sv3 = _mm_i32gather_epi32x((int *)s3, masked3, sizeof(*s3)); - __m128i Sv4 = _mm_i32gather_epi32x((int *)s3, masked4, sizeof(*s3)); - - // f[z] = S[z]>>(TF_SHIFT+8); - __m128i fv1 = _mm_srli_epi32(Sv1, TF_SHIFT_O1+8); - __m128i fv2 = _mm_srli_epi32(Sv2, TF_SHIFT_O1+8); - __m128i fv3 = _mm_srli_epi32(Sv3, TF_SHIFT_O1+8); - __m128i fv4 = _mm_srli_epi32(Sv4, TF_SHIFT_O1+8); - - // b[z] = (S[z]>>8) & mask; - __m128i bv1 = _mm_and_si128(_mm_srli_epi32(Sv1, 8), maskv); - __m128i bv2 = _mm_and_si128(_mm_srli_epi32(Sv2, 8), maskv); - __m128i bv3 = _mm_and_si128(_mm_srli_epi32(Sv3, 8), maskv); - __m128i bv4 = _mm_and_si128(_mm_srli_epi32(Sv4, 8), maskv); - - // s[z] = S[z] & 0xff; - __m128i sv1 = _mm_and_si128(Sv1, _mm_set1_epi32(0xff)); - __m128i sv2 = _mm_and_si128(Sv2, _mm_set1_epi32(0xff)); - __m128i sv3 = _mm_and_si128(Sv3, _mm_set1_epi32(0xff)); - __m128i sv4 = _mm_and_si128(Sv4, 
_mm_set1_epi32(0xff)); - - // A maximum frequency of 4096 doesn't fit in our s3 array. - // as it's 12 bit + 12 bit + 8 bit. It wraps around to zero. - // (We don't have this issue for TOTFREQ_O1_FAST.) - // - // Solution 1 is to change to spec to forbid freq of 4096. - // Easy hack is to add an extra symbol so it sums correctly. - // - // Solution 2 implemented here is to look for the wrap around - // and fix it. - __m128i max_freq = _mm_set1_epi32(TOTFREQ_O1); - __m128i zero = _mm_set1_epi32(0); - __m128i cmp1 = _mm_cmpeq_epi32(fv1, zero); - fv1 = _mm_blendv_epi8(fv1, max_freq, cmp1); - __m128i cmp2 = _mm_cmpeq_epi32(fv2, zero); - fv2 = _mm_blendv_epi8(fv2, max_freq, cmp2); - __m128i cmp3 = _mm_cmpeq_epi32(fv3, zero); - fv3 = _mm_blendv_epi8(fv3, max_freq, cmp3); - __m128i cmp4 = _mm_cmpeq_epi32(fv4, zero); - fv4 = _mm_blendv_epi8(fv4, max_freq, cmp4); - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1) + b[z]; - Rv1 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv1,TF_SHIFT_O1), fv1), bv1); - Rv2 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv2,TF_SHIFT_O1), fv2), bv2); - Rv3 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv3,TF_SHIFT_O1), fv3), bv3); - Rv4 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv4,TF_SHIFT_O1), fv4), bv4); - - Lv1 = sv1; - Lv2 = sv2; - Lv3 = sv3; - Lv4 = sv4; - - // Tricky one: out[i+z] = s[z]; - // ---d---c ---b---a sv1 - // ---h---g ---f---e sv2 - // packs_epi32 -h-g-f-e -d-c-b-a sv1(2) - // packs_epi16 ponmlkji hgfedcba sv1(2) / sv3(4) - sv1 = _mm_packus_epi32(sv1, sv2); - sv3 = _mm_packus_epi32(sv3, sv4); - sv1 = _mm_packus_epi16(sv1, sv3); - - // c = R[z] < RANS_BYTE_L; - // A little tricky as we only have signed comparisons. 
- // See https://stackoverflow.com/questions/32945410/sse2-intrinsics-comparing-unsigned-integers - -//#define _mm_cmplt_epu32_imm(a,b) _mm_andnot_si128(_mm_cmpeq_epi32(_mm_max_epu32((a),_mm_set1_epi32(b)), (a)), _mm_set1_epi32(-1)); - - //#define _mm_cmplt_epu32_imm(a,b) _mm_cmpgt_epi32(_mm_set1_epi32((b)-0x80000000), _mm_xor_si128((a), _mm_set1_epi32(0x80000000))) - - __m128i renorm_mask1, renorm_mask2, renorm_mask3, renorm_mask4; - renorm_mask1 = _mm_cmplt_epu32_imm(Rv1, RANS_BYTE_L); - renorm_mask2 = _mm_cmplt_epu32_imm(Rv2, RANS_BYTE_L); - renorm_mask3 = _mm_cmplt_epu32_imm(Rv3, RANS_BYTE_L); - renorm_mask4 = _mm_cmplt_epu32_imm(Rv4, RANS_BYTE_L); - - //#define P(A,B,C,D) ((A)+((B)<<2) + ((C)<<4) + ((D)<<6)) -#define P(A,B,C,D) \ - { A+0,A+1,A+2,A+3, \ - B+0,B+1,B+2,B+3, \ - C+0,C+1,C+2,C+3, \ - D+0,D+1,D+2,D+3} -#ifdef _ -#undef _ -#endif -#define _ 0x80 - uint8_t pidx[16][16] = { - P(_,_,_,_), - P(0,_,_,_), - P(_,0,_,_), - P(0,4,_,_), - - P(_,_,0,_), - P(0,_,4,_), - P(_,0,4,_), - P(0,4,8,_), - - P(_,_,_,0), - P(0,_,_,4), - P(_,0,_,4), - P(0,4,_,8), - - P(_,_,0,4), - P(0,_,4,8), - P(_,0,4,8), - P(0,4,8,12), - }; -#undef _ - - // Shuffle the renorm values to correct lanes and incr sp pointer - __m128i Vv1 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask1 = _mm_movemask_ps((__m128)renorm_mask1); - Vv1 = _mm_shuffle_epi8(Vv1, _mm_load_si128((__m128i*)pidx[imask1])); - sp += _mm_popcnt_u32(imask1); - - __m128i Vv2 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask2 = _mm_movemask_ps((__m128)renorm_mask2); - sp += _mm_popcnt_u32(imask2); - Vv2 = _mm_shuffle_epi8(Vv2, _mm_load_si128((__m128i*)pidx[imask2])); - - __m128i Vv3 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask3 = _mm_movemask_ps((__m128)renorm_mask3); - Vv3 = _mm_shuffle_epi8(Vv3, _mm_load_si128((__m128i*)pidx[imask3])); - sp += _mm_popcnt_u32(imask3); - - __m128i Vv4 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - 
unsigned int imask4 = _mm_movemask_ps((__m128)renorm_mask4); - sp += _mm_popcnt_u32(imask4); - Vv4 = _mm_shuffle_epi8(Vv4, _mm_load_si128((__m128i*)pidx[imask4])); - - __m128i Yv1 = _mm_slli_epi32(Rv1, 16); - __m128i Yv2 = _mm_slli_epi32(Rv2, 16); - __m128i Yv3 = _mm_slli_epi32(Rv3, 16); - __m128i Yv4 = _mm_slli_epi32(Rv4, 16); - - // y = (R[z] << 16) | V[z]; - Yv1 = _mm_or_si128(Yv1, Vv1); - Yv2 = _mm_or_si128(Yv2, Vv2); - Yv3 = _mm_or_si128(Yv3, Vv3); - Yv4 = _mm_or_si128(Yv4, Vv4); - - // R[z] = c ? Y[z] : R[z]; - Rv1 = _mm_blendv_epi8(Rv1, Yv1, renorm_mask1); - Rv2 = _mm_blendv_epi8(Rv2, Yv2, renorm_mask2); - Rv3 = _mm_blendv_epi8(Rv3, Yv3, renorm_mask3); - Rv4 = _mm_blendv_epi8(Rv4, Yv4, renorm_mask4); - - // ------------------------------------------------------------ - - // m[z] = R[z] & mask; - __m128i masked5 = _mm_and_si128(Rv5, maskv); - __m128i masked6 = _mm_and_si128(Rv6, maskv); - __m128i masked7 = _mm_and_si128(Rv7, maskv); - __m128i masked8 = _mm_and_si128(Rv8, maskv); - - - Lv5 = _mm_slli_epi32(Lv5, TF_SHIFT_O1); - Lv6 = _mm_slli_epi32(Lv6, TF_SHIFT_O1); - Lv7 = _mm_slli_epi32(Lv7, TF_SHIFT_O1); - Lv8 = _mm_slli_epi32(Lv8, TF_SHIFT_O1); - masked5 = _mm_add_epi32(masked5, Lv5); - masked6 = _mm_add_epi32(masked6, Lv6); - masked7 = _mm_add_epi32(masked7, Lv7); - masked8 = _mm_add_epi32(masked8, Lv8); - - // S[z] = s3[m[z]]; - __m128i Sv5 = _mm_i32gather_epi32x((int *)s3, masked5, sizeof(*s3)); - __m128i Sv6 = _mm_i32gather_epi32x((int *)s3, masked6, sizeof(*s3)); - __m128i Sv7 = _mm_i32gather_epi32x((int *)s3, masked7, sizeof(*s3)); - __m128i Sv8 = _mm_i32gather_epi32x((int *)s3, masked8, sizeof(*s3)); - - // f[z] = S[z]>>(TF_SHIFT_O1+8); - __m128i fv5 = _mm_srli_epi32(Sv5, TF_SHIFT_O1+8); - __m128i fv6 = _mm_srli_epi32(Sv6, TF_SHIFT_O1+8); - __m128i fv7 = _mm_srli_epi32(Sv7, TF_SHIFT_O1+8); - __m128i fv8 = _mm_srli_epi32(Sv8, TF_SHIFT_O1+8); - - // b[z] = (S[z]>>8) & mask; - __m128i bv5 = _mm_and_si128(_mm_srli_epi32(Sv5, 8), maskv); - __m128i bv6 = 
_mm_and_si128(_mm_srli_epi32(Sv6, 8), maskv); - __m128i bv7 = _mm_and_si128(_mm_srli_epi32(Sv7, 8), maskv); - __m128i bv8 = _mm_and_si128(_mm_srli_epi32(Sv8, 8), maskv); - - // s[z] = S[z] & 0xff; - __m128i sv5 = _mm_and_si128(Sv5, _mm_set1_epi32(0xff)); - __m128i sv6 = _mm_and_si128(Sv6, _mm_set1_epi32(0xff)); - __m128i sv7 = _mm_and_si128(Sv7, _mm_set1_epi32(0xff)); - __m128i sv8 = _mm_and_si128(Sv8, _mm_set1_epi32(0xff)); - - // A maximum frequency of 4096 doesn't fit in our s3 array. Fix - __m128i cmp5 = _mm_cmpeq_epi32(fv5, zero); - fv5 = _mm_blendv_epi8(fv5, max_freq, cmp5); - __m128i cmp6 = _mm_cmpeq_epi32(fv6, zero); - fv6 = _mm_blendv_epi8(fv6, max_freq, cmp6); - __m128i cmp7 = _mm_cmpeq_epi32(fv7, zero); - fv7 = _mm_blendv_epi8(fv7, max_freq, cmp7); - __m128i cmp8 = _mm_cmpeq_epi32(fv8, zero); - fv8 = _mm_blendv_epi8(fv8, max_freq, cmp8); - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1) + b[z]; - Rv5 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv5,TF_SHIFT_O1), fv5), bv5); - Rv6 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv6,TF_SHIFT_O1), fv6), bv6); - Rv7 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv7,TF_SHIFT_O1), fv7), bv7); - Rv8 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv8,TF_SHIFT_O1), fv8), bv8); - - Lv5 = sv5; - Lv6 = sv6; - Lv7 = sv7; - Lv8 = sv8; - - // Tricky one: out[i+z] = s[z]; - // ---d---c ---b---a sv1 - // ---h---g ---f---e sv2 - // packs_epi32 -h-g-f-e -d-c-b-a sv1(2) - // packs_epi16 ponmlkji hgfedcba sv1(2) / sv3(4) - sv5 = _mm_packus_epi32(sv5, sv6); - sv7 = _mm_packus_epi32(sv7, sv8); - sv5 = _mm_packus_epi16(sv5, sv7); - - // c = R[z] < RANS_BYTE_L; - __m128i renorm_mask5, renorm_mask6, renorm_mask7, renorm_mask8; - renorm_mask5 = _mm_cmplt_epu32_imm(Rv5, RANS_BYTE_L); - renorm_mask6 = _mm_cmplt_epu32_imm(Rv6, RANS_BYTE_L); - renorm_mask7 = _mm_cmplt_epu32_imm(Rv7, RANS_BYTE_L); - renorm_mask8 = _mm_cmplt_epu32_imm(Rv8, RANS_BYTE_L); - - // Shuffle the renorm values to correct lanes and incr sp 
pointer - __m128i Vv5 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask5 = _mm_movemask_ps((__m128)renorm_mask5); - Vv5 = _mm_shuffle_epi8(Vv5, _mm_load_si128((__m128i*)pidx[imask5])); - sp += _mm_popcnt_u32(imask5); - - __m128i Vv6 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask6 = _mm_movemask_ps((__m128)renorm_mask6); - sp += _mm_popcnt_u32(imask6); - Vv6 = _mm_shuffle_epi8(Vv6, _mm_load_si128((__m128i*)pidx[imask6])); - - __m128i Vv7 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask7 = _mm_movemask_ps((__m128)renorm_mask7); - Vv7 = _mm_shuffle_epi8(Vv7, _mm_load_si128((__m128i*)pidx[imask7])); - sp += _mm_popcnt_u32(imask7); - - __m128i Vv8 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask8 = _mm_movemask_ps((__m128)renorm_mask8); - sp += _mm_popcnt_u32(imask8); - Vv8 = _mm_shuffle_epi8(Vv8, _mm_load_si128((__m128i*)pidx[imask8])); - - __m128i Yv5 = _mm_slli_epi32(Rv5, 16); - __m128i Yv6 = _mm_slli_epi32(Rv6, 16); - __m128i Yv7 = _mm_slli_epi32(Rv7, 16); - __m128i Yv8 = _mm_slli_epi32(Rv8, 16); - - // y = (R[z] << 16) | V[z]; - Yv5 = _mm_or_si128(Yv5, Vv5); - Yv6 = _mm_or_si128(Yv6, Vv6); - Yv7 = _mm_or_si128(Yv7, Vv7); - Yv8 = _mm_or_si128(Yv8, Vv8); - - // R[z] = c ? Y[z] : R[z]; - Rv5 = _mm_blendv_epi8(Rv5, Yv5, renorm_mask5); - Rv6 = _mm_blendv_epi8(Rv6, Yv6, renorm_mask6); - Rv7 = _mm_blendv_epi8(Rv7, Yv7, renorm_mask7); - Rv8 = _mm_blendv_epi8(Rv8, Yv8, renorm_mask8); - - // Maybe just a store128 instead? - _mm_store_si128((__m128i *)&tbuf[tidx][ 0], sv1); - _mm_store_si128((__m128i *)&tbuf[tidx][16], sv5); - // *(uint64_t *)&out[i+ 0] = _mm_extract_epi64(sv1, 0); - // *(uint64_t *)&out[i+ 8] = _mm_extract_epi64(sv1, 1); - // *(uint64_t *)&out[i+16] = _mm_extract_epi64(sv5, 0); - // *(uint64_t *)&out[i+24] = _mm_extract_epi64(sv5, 1); - - // WRONG - need to reorder these periodically. 
- - i4[0]++; - if (++tidx == 32) { - i4[0]-=32; - transpose_and_copy(out, i4, tbuf); - tidx = 0; - } - - } - isz4 += 64; - - STORE128(Rv, R); - STORE128(Lv, l); - ptr = (uint8_t *)sp; - - i4[0]-=tidx; - int T; - for (z = 0; z < NX; z++) - for (T = 0; T < tidx; T++) - out[i4[z]++] = tbuf[T][z]; - - // Scalar version for close to the end of in[] array so we don't - // do SIMD loads beyond the end of the buffer - for (; i4[0] < isz4;) { - for (z = 0; z < NX; z++) { - uint32_t m = R[z] & ((1u<>(TF_SHIFT_O1+8); - R[z] = (F?F:4096) * (R[z]>>TF_SHIFT_O1) + - ((S>>8) & ((1u<>(TF_SHIFT_O1+8)) * (R[z]>>TF_SHIFT_O1) + - ((S>>8) & ((1u<>(TF_SHIFT+8); - __m128i fv1 = _mm_srli_epi32(Sv1, TF_SHIFT_O1_FAST+8); - __m128i fv2 = _mm_srli_epi32(Sv2, TF_SHIFT_O1_FAST+8); - __m128i fv3 = _mm_srli_epi32(Sv3, TF_SHIFT_O1_FAST+8); - __m128i fv4 = _mm_srli_epi32(Sv4, TF_SHIFT_O1_FAST+8); - - // b[z] = (S[z]>>8) & mask; - __m128i bv1 = _mm_and_si128(_mm_srli_epi32(Sv1, 8), maskv); - __m128i bv2 = _mm_and_si128(_mm_srli_epi32(Sv2, 8), maskv); - __m128i bv3 = _mm_and_si128(_mm_srli_epi32(Sv3, 8), maskv); - __m128i bv4 = _mm_and_si128(_mm_srli_epi32(Sv4, 8), maskv); - - // s[z] = S[z] & 0xff; - __m128i sv1 = _mm_and_si128(Sv1, _mm_set1_epi32(0xff)); - __m128i sv2 = _mm_and_si128(Sv2, _mm_set1_epi32(0xff)); - __m128i sv3 = _mm_and_si128(Sv3, _mm_set1_epi32(0xff)); - __m128i sv4 = _mm_and_si128(Sv4, _mm_set1_epi32(0xff)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1_FAST) + b[z]; - Rv1 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv1,TF_SHIFT_O1_FAST), fv1), bv1); - Rv2 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv2,TF_SHIFT_O1_FAST), fv2), bv2); - Rv3 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv3,TF_SHIFT_O1_FAST), fv3), bv3); - Rv4 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv4,TF_SHIFT_O1_FAST), fv4), bv4); - - Lv1 = sv1; - Lv2 = sv2; - Lv3 = sv3; - Lv4 = sv4; - - // Tricky one: out[i+z] = s[z]; - // ---d---c ---b---a sv1 - // ---h---g ---f---e sv2 - // 
packs_epi32 -h-g-f-e -d-c-b-a sv1(2) - // packs_epi16 ponmlkji hgfedcba sv1(2) / sv3(4) - sv1 = _mm_packus_epi32(sv1, sv2); - sv3 = _mm_packus_epi32(sv3, sv4); - sv1 = _mm_packus_epi16(sv1, sv3); - - // c = R[z] < RANS_BYTE_L; - // A little tricky as we only have signed comparisons. - // See https://stackoverflow.com/questions/32945410/sse2-intrinsics-comparing-unsigned-integers - -//#define _mm_cmplt_epu32_imm(a,b) _mm_andnot_si128(_mm_cmpeq_epi32(_mm_max_epu32((a),_mm_set1_epi32(b)), (a)), _mm_set1_epi32(-1)); - - //#define _mm_cmplt_epu32_imm(a,b) _mm_cmpgt_epi32(_mm_set1_epi32((b)-0x80000000), _mm_xor_si128((a), _mm_set1_epi32(0x80000000))) - - __m128i renorm_mask1, renorm_mask2, renorm_mask3, renorm_mask4; - renorm_mask1 = _mm_cmplt_epu32_imm(Rv1, RANS_BYTE_L); - renorm_mask2 = _mm_cmplt_epu32_imm(Rv2, RANS_BYTE_L); - renorm_mask3 = _mm_cmplt_epu32_imm(Rv3, RANS_BYTE_L); - renorm_mask4 = _mm_cmplt_epu32_imm(Rv4, RANS_BYTE_L); - - //#define P(A,B,C,D) ((A)+((B)<<2) + ((C)<<4) + ((D)<<6)) -#define P(A,B,C,D) \ - { A+0,A+1,A+2,A+3, \ - B+0,B+1,B+2,B+3, \ - C+0,C+1,C+2,C+3, \ - D+0,D+1,D+2,D+3} -#ifdef _ -#undef _ -#endif -#define _ 0x80 - uint8_t pidx[16][16] = { - P(_,_,_,_), - P(0,_,_,_), - P(_,0,_,_), - P(0,4,_,_), - - P(_,_,0,_), - P(0,_,4,_), - P(_,0,4,_), - P(0,4,8,_), - - P(_,_,_,0), - P(0,_,_,4), - P(_,0,_,4), - P(0,4,_,8), - - P(_,_,0,4), - P(0,_,4,8), - P(_,0,4,8), - P(0,4,8,12), - }; -#undef _ - - // Shuffle the renorm values to correct lanes and incr sp pointer - __m128i Vv1 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask1 = _mm_movemask_ps((__m128)renorm_mask1); - Vv1 = _mm_shuffle_epi8(Vv1, _mm_load_si128((__m128i*)pidx[imask1])); - sp += _mm_popcnt_u32(imask1); - - __m128i Vv2 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask2 = _mm_movemask_ps((__m128)renorm_mask2); - sp += _mm_popcnt_u32(imask2); - Vv2 = _mm_shuffle_epi8(Vv2, _mm_load_si128((__m128i*)pidx[imask2])); - - __m128i Vv3 = 
_mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask3 = _mm_movemask_ps((__m128)renorm_mask3); - Vv3 = _mm_shuffle_epi8(Vv3, _mm_load_si128((__m128i*)pidx[imask3])); - sp += _mm_popcnt_u32(imask3); - - __m128i Vv4 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask4 = _mm_movemask_ps((__m128)renorm_mask4); - sp += _mm_popcnt_u32(imask4); - Vv4 = _mm_shuffle_epi8(Vv4, _mm_load_si128((__m128i*)pidx[imask4])); - - __m128i Yv1 = _mm_slli_epi32(Rv1, 16); - __m128i Yv2 = _mm_slli_epi32(Rv2, 16); - __m128i Yv3 = _mm_slli_epi32(Rv3, 16); - __m128i Yv4 = _mm_slli_epi32(Rv4, 16); - - // y = (R[z] << 16) | V[z]; - Yv1 = _mm_or_si128(Yv1, Vv1); - Yv2 = _mm_or_si128(Yv2, Vv2); - Yv3 = _mm_or_si128(Yv3, Vv3); - Yv4 = _mm_or_si128(Yv4, Vv4); - - // R[z] = c ? Y[z] : R[z]; - Rv1 = _mm_blendv_epi8(Rv1, Yv1, renorm_mask1); - Rv2 = _mm_blendv_epi8(Rv2, Yv2, renorm_mask2); - Rv3 = _mm_blendv_epi8(Rv3, Yv3, renorm_mask3); - Rv4 = _mm_blendv_epi8(Rv4, Yv4, renorm_mask4); - - // ------------------------------------------------------------ - - // m[z] = R[z] & mask; - __m128i masked5 = _mm_and_si128(Rv5, maskv); - __m128i masked6 = _mm_and_si128(Rv6, maskv); - __m128i masked7 = _mm_and_si128(Rv7, maskv); - __m128i masked8 = _mm_and_si128(Rv8, maskv); - - - Lv5 = _mm_slli_epi32(Lv5, TF_SHIFT_O1_FAST); - Lv6 = _mm_slli_epi32(Lv6, TF_SHIFT_O1_FAST); - Lv7 = _mm_slli_epi32(Lv7, TF_SHIFT_O1_FAST); - Lv8 = _mm_slli_epi32(Lv8, TF_SHIFT_O1_FAST); - masked5 = _mm_add_epi32(masked5, Lv5); - masked6 = _mm_add_epi32(masked6, Lv6); - masked7 = _mm_add_epi32(masked7, Lv7); - masked8 = _mm_add_epi32(masked8, Lv8); - - // S[z] = s3[m[z]]; - __m128i Sv5 = _mm_i32gather_epi32x((int *)s3F, masked5, sizeof(*s3)); - __m128i Sv6 = _mm_i32gather_epi32x((int *)s3F, masked6, sizeof(*s3)); - __m128i Sv7 = _mm_i32gather_epi32x((int *)s3F, masked7, sizeof(*s3)); - __m128i Sv8 = _mm_i32gather_epi32x((int *)s3F, masked8, sizeof(*s3)); - - // f[z] = S[z]>>(TF_SHIFT_O1_FAST+8); 
- __m128i fv5 = _mm_srli_epi32(Sv5, TF_SHIFT_O1_FAST+8); - __m128i fv6 = _mm_srli_epi32(Sv6, TF_SHIFT_O1_FAST+8); - __m128i fv7 = _mm_srli_epi32(Sv7, TF_SHIFT_O1_FAST+8); - __m128i fv8 = _mm_srli_epi32(Sv8, TF_SHIFT_O1_FAST+8); - - // b[z] = (S[z]>>8) & mask; - __m128i bv5 = _mm_and_si128(_mm_srli_epi32(Sv5, 8), maskv); - __m128i bv6 = _mm_and_si128(_mm_srli_epi32(Sv6, 8), maskv); - __m128i bv7 = _mm_and_si128(_mm_srli_epi32(Sv7, 8), maskv); - __m128i bv8 = _mm_and_si128(_mm_srli_epi32(Sv8, 8), maskv); - - // s[z] = S[z] & 0xff; - __m128i sv5 = _mm_and_si128(Sv5, _mm_set1_epi32(0xff)); - __m128i sv6 = _mm_and_si128(Sv6, _mm_set1_epi32(0xff)); - __m128i sv7 = _mm_and_si128(Sv7, _mm_set1_epi32(0xff)); - __m128i sv8 = _mm_and_si128(Sv8, _mm_set1_epi32(0xff)); - - // R[z] = f[z] * (R[z] >> TF_SHIFT_O1_FAST) + b[z]; - Rv5 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv5,TF_SHIFT_O1_FAST), fv5), bv5); - Rv6 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv6,TF_SHIFT_O1_FAST), fv6), bv6); - Rv7 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv7,TF_SHIFT_O1_FAST), fv7), bv7); - Rv8 = _mm_add_epi32( - _mm_mullo_epi32( - _mm_srli_epi32(Rv8,TF_SHIFT_O1_FAST), fv8), bv8); - - Lv5 = sv5; - Lv6 = sv6; - Lv7 = sv7; - Lv8 = sv8; - - // Tricky one: out[i+z] = s[z]; - // ---d---c ---b---a sv1 - // ---h---g ---f---e sv2 - // packs_epi32 -h-g-f-e -d-c-b-a sv1(2) - // packs_epi16 ponmlkji hgfedcba sv1(2) / sv3(4) - sv5 = _mm_packus_epi32(sv5, sv6); - sv7 = _mm_packus_epi32(sv7, sv8); - sv5 = _mm_packus_epi16(sv5, sv7); - - // c = R[z] < RANS_BYTE_L; - __m128i renorm_mask5, renorm_mask6, renorm_mask7, renorm_mask8; - renorm_mask5 = _mm_cmplt_epu32_imm(Rv5, RANS_BYTE_L); - renorm_mask6 = _mm_cmplt_epu32_imm(Rv6, RANS_BYTE_L); - renorm_mask7 = _mm_cmplt_epu32_imm(Rv7, RANS_BYTE_L); - renorm_mask8 = _mm_cmplt_epu32_imm(Rv8, RANS_BYTE_L); - - // Shuffle the renorm values to correct lanes and incr sp pointer - __m128i Vv5 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i 
*)sp)); - unsigned int imask5 = _mm_movemask_ps((__m128)renorm_mask5); - Vv5 = _mm_shuffle_epi8(Vv5, _mm_load_si128((__m128i*)pidx[imask5])); - sp += _mm_popcnt_u32(imask5); - - __m128i Vv6 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask6 = _mm_movemask_ps((__m128)renorm_mask6); - sp += _mm_popcnt_u32(imask6); - Vv6 = _mm_shuffle_epi8(Vv6, _mm_load_si128((__m128i*)pidx[imask6])); - - __m128i Vv7 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask7 = _mm_movemask_ps((__m128)renorm_mask7); - Vv7 = _mm_shuffle_epi8(Vv7, _mm_load_si128((__m128i*)pidx[imask7])); - sp += _mm_popcnt_u32(imask7); - - __m128i Vv8 = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)sp)); - unsigned int imask8 = _mm_movemask_ps((__m128)renorm_mask8); - sp += _mm_popcnt_u32(imask8); - Vv8 = _mm_shuffle_epi8(Vv8, _mm_load_si128((__m128i*)pidx[imask8])); - - __m128i Yv5 = _mm_slli_epi32(Rv5, 16); - __m128i Yv6 = _mm_slli_epi32(Rv6, 16); - __m128i Yv7 = _mm_slli_epi32(Rv7, 16); - __m128i Yv8 = _mm_slli_epi32(Rv8, 16); - - // y = (R[z] << 16) | V[z]; - Yv5 = _mm_or_si128(Yv5, Vv5); - Yv6 = _mm_or_si128(Yv6, Vv6); - Yv7 = _mm_or_si128(Yv7, Vv7); - Yv8 = _mm_or_si128(Yv8, Vv8); - - // R[z] = c ? Y[z] : R[z]; - Rv5 = _mm_blendv_epi8(Rv5, Yv5, renorm_mask5); - Rv6 = _mm_blendv_epi8(Rv6, Yv6, renorm_mask6); - Rv7 = _mm_blendv_epi8(Rv7, Yv7, renorm_mask7); - Rv8 = _mm_blendv_epi8(Rv8, Yv8, renorm_mask8); - - // Maybe just a store128 instead? - _mm_store_si128((__m128i *)&tbuf[tidx][ 0], sv1); - _mm_store_si128((__m128i *)&tbuf[tidx][16], sv5); - // *(uint64_t *)&out[i+ 0] = _mm_extract_epi64(sv1, 0); - // *(uint64_t *)&out[i+ 8] = _mm_extract_epi64(sv1, 1); - // *(uint64_t *)&out[i+16] = _mm_extract_epi64(sv5, 0); - // *(uint64_t *)&out[i+24] = _mm_extract_epi64(sv5, 1); - - // WRONG - need to reorder these periodically. 
- - i4[0]++; - if (++tidx == 32) { - i4[0]-=32; - transpose_and_copy(out, i4, tbuf); - tidx = 0; - } - } - isz4 += 64; - - STORE128(Rv, R); - STORE128(Lv, l); - ptr = (uint8_t *)sp; - - i4[0]-=tidx; - int T; - for (z = 0; z < NX; z++) - for (T = 0; T < tidx; T++) - out[i4[z]++] = tbuf[T][z]; - - // Scalar version for close to the end of in[] array so we don't - // do SIMD loads beyond the end of the buffer - for (; i4[0] < isz4;) { - for (z = 0; z < NX; z++) { - uint32_t m = R[z] & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[z]>>TF_SHIFT_O1_FAST) + - ((S>>8) & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[z]>>TF_SHIFT_O1_FAST) + - ((S>>8) & ((1u< -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef NO_THREADS -#include -#endif - -#include "rANS_word.h" -#include "rANS_static4x16.h" -#include "rANS_static16_int.h" -#include "pack.h" -#include "rle.h" -#include "utils.h" - -#define TF_SHIFT 12 -#define TOTFREQ (1<>8) & 0xff; - if (!N) N=4; - - order &= 0xff; - unsigned int sz = (order == 0 - ? 1.05*size + 257*3 + 4 - : 1.05*size + 257*257*3 + 4 + 257*3+4) + - ((order & RANS_ORDER_PACK) ? 1 : 0) + - ((order & RANS_ORDER_RLE) ? 1 + 257*3+4: 0) + 20 + - ((order & RANS_ORDER_X32) ? (32-4)*4 : 0) + - ((order & RANS_ORDER_STRIPE) ? 7 + 5*N: 0); - return sz + (sz&1) + 2; // make this even so buffers are word aligned -} - -// Compresses in_size bytes from 'in' to *out_size bytes in 'out'. -// -// NB: The output buffer does not hold the original size, so it is up to -// the caller to store this. 
-unsigned char *rans_compress_O0_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - unsigned char *cp, *out_end; - RansEncSymbol syms[256]; - RansState rans0; - RansState rans2; - RansState rans1; - RansState rans3; - uint8_t* ptr; - uint32_t F[256+MAGIC] = {0}; - int i, j, tab_size = 0, rle, x; - // -20 for order/size/meta - uint32_t bound = rans_compress_bound_4x16(in_size,0)-20; - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - // If "out" isn't word aligned, tweak out_end/ptr to ensure it is. - // We already added more round in bound to allow for this. - if (((size_t)out)&1) - bound--; - ptr = out_end = out + bound; - - if (in_size == 0) - goto empty; - - // Compute statistics - if (hist8(in, in_size, F) < 0) - return NULL; - - // Normalise so frequences sum to power of 2 - uint32_t fsum = in_size; - uint32_t max_val = round2(fsum); - if (max_val > TOTFREQ) - max_val = TOTFREQ; - - if (normalise_freq(F, fsum, max_val) < 0) - return NULL; - fsum=max_val; - - cp = out; - cp += encode_freq(cp, F); - tab_size = cp-out; - //write(2, out+4, cp-(out+4)); - - if (normalise_freq(F, fsum, TOTFREQ) < 0) - return NULL; - - // Encode statistics. 
- for (x = rle = j = 0; j < 256; j++) { - if (F[j]) { - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - x += F[j]; - } - } - - RansEncInit(&rans0); - RansEncInit(&rans1); - RansEncInit(&rans2); - RansEncInit(&rans3); - - switch (i=(in_size&3)) { - case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); - case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); - case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); - case 0: - break; - } - for (i=(in_size &~3); i>0; i-=4) { - RansEncSymbol *s3 = &syms[in[i-1]]; - RansEncSymbol *s2 = &syms[in[i-2]]; - RansEncSymbol *s1 = &syms[in[i-3]]; - RansEncSymbol *s0 = &syms[in[i-4]]; - -#if 1 - RansEncPutSymbol(&rans3, &ptr, s3); - RansEncPutSymbol(&rans2, &ptr, s2); - RansEncPutSymbol(&rans1, &ptr, s1); - RansEncPutSymbol(&rans0, &ptr, s0); -#else - // Slightly beter on gcc, much better on clang - uint16_t *ptr16 = (uint16_t *)ptr; - - if (rans3 >= s3->x_max) *--ptr16 = (uint16_t)rans3, rans3 >>= 16; - if (rans2 >= s2->x_max) *--ptr16 = (uint16_t)rans2, rans2 >>= 16; - uint32_t q3 = (uint32_t) (((uint64_t)rans3 * s3->rcp_freq) >> s3->rcp_shift); - uint32_t q2 = (uint32_t) (((uint64_t)rans2 * s2->rcp_freq) >> s2->rcp_shift); - rans3 += s3->bias + q3 * s3->cmpl_freq; - rans2 += s2->bias + q2 * s2->cmpl_freq; - - if (rans1 >= s1->x_max) *--ptr16 = (uint16_t)rans1, rans1 >>= 16; - if (rans0 >= s0->x_max) *--ptr16 = (uint16_t)rans0, rans0 >>= 16; - uint32_t q1 = (uint32_t) (((uint64_t)rans1 * s1->rcp_freq) >> s1->rcp_shift); - uint32_t q0 = (uint32_t) (((uint64_t)rans0 * s0->rcp_freq) >> s0->rcp_shift); - rans1 += s1->bias + q1 * s1->cmpl_freq; - rans0 += s0->bias + q0 * s0->cmpl_freq; - - ptr = (uint8_t *)ptr16; -#endif - } - - RansEncFlush(&rans3, &ptr); - RansEncFlush(&rans2, &ptr); - RansEncFlush(&rans1, &ptr); - RansEncFlush(&rans0, &ptr); - - empty: - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - - memmove(out + tab_size, ptr, out_end-ptr); - - return out; 
-} - -unsigned char *rans_uncompress_O0_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - if (in_size < 16) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *out_free = NULL; - unsigned char *cp_end = in + in_size - 8; // within 8 => be extra safe - int i, j; - unsigned int x, y; - uint16_t sfreq[TOTFREQ+32]; - uint16_t sbase[TOTFREQ+32]; // faster to use 32-bit on clang - uint8_t ssym [TOTFREQ+64]; // faster to use 16-bit on clang - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - // Precompute reverse lookup of frequency. - uint32_t F[256] = {0}, fsum; - int fsz = decode_freq(cp, cp_end, F, &fsum); - if (!fsz) - goto err; - cp += fsz; - - normalise_freq_shift(F, fsum, TOTFREQ); - - // Build symbols; fixme, do as part of decode, see the _d variant - for (j = x = 0; j < 256; j++) { - if (F[j]) { - if (F[j] > TOTFREQ - x) - goto err; - for (y = 0; y < F[j]; y++) { - ssym [y + x] = j; - sfreq[y + x] = F[j]; - sbase[y + x] = y; - } - x += F[j]; - } - } - - if (x != TOTFREQ) - goto err; - - if (cp+16 > cp_end+8) - goto err; - - RansState R[4]; - RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto err; - RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto err; - RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto err; - RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto err; - -// Simple version is comparable to below, but only with -O3 -// -// for (i = 0; cp < cp_end-8 && i < (out_sz&~7); i+=8) { -// for(j=0; j<8;j++) { -// RansState m = RansDecGet(&R[j%4], TF_SHIFT); -// R[j%4] = sfreq[m] * (R[j%4] >> TF_SHIFT) + sbase[m]; -// out[i+j] = ssym[m]; -// RansDecRenorm(&R[j%4], &cp); -// } -// } - - for (i = 0; cp < cp_end-8 && i < (out_sz&~7); i+=8) { - for (j = 0; j < 
8; j+=4) { - RansState m0 = RansDecGet(&R[0], TF_SHIFT); - RansState m1 = RansDecGet(&R[1], TF_SHIFT); - out[i+j+0] = ssym[m0]; - out[i+j+1] = ssym[m1]; - - R[0] = sfreq[m0] * (R[0] >> TF_SHIFT) + sbase[m0]; - R[1] = sfreq[m1] * (R[1] >> TF_SHIFT) + sbase[m1]; - - RansState m2 = RansDecGet(&R[2], TF_SHIFT); - RansState m3 = RansDecGet(&R[3], TF_SHIFT); - - RansDecRenorm(&R[0], &cp); - RansDecRenorm(&R[1], &cp); - - R[2] = sfreq[m2] * (R[2] >> TF_SHIFT) + sbase[m2]; - R[3] = sfreq[m3] * (R[3] >> TF_SHIFT) + sbase[m3]; - - RansDecRenorm(&R[2], &cp); - RansDecRenorm(&R[3], &cp); - - out[i+j+2] = ssym[m2]; - out[i+j+3] = ssym[m3]; - } - } - - // remainder - for (; i < out_sz; i++) { - RansState m = RansDecGet(&R[i%4], TF_SHIFT); - R[i%4] = sfreq[m] * (R[i%4] >> TF_SHIFT) + sbase[m]; - out[i] = ssym[m]; - RansDecRenormSafe(&R[i%4], &cp, cp_end+8); - } - - //fprintf(stderr, " 0 Decoded %d bytes\n", (int)(cp-in)); //c-size - - return out; - - err: - free(out_free); - return NULL; -} - -//----------------------------------------------------------------------------- - -// Compute the entropy of 12-bit vs 10-bit frequency tables. -// 10 bit means smaller memory footprint when decoding and -// more speed due to cache hits, but it *may* be a poor -// compression fit. 
-int rans_compute_shift(uint32_t *F0, uint32_t (*F)[256], uint32_t *T, - uint32_t *S) { - int i, j; - - double e10 = 0, e12 = 0; - int max_tot = 0; - for (i = 0; i < 256; i++) { - if (F0[i] == 0) - continue; - unsigned int max_val = round2(T[i]); - int ns = 0; -#define MAX(a,b) ((a)>(b)?(a):(b)) - - // Number of samples that get their freq bumped to 1 - int sm10 = 0, sm12 = 0; - for (j = 0; j < 256; j++) { - if (F[i][j] && max_val / F[i][j] > TOTFREQ_O1_FAST) - sm10++; - if (F[i][j] && max_val / F[i][j] > TOTFREQ_O1) - sm12++; - } - - double l10 = log(TOTFREQ_O1_FAST + sm10); - double l12 = log(TOTFREQ_O1 + sm12); - double T_slow = (double)TOTFREQ_O1/T[i]; - double T_fast = (double)TOTFREQ_O1_FAST/T[i]; - - for (j = 0; j < 256; j++) { - if (F[i][j]) { - ns++; - - e10 -= F[i][j] * (fast_log(MAX(F[i][j]*T_fast,1)) - l10); - e12 -= F[i][j] * (fast_log(MAX(F[i][j]*T_slow,1)) - l12); - - // Estimation of compressed symbol freq table too. - e10 += 1.3; - e12 += 4.7; - } - } - - // Order-1 frequencies often end up totalling under TOTFREQ. - // In this case it's smaller to output the real frequencies - // prior to normalisation and normalise after (with an extra - // normalisation step needed in the decoder too). - // - // Thus we normalise to a power of 2 only, store those, - // and renormalise later here (and in decoder) by bit-shift - // to get to the fixed size. - if (ns < 64 && max_val > 128) max_val /= 2; - if (max_val > 1024) max_val /= 2; - if (max_val > TOTFREQ_O1) max_val = TOTFREQ_O1; - S[i] = max_val; // scale to max this - if (max_tot < max_val) - max_tot = max_val; - } - int shift = e10/e12 < 1.01 || max_tot <= TOTFREQ_O1_FAST - ? 
TF_SHIFT_O1_FAST - : TF_SHIFT_O1; - -// fprintf(stderr, "e10/12 = %f %f %f, shift %d\n", -// e10/log(256), e12/log(256), e10/e12, shift); - - return shift; -} - -static -unsigned char *rans_compress_O1_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - unsigned char *cp, *out_end, *out_free = NULL; - unsigned int tab_size; - - // -20 for order/size/meta - uint32_t bound = rans_compress_bound_4x16(in_size,1)-20; - - if (!out) { - *out_size = bound; - out_free = out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - if (((size_t)out)&1) - bound--; - out_end = out + bound; - - RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms))); - if (!syms) { - free(out_free); - return NULL; - } - - cp = out; - int shift = encode_freq1(in, in_size, 4, syms, &cp); - if (shift < 0) { - htscodecs_tls_free(syms); - return NULL; - } - tab_size = cp - out; - - RansState rans0, rans1, rans2, rans3; - RansEncInit(&rans0); - RansEncInit(&rans1); - RansEncInit(&rans2); - RansEncInit(&rans3); - - uint8_t* ptr = out_end; - - int isz4 = in_size>>2; - int i0 = 1*isz4-2; - int i1 = 2*isz4-2; - int i2 = 3*isz4-2; - int i3 = 4*isz4-2; - - unsigned char l0 = in[i0+1]; - unsigned char l1 = in[i1+1]; - unsigned char l2 = in[i2+1]; - unsigned char l3 = in[i3+1]; - - // Deal with the remainder - l3 = in[in_size-1]; - for (i3 = in_size-2; i3 > 4*isz4-2; i3--) { - unsigned char c3 = in[i3]; - RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]); - l3 = c3; - } - - for (; i0 >= 0; i0--, i1--, i2--, i3--) { - unsigned char c0, c1, c2, c3; - RansEncSymbol *s3 = &syms[c3 = in[i3]][l3]; - RansEncSymbol *s2 = &syms[c2 = in[i2]][l2]; - RansEncSymbol *s1 = &syms[c1 = in[i1]][l1]; - RansEncSymbol *s0 = &syms[c0 = in[i0]][l0]; - - RansEncPutSymbol(&rans3, &ptr, s3); - RansEncPutSymbol(&rans2, &ptr, s2); - RansEncPutSymbol(&rans1, &ptr, s1); - RansEncPutSymbol(&rans0, &ptr, s0); - - l0 = c0; - l1 = c1; - l2 = c2; - l3 = c3; - } - - 
RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]); - RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]); - RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]); - RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]); - - RansEncFlush(&rans3, &ptr); - RansEncFlush(&rans2, &ptr); - RansEncFlush(&rans1, &ptr); - RansEncFlush(&rans0, &ptr); - - *out_size = (out_end - ptr) + tab_size; - - cp = out; - memmove(out + tab_size, ptr, out_end-ptr); - - htscodecs_tls_free(syms); - return out; -} - -//#define MAGIC2 111 -#define MAGIC2 179 -//#define MAGIC2 0 - -static -unsigned char *rans_uncompress_O1_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - if (in_size < 16) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL; - unsigned char *c_freq = NULL; - int i, j = -999; - unsigned int x; - - uint8_t *sfb_ = htscodecs_tls_alloc(256*(TOTFREQ_O1+MAGIC2)*sizeof(*sfb_)); - uint32_t (*s3)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])sfb_; - // reuse the same memory for the fast mode lookup, but this only works - // if we're on e.g. 12-bit freqs vs 10-bit freqs as needs 4x larger array. - //uint32_t s3[256][TOTFREQ_O1_FAST]; - - if (!sfb_) - return NULL; - fb_t (*fb)[256] = htscodecs_tls_alloc(256 * sizeof(*fb)); - if (!fb) - goto err; - uint8_t *sfb[256]; - if ((*cp >> 4) == TF_SHIFT_O1) { - for (i = 0; i < 256; i++) - sfb[i]= sfb_ + i*(TOTFREQ_O1+MAGIC2); - } else { - for (i = 0; i < 256; i++) - sfb[i]= sfb_ + i*(TOTFREQ_O1_FAST+MAGIC2); - } - - if (!out) - out_free = out = malloc(out_sz); - - if (!out) - goto err; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // compressed header? 
If so uncompress it - unsigned char *tab_end = NULL; - unsigned char *c_freq_end = cp_end; - unsigned int shift = *cp >> 4; - if (*cp++ & 1) { - uint32_t u_freq_sz, c_freq_sz; - cp += var_get_u32(cp, cp_end, &u_freq_sz); - cp += var_get_u32(cp, cp_end, &c_freq_sz); - if (c_freq_sz > cp_end - cp) - goto err; - tab_end = cp + c_freq_sz; - if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL, u_freq_sz))) - goto err; - cp = c_freq; - c_freq_end = c_freq + u_freq_sz; - } - - // Decode order-0 symbol list; avoids needing in order-1 tables - uint32_t F0[256] = {0}; - int fsz = decode_alphabet(cp, c_freq_end, F0); - if (!fsz) - goto err; - cp += fsz; - - if (cp >= c_freq_end) - goto err; - - const int s3_fast_on = in_size >= 100000; - - for (i = 0; i < 256; i++) { - if (F0[i] == 0) - continue; - - uint32_t F[256] = {0}, T = 0; - fsz = decode_freq_d(cp, c_freq_end, F0, F, &T); - if (!fsz) - goto err; - cp += fsz; - - if (!T) { - //fprintf(stderr, "No freq for F_%d\n", i); - continue; - } - - normalise_freq_shift(F, T, 1< (1< cp_end) - goto err; - - RansState rans0, rans1, rans2, rans3; - uint8_t *ptr = cp, *ptr_end = in + in_size - 8; - RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto err; - RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto err; - RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto err; - RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto err; - - unsigned int isz4 = out_sz>>2; - int l0 = 0, l1 = 0, l2 = 0, l3 = 0; - unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4}; - - RansState R[4]; - R[0] = rans0; - R[1] = rans1; - R[2] = rans2; - R[3] = rans3; - - // Around 15% faster to specialise for 10/12 than to have one - // loop with shift as a variable. 
- if (shift == TF_SHIFT_O1) { - // TF_SHIFT_O1 = 12 - - const uint32_t mask = ((1u << TF_SHIFT_O1)-1); - for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { - uint16_t m, c; - c = sfb[l0][m = R[0] & mask]; - R[0] = fb[l0][c].f * (R[0]>>TF_SHIFT_O1) + m - fb[l0][c].b; - out[i4[0]] = l0 = c; - - c = sfb[l1][m = R[1] & mask]; - R[1] = fb[l1][c].f * (R[1]>>TF_SHIFT_O1) + m - fb[l1][c].b; - out[i4[1]] = l1 = c; - - c = sfb[l2][m = R[2] & mask]; - R[2] = fb[l2][c].f * (R[2]>>TF_SHIFT_O1) + m - fb[l2][c].b; - out[i4[2]] = l2 = c; - - c = sfb[l3][m = R[3] & mask]; - R[3] = fb[l3][c].f * (R[3]>>TF_SHIFT_O1) + m - fb[l3][c].b; - out[i4[3]] = l3 = c; - - if (ptr < ptr_end) { - RansDecRenorm(&R[0], &ptr); - RansDecRenorm(&R[1], &ptr); - RansDecRenorm(&R[2], &ptr); - RansDecRenorm(&R[3], &ptr); - } else { - RansDecRenormSafe(&R[0], &ptr, ptr_end+8); - RansDecRenormSafe(&R[1], &ptr, ptr_end+8); - RansDecRenormSafe(&R[2], &ptr, ptr_end+8); - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - } - } - - // Remainder - for (; i4[3] < out_sz; i4[3]++) { - uint32_t m3 = R[3] & ((1u<>TF_SHIFT_O1) + m3 - fb[l3][c3].b; - RansDecRenormSafe(&R[3], &ptr, ptr_end + 8); - l3 = c3; - } - } else if (!s3_fast_on) { - // TF_SHIFT_O1 = 10 with sfb[256][1024] & fb[256]256] array lookup - // Slightly faster for -o193 on q4 (high comp), but also less - // initialisation cost for smaller data - const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); - for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { - uint16_t m, c; - c = sfb[l0][m = R[0] & mask]; - R[0] = fb[l0][c].f * (R[0]>>TF_SHIFT_O1_FAST) + m - fb[l0][c].b; - out[i4[0]] = l0 = c; - - c = sfb[l1][m = R[1] & mask]; - R[1] = fb[l1][c].f * (R[1]>>TF_SHIFT_O1_FAST) + m - fb[l1][c].b; - out[i4[1]] = l1 = c; - - c = sfb[l2][m = R[2] & mask]; - R[2] = fb[l2][c].f * (R[2]>>TF_SHIFT_O1_FAST) + m - fb[l2][c].b; - out[i4[2]] = l2 = c; - - c = sfb[l3][m = R[3] & mask]; - R[3] = fb[l3][c].f * (R[3]>>TF_SHIFT_O1_FAST) + m - fb[l3][c].b; - out[i4[3]] = 
l3 = c; - - if (ptr < ptr_end) { - RansDecRenorm(&R[0], &ptr); - RansDecRenorm(&R[1], &ptr); - RansDecRenorm(&R[2], &ptr); - RansDecRenorm(&R[3], &ptr); - } else { - RansDecRenormSafe(&R[0], &ptr, ptr_end+8); - RansDecRenormSafe(&R[1], &ptr, ptr_end+8); - RansDecRenormSafe(&R[2], &ptr, ptr_end+8); - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - } - } - - // Remainder - for (; i4[3] < out_sz; i4[3]++) { - uint32_t m3 = R[3] & ((1u<>TF_SHIFT_O1_FAST) + m3 - fb[l3][c3].b; - RansDecRenormSafe(&R[3], &ptr, ptr_end + 8); - l3 = c3; - } - } else { - // TF_SHIFT_O1_FAST. - // Significantly faster for -o1 on q40 (low comp). - // Higher initialisation cost, so only use if big blocks. - const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); - for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { - uint32_t S0 = s3[l0][R[0] & mask]; - uint32_t S1 = s3[l1][R[1] & mask]; - l0 = out[i4[0]] = S0; - l1 = out[i4[1]] = S1; - uint16_t F0 = S0>>(TF_SHIFT_O1_FAST+8); - uint16_t F1 = S1>>(TF_SHIFT_O1_FAST+8); - uint16_t B0 = (S0>>8) & mask; - uint16_t B1 = (S1>>8) & mask; - - R[0] = F0 * (R[0]>>TF_SHIFT_O1_FAST) + B0; - R[1] = F1 * (R[1]>>TF_SHIFT_O1_FAST) + B1; - - uint32_t S2 = s3[l2][R[2] & mask]; - uint32_t S3 = s3[l3][R[3] & mask]; - l2 = out[i4[2]] = S2; - l3 = out[i4[3]] = S3; - uint16_t F2 = S2>>(TF_SHIFT_O1_FAST+8); - uint16_t F3 = S3>>(TF_SHIFT_O1_FAST+8); - uint16_t B2 = (S2>>8) & mask; - uint16_t B3 = (S3>>8) & mask; - - R[2] = F2 * (R[2]>>TF_SHIFT_O1_FAST) + B2; - R[3] = F3 * (R[3]>>TF_SHIFT_O1_FAST) + B3; - - if (ptr < ptr_end) { - RansDecRenorm(&R[0], &ptr); - RansDecRenorm(&R[1], &ptr); - RansDecRenorm(&R[2], &ptr); - RansDecRenorm(&R[3], &ptr); - } else { - RansDecRenormSafe(&R[0], &ptr, ptr_end+8); - RansDecRenormSafe(&R[1], &ptr, ptr_end+8); - RansDecRenormSafe(&R[2], &ptr, ptr_end+8); - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - } - } - - // Remainder - for (; i4[3] < out_sz; i4[3]++) { - uint32_t S = s3[l3][R[3] & ((1u<>(TF_SHIFT_O1_FAST+8)) * 
(R[3]>>TF_SHIFT_O1_FAST) - + ((S>>8) & ((1u< - -#if defined(__clang__) && defined(__has_attribute) -# if __has_attribute(unused) -# define UNUSED __attribute__((unused)) -# else -# define UNUSED -# endif -#elif defined(__GNUC__) && __GNUC__ >= 3 -# define UNUSED __attribute__((unused)) -#else -# define UNUSED -#endif - -// CPU detection is performed once. NB this has an assumption that we're -// not migrating between processes with different instruction stes, but -// to date the only systems I know of that support this don't have different -// capabilities (that we use) per core. -#ifndef NO_THREADS -static pthread_once_t rans_cpu_once = PTHREAD_ONCE_INIT; -#endif - -static int have_ssse3 UNUSED = 0; -static int have_sse4_1 UNUSED = 0; -static int have_popcnt UNUSED = 0; -static int have_avx2 UNUSED = 0; -static int have_avx512f UNUSED = 0; -static int is_amd UNUSED = 0; - -static void htscodecs_tls_cpu_init(void) { - unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; - // These may be unused, depending on HAVE_* config.h macros - - int level = __get_cpuid_max(0, NULL); - __cpuid_count(0, 0, eax, ebx, ecx, edx); - is_amd = (ecx == 0x444d4163); - if (level >= 1) { - __cpuid_count(1, 0, eax, ebx, ecx, edx); -#if defined(bit_SSSE3) - have_ssse3 = ecx & bit_SSSE3; -#endif -#if defined(bit_POPCNT) - have_popcnt = ecx & bit_POPCNT; -#endif -#if defined(bit_SSE4_1) - have_sse4_1 = ecx & bit_SSE4_1; -#endif - } - if (level >= 7) { - __cpuid_count(7, 0, eax, ebx, ecx, edx); -#if defined(bit_AVX2) - have_avx2 = ebx & bit_AVX2; -#endif -#if defined(bit_AVX512F) - have_avx512f = ebx & bit_AVX512F; -#endif - } - - if (!have_popcnt) have_avx512f = have_avx2 = have_sse4_1 = 0; - if (!have_ssse3) have_sse4_1 = 0; - - if (!(rans_cpu & RANS_CPU_ENC_AVX512)) have_avx512f = 0; - if (!(rans_cpu & RANS_CPU_ENC_AVX2)) have_avx2 = 0; - if (!(rans_cpu & RANS_CPU_ENC_SSE4)) have_sse4_1 = 0; -} - -static inline -unsigned char *(*rans_enc_func(int do_simd, int order)) - (unsigned char *in, - 
unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - if (!do_simd) { // SIMD disabled - return order & 1 - ? rans_compress_O1_4x16 - : rans_compress_O0_4x16; - } - -#ifdef NO_THREADS - htscodecs_tls_cpu_init(); -#else - int err = pthread_once(&rans_cpu_once, htscodecs_tls_cpu_init); - if (err != 0) { - fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n", - strerror(err)); - fprintf(stderr, "Using scalar code only\n"); - } -#endif - - if (order & 1) { -#if defined(HAVE_AVX512) - if (have_avx512f && (!is_amd || !have_avx2)) - return rans_compress_O1_32x16_avx512; -#endif -#if defined(HAVE_AVX2) - if (have_avx2) - return rans_compress_O1_32x16_avx2; -#endif -#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) - if (have_sse4_1) - return rans_compress_O1_32x16; -#endif - return rans_compress_O1_32x16; - } else { -#if defined(HAVE_AVX512) - if (have_avx512f && (!is_amd || !have_avx2)) - return rans_compress_O0_32x16_avx512; -#endif -#if defined(HAVE_AVX2) - if (have_avx2) - return rans_compress_O0_32x16_avx2; -#endif -#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) - if (have_sse4_1) - return rans_compress_O0_32x16; -#endif - return rans_compress_O0_32x16; - } -} - -static inline -unsigned char *(*rans_dec_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_size) { - - if (!do_simd) { // SIMD disabled - return order & 1 - ? 
rans_uncompress_O1_4x16 - : rans_uncompress_O0_4x16; - } - -#ifdef NO_THREADS - htscodecs_tls_cpu_init(); -#else - int err = pthread_once(&rans_cpu_once, htscodecs_tls_cpu_init); - if (err != 0) { - fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n", - strerror(err)); - fprintf(stderr, "Using scalar code only\n"); - } -#endif - - if (order & 1) { -#if defined(HAVE_AVX512) - if (have_avx512f) - return rans_uncompress_O1_32x16_avx512; -#endif -#if defined(HAVE_AVX2) - if (have_avx2) - return rans_uncompress_O1_32x16_avx2; -#endif -#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) - if (have_sse4_1) - return rans_uncompress_O1_32x16_sse4; -#endif - return rans_uncompress_O1_32x16; - } else { -#if defined(HAVE_AVX512) - if (have_avx512f && (!is_amd || !have_avx2)) - return rans_uncompress_O0_32x16_avx512; -#endif -#if defined(HAVE_AVX2) - if (have_avx2) - return rans_uncompress_O0_32x16_avx2; -#endif -#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) - if (have_sse4_1) - return rans_uncompress_O0_32x16_sse4; -#endif - return rans_uncompress_O0_32x16; - } -} - -#elif defined(__ARM_NEON) && defined(__aarch64__) - -#if defined(__linux__) || defined(__FreeBSD__) -#include -#elif defined(_WIN32) -#include -#endif - -static inline int have_neon(void) { -#if defined(__linux__) && defined(__arm__) - return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0; -#elif defined(__linux__) && defined(__aarch64__) - return (getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0; -#elif defined(__APPLE__) - return 1; -#elif defined(__FreeBSD__) && defined(__arm__) - u_long cap; - if (elf_aux_info(AT_HWCAP, &cap, sizeof cap) != 0) return 0; - return (cap & HWCAP_NEON) != 0; -#elif defined(__FreeBSD__) && defined(__aarch64__) - u_long cap; - if (elf_aux_info(AT_HWCAP, &cap, sizeof cap) != 0) return 0; - return (cap & HWCAP_ASIMD) != 0; -#elif defined(_WIN32) - return IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE) != 0; -#else - return 0; 
-#endif -} - -static inline -unsigned char *(*rans_enc_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - - if (do_simd) { - if ((rans_cpu & RANS_CPU_ENC_NEON) && have_neon()) - return order & 1 - ? rans_compress_O1_32x16_neon - : rans_compress_O0_32x16_neon; - else - return order & 1 - ? rans_compress_O1_32x16 - : rans_compress_O0_32x16; - } else { - return order & 1 - ? rans_compress_O1_4x16 - : rans_compress_O0_4x16; - } -} - -static inline -unsigned char *(*rans_dec_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_size) { - - if (do_simd) { - if ((rans_cpu & RANS_CPU_DEC_NEON) && have_neon()) - return order & 1 - ? rans_uncompress_O1_32x16_neon - : rans_uncompress_O0_32x16_neon; - else - return order & 1 - ? rans_uncompress_O1_32x16 - : rans_uncompress_O0_32x16; - } else { - return order & 1 - ? rans_uncompress_O1_4x16 - : rans_uncompress_O0_4x16; - } -} - -#else // !(defined(__GNUC__) && defined(__x86_64__)) && !defined(__ARM_NEON) - -static inline -unsigned char *(*rans_enc_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - - if (do_simd) { - return order & 1 - ? rans_compress_O1_32x16 - : rans_compress_O0_32x16; - } else { - return order & 1 - ? rans_compress_O1_4x16 - : rans_compress_O0_4x16; - } -} - -static inline -unsigned char *(*rans_dec_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_size) { - - if (do_simd) { - return order & 1 - ? rans_uncompress_O1_32x16 - : rans_uncompress_O0_32x16; - } else { - return order & 1 - ? rans_uncompress_O1_4x16 - : rans_uncompress_O0_4x16; - } -} - -#endif - -/*----------------------------------------------------------------------------- - * Simple interface to the order-0 vs order-1 encoders and decoders. 
- * - * Smallest is method, , so worst case 2 bytes longer. - */ -unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out,unsigned int *out_size, - int order) { - if (in_size > INT_MAX) { - *out_size = 0; - return NULL; - } - - unsigned int c_meta_len; - uint8_t *meta = NULL, *rle = NULL, *packed = NULL; - uint8_t *out_free = NULL; - - if (!out) { - *out_size = rans_compress_bound_4x16(in_size, order); - if (*out_size == 0) - return NULL; - if (!(out_free = out = malloc(*out_size))) - return NULL; - } - - unsigned char *out_end = out + *out_size; - - // Permit 32-way unrolling for large blocks, paving the way for - // AVX2 and AVX512 SIMD variants. - if ((order & RANS_ORDER_SIMD_AUTO) && in_size >= 50000 - && !(order & RANS_ORDER_STRIPE)) - order |= X_32; - - if (in_size <= 20) - order &= ~RANS_ORDER_STRIPE; - if (in_size <= 1000) - order &= ~RANS_ORDER_X32; - - if (order & RANS_ORDER_STRIPE) { - int N = (order>>8) & 0xff; - if (N == 0) N = 4; // default for compatibility with old tests - - unsigned char *transposed = malloc(in_size); - unsigned int part_len[256]; - unsigned int idx[256]; - if (!transposed) { - free(out_free); - return NULL; - } - int i, j, x; - - for (i = 0; i < N; i++) { - part_len[i] = in_size / N + ((in_size % N) > i); - idx[i] = i ? 
idx[i-1] + part_len[i-1] : 0; // cumulative index - } - -#define KN 8 - i = x = 0; - if (in_size >= N*KN) { - for (; i < in_size-N*KN;) { - int k; - unsigned char *ink = in+i; - for (j = 0; j < N; j++) - for (k = 0; k < KN; k++) - transposed[idx[j]+x+k] = ink[j+N*k]; - x += KN; i+=N*KN; - } - } -#undef KN - for (; i < in_size-N; i += N, x++) { - for (j = 0; j < N; j++) - transposed[idx[j]+x] = in[i+j]; - } - - for (; i < in_size; i += N, x++) { - for (j = 0; i+j < in_size; j++) - transposed[idx[j]+x] = in[i+j]; - } - - unsigned int olen2; - unsigned char *out2, *out2_start; - c_meta_len = 1; - *out = order & ~RANS_ORDER_NOSZ; - c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size); - out[c_meta_len++] = N; - - unsigned char *out_best = NULL; - unsigned int out_best_len = 0; - - out2_start = out2 = out+7+5*N; // shares a buffer with c_meta - for (i = 0; i < N; i++) { - // Brute force try all methods. - int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10; - for (j = 0; j < sizeof(m)/sizeof(*m); j++) { - if ((order & m[j]) != m[j]) - continue; - - // order-1 *only*; bit check above cannot elide order-0 - if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0) - continue; - olen2 = *out_size - (out2 - out); - rans_compress_to_4x16(transposed+idx[i], part_len[i], - out2, &olen2, - m[j] | RANS_ORDER_NOSZ - | (order&RANS_ORDER_X32)); - if (best_sz > olen2) { - best_sz = olen2; - best_j = j; - if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) { - unsigned char *tmp = realloc(out_best, olen2); - if (!tmp) { - free(out_free); - return NULL; - } - out_best = tmp; - out_best_len = olen2; - } - - // Cache a copy of the best so far - memcpy(out_best, out2, olen2); - } - } - if (best_j < sizeof(m)/sizeof(*m)) { - // Copy the best compression to output buffer if not current - memcpy(out2, out_best, best_sz); - olen2 = best_sz; - } - - out2 += olen2; - c_meta_len += var_put_u32(out+c_meta_len, out_end, olen2); - } - if (out_best) - free(out_best); - - 
memmove(out+c_meta_len, out2_start, out2-out2_start); - free(transposed); - *out_size = c_meta_len + out2-out2_start; - return out; - } - - if (order & RANS_ORDER_CAT) { - out[0] = RANS_ORDER_CAT; - c_meta_len = 1; - c_meta_len += var_put_u32(&out[1], out_end, in_size); - memcpy(out+c_meta_len, in, in_size); - *out_size = c_meta_len + in_size; - return out; - } - - int do_pack = order & RANS_ORDER_PACK; - int do_rle = order & RANS_ORDER_RLE; - int no_size = order & RANS_ORDER_NOSZ; - int do_simd = order & RANS_ORDER_X32; - - out[0] = order; - c_meta_len = 1; - - if (!no_size) - c_meta_len += var_put_u32(&out[1], out_end, in_size); - - order &= 3; - - // Format is compressed meta-data, compressed data. - // Meta-data can be empty, pack, rle lengths, or pack + rle lengths. - // Data is either the original data, bit-packed packed, rle literals or - // packed + rle literals. - - if (do_pack && in_size) { - // PACK 2, 4 or 8 symbols into one byte. - int pmeta_len; - uint64_t packed_len; - packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len); - if (!packed) { - out[0] &= ~RANS_ORDER_PACK; - do_pack = 0; - free(packed); - packed = NULL; - } else { - in = packed; - in_size = packed_len; - c_meta_len += pmeta_len; - - // Could derive this rather than storing verbatim. 
- // Orig size * 8/nbits (+1 if not multiple of 8/n) - int sz = var_put_u32(out+c_meta_len, out_end, in_size); - c_meta_len += sz; - *out_size -= sz; - } - } else if (do_pack) { - out[0] &= ~RANS_ORDER_PACK; - } - - if (do_rle && in_size) { - // RLE 'in' -> rle_length + rle_literals arrays - unsigned int rmeta_len, c_rmeta_len; - uint64_t rle_len; - c_rmeta_len = in_size+257; - if (!(meta = malloc(c_rmeta_len))) { - free(out_free); - return NULL; - } - - uint8_t rle_syms[256]; - int rle_nsyms = 0; - uint64_t rmeta_len64; - rle = hts_rle_encode(in, in_size, meta, &rmeta_len64, - rle_syms, &rle_nsyms, NULL, &rle_len); - memmove(meta+1+rle_nsyms, meta, rmeta_len64); - meta[0] = rle_nsyms; - memcpy(meta+1, rle_syms, rle_nsyms); - rmeta_len = rmeta_len64 + rle_nsyms+1; - - if (!rle || rle_len + rmeta_len >= .99*in_size) { - // Not worth the speed hit. - out[0] &= ~RANS_ORDER_RLE; - do_rle = 0; - free(rle); - rle = NULL; - } else { - // Compress lengths with O0 and literals with O0/O1 ("order" param) - int sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2), sz2; - sz += var_put_u32(out+c_meta_len+sz, out_end, rle_len); - c_rmeta_len = *out_size - (c_meta_len+sz+5); - rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len); - if (c_rmeta_len < rmeta_len) { - sz2 = var_put_u32(out+c_meta_len+sz, out_end, c_rmeta_len); - memmove(out+c_meta_len+sz+sz2, out+c_meta_len+sz+5, c_rmeta_len); - } else { - // Uncompressed RLE meta-data as too small - sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2+1); - sz2 = var_put_u32(out+c_meta_len+sz, out_end, rle_len); - memcpy(out+c_meta_len+sz+sz2, meta, rmeta_len); - c_rmeta_len = rmeta_len; - } - - c_meta_len += sz + sz2 + c_rmeta_len; - - in = rle; - in_size = rle_len; - } - - free(meta); - } else if (do_rle) { - out[0] &= ~RANS_ORDER_RLE; - } - - *out_size -= c_meta_len; - if (order && in_size < 8) { - out[0] &= ~1; - order &= ~1; - } - - rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, 
out_size); - - if (*out_size >= in_size) { - out[0] &= ~3; - out[0] |= RANS_ORDER_CAT | no_size; - memcpy(out+c_meta_len, in, in_size); - *out_size = in_size; - } - - free(rle); - free(packed); - - *out_size += c_meta_len; - - return out; -} - -unsigned char *rans_compress_4x16(unsigned char *in, unsigned int in_size, - unsigned int *out_size, int order) { - return rans_compress_to_4x16(in, in_size, NULL, out_size, order); -} - -unsigned char *rans_uncompress_to_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - unsigned char *in_end = in + in_size; - unsigned char *out_free = NULL, *tmp_free = NULL, *meta_free = NULL; - - if (in_size == 0) - return NULL; - - if (*in & RANS_ORDER_STRIPE) { - unsigned int ulen, olen, c_meta_len = 1; - int i; - uint64_t clen_tot = 0; - - // Decode lengths - c_meta_len += var_get_u32(in+c_meta_len, in_end, &ulen); - if (c_meta_len >= in_size) - return NULL; - unsigned int N = in[c_meta_len++]; - if (N < 1) // Must be at least one stripe - return NULL; - unsigned int clenN[256], ulenN[256], idxN[256]; - if (!out) { - if (ulen >= INT_MAX) - return NULL; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (ulen > 100000) - return NULL; -#endif - if (!(out_free = out = malloc(ulen))) { - return NULL; - } - *out_size = ulen; - } - if (ulen != *out_size) { - free(out_free); - return NULL; - } - - for (i = 0; i < N; i++) { - ulenN[i] = ulen / N + ((ulen % N) > i); - idxN[i] = i ? idxN[i-1] + ulenN[i-1] : 0; - c_meta_len += var_get_u32(in+c_meta_len, in_end, &clenN[i]); - clen_tot += clenN[i]; - if (c_meta_len > in_size || clenN[i] > in_size || clenN[i] < 1) { - free(out_free); - return NULL; - } - } - - // We can call this with a larger buffer, but once we've determined - // how much we really use we limit it so the recursion becomes easier - // to limit. 
- if (c_meta_len + clen_tot > in_size) { - free(out_free); - return NULL; - } - in_size = c_meta_len + clen_tot; - - //fprintf(stderr, " stripe meta %d\n", c_meta_len); //c-size - - // Uncompress the N streams - unsigned char *outN = malloc(ulen); - if (!outN) { - free(out_free); - return NULL; - } - for (i = 0; i < N; i++) { - olen = ulenN[i]; - if (in_size < c_meta_len) { - free(out_free); - free(outN); - return NULL; - } - if (!rans_uncompress_to_4x16(in+c_meta_len, in_size-c_meta_len, outN + idxN[i], &olen) - || olen != ulenN[i]) { - free(out_free); - free(outN); - return NULL; - } - c_meta_len += clenN[i]; - } - - unstripe(out, outN, ulen, N, idxN); - - free(outN); - *out_size = ulen; - return out; - } - - int order = *in++; in_size--; - int do_pack = order & RANS_ORDER_PACK; - int do_rle = order & RANS_ORDER_RLE; - int do_cat = order & RANS_ORDER_CAT; - int no_size = order & RANS_ORDER_NOSZ; - int do_simd = order & RANS_ORDER_X32; - order &= 1; - - int sz = 0; - unsigned int osz; - if (!no_size) { - sz = var_get_u32(in, in_end, &osz); - } else - sz = 0, osz = *out_size; - in += sz; - in_size -= sz; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (osz > 100000) - return NULL; -#endif - - if (no_size && !out) - goto err; // Need one or the other - - if (!out) { - *out_size = osz; - if (!(out = out_free = malloc(*out_size))) - return NULL; - } else { - if (*out_size < osz) - goto err; - *out_size = osz; - } - -// if (do_pack || do_rle) { -// in += sz; // size field not needed when pure rANS -// in_size -= sz; -// } - - uint32_t c_meta_size = 0; - unsigned int tmp1_size = *out_size; - unsigned int tmp2_size = *out_size; - unsigned int tmp3_size = *out_size; - unsigned char *tmp1 = NULL, *tmp2 = NULL, *tmp3 = NULL, *tmp = NULL; - - // Need In, Out and Tmp buffers with temporary buffer of the same size - // as output. All use rANS, but with optional transforms (none, RLE, - // Pack, or both). 
- // - // rans unrle unpack - // If none: in -> out - // If RLE: in -> tmp -> out - // If Pack: in -> tmp -> out - // If RLE+Pack: in -> out -> tmp -> out - // tmp1 tmp2 tmp3 - // - // So rans is in -> tmp1 - // RLE is tmp1 -> tmp2 - // Unpack is tmp2 -> tmp3 - - // Format is meta data (Pack and RLE in that order if present), - // followed by rANS compressed data. - - if (do_pack || do_rle) { - if (!(tmp = tmp_free = malloc(*out_size))) - goto err; - if (do_pack && do_rle) { - tmp1 = out; - tmp2 = tmp; - tmp3 = out; - } else if (do_pack) { - tmp1 = tmp; - tmp2 = tmp1; - tmp3 = out; - } else if (do_rle) { - tmp1 = tmp; - tmp2 = out; - tmp3 = out; - } - } else { - // neither - tmp = NULL; - tmp1 = out; - tmp2 = out; - tmp3 = out; - } - - // Decode the bit-packing map. - uint8_t map[16] = {0}; - int npacked_sym = 0; - uint64_t unpacked_sz = 0; // FIXME: rename to packed_per_byte - if (do_pack) { - c_meta_size = hts_unpack_meta(in, in_size, *out_size, map, &npacked_sym); - if (c_meta_size == 0) - goto err; - - unpacked_sz = osz; - in += c_meta_size; - in_size -= c_meta_size; - - // New unpacked size. We could derive this bit from *out_size - // and npacked_sym. - unsigned int osz; - sz = var_get_u32(in, in_end, &osz); - in += sz; - in_size -= sz; - if (osz > tmp1_size) - goto err; - tmp1_size = osz; - } - - uint8_t *meta = NULL; - uint32_t u_meta_size = 0; - if (do_rle) { - // Uncompress meta data - uint32_t c_meta_size, rle_len, sz; - sz = var_get_u32(in, in_end, &u_meta_size); - sz += var_get_u32(in+sz, in_end, &rle_len); - if (rle_len > tmp1_size) // should never grow - goto err; - if (u_meta_size & 1) { - meta = in + sz; - u_meta_size = u_meta_size/2 > (in_end-meta) ? 
(in_end-meta) : u_meta_size/2; - c_meta_size = u_meta_size; - } else { - sz += var_get_u32(in+sz, in_end, &c_meta_size); - u_meta_size /= 2; - - meta_free = meta = rans_dec_func(do_simd, 0)(in+sz, in_size-sz, NULL, u_meta_size); - if (!meta) - goto err; - } - if (c_meta_size+sz > in_size) - goto err; - in += c_meta_size+sz; - in_size -= c_meta_size+sz; - tmp1_size = rle_len; - } - //fprintf(stderr, " meta_size %d bytes\n", (int)(in - orig_in)); //c-size - - // uncompress RLE data. in -> tmp1 - if (in_size) { - if (do_cat) { - //fprintf(stderr, " CAT %d\n", tmp1_size); //c-size - if (tmp1_size > in_size) - goto err; - if (tmp1_size > *out_size) - goto err; - memcpy(tmp1, in, tmp1_size); - } else { - tmp1 = rans_dec_func(do_simd, order)(in, in_size, tmp1, tmp1_size); - if (!tmp1) - goto err; - } - } else { - tmp1_size = 0; - } - tmp2_size = tmp3_size = tmp1_size; - - if (do_rle) { - // Unpack RLE. tmp1 -> tmp2. - if (u_meta_size == 0) - goto err; - uint64_t unrle_size = *out_size; - int rle_nsyms = *meta ? *meta : 256; - if (u_meta_size < 1+rle_nsyms) - goto err; - if (!hts_rle_decode(tmp1, tmp1_size, - meta+1+rle_nsyms, u_meta_size-(1+rle_nsyms), - meta+1, rle_nsyms, tmp2, &unrle_size)) - goto err; - tmp3_size = tmp2_size = unrle_size; - free(meta_free); - meta_free = NULL; - } - if (do_pack) { - // Unpack bits via pack-map. 
tmp2 -> tmp3 - if (npacked_sym == 1) - unpacked_sz = tmp2_size; - //uint8_t *porig = unpack(tmp2, tmp2_size, unpacked_sz, npacked_sym, map); - //memcpy(tmp3, porig, unpacked_sz); - if (!hts_unpack(tmp2, tmp2_size, tmp3, unpacked_sz, npacked_sym, map)) - goto err; - tmp3_size = unpacked_sz; - } - - if (tmp) - free(tmp); - - *out_size = tmp3_size; - return tmp3; - - err: - free(meta_free); - free(out_free); - free(tmp_free); - return NULL; -} - -unsigned char *rans_uncompress_4x16(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - return rans_uncompress_to_4x16(in, in_size, NULL, out_size); -} diff --git a/src/htslib-1.18/htscodecs/htscodecs/tokenise_name3.c b/src/htslib-1.18/htscodecs/htscodecs/tokenise_name3.c deleted file mode 100644 index b92dc7b..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/tokenise_name3.c +++ /dev/null @@ -1,1808 +0,0 @@ -/* - * Copyright (c) 2016-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// cc -O3 -g -DTEST_TOKENISER tokenise_name3.c arith_dynamic.c rANS_static4x16pr.c pooled_alloc.c -I.. -I. -lbz2 -pthread - -// Name tokeniser. -// It generates a series of byte streams (per token) and compresses these -// either using static rANS or dynamic arithmetic coding. Arith coding is -// typically 1-5% smaller, but around 50-100% slower. We only envisage it -// being used at the higher compression levels. - -// TODO -// -// - Is it better when encoding 1, 2, 3, 3, 4, 5, 5, 6, 7, 9, 9, 10 to encode -// this as a mixture of MATCH and DELTA ops, or as entirely as DELTA ops -// with some delta values being zero? I suspect the latter, but it is -// not implemented here. See "last_token_delta" comments in code. -// -// - Consider variable size string implementations. -// Pascal style strings (length + str), -// C style strings (nul terminated), -// Or split blocks: length block and string contents block. -// -// - Is this one token-block or many serialised token-blocks? -// A) Lots of different models but feeding one bit-buffer emitted to -// by the entropy encoder => one block (fqzcomp). -// B) Lots of different models each feeding their own bit-buffers -// => many blocks. -// -// - multiple integer types depending on size; 1, 2, 4 byte long. -// -// - Consider token choice for isalnum instead of isalpha. Sometimes better. -// -// - Consider token synchronisation (eg on matching chr symbols?) 
incase of -// variable number. Eg consider foo:0999, foo:1000, foo:1001 (the leading -// zero adds an extra token). -// -// - Optimisation of tokens. Eg: -// HS25_09827:2:2102:11274:80442#49 -// HS25_09827:2:2109:12941:31311#49 -// -// We'll have tokens for HS 25 _ 09827 : 2 : that are entirely -// after the initial token. These 7 tokens could be one ALPHA instead -// of 7 distinct tokens, with 1 MATCH instead of 7. This is both a speed -// improvement for decoding as well as a space saving (fewer token-blocks -// and associated overhead). -// -// - XOR. Like ALPHA, but used when previous symbol is ALPHA or XOR -// and string lengths match. Useful when names are similar, eg: -// the sequence in 07.names: -// -// @VP2-06:112:H7LNDMCVY:1:1105:26919:1172 1:N:0:ATTCAGAA+AGGAGAAG -// @VP2-06:112:H7LNDMCVY:1:1105:27100:1172 1:N:0:ATTCAGAA+AGGCGAAG -// @VP2-06:112:H7LNDMCVY:1:1105:27172:1172 1:N:0:ATTCAGAA+AGGCTAAG - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pooled_alloc.h" -#include "arith_dynamic.h" -#include "rANS_static4x16.h" -#include "tokenise_name3.h" -#include "varint.h" -#include "utils.h" - -// 128 is insufficient for SAM names (max 256 bytes) as -// we may alternate a0a0a0a0a0 etc. However if we fail, -// we just give up and switch to another codec, so this -// isn't a serious limit. Maybe up to 256 to permit all -// SAM names? 
-#define MAX_TOKENS 128 -#define MAX_TBLOCKS (MAX_TOKENS<<4) - -// Number of names per block -#define MAX_NAMES 1000000 - -enum name_type {N_ERR = -1, N_TYPE = 0, N_ALPHA, N_CHAR, N_DIGITS0, N_DZLEN, N_DUP, N_DIFF, - N_DIGITS, N_DDELTA, N_DDELTA0, N_MATCH, N_NOP, N_END, N_ALL}; - -typedef struct trie { - struct trie *next, *sibling; - int count; - uint32_t c:8; - uint32_t n:24; // Nth line -} trie_t; - -typedef struct { - enum name_type token_type; - int token_int; - int token_str; -} last_context_tok; - -typedef struct { - char *last_name; - int last_ntok; - last_context_tok *last; // [last_ntok] -} last_context; - -typedef struct { - uint8_t *buf; - size_t buf_a, buf_l; // alloc and used length. - int tnum, ttype; - int dup_from; -} descriptor; - -typedef struct { - last_context *lc; - - // For finding entire line dups - int counter; - - // Trie used in encoder only - trie_t *t_head; - pool_alloc_t *pool; - - // token blocks - descriptor desc[MAX_TBLOCKS]; - - // summary stats per token - int token_dcount[MAX_TOKENS]; - int token_icount[MAX_TOKENS]; - //int token_zcount[MAX_TOKENS]; - - int max_tok; // tracks which desc/[id]count elements have been initialised - int max_names; -} name_context; - -static name_context *create_context(int max_names) { - if (max_names <= 0) - return NULL; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (max_names > 100000) - return NULL; -#endif - - // An arbitrary limit to prevent malformed data from consuming excessive - // amounts of memory. Consider upping this if we have genuine use cases - // for larger blocks. 
- if (max_names > 1e7) { - fprintf(stderr, "Name codec currently has a max of 10 million rec.\n"); - return NULL; - } - - name_context *ctx = htscodecs_tls_alloc(sizeof(*ctx) + - ++max_names*sizeof(*ctx->lc)); - if (!ctx) return NULL; - ctx->max_names = max_names; - - ctx->counter = 0; - ctx->t_head = NULL; - - ctx->lc = (last_context *)(((char *)ctx) + sizeof(*ctx)); - ctx->pool = NULL; - - memset(&ctx->desc[0], 0, 2*16 * sizeof(ctx->desc[0])); - memset(&ctx->token_dcount[0], 0, sizeof(int)); - memset(&ctx->token_icount[0], 0, sizeof(int)); - memset(&ctx->lc[0], 0, max_names*sizeof(ctx->lc[0])); - ctx->max_tok = 1; - - ctx->lc[0].last_ntok = 0; - - return ctx; -} - -static void free_context(name_context *ctx) { - if (!ctx) - return; - - if (ctx->t_head) - free(ctx->t_head); - if (ctx->pool) - pool_destroy(ctx->pool); - - int i; - for (i = 0; i < ctx->max_tok*16; i++) - free(ctx->desc[i].buf); - - for (i = 0; i < ctx->max_names; i++) - free(ctx->lc[i].last); - - htscodecs_tls_free(ctx); -} - -//----------------------------------------------------------------------------- -// Fast unsigned integer printing code. -// Returns number of bytes written. 
-static int append_uint32_fixed(char *cp, uint32_t i, uint8_t l) { - switch (l) { - case 9:*cp++ = i / 100000000 + '0', i %= 100000000; - case 8:*cp++ = i / 10000000 + '0', i %= 10000000; - case 7:*cp++ = i / 1000000 + '0', i %= 1000000; - case 6:*cp++ = i / 100000 + '0', i %= 100000; - case 5:*cp++ = i / 10000 + '0', i %= 10000; - case 4:*cp++ = i / 1000 + '0', i %= 1000; - case 3:*cp++ = i / 100 + '0', i %= 100; - case 2:*cp++ = i / 10 + '0', i %= 10; - case 1:*cp++ = i + '0'; - case 0:break; - } - return l; -} - -static int append_uint32_var(char *cp, uint32_t i) { - char *op = cp; - uint32_t j; - - //if (i < 10) goto b0; - if (i < 100) goto b1; - //if (i < 1000) goto b2; - if (i < 10000) goto b3; - //if (i < 100000) goto b4; - if (i < 1000000) goto b5; - //if (i < 10000000) goto b6; - if (i < 100000000) goto b7; - - if ((j = i / 1000000000)) {*cp++ = j + '0'; i -= j*1000000000; goto x8;} - if ((j = i / 100000000)) {*cp++ = j + '0'; i -= j*100000000; goto x7;} - b7:if ((j = i / 10000000)) {*cp++ = j + '0'; i -= j*10000000; goto x6;} - if ((j = i / 1000000)) {*cp++ = j + '0', i -= j*1000000; goto x5;} - b5:if ((j = i / 100000)) {*cp++ = j + '0', i -= j*100000; goto x4;} - if ((j = i / 10000)) {*cp++ = j + '0', i -= j*10000; goto x3;} - b3:if ((j = i / 1000)) {*cp++ = j + '0', i -= j*1000; goto x2;} - if ((j = i / 100)) {*cp++ = j + '0', i -= j*100; goto x1;} - b1:if ((j = i / 10)) {*cp++ = j + '0', i -= j*10; goto x0;} - if (i) *cp++ = i + '0'; - return cp-op; - - x8:*cp++ = i / 100000000 + '0', i %= 100000000; - x7:*cp++ = i / 10000000 + '0', i %= 10000000; - x6:*cp++ = i / 1000000 + '0', i %= 1000000; - x5:*cp++ = i / 100000 + '0', i %= 100000; - x4:*cp++ = i / 10000 + '0', i %= 10000; - x3:*cp++ = i / 1000 + '0', i %= 1000; - x2:*cp++ = i / 100 + '0', i %= 100; - x1:*cp++ = i / 10 + '0', i %= 10; - x0:*cp++ = i + '0'; - - return cp-op; -} - -//----------------------------------------------------------------------------- -// Example descriptor encoding and IO. 
-// -// Here we just append to a buffer so we can dump out the results. -// These could then be passed through a static entropy encoder that -// encodes the entire buffer. -// -// Alternatively an adaptive entropy encoder could be place inline -// here to encode as it goes using additional knowledge from the -// supplied context. - -// Ensure room for sz more bytes. -static int descriptor_grow(descriptor *fd, uint32_t sz) { - while (fd->buf_l + sz > fd->buf_a) { - size_t buf_a = fd->buf_a ? fd->buf_a*2 : 65536; - unsigned char *buf = realloc(fd->buf, buf_a); - if (!buf) - return -1; - fd->buf = buf; - fd->buf_a = buf_a; - } - - return 0; -} - -static int encode_token_type(name_context *ctx, int ntok, - enum name_type type) { - int id = ntok<<4; - - if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; - - ctx->desc[id].buf[ctx->desc[id].buf_l++] = type; - - return 0; -} - -static int encode_token_match(name_context *ctx, int ntok) { - return encode_token_type(ctx, ntok, N_MATCH); -} - -static int encode_token_end(name_context *ctx, int ntok) { - return encode_token_type(ctx, ntok, N_END); -} - -static enum name_type decode_token_type(name_context *ctx, int ntok) { - int id = ntok<<4; - if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) return -1; - return ctx->desc[id].buf[ctx->desc[id].buf_l++]; -} - -// int stored as 32-bit quantities -static int encode_token_int(name_context *ctx, int ntok, - enum name_type type, uint32_t val) { - int id = (ntok<<4) | type; - - if (encode_token_type(ctx, ntok, type) < 0) return -1; - if (descriptor_grow(&ctx->desc[id], 4) < 0) return -1; - - uint8_t *cp = &ctx->desc[id].buf[ctx->desc[id].buf_l]; - cp[0] = (val >> 0) & 0xff; - cp[1] = (val >> 8) & 0xff; - cp[2] = (val >> 16) & 0xff; - cp[3] = (val >> 24) & 0xff; - ctx->desc[id].buf_l += 4; - - return 0; -} - -// Return 0 on success, -1 on failure; -static int decode_token_int(name_context *ctx, int ntok, - enum name_type type, uint32_t *val) { - int id = (ntok<<4) | type; - - if 
(ctx->desc[id].buf_l + 4 > ctx->desc[id].buf_a) - return -1; - - uint8_t *cp = ctx->desc[id].buf + ctx->desc[id].buf_l; - *val = (cp[0]) + (cp[1]<<8) + (cp[2]<<16) + ((uint32_t)cp[3]<<24); - ctx->desc[id].buf_l += 4; - - return 0; -} - -// 8 bit integer quantity -static int encode_token_int1(name_context *ctx, int ntok, - enum name_type type, uint32_t val) { - int id = (ntok<<4) | type; - - if (encode_token_type(ctx, ntok, type) < 0) return -1; - if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; - - ctx->desc[id].buf[ctx->desc[id].buf_l++] = val; - - return 0; -} - -static int encode_token_int1_(name_context *ctx, int ntok, - enum name_type type, uint32_t val) { - int id = (ntok<<4) | type; - - if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; - - ctx->desc[id].buf[ctx->desc[id].buf_l++] = val; - - return 0; -} - -// Return 0 on success, -1 on failure; -static int decode_token_int1(name_context *ctx, int ntok, - enum name_type type, uint32_t *val) { - int id = (ntok<<4) | type; - - if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) - return -1; - *val = ctx->desc[id].buf[ctx->desc[id].buf_l++]; - - return 0; -} - - -// Basic C-string style for now. -// -// Maybe XOR with previous string as context? -// This permits partial match to be encoded efficiently. -static int encode_token_alpha(name_context *ctx, int ntok, - char *str, int len) { - int id = (ntok<<4) | N_ALPHA; - - if (encode_token_type(ctx, ntok, N_ALPHA) < 0) return -1; - if (descriptor_grow(&ctx->desc[id], len+1) < 0) return -1; - memcpy(&ctx->desc[id].buf[ctx->desc[id].buf_l], str, len); - ctx->desc[id].buf[ctx->desc[id].buf_l+len] = 0; - ctx->desc[id].buf_l += len+1; - - return 0; -} - -// FIXME: need limit on string length for security. 
-// Return length on success, -1 on failure; -static int decode_token_alpha(name_context *ctx, int ntok, char *str, int max_len) { - int id = (ntok<<4) | N_ALPHA; - char c; - int len = 0; - if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) - return -1; - do { - c = ctx->desc[id].buf[ctx->desc[id].buf_l++]; - str[len++] = c; - } while(c && len < max_len && ctx->desc[id].buf_l < ctx->desc[id].buf_a); - - return len-1; -} - -static int encode_token_char(name_context *ctx, int ntok, char c) { - int id = (ntok<<4) | N_CHAR; - - if (encode_token_type(ctx, ntok, N_CHAR) < 0) return -1; - if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; - ctx->desc[id].buf[ctx->desc[id].buf_l++] = c; - - return 0; -} - -// FIXME: need limit on string length for security -// Return length on success, -1 on failure; -static int decode_token_char(name_context *ctx, int ntok, char *str) { - int id = (ntok<<4) | N_CHAR; - - if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) - return -1; - *str = ctx->desc[id].buf[ctx->desc[id].buf_l++]; - - return 1; -} - - -// A duplicated name -static int encode_token_dup(name_context *ctx, uint32_t val) { - return encode_token_int(ctx, 0, N_DUP, val); -} - -// Which read to delta against -static int encode_token_diff(name_context *ctx, uint32_t val) { - return encode_token_int(ctx, 0, N_DIFF, val); -} - - -//----------------------------------------------------------------------------- -// Trie implementation for tracking common name prefixes. 
-static -int build_trie(name_context *ctx, char *data, size_t len, int n) { - int nlines = 0; - size_t i; - trie_t *t; - - if (!ctx->t_head) { - ctx->t_head = calloc(1, sizeof(*ctx->t_head)); - if (!ctx->t_head) - return -1; - } - - // Build our trie, also counting input lines - for (nlines = i = 0; i < len; i++, nlines++) { - t = ctx->t_head; - t->count++; - while (i < len && data[i] > '\n') { - unsigned char c = data[i++]; - if (c & 0x80) - //fprintf(stderr, "8-bit ASCII is unsupported\n"); - abort(); - c &= 127; - - - trie_t *x = t->next, *l = NULL; - while (x && x->c != c) { - l = x; x = x->sibling; - } - if (!x) { - if (!ctx->pool) - ctx->pool = pool_create(sizeof(trie_t)); - if (!(x = (trie_t *)pool_alloc(ctx->pool))) - return -1; - memset(x, 0, sizeof(*x)); - if (!l) - x = t->next = x; - else - x = l->sibling = x; - x->n = n; - x->c = c; - } - t = x; - t->c = c; - t->count++; - } - } - - return 0; -} - -#if 0 -void dump_trie(trie_t *t, int depth) { - if (depth == 0) { - printf("graph x_%p {\n splines = ortho\n ranksep=2\n", t); - printf(" p_%p [label=\"\"];\n", t); - dump_trie(t, 1); - printf("}\n"); - } else { - int j, k, count;//, cj; - char label[100], *cp; - trie_t *tp = t; - -// patricia: -// for (count = j = 0; j < 128; j++) -// if (t->next[j]) -// count++, cj=j; -// -// if (count == 1) { -// t = t->next[cj]; -// *cp++ = cj; -// goto patricia; -// } - - trie_t *x; - for (x = t->next; x; x = x->sibling) { - printf(" p_%p [label=\"%c\"];\n", x, x->c); - printf(" p_%p -- p_%p [label=\"%d\", penwidth=\"%f\"];\n", tp, x, x->count, MAX((log(x->count)-3)*2,1)); - //if (depth <= 11) - dump_trie(x, depth+1); - } - -#if 0 - for (j = 0; j < 128; j++) { - trie_t *tn; - - if (!t->next[j]) - continue; - - cp = label; - tn = t->next[j]; - *cp++ = j; -// patricia: - - for (count = k = 0; k < 128; k++) - if (tn->next[k]) - count++;//, cj=k; - -// if (count == 1) { -// tn = tn->next[cj]; -// *cp++ = cj; -// goto patricia; -// } - *cp++ = 0; - - printf(" p_%p 
[label=\"%s\"];\n", tn, label); - printf(" p_%p -- p_%p [label=\"%d\", penwidth=\"%f\"];\n", tp, tn, tn->count, MAX((log(tn->count)-3)*2,1)); - if (depth <= 11) - dump_trie(tn, depth+1); - } -#endif - } -} -#endif - -static -int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, int *is_fixed, int *fixed_len) { - int nlines = 0; - size_t i; - trie_t *t; - int from = -1, p3 = -1; - *exact = 0; - *fixed_len = 0; - *is_fixed = 0; - - // Horrid hack for the encoder only. - // We optimise per known name format here. - int prefix_len; - char *d = *data == '@' ? data+1 : data; - int l = *data == '@' ? len-1 : len; - int f = (*data == '>') ? 1 : 0; - if (l > 70 && d[f+0] == 'm' && d[7] == '_' && d[f+14] == '_' && d[f+61] == '/') { - prefix_len = 60; // PacBio - *is_fixed = 0; - } else if (l == 17 && d[f+5] == ':' && d[f+11] == ':') { - prefix_len = 6; // IonTorrent - *fixed_len = 6; - *is_fixed = 1; - } else if (l > 37 && d[f+8] == '-' && d[f+13] == '-' && d[f+18] == '-' && d[f+23] == '-' && - ((d[f+0] >= '0' && d[f+0] <='9') || (d[f+0] >= 'a' && d[f+0] <= 'f')) && - ((d[f+35] >= '0' && d[f+35] <='9') || (d[f+35] >= 'a' && d[f+35] <= 'f'))) { - // ONT: f33d30d5-6eb8-4115-8f46-154c2620a5da_Basecall_1D_template... - prefix_len = 37; - *fixed_len = 37; - *is_fixed = 1; - } else { - // Check Illumina and trim back to lane:tile:x:y. - int colons = 0; - for (i = 0; i < len && data[i] > ' '; i++) - ; - while (i > 0 && colons < 4) - if (data[--i] == ':') - colons++; - - if (colons == 4) { - // Constant illumina prefix - *fixed_len = i+1; - prefix_len = i+1; - *is_fixed = 1; - } else { - // Unknown, don't use a fixed len, but still search - // for any exact matches. 
- prefix_len = INT_MAX; - *is_fixed = 0; - } - } - //prefix_len = INT_MAX; - - if (!ctx->t_head) { - ctx->t_head = calloc(1, sizeof(*ctx->t_head)); - if (!ctx->t_head) - return -1; - } - - // Find an item in the trie - for (nlines = i = 0; i < len; i++, nlines++) { - t = ctx->t_head; - while (i < len && data[i] > '\n') { - unsigned char c = data[i++]; - if (c & 0x80) - //fprintf(stderr, "8-bit ASCII is unsupported\n"); - abort(); - c &= 127; - - trie_t *x = t->next; - while (x && x->c != c) - x = x->sibling; - t = x; - -// t = t->next[c]; - -// if (!t) -// return -1; - - from = t->n; - if (i == prefix_len) p3 = t->n; - //if (t->count >= .0035*ctx->t_head->count && t->n != n) p3 = t->n; // pacbio - //if (i == 60) p3 = t->n; // pacbio - //if (i == 7) p3 = t->n; // iontorrent - t->n = n; - } - } - - //printf("Looked for %d, found %d, prefix %d\n", n, from, p3); - - *exact = (n != from) && len; - return *exact ? from : p3; -} - - -//----------------------------------------------------------------------------- -// Name encoder - -/* - * Tokenises a read name using ctx as context as the previous - * tokenisation. - * - * Parsed elements are then emitted for encoding by calling the - * encode_token() function with the context, token number (Nth token - * in line), token type and token value. - * - * Returns 0 on success; - * -1 on failure. - */ -static int encode_name(name_context *ctx, char *name, int len, int mode) { - int i, is_fixed, fixed_len; - - int exact; - int cnum = ctx->counter++; - int pnum = search_trie(ctx, name, len, cnum, &exact, &is_fixed, &fixed_len); - if (pnum < 0) pnum = cnum ? cnum-1 : 0; - //pnum = pnum & (MAX_NAMES-1); - //cnum = cnum & (MAX_NAMES-1); - //if (pnum == cnum) {pnum = cnum ? cnum-1 : 0;} -#ifdef ENC_DEBUG - fprintf(stderr, "%d: pnum=%d (%d), exact=%d\n%s\n%s\n", - ctx->counter, pnum, cnum-pnum, exact, ctx->lc[pnum].last_name, name); -#endif - - // Return DUP or DIFF switch, plus the distance. 
- if (exact && len == strlen(ctx->lc[pnum].last_name)) { - encode_token_dup(ctx, cnum-pnum); - ctx->lc[cnum].last_name = name; - ctx->lc[cnum].last_ntok = ctx->lc[pnum].last_ntok; - int nc = ctx->lc[cnum].last_ntok ? ctx->lc[cnum].last_ntok : MAX_TOKENS; - ctx->lc[cnum].last = malloc(nc * sizeof(*ctx->lc[cnum].last)); - if (!ctx->lc[cnum].last) - return -1; - memcpy(ctx->lc[cnum].last, ctx->lc[pnum].last, - ctx->lc[cnum].last_ntok * sizeof(*ctx->lc[cnum].last)); - return 0; - } - - ctx->lc[cnum].last = malloc(MAX_TOKENS * sizeof(*ctx->lc[cnum].last)); - if (!ctx->lc[cnum].last) - return -1; - encode_token_diff(ctx, cnum-pnum); - - int ntok = 1; - i = 0; - if (is_fixed) { - if (ntok >= ctx->max_tok) { - memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); - memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); - memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); - ctx->max_tok = ntok+1; - } - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_ALPHA) { - if (ctx->lc[pnum].last[ntok].token_int == fixed_len && memcmp(name, ctx->lc[pnum].last_name, fixed_len) == 0) { - encode_token_match(ctx, ntok); - } else { - encode_token_alpha(ctx, ntok, name, fixed_len); - } - } else { - encode_token_alpha(ctx, ntok, name, fixed_len); - } - ctx->lc[cnum].last[ntok].token_int = fixed_len; - ctx->lc[cnum].last[ntok].token_str = 0; - ctx->lc[cnum].last[ntok++].token_type = N_ALPHA; - i = fixed_len; - } - - for (; i < len; i++) { - if (ntok >= ctx->max_tok) { - memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); - memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); - memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); - ctx->max_tok = ntok+1; - } - - /* Determine data type of this segment */ - if (isalpha(name[i])) { - int s = i+1; -// int S = i+1; - -// // FIXME: try which of these is best. alnum is good sometimes. 
-// while (s < len && isalpha(name[s])) - while (s < len && (isalpha(name[s]) || ispunct(name[s]))) -// while (s < len && name[s] != ':') -// while (s < len && !isdigit(name[s]) && name[s] != ':') - s++; - -// if (!is_fixed) { -// while (S < len && isalnum(name[S])) -// S++; -// if (s < S) -// s = S; -// } - - // Single byte strings are better encoded as chars. - if (s-i == 1) goto n_char; - - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_ALPHA) { - if (s-i == ctx->lc[pnum].last[ntok].token_int && - memcmp(&name[i], - &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], - s-i) == 0) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (alpha-mat, %.*s)\n", N_MATCH, s-i, &name[i]); -#endif - if (encode_token_match(ctx, ntok) < 0) return -1; - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (alpha, %.*s / %.*s)\n", N_ALPHA, - s-i, &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], s-i, &name[i]); -#endif - // same token/length, but mismatches - if (encode_token_alpha(ctx, ntok, &name[i], s-i) < 0) return -1; - } - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (new alpha, %.*s)\n", N_ALPHA, s-i, &name[i]); -#endif - if (encode_token_alpha(ctx, ntok, &name[i], s-i) < 0) return -1; - } - - ctx->lc[cnum].last[ntok].token_int = s-i; - ctx->lc[cnum].last[ntok].token_str = i; - ctx->lc[cnum].last[ntok].token_type = N_ALPHA; - - i = s-1; - } else if (name[i] == '0') digits0: { - // Digits starting with zero; encode length + value - uint32_t s = i; - uint32_t v = 0; - int d = 0; - - while (s < len && isdigit(name[s]) && s-i < 9) { - v = v*10 + name[s] - '0'; - //putchar(name[s]); - s++; - } - - // TODO: optimise choice over whether to switch from DIGITS to DELTA - // regularly vs all DIGITS, also MATCH vs DELTA 0. 
- if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_DIGITS0) { - d = v - ctx->lc[pnum].last[ntok].token_int; - if (d == 0 && ctx->lc[pnum].last[ntok].token_str == s-i) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig-mat, %d)\n", N_MATCH, v); -#endif - if (encode_token_match(ctx, ntok) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - } else if (mode == 1 && d < 256 && d >= 0 && ctx->lc[pnum].last[ntok].token_str == s-i) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig-delta, %d / %d)\n", N_DDELTA, ctx->lc[pnum].last[ntok].token_int, v); -#endif - //if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; - if (encode_token_int1(ctx, ntok, N_DDELTA0, d) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=1; - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig, %d / %d)\n", N_DIGITS, ctx->lc[pnum].last[ntok].token_int, v); -#endif - if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; - if (encode_token_int(ctx, ntok, N_DIGITS0, v) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - } - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (new dig, %d)\n", N_DIGITS, v); -#endif - if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; - if (encode_token_int(ctx, ntok, N_DIGITS0, v) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - } - - ctx->lc[cnum].last[ntok].token_str = s-i; // length - ctx->lc[cnum].last[ntok].token_int = v; - ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; - - i = s-1; - } else if (isdigit(name[i])) { - // digits starting 1-9; encode value - uint32_t s = i; - uint32_t v = 0; - int d = 0; - - while (s < len && isdigit(name[s]) && s-i < 9) { - v = v*10 + name[s] - '0'; - //putchar(name[s]); - s++; - } - - // dataset/10/K562_cytosol_LID8465_TopHat_v2.names - // col 4 is Illumina lane - we don't want match & delta in there - // as it has multiple lanes (so not ALL match) and delta is just - // random chance, increasing entropy 
instead. -// if (ntok == 4 || ntok == 8 || ntok == 10) { -// encode_token_int(ctx, ntok, N_DIGITS, v); -// } else { - - // If the last token was DIGITS0 and we are the same length, then encode - // using that method instead as it seems likely the entire column is fixed - // width, sometimes with leading zeros. - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && - ctx->lc[pnum].last[ntok].token_type == N_DIGITS0 && - ctx->lc[pnum].last[ntok].token_str == s-i) - goto digits0; - - // TODO: optimise choice over whether to switch from DIGITS to DELTA - // regularly vs all DIGITS, also MATCH vs DELTA 0. - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_DIGITS) { - d = v - ctx->lc[pnum].last[ntok].token_int; - if (d == 0) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig-mat, %d)\n", N_MATCH, v); -#endif - if (encode_token_match(ctx, ntok) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - //ctx->token_zcount[ntok]++; - } else if (mode == 1 && d < 256 && d >= 0 - //&& (10+ctx->token_dcount[ntok]) > (ctx->token_icount[ntok]+ctx->token_zcount[ntok]) - && (5+ctx->token_dcount[ntok]) > ctx->token_icount[ntok] - ) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig-delta, %d / %d)\n", N_DDELTA, ctx->lc[pnum].last[ntok].token_int, v); -#endif - if (encode_token_int1(ctx, ntok, N_DDELTA, d) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=1; - ctx->token_dcount[ntok]++; - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig, %d / %d)\n", N_DIGITS, ctx->lc[pnum].last[ntok].token_int, v); -#endif - if (encode_token_int(ctx, ntok, N_DIGITS, v) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - ctx->token_icount[ntok]++; - } - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (new dig, %d)\n", N_DIGITS, v); -#endif - if (encode_token_int(ctx, ntok, N_DIGITS, v) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - } -// } - - ctx->lc[cnum].last[ntok].token_int = v; - 
ctx->lc[cnum].last[ntok].token_type = N_DIGITS; - - i = s-1; - } else { - n_char: - //if (!isalpha(name[i])) putchar(name[i]); - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_CHAR) { - if (name[i] == ctx->lc[pnum].last[ntok].token_int) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (chr-mat, %c)\n", N_MATCH, name[i]); -#endif - if (encode_token_match(ctx, ntok) < 0) return -1; - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (chr, %c / %c)\n", N_CHAR, ctx->lc[pnum].last[ntok].token_int, name[i]); -#endif - if (encode_token_char(ctx, ntok, name[i]) < 0) return -1; - } - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (new chr, %c)\n", N_CHAR, name[i]); -#endif - if (encode_token_char(ctx, ntok, name[i]) < 0) return -1; - } - - ctx->lc[cnum].last[ntok].token_int = name[i]; - ctx->lc[cnum].last[ntok].token_type = N_CHAR; - } - - ntok++; - //putchar(' '); - } - -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (end)\n", N_END); -#endif - if (ntok >= ctx->max_tok) { - memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); - memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); - memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); - ctx->max_tok = ntok+1; - } - if (encode_token_end(ctx, ntok) < 0) return -1; -#ifdef ENC_DEBUG - fprintf(stderr, "ntok=%d max_tok=%d\n", ntok, ctx->max_tok); -#endif - - //printf("Encoded %.*s with %d tokens\n", len, name, ntok); - - ctx->lc[cnum].last_name = name; - ctx->lc[cnum].last_ntok = ntok; - last_context_tok *shrunk = realloc(ctx->lc[cnum].last, - (ntok+1) * sizeof(*ctx->lc[cnum].last)); - if (shrunk) - ctx->lc[cnum].last = shrunk; - - if (!ctx->lc[cnum].last) - return -1; - - return 0; -} - -//----------------------------------------------------------------------------- -// Name decoder - -static int decode_name(name_context *ctx, char *name, int name_len) { - int t0 = decode_token_type(ctx, 0); - uint32_t dist; - int pnum, cnum = ctx->counter++; - - if (cnum 
>= ctx->max_names) - return -1; - - if (t0 < 0 || t0 >= ctx->max_tok*16) - return 0; - - if (decode_token_int(ctx, 0, t0, &dist) < 0 || dist > cnum) - return -1; - if ((pnum = cnum - dist) < 0) pnum = 0; - - //fprintf(stderr, "t0=%d, dist=%d, pnum=%d, cnum=%d\n", t0, dist, pnum, cnum); - - if (t0 == N_DUP) { - if (pnum == cnum) - return -1; - - if (strlen(ctx->lc[pnum].last_name) +1 >= name_len) return -1; - strcpy(name, ctx->lc[pnum].last_name); - // FIXME: optimise this - ctx->lc[cnum].last_name = name; - ctx->lc[cnum].last_ntok = ctx->lc[pnum].last_ntok; - - int nc = ctx->lc[cnum].last_ntok ? ctx->lc[cnum].last_ntok : MAX_TOKENS; - ctx->lc[cnum].last = malloc(nc * sizeof(*ctx->lc[cnum].last)); - if (!ctx->lc[cnum].last) - return -1; - memcpy(ctx->lc[cnum].last, ctx->lc[pnum].last, - ctx->lc[cnum].last_ntok * sizeof(*ctx->lc[cnum].last)); - - return strlen(name)+1; - } - - *name = 0; - int ntok, len = 0, len2; - ctx->lc[cnum].last = malloc(MAX_TOKENS * sizeof(*ctx->lc[cnum].last)); - if (!ctx->lc[cnum].last) - return -1; - - for (ntok = 1; ntok < MAX_TOKENS && ntok < ctx->max_tok; ntok++) { - uint32_t v, vl; - enum name_type tok; - tok = decode_token_type(ctx, ntok); - //fprintf(stderr, "Tok %d = %d\n", ntok, tok); - - ctx->lc[cnum].last_ntok = 0; - - switch (tok) { - case N_CHAR: - if (len+1 >= name_len) return -1; - if (decode_token_char(ctx, ntok, &name[len]) < 0) return -1; - //fprintf(stderr, "Tok %d CHAR %c\n", ntok, name[len]); - ctx->lc[cnum].last[ntok].token_type = N_CHAR; - ctx->lc[cnum].last[ntok].token_int = name[len++]; - break; - - case N_ALPHA: - if ((len2 = decode_token_alpha(ctx, ntok, &name[len], name_len - len)) < 0) - return -1; - //fprintf(stderr, "Tok %d ALPHA %.*s\n", ntok, len2, &name[len]); - ctx->lc[cnum].last[ntok].token_type = N_ALPHA; - ctx->lc[cnum].last[ntok].token_str = len; - ctx->lc[cnum].last[ntok].token_int = len2; - len += len2; - break; - - case N_DIGITS0: // [0-9]* - if (decode_token_int1(ctx, ntok, N_DZLEN, &vl) < 0) return 
-1; - if (decode_token_int(ctx, ntok, N_DIGITS0, &v) < 0) return -1; - if (len+20+vl >= name_len) return -1; - len += append_uint32_fixed(&name[len], v, vl); - //fprintf(stderr, "Tok %d DIGITS0 %0*d\n", ntok, vl, v); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; - ctx->lc[cnum].last[ntok].token_int = v; - ctx->lc[cnum].last[ntok].token_str = vl; - break; - - case N_DDELTA0: - if (ntok >= ctx->lc[pnum].last_ntok) return -1; - if (decode_token_int1(ctx, ntok, N_DDELTA0, &v) < 0) return -1; - v += ctx->lc[pnum].last[ntok].token_int; - if (len+ctx->lc[pnum].last[ntok].token_str+1 >= name_len) return -1; - len += append_uint32_fixed(&name[len], v, ctx->lc[pnum].last[ntok].token_str); - //fprintf(stderr, "Tok %d DELTA0 %0*d\n", ntok, ctx->lc[pnum].last[ntok].token_str, v); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; - ctx->lc[cnum].last[ntok].token_int = v; - ctx->lc[cnum].last[ntok].token_str = ctx->lc[pnum].last[ntok].token_str; - break; - - case N_DIGITS: // [1-9][0-9]* - if (decode_token_int(ctx, ntok, N_DIGITS, &v) < 0) return -1; - if (len+20 >= name_len) return -1; - len += append_uint32_var(&name[len], v); - //fprintf(stderr, "Tok %d DIGITS %d\n", ntok, v); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS; - ctx->lc[cnum].last[ntok].token_int = v; - break; - - case N_DDELTA: - if (ntok >= ctx->lc[pnum].last_ntok) return -1; - if (decode_token_int1(ctx, ntok, N_DDELTA, &v) < 0) return -1; - v += ctx->lc[pnum].last[ntok].token_int; - if (len+20 >= name_len) return -1; - len += append_uint32_var(&name[len], v); - //fprintf(stderr, "Tok %d DELTA %d\n", ntok, v); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS; - ctx->lc[cnum].last[ntok].token_int = v; - break; - - case N_NOP: - ctx->lc[cnum].last[ntok].token_type = N_NOP; - break; - - case N_MATCH: - if (ntok >= ctx->lc[pnum].last_ntok) return -1; - switch (ctx->lc[pnum].last[ntok].token_type) { - case N_CHAR: - if (len+1 >= name_len) return -1; - name[len++] = ctx->lc[pnum].last[ntok].token_int; - 
//fprintf(stderr, "Tok %d MATCH CHAR %c\n", ntok, ctx->lc[pnum].last[ntok].token_int); - ctx->lc[cnum].last[ntok].token_type = N_CHAR; - ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; - break; - - case N_ALPHA: - if (ctx->lc[pnum].last[ntok].token_int < 0 || - len+ctx->lc[pnum].last[ntok].token_int >= name_len) return -1; - memcpy(&name[len], - &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], - ctx->lc[pnum].last[ntok].token_int); - //fprintf(stderr, "Tok %d MATCH ALPHA %.*s\n", ntok, ctx->lc[pnum].last[ntok].token_int, &name[len]); - ctx->lc[cnum].last[ntok].token_type = N_ALPHA; - ctx->lc[cnum].last[ntok].token_str = len; - ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; - len += ctx->lc[pnum].last[ntok].token_int; - break; - - case N_DIGITS: - if (len+20 >= name_len) return -1; - len += append_uint32_var(&name[len], ctx->lc[pnum].last[ntok].token_int); - //fprintf(stderr, "Tok %d MATCH DIGITS %d\n", ntok, ctx->lc[pnum].last[ntok].token_int); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS; - ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; - break; - - case N_DIGITS0: - if (len+ctx->lc[pnum].last[ntok].token_str >= name_len) return -1; - len += append_uint32_fixed(&name[len], ctx->lc[pnum].last[ntok].token_int, ctx->lc[pnum].last[ntok].token_str); - //fprintf(stderr, "Tok %d MATCH DIGITS %0*d\n", ntok, ctx->lc[pnum].last[ntok].token_str, ctx->lc[pnum].last[ntok].token_int); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; - ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; - ctx->lc[cnum].last[ntok].token_str = ctx->lc[pnum].last[ntok].token_str; - break; - - default: - return -1; - } - break; - - default: // an elided N_END - case N_END: - if (len+1 >= name_len) return -1; - name[len++] = 0; - ctx->lc[cnum].last[ntok].token_type = N_END; - - ctx->lc[cnum].last_name = name; - ctx->lc[cnum].last_ntok = ntok; - - last_context_tok *shrunk - = 
realloc(ctx->lc[cnum].last, - (ntok+1) * sizeof(*ctx->lc[cnum].last)); - if (shrunk) - ctx->lc[cnum].last = shrunk; - - if (!ctx->lc[cnum].last) - return -1; - - return len; - } - } - - - return -1; -} - -//----------------------------------------------------------------------------- -// arith adaptive codec or static rANS 4x16pr codec -static int arith_encode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len, int method) { - unsigned int olen = *out_len-6, nb; - if (arith_compress_to(in, in_len, out+6, &olen, method) == NULL) - return -1; - - nb = var_put_u32(out, out + *out_len, olen); - memmove(out+nb, out+6, olen); - *out_len = olen+nb; - - return 0; -} - -// Returns number of bytes read from 'in' on success, -// -1 on failure. -static int64_t arith_decode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len) { - unsigned int olen = *out_len; - - uint32_t clen; - int nb = var_get_u32(in, in+in_len, &clen); - //fprintf(stderr, "Arith decode %x\n", in[nb]); - if (arith_uncompress_to(in+nb, in_len-nb, out, &olen) == NULL) - return -1; - //fprintf(stderr, " Stored clen=%d\n", (int)clen); - *out_len = olen; - return clen+nb; -} - -static int rans_encode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len, int method) { - unsigned int olen = *out_len-6, nb; - if (rans_compress_to_4x16(in, in_len, out+6, &olen, method) == NULL) - return -1; - - nb = var_put_u32(out, out + *out_len, olen); - memmove(out+nb, out+6, olen); - *out_len = olen+nb; - - return 0; -} - -// Returns number of bytes read from 'in' on success, -// -1 on failure. 
-static int64_t rans_decode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len) { - unsigned int olen = *out_len; - - uint32_t clen; - int nb = var_get_u32(in, in+in_len, &clen); - //fprintf(stderr, "Arith decode %x\n", in[nb]); - if (rans_uncompress_to_4x16(in+nb, in_len-nb, out, &olen) == NULL) - return -1; - //fprintf(stderr, " Stored clen=%d\n", (int)clen); - *out_len = olen; - return clen+nb; -} - -static int compress(uint8_t *in, uint64_t in_len, enum name_type type, - int level, int use_arith, - uint8_t *out, uint64_t *out_len) { - uint64_t best_sz = UINT64_MAX; - uint64_t olen = *out_len; - int ret = -1; - - // Map levels 1-9 to 0-4, for parameter lookup in R[] below - level = (level-1)/2; - if (level<0) level=0; - if (level>4) level=4; - - // rANS4x16pr and arith_dynamic parameters to explore. - // We brute force these, so fast levels test 1 setting and slow test more - int R[5][N_ALL][7] = { - { // -1 - /* TYPE */ {1, 128}, - /* ALPHA */ {1, 129}, - /* CHAR */ {1, 0}, - /* DIGITS0 */ {1, 8}, - /* DZLEN */ {1, 0}, - /* DUP */ {1, 8}, - /* DIFF */ {1, 8}, - /* DIGITS */ {1, 8}, - /* DDELTA */ {1, 0}, - /* DDELTA0 */ {1, 128}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - - { // -3 - /* TYPE */ {2, 192,0}, - /* ALPHA */ {2, 129,1}, - /* CHAR */ {1, 0}, - /* DIGITS0 */ {2, 128+8,0}, // size%4==0 - /* DZLEN */ {1, 0}, - /* DUP */ {1, 192+8}, // size%4==0 - /* DIFF */ {1, 128+8}, // size%4==0 - /* DIGITS */ {1, 192+8}, // size%4==0 - /* DDELTA */ {1, 0}, - /* DDELTA0 */ {1, 128}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - - { // -5 - /* TYPE */ {2, 192,0}, - /* ALPHA */ {4, 1,128,0,129}, - /* CHAR */ {1, 0}, - /* DIGITS0 */ {2, 200,0}, - /* DZLEN */ {1, 0}, - /* DUP */ {1, 200}, - /* DIFF */ {2, 192,200}, - /* DIGITS */ {2, 132,201}, - /* DDELTA */ {1, 0}, - /* DDELTA0 */ {1, 128}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - - { // -7 - /* TYPE */ {3, 193,0,1}, - /* ALPHA */ {5, 
128, 1,128,0,129}, - /* CHAR */ {2, 1,0}, - /* DIGITS0 */ {2, 200,0}, // or 201,0 - /* DZLEN */ {1, 0}, - /* DUP */ {1, 201}, - /* DIFF */ {2, 192,200}, // or 192,201 - /* DIGITS */ {2, 132, 201}, // +bz2 here and -9 - /* DDELTA */ {1, 0}, - /* DDELTA0 */ {1, 128}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - - { // -9 - /* TYPE */ {6, 192,0,1, 65, 193,132}, - /* ALPHA */ {4, 132, 1, 0,129}, - /* CHAR */ {3, 1,0,192}, - /* DIGITS0 */ {4, 201,0, 192,64}, - /* DZLEN */ {3, 0,128,1}, - /* DUP */ {1, 201}, - /* DIFF */ {3, 192, 201,65}, - /* DIGITS */ {6, 132, 201,1, 192,129, 193}, - /* DDELTA */ {3, 1,0, 192}, - /* DDELTA0 */ {3, 192,1, 0}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - }; - // Minor tweak to level 3 DIGITS if arithmetic, to use O(201) instead. - if (use_arith) R[1][N_DIGITS][1]=201; - - int *meth = R[level][type]; - - int last = 0, m; - uint8_t best_static[8192]; - uint8_t *best_dat = best_static; - for (m = 1; m <= meth[0]; m++) { - *out_len = olen; - - if (!use_arith && (meth[m] & 4)) - meth[m] &= ~4; - - if (in_len % 4 != 0 && (meth[m] & 8)) - continue; - - last = 0; - if (use_arith) { - if (arith_encode(in, in_len, out, out_len, meth[m]) <0) - goto err; - } else { - if (rans_encode(in, in_len, out, out_len, meth[m]) < 0) - goto err; - } - - if (best_sz > *out_len) { - best_sz = *out_len; - last = 1; - - if (m+1 > meth[0]) - // no need to memcpy if we're not going to overwrite out - break; - - if (best_sz > 8192 && best_dat == best_static) { - // No need to realloc as best_sz only ever decreases - best_dat = malloc(best_sz); - if (!best_dat) - return -1; - } - memcpy(best_dat, out, best_sz); - } - } - - if (!last) - memcpy(out, best_dat, best_sz); - *out_len = best_sz; - ret = 0; - - err: - if (best_dat != best_static) - free(best_dat); - - return ret; -} - -static uint64_t uncompressed_size(uint8_t *in, uint64_t in_len) { - uint32_t clen, ulen; - - // in[0] in part of buffer written by us - int nb = 
var_get_u32(in, in+in_len, &clen); - - // in[nb] is part of buffer written to by arith_dynamic. - var_get_u32(in+nb+1, in+in_len, &ulen); - - return ulen; -} - -static int uncompress(int use_arith, uint8_t *in, uint64_t in_len, - uint8_t *out, uint64_t *out_len) { - uint32_t clen; - var_get_u32(in, in+in_len, &clen); - return use_arith - ? arith_decode(in, in_len, out, out_len) - : rans_decode(in, in_len, out, out_len); -} - -//----------------------------------------------------------------------------- - -/* - * Converts a line or \0 separated block of reading names to a compressed buffer. - * The code can only encode whole lines and will not attempt a partial line. - * Use the "last_start_p" return value to identify the partial line start - * offset, for continuation purposes. - * - * Returns a malloced buffer holding compressed data of size *out_len, - * or NULL on failure - */ -uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith, - int *out_len, int *last_start_p) { - int last_start = 0, i, j, nreads; - - if (len < 0) { - *out_len = 0; - return NULL; - } - - // Count lines - for (nreads = i = 0; i < len; i++) - if (blk[i] <= '\n') // \n or \0 separated entries - nreads++; - - name_context *ctx = create_context(nreads); - if (!ctx) - return NULL; - - // Construct trie - int ctr = 0; - for (i = j = 0; i < len; j=++i) { - while (i < len && blk[i] > '\n') - i++; - if (i >= len) - break; - - //blk[i] = '\0'; - last_start = i+1; - if (build_trie(ctx, &blk[j], i-j, ctr++) < 0) { - free_context(ctx); - return NULL; - } - } - if (last_start_p) - *last_start_p = last_start; - - //fprintf(stderr, "Processed %d of %d in block, line %d\n", last_start, len, ctr); - - // Encode name - for (i = j = 0; i < len; j=++i) { - while (i < len && blk[i] > '\n') - i++; - if (i >= len) - break; - - blk[i] = '\0'; - // try both 0 and 1 and pick best? 
- if (encode_name(ctx, &blk[j], i-j, 1) < 0) { - free_context(ctx); - return NULL; - } - } - -#if 0 - for (i = 0; i < ctx->max_tok*16; i++) { - char fn[1024]; - if (!ctx->desc[i].buf_l) continue; - sprintf(fn, "_tok.%02d_%02d.%d", i>>4,i&15,i); - FILE *fp = fopen(fn, "w"); - fwrite(ctx->desc[i].buf, 1, ctx->desc[i].buf_l, fp); - fclose(fp); - } -#endif - - //dump_trie(t_head, 0); - - // FIXME: merge descriptors - // - // If we see foo7:1 foo7:12 foo7:7 etc then foo: is constant, - // but it's encoded as alpha+dig<7>+char<:> instead of alpha. - // Any time token type 0 is all match beyond the first location we have - // a candidate for merging in string form. - // - // This saves around .1 to 1.3 percent on varying data sets. - // Cruder hack is dedicated prefix/suffix matching to short-cut this. - - - // Drop N_TYPE blocks if they all contain matches bar the first item, - // as we can regenerate these from the subsequent blocks types during - // decode. - for (i = 0; i < ctx->max_tok*16; i+=16) { - if (!ctx->desc[i].buf_l) continue; - - int z; - for (z=1; zdesc[i].buf_l; z++) { - if (ctx->desc[i].buf[z] != N_MATCH) - break; - } - if (z == ctx->desc[i].buf_l) { - int k; - for (k=1; k<16; k++) - if (ctx->desc[i+k].buf_l) - break; - - if (k < 16) { - ctx->desc[i].buf_l = 0; - free(ctx->desc[i].buf); - ctx->desc[i].buf = NULL; - } - } - } - - // Serialise descriptors - uint32_t tot_size = 9; - for (i = 0; i < ctx->max_tok*16; i++) { - if (!ctx->desc[i].buf_l) continue; - - int tnum = i>>4; - int ttype = i&15; - - uint64_t out_len = 1.5 * arith_compress_bound(ctx->desc[i].buf_l, 1); // guesswork - uint8_t *out = malloc(out_len); - if (!out) { - free_context(ctx); - return NULL; - } - - if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level, - use_arith, out, &out_len) < 0) { - free_context(ctx); - return NULL; - } - - free(ctx->desc[i].buf); - ctx->desc[i].buf = out; - ctx->desc[i].buf_l = out_len; - ctx->desc[i].tnum = tnum; - ctx->desc[i].ttype = ttype; - - 
// Find dups - int j; - for (j = 0; j < i; j++) { - if (!ctx->desc[j].buf) - continue; - if (ctx->desc[i].buf_l != ctx->desc[j].buf_l || ctx->desc[i].buf_l <= 4) - continue; - if (memcmp(ctx->desc[i].buf, ctx->desc[j].buf, ctx->desc[i].buf_l) == 0) - break; - } - if (j < i) { - ctx->desc[i].dup_from = j; - tot_size += 3; // flag, dup_from, ttype - } else { - ctx->desc[i].dup_from = 0; - tot_size += out_len + 1; // ttype - } - } - -#if 0 - for (i = 0; i < ctx->max_tok*16; i++) { - char fn[1024]; - if (!ctx->desc[i].buf_l && !ctx->desc[i].dup_from) continue; - sprintf(fn, "_tok.%02d_%02d.%d.comp", i>>4,i&15,i); - FILE *fp = fopen(fn, "w"); - fwrite(ctx->desc[i].buf, 1, ctx->desc[i].buf_l, fp); - fclose(fp); - } -#endif - - // Write - uint8_t *out = malloc(tot_size+13); - if (!out) { - free_context(ctx); - return NULL; - } - - uint8_t *cp = out; - - *out_len = tot_size; -// *(uint32_t *)cp = last_start; cp += 4; -// *(uint32_t *)cp = nreads; cp += 4; - *cp++ = (last_start >> 0) & 0xff; - *cp++ = (last_start >> 8) & 0xff; - *cp++ = (last_start >> 16) & 0xff; - *cp++ = (last_start >> 24) & 0xff; - *cp++ = (nreads >> 0) & 0xff; - *cp++ = (nreads >> 8) & 0xff; - *cp++ = (nreads >> 16) & 0xff; - *cp++ = (nreads >> 24) & 0xff; - *cp++ = use_arith; - //write(1, &nreads, 4); - int last_tnum = -1; - for (i = 0; i < ctx->max_tok*16; i++) { - if (!ctx->desc[i].buf_l) continue; - uint8_t ttype8 = ctx->desc[i].ttype; - if (ctx->desc[i].tnum != last_tnum) { - ttype8 |= 128; - last_tnum = ctx->desc[i].tnum; - } - if (ctx->desc[i].dup_from) { - //fprintf(stderr, "Dup %d from %d, sz %d\n", i, ctx->desc[i].dup_from, ctx->desc[i].buf_l); - *cp++ = ttype8 | 64; - *cp++ = ctx->desc[i].dup_from >> 4; - *cp++ = ctx->desc[i].dup_from & 15; - } else { - *cp++ = ttype8; - memcpy(cp, ctx->desc[i].buf, ctx->desc[i].buf_l); - cp += ctx->desc[i].buf_l; - } - } - - //assert(cp-out == tot_size); - - free_context(ctx); - - return out; -} - -// Deprecated interface; to remove when we next to an ABI 
breakage -uint8_t *encode_names(char *blk, int len, int level, int use_arith, - int *out_len, int *last_start_p) { - return tok3_encode_names(blk, len, level, use_arith, out_len, - last_start_p); -} - -/* - * Decodes a compressed block of read names into \0 separated names. - * The size of the data returned (malloced) is in *out_len. - * - * Returns NULL on failure. - */ -uint8_t *tok3_decode_names(uint8_t *in, uint32_t sz, uint32_t *out_len) { - if (sz < 9) - return NULL; - - int i, o = 9; - //int ulen = *(uint32_t *)in; - int ulen = (in[0]<<0) | (in[1]<<8) | (in[2]<<16) | - (((uint32_t)in[3])<<24); - - if (ulen < 0 || ulen >= INT_MAX-1024) - return NULL; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Speed up fuzzing by blocking excessive sizes - if (ulen > 100000) - return NULL; -#endif - - //int nreads = *(uint32_t *)(in+4); - int nreads = (in[4]<<0) | (in[5]<<8) | (in[6]<<16) | (((uint32_t)in[7])<<24); - int use_arith = in[8]; - name_context *ctx = create_context(nreads); - if (!ctx) - return NULL; - - // Unpack descriptors - int tnum = -1; - while (o < sz) { - uint8_t ttype = in[o++]; - if (ttype & 64) { - if (o+2 >= sz) goto err; - int j = in[o++]<<4; - j += in[o++]; - if (ttype & 128) { - tnum++; - if (tnum >= MAX_TOKENS) - goto err; - ctx->max_tok = tnum+1; - memset(&ctx->desc[tnum<<4], 0, 16*sizeof(ctx->desc[tnum])); - } - - if ((ttype & 15) != 0 && (ttype & 128)) { - if (tnum < 0) goto err; - ctx->desc[tnum<<4].buf = malloc(nreads); - if (!ctx->desc[tnum<<4].buf) - goto err; - - ctx->desc[tnum<<4].buf_l = 0; - ctx->desc[tnum<<4].buf_a = nreads; - ctx->desc[tnum<<4].buf[0] = ttype&15; - memset(&ctx->desc[tnum<<4].buf[1], N_MATCH, nreads-1); - } - - if (tnum < 0) goto err; - i = (tnum<<4) | (ttype&15); - if (j >= i) - goto err; - if (!ctx->desc[j].buf) - goto err; // Attempt to copy a non-existent stream - - ctx->desc[i].buf_l = 0; - ctx->desc[i].buf_a = ctx->desc[j].buf_a; - if (ctx->desc[i].buf) free(ctx->desc[i].buf); - ctx->desc[i].buf = 
malloc(ctx->desc[i].buf_a); - if (!ctx->desc[i].buf) - goto err; - - memcpy(ctx->desc[i].buf, ctx->desc[j].buf, ctx->desc[i].buf_a); - //fprintf(stderr, "Copy ttype %d, i=%d,j=%d, size %d\n", ttype, i, j, (int)ctx->desc[i].buf_a); - continue; - } - - //if (ttype == 0) - if (ttype & 128) { - tnum++; - if (tnum >= MAX_TOKENS) - goto err; - ctx->max_tok = tnum+1; - memset(&ctx->desc[tnum<<4], 0, 16*sizeof(ctx->desc[tnum])); - } - - if ((ttype & 15) != 0 && (ttype & 128)) { - if (tnum < 0) goto err; - if (ctx->desc[tnum<<4].buf) free(ctx->desc[tnum<<4].buf); - ctx->desc[tnum<<4].buf = malloc(nreads); - if (!ctx->desc[tnum<<4].buf) - goto err; - ctx->desc[tnum<<4].buf_l = 0; - ctx->desc[tnum<<4].buf_a = nreads; - ctx->desc[tnum<<4].buf[0] = ttype&15; - memset(&ctx->desc[tnum<<4].buf[1], N_MATCH, nreads-1); - } - - //fprintf(stderr, "Read %02x\n", c); - - // Load compressed block - int64_t clen, ulen = uncompressed_size(&in[o], sz-o); - if (ulen < 0 || ulen >= INT_MAX) - goto err; - if (tnum < 0) goto err; - i = (tnum<<4) | (ttype&15); - - if (i >= MAX_TBLOCKS || i < 0) - goto err; - - ctx->desc[i].buf_l = 0; - if (ctx->desc[i].buf) free(ctx->desc[i].buf); - ctx->desc[i].buf = malloc(ulen); - if (!ctx->desc[i].buf) - goto err; - - ctx->desc[i].buf_a = ulen; - uint64_t usz = ctx->desc[i].buf_a; // convert from size_t for 32-bit sys - clen = uncompress(use_arith, &in[o], sz-o, ctx->desc[i].buf, &usz); - ctx->desc[i].buf_a = usz; - if (clen < 0 || ctx->desc[i].buf_a != ulen) - goto err; - - // fprintf(stderr, "%d: Decode tnum %d type %d clen %d ulen %d via %d\n", - // o, tnum, ttype, (int)clen, (int)ctx->desc[i].buf_a, ctx->desc[i].buf[0]); - - o += clen; - - // Encode tnum 0 type 0 ulen 100000 clen 12530 via 2 - // Encode tnum 0 type 6 ulen 196800 clen 43928 via 3 - // Encode tnum 0 type 7 ulen 203200 clen 17531 via 3 - // Encode tnum 1 type 0 ulen 50800 clen 10 via 1 - // Encode tnum 1 type 1 ulen 3 clen 5 via 0 - // Encode tnum 2 type 0 ulen 50800 clen 10 via 1 - // - } 
- - int ret; - ulen += 1024; // for easy coding in decode_name. - uint8_t *out = malloc(ulen); - if (!out) - goto err; - - size_t out_sz = 0; - while ((ret = decode_name(ctx, (char *)out+out_sz, ulen)) > 0) { - out_sz += ret; - ulen -= ret; - } - - if (ret < 0) - free(out); - - free_context(ctx); - - *out_len = out_sz; - return ret == 0 ? out : NULL; - - err: - free_context(ctx); - return NULL; -} - -// Deprecated interface; to remove when we next to an ABI breakage -uint8_t *decode_names(uint8_t *in, uint32_t sz, uint32_t *out_len) { - return tok3_decode_names(in, sz, out_len); -} diff --git a/src/htslib-1.18/htscodecs/htscodecs/varint.h b/src/htslib-1.18/htscodecs/htscodecs/varint.h deleted file mode 100644 index a4b148a..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/varint.h +++ /dev/null @@ -1,446 +0,0 @@ -// FIXME: make get functions const uint8_t * - -/* - * Copyright (c) 2019-2021 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef VARINT_H -#define VARINT_H - -#include - -#ifdef VARINT2 -#include "varint2.h" -#else - -// General API scheme is var_{get,put}_{s,u}{32,64} -// s/u for signed/unsigned; 32/64 for integer size. - -// FIXME: consider returning the value and having nbytes passed in by -// reference instead of vice-versa. -// -// ie uint64_t var_get_u64(uint8_t *cp, int *nbytes) -// vs int var_get_u64(uint8_t *cp, uint64_t *val) -// -// The return value can then be assigned to 32-bit or 64-bit type -// without need of a new function name. The cost is we can't then -// do "cp += var_get_u32(cp, endp, &u_freq_sz);". Maybe we can't do -// overflow detection with former? (Want 32-bit but got, say, 40 bit) - - -// Big endian. -// Harder for encoding, but a simpler and faster decoder. -#define BIG_END -#ifdef BIG_END - -static inline -int var_put_u64_safe(uint8_t *cp, const uint8_t *endp, uint64_t i) { - uint8_t *op = cp; - int s = 0; - uint64_t X = i; - - // safe method when we're near end of buffer - do { - s += 7; - X >>= 7; - } while (X); - - if (endp && (endp-cp)*7 < s) - return 0; - - int n; - for (n = 0; n < 10; n++) { - s -= 7; - *cp++ = ((i>>s) & 0x7f) + (s?128:0); - if (!s) - break; - } - - return cp-op; -} - -// This can be optimised further with __builtin_clzl(i) and goto various -// bits of the if/else-if structure, but it's not a vast improvement and -// we are dominated by small values. 
Simplicity wins for now -static inline -int var_put_u64(uint8_t *cp, const uint8_t *endp, uint64_t i) { - if (endp && (endp-cp) < 10) - return var_put_u64_safe(cp, endp, i); - - // maximum of 10 bytes written - if (i < (1<<7)) { - *cp = i; - return 1; - } else if (i < (1<<14)) { - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 2; - } else if (i < (1<<21)) { - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 3; - } else if (i < (1<<28)) { - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 4; - } else if (i < (1LL<<35)) { - *cp++ = ((i>>28) & 0x7f) | 128; - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 5; - } else if (i < (1LL<<42)) { - *cp++ = ((i>>35) & 0x7f) | 128; - *cp++ = ((i>>28) & 0x7f) | 128; - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 6; - } else if (i < (1LL<<49)) { - *cp++ = ((i>>42) & 0x7f) | 128; - *cp++ = ((i>>35) & 0x7f) | 128; - *cp++ = ((i>>28) & 0x7f) | 128; - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 7; - } else if (i < (1LL<<56)) { - *cp++ = ((i>>49) & 0x7f) | 128; - *cp++ = ((i>>42) & 0x7f) | 128; - *cp++ = ((i>>35) & 0x7f) | 128; - *cp++ = ((i>>28) & 0x7f) | 128; - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 8; - } else if (i < (1LL<<63)) { - *cp++ = ((i>>56) & 0x7f) | 128; - *cp++ = ((i>>49) & 0x7f) | 128; - *cp++ = ((i>>42) & 0x7f) | 128; - *cp++ = ((i>>35) & 0x7f) | 128; - *cp++ = ((i>>28) & 0x7f) | 128; - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 9; - } else { - *cp++ = ((i>>63) & 
0x7f) | 128; - *cp++ = ((i>>56) & 0x7f) | 128; - *cp++ = ((i>>49) & 0x7f) | 128; - *cp++ = ((i>>42) & 0x7f) | 128; - *cp++ = ((i>>35) & 0x7f) | 128; - *cp++ = ((i>>28) & 0x7f) | 128; - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - } - - return 10; -} - -static inline -int var_put_u32_safe(uint8_t *cp, const uint8_t *endp, uint32_t i) { - uint8_t *op = cp; - int s = 0; - uint32_t X = i; - - // safe method when we're near end of buffer - do { - s += 7; - X >>= 7; - } while (X); - - if (endp && (endp-cp)*7 < s) - return 0; - - int n; - for (n = 0; n < 5; n++) { - s -= 7; - *cp++ = ((i>>s) & 0x7f) + (s?128:0); - if (!s) - break; - } - - return cp-op; -} - -static inline -int var_put_u32(uint8_t *cp, const uint8_t *endp, uint32_t i) { - if (endp && (endp-cp) < 5) - return var_put_u32_safe(cp, endp, i); - - if (i < (1<<7)) { - *cp = i; - return 1; - } else if (i < (1<<14)) { - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 2; - } else if (i < (1<<21)) { - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 3; - } else if (i < (1<<28)) { - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - return 4; - } else { - *cp++ = ((i>>28) & 0x7f) | 128; - *cp++ = ((i>>21) & 0x7f) | 128; - *cp++ = ((i>>14) & 0x7f) | 128; - *cp++ = ((i>> 7) & 0x7f) | 128; - *cp++ = i & 0x7f; - } - - return 5; -} - -static inline -int var_get_u64(uint8_t *cp, const uint8_t *endp, uint64_t *i) { - uint8_t *op = cp, c; - uint64_t j = 0; - - if (!endp || endp - cp >= 10) { - int n = 10; - do { - c = *cp++; - j = (j<<7) | (c & 0x7f); - } while ((c & 0x80) && n-- > 0); - } else { - if (cp >= endp) { - *i = 0; - return 0; - } - - do { - c = *cp++; - j = (j<<7) | (c & 0x7f); - } while ((c & 0x80) && cp < endp); - } - - *i = j; - return cp-op; -} - -static inline -int var_get_u32(uint8_t *cp, const uint8_t 
*endp, uint32_t *i) { - uint8_t *op = cp, c; - uint32_t j = 0; - - if (!endp || endp - cp >= 6) { - // Known maximum loop count helps optimiser. - // NB: this helps considerably at -O3 level, but may harm -O2. - // (However we optimise for those that want optimal code.) - int n = 5; - do { - c = *cp++; - j = (j<<7) | (c & 0x7f); - } while ((c & 0x80) && n-- > 0); - } else { - if (cp >= endp) { - *i = 0; - return 0; - } - - if (*cp < 128) { - *i = *cp; - return 1; - } - - do { - c = *cp++; - j = (j<<7) | (c & 0x7f); - } while ((c & 0x80) && cp < endp); - } - - *i = j; - return cp-op; -} - -//----------------------------------------------------------------------------- -#else // BIG_END - -// Little endian 7-bit variable sized integer encoding. -// The unsigned value is equivalent to LEB128 encoding. -// For signed, see below. -// This is also the Google Protocol Buffer and WebAssembly format. -static inline int var_put_u64(uint8_t *cp, const uint8_t *endp, uint64_t i) { - uint8_t *op = cp; - - if (!endp || (endp-cp)*7 >= 10) { - // Unsafe or big-enough anyway - do { - *cp++ = (i&0x7f) + ((i>=0x80)<<7); - i >>= 7; - } while (i); - } else if (cp < endp) { - // End checked variant - do { - *cp++ = (i&0x7f) + ((i>=0x80)<<7); - i >>= 7; - } while (i && cp < endp); - } - - return cp-op; -} - -static inline int var_put_u32(uint8_t *cp, const uint8_t *endp, uint32_t i) { - uint8_t *op = cp; - - if (!endp || (endp-cp)*7 >= 5) { - // Unsafe or big-enough anyway - do { - *cp++ = (i&0x7f) + ((i>=0x80)<<7); - i >>= 7; - } while (i); - } else if (cp < endp) { - // End checked variant - do { - *cp++ = (i&0x7f) + ((i>=0x80)<<7); - i >>= 7; - } while (i && cp < endp); - } - - return cp-op; -} - -static inline int var_get_u64(uint8_t *cp, const uint8_t *endp, uint64_t *i) { - uint8_t *op = cp, c; - uint64_t j = 0, s = 0; - - if (endp) { - // Safe variant - if (cp >= endp) { - *i = 0; - return 0; - } - - do { - c = *cp++; - j |= (c & 0x7f) << s; - s += 7; - } while ((c & 0x80) && cp < 
endp); - } else { - // Unsafe variant - do { - c = *cp++; - j |= (c & 0x7f) << s; - s += 7; - } while ((c & 0x80)); - } - - *i = j; - return cp-op; -} - -static inline int var_get_u32(uint8_t *cp, const uint8_t *endp, uint32_t *i) { - uint8_t *op = cp, c; - uint32_t j = 0, s = 0; - - if (endp) { - // Safe variant - if (cp >= endp) { - *i = 0; - return 0; - } - - do { - c = *cp++; - j |= (c & 0x7f) << s; - s += 7; - } while ((c & 0x80) && cp < endp); - } else { - // Unsafe variant - do { - c = *cp++; - j |= (c & 0x7f) << s; - s += 7; - } while ((c & 0x80)); - } - - *i = j; - return cp-op; -} -#endif // BIG_END - -//----------------------------------------------------------------------------- -// Signed versions of the above using zig-zag integer encoding. -// This folds the sign bit into the bottom bit so we iterate -// 0, -1, +1, -2, +2, etc. -static inline int var_put_s32(uint8_t *cp, const uint8_t *endp, int32_t i) { - return var_put_u32(cp, endp, ((uint32_t)i << 1) ^ (i >> 31)); -} -static inline int var_put_s64(uint8_t *cp, const uint8_t *endp, int64_t i) { - return var_put_u64(cp, endp, ((uint64_t)i << 1) ^ (i >> 63)); -} - -static inline int var_get_s32(uint8_t *cp, const uint8_t *endp, int32_t *i) { - int b = var_get_u32(cp, endp, (uint32_t *)i); - *i = ((uint32_t)*i >> 1) ^ -(int32_t)(*i & 1); - return b; -} -static inline int var_get_s64(uint8_t *cp, const uint8_t *endp, int64_t *i) { - int b = var_get_u64(cp, endp, (uint64_t *)i); - *i = ((uint64_t)*i >> 1) ^ -(int64_t)(*i & 1); - return b; -} - -static inline int var_size_u64(uint64_t v) { - int i = 0; - do { - i++; - v >>= 7; - } while (v); - return i; -} -#define var_size_u32 var_size_u64 - -static inline int var_size_s64(int64_t v) { - return var_size_u64(((uint64_t)v << 1) ^ (v >> 63)); -} -#define var_size_s32 var_size_s64 - -#endif /* VARINT2 */ - -#endif /* VARINT_H */ diff --git a/src/htslib-1.18/htscodecs/htscodecs/version.h b/src/htslib-1.18/htscodecs/htscodecs/version.h deleted file mode 
100644 index f562827..0000000 --- a/src/htslib-1.18/htscodecs/htscodecs/version.h +++ /dev/null @@ -1 +0,0 @@ -#define HTSCODECS_VERSION_TEXT "1.5.1" diff --git a/src/htslib-1.18/htscodecs/tests/arith.test b/src/htslib-1.18/htscodecs/tests/arith.test deleted file mode 100755 index 53a5a42..0000000 --- a/src/htslib-1.18/htscodecs/tests/arith.test +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh -out=test.out -if test ! -d $out -then - mkdir $out -fi - -for f in `ls -1 $srcdir/dat/q* $srcdir/dat/u32* $srcdir/htscodecs-corpus/dat/q* 2>/dev/null` -do - comp=${f%/*/*}/dat/arith/${f##*/} - case $f in - */q*) - cut -f 1 < $f | tr -d '\012' > $out/arith-nl - ;; - *) - cp $f $out/arith-nl - ;; - esac - for o in 0 1 64 65 128 129 192 193 8 9 4 - do - if [ ! -e "$comp.$o" ] - then - continue - fi - printf 'Testing arith_dynamic -r -o%s on %s\t' $o "$f" - - # Round trip - ./arith_dynamic -r -o$o $out/arith-nl $out/arith.comp 2>>$out/arith.stderr || exit 1 - wc -c < $out/arith.comp - ./arith_dynamic -r -d $out/arith.comp $out/arith.uncomp 2>>$out/arith.stderr || exit 1 - cmp $out/arith-nl $out/arith.uncomp || exit 1 - - # Precompressed data - ./arith_dynamic -r -d $comp.$o $out/arith.uncomp 2>>$out/arith.stderr || exit 1 - cmp $out/arith-nl $out/arith.uncomp || exit 1 - done -done diff --git a/src/htslib-1.18/htscodecs/tests/arith_dynamic_fuzz.c b/src/htslib-1.18/htscodecs/tests/arith_dynamic_fuzz.c deleted file mode 100644 index 59dfe70..0000000 --- a/src/htslib-1.18/htscodecs/tests/arith_dynamic_fuzz.c +++ /dev/null @@ -1,97 +0,0 @@ -/* Fuzz testing target. */ -/* - * Copyright (c) 2019,2020 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/arith_dynamic.h" - -int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { - unsigned int uncomp_size = 0; - unsigned char *uncomp = arith_uncompress(in, in_size, &uncomp_size); - if (uncomp) - free(uncomp); - - return 0; -} - -#ifdef NOFUZZ -#include -#include -#include - -#define BS 1024*1024 -static unsigned char *load(char *fn, uint64_t *lenp) { - unsigned char *data = NULL; - uint64_t dsize = 0; - uint64_t dcurr = 0; - signed int len; - int fd = open(fn, O_RDONLY); - - do { - if (dsize - dcurr < BS) { - dsize = dsize ? 
dsize * 2 : BS; - data = realloc(data, dsize); - } - - len = read(fd, data + dcurr, BS); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("read"); - } - - close(fd); - *lenp = dcurr; - return data; -} - -int main(int argc, char **argv) { - uint64_t in_size; - unsigned char *in = load(argv[1], &in_size); - - LLVMFuzzerTestOneInput(in, in_size); - - free(in); - return 0; -} -#endif diff --git a/src/htslib-1.18/htscodecs/tests/arith_dynamic_test.c b/src/htslib-1.18/htscodecs/tests/arith_dynamic_test.c deleted file mode 100644 index dcd2b60..0000000 --- a/src/htslib-1.18/htscodecs/tests/arith_dynamic_test.c +++ /dev/null @@ -1,301 +0,0 @@ -/* Arithmetic coder tests */ -/* - * Copyright (c) 2019,2020 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/arith_dynamic.h" - -#ifndef BLK_SIZE -// Divisible by 4 for X4 -# define BLK_SIZE 1039*251*4 -#endif - -// Room to allow for expanded BLK_SIZE on worst case compression. -#define BLK_SIZE2 ((105LL*BLK_SIZE)/100) - -static unsigned char *in_buf; - -// Max 4GB -static unsigned char *load(FILE *infp, uint32_t *lenp) { - unsigned char *data = NULL; - uint32_t dsize = 0; - uint32_t dcurr = 0; - signed int len; - - do { - if (dsize - dcurr < BLK_SIZE) { - dsize = dsize ? 
dsize * 2 : BLK_SIZE; - data = realloc(data, dsize); - } - - len = fread(data + dcurr, 1, BLK_SIZE, infp); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("fread"); - } - - *lenp = dcurr; - return data; -} - -int main(int argc, char **argv) { - int opt, order = 0; - int decode = 0, test = 0; - FILE *infp = stdin, *outfp = stdout; - struct timeval tv1, tv2, tv3, tv4; - size_t bytes = 0, raw = 0; - - in_buf = malloc(BLK_SIZE2+257*257*3); - -#ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif - - extern char *optarg; - extern int optind; - - while ((opt = getopt(argc, argv, "o:dtr")) != -1) { - switch (opt) { - case 'o': { - char *optend; - order = strtol(optarg, &optend, 0); - if (*optend == '.') - order += atoi(optend+1)<<8; - break; - } - - case 'd': - decode = 1; - break; - - case 't': - test = 1; - break; - - case 'r': - raw = 1; - break; - } - } - - //order = order ? 1 : 0; // Only support O(0) and O(1) - - if (optind < argc) { - if (!(infp = fopen(argv[optind], "rb"))) { - perror(argv[optind]); - return 1; - } - optind++; - } - - if (optind < argc) { - if (!(outfp = fopen(argv[optind], "wb"))) { - perror(argv[optind]); - return 1; - } - optind++; - } - - gettimeofday(&tv1, NULL); - - if (test) { - size_t len, in_sz = 0, out_sz = 0; - typedef struct { - unsigned char *blk; - uint32_t sz; - } blocks; - blocks *b = NULL, *bc = NULL, *bu = NULL; - int nb = 0, i; - - while ((len = fread(in_buf, 1, BLK_SIZE, infp)) != 0) { - // inefficient, but it'll do for testing - b = realloc(b, (nb+1)*sizeof(*b)); - bu = realloc(bu, (nb+1)*sizeof(*bu)); - bc = realloc(bc, (nb+1)*sizeof(*bc)); - b[nb].blk = malloc(len); - b[nb].sz = len; - memcpy(b[nb].blk, in_buf, len); - bc[nb].sz = arith_compress_bound(BLK_SIZE, order); - bc[nb].blk = malloc(bc[nb].sz); - bu[nb].sz = len; - bu[nb].blk = malloc(BLK_SIZE); - nb++; - in_sz += len; - } - fprintf(stderr, "Testing %d blocks\n", nb); - -#ifndef NTRIALS 
-#define NTRIALS 10 -#endif - int trials = NTRIALS; - while (trials--) { - // Warmup - for (i = 0; i < nb; i++) memset(bc[i].blk, 0, bc[i].sz); - - gettimeofday(&tv1, NULL); - - out_sz = 0; - for (i = 0; i < nb; i++) { - unsigned int csz = bc[i].sz; - bc[i].blk = arith_compress_to(b[i].blk, b[i].sz, bc[i].blk, &csz, order); - assert(csz <= bc[i].sz); - out_sz += 5 + csz; - } - - gettimeofday(&tv2, NULL); - - // Warmup - for (i = 0; i < nb; i++) memset(bu[i].blk, 0, BLK_SIZE); - - gettimeofday(&tv3, NULL); - - for (i = 0; i < nb; i++) - bu[i].blk = arith_uncompress_to(bc[i].blk, bc[i].sz, bu[i].blk, &bu[i].sz); - - gettimeofday(&tv4, NULL); - - for (i = 0; i < nb; i++) { - if (b[i].sz != bu[i].sz || memcmp(b[i].blk, bu[i].blk, b[i].sz)) - fprintf(stderr, "Mismatch in block %d, sz %d/%d\n", i, b[i].sz, bu[i].sz); - //free(bc[i].blk); - //free(bu[i].blk); - } - - fprintf(stderr, "%5.1f MB/s enc, %5.1f MB/s dec\t %ld bytes -> %ld bytes\n", - (double)in_sz / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec), - (double)in_sz / ((long)(tv4.tv_sec - tv3.tv_sec)*1000000 + - tv4.tv_usec - tv3.tv_usec), - (long)in_sz, (long)out_sz); - } - - exit(0); - - } - - if (raw) { - // One naked / raw block, to match the specification - uint32_t in_size, out_size; - unsigned char *in = load(infp, &in_size), *out; - if (!in) exit(1); - - if (decode) { - if (!(out = arith_uncompress(in, in_size, &out_size))) - exit(1); - - fwrite(out, 1, out_size, outfp); - bytes = out_size; - } else { - if (!(out = arith_compress(in, in_size, &out_size, order))) - exit(1); - - fwrite(out, 1, out_size, outfp); - bytes += in_size; - } - - free(in); - free(out); - } else { - // Block based, to permit arbitrarily large data sets. 
- if (decode) { - for (;;) { - uint32_t in_size, out_size; - unsigned char *out; - - if (4 != fread(&in_size, 1, 4, infp)) - break; - if (in_size > BLK_SIZE) - exit(1); - - if (in_size != fread(in_buf, 1, in_size, infp)) { - fprintf(stderr, "Truncated input\n"); - exit(1); - } - out = arith_uncompress(in_buf, in_size, &out_size); - if (!out) - exit(1); - - fwrite(out, 1, out_size, outfp); - fflush(outfp); - free(out); - - bytes += out_size; - } - } else { - for (;;) { - uint32_t in_size, out_size; - unsigned char *out; - - in_size = fread(in_buf, 1, BLK_SIZE, infp); - if (in_size <= 0) - break; - - if (in_size < 4) - order &= ~1; - - out = arith_compress(in_buf, in_size, &out_size, order); - - fwrite(&out_size, 1, 4, outfp); - fwrite(out, 1, out_size, outfp); - free(out); - - bytes += in_size; - } - } - } - - gettimeofday(&tv2, NULL); - - fprintf(stderr, "Took %ld microseconds, %5.1f MB/s\n", - (long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec, - (double)bytes / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec)); - - free(in_buf); - return 0; -} diff --git a/src/htslib-1.18/htscodecs/tests/benchmark.sh b/src/htslib-1.18/htscodecs/tests/benchmark.sh deleted file mode 100755 index 9fd91e9..0000000 --- a/src/htslib-1.18/htscodecs/tests/benchmark.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh - -# Run this from the build subdirectory. 
-# Usage: benchmark.sh filename - -file=$1 -file2=`echo $1 | sed 's#.*/##'` -test_dir=${TEST_DIR:-./tests} -r4x8=$test_dir/rans4x8 -r4x16=$test_dir/rans4x16pr -ntrials=${ntrials:-5} - -awkscript='BEGIN {e1=99999;e2=0;d1=99999;d2=0} /bytes/ {if (e1 > $1) {e1 = $1} if (e2 < $1) {e2 = $1} if (d1 > $4) {d1 = $4} if (d2 < $4) {d2 = $4};s=$10} END {print e1,e2,d1,d2,s}' - -echo "Program Opts Size Encode Decode" -echo "-----------------------------------------------" - -# Order-0 -set -- $(for i in `seq 1 $ntrials`;do - $r4x8 -t -o0 $file 2>&1 - done | awk "$awkscript") -printf "r4x8 -o0 %10d %6.1f %6.1f\n" $5 $2 $4 - -set -- $(for i in `seq 1 $ntrials`;do - $r4x16 -t -o0 $file 2>&1 - done | awk "$awkscript") -printf "r4x16 -o0 %10d %6.1f %6.1f\n" $5 $2 $4 - -for c in 0x0000 0x0101 0x0202 0x0404 -do - set -- $(for i in `seq 1 $ntrials`;do - $r4x16 -t -o4 -c$c $file 2>&1 - done | awk "$awkscript") - printf "r32x16 -o4 -c %-4s %10d %6.1f %6.1f\n" $c $5 $2 $4 -done - -echo - -# Order-1 -set -- $(for i in `seq 1 $ntrials`;do - $r4x8 -t -o1 $file 2>&1 - done | awk "$awkscript") -printf "r4x8 -o1 %10d %6.1f %6.1f\n" $5 $2 $4 - -set -- $(for i in `seq 1 $ntrials`;do - $r4x16 -t -o1 $file 2>&1 - done | awk "$awkscript") -printf "r4x16 -o1 %10d %6.1f %6.1f\n" $5 $2 $4 - -for c in 0x0000 0x0101 0x0202 0x0404 -do - set -- $(for i in `seq 1 $ntrials`;do - $r4x16 -t -o5 -c$c $file 2>&1 - done | awk "$awkscript") - printf "r32x16 -o5 -c %-4s %10d %6.1f %6.1f\n" $c $5 $2 $4 -done diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.0 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.0 deleted file mode 100644 index 439d892..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.1 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.1 deleted file mode 100644 index b45b214..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.1 and /dev/null differ diff --git 
a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.128 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.128 deleted file mode 100644 index 146304d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.128 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.129 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.129 deleted file mode 100644 index 5f33811..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.129 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.192 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.192 deleted file mode 100644 index 701f2ea..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.192 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.193 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.193 deleted file mode 100644 index e963a62..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.193 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.64 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.64 deleted file mode 100644 index 9a45b3f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.64 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.65 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.65 deleted file mode 100644 index 3785a7d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.65 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.8 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.8 deleted file mode 100644 index d4eea77..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.8 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.9 b/src/htslib-1.18/htscodecs/tests/dat/arith/q4.9 deleted file mode 100644 index 14c2a98..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q4.9 and /dev/null differ diff --git 
a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.0 b/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.0 deleted file mode 100644 index f8308eb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.1 b/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.1 deleted file mode 100644 index a63fe74..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.64 b/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.64 deleted file mode 100644 index c29fe5d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.64 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.65 b/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.65 deleted file mode 100644 index b51531d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.65 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.8 b/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.8 deleted file mode 100644 index 96fe825..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.8 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.9 b/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.9 deleted file mode 100644 index 62fc0a5..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q40+dir.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.0 b/src/htslib-1.18/htscodecs/tests/dat/arith/q8.0 deleted file mode 100644 index 886d911..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.1 b/src/htslib-1.18/htscodecs/tests/dat/arith/q8.1 deleted file mode 100644 index 3caa525..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.1 and /dev/null 
differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.128 b/src/htslib-1.18/htscodecs/tests/dat/arith/q8.128 deleted file mode 100644 index 44ae87f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.128 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.129 b/src/htslib-1.18/htscodecs/tests/dat/arith/q8.129 deleted file mode 100644 index f78b7c6..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.129 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.192 b/src/htslib-1.18/htscodecs/tests/dat/arith/q8.192 deleted file mode 100644 index 4d6fea3..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.192 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.193 b/src/htslib-1.18/htscodecs/tests/dat/arith/q8.193 deleted file mode 100644 index 61f05a2..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.193 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.64 b/src/htslib-1.18/htscodecs/tests/dat/arith/q8.64 deleted file mode 100644 index 48c7808..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.64 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.65 b/src/htslib-1.18/htscodecs/tests/dat/arith/q8.65 deleted file mode 100644 index 456abb0..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/q8.65 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.0 b/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.0 deleted file mode 100644 index c10c473..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.1 b/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.1 deleted file mode 100644 index 61736f4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.1 and /dev/null differ diff --git 
a/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.64 b/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.64 deleted file mode 100644 index b937a7d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.64 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.65 b/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.65 deleted file mode 100644 index f331ce9..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/qvar.65 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/u32.1 b/src/htslib-1.18/htscodecs/tests/dat/arith/u32.1 deleted file mode 100644 index c01ee3c..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/u32.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/u32.4 b/src/htslib-1.18/htscodecs/tests/dat/arith/u32.4 deleted file mode 100644 index d91c7a1..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/u32.4 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/u32.65 b/src/htslib-1.18/htscodecs/tests/dat/arith/u32.65 deleted file mode 100644 index 9ba82f5..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/u32.65 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/arith/u32.9 b/src/htslib-1.18/htscodecs/tests/dat/arith/u32.9 deleted file mode 100644 index ddc92f6..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/arith/u32.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.0 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.0 deleted file mode 100644 index 992ff0e..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.1 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.1 deleted file mode 100644 index a9cb24f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.1 and /dev/null differ diff --git 
a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.2 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.2 deleted file mode 100644 index d9ad593..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.2 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.3 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.3 deleted file mode 100644 index 7f8d7b0..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q4.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.0 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.0 deleted file mode 100644 index c40bfbd..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.1 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.1 deleted file mode 100644 index 59844bd..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.2 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.2 deleted file mode 100644 index 9e6bfd9..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.2 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.3 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.3 deleted file mode 100644 index d090b41..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q40+dir.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.0 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.0 deleted file mode 100644 index a07a785..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.1 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.1 deleted file mode 100644 index dd91f82..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.1 and 
/dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.2 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.2 deleted file mode 100644 index c249848..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.2 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.3 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.3 deleted file mode 100644 index 4d8edf0..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/q8.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.0 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.0 deleted file mode 100644 index ef47edc..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.1 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.1 deleted file mode 100644 index 4005543..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.2 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.2 deleted file mode 100644 index 3ed5aae..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.2 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.3 b/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.3 deleted file mode 100644 index 3ace9d2..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/fqzcomp/qvar.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/q4 b/src/htslib-1.18/htscodecs/tests/dat/q4 deleted file mode 100644 index 2137f7c..0000000 --- a/src/htslib-1.18/htscodecs/tests/dat/q4 +++ /dev/nulldiff --git a/src/htslib-1.18/htscodecs/tests/dat/q40+dir b/src/htslib-1.18/htscodecs/tests/dat/q40+dir deleted file mode 100644 index 664a026..0000000 --- a/src/htslib-1.18/htscodecs/tests/dat/q40+dir +++ /dev/nulldiff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.0 
b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.0 deleted file mode 100644 index a6720da..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.1 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.1 deleted file mode 100644 index 776f4f3..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.128 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.128 deleted file mode 100644 index 3785f94..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.128 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.129 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.129 deleted file mode 100644 index 5210732..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.129 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.192 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.192 deleted file mode 100644 index 0c7a04b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.192 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.193 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.193 deleted file mode 100644 index dec9deb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.193 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.4 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.4 deleted file mode 100644 index f0eefbb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.4 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.5 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.5 deleted file mode 100644 index e8870c1..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.64 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.64 
deleted file mode 100644 index ae3f7d9..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.64 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.65 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.65 deleted file mode 100644 index 9b1c8d9..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.65 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.8 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.8 deleted file mode 100644 index ac9e60e..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.8 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.9 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.9 deleted file mode 100644 index 1fa0732..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q4.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.0 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.0 deleted file mode 100644 index 08a68ba..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.1 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.1 deleted file mode 100644 index 7442a57..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.4 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.4 deleted file mode 100644 index c226c48..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.4 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.5 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.5 deleted file mode 100644 index 33d1a2c..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.8 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.8 
deleted file mode 100644 index 6c442f7..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.8 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.9 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.9 deleted file mode 100644 index cb57380..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q40+dir.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.0 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.0 deleted file mode 100644 index 30c641a..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.1 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.1 deleted file mode 100644 index aa70744..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.128 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.128 deleted file mode 100644 index 09a9687..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.128 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.129 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.129 deleted file mode 100644 index e8a0a7b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.129 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.192 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.192 deleted file mode 100644 index 76affb0..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.192 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.193 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.193 deleted file mode 100644 index 19a8040..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.193 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.4 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.4 deleted file mode 100644 index 
9806260..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.4 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.5 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.5 deleted file mode 100644 index 73c2fad..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.64 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.64 deleted file mode 100644 index 16faede..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.64 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.65 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.65 deleted file mode 100644 index f570fab..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/q8.65 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.0 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.0 deleted file mode 100644 index 483ba16..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.1 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.1 deleted file mode 100644 index 347e5df..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.4 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.4 deleted file mode 100644 index 22c33fa..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.4 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.5 b/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.5 deleted file mode 100644 index 8c029fe..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x16/qvar.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q4.0 b/src/htslib-1.18/htscodecs/tests/dat/r4x8/q4.0 deleted file mode 100644 index 9c0e898..0000000 Binary files 
a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q4.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q4.1 b/src/htslib-1.18/htscodecs/tests/dat/r4x8/q4.1 deleted file mode 100644 index 011276f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q4.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q40+dir.0 b/src/htslib-1.18/htscodecs/tests/dat/r4x8/q40+dir.0 deleted file mode 100644 index 8d52845..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q40+dir.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q40+dir.1 b/src/htslib-1.18/htscodecs/tests/dat/r4x8/q40+dir.1 deleted file mode 100644 index b227735..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q40+dir.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q8.0 b/src/htslib-1.18/htscodecs/tests/dat/r4x8/q8.0 deleted file mode 100644 index e8bebd7..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q8.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q8.1 b/src/htslib-1.18/htscodecs/tests/dat/r4x8/q8.1 deleted file mode 100644 index caa41ea..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x8/q8.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x8/qvar.0 b/src/htslib-1.18/htscodecs/tests/dat/r4x8/qvar.0 deleted file mode 100644 index 60f1e76..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x8/qvar.0 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/r4x8/qvar.1 b/src/htslib-1.18/htscodecs/tests/dat/r4x8/qvar.1 deleted file mode 100644 index 2ac8986..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/r4x8/qvar.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/dat/u32 b/src/htslib-1.18/htscodecs/tests/dat/u32 deleted file mode 100644 index 363372e..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/dat/u32 and /dev/null differ diff --git 
a/src/htslib-1.18/htscodecs/tests/entropy.c b/src/htslib-1.18/htscodecs/tests/entropy.c deleted file mode 100644 index eb7b44b..0000000 --- a/src/htslib-1.18/htscodecs/tests/entropy.c +++ /dev/null @@ -1,263 +0,0 @@ -/* Tests for hts_codecs */ -/* - * Copyright (c) 2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -/* - * This test aims to test all entropy codecs on an input file. 
- * This therefore validates the pthead_once memory allocations to ensure - * there are not unforseen initialisation interactions. - * - * We repeatedly compress and decompress a single input file, - * validating the result. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#ifndef _WIN32 -#include -#include -#endif - -#include "htscodecs/arith_dynamic.h" -#include "htscodecs/rANS_static.h" -#include "htscodecs/rANS_static4x16.h" - -#ifndef BLK_SIZE -# define BLK_SIZE 1024*1024 -#endif - -// Max 4GB -static unsigned char *load(FILE *infp, uint32_t *lenp) { - unsigned char *data = NULL; - uint32_t dsize = 0; - uint32_t dcurr = 0; - signed int len; - - do { - if (dsize - dcurr < BLK_SIZE) { - dsize = dsize ? dsize * 2 : BLK_SIZE; - data = realloc(data, dsize); - } - - len = fread(data + dcurr, 1, BLK_SIZE, infp); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("fread"); - } - - *lenp = dcurr; - return data; -} - -int main(int argc, char **argv) { - FILE *infp = stdin; - int result = EXIT_SUCCESS; -#ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif - - extern void force_sw32_decoder(void); - extern void rans_disable_avx512(void); - extern void rans_disable_avx2(void); - - int benchmark = 0; - while (argc > 1 && strcmp(argv[1], "-b") == 0) { - benchmark++; - argc--; - argv++; - } - - if (argc > 1) { - if (!(infp = fopen(argv[1], "rb"))) { - perror(argv[1]); - return 1; - } - } - -#ifndef _WIN32 - // Specify an extra small stack, eg as in Alpine linux threads. - // This checks we're not accidentally needing high stack usage. 
- struct rlimit r = {64*1024, 64*1024}; - setrlimit(RLIMIT_STACK, &r); -#endif - - uint32_t in_size, csize, usize; - unsigned char *in = load(infp, &in_size); - int order_a[] = {0,1, // r4x8 - 64,65, 128,129, 192,193, // r4x16, arith - 4,5, 68,69, 132,133, 194,197, // r4x16 SIMD - }; - char *codec[] = {"r4x8", "r4x16", "r32x16", "arith"}; - int i, j; - for (i = 0; i < sizeof(order_a) / sizeof(*order_a); i++) { - int order = order_a[i]; - uint8_t *comp, *uncomp; - for (j = 0; j < 4; j++) { - int chigh = 4, clow = 0, c; - uint8_t *comp0 = NULL; - uint32_t csize0 = 0; - for (c = 0; c < 4; c+=(j==2)?1:4) { - struct timeval tv1, tv2, tv3, tv4; - - // Test combinations of SIMD implementations - uint32_t chex = benchmark - ? (clow<<8) | clow - : (clow<<8) | chigh; - clow = 1<>= 1; - rans_set_cpu(chex); - - int bloop = benchmark; - bloop: - // encode - gettimeofday(&tv1, NULL); - switch (j) { - case 0: // r4x8 - if (i >= 2) continue; - comp = rans_compress(in, in_size, &csize, order); - break; - - case 1: // r4x16 - if (i >= 8) continue; - comp = rans_compress_4x16(in, in_size, &csize, order); - break; - - case 2: // r32x16 - if (i < 8) continue; - comp = rans_compress_4x16(in, in_size, &csize, order); - break; - - case 3: // arith - if (i >= 8) continue; - comp = arith_compress(in, in_size, &csize, order); - break; - } - gettimeofday(&tv2, NULL); - - if (j == 2) - printf("%10s-o%d-c%04x\t", codec[j], order, chex); - else - printf("%10s-o%d \t", codec[j], order); - printf("%10d uncomp, %10d comp", in_size, csize); - - if (comp == NULL) { - printf("\tFAIL (comp)\n"); - result = EXIT_FAILURE; - continue; - } - - if (comp0) { - if (csize != csize0 || memcmp(comp, comp0, csize) != 0) { - printf("\tFAIL (comp cmp)\n"); - result = EXIT_FAILURE; - } - } else { - csize0 = csize; - comp0 = comp; - } - - // decode - gettimeofday(&tv3, NULL); - switch (j) { - case 0: // r4x8 - if (i >= 2) continue; - uncomp = rans_uncompress(comp, csize, &usize); - break; - - case 1: // r4x16 - if (i 
>= 8) continue; - uncomp = rans_uncompress_4x16(comp, csize, &usize); - break; - - case 2: // r32x16 - if (i < 8) continue; - uncomp = rans_uncompress_4x16(comp, csize, &usize); - break; - - case 3: // arith - if (i >= 8) continue; - uncomp = arith_uncompress(comp, csize, &usize); - break; - } - gettimeofday(&tv4, NULL); - - if (usize != in_size || uncomp == NULL || memcmp(in, uncomp, usize) != 0) { - printf("\tFAIL\n"); - result = EXIT_FAILURE; - } else if (benchmark) { - printf(" %6.1f enc MB/s %6.1f dec MB/s\n", - (double)usize / - ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec), - (double)usize / - ((long)(tv4.tv_sec - tv3.tv_sec)*1000000 + - tv4.tv_usec - tv3.tv_usec)); - } else { - printf("\tpass\n"); - } - - if (comp != comp0) - free(comp); - free(uncomp); - if (--bloop > 0) goto bloop; - } - free(comp0); - } - printf("\n"); - } - - free(in); - - if (result != EXIT_SUCCESS) - return result; - -#ifndef _WIN32 - // We wouldn't normally exit this way, but we explicitly call it to - // check htscodecs_tls_free_all has no leaks. Note that this will - // cause the program to return an exit status of zero. - pthread_exit(NULL); -#endif - - return 0; -} diff --git a/src/htslib-1.18/htscodecs/tests/entropy.test b/src/htslib-1.18/htscodecs/tests/entropy.test deleted file mode 100755 index ad2bfd6..0000000 --- a/src/htslib-1.18/htscodecs/tests/entropy.test +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -# One copy tests the small buffer histogram variant -./entropy $srcdir/dat/q4 - -# Four copies tests the large buffer histogram variant -cat $srcdir/dat/q4 $srcdir/dat/q4 $srcdir/dat/q4 $srcdir/dat/q4 | ./entropy diff --git a/src/htslib-1.18/htscodecs/tests/fqzcomp.test b/src/htslib-1.18/htscodecs/tests/fqzcomp.test deleted file mode 100755 index fe26ac5..0000000 --- a/src/htslib-1.18/htscodecs/tests/fqzcomp.test +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh -out=test.out -if test ! 
-d $out -then - mkdir $out -fi - -for f in `ls -1 $srcdir/dat/q* $srcdir/htscodecs-corpus/dat/q* 2>/dev/null` -do - comp=${f%/*/*}/dat/fqzcomp/${f##*/} - cut -f 1 $f > $out/fqz - for s in 0 1 2 3 - do - printf 'Testing fqzcomp_qual -r -s %s on %s\t' $s "$f" - - # Round trip - ./fqzcomp_qual -r -s $s $out/fqz > $out/fqz.comp 2>>$out/fqz.stderr || exit 1 - wc -c < $out/fqz.comp - ./fqzcomp_qual -r -d $out/fqz.comp > $out/fqz.uncomp 2>>$out/fqz.stderr || exit 1 - cmp $out/fqz $out/fqz.uncomp || exit 1 - - # Precompressed data - ./fqzcomp_qual -r -d $comp.$s > $out/fqz.uncomp 2>>$out/fqz.stderr || exit 1 - cmp $out/fqz $out/fqz.uncomp || exit 1 - done - echo -done diff --git a/src/htslib-1.18/htscodecs/tests/fqzcomp_qual_fuzz.c b/src/htslib-1.18/htscodecs/tests/fqzcomp_qual_fuzz.c deleted file mode 100644 index 475dc77..0000000 --- a/src/htslib-1.18/htscodecs/tests/fqzcomp_qual_fuzz.c +++ /dev/null @@ -1,103 +0,0 @@ -/* Fuzz testing target. */ -/* - * Copyright (c) 2019,2020 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/fqzcomp_qual.h" - -int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { - size_t uncomp_size; - char *uncomp = fqz_decompress((char *)in, in_size, &uncomp_size, NULL, 0); - if (uncomp) - free(uncomp); - - return 0; -} - -#ifdef NOFUZZ -#include -#include -#include - -#define BS 1024*1024 -static unsigned char *load(char *fn, uint64_t *lenp) { - unsigned char *data = NULL; - uint64_t dsize = 0; - uint64_t dcurr = 0; - signed int len; - - int fd = open(fn, O_RDONLY); - if (!fd) { - perror(fn); - return NULL; - } - - do { - if (dsize - dcurr < BS) { - dsize = dsize ? 
dsize * 2 : BS; - data = realloc(data, dsize); - } - - len = read(fd, data + dcurr, BS); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("read"); - } - close(fd); - - *lenp = dcurr; - return data; -} - -int main(int argc, char **argv) { - uint64_t in_size; - unsigned char *in = load(argv[1], &in_size); - - LLVMFuzzerTestOneInput(in, in_size); - - free(in); - return 0; -} -#endif diff --git a/src/htslib-1.18/htscodecs/tests/fqzcomp_qual_test.c b/src/htslib-1.18/htscodecs/tests/fqzcomp_qual_test.c deleted file mode 100644 index 6c3afbb..0000000 --- a/src/htslib-1.18/htscodecs/tests/fqzcomp_qual_test.c +++ /dev/null @@ -1,471 +0,0 @@ -/* Tests for fqz codec */ -/* - * Copyright (c) 2019-2021 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/fqzcomp_qual.h" -#include "htscodecs/varint.h" - -#ifndef MAX_REC -#define MAX_REC 1000000 -#endif - -#ifndef MAX_SEQ -# define MAX_SEQ 100000 -#endif - -#ifndef MIN -# define MIN(a,b) ((a)<(b)?(a):(b)) -# define MAX(a,b) ((a)>(b)?(a):(b)) -#endif - -static fqz_slice fixed_slice = {0}; - -fqz_slice *fake_slice(size_t buf_len, int *len, int *r2, int *sel, int nlen) { - fixed_slice.num_records = (nlen == 1) ? (buf_len+len[0]-1) / len[0] : nlen; - assert(fixed_slice.num_records <= MAX_REC); - int i; - if (!fixed_slice.len) - fixed_slice.len = malloc(MAX_REC * sizeof(*fixed_slice.len)); - if (!fixed_slice.flags) - fixed_slice.flags = malloc(MAX_REC * sizeof(*fixed_slice.flags)); - for (i = 0; i < fixed_slice.num_records; i++) { - int idx = i < nlen ? i : nlen-1; - fixed_slice.len[i] = len[idx]; - fixed_slice.flags[i] = r2 ? r2[idx]*FQZ_FREAD2 : 0; - fixed_slice.flags[i] |= sel ? (sel[idx]<<16) : 0; - } - - return &fixed_slice; -} - -static uint64_t manual_strats[10] = {0}; -static int manual_nstrat = 0; - -/* - * Manually specified strategies held in global manual_strats[]. 
- */ -static inline -int fqz_manual_parameters(fqz_gparams *gp, - fqz_slice *s, - unsigned char *in, - size_t in_size) { - int i, p; - int dsqr[] = { - 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - }; - - gp->vers = FQZ_VERS; - gp->nparam = manual_nstrat; - gp->gflags = GFLAG_MULTI_PARAM | GFLAG_HAVE_STAB; - for (i = 0; i < 256; i++) - gp->stab[i] = 0; - - // Fill these out later - gp->max_sel = 0; - gp->max_sym = 0; - gp->p = malloc(gp->nparam * sizeof(*gp->p)); - - for (p = 0; p < gp->nparam; p++) { - fqz_param *pm = &gp->p[p]; - uint64_t st = manual_strats[p]; - - pm->do_qa = st & 15; st >>= 4; - pm->do_r2 = st & 15; st >>= 4; - pm->dloc = st & 15; st >>= 4; - pm->ploc = st & 15; st >>= 4; - pm->sloc = st & 15; st >>= 4; - pm->qloc = st & 15; st >>= 4; - pm->dshift = st & 15; st >>= 4; - pm->dbits = st & 15; st >>= 4; - pm->pshift = st & 15; st >>= 4; - pm->pbits = st & 15; st >>= 4; - pm->qshift = st & 15; st >>= 4; - pm->qbits = st & 15; st >>= 4; - - // Gather some stats, as per qual_stats func. - // r in rec count. - // i = index to in[] - // j = index within this rec - uint32_t qhist[256] = {0}; - - // qual stats for seqs using this parameter only - fqz_qual_stats(s, in, in_size, pm, qhist, p); - int max_sel = pm->max_sel; - - // Update max_sel running total. Eg with 4 sub-params: - // - // sel param no. => new - // 0 0 0 - // 0/1 1 1,2 - // 0/1 2 3,4 - // 0 3 5 - for (i = gp->max_sel; i < gp->max_sel + max_sel+1; i++) - gp->stab[i] = p; - gp->max_sel += max_sel+1; - - pm->fixed_len = pm->fixed_len > 0; - pm->use_qtab = 0; // unused by current encoder - pm->store_qmap = pm->nsym <= 8; - - // Adjust parameters based on quality stats. - // FIXME: dup from fqz_pick_parameters. 
- for (i = 0; i < sizeof(dsqr)/sizeof(*dsqr); i++) - if (dsqr[i] > (1<dbits)-1) - dsqr[i] = (1<dbits)-1; - - if (pm->store_qmap) { - int j; - for (i = j = 0; i < 256; i++) - if (qhist[i]) - pm->qmap[i] = j++; - else - pm->qmap[i] = INT_MAX; - pm->max_sym = pm->nsym; - } else { - pm->nsym = 255; - for (i = 0; i < 256; i++) - pm->qmap[i] = i; - } - if (gp->max_sym < pm->max_sym) - gp->max_sym = pm->max_sym; - - // Produce ptab from pshift. - if (pm->qbits) { - for (i = 0; i < 256; i++) { - pm->qtab[i] = i; // 1:1 - - // Alternative mappings: - //qtab[i] = i > 30 ? MIN(max_sym,i)-15 : i/2; // eg for 9827 BAM - } - - } - pm->qmask = (1<qbits)-1; - - if (pm->pbits) { - for (i = 0; i < 1024; i++) - pm->ptab[i] = MIN((1<pbits)-1, i>>pm->pshift); - - // Alternatively via analysis of quality distributions we - // may select a bunch of positions that are special and - // have a non-uniform ptab[]. - // Manual experimentation on a NovaSeq run saved 2.8% here. - } - - if (pm->dbits) { - for (i = 0; i < 256; i++) - pm->dtab[i] = dsqr[MIN(sizeof(dsqr)/sizeof(*dsqr)-1, i>>pm->dshift)]; - } - - pm->use_ptab = (pm->pbits > 0); - pm->use_dtab = (pm->dbits > 0); - - pm->pflags = - (pm->use_qtab ?PFLAG_HAVE_QTAB :0)| - (pm->use_dtab ?PFLAG_HAVE_DTAB :0)| - (pm->use_ptab ?PFLAG_HAVE_PTAB :0)| - (pm->do_sel ?PFLAG_DO_SEL :0)| - (pm->fixed_len ?PFLAG_DO_LEN :0)| - (pm->do_dedup ?PFLAG_DO_DEDUP :0)| - (pm->store_qmap ?PFLAG_HAVE_QMAP :0); - } - - for (i = gp->max_sel; i < 256; i++) - gp->stab[i] = gp->stab[gp->max_sel-1]; - - return 0; -} - -#define BS 1024*1024 -static unsigned char *load(char *fn, size_t *lenp) { - unsigned char *data = NULL; - uint64_t dsize = 0; - uint64_t dcurr = 0; - signed int len; - - //build_rcp_freq(); - -#ifndef _O_BINARY -#define _O_BINARY 0 -#endif - - int fd = open(fn, O_RDONLY | _O_BINARY); - if (!fd) { - perror(fn); - return NULL; - } - - do { - if (dsize - dcurr < BS) { - dsize = dsize ? 
dsize * 2 : BS; - data = realloc(data, dsize); - } - - len = read(fd, data + dcurr, BS); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("read"); - } - close(fd); - - *lenp = dcurr; - return data; -} - -#ifndef BLK_SIZE -# define BLK_SIZE 300*1000000 -#endif - -int count_lines(unsigned char *in, size_t len) { - size_t i; - int lines = 0; - - for (i = 0; i < len; i++) - if (in[i] == '\n') - lines++; - - return lines; -} - -// QUAL [is_read2 [selector]] -void parse_lines(unsigned char *in, size_t len, - int *rec_len, int *rec_r2, int *rec_sel, size_t *new_len) { - size_t i, j, start; - int rec = 0; - - for (start = i = j = 0; i < len; i++) { - if (in[i] == '\n' || in[i] == ' ' || in[i] == '\t') { - rec_len[rec] = i-start; - - // Read2 marker - while (i < len && in[i] != '\n' && isspace(in[i])) - i++; - - if (in[i] != '\n') - rec_r2[rec] = atoi((char *)&in[i]); - else - rec_r2[rec] = 0; - - while (i < len && !isspace(in[i])) - i++; - - // selector - while (i < len && in[i] != '\n' && isspace(in[i])) - i++; - - if (in[i] != '\n') - rec_sel[rec] = atoi((char *)&in[i]); - else - rec_sel[rec] = 0; - - while (i < len && in[i] != '\n') - i++; - - start = i+1; - rec++; - } else { - in[j++] = in[i]-33; // ASCII phred to qual - } - } - *new_len = j; -} - -int main(int argc, char **argv) { - unsigned char *in, *out; - size_t in_len, out_len; - int decomp = 0, vers = 4; // CRAM version 4.0 (4) or 3.1 (3) - int strat = 0, raw = 0; - fqz_gparams *gp = NULL, gp_local; - uint32_t blk_size = BLK_SIZE; // MAX - -#ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif - - extern char *optarg; - extern int optind; - int opt; - - while ((opt = getopt(argc, argv, "ds:s:b:rx:")) != -1) { - switch (opt) { - case 'd': - decomp = 1; - break; - - case 'b': - blk_size = atoi(optarg); - if (blk_size > BLK_SIZE) - blk_size = BLK_SIZE; - break; - - case 's': - strat = atoi(optarg); - break; - - case 'x': { - // Hex 
digits are: - // qbits qshift - // pbits pshift - // dbits dshift - // qloc sloc - // ploc dloc - // do_r2 do_qavg - // - // Examples: -x 0x5570000d6e14 q40+dir = 3473340 - // -x 0x8252120e8d04 q4 = 724989 - uint64_t x = strtol(optarg, NULL, 0); - manual_strats[manual_nstrat++] = x; - - gp = &gp_local; - break; - } - case 'r': - raw = 1; - break; - } - } - - in = load(optind < argc ? argv[optind] : "/dev/stdin", &in_len); - if (!in) - exit(1); - - if (raw) - blk_size = in_len; - - // Block based, for arbitrary sizes of input - if (decomp) { - unsigned char *in2 = in; - while (in_len > 0) { - // Read sizes as 32-bit - size_t in2_len, out_len; - if (raw) { - uint32_t u32; - var_get_u32(in2, in2+in_len, &u32); - out_len = u32; - in2_len = in_len; - } else { - out_len = *(uint32_t *)in2; in2 += 4; - in2_len = *(uint32_t *)in2; in2 += 4; - } - - fprintf(stderr, "out_len %ld, in_len %ld\n", (long)out_len, (long)in2_len); - - int *lengths = malloc(MAX_REC * sizeof(int)); - out = (unsigned char *)fqz_decompress((char *)in2, in_len-(raw?0:8), &out_len, lengths, MAX_REC); - if (!out) { - fprintf(stderr, "Failed to decompress\n"); - return 1; - } - - // Convert from binary back to ASCII with newlines - int i = 0, j = 0; - while (j < out_len) { - int k; - char seq[MAX_SEQ]; - for (k = 0; k < lengths[i]; k++) - seq[k] = out[j+k]+33; - seq[k] = 0; - puts(seq); - j += lengths[i++]; - } - free(out); - in2 += in2_len; - in_len -= in2_len+(raw?0:8); - - free(lengths); - - break; // One cycle only until we fix blocking to be \n based - } - } else { - // Convert from ASCII newline separated file to binary block. - // We return an array of line lengths and optionally param selectors. 
- int nlines = count_lines(in, in_len); - fprintf(stderr, "nlines=%d\n", nlines); - int *rec_len = calloc(nlines, sizeof(*rec_len)); - int *rec_r2 = calloc(nlines, sizeof(*rec_r2)); - int *rec_sel = calloc(nlines, sizeof(*rec_sel)); - parse_lines(in, in_len, rec_len, rec_r2, rec_sel, &in_len); - - unsigned char *in2 = in; - long t_out = 0; - out = NULL; - while (in_len > 0) { - // FIXME: blk_size no longer working in test. One cycle only! - size_t in2_len = in_len <= blk_size ? in_len : blk_size; - fqz_slice *s = fake_slice(in2_len, rec_len, rec_r2, rec_sel, nlines); - if (gp == &gp_local) - if (fqz_manual_parameters(gp, s, in2, in2_len) < 0) - return 1; - out = (unsigned char *)fqz_compress(vers, s, (char *)in2, in2_len, &out_len, strat, gp); - - // Write out 32-bit sizes. - if (!raw) { - uint32_t u32; - u32 = in2_len; if (write(1, &u32, 4) != 4) return 1; - u32 = out_len; if (write(1, &u32, 4) != 4) return 1; - } - if (write(1, out, out_len) < 0) return 1; - in_len -= in2_len; - in2 += in2_len; - t_out += out_len + (raw?0:8); - - break; // One cycle only until we fix blocking to be \n based - } - free(out); - free(rec_len); - free(rec_r2); - free(rec_sel); - fprintf(stderr, "Total output = %ld\n", t_out); - } - - free(in); - - return 0; -} diff --git a/src/htslib-1.18/htscodecs/tests/names/01.names b/src/htslib-1.18/htscodecs/tests/names/01.names deleted file mode 100644 index b88abaf..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/01.names +++ /dev/null @@ -1,1000 +0,0 @@ -@ERR174310.1 HSQ1008_141:5:1101:1454:3564/1 -@ERR174310.2 HSQ1008_141:5:1101:1485:3570/1 -@ERR174310.3 HSQ1008_141:5:1101:1407:3580/1 -@ERR174310.4 HSQ1008_141:5:1101:1491:3591/1 -@ERR174310.5 HSQ1008_141:5:1101:1453:3608/1 -@ERR174310.6 HSQ1008_141:5:1101:1478:3618/1 -@ERR174310.7 HSQ1008_141:5:1101:1458:3655/1 -@ERR174310.8 HSQ1008_141:5:1101:1409:3655/1 -@ERR174310.9 HSQ1008_141:5:1101:1481:3676/1 -@ERR174310.10 HSQ1008_141:5:1101:1419:3686/1 -@ERR174310.11 
HSQ1008_141:5:1101:1441:3696/1 -@ERR174310.12 HSQ1008_141:5:1101:1472:3697/1 -@ERR174310.13 HSQ1008_141:5:1101:1398:3698/1 -@ERR174310.14 HSQ1008_141:5:1101:1457:3731/1 -@ERR174310.15 HSQ1008_141:5:1101:1430:3732/1 -@ERR174310.16 HSQ1008_141:5:1101:1481:3750/1 -@ERR174310.17 HSQ1008_141:5:1101:1592:3555/1 -@ERR174310.18 HSQ1008_141:5:1101:1510:3558/1 -@ERR174310.19 HSQ1008_141:5:1101:1683:3570/1 -@ERR174310.20 HSQ1008_141:5:1101:1538:3573/1 -@ERR174310.21 HSQ1008_141:5:1101:1615:3576/1 -@ERR174310.22 HSQ1008_141:5:1101:1683:3588/1 -@ERR174310.23 HSQ1008_141:5:1101:1659:3593/1 -@ERR174310.24 HSQ1008_141:5:1101:1632:3598/1 -@ERR174310.25 HSQ1008_141:5:1101:1742:3603/1 -@ERR174310.26 HSQ1008_141:5:1101:1533:3608/1 -@ERR174310.27 HSQ1008_141:5:1101:1598:3611/1 -@ERR174310.28 HSQ1008_141:5:1101:1513:3616/1 -@ERR174310.29 HSQ1008_141:5:1101:1704:3621/1 -@ERR174310.30 HSQ1008_141:5:1101:1623:3629/1 -@ERR174310.31 HSQ1008_141:5:1101:1557:3630/1 -@ERR174310.32 HSQ1008_141:5:1101:1609:3644/1 -@ERR174310.33 HSQ1008_141:5:1101:1725:3652/1 -@ERR174310.34 HSQ1008_141:5:1101:1699:3652/1 -@ERR174310.35 HSQ1008_141:5:1101:1635:3659/1 -@ERR174310.36 HSQ1008_141:5:1101:1553:3665/1 -@ERR174310.37 HSQ1008_141:5:1101:1521:3667/1 -@ERR174310.38 HSQ1008_141:5:1101:1667:3668/1 -@ERR174310.39 HSQ1008_141:5:1101:1587:3679/1 -@ERR174310.40 HSQ1008_141:5:1101:1729:3687/1 -@ERR174310.41 HSQ1008_141:5:1101:1635:3687/1 -@ERR174310.42 HSQ1008_141:5:1101:1613:3688/1 -@ERR174310.43 HSQ1008_141:5:1101:1546:3690/1 -@ERR174310.44 HSQ1008_141:5:1101:1677:3695/1 -@ERR174310.45 HSQ1008_141:5:1101:1573:3704/1 -@ERR174310.46 HSQ1008_141:5:1101:1630:3708/1 -@ERR174310.47 HSQ1008_141:5:1101:1738:3709/1 -@ERR174310.48 HSQ1008_141:5:1101:1653:3728/1 -@ERR174310.49 HSQ1008_141:5:1101:1616:3736/1 -@ERR174310.50 HSQ1008_141:5:1101:1574:3743/1 -@ERR174310.51 HSQ1008_141:5:1101:1706:3747/1 -@ERR174310.52 HSQ1008_141:5:1101:1886:3558/1 -@ERR174310.53 HSQ1008_141:5:1101:1762:3566/1 -@ERR174310.54 
HSQ1008_141:5:1101:1952:3566/1 -@ERR174310.55 HSQ1008_141:5:1101:1791:3569/1 -@ERR174310.56 HSQ1008_141:5:1101:1838:3570/1 -@ERR174310.57 HSQ1008_141:5:1101:1857:3573/1 -@ERR174310.58 HSQ1008_141:5:1101:1928:3578/1 -@ERR174310.59 HSQ1008_141:5:1101:1985:3579/1 -@ERR174310.60 HSQ1008_141:5:1101:1808:3583/1 -@ERR174310.61 HSQ1008_141:5:1101:1870:3603/1 -@ERR174310.62 HSQ1008_141:5:1101:1900:3604/1 -@ERR174310.63 HSQ1008_141:5:1101:1762:3605/1 -@ERR174310.64 HSQ1008_141:5:1101:1803:3617/1 -@ERR174310.65 HSQ1008_141:5:1101:1851:3619/1 -@ERR174310.66 HSQ1008_141:5:1101:1896:3634/1 -@ERR174310.67 HSQ1008_141:5:1101:1874:3636/1 -@ERR174310.68 HSQ1008_141:5:1101:1979:3641/1 -@ERR174310.69 HSQ1008_141:5:1101:1768:3649/1 -@ERR174310.70 HSQ1008_141:5:1101:1897:3655/1 -@ERR174310.71 HSQ1008_141:5:1101:1948:3658/1 -@ERR174310.72 HSQ1008_141:5:1101:1808:3667/1 -@ERR174310.73 HSQ1008_141:5:1101:1899:3678/1 -@ERR174310.74 HSQ1008_141:5:1101:1881:3684/1 -@ERR174310.75 HSQ1008_141:5:1101:1958:3685/1 -@ERR174310.76 HSQ1008_141:5:1101:1779:3692/1 -@ERR174310.77 HSQ1008_141:5:1101:1856:3694/1 -@ERR174310.78 HSQ1008_141:5:1101:1931:3696/1 -@ERR174310.79 HSQ1008_141:5:1101:1825:3701/1 -@ERR174310.80 HSQ1008_141:5:1101:1955:3712/1 -@ERR174310.81 HSQ1008_141:5:1101:1878:3714/1 -@ERR174310.82 HSQ1008_141:5:1101:1846:3720/1 -@ERR174310.83 HSQ1008_141:5:1101:1995:3720/1 -@ERR174310.84 HSQ1008_141:5:1101:1791:3721/1 -@ERR174310.85 HSQ1008_141:5:1101:1750:3727/1 -@ERR174310.86 HSQ1008_141:5:1101:1940:3747/1 -@ERR174310.87 HSQ1008_141:5:1101:2205:3556/1 -@ERR174310.88 HSQ1008_141:5:1101:2172:3562/1 -@ERR174310.89 HSQ1008_141:5:1101:2102:3572/1 -@ERR174310.90 HSQ1008_141:5:1101:2078:3574/1 -@ERR174310.91 HSQ1008_141:5:1101:2007:3584/1 -@ERR174310.92 HSQ1008_141:5:1101:2104:3601/1 -@ERR174310.93 HSQ1008_141:5:1101:2194:3603/1 -@ERR174310.94 HSQ1008_141:5:1101:2228:3608/1 -@ERR174310.95 HSQ1008_141:5:1101:2005:3610/1 -@ERR174310.96 HSQ1008_141:5:1101:2026:3610/1 -@ERR174310.97 
HSQ1008_141:5:1101:2161:3614/1 -@ERR174310.98 HSQ1008_141:5:1101:2058:3624/1 -@ERR174310.99 HSQ1008_141:5:1101:2201:3626/1 -@ERR174310.100 HSQ1008_141:5:1101:2008:3632/1 -@ERR174310.101 HSQ1008_141:5:1101:2117:3634/1 -@ERR174310.102 HSQ1008_141:5:1101:2088:3638/1 -@ERR174310.103 HSQ1008_141:5:1101:2028:3641/1 -@ERR174310.104 HSQ1008_141:5:1101:2009:3654/1 -@ERR174310.105 HSQ1008_141:5:1101:2082:3661/1 -@ERR174310.106 HSQ1008_141:5:1101:2210:3668/1 -@ERR174310.107 HSQ1008_141:5:1101:2027:3673/1 -@ERR174310.108 HSQ1008_141:5:1101:2116:3675/1 -@ERR174310.109 HSQ1008_141:5:1101:2243:3675/1 -@ERR174310.110 HSQ1008_141:5:1101:2055:3676/1 -@ERR174310.111 HSQ1008_141:5:1101:2001:3687/1 -@ERR174310.112 HSQ1008_141:5:1101:2187:3693/1 -@ERR174310.113 HSQ1008_141:5:1101:2134:3708/1 -@ERR174310.114 HSQ1008_141:5:1101:2058:3708/1 -@ERR174310.115 HSQ1008_141:5:1101:2081:3713/1 -@ERR174310.116 HSQ1008_141:5:1101:2159:3716/1 -@ERR174310.117 HSQ1008_141:5:1101:2183:3724/1 -@ERR174310.118 HSQ1008_141:5:1101:2217:3729/1 -@ERR174310.119 HSQ1008_141:5:1101:2090:3730/1 -@ERR174310.120 HSQ1008_141:5:1101:2047:3731/1 -@ERR174310.121 HSQ1008_141:5:1101:2156:3740/1 -@ERR174310.122 HSQ1008_141:5:1101:2120:3741/1 -@ERR174310.123 HSQ1008_141:5:1101:2011:3746/1 -@ERR174310.124 HSQ1008_141:5:1101:2067:3748/1 -@ERR174310.125 HSQ1008_141:5:1101:2195:3749/1 -@ERR174310.126 HSQ1008_141:5:1101:2342:3556/1 -@ERR174310.127 HSQ1008_141:5:1101:2357:3565/1 -@ERR174310.128 HSQ1008_141:5:1101:2455:3569/1 -@ERR174310.129 HSQ1008_141:5:1101:2424:3569/1 -@ERR174310.130 HSQ1008_141:5:1101:2274:3571/1 -@ERR174310.131 HSQ1008_141:5:1101:2409:3571/1 -@ERR174310.132 HSQ1008_141:5:1101:2387:3580/1 -@ERR174310.133 HSQ1008_141:5:1101:2286:3589/1 -@ERR174310.134 HSQ1008_141:5:1101:2435:3591/1 -@ERR174310.135 HSQ1008_141:5:1101:2413:3592/1 -@ERR174310.136 HSQ1008_141:5:1101:2359:3595/1 -@ERR174310.137 HSQ1008_141:5:1101:2462:3596/1 -@ERR174310.138 HSQ1008_141:5:1101:2265:3601/1 -@ERR174310.139 
HSQ1008_141:5:1101:2320:3602/1 -@ERR174310.140 HSQ1008_141:5:1101:2401:3603/1 -@ERR174310.141 HSQ1008_141:5:1101:2435:3614/1 -@ERR174310.142 HSQ1008_141:5:1101:2382:3615/1 -@ERR174310.143 HSQ1008_141:5:1101:2295:3624/1 -@ERR174310.144 HSQ1008_141:5:1101:2273:3629/1 -@ERR174310.145 HSQ1008_141:5:1101:2409:3631/1 -@ERR174310.146 HSQ1008_141:5:1101:2459:3637/1 -@ERR174310.147 HSQ1008_141:5:1101:2283:3652/1 -@ERR174310.148 HSQ1008_141:5:1101:2253:3654/1 -@ERR174310.149 HSQ1008_141:5:1101:2375:3663/1 -@ERR174310.150 HSQ1008_141:5:1101:2337:3666/1 -@ERR174310.151 HSQ1008_141:5:1101:2405:3668/1 -@ERR174310.152 HSQ1008_141:5:1101:2439:3670/1 -@ERR174310.153 HSQ1008_141:5:1101:2297:3674/1 -@ERR174310.154 HSQ1008_141:5:1101:2272:3674/1 -@ERR174310.155 HSQ1008_141:5:1101:2468:3680/1 -@ERR174310.156 HSQ1008_141:5:1101:2384:3689/1 -@ERR174310.157 HSQ1008_141:5:1101:2257:3690/1 -@ERR174310.158 HSQ1008_141:5:1101:2448:3695/1 -@ERR174310.159 HSQ1008_141:5:1101:2421:3703/1 -@ERR174310.160 HSQ1008_141:5:1101:2265:3706/1 -@ERR174310.161 HSQ1008_141:5:1101:2286:3709/1 -@ERR174310.162 HSQ1008_141:5:1101:2464:3709/1 -@ERR174310.163 HSQ1008_141:5:1101:2373:3710/1 -@ERR174310.164 HSQ1008_141:5:1101:2309:3715/1 -@ERR174310.165 HSQ1008_141:5:1101:2357:3724/1 -@ERR174310.166 HSQ1008_141:5:1101:2433:3727/1 -@ERR174310.167 HSQ1008_141:5:1101:2256:3728/1 -@ERR174310.168 HSQ1008_141:5:1101:2474:3732/1 -@ERR174310.169 HSQ1008_141:5:1101:2400:3734/1 -@ERR174310.170 HSQ1008_141:5:1101:2380:3739/1 -@ERR174310.171 HSQ1008_141:5:1101:2583:3560/1 -@ERR174310.172 HSQ1008_141:5:1101:2538:3561/1 -@ERR174310.173 HSQ1008_141:5:1101:2728:3563/1 -@ERR174310.174 HSQ1008_141:5:1101:2698:3563/1 -@ERR174310.175 HSQ1008_141:5:1101:2518:3567/1 -@ERR174310.176 HSQ1008_141:5:1101:2585:3579/1 -@ERR174310.177 HSQ1008_141:5:1101:2646:3583/1 -@ERR174310.178 HSQ1008_141:5:1101:2538:3583/1 -@ERR174310.179 HSQ1008_141:5:1101:2587:3599/1 -@ERR174310.180 HSQ1008_141:5:1101:2677:3602/1 -@ERR174310.181 
HSQ1008_141:5:1101:2701:3603/1 -@ERR174310.182 HSQ1008_141:5:1101:2541:3613/1 -@ERR174310.183 HSQ1008_141:5:1101:2510:3618/1 -@ERR174310.184 HSQ1008_141:5:1101:2628:3621/1 -@ERR174310.185 HSQ1008_141:5:1101:2597:3625/1 -@ERR174310.186 HSQ1008_141:5:1101:2736:3626/1 -@ERR174310.187 HSQ1008_141:5:1101:2672:3627/1 -@ERR174310.188 HSQ1008_141:5:1101:2594:3644/1 -@ERR174310.189 HSQ1008_141:5:1101:2685:3648/1 -@ERR174310.190 HSQ1008_141:5:1101:2622:3653/1 -@ERR174310.191 HSQ1008_141:5:1101:2548:3657/1 -@ERR174310.192 HSQ1008_141:5:1101:2669:3661/1 -@ERR174310.193 HSQ1008_141:5:1101:2566:3663/1 -@ERR174310.194 HSQ1008_141:5:1101:2626:3672/1 -@ERR174310.195 HSQ1008_141:5:1101:2739:3673/1 -@ERR174310.196 HSQ1008_141:5:1101:2659:3680/1 -@ERR174310.197 HSQ1008_141:5:1101:2523:3684/1 -@ERR174310.198 HSQ1008_141:5:1101:2603:3686/1 -@ERR174310.199 HSQ1008_141:5:1101:2575:3687/1 -@ERR174310.200 HSQ1008_141:5:1101:2721:3687/1 -@ERR174310.201 HSQ1008_141:5:1101:2550:3687/1 -@ERR174310.202 HSQ1008_141:5:1101:2637:3695/1 -@ERR174310.203 HSQ1008_141:5:1101:2625:3706/1 -@ERR174310.204 HSQ1008_141:5:1101:2746:3712/1 -@ERR174310.205 HSQ1008_141:5:1101:2546:3712/1 -@ERR174310.206 HSQ1008_141:5:1101:2573:3722/1 -@ERR174310.207 HSQ1008_141:5:1101:2627:3722/1 -@ERR174310.208 HSQ1008_141:5:1101:2729:3727/1 -@ERR174310.209 HSQ1008_141:5:1101:2650:3732/1 -@ERR174310.210 HSQ1008_141:5:1101:2535:3732/1 -@ERR174310.211 HSQ1008_141:5:1101:2630:3747/1 -@ERR174310.212 HSQ1008_141:5:1101:2579:3748/1 -@ERR174310.213 HSQ1008_141:5:1101:2756:3555/1 -@ERR174310.214 HSQ1008_141:5:1101:2860:3563/1 -@ERR174310.215 HSQ1008_141:5:1101:2826:3568/1 -@ERR174310.216 HSQ1008_141:5:1101:2937:3570/1 -@ERR174310.217 HSQ1008_141:5:1101:2889:3574/1 -@ERR174310.218 HSQ1008_141:5:1101:2811:3582/1 -@ERR174310.219 HSQ1008_141:5:1101:2794:3586/1 -@ERR174310.220 HSQ1008_141:5:1101:2751:3586/1 -@ERR174310.221 HSQ1008_141:5:1101:2953:3587/1 -@ERR174310.222 HSQ1008_141:5:1101:2774:3588/1 -@ERR174310.223 
HSQ1008_141:5:1101:2900:3590/1 -@ERR174310.224 HSQ1008_141:5:1101:2824:3596/1 -@ERR174310.225 HSQ1008_141:5:1101:2864:3598/1 -@ERR174310.226 HSQ1008_141:5:1101:2932:3599/1 -@ERR174310.227 HSQ1008_141:5:1101:2943:3610/1 -@ERR174310.228 HSQ1008_141:5:1101:2897:3612/1 -@ERR174310.229 HSQ1008_141:5:1101:2964:3614/1 -@ERR174310.230 HSQ1008_141:5:1101:2992:3620/1 -@ERR174310.231 HSQ1008_141:5:1101:2801:3621/1 -@ERR174310.232 HSQ1008_141:5:1101:2764:3623/1 -@ERR174310.233 HSQ1008_141:5:1101:2916:3624/1 -@ERR174310.234 HSQ1008_141:5:1101:2837:3629/1 -@ERR174310.235 HSQ1008_141:5:1101:2871:3632/1 -@ERR174310.236 HSQ1008_141:5:1101:2932:3636/1 -@ERR174310.237 HSQ1008_141:5:1101:2959:3637/1 -@ERR174310.238 HSQ1008_141:5:1101:2853:3646/1 -@ERR174310.239 HSQ1008_141:5:1101:2777:3656/1 -@ERR174310.240 HSQ1008_141:5:1101:2844:3657/1 -@ERR174310.241 HSQ1008_141:5:1101:2753:3657/1 -@ERR174310.242 HSQ1008_141:5:1101:2914:3658/1 -@ERR174310.243 HSQ1008_141:5:1101:2941:3660/1 -@ERR174310.244 HSQ1008_141:5:1101:2993:3661/1 -@ERR174310.245 HSQ1008_141:5:1101:2832:3671/1 -@ERR174310.246 HSQ1008_141:5:1101:2957:3676/1 -@ERR174310.247 HSQ1008_141:5:1101:2762:3681/1 -@ERR174310.248 HSQ1008_141:5:1101:2990:3686/1 -@ERR174310.249 HSQ1008_141:5:1101:2931:3691/1 -@ERR174310.250 HSQ1008_141:5:1101:2826:3692/1 -@ERR174310.251 HSQ1008_141:5:1101:2807:3700/1 -@ERR174310.252 HSQ1008_141:5:1101:2774:3700/1 -@ERR174310.253 HSQ1008_141:5:1101:2985:3711/1 -@ERR174310.254 HSQ1008_141:5:1101:2929:3716/1 -@ERR174310.255 HSQ1008_141:5:1101:2961:3721/1 -@ERR174310.256 HSQ1008_141:5:1101:2911:3732/1 -@ERR174310.257 HSQ1008_141:5:1101:2997:3739/1 -@ERR174310.258 HSQ1008_141:5:1101:2798:3740/1 -@ERR174310.259 HSQ1008_141:5:1101:2935:3740/1 -@ERR174310.260 HSQ1008_141:5:1101:3130:3561/1 -@ERR174310.261 HSQ1008_141:5:1101:3188:3562/1 -@ERR174310.262 HSQ1008_141:5:1101:3031:3568/1 -@ERR174310.263 HSQ1008_141:5:1101:3088:3572/1 -@ERR174310.264 HSQ1008_141:5:1101:3155:3580/1 -@ERR174310.265 
HSQ1008_141:5:1101:3191:3580/1 -@ERR174310.266 HSQ1008_141:5:1101:3171:3586/1 -@ERR174310.267 HSQ1008_141:5:1101:3211:3586/1 -@ERR174310.268 HSQ1008_141:5:1101:3080:3587/1 -@ERR174310.269 HSQ1008_141:5:1101:3062:3600/1 -@ERR174310.270 HSQ1008_141:5:1101:3147:3603/1 -@ERR174310.271 HSQ1008_141:5:1101:3035:3606/1 -@ERR174310.272 HSQ1008_141:5:1101:3089:3608/1 -@ERR174310.273 HSQ1008_141:5:1101:3027:3636/1 -@ERR174310.274 HSQ1008_141:5:1101:3087:3653/1 -@ERR174310.275 HSQ1008_141:5:1101:3170:3654/1 -@ERR174310.276 HSQ1008_141:5:1101:3226:3667/1 -@ERR174310.277 HSQ1008_141:5:1101:3187:3673/1 -@ERR174310.278 HSQ1008_141:5:1101:3016:3675/1 -@ERR174310.279 HSQ1008_141:5:1101:3089:3687/1 -@ERR174310.280 HSQ1008_141:5:1101:3147:3689/1 -@ERR174310.281 HSQ1008_141:5:1101:3114:3690/1 -@ERR174310.282 HSQ1008_141:5:1101:3222:3693/1 -@ERR174310.283 HSQ1008_141:5:1101:3061:3695/1 -@ERR174310.284 HSQ1008_141:5:1101:3188:3704/1 -@ERR174310.285 HSQ1008_141:5:1101:3207:3709/1 -@ERR174310.286 HSQ1008_141:5:1101:3080:3712/1 -@ERR174310.287 HSQ1008_141:5:1101:3229:3715/1 -@ERR174310.288 HSQ1008_141:5:1101:3108:3716/1 -@ERR174310.289 HSQ1008_141:5:1101:3010:3717/1 -@ERR174310.290 HSQ1008_141:5:1101:3141:3718/1 -@ERR174310.291 HSQ1008_141:5:1101:3056:3728/1 -@ERR174310.292 HSQ1008_141:5:1101:3248:3729/1 -@ERR174310.293 HSQ1008_141:5:1101:3203:3737/1 -@ERR174310.294 HSQ1008_141:5:1101:3232:3740/1 -@ERR174310.295 HSQ1008_141:5:1101:3076:3742/1 -@ERR174310.296 HSQ1008_141:5:1101:3378:3555/1 -@ERR174310.297 HSQ1008_141:5:1101:3258:3563/1 -@ERR174310.298 HSQ1008_141:5:1101:3307:3564/1 -@ERR174310.299 HSQ1008_141:5:1101:3407:3564/1 -@ERR174310.300 HSQ1008_141:5:1101:3369:3572/1 -@ERR174310.301 HSQ1008_141:5:1101:3437:3573/1 -@ERR174310.302 HSQ1008_141:5:1101:3334:3577/1 -@ERR174310.303 HSQ1008_141:5:1101:3463:3585/1 -@ERR174310.304 HSQ1008_141:5:1101:3407:3587/1 -@ERR174310.305 HSQ1008_141:5:1101:3442:3593/1 -@ERR174310.306 HSQ1008_141:5:1101:3497:3594/1 -@ERR174310.307 
HSQ1008_141:5:1101:3321:3597/1 -@ERR174310.308 HSQ1008_141:5:1101:3253:3605/1 -@ERR174310.309 HSQ1008_141:5:1101:3376:3606/1 -@ERR174310.310 HSQ1008_141:5:1101:3419:3609/1 -@ERR174310.311 HSQ1008_141:5:1101:3295:3609/1 -@ERR174310.312 HSQ1008_141:5:1101:3321:3615/1 -@ERR174310.313 HSQ1008_141:5:1101:3498:3621/1 -@ERR174310.314 HSQ1008_141:5:1101:3347:3621/1 -@ERR174310.315 HSQ1008_141:5:1101:3392:3623/1 -@ERR174310.316 HSQ1008_141:5:1101:3450:3627/1 -@ERR174310.317 HSQ1008_141:5:1101:3474:3628/1 -@ERR174310.318 HSQ1008_141:5:1101:3265:3636/1 -@ERR174310.319 HSQ1008_141:5:1101:3314:3644/1 -@ERR174310.320 HSQ1008_141:5:1101:3453:3650/1 -@ERR174310.321 HSQ1008_141:5:1101:3281:3657/1 -@ERR174310.322 HSQ1008_141:5:1101:3360:3660/1 -@ERR174310.323 HSQ1008_141:5:1101:3389:3662/1 -@ERR174310.324 HSQ1008_141:5:1101:3412:3664/1 -@ERR174310.325 HSQ1008_141:5:1101:3490:3665/1 -@ERR174310.326 HSQ1008_141:5:1101:3255:3667/1 -@ERR174310.327 HSQ1008_141:5:1101:3314:3676/1 -@ERR174310.328 HSQ1008_141:5:1101:3339:3684/1 -@ERR174310.329 HSQ1008_141:5:1101:3489:3685/1 -@ERR174310.330 HSQ1008_141:5:1101:3278:3688/1 -@ERR174310.331 HSQ1008_141:5:1101:3437:3692/1 -@ERR174310.332 HSQ1008_141:5:1101:3405:3697/1 -@ERR174310.333 HSQ1008_141:5:1101:3251:3699/1 -@ERR174310.334 HSQ1008_141:5:1101:3377:3699/1 -@ERR174310.335 HSQ1008_141:5:1101:3453:3707/1 -@ERR174310.336 HSQ1008_141:5:1101:3478:3707/1 -@ERR174310.337 HSQ1008_141:5:1101:3352:3718/1 -@ERR174310.338 HSQ1008_141:5:1101:3391:3723/1 -@ERR174310.339 HSQ1008_141:5:1101:3287:3725/1 -@ERR174310.340 HSQ1008_141:5:1101:3492:3725/1 -@ERR174310.341 HSQ1008_141:5:1101:3329:3731/1 -@ERR174310.342 HSQ1008_141:5:1101:3310:3735/1 -@ERR174310.343 HSQ1008_141:5:1101:3433:3738/1 -@ERR174310.344 HSQ1008_141:5:1101:3473:3739/1 -@ERR174310.345 HSQ1008_141:5:1101:3367:3739/1 -@ERR174310.346 HSQ1008_141:5:1101:3498:3749/1 -@ERR174310.347 HSQ1008_141:5:1101:3502:3556/1 -@ERR174310.348 HSQ1008_141:5:1101:3542:3565/1 -@ERR174310.349 
HSQ1008_141:5:1101:3578:3579/1 -@ERR174310.350 HSQ1008_141:5:1101:3749:3584/1 -@ERR174310.351 HSQ1008_141:5:1101:3689:3596/1 -@ERR174310.352 HSQ1008_141:5:1101:3717:3601/1 -@ERR174310.353 HSQ1008_141:5:1101:3543:3611/1 -@ERR174310.354 HSQ1008_141:5:1101:3574:3615/1 -@ERR174310.355 HSQ1008_141:5:1101:3693:3628/1 -@ERR174310.356 HSQ1008_141:5:1101:3622:3629/1 -@ERR174310.357 HSQ1008_141:5:1101:3567:3640/1 -@ERR174310.358 HSQ1008_141:5:1101:3719:3643/1 -@ERR174310.359 HSQ1008_141:5:1101:3523:3658/1 -@ERR174310.360 HSQ1008_141:5:1101:3747:3659/1 -@ERR174310.361 HSQ1008_141:5:1101:3558:3664/1 -@ERR174310.362 HSQ1008_141:5:1101:3712:3674/1 -@ERR174310.363 HSQ1008_141:5:1101:3657:3680/1 -@ERR174310.364 HSQ1008_141:5:1101:3547:3684/1 -@ERR174310.365 HSQ1008_141:5:1101:3574:3686/1 -@ERR174310.366 HSQ1008_141:5:1101:3685:3697/1 -@ERR174310.367 HSQ1008_141:5:1101:3507:3706/1 -@ERR174310.368 HSQ1008_141:5:1101:3611:3714/1 -@ERR174310.369 HSQ1008_141:5:1101:3573:3716/1 -@ERR174310.370 HSQ1008_141:5:1101:3637:3719/1 -@ERR174310.371 HSQ1008_141:5:1101:3668:3725/1 -@ERR174310.372 HSQ1008_141:5:1101:3534:3725/1 -@ERR174310.373 HSQ1008_141:5:1101:3512:3732/1 -@ERR174310.374 HSQ1008_141:5:1101:3629:3743/1 -@ERR174310.375 HSQ1008_141:5:1101:3588:3745/1 -@ERR174310.376 HSQ1008_141:5:1101:3544:3749/1 -@ERR174310.377 HSQ1008_141:5:1101:3870:3559/1 -@ERR174310.378 HSQ1008_141:5:1101:3795:3566/1 -@ERR174310.379 HSQ1008_141:5:1101:3902:3567/1 -@ERR174310.380 HSQ1008_141:5:1101:3978:3569/1 -@ERR174310.381 HSQ1008_141:5:1101:3816:3583/1 -@ERR174310.382 HSQ1008_141:5:1101:3775:3586/1 -@ERR174310.383 HSQ1008_141:5:1101:3924:3591/1 -@ERR174310.384 HSQ1008_141:5:1101:3887:3599/1 -@ERR174310.385 HSQ1008_141:5:1101:3855:3600/1 -@ERR174310.386 HSQ1008_141:5:1101:3988:3605/1 -@ERR174310.387 HSQ1008_141:5:1101:3960:3607/1 -@ERR174310.388 HSQ1008_141:5:1101:3830:3609/1 -@ERR174310.389 HSQ1008_141:5:1101:3793:3616/1 -@ERR174310.390 HSQ1008_141:5:1101:3943:3617/1 -@ERR174310.391 
HSQ1008_141:5:1101:3893:3626/1 -@ERR174310.392 HSQ1008_141:5:1101:3993:3633/1 -@ERR174310.393 HSQ1008_141:5:1101:3826:3638/1 -@ERR174310.394 HSQ1008_141:5:1101:3913:3641/1 -@ERR174310.395 HSQ1008_141:5:1101:3855:3642/1 -@ERR174310.396 HSQ1008_141:5:1101:3875:3648/1 -@ERR174310.397 HSQ1008_141:5:1101:3897:3653/1 -@ERR174310.398 HSQ1008_141:5:1101:3804:3655/1 -@ERR174310.399 HSQ1008_141:5:1101:3979:3660/1 -@ERR174310.400 HSQ1008_141:5:1101:3847:3663/1 -@ERR174310.401 HSQ1008_141:5:1101:3915:3663/1 -@ERR174310.402 HSQ1008_141:5:1101:3868:3667/1 -@ERR174310.403 HSQ1008_141:5:1101:3950:3667/1 -@ERR174310.404 HSQ1008_141:5:1101:3885:3671/1 -@ERR174310.405 HSQ1008_141:5:1101:3787:3675/1 -@ERR174310.406 HSQ1008_141:5:1101:3980:3683/1 -@ERR174310.407 HSQ1008_141:5:1101:3835:3690/1 -@ERR174310.408 HSQ1008_141:5:1101:3864:3694/1 -@ERR174310.409 HSQ1008_141:5:1101:3900:3695/1 -@ERR174310.410 HSQ1008_141:5:1101:3924:3695/1 -@ERR174310.411 HSQ1008_141:5:1101:3811:3703/1 -@ERR174310.412 HSQ1008_141:5:1101:3985:3703/1 -@ERR174310.413 HSQ1008_141:5:1101:3845:3709/1 -@ERR174310.414 HSQ1008_141:5:1101:3949:3722/1 -@ERR174310.415 HSQ1008_141:5:1101:3761:3724/1 -@ERR174310.416 HSQ1008_141:5:1101:3993:3728/1 -@ERR174310.417 HSQ1008_141:5:1101:3835:3729/1 -@ERR174310.418 HSQ1008_141:5:1101:3796:3736/1 -@ERR174310.419 HSQ1008_141:5:1101:3924:3737/1 -@ERR174310.420 HSQ1008_141:5:1101:3894:3738/1 -@ERR174310.421 HSQ1008_141:5:1101:4099:3563/1 -@ERR174310.422 HSQ1008_141:5:1101:4054:3564/1 -@ERR174310.423 HSQ1008_141:5:1101:4009:3580/1 -@ERR174310.424 HSQ1008_141:5:1101:4192:3583/1 -@ERR174310.425 HSQ1008_141:5:1101:4104:3583/1 -@ERR174310.426 HSQ1008_141:5:1101:4042:3594/1 -@ERR174310.427 HSQ1008_141:5:1101:4125:3614/1 -@ERR174310.428 HSQ1008_141:5:1101:4020:3614/1 -@ERR174310.429 HSQ1008_141:5:1101:4061:3619/1 -@ERR174310.430 HSQ1008_141:5:1101:4092:3622/1 -@ERR174310.431 HSQ1008_141:5:1101:4032:3634/1 -@ERR174310.432 HSQ1008_141:5:1101:4068:3645/1 -@ERR174310.433 
HSQ1008_141:5:1101:4000:3653/1 -@ERR174310.434 HSQ1008_141:5:1101:4234:3668/1 -@ERR174310.435 HSQ1008_141:5:1101:4206:3668/1 -@ERR174310.436 HSQ1008_141:5:1101:4147:3679/1 -@ERR174310.437 HSQ1008_141:5:1101:4068:3687/1 -@ERR174310.438 HSQ1008_141:5:1101:4024:3687/1 -@ERR174310.439 HSQ1008_141:5:1101:4198:3688/1 -@ERR174310.440 HSQ1008_141:5:1101:4001:3688/1 -@ERR174310.441 HSQ1008_141:5:1101:4179:3696/1 -@ERR174310.442 HSQ1008_141:5:1101:4006:3708/1 -@ERR174310.443 HSQ1008_141:5:1101:4190:3711/1 -@ERR174310.444 HSQ1008_141:5:1101:4169:3712/1 -@ERR174310.445 HSQ1008_141:5:1101:4098:3716/1 -@ERR174310.446 HSQ1008_141:5:1101:4061:3717/1 -@ERR174310.447 HSQ1008_141:5:1101:4150:3720/1 -@ERR174310.448 HSQ1008_141:5:1101:4022:3728/1 -@ERR174310.449 HSQ1008_141:5:1101:4095:3739/1 -@ERR174310.450 HSQ1008_141:5:1101:4191:3747/1 -@ERR174310.451 HSQ1008_141:5:1101:4026:3748/1 -@ERR174310.452 HSQ1008_141:5:1101:4336:3566/1 -@ERR174310.453 HSQ1008_141:5:1101:4371:3568/1 -@ERR174310.454 HSQ1008_141:5:1101:4473:3569/1 -@ERR174310.455 HSQ1008_141:5:1101:4298:3572/1 -@ERR174310.456 HSQ1008_141:5:1101:4416:3574/1 -@ERR174310.457 HSQ1008_141:5:1101:4264:3575/1 -@ERR174310.458 HSQ1008_141:5:1101:4393:3576/1 -@ERR174310.459 HSQ1008_141:5:1101:4494:3579/1 -@ERR174310.460 HSQ1008_141:5:1101:4429:3587/1 -@ERR174310.461 HSQ1008_141:5:1101:4470:3590/1 -@ERR174310.462 HSQ1008_141:5:1101:4446:3596/1 -@ERR174310.463 HSQ1008_141:5:1101:4280:3596/1 -@ERR174310.464 HSQ1008_141:5:1101:4323:3597/1 -@ERR174310.465 HSQ1008_141:5:1101:4394:3601/1 -@ERR174310.466 HSQ1008_141:5:1101:4498:3606/1 -@ERR174310.467 HSQ1008_141:5:1101:4475:3612/1 -@ERR174310.468 HSQ1008_141:5:1101:4424:3618/1 -@ERR174310.469 HSQ1008_141:5:1101:4327:3618/1 -@ERR174310.470 HSQ1008_141:5:1101:4254:3620/1 -@ERR174310.471 HSQ1008_141:5:1101:4460:3626/1 -@ERR174310.472 HSQ1008_141:5:1101:4267:3637/1 -@ERR174310.473 HSQ1008_141:5:1101:4433:3641/1 -@ERR174310.474 HSQ1008_141:5:1101:4290:3646/1 -@ERR174310.475 
HSQ1008_141:5:1101:4364:3650/1 -@ERR174310.476 HSQ1008_141:5:1101:4339:3652/1 -@ERR174310.477 HSQ1008_141:5:1101:4379:3657/1 -@ERR174310.478 HSQ1008_141:5:1101:4460:3659/1 -@ERR174310.479 HSQ1008_141:5:1101:4309:3662/1 -@ERR174310.480 HSQ1008_141:5:1101:4401:3665/1 -@ERR174310.481 HSQ1008_141:5:1101:4427:3665/1 -@ERR174310.482 HSQ1008_141:5:1101:4267:3666/1 -@ERR174310.483 HSQ1008_141:5:1101:4483:3670/1 -@ERR174310.484 HSQ1008_141:5:1101:4328:3675/1 -@ERR174310.485 HSQ1008_141:5:1101:4346:3679/1 -@ERR174310.486 HSQ1008_141:5:1101:4440:3679/1 -@ERR174310.487 HSQ1008_141:5:1101:4461:3680/1 -@ERR174310.488 HSQ1008_141:5:1101:4386:3691/1 -@ERR174310.489 HSQ1008_141:5:1101:4367:3702/1 -@ERR174310.490 HSQ1008_141:5:1101:4349:3720/1 -@ERR174310.491 HSQ1008_141:5:1101:4314:3725/1 -@ERR174310.492 HSQ1008_141:5:1101:4403:3735/1 -@ERR174310.493 HSQ1008_141:5:1101:4382:3743/1 -@ERR174310.494 HSQ1008_141:5:1101:4572:3561/1 -@ERR174310.495 HSQ1008_141:5:1101:4721:3570/1 -@ERR174310.496 HSQ1008_141:5:1101:4595:3570/1 -@ERR174310.497 HSQ1008_141:5:1101:4672:3574/1 -@ERR174310.498 HSQ1008_141:5:1101:4547:3580/1 -@ERR174310.499 HSQ1008_141:5:1101:4601:3587/1 -@ERR174310.500 HSQ1008_141:5:1101:4654:3597/1 -@ERR174310.501 HSQ1008_141:5:1101:4619:3603/1 -@ERR174310.502 HSQ1008_141:5:1101:4735:3603/1 -@ERR174310.503 HSQ1008_141:5:1101:4591:3608/1 -@ERR174310.504 HSQ1008_141:5:1101:4531:3609/1 -@ERR174310.505 HSQ1008_141:5:1101:4641:3622/1 -@ERR174310.506 HSQ1008_141:5:1101:4551:3628/1 -@ERR174310.507 HSQ1008_141:5:1101:4672:3630/1 -@ERR174310.508 HSQ1008_141:5:1101:4562:3650/1 -@ERR174310.509 HSQ1008_141:5:1101:4692:3650/1 -@ERR174310.510 HSQ1008_141:5:1101:4735:3653/1 -@ERR174310.511 HSQ1008_141:5:1101:4619:3654/1 -@ERR174310.512 HSQ1008_141:5:1101:4677:3663/1 -@ERR174310.513 HSQ1008_141:5:1101:4515:3663/1 -@ERR174310.514 HSQ1008_141:5:1101:4647:3674/1 -@ERR174310.515 HSQ1008_141:5:1101:4536:3678/1 -@ERR174310.516 HSQ1008_141:5:1101:4625:3681/1 -@ERR174310.517 
HSQ1008_141:5:1101:4750:3686/1 -@ERR174310.518 HSQ1008_141:5:1101:4509:3690/1 -@ERR174310.519 HSQ1008_141:5:1101:4554:3691/1 -@ERR174310.520 HSQ1008_141:5:1101:4588:3693/1 -@ERR174310.521 HSQ1008_141:5:1101:4663:3695/1 -@ERR174310.522 HSQ1008_141:5:1101:4673:3720/1 -@ERR174310.523 HSQ1008_141:5:1101:4637:3728/1 -@ERR174310.524 HSQ1008_141:5:1101:4581:3734/1 -@ERR174310.525 HSQ1008_141:5:1101:4657:3740/1 -@ERR174310.526 HSQ1008_141:5:1101:4733:3741/1 -@ERR174310.527 HSQ1008_141:5:1101:4687:3747/1 -@ERR174310.528 HSQ1008_141:5:1101:4894:3557/1 -@ERR174310.529 HSQ1008_141:5:1101:4966:3564/1 -@ERR174310.530 HSQ1008_141:5:1101:4761:3566/1 -@ERR174310.531 HSQ1008_141:5:1101:4786:3566/1 -@ERR174310.532 HSQ1008_141:5:1101:4991:3569/1 -@ERR174310.533 HSQ1008_141:5:1101:4908:3575/1 -@ERR174310.534 HSQ1008_141:5:1101:4815:3584/1 -@ERR174310.535 HSQ1008_141:5:1101:4782:3593/1 -@ERR174310.536 HSQ1008_141:5:1101:4992:3595/1 -@ERR174310.537 HSQ1008_141:5:1101:4951:3604/1 -@ERR174310.538 HSQ1008_141:5:1101:4984:3613/1 -@ERR174310.539 HSQ1008_141:5:1101:4912:3614/1 -@ERR174310.540 HSQ1008_141:5:1101:4752:3621/1 -@ERR174310.541 HSQ1008_141:5:1101:4808:3621/1 -@ERR174310.542 HSQ1008_141:5:1101:4933:3629/1 -@ERR174310.543 HSQ1008_141:5:1101:4879:3629/1 -@ERR174310.544 HSQ1008_141:5:1101:4986:3638/1 -@ERR174310.545 HSQ1008_141:5:1101:4969:3638/1 -@ERR174310.546 HSQ1008_141:5:1101:4906:3642/1 -@ERR174310.547 HSQ1008_141:5:1101:4762:3643/1 -@ERR174310.548 HSQ1008_141:5:1101:4952:3647/1 -@ERR174310.549 HSQ1008_141:5:1101:4931:3656/1 -@ERR174310.550 HSQ1008_141:5:1101:4787:3660/1 -@ERR174310.551 HSQ1008_141:5:1101:4888:3667/1 -@ERR174310.552 HSQ1008_141:5:1101:4813:3673/1 -@ERR174310.553 HSQ1008_141:5:1101:4912:3675/1 -@ERR174310.554 HSQ1008_141:5:1101:4943:3678/1 -@ERR174310.555 HSQ1008_141:5:1101:4998:3686/1 -@ERR174310.556 HSQ1008_141:5:1101:4854:3688/1 -@ERR174310.557 HSQ1008_141:5:1101:4800:3700/1 -@ERR174310.558 HSQ1008_141:5:1101:4763:3701/1 -@ERR174310.559 
HSQ1008_141:5:1101:4894:3708/1 -@ERR174310.560 HSQ1008_141:5:1101:4848:3717/1 -@ERR174310.561 HSQ1008_141:5:1101:4919:3719/1 -@ERR174310.562 HSQ1008_141:5:1101:4948:3719/1 -@ERR174310.563 HSQ1008_141:5:1101:4877:3725/1 -@ERR174310.564 HSQ1008_141:5:1101:4806:3738/1 -@ERR174310.565 HSQ1008_141:5:1101:4906:3740/1 -@ERR174310.566 HSQ1008_141:5:1101:4873:3742/1 -@ERR174310.567 HSQ1008_141:5:1101:4774:3743/1 -@ERR174310.568 HSQ1008_141:5:1101:4982:3746/1 -@ERR174310.569 HSQ1008_141:5:1101:5099:3557/1 -@ERR174310.570 HSQ1008_141:5:1101:5013:3557/1 -@ERR174310.571 HSQ1008_141:5:1101:5158:3563/1 -@ERR174310.572 HSQ1008_141:5:1101:5047:3565/1 -@ERR174310.573 HSQ1008_141:5:1101:5133:3580/1 -@ERR174310.574 HSQ1008_141:5:1101:5193:3582/1 -@ERR174310.575 HSQ1008_141:5:1101:5026:3586/1 -@ERR174310.576 HSQ1008_141:5:1101:5112:3599/1 -@ERR174310.577 HSQ1008_141:5:1101:5221:3600/1 -@ERR174310.578 HSQ1008_141:5:1101:5151:3608/1 -@ERR174310.579 HSQ1008_141:5:1101:5183:3612/1 -@ERR174310.580 HSQ1008_141:5:1101:5014:3612/1 -@ERR174310.581 HSQ1008_141:5:1101:5066:3615/1 -@ERR174310.582 HSQ1008_141:5:1101:5242:3618/1 -@ERR174310.583 HSQ1008_141:5:1101:5218:3619/1 -@ERR174310.584 HSQ1008_141:5:1101:5145:3634/1 -@ERR174310.585 HSQ1008_141:5:1101:5007:3641/1 -@ERR174310.586 HSQ1008_141:5:1101:5208:3642/1 -@ERR174310.587 HSQ1008_141:5:1101:5124:3645/1 -@ERR174310.588 HSQ1008_141:5:1101:5103:3653/1 -@ERR174310.589 HSQ1008_141:5:1101:5019:3654/1 -@ERR174310.590 HSQ1008_141:5:1101:5172:3658/1 -@ERR174310.591 HSQ1008_141:5:1101:5208:3671/1 -@ERR174310.592 HSQ1008_141:5:1101:5030:3671/1 -@ERR174310.593 HSQ1008_141:5:1101:5082:3676/1 -@ERR174310.594 HSQ1008_141:5:1101:5237:3680/1 -@ERR174310.595 HSQ1008_141:5:1101:5054:3683/1 -@ERR174310.596 HSQ1008_141:5:1101:5110:3688/1 -@ERR174310.597 HSQ1008_141:5:1101:5038:3691/1 -@ERR174310.598 HSQ1008_141:5:1101:5179:3701/1 -@ERR174310.599 HSQ1008_141:5:1101:5046:3702/1 -@ERR174310.600 HSQ1008_141:5:1101:5084:3705/1 -@ERR174310.601 
HSQ1008_141:5:1101:5004:3707/1 -@ERR174310.602 HSQ1008_141:5:1101:5231:3709/1 -@ERR174310.603 HSQ1008_141:5:1101:5160:3712/1 -@ERR174310.604 HSQ1008_141:5:1101:5121:3714/1 -@ERR174310.605 HSQ1008_141:5:1101:5060:3718/1 -@ERR174310.606 HSQ1008_141:5:1101:5118:3736/1 -@ERR174310.607 HSQ1008_141:5:1101:5220:3743/1 -@ERR174310.608 HSQ1008_141:5:1101:5081:3744/1 -@ERR174310.609 HSQ1008_141:5:1101:5142:3746/1 -@ERR174310.610 HSQ1008_141:5:1101:5349:3562/1 -@ERR174310.611 HSQ1008_141:5:1101:5282:3565/1 -@ERR174310.612 HSQ1008_141:5:1101:5449:3569/1 -@ERR174310.613 HSQ1008_141:5:1101:5428:3570/1 -@ERR174310.614 HSQ1008_141:5:1101:5478:3571/1 -@ERR174310.615 HSQ1008_141:5:1101:5368:3578/1 -@ERR174310.616 HSQ1008_141:5:1101:5260:3590/1 -@ERR174310.617 HSQ1008_141:5:1101:5407:3592/1 -@ERR174310.618 HSQ1008_141:5:1101:5437:3593/1 -@ERR174310.619 HSQ1008_141:5:1101:5311:3599/1 -@ERR174310.620 HSQ1008_141:5:1101:5289:3608/1 -@ERR174310.621 HSQ1008_141:5:1101:5427:3615/1 -@ERR174310.622 HSQ1008_141:5:1101:5263:3617/1 -@ERR174310.623 HSQ1008_141:5:1101:5357:3620/1 -@ERR174310.624 HSQ1008_141:5:1101:5402:3620/1 -@ERR174310.625 HSQ1008_141:5:1101:5342:3628/1 -@ERR174310.626 HSQ1008_141:5:1101:5483:3630/1 -@ERR174310.627 HSQ1008_141:5:1101:5374:3638/1 -@ERR174310.628 HSQ1008_141:5:1101:5430:3639/1 -@ERR174310.629 HSQ1008_141:5:1101:5322:3641/1 -@ERR174310.630 HSQ1008_141:5:1101:5291:3646/1 -@ERR174310.631 HSQ1008_141:5:1101:5261:3646/1 -@ERR174310.632 HSQ1008_141:5:1101:5486:3649/1 -@ERR174310.633 HSQ1008_141:5:1101:5347:3656/1 -@ERR174310.634 HSQ1008_141:5:1101:5280:3670/1 -@ERR174310.635 HSQ1008_141:5:1101:5485:3672/1 -@ERR174310.636 HSQ1008_141:5:1101:5310:3673/1 -@ERR174310.637 HSQ1008_141:5:1101:5369:3681/1 -@ERR174310.638 HSQ1008_141:5:1101:5263:3684/1 -@ERR174310.639 HSQ1008_141:5:1101:5344:3692/1 -@ERR174310.640 HSQ1008_141:5:1101:5303:3698/1 -@ERR174310.641 HSQ1008_141:5:1101:5362:3702/1 -@ERR174310.642 HSQ1008_141:5:1101:5263:3704/1 -@ERR174310.643 
HSQ1008_141:5:1101:5326:3705/1 -@ERR174310.644 HSQ1008_141:5:1101:5429:3706/1 -@ERR174310.645 HSQ1008_141:5:1101:5293:3719/1 -@ERR174310.646 HSQ1008_141:5:1101:5440:3722/1 -@ERR174310.647 HSQ1008_141:5:1101:5413:3736/1 -@ERR174310.648 HSQ1008_141:5:1101:5457:3739/1 -@ERR174310.649 HSQ1008_141:5:1101:5371:3746/1 -@ERR174310.650 HSQ1008_141:5:1101:5291:3748/1 -@ERR174310.651 HSQ1008_141:5:1101:5261:3749/1 -@ERR174310.652 HSQ1008_141:5:1101:5659:3558/1 -@ERR174310.653 HSQ1008_141:5:1101:5579:3558/1 -@ERR174310.654 HSQ1008_141:5:1101:5741:3564/1 -@ERR174310.655 HSQ1008_141:5:1101:5551:3571/1 -@ERR174310.656 HSQ1008_141:5:1101:5691:3573/1 -@ERR174310.657 HSQ1008_141:5:1101:5528:3577/1 -@ERR174310.658 HSQ1008_141:5:1101:5588:3579/1 -@ERR174310.659 HSQ1008_141:5:1101:5628:3579/1 -@ERR174310.660 HSQ1008_141:5:1101:5508:3589/1 -@ERR174310.661 HSQ1008_141:5:1101:5651:3594/1 -@ERR174310.662 HSQ1008_141:5:1101:5670:3594/1 -@ERR174310.663 HSQ1008_141:5:1101:5720:3596/1 -@ERR174310.664 HSQ1008_141:5:1101:5563:3600/1 -@ERR174310.665 HSQ1008_141:5:1101:5594:3601/1 -@ERR174310.666 HSQ1008_141:5:1101:5698:3604/1 -@ERR174310.667 HSQ1008_141:5:1101:5611:3605/1 -@ERR174310.668 HSQ1008_141:5:1101:5536:3612/1 -@ERR174310.669 HSQ1008_141:5:1101:5634:3614/1 -@ERR174310.670 HSQ1008_141:5:1101:5724:3615/1 -@ERR174310.671 HSQ1008_141:5:1101:5507:3622/1 -@ERR174310.672 HSQ1008_141:5:1101:5605:3624/1 -@ERR174310.673 HSQ1008_141:5:1101:5585:3637/1 -@ERR174310.674 HSQ1008_141:5:1101:5717:3637/1 -@ERR174310.675 HSQ1008_141:5:1101:5736:3641/1 -@ERR174310.676 HSQ1008_141:5:1101:5647:3643/1 -@ERR174310.677 HSQ1008_141:5:1101:5517:3653/1 -@ERR174310.678 HSQ1008_141:5:1101:5670:3659/1 -@ERR174310.679 HSQ1008_141:5:1101:5697:3662/1 -@ERR174310.680 HSQ1008_141:5:1101:5738:3664/1 -@ERR174310.681 HSQ1008_141:5:1101:5580:3664/1 -@ERR174310.682 HSQ1008_141:5:1101:5715:3666/1 -@ERR174310.683 HSQ1008_141:5:1101:5628:3669/1 -@ERR174310.684 HSQ1008_141:5:1101:5603:3693/1 -@ERR174310.685 
HSQ1008_141:5:1101:5649:3695/1 -@ERR174310.686 HSQ1008_141:5:1101:5561:3702/1 -@ERR174310.687 HSQ1008_141:5:1101:5678:3702/1 -@ERR174310.688 HSQ1008_141:5:1101:5708:3715/1 -@ERR174310.689 HSQ1008_141:5:1101:5549:3723/1 -@ERR174310.690 HSQ1008_141:5:1101:5663:3727/1 -@ERR174310.691 HSQ1008_141:5:1101:5696:3737/1 -@ERR174310.692 HSQ1008_141:5:1101:5603:3746/1 -@ERR174310.693 HSQ1008_141:5:1101:5652:3749/1 -@ERR174310.694 HSQ1008_141:5:1101:5970:3558/1 -@ERR174310.695 HSQ1008_141:5:1101:5990:3558/1 -@ERR174310.696 HSQ1008_141:5:1101:5838:3559/1 -@ERR174310.697 HSQ1008_141:5:1101:5882:3565/1 -@ERR174310.698 HSQ1008_141:5:1101:5996:3577/1 -@ERR174310.699 HSQ1008_141:5:1101:5942:3578/1 -@ERR174310.700 HSQ1008_141:5:1101:5972:3578/1 -@ERR174310.701 HSQ1008_141:5:1101:5797:3586/1 -@ERR174310.702 HSQ1008_141:5:1101:5855:3588/1 -@ERR174310.703 HSQ1008_141:5:1101:5922:3589/1 -@ERR174310.704 HSQ1008_141:5:1101:5752:3593/1 -@ERR174310.705 HSQ1008_141:5:1101:5877:3603/1 -@ERR174310.706 HSQ1008_141:5:1101:5827:3604/1 -@ERR174310.707 HSQ1008_141:5:1101:5784:3606/1 -@ERR174310.708 HSQ1008_141:5:1101:5844:3619/1 -@ERR174310.709 HSQ1008_141:5:1101:5753:3619/1 -@ERR174310.710 HSQ1008_141:5:1101:5991:3621/1 -@ERR174310.711 HSQ1008_141:5:1101:5957:3622/1 -@ERR174310.712 HSQ1008_141:5:1101:5774:3628/1 -@ERR174310.713 HSQ1008_141:5:1101:5951:3640/1 -@ERR174310.714 HSQ1008_141:5:1101:5982:3645/1 -@ERR174310.715 HSQ1008_141:5:1101:5922:3645/1 -@ERR174310.716 HSQ1008_141:5:1101:5902:3647/1 -@ERR174310.717 HSQ1008_141:5:1101:5854:3655/1 -@ERR174310.718 HSQ1008_141:5:1101:5876:3660/1 -@ERR174310.719 HSQ1008_141:5:1101:5945:3662/1 -@ERR174310.720 HSQ1008_141:5:1101:5919:3664/1 -@ERR174310.721 HSQ1008_141:5:1101:5967:3667/1 -@ERR174310.722 HSQ1008_141:5:1101:5991:3670/1 -@ERR174310.723 HSQ1008_141:5:1101:5793:3679/1 -@ERR174310.724 HSQ1008_141:5:1101:5953:3687/1 -@ERR174310.725 HSQ1008_141:5:1101:5846:3690/1 -@ERR174310.726 HSQ1008_141:5:1101:5825:3696/1 -@ERR174310.727 
HSQ1008_141:5:1101:5927:3696/1 -@ERR174310.728 HSQ1008_141:5:1101:5982:3699/1 -@ERR174310.729 HSQ1008_141:5:1101:5953:3708/1 -@ERR174310.730 HSQ1008_141:5:1101:5796:3709/1 -@ERR174310.731 HSQ1008_141:5:1101:5906:3717/1 -@ERR174310.732 HSQ1008_141:5:1101:5971:3728/1 -@ERR174310.733 HSQ1008_141:5:1101:5939:3729/1 -@ERR174310.734 HSQ1008_141:5:1101:5984:3745/1 -@ERR174310.735 HSQ1008_141:5:1101:5877:3745/1 -@ERR174310.736 HSQ1008_141:5:1101:6079:3560/1 -@ERR174310.737 HSQ1008_141:5:1101:6226:3560/1 -@ERR174310.738 HSQ1008_141:5:1101:6101:3560/1 -@ERR174310.739 HSQ1008_141:5:1101:6058:3564/1 -@ERR174310.740 HSQ1008_141:5:1101:6010:3567/1 -@ERR174310.741 HSQ1008_141:5:1101:6127:3576/1 -@ERR174310.742 HSQ1008_141:5:1101:6207:3581/1 -@ERR174310.743 HSQ1008_141:5:1101:6141:3590/1 -@ERR174310.744 HSQ1008_141:5:1101:6089:3592/1 -@ERR174310.745 HSQ1008_141:5:1101:6245:3596/1 -@ERR174310.746 HSQ1008_141:5:1101:6192:3597/1 -@ERR174310.747 HSQ1008_141:5:1101:6113:3600/1 -@ERR174310.748 HSQ1008_141:5:1101:6160:3603/1 -@ERR174310.749 HSQ1008_141:5:1101:6181:3613/1 -@ERR174310.750 HSQ1008_141:5:1101:6081:3623/1 -@ERR174310.751 HSQ1008_141:5:1101:6011:3633/1 -@ERR174310.752 HSQ1008_141:5:1101:6039:3634/1 -@ERR174310.753 HSQ1008_141:5:1101:6061:3638/1 -@ERR174310.754 HSQ1008_141:5:1101:6163:3638/1 -@ERR174310.755 HSQ1008_141:5:1101:6207:3646/1 -@ERR174310.756 HSQ1008_141:5:1101:6030:3649/1 -@ERR174310.757 HSQ1008_141:5:1101:6099:3649/1 -@ERR174310.758 HSQ1008_141:5:1101:6004:3655/1 -@ERR174310.759 HSQ1008_141:5:1101:6175:3671/1 -@ERR174310.760 HSQ1008_141:5:1101:6041:3671/1 -@ERR174310.761 HSQ1008_141:5:1101:6232:3677/1 -@ERR174310.762 HSQ1008_141:5:1101:6015:3677/1 -@ERR174310.763 HSQ1008_141:5:1101:6096:3681/1 -@ERR174310.764 HSQ1008_141:5:1101:6160:3690/1 -@ERR174310.765 HSQ1008_141:5:1101:6071:3694/1 -@ERR174310.766 HSQ1008_141:5:1101:6034:3697/1 -@ERR174310.767 HSQ1008_141:5:1101:6127:3700/1 -@ERR174310.768 HSQ1008_141:5:1101:6055:3714/1 -@ERR174310.769 
HSQ1008_141:5:1101:6007:3720/1 -@ERR174310.770 HSQ1008_141:5:1101:6232:3738/1 -@ERR174310.771 HSQ1008_141:5:1101:6181:3743/1 -@ERR174310.772 HSQ1008_141:5:1101:6096:3747/1 -@ERR174310.773 HSQ1008_141:5:1101:6480:3558/1 -@ERR174310.774 HSQ1008_141:5:1101:6267:3558/1 -@ERR174310.775 HSQ1008_141:5:1101:6351:3560/1 -@ERR174310.776 HSQ1008_141:5:1101:6435:3561/1 -@ERR174310.777 HSQ1008_141:5:1101:6500:3566/1 -@ERR174310.778 HSQ1008_141:5:1101:6461:3567/1 -@ERR174310.779 HSQ1008_141:5:1101:6370:3570/1 -@ERR174310.780 HSQ1008_141:5:1101:6254:3576/1 -@ERR174310.781 HSQ1008_141:5:1101:6342:3579/1 -@ERR174310.782 HSQ1008_141:5:1101:6467:3581/1 -@ERR174310.783 HSQ1008_141:5:1101:6277:3587/1 -@ERR174310.784 HSQ1008_141:5:1101:6319:3588/1 -@ERR174310.785 HSQ1008_141:5:1101:6372:3592/1 -@ERR174310.786 HSQ1008_141:5:1101:6396:3593/1 -@ERR174310.787 HSQ1008_141:5:1101:6491:3598/1 -@ERR174310.788 HSQ1008_141:5:1101:6447:3605/1 -@ERR174310.789 HSQ1008_141:5:1101:6276:3613/1 -@ERR174310.790 HSQ1008_141:5:1101:6370:3617/1 -@ERR174310.791 HSQ1008_141:5:1101:6303:3618/1 -@ERR174310.792 HSQ1008_141:5:1101:6396:3619/1 -@ERR174310.793 HSQ1008_141:5:1101:6418:3630/1 -@ERR174310.794 HSQ1008_141:5:1101:6255:3637/1 -@ERR174310.795 HSQ1008_141:5:1101:6468:3637/1 -@ERR174310.796 HSQ1008_141:5:1101:6367:3646/1 -@ERR174310.797 HSQ1008_141:5:1101:6294:3648/1 -@ERR174310.798 HSQ1008_141:5:1101:6261:3658/1 -@ERR174310.799 HSQ1008_141:5:1101:6464:3660/1 -@ERR174310.800 HSQ1008_141:5:1101:6441:3662/1 -@ERR174310.801 HSQ1008_141:5:1101:6288:3663/1 -@ERR174310.802 HSQ1008_141:5:1101:6383:3666/1 -@ERR174310.803 HSQ1008_141:5:1101:6490:3674/1 -@ERR174310.804 HSQ1008_141:5:1101:6268:3676/1 -@ERR174310.805 HSQ1008_141:5:1101:6351:3678/1 -@ERR174310.806 HSQ1008_141:5:1101:6407:3682/1 -@ERR174310.807 HSQ1008_141:5:1101:6299:3684/1 -@ERR174310.808 HSQ1008_141:5:1101:6374:3684/1 -@ERR174310.809 HSQ1008_141:5:1101:6440:3689/1 -@ERR174310.810 HSQ1008_141:5:1101:6323:3693/1 -@ERR174310.811 
HSQ1008_141:5:1101:6253:3697/1 -@ERR174310.812 HSQ1008_141:5:1101:6301:3705/1 -@ERR174310.813 HSQ1008_141:5:1101:6357:3707/1 -@ERR174310.814 HSQ1008_141:5:1101:6377:3709/1 -@ERR174310.815 HSQ1008_141:5:1101:6481:3710/1 -@ERR174310.816 HSQ1008_141:5:1101:6394:3710/1 -@ERR174310.817 HSQ1008_141:5:1101:6335:3715/1 -@ERR174310.818 HSQ1008_141:5:1101:6257:3718/1 -@ERR174310.819 HSQ1008_141:5:1101:6358:3723/1 -@ERR174310.820 HSQ1008_141:5:1101:6434:3726/1 -@ERR174310.821 HSQ1008_141:5:1101:6407:3730/1 -@ERR174310.822 HSQ1008_141:5:1101:6370:3732/1 -@ERR174310.823 HSQ1008_141:5:1101:6326:3748/1 -@ERR174310.824 HSQ1008_141:5:1101:6688:3560/1 -@ERR174310.825 HSQ1008_141:5:1101:6540:3563/1 -@ERR174310.826 HSQ1008_141:5:1101:6603:3569/1 -@ERR174310.827 HSQ1008_141:5:1101:6566:3574/1 -@ERR174310.828 HSQ1008_141:5:1101:6516:3585/1 -@ERR174310.829 HSQ1008_141:5:1101:6535:3595/1 -@ERR174310.830 HSQ1008_141:5:1101:6560:3599/1 -@ERR174310.831 HSQ1008_141:5:1101:6584:3606/1 -@ERR174310.832 HSQ1008_141:5:1101:6700:3611/1 -@ERR174310.833 HSQ1008_141:5:1101:6641:3616/1 -@ERR174310.834 HSQ1008_141:5:1101:6534:3623/1 -@ERR174310.835 HSQ1008_141:5:1101:6577:3629/1 -@ERR174310.836 HSQ1008_141:5:1101:6666:3632/1 -@ERR174310.837 HSQ1008_141:5:1101:6606:3632/1 -@ERR174310.838 HSQ1008_141:5:1101:6738:3640/1 -@ERR174310.839 HSQ1008_141:5:1101:6528:3649/1 -@ERR174310.840 HSQ1008_141:5:1101:6701:3651/1 -@ERR174310.841 HSQ1008_141:5:1101:6573:3653/1 -@ERR174310.842 HSQ1008_141:5:1101:6611:3659/1 -@ERR174310.843 HSQ1008_141:5:1101:6555:3662/1 -@ERR174310.844 HSQ1008_141:5:1101:6732:3665/1 -@ERR174310.845 HSQ1008_141:5:1101:6631:3671/1 -@ERR174310.846 HSQ1008_141:5:1101:6693:3681/1 -@ERR174310.847 HSQ1008_141:5:1101:6717:3685/1 -@ERR174310.848 HSQ1008_141:5:1101:6510:3692/1 -@ERR174310.849 HSQ1008_141:5:1101:6632:3694/1 -@ERR174310.850 HSQ1008_141:5:1101:6527:3699/1 -@ERR174310.851 HSQ1008_141:5:1101:6668:3704/1 -@ERR174310.852 HSQ1008_141:5:1101:6574:3705/1 -@ERR174310.853 
HSQ1008_141:5:1101:6688:3707/1 -@ERR174310.854 HSQ1008_141:5:1101:6621:3712/1 -@ERR174310.855 HSQ1008_141:5:1101:6648:3713/1 -@ERR174310.856 HSQ1008_141:5:1101:6534:3719/1 -@ERR174310.857 HSQ1008_141:5:1101:6705:3724/1 -@ERR174310.858 HSQ1008_141:5:1101:6588:3725/1 -@ERR174310.859 HSQ1008_141:5:1101:6555:3727/1 -@ERR174310.860 HSQ1008_141:5:1101:6626:3738/1 -@ERR174310.861 HSQ1008_141:5:1101:6520:3743/1 -@ERR174310.862 HSQ1008_141:5:1101:6582:3747/1 -@ERR174310.863 HSQ1008_141:5:1101:6702:3749/1 -@ERR174310.864 HSQ1008_141:5:1101:6935:3560/1 -@ERR174310.865 HSQ1008_141:5:1101:6817:3561/1 -@ERR174310.866 HSQ1008_141:5:1101:6913:3568/1 -@ERR174310.867 HSQ1008_141:5:1101:6846:3571/1 -@ERR174310.868 HSQ1008_141:5:1101:6874:3578/1 -@ERR174310.869 HSQ1008_141:5:1101:6989:3579/1 -@ERR174310.870 HSQ1008_141:5:1101:6895:3579/1 -@ERR174310.871 HSQ1008_141:5:1101:6948:3585/1 -@ERR174310.872 HSQ1008_141:5:1101:6870:3597/1 -@ERR174310.873 HSQ1008_141:5:1101:6788:3605/1 -@ERR174310.874 HSQ1008_141:5:1101:6887:3609/1 -@ERR174310.875 HSQ1008_141:5:1101:6817:3611/1 -@ERR174310.876 HSQ1008_141:5:1101:6756:3616/1 -@ERR174310.877 HSQ1008_141:5:1101:6854:3622/1 -@ERR174310.878 HSQ1008_141:5:1101:6958:3625/1 -@ERR174310.879 HSQ1008_141:5:1101:6996:3626/1 -@ERR174310.880 HSQ1008_141:5:1101:6930:3627/1 -@ERR174310.881 HSQ1008_141:5:1101:6878:3629/1 -@ERR174310.882 HSQ1008_141:5:1101:6851:3648/1 -@ERR174310.883 HSQ1008_141:5:1101:6968:3648/1 -@ERR174310.884 HSQ1008_141:5:1101:6904:3649/1 -@ERR174310.885 HSQ1008_141:5:1101:6823:3656/1 -@ERR174310.886 HSQ1008_141:5:1101:6926:3659/1 -@ERR174310.887 HSQ1008_141:5:1101:6756:3663/1 -@ERR174310.888 HSQ1008_141:5:1101:6778:3663/1 -@ERR174310.889 HSQ1008_141:5:1101:6996:3668/1 -@ERR174310.890 HSQ1008_141:5:1101:6889:3673/1 -@ERR174310.891 HSQ1008_141:5:1101:6816:3678/1 -@ERR174310.892 HSQ1008_141:5:1101:6794:3684/1 -@ERR174310.893 HSQ1008_141:5:1101:6929:3684/1 -@ERR174310.894 HSQ1008_141:5:1101:6755:3688/1 -@ERR174310.895 
HSQ1008_141:5:1101:6954:3689/1 -@ERR174310.896 HSQ1008_141:5:1101:6993:3693/1 -@ERR174310.897 HSQ1008_141:5:1101:6821:3700/1 -@ERR174310.898 HSQ1008_141:5:1101:6844:3702/1 -@ERR174310.899 HSQ1008_141:5:1101:6755:3706/1 -@ERR174310.900 HSQ1008_141:5:1101:6997:3716/1 -@ERR174310.901 HSQ1008_141:5:1101:6970:3716/1 -@ERR174310.902 HSQ1008_141:5:1101:6826:3726/1 -@ERR174310.903 HSQ1008_141:5:1101:6988:3734/1 -@ERR174310.904 HSQ1008_141:5:1101:6752:3736/1 -@ERR174310.905 HSQ1008_141:5:1101:6789:3740/1 -@ERR174310.906 HSQ1008_141:5:1101:6823:3747/1 -@ERR174310.907 HSQ1008_141:5:1101:6877:3749/1 -@ERR174310.908 HSQ1008_141:5:1101:7166:3565/1 -@ERR174310.909 HSQ1008_141:5:1101:7042:3570/1 -@ERR174310.910 HSQ1008_141:5:1101:7113:3586/1 -@ERR174310.911 HSQ1008_141:5:1101:7063:3614/1 -@ERR174310.912 HSQ1008_141:5:1101:7183:3618/1 -@ERR174310.913 HSQ1008_141:5:1101:7031:3625/1 -@ERR174310.914 HSQ1008_141:5:1101:7119:3635/1 -@ERR174310.915 HSQ1008_141:5:1101:7055:3635/1 -@ERR174310.916 HSQ1008_141:5:1101:7170:3642/1 -@ERR174310.917 HSQ1008_141:5:1101:7148:3646/1 -@ERR174310.918 HSQ1008_141:5:1101:7038:3652/1 -@ERR174310.919 HSQ1008_141:5:1101:7223:3658/1 -@ERR174310.920 HSQ1008_141:5:1101:7097:3660/1 -@ERR174310.921 HSQ1008_141:5:1101:7152:3670/1 -@ERR174310.922 HSQ1008_141:5:1101:7033:3675/1 -@ERR174310.923 HSQ1008_141:5:1101:7075:3677/1 -@ERR174310.924 HSQ1008_141:5:1101:7176:3680/1 -@ERR174310.925 HSQ1008_141:5:1101:7125:3684/1 -@ERR174310.926 HSQ1008_141:5:1101:7099:3688/1 -@ERR174310.927 HSQ1008_141:5:1101:7151:3689/1 -@ERR174310.928 HSQ1008_141:5:1101:7197:3694/1 -@ERR174310.929 HSQ1008_141:5:1101:7079:3702/1 -@ERR174310.930 HSQ1008_141:5:1101:7027:3713/1 -@ERR174310.931 HSQ1008_141:5:1101:7077:3720/1 -@ERR174310.932 HSQ1008_141:5:1101:7130:3725/1 -@ERR174310.933 HSQ1008_141:5:1101:7053:3726/1 -@ERR174310.934 HSQ1008_141:5:1101:7179:3732/1 -@ERR174310.935 HSQ1008_141:5:1101:7081:3736/1 -@ERR174310.936 HSQ1008_141:5:1101:7034:3738/1 -@ERR174310.937 
HSQ1008_141:5:1101:7099:3745/1 -@ERR174310.938 HSQ1008_141:5:1101:7227:3746/1 -@ERR174310.939 HSQ1008_141:5:1101:7428:3562/1 -@ERR174310.940 HSQ1008_141:5:1101:7335:3564/1 -@ERR174310.941 HSQ1008_141:5:1101:7309:3566/1 -@ERR174310.942 HSQ1008_141:5:1101:7393:3587/1 -@ERR174310.943 HSQ1008_141:5:1101:7254:3588/1 -@ERR174310.944 HSQ1008_141:5:1101:7465:3589/1 -@ERR174310.945 HSQ1008_141:5:1101:7344:3591/1 -@ERR174310.946 HSQ1008_141:5:1101:7309:3597/1 -@ERR174310.947 HSQ1008_141:5:1101:7361:3605/1 -@ERR174310.948 HSQ1008_141:5:1101:7486:3607/1 -@ERR174310.949 HSQ1008_141:5:1101:7433:3608/1 -@ERR174310.950 HSQ1008_141:5:1101:7260:3616/1 -@ERR174310.951 HSQ1008_141:5:1101:7332:3619/1 -@ERR174310.952 HSQ1008_141:5:1101:7456:3619/1 -@ERR174310.953 HSQ1008_141:5:1101:7356:3626/1 -@ERR174310.954 HSQ1008_141:5:1101:7422:3631/1 -@ERR174310.955 HSQ1008_141:5:1101:7495:3653/1 -@ERR174310.956 HSQ1008_141:5:1101:7455:3656/1 -@ERR174310.957 HSQ1008_141:5:1101:7397:3657/1 -@ERR174310.958 HSQ1008_141:5:1101:7425:3657/1 -@ERR174310.959 HSQ1008_141:5:1101:7340:3658/1 -@ERR174310.960 HSQ1008_141:5:1101:7439:3668/1 -@ERR174310.961 HSQ1008_141:5:1101:7352:3688/1 -@ERR174310.962 HSQ1008_141:5:1101:7290:3693/1 -@ERR174310.963 HSQ1008_141:5:1101:7393:3699/1 -@ERR174310.964 HSQ1008_141:5:1101:7370:3699/1 -@ERR174310.965 HSQ1008_141:5:1101:7484:3701/1 -@ERR174310.966 HSQ1008_141:5:1101:7254:3702/1 -@ERR174310.967 HSQ1008_141:5:1101:7313:3715/1 -@ERR174310.968 HSQ1008_141:5:1101:7372:3718/1 -@ERR174310.969 HSQ1008_141:5:1101:7414:3725/1 -@ERR174310.970 HSQ1008_141:5:1101:7349:3729/1 -@ERR174310.971 HSQ1008_141:5:1101:7470:3731/1 -@ERR174310.972 HSQ1008_141:5:1101:7494:3736/1 -@ERR174310.973 HSQ1008_141:5:1101:7377:3741/1 -@ERR174310.974 HSQ1008_141:5:1101:7321:3741/1 -@ERR174310.975 HSQ1008_141:5:1101:7290:3749/1 -@ERR174310.976 HSQ1008_141:5:1101:7621:3561/1 -@ERR174310.977 HSQ1008_141:5:1101:7679:3562/1 -@ERR174310.978 HSQ1008_141:5:1101:7589:3573/1 -@ERR174310.979 
HSQ1008_141:5:1101:7539:3583/1 -@ERR174310.980 HSQ1008_141:5:1101:7657:3586/1 -@ERR174310.981 HSQ1008_141:5:1101:7733:3590/1 -@ERR174310.982 HSQ1008_141:5:1101:7679:3596/1 -@ERR174310.983 HSQ1008_141:5:1101:7692:3601/1 -@ERR174310.984 HSQ1008_141:5:1101:7598:3601/1 -@ERR174310.985 HSQ1008_141:5:1101:7643:3602/1 -@ERR174310.986 HSQ1008_141:5:1101:7509:3602/1 -@ERR174310.987 HSQ1008_141:5:1101:7662:3610/1 -@ERR174310.988 HSQ1008_141:5:1101:7711:3616/1 -@ERR174310.989 HSQ1008_141:5:1101:7574:3626/1 -@ERR174310.990 HSQ1008_141:5:1101:7528:3630/1 -@ERR174310.991 HSQ1008_141:5:1101:7604:3631/1 -@ERR174310.992 HSQ1008_141:5:1101:7655:3636/1 -@ERR174310.993 HSQ1008_141:5:1101:7636:3640/1 -@ERR174310.994 HSQ1008_141:5:1101:7561:3642/1 -@ERR174310.995 HSQ1008_141:5:1101:7549:3650/1 -@ERR174310.996 HSQ1008_141:5:1101:7582:3659/1 -@ERR174310.997 HSQ1008_141:5:1101:7626:3660/1 -@ERR174310.998 HSQ1008_141:5:1101:7708:3669/1 -@ERR174310.999 HSQ1008_141:5:1101:7555:3673/1 -@ERR174310.1000 HSQ1008_141:5:1101:7613:3680/1 diff --git a/src/htslib-1.18/htscodecs/tests/names/02.names b/src/htslib-1.18/htscodecs/tests/names/02.names deleted file mode 100644 index c61dd33..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/02.names +++ /dev/null @@ -1,1000 +0,0 @@ -HSQ1004:134:C0D8DACXX:1:1104:3874:86238 -HSQ1004:134:C0D8DACXX:1:1104:3874:86238 -HSQ1004:134:C0D8DACXX:2:2104:2852:75174 -HSQ1004:134:C0D8DACXX:2:2104:2852:75174 -HSQ1004:134:C0D8DACXX:3:1101:1318:114841 -HSQ1004:134:C0D8DACXX:3:1101:1318:114841 -HSQ1004:134:C0D8DACXX:3:1202:21337:51635 -HSQ1004:134:C0D8DACXX:3:1202:21337:51635 -HSQ1004:134:C0D8DACXX:3:2101:20539:23194 -HSQ1004:134:C0D8DACXX:3:2101:20539:23194 -HSQ1004:134:C0D8DACXX:3:2208:18292:174203 -HSQ1004:134:C0D8DACXX:3:2208:18292:174203 -HSQ1004:134:C0D8DACXX:4:1102:21352:12372 -HSQ1004:134:C0D8DACXX:4:1102:21352:12372 -HSQ1004:134:C0D8DACXX:4:1104:17663:42919 -HSQ1004:134:C0D8DACXX:4:1104:17663:42919 -HSQ1004:134:C0D8DACXX:4:1204:13406:85431 
-HSQ1004:134:C0D8DACXX:4:1204:13406:85431 -HSQ1004:134:C0D8DACXX:4:2205:3830:6608 -HSQ1004:134:C0D8DACXX:4:2205:3830:6608 -HSQ1004:134:C0D8DACXX:1:1102:8813:137354 -HSQ1004:134:C0D8DACXX:1:1108:13562:84539 -HSQ1004:134:C0D8DACXX:1:1201:19622:5022 -HSQ1004:134:C0D8DACXX:1:1202:11807:99031 -HSQ1004:134:C0D8DACXX:1:1202:10102:180590 -HSQ1004:134:C0D8DACXX:1:1208:2967:78566 -HSQ1004:134:C0D8DACXX:1:1303:20210:193041 -HSQ1004:134:C0D8DACXX:1:1305:14903:55371 -HSQ1004:134:C0D8DACXX:1:2104:2954:88257 -HSQ1004:134:C0D8DACXX:4:1101:19001:189144 -HSQ1004:134:C0D8DACXX:1:2207:3356:167610 -HSQ1004:134:C0D8DACXX:1:2208:5599:191191 -HSQ1004:134:C0D8DACXX:1:2308:19229:39524 -HSQ1004:134:C0D8DACXX:2:1102:10116:158999 -HSQ1004:134:C0D8DACXX:2:1202:5124:126245 -HSQ1004:134:C0D8DACXX:2:1205:1951:191845 -HSQ1004:134:C0D8DACXX:2:1207:8577:153892 -HSQ1004:134:C0D8DACXX:2:1303:12744:95592 -HSQ1004:134:C0D8DACXX:2:1304:10024:155285 -HSQ1004:134:C0D8DACXX:2:1306:7951:77816 -HSQ1004:134:C0D8DACXX:2:2102:1908:137076 -HSQ1004:134:C0D8DACXX:2:2104:18236:131267 -HSQ1004:134:C0D8DACXX:4:2206:2803:99615 -HSQ1004:134:C0D8DACXX:2:2201:5410:96983 -HSQ1004:134:C0D8DACXX:2:2203:17030:180434 -HSQ1004:134:C0D8DACXX:2:2301:8461:152232 -HSQ1004:134:C0D8DACXX:2:2306:5929:36195 -HSQ1004:134:C0D8DACXX:2:2307:20137:117966 -HSQ1004:134:C0D8DACXX:2:2307:20920:196101 -HSQ1004:134:C0D8DACXX:3:1101:12071:75442 -HSQ1004:134:C0D8DACXX:3:1101:12349:104534 -HSQ1004:134:C0D8DACXX:3:1102:3843:41538 -HSQ1004:134:C0D8DACXX:3:1107:5772:169928 -HSQ1004:134:C0D8DACXX:3:1203:11042:74272 -HSQ1004:134:C0D8DACXX:3:1207:15474:128368 -HSQ1004:134:C0D8DACXX:3:1303:7791:23260 -HSQ1004:134:C0D8DACXX:3:1303:2786:71748 -HSQ1004:134:C0D8DACXX:3:1305:13845:113478 -HSQ1004:134:C0D8DACXX:3:1307:12656:79241 -HSQ1004:134:C0D8DACXX:3:2102:14000:83391 -HSQ1004:134:C0D8DACXX:3:2102:1283:191815 -HSQ1004:134:C0D8DACXX:3:2202:6325:190614 -HSQ1004:134:C0D8DACXX:3:2204:5735:164485 -HSQ1004:134:C0D8DACXX:3:2207:21005:42605 
-HSQ1004:134:C0D8DACXX:3:2207:7326:168834 -HSQ1004:134:C0D8DACXX:3:2208:12775:49579 -HSQ1004:134:C0D8DACXX:3:2303:6704:80902 -HSQ1004:134:C0D8DACXX:3:2303:14446:145740 -HSQ1004:134:C0D8DACXX:3:2304:15873:71991 -HSQ1004:134:C0D8DACXX:3:2306:20804:39478 -HSQ1004:134:C0D8DACXX:3:2307:17185:133793 -HSQ1004:134:C0D8DACXX:3:2308:15328:129880 -HSQ1004:134:C0D8DACXX:4:1101:12254:115457 -HSQ1004:134:C0D8DACXX:4:1101:19001:189144 -HSQ1004:134:C0D8DACXX:4:1106:20898:138398 -HSQ1004:134:C0D8DACXX:4:1107:16548:15915 -HSQ1004:134:C0D8DACXX:4:1108:16398:46593 -HSQ1004:134:C0D8DACXX:4:1201:16901:109621 -HSQ1004:134:C0D8DACXX:4:1203:10525:8111 -HSQ1004:134:C0D8DACXX:4:1204:10476:108595 -HSQ1004:134:C0D8DACXX:4:1206:3911:22845 -HSQ1004:134:C0D8DACXX:4:1206:6417:112408 -HSQ1004:134:C0D8DACXX:4:1208:9672:145119 -HSQ1004:134:C0D8DACXX:4:1208:20729:161499 -HSQ1004:134:C0D8DACXX:4:1301:6513:184565 -HSQ1004:134:C0D8DACXX:4:2101:3510:128765 -HSQ1004:134:C0D8DACXX:4:2104:2397:54811 -HSQ1004:134:C0D8DACXX:4:2107:19177:54899 -HSQ1004:134:C0D8DACXX:4:2202:10089:108004 -HSQ1004:134:C0D8DACXX:4:2203:10886:81655 -HSQ1004:134:C0D8DACXX:4:2203:6001:165783 -HSQ1004:134:C0D8DACXX:4:2203:19986:194677 -HSQ1004:134:C0D8DACXX:4:2303:20624:115298 -HSQ1004:134:C0D8DACXX:3:1205:17329:12342 -HSQ1004:134:C0D8DACXX:1:1105:19219:24894 -HSQ1004:134:C0D8DACXX:1:1207:9773:175043 -HSQ1004:134:C0D8DACXX:1:2103:4035:105179 -HSQ1004:134:C0D8DACXX:1:2104:2747:64397 -HSQ1004:134:C0D8DACXX:1:2104:13728:77858 -HSQ1004:134:C0D8DACXX:3:1104:20699:157340 -HSQ1004:134:C0D8DACXX:1:2107:6383:140267 -HSQ1004:134:C0D8DACXX:1:2107:3942:148565 -HSQ1004:134:C0D8DACXX:1:2201:2025:61339 -HSQ1004:134:C0D8DACXX:1:2205:19689:36577 -HSQ1004:134:C0D8DACXX:1:2206:2928:184455 -HSQ1004:134:C0D8DACXX:1:2304:2112:132097 -HSQ1004:134:C0D8DACXX:1:2305:20222:100111 -HSQ1004:134:C0D8DACXX:1:2308:15976:141103 -HSQ1004:134:C0D8DACXX:2:1102:13571:6294 -HSQ1004:134:C0D8DACXX:2:1102:1470:17215 -HSQ1004:134:C0D8DACXX:2:1104:8288:139142 
-HSQ1004:134:C0D8DACXX:2:1106:10422:30954 -HSQ1004:134:C0D8DACXX:2:1107:18250:23615 -HSQ1004:134:C0D8DACXX:2:1108:14747:55381 -HSQ1004:134:C0D8DACXX:2:1201:14093:29975 -HSQ1004:134:C0D8DACXX:2:1204:17598:31683 -HSQ1004:134:C0D8DACXX:2:1206:12943:38251 -HSQ1004:134:C0D8DACXX:2:1303:20451:162401 -HSQ1004:134:C0D8DACXX:2:2103:7922:68368 -HSQ1004:134:C0D8DACXX:2:2206:12846:29283 -HSQ1004:134:C0D8DACXX:2:2201:16548:159520 -HSQ1004:134:C0D8DACXX:2:2202:8095:168773 -HSQ1004:134:C0D8DACXX:2:2204:9308:104069 -HSQ1004:134:C0D8DACXX:2:2206:12846:29283 -HSQ1004:134:C0D8DACXX:2:2207:17533:188094 -HSQ1004:134:C0D8DACXX:2:2304:20234:197886 -HSQ1004:134:C0D8DACXX:2:2308:5731:149064 -HSQ1004:134:C0D8DACXX:3:1104:20699:157340 -HSQ1004:134:C0D8DACXX:4:2308:2675:113623 -HSQ1004:134:C0D8DACXX:3:1105:11653:113314 -HSQ1004:134:C0D8DACXX:3:1108:9819:12427 -HSQ1004:134:C0D8DACXX:3:1205:17329:12342 -HSQ1004:134:C0D8DACXX:3:1205:18024:160366 -HSQ1004:134:C0D8DACXX:3:1205:5609:169095 -HSQ1004:134:C0D8DACXX:3:1206:8830:8330 -HSQ1004:134:C0D8DACXX:3:1305:16208:65128 -HSQ1004:134:C0D8DACXX:3:2101:21180:175374 -HSQ1004:134:C0D8DACXX:3:2104:11123:176338 -HSQ1004:134:C0D8DACXX:3:2106:10137:111384 -HSQ1004:134:C0D8DACXX:3:2203:8101:83941 -HSQ1004:134:C0D8DACXX:3:2307:10049:91975 -HSQ1004:134:C0D8DACXX:4:1104:2770:38263 -HSQ1004:134:C0D8DACXX:4:1104:5009:93516 -HSQ1004:134:C0D8DACXX:4:1104:5914:122557 -HSQ1004:134:C0D8DACXX:4:1105:14061:94495 -HSQ1004:134:C0D8DACXX:4:1105:7646:188727 -HSQ1004:134:C0D8DACXX:4:1201:7075:30382 -HSQ1004:134:C0D8DACXX:4:1203:7928:17028 -HSQ1004:134:C0D8DACXX:4:1208:18650:4890 -HSQ1004:134:C0D8DACXX:4:1302:17678:71261 -HSQ1004:134:C0D8DACXX:4:1305:14168:26334 -HSQ1004:134:C0D8DACXX:4:1305:4162:63999 -HSQ1004:134:C0D8DACXX:4:1306:20523:93723 -HSQ1004:134:C0D8DACXX:4:2101:10455:19128 -HSQ1004:134:C0D8DACXX:4:2101:16264:147161 -HSQ1004:134:C0D8DACXX:4:2107:18959:196381 -HSQ1004:134:C0D8DACXX:4:2108:9959:147871 -HSQ1004:134:C0D8DACXX:4:2203:12923:33880 
-HSQ1004:134:C0D8DACXX:4:2203:12399:86530 -HSQ1004:134:C0D8DACXX:4:2206:4658:76497 -HSQ1004:134:C0D8DACXX:4:2206:2803:99615 -HSQ1004:134:C0D8DACXX:4:2207:11542:46477 -HSQ1004:134:C0D8DACXX:4:2208:14454:67114 -HSQ1004:134:C0D8DACXX:4:2208:19766:184411 -HSQ1004:134:C0D8DACXX:4:2306:9818:174550 -HSQ1004:134:C0D8DACXX:4:2307:10512:71727 -HSQ1004:134:C0D8DACXX:4:2307:20057:125363 -HSQ1004:134:C0D8DACXX:4:2308:2675:113623 -HSQ1004:134:C0D8DACXX:1:2202:19418:162850 -HSQ1004:134:C0D8DACXX:1:2202:19418:162850 -HSQ1004:134:C0D8DACXX:2:1103:18719:48553 -HSQ1004:134:C0D8DACXX:2:1103:18719:48553 -HSQ1004:134:C0D8DACXX:2:1107:10355:184265 -HSQ1004:134:C0D8DACXX:2:1107:10355:184265 -HSQ1004:134:C0D8DACXX:2:1201:1170:170620 -HSQ1004:134:C0D8DACXX:2:1201:1170:170620 -HSQ1004:134:C0D8DACXX:2:1207:11905:5170 -HSQ1004:134:C0D8DACXX:2:1207:11905:5170 -HSQ1004:134:C0D8DACXX:2:1207:17159:66597 -HSQ1004:134:C0D8DACXX:2:1207:17159:66597 -HSQ1004:134:C0D8DACXX:3:2204:14936:131098 -HSQ1004:134:C0D8DACXX:3:2204:14936:131098 -HSQ1004:134:C0D8DACXX:4:2108:8260:116655 -HSQ1004:134:C0D8DACXX:4:2108:8260:116655 -HSQ1004:134:C0D8DACXX:4:2303:19778:126020 -HSQ1004:134:C0D8DACXX:4:2303:19778:126020 -HSQ1004:134:C0D8DACXX:1:1101:15583:177544 -HSQ1004:134:C0D8DACXX:2:2206:11247:154584 -HSQ1004:134:C0D8DACXX:1:1105:20410:116774 -HSQ1004:134:C0D8DACXX:3:2308:15212:45660 -HSQ1004:134:C0D8DACXX:1:1208:11215:89458 -HSQ1004:134:C0D8DACXX:1:1305:1928:59908 -HSQ1004:134:C0D8DACXX:1:1306:5388:181084 -HSQ1004:134:C0D8DACXX:4:2206:19407:178081 -HSQ1004:134:C0D8DACXX:1:2202:13906:125208 -HSQ1004:134:C0D8DACXX:1:2204:5692:125695 -HSQ1004:134:C0D8DACXX:1:2205:15154:26156 -HSQ1004:134:C0D8DACXX:3:1102:5760:79734 -HSQ1004:134:C0D8DACXX:4:1306:2044:10194 -HSQ1004:134:C0D8DACXX:1:2208:15966:140585 -HSQ1004:134:C0D8DACXX:1:2304:21293:19459 -HSQ1004:134:C0D8DACXX:1:2306:3845:55220 -HSQ1004:134:C0D8DACXX:4:2104:8313:137222 -HSQ1004:134:C0D8DACXX:1:2308:7020:134598 -HSQ1004:134:C0D8DACXX:3:2304:17271:69707 
-HSQ1004:134:C0D8DACXX:2:1202:7312:20460 -HSQ1004:134:C0D8DACXX:2:1203:10603:139142 -HSQ1004:134:C0D8DACXX:2:1206:18869:9372 -HSQ1004:134:C0D8DACXX:3:1108:9722:54545 -HSQ1004:134:C0D8DACXX:2:1208:2344:175241 -HSQ1004:134:C0D8DACXX:2:1301:11316:22661 -HSQ1004:134:C0D8DACXX:2:2103:5105:167995 -HSQ1004:134:C0D8DACXX:2:2104:18953:58459 -HSQ1004:134:C0D8DACXX:2:2108:7384:188447 -HSQ1004:134:C0D8DACXX:2:2204:6458:37493 -HSQ1004:134:C0D8DACXX:4:2307:17929:60928 -HSQ1004:134:C0D8DACXX:2:2205:8123:155553 -HSQ1004:134:C0D8DACXX:2:2206:19534:123600 -HSQ1004:134:C0D8DACXX:2:2206:11247:154584 -HSQ1004:134:C0D8DACXX:2:2207:4493:24937 -HSQ1004:134:C0D8DACXX:2:2303:10644:60709 -HSQ1004:134:C0D8DACXX:2:2306:3437:69521 -HSQ1004:134:C0D8DACXX:3:1105:7625:190869 -HSQ1004:134:C0D8DACXX:3:1102:1428:78802 -HSQ1004:134:C0D8DACXX:3:1102:5760:79734 -HSQ1004:134:C0D8DACXX:3:1104:12615:67897 -HSQ1004:134:C0D8DACXX:3:1105:4447:159727 -HSQ1004:134:C0D8DACXX:3:1105:7625:190869 -HSQ1004:134:C0D8DACXX:3:1106:17080:102267 -HSQ1004:134:C0D8DACXX:3:1106:10730:157969 -HSQ1004:134:C0D8DACXX:3:1107:14011:89267 -HSQ1004:134:C0D8DACXX:3:1107:5521:136679 -HSQ1004:134:C0D8DACXX:3:1108:9722:54545 -HSQ1004:134:C0D8DACXX:3:1205:11567:84925 -HSQ1004:134:C0D8DACXX:3:1206:7172:98673 -HSQ1004:134:C0D8DACXX:3:1302:12468:97133 -HSQ1004:134:C0D8DACXX:3:1307:2075:54648 -HSQ1004:134:C0D8DACXX:3:1308:6022:36602 -HSQ1004:134:C0D8DACXX:3:1308:5869:164744 -HSQ1004:134:C0D8DACXX:3:2104:10515:187583 -HSQ1004:134:C0D8DACXX:3:2107:10439:163365 -HSQ1004:134:C0D8DACXX:3:2202:14386:85562 -HSQ1004:134:C0D8DACXX:3:2204:2818:43062 -HSQ1004:134:C0D8DACXX:3:2204:12961:93211 -HSQ1004:134:C0D8DACXX:3:2206:21211:140730 -HSQ1004:134:C0D8DACXX:3:2208:8634:64492 -HSQ1004:134:C0D8DACXX:3:2301:14368:182233 -HSQ1004:134:C0D8DACXX:3:2304:17271:69707 -HSQ1004:134:C0D8DACXX:3:2306:20438:32046 -HSQ1004:134:C0D8DACXX:3:2308:15212:45660 -HSQ1004:134:C0D8DACXX:4:1203:11759:19011 -HSQ1004:134:C0D8DACXX:4:1306:2044:10194 
-HSQ1004:134:C0D8DACXX:4:1306:7582:85457 -HSQ1004:134:C0D8DACXX:4:1308:4967:126861 -HSQ1004:134:C0D8DACXX:4:2104:8313:137222 -HSQ1004:134:C0D8DACXX:4:2107:1782:175594 -HSQ1004:134:C0D8DACXX:4:2204:11811:49123 -HSQ1004:134:C0D8DACXX:4:2205:19205:16196 -HSQ1004:134:C0D8DACXX:4:2206:15251:122448 -HSQ1004:134:C0D8DACXX:4:2206:19407:178081 -HSQ1004:134:C0D8DACXX:4:2207:18962:169892 -HSQ1004:134:C0D8DACXX:4:2307:17929:60928 -HSQ1004:134:C0D8DACXX:4:2307:17486:85630 -HSQ1004:134:C0D8DACXX:4:2308:4540:114012 -HSQ1004:134:C0D8DACXX:4:2308:16896:119610 -HSQ1004:134:C0D8DACXX:1:1104:19380:56160 -HSQ1004:134:C0D8DACXX:1:1104:11435:191369 -HSQ1004:134:C0D8DACXX:2:2102:13150:49506 -HSQ1004:134:C0D8DACXX:1:1107:15537:32708 -HSQ1004:134:C0D8DACXX:1:1201:18305:31652 -HSQ1004:134:C0D8DACXX:1:1203:3524:85343 -HSQ1004:134:C0D8DACXX:1:1203:15679:97355 -HSQ1004:134:C0D8DACXX:1:1303:8594:15372 -HSQ1004:134:C0D8DACXX:1:1303:5070:69610 -HSQ1004:134:C0D8DACXX:1:1303:7521:173956 -HSQ1004:134:C0D8DACXX:1:1304:9072:125339 -HSQ1004:134:C0D8DACXX:1:1304:1828:158738 -HSQ1004:134:C0D8DACXX:1:1306:4383:56254 -HSQ1004:134:C0D8DACXX:1:1306:11845:127624 -HSQ1004:134:C0D8DACXX:4:1204:11952:12914 -HSQ1004:134:C0D8DACXX:1:2203:13085:24411 -HSQ1004:134:C0D8DACXX:1:2203:9963:171097 -HSQ1004:134:C0D8DACXX:1:2204:17524:104066 -HSQ1004:134:C0D8DACXX:1:2206:7295:21640 -HSQ1004:134:C0D8DACXX:3:1103:18407:59179 -HSQ1004:134:C0D8DACXX:1:2304:9099:155388 -HSQ1004:134:C0D8DACXX:1:2307:21041:134358 -HSQ1004:134:C0D8DACXX:2:1102:20936:95629 -HSQ1004:134:C0D8DACXX:2:1201:11463:83636 -HSQ1004:134:C0D8DACXX:2:1201:2171:123356 -HSQ1004:134:C0D8DACXX:2:1203:5335:200483 -HSQ1004:134:C0D8DACXX:2:1205:1795:167532 -HSQ1004:134:C0D8DACXX:2:1207:16984:27590 -HSQ1004:134:C0D8DACXX:2:1208:2713:28558 -HSQ1004:134:C0D8DACXX:2:1301:3940:82705 -HSQ1004:134:C0D8DACXX:2:1302:12054:79254 -HSQ1004:134:C0D8DACXX:2:1305:2322:72579 -HSQ1004:134:C0D8DACXX:2:1306:6754:77717 -HSQ1004:134:C0D8DACXX:3:2305:10157:84282 
-HSQ1004:134:C0D8DACXX:2:1308:16386:23102 -HSQ1004:134:C0D8DACXX:2:2102:13150:49506 -HSQ1004:134:C0D8DACXX:2:2102:21181:170567 -HSQ1004:134:C0D8DACXX:2:2104:15385:96860 -HSQ1004:134:C0D8DACXX:2:2104:20306:139407 -HSQ1004:134:C0D8DACXX:2:2104:8023:174568 -HSQ1004:134:C0D8DACXX:2:2104:7930:193112 -HSQ1004:134:C0D8DACXX:3:2303:4677:89068 -HSQ1004:134:C0D8DACXX:2:2106:1342:150774 -HSQ1004:134:C0D8DACXX:2:2106:11656:155231 -HSQ1004:134:C0D8DACXX:2:2203:2014:196768 -HSQ1004:134:C0D8DACXX:2:2204:2728:5958 -HSQ1004:134:C0D8DACXX:2:2204:12013:146241 -HSQ1004:134:C0D8DACXX:4:2101:13167:60662 -HSQ1004:134:C0D8DACXX:2:2208:4219:120420 -HSQ1004:134:C0D8DACXX:2:2304:17170:40034 -HSQ1004:134:C0D8DACXX:3:1205:16872:25400 -HSQ1004:134:C0D8DACXX:3:1103:19060:135411 -HSQ1004:134:C0D8DACXX:3:1101:5126:32965 -HSQ1004:134:C0D8DACXX:3:1102:8541:166381 -HSQ1004:134:C0D8DACXX:3:1103:18407:59179 -HSQ1004:134:C0D8DACXX:3:1103:19060:135411 -HSQ1004:134:C0D8DACXX:3:1108:14976:150064 -HSQ1004:134:C0D8DACXX:3:2301:3373:141608 -HSQ1004:134:C0D8DACXX:3:1202:4689:79805 -HSQ1004:134:C0D8DACXX:3:1204:6909:77842 -HSQ1004:134:C0D8DACXX:3:1205:16872:25400 -HSQ1004:134:C0D8DACXX:3:1205:17690:61772 -HSQ1004:134:C0D8DACXX:3:1207:12987:31222 -HSQ1004:134:C0D8DACXX:3:1207:12430:71475 -HSQ1004:134:C0D8DACXX:3:1207:1480:73980 -HSQ1004:134:C0D8DACXX:3:1207:8531:165101 -HSQ1004:134:C0D8DACXX:3:1208:10420:23348 -HSQ1004:134:C0D8DACXX:3:1304:4876:65562 -HSQ1004:134:C0D8DACXX:3:2102:6282:14694 -HSQ1004:134:C0D8DACXX:3:2104:16574:57841 -HSQ1004:134:C0D8DACXX:3:2106:8945:36081 -HSQ1004:134:C0D8DACXX:3:2108:11299:12085 -HSQ1004:134:C0D8DACXX:3:2108:2227:31839 -HSQ1004:134:C0D8DACXX:3:2201:4394:80315 -HSQ1004:134:C0D8DACXX:3:2201:18820:126149 -HSQ1004:134:C0D8DACXX:3:2201:10700:188901 -HSQ1004:134:C0D8DACXX:3:2202:13637:64038 -HSQ1004:134:C0D8DACXX:3:2202:8438:159577 -HSQ1004:134:C0D8DACXX:3:2204:16379:10388 -HSQ1004:134:C0D8DACXX:4:2302:14596:18856 -HSQ1004:134:C0D8DACXX:3:2207:13496:16777 
-HSQ1004:134:C0D8DACXX:3:2208:10504:11510 -HSQ1004:134:C0D8DACXX:3:2208:2922:144648 -HSQ1004:134:C0D8DACXX:3:2208:2112:186909 -HSQ1004:134:C0D8DACXX:3:2208:2657:191901 -HSQ1004:134:C0D8DACXX:3:2301:3373:141608 -HSQ1004:134:C0D8DACXX:3:2302:13662:95023 -HSQ1004:134:C0D8DACXX:3:2302:10781:98632 -HSQ1004:134:C0D8DACXX:3:2302:9248:106939 -HSQ1004:134:C0D8DACXX:3:2302:12495:138859 -HSQ1004:134:C0D8DACXX:3:2303:4677:89068 -HSQ1004:134:C0D8DACXX:3:2303:18836:133763 -HSQ1004:134:C0D8DACXX:3:2304:14814:192790 -HSQ1004:134:C0D8DACXX:3:2305:10157:84282 -HSQ1004:134:C0D8DACXX:4:2202:1840:44212 -HSQ1004:134:C0D8DACXX:4:1106:21259:101327 -HSQ1004:134:C0D8DACXX:4:1108:20481:125578 -HSQ1004:134:C0D8DACXX:4:1204:11952:12914 -HSQ1004:134:C0D8DACXX:4:1205:10516:187051 -HSQ1004:134:C0D8DACXX:4:1206:17663:108199 -HSQ1004:134:C0D8DACXX:4:1206:2083:108492 -HSQ1004:134:C0D8DACXX:4:1302:19574:100149 -HSQ1004:134:C0D8DACXX:4:1302:20872:188420 -HSQ1004:134:C0D8DACXX:4:1303:19854:6869 -HSQ1004:134:C0D8DACXX:4:2101:13167:60662 -HSQ1004:134:C0D8DACXX:4:2104:4048:110333 -HSQ1004:134:C0D8DACXX:4:2105:3355:143018 -HSQ1004:134:C0D8DACXX:4:2106:1511:15873 -HSQ1004:134:C0D8DACXX:4:2106:5725:71259 -HSQ1004:134:C0D8DACXX:4:2107:8988:116109 -HSQ1004:134:C0D8DACXX:4:2108:5864:118719 -HSQ1004:134:C0D8DACXX:4:2108:4368:136258 -HSQ1004:134:C0D8DACXX:4:2108:12933:138238 -HSQ1004:134:C0D8DACXX:4:2202:1840:44212 -HSQ1004:134:C0D8DACXX:4:2205:15658:25205 -HSQ1004:134:C0D8DACXX:4:2207:17677:187875 -HSQ1004:134:C0D8DACXX:4:2208:14792:161795 -HSQ1004:134:C0D8DACXX:4:2301:3357:25445 -HSQ1004:134:C0D8DACXX:4:2301:2597:129788 -HSQ1004:134:C0D8DACXX:4:2302:14596:18856 -HSQ1004:134:C0D8DACXX:4:2302:8779:24599 -HSQ1004:134:C0D8DACXX:4:2302:9659:33989 -HSQ1004:134:C0D8DACXX:4:2304:20367:98835 -HSQ1004:134:C0D8DACXX:2:2205:5605:125079 -HSQ1004:134:C0D8DACXX:2:2205:5605:125079 -HSQ1004:134:C0D8DACXX:3:2108:9346:79650 -HSQ1004:134:C0D8DACXX:3:2108:9346:79650 -HSQ1004:134:C0D8DACXX:4:2103:10989:37444 
-HSQ1004:134:C0D8DACXX:4:2103:10989:37444 -HSQ1004:134:C0D8DACXX:4:2104:7785:83100 -HSQ1004:134:C0D8DACXX:4:2104:7785:83100 -HSQ1004:134:C0D8DACXX:1:1103:3278:15879 -HSQ1004:134:C0D8DACXX:1:1106:5673:34354 -HSQ1004:134:C0D8DACXX:1:1201:17799:92131 -HSQ1004:134:C0D8DACXX:1:1207:20253:170910 -HSQ1004:134:C0D8DACXX:1:1301:3775:167934 -HSQ1004:134:C0D8DACXX:1:1301:10476:175073 -HSQ1004:134:C0D8DACXX:1:1303:8622:32893 -HSQ1004:134:C0D8DACXX:1:1308:4377:110852 -HSQ1004:134:C0D8DACXX:1:2101:21255:25186 -HSQ1004:134:C0D8DACXX:1:2207:18241:106152 -HSQ1004:134:C0D8DACXX:1:2207:10759:196271 -HSQ1004:134:C0D8DACXX:1:2301:6700:126850 -HSQ1004:134:C0D8DACXX:2:1103:6629:189572 -HSQ1004:134:C0D8DACXX:2:1105:8886:95597 -HSQ1004:134:C0D8DACXX:3:2104:21183:174057 -HSQ1004:134:C0D8DACXX:2:1201:1598:150854 -HSQ1004:134:C0D8DACXX:2:1204:2316:101796 -HSQ1004:134:C0D8DACXX:2:1204:15939:196892 -HSQ1004:134:C0D8DACXX:2:1208:8845:39283 -HSQ1004:134:C0D8DACXX:2:1305:4366:44783 -HSQ1004:134:C0D8DACXX:2:2103:12139:138657 -HSQ1004:134:C0D8DACXX:2:2203:21067:157487 -HSQ1004:134:C0D8DACXX:2:2305:13720:55001 -HSQ1004:134:C0D8DACXX:3:1103:10488:109928 -HSQ1004:134:C0D8DACXX:2:2307:14914:58329 -HSQ1004:134:C0D8DACXX:2:2307:20245:133302 -HSQ1004:134:C0D8DACXX:3:1101:10512:178589 -HSQ1004:134:C0D8DACXX:3:1102:14992:200450 -HSQ1004:134:C0D8DACXX:3:1103:10488:109928 -HSQ1004:134:C0D8DACXX:3:1201:6047:192135 -HSQ1004:134:C0D8DACXX:3:1202:13916:16370 -HSQ1004:134:C0D8DACXX:3:1202:18096:59965 -HSQ1004:134:C0D8DACXX:3:1203:2749:31005 -HSQ1004:134:C0D8DACXX:3:1204:10271:107548 -HSQ1004:134:C0D8DACXX:3:1204:14779:125632 -HSQ1004:134:C0D8DACXX:3:1308:14284:70866 -HSQ1004:134:C0D8DACXX:3:2102:19073:27816 -HSQ1004:134:C0D8DACXX:3:2103:10883:11141 -HSQ1004:134:C0D8DACXX:3:2103:16475:110172 -HSQ1004:134:C0D8DACXX:3:2104:21183:174057 -HSQ1004:134:C0D8DACXX:3:2204:9608:121009 -HSQ1004:134:C0D8DACXX:3:2205:3627:19820 -HSQ1004:134:C0D8DACXX:3:2303:2834:138441 -HSQ1004:134:C0D8DACXX:3:2308:2735:115971 
-HSQ1004:134:C0D8DACXX:4:1105:14581:102379 -HSQ1004:134:C0D8DACXX:4:1306:15452:43424 -HSQ1004:134:C0D8DACXX:4:2101:20403:36238 -HSQ1004:134:C0D8DACXX:4:2103:14992:158987 -HSQ1004:134:C0D8DACXX:4:2201:19454:34788 -HSQ1004:134:C0D8DACXX:4:2204:20578:64783 -HSQ1004:134:C0D8DACXX:4:2205:19239:88400 -HSQ1004:134:C0D8DACXX:4:2207:16417:103551 -HSQ1004:134:C0D8DACXX:4:2208:14339:147962 -HSQ1004:134:C0D8DACXX:4:2301:11406:85129 -HSQ1004:134:C0D8DACXX:4:2305:20422:111076 -HSQ1004:134:C0D8DACXX:1:1101:3059:135524 -HSQ1004:134:C0D8DACXX:1:1104:10025:131538 -HSQ1004:134:C0D8DACXX:2:2303:15896:128977 -HSQ1004:134:C0D8DACXX:1:1106:19016:164991 -HSQ1004:134:C0D8DACXX:4:2304:3940:111724 -HSQ1004:134:C0D8DACXX:1:2204:17800:84155 -HSQ1004:134:C0D8DACXX:1:1207:8818:77471 -HSQ1004:134:C0D8DACXX:1:1308:17423:179074 -HSQ1004:134:C0D8DACXX:1:1306:5554:110085 -HSQ1004:134:C0D8DACXX:1:1308:17423:179074 -HSQ1004:134:C0D8DACXX:1:2105:21057:190292 -HSQ1004:134:C0D8DACXX:1:2202:17339:135301 -HSQ1004:134:C0D8DACXX:1:2204:17800:84155 -HSQ1004:134:C0D8DACXX:1:2204:18211:194624 -HSQ1004:134:C0D8DACXX:1:2205:17130:62889 -HSQ1004:134:C0D8DACXX:2:1304:13406:149208 -HSQ1004:134:C0D8DACXX:1:2304:2727:31081 -HSQ1004:134:C0D8DACXX:2:1102:9030:162676 -HSQ1004:134:C0D8DACXX:2:1102:3794:163533 -HSQ1004:134:C0D8DACXX:2:1104:4798:13660 -HSQ1004:134:C0D8DACXX:2:1105:15325:20985 -HSQ1004:134:C0D8DACXX:2:1105:19856:28330 -HSQ1004:134:C0D8DACXX:2:1105:13179:49889 -HSQ1004:134:C0D8DACXX:2:1107:18462:94609 -HSQ1004:134:C0D8DACXX:2:1202:1519:15259 -HSQ1004:134:C0D8DACXX:2:1203:13596:148533 -HSQ1004:134:C0D8DACXX:2:1208:19453:137788 -HSQ1004:134:C0D8DACXX:2:1304:13406:149208 -HSQ1004:134:C0D8DACXX:2:1307:7511:59315 -HSQ1004:134:C0D8DACXX:2:1307:16157:159452 -HSQ1004:134:C0D8DACXX:2:1308:8055:59293 -HSQ1004:134:C0D8DACXX:2:1308:8851:105392 -HSQ1004:134:C0D8DACXX:2:2105:7048:4444 -HSQ1004:134:C0D8DACXX:2:2105:18913:54713 -HSQ1004:134:C0D8DACXX:2:2107:4981:126460 -HSQ1004:134:C0D8DACXX:2:2108:2891:155637 
-HSQ1004:134:C0D8DACXX:2:2108:20408:191934 -HSQ1004:134:C0D8DACXX:2:2202:13572:107989 -HSQ1004:134:C0D8DACXX:2:2202:1058:126294 -HSQ1004:134:C0D8DACXX:2:2202:15725:176204 -HSQ1004:134:C0D8DACXX:2:2202:15725:176220 -HSQ1004:134:C0D8DACXX:2:2204:20925:158599 -HSQ1004:134:C0D8DACXX:2:2206:1414:161216 -HSQ1004:134:C0D8DACXX:2:2208:18731:126728 -HSQ1004:134:C0D8DACXX:2:2208:16656:144244 -HSQ1004:134:C0D8DACXX:2:2302:17438:71992 -HSQ1004:134:C0D8DACXX:2:2303:15896:128977 -HSQ1004:134:C0D8DACXX:2:2303:8606:163279 -HSQ1004:134:C0D8DACXX:2:2304:8401:18959 -HSQ1004:134:C0D8DACXX:2:2305:18501:116225 -HSQ1004:134:C0D8DACXX:3:1103:20631:129278 -HSQ1004:134:C0D8DACXX:3:1104:9291:99123 -HSQ1004:134:C0D8DACXX:3:1204:4391:189225 -HSQ1004:134:C0D8DACXX:3:1206:4152:95144 -HSQ1004:134:C0D8DACXX:4:2108:19971:99014 -HSQ1004:134:C0D8DACXX:3:1301:18165:119189 -HSQ1004:134:C0D8DACXX:3:1303:8956:91277 -HSQ1004:134:C0D8DACXX:3:1304:10292:106858 -HSQ1004:134:C0D8DACXX:3:1305:10326:128072 -HSQ1004:134:C0D8DACXX:3:1307:8439:169493 -HSQ1004:134:C0D8DACXX:3:1307:9326:195988 -HSQ1004:134:C0D8DACXX:3:2105:1456:82339 -HSQ1004:134:C0D8DACXX:3:2107:11510:71941 -HSQ1004:134:C0D8DACXX:3:2306:20267:196622 -HSQ1004:134:C0D8DACXX:3:2201:19246:2810 -HSQ1004:134:C0D8DACXX:3:2201:17836:79242 -HSQ1004:134:C0D8DACXX:3:2202:20268:4184 -HSQ1004:134:C0D8DACXX:3:2202:11024:19112 -HSQ1004:134:C0D8DACXX:3:2203:8888:159548 -HSQ1004:134:C0D8DACXX:3:2205:9449:23216 -HSQ1004:134:C0D8DACXX:3:2206:16957:2933 -HSQ1004:134:C0D8DACXX:3:2206:20073:96605 -HSQ1004:134:C0D8DACXX:3:2206:7540:148415 -HSQ1004:134:C0D8DACXX:4:2106:8973:62494 -HSQ1004:134:C0D8DACXX:3:2207:7288:124577 -HSQ1004:134:C0D8DACXX:3:2306:20267:196622 -HSQ1004:134:C0D8DACXX:4:1102:15344:21764 -HSQ1004:134:C0D8DACXX:4:1105:2251:25789 -HSQ1004:134:C0D8DACXX:4:1106:16504:158070 -HSQ1004:134:C0D8DACXX:4:1107:20162:182452 -HSQ1004:134:C0D8DACXX:4:1207:19937:141363 -HSQ1004:134:C0D8DACXX:4:1208:5610:44575 -HSQ1004:134:C0D8DACXX:4:1305:19992:116901 
-HSQ1004:134:C0D8DACXX:4:1308:19118:150575 -HSQ1004:134:C0D8DACXX:4:1308:11866:189703 -HSQ1004:134:C0D8DACXX:4:2103:11622:42868 -HSQ1004:134:C0D8DACXX:4:2105:8553:89656 -HSQ1004:134:C0D8DACXX:4:2106:9008:10915 -HSQ1004:134:C0D8DACXX:4:2106:8973:62494 -HSQ1004:134:C0D8DACXX:4:2108:19971:99014 -HSQ1004:134:C0D8DACXX:4:2108:11222:113760 -HSQ1004:134:C0D8DACXX:4:2201:20905:155070 -HSQ1004:134:C0D8DACXX:4:2203:14139:27399 -HSQ1004:134:C0D8DACXX:4:2203:7714:100470 -HSQ1004:134:C0D8DACXX:4:2206:18215:119055 -HSQ1004:134:C0D8DACXX:4:2304:3940:111724 -HSQ1004:134:C0D8DACXX:4:2305:8883:65509 -HSQ1004:134:C0D8DACXX:4:2306:8568:145927 -HSQ1004:134:C0D8DACXX:4:2308:2671:12077 -HSQ1004:134:C0D8DACXX:1:1303:1144:111035 -HSQ1004:134:C0D8DACXX:1:1303:1144:111035 -HSQ1004:134:C0D8DACXX:1:1307:7557:87037 -HSQ1004:134:C0D8DACXX:1:1307:7557:87037 -HSQ1004:134:C0D8DACXX:2:1301:15038:146834 -HSQ1004:134:C0D8DACXX:2:1301:15038:146834 -HSQ1004:134:C0D8DACXX:3:2207:7983:120451 -HSQ1004:134:C0D8DACXX:3:2207:7983:120451 -HSQ1004:134:C0D8DACXX:3:2303:11834:111438 -HSQ1004:134:C0D8DACXX:3:2303:11834:111438 -HSQ1004:134:C0D8DACXX:1:2305:16913:11111 -HSQ1004:134:C0D8DACXX:1:1201:3136:57801 -HSQ1004:134:C0D8DACXX:3:1303:10945:143315 -HSQ1004:134:C0D8DACXX:1:1206:12738:112054 -HSQ1004:134:C0D8DACXX:1:1306:9230:167862 -HSQ1004:134:C0D8DACXX:1:1308:16971:20756 -HSQ1004:134:C0D8DACXX:1:2104:10001:26899 -HSQ1004:134:C0D8DACXX:1:2105:18320:40791 -HSQ1004:134:C0D8DACXX:1:2201:2519:84862 -HSQ1004:134:C0D8DACXX:4:1305:3447:76191 -HSQ1004:134:C0D8DACXX:2:2208:4407:36998 -HSQ1004:134:C0D8DACXX:1:2208:15987:189865 -HSQ1004:134:C0D8DACXX:1:2305:16913:11111 -HSQ1004:134:C0D8DACXX:1:2306:8925:163605 -HSQ1004:134:C0D8DACXX:2:1107:3777:49743 -HSQ1004:134:C0D8DACXX:2:2204:15517:106520 -HSQ1004:134:C0D8DACXX:2:1201:9976:197440 -HSQ1004:134:C0D8DACXX:2:1203:13917:103576 -HSQ1004:134:C0D8DACXX:2:1204:9999:91274 -HSQ1004:134:C0D8DACXX:2:1204:20976:129874 -HSQ1004:134:C0D8DACXX:2:1205:16157:117078 
-HSQ1004:134:C0D8DACXX:2:1206:12712:73949 -HSQ1004:134:C0D8DACXX:2:2206:9005:180974 -HSQ1004:134:C0D8DACXX:4:1305:13965:42301 -HSQ1004:134:C0D8DACXX:2:2105:17085:53192 -HSQ1004:134:C0D8DACXX:2:2106:3263:200250 -HSQ1004:134:C0D8DACXX:2:2108:11228:109904 -HSQ1004:134:C0D8DACXX:2:2204:15517:106520 -HSQ1004:134:C0D8DACXX:2:2206:9005:180974 -HSQ1004:134:C0D8DACXX:2:2208:4407:36998 -HSQ1004:134:C0D8DACXX:2:2208:17836:108369 -HSQ1004:134:C0D8DACXX:2:2305:15836:176265 -HSQ1004:134:C0D8DACXX:2:2306:1152:30844 -HSQ1004:134:C0D8DACXX:2:2306:9108:49830 -HSQ1004:134:C0D8DACXX:3:1101:7262:135419 -HSQ1004:134:C0D8DACXX:3:1201:9002:137345 -HSQ1004:134:C0D8DACXX:3:1201:20177:157397 -HSQ1004:134:C0D8DACXX:3:1202:7343:185129 -HSQ1004:134:C0D8DACXX:3:1203:16310:97155 -HSQ1004:134:C0D8DACXX:3:1203:13476:199956 -HSQ1004:134:C0D8DACXX:3:1208:15267:173459 -HSQ1004:134:C0D8DACXX:3:1302:3118:67188 -HSQ1004:134:C0D8DACXX:3:1303:10945:143315 -HSQ1004:134:C0D8DACXX:3:1304:1165:153366 -HSQ1004:134:C0D8DACXX:3:1306:19012:91689 -HSQ1004:134:C0D8DACXX:3:1307:19748:43989 -HSQ1004:134:C0D8DACXX:3:2102:12965:23155 -HSQ1004:134:C0D8DACXX:3:2105:7313:125463 -HSQ1004:134:C0D8DACXX:3:2105:3974:151446 -HSQ1004:134:C0D8DACXX:3:2108:5572:145956 -HSQ1004:134:C0D8DACXX:3:2204:5238:182461 -HSQ1004:134:C0D8DACXX:3:2302:16765:75830 -HSQ1004:134:C0D8DACXX:3:2303:1062:76466 -HSQ1004:134:C0D8DACXX:3:2304:11957:38950 -HSQ1004:134:C0D8DACXX:3:2306:7985:178368 -HSQ1004:134:C0D8DACXX:4:1102:4003:6144 -HSQ1004:134:C0D8DACXX:4:1107:5210:10552 -HSQ1004:134:C0D8DACXX:4:1107:13236:74055 -HSQ1004:134:C0D8DACXX:4:1203:14898:87378 -HSQ1004:134:C0D8DACXX:4:1204:15154:166512 -HSQ1004:134:C0D8DACXX:4:1205:13715:8043 -HSQ1004:134:C0D8DACXX:4:1207:2620:149509 -HSQ1004:134:C0D8DACXX:4:1208:5585:200602 -HSQ1004:134:C0D8DACXX:4:1303:15962:85658 -HSQ1004:134:C0D8DACXX:4:1305:13965:42301 -HSQ1004:134:C0D8DACXX:4:1305:3447:76191 -HSQ1004:134:C0D8DACXX:4:1306:16488:158701 -HSQ1004:134:C0D8DACXX:4:1308:19143:93849 
-HSQ1004:134:C0D8DACXX:4:1308:15073:127184 -HSQ1004:134:C0D8DACXX:4:2102:1687:105508 -HSQ1004:134:C0D8DACXX:4:2107:5662:51269 -HSQ1004:134:C0D8DACXX:4:2202:11274:10169 -HSQ1004:134:C0D8DACXX:4:2203:7808:182955 -HSQ1004:134:C0D8DACXX:4:2207:11910:105270 -HSQ1004:134:C0D8DACXX:4:2207:11910:105270 -HSQ1004:134:C0D8DACXX:4:2207:5677:126040 -HSQ1004:134:C0D8DACXX:4:2302:1704:105189 -HSQ1004:134:C0D8DACXX:4:2306:21311:149572 -HSQ1004:134:C0D8DACXX:4:2308:9990:77857 -HSQ1004:134:C0D8DACXX:1:1102:11075:87068 -HSQ1004:134:C0D8DACXX:1:1104:20659:44264 -HSQ1004:134:C0D8DACXX:1:1206:12301:134805 -HSQ1004:134:C0D8DACXX:1:1201:17626:98637 -HSQ1004:134:C0D8DACXX:1:1202:9711:106291 -HSQ1004:134:C0D8DACXX:1:1204:14976:34568 -HSQ1004:134:C0D8DACXX:1:1206:12301:134805 -HSQ1004:134:C0D8DACXX:1:1207:16691:37194 -HSQ1004:134:C0D8DACXX:1:1208:19856:109149 -HSQ1004:134:C0D8DACXX:4:1205:3041:176593 -HSQ1004:134:C0D8DACXX:1:1308:17406:62759 -HSQ1004:134:C0D8DACXX:1:2103:9294:2995 -HSQ1004:134:C0D8DACXX:1:2203:8211:100769 -HSQ1004:134:C0D8DACXX:1:2203:12416:161662 -HSQ1004:134:C0D8DACXX:1:2203:5237:164095 -HSQ1004:134:C0D8DACXX:2:2103:13067:110902 -HSQ1004:134:C0D8DACXX:3:2301:9960:80554 -HSQ1004:134:C0D8DACXX:1:2303:8093:30706 -HSQ1004:134:C0D8DACXX:1:2303:4914:110911 -HSQ1004:134:C0D8DACXX:1:2305:15864:137828 -HSQ1004:134:C0D8DACXX:2:2204:12959:114270 -HSQ1004:134:C0D8DACXX:1:2306:20519:14103 -HSQ1004:134:C0D8DACXX:1:2307:20854:76957 -HSQ1004:134:C0D8DACXX:1:2307:6982:94439 -HSQ1004:134:C0D8DACXX:2:1101:11309:5283 -HSQ1004:134:C0D8DACXX:2:1104:3860:45377 -HSQ1004:134:C0D8DACXX:2:1104:16117:108766 -HSQ1004:134:C0D8DACXX:4:1206:20994:22415 -HSQ1004:134:C0D8DACXX:2:1105:20890:79715 -HSQ1004:134:C0D8DACXX:2:1107:13204:183989 -HSQ1004:134:C0D8DACXX:2:1107:3223:197017 -HSQ1004:134:C0D8DACXX:2:1203:13013:139237 -HSQ1004:134:C0D8DACXX:2:1303:18973:161388 -HSQ1004:134:C0D8DACXX:2:1304:1314:176509 -HSQ1004:134:C0D8DACXX:2:1307:1800:28262 -HSQ1004:134:C0D8DACXX:2:2101:18593:111614 
-HSQ1004:134:C0D8DACXX:2:2102:5833:66770 -HSQ1004:134:C0D8DACXX:2:2103:13067:110902 -HSQ1004:134:C0D8DACXX:2:2104:2249:114080 -HSQ1004:134:C0D8DACXX:2:2104:11744:137901 -HSQ1004:134:C0D8DACXX:2:2105:17576:103968 -HSQ1004:134:C0D8DACXX:2:2105:3720:108317 -HSQ1004:134:C0D8DACXX:2:2105:17194:127835 -HSQ1004:134:C0D8DACXX:2:2203:1268:173169 -HSQ1004:134:C0D8DACXX:2:2204:12959:114270 -HSQ1004:134:C0D8DACXX:2:2206:16446:164819 -HSQ1004:134:C0D8DACXX:2:2207:15657:14907 -HSQ1004:134:C0D8DACXX:2:2207:13509:126184 -HSQ1004:134:C0D8DACXX:2:2301:17458:140449 -HSQ1004:134:C0D8DACXX:2:2304:15633:181113 -HSQ1004:134:C0D8DACXX:2:2305:4379:111007 -HSQ1004:134:C0D8DACXX:2:2305:17290:157620 -HSQ1004:134:C0D8DACXX:4:1101:14374:63931 -HSQ1004:134:C0D8DACXX:3:1103:12043:52062 -HSQ1004:134:C0D8DACXX:3:1103:20542:53030 -HSQ1004:134:C0D8DACXX:3:1104:8839:18549 -HSQ1004:134:C0D8DACXX:3:1105:15667:153728 -HSQ1004:134:C0D8DACXX:3:1107:14273:116451 -HSQ1004:134:C0D8DACXX:3:1201:10616:79818 -HSQ1004:134:C0D8DACXX:3:1202:19716:16239 -HSQ1004:134:C0D8DACXX:3:1203:11214:128181 -HSQ1004:134:C0D8DACXX:3:1205:9553:40329 -HSQ1004:134:C0D8DACXX:3:1304:11403:47185 -HSQ1004:134:C0D8DACXX:3:1304:8580:118622 -HSQ1004:134:C0D8DACXX:3:1306:17014:171730 -HSQ1004:134:C0D8DACXX:3:1307:12710:82400 -HSQ1004:134:C0D8DACXX:3:2102:4391:59408 -HSQ1004:134:C0D8DACXX:3:2105:13566:61259 -HSQ1004:134:C0D8DACXX:3:2106:15797:37086 -HSQ1004:134:C0D8DACXX:3:2108:19629:34824 -HSQ1004:134:C0D8DACXX:3:2201:16180:154896 -HSQ1004:134:C0D8DACXX:3:2203:9088:101600 -HSQ1004:134:C0D8DACXX:3:2208:7003:130695 -HSQ1004:134:C0D8DACXX:3:2301:14751:7000 -HSQ1004:134:C0D8DACXX:3:2301:9960:80554 -HSQ1004:134:C0D8DACXX:4:1101:14374:63931 -HSQ1004:134:C0D8DACXX:4:1102:1957:119264 -HSQ1004:134:C0D8DACXX:4:1104:6220:172163 -HSQ1004:134:C0D8DACXX:4:1104:14152:179506 -HSQ1004:134:C0D8DACXX:4:1105:4279:191367 -HSQ1004:134:C0D8DACXX:4:1106:8274:63167 -HSQ1004:134:C0D8DACXX:4:1107:18609:37537 -HSQ1004:134:C0D8DACXX:4:1107:8513:163179 
-HSQ1004:134:C0D8DACXX:4:1204:5275:87399 -HSQ1004:134:C0D8DACXX:4:1205:3041:176593 -HSQ1004:134:C0D8DACXX:4:1205:12477:187614 -HSQ1004:134:C0D8DACXX:4:1206:20994:22415 -HSQ1004:134:C0D8DACXX:4:1208:12000:77370 -HSQ1004:134:C0D8DACXX:4:1306:9089:47869 -HSQ1004:134:C0D8DACXX:4:2101:6877:20065 -HSQ1004:134:C0D8DACXX:4:2108:2492:65996 -HSQ1004:134:C0D8DACXX:4:2108:14424:130896 -HSQ1004:134:C0D8DACXX:4:2108:4408:141628 -HSQ1004:134:C0D8DACXX:4:2201:18088:45917 -HSQ1004:134:C0D8DACXX:4:2206:12234:167323 -HSQ1004:134:C0D8DACXX:4:2301:7206:133810 -HSQ1004:134:C0D8DACXX:4:2303:16251:185476 -HSQ1004:134:C0D8DACXX:4:2305:20668:60558 -HSQ1004:134:C0D8DACXX:4:2305:2514:168205 -HSQ1004:134:C0D8DACXX:4:2307:21275:16769 -HSQ1004:134:C0D8DACXX:4:2307:15766:88145 -HSQ1004:134:C0D8DACXX:4:1207:17961:91520 -HSQ1004:134:C0D8DACXX:4:1207:17961:91520 -HSQ1004:134:C0D8DACXX:1:1203:19873:58674 -HSQ1004:134:C0D8DACXX:1:1203:19873:58674 -HSQ1004:134:C0D8DACXX:1:1306:8789:67299 -HSQ1004:134:C0D8DACXX:1:1306:8789:67299 -HSQ1004:134:C0D8DACXX:2:2107:7373:62174 -HSQ1004:134:C0D8DACXX:2:2107:7373:62174 -HSQ1004:134:C0D8DACXX:2:2108:10892:159997 -HSQ1004:134:C0D8DACXX:2:2108:10892:159997 -HSQ1004:134:C0D8DACXX:2:2302:1333:195400 -HSQ1004:134:C0D8DACXX:2:2302:1333:195400 -HSQ1004:134:C0D8DACXX:4:1102:19442:136443 -HSQ1004:134:C0D8DACXX:4:1102:19442:136443 -HSQ1004:134:C0D8DACXX:4:1104:13876:147755 -HSQ1004:134:C0D8DACXX:4:1104:13876:147755 -HSQ1004:134:C0D8DACXX:4:1204:15085:87620 -HSQ1004:134:C0D8DACXX:4:1204:15085:87620 -HSQ1004:134:C0D8DACXX:4:1208:1151:145803 -HSQ1004:134:C0D8DACXX:4:1208:1151:145803 -HSQ1004:134:C0D8DACXX:1:1102:4822:190403 -HSQ1004:134:C0D8DACXX:1:1104:2947:96938 -HSQ1004:134:C0D8DACXX:1:1105:2184:184250 -HSQ1004:134:C0D8DACXX:4:2101:9703:86514 -HSQ1004:134:C0D8DACXX:4:2207:5514:12083 -HSQ1004:134:C0D8DACXX:1:1201:20290:11969 -HSQ1004:134:C0D8DACXX:1:1202:18172:3456 -HSQ1004:134:C0D8DACXX:1:1205:13281:66081 -HSQ1004:134:C0D8DACXX:1:1206:8570:64722 
-HSQ1004:134:C0D8DACXX:4:2207:7458:38617 -HSQ1004:134:C0D8DACXX:1:1207:6423:73315 -HSQ1004:134:C0D8DACXX:1:1208:8551:50015 -HSQ1004:134:C0D8DACXX:1:1208:10204:53989 -HSQ1004:134:C0D8DACXX:1:1208:19785:114078 -HSQ1004:134:C0D8DACXX:1:1304:5122:38729 -HSQ1004:134:C0D8DACXX:1:1305:7019:191850 -HSQ1004:134:C0D8DACXX:1:1308:20293:81540 -HSQ1004:134:C0D8DACXX:1:2107:9932:22536 -HSQ1004:134:C0D8DACXX:1:2202:20473:27802 -HSQ1004:134:C0D8DACXX:1:2202:20473:27802 -HSQ1004:134:C0D8DACXX:1:2207:3710:121218 -HSQ1004:134:C0D8DACXX:1:2207:12181:174602 -HSQ1004:134:C0D8DACXX:4:2105:11900:191711 -HSQ1004:134:C0D8DACXX:1:2304:2001:149871 -HSQ1004:134:C0D8DACXX:1:2305:11856:93915 -HSQ1004:134:C0D8DACXX:1:2305:16432:191098 -HSQ1004:134:C0D8DACXX:4:1302:12969:76099 -HSQ1004:134:C0D8DACXX:1:2307:14327:101482 -HSQ1004:134:C0D8DACXX:4:2205:16836:170561 -HSQ1004:134:C0D8DACXX:1:2308:6784:153723 -HSQ1004:134:C0D8DACXX:2:2103:17057:181576 -HSQ1004:134:C0D8DACXX:2:1107:17891:15739 -HSQ1004:134:C0D8DACXX:4:2108:15346:36178 -HSQ1004:134:C0D8DACXX:2:1107:19469:32547 -HSQ1004:134:C0D8DACXX:3:1205:1685:5860 -HSQ1004:134:C0D8DACXX:2:1108:16630:125669 -HSQ1004:134:C0D8DACXX:2:1206:8500:88984 -HSQ1004:134:C0D8DACXX:2:1207:16079:68640 -HSQ1004:134:C0D8DACXX:2:1301:17331:153233 -HSQ1004:134:C0D8DACXX:2:1304:11886:98869 -HSQ1004:134:C0D8DACXX:2:1306:14848:155532 -HSQ1004:134:C0D8DACXX:2:1307:7167:86300 -HSQ1004:134:C0D8DACXX:2:2206:5387:14968 -HSQ1004:134:C0D8DACXX:2:1307:4863:113878 -HSQ1004:134:C0D8DACXX:2:1307:5190:178496 -HSQ1004:134:C0D8DACXX:1:1105:12529:138294 -HSQ1004:134:C0D8DACXX:2:2101:15015:138887 -HSQ1004:134:C0D8DACXX:2:2103:17057:181576 -HSQ1004:134:C0D8DACXX:2:2106:13474:18229 -HSQ1004:134:C0D8DACXX:2:2108:16470:127139 -HSQ1004:134:C0D8DACXX:2:2206:5387:14968 -HSQ1004:134:C0D8DACXX:2:2206:16562:18160 -HSQ1004:134:C0D8DACXX:2:2207:17734:16523 -HSQ1004:134:C0D8DACXX:2:2208:2272:136518 -HSQ1004:134:C0D8DACXX:2:2302:21232:21965 -HSQ1004:134:C0D8DACXX:2:2306:8874:88711 
-HSQ1004:134:C0D8DACXX:4:1302:6750:50270 -HSQ1004:134:C0D8DACXX:2:2307:10356:157631 -HSQ1004:134:C0D8DACXX:3:1104:20203:109713 -HSQ1004:134:C0D8DACXX:3:1104:18171:153532 -HSQ1004:134:C0D8DACXX:3:2108:10435:112054 -HSQ1004:134:C0D8DACXX:3:1105:2282:89927 -HSQ1004:134:C0D8DACXX:3:1204:7351:8348 -HSQ1004:134:C0D8DACXX:3:1205:1685:5860 -HSQ1004:134:C0D8DACXX:3:1206:17720:134434 -HSQ1004:134:C0D8DACXX:3:1208:7499:64307 -HSQ1004:134:C0D8DACXX:3:1301:10812:68527 -HSQ1004:134:C0D8DACXX:3:1302:1418:82329 -HSQ1004:134:C0D8DACXX:3:1306:10655:167583 -HSQ1004:134:C0D8DACXX:3:2102:1879:130363 -HSQ1004:134:C0D8DACXX:3:2103:3038:46837 -HSQ1004:134:C0D8DACXX:3:2107:13514:13360 -HSQ1004:134:C0D8DACXX:3:2107:16596:34354 -HSQ1004:134:C0D8DACXX:3:2108:10435:112054 -HSQ1004:134:C0D8DACXX:4:2208:15683:171576 -HSQ1004:134:C0D8DACXX:3:2208:20064:91762 -HSQ1004:134:C0D8DACXX:3:2208:14618:200465 -HSQ1004:134:C0D8DACXX:3:2301:15604:127420 -HSQ1004:134:C0D8DACXX:4:2106:15050:22730 -HSQ1004:134:C0D8DACXX:3:2303:7985:19398 -HSQ1004:134:C0D8DACXX:3:2306:16692:171568 -HSQ1004:134:C0D8DACXX:4:1203:21181:70294 -HSQ1004:134:C0D8DACXX:4:1205:6271:103731 -HSQ1004:134:C0D8DACXX:4:1302:6750:50270 -HSQ1004:134:C0D8DACXX:4:1302:2913:71129 -HSQ1004:134:C0D8DACXX:4:1302:12969:76099 -HSQ1004:134:C0D8DACXX:4:1303:3536:63766 -HSQ1004:134:C0D8DACXX:4:1304:10676:25792 -HSQ1004:134:C0D8DACXX:4:1306:7479:12261 -HSQ1004:134:C0D8DACXX:4:1306:18081:160742 -HSQ1004:134:C0D8DACXX:4:1306:19429:163974 -HSQ1004:134:C0D8DACXX:4:2101:9088:22057 -HSQ1004:134:C0D8DACXX:4:2101:9703:86514 -HSQ1004:134:C0D8DACXX:4:2104:8461:181054 -HSQ1004:134:C0D8DACXX:4:2105:11900:191711 -HSQ1004:134:C0D8DACXX:4:2106:15050:22730 -HSQ1004:134:C0D8DACXX:4:2106:13472:44763 -HSQ1004:134:C0D8DACXX:4:2108:15346:36178 -HSQ1004:134:C0D8DACXX:4:2108:5245:194148 -HSQ1004:134:C0D8DACXX:4:2205:16836:170561 -HSQ1004:134:C0D8DACXX:4:2206:11887:146897 -HSQ1004:134:C0D8DACXX:4:2207:5514:12083 -HSQ1004:134:C0D8DACXX:4:2207:17148:104235 
-HSQ1004:134:C0D8DACXX:4:2208:15683:171576 -HSQ1004:134:C0D8DACXX:4:2305:11335:85981 -HSQ1004:134:C0D8DACXX:4:2307:14760:103793 -HSQ1004:134:C0D8DACXX:4:2308:20105:149766 -HSQ1004:134:C0D8DACXX:1:1105:9700:21888 -HSQ1004:134:C0D8DACXX:1:1105:1848:124389 -HSQ1004:134:C0D8DACXX:1:1105:12529:138294 -HSQ1004:134:C0D8DACXX:1:1106:12209:16204 -HSQ1004:134:C0D8DACXX:1:1108:21134:3640 -HSQ1004:134:C0D8DACXX:3:2302:15230:104627 -HSQ1004:134:C0D8DACXX:1:1204:2298:65997 -HSQ1004:134:C0D8DACXX:1:1206:8274:84175 -HSQ1004:134:C0D8DACXX:2:2201:12112:64749 -HSQ1004:134:C0D8DACXX:2:2308:18866:131685 -HSQ1004:134:C0D8DACXX:1:1208:20292:139360 -HSQ1004:134:C0D8DACXX:1:1301:15883:160076 -HSQ1004:134:C0D8DACXX:1:1302:13608:27903 -HSQ1004:134:C0D8DACXX:1:1305:13565:100347 -HSQ1004:134:C0D8DACXX:1:1306:7302:80352 -HSQ1004:134:C0D8DACXX:1:1308:1232:18988 -HSQ1004:134:C0D8DACXX:1:1308:17451:144215 -HSQ1004:134:C0D8DACXX:1:2102:19098:157712 -HSQ1004:134:C0D8DACXX:1:2106:2497:151348 -HSQ1004:134:C0D8DACXX:1:2104:6957:88312 -HSQ1004:134:C0D8DACXX:1:2104:8023:173243 -HSQ1004:134:C0D8DACXX:3:2305:10266:141215 -HSQ1004:134:C0D8DACXX:1:2106:2497:151348 -HSQ1004:134:C0D8DACXX:1:2202:8024:76971 -HSQ1004:134:C0D8DACXX:2:2204:13511:68706 -HSQ1004:134:C0D8DACXX:1:2205:3216:80121 -HSQ1004:134:C0D8DACXX:3:1207:5114:72346 -HSQ1004:134:C0D8DACXX:1:2206:11163:167767 -HSQ1004:134:C0D8DACXX:1:2207:8887:39400 -HSQ1004:134:C0D8DACXX:1:2208:4978:49903 -HSQ1004:134:C0D8DACXX:1:2302:9168:166412 -HSQ1004:134:C0D8DACXX:1:2304:3490:111358 -HSQ1004:134:C0D8DACXX:1:2305:18771:52169 -HSQ1004:134:C0D8DACXX:1:2305:7737:164944 -HSQ1004:134:C0D8DACXX:1:2306:8722:4245 -HSQ1004:134:C0D8DACXX:1:2308:14659:100370 -HSQ1004:134:C0D8DACXX:2:1105:14014:96905 -HSQ1004:134:C0D8DACXX:2:1105:14014:96905 -HSQ1004:134:C0D8DACXX:3:2105:16965:55839 -HSQ1004:134:C0D8DACXX:4:2104:13690:73261 -HSQ1004:134:C0D8DACXX:2:1201:15484:123089 -HSQ1004:134:C0D8DACXX:2:1202:16766:183429 -HSQ1004:134:C0D8DACXX:2:1203:15334:171958 
-HSQ1004:134:C0D8DACXX:4:1204:11993:52472 -HSQ1004:134:C0D8DACXX:2:1208:19244:65653 -HSQ1004:134:C0D8DACXX:2:1208:13141:180444 -HSQ1004:134:C0D8DACXX:2:1301:1640:52419 -HSQ1004:134:C0D8DACXX:3:2303:15288:126071 -HSQ1004:134:C0D8DACXX:2:1306:10511:110263 -HSQ1004:134:C0D8DACXX:2:2101:20912:188105 -HSQ1004:134:C0D8DACXX:2:2102:11518:77995 -HSQ1004:134:C0D8DACXX:4:1101:11829:181246 -HSQ1004:134:C0D8DACXX:2:2105:8215:72182 -HSQ1004:134:C0D8DACXX:2:2105:18107:187270 -HSQ1004:134:C0D8DACXX:2:2105:10107:190305 -HSQ1004:134:C0D8DACXX:2:2201:12112:64749 -HSQ1004:134:C0D8DACXX:2:2202:9905:21334 -HSQ1004:134:C0D8DACXX:2:2203:6057:22741 -HSQ1004:134:C0D8DACXX:2:2203:3118:42184 -HSQ1004:134:C0D8DACXX:3:2104:5430:162139 -HSQ1004:134:C0D8DACXX:2:2204:13511:68706 -HSQ1004:134:C0D8DACXX:2:2205:13787:99115 -HSQ1004:134:C0D8DACXX:2:2205:1358:145283 -HSQ1004:134:C0D8DACXX:2:2205:17504:149129 -HSQ1004:134:C0D8DACXX:2:2206:10884:98510 -HSQ1004:134:C0D8DACXX:2:2207:14579:68142 -HSQ1004:134:C0D8DACXX:2:2301:16931:7706 -HSQ1004:134:C0D8DACXX:2:2301:1158:20232 -HSQ1004:134:C0D8DACXX:2:2302:19998:27559 -HSQ1004:134:C0D8DACXX:2:2303:11321:91400 -HSQ1004:134:C0D8DACXX:2:2306:19983:139206 -HSQ1004:134:C0D8DACXX:2:2308:18866:131685 -HSQ1004:134:C0D8DACXX:3:1102:13462:12585 -HSQ1004:134:C0D8DACXX:3:1103:8373:37251 -HSQ1004:134:C0D8DACXX:3:1106:4303:78025 -HSQ1004:134:C0D8DACXX:3:1107:17905:37008 -HSQ1004:134:C0D8DACXX:3:1107:8730:180378 -HSQ1004:134:C0D8DACXX:3:1201:19180:40362 -HSQ1004:134:C0D8DACXX:3:1203:14626:156897 -HSQ1004:134:C0D8DACXX:3:1205:12539:14599 -HSQ1004:134:C0D8DACXX:3:1205:9199:127706 -HSQ1004:134:C0D8DACXX:3:1206:17397:160183 -HSQ1004:134:C0D8DACXX:3:1206:13944:184944 -HSQ1004:134:C0D8DACXX:3:1207:14614:33382 -HSQ1004:134:C0D8DACXX:3:1207:5114:72346 -HSQ1004:134:C0D8DACXX:3:1207:9633:97057 -HSQ1004:134:C0D8DACXX:3:1302:2099:114477 -HSQ1004:134:C0D8DACXX:3:1302:15399:154752 -HSQ1004:134:C0D8DACXX:3:1306:9937:61091 -HSQ1004:134:C0D8DACXX:3:1306:5623:63573 
-HSQ1004:134:C0D8DACXX:3:2101:21268:65732 -HSQ1004:134:C0D8DACXX:3:2101:18708:80182 -HSQ1004:134:C0D8DACXX:3:2104:5430:162139 -HSQ1004:134:C0D8DACXX:3:2105:16965:55839 -HSQ1004:134:C0D8DACXX:3:2107:16748:38843 -HSQ1004:134:C0D8DACXX:3:2108:3109:100423 -HSQ1004:134:C0D8DACXX:3:2205:11174:112252 -HSQ1004:134:C0D8DACXX:3:2208:18048:49381 -HSQ1004:134:C0D8DACXX:3:2208:9852:154805 -HSQ1004:134:C0D8DACXX:3:2301:16460:60885 -HSQ1004:134:C0D8DACXX:3:2301:5618:118926 -HSQ1004:134:C0D8DACXX:3:2302:15230:104627 -HSQ1004:134:C0D8DACXX:3:2303:15288:126071 -HSQ1004:134:C0D8DACXX:3:2303:14238:159221 -HSQ1004:134:C0D8DACXX:4:1106:11907:23265 -HSQ1004:134:C0D8DACXX:3:2305:10266:141215 -HSQ1004:134:C0D8DACXX:3:2307:15857:109739 -HSQ1004:134:C0D8DACXX:4:1101:11309:118848 -HSQ1004:134:C0D8DACXX:4:1101:16647:163653 -HSQ1004:134:C0D8DACXX:4:1101:15504:168741 -HSQ1004:134:C0D8DACXX:4:1101:10098:176498 -HSQ1004:134:C0D8DACXX:4:1101:11829:181246 -HSQ1004:134:C0D8DACXX:4:1102:4496:73019 -HSQ1004:134:C0D8DACXX:4:1102:7083:90329 -HSQ1004:134:C0D8DACXX:4:1106:11907:23265 -HSQ1004:134:C0D8DACXX:4:1107:17100:3872 -HSQ1004:134:C0D8DACXX:4:1201:16980:92196 -HSQ1004:134:C0D8DACXX:4:1201:3672:113006 -HSQ1004:134:C0D8DACXX:4:1201:15841:140190 -HSQ1004:134:C0D8DACXX:4:1204:11993:52472 -HSQ1004:134:C0D8DACXX:4:1204:11992:174415 -HSQ1004:134:C0D8DACXX:4:1304:16856:142697 -HSQ1004:134:C0D8DACXX:4:1306:13310:39361 -HSQ1004:134:C0D8DACXX:4:1307:13619:13469 -HSQ1004:134:C0D8DACXX:4:1308:4110:127760 -HSQ1004:134:C0D8DACXX:4:2101:17191:161579 -HSQ1004:134:C0D8DACXX:4:2103:12469:27163 -HSQ1004:134:C0D8DACXX:4:2103:18701:141539 -HSQ1004:134:C0D8DACXX:4:2104:7832:72956 -HSQ1004:134:C0D8DACXX:4:2104:13690:73261 -HSQ1004:134:C0D8DACXX:4:2105:19429:167598 -HSQ1004:134:C0D8DACXX:4:2106:11452:88845 -HSQ1004:134:C0D8DACXX:4:2107:13069:90998 diff --git a/src/htslib-1.18/htscodecs/tests/names/03.names b/src/htslib-1.18/htscodecs/tests/names/03.names deleted file mode 100644 index d6b399c..0000000 --- 
a/src/htslib-1.18/htscodecs/tests/names/03.names +++ /dev/null @@ -1,1000 +0,0 @@ -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/96976/0_4343 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/87695/967_6752 -m130624_213730_00127_c100506232550000001823078908081302_s1_p0/37348/0_4139 -m130629_054003_00127_c100506212550000001823078908081325_s1_p0/139370/0_2834 -m130629_234907_00127_c100506292550000001823078908081344_s1_p0/145777/0_654 -m130629_234907_00127_c100506292550000001823078908081344_s1_p0/72518/0_6625 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/146352/0_13620 -m130702_133558_00127_c100506222550000001823078908081310_s1_p0/127535/0_2742 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/106740/0_4886 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/117333/0_1994 -m130725_182546_00127_c100534042550000001823079711101360_s1_p0/64027/2092_5857 -m130731_224642_00127_c100534282550000001823079711101366_s1_p0/466/0_7037 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/89789/0_6718 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/89789/0_6718 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/62492/0_2474 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/62392/1218_6533 -m130803_050157_00127_c100560082550000001823094812221337_s1_p0/77519/1682_4337 -m130619_205452_00127_c100506172550000001823078908081393_s1_p0/16456/0_4714 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/122586/0_9504 -m130628_000054_00127_c100506132550000001823078908081335_s1_p0/141568/2634_11215 -m130629_234907_00127_c100506292550000001823078908081344_s1_p0/136772/6047_7902 -m130710_054811_00127_c100534572550000001823079711101385_s1_p0/31698/328_2636 -m130712_155519_00127_c100534092550000001823079711101310_s1_p0/84877/5892_9945 -m130712_181139_00127_c100534092550000001823079711101311_s1_p0/96267/2253_2978 
-m130715_203247_00127_c100533892550000001823079711101381_s1_p0/89950/0_10166 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/85229/2028_8966 -m130726_235520_00127_c100534082550000001823079711101324_s1_p0/30171/1708_10560 -m130727_021140_00127_c100534082550000001823079711101325_s1_p0/9312/2855_8662 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/153309/947_3482 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/134404/10064_16602 -m130702_155219_00127_c100506222550000001823078908081311_s1_p0/37033/0_678 -m130725_182546_00127_c100534042550000001823079711101360_s1_p0/64027/2092_5857 -m130620_060001_00127_c100506172550000001823078908081397_s1_p0/107162/0_5308 -m130625_042630_00127_c100506232550000001823078908081305_s1_p0/93356/0_2611 -m130630_020527_00127_c100506292550000001823078908081345_s1_p0/112909/0_6303 -m130709_204242_00127_c100534572550000001823079711101381_s1_p0/89528/0_5464 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/155072/13925_20379 -m130803_002917_00127_c100560082550000001823094812221335_s1_p0/133547/0_1695 -m130618_190615_00127_c100506252550000001823078908081381_s1_p0/13747/3451_10189 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/127446/4404_11019 -m130801_145607_00127_c100541292550000001823084511241300_s1_p0/14819/0_2841 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/128361/872_4288 -m130710_011522_00127_c100534572550000001823079711101383_s1_p0/18731/0_807 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/102440/1142_5439 -m130802_000127_00127_c100541292550000001823084511241304_s1_p0/75432/2242_4738 -m130618_190615_00127_c100506252550000001823078908081381_s1_p0/158365/0_5414 -m130624_235350_00127_c100506232550000001823078908081303_s1_p0/147247/0_916 -m130628_225103_00127_c100506212550000001823078908081322_s1_p0/51877/0_4102 -m130629_191627_00127_c100506292550000001823078908081342_s1_p0/11735/604_2909 
-m130618_165003_00127_c100506252550000001823078908081380_s1_p0/20605/0_1541 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/150095/0_2132 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/83014/0_4636 -m130719_165430_00127_c100533902550000001823079711101343_s1_p0/115455/0_6215 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/19304/2940_8754 -m130712_181139_00127_c100534092550000001823079711101311_s1_p0/118874/6294_8195 -m130723_182647_00127_c100534262550000001823079711101382_s1_p0/62357/777_4871 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/155941/0_2447 -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/138885/1530_2224 -m130801_145607_00127_c100541292550000001823084511241300_s1_p0/65754/9493_15683 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/153309/947_3482 -m130715_203247_00127_c100533892550000001823079711101381_s1_p0/134325/790_5233 -m130715_203247_00127_c100533892550000001823079711101381_s1_p0/134325/790_5233 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/66444/0_3758 -m130620_060001_00127_c100506172550000001823078908081397_s1_p0/149648/1104_14281 -m130801_145607_00127_c100541292550000001823084511241300_s1_p0/113265/0_3728 -m130727_043028_00127_c100534082550000001823079711101326_s1_p0/141082/0_6346 -m130710_054811_00127_c100534572550000001823079711101385_s1_p0/25319/9612_10306 -m130727_064420_00127_c100534082550000001823079711101327_s1_p0/34300/0_3887 -m130719_234626_00127_c100533902550000001823079711101346_s1_p0/20490/257_8052 -m130726_170620_00127_c100534082550000001823079711101321_s1_p0/141579/0_4078 -m130806_014706_00127_c100546662550000001823085811241363_s1_p0/58004/0_2478 -m130618_165003_00127_c100506252550000001823078908081380_s1_p0/113065/0_10723 -m130719_032023_00127_c100533942550000001823079711101306_s1_p0/12532/0_2786 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/54305/7566_11956 
-m130801_192848_00127_c100541292550000001823084511241302_s1_p0/17500/1083_8968 -m130715_203247_00127_c100533892550000001823079711101381_s1_p0/134325/790_5233 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/134404/10064_16602 -m130629_075949_00127_c100506212550000001823078908081326_s1_p0/137017/0_4724 -m130619_205452_00127_c100506172550000001823078908081393_s1_p0/18197/0_1171 -m130803_002917_00127_c100560082550000001823094812221335_s1_p0/133547/0_1695 -m130710_054811_00127_c100534572550000001823079711101385_s1_p0/25319/9612_10306 -m130713_010039_00127_c100534092550000001823079711101314_s1_p0/9009/2753_5988 -m130715_203247_00127_c100533892550000001823079711101381_s1_p0/89950/0_10166 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/91258/6880_20790 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/16162/0_5044 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/113238/6263_10929 -m130716_010527_00127_c100533892550000001823079711101383_s1_p0/50821/8929_11720 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/128361/872_4288 -m130619_205452_00127_c100506172550000001823078908081393_s1_p0/755/0_9314 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/114687/0_5759 -m130725_225833_00127_c100534042550000001823079711101362_s1_p0/30582/0_5145 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/51522/0_295 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/19304/2940_8754 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/133011/0_932 -m130713_010039_00127_c100534092550000001823079711101314_s1_p0/33759/0_3824 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/43098/1130_1276 -m130620_012721_00127_c100506172550000001823078908081395_s1_p0/146157/0_1377 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/91258/6880_20790 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/90945/0_5935 
-m130725_182546_00127_c100534042550000001823079711101360_s1_p0/118129/0_4132 -m130702_202512_00127_c100506222550000001823078908081313_s1_p0/151463/5993_6069 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/155941/0_2447 -m130620_060001_00127_c100506172550000001823078908081397_s1_p0/149648/1104_14281 -m130802_195637_00127_c100560082550000001823094812221333_s1_p0/131751/413_10154 -m130629_234907_00127_c100506292550000001823078908081344_s1_p0/72518/0_6625 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/114613/1034_10174 -m130625_042630_00127_c100506232550000001823078908081305_s1_p0/146713/10532_12981 -m130717_184917_00127_c100534262550000001823079711101380_s1_p0/134939/0_4569 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/91258/6880_20790 -m130802_021747_00127_c100541292550000001823084511241305_s1_p0/145537/0_3559 -m130713_212519_00127_c100534072550000001823079711101334_s1_p0/114075/11216_14537 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/114850/2352_6917 -m130628_043334_00127_c100506132550000001823078908081337_s1_p0/132380/3979_7426 -m130713_212519_00127_c100534072550000001823079711101334_s1_p0/114075/7907_11171 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/54305/0_3176 -m130726_145000_00127_c100534082550000001823079711101320_s1_p0/100485/0_8584 -m130719_053331_00127_c100533942550000001823079711101307_s1_p0/26423/12045_16645 -m130802_021747_00127_c100541292550000001823084511241305_s1_p0/145537/3602_6819 -m130629_234907_00127_c100506292550000001823078908081344_s1_p0/72518/6675_8780 -m130805_185823_00127_c100546662550000001823085811241360_s1_p0/40769/6303_11006 -m130725_204206_00127_c100534042550000001823079711101361_s1_p0/12139/0_3587 -m130802_195637_00127_c100560082550000001823094812221333_s1_p0/23763/3784_12558 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/90171/0_2876 -m130712_070931_00127_c100506142550000001823078908081326_s1_p0/37522/4291_10809 
-m130718_155531_00127_c100533942550000001823079711101301_s1_p0/54305/7566_11956 -m130713_212519_00127_c100534072550000001823079711101334_s1_p0/114075/4880_7861 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/106740/0_4886 -m130703_005756_00127_c100506222550000001823078908081315_s1_p0/106159/5114_7157 -m130703_031724_00127_c100506222550000001823078908081316_s1_p0/129634/1995_3229 -m130801_010302_00127_c100534282550000001823079711101367_s1_p0/64858/1384_8554 -m130713_031659_00127_c100534092550000001823079711101315_s1_p0/136913/606_11120 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/106740/0_4886 -m130714_041419_00127_c100534072550000001823079711101337_s1_p0/57144/13600_16060 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/156636/0_9799 -m130717_184917_00127_c100534262550000001823079711101380_s1_p0/134939/0_4569 -m130803_002917_00127_c100560082550000001823094812221335_s1_p0/76107/3177_7534 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/27704/368_5389 -m130629_032343_00127_c100506212550000001823078908081324_s1_p0/134010/3753_8622 -m130802_021747_00127_c100541292550000001823084511241305_s1_p0/145537/0_3559 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/140219/0_8368 -m130703_005756_00127_c100506222550000001823078908081315_s1_p0/106159/5114_7157 -m130805_185823_00127_c100546662550000001823085811241360_s1_p0/69053/11174_14429 -m130703_005756_00127_c100506222550000001823078908081315_s1_p0/72838/0_9750 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/26942/0_1871 -m130714_041419_00127_c100534072550000001823079711101337_s1_p0/57144/13600_16060 -m130726_011446_00127_c100534042550000001823079711101363_s1_p0/112985/0_5994 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/15324/2081_4809 -m130709_050042_00127_c100534312550000001823079711101305_s1_p0/66883/4433_4507 -m130702_133558_00127_c100506222550000001823078908081310_s1_p0/78085/13355_15170 
-m130702_133558_00127_c100506222550000001823078908081310_s1_p0/78085/13355_15170 -m130619_231101_00127_c100506172550000001823078908081394_s1_p0/81937/463_7563 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/4173/1089_4602 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/4173/1089_4602 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/15324/0_2035 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/27704/368_5389 -m130712_070931_00127_c100506142550000001823078908081326_s1_p0/37522/4291_10809 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/81616/1208_4499 -m130702_133558_00127_c100506222550000001823078908081310_s1_p0/78085/13355_15170 -m130709_204242_00127_c100534572550000001823079711101381_s1_p0/156475/1984_5897 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/4173/1089_4602 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/81616/1208_4499 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/130336/0_175 -m130726_145000_00127_c100534082550000001823079711101320_s1_p0/139620/0_4518 -m130727_043028_00127_c100534082550000001823079711101326_s1_p0/111467/1594_4353 -m130712_044953_00127_c100506252550000001823078908081387_s1_p0/94852/1966_6817 -m130703_005756_00127_c100506222550000001823078908081315_s1_p0/72838/0_9750 -m130725_204206_00127_c100534042550000001823079711101361_s1_p0/71400/1030_2174 -m130802_174017_00127_c100560082550000001823094812221332_s1_p0/87058/0_5169 -m130705_053709_00127_c100506302550000001823078908081305_s1_p0/141975/4797_13683 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/156104/5879_12841 -m130709_093322_00127_c100534312550000001823079711101307_s1_p0/78490/2898_7820 -m130628_022025_00127_c100506132550000001823078908081336_s1_p0/23468/0_1005 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/74034/0_389 -m130712_155519_00127_c100534092550000001823079711101310_s1_p0/32141/13488_19438 
-m130709_182622_00127_c100534572550000001823079711101380_s1_p0/29905/1064_12211 -m130620_060001_00127_c100506172550000001823078908081397_s1_p0/134093/0_7813 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/43334/0_9374 -m130713_031659_00127_c100534092550000001823079711101315_s1_p0/136913/606_11120 -m130629_170007_00127_c100506292550000001823078908081341_s1_p0/41938/900_3656 -m130713_122008_00127_c100534072550000001823079711101330_s1_p0/6546/0_1326 -m130802_195637_00127_c100560082550000001823094812221333_s1_p0/49707/1782_2054 -m130709_093322_00127_c100534312550000001823079711101307_s1_p0/78490/2898_7820 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/7094/404_7559 -m130805_211425_00127_c100546662550000001823085811241361_s1_p0/153598/0_2424 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/65384/1572_1726 -m130802_000127_00127_c100541292550000001823084511241304_s1_p0/112707/0_749 -m130630_020527_00127_c100506292550000001823078908081345_s1_p0/40839/0_640 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/7094/404_7559 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/71860/0_1100 -m130805_185823_00127_c100546662550000001823085811241360_s1_p0/5646/4597_14057 -m130618_233855_00127_c100506252550000001823078908081383_s1_p0/47655/0_4171 -m130719_234626_00127_c100533902550000001823079711101346_s1_p0/34956/0_1504 -m130628_225103_00127_c100506212550000001823078908081322_s1_p0/10739/5775_10978 -m130628_022025_00127_c100506132550000001823078908081336_s1_p0/54755/10880_14278 -m130803_050157_00127_c100560082550000001823094812221337_s1_p0/35508/0_1007 -m130628_225103_00127_c100506212550000001823078908081322_s1_p0/10739/5775_10978 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/71860/0_1100 -m130719_165430_00127_c100533902550000001823079711101343_s1_p0/87169/0_1104 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/125010/2673_3449 
-m130803_050157_00127_c100560082550000001823094812221337_s1_p0/75078/0_3650 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/56407/958_1224 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/146173/2610_4448 -m130630_042519_00127_c100506292550000001823078908081346_s1_p0/69919/2112_6460 -m130801_145607_00127_c100541292550000001823084511241300_s1_p0/126875/1876_4878 -m130713_053616_00127_c100534092550000001823079711101316_s1_p0/54771/134_1447 -m130624_192110_00127_c100506232550000001823078908081301_s1_p0/77167/8724_10080 -m130803_050157_00127_c100560082550000001823094812221337_s1_p0/35508/8481_9407 -m130625_021010_00127_c100506232550000001823078908081304_s1_p0/147990/0_732 -m130702_155219_00127_c100506222550000001823078908081311_s1_p0/139772/3918_4673 -m130712_181139_00127_c100534092550000001823079711101311_s1_p0/70819/0_2655 -m130712_155519_00127_c100534092550000001823079711101310_s1_p0/32141/0_13440 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/11738/0_3313 -m130801_010302_00127_c100534282550000001823079711101367_s1_p0/130930/1344_4620 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/114025/497_2243 -m130702_133558_00127_c100506222550000001823078908081310_s1_p0/161611/0_1986 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/37125/0_5085 -m130726_192240_00127_c100534082550000001823079711101322_s1_p0/115774/4296_8106 -m130624_170450_00127_c100506232550000001823078908081300_s1_p0/25950/0_4732 -m130628_043334_00127_c100506132550000001823078908081337_s1_p0/133560/799_3109 -m130723_225927_00127_c100534262550000001823079711101384_s1_p0/11670/0_5211 -m130726_192240_00127_c100534082550000001823079711101322_s1_p0/115774/712_4251 -m130618_165003_00127_c100506252550000001823078908081380_s1_p0/31521/0_1343 -m130624_170450_00127_c100506232550000001823078908081300_s1_p0/141296/2936_4261 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/101753/0_2420 
-m130726_192240_00127_c100534082550000001823079711101322_s1_p0/115774/712_4251 -m130719_165430_00127_c100533902550000001823079711101343_s1_p0/95919/6342_15810 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/3753/5250_14699 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/18516/0_2713 -m130618_165003_00127_c100506252550000001823078908081380_s1_p0/31521/0_1343 -m130806_014706_00127_c100546662550000001823085811241363_s1_p0/119009/0_12932 -m130618_165003_00127_c100506252550000001823078908081380_s1_p0/31521/0_1343 -m130727_021140_00127_c100534082550000001823079711101325_s1_p0/159300/0_11463 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/111466/210_2803 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/111466/2849_5455 -m130727_021140_00127_c100534082550000001823079711101325_s1_p0/159300/11513_12572 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/43534/1449_3777 -m130714_041419_00127_c100534072550000001823079711101337_s1_p0/93897/0_565 -m130618_165003_00127_c100506252550000001823078908081380_s1_p0/160792/0_1290 -m130802_174017_00127_c100560082550000001823094812221332_s1_p0/87058/0_5169 -m130801_145607_00127_c100541292550000001823084511241300_s1_p0/109300/0_4428 -m130629_191627_00127_c100506292550000001823078908081342_s1_p0/74130/634_2283 -m130629_191627_00127_c100506292550000001823078908081342_s1_p0/74130/0_592 -m130712_070931_00127_c100506142550000001823078908081326_s1_p0/22470/3333_6814 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/153872/3137_9769 -m130801_010302_00127_c100534282550000001823079711101367_s1_p0/130930/0_1298 -m130617_170700_00127_c100506142550000001823078908081325_s1_p0/93906/0_9581 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/43534/1449_3777 -m130628_043334_00127_c100506132550000001823078908081337_s1_p0/142612/755_3603 -m130712_070931_00127_c100506142550000001823078908081326_s1_p0/22470/3333_6814 
-m130725_204206_00127_c100534042550000001823079711101361_s1_p0/88375/6466_7920 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/107156/7855_13110 -m130709_071930_00127_c100534312550000001823079711101306_s1_p0/144272/243_3280 -m130702_155219_00127_c100506222550000001823078908081311_s1_p0/41184/0_4244 -m130726_170620_00127_c100534082550000001823079711101321_s1_p0/26973/0_8217 -m130726_145000_00127_c100534082550000001823079711101320_s1_p0/22638/1738_4305 -m130620_060001_00127_c100506172550000001823078908081397_s1_p0/134093/0_7813 -m130620_060001_00127_c100506172550000001823078908081397_s1_p0/134093/0_7813 -m130629_170007_00127_c100506292550000001823078908081341_s1_p0/74578/1441_3946 -m130624_213730_00127_c100506232550000001823078908081302_s1_p0/156723/9127_13172 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/43334/0_9374 -m130716_032147_00127_c100533892550000001823079711101384_s1_p0/143635/12185_20091 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/38337/0_282 -m130712_070931_00127_c100506142550000001823078908081326_s1_p0/22470/3333_6814 -m130719_053331_00127_c100533942550000001823079711101307_s1_p0/133310/0_1286 -m130702_224133_00127_c100506222550000001823078908081314_s1_p0/94419/0_1875 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/124304/0_6198 -m130713_122008_00127_c100534072550000001823079711101330_s1_p0/25358/0_2240 -m130726_102006_00127_c100534042550000001823079711101367_s1_p0/57321/0_1462 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/119409/715_11586 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/131235/6624_13485 -m130629_144357_00127_c100506292550000001823078908081340_s1_p0/115365/0_124 -m130628_000054_00127_c100506132550000001823078908081335_s1_p0/42374/5869_8549 -m130805_211425_00127_c100546662550000001823085811241361_s1_p0/17952/0_2469 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/71662/5698_8476 
-m130727_021140_00127_c100534082550000001823079711101325_s1_p0/159300/0_11463 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/119409/715_11586 -m130705_101000_00127_c100506302550000001823078908081307_s1_p0/41406/10352_11532 -m130709_071930_00127_c100534312550000001823079711101306_s1_p0/142034/10836_12926 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/9584/2413_3573 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/71662/0_353 -m130629_213247_00127_c100506292550000001823078908081343_s1_p0/46962/0_2070 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/106330/3447_4223 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/119409/715_11586 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/72109/0_903 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/143792/0_2711 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/58895/3187_7683 -m130719_032023_00127_c100533942550000001823079711101306_s1_p0/41608/12313_12653 -m130620_060001_00127_c100506172550000001823078908081397_s1_p0/141262/444_8007 -m130713_053616_00127_c100534092550000001823079711101316_s1_p0/103066/2138_6414 -m130702_155219_00127_c100506222550000001823078908081311_s1_p0/3815/10545_11063 -m130625_064552_00127_c100506232550000001823078908081306_s1_p0/92927/0_4818 -m130712_070931_00127_c100506142550000001823078908081326_s1_p0/44025/3255_6435 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/119409/715_11586 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/119409/715_11586 -m130719_143810_00127_c100533902550000001823079711101342_s1_p0/79352/0_143 -m130801_010302_00127_c100534282550000001823079711101367_s1_p0/91336/0_8393 -m130624_213730_00127_c100506232550000001823078908081302_s1_p0/83588/4060_6211 -m130617_170700_00127_c100506142550000001823078908081325_s1_p0/147310/1393_16371 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/143792/0_2711 
-m130803_024915_00127_c100560082550000001823094812221336_s1_p0/82827/5013_9523 -m130716_075734_00127_c100533892550000001823079711101386_s1_p0/145272/4486_8688 -m130709_204242_00127_c100534572550000001823079711101381_s1_p0/87474/2363_2547 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/90593/0_579 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/145264/0_2096 -m130618_165003_00127_c100506252550000001823078908081380_s1_p0/31745/1286_2886 -m130713_031659_00127_c100534092550000001823079711101315_s1_p0/40886/384_1034 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/125829/0_5779 -m130710_011522_00127_c100534572550000001823079711101383_s1_p0/132847/497_3624 -m130713_031659_00127_c100534092550000001823079711101315_s1_p0/40886/384_1034 -m130629_213247_00127_c100506292550000001823078908081343_s1_p0/148728/0_416 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/12512/2171_4447 -m130713_212519_00127_c100534072550000001823079711101334_s1_p0/64208/0_1440 -m130724_011547_00127_c100534262550000001823079711101385_s1_p0/151626/992_5468 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/11626/0_3030 -m130717_210534_00127_c100534262550000001823079711101381_s1_p0/110105/1948_5816 -m130724_011547_00127_c100534262550000001823079711101385_s1_p0/151626/992_5468 -m130805_185823_00127_c100546662550000001823085811241360_s1_p0/70685/7100_10303 -m130805_185823_00127_c100546662550000001823085811241360_s1_p0/70685/7100_10303 -m130724_011547_00127_c100534262550000001823079711101385_s1_p0/151626/992_5468 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/69768/6569_9236 -m130725_225833_00127_c100534042550000001823079711101362_s1_p0/7163/2347_8949 -m130705_053709_00127_c100506302550000001823078908081305_s1_p0/23738/10553_15127 -m130710_011522_00127_c100534572550000001823079711101383_s1_p0/18921/0_2343 -m130710_011522_00127_c100534572550000001823079711101383_s1_p0/18921/0_2343 
-m130709_204242_00127_c100534572550000001823079711101381_s1_p0/138660/4923_8114 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/58408/0_173 -m130724_011547_00127_c100534262550000001823079711101385_s1_p0/151626/992_5468 -m130718_202811_00127_c100533942550000001823079711101303_s1_p0/130825/5838_8333 -m130712_202759_00127_c100534092550000001823079711101312_s1_p0/101694/3019_7262 -m130803_002917_00127_c100560082550000001823094812221335_s1_p0/78310/118_2525 -m130803_002917_00127_c100560082550000001823094812221335_s1_p0/78310/5105_7562 -m130624_170450_00127_c100506232550000001823078908081300_s1_p0/103126/0_2582 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/83060/1988_4757 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/143270/2789_6945 -m130725_225833_00127_c100534042550000001823079711101362_s1_p0/7163/2347_8949 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/124400/289_760 -m130628_000054_00127_c100506132550000001823078908081335_s1_p0/103400/4925_7589 -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/116364/0_121 -m130703_005756_00127_c100506222550000001823078908081315_s1_p0/135029/502_3955 -m130805_233054_00127_c100546662550000001823085811241362_s1_p0/142574/5224_6370 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/22542/9849_12042 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/18551/3930_4181 -m130628_000054_00127_c100506132550000001823078908081335_s1_p0/103400/4925_7589 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/71145/4441_5488 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/71145/6633_7711 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/108896/4557_8468 -m130620_034626_00127_c100506172550000001823078908081396_s1_p0/6375/2786_4744 -m130723_204307_00127_c100534262550000001823079711101383_s1_p0/154656/0_805 -m130802_000127_00127_c100541292550000001823084511241304_s1_p0/17178/0_1393 
-m130805_185823_00127_c100546662550000001823085811241360_s1_p0/54237/0_3395 -m130705_053709_00127_c100506302550000001823078908081305_s1_p0/3415/0_810 -m130726_170620_00127_c100534082550000001823079711101321_s1_p0/10788/1144_2040 -m130705_053709_00127_c100506302550000001823078908081305_s1_p0/3415/0_810 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/71145/0_1029 -m130628_000054_00127_c100506132550000001823078908081335_s1_p0/103400/4925_7589 -m130726_054726_00127_c100534042550000001823079711101365_s1_p0/127564/0_629 -m130719_212710_00127_c100533902550000001823079711101345_s1_p0/55792/0_4517 -m130725_204206_00127_c100534042550000001823079711101361_s1_p0/8282/500_8897 -m130726_080606_00127_c100534042550000001823079711101366_s1_p0/161096/1185_4530 -m130713_190859_00127_c100534072550000001823079711101333_s1_p0/105136/0_8098 -m130802_195637_00127_c100560082550000001823094812221333_s1_p0/79722/0_3302 -m130731_224642_00127_c100534282550000001823079711101366_s1_p0/81272/1461_3716 -m130727_021140_00127_c100534082550000001823079711101325_s1_p0/102228/2539_5485 -m130703_005756_00127_c100506222550000001823078908081315_s1_p0/141090/4775_5054 -m130703_005756_00127_c100506222550000001823078908081315_s1_p0/141090/202_2442 -m130719_143810_00127_c100533902550000001823079711101342_s1_p0/129092/0_4747 -m130802_195637_00127_c100560082550000001823094812221333_s1_p0/79722/0_3302 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/153488/6563_10970 -m130802_195637_00127_c100560082550000001823094812221333_s1_p0/79722/0_3302 -m130629_170007_00127_c100506292550000001823078908081341_s1_p0/159796/2716_5189 -m130712_202759_00127_c100534092550000001823079711101312_s1_p0/101694/3019_7262 -m130629_144357_00127_c100506292550000001823078908081340_s1_p0/128834/0_2737 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/153488/6563_10970 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/16121/6132_11646 
-m130719_032023_00127_c100533942550000001823079711101306_s1_p0/85718/0_881 -m130802_195637_00127_c100560082550000001823094812221333_s1_p0/79722/0_3302 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/16121/6132_11646 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/72601/463_2989 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/99383/981_4429 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/99383/7949_13334 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/99383/4473_7905 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/152450/6211_11593 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/99383/7949_13334 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/99383/981_4429 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/99383/4473_7905 -m130711_172813_00127_c100533802550000001823079711101373_s1_p0/10659/0_3183 -m130705_053709_00127_c100506302550000001823078908081305_s1_p0/137329/1287_4675 -m130705_053709_00127_c100506302550000001823078908081305_s1_p0/137329/0_1239 -m130717_210534_00127_c100534262550000001823079711101381_s1_p0/95636/10459_14135 -m130805_233054_00127_c100546662550000001823085811241362_s1_p0/162592/0_5459 -m130726_080606_00127_c100534042550000001823079711101366_s1_p0/161096/0_1136 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/16121/6132_11646 -m130705_010438_00127_c100506302550000001823078908081303_s1_p0/140317/0_5730 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/85838/0_548 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/72601/463_2989 -m130726_213900_00127_c100534082550000001823079711101323_s1_p0/104538/0_6308 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/99383/0_935 -m130724_011547_00127_c100534262550000001823079711101385_s1_p0/12699/2796_3230 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/21432/4622_8043 
-m130705_032050_00127_c100506302550000001823078908081304_s1_p0/47719/897_2577 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/59676/906_1720 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/16121/6132_11646 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/21432/4622_8043 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/92018/3991_6378 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/16121/6132_11646 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/45976/0_9493 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/157369/3503_9014 -m130709_071930_00127_c100534312550000001823079711101306_s1_p0/8313/9761_13084 -m130805_233054_00127_c100546662550000001823085811241362_s1_p0/162592/0_5459 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/59209/0_6879 -m130629_101243_00127_c100506212550000001823078908081327_s1_p0/20076/0_9169 -m130726_213900_00127_c100534082550000001823079711101323_s1_p0/104538/0_6308 -m130629_101243_00127_c100506212550000001823078908081327_s1_p0/20076/0_9169 -m130717_210534_00127_c100534262550000001823079711101381_s1_p0/95636/10459_14135 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/152450/6211_11593 -m130629_101243_00127_c100506212550000001823078908081327_s1_p0/20076/0_9169 -m130805_233054_00127_c100546662550000001823085811241362_s1_p0/162592/0_5459 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/158023/4991_9759 -m130725_225833_00127_c100534042550000001823079711101362_s1_p0/128213/1004_3356 -m130806_014706_00127_c100546662550000001823085811241363_s1_p0/51919/0_8298 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/152450/6211_11593 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/59209/0_6879 -m130806_014706_00127_c100546662550000001823085811241363_s1_p0/51919/0_8298 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/59209/0_6879 
-m130805_233054_00127_c100546662550000001823085811241362_s1_p0/162592/0_5459 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/158023/4991_9759 -m130802_043639_00127_c100541292550000001823084511241306_s1_p0/43499/2021_12070 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/8377/3299_5534 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/158023/4991_9759 -m130725_225833_00127_c100534042550000001823079711101362_s1_p0/128213/1004_3356 -m130629_170007_00127_c100506292550000001823078908081341_s1_p0/44440/1598_3280 -m130629_170007_00127_c100506292550000001823078908081341_s1_p0/44440/5022_6989 -m130726_145000_00127_c100534082550000001823079711101320_s1_p0/146423/869_9477 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/8377/3299_5534 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/59209/0_6879 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/158023/4991_9759 -m130717_210534_00127_c100534262550000001823079711101381_s1_p0/53844/0_897 -m130802_043639_00127_c100541292550000001823084511241306_s1_p0/43499/2021_12070 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/932/0_6632 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/111011/0_6509 -m130620_034626_00127_c100506172550000001823078908081396_s1_p0/52760/3394_3693 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/87522/6518_7242 -m130726_145000_00127_c100534082550000001823079711101320_s1_p0/146423/869_9477 -m130802_043639_00127_c100541292550000001823084511241306_s1_p0/43499/2021_12070 -m130802_043639_00127_c100541292550000001823084511241306_s1_p0/45376/4429_4927 -m130629_170007_00127_c100506292550000001823078908081341_s1_p0/44440/5022_6989 -m130726_235520_00127_c100534082550000001823079711101324_s1_p0/123470/3893_5914 -m130618_165003_00127_c100506252550000001823078908081380_s1_p0/104183/1145_2693 -m130713_010039_00127_c100534092550000001823079711101314_s1_p0/19097/6148_6646 
-m130803_024915_00127_c100560082550000001823094812221336_s1_p0/140780/0_7566 -m130710_011522_00127_c100534572550000001823079711101383_s1_p0/147167/1281_1552 -m130719_165430_00127_c100533902550000001823079711101343_s1_p0/48814/0_391 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/57043/0_955 -m130802_174017_00127_c100560082550000001823094812221332_s1_p0/67523/0_1203 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/18302/0_780 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/129603/0_8138 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/18302/846_4522 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/18302/8277_9008 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/68962/6655_9324 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/18016/16864_19226 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/68962/6655_9324 -m130709_093322_00127_c100534312550000001823079711101307_s1_p0/33495/0_86 -m130731_224642_00127_c100534282550000001823079711101366_s1_p0/13159/0_3809 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/68962/0_6604 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/50126/1327_3279 -m130709_071930_00127_c100534312550000001823079711101306_s1_p0/72115/1192_3342 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/18016/19274_21453 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/68962/6655_9324 -m130702_224133_00127_c100506222550000001823078908081314_s1_p0/88374/1724_2765 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/108839/9852_17794 -m130709_204242_00127_c100534572550000001823079711101381_s1_p0/138380/12978_15357 -m130713_165239_00127_c100534072550000001823079711101332_s1_p0/18302/4573_8230 -m130709_071930_00127_c100534312550000001823079711101306_s1_p0/72115/0_1143 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/18016/19274_21453 
-m130713_143619_00127_c100534072550000001823079711101331_s1_p0/130300/0_2582 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/138045/3387_4930 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/129195/2116_3212 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/129195/0_2068 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/18016/21498_23945 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/47131/567_6586 -m130726_145000_00127_c100534082550000001823079711101320_s1_p0/106010/5177_8831 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/18016/14296_16815 -m130731_224642_00127_c100534282550000001823079711101366_s1_p0/65924/0_2979 -m130709_204242_00127_c100534572550000001823079711101381_s1_p0/138380/20022_21249 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/1279/0_1347 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/129603/0_8138 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/128154/0_2904 -m130709_024422_00127_c100534312550000001823079711101304_s1_p0/132930/0_1895 -m130619_140541_00127_c100506172550000001823078908081390_s1_p0/16135/2382_5787 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/109402/205_2819 -m130805_211425_00127_c100546662550000001823085811241361_s1_p0/12983/0_2950 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/91683/3255_8130 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/107227/0_11478 -m130802_221257_00127_c100560082550000001823094812221334_s1_p0/107227/0_11478 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/110693/2471_8078 -m130719_032023_00127_c100533942550000001823079711101306_s1_p0/136555/841_2417 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/78928/6035_12741 -m130801_010302_00127_c100534282550000001823079711101367_s1_p0/7986/3750_6883 -m130715_224907_00127_c100533892550000001823079711101382_s1_p0/109884/1838_9147 
-m130802_221257_00127_c100560082550000001823094812221334_s1_p0/107227/11521_16671 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/124103/5180_13071 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/71745/0_5061 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/6735/10558_12271 -m130618_212235_00127_c100506252550000001823078908081382_s1_p0/137300/4333_15030 -m130629_144357_00127_c100506292550000001823078908081340_s1_p0/108472/1334_2588 -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/70458/4868_7564 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/110658/2666_9111 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/89311/847_3240 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/89311/0_800 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/6735/9487_10505 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/71745/0_5061 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/110658/2666_9111 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/89311/0_800 -m130712_224419_00127_c100534092550000001823079711101313_s1_p0/91498/0_1073 -m130630_042519_00127_c100506292550000001823078908081346_s1_p0/76600/592_2171 -m130712_224419_00127_c100534092550000001823079711101313_s1_p0/26628/0_7244 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/131419/9281_11109 -m130618_212235_00127_c100506252550000001823078908081382_s1_p0/4337/0_1034 -m130618_212235_00127_c100506252550000001823078908081382_s1_p0/4337/1079_2515 -m130709_050042_00127_c100534312550000001823079711101305_s1_p0/36272/0_4725 -m130712_224419_00127_c100534092550000001823079711101313_s1_p0/26628/0_7244 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/95871/0_851 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/131419/3332_6634 -m130709_050042_00127_c100534312550000001823079711101305_s1_p0/36272/0_4725 
-m130620_060001_00127_c100506172550000001823078908081397_s1_p0/46163/493_3304 -m130620_060001_00127_c100506172550000001823078908081397_s1_p0/46163/3349_4816 -m130619_162201_00127_c100506172550000001823078908081391_s1_p0/49216/0_7467 -m130702_133558_00127_c100506222550000001823078908081310_s1_p0/157678/10582_10691 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/12780/0_3010 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/124103/5180_13071 -m130629_101243_00127_c100506212550000001823078908081327_s1_p0/56432/0_1362 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/124103/5180_13071 -m130628_225103_00127_c100506212550000001823078908081322_s1_p0/136301/3317_6025 -m130624_213730_00127_c100506232550000001823078908081302_s1_p0/25078/11397_13616 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/89769/6524_10116 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/124103/5180_13071 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/135664/0_3976 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/135664/0_3976 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/47940/3333_12812 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/83923/1514_8943 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/92006/3261_5874 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/135664/0_3976 -m130624_213730_00127_c100506232550000001823078908081302_s1_p0/25078/1546_4982 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/53415/8631_9309 -m130624_213730_00127_c100506232550000001823078908081302_s1_p0/25078/11397_13616 -m130712_044953_00127_c100506252550000001823078908081387_s1_p0/85761/1364_5344 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/53415/3592_8575 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/43084/0_3756 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/123215/15008_22563 
-m130801_053542_00127_c100534262550000001823079711101387_s1_p0/67539/1211_10526 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/47940/3333_12812 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/67539/1211_10526 -m130618_233855_00127_c100506252550000001823078908081383_s1_p0/49385/0_5882 -m130731_224642_00127_c100534282550000001823079711101366_s1_p0/74903/1155_4251 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/92006/3261_5874 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/43084/0_3756 -m130801_145607_00127_c100541292550000001823084511241300_s1_p0/48794/756_5209 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/67539/1211_10526 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/21211/0_5940 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/134716/1651_12368 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/82641/10268_12926 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/82641/4328_7263 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/82641/1227_4282 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/82641/7307_10221 -m130628_000054_00127_c100506132550000001823078908081335_s1_p0/120208/6169_8202 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/89769/13780_14486 -m130726_192240_00127_c100534082550000001823079711101322_s1_p0/90276/0_10882 -m130714_041419_00127_c100534072550000001823079711101337_s1_p0/26736/1582_6683 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/25974/1556_4109 -m130625_021010_00127_c100506232550000001823078908081304_s1_p0/59105/0_2893 -m130803_024915_00127_c100560082550000001823094812221336_s1_p0/52286/4825_13868 -m130625_090038_00127_c100506232550000001823078908081307_s1_p0/2954/3264_5160 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/142028/711_6122 -m130712_155519_00127_c100534092550000001823079711101310_s1_p0/12269/0_3934 
-m130711_194433_00127_c100533802550000001823079711101374_s1_p0/47940/3333_12812 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/121158/3803_4828 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/47940/3333_12812 -m130731_224642_00127_c100534282550000001823079711101366_s1_p0/74903/0_1109 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/25974/0_1515 -m130726_192240_00127_c100534082550000001823079711101322_s1_p0/90276/0_10882 -m130806_014706_00127_c100546662550000001823085811241363_s1_p0/122995/3907_7211 -m130806_014706_00127_c100546662550000001823085811241363_s1_p0/122995/7250_9299 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/136847/1756_12557 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/148018/2796_5234 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/148018/337_2749 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/148018/5279_7684 -m130712_224419_00127_c100534092550000001823079711101313_s1_p0/143774/6173_12899 -m130714_041419_00127_c100534072550000001823079711101337_s1_p0/26736/1582_6683 -m130805_185823_00127_c100546662550000001823085811241360_s1_p0/47061/725_3893 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/47940/3333_12812 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/48367/0_9612 -m130712_224419_00127_c100534092550000001823079711101313_s1_p0/143774/6173_12899 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/136847/1756_12557 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/95917/2007_9229 -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/42858/0_925 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/47679/5369_7749 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/47679/0_536 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/136847/1756_12557 -m130806_014706_00127_c100546662550000001823085811241363_s1_p0/122995/3907_7211 
-m130802_021747_00127_c100541292550000001823084511241305_s1_p0/50511/0_13712 -m130723_225927_00127_c100534262550000001823079711101384_s1_p0/56717/3187_6843 -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/42858/0_925 -m130710_054811_00127_c100534572550000001823079711101385_s1_p0/161666/0_7140 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/136847/1756_12557 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/47679/7796_10185 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/75087/0_9923 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/95917/2007_9229 -m130806_014706_00127_c100546662550000001823085811241363_s1_p0/122995/823_3862 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/148018/0_290 -m130711_194433_00127_c100533802550000001823079711101374_s1_p0/47940/3333_12812 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/47679/5369_7749 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/47679/5369_7749 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/67539/1211_10526 -m130709_071930_00127_c100534312550000001823079711101306_s1_p0/90795/0_300 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/62300/4599_15475 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/109258/2102_3736 -m130620_012721_00127_c100506172550000001823078908081395_s1_p0/80238/1337_1904 -m130727_043028_00127_c100534082550000001823079711101326_s1_p0/160475/0_2814 -m130802_021747_00127_c100541292550000001823084511241305_s1_p0/127155/1279_9091 -m130802_043639_00127_c100541292550000001823084511241306_s1_p0/26660/0_6048 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/46675/1768_6270 -m130724_011547_00127_c100534262550000001823079711101385_s1_p0/97902/1556_9054 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/38163/0_1024 -m130709_204242_00127_c100534572550000001823079711101381_s1_p0/139004/0_2749 
-m130727_043028_00127_c100534082550000001823079711101326_s1_p0/75798/0_2782 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/100157/0_919 -m130712_070931_00127_c100506142550000001823078908081326_s1_p0/146246/0_3906 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/136847/1756_12557 -m130726_033106_00127_c100534042550000001823079711101364_s1_p0/157081/1592_5302 -m130726_080606_00127_c100534042550000001823079711101366_s1_p0/113912/1230_8465 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/62300/4599_15475 -m130717_184917_00127_c100534262550000001823079711101380_s1_p0/159840/8140_9354 -m130715_224907_00127_c100533892550000001823079711101382_s1_p0/23232/3307_6138 -m130712_070931_00127_c100506142550000001823078908081326_s1_p0/146246/0_3906 -m130629_032343_00127_c100506212550000001823078908081324_s1_p0/120201/7588_7761 -m130719_212710_00127_c100533902550000001823079711101345_s1_p0/14422/952_15206 -m130715_203247_00127_c100533892550000001823079711101381_s1_p0/94029/1860_5889 -m130629_032343_00127_c100506212550000001823078908081324_s1_p0/54652/0_3534 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/92232/4383_10088 -m130717_184917_00127_c100534262550000001823079711101380_s1_p0/159840/7662_8088 -m130719_212710_00127_c100533902550000001823079711101345_s1_p0/14422/952_15206 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/62300/4599_15475 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/66093/4233_6107 -m130709_204242_00127_c100534572550000001823079711101381_s1_p0/141875/3349_5984 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/114276/11692_19003 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/136847/1756_12557 -m130801_053542_00127_c100534262550000001823079711101387_s1_p0/62300/4599_15475 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/92232/4383_10088 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/92232/4383_10088 
-m130719_212710_00127_c100533902550000001823079711101345_s1_p0/14422/952_15206 -m130719_212710_00127_c100533902550000001823079711101345_s1_p0/14422/952_15206 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/92232/4383_10088 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/136847/1756_12557 -m130801_145607_00127_c100541292550000001823084511241300_s1_p0/74325/3104_3354 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/37846/2776_4278 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/32120/5586_9275 -m130719_212710_00127_c100533902550000001823079711101345_s1_p0/14422/952_15206 -m130802_000127_00127_c100541292550000001823084511241304_s1_p0/137136/0_8055 -m130629_213247_00127_c100506292550000001823078908081343_s1_p0/145343/0_3399 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/92232/4383_10088 -m130713_010039_00127_c100534092550000001823079711101314_s1_p0/98365/430_3698 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/41805/4695_17610 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/127152/13422_13544 -m130719_032023_00127_c100533942550000001823079711101306_s1_p0/143041/3103_7111 -m130629_075949_00127_c100506212550000001823078908081326_s1_p0/85786/0_16389 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/109462/4496_7750 -m130709_050042_00127_c100534312550000001823079711101305_s1_p0/112650/1983_3176 -m130629_010723_00127_c100506212550000001823078908081323_s1_p0/88899/0_1809 -m130711_172813_00127_c100533802550000001823079711101373_s1_p0/40454/0_7866 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/151685/1851_2115 -m130712_181139_00127_c100534092550000001823079711101311_s1_p0/44630/0_2640 -m130711_172813_00127_c100533802550000001823079711101373_s1_p0/11365/2522_6868 -m130709_050042_00127_c100534312550000001823079711101305_s1_p0/127994/7787_10825 -m130726_054726_00127_c100534042550000001823079711101365_s1_p0/25545/0_5966 
-m130629_054003_00127_c100506212550000001823078908081325_s1_p0/153395/0_9106 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/42875/3352_3599 -m130716_053807_00127_c100533892550000001823079711101385_s1_p0/56850/0_4189 -m130805_233054_00127_c100546662550000001823085811241362_s1_p0/1814/0_6104 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/111304/0_5397 -m130726_033106_00127_c100534042550000001823079711101364_s1_p0/86449/3560_8448 -m130702_133558_00127_c100506222550000001823078908081310_s1_p0/43806/0_3976 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/42174/0_7312 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/88371/0_1626 -m130712_202759_00127_c100534092550000001823079711101312_s1_p0/130333/11503_11966 -m130716_053807_00127_c100533892550000001823079711101385_s1_p0/97914/0_2001 -m130723_182647_00127_c100534262550000001823079711101382_s1_p0/142762/0_1869 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/42174/0_7312 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/13424/4339_5960 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/87201/0_2674 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/131824/0_8609 -m130726_235520_00127_c100534082550000001823079711101324_s1_p0/33591/11035_13906 -m130715_203247_00127_c100533892550000001823079711101381_s1_p0/114225/1168_3590 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/143604/10961_14068 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/43198/14261_15622 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/143604/9398_10907 -m130719_143810_00127_c100533902550000001823079711101342_s1_p0/74362/0_9333 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/143668/12120_12258 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/108043/3298_9103 -m130716_032147_00127_c100533892550000001823079711101384_s1_p0/26212/5406_17201 
-m130719_143810_00127_c100533902550000001823079711101342_s1_p0/74362/0_9333 -m130717_210534_00127_c100534262550000001823079711101381_s1_p0/14781/0_4313 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/57025/2938_9862 -m130719_143810_00127_c100533902550000001823079711101342_s1_p0/74362/0_9333 -m130801_145607_00127_c100541292550000001823084511241300_s1_p0/126004/4173_11762 -m130717_210534_00127_c100534262550000001823079711101381_s1_p0/14781/0_4313 -m130719_143810_00127_c100533902550000001823079711101342_s1_p0/74362/0_9333 -m130629_170007_00127_c100506292550000001823078908081341_s1_p0/151834/0_685 -m130726_235520_00127_c100534082550000001823079711101324_s1_p0/71007/2751_8222 -m130723_204307_00127_c100534262550000001823079711101383_s1_p0/24538/0_724 -m130705_101000_00127_c100506302550000001823078908081307_s1_p0/55081/0_2446 -m130710_054811_00127_c100534572550000001823079711101385_s1_p0/8502/5233_8231 -m130803_050157_00127_c100560082550000001823094812221337_s1_p0/11352/0_10686 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/25397/3155_6925 -m130629_075949_00127_c100506212550000001823078908081326_s1_p0/85918/0_937 -m130713_190859_00127_c100534072550000001823079711101333_s1_p0/42511/0_920 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/66354/0_606 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/126481/3061_6462 -m130705_010438_00127_c100506302550000001823078908081303_s1_p0/3124/0_2358 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/126876/0_2152 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/136817/8264_11316 -m130625_064552_00127_c100506232550000001823078908081306_s1_p0/70382/3198_5265 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/146433/0_6980 -m130705_010438_00127_c100506302550000001823078908081303_s1_p0/120699/7438_9114 -m130713_190859_00127_c100534072550000001823079711101333_s1_p0/124946/952_3476 
-m130723_182647_00127_c100534262550000001823079711101382_s1_p0/16426/4785_10159 -m130723_182647_00127_c100534262550000001823079711101382_s1_p0/16426/4785_10159 -m130712_181139_00127_c100534092550000001823079711101311_s1_p0/114849/1058_2602 -m130628_000054_00127_c100506132550000001823078908081335_s1_p0/132845/9222_14649 -m130709_002802_00127_c100534312550000001823079711101303_s1_p0/121317/0_9014 -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/130066/0_5208 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/144809/0_10450 -m130719_234626_00127_c100533902550000001823079711101346_s1_p0/137450/0_3777 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/6477/1311_12650 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/74472/0_2850 -m130802_065027_00127_c100541292550000001823084511241307_s1_p0/121563/0_6032 -m130719_234626_00127_c100533902550000001823079711101346_s1_p0/137450/0_3777 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/6477/1311_12650 -m130713_053616_00127_c100534092550000001823079711101316_s1_p0/163038/1139_2633 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/108657/1087_3232 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/141250/0_11312 -m130629_234907_00127_c100506292550000001823078908081344_s1_p0/100535/0_9759 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/38461/0_2789 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/6477/1311_12650 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/148984/0_4823 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/5493/1337_4705 -m130620_034626_00127_c100506172550000001823078908081396_s1_p0/61726/0_9356 -m130705_053709_00127_c100506302550000001823078908081305_s1_p0/79038/5668_7572 -m130709_071930_00127_c100534312550000001823079711101306_s1_p0/112704/3386_8783 -m130713_190859_00127_c100534072550000001823079711101333_s1_p0/140425/355_5602 
-m130711_220053_00127_c100533802550000001823079711101375_s1_p0/109231/19575_23413 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/99062/3825_13603 -m130718_133911_00127_c100533942550000001823079711101300_s1_p0/135977/3534_15255 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/115444/0_8373 -m130719_165430_00127_c100533902550000001823079711101343_s1_p0/149702/616_7665 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/6477/1311_12650 -m130802_000127_00127_c100541292550000001823084511241304_s1_p0/25220/0_2468 -m130723_182647_00127_c100534262550000001823079711101382_s1_p0/129644/0_13649 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/140257/0_1327 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/131862/5394_11358 -m130705_101000_00127_c100506302550000001823078908081307_s1_p0/16025/2092_4902 -m130713_053616_00127_c100534092550000001823079711101316_s1_p0/122497/4636_8407 -m130719_053331_00127_c100533942550000001823079711101307_s1_p0/32713/4798_5926 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/57050/8737_14166 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/46259/6100_9549 -m130629_101243_00127_c100506212550000001823078908081327_s1_p0/27383/0_2121 -m130619_162201_00127_c100506172550000001823078908081391_s1_p0/31606/0_15691 -m130719_053331_00127_c100533942550000001823079711101307_s1_p0/32713/0_4749 -m130618_190615_00127_c100506252550000001823078908081381_s1_p0/142457/6142_9431 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/128322/397_5194 -m130702_133558_00127_c100506222550000001823078908081310_s1_p0/100470/5665_8589 -m130629_010723_00127_c100506212550000001823078908081323_s1_p0/92268/5431_6266 -m130719_053331_00127_c100533942550000001823079711101307_s1_p0/129618/5719_12143 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/57050/8737_14166 -m130711_220053_00127_c100533802550000001823079711101375_s1_p0/109231/15529_19519 
-m130618_190615_00127_c100506252550000001823078908081381_s1_p0/142457/6142_9431 -m130716_032147_00127_c100533892550000001823079711101384_s1_p0/38491/1328_9305 -m130625_064552_00127_c100506232550000001823078908081306_s1_p0/128981/0_6775 -m130619_162201_00127_c100506172550000001823078908081391_s1_p0/31606/0_15691 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/33842/0_3941 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/57050/8737_14166 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/33842/0_3941 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/128322/397_5194 -m130716_075734_00127_c100533892550000001823079711101386_s1_p0/16274/0_11173 -m130720_015950_00127_c100533902550000001823079711101347_s1_p0/128322/397_5194 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/160193/502_1392 -m130716_032147_00127_c100533892550000001823079711101384_s1_p0/38491/1328_9305 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/22668/0_3127 -m130726_145000_00127_c100534082550000001823079711101320_s1_p0/86785/0_2018 -m130719_165430_00127_c100533902550000001823079711101343_s1_p0/80563/70_7448 -m130709_093322_00127_c100534312550000001823079711101307_s1_p0/36512/10604_11250 -m130711_220053_00127_c100533802550000001823079711101375_s1_p0/155713/489_11383 -m130719_053331_00127_c100533942550000001823079711101307_s1_p0/129618/5719_12143 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/75497/0_12424 -m130719_234626_00127_c100533902550000001823079711101346_s1_p0/28455/0_9781 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/80626/2967_6574 -m130805_233054_00127_c100546662550000001823085811241362_s1_p0/127753/0_3660 -m130718_202811_00127_c100533942550000001823079711101303_s1_p0/59570/5495_14325 -m130802_152357_00127_c100560082550000001823094812221331_s1_p0/61128/0_3299 -m130718_202811_00127_c100533942550000001823079711101303_s1_p0/81322/17095_20054 
-m130709_024422_00127_c100534312550000001823079711101304_s1_p0/124350/1376_2015 -m130712_224419_00127_c100534092550000001823079711101313_s1_p0/25274/0_5128 -m130717_184917_00127_c100534262550000001823079711101380_s1_p0/50189/4166_8722 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/153116/0_98 -m130725_204206_00127_c100534042550000001823079711101361_s1_p0/141230/0_8073 -m130801_192848_00127_c100541292550000001823084511241302_s1_p0/81474/0_6593 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/127747/4057_10769 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/24804/2533_5640 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/102191/10866_12789 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/160687/6034_7540 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/25048/2030_8916 -m130714_020039_00127_c100534072550000001823079711101336_s1_p0/141041/5404_19073 -m130802_000127_00127_c100541292550000001823084511241304_s1_p0/107330/13460_16621 -m130726_192240_00127_c100534082550000001823079711101322_s1_p0/70560/4655_15726 -m130619_231101_00127_c100506172550000001823078908081394_s1_p0/129557/0_3877 -m130712_202759_00127_c100534092550000001823079711101312_s1_p0/117881/2415_8231 -m130727_043028_00127_c100534082550000001823079711101326_s1_p0/159684/0_6781 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/162103/0_9126 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/147954/5453_9896 -m130619_162201_00127_c100506172550000001823078908081391_s1_p0/31606/0_15691 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/130684/2450_7985 -m130712_224419_00127_c100534092550000001823079711101313_s1_p0/5977/3088_6325 -m130719_234626_00127_c100533902550000001823079711101346_s1_p0/28455/0_9781 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/51663/3348_3994 -m130630_042519_00127_c100506292550000001823078908081346_s1_p0/89539/0_636 
-m130630_042519_00127_c100506292550000001823078908081346_s1_p0/92680/5568_7722 -m130710_054811_00127_c100534572550000001823079711101385_s1_p0/7060/3585_5330 -m130625_064552_00127_c100506232550000001823078908081306_s1_p0/94971/2046_14055 -m130629_234907_00127_c100506292550000001823078908081344_s1_p0/55130/8040_14997 -m130711_172813_00127_c100533802550000001823079711101373_s1_p0/116745/777_7835 -m130711_172813_00127_c100533802550000001823079711101373_s1_p0/116745/777_7835 -m130625_064552_00127_c100506232550000001823078908081306_s1_p0/94971/2046_14055 -m130711_172813_00127_c100533802550000001823079711101373_s1_p0/116745/777_7835 -m130802_130737_00127_c100560082550000001823094812221330_s1_p0/109884/711_6572 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/75497/0_12424 -m130710_080722_00127_c100534572550000001823079711101386_s1_p0/126403/0_6488 -m130726_033106_00127_c100534042550000001823079711101364_s1_p0/63194/0_2873 -m130630_042519_00127_c100506292550000001823078908081346_s1_p0/101380/1486_3716 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/75497/0_12424 -m130731_224642_00127_c100534282550000001823079711101366_s1_p0/29960/0_3982 -m130711_172813_00127_c100533802550000001823079711101373_s1_p0/116745/777_7835 -m130709_024422_00127_c100534312550000001823079711101304_s1_p0/108298/0_3088 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/34710/1209_11237 -m130625_064552_00127_c100506232550000001823078908081306_s1_p0/94971/2046_14055 -m130719_143810_00127_c100533902550000001823079711101342_s1_p0/117590/0_3851 -m130726_011446_00127_c100534042550000001823079711101363_s1_p0/73115/11983_12084 -m130726_170620_00127_c100534082550000001823079711101321_s1_p0/16503/5146_7658 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/51777/193_2629 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/97142/0_4398 -m130716_010527_00127_c100533892550000001823079711101383_s1_p0/39621/325_3597 
-m130716_010527_00127_c100533892550000001823079711101383_s1_p0/39621/7027_10569 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/123279/0_11274 -m130619_140541_00127_c100506172550000001823078908081390_s1_p0/97572/0_4357 -m130619_205452_00127_c100506172550000001823078908081393_s1_p0/102001/477_2960 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/106797/0_432 -m130725_204206_00127_c100534042550000001823079711101361_s1_p0/6988/1346_1555 -m130628_000054_00127_c100506132550000001823078908081335_s1_p0/83992/6792_10018 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/67883/2778_11002 -m130620_012721_00127_c100506172550000001823078908081395_s1_p0/127594/459_6633 -m130710_011522_00127_c100534572550000001823079711101383_s1_p0/148369/269_639 -m130716_010527_00127_c100533892550000001823079711101383_s1_p0/39621/10616_14111 -m130702_202512_00127_c100506222550000001823078908081313_s1_p0/50315/3786_14979 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/34710/921_1172 -m130708_221142_00127_c100534312550000001823079711101302_s1_p0/126168/0_5912 -m130702_133558_00127_c100506222550000001823078908081310_s1_p0/141611/17508_18881 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/1730/0_7297 -m130625_064552_00127_c100506232550000001823078908081306_s1_p0/144263/1789_8141 -m130628_225103_00127_c100506212550000001823078908081322_s1_p0/147010/0_258 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/143488/2376_7215 -m130719_010051_00127_c100533942550000001823079711101305_s1_p0/143488/2376_7215 -m130716_075734_00127_c100533892550000001823079711101386_s1_p0/101804/0_4354 -m130726_102006_00127_c100534042550000001823079711101367_s1_p0/23046/0_6292 -m130716_010527_00127_c100533892550000001823079711101383_s1_p0/39621/7027_10569 -m130709_050042_00127_c100534312550000001823079711101305_s1_p0/134033/0_296 -m130726_102006_00127_c100534042550000001823079711101367_s1_p0/23046/6338_7887 
-m130801_145607_00127_c100541292550000001823084511241300_s1_p0/38090/4483_5931 -m130716_101047_00127_c100533892550000001823079711101387_s1_p0/67883/2778_11002 -m130716_010527_00127_c100533892550000001823079711101383_s1_p0/39621/10616_14111 -m130716_010527_00127_c100533892550000001823079711101383_s1_p0/39621/0_277 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/104436/7168_12239 -m130626_224003_00127_c100506252550000001823078908081386_s1_p0/104436/1611_7122 -m130702_155219_00127_c100506222550000001823078908081311_s1_p0/124100/0_5901 -m130703_031724_00127_c100506222550000001823078908081316_s1_p0/19493/6775_8652 -m130719_212710_00127_c100533902550000001823079711101345_s1_p0/14221/6528_9393 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/97240/257_6232 -m130712_155519_00127_c100534092550000001823079711101310_s1_p0/28737/1155_3632 -m130709_225902_00127_c100534572550000001823079711101382_s1_p0/162647/0_1824 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/93278/3756_6162 -m130705_032050_00127_c100506302550000001823078908081304_s1_p0/93278/0_3714 -m130803_002917_00127_c100560082550000001823094812221335_s1_p0/41971/17731_18184 -m130726_080606_00127_c100534042550000001823079711101366_s1_p0/151202/2375_3283 -m130625_064552_00127_c100506232550000001823078908081306_s1_p0/131962/0_3459 -m130709_024422_00127_c100534312550000001823079711101304_s1_p0/123837/0_1079 -m130712_181139_00127_c100534092550000001823079711101311_s1_p0/130765/16568_18872 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/69115/7476_9306 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/69115/11204_12973 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/69115/3623_5481 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/69115/13013_14810 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/69115/1651_3579 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/69115/5529_7433 
-m130702_180854_00127_c100506222550000001823078908081312_s1_p0/69115/9346_11166 -m130709_024422_00127_c100534312550000001823079711101304_s1_p0/123837/0_1079 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/97240/257_6232 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/104637/0_12315 -m130619_140541_00127_c100506172550000001823078908081390_s1_p0/68864/688_3415 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/104637/12349_15830 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/1730/0_7297 -m130624_213730_00127_c100506232550000001823078908081302_s1_p0/48666/5629_6656 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/42395/379_3883 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/69115/0_1610 -m130713_143619_00127_c100534072550000001823079711101331_s1_p0/14610/3635_9993 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/42395/0_332 -m130718_155531_00127_c100533942550000001823079711101301_s1_p0/1730/0_7297 -m130625_042630_00127_c100506232550000001823078908081305_s1_p0/132581/0_674 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/79091/4148_8866 -m130802_000127_00127_c100541292550000001823084511241304_s1_p0/96393/0_1237 -m130726_235520_00127_c100534082550000001823079711101324_s1_p0/39493/555_2757 -m130726_235520_00127_c100534082550000001823079711101324_s1_p0/39493/555_2757 -m130630_042519_00127_c100506292550000001823078908081346_s1_p0/158823/2260_6806 -m130803_024915_00127_c100560082550000001823094812221336_s1_p0/46436/3457_6402 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/104637/12349_15830 -m130702_202512_00127_c100506222550000001823078908081313_s1_p0/50315/3786_14979 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/104637/0_12315 -m130624_235350_00127_c100506232550000001823078908081303_s1_p0/94391/0_1316 -m130618_190615_00127_c100506252550000001823078908081381_s1_p0/71198/7921_17119 
-m130718_155531_00127_c100533942550000001823079711101301_s1_p0/1730/0_7297 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/104637/0_12315 -m130726_192240_00127_c100534082550000001823079711101322_s1_p0/157159/0_4674 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/147978/0_1292 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/87060/7581_11148 -m130725_182546_00127_c100534042550000001823079711101360_s1_p0/50065/259_10177 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/34076/1830_2197 -m130803_024915_00127_c100560082550000001823094812221336_s1_p0/2442/2663_6172 -m130624_213730_00127_c100506232550000001823078908081302_s1_p0/6571/4819_13348 -m130713_053616_00127_c100534092550000001823079711101316_s1_p0/128237/105_2176 -m130803_024915_00127_c100560082550000001823094812221336_s1_p0/2442/2663_6172 -m130725_182546_00127_c100534042550000001823079711101360_s1_p0/50065/259_10177 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/144470/6051_8091 -m130713_074939_00127_c100534092550000001823079711101317_s1_p0/144470/0_6006 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/125726/4261_6551 -m130719_100547_00127_c100533902550000001823079711101340_s1_p0/125726/6602_8875 -m130718_202811_00127_c100533942550000001823079711101303_s1_p0/27237/1378_2356 -m130801_214507_00127_c100541292550000001823084511241303_s1_p0/136992/2912_6748 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/67040/1083_2725 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/154917/14196_15103 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/154917/10056_14148 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/154917/5951_10011 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/154917/1879_5904 -m130803_024915_00127_c100560082550000001823094812221336_s1_p0/2442/2663_6172 -m130726_235520_00127_c100534082550000001823079711101324_s1_p0/43713/1168_3575 
-m130725_182546_00127_c100534042550000001823079711101360_s1_p0/50065/259_10177 -m130628_022025_00127_c100506132550000001823078908081336_s1_p0/141349/1954_4400 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/104637/0_12315 -m130703_031724_00127_c100506222550000001823078908081316_s1_p0/136261/0_9684 -m130719_122150_00127_c100533902550000001823079711101341_s1_p0/115473/1105_4513 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/67040/0_1036 -m130712_023333_00127_c100533802550000001823079711101377_s1_p0/21877/8447_12958 -m130714_041419_00127_c100534072550000001823079711101337_s1_p0/111690/6087_8834 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/114471/0_3150 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/114471/3194_7954 -m130714_041419_00127_c100534072550000001823079711101337_s1_p0/111690/11807_12926 -m130801_031922_00127_c100534262550000001823079711101386_s1_p0/132016/2660_5909 -m130709_093322_00127_c100534312550000001823079711101307_s1_p0/12835/12107_12660 -m130726_080606_00127_c100534042550000001823079711101366_s1_p0/11414/0_5734 -m130710_011522_00127_c100534572550000001823079711101383_s1_p0/14148/5515_8507 -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/126845/681_3220 -m130717_210534_00127_c100534262550000001823079711101381_s1_p0/27046/0_750 -m130703_053047_00127_c100506222550000001823078908081317_s1_p0/30291/3562_7526 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/54246/0_898 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/100735/4097_5949 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/100735/7861_9825 -m130710_033142_00127_c100534572550000001823079711101384_s1_p0/100735/5993_7817 -m130801_171227_00127_c100541292550000001823084511241301_s1_p0/152482/0_3116 -m130731_224642_00127_c100534282550000001823079711101366_s1_p0/27016/6674_9688 -m130803_024915_00127_c100560082550000001823094812221336_s1_p0/3453/0_1970 
-m130713_122008_00127_c100534072550000001823079711101330_s1_p0/106519/3650_8791 -m130719_165430_00127_c100533902550000001823079711101343_s1_p0/41925/3211_4626 -m130618_190615_00127_c100506252550000001823078908081381_s1_p0/34960/5153_12890 -m130719_191050_00127_c100533902550000001823079711101344_s1_p0/54246/947_1266 -m130710_102042_00127_c100534572550000001823079711101387_s1_p0/154917/0_1833 -m130712_044953_00127_c100506252550000001823078908081387_s1_p0/35190/0_2567 -m130712_001713_00127_c100533802550000001823079711101376_s1_p0/17873/0_8194 -m130709_182622_00127_c100534572550000001823079711101380_s1_p0/43628/3881_7008 -m130718_224433_00127_c100533942550000001823079711101304_s1_p0/161547/1877_3240 -m130712_224419_00127_c100534092550000001823079711101313_s1_p0/19954/5076_13562 -m130629_144357_00127_c100506292550000001823078908081340_s1_p0/114813/10151_12767 -m130703_005756_00127_c100506222550000001823078908081315_s1_p0/28098/0_1897 -m130718_181151_00127_c100533942550000001823079711101302_s1_p0/96948/0_1956 -m130714_041419_00127_c100534072550000001823079711101337_s1_p0/111690/2317_3253 -m130716_032147_00127_c100533892550000001823079711101384_s1_p0/11640/3410_5934 -m130715_181627_00127_c100533892550000001823079711101380_s1_p0/104637/0_12315 -m130702_180854_00127_c100506222550000001823078908081312_s1_p0/146603/483_808 -m130715_224907_00127_c100533892550000001823079711101382_s1_p0/35731/3285_5787 -m130712_155519_00127_c100534092550000001823079711101310_s1_p0/13805/0_6503 -m130630_020527_00127_c100506292550000001823078908081345_s1_p0/35289/614_1749 -m130624_170450_00127_c100506232550000001823078908081300_s1_p0/63807/21848_23551 -m130705_075619_00127_c100506302550000001823078908081306_s1_p0/126845/0_634 -m130618_233855_00127_c100506252550000001823078908081383_s1_p0/103000/5844_5913 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/65087/2816_4577 -m130630_063807_00127_c100506292550000001823078908081347_s1_p0/65087/0_2770 
-m130726_054726_00127_c100534042550000001823079711101365_s1_p0/128381/3709_5786 diff --git a/src/htslib-1.18/htscodecs/tests/names/05.names b/src/htslib-1.18/htscodecs/tests/names/05.names deleted file mode 100644 index b8b9603..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/05.names +++ /dev/null @@ -1,1000 +0,0 @@ -HS25_09827:2:2215:4133:22216#49 -HS25_09827:2:1212:15822:94146#49 -HS25_09827:2:1209:9304:17097#49 -HS25_09827:2:2309:1998:52482#49 -HS25_09827:2:2311:5542:13577#49 -HS25_09827:2:1305:16063:74761#49 -HS25_09827:2:1307:5554:63318#49 -HS25_09827:2:2116:19707:49600#49 -HS25_09827:2:2211:10488:84065#49 -HS25_09827:2:2311:16620:47618#49 -HS25_09827:2:1204:18768:54085#49 -HS25_09827:2:2212:5165:20821#49 -HS25_09827:2:2113:18908:75092#49 -HS25_09827:2:1303:2431:94090#49 -HS25_09827:2:2111:10072:81092#49 -HS25_09827:2:2307:12053:87715#49 -HS25_09827:2:2112:9111:77934#49 -HS25_09827:2:1305:15909:64318#49 -HS25_09827:2:1309:19751:66473#49 -HS25_09827:2:2215:4133:22216#49 -HS25_09827:2:2311:5542:13577#49 -HS25_09827:2:1204:20312:89698#49 -HS25_09827:2:1212:12994:23561#49 -HS25_09827:2:1303:10406:77761#49 -HS25_09827:2:2212:4945:69540#49 -HS25_09827:2:1316:4695:99651#49 -HS25_09827:2:2109:20170:68804#49 -HS25_09827:2:1209:8116:95615#49 -HS25_09827:2:2116:19707:49600#49 -HS25_09827:2:1308:18996:43310#49 -HS25_09827:2:2102:11274:80442#49 -HS25_09827:2:2109:12941:31311#49 -HS25_09827:2:2209:7198:31083#49 -HS25_09827:2:2109:17221:31733#49 -HS25_09827:2:1209:8116:95615#49 -HS25_09827:2:2201:10447:94593#49 -HS25_09827:2:1209:9304:17097#49 -HS25_09827:2:2309:1998:52482#49 -HS25_09827:2:1308:18996:43310#49 -HS25_09827:2:1303:10406:77761#49 -HS25_09827:2:2102:11274:80442#49 -HS25_09827:2:1316:4695:99651#49 -HS25_09827:2:2109:3588:76844#49 -HS25_09827:2:1303:2431:94090#49 -HS25_09827:2:1303:17579:94862#49 -HS25_09827:2:1212:12994:23561#49 -HS25_09827:2:2307:12053:87715#49 -HS25_09827:2:1212:15822:94146#49 -HS25_09827:2:1305:15909:64318#49 
-HS25_09827:2:2111:10072:81092#49 -HS25_09827:2:1306:1453:37679#49 -HS25_09827:2:2209:7198:31083#49 -HS25_09827:2:2212:4945:69540#49 -HS25_09827:2:2215:4244:93529#49 -HS25_09827:2:1309:19751:66473#49 -HS25_09827:2:2109:17221:31733#49 -HS25_09827:2:2109:12941:31311#49 -HS25_09827:2:1309:3834:12348#49 -HS25_09827:2:2309:15700:86999#49 -HS25_09827:2:2211:18911:39071#49 -HS25_09827:2:2307:10276:43922#49 -HS25_09827:2:1308:8848:75718#49 -HS25_09827:2:1313:13872:81211#49 -HS25_09827:2:1306:20208:56968#49 -HS25_09827:2:1313:13872:81211#49 -HS25_09827:2:1211:11693:47536#49 -HS25_09827:2:2314:17167:19250#49 -HS25_09827:2:1311:10185:38164#49 -HS25_09827:2:1309:3834:12348#49 -HS25_09827:2:1203:8634:55226#49 -HS25_09827:2:2309:15700:86999#49 -HS25_09827:2:2210:19541:75169#49 -HS25_09827:2:2211:18911:39071#49 -HS25_09827:2:1308:8848:75718#49 -HS25_09827:2:2211:17436:48551#49 -HS25_09827:2:2112:13679:96189#49 -HS25_09827:2:2307:10276:43922#49 -HS25_09827:2:2302:17388:87449#49 -HS25_09827:2:2213:4224:68651#49 -HS25_09827:2:1306:20208:56968#49 -HS25_09827:2:1311:10185:38164#49 -HS25_09827:2:2314:17167:19250#49 -HS25_09827:2:1203:8634:55226#49 -HS25_09827:2:2311:10407:85792#49 -HS25_09827:2:2301:20380:93634#49 -HS25_09827:2:1215:11520:29546#49 -HS25_09827:2:2210:19541:75169#49 -HS25_09827:2:2108:7845:86942#49 -HS25_09827:2:2211:17436:48551#49 -HS25_09827:2:2302:17388:87449#49 -HS25_09827:2:2108:14381:19629#49 -HS25_09827:2:2112:13679:96189#49 -HS25_09827:2:2213:4224:68651#49 -HS25_09827:2:1212:13045:20838#49 -HS25_09827:2:2216:13841:44405#49 -HS25_09827:2:2311:2042:9297#49 -HS25_09827:2:1303:15466:68125#49 -HS25_09827:2:2311:10407:85792#49 -HS25_09827:2:2301:20380:93634#49 -HS25_09827:2:2216:13841:44405#49 -HS25_09827:2:2302:10639:71974#49 -HS25_09827:2:2109:9464:80087#49 -HS25_09827:2:1215:11520:29546#49 -HS25_09827:2:1303:12790:9734#49 -HS25_09827:2:1212:13045:20838#49 -HS25_09827:2:2108:7845:86942#49 -HS25_09827:2:2302:10639:71974#49 -HS25_09827:2:1202:17256:91495#49 
-HS25_09827:2:1309:16069:100966#49 -HS25_09827:2:2108:14381:19629#49 -HS25_09827:2:1303:15466:68125#49 -HS25_09827:2:2105:4448:26372#49 -HS25_09827:2:2311:2042:9297#49 -HS25_09827:2:2312:4111:83859#49 -HS25_09827:2:2109:9464:80087#49 -HS25_09827:2:1303:6619:46139#49 -HS25_09827:2:1303:12790:9734#49 -HS25_09827:2:1206:19055:30331#49 -HS25_09827:2:1202:17256:91495#49 -HS25_09827:2:2203:11447:3177#49 -HS25_09827:2:2313:16810:86560#49 -HS25_09827:2:1206:11872:5127#49 -HS25_09827:2:2105:4448:26372#49 -HS25_09827:2:1314:3399:9965#49 -HS25_09827:2:2102:8028:12629#49 -HS25_09827:2:1305:6415:54092#49 -HS25_09827:2:1303:6619:46139#49 -HS25_09827:2:1206:19055:30331#49 -HS25_09827:2:2313:16810:86560#49 -HS25_09827:2:2214:18450:48073#49 -HS25_09827:2:1206:11872:5127#49 -HS25_09827:2:2113:19983:64397#49 -HS25_09827:2:2106:10497:88947#49 -HS25_09827:2:2210:20360:75764#49 -HS25_09827:2:1314:3399:9965#49 -HS25_09827:2:2201:18483:22145#49 -HS25_09827:2:2206:12169:77506#49 -HS25_09827:2:2311:15041:36605#49 -HS25_09827:2:1305:6415:54092#49 -HS25_09827:2:2212:12754:68167#49 -HS25_09827:2:2113:2345:52945#49 -HS25_09827:2:2307:14473:68395#49 -HS25_09827:2:2212:8830:20912#49 -HS25_09827:2:2311:15041:36605#49 -HS25_09827:2:2312:6742:68500#49 -HS25_09827:2:2206:8765:35023#49 -HS25_09827:2:2213:7742:93893#49 -HS25_09827:2:2214:18450:48073#49 -HS25_09827:2:2106:10497:88947#49 -HS25_09827:2:2210:20360:75764#49 -HS25_09827:2:2111:12096:9980#49 -HS25_09827:2:2113:19983:64397#49 -HS25_09827:2:2201:18483:22145#49 -HS25_09827:2:2206:8765:35023#49 -HS25_09827:2:2206:12169:77506#49 -HS25_09827:2:1214:3359:3557#49 -HS25_09827:2:2212:12754:68167#49 -HS25_09827:2:2312:6742:68500#49 -HS25_09827:2:2307:14473:68395#49 -HS25_09827:2:2113:2345:52945#49 -HS25_09827:2:2213:7742:93893#49 -HS25_09827:2:2212:8830:20912#49 -HS25_09827:2:2313:17952:63221#49 -HS25_09827:2:2111:12096:9980#49 -HS25_09827:2:1212:12539:99884#49 -HS25_09827:2:1204:2538:7866#49 -HS25_09827:2:1215:17819:11931#49 
-HS25_09827:2:1204:10767:41389#49 -HS25_09827:2:2306:14879:43078#49 -HS25_09827:2:2208:3891:6867#49 -HS25_09827:2:1315:6254:42288#49 -HS25_09827:2:2209:19447:73505#49 -HS25_09827:2:2313:19361:87261#49 -HS25_09827:2:1212:9711:9590#49 -HS25_09827:2:2313:17952:63221#49 -HS25_09827:2:2212:9988:20554#49 -HS25_09827:2:1203:17564:9657#49 -HS25_09827:2:1212:12539:99884#49 -HS25_09827:2:1204:2538:7866#49 -HS25_09827:2:2105:5404:53491#49 -HS25_09827:2:1207:16847:3501#49 -HS25_09827:2:1210:2898:58719#49 -HS25_09827:2:2101:5685:74391#49 -HS25_09827:2:1312:11238:47935#49 -HS25_09827:2:1311:2793:89639#49 -HS25_09827:2:1215:17819:11931#49 -HS25_09827:2:2306:14879:43078#49 -HS25_09827:2:1315:6254:42288#49 -HS25_09827:2:2112:20357:41896#49 -HS25_09827:2:1204:10767:41389#49 -HS25_09827:2:2208:3891:6867#49 -HS25_09827:2:2313:19361:87261#49 -HS25_09827:2:2114:19469:96206#49 -HS25_09827:2:2209:19447:73505#49 -HS25_09827:2:2206:13412:45447#49 -HS25_09827:2:2212:9988:20554#49 -HS25_09827:2:1212:9711:9590#49 -HS25_09827:2:1204:20902:21615#49 -HS25_09827:2:2308:9088:53281#49 -HS25_09827:2:2105:5404:53491#49 -HS25_09827:2:1203:17564:9657#49 -HS25_09827:2:1207:16847:3501#49 -HS25_09827:2:1210:2898:58719#49 -HS25_09827:2:2101:5685:74391#49 -HS25_09827:2:2106:18086:14185#49 -HS25_09827:2:2112:20357:41896#49 -HS25_09827:2:2110:10293:100027#49 -HS25_09827:2:1312:11238:47935#49 -HS25_09827:2:2114:19469:96206#49 -HS25_09827:2:2206:13412:45447#49 -HS25_09827:2:1214:2285:69133#49 -HS25_09827:2:1310:8640:93999#49 -HS25_09827:2:1204:20902:21615#49 -HS25_09827:2:2204:19283:61389#49 -HS25_09827:2:2308:9088:53281#49 -HS25_09827:2:1313:11154:53028#49 -HS25_09827:2:2106:18086:14185#49 -HS25_09827:2:2209:14947:38514#49 -HS25_09827:2:1214:2285:69133#49 -HS25_09827:2:2307:14023:72288#49 -HS25_09827:2:2113:2658:52358#49 -HS25_09827:2:2110:10293:100027#49 -HS25_09827:2:2204:19283:61389#49 -HS25_09827:2:1310:8640:93999#49 -HS25_09827:2:2309:5424:100707#49 -HS25_09827:2:1202:6939:19107#49 
-HS25_09827:2:1311:4237:12448#49 -HS25_09827:2:1208:16944:65053#49 -HS25_09827:2:2209:14947:38514#49 -HS25_09827:2:2108:16936:49104#49 -HS25_09827:2:1305:17922:21018#49 -HS25_09827:2:2113:2658:52358#49 -HS25_09827:2:2307:14023:72288#49 -HS25_09827:2:2101:11863:80235#49 -HS25_09827:2:1205:16193:10228#49 -HS25_09827:2:1210:10009:91467#49 -HS25_09827:2:2309:5424:100707#49 -HS25_09827:2:2204:17302:14746#49 -HS25_09827:2:1201:21083:51862#49 -HS25_09827:2:1202:6939:19107#49 -HS25_09827:2:1313:4623:39826#49 -HS25_09827:2:1311:4237:12448#49 -HS25_09827:2:2214:20192:41103#49 -HS25_09827:2:1208:16944:65053#49 -HS25_09827:2:2204:17302:14746#49 -HS25_09827:2:2101:11863:80235#49 -HS25_09827:2:2208:9732:56894#49 -HS25_09827:2:2108:16936:49104#49 -HS25_09827:2:1305:17922:21018#49 -HS25_09827:2:1205:16193:10228#49 -HS25_09827:2:1210:10009:91467#49 -HS25_09827:2:1308:9605:97421#49 -HS25_09827:2:1215:4057:37510#49 -HS25_09827:2:1201:21083:51862#49 -HS25_09827:2:2214:20192:41103#49 -HS25_09827:2:2301:13298:41476#49 -HS25_09827:2:2211:1356:79667#49 -HS25_09827:2:1201:7965:8858#49 -HS25_09827:2:2208:9732:56894#49 -HS25_09827:2:2207:13184:38521#49 -HS25_09827:2:1308:9605:97421#49 -HS25_09827:2:2101:5420:70014#49 -HS25_09827:2:1215:4057:37510#49 -HS25_09827:2:2314:5936:86287#49 -HS25_09827:2:1205:9397:57189#49 -HS25_09827:2:2306:9478:94892#49 -HS25_09827:2:2115:14391:4234#49 -HS25_09827:2:2211:1356:79667#49 -HS25_09827:2:2310:19222:15649#49 -HS25_09827:2:2307:10940:69805#49 -HS25_09827:2:2301:13298:41476#49 -HS25_09827:2:1205:7171:18240#49 -HS25_09827:2:2106:21321:3081#49 -HS25_09827:2:1201:7965:8858#49 -HS25_09827:2:2205:5389:33730#49 -HS25_09827:2:1313:19679:65934#49 -HS25_09827:2:2207:13184:38521#49 -HS25_09827:2:1311:7330:49915#49 -HS25_09827:2:1312:15067:85485#49 -HS25_09827:2:2314:5936:86287#49 -HS25_09827:2:2101:5420:70014#49 -HS25_09827:2:1310:14138:50339#49 -HS25_09827:2:1205:9397:57189#49 -HS25_09827:2:2306:9478:94892#49 -HS25_09827:2:2106:21321:3081#49 
-HS25_09827:2:2301:14468:79810#49 -HS25_09827:2:1211:12643:10033#49 -HS25_09827:2:2307:10940:69805#49 -HS25_09827:2:2115:14391:4234#49 -HS25_09827:2:1305:10140:86325#49 -HS25_09827:2:2205:5389:33730#49 -HS25_09827:2:2204:8915:68371#49 -HS25_09827:2:2310:19222:15649#49 -HS25_09827:2:1311:7330:49915#49 -HS25_09827:2:1205:7171:18240#49 -HS25_09827:2:1313:19679:65934#49 -HS25_09827:2:1312:15067:85485#49 -HS25_09827:2:2102:18827:43055#49 -HS25_09827:2:2104:19240:43505#49 -HS25_09827:2:1210:20908:63098#49 -HS25_09827:2:2306:12066:84349#49 -HS25_09827:2:2104:11907:27534#49 -HS25_09827:2:1310:14138:50339#49 -HS25_09827:2:2301:14468:79810#49 -HS25_09827:2:1305:10140:86325#49 -HS25_09827:2:1211:12643:10033#49 -HS25_09827:2:1210:20908:63098#49 -HS25_09827:2:2204:8915:68371#49 -HS25_09827:2:2104:19240:43505#49 -HS25_09827:2:2104:11907:27534#49 -HS25_09827:2:2205:9081:2496#49 -HS25_09827:2:2102:18827:43055#49 -HS25_09827:2:2315:7734:33001#49 -HS25_09827:2:2205:9081:2496#49 -HS25_09827:2:2306:12066:84349#49 -HS25_09827:2:1209:13054:3360#49 -HS25_09827:2:2106:6387:2480#49 -HS25_09827:2:2315:7734:33001#49 -HS25_09827:2:1209:13054:3360#49 -HS25_09827:2:2106:6387:2480#49 -HS25_09827:2:2307:5704:80107#49 -HS25_09827:2:1211:17330:49667#49 -HS25_09827:2:1314:3228:7667#49 -HS25_09827:2:1208:17955:15896#49 -HS25_09827:2:1309:17289:11672#49 -HS25_09827:2:1303:12707:72149#49 -HS25_09827:2:1310:3860:57927#49 -HS25_09827:2:2307:5704:80107#49 -HS25_09827:2:2210:16937:42130#49 -HS25_09827:2:1208:17955:15896#49 -HS25_09827:2:1314:3228:7667#49 -HS25_09827:2:1211:17330:49667#49 -HS25_09827:2:1309:17289:11672#49 -HS25_09827:2:1303:12707:72149#49 -HS25_09827:2:1310:3860:57927#49 -HS25_09827:2:2305:6716:85022#49 -HS25_09827:2:2206:7323:31961#49 -HS25_09827:2:2210:16937:42130#49 -HS25_09827:2:2115:10546:65950#49 -HS25_09827:2:1213:16054:79587#49 -HS25_09827:2:1212:10093:90040#49 -HS25_09827:2:2210:12605:60360#49 -HS25_09827:2:2206:7323:31961#49 -HS25_09827:2:2309:6915:76693#49 
-HS25_09827:2:2308:14841:4066#49 -HS25_09827:2:1213:16054:79587#49 -HS25_09827:2:2115:10546:65950#49 -HS25_09827:2:2113:2147:36615#49 -HS25_09827:2:1303:15888:60814#49 -HS25_09827:2:2204:16725:12627#49 -HS25_09827:2:2303:3977:34377#49 -HS25_09827:2:2205:8322:78058#49 -HS25_09827:2:1212:10093:90040#49 -HS25_09827:2:2301:16854:78229#49 -HS25_09827:2:2216:14694:4479#49 -HS25_09827:2:1210:13829:26970#49 -HS25_09827:2:2101:2342:65041#49 -HS25_09827:2:1203:4218:26511#49 -HS25_09827:2:1303:2809:12191#49 -HS25_09827:2:2308:14841:4066#49 -HS25_09827:2:1210:6705:3820#49 -HS25_09827:2:1309:16514:32971#49 -HS25_09827:2:2204:8151:68202#49 -HS25_09827:2:2309:6915:76693#49 -HS25_09827:2:1303:15888:60814#49 -HS25_09827:2:2210:12605:60360#49 -HS25_09827:2:1201:13624:17251#49 -HS25_09827:2:2312:4191:15549#49 -HS25_09827:2:2204:16725:12627#49 -HS25_09827:2:2113:2147:36615#49 -HS25_09827:2:2101:12568:25345#49 -HS25_09827:2:2313:19819:33406#49 -HS25_09827:2:2303:3977:34377#49 -HS25_09827:2:2205:8322:78058#49 -HS25_09827:2:2301:16854:78229#49 -HS25_09827:2:1213:7913:41005#49 -HS25_09827:2:2101:12568:25345#49 -HS25_09827:2:1315:16523:73973#49 -HS25_09827:2:2305:16556:35023#49 -HS25_09827:2:1210:13829:26970#49 -HS25_09827:2:2204:10739:48613#49 -HS25_09827:2:1210:6705:3820#49 -HS25_09827:2:2113:4557:85104#49 -HS25_09827:2:1309:16514:32971#49 -HS25_09827:2:2204:8151:68202#49 -HS25_09827:2:2101:2342:65041#49 -HS25_09827:2:1303:2809:12191#49 -HS25_09827:2:1315:16523:73973#49 -HS25_09827:2:1203:4218:26511#49 -HS25_09827:2:1206:20941:52384#49 -HS25_09827:2:2312:4191:15549#49 -HS25_09827:2:2114:19633:92865#49 -HS25_09827:2:1201:13624:17251#49 -HS25_09827:2:2313:19819:33406#49 -HS25_09827:2:1213:7913:41005#49 -HS25_09827:2:2110:11379:95588#49 -HS25_09827:2:2305:16556:35023#49 -HS25_09827:2:2311:3903:9948#49 -HS25_09827:2:2113:4557:85104#49 -HS25_09827:2:2301:5860:60639#49 -HS25_09827:2:1206:20941:52384#49 -HS25_09827:2:2114:19633:92865#49 -HS25_09827:2:2311:3903:9948#49 
-HS25_09827:2:2110:11379:95588#49 -HS25_09827:2:2115:17483:39635#49 -HS25_09827:2:1215:17766:40063#49 -HS25_09827:2:2205:20244:26458#49 -HS25_09827:2:2115:17483:39635#49 -HS25_09827:2:1215:17766:40063#49 -HS25_09827:2:2205:20244:26458#49 -HS25_09827:2:1302:19696:73143#49 -HS25_09827:2:2204:18371:24442#49 -HS25_09827:2:1213:19575:11729#49 -HS25_09827:2:1205:16847:44424#49 -HS25_09827:2:1302:19696:73143#49 -HS25_09827:2:2314:8359:77266#49 -HS25_09827:2:2204:18371:24442#49 -HS25_09827:2:2103:15077:33719#49 -HS25_09827:2:1205:16847:44424#49 -HS25_09827:2:1213:19575:11729#49 -HS25_09827:2:1309:20635:74829#49 -HS25_09827:2:2208:16407:2775#49 -HS25_09827:2:2314:8359:77266#49 -HS25_09827:2:1305:10803:57134#49 -HS25_09827:2:1309:20635:74829#49 -HS25_09827:2:1213:15139:13225#49 -HS25_09827:2:1305:10803:57134#49 -HS25_09827:2:2304:20732:56457#49 -HS25_09827:2:1309:11997:41555#49 -HS25_09827:2:1209:4710:29286#49 -HS25_09827:2:1314:5056:52117#49 -HS25_09827:2:1213:15139:13225#49 -HS25_09827:2:2109:2499:67403#49 -HS25_09827:2:2106:14113:30585#49 -HS25_09827:2:2109:2499:67403#49 -HS25_09827:2:1209:4710:29286#49 -HS25_09827:2:1309:11997:41555#49 -HS25_09827:2:2304:20732:56457#49 -HS25_09827:2:1314:5056:52117#49 -HS25_09827:2:1201:8909:14160#49 -HS25_09827:2:2104:10615:13523#49 -HS25_09827:2:2115:6478:62570#49 -HS25_09827:2:2211:14650:49140#49 -HS25_09827:2:1201:8909:14160#49 -HS25_09827:2:2104:10615:13523#49 -HS25_09827:2:2201:15103:97933#49 -HS25_09827:2:2211:14650:49140#49 -HS25_09827:2:1215:13420:49989#49 -HS25_09827:2:2104:7990:54107#49 -HS25_09827:2:2115:7536:61615#49 -HS25_09827:2:1308:3316:89787#49 -HS25_09827:2:1306:2886:78642#49 -HS25_09827:2:1305:13792:33859#49 -HS25_09827:2:2213:9676:30482#49 -HS25_09827:2:2307:3338:50144#49 -HS25_09827:2:1215:13420:49989#49 -HS25_09827:2:1314:17022:18860#49 -HS25_09827:2:1216:7335:87206#49 -HS25_09827:2:1306:2886:78642#49 -HS25_09827:2:2115:7536:61615#49 -HS25_09827:2:2307:3338:50144#49 -HS25_09827:2:2213:11756:97895#49 
-HS25_09827:2:1203:7815:52161#49 -HS25_09827:2:1309:3459:92693#49 -HS25_09827:2:2213:11756:97895#49 -HS25_09827:2:1216:7335:87206#49 -HS25_09827:2:1314:17022:18860#49 -HS25_09827:2:1203:7815:52161#49 -HS25_09827:2:1309:3459:92693#49 -HS25_09827:2:2204:5587:25070#49 -HS25_09827:2:2212:19166:26294#49 -HS25_09827:2:2106:6786:26039#49 -HS25_09827:2:1314:5559:46368#49 -HS25_09827:2:2104:4677:17868#49 -HS25_09827:2:1205:4512:64092#49 -HS25_09827:2:2204:5587:25070#49 -HS25_09827:2:2201:8195:81306#49 -HS25_09827:2:1214:2226:73587#49 -HS25_09827:2:2106:16974:36947#49 -HS25_09827:2:1214:2226:73587#49 -HS25_09827:2:2212:19166:26294#49 -HS25_09827:2:2214:15330:25571#49 -HS25_09827:2:2304:7017:29832#49 -HS25_09827:2:1314:5559:46368#49 -HS25_09827:2:1313:3486:91752#49 -HS25_09827:2:2310:19758:77443#49 -HS25_09827:2:2106:6786:26039#49 -HS25_09827:2:1205:4512:64092#49 -HS25_09827:2:2201:8195:81306#49 -HS25_09827:2:2301:11770:24562#49 -HS25_09827:2:2104:4677:17868#49 -HS25_09827:2:2106:16974:36947#49 -HS25_09827:2:1211:20137:61302#49 -HS25_09827:2:2214:15330:25571#49 -HS25_09827:2:1307:9545:75384#49 -HS25_09827:2:2310:19758:77443#49 -HS25_09827:2:2304:7017:29832#49 -HS25_09827:2:1313:3486:91752#49 -HS25_09827:2:2105:7239:83002#49 -HS25_09827:2:2213:15545:79306#49 -HS25_09827:2:2301:11770:24562#49 -HS25_09827:2:1314:6112:69448#49 -HS25_09827:2:1208:8727:22371#49 -HS25_09827:2:2206:17318:2709#49 -HS25_09827:2:1211:20137:61302#49 -HS25_09827:2:1208:8727:22371#49 -HS25_09827:2:1307:9545:75384#49 -HS25_09827:2:2106:7650:33278#49 -HS25_09827:2:2309:16798:5562#49 -HS25_09827:2:1209:6654:25445#49 -HS25_09827:2:2213:6632:39463#49 -HS25_09827:2:1314:6112:69448#49 -HS25_09827:2:2213:15545:79306#49 -HS25_09827:2:2105:7239:83002#49 -HS25_09827:2:1213:11302:89002#49 -HS25_09827:2:2206:17318:2709#49 -HS25_09827:2:2114:21318:12979#49 -HS25_09827:2:2203:14800:25215#49 -HS25_09827:2:2115:7907:56958#49 -HS25_09827:2:1305:3312:67479#49 -HS25_09827:2:2106:7650:33278#49 -HS25_09827:2:2209:1480:31833#49 
-HS25_09827:2:1315:21291:84059#49 -HS25_09827:2:2309:16798:5562#49 -HS25_09827:2:2213:6632:39463#49 -HS25_09827:2:1209:6654:25445#49 -HS25_09827:2:1210:13018:27047#49 -HS25_09827:2:2114:21318:12979#49 -HS25_09827:2:2203:14800:25215#49 -HS25_09827:2:1206:21305:5793#49 -HS25_09827:2:1213:11302:89002#49 -HS25_09827:2:2313:16261:45879#49 -HS25_09827:2:1305:3312:67479#49 -HS25_09827:2:2209:1480:31833#49 -HS25_09827:2:2115:7907:56958#49 -HS25_09827:2:1215:7868:62062#49 -HS25_09827:2:1315:21291:84059#49 -HS25_09827:2:1301:20439:67809#49 -HS25_09827:2:1206:21305:5793#49 -HS25_09827:2:1210:13018:27047#49 -HS25_09827:2:2313:16261:45879#49 -HS25_09827:2:1209:12442:64767#49 -HS25_09827:2:1313:8091:96034#49 -HS25_09827:2:2205:4298:12428#49 -HS25_09827:2:1301:20439:67809#49 -HS25_09827:2:1209:12442:64767#49 -HS25_09827:2:2209:4461:40815#49 -HS25_09827:2:2116:16361:41377#49 -HS25_09827:2:1313:8091:96034#49 -HS25_09827:2:1312:3346:86415#49 -HS25_09827:2:2114:13791:70630#49 -HS25_09827:2:2109:4728:79802#49 -HS25_09827:2:1307:17245:84668#49 -HS25_09827:2:2205:4298:12428#49 -HS25_09827:2:1309:11383:27300#49 -HS25_09827:2:1206:15832:79285#49 -HS25_09827:2:2209:4461:40815#49 -HS25_09827:2:2116:16361:41377#49 -HS25_09827:2:1309:11383:27300#49 -HS25_09827:2:1307:17245:84668#49 -HS25_09827:2:2309:4394:79570#49 -HS25_09827:2:1206:15832:79285#49 -HS25_09827:2:1203:11994:7785#49 -HS25_09827:2:2215:10153:33095#49 -HS25_09827:2:2309:4394:79570#49 -HS25_09827:2:1302:21009:63471#49 -HS25_09827:2:2310:16661:88087#49 -HS25_09827:2:1203:11994:7785#49 -HS25_09827:2:2212:8246:59065#49 -HS25_09827:2:1306:8624:10962#49 -HS25_09827:2:1315:7035:40186#49 -HS25_09827:2:1314:11015:44212#49 -HS25_09827:2:2201:2180:6886#49 -HS25_09827:2:2215:10153:33095#49 -HS25_09827:2:1205:7990:75237#49 -HS25_09827:2:1313:20446:3145#49 -HS25_09827:2:1302:21009:63471#49 -HS25_09827:2:2310:16661:88087#49 -HS25_09827:2:2303:4214:49761#49 -HS25_09827:2:1306:8624:10962#49 -HS25_09827:2:2212:8246:59065#49 
-HS25_09827:2:1305:4929:43183#49 -HS25_09827:2:2313:13185:85750#49 -HS25_09827:2:2209:14477:95232#49 -HS25_09827:2:1315:7035:40186#49 -HS25_09827:2:2301:11036:83091#49 -HS25_09827:2:2201:2180:6886#49 -HS25_09827:2:1314:11015:44212#49 -HS25_09827:2:1205:12651:88456#49 -HS25_09827:2:2110:14788:80589#49 -HS25_09827:2:1313:20446:3145#49 -HS25_09827:2:1205:7990:75237#49 -HS25_09827:2:2303:4214:49761#49 -HS25_09827:2:1204:19257:24710#49 -HS25_09827:2:1305:4929:43183#49 -HS25_09827:2:2209:14477:95232#49 -HS25_09827:2:2307:17798:87127#49 -HS25_09827:2:2313:13185:85750#49 -HS25_09827:2:2301:11036:83091#49 -HS25_09827:2:1205:12651:88456#49 -HS25_09827:2:1206:17250:10755#49 -HS25_09827:2:2110:14788:80589#49 -HS25_09827:2:2209:9869:18812#49 -HS25_09827:2:1204:19257:24710#49 -HS25_09827:2:2201:20341:65319#49 -HS25_09827:2:2307:17798:87127#49 -HS25_09827:2:1206:17250:10755#49 -HS25_09827:2:2209:9869:18812#49 -HS25_09827:2:2114:2267:7901#49 -HS25_09827:2:1211:5541:84312#49 -HS25_09827:2:2201:20341:65319#49 -HS25_09827:2:1316:4210:8170#49 -HS25_09827:2:2209:11572:91701#49 -HS25_09827:2:2105:14015:78703#49 -HS25_09827:2:1211:5541:84312#49 -HS25_09827:2:1210:8178:8978#49 -HS25_09827:2:2106:6380:6425#49 -HS25_09827:2:1316:4210:8170#49 -HS25_09827:2:2209:11572:91701#49 -HS25_09827:2:2101:12020:82471#49 -HS25_09827:2:2202:5612:71317#49 -HS25_09827:2:2105:7697:81048#49 -HS25_09827:2:1312:17799:78270#49 -HS25_09827:2:2105:14015:78703#49 -HS25_09827:2:2101:3887:14393#49 -HS25_09827:2:2309:8179:42977#49 -HS25_09827:2:1312:17799:78270#49 -HS25_09827:2:2106:6380:6425#49 -HS25_09827:2:2208:18201:81908#49 -HS25_09827:2:2101:12020:82471#49 -HS25_09827:2:2105:7697:81048#49 -HS25_09827:2:1314:6451:5712#49 -HS25_09827:2:2204:16849:75418#49 -HS25_09827:2:2205:16947:72173#49 -HS25_09827:2:2116:16623:88282#49 -HS25_09827:2:2202:5612:71317#49 -HS25_09827:2:2208:18201:81908#49 -HS25_09827:2:2101:3887:14393#49 -HS25_09827:2:2304:8009:57301#49 -HS25_09827:2:2309:8179:42977#49 
-HS25_09827:2:2103:20770:95412#49 -HS25_09827:2:2204:16849:75418#49 -HS25_09827:2:2205:16947:72173#49 -HS25_09827:2:1302:16918:77713#49 -HS25_09827:2:1314:6451:5712#49 -HS25_09827:2:2116:16623:88282#49 -HS25_09827:2:2112:6672:15226#49 -HS25_09827:2:1203:9918:5248#49 -HS25_09827:2:1306:5808:96239#49 -HS25_09827:2:2304:8009:57301#49 -HS25_09827:2:1208:6443:56127#49 -HS25_09827:2:2103:20770:95412#49 -HS25_09827:2:1212:12700:82117#49 -HS25_09827:2:1302:16918:77713#49 -HS25_09827:2:2305:8653:24821#49 -HS25_09827:2:2211:3988:78834#49 -HS25_09827:2:2112:6672:15226#49 -HS25_09827:2:1203:9918:5248#49 -HS25_09827:2:2308:6740:40827#49 -HS25_09827:2:1306:5808:96239#49 -HS25_09827:2:1208:6443:56127#49 -HS25_09827:2:2316:14045:30873#49 -HS25_09827:2:1210:2428:17885#49 -HS25_09827:2:1204:18797:67106#49 -HS25_09827:2:1301:17026:23795#49 -HS25_09827:2:2107:18069:84965#49 -HS25_09827:2:1316:11343:70937#49 -HS25_09827:2:2310:7888:6696#49 -HS25_09827:2:2210:5085:88674#49 -HS25_09827:2:1302:3823:19416#49 -HS25_09827:2:2211:3988:78834#49 -HS25_09827:2:2113:8737:80628#49 -HS25_09827:2:2305:8653:24821#49 -HS25_09827:2:1212:12700:82117#49 -HS25_09827:2:2308:6740:40827#49 -HS25_09827:2:2314:9626:68480#49 -HS25_09827:2:2313:7708:12406#49 -HS25_09827:2:1210:2428:17885#49 -HS25_09827:2:2208:7332:36500#49 -HS25_09827:2:1204:18797:67106#49 -HS25_09827:2:2316:14045:30873#49 -HS25_09827:2:2107:15316:34637#49 -HS25_09827:2:1301:17026:23795#49 -HS25_09827:2:1207:2389:32380#49 -HS25_09827:2:1313:9376:68098#49 -HS25_09827:2:2203:5056:63270#49 -HS25_09827:2:2310:7888:6696#49 -HS25_09827:2:2113:8737:80628#49 -HS25_09827:2:1302:3823:19416#49 -HS25_09827:2:2105:6005:10317#49 -HS25_09827:2:2314:9626:68480#49 -HS25_09827:2:2106:11622:85483#49 -HS25_09827:2:2308:19039:15364#49 -HS25_09827:2:2208:7332:36500#49 -HS25_09827:2:1209:8926:93369#49 -HS25_09827:2:1311:17806:99873#49 -HS25_09827:2:2114:7698:50830#49 -HS25_09827:2:2107:15316:34637#49 -HS25_09827:2:2304:5310:69017#49 -HS25_09827:2:2303:18882:13001#49 
-HS25_09827:2:1201:3236:100584#49 -HS25_09827:2:1313:9376:68098#49 -HS25_09827:2:1207:2389:32380#49 -HS25_09827:2:2203:5056:63270#49 -HS25_09827:2:1311:15633:5273#49 -HS25_09827:2:2105:6005:10317#49 -HS25_09827:2:2308:19039:15364#49 -HS25_09827:2:1311:15633:5273#49 -HS25_09827:2:2114:7698:50830#49 -HS25_09827:2:2106:11622:85483#49 -HS25_09827:2:1311:17806:99873#49 -HS25_09827:2:1209:8926:93369#49 -HS25_09827:2:2104:11093:58529#49 -HS25_09827:2:2304:5310:69017#49 -HS25_09827:2:2211:14731:32241#49 -HS25_09827:2:1201:3236:100584#49 -HS25_09827:2:2313:16199:9632#49 -HS25_09827:2:2303:18882:13001#49 -HS25_09827:2:2105:10190:92973#49 -HS25_09827:2:2215:5889:100196#49 -HS25_09827:2:1309:4584:38870#49 -HS25_09827:2:1316:4338:25533#49 -HS25_09827:2:2105:10190:92973#49 -HS25_09827:2:2209:16855:86378#49 -HS25_09827:2:2215:5889:100196#49 -HS25_09827:2:1309:4584:38870#49 -HS25_09827:2:2209:16855:86378#49 -HS25_09827:2:1206:12898:32321#49 -HS25_09827:2:2306:8829:60424#49 -HS25_09827:2:2112:7407:61239#49 -HS25_09827:2:1210:13249:39244#49 -HS25_09827:2:1205:2855:23114#49 -HS25_09827:2:2112:7407:61239#49 -HS25_09827:2:1206:12898:32321#49 -HS25_09827:2:2306:8829:60424#49 -HS25_09827:2:1301:14371:68255#49 -HS25_09827:2:1210:13249:39244#49 -HS25_09827:2:2115:20970:88656#49 -HS25_09827:2:2106:20801:66713#49 -HS25_09827:2:2207:5349:15730#49 -HS25_09827:2:2103:7969:15850#49 -HS25_09827:2:1308:16440:52592#49 -HS25_09827:2:2108:11291:49727#49 -HS25_09827:2:2115:20970:88656#49 -HS25_09827:2:1301:14371:68255#49 -HS25_09827:2:2106:20801:66713#49 -HS25_09827:2:2102:10580:82123#49 -HS25_09827:2:2207:5349:15730#49 -HS25_09827:2:2108:11291:49727#49 -HS25_09827:2:2103:7969:15850#49 -HS25_09827:2:1308:16440:52592#49 -HS25_09827:2:2102:10580:82123#49 -HS25_09827:2:2208:8423:98594#49 -HS25_09827:2:1211:11612:11428#49 -HS25_09827:2:2308:11186:16045#49 -HS25_09827:2:1311:20518:50516#49 -HS25_09827:2:2311:20238:41808#49 -HS25_09827:2:2215:17378:66148#49 -HS25_09827:2:2208:8423:98594#49 
-HS25_09827:2:1211:11612:11428#49 -HS25_09827:2:2204:12033:5041#49 -HS25_09827:2:2204:12033:5041#49 -HS25_09827:2:2308:11186:16045#49 -HS25_09827:2:2302:8506:6146#49 -HS25_09827:2:2213:16513:94197#49 -HS25_09827:2:2215:17378:66148#49 -HS25_09827:2:1311:20518:50516#49 -HS25_09827:2:2216:13693:29366#49 -HS25_09827:2:2307:7422:56694#49 -HS25_09827:2:2205:17535:57646#49 -HS25_09827:2:1210:19673:64208#49 -HS25_09827:2:2302:8506:6146#49 -HS25_09827:2:2213:16513:94197#49 -HS25_09827:2:1310:4290:100666#49 -HS25_09827:2:2301:15321:68779#49 -HS25_09827:2:1210:19673:64208#49 -HS25_09827:2:2315:19977:77514#49 -HS25_09827:2:2106:2431:8391#49 -HS25_09827:2:2216:13693:29366#49 -HS25_09827:2:2307:7422:56694#49 -HS25_09827:2:2205:17535:57646#49 -HS25_09827:2:1310:8088:87508#49 -HS25_09827:2:2109:6504:27530#49 -HS25_09827:2:2109:13027:34285#49 -HS25_09827:2:1213:19977:65160#49 -HS25_09827:2:2108:16495:6596#49 -HS25_09827:2:1310:4290:100666#49 -HS25_09827:2:2301:15321:68779#49 -HS25_09827:2:2111:18573:99069#49 -HS25_09827:2:1204:16490:97598#49 -HS25_09827:2:2114:1546:45882#49 -HS25_09827:2:2106:2431:8391#49 -HS25_09827:2:2207:10805:29251#49 -HS25_09827:2:1310:8088:87508#49 -HS25_09827:2:2109:13027:34285#49 -HS25_09827:2:2109:6504:27530#49 -HS25_09827:2:1204:16490:97598#49 -HS25_09827:2:1213:19977:65160#49 -HS25_09827:2:2108:16495:6596#49 -HS25_09827:2:2111:18573:99069#49 -HS25_09827:2:1306:5652:18688#49 -HS25_09827:2:2114:1546:45882#49 -HS25_09827:2:1206:2097:69044#49 -HS25_09827:2:2207:10805:29251#49 -HS25_09827:2:2307:14034:89270#49 -HS25_09827:2:2312:20038:98868#49 -HS25_09827:2:2312:20038:98868#49 -HS25_09827:2:2101:16364:15831#49 -HS25_09827:2:1206:2097:69044#49 -HS25_09827:2:1311:17140:80083#49 -HS25_09827:2:1314:3373:80867#49 -HS25_09827:2:1306:5652:18688#49 -HS25_09827:2:1308:7272:28984#49 -HS25_09827:2:1205:19825:33358#49 -HS25_09827:2:2301:14658:92273#49 -HS25_09827:2:2212:14943:54243#49 -HS25_09827:2:2103:7493:77982#49 -HS25_09827:2:2101:16364:15831#49 
-HS25_09827:2:1311:3390:97398#49 -HS25_09827:2:1314:3373:80867#49 -HS25_09827:2:1311:17140:80083#49 -HS25_09827:2:2213:18537:94331#49 -HS25_09827:2:2314:12584:16760#49 -HS25_09827:2:2212:14943:54243#49 -HS25_09827:2:1308:7272:28984#49 -HS25_09827:2:2114:5251:61228#49 -HS25_09827:2:1205:19825:33358#49 -HS25_09827:2:2113:15317:29114#49 -HS25_09827:2:2103:7493:77982#49 -HS25_09827:2:1311:3390:97398#49 -HS25_09827:2:2106:20915:96061#49 -HS25_09827:2:1315:9123:25371#49 -HS25_09827:2:2214:9661:78764#49 -HS25_09827:2:2312:17776:59159#49 -HS25_09827:2:2213:18537:94331#49 -HS25_09827:2:2114:5251:61228#49 -HS25_09827:2:2314:12584:16760#49 -HS25_09827:2:2113:15317:29114#49 -HS25_09827:2:2214:8461:69698#49 -HS25_09827:2:2302:10105:31096#49 -HS25_09827:2:2214:9661:78764#49 -HS25_09827:2:1315:9123:25371#49 -HS25_09827:2:2106:20915:96061#49 -HS25_09827:2:2312:17776:59159#49 -HS25_09827:2:1302:9192:97489#49 -HS25_09827:2:2214:8461:69698#49 -HS25_09827:2:1214:17469:22254#49 -HS25_09827:2:1202:7075:29749#49 -HS25_09827:2:2103:7471:51098#49 -HS25_09827:2:2302:10105:31096#49 -HS25_09827:2:2116:6587:6102#49 -HS25_09827:2:1205:4638:44107#49 -HS25_09827:2:1203:13078:91845#49 -HS25_09827:2:2101:11742:97546#49 -HS25_09827:2:1202:7075:29749#49 -HS25_09827:2:1302:9192:97489#49 -HS25_09827:2:2116:6587:6102#49 -HS25_09827:2:1214:17469:22254#49 -HS25_09827:2:2103:7471:51098#49 -HS25_09827:2:1205:4638:44107#49 -HS25_09827:2:1304:11617:5013#49 -HS25_09827:2:1312:9981:81175#49 -HS25_09827:2:1203:13078:91845#49 -HS25_09827:2:2112:1653:93732#49 -HS25_09827:2:2311:12335:4457#49 -HS25_09827:2:2101:11742:97546#49 -HS25_09827:2:2311:12335:4457#49 -HS25_09827:2:1312:9981:81175#49 -HS25_09827:2:2301:13593:38842#49 -HS25_09827:2:2112:1653:93732#49 -HS25_09827:2:1206:16181:40140#49 -HS25_09827:2:2213:12218:11317#49 -HS25_09827:2:1204:18296:83970#49 -HS25_09827:2:2301:13593:38842#49 -HS25_09827:2:1204:8096:53881#49 -HS25_09827:2:2305:5717:60080#49 -HS25_09827:2:1206:16181:40140#49 
-HS25_09827:2:2213:12218:11317#49 -HS25_09827:2:2209:13044:37709#49 -HS25_09827:2:1204:18296:83970#49 -HS25_09827:2:2303:5286:85765#49 -HS25_09827:2:1204:8096:53881#49 -HS25_09827:2:2305:5717:60080#49 -HS25_09827:2:2209:13044:37709#49 -HS25_09827:2:2113:6442:3122#49 -HS25_09827:2:2101:15947:86891#49 -HS25_09827:2:2301:11411:14573#49 -HS25_09827:2:2315:19733:35800#49 -HS25_09827:2:2201:7082:19167#49 -HS25_09827:2:2109:2450:54257#49 -HS25_09827:2:2201:7082:19167#49 -HS25_09827:2:2301:11411:14573#49 -HS25_09827:2:2315:19733:35800#49 -HS25_09827:2:2212:10634:47241#49 -HS25_09827:2:2101:15947:86891#49 -HS25_09827:2:2216:5792:29166#49 -HS25_09827:2:2303:11846:87193#49 -HS25_09827:2:1304:5440:11335#49 -HS25_09827:2:1304:5440:11335#49 -HS25_09827:2:2109:2450:54257#49 -HS25_09827:2:2214:10009:21814#49 -HS25_09827:2:1306:19624:74584#49 -HS25_09827:2:2216:16283:88747#49 -HS25_09827:2:2212:10634:47241#49 -HS25_09827:2:1312:9310:70162#49 -HS25_09827:2:2303:11846:87193#49 -HS25_09827:2:1211:17208:86550#49 -HS25_09827:2:2216:5792:29166#49 -HS25_09827:2:1208:18293:14781#49 -HS25_09827:2:2310:7193:56304#49 -HS25_09827:2:1313:4004:54225#49 -HS25_09827:2:2214:10009:21814#49 -HS25_09827:2:1306:19624:74584#49 -HS25_09827:2:2310:7193:56304#49 -HS25_09827:2:1208:18293:14781#49 -HS25_09827:2:2216:16283:88747#49 -HS25_09827:2:1312:9310:70162#49 -HS25_09827:2:1313:4004:54225#49 -HS25_09827:2:2213:17996:63946#49 -HS25_09827:2:2204:2277:67988#49 -HS25_09827:2:1211:17208:86550#49 -HS25_09827:2:1305:13783:94707#49 -HS25_09827:2:1308:11643:96333#49 -HS25_09827:2:1302:4919:35994#49 -HS25_09827:2:2311:1929:40187#49 -HS25_09827:2:2105:15117:62592#49 -HS25_09827:2:2213:17996:63946#49 -HS25_09827:2:2204:2277:67988#49 -HS25_09827:2:1213:15078:67378#49 -HS25_09827:2:2311:1929:40187#49 -HS25_09827:2:1305:13783:94707#49 -HS25_09827:2:1308:11643:96333#49 -HS25_09827:2:2310:6098:16158#49 -HS25_09827:2:1306:20627:98985#49 -HS25_09827:2:1302:4919:35994#49 -HS25_09827:2:2310:6098:16158#49 
-HS25_09827:2:2105:15117:62592#49 -HS25_09827:2:1213:15078:67378#49 -HS25_09827:2:2303:17745:60651#49 -HS25_09827:2:1310:6428:9780#49 -HS25_09827:2:2102:18230:89095#49 -HS25_09827:2:1306:20627:98985#49 -HS25_09827:2:2303:17745:60651#49 -HS25_09827:2:1209:6341:23365#49 -HS25_09827:2:2102:18230:89095#49 -HS25_09827:2:1310:6428:9780#49 -HS25_09827:2:1312:18749:67350#49 -HS25_09827:2:2308:16843:91501#49 -HS25_09827:2:2208:13877:48644#49 -HS25_09827:2:2105:11579:73656#49 -HS25_09827:2:2316:16204:91534#49 -HS25_09827:2:1209:6341:23365#49 -HS25_09827:2:1312:18749:67350#49 -HS25_09827:2:1310:11618:35611#49 -HS25_09827:2:2308:16843:91501#49 -HS25_09827:2:2208:13877:48644#49 -HS25_09827:2:2307:15141:35690#49 -HS25_09827:2:2113:17544:35773#49 -HS25_09827:2:2115:10620:49851#49 -HS25_09827:2:2105:11579:73656#49 -HS25_09827:2:1303:10545:71510#49 -HS25_09827:2:2110:7626:28983#49 -HS25_09827:2:1313:10849:70919#49 -HS25_09827:2:1313:8193:64166#49 -HS25_09827:2:2316:16204:91534#49 -HS25_09827:2:2306:1922:15489#49 -HS25_09827:2:1213:20557:88077#49 -HS25_09827:2:1207:2002:3615#49 -HS25_09827:2:1310:11618:35611#49 -HS25_09827:2:2301:18414:12262#49 -HS25_09827:2:1208:2692:55723#49 -HS25_09827:2:2215:7188:72942#49 -HS25_09827:2:2305:18257:46394#49 -HS25_09827:2:2108:2637:13493#49 -HS25_09827:2:2115:10620:49851#49 -HS25_09827:2:2113:17544:35773#49 -HS25_09827:2:2110:7626:28983#49 -HS25_09827:2:1313:10849:70919#49 -HS25_09827:2:2103:14340:48787#49 -HS25_09827:2:2116:19798:72665#49 -HS25_09827:2:2301:19917:10266#49 -HS25_09827:2:2307:15141:35690#49 -HS25_09827:2:1303:10545:71510#49 -HS25_09827:2:2215:9931:93823#49 -HS25_09827:2:1312:21281:9988#49 -HS25_09827:2:2309:11733:55774#49 -HS25_09827:2:1313:8193:64166#49 -HS25_09827:2:2306:1922:15489#49 -HS25_09827:2:2103:3811:24661#49 -HS25_09827:2:2213:7024:28184#49 -HS25_09827:2:2203:8550:77823#49 -HS25_09827:2:1213:20557:88077#49 -HS25_09827:2:1208:2692:55723#49 -HS25_09827:2:2301:18414:12262#49 -HS25_09827:2:2215:7188:72942#49 
-HS25_09827:2:1207:2002:3615#49 -HS25_09827:2:1204:6858:64981#49 -HS25_09827:2:1216:5773:50381#49 -HS25_09827:2:2105:3960:6282#49 -HS25_09827:2:2108:2637:13493#49 diff --git a/src/htslib-1.18/htscodecs/tests/names/08.names b/src/htslib-1.18/htscodecs/tests/names/08.names deleted file mode 100644 index b2db985..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/08.names +++ /dev/null @@ -1,1000 +0,0 @@ -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/57/0_572 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/62/0_2109 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/71/3203_11046 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/83/0_5212 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/85/519_20183 RQ=0.796 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/91/7805_11090 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/105/0_12388 RQ=0.793 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/110/8889_13504 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/113/12294_27468 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/134/6040_6919 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/134/6963_13548 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/134/13594_17210 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/137/0_5891 RQ=0.761 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/143/0_9335 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/144/0_4787 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/147/0_13664 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/147/13705_17626 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/148/0_15655 RQ=0.839 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/150/0_1007 RQ=0.804 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/154/2767_23604 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/154/23638_26791 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/165/0_1655 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/165/1695_3215 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/168/0_4829 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/169/0_7781 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/181/0_5499 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/186/1151_2169 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/186/2216_3397 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/189/5186_8775 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/194/0_1994 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/195/3371_4377 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/198/8171_22863 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/198/22903_28156 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/210/4127_6592 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/211/0_9463 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/214/490_1176 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/214/1215_2134 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/229/0_429 RQ=0.812 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/246/6746_6923 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/251/1649_3393 RQ=0.794 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/258/0_332 RQ=0.811 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/259/0_16308 RQ=0.823 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/259/16345_16490 RQ=0.823 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/261/0_10004 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/262/1835_12935 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/267/2319_9589 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/280/0_1681 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/287/0_3218 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/289/0_6970 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/290/2148_4523 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/291/0_9189 RQ=0.816 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/294/0_10381 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/295/0_8404 RQ=0.798 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/297/0_14245 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/298/6244_8221 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/298/8267_13102 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/309/0_2885 RQ=0.816 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/310/0_14040 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/314/3374_14861 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/314/14903_16917 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/320/0_1709 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/325/0_3263 RQ=0.812 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/329/374_1830 RQ=0.803 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/329/1870_2960 RQ=0.803 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/333/711_10496 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/346/0_519 RQ=0.820 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/348/0_3222 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/348/3266_5009 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/358/2172_3757 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/358/3798_5136 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/365/5178_5363 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/366/3273_15876 RQ=0.761 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/367/4828_5140 RQ=0.790 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/370/7232_16928 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/372/1313_11297 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/380/0_1071 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/381/0_434 RQ=0.794 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/382/20023_25199 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/382/25244_29236 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/385/5288_18851 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/392/0_14077 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/398/520_7471 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/401/1637_14121 RQ=0.801 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/407/6993_13037 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/412/0_5897 RQ=0.820 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/420/493_3270 RQ=0.783 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/421/0_3453 RQ=0.833 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/421/3499_4255 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/423/0_1311 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/424/6305_14295 RQ=0.778 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/435/0_2602 RQ=0.776 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/443/0_5528 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/444/0_771 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/457/0_8207 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/459/0_15678 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/459/15717_16716 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/461/0_4342 RQ=0.799 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/471/0_2447 RQ=0.803 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/472/5760_8642 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/483/0_18251 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/484/0_19769 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/484/19815_27779 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/489/0_240 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/491/5160_21417 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/496/0_9234 RQ=0.802 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/503/0_17577 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/509/6413_18673 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/511/0_13071 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/516/4689_14983 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/516/15027_19140 RQ=0.835 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/521/1470_4022 RQ=0.864 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/536/1731_10114 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/540/7511_10412 RQ=0.820 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/543/0_6254 RQ=0.771 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/546/0_2942 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/546/2984_3781 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/550/0_11758 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/551/0_20269 RQ=0.812 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/553/0_3895 RQ=0.792 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/557/0_7226 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/561/0_13531 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/567/3628_15347 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/572/6664_17033 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/572/17078_27325 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/578/0_14273 RQ=0.865 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/578/14316_17633 RQ=0.865 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/585/0_2137 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/585/2184_4226 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/590/0_14082 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/594/865_3002 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/598/4389_14854 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/598/14896_18875 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/599/0_2015 RQ=0.845 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/599/2059_4013 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/602/0_3700 RQ=0.774 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/609/0_2730 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/616/6405_15766 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/616/15809_26742 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/617/0_14718 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/624/0_6227 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/626/5650_14022 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/634/1057_6613 RQ=0.823 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/636/0_2796 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/637/370_11392 RQ=0.785 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/638/0_11242 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/639/0_1940 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/639/1982_3787 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/9597_9829 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/9874_10910 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/10952_13256 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/13305_14449 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/14490_15423 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/15462_16387 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/16431_17383 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/17421_18352 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/645/18393_19050 
RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/649/0_10394 RQ=0.788 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/650/2515_9901 RQ=0.823 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/652/6920_25602 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/655/0_20378 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/656/0_19721 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/658/0_424 RQ=0.816 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/661/7077_11242 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/662/0_1648 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/670/0_1439 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/670/1482_2823 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/672/212_20743 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/673/0_2044 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/674/0_15983 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/675/0_1108 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/684/0_12094 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/694/0_1840 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/711/0_6001 RQ=0.812 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/715/0_9842 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/726/0_865 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/726/903_1737 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/728/0_6846 RQ=0.823 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/731/0_3144 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/733/10077_10413 RQ=0.836 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/733/10456_13417 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/736/0_10559 RQ=0.865 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/737/9255_12154 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/739/2879_8559 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/739/8600_15139 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/740/0_3339 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/746/0_4270 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/747/0_7177 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/750/3999_6077 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/751/3188_3509 RQ=0.870 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/753/0_6075 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/755/1157_3538 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/763/0_10856 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/770/904_9914 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/771/3095_3425 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/771/3467_7769 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/774/8565_16465 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/775/0_6644 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/787/4087_10021 RQ=0.816 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/796/0_8337 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/797/6003_17376 RQ=0.812 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/797/17423_26340 RQ=0.812 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/802/0_19330 RQ=0.853 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/803/905_4877 RQ=0.814 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/806/0_3149 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/807/4208_10637 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/812/5868_7191 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/818/1092_6123 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/826/0_4891 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/831/0_3386 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/841/0_1637 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/843/1402_2189 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/847/232_8361 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/848/0_6101 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/849/0_4691 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/855/0_14488 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/855/14532_25809 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/858/8528_13219 RQ=0.764 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/866/0_4848 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/866/4897_10340 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/868/0_3959 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/872/0_1123 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/881/0_2315 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/889/0_5833 RQ=0.817 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/894/0_5881 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/896/0_11745 RQ=0.823 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/897/19050_24609 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/897/24649_26277 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/905/5589_21380 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/905/21425_23606 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/907/14395_14967 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/908/0_8731 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/911/0_13560 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/914/0_828 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/917/0_10562 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/919/235_3398 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/924/1123_9900 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/928/11521_18786 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/928/18840_25196 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/930/10615_24283 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/931/2848_4208 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/935/288_10567 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/935/10607_13038 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/937/0_4360 RQ=0.874 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/939/3667_7524 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/945/3539_6092 RQ=0.820 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/945/6141_6760 RQ=0.820 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/962/0_2931 RQ=0.807 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/964/0_3080 RQ=0.848 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/982/0_1827 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/982/1865_3355 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/989/0_1927 RQ=0.804 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/995/0_1244 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1001/0_24672 RQ=0.872 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1005/0_29417 RQ=0.864 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1005/29467_29979 RQ=0.864 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1007/0_846 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1007/891_1701 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1009/737_11972 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1010/0_18939 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1014/0_4755 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1018/0_1220 RQ=0.789 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1018/1262_9535 RQ=0.789 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1024/6378_12984 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1028/0_15033 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1032/0_4559 RQ=0.753 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1046/0_1822 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1047/0_6059 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1051/2236_16682 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1051/16725_18664 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1055/0_11345 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1055/11391_13510 RQ=0.834 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1055/13560_18247 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1057/3925_15968 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1061/0_6702 RQ=0.771 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1073/8817_13931 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1073/13974_19695 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1074/8176_13535 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1092/0_6607 RQ=0.763 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1094/9550_12283 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1096/10760_22156 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1099/3082_15553 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1100/9495_11253 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1101/15078_21663 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1103/0_10059 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1106/13198_22123 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1107/0_2277 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1110/0_1553 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1112/2998_13523 RQ=0.793 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1114/3150_10191 RQ=0.867 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1116/915_10305 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1118/4277_17479 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1119/0_5565 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1120/0_5274 RQ=0.854 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1121/0_5110 RQ=0.859 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1123/3521_4220 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1131/0_16274 RQ=0.865 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1131/16320_28332 RQ=0.865 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1137/0_12437 RQ=0.864 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1140/0_3407 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1140/3456_6266 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1144/0_7409 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1153/0_2588 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1153/2631_3708 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1157/0_15872 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1158/3758_8959 RQ=0.783 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1160/0_1250 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1164/0_1168 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1165/0_7480 RQ=0.813 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1168/0_1433 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1194/1116_2089 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1196/340_4852 RQ=0.777 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1201/3439_8149 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1205/6963_24728 RQ=0.815 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1206/0_5787 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1213/2555_12307 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1215/2934_3628 RQ=0.847 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1220/562_4190 RQ=0.806 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1226/0_2241 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1226/2286_3575 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1228/0_2892 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1228/2935_3937 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1228/3981_8011 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1233/3408_12216 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1236/337_4130 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1241/0_780 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1243/6551_7291 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1247/0_1607 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1249/0_1742 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1253/2166_16851 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1253/16895_19169 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1254/0_23938 RQ=0.872 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1255/2014_3193 RQ=0.859 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1256/0_2288 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1257/7703_13411 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1258/0_1555 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1262/0_2957 RQ=0.806 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1268/8038_17641 RQ=0.813 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1271/0_2252 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1275/1635_5656 RQ=0.758 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1284/0_4307 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1293/1518_9886 RQ=0.816 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1294/0_4124 RQ=0.807 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1296/4092_7978 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1298/2639_5080 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1301/0_4792 RQ=0.802 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1303/248_4708 RQ=0.817 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1316/0_11108 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1319/0_5264 RQ=0.771 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1321/0_2056 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1322/0_1373 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1324/2105_11341 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1326/948_2567 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1330/0_4127 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1331/11470_17575 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1331/17614_18734 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1335/27120_33950 RQ=0.814 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1335/33993_38193 RQ=0.814 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1336/12614_26314 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1348/967_4602 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1348/4641_7419 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1349/7246_22005 RQ=0.861 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1349/22054_24879 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1350/0_3744 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1353/200_1407 RQ=0.815 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1357/0_15835 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1367/0_772 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1369/2630_3441 RQ=0.815 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1369/3486_5441 RQ=0.815 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1380/17390_27601 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1380/27645_31664 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1386/0_3403 RQ=0.812 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1387/1806_22771 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1391/0_1993 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1391/2039_3212 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1392/0_1374 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1402/3159_10533 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1405/0_10555 RQ=0.816 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1408/0_3711 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1411/9572_17906 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1424/0_330 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1429/0_10765 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1432/3602_8268 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1433/0_4224 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1434/0_3377 RQ=0.859 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1437/3254_14521 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1439/4131_18598 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1439/18649_18759 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1441/1221_5180 RQ=0.870 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1442/0_8474 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1450/0_5521 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1451/6028_18508 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1451/18552_19423 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1453/0_15858 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1453/15899_19530 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1456/1282_4867 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1458/0_10080 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1462/13340_31214 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1462/31253_33514 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1464/4376_12792 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1464/12836_15328 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1465/16810_21452 RQ=0.876 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1466/0_3169 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1466/3218_6354 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1469/323_1352 RQ=0.816 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1480/0_3054 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1487/1745_12101 RQ=0.809 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1489/0_9418 RQ=0.782 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1491/0_13109 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1494/0_3809 RQ=0.802 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1496/4906_8448 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1497/0_17389 RQ=0.811 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1497/17434_20405 RQ=0.811 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1498/0_20097 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1501/10594_19995 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1501/20037_24078 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1513/0_3721 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1514/4677_5350 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1516/0_7578 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1517/8962_9968 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1518/17404_24342 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1520/0_16608 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1520/16651_19490 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1522/0_16947 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1523/8048_15487 RQ=0.811 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1523/15525_17580 RQ=0.811 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1526/0_12373 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1528/0_5695 RQ=0.769 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1531/6860_8940 RQ=0.777 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1533/6686_21962 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1534/4871_9733 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1535/2036_28319 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1535/28359_29305 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1539/15374_16706 RQ=0.872 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1541/3061_19141 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1541/19185_22316 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1544/3294_21465 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1545/77_4016 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1546/0_3805 RQ=0.753 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1549/537_16175 RQ=0.820 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1551/0_821 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1554/0_2995 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1560/7007_15290 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1564/390_1639 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1564/1681_2964 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1565/1102_7799 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1567/1456_13966 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1568/8209_17933 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1574/0_3970 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1578/2700_4867 RQ=0.874 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1579/0_3140 RQ=0.855 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1581/8970_15386 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1581/15425_23713 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1585/9359_11477 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1587/1243_9174 RQ=0.802 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1589/0_990 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1597/1614_9257 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1603/305_3578 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1603/3627_6867 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1606/0_1160 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1617/9432_10053 RQ=0.803 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1633/8844_9346 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1638/11149_23329 RQ=0.870 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1639/0_5544 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1641/1108_14093 RQ=0.786 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1643/7014_15344 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1644/9825_21053 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1652/9129_12835 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1655/0_3864 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1657/0_1615 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1659/9500_11007 RQ=0.803 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1660/5231_17439 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1665/5032_16096 RQ=0.834 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1673/8902_13045 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1676/1810_8912 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1677/3570_6739 RQ=0.872 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1681/0_5153 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1682/0_7640 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1690/8416_13357 RQ=0.864 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1693/22407_32857 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1696/0_4536 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1698/240_8302 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1698/8343_10650 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1702/0_1827 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1704/0_14702 RQ=0.859 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1704/14744_16944 RQ=0.859 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1705/0_1658 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1706/4424_11676 RQ=0.807 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1709/9010_18616 RQ=0.870 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1709/18660_26771 RQ=0.870 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1715/7386_8591 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1719/6407_7373 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1720/12069_15650 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1727/0_2752 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1728/11523_13121 RQ=0.844 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1731/0_7557 RQ=0.796 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1733/5095_6773 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1735/3884_7768 RQ=0.756 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1747/0_3276 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1748/6927_9723 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1749/13694_24512 RQ=0.792 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1753/10658_36483 RQ=0.859 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1759/13339_31948 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1763/0_3945 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1766/540_15219 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1772/801_1371 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1778/301_1294 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1784/11917_14144 RQ=0.816 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1790/0_6888 RQ=0.794 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1793/0_1766 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1793/1818_3616 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1798/0_5552 RQ=0.859 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1800/268_2508 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1802/0_4047 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1803/9480_11838 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1807/0_13014 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1809/498_6230 RQ=0.870 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1812/1551_8715 
RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1814/0_2832 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1816/0_11107 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1817/0_2443 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1824/0_5668 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1826/3289_4434 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1831/1702_4151 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1833/0_2252 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1834/0_1261 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1835/0_19215 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1837/0_2116 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1844/9033_12025 RQ=0.808 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1844/12075_13237 RQ=0.808 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1848/0_5735 RQ=0.806 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1849/0_3121 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1849/3161_4406 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1853/0_15972 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1853/16012_18212 RQ=0.824 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1861/3645_5759 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1862/1336_14622 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1862/14665_24832 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1864/0_8379 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1865/5516_7849 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1866/5275_14076 
RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1867/24067_40792 RQ=0.865 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1870/12632_16394 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1872/3873_17072 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1872/17130_24568 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1872/24609_25275 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1875/0_14497 RQ=0.867 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1875/14539_15151 RQ=0.867 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1877/384_2173 RQ=0.770 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1882/0_623 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1883/0_9564 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1887/0_6716 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1888/0_2660 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1889/0_2970 RQ=0.864 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1890/4783_5521 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1895/0_2371 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1901/0_11351 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1902/937_8355 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1904/7206_9419 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1904/9465_24298 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1905/194_10386 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1906/2003_6574 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1907/3290_4789 RQ=0.835 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1911/10399_14846 RQ=0.802 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1913/0_2619 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1913/2667_4975 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1914/291_2136 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1917/3739_4218 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1918/2672_10274 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1920/10918_23499 RQ=0.876 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1924/711_11701 RQ=0.802 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1925/2869_6548 RQ=0.815 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1927/0_19634 RQ=0.876 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1927/19680_19749 RQ=0.876 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1928/0_8721 RQ=0.764 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1932/0_3618 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1952/615_8978 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1954/0_1389 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1964/2728_13953 RQ=0.814 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1967/0_10068 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1970/1107_5813 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1973/0_7386 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1975/0_14796 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1986/0_7607 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1988/15510_15675 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1994/4000_8419 
RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/1995/0_1724 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2000/0_572 RQ=0.772 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2012/0_2567 RQ=0.790 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2013/14103_17940 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2017/347_7369 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2023/11118_17820 RQ=0.820 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2027/0_1416 RQ=0.757 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2028/5463_21162 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2030/257_7256 RQ=0.817 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2037/371_8353 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2039/3145_25440 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2045/1680_17407 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2045/17452_17788 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2049/0_20607 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2049/20646_21863 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2051/8404_20355 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2054/7980_8463 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2057/0_1129 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2059/0_11046 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2059/11094_13285 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2062/2294_7618 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2065/3134_10985 RQ=0.812 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2065/11033_12992 RQ=0.812 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2067/342_18427 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2069/1730_6071 RQ=0.835 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/9835_11100 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/11144_12450 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/12490_13761 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/13804_15118 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/15164_16465 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/16504_17805 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/17844_19140 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/19189_20507 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/20546_21871 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/21915_23200 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/23245_24543 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/24586_25832 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/25881_27194 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/27234_28536 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/28577_29856 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/29893_31198 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/31236_32529 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/32572_33850 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2071/33893_34486 RQ=0.869 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2078/0_13096 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2080/0_3502 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2080/3544_6233 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2086/1535_2670 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2090/5476_18753 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2092/1637_3032 RQ=0.871 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2093/5368_6124 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2093/6169_7962 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2093/8008_9884 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2093/9926_11729 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2093/11770_13638 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2093/13684_14533 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2094/3796_11521 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2097/0_7768 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2098/1075_4968 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2108/6058_10376 RQ=0.768 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2108/10419_11147 RQ=0.768 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2108/11195_12188 RQ=0.768 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2109/1103_9317 RQ=0.777 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2109/9362_14565 RQ=0.777 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2112/4631_11055 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2118/0_1722 RQ=0.843 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2123/2827_17543 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2123/17581_18045 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2125/6781_9674 RQ=0.867 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2132/8454_8826 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2135/0_9732 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2138/0_14930 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2144/0_1974 RQ=0.876 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2145/0_336 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2147/2461_11108 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2148/3544_16804 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2149/0_3248 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2152/0_504 RQ=0.802 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2153/10179_10877 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2154/0_5112 RQ=0.772 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2155/0_8811 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2159/5334_8346 RQ=0.804 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2164/1630_7449 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2165/4675_4914 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2166/0_16323 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2166/16367_21578 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2168/0_2103 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2171/0_276 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2174/0_5163 RQ=0.779 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2175/0_11832 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2179/886_7628 RQ=0.798 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2186/720_13775 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2194/0_16368 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2195/2835_3800 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2195/3842_5021 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2195/5065_5600 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2205/0_20716 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2205/20756_31348 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2209/1611_11477 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2211/0_2273 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2213/743_8815 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2216/0_3363 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2219/0_1022 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2219/1068_1985 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2223/439_5813 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2240/0_2268 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2243/10586_10809 RQ=0.807 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2245/10332_21442 RQ=0.793 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2248/0_6771 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2253/5144_11580 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2253/11618_17117 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2256/0_14757 
RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2259/0_2104 RQ=0.793 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2260/0_2277 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2262/0_4673 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2264/0_5570 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2264/5617_11227 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2267/0_9689 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2269/271_20865 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2269/20906_25987 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2271/732_11741 RQ=0.867 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2272/0_2054 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2278/0_3044 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2279/0_3918 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2285/18245_24410 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2287/0_2716 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2287/2769_4510 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2289/503_7148 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2290/3111_9683 RQ=0.859 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2292/489_2127 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2295/1040_5176 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2299/0_2069 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2305/707_11059 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/1875_4963 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/5008_7167 
RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/7213_9291 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/9338_11410 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/11455_13567 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/13611_15679 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/15727_17807 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/17852_19873 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/19917_21946 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/21989_23958 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/23999_26050 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/26098_28076 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/28121_30175 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/30221_32270 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2306/32316_34219 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2307/0_5921 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2315/3536_10882 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2316/0_6100 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2318/0_2861 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2321/2975_5837 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2321/5881_8195 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2327/0_8511 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2328/3698_7937 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2330/0_19066 RQ=0.845 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2330/19112_22130 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2331/2427_12694 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2331/12735_14381 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2331/14426_20065 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2340/0_10519 RQ=0.859 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2347/1201_8703 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2347/8743_10932 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2358/0_3505 RQ=0.806 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2361/0_12727 RQ=0.823 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2363/0_3883 RQ=0.817 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2370/565_4491 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2374/7188_9810 RQ=0.823 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2376/0_7151 RQ=0.814 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2377/6280_13906 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2379/1703_8757 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2381/1770_4772 RQ=0.802 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2382/0_9061 RQ=0.785 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2385/0_21143 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2385/21189_23506 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2386/0_1784 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2387/5917_15748 RQ=0.829 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2400/0_2202 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2407/0_2463 
RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2409/0_12300 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2409/12344_23076 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2412/13779_23374 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2412/23417_23456 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2413/0_10478 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2414/0_4995 RQ=0.794 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2415/3660_4629 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2420/0_1428 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2422/375_7748 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2425/13551_29606 RQ=0.787 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2427/0_1181 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2428/0_14128 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2429/1055_21513 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2434/301_2399 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2435/0_12148 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2441/0_3216 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2444/2799_10049 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2444/10084_11117 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2447/2869_12479 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2449/0_7127 RQ=0.870 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2452/1537_4729 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2455/2085_3874 RQ=0.802 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2457/0_13115 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2459/3977_16365 RQ=0.834 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2462/0_8845 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2463/1535_12306 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2474/3070_16931 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2476/0_1194 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2476/1236_2248 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2485/0_2107 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2498/23679_28296 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2499/0_2182 RQ=0.792 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2500/5380_15632 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2504/6677_13464 RQ=0.817 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2509/0_7490 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2512/7129_12059 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2515/0_2125 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2516/0_3141 RQ=0.872 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2516/3183_3873 RQ=0.872 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2519/0_11568 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2521/797_4639 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2522/0_21978 RQ=0.842 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2523/2412_3442 RQ=0.801 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2524/0_5111 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2524/5153_10131 
RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2529/0_3920 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2535/4459_9031 RQ=0.870 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2542/410_14988 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2542/15033_25171 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2553/7125_7333 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2553/7376_15458 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2557/1322_8199 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2560/2660_10243 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2562/13999_30997 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2567/5055_5688 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2580/2573_16693 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2580/16735_17090 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2581/0_14745 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2583/284_3518 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2583/3559_6850 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2588/4523_14875 RQ=0.819 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2590/2455_22594 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2593/2616_3755 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2605/0_1340 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2608/9623_24608 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2616/0_10886 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2627/0_4483 RQ=0.856 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2636/0_1359 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2638/3498_7657 RQ=0.791 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2639/0_3553 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2643/66_4665 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2643/4709_6264 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2649/0_1740 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2650/0_21049 RQ=0.828 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2657/3900_5469 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2660/0_6540 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2660/6579_13656 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2660/13700_21019 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2660/21060_25315 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2663/6547_14437 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2665/0_4447 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2668/9468_13163 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2670/359_2517 RQ=0.786 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2673/0_23788 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2679/224_2637 RQ=0.867 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2679/2678_5116 RQ=0.867 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2683/0_6297 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2683/6345_18940 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2684/0_19048 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2685/461_27315 
RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2688/0_1022 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2688/1067_2067 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2693/0_14601 RQ=0.860 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2699/1926_2712 RQ=0.808 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2704/602_12779 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2713/9105_23988 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2717/882_3056 RQ=0.778 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2721/960_3127 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2724/1873_2528 RQ=0.804 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2726/3477_4401 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2729/0_19681 RQ=0.772 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2732/587_7144 RQ=0.813 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2734/948_15190 RQ=0.801 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2735/5946_6552 RQ=0.774 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2738/0_15465 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2740/0_1727 RQ=0.847 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2741/1051_8906 RQ=0.785 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2744/629_2518 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2746/3101_8918 RQ=0.849 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2750/3619_8728 RQ=0.785 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2752/0_836 RQ=0.767 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2760/1117_9189 RQ=0.805 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2765/2356_14956 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2768/0_982 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2768/1028_1948 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2773/0_11281 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2777/11694_14731 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2778/0_7640 RQ=0.799 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2780/243_17950 RQ=0.807 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2790/9571_20681 RQ=0.813 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2792/7255_9173 RQ=0.781 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2796/10475_16744 RQ=0.753 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2798/3663_16212 RQ=0.861 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2799/13065_16610 RQ=0.858 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2806/0_1845 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2807/14017_18531 RQ=0.813 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2807/18577_25994 RQ=0.813 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2808/11811_22956 RQ=0.794 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2809/0_16480 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2810/0_2363 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2810/2408_4597 RQ=0.848 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2811/0_2776 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2812/166_3900 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2814/0_9021 RQ=0.840 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2815/2305_3904 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2816/0_2603 RQ=0.813 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2818/0_16965 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2819/2390_11059 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2820/0_2750 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2820/2790_4725 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2821/0_6427 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2822/10921_10973 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2824/8898_12535 RQ=0.815 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2828/6028_6853 RQ=0.803 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2837/2924_3109 RQ=0.830 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2841/0_12279 RQ=0.771 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2843/3884_10011 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2847/4557_17393 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2847/17437_24741 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2849/517_2614 RQ=0.837 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2853/4324_6185 RQ=0.874 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2855/0_6424 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2857/0_6880 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2858/0_1613 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2862/0_10252 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2863/0_5118 RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2863/5159_6063 
RQ=0.818 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2864/0_4800 RQ=0.856 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2865/0_5226 RQ=0.836 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2866/0_326 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2868/313_9753 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2869/0_17394 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2869/17433_19687 RQ=0.843 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2874/0_2104 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2878/0_7842 RQ=0.815 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2879/280_3945 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2898/2026_7585 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2901/0_9771 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2906/16555_24873 RQ=0.822 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2909/0_3714 RQ=0.814 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2911/0_4532 RQ=0.821 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2912/19372_21341 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2912/21383_31351 RQ=0.805 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2913/3538_4720 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2914/0_2426 RQ=0.874 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2914/2466_4630 RQ=0.874 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2915/0_13294 RQ=0.823 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2918/0_498 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2923/0_2170 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2923/2213_2738 RQ=0.853 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2925/1635_3335 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2929/5105_20817 RQ=0.840 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2930/4416_13636 RQ=0.872 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2931/0_5774 RQ=0.864 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2937/4570_5536 RQ=0.833 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2938/2868_10312 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2939/3827_10478 RQ=0.846 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2945/2969_12575 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2949/0_13312 RQ=0.868 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2952/7055_17179 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2955/0_13909 RQ=0.862 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2962/8378_10768 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2963/409_3822 RQ=0.875 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2964/1347_2385 RQ=0.851 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2966/11003_14098 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2972/2069_19292 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2972/19337_21091 RQ=0.873 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2973/13266_20214 RQ=0.810 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2975/0_1991 RQ=0.845 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2976/0_430 RQ=0.803 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2983/2256_3336 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2983/3382_5905 RQ=0.855 
-@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2984/0_5636 RQ=0.772 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2986/0_1481 RQ=0.790 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2987/8204_19594 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2989/3151_17787 RQ=0.827 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2993/0_2757 RQ=0.841 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/2994/656_4472 RQ=0.758 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3002/0_658 RQ=0.820 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3003/0_13288 RQ=0.832 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3005/0_1356 RQ=0.826 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3007/0_16488 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3007/16534_22696 RQ=0.850 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3012/27481_37279 RQ=0.875 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3015/0_14739 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3017/3307_11560 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3019/3006_10853 RQ=0.852 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3027/0_13848 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3027/13892_14001 RQ=0.854 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3031/0_1717 RQ=0.831 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3040/11817_17246 RQ=0.853 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3047/26050_28933 RQ=0.855 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3052/127_2841 RQ=0.839 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3056/2653_8836 RQ=0.869 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3057/0_17342 
RQ=0.867 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3058/0_3646 RQ=0.800 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3062/1926_6987 RQ=0.825 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3064/0_1413 RQ=0.838 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3065/0_15513 RQ=0.857 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3066/0_5982 RQ=0.866 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3068/6270_6503 RQ=0.809 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3076/0_23149 RQ=0.863 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3077/0_6576 RQ=0.773 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3080/386_13325 RQ=0.844 -@m131003_220056_42213_c100579662550000001823095604021477_s1_p0/3080/13373_20632 RQ=0.844 diff --git a/src/htslib-1.18/htscodecs/tests/names/09.names b/src/htslib-1.18/htscodecs/tests/names/09.names deleted file mode 100644 index 10e86e9..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/09.names +++ /dev/null @@ -1,1000 +0,0 @@ -DB45V:02583:03087 -DB45V:00493:00236 -D3VVV:02506:00646 -1L5GV:00545:00054 -1L5GV:00578:01141 -1L5GV:01993:02798 -1L5GV:03007:01815 -XOH00:00931:02763 -D3VVV:00362:01046 -1L5GV:01932:03033 -1L5GV:00563:01485 -XOH00:02676:00773 -1L5GV:01867:01891 -1L5GV:02705:01068 -1L5GV:00895:01223 -1L5GV:01700:02988 -D3VVV:03394:01209 -1L5GV:03660:02232 -1L5GV:02013:01776 -1L5GV:02262:01799 -XOH00:02348:00842 -1L5GV:01981:02688 -DB45V:00842:00720 -DB45V:02180:01785 -D3VVV:02543:02291 -D3VVV:02728:00426 -XOH00:02503:03186 -1L5GV:02778:03299 -XOH00:03499:01777 -XOH00:03436:01247 -XOH00:02864:01636 -XOH00:02697:02283 -XOH00:00052:00513 -XOH00:00983:01204 -D3VVV:01109:01776 -DB45V:02018:00960 -D3VVV:00186:02018 -D3VVV:00508:01616 -XOH00:01061:01549 -1L5GV:00774:01722 -DB45V:00682:01635 -DB45V:01640:00938 -DB45V:02480:01271 -D3VVV:01428:01358 -DB45V:01846:02150 -D3VVV:03765:02251 
-1L5GV:00869:02343 -1L5GV:01141:02705 -D3VVV:02144:03129 -D3VVV:03104:03009 -XOH00:02635:01633 -XOH00:02857:01167 -D3VVV:01334:00823 -D3VVV:02852:02573 -XOH00:02223:00237 -XOH00:00842:01656 -1L5GV:01165:01985 -D3VVV:00879:01859 -D3VVV:01309:03156 -D3VVV:02808:02115 -XOH00:01033:02433 -XOH00:03043:02153 -1L5GV:00997:01068 -D3VVV:02355:01435 -D3VVV:02842:02936 -XOH00:01645:01769 -D3VVV:02737:01649 -DB45V:01172:02326 -DB45V:01230:02068 -XOH00:03378:02096 -D3VVV:02841:02636 -DB45V:03122:01571 -1L5GV:01406:01372 -1L5GV:02190:02349 -D3VVV:01634:01732 -DB45V:01271:03230 -XOH00:03355:02933 -D3VVV:02323:00735 -XOH00:00622:02241 -XOH00:00070:02003 -DB45V:01959:00622 -DB45V:02618:02093 -D3VVV:01466:00665 -D3VVV:03458:01624 -D3VVV:01870:01409 -XOH00:03244:01145 -D3VVV:03691:02285 -DB45V:03524:02485 -D3VVV:00903:02241 -D3VVV:01965:01922 -1L5GV:00743:01019 -D3VVV:02477:02960 -1L5GV:03017:02922 -1L5GV:01566:00439 -XOH00:02961:01274 -1L5GV:00429:01603 -DB45V:02403:02539 -DB45V:01909:03340 -XOH00:02978:00671 -XOH00:00061:00021 -XOH00:00961:00123 -XOH00:03269:00993 -XOH00:03633:01152 -1L5GV:00744:01076 -1L5GV:02260:02255 -1L5GV:02549:02209 -1L5GV:03447:01347 -D3VVV:00372:00099 -D3VVV:00574:02168 -D3VVV:01545:00644 -D3VVV:02807:00641 -D3VVV:02834:02340 -DB45V:01827:02033 -DB45V:02175:02951 -DB45V:02431:00914 -XOH00:01084:01763 -XOH00:01391:02441 -XOH00:01876:03025 -XOH00:01957:00922 -XOH00:02219:00147 -XOH00:02495:01640 -XOH00:02550:00921 -XOH00:02871:01056 -XOH00:02948:01755 -DB45V:01637:02948 -XOH00:01577:00089 -XOH00:02514:01528 -D3VVV:01915:03122 -DB45V:02207:01404 -1L5GV:01490:02910 -1L5GV:02433:00423 -D3VVV:00934:02507 -D3VVV:01408:03072 -DB45V:00689:01414 -DB45V:02277:01722 -1L5GV:01840:03036 -D3VVV:03745:01737 -1L5GV:02397:03008 -1L5GV:02722:02634 -D3VVV:03317:02802 -DB45V:00568:01578 -DB45V:02608:02730 -1L5GV:02525:02988 -D3VVV:03401:01144 -DB45V:01334:00212 -XOH00:02714:00632 -1L5GV:01175:01517 -DB45V:01939:02609 -DB45V:00901:00473 -XOH00:03221:01299 -1L5GV:00316:00693 
-1L5GV:01998:00225 -1L5GV:02466:00700 -1L5GV:00356:00715 -D3VVV:02275:01182 -XOH00:03628:01862 -XOH00:01098:01505 -DB45V:01210:01685 -1L5GV:02023:01960 -XOH00:02653:00789 -DB45V:03314:01902 -D3VVV:01376:01180 -DB45V:00072:00992 -D3VVV:00287:02510 -1L5GV:00916:00781 -1L5GV:01989:03052 -D3VVV:03087:02006 -XOH00:00014:00853 -1L5GV:02517:01134 -D3VVV:00186:01415 -D3VVV:01443:02299 -DB45V:03258:01701 -DB45V:03372:01212 -XOH00:00337:00770 -1L5GV:03448:02502 -DB45V:00393:01110 -XOH00:01206:00845 -XOH00:02657:02730 -1L5GV:01863:00279 -XOH00:00583:01000 -1L5GV:02596:02323 -XOH00:00948:01307 -D3VVV:03516:02572 -1L5GV:03035:02991 -1L5GV:00240:01337 -XOH00:01734:01238 -XOH00:02809:03081 -DB45V:02572:01111 -XOH00:03719:03232 -XOH00:02738:00617 -D3VVV:01737:00565 -DB45V:01549:02867 -DB45V:00155:00733 -D3VVV:02560:01907 -XOH00:01064:02313 -D3VVV:02553:02325 -1L5GV:01065:01803 -1L5GV:02252:01741 -XOH00:02235:03282 -1L5GV:01357:00267 -XOH00:01813:02119 -D3VVV:03544:02221 -DB45V:00993:02431 -DB45V:02895:01994 -D3VVV:02210:01137 -XOH00:03549:01109 -1L5GV:01165:00166 -1L5GV:01692:01252 -1L5GV:01801:03109 -1L5GV:02207:02655 -D3VVV:00410:00258 -D3VVV:01482:02631 -D3VVV:01868:02182 -D3VVV:02482:02708 -D3VVV:02555:01521 -D3VVV:02886:02972 -DB45V:00527:02784 -DB45V:01493:02648 -DB45V:01516:03202 -DB45V:01562:02663 -DB45V:01595:02139 -DB45V:02725:03110 -XOH00:00584:01827 -XOH00:00653:01929 -XOH00:01202:00910 -XOH00:01631:00515 -XOH00:01790:02735 -XOH00:02096:01154 -XOH00:02191:00414 -XOH00:02421:01478 -XOH00:02518:01429 -XOH00:02736:03044 -XOH00:02749:01835 -XOH00:02912:02079 -XOH00:03224:01900 -XOH00:03440:01982 -DB45V:03062:00430 -1L5GV:01053:01921 -1L5GV:01074:01196 -1L5GV:01488:01677 -D3VVV:00763:02968 -D3VVV:01336:00573 -D3VVV:01505:02431 -D3VVV:01563:01804 -D3VVV:01651:02113 -D3VVV:02430:01508 -D3VVV:02815:02108 -D3VVV:03127:02729 -DB45V:01179:02883 -DB45V:01707:00316 -DB45V:02203:02239 -DB45V:02267:02130 -DB45V:02348:00385 -DB45V:03012:02790 -DB45V:03443:00856 -XOH00:00481:00080 
-XOH00:00708:01714 -XOH00:00801:01093 -XOH00:00956:02296 -XOH00:00958:02359 -XOH00:01323:02528 -XOH00:01941:02048 -XOH00:02359:03181 -XOH00:02799:00862 -XOH00:02872:01824 -XOH00:03713:02316 -XOH00:03626:01860 -DB45V:00774:02832 -D3VVV:03781:01788 -1L5GV:01818:00495 -1L5GV:02788:02538 -DB45V:00004:00424 -1L5GV:01936:02410 -XOH00:02164:01066 -D3VVV:02960:01608 -DB45V:01974:00447 -1L5GV:02615:00797 -XOH00:03396:03020 -D3VVV:02194:01650 -DB45V:02432:02075 -DB45V:02614:02128 -XOH00:02504:00835 -XOH00:01088:02409 -D3VVV:01371:02306 -XOH00:01319:02970 -DB45V:01024:00591 -1L5GV:01373:02702 -1L5GV:02581:01662 -DB45V:01833:01620 -DB45V:01876:00732 -XOH00:03509:02438 -1L5GV:02604:02233 -XOH00:02219:00501 -1L5GV:03081:03012 -DB45V:00162:00367 -DB45V:01232:02403 -XOH00:02593:02975 -XOH00:02607:02419 -XOH00:01133:00937 -D3VVV:02566:00206 -1L5GV:01501:00368 -1L5GV:02791:00930 -DB45V:00334:02178 -XOH00:00543:00728 -XOH00:00331:02408 -D3VVV:00874:01451 -XOH00:02361:02939 -1L5GV:03311:02492 -DB45V:01778:02986 -1L5GV:01095:02096 -1L5GV:02990:00462 -XOH00:01657:01079 -DB45V:01211:01083 -XOH00:02973:01947 -D3VVV:03219:02231 -1L5GV:01680:00202 -1L5GV:01971:03376 -XOH00:02203:03159 -XOH00:02608:00427 -XOH00:03042:02649 -1L5GV:01565:03235 -1L5GV:01283:00447 -XOH00:03108:00778 -XOH00:02189:01738 -D3VVV:03422:01861 -1L5GV:02884:01145 -XOH00:02816:01739 -DB45V:01002:00460 -D3VVV:01168:02066 -1L5GV:01910:00608 -XOH00:02913:02036 -1L5GV:03024:00634 -DB45V:00440:00098 -XOH00:01603:02984 -DB45V:00704:02643 -1L5GV:02388:01904 -DB45V:01386:00667 -DB45V:02583:02602 -XOH00:01127:00036 -DB45V:02698:01644 -1L5GV:01644:02855 -XOH00:02193:01954 -XOH00:02130:02715 -D3VVV:01158:02223 -D3VVV:02265:02836 -1L5GV:00374:01804 -D3VVV:02132:01615 -1L5GV:01599:02639 -D3VVV:02250:00987 -1L5GV:01165:00690 -1L5GV:01558:00807 -XOH00:00891:00762 -XOH00:00925:00319 -XOH00:01059:01141 -XOH00:01190:02920 -XOH00:01601:00340 -XOH00:02911:02358 -XOH00:01181:00011 -XOH00:01782:02098 -DB45V:00587:02010 -D3VVV:02122:03370 
-1L5GV:00334:00366 -1L5GV:00805:02156 -D3VVV:02364:02363 -D3VVV:02896:01715 -DB45V:00331:01443 -DB45V:00920:01995 -DB45V:01837:01808 -DB45V:02106:00665 -XOH00:00068:00648 -D3VVV:01597:01402 -D3VVV:02045:03117 -XOH00:00349:02387 -XOH00:01180:00700 -1L5GV:00447:02551 -XOH00:01900:01109 -XOH00:03028:01026 -1L5GV:03097:02329 -D3VVV:01835:02496 -XOH00:01358:00703 -D3VVV:03299:01906 -XOH00:02208:00326 -1L5GV:02330:00465 -D3VVV:01162:01529 -XOH00:00540:01020 -XOH00:03519:01594 -DB45V:02214:00515 -DB45V:00726:01328 -XOH00:02158:03261 -XOH00:03452:03106 -XOH00:02387:01077 -DB45V:02452:02016 -XOH00:01343:01828 -XOH00:02925:01750 -1L5GV:02371:01391 -D3VVV:02598:01209 -DB45V:03377:02008 -XOH00:02749:01641 -1L5GV:02463:02250 -XOH00:03711:03139 -DB45V:02742:01808 -D3VVV:02879:02222 -D3VVV:00772:00740 -XOH00:02431:03222 -XOH00:02056:00733 -1L5GV:00156:01329 -1L5GV:01669:00104 -1L5GV:02033:00640 -1L5GV:02033:00999 -1L5GV:02405:00952 -1L5GV:03619:02134 -D3VVV:00971:01706 -D3VVV:01950:02804 -D3VVV:02601:01752 -D3VVV:02889:02327 -D3VVV:03267:01063 -D3VVV:03373:01440 -DB45V:00325:01546 -DB45V:00806:00313 -DB45V:01308:00306 -DB45V:02455:01596 -DB45V:02522:01024 -DB45V:02984:01034 -DB45V:03056:01513 -DB45V:03141:01296 -DB45V:03217:01265 -XOH00:00046:00516 -XOH00:00283:02089 -XOH00:01249:01661 -XOH00:01364:01286 -XOH00:01989:00605 -XOH00:02398:01947 -XOH00:02420:00991 -1L5GV:00983:02143 -1L5GV:01694:00462 -1L5GV:03287:01358 -D3VVV:00587:01623 -D3VVV:01624:02461 -DB45V:03593:02136 -D3VVV:02338:02236 -1L5GV:03127:01735 -DB45V:00776:02108 -XOH00:01863:02837 -1L5GV:02791:03017 -1L5GV:01224:03121 -1L5GV:01804:02135 -XOH00:01516:02455 -XOH00:01164:00180 -D3VVV:02754:00435 -DB45V:01451:02007 -XOH00:01945:03321 -XOH00:02010:01892 -D3VVV:03323:01998 -D3VVV:01652:02374 -DB45V:02550:01912 -XOH00:02009:00998 -1L5GV:01502:00422 -XOH00:02080:02763 -XOH00:02457:01309 -XOH00:02297:00989 -DB45V:00374:00378 -DB45V:00551:02207 -XOH00:01758:01340 -XOH00:00884:00434 -XOH00:01418:00996 -1L5GV:00835:02261 
-1L5GV:01005:01711 -1L5GV:02327:03200 -D3VVV:02789:01228 -XOH00:03434:03169 -1L5GV:00593:02014 -1L5GV:00616:00416 -1L5GV:01142:00651 -1L5GV:01381:01793 -1L5GV:02125:03276 -1L5GV:02340:02910 -1L5GV:03642:01796 -D3VVV:00042:00409 -D3VVV:02309:03098 -D3VVV:02933:00713 -DB45V:00268:00635 -DB45V:00271:00819 -DB45V:00684:02356 -DB45V:00746:00284 -DB45V:01758:00299 -DB45V:02445:00182 -DB45V:02447:02900 -XOH00:01572:00515 -XOH00:01871:01857 -XOH00:02045:00695 -XOH00:02456:01159 -XOH00:02689:03253 -XOH00:03164:01222 -XOH00:03771:01953 -1L5GV:01376:01071 -1L5GV:02413:03067 -1L5GV:02629:02488 -D3VVV:02684:00685 -D3VVV:02747:02989 -D3VVV:03005:00537 -D3VVV:03658:01504 -XOH00:01089:01432 -XOH00:01455:02951 -XOH00:03102:02432 -1L5GV:02623:00765 -1L5GV:03282:02385 -D3VVV:01361:02569 -DB45V:02305:02626 -DB45V:03235:02857 -XOH00:01855:01781 -XOH00:02483:02418 -D3VVV:01095:01810 -D3VVV:02541:00559 -DB45V:02379:01754 -XOH00:03023:01826 -XOH00:00671:02209 -XOH00:01872:01030 -XOH00:03332:01139 -1L5GV:02690:02916 -XOH00:01012:01057 -XOH00:01222:01098 -DB45V:01918:00575 -1L5GV:00198:00417 -1L5GV:00305:00364 -1L5GV:00389:01759 -1L5GV:00458:00027 -1L5GV:00495:00836 -1L5GV:00546:00929 -1L5GV:00629:00181 -1L5GV:00631:00177 -1L5GV:00651:00655 -1L5GV:00680:01663 -1L5GV:00761:02321 -1L5GV:00878:00739 -1L5GV:00899:00970 -1L5GV:00932:00051 -1L5GV:00994:01213 -1L5GV:01029:02440 -1L5GV:01052:02041 -1L5GV:01075:00499 -1L5GV:01114:00577 -1L5GV:01148:01137 -1L5GV:01236:02015 -1L5GV:01315:00033 -1L5GV:01340:01974 -1L5GV:01353:01464 -1L5GV:01396:02436 -1L5GV:01657:00863 -1L5GV:01665:00729 -1L5GV:01691:02439 -1L5GV:01724:00199 -1L5GV:01810:00030 -1L5GV:01829:02878 -1L5GV:01850:01954 -1L5GV:01921:01370 -1L5GV:02010:00392 -1L5GV:02021:01377 -1L5GV:02083:01395 -1L5GV:02179:02482 -1L5GV:02193:01486 -1L5GV:02231:03026 -1L5GV:02240:00887 -1L5GV:02293:01735 -1L5GV:02296:01670 -1L5GV:02353:02192 -1L5GV:02354:01112 -1L5GV:02360:01149 -1L5GV:02394:01057 -1L5GV:02416:02522 -1L5GV:02464:00515 -1L5GV:02467:00632 
-1L5GV:02500:02634 -1L5GV:02556:02775 -1L5GV:02622:00764 -1L5GV:02701:01155 -1L5GV:02820:02323 -1L5GV:02872:01014 -1L5GV:02881:02402 -1L5GV:02948:00468 -1L5GV:02965:00698 -1L5GV:02972:01871 -1L5GV:03017:01772 -1L5GV:03057:00641 -1L5GV:03115:01670 -1L5GV:03187:01493 -1L5GV:03242:00962 -1L5GV:03496:01991 -1L5GV:03672:02007 -1L5GV:03675:03193 -1L5GV:03694:03069 -D3VVV:00073:01325 -D3VVV:00074:01631 -D3VVV:00344:02123 -D3VVV:00427:00734 -D3VVV:00523:00357 -D3VVV:00574:02335 -D3VVV:00711:02381 -D3VVV:00731:02256 -D3VVV:00800:02221 -D3VVV:00805:02071 -D3VVV:00806:01142 -D3VVV:00830:01600 -D3VVV:00875:00506 -D3VVV:00899:02825 -D3VVV:00900:01558 -D3VVV:00974:01736 -D3VVV:01006:00966 -D3VVV:01037:00272 -D3VVV:01038:01164 -D3VVV:01102:02404 -D3VVV:01233:01124 -D3VVV:01274:03257 -D3VVV:01471:02392 -D3VVV:01582:00286 -D3VVV:01675:03361 -D3VVV:01721:02672 -D3VVV:01722:00840 -D3VVV:01746:03252 -D3VVV:01770:02083 -D3VVV:01943:01284 -D3VVV:01981:02947 -D3VVV:01983:02336 -D3VVV:02015:00627 -D3VVV:02080:00560 -D3VVV:02086:00845 -D3VVV:02184:00676 -D3VVV:02209:02686 -D3VVV:02223:02034 -D3VVV:02275:02575 -D3VVV:02354:02519 -D3VVV:02441:00256 -D3VVV:02545:00509 -D3VVV:02638:02603 -D3VVV:02644:03290 -D3VVV:02652:02541 -D3VVV:02686:00367 -D3VVV:02813:00695 -D3VVV:02830:01310 -D3VVV:02836:03337 -D3VVV:02915:01152 -D3VVV:03055:00532 -D3VVV:03126:02589 -D3VVV:03138:01023 -D3VVV:03158:02240 -D3VVV:03263:01549 -D3VVV:03318:01467 -D3VVV:03330:02905 -D3VVV:03344:00707 -D3VVV:03403:02523 -D3VVV:03428:01384 -D3VVV:03471:01397 -D3VVV:03530:01645 -D3VVV:03670:02416 -D3VVV:03756:01788 -DB45V:00152:02001 -DB45V:00176:01458 -DB45V:00226:00542 -DB45V:00233:01100 -DB45V:00407:02390 -DB45V:00444:02241 -DB45V:00477:01243 -DB45V:00567:01005 -DB45V:00577:02628 -DB45V:00654:01014 -DB45V:00801:02120 -DB45V:00855:01131 -DB45V:00959:01388 -DB45V:00962:02300 -DB45V:00974:01907 -DB45V:01012:02093 -DB45V:01040:02887 -DB45V:01046:00844 -DB45V:01072:01827 -DB45V:01082:02910 -DB45V:01153:00254 -DB45V:01219:00920 
-DB45V:01254:01927 -DB45V:01284:01995 -DB45V:01409:03083 -DB45V:01433:02182 -DB45V:01525:01623 -DB45V:01560:03089 -DB45V:01591:02062 -DB45V:01599:00790 -DB45V:01721:00967 -DB45V:01765:02891 -DB45V:01806:03221 -DB45V:01875:00961 -DB45V:01884:02317 -DB45V:01980:01964 -DB45V:02047:01894 -DB45V:02111:02107 -DB45V:02124:02152 -DB45V:02175:02319 -DB45V:02181:01940 -DB45V:02209:02285 -DB45V:02276:02601 -DB45V:02324:01839 -DB45V:02355:02147 -DB45V:02380:00645 -DB45V:02436:00363 -DB45V:02460:01817 -DB45V:02463:00556 -DB45V:02566:02383 -DB45V:02636:02469 -DB45V:02658:02348 -DB45V:02731:00659 -DB45V:02777:01771 -DB45V:02794:00824 -DB45V:02817:00898 -DB45V:03121:01480 -DB45V:03135:01104 -DB45V:03191:03044 -DB45V:03236:01193 -DB45V:03241:02285 -DB45V:03286:02040 -DB45V:03290:02364 -DB45V:03497:01792 -XOH00:00353:01974 -XOH00:00359:01899 -XOH00:00372:02552 -XOH00:00516:01886 -XOH00:00537:01629 -XOH00:00557:01711 -XOH00:00654:01882 -XOH00:00678:01786 -XOH00:00744:01351 -XOH00:00813:00607 -XOH00:00821:02419 -XOH00:00855:02163 -XOH00:00994:02055 -XOH00:01028:00469 -XOH00:01069:01750 -XOH00:01161:01623 -XOH00:01230:02947 -XOH00:01280:03011 -XOH00:01286:02494 -XOH00:01317:00381 -XOH00:01346:02157 -XOH00:01351:00715 -XOH00:01553:01237 -XOH00:01567:03006 -XOH00:01582:01329 -XOH00:01592:02074 -XOH00:01710:02154 -XOH00:01756:02647 -XOH00:01770:01843 -XOH00:01784:02509 -XOH00:01847:02197 -XOH00:01857:02387 -XOH00:01869:01175 -XOH00:01944:01649 -XOH00:01959:00822 -XOH00:02006:00053 -XOH00:02069:01826 -XOH00:02121:03102 -XOH00:02128:01783 -XOH00:02154:02208 -XOH00:02218:02468 -XOH00:02245:00994 -XOH00:02270:01385 -XOH00:02335:00366 -XOH00:02464:02250 -XOH00:02495:02789 -XOH00:02510:02238 -XOH00:02617:01294 -XOH00:02618:02247 -XOH00:02625:01802 -XOH00:02626:01323 -XOH00:02721:01088 -XOH00:02768:00969 -XOH00:02790:00767 -XOH00:02841:02092 -XOH00:02864:03366 -XOH00:02899:02583 -XOH00:02950:03021 -XOH00:03000:02595 -XOH00:03078:03347 -XOH00:03159:01000 -XOH00:03192:00659 -XOH00:03232:02322 
-XOH00:03401:01147 -XOH00:03489:02166 -XOH00:03550:01425 -XOH00:03709:02118 -XOH00:03732:02296 -1L5GV:00080:01728 -1L5GV:00407:02261 -1L5GV:00484:01949 -1L5GV:00604:01980 -1L5GV:00623:00635 -1L5GV:00786:00937 -1L5GV:00868:02491 -1L5GV:00882:02429 -1L5GV:01070:01155 -1L5GV:01268:01643 -1L5GV:01406:02502 -1L5GV:01423:00456 -1L5GV:01505:02609 -1L5GV:01538:01836 -1L5GV:01555:00945 -1L5GV:01601:02980 -1L5GV:01605:01883 -1L5GV:01618:00281 -1L5GV:01689:02308 -1L5GV:01744:01757 -1L5GV:01768:00396 -1L5GV:02185:00897 -1L5GV:02440:00532 -1L5GV:02585:02471 -1L5GV:02653:02220 -1L5GV:02689:01399 -1L5GV:03301:02186 -1L5GV:03480:01005 -1L5GV:03723:01544 -D3VVV:00082:01205 -D3VVV:00095:00561 -D3VVV:00146:00697 -D3VVV:00162:02030 -D3VVV:00199:02035 -D3VVV:00540:01328 -D3VVV:00660:00870 -D3VVV:00864:01269 -D3VVV:00936:02632 -D3VVV:00999:02286 -D3VVV:01284:02362 -D3VVV:01331:01211 -D3VVV:01613:01692 -D3VVV:01684:03254 -D3VVV:01761:03242 -D3VVV:01894:02783 -D3VVV:02048:00547 -D3VVV:02083:00354 -D3VVV:02148:02857 -D3VVV:02316:02721 -D3VVV:02333:00825 -D3VVV:02812:00688 -D3VVV:02857:01112 -D3VVV:03169:02943 -D3VVV:03200:00968 -D3VVV:03257:02980 -D3VVV:03456:02402 -DB45V:00495:00213 -DB45V:00588:02068 -DB45V:01049:00903 -DB45V:01148:02984 -DB45V:01398:02584 -DB45V:01545:00488 -DB45V:01838:00539 -DB45V:02076:00175 -DB45V:02129:01845 -DB45V:02168:01707 -DB45V:02840:01714 -DB45V:02886:03031 -DB45V:03051:01068 -DB45V:03148:01811 -DB45V:03190:01092 -DB45V:03366:01287 -DB45V:03409:01240 -DB45V:03693:01433 -XOH00:00074:02068 -XOH00:00086:00779 -XOH00:00136:02180 -XOH00:00641:02388 -XOH00:00923:02779 -XOH00:01008:02427 -XOH00:01023:02344 -XOH00:01075:02651 -XOH00:01441:02891 -XOH00:01544:02202 -XOH00:01597:01783 -XOH00:01637:02738 -XOH00:02737:01451 -XOH00:03247:01217 -XOH00:03457:01392 -1L5GV:00710:02381 -1L5GV:01142:00906 -D3VVV:02324:00248 -XOH00:02000:01509 -XOH00:02678:00439 -DB45V:00798:00924 -XOH00:01145:02985 -DB45V:00516:00062 -1L5GV:00960:02097 -D3VVV:01517:01085 -D3VVV:02650:01848 
-DB45V:00792:01910 -DB45V:01915:01749 -DB45V:02529:02209 -DB45V:03469:03315 -XOH00:01377:01722 -XOH00:02487:01815 -XOH00:03351:02935 -XOH00:01052:02268 -XOH00:01346:03088 -1L5GV:00508:02166 -XOH00:01949:01186 -D3VVV:03774:02454 -XOH00:02647:01654 -XOH00:01018:00444 -1L5GV:00895:02656 -D3VVV:00829:02245 -DB45V:00794:00870 -DB45V:01623:01978 -1L5GV:00537:02438 -1L5GV:01115:00256 -1L5GV:01093:02802 -D3VVV:00424:00269 -DB45V:00897:00696 -DB45V:02401:00550 -XOH00:00313:02074 -XOH00:01572:00593 -1L5GV:02350:00308 -XOH00:00316:00842 -D3VVV:00096:00381 -XOH00:01937:00683 -1L5GV:01712:00580 -DB45V:00845:02574 -XOH00:02342:01667 -1L5GV:03346:03099 -1L5GV:03591:02787 -D3VVV:01648:01585 -D3VVV:02312:01499 -DB45V:01705:02145 -DB45V:01897:02936 -XOH00:01036:00273 -XOH00:01828:02062 -XOH00:01858:02735 -XOH00:02725:01584 -XOH00:03264:02306 -1L5GV:00434:02563 -1L5GV:01900:00725 -1L5GV:02597:02814 -DB45V:01903:01173 -XOH00:02477:00311 -DB45V:02114:00556 -1L5GV:00157:02306 -1L5GV:00197:01837 -1L5GV:00237:01451 -1L5GV:00259:02487 -1L5GV:00308:00690 -1L5GV:00513:01707 -1L5GV:00524:00203 -1L5GV:00712:00445 -1L5GV:00746:01989 -1L5GV:00886:02414 -1L5GV:00925:01996 -1L5GV:00929:02187 -1L5GV:00966:01864 -1L5GV:01001:00207 -1L5GV:01072:02508 -1L5GV:01085:01523 -1L5GV:01198:00578 -1L5GV:01292:00629 -1L5GV:01519:02760 -1L5GV:01648:00621 -1L5GV:01808:00061 -1L5GV:01984:01211 -1L5GV:02213:01900 -1L5GV:02909:01327 -1L5GV:02992:00656 -D3VVV:00079:01429 -D3VVV:00861:02131 -D3VVV:02325:01256 -DB45V:00127:00677 -DB45V:00249:01800 -DB45V:00404:02316 -DB45V:00412:00650 -DB45V:01078:00229 -DB45V:01211:02358 -DB45V:01214:03238 -DB45V:01814:00508 -DB45V:02360:01526 -DB45V:02391:03015 -DB45V:03024:00596 -DB45V:03361:00894 -XOH00:00036:01020 -XOH00:00187:00031 -XOH00:00235:01769 -XOH00:00313:00927 -XOH00:00344:01195 -XOH00:00504:00014 -XOH00:00506:00494 -XOH00:00551:02235 -XOH00:00611:02735 -XOH00:00686:01803 -XOH00:00756:01842 -XOH00:00759:00922 -XOH00:00830:01283 -XOH00:00876:01792 -XOH00:00914:00929 
-XOH00:00932:00471 -XOH00:00965:02829 -XOH00:01007:02673 -XOH00:01041:00429 -XOH00:01045:02430 -XOH00:01107:02071 -XOH00:01110:02768 -XOH00:01189:00475 -XOH00:01258:00498 diff --git a/src/htslib-1.18/htscodecs/tests/names/10.names b/src/htslib-1.18/htscodecs/tests/names/10.names deleted file mode 100644 index e8c5e13..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/10.names +++ /dev/null @@ -1,1000 +0,0 @@ -MICHAELJACKSON_0007:5:72:16920:15502#0 -MICHAELJACKSON_0007:6:20:12357:19452#0 -MICHAELJACKSON_0007:5:25:16489:1599#0 -MICHAELJACKSON_0007:7:98:14823:19478#0 -MICHAELJACKSON_0007:5:61:18882:10396#0 -MICHAELJACKSON_0007:6:1:5672:7613#0 -MICHAELJACKSON_0007:7:28:6029:15114#0 -MICHAELJACKSON_0007:6:27:12251:9100#0 -MICHAELJACKSON_0007:7:52:3569:7006#0 -MICHAELJACKSON_0007:7:90:13629:8089#0 -MICHAELJACKSON_0007:5:99:14789:12331#0 -MICHAELJACKSON_0007:6:119:13554:20499#0 -MICHAELJACKSON_0007:5:4:19126:7275#0 -MICHAELJACKSON_0007:6:55:12839:5796#0 -MICHAELJACKSON_0007:6:76:12456:6031#0 -MICHAELJACKSON_0007:7:46:15379:5996#0 -MICHAELJACKSON_0007:7:41:14274:9432#0 -MICHAELJACKSON_0007:5:105:6313:10773#0 -MICHAELJACKSON_0007:7:87:4908:12145#0 -MICHAELJACKSON_0007:7:114:3651:18729#0 -MICHAELJACKSON_0007:7:116:7750:17666#0 -MICHAELJACKSON_0007:7:51:7123:11018#0 -MICHAELJACKSON_0007:7:90:13629:8089#0 -MICHAELJACKSON_0007:5:99:14789:12331#0 -MICHAELJACKSON_0007:5:14:17785:16641#0 -MICHAELJACKSON_0007:6:13:18934:15164#0 -MICHAELJACKSON_0007:5:50:4618:16636#0 -MICHAELJACKSON_0007:7:41:14274:9432#0 -MICHAELJACKSON_0007:5:102:7664:3284#0 -MICHAELJACKSON_0007:6:75:2680:5296#0 -MICHAELJACKSON_0007:6:110:3455:4088#0 -MICHAELJACKSON_0007:7:103:10494:15531#0 -MICHAELJACKSON_0007:5:4:19126:7275#0 -MICHAELJACKSON_0007:7:46:15379:5996#0 -MICHAELJACKSON_0007:6:119:13554:20499#0 -MICHAELJACKSON_0007:6:55:12839:5796#0 -MICHAELJACKSON_0007:6:76:12456:6031#0 -MICHAELJACKSON_0007:6:1:18709:20408#0 -MICHAELJACKSON_0007:6:25:8512:8141#0 -MICHAELJACKSON_0007:7:50:11056:3338#0 
-MICHAELJACKSON_0007:7:64:12739:15618#0 -MICHAELJACKSON_0007:5:120:7097:4971#0 -MICHAELJACKSON_0007:6:119:13257:16800#0 -MICHAELJACKSON_0007:7:39:4414:1987#0 -MICHAELJACKSON_0007:5:105:6313:10773#0 -MICHAELJACKSON_0007:7:87:4908:12145#0 -MICHAELJACKSON_0007:7:114:3651:18729#0 -MICHAELJACKSON_0007:7:116:7750:17666#0 -MICHAELJACKSON_0007:7:20:19628:2861#0 -MICHAELJACKSON_0007:5:102:7664:3284#0 -MICHAELJACKSON_0007:6:75:2680:5296#0 -MICHAELJACKSON_0007:6:110:3455:4088#0 -MICHAELJACKSON_0007:7:103:10494:15531#0 -MICHAELJACKSON_0007:5:50:4618:16636#0 -MICHAELJACKSON_0007:7:41:16673:18097#0 -MICHAELJACKSON_0007:7:17:1976:20877#0 -MICHAELJACKSON_0007:5:11:6413:11243#0 -MICHAELJACKSON_0007:5:114:15981:2741#0 -MICHAELJACKSON_0007:6:55:10184:6592#0 -MICHAELJACKSON_0007:7:18:11690:19353#0 -MICHAELJACKSON_0007:7:87:18966:14967#0 -MICHAELJACKSON_0007:7:39:4414:1987#0 -MICHAELJACKSON_0007:6:39:15101:4410#0 -MICHAELJACKSON_0007:5:14:17785:16641#0 -MICHAELJACKSON_0007:6:13:18934:15164#0 -MICHAELJACKSON_0007:5:91:9791:1555#0 -MICHAELJACKSON_0007:6:1:18709:20408#0 -MICHAELJACKSON_0007:6:25:8512:8141#0 -MICHAELJACKSON_0007:7:50:11056:3338#0 -MICHAELJACKSON_0007:7:64:12739:15618#0 -MICHAELJACKSON_0007:5:65:3217:8527#0 -MICHAELJACKSON_0007:5:118:16115:15590#0 -MICHAELJACKSON_0007:7:63:11163:9422#0 -MICHAELJACKSON_0007:7:73:3521:15157#0 -MICHAELJACKSON_0007:5:11:6413:11243#0 -MICHAELJACKSON_0007:5:114:15981:2741#0 -MICHAELJACKSON_0007:6:55:10184:6592#0 -MICHAELJACKSON_0007:7:18:11690:19353#0 -MICHAELJACKSON_0007:7:41:16673:18097#0 -MICHAELJACKSON_0007:7:87:18966:14967#0 -MICHAELJACKSON_0007:5:92:8514:17279#0 -MICHAELJACKSON_0007:5:118:12317:15805#0 -MICHAELJACKSON_0007:6:42:6419:10384#0 -MICHAELJACKSON_0007:6:58:7827:2783#0 -MICHAELJACKSON_0007:7:27:5331:12077#0 -MICHAELJACKSON_0007:7:29:7140:17457#0 -MICHAELJACKSON_0007:5:40:13656:10559#0 -MICHAELJACKSON_0007:5:91:9791:1555#0 -MICHAELJACKSON_0007:5:21:17473:3393#0 -MICHAELJACKSON_0007:5:65:3217:8527#0 
-MICHAELJACKSON_0007:5:118:16115:15590#0 -MICHAELJACKSON_0007:7:42:11488:18511#0 -MICHAELJACKSON_0007:7:116:5984:2525#0 -MICHAELJACKSON_0007:6:20:6369:16217#0 -MICHAELJACKSON_0007:5:92:8514:17279#0 -MICHAELJACKSON_0007:5:118:12317:15805#0 -MICHAELJACKSON_0007:6:42:6419:10384#0 -MICHAELJACKSON_0007:6:58:7827:2783#0 -MICHAELJACKSON_0007:7:27:5331:12077#0 -MICHAELJACKSON_0007:7:29:7140:17457#0 -MICHAELJACKSON_0007:5:21:17473:3393#0 -MICHAELJACKSON_0007:5:40:13656:10559#0 -MICHAELJACKSON_0007:6:102:4552:16624#0 -MICHAELJACKSON_0007:5:85:11940:12217#0 -MICHAELJACKSON_0007:7:35:10657:2610#0 -MICHAELJACKSON_0007:5:120:7097:4971#0 -MICHAELJACKSON_0007:6:119:13257:16800#0 -MICHAELJACKSON_0007:7:42:11488:18511#0 -MICHAELJACKSON_0007:7:116:5984:2525#0 -MICHAELJACKSON_0007:6:20:6369:16217#0 -MICHAELJACKSON_0007:5:85:11940:12217#0 -MICHAELJACKSON_0007:6:102:4552:16624#0 -MICHAELJACKSON_0007:7:32:11955:6123#0 -MICHAELJACKSON_0007:5:32:3627:11351#0 -MICHAELJACKSON_0007:7:78:7362:13695#0 -MICHAELJACKSON_0007:7:92:11565:11795#0 -MICHAELJACKSON_0007:7:95:16306:12853#0 -MICHAELJACKSON_0007:7:91:5489:9191#0 -MICHAELJACKSON_0007:5:1:8105:13641#0 -MICHAELJACKSON_0007:5:53:4403:2670#0 -MICHAELJACKSON_0007:7:32:11955:6123#0 -MICHAELJACKSON_0007:5:97:8615:8928#0 -MICHAELJACKSON_0007:6:23:9031:12778#0 -MICHAELJACKSON_0007:6:105:5764:6152#0 -MICHAELJACKSON_0007:7:14:12665:17935#0 -MICHAELJACKSON_0007:7:50:8272:9968#0 -MICHAELJACKSON_0007:7:116:8863:17947#0 -MICHAELJACKSON_0007:6:40:6961:19434#0 -MICHAELJACKSON_0007:6:87:1664:16268#0 -MICHAELJACKSON_0007:5:50:3913:9200#0 -MICHAELJACKSON_0007:7:18:13528:18220#0 -MICHAELJACKSON_0007:6:102:8025:5413#0 -MICHAELJACKSON_0007:5:44:1501:3247#0 -MICHAELJACKSON_0007:6:35:14133:6233#0 -MICHAELJACKSON_0007:6:70:7078:17451#0 -MICHAELJACKSON_0007:7:39:17923:10784#0 -MICHAELJACKSON_0007:5:119:2390:3074#0 -MICHAELJACKSON_0007:5:73:2031:10350#0 -MICHAELJACKSON_0007:5:8:8617:13946#0 -MICHAELJACKSON_0007:5:19:14580:5716#0 -MICHAELJACKSON_0007:7:34:13172:3317#0 
-MICHAELJACKSON_0007:7:66:3811:11319#0 -MICHAELJACKSON_0007:7:77:11062:14584#0 -MICHAELJACKSON_0007:7:92:11565:11795#0 -MICHAELJACKSON_0007:7:95:16306:12853#0 -MICHAELJACKSON_0007:5:97:8615:8928#0 -MICHAELJACKSON_0007:6:23:9031:12778#0 -MICHAELJACKSON_0007:6:105:5764:6152#0 -MICHAELJACKSON_0007:7:14:12665:17935#0 -MICHAELJACKSON_0007:7:50:8272:9968#0 -MICHAELJACKSON_0007:7:116:8863:17947#0 -MICHAELJACKSON_0007:5:32:3627:11351#0 -MICHAELJACKSON_0007:7:78:7362:13695#0 -MICHAELJACKSON_0007:6:35:14133:6233#0 -MICHAELJACKSON_0007:5:50:3913:9200#0 -MICHAELJACKSON_0007:7:18:13528:18220#0 -MICHAELJACKSON_0007:7:87:4545:5362#0 -MICHAELJACKSON_0007:7:91:5489:9191#0 -MICHAELJACKSON_0007:6:59:1277:3892#0 -MICHAELJACKSON_0007:6:40:6961:19434#0 -MICHAELJACKSON_0007:6:87:1664:16268#0 -MICHAELJACKSON_0007:6:102:8025:5413#0 -MICHAELJACKSON_0007:5:19:14580:5716#0 -MICHAELJACKSON_0007:7:34:13172:3317#0 -MICHAELJACKSON_0007:7:66:3811:11319#0 -MICHAELJACKSON_0007:6:45:5107:17808#0 -MICHAELJACKSON_0007:7:77:11062:14584#0 -MICHAELJACKSON_0007:5:116:3935:7721#0 -MICHAELJACKSON_0007:6:64:5588:20223#0 -MICHAELJACKSON_0007:6:73:14182:12343#0 -MICHAELJACKSON_0007:7:13:11874:19850#0 -MICHAELJACKSON_0007:7:24:3126:7910#0 -MICHAELJACKSON_0007:7:7:4880:15056#0 -MICHAELJACKSON_0007:5:48:14601:16267#0 -MICHAELJACKSON_0007:7:52:10719:2091#0 -MICHAELJACKSON_0007:6:64:5588:20223#0 -MICHAELJACKSON_0007:6:73:14182:12343#0 -MICHAELJACKSON_0007:7:13:11874:19850#0 -MICHAELJACKSON_0007:7:24:3126:7910#0 -MICHAELJACKSON_0007:5:18:14310:7078#0 -MICHAELJACKSON_0007:6:11:9431:20861#0 -MICHAELJACKSON_0007:6:68:7115:20058#0 -MICHAELJACKSON_0007:6:84:4193:3651#0 -MICHAELJACKSON_0007:5:118:12047:9764#0 -MICHAELJACKSON_0007:5:49:11139:12182#0 -MICHAELJACKSON_0007:6:2:3032:20473#0 -MICHAELJACKSON_0007:7:91:7015:7631#0 -MICHAELJACKSON_0007:5:58:10644:1402#0 -MICHAELJACKSON_0007:7:7:17645:2230#0 -MICHAELJACKSON_0007:5:116:3935:7721#0 -MICHAELJACKSON_0007:7:103:10822:2982#0 -MICHAELJACKSON_0007:5:48:14601:16267#0 
-MICHAELJACKSON_0007:5:61:9536:17166#0 -MICHAELJACKSON_0007:6:28:18628:1963#0 -MICHAELJACKSON_0007:6:68:8796:13451#0 -MICHAELJACKSON_0007:6:116:16041:6333#0 -MICHAELJACKSON_0007:7:86:16843:2739#0 -MICHAELJACKSON_0007:7:92:5175:2795#0 -MICHAELJACKSON_0007:7:120:9838:1139#0 -MICHAELJACKSON_0007:5:45:11639:4624#0 -MICHAELJACKSON_0007:6:36:10885:4080#0 -MICHAELJACKSON_0007:6:44:18779:17878#0 -MICHAELJACKSON_0007:6:77:13647:6955#0 -MICHAELJACKSON_0007:6:116:12727:9721#0 -MICHAELJACKSON_0007:7:71:11554:12123#0 -MICHAELJACKSON_0007:5:105:2931:20556#0 -MICHAELJACKSON_0007:6:33:7090:5114#0 -MICHAELJACKSON_0007:7:67:17139:8145#0 -MICHAELJACKSON_0007:7:7:4880:15056#0 -MICHAELJACKSON_0007:7:8:12523:11810#0 -MICHAELJACKSON_0007:7:52:10719:2091#0 -MICHAELJACKSON_0007:5:119:3964:16147#0 -MICHAELJACKSON_0007:5:5:11201:7531#0 -MICHAELJACKSON_0007:5:52:16464:13555#0 -MICHAELJACKSON_0007:5:94:3411:18835#0 -MICHAELJACKSON_0007:6:6:13666:15673#0 -MICHAELJACKSON_0007:6:46:14761:12682#0 -MICHAELJACKSON_0007:7:13:1925:9951#0 -MICHAELJACKSON_0007:7:50:10852:18689#0 -MICHAELJACKSON_0007:7:70:1692:13841#0 -MICHAELJACKSON_0007:5:69:15307:8731#0 -MICHAELJACKSON_0007:6:49:10180:6768#0 -MICHAELJACKSON_0007:6:58:19199:1098#0 -MICHAELJACKSON_0007:6:114:17149:9960#0 -MICHAELJACKSON_0007:7:73:2199:18381#0 -MICHAELJACKSON_0007:7:96:1868:19528#0 -MICHAELJACKSON_0007:5:64:12095:11310#0 -MICHAELJACKSON_0007:6:7:5814:9322#0 -MICHAELJACKSON_0007:6:47:7117:20793#0 -MICHAELJACKSON_0007:6:115:10978:16785#0 -MICHAELJACKSON_0007:5:75:1511:14000#0 -MICHAELJACKSON_0007:5:86:19248:4693#0 -MICHAELJACKSON_0007:5:98:12583:18490#0 -MICHAELJACKSON_0007:6:24:19385:14938#0 -MICHAELJACKSON_0007:7:27:14945:18902#0 -MICHAELJACKSON_0007:7:35:4433:17394#0 -MICHAELJACKSON_0007:7:96:6158:18948#0 -MICHAELJACKSON_0007:5:36:14358:5592#0 -MICHAELJACKSON_0007:5:63:12334:8315#0 -MICHAELJACKSON_0007:5:69:2574:13367#0 -MICHAELJACKSON_0007:7:60:6686:9583#0 -MICHAELJACKSON_0007:5:2:1747:2079#0 -MICHAELJACKSON_0007:5:41:8396:5568#0 
-MICHAELJACKSON_0007:5:116:5491:4467#0 -MICHAELJACKSON_0007:6:1:6187:1474#0 -MICHAELJACKSON_0007:6:32:9373:16395#0 -MICHAELJACKSON_0007:5:5:18649:19510#0 -MICHAELJACKSON_0007:5:87:8908:12799#0 -MICHAELJACKSON_0007:5:116:4468:9353#0 -MICHAELJACKSON_0007:7:93:3517:13468#0 -MICHAELJACKSON_0007:6:28:6124:10873#0 -MICHAELJACKSON_0007:7:36:2879:15823#0 -MICHAELJACKSON_0007:7:73:13119:4659#0 -MICHAELJACKSON_0007:5:94:5427:8475#0 -MICHAELJACKSON_0007:6:51:2905:17564#0 -MICHAELJACKSON_0007:6:79:1644:10739#0 -MICHAELJACKSON_0007:7:17:3811:16948#0 -MICHAELJACKSON_0007:7:45:16337:12206#0 -MICHAELJACKSON_0007:7:54:11956:5928#0 -MICHAELJACKSON_0007:7:74:12712:5713#0 -MICHAELJACKSON_0007:7:117:7966:1830#0 -MICHAELJACKSON_0007:5:58:10644:1402#0 -MICHAELJACKSON_0007:5:5:11201:7531#0 -MICHAELJACKSON_0007:5:18:14310:7078#0 -MICHAELJACKSON_0007:5:94:3411:18835#0 -MICHAELJACKSON_0007:7:50:10852:18689#0 -MICHAELJACKSON_0007:5:49:11139:12182#0 -MICHAELJACKSON_0007:5:64:12095:11310#0 -MICHAELJACKSON_0007:6:7:5814:9322#0 -MICHAELJACKSON_0007:6:47:7117:20793#0 -MICHAELJACKSON_0007:6:115:10978:16785#0 -MICHAELJACKSON_0007:5:41:8396:5568#0 -MICHAELJACKSON_0007:5:61:9536:17166#0 -MICHAELJACKSON_0007:5:75:1511:14000#0 -MICHAELJACKSON_0007:5:86:19248:4693#0 -MICHAELJACKSON_0007:5:105:2931:20556#0 -MICHAELJACKSON_0007:6:1:6187:1474#0 -MICHAELJACKSON_0007:6:2:3032:20473#0 -MICHAELJACKSON_0007:6:28:18628:1963#0 -MICHAELJACKSON_0007:6:32:9373:16395#0 -MICHAELJACKSON_0007:6:33:7090:5114#0 -MICHAELJACKSON_0007:6:68:8796:13451#0 -MICHAELJACKSON_0007:6:116:16041:6333#0 -MICHAELJACKSON_0007:7:7:17645:2230#0 -MICHAELJACKSON_0007:7:8:12523:11810#0 -MICHAELJACKSON_0007:7:27:14945:18902#0 -MICHAELJACKSON_0007:7:35:4433:17394#0 -MICHAELJACKSON_0007:7:67:17139:8145#0 -MICHAELJACKSON_0007:7:86:16843:2739#0 -MICHAELJACKSON_0007:7:91:7015:7631#0 -MICHAELJACKSON_0007:7:92:5175:2795#0 -MICHAELJACKSON_0007:7:96:6158:18948#0 -MICHAELJACKSON_0007:7:120:9838:1139#0 -MICHAELJACKSON_0007:5:52:16464:13555#0 
-MICHAELJACKSON_0007:5:69:15307:8731#0 -MICHAELJACKSON_0007:6:6:13666:15673#0 -MICHAELJACKSON_0007:6:49:10180:6768#0 -MICHAELJACKSON_0007:6:58:19199:1098#0 -MICHAELJACKSON_0007:6:114:17149:9960#0 -MICHAELJACKSON_0007:7:13:1925:9951#0 -MICHAELJACKSON_0007:7:73:2199:18381#0 -MICHAELJACKSON_0007:7:96:1868:19528#0 -MICHAELJACKSON_0007:5:2:1747:2079#0 -MICHAELJACKSON_0007:5:116:5491:4467#0 -MICHAELJACKSON_0007:5:94:5427:8475#0 -MICHAELJACKSON_0007:5:36:14358:5592#0 -MICHAELJACKSON_0007:5:63:12334:8315#0 -MICHAELJACKSON_0007:5:118:12047:9764#0 -MICHAELJACKSON_0007:7:60:6686:9583#0 -MICHAELJACKSON_0007:6:51:2905:17564#0 -MICHAELJACKSON_0007:7:17:3811:16948#0 -MICHAELJACKSON_0007:7:74:12712:5713#0 -MICHAELJACKSON_0007:7:103:10822:2982#0 -MICHAELJACKSON_0007:5:45:11639:4624#0 -MICHAELJACKSON_0007:6:36:10885:4080#0 -MICHAELJACKSON_0007:6:44:18779:17878#0 -MICHAELJACKSON_0007:6:46:14761:12682#0 -MICHAELJACKSON_0007:7:70:1692:13841#0 -MICHAELJACKSON_0007:7:71:11554:12123#0 -MICHAELJACKSON_0007:6:68:7115:20058#0 -MICHAELJACKSON_0007:6:84:4193:3651#0 -MICHAELJACKSON_0007:6:77:13647:6955#0 -MICHAELJACKSON_0007:6:116:12727:9721#0 -MICHAELJACKSON_0007:5:69:2574:13367#0 -MICHAELJACKSON_0007:6:28:6124:10873#0 -MICHAELJACKSON_0007:6:79:1644:10739#0 -MICHAELJACKSON_0007:7:36:2879:15823#0 -MICHAELJACKSON_0007:7:45:16337:12206#0 -MICHAELJACKSON_0007:7:54:11956:5928#0 -MICHAELJACKSON_0007:7:73:13119:4659#0 -MICHAELJACKSON_0007:5:118:10025:19058#0 -MICHAELJACKSON_0007:7:15:6958:8828#0 -MICHAELJACKSON_0007:7:16:12520:21087#0 -MICHAELJACKSON_0007:5:20:14960:18861#0 -MICHAELJACKSON_0007:6:64:10866:11219#0 -MICHAELJACKSON_0007:7:87:17928:2115#0 -MICHAELJACKSON_0007:5:5:18649:19510#0 -MICHAELJACKSON_0007:5:87:8908:12799#0 -MICHAELJACKSON_0007:5:116:4468:9353#0 -MICHAELJACKSON_0007:7:93:3517:13468#0 -MICHAELJACKSON_0007:5:38:19122:19394#0 -MICHAELJACKSON_0007:5:46:19177:13448#0 -MICHAELJACKSON_0007:6:32:5181:16511#0 -MICHAELJACKSON_0007:6:100:14789:13054#0 -MICHAELJACKSON_0007:5:46:2043:20727#0 
-MICHAELJACKSON_0007:5:25:6780:11175#0 -MICHAELJACKSON_0007:6:75:14133:14529#0 -MICHAELJACKSON_0007:7:81:3198:6204#0 -MICHAELJACKSON_0007:7:7:8678:16455#0 -MICHAELJACKSON_0007:7:112:14733:11846#0 -MICHAELJACKSON_0007:7:16:17931:4830#0 -MICHAELJACKSON_0007:7:71:10671:2918#0 -MICHAELJACKSON_0007:5:36:18880:17021#0 -MICHAELJACKSON_0007:5:37:12959:11343#0 -MICHAELJACKSON_0007:7:41:18875:3464#0 -MICHAELJACKSON_0007:5:118:10025:19058#0 -MICHAELJACKSON_0007:7:15:6958:8828#0 -MICHAELJACKSON_0007:7:16:12520:21087#0 -MICHAELJACKSON_0007:5:46:2043:20727#0 -MICHAELJACKSON_0007:5:20:14960:18861#0 -MICHAELJACKSON_0007:6:64:10866:11219#0 -MICHAELJACKSON_0007:7:117:7966:1830#0 -MICHAELJACKSON_0007:5:46:19177:13448#0 -MICHAELJACKSON_0007:5:38:19122:19394#0 -MICHAELJACKSON_0007:6:32:5181:16511#0 -MICHAELJACKSON_0007:6:100:14789:13054#0 -MICHAELJACKSON_0007:5:80:9434:9888#0 -MICHAELJACKSON_0007:7:107:10555:4936#0 -MICHAELJACKSON_0007:5:42:13168:18749#0 -MICHAELJACKSON_0007:5:120:3340:19509#0 -MICHAELJACKSON_0007:6:90:12989:14574#0 -MICHAELJACKSON_0007:5:39:3708:11419#0 -MICHAELJACKSON_0007:7:32:16880:2121#0 -MICHAELJACKSON_0007:7:92:1866:17009#0 -MICHAELJACKSON_0007:6:117:14806:11440#0 -MICHAELJACKSON_0007:6:73:1507:15064#0 -MICHAELJACKSON_0007:6:115:15802:6642#0 -MICHAELJACKSON_0007:7:64:14850:20671#0 -MICHAELJACKSON_0007:5:2:7643:18525#0 -MICHAELJACKSON_0007:5:33:3747:18263#0 -MICHAELJACKSON_0007:6:87:1405:17239#0 -MICHAELJACKSON_0007:6:119:12281:12563#0 -MICHAELJACKSON_0007:7:33:17939:13003#0 -MICHAELJACKSON_0007:7:109:11820:19034#0 -MICHAELJACKSON_0007:5:71:5638:18918#0 -MICHAELJACKSON_0007:5:115:6230:16236#0 -MICHAELJACKSON_0007:6:88:16422:3102#0 -MICHAELJACKSON_0007:7:81:16104:16648#0 -MICHAELJACKSON_0007:7:92:7419:16320#0 -MICHAELJACKSON_0007:6:4:11674:12195#0 -MICHAELJACKSON_0007:5:74:5064:14773#0 -MICHAELJACKSON_0007:7:18:16228:11395#0 -MICHAELJACKSON_0007:7:38:17623:9201#0 -MICHAELJACKSON_0007:7:64:6888:9581#0 -MICHAELJACKSON_0007:7:90:18392:18394#0 
-MICHAELJACKSON_0007:7:120:16567:10699#0 -MICHAELJACKSON_0007:5:14:13234:9112#0 -MICHAELJACKSON_0007:5:16:10127:2144#0 -MICHAELJACKSON_0007:5:34:14434:12539#0 -MICHAELJACKSON_0007:5:69:10498:17103#0 -MICHAELJACKSON_0007:7:91:11665:6259#0 -MICHAELJACKSON_0007:5:50:11618:21010#0 -MICHAELJACKSON_0007:5:69:11164:11363#0 -MICHAELJACKSON_0007:6:53:6512:2748#0 -MICHAELJACKSON_0007:6:54:16091:19605#0 -MICHAELJACKSON_0007:6:64:11981:11155#0 -MICHAELJACKSON_0007:6:79:14563:11479#0 -MICHAELJACKSON_0007:6:87:17194:13462#0 -MICHAELJACKSON_0007:7:15:12076:8112#0 -MICHAELJACKSON_0007:7:35:13693:12809#0 -MICHAELJACKSON_0007:7:60:4153:1506#0 -MICHAELJACKSON_0007:7:62:13982:12475#0 -MICHAELJACKSON_0007:5:87:15899:20922#0 -MICHAELJACKSON_0007:6:39:13461:13242#0 -MICHAELJACKSON_0007:6:101:7638:4085#0 -MICHAELJACKSON_0007:7:86:9606:13813#0 -MICHAELJACKSON_0007:5:50:14183:10261#0 -MICHAELJACKSON_0007:5:59:7644:18519#0 -MICHAELJACKSON_0007:5:103:15491:11152#0 -MICHAELJACKSON_0007:6:95:1553:5348#0 -MICHAELJACKSON_0007:6:113:15313:4883#0 -MICHAELJACKSON_0007:7:5:6014:8600#0 -MICHAELJACKSON_0007:7:97:2729:2705#0 -MICHAELJACKSON_0007:7:116:13851:7441#0 -MICHAELJACKSON_0007:5:86:8459:16324#0 -MICHAELJACKSON_0007:5:51:6509:15908#0 -MICHAELJACKSON_0007:5:27:14164:18743#0 -MICHAELJACKSON_0007:6:49:10388:13960#0 -MICHAELJACKSON_0007:7:20:16752:16948#0 -MICHAELJACKSON_0007:7:34:16453:2667#0 -MICHAELJACKSON_0007:7:75:6518:4271#0 -MICHAELJACKSON_0007:7:92:1709:11942#0 -MICHAELJACKSON_0007:6:11:7547:6241#0 -MICHAELJACKSON_0007:6:76:8150:8337#0 -MICHAELJACKSON_0007:7:42:17348:7367#0 -MICHAELJACKSON_0007:5:33:6340:7208#0 -MICHAELJACKSON_0007:5:72:17458:16035#0 -MICHAELJACKSON_0007:6:23:5451:14694#0 -MICHAELJACKSON_0007:7:107:2166:12936#0 -MICHAELJACKSON_0007:5:119:12653:7658#0 -MICHAELJACKSON_0007:7:35:10685:2083#0 -MICHAELJACKSON_0007:7:98:4500:4589#0 -MICHAELJACKSON_0007:7:84:15371:5932#0 -MICHAELJACKSON_0007:5:105:13089:3950#0 -MICHAELJACKSON_0007:6:84:2723:17790#0 
-MICHAELJACKSON_0007:7:47:4283:4694#0 -MICHAELJACKSON_0007:7:113:5224:4510#0 -MICHAELJACKSON_0007:6:44:7995:2456#0 -MICHAELJACKSON_0007:5:46:15772:17196#0 -MICHAELJACKSON_0007:5:44:2618:16018#0 -MICHAELJACKSON_0007:6:33:3419:7701#0 -MICHAELJACKSON_0007:6:75:6907:11093#0 -MICHAELJACKSON_0007:6:119:18864:9210#0 -MICHAELJACKSON_0007:7:30:7445:16440#0 -MICHAELJACKSON_0007:7:75:1337:10111#0 -MICHAELJACKSON_0007:7:78:5276:8973#0 -MICHAELJACKSON_0007:7:82:4683:15520#0 -MICHAELJACKSON_0007:7:111:18627:19189#0 -MICHAELJACKSON_0007:5:78:7352:10002#0 -MICHAELJACKSON_0007:6:102:4621:17755#0 -MICHAELJACKSON_0007:6:104:17871:9748#0 -MICHAELJACKSON_0007:7:11:18969:11273#0 -MICHAELJACKSON_0007:7:24:5792:12647#0 -MICHAELJACKSON_0007:7:56:11729:17070#0 -MICHAELJACKSON_0007:7:66:11395:4690#0 -MICHAELJACKSON_0007:5:14:14758:1681#0 -MICHAELJACKSON_0007:5:14:17699:14907#0 -MICHAELJACKSON_0007:5:21:9023:16849#0 -MICHAELJACKSON_0007:5:26:3167:11854#0 -MICHAELJACKSON_0007:5:38:15202:3473#0 -MICHAELJACKSON_0007:5:42:12610:14821#0 -MICHAELJACKSON_0007:5:62:7674:19562#0 -MICHAELJACKSON_0007:5:67:13602:3280#0 -MICHAELJACKSON_0007:5:88:8698:17939#0 -MICHAELJACKSON_0007:5:110:18631:16402#0 -MICHAELJACKSON_0007:5:114:12483:15359#0 -MICHAELJACKSON_0007:6:5:18565:11960#0 -MICHAELJACKSON_0007:6:6:15460:4894#0 -MICHAELJACKSON_0007:6:7:17203:7556#0 -MICHAELJACKSON_0007:6:12:16098:14363#0 -MICHAELJACKSON_0007:6:25:13515:3027#0 -MICHAELJACKSON_0007:6:27:12501:18084#0 -MICHAELJACKSON_0007:6:52:10110:15424#0 -MICHAELJACKSON_0007:6:57:10252:15015#0 -MICHAELJACKSON_0007:6:62:13305:8472#0 -MICHAELJACKSON_0007:6:66:1522:19469#0 -MICHAELJACKSON_0007:6:72:13168:4527#0 -MICHAELJACKSON_0007:6:76:15802:11781#0 -MICHAELJACKSON_0007:6:77:12236:11622#0 -MICHAELJACKSON_0007:6:86:9410:13387#0 -MICHAELJACKSON_0007:6:89:14529:15309#0 -MICHAELJACKSON_0007:6:99:12221:9898#0 -MICHAELJACKSON_0007:6:106:9440:17012#0 -MICHAELJACKSON_0007:7:9:5363:4800#0 -MICHAELJACKSON_0007:7:28:12541:11100#0 
-MICHAELJACKSON_0007:7:28:13853:17193#0 -MICHAELJACKSON_0007:7:34:1512:13594#0 -MICHAELJACKSON_0007:7:44:17784:10830#0 -MICHAELJACKSON_0007:7:69:2635:14582#0 -MICHAELJACKSON_0007:7:75:15855:20984#0 -MICHAELJACKSON_0007:7:103:7480:12708#0 -MICHAELJACKSON_0007:7:110:12832:12278#0 -MICHAELJACKSON_0007:7:113:1283:4396#0 -MICHAELJACKSON_0007:5:4:8887:10606#0 -MICHAELJACKSON_0007:5:12:18946:13519#0 -MICHAELJACKSON_0007:6:80:16240:16358#0 -MICHAELJACKSON_0007:6:87:4161:10772#0 -MICHAELJACKSON_0007:5:41:6593:20354#0 -MICHAELJACKSON_0007:7:102:14773:8394#0 -MICHAELJACKSON_0007:7:109:3195:7189#0 -MICHAELJACKSON_0007:5:1:5431:6258#0 -MICHAELJACKSON_0007:5:95:19181:16343#0 -MICHAELJACKSON_0007:6:35:10974:17416#0 -MICHAELJACKSON_0007:7:18:9351:8148#0 -MICHAELJACKSON_0007:6:42:3813:10963#0 -MICHAELJACKSON_0007:7:37:6266:4371#0 -MICHAELJACKSON_0007:5:19:14682:15700#0 -MICHAELJACKSON_0007:7:116:9715:2100#0 -MICHAELJACKSON_0007:5:117:17206:2465#0 -MICHAELJACKSON_0007:6:75:7586:4673#0 -MICHAELJACKSON_0007:5:72:14627:17068#0 -MICHAELJACKSON_0007:5:86:16356:17299#0 -MICHAELJACKSON_0007:6:30:14751:7613#0 -MICHAELJACKSON_0007:7:100:12902:8764#0 -MICHAELJACKSON_0007:5:57:13725:9323#0 -MICHAELJACKSON_0007:5:117:3529:14197#0 -MICHAELJACKSON_0007:6:67:15197:11430#0 -MICHAELJACKSON_0007:7:12:14167:13045#0 -MICHAELJACKSON_0007:6:22:6890:11499#0 -MICHAELJACKSON_0007:6:93:2040:9338#0 -MICHAELJACKSON_0007:7:84:15371:5932#0 -MICHAELJACKSON_0007:6:94:18032:10828#0 -MICHAELJACKSON_0007:5:110:4512:11280#0 -MICHAELJACKSON_0007:7:65:6622:2969#0 -MICHAELJACKSON_0007:5:54:7352:18854#0 -MICHAELJACKSON_0007:6:14:5076:7425#0 -MICHAELJACKSON_0007:7:108:14546:10558#0 -MICHAELJACKSON_0007:5:11:16804:8645#0 -MICHAELJACKSON_0007:5:31:11991:5943#0 -MICHAELJACKSON_0007:5:48:2577:18540#0 -MICHAELJACKSON_0007:5:54:17230:12501#0 -MICHAELJACKSON_0007:5:90:5934:18092#0 -MICHAELJACKSON_0007:5:94:18323:2537#0 -MICHAELJACKSON_0007:5:97:10644:5905#0 -MICHAELJACKSON_0007:6:18:1539:13059#0 
-MICHAELJACKSON_0007:6:95:5124:14676#0 -MICHAELJACKSON_0007:7:3:3565:16232#0 -MICHAELJACKSON_0007:7:8:10407:1906#0 -MICHAELJACKSON_0007:7:52:4766:14616#0 -MICHAELJACKSON_0007:7:81:15793:15775#0 -MICHAELJACKSON_0007:7:90:13080:13748#0 -MICHAELJACKSON_0007:7:75:1337:10111#0 -MICHAELJACKSON_0007:5:5:8852:11202#0 -MICHAELJACKSON_0007:5:8:14939:19303#0 -MICHAELJACKSON_0007:5:13:10782:12073#0 -MICHAELJACKSON_0007:5:20:9874:12365#0 -MICHAELJACKSON_0007:5:26:10564:16634#0 -MICHAELJACKSON_0007:5:55:7711:15681#0 -MICHAELJACKSON_0007:5:72:17492:20915#0 -MICHAELJACKSON_0007:5:72:8487:20997#0 -MICHAELJACKSON_0007:5:77:14470:16776#0 -MICHAELJACKSON_0007:5:83:17531:20291#0 -MICHAELJACKSON_0007:5:90:3239:8936#0 -MICHAELJACKSON_0007:5:93:18343:6854#0 -MICHAELJACKSON_0007:5:98:16941:17263#0 -MICHAELJACKSON_0007:5:99:15038:9547#0 -MICHAELJACKSON_0007:5:101:13685:21081#0 -MICHAELJACKSON_0007:6:49:11298:5242#0 -MICHAELJACKSON_0007:6:49:2242:9509#0 -MICHAELJACKSON_0007:6:58:19479:19591#0 -MICHAELJACKSON_0007:6:72:14225:16194#0 -MICHAELJACKSON_0007:6:83:6694:17055#0 -MICHAELJACKSON_0007:6:91:17409:20351#0 -MICHAELJACKSON_0007:6:108:5384:11917#0 -MICHAELJACKSON_0007:7:6:15639:5823#0 -MICHAELJACKSON_0007:7:10:15742:19765#0 -MICHAELJACKSON_0007:7:32:5623:14757#0 -MICHAELJACKSON_0007:7:34:2866:12866#0 -MICHAELJACKSON_0007:7:44:3613:15569#0 -MICHAELJACKSON_0007:7:59:19532:6019#0 -MICHAELJACKSON_0007:7:60:5708:8551#0 -MICHAELJACKSON_0007:7:78:16567:5184#0 -MICHAELJACKSON_0007:7:88:6035:5258#0 -MICHAELJACKSON_0007:7:92:7587:17683#0 -MICHAELJACKSON_0007:7:101:17176:7581#0 -MICHAELJACKSON_0007:5:40:11060:14839#0 -MICHAELJACKSON_0007:5:66:3698:18574#0 -MICHAELJACKSON_0007:5:95:9698:15795#0 -MICHAELJACKSON_0007:5:116:6494:14191#0 -MICHAELJACKSON_0007:6:7:15662:6164#0 -MICHAELJACKSON_0007:6:11:16146:13290#0 -MICHAELJACKSON_0007:6:84:14605:13723#0 -MICHAELJACKSON_0007:6:120:7887:11296#0 -MICHAELJACKSON_0007:7:10:14041:18718#0 -MICHAELJACKSON_0007:7:26:5954:13131#0 
-MICHAELJACKSON_0007:7:27:4369:13690#0 -MICHAELJACKSON_0007:7:86:6379:13180#0 -MICHAELJACKSON_0007:7:91:1392:17232#0 -MICHAELJACKSON_0007:5:33:3988:7924#0 -MICHAELJACKSON_0007:5:110:17196:18975#0 -MICHAELJACKSON_0007:5:114:14661:19291#0 -MICHAELJACKSON_0007:7:42:2530:12037#0 -MICHAELJACKSON_0007:7:96:8712:15936#0 -MICHAELJACKSON_0007:5:30:6279:17155#0 -MICHAELJACKSON_0007:5:38:10152:18025#0 -MICHAELJACKSON_0007:6:38:14849:7380#0 -MICHAELJACKSON_0007:6:41:13172:15385#0 -MICHAELJACKSON_0007:7:44:13213:3010#0 -MICHAELJACKSON_0007:7:94:6332:18531#0 -MICHAELJACKSON_0007:7:97:11953:14167#0 -MICHAELJACKSON_0007:5:58:3547:20114#0 -MICHAELJACKSON_0007:5:83:5047:12335#0 -MICHAELJACKSON_0007:5:94:7862:12952#0 -MICHAELJACKSON_0007:5:107:8539:16540#0 -MICHAELJACKSON_0007:5:120:19094:17997#0 -MICHAELJACKSON_0007:6:36:14824:8838#0 -MICHAELJACKSON_0007:6:59:19402:18498#0 -MICHAELJACKSON_0007:6:69:9151:16778#0 -MICHAELJACKSON_0007:7:18:9969:15337#0 -MICHAELJACKSON_0007:7:29:3031:15136#0 -MICHAELJACKSON_0007:7:40:14820:6591#0 -MICHAELJACKSON_0007:7:58:9749:15516#0 -MICHAELJACKSON_0007:7:91:12774:11834#0 -MICHAELJACKSON_0007:5:61:8845:9898#0 -MICHAELJACKSON_0007:5:68:8748:14020#0 -MICHAELJACKSON_0007:7:33:2678:18227#0 -MICHAELJACKSON_0007:7:46:2838:20561#0 -MICHAELJACKSON_0007:7:60:14082:17814#0 -MICHAELJACKSON_0007:7:105:10322:18901#0 -MICHAELJACKSON_0007:5:62:9032:21325#0 -MICHAELJACKSON_0007:5:5:5385:17705#0 -MICHAELJACKSON_0007:5:34:18245:18103#0 -MICHAELJACKSON_0007:5:57:12004:12572#0 -MICHAELJACKSON_0007:5:74:6145:9800#0 -MICHAELJACKSON_0007:6:11:8378:16179#0 -MICHAELJACKSON_0007:6:24:8856:8687#0 -MICHAELJACKSON_0007:6:29:17936:9478#0 -MICHAELJACKSON_0007:6:103:16565:12955#0 -MICHAELJACKSON_0007:7:17:6985:5678#0 -MICHAELJACKSON_0007:7:34:15374:13868#0 -MICHAELJACKSON_0007:7:59:10678:17097#0 -MICHAELJACKSON_0007:7:69:10897:1823#0 -MICHAELJACKSON_0007:7:102:7852:8542#0 -MICHAELJACKSON_0007:7:5:9185:8151#0 -MICHAELJACKSON_0007:7:35:4038:13714#0 
-MICHAELJACKSON_0007:7:39:10553:8414#0 -MICHAELJACKSON_0007:5:36:9209:18549#0 -MICHAELJACKSON_0007:5:60:1816:6596#0 -MICHAELJACKSON_0007:6:11:11565:13414#0 -MICHAELJACKSON_0007:6:17:9873:19362#0 -MICHAELJACKSON_0007:6:37:10430:6909#0 -MICHAELJACKSON_0007:6:86:13204:2952#0 -MICHAELJACKSON_0007:6:113:16517:8159#0 -MICHAELJACKSON_0007:7:33:10555:19693#0 -MICHAELJACKSON_0007:7:77:1808:11001#0 -MICHAELJACKSON_0007:5:17:8599:15636#0 -MICHAELJACKSON_0007:6:56:18159:6056#0 -MICHAELJACKSON_0007:6:60:14259:13672#0 -MICHAELJACKSON_0007:7:1:14255:8513#0 -MICHAELJACKSON_0007:7:12:18855:10711#0 -MICHAELJACKSON_0007:7:18:3845:14052#0 -MICHAELJACKSON_0007:7:69:18851:10290#0 -MICHAELJACKSON_0007:5:56:2727:1409#0 -MICHAELJACKSON_0007:7:96:9382:19183#0 -MICHAELJACKSON_0007:5:106:4323:17124#0 -MICHAELJACKSON_0007:6:31:6442:4676#0 -MICHAELJACKSON_0007:6:37:12343:14914#0 -MICHAELJACKSON_0007:6:43:3083:7542#0 -MICHAELJACKSON_0007:6:92:8712:15059#0 -MICHAELJACKSON_0007:7:1:1998:17658#0 -MICHAELJACKSON_0007:7:57:5938:11112#0 -MICHAELJACKSON_0007:7:96:6772:11590#0 -MICHAELJACKSON_0007:7:112:1432:5107#0 -MICHAELJACKSON_0007:6:9:9329:4540#0 -MICHAELJACKSON_0007:5:16:19666:14286#0 -MICHAELJACKSON_0007:5:35:2594:14781#0 -MICHAELJACKSON_0007:5:54:2675:12959#0 -MICHAELJACKSON_0007:6:56:10240:8925#0 -MICHAELJACKSON_0007:6:70:4589:18280#0 -MICHAELJACKSON_0007:6:102:13705:19095#0 -MICHAELJACKSON_0007:7:16:9749:11585#0 -MICHAELJACKSON_0007:7:64:14459:6906#0 -MICHAELJACKSON_0007:7:96:5083:3343#0 -MICHAELJACKSON_0007:5:70:19572:6361#0 -MICHAELJACKSON_0007:5:79:13041:16818#0 -MICHAELJACKSON_0007:6:27:7436:16244#0 -MICHAELJACKSON_0007:7:110:18855:1346#0 -MICHAELJACKSON_0007:5:71:19600:5072#0 -MICHAELJACKSON_0007:5:85:18621:19777#0 -MICHAELJACKSON_0007:5:93:14517:17794#0 -MICHAELJACKSON_0007:5:114:19484:16952#0 -MICHAELJACKSON_0007:7:4:18198:17700#0 -MICHAELJACKSON_0007:7:69:18928:16536#0 -MICHAELJACKSON_0007:7:94:17840:11546#0 -MICHAELJACKSON_0007:7:98:10245:2398#0 -MICHAELJACKSON_0007:7:102:1752:5421#0 
-MICHAELJACKSON_0007:7:56:8907:15481#0 -MICHAELJACKSON_0007:7:69:4974:8581#0 -MICHAELJACKSON_0007:5:62:12590:20591#0 -MICHAELJACKSON_0007:5:66:7631:14873#0 -MICHAELJACKSON_0007:5:69:3014:3895#0 -MICHAELJACKSON_0007:5:80:10314:20926#0 -MICHAELJACKSON_0007:5:100:18551:13070#0 -MICHAELJACKSON_0007:5:116:14156:12623#0 -MICHAELJACKSON_0007:6:79:17121:8964#0 -MICHAELJACKSON_0007:6:98:10636:8320#0 -MICHAELJACKSON_0007:6:116:8274:8940#0 -MICHAELJACKSON_0007:7:4:6650:17239#0 -MICHAELJACKSON_0007:7:34:18499:17142#0 -MICHAELJACKSON_0007:7:63:10481:19848#0 -MICHAELJACKSON_0007:7:67:17861:6370#0 -MICHAELJACKSON_0007:7:97:13477:19883#0 -MICHAELJACKSON_0007:7:100:7451:10592#0 -MICHAELJACKSON_0007:7:113:18283:5456#0 -MICHAELJACKSON_0007:5:56:12015:16610#0 -MICHAELJACKSON_0007:5:73:10207:19095#0 -MICHAELJACKSON_0007:5:88:6967:17460#0 -MICHAELJACKSON_0007:5:113:8016:8809#0 -MICHAELJACKSON_0007:6:25:8338:6556#0 -MICHAELJACKSON_0007:6:35:16471:12571#0 -MICHAELJACKSON_0007:7:2:8324:2694#0 -MICHAELJACKSON_0007:7:23:5179:4844#0 -MICHAELJACKSON_0007:7:57:16148:14975#0 -MICHAELJACKSON_0007:7:65:19147:10367#0 -MICHAELJACKSON_0007:7:79:7213:10931#0 -MICHAELJACKSON_0007:5:80:16131:16554#0 -MICHAELJACKSON_0007:6:14:7103:10758#0 -MICHAELJACKSON_0007:6:17:3306:19450#0 -MICHAELJACKSON_0007:6:45:3710:3464#0 -MICHAELJACKSON_0007:6:77:1056:14721#0 -MICHAELJACKSON_0007:6:96:6559:17986#0 -MICHAELJACKSON_0007:6:98:7837:19409#0 -MICHAELJACKSON_0007:6:100:5744:19771#0 -MICHAELJACKSON_0007:7:58:6137:15584#0 -MICHAELJACKSON_0007:7:66:10450:15506#0 -MICHAELJACKSON_0007:7:71:11368:15072#0 -MICHAELJACKSON_0007:7:114:9456:15328#0 -MICHAELJACKSON_0007:5:100:4505:15947#0 -MICHAELJACKSON_0007:6:14:6382:10436#0 -MICHAELJACKSON_0007:6:25:7196:2674#0 -MICHAELJACKSON_0007:7:24:9465:12543#0 -MICHAELJACKSON_0007:7:87:1534:14161#0 -MICHAELJACKSON_0007:5:118:11743:1739#0 -MICHAELJACKSON_0007:5:119:16795:15124#0 -MICHAELJACKSON_0007:6:6:6018:15682#0 -MICHAELJACKSON_0007:6:13:9335:5376#0 
-MICHAELJACKSON_0007:6:100:14976:8900#0 -MICHAELJACKSON_0007:7:33:18945:12241#0 -MICHAELJACKSON_0007:7:83:3909:13565#0 -MICHAELJACKSON_0007:5:25:4834:14036#0 -MICHAELJACKSON_0007:5:69:18603:5788#0 -MICHAELJACKSON_0007:6:21:2952:12462#0 -MICHAELJACKSON_0007:6:68:18664:20417#0 -MICHAELJACKSON_0007:6:110:8987:5871#0 -MICHAELJACKSON_0007:5:4:2430:11261#0 -MICHAELJACKSON_0007:6:94:12820:12039#0 -MICHAELJACKSON_0007:6:117:11759:5036#0 -MICHAELJACKSON_0007:6:118:3899:2031#0 -MICHAELJACKSON_0007:7:59:17980:10125#0 -MICHAELJACKSON_0007:5:23:2646:18043#0 -MICHAELJACKSON_0007:5:53:5706:3674#0 -MICHAELJACKSON_0007:5:98:7664:16148#0 -MICHAELJACKSON_0007:6:17:11591:7979#0 -MICHAELJACKSON_0007:6:27:2422:5071#0 -MICHAELJACKSON_0007:7:1:13391:4363#0 -MICHAELJACKSON_0007:7:34:4280:4229#0 -MICHAELJACKSON_0007:7:53:3898:15825#0 -MICHAELJACKSON_0007:7:55:1605:10521#0 -MICHAELJACKSON_0007:7:63:17939:9704#0 -MICHAELJACKSON_0007:6:119:3566:12243#0 -MICHAELJACKSON_0007:7:1:17210:11748#0 -MICHAELJACKSON_0007:7:54:17434:9228#0 -MICHAELJACKSON_0007:7:94:16935:9367#0 -MICHAELJACKSON_0007:5:34:5655:10713#0 -MICHAELJACKSON_0007:6:52:12197:7864#0 -MICHAELJACKSON_0007:6:56:14448:19900#0 -MICHAELJACKSON_0007:6:75:15058:10954#0 -MICHAELJACKSON_0007:6:76:10760:4892#0 -MICHAELJACKSON_0007:6:114:7704:6099#0 -MICHAELJACKSON_0007:6:118:10627:10151#0 -MICHAELJACKSON_0007:6:120:12348:2484#0 -MICHAELJACKSON_0007:7:71:8899:20278#0 -MICHAELJACKSON_0007:7:79:5691:12655#0 -MICHAELJACKSON_0007:7:91:2762:17575#0 -MICHAELJACKSON_0007:6:60:13720:19669#0 -MICHAELJACKSON_0007:5:53:11963:4702#0 -MICHAELJACKSON_0007:5:100:4708:21121#0 -MICHAELJACKSON_0007:6:73:7061:18203#0 -MICHAELJACKSON_0007:6:114:9413:15092#0 -MICHAELJACKSON_0007:7:19:18325:14662#0 -MICHAELJACKSON_0007:7:70:3863:2858#0 -MICHAELJACKSON_0007:7:78:6113:11510#0 -MICHAELJACKSON_0007:7:83:3483:8273#0 -MICHAELJACKSON_0007:7:92:5451:18701#0 -MICHAELJACKSON_0007:7:118:7009:4083#0 -MICHAELJACKSON_0007:5:17:3922:19657#0 -MICHAELJACKSON_0007:5:33:9217:9506#0 
-MICHAELJACKSON_0007:6:79:11163:9402#0 -MICHAELJACKSON_0007:6:104:4436:1424#0 -MICHAELJACKSON_0007:5:110:9838:9553#0 -MICHAELJACKSON_0007:6:16:19457:11983#0 -MICHAELJACKSON_0007:6:116:8665:5683#0 -MICHAELJACKSON_0007:7:13:3312:3606#0 -MICHAELJACKSON_0007:7:110:6271:3759#0 -MICHAELJACKSON_0007:5:59:12564:8970#0 -MICHAELJACKSON_0007:5:69:5599:20195#0 -MICHAELJACKSON_0007:5:75:8610:11369#0 -MICHAELJACKSON_0007:5:94:11290:12621#0 -MICHAELJACKSON_0007:5:104:2438:11974#0 -MICHAELJACKSON_0007:5:111:6681:13870#0 -MICHAELJACKSON_0007:5:118:15781:13093#0 -MICHAELJACKSON_0007:7:2:10969:1440#0 -MICHAELJACKSON_0007:7:60:16200:12104#0 -MICHAELJACKSON_0007:7:65:7142:5439#0 -MICHAELJACKSON_0007:7:68:18439:14482#0 -MICHAELJACKSON_0007:7:109:19593:5515#0 -MICHAELJACKSON_0007:5:96:12058:2208#0 -MICHAELJACKSON_0007:7:37:7448:2079#0 -MICHAELJACKSON_0007:7:96:1149:4028#0 -MICHAELJACKSON_0007:7:118:3943:5184#0 -MICHAELJACKSON_0007:5:6:11898:6230#0 -MICHAELJACKSON_0007:5:30:13598:5396#0 -MICHAELJACKSON_0007:5:58:14241:19301#0 -MICHAELJACKSON_0007:5:61:15556:19642#0 -MICHAELJACKSON_0007:5:77:8922:14143#0 -MICHAELJACKSON_0007:6:24:15904:4751#0 -MICHAELJACKSON_0007:6:50:7003:2484#0 -MICHAELJACKSON_0007:6:52:6076:12895#0 -MICHAELJACKSON_0007:6:77:13380:6205#0 -MICHAELJACKSON_0007:6:108:10359:1986#0 -MICHAELJACKSON_0007:6:119:12113:7208#0 -MICHAELJACKSON_0007:7:70:18032:19147#0 -MICHAELJACKSON_0007:7:85:7887:4079#0 -MICHAELJACKSON_0007:7:100:6832:5369#0 -MICHAELJACKSON_0007:7:111:4470:15324#0 -MICHAELJACKSON_0007:7:120:12221:6258#0 -MICHAELJACKSON_0007:5:78:17024:13791#0 -MICHAELJACKSON_0007:5:89:9843:17238#0 -MICHAELJACKSON_0007:5:100:4461:1649#0 -MICHAELJACKSON_0007:5:117:19397:7760#0 -MICHAELJACKSON_0007:6:61:18083:19862#0 -MICHAELJACKSON_0007:6:66:13735:2548#0 -MICHAELJACKSON_0007:6:75:18788:2470#0 -MICHAELJACKSON_0007:7:82:16000:11701#0 -MICHAELJACKSON_0007:7:90:2305:11127#0 -MICHAELJACKSON_0007:7:106:6961:8191#0 -MICHAELJACKSON_0007:5:7:13392:1697#0 
-MICHAELJACKSON_0007:5:65:12220:15631#0 -MICHAELJACKSON_0007:5:110:10110:20305#0 -MICHAELJACKSON_0007:7:73:7703:9212#0 -MICHAELJACKSON_0007:7:82:6263:20963#0 -MICHAELJACKSON_0007:6:42:10415:11852#0 -MICHAELJACKSON_0007:6:42:16783:17024#0 -MICHAELJACKSON_0007:6:101:3922:13100#0 -MICHAELJACKSON_0007:7:41:19098:18595#0 -MICHAELJACKSON_0007:7:46:1752:20457#0 -MICHAELJACKSON_0007:7:50:14175:15596#0 -MICHAELJACKSON_0007:5:22:18140:2164#0 -MICHAELJACKSON_0007:5:90:11649:3247#0 -MICHAELJACKSON_0007:6:11:3672:1388#0 -MICHAELJACKSON_0007:6:12:18294:11370#0 -MICHAELJACKSON_0007:6:17:16979:11406#0 -MICHAELJACKSON_0007:6:51:7858:2900#0 -MICHAELJACKSON_0007:6:77:3967:16087#0 -MICHAELJACKSON_0007:6:98:11227:18268#0 -MICHAELJACKSON_0007:7:17:7284:11384#0 -MICHAELJACKSON_0007:7:60:13007:10129#0 -MICHAELJACKSON_0007:7:93:2974:18918#0 -MICHAELJACKSON_0007:7:96:5107:8841#0 -MICHAELJACKSON_0007:5:47:4823:20676#0 -MICHAELJACKSON_0007:5:53:3565:15089#0 -MICHAELJACKSON_0007:5:68:14408:21091#0 -MICHAELJACKSON_0007:5:79:7370:9865#0 -MICHAELJACKSON_0007:6:26:3642:15558#0 -MICHAELJACKSON_0007:7:16:19553:19777#0 -MICHAELJACKSON_0007:6:31:9904:7570#0 -MICHAELJACKSON_0007:6:71:4739:20954#0 -MICHAELJACKSON_0007:7:36:16749:20201#0 -MICHAELJACKSON_0007:6:116:12265:19180#0 -MICHAELJACKSON_0007:6:117:7960:14793#0 -MICHAELJACKSON_0007:7:51:13249:12440#0 -MICHAELJACKSON_0007:7:103:11287:18924#0 -MICHAELJACKSON_0007:5:106:5094:12855#0 -MICHAELJACKSON_0007:6:53:7977:17409#0 -MICHAELJACKSON_0007:6:80:10104:10497#0 -MICHAELJACKSON_0007:6:93:7851:19161#0 -MICHAELJACKSON_0007:6:94:1087:21183#0 -MICHAELJACKSON_0007:6:104:6044:19995#0 -MICHAELJACKSON_0007:6:120:1238:1797#0 -MICHAELJACKSON_0007:7:24:17155:15711#0 -MICHAELJACKSON_0007:7:63:13326:19451#0 -MICHAELJACKSON_0007:7:95:7655:10691#0 -MICHAELJACKSON_0007:5:14:8079:5468#0 -MICHAELJACKSON_0007:5:39:4237:14459#0 -MICHAELJACKSON_0007:5:55:19034:8833#0 -MICHAELJACKSON_0007:5:61:6630:11051#0 -MICHAELJACKSON_0007:5:74:16078:14531#0 
-MICHAELJACKSON_0007:5:76:2079:12861#0 -MICHAELJACKSON_0007:6:22:16715:19661#0 -MICHAELJACKSON_0007:6:45:7093:17581#0 -MICHAELJACKSON_0007:7:33:2141:14214#0 -MICHAELJACKSON_0007:6:1:10812:16140#0 -MICHAELJACKSON_0007:6:36:7491:4911#0 -MICHAELJACKSON_0007:5:6:7070:6352#0 -MICHAELJACKSON_0007:5:13:17310:20950#0 -MICHAELJACKSON_0007:5:26:12210:13482#0 -MICHAELJACKSON_0007:5:36:14667:12852#0 -MICHAELJACKSON_0007:5:37:13490:11590#0 -MICHAELJACKSON_0007:5:69:11287:8088#0 -MICHAELJACKSON_0007:5:81:2478:14387#0 -MICHAELJACKSON_0007:6:106:3836:15421#0 -MICHAELJACKSON_0007:7:21:17992:20060#0 -MICHAELJACKSON_0007:7:28:14450:1966#0 -MICHAELJACKSON_0007:7:35:18612:18268#0 -MICHAELJACKSON_0007:7:56:11740:17275#0 -MICHAELJACKSON_0007:7:85:16688:19089#0 -MICHAELJACKSON_0007:7:99:17946:12435#0 -MICHAELJACKSON_0007:5:52:16705:4840#0 -MICHAELJACKSON_0007:6:16:17830:6031#0 -MICHAELJACKSON_0007:6:39:11998:11523#0 -MICHAELJACKSON_0007:6:93:14748:1393#0 -MICHAELJACKSON_0007:7:73:8294:3186#0 -MICHAELJACKSON_0007:7:75:18140:8905#0 -MICHAELJACKSON_0007:6:120:13591:7377#0 -MICHAELJACKSON_0007:7:26:13147:10483#0 -MICHAELJACKSON_0007:7:55:4830:7248#0 -MICHAELJACKSON_0007:7:104:10508:2383#0 -MICHAELJACKSON_0007:5:2:10523:4940#0 -MICHAELJACKSON_0007:5:8:9217:19869#0 -MICHAELJACKSON_0007:5:13:16737:8895#0 -MICHAELJACKSON_0007:5:18:18227:6173#0 -MICHAELJACKSON_0007:5:19:13473:3713#0 -MICHAELJACKSON_0007:5:23:18427:15317#0 -MICHAELJACKSON_0007:5:39:12882:5703#0 -MICHAELJACKSON_0007:5:43:11405:3950#0 -MICHAELJACKSON_0007:5:43:5719:14595#0 -MICHAELJACKSON_0007:5:49:2656:16800#0 -MICHAELJACKSON_0007:5:63:16636:12422#0 -MICHAELJACKSON_0007:5:64:12561:12160#0 -MICHAELJACKSON_0007:5:66:6929:5598#0 -MICHAELJACKSON_0007:5:77:5878:16249#0 -MICHAELJACKSON_0007:5:78:12572:16147#0 -MICHAELJACKSON_0007:5:81:4722:12799#0 -MICHAELJACKSON_0007:5:85:5257:3609#0 -MICHAELJACKSON_0007:5:90:5392:17225#0 -MICHAELJACKSON_0007:5:93:12781:18218#0 -MICHAELJACKSON_0007:5:101:15472:17682#0 
-MICHAELJACKSON_0007:5:106:16284:1707#0 -MICHAELJACKSON_0007:6:3:10693:20390#0 -MICHAELJACKSON_0007:6:6:14453:5269#0 -MICHAELJACKSON_0007:6:12:3249:13946#0 -MICHAELJACKSON_0007:6:16:9643:5744#0 -MICHAELJACKSON_0007:6:22:4596:14560#0 -MICHAELJACKSON_0007:6:25:17932:7577#0 -MICHAELJACKSON_0007:6:27:14642:2376#0 -MICHAELJACKSON_0007:6:33:15922:3437#0 -MICHAELJACKSON_0007:6:45:6830:12156#0 -MICHAELJACKSON_0007:6:49:6114:20097#0 -MICHAELJACKSON_0007:6:53:11434:2749#0 -MICHAELJACKSON_0007:6:54:17577:5842#0 -MICHAELJACKSON_0007:6:55:7245:12550#0 -MICHAELJACKSON_0007:6:56:16262:18017#0 -MICHAELJACKSON_0007:6:61:12929:5574#0 -MICHAELJACKSON_0007:6:61:11345:11739#0 -MICHAELJACKSON_0007:6:69:13893:11478#0 -MICHAELJACKSON_0007:6:70:8214:5074#0 -MICHAELJACKSON_0007:6:73:11028:7480#0 -MICHAELJACKSON_0007:6:73:2417:8724#0 -MICHAELJACKSON_0007:6:80:9887:14030#0 -MICHAELJACKSON_0007:6:82:3220:13822#0 -MICHAELJACKSON_0007:6:91:16215:4997#0 -MICHAELJACKSON_0007:6:94:4155:7566#0 -MICHAELJACKSON_0007:6:97:15636:1605#0 -MICHAELJACKSON_0007:6:101:7582:19145#0 -MICHAELJACKSON_0007:6:106:10284:18589#0 -MICHAELJACKSON_0007:6:110:1420:4428#0 -MICHAELJACKSON_0007:6:111:5288:12763#0 -MICHAELJACKSON_0007:6:113:8703:2905#0 -MICHAELJACKSON_0007:6:113:12026:13228#0 -MICHAELJACKSON_0007:6:113:1423:14250#0 -MICHAELJACKSON_0007:6:116:14553:2810#0 -MICHAELJACKSON_0007:6:119:5583:1775#0 -MICHAELJACKSON_0007:7:4:5742:2630#0 -MICHAELJACKSON_0007:7:15:2665:11765#0 -MICHAELJACKSON_0007:7:18:9939:3475#0 -MICHAELJACKSON_0007:7:19:11587:17379#0 -MICHAELJACKSON_0007:7:23:12263:9633#0 -MICHAELJACKSON_0007:7:28:18065:4263#0 -MICHAELJACKSON_0007:7:35:9788:14254#0 -MICHAELJACKSON_0007:7:36:4222:11362#0 -MICHAELJACKSON_0007:7:40:8625:20843#0 -MICHAELJACKSON_0007:7:45:14708:9272#0 -MICHAELJACKSON_0007:7:46:10498:9045#0 -MICHAELJACKSON_0007:7:56:13821:15675#0 -MICHAELJACKSON_0007:7:56:8781:17676#0 -MICHAELJACKSON_0007:7:70:15419:11786#0 -MICHAELJACKSON_0007:7:75:10860:13134#0 -MICHAELJACKSON_0007:7:75:13943:19109#0 
-MICHAELJACKSON_0007:7:80:16475:14253#0 -MICHAELJACKSON_0007:7:87:13155:10948#0 diff --git a/src/htslib-1.18/htscodecs/tests/names/20.names b/src/htslib-1.18/htscodecs/tests/names/20.names deleted file mode 100644 index a20071d..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/20.names +++ /dev/null @@ -1,1000 +0,0 @@ -@I330_1_FC30JM6AAXX:4:1:0:199/1 -@I330_1_FC30JM6AAXX:4:1:0:242/1 -@I330_1_FC30JM6AAXX:4:1:0:394/1 -@I330_1_FC30JM6AAXX:4:1:0:438/1 -@I330_1_FC30JM6AAXX:4:1:0:740/1 -@I330_1_FC30JM6AAXX:4:1:0:753/1 -@I330_1_FC30JM6AAXX:4:1:0:881/1 -@I330_1_FC30JM6AAXX:4:1:0:1626/1 -@I330_1_FC30JM6AAXX:4:1:0:542/1 -@I330_1_FC30JM6AAXX:4:1:0:913/1 -@I330_1_FC30JM6AAXX:4:1:0:1435/1 -@I330_1_FC30JM6AAXX:4:1:0:146/1 -@I330_1_FC30JM6AAXX:4:1:0:1126/1 -@I330_1_FC30JM6AAXX:4:1:0:313/1 -@I330_1_FC30JM6AAXX:4:1:0:172/1 -@I330_1_FC30JM6AAXX:4:1:0:13/1 -@I330_1_FC30JM6AAXX:4:1:0:1343/1 -@I330_1_FC30JM6AAXX:4:1:0:26/1 -@I330_1_FC30JM6AAXX:4:1:0:111/1 -@I330_1_FC30JM6AAXX:4:1:0:291/1 -@I330_1_FC30JM6AAXX:4:1:0:528/1 -@I330_1_FC30JM6AAXX:4:1:0:859/1 -@I330_1_FC30JM6AAXX:4:1:0:1061/1 -@I330_1_FC30JM6AAXX:4:1:0:1402/1 -@I330_1_FC30JM6AAXX:4:1:0:1408/1 -@I330_1_FC30JM6AAXX:4:1:1:856/1 -@I330_1_FC30JM6AAXX:4:1:1:1016/1 -@I330_1_FC30JM6AAXX:4:1:1:956/1 -@I330_1_FC30JM6AAXX:4:1:1:63/1 -@I330_1_FC30JM6AAXX:4:1:1:819/1 -@I330_1_FC30JM6AAXX:4:1:1:907/1 -@I330_1_FC30JM6AAXX:4:1:1:1453/1 -@I330_1_FC30JM6AAXX:4:1:1:1270/1 -@I330_1_FC30JM6AAXX:4:1:1:131/1 -@I330_1_FC30JM6AAXX:4:1:1:1261/1 -@I330_1_FC30JM6AAXX:4:1:1:1240/1 -@I330_1_FC30JM6AAXX:4:1:1:1622/1 -@I330_1_FC30JM6AAXX:4:1:1:486/1 -@I330_1_FC30JM6AAXX:4:1:1:1850/1 -@I330_1_FC30JM6AAXX:4:1:1:706/1 -@I330_1_FC30JM6AAXX:4:1:1:1577/1 -@I330_1_FC30JM6AAXX:4:1:1:804/1 -@I330_1_FC30JM6AAXX:4:1:1:1481/1 -@I330_1_FC30JM6AAXX:4:1:1:122/1 -@I330_1_FC30JM6AAXX:4:1:1:1139/1 -@I330_1_FC30JM6AAXX:4:1:1:1486/1 -@I330_1_FC30JM6AAXX:4:1:1:745/1 -@I330_1_FC30JM6AAXX:4:1:1:1217/1 -@I330_1_FC30JM6AAXX:4:1:1:427/1 -@I330_1_FC30JM6AAXX:4:1:1:478/1 
-@I330_1_FC30JM6AAXX:4:1:1:1884/1 -@I330_1_FC30JM6AAXX:4:1:1:973/1 -@I330_1_FC30JM6AAXX:4:1:1:94/1 -@I330_1_FC30JM6AAXX:4:1:1:1220/1 -@I330_1_FC30JM6AAXX:4:1:1:1548/1 -@I330_1_FC30JM6AAXX:4:1:1:175/1 -@I330_1_FC30JM6AAXX:4:1:1:1228/1 -@I330_1_FC30JM6AAXX:4:1:1:1678/1 -@I330_1_FC30JM6AAXX:4:1:1:165/1 -@I330_1_FC30JM6AAXX:4:1:1:568/1 -@I330_1_FC30JM6AAXX:4:1:1:814/1 -@I330_1_FC30JM6AAXX:4:1:1:610/1 -@I330_1_FC30JM6AAXX:4:1:1:136/1 -@I330_1_FC30JM6AAXX:4:1:1:369/1 -@I330_1_FC30JM6AAXX:4:1:1:1468/1 -@I330_1_FC30JM6AAXX:4:1:1:886/1 -@I330_1_FC30JM6AAXX:4:1:2:777/1 -@I330_1_FC30JM6AAXX:4:1:2:961/1 -@I330_1_FC30JM6AAXX:4:1:2:206/1 -@I330_1_FC30JM6AAXX:4:1:2:878/1 -@I330_1_FC30JM6AAXX:4:1:2:1757/1 -@I330_1_FC30JM6AAXX:4:1:2:193/1 -@I330_1_FC30JM6AAXX:4:1:2:1379/1 -@I330_1_FC30JM6AAXX:4:1:2:168/1 -@I330_1_FC30JM6AAXX:4:1:2:1251/1 -@I330_1_FC30JM6AAXX:4:1:2:1920/1 -@I330_1_FC30JM6AAXX:4:1:2:1470/1 -@I330_1_FC30JM6AAXX:4:1:2:986/1 -@I330_1_FC30JM6AAXX:4:1:2:326/1 -@I330_1_FC30JM6AAXX:4:1:2:187/1 -@I330_1_FC30JM6AAXX:4:1:2:1872/1 -@I330_1_FC30JM6AAXX:4:1:2:991/1 -@I330_1_FC30JM6AAXX:4:1:2:1484/1 -@I330_1_FC30JM6AAXX:4:1:2:659/1 -@I330_1_FC30JM6AAXX:4:1:2:246/1 -@I330_1_FC30JM6AAXX:4:1:2:80/1 -@I330_1_FC30JM6AAXX:4:1:2:1133/1 -@I330_1_FC30JM6AAXX:4:1:2:1257/1 -@I330_1_FC30JM6AAXX:4:1:2:1244/1 -@I330_1_FC30JM6AAXX:4:1:2:1804/1 -@I330_1_FC30JM6AAXX:4:1:2:693/1 -@I330_1_FC30JM6AAXX:4:1:2:1641/1 -@I330_1_FC30JM6AAXX:4:1:2:248/1 -@I330_1_FC30JM6AAXX:4:1:2:307/1 -@I330_1_FC30JM6AAXX:4:1:2:523/1 -@I330_1_FC30JM6AAXX:4:1:2:1836/1 -@I330_1_FC30JM6AAXX:4:1:2:852/1 -@I330_1_FC30JM6AAXX:4:1:2:734/1 -@I330_1_FC30JM6AAXX:4:1:2:1614/1 -@I330_1_FC30JM6AAXX:4:1:2:829/1 -@I330_1_FC30JM6AAXX:4:1:2:631/1 -@I330_1_FC30JM6AAXX:4:1:2:1145/1 -@I330_1_FC30JM6AAXX:4:1:2:1593/1 -@I330_1_FC30JM6AAXX:4:1:2:78/1 -@I330_1_FC30JM6AAXX:4:1:2:1671/1 -@I330_1_FC30JM6AAXX:4:1:2:415/1 -@I330_1_FC30JM6AAXX:4:1:2:1169/1 -@I330_1_FC30JM6AAXX:4:1:2:474/1 -@I330_1_FC30JM6AAXX:4:1:2:1313/1 
-@I330_1_FC30JM6AAXX:4:1:2:1072/1 -@I330_1_FC30JM6AAXX:4:1:2:898/1 -@I330_1_FC30JM6AAXX:4:1:2:1744/1 -@I330_1_FC30JM6AAXX:4:1:2:235/1 -@I330_1_FC30JM6AAXX:4:1:2:386/1 -@I330_1_FC30JM6AAXX:4:1:2:842/1 -@I330_1_FC30JM6AAXX:4:1:2:362/1 -@I330_1_FC30JM6AAXX:4:1:2:1188/1 -@I330_1_FC30JM6AAXX:4:1:2:280/1 -@I330_1_FC30JM6AAXX:4:1:2:771/1 -@I330_1_FC30JM6AAXX:4:1:2:1161/1 -@I330_1_FC30JM6AAXX:4:1:2:143/1 -@I330_1_FC30JM6AAXX:4:1:2:909/1 -@I330_1_FC30JM6AAXX:4:1:2:1704/1 -@I330_1_FC30JM6AAXX:4:1:2:1595/1 -@I330_1_FC30JM6AAXX:4:1:2:1321/1 -@I330_1_FC30JM6AAXX:4:1:2:1077/1 -@I330_1_FC30JM6AAXX:4:1:2:1510/1 -@I330_1_FC30JM6AAXX:4:1:2:867/1 -@I330_1_FC30JM6AAXX:4:1:2:507/1 -@I330_1_FC30JM6AAXX:4:1:2:579/1 -@I330_1_FC30JM6AAXX:4:1:2:1392/1 -@I330_1_FC30JM6AAXX:4:1:2:3/1 -@I330_1_FC30JM6AAXX:4:1:2:53/1 -@I330_1_FC30JM6AAXX:4:1:2:405/1 -@I330_1_FC30JM6AAXX:4:1:2:1367/1 -@I330_1_FC30JM6AAXX:4:1:2:683/1 -@I330_1_FC30JM6AAXX:4:1:2:1153/1 -@I330_1_FC30JM6AAXX:4:1:2:1080/1 -@I330_1_FC30JM6AAXX:4:1:2:1325/1 -@I330_1_FC30JM6AAXX:4:1:2:1423/1 -@I330_1_FC30JM6AAXX:4:1:3:1844/1 -@I330_1_FC30JM6AAXX:4:1:3:99/1 -@I330_1_FC30JM6AAXX:4:1:3:1232/1 -@I330_1_FC30JM6AAXX:4:1:3:1456/1 -@I330_1_FC30JM6AAXX:4:1:3:1267/1 -@I330_1_FC30JM6AAXX:4:1:3:1045/1 -@I330_1_FC30JM6AAXX:4:1:3:1749/1 -@I330_1_FC30JM6AAXX:4:1:3:1978/1 -@I330_1_FC30JM6AAXX:4:1:3:2011/1 -@I330_1_FC30JM6AAXX:4:1:3:689/1 -@I330_1_FC30JM6AAXX:4:1:3:349/1 -@I330_1_FC30JM6AAXX:4:1:3:1293/1 -@I330_1_FC30JM6AAXX:4:1:3:2023/1 -@I330_1_FC30JM6AAXX:4:1:3:965/1 -@I330_1_FC30JM6AAXX:4:1:3:1348/1 -@I330_1_FC30JM6AAXX:4:1:3:1288/1 -@I330_1_FC30JM6AAXX:4:1:3:1686/1 -@I330_1_FC30JM6AAXX:4:1:3:484/1 -@I330_1_FC30JM6AAXX:4:1:3:675/1 -@I330_1_FC30JM6AAXX:4:1:3:44/1 -@I330_1_FC30JM6AAXX:4:1:3:124/1 -@I330_1_FC30JM6AAXX:4:1:3:240/1 -@I330_1_FC30JM6AAXX:4:1:3:823/1 -@I330_1_FC30JM6AAXX:4:1:3:1650/1 -@I330_1_FC30JM6AAXX:4:1:3:1113/1 -@I330_1_FC30JM6AAXX:4:1:3:1695/1 -@I330_1_FC30JM6AAXX:4:1:3:22/1 -@I330_1_FC30JM6AAXX:4:1:3:1766/1 
-@I330_1_FC30JM6AAXX:4:1:3:557/1 -@I330_1_FC30JM6AAXX:4:1:3:943/1 -@I330_1_FC30JM6AAXX:4:1:3:785/1 -@I330_1_FC30JM6AAXX:4:1:3:699/1 -@I330_1_FC30JM6AAXX:4:1:3:476/1 -@I330_1_FC30JM6AAXX:4:1:3:605/1 -@I330_1_FC30JM6AAXX:4:1:3:309/1 -@I330_1_FC30JM6AAXX:4:1:3:1035/1 -@I330_1_FC30JM6AAXX:4:1:3:1676/1 -@I330_1_FC30JM6AAXX:4:1:3:927/1 -@I330_1_FC30JM6AAXX:4:1:3:1580/1 -@I330_1_FC30JM6AAXX:4:1:3:1776/1 -@I330_1_FC30JM6AAXX:4:1:3:1586/1 -@I330_1_FC30JM6AAXX:4:1:3:599/1 -@I330_1_FC30JM6AAXX:4:1:3:848/1 -@I330_1_FC30JM6AAXX:4:1:3:238/1 -@I330_1_FC30JM6AAXX:4:1:3:1166/1 -@I330_1_FC30JM6AAXX:4:1:4:1736/1 -@I330_1_FC30JM6AAXX:4:1:4:317/1 -@I330_1_FC30JM6AAXX:4:1:4:547/1 -@I330_1_FC30JM6AAXX:4:1:4:1448/1 -@I330_1_FC30JM6AAXX:4:1:4:465/1 -@I330_1_FC30JM6AAXX:4:1:4:679/1 -@I330_1_FC30JM6AAXX:4:1:4:1263/1 -@I330_1_FC30JM6AAXX:4:1:4:1938/1 -@I330_1_FC30JM6AAXX:4:1:4:1103/1 -@I330_1_FC30JM6AAXX:4:1:4:1017/1 -@I330_1_FC30JM6AAXX:4:1:4:1981/1 -@I330_1_FC30JM6AAXX:4:1:4:871/1 -@I330_1_FC30JM6AAXX:4:1:4:323/1 -@I330_1_FC30JM6AAXX:4:1:4:1202/1 -@I330_1_FC30JM6AAXX:4:1:4:962/1 -@I330_1_FC30JM6AAXX:4:1:4:749/1 -@I330_1_FC30JM6AAXX:4:1:4:718/1 -@I330_1_FC30JM6AAXX:4:1:4:32/1 -@I330_1_FC30JM6AAXX:4:1:4:1708/1 -@I330_1_FC30JM6AAXX:4:1:4:1726/1 -@I330_1_FC30JM6AAXX:4:1:4:213/1 -@I330_1_FC30JM6AAXX:4:1:4:1952/1 -@I330_1_FC30JM6AAXX:4:1:4:451/1 -@I330_1_FC30JM6AAXX:4:1:4:814/1 -@I330_1_FC30JM6AAXX:4:1:4:228/1 -@I330_1_FC30JM6AAXX:4:1:4:1143/1 -@I330_1_FC30JM6AAXX:4:1:4:1964/1 -@I330_1_FC30JM6AAXX:4:1:4:410/1 -@I330_1_FC30JM6AAXX:4:1:4:1819/1 -@I330_1_FC30JM6AAXX:4:1:4:434/1 -@I330_1_FC30JM6AAXX:4:1:4:258/1 -@I330_1_FC30JM6AAXX:4:1:4:1795/1 -@I330_1_FC30JM6AAXX:4:1:4:161/1 -@I330_1_FC30JM6AAXX:4:1:4:1175/1 -@I330_1_FC30JM6AAXX:4:1:4:93/1 -@I330_1_FC30JM6AAXX:4:1:4:1770/1 -@I330_1_FC30JM6AAXX:4:1:4:1433/1 -@I330_1_FC30JM6AAXX:4:1:4:745/1 -@I330_1_FC30JM6AAXX:4:1:4:1459/1 -@I330_1_FC30JM6AAXX:4:1:4:1855/1 -@I330_1_FC30JM6AAXX:4:1:5:792/1 -@I330_1_FC30JM6AAXX:4:1:5:1259/1 
-@I330_1_FC30JM6AAXX:4:1:5:807/1 -@I330_1_FC30JM6AAXX:4:1:5:1197/1 -@I330_1_FC30JM6AAXX:4:1:5:1290/1 -@I330_1_FC30JM6AAXX:4:1:5:885/1 -@I330_1_FC30JM6AAXX:4:1:5:1188/1 -@I330_1_FC30JM6AAXX:4:1:5:245/1 -@I330_1_FC30JM6AAXX:4:1:5:611/1 -@I330_1_FC30JM6AAXX:4:1:5:1493/1 -@I330_1_FC30JM6AAXX:4:1:5:320/1 -@I330_1_FC30JM6AAXX:4:1:5:481/1 -@I330_1_FC30JM6AAXX:4:1:5:1500/1 -@I330_1_FC30JM6AAXX:4:1:5:758/1 -@I330_1_FC30JM6AAXX:4:1:5:1441/1 -@I330_1_FC30JM6AAXX:4:1:5:1928/1 -@I330_1_FC30JM6AAXX:4:1:5:1456/1 -@I330_1_FC30JM6AAXX:4:1:5:1404/1 -@I330_1_FC30JM6AAXX:4:1:5:956/1 -@I330_1_FC30JM6AAXX:4:1:5:1591/1 -@I330_1_FC30JM6AAXX:4:1:5:1615/1 -@I330_1_FC30JM6AAXX:4:1:5:540/1 -@I330_1_FC30JM6AAXX:4:1:5:1473/1 -@I330_1_FC30JM6AAXX:4:1:5:585/1 -@I330_1_FC30JM6AAXX:4:1:5:996/1 -@I330_1_FC30JM6AAXX:4:1:5:1444/1 -@I330_1_FC30JM6AAXX:4:1:5:37/1 -@I330_1_FC30JM6AAXX:4:1:5:1628/1 -@I330_1_FC30JM6AAXX:4:1:5:1608/1 -@I330_1_FC30JM6AAXX:4:1:5:1919/1 -@I330_1_FC30JM6AAXX:4:1:5:579/1 -@I330_1_FC30JM6AAXX:4:1:5:1851/1 -@I330_1_FC30JM6AAXX:4:1:5:458/1 -@I330_1_FC30JM6AAXX:4:1:5:1311/1 -@I330_1_FC30JM6AAXX:4:1:5:394/1 -@I330_1_FC30JM6AAXX:4:1:5:373/1 -@I330_1_FC30JM6AAXX:4:1:5:1715/1 -@I330_1_FC30JM6AAXX:4:1:5:566/1 -@I330_1_FC30JM6AAXX:4:1:5:1045/1 -@I330_1_FC30JM6AAXX:4:1:5:1058/1 -@I330_1_FC30JM6AAXX:4:1:5:1134/1 -@I330_1_FC30JM6AAXX:4:1:5:935/1 -@I330_1_FC30JM6AAXX:4:1:5:1235/1 -@I330_1_FC30JM6AAXX:4:1:5:379/1 -@I330_1_FC30JM6AAXX:4:1:5:775/1 -@I330_1_FC30JM6AAXX:4:1:5:255/1 -@I330_1_FC30JM6AAXX:4:1:5:1193/1 -@I330_1_FC30JM6AAXX:4:1:5:314/1 -@I330_1_FC30JM6AAXX:4:1:5:1182/1 -@I330_1_FC30JM6AAXX:4:1:5:19/1 -@I330_1_FC30JM6AAXX:4:1:5:355/1 -@I330_1_FC30JM6AAXX:4:1:5:881/1 -@I330_1_FC30JM6AAXX:4:1:6:1597/1 -@I330_1_FC30JM6AAXX:4:1:6:1684/1 -@I330_1_FC30JM6AAXX:4:1:6:1829/1 -@I330_1_FC30JM6AAXX:4:1:6:1563/1 -@I330_1_FC30JM6AAXX:4:1:6:1080/1 -@I330_1_FC30JM6AAXX:4:1:6:845/1 -@I330_1_FC30JM6AAXX:4:1:6:266/1 -@I330_1_FC30JM6AAXX:4:1:6:15/1 -@I330_1_FC30JM6AAXX:4:1:6:836/1 
-@I330_1_FC30JM6AAXX:4:1:6:1674/1 -@I330_1_FC30JM6AAXX:4:1:6:1812/1 -@I330_1_FC30JM6AAXX:4:1:6:1515/1 -@I330_1_FC30JM6AAXX:4:1:6:554/1 -@I330_1_FC30JM6AAXX:4:1:6:407/1 -@I330_1_FC30JM6AAXX:4:1:6:1204/1 -@I330_1_FC30JM6AAXX:4:1:6:503/1 -@I330_1_FC30JM6AAXX:4:1:6:177/1 -@I330_1_FC30JM6AAXX:4:1:6:520/1 -@I330_1_FC30JM6AAXX:4:1:6:1798/1 -@I330_1_FC30JM6AAXX:4:1:6:1429/1 -@I330_1_FC30JM6AAXX:4:1:6:1520/1 -@I330_1_FC30JM6AAXX:4:1:6:328/1 -@I330_1_FC30JM6AAXX:4:1:6:1584/1 -@I330_1_FC30JM6AAXX:4:1:6:798/1 -@I330_1_FC30JM6AAXX:4:1:6:989/1 -@I330_1_FC30JM6AAXX:4:1:6:535/1 -@I330_1_FC30JM6AAXX:4:1:6:691/1 -@I330_1_FC30JM6AAXX:4:1:6:23/1 -@I330_1_FC30JM6AAXX:4:1:6:527/1 -@I330_1_FC30JM6AAXX:4:1:6:860/1 -@I330_1_FC30JM6AAXX:4:1:6:1544/1 -@I330_1_FC30JM6AAXX:4:1:6:1145/1 -@I330_1_FC30JM6AAXX:4:1:6:1129/1 -@I330_1_FC30JM6AAXX:4:1:6:558/1 -@I330_1_FC30JM6AAXX:4:1:6:167/1 -@I330_1_FC30JM6AAXX:4:1:6:1316/1 -@I330_1_FC30JM6AAXX:4:1:6:660/1 -@I330_1_FC30JM6AAXX:4:1:6:1303/1 -@I330_1_FC30JM6AAXX:4:1:6:1835/1 -@I330_1_FC30JM6AAXX:4:1:6:399/1 -@I330_1_FC30JM6AAXX:4:1:6:197/1 -@I330_1_FC30JM6AAXX:4:1:6:818/1 -@I330_1_FC30JM6AAXX:4:1:6:974/1 -@I330_1_FC30JM6AAXX:4:1:6:1108/1 -@I330_1_FC30JM6AAXX:4:1:6:1162/1 -@I330_1_FC30JM6AAXX:4:1:6:233/1 -@I330_1_FC30JM6AAXX:4:1:6:773/1 -@I330_1_FC30JM6AAXX:4:1:6:415/1 -@I330_1_FC30JM6AAXX:4:1:6:1476/1 -@I330_1_FC30JM6AAXX:4:1:6:1076/1 -@I330_1_FC30JM6AAXX:4:1:6:123/1 -@I330_1_FC30JM6AAXX:4:1:6:892/1 -@I330_1_FC30JM6AAXX:4:1:6:1064/1 -@I330_1_FC30JM6AAXX:4:1:6:27/1 -@I330_1_FC30JM6AAXX:4:1:6:735/1 -@I330_1_FC30JM6AAXX:4:1:6:1886/1 -@I330_1_FC30JM6AAXX:4:1:6:106/1 -@I330_1_FC30JM6AAXX:4:1:6:1983/1 -@I330_1_FC30JM6AAXX:4:1:6:56/1 -@I330_1_FC30JM6AAXX:4:1:6:1347/1 -@I330_1_FC30JM6AAXX:4:1:6:453/1 -@I330_1_FC30JM6AAXX:4:1:6:240/1 -@I330_1_FC30JM6AAXX:4:1:6:931/1 -@I330_1_FC30JM6AAXX:4:1:6:1226/1 -@I330_1_FC30JM6AAXX:4:1:6:595/1 -@I330_1_FC30JM6AAXX:4:1:6:346/1 -@I330_1_FC30JM6AAXX:4:1:6:884/1 -@I330_1_FC30JM6AAXX:4:1:6:1399/1 
-@I330_1_FC30JM6AAXX:4:1:7:1950/1 -@I330_1_FC30JM6AAXX:4:1:7:1704/1 -@I330_1_FC30JM6AAXX:4:1:7:1465/1 -@I330_1_FC30JM6AAXX:4:1:7:1420/1 -@I330_1_FC30JM6AAXX:4:1:7:445/1 -@I330_1_FC30JM6AAXX:4:1:7:223/1 -@I330_1_FC30JM6AAXX:4:1:7:958/1 -@I330_1_FC30JM6AAXX:4:1:7:237/1 -@I330_1_FC30JM6AAXX:4:1:7:965/1 -@I330_1_FC30JM6AAXX:4:1:7:1007/1 -@I330_1_FC30JM6AAXX:4:1:7:1490/1 -@I330_1_FC30JM6AAXX:4:1:7:1938/1 -@I330_1_FC30JM6AAXX:4:1:7:217/1 -@I330_1_FC30JM6AAXX:4:1:7:141/1 -@I330_1_FC30JM6AAXX:4:1:7:664/1 -@I330_1_FC30JM6AAXX:4:1:7:274/1 -@I330_1_FC30JM6AAXX:4:1:7:358/1 -@I330_1_FC30JM6AAXX:4:1:7:1017/1 -@I330_1_FC30JM6AAXX:4:1:7:450/1 -@I330_1_FC30JM6AAXX:4:1:7:1572/1 -@I330_1_FC30JM6AAXX:4:1:7:67/1 -@I330_1_FC30JM6AAXX:4:1:7:1709/1 -@I330_1_FC30JM6AAXX:4:1:7:763/1 -@I330_1_FC30JM6AAXX:4:1:7:262/1 -@I330_1_FC30JM6AAXX:4:1:7:1533/1 -@I330_1_FC30JM6AAXX:4:1:7:370/1 -@I330_1_FC30JM6AAXX:4:1:7:1395/1 -@I330_1_FC30JM6AAXX:4:1:7:620/1 -@I330_1_FC30JM6AAXX:4:1:7:221/1 -@I330_1_FC30JM6AAXX:4:1:7:74/1 -@I330_1_FC30JM6AAXX:4:1:7:1413/1 -@I330_1_FC30JM6AAXX:4:1:7:1727/1 -@I330_1_FC30JM6AAXX:4:1:7:533/1 -@I330_1_FC30JM6AAXX:4:1:7:1322/1 -@I330_1_FC30JM6AAXX:4:1:7:721/1 -@I330_1_FC30JM6AAXX:4:1:7:652/1 -@I330_1_FC30JM6AAXX:4:1:7:900/1 -@I330_1_FC30JM6AAXX:4:1:7:1912/1 -@I330_1_FC30JM6AAXX:4:1:7:875/1 -@I330_1_FC30JM6AAXX:4:1:7:71/1 -@I330_1_FC30JM6AAXX:4:1:7:1407/1 -@I330_1_FC30JM6AAXX:4:1:7:960/1 -@I330_1_FC30JM6AAXX:4:1:7:888/1 -@I330_1_FC30JM6AAXX:4:1:7:1747/1 -@I330_1_FC30JM6AAXX:4:1:7:1499/1 -@I330_1_FC30JM6AAXX:4:1:7:1113/1 -@I330_1_FC30JM6AAXX:4:1:7:654/1 -@I330_1_FC30JM6AAXX:4:1:7:1189/1 -@I330_1_FC30JM6AAXX:4:1:7:1665/1 -@I330_1_FC30JM6AAXX:4:1:7:2007/1 -@I330_1_FC30JM6AAXX:4:1:7:465/1 -@I330_1_FC30JM6AAXX:4:1:7:306/1 -@I330_1_FC30JM6AAXX:4:1:7:353/1 -@I330_1_FC30JM6AAXX:4:1:7:1700/1 -@I330_1_FC30JM6AAXX:4:1:7:1557/1 -@I330_1_FC30JM6AAXX:4:1:7:99/1 -@I330_1_FC30JM6AAXX:4:1:7:1310/1 -@I330_1_FC30JM6AAXX:4:1:7:1846/1 -@I330_1_FC30JM6AAXX:4:1:7:1855/1 
-@I330_1_FC30JM6AAXX:4:1:7:1806/1 -@I330_1_FC30JM6AAXX:4:1:7:786/1 -@I330_1_FC30JM6AAXX:4:1:7:1184/1 -@I330_1_FC30JM6AAXX:4:1:7:696/1 -@I330_1_FC30JM6AAXX:4:1:8:363/1 -@I330_1_FC30JM6AAXX:4:1:8:114/1 -@I330_1_FC30JM6AAXX:4:1:8:1024/1 -@I330_1_FC30JM6AAXX:4:1:8:1509/1 -@I330_1_FC30JM6AAXX:4:1:8:208/1 -@I330_1_FC30JM6AAXX:4:1:8:545/1 -@I330_1_FC30JM6AAXX:4:1:8:794/1 -@I330_1_FC30JM6AAXX:4:1:8:152/1 -@I330_1_FC30JM6AAXX:4:1:8:469/1 -@I330_1_FC30JM6AAXX:4:1:8:563/1 -@I330_1_FC30JM6AAXX:4:1:8:1880/1 -@I330_1_FC30JM6AAXX:4:1:8:1219/1 -@I330_1_FC30JM6AAXX:4:1:8:1582/1 -@I330_1_FC30JM6AAXX:4:1:8:1720/1 -@I330_1_FC30JM6AAXX:4:1:8:1872/1 -@I330_1_FC30JM6AAXX:4:1:8:954/1 -@I330_1_FC30JM6AAXX:4:1:8:813/1 -@I330_1_FC30JM6AAXX:4:1:8:1966/1 -@I330_1_FC30JM6AAXX:4:1:8:1921/1 -@I330_1_FC30JM6AAXX:4:1:8:1270/1 -@I330_1_FC30JM6AAXX:4:1:8:250/1 -@I330_1_FC30JM6AAXX:4:1:8:108/1 -@I330_1_FC30JM6AAXX:4:1:8:1564/1 -@I330_1_FC30JM6AAXX:4:1:8:213/1 -@I330_1_FC30JM6AAXX:4:1:8:1159/1 -@I330_1_FC30JM6AAXX:4:1:8:1600/1 -@I330_1_FC30JM6AAXX:4:1:8:1352/1 -@I330_1_FC30JM6AAXX:4:1:8:509/1 -@I330_1_FC30JM6AAXX:4:1:8:1390/1 -@I330_1_FC30JM6AAXX:4:1:8:603/1 -@I330_1_FC30JM6AAXX:4:1:8:1946/1 -@I330_1_FC30JM6AAXX:4:1:8:1669/1 -@I330_1_FC30JM6AAXX:4:1:8:718/1 -@I330_1_FC30JM6AAXX:4:1:8:1327/1 -@I330_1_FC30JM6AAXX:4:1:8:1095/1 -@I330_1_FC30JM6AAXX:4:1:8:1655/1 -@I330_1_FC30JM6AAXX:4:1:8:1150/1 -@I330_1_FC30JM6AAXX:4:1:8:908/1 -@I330_1_FC30JM6AAXX:4:1:8:767/1 -@I330_1_FC30JM6AAXX:4:1:8:1732/1 -@I330_1_FC30JM6AAXX:4:1:8:205/1 -@I330_1_FC30JM6AAXX:4:1:8:969/1 -@I330_1_FC30JM6AAXX:4:1:8:1970/1 -@I330_1_FC30JM6AAXX:4:1:8:1792/1 -@I330_1_FC30JM6AAXX:4:1:8:701/1 -@I330_1_FC30JM6AAXX:4:1:8:1449/1 -@I330_1_FC30JM6AAXX:4:1:8:1010/1 -@I330_1_FC30JM6AAXX:4:1:8:543/1 -@I330_1_FC30JM6AAXX:4:1:8:494/1 -@I330_1_FC30JM6AAXX:4:1:8:1315/1 -@I330_1_FC30JM6AAXX:4:1:8:1758/1 -@I330_1_FC30JM6AAXX:4:1:8:1736/1 -@I330_1_FC30JM6AAXX:4:1:8:440/1 -@I330_1_FC30JM6AAXX:4:1:8:175/1 -@I330_1_FC30JM6AAXX:4:1:8:830/1 
-@I330_1_FC30JM6AAXX:4:1:8:637/1 -@I330_1_FC30JM6AAXX:4:1:8:1814/1 -@I330_1_FC30JM6AAXX:4:1:8:1393/1 -@I330_1_FC30JM6AAXX:4:1:8:825/1 -@I330_1_FC30JM6AAXX:4:1:8:747/1 -@I330_1_FC30JM6AAXX:4:1:8:1404/1 -@I330_1_FC30JM6AAXX:4:1:9:6/1 -@I330_1_FC30JM6AAXX:4:1:9:1118/1 -@I330_1_FC30JM6AAXX:4:1:9:1680/1 -@I330_1_FC30JM6AAXX:4:1:9:427/1 -@I330_1_FC30JM6AAXX:4:1:9:750/1 -@I330_1_FC30JM6AAXX:4:1:9:1437/1 -@I330_1_FC30JM6AAXX:4:1:9:234/1 -@I330_1_FC30JM6AAXX:4:1:9:1520/1 -@I330_1_FC30JM6AAXX:4:1:9:656/1 -@I330_1_FC30JM6AAXX:4:1:9:1203/1 -@I330_1_FC30JM6AAXX:4:1:9:1692/1 -@I330_1_FC30JM6AAXX:4:1:9:35/1 -@I330_1_FC30JM6AAXX:4:1:9:902/1 -@I330_1_FC30JM6AAXX:4:1:9:1930/1 -@I330_1_FC30JM6AAXX:4:1:9:1676/1 -@I330_1_FC30JM6AAXX:4:1:9:972/1 -@I330_1_FC30JM6AAXX:4:1:9:1086/1 -@I330_1_FC30JM6AAXX:4:1:9:389/1 -@I330_1_FC30JM6AAXX:4:1:9:139/1 -@I330_1_FC30JM6AAXX:4:1:9:472/1 -@I330_1_FC30JM6AAXX:4:1:9:1512/1 -@I330_1_FC30JM6AAXX:4:1:9:802/1 -@I330_1_FC30JM6AAXX:4:1:9:478/1 -@I330_1_FC30JM6AAXX:4:1:9:1606/1 -@I330_1_FC30JM6AAXX:4:1:9:293/1 -@I330_1_FC30JM6AAXX:4:1:9:726/1 -@I330_1_FC30JM6AAXX:4:1:9:16/1 -@I330_1_FC30JM6AAXX:4:1:9:924/1 -@I330_1_FC30JM6AAXX:4:1:9:51/1 -@I330_1_FC30JM6AAXX:4:1:9:312/1 -@I330_1_FC30JM6AAXX:4:1:9:1471/1 -@I330_1_FC30JM6AAXX:4:1:9:1425/1 -@I330_1_FC30JM6AAXX:4:1:9:1663/1 -@I330_1_FC30JM6AAXX:4:1:9:865/1 -@I330_1_FC30JM6AAXX:4:1:9:2011/1 -@I330_1_FC30JM6AAXX:4:1:9:1959/1 -@I330_1_FC30JM6AAXX:4:1:9:1277/1 -@I330_1_FC30JM6AAXX:4:1:9:2016/1 -@I330_1_FC30JM6AAXX:4:1:9:1055/1 -@I330_1_FC30JM6AAXX:4:1:9:456/1 -@I330_1_FC30JM6AAXX:4:1:9:32/1 -@I330_1_FC30JM6AAXX:4:1:9:1987/1 -@I330_1_FC30JM6AAXX:4:1:9:962/1 -@I330_1_FC30JM6AAXX:4:1:9:1766/1 -@I330_1_FC30JM6AAXX:4:1:9:1545/1 -@I330_1_FC30JM6AAXX:4:1:9:401/1 -@I330_1_FC30JM6AAXX:4:1:9:536/1 -@I330_1_FC30JM6AAXX:4:1:9:499/1 -@I330_1_FC30JM6AAXX:4:1:9:1524/1 -@I330_1_FC30JM6AAXX:4:1:10:1324/1 -@I330_1_FC30JM6AAXX:4:1:10:1285/1 -@I330_1_FC30JM6AAXX:4:1:10:918/1 -@I330_1_FC30JM6AAXX:4:1:10:1007/1 
-@I330_1_FC30JM6AAXX:4:1:10:1092/1 -@I330_1_FC30JM6AAXX:4:1:10:404/1 -@I330_1_FC30JM6AAXX:4:1:10:1810/1 -@I330_1_FC30JM6AAXX:4:1:10:1373/1 -@I330_1_FC30JM6AAXX:4:1:10:135/1 -@I330_1_FC30JM6AAXX:4:1:10:1444/1 -@I330_1_FC30JM6AAXX:4:1:10:959/1 -@I330_1_FC30JM6AAXX:4:1:10:1541/1 -@I330_1_FC30JM6AAXX:4:1:10:323/1 -@I330_1_FC30JM6AAXX:4:1:10:863/1 -@I330_1_FC30JM6AAXX:4:1:10:670/1 -@I330_1_FC30JM6AAXX:4:1:10:330/1 -@I330_1_FC30JM6AAXX:4:1:10:1211/1 -@I330_1_FC30JM6AAXX:4:1:10:772/1 -@I330_1_FC30JM6AAXX:4:1:10:557/1 -@I330_1_FC30JM6AAXX:4:1:10:74/1 -@I330_1_FC30JM6AAXX:4:1:10:991/1 -@I330_1_FC30JM6AAXX:4:1:10:1977/1 -@I330_1_FC30JM6AAXX:4:1:10:1108/1 -@I330_1_FC30JM6AAXX:4:1:10:1950/1 -@I330_1_FC30JM6AAXX:4:1:10:1182/1 -@I330_1_FC30JM6AAXX:4:1:10:1700/1 -@I330_1_FC30JM6AAXX:4:1:10:107/1 -@I330_1_FC30JM6AAXX:4:1:10:254/1 -@I330_1_FC30JM6AAXX:4:1:10:151/1 -@I330_1_FC30JM6AAXX:4:1:10:835/1 -@I330_1_FC30JM6AAXX:4:1:10:941/1 -@I330_1_FC30JM6AAXX:4:1:10:421/1 -@I330_1_FC30JM6AAXX:4:1:10:1568/1 -@I330_1_FC30JM6AAXX:4:1:10:111/1 -@I330_1_FC30JM6AAXX:4:1:10:1530/1 -@I330_1_FC30JM6AAXX:4:1:10:1232/1 -@I330_1_FC30JM6AAXX:4:1:10:283/1 -@I330_1_FC30JM6AAXX:4:1:10:1774/1 -@I330_1_FC30JM6AAXX:4:1:10:532/1 -@I330_1_FC30JM6AAXX:4:1:10:273/1 -@I330_1_FC30JM6AAXX:4:1:10:610/1 -@I330_1_FC30JM6AAXX:4:1:10:304/1 -@I330_1_FC30JM6AAXX:4:1:10:840/1 -@I330_1_FC30JM6AAXX:4:1:10:619/1 -@I330_1_FC30JM6AAXX:4:1:10:1789/1 -@I330_1_FC30JM6AAXX:4:1:10:1724/1 -@I330_1_FC30JM6AAXX:4:1:10:410/1 -@I330_1_FC30JM6AAXX:4:1:11:332/1 -@I330_1_FC30JM6AAXX:4:1:11:603/1 -@I330_1_FC30JM6AAXX:4:1:11:970/1 -@I330_1_FC30JM6AAXX:4:1:11:1319/1 -@I330_1_FC30JM6AAXX:4:1:11:1366/1 -@I330_1_FC30JM6AAXX:4:1:11:220/1 -@I330_1_FC30JM6AAXX:4:1:11:1034/1 -@I330_1_FC30JM6AAXX:4:1:11:476/1 -@I330_1_FC30JM6AAXX:4:1:11:398/1 -@I330_1_FC30JM6AAXX:4:1:11:337/1 -@I330_1_FC30JM6AAXX:4:1:11:1133/1 -@I330_1_FC30JM6AAXX:4:1:11:1041/1 -@I330_1_FC30JM6AAXX:4:1:11:309/1 -@I330_1_FC30JM6AAXX:4:1:11:354/1 -@I330_1_FC30JM6AAXX:4:1:11:1422/1 
-@I330_1_FC30JM6AAXX:4:1:11:1651/1 -@I330_1_FC30JM6AAXX:4:1:11:914/1 -@I330_1_FC30JM6AAXX:4:1:11:1609/1 -@I330_1_FC30JM6AAXX:4:1:11:1218/1 -@I330_1_FC30JM6AAXX:4:1:11:1714/1 -@I330_1_FC30JM6AAXX:4:1:11:928/1 -@I330_1_FC30JM6AAXX:4:1:11:1227/1 -@I330_1_FC30JM6AAXX:4:1:11:1645/1 -@I330_1_FC30JM6AAXX:4:1:11:1688/1 -@I330_1_FC30JM6AAXX:4:1:11:753/1 -@I330_1_FC30JM6AAXX:4:1:11:1459/1 -@I330_1_FC30JM6AAXX:4:1:11:635/1 -@I330_1_FC30JM6AAXX:4:1:11:1968/1 -@I330_1_FC30JM6AAXX:4:1:11:1623/1 -@I330_1_FC30JM6AAXX:4:1:11:1197/1 -@I330_1_FC30JM6AAXX:4:1:11:13/1 -@I330_1_FC30JM6AAXX:4:1:11:276/1 -@I330_1_FC30JM6AAXX:4:1:11:119/1 -@I330_1_FC30JM6AAXX:4:1:11:512/1 -@I330_1_FC30JM6AAXX:4:1:11:1359/1 -@I330_1_FC30JM6AAXX:4:1:11:1500/1 -@I330_1_FC30JM6AAXX:4:1:11:599/1 -@I330_1_FC30JM6AAXX:4:1:11:1598/1 -@I330_1_FC30JM6AAXX:4:1:11:778/1 -@I330_1_FC30JM6AAXX:4:1:11:127/1 -@I330_1_FC30JM6AAXX:4:1:11:1252/1 -@I330_1_FC30JM6AAXX:4:1:11:782/1 -@I330_1_FC30JM6AAXX:4:1:11:1514/1 -@I330_1_FC30JM6AAXX:4:1:11:1484/1 -@I330_1_FC30JM6AAXX:4:1:11:1168/1 -@I330_1_FC30JM6AAXX:4:1:11:225/1 -@I330_1_FC30JM6AAXX:4:1:11:881/1 -@I330_1_FC30JM6AAXX:4:1:11:1615/1 -@I330_1_FC30JM6AAXX:4:1:11:251/1 -@I330_1_FC30JM6AAXX:4:1:11:328/1 -@I330_1_FC30JM6AAXX:4:1:11:804/1 -@I330_1_FC30JM6AAXX:4:1:12:456/1 -@I330_1_FC30JM6AAXX:4:1:12:169/1 -@I330_1_FC30JM6AAXX:4:1:12:812/1 -@I330_1_FC30JM6AAXX:4:1:12:909/1 -@I330_1_FC30JM6AAXX:4:1:12:999/1 -@I330_1_FC30JM6AAXX:4:1:12:625/1 -@I330_1_FC30JM6AAXX:4:1:12:1934/1 -@I330_1_FC30JM6AAXX:4:1:12:1685/1 -@I330_1_FC30JM6AAXX:4:1:12:264/1 -@I330_1_FC30JM6AAXX:4:1:12:1356/1 -@I330_1_FC30JM6AAXX:4:1:12:1793/1 -@I330_1_FC30JM6AAXX:4:1:12:1973/1 -@I330_1_FC30JM6AAXX:4:1:12:471/1 -@I330_1_FC30JM6AAXX:4:1:12:1925/1 -@I330_1_FC30JM6AAXX:4:1:12:413/1 -@I330_1_FC30JM6AAXX:4:1:12:1848/1 -@I330_1_FC30JM6AAXX:4:1:12:1769/1 -@I330_1_FC30JM6AAXX:4:1:12:1090/1 -@I330_1_FC30JM6AAXX:4:1:12:1466/1 -@I330_1_FC30JM6AAXX:4:1:12:815/1 -@I330_1_FC30JM6AAXX:4:1:12:244/1 
-@I330_1_FC30JM6AAXX:4:1:12:1370/1 -@I330_1_FC30JM6AAXX:4:1:12:141/1 -@I330_1_FC30JM6AAXX:4:1:12:995/1 -@I330_1_FC30JM6AAXX:4:1:12:1681/1 -@I330_1_FC30JM6AAXX:4:1:12:1812/1 -@I330_1_FC30JM6AAXX:4:1:12:1695/1 -@I330_1_FC30JM6AAXX:4:1:12:706/1 -@I330_1_FC30JM6AAXX:4:1:12:1956/1 -@I330_1_FC30JM6AAXX:4:1:12:1379/1 -@I330_1_FC30JM6AAXX:4:1:12:436/1 -@I330_1_FC30JM6AAXX:4:1:12:904/1 -@I330_1_FC30JM6AAXX:4:1:12:1916/1 -@I330_1_FC30JM6AAXX:4:1:12:381/1 -@I330_1_FC30JM6AAXX:4:1:12:279/1 -@I330_1_FC30JM6AAXX:4:1:12:976/1 -@I330_1_FC30JM6AAXX:4:1:12:187/1 -@I330_1_FC30JM6AAXX:4:1:12:866/1 -@I330_1_FC30JM6AAXX:4:1:12:1863/1 -@I330_1_FC30JM6AAXX:4:1:12:1049/1 -@I330_1_FC30JM6AAXX:4:1:12:23/1 -@I330_1_FC30JM6AAXX:4:1:12:1552/1 -@I330_1_FC30JM6AAXX:4:1:12:1901/1 -@I330_1_FC30JM6AAXX:4:1:12:541/1 -@I330_1_FC30JM6AAXX:4:1:12:1316/1 -@I330_1_FC30JM6AAXX:4:1:12:1573/1 -@I330_1_FC30JM6AAXX:4:1:12:1980/1 -@I330_1_FC30JM6AAXX:4:1:12:1077/1 -@I330_1_FC30JM6AAXX:4:1:12:535/1 -@I330_1_FC30JM6AAXX:4:1:12:32/1 -@I330_1_FC30JM6AAXX:4:1:12:1805/1 -@I330_1_FC30JM6AAXX:4:1:12:1223/1 -@I330_1_FC30JM6AAXX:4:1:12:1161/1 -@I330_1_FC30JM6AAXX:4:1:12:841/1 -@I330_1_FC30JM6AAXX:4:1:12:1525/1 -@I330_1_FC30JM6AAXX:4:1:12:1873/1 -@I330_1_FC30JM6AAXX:4:1:12:1394/1 -@I330_1_FC30JM6AAXX:4:1:13:2010/1 -@I330_1_FC30JM6AAXX:4:1:13:29/1 -@I330_1_FC30JM6AAXX:4:1:13:1951/1 -@I330_1_FC30JM6AAXX:4:1:13:1351/1 -@I330_1_FC30JM6AAXX:4:1:13:419/1 -@I330_1_FC30JM6AAXX:4:1:13:1338/1 -@I330_1_FC30JM6AAXX:4:1:13:434/1 -@I330_1_FC30JM6AAXX:4:1:13:709/1 -@I330_1_FC30JM6AAXX:4:1:13:121/1 -@I330_1_FC30JM6AAXX:4:1:13:206/1 -@I330_1_FC30JM6AAXX:4:1:13:1881/1 -@I330_1_FC30JM6AAXX:4:1:13:1345/1 -@I330_1_FC30JM6AAXX:4:1:13:1294/1 -@I330_1_FC30JM6AAXX:4:1:13:1424/1 -@I330_1_FC30JM6AAXX:4:1:13:591/1 -@I330_1_FC30JM6AAXX:4:1:13:655/1 -@I330_1_FC30JM6AAXX:4:1:13:159/1 -@I330_1_FC30JM6AAXX:4:1:13:571/1 -@I330_1_FC30JM6AAXX:4:1:13:1967/1 -@I330_1_FC30JM6AAXX:4:1:13:1348/1 -@I330_1_FC30JM6AAXX:4:1:13:448/1 
-@I330_1_FC30JM6AAXX:4:1:13:1283/1 -@I330_1_FC30JM6AAXX:4:1:13:1569/1 -@I330_1_FC30JM6AAXX:4:1:13:1643/1 -@I330_1_FC30JM6AAXX:4:1:13:922/1 -@I330_1_FC30JM6AAXX:4:1:13:1267/1 -@I330_1_FC30JM6AAXX:4:1:13:162/1 -@I330_1_FC30JM6AAXX:4:1:13:757/1 -@I330_1_FC30JM6AAXX:4:1:13:342/1 -@I330_1_FC30JM6AAXX:4:1:13:1400/1 -@I330_1_FC30JM6AAXX:4:1:13:1826/1 -@I330_1_FC30JM6AAXX:4:1:13:1602/1 -@I330_1_FC30JM6AAXX:4:1:13:485/1 -@I330_1_FC30JM6AAXX:4:1:13:1375/1 -@I330_1_FC30JM6AAXX:4:1:13:239/1 -@I330_1_FC30JM6AAXX:4:1:13:1124/1 -@I330_1_FC30JM6AAXX:4:1:13:1389/1 -@I330_1_FC30JM6AAXX:4:1:13:1559/1 -@I330_1_FC30JM6AAXX:4:1:13:1328/1 -@I330_1_FC30JM6AAXX:4:1:13:1504/1 -@I330_1_FC30JM6AAXX:4:1:13:610/1 -@I330_1_FC30JM6AAXX:4:1:13:445/1 -@I330_1_FC30JM6AAXX:4:1:13:42/1 -@I330_1_FC30JM6AAXX:4:1:13:124/1 -@I330_1_FC30JM6AAXX:4:1:13:795/1 -@I330_1_FC30JM6AAXX:4:1:13:1845/1 -@I330_1_FC30JM6AAXX:4:1:13:1185/1 -@I330_1_FC30JM6AAXX:4:1:13:1325/1 -@I330_1_FC30JM6AAXX:4:1:13:724/1 -@I330_1_FC30JM6AAXX:4:1:13:1067/1 -@I330_1_FC30JM6AAXX:4:1:13:1855/1 -@I330_1_FC30JM6AAXX:4:1:13:1072/1 -@I330_1_FC30JM6AAXX:4:1:13:1386/1 -@I330_1_FC30JM6AAXX:4:1:13:1611/1 -@I330_1_FC30JM6AAXX:4:1:13:897/1 -@I330_1_FC30JM6AAXX:4:1:13:968/1 -@I330_1_FC30JM6AAXX:4:1:14:1990/1 -@I330_1_FC30JM6AAXX:4:1:14:1898/1 -@I330_1_FC30JM6AAXX:4:1:14:694/1 -@I330_1_FC30JM6AAXX:4:1:14:1421/1 -@I330_1_FC30JM6AAXX:4:1:14:301/1 -@I330_1_FC30JM6AAXX:4:1:14:478/1 -@I330_1_FC30JM6AAXX:4:1:14:1258/1 -@I330_1_FC30JM6AAXX:4:1:14:1013/1 -@I330_1_FC30JM6AAXX:4:1:14:1297/1 -@I330_1_FC30JM6AAXX:4:1:14:1025/1 -@I330_1_FC30JM6AAXX:4:1:14:211/1 -@I330_1_FC30JM6AAXX:4:1:14:78/1 -@I330_1_FC30JM6AAXX:4:1:14:1139/1 -@I330_1_FC30JM6AAXX:4:1:14:1358/1 -@I330_1_FC30JM6AAXX:4:1:14:313/1 -@I330_1_FC30JM6AAXX:4:1:14:1200/1 -@I330_1_FC30JM6AAXX:4:1:14:1084/1 -@I330_1_FC30JM6AAXX:4:1:14:790/1 -@I330_1_FC30JM6AAXX:4:1:14:1180/1 -@I330_1_FC30JM6AAXX:4:1:14:1110/1 -@I330_1_FC30JM6AAXX:4:1:14:1434/1 -@I330_1_FC30JM6AAXX:4:1:14:1747/1 
-@I330_1_FC30JM6AAXX:4:1:14:408/1 -@I330_1_FC30JM6AAXX:4:1:14:184/1 -@I330_1_FC30JM6AAXX:4:1:14:1623/1 -@I330_1_FC30JM6AAXX:4:1:14:1942/1 -@I330_1_FC30JM6AAXX:4:1:14:1682/1 -@I330_1_FC30JM6AAXX:4:1:14:1242/1 -@I330_1_FC30JM6AAXX:4:1:14:1723/1 -@I330_1_FC30JM6AAXX:4:1:14:1307/1 -@I330_1_FC30JM6AAXX:4:1:14:1839/1 -@I330_1_FC30JM6AAXX:4:1:14:1037/1 -@I330_1_FC30JM6AAXX:4:1:14:45/1 -@I330_1_FC30JM6AAXX:4:1:14:1218/1 -@I330_1_FC30JM6AAXX:4:1:14:318/1 -@I330_1_FC30JM6AAXX:4:1:14:1789/1 -@I330_1_FC30JM6AAXX:4:1:14:529/1 -@I330_1_FC30JM6AAXX:4:1:14:602/1 -@I330_1_FC30JM6AAXX:4:1:14:133/1 -@I330_1_FC30JM6AAXX:4:1:14:1634/1 -@I330_1_FC30JM6AAXX:4:1:14:412/1 -@I330_1_FC30JM6AAXX:4:1:14:1758/1 -@I330_1_FC30JM6AAXX:4:1:14:298/1 -@I330_1_FC30JM6AAXX:4:1:14:1246/1 -@I330_1_FC30JM6AAXX:4:1:14:614/1 -@I330_1_FC30JM6AAXX:4:1:14:1919/1 -@I330_1_FC30JM6AAXX:4:1:14:963/1 -@I330_1_FC30JM6AAXX:4:1:14:1190/1 -@I330_1_FC30JM6AAXX:4:1:14:1627/1 -@I330_1_FC30JM6AAXX:4:1:14:1864/1 -@I330_1_FC30JM6AAXX:4:1:14:1131/1 -@I330_1_FC30JM6AAXX:4:1:14:779/1 -@I330_1_FC30JM6AAXX:4:1:14:981/1 -@I330_1_FC30JM6AAXX:4:1:14:1562/1 -@I330_1_FC30JM6AAXX:4:1:14:761/1 -@I330_1_FC30JM6AAXX:4:1:14:1999/1 -@I330_1_FC30JM6AAXX:4:1:14:1478/1 -@I330_1_FC30JM6AAXX:4:1:15:482/1 -@I330_1_FC30JM6AAXX:4:1:15:1154/1 -@I330_1_FC30JM6AAXX:4:1:15:1113/1 -@I330_1_FC30JM6AAXX:4:1:15:23/1 -@I330_1_FC30JM6AAXX:4:1:15:1047/1 -@I330_1_FC30JM6AAXX:4:1:15:702/1 -@I330_1_FC30JM6AAXX:4:1:15:586/1 -@I330_1_FC30JM6AAXX:4:1:15:178/1 -@I330_1_FC30JM6AAXX:4:1:15:172/1 -@I330_1_FC30JM6AAXX:4:1:15:61/1 -@I330_1_FC30JM6AAXX:4:1:15:15/1 -@I330_1_FC30JM6AAXX:4:1:15:511/1 -@I330_1_FC30JM6AAXX:4:1:15:197/1 -@I330_1_FC30JM6AAXX:4:1:15:144/1 -@I330_1_FC30JM6AAXX:4:1:15:86/1 -@I330_1_FC30JM6AAXX:4:1:15:1396/1 -@I330_1_FC30JM6AAXX:4:1:15:1310/1 -@I330_1_FC30JM6AAXX:4:1:15:434/1 -@I330_1_FC30JM6AAXX:4:1:15:1171/1 -@I330_1_FC30JM6AAXX:4:1:15:57/1 -@I330_1_FC30JM6AAXX:4:1:15:1986/1 -@I330_1_FC30JM6AAXX:4:1:15:130/1 -@I330_1_FC30JM6AAXX:4:1:15:812/1 
-@I330_1_FC30JM6AAXX:4:1:15:253/1 -@I330_1_FC30JM6AAXX:4:1:15:220/1 -@I330_1_FC30JM6AAXX:4:1:15:593/1 -@I330_1_FC30JM6AAXX:4:1:15:948/1 -@I330_1_FC30JM6AAXX:4:1:15:1303/1 -@I330_1_FC30JM6AAXX:4:1:15:1410/1 -@I330_1_FC30JM6AAXX:4:1:15:658/1 -@I330_1_FC30JM6AAXX:4:1:15:973/1 -@I330_1_FC30JM6AAXX:4:1:15:113/1 -@I330_1_FC30JM6AAXX:4:1:15:377/1 -@I330_1_FC30JM6AAXX:4:1:15:451/1 -@I330_1_FC30JM6AAXX:4:1:15:227/1 -@I330_1_FC30JM6AAXX:4:1:15:1532/1 -@I330_1_FC30JM6AAXX:4:1:15:628/1 -@I330_1_FC30JM6AAXX:4:1:15:1655/1 -@I330_1_FC30JM6AAXX:4:1:15:154/1 -@I330_1_FC30JM6AAXX:4:1:15:138/1 -@I330_1_FC30JM6AAXX:4:1:15:1081/1 -@I330_1_FC30JM6AAXX:4:1:15:544/1 -@I330_1_FC30JM6AAXX:4:1:15:831/1 -@I330_1_FC30JM6AAXX:4:1:15:425/1 -@I330_1_FC30JM6AAXX:4:1:15:1550/1 -@I330_1_FC30JM6AAXX:4:1:15:1166/1 -@I330_1_FC30JM6AAXX:4:1:15:957/1 -@I330_1_FC30JM6AAXX:4:1:15:644/1 -@I330_1_FC30JM6AAXX:4:1:15:1194/1 -@I330_1_FC30JM6AAXX:4:1:15:329/1 -@I330_1_FC30JM6AAXX:4:1:15:1582/1 -@I330_1_FC30JM6AAXX:4:1:15:547/1 -@I330_1_FC30JM6AAXX:4:1:15:187/1 -@I330_1_FC30JM6AAXX:4:1:15:1952/1 -@I330_1_FC30JM6AAXX:4:1:15:1877/1 -@I330_1_FC30JM6AAXX:4:1:15:1003/1 -@I330_1_FC30JM6AAXX:4:1:15:207/1 -@I330_1_FC30JM6AAXX:4:1:15:1212/1 -@I330_1_FC30JM6AAXX:4:1:15:1317/1 -@I330_1_FC30JM6AAXX:4:1:15:557/1 -@I330_1_FC30JM6AAXX:4:1:15:1695/1 -@I330_1_FC30JM6AAXX:4:1:15:765/1 -@I330_1_FC30JM6AAXX:4:1:15:1870/1 -@I330_1_FC30JM6AAXX:4:1:15:1592/1 -@I330_1_FC30JM6AAXX:4:1:15:460/1 -@I330_1_FC30JM6AAXX:4:1:16:1814/1 -@I330_1_FC30JM6AAXX:4:1:16:728/1 -@I330_1_FC30JM6AAXX:4:1:16:1762/1 -@I330_1_FC30JM6AAXX:4:1:16:1498/1 -@I330_1_FC30JM6AAXX:4:1:16:852/1 -@I330_1_FC30JM6AAXX:4:1:16:1883/1 -@I330_1_FC30JM6AAXX:4:1:16:1619/1 -@I330_1_FC30JM6AAXX:4:1:16:1313/1 -@I330_1_FC30JM6AAXX:4:1:16:623/1 -@I330_1_FC30JM6AAXX:4:1:16:498/1 -@I330_1_FC30JM6AAXX:4:1:16:506/1 -@I330_1_FC30JM6AAXX:4:1:16:1490/1 -@I330_1_FC30JM6AAXX:4:1:16:747/1 -@I330_1_FC30JM6AAXX:4:1:16:2007/1 -@I330_1_FC30JM6AAXX:4:1:16:1745/1 -@I330_1_FC30JM6AAXX:4:1:16:1403/1 
-@I330_1_FC30JM6AAXX:4:1:16:79/1 -@I330_1_FC30JM6AAXX:4:1:16:277/1 -@I330_1_FC30JM6AAXX:4:1:16:573/1 -@I330_1_FC30JM6AAXX:4:1:16:1323/1 -@I330_1_FC30JM6AAXX:4:1:16:1992/1 -@I330_1_FC30JM6AAXX:4:1:16:1828/1 -@I330_1_FC30JM6AAXX:4:1:16:439/1 -@I330_1_FC30JM6AAXX:4:1:16:1264/1 -@I330_1_FC30JM6AAXX:4:1:16:367/1 -@I330_1_FC30JM6AAXX:4:1:16:568/1 -@I330_1_FC30JM6AAXX:4:1:16:1833/1 -@I330_1_FC30JM6AAXX:4:1:16:1933/1 -@I330_1_FC30JM6AAXX:4:1:16:582/1 -@I330_1_FC30JM6AAXX:4:1:16:398/1 -@I330_1_FC30JM6AAXX:4:1:16:2028/1 -@I330_1_FC30JM6AAXX:4:1:16:266/1 -@I330_1_FC30JM6AAXX:4:1:16:321/1 -@I330_1_FC30JM6AAXX:4:1:16:994/1 -@I330_1_FC30JM6AAXX:4:1:16:19/1 -@I330_1_FC30JM6AAXX:4:1:16:721/1 -@I330_1_FC30JM6AAXX:4:1:16:892/1 -@I330_1_FC30JM6AAXX:4:1:16:665/1 -@I330_1_FC30JM6AAXX:4:1:16:1848/1 -@I330_1_FC30JM6AAXX:4:1:16:148/1 -@I330_1_FC30JM6AAXX:4:1:16:1737/1 -@I330_1_FC30JM6AAXX:4:1:16:1272/1 -@I330_1_FC30JM6AAXX:4:1:16:691/1 -@I330_1_FC30JM6AAXX:4:1:16:40/1 -@I330_1_FC30JM6AAXX:4:1:16:1365/1 -@I330_1_FC30JM6AAXX:4:1:16:1381/1 -@I330_1_FC30JM6AAXX:4:1:16:889/1 -@I330_1_FC30JM6AAXX:4:1:16:1506/1 -@I330_1_FC30JM6AAXX:4:1:16:475/1 -@I330_1_FC30JM6AAXX:4:1:16:1675/1 -@I330_1_FC30JM6AAXX:4:1:16:292/1 -@I330_1_FC30JM6AAXX:4:1:16:244/1 -@I330_1_FC30JM6AAXX:4:1:16:1842/1 -@I330_1_FC30JM6AAXX:4:1:16:1889/1 -@I330_1_FC30JM6AAXX:4:1:16:1596/1 -@I330_1_FC30JM6AAXX:4:1:16:649/1 -@I330_1_FC30JM6AAXX:4:1:17:247/1 -@I330_1_FC30JM6AAXX:4:1:17:354/1 -@I330_1_FC30JM6AAXX:4:1:17:7/1 -@I330_1_FC30JM6AAXX:4:1:17:1094/1 -@I330_1_FC30JM6AAXX:4:1:17:520/1 -@I330_1_FC30JM6AAXX:4:1:17:250/1 -@I330_1_FC30JM6AAXX:4:1:17:1071/1 -@I330_1_FC30JM6AAXX:4:1:17:1268/1 -@I330_1_FC30JM6AAXX:4:1:17:808/1 -@I330_1_FC30JM6AAXX:4:1:17:1446/1 -@I330_1_FC30JM6AAXX:4:1:17:1102/1 -@I330_1_FC30JM6AAXX:4:1:17:47/1 -@I330_1_FC30JM6AAXX:4:1:17:1121/1 -@I330_1_FC30JM6AAXX:4:1:17:777/1 -@I330_1_FC30JM6AAXX:4:1:17:1019/1 -@I330_1_FC30JM6AAXX:4:1:17:309/1 -@I330_1_FC30JM6AAXX:4:1:17:65/1 -@I330_1_FC30JM6AAXX:4:1:17:1909/1 
-@I330_1_FC30JM6AAXX:4:1:17:484/1 -@I330_1_FC30JM6AAXX:4:1:17:554/1 -@I330_1_FC30JM6AAXX:4:1:17:1857/1 -@I330_1_FC30JM6AAXX:4:1:17:1399/1 -@I330_1_FC30JM6AAXX:4:1:17:675/1 -@I330_1_FC30JM6AAXX:4:1:17:1112/1 -@I330_1_FC30JM6AAXX:4:1:17:141/1 -@I330_1_FC30JM6AAXX:4:1:17:745/1 -@I330_1_FC30JM6AAXX:4:1:17:1664/1 -@I330_1_FC30JM6AAXX:4:1:17:344/1 -@I330_1_FC30JM6AAXX:4:1:17:1494/1 -@I330_1_FC30JM6AAXX:4:1:17:1925/1 -@I330_1_FC30JM6AAXX:4:1:17:1902/1 -@I330_1_FC30JM6AAXX:4:1:17:1502/1 -@I330_1_FC30JM6AAXX:4:1:17:594/1 -@I330_1_FC30JM6AAXX:4:1:17:1294/1 -@I330_1_FC30JM6AAXX:4:1:17:1080/1 -@I330_1_FC30JM6AAXX:4:1:17:1605/1 -@I330_1_FC30JM6AAXX:4:1:17:1569/1 -@I330_1_FC30JM6AAXX:4:1:17:1254/1 -@I330_1_FC30JM6AAXX:4:1:17:533/1 -@I330_1_FC30JM6AAXX:4:1:17:928/1 -@I330_1_FC30JM6AAXX:4:1:17:494/1 -@I330_1_FC30JM6AAXX:4:1:17:703/1 -@I330_1_FC30JM6AAXX:4:1:17:1955/1 -@I330_1_FC30JM6AAXX:4:1:17:2026/1 -@I330_1_FC30JM6AAXX:4:1:17:1477/1 -@I330_1_FC30JM6AAXX:4:1:17:1220/1 -@I330_1_FC30JM6AAXX:4:1:17:1562/1 -@I330_1_FC30JM6AAXX:4:1:17:1577/1 -@I330_1_FC30JM6AAXX:4:1:17:1444/1 -@I330_1_FC30JM6AAXX:4:1:17:1006/1 -@I330_1_FC30JM6AAXX:4:1:17:105/1 -@I330_1_FC30JM6AAXX:4:1:17:1277/1 -@I330_1_FC30JM6AAXX:4:1:17:68/1 -@I330_1_FC30JM6AAXX:4:1:18:960/1 -@I330_1_FC30JM6AAXX:4:1:18:1774/1 -@I330_1_FC30JM6AAXX:4:1:18:1656/1 -@I330_1_FC30JM6AAXX:4:1:18:551/1 -@I330_1_FC30JM6AAXX:4:1:18:332/1 -@I330_1_FC30JM6AAXX:4:1:18:965/1 -@I330_1_FC30JM6AAXX:4:1:18:1487/1 -@I330_1_FC30JM6AAXX:4:1:18:226/1 -@I330_1_FC30JM6AAXX:4:1:18:880/1 -@I330_1_FC30JM6AAXX:4:1:18:1806/1 -@I330_1_FC30JM6AAXX:4:1:18:92/1 -@I330_1_FC30JM6AAXX:4:1:18:1734/1 -@I330_1_FC30JM6AAXX:4:1:18:903/1 -@I330_1_FC30JM6AAXX:4:1:18:1648/1 -@I330_1_FC30JM6AAXX:4:1:18:955/1 -@I330_1_FC30JM6AAXX:4:1:18:570/1 -@I330_1_FC30JM6AAXX:4:1:18:351/1 -@I330_1_FC30JM6AAXX:4:1:18:1612/1 -@I330_1_FC30JM6AAXX:4:1:18:136/1 -@I330_1_FC30JM6AAXX:4:1:18:2009/1 -@I330_1_FC30JM6AAXX:4:1:18:1467/1 -@I330_1_FC30JM6AAXX:4:1:18:1108/1 
-@I330_1_FC30JM6AAXX:4:1:18:841/1 -@I330_1_FC30JM6AAXX:4:1:18:646/1 -@I330_1_FC30JM6AAXX:4:1:18:1704/1 -@I330_1_FC30JM6AAXX:4:1:18:1660/1 -@I330_1_FC30JM6AAXX:4:1:18:1459/1 -@I330_1_FC30JM6AAXX:4:1:18:1260/1 -@I330_1_FC30JM6AAXX:4:1:18:577/1 -@I330_1_FC30JM6AAXX:4:1:18:1693/1 -@I330_1_FC30JM6AAXX:4:1:18:750/1 -@I330_1_FC30JM6AAXX:4:1:18:667/1 -@I330_1_FC30JM6AAXX:4:1:18:240/1 -@I330_1_FC30JM6AAXX:4:1:18:209/1 -@I330_1_FC30JM6AAXX:4:1:18:116/1 -@I330_1_FC30JM6AAXX:4:1:18:424/1 -@I330_1_FC30JM6AAXX:4:1:18:373/1 -@I330_1_FC30JM6AAXX:4:1:18:1809/1 -@I330_1_FC30JM6AAXX:4:1:18:860/1 -@I330_1_FC30JM6AAXX:4:1:18:187/1 diff --git a/src/htslib-1.18/htscodecs/tests/names/nv.names b/src/htslib-1.18/htscodecs/tests/names/nv.names deleted file mode 100644 index ff1723e..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/nv.names +++ /dev/null @@ -1,1000 +0,0 @@ -VP2-06:112:H7LNDMCVY:1:1124:21694:10473 -VP2-06:112:H7LNDMCVY:1:1158:23665:6370 -VP2-06:112:H7LNDMCVY:1:1219:23746:16250 -VP2-06:112:H7LNDMCVY:1:1243:16414:36119 -VP2-06:112:H7LNDMCVY:1:1251:6253:36119 -VP2-06:112:H7LNDMCVY:1:1324:31412:16595 -VP2-06:112:H7LNDMCVY:1:1431:22119:16125 -VP2-06:112:H7LNDMCVY:1:2152:28881:21512 -VP2-06:112:H7LNDMCVY:1:2207:21287:33567 -VP2-06:112:H7LNDMCVY:1:2219:32651:24940 -VP2-06:112:H7LNDMCVY:1:2236:9959:8218 -VP2-06:112:H7LNDMCVY:1:2257:12436:35587 -VP2-06:112:H7LNDMCVY:1:2263:19253:23343 -VP2-06:112:H7LNDMCVY:1:2268:15962:22905 -VP2-06:112:H7LNDMCVY:1:2424:6994:29136 -VP2-06:112:H7LNDMCVY:1:2444:10782:28902 -VP2-06:112:H7LNDMCVY:1:2446:11089:8453 -VP2-06:112:H7LNDMCVY:1:2457:19578:33238 -VP2-06:112:H7LNDMCVY:1:2486:29695:2816 -VP2-06:112:H7LNDMCVY:1:1440:4173:25128 -VP2-06:112:H7LNDMCVY:1:2148:12400:26757 -VP2-06:112:H7LNDMCVY:1:2359:25391:34757 -VP2-06:112:H7LNDMCVY:1:1237:13196:28134 -VP2-06:112:H7LNDMCVY:1:1106:28320:8202 -VP2-06:112:H7LNDMCVY:1:1113:16269:11757 -VP2-06:112:H7LNDMCVY:1:1123:8232:32534 -VP2-06:112:H7LNDMCVY:1:1126:20754:16454 -VP2-06:112:H7LNDMCVY:1:1126:21775:30154 
-VP2-06:112:H7LNDMCVY:1:1127:13250:1517 -VP2-06:112:H7LNDMCVY:1:1166:5367:30452 -VP2-06:112:H7LNDMCVY:1:1175:25744:34679 -VP2-06:112:H7LNDMCVY:1:1189:16947:7889 -VP2-06:112:H7LNDMCVY:1:1244:11424:36526 -VP2-06:112:H7LNDMCVY:1:1258:31241:27759 -VP2-06:112:H7LNDMCVY:1:1261:13774:35712 -VP2-06:112:H7LNDMCVY:1:1265:5529:12790 -VP2-06:112:H7LNDMCVY:1:1284:6488:31923 -VP2-06:112:H7LNDMCVY:1:1308:4679:13886 -VP2-06:112:H7LNDMCVY:1:1318:10077:27492 -VP2-06:112:H7LNDMCVY:1:1319:13937:34961 -VP2-06:112:H7LNDMCVY:1:1329:11487:13777 -VP2-06:112:H7LNDMCVY:1:1337:25012:9987 -VP2-06:112:H7LNDMCVY:1:1338:8088:3662 -VP2-06:112:H7LNDMCVY:1:1369:19253:19617 -VP2-06:112:H7LNDMCVY:1:1369:17978:33098 -VP2-06:112:H7LNDMCVY:1:1414:26223:6073 -VP2-06:112:H7LNDMCVY:1:1420:32253:17143 -VP2-06:112:H7LNDMCVY:1:1421:29866:9471 -VP2-06:112:H7LNDMCVY:1:1430:26268:27101 -VP2-06:112:H7LNDMCVY:1:1437:31060:17487 -VP2-06:112:H7LNDMCVY:1:1444:27091:17660 -VP2-06:112:H7LNDMCVY:1:1449:14172:16454 -VP2-06:112:H7LNDMCVY:1:1463:9833:8876 -VP2-06:112:H7LNDMCVY:1:1466:8287:32315 -VP2-06:112:H7LNDMCVY:1:1468:19334:35383 -VP2-06:112:H7LNDMCVY:1:1473:2184:17895 -VP2-06:112:H7LNDMCVY:1:1482:25717:8672 -VP2-06:112:H7LNDMCVY:1:1486:31222:1297 -VP2-06:112:H7LNDMCVY:1:2141:24578:14246 -VP2-06:112:H7LNDMCVY:1:2145:23032:24752 -VP2-06:112:H7LNDMCVY:1:2145:3170:33098 -VP2-06:112:H7LNDMCVY:1:2155:32117:26083 -VP2-06:112:H7LNDMCVY:1:2178:12933:9768 -VP2-06:112:H7LNDMCVY:1:2179:10411:17832 -VP2-06:112:H7LNDMCVY:1:2187:18041:6151 -VP2-06:112:H7LNDMCVY:1:2205:11668:8735 -VP2-06:112:H7LNDMCVY:1:2212:2736:11835 -VP2-06:112:H7LNDMCVY:1:2220:8034:20008 -VP2-06:112:H7LNDMCVY:1:2220:8034:20008 -VP2-06:112:H7LNDMCVY:1:2240:24885:27336 -VP2-06:112:H7LNDMCVY:1:2245:18096:15483 -VP2-06:112:H7LNDMCVY:1:2246:10673:31344 -VP2-06:112:H7LNDMCVY:1:2278:14651:24925 -VP2-06:112:H7LNDMCVY:1:2310:32036:36902 -VP2-06:112:H7LNDMCVY:1:2315:18665:26115 -VP2-06:112:H7LNDMCVY:1:2330:5041:20807 -VP2-06:112:H7LNDMCVY:1:2338:29577:19554 
-VP2-06:112:H7LNDMCVY:1:2341:25138:6449 -VP2-06:112:H7LNDMCVY:1:2342:3558:8061 -VP2-06:112:H7LNDMCVY:1:2376:20455:34256 -VP2-06:112:H7LNDMCVY:1:2376:21468:34945 -VP2-06:112:H7LNDMCVY:1:2409:16694:20979 -VP2-06:112:H7LNDMCVY:1:2417:8703:1877 -VP2-06:112:H7LNDMCVY:1:2425:14895:17018 -VP2-06:112:H7LNDMCVY:1:2442:24686:27085 -VP2-06:112:H7LNDMCVY:1:2451:15926:12759 -VP2-06:112:H7LNDMCVY:1:2455:32841:32972 -VP2-06:112:H7LNDMCVY:1:2482:31494:20400 -VP2-06:112:H7LNDMCVY:1:1164:26115:7232 -VP2-06:112:H7LNDMCVY:1:1269:22851:5181 -VP2-06:112:H7LNDMCVY:1:2121:29613:28573 -VP2-06:112:H7LNDMCVY:1:1164:26115:7232 -VP2-06:112:H7LNDMCVY:1:1164:5692:8281 -VP2-06:112:H7LNDMCVY:1:1419:7572:6872 -VP2-06:112:H7LNDMCVY:1:1482:29604:35196 -VP2-06:112:H7LNDMCVY:1:2158:25491:20494 -VP2-06:112:H7LNDMCVY:1:2183:16278:9674 -VP2-06:112:H7LNDMCVY:1:2219:2374:4225 -VP2-06:112:H7LNDMCVY:1:2243:7925:13589 -VP2-06:112:H7LNDMCVY:1:2368:14651:17942 -VP2-06:112:H7LNDMCVY:1:2475:30156:24095 -VP2-06:112:H7LNDMCVY:1:1111:8015:36260 -VP2-06:112:H7LNDMCVY:1:1114:16631:18333 -VP2-06:112:H7LNDMCVY:1:1128:22435:36589 -VP2-06:112:H7LNDMCVY:1:1146:22878:9956 -VP2-06:112:H7LNDMCVY:1:1148:22263:25770 -VP2-06:112:H7LNDMCVY:1:1152:20528:19351 -VP2-06:112:H7LNDMCVY:1:1154:20094:22138 -VP2-06:112:H7LNDMCVY:1:1156:4354:28729 -VP2-06:112:H7LNDMCVY:1:1157:13801:19225 -VP2-06:112:H7LNDMCVY:1:1178:9390:19570 -VP2-06:112:H7LNDMCVY:1:1184:31458:6496 -VP2-06:112:H7LNDMCVY:1:1187:14136:29731 -VP2-06:112:H7LNDMCVY:1:1244:20419:35884 -VP2-06:112:H7LNDMCVY:1:1268:27751:18865 -VP2-06:112:H7LNDMCVY:1:1272:23529:8015 -VP2-06:112:H7LNDMCVY:1:1282:23809:5337 -VP2-06:112:H7LNDMCVY:1:1284:28510:2143 -VP2-06:112:H7LNDMCVY:1:1284:24985:7059 -VP2-06:112:H7LNDMCVY:1:1306:32642:28902 -VP2-06:112:H7LNDMCVY:1:1332:30843:20275 -VP2-06:112:H7LNDMCVY:1:1342:7536:8844 -VP2-06:112:H7LNDMCVY:1:1342:3269:13886 -VP2-06:112:H7LNDMCVY:1:1361:20211:32330 -VP2-06:112:H7LNDMCVY:1:1363:27823:6965 -VP2-06:112:H7LNDMCVY:1:1372:18756:6887 
-VP2-06:112:H7LNDMCVY:1:1386:18701:8547 -VP2-06:112:H7LNDMCVY:1:1386:20898:9565 -VP2-06:112:H7LNDMCVY:1:1389:31222:19680 -VP2-06:112:H7LNDMCVY:1:1417:10294:6167 -VP2-06:112:H7LNDMCVY:1:1421:28537:31908 -VP2-06:112:H7LNDMCVY:1:1427:17300:23343 -VP2-06:112:H7LNDMCVY:1:1438:24316:26850 -VP2-06:112:H7LNDMCVY:1:1439:3667:36808 -VP2-06:112:H7LNDMCVY:1:1444:17237:9079 -VP2-06:112:H7LNDMCVY:1:1447:20735:7247 -VP2-06:112:H7LNDMCVY:1:1447:8558:23891 -VP2-06:112:H7LNDMCVY:1:1478:17083:21872 -VP2-06:112:H7LNDMCVY:1:1479:26051:7529 -VP2-06:112:H7LNDMCVY:1:2110:22191:33724 -VP2-06:112:H7LNDMCVY:1:2111:26503:15796 -VP2-06:112:H7LNDMCVY:1:2114:12735:3411 -VP2-06:112:H7LNDMCVY:1:2124:2311:30671 -VP2-06:112:H7LNDMCVY:1:2144:30246:24345 -VP2-06:112:H7LNDMCVY:1:2147:15872:23187 -VP2-06:112:H7LNDMCVY:1:2150:32488:3082 -VP2-06:112:H7LNDMCVY:1:2155:31693:3740 -VP2-06:112:H7LNDMCVY:1:2174:2971:3724 -VP2-06:112:H7LNDMCVY:1:2181:14190:35524 -VP2-06:112:H7LNDMCVY:1:2212:21856:27445 -VP2-06:112:H7LNDMCVY:1:2242:4273:24674 -VP2-06:112:H7LNDMCVY:1:2252:32615:9157 -VP2-06:112:H7LNDMCVY:1:2268:30327:7795 -VP2-06:112:H7LNDMCVY:1:2270:27064:15421 -VP2-06:112:H7LNDMCVY:1:2271:20582:14622 -VP2-06:112:H7LNDMCVY:1:2272:10438:32440 -VP2-06:112:H7LNDMCVY:1:2272:18801:32643 -VP2-06:112:H7LNDMCVY:1:2307:29053:17832 -VP2-06:112:H7LNDMCVY:1:2327:30770:2707 -VP2-06:112:H7LNDMCVY:1:2331:8929:31579 -VP2-06:112:H7LNDMCVY:1:2337:13304:13260 -VP2-06:112:H7LNDMCVY:1:2338:20735:28416 -VP2-06:112:H7LNDMCVY:1:2345:26404:11835 -VP2-06:112:H7LNDMCVY:1:2355:32660:8077 -VP2-06:112:H7LNDMCVY:1:2371:22589:32628 -VP2-06:112:H7LNDMCVY:1:2386:23077:23703 -VP2-06:112:H7LNDMCVY:1:2388:15167:36902 -VP2-06:112:H7LNDMCVY:1:2427:17852:17409 -VP2-06:112:H7LNDMCVY:1:2432:12698:18912 -VP2-06:112:H7LNDMCVY:1:2432:32163:25942 -VP2-06:112:H7LNDMCVY:1:2433:23918:22279 -VP2-06:112:H7LNDMCVY:1:2444:26521:26068 -VP2-06:112:H7LNDMCVY:1:2449:4761:4695 -VP2-06:112:H7LNDMCVY:1:2451:29451:26725 -VP2-06:112:H7LNDMCVY:1:2451:32190:32628 
-VP2-06:112:H7LNDMCVY:1:2452:27959:7858 -VP2-06:112:H7LNDMCVY:1:2457:29460:29309 -VP2-06:112:H7LNDMCVY:1:2461:11433:9956 -VP2-06:112:H7LNDMCVY:1:2476:14253:8860 -VP2-06:112:H7LNDMCVY:1:2478:8196:17942 -VP2-06:112:H7LNDMCVY:1:2485:30074:27743 -VP2-06:112:H7LNDMCVY:1:2486:29749:3317 -VP2-06:112:H7LNDMCVY:1:1112:4038:19946 -VP2-06:112:H7LNDMCVY:1:1164:17978:17221 -VP2-06:112:H7LNDMCVY:1:1205:30355:26224 -VP2-06:112:H7LNDMCVY:1:1238:5358:23140 -VP2-06:112:H7LNDMCVY:1:1245:9796:34115 -VP2-06:112:H7LNDMCVY:1:1456:16089:18270 -VP2-06:112:H7LNDMCVY:1:2120:19587:31250 -VP2-06:112:H7LNDMCVY:1:2216:32850:27508 -VP2-06:112:H7LNDMCVY:1:2258:29243:29528 -VP2-06:112:H7LNDMCVY:1:2259:29143:4335 -VP2-06:112:H7LNDMCVY:1:2418:5656:11569 -VP2-06:112:H7LNDMCVY:1:2469:24677:3959 -VP2-06:112:H7LNDMCVY:1:1305:4689:14591 -VP2-06:112:H7LNDMCVY:1:2321:15402:13917 -VP2-06:112:H7LNDMCVY:1:2363:24053:28776 -VP2-06:112:H7LNDMCVY:1:2379:10203:16000 -VP2-06:112:H7LNDMCVY:1:2380:31177:28307 -VP2-06:112:H7LNDMCVY:1:2459:11559:14841 -VP2-06:112:H7LNDMCVY:1:2477:24126:4601 -VP2-06:112:H7LNDMCVY:1:1105:23231:10410 -VP2-06:112:H7LNDMCVY:1:1137:8341:21386 -VP2-06:112:H7LNDMCVY:1:1175:12735:8359 -VP2-06:112:H7LNDMCVY:1:1362:2248:22388 -VP2-06:112:H7LNDMCVY:1:1381:19940:31422 -VP2-06:112:H7LNDMCVY:1:2108:2347:15358 -VP2-06:112:H7LNDMCVY:1:2229:16125:17049 -VP2-06:112:H7LNDMCVY:1:2319:19904:20275 -VP2-06:112:H7LNDMCVY:1:2332:15393:28119 -VP2-06:112:H7LNDMCVY:1:2457:29460:29309 -VP2-06:112:H7LNDMCVY:1:2462:2401:26850 -VP2-06:112:H7LNDMCVY:1:2463:23113:3944 -VP2-06:112:H7LNDMCVY:1:2466:28989:9612 -VP2-06:112:H7LNDMCVY:1:2174:25852:20588 -VP2-06:112:H7LNDMCVY:1:2187:27887:12023 -VP2-06:112:H7LNDMCVY:1:2423:22643:24925 -VP2-06:112:H7LNDMCVY:1:2423:22643:24925 -VP2-06:112:H7LNDMCVY:1:1120:7916:30232 -VP2-06:112:H7LNDMCVY:1:1156:4354:28729 -VP2-06:112:H7LNDMCVY:1:1183:30373:22247 -VP2-06:112:H7LNDMCVY:1:1187:2736:2597 -VP2-06:112:H7LNDMCVY:1:1287:31955:13401 -VP2-06:112:H7LNDMCVY:1:1332:9778:28667 
-VP2-06:112:H7LNDMCVY:1:1358:25382:21903 -VP2-06:112:H7LNDMCVY:1:1416:5990:10645 -VP2-06:112:H7LNDMCVY:1:1449:6253:16266 -VP2-06:112:H7LNDMCVY:1:2134:18222:6934 -VP2-06:112:H7LNDMCVY:1:2183:12563:35744 -VP2-06:112:H7LNDMCVY:1:2184:8034:6355 -VP2-06:112:H7LNDMCVY:1:2184:10330:6574 -VP2-06:112:H7LNDMCVY:1:2223:19379:8938 -VP2-06:112:H7LNDMCVY:1:2262:3848:31923 -VP2-06:112:H7LNDMCVY:1:1162:28727:10567 -VP2-06:112:H7LNDMCVY:1:1290:14579:30467 -VP2-06:112:H7LNDMCVY:1:1345:15754:3630 -VP2-06:112:H7LNDMCVY:1:1357:13901:32174 -VP2-06:112:H7LNDMCVY:1:1379:17598:1783 -VP2-06:112:H7LNDMCVY:1:2472:8974:30123 -VP2-06:112:H7LNDMCVY:1:1317:8974:8578 -VP2-06:112:H7LNDMCVY:1:1334:5647:21261 -VP2-06:112:H7LNDMCVY:1:1473:2184:17895 -VP2-06:112:H7LNDMCVY:1:2122:3161:16892 -VP2-06:112:H7LNDMCVY:1:2236:12662:12712 -VP2-06:112:H7LNDMCVY:1:2283:28049:27868 -VP2-06:112:H7LNDMCVY:1:2284:28085:23390 -VP2-06:112:H7LNDMCVY:1:2438:12789:32722 -VP2-06:112:H7LNDMCVY:1:1157:14904:29841 -VP2-06:112:H7LNDMCVY:1:1237:13196:28134 -VP2-06:112:H7LNDMCVY:1:1441:6967:6136 -VP2-06:112:H7LNDMCVY:1:2148:30689:24173 -VP2-06:112:H7LNDMCVY:1:2179:5475:20243 -VP2-06:112:H7LNDMCVY:1:2236:29794:17675 -VP2-06:112:H7LNDMCVY:1:2257:28736:27054 -VP2-06:112:H7LNDMCVY:1:2385:14733:29262 -VP2-06:112:H7LNDMCVY:1:2464:7817:33442 -VP2-06:112:H7LNDMCVY:1:1338:14018:3787 -VP2-06:112:H7LNDMCVY:1:1459:28827:10864 -VP2-06:112:H7LNDMCVY:1:2181:6153:25332 -VP2-06:112:H7LNDMCVY:1:2185:18313:28980 -VP2-06:112:H7LNDMCVY:1:2310:15121:7012 -VP2-06:112:H7LNDMCVY:1:2319:10538:20337 -VP2-06:112:H7LNDMCVY:1:2348:17128:3787 -VP2-06:112:H7LNDMCVY:1:2464:30183:28870 -VP2-06:112:H7LNDMCVY:1:2466:22887:25285 -VP2-06:112:H7LNDMCVY:1:1111:27724:32941 -VP2-06:112:H7LNDMCVY:1:2170:27588:26725 -VP2-06:112:H7LNDMCVY:1:2181:6153:25332 -VP2-06:112:H7LNDMCVY:1:2190:32443:31782 -VP2-06:112:H7LNDMCVY:1:2382:2220:31955 -VP2-06:112:H7LNDMCVY:1:2445:1823:32581 -VP2-06:112:H7LNDMCVY:1:2458:9362:9846 -VP2-06:112:H7LNDMCVY:1:2475:30156:24095 
-VP2-06:112:H7LNDMCVY:1:1151:6415:36307 -VP2-06:112:H7LNDMCVY:1:1236:29441:12524 -VP2-06:112:H7LNDMCVY:1:1388:9200:2832 -VP2-06:112:H7LNDMCVY:1:2169:5620:15922 -VP2-06:112:H7LNDMCVY:1:2224:31611:29997 -VP2-06:112:H7LNDMCVY:1:2229:17607:14982 -VP2-06:112:H7LNDMCVY:1:2277:29206:28150 -VP2-06:112:H7LNDMCVY:1:2343:14805:3865 -VP2-06:112:H7LNDMCVY:1:1123:14507:4601 -VP2-06:112:H7LNDMCVY:1:1136:29966:29935 -VP2-06:112:H7LNDMCVY:1:1255:2031:11898 -VP2-06:112:H7LNDMCVY:1:1264:28583:30953 -VP2-06:112:H7LNDMCVY:1:1316:12274:24627 -VP2-06:112:H7LNDMCVY:1:2138:20482:6809 -VP2-06:112:H7LNDMCVY:1:2245:30120:27915 -VP2-06:112:H7LNDMCVY:1:2275:11704:11083 -VP2-06:112:H7LNDMCVY:1:2283:12924:34585 -VP2-06:112:H7LNDMCVY:1:2353:17852:14998 -VP2-06:112:H7LNDMCVY:1:2452:6352:11710 -VP2-06:112:H7LNDMCVY:1:2482:32289:19022 -VP2-06:112:H7LNDMCVY:1:1206:27679:10222 -VP2-06:112:H7LNDMCVY:1:1467:11080:27790 -VP2-06:112:H7LNDMCVY:1:1220:18412:29966 -VP2-06:112:H7LNDMCVY:1:2313:29505:1360 -VP2-06:112:H7LNDMCVY:1:1432:11424:32518 -VP2-06:112:H7LNDMCVY:1:2112:29252:1360 -VP2-06:112:H7LNDMCVY:1:2244:24225:16548 -VP2-06:112:H7LNDMCVY:1:2364:16034:24627 -VP2-06:112:H7LNDMCVY:1:2436:7428:10034 -VP2-06:112:H7LNDMCVY:1:2442:8296:21746 -VP2-06:112:H7LNDMCVY:1:1250:9661:30780 -VP2-06:112:H7LNDMCVY:1:1480:23222:34162 -VP2-06:112:H7LNDMCVY:1:2170:27588:26725 -VP2-06:112:H7LNDMCVY:1:2351:3007:33786 -VP2-06:112:H7LNDMCVY:1:2367:16875:4131 -VP2-06:112:H7LNDMCVY:1:2279:1362:21104 -VP2-06:112:H7LNDMCVY:1:2275:1099:31360 -VP2-06:112:H7LNDMCVY:1:2364:16034:24627 -VP2-06:112:H7LNDMCVY:1:1357:20763:11428 -VP2-06:112:H7LNDMCVY:1:1379:1542:17848 -VP2-06:112:H7LNDMCVY:1:2111:16315:34350 -VP2-06:112:H7LNDMCVY:1:1137:23619:2660 -VP2-06:112:H7LNDMCVY:1:1426:13322:16078 -VP2-06:112:H7LNDMCVY:1:1450:10547:3192 -VP2-06:112:H7LNDMCVY:1:2419:7166:27618 -VP2-06:112:H7LNDMCVY:1:1153:31042:8249 -VP2-06:112:H7LNDMCVY:1:2436:7428:10034 -VP2-06:112:H7LNDMCVY:1:1308:22426:16626 -VP2-06:112:H7LNDMCVY:1:1359:4670:16094 
-VP2-06:112:H7LNDMCVY:1:1409:32768:5102 -VP2-06:112:H7LNDMCVY:1:2305:32443:2785 -VP2-06:112:H7LNDMCVY:1:2323:13829:20400 -VP2-06:112:H7LNDMCVY:1:2381:2682:18035 -VP2-06:112:H7LNDMCVY:1:2435:30698:3834 -VP2-06:112:H7LNDMCVY:1:2457:21640:13949 -VP2-06:112:H7LNDMCVY:1:1241:20347:13369 -VP2-06:112:H7LNDMCVY:1:1421:11623:11318 -VP2-06:112:H7LNDMCVY:1:1486:31222:1297 -VP2-06:112:H7LNDMCVY:1:2283:12924:34585 -VP2-06:112:H7LNDMCVY:1:2454:8377:3630 -VP2-06:112:H7LNDMCVY:1:1173:4426:31704 -VP2-06:112:H7LNDMCVY:1:1290:24415:6605 -VP2-06:112:H7LNDMCVY:1:1354:3712:5979 -VP2-06:112:H7LNDMCVY:1:2167:31566:11036 -VP2-06:112:H7LNDMCVY:1:2253:23728:25708 -VP2-06:112:H7LNDMCVY:1:1244:17454:18129 -VP2-06:112:H7LNDMCVY:1:1117:14913:13980 -VP2-06:112:H7LNDMCVY:1:2265:23122:1579 -VP2-06:112:H7LNDMCVY:1:2381:30825:8093 -VP2-06:112:H7LNDMCVY:1:2471:24632:31814 -VP2-06:112:H7LNDMCVY:1:1110:28556:30906 -VP2-06:112:H7LNDMCVY:1:1369:24975:21793 -VP2-06:112:H7LNDMCVY:1:1411:29324:12508 -VP2-06:112:H7LNDMCVY:1:1481:14714:20055 -VP2-06:112:H7LNDMCVY:1:2440:26259:23797 -VP2-06:112:H7LNDMCVY:1:2445:1823:32581 -VP2-06:112:H7LNDMCVY:1:2477:24126:4601 -VP2-06:112:H7LNDMCVY:1:1149:5647:34381 -VP2-06:112:H7LNDMCVY:1:1224:8874:8281 -VP2-06:112:H7LNDMCVY:1:2147:27118:36276 -VP2-06:112:H7LNDMCVY:1:2348:31006:36119 -VP2-06:112:H7LNDMCVY:1:2465:9670:22999 -VP2-06:112:H7LNDMCVY:1:1126:26323:14137 -VP2-06:112:H7LNDMCVY:1:1371:29758:14387 -VP2-06:112:H7LNDMCVY:1:1140:14733:14700 -VP2-06:112:H7LNDMCVY:1:1147:27606:36777 -VP2-06:112:H7LNDMCVY:1:2144:30246:24345 -VP2-06:112:H7LNDMCVY:1:1126:26323:14137 -VP2-06:112:H7LNDMCVY:1:1128:15239:24659 -VP2-06:112:H7LNDMCVY:1:1171:8549:36839 -VP2-06:112:H7LNDMCVY:1:1290:14579:30467 -VP2-06:112:H7LNDMCVY:1:1417:10294:6167 -VP2-06:112:H7LNDMCVY:1:2322:27416:18317 -VP2-06:112:H7LNDMCVY:1:2325:1199:29058 -VP2-06:112:H7LNDMCVY:1:1262:20980:36667 -VP2-06:112:H7LNDMCVY:1:1305:3278:14622 -VP2-06:112:H7LNDMCVY:1:1307:26594:17550 -VP2-06:112:H7LNDMCVY:1:1380:31322:9580 
-VP2-06:112:H7LNDMCVY:1:2166:30400:30780 -VP2-06:112:H7LNDMCVY:1:2331:32461:9862 -VP2-06:112:H7LNDMCVY:1:2380:11595:35070 -VP2-06:112:H7LNDMCVY:1:2424:24279:31360 -VP2-06:112:H7LNDMCVY:1:2328:3730:31720 -VP2-06:112:H7LNDMCVY:1:2134:17571:1673 -VP2-06:112:H7LNDMCVY:1:2148:12400:26757 -VP2-06:112:H7LNDMCVY:1:2258:21088:20979 -VP2-06:112:H7LNDMCVY:1:2370:19425:37012 -VP2-06:112:H7LNDMCVY:1:2463:29993:20588 -VP2-06:112:H7LNDMCVY:1:1187:6894:22013 -VP2-06:112:H7LNDMCVY:1:1260:28556:34413 -VP2-06:112:H7LNDMCVY:1:2463:29993:20588 -VP2-06:112:H7LNDMCVY:1:1374:6686:18803 -VP2-06:112:H7LNDMCVY:1:2188:4056:2409 -VP2-06:112:H7LNDMCVY:1:2310:21703:10113 -VP2-06:112:H7LNDMCVY:1:2478:6813:11443 -VP2-06:112:H7LNDMCVY:1:1116:31430:24142 -VP2-06:112:H7LNDMCVY:1:1432:26431:10535 -VP2-06:112:H7LNDMCVY:1:2155:31693:3740 -VP2-06:112:H7LNDMCVY:1:2487:20763:6825 -VP2-06:112:H7LNDMCVY:1:1358:25382:21903 -VP2-06:112:H7LNDMCVY:1:2148:10167:13589 -VP2-06:112:H7LNDMCVY:1:1124:13241:6605 -VP2-06:112:H7LNDMCVY:1:1206:13413:18020 -VP2-06:112:H7LNDMCVY:1:1274:17779:25363 -VP2-06:112:H7LNDMCVY:1:1354:17806:10974 -VP2-06:112:H7LNDMCVY:1:1465:13376:15264 -VP2-06:112:H7LNDMCVY:1:2140:12102:24925 -VP2-06:112:H7LNDMCVY:1:2280:18457:26381 -VP2-06:112:H7LNDMCVY:1:2307:32027:28087 -VP2-06:112:H7LNDMCVY:1:2413:10050:31297 -VP2-06:112:H7LNDMCVY:1:1112:4038:19946 -VP2-06:112:H7LNDMCVY:1:1124:13241:6605 -VP2-06:112:H7LNDMCVY:1:1152:20528:19351 -VP2-06:112:H7LNDMCVY:1:1271:8639:12884 -VP2-06:112:H7LNDMCVY:1:1283:3920:26788 -VP2-06:112:H7LNDMCVY:1:1440:4173:25128 -VP2-06:112:H7LNDMCVY:1:2229:20021:10332 -VP2-06:112:H7LNDMCVY:1:2283:31819:18928 -VP2-06:112:H7LNDMCVY:1:2353:31141:13808 -VP2-06:112:H7LNDMCVY:1:2413:10565:1532 -VP2-06:112:H7LNDMCVY:1:1308:9390:26835 -VP2-06:112:H7LNDMCVY:1:1135:29423:1720 -VP2-06:112:H7LNDMCVY:1:1152:30083:4241 -VP2-06:112:H7LNDMCVY:1:1283:24496:25755 -VP2-06:112:H7LNDMCVY:1:1377:23484:4178 -VP2-06:112:H7LNDMCVY:1:2154:3251:23249 -VP2-06:112:H7LNDMCVY:1:2230:32217:11537 
-VP2-06:112:H7LNDMCVY:1:2429:13286:15546 -VP2-06:112:H7LNDMCVY:1:2481:7952:29700 -VP2-06:112:H7LNDMCVY:1:1334:5647:21261 -VP2-06:112:H7LNDMCVY:1:1368:30626:4429 -VP2-06:112:H7LNDMCVY:1:2120:5122:12430 -VP2-06:112:H7LNDMCVY:1:2419:7166:27618 -VP2-06:112:H7LNDMCVY:1:1254:17146:11146 -VP2-06:112:H7LNDMCVY:1:1287:22607:36949 -VP2-06:112:H7LNDMCVY:1:1464:13738:1172 -VP2-06:112:H7LNDMCVY:1:2108:5041:4961 -VP2-06:112:H7LNDMCVY:1:2186:6343:35869 -VP2-06:112:H7LNDMCVY:1:2359:25391:34757 -VP2-06:112:H7LNDMCVY:1:1235:7274:28401 -VP2-06:112:H7LNDMCVY:1:1306:11532:7654 -VP2-06:112:H7LNDMCVY:1:1321:29315:32283 -VP2-06:112:H7LNDMCVY:1:1371:29758:14387 -VP2-06:112:H7LNDMCVY:1:1379:17598:1783 -VP2-06:112:H7LNDMCVY:1:2132:25346:32863 -VP2-06:112:H7LNDMCVY:1:2363:17119:13479 -VP2-06:112:H7LNDMCVY:1:1123:14507:4601 -VP2-06:112:H7LNDMCVY:1:1145:16360:36808 -VP2-06:112:H7LNDMCVY:1:1365:3613:16329 -VP2-06:112:H7LNDMCVY:1:2315:29622:9079 -VP2-06:112:H7LNDMCVY:1:2328:2284:31814 -VP2-06:112:H7LNDMCVY:1:2432:32163:25942 -VP2-06:112:H7LNDMCVY:1:1127:23836:20259 -VP2-06:112:H7LNDMCVY:1:2305:8730:3208 -VP2-06:112:H7LNDMCVY:1:1272:23529:8015 -VP2-06:112:H7LNDMCVY:1:1279:22245:8015 -VP2-06:112:H7LNDMCVY:1:1412:21133:3490 -VP2-06:112:H7LNDMCVY:1:1448:27642:8312 -VP2-06:112:H7LNDMCVY:1:2462:10239:18662 -VP2-06:112:H7LNDMCVY:1:2486:15808:24831 -VP2-06:112:H7LNDMCVY:1:2283:31819:18928 -VP2-06:112:H7LNDMCVY:1:1174:13829:30577 -VP2-06:112:H7LNDMCVY:1:1332:30843:20275 -VP2-06:112:H7LNDMCVY:1:2345:25563:19805 -VP2-06:112:H7LNDMCVY:1:2350:21757:17033 -VP2-06:112:H7LNDMCVY:1:2315:29622:9079 -VP2-06:112:H7LNDMCVY:1:1241:2727:32957 -VP2-06:112:H7LNDMCVY:1:1315:3595:23907 -VP2-06:112:H7LNDMCVY:1:1356:9480:24048 -VP2-06:112:H7LNDMCVY:1:1419:7572:6872 -VP2-06:112:H7LNDMCVY:1:2176:10402:36542 -VP2-06:112:H7LNDMCVY:1:2179:30969:32831 -VP2-06:112:H7LNDMCVY:1:2487:1922:8422 -VP2-06:112:H7LNDMCVY:1:1169:10646:12633 -VP2-06:112:H7LNDMCVY:1:1356:9480:24048 -VP2-06:112:H7LNDMCVY:1:1411:30825:5462 
-VP2-06:112:H7LNDMCVY:1:2357:19000:4586 -VP2-06:112:H7LNDMCVY:1:2463:29622:15405 -VP2-06:112:H7LNDMCVY:1:1457:12491:14356 -VP2-06:112:H7LNDMCVY:1:1169:10646:12633 -VP2-06:112:H7LNDMCVY:1:1184:31458:6496 -VP2-06:112:H7LNDMCVY:1:1232:9218:34585 -VP2-06:112:H7LNDMCVY:1:1285:19253:7435 -VP2-06:112:H7LNDMCVY:1:2168:15212:25676 -VP2-06:112:H7LNDMCVY:1:2177:5231:36824 -VP2-06:112:H7LNDMCVY:1:2217:23023:19852 -VP2-06:112:H7LNDMCVY:1:2379:11505:9204 -VP2-06:112:H7LNDMCVY:1:2384:22146:31923 -VP2-06:112:H7LNDMCVY:1:2388:6686:17268 -VP2-06:112:H7LNDMCVY:1:2414:31286:30436 -VP2-06:112:H7LNDMCVY:1:2420:31846:9831 -VP2-06:112:H7LNDMCVY:1:1279:22245:8015 -VP2-06:112:H7LNDMCVY:1:1285:19253:7435 -VP2-06:112:H7LNDMCVY:1:1255:2031:11898 -VP2-06:112:H7LNDMCVY:1:1344:27579:28338 -VP2-06:112:H7LNDMCVY:1:1433:11550:29700 -VP2-06:112:H7LNDMCVY:1:1443:7048:23156 -VP2-06:112:H7LNDMCVY:1:1453:23258:1501 -VP2-06:112:H7LNDMCVY:1:2143:5909:2675 -VP2-06:112:H7LNDMCVY:1:2214:25292:33896 -VP2-06:112:H7LNDMCVY:1:2249:22309:2550 -VP2-06:112:H7LNDMCVY:1:2278:25500:31720 -VP2-06:112:H7LNDMCVY:1:2338:7021:11866 -VP2-06:112:H7LNDMCVY:1:2361:30192:16235 -VP2-06:112:H7LNDMCVY:1:1174:13829:30577 -VP2-06:112:H7LNDMCVY:1:1259:7853:23140 -VP2-06:112:H7LNDMCVY:1:1406:8712:28197 -VP2-06:112:H7LNDMCVY:1:2132:27444:29168 -VP2-06:112:H7LNDMCVY:1:1283:27308:18067 -VP2-06:112:H7LNDMCVY:1:1311:13024:9486 -VP2-06:112:H7LNDMCVY:1:1363:25391:20447 -VP2-06:112:H7LNDMCVY:1:2241:14986:27790 -VP2-06:112:H7LNDMCVY:1:2362:25843:9580 -VP2-06:112:H7LNDMCVY:1:2378:16423:34632 -VP2-06:112:H7LNDMCVY:1:2414:23882:18051 -VP2-06:112:H7LNDMCVY:1:2416:13340:21151 -VP2-06:112:H7LNDMCVY:1:2433:13675:13213 -VP2-06:112:H7LNDMCVY:1:2435:11921:15248 -VP2-06:112:H7LNDMCVY:1:2464:8675:5149 -VP2-06:112:H7LNDMCVY:1:2330:4146:14278 -VP2-06:112:H7LNDMCVY:1:1110:28556:30906 -VP2-06:112:H7LNDMCVY:1:1184:24596:24111 -VP2-06:112:H7LNDMCVY:1:1356:14760:29747 -VP2-06:112:H7LNDMCVY:1:2118:1588:21746 -VP2-06:112:H7LNDMCVY:1:2282:27570:8844 
-VP2-06:112:H7LNDMCVY:1:2379:10203:16000 -VP2-06:112:H7LNDMCVY:1:2389:7265:31579 -VP2-06:112:H7LNDMCVY:1:2490:15157:28933 -VP2-06:112:H7LNDMCVY:1:1122:7609:30733 -VP2-06:112:H7LNDMCVY:1:1209:23330:30749 -VP2-06:112:H7LNDMCVY:1:1211:19542:8406 -VP2-06:112:H7LNDMCVY:1:1314:7699:17394 -VP2-06:112:H7LNDMCVY:1:1371:6985:7733 -VP2-06:112:H7LNDMCVY:1:2279:12156:26490 -VP2-06:112:H7LNDMCVY:1:2462:9824:29528 -VP2-06:112:H7LNDMCVY:1:1150:32289:34961 -VP2-06:112:H7LNDMCVY:1:1274:17779:25363 -VP2-06:112:H7LNDMCVY:1:2305:12509:7874 -VP2-06:112:H7LNDMCVY:1:2478:6813:11443 -VP2-06:112:H7LNDMCVY:1:1123:8232:32534 -VP2-06:112:H7LNDMCVY:1:1161:22616:33677 -VP2-06:112:H7LNDMCVY:1:1239:20012:30984 -VP2-06:112:H7LNDMCVY:1:1244:17454:18129 -VP2-06:112:H7LNDMCVY:1:1315:7455:15311 -VP2-06:112:H7LNDMCVY:1:1319:13937:34961 -VP2-06:112:H7LNDMCVY:1:1328:18792:16407 -VP2-06:112:H7LNDMCVY:1:1417:18358:31814 -VP2-06:112:H7LNDMCVY:1:1474:25870:36871 -VP2-06:112:H7LNDMCVY:1:2174:25852:20588 -VP2-06:112:H7LNDMCVY:1:2218:28718:28745 -VP2-06:112:H7LNDMCVY:1:2447:29577:4460 -VP2-06:112:H7LNDMCVY:1:2450:28890:7529 -VP2-06:112:H7LNDMCVY:1:2475:23592:11130 -VP2-06:112:H7LNDMCVY:1:1274:17517:25347 -VP2-06:112:H7LNDMCVY:1:2223:4119:7780 -VP2-06:112:H7LNDMCVY:1:1285:2040:4711 -VP2-06:112:H7LNDMCVY:1:1347:7012:20212 -VP2-06:112:H7LNDMCVY:1:1367:29984:2503 -VP2-06:112:H7LNDMCVY:1:1379:31232:36198 -VP2-06:112:H7LNDMCVY:1:2234:14570:31454 -VP2-06:112:H7LNDMCVY:1:2240:2220:4178 -VP2-06:112:H7LNDMCVY:1:2281:29360:22999 -VP2-06:112:H7LNDMCVY:1:2351:16857:4163 -VP2-06:112:H7LNDMCVY:1:2366:30020:34882 -VP2-06:112:H7LNDMCVY:1:1285:2040:4711 -VP2-06:112:H7LNDMCVY:1:2185:18313:28980 -VP2-06:112:H7LNDMCVY:1:2371:4408:14982 -VP2-06:112:H7LNDMCVY:1:2371:4408:14982 -VP2-06:112:H7LNDMCVY:1:1124:26738:26850 -VP2-06:112:H7LNDMCVY:1:2183:31864:4507 -VP2-06:112:H7LNDMCVY:1:2231:27001:6355 -VP2-06:112:H7LNDMCVY:1:2251:20690:3035 -VP2-06:112:H7LNDMCVY:1:2263:26530:22858 -VP2-06:112:H7LNDMCVY:1:2373:21142:8453 
-VP2-06:112:H7LNDMCVY:1:2449:31638:15327 -VP2-06:112:H7LNDMCVY:1:2223:19379:8938 -VP2-06:112:H7LNDMCVY:1:2413:16080:7388 -VP2-06:112:H7LNDMCVY:1:2473:26657:21292 -VP2-06:112:H7LNDMCVY:1:1170:19018:3427 -VP2-06:112:H7LNDMCVY:1:1177:10999:24424 -VP2-06:112:H7LNDMCVY:1:2353:21287:27242 -VP2-06:112:H7LNDMCVY:1:2385:14733:29262 -VP2-06:112:H7LNDMCVY:1:2433:19144:35336 -VP2-06:112:H7LNDMCVY:1:2442:14100:24596 -VP2-06:112:H7LNDMCVY:1:2455:12961:4492 -VP2-06:112:H7LNDMCVY:1:2485:18683:11021 -VP2-06:112:H7LNDMCVY:1:1120:7916:30232 -VP2-06:112:H7LNDMCVY:1:1127:23836:20259 -VP2-06:112:H7LNDMCVY:1:1143:6958:19429 -VP2-06:112:H7LNDMCVY:1:1183:3549:12774 -VP2-06:112:H7LNDMCVY:1:1254:28483:5071 -VP2-06:112:H7LNDMCVY:1:1422:7120:30044 -VP2-06:112:H7LNDMCVY:1:2130:18457:31297 -VP2-06:112:H7LNDMCVY:1:2232:3531:14841 -VP2-06:112:H7LNDMCVY:1:2353:17852:14998 -VP2-06:112:H7LNDMCVY:1:2452:29360:30295 -VP2-06:112:H7LNDMCVY:1:2472:4842:29794 -VP2-06:112:H7LNDMCVY:1:2473:26657:21292 -VP2-06:112:H7LNDMCVY:1:2479:29378:20838 -VP2-06:112:H7LNDMCVY:1:2177:5231:36824 -VP2-06:112:H7LNDMCVY:1:1112:17309:16564 -VP2-06:112:H7LNDMCVY:1:1323:1633:36980 -VP2-06:112:H7LNDMCVY:1:1406:1922:34882 -VP2-06:112:H7LNDMCVY:1:2119:17454:36292 -VP2-06:112:H7LNDMCVY:1:2207:19298:23797 -VP2-06:112:H7LNDMCVY:1:2251:17255:21512 -VP2-06:112:H7LNDMCVY:1:2318:24352:36370 -VP2-06:112:H7LNDMCVY:1:2346:5059:22654 -VP2-06:112:H7LNDMCVY:1:2365:4273:36385 -VP2-06:112:H7LNDMCVY:1:2373:1958:22169 -VP2-06:112:H7LNDMCVY:1:2386:22399:28134 -VP2-06:112:H7LNDMCVY:1:2466:9435:19179 -VP2-06:112:H7LNDMCVY:1:1117:4616:35978 -VP2-06:112:H7LNDMCVY:1:1348:10520:12727 -VP2-06:112:H7LNDMCVY:1:2433:15646:19977 -VP2-06:112:H7LNDMCVY:1:2442:8296:21746 -VP2-06:112:H7LNDMCVY:1:1329:11487:13777 -VP2-06:112:H7LNDMCVY:1:1339:6623:17597 -VP2-06:112:H7LNDMCVY:1:1376:15167:22811 -VP2-06:112:H7LNDMCVY:1:1385:31729:9533 -VP2-06:112:H7LNDMCVY:1:2236:9959:8218 -VP2-06:112:H7LNDMCVY:1:2261:16034:29168 -VP2-06:112:H7LNDMCVY:1:2363:21730:13761 
-VP2-06:112:H7LNDMCVY:1:2415:3025:16031 -VP2-06:112:H7LNDMCVY:1:1460:10086:31798 -VP2-06:112:H7LNDMCVY:1:2261:16034:29168 -VP2-06:112:H7LNDMCVY:1:1320:1940:19539 -VP2-06:112:H7LNDMCVY:1:1335:19443:33943 -VP2-06:112:H7LNDMCVY:1:1358:6424:1908 -VP2-06:112:H7LNDMCVY:1:1466:14398:23641 -VP2-06:112:H7LNDMCVY:1:2112:16920:18646 -VP2-06:112:H7LNDMCVY:1:2143:31213:4633 -VP2-06:112:H7LNDMCVY:1:2161:27199:19664 -VP2-06:112:H7LNDMCVY:1:2180:15890:18364 -VP2-06:112:H7LNDMCVY:1:2205:17119:5619 -VP2-06:112:H7LNDMCVY:1:2231:27516:25128 -VP2-06:112:H7LNDMCVY:1:2266:17616:15687 -VP2-06:112:H7LNDMCVY:1:2275:29324:17206 -VP2-06:112:H7LNDMCVY:1:2319:10592:2581 -VP2-06:112:H7LNDMCVY:1:2323:25021:4116 -VP2-06:112:H7LNDMCVY:1:2353:31566:22561 -VP2-06:112:H7LNDMCVY:1:2360:29017:29136 -VP2-06:112:H7LNDMCVY:1:2381:5575:31469 -VP2-06:112:H7LNDMCVY:1:1335:19443:33943 -VP2-06:112:H7LNDMCVY:1:2266:17616:15687 -VP2-06:112:H7LNDMCVY:1:2353:31566:22561 -VP2-06:112:H7LNDMCVY:1:1184:16504:24251 -VP2-06:112:H7LNDMCVY:1:1289:29026:19382 -VP2-06:112:H7LNDMCVY:1:1308:7708:12305 -VP2-06:112:H7LNDMCVY:1:1354:3830:33301 -VP2-06:112:H7LNDMCVY:1:1434:11740:22357 -VP2-06:112:H7LNDMCVY:1:2111:24858:26475 -VP2-06:112:H7LNDMCVY:1:2243:4589:6590 -VP2-06:112:H7LNDMCVY:1:2379:30653:26209 -VP2-06:112:H7LNDMCVY:1:2452:6352:11710 -VP2-06:112:H7LNDMCVY:1:2468:32470:5963 -VP2-06:112:H7LNDMCVY:1:2468:32470:5963 -VP2-06:112:H7LNDMCVY:1:1342:29912:27993 -VP2-06:112:H7LNDMCVY:1:2181:21531:35399 -VP2-06:112:H7LNDMCVY:1:1105:23231:10410 -VP2-06:112:H7LNDMCVY:1:2184:10330:6574 -VP2-06:112:H7LNDMCVY:1:2471:24632:31814 -VP2-06:112:H7LNDMCVY:1:1170:11008:32049 -VP2-06:112:H7LNDMCVY:1:1184:5068:13557 -VP2-06:112:H7LNDMCVY:1:1411:6831:30827 -VP2-06:112:H7LNDMCVY:1:1418:23448:4178 -VP2-06:112:H7LNDMCVY:1:1448:4444:9909 -VP2-06:112:H7LNDMCVY:1:1455:13530:29058 -VP2-06:112:H7LNDMCVY:1:2106:28989:8985 -VP2-06:112:H7LNDMCVY:1:2138:20482:6809 -VP2-06:112:H7LNDMCVY:1:2149:25039:30452 -VP2-06:112:H7LNDMCVY:1:2184:5466:11741 
-VP2-06:112:H7LNDMCVY:1:2413:27407:26506 -VP2-06:112:H7LNDMCVY:1:2487:24189:30326 -VP2-06:112:H7LNDMCVY:1:2390:20907:25144 -VP2-06:112:H7LNDMCVY:1:1105:12518:34820 -VP2-06:112:H7LNDMCVY:1:1115:19822:12868 -VP2-06:112:H7LNDMCVY:1:1163:10456:12085 -VP2-06:112:H7LNDMCVY:1:1327:23366:30060 -VP2-06:112:H7LNDMCVY:1:1344:10574:29011 -VP2-06:112:H7LNDMCVY:1:1362:31295:11819 -VP2-06:112:H7LNDMCVY:1:1369:26106:4617 -VP2-06:112:H7LNDMCVY:1:1432:12057:28291 -VP2-06:112:H7LNDMCVY:1:1433:19569:4883 -VP2-06:112:H7LNDMCVY:1:2114:24523:3912 -VP2-06:112:H7LNDMCVY:1:2423:26684:32988 -VP2-06:112:H7LNDMCVY:1:1368:30933:4147 -VP2-06:112:H7LNDMCVY:1:1163:2528:6590 -VP2-06:112:H7LNDMCVY:1:1310:20455:31438 -VP2-06:112:H7LNDMCVY:1:1340:6659:35227 -VP2-06:112:H7LNDMCVY:1:2145:3170:33098 -VP2-06:112:H7LNDMCVY:1:2108:5041:4961 -VP2-06:112:H7LNDMCVY:1:1257:23728:3537 -VP2-06:112:H7LNDMCVY:1:1277:9751:20134 -VP2-06:112:H7LNDMCVY:1:1332:32642:19288 -VP2-06:112:H7LNDMCVY:1:1351:29550:14465 -VP2-06:112:H7LNDMCVY:1:1360:10077:11553 -VP2-06:112:H7LNDMCVY:1:1412:19099:16689 -VP2-06:112:H7LNDMCVY:1:1420:24930:18803 -VP2-06:112:H7LNDMCVY:1:1458:7500:34272 -VP2-06:112:H7LNDMCVY:1:1459:26711:3035 -VP2-06:112:H7LNDMCVY:1:1478:22263:15624 -VP2-06:112:H7LNDMCVY:1:2370:17553:16611 -VP2-06:112:H7LNDMCVY:1:2488:5493:15170 -VP2-06:112:H7LNDMCVY:1:2261:28275:36526 -VP2-06:112:H7LNDMCVY:1:1286:12491:13792 -VP2-06:112:H7LNDMCVY:1:1383:17390:23500 -VP2-06:112:H7LNDMCVY:1:1407:8938:17440 -VP2-06:112:H7LNDMCVY:1:1412:14904:34413 -VP2-06:112:H7LNDMCVY:1:1435:16342:24001 -VP2-06:112:H7LNDMCVY:1:1468:14344:36072 -VP2-06:112:H7LNDMCVY:1:2214:5945:14763 -VP2-06:112:H7LNDMCVY:1:2227:5276:24878 -VP2-06:112:H7LNDMCVY:1:2236:31629:8296 -VP2-06:112:H7LNDMCVY:1:2305:8730:3208 -VP2-06:112:H7LNDMCVY:1:2364:7048:35837 -VP2-06:112:H7LNDMCVY:1:2405:25807:25927 -VP2-06:112:H7LNDMCVY:1:2452:24623:32769 -VP2-06:112:H7LNDMCVY:1:2474:8712:36276 -VP2-06:112:H7LNDMCVY:1:2106:29595:10254 -VP2-06:112:H7LNDMCVY:1:1218:1723:6417 
-VP2-06:112:H7LNDMCVY:1:1272:24089:34444 -VP2-06:112:H7LNDMCVY:1:1381:7012:2832 -VP2-06:112:H7LNDMCVY:1:1454:24044:13730 -VP2-06:112:H7LNDMCVY:1:2144:26802:24987 -VP2-06:112:H7LNDMCVY:1:2167:15438:10817 -VP2-06:112:H7LNDMCVY:1:2275:10474:15060 -VP2-06:112:H7LNDMCVY:1:2489:11641:27539 -VP2-06:112:H7LNDMCVY:1:1143:32289:29481 -VP2-06:112:H7LNDMCVY:1:1182:15302:26240 -VP2-06:112:H7LNDMCVY:1:2208:8612:19539 -VP2-06:112:H7LNDMCVY:1:2288:30807:36965 -VP2-06:112:H7LNDMCVY:1:2290:12581:24940 -VP2-06:112:H7LNDMCVY:1:1218:2736:26115 -VP2-06:112:H7LNDMCVY:1:1307:28962:8625 -VP2-06:112:H7LNDMCVY:1:1371:29152:2754 -VP2-06:112:H7LNDMCVY:1:1467:3188:35916 -VP2-06:112:H7LNDMCVY:1:2169:14751:2143 -VP2-06:112:H7LNDMCVY:1:2182:7129:9768 -VP2-06:112:H7LNDMCVY:1:2245:11397:21418 -VP2-06:112:H7LNDMCVY:1:2245:30120:27915 -VP2-06:112:H7LNDMCVY:1:2342:21739:5196 -VP2-06:112:H7LNDMCVY:1:1117:14922:18223 -VP2-06:112:H7LNDMCVY:1:1128:2320:6010 -VP2-06:112:H7LNDMCVY:1:1156:6162:6057 -VP2-06:112:H7LNDMCVY:1:1228:30300:30452 -VP2-06:112:H7LNDMCVY:1:1250:19063:23735 -VP2-06:112:H7LNDMCVY:1:1460:10086:31798 -VP2-06:112:H7LNDMCVY:1:2135:4481:24596 -VP2-06:112:H7LNDMCVY:1:2162:15772:2910 -VP2-06:112:H7LNDMCVY:1:2186:3332:19163 -VP2-06:112:H7LNDMCVY:1:2231:21947:32142 -VP2-06:112:H7LNDMCVY:1:2266:13865:4178 -VP2-06:112:H7LNDMCVY:1:2420:12264:35070 -VP2-06:112:H7LNDMCVY:1:1156:27407:3145 -VP2-06:112:H7LNDMCVY:1:1156:9932:13119 -VP2-06:112:H7LNDMCVY:1:1174:3007:15562 -VP2-06:112:H7LNDMCVY:1:1231:31665:15499 -VP2-06:112:H7LNDMCVY:1:1248:29640:31187 -VP2-06:112:H7LNDMCVY:1:1329:20356:3302 -VP2-06:112:H7LNDMCVY:1:1411:21368:31140 -VP2-06:112:H7LNDMCVY:1:1467:11406:32174 -VP2-06:112:H7LNDMCVY:1:2156:4092:28338 -VP2-06:112:H7LNDMCVY:1:2158:28782:10567 -VP2-06:112:H7LNDMCVY:1:2178:12933:9768 -VP2-06:112:H7LNDMCVY:1:2249:9308:3458 -VP2-06:112:H7LNDMCVY:1:2288:30807:36965 -VP2-06:112:H7LNDMCVY:1:2323:26684:21402 -VP2-06:112:H7LNDMCVY:1:2330:4146:14278 -VP2-06:112:H7LNDMCVY:1:2376:21468:34945 
-VP2-06:112:H7LNDMCVY:1:2415:27082:35869 -VP2-06:112:H7LNDMCVY:1:2467:4318:33739 -VP2-06:112:H7LNDMCVY:1:2482:32289:19022 -VP2-06:112:H7LNDMCVY:1:1120:31123:12148 -VP2-06:112:H7LNDMCVY:1:1149:23963:35759 -VP2-06:112:H7LNDMCVY:1:1342:7536:8844 -VP2-06:112:H7LNDMCVY:1:1351:29496:15374 -VP2-06:112:H7LNDMCVY:1:1357:13901:32174 -VP2-06:112:H7LNDMCVY:1:1425:21612:20008 -VP2-06:112:H7LNDMCVY:1:1462:26621:10520 -VP2-06:112:H7LNDMCVY:1:2150:18665:1313 -VP2-06:112:H7LNDMCVY:1:2245:31051:3443 -VP2-06:112:H7LNDMCVY:1:2423:4544:9612 -VP2-06:112:H7LNDMCVY:1:1206:27679:10222 -VP2-06:112:H7LNDMCVY:1:2177:3974:20870 -VP2-06:112:H7LNDMCVY:1:2177:3974:20870 -VP2-06:112:H7LNDMCVY:1:1327:24777:26490 -VP2-06:112:H7LNDMCVY:1:1372:30391:12477 -VP2-06:112:H7LNDMCVY:1:1375:14290:33536 -VP2-06:112:H7LNDMCVY:1:1471:2953:7200 -VP2-06:112:H7LNDMCVY:1:2158:31132:13322 -VP2-06:112:H7LNDMCVY:1:2175:5692:28573 -VP2-06:112:H7LNDMCVY:1:2238:21178:23140 -VP2-06:112:H7LNDMCVY:1:2249:19452:15828 -VP2-06:112:H7LNDMCVY:1:2416:7500:23312 -VP2-06:112:H7LNDMCVY:1:2483:7030:5995 -VP2-06:112:H7LNDMCVY:1:1183:3549:12774 -VP2-06:112:H7LNDMCVY:1:1433:6072:25692 -VP2-06:112:H7LNDMCVY:1:2108:2347:15358 -VP2-06:112:H7LNDMCVY:1:2370:17553:16611 -VP2-06:112:H7LNDMCVY:1:1234:23375:11130 -VP2-06:112:H7LNDMCVY:1:1312:1624:6746 -VP2-06:112:H7LNDMCVY:1:1374:10782:9298 -VP2-06:112:H7LNDMCVY:1:2245:31656:21715 -VP2-06:112:H7LNDMCVY:1:2256:12201:24377 -VP2-06:112:H7LNDMCVY:1:1283:27308:18067 -VP2-06:112:H7LNDMCVY:1:2305:32443:2785 -VP2-06:112:H7LNDMCVY:1:1136:29966:29935 -VP2-06:112:H7LNDMCVY:1:1208:30264:10567 -VP2-06:112:H7LNDMCVY:1:1288:20292:14747 -VP2-06:112:H7LNDMCVY:1:1310:5981:27289 -VP2-06:112:H7LNDMCVY:1:1367:11749:33364 -VP2-06:112:H7LNDMCVY:1:1433:6072:25692 -VP2-06:112:H7LNDMCVY:1:1469:16116:36041 -VP2-06:112:H7LNDMCVY:1:1478:17083:21872 -VP2-06:112:H7LNDMCVY:1:2147:15872:23187 -VP2-06:112:H7LNDMCVY:1:2216:5981:5682 -VP2-06:112:H7LNDMCVY:1:2244:21187:19867 -VP2-06:112:H7LNDMCVY:1:2248:22345:5181 
-VP2-06:112:H7LNDMCVY:1:2339:23583:25739 -VP2-06:112:H7LNDMCVY:1:2353:10511:2503 -VP2-06:112:H7LNDMCVY:1:2458:9362:9846 -VP2-06:112:H7LNDMCVY:1:2468:15944:28604 -VP2-06:112:H7LNDMCVY:1:2469:24379:9893 -VP2-06:112:H7LNDMCVY:1:1138:26368:4445 -VP2-06:112:H7LNDMCVY:1:1405:10538:20275 -VP2-06:112:H7LNDMCVY:1:2121:31901:34569 -VP2-06:112:H7LNDMCVY:1:2237:7536:13855 -VP2-06:112:H7LNDMCVY:1:2338:8449:22952 -VP2-06:112:H7LNDMCVY:1:2445:28203:1517 -VP2-06:112:H7LNDMCVY:1:1264:2003:35399 -VP2-06:112:H7LNDMCVY:1:1459:28827:10864 -VP2-06:112:H7LNDMCVY:1:1338:8088:3662 -VP2-06:112:H7LNDMCVY:1:1369:19651:36871 -VP2-06:112:H7LNDMCVY:1:2131:6696:24111 -VP2-06:112:H7LNDMCVY:1:2155:2365:5055 -VP2-06:112:H7LNDMCVY:1:2163:14660:24815 -VP2-06:112:H7LNDMCVY:1:2179:11812:24706 -VP2-06:112:H7LNDMCVY:1:2221:11849:14841 -VP2-06:112:H7LNDMCVY:1:2231:26765:29904 -VP2-06:112:H7LNDMCVY:1:2240:30011:5055 -VP2-06:112:H7LNDMCVY:1:2278:17002:20008 -VP2-06:112:H7LNDMCVY:1:2358:11650:18505 -VP2-06:112:H7LNDMCVY:1:2445:15528:22467 -VP2-06:112:H7LNDMCVY:1:2221:11849:14841 -VP2-06:112:H7LNDMCVY:1:1131:15899:6042 -VP2-06:112:H7LNDMCVY:1:1143:4743:35603 -VP2-06:112:H7LNDMCVY:1:1284:14172:20212 -VP2-06:112:H7LNDMCVY:1:1319:30129:36573 -VP2-06:112:H7LNDMCVY:1:1429:17969:35900 -VP2-06:112:H7LNDMCVY:1:1432:12364:20243 -VP2-06:112:H7LNDMCVY:1:1457:12491:14356 -VP2-06:112:H7LNDMCVY:1:1464:18584:32737 -VP2-06:112:H7LNDMCVY:1:1464:18584:32737 -VP2-06:112:H7LNDMCVY:1:2155:32244:29183 -VP2-06:112:H7LNDMCVY:1:2211:12545:35117 -VP2-06:112:H7LNDMCVY:1:2288:9796:22811 -VP2-06:112:H7LNDMCVY:1:2308:7988:3145 -VP2-06:112:H7LNDMCVY:1:2370:8187:22561 -VP2-06:112:H7LNDMCVY:1:1157:25717:5040 -VP2-06:112:H7LNDMCVY:1:1264:2003:35399 -VP2-06:112:H7LNDMCVY:1:1331:32705:34115 -VP2-06:112:H7LNDMCVY:1:1462:3965:8609 -VP2-06:112:H7LNDMCVY:1:1470:19994:3959 -VP2-06:112:H7LNDMCVY:1:2189:15167:14951 -VP2-06:112:H7LNDMCVY:1:2319:10592:2581 -VP2-06:112:H7LNDMCVY:1:1160:17626:11099 -VP2-06:112:H7LNDMCVY:1:1184:30409:6621 
-VP2-06:112:H7LNDMCVY:1:1247:16893:27179 -VP2-06:112:H7LNDMCVY:1:1254:12301:8954 -VP2-06:112:H7LNDMCVY:1:1281:15863:2722 -VP2-06:112:H7LNDMCVY:1:1286:12418:4210 -VP2-06:112:H7LNDMCVY:1:1344:6849:26349 -VP2-06:112:H7LNDMCVY:1:2158:9833:17425 -VP2-06:112:H7LNDMCVY:1:2162:17436:18098 -VP2-06:112:H7LNDMCVY:1:2169:21694:3834 -VP2-06:112:H7LNDMCVY:1:2284:3396:35055 -VP2-06:112:H7LNDMCVY:1:2284:3396:35055 -VP2-06:112:H7LNDMCVY:1:2425:27407:4586 -VP2-06:112:H7LNDMCVY:1:2431:25880:2848 -VP2-06:112:H7LNDMCVY:1:2443:29270:3677 -VP2-06:112:H7LNDMCVY:1:2179:11812:24706 -VP2-06:112:H7LNDMCVY:1:1143:32289:29481 -VP2-06:112:H7LNDMCVY:1:1155:9489:36777 -VP2-06:112:H7LNDMCVY:1:1156:4372:31078 -VP2-06:112:H7LNDMCVY:1:1232:17815:10269 -VP2-06:112:H7LNDMCVY:1:1316:12274:24627 -VP2-06:112:H7LNDMCVY:1:1367:5430:8359 -VP2-06:112:H7LNDMCVY:1:1405:32045:2942 -VP2-06:112:H7LNDMCVY:1:2183:17734:22936 -VP2-06:112:H7LNDMCVY:1:2233:12210:2378 -VP2-06:112:H7LNDMCVY:1:2352:12500:1470 -VP2-06:112:H7LNDMCVY:1:2477:21404:13949 -VP2-06:112:H7LNDMCVY:1:1151:6732:33473 -VP2-06:112:H7LNDMCVY:1:1374:1434:12367 -VP2-06:112:H7LNDMCVY:1:1422:25726:34209 -VP2-06:112:H7LNDMCVY:1:2245:23113:15749 -VP2-06:112:H7LNDMCVY:1:2253:14714:6621 -VP2-06:112:H7LNDMCVY:1:2305:12509:7874 -VP2-06:112:H7LNDMCVY:1:2314:29261:32002 -VP2-06:112:H7LNDMCVY:1:2355:27615:27555 -VP2-06:112:H7LNDMCVY:1:2424:6533:10864 -VP2-06:112:H7LNDMCVY:1:2470:18177:12023 -VP2-06:112:H7LNDMCVY:1:1120:31123:12148 -VP2-06:112:H7LNDMCVY:1:1288:20292:14747 -VP2-06:112:H7LNDMCVY:1:1455:13530:29058 -VP2-06:112:H7LNDMCVY:1:1459:26711:3035 -VP2-06:112:H7LNDMCVY:1:2253:14714:6621 -VP2-06:112:H7LNDMCVY:1:1117:14217:13808 -VP2-06:112:H7LNDMCVY:1:1264:28583:30953 -VP2-06:112:H7LNDMCVY:1:1419:1741:36667 -VP2-06:112:H7LNDMCVY:1:1421:6894:6887 -VP2-06:112:H7LNDMCVY:1:2345:26404:11835 -VP2-06:112:H7LNDMCVY:1:2376:14181:35196 -VP2-06:112:H7LNDMCVY:1:2460:13720:13823 -VP2-06:112:H7LNDMCVY:1:1419:1741:36667 -VP2-06:112:H7LNDMCVY:1:2189:15167:14951 
-VP2-06:112:H7LNDMCVY:1:1340:3405:5572 -VP2-06:112:H7LNDMCVY:1:1413:3278:11678 -VP2-06:112:H7LNDMCVY:1:2174:18105:17722 -VP2-06:112:H7LNDMCVY:1:2219:10366:6574 -VP2-06:112:H7LNDMCVY:1:2240:1434:3787 -VP2-06:112:H7LNDMCVY:1:2290:12581:24940 -VP2-06:112:H7LNDMCVY:1:2460:1741:16626 -VP2-06:112:H7LNDMCVY:1:2476:14253:8860 -VP2-06:112:H7LNDMCVY:1:1268:5547:2206 -VP2-06:112:H7LNDMCVY:1:1340:3405:5572 -VP2-06:112:H7LNDMCVY:1:1105:32533:4351 -VP2-06:112:H7LNDMCVY:1:1135:26765:16063 -VP2-06:112:H7LNDMCVY:1:1411:30825:5462 -VP2-06:112:H7LNDMCVY:1:1415:31222:11976 -VP2-06:112:H7LNDMCVY:1:2164:13865:6684 -VP2-06:112:H7LNDMCVY:1:2228:23710:3129 -VP2-06:112:H7LNDMCVY:1:2257:12617:21966 -VP2-06:112:H7LNDMCVY:1:2388:21414:17284 -VP2-06:112:H7LNDMCVY:1:1170:6352:1626 -VP2-06:112:H7LNDMCVY:1:1342:8947:8625 -VP2-06:112:H7LNDMCVY:1:2233:8639:7811 -VP2-06:112:H7LNDMCVY:1:2433:15646:19977 -VP2-06:112:H7LNDMCVY:1:2463:24207:31579 -VP2-06:112:H7LNDMCVY:1:1344:6849:26349 -VP2-06:112:H7LNDMCVY:1:2179:30969:32831 -VP2-06:112:H7LNDMCVY:1:2229:17607:14982 -VP2-06:112:H7LNDMCVY:1:2280:18457:26381 -VP2-06:112:H7LNDMCVY:1:2413:10565:1532 -VP2-06:112:H7LNDMCVY:1:1373:10004:35352 -VP2-06:112:H7LNDMCVY:1:1382:13367:12806 -VP2-06:112:H7LNDMCVY:1:2106:29595:10254 -VP2-06:112:H7LNDMCVY:1:2173:3549:8202 -VP2-06:112:H7LNDMCVY:1:2233:31006:33489 -VP2-06:112:H7LNDMCVY:1:2246:1913:20243 -VP2-06:112:H7LNDMCVY:1:2255:1642:35681 -VP2-06:112:H7LNDMCVY:1:2348:3730:5040 -VP2-06:112:H7LNDMCVY:1:2423:12246:6668 -VP2-06:112:H7LNDMCVY:1:2446:22896:7294 -VP2-06:112:H7LNDMCVY:1:1269:22417:16203 -VP2-06:112:H7LNDMCVY:1:1354:3712:5979 -VP2-06:112:H7LNDMCVY:1:2423:12246:6668 -VP2-06:112:H7LNDMCVY:1:1175:5249:11866 -VP2-06:112:H7LNDMCVY:1:1233:27218:35477 -VP2-06:112:H7LNDMCVY:1:1333:27661:36558 -VP2-06:112:H7LNDMCVY:1:1381:16993:11224 -VP2-06:112:H7LNDMCVY:1:2142:6623:12461 -VP2-06:112:H7LNDMCVY:1:2187:27887:12023 -VP2-06:112:H7LNDMCVY:1:2244:10592:10755 -VP2-06:112:H7LNDMCVY:1:2418:27588:27445 
-VP2-06:112:H7LNDMCVY:1:2243:20654:6261 -VP2-06:112:H7LNDMCVY:1:1181:28809:11459 -VP2-06:112:H7LNDMCVY:1:1282:31693:31297 -VP2-06:112:H7LNDMCVY:1:1421:28537:31908 -VP2-06:112:H7LNDMCVY:1:1469:8504:36699 -VP2-06:112:H7LNDMCVY:1:2152:27416:29089 -VP2-06:112:H7LNDMCVY:1:2328:3730:31720 -VP2-06:112:H7LNDMCVY:1:2184:16984:27023 -VP2-06:112:H7LNDMCVY:1:2477:4634:17816 -VP2-06:112:H7LNDMCVY:1:1118:13512:22200 -VP2-06:112:H7LNDMCVY:1:1373:10004:35352 -VP2-06:112:H7LNDMCVY:1:1177:10312:5353 -VP2-06:112:H7LNDMCVY:1:1333:7419:23547 -VP2-06:112:H7LNDMCVY:1:1314:8675:13072 -VP2-06:112:H7LNDMCVY:1:1482:22218:10848 -VP2-06:112:H7LNDMCVY:1:1462:18647:9580 -VP2-06:112:H7LNDMCVY:1:1468:16206:19319 -VP2-06:112:H7LNDMCVY:1:2275:11704:11083 -VP2-06:112:H7LNDMCVY:1:1287:9498:11835 -VP2-06:112:H7LNDMCVY:1:2165:25753:25614 -VP2-06:112:H7LNDMCVY:1:2446:1145:23610 -VP2-06:112:H7LNDMCVY:1:2282:27570:8844 -VP2-06:112:H7LNDMCVY:1:1314:8675:13072 -VP2-06:112:H7LNDMCVY:1:1318:7844:7529 -VP2-06:112:H7LNDMCVY:1:1438:18150:7968 -VP2-06:112:H7LNDMCVY:1:2218:18285:26835 -VP2-06:112:H7LNDMCVY:1:2448:10999:4664 -VP2-06:112:H7LNDMCVY:1:1215:4896:14137 -VP2-06:112:H7LNDMCVY:1:2290:16134:23296 -VP2-06:112:H7LNDMCVY:1:2275:1099:31360 -VP2-06:112:H7LNDMCVY:1:2105:13313:33254 -VP2-06:112:H7LNDMCVY:1:2159:22697:10113 -VP2-06:112:H7LNDMCVY:1:2226:2682:29434 -VP2-06:112:H7LNDMCVY:1:1231:15266:6856 -VP2-06:112:H7LNDMCVY:1:1486:4698:21402 -VP2-06:112:H7LNDMCVY:1:2390:20907:25144 -VP2-06:112:H7LNDMCVY:1:2482:13178:10222 -VP2-06:112:H7LNDMCVY:1:1480:23222:34162 -VP2-06:112:H7LNDMCVY:1:2258:29243:29528 -VP2-06:112:H7LNDMCVY:1:1128:18629:8954 -VP2-06:112:H7LNDMCVY:1:1425:22525:17237 diff --git a/src/htslib-1.18/htscodecs/tests/names/nv2.names b/src/htslib-1.18/htscodecs/tests/names/nv2.names deleted file mode 100644 index 974286d..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/nv2.names +++ /dev/null @@ -1,1000 +0,0 @@ -VP2-06:112:H7LNDMCVY:1:1105:1452:1063 -VP2-06:112:H7LNDMCVY:1:1105:1452:1063 
-VP2-06:112:H7LNDMCVY:1:1105:1226:1078 -VP2-06:112:H7LNDMCVY:1:1105:1226:1078 -VP2-06:112:H7LNDMCVY:1:1105:1533:1078 -VP2-06:112:H7LNDMCVY:1:1105:1533:1078 -VP2-06:112:H7LNDMCVY:1:1105:1895:1078 -VP2-06:112:H7LNDMCVY:1:1105:1895:1078 -VP2-06:112:H7LNDMCVY:1:1105:2112:1078 -VP2-06:112:H7LNDMCVY:1:1105:2112:1078 -VP2-06:112:H7LNDMCVY:1:1105:2202:1078 -VP2-06:112:H7LNDMCVY:1:1105:2202:1078 -VP2-06:112:H7LNDMCVY:1:1105:2401:1078 -VP2-06:112:H7LNDMCVY:1:1105:2401:1078 -VP2-06:112:H7LNDMCVY:1:1105:3052:1078 -VP2-06:112:H7LNDMCVY:1:1105:3052:1078 -VP2-06:112:H7LNDMCVY:1:1105:3378:1078 -VP2-06:112:H7LNDMCVY:1:1105:3378:1078 -VP2-06:112:H7LNDMCVY:1:1105:3486:1078 -VP2-06:112:H7LNDMCVY:1:1105:3486:1078 -VP2-06:112:H7LNDMCVY:1:1105:3848:1078 -VP2-06:112:H7LNDMCVY:1:1105:3848:1078 -VP2-06:112:H7LNDMCVY:1:1105:3938:1078 -VP2-06:112:H7LNDMCVY:1:1105:3938:1078 -VP2-06:112:H7LNDMCVY:1:1105:4119:1078 -VP2-06:112:H7LNDMCVY:1:1105:4119:1078 -VP2-06:112:H7LNDMCVY:1:1105:4372:1078 -VP2-06:112:H7LNDMCVY:1:1105:4372:1078 -VP2-06:112:H7LNDMCVY:1:1105:4535:1078 -VP2-06:112:H7LNDMCVY:1:1105:4535:1078 -VP2-06:112:H7LNDMCVY:1:1105:4734:1078 -VP2-06:112:H7LNDMCVY:1:1105:4734:1078 -VP2-06:112:H7LNDMCVY:1:1105:4824:1078 -VP2-06:112:H7LNDMCVY:1:1105:4824:1078 -VP2-06:112:H7LNDMCVY:1:1105:5204:1078 -VP2-06:112:H7LNDMCVY:1:1105:5204:1078 -VP2-06:112:H7LNDMCVY:1:1105:5222:1078 -VP2-06:112:H7LNDMCVY:1:1105:5222:1078 -VP2-06:112:H7LNDMCVY:1:1105:5348:1078 -VP2-06:112:H7LNDMCVY:1:1105:5348:1078 -VP2-06:112:H7LNDMCVY:1:1105:5547:1078 -VP2-06:112:H7LNDMCVY:1:1105:5547:1078 -VP2-06:112:H7LNDMCVY:1:1105:1289:1094 -VP2-06:112:H7LNDMCVY:1:1105:1289:1094 -VP2-06:112:H7LNDMCVY:1:1105:1488:1094 -VP2-06:112:H7LNDMCVY:1:1105:1488:1094 -VP2-06:112:H7LNDMCVY:1:1105:2266:1094 -VP2-06:112:H7LNDMCVY:1:1105:2266:1094 -VP2-06:112:H7LNDMCVY:1:1105:2519:1094 -VP2-06:112:H7LNDMCVY:1:1105:2519:1094 -VP2-06:112:H7LNDMCVY:1:1105:3423:1094 -VP2-06:112:H7LNDMCVY:1:1105:3423:1094 -VP2-06:112:H7LNDMCVY:1:1105:3658:1094 
-VP2-06:112:H7LNDMCVY:1:1105:3658:1094 -VP2-06:112:H7LNDMCVY:1:1105:3821:1094 -VP2-06:112:H7LNDMCVY:1:1105:3821:1094 -VP2-06:112:H7LNDMCVY:1:1105:4490:1094 -VP2-06:112:H7LNDMCVY:1:1105:4490:1094 -VP2-06:112:H7LNDMCVY:1:1105:4960:1094 -VP2-06:112:H7LNDMCVY:1:1105:4960:1094 -VP2-06:112:H7LNDMCVY:1:1105:5303:1094 -VP2-06:112:H7LNDMCVY:1:1105:5303:1094 -VP2-06:112:H7LNDMCVY:1:1105:5502:1094 -VP2-06:112:H7LNDMCVY:1:1105:5502:1094 -VP2-06:112:H7LNDMCVY:1:1105:5520:1094 -VP2-06:112:H7LNDMCVY:1:1105:5520:1094 -VP2-06:112:H7LNDMCVY:1:1105:5556:1094 -VP2-06:112:H7LNDMCVY:1:1105:5556:1094 -VP2-06:112:H7LNDMCVY:1:1105:6027:1094 -VP2-06:112:H7LNDMCVY:1:1105:6027:1094 -VP2-06:112:H7LNDMCVY:1:1105:6117:1094 -VP2-06:112:H7LNDMCVY:1:1105:6117:1094 -VP2-06:112:H7LNDMCVY:1:1105:6225:1094 -VP2-06:112:H7LNDMCVY:1:1105:6225:1094 -VP2-06:112:H7LNDMCVY:1:1105:6714:1094 -VP2-06:112:H7LNDMCVY:1:1105:6714:1094 -VP2-06:112:H7LNDMCVY:1:1105:6786:1094 -VP2-06:112:H7LNDMCVY:1:1105:6786:1094 -VP2-06:112:H7LNDMCVY:1:1105:6804:1094 -VP2-06:112:H7LNDMCVY:1:1105:6804:1094 -VP2-06:112:H7LNDMCVY:1:1105:6858:1094 -VP2-06:112:H7LNDMCVY:1:1105:6858:1094 -VP2-06:112:H7LNDMCVY:1:1105:7003:1094 -VP2-06:112:H7LNDMCVY:1:1105:7003:1094 -VP2-06:112:H7LNDMCVY:1:1105:7256:1094 -VP2-06:112:H7LNDMCVY:1:1105:7256:1094 -VP2-06:112:H7LNDMCVY:1:1105:7346:1094 -VP2-06:112:H7LNDMCVY:1:1105:7346:1094 -VP2-06:112:H7LNDMCVY:1:1105:7383:1094 -VP2-06:112:H7LNDMCVY:1:1105:7383:1094 -VP2-06:112:H7LNDMCVY:1:1105:7618:1094 -VP2-06:112:H7LNDMCVY:1:1105:7618:1094 -VP2-06:112:H7LNDMCVY:1:1105:7690:1094 -VP2-06:112:H7LNDMCVY:1:1105:7690:1094 -VP2-06:112:H7LNDMCVY:1:1105:7708:1094 -VP2-06:112:H7LNDMCVY:1:1105:7708:1094 -VP2-06:112:H7LNDMCVY:1:1105:7943:1094 -VP2-06:112:H7LNDMCVY:1:1105:7943:1094 -VP2-06:112:H7LNDMCVY:1:1105:8196:1094 -VP2-06:112:H7LNDMCVY:1:1105:8196:1094 -VP2-06:112:H7LNDMCVY:1:1105:8250:1094 -VP2-06:112:H7LNDMCVY:1:1105:8250:1094 -VP2-06:112:H7LNDMCVY:1:1105:9227:1094 -VP2-06:112:H7LNDMCVY:1:1105:9227:1094 
-VP2-06:112:H7LNDMCVY:1:1105:9335:1094 -VP2-06:112:H7LNDMCVY:1:1105:9335:1094 -VP2-06:112:H7LNDMCVY:1:1105:9516:1094 -VP2-06:112:H7LNDMCVY:1:1105:9516:1094 -VP2-06:112:H7LNDMCVY:1:1105:1461:1110 -VP2-06:112:H7LNDMCVY:1:1105:1461:1110 -VP2-06:112:H7LNDMCVY:1:1105:1660:1110 -VP2-06:112:H7LNDMCVY:1:1105:1660:1110 -VP2-06:112:H7LNDMCVY:1:1105:2311:1110 -VP2-06:112:H7LNDMCVY:1:1105:2311:1110 -VP2-06:112:H7LNDMCVY:1:1105:2600:1110 -VP2-06:112:H7LNDMCVY:1:1105:2600:1110 -VP2-06:112:H7LNDMCVY:1:1105:3233:1110 -VP2-06:112:H7LNDMCVY:1:1105:3233:1110 -VP2-06:112:H7LNDMCVY:1:1105:4047:1110 -VP2-06:112:H7LNDMCVY:1:1105:4047:1110 -VP2-06:112:H7LNDMCVY:1:1105:4408:1110 -VP2-06:112:H7LNDMCVY:1:1105:4408:1110 -VP2-06:112:H7LNDMCVY:1:1105:4607:1110 -VP2-06:112:H7LNDMCVY:1:1105:4607:1110 -VP2-06:112:H7LNDMCVY:1:1105:4987:1110 -VP2-06:112:H7LNDMCVY:1:1105:4987:1110 -VP2-06:112:H7LNDMCVY:1:1105:5186:1110 -VP2-06:112:H7LNDMCVY:1:1105:5186:1110 -VP2-06:112:H7LNDMCVY:1:1105:5258:1110 -VP2-06:112:H7LNDMCVY:1:1105:5258:1110 -VP2-06:112:H7LNDMCVY:1:1105:5330:1110 -VP2-06:112:H7LNDMCVY:1:1105:5330:1110 -VP2-06:112:H7LNDMCVY:1:1105:5493:1110 -VP2-06:112:H7LNDMCVY:1:1105:5493:1110 -VP2-06:112:H7LNDMCVY:1:1105:6289:1110 -VP2-06:112:H7LNDMCVY:1:1105:6289:1110 -VP2-06:112:H7LNDMCVY:1:1105:6361:1110 -VP2-06:112:H7LNDMCVY:1:1105:6361:1110 -VP2-06:112:H7LNDMCVY:1:1105:7374:1110 -VP2-06:112:H7LNDMCVY:1:1105:7374:1110 -VP2-06:112:H7LNDMCVY:1:1105:7536:1110 -VP2-06:112:H7LNDMCVY:1:1105:7536:1110 -VP2-06:112:H7LNDMCVY:1:1105:7898:1110 -VP2-06:112:H7LNDMCVY:1:1105:7898:1110 -VP2-06:112:H7LNDMCVY:1:1105:8332:1110 -VP2-06:112:H7LNDMCVY:1:1105:8332:1110 -VP2-06:112:H7LNDMCVY:1:1105:8856:1110 -VP2-06:112:H7LNDMCVY:1:1105:8856:1110 -VP2-06:112:H7LNDMCVY:1:1105:8874:1110 -VP2-06:112:H7LNDMCVY:1:1105:8874:1110 -VP2-06:112:H7LNDMCVY:1:1105:9109:1110 -VP2-06:112:H7LNDMCVY:1:1105:9109:1110 -VP2-06:112:H7LNDMCVY:1:1105:9127:1110 -VP2-06:112:H7LNDMCVY:1:1105:9127:1110 -VP2-06:112:H7LNDMCVY:1:1105:9399:1110 
-VP2-06:112:H7LNDMCVY:1:1105:9399:1110 -VP2-06:112:H7LNDMCVY:1:1105:9471:1110 -VP2-06:112:H7LNDMCVY:1:1105:9471:1110 -VP2-06:112:H7LNDMCVY:1:1105:9507:1110 -VP2-06:112:H7LNDMCVY:1:1105:9507:1110 -VP2-06:112:H7LNDMCVY:1:1105:9616:1110 -VP2-06:112:H7LNDMCVY:1:1105:9616:1110 -VP2-06:112:H7LNDMCVY:1:1105:9688:1110 -VP2-06:112:H7LNDMCVY:1:1105:9688:1110 -VP2-06:112:H7LNDMCVY:1:1105:10556:1110 -VP2-06:112:H7LNDMCVY:1:1105:10556:1110 -VP2-06:112:H7LNDMCVY:1:1105:10628:1110 -VP2-06:112:H7LNDMCVY:1:1105:10628:1110 -VP2-06:112:H7LNDMCVY:1:1105:11840:1110 -VP2-06:112:H7LNDMCVY:1:1105:11840:1110 -VP2-06:112:H7LNDMCVY:1:1105:12038:1110 -VP2-06:112:H7LNDMCVY:1:1105:12038:1110 -VP2-06:112:H7LNDMCVY:1:1105:12219:1110 -VP2-06:112:H7LNDMCVY:1:1105:12219:1110 -VP2-06:112:H7LNDMCVY:1:1105:12563:1110 -VP2-06:112:H7LNDMCVY:1:1105:12563:1110 -VP2-06:112:H7LNDMCVY:1:1105:12943:1110 -VP2-06:112:H7LNDMCVY:1:1105:12943:1110 -VP2-06:112:H7LNDMCVY:1:1105:12961:1110 -VP2-06:112:H7LNDMCVY:1:1105:12961:1110 -VP2-06:112:H7LNDMCVY:1:1105:13051:1110 -VP2-06:112:H7LNDMCVY:1:1105:13051:1110 -VP2-06:112:H7LNDMCVY:1:1105:13141:1110 -VP2-06:112:H7LNDMCVY:1:1105:13141:1110 -VP2-06:112:H7LNDMCVY:1:1105:13340:1110 -VP2-06:112:H7LNDMCVY:1:1105:13340:1110 -VP2-06:112:H7LNDMCVY:1:1105:1127:1125 -VP2-06:112:H7LNDMCVY:1:1105:1127:1125 -VP2-06:112:H7LNDMCVY:1:1105:1597:1125 -VP2-06:112:H7LNDMCVY:1:1105:1597:1125 -VP2-06:112:H7LNDMCVY:1:1105:1723:1125 -VP2-06:112:H7LNDMCVY:1:1105:1723:1125 -VP2-06:112:H7LNDMCVY:1:1105:1904:1125 -VP2-06:112:H7LNDMCVY:1:1105:1904:1125 -VP2-06:112:H7LNDMCVY:1:1105:2230:1125 -VP2-06:112:H7LNDMCVY:1:1105:2230:1125 -VP2-06:112:H7LNDMCVY:1:1105:2284:1125 -VP2-06:112:H7LNDMCVY:1:1105:2284:1125 -VP2-06:112:H7LNDMCVY:1:1105:2302:1125 -VP2-06:112:H7LNDMCVY:1:1105:2302:1125 -VP2-06:112:H7LNDMCVY:1:1105:2519:1125 -VP2-06:112:H7LNDMCVY:1:1105:2519:1125 -VP2-06:112:H7LNDMCVY:1:1105:3115:1125 -VP2-06:112:H7LNDMCVY:1:1105:3115:1125 -VP2-06:112:H7LNDMCVY:1:1105:3332:1125 
-VP2-06:112:H7LNDMCVY:1:1105:3332:1125 -VP2-06:112:H7LNDMCVY:1:1105:3495:1125 -VP2-06:112:H7LNDMCVY:1:1105:3495:1125 -VP2-06:112:H7LNDMCVY:1:1105:3604:1125 -VP2-06:112:H7LNDMCVY:1:1105:3604:1125 -VP2-06:112:H7LNDMCVY:1:1105:4634:1125 -VP2-06:112:H7LNDMCVY:1:1105:4634:1125 -VP2-06:112:H7LNDMCVY:1:1105:5593:1125 -VP2-06:112:H7LNDMCVY:1:1105:5593:1125 -VP2-06:112:H7LNDMCVY:1:1105:5719:1125 -VP2-06:112:H7LNDMCVY:1:1105:5719:1125 -VP2-06:112:H7LNDMCVY:1:1105:6280:1125 -VP2-06:112:H7LNDMCVY:1:1105:6280:1125 -VP2-06:112:H7LNDMCVY:1:1105:6876:1125 -VP2-06:112:H7LNDMCVY:1:1105:6876:1125 -VP2-06:112:H7LNDMCVY:1:1105:7256:1125 -VP2-06:112:H7LNDMCVY:1:1105:7256:1125 -VP2-06:112:H7LNDMCVY:1:1105:7365:1125 -VP2-06:112:H7LNDMCVY:1:1105:7365:1125 -VP2-06:112:H7LNDMCVY:1:1105:7600:1125 -VP2-06:112:H7LNDMCVY:1:1105:7600:1125 -VP2-06:112:H7LNDMCVY:1:1105:7672:1125 -VP2-06:112:H7LNDMCVY:1:1105:7672:1125 -VP2-06:112:H7LNDMCVY:1:1105:7726:1125 -VP2-06:112:H7LNDMCVY:1:1105:7726:1125 -VP2-06:112:H7LNDMCVY:1:1105:7853:1125 -VP2-06:112:H7LNDMCVY:1:1105:7853:1125 -VP2-06:112:H7LNDMCVY:1:1105:7961:1125 -VP2-06:112:H7LNDMCVY:1:1105:7961:1125 -VP2-06:112:H7LNDMCVY:1:1105:8070:1125 -VP2-06:112:H7LNDMCVY:1:1105:8070:1125 -VP2-06:112:H7LNDMCVY:1:1105:8088:1125 -VP2-06:112:H7LNDMCVY:1:1105:8088:1125 -VP2-06:112:H7LNDMCVY:1:1105:8196:1125 -VP2-06:112:H7LNDMCVY:1:1105:8196:1125 -VP2-06:112:H7LNDMCVY:1:1105:8413:1125 -VP2-06:112:H7LNDMCVY:1:1105:8413:1125 -VP2-06:112:H7LNDMCVY:1:1105:8467:1125 -VP2-06:112:H7LNDMCVY:1:1105:8467:1125 -VP2-06:112:H7LNDMCVY:1:1105:9191:1125 -VP2-06:112:H7LNDMCVY:1:1105:9191:1125 -VP2-06:112:H7LNDMCVY:1:1105:9335:1125 -VP2-06:112:H7LNDMCVY:1:1105:9335:1125 -VP2-06:112:H7LNDMCVY:1:1105:9353:1125 -VP2-06:112:H7LNDMCVY:1:1105:9353:1125 -VP2-06:112:H7LNDMCVY:1:1105:10239:1125 -VP2-06:112:H7LNDMCVY:1:1105:10239:1125 -VP2-06:112:H7LNDMCVY:1:1105:10547:1125 -VP2-06:112:H7LNDMCVY:1:1105:10547:1125 -VP2-06:112:H7LNDMCVY:1:1105:11451:1125 -VP2-06:112:H7LNDMCVY:1:1105:11451:1125 
-VP2-06:112:H7LNDMCVY:1:1105:11523:1125 -VP2-06:112:H7LNDMCVY:1:1105:11523:1125 -VP2-06:112:H7LNDMCVY:1:1105:11595:1125 -VP2-06:112:H7LNDMCVY:1:1105:11595:1125 -VP2-06:112:H7LNDMCVY:1:1105:11794:1125 -VP2-06:112:H7LNDMCVY:1:1105:11794:1125 -VP2-06:112:H7LNDMCVY:1:1105:11921:1125 -VP2-06:112:H7LNDMCVY:1:1105:11921:1125 -VP2-06:112:H7LNDMCVY:1:1105:12301:1125 -VP2-06:112:H7LNDMCVY:1:1105:12301:1125 -VP2-06:112:H7LNDMCVY:1:1105:12409:1125 -VP2-06:112:H7LNDMCVY:1:1105:12409:1125 -VP2-06:112:H7LNDMCVY:1:1105:12500:1125 -VP2-06:112:H7LNDMCVY:1:1105:12500:1125 -VP2-06:112:H7LNDMCVY:1:1105:13024:1125 -VP2-06:112:H7LNDMCVY:1:1105:13024:1125 -VP2-06:112:H7LNDMCVY:1:1105:13096:1125 -VP2-06:112:H7LNDMCVY:1:1105:13096:1125 -VP2-06:112:H7LNDMCVY:1:1105:13223:1125 -VP2-06:112:H7LNDMCVY:1:1105:13223:1125 -VP2-06:112:H7LNDMCVY:1:1105:13512:1125 -VP2-06:112:H7LNDMCVY:1:1105:13512:1125 -VP2-06:112:H7LNDMCVY:1:1105:13621:1125 -VP2-06:112:H7LNDMCVY:1:1105:13621:1125 -VP2-06:112:H7LNDMCVY:1:1105:13657:1125 -VP2-06:112:H7LNDMCVY:1:1105:13657:1125 -VP2-06:112:H7LNDMCVY:1:1105:13675:1125 -VP2-06:112:H7LNDMCVY:1:1105:13675:1125 -VP2-06:112:H7LNDMCVY:1:1105:13838:1125 -VP2-06:112:H7LNDMCVY:1:1105:13838:1125 -VP2-06:112:H7LNDMCVY:1:1105:14181:1125 -VP2-06:112:H7LNDMCVY:1:1105:14181:1125 -VP2-06:112:H7LNDMCVY:1:1105:14543:1125 -VP2-06:112:H7LNDMCVY:1:1105:14543:1125 -VP2-06:112:H7LNDMCVY:1:1105:14597:1125 -VP2-06:112:H7LNDMCVY:1:1105:14597:1125 -VP2-06:112:H7LNDMCVY:1:1105:14868:1125 -VP2-06:112:H7LNDMCVY:1:1105:14868:1125 -VP2-06:112:H7LNDMCVY:1:1105:15067:1125 -VP2-06:112:H7LNDMCVY:1:1105:15067:1125 -VP2-06:112:H7LNDMCVY:1:1105:15085:1125 -VP2-06:112:H7LNDMCVY:1:1105:15085:1125 -VP2-06:112:H7LNDMCVY:1:1105:15483:1125 -VP2-06:112:H7LNDMCVY:1:1105:15483:1125 -VP2-06:112:H7LNDMCVY:1:1105:15664:1125 -VP2-06:112:H7LNDMCVY:1:1105:15664:1125 -VP2-06:112:H7LNDMCVY:1:1105:15863:1125 -VP2-06:112:H7LNDMCVY:1:1105:15863:1125 -VP2-06:112:H7LNDMCVY:1:1105:16025:1125 -VP2-06:112:H7LNDMCVY:1:1105:16025:1125 
-VP2-06:112:H7LNDMCVY:1:1105:16043:1125 -VP2-06:112:H7LNDMCVY:1:1105:16043:1125 -VP2-06:112:H7LNDMCVY:1:1105:16134:1125 -VP2-06:112:H7LNDMCVY:1:1105:16134:1125 -VP2-06:112:H7LNDMCVY:1:1105:16170:1125 -VP2-06:112:H7LNDMCVY:1:1105:16170:1125 -VP2-06:112:H7LNDMCVY:1:1105:16676:1125 -VP2-06:112:H7LNDMCVY:1:1105:16676:1125 -VP2-06:112:H7LNDMCVY:1:1105:16929:1125 -VP2-06:112:H7LNDMCVY:1:1105:16929:1125 -VP2-06:112:H7LNDMCVY:1:1105:16966:1125 -VP2-06:112:H7LNDMCVY:1:1105:16966:1125 -VP2-06:112:H7LNDMCVY:1:1105:1642:1141 -VP2-06:112:H7LNDMCVY:1:1105:1642:1141 -VP2-06:112:H7LNDMCVY:1:1105:1913:1141 -VP2-06:112:H7LNDMCVY:1:1105:1913:1141 -VP2-06:112:H7LNDMCVY:1:1105:2130:1141 -VP2-06:112:H7LNDMCVY:1:1105:2130:1141 -VP2-06:112:H7LNDMCVY:1:1105:2239:1141 -VP2-06:112:H7LNDMCVY:1:1105:2239:1141 -VP2-06:112:H7LNDMCVY:1:1105:2600:1141 -VP2-06:112:H7LNDMCVY:1:1105:2600:1141 -VP2-06:112:H7LNDMCVY:1:1105:2636:1141 -VP2-06:112:H7LNDMCVY:1:1105:2636:1141 -VP2-06:112:H7LNDMCVY:1:1105:2727:1141 -VP2-06:112:H7LNDMCVY:1:1105:2727:1141 -VP2-06:112:H7LNDMCVY:1:1105:2908:1141 -VP2-06:112:H7LNDMCVY:1:1105:2908:1141 -VP2-06:112:H7LNDMCVY:1:1105:3034:1141 -VP2-06:112:H7LNDMCVY:1:1105:3034:1141 -VP2-06:112:H7LNDMCVY:1:1105:3233:1141 -VP2-06:112:H7LNDMCVY:1:1105:3233:1141 -VP2-06:112:H7LNDMCVY:1:1105:3323:1141 -VP2-06:112:H7LNDMCVY:1:1105:3323:1141 -VP2-06:112:H7LNDMCVY:1:1105:3378:1141 -VP2-06:112:H7LNDMCVY:1:1105:3378:1141 -VP2-06:112:H7LNDMCVY:1:1105:4625:1141 -VP2-06:112:H7LNDMCVY:1:1105:4625:1141 -VP2-06:112:H7LNDMCVY:1:1105:4752:1141 -VP2-06:112:H7LNDMCVY:1:1105:4752:1141 -VP2-06:112:H7LNDMCVY:1:1105:4788:1141 -VP2-06:112:H7LNDMCVY:1:1105:4788:1141 -VP2-06:112:H7LNDMCVY:1:1105:4860:1141 -VP2-06:112:H7LNDMCVY:1:1105:4860:1141 -VP2-06:112:H7LNDMCVY:1:1105:5294:1141 -VP2-06:112:H7LNDMCVY:1:1105:5294:1141 -VP2-06:112:H7LNDMCVY:1:1105:5584:1141 -VP2-06:112:H7LNDMCVY:1:1105:5584:1141 -VP2-06:112:H7LNDMCVY:1:1105:5764:1141 -VP2-06:112:H7LNDMCVY:1:1105:5764:1141 
-VP2-06:112:H7LNDMCVY:1:1105:5963:1141 -VP2-06:112:H7LNDMCVY:1:1105:5963:1141 -VP2-06:112:H7LNDMCVY:1:1105:6289:1141 -VP2-06:112:H7LNDMCVY:1:1105:6289:1141 -VP2-06:112:H7LNDMCVY:1:1105:6470:1141 -VP2-06:112:H7LNDMCVY:1:1105:6470:1141 -VP2-06:112:H7LNDMCVY:1:1105:6867:1141 -VP2-06:112:H7LNDMCVY:1:1105:6867:1141 -VP2-06:112:H7LNDMCVY:1:1105:7139:1141 -VP2-06:112:H7LNDMCVY:1:1105:7139:1141 -VP2-06:112:H7LNDMCVY:1:1105:7283:1141 -VP2-06:112:H7LNDMCVY:1:1105:7283:1141 -VP2-06:112:H7LNDMCVY:1:1105:7374:1141 -VP2-06:112:H7LNDMCVY:1:1105:7374:1141 -VP2-06:112:H7LNDMCVY:1:1105:7663:1141 -VP2-06:112:H7LNDMCVY:1:1105:7663:1141 -VP2-06:112:H7LNDMCVY:1:1105:7880:1141 -VP2-06:112:H7LNDMCVY:1:1105:7880:1141 -VP2-06:112:H7LNDMCVY:1:1105:7970:1141 -VP2-06:112:H7LNDMCVY:1:1105:7970:1141 -VP2-06:112:H7LNDMCVY:1:1105:8458:1141 -VP2-06:112:H7LNDMCVY:1:1105:8458:1141 -VP2-06:112:H7LNDMCVY:1:1105:8657:1141 -VP2-06:112:H7LNDMCVY:1:1105:8657:1141 -VP2-06:112:H7LNDMCVY:1:1105:8802:1141 -VP2-06:112:H7LNDMCVY:1:1105:8802:1141 -VP2-06:112:H7LNDMCVY:1:1105:8856:1141 -VP2-06:112:H7LNDMCVY:1:1105:8856:1141 -VP2-06:112:H7LNDMCVY:1:1105:8892:1141 -VP2-06:112:H7LNDMCVY:1:1105:8892:1141 -VP2-06:112:H7LNDMCVY:1:1105:8983:1141 -VP2-06:112:H7LNDMCVY:1:1105:8983:1141 -VP2-06:112:H7LNDMCVY:1:1105:9236:1141 -VP2-06:112:H7LNDMCVY:1:1105:9236:1141 -VP2-06:112:H7LNDMCVY:1:1105:9290:1141 -VP2-06:112:H7LNDMCVY:1:1105:9290:1141 -VP2-06:112:H7LNDMCVY:1:1105:9326:1141 -VP2-06:112:H7LNDMCVY:1:1105:9326:1141 -VP2-06:112:H7LNDMCVY:1:1105:9598:1141 -VP2-06:112:H7LNDMCVY:1:1105:9598:1141 -VP2-06:112:H7LNDMCVY:1:1105:9634:1141 -VP2-06:112:H7LNDMCVY:1:1105:9634:1141 -VP2-06:112:H7LNDMCVY:1:1105:9652:1141 -VP2-06:112:H7LNDMCVY:1:1105:9652:1141 -VP2-06:112:H7LNDMCVY:1:1105:9869:1141 -VP2-06:112:H7LNDMCVY:1:1105:9869:1141 -VP2-06:112:H7LNDMCVY:1:1105:10050:1141 -VP2-06:112:H7LNDMCVY:1:1105:10050:1141 -VP2-06:112:H7LNDMCVY:1:1105:10194:1141 -VP2-06:112:H7LNDMCVY:1:1105:10194:1141 -VP2-06:112:H7LNDMCVY:1:1105:10465:1141 
-VP2-06:112:H7LNDMCVY:1:1105:10465:1141 -VP2-06:112:H7LNDMCVY:1:1105:11008:1141 -VP2-06:112:H7LNDMCVY:1:1105:11008:1141 -VP2-06:112:H7LNDMCVY:1:1105:11406:1141 -VP2-06:112:H7LNDMCVY:1:1105:11406:1141 -VP2-06:112:H7LNDMCVY:1:1105:11623:1141 -VP2-06:112:H7LNDMCVY:1:1105:11623:1141 -VP2-06:112:H7LNDMCVY:1:1105:11659:1141 -VP2-06:112:H7LNDMCVY:1:1105:11659:1141 -VP2-06:112:H7LNDMCVY:1:1105:12020:1141 -VP2-06:112:H7LNDMCVY:1:1105:12020:1141 -VP2-06:112:H7LNDMCVY:1:1105:12075:1141 -VP2-06:112:H7LNDMCVY:1:1105:12075:1141 -VP2-06:112:H7LNDMCVY:1:1105:12292:1141 -VP2-06:112:H7LNDMCVY:1:1105:12292:1141 -VP2-06:112:H7LNDMCVY:1:1105:12382:1141 -VP2-06:112:H7LNDMCVY:1:1105:12382:1141 -VP2-06:112:H7LNDMCVY:1:1105:12400:1141 -VP2-06:112:H7LNDMCVY:1:1105:12400:1141 -VP2-06:112:H7LNDMCVY:1:1105:12563:1141 -VP2-06:112:H7LNDMCVY:1:1105:12563:1141 -VP2-06:112:H7LNDMCVY:1:1105:12653:1141 -VP2-06:112:H7LNDMCVY:1:1105:12653:1141 -VP2-06:112:H7LNDMCVY:1:1105:12671:1141 -VP2-06:112:H7LNDMCVY:1:1105:12671:1141 -VP2-06:112:H7LNDMCVY:1:1105:12870:1141 -VP2-06:112:H7LNDMCVY:1:1105:12870:1141 -VP2-06:112:H7LNDMCVY:1:1105:13069:1141 -VP2-06:112:H7LNDMCVY:1:1105:13069:1141 -VP2-06:112:H7LNDMCVY:1:1105:13250:1141 -VP2-06:112:H7LNDMCVY:1:1105:13250:1141 -VP2-06:112:H7LNDMCVY:1:1105:13630:1141 -VP2-06:112:H7LNDMCVY:1:1105:13630:1141 -VP2-06:112:H7LNDMCVY:1:1105:13648:1141 -VP2-06:112:H7LNDMCVY:1:1105:13648:1141 -VP2-06:112:H7LNDMCVY:1:1105:13720:1141 -VP2-06:112:H7LNDMCVY:1:1105:13720:1141 -VP2-06:112:H7LNDMCVY:1:1105:13865:1141 -VP2-06:112:H7LNDMCVY:1:1105:13865:1141 -VP2-06:112:H7LNDMCVY:1:1105:14009:1141 -VP2-06:112:H7LNDMCVY:1:1105:14009:1141 -VP2-06:112:H7LNDMCVY:1:1105:14244:1141 -VP2-06:112:H7LNDMCVY:1:1105:14244:1141 -VP2-06:112:H7LNDMCVY:1:1105:14389:1141 -VP2-06:112:H7LNDMCVY:1:1105:14389:1141 -VP2-06:112:H7LNDMCVY:1:1105:14588:1141 -VP2-06:112:H7LNDMCVY:1:1105:14588:1141 -VP2-06:112:H7LNDMCVY:1:1105:14606:1141 -VP2-06:112:H7LNDMCVY:1:1105:14606:1141 -VP2-06:112:H7LNDMCVY:1:1105:14624:1141 
-VP2-06:112:H7LNDMCVY:1:1105:14624:1141 -VP2-06:112:H7LNDMCVY:1:1105:14660:1141 -VP2-06:112:H7LNDMCVY:1:1105:14660:1141 -VP2-06:112:H7LNDMCVY:1:1105:14986:1141 -VP2-06:112:H7LNDMCVY:1:1105:14986:1141 -VP2-06:112:H7LNDMCVY:1:1105:15402:1141 -VP2-06:112:H7LNDMCVY:1:1105:15402:1141 -VP2-06:112:H7LNDMCVY:1:1105:15492:1141 -VP2-06:112:H7LNDMCVY:1:1105:15492:1141 -VP2-06:112:H7LNDMCVY:1:1105:15510:1141 -VP2-06:112:H7LNDMCVY:1:1105:15510:1141 -VP2-06:112:H7LNDMCVY:1:1105:16161:1141 -VP2-06:112:H7LNDMCVY:1:1105:16161:1141 -VP2-06:112:H7LNDMCVY:1:1105:16269:1141 -VP2-06:112:H7LNDMCVY:1:1105:16269:1141 -VP2-06:112:H7LNDMCVY:1:1105:16414:1141 -VP2-06:112:H7LNDMCVY:1:1105:16414:1141 -VP2-06:112:H7LNDMCVY:1:1105:16667:1141 -VP2-06:112:H7LNDMCVY:1:1105:16667:1141 -VP2-06:112:H7LNDMCVY:1:1105:16884:1141 -VP2-06:112:H7LNDMCVY:1:1105:16884:1141 -VP2-06:112:H7LNDMCVY:1:1105:17011:1141 -VP2-06:112:H7LNDMCVY:1:1105:17011:1141 -VP2-06:112:H7LNDMCVY:1:1105:17354:1141 -VP2-06:112:H7LNDMCVY:1:1105:17354:1141 -VP2-06:112:H7LNDMCVY:1:1105:18005:1141 -VP2-06:112:H7LNDMCVY:1:1105:18005:1141 -VP2-06:112:H7LNDMCVY:1:1105:18096:1141 -VP2-06:112:H7LNDMCVY:1:1105:18096:1141 -VP2-06:112:H7LNDMCVY:1:1105:18313:1141 -VP2-06:112:H7LNDMCVY:1:1105:18313:1141 -VP2-06:112:H7LNDMCVY:1:1105:18493:1141 -VP2-06:112:H7LNDMCVY:1:1105:18493:1141 -VP2-06:112:H7LNDMCVY:1:1105:18710:1141 -VP2-06:112:H7LNDMCVY:1:1105:18710:1141 -VP2-06:112:H7LNDMCVY:1:1105:19217:1141 -VP2-06:112:H7LNDMCVY:1:1105:19217:1141 -VP2-06:112:H7LNDMCVY:1:1105:19614:1141 -VP2-06:112:H7LNDMCVY:1:1105:19614:1141 -VP2-06:112:H7LNDMCVY:1:1105:19904:1141 -VP2-06:112:H7LNDMCVY:1:1105:19904:1141 -VP2-06:112:H7LNDMCVY:1:1105:20085:1141 -VP2-06:112:H7LNDMCVY:1:1105:20085:1141 -VP2-06:112:H7LNDMCVY:1:1105:20193:1141 -VP2-06:112:H7LNDMCVY:1:1105:20193:1141 -VP2-06:112:H7LNDMCVY:1:1105:20573:1141 -VP2-06:112:H7LNDMCVY:1:1105:20573:1141 -VP2-06:112:H7LNDMCVY:1:1105:20772:1141 -VP2-06:112:H7LNDMCVY:1:1105:20772:1141 -VP2-06:112:H7LNDMCVY:1:1105:20808:1141 
-VP2-06:112:H7LNDMCVY:1:1105:20808:1141 -VP2-06:112:H7LNDMCVY:1:1105:1108:1157 -VP2-06:112:H7LNDMCVY:1:1105:1108:1157 -VP2-06:112:H7LNDMCVY:1:1105:2193:1157 -VP2-06:112:H7LNDMCVY:1:1105:2193:1157 -VP2-06:112:H7LNDMCVY:1:1105:2338:1157 -VP2-06:112:H7LNDMCVY:1:1105:2338:1157 -VP2-06:112:H7LNDMCVY:1:1105:2374:1157 -VP2-06:112:H7LNDMCVY:1:1105:2374:1157 -VP2-06:112:H7LNDMCVY:1:1105:2446:1157 -VP2-06:112:H7LNDMCVY:1:1105:2446:1157 -VP2-06:112:H7LNDMCVY:1:1105:3079:1157 -VP2-06:112:H7LNDMCVY:1:1105:3079:1157 -VP2-06:112:H7LNDMCVY:1:1105:4074:1157 -VP2-06:112:H7LNDMCVY:1:1105:4074:1157 -VP2-06:112:H7LNDMCVY:1:1105:4670:1157 -VP2-06:112:H7LNDMCVY:1:1105:4670:1157 -VP2-06:112:H7LNDMCVY:1:1105:5231:1157 -VP2-06:112:H7LNDMCVY:1:1105:5231:1157 -VP2-06:112:H7LNDMCVY:1:1105:5502:1157 -VP2-06:112:H7LNDMCVY:1:1105:5502:1157 -VP2-06:112:H7LNDMCVY:1:1105:5556:1157 -VP2-06:112:H7LNDMCVY:1:1105:5556:1157 -VP2-06:112:H7LNDMCVY:1:1105:5737:1157 -VP2-06:112:H7LNDMCVY:1:1105:5737:1157 -VP2-06:112:H7LNDMCVY:1:1105:5810:1157 -VP2-06:112:H7LNDMCVY:1:1105:5810:1157 -VP2-06:112:H7LNDMCVY:1:1105:5828:1157 -VP2-06:112:H7LNDMCVY:1:1105:5828:1157 -VP2-06:112:H7LNDMCVY:1:1105:5918:1157 -VP2-06:112:H7LNDMCVY:1:1105:5918:1157 -VP2-06:112:H7LNDMCVY:1:1105:6207:1157 -VP2-06:112:H7LNDMCVY:1:1105:6207:1157 -VP2-06:112:H7LNDMCVY:1:1105:6804:1157 -VP2-06:112:H7LNDMCVY:1:1105:6804:1157 -VP2-06:112:H7LNDMCVY:1:1105:7075:1157 -VP2-06:112:H7LNDMCVY:1:1105:7075:1157 -VP2-06:112:H7LNDMCVY:1:1105:7328:1157 -VP2-06:112:H7LNDMCVY:1:1105:7328:1157 -VP2-06:112:H7LNDMCVY:1:1105:7401:1157 -VP2-06:112:H7LNDMCVY:1:1105:7401:1157 -VP2-06:112:H7LNDMCVY:1:1105:7817:1157 -VP2-06:112:H7LNDMCVY:1:1105:7817:1157 -VP2-06:112:H7LNDMCVY:1:1105:8070:1157 -VP2-06:112:H7LNDMCVY:1:1105:8070:1157 -VP2-06:112:H7LNDMCVY:1:1105:8250:1157 -VP2-06:112:H7LNDMCVY:1:1105:8250:1157 -VP2-06:112:H7LNDMCVY:1:1105:8305:1157 -VP2-06:112:H7LNDMCVY:1:1105:8305:1157 -VP2-06:112:H7LNDMCVY:1:1105:8504:1157 -VP2-06:112:H7LNDMCVY:1:1105:8504:1157 
-VP2-06:112:H7LNDMCVY:1:1105:8648:1157 -VP2-06:112:H7LNDMCVY:1:1105:8648:1157 -VP2-06:112:H7LNDMCVY:1:1105:8775:1157 -VP2-06:112:H7LNDMCVY:1:1105:8775:1157 -VP2-06:112:H7LNDMCVY:1:1105:8847:1157 -VP2-06:112:H7LNDMCVY:1:1105:8847:1157 -VP2-06:112:H7LNDMCVY:1:1105:8865:1157 -VP2-06:112:H7LNDMCVY:1:1105:8865:1157 -VP2-06:112:H7LNDMCVY:1:1105:8883:1157 -VP2-06:112:H7LNDMCVY:1:1105:8883:1157 -VP2-06:112:H7LNDMCVY:1:1105:9625:1157 -VP2-06:112:H7LNDMCVY:1:1105:9625:1157 -VP2-06:112:H7LNDMCVY:1:1105:9878:1157 -VP2-06:112:H7LNDMCVY:1:1105:9878:1157 -VP2-06:112:H7LNDMCVY:1:1105:10059:1157 -VP2-06:112:H7LNDMCVY:1:1105:10059:1157 -VP2-06:112:H7LNDMCVY:1:1105:10402:1157 -VP2-06:112:H7LNDMCVY:1:1105:10402:1157 -VP2-06:112:H7LNDMCVY:1:1105:10583:1157 -VP2-06:112:H7LNDMCVY:1:1105:10583:1157 -VP2-06:112:H7LNDMCVY:1:1105:10782:1157 -VP2-06:112:H7LNDMCVY:1:1105:10782:1157 -VP2-06:112:H7LNDMCVY:1:1105:11053:1157 -VP2-06:112:H7LNDMCVY:1:1105:11053:1157 -VP2-06:112:H7LNDMCVY:1:1105:11071:1157 -VP2-06:112:H7LNDMCVY:1:1105:11071:1157 -VP2-06:112:H7LNDMCVY:1:1105:11595:1157 -VP2-06:112:H7LNDMCVY:1:1105:11595:1157 -VP2-06:112:H7LNDMCVY:1:1105:11903:1157 -VP2-06:112:H7LNDMCVY:1:1105:11903:1157 -VP2-06:112:H7LNDMCVY:1:1105:12011:1157 -VP2-06:112:H7LNDMCVY:1:1105:12011:1157 -VP2-06:112:H7LNDMCVY:1:1105:12029:1157 -VP2-06:112:H7LNDMCVY:1:1105:12029:1157 -VP2-06:112:H7LNDMCVY:1:1105:12066:1157 -VP2-06:112:H7LNDMCVY:1:1105:12066:1157 -VP2-06:112:H7LNDMCVY:1:1105:12337:1157 -VP2-06:112:H7LNDMCVY:1:1105:12337:1157 -VP2-06:112:H7LNDMCVY:1:1105:13259:1157 -VP2-06:112:H7LNDMCVY:1:1105:13259:1157 -VP2-06:112:H7LNDMCVY:1:1105:13440:1157 -VP2-06:112:H7LNDMCVY:1:1105:13440:1157 -VP2-06:112:H7LNDMCVY:1:1105:13494:1157 -VP2-06:112:H7LNDMCVY:1:1105:13494:1157 -VP2-06:112:H7LNDMCVY:1:1105:13548:1157 -VP2-06:112:H7LNDMCVY:1:1105:13548:1157 -VP2-06:112:H7LNDMCVY:1:1105:13675:1157 -VP2-06:112:H7LNDMCVY:1:1105:13675:1157 -VP2-06:112:H7LNDMCVY:1:1105:13729:1157 -VP2-06:112:H7LNDMCVY:1:1105:13729:1157 
-VP2-06:112:H7LNDMCVY:1:1105:14036:1157 -VP2-06:112:H7LNDMCVY:1:1105:14036:1157 -VP2-06:112:H7LNDMCVY:1:1105:14796:1157 -VP2-06:112:H7LNDMCVY:1:1105:14796:1157 -VP2-06:112:H7LNDMCVY:1:1105:14886:1157 -VP2-06:112:H7LNDMCVY:1:1105:14886:1157 -VP2-06:112:H7LNDMCVY:1:1105:14922:1157 -VP2-06:112:H7LNDMCVY:1:1105:14922:1157 -VP2-06:112:H7LNDMCVY:1:1105:15031:1157 -VP2-06:112:H7LNDMCVY:1:1105:15031:1157 -VP2-06:112:H7LNDMCVY:1:1105:15121:1157 -VP2-06:112:H7LNDMCVY:1:1105:15121:1157 -VP2-06:112:H7LNDMCVY:1:1105:15176:1157 -VP2-06:112:H7LNDMCVY:1:1105:15176:1157 -VP2-06:112:H7LNDMCVY:1:1105:15284:1157 -VP2-06:112:H7LNDMCVY:1:1105:15284:1157 -VP2-06:112:H7LNDMCVY:1:1105:15302:1157 -VP2-06:112:H7LNDMCVY:1:1105:15302:1157 -VP2-06:112:H7LNDMCVY:1:1105:15393:1157 -VP2-06:112:H7LNDMCVY:1:1105:15393:1157 -VP2-06:112:H7LNDMCVY:1:1105:15501:1157 -VP2-06:112:H7LNDMCVY:1:1105:15501:1157 -VP2-06:112:H7LNDMCVY:1:1105:15609:1157 -VP2-06:112:H7LNDMCVY:1:1105:15609:1157 -VP2-06:112:H7LNDMCVY:1:1105:15772:1157 -VP2-06:112:H7LNDMCVY:1:1105:15772:1157 -VP2-06:112:H7LNDMCVY:1:1105:15953:1157 -VP2-06:112:H7LNDMCVY:1:1105:15953:1157 -VP2-06:112:H7LNDMCVY:1:1105:16206:1157 -VP2-06:112:H7LNDMCVY:1:1105:16206:1157 -VP2-06:112:H7LNDMCVY:1:1105:16278:1157 -VP2-06:112:H7LNDMCVY:1:1105:16278:1157 -VP2-06:112:H7LNDMCVY:1:1105:16351:1157 -VP2-06:112:H7LNDMCVY:1:1105:16351:1157 -VP2-06:112:H7LNDMCVY:1:1105:16640:1157 -VP2-06:112:H7LNDMCVY:1:1105:16640:1157 -VP2-06:112:H7LNDMCVY:1:1105:16712:1157 -VP2-06:112:H7LNDMCVY:1:1105:16712:1157 -VP2-06:112:H7LNDMCVY:1:1105:17038:1157 -VP2-06:112:H7LNDMCVY:1:1105:17038:1157 -VP2-06:112:H7LNDMCVY:1:1105:17056:1157 -VP2-06:112:H7LNDMCVY:1:1105:17056:1157 -VP2-06:112:H7LNDMCVY:1:1105:17291:1157 -VP2-06:112:H7LNDMCVY:1:1105:17291:1157 -VP2-06:112:H7LNDMCVY:1:1105:17598:1157 -VP2-06:112:H7LNDMCVY:1:1105:17598:1157 -VP2-06:112:H7LNDMCVY:1:1105:17779:1157 -VP2-06:112:H7LNDMCVY:1:1105:17779:1157 -VP2-06:112:H7LNDMCVY:1:1105:17888:1157 -VP2-06:112:H7LNDMCVY:1:1105:17888:1157 
-VP2-06:112:H7LNDMCVY:1:1105:18249:1157 -VP2-06:112:H7LNDMCVY:1:1105:18249:1157 -VP2-06:112:H7LNDMCVY:1:1105:18267:1157 -VP2-06:112:H7LNDMCVY:1:1105:18267:1157 -VP2-06:112:H7LNDMCVY:1:1105:18521:1157 -VP2-06:112:H7LNDMCVY:1:1105:18521:1157 -VP2-06:112:H7LNDMCVY:1:1105:18611:1157 -VP2-06:112:H7LNDMCVY:1:1105:18611:1157 -VP2-06:112:H7LNDMCVY:1:1105:18936:1157 -VP2-06:112:H7LNDMCVY:1:1105:18936:1157 -VP2-06:112:H7LNDMCVY:1:1105:18991:1157 -VP2-06:112:H7LNDMCVY:1:1105:18991:1157 -VP2-06:112:H7LNDMCVY:1:1105:19171:1157 -VP2-06:112:H7LNDMCVY:1:1105:19171:1157 -VP2-06:112:H7LNDMCVY:1:1105:19226:1157 -VP2-06:112:H7LNDMCVY:1:1105:19226:1157 -VP2-06:112:H7LNDMCVY:1:1105:19298:1157 -VP2-06:112:H7LNDMCVY:1:1105:19298:1157 -VP2-06:112:H7LNDMCVY:1:1105:19569:1157 -VP2-06:112:H7LNDMCVY:1:1105:19569:1157 -VP2-06:112:H7LNDMCVY:1:1105:19605:1157 -VP2-06:112:H7LNDMCVY:1:1105:19605:1157 -VP2-06:112:H7LNDMCVY:1:1105:19660:1157 -VP2-06:112:H7LNDMCVY:1:1105:19660:1157 -VP2-06:112:H7LNDMCVY:1:1105:19895:1157 -VP2-06:112:H7LNDMCVY:1:1105:19895:1157 -VP2-06:112:H7LNDMCVY:1:1105:20654:1157 -VP2-06:112:H7LNDMCVY:1:1105:20654:1157 -VP2-06:112:H7LNDMCVY:1:1105:21359:1157 -VP2-06:112:H7LNDMCVY:1:1105:21359:1157 -VP2-06:112:H7LNDMCVY:1:1105:21522:1157 -VP2-06:112:H7LNDMCVY:1:1105:21522:1157 -VP2-06:112:H7LNDMCVY:1:1105:21938:1157 -VP2-06:112:H7LNDMCVY:1:1105:21938:1157 -VP2-06:112:H7LNDMCVY:1:1105:22028:1157 -VP2-06:112:H7LNDMCVY:1:1105:22028:1157 -VP2-06:112:H7LNDMCVY:1:1105:22155:1157 -VP2-06:112:H7LNDMCVY:1:1105:22155:1157 -VP2-06:112:H7LNDMCVY:1:1105:22299:1157 -VP2-06:112:H7LNDMCVY:1:1105:22299:1157 -VP2-06:112:H7LNDMCVY:1:1105:22480:1157 -VP2-06:112:H7LNDMCVY:1:1105:22480:1157 -VP2-06:112:H7LNDMCVY:1:1105:22697:1157 -VP2-06:112:H7LNDMCVY:1:1105:22697:1157 -VP2-06:112:H7LNDMCVY:1:1105:22788:1157 -VP2-06:112:H7LNDMCVY:1:1105:22788:1157 -VP2-06:112:H7LNDMCVY:1:1105:23330:1157 -VP2-06:112:H7LNDMCVY:1:1105:23330:1157 -VP2-06:112:H7LNDMCVY:1:1105:23402:1157 -VP2-06:112:H7LNDMCVY:1:1105:23402:1157 
-VP2-06:112:H7LNDMCVY:1:1105:23547:1157 -VP2-06:112:H7LNDMCVY:1:1105:23547:1157 -VP2-06:112:H7LNDMCVY:1:1105:23818:1157 -VP2-06:112:H7LNDMCVY:1:1105:23818:1157 -VP2-06:112:H7LNDMCVY:1:1105:24288:1157 -VP2-06:112:H7LNDMCVY:1:1105:24288:1157 -VP2-06:112:H7LNDMCVY:1:1105:24668:1157 -VP2-06:112:H7LNDMCVY:1:1105:24668:1157 -VP2-06:112:H7LNDMCVY:1:1105:24704:1157 -VP2-06:112:H7LNDMCVY:1:1105:24704:1157 -VP2-06:112:H7LNDMCVY:1:1105:1624:1172 -VP2-06:112:H7LNDMCVY:1:1105:1624:1172 -VP2-06:112:H7LNDMCVY:1:1105:1787:1172 -VP2-06:112:H7LNDMCVY:1:1105:1787:1172 -VP2-06:112:H7LNDMCVY:1:1105:3070:1172 -VP2-06:112:H7LNDMCVY:1:1105:3070:1172 -VP2-06:112:H7LNDMCVY:1:1105:3197:1172 -VP2-06:112:H7LNDMCVY:1:1105:3197:1172 -VP2-06:112:H7LNDMCVY:1:1105:3215:1172 -VP2-06:112:H7LNDMCVY:1:1105:3215:1172 -VP2-06:112:H7LNDMCVY:1:1105:3233:1172 -VP2-06:112:H7LNDMCVY:1:1105:3233:1172 -VP2-06:112:H7LNDMCVY:1:1105:3992:1172 -VP2-06:112:H7LNDMCVY:1:1105:3992:1172 -VP2-06:112:H7LNDMCVY:1:1105:4101:1172 -VP2-06:112:H7LNDMCVY:1:1105:4101:1172 -VP2-06:112:H7LNDMCVY:1:1105:4119:1172 -VP2-06:112:H7LNDMCVY:1:1105:4119:1172 -VP2-06:112:H7LNDMCVY:1:1105:4137:1172 -VP2-06:112:H7LNDMCVY:1:1105:4137:1172 -VP2-06:112:H7LNDMCVY:1:1105:4372:1172 -VP2-06:112:H7LNDMCVY:1:1105:4372:1172 -VP2-06:112:H7LNDMCVY:1:1105:4481:1172 -VP2-06:112:H7LNDMCVY:1:1105:4481:1172 -VP2-06:112:H7LNDMCVY:1:1105:4535:1172 -VP2-06:112:H7LNDMCVY:1:1105:4535:1172 -VP2-06:112:H7LNDMCVY:1:1105:5150:1172 -VP2-06:112:H7LNDMCVY:1:1105:5150:1172 -VP2-06:112:H7LNDMCVY:1:1105:6108:1172 -VP2-06:112:H7LNDMCVY:1:1105:6108:1172 -VP2-06:112:H7LNDMCVY:1:1105:6126:1172 -VP2-06:112:H7LNDMCVY:1:1105:6126:1172 -VP2-06:112:H7LNDMCVY:1:1105:6253:1172 -VP2-06:112:H7LNDMCVY:1:1105:6253:1172 -VP2-06:112:H7LNDMCVY:1:1105:6271:1172 -VP2-06:112:H7LNDMCVY:1:1105:6271:1172 -VP2-06:112:H7LNDMCVY:1:1105:6433:1172 -VP2-06:112:H7LNDMCVY:1:1105:6433:1172 -VP2-06:112:H7LNDMCVY:1:1105:6614:1172 -VP2-06:112:H7LNDMCVY:1:1105:6614:1172 -VP2-06:112:H7LNDMCVY:1:1105:6831:1172 
-VP2-06:112:H7LNDMCVY:1:1105:6831:1172 -VP2-06:112:H7LNDMCVY:1:1105:6849:1172 -VP2-06:112:H7LNDMCVY:1:1105:6849:1172 -VP2-06:112:H7LNDMCVY:1:1105:7500:1172 -VP2-06:112:H7LNDMCVY:1:1105:7500:1172 -VP2-06:112:H7LNDMCVY:1:1105:8241:1172 -VP2-06:112:H7LNDMCVY:1:1105:8241:1172 -VP2-06:112:H7LNDMCVY:1:1105:8368:1172 -VP2-06:112:H7LNDMCVY:1:1105:8368:1172 -VP2-06:112:H7LNDMCVY:1:1105:9037:1172 -VP2-06:112:H7LNDMCVY:1:1105:9037:1172 -VP2-06:112:H7LNDMCVY:1:1105:9272:1172 -VP2-06:112:H7LNDMCVY:1:1105:9272:1172 -VP2-06:112:H7LNDMCVY:1:1105:9308:1172 -VP2-06:112:H7LNDMCVY:1:1105:9308:1172 -VP2-06:112:H7LNDMCVY:1:1105:9941:1172 -VP2-06:112:H7LNDMCVY:1:1105:9941:1172 -VP2-06:112:H7LNDMCVY:1:1105:10013:1172 -VP2-06:112:H7LNDMCVY:1:1105:10013:1172 -VP2-06:112:H7LNDMCVY:1:1105:10411:1172 -VP2-06:112:H7LNDMCVY:1:1105:10411:1172 -VP2-06:112:H7LNDMCVY:1:1105:10592:1172 -VP2-06:112:H7LNDMCVY:1:1105:10592:1172 -VP2-06:112:H7LNDMCVY:1:1105:10719:1172 -VP2-06:112:H7LNDMCVY:1:1105:10719:1172 -VP2-06:112:H7LNDMCVY:1:1105:10845:1172 -VP2-06:112:H7LNDMCVY:1:1105:10845:1172 -VP2-06:112:H7LNDMCVY:1:1105:11026:1172 -VP2-06:112:H7LNDMCVY:1:1105:11026:1172 -VP2-06:112:H7LNDMCVY:1:1105:11153:1172 -VP2-06:112:H7LNDMCVY:1:1105:11153:1172 -VP2-06:112:H7LNDMCVY:1:1105:11514:1172 -VP2-06:112:H7LNDMCVY:1:1105:11514:1172 -VP2-06:112:H7LNDMCVY:1:1105:11568:1172 -VP2-06:112:H7LNDMCVY:1:1105:11568:1172 -VP2-06:112:H7LNDMCVY:1:1105:11623:1172 -VP2-06:112:H7LNDMCVY:1:1105:11623:1172 -VP2-06:112:H7LNDMCVY:1:1105:11659:1172 -VP2-06:112:H7LNDMCVY:1:1105:11659:1172 -VP2-06:112:H7LNDMCVY:1:1105:11713:1172 -VP2-06:112:H7LNDMCVY:1:1105:11713:1172 -VP2-06:112:H7LNDMCVY:1:1105:12075:1172 -VP2-06:112:H7LNDMCVY:1:1105:12075:1172 -VP2-06:112:H7LNDMCVY:1:1105:12310:1172 -VP2-06:112:H7LNDMCVY:1:1105:12310:1172 -VP2-06:112:H7LNDMCVY:1:1105:12382:1172 -VP2-06:112:H7LNDMCVY:1:1105:12382:1172 -VP2-06:112:H7LNDMCVY:1:1105:12418:1172 -VP2-06:112:H7LNDMCVY:1:1105:12418:1172 -VP2-06:112:H7LNDMCVY:1:1105:12563:1172 
-VP2-06:112:H7LNDMCVY:1:1105:12563:1172 -VP2-06:112:H7LNDMCVY:1:1105:12581:1172 -VP2-06:112:H7LNDMCVY:1:1105:12581:1172 -VP2-06:112:H7LNDMCVY:1:1105:12671:1172 -VP2-06:112:H7LNDMCVY:1:1105:12671:1172 -VP2-06:112:H7LNDMCVY:1:1105:12689:1172 -VP2-06:112:H7LNDMCVY:1:1105:12689:1172 -VP2-06:112:H7LNDMCVY:1:1105:12744:1172 -VP2-06:112:H7LNDMCVY:1:1105:12744:1172 -VP2-06:112:H7LNDMCVY:1:1105:13141:1172 -VP2-06:112:H7LNDMCVY:1:1105:13141:1172 -VP2-06:112:H7LNDMCVY:1:1105:13304:1172 -VP2-06:112:H7LNDMCVY:1:1105:13304:1172 -VP2-06:112:H7LNDMCVY:1:1105:13358:1172 -VP2-06:112:H7LNDMCVY:1:1105:13358:1172 -VP2-06:112:H7LNDMCVY:1:1105:13702:1172 -VP2-06:112:H7LNDMCVY:1:1105:13702:1172 -VP2-06:112:H7LNDMCVY:1:1105:14570:1172 -VP2-06:112:H7LNDMCVY:1:1105:14570:1172 -VP2-06:112:H7LNDMCVY:1:1105:14877:1172 -VP2-06:112:H7LNDMCVY:1:1105:14877:1172 -VP2-06:112:H7LNDMCVY:1:1105:15438:1172 -VP2-06:112:H7LNDMCVY:1:1105:15438:1172 -VP2-06:112:H7LNDMCVY:1:1105:15456:1172 -VP2-06:112:H7LNDMCVY:1:1105:15456:1172 -VP2-06:112:H7LNDMCVY:1:1105:15962:1172 -VP2-06:112:H7LNDMCVY:1:1105:15962:1172 -VP2-06:112:H7LNDMCVY:1:1105:16342:1172 -VP2-06:112:H7LNDMCVY:1:1105:16342:1172 -VP2-06:112:H7LNDMCVY:1:1105:16613:1172 -VP2-06:112:H7LNDMCVY:1:1105:16613:1172 -VP2-06:112:H7LNDMCVY:1:1105:16703:1172 -VP2-06:112:H7LNDMCVY:1:1105:16703:1172 -VP2-06:112:H7LNDMCVY:1:1105:17065:1172 -VP2-06:112:H7LNDMCVY:1:1105:17065:1172 -VP2-06:112:H7LNDMCVY:1:1105:17119:1172 -VP2-06:112:H7LNDMCVY:1:1105:17119:1172 -VP2-06:112:H7LNDMCVY:1:1105:17318:1172 -VP2-06:112:H7LNDMCVY:1:1105:17318:1172 -VP2-06:112:H7LNDMCVY:1:1105:17698:1172 -VP2-06:112:H7LNDMCVY:1:1105:17698:1172 -VP2-06:112:H7LNDMCVY:1:1105:17716:1172 -VP2-06:112:H7LNDMCVY:1:1105:17716:1172 -VP2-06:112:H7LNDMCVY:1:1105:17933:1172 -VP2-06:112:H7LNDMCVY:1:1105:17933:1172 -VP2-06:112:H7LNDMCVY:1:1105:18096:1172 -VP2-06:112:H7LNDMCVY:1:1105:18096:1172 -VP2-06:112:H7LNDMCVY:1:1105:18295:1172 -VP2-06:112:H7LNDMCVY:1:1105:18295:1172 -VP2-06:112:H7LNDMCVY:1:1105:18331:1172 
-VP2-06:112:H7LNDMCVY:1:1105:18331:1172 -VP2-06:112:H7LNDMCVY:1:1105:18403:1172 -VP2-06:112:H7LNDMCVY:1:1105:18403:1172 -VP2-06:112:H7LNDMCVY:1:1105:18511:1172 -VP2-06:112:H7LNDMCVY:1:1105:18511:1172 -VP2-06:112:H7LNDMCVY:1:1105:18584:1172 -VP2-06:112:H7LNDMCVY:1:1105:18584:1172 -VP2-06:112:H7LNDMCVY:1:1105:18620:1172 -VP2-06:112:H7LNDMCVY:1:1105:18620:1172 -VP2-06:112:H7LNDMCVY:1:1105:19199:1172 -VP2-06:112:H7LNDMCVY:1:1105:19199:1172 -VP2-06:112:H7LNDMCVY:1:1105:19253:1172 -VP2-06:112:H7LNDMCVY:1:1105:19253:1172 -VP2-06:112:H7LNDMCVY:1:1105:19271:1172 -VP2-06:112:H7LNDMCVY:1:1105:19271:1172 -VP2-06:112:H7LNDMCVY:1:1105:19470:1172 -VP2-06:112:H7LNDMCVY:1:1105:19470:1172 -VP2-06:112:H7LNDMCVY:1:1105:19687:1172 -VP2-06:112:H7LNDMCVY:1:1105:19687:1172 -VP2-06:112:H7LNDMCVY:1:1105:19759:1172 -VP2-06:112:H7LNDMCVY:1:1105:19759:1172 -VP2-06:112:H7LNDMCVY:1:1105:19777:1172 -VP2-06:112:H7LNDMCVY:1:1105:19777:1172 -VP2-06:112:H7LNDMCVY:1:1105:19868:1172 -VP2-06:112:H7LNDMCVY:1:1105:19868:1172 -VP2-06:112:H7LNDMCVY:1:1105:19922:1172 -VP2-06:112:H7LNDMCVY:1:1105:19922:1172 -VP2-06:112:H7LNDMCVY:1:1105:19958:1172 -VP2-06:112:H7LNDMCVY:1:1105:19958:1172 -VP2-06:112:H7LNDMCVY:1:1105:20121:1172 -VP2-06:112:H7LNDMCVY:1:1105:20121:1172 -VP2-06:112:H7LNDMCVY:1:1105:21043:1172 -VP2-06:112:H7LNDMCVY:1:1105:21043:1172 -VP2-06:112:H7LNDMCVY:1:1105:21169:1172 -VP2-06:112:H7LNDMCVY:1:1105:21169:1172 -VP2-06:112:H7LNDMCVY:1:1105:21260:1172 -VP2-06:112:H7LNDMCVY:1:1105:21260:1172 -VP2-06:112:H7LNDMCVY:1:1105:21694:1172 -VP2-06:112:H7LNDMCVY:1:1105:21694:1172 -VP2-06:112:H7LNDMCVY:1:1105:21766:1172 -VP2-06:112:H7LNDMCVY:1:1105:21766:1172 -VP2-06:112:H7LNDMCVY:1:1105:22001:1172 -VP2-06:112:H7LNDMCVY:1:1105:22001:1172 -VP2-06:112:H7LNDMCVY:1:1105:22037:1172 -VP2-06:112:H7LNDMCVY:1:1105:22037:1172 -VP2-06:112:H7LNDMCVY:1:1105:22290:1172 -VP2-06:112:H7LNDMCVY:1:1105:22290:1172 -VP2-06:112:H7LNDMCVY:1:1105:23014:1172 -VP2-06:112:H7LNDMCVY:1:1105:23014:1172 -VP2-06:112:H7LNDMCVY:1:1105:23863:1172 
-VP2-06:112:H7LNDMCVY:1:1105:23863:1172 -VP2-06:112:H7LNDMCVY:1:1105:24117:1172 -VP2-06:112:H7LNDMCVY:1:1105:24117:1172 -VP2-06:112:H7LNDMCVY:1:1105:24243:1172 -VP2-06:112:H7LNDMCVY:1:1105:24243:1172 -VP2-06:112:H7LNDMCVY:1:1105:24442:1172 -VP2-06:112:H7LNDMCVY:1:1105:24442:1172 -VP2-06:112:H7LNDMCVY:1:1105:25057:1172 -VP2-06:112:H7LNDMCVY:1:1105:25057:1172 -VP2-06:112:H7LNDMCVY:1:1105:25292:1172 -VP2-06:112:H7LNDMCVY:1:1105:25292:1172 -VP2-06:112:H7LNDMCVY:1:1105:25400:1172 -VP2-06:112:H7LNDMCVY:1:1105:25400:1172 -VP2-06:112:H7LNDMCVY:1:1105:25726:1172 -VP2-06:112:H7LNDMCVY:1:1105:25726:1172 -VP2-06:112:H7LNDMCVY:1:1105:26015:1172 -VP2-06:112:H7LNDMCVY:1:1105:26015:1172 -VP2-06:112:H7LNDMCVY:1:1105:26558:1172 -VP2-06:112:H7LNDMCVY:1:1105:26558:1172 -VP2-06:112:H7LNDMCVY:1:1105:26594:1172 -VP2-06:112:H7LNDMCVY:1:1105:26594:1172 -VP2-06:112:H7LNDMCVY:1:1105:26630:1172 -VP2-06:112:H7LNDMCVY:1:1105:26630:1172 -VP2-06:112:H7LNDMCVY:1:1105:26883:1172 -VP2-06:112:H7LNDMCVY:1:1105:26883:1172 -VP2-06:112:H7LNDMCVY:1:1105:26919:1172 -VP2-06:112:H7LNDMCVY:1:1105:26919:1172 -VP2-06:112:H7LNDMCVY:1:1105:27100:1172 -VP2-06:112:H7LNDMCVY:1:1105:27100:1172 -VP2-06:112:H7LNDMCVY:1:1105:27172:1172 -VP2-06:112:H7LNDMCVY:1:1105:27172:1172 -VP2-06:112:H7LNDMCVY:1:1105:27516:1172 -VP2-06:112:H7LNDMCVY:1:1105:27516:1172 -VP2-06:112:H7LNDMCVY:1:1105:27661:1172 -VP2-06:112:H7LNDMCVY:1:1105:27661:1172 -VP2-06:112:H7LNDMCVY:1:1105:27859:1172 -VP2-06:112:H7LNDMCVY:1:1105:27859:1172 -VP2-06:112:H7LNDMCVY:1:1105:28058:1172 -VP2-06:112:H7LNDMCVY:1:1105:28058:1172 -VP2-06:112:H7LNDMCVY:1:1105:28239:1172 -VP2-06:112:H7LNDMCVY:1:1105:28239:1172 -VP2-06:112:H7LNDMCVY:1:1105:1777:1188 -VP2-06:112:H7LNDMCVY:1:1105:1777:1188 -VP2-06:112:H7LNDMCVY:1:1105:2627:1188 -VP2-06:112:H7LNDMCVY:1:1105:2627:1188 -VP2-06:112:H7LNDMCVY:1:1105:3025:1188 -VP2-06:112:H7LNDMCVY:1:1105:3025:1188 -VP2-06:112:H7LNDMCVY:1:1105:3170:1188 -VP2-06:112:H7LNDMCVY:1:1105:3170:1188 -VP2-06:112:H7LNDMCVY:1:1105:3188:1188 
-VP2-06:112:H7LNDMCVY:1:1105:3188:1188 -VP2-06:112:H7LNDMCVY:1:1105:3387:1188 -VP2-06:112:H7LNDMCVY:1:1105:3387:1188 -VP2-06:112:H7LNDMCVY:1:1105:3477:1188 -VP2-06:112:H7LNDMCVY:1:1105:3477:1188 -VP2-06:112:H7LNDMCVY:1:1105:3730:1188 -VP2-06:112:H7LNDMCVY:1:1105:3730:1188 -VP2-06:112:H7LNDMCVY:1:1105:3965:1188 -VP2-06:112:H7LNDMCVY:1:1105:3965:1188 -VP2-06:112:H7LNDMCVY:1:1105:4273:1188 -VP2-06:112:H7LNDMCVY:1:1105:4273:1188 -VP2-06:112:H7LNDMCVY:1:1105:4435:1188 -VP2-06:112:H7LNDMCVY:1:1105:4435:1188 -VP2-06:112:H7LNDMCVY:1:1105:4472:1188 -VP2-06:112:H7LNDMCVY:1:1105:4472:1188 -VP2-06:112:H7LNDMCVY:1:1105:4689:1188 -VP2-06:112:H7LNDMCVY:1:1105:4689:1188 -VP2-06:112:H7LNDMCVY:1:1105:5828:1188 -VP2-06:112:H7LNDMCVY:1:1105:5828:1188 -VP2-06:112:H7LNDMCVY:1:1105:6442:1188 -VP2-06:112:H7LNDMCVY:1:1105:6442:1188 -VP2-06:112:H7LNDMCVY:1:1105:7563:1188 -VP2-06:112:H7LNDMCVY:1:1105:7563:1188 -VP2-06:112:H7LNDMCVY:1:1105:7618:1188 -VP2-06:112:H7LNDMCVY:1:1105:7618:1188 -VP2-06:112:H7LNDMCVY:1:1105:7762:1188 -VP2-06:112:H7LNDMCVY:1:1105:7762:1188 -VP2-06:112:H7LNDMCVY:1:1105:8341:1188 -VP2-06:112:H7LNDMCVY:1:1105:8341:1188 -VP2-06:112:H7LNDMCVY:1:1105:8359:1188 -VP2-06:112:H7LNDMCVY:1:1105:8359:1188 -VP2-06:112:H7LNDMCVY:1:1105:8504:1188 -VP2-06:112:H7LNDMCVY:1:1105:8504:1188 -VP2-06:112:H7LNDMCVY:1:1105:8865:1188 -VP2-06:112:H7LNDMCVY:1:1105:8865:1188 -VP2-06:112:H7LNDMCVY:1:1105:8901:1188 -VP2-06:112:H7LNDMCVY:1:1105:8901:1188 -VP2-06:112:H7LNDMCVY:1:1105:8974:1188 -VP2-06:112:H7LNDMCVY:1:1105:8974:1188 -VP2-06:112:H7LNDMCVY:1:1105:8992:1188 -VP2-06:112:H7LNDMCVY:1:1105:8992:1188 diff --git a/src/htslib-1.18/htscodecs/tests/names/rr.names b/src/htslib-1.18/htscodecs/tests/names/rr.names deleted file mode 100644 index e4af571..0000000 --- a/src/htslib-1.18/htscodecs/tests/names/rr.names +++ /dev/null @@ -1,1000 +0,0 @@ -HS25_09827:2:2114:8747:77719#49 -HS25_09827:2:2313:12324:15980#49 -HS25_09827:2:2306:4431:26490#49 -HSQ1004:134:C0D8DACXX:3:2305:3121:36767 
-HSQ1004:134:C0D8DACXX:1:1108:13285:181846 -HS25_09827:2:1214:1923:18110#49 -HS25_09827:2:2205:8969:83271#49 -HSQ1004:134:C0D8DACXX:4:2302:18335:166482 -HSQ1004:134:C0D8DACXX:4:1201:2163:194187 -HS25_09827:2:2115:3263:50556#49 -HS25_09827:2:2312:19671:57983#49 -HS25_09827:2:2209:3841:33947#49 -HSQ1004:134:C0D8DACXX:4:2102:5110:96106 -HS25_09827:2:1203:13643:47698#49 -HS25_09827:2:1216:2059:38837#49 -HS25_09827:2:2307:10341:16771#49 -HSQ1004:134:C0D8DACXX:3:2107:14582:19912 -HSQ1004:134:C0D8DACXX:2:1303:19072:23886 -HS25_09827:2:2113:8149:2427#49 -HS25_09827:2:1311:20518:50516#49 -HS25_09827:2:2310:3640:51154#49 -HSQ1004:134:C0D8DACXX:4:1208:8903:181874 -HSQ1004:134:C0D8DACXX:2:2206:9477:70876 -HS25_09827:2:2107:14860:50351#49 -HSQ1004:134:C0D8DACXX:2:1307:4544:122309 -HS25_09827:2:2306:8288:30394#49 -HSQ1004:134:C0D8DACXX:3:1207:7454:165536 -HS25_09827:2:2313:15446:39111#49 -HS25_09827:2:1206:7309:48339#49 -HSQ1004:134:C0D8DACXX:3:2301:2467:64732 -HS25_09827:2:1301:8312:54858#49 -HSQ1004:134:C0D8DACXX:3:1206:14036:13433 -HSQ1004:134:C0D8DACXX:3:2304:7675:51259 -HSQ1004:134:C0D8DACXX:3:2101:16528:75881 -HSQ1004:134:C0D8DACXX:2:2108:14508:109011 -HSQ1004:134:C0D8DACXX:1:2101:8290:174540 -HSQ1004:134:C0D8DACXX:3:2102:9688:74570 -HS25_09827:2:2309:17583:42089#49 -HSQ1004:134:C0D8DACXX:1:2307:21041:134358 -HS25_09827:2:1302:3932:38581#49 -HS25_09827:2:1204:12185:67072#49 -HS25_09827:2:2111:11364:91626#49 -HS25_09827:2:1310:12071:10727#49 -HS25_09827:2:1304:15575:79506#49 -HSQ1004:134:C0D8DACXX:3:2307:13834:192476 -HSQ1004:134:C0D8DACXX:1:1202:21024:89233 -HSQ1004:134:C0D8DACXX:3:2208:11367:136379 -HSQ1004:134:C0D8DACXX:3:1202:3504:66609 -HSQ1004:134:C0D8DACXX:3:1203:14626:156897 -HS25_09827:2:2106:7333:82623#49 -HS25_09827:2:2211:6852:87991#49 -HS25_09827:2:2311:19687:24078#49 -HS25_09827:2:2115:18500:98269#49 -HSQ1004:134:C0D8DACXX:3:1306:10655:167583 -HS25_09827:2:2307:10824:26157#49 -HS25_09827:2:2202:2127:77513#49 -HS25_09827:2:2108:20685:32484#49 
-HSQ1004:134:C0D8DACXX:3:2105:4713:108525 -HSQ1004:134:C0D8DACXX:1:2204:2644:133871 -HS25_09827:2:2102:7280:75706#49 -HSQ1004:134:C0D8DACXX:1:2107:6099:101827 -HSQ1004:134:C0D8DACXX:2:1201:9712:103000 -HSQ1004:134:C0D8DACXX:3:2102:15633:58040 -HSQ1004:134:C0D8DACXX:3:2202:20045:139051 -HS25_09827:2:2212:14557:21550#49 -HS25_09827:2:1313:13927:95756#49 -HS25_09827:2:2106:11230:28913#49 -HS25_09827:2:2105:9655:25125#49 -HSQ1004:134:C0D8DACXX:2:2304:2693:118250 -HSQ1004:134:C0D8DACXX:4:2202:5144:142021 -HS25_09827:2:2216:4224:66349#49 -HS25_09827:2:2201:14855:88148#49 -HS25_09827:2:2203:18934:7306#49 -HS25_09827:2:2312:11277:59915#49 -HSQ1004:134:C0D8DACXX:2:2108:18292:151910 -HSQ1004:134:C0D8DACXX:1:1306:7227:175170 -HS25_09827:2:2315:9767:65919#49 -HS25_09827:2:2213:12218:11317#49 -HSQ1004:134:C0D8DACXX:1:1105:1706:5632 -HSQ1004:134:C0D8DACXX:4:1203:14898:87378 -HSQ1004:134:C0D8DACXX:3:2207:2228:93733 -HSQ1004:134:C0D8DACXX:2:1108:14782:166733 -HS25_09827:2:1303:17705:10192#49 -HS25_09827:2:2103:14605:39534#49 -HSQ1004:134:C0D8DACXX:2:1202:14200:138790 -HS25_09827:2:2104:13672:87219#49 -HSQ1004:134:C0D8DACXX:1:1101:8871:176202 -HSQ1004:134:C0D8DACXX:1:2107:18893:63156 -HS25_09827:2:2202:14911:95879#49 -HSQ1004:134:C0D8DACXX:3:1201:3301:74538 -HS25_09827:2:2107:19020:5189#49 -HSQ1004:134:C0D8DACXX:4:2104:14551:58569 -HSQ1004:134:C0D8DACXX:1:1202:13897:100295 -HSQ1004:134:C0D8DACXX:4:2106:4454:15533 -HS25_09827:2:1303:18855:9523#49 -HS25_09827:2:2308:4447:29406#49 -HSQ1004:134:C0D8DACXX:4:2106:15540:50304 -HSQ1004:134:C0D8DACXX:4:2104:11199:47217 -HSQ1004:134:C0D8DACXX:3:2302:9582:94740 -HS25_09827:2:2311:9835:23908#49 -HSQ1004:134:C0D8DACXX:3:1304:14417:127794 -HS25_09827:2:1207:6118:48222#49 -HS25_09827:2:2308:16736:10373#49 -HS25_09827:2:2209:11014:79928#49 -HSQ1004:134:C0D8DACXX:3:2204:14554:133668 -HSQ1004:134:C0D8DACXX:4:1301:19057:39210 -HSQ1004:134:C0D8DACXX:4:1208:5345:53479 -HS25_09827:2:1308:15753:99629#49 -HS25_09827:2:1308:12875:75579#49 
-HS25_09827:2:2303:12441:72302#49 -HSQ1004:134:C0D8DACXX:4:1101:15730:70455 -HS25_09827:2:1312:5885:66269#49 -HSQ1004:134:C0D8DACXX:2:2102:6008:76618 -HSQ1004:134:C0D8DACXX:2:2307:1833:89188 -HS25_09827:2:2309:16231:99761#49 -HSQ1004:134:C0D8DACXX:1:2304:2001:149871 -HS25_09827:2:2110:11128:59678#49 -HS25_09827:2:2208:16372:83299#49 -HSQ1004:134:C0D8DACXX:3:1304:8394:149100 -HSQ1004:134:C0D8DACXX:4:2307:13331:22421 -HS25_09827:2:2102:15004:80414#49 -HSQ1004:134:C0D8DACXX:2:2108:11228:109904 -HSQ1004:134:C0D8DACXX:2:1105:14014:96905 -HSQ1004:134:C0D8DACXX:3:2106:21112:31023 -HSQ1004:134:C0D8DACXX:1:1201:10296:157967 -HS25_09827:2:2206:8753:72165#49 -HS25_09827:2:2209:11196:26164#49 -HSQ1004:134:C0D8DACXX:3:2105:12216:56342 -HS25_09827:2:2302:18103:27677#49 -HS25_09827:2:2115:20939:49654#49 -HSQ1004:134:C0D8DACXX:3:1202:7145:76763 -HS25_09827:2:2309:19492:75219#49 -HSQ1004:134:C0D8DACXX:2:1205:6617:28196 -HS25_09827:2:1314:17839:64752#49 -HS25_09827:2:2205:4838:94176#49 -HS25_09827:2:2314:15437:48831#49 -HSQ1004:134:C0D8DACXX:2:2301:8964:21572 -HS25_09827:2:1307:2853:31373#49 -HS25_09827:2:1307:8260:22550#49 -HSQ1004:134:C0D8DACXX:1:1102:13519:57080 -HS25_09827:2:1308:8092:70155#49 -HS25_09827:2:2302:9799:87408#49 -HSQ1004:134:C0D8DACXX:1:1307:18243:173404 -HS25_09827:2:2101:10712:13800#49 -HSQ1004:134:C0D8DACXX:2:1108:17338:70723 -HSQ1004:134:C0D8DACXX:3:1201:18350:49832 -HS25_09827:2:2213:19474:82842#49 -HS25_09827:2:2216:18088:19792#49 -HSQ1004:134:C0D8DACXX:3:1203:6331:184692 -HS25_09827:2:2208:13886:69618#49 -HSQ1004:134:C0D8DACXX:1:2306:19134:177227 -HS25_09827:2:2212:7097:90029#49 -HS25_09827:2:2311:12335:4457#49 -HS25_09827:2:2310:9116:26038#49 -HS25_09827:2:1213:8145:87592#49 -HS25_09827:2:1311:4203:24503#49 -HSQ1004:134:C0D8DACXX:2:2105:3622:158027 -HSQ1004:134:C0D8DACXX:1:1102:12016:38718 -HS25_09827:2:2102:7353:53864#49 -HS25_09827:2:1308:8090:31756#49 -HS25_09827:2:2114:13762:63038#49 -HS25_09827:2:1212:19226:61460#49 
-HSQ1004:134:C0D8DACXX:1:2308:3055:68730 -HS25_09827:2:1314:16125:35530#49 -HSQ1004:134:C0D8DACXX:3:1102:9602:32853 -HS25_09827:2:1209:10326:43481#49 -HS25_09827:2:1310:4509:86306#49 -HS25_09827:2:2112:19590:28761#49 -HS25_09827:2:2111:9429:55861#49 -HSQ1004:134:C0D8DACXX:2:2202:8942:62691 -HS25_09827:2:2108:9164:67893#49 -HSQ1004:134:C0D8DACXX:2:1203:9648:191452 -HSQ1004:134:C0D8DACXX:1:2201:6427:2350 -HS25_09827:2:2316:16178:92812#49 -HS25_09827:2:1313:15137:59496#49 -HSQ1004:134:C0D8DACXX:2:2104:15845:58062 -HS25_09827:2:2204:8915:68371#49 -HSQ1004:134:C0D8DACXX:3:2307:18821:69754 -HS25_09827:2:2210:8139:41634#49 -HSQ1004:134:C0D8DACXX:3:2105:20699:89654 -HSQ1004:134:C0D8DACXX:1:1108:20997:39004 -HS25_09827:2:1211:1544:17007#49 -HS25_09827:2:1301:10741:23825#49 -HSQ1004:134:C0D8DACXX:2:2207:13509:126184 -HS25_09827:2:2101:11863:80235#49 -HSQ1004:134:C0D8DACXX:3:2302:9811:163963 -HS25_09827:2:1315:4477:39894#49 -HS25_09827:2:2304:18942:59462#49 -HSQ1004:134:C0D8DACXX:4:2306:12564:169176 -HS25_09827:2:2204:14983:91478#49 -HSQ1004:134:C0D8DACXX:1:2206:7763:108706 -HS25_09827:2:2205:5706:72006#49 -HS25_09827:2:1310:5370:82467#49 -HSQ1004:134:C0D8DACXX:4:1308:19722:68961 -HS25_09827:2:1204:8317:13130#49 -HSQ1004:134:C0D8DACXX:2:1304:14100:72047 -HS25_09827:2:1304:16205:88928#49 -HS25_09827:2:2315:17769:72003#49 -HS25_09827:2:2311:10632:59588#49 -HSQ1004:134:C0D8DACXX:2:1203:18122:60287 -HS25_09827:2:1313:12854:38781#49 -HSQ1004:134:C0D8DACXX:4:1104:17252:160265 -HS25_09827:2:1304:14698:10936#49 -HSQ1004:134:C0D8DACXX:1:1302:3010:82556 -HS25_09827:2:2214:18895:96884#49 -HS25_09827:2:2214:13866:82711#49 -HS25_09827:2:2205:18519:5056#49 -HSQ1004:134:C0D8DACXX:3:1107:2649:142382 -HSQ1004:134:C0D8DACXX:1:1306:8789:67299 -HS25_09827:2:1205:16901:23080#49 -HSQ1004:134:C0D8DACXX:3:2105:7313:125463 -HSQ1004:134:C0D8DACXX:2:1207:11229:64051 -HS25_09827:2:2116:10890:52955#49 -HSQ1004:134:C0D8DACXX:4:2203:18844:164093 -HS25_09827:2:2114:8778:32296#49 
-HSQ1004:134:C0D8DACXX:4:1105:9022:190416 -HS25_09827:2:2302:19602:68133#49 -HSQ1004:134:C0D8DACXX:4:2304:16428:26359 -HSQ1004:134:C0D8DACXX:2:1306:4409:7078 -HS25_09827:2:1307:9545:75384#49 -HSQ1004:134:C0D8DACXX:4:1106:5100:156555 -HSQ1004:134:C0D8DACXX:1:1208:10204:53989 -HSQ1004:134:C0D8DACXX:1:2304:17250:29277 -HS25_09827:2:1310:6782:71822#49 -HSQ1004:134:C0D8DACXX:2:1301:8541:182615 -HSQ1004:134:C0D8DACXX:3:2101:21268:65732 -HSQ1004:134:C0D8DACXX:2:2303:21172:67178 -HS25_09827:2:2308:6506:13405#49 -HSQ1004:134:C0D8DACXX:4:2307:5807:16173 -HSQ1004:134:C0D8DACXX:1:2103:17961:10987 -HSQ1004:134:C0D8DACXX:4:1107:16548:15915 -HSQ1004:134:C0D8DACXX:3:2205:3627:19820 -HSQ1004:134:C0D8DACXX:4:2208:5238:145699 -HS25_09827:2:2203:2894:34567#49 -HSQ1004:134:C0D8DACXX:3:2208:17750:40599 -HSQ1004:134:C0D8DACXX:4:2305:12097:93393 -HS25_09827:2:1207:6118:48222#49 -HSQ1004:134:C0D8DACXX:2:2105:17419:194202 -HS25_09827:2:2214:6483:11073#49 -HSQ1004:134:C0D8DACXX:2:1103:7085:169394 -HSQ1004:134:C0D8DACXX:2:1105:16592:174914 -HS25_09827:2:2207:10121:28588#49 -HS25_09827:2:1313:3966:73396#49 -HS25_09827:2:1215:8423:43287#49 -HS25_09827:2:2209:9664:62793#49 -HS25_09827:2:1315:7387:39601#49 -HSQ1004:134:C0D8DACXX:3:2107:13872:27019 -HS25_09827:2:1213:13726:47353#49 -HSQ1004:134:C0D8DACXX:4:2303:5380:89688 -HSQ1004:134:C0D8DACXX:1:1107:13091:87366 -HSQ1004:134:C0D8DACXX:2:2205:8680:124274 -HSQ1004:134:C0D8DACXX:3:1103:19060:135411 -HSQ1004:134:C0D8DACXX:4:1103:4704:188094 -HSQ1004:134:C0D8DACXX:3:1306:1934:197320 -HSQ1004:134:C0D8DACXX:1:1202:10102:180590 -HS25_09827:2:2107:13281:76765#49 -HS25_09827:2:2310:7311:63487#49 -HSQ1004:134:C0D8DACXX:1:1201:16940:27926 -HSQ1004:134:C0D8DACXX:4:1304:10587:121097 -HSQ1004:134:C0D8DACXX:3:1205:14869:155988 -HSQ1004:134:C0D8DACXX:1:2202:8283:188476 -HS25_09827:2:1206:8293:87732#49 -HS25_09827:2:2302:3520:14656#49 -HSQ1004:134:C0D8DACXX:4:2108:10011:66146 -HSQ1004:134:C0D8DACXX:1:2208:10149:182975 -HS25_09827:2:2308:4928:6873#49 
-HS25_09827:2:2213:4224:68651#49 -HSQ1004:134:C0D8DACXX:4:1206:9921:10962 -HS25_09827:2:1209:1697:62171#49 -HS25_09827:2:1202:21134:57725#49 -HS25_09827:2:2312:7923:22594#49 -HS25_09827:2:1312:7479:22485#49 -HSQ1004:134:C0D8DACXX:1:2306:15404:83465 -HS25_09827:2:2201:11828:27929#49 -HSQ1004:134:C0D8DACXX:3:1205:6342:9309 -HSQ1004:134:C0D8DACXX:3:1302:19833:48145 -HSQ1004:134:C0D8DACXX:2:1103:6629:189572 -HSQ1004:134:C0D8DACXX:3:2208:16768:198506 -HSQ1004:134:C0D8DACXX:2:2308:15205:67350 -HS25_09827:2:2106:12832:48961#49 -HS25_09827:2:1309:8609:38667#49 -HSQ1004:134:C0D8DACXX:3:1303:10044:47920 -HS25_09827:2:1302:5758:18584#49 -HSQ1004:134:C0D8DACXX:3:1203:12501:16926 -HS25_09827:2:2210:20099:48213#49 -HS25_09827:2:1311:17872:35521#49 -HS25_09827:2:2110:13843:97992#49 -HSQ1004:134:C0D8DACXX:2:1301:18674:194182 -HSQ1004:134:C0D8DACXX:3:2204:3233:131354 -HSQ1004:134:C0D8DACXX:4:2208:21311:81593 -HSQ1004:134:C0D8DACXX:4:2302:16030:150088 -HSQ1004:134:C0D8DACXX:3:1307:18344:57489 -HSQ1004:134:C0D8DACXX:4:2104:4683:200563 -HSQ1004:134:C0D8DACXX:4:2106:3811:27326 -HSQ1004:134:C0D8DACXX:3:1204:2360:66680 -HSQ1004:134:C0D8DACXX:2:2204:15517:106520 -HS25_09827:2:1304:19187:86745#49 -HS25_09827:2:2311:18060:25873#49 -HSQ1004:134:C0D8DACXX:2:1205:6415:11419 -HS25_09827:2:1215:11520:29546#49 -HS25_09827:2:1205:1897:80675#49 -HSQ1004:134:C0D8DACXX:4:2302:2287:189347 -HS25_09827:2:1301:2260:50436#49 -HS25_09827:2:1302:12129:33861#49 -HSQ1004:134:C0D8DACXX:4:1308:12140:188602 -HS25_09827:2:1211:13072:62890#49 -HSQ1004:134:C0D8DACXX:4:2108:19971:99014 -HS25_09827:2:2212:14589:85593#49 -HS25_09827:2:2302:16285:54420#49 -HS25_09827:2:2305:8653:24821#49 -HSQ1004:134:C0D8DACXX:2:2301:8461:152232 -HS25_09827:2:1207:15556:64377#49 -HSQ1004:134:C0D8DACXX:4:2202:5095:86802 -HS25_09827:2:2311:10632:59588#49 -HS25_09827:2:2105:15894:34250#49 -HS25_09827:2:2105:6143:91349#49 -HSQ1004:134:C0D8DACXX:1:1105:14963:39531 -HSQ1004:134:C0D8DACXX:3:2104:21183:174057 -HS25_09827:2:2314:6399:35081#49 
-HS25_09827:2:2205:17738:22024#49 -HSQ1004:134:C0D8DACXX:4:2301:4732:116594 -HSQ1004:134:C0D8DACXX:3:2302:16797:111049 -HS25_09827:2:2106:15769:75986#49 -HSQ1004:134:C0D8DACXX:4:2108:5245:194148 -HS25_09827:2:1210:16588:54743#49 -HS25_09827:2:1313:17062:10816#49 -HSQ1004:134:C0D8DACXX:3:1107:10995:14609 -HS25_09827:2:2110:18412:68102#49 -HSQ1004:134:C0D8DACXX:3:1305:8974:144302 -HS25_09827:2:2206:9676:87312#49 -HSQ1004:134:C0D8DACXX:3:2108:7264:87189 -HSQ1004:134:C0D8DACXX:1:2308:14953:185577 -HS25_09827:2:1307:10983:68690#49 -HS25_09827:2:2213:16372:12191#49 -HSQ1004:134:C0D8DACXX:3:1202:19378:12683 -HSQ1004:134:C0D8DACXX:2:1307:6077:125721 -HSQ1004:134:C0D8DACXX:3:2208:2657:191901 -HSQ1004:134:C0D8DACXX:4:2202:1223:41515 -HSQ1004:134:C0D8DACXX:3:1208:13231:197564 -HSQ1004:134:C0D8DACXX:4:2107:20833:197593 -HSQ1004:134:C0D8DACXX:3:1307:16154:178618 -HSQ1004:134:C0D8DACXX:3:1202:7343:185129 -HS25_09827:2:2210:4340:45231#49 -HSQ1004:134:C0D8DACXX:3:2207:20928:110814 -HS25_09827:2:2205:3125:35534#49 -HSQ1004:134:C0D8DACXX:2:1208:8845:39283 -HS25_09827:2:1206:8986:69341#49 -HS25_09827:2:2309:16231:99761#49 -HSQ1004:134:C0D8DACXX:3:1101:21279:170591 -HSQ1004:134:C0D8DACXX:3:1201:20177:157397 -HSQ1004:134:C0D8DACXX:2:2208:6409:100324 -HSQ1004:134:C0D8DACXX:3:2206:20023:52129 -HS25_09827:2:1209:14849:81010#49 -HSQ1004:134:C0D8DACXX:2:1101:14850:157968 -HSQ1004:134:C0D8DACXX:3:1107:20297:193486 -HSQ1004:134:C0D8DACXX:3:1204:14956:161782 -HSQ1004:134:C0D8DACXX:3:1203:15745:28593 -HS25_09827:2:1215:14077:68964#49 -HS25_09827:2:2311:1929:40187#49 -HS25_09827:2:2314:3983:98569#49 -HSQ1004:134:C0D8DACXX:4:1105:18444:3235 -HSQ1004:134:C0D8DACXX:1:2105:8820:32150 -HSQ1004:134:C0D8DACXX:2:1302:6563:28511 -HS25_09827:2:2310:3757:55510#49 -HSQ1004:134:C0D8DACXX:3:2105:3526:191352 -HS25_09827:2:2309:6915:76693#49 -HSQ1004:134:C0D8DACXX:4:2101:14144:148883 -HSQ1004:134:C0D8DACXX:1:2202:9001:88176 -HS25_09827:2:2305:2094:13218#49 -HSQ1004:134:C0D8DACXX:1:2202:8120:83355 
-HS25_09827:2:1211:8279:92047#49 -HS25_09827:2:1201:18318:79203#49 -HS25_09827:2:2110:12076:57880#49 -HSQ1004:134:C0D8DACXX:1:2201:20826:154463 -HS25_09827:2:2109:12117:14362#49 -HS25_09827:2:2201:3329:25396#49 -HS25_09827:2:1201:2148:70858#49 -HS25_09827:2:2107:4850:30482#49 -HSQ1004:134:C0D8DACXX:3:2108:15322:140350 -HSQ1004:134:C0D8DACXX:1:1108:4299:75963 -HS25_09827:2:1314:5898:59595#49 -HS25_09827:2:2103:14554:33422#49 -HS25_09827:2:2113:14757:16903#49 -HS25_09827:2:2212:19751:78414#49 -HSQ1004:134:C0D8DACXX:1:1102:5086:129795 -HS25_09827:2:2106:7918:70923#49 -HSQ1004:134:C0D8DACXX:1:2207:19310:4292 -HSQ1004:134:C0D8DACXX:1:1306:20413:135275 -HS25_09827:2:2113:4557:85104#49 -HS25_09827:2:1206:5222:57853#49 -HSQ1004:134:C0D8DACXX:1:1202:12329:185808 -HS25_09827:2:2302:3415:60930#49 -HSQ1004:134:C0D8DACXX:4:1304:13182:4081 -HSQ1004:134:C0D8DACXX:1:2207:17679:117659 -HSQ1004:134:C0D8DACXX:4:1308:19559:21052 -HS25_09827:2:1205:8989:84210#49 -HS25_09827:2:2307:4349:99332#49 -HS25_09827:2:1314:3373:80867#49 -HSQ1004:134:C0D8DACXX:2:2305:18501:116225 -HSQ1004:134:C0D8DACXX:3:2108:6062:59759 -HS25_09827:2:2304:4896:4536#49 -HSQ1004:134:C0D8DACXX:4:1202:17918:154929 -HSQ1004:134:C0D8DACXX:2:1303:21340:48481 -HSQ1004:134:C0D8DACXX:1:2201:17588:194062 -HS25_09827:2:2103:21206:86612#49 -HSQ1004:134:C0D8DACXX:3:1101:8035:185486 -HSQ1004:134:C0D8DACXX:1:2306:5714:81564 -HSQ1004:134:C0D8DACXX:2:2307:15648:133517 -HS25_09827:2:2207:8429:90092#49 -HS25_09827:2:1302:5778:20557#49 -HS25_09827:2:1312:6656:42039#49 -HS25_09827:2:1213:4555:35897#49 -HSQ1004:134:C0D8DACXX:2:1107:1820:135580 -HS25_09827:2:2202:15322:18368#49 -HSQ1004:134:C0D8DACXX:4:2308:13477:104831 -HS25_09827:2:2214:14090:46321#49 -HS25_09827:2:1308:8708:21905#49 -HSQ1004:134:C0D8DACXX:4:1305:6519:60182 -HS25_09827:2:2211:2224:13755#49 -HSQ1004:134:C0D8DACXX:4:2101:9088:22057 -HSQ1004:134:C0D8DACXX:3:2208:11012:113968 -HS25_09827:2:1204:11517:18995#49 -HS25_09827:2:2203:12145:99989#49 
-HS25_09827:2:2110:7397:75254#49 -HS25_09827:2:2110:8422:62886#49 -HSQ1004:134:C0D8DACXX:1:2104:6619:52207 -HS25_09827:2:2213:7144:21041#49 -HS25_09827:2:2309:11800:65397#49 -HSQ1004:134:C0D8DACXX:4:1201:4859:36308 -HSQ1004:134:C0D8DACXX:2:2201:2956:73512 -HS25_09827:2:2208:20544:50571#49 -HSQ1004:134:C0D8DACXX:1:2205:13851:191632 -HS25_09827:2:2301:17928:65392#49 -HSQ1004:134:C0D8DACXX:2:2208:14782:96485 -HSQ1004:134:C0D8DACXX:2:2307:17236:37724 -HS25_09827:2:1209:5381:20629#49 -HSQ1004:134:C0D8DACXX:3:1303:12924:129078 -HSQ1004:134:C0D8DACXX:4:2304:18362:121940 -HS25_09827:2:1301:3826:87974#49 -HS25_09827:2:2206:2271:9299#49 -HS25_09827:2:2107:5250:91569#49 -HS25_09827:2:2114:7698:50830#49 -HSQ1004:134:C0D8DACXX:4:2206:9566:9778 -HS25_09827:2:2206:20389:69195#49 -HSQ1004:134:C0D8DACXX:3:2307:10049:91975 -HSQ1004:134:C0D8DACXX:1:2106:12291:150348 -HS25_09827:2:1201:7829:74222#49 -HSQ1004:134:C0D8DACXX:1:2101:11290:127607 -HS25_09827:2:1202:6843:44805#49 -HSQ1004:134:C0D8DACXX:4:2204:6625:180097 -HSQ1004:134:C0D8DACXX:4:1101:3001:60735 -HS25_09827:2:2206:17790:53544#49 -HS25_09827:2:1307:8814:51823#49 -HS25_09827:2:2215:1687:93023#49 -HSQ1004:134:C0D8DACXX:3:2103:4761:17695 -HS25_09827:2:1202:4062:89088#49 -HS25_09827:2:1311:21300:67638#49 -HSQ1004:134:C0D8DACXX:3:1106:13932:41089 -HS25_09827:2:1316:13926:93669#49 -HS25_09827:2:2312:18702:75973#49 -HS25_09827:2:2210:17684:60358#49 -HSQ1004:134:C0D8DACXX:2:1303:9481:54893 -HSQ1004:134:C0D8DACXX:3:1303:3996:195970 -HS25_09827:2:1304:5180:72696#49 -HSQ1004:134:C0D8DACXX:1:1204:13166:31965 -HS25_09827:2:2206:19602:66220#49 -HSQ1004:134:C0D8DACXX:1:2205:4086:19810 -HSQ1004:134:C0D8DACXX:1:1202:20553:91665 -HSQ1004:134:C0D8DACXX:1:2105:21148:113053 -HS25_09827:2:2102:18230:89095#49 -HS25_09827:2:1307:9204:54276#49 -HS25_09827:2:2113:4272:3191#49 -HSQ1004:134:C0D8DACXX:2:1301:14557:143187 -HS25_09827:2:2101:18181:81395#49 -HS25_09827:2:2109:13027:34285#49 -HSQ1004:134:C0D8DACXX:2:1104:10880:11830 
-HSQ1004:134:C0D8DACXX:4:1305:14882:69187 -HSQ1004:134:C0D8DACXX:1:1302:8463:16026 -HSQ1004:134:C0D8DACXX:4:1202:10856:124721 -HSQ1004:134:C0D8DACXX:4:2202:18719:108636 -HSQ1004:134:C0D8DACXX:3:2302:12361:120782 -HS25_09827:2:2210:20360:75764#49 -HS25_09827:2:2307:11545:61283#49 -HSQ1004:134:C0D8DACXX:2:1306:13779:74086 -HSQ1004:134:C0D8DACXX:1:1101:1931:191269 -HS25_09827:2:2305:12969:58890#49 -HS25_09827:2:2209:13044:37709#49 -HS25_09827:2:2204:8712:57171#49 -HSQ1004:134:C0D8DACXX:2:1301:10382:94663 -HSQ1004:134:C0D8DACXX:3:2105:13566:61259 -HS25_09827:2:1302:4043:11351#49 -HS25_09827:2:1202:8928:68929#49 -HSQ1004:134:C0D8DACXX:3:2306:18208:175188 -HSQ1004:134:C0D8DACXX:2:2106:20833:79202 -HS25_09827:2:1211:18538:93215#49 -HSQ1004:134:C0D8DACXX:4:2305:3205:41038 -HSQ1004:134:C0D8DACXX:2:2206:3418:42677 -HS25_09827:2:1306:17683:20831#49 -HS25_09827:2:2106:6669:33787#49 -HS25_09827:2:2308:3684:48133#49 -HS25_09827:2:2105:7239:83002#49 -HSQ1004:134:C0D8DACXX:4:2205:9701:192616 -HS25_09827:2:2212:14923:69432#49 -HS25_09827:2:2107:11738:57902#49 -HS25_09827:2:2113:2345:52945#49 -HSQ1004:134:C0D8DACXX:2:2204:13962:5717 -HSQ1004:134:C0D8DACXX:3:1203:9770:174273 -HS25_09827:2:2204:1931:53605#49 -HSQ1004:134:C0D8DACXX:4:1103:7623:61249 -HS25_09827:2:1315:6882:79007#49 -HSQ1004:134:C0D8DACXX:3:2102:6282:14694 -HSQ1004:134:C0D8DACXX:3:1304:4211:51255 -HSQ1004:134:C0D8DACXX:3:1201:13818:57744 -HS25_09827:2:2107:3225:8659#49 -HS25_09827:2:2301:1589:48956#49 -HSQ1004:134:C0D8DACXX:4:2207:7624:128125 -HSQ1004:134:C0D8DACXX:4:1108:6180:152940 -HS25_09827:2:1203:11016:98816#49 -HSQ1004:134:C0D8DACXX:2:2205:11388:72341 -HSQ1004:134:C0D8DACXX:4:2207:16341:35524 -HS25_09827:2:2214:3430:16347#49 -HSQ1004:134:C0D8DACXX:3:1203:4800:161709 -HSQ1004:134:C0D8DACXX:1:1305:1928:59908 -HSQ1004:134:C0D8DACXX:3:1208:5869:87491 -HS25_09827:2:1204:12978:39960#49 -HS25_09827:2:2212:4201:28730#49 -HSQ1004:134:C0D8DACXX:4:2206:10783:48860 -HSQ1004:134:C0D8DACXX:4:1207:5194:181814 
-HSQ1004:134:C0D8DACXX:2:1305:20945:176486 -HSQ1004:134:C0D8DACXX:1:1106:5570:58878 -HSQ1004:134:C0D8DACXX:2:2102:2603:115246 -HSQ1004:134:C0D8DACXX:1:1308:18892:148212 -HSQ1004:134:C0D8DACXX:1:1101:5823:8386 -HS25_09827:2:1203:12024:20353#49 -HS25_09827:2:1308:14698:49274#49 -HS25_09827:2:2209:6669:13024#49 -HSQ1004:134:C0D8DACXX:1:1103:11609:69526 -HSQ1004:134:C0D8DACXX:3:2301:20769:188457 -HSQ1004:134:C0D8DACXX:3:1105:1602:187453 -HSQ1004:134:C0D8DACXX:2:2207:20183:111327 -HSQ1004:134:C0D8DACXX:2:1101:18901:27864 -HS25_09827:2:1209:5469:83482#49 -HS25_09827:2:2108:2878:14515#49 -HSQ1004:134:C0D8DACXX:4:2305:15555:89612 -HSQ1004:134:C0D8DACXX:3:2102:4391:59408 -HSQ1004:134:C0D8DACXX:2:2105:17379:77503 -HS25_09827:2:2102:6546:79425#49 -HSQ1004:134:C0D8DACXX:1:2305:8167:118734 -HS25_09827:2:1307:16209:81271#49 -HS25_09827:2:1305:4420:57841#49 -HS25_09827:2:2305:15656:22761#49 -HS25_09827:2:1306:7169:17411#49 -HSQ1004:134:C0D8DACXX:3:1208:10379:129099 -HSQ1004:134:C0D8DACXX:2:2307:12369:69301 -HS25_09827:2:2107:21292:62365#49 -HSQ1004:134:C0D8DACXX:3:1305:2011:11327 -HSQ1004:134:C0D8DACXX:3:2101:15095:63123 -HS25_09827:2:1203:18421:82212#49 -HS25_09827:2:2107:16804:45730#49 -HS25_09827:2:1210:13829:26970#49 -HS25_09827:2:1312:14347:92538#49 -HSQ1004:134:C0D8DACXX:3:2107:11169:150089 -HS25_09827:2:2305:18257:46394#49 -HSQ1004:134:C0D8DACXX:4:1307:7664:61129 -HSQ1004:134:C0D8DACXX:4:1306:13310:39361 -HS25_09827:2:1302:3764:78536#49 -HSQ1004:134:C0D8DACXX:4:1101:11829:181246 -HS25_09827:2:2109:15051:33608#49 -HS25_09827:2:1305:3314:78955#49 -HSQ1004:134:C0D8DACXX:3:2307:17659:169838 -HSQ1004:134:C0D8DACXX:4:2302:6828:26256 -HSQ1004:134:C0D8DACXX:3:2101:4416:28360 -HSQ1004:134:C0D8DACXX:1:2305:14884:68167 -HS25_09827:2:1316:19854:2749#49 -HSQ1004:134:C0D8DACXX:4:2207:15633:9696 -HS25_09827:2:1315:10691:14865#49 -HSQ1004:134:C0D8DACXX:3:1308:5869:164744 -HSQ1004:134:C0D8DACXX:2:1305:14866:181148 -HSQ1004:134:C0D8DACXX:4:1308:18148:125275 -HS25_09827:2:1315:19239:8001#49 
-HSQ1004:134:C0D8DACXX:3:2302:7714:149459 -HS25_09827:2:2115:14332:81935#49 -HS25_09827:2:2216:14689:41558#49 -HSQ1004:134:C0D8DACXX:2:1305:11977:29782 -HS25_09827:2:2106:15911:10082#49 -HSQ1004:134:C0D8DACXX:1:1301:1333:143941 -HS25_09827:2:1316:4650:92257#49 -HS25_09827:2:1216:20057:37111#49 -HSQ1004:134:C0D8DACXX:3:1205:7120:170065 -HSQ1004:134:C0D8DACXX:2:1301:10135:56408 -HSQ1004:134:C0D8DACXX:3:1107:10145:164462 -HSQ1004:134:C0D8DACXX:4:1104:8042:5823 -HSQ1004:134:C0D8DACXX:2:2201:20123:151112 -HSQ1004:134:C0D8DACXX:3:1302:20246:136134 -HS25_09827:2:1301:11977:56324#49 -HSQ1004:134:C0D8DACXX:2:1201:5899:120111 -HSQ1004:134:C0D8DACXX:4:1301:7232:126436 -HS25_09827:2:2111:19129:82840#49 -HSQ1004:134:C0D8DACXX:3:2101:5897:143794 -HS25_09827:2:2208:3209:97372#49 -HSQ1004:134:C0D8DACXX:3:1308:13586:50868 -HS25_09827:2:2210:4128:33394#49 -HSQ1004:134:C0D8DACXX:1:1108:21133:129095 -HSQ1004:134:C0D8DACXX:3:1108:3330:152252 -HSQ1004:134:C0D8DACXX:2:1107:1606:189693 -HSQ1004:134:C0D8DACXX:4:1305:3474:71019 -HSQ1004:134:C0D8DACXX:1:2104:1640:86827 -HS25_09827:2:2306:5165:66049#49 -HSQ1004:134:C0D8DACXX:4:1103:18400:77759 -HS25_09827:2:1308:2549:38006#49 -HSQ1004:134:C0D8DACXX:1:2306:18311:92804 -HS25_09827:2:2112:8497:58468#49 -HSQ1004:134:C0D8DACXX:3:2105:9699:24080 -HSQ1004:134:C0D8DACXX:3:2105:15409:79992 -HS25_09827:2:1203:17815:53694#49 -HS25_09827:2:1305:4431:100431#49 -HS25_09827:2:2114:5264:41389#49 -HSQ1004:134:C0D8DACXX:3:2302:18641:126873 -HS25_09827:2:2310:11280:63634#49 -HS25_09827:2:1314:15073:52383#49 -HS25_09827:2:1311:15327:11811#49 -HSQ1004:134:C0D8DACXX:3:2102:1792:53465 -HS25_09827:2:2209:13655:46761#49 -HSQ1004:134:C0D8DACXX:2:1106:12636:199549 -HS25_09827:2:2316:3362:57863#49 -HS25_09827:2:1208:16435:82266#49 -HS25_09827:2:1311:10185:38164#49 -HSQ1004:134:C0D8DACXX:3:1102:18351:182488 -HS25_09827:2:2315:17764:69202#49 -HS25_09827:2:1306:10949:65509#49 -HS25_09827:2:1311:2822:21650#49 -HSQ1004:134:C0D8DACXX:2:1207:17032:99082 
-HSQ1004:134:C0D8DACXX:4:1108:13467:5225 -HSQ1004:134:C0D8DACXX:4:2302:11840:109111 -HS25_09827:2:1308:8080:35509#49 -HSQ1004:134:C0D8DACXX:2:2205:7059:65208 -HS25_09827:2:1305:10308:2653#49 -HS25_09827:2:2316:16654:87769#49 -HSQ1004:134:C0D8DACXX:2:2103:9816:168136 -HSQ1004:134:C0D8DACXX:4:2107:10732:29122 -HSQ1004:134:C0D8DACXX:4:1301:17617:50615 -HSQ1004:134:C0D8DACXX:2:1207:4790:14013 -HS25_09827:2:2213:14127:54704#49 -HS25_09827:2:1302:8951:85221#49 -HSQ1004:134:C0D8DACXX:4:1103:18062:7437 -HS25_09827:2:1311:10021:94720#49 -HSQ1004:134:C0D8DACXX:1:1202:18172:3456 -HS25_09827:2:1202:17543:30120#49 -HS25_09827:2:1301:14667:95011#49 -HS25_09827:2:2216:18482:39971#49 -HS25_09827:2:1201:7552:96956#49 -HSQ1004:134:C0D8DACXX:1:1104:3874:86238 -HS25_09827:2:2107:4165:3284#49 -HS25_09827:2:1314:14113:96444#49 -HSQ1004:134:C0D8DACXX:4:2203:20004:126566 -HS25_09827:2:1208:20477:70361#49 -HS25_09827:2:1304:9646:28632#49 -HSQ1004:134:C0D8DACXX:3:1102:15490:155101 -HSQ1004:134:C0D8DACXX:3:2307:5189:14151 -HS25_09827:2:1214:17868:88805#49 -HS25_09827:2:1204:17218:22554#49 -HS25_09827:2:1213:6753:57744#49 -HSQ1004:134:C0D8DACXX:4:2301:15542:156279 -HS25_09827:2:2115:10620:49851#49 -HSQ1004:134:C0D8DACXX:2:2305:16032:123018 -HSQ1004:134:C0D8DACXX:4:1304:5657:70745 -HSQ1004:134:C0D8DACXX:3:1107:3521:57523 -HS25_09827:2:1307:6415:83772#49 -HSQ1004:134:C0D8DACXX:2:2105:20041:39076 -HSQ1004:134:C0D8DACXX:4:1303:14771:73710 -HSQ1004:134:C0D8DACXX:4:1203:2828:199307 -HS25_09827:2:2103:5353:15579#49 -HS25_09827:2:1316:4695:99651#49 -HS25_09827:2:2216:17424:10006#49 -HS25_09827:2:1201:4903:35139#49 -HSQ1004:134:C0D8DACXX:3:1308:18488:176764 -HSQ1004:134:C0D8DACXX:1:1301:6024:129168 -HSQ1004:134:C0D8DACXX:3:2304:12488:153374 -HS25_09827:2:1307:4562:86732#49 -HS25_09827:2:2215:10153:33095#49 -HSQ1004:134:C0D8DACXX:3:2104:10911:124985 -HSQ1004:134:C0D8DACXX:2:1303:9586:76907 -HS25_09827:2:2210:15485:77280#49 -HSQ1004:134:C0D8DACXX:3:1107:9193:52015 
-HSQ1004:134:C0D8DACXX:3:2104:3402:169679 -HS25_09827:2:2103:3167:54060#49 -HS25_09827:2:1311:12723:14289#49 -HS25_09827:2:2303:11846:87193#49 -HS25_09827:2:2302:17943:47483#49 -HSQ1004:134:C0D8DACXX:2:1107:8273:106533 -HSQ1004:134:C0D8DACXX:2:1205:20631:123773 -HS25_09827:2:2111:11364:91626#49 -HSQ1004:134:C0D8DACXX:3:2302:4420:190571 -HS25_09827:2:2304:12079:44745#49 -HS25_09827:2:2315:14814:67712#49 -HSQ1004:134:C0D8DACXX:1:2301:19657:73379 -HS25_09827:2:1312:15067:85485#49 -HSQ1004:134:C0D8DACXX:3:1203:18476:24796 -HSQ1004:134:C0D8DACXX:1:2206:9154:176074 -HS25_09827:2:2313:15537:56284#49 -HSQ1004:134:C0D8DACXX:2:2205:1832:189946 -HSQ1004:134:C0D8DACXX:4:1303:9062:94982 -HSQ1004:134:C0D8DACXX:2:2108:17928:185627 -HSQ1004:134:C0D8DACXX:3:2103:11644:42495 -HSQ1004:134:C0D8DACXX:4:1306:6680:165070 -HS25_09827:2:2108:2991:8377#49 -HSQ1004:134:C0D8DACXX:4:1304:20922:163712 -HSQ1004:134:C0D8DACXX:1:1303:13164:20243 -HSQ1004:134:C0D8DACXX:4:2301:10017:36811 -HSQ1004:134:C0D8DACXX:1:2205:12092:100970 -HS25_09827:2:2315:11618:93827#49 -HS25_09827:2:2206:19602:66220#49 -HS25_09827:2:1201:16888:97184#49 -HS25_09827:2:1308:17426:24233#49 -HS25_09827:2:1209:9659:24523#49 -HSQ1004:134:C0D8DACXX:1:2304:19337:46745 -HS25_09827:2:1309:20437:42964#49 -HS25_09827:2:1313:18528:56122#49 -HSQ1004:134:C0D8DACXX:3:2104:12390:28011 -HSQ1004:134:C0D8DACXX:4:1308:14586:35107 -HSQ1004:134:C0D8DACXX:2:1107:8093:152693 -HS25_09827:2:2303:11846:52551#49 -HS25_09827:2:1210:11984:21243#49 -HS25_09827:2:2110:7934:80781#49 -HSQ1004:134:C0D8DACXX:3:1307:19170:62441 -HS25_09827:2:2215:2190:29721#49 -HS25_09827:2:2307:19691:2548#49 -HSQ1004:134:C0D8DACXX:4:2306:2601:148226 -HS25_09827:2:1302:13512:58758#49 -HSQ1004:134:C0D8DACXX:3:1304:6691:156646 -HSQ1004:134:C0D8DACXX:2:2204:12959:114270 -HSQ1004:134:C0D8DACXX:2:2301:9791:160269 -HSQ1004:134:C0D8DACXX:2:1104:12223:92191 -HS25_09827:2:1215:14381:88552#49 -HS25_09827:2:2313:3963:65818#49 -HSQ1004:134:C0D8DACXX:1:1201:9460:190805 
-HSQ1004:134:C0D8DACXX:3:2102:5435:193271 -HS25_09827:2:1310:12071:10727#49 -HS25_09827:2:2215:6539:40931#49 -HS25_09827:2:2209:9664:62793#49 -HSQ1004:134:C0D8DACXX:4:2202:10129:45815 -HS25_09827:2:2308:10100:36378#49 -HS25_09827:2:2209:14906:20246#49 -HS25_09827:2:1201:9798:98718#49 -HSQ1004:134:C0D8DACXX:2:2207:8588:88593 -HS25_09827:2:1306:2115:46700#49 -HSQ1004:134:C0D8DACXX:2:1107:9917:192410 -HSQ1004:134:C0D8DACXX:1:1105:4486:95379 -HS25_09827:2:2310:5810:3570#49 -HS25_09827:2:1203:11016:98816#49 -HSQ1004:134:C0D8DACXX:1:2306:3845:55220 -HSQ1004:134:C0D8DACXX:2:1307:16157:159452 -HSQ1004:134:C0D8DACXX:1:2101:10622:69174 -HSQ1004:134:C0D8DACXX:4:1305:9532:123845 -HS25_09827:2:2314:14488:97620#49 -HSQ1004:134:C0D8DACXX:3:1203:12538:142428 -HS25_09827:2:1313:11154:53028#49 -HSQ1004:134:C0D8DACXX:4:2305:19674:101478 -HSQ1004:134:C0D8DACXX:2:2106:5415:157394 -HSQ1004:134:C0D8DACXX:1:2306:19614:119474 -HS25_09827:2:1309:8720:95816#49 -HSQ1004:134:C0D8DACXX:3:1101:13827:96472 -HSQ1004:134:C0D8DACXX:2:1306:10400:189523 -HS25_09827:2:1202:12577:92588#49 -HS25_09827:2:2208:8423:98594#49 -HSQ1004:134:C0D8DACXX:1:1204:14682:164416 -HSQ1004:134:C0D8DACXX:1:2205:8463:28306 -HS25_09827:2:2305:9633:49844#49 -HSQ1004:134:C0D8DACXX:1:1202:19161:144822 -HSQ1004:134:C0D8DACXX:4:1301:17617:50615 -HS25_09827:2:1210:18499:87359#49 -HS25_09827:2:1307:17354:89291#49 -HS25_09827:2:1205:9397:57189#49 -HSQ1004:134:C0D8DACXX:1:1204:8923:173273 -HSQ1004:134:C0D8DACXX:4:1303:20291:105739 -HS25_09827:2:2203:17189:40901#49 -HSQ1004:134:C0D8DACXX:1:1207:1929:7264 -HSQ1004:134:C0D8DACXX:2:1206:15091:96886 -HSQ1004:134:C0D8DACXX:4:2307:17929:60928 -HS25_09827:2:2113:18257:15139#49 -HSQ1004:134:C0D8DACXX:1:1205:12523:76640 -HSQ1004:134:C0D8DACXX:1:1206:17736:156741 -HS25_09827:2:1202:7653:3054#49 -HS25_09827:2:2211:12219:39151#49 -HS25_09827:2:1305:4741:70042#49 -HSQ1004:134:C0D8DACXX:1:2205:17643:77943 -HSQ1004:134:C0D8DACXX:3:1107:12294:191040 -HS25_09827:2:2209:8102:68451#49 
-HS25_09827:2:1210:18499:87359#49 -HSQ1004:134:C0D8DACXX:4:1304:9328:162708 -HSQ1004:134:C0D8DACXX:1:2305:15075:68236 -HSQ1004:134:C0D8DACXX:4:2103:6760:16363 -HSQ1004:134:C0D8DACXX:3:1104:1206:121562 -HS25_09827:2:2112:10916:83289#49 -HSQ1004:134:C0D8DACXX:4:1105:11426:34838 -HSQ1004:134:C0D8DACXX:3:2206:10970:181043 -HSQ1004:134:C0D8DACXX:2:2303:4750:5758 -HS25_09827:2:1313:4004:54225#49 -HSQ1004:134:C0D8DACXX:1:2306:15367:77754 -HSQ1004:134:C0D8DACXX:3:2106:3829:88745 -HSQ1004:134:C0D8DACXX:2:2102:7252:158339 -HS25_09827:2:1211:5533:53761#49 -HSQ1004:134:C0D8DACXX:4:2208:7062:2770 -HS25_09827:2:2309:6642:13240#49 -HSQ1004:134:C0D8DACXX:3:1101:12487:105694 -HSQ1004:134:C0D8DACXX:3:2101:20100:140424 -HSQ1004:134:C0D8DACXX:2:2202:12070:3960 -HSQ1004:134:C0D8DACXX:3:2202:6408:9262 -HSQ1004:134:C0D8DACXX:3:2301:1289:70617 -HS25_09827:2:2110:5824:88074#49 -HS25_09827:2:2102:10018:33863#49 -HSQ1004:134:C0D8DACXX:4:2107:19177:54899 -HS25_09827:2:2107:11642:99657#49 -HSQ1004:134:C0D8DACXX:3:1304:20704:174105 -HS25_09827:2:2101:11389:9006#49 -HS25_09827:2:2309:17895:46679#49 -HS25_09827:2:1301:12324:86298#49 -HS25_09827:2:1306:6867:41707#49 -HS25_09827:2:2307:4544:75596#49 -HS25_09827:2:2101:19680:8831#49 -HSQ1004:134:C0D8DACXX:3:2201:8024:28586 -HSQ1004:134:C0D8DACXX:3:1202:5297:50617 -HS25_09827:2:1202:11798:14659#49 -HS25_09827:2:2105:5535:96769#49 -HSQ1004:134:C0D8DACXX:4:1204:8082:166693 -HSQ1004:134:C0D8DACXX:3:1108:10958:117033 -HS25_09827:2:2116:4638:41818#49 -HS25_09827:2:2216:3575:24333#49 -HS25_09827:2:2202:16316:31657#49 -HS25_09827:2:1308:2947:38308#49 -HSQ1004:134:C0D8DACXX:3:2106:8945:36081 -HSQ1004:134:C0D8DACXX:4:2107:18959:196381 -HS25_09827:2:1211:17330:49667#49 -HS25_09827:2:1205:16489:9062#49 -HS25_09827:2:1304:14787:49381#49 -HSQ1004:134:C0D8DACXX:4:2304:15708:152479 -HSQ1004:134:C0D8DACXX:3:2105:17516:37649 -HSQ1004:134:C0D8DACXX:3:1307:8980:161197 -HS25_09827:2:2302:10064:59651#49 -HSQ1004:134:C0D8DACXX:1:1306:9176:125383 
-HS25_09827:2:2203:4541:40634#49 -HSQ1004:134:C0D8DACXX:4:1307:14802:153947 -HSQ1004:134:C0D8DACXX:2:1306:10400:189523 -HS25_09827:2:1208:17955:15896#49 -HS25_09827:2:1205:15020:79646#49 -HS25_09827:2:1306:14394:79750#49 -HS25_09827:2:2212:9720:100086#49 -HSQ1004:134:C0D8DACXX:4:1302:14770:67740 -HSQ1004:134:C0D8DACXX:3:2306:11660:26621 -HS25_09827:2:1303:12540:12346#49 -HS25_09827:2:2314:12471:77609#49 -HSQ1004:134:C0D8DACXX:3:2304:10742:122908 -HS25_09827:2:2305:14468:80204#49 -HS25_09827:2:2203:18564:71623#49 -HS25_09827:2:1316:8679:9587#49 -HS25_09827:2:2114:3429:49463#49 -HSQ1004:134:C0D8DACXX:3:1203:11333:6269 -HS25_09827:2:1314:4090:36151#49 -HSQ1004:134:C0D8DACXX:4:2302:2711:140490 -HSQ1004:134:C0D8DACXX:1:2204:1864:28788 -HSQ1004:134:C0D8DACXX:2:1203:5413:65044 -HS25_09827:2:2113:13093:32943#49 -HSQ1004:134:C0D8DACXX:1:1308:6081:79237 -HS25_09827:2:2310:19636:49650#49 -HSQ1004:134:C0D8DACXX:2:1106:20950:152878 -HSQ1004:134:C0D8DACXX:4:2306:20769:131876 -HSQ1004:134:C0D8DACXX:3:1302:10269:143134 -HSQ1004:134:C0D8DACXX:2:2302:20819:106709 -HS25_09827:2:2304:11317:51659#49 -HSQ1004:134:C0D8DACXX:1:2104:7247:13977 -HS25_09827:2:1304:21083:23824#49 -HS25_09827:2:1309:11272:83928#49 -HSQ1004:134:C0D8DACXX:1:2208:1165:34981 -HS25_09827:2:2112:6672:15226#49 -HS25_09827:2:2107:14711:100646#49 -HSQ1004:134:C0D8DACXX:3:1108:11417:22364 -HS25_09827:2:2314:4420:65822#49 -HSQ1004:134:C0D8DACXX:2:2308:17143:7180 -HSQ1004:134:C0D8DACXX:1:1104:21395:103091 -HSQ1004:134:C0D8DACXX:3:1106:4879:148518 -HS25_09827:2:2211:15225:87288#49 -HS25_09827:2:2206:18254:55092#49 -HSQ1004:134:C0D8DACXX:3:1207:9363:112757 -HS25_09827:2:1303:17705:10192#49 -HSQ1004:134:C0D8DACXX:3:1106:13018:70473 -HSQ1004:134:C0D8DACXX:1:2304:12009:67625 -HSQ1004:134:C0D8DACXX:1:2206:8848:127008 -HSQ1004:134:C0D8DACXX:1:1203:7723:11670 -HSQ1004:134:C0D8DACXX:3:2106:18789:87324 -HSQ1004:134:C0D8DACXX:3:2108:11146:108450 -HS25_09827:2:1315:21291:84059#49 -HS25_09827:2:1213:10676:80143#49 
-HSQ1004:134:C0D8DACXX:1:2301:18539:96080 -HS25_09827:2:2101:16364:15831#49 -HS25_09827:2:2110:8222:80324#49 -HSQ1004:134:C0D8DACXX:2:1103:1539:45063 -HS25_09827:2:1310:13684:70686#49 -HS25_09827:2:1203:1865:53233#49 -HSQ1004:134:C0D8DACXX:3:1206:8830:8330 -HS25_09827:2:1202:1776:92264#49 -HSQ1004:134:C0D8DACXX:4:1301:6107:173013 -HS25_09827:2:2214:2720:79425#49 -HS25_09827:2:1314:2535:17090#49 -HSQ1004:134:C0D8DACXX:4:1205:12265:91976 -HSQ1004:134:C0D8DACXX:4:2101:16906:160674 -HS25_09827:2:2207:12827:65694#49 -HSQ1004:134:C0D8DACXX:1:2305:14845:42093 -HS25_09827:2:2302:1528:16794#49 -HS25_09827:2:1213:5785:43892#49 -HSQ1004:134:C0D8DACXX:2:2203:5443:112937 -HS25_09827:2:1214:17469:22254#49 -HSQ1004:134:C0D8DACXX:2:1204:6173:170233 -HS25_09827:2:1210:5673:83270#49 -HS25_09827:2:1303:20758:12363#49 -HS25_09827:2:1308:18691:20988#49 -HSQ1004:134:C0D8DACXX:4:2102:6380:149764 -HS25_09827:2:2115:5891:36879#49 -HS25_09827:2:1305:15643:61163#49 -HSQ1004:134:C0D8DACXX:1:2205:17463:126513 -HSQ1004:134:C0D8DACXX:2:2103:3086:174563 -HSQ1004:134:C0D8DACXX:2:2307:8305:127429 -HSQ1004:134:C0D8DACXX:3:1302:3276:175449 -HSQ1004:134:C0D8DACXX:3:1308:13981:31805 -HSQ1004:134:C0D8DACXX:3:1206:14036:13433 -HS25_09827:2:2104:6058:9631#49 -HSQ1004:134:C0D8DACXX:3:2308:4584:40707 -HSQ1004:134:C0D8DACXX:3:1101:12349:104534 -HS25_09827:2:1206:13874:59516#49 -HS25_09827:2:2315:20178:6711#49 -HS25_09827:2:2305:19946:34556#49 -HSQ1004:134:C0D8DACXX:2:1208:9049:173476 -HS25_09827:2:2116:18279:18948#49 -HS25_09827:2:1203:16396:75027#49 -HSQ1004:134:C0D8DACXX:2:1203:20988:150855 -HS25_09827:2:1201:20471:32121#49 -HSQ1004:134:C0D8DACXX:2:2108:5433:163143 -HS25_09827:2:2106:15769:75986#49 -HS25_09827:2:2209:4461:40815#49 -HSQ1004:134:C0D8DACXX:2:2108:8104:183010 -HSQ1004:134:C0D8DACXX:2:1305:15659:48609 -HS25_09827:2:2309:4235:78654#49 -HSQ1004:134:C0D8DACXX:1:1108:6941:64361 -HS25_09827:2:2314:5936:86287#49 -HSQ1004:134:C0D8DACXX:2:1103:21258:167307 -HSQ1004:134:C0D8DACXX:2:2308:15599:42394 
-HSQ1004:134:C0D8DACXX:3:2103:7646:134487 -HSQ1004:134:C0D8DACXX:1:1201:4025:36908 -HSQ1004:134:C0D8DACXX:3:1305:15845:96850 -HS25_09827:2:2313:5945:21114#49 -HS25_09827:2:2213:2204:24442#49 -HS25_09827:2:2211:16023:34627#49 -HSQ1004:134:C0D8DACXX:4:1303:18566:129452 -HSQ1004:134:C0D8DACXX:1:1306:1270:97146 -HSQ1004:134:C0D8DACXX:2:2106:11656:155231 -HSQ1004:134:C0D8DACXX:1:1305:15917:67032 -HS25_09827:2:1203:13914:20963#49 -HSQ1004:134:C0D8DACXX:3:2204:21087:184981 -HSQ1004:134:C0D8DACXX:2:1205:8979:77363 -HS25_09827:2:2101:8065:10454#49 -HSQ1004:134:C0D8DACXX:1:1108:12962:192074 -HSQ1004:134:C0D8DACXX:2:1104:14396:169176 -HSQ1004:134:C0D8DACXX:4:1105:7148:175361 -HS25_09827:2:2106:15202:9898#49 -HS25_09827:2:2103:12501:38913#49 -HSQ1004:134:C0D8DACXX:4:1304:10287:29518 -HSQ1004:134:C0D8DACXX:4:1206:18609:90312 -HSQ1004:134:C0D8DACXX:3:2205:10247:168123 -HSQ1004:134:C0D8DACXX:4:2305:11340:182382 -HSQ1004:134:C0D8DACXX:2:1107:6702:113824 -HSQ1004:134:C0D8DACXX:3:2101:5340:94081 -HSQ1004:134:C0D8DACXX:3:1101:20965:108753 -HS25_09827:2:1302:8726:74176#49 -HS25_09827:2:1206:12700:31503#49 -HS25_09827:2:1312:11207:34147#49 -HS25_09827:2:1210:13018:27047#49 -HSQ1004:134:C0D8DACXX:1:1202:9050:194686 -HSQ1004:134:C0D8DACXX:2:2203:16641:138739 -HSQ1004:134:C0D8DACXX:4:2201:3750:34428 -HS25_09827:2:1208:5338:23746#49 -HS25_09827:2:1303:15888:60814#49 -HSQ1004:134:C0D8DACXX:1:2101:10783:199089 -HS25_09827:2:1313:11612:44384#49 -HSQ1004:134:C0D8DACXX:3:2108:13794:127337 -HSQ1004:134:C0D8DACXX:2:2301:16868:145497 -HSQ1004:134:C0D8DACXX:3:1206:16723:180601 -HSQ1004:134:C0D8DACXX:3:1101:6120:64861 -HSQ1004:134:C0D8DACXX:1:1306:9219:31192 -HS25_09827:2:1305:12276:23377#49 -HS25_09827:2:2303:11013:90229#49 -HS25_09827:2:2307:17021:73777#49 -HS25_09827:2:2110:4045:50375#49 -HS25_09827:2:2207:17036:93328#49 -HSQ1004:134:C0D8DACXX:1:2307:17200:120246 -HSQ1004:134:C0D8DACXX:4:1205:8637:86825 -HSQ1004:134:C0D8DACXX:2:2205:13787:99115 -HS25_09827:2:2305:2550:70411#49 
-HSQ1004:134:C0D8DACXX:2:1206:10534:66442 -HS25_09827:2:2204:18952:64845#49 -HS25_09827:2:1207:18338:40764#49 -HS25_09827:2:2104:16309:25528#49 diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.1 deleted file mode 100644 index c7f846a..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.11 deleted file mode 100644 index cafca3b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.13 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.13 deleted file mode 100644 index 7f4ec67..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.15 deleted file mode 100644 index e147148..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.17 deleted file mode 100644 index 859bc10..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.19 deleted file mode 100644 index 6f4faaa..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.3 deleted file mode 100644 index 0a9e3eb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.3 and /dev/null differ diff --git 
a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.5 deleted file mode 100644 index 35939d9..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.7 deleted file mode 100644 index fc81996..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.9 deleted file mode 100644 index fc81996..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/01.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.1 deleted file mode 100644 index b3f2dcb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.11 deleted file mode 100644 index d973c17..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.13 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.13 deleted file mode 100644 index 0cf7b13..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.15 deleted file mode 100644 index 2432925..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.17 deleted file mode 100644 index d9bfe5b..0000000 
Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.19 deleted file mode 100644 index d9bfe5b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.3 deleted file mode 100644 index 480d170..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.5 deleted file mode 100644 index 21f7d1d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.7 deleted file mode 100644 index 04e6016..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.9 deleted file mode 100644 index d7e14f7..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/02.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.1 deleted file mode 100644 index 64de82b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.11 deleted file mode 100644 index cb25273..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.13 
b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.13 deleted file mode 100644 index 1bb2e86..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.15 deleted file mode 100644 index 0941248..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.17 deleted file mode 100644 index cc5ad9d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.19 deleted file mode 100644 index 85e1d99..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.3 deleted file mode 100644 index ba4ec98..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.5 deleted file mode 100644 index 007681b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.7 deleted file mode 100644 index 2234ab4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.9 deleted file mode 100644 index 2234ab4..0000000 Binary files 
a/src/htslib-1.18/htscodecs/tests/names/tok3/03.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.1 deleted file mode 100644 index 5581322..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.11 deleted file mode 100644 index d53d34a..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.13 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.13 deleted file mode 100644 index 815798d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.15 deleted file mode 100644 index 9e7a85b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.17 deleted file mode 100644 index 86885da..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.19 deleted file mode 100644 index 86885da..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.3 deleted file mode 100644 index 7479ec4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.5 
b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.5 deleted file mode 100644 index 855802f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.7 deleted file mode 100644 index 4c61e91..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.9 deleted file mode 100644 index 4c61e91..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/05.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.1 deleted file mode 100644 index f311edb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.11 deleted file mode 100644 index 6701cd9..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.13 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.13 deleted file mode 100644 index 221bfeb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.15 deleted file mode 100644 index e12138d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.17 deleted file mode 100644 index 4d27529..0000000 Binary files 
a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.19 deleted file mode 100644 index 4d27529..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.3 deleted file mode 100644 index 2bab8f4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.5 deleted file mode 100644 index 06a148f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.7 deleted file mode 100644 index a957b38..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.9 deleted file mode 100644 index a957b38..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/08.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.1 deleted file mode 100644 index 26c88b8..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.11 deleted file mode 100644 index 09d4fb1..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.13 
b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.13 deleted file mode 100644 index 32b17e0..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.15 deleted file mode 100644 index bc904a2..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.17 deleted file mode 100644 index 3bc9a2a..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.19 deleted file mode 100644 index 3bc9a2a..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.3 deleted file mode 100644 index 2292620..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.5 deleted file mode 100644 index eaf12c4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.7 deleted file mode 100644 index c12b599..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.9 deleted file mode 100644 index c12b599..0000000 Binary files 
a/src/htslib-1.18/htscodecs/tests/names/tok3/09.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.1 deleted file mode 100644 index e791dbb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.11 deleted file mode 100644 index 7609c65..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.13 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.13 deleted file mode 100644 index f4b8466..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.15 deleted file mode 100644 index 6be1dac..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.17 deleted file mode 100644 index 6fa657b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.19 deleted file mode 100644 index 6fa657b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.3 deleted file mode 100644 index 2ea721f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.5 
b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.5 deleted file mode 100644 index 7b93e7f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.7 deleted file mode 100644 index 6d308cb..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.9 deleted file mode 100644 index ae93cc6..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/10.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.1 deleted file mode 100644 index e4614cc..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.11 deleted file mode 100644 index 15694dc..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.13 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.13 deleted file mode 100644 index 07aba45..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.15 deleted file mode 100644 index a76213d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.17 deleted file mode 100644 index e3ef5e7..0000000 Binary files 
a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.19 deleted file mode 100644 index e3ef5e7..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.3 deleted file mode 100644 index f050dce..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.5 deleted file mode 100644 index bab2c82..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.7 deleted file mode 100644 index b6988c4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.9 deleted file mode 100644 index b6988c4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/20.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.1 deleted file mode 100644 index 825d5e8..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.11 deleted file mode 100644 index f1fd758..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.13 
b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.13 deleted file mode 100644 index 3327cea..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.15 deleted file mode 100644 index 529a6f6..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.17 deleted file mode 100644 index e09e40d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.19 deleted file mode 100644 index e09e40d..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.3 deleted file mode 100644 index c676691..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.5 deleted file mode 100644 index 0d5b78b..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.7 deleted file mode 100644 index 60197d6..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.9 deleted file mode 100644 index 60197d6..0000000 Binary files 
a/src/htslib-1.18/htscodecs/tests/names/tok3/nv.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.1 deleted file mode 100644 index be61040..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.11 deleted file mode 100644 index 3087b99..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.13 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.13 deleted file mode 100644 index b7211ca..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.15 deleted file mode 100644 index 2ab18c1..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.17 deleted file mode 100644 index 0ba48d4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.19 deleted file mode 100644 index 0ba48d4..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.3 deleted file mode 100644 index 03b147f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.3 and /dev/null differ diff --git 
a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.5 deleted file mode 100644 index d607d49..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.7 deleted file mode 100644 index e3fb555..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.9 deleted file mode 100644 index e3fb555..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/nv2.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.1 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.1 deleted file mode 100644 index 0237e32..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.1 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.11 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.11 deleted file mode 100644 index 36839f7..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.11 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.13 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.13 deleted file mode 100644 index 1edf26f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.13 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.15 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.15 deleted file mode 100644 index 8a44eb1..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.15 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.17 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.17 deleted file mode 100644 index 
52f2252..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.17 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.19 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.19 deleted file mode 100644 index 52f2252..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.19 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.3 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.3 deleted file mode 100644 index 1b28cd6..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.3 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.5 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.5 deleted file mode 100644 index cb11322..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.5 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.7 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.7 deleted file mode 100644 index c2d282f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.7 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.9 b/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.9 deleted file mode 100644 index c2d282f..0000000 Binary files a/src/htslib-1.18/htscodecs/tests/names/tok3/rr.names.9 and /dev/null differ diff --git a/src/htslib-1.18/htscodecs/tests/rANS_static4x16pr_fuzz.c b/src/htslib-1.18/htscodecs/tests/rANS_static4x16pr_fuzz.c deleted file mode 100644 index 4bc4e99..0000000 --- a/src/htslib-1.18/htscodecs/tests/rANS_static4x16pr_fuzz.c +++ /dev/null @@ -1,139 +0,0 @@ -/* Fuzz testing target. */ -/* - * Copyright (c) 2019,2020 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -/* -For best results, configure, from a build subdir, to use the address and -undefined behaviour sanitizers, and run "make fuzz". -E.g.: - -../configure CFLAGS='-g -gdwarf-2 -O3 -Wall -fsanitize=address,undefined' CPPFLAGS='-DUBSAN' -make fuzz - -Run with: - export ASAN_OPTIONS=allow_addr2line=1 - export UBSAN_OPTION=halt_on_error=1 - tests/rANS_static4x16pr_fuzz corpus -or - tests/rANS_static4x16pr_fuzz -detect_leaks=0 corpus - -I generated corpus as a whole bunch of precompressed tiny inputs from -tests/dat/q4 for different compression modes. 
- -For debugging purposes, we can compile a non-fuzzer non-ASAN build using --DNOFUZZ which creates a binary we can debug on any libfuzzer generated -output using valgrind. (The rans4x16 command line test won't quite work as -it's a slightly different input format with explicit sizes in the binary -stream.) -*/ - -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/rANS_static4x16.h" - -int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { - int c; - unsigned int uncomp_size = 0; - unsigned char *uncomp; - - const int cpu_dec_a[] = { - 0 -#if defined(__x86_64__) - , RANS_CPU_DEC_SSE4, RANS_CPU_DEC_AVX2, RANS_CPU_DEC_AVX512 -#endif -#if defined(__ARM_NEON) - , RANS_CPU_DEC_NEON -#endif - }; - - for (c = 0; c < sizeof(cpu_dec_a)/sizeof(*cpu_dec_a); c++) { - rans_set_cpu(cpu_dec_a[c]); - uncomp = rans_uncompress_4x16(in, in_size, &uncomp_size); - if (uncomp) - free(uncomp); - } - - return 0; -} - -#ifdef NOFUZZ -#include -#include -#include - -#define BS 1024*1024 -static unsigned char *load(char *fn, uint64_t *lenp) { - unsigned char *data = NULL; - uint64_t dsize = 0; - uint64_t dcurr = 0; - signed int len; - int fd = open(fn, O_RDONLY); - - do { - if (dsize - dcurr < BS) { - dsize = dsize ? 
dsize * 2 : BS; - data = realloc(data, dsize); - } - - len = read(fd, data + dcurr, BS); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("read"); - } - - close(fd); - *lenp = dcurr; - return data; -} - -int main(int argc, char **argv) { - uint64_t in_size; - unsigned char *in = load(argv[1], &in_size); - - LLVMFuzzerTestOneInput(in, in_size); - - free(in); - - return 0; -} -#endif diff --git a/src/htslib-1.18/htscodecs/tests/rANS_static4x16pr_test.c b/src/htslib-1.18/htscodecs/tests/rANS_static4x16pr_test.c deleted file mode 100644 index fbd1b59..0000000 --- a/src/htslib-1.18/htscodecs/tests/rANS_static4x16pr_test.c +++ /dev/null @@ -1,344 +0,0 @@ -/* Tests for 4x16 rANS codec */ -/* - * Copyright (c) 2019,2020 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/rANS_static4x16.h" - -#ifndef BLK_SIZE -// Divisible by 4 for X4. -// Also works well for 32-way SIMD -# define BLK_SIZE 0x103810 -#endif - -unsigned char *in_buf; - -// Max 4GB -static unsigned char *load(FILE *infp, uint32_t *lenp) { - unsigned char *data = NULL; - uint32_t dsize = 0; - uint32_t dcurr = 0; - signed int len; - - do { - if (dsize - dcurr < BLK_SIZE) { - dsize = dsize ? 
dsize * 2 : BLK_SIZE; - data = realloc(data, dsize); - } - - len = fread(data + dcurr, 1, BLK_SIZE, infp); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("fread"); - } - - *lenp = dcurr; - return data; -} - -int main(int argc, char **argv) { - int opt, order = 0; - int decode = 0, test = 0; - FILE *infp = stdin, *outfp = stdout; - struct timeval tv1, tv2, tv3, tv4; - size_t bytes = 0, raw = 0; - uint32_t blk_size = BLK_SIZE; - -#ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif - - extern char *optarg; - extern int optind; - extern void force_sw32_decoder(void); - extern void rans_disable_avx512(void); - extern void rans_disable_avx2(void); - - while ((opt = getopt(argc, argv, "o:dtrc:b:")) != -1) { - switch (opt) { - case 'o': { - char *optend; - order = strtol(optarg, &optend, 0); - // 8.2 means 2-way stripe - if (*optend == '.') - order += atoi(optend+1)<<8; - break; - } - - case 'c': - rans_set_cpu(strtol(optarg, NULL, 0)); - break; - - case 'd': - decode = 1; - break; - - case 't': - test = 1; - break; - - case 'r': - raw = 1; - break; - - case 'b': - blk_size = atoi(optarg); - break; - } - } - - // Room to allow for expanded BLK_SIZE on worst case compression. 
- uint32_t blk_size2 = (105LL*blk_size)/100; - in_buf = malloc(blk_size2+257*257*3); - - if (optind < argc) { - if (!(infp = fopen(argv[optind], "rb"))) { - perror(argv[optind]); - return 1; - } - optind++; - } - - if (optind < argc) { - if (!(outfp = fopen(argv[optind], "wb"))) { - perror(argv[optind]); - return 1; - } - optind++; - } - - gettimeofday(&tv1, NULL); - - if (test) { - size_t len, in_sz = 0, out_sz = 0; - typedef struct { - unsigned char *blk; - uint32_t sz; - uint32_t csz; - } blocks; - blocks *b = NULL, *bc = NULL, *bu = NULL; - int nb = 0, i; - - if (raw) { - b = malloc(sizeof(*b)); - bu = malloc(sizeof(*bu)); - bc = malloc(sizeof(*bc)); - b[0].blk = load(infp, &blk_size); - - // Deliberately realloc down to in_size so we can use address - // sanitizer to check for input buffer overruns. - b[0].blk = realloc(b[0].blk, blk_size); - - b[0].sz = blk_size; - bc[0].sz = rans_compress_bound_4x16(blk_size, order); - bc[0].blk = malloc(bc[0].sz); - bu[0].sz = blk_size; - bu[0].blk = malloc(blk_size); - nb = 1; - in_sz = blk_size; - } else { - while ((len = fread(in_buf, 1, blk_size, infp)) != 0) { - // inefficient, but it'll do for testing - b = realloc(b, (nb+1)*sizeof(*b)); - bu = realloc(bu, (nb+1)*sizeof(*bu)); - bc = realloc(bc, (nb+1)*sizeof(*bc)); - b[nb].blk = malloc(len); - b[nb].sz = len; - memcpy(b[nb].blk, in_buf, len); - bc[nb].sz = rans_compress_bound_4x16(blk_size, order); - bc[nb].blk = malloc(bc[nb].sz); - bu[nb].sz = len; - bu[nb].blk = malloc(blk_size); - nb++; - in_sz += len; - } - } - fprintf(stderr, "Testing %d blocks\n", nb); - -#ifndef NTRIALS -#define NTRIALS 5 -#endif - int trials = NTRIALS; - while (trials--) { - // Warmup - for (i = 0; i < nb; i++) memset(bc[i].blk, 0, bc[i].sz); - - gettimeofday(&tv1, NULL); - - out_sz = 0; - for (i = 0; i < nb; i++) { - unsigned int csz = bc[i].sz; - bc[i].blk = rans_compress_to_4x16(b[i].blk, b[i].sz, bc[i].blk, &csz, order); - assert(csz <= bc[i].sz); - bc[i].csz = csz; - out_sz += 5 + csz; 
- } - - gettimeofday(&tv2, NULL); - - // Warmup - for (i = 0; i < nb; i++) memset(bu[i].blk, 0, blk_size); - - gettimeofday(&tv3, NULL); - - for (i = 0; i < nb; i++) - bu[i].blk = rans_uncompress_to_4x16(bc[i].blk, bc[i].csz, bu[i].blk, &bu[i].sz); - - gettimeofday(&tv4, NULL); - - for (i = 0; i < nb; i++) { - if (b[i].sz != bu[i].sz || memcmp(b[i].blk, bu[i].blk, b[i].sz)) { - int z; - for (z = 0; z < b[i].sz; z++) - if (b[i].blk[z] != bu[i].blk[z]) - break; - fprintf(stderr, "Mismatch in block %d, sz %d/%d, pos %d, got %d wanted %d\n", i, b[i].sz, bu[i].sz, z, b[i].blk[z], bu[i].blk[z]); - } - //free(bc[i].blk); - //free(bu[i].blk); - } - - fprintf(stderr, "%5.1f MB/s enc, %5.1f MB/s dec\t %ld bytes -> %ld bytes\n", - (double)in_sz / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec), - (double)in_sz / ((long)(tv4.tv_sec - tv3.tv_sec)*1000000 + - tv4.tv_usec - tv3.tv_usec), - (long)in_sz, (long)out_sz); - } - - exit(0); - - } - - if (raw) { - // One naked / raw block, to match the specification - uint32_t in_size, out_size; - unsigned char *in = load(infp, &in_size), *out; - if (!in) exit(1); - - // Deliberately realloc down to in_size so we can use address - // sanitizer to check for input buffer overruns. 
- in = realloc(in, in_size); - - if (decode) { - if (!(out = rans_uncompress_4x16(in, in_size, &out_size))) - exit(1); - - fwrite(out, 1, out_size, outfp); - bytes = out_size; - } else { - if (!(out = rans_compress_4x16(in, in_size, &out_size, order))) - exit(1); - - fwrite(out, 1, out_size, outfp); - bytes += in_size; - } - - free(in); - free(out); - } else { - if (decode) { - // Only used in some test implementations of RC_GetFreq() - //RC_init(); - //RC_init2(); - - for (;;) { - uint32_t in_size, out_size; - unsigned char *out; - - if (4 != fread(&in_size, 1, 4, infp)) - break; - if (in_size > blk_size2) - exit(1); - - if (in_size != fread(in_buf, 1, in_size, infp)) { - fprintf(stderr, "Truncated input\n"); - exit(1); - } - out = rans_uncompress_4x16(in_buf, in_size, &out_size); - if (!out) - exit(1); - - fwrite(out, 1, out_size, outfp); - fflush(outfp); - free(out); - - bytes += out_size; - } - } else { - for (;;) { - uint32_t in_size, out_size; - unsigned char *out; - - in_size = fread(in_buf, 1, blk_size, infp); - if (in_size <= 0) - break; - - if (in_size < 4) - order &= ~1; - - out = rans_compress_4x16(in_buf, in_size, &out_size, order); - - fwrite(&out_size, 1, 4, outfp); - fwrite(out, 1, out_size, outfp); - free(out); - - bytes += in_size; - } - } - } - - gettimeofday(&tv2, NULL); - - fprintf(stderr, "Took %ld microseconds, %5.1f MB/s\n", - (long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec, - (double)bytes / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec)); - - free(in_buf); - return 0; -} diff --git a/src/htslib-1.18/htscodecs/tests/rANS_static_fuzz.c b/src/htslib-1.18/htscodecs/tests/rANS_static_fuzz.c deleted file mode 100644 index 6553fe0..0000000 --- a/src/htslib-1.18/htscodecs/tests/rANS_static_fuzz.c +++ /dev/null @@ -1,125 +0,0 @@ -/* Fuzz testing target. */ -/* - * Copyright (c) 2019,2020 Genome Research Ltd. 
- * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -/* -For best results, configure, from a build subdir, to use the address and -undefined behaviour sanitizers, and run "make fuzz". 
-E.g.: - -../configure CFLAGS='-g -gdwarf-2 -O3 -Wall -fsanitize=address,undefined' CPPFLAGS='-DUBSAN' -make fuzz - -Run with: - export ASAN_OPTIONS=allow_addr2line=1 - export UBSAN_OPTION=halt_on_error=1 - tests/rANS_static_fuzz corpus -or - tests/rANS_static_fuzz -detect_leaks=0 corpus - -I generated corpus as a whole bunch of precompressed tiny inputs from -tests/dat/q4 for different compression modes. - -For debugging purposes, we can compile a non-fuzzer non-ASAN build using --DNOFUZZ which creates a binary we can debug on any libfuzzer generated -output using valgrind. (The rans4x8 command line test won't quite work as -it's a slightly different input format with explicit sizes in the binary -stream.) -*/ - -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/rANS_static.h" - -int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { - unsigned int uncomp_size; - unsigned char *uncomp = rans_uncompress(in, in_size, &uncomp_size); - if (uncomp) - free(uncomp); - - return 0; -} - -#ifdef NOFUZZ -#include -#include -#include - -#define BS 1024*1024 -static unsigned char *load(char *fn, uint64_t *lenp) { - unsigned char *data = NULL; - uint64_t dsize = 0; - uint64_t dcurr = 0; - signed int len; - int fd = open(fn, O_RDONLY); - - do { - if (dsize - dcurr < BS) { - dsize = dsize ? 
dsize * 2 : BS; - data = realloc(data, dsize); - } - - len = read(fd, data + dcurr, BS); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("read"); - } - - close(fd); - *lenp = dcurr; - return data; -} - -int main(int argc, char **argv) { - uint64_t in_size; - unsigned char *in = load(argv[1], &in_size); - unsigned int uncomp_size; - unsigned char *uncomp = rans_uncompress(in, in_size, &uncomp_size); - if (uncomp) - free(uncomp); - - free(in); - - return 0; -} -#endif diff --git a/src/htslib-1.18/htscodecs/tests/rANS_static_test.c b/src/htslib-1.18/htscodecs/tests/rANS_static_test.c deleted file mode 100644 index b6969d4..0000000 --- a/src/htslib-1.18/htscodecs/tests/rANS_static_test.c +++ /dev/null @@ -1,293 +0,0 @@ -/* Tests for CRAM-3.0 rANS codec */ -/* - * Copyright (c) 2019-2021 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/rANS_static.h" - -#ifndef BLK_SIZE -// Divisible by 4 for X4 -# define BLK_SIZE 1039*251*4 -#endif - -// Room to allow for expanded BLK_SIZE on worst case compression. -#define BLK_SIZE2 (1.05*BLK_SIZE) - -// Max 4GB -static unsigned char *load(FILE *infp, uint32_t *lenp) { - unsigned char *data = NULL; - uint32_t dsize = 0; - uint32_t dcurr = 0; - signed int len; - - do { - if (dsize - dcurr < BLK_SIZE) { - dsize = dsize ? dsize * 2 : BLK_SIZE; - data = realloc(data, dsize); - } - - len = fread(data + dcurr, 1, BLK_SIZE, infp); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("fread"); - } - - *lenp = dcurr; - return data; -} - -/*----------------------------------------------------------------------------- - * Main. - * - * This is a simple command line tool for testing order-0 and order-1 - * compression using the rANS codec. Simply compile with - * - * gcc -DTEST_MAIN -O3 -I. 
cram/rANS_static.c -o cram/rANS_static - * - * Usage: cram/rANS_static -o0 < file > file.o0 - * cram/rANS_static -d < file.o0 > file2 - * - * cram/rANS_static -o1 < file > file.o1 - * cram/rANS_static -d < file.o1 > file2 - */ -int main(int argc, char **argv) { - int opt, order = 0; - unsigned char *in_buf = malloc(BLK_SIZE2+257*257*3); - int decode = 0, test = 0; - FILE *infp = stdin, *outfp = stdout; - struct timeval tv1, tv2, tv3; - size_t bytes = 0, raw = 0; - -#ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif - - extern char *optarg; - extern int optind; - - while ((opt = getopt(argc, argv, "o:dtr")) != -1) { - switch (opt) { - case 'o': - order = atoi(optarg); - break; - - case 'd': - decode = 1; - break; - - case 't': - test = 1; - break; - - case 'r': - raw = 1; - break; - } - } - - order = order ? 1 : 0; // Only support O(0) and O(1) - - if (optind < argc) { - if (!(infp = fopen(argv[optind], "rb"))) { - perror(argv[optind]); - return 1; - } - optind++; - } - - if (optind < argc) { - if (!(outfp = fopen(argv[optind], "wb"))) { - perror(argv[optind]); - return 1; - } - optind++; - } - - gettimeofday(&tv1, NULL); - - if (test) { - size_t len, in_sz = 0, out_sz = 0; - typedef struct { - unsigned char *blk; - uint32_t sz; - } blocks; - blocks *b = NULL, *bc, *bu; - int nb = 0, i; - - while ((len = fread(in_buf, 1, BLK_SIZE, infp)) != 0) { - // inefficient, but it'll do for testing - b = realloc(b, (nb+1)*sizeof(*b)); - b[nb].blk = malloc(len); - b[nb].sz = len; - memcpy(b[nb].blk, in_buf, len); - nb++; - in_sz += len; - } - - int trials = 5; - while (trials--) { - bc = malloc(nb*sizeof(*bc)); - bu = malloc(nb*sizeof(*bu)); - - gettimeofday(&tv1, NULL); - - out_sz = 0; - for (i = 0; i < nb; i++) { - bc[i].blk = rans_compress(b[i].blk, b[i].sz, &bc[i].sz, order); - out_sz += 5 + bc[i].sz; - bc[i].blk = realloc(bc[i].blk, bc[i].sz); - } - - gettimeofday(&tv2, NULL); - - for (i = 0; i < nb; i++) { - bu[i].blk = 
rans_uncompress(bc[i].blk, bc[i].sz, &bu[i].sz); - } - - gettimeofday(&tv3, NULL); - - for (i = 0; i < nb; i++) { - if (b[i].sz != bu[i].sz || memcmp(b[i].blk, bu[i].blk, b[i].sz)) - fprintf(stderr, "Mismatch in block %d\n", i); - free(bc[i].blk); - free(bu[i].blk); - } - free(bc); - free(bu); - - fprintf(stderr, "%5.1f MB/s enc, %5.1f MB/s dec\t %ld bytes -> %ld bytes\n", - (double)in_sz / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec), - (double)in_sz / ((long)(tv3.tv_sec - tv2.tv_sec)*1000000 + - tv3.tv_usec - tv2.tv_usec), - (long)in_sz, (long)out_sz); - } - - exit(0); - - } - - if (raw) { - // One naked / raw block, to match the specification - uint32_t in_size, out_size; - unsigned char *in = load(infp, &in_size), *out; - if (!in) exit(1); - - if (decode) { - if (!(out = rans_uncompress(in, in_size, &out_size))) - exit(1); - - fwrite(out, 1, out_size, outfp); - bytes = out_size; - } else { - if (!(out = rans_compress(in, in_size, &out_size, order))) - exit(1); - - fwrite(out, 1, out_size, outfp); - bytes += in_size; - } - - free(in); - free(out); - } else { - if (decode) { - // Only used in some test implementations of RC_GetFreq() - //RC_init(); - //RC_init2(); - - for (;;) { - uint32_t in_size, out_size; - unsigned char *out; - - order = fgetc(infp); - if (4 != fread(&in_size, 1, 4, infp)) - break; - if (in_size != fread(in_buf, 1, in_size, infp)) { - fprintf(stderr, "Truncated input\n"); - exit(1); - } - out = rans_uncompress(in_buf, in_size, &out_size); - if (!out) - abort(); - - fwrite(out, 1, out_size, outfp); - free(out); - - bytes += out_size; - } - } else { - for (;;) { - uint32_t in_size, out_size; - unsigned char *out; - - in_size = fread(in_buf, 1, BLK_SIZE, infp); - if (in_size <= 0) - break; - - out = rans_compress(in_buf, in_size, &out_size, - order && in_size >= 4); - - fputc(order && in_size >= 4, outfp); - fwrite(&out_size, 1, 4, outfp); - fwrite(out, 1, out_size, outfp); - free(out); - - bytes += in_size; - } - } - 
} - - gettimeofday(&tv2, NULL); - - fprintf(stderr, "Took %ld microseconds, %5.1f MB/s\n", - (long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec, - (double)bytes / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 + - tv2.tv_usec - tv1.tv_usec)); - - free(in_buf); - return 0; -} diff --git a/src/htslib-1.18/htscodecs/tests/rans4x16.test b/src/htslib-1.18/htscodecs/tests/rans4x16.test deleted file mode 100755 index bf105ff..0000000 --- a/src/htslib-1.18/htscodecs/tests/rans4x16.test +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/sh -out=test.out -if test ! -d $out -then - mkdir $out -fi - -for f in `ls -1 $srcdir/dat/q* $srcdir/htscodecs-corpus/dat/q* 2>/dev/null` -do - comp=${f%/*/*}/dat/r4x16/${f##*/} - cut -f 1 < $f | tr -d '\012' > $out/r4x16-nl - for o in 0 1 64 65 128 129 192 193 68 69 132 133 196 197 8 9 - do - if [ ! -e "$comp.$o" ] - then - continue - fi - printf 'Testing rans4x16 -r -o%s on %s\t' $o "$f" - - # Round trip - ./rans4x16pr -r -o$o $out/r4x16-nl $out/r4x16.comp 2>>$out/r4x16.stderr || exit 1 - wc -c < $out/r4x16.comp - ./rans4x16pr -r -d $out/r4x16.comp $out/r4x16.uncomp 2>>$out/r4x16.stderr || exit 1 - cmp $out/r4x16-nl $out/r4x16.uncomp || exit 1 - - # Precompressed data - ./rans4x16pr -r -d $comp.$o $out/r4x16.uncomp 2>>$out/r4x16.stderr || exit 1 - cmp $out/r4x16-nl $out/r4x16.uncomp || exit 1 - done - - # 32-way, with cross-compatibility between scalar and SIMD implementations - for o in 4 5 - do - printf 'Testing rans4x16 -r -o%s on %s\t' $o "$f" - - # Round trip - # SIMD vs SIMD (auto) - ./rans4x16pr -r -o$o $out/r4x16-nl $out/r4x16.comp 2>>$out/r4x16.stderr || exit 1 - wc -c < $out/r4x16.comp - ./rans4x16pr -r -d $out/r4x16.comp $out/r4x16.uncomp 2>>$out/r4x16.stderr || exit 1 - cmp $out/r4x16-nl $out/r4x16.uncomp || exit 1 - - # Scalar vs scalar - ./rans4x16pr -r -o$o -c 0 $out/r4x16-nl $out/r4x16.comp 2>>$out/r4x16.stderr || exit 1 - wc -c < $out/r4x16.comp - ./rans4x16pr -r -d -o$o -c 0 $out/r4x16.comp $out/r4x16.uncomp 
2>>$out/r4x16.stderr || exit 1 - cmp $out/r4x16-nl $out/r4x16.uncomp || exit 1 - - # Scalar vs SIMD - ./rans4x16pr -r -o$o -c 0 $out/r4x16-nl $out/r4x16.comp 2>>$out/r4x16.stderr || exit 1 - wc -c < $out/r4x16.comp - ./rans4x16pr -r -d -o$o $out/r4x16.comp $out/r4x16.uncomp 2>>$out/r4x16.stderr || exit 1 - cmp $out/r4x16-nl $out/r4x16.uncomp || exit 1 - - # SIMD vs Scalar - ./rans4x16pr -r -o$o $out/r4x16-nl $out/r4x16.comp 2>>$out/r4x16.stderr || exit 1 - wc -c < $out/r4x16.comp - ./rans4x16pr -r -d -o$o -c 0 $out/r4x16.comp $out/r4x16.uncomp 2>>$out/r4x16.stderr || exit 1 - cmp $out/r4x16-nl $out/r4x16.uncomp || exit 1 - -# # Precompressed data - if [ ! -e "$comp.$o" ] - then - continue - fi - ./rans4x16pr -r -d $comp.$o $out/r4x16.uncomp 2>>$out/r4x16.stderr || exit 1 - cmp $out/r4x16-nl $out/r4x16.uncomp || exit 1 - done -done diff --git a/src/htslib-1.18/htscodecs/tests/rans4x8.test b/src/htslib-1.18/htscodecs/tests/rans4x8.test deleted file mode 100755 index 728d8fa..0000000 --- a/src/htslib-1.18/htscodecs/tests/rans4x8.test +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -out=test.out -if test ! 
-d $out -then - mkdir $out -fi - -for f in `ls -1 $srcdir/dat/q* $srcdir/htscodecs-corpus/dat/q* 2>/dev/null` -do - comp=${f%/*/*}/dat/r4x8/${f##*/} - cut -f 1 < $f | tr -d '\012' > $out/r4x8-nl - for o in 0 1 - do - printf 'Testing rans4x8 -r -o%s on %s\t' $o "$f" - - # Round trip - ./rans4x8 -r -o$o $out/r4x8-nl $out/r4x8.comp 2>>$out/r4x8.stderr || exit 1 - wc -c < $out/r4x8.comp - ./rans4x8 -r -d $out/r4x8.comp $out/r4x8.uncomp 2>>$out/r4x8.stderr || exit 1 - cmp $out/r4x8-nl $out/r4x8.uncomp || exit 1 - - # Precompressed data - ./rans4x8 -r -d $comp.$o $out/r4x8.uncomp 2>>$out/r4x8.stderr || exit 1 - cmp $out/r4x8-nl $out/r4x8.uncomp || exit 1 - done -done diff --git a/src/htslib-1.18/htscodecs/tests/tok3.test b/src/htslib-1.18/htscodecs/tests/tok3.test deleted file mode 100755 index a2ad792..0000000 --- a/src/htslib-1.18/htscodecs/tests/tok3.test +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -out=test.out -if test ! -d $out -then - mkdir $out -fi - -for f in `ls -1 $srcdir/names/*.names $srcdir/htscodecs-corpus/names/*.names 2>/dev/null` -do - comp=${f%/*/*}/names/tok3/${f##*/} - for lvl in 1 3 5 7 9 11 13 15 17 19 - do - printf 'Testing tokenise_name3 -r -%s on %s\t' $lvl "$f" - - # Round trip - ./tokenise_name3 -r -$lvl < $f > $out/tok3.comp - wc -c < $out/tok3.comp - ./tokenise_name3 -d -r < $out/tok3.comp | tr '\000' '\012' > $out/tok3.uncomp - cmp $f $out/tok3.uncomp || exit 1 - - # Precompressed data - ./tokenise_name3 -d -r < $comp.$lvl | tr '\000' '\012' > $out/tok3.uncomp - cmp $f $out/tok3.uncomp || exit 1 - done - echo -done diff --git a/src/htslib-1.18/htscodecs/tests/tokenise_name3_fuzz.c b/src/htslib-1.18/htscodecs/tests/tokenise_name3_fuzz.c deleted file mode 100644 index 4592ce5..0000000 --- a/src/htslib-1.18/htscodecs/tests/tokenise_name3_fuzz.c +++ /dev/null @@ -1,97 +0,0 @@ -/* Fuzz testing target. */ -/* - * Copyright (c) 2019,2020 Genome Research Ltd. 
- * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/tokenise_name3.h" - -int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { - unsigned int uncomp_size; - unsigned char *uncomp = tok3_decode_names(in, in_size, &uncomp_size); - if (uncomp) - free(uncomp); - - return 0; -} - -#ifdef NOFUZZ -#include -#include -#include - -#define BS 1024*1024 -static unsigned char *load(char *fn, uint64_t *lenp) { - unsigned char *data = NULL; - uint64_t dsize = 0; - uint64_t dcurr = 0; - signed int len; - int fd = open(fn, O_RDONLY); - - do { - if (dsize - dcurr < BS) { - dsize = dsize ? dsize * 2 : BS; - data = realloc(data, dsize); - } - - len = read(fd, data + dcurr, BS); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("read"); - } - - close(fd); - *lenp = dcurr; - return data; -} - -int main(int argc, char **argv) { - uint64_t in_size; - unsigned char *in = load(argv[1], &in_size); - - LLVMFuzzerTestOneInput(in, in_size); - - free(in); - return 0; -} -#endif diff --git a/src/htslib-1.18/htscodecs/tests/tokenise_name3_test.c b/src/htslib-1.18/htscodecs/tests/tokenise_name3_test.c deleted file mode 100644 index 182e0c2..0000000 --- a/src/htslib-1.18/htscodecs/tests/tokenise_name3_test.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 2016-2020 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htscodecs/tokenise_name3.h" - -//----------------------------------------------------------------------------- -// main() implementation for testing - -// Large enough for whole file for now. -#ifndef BLK_SIZE -#define BLK_SIZE 1*1024*1024 -#endif -static char *blk; - -// Max 4GB -static unsigned char *load(FILE *infp, uint32_t *lenp) { - unsigned char *data = NULL; - uint32_t dsize = 0; - uint32_t dcurr = 0; - signed int len; - - do { - if (dsize - dcurr < BLK_SIZE) { - dsize = dsize ? 
dsize * 2 : BLK_SIZE; - data = realloc(data, dsize); - } - - len = fread(data + dcurr, 1, BLK_SIZE, infp); - if (len > 0) - dcurr += len; - } while (len > 0); - - if (len == -1) { - perror("fread"); - } - - *lenp = dcurr; - return data; -} - -static int encode(int argc, char **argv) { - FILE *fp; - int len, level = 9; - int use_arith = 0; - int raw = 0; - - while (argc > 1 && argv[1][0] == '-') { - if (strcmp(argv[1], "-r") == 0) { - raw = 1; - argc--; - argv++; - } - - else if (argv[1][1] >= '0' && argv[1][1] <= '9') { - level = atoi(argv[1]+1); - if (level > 10) { - level -= 10; - use_arith = 1; - } - argc--; - argv++; - } - - else - exit(1); - } - - if (argc > 1) { - fp = fopen(argv[1], "r"); - if (!fp) { - perror(argv[1]); - return 1; - } - } else { - fp = stdin; - } - - if (raw) { - // One naked / raw block, to match the specification - uint32_t in_len; - int out_len; - unsigned char *in = load(fp, &in_len), *out; - if (!in) exit(1); - out = tok3_encode_names((char *)in, in_len, level, use_arith, - &out_len, NULL); - if (!out || write(1, out, out_len) < out_len) exit(1); // encoded data - free(in); - free(out); - } else { - // Block based, to permit arbitrarily large files for benchmarking - int blk_offset = 0; - for (;;) { - int last_start = 0; - - len = fread(blk+blk_offset, 1, BLK_SIZE-blk_offset, fp); - if (len <= 0) - break; - len += blk_offset; - - int out_len; - uint8_t *out = tok3_encode_names(blk, len, level, use_arith, - &out_len, &last_start); - if (write(1, &out_len, 4) < 4) exit(1); - if (write(1, out, out_len) < out_len) exit(1); // encoded data - free(out); - - if (len > last_start) - memmove(blk, &blk[last_start], len - last_start); - blk_offset = len - last_start; - } - } - - if (fclose(fp) < 0) { - perror("closing file"); - return 1; - } - - return 0; -} - -static int decode(int argc, char **argv) { - uint32_t in_sz, out_sz; - int raw = 0; - - if (argc > 1 && strcmp(argv[1], "-r") == 0) { - raw = 1; - argc--; - argv++; - } - - if (raw) { - // 
One naked / raw block, to match the specification - uint32_t in_len; - unsigned char *in = load(stdin, &in_len), *out; - if (!in) exit(1); - - if ((out = tok3_decode_names(in, in_len, &out_sz)) == NULL) - exit(1); - if (write(1, out, out_sz) != out_sz) - exit(1); - - free(in); - free(out); - } else { - while (fread(&in_sz, 1, 4, stdin) == 4) { - uint8_t *in = malloc(in_sz), *out; - if (!in) - return -1; - - if (fread(in, 1, in_sz, stdin) != in_sz) { - free(in); - return -1; - } - - if ((out = tok3_decode_names(in, in_sz, &out_sz)) == NULL) { - free(in); - return -1; - } - - if (write(1, out, out_sz) < out_sz) exit(1); - - free(in); - free(out); - } - } - - return 0; -} - -int main(int argc, char **argv) { -#ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif - - // temporary fix for decoder, which needs more space - blk = malloc(BLK_SIZE*2); - - int ret; - - if (argc > 1 && strcmp(argv[1], "-d") == 0) - ret = decode(argc-1, argv+1); - else - ret = encode(argc, argv); - - free(blk); - return ret; -} diff --git a/src/htslib-1.18/htscodecs/tests/varint_test.c b/src/htslib-1.18/htscodecs/tests/varint_test.c deleted file mode 100644 index 9623adf..0000000 --- a/src/htslib-1.18/htscodecs/tests/varint_test.c +++ /dev/null @@ -1,287 +0,0 @@ -/* Variable-length encoding tests */ -/* - * Copyright (c) 2020 Genome Research Ltd. - * Author(s): Rob Davies - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include - -#include "htscodecs/varint.h" - -typedef struct unsigned_test { - uint64_t val; - int len; - uint8_t encoding[12]; -} unsigned_test; - -typedef struct signed_test { - int64_t val; - int len; - uint8_t encoding[12]; -} signed_test; - -void dump_encoding(size_t sz, const uint8_t *buffer) { - size_t byte; - for (byte = 0; byte < sz; byte++) { - printf("%s0x%02x", byte ? 
" " : "", buffer[byte]); - } -} - -void dump_unsigned(int bits, int is_enc, - uint64_t val, int sz, const uint8_t *buffer) { - printf("%d-bit ", bits); - if (is_enc) printf("0x%0*"PRIx64" => ", bits / 4, val); - dump_encoding(sz, buffer); - if (!is_enc) printf(" => 0x%0*"PRIx64, bits / 4, val); - printf("\n"); -} - -void dump_signed(int bits, int is_enc, - int64_t val, int sz, const uint8_t *buffer) { - printf("%d-bit ", bits); - if (is_enc) printf("%"PRId64" => ", val); - dump_encoding(sz, buffer); - if (!is_enc) printf(" => %"PRId64, val); - printf("\n"); -} - -int check_put_unsigned(int bits, const unsigned_test *t, - int len, const uint8_t buffer[16], int verbose) { - if (len != t->len || memcmp(t->encoding, buffer, len) != 0) { - printf("var_put_u%d failed:\nExpected ", bits); - dump_unsigned(bits, 1, t->val, t->len, t->encoding); - printf("Got "); - dump_unsigned(bits, 1, t->val, len, buffer); - return 1; - } - if (verbose) { - dump_unsigned(bits, 1, t->val, len, buffer); - } - return 0; -} - -int check_get_unsigned(int bits, const unsigned_test *t, - int len, uint64_t val) { - if (val == t->val && len == t->len) - return 0; - - printf("var_get_u%d failed:\nExpected ", bits); - dump_unsigned(bits, 0, t->val, t->len, t->encoding); - printf("Got "); - dump_unsigned(bits, 0, val, len, t->encoding); - return 1; -} - -int check_put_signed(int bits, const signed_test *t, - int len, const uint8_t buffer[16], int verbose) { - if (len != t->len || memcmp(t->encoding, buffer, len) != 0) { - printf("var_put_s%d failed:\nExpected ", bits); - dump_signed(bits, 1, t->val, t->len, t->encoding); - printf("Got "); - dump_signed(bits, 1, t->val, len, buffer); - return 1; - } - if (verbose) { - dump_signed(bits, 1, t->val, len, buffer); - } - return 0; -} - -int check_get_signed(int bits, const signed_test *t, - int len, int64_t val) { - if (val == t->val && len == t->len) - return 0; - - printf("var_get_s%d failed:\nExpected ", bits); - dump_signed(bits, 0, t->val, t->len, 
t->encoding); - printf("Got "); - dump_signed(bits, 0, val, len, t->encoding); - return 1; -} - -#define NELE(X) (sizeof(X)/sizeof(X[0])) - -int test_unsigned(int verbose) { - uint8_t buffer[16] = { 0 }; - uint8_t *endp = buffer + sizeof(buffer); - uint32_t v32; - uint64_t v64; - size_t i; - int len; - int res = 0; - unsigned_test tests32[] = { - { 0U, 1, { 0x00 } }, - { 1U, 1, { 0x01 } }, - { 0x7fU, 1, { 0x7f } }, - { 0x80U, 2, { 0x81, 0x00 } }, - { 0x1234U, 2, { 0xa4, 0x34 } }, - { 0x123456U, 3, { 0xc8, 0xe8, 0x56 } }, - { 0x12345678U, 5, { 0x81, 0x91, 0xd1, 0xac, 0x78 } }, - { 0x80000000U, 5, { 0x88, 0x80, 0x80, 0x80, 0x00 } }, - { 0xffffffffU, 5, { 0x8f, 0xff, 0xff, 0xff, 0x7f } } - }; - unsigned_test tests64[] = { - { 0x100000000ULL, 5, { 0x90, 0x80, 0x80, 0x80, 0x00 } }, - { 0x123456789abcULL, 7, { 0x84, 0xc6, 0xc5, 0xb3, 0xe2, 0xb5, 0x3c} }, - { 0x123456789abcdef0ULL, 9, - { 0x92, 0x9a, 0x95, 0xcf, 0x89, 0xd5, 0xf3, 0xbd, 0x70 } }, - { 0x8000000000000000ULL, 10, - { 0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00 } }, - { 0xffffffffffffffffULL, 10, - { 0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f } } - }; - - for (i = 0; i < NELE(tests32); i++) { - memset(buffer, 0x55, sizeof(buffer)); - len = var_put_u32(buffer, endp, (uint32_t) tests32[i].val); - res |= check_put_unsigned(32, &tests32[i], len, buffer, verbose); - memset(buffer, 0x55, sizeof(buffer)); - len = var_put_u64(buffer, endp, tests32[i].val); - res |= check_put_unsigned(64, &tests32[i], len, buffer, verbose); - len = var_get_u32(tests32[i].encoding, - tests32[i].encoding + tests32[i].len, - &v32); - res |= check_get_unsigned(32, &tests32[i], len, v32); - len = var_get_u64(tests32[i].encoding, - tests32[i].encoding + tests32[i].len, - &v64); - res |= check_get_unsigned(64, &tests32[i], len, v64); - } - - for (i = 0; i < NELE(tests64); i++) { - memset(buffer, 0x55, sizeof(buffer)); - len = var_put_u64(buffer, endp, tests64[i].val); - res |= check_put_unsigned(64, &tests64[i], 
len, buffer, verbose); - len = var_get_u64(tests64[i].encoding, - tests64[i].encoding + tests64[i].len, - &v64); - res |= check_get_unsigned(64, &tests64[i], len, v64); - } - - return res; -} - -int test_signed(int verbose) { - uint8_t buffer[16] = { 0 }; - uint8_t *endp = buffer + sizeof(buffer); - int32_t v32; - int64_t v64; - size_t i; - int len; - int res = 0; - signed_test tests32[] = { - { 0, 1, { 0x00 }, }, - { -1, 1, { 0x01 }, }, - { 1, 1, { 0x02 }, }, - { -63, 1, { 0x7d }, }, - { 63, 1, { 0x7e }, }, - { -64, 1, { 0x7f, } }, - { 64, 2, { 0x81, 0x00 } }, - { -65, 2, { 0x81, 0x01 } }, - { 65, 2, { 0x81, 0x02 } }, - { -12345678, 4, { 0x8b, 0xe3, 0x85, 0x1b } }, - { 12345678, 4, { 0x8b, 0xe3, 0x85, 0x1c } }, - { -2147483647, 5, { 0x8f, 0xff, 0xff, 0xff, 0x7d} }, - { 2147483647, 5, { 0x8f, 0xff, 0xff, 0xff, 0x7e} }, - { -2147483647-1, 5, { 0x8f, 0xff, 0xff, 0xff, 0x7f} }, - }; - - signed_test tests64[] = { - { 2147483648LL, 5, { 0x90, 0x80, 0x80, 0x80, 0x00 } }, - { -1234567890123456LL, 8, - { 0x84, 0xb1, 0xb5, 0xa7, 0xc8, 0xd5, 0xea, 0x7f } }, - { 1234567890123456LL, 8, - { 0x84, 0xb1, 0xb5, 0xa7, 0xc8, 0xd5, 0xeb, 0x00 } }, - { -9223372036854775807LL, 10, - { 0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7d } }, - { 9223372036854775807LL, 10, - { 0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e } }, - { -9223372036854775807LL - 1LL, 10, - { 0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f } }, - }; - - for (i = 0; i < NELE(tests32); i++) { - memset(buffer, 0x55, sizeof(buffer)); - len = var_put_s32(buffer, endp, (int32_t) tests32[i].val); - res |= check_put_signed(32, &tests32[i], len, buffer, verbose); - memset(buffer, 0x55, sizeof(buffer)); - len = var_put_s64(buffer, endp, tests32[i].val); - res |= check_put_signed(64, &tests32[i], len, buffer, verbose); - len = var_get_s32(tests32[i].encoding, - tests32[i].encoding + tests32[i].len, - &v32); - res |= check_get_signed(32, &tests32[i], len, v32); - len = 
var_get_s64(tests32[i].encoding, - tests32[i].encoding + tests32[i].len, - &v64); - res |= check_get_signed(64, &tests32[i], len, v64); - } - - for (i = 0; i < NELE(tests64); i++) { - memset(buffer, 0x55, sizeof(buffer)); - len = var_put_s64(buffer, endp, tests64[i].val); - res |= check_put_signed(64, &tests64[i], len, buffer, verbose); - len = var_get_s64(tests64[i].encoding, - tests64[i].encoding + tests64[i].len, - &v64); - res |= check_get_signed(64, &tests64[i], len, v64); - } - - return res; -} - - -int main(int argc, char **argv) { - int opt; - int verbose = 0; - int res = 0; - - while ((opt = getopt(argc, argv, "v")) != -1) { - switch (opt) { - case 'v': - verbose++; - break; - default: - fprintf(stderr, "Unknown option '%c'\n", opt); - return EXIT_FAILURE; - } - } - - res |= test_unsigned(verbose); - res |= test_signed(verbose); - return res; -} diff --git a/src/htslib-1.18/htsfile.1 b/src/htslib-1.18/htsfile.1 deleted file mode 100644 index eeb2947..0000000 --- a/src/htslib-1.18/htsfile.1 +++ /dev/null @@ -1,94 +0,0 @@ -.TH htsfile 1 "25 July 2023" "htslib-1.18" "Bioinformatics tools" -.SH NAME -htsfile \- identify high-throughput sequencing data files -.\" -.\" Copyright (C) 2015, 2017-2018 Genome Research Ltd. -.\" -.\" Author: John Marshall -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. 
-.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -.SH SYNOPSIS -.B htsfile -.RB [ -chHv ] -.IR FILE ... -.br -.B htsfile --copy -.RB [ -v ] -.I FILE DESTFILE -.SH DESCRIPTION -The \fBhtsfile\fR utility attempts to identify what kind of high-throughput -sequencing data files the specified files are, and provides minimal viewing -capabilities for some kinds of data file. -.P -It can identify sequencing data files such as SAM, BAM, and CRAM; -variant calling data files such as VCF and BCF; -index files used to index these data files; -and compressed versions of many of them. -.P -For each \fIFILE\fR given, \fBhtsfile\fP prints a description of the file -format determined, using similar keyword conventions to \fBfile\fP(1): -"text" indicates a textual file that can probably be viewed on a terminal; -"data" indicates binary data; -"sequence", "variant calling", and "index" indicate different categories of -data file. -When it can be identified, the name of the particular file format (such as -"BAM" or "VCF") is printed at the start of the description. -.P -When used to view file contents as text, \fBhtsfile\fP can optionally show -only headers or only data records, but has no other filtering capabilities. -Use \fBsamtools\fR or \fBbcftools\fR if you need more extensive viewing or -filtering capabilities. -.P -Alternatively, when \fB--copy\fR is used, \fBhtsfile\fR takes exactly two -arguments and performs a byte-for-byte copy from \fIFILE\fR to \fIDESTFILE\fR. 
-This is similar to \fBcp\fR(1), but HTSlib's remote file access facilities -are available for both source and destination. -.P -The following options are accepted: -.TP 4n -.BR -c ", " --view -Instead of identifying the specified files, display a textual representation -of their contents on standard output. -.IP -By default, \fB--view\fR refuses to display files in unknown formats. -When \fB--verbose\fR is also given, the raw contents of such files are -displayed, with non-printable characters shown via C-style "\\x" hexadecimal -escape sequences. -.TP -.BR -C ", " --copy -Instead of identifying or displaying the specified files, copy the source -\fIFILE\fR to the destination \fIDESTFILE\fR. -Only \fB--verbose\fR may be used in conjunction with \fB--copy\fR. -.TP -.BR -h ", " --header-only -Display data file headers only. -Implies \fB--view\fR. -.TP -.BR -H ", " --no-header -When viewing files, display data records only. -.TP -.BR -v ", " --verbose -Display additional warnings and diagnostic messages. -Using \fB--verbose\fR repeatedly further raises the verbosity. -.PP -.SH SEE ALSO -.IR bcftools (1), -.IR file (1), -.IR samtools (1) diff --git a/src/htslib-1.18/htsfile.c b/src/htslib-1.18/htsfile.c deleted file mode 100644 index 9f7bf45..0000000 --- a/src/htslib-1.18/htsfile.c +++ /dev/null @@ -1,329 +0,0 @@ -/* htsfile.c -- file identifier and minimal viewer. - - Copyright (C) 2014-2019 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htslib/hfile.h" -#include "htslib/hts.h" -#include "htslib/sam.h" -#include "htslib/vcf.h" - -#ifndef EFTYPE -#define EFTYPE ENOEXEC -#endif - -enum { identify, view_headers, view_all, copy } mode = identify; -int show_headers = 1; -int verbose = 0; -int status = EXIT_SUCCESS; /* Exit status from main */ - -void error(const char *format, ...) -{ - int err = errno; - va_list args; - va_start(args, format); - fflush(stdout); - fprintf(stderr, "htsfile: "); - vfprintf(stderr, format, args); - if (err) fprintf(stderr, ": %s\n", strerror(err)); - else fprintf(stderr, "\n"); - fflush(stderr); - va_end(args); - status = EXIT_FAILURE; -} - -static htsFile *dup_stdout(const char *mode) -{ - int fd = dup(STDOUT_FILENO); - hFILE *hfp = (fd >= 0)? hdopen(fd, mode) : NULL; - return hfp? 
hts_hopen(hfp, "-", mode) : NULL; -} - -static void view_sam(samFile *in, const char *filename) -{ - bam1_t *b = NULL; - sam_hdr_t *hdr = NULL; - samFile *out = NULL; - - hdr = sam_hdr_read(in); - if (hdr == NULL) { - errno = 0; error("reading headers from \"%s\" failed", filename); - goto clean; - } - - out = dup_stdout("w"); - if (out == NULL) { error("reopening standard output failed"); goto clean; } - - if (show_headers) { - if (sam_hdr_write(out, hdr) != 0) { - error("writing headers to standard output failed"); - goto clean; - } - } - - if (mode == view_all) { - int ret; - - b = bam_init1(); - if (b == NULL) { error("can't create record"); goto clean; } - - while ((ret = sam_read1(in, hdr, b)) >= 0) { - if (sam_write1(out, hdr, b) < 0) { - error("writing to standard output failed"); - goto clean; - } - } - - if (ret < -1) { error("reading \"%s\" failed", filename); goto clean; } - } - - clean: - sam_hdr_destroy(hdr); - bam_destroy1(b); - if (out) hts_close(out); -} - -static void view_vcf(vcfFile *in, const char *filename) -{ - bcf1_t *rec = NULL; - bcf_hdr_t *hdr = NULL; - vcfFile *out = NULL; - - hdr = bcf_hdr_read(in); - if (hdr == NULL) { - errno = 0; error("reading headers from \"%s\" failed", filename); - goto clean; - } - - out = dup_stdout("w"); - if (out == NULL) { error("reopening standard output failed"); goto clean; } - - if (show_headers) { - if (bcf_hdr_write(out, hdr) != 0) { - error("writing headers to standard output failed"); - goto clean; - } - } - - if (mode == view_all) { - int ret; - - rec = bcf_init(); - if (rec == NULL) { error("can't create record"); goto clean; } - - while ((ret = bcf_read(in, hdr, rec)) >= 0) { - if (bcf_write(out, hdr, rec) < 0) { - error("writing to standard output failed"); - goto clean; - } - } - - if (ret < -1) { error("reading \"%s\" failed", filename); goto clean; } - } - - clean: - if (hdr) bcf_hdr_destroy(hdr); - if (rec) bcf_destroy(rec); - if (out) hts_close(out); -} - -static void view_raw(hFILE *fp, 
const char *filename) -{ - int c, prev; - for (prev = '\n'; (c = hgetc(fp)) != EOF; prev = c) - if (isprint(c) || c == '\n' || c == '\t') putchar(c); - else if (c == '\r') fputs("\\r", stdout); - else if (c == '\0') fputs("\\0", stdout); - else printf("\\x%02x", c); - - if (prev != '\n') putchar('\n'); - - if (herrno(fp)) { - errno = herrno(fp); - error("reading \"%s\" failed", filename); - } -} - -static void copy_raw(const char *srcfilename, const char *destfilename) -{ - hFILE *src = hopen(srcfilename, "r"); - if (src == NULL) { - error("can't open \"%s\"", srcfilename); - return; - } - - size_t bufsize = 1048576; - char *buffer = malloc(bufsize); - if (buffer == NULL) { - error("can't allocate copy buffer"); - hclose_abruptly(src); - return; - } - - hFILE *dest = hopen(destfilename, "w"); - if (dest == NULL) { - error("can't create \"%s\"", destfilename); - hclose_abruptly(src); - free(buffer); - return; - } - - ssize_t n; - while ((n = hread(src, buffer, bufsize)) > 0) - if (hwrite(dest, buffer, n) != n) { - error("writing to \"%s\" failed", destfilename); - hclose_abruptly(dest); - dest = NULL; - break; - } - - if (n < 0) { - error("reading from \"%s\" failed", srcfilename); - hclose_abruptly(src); - src = NULL; - } - - if (dest && hclose(dest) < 0) error("closing \"%s\" failed", destfilename); - if (src && hclose(src) < 0) error("closing \"%s\" failed", srcfilename); - free(buffer); -} - -static void usage(FILE *fp, int status) -{ - fprintf(fp, -"Usage: htsfile [-chHv] FILE...\n" -" htsfile --copy [-v] FILE DESTFILE\n" -"Options:\n" -" -c, --view Write textual form of FILEs to standard output\n" -" -C, --copy Copy the exact contents of FILE to DESTFILE\n" -" -h, --header-only Display only headers in view mode, not records\n" -" -H, --no-header Suppress header display in view mode\n" -" -v, --verbose Increase verbosity of warnings and diagnostics\n"); - exit(status); -} - -int main(int argc, char **argv) -{ - static const struct option options[] = { - { 
"copy", no_argument, NULL, 'C' }, - { "header-only", no_argument, NULL, 'h' }, - { "no-header", no_argument, NULL, 'H' }, - { "view", no_argument, NULL, 'c' }, - { "verbose", no_argument, NULL, 'v' }, - { "help", no_argument, NULL, 2 }, - { "version", no_argument, NULL, 1 }, - { NULL, 0, NULL, 0 } - }; - - int c, i; - - status = EXIT_SUCCESS; - while ((c = getopt_long(argc, argv, "cChHv", options, NULL)) >= 0) - switch (c) { - case 'c': mode = view_all; break; - case 'C': mode = copy; break; - case 'h': mode = view_headers; show_headers = 1; break; - case 'H': show_headers = 0; break; - case 'v': hts_verbose++; verbose++; break; - case 1: - printf( -"htsfile (htslib) %s\n" -"Copyright (C) 2023 Genome Research Ltd.\n", - hts_version()); - exit(EXIT_SUCCESS); - break; - case 2: usage(stdout, EXIT_SUCCESS); break; - default: usage(stderr, EXIT_FAILURE); break; - } - - if (optind == argc) usage(stderr, EXIT_FAILURE); - - if (mode == copy) { - if (optind + 2 != argc) usage(stderr, EXIT_FAILURE); - copy_raw(argv[optind], argv[optind + 1]); - return status; - } - - for (i = optind; i < argc; i++) { - hFILE *fp = hopen(argv[i], "r"); - if (fp == NULL) { - error("can't open \"%s\"", argv[i]); - continue; - } - - if (mode == identify) { - htsFormat fmt; - if (hts_detect_format2(fp, argv[i], &fmt) < 0) { - error("detecting \"%s\" format failed", argv[i]); - hclose_abruptly(fp); - continue; - } - - char *description = hts_format_description(&fmt); - printf("%s:\t%s\n", argv[i], description); - free(description); - } - else { - htsFile *hts = hts_hopen(fp, argv[i], "r"); - if (hts) { - switch (hts_get_format(hts)->category) { - case sequence_data: - view_sam(hts, argv[i]); - break; - case variant_data: - view_vcf(hts, argv[i]); - break; - default: - if (verbose) - view_raw(fp, argv[i]); - else { - errno = 0; - error("can't view \"%s\": unknown format", argv[i]); - } - break; - } - - if (hts_close(hts) < 0) error("closing \"%s\" failed", argv[i]); - fp = NULL; - } - else if 
((errno == EFTYPE || errno == ENOEXEC) && verbose) - view_raw(fp, argv[i]); - else - error("can't view \"%s\"", argv[i]); - } - - if (fp && hclose(fp) < 0) error("closing \"%s\" failed", argv[i]); - } - - return status; -} diff --git a/src/htslib-1.18/htslib-s3-plugin.7 b/src/htslib-1.18/htslib-s3-plugin.7 deleted file mode 100644 index 019e8d0..0000000 --- a/src/htslib-1.18/htslib-s3-plugin.7 +++ /dev/null @@ -1,215 +0,0 @@ -.TH htslib-s3-plugin 7 "25 July 2023" "htslib-1.18" "Bioinformatics tools" -.SH NAME -htslib-s3-plugin \- htslib AWS S3 plugin -.\" -.\" Copyright (C) 2021-2022 Genome Research Ltd. -.\" -.\" Author: Andrew Whitwham -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -. -.\" For code blocks and examples (cf groff's Ultrix-specific man macros) -.de EX - -. in +\\$1 -. nf -. ft CR -.. -.de EE -. ft -. fi -. in - -.. 
- -.SH DESCRIPTION -The S3 plugin allows htslib file functions to communicate with servers that use -the AWS S3 protocol. Files are identified by their bucket and object key in a -URL format e.g. - -.B s3://mybucket/path/to/file - -With \fIpath/to/file\fR being the object key. - -Necessary security information can be provided in as part of the URL, in -environment variables or from configuration files. - -The full URL format is: - -.B s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH - -The elements are: -.TP -.I SCHEME -The protocol used. Defaults to \fIhttps\fR. -.TP -.I ID -The user AWS access key. -.TP -.I SECRET -The secret key for use with the access key. -.TP -.I TOKEN -Token used for temporary security credentials. -.TP -.I BUCKET -AWS S3 bucket. -.TP -.I PATH -Path to the object under the bucket. -.LP - -The environment variables below will be used if the user ID is not set. -.TP -.B AWS_ACCESS_KEY_ID -The user AWS access key. -.TP -.B AWS_SECRET_ACCESS_KEY -The secret key for use with the access key. -.TP -.B AWS_DEFAULT_REGION -The region to use. Defaults to -.IR us-east-1 . -.TP -.B AWS_SESSION_TOKEN -Token used for temporary security credentials. -.TP -.B AWS_DEFAULT_PROFILE -The profile to use in \fIcredentials\fR, \fIconfig\fR or \fIs3cfg\fR files. -Defaults to -.IR default . -.TP -.B AWS_PROFILE -Same as above. -.TP -.B AWS_SHARED_CREDENTIALS_FILE -Location of the credentials file. Defaults to -.IR ~/.aws/credentials . -.TP -.B HTS_S3_S3CFG -Location of the s3cfg file. Defaults to -.IR ~/.s3cfg . -.TP -.B HTS_S3_HOST -Sets the host. Defaults to -.IR s3.amazonaws.com . -.TP -.B HTS_S3_V2 -If set use signature v2 rather the default v4. This will limit the plugin to -reading only. -.TP -.B HTS_S3_PART_SIZE -Sets the upload part size in Mb, the minimum being 5Mb. -By default the part size starts at 5Mb and expands at regular intervals to -accommodate bigger files (up to 2.5 Tbytes with the current rate). 
-Using this setting disables the automatic part size expansion. -.TP -.B HTS_S3_ADDRESS_STYLE -Sets the URL style. Options are auto (default), virtual or path. -.LP -In the absence of an ID from the previous two methods the credential/config -files will be used. The default file locations are either -\fI~/.aws/credentials\fR or \fI~/.s3cfg\fR (in that order). - -Entries used in aws style credentials file are aws_access_key_id, -aws_secret_access_key, aws_session_token, region, addressing_style and -expiry_time (unofficial, see SHORT-LIVED CREDENTIALS below). -Only the first two are usually needed. - -Entries used in s3cmd style config files are access_key, secret_key, -access_token, host_base, bucket_location and host_bucket. Again only the first -two are usually needed. The host_bucket option is only used to set a path-style -URL, see below. - -.SH SHORT-LIVED CREDENTIALS - -Some cloud identity and access management (IAM) systems can make short-lived -credentials that allow access to resources. -These credentials will expire after a time and need to be renewed to -give continued access. -To enable this, the S3 plugin allows an \fIexpiry_time\fR entry to be set in the -\fI.aws/credentials\fR file. -The value for this entry should be the time when the token expires, -following the format in RFC3339 section 5.6, which takes the form: - - 2012-04-29T05:20:48Z - -That is, year - month - day, the letter "T", hour : minute : second. -The time can be followed by the letter "Z", indicating the UTC timezone, -or an offset from UTC which is a "+" or "-" sign followed by two digits for -the hours offset, ":", and two digits for the minutes. - -The S3 plugin will attempt to re-read the credentials file up to 1 minute -before the given expiry time, which means the file needs to be updated with -new credentials before then. -As the exact way of doing this can vary between services and IAM providers, -the S3 plugin expects this to be done by an external user-supplied process. 
-This may be achieved by running a program that replaces the file as new -credentials become available. -The following script shows how it might be done for AWS instance credentials: -.EX 2 -#!/bin/sh -instance='http://169.254.169.254' -tok_url="$instance/latest/api/token" -ttl_hdr='X-aws-ec2-metadata-token-ttl-seconds: 10' -creds_url="$instance/latest/meta-data/iam/security-credentials" -key1='aws_access_key_id = \(rs(.AccessKeyId)\(rsn' -key2='aws_secret_access_key = \(rs(.SecretAccessKey)\(rsn' -key3='aws_session_token = \(rs(.Token)\(rsn' -key4='expiry_time = \(rs(.Expiration)\(rsn' -while true; do - token=`curl -X PUT -H "$ttl_hdr" "$tok_url"` - tok_hdr="X-aws-ec2-metadata-token: $token" - role=`curl -H "$tok_hdr" "$creds_url/"` - expires='now' - ( curl -H "$tok_hdr" "$creds_url/$role" \(rs - | jq -r "\(rs"${key1}${key2}${key3}${key4}\(rs"" > credentials.new ) \(rs - && mv -f credentials.new credentials \(rs - && expires=`grep expiry_time credentials | cut -d ' ' -f 3-` - if test $? -ne 0 ; then break ; fi - expiry=`date -d "$expires - 3 minutes" '+%s'` - now=`date '+%s'` - test "$expiry" -gt "$now" && sleep $((($expiry - $now) / 2)) - sleep 30 -done -.EE - -Note that the \fIexpiry_time\fR key is currently only supported for the -\fI.aws/credentials\fR file (or the file referred to in the -.B AWS_SHARED_CREDENTIALS_FILE -environment variable). - -.SH NOTES -In most cases this plugin transforms the given URL into a virtual host-style -format e.g. \fIhttps://bucket.host/path/to/file\fR. A path-style format is used -where the URL is not DNS compliant or the bucket name contains a dot e.g. -\fIhttps://host/bu.cket/path/to/file\fR. - -Path-style can be forced by setting one either HTS_S3_ADDRESS_STYLE, -addressing_style or host_bucket. The first two can be set to \fBpath\fR while -host_bucket must \fBnot\fR include the \fB%(bucket).s\fR string. 
- -.SH "SEE ALSO" -.IR htsfile (1) -.IR samtools (1) -.PP -RFC 3339: -.PP -htslib website: diff --git a/src/htslib-1.18/htslib.map b/src/htslib-1.18/htslib.map deleted file mode 100644 index 9542861..0000000 --- a/src/htslib-1.18/htslib.map +++ /dev/null @@ -1,638 +0,0 @@ -HTSLIB_1.0 { - bam_aux2A; - bam_aux2Z; - bam_aux2f; - bam_aux2i; - bam_aux_append; - bam_aux_del; - bam_aux_get; - bam_cigar2qlen; - bam_cigar2rlen; - bam_copy1; - bam_destroy1; - bam_dup1; - bam_endpos; - bam_flag2str; - bam_hdr_read; - bam_hdr_write; - bam_init1; - bam_mplp_auto; - bam_mplp_destroy; - bam_mplp_init; - bam_mplp_init_overlaps; - bam_mplp_set_maxcnt; - bam_plp_auto; - bam_plp_destroy; - bam_plp_init; - bam_plp_next; - bam_plp_push; - bam_plp_reset; - bam_plp_set_maxcnt; - bam_read1; - bam_str2flag; - bam_write1; - bcf_add_filter; - bcf_calc_ac; - bcf_clear; - bcf_destroy; - bcf_dup; - bcf_enc_vchar; - bcf_enc_vfloat; - bcf_enc_vint; - bcf_float_missing; - bcf_float_vector_end; - bcf_fmt_array; - bcf_fmt_sized_array; - bcf_get_fmt; - bcf_get_format_string; - bcf_get_format_values; - bcf_get_info; - bcf_get_info_values; - bcf_get_variant_type; - bcf_get_variant_types; - bcf_gt_type; - bcf_has_filter; - bcf_hdr_add_hrec; - bcf_hdr_add_sample; - bcf_hdr_append; - bcf_hdr_combine; - bcf_hdr_destroy; - bcf_hdr_dup; - bcf_hdr_fmt_text; - bcf_hdr_get_hrec; - bcf_hdr_get_version; - bcf_hdr_id2int; - bcf_hdr_init; - bcf_hdr_parse; - bcf_hdr_parse_line; - bcf_hdr_printf; - bcf_hdr_read; - bcf_hdr_remove; - bcf_hdr_seqnames; - bcf_hdr_set; - bcf_hdr_set_samples; - bcf_hdr_set_version; - bcf_hdr_subset; - bcf_hdr_sync; - bcf_hdr_write; - bcf_hrec_add_key; - bcf_hrec_destroy; - bcf_hrec_dup; - bcf_hrec_find_key; - bcf_hrec_format; - bcf_hrec_set_val; - bcf_index_build; - bcf_init; - bcf_is_snp; - bcf_read; - bcf_readrec; - bcf_remove_alleles; - bcf_remove_filter; - bcf_sr_add_reader; - bcf_sr_destroy; - bcf_sr_init; - bcf_sr_next_line; - bcf_sr_regions_destroy; - bcf_sr_regions_flush; - 
bcf_sr_regions_init; - bcf_sr_regions_next; - bcf_sr_regions_overlap; - bcf_sr_regions_seek; - bcf_sr_remove_reader; - bcf_sr_seek; - bcf_sr_set_regions; - bcf_sr_set_samples; - bcf_sr_set_targets; - bcf_subset; - bcf_subset_format; - bcf_sweep_bwd; - bcf_sweep_destroy; - bcf_sweep_fwd; - bcf_sweep_hdr; - bcf_sweep_init; - bcf_translate; - bcf_trim_alleles; - bcf_type_shift; - bcf_unpack; - bcf_update_alleles; - bcf_update_alleles_str; - bcf_update_filter; - bcf_update_format; - bcf_update_format_string; - bcf_update_id; - bcf_update_info; - bcf_write; - bgzf_check_EOF; - bgzf_close; - bgzf_dopen; - bgzf_flush; - bgzf_flush_try; - bgzf_getc; - bgzf_getline; - bgzf_hopen; - bgzf_index_build_init; - bgzf_index_dump; - bgzf_index_load; - bgzf_is_bgzf; - bgzf_mt; - bgzf_open; - bgzf_raw_read; - bgzf_raw_write; - bgzf_read; - bgzf_read_block; - bgzf_seek; - bgzf_set_cache_size; - bgzf_useek; - bgzf_utell; - bgzf_write; - cram_close; - cram_compress_block; - cram_dopen; - cram_eof; - cram_flush; - cram_free_block; - cram_free_container; - cram_new_block; - cram_new_container; - cram_open; - cram_read_block; - cram_read_container; - cram_seek; - cram_set_header; - cram_set_option; - cram_set_voption; - cram_uncompress_block; - cram_write_block; - cram_write_container; - fai_build; - fai_destroy; - fai_fetch; - fai_load; - faidx_fetch_nseq; - faidx_fetch_seq; - faidx_has_seq; - hclose; - hclose_abruptly; - hdopen; - hfile_destroy; - hfile_init; - hfile_oflags; - hflush; - hgetc2; - hopen; - hpeek; - hputc2; - hputs2; - hread2; - hrec_add_idx; - hseek; - hts_close; - hts_file_type; - hts_get_bgzfp; - hts_getline; - hts_idx_destroy; - hts_idx_finish; - hts_idx_get_meta; - hts_idx_get_n_no_coor; - hts_idx_get_stat; - hts_idx_init; - hts_idx_load; - hts_idx_push; - hts_idx_save; - hts_idx_seqnames; - hts_idx_set_meta; - hts_itr_destroy; - hts_itr_next; - hts_itr_query; - hts_itr_querys; - hts_open; - hts_parse_reg; - hts_readlines; - hts_readlist; - hts_set_fai_filename; - 
hts_set_threads; - hts_verbose; - hts_version; - hwrite2; - kf_betai; - kf_erfc; - kf_gammap; - kf_gammaq; - kf_lgamma; - kmemmem; - knet_close; - knet_dopen; - knet_open; - knet_read; - knet_seek; - ksplit_core; - ksprintf; - kstrnstr; - kstrstr; - kstrtok; - kt_fisher_exact; - kvsprintf; - sam_format1; - sam_hdr_add_lines; - sam_hdr_dup; - sam_hdr_incr_ref; - sam_hdr_length; - sam_hdr_parse; - sam_hdr_read; - sam_hdr_str; - sam_hdr_write; - sam_index_load; - sam_itr_queryi; - sam_itr_querys; - sam_open_mode; - sam_parse1; - sam_read1; - sam_write1; - seq_nt16_str; - seq_nt16_table; - stringify_argv; - tbx_conf_bed; - tbx_conf_gff; - tbx_conf_psltbl; - tbx_conf_sam; - tbx_conf_vcf; - tbx_destroy; - tbx_index; - tbx_index_build; - tbx_index_load; - tbx_name2id; - tbx_readrec; - tbx_seqnames; - vcf_format; - vcf_hdr_read; - vcf_hdr_write; - vcf_parse; - vcf_read; - vcf_write; - vcf_write_line; -}; - -HTSLIB_1.1 { - bcf_get_fmt_id; - bcf_get_info_id; - faidx_iseq; - faidx_nseq; - faidx_seq_len; -} HTSLIB_1.0; - - -HTSLIB_1.2.1 { - bcf_copy; - bcf_sr_strerror; - hisremote; - hts_detect_format; - hts_format_description; - hts_get_format; - hts_hopen; - hts_set_opt; - regidx_destroy; - regidx_init; - regidx_insert; - regidx_nregs; - regidx_overlap; - regidx_parse_bed; - regidx_parse_tab; - regidx_seq_names; - regidx_seq_nregs; - seq_nt16_int; -} HTSLIB_1.1; - -HTSLIB_1.3 { - bcf_add_id; - bcf_empty; - bcf_hdr_merge; - bcf_index_build2; - bcf_index_load2; - bcf_remove_allele_set; - bgzf_compress; - cram_block_append; - cram_block_get_comp_size; - cram_block_get_content_id; - cram_block_get_content_type; - cram_block_get_crc32; - cram_block_get_data; - cram_block_get_offset; - cram_block_get_uncomp_size; - cram_block_set_comp_size; - cram_block_set_content_id; - cram_block_set_crc32; - cram_block_set_data; - cram_block_set_offset; - cram_block_set_uncomp_size; - cram_block_size; - cram_block_update_size; - cram_container_get_landmarks; - cram_container_get_length; - 
cram_container_get_num_blocks; - cram_container_is_empty; - cram_container_set_landmarks; - cram_container_set_length; - cram_container_set_num_blocks; - cram_container_size; - cram_copy_slice; - cram_fd_get_fp; - cram_fd_get_header; - cram_fd_get_version; - cram_fd_set_fp; - cram_fd_set_header; - cram_fd_set_version; - cram_major_vers; - cram_minor_vers; - cram_store_container; - cram_transcode_rg; - hfile_add_scheme_handler; - hfile_always_local; - hfile_always_remote; - hts_format_file_extension; - hts_idx_load2; - hts_idx_save_as; - hts_md5_destroy; - hts_md5_final; - hts_md5_hex; - hts_md5_init; - hts_md5_reset; - hts_md5_update; - hts_open_format; - hts_opt_add; - hts_opt_apply; - hts_opt_free; - hts_parse_decimal; - hts_parse_format; - hts_parse_opt_list; - int32_put_blk; - kgetline; - sam_index_build; - sam_index_build2; - sam_index_load2; - sam_open_mode_opts; - tbx_index_build2; - tbx_index_load2; -} HTSLIB_1.2.1; - -HTSLIB_1.4 { - bam_auxB2f; - bam_auxB2i; - bam_auxB_len; - bam_aux_update_str; - bam_mplp_constructor; - bam_mplp_destructor; - bam_mplp_reset; - bam_plp_constructor; - bam_plp_destructor; - bcf_hdr_format; - bcf_index_build3; - bcf_sr_destroy_threads; - bcf_sr_set_opt; - bcf_sr_set_threads; - bgzf_block_write; - bgzf_compression; - bgzf_index_dump_hfile; - bgzf_index_load_hfile; - bgzf_thread_pool; - cram_check_EOF; - cram_get_refs; - errmod_cal; - errmod_destroy; - errmod_init; - fai_build3; - fai_load3; - hgetdelim; - hgets; - hts_check_EOF; - hts_json_fnext; - hts_json_fskip_value; - hts_json_snext; - hts_json_sskip_value; - hts_realloc_or_die; - hts_set_cache_size; - hts_set_thread_pool; - hts_tpool_delete_result; - hts_tpool_destroy; - hts_tpool_dispatch; - hts_tpool_dispatch2; - hts_tpool_init; - hts_tpool_kill; - hts_tpool_next_result; - hts_tpool_next_result_wait; - hts_tpool_process_attach; - hts_tpool_process_destroy; - hts_tpool_process_detach; - hts_tpool_process_empty; - hts_tpool_process_flush; - hts_tpool_process_init; - 
hts_tpool_process_len; - hts_tpool_process_qsize; - hts_tpool_process_ref_decr; - hts_tpool_process_ref_incr; - hts_tpool_process_reset; - hts_tpool_process_shutdown; - hts_tpool_process_sz; - hts_tpool_result_data; - hts_tpool_size; - hts_tpool_wake_dispatch; - kputd; - probaln_glocal; - sam_cap_mapq; - sam_index_build3; - sam_prob_realn; - tbx_index_build3; -} HTSLIB_1.3; - -HTSLIB_1.5 { - hfile_set_blksize; - hts_get_log_level; - hts_log; - hts_set_log_level; -} HTSLIB_1.4; - -HTSLIB_1.6 { - hts_drand48; - hts_erand48; - hts_lrand48; - hts_srand48; -} HTSLIB_1.5; - -HTSLIB_1.7 { - hfile_mem_get_buffer; - hfile_mem_steal_buffer; - hts_itr_multi_bam; - hts_itr_multi_cram; - hts_itr_multi_next; - hts_itr_regions; - hts_json_alloc_token; - hts_json_free_token; - hts_json_token_str; - hts_json_token_type; - hts_reglist_free; - sam_hdr_change_HD; - sam_itr_regions; -} HTSLIB_1.6; - -HTSLIB_1.9 { - bam_aux_update_array; - bam_aux_update_float; - bam_aux_update_int; - fai_fetchqual; - fai_load3_format; - fai_load_format; - faidx_fetch_qual; -} HTSLIB_1.7; - -HTSLIB_1.10 { - bam_cigar_table; - bam_mplp64_auto; - bam_plp64_auto; - bam_plp64_next; - bam_plp_insertion; - bam_set_qname; - bcf_idx_init; - bcf_idx_save; - bcf_index_load3; - bgzf_peek; - fai_fetch64; - fai_fetchqual64; - fai_parse_region; - fai_set_cache_size; - faidx_fetch_qual64; - faidx_fetch_seq64; - haddextension; - hts_free; - hts_idx_fmt; - hts_idx_load3; - hts_idx_tbi_name; - hts_parse_reg64; - hts_parse_region; - hts_reglist_create; - hts_resize_array_; - hts_tpool_dispatch3; - kgetline2; - regidx_init_string; - regidx_insert_list; - regidx_parse_reg; - regidx_parse_vcf; - regidx_push; - regitr_copy; - regitr_destroy; - regitr_init; - regitr_loop; - regitr_overlap; - regitr_reset; - sam_hdr_add_line; - sam_hdr_add_pg; - sam_hdr_count_lines; - sam_hdr_destroy; - sam_hdr_find_line_id; - sam_hdr_find_line_pos; - sam_hdr_find_tag_id; - sam_hdr_find_tag_pos; - sam_hdr_init; - sam_hdr_line_index; - 
sam_hdr_line_name; - sam_hdr_name2tid; - sam_hdr_nref; - sam_hdr_pg_id; - sam_hdr_remove_except; - sam_hdr_remove_line_id; - sam_hdr_remove_line_pos; - sam_hdr_remove_lines; - sam_hdr_remove_tag_id; - sam_hdr_tid2len; - sam_hdr_tid2name; - sam_hdr_update_line; - sam_idx_init; - sam_idx_save; - sam_index_load3; - sam_itr_regarray; - sam_parse_region; - tbx_index_load3; -} HTSLIB_1.9; - -HTSLIB_1.11 { - fai_path; - hts_lib_shutdown; - hts_tpool_process_is_shutdown; - vcf_open_mode; -} HTSLIB_1.10; - -HTSLIB_1.12 { - bam_parse_cigar; - bam_set1; - hfile_has_plugin; - hfile_list_plugins; - hfile_list_schemes; - hts_feature_string; - hts_features; - hts_filter_eval; - hts_filter_free; - hts_filter_init; - hts_set_filter_expression; - hts_test_feature; - sam_parse_cigar; - sam_passes_filter; -} HTSLIB_1.11; - -HTSLIB_1.13 { - hts_idx_nseq; -} HTSLIB_1.12; - -HTSLIB_1.14 { - bam_mods_at_next_pos; - bam_mods_at_qpos; - bam_next_basemod; - bam_parse_basemod; - bam_plp_insertion_mod; - hts_base_mod_state_alloc; - hts_base_mod_state_free; - hts_flush; -} HTSLIB_1.13; - -HTSLIB_1.15 { - hts_detect_format2; -} HTSLIB_1.14; - -HTSLIB_1.16 { - bam_mods_query_type; - bam_mods_recorded; - bcf_has_variant_type; - bcf_has_variant_types; - bcf_variant_length; - cram_decode_slice_header; - cram_free_slice_header; - cram_slice_hdr_get_coords; - cram_slice_hdr_get_embed_ref_id; - cram_slice_hdr_get_num_blocks; - hts_filter_eval2; -} HTSLIB_1.15; - -HTSLIB_1.17 { - bam_aux_first; - bam_aux_next; - bam_aux_remove; - bcf_strerror; - cram_block_get_method; - cram_cid2ds_free; - cram_cid2ds_query; - cram_codec_describe; - cram_codec_get_content_ids; - cram_container_get_num_bases; - cram_container_get_num_records; - cram_decode_compression_header; - cram_describe_encodings; - cram_expand_method; - cram_free_compression_header; - cram_update_cid2ds_map; - fai_adjust_region; - fai_line_length; - faidx_seq_len64; -} HTSLIB_1.16; - -HTSLIB_1.18 { - bam_mods_queryi; - bam_parse_basemod2; - 
fai_thread_pool; -} HTSLIB_1.17; diff --git a/src/htslib-1.18/htslib.mk b/src/htslib-1.18/htslib.mk deleted file mode 100644 index 9c60ffc..0000000 --- a/src/htslib-1.18/htslib.mk +++ /dev/null @@ -1,194 +0,0 @@ -# Makefile rules useful for third-party code using htslib's public API. -# -# Copyright (C) 2013-2017, 2019, 2021 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# The makefile fragment included below provides variables that can be used -# to express dependencies on headers supplied by an in-development htslib. -# If your source file foo.c #includes and , -# you can write the correct prerequisites for foo.o as: -# -# HTSDIR = -# include $(HTSDIR)/htslib.mk -# -# foo.o: foo.c $(htslib_hts_h) $(htslib_kstring_h) - -HTSSRCDIR = $(HTSDIR) -HTSPREFIX = $(HTSSRCDIR)/ -include $(HTSDIR)/htslib_vars.mk - -# This file provides the HTSCODECS_SOURCES variable. 
It may not be present -# in a freshly checked-out htslib, so is only included if available. The -# absence is unlikely to cause a problem as there will be plenty of other -# missing files that will trigger a build in htslib, and when that happens -# htslib's makefile will create it. --include $(HTSDIR)/htscodecs.mk - -# Rules for rebuilding an in-development htslib's static and shared libraries. -# If your program foo links with libhts, adding the appropriate prerequisite -# will cause the library to be rebuilt as necessary: -# -# foo: foo.o $(HTSDIR)/libhts.a -# -# or similarly if your target requires any of the tools supplied: -# -# bar.bed.bgz.tbi: bar.bed.bgz $(HTSDIR)/tabix -# $(HTSDIR)/tabix -p bed bar.bed.bgz - -HTSLIB_PUBLIC_HEADERS = \ - $(HTSSRCDIR)/htslib/bgzf.h \ - $(HTSSRCDIR)/htslib/cram.h \ - $(HTSSRCDIR)/htslib/faidx.h \ - $(HTSSRCDIR)/htslib/hfile.h \ - $(HTSSRCDIR)/htslib/hts.h \ - $(HTSSRCDIR)/htslib/hts_defs.h \ - $(HTSSRCDIR)/htslib/hts_endian.h \ - $(HTSSRCDIR)/htslib/hts_expr.h \ - $(HTSSRCDIR)/htslib/hts_log.h \ - $(HTSSRCDIR)/htslib/hts_os.h \ - $(HTSSRCDIR)/htslib/kbitset.h \ - $(HTSSRCDIR)/htslib/kfunc.h \ - $(HTSSRCDIR)/htslib/khash.h \ - $(HTSSRCDIR)/htslib/khash_str2int.h \ - $(HTSSRCDIR)/htslib/klist.h \ - $(HTSSRCDIR)/htslib/kseq.h \ - $(HTSSRCDIR)/htslib/ksort.h \ - $(HTSSRCDIR)/htslib/kstring.h \ - $(HTSSRCDIR)/htslib/regidx.h \ - $(HTSSRCDIR)/htslib/sam.h \ - $(HTSSRCDIR)/htslib/synced_bcf_reader.h \ - $(HTSSRCDIR)/htslib/tbx.h \ - $(HTSSRCDIR)/htslib/thread_pool.h \ - $(HTSSRCDIR)/htslib/vcf.h \ - $(HTSSRCDIR)/htslib/vcf_sweep.h \ - $(HTSSRCDIR)/htslib/vcfutils.h - -HTSLIB_ALL = \ - $(HTSLIB_PUBLIC_HEADERS) \ - $(HTSSRCDIR)/bcf_sr_sort.c \ - $(HTSSRCDIR)/bcf_sr_sort.h \ - $(HTSSRCDIR)/bgzf.c \ - $(HTSDIR)/config.h \ - $(HTSSRCDIR)/errmod.c \ - $(HTSSRCDIR)/faidx.c \ - $(HTSSRCDIR)/header.c \ - $(HTSSRCDIR)/header.h \ - $(HTSSRCDIR)/hfile_internal.h \ - $(HTSSRCDIR)/hfile.c \ - $(HTSSRCDIR)/hfile_gcs.c \ - 
$(HTSSRCDIR)/hfile_libcurl.c \ - $(HTSSRCDIR)/hfile_s3.c \ - $(HTSSRCDIR)/hfile_s3_write.c \ - $(HTSSRCDIR)/hts.c \ - $(HTSSRCDIR)/hts_expr.c \ - $(HTSSRCDIR)/hts_internal.h \ - $(HTSSRCDIR)/hts_os.c \ - $(HTSSRCDIR)/kfunc.c \ - $(HTSSRCDIR)/kstring.c \ - $(HTSSRCDIR)/md5.c \ - $(HTSSRCDIR)/multipart.c \ - $(HTSSRCDIR)/plugin.c \ - $(HTSSRCDIR)/probaln.c \ - $(HTSSRCDIR)/realn.c \ - $(HTSSRCDIR)/regidx.c \ - $(HTSSRCDIR)/region.c \ - $(HTSSRCDIR)/sam.c \ - $(HTSSRCDIR)/sam_internal.h \ - $(HTSSRCDIR)/synced_bcf_reader.c \ - $(HTSSRCDIR)/tbx.c \ - $(HTSSRCDIR)/textutils.c \ - $(HTSSRCDIR)/textutils_internal.h \ - $(HTSSRCDIR)/thread_pool.c \ - $(HTSSRCDIR)/thread_pool_internal.h \ - $(HTSSRCDIR)/vcf.c \ - $(HTSSRCDIR)/vcf_sweep.c \ - $(HTSSRCDIR)/vcfutils.c \ - $(HTSSRCDIR)/cram/cram.h \ - $(HTSSRCDIR)/cram/cram_codecs.c \ - $(HTSSRCDIR)/cram/cram_codecs.h \ - $(HTSSRCDIR)/cram/cram_decode.c \ - $(HTSSRCDIR)/cram/cram_decode.h \ - $(HTSSRCDIR)/cram/cram_encode.c \ - $(HTSSRCDIR)/cram/cram_encode.h \ - $(HTSSRCDIR)/cram/cram_external.c \ - $(HTSSRCDIR)/cram/cram_index.c \ - $(HTSSRCDIR)/cram/cram_index.h \ - $(HTSSRCDIR)/cram/cram_io.c \ - $(HTSSRCDIR)/cram/cram_io.h \ - $(HTSSRCDIR)/cram/cram_samtools.h \ - $(HTSSRCDIR)/cram/cram_stats.c \ - $(HTSSRCDIR)/cram/cram_stats.h \ - $(HTSSRCDIR)/cram/cram_structs.h \ - $(HTSSRCDIR)/cram/mFILE.c \ - $(HTSSRCDIR)/cram/mFILE.h \ - $(HTSSRCDIR)/cram/misc.h \ - $(HTSSRCDIR)/cram/open_trace_file.c \ - $(HTSSRCDIR)/cram/open_trace_file.h \ - $(HTSSRCDIR)/cram/os.h \ - $(HTSSRCDIR)/cram/pooled_alloc.c \ - $(HTSSRCDIR)/cram/pooled_alloc.h \ - $(HTSSRCDIR)/cram/string_alloc.c \ - $(HTSSRCDIR)/cram/string_alloc.h \ - $(HTSSRCDIR)/os/lzma_stub.h \ - $(HTSSRCDIR)/os/rand.c \ - $(HTSCODECS_SOURCES) - -$(HTSDIR)/config.h: - +cd $(HTSDIR) && $(MAKE) config.h - -$(HTSDIR)/hts-object-files : $(HTSLIB_ALL) - +cd $(HTSDIR) && $(MAKE) hts-object-files - -$(HTSDIR)/libhts.a: $(HTSDIR)/hts-object-files - +cd $(HTSDIR) && $(MAKE) lib-static - 
-$(HTSDIR)/libhts.so: $(HTSLIB_ALL) - +cd $(HTSDIR) && $(MAKE) lib-shared - -$(HTSDIR)/libhts.dylib $(HTSDIR)/libhts.dll.a $(HTSDIR)/hts.dll.a: $(HTSDIR)/hts-object-files - +cd $(HTSDIR) && $(MAKE) lib-shared - -$(HTSDIR)/bgzip: $(HTSSRCDIR)/bgzip.c $(HTSLIB_PUBLIC_HEADERS) $(HTSDIR)/libhts.a - +cd $(HTSDIR) && $(MAKE) bgzip - -$(HTSDIR)/htsfile: $(HTSSRCDIR)/htsfile.c $(HTSLIB_PUBLIC_HEADERS) $(HTSDIR)/libhts.a - +cd $(HTSDIR) && $(MAKE) htsfile - -$(HTSDIR)/tabix: $(HTSSRCDIR)/tabix.c $(HTSLIB_PUBLIC_HEADERS) $(HTSDIR)/libhts.a - +cd $(HTSDIR) && $(MAKE) tabix - -$(HTSDIR)/htslib_static.mk: $(HTSDIR)/htslib.pc.tmp - +cd $(HTSDIR) && $(MAKE) htslib_static.mk - -$(HTSDIR)/htslib.pc.tmp: - +cd $(HTSDIR) && $(MAKE) htslib.pc.tmp - -# Rules for phony targets. You may wish to have your corresponding phony -# targets invoke these in addition to their own recipes: -# -# clean: clean-htslib - -all-htslib check-htslib clean-htslib distclean-htslib install-htslib mostlyclean-htslib plugins-htslib test-htslib testclean-htslib: - +cd $(HTSDIR) && $(MAKE) $(@:-htslib=) - -.PHONY: all-htslib check-htslib clean-htslib distclean-htslib install-htslib -.PHONY: mostlyclean-htslib plugins-htslib test-htslib testclean-htslib diff --git a/src/htslib-1.18/htslib/bgzf.h b/src/htslib-1.18/htslib/bgzf.h deleted file mode 100644 index ea4ec3e..0000000 --- a/src/htslib-1.18/htslib/bgzf.h +++ /dev/null @@ -1,467 +0,0 @@ -/// @file htslib/bgzf.h -/// Low-level routines for direct BGZF operations. 
-/* - Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology - 2011, 2012 Attractive Chaos - Copyright (C) 2009, 2013, 2014, 2017, 2018-2019, 2022-2023 Genome Research Ltd - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -/* The BGZF library was originally written by Bob Handsaker from the Broad - * Institute. It was later improved by the SAMtools developers. */ - -#ifndef HTSLIB_BGZF_H -#define HTSLIB_BGZF_H - -#include -#include - -#include "hts_defs.h" - -// Ensure ssize_t exists within this header. All #includes must precede this, -// and ssize_t must be undefined again at the end of this header. 
-#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t -#define HTSLIB_SSIZE_T -#define ssize_t intptr_t -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#define BGZF_BLOCK_SIZE 0xff00 // make sure compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE -#define BGZF_MAX_BLOCK_SIZE 0x10000 - -#define BGZF_ERR_ZLIB 1 -#define BGZF_ERR_HEADER 2 -#define BGZF_ERR_IO 4 -#define BGZF_ERR_MISUSE 8 -#define BGZF_ERR_MT 16 // stream cannot be multi-threaded -#define BGZF_ERR_CRC 32 - -struct hFILE; -struct hts_tpool; -struct kstring_t; -struct bgzf_mtaux_t; -typedef struct bgzidx_t bgzidx_t; -typedef struct bgzf_cache_t bgzf_cache_t; -struct z_stream_s; - -struct BGZF { - // Reserved bits should be written as 0; read as "don't care" - unsigned errcode:16, reserved:1, is_write:1, no_eof_block:1, is_be:1; - signed compress_level:9; - unsigned last_block_eof:1, is_compressed:1, is_gzip:1; - int cache_size; - int block_length, block_clength, block_offset; - int64_t block_address, uncompressed_address; - void *uncompressed_block, *compressed_block; - bgzf_cache_t *cache; - struct hFILE *fp; // actual file handle - struct bgzf_mtaux_t *mt; // only used for multi-threading - bgzidx_t *idx; // BGZF index - int idx_build_otf; // build index on the fly, set by bgzf_index_build_init() - struct z_stream_s *gz_stream; // for gzip-compressed files - int64_t seeked; // virtual offset of last seek -}; -#ifndef HTS_BGZF_TYPEDEF -typedef struct BGZF BGZF; -#define HTS_BGZF_TYPEDEF -#endif - - /****************** - * Basic routines * - ******************/ - - /** - * Open an existing file descriptor for reading or writing. - * - * @param fd file descriptor - * Note that the file must be opened in binary mode, or else - * there will be problems on platforms that make a difference - * between text and binary mode. 
- * @param mode mode matching /[rwag][u0-9]+/: 'r' for reading, 'w' for - * writing, 'a' for appending, 'g' for gzip rather than BGZF - * compression (with 'w' only), and digit specifies the zlib - * compression level. - * Note that there is a distinction between 'u' and '0': the - * first yields plain uncompressed output whereas the latter - * outputs uncompressed data wrapped in the zlib format. - * @return BGZF file handler; 0 on error - */ - HTSLIB_EXPORT - BGZF* bgzf_dopen(int fd, const char *mode); - - #define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility - - /** - * Open the specified file for reading or writing. - */ - HTSLIB_EXPORT - BGZF* bgzf_open(const char* path, const char *mode); - - /** - * Open an existing hFILE stream for reading or writing. - */ - HTSLIB_EXPORT - BGZF* bgzf_hopen(struct hFILE *fp, const char *mode); - - /** - * Close the BGZF and free all associated resources. - * - * @param fp BGZF file handler - * @return 0 on success and -1 on error - */ - HTSLIB_EXPORT - int bgzf_close(BGZF *fp); - - /** - * Read up to _length_ bytes from the file storing into _data_. - * - * @param fp BGZF file handler - * @param data data array to read into - * @param length size of data to read - * @return number of bytes actually read; 0 on end-of-file and -1 on error - */ - HTSLIB_EXPORT - ssize_t bgzf_read(BGZF *fp, void *data, size_t length) HTS_RESULT_USED; - - /** - * Write _length_ bytes from _data_ to the file. If no I/O errors occur, - * the complete _length_ bytes will be written (or queued for writing). 
- * - * @param fp BGZF file handler - * @param data data array to write - * @param length size of data to write - * @return number of bytes written (i.e., _length_); negative on error - */ - HTSLIB_EXPORT - ssize_t bgzf_write(BGZF *fp, const void *data, size_t length) HTS_RESULT_USED; - - /** - * Write _length_ bytes from _data_ to the file, the index will be used to - * decide the amount of uncompressed data to be written to each bgzip block. - * If no I/O errors occur, the complete _length_ bytes will be written (or - * queued for writing). - * @param fp BGZF file handler - * @param data data array to write - * @param length size of data to write - * @return number of bytes written (i.e., _length_); negative on error - */ - HTSLIB_EXPORT - ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length); - - /** - * Returns the next byte in the file without consuming it. - * @param fp BGZF file handler - * @return -1 on EOF, - * -2 on error, - * otherwise the unsigned byte value. - */ - HTSLIB_EXPORT - int bgzf_peek(BGZF *fp); - - /** - * Read up to _length_ bytes directly from the underlying stream without - * decompressing. Bypasses BGZF blocking, so must be used with care in - * specialised circumstances only. - * - * @param fp BGZF file handler - * @param data data array to read into - * @param length number of raw bytes to read - * @return number of bytes actually read; 0 on end-of-file and -1 on error - */ - HTSLIB_EXPORT - ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length) HTS_RESULT_USED; - - /** - * Write _length_ bytes directly to the underlying stream without - * compressing. Bypasses BGZF blocking, so must be used with care - * in specialised circumstances only. 
- * - * @param fp BGZF file handler - * @param data data array to write - * @param length number of raw bytes to write - * @return number of bytes actually written; -1 on error - */ - HTSLIB_EXPORT - ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length) HTS_RESULT_USED; - - /** - * Write the data in the buffer to the file. - * - * @param fp BGZF file handle - * @return 0 on success and -1 on error - */ - HTSLIB_EXPORT - int bgzf_flush(BGZF *fp) HTS_RESULT_USED; - - /** - * Return a virtual file pointer to the current location in the file. - * No interpretation of the value should be made, other than a subsequent - * call to bgzf_seek can be used to position the file at the same point. - * Return value is non-negative on success. - */ - #define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF)) - - /** - * Set the file to read from the location specified by _pos_. - * - * @param fp BGZF file handler - * @param pos virtual file offset returned by bgzf_tell() - * @param whence must be SEEK_SET - * @return 0 on success and -1 on error - * - * @note It is not permitted to seek on files open for writing, - * or files compressed with gzip (as opposed to bgzip). 
- */ - HTSLIB_EXPORT - int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence) HTS_RESULT_USED; - - /** - * Check if the BGZF end-of-file (EOF) marker is present - * - * @param fp BGZF file handler opened for reading - * @return 1 if the EOF marker is present and correct; - * 2 if it can't be checked, e.g., because fp isn't seekable; - * 0 if the EOF marker is absent; - * -1 (with errno set) on error - */ - HTSLIB_EXPORT - int bgzf_check_EOF(BGZF *fp); - - /** Return the file's compression format - * - * @param fp BGZF file handle - * @return A small integer matching the corresponding - * `enum htsCompression` value: - * - 0 / `no_compression` if the file is uncompressed - * - 1 / `gzip` if the file is plain GZIP-compressed - * - 2 / `bgzf` if the file is BGZF-compressed - * @since 1.4 - */ - HTSLIB_EXPORT - int bgzf_compression(BGZF *fp); - - /** - * Check if a file is in the BGZF format - * - * @param fn file name - * @return 1 if _fn_ is BGZF; 0 if not or on I/O error - */ - HTSLIB_EXPORT - int bgzf_is_bgzf(const char *fn) HTS_DEPRECATED("Use bgzf_compression() or hts_detect_format() instead"); - - /********************* - * Advanced routines * - *********************/ - - /** - * Set the cache size. Only effective when compiled with -DBGZF_CACHE. - * - * @param fp BGZF file handler - * @param size size of cache in bytes; 0 to disable caching (default) - */ - HTSLIB_EXPORT - void bgzf_set_cache_size(BGZF *fp, int size); - - /** - * Flush the file if the remaining buffer size is smaller than _size_ - * @return 0 if flushing succeeded or was not needed; negative on error - */ - HTSLIB_EXPORT - int bgzf_flush_try(BGZF *fp, ssize_t size) HTS_RESULT_USED; - - /** - * Read one byte from a BGZF file. It is faster than bgzf_read() - * @param fp BGZF file handler - * @return byte read; -1 on end-of-file or error - */ - HTSLIB_EXPORT - int bgzf_getc(BGZF *fp); - - /** - * Read one line from a BGZF file. 
It is faster than bgzf_getc() - * - * @param fp BGZF file handler - * @param delim delimiter - * @param str string to write to; must be initialized - * @return length of the string (capped at INT_MAX); - * -1 on end-of-file; <= -2 on error - */ - HTSLIB_EXPORT - int bgzf_getline(BGZF *fp, int delim, struct kstring_t *str); - - /** - * Read the next BGZF block. - */ - HTSLIB_EXPORT - int bgzf_read_block(BGZF *fp) HTS_RESULT_USED; - - /** - * Enable multi-threading via a shared thread pool. This means - * both encoder and decoder can balance usage across a single pool - * of worker jobs. - * - * @param fp BGZF file handler - * @param pool The thread pool (see hts_create_threads) - * @param qsize The size of the job queue. If 0 this is twice the - * number of threads in the pool. - */ - HTSLIB_EXPORT - int bgzf_thread_pool(BGZF *fp, struct hts_tpool *pool, int qsize); - - /** - * Enable multi-threading - * - * @param fp BGZF file handler - * @param n_threads #threads used for reading / writing - * @param n_sub_blks Unused (was #blocks processed by each thread) - */ - HTSLIB_EXPORT - int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks); - - /** - * Compress a single BGZF block. - * - * @param dst output buffer (must have size >= BGZF_MAX_BLOCK_SIZE) - * @param dlen size of output buffer; updated on return to the number - * of bytes actually written to dst - * @param src buffer to be compressed - * @param slen size of data to compress (must be <= BGZF_BLOCK_SIZE) - * @param level compression level - * @return 0 on success and negative on error - */ - HTSLIB_EXPORT - int bgzf_compress(void *dst, size_t *dlen, const void *src, size_t slen, int level); - - /******************* - * bgzidx routines * - *******************/ - - /** - * Position BGZF at the uncompressed offset - * - * @param fp BGZF file handler; must be opened for reading - * @param uoffset file offset in the uncompressed data - * @param where must be SEEK_SET - * - * Returns 0 on success and -1 on error. 
- * - * @note It is not permitted to seek on files open for writing, - * or files compressed with gzip (as opposed to bgzip). - */ - HTSLIB_EXPORT - int bgzf_useek(BGZF *fp, off_t uoffset, int where) HTS_RESULT_USED; - - /** - * Position in uncompressed BGZF - * - * @param fp BGZF file handler; must be opened for reading - * - * Returns the current offset on success and -1 on error. - */ - HTSLIB_EXPORT - off_t bgzf_utell(BGZF *fp); - - /** - * Tell BGZF to build index while compressing. - * - * @param fp BGZF file handler; can be opened for reading or writing. - * - * Returns 0 on success and -1 on error. - * - * @note This function must be called before any data has been read or - * written, and in particular before calling bgzf_mt() on the same - * file handle (as threads may start reading data before the index - * has been set up). - */ - HTSLIB_EXPORT - int bgzf_index_build_init(BGZF *fp); - - /// Load BGZF index - /** - * @param fp BGZF file handler - * @param bname base name - * @param suffix suffix to add to bname (can be NULL) - * @return 0 on success and -1 on error. - */ - HTSLIB_EXPORT - int bgzf_index_load(BGZF *fp, - const char *bname, const char *suffix) HTS_RESULT_USED; - - /// Load BGZF index from an hFILE - /** - * @param fp BGZF file handle - * @param idx hFILE to read from - * @param name file name (for error reporting only; can be NULL) - * @return 0 on success and -1 on error. - * - * Populates @p fp with index data read from the hFILE handle @p idx. - * The file pointer to @idx should point to the start of the index - * data when this function is called. - * - * The file name can optionally be passed in the @p name parameter. This - * is only used for printing error messages; if NULL the word "index" is - * used instead. 
- */ - HTSLIB_EXPORT - int bgzf_index_load_hfile(BGZF *fp, struct hFILE *idx, - const char *name) HTS_RESULT_USED; - - /// Save BGZF index - /** - * @param fp BGZF file handler - * @param bname base name - * @param suffix suffix to add to bname (can be NULL) - * @return 0 on success and -1 on error. - */ - HTSLIB_EXPORT - int bgzf_index_dump(BGZF *fp, - const char *bname, const char *suffix) HTS_RESULT_USED; - - /// Write a BGZF index to an hFILE - /** - * @param fp BGZF file handle - * @param idx hFILE to write to - * @param name file name (for error reporting only, can be NULL) - * @return 0 on success and -1 on error. - * - * Write index data from @p fp to the file @p idx. - * - * The file name can optionally be passed in the @p name parameter. This - * is only used for printing error messages; if NULL the word "index" is - * used instead. - */ - - HTSLIB_EXPORT - int bgzf_index_dump_hfile(BGZF *fp, struct hFILE *idx, - const char *name) HTS_RESULT_USED; - -#ifdef __cplusplus -} -#endif - -#ifdef HTSLIB_SSIZE_T -#undef HTSLIB_SSIZE_T -#undef ssize_t -#endif - -#endif diff --git a/src/htslib-1.18/htslib/cram.h b/src/htslib-1.18/htslib/cram.h deleted file mode 100644 index e0b5183..0000000 --- a/src/htslib-1.18/htslib/cram.h +++ /dev/null @@ -1,753 +0,0 @@ -/// @file htslib/cram.h -/// CRAM format-specific API functions. -/* - Copyright (C) 2015, 2016, 2018-2020, 2022-2023 Genome Research Ltd. 
- - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -/** @file - * Consider using the higher level hts_*() API for programs that wish to - * be file format agnostic (see htslib/hts.h). - * - * This API should be used for CRAM specific code. The specifics of the - * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h - * although these should not be included directly (use this file instead). - */ - -#ifndef HTSLIB_CRAM_H -#define HTSLIB_CRAM_H - -#include -#include -#include - -#include "hts_defs.h" -#include "hts.h" -#include "sam.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// see cram/cram_structs.h for an internal more complete copy of this enum - -// Htslib 1.11 had these listed without any hts prefix, and included -// some internal values such as RANS1 and GZIP_RLE (which shouldn't have ever -// been public). 
-// -// We can't find evidence of these being used and the data type occurs -// nowhere in functions or structures meaning using it would be pointless. -// However for safety, if you absolute need the API to not change then -// define HTS_COMPAT to 101100 (XYYYZZ for X.Y[.Z], meaning 1.11). -#if defined(HTS_COMPAT) && HTS_COMPAT <= 101100 -enum cram_block_method { - // Public methods as defined in the CRAM spec. - BM_ERROR = -1, - - // CRAM 2.x and 3.0 - RAW = 0, - GZIP = 1, - BZIP2 = 2, - LZMA = 3, - RANS = 4, - - // NB: the subsequent numbers may change. They're simply here for - // compatibility with the old API, but may have no bearing on the - // internal way htslib works. DO NOT USE - RANS0 = 4, - RANS1 = 10, - GZIP_RLE = 11, -}; -#else - -// Values as defined in the CRAM specifications. -// See cram/cram_structs.h cram_block_method_int for an expanded version of -// this with local specialisations assigned to codes. -enum cram_block_method { - CRAM_COMP_UNKNOWN = -1, - - // CRAM 2.x and 3.0 - CRAM_COMP_RAW = 0, - CRAM_COMP_GZIP = 1, - CRAM_COMP_BZIP2 = 2, - - // CRAM 3.0 - CRAM_COMP_LZMA = 3, - CRAM_COMP_RANS4x8 = 4, // 4-way interleaving, 8-bit renormalisation - - // CRAM 3.1 - CRAM_COMP_RANSNx16 = 5, // both 4x16 and 32x16 variants, plus transforms - CRAM_COMP_ARITH = 6, // aka Range coding - CRAM_COMP_FQZ = 7, // FQZComp - CRAM_COMP_TOK3 = 8, // Name tokeniser -}; -#endif - -/* NOTE this structure may be expanded in future releases by appending - * additional fields. - * - * Do not assume the size is fixed and avoid using arrays of this struct. - */ -typedef struct { - enum cram_block_method method; - - // Generic compression level if known (0 if not). - // 1 or 9 for gzip min/max flag (else 5). 1-9 for bzip2 - // 1 or 11 for for tok3 (rans/arith encoder). 
- int level; - - // For rans* and arith codecs - int order; - - // ransNx16/arith specific - int rle; - int pack; - int stripe; - int cat; - int nosz; - int Nway; - - // Arithmetic coder only - int ext; // external: use gz, xz or bzip2 -} cram_method_details; - -enum cram_content_type { - CT_ERROR = -1, - FILE_HEADER = 0, - COMPRESSION_HEADER = 1, - MAPPED_SLICE = 2, - UNMAPPED_SLICE = 3, // CRAM V1.0 only - EXTERNAL = 4, - CORE = 5, -}; - -// Opaque data types, see cram_structs for the fully fledged versions. -typedef struct cram_file_def cram_file_def; -typedef struct cram_fd cram_fd; -typedef struct cram_container cram_container; -typedef struct cram_block cram_block; -typedef struct cram_slice cram_slice; -typedef struct cram_metrics cram_metrics; -typedef struct cram_block_slice_hdr cram_block_slice_hdr; -typedef struct cram_block_compression_hdr cram_block_compression_hdr; -typedef struct cram_codec cram_codec; -typedef struct refs_t refs_t; - -struct hFILE; - -// Accessor functions - -/* - *----------------------------------------------------------------------------- - * cram_fd - */ -HTSLIB_EXPORT -sam_hdr_t *cram_fd_get_header(cram_fd *fd); - -HTSLIB_EXPORT -void cram_fd_set_header(cram_fd *fd, sam_hdr_t *hdr); - -HTSLIB_EXPORT -int cram_fd_get_version(cram_fd *fd); - -HTSLIB_EXPORT -void cram_fd_set_version(cram_fd *fd, int vers); - -HTSLIB_EXPORT -int cram_major_vers(cram_fd *fd); -HTSLIB_EXPORT -int cram_minor_vers(cram_fd *fd); - -HTSLIB_EXPORT -struct hFILE *cram_fd_get_fp(cram_fd *fd); -HTSLIB_EXPORT -void cram_fd_set_fp(cram_fd *fd, struct hFILE *fp); - - -/* - *----------------------------------------------------------------------------- - * cram_container - */ -HTSLIB_EXPORT -int32_t cram_container_get_length(cram_container *c); -HTSLIB_EXPORT -void cram_container_set_length(cram_container *c, int32_t length); -HTSLIB_EXPORT -int32_t cram_container_get_num_blocks(cram_container *c); -HTSLIB_EXPORT -void cram_container_set_num_blocks(cram_container 
*c, int32_t num_blocks); -HTSLIB_EXPORT -int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks); -HTSLIB_EXPORT -void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks, - int32_t *landmarks); -HTSLIB_EXPORT -int32_t cram_container_get_num_records(cram_container *c); -HTSLIB_EXPORT -int64_t cram_container_get_num_bases(cram_container *c); - -/* Returns true if the container is empty (EOF marker) */ -HTSLIB_EXPORT -int cram_container_is_empty(cram_fd *fd); - - -/* - *----------------------------------------------------------------------------- - * cram_block - */ -HTSLIB_EXPORT -int32_t cram_block_get_content_id(cram_block *b); -HTSLIB_EXPORT -int32_t cram_block_get_comp_size(cram_block *b); -HTSLIB_EXPORT -int32_t cram_block_get_uncomp_size(cram_block *b); -HTSLIB_EXPORT -int32_t cram_block_get_crc32(cram_block *b); -HTSLIB_EXPORT -void * cram_block_get_data(cram_block *b); -HTSLIB_EXPORT -enum cram_content_type cram_block_get_content_type(cram_block *b); -HTSLIB_EXPORT -enum cram_block_method cram_block_get_method(cram_block *b); - -HTSLIB_EXPORT -cram_method_details *cram_expand_method(uint8_t *data, int32_t size, - enum cram_block_method comp); - -HTSLIB_EXPORT -void cram_block_set_content_id(cram_block *b, int32_t id); -HTSLIB_EXPORT -void cram_block_set_comp_size(cram_block *b, int32_t size); -HTSLIB_EXPORT -void cram_block_set_uncomp_size(cram_block *b, int32_t size); -HTSLIB_EXPORT -void cram_block_set_crc32(cram_block *b, int32_t crc); -HTSLIB_EXPORT -void cram_block_set_data(cram_block *b, void *data); - -HTSLIB_EXPORT -int cram_block_append(cram_block *b, const void *data, int size); -HTSLIB_EXPORT -void cram_block_update_size(cram_block *b); - -// Offset is known as "size" internally, but it can be confusing. 
-HTSLIB_EXPORT -size_t cram_block_get_offset(cram_block *b); -HTSLIB_EXPORT -void cram_block_set_offset(cram_block *b, size_t offset); - -/* - * Computes the size of a cram block, including the block - * header itself. - */ -HTSLIB_EXPORT -uint32_t cram_block_size(cram_block *b); - -/* - * Returns the Block Content ID values referred to by a cram_codec in - * ids[2]. - * - * -2 is unused. - * -1 is CORE - * >= 0 is the block with that Content ID - */ -HTSLIB_EXPORT -void cram_codec_get_content_ids(cram_codec *c, int ids[2]); - -/* - * Produces a human readable description of the codec parameters. - * This is appended to an existing kstring 'ks'. - * - * Returns 0 on succes, - * <0 on failure - */ -HTSLIB_EXPORT -int cram_codec_describe(cram_codec *c, kstring_t *ks); - -/* - * Renumbers RG numbers in a cram compression header. - * - * CRAM stores RG as the Nth number in the header, rather than a - * string holding the ID: tag. This is smaller in space, but means - * "samtools cat" to join files together that contain single but - * different RG lines needs a way of renumbering them. - * - * The file descriptor is expected to be immediately after the - * cram_container structure (ie before the cram compression header). - * Due to the nature of the CRAM format, this needs to read and write - * the blocks itself. Note that there may be multiple slices within - * the container, meaning multiple compression headers to manipulate. - * Changing RG may change the size of the compression header and - * therefore the length field in the container. Hence we rewrite all - * blocks just in case and also emit the adjusted container. - * - * The current implementation can only cope with renumbering a single - * RG (and only then if it is using HUFFMAN or BETA codecs). In - * theory it *may* be possible to renumber multiple RGs if they use - * HUFFMAN to the CORE block or use an external block unshared by any - * other data series. 
So we have an API that can be upgraded to - * support this, but do not implement it for now. An example - * implementation of RG as an EXTERNAL block would be to find that - * block and rewrite it, returning the number of blocks consumed. - * - * Returns 0 on success; - * -1 if unable to edit; - * -2 on other errors (eg I/O). - */ -HTSLIB_EXPORT -int cram_transcode_rg(cram_fd *in, cram_fd *out, - cram_container *c, - int nrg, int *in_rg, int *out_rg); - -/* - * Copies the blocks representing the next num_slice slices from a - * container from 'in' to 'out'. It is expected that the file pointer - * is just after the read of the cram_container and cram compression - * header. - * - * Returns 0 on success - * -1 on failure - */ -HTSLIB_EXPORT -int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice); - -/* - * Decodes a CRAM block compression header. - * Returns header ptr on success - * NULL on failure - */ -HTSLIB_EXPORT -cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, - cram_block *b); -/* - * Frees a cram_block_compression_hdr structure. - */ -HTSLIB_EXPORT -void cram_free_compression_header(cram_block_compression_hdr *hdr); - -typedef struct cram_cid2ds_t cram_cid2ds_t; - -/* - * Map cram block numbers to data-series. It's normally a 1:1 mapping, - * but in rare cases it can be 1:many (or even many:many). - * The key is the block number and the value is an index into the data-series - * array, which we iterate over until reaching a negative value. - * - * Provide cid2ds as NULL to allocate a new map or pass in an existing one - * to append to this map. The new (or existing) map is returned. - * - * Returns the cid2ds (newly allocated or as provided) on success, - * NULL on failure. - */ -HTSLIB_EXPORT -cram_cid2ds_t *cram_update_cid2ds_map(cram_block_compression_hdr *hdr, - cram_cid2ds_t *cid2ds); - -/* - * Return a list of data series observed as belonging to a block with - * the specified content_id. 
*n is the number of data series - * returned, or 0 if block is unused. - * Block content_id of -1 is used to indicate the CORE block. - * - * The pointer returned is owned by the cram_cid2ds state and should - * not be freed by the caller. - */ -HTSLIB_EXPORT -int *cram_cid2ds_query(cram_cid2ds_t *c2d, int content_id, int *n); - -/* - * Frees a cram_cid2ds_t allocated by cram_update_cid2ds_map - */ -HTSLIB_EXPORT -void cram_cid2ds_free(cram_cid2ds_t *cid2ds); - -/* - * Produces a description of the record and tag encodings held within - * a compression header and appends to 'ks'. - * - * Returns 0 on success, - * <0 on failure. - */ -HTSLIB_EXPORT -int cram_describe_encodings(cram_block_compression_hdr *hdr, kstring_t *ks); - -/* - *----------------------------------------------------------------------------- - * cram slice interrogation - */ - -/* - * Returns the number of cram blocks within this slice. - */ -HTSLIB_EXPORT -int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr); - -/* - * Returns the block content_id for the block containing an embedded reference - * sequence. If none is present, -1 is returned. - */ -HTSLIB_EXPORT -int cram_slice_hdr_get_embed_ref_id(cram_block_slice_hdr *h); - -/* - * Returns slice reference ID, start and span (length) coordinates. - * Return parameters may be NULL in which case they are ignored. - */ -HTSLIB_EXPORT -void cram_slice_hdr_get_coords(cram_block_slice_hdr *h, - int *refid, hts_pos_t *start, hts_pos_t *span); - -/* - * Decodes a slice header from a cram block. - * Returns the opaque cram_block_slice_hdr pointer on success, - * NULL on failure. - */ -HTSLIB_EXPORT -cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b); - -/* - * Frees a cram_block_slice_hdr structure. 
- */ -HTSLIB_EXPORT -void cram_free_slice_header(cram_block_slice_hdr *hdr); - -/* - *----------------------------------------------------------------------------- - * cram_io basics - */ - -/**@{ ---------------------------------------------------------------------- - * CRAM blocks - the dynamically growable data block. We have code to - * create, update, (un)compress and read/write. - * - * These are derived from the deflate_interlaced.c blocks, but with the - * CRAM extension of content types and IDs. - */ - -/*! Allocates a new cram_block structure with a specified content_type and - * id. - * - * @return - * Returns block pointer on success; - * NULL on failure - * - * The cram_block struct returned by a successful call should be freed - * via cram_free_block() when it is no longer needed. - */ -HTSLIB_EXPORT -cram_block *cram_new_block(enum cram_content_type content_type, - int content_id); - -/*! Reads a block from a cram file. - * - * @return - * Returns cram_block pointer on success; - * NULL on failure - * - * The cram_block struct returned by a successful call should be freed - * via cram_free_block() when it is no longer needed. - */ -HTSLIB_EXPORT -cram_block *cram_read_block(cram_fd *fd); - -/*! Writes a CRAM block. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_write_block(cram_fd *fd, cram_block *b); - -/*! Frees a CRAM block, deallocating internal data too. - */ -HTSLIB_EXPORT -void cram_free_block(cram_block *b); - -/*! Uncompresses a CRAM block, if compressed. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_uncompress_block(cram_block *b); - -/*! Compresses a block. - * - * Compresses a block using one of two different zlib strategies. If we only - * want one choice set strat2 to be -1. - * - * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED - * or Z_DEFAULT_STRATEGY on quality data. 
If so, we'd rather use it as it is - * significantly faster. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, - int method, int level); -int cram_compress_block2(cram_fd *fd, cram_slice *s, - cram_block *b, cram_metrics *metrics, - int method, int level); - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Containers - */ - -/*! Creates a new container, specifying the maximum number of slices - * and records permitted. - * - * @return - * Returns cram_container ptr on success; - * NULL on failure - * - * The cram_container struct returned by a successful call should be freed - * via cram_free_container() when it is no longer needed. - */ -HTSLIB_EXPORT -cram_container *cram_new_container(int nrec, int nslice); -HTSLIB_EXPORT -void cram_free_container(cram_container *c); - -/*! Reads a container header. - * - * @return - * Returns cram_container on success; - * NULL on failure or no container left (fd->err == 0). - * - * The cram_container struct returned by a successful call should be freed - * via cram_free_container() when it is no longer needed. - */ -HTSLIB_EXPORT -cram_container *cram_read_container(cram_fd *fd); - -/*! Writes a container structure. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_write_container(cram_fd *fd, cram_container *h); - -/* - * Stores the container structure in dat and returns *size as the - * number of bytes written to dat[]. The input size of dat is also - * held in *size and should be initialised to cram_container_size(c). 
- * - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size); - -HTSLIB_EXPORT -int cram_container_size(cram_container *c); - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * The top-level cram opening, closing and option handling - */ - -/*! Opens a CRAM file for read (mode "rb") or write ("wb"). - * - * The filename may be "-" to indicate stdin or stdout. - * - * @return - * Returns file handle on success; - * NULL on failure. - */ -HTSLIB_EXPORT -cram_fd *cram_open(const char *filename, const char *mode); - -/*! Opens an existing stream for reading or writing. - * - * @return - * Returns file handle on success; - * NULL on failure. - */ -HTSLIB_EXPORT -cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode); - -/*! Closes a CRAM file. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_close(cram_fd *fd); - -/* - * Seek within a CRAM file. - * - * Returns 0 on success - * -1 on failure - */ -HTSLIB_EXPORT -int cram_seek(cram_fd *fd, off_t offset, int whence); - -/* - * Flushes a CRAM file. - * Useful for when writing to stdout without wishing to close the stream. - * - * Returns 0 on success - * -1 on failure - */ -HTSLIB_EXPORT -int cram_flush(cram_fd *fd); - -/*! Checks for end of file on a cram_fd stream. - * - * @return - * Returns 0 if not at end of file - * 1 if we hit an expected EOF (end of range or EOF block) - * 2 for other EOF (end of stream without EOF block) - */ -HTSLIB_EXPORT -int cram_eof(cram_fd *fd); - -/*! Sets options on the cram_fd. - * - * See CRAM_OPT_* definitions in hts.h. - * Use this immediately after opening. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...); - -/*! Sets options on the cram_fd. - * - * See CRAM_OPT_* definitions in hts.h. 
- * Use this immediately after opening. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args); - -/*! - * Attaches a header to a cram_fd. - * - * This should be used when creating a new cram_fd for writing where - * we have an SAM_hdr already constructed (eg from a file we've read - * in). - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_set_header(cram_fd *fd, sam_hdr_t *hdr); - -/*! Check if this file has a proper EOF block - * - * @return - * Returns 3 if the file is a version of CRAM that does not contain EOF blocks - * 2 if the file is a stream and thus unseekable - * 1 if the file contains an EOF block - * 0 if the file does not contain an EOF block - * -1 if an error occurred whilst reading the file or we could not seek back to where we were - * - */ -HTSLIB_EXPORT -int cram_check_EOF(cram_fd *fd); - -/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ -HTSLIB_EXPORT -int int32_put_blk(cram_block *b, int32_t val); - -/**@}*/ -/**@{ ------------------------------------------------------------------- - * Old typedef and function names for compatibility with existing code. - * Header functionality is now provided by sam.h's sam_hdr_t functions. - */ - -typedef sam_hdr_t SAM_hdr; - -/*! Tokenises a SAM header into a hash table. - * - * Also extracts a few bits on specific data types, such as @RG lines. - * - * @return - * Returns a SAM_hdr struct on success (free with sam_hdr_free()); - * NULL on failure - */ -static inline SAM_hdr *sam_hdr_parse_(const char *hdr, size_t len) { return sam_hdr_parse(len, hdr); } - -/*! Deallocates all storage used by a SAM_hdr struct. - * - * This also decrements the header reference count. If after decrementing - * it is still non-zero then the header is assumed to be in use by another - * caller and the free is not done. 
- */ -static inline void sam_hdr_free(SAM_hdr *hdr) { sam_hdr_destroy(hdr); } - -/* sam_hdr_length() and sam_hdr_str() are now provided by sam.h. */ - -/*! Add an @PG line. - * - * If we wish complete control over this use sam_hdr_add_line() directly. This - * function uses that, but attempts to do a lot of tedious house work for - * you too. - * - * - It will generate a suitable ID if the supplied one clashes. - * - It will generate multiple @PG records if we have multiple PG chains. - * - * Call it as per sam_hdr_add_line() with a series of key,value pairs ending - * in NULL. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -#define sam_hdr_add_PG sam_hdr_add_pg - -/**@{ -------------------------------------------------------------------*/ - -/*! - * Returns the refs_t structure used by a cram file handle. - * - * This may be used in conjunction with option CRAM_OPT_SHARED_REF to - * share reference memory between multiple file handles. - * - * @return - * Returns NULL if none exists or the file handle is not a CRAM file. - */ -HTSLIB_EXPORT -refs_t *cram_get_refs(htsFile *fd); - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.18/htslib/hfile.h b/src/htslib-1.18/htslib/hfile.h deleted file mode 100644 index 6e3a2a2..0000000 --- a/src/htslib-1.18/htslib/hfile.h +++ /dev/null @@ -1,389 +0,0 @@ -/// @file htslib/hfile.h -/// Buffered low-level input/output streams. -/* - Copyright (C) 2013-2022 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_HFILE_H -#define HTSLIB_HFILE_H - -#include - -#include - -#include "hts_defs.h" - -// Ensure ssize_t exists within this header. All #includes must precede this, -// and ssize_t must be undefined again at the end of this header. -#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t -#define HTSLIB_SSIZE_T -#define ssize_t intptr_t -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -struct hFILE_backend; -struct kstring_t; - -/// Low-level input/output stream handle -/** The fields of this structure are declared here solely for the benefit -of the hFILE-related inline functions. They may change in future releases. -User code should not use them directly; you should imagine that hFILE is an -opaque incomplete type. 
-*/ -typedef struct hFILE { - // @cond internal - char *buffer, *begin, *end, *limit; - const struct hFILE_backend *backend; - off_t offset; - unsigned at_eof:1, mobile:1, readonly:1; - int has_errno; - // @endcond -} hFILE; - -/// Open the named file or URL as a stream -/** @return An hFILE pointer, or `NULL` (with _errno_ set) if an error occurred. - -The usual `fopen(3)` _mode_ letters are supported: one of -`r` (read), `w` (write), `a` (append), optionally followed by any of -`+` (update), `e` (close on `exec(2)`), `x` (create exclusively), -`:` (indicates scheme-specific variable arguments follow). -*/ -HTSLIB_EXPORT -hFILE *hopen(const char *filename, const char *mode, ...) HTS_RESULT_USED; - -/// Associate a stream with an existing open file descriptor -/** @return An hFILE pointer, or `NULL` (with _errno_ set) if an error occurred. - -Note that the file must be opened in binary mode, or else -there will be problems on platforms that make a difference -between text and binary mode. - -For socket descriptors (on Windows), _mode_ should contain `s`. -*/ -HTSLIB_EXPORT -hFILE *hdopen(int fd, const char *mode) HTS_RESULT_USED; - -/// Report whether the file name or URL denotes remote storage -/** @return 0 if local, 1 if remote. - -"Remote" means involving e.g. explicit network access, with the implication -that callers may wish to cache such files' contents locally. -*/ -HTSLIB_EXPORT -int hisremote(const char *filename) HTS_RESULT_USED; - -/// Append an extension or replace an existing extension -/** @param buffer The kstring to be used to store the modified filename - @param filename The filename to be (copied and) adjusted - @param replace If non-zero, one extension (if any) is removed first - @param extension The extension to be added (e.g. ".csi") - @return The modified filename (i.e., `buffer->s`), or NULL on error. 
- @since 1.10 - -If _filename_ is an URL, alters extensions at the end of the `hier-part`, -leaving any trailing `?query` or `#fragment` unchanged. -*/ -HTSLIB_EXPORT -char *haddextension(struct kstring_t *buffer, const char *filename, - int replace, const char *extension) HTS_RESULT_USED; - -/// Flush (for output streams) and close the stream -/** @return 0 if successful, or `EOF` (with _errno_ set) if an error occurred. -*/ -HTSLIB_EXPORT -int hclose(hFILE *fp) HTS_RESULT_USED; - -/// Close the stream, without flushing or propagating errors -/** For use while cleaning up after an error only. Preserves _errno_. -*/ -HTSLIB_EXPORT -void hclose_abruptly(hFILE *fp); - -/// Return the stream's error indicator -/** @return Non-zero (in fact, an _errno_ value) if an error has occurred. - -This would be called `herror()` and return true/false to parallel `ferror(3)`, -but a networking-related `herror(3)` function already exists. -*/ -static inline int herrno(hFILE *fp) -{ - return fp->has_errno; -} - -/// Clear the stream's error indicator -static inline void hclearerr(hFILE *fp) -{ - fp->has_errno = 0; -} - -/// Reposition the read/write stream offset -/** @return The resulting offset within the stream (as per `lseek(2)`), - or negative if an error occurred. -*/ -HTSLIB_EXPORT -off_t hseek(hFILE *fp, off_t offset, int whence) HTS_RESULT_USED; - -/// Report the current stream offset -/** @return The offset within the stream, starting from zero. -*/ -static inline off_t htell(hFILE *fp) -{ - return fp->offset + (fp->begin - fp->buffer); -} - -/// Read one character from the stream -/** @return The character read, or `EOF` on end-of-file or error. -*/ -static inline int hgetc(hFILE *fp) -{ - HTSLIB_EXPORT - extern int hgetc2(hFILE *); - return (fp->end > fp->begin)? 
(unsigned char) *(fp->begin++) : hgetc2(fp); -} - -/// Read from the stream until the delimiter, up to a maximum length -/** @param buffer The buffer into which bytes will be written - @param size The size of the buffer - @param delim The delimiter (interpreted as an `unsigned char`) - @param fp The file stream - @return The number of bytes read, or negative on error. - @since 1.4 - -Bytes will be read into the buffer up to and including a delimiter, until -EOF is reached, or _size-1_ bytes have been written, whichever comes first. -The string will then be terminated with a NUL byte (`\0`). -*/ -HTSLIB_EXPORT -ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp) - HTS_RESULT_USED; - -/// Read a line from the stream, up to a maximum length -/** @param buffer The buffer into which bytes will be written - @param size The size of the buffer - @param fp The file stream - @return The number of bytes read, or negative on error. - @since 1.4 - -Specialization of hgetdelim() for a `\n` delimiter. -*/ -static inline ssize_t HTS_RESULT_USED -hgetln(char *buffer, size_t size, hFILE *fp) -{ - return hgetdelim(buffer, size, '\n', fp); -} - -/// Read a line from the stream, up to a maximum length -/** @param buffer The buffer into which bytes will be written - @param size The size of the buffer (must be > 1 to be useful) - @param fp The file stream - @return _buffer_ on success, or `NULL` if an error occurred. - @since 1.4 - -This function can be used as a replacement for `fgets(3)`, or together with -kstring's `kgetline()` to read arbitrarily-long lines into a _kstring_t_. -*/ -HTSLIB_EXPORT -char *hgets(char *buffer, int size, hFILE *fp) HTS_RESULT_USED; - -/// Peek at characters to be read without removing them from buffers -/** @param fp The file stream - @param buffer The buffer to which the peeked bytes will be written - @param nbytes The number of bytes to peek at; limited by the size of the - internal buffer, which could be as small as 4K. 
- @return The number of bytes peeked, which may be less than _nbytes_ - if EOF is encountered; or negative, if there was an I/O error. - -The characters peeked at remain in the stream's internal buffer, and will be -returned by later hread() etc calls. -*/ -HTSLIB_EXPORT -ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) HTS_RESULT_USED; - -/// Read a block of characters from the file -/** @return The number of bytes read, or negative if an error occurred. - -The full _nbytes_ requested will be returned, except as limited by EOF -or I/O errors. -*/ -static inline ssize_t HTS_RESULT_USED -hread(hFILE *fp, void *buffer, size_t nbytes) -{ - HTSLIB_EXPORT - extern ssize_t hread2(hFILE *, void *, size_t, size_t); - - size_t n = fp->end - fp->begin; - if (n > nbytes) n = nbytes; - memcpy(buffer, fp->begin, n); - fp->begin += n; - return (n == nbytes || !fp->mobile)? (ssize_t) n : hread2(fp, buffer, nbytes, n); -} - -/// Write a character to the stream -/** @return The character written, or `EOF` if an error occurred. -*/ -static inline int hputc(int c, hFILE *fp) -{ - HTSLIB_EXPORT - extern int hputc2(int, hFILE *); - if (fp->begin < fp->limit) *(fp->begin++) = c; - else c = hputc2(c, fp); - return c; -} - -/// Write a string to the stream -/** @return 0 if successful, or `EOF` if an error occurred. -*/ -static inline int hputs(const char *text, hFILE *fp) -{ - HTSLIB_EXPORT - extern int hputs2(const char *, size_t, size_t, hFILE *); - - size_t nbytes = strlen(text), n = fp->limit - fp->begin; - if (n > nbytes) n = nbytes; - memcpy(fp->begin, text, n); - fp->begin += n; - return (n == nbytes)? 0 : hputs2(text, nbytes, n, fp); -} - -/// Write a block of characters to the file -/** @return Either _nbytes_, or negative if an error occurred. - -In the absence of I/O errors, the full _nbytes_ will be written. 
-*/ -static inline ssize_t HTS_RESULT_USED -hwrite(hFILE *fp, const void *buffer, size_t nbytes) -{ - HTSLIB_EXPORT - extern ssize_t hwrite2(hFILE *, const void *, size_t, size_t); - HTSLIB_EXPORT - extern int hfile_set_blksize(hFILE *fp, size_t bufsiz); - - if (!fp->mobile) { - size_t n = fp->limit - fp->begin; - if (n < nbytes) { - hfile_set_blksize(fp, fp->limit - fp->buffer + nbytes); - fp->end = fp->limit; - } - } - - size_t n = fp->limit - fp->begin; - if (nbytes >= n && fp->begin == fp->buffer) { - // Go straight to hwrite2 if the buffer is empty and the request - // won't fit. - return hwrite2(fp, buffer, nbytes, 0); - } - - if (n > nbytes) n = nbytes; - memcpy(fp->begin, buffer, n); - fp->begin += n; - return (n==nbytes)? (ssize_t) n : hwrite2(fp, buffer, nbytes, n); -} - -/// For writing streams, flush buffered output to the underlying stream -/** @return 0 if successful, or `EOF` if an error occurred. - -This includes low-level flushing such as via `fdatasync(2)`. -*/ -HTSLIB_EXPORT -int hflush(hFILE *fp) HTS_RESULT_USED; - -/// For hfile_mem: get the internal buffer and it's size from a hfile -/** @return buffer if successful, or NULL if an error occurred - -The buffer returned should not be freed as this will happen when the -hFILE is closed. -*/ -HTSLIB_EXPORT -char *hfile_mem_get_buffer(hFILE *file, size_t *length); - -/// For hfile_mem: get the internal buffer and it's size from a hfile. -/** @return buffer if successful, or NULL if an error occurred - -This is similar to hfile_mem_get_buffer except that ownership of the -buffer is granted to the caller, who now has responsibility for freeing -it. From this point onwards, the hFILE should not be used for any -purpose other than closing. -*/ -HTSLIB_EXPORT -char *hfile_mem_steal_buffer(hFILE *file, size_t *length); - -/// Fills out sc_list[] with the list of known URL schemes. -/** - * @param plugin [in] Restricts schemes to only those from 'plugin. 
- * @param sc_list [out] Filled out with the scheme names - * @param nschemes [in/out] Size of sc_list (in) and number returned (out) - * - * Plugin may be passed in as NULL in which case all schemes are returned. - * Use plugin "built-in" to list the built in schemes. - * The size of sc_list is determined by the input value of *nschemes. - * This is updated to return the output size. It is up to the caller to - * determine whether to call again with a larger number if this is too small. - * - * The return value represents the total number found matching plugin, which - * may be larger than *nschemes if too small a value was specified. - * - * @return the number of schemes found on success. - * -1 on failure - */ -HTSLIB_EXPORT -int hfile_list_schemes(const char *plugin, const char *sc_list[], int *nschemes); - -/// Fills out plist[] with the list of known hFILE plugins. -/* - * @param plist [out] Filled out with the plugin names - * @param nplugins [in/out] Size of plist (in) and number returned (out) - * - * The size of plist is determined by the input value of *nplugins. - * This is updated to return the output size. It is up to the caller to - * determine whether to call again with a larger number if this is too small. - * - * The return value represents the total number found, which may be - * larger than *nplugins if too small a value was specified. - * - * @return the number of plugins found on success. - * -1 on failure - */ -HTSLIB_EXPORT -int hfile_list_plugins(const char *plist[], int *nplugins); - -/// Tests for the presence of a specific hFILE plugin. -/* - * @param name The name of the plugin to query. - * - * @return 1 if found, 0 otherwise. 
- */ -HTSLIB_EXPORT -int hfile_has_plugin(const char *name); - -#ifdef __cplusplus -} -#endif - -#ifdef HTSLIB_SSIZE_T -#undef HTSLIB_SSIZE_T -#undef ssize_t -#endif - -#endif diff --git a/src/htslib-1.18/htslib/hts.h b/src/htslib-1.18/htslib/hts.h deleted file mode 100644 index 4baad1e..0000000 --- a/src/htslib-1.18/htslib/hts.h +++ /dev/null @@ -1,1581 +0,0 @@ -/// @file htslib/hts.h -/// Format-neutral I/O, indexing, and iterator API functions. -/* - Copyright (C) 2012-2022 Genome Research Ltd. - Copyright (C) 2010, 2012 Broad Institute. - Portions copyright (C) 2003-2006, 2008-2010 by Heng Li - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef HTSLIB_HTS_H -#define HTSLIB_HTS_H - -#include -#include -#include - -#include "hts_defs.h" -#include "hts_log.h" -#include "kstring.h" -#include "kroundup.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Separator used to split HTS_PATH (for plugins); REF_PATH (cram references) -#if defined(_WIN32) || defined(__MSYS__) -#define HTS_PATH_SEPARATOR_CHAR ';' -#define HTS_PATH_SEPARATOR_STR ";" -#else -#define HTS_PATH_SEPARATOR_CHAR ':' -#define HTS_PATH_SEPARATOR_STR ":" -#endif - -#ifndef HTS_BGZF_TYPEDEF -typedef struct BGZF BGZF; -#define HTS_BGZF_TYPEDEF -#endif -struct cram_fd; -struct hFILE; -struct hts_tpool; -struct sam_hdr_t; - -/** - * @hideinitializer - * Deprecated macro to expand a dynamic array of a given type - * - * @param type_t The type of the array elements - * @param[in] n Requested number of elements of type type_t - * @param[in,out] m Size of memory allocated - * @param[in,out] ptr Pointer to the array - * - * @discussion - * Do not use this macro. Use hts_resize() instead as allows allocation - * failures to be handled more gracefully. - * - * The array *ptr will be expanded if necessary so that it can hold @p n - * or more elements. If the array is expanded then the new size will be - * written to @p m and the value in @p ptr may change. - * - * It must be possible to take the address of @p ptr and @p m must be usable - * as an lvalue. - * - * @bug - * If the memory allocation fails, this will call exit(1). This is - * not ideal behaviour in a library. - */ -#define hts_expand(type_t, n, m, ptr) do { \ - if ((n) > (m)) { \ - size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ - int, void **, const char *); \ - (m) = hts_realloc_or_die((n) >= 1 ? 
(n) : 1, (m), sizeof(m), \ - sizeof(type_t), 0, \ - (void **)&(ptr), __func__); \ - } \ - } while (0) - -/** - * @hideinitializer - * Macro to expand a dynamic array, zeroing any newly-allocated memory - * - * @param type_t The type of the array elements - * @param[in] n Requested number of elements of type type_t - * @param[in,out] m Size of memory allocated - * @param[in,out] ptr Pointer to the array - * - * @discussion - * Do not use this macro. Use hts_resize() instead as allows allocation - * failures to be handled more gracefully. - * - * As for hts_expand(), except the bytes that make up the array elements - * between the old and new values of @p m are set to zero using memset(). - * - * @bug - * If the memory allocation fails, this will call exit(1). This is - * not ideal behaviour in a library. - */ - - -#define hts_expand0(type_t, n, m, ptr) do { \ - if ((n) > (m)) { \ - size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ - int, void **, const char *); \ - (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \ - sizeof(type_t), 1, \ - (void **)&(ptr), __func__); \ - } \ - } while (0) - -// For internal use (by hts_resize()) only -HTSLIB_EXPORT -int hts_resize_array_(size_t, size_t, size_t, void *, void **, int, - const char *); - -#define HTS_RESIZE_CLEAR 1 - -/** - * @hideinitializer - * Macro to expand a dynamic array of a given type - * - * @param type_t The type of the array elements - * @param[in] num Requested number of elements of type type_t - * @param[in,out] size_ptr Pointer to where the size (in elements) of the - array is stored. - * @param[in,out] ptr Location of the pointer to the array - * @param[in] flags Option flags - * - * @return 0 for success, or negative if an error occurred. - * - * @discussion - * The array *ptr will be expanded if necessary so that it can hold @p num - * or more elements. If the array is expanded then the new size will be - * written to @p *size_ptr and the value in @p *ptr may change. 
- * - * If ( @p flags & HTS_RESIZE_CLEAR ) is set, any newly allocated memory will - * be cleared. - */ - -#define hts_resize(type_t, num, size_ptr, ptr, flags) \ - ((num) > (*(size_ptr)) \ - ? hts_resize_array_(sizeof(type_t), (num), \ - sizeof(*(size_ptr)), (size_ptr), \ - (void **)(ptr), (flags), __func__) \ - : 0) - -/// Release resources when dlclosing a dynamically loaded HTSlib -/** @discussion - * Normally HTSlib cleans up automatically when your program exits, - * whether that is via exit(3) or returning from main(). However if you - * have dlopen(3)ed HTSlib and wish to close it before your main program - * exits, you must call hts_lib_shutdown() before dlclose(3). -*/ -HTSLIB_EXPORT -void hts_lib_shutdown(void); - -/** - * Wrapper function for free(). Enables memory deallocation across DLL - * boundary. Should be used by all applications, which are compiled - * with a different standard library than htslib and call htslib - * methods that return dynamically allocated data. - */ -HTSLIB_EXPORT -void hts_free(void *ptr); - -/************ - * File I/O * - ************/ - -// Add new entries only at the end (but before the *_maximum entry) -// of these enums, as their numbering is part of the htslib ABI. 
- -enum htsFormatCategory { - unknown_category, - sequence_data, // Sequence data -- SAM, BAM, CRAM, etc - variant_data, // Variant calling data -- VCF, BCF, etc - index_file, // Index file associated with some data file - region_list, // Coordinate intervals or regions -- BED, etc - category_maximum = 32767 -}; - -enum htsExactFormat { - unknown_format, - binary_format, text_format, - sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed, - htsget, - json HTS_DEPRECATED_ENUM("Use htsExactFormat 'htsget' instead") = htsget, - empty_format, // File is empty (or empty after decompression) - fasta_format, fastq_format, fai_format, fqi_format, - hts_crypt4gh_format, - d4_format, - format_maximum = 32767 -}; - -enum htsCompression { - no_compression, gzip, bgzf, custom, bzip2_compression, razf_compression, - xz_compression, zstd_compression, - compression_maximum = 32767 -}; - -typedef struct htsFormat { - enum htsFormatCategory category; - enum htsExactFormat format; - struct { short major, minor; } version; - enum htsCompression compression; - short compression_level; // currently unused - void *specific; // format specific options; see struct hts_opt. -} htsFormat; - -struct hts_idx_t; -typedef struct hts_idx_t hts_idx_t; -struct hts_filter_t; - -/** - * @brief File handle returned by hts_open() etc. - * This structure should be considered opaque by end users. There should be - * no need to access most fields directly in user code, and in cases where - * it is desirable accessor functions such as hts_get_format() are provided. 
- */ -// Maintainers note htsFile cannot be an incomplete struct because some of its -// fields are part of libhts.so's ABI (hence these fields must not be moved): -// - fp is used in the public sam_itr_next()/etc macros -// - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1 -// - is_write and is_cram are used directly in samtools <= 1.1 -// - fp is used directly in samtools (up to and including current develop) -// - line is used directly in bcftools (up to and including current develop) -// - is_bgzf and is_cram flags indicate which fp union member to use. -// Note is_bgzf being set does not indicate the flag is BGZF compressed, -// nor even whether it is compressed at all (eg on naked BAMs). -typedef struct htsFile { - uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27; - int64_t lineno; - kstring_t line; - char *fn, *fn_aux; - union { - BGZF *bgzf; - struct cram_fd *cram; - struct hFILE *hfile; - } fp; - void *state; // format specific state information - htsFormat format; - hts_idx_t *idx; - const char *fnidx; - struct sam_hdr_t *bam_header; - struct hts_filter_t *filter; -} htsFile; - -// A combined thread pool and queue allocation size. -// The pool should already be defined, but qsize may be zero to -// indicate an appropriate queue size is taken from the pool. -// -// Reasons for explicitly setting it could be where many more file -// descriptors are in use than threads, so keeping memory low is -// important. 
-typedef struct htsThreadPool { - struct hts_tpool *pool; // The shared thread pool itself - int qsize; // Size of I/O queue to use for this fp -} htsThreadPool; - -// REQUIRED_FIELDS -enum sam_fields { - SAM_QNAME = 0x00000001, - SAM_FLAG = 0x00000002, - SAM_RNAME = 0x00000004, - SAM_POS = 0x00000008, - SAM_MAPQ = 0x00000010, - SAM_CIGAR = 0x00000020, - SAM_RNEXT = 0x00000040, - SAM_PNEXT = 0x00000080, - SAM_TLEN = 0x00000100, - SAM_SEQ = 0x00000200, - SAM_QUAL = 0x00000400, - SAM_AUX = 0x00000800, - SAM_RGAUX = 0x00001000, -}; - -// Mostly CRAM only, but this could also include other format options -enum hts_fmt_option { - // CRAM specific - CRAM_OPT_DECODE_MD, - CRAM_OPT_PREFIX, - CRAM_OPT_VERBOSITY, // obsolete, use hts_set_log_level() instead - CRAM_OPT_SEQS_PER_SLICE, - CRAM_OPT_SLICES_PER_CONTAINER, - CRAM_OPT_RANGE, - CRAM_OPT_VERSION, // rename to cram_version? - CRAM_OPT_EMBED_REF, - CRAM_OPT_IGNORE_MD5, - CRAM_OPT_REFERENCE, // make general - CRAM_OPT_MULTI_SEQ_PER_SLICE, - CRAM_OPT_NO_REF, - CRAM_OPT_USE_BZIP2, - CRAM_OPT_SHARED_REF, - CRAM_OPT_NTHREADS, // deprecated, use HTS_OPT_NTHREADS - CRAM_OPT_THREAD_POOL,// make general - CRAM_OPT_USE_LZMA, - CRAM_OPT_USE_RANS, - CRAM_OPT_REQUIRED_FIELDS, - CRAM_OPT_LOSSY_NAMES, - CRAM_OPT_BASES_PER_SLICE, - CRAM_OPT_STORE_MD, - CRAM_OPT_STORE_NM, - CRAM_OPT_RANGE_NOSEEK, // CRAM_OPT_RANGE minus the seek - CRAM_OPT_USE_TOK, - CRAM_OPT_USE_FQZ, - CRAM_OPT_USE_ARITH, - CRAM_OPT_POS_DELTA, // force delta for AP, even on non-pos sorted data - - // General purpose - HTS_OPT_COMPRESSION_LEVEL = 100, - HTS_OPT_NTHREADS, - HTS_OPT_THREAD_POOL, - HTS_OPT_CACHE_SIZE, - HTS_OPT_BLOCK_SIZE, - HTS_OPT_FILTER, - HTS_OPT_PROFILE, - - // Fastq - - // Boolean. - // Read / Write CASAVA 1.8 format. 
- // See https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl2fastq/bcl2fastq_letterbooklet_15038058brpmi.pdf - // - // The CASAVA tag matches \d:[YN]:\d+:[ACGTN]+ - // The first \d is read 1/2 (1 or 2), [YN] is QC-PASS/FAIL flag, - // \d+ is a control number, and the sequence at the end is - // for barcode sequence. Barcodes are read into the aux tag defined - // by FASTQ_OPT_BARCODE ("BC" by default). - FASTQ_OPT_CASAVA = 1000, - - // String. - // Whether to read / write extra SAM format aux tags from the fastq - // identifier line. For reading this can simply be "1" to request - // decoding aux tags. For writing it is a comma separated list of aux - // tag types to be written out. - FASTQ_OPT_AUX, - - // Boolean. - // Whether to add /1 and /2 to read identifiers when writing FASTQ. - // These come from the BAM_FREAD1 or BAM_FREAD2 flags. - // (Detecting the /1 and /2 is automatic when reading fastq.) - FASTQ_OPT_RNUM, - - // Two character string. - // Barcode aux tag for CASAVA; defaults to "BC". - FASTQ_OPT_BARCODE, - - // Process SRA and ENA read names which pointlessly move the original - // name to the second field and insert a constructed . - // name in its place. - FASTQ_OPT_NAME2, -}; - -// Profile options for encoding; primarily used at present in CRAM -// but also usable in BAM as a synonym for deflate compression levels. -enum hts_profile_option { - HTS_PROFILE_FAST, - HTS_PROFILE_NORMAL, - HTS_PROFILE_SMALL, - HTS_PROFILE_ARCHIVE, -}; - -// For backwards compatibility -#define cram_option hts_fmt_option - -typedef struct hts_opt { - char *arg; // string form, strdup()ed - enum hts_fmt_option opt; // tokenised key - union { // ... 
and value - int i; - char *s; - } val; - struct hts_opt *next; -} hts_opt; - -#define HTS_FILE_OPTS_INIT {{0},0} - -/* - * Explicit index file name delimiter, see below - */ -#define HTS_IDX_DELIM "##idx##" - - -/********************** - * Exported functions * - **********************/ - -/* - * Parses arg and appends it to the option list. - * - * Returns 0 on success; - * -1 on failure. - */ -HTSLIB_EXPORT -int hts_opt_add(hts_opt **opts, const char *c_arg); - -/* - * Applies an hts_opt option list to a given htsFile. - * - * Returns 0 on success - * -1 on failure - */ -HTSLIB_EXPORT -int hts_opt_apply(htsFile *fp, hts_opt *opts); - -/* - * Frees an hts_opt list. - */ -HTSLIB_EXPORT -void hts_opt_free(hts_opt *opts); - -/* - * Accepts a string file format (sam, bam, cram, vcf, bam) optionally - * followed by a comma separated list of key=value options and splits - * these up into the fields of htsFormat struct. - * - * Returns 0 on success - * -1 on failure. - */ -HTSLIB_EXPORT -int hts_parse_format(htsFormat *opt, const char *str); - -/* - * Tokenise options as (key(=value)?,)*(key(=value)?)? - * NB: No provision for ',' appearing in the value! - * Add backslashing rules? - * - * This could be used as part of a general command line option parser or - * as a string concatenated onto the file open mode. - * - * Returns 0 on success - * -1 on failure. - */ -HTSLIB_EXPORT -int hts_parse_opt_list(htsFormat *opt, const char *str); - -/*! @abstract Table for converting a nucleotide character to 4-bit encoding. -The input character may be either an IUPAC ambiguity code, '=' for 0, or -'0'/'1'/'2'/'3' for a result of 1/2/4/8. The result is encoded as 1/2/4/8 -for A/C/G/T or combinations of these bits for ambiguous bases. -*/ -HTSLIB_EXPORT -extern const unsigned char seq_nt16_table[256]; - -/*! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC -ambiguity code letter (or '=' when given 0). -*/ -HTSLIB_EXPORT -extern const char seq_nt16_str[]; - -/*! 
@abstract Table for converting a 4-bit encoded nucleotide to about 2 bits. -Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous). -*/ -HTSLIB_EXPORT -extern const int seq_nt16_int[]; - -/*! - @abstract Get the htslib version number - @return For released versions, a string like "N.N[.N]"; or git describe - output if using a library built within a Git repository. -*/ -HTSLIB_EXPORT -const char *hts_version(void); - -/*! - @abstract Compile-time HTSlib version number, for use in #if checks - @return For released versions X.Y[.Z], an integer of the form XYYYZZ; - useful for preprocessor conditionals such as - #if HTS_VERSION >= 101000 // Check for v1.10 or later -*/ -// Maintainers: Bump this in the final stage of preparing a new release. -// Immediately after release, bump ZZ to 90 to distinguish in-development -// Git repository builds from the release; you may wish to increment this -// further when significant features are merged. -#define HTS_VERSION 101800 - -/*! @abstract Introspection on the features enabled in htslib - * - * @return a bitfield of HTS_FEATURE_* macros. - */ -HTSLIB_EXPORT -unsigned int hts_features(void); - -HTSLIB_EXPORT -const char *hts_test_feature(unsigned int id); - -/*! 
@abstract Introspection on the features enabled in htslib, string form - * - * @return a string describing htslib build features - */ -HTSLIB_EXPORT -const char *hts_feature_string(void); - -// Whether ./configure was used or vanilla Makefile -#define HTS_FEATURE_CONFIGURE 1 - -// Whether --enable-plugins was used -#define HTS_FEATURE_PLUGINS 2 - -// Transport specific -#define HTS_FEATURE_LIBCURL (1u<<10) -#define HTS_FEATURE_S3 (1u<<11) -#define HTS_FEATURE_GCS (1u<<12) - -// Compression options -#define HTS_FEATURE_LIBDEFLATE (1u<<20) -#define HTS_FEATURE_LZMA (1u<<21) -#define HTS_FEATURE_BZIP2 (1u<<22) -#define HTS_FEATURE_HTSCODECS (1u<<23) // htscodecs library version - -// Build params -#define HTS_FEATURE_CC (1u<<27) -#define HTS_FEATURE_CFLAGS (1u<<28) -#define HTS_FEATURE_CPPFLAGS (1u<<29) -#define HTS_FEATURE_LDFLAGS (1u<<30) - - -/*! - @abstract Determine format by peeking at the start of a file - @param fp File opened for reading, positioned at the beginning - @param fmt Format structure that will be filled out on return - @return 0 for success, or negative if an error occurred. - - Equivalent to hts_detect_format2(fp, NULL, fmt). -*/ -HTSLIB_EXPORT -int hts_detect_format(struct hFILE *fp, htsFormat *fmt); - -/*! - @abstract Determine format primarily by peeking at the start of a file - @param fp File opened for reading, positioned at the beginning - @param fname Name of the file, or NULL if not available - @param fmt Format structure that will be filled out on return - @return 0 for success, or negative if an error occurred. - @since 1.15 - -Some formats are only recognised if the filename is available and has the -expected extension, as otherwise more generic files may be misrecognised. -In particular: - - FASTA/Q indexes must have .fai/.fqi extensions; without this requirement, - some similar BED files would be misrecognised as indexes. -*/ -HTSLIB_EXPORT -int hts_detect_format2(struct hFILE *fp, const char *fname, htsFormat *fmt); - -/*! 
- @abstract Get a human-readable description of the file format - @param fmt Format structure holding type, version, compression, etc. - @return Description string, to be freed by the caller after use. -*/ -HTSLIB_EXPORT -char *hts_format_description(const htsFormat *format); - -/*! - @abstract Open a sequence data (SAM/BAM/CRAM) or variant data (VCF/BCF) - or possibly-compressed textual line-orientated file - @param fn The file name or "-" for stdin/stdout. For indexed files - with a non-standard naming, the file name can include the - name of the index file delimited with HTS_IDX_DELIM - @param mode Mode matching / [rwa][bcefFguxz0-9]* / - @discussion - With 'r' opens for reading; any further format mode letters are ignored - as the format is detected by checking the first few bytes or BGZF blocks - of the file. With 'w' or 'a' opens for writing or appending, with format - specifier letters: - b binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc) - c CRAM format - f FASTQ format - F FASTA format - g gzip compressed - u uncompressed - z bgzf compressed - [0-9] zlib compression level - and with non-format option letters (for any of 'r'/'w'/'a'): - e close the file on exec(2) (opens with O_CLOEXEC, where supported) - x create the file exclusively (opens with O_EXCL, where supported) - Note that there is a distinction between 'u' and '0': the first yields - plain uncompressed output whereas the latter outputs uncompressed data - wrapped in the zlib format. - @example - [rw]b .. compressed BCF, BAM, FAI - [rw]bu .. uncompressed BCF - [rw]z .. compressed VCF - [rw] .. uncompressed VCF -*/ -HTSLIB_EXPORT -htsFile *hts_open(const char *fn, const char *mode); - -/*! - @abstract Open a SAM/BAM/CRAM/VCF/BCF/etc file - @param fn The file name or "-" for stdin/stdout - @param mode Open mode, as per hts_open() - @param fmt Optional format specific parameters - @discussion - See hts_open() for description of fn and mode. 
- // TODO Update documentation for s/opts/fmt/ - Opts contains a format string (sam, bam, cram, vcf, bcf) which will, - if defined, override mode. Opts also contains a linked list of hts_opt - structures to apply to the open file handle. These can contain things - like pointers to the reference or information on compression levels, - block sizes, etc. -*/ -HTSLIB_EXPORT -htsFile *hts_open_format(const char *fn, const char *mode, const htsFormat *fmt); - -/*! - @abstract Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file - @param fn The already-open file handle - @param mode Open mode, as per hts_open() -*/ -HTSLIB_EXPORT -htsFile *hts_hopen(struct hFILE *fp, const char *fn, const char *mode); - -/*! - @abstract For output streams, flush any buffered data - @param fp The file handle to be flushed - @return 0 for success, or negative if an error occurred. - @since 1.14 -*/ -HTSLIB_EXPORT -int hts_flush(htsFile *fp); - -/*! - @abstract Close a file handle, flushing buffered data for output streams - @param fp The file handle to be closed - @return 0 for success, or negative if an error occurred. -*/ -HTSLIB_EXPORT -int hts_close(htsFile *fp); - -/*! - @abstract Returns the file's format information - @param fp The file handle - @return Read-only pointer to the file's htsFormat. -*/ -HTSLIB_EXPORT -const htsFormat *hts_get_format(htsFile *fp); - -/*! - @ abstract Returns a string containing the file format extension. - @ param format Format structure containing the file type. - @ return A string ("sam", "bam", etc) or "?" for unknown formats. - */ -HTSLIB_EXPORT -const char *hts_format_file_extension(const htsFormat *format); - -/*! - @abstract Sets a specified CRAM option on the open file handle. - @param fp The file handle open the open file. - @param opt The CRAM_OPT_* option. - @param ... Optional arguments, dependent on the option used. - @return 0 for success, or negative if an error occurred. 
-*/ -HTSLIB_EXPORT -int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...); - -/*! - @abstract Read a line (and its \n or \r\n terminator) from a file - @param fp The file handle - @param delimiter Unused, but must be '\n' (or KS_SEP_LINE) - @param str The line (not including the terminator) is written here - @return Length of the string read (capped at INT_MAX); - -1 on end-of-file; <= -2 on error -*/ -HTSLIB_EXPORT -int hts_getline(htsFile *fp, int delimiter, kstring_t *str); - -HTSLIB_EXPORT -char **hts_readlines(const char *fn, int *_n); -/*! - @abstract Parse comma-separated list or read list from a file - @param list File name or comma-separated list - @param is_file - @param _n Size of the output array (number of items read) - @return NULL on failure or pointer to newly allocated array of - strings -*/ -HTSLIB_EXPORT -char **hts_readlist(const char *fn, int is_file, int *_n); - -/*! - @abstract Create extra threads to aid compress/decompression for this file - @param fp The file handle - @param n The number of worker threads to create - @return 0 for success, or negative if an error occurred. - @notes This function creates non-shared threads for use solely by fp. - The hts_set_thread_pool function is the recommended alternative. -*/ -HTSLIB_EXPORT -int hts_set_threads(htsFile *fp, int n); - -/*! - @abstract Create extra threads to aid compress/decompression for this file - @param fp The file handle - @param p A pool of worker threads, previously allocated by hts_create_threads(). - @return 0 for success, or negative if an error occurred. -*/ -HTSLIB_EXPORT -int hts_set_thread_pool(htsFile *fp, htsThreadPool *p); - -/*! - @abstract Adds a cache of decompressed blocks, potentially speeding up seeks. - This may not work for all file types (currently it is bgzf only). - @param fp The file handle - @param n The size of cache, in bytes -*/ -HTSLIB_EXPORT -void hts_set_cache_size(htsFile *fp, int n); - -/*! 
- @abstract Set .fai filename for a file opened for reading - @return 0 for success, negative on failure - @discussion - Called before *_hdr_read(), this provides the name of a .fai file - used to provide a reference list if the htsFile contains no @SQ headers. -*/ -HTSLIB_EXPORT -int hts_set_fai_filename(htsFile *fp, const char *fn_aux); - - -/*! - @abstract Sets a filter expression - @return 0 for success, negative on failure - @discussion - To clear an existing filter, specifying expr as NULL. -*/ -HTSLIB_EXPORT -int hts_set_filter_expression(htsFile *fp, const char *expr); - -/*! - @abstract Determine whether a given htsFile contains a valid EOF block - @return 3 for a non-EOF checkable filetype; - 2 for an unseekable file type where EOF cannot be checked; - 1 for a valid EOF block; - 0 for if the EOF marker is absent when it should be present; - -1 (with errno set) on failure - @discussion - Check if the BGZF end-of-file (EOF) marker is present -*/ -HTSLIB_EXPORT -int hts_check_EOF(htsFile *fp); - -/************ - * Indexing * - ************/ - -/*! -These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc, -producing iterators operating as follows: - - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file - - HTS_IDX_START iterates over the entire file - - HTS_IDX_REST iterates from the current position to the end of the file - - HTS_IDX_NONE always returns "no more alignment records" -When one of these special tid values is used, beg and end are ignored. -When REST or NONE is used, idx is also ignored and may be NULL. -*/ -#define HTS_IDX_NOCOOR (-2) -#define HTS_IDX_START (-3) -#define HTS_IDX_REST (-4) -#define HTS_IDX_NONE (-5) - -#define HTS_FMT_CSI 0 -#define HTS_FMT_BAI 1 -#define HTS_FMT_TBI 2 -#define HTS_FMT_CRAI 3 -#define HTS_FMT_FAI 4 - -// Almost INT64_MAX, but when cast into a 32-bit int it's -// also INT_MAX instead of -1. This avoids bugs with old code -// using the new hts_pos_t data type. 
-#define HTS_POS_MAX ((((int64_t)INT_MAX)<<32)|INT_MAX) -#define HTS_POS_MIN INT64_MIN -#define PRIhts_pos PRId64 -typedef int64_t hts_pos_t; - -// For comparison with previous release: -// -// #define HTS_POS_MAX INT_MAX -// #define HTS_POS_MIN INT_MIN -// #define PRIhts_pos PRId32 -// typedef int32_t hts_pos_t; - -typedef struct hts_pair_pos_t { - hts_pos_t beg, end; -} hts_pair_pos_t; - -typedef hts_pair_pos_t hts_pair32_t; // For backwards compatibility - -typedef struct hts_pair64_t { - uint64_t u, v; -} hts_pair64_t; - -typedef struct hts_pair64_max_t { - uint64_t u, v; - uint64_t max; -} hts_pair64_max_t; - -typedef struct hts_reglist_t { - const char *reg; - hts_pair_pos_t *intervals; - int tid; - uint32_t count; - hts_pos_t min_beg, max_end; -} hts_reglist_t; - -typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, hts_pos_t *beg, hts_pos_t *end); -typedef int hts_seek_func(void *fp, int64_t offset, int where); -typedef int64_t hts_tell_func(void *fp); - -/** - * @brief File iterator that can handle multiple target regions. - * This structure should be considered opaque by end users. - * It does both the stepping inside the file and the filtering of alignments. - * It can operate in single or multi-region mode, and depending on this, - * it uses different fields. 
- * - * read_rest (1) - read everything from the current offset, without filtering - * finished (1) - no more iterations - * is_cram (1) - current file has CRAM format - * nocoor (1) - read all unmapped reads - * - * multi (1) - multi-region moode - * reg_list - List of target regions - * n_reg - Size of the above list - * curr_reg - List index of the current region of search - * curr_intv - Interval index inside the current region; points to a (beg, end) - * end - Used for CRAM files, to preserve the max end coordinate - * - * multi (0) - single-region mode - * tid - Reference id of the target region - * beg - Start position of the target region - * end - End position of the target region - * - * Common fields: - * off - List of file offsets computed from the index - * n_off - Size of the above list - * i - List index of the current file offset - * curr_off - File offset for the next file read - * curr_tid - Reference id of the current alignment - * curr_beg - Start position of the current alignment - * curr_end - End position of the current alignment - * nocoor_off - File offset where the unmapped reads start - * - * readrec - File specific function that reads an alignment - * seek - File specific function for changing the file offset - * tell - File specific function for indicating the file offset - */ - -typedef struct hts_itr_t { - uint32_t read_rest:1, finished:1, is_cram:1, nocoor:1, multi:1, dummy:27; - int tid, n_off, i, n_reg; - hts_pos_t beg, end; - hts_reglist_t *reg_list; - int curr_tid, curr_reg, curr_intv; - hts_pos_t curr_beg, curr_end; - uint64_t curr_off, nocoor_off; - hts_pair64_max_t *off; - hts_readrec_func *readrec; - hts_seek_func *seek; - hts_tell_func *tell; - struct { - int n, m; - int *a; - } bins; -} hts_itr_t; - -typedef hts_itr_t hts_itr_multi_t; - -/// Compute the first bin on a given level -#define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7) -/// Compute the parent bin of a given bin -#define hts_bin_parent(b) (((b) - 1) >> 3) 
- -/////////////////////////////////////////////////////////// -// Low-level API for building indexes. - -/// Create a BAI/CSI/TBI type index structure -/** @param n Initial number of targets - @param fmt Format, one of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI - @param offset0 Initial file offset - @param min_shift Number of bits for the minimal interval - @param n_lvls Number of levels in the binning index - @return An initialised hts_idx_t struct on success; NULL on failure - -The struct returned by a successful call should be freed via hts_idx_destroy() -when it is no longer needed. -*/ -HTSLIB_EXPORT -hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls); - -/// Free a BAI/CSI/TBI type index -/** @param idx Index structure to free - */ -HTSLIB_EXPORT -void hts_idx_destroy(hts_idx_t *idx); - -/// Push an index entry -/** @param idx Index - @param tid Target id - @param beg Range start (zero-based) - @param end Range end (zero-based, half-open) - @param offset File offset - @param is_mapped Range corresponds to a mapped read - @return 0 on success; -1 on failure - -The @p is_mapped parameter is used to update the n_mapped / n_unmapped counts -stored in the meta-data bin. - */ -HTSLIB_EXPORT -int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); - -/// Finish building an index -/** @param idx Index - @param final_offset Last file offset - @return 0 on success; non-zero on failure. -*/ -HTSLIB_EXPORT -int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset); - -/// Returns index format -/** @param idx Index - @return One of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI -*/ -HTSLIB_EXPORT -int hts_idx_fmt(hts_idx_t *idx); - -/// Add name to TBI index meta-data -/** @param idx Index - @param tid Target identifier - @param name Target name - @return Index number of name in names list on success; -1 on failure. 
-*/ -HTSLIB_EXPORT -int hts_idx_tbi_name(hts_idx_t *idx, int tid, const char *name); - -// Index loading and saving - -/// Save an index to a file -/** @param idx Index to be written - @param fn Input BAM/BCF/etc filename, to which .bai/.csi/etc will be added - @param fmt One of the HTS_FMT_* index formats - @return 0 if successful, or negative if an error occurred. -*/ -HTSLIB_EXPORT -int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt) HTS_RESULT_USED; - -/// Save an index to a specific file -/** @param idx Index to be written - @param fn Input BAM/BCF/etc filename - @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn - @param fmt One of the HTS_FMT_* index formats - @return 0 if successful, or negative if an error occurred. -*/ -HTSLIB_EXPORT -int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt) HTS_RESULT_USED; - -/// Load an index file -/** @param fn BAM/BCF/etc filename, to which .bai/.csi/etc will be added or - the extension substituted, to search for an existing index file. - In case of a non-standard naming, the file name can include the - name of the index file delimited with HTS_IDX_DELIM. - @param fmt One of the HTS_FMT_* index formats - @return The index, or NULL if an error occurred. - -If @p fn contains the string "##idx##" (HTS_IDX_DELIM), the part before -the delimiter will be used as the name of the data file and the part after -it will be used as the name of the index. - -Otherwise, this function tries to work out the index name as follows: - - It will try appending ".csi" to @p fn - It will try substituting an existing suffix (e.g. .bam, .vcf) with ".csi" - Then, if @p fmt is HTS_FMT_BAI: - It will try appending ".bai" to @p fn - To will substituting the existing suffix (e.g. .bam) with ".bai" - else if @p fmt is HTS_FMT_TBI: - It will try appending ".tbi" to @p fn - To will substituting the existing suffix (e.g. 
.vcf) with ".tbi" - -If the index file is remote (served over a protocol like https), first a check -is made to see is a locally cached copy is available. This is done for all -of the possible names listed above. If a cached copy is not available then -the index will be downloaded and stored in the current working directory, -with the same name as the remote index. - - Equivalent to hts_idx_load3(fn, NULL, fmt, HTS_IDX_SAVE_REMOTE); -*/ -HTSLIB_EXPORT -hts_idx_t *hts_idx_load(const char *fn, int fmt); - -/// Load a specific index file -/** @param fn Input BAM/BCF/etc filename - @param fnidx The input index filename - @return The index, or NULL if an error occurred. - - Equivalent to hts_idx_load3(fn, fnidx, 0, 0); - - This function will not attempt to save index files locally. -*/ -HTSLIB_EXPORT -hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx); - -/// Load a specific index file -/** @param fn Input BAM/BCF/etc filename - @param fnidx The input index filename - @param fmt One of the HTS_FMT_* index formats - @param flags Flags to alter behaviour (see description) - @return The index, or NULL if an error occurred. - - If @p fnidx is NULL, the index name will be derived from @p fn in the - same way as hts_idx_load(). - - If @p fnidx is not NULL, @p fmt is ignored. - - The @p flags parameter can be set to a combination of the following - values: - - HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes - HTS_IDX_SILENT_FAIL Fail silently if the index is not present - - The index struct returned by a successful call should be freed - via hts_idx_destroy() when it is no longer needed. 
-*/ -HTSLIB_EXPORT -hts_idx_t *hts_idx_load3(const char *fn, const char *fnidx, int fmt, int flags); - -/// Flags for hts_idx_load3() ( and also sam_idx_load3(), tbx_idx_load3() ) -#define HTS_IDX_SAVE_REMOTE 1 -#define HTS_IDX_SILENT_FAIL 2 - -/////////////////////////////////////////////////////////// -// Functions for accessing meta-data stored in indexes - -typedef const char *(*hts_id2name_f)(void*, int); - -/// Get extra index meta-data -/** @param idx The index - @param l_meta Pointer to where the length of the extra data is stored - @return Pointer to the extra data if present; NULL otherwise - - Indexes (both .tbi and .csi) made by tabix include extra data about - the indexed file. The returns a pointer to this data. Note that the - data is stored exactly as it is in the index. Callers need to interpret - the results themselves, including knowing what sort of data to expect; - byte swapping etc. -*/ -HTSLIB_EXPORT -uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta); - -/// Set extra index meta-data -/** @param idx The index - @param l_meta Length of data - @param meta Pointer to the extra data - @param is_copy If not zero, a copy of the data is taken - @return 0 on success; -1 on failure (out of memory). - - Sets the data that is returned by hts_idx_get_meta(). - - If is_copy != 0, a copy of the input data is taken. If not, ownership of - the data pointed to by *meta passes to the index. 
-*/ -HTSLIB_EXPORT -int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, int is_copy); - -/// Get number of mapped and unmapped reads from an index -/** @param idx Index - @param tid Target ID - @param[out] mapped Location to store number of mapped reads - @param[out] unmapped Location to store number of unmapped reads - @return 0 on success; -1 on failure (data not available) - - BAI and CSI indexes store information on the number of reads for each - target that were mapped or unmapped (unmapped reads will generally have - a paired read that is mapped to the target). This function returns this - information if it is available. - - @note Cram CRAI indexes do not include this information. -*/ -HTSLIB_EXPORT -int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped); - -/// Return the number of unplaced reads from an index -/** @param idx Index - @return Unplaced reads count - - Unplaced reads are not linked to any reference (e.g. RNAME is '*' in SAM - files). -*/ -HTSLIB_EXPORT -uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx); - -/// Return a list of target names from an index -/** @param idx Index - @param[out] n Location to store the number of targets - @param getid Callback function to get the name for a target ID - @param hdr Header from indexed file - @return An array of pointers to the names on success; NULL on failure - - @note The names are pointers into the header data structure. When cleaning - up, only the array should be freed, not the names. 
- */ -HTSLIB_EXPORT -const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values - -/// Return the number of targets from an index -/** @param idx Index - @return The number of targets - */ -HTSLIB_EXPORT -int hts_idx_nseq(const hts_idx_t *idx); - -/////////////////////////////////////////////////////////// -// Region parsing - -#define HTS_PARSE_THOUSANDS_SEP 1 ///< Ignore ',' separators within numbers -#define HTS_PARSE_ONE_COORD 2 ///< chr:pos means chr:pos-pos and not chr:pos-end -#define HTS_PARSE_LIST 4 ///< Expect a comma separated list of regions. (Disables HTS_PARSE_THOUSANDS_SEP) - -/// Parse a numeric string -/** The number may be expressed in scientific notation, and optionally may - contain commas in the integer part (before any decimal point or E notation). - @param str String to be parsed - @param strend If non-NULL, set on return to point to the first character - in @a str after those forming the parsed number - @param flags Or'ed-together combination of HTS_PARSE_* flags - @return Integer value of the parsed number, or 0 if no valid number - - The input string is parsed as: optional whitespace; an optional '+' or - '-' sign; decimal digits possibly including ',' characters (if @a flags - includes HTS_PARSE_THOUSANDS_SEP) and a '.' decimal point; and an optional - case-insensitive suffix, which may be either 'k', 'M', 'G', or scientific - notation consisting of 'e'/'E' followed by an optional '+' or '-' sign and - decimal digits. To be considered a valid numeric value, the main part (not - including any suffix or scientific notation) must contain at least one - digit (either before or after the decimal point). - - When @a strend is NULL, @a str is expected to contain only (optional - whitespace followed by) the numeric value. 
A warning will be printed - (if hts_verbose is HTS_LOG_WARNING or more) if no valid parsable number - is found or if there are any unused characters after the number. - - When @a strend is non-NULL, @a str starts with (optional whitespace - followed by) the numeric value. On return, @a strend is set to point - to the first unused character after the numeric value, or to @a str - if no valid parsable number is found. -*/ -HTSLIB_EXPORT -long long hts_parse_decimal(const char *str, char **strend, int flags); - -typedef int (*hts_name2id_f)(void*, const char*); - -/// Parse a "CHR:START-END"-style region string -/** @param str String to be parsed - @param beg Set on return to the 0-based start of the region - @param end Set on return to the 1-based end of the region - @return Pointer to the colon or '\0' after the reference sequence name, - or NULL if @a str could not be parsed. - - NOTE: For compatibility with hts_parse_reg only. - Please use hts_parse_region instead. -*/ -HTSLIB_EXPORT -const char *hts_parse_reg64(const char *str, hts_pos_t *beg, hts_pos_t *end); - -/// Parse a "CHR:START-END"-style region string -/** @param str String to be parsed - @param beg Set on return to the 0-based start of the region - @param end Set on return to the 1-based end of the region - @return Pointer to the colon or '\0' after the reference sequence name, - or NULL if @a str could not be parsed. -*/ -HTSLIB_EXPORT -const char *hts_parse_reg(const char *str, int *beg, int *end); - -/// Parse a "CHR:START-END"-style region string -/** @param str String to be parsed - @param tid Set on return (if not NULL) to be reference index (-1 if invalid) - @param beg Set on return to the 0-based start of the region - @param end Set on return to the 1-based end of the region - @param getid Function pointer. Called if not NULL to set tid. - @param hdr Caller data passed to getid. - @param flags Bitwise HTS_PARSE_* flags listed above. 
- @return Pointer to the byte after the end of the entire region - specifier (including any trailing comma) on success, - or NULL if @a str could not be parsed. - - A variant of hts_parse_reg which is reference-id aware. It uses - the iterator name2id callbacks to validate the region tokenisation works. - - This is necessary due to GRCh38 HLA additions which have reference names - like "HLA-DRB1*12:17". - - To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" - are reference names, quote using curly braces. - Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. - - Flags are used to control how parsing works, and can be one of the below. - - HTS_PARSE_THOUSANDS_SEP: - Ignore commas in numbers. For example with this flag 1,234,567 - is interpreted as 1234567. - - HTS_PARSE_LIST: - If present, the region is assmed to be a comma separated list and - position parsing will not contain commas (this implicitly - clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal). - On success the return pointer will be the start of the next region, ie - the character after the comma. (If *ret != '\0' then the caller can - assume another region is present in the list.) - - If not set then positions may contain commas. In this case the return - value should point to the end of the string, or NULL on failure. - - HTS_PARSE_ONE_COORD: - If present, X:100 is treated as the single base pair region X:100-100. - In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-. - (This is the standard bcftools region convention.) - - When not set X:100 is considered to be X:100- where is - the end of chromosome X (set to INT_MAX here). X:100- and X:-100 are - invalid. - (This is the standard samtools region convention.) - - Note the supplied string expects 1 based inclusive coordinates, but the - returned coordinates start from 0 and are half open, so pos0 is valid - for use in e.g. 
"for (pos0 = beg; pos0 < end; pos0++) {...}" - - If NULL is returned, the value in tid mat give additional information - about the error: - - -2 Failed to parse @p hdr; or out of memory - -1 The reference in @p str has mismatched braces, or does not - exist in @p hdr - >= 0 The specified range in @p str could not be parsed -*/ -HTSLIB_EXPORT -const char *hts_parse_region(const char *s, int *tid, hts_pos_t *beg, - hts_pos_t *end, hts_name2id_f getid, void *hdr, - int flags); - - -/////////////////////////////////////////////////////////// -// Generic iterators -// -// These functions provide the low-level infrastructure for iterators. -// Wrappers around these are used to make iterators for specific file types. -// See: -// htslib/sam.h for SAM/BAM/CRAM iterators -// htslib/vcf.h for VCF/BCF iterators -// htslib/tbx.h for files indexed by tabix - -/// Create a single-region iterator -/** @param idx Index - @param tid Target ID - @param beg Start of region - @param end End of region - @param readrec Callback to read a record from the input file - @return An iterator on success; NULL on failure - - The iterator struct returned by a successful call should be freed - via hts_itr_destroy() when it is no longer needed. 
- */ -HTSLIB_EXPORT -hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec); - -/// Free an iterator -/** @param iter Iterator to free - */ -HTSLIB_EXPORT -void hts_itr_destroy(hts_itr_t *iter); - -typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec); - -/// Create a single-region iterator from a text region specification -/** @param idx Index - @param reg Region specifier - @param getid Callback function to return the target ID for a name - @param hdr Input file header - @param itr_query Callback function returning an iterator for a numeric tid, - start and end position - @param readrec Callback to read a record from the input file - @return An iterator on success; NULL on error - - The iterator struct returned by a successful call should be freed - via hts_itr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec); - -/// Return the next record from an iterator -/** @param fp Input file handle - @param iter Iterator - @param r Pointer to record placeholder - @param data Data passed to the readrec callback - @return >= 0 on success, -1 when there is no more data, < -1 on error - */ -HTSLIB_EXPORT -int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) HTS_RESULT_USED; - -/********************************** - * Iterator with multiple regions * - **********************************/ - -typedef int hts_itr_multi_query_func(const hts_idx_t *idx, hts_itr_t *itr); -HTSLIB_EXPORT -int hts_itr_multi_bam(const hts_idx_t *idx, hts_itr_t *iter); -HTSLIB_EXPORT -int hts_itr_multi_cram(const hts_idx_t *idx, hts_itr_t *iter); - -/// Create a multi-region iterator from a region list -/** @param idx Index - @param reglist Region list - @param count Number of items in region list - 
@param getid Callback to convert names to target IDs - @param hdr Indexed file header (passed to getid) - @param itr_specific Filetype-specific callback function - @param readrec Callback to read an input file record - @param seek Callback to seek in the input file - @param tell Callback to return current input file location - @return An iterator on success; NULL on failure - - The iterator struct returned by a successful call should be freed - via hts_itr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -hts_itr_t *hts_itr_regions(const hts_idx_t *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, hts_itr_multi_query_func *itr_specific, hts_readrec_func *readrec, hts_seek_func *seek, hts_tell_func *tell); - -/// Return the next record from an iterator -/** @param fp Input file handle - @param iter Iterator - @param r Pointer to record placeholder - @return >= 0 on success, -1 when there is no more data, < -1 on error - */ -HTSLIB_EXPORT -int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r); - -/// Create a region list from a char array -/** @param argv Char array of target:interval elements, e.g. chr1:2500-3600, chr1:5100, chr2 - @param argc Number of items in the array - @param r_count Pointer to the number of items in the resulting region list - @param hdr Header for the sam/bam/cram file - @param getid Callback to convert target names to target ids. - @return A region list on success, NULL on failure - - The hts_reglist_t struct returned by a successful call should be freed - via hts_reglist_free() when it is no longer needed. 
- */ -HTSLIB_EXPORT -hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr, hts_name2id_f getid); - -/// Free a region list -/** @param reglist Region list - @param count Number of items in the list - */ -HTSLIB_EXPORT -void hts_reglist_free(hts_reglist_t *reglist, int count); - -/// Free a multi-region iterator -/** @param iter Iterator to free - */ -#define hts_itr_multi_destroy(iter) hts_itr_destroy(iter) - - - /** - * hts_file_type() - Convenience function to determine file type - * DEPRECATED: This function has been replaced by hts_detect_format(). - * It and these FT_* macros will be removed in a future HTSlib release. - */ - #define FT_UNKN 0 - #define FT_GZ 1 - #define FT_VCF 2 - #define FT_VCF_GZ (FT_GZ|FT_VCF) - #define FT_BCF (1<<2) - #define FT_BCF_GZ (FT_GZ|FT_BCF) - #define FT_STDIN (1<<3) - HTSLIB_EXPORT - int hts_file_type(const char *fname); - - -/*************************** - * Revised MAQ error model * - ***************************/ - -struct errmod_t; -typedef struct errmod_t errmod_t; - -HTSLIB_EXPORT -errmod_t *errmod_init(double depcorr); -HTSLIB_EXPORT -void errmod_destroy(errmod_t *em); - -/* - n: number of bases - m: maximum base - bases[i]: qual:6, strand:1, base:4 - q[i*m+j]: phred-scaled likelihood of (i,j) - */ -HTSLIB_EXPORT -int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q); - - -/***************************************************** - * Probabilistic banded glocal alignment * - * See https://doi.org/10.1093/bioinformatics/btr076 * - *****************************************************/ - -typedef struct probaln_par_t { - float d, e; - int bw; -} probaln_par_t; - -/// Perform probabilistic banded glocal alignment -/** @param ref Reference sequence - @param l_ref Length of reference - @param query Query sequence - @param l_query Length of query sequence - @param iqual Query base qualities - @param c Alignment parameters - @param[out] state Output alignment - @param[out] q Phred 
scaled posterior probability of state[i] being wrong - @return Phred-scaled likelihood score, or INT_MIN on failure. - -The reference and query sequences are coded using integers 0,1,2,3,4 for -bases A,C,G,T,N respectively (N here is for any ambiguity code). - -On output, state and q are arrays of length l_query. The higher 30 -bits give the reference position the query base is matched to and the -lower two bits can be 0 (an alignment match) or 1 (an -insertion). q[i] gives the phred scaled posterior probability of -state[i] being wrong. - -On failure, errno will be set to EINVAL if the values of l_ref or l_query -were invalid; or ENOMEM if a memory allocation failed. -*/ - -HTSLIB_EXPORT -int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, int l_query, const uint8_t *iqual, const probaln_par_t *c, int *state, uint8_t *q); - - - /********************** - * MD5 implementation * - **********************/ - - struct hts_md5_context; - typedef struct hts_md5_context hts_md5_context; - - /*! @abstract Initialises an MD5 context. - * @discussion - * The expected use is to allocate an hts_md5_context using - * hts_md5_init(). This pointer is then passed into one or more calls - * of hts_md5_update() to compute successive internal portions of the - * MD5 sum, which can then be externalised as a full 16-byte MD5sum - * calculation by calling hts_md5_final(). This can then be turned - * into ASCII via hts_md5_hex(). - * - * To dealloate any resources created by hts_md5_init() call the - * hts_md5_destroy() function. - * - * @return hts_md5_context pointer on success, NULL otherwise. - */ - HTSLIB_EXPORT - hts_md5_context *hts_md5_init(void); - - /*! @abstract Updates the context with the MD5 of the data. */ - HTSLIB_EXPORT - void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size); - - /*! 
@abstract Computes the final 128-bit MD5 hash from the given context */ - HTSLIB_EXPORT - void hts_md5_final(unsigned char *digest, hts_md5_context *ctx); - - /*! @abstract Resets an md5_context to the initial state, as returned - * by hts_md5_init(). - */ - HTSLIB_EXPORT - void hts_md5_reset(hts_md5_context *ctx); - - /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-termninated - * hex string. - */ - HTSLIB_EXPORT - void hts_md5_hex(char *hex, const unsigned char *digest); - - /*! @abstract Deallocates any memory allocated by hts_md5_init. */ - HTSLIB_EXPORT - void hts_md5_destroy(hts_md5_context *ctx); - -static inline int hts_reg2bin(hts_pos_t beg, hts_pos_t end, int min_shift, int n_lvls) -{ - int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7; - for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l)) - if (beg>>s == end>>s) return t + (beg>>s); - return 0; -} - -/// Compute the level of a bin in a binning index -static inline int hts_bin_level(int bin) { - int l, b; - for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); - return l; -} - -//! Compute the corresponding entry into the linear index of a given bin from -//! a binning index -/*! - * @param bin The bin number - * @param n_lvls The index depth (number of levels - 0 based) - * @return The integer offset into the linear index - * - * Explanation of the return value formula: - * Each bin on level l covers exp(2, (n_lvls - l)*3 + min_shift) base pairs. - * A linear index entry covers exp(2, min_shift) base pairs. 
- */ -static inline int hts_bin_bot(int bin, int n_lvls) -{ - int l = hts_bin_level(bin); - return (bin - hts_bin_first(l)) << (n_lvls - l) * 3; -} - -/************** - * Endianness * - **************/ - -static inline int ed_is_big(void) -{ - long one= 1; - return !(*((char *)(&one))); -} -static inline uint16_t ed_swap_2(uint16_t v) -{ - return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); -} -static inline void *ed_swap_2p(void *x) -{ - *(uint16_t*)x = ed_swap_2(*(uint16_t*)x); - return x; -} -static inline uint32_t ed_swap_4(uint32_t v) -{ - v = ((v & 0x0000FFFFU) << 16) | (v >> 16); - return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); -} -static inline void *ed_swap_4p(void *x) -{ - *(uint32_t*)x = ed_swap_4(*(uint32_t*)x); - return x; -} -static inline uint64_t ed_swap_8(uint64_t v) -{ - v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); - v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); - return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); -} -static inline void *ed_swap_8p(void *x) -{ - *(uint64_t*)x = ed_swap_8(*(uint64_t*)x); - return x; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.18/htslib/hts_defs.h b/src/htslib-1.18/htslib/hts_defs.h deleted file mode 100644 index 7719215..0000000 --- a/src/htslib-1.18/htslib/hts_defs.h +++ /dev/null @@ -1,120 +0,0 @@ -/* hts_defs.h -- Miscellaneous definitions. - - Copyright (C) 2013-2015,2017, 2019-2020 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef HTSLIB_HTS_DEFS_H -#define HTSLIB_HTS_DEFS_H - -#if defined __MINGW32__ -#include // For __MINGW_PRINTF_FORMAT macro -#endif - -#ifdef __clang__ -#ifdef __has_attribute -#define HTS_COMPILER_HAS(attribute) __has_attribute(attribute) -#endif - -#elif defined __GNUC__ -#define HTS_GCC_AT_LEAST(major, minor) \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) -#endif - -#ifndef HTS_COMPILER_HAS -#define HTS_COMPILER_HAS(attribute) 0 -#endif -#ifndef HTS_GCC_AT_LEAST -#define HTS_GCC_AT_LEAST(major, minor) 0 -#endif - -#if HTS_COMPILER_HAS(__nonstring__) || HTS_GCC_AT_LEAST(8,1) -#define HTS_NONSTRING __attribute__ ((__nonstring__)) -#else -#define HTS_NONSTRING -#endif - -#if HTS_COMPILER_HAS(__noreturn__) || HTS_GCC_AT_LEAST(3,0) -#define HTS_NORETURN __attribute__ ((__noreturn__)) -#else -#define HTS_NORETURN -#endif - -// GCC introduced warn_unused_result in 3.4 but added -Wno-unused-result later -#if HTS_COMPILER_HAS(__warn_unused_result__) || HTS_GCC_AT_LEAST(4,5) -#define HTS_RESULT_USED __attribute__ ((__warn_unused_result__)) -#else -#define HTS_RESULT_USED -#endif - -#if HTS_COMPILER_HAS(__unused__) || HTS_GCC_AT_LEAST(3,0) -#define HTS_UNUSED __attribute__ ((__unused__)) -#else -#define HTS_UNUSED -#endif - -#if HTS_COMPILER_HAS(__deprecated__) || HTS_GCC_AT_LEAST(4,5) -#define HTS_DEPRECATED(message) __attribute__ ((__deprecated__ (message))) -#elif HTS_GCC_AT_LEAST(3,1) -#define HTS_DEPRECATED(message) __attribute__ ((__deprecated__)) -#else -#define HTS_DEPRECATED(message) -#endif - -#if (HTS_COMPILER_HAS(__deprecated__) || HTS_GCC_AT_LEAST(6,4)) && !defined(__ICC) -#define HTS_DEPRECATED_ENUM(message) __attribute__ ((__deprecated__ (message))) -#else -#define HTS_DEPRECATED_ENUM(message) -#endif - -// On mingw the "printf" format type doesn't work. 
It needs "gnu_printf" -// in order to check %lld and %z, otherwise it defaults to checking against -// the Microsoft library printf format options despite linking against the -// GNU posix implementation of printf. The __MINGW_PRINTF_FORMAT macro -// expands to printf or gnu_printf as required, but obviously may not -// exist -#ifdef __MINGW_PRINTF_FORMAT -#define HTS_PRINTF_FMT __MINGW_PRINTF_FORMAT -#else -#define HTS_PRINTF_FMT printf -#endif - -#if HTS_COMPILER_HAS(__format__) || HTS_GCC_AT_LEAST(3,0) -#define HTS_FORMAT(type, idx, first) __attribute__((__format__ (type, idx, first))) -#else -#define HTS_FORMAT(type, idx, first) -#endif - -#if defined(_WIN32) || defined(__CYGWIN__) -#if defined(HTS_BUILDING_LIBRARY) -#define HTSLIB_EXPORT __declspec(dllexport) -#else -#define HTSLIB_EXPORT -#endif -#elif HTS_COMPILER_HAS(__visibility__) || HTS_GCC_AT_LEAST(4,0) -#define HTSLIB_EXPORT __attribute__((__visibility__("default"))) -#elif defined(__SUNPRO_C) && __SUNPRO_C >= 0x550 -#define HTSLIB_EXPORT __global -#else -#define HTSLIB_EXPORT -#endif - -#endif diff --git a/src/htslib-1.18/htslib/hts_endian.h b/src/htslib-1.18/htslib/hts_endian.h deleted file mode 100644 index 30ad805..0000000 --- a/src/htslib-1.18/htslib/hts_endian.h +++ /dev/null @@ -1,362 +0,0 @@ -/// @file hts_endian.h -/// Byte swapping and unaligned access functions. -/* - Copyright (C) 2017 Genome Research Ltd. - - Author: Rob Davies - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTS_ENDIAN_H -#define HTS_ENDIAN_H - -#include - -/* - * Compile-time endianness tests. - * - * Note that these tests may fail. They should only be used to enable - * faster versions of endian-neutral implementations. The endian-neutral - * version should always be available as a fall-back. - * - * See https://sourceforge.net/p/predef/wiki/Endianness/ - */ - -/* Save typing as both endian and unaligned tests want to know about x86 */ -#if (defined(__i386__) || defined(__i386) || defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__i686__) || defined(__i686)) && !defined(HTS_x86) -# define HTS_x86 /* x86 and x86_64 platform */ -#endif - -/** @def HTS_LITTLE_ENDIAN - * @brief Defined if platform is known to be little-endian - */ - -#ifndef HTS_LITTLE_ENDIAN -# if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \ - || defined(__LITTLE_ENDIAN__) \ - || defined(HTS_x86) \ - || defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) \ - || defined(_MIPSEL) || defined(__MIPSEL) || defined(__MIPSEL__) -# define HTS_LITTLE_ENDIAN -# endif -#endif - -/** @def HTS_BIG_ENDIAN - * @brief Defined if platform is known to be big-endian - */ - -#ifndef HTS_BIG_ENDIAN -# if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \ - || defined(__BIG_ENDIAN__) \ - || defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AAARCHEB__) \ - || defined(_MIPSEB) || defined(__MIPSEB) || defined(__MIPSEB__) -# define 
HTS_BIG_ENDIAN -# endif -#endif - -/** @def HTS_ENDIAN_NEUTRAL - * @brief Define this to disable any endian-specific optimizations - */ - -#if defined(HTS_ENDIAN_NEUTRAL) || (defined(HTS_LITTLE_ENDIAN) && defined(HTS_BIG_ENDIAN)) -/* Disable all endian-specific code. */ -# undef HTS_LITTLE_ENDIAN -# undef HTS_BIG_ENDIAN -#endif - -/** @def HTS_ALLOW_UNALIGNED - * @brief Control use of unaligned memory access. - * - * Defining HTS_ALLOW_UNALIGNED=1 converts shift-and-or to simple casts on - * little-endian platforms that can tolerate unaligned access (notably Intel - * x86). - * - * Defining HTS_ALLOW_UNALIGNED=0 forces shift-and-or. - */ - -// Consider using AX_CHECK_ALIGNED_ACCESS_REQUIRED in autoconf. -#ifndef HTS_ALLOW_UNALIGNED -# if defined(HTS_x86) -# define HTS_ALLOW_UNALIGNED 1 -# else -# define HTS_ALLOW_UNALIGNED 0 -# endif -#endif - -#if HTS_ALLOW_UNALIGNED != 0 -# if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) -// This prevents problems with gcc's vectoriser generating the wrong -// instructions for unaligned data. -typedef uint16_t uint16_u __attribute__ ((__aligned__ (1))); -typedef uint32_t uint32_u __attribute__ ((__aligned__ (1))); -typedef uint64_t uint64_u __attribute__ ((__aligned__ (1))); -#else -typedef uint16_t uint16_u; -typedef uint32_t uint32_u; -typedef uint64_t uint64_u; -# endif -#endif - -/// Get a uint8_t value from an unsigned byte array -/** @param buf Pointer to source byte, may be unaligned - * @return An 8-bit unsigned integer - */ -static inline uint8_t le_to_u8(const uint8_t *buf) { - return *buf; -} - -/// Get a uint16_t value from an unsigned byte array -/** @param buf Pointer to source byte, may be unaligned - * @return A 16 bit unsigned integer - * The input is read in little-endian byte order. 
- */ -static inline uint16_t le_to_u16(const uint8_t *buf) { -#if defined(HTS_LITTLE_ENDIAN) && HTS_ALLOW_UNALIGNED != 0 - return *((uint16_u *) buf); -#else - return (uint16_t) buf[0] | ((uint16_t) buf[1] << 8); -#endif -} - -/// Get a uint32_t value from an unsigned byte array -/** @param buf Pointer to source byte array, may be unaligned - * @return A 32 bit unsigned integer - * The input is read in little-endian byte order. - */ -static inline uint32_t le_to_u32(const uint8_t *buf) { -#if defined(HTS_LITTLE_ENDIAN) && HTS_ALLOW_UNALIGNED != 0 - return *((uint32_u *) buf); -#else - return ((uint32_t) buf[0] | - ((uint32_t) buf[1] << 8) | - ((uint32_t) buf[2] << 16) | - ((uint32_t) buf[3] << 24)); -#endif -} - -/// Get a uint64_t value from an unsigned byte array -/** @param buf Pointer to source byte array, may be unaligned - * @return A 64 bit unsigned integer - * The input is read in little-endian byte order. - */ -static inline uint64_t le_to_u64(const uint8_t *buf) { -#if defined(HTS_LITTLE_ENDIAN) && HTS_ALLOW_UNALIGNED != 0 - return *((uint64_u *) buf); -#else - return ((uint64_t) buf[0] | - ((uint64_t) buf[1] << 8) | - ((uint64_t) buf[2] << 16) | - ((uint64_t) buf[3] << 24) | - ((uint64_t) buf[4] << 32) | - ((uint64_t) buf[5] << 40) | - ((uint64_t) buf[6] << 48) | - ((uint64_t) buf[7] << 56)); -#endif -} - -/// Store a uint16_t value in little-endian byte order -/** @param val The value to store - * @param buf Where to store it (may be unaligned) - */ -static inline void u16_to_le(uint16_t val, uint8_t *buf) { -#if defined(HTS_LITTLE_ENDIAN) && HTS_ALLOW_UNALIGNED != 0 - *((uint16_u *) buf) = val; -#else - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; -#endif -} - -/// Store a uint32_t value in little-endian byte order -/** @param val The value to store - * @param buf Where to store it (may be unaligned) - */ -static inline void u32_to_le(uint32_t val, uint8_t *buf) { -#if defined(HTS_LITTLE_ENDIAN) && HTS_ALLOW_UNALIGNED != 0 - *((uint32_u *) buf) = 
val; -#else - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24) & 0xff; -#endif -} - -/// Store a uint64_t value in little-endian byte order -/** @param val The value to store - * @param buf Where to store it (may be unaligned) - */ -static inline void u64_to_le(uint64_t val, uint8_t *buf) { -#if defined(HTS_LITTLE_ENDIAN) && HTS_ALLOW_UNALIGNED != 0 - *((uint64_u *) buf) = val; -#else - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24) & 0xff; - buf[4] = (val >> 32) & 0xff; - buf[5] = (val >> 40) & 0xff; - buf[6] = (val >> 48) & 0xff; - buf[7] = (val >> 56) & 0xff; -#endif -} - -/* Signed values. Grab the data as unsigned, then convert to signed without - * triggering undefined behaviour. On any sensible platform, the conversion - * should optimise away to nothing. - */ - -/// Get an int8_t value from an unsigned byte array -/** @param buf Pointer to source byte array, may be unaligned - * @return A 8 bit signed integer - * The input data is interpreted as 2's complement representation. - */ -static inline int8_t le_to_i8(const uint8_t *buf) { - return *buf < 0x80 ? (int8_t) *buf : -((int8_t) (0xff - *buf)) - 1; -} - -/// Get an int16_t value from an unsigned byte array -/** @param buf Pointer to source byte array, may be unaligned - * @return A 16 bit signed integer - * The input data is interpreted as 2's complement representation in - * little-endian byte order. - */ -static inline int16_t le_to_i16(const uint8_t *buf) { - uint16_t v = le_to_u16(buf); - return v < 0x8000 ? (int16_t) v : -((int16_t) (0xffff - v)) - 1; -} - -/// Get an int32_t value from an unsigned byte array -/** @param buf Pointer to source byte array, may be unaligned - * @return A 32 bit signed integer - * The input data is interpreted as 2's complement representation in - * little-endian byte order. 
- */ -static inline int32_t le_to_i32(const uint8_t *buf) { - uint32_t v = le_to_u32(buf); - return v < 0x80000000U ? (int32_t) v : -((int32_t) (0xffffffffU - v)) - 1; -} - -/// Get an int64_t value from an unsigned byte array -/** @param buf Pointer to source byte array, may be unaligned - * @return A 64 bit signed integer - * The input data is interpreted as 2's complement representation in - * little-endian byte order. - */ -static inline int64_t le_to_i64(const uint8_t *buf) { - uint64_t v = le_to_u64(buf); - return (v < 0x8000000000000000ULL - ? (int64_t) v : -((int64_t) (0xffffffffffffffffULL - v)) - 1); -} - -// Converting the other way is easier as signed -> unsigned is well defined. - -/// Store a uint16_t value in little-endian byte order -/** @param val The value to store - * @param buf Where to store it (may be unaligned) - */ -static inline void i16_to_le(int16_t val, uint8_t *buf) { - u16_to_le(val, buf); -} - -/// Store a uint32_t value in little-endian byte order -/** @param val The value to store - * @param buf Where to store it (may be unaligned) - */ -static inline void i32_to_le(int32_t val, uint8_t *buf) { - u32_to_le(val, buf); -} - -/// Store a uint64_t value in little-endian byte order -/** @param val The value to store - * @param buf Where to store it (may be unaligned) - */ -static inline void i64_to_le(int64_t val, uint8_t *buf) { - u64_to_le(val, buf); -} - -/* Floating point. Assumptions: - * Platform uses IEEE 754 format - * sizeof(float) == sizeof(uint32_t) - * sizeof(double) == sizeof(uint64_t) - * Endian-ness is the same for both floating point and integer - * Type-punning via a union is allowed - */ - -/// Get a float value from an unsigned byte array -/** @param buf Pointer to source byte array, may be unaligned - * @return A 32 bit floating point value - * The input is interpreted as an IEEE 754 format float in little-endian - * byte order. 
- */ -static inline float le_to_float(const uint8_t *buf) { - union { - uint32_t u; - float f; - } convert; - - convert.u = le_to_u32(buf); - return convert.f; -} - -/// Get a double value from an unsigned byte array -/** @param buf Pointer to source byte array, may be unaligned - * @return A 64 bit floating point value - * The input is interpreted as an IEEE 754 format double in little-endian - * byte order. - */ -static inline double le_to_double(const uint8_t *buf) { - union { - uint64_t u; - double f; - } convert; - - convert.u = le_to_u64(buf); - return convert.f; -} - -/// Store a float value in little-endian byte order -/** @param val The value to store - * @param buf Where to store it (may be unaligned) - */ -static inline void float_to_le(float val, uint8_t *buf) { - union { - uint32_t u; - float f; - } convert; - - convert.f = val; - u32_to_le(convert.u, buf); -} - -/// Store a double value in little-endian byte order -/** @param val The value to store - * @param buf Where to store it (may be unaligned) - */ -static inline void double_to_le(double val, uint8_t *buf) { - union { - uint64_t u; - double f; - } convert; - - convert.f = val; - u64_to_le(convert.u, buf); -} - -#endif /* HTS_ENDIAN_H */ diff --git a/src/htslib-1.18/htslib/khash.h b/src/htslib-1.18/htslib/khash.h deleted file mode 100644 index 4cea910..0000000 --- a/src/htslib-1.18/htslib/khash.h +++ /dev/null @@ -1,670 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, 2009, 2011 by Attractive Chaos - Copyright (C) 2014-2015, 2018 Genome Research Ltd. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* - An example: - -#include "khash.h" -KHASH_MAP_INIT_INT(32, char) -int main() { - int ret, is_missing; - khiter_t k; - khash_t(32) *h = kh_init(32); - k = kh_put(32, h, 5, &ret); - kh_value(h, k) = 10; - k = kh_get(32, h, 10); - is_missing = (k == kh_end(h)); - k = kh_get(32, h, 5); - kh_del(32, h, k); - for (k = kh_begin(h); k != kh_end(h); ++k) - if (kh_exist(h, k)) kh_value(h, k) = 1; - kh_destroy(32, h); - return 0; -} -*/ - -/* - 2013-05-02 (0.2.8): - - * Use quadratic probing. When the capacity is power of 2, stepping function - i*(i+1)/2 guarantees to traverse each bucket. It is better than double - hashing on cache performance and is more robust than linear probing. - - In theory, double hashing should be more robust than quadratic probing. 
- However, my implementation is probably not for large hash tables, because - the second hash function is closely tied to the first hash function, - which reduce the effectiveness of double hashing. - - Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php - - 2011-12-29 (0.2.7): - - * Minor code clean up; no actual effect. - - 2011-09-16 (0.2.6): - - * The capacity is a power of 2. This seems to dramatically improve the - speed for simple keys. Thank Zilong Tan for the suggestion. Reference: - - - http://code.google.com/p/ulib/ - - http://nothings.org/computer/judy/ - - * Allow to optionally use linear probing which usually has better - performance for random input. Double hashing is still the default as it - is more robust to certain non-random input. - - * Added Wang's integer hash function (not used by default). This hash - function is more robust to certain non-random input. - - 2011-02-14 (0.2.5): - - * Allow to declare global functions. - - 2009-09-26 (0.2.4): - - * Improve portability - - 2008-09-19 (0.2.3): - - * Corrected the example - * Improved interfaces - - 2008-09-11 (0.2.2): - - * Improved speed a little in kh_put() - - 2008-09-10 (0.2.1): - - * Added kh_clear() - * Fixed a compiling error - - 2008-09-02 (0.2.0): - - * Changed to token concatenation which increases flexibility. - - 2008-08-31 (0.1.2): - - * Fixed a bug in kh_get(), which has not been tested previously. - - 2008-08-31 (0.1.1): - - * Added destructor -*/ - - -#ifndef __AC_KHASH_H -#define __AC_KHASH_H - -/*! - @header - - Generic hash table library. 
- */ - -#define AC_VERSION_KHASH_H "0.2.8" - -#include -#include -#include - -#include "kstring.h" -#include "kroundup.h" - -/* compiler specific configuration */ - -#if UINT_MAX == 0xffffffffu -typedef unsigned int khint32_t; -#elif ULONG_MAX == 0xffffffffu -typedef unsigned long khint32_t; -#endif - -#if ULONG_MAX == ULLONG_MAX -typedef unsigned long khint64_t; -#else -typedef unsigned long long khint64_t; -#endif - -#ifndef kh_inline -#ifdef _MSC_VER -#define kh_inline __inline -#else -#define kh_inline inline -#endif -#endif /* kh_inline */ - -#ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) -#else -#define klib_unused -#endif -#endif /* klib_unused */ - -typedef khint32_t khint_t; -typedef khint_t khiter_t; - -#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) -#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) -#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) -#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) -#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) -#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) -#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) - -#define __ac_fsize(m) ((m) < 16? 
1 : (m)>>4) - -#ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -#ifndef kcalloc -#define kcalloc(N,Z) calloc(N,Z) -#endif -#ifndef kmalloc -#define kmalloc(Z) malloc(Z) -#endif -#ifndef krealloc -#define krealloc(P,Z) realloc(P,Z) -#endif -#ifndef kfree -#define kfree(P) free(P) -#endif - -static const double __ac_HASH_UPPER = 0.77; - -#define __KHASH_TYPE(name, khkey_t, khval_t) \ - typedef struct kh_##name##_s { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; - -#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ - extern kh_##name##_t *kh_init_##name(void); \ - extern void kh_destroy_##name(kh_##name##_t *h); \ - extern void kh_clear_##name(kh_##name##_t *h); \ - extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - extern void kh_del_##name(kh_##name##_t *h, khint_t x); - -#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - SCOPE kh_##name##_t *kh_init_##name(void) { \ - return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ - } \ - SCOPE void kh_destroy_##name(kh_##name##_t *h) \ - { \ - if (h) { \ - kfree((void *)h->keys); kfree(h->flags); \ - kfree((void *)h->vals); \ - kfree(h); \ - } \ - } \ - SCOPE void kh_clear_##name(kh_##name##_t *h) \ - { \ - if (h && h->flags) { \ - memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ - h->size = h->n_occupied = 0; \ - } \ - } \ - SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ - { \ - if (h->n_buckets) { \ - khint_t k, i, last, mask, step = 0; \ - mask = h->n_buckets - 1; \ - k = __hash_func(key); i = k & mask; \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || 
!__hash_equal(h->keys[i], key))) { \ - i = (i + (++step)) & mask; \ - if (i == last) return h->n_buckets; \ - } \ - return __ac_iseither(h->flags, i)? h->n_buckets : i; \ - } else return 0; \ - } \ - SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ - khint32_t *new_flags = 0; \ - khint_t j = 1; \ - { \ - kroundup32(new_n_buckets); \ - if (new_n_buckets < 4) new_n_buckets = 4; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ - else { /* hash table size to be changed (shrink or expand); rehash */ \ - new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (!new_flags) return -1; \ - memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (h->n_buckets < new_n_buckets) { /* expand */ \ - khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (!new_keys) { kfree(new_flags); return -1; } \ - h->keys = new_keys; \ - if (kh_is_map) { \ - khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ - if (!new_vals) { kfree(new_flags); return -1; } \ - h->vals = new_vals; \ - } \ - } /* otherwise shrink */ \ - } \ - } \ - if (j) { /* rehashing is needed */ \ - for (j = 0; j != h->n_buckets; ++j) { \ - if (__ac_iseither(h->flags, j) == 0) { \ - khkey_t key = h->keys[j]; \ - khval_t val; \ - khint_t new_mask; \ - new_mask = new_n_buckets - 1; \ - if (kh_is_map) val = h->vals[j]; \ - __ac_set_isdel_true(h->flags, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t k, i, step = 0; \ - k = __hash_func(key); \ - i = k & new_mask; \ - while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ - __ac_set_isempty_false(new_flags, i); \ - if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the 
existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ - __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ - } else { /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - if (kh_is_map) h->vals[i] = val; \ - break; \ - } \ - } \ - } \ - } \ - if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ - } \ - kfree(h->flags); /* free the working space */ \ - h->flags = new_flags; \ - h->n_buckets = new_n_buckets; \ - h->n_occupied = h->size; \ - h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ - } \ - return 0; \ - } \ - SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ - { \ - khint_t x; \ - if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) { \ - if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ - *ret = -1; return h->n_buckets; \ - } \ - } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ - *ret = -1; return h->n_buckets; \ - } \ - } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - { \ - khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ - x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ - if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ - else { \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + (++step)) & mask; \ - if (i == last) { x = site; break; } \ - } \ - if (x == h->n_buckets) { \ - if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ - else x 
= i; \ - } \ - } \ - } \ - if (__ac_isempty(h->flags, x)) { /* not present at all */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; ++h->n_occupied; \ - *ret = 1; \ - } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; \ - *ret = 2; \ - } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ - return x; \ - } \ - SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ - { \ - if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ - __ac_set_isdel_true(h->flags, x); \ - --h->size; \ - } \ - } - -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_PROTOTYPES(name, khkey_t, khval_t) - -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -/* --- BEGIN OF HASH FUNCTIONS --- */ - -/*! @function - @abstract Integer hash function - @param key The integer [khint32_t] - @return The hash value [khint_t] - */ -#define kh_int_hash_func(key) (khint32_t)(key) -/*! @function - @abstract Integer comparison function - */ -#define kh_int_hash_equal(a, b) ((a) == (b)) -/*! @function - @abstract 64-bit integer hash function - @param key The integer [khint64_t] - @return The hash value [khint_t] - */ -#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) -/*! @function - @abstract 64-bit integer comparison function - */ -#define kh_int64_hash_equal(a, b) ((a) == (b)) -/*! 
@function - @abstract const char* hash function - @param s Pointer to a null terminated string - @return The hash value - */ -static kh_inline khint_t __ac_X31_hash_string(const char *s) -{ - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; -} -/*! @function - @abstract Another interface to const char* hash function - @param key Pointer to a nul terminated string [const char*] - @return The hash value [khint_t] - */ -#define kh_str_hash_func(key) __ac_X31_hash_string(key) -/*! @function - @abstract Const char* comparison function - */ -#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) - -/*! @function - @abstract Kstring hash function - @param s Pointer to a kstring - @return The hash value - */ -static kh_inline khint_t __ac_X31_hash_kstring(const kstring_t ks) -{ - khint_t h = 0; - size_t i; - for (i = 0; i < ks.l; i++) - h = (h << 5) - h + (khint_t)ks.s[i]; - return h; -} -/*! @function - @abstract Interface to kstring hash function. - @param key Pointer to a khash; permits hashing on non-nul terminated strings. - @return The hash value [khint_t] - */ -#define kh_kstr_hash_func(key) __ac_X31_hash_kstring(key) -/*! @function - @abstract kstring comparison function - */ -#define kh_kstr_hash_equal(a, b) ((a).l == (b).l && strncmp((a).s, (b).s, (a).l) == 0) - -static kh_inline khint_t __ac_Wang_hash(khint_t key) -{ - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; -} -#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)(key)) - -/* --- END OF HASH FUNCTIONS --- */ - -/* Other convenient macros... */ - -/*! - @abstract Type of the hash table. - @param name Name of the hash table [symbol] - */ -#define khash_t(name) kh_##name##_t - -/*! @function - @abstract Initiate a hash table. 
- @param name Name of the hash table [symbol] - @return Pointer to the hash table [khash_t(name)*] - */ -#define kh_init(name) kh_init_##name() - -/*! @function - @abstract Destroy a hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - */ -#define kh_destroy(name, h) kh_destroy_##name(h) - -/*! @function - @abstract Reset a hash table without deallocating memory. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - */ -#define kh_clear(name, h) kh_clear_##name(h) - -/*! @function - @abstract Resize a hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param s New size [khint_t] - */ -#define kh_resize(name, h, s) kh_resize_##name(h, s) - -/*! @function - @abstract Insert a key to the hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param k Key [type of keys] - @param r Extra return code: -1 if the operation failed; - 0 if the key is present in the hash table; - 1 if the bucket is empty (never used); 2 if the element in - the bucket has been deleted [int*] - @return Iterator to the inserted element [khint_t] - */ -#define kh_put(name, h, k, r) kh_put_##name(h, k, r) - -/*! @function - @abstract Retrieve a key from the hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param k Key [type of keys] - @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] - */ -#define kh_get(name, h, k) kh_get_##name(h, k) - -/*! @function - @abstract Remove a key from the hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param k Iterator to the element to be deleted [khint_t] - */ -#define kh_del(name, h, k) kh_del_##name(h, k) - -/*! @function - @abstract Test whether a bucket contains data. 
- @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return 1 if containing data; 0 otherwise [int] - */ -#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) - -/*! @function - @abstract Get key given an iterator - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return Key [type of keys] - */ -#define kh_key(h, x) ((h)->keys[x]) - -/*! @function - @abstract Get value given an iterator - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return Value [type of values] - @discussion For hash sets, calling this results in segfault. - */ -#define kh_val(h, x) ((h)->vals[x]) - -/*! @function - @abstract Alias of kh_val() - */ -#define kh_value(h, x) ((h)->vals[x]) - -/*! @function - @abstract Get the start iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The start iterator [khint_t] - */ -#define kh_begin(h) (khint_t)(0) - -/*! @function - @abstract Get the end iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The end iterator [khint_t] - */ -#define kh_end(h) ((h)->n_buckets) - -/*! @function - @abstract Get the number of elements in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of elements in the hash table [khint_t] - */ -#define kh_size(h) ((h)->size) - -/*! @function - @abstract Get the number of buckets in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of buckets in the hash table [khint_t] - */ -#define kh_n_buckets(h) ((h)->n_buckets) - -/*! 
@function - @abstract Iterate over the entries in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @param kvar Variable to which key will be assigned - @param vvar Variable to which value will be assigned - @param code Block of code to execute - */ -#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (kvar) = kh_key(h,__i); \ - (vvar) = kh_val(h,__i); \ - code; \ - } } - -/*! @function - @abstract Iterate over the values in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @param vvar Variable to which value will be assigned - @param code Block of code to execute - */ -#define kh_foreach_value(h, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (vvar) = kh_val(h,__i); \ - code; \ - } } - -/* More convenient interfaces */ - -/*! @function - @abstract Instantiate a hash set containing integer keys - @param name Name of the hash table [symbol] - */ -#define KHASH_SET_INIT_INT(name) \ - KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) - -/*! @function - @abstract Instantiate a hash map containing integer keys - @param name Name of the hash table [symbol] - @param khval_t Type of values [type] - */ -#define KHASH_MAP_INIT_INT(name, khval_t) \ - KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) - -/*! @function - @abstract Instantiate a hash set containing 64-bit integer keys - @param name Name of the hash table [symbol] - */ -#define KHASH_SET_INIT_INT64(name) \ - KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) - -/*! 
@function - @abstract Instantiate a hash map containing 64-bit integer keys - @param name Name of the hash table [symbol] - @param khval_t Type of values [type] - */ -#define KHASH_MAP_INIT_INT64(name, khval_t) \ - KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) - -typedef const char *kh_cstr_t; -/*! @function - @abstract Instantiate a hash set containing const char* keys - @param name Name of the hash table [symbol] - */ -#define KHASH_SET_INIT_STR(name) \ - KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) - -/*! @function - @abstract Instantiate a hash map containing const char* keys - @param name Name of the hash table [symbol] - @param khval_t Type of values [type] - */ -#define KHASH_MAP_INIT_STR(name, khval_t) \ - KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) - -/*! @function - @abstract Instantiate a hash set containing kstring_t keys - @param name Name of the hash table [symbol] - */ -#define KHASH_SET_INIT_KSTR(name) \ - KHASH_INIT(name, kstring_t, char, 0, kh_kstr_hash_func, kh_kstr_hash_equal) - -/*! @function - @abstract Instantiate a hash map containing kstring_t keys - @param name Name of the hash table [symbol] - @param khval_t Type of values [type] - */ -#define KHASH_MAP_INIT_KSTR(name, khval_t) \ - KHASH_INIT(name, kstring_t, khval_t, 1, kh_kstr_hash_func, kh_kstr_hash_equal) - -#endif /* __AC_KHASH_H */ diff --git a/src/htslib-1.18/htslib/klist.h b/src/htslib-1.18/htslib/klist.h deleted file mode 100644 index 398f205..0000000 --- a/src/htslib-1.18/htslib/klist.h +++ /dev/null @@ -1,136 +0,0 @@ -/* The MIT License - - Copyright (c) 2008-2009, by Attractive Chaos - Copyright (C) 2013, 2015 Genome Research Ltd. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#ifndef _AC_KLIST_H -#define _AC_KLIST_H - -#include - -#ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) -#else -#define klib_unused -#endif -#endif /* klib_unused */ - -#define KMEMPOOL_INIT2(SCOPE, name, kmptype_t, kmpfree_f) \ - typedef struct { \ - size_t cnt, n, max; \ - kmptype_t **buf; \ - } kmp_##name##_t; \ - SCOPE kmp_##name##_t *kmp_init_##name(void) { \ - return calloc(1, sizeof(kmp_##name##_t)); \ - } \ - SCOPE void kmp_destroy_##name(kmp_##name##_t *mp) { \ - size_t k; \ - for (k = 0; k < mp->n; ++k) { \ - kmpfree_f(mp->buf[k]); free(mp->buf[k]); \ - } \ - free(mp->buf); free(mp); \ - } \ - SCOPE kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \ - ++mp->cnt; \ - if (mp->n == 0) return calloc(1, sizeof(kmptype_t)); \ - return mp->buf[--mp->n]; \ - } \ - SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \ - --mp->cnt; \ - if (mp->n == mp->max) { \ - mp->max = mp->max? 
mp->max<<1 : 16; \ - mp->buf = realloc(mp->buf, sizeof(kmptype_t *) * mp->max); \ - } \ - mp->buf[mp->n++] = p; \ - } - -#define KMEMPOOL_INIT(name, kmptype_t, kmpfree_f) \ - KMEMPOOL_INIT2(static inline klib_unused, name, kmptype_t, kmpfree_f) - -#define kmempool_t(name) kmp_##name##_t -#define kmp_init(name) kmp_init_##name() -#define kmp_destroy(name, mp) kmp_destroy_##name(mp) -#define kmp_alloc(name, mp) kmp_alloc_##name(mp) -#define kmp_free(name, mp, p) kmp_free_##name(mp, p) - -#define KLIST_INIT2(SCOPE, name, kltype_t, kmpfree_t) \ - struct __kl1_##name { \ - kltype_t data; \ - struct __kl1_##name *next; \ - }; \ - typedef struct __kl1_##name kl1_##name; \ - KMEMPOOL_INIT2(SCOPE, name, kl1_##name, kmpfree_t) \ - typedef struct { \ - kl1_##name *head, *tail; \ - kmp_##name##_t *mp; \ - size_t size; \ - } kl_##name##_t; \ - SCOPE kl_##name##_t *kl_init_##name(void) { \ - kl_##name##_t *kl = calloc(1, sizeof(kl_##name##_t)); \ - kl->mp = kmp_init(name); \ - kl->head = kl->tail = kmp_alloc(name, kl->mp); \ - kl->head->next = 0; \ - return kl; \ - } \ - SCOPE void kl_destroy_##name(kl_##name##_t *kl) { \ - kl1_##name *p; \ - for (p = kl->head; p != kl->tail; p = p->next) \ - kmp_free(name, kl->mp, p); \ - kmp_free(name, kl->mp, p); \ - kmp_destroy(name, kl->mp); \ - free(kl); \ - } \ - SCOPE kltype_t *kl_pushp_##name(kl_##name##_t *kl) { \ - kl1_##name *q, *p = kmp_alloc(name, kl->mp); \ - q = kl->tail; p->next = 0; kl->tail->next = p; kl->tail = p; \ - ++kl->size; \ - return &q->data; \ - } \ - SCOPE int kl_shift_##name(kl_##name##_t *kl, kltype_t *d) { \ - kl1_##name *p; \ - if (kl->head->next == 0) return -1; \ - --kl->size; \ - p = kl->head; kl->head = kl->head->next; \ - if (d) *d = p->data; \ - kmp_free(name, kl->mp, p); \ - return 0; \ - } - -#define KLIST_INIT(name, kltype_t, kmpfree_t) \ - KLIST_INIT2(static inline klib_unused, name, kltype_t, kmpfree_t) - -#define kliter_t(name) kl1_##name -#define klist_t(name) kl_##name##_t -#define kl_val(iter) 
((iter)->data) -#define kl_next(iter) ((iter)->next) -#define kl_begin(kl) ((kl)->head) -#define kl_end(kl) ((kl)->tail) - -#define kl_init(name) kl_init_##name() -#define kl_destroy(name, kl) kl_destroy_##name(kl) -#define kl_pushp(name, kl) kl_pushp_##name(kl) -#define kl_shift(name, kl, d) kl_shift_##name(kl, d) - -#endif diff --git a/src/htslib-1.18/htslib/kseq.h b/src/htslib-1.18/htslib/kseq.h deleted file mode 100644 index ea887f1..0000000 --- a/src/htslib-1.18/htslib/kseq.h +++ /dev/null @@ -1,255 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, 2009, 2011 Attractive Chaos - Copyright (C) 2013, 2018, 2020, 2023 Genome Research Ltd. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#ifndef AC_KSEQ_H -#define AC_KSEQ_H - -#include -#include -#include - -#include "kstring.h" - -#ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) -#else -#define klib_unused -#endif -#endif /* klib_unused */ - -#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r -#define KS_SEP_TAB 1 // isspace() && !' ' -#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) -#define KS_SEP_MAX 2 - -#define __KS_TYPE(type_t) \ - typedef struct __kstream_t { \ - int begin, end; \ - int is_eof:2, bufsize:30; \ - uint64_t seek_pos; \ - type_t f; \ - unsigned char *buf; \ - } kstream_t; - -#define ks_err(ks) ((ks)->end == -1) -#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end) -#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0) - -#define __KS_BASIC(SCOPE, type_t, __bufsize) \ - SCOPE kstream_t *ks_init(type_t f) \ - { \ - kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ - ks->f = f; ks->bufsize = __bufsize; \ - ks->buf = (unsigned char*)malloc(__bufsize); \ - return ks; \ - } \ - SCOPE void ks_destroy(kstream_t *ks) \ - { \ - if (!ks) return; \ - free(ks->buf); \ - free(ks); \ - } - -#define __KS_INLINED(__read) \ - static inline klib_unused int ks_getc(kstream_t *ks) \ - { \ - if (ks_err(ks)) return -3; \ - if (ks->is_eof && ks->begin >= ks->end) return -1; \ - if (ks->begin >= ks->end) { \ - ks->begin = 0; \ - ks->end = __read(ks->f, ks->buf, ks->bufsize); \ - if (ks->end == 0) { ks->is_eof = 1; return -1; } \ - if (ks->end == -1) { ks->is_eof = 1; return -3; } \ - } \ - ks->seek_pos++; \ - return (int)ks->buf[ks->begin++]; \ - } \ - static inline klib_unused int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \ - { return ks_getuntil2(ks, delimiter, str, dret, 0); } - -#define __KS_GETUNTIL(SCOPE, __read) \ - SCOPE int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, 
int append) \ - { \ - int gotany = 0; \ - if (dret) *dret = 0; \ - str->l = append? str->l : 0; \ - uint64_t seek_pos = str->l; \ - for (;;) { \ - int i; \ - if (ks_err(ks)) return -3; \ - if (ks->begin >= ks->end) { \ - if (!ks->is_eof) { \ - ks->begin = 0; \ - ks->end = __read(ks->f, ks->buf, ks->bufsize); \ - if (ks->end == 0) { ks->is_eof = 1; break; } \ - if (ks->end == -1) { ks->is_eof = 1; return -3; } \ - } else break; \ - } \ - if (delimiter == KS_SEP_LINE) { \ - for (i = ks->begin; i < ks->end; ++i) \ - if (ks->buf[i] == '\n') break; \ - } else if (delimiter > KS_SEP_MAX) { \ - for (i = ks->begin; i < ks->end; ++i) \ - if (ks->buf[i] == delimiter) break; \ - } else if (delimiter == KS_SEP_SPACE) { \ - for (i = ks->begin; i < ks->end; ++i) \ - if (isspace(ks->buf[i])) break; \ - } else if (delimiter == KS_SEP_TAB) { \ - for (i = ks->begin; i < ks->end; ++i) \ - if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \ - } else i = 0; /* never come to here! */ \ - (void) ks_expand(str, i - ks->begin + 1); \ - seek_pos += i - ks->begin; if ( i < ks->end ) seek_pos++; \ - gotany = 1; \ - memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \ - str->l = str->l + (i - ks->begin); \ - ks->begin = i + 1; \ - if (i < ks->end) { \ - if (dret) *dret = ks->buf[i]; \ - break; \ - } \ - } \ - if (!gotany && ks_eof(ks)) return -1; \ - ks->seek_pos += seek_pos; \ - if (str->s == 0) { \ - str->m = 1; \ - str->s = (char*)calloc(1, 1); \ - } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \ - str->s[str->l] = '\0'; \ - return str->l; \ - } - -#define KSTREAM_INIT2(SCOPE, type_t, __read, __bufsize) \ - __KS_TYPE(type_t) \ - __KS_BASIC(SCOPE, type_t, __bufsize) \ - __KS_GETUNTIL(SCOPE, __read) \ - __KS_INLINED(__read) - -#define KSTREAM_INIT(type_t, __read, __bufsize) KSTREAM_INIT2(static, type_t, __read, __bufsize) - -#define KSTREAM_DECLARE(type_t, __read) \ - __KS_TYPE(type_t) \ - extern int ks_getuntil2(kstream_t *ks, int 
delimiter, kstring_t *str, int *dret, int append); \ - extern kstream_t *ks_init(type_t f); \ - extern void ks_destroy(kstream_t *ks); \ - __KS_INLINED(__read) - -/****************** - * FASTA/Q parser * - ******************/ - -#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0) - -#define __KSEQ_BASIC(SCOPE, type_t) \ - SCOPE kseq_t *kseq_init(type_t fd) \ - { \ - kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ - s->f = ks_init(fd); \ - return s; \ - } \ - SCOPE void kseq_destroy(kseq_t *ks) \ - { \ - if (!ks) return; \ - free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \ - ks_destroy(ks->f); \ - free(ks); \ - } - -/* Return value: - >=0 length of the sequence (normal) - -1 end-of-file - -2 truncated quality string - -3 error reading stream - -4 overflow error - */ -#define __KSEQ_READ(SCOPE) \ - SCOPE int kseq_read(kseq_t *seq) \ - { \ - int c,r; \ - kstream_t *ks = seq->f; \ - if (seq->last_char == 0) { /* then jump to the next header line */ \ - while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '@'); \ - if (c < 0) return c; /* end of file or error */ \ - seq->last_char = c; \ - } /* else: the first header char has been read in the previous call */ \ - seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \ - if ((r=ks_getuntil(ks, 0, &seq->name, &c)) < 0) return r; /* normal exit: EOF or error */ \ - if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \ - if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \ - seq->seq.m = 256; \ - seq->seq.s = (char*)malloc(seq->seq.m); \ - } \ - while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '+' && c != '@') { \ - if (c == '\n') continue; /* skip empty lines */ \ - seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \ - ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \ - } \ - if (c == '>' || c == 
'@') seq->last_char = c; /* the first header char has been read */ \ - if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \ - seq->seq.m = seq->seq.l + 2; \ - kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \ - if (seq->seq.l + 1 >= seq->seq.m) return -4; /* error: adjusting m overflowed */ \ - seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ - } \ - seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \ - if (c != '+') return seq->seq.l; /* FASTA */ \ - if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \ - seq->qual.m = seq->seq.m; \ - seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \ - } \ - while ((c = ks_getc(ks)) >= 0 && c != '\n'); /* skip the rest of '+' line */ \ - if (c == -1) return -2; /* error: no quality string */ \ - while ((c = ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1)) >= 0 && seq->qual.l < seq->seq.l); \ - if (c == -3) return -3; /* stream error */ \ - seq->last_char = 0; /* we have not come to the next header line */ \ - if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \ - return seq->seq.l; \ - } - -#define __KSEQ_TYPE(type_t) \ - typedef struct { \ - kstring_t name, comment, seq, qual; \ - int last_char; \ - kstream_t *f; \ - } kseq_t; - -#define KSEQ_INIT2(SCOPE, type_t, __read) \ - KSTREAM_INIT(type_t, __read, 16384) \ - __KSEQ_TYPE(type_t) \ - __KSEQ_BASIC(SCOPE, type_t) \ - __KSEQ_READ(SCOPE) - -#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) - -#define KSEQ_DECLARE(type_t) \ - __KS_TYPE(type_t) \ - __KSEQ_TYPE(type_t) \ - extern kseq_t *kseq_init(type_t fd); \ - void kseq_destroy(kseq_t *ks); \ - int kseq_read(kseq_t *seq); - -#endif diff --git a/src/htslib-1.18/htslib/kstring.h b/src/htslib-1.18/htslib/kstring.h deleted file mode 100644 index 53a1980..0000000 --- a/src/htslib-1.18/htslib/kstring.h +++ /dev/null @@ -1,411 +0,0 @@ -/* The MIT License - - Copyright 
(C) 2011 by Attractive Chaos - Copyright (C) 2013-2014, 2016, 2018-2020, 2022 Genome Research Ltd. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef KSTRING_H -#define KSTRING_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "hts_defs.h" -#include "kroundup.h" - -#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)) -#ifdef __MINGW_PRINTF_FORMAT -#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__MINGW_PRINTF_FORMAT, fmt, arg))) -#else -#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg))) -#endif // __MINGW_PRINTF_FORMAT -#else -#define KS_ATTR_PRINTF(fmt, arg) -#endif - -#ifndef HAVE___BUILTIN_CLZ -#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) -#define HAVE___BUILTIN_CLZ 1 -#endif -#endif - -// Ensure ssize_t exists within this header. 
All #includes must precede this, -// and ssize_t must be undefined again at the end of this header. -#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t -#define HTSLIB_SSIZE_T -#define ssize_t intptr_t -#endif - -/* kstring_t is a simple non-opaque type whose fields are likely to be - * used directly by user code (but see also ks_str() and ks_len() below). - * A kstring_t object is initialised by either of - * kstring_t str = KS_INITIALIZE; - * kstring_t str; ...; ks_initialize(&str); - * and either ownership of the underlying buffer should be given away before - * the object disappears (see ks_release() below) or the kstring_t should be - * destroyed with ks_free(&str) or free(str.s) */ -#ifndef KSTRING_T -#define KSTRING_T kstring_t -typedef struct kstring_t { - size_t l, m; - char *s; -} kstring_t; -#endif - -typedef struct ks_tokaux_t { - uint64_t tab[4]; - int sep, finished; - const char *p; // end of the current token -} ks_tokaux_t; - -#ifdef __cplusplus -extern "C" { -#endif - - HTSLIB_EXPORT - int kvsprintf(kstring_t *s, const char *fmt, va_list ap) KS_ATTR_PRINTF(2,0); - - HTSLIB_EXPORT - int ksprintf(kstring_t *s, const char *fmt, ...) KS_ATTR_PRINTF(2,3); - - HTSLIB_EXPORT - int kputd(double d, kstring_t *s); // custom %g only handler - - HTSLIB_EXPORT - int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); - - HTSLIB_EXPORT - char *kstrstr(const char *str, const char *pat, int **_prep); - - HTSLIB_EXPORT - char *kstrnstr(const char *str, const char *pat, int n, int **_prep); - - HTSLIB_EXPORT - void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep); - - /* kstrtok() is similar to strtok_r() except that str is not - * modified and both str and sep can be NULL. For efficiency, it is - * actually recommended to set both to NULL in the subsequent calls - * if sep is not changed. 
*/ - HTSLIB_EXPORT - char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux); - - /* kgetline() uses the supplied fgets()-like function to read a "\n"- - * or "\r\n"-terminated line from fp. The line read is appended to the - * kstring without its terminator and 0 is returned; EOF is returned at - * EOF or on error (determined by querying fp, as per fgets()). */ - typedef char *kgets_func(char *, int, void *); - HTSLIB_EXPORT - int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp); - - /* kgetline2() uses the supplied hgetln()-like function to read a "\n"- - * or "\r\n"-terminated line from fp. The line read is appended to the - * ksring without its terminator and 0 is returned; EOF is returned at - * EOF or on error (determined by querying fp, as per fgets()). */ - typedef ssize_t kgets_func2(char *, size_t, void *); - HTSLIB_EXPORT - int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp); - -#ifdef __cplusplus -} -#endif - -/// kstring initializer for structure assignment -#define KS_INITIALIZE { 0, 0, NULL } - -/// kstring initializer for pointers -/** - @note Not to be used if the buffer has been allocated. Use ks_release() - or ks_clear() instead. -*/ - -static inline void ks_initialize(kstring_t *s) -{ - s->l = s->m = 0; - s->s = NULL; -} - -/// Resize a kstring to a given capacity -static inline int ks_resize(kstring_t *s, size_t size) -{ - if (s->m < size) { - char *tmp; - size = (size > (SIZE_MAX>>2)) ? 
size : size + (size >> 1); - tmp = (char*)realloc(s->s, size); - if (!tmp) - return -1; - s->s = tmp; - s->m = size; - } - return 0; -} - -/// Increase kstring capacity by a given number of bytes -static inline int ks_expand(kstring_t *s, size_t expansion) -{ - size_t new_size = s->l + expansion; - - if (new_size < s->l) // Overflow check - return -1; - return ks_resize(s, new_size); -} - -/// Returns the kstring buffer -static inline char *ks_str(kstring_t *s) -{ - return s->s; -} - -/// Returns the kstring buffer, or an empty string if l == 0 -/** - * Unlike ks_str(), this function will never return NULL. If the kstring is - * empty it will return a read-only empty string. As the returned value - * may be read-only, the caller should not attempt to modify it. - */ -static inline const char *ks_c_str(kstring_t *s) -{ - return s->l && s->s ? s->s : ""; -} - -static inline size_t ks_len(kstring_t *s) -{ - return s->l; -} - -/// Reset kstring length to zero -/** - @return The kstring itself - - Example use: kputsn(string, len, ks_clear(s)) -*/ -static inline kstring_t *ks_clear(kstring_t *s) -{ - s->l = 0; - return s; -} - -// Give ownership of the underlying buffer away to something else (making -// that something else responsible for freeing it), leaving the kstring_t -// empty and ready to be used again, or ready to go out of scope without -// needing free(str.s) to prevent a memory leak. -static inline char *ks_release(kstring_t *s) -{ - char *ss = s->s; - s->l = s->m = 0; - s->s = NULL; - return ss; -} - -/// Safely free the underlying buffer in a kstring. 
-static inline void ks_free(kstring_t *s) -{ - if (s) { - free(s->s); - ks_initialize(s); - } -} - -static inline int kputsn(const char *p, size_t l, kstring_t *s) -{ - size_t new_sz = s->l + l + 2; - if (new_sz <= s->l || ks_resize(s, new_sz) < 0) - return EOF; - memcpy(s->s + s->l, p, l); - s->l += l; - s->s[s->l] = 0; - return l; -} - -static inline int kputs(const char *p, kstring_t *s) -{ - if (!p) { errno = EFAULT; return -1; } - return kputsn(p, strlen(p), s); -} - -static inline int kputc(int c, kstring_t *s) -{ - if (ks_resize(s, s->l + 2) < 0) - return EOF; - s->s[s->l++] = c; - s->s[s->l] = 0; - return (unsigned char)c; -} - -static inline int kputc_(int c, kstring_t *s) -{ - if (ks_resize(s, s->l + 1) < 0) - return EOF; - s->s[s->l++] = c; - return 1; -} - -static inline int kputsn_(const void *p, size_t l, kstring_t *s) -{ - size_t new_sz = s->l + l; - if (new_sz < s->l || ks_resize(s, new_sz ? new_sz : 1) < 0) - return EOF; - memcpy(s->s + s->l, p, l); - s->l += l; - return l; -} - -static inline int kputuw(unsigned x, kstring_t *s) -{ -#if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U - static const unsigned int kputuw_num_digits[32] = { - 10, 10, 10, 9, 9, 9, 8, 8, - 8, 7, 7, 7, 7, 6, 6, 6, - 5, 5, 5, 4, 4, 4, 4, 3, - 3, 3, 2, 2, 2, 1, 1, 1 - }; - static const unsigned int kputuw_thresholds[32] = { - 0, 0, 1000000000U, 0, 0, 100000000U, 0, 0, - 10000000, 0, 0, 0, 1000000, 0, 0, 100000, - 0, 0, 10000, 0, 0, 0, 1000, 0, - 0, 100, 0, 0, 10, 0, 0, 0 - }; -#else - uint64_t m; -#endif - static const char kputuw_dig2r[] = - "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; - unsigned int l, j; - char *cp; - - // Trivial case - also prevents __builtin_clz(0), which is undefined - if (x < 10) { - if (ks_resize(s, s->l + 2) < 0) - return EOF; - s->s[s->l++] = 
'0'+x; - s->s[s->l] = 0; - return 0; - } - - // Find out how many digits are to be printed. -#if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U - /* - * Table method - should be quick if clz can be done in hardware. - * Find the most significant bit of the value to print and look - * up in a table to find out how many decimal digits are needed. - * This number needs to be adjusted by 1 for cases where the decimal - * length could vary for a given number of bits (for example, - * a four bit number could be between 8 and 15). - */ - - l = __builtin_clz(x); - l = kputuw_num_digits[l] - (x < kputuw_thresholds[l]); -#else - // Fallback for when clz is not available - m = 1; - l = 0; - do { - l++; - m *= 10; - } while (x >= m); -#endif - - if (ks_resize(s, s->l + l + 2) < 0) - return EOF; - - // Add digits two at a time - j = l; - cp = s->s + s->l; - while (x >= 10) { - const char *d = &kputuw_dig2r[2*(x%100)]; - x /= 100; - memcpy(&cp[j-=2], d, 2); - } - - // Last one (if necessary). We know that x < 10 by now. - if (j == 1) - cp[0] = x + '0'; - - s->l += l; - s->s[s->l] = 0; - return 0; -} - -static inline int kputw(int c, kstring_t *s) -{ - unsigned int x = c; - if (c < 0) { - x = -x; - if (ks_resize(s, s->l + 3) < 0) - return EOF; - s->s[s->l++] = '-'; - } - - return kputuw(x, s); -} - -static inline int kputll(long long c, kstring_t *s) -{ - char buf[32]; - int i, l = 0; - unsigned long long x = c; - if (c < 0) x = -x; - do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0); - if (c < 0) buf[l++] = '-'; - if (ks_resize(s, s->l + l + 2) < 0) - return EOF; - for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; - s->s[s->l] = 0; - return 0; -} - -static inline int kputl(long c, kstring_t *s) { - return kputll(c, s); -} - -/* - * Returns 's' split by delimiter, with *n being the number of components; - * NULL on failure. 
- */ -static inline int *ksplit(kstring_t *s, int delimiter, int *n) -{ - int max = 0, *offsets = 0; - *n = ksplit_core(s->s, delimiter, &max, &offsets); - return offsets; -} - -#ifdef HTSLIB_SSIZE_T -#undef HTSLIB_SSIZE_T -#undef ssize_t -#endif - -#endif diff --git a/src/htslib-1.18/htslib/sam.h b/src/htslib-1.18/htslib/sam.h deleted file mode 100644 index cffa047..0000000 --- a/src/htslib-1.18/htslib/sam.h +++ /dev/null @@ -1,2419 +0,0 @@ -/// @file htslib/sam.h -/// High-level SAM/BAM/CRAM sequence file operations. -/* - Copyright (C) 2008, 2009, 2013-2023 Genome Research Ltd. - Copyright (C) 2010, 2012, 2013 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_SAM_H -#define HTSLIB_SAM_H - -#include -#include -#include -#include "hts.h" -#include "hts_endian.h" - -// Ensure ssize_t exists within this header. All #includes must precede this, -// and ssize_t must be undefined again at the end of this header. 
-#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t -#define HTSLIB_SSIZE_T -#define ssize_t intptr_t -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/// Highest SAM format version supported by this library -#define SAM_FORMAT_VERSION "1.6" - -/*************************** - *** SAM/BAM/CRAM header *** - ***************************/ - -/*! @typedef - * @abstract Header extension structure, grouping a collection - * of hash tables that contain the parsed header data. - */ - -typedef struct sam_hrecs_t sam_hrecs_t; - -/*! @typedef - @abstract Structure for the alignment header. - @field n_targets number of reference sequences - @field l_text length of the plain text in the header (may be zero if - the header has been edited) - @field target_len lengths of the reference sequences - @field target_name names of the reference sequences - @field text plain text (may be NULL if the header has been edited) - @field sdict header dictionary - @field hrecs pointer to the extended header struct (internal use only) - @field ref_count reference count - - @note The text and l_text fields are included for backwards compatibility. - These fields may be set to NULL and zero respectively as a side-effect - of calling some header API functions. New code that needs to access the - header text should use the sam_hdr_str() and sam_hdr_length() functions - instead of these fields. - */ - -typedef struct sam_hdr_t { - int32_t n_targets, ignore_sam_err; - size_t l_text; - uint32_t *target_len; - const int8_t *cigar_tab HTS_DEPRECATED("Use bam_cigar_table[] instead"); - char **target_name; - char *text; - void *sdict; - sam_hrecs_t *hrecs; - uint32_t ref_count; -} sam_hdr_t; - -/*! @typedef - * @abstract Old name for compatibility with existing code. 
- */ -typedef sam_hdr_t bam_hdr_t; - -/**************************** - *** CIGAR related macros *** - ****************************/ - -#define BAM_CMATCH 0 -#define BAM_CINS 1 -#define BAM_CDEL 2 -#define BAM_CREF_SKIP 3 -#define BAM_CSOFT_CLIP 4 -#define BAM_CHARD_CLIP 5 -#define BAM_CPAD 6 -#define BAM_CEQUAL 7 -#define BAM_CDIFF 8 -#define BAM_CBACK 9 - -#define BAM_CIGAR_STR "MIDNSHP=XB" -#define BAM_CIGAR_SHIFT 4 -#define BAM_CIGAR_MASK 0xf -#define BAM_CIGAR_TYPE 0x3C1A7 - -/*! @abstract Table for converting a CIGAR operator character to BAM_CMATCH etc. -Result is operator code or -1. Be sure to cast the index if it is a plain char: - int op = bam_cigar_table[(unsigned char) ch]; -*/ -HTSLIB_EXPORT -extern const int8_t bam_cigar_table[256]; - -#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK) -#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT) -// Note that BAM_CIGAR_STR is padded to length 16 bytes below so that -// the array look-up will not fall off the end. '?' is chosen as the -// padding character so it's easy to spot if one is emitted, and will -// result in a parsing failure (in sam_parse1(), at least) if read. -#define bam_cigar_opchr(c) (BAM_CIGAR_STR "??????" [bam_cigar_op(c)]) -#define bam_cigar_gen(l, o) ((l)<>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference - -/*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */ -#define BAM_FPAIRED 1 -/*! @abstract the read is mapped in a proper pair */ -#define BAM_FPROPER_PAIR 2 -/*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */ -#define BAM_FUNMAP 4 -/*! @abstract the mate is unmapped */ -#define BAM_FMUNMAP 8 -/*! @abstract the read is mapped to the reverse strand */ -#define BAM_FREVERSE 16 -/*! @abstract the mate is mapped to the reverse strand */ -#define BAM_FMREVERSE 32 -/*! @abstract this is read1 */ -#define BAM_FREAD1 64 -/*! @abstract this is read2 */ -#define BAM_FREAD2 128 -/*! 
@abstract not primary alignment */ -#define BAM_FSECONDARY 256 -/*! @abstract QC failure */ -#define BAM_FQCFAIL 512 -/*! @abstract optical or PCR duplicate */ -#define BAM_FDUP 1024 -/*! @abstract supplementary alignment */ -#define BAM_FSUPPLEMENTARY 2048 - -/************************* - *** Alignment records *** - *************************/ - -/* - * Assumptions made here. While pos can be 64-bit, no sequence - * itself is that long, but due to ref skip CIGAR fields it - * may span more than that. (CIGAR itself is 28-bit len + 4 bit - * type, but in theory we can combine multiples together.) - * - * Mate position and insert size also need to be 64-bit, but - * we won't accept more than 32-bit for tid. - * - * The bam1_core_t structure is the *in memory* layout and not - * the same as the on-disk format. 64-bit changes here permit - * SAM to work with very long chromosomes and permit BAM and CRAM - * to seamlessly update in the future without further API/ABI - * revisions. - */ - -/*! @typedef - @abstract Structure for core alignment information. - @field pos 0-based leftmost coordinate - @field tid chromosome ID, defined by sam_hdr_t - @field bin bin calculated by bam_reg2bin() - @field qual mapping quality - @field l_extranul length of extra NULs between qname & cigar (for alignment) - @field flag bitwise flag - @field l_qname length of the query name - @field n_cigar number of CIGAR operations - @field l_qseq length of the query sequence (read) - @field mtid chromosome ID of next read in template, defined by sam_hdr_t - @field mpos 0-based leftmost coordinate of next read in template - @field isize observed template length ("insert size") - */ -typedef struct bam1_core_t { - hts_pos_t pos; - int32_t tid; - uint16_t bin; // NB: invalid on 64-bit pos - uint8_t qual; - uint8_t l_extranul; - uint16_t flag; - uint16_t l_qname; - uint32_t n_cigar; - int32_t l_qseq; - int32_t mtid; - hts_pos_t mpos; - hts_pos_t isize; -} bam1_core_t; - -/*! 
@typedef - @abstract Structure for one alignment. - @field core core information about the alignment - @field id - @field data all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux - @field l_data current length of bam1_t::data - @field m_data maximum length of bam1_t::data - @field mempolicy memory handling policy, see bam_set_mempolicy() - - @discussion Notes: - - 1. The data blob should be accessed using bam_get_qname, bam_get_cigar, - bam_get_seq, bam_get_qual and bam_get_aux macros. These returns pointers - to the start of each type of data. - 2. qname is terminated by one to four NULs, so that the following - cigar data is 32-bit aligned; core.l_qname includes these trailing NULs, - while core.l_extranul counts the excess NULs (so 0 <= l_extranul <= 3). - 3. Cigar data is encoded 4 bytes per CIGAR operation. - See the bam_cigar_* macros for manipulation. - 4. seq is nibble-encoded according to bam_nt16_table. - See the bam_seqi macro for retrieving individual bases. - 5. Per base qualities are stored in the Phred scale with no +33 offset. - Ie as per the BAM specification and not the SAM ASCII printable method. - */ -typedef struct bam1_t { - bam1_core_t core; - uint64_t id; - uint8_t *data; - int l_data; - uint32_t m_data; - uint32_t mempolicy:2, :30 /* Reserved */; -} bam1_t; - -/*! @function - @abstract Get whether the query is on the reverse strand - @param b pointer to an alignment - @return boolean true if query is on the reverse strand - */ -#define bam_is_rev(b) (((b)->core.flag&BAM_FREVERSE) != 0) -/*! @function - @abstract Get whether the query's mate is on the reverse strand - @param b pointer to an alignment - @return boolean true if query's mate on the reverse strand - */ -#define bam_is_mrev(b) (((b)->core.flag&BAM_FMREVERSE) != 0) -/*! 
@function - @abstract Get the name of the query - @param b pointer to an alignment - @return pointer to the name string, null terminated - */ -#define bam_get_qname(b) ((char*)(b)->data) -/*! @function - @abstract Get the CIGAR array - @param b pointer to an alignment - @return pointer to the CIGAR array - - @discussion In the CIGAR array, each element is a 32-bit integer. The - lower 4 bits gives a CIGAR operation and the higher 28 bits keep the - length of a CIGAR. - */ -#define bam_get_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname)) -/*! @function - @abstract Get query sequence - @param b pointer to an alignment - @return pointer to sequence - - @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G, - 8 for T and 15 for N. Two bases are packed in one byte with the base - at the higher 4 bits having smaller coordinate on the read. It is - recommended to use bam_seqi() macro to get the base. - */ -#define bam_get_seq(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname) -/*! @function - @abstract Get query quality - @param b pointer to an alignment - @return pointer to quality string - */ -#define bam_get_qual(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1)) -/*! @function - @abstract Get auxiliary data - @param b pointer to an alignment - @return pointer to the concatenated auxiliary data - */ -#define bam_get_aux(b) ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1) + (b)->core.l_qseq) -/*! @function - @abstract Get length of auxiliary data - @param b pointer to an alignment - @return length of the concatenated auxiliary data - */ -#define bam_get_l_aux(b) ((b)->l_data - ((b)->core.n_cigar<<2) - (b)->core.l_qname - (b)->core.l_qseq - (((b)->core.l_qseq + 1)>>1)) -/*! @function - @abstract Get a base on read - @param s Query sequence returned by bam_get_seq() - @param i The i-th position, 0-based - @return 4-bit integer representing the base. 
- */ -#define bam_seqi(s, i) ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf) -/*! - @abstract Modifies a single base in the bam structure. - @param s Query sequence returned by bam_get_seq() - @param i The i-th position, 0-based - @param b Base in nt16 nomenclature (see seq_nt16_table) -*/ -#define bam_set_seqi(s,i,b) ((s)[(i)>>1] = ((s)[(i)>>1] & (0xf0 >> ((~(i)&1)<<2))) | ((b)<<((~(i)&1)<<2))) - -/************************** - *** Exported functions *** - **************************/ - -/*************** - *** BAM I/O *** - ***************/ - -/* Header */ - -/// Generates a new unpopulated header structure. -/*! - * - * @return A valid pointer to new header on success, NULL on failure - * - * The sam_hdr_t struct returned by a successful call should be freed - * via sam_hdr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -sam_hdr_t *sam_hdr_init(void); - -/// Read the header from a BAM compressed file. -/*! - * @param fp File pointer - * @return A valid pointer to new header on success, NULL on failure - * - * This function only works with BAM files. It is usually better to use - * sam_hdr_read(), which works on SAM, BAM and CRAM files. - * - * The sam_hdr_t struct returned by a successful call should be freed - * via sam_hdr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -sam_hdr_t *bam_hdr_read(BGZF *fp); - -/// Writes the header to a BAM file. -/*! - * @param fp File pointer - * @param h Header pointer - * @return 0 on success, -1 on failure - * - * This function only works with BAM files. Use sam_hdr_write() to - * write in any of the SAM, BAM or CRAM formats. - */ -HTSLIB_EXPORT -int bam_hdr_write(BGZF *fp, const sam_hdr_t *h) HTS_RESULT_USED; - -/*! - * Frees the resources associated with a header. - */ -HTSLIB_EXPORT -void sam_hdr_destroy(sam_hdr_t *h); - -/// Duplicate a header structure. -/*! 
- * @return A valid pointer to new header on success, NULL on failure - * - * The sam_hdr_t struct returned by a successful call should be freed - * via sam_hdr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -sam_hdr_t *sam_hdr_dup(const sam_hdr_t *h0); - -/*! - * @abstract Old names for compatibility with existing code. - */ -static inline sam_hdr_t *bam_hdr_init(void) { return sam_hdr_init(); } -static inline void bam_hdr_destroy(sam_hdr_t *h) { sam_hdr_destroy(h); } -static inline sam_hdr_t *bam_hdr_dup(const sam_hdr_t *h0) { return sam_hdr_dup(h0); } - -typedef htsFile samFile; - -/// Create a header from existing text. -/*! - * @param l_text Length of text - * @param text Header text - * @return A populated sam_hdr_t structure on success; NULL on failure. - * @note The text field of the returned header will be NULL, and the l_text - * field will be zero. - * - * The sam_hdr_t struct returned by a successful call should be freed - * via sam_hdr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -sam_hdr_t *sam_hdr_parse(size_t l_text, const char *text); - -/// Read a header from a SAM, BAM or CRAM file. -/*! - * @param fp Pointer to a SAM, BAM or CRAM file handle - * @return A populated sam_hdr_t struct on success; NULL on failure. - * - * The sam_hdr_t struct returned by a successful call should be freed - * via sam_hdr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -sam_hdr_t *sam_hdr_read(samFile *fp); - -/// Write a header to a SAM, BAM or CRAM file. -/*! - * @param fp SAM, BAM or CRAM file header - * @param h Header structure to write - * @return 0 on success; -1 on failure - */ -HTSLIB_EXPORT -int sam_hdr_write(samFile *fp, const sam_hdr_t *h) HTS_RESULT_USED; - -/// Returns the current length of the header text. -/*! - * @return >= 0 on success, SIZE_MAX on failure - */ -HTSLIB_EXPORT -size_t sam_hdr_length(sam_hdr_t *h); - -/// Returns the text representation of the header. -/*! 
- * @return valid char pointer on success, NULL on failure - * - * The returned string is part of the header structure. It will remain - * valid until a call to a header API function causes the string to be - * invalidated, or the header is destroyed. - * - * The caller should not attempt to free or realloc this pointer. - */ -HTSLIB_EXPORT -const char *sam_hdr_str(sam_hdr_t *h); - -/// Returns the number of references in the header. -/*! - * @return >= 0 on success, -1 on failure - */ -HTSLIB_EXPORT -int sam_hdr_nref(const sam_hdr_t *h); - -/* ==== Line level methods ==== */ - -/// Add formatted lines to an existing header. -/*! - * @param lines Full SAM header record, eg "@SQ\tSN:foo\tLN:100", with - * optional new-line. If it contains more than 1 line then - * multiple lines will be added in order - * @param len The maximum length of lines (if an early NUL is not - * encountered). len may be 0 if unknown, in which case - * lines must be NUL-terminated - * @return 0 on success, -1 on failure - * - * The lines will be appended to the end of the existing header - * (apart from HD, which always comes first). - */ -HTSLIB_EXPORT -int sam_hdr_add_lines(sam_hdr_t *h, const char *lines, size_t len); - -/// Adds a single line to an existing header. -/*! - * Specify type and one or more key,value pairs, ending with the NULL key. - * Eg. sam_hdr_add_line(h, "SQ", "SN", "foo", "LN", "100", NULL). - * - * @param type Type of the added line. Eg. "SQ" - * @return 0 on success, -1 on failure - * - * The new line will be added immediately after any others of the same - * type, or at the end of the existing header if no lines of the - * given type currently exist. The exception is HD lines, which always - * come first. If an HD line already exists, it will be replaced. - */ -HTSLIB_EXPORT -int sam_hdr_add_line(sam_hdr_t *h, const char *type, ...); - -/// Returns a complete line of formatted text for a given type and ID. -/*! - * @param type Type of the searched line. Eg. 
"SQ" - * @param ID_key Tag key defining the line. Eg. "SN" - * @param ID_value Tag value associated with the key above. Eg. "ref1" - * @param ks kstring to hold the result - * @return 0 on success; - * -1 if no matching line is found - * -2 on other failures - * - * Puts a complete line of formatted text for a specific header type/ID - * combination into @p ks. If ID_key is NULL then it returns the first line of - * the specified type. - * - * Any existing content in @p ks will be overwritten. - */ -HTSLIB_EXPORT -int sam_hdr_find_line_id(sam_hdr_t *h, const char *type, - const char *ID_key, const char *ID_val, kstring_t *ks); - -/// Returns a complete line of formatted text for a given type and index. -/*! - * @param type Type of the searched line. Eg. "SQ" - * @param position Index in lines of this type (zero-based) - * @param ks kstring to hold the result - * @return 0 on success; - * -1 if no matching line is found - * -2 on other failures - * - * Puts a complete line of formatted text for a specific line into @p ks. - * The header line is selected using the @p type and @p position parameters. - * - * Any existing content in @p ks will be overwritten. - */ -HTSLIB_EXPORT -int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type, - int pos, kstring_t *ks); - -/// Remove a line with given type / id from a header -/*! - * @param type Type of the searched line. Eg. "SQ" - * @param ID_key Tag key defining the line. Eg. "SN" - * @param ID_value Tag value associated with the key above. Eg. "ref1" - * @return 0 on success, -1 on error - * - * Remove a line from the header by specifying a tag:value that uniquely - * identifies the line, i.e. the @SQ line containing "SN:ref1". - * - * \@SQ line is uniquely identified by the SN tag. - * \@RG line is uniquely identified by the ID tag. - * \@PG line is uniquely identified by the ID tag. - * Eg. 
sam_hdr_remove_line_id(h, "SQ", "SN", "ref1") - * - * If no key:value pair is specified, the type MUST be followed by a NULL argument and - * the first line of the type will be removed, if any. - * Eg. sam_hdr_remove_line_id(h, "SQ", NULL, NULL) - * - * @note Removing \@PG lines is currently unsupported. - */ -HTSLIB_EXPORT -int sam_hdr_remove_line_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value); - -/// Remove nth line of a given type from a header -/*! - * @param type Type of the searched line. Eg. "SQ" - * @param position Index in lines of this type (zero-based). E.g. 3 - * @return 0 on success, -1 on error - * - * Remove a line from the header by specifying the position in the type - * group, i.e. 3rd @SQ line. - */ -HTSLIB_EXPORT -int sam_hdr_remove_line_pos(sam_hdr_t *h, const char *type, int position); - -/// Add or update tag key,value pairs in a header line. -/*! - * @param type Type of the searched line. Eg. "SQ" - * @param ID_key Tag key defining the line. Eg. "SN" - * @param ID_value Tag value associated with the key above. Eg. "ref1" - * @return 0 on success, -1 on error - * - * Adds or updates tag key,value pairs in a header line. - * Eg. for adding M5 tags to @SQ lines or updating sort order for the - * @HD line. - * - * Specify multiple key,value pairs ending in NULL. Eg. - * sam_hdr_update_line(h, "RG", "ID", "rg1", "DS", "description", "PG", "samtools", NULL) - * - * Attempting to update the record name (i.e. @SQ SN or @RG ID) will - * work as long as the new name is not already in use, however doing this - * on a file opened for reading may produce unexpected results. - * - * Renaming an @RG record in this way will only change the header. Alignment - * records written later will not be updated automatically even if they - * reference the old read group name. - * - * Attempting to change an @PG ID tag is not permitted. 
- */ -HTSLIB_EXPORT -int sam_hdr_update_line(sam_hdr_t *h, const char *type, - const char *ID_key, const char *ID_value, ...); - -/// Remove all lines of a given type from a header, except the one matching an ID -/*! - * @param type Type of the searched line. Eg. "SQ" - * @param ID_key Tag key defining the line. Eg. "SN" - * @param ID_value Tag value associated with the key above. Eg. "ref1" - * @return 0 on success, -1 on failure - * - * Remove all lines of type from the header, except the one - * specified by tag:value, i.e. the @SQ line containing "SN:ref1". - * - * If no line matches the key:value ID, all lines of the given type are removed. - * To remove all lines of a given type, use NULL for both ID_key and ID_value. - */ -HTSLIB_EXPORT -int sam_hdr_remove_except(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value); - -/// Remove header lines of a given type, except those in a given ID set -/*! - * @param type Type of the searched line. Eg. "RG" - * @param id Tag key defining the line. Eg. "ID" - * @param rh Hash set initialised by the caller with the values to be kept. - * See description for how to create this. If @p rh is NULL, all - * lines of this type will be removed. - * @return 0 on success, -1 on failure - * - * Remove all lines of type @p type from the header, except the ones - * specified in the hash set @p rh. If @p rh is NULL, all lines of - * this type will be removed. - * Declaration of @p rh is done using KHASH_SET_INIT_STR macro. Eg. 
- * @code{.c} - * #include "htslib/khash.h" - * KHASH_SET_INIT_STR(keep) - * typedef khash_t(keep) *keephash_t; - * - * void your_method() { - * samFile *sf = sam_open("alignment.bam", "r"); - * sam_hdr_t *h = sam_hdr_read(sf); - * keephash_t rh = kh_init(keep); - * int ret = 0; - * kh_put(keep, rh, strdup("chr2"), &ret); - * kh_put(keep, rh, strdup("chr3"), &ret); - * if (sam_hdr_remove_lines(h, "SQ", "SN", rh) == -1) - * fprintf(stderr, "Error removing lines\n"); - * khint_t k; - * for (k = 0; k < kh_end(rh); ++k) - * if (kh_exist(rh, k)) free((char*)kh_key(rh, k)); - * kh_destroy(keep, rh); - * sam_hdr_destroy(h); - * sam_close(sf); - * } - * @endcode - * - */ -HTSLIB_EXPORT -int sam_hdr_remove_lines(sam_hdr_t *h, const char *type, const char *id, void *rh); - -/// Count the number of lines for a given header type -/*! - * @param h BAM header - * @param type Header type to count. Eg. "RG" - * @return Number of lines of this type on success; -1 on failure - */ -HTSLIB_EXPORT -int sam_hdr_count_lines(sam_hdr_t *h, const char *type); - -/// Index of the line for the types that have dedicated look-up tables (SQ, RG, PG) -/*! - * @param h BAM header - * @param type Type of the searched line. Eg. "RG" - * @param key The value of the identifying key. Eg. "rg1" - * @return 0-based index on success; -1 if line does not exist; -2 on failure - */ -HTSLIB_EXPORT -int sam_hdr_line_index(sam_hdr_t *bh, const char *type, const char *key); - -/// Id key of the line for the types that have dedicated look-up tables (SQ, RG, PG) -/*! - * @param h BAM header - * @param type Type of the searched line. Eg. "RG" - * @param pos Zero-based index inside the type group. Eg. 2 (for the third RG line) - * @return Valid key string on success; NULL on failure - */ -HTSLIB_EXPORT -const char *sam_hdr_line_name(sam_hdr_t *bh, const char *type, int pos); - -/* ==== Key:val level methods ==== */ - -/// Return the value associated with a key for a header line identified by ID_key:ID_val -/*! 
- * @param type Type of the line to which the tag belongs. Eg. "SQ" - * @param ID_key Tag key defining the line. Eg. "SN". Can be NULL, if looking for the first line. - * @param ID_value Tag value associated with the key above. Eg. "ref1". Can be NULL, if ID_key is NULL. - * @param key Key of the searched tag. Eg. "LN" - * @param ks kstring where the value will be written - * @return 0 on success - * -1 if the requested tag does not exist - * -2 on other errors - * - * Looks for a specific key in a single SAM header line and writes the - * associated value into @p ks. The header line is selected using the ID_key - * and ID_value parameters. Any pre-existing content in @p ks will be - * overwritten. - */ -HTSLIB_EXPORT -int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value, const char *key, kstring_t *ks); - -/// Return the value associated with a key for a header line identified by position -/*! - * @param type Type of the line to which the tag belongs. Eg. "SQ" - * @param position Index in lines of this type (zero-based). E.g. 3 - * @param key Key of the searched tag. Eg. "LN" - * @param ks kstring where the value will be written - * @return 0 on success - * -1 if the requested tag does not exist - * -2 on other errors - * - * Looks for a specific key in a single SAM header line and writes the - * associated value into @p ks. The header line is selected using the @p type - * and @p position parameters. Any pre-existing content in @p ks will be - * overwritten. - */ -HTSLIB_EXPORT -int sam_hdr_find_tag_pos(sam_hdr_t *h, const char *type, int pos, const char *key, kstring_t *ks); - -/// Remove the key from the line identified by type, ID_key and ID_value. -/*! - * @param type Type of the line to which the tag belongs. Eg. "SQ" - * @param ID_key Tag key defining the line. Eg. "SN" - * @param ID_value Tag value associated with the key above. Eg. "ref1" - * @param key Key of the targeted tag. Eg. 
"M5" - * @return 1 if the key was removed; 0 if it was not present; -1 on error - */ -HTSLIB_EXPORT -int sam_hdr_remove_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value, const char *key); - -/// Get the target id for a given reference sequence name -/*! - * @param ref Reference name - * @return Positive value on success, - * -1 if unknown reference, - * -2 if the header could not be parsed - * - * Looks up a reference sequence by name in the reference hash table - * and returns the numerical target id. - */ -HTSLIB_EXPORT -int sam_hdr_name2tid(sam_hdr_t *h, const char *ref); - -/// Get the reference sequence name from a target index -/*! - * @param tid Target index - * @return Valid reference name on success, NULL on failure - * - * Fetch the reference sequence name from the target name array, - * using the numerical target id. - */ -HTSLIB_EXPORT -const char *sam_hdr_tid2name(const sam_hdr_t *h, int tid); - -/// Get the reference sequence length from a target index -/*! - * @param tid Target index - * @return Strictly positive value on success, 0 on failure - * - * Fetch the reference sequence length from the target length array, - * using the numerical target id. - */ -HTSLIB_EXPORT -hts_pos_t sam_hdr_tid2len(const sam_hdr_t *h, int tid); - -/// Alias of sam_hdr_name2tid(), for backwards compatibility. -/*! - * @param ref Reference name - * @return Positive value on success, - * -1 if unknown reference, - * -2 if the header could not be parsed - */ -static inline int bam_name2id(sam_hdr_t *h, const char *ref) { return sam_hdr_name2tid(h, ref); } - -/// Generate a unique \@PG ID: value -/*! - * @param name Name of the program. Eg. samtools - * @return Valid ID on success, NULL on failure - * - * Returns a unique ID from a base name. The string returned will remain - * valid until the next call to this function, or the header is destroyed. - * The caller should not attempt to free() or realloc() it. 
- */ -HTSLIB_EXPORT -const char *sam_hdr_pg_id(sam_hdr_t *h, const char *name); - -/// Add an \@PG line. -/*! - * @param name Name of the program. Eg. samtools - * @return 0 on success, -1 on failure - * - * If we wish complete control over this use sam_hdr_add_line() directly. This - * function uses that, but attempts to do a lot of tedious house work for - * you too. - * - * - It will generate a suitable ID if the supplied one clashes. - * - It will generate multiple \@PG records if we have multiple PG chains. - * - * Call it as per sam_hdr_add_line() with a series of key,value pairs ending - * in NULL. - */ -HTSLIB_EXPORT -int sam_hdr_add_pg(sam_hdr_t *h, const char *name, ...); - -/*! - * A function to help with construction of CL tags in @PG records. - * Takes an argc, argv pair and returns a single space-separated string. - * This string should be deallocated by the calling function. - * - * @return - * Returns malloced char * on success; - * NULL on failure - */ -HTSLIB_EXPORT -char *stringify_argv(int argc, char *argv[]); - -/// Increments the reference count on a header -/*! - * This permits multiple files to share the same header, all calling - * sam_hdr_destroy when done, without causing errors for other open files. - */ -HTSLIB_EXPORT -void sam_hdr_incr_ref(sam_hdr_t *h); - -/* - * Macros for changing the \@HD line. They eliminate the need to use NULL method arguments. - */ - -/// Returns the SAM formatted text of the \@HD header line -#define sam_hdr_find_hd(h, ks) sam_hdr_find_line_id((h), "HD", NULL, NULL, (ks)) -/// Returns the value associated with a given \@HD line tag -#define sam_hdr_find_tag_hd(h, key, ks) sam_hdr_find_tag_id((h), "HD", NULL, NULL, (key), (ks)) -/// Adds or updates tags on the header \@HD line -#define sam_hdr_update_hd(h, ...) 
sam_hdr_update_line((h), "HD", NULL, NULL, __VA_ARGS__, NULL) -/// Removes the \@HD line tag with the given key -#define sam_hdr_remove_tag_hd(h, key) sam_hdr_remove_tag_id((h), "HD", NULL, NULL, (key)) - -/* Alignment */ - -/// Create a new bam1_t alignment structure -/** - @return An empty bam1_t structure on success, NULL on failure - - The bam1_t struct returned by a successful call should be freed - via bam_destroy1() when it is no longer needed. - */ -HTSLIB_EXPORT -bam1_t *bam_init1(void); - -/// Destroy a bam1_t structure -/** - @param b structure to destroy - - Does nothing if @p b is NULL. If not, all memory associated with @p b - will be freed, along with the structure itself. @p b should not be - accessed after calling this function. - */ -HTSLIB_EXPORT -void bam_destroy1(bam1_t *b); - -#define BAM_USER_OWNS_STRUCT 1 -#define BAM_USER_OWNS_DATA 2 - -/// Set alignment record memory policy -/** - @param b Alignment record - @param policy Desired policy - - Allows the way HTSlib reallocates and frees bam1_t data to be - changed. @policy can be set to the bitwise-or of the following - values: - - \li \c BAM_USER_OWNS_STRUCT - If this is set then bam_destroy1() will not try to free the bam1_t struct. - - \li \c BAM_USER_OWNS_DATA - If this is set, bam_destroy1() will not free the bam1_t::data pointer. - Also, functions which need to expand bam1_t::data memory will change - behaviour. Instead of calling realloc() on the pointer, they will - allocate a new data buffer and copy any existing content in to it. - The existing memory will \b not be freed. bam1_t::data will be - set to point to the new memory and the BAM_USER_OWNS_DATA flag will be - cleared. - - BAM_USER_OWNS_STRUCT allows bam_destroy1() to be called on bam1_t - structures that are members of an array. - - BAM_USER_OWNS_DATA can be used by applications that want more control - over where the variable-length parts of the bam record will be stored. 
- By preventing calls to free() and realloc(), it allows bam1_t::data - to hold pointers to memory that cannot be passed to those functions. - - Example: Read a block of alignment records, storing the variable-length - data in a single buffer and the records in an array. Stop when either - the array or the buffer is full. - - \code{.c} - #define MAX_RECS 1000 - #define REC_LENGTH 400 // Average length estimate, to get buffer size - size_t bufsz = MAX_RECS * REC_LENGTH, nrecs, buff_used = 0; - bam1_t *recs = calloc(MAX_RECS, sizeof(bam1_t)); - uint8_t *buffer = malloc(bufsz); - int res = 0, result = EXIT_FAILURE; - uint32_t new_m_data; - - if (!recs || !buffer) goto cleanup; - for (nrecs = 0; nrecs < MAX_RECS; nrecs++) { - bam_set_mempolicy(&recs[nrecs], BAM_USER_OWNS_STRUCT|BAM_USER_OWNS_DATA); - - // Set data pointer to unused part of buffer - recs[nrecs].data = &buffer[buff_used]; - - // Set m_data to size of unused part of buffer. On 64-bit platforms it - // will be necessary to limit this to UINT32_MAX due to the size of - // bam1_t::m_data (not done here as our buffer is only 400K). - recs[nrecs].m_data = bufsz - buff_used; - - // Read the record - res = sam_read1(file_handle, header, &recs[nrecs]); - if (res <= 0) break; // EOF or error - - // Check if the record data didn't fit - if not, stop reading - if ((bam_get_mempolicy(&recs[nrecs]) & BAM_USER_OWNS_DATA) == 0) { - nrecs++; // Include last record in count - break; - } - - // Adjust m_data to the space actually used. If space is available, - // round up to eight bytes so the next record aligns nicely. - new_m_data = ((uint32_t) recs[nrecs].l_data + 7) & (~7U); - if (new_m_data < recs[nrecs].m_data) recs[nrecs].m_data = new_m_data; - - buff_used += recs[nrecs].m_data; - } - if (res < 0) goto cleanup; - result = EXIT_SUCCESS; - - // ... use data ... 
- - cleanup: - if (recs) { - for (size_t i = 0; i < nrecs; i++) - bam_destroy1(&recs[i]); - free(recs); - } - free(buffer); - - \endcode -*/ -static inline void bam_set_mempolicy(bam1_t *b, uint32_t policy) { - b->mempolicy = policy; -} - -/// Get alignment record memory policy -/** @param b Alignment record - - See bam_set_mempolicy() - */ -static inline uint32_t bam_get_mempolicy(bam1_t *b) { - return b->mempolicy; -} - -/// Read a BAM format alignment record -/** - @param fp BGZF file being read - @param b Destination for the alignment data - @return number of bytes read on success - -1 at end of file - < -1 on failure - - This function can only read BAM format files. Most code should use - sam_read1() instead, which can be used with BAM, SAM and CRAM formats. -*/ -HTSLIB_EXPORT -int bam_read1(BGZF *fp, bam1_t *b) HTS_RESULT_USED; - -/// Write a BAM format alignment record -/** - @param fp BGZF file being written - @param b Alignment record to write - @return number of bytes written on success - -1 on error - - This function can only write BAM format files. Most code should use - sam_write1() instead, which can be used with BAM, SAM and CRAM formats. -*/ -HTSLIB_EXPORT -int bam_write1(BGZF *fp, const bam1_t *b) HTS_RESULT_USED; - -/// Copy alignment record data -/** - @param bdst Destination alignment record - @param bsrc Source alignment record - @return bdst on success; NULL on failure - */ -HTSLIB_EXPORT -bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) HTS_RESULT_USED; - -/// Create a duplicate alignment record -/** - @param bsrc Source alignment record - @return Pointer to a new alignment record on success; NULL on failure - - The bam1_t struct returned by a successful call should be freed - via bam_destroy1() when it is no longer needed. - */ -HTSLIB_EXPORT -bam1_t *bam_dup1(const bam1_t *bsrc); - -/// Sets all components of an alignment structure -/** - @param bam Target alignment structure. Must be initialized by a call to bam_init1(). 
- The data field will be reallocated automatically as needed. - @param l_qname Length of the query name. If set to 0, the placeholder query name "*" will be used. - @param qname Query name, may be NULL if l_qname = 0 - @param flag Bitwise flag, a combination of the BAM_F* constants. - @param tid Chromosome ID, defined by sam_hdr_t (a.k.a. RNAME). - @param pos 0-based leftmost coordinate. - @param mapq Mapping quality. - @param n_cigar Number of CIGAR operations. - @param cigar CIGAR data, may be NULL if n_cigar = 0. - @param mtid Chromosome ID of next read in template, defined by sam_hdr_t (a.k.a. RNEXT). - @param mpos 0-based leftmost coordinate of next read in template (a.k.a. PNEXT). - @param isize Observed template length ("insert size") (a.k.a. TLEN). - @param l_seq Length of the query sequence (read) and sequence quality string. - @param seq Sequence, may be NULL if l_seq = 0. - @param qual Sequence quality, may be NULL. - @param l_aux Length to be reserved for auxiliary field data, may be 0. - - @return >= 0 on success (number of bytes written to bam->data), negative (with errno set) on failure. -*/ -HTSLIB_EXPORT -int bam_set1(bam1_t *bam, - size_t l_qname, const char *qname, - uint16_t flag, int32_t tid, hts_pos_t pos, uint8_t mapq, - size_t n_cigar, const uint32_t *cigar, - int32_t mtid, hts_pos_t mpos, hts_pos_t isize, - size_t l_seq, const char *seq, const char *qual, - size_t l_aux); - -/// Calculate query length from CIGAR data -/** - @param n_cigar Number of items in @p cigar - @param cigar CIGAR data - @return Query length - - CIGAR data is stored as in the BAM format, i.e. (op_len << 4) | op - where op_len is the length in bases and op is a value between 0 and 8 - representing one of the operations "MIDNSHP=X" (M = 0; X = 8) - - This function returns the sum of the lengths of the M, I, S, = and X - operations in @p cigar (these are the operations that "consume" query - bases). All other operations (including invalid ones) are ignored. 
- - @note This return type of this function is hts_pos_t so that it can - correctly return the length of CIGAR sequences including many long - operations without overflow. However, other restrictions (notably the sizes - of bam1_core_t::l_qseq and bam1_t::data) limit the maximum query sequence - length supported by HTSlib to fewer than INT_MAX bases. - */ -HTSLIB_EXPORT -hts_pos_t bam_cigar2qlen(int n_cigar, const uint32_t *cigar); - -/// Calculate reference length from CIGAR data -/** - @param n_cigar Number of items in @p cigar - @param cigar CIGAR data - @return Reference length - - CIGAR data is stored as in the BAM format, i.e. (op_len << 4) | op - where op_len is the length in bases and op is a value between 0 and 8 - representing one of the operations "MIDNSHP=X" (M = 0; X = 8) - - This function returns the sum of the lengths of the M, D, N, = and X - operations in @p cigar (these are the operations that "consume" reference - bases). All other operations (including invalid ones) are ignored. - */ -HTSLIB_EXPORT -hts_pos_t bam_cigar2rlen(int n_cigar, const uint32_t *cigar); - -/*! - @abstract Calculate the rightmost base position of an alignment on the - reference genome. - - @param b pointer to an alignment - @return the coordinate of the first base after the alignment, 0-based - - @discussion For a mapped read, this is just b->core.pos + bam_cigar2rlen. - For an unmapped read (either according to its flags or if it has no cigar - string) or a read whose cigar string consumes no reference bases at all, - we return b->core.pos + 1 by convention. - */ -HTSLIB_EXPORT -hts_pos_t bam_endpos(const bam1_t *b); - -HTSLIB_EXPORT -int bam_str2flag(const char *str); /** returns negative value on error */ - -HTSLIB_EXPORT -char *bam_flag2str(int flag); /** The string must be freed by the user */ - -/*! 
@function - @abstract Set the name of the query - @param b pointer to an alignment - @return 0 on success, -1 on failure - */ -HTSLIB_EXPORT -int bam_set_qname(bam1_t *b, const char *qname); - -/*! @function - @abstract Parse a CIGAR string into a uint32_t array - @param in [in] pointer to the source string - @param end [out] address of the pointer to the new end of the input string - can be NULL - @param a_cigar [in/out] address of the destination uint32_t buffer - @param a_mem [in/out] address of the allocated number of buffer elements - @return number of processed CIGAR operators; -1 on error - */ -HTSLIB_EXPORT -ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, size_t *a_mem); - -/*! @function - @abstract Parse a CIGAR string into a bam1_t struct - @param in [in] pointer to the source string - @param end [out] address of the pointer to the new end of the input string - can be NULL - @param b [in/out] address of the destination bam1_t struct - @return number of processed CIGAR operators; -1 on error - */ -HTSLIB_EXPORT -ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b); - -/************************* - *** BAM/CRAM indexing *** - *************************/ - -// These BAM iterator functions work only on BAM files. To work with either -// BAM or CRAM files use the sam_index_load() & sam_itr_*() functions. -#define bam_itr_destroy(iter) hts_itr_destroy(iter) -#define bam_itr_queryi(idx, tid, beg, end) sam_itr_queryi(idx, tid, beg, end) -#define bam_itr_querys(idx, hdr, region) sam_itr_querys(idx, hdr, region) -#define bam_itr_next(htsfp, itr, r) sam_itr_next((htsfp), (itr), (r)) - -// Load/build .csi or .bai BAM index file. Does not work with CRAM. -// It is recommended to use the sam_index_* functions below instead. 
-#define bam_index_load(fn) hts_idx_load((fn), HTS_FMT_BAI) -#define bam_index_build(fn, min_shift) (sam_index_build((fn), (min_shift))) - -/// Initialise fp->idx for the current format type for SAM, BAM and CRAM types . -/** @param fp File handle for the data file being written. - @param h Bam header structured (needed for BAI and CSI). - @param min_shift 0 for BAI, or larger for CSI (CSI defaults to 14). - @param fnidx Filename to write index to. This pointer must remain valid - until after sam_idx_save is called. - @return 0 on success, <0 on failure. - - @note This must be called after the header has been written, but before - any other data. -*/ -HTSLIB_EXPORT -int sam_idx_init(htsFile *fp, sam_hdr_t *h, int min_shift, const char *fnidx); - -/// Writes the index initialised with sam_idx_init to disk. -/** @param fp File handle for the data file being written. - @return 0 on success, <0 on failure. -*/ -HTSLIB_EXPORT -int sam_idx_save(htsFile *fp) HTS_RESULT_USED; - -/// Load a BAM (.csi or .bai) or CRAM (.crai) index file -/** @param fp File handle of the data file whose index is being opened - @param fn BAM/CRAM/etc filename to search alongside for the index file - @return The index, or NULL if an error occurred. - -Equivalent to sam_index_load3(fp, fn, NULL, HTS_IDX_SAVE_REMOTE); -*/ -HTSLIB_EXPORT -hts_idx_t *sam_index_load(htsFile *fp, const char *fn); - -/// Load a specific BAM (.csi or .bai) or CRAM (.crai) index file -/** @param fp File handle of the data file whose index is being opened - @param fn BAM/CRAM/etc data file filename - @param fnidx Index filename, or NULL to search alongside @a fn - @return The index, or NULL if an error occurred. 
- -Equivalent to sam_index_load3(fp, fn, fnidx, HTS_IDX_SAVE_REMOTE); -*/ -HTSLIB_EXPORT -hts_idx_t *sam_index_load2(htsFile *fp, const char *fn, const char *fnidx); - -/// Load or stream a BAM (.csi or .bai) or CRAM (.crai) index file -/** @param fp File handle of the data file whose index is being opened - @param fn BAM/CRAM/etc data file filename - @param fnidx Index filename, or NULL to search alongside @a fn - @param flags Flags to alter behaviour (see description) - @return The index, or NULL if an error occurred. - -The @p flags parameter can be set to a combination of the following values: - - HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes - HTS_IDX_SILENT_FAIL Fail silently if the index is not present - -Note that HTS_IDX_SAVE_REMOTE has no effect for remote CRAM indexes. They -are always downloaded and never cached locally. - -The index struct returned by a successful call should be freed -via hts_idx_destroy() when it is no longer needed. -*/ -HTSLIB_EXPORT -hts_idx_t *sam_index_load3(htsFile *fp, const char *fn, const char *fnidx, int flags); - -/// Generate and save an index file -/** @param fn Input BAM/etc filename, to which .csi/etc will be added - @param min_shift Positive to generate CSI, or 0 to generate BAI - @return 0 if successful, or negative if an error occurred (usually -1; or - -2: opening fn failed; -3: format not indexable; -4: - failed to create and/or save the index) -*/ -HTSLIB_EXPORT -int sam_index_build(const char *fn, int min_shift) HTS_RESULT_USED; - -/// Generate and save an index to a specific file -/** @param fn Input BAM/CRAM/etc filename - @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn - @param min_shift Positive to generate CSI, or 0 to generate BAI - @return 0 if successful, or negative if an error occurred (see - sam_index_build for error codes) -*/ -HTSLIB_EXPORT -int sam_index_build2(const char *fn, const char *fnidx, int min_shift) HTS_RESULT_USED; - -/// Generate and save an index to a 
specific file -/** @param fn Input BAM/CRAM/etc filename - @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn - @param min_shift Positive to generate CSI, or 0 to generate BAI - @param nthreads Number of threads to use when building the index - @return 0 if successful, or negative if an error occurred (see - sam_index_build for error codes) -*/ -HTSLIB_EXPORT -int sam_index_build3(const char *fn, const char *fnidx, int min_shift, int nthreads) HTS_RESULT_USED; - -/// Free a SAM iterator -/// @param iter Iterator to free -#define sam_itr_destroy(iter) hts_itr_destroy(iter) - -/// Create a BAM/CRAM iterator -/** @param idx Index - @param tid Target id - @param beg Start position in target - @param end End position in target - @return An iterator on success; NULL on failure - -The following special values (defined in htslib/hts.h)can be used for @p tid. -When using one of these values, @p beg and @p end are ignored. - - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file - HTS_IDX_START iterates over the entire file - HTS_IDX_REST iterates from the current position to the end of the file - HTS_IDX_NONE always returns "no more alignment records" - -When using HTS_IDX_REST or HTS_IDX_NONE, NULL can be passed in to @p idx. - */ -HTSLIB_EXPORT -hts_itr_t *sam_itr_queryi(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end); - -/// Create a SAM/BAM/CRAM iterator -/** @param idx Index - @param hdr Header - @param region Region specification - @return An iterator on success; NULL on failure - -Regions are parsed by hts_parse_reg(), and take one of the following forms: - -region | Outputs ---------------- | ------------- -REF | All reads with RNAME REF -REF: | All reads with RNAME REF -REF:START | Reads with RNAME REF overlapping START to end of REF -REF:-END | Reads with RNAME REF overlapping start of REF to END -REF:START-END | Reads with RNAME REF overlapping START to END -. 
| All reads from the start of the file -* | Unmapped reads at the end of the file (RNAME '*' in SAM) - -The form `REF:` should be used when the reference name itself contains a colon. - -Note that SAM files must be bgzf-compressed for iterators to work. - */ -HTSLIB_EXPORT -hts_itr_t *sam_itr_querys(const hts_idx_t *idx, sam_hdr_t *hdr, const char *region); - -/// Create a multi-region iterator -/** @param idx Index - @param hdr Header - @param reglist Array of regions to iterate over - @param regcount Number of items in reglist - -Each @p reglist entry should have the reference name in the `reg` field, an -array of regions for that reference in `intervals` and the number of items -in `intervals` should be stored in `count`. No other fields need to be filled -in. - -The iterator will return all reads overlapping the given regions. If a read -overlaps more than one region, it will only be returned once. - */ -HTSLIB_EXPORT -hts_itr_t *sam_itr_regions(const hts_idx_t *idx, sam_hdr_t *hdr, hts_reglist_t *reglist, unsigned int regcount); - -/// Create a multi-region iterator -/** @param idx Index - @param hdr Header - @param regarray Array of ref:interval region specifiers - @param regcount Number of items in regarray - -Each @p regarray entry is parsed by hts_parse_reg(), and takes one of the -following forms: - -region | Outputs ---------------- | ------------- -REF | All reads with RNAME REF -REF: | All reads with RNAME REF -REF:START | Reads with RNAME REF overlapping START to end of REF -REF:-END | Reads with RNAME REF overlapping start of REF to END -REF:START-END | Reads with RNAME REF overlapping START to END -. | All reads from the start of the file -* | Unmapped reads at the end of the file (RNAME '*' in SAM) - -The form `REF:` should be used when the reference name itself contains a colon. - -The iterator will return all reads overlapping the given regions. If a read -overlaps more than one region, it will only be returned once. 
- */ -HTSLIB_EXPORT -hts_itr_t *sam_itr_regarray(const hts_idx_t *idx, sam_hdr_t *hdr, char **regarray, unsigned int regcount); - -/// Get the next read from a SAM/BAM/CRAM iterator -/** @param htsfp Htsfile pointer for the input file - @param itr Iterator - @param r Pointer to a bam1_t struct - @return >= 0 on success; -1 when there is no more data; < -1 on error - */ -static inline int sam_itr_next(htsFile *htsfp, hts_itr_t *itr, bam1_t *r) { - if (!htsfp->is_bgzf && !htsfp->is_cram) { - hts_log_error("%s not BGZF compressed", htsfp->fn ? htsfp->fn : "File"); - return -2; - } - if (!itr) { - hts_log_error("Null iterator"); - return -2; - } - - if (itr->multi) - return hts_itr_multi_next(htsfp, itr, r); - else - return hts_itr_next(htsfp->is_bgzf ? htsfp->fp.bgzf : NULL, itr, r, htsfp); -} - -/// Get the next read from a BAM/CRAM multi-iterator -/** @param htsfp Htsfile pointer for the input file - @param itr Iterator - @param r Pointer to a bam1_t struct - @return >= 0 on success; -1 when there is no more data; < -1 on error - */ -#define sam_itr_multi_next(htsfp, itr, r) sam_itr_next(htsfp, itr, r) - -HTSLIB_EXPORT -const char *sam_parse_region(sam_hdr_t *h, const char *s, int *tid, - hts_pos_t *beg, hts_pos_t *end, int flags); - - /*************** - *** SAM I/O *** - ***************/ - - #define sam_open(fn, mode) (hts_open((fn), (mode))) - #define sam_open_format(fn, mode, fmt) (hts_open_format((fn), (mode), (fmt))) - #define sam_flush(fp) hts_flush((fp)) - #define sam_close(fp) hts_close(fp) - - HTSLIB_EXPORT - int sam_open_mode(char *mode, const char *fn, const char *format); - - // A version of sam_open_mode that can handle ,key=value options. - // The format string is allocated and returned, to be freed by the caller. 
- // Prefix should be "r" or "w", - HTSLIB_EXPORT - char *sam_open_mode_opts(const char *fn, - const char *mode, - const char *format); - - HTSLIB_EXPORT - int sam_hdr_change_HD(sam_hdr_t *h, const char *key, const char *val); - - HTSLIB_EXPORT - int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) HTS_RESULT_USED; - HTSLIB_EXPORT - int sam_format1(const sam_hdr_t *h, const bam1_t *b, kstring_t *str) HTS_RESULT_USED; - -/// sam_read1 - Read a record from a file -/** @param fp Pointer to the source file - * @param h Pointer to the header previously read (fully or partially) - * @param b Pointer to the record placeholder - * @return >= 0 on successfully reading a new record, -1 on end of stream, < -1 on error - */ - HTSLIB_EXPORT - int sam_read1(samFile *fp, sam_hdr_t *h, bam1_t *b) HTS_RESULT_USED; -/// sam_write1 - Write a record to a file -/** @param fp Pointer to the destination file - * @param h Pointer to the header structure previously read - * @param b Pointer to the record to be written - * @return >= 0 on successfully writing the record, -ve on error - */ - HTSLIB_EXPORT - int sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t *b) HTS_RESULT_USED; - -// Forward declaration, see hts_expr.h for full. -struct hts_filter_t; - -/// sam_passes_filter - Checks whether a record passes an hts_filter. -/** @param h Pointer to the header structure previously read - * @param b Pointer to the BAM record to be checked - * @param filt Pointer to the filter, created from hts_filter_init. - * @return 1 if passes, 0 if not, and <0 on error. - */ -HTSLIB_EXPORT -int sam_passes_filter(const sam_hdr_t *h, const bam1_t *b, - struct hts_filter_t *filt); - - /************************************* - *** Manipulating auxiliary fields *** - *************************************/ - -/// Converts a BAM aux tag to SAM format -/* - * @param key Two letter tag key - * @param type Single letter type code: ACcSsIifHZB. 
- * @param tag Tag data pointer, in BAM format - * @param end Pointer to end of bam record (largest extent of tag) - * @param ks kstring to write the formatted tag to - * - * @return pointer to end of tag on success, - * NULL on failure. - * - * @discussion The three separate parameters key, type, tag may be - * derived from a s=bam_aux_get() query as s-2, *s and s+1. However - * it is recommended to use bam_aux_get_str in this situation. - * The desire to split these parameters up is for potential processing - * of non-BAM formats that encode using a BAM type mechanism - * (such as the internal CRAM representation). - */ -static inline const uint8_t *sam_format_aux1(const uint8_t *key, - const uint8_t type, - const uint8_t *tag, - const uint8_t *end, - kstring_t *ks) { - int r = 0; - const uint8_t *s = tag; // brevity and consistency with other code. - r |= kputsn_((char*)key, 2, ks) < 0; - r |= kputc_(':', ks) < 0; - if (type == 'C') { - r |= kputsn_("i:", 2, ks) < 0; - r |= kputw(*s, ks) < 0; - ++s; - } else if (type == 'c') { - r |= kputsn_("i:", 2, ks) < 0; - r |= kputw(*(int8_t*)s, ks) < 0; - ++s; - } else if (type == 'S') { - if (end - s >= 2) { - r |= kputsn_("i:", 2, ks) < 0; - r |= kputuw(le_to_u16(s), ks) < 0; - s += 2; - } else goto bad_aux; - } else if (type == 's') { - if (end - s >= 2) { - r |= kputsn_("i:", 2, ks) < 0; - r |= kputw(le_to_i16(s), ks) < 0; - s += 2; - } else goto bad_aux; - } else if (type == 'I') { - if (end - s >= 4) { - r |= kputsn_("i:", 2, ks) < 0; - r |= kputuw(le_to_u32(s), ks) < 0; - s += 4; - } else goto bad_aux; - } else if (type == 'i') { - if (end - s >= 4) { - r |= kputsn_("i:", 2, ks) < 0; - r |= kputw(le_to_i32(s), ks) < 0; - s += 4; - } else goto bad_aux; - } else if (type == 'A') { - r |= kputsn_("A:", 2, ks) < 0; - r |= kputc_(*s, ks) < 0; - ++s; - } else if (type == 'f') { - if (end - s >= 4) { - // cast to avoid triggering -Wdouble-promotion - ksprintf(ks, "f:%g", (double)le_to_float(s)); - s += 4; - } else goto 
bad_aux; - - } else if (type == 'd') { - // NB: "d" is not an official type in the SAM spec. - // However for unknown reasons samtools has always supported this. - // We believe, HOPE, it is not in general usage and we do not - // encourage it. - if (end - s >= 8) { - ksprintf(ks, "d:%g", le_to_double(s)); - s += 8; - } else goto bad_aux; - } else if (type == 'Z' || type == 'H') { - r |= kputc_(type, ks) < 0; - r |= kputc_(':', ks) < 0; - while (s < end && *s) r |= kputc_(*s++, ks) < 0; - r |= kputsn("", 0, ks) < 0; //ensures NUL termination - if (s >= end) - goto bad_aux; - ++s; - } else if (type == 'B') { - uint8_t sub_type = *(s++); - unsigned sub_type_size; - - // or externalise sam.c's aux_type2size function? - switch (sub_type) { - case 'A': case 'c': case 'C': - sub_type_size = 1; - break; - case 's': case 'S': - sub_type_size = 2; - break; - case 'i': case 'I': case 'f': - sub_type_size = 4; - break; - default: - sub_type_size = 0; - break; - } - - uint32_t i, n; - if (sub_type_size == 0 || end - s < 4) - goto bad_aux; - n = le_to_u32(s); - s += 4; // now points to the start of the array - if ((size_t)(end - s) / sub_type_size < n) - goto bad_aux; - r |= kputsn_("B:", 2, ks) < 0; - r |= kputc(sub_type, ks) < 0; // write the type - switch (sub_type) { - case 'c': - if (ks_expand(ks, n*2) < 0) goto mem_err; - for (i = 0; i < n; ++i) { - ks->s[ks->l++] = ','; - r |= kputw(*(int8_t*)s, ks) < 0; - ++s; - } - break; - case 'C': - if (ks_expand(ks, n*2) < 0) goto mem_err; - for (i = 0; i < n; ++i) { - ks->s[ks->l++] = ','; - r |= kputuw(*(uint8_t*)s, ks) < 0; - ++s; - } - break; - case 's': - if (ks_expand(ks, n*4) < 0) goto mem_err; - for (i = 0; i < n; ++i) { - ks->s[ks->l++] = ','; - r |= kputw(le_to_i16(s), ks) < 0; - s += 2; - } - break; - case 'S': - if (ks_expand(ks, n*4) < 0) goto mem_err; - for (i = 0; i < n; ++i) { - ks->s[ks->l++] = ','; - r |= kputuw(le_to_u16(s), ks) < 0; - s += 2; - } - break; - case 'i': - if (ks_expand(ks, n*6) < 0) goto mem_err; - 
for (i = 0; i < n; ++i) { - ks->s[ks->l++] = ','; - r |= kputw(le_to_i32(s), ks) < 0; - s += 4; - } - break; - case 'I': - if (ks_expand(ks, n*6) < 0) goto mem_err; - for (i = 0; i < n; ++i) { - ks->s[ks->l++] = ','; - r |= kputuw(le_to_u32(s), ks) < 0; - s += 4; - } - break; - case 'f': - if (ks_expand(ks, n*8) < 0) goto mem_err; - for (i = 0; i < n; ++i) { - ks->s[ks->l++] = ','; - // cast to avoid triggering -Wdouble-promotion - r |= kputd((double)le_to_float(s), ks) < 0; - s += 4; - } - break; - default: - goto bad_aux; - } - } else { // Unknown type - goto bad_aux; - } - return r ? NULL : s; - - bad_aux: - errno = EINVAL; - return NULL; - - mem_err: - hts_log_error("Out of memory"); - errno = ENOMEM; - return NULL; -} - -/// Return a pointer to a BAM record's first aux field -/** @param b Pointer to the BAM record - @return Aux field pointer, or NULL if the record has none - -When NULL is returned, errno will also be set to ENOENT. ("Aux field pointers" -point to the TYPE byte within the auxiliary data for that field; but in general -it is unnecessary for user code to be aware of this.) - */ -HTSLIB_EXPORT -uint8_t *bam_aux_first(const bam1_t *b); - -/// Return a pointer to a BAM record's next aux field -/** @param b Pointer to the BAM record - @param s Aux field pointer, as returned by bam_aux_first()/_next()/_get() - @return Pointer to the next aux field, or NULL if no next field or error - -Whenever NULL is returned, errno will also be set: ENOENT if @p s was the -record's last aux field; otherwise EINVAL, indicating that the BAM record's -aux data is corrupt. - */ -HTSLIB_EXPORT -uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s); - -/// Return a pointer to an aux record -/** @param b Pointer to the bam record - @param tag Desired aux tag - @return Pointer to the tag data, or NULL if tag is not present or on error - If the tag is not present, this function returns NULL and sets errno to - ENOENT. 
If the bam record's aux data is corrupt (either a tag has an - invalid type, or the last record is incomplete) then errno is set to - EINVAL and NULL is returned. - */ -HTSLIB_EXPORT -uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]); - -/// Return the aux field's 2-character tag -/** @param s Aux field pointer, as returned by bam_aux_first()/_next()/_get() - @return Pointer to the tag characters, NOT NUL-terminated - */ -static inline -const char *bam_aux_tag(const uint8_t *s) { return (const char *) (s-2); } - -/// Return the aux field's type character -/** @param s Aux field pointer, as returned by bam_aux_first()/_next()/_get() - @return The type character: one of cCsSiI/fd/A/Z/H/B - */ -static inline char bam_aux_type(const uint8_t *s) { return *s; } - -/// Return a SAM formatting string containing a BAM tag -/** @param b Pointer to the bam record - @param tag Desired aux tag - @param s The kstring to write to. - - @return 1 on success, - 0 on no tag found with errno = ENOENT, - -1 on error (errno will be either EINVAL or ENOMEM). - */ -static inline int bam_aux_get_str(const bam1_t *b, - const char tag[2], - kstring_t *s) { - const uint8_t *t = bam_aux_get(b, tag); - if (!t) - return errno == ENOENT ? 0 : -1; - - if (!sam_format_aux1(t-2, *t, t+1, b->data + b->l_data, s)) - return -1; - - return 1; -} - -/// Get an integer aux value -/** @param s Pointer to the tag data, as returned by bam_aux_get() - @return The value, or 0 if the tag was not an integer type - If the tag is not an integer type, errno is set to EINVAL. This function - will not return the value of floating-point tags. -*/ -HTSLIB_EXPORT -int64_t bam_aux2i(const uint8_t *s); - -/// Get an integer aux value -/** @param s Pointer to the tag data, as returned by bam_aux_get() - @return The value, or 0 if the tag was not an integer type - If the tag is not an numeric type, errno is set to EINVAL. The value of - integer flags will be returned cast to a double. 
-*/ -HTSLIB_EXPORT -double bam_aux2f(const uint8_t *s); - -/// Get a character aux value -/** @param s Pointer to the tag data, as returned by bam_aux_get(). - @return The value, or 0 if the tag was not a character ('A') type - If the tag is not a character type, errno is set to EINVAL. -*/ -HTSLIB_EXPORT -char bam_aux2A(const uint8_t *s); - -/// Get a string aux value -/** @param s Pointer to the tag data, as returned by bam_aux_get(). - @return Pointer to the string, or NULL if the tag was not a string type - If the tag is not a string type ('Z' or 'H'), errno is set to EINVAL. -*/ -HTSLIB_EXPORT -char *bam_aux2Z(const uint8_t *s); - -/// Get the length of an array-type ('B') tag -/** @param s Pointer to the tag data, as returned by bam_aux_get(). - @return The length of the array, or 0 if the tag is not an array type. - If the tag is not an array type, errno is set to EINVAL. - */ -HTSLIB_EXPORT -uint32_t bam_auxB_len(const uint8_t *s); - -/// Get an integer value from an array-type tag -/** @param s Pointer to the tag data, as returned by bam_aux_get(). - @param idx 0-based Index into the array - @return The idx'th value, or 0 on error. - If the array is not an integer type, errno is set to EINVAL. If idx - is greater than or equal to the value returned by bam_auxB_len(s), - errno is set to ERANGE. In both cases, 0 will be returned. - */ -HTSLIB_EXPORT -int64_t bam_auxB2i(const uint8_t *s, uint32_t idx); - -/// Get a floating-point value from an array-type tag -/** @param s Pointer to the tag data, as returned by bam_aux_get(). - @param idx 0-based Index into the array - @return The idx'th value, or 0.0 on error. - If the array is not a numeric type, errno is set to EINVAL. This can - only actually happen if the input record has an invalid type field. If - idx is greater than or equal to the value returned by bam_auxB_len(s), - errno is set to ERANGE. In both cases, 0.0 will be returned. 
- */ -HTSLIB_EXPORT -double bam_auxB2f(const uint8_t *s, uint32_t idx); - -/// Append tag data to a bam record -/* @param b The bam record to append to. - @param tag Tag identifier - @param type Tag data type - @param len Length of the data in bytes - @param data The data to append - @return 0 on success; -1 on failure. -If there is not enough space to store the additional tag, errno is set to -ENOMEM. If the type is invalid, errno may be set to EINVAL. errno is -also set to EINVAL if the bam record's aux data is corrupt. -*/ -HTSLIB_EXPORT -int bam_aux_append(bam1_t *b, const char tag[2], char type, int len, const uint8_t *data); - -/// Delete tag data from a bam record -/** @param b The BAM record to update - @param s Pointer to the aux field to delete, as returned by bam_aux_get() - Must not be NULL - @return 0 on success; -1 on failure - -If the BAM record's aux data is corrupt, errno is set to EINVAL and this -function returns -1. -*/ -HTSLIB_EXPORT -int bam_aux_del(bam1_t *b, uint8_t *s); - -/// Delete an aux field from a BAM record -/** @param b The BAM record to update - @param s Pointer to the aux field to delete, as returned by - bam_aux_first()/_next()/_get(); must not be NULL - @return Pointer to the following aux field, or NULL if none or on error - -Identical to @c bam_aux_del() apart from the return value, which is an -aux iterator suitable for use with @c bam_aux_next()/etc. - -Whenever NULL is returned, errno will also be set: ENOENT if the aux field -deleted was the record's last one; otherwise EINVAL, indicating that the -BAM record's aux data is corrupt. - */ -HTSLIB_EXPORT -uint8_t *bam_aux_remove(bam1_t *b, uint8_t *s); - -/// Update or add a string-type tag -/* @param b The bam record to update - @param tag Tag identifier - @param len The length of the new string - @param data The new string - @return 0 on success, -1 on failure - This function will not change the ordering of tags in the bam record. 
- New tags will be appended to any existing aux records. - - If @p len is less than zero, the length of the input string will be - calculated using strlen(). Otherwise exactly @p len bytes will be - copied from @p data to make the new tag. If these bytes do not - include a terminating NUL character, one will be added. (Note that - versions of HTSlib up to 1.10.2 had different behaviour here and - simply copied @p len bytes from data. To generate a valid tag it - was necessary to ensure the last character was a NUL, and include - it in @p len.) - - On failure, errno may be set to one of the following values: - - EINVAL: The bam record's aux data is corrupt or an existing tag with the - given ID is not of type 'Z'. - - ENOMEM: The bam data needs to be expanded and either the attempt to - reallocate the data buffer failed or the resulting buffer would be - longer than the maximum size allowed in a bam record (2Gbytes). -*/ -HTSLIB_EXPORT -int bam_aux_update_str(bam1_t *b, const char tag[2], int len, const char *data); - -/// Update or add an integer tag -/* @param b The bam record to update - @param tag Tag identifier - @param val The new value - @return 0 on success, -1 on failure - This function will not change the ordering of tags in the bam record. - New tags will be appended to any existing aux records. - - On failure, errno may be set to one of the following values: - - EINVAL: The bam record's aux data is corrupt or an existing tag with the - given ID is not of an integer type (c, C, s, S, i or I). - - EOVERFLOW (or ERANGE on systems that do not have EOVERFLOW): val is - outside the range that can be stored in an integer bam tag (-2147483647 - to 4294967295). - - ENOMEM: The bam data needs to be expanded and either the attempt to - reallocate the data buffer failed or the resulting buffer would be - longer than the maximum size allowed in a bam record (2Gbytes). 
-*/ -HTSLIB_EXPORT -int bam_aux_update_int(bam1_t *b, const char tag[2], int64_t val); - -/// Update or add a floating-point tag -/* @param b The bam record to update - @param tag Tag identifier - @param val The new value - @return 0 on success, -1 on failure - This function will not change the ordering of tags in the bam record. - New tags will be appended to any existing aux records. - - On failure, errno may be set to one of the following values: - - EINVAL: The bam record's aux data is corrupt or an existing tag with the - given ID is not of a float type. - - ENOMEM: The bam data needs to be expanded and either the attempt to - reallocate the data buffer failed or the resulting buffer would be - longer than the maximum size allowed in a bam record (2Gbytes). -*/ -HTSLIB_EXPORT -int bam_aux_update_float(bam1_t *b, const char tag[2], float val); - -/// Update or add an array tag -/* @param b The bam record to update - @param tag Tag identifier - @param type Data type (one of c, C, s, S, i, I or f) - @param items Number of items - @param data Pointer to data - @return 0 on success, -1 on failure - The type parameter indicates the how the data is interpreted: - - Letter code | Data type | Item Size (bytes) - ----------- | --------- | ----------------- - c | int8_t | 1 - C | uint8_t | 1 - s | int16_t | 2 - S | uint16_t | 2 - i | int32_t | 4 - I | uint32_t | 4 - f | float | 4 - - This function will not change the ordering of tags in the bam record. - New tags will be appended to any existing aux records. The bam record - will grow or shrink in order to accommodate the new data. - - The data parameter must not point to any data in the bam record itself or - undefined behaviour may result. - - On failure, errno may be set to one of the following values: - - EINVAL: The bam record's aux data is corrupt, an existing tag with the - given ID is not of an array type or the type parameter is not one of - the values listed above. 
- - ENOMEM: The bam data needs to be expanded and either the attempt to - reallocate the data buffer failed or the resulting buffer would be - longer than the maximum size allowed in a bam record (2Gbytes). -*/ -HTSLIB_EXPORT -int bam_aux_update_array(bam1_t *b, const char tag[2], - uint8_t type, uint32_t items, void *data); - -/************************** - *** Pileup and Mpileup *** - **************************/ - -#if !defined(BAM_NO_PILEUP) - -/*! @typedef - @abstract Generic pileup 'client data'. - - @discussion The pileup iterator allows setting a constructor and - destructor function, which will be called every time a sequence is - fetched and discarded. This permits caching of per-sequence data in - a tidy manner during the pileup process. This union is the cached - data to be manipulated by the "client" (the caller of pileup). -*/ -typedef union { - void *p; - int64_t i; - double f; -} bam_pileup_cd; - -/*! @typedef - @abstract Structure for one alignment covering the pileup position. - @field b pointer to the alignment - @field qpos position of the read base at the pileup site, 0-based - @field indel indel length; 0 for no indel, positive for ins and negative for del - @field level the level of the read in the "viewer" mode - @field is_del 1 iff the base on the padded read is a deletion - @field is_head 1 iff this is the first base in the query sequence - @field is_tail 1 iff this is the last base in the query sequence - @field is_refskip 1 iff the base on the padded read is part of CIGAR N op - @field aux (used by bcf_call_gap_prep()) - @field cigar_ind index of the CIGAR operator that has just been processed - - @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The - difference between the two functions is that the former does not - set bam_pileup1_t::level, while the later does. Level helps the - implementation of alignment viewers, but calculating this has some - overhead. 
- */ -typedef struct bam_pileup1_t { - bam1_t *b; - int32_t qpos; - int indel, level; - uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, /* reserved */ :1, aux:27; - bam_pileup_cd cd; // generic per-struct data, owned by caller. - int cigar_ind; -} bam_pileup1_t; - -typedef int (*bam_plp_auto_f)(void *data, bam1_t *b); - -struct bam_plp_s; -typedef struct bam_plp_s *bam_plp_t; - -struct bam_mplp_s; -typedef struct bam_mplp_s *bam_mplp_t; - - /** - * bam_plp_init() - sets an iterator over multiple - * @func: see mplp_func in bam_plcmd.c in samtools for an example. Expected return - * status: 0 on success, -1 on end, < -1 on non-recoverable errors - * @data: user data to pass to @func - * - * The struct returned by a successful call should be freed - * via bam_plp_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data); - - HTSLIB_EXPORT - void bam_plp_destroy(bam_plp_t iter); - - HTSLIB_EXPORT - int bam_plp_push(bam_plp_t iter, const bam1_t *b); - - HTSLIB_EXPORT - const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); - - HTSLIB_EXPORT - const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); - - HTSLIB_EXPORT - const bam_pileup1_t *bam_plp64_next(bam_plp_t iter, int *_tid, hts_pos_t *_pos, int *_n_plp); - - HTSLIB_EXPORT - const bam_pileup1_t *bam_plp64_auto(bam_plp_t iter, int *_tid, hts_pos_t *_pos, int *_n_plp); - - HTSLIB_EXPORT - void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt); - - HTSLIB_EXPORT - void bam_plp_reset(bam_plp_t iter); - - /** - * bam_plp_constructor() - sets a callback to initialise any per-pileup1_t fields. - * @plp: The bam_plp_t initialised using bam_plp_init. - * @func: The callback function itself. When called, it is given - * the data argument (specified in bam_plp_init), the bam - * structure and a pointer to a locally allocated - * bam_pileup_cd union. 
This union will also be present in - * each bam_pileup1_t created. - * The callback function should have a negative return - * value to indicate an error. (Similarly for destructor.) - */ - HTSLIB_EXPORT - void bam_plp_constructor(bam_plp_t plp, - int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)); - HTSLIB_EXPORT - void bam_plp_destructor(bam_plp_t plp, - int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)); - - /// Get pileup padded insertion sequence - /** - * @param p pileup data - * @param ins the kstring where the insertion sequence will be written - * @param del_len location for deletion length - * @return the length of insertion string on success; -1 on failure. - * - * Fills out the kstring with the padded insertion sequence for the current - * location in 'p'. If this is not an insertion site, the string is blank. - * - * If del_len is not NULL, the location pointed to is set to the length of - * any deletion immediately following the insertion, or zero if none. - */ - HTSLIB_EXPORT - int bam_plp_insertion(const bam_pileup1_t *p, kstring_t *ins, int *del_len) HTS_RESULT_USED; - - - /*! @typedef - @abstract An opaque type used for caching base modification state between - successive calls to bam_mods_* functions. - */ - typedef struct hts_base_mod_state hts_base_mod_state; - - /// Get pileup padded insertion sequence, including base modifications - /** - * @param p pileup data - * @param m state data for the base modification finder - * @param ins the kstring where the insertion sequence will be written - * @param del_len location for deletion length - * @return the number of insertion string on success, with string length - * being accessable via ins->l; -1 on failure. - * - * Fills out the kstring with the padded insertion sequence for the current - * location in 'p'. If this is not an insertion site, the string is blank. - * - * The modification state needs to have been previously initialised using - * bam_parse_basemod. 
It is permitted to be passed in as NULL, in which - * case this function outputs identically to bam_plp_insertion. - * - * If del_len is not NULL, the location pointed to is set to the length of - * any deletion immediately following the insertion, or zero if none. - */ - HTSLIB_EXPORT - int bam_plp_insertion_mod(const bam_pileup1_t *p, hts_base_mod_state *m, - kstring_t *ins, int *del_len) HTS_RESULT_USED; - - /// Create a new bam_mplp_t structure - /** The struct returned by a successful call should be freed - * via bam_mplp_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data); - - /// Set up mpileup overlap detection - /** - * @param iter mpileup iterator - * @return 0 on success; a negative value on error - * - * If called, mpileup will detect overlapping - * read pairs and for each base pair set the base quality of the - * lower-quality base to zero, thus effectively discarding it from - * calling. If the two bases are identical, the quality of the other base - * is increased to the sum of their qualities (capped at 200), otherwise - * it is multiplied by 0.8. 
- */ - HTSLIB_EXPORT - int bam_mplp_init_overlaps(bam_mplp_t iter); - - HTSLIB_EXPORT - void bam_mplp_destroy(bam_mplp_t iter); - - HTSLIB_EXPORT - void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt); - - HTSLIB_EXPORT - int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp); - - HTSLIB_EXPORT - int bam_mplp64_auto(bam_mplp_t iter, int *_tid, hts_pos_t *_pos, int *n_plp, const bam_pileup1_t **plp); - - HTSLIB_EXPORT - void bam_mplp_reset(bam_mplp_t iter); - - HTSLIB_EXPORT - void bam_mplp_constructor(bam_mplp_t iter, - int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)); - - HTSLIB_EXPORT - void bam_mplp_destructor(bam_mplp_t iter, - int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)); - -#endif // ~!defined(BAM_NO_PILEUP) - - -/*********************************** - * BAQ calculation and realignment * - ***********************************/ - -HTSLIB_EXPORT -int sam_cap_mapq(bam1_t *b, const char *ref, hts_pos_t ref_len, int thres); - -// Used as flag parameter in sam_prob_realn. -enum htsRealnFlags { - BAQ_APPLY = 1, - BAQ_EXTEND = 2, - BAQ_REDO = 4, - - // Platform subfield, in bit position 3 onwards - BAQ_AUTO = 0<<3, - BAQ_ILLUMINA = 1<<3, - BAQ_PACBIOCCS = 2<<3, - BAQ_PACBIO = 3<<3, - BAQ_ONT = 4<<3, - BAQ_GENAPSYS = 5<<3 -}; - -/// Calculate BAQ scores -/** @param b BAM record - @param ref Reference sequence - @param ref_len Reference sequence length - @param flag Flags, see description - @return 0 on success \n - -1 if the read was unmapped, zero length, had no quality values, did not have at least one M, X or = CIGAR operator, or included a reference skip. \n - -3 if BAQ alignment has already been done and does not need to be applied, or has already been applied. 
\n - -4 if alignment failed (most likely due to running out of memory) - -This function calculates base alignment quality (BAQ) values using the method -described in "Improving SNP discovery by base alignment quality", Heng Li, -Bioinformatics, Volume 27, Issue 8 (https://doi.org/10.1093/bioinformatics/btr076). - -The @param flag value can be generated using the htsRealnFlags enum, but for -backwards compatibilty reasons is retained as an "int". An example usage -of the enum could be this, equivalent to flag 19: - - sam_prob_realn(b, ref, len, BAQ_APPLY | BAQ_EXTEND | BAQ_PACBIOCCS); - -The following @param flag bits can be used: - -Bit 0 (BAQ_APPLY): Adjust the quality values using the BAQ values - - If set, the data in the BQ:Z tag is used to adjust the quality values, and - the BQ:Z tag is renamed to ZQ:Z. - - If clear, and a ZQ:Z tag is present, the quality values are reverted using - the data in the tag, and the tag is renamed to BQ:Z. - -Bit 1 (BAQ_EXTEND): Use "extended" BAQ. - - Changes the BAQ calculation to increase sensitivity at the expense of - reduced specificity. - -Bit 2 (BAQ_REDO): Recalculate BAQ, even if a BQ tag is present. - - Force BAQ to be recalculated. Note that a ZQ:Z tag will always disable - recalculation. - -Bits 3-10: Choose parameters tailored to a specific instrument type. - - One of BAQ_AUTO, BAQ_ILLUMINA, BAQ_PACBIOCCS, BAQ_PACBIO, BAQ_ONT and - BAQ_GENAPSYS. The BAQ parameter tuning are still a work in progress and - at the time of writing mainly consist of Illumina vs long-read technology - adjustments. - -@bug -If the input read has both BQ:Z and ZQ:Z tags, the ZQ:Z one will be removed. -Depending on what previous processing happened, this may or may not be the -correct thing to do. It would be wise to avoid this situation if possible. -*/ -HTSLIB_EXPORT -int sam_prob_realn(bam1_t *b, const char *ref, hts_pos_t ref_len, int flag); - -// --------------------------- -// Base modification retrieval - -/*! 
@typedef - @abstract Holds a single base modification. - @field modified_base The short base code (m, h, etc) or -ChEBI (negative) - @field canonical_base The canonical base referred to in the MM tag. - One of A, C, G, T or N. Note this may not be the - explicit base recorded in the SEQ column (esp. if N). - @field stran 0 or 1, indicating + or - strand from MM tag. - @field qual Quality code (256*probability), or -1 if unknown - - @discussion - Note this doesn't hold any location data or information on which other - modifications may be possible at this site. -*/ -typedef struct hts_base_mod { - int modified_base; - int canonical_base; - int strand; - int qual; -} hts_base_mod; - -#define HTS_MOD_UNKNOWN -1 // In MM but no ML -#define HTS_MOD_UNCHECKED -2 // Not in MM and in explicit mode - -// Flags for hts_parse_basemod2 -#define HTS_MOD_REPORT_UNCHECKED 1 - -/// Allocates an hts_base_mode_state. -/** - * @return An hts_base_mode_state pointer on success, - * NULL on failure. - * - * This just allocates the memory. The initialisation of the contents is - * done using bam_parse_basemod. Successive calls may be made to that - * without the need to free and allocate a new state. - * - * The state be destroyed using the hts_base_mode_state_free function. - */ -HTSLIB_EXPORT -hts_base_mod_state *hts_base_mod_state_alloc(void); - -/// Destroys an hts_base_mode_state. -/** - * @param state The base modification state pointer. - * - * The should have previously been created by hts_base_mode_state_alloc. - */ -HTSLIB_EXPORT -void hts_base_mod_state_free(hts_base_mod_state *state); - -/// Parses the Mm and Ml tags out of a bam record. -/** - * @param b BAM alignment record - * @param state The base modification state pointer. - * @return 0 on success, - * -1 on failure. - * - * This fills out the contents of the modification state, resetting the - * iterator location to the first sequence base. 
- */ -HTSLIB_EXPORT -int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state); - -/// Parses the Mm and Ml tags out of a bam record. -/** - * @param b BAM alignment record - * @param state The base modification state pointer. - * @param flags A bit-field controlling base modification processing - * - * @return 0 on success, - * -1 on failure. - * - * This fills out the contents of the modification state, resetting the - * iterator location to the first sequence base. - */ -HTSLIB_EXPORT -int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state, - uint32_t flags); - -/// Returns modification status for the next base position in the query seq. -/** - * @param b BAM alignment record - * @param state The base modification state pointer. - * @param mods A supplied array for returning base modifications - * @param n_mods The size of the mods array - * @return The number of modifications found on success, - * -1 on failure. - * - * This is intended to be used as an iterator, with one call per location - * along the query sequence. - * - * If no modifications are found, the returned value is zero. - * If more than n_mods modifications are found, the total found is returned. - * Note this means the caller needs to check whether this is higher than - * n_mods. - */ -HTSLIB_EXPORT -int bam_mods_at_next_pos(const bam1_t *b, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods); - -/// Finds the next location containing base modifications and returns them -/** - * @param b BAM alignment record - * @param state The base modification state pointer. - * @param mods A supplied array for returning base modifications - * @param n_mods The size of the mods array - * @param pos Pointer holding position of modification in sequence - * @return The number of modifications found on success, - * 0 if no more modifications are present, - * -1 on failure. - * - * Unlike bam_mods_at_next_pos this skips ahead to the next site - * with modifications. 
- * - * If more than n_mods modifications are found, the total found is returned. - * Note this means the caller needs to check whether this is higher than - * n_mods. - */ -HTSLIB_EXPORT -int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods, int *pos); - -/// Returns modification status for a specific query position. -/** - * @param b BAM alignment record - * @param state The base modification state pointer. - * @param mods A supplied array for returning base modifications - * @param n_mods The size of the mods array - * @return The number of modifications found on success, - * -1 on failure. - * - * Note if called multipled times, qpos must be higher than the previous call. - * Hence this is suitable for use from a pileup iterator. If more random - * access is required, bam_parse_basemod must be called each time to reset - * the state although this has an efficiency cost. - * - * If no modifications are found, the returned value is zero. - * If more than n_mods modifications are found, the total found is returned. - * Note this means the caller needs to check whether this is higher than - * n_mods. - */ -HTSLIB_EXPORT -int bam_mods_at_qpos(const bam1_t *b, int qpos, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods); - - -/// Returns data about a specific modification type for the alignment record. -/** - * @param b BAM alignment record - * @param state The base modification state pointer. - * @param code Modification code. If positive this is a character code, - * if negative it is a -ChEBI code. - * - * @param strand Boolean for top (0) or bottom (1) strand - * @param implicit Boolean for whether unlisted positions should be - * implicitly assumed to be unmodified, or require an - * explicit score and should be considered as unknown. - * Returned. - * @param canonical Canonical base type associated with this modification - * Returned. - * - * @return 0 on success or -1 if not found. 
The strand, implicit and canonical - * fields are filled out if passed in as non-NULL pointers. - */ -HTSLIB_EXPORT -int bam_mods_query_type(hts_base_mod_state *state, int code, - int *strand, int *implicit, char *canonical); - -/// Returns data about the i^th modification type for the alignment record. -/** - * @param b BAM alignment record - * @param state The base modification state pointer. - * @param i Modification index, from 0 to ntype-1 - * @param strand Boolean for top (0) or bottom (1) strand - * @param implicit Boolean for whether unlisted positions should be - * implicitly assumed to be unmodified, or require an - * explicit score and should be considered as unknown. - * Returned. - * @param canonical Canonical base type associated with this modification - * Returned. - * - * @return 0 on success or -1 if not found. The strand, implicit and canonical - * fields are filled out if passed in as non-NULL pointers. - */ -HTSLIB_EXPORT -int bam_mods_queryi(hts_base_mod_state *state, int i, - int *strand, int *implicit, char *canonical); - -/// Returns the list of base modification codes provided for this -/// alignment record as an array of character codes (+ve) or ChEBI numbers -/// (negative). -/* - * @param b BAM alignment record - * @param state The base modification state pointer. - * @param ntype Filled out with the number of array elements returned - * - * @return the type array, with *ntype filled out with the size. - * The array returned should not be freed. - * It is a valid pointer until the state is freed using - * hts_base_mod_free(). 
- */ -HTSLIB_EXPORT -int *bam_mods_recorded(hts_base_mod_state *state, int *ntype); - -#ifdef __cplusplus -} -#endif - -#ifdef HTSLIB_SSIZE_T -#undef HTSLIB_SSIZE_T -#undef ssize_t -#endif - -#endif diff --git a/src/htslib-1.18/htslib/tbx.h b/src/htslib-1.18/htslib/tbx.h deleted file mode 100644 index 3d2037c..0000000 --- a/src/htslib-1.18/htslib/tbx.h +++ /dev/null @@ -1,143 +0,0 @@ -/// @file htslib/tbx.h -/// Tabix API functions. -/* - Copyright (C) 2009, 2012-2015, 2019 Genome Research Ltd. - Copyright (C) 2010, 2012 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_TBX_H -#define HTSLIB_TBX_H - -#include "hts.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define TBX_MAX_SHIFT 31 - -#define TBX_GENERIC 0 -#define TBX_SAM 1 -#define TBX_VCF 2 -#define TBX_UCSC 0x10000 - -typedef struct tbx_conf_t { - int32_t preset; - int32_t sc, bc, ec; // seq col., beg col. and end col. 
- int32_t meta_char, line_skip; -} tbx_conf_t; - -typedef struct tbx_t { - tbx_conf_t conf; - hts_idx_t *idx; - void *dict; -} tbx_t; - -HTSLIB_EXPORT -extern const tbx_conf_t tbx_conf_gff, tbx_conf_bed, tbx_conf_psltbl, tbx_conf_sam, tbx_conf_vcf; - - #define tbx_itr_destroy(iter) hts_itr_destroy(iter) - #define tbx_itr_queryi(tbx, tid, beg, end) hts_itr_query((tbx)->idx, (tid), (beg), (end), tbx_readrec) - #define tbx_itr_querys(tbx, s) hts_itr_querys((tbx)->idx, (s), (hts_name2id_f)(tbx_name2id), (tbx), hts_itr_query, tbx_readrec) - #define tbx_itr_next(htsfp, tbx, itr, r) hts_itr_next(hts_get_bgzfp(htsfp), (itr), (r), (tbx)) - #define tbx_bgzf_itr_next(bgzfp, tbx, itr, r) hts_itr_next((bgzfp), (itr), (r), (tbx)) - - HTSLIB_EXPORT - int tbx_name2id(tbx_t *tbx, const char *ss); - - /* Internal helper function used by tbx_itr_next() */ - HTSLIB_EXPORT - BGZF *hts_get_bgzfp(htsFile *fp); - - HTSLIB_EXPORT - int tbx_readrec(BGZF *fp, void *tbxv, void *sv, int *tid, hts_pos_t *beg, hts_pos_t *end); - -/// Build an index of the lines in a BGZF-compressed file -/** The index struct returned by a successful call should be freed - via tbx_destroy() when it is no longer needed. 
-*/ - HTSLIB_EXPORT - tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf); -/* - * All tbx_index_build* methods return: 0 (success), -1 (general failure) or -2 (compression not BGZF) - */ - HTSLIB_EXPORT - int tbx_index_build(const char *fn, int min_shift, const tbx_conf_t *conf); - - HTSLIB_EXPORT - int tbx_index_build2(const char *fn, const char *fnidx, int min_shift, const tbx_conf_t *conf); - - HTSLIB_EXPORT - int tbx_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads, const tbx_conf_t *conf); - - -/// Load or stream a .tbi or .csi index -/** @param fn Name of the data file corresponding to the index - - Equivalent to tbx_index_load3(fn, NULL, HTS_IDX_SAVE_REMOTE); -*/ - HTSLIB_EXPORT - tbx_t *tbx_index_load(const char *fn); - -/// Load or stream a .tbi or .csi index -/** @param fn Name of the data file corresponding to the index - @param fnidx Name of the indexed file - @return The index, or NULL if an error occurred - - If @p fnidx is NULL, the index name will be derived from @p fn. - - Equivalent to tbx_index_load3(fn, fnidx, HTS_IDX_SAVE_REMOTE); -*/ - HTSLIB_EXPORT - tbx_t *tbx_index_load2(const char *fn, const char *fnidx); - -/// Load or stream a .tbi or .csi index -/** @param fn Name of the data file corresponding to the index - @param fnidx Name of the indexed file - @param flags Flags to alter behaviour (see description) - @return The index, or NULL if an error occurred - - If @p fnidx is NULL, the index name will be derived from @p fn. - - The @p flags parameter can be set to a combination of the following - values: - - HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes - HTS_IDX_SILENT_FAIL Fail silently if the index is not present - - The index struct returned by a successful call should be freed - via tbx_destroy() when it is no longer needed. 
-*/ - HTSLIB_EXPORT - tbx_t *tbx_index_load3(const char *fn, const char *fnidx, int flags); - - HTSLIB_EXPORT - const char **tbx_seqnames(tbx_t *tbx, int *n); // free the array but not the values - - HTSLIB_EXPORT - void tbx_destroy(tbx_t *tbx); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.18/htslib/vcf.h b/src/htslib-1.18/htslib/vcf.h deleted file mode 100644 index 83659ae..0000000 --- a/src/htslib-1.18/htslib/vcf.h +++ /dev/null @@ -1,1647 +0,0 @@ -/// @file htslib/vcf.h -/// High-level VCF/BCF variant calling file operations. -/* - Copyright (C) 2012, 2013 Broad Institute. - Copyright (C) 2012-2020, 2022-2023 Genome Research Ltd. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -/* - todo: - - make the function names consistent - - provide calls to abstract away structs as much as possible - */ - -#ifndef HTSLIB_VCF_H -#define HTSLIB_VCF_H - -#include -#include -#include -#include "hts.h" -#include "kstring.h" -#include "hts_defs.h" -#include "hts_endian.h" - -/* Included only for backwards compatibility with e.g. bcftools 1.10 */ -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/***************** - * Header struct * - *****************/ - -#define BCF_HL_FLT 0 // header line -#define BCF_HL_INFO 1 -#define BCF_HL_FMT 2 -#define BCF_HL_CTG 3 -#define BCF_HL_STR 4 // structured header line TAG= -#define BCF_HL_GEN 5 // generic header line - -#define BCF_HT_FLAG 0 // header type -#define BCF_HT_INT 1 -#define BCF_HT_REAL 2 -#define BCF_HT_STR 3 -#define BCF_HT_LONG (BCF_HT_INT | 0x100) // BCF_HT_INT, but for int64_t values; VCF only! - -#define BCF_VL_FIXED 0 // variable length -#define BCF_VL_VAR 1 -#define BCF_VL_A 2 -#define BCF_VL_G 3 -#define BCF_VL_R 4 - -/* === Dictionary === - - The header keeps three dictionaries. The first keeps IDs in the - "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths - in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[] - is the actual hash table, which is opaque to the end users. In the hash - table, the key is the ID or sample name as a C string and the value is a - bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash - table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the - size of the hash table or, equivalently, the length of the id[] arrays. -*/ - -#define BCF_DT_ID 0 // dictionary type -#define BCF_DT_CTG 1 -#define BCF_DT_SAMPLE 2 - -// Complete textual representation of a header line -typedef struct bcf_hrec_t { - int type; // One of the BCF_HL_* type - char *key; // The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc. 
- char *value; // Set only for generic lines, NULL for FILTER/INFO, etc. - int nkeys; // Number of structured fields - char **keys, **vals; // The key=value pairs -} bcf_hrec_t; - -typedef struct bcf_idinfo_t { - uint64_t info[3]; // stores Number:20, var:4, Type:4, ColType:4 in info[0..2] - // for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG - bcf_hrec_t *hrec[3]; - int id; -} bcf_idinfo_t; - -typedef struct bcf_idpair_t { - const char *key; - const bcf_idinfo_t *val; -} bcf_idpair_t; - -// Note that bcf_hdr_t structs must always be created via bcf_hdr_init() -typedef struct bcf_hdr_t { - int32_t n[3]; // n:the size of the dictionary block in use, (allocated size, m, is below to preserve ABI) - bcf_idpair_t *id[3]; - void *dict[3]; // ID dictionary, contig dict and sample dict - char **samples; - bcf_hrec_t **hrec; - int nhrec, dirty; - int ntransl, *transl[2]; // for bcf_translate() - int nsamples_ori; // for bcf_hdr_set_samples() - uint8_t *keep_samples; - kstring_t mem; - int32_t m[3]; // m: allocated size of the dictionary block in use (see n above) -} bcf_hdr_t; - -HTSLIB_EXPORT -extern uint8_t bcf_type_shift[]; - -/************** - * VCF record * - **************/ - -#define BCF_BT_NULL 0 -#define BCF_BT_INT8 1 -#define BCF_BT_INT16 2 -#define BCF_BT_INT32 3 -#define BCF_BT_INT64 4 // Unofficial, for internal use only. 
-#define BCF_BT_FLOAT 5 -#define BCF_BT_CHAR 7 - -#define VCF_REF 0 -#define VCF_SNP (1<<0) -#define VCF_MNP (1<<1) -#define VCF_INDEL (1<<2) -#define VCF_OTHER (1<<3) -#define VCF_BND (1<<4) // breakend -#define VCF_OVERLAP (1<<5) // overlapping deletion, ALT=* -#define VCF_INS (1<<6) // implies VCF_INDEL -#define VCF_DEL (1<<7) // implies VCF_INDEL -#define VCF_ANY (VCF_SNP|VCF_MNP|VCF_INDEL|VCF_OTHER|VCF_BND|VCF_OVERLAP|VCF_INS|VCF_DEL) // any variant type (but not VCF_REF) - -typedef struct bcf_variant_t { - int type, n; // variant type and the number of bases affected, negative for deletions -} bcf_variant_t; - -typedef struct bcf_fmt_t { - int id; // id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key - int n, size, type; // n: number of values per-sample; size: number of bytes per-sample; type: one of BCF_BT_* types - uint8_t *p; // same as vptr and vptr_* in bcf_info_t below - uint32_t p_len; - uint32_t p_off:31, p_free:1; -} bcf_fmt_t; - -typedef struct bcf_info_t { - int key; // key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key - int type; // type: one of BCF_BT_* types - union { - int64_t i; // integer value - float f; // float value - } v1; // only set if $len==1; for easier access - uint8_t *vptr; // pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes - uint32_t vptr_len; // length of the vptr block or, when set, of the vptr_mod block, excluding offset - uint32_t vptr_off:31, // vptr offset, i.e., the size of the INFO key plus size+type bytes - vptr_free:1; // indicates that vptr-vptr_off must be freed; set only when modified and the new - // data block is bigger than the original - int len; // vector length, 1 for scalars -} bcf_info_t; - - -#define BCF1_DIRTY_ID 1 -#define BCF1_DIRTY_ALS 2 -#define BCF1_DIRTY_FLT 4 -#define BCF1_DIRTY_INF 8 - -typedef struct bcf_dec_t { - int m_fmt, m_info, m_id, m_als, m_allele, m_flt; // allocated size (high-water mark); 
do not change - int n_flt; // Number of FILTER fields - int *flt; // FILTER keys in the dictionary - char *id, *als; // ID and REF+ALT block (\0-separated) - char **allele; // allele[0] is the REF (allele[] pointers to the als block); all null terminated - bcf_info_t *info; // INFO - bcf_fmt_t *fmt; // FORMAT and individual sample - bcf_variant_t *var; // $var and $var_type set only when set_variant_types called - int n_var, var_type; - int shared_dirty; // if set, shared.s must be recreated on BCF output - int indiv_dirty; // if set, indiv.s must be recreated on BCF output -} bcf_dec_t; - - -#define BCF_ERR_CTG_UNDEF 1 -#define BCF_ERR_TAG_UNDEF 2 -#define BCF_ERR_NCOLS 4 -#define BCF_ERR_LIMITS 8 -#define BCF_ERR_CHAR 16 -#define BCF_ERR_CTG_INVALID 32 -#define BCF_ERR_TAG_INVALID 64 - -/// Get error description for bcf error code -/** @param errorcode The error code which is to be described - @param buffer The buffer in which description to be added - @param maxbuffer The size of buffer passed - @return NULL on invalid buffer; buffer on other cases - -The buffer will be an empty string when @p errorcode is 0. -Description of errors present in code will be appended to @p buffer with ',' separation. -The buffer has to be at least 4 characters long. NULL will be returned if it is smaller or when buffer is NULL. - -'...' will be appended if the description doesn't fit in the given buffer. - */ - -HTSLIB_EXPORT -const char *bcf_strerror(int errorcode, char *buffer, size_t maxbuffer); - -/* - The bcf1_t structure corresponds to one VCF/BCF line. Reading from VCF file - is slower because the string is first to be parsed, packed into BCF line - (done in vcf_parse), then unpacked into internal bcf1_t structure. If it - is known in advance that some of the fields will not be required (notably - the sample columns), parsing of these can be skipped by setting max_unpack - appropriately. 
- Similarly, it is fast to output a BCF line because the columns (kept in - shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF - line must be formatted in vcf_format. - */ -typedef struct bcf1_t { - hts_pos_t pos; // POS - hts_pos_t rlen; // length of REF - int32_t rid; // CHROM - float qual; // QUAL - uint32_t n_info:16, n_allele:16; - uint32_t n_fmt:8, n_sample:24; - kstring_t shared, indiv; - bcf_dec_t d; // lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack() - int max_unpack; // Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed - int unpacked; // remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work - int unpack_size[3]; // the original block size of ID, REF+ALT and FILTER - int errcode; // one of BCF_ERR_* codes -} bcf1_t; - -/******* - * API * - *******/ - - /*********************************************************************** - * BCF and VCF I/O - * - * A note about naming conventions: htslib internally represents VCF - * records as bcf1_t data structures, therefore most functions are - * prefixed with bcf_. There are a few exceptions where the functions must - * be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In - * these cases, functions prefixed with bcf_ are more general and work - * with both BCF and VCF. 
- * - ***********************************************************************/ - - /** These macros are defined only for consistency with other parts of htslib */ - #define bcf_init1() bcf_init() - #define bcf_read1(fp,h,v) bcf_read((fp),(h),(v)) - #define vcf_read1(fp,h,v) vcf_read((fp),(h),(v)) - #define bcf_write1(fp,h,v) bcf_write((fp),(h),(v)) - #define vcf_write1(fp,h,v) vcf_write((fp),(h),(v)) - #define bcf_destroy1(v) bcf_destroy(v) - #define bcf_empty1(v) bcf_empty(v) - #define vcf_parse1(s,h,v) vcf_parse((s),(h),(v)) - #define bcf_clear1(v) bcf_clear(v) - #define vcf_format1(h,v,s) vcf_format((h),(v),(s)) - - /** - * bcf_hdr_init() - create an empty BCF header. - * @param mode "r" or "w" - * - * When opened for writing, the mandatory fileFormat and - * FILTER=PASS lines are added automatically. - * - * The bcf_hdr_t struct returned by a successful call should be freed - * via bcf_hdr_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf_hdr_t *bcf_hdr_init(const char *mode); - - /** Destroy a BCF header struct */ - HTSLIB_EXPORT - void bcf_hdr_destroy(bcf_hdr_t *h); - - /** Allocate and initialize a bcf1_t object. - * - * The bcf1_t struct returned by a successful call should be freed - * via bcf_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf1_t *bcf_init(void); - - /** Deallocate a bcf1_t object */ - HTSLIB_EXPORT - void bcf_destroy(bcf1_t *v); - - /** - * Same as bcf_destroy() but frees only the memory allocated by bcf1_t, - * not the bcf1_t object itself. - */ - HTSLIB_EXPORT - void bcf_empty(bcf1_t *v); - - /** - * Make the bcf1_t object ready for next read. Intended mostly for - * internal use, the user should rarely need to call this function - * directly. 
- */ - HTSLIB_EXPORT - void bcf_clear(bcf1_t *v); - - - /** bcf_open and vcf_open mode: please see hts_open() in hts.h */ - typedef htsFile vcfFile; - #define bcf_open(fn, mode) hts_open((fn), (mode)) - #define vcf_open(fn, mode) hts_open((fn), (mode)) - #define bcf_flush(fp) hts_flush((fp)) - #define bcf_close(fp) hts_close(fp) - #define vcf_close(fp) hts_close(fp) - - /// Read a VCF or BCF header - /** @param fp The file to read the header from - @return Pointer to a populated header structure on success; - NULL on failure - - The bcf_hdr_t struct returned by a successful call should be freed - via bcf_hdr_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf_hdr_t *bcf_hdr_read(htsFile *fp) HTS_RESULT_USED; - - /** - * bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed - * @param samples samples to include or exclude from file or as a comma-separated string. - * LIST|FILE .. select samples in list/file - * ^LIST|FILE .. exclude samples from list/file - * - .. include all samples - * NULL .. exclude all samples - * @param is_file @p samples is a file (1) or a comma-separated list (0) - * - * The bottleneck of VCF reading is parsing of genotype fields. If the - * reader knows in advance that only subset of samples is needed (possibly - * no samples at all), the performance of bcf_read() can be significantly - * improved by calling bcf_hdr_set_samples after bcf_hdr_read(). - * The function bcf_read() will subset the VCF/BCF records automatically - * with the notable exception when reading records via bcf_itr_next(). - * In this case, bcf_subset_format() must be called explicitly, because - * bcf_readrec() does not see the header. - * - * Returns 0 on success, -1 on error or a positive integer if the list - * contains samples not present in the VCF header. In such a case, the - * return value is the index of the offending sample. 
- */ - HTSLIB_EXPORT - int bcf_hdr_set_samples(bcf_hdr_t *hdr, const char *samples, int is_file) HTS_RESULT_USED; - - HTSLIB_EXPORT - int bcf_subset_format(const bcf_hdr_t *hdr, bcf1_t *rec); - - /// Write a VCF or BCF header - /** @param fp Output file - @param h The header to write - @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int bcf_hdr_write(htsFile *fp, bcf_hdr_t *h) HTS_RESULT_USED; - - /** - * Parse VCF line contained in kstring and populate the bcf1_t struct - * The line must not end with \n or \r characters. - */ - HTSLIB_EXPORT - int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v); - - /** - * Complete the file opening mode, according to its extension. - * @param mode Preallocated mode string to be completed. - * @param fn File name to be opened. - * @param format Format string (vcf|bcf|vcf.gz) - * @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int vcf_open_mode(char *mode, const char *fn, const char *format); - - /** The opposite of vcf_parse. It should rarely be called directly, see vcf_write */ - HTSLIB_EXPORT - int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s); - - /// Read next VCF or BCF record - /** @param fp The file to read the record from - @param h The header for the vcf/bcf file - @param v The bcf1_t structure to populate - @return 0 on success; -1 on end of file; < -1 on critical error - -On errors which are not critical for reading, such as missing header -definitions in vcf files, zero will be returned but v->errcode will have been -set to one of BCF_ERR* codes and must be checked before calling bcf_write(). - */ - HTSLIB_EXPORT - int bcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; - - /** - * bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field) - * - * Note that bcf_unpack() must be called even when reading VCF. It is safe - * to call the function repeatedly, it will not unpack the same field - * twice. 
- */ - #define BCF_UN_STR 1 // up to ALT inclusive - #define BCF_UN_FLT 2 // up to FILTER - #define BCF_UN_INFO 4 // up to INFO - #define BCF_UN_SHR (BCF_UN_STR|BCF_UN_FLT|BCF_UN_INFO) // all shared information - #define BCF_UN_FMT 8 // unpack format and each sample - #define BCF_UN_IND BCF_UN_FMT // a synonym of BCF_UN_FMT - #define BCF_UN_ALL (BCF_UN_SHR|BCF_UN_FMT) // everything - HTSLIB_EXPORT - int bcf_unpack(bcf1_t *b, int which); - - /* - * bcf_dup() - create a copy of BCF record. - * - * Note that bcf_unpack() must be called on the returned copy as if it was - * obtained from bcf_read(). Also note that bcf_dup() calls bcf_sync1(src) - * internally to reflect any changes made by bcf_update_* functions. - * - * The bcf1_t struct returned by a successful call should be freed - * via bcf_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf1_t *bcf_dup(bcf1_t *src); - - HTSLIB_EXPORT - bcf1_t *bcf_copy(bcf1_t *dst, bcf1_t *src); - - /// Write one VCF or BCF record. The type is determined at the open() call. - /** @param fp The file to write to - @param h The header for the vcf/bcf file - @param v The bcf1_t structure to write - @return 0 on success; -1 on error - */ - HTSLIB_EXPORT - int bcf_write(htsFile *fp, bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; - - /** - * The following functions work only with VCFs and should rarely be called - * directly. Usually one wants to use their bcf_* alternatives, which work - * transparently with both VCFs and BCFs. - */ - /// Read a VCF format header - /** @param fp The file to read the header from - @return Pointer to a populated header structure on success; - NULL on failure - - Use bcf_hdr_read() instead. - - The bcf_hdr_t struct returned by a successful call should be freed - via bcf_hdr_destroy() when it is no longer needed. 
- */ - HTSLIB_EXPORT - bcf_hdr_t *vcf_hdr_read(htsFile *fp) HTS_RESULT_USED; - - /// Write a VCF format header - /** @param fp Output file - @param h The header to write - @return 0 on success; -1 on failure - - Use bcf_hdr_write() instead - */ - HTSLIB_EXPORT - int vcf_hdr_write(htsFile *fp, const bcf_hdr_t *h) HTS_RESULT_USED; - - /// Read a record from a VCF file - /** @param fp The file to read the record from - @param h The header for the vcf file - @param v The bcf1_t structure to populate - @return 0 on success; -1 on end of file; < -1 on error - - Use bcf_read() instead - */ - HTSLIB_EXPORT - int vcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; - - /// Write a record to a VCF file - /** @param fp The file to write to - @param h The header for the vcf file - @param v The bcf1_t structure to write - @return 0 on success; -1 on error - - Use bcf_write() instead - */ - HTSLIB_EXPORT - int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; - - /** Helper function for the bcf_itr_next() macro; internal use, ignore it */ - HTSLIB_EXPORT - int bcf_readrec(BGZF *fp, void *null, void *v, int *tid, hts_pos_t *beg, hts_pos_t *end); - - /// Write a line to a VCF file - /** @param line Line to write - @param fp File to write it to - @return 0 on success; -1 on failure - - @note No checks are done on the line being added, apart from - ensuring that it ends with a newline. This function - should therefore be used with care. - */ - HTSLIB_EXPORT - int vcf_write_line(htsFile *fp, kstring_t *line); - - /************************************************************************** - * Header querying and manipulation routines - **************************************************************************/ - - /** Create a new header using the supplied template - * - * The bcf_hdr_t struct returned by a successful call should be freed - * via bcf_hdr_destroy() when it is no longer needed. 
- * @return NULL on failure, header otherwise - */ - HTSLIB_EXPORT - bcf_hdr_t *bcf_hdr_dup(const bcf_hdr_t *hdr); - - /** - * Copy header lines from src to dst if not already present in dst. See also bcf_translate(). - * Returns 0 on success or sets a bit on error: - * 1 .. conflicting definitions of tag length - * // todo - */ - HTSLIB_EXPORT - int bcf_hdr_combine(bcf_hdr_t *dst, const bcf_hdr_t *src) HTS_DEPRECATED("Please use bcf_hdr_merge instead"); - - /** - * bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate() - * @param dst: the destination header to be merged into, NULL on the first pass - * @param src: the source header - * @return NULL on failure, header otherwise - * - * Notes: - * - use as: - * bcf_hdr_t *dst = NULL; - * for (i=0; in[BCF_DT_SAMPLE] - - - /** The following functions are for internal use and should rarely be called directly */ - HTSLIB_EXPORT - int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt); - - /// Synchronize internal header structures - /** @param h Header - @return 0 on success, -1 on failure - - This function updates the id, sample and contig arrays in the - bcf_hdr_t structure so that they point to the same locations as - the id, sample and contig dictionaries. - */ - HTSLIB_EXPORT - int bcf_hdr_sync(bcf_hdr_t *h) HTS_RESULT_USED; - - /** - * bcf_hdr_parse_line() - parse a single line of VCF textual header - * @param h BCF header struct - * @param line One or more lines of header text - * @param len Filled out with length data parsed from 'line'. - * @return bcf_hrec_t* on success; - * NULL on error or on end of header text. 
- * NB: to distinguish error from end-of-header, check *len: - * *len == 0 indicates @p line did not start with "##" - * *len == -1 indicates failure, likely due to out of memory - * *len > 0 indicates a malformed header line - * - * If *len > 0 on exit, it will contain the full length of the line - * including any trailing newline (this includes cases where NULL was - * returned due to a malformed line). Callers can use this to skip to - * the next header line. - */ - HTSLIB_EXPORT - bcf_hrec_t *bcf_hdr_parse_line(const bcf_hdr_t *h, const char *line, int *len); - /// Convert a bcf header record to string form - /** - * @param hrec Header record - * @param str Destination kstring - * @return 0 on success; < 0 on error - */ - HTSLIB_EXPORT - int bcf_hrec_format(const bcf_hrec_t *hrec, kstring_t *str); - - /// Add a header record into a header - /** - * @param hdr Destination header - * @param hrec Header record - * @return 0 on success, -1 on failure - * - * If this function returns success, ownership of @p hrec will have - * been transferred to the header structure. It may also have been - * freed if it was a duplicate of a record already in the header. - * Therefore the @p hrec pointer should not be used after a successful - * return from this function. - * - * If this function returns failure, ownership will not have been taken - * and the caller is responsible for cleaning up @p hrec. - */ - - HTSLIB_EXPORT - int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec); - - /** - * bcf_hdr_get_hrec() - get header line info - * @param type: one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN - * @param key: the header key for generic lines (e.g. "fileformat"), any field - * for structured lines, typically "ID". - * @param value: the value which pairs with key. Can be be NULL for BCF_HL_GEN - * @param str_class: the class of BCF_HL_STR line (e.g. 
"ALT" or "SAMPLE"), otherwise NULL - */ - HTSLIB_EXPORT - bcf_hrec_t *bcf_hdr_get_hrec(const bcf_hdr_t *hdr, int type, const char *key, const char *value, const char *str_class); - - /// Duplicate a header record - /** @param hrec Header record to copy - @return A new header record on success; NULL on failure - - The bcf_hrec_t struct returned by a successful call should be freed - via bcf_hrec_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf_hrec_t *bcf_hrec_dup(bcf_hrec_t *hrec); - - /// Add a new header record key - /** @param hrec Header record - @param str Key name - @param len Length of @p str - @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int bcf_hrec_add_key(bcf_hrec_t *hrec, const char *str, size_t len) HTS_RESULT_USED; - - /// Set a header record value - /** @param hrec Header record - @param i Index of value - @param str Value to set - @param len Length of @p str - @param is_quoted Value should be quoted - @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const char *str, size_t len, int is_quoted) HTS_RESULT_USED; - - HTSLIB_EXPORT - int bcf_hrec_find_key(bcf_hrec_t *hrec, const char *key); - - - /// Add an IDX header record - /** @param hrec Header record - @param idx IDX value to add - @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int hrec_add_idx(bcf_hrec_t *hrec, int idx) HTS_RESULT_USED; - - /// Free up a header record and associated structures - /** @param hrec Header record - */ - HTSLIB_EXPORT - void bcf_hrec_destroy(bcf_hrec_t *hrec); - - - - /************************************************************************** - * Individual record querying and manipulation routines - **************************************************************************/ - - /** See the description of bcf_hdr_subset() */ - HTSLIB_EXPORT - int bcf_subset(const bcf_hdr_t *h, bcf1_t *v, int n, int *imap); - - /** - * bcf_translate() - translate tags ids to be consistent 
with different header. This function - * is useful when lines from multiple VCF need to be combined. - * @dst_hdr: the destination header, to be used in bcf_write(), see also bcf_hdr_combine() - * @src_hdr: the source header, used in bcf_read() - * @src_line: line obtained by bcf_read() - */ - HTSLIB_EXPORT - int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *src_line); - - /// Get variant types in a BCF record - /** - * @param rec BCF/VCF record - * @return Types of variant present - * - * The return value will be a bitwise-or of VCF_SNP, VCF_MNP, - * VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP. If will return - * VCF_REF (i.e. 0) if none of the other types is present. - * @deprecated Please use bcf_has_variant_types() instead - */ - HTSLIB_EXPORT - int bcf_get_variant_types(bcf1_t *rec); - - /// Get variant type in a BCF record, for a given allele - /** - * @param rec BCF/VCF record - * @param ith_allele Allele to check - * @return Type of variant present - * - * The return value will be one of VCF_REF, VCF_SNP, VCF_MNP, - * VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP. - * @deprecated Please use bcf_has_variant_type() instead - */ - HTSLIB_EXPORT - int bcf_get_variant_type(bcf1_t *rec, int ith_allele); - - /// Match mode for bcf_has_variant_types() - enum bcf_variant_match { - bcf_match_exact, ///< Types present exactly match tested for - bcf_match_overlap, ///< At least one variant type in common - bcf_match_subset, ///< Test set is a subset of types present - }; - - /// Check for presence of variant types in a BCF record - /** - * @param rec BCF/VCF record - * @param bitmask Set of variant types to test for - * @param mode Match mode - * @return >0 if the variant types are present, - * 0 if not present, - * -1 on error - * - * @p bitmask should be the bitwise-or of the variant types (VCF_SNP, - * VCF_MNP, etc.) to test for. - * - * The return value is the bitwise-and of the set of types present - * and @p bitmask. 
Callers that want to check for the presence of more - * than one type can avoid function call overhead by passing all the - * types to be checked for in a single call to this function, in - * bcf_match_overlap mode, and then check for them individually in the - * returned value. - * - * As VCF_REF is represented by 0 (i.e. the absence of other variants) - * it should be tested for using - * bcf_has_variant_types(rec, VCF_REF, bcf_match_exact) - * which will return 1 if no other variant type is present, otherwise 0. - */ - HTSLIB_EXPORT - int bcf_has_variant_types(bcf1_t *rec, uint32_t bitmask, enum bcf_variant_match mode); - - /// Check for presence of variant types in a BCF record, for a given allele - /** - * @param rec BCF/VCF record - * @param ith_allele Allele to check - * @param bitmask Set of variant types to test for - * @return >0 if one of the variant types is present, - * 0 if not present, - * -1 on error - * - * @p bitmask should be the bitwise-or of the variant types (VCF_SNP, - * VCF_MNP, etc.) to test for, or VCF_REF on its own. - * - * The return value is the bitwise-and of the set of types present - * and @p bitmask. Callers that want to check for the presence of more - * than one type can avoid function call overhead by passing all the - * types to be checked for in a single call to this function, and then - * check for them individually in the returned value. - * - * As a special case, if @p bitmask is VCF_REF (i.e. 0), the function - * tests for an exact match. The return value will be 1 if the - * variant type calculated for the allele is VCF_REF, otherwise if - * any other type is present it will be 0. - */ - HTSLIB_EXPORT - int bcf_has_variant_type(bcf1_t *rec, int ith_allele, uint32_t bitmask); - - /// Return the number of bases affected by a variant, for a given allele - /** - * @param rec BCF/VCF record - * @param ith_allele Allele index - * @return The number of bases affected (negative for deletions), - * or bcf_int32_missing on error. 
- */ - HTSLIB_EXPORT - int bcf_variant_length(bcf1_t *rec, int ith_allele); - - HTSLIB_EXPORT - int bcf_is_snp(bcf1_t *v); - - /** - * bcf_update_filter() - sets the FILTER column - * @flt_ids: The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") - * @n: Number of filters. If n==0, all filters are removed - */ - HTSLIB_EXPORT - int bcf_update_filter(const bcf_hdr_t *hdr, bcf1_t *line, int *flt_ids, int n); - /** - * bcf_add_filter() - adds to the FILTER column - * @flt_id: filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") - * - * If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed. - */ - HTSLIB_EXPORT - int bcf_add_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id); - /** - * bcf_remove_filter() - removes from the FILTER column - * @flt_id: filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") - * @pass: when set to 1 and no filters are present, set to PASS - */ - HTSLIB_EXPORT - int bcf_remove_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id, int pass); - /** - * Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably. 
- */ - HTSLIB_EXPORT - int bcf_has_filter(const bcf_hdr_t *hdr, bcf1_t *line, char *filter); - /** - * bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALT column - * @alleles: Array of alleles - * @nals: Number of alleles - * @alleles_string: Comma-separated alleles, starting with the REF allele - */ - HTSLIB_EXPORT - int bcf_update_alleles(const bcf_hdr_t *hdr, bcf1_t *line, const char **alleles, int nals); - - HTSLIB_EXPORT - int bcf_update_alleles_str(const bcf_hdr_t *hdr, bcf1_t *line, const char *alleles_string); - - /** - * bcf_update_id() - sets new ID string - * bcf_add_id() - adds to the ID string checking for duplicates - */ - HTSLIB_EXPORT - int bcf_update_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id); - - HTSLIB_EXPORT - int bcf_add_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id); - - /** - * bcf_update_info_*() - functions for updating INFO fields - * @param hdr: the BCF header - * @param line: VCF line to be edited - * @param key: the INFO tag to be updated - * @param values: pointer to the array of values. Pass NULL to remove the tag. - * @param n: number of values in the array. When set to 0, the INFO tag is removed - * @return 0 on success or negative value on error. - * - * The @p string in bcf_update_info_flag() is optional, - * @p n indicates whether the flag is set or removed. - * - * Note that updating an END info tag will cause line->rlen to be - * updated as a side-effect (removing the tag will set it to the - * string length of the REF allele). If line->pos is being changed as - * well, it is important that this is done before calling - * bcf_update_info_int32() to update the END tag, otherwise rlen will be - * set incorrectly. If the new END value is less than or equal to - * line->pos, a warning will be printed and line->rlen will be set to - * the length of the REF allele. 
- */ - #define bcf_update_info_int32(hdr,line,key,values,n) bcf_update_info((hdr),(line),(key),(values),(n),BCF_HT_INT) - #define bcf_update_info_float(hdr,line,key,values,n) bcf_update_info((hdr),(line),(key),(values),(n),BCF_HT_REAL) - #define bcf_update_info_flag(hdr,line,key,string,n) bcf_update_info((hdr),(line),(key),(string),(n),BCF_HT_FLAG) - #define bcf_update_info_string(hdr,line,key,string) bcf_update_info((hdr),(line),(key),(string),1,BCF_HT_STR) - HTSLIB_EXPORT - int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type); - - /// Set or update 64-bit integer INFO values - /** - * @param hdr: the BCF header - * @param line: VCF line to be edited - * @param key: the INFO tag to be updated - * @param values: pointer to the array of values. Pass NULL to remove the tag. - * @param n: number of values in the array. When set to 0, the INFO tag is removed - * @return 0 on success or negative value on error. - * - * This function takes an int64_t values array as input. The data - * actually stored will be shrunk to the minimum size that can - * accept all of the values. - * - * INFO values outside of the range BCF_MIN_BT_INT32 to BCF_MAX_BT_INT32 - * can only be written to VCF files. - */ - static inline int bcf_update_info_int64(const bcf_hdr_t *hdr, bcf1_t *line, - const char *key, - const int64_t *values, int n) - { - return bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG); - } - - /* - * bcf_update_format_*() - functions for updating FORMAT fields - * @values: pointer to the array of values, the same number of elements - * is expected for each sample. Missing values must be padded - * with bcf_*_missing or bcf_*_vector_end values. - * @n: number of values in the array. If n==0, existing tag is removed. - * - * The function bcf_update_format_string() is a higher-level (slower) variant of - * bcf_update_format_char(). 
The former accepts array of \0-terminated strings - * whereas the latter requires that the strings are collapsed into a single array - * of fixed-length strings. In case of strings with variable length, shorter strings - * can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char() - * are not \0-terminated. - * - * Returns 0 on success or negative value on error. - */ - #define bcf_update_format_int32(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_INT) - #define bcf_update_format_float(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_REAL) - #define bcf_update_format_char(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_STR) - #define bcf_update_genotypes(hdr,line,gts,n) bcf_update_format((hdr),(line),"GT",(gts),(n),BCF_HT_INT) // See bcf_gt_ macros below - - HTSLIB_EXPORT - int bcf_update_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const char **values, int n); - - HTSLIB_EXPORT - int bcf_update_format(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type); - - // Macros for setting genotypes correctly, for use with bcf_update_genotypes only; idx corresponds - // to VCF's GT (1-based index to ALT or 0 for the reference allele) and val is the opposite, obtained - // from bcf_get_genotypes() below. - #define bcf_gt_phased(idx) (((idx)+1)<<1|1) - #define bcf_gt_unphased(idx) (((idx)+1)<<1) - #define bcf_gt_missing 0 - #define bcf_gt_is_missing(val) ((val)>>1 ? 0 : 1) - #define bcf_gt_is_phased(idx) ((idx)&1) - #define bcf_gt_allele(val) (((val)>>1)-1) - - /** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */ - #define bcf_alleles2gt(a,b) ((a)>(b)?((a)*((a)+1)/2+(b)):((b)*((b)+1)/2+(a))) - static inline void bcf_gt2alleles(int igt, int *a, int *b) - { - int k = 0, dk = 1; - while ( k=0 on success - * -1 .. 
no such INFO tag defined in the header - * -2 .. clash between types defined in the header and encountered in the VCF record - * -3 .. tag is not present in the VCF record - * -4 .. the operation could not be completed (e.g. out of memory) - * - * Returns negative value on error or the number of values (including - * missing values) put in *dst on success. bcf_get_info_string() returns - * on success the number of characters stored excluding the nul- - * terminating byte. bcf_get_info_flag() does not store anything in *dst - * but returns 1 if the flag is set or 0 if not. - * - * *dst will be reallocated if it is not big enough (i.e. *ndst is too - * small) or NULL on entry. The new size will be stored in *ndst. - */ - #define bcf_get_info_int32(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_INT) - #define bcf_get_info_float(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_REAL) - #define bcf_get_info_string(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_STR) - #define bcf_get_info_flag(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_FLAG) - - HTSLIB_EXPORT - int bcf_get_info_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type); - - /// Put integer INFO values into an int64_t array - /** - * @param hdr: BCF header - * @param line: BCF record - * @param tag: INFO tag to retrieve - * @param dst: *dst is pointer to a memory location, can point to NULL - * @param ndst: pointer to the size of allocated memory - * @return >=0 on success - * -1 .. no such INFO tag defined in the header - * -2 .. clash between types defined in the header and encountered in the VCF record - * -3 .. tag is not present in the VCF record - * -4 .. the operation could not be completed (e.g. 
out of memory) - * - * Returns negative value on error or the number of values (including - * missing values) put in *dst on success. - * - * *dst will be reallocated if it is not big enough (i.e. *ndst is too - * small) or NULL on entry. The new size will be stored in *ndst. - */ - static inline int bcf_get_info_int64(const bcf_hdr_t *hdr, bcf1_t *line, - const char *tag, int64_t **dst, - int *ndst) - { - return bcf_get_info_values(hdr, line, tag, - (void **) dst, ndst, BCF_HT_LONG); - } - - /** - * bcf_get_format_*() - same as bcf_get_info*() above - * - * The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char(). - * see the description of bcf_update_format_string() and bcf_update_format_char() above. - * Unlike other bcf_get_format__*() functions, bcf_get_format_string() allocates two arrays: - * a single block of \0-terminated strings collapsed into a single array and an array of pointers - * to these strings. Both arrays must be cleaned by the user. - * - * Returns negative value on error or the number of written values on success. - * - * Use the returned number of written values for accessing valid entries of dst, as ndst is only a - * watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over - * values from previous calls to bcf_get_format_*() on lines with more values per sample. 
- * - * Example: - * int ndst = 0; char **dst = NULL; - * if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 ) - * for (i=0; iid[type][int_id].key) - - /** - * bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID - * bcf_hdr_id2name() - Translates numeric ID to sequence name - */ - static inline int bcf_hdr_name2id(const bcf_hdr_t *hdr, const char *id) { return bcf_hdr_id2int(hdr, BCF_DT_CTG, id); } - static inline const char *bcf_hdr_id2name(const bcf_hdr_t *hdr, int rid) - { - if ( !hdr || rid<0 || rid>=hdr->n[BCF_DT_CTG] ) return NULL; - return hdr->id[BCF_DT_CTG][rid].key; - } - static inline const char *bcf_seqname(const bcf_hdr_t *hdr, const bcf1_t *rec) { - return bcf_hdr_id2name(hdr, rec ? rec->rid : -1); - } - - /** Return CONTIG name, or "(unknown)" - - Like bcf_seqname(), but this function will never return NULL. If - the contig name cannot be found (either because @p hdr was not - supplied or rec->rid was out of range) it returns the string - "(unknown)". - */ - static inline const char *bcf_seqname_safe(const bcf_hdr_t *hdr, const bcf1_t *rec) { - const char *name = bcf_seqname(hdr, rec); - return name ? name : "(unknown)"; - } - - /** - * bcf_hdr_id2*() - Macros for accessing bcf_idinfo_t - * @type: one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT - * @int_id: return value of bcf_hdr_id2int, must be >=0 - * - * The returned values are: - * bcf_hdr_id2length .. whether the number of values is fixed or variable, one of BCF_VL_* - * bcf_hdr_id2number .. the number of values, 0xfffff for variable length fields - * bcf_hdr_id2type .. the field type, one of BCF_HT_* - * bcf_hdr_id2coltype .. the column type, one of BCF_HL_* - * - * Notes: Prior to using the macros, the presence of the info should be - * tested with bcf_hdr_idinfo_exists(). 
- */ - #define bcf_hdr_id2length(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>8 & 0xf) - #define bcf_hdr_id2number(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>12) - #define bcf_hdr_id2type(hdr,type,int_id) (uint32_t)((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>4 & 0xf) - #define bcf_hdr_id2coltype(hdr,type,int_id) (uint32_t)((hdr)->id[BCF_DT_ID][int_id].val->info[type] & 0xf) - #define bcf_hdr_idinfo_exists(hdr,type,int_id) ((int_id)>=0 && (int_id)<(hdr)->n[BCF_DT_ID] && (hdr)->id[BCF_DT_ID][int_id].val && bcf_hdr_id2coltype((hdr),(type),(int_id))!=0xf) - #define bcf_hdr_id2hrec(hdr,dict_type,col_type,int_id) ((hdr)->id[(dict_type)==BCF_DT_CTG?BCF_DT_CTG:BCF_DT_ID][int_id].val->hrec[(dict_type)==BCF_DT_CTG?0:(col_type)]) - /// Convert BCF FORMAT data to string form - /** - * @param s kstring to write into - * @param n number of items in @p data - * @param type type of items in @p data - * @param data BCF format data - * @return 0 on success - * -1 if out of memory - */ - HTSLIB_EXPORT - int bcf_fmt_array(kstring_t *s, int n, int type, void *data); - - HTSLIB_EXPORT - uint8_t *bcf_fmt_sized_array(kstring_t *s, uint8_t *ptr); - - /// Encode a variable-length char array in BCF format - /** - * @param s kstring to write into - * @param l length of input - * @param a input data to encode - * @return 0 on success; < 0 on error - */ - HTSLIB_EXPORT - int bcf_enc_vchar(kstring_t *s, int l, const char *a); - - /// Encode a variable-length integer array in BCF format - /** - * @param s kstring to write into - * @param n total number of items in @p a (<= 0 to encode BCF_BT_NULL) - * @param a input data to encode - * @param wsize vector length (<= 0 is equivalent to @p n) - * @return 0 on success; < 0 on error - * @note @p n should be an exact multiple of @p wsize - */ - HTSLIB_EXPORT - int bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize); - - /// Encode a variable-length float array in BCF format - /** - * @param s kstring to 
write into - * @param n total number of items in @p a (<= 0 to encode BCF_BT_NULL) - * @param a input data to encode - * @return 0 on success; < 0 on error - */ - HTSLIB_EXPORT - int bcf_enc_vfloat(kstring_t *s, int n, float *a); - - - /************************************************************************** - * BCF index - * - * Note that these functions work with BCFs only. See synced_bcf_reader.h - * which provides (amongst other things) an API to work transparently with - * both indexed BCFs and VCFs. - **************************************************************************/ - - #define bcf_itr_destroy(iter) hts_itr_destroy(iter) - #define bcf_itr_queryi(idx, tid, beg, end) hts_itr_query((idx), (tid), (beg), (end), bcf_readrec) - #define bcf_itr_querys(idx, hdr, s) hts_itr_querys((idx), (s), (hts_name2id_f)(bcf_hdr_name2id), (hdr), hts_itr_query, bcf_readrec) - - static inline int bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) { - if (htsfp->is_bgzf) - return hts_itr_next(htsfp->fp.bgzf, itr, r, 0); - - hts_log_error("Only bgzf compressed files can be used with iterators"); - errno = EINVAL; - return -2; - } -/// Load a BCF index -/** @param fn BCF file name - @return The index, or NULL if an error occurred. - @note This only works for BCF files. Consider synced_bcf_reader instead -which works for both BCF and VCF. -*/ - #define bcf_index_load(fn) hts_idx_load(fn, HTS_FMT_CSI) - #define bcf_index_seqnames(idx, hdr, nptr) hts_idx_seqnames((idx),(nptr),(hts_id2name_f)(bcf_hdr_id2name),(hdr)) - -/// Load a BCF index from a given index file name -/** @param fn Input BAM/BCF/etc filename - @param fnidx The input index filename - @return The index, or NULL if an error occurred. - @note This only works for BCF files. Consider synced_bcf_reader instead -which works for both BCF and VCF. 
-*/ - HTSLIB_EXPORT - hts_idx_t *bcf_index_load2(const char *fn, const char *fnidx); - -/// Load a BCF index from a given index file name -/** @param fn Input BAM/BCF/etc filename - @param fnidx The input index filename - @param flags Flags to alter behaviour (see description) - @return The index, or NULL if an error occurred. - @note This only works for BCF files. Consider synced_bcf_reader instead -which works for both BCF and VCF. - - The @p flags parameter can be set to a combination of the following - values: - - HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes - HTS_IDX_SILENT_FAIL Fail silently if the index is not present - - Equivalent to hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags); -*/ - HTSLIB_EXPORT - hts_idx_t *bcf_index_load3(const char *fn, const char *fnidx, int flags); - - /** - * bcf_index_build() - Generate and save an index file - * @fn: Input VCF(compressed)/BCF filename - * @min_shift: log2(width of the smallest bin), e.g. a value of 14 - * imposes a 16k base lower limit on the width of index bins. - * Positive to generate CSI, or 0 to generate TBI. However, a small - * value of min_shift would create a large index, which would lead to - * reduced performance when using the index. A recommended value is 14. - * For BCF files, only the CSI index can be generated. - * - * Returns 0 if successful, or negative if an error occurred. - * - * List of error codes: - * -1 .. indexing failed - * -2 .. opening @fn failed - * -3 .. format not indexable - * -4 .. failed to create and/or save the index - */ - HTSLIB_EXPORT - int bcf_index_build(const char *fn, int min_shift); - - /** - * bcf_index_build2() - Generate and save an index to a specific file - * @fn: Input VCF/BCF filename - * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn - * @min_shift: Positive to generate CSI, or 0 to generate TBI - * - * Returns 0 if successful, or negative if an error occurred. - * - * List of error codes: - * -1 .. indexing failed - * -2 .. 
opening @fn failed - * -3 .. format not indexable - * -4 .. failed to create and/or save the index - */ - HTSLIB_EXPORT - int bcf_index_build2(const char *fn, const char *fnidx, int min_shift); - - /** - * bcf_index_build3() - Generate and save an index to a specific file - * @fn: Input VCF/BCF filename - * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn - * @min_shift: Positive to generate CSI, or 0 to generate TBI - * @n_threads: Number of VCF/BCF decoder threads - * - * Returns 0 if successful, or negative if an error occurred. - * - * List of error codes: - * -1 .. indexing failed - * -2 .. opening @fn failed - * -3 .. format not indexable - * -4 .. failed to create and/or save the index - */ - HTSLIB_EXPORT - int bcf_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads); - - /// Initialise fp->idx for the current format type, for VCF and BCF files. - /** @param fp File handle for the data file being written. - @param h BCF header structured (needed for BAI and CSI). - @param min_shift CSI bin size (CSI default is 14). - @param fnidx Filename to write index to. This pointer must remain valid - until after bcf_idx_save is called. - @return 0 on success, <0 on failure. - @note This must be called after the header has been written, but before - any other data. - */ - HTSLIB_EXPORT - int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx); - - /// Writes the index initialised with bcf_idx_init to disk. - /** @param fp File handle for the data file being written. - @return 0 on success, <0 on failure. - */ - HTSLIB_EXPORT - int bcf_idx_save(htsFile *fp); - -/******************* - * Typed value I/O * - *******************/ - -/* - Note that in contrast with BCFv2.1 specification, HTSlib implementation - allows missing values in vectors. For integer types, the values 0x80, - 0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001, - 0x80000001 as end-of-vector indicators. 
Similarly for floats, the value of - 0x7F800001 is interpreted as a missing value and 0x7F800002 as an - end-of-vector indicator. - Note that the end-of-vector byte is not part of the vector. - - This trial BCF version (v2.2) is compatible with the VCF specification and - enables to handle correctly vectors with different ploidy in presence of - missing values. - */ -#define bcf_int8_vector_end (-127) /* INT8_MIN + 1 */ -#define bcf_int16_vector_end (-32767) /* INT16_MIN + 1 */ -#define bcf_int32_vector_end (-2147483647) /* INT32_MIN + 1 */ -#define bcf_int64_vector_end (-9223372036854775807LL) /* INT64_MIN + 1 */ -#define bcf_str_vector_end 0 -#define bcf_int8_missing (-128) /* INT8_MIN */ -#define bcf_int16_missing (-32767-1) /* INT16_MIN */ -#define bcf_int32_missing (-2147483647-1) /* INT32_MIN */ -#define bcf_int64_missing (-9223372036854775807LL - 1LL) /* INT64_MIN */ -#define bcf_str_missing 0x07 - -// Limits on BCF values stored in given types. Max values are the same -// as for the underlying type. Min values are slightly different as -// the last 8 values for each type were reserved by BCFv2.2. -#define BCF_MAX_BT_INT8 (0x7f) /* INT8_MAX */ -#define BCF_MAX_BT_INT16 (0x7fff) /* INT16_MAX */ -#define BCF_MAX_BT_INT32 (0x7fffffff) /* INT32_MAX */ -#define BCF_MIN_BT_INT8 (-120) /* INT8_MIN + 8 */ -#define BCF_MIN_BT_INT16 (-32760) /* INT16_MIN + 8 */ -#define BCF_MIN_BT_INT32 (-2147483640) /* INT32_MIN + 8 */ - -HTSLIB_EXPORT -extern uint32_t bcf_float_vector_end; -HTSLIB_EXPORT -extern uint32_t bcf_float_missing; -static inline void bcf_float_set(float *ptr, uint32_t value) -{ - union { uint32_t i; float f; } u; - u.i = value; - *ptr = u.f; -} -#define bcf_float_set_vector_end(x) bcf_float_set(&(x),bcf_float_vector_end) -#define bcf_float_set_missing(x) bcf_float_set(&(x),bcf_float_missing) -static inline int bcf_float_is_missing(float f) -{ - union { uint32_t i; float f; } u; - u.f = f; - return u.i==bcf_float_missing ? 
1 : 0; -} -static inline int bcf_float_is_vector_end(float f) -{ - union { uint32_t i; float f; } u; - u.f = f; - return u.i==bcf_float_vector_end ? 1 : 0; -} - -static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str) -{ - uint32_t e = 0; - #define BRANCH(type_t, convert, missing, vector_end) { \ - uint8_t *ptr = fmt->p + isample*fmt->size; \ - int i; \ - for (i=0; in; i++, ptr += sizeof(type_t)) \ - { \ - type_t val = convert(ptr); \ - if ( val == vector_end ) break; \ - if ( i ) e |= kputc("/|"[val&1], str) < 0; \ - if ( !(val>>1) ) e |= kputc('.', str) < 0; \ - else e |= kputw((val>>1) - 1, str) < 0; \ - } \ - if (i == 0) e |= kputc('.', str) < 0; \ - } - switch (fmt->type) { - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break; - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break; - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break; - case BCF_BT_NULL: e |= kputc('.', str) < 0; break; - default: hts_log_error("Unexpected type %d", fmt->type); return -2; - } - #undef BRANCH - return e == 0 ? 0 : -1; -} - -static inline int bcf_enc_size(kstring_t *s, int size, int type) -{ - uint32_t e = 0; - uint8_t x[4]; - if (size >= 15) { - e |= kputc(15<<4|type, s) < 0; - if (size >= 128) { - if (size >= 32768) { - i32_to_le(size, x); - e |= kputc(1<<4|BCF_BT_INT32, s) < 0; - e |= kputsn((char*)&x, 4, s) < 0; - } else { - i16_to_le(size, x); - e |= kputc(1<<4|BCF_BT_INT16, s) < 0; - e |= kputsn((char*)&x, 2, s) < 0; - } - } else { - e |= kputc(1<<4|BCF_BT_INT8, s) < 0; - e |= kputc(size, s) < 0; - } - } else e |= kputc(size<<4|type, s) < 0; - return e == 0 ? 
0 : -1; -} - -static inline int bcf_enc_inttype(long x) -{ - if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) return BCF_BT_INT8; - if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) return BCF_BT_INT16; - return BCF_BT_INT32; -} - -static inline int bcf_enc_int1(kstring_t *s, int32_t x) -{ - uint32_t e = 0; - uint8_t z[4]; - if (x == bcf_int32_vector_end) { - e |= bcf_enc_size(s, 1, BCF_BT_INT8); - e |= kputc(bcf_int8_vector_end, s) < 0; - } else if (x == bcf_int32_missing) { - e |= bcf_enc_size(s, 1, BCF_BT_INT8); - e |= kputc(bcf_int8_missing, s) < 0; - } else if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) { - e |= bcf_enc_size(s, 1, BCF_BT_INT8); - e |= kputc(x, s) < 0; - } else if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) { - i16_to_le(x, z); - e |= bcf_enc_size(s, 1, BCF_BT_INT16); - e |= kputsn((char*)&z, 2, s) < 0; - } else { - i32_to_le(x, z); - e |= bcf_enc_size(s, 1, BCF_BT_INT32); - e |= kputsn((char*)&z, 4, s) < 0; - } - return e == 0 ? 0 : -1; -} - -/// Return the value of a single typed integer. -/** @param p Pointer to input data block. - @param type One of the BCF_BT_INT* type codes - @param[out] q Location to store an updated value for p - @return The integer value, or zero if @p type is not valid. - -If @p type is not one of BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or -BCF_BT_INT64, zero will be returned and @p *q will not be updated. -Otherwise, the integer value will be returned and @p *q will be set -to the memory location immediately following the integer value. - -Cautious callers can detect invalid type codes by checking that *q has -actually been updated. 
-*/ - -static inline int64_t bcf_dec_int1(const uint8_t *p, int type, uint8_t **q) -{ - if (type == BCF_BT_INT8) { - *q = (uint8_t*)p + 1; - return le_to_i8(p); - } else if (type == BCF_BT_INT16) { - *q = (uint8_t*)p + 2; - return le_to_i16(p); - } else if (type == BCF_BT_INT32) { - *q = (uint8_t*)p + 4; - return le_to_i32(p); - } else if (type == BCF_BT_INT64) { - *q = (uint8_t*)p + 8; - return le_to_i64(p); - } else { // Invalid type. - return 0; - } -} - -/// Return the value of a single typed integer from a byte stream. -/** @param p Pointer to input data block. - @param[out] q Location to store an updated value for p - @return The integer value, or zero if the type code was not valid. - -Reads a one-byte type code from @p p, and uses it to decode an integer -value from the following bytes in @p p. - -If the type is not one of BCF_BT_INT8, BCF_BT_INT16 or BCF_BT_INT32, zero -will be returned and @p *q will unchanged. Otherwise, the integer value will -be returned and @p *q will be set to the memory location immediately following -the integer value. - -Cautious callers can detect invalid type codes by checking that *q has -actually been updated. -*/ -static inline int64_t bcf_dec_typed_int1(const uint8_t *p, uint8_t **q) -{ - return bcf_dec_int1(p + 1, *p&0xf, q); -} - -static inline int32_t bcf_dec_size(const uint8_t *p, uint8_t **q, int *type) -{ - *type = *p & 0xf; - if (*p>>4 != 15) { - *q = (uint8_t*)p + 1; - return *p>>4; - } else return bcf_dec_typed_int1(p + 1, q); -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.18/kstring.c b/src/htslib-1.18/kstring.c deleted file mode 100644 index 71facf9..0000000 --- a/src/htslib-1.18/kstring.c +++ /dev/null @@ -1,444 +0,0 @@ -/* The MIT License - - Copyright (C) 2011 by Attractive Chaos - Copyright (C) 2013-2018, 2020-2021 Genome Research Ltd. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include "htslib/kstring.h" - -int kputd(double d, kstring_t *s) { - int len = 0; - char buf[21], *cp = buf+20, *ep; - if (d == 0) { - if (signbit(d)) { - kputsn("-0",2,s); - return 2; - } else { - kputsn("0",1,s); - return 1; - } - } - - if (d < 0) { - kputc('-',s); - len = 1; - d=-d; - } - if (!(d >= 0.0001 && d <= 999999)) { - if (ks_resize(s, s->l + 50) < 0) - return EOF; - // We let stdio handle the exponent cases - int s2 = snprintf(s->s + s->l, s->m - s->l, "%g", d); - len += s2; - s->l += s2; - return len; - } - - uint64_t i = d*10000000000LL; - // Correction for rounding - rather ugly - - // Optimised for small numbers. - // Better still would be __builtin_clz on hi/lo 32 and get the - // starting point very rapidly. 
- if (d<.0001) - i+=0; - else if (d<0.001) - i+=5; - else if (d < 0.01) - i+=50; - else if (d < 0.1) - i+=500; - else if (d < 1) - i+=5000; - else if (d < 10) - i+=50000; - else if (d < 100) - i+=500000; - else if (d < 1000) - i+=5000000; - else if (d < 10000) - i+=50000000; - else if (d < 100000) - i+=500000000; - else - i+=5000000000LL; - - do { - *--cp = '0' + i%10; - i /= 10; - } while (i >= 1); - buf[20] = 0; - int p = buf+20-cp; - if (p <= 10) { // d < 1 - //assert(d/1); - cp[6] = 0; ep = cp+5;// 6 precision - while (p < 10) { - *--cp = '0'; - p++; - } - *--cp = '.'; - *--cp = '0'; - } else { - char *xp = --cp; - while (p > 10) { - xp[0] = xp[1]; - p--; - xp++; - } - xp[0] = '.'; - cp[7] = 0; ep=cp+6; - if (cp[6] == '.') cp[6] = 0; - } - - // Cull trailing zeros - while (*ep == '0' && ep > cp) - ep--; - char *z = ep+1; - while (ep > cp) { - if (*ep == '.') { - if (z[-1] == '.') - z[-1] = 0; - else - z[0] = 0; - break; - } - ep--; - } - - int sl = strlen(cp); - len += sl; - kputsn(cp, sl, s); - return len; -} - -int kvsprintf(kstring_t *s, const char *fmt, va_list ap) -{ - va_list args; - int l; - va_copy(args, ap); - - if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) { - double d = va_arg(args, double); - l = kputd(d, s); - va_end(args); - return l; - } - - if (!s->s) { - const size_t sz = 64; - s->s = malloc(sz); - if (!s->s) - return -1; - s->m = sz; - s->l = 0; - } - - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'. - va_end(args); - if (l + 1 > s->m - s->l) { - if (ks_resize(s, s->l + l + 2) < 0) - return -1; - va_copy(args, ap); - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); - va_end(args); - } - s->l += l; - return l; -} - -int ksprintf(kstring_t *s, const char *fmt, ...) 
-{ - va_list ap; - int l; - va_start(ap, fmt); - l = kvsprintf(s, fmt, ap); - va_end(ap); - return l; -} - -char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux) -{ - const unsigned char *p, *start, *sep = (unsigned char *) sep_in; - if (sep) { // set up the table - if (str == 0 && aux->finished) return 0; // no need to set up if we have finished - aux->finished = 0; - if (sep[0] && sep[1]) { - aux->sep = -1; - aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0; - for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f); - } else aux->sep = sep[0]; - } - if (aux->finished) return 0; - else if (str) start = (unsigned char *) str, aux->finished = 0; - else start = (unsigned char *) aux->p + 1; - if (aux->sep < 0) { - for (p = start; *p; ++p) - if (aux->tab[*p>>6]>>(*p&0x3f)&1) break; - } else { - for (p = start; *p; ++p) - if (*p == aux->sep) break; - } - aux->p = (const char *) p; // end of token - if (*p == 0) aux->finished = 1; // no more tokens - return (char*)start; -} - -// s MUST BE a null terminated string; l = strlen(s) -int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) -{ - int i, n, max, last_char, last_start, *offsets, l; - n = 0; max = *_max; offsets = *_offsets; - l = strlen(s); - -#define __ksplit_aux do { \ - if (_offsets) { \ - s[i] = 0; \ - if (n == max) { \ - int *tmp; \ - max = max? 
max<<1 : 2; \ - if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \ - offsets = tmp; \ - } else { \ - free(offsets); \ - *_offsets = NULL; \ - return 0; \ - } \ - } \ - offsets[n++] = last_start; \ - } else ++n; \ - } while (0) - - for (i = 0, last_char = last_start = 0; i <= l; ++i) { - if (delimiter == 0) { - if (isspace((int)((unsigned char) s[i])) || s[i] == 0) { - if (isgraph(last_char)) - __ksplit_aux; // the end of a field - } else { - if (isspace(last_char) || last_char == 0) - last_start = i; - } - } else { - if (s[i] == delimiter || s[i] == 0) { - if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field - } else { - if (last_char == delimiter || last_char == 0) last_start = i; - } - } - last_char = (int)((unsigned char)s[i]); - } - *_max = max; *_offsets = offsets; - return n; -} - -int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp) -{ - size_t l0 = s->l; - - while (s->l == l0 || s->s[s->l-1] != '\n') { - if (s->m - s->l < 200) { - if (ks_resize(s, s->m + 200) < 0) - return EOF; - } - if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break; - s->l += strlen(s->s + s->l); - } - - if (s->l == l0) return EOF; - - if (s->l > l0 && s->s[s->l-1] == '\n') { - s->l--; - if (s->l > l0 && s->s[s->l-1] == '\r') s->l--; - } - s->s[s->l] = '\0'; - return 0; -} - -int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp) -{ - size_t l0 = s->l; - - while (s->l == l0 || s->s[s->l-1] != '\n') { - if (s->m - s->l < 200) { - // We return EOF for both EOF and error and the caller - // needs to check for errors in fp, and we haven't - // even got there yet. - // - // The only way of propagating memory errors is to - // deliberately call something that we know triggers - // and error so fp is also set. This works for - // hgets, but not for gets where reading <= 0 bytes - // isn't an error. 
- if (ks_resize(s, s->m + 200) < 0) { - fgets_fn(s->s + s->l, 0, fp); - return EOF; - } - } - ssize_t len = fgets_fn(s->s + s->l, s->m - s->l, fp); - if (len <= 0) break; - s->l += len; - } - - if (s->l == l0) return EOF; - - if (s->l > l0 && s->s[s->l-1] == '\n') { - s->l--; - if (s->l > l0 && s->s[s->l-1] == '\r') s->l--; - } - s->s[s->l] = '\0'; - return 0; -} - -/********************** - * Boyer-Moore search * - **********************/ - -typedef unsigned char ubyte_t; - -// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html -static int *ksBM_prep(const ubyte_t *pat, int m) -{ - int i, *suff, *prep, *bmGs, *bmBc; - prep = (int*)calloc(m + 256, sizeof(int)); - if (!prep) return NULL; - bmGs = prep; bmBc = prep + m; - { // preBmBc() - for (i = 0; i < 256; ++i) bmBc[i] = m; - for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1; - } - suff = (int*)calloc(m, sizeof(int)); - if (!suff) { free(prep); return NULL; } - { // suffixes() - int f = 0, g; - suff[m - 1] = m; - g = m - 1; - for (i = m - 2; i >= 0; --i) { - if (i > g && suff[i + m - 1 - f] < i - g) - suff[i] = suff[i + m - 1 - f]; - else { - if (i < g) g = i; - f = i; - while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; - suff[i] = f - g; - } - } - } - { // preBmGs() - int j = 0; - for (i = 0; i < m; ++i) bmGs[i] = m; - for (i = m - 1; i >= 0; --i) - if (suff[i] == i + 1) - for (; j < m - 1 - i; ++j) - if (bmGs[j] == m) - bmGs[j] = m - 1 - i; - for (i = 0; i <= m - 2; ++i) - bmGs[m - 1 - suff[i]] = m - 1 - i; - } - free(suff); - return prep; -} - -void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep) -{ - int i, j, *prep = 0, *bmGs, *bmBc; - const ubyte_t *str, *pat; - str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat; - prep = (_prep == 0 || *_prep == 0)? 
ksBM_prep(pat, m) : *_prep; - if (!prep) return NULL; - if (_prep && *_prep == 0) *_prep = prep; - bmGs = prep; bmBc = prep + m; - j = 0; - while (j <= n - m) { - for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); - if (i >= 0) { - int max = bmBc[str[i+j]] - m + 1 + i; - if (max < bmGs[i]) max = bmGs[i]; - j += max; - } else return (void*)(str + j); - } - if (_prep == 0) free(prep); - return 0; -} - -char *kstrstr(const char *str, const char *pat, int **_prep) -{ - return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep); -} - -char *kstrnstr(const char *str, const char *pat, int n, int **_prep) -{ - return (char*)kmemmem(str, n, pat, strlen(pat), _prep); -} - -/*********************** - * The main() function * - ***********************/ - -#ifdef KSTRING_MAIN -#include -int main() -{ - kstring_t *s; - int *fields, n, i; - ks_tokaux_t aux; - char *p; - s = (kstring_t*)calloc(1, sizeof(kstring_t)); - // test ksprintf() - ksprintf(s, " abcdefg: %d ", 100); - printf("'%s'\n", s->s); - // test ksplit() - fields = ksplit(s, 0, &n); - for (i = 0; i < n; ++i) - printf("field[%d] = '%s'\n", i, s->s + fields[i]); - // test kstrtok() - s->l = 0; - for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) { - kputsn(p, aux.p - p, s); - kputc('\n', s); - } - printf("%s", s->s); - // free - free(s->s); free(s); free(fields); - - { - static char *str = "abcdefgcdgcagtcakcdcd"; - static char *pat = "cd"; - char *ret, *s = str; - int *prep = 0; - while ((ret = kstrstr(s, pat, &prep)) != 0) { - printf("match: %s\n", ret); - s = ret + prep[0]; - } - free(prep); - } - return 0; -} -#endif diff --git a/src/htslib-1.18/m4/hts_check_compile_flags_needed.m4 b/src/htslib-1.18/m4/hts_check_compile_flags_needed.m4 deleted file mode 100644 index fb668e8..0000000 --- a/src/htslib-1.18/m4/hts_check_compile_flags_needed.m4 +++ /dev/null @@ -1,63 +0,0 @@ -# hts_check_compile_flags_needed.m4 -# -# SYNOPSIS -# -# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], 
[ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) -# -# DESCRIPTION -# -# Check whether the given FLAGS are required to build and link INPUT with -# the current language's compiler. Compilation and linking are first -# tries without FLAGS. If that fails it then tries to compile and -# link again with FLAGS. -# -# FEATURE describes the feature being tested, and is used when printing -# messages and to name the cache entry (along with the tested flags). -# -# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on -# success/failure. In ACTION-SUCCESS, $flags_needed will be set to -# either an empty string or FLAGS depending on the test results. -# -# If EXTRA-FLAGS is defined, it is added to the current language's default -# flags (e.g. CFLAGS) when the check is done. The check is thus made with -# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to -# force the compiler to issue an error when a bad flag is given. -# -# If omitted, INPUT defaults to AC_LANG_PROGRAM(), although that probably -# isn't very useful. -# -# NOTE: Implementation based on AX_CHECK_COMPILE_FLAG. -# -# LICENSE -# -# Copyright (c) 2008 Guido U. Draheim -# Copyright (c) 2011 Maarten Bosmans -# Copyright (c) 2023 Robert Davies -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. 
- -# AX_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) - -AC_DEFUN([HTS_CHECK_COMPILE_FLAGS_NEEDED], -[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF -AS_VAR_PUSHDEF([CACHEVAR],[hts_cv_check_[]_AC_LANG_ABBREV[]flags_needed_$1_$6_$2])dnl -AC_CACHE_CHECK([_AC_LANG compiler flags needed for $1], CACHEVAR, [ - AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])], - [AS_VAR_SET(CACHEVAR,[none])], - [ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $6 $2" - AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])], - [AS_VAR_SET(CACHEVAR,[$2])], - [AS_VAR_SET(CACHEVAR,[unsupported])]) - _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])]) -AS_VAR_IF(CACHEVAR,unsupported, [ - m4_default([$5], :) -], [ - AS_VAR_IF(CACHEVAR,none,[flags_needed=""], [flags_needed="$CACHEVAR"]) - m4_default([$4], :) -]) -AS_VAR_POPDEF([CACHEVAR])dnl -])dnl HTS_CHECK_COMPILE_FLAGS_NEEDED diff --git a/src/htslib-1.18/regidx.c b/src/htslib-1.18/regidx.c deleted file mode 100644 index 67b3568..0000000 --- a/src/htslib-1.18/regidx.c +++ /dev/null @@ -1,686 +0,0 @@ -/* - Copyright (C) 2014-2019 Genome Research Ltd. - - Author: Petr Danecek - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include -#include -#include -#include "htslib/hts.h" -#include "htslib/kstring.h" -#include "htslib/kseq.h" -#include "htslib/khash_str2int.h" -#include "htslib/regidx.h" -#include "hts_internal.h" - -#define MAX_COOR_0 REGIDX_MAX // CSI and hts_itr_query limit, 0-based - -#define iBIN(x) ((x)>>13) - -typedef struct -{ - hts_pos_t beg, end; -} -reg_t; - -typedef struct -{ - hts_pos_t pos; // position - uint32_t ireg; // index to reglist.reg and reglist.dat -} -pos_t; - -typedef struct reglist_t reglist_t; - -typedef struct -{ - hts_pos_t beg, end; // query region - uint32_t ireg; // index of active region - regidx_t *ridx; - reglist_t *list; - int active; -} -itr_t_; - -// List of regions for one chromosome. 
-struct reglist_t -{ - uint32_t *idx, nidx; // index to list.reg+1 - uint32_t nreg, mreg; // n:used, m:allocated - reg_t *reg; // regions - uint8_t *dat; // payload data - char *seq; // sequence name - int unsorted; -}; - -// Container of all sequences -struct regidx_t -{ - int nseq, mseq; // n:used, m:alloced - reglist_t *seq; // regions for each sequence - void *seq2regs; // hash for fast lookup from chr name to regions - char **seq_names; - regidx_free_f free; // function to free any data allocated by regidx_parse_f - regidx_parse_f parse; // parse one input line - void *usr; // user data to pass to regidx_parse_f - int payload_size; - void *payload; // temporary payload data set by regidx_parse_f (sequence is not known beforehand) - kstring_t str; -}; - -int regidx_seq_nregs(regidx_t *idx, const char *seq) -{ - int iseq; - if ( khash_str2int_get(idx->seq2regs, seq, &iseq)!=0 ) return 0; // no such sequence - return idx->seq[iseq].nreg; -} - -int regidx_nregs(regidx_t *idx) -{ - int i, nreg = 0; - for (i=0; inseq; i++) nreg += idx->seq[i].nreg; - return nreg; -} - -char **regidx_seq_names(regidx_t *idx, int *n) -{ - *n = idx->nseq; - return idx->seq_names; -} - -int regidx_insert_list(regidx_t *idx, char *line, char delim) -{ - kstring_t tmp = KS_INITIALIZE; - char *ss = line; - while ( *ss ) - { - char *se = ss; - while ( *se && *se!=delim ) se++; - kputsn(ss, se-ss, ks_clear(&tmp)); - if ( regidx_insert(idx,tmp.s) < 0 ) - { - ks_free(&tmp); - return -1; - } - if ( !*se ) break; - ss = se+1; - } - ks_free(&tmp); - return 0; -} - -static inline int cmp_regs(reg_t *a, reg_t *b) -{ - if ( a->beg < b->beg ) return -1; - if ( a->beg > b->beg ) return 1; - if ( a->end < b->end ) return 1; // longer intervals come first - if ( a->end > b->end ) return -1; - return 0; -} -static int cmp_reg_ptrs(const void *a, const void *b) -{ - return cmp_regs((reg_t*)a,(reg_t*)b); -} -static int cmp_reg_ptrs2(const void *a, const void *b) -{ - return 
cmp_regs(*((reg_t**)a),*((reg_t**)b)); -} - -int regidx_push(regidx_t *idx, char *chr_beg, char *chr_end, hts_pos_t beg, hts_pos_t end, void *payload) -{ - if (beg < 0) beg = 0; - if (end < 0) end = 0; - if ( beg > MAX_COOR_0 ) beg = MAX_COOR_0; - if ( end > MAX_COOR_0 ) end = MAX_COOR_0; - - int rid; - if (kputsn(chr_beg, chr_end-chr_beg+1, ks_clear(&idx->str)) < 0) return -1; - if ( khash_str2int_get(idx->seq2regs, idx->str.s, &rid)!=0 ) - { - // new chromosome - int m_tmp = idx->mseq; - if (hts_resize(char*, idx->nseq + 1, &m_tmp, - &idx->seq_names, HTS_RESIZE_CLEAR) < 0) { - return -1; - } - if (hts_resize(reglist_t, idx->nseq + 1, &idx->mseq, - &idx->seq, HTS_RESIZE_CLEAR) < 0) { - return -1; - } - assert(m_tmp == idx->mseq); - idx->seq_names[idx->nseq] = strdup(idx->str.s); - rid = khash_str2int_inc(idx->seq2regs, idx->seq_names[idx->nseq]); - idx->nseq++; - } - - reglist_t *list = &idx->seq[rid]; - list->seq = idx->seq_names[rid]; - int mreg = list->mreg; - if (hts_resize(reg_t, list->nreg + 1, &list->mreg, &list->reg, 0) < 0) - return -1; - list->reg[list->nreg].beg = beg; - list->reg[list->nreg].end = end; - if ( idx->payload_size ) { - if ( mreg != list->mreg ) { - uint8_t *new_dat = realloc(list->dat, idx->payload_size*list->mreg); - if (!new_dat) return -1; - list->dat = new_dat; - } - memcpy(list->dat + idx->payload_size*list->nreg, payload, idx->payload_size); - } - list->nreg++; - if ( !list->unsorted && list->nreg>1 && cmp_regs(&list->reg[list->nreg-2],&list->reg[list->nreg-1])>0 ) list->unsorted = 1; - return 0; -} - -int regidx_insert(regidx_t *idx, char *line) -{ - if ( !line ) return 0; - char *chr_from, *chr_to; - hts_pos_t beg,end; - int ret = idx->parse(line,&chr_from,&chr_to,&beg,&end,idx->payload,idx->usr); - if ( ret==-2 ) return -1; // error - if ( ret==-1 ) return 0; // skip the line - return regidx_push(idx, chr_from,chr_to,beg,end,idx->payload); -} - -regidx_t *regidx_init_string(const char *str, regidx_parse_f parser, regidx_free_f 
free_f, size_t payload_size, void *usr_dat) -{ - kstring_t tmp = KS_INITIALIZE; - regidx_t *idx = (regidx_t*) calloc(1,sizeof(regidx_t)); - if ( !idx ) return NULL; - - idx->free = free_f; - idx->parse = parser ? parser : regidx_parse_tab; - idx->usr = usr_dat; - idx->seq2regs = khash_str2int_init(); - if (!idx->seq2regs) goto fail; - idx->payload_size = payload_size; - if ( payload_size ) { - idx->payload = malloc(payload_size); - if (!idx->payload) goto fail; - } - - const char *ss = str; - while ( *ss ) - { - while ( *ss && isspace_c(*ss) ) ss++; - const char *se = ss; - while ( *se && *se!='\r' && *se!='\n' ) se++; - if (kputsn(ss, se-ss, ks_clear(&tmp)) < 0) goto fail; - if (regidx_insert(idx, tmp.s) < 0) goto fail; - while ( *se && isspace_c(*se) ) se++; - ss = se; - } - ks_free(&tmp); - return idx; - - fail: - regidx_destroy(idx); - ks_free(&tmp); - return NULL; -} - -regidx_t *regidx_init(const char *fname, regidx_parse_f parser, regidx_free_f free_f, size_t payload_size, void *usr_dat) -{ - if ( !parser ) - { - if ( !fname ) parser = regidx_parse_tab; - else - { - int len = strlen(fname); - if ( len>=7 && !strcasecmp(".bed.gz",fname+len-7) ) - parser = regidx_parse_bed; - else if ( len>=8 && !strcasecmp(".bed.bgz",fname+len-8) ) - parser = regidx_parse_bed; - else if ( len>=4 && !strcasecmp(".bed",fname+len-4) ) - parser = regidx_parse_bed; - else if ( len>=4 && !strcasecmp(".vcf",fname+len-4) ) - parser = regidx_parse_vcf; - else if ( len>=7 && !strcasecmp(".vcf.gz",fname+len-7) ) - parser = regidx_parse_vcf; - else - parser = regidx_parse_tab; - } - } - - kstring_t str = KS_INITIALIZE; - htsFile *fp = NULL; - int ret; - regidx_t *idx = (regidx_t*) calloc(1,sizeof(regidx_t)); - if (!idx) return NULL; - idx->free = free_f; - idx->parse = parser; - idx->usr = usr_dat; - idx->seq2regs = khash_str2int_init(); - if (!idx->seq2regs) goto error; - idx->payload_size = payload_size; - if ( payload_size ) { - idx->payload = malloc(payload_size); - if 
(!idx->payload) goto error; - } - - if ( !fname ) return idx; - - fp = hts_open(fname,"r"); - if ( !fp ) goto error; - - while ((ret = hts_getline(fp, KS_SEP_LINE, &str)) > 0 ) { - if ( regidx_insert(idx, str.s) ) goto error; - } - if (ret < -1) goto error; - - ret = hts_close(fp); - fp = NULL; - if ( ret != 0 ) { - hts_log_error("Close failed .. %s", fname); - goto error; - } - ks_free(&str); - return idx; - -error: - ks_free(&str); - if ( fp ) hts_close(fp); - regidx_destroy(idx); - return NULL; -} - -void regidx_destroy(regidx_t *idx) -{ - int i, j; - if (!idx) return; - for (i=0; inseq; i++) - { - reglist_t *list = &idx->seq[i]; - if ( idx->free ) - { - for (j=0; jnreg; j++) - idx->free((char *)list->dat + idx->payload_size*j); - } - free(list->dat); - free(list->reg); - free(list->idx); - } - free(idx->seq_names); - free(idx->seq); - free(idx->str.s); - free(idx->payload); - khash_str2int_destroy_free(idx->seq2regs); - free(idx); -} - -static int reglist_build_index_(regidx_t *regidx, reglist_t *list) -{ - int i; - if ( list->unsorted ) { - if ( !regidx->payload_size ) { - qsort(list->reg,list->nreg,sizeof(reg_t),cmp_reg_ptrs); - } else { - reg_t **ptr = malloc(sizeof(*ptr)*list->nreg); - if (!ptr) return -1; - for (i=0; inreg; i++) ptr[i] = list->reg + i; - qsort(ptr,list->nreg,sizeof(*ptr),cmp_reg_ptrs2); - - uint8_t *tmp_dat = malloc(regidx->payload_size*list->nreg); - if (!tmp_dat) { free(ptr); return -1; } - for (i=0; inreg; i++) { - size_t iori = ptr[i] - list->reg; - memcpy(tmp_dat+i*regidx->payload_size, - list->dat+iori*regidx->payload_size, - regidx->payload_size); - } - free(list->dat); - list->dat = tmp_dat; - - reg_t *tmp_reg = (reg_t*) malloc(sizeof(reg_t)*list->nreg); - if (!tmp_reg) { free(ptr); return -1; } - for (i=0; inreg; i++) { - size_t iori = ptr[i] - list->reg; - tmp_reg[i] = list->reg[iori]; - } - free(ptr); - free(list->reg); - list->reg = tmp_reg; - list->mreg = list->nreg; - } - list->unsorted = 0; - } - - list->nidx = 0; - uint32_t 
j,k, midx = 0; - // Find highest index bin. It's possible that we could just look at - // the last region, but go through the list in case some entries overlap. - for (j=0; jnreg; j++) { - int iend = iBIN(list->reg[j].end); - if (midx <= iend) midx = iend; - } - midx++; - uint32_t *new_idx = calloc(midx, sizeof(uint32_t)); - if (!new_idx) return -1; - free(list->idx); // Should be NULL on entry, but just in case... - list->idx = new_idx; - list->nidx = midx; - - for (j=0; jnreg; j++) { - int ibeg = iBIN(list->reg[j].beg); - int iend = iBIN(list->reg[j].end); - if ( ibeg==iend ) { - if ( !list->idx[ibeg] ) list->idx[ibeg] = j + 1; - } else { - for (k=ibeg; k<=iend; k++) - if ( !list->idx[k] ) list->idx[k] = j + 1; - } - } - - return 0; -} - -int regidx_overlap(regidx_t *regidx, const char *chr, hts_pos_t beg, hts_pos_t end, regitr_t *regitr) -{ - if ( regitr ) regitr->seq = NULL; - - int iseq, ireg; - if ( khash_str2int_get(regidx->seq2regs, chr, &iseq)!=0 ) return 0; // no such sequence - - reglist_t *list = ®idx->seq[iseq]; - if ( !list->nreg ) return 0; - - if ( list->nreg==1 ) - { - if ( beg > list->reg[0].end ) return 0; - if ( end < list->reg[0].beg ) return 0; - ireg = 0; - } - else - { - if ( !list->idx ) { - if (reglist_build_index_(regidx,list) < 0) return -1; - } - - int ibeg = iBIN(beg); - if ( ibeg >= list->nidx ) return 0; // beg is too big - - // find a matching region - uint32_t i = list->idx[ibeg]; - if ( !i ) - { - int iend = iBIN(end); - if ( iend > list->nidx ) iend = list->nidx; - for (i=ibeg; i<=iend; i++) - if ( list->idx[i] ) break; - if ( i>iend ) return 0; - i = list->idx[i]; - } - for (ireg=i-1; iregnreg; ireg++) - { - if ( list->reg[ireg].beg > end ) return 0; // no match, past the query region - if ( list->reg[ireg].end >= beg && list->reg[ireg].beg <= end ) break; // found - } - - if ( ireg >= list->nreg ) return 0; // no match - } - - if ( !regitr ) return 1; // match, but no more info to save - - // may need to iterate over the 
matching regions later - itr_t_ *itr = (itr_t_*)regitr->itr; - itr->ridx = regidx; - itr->list = list; - itr->beg = beg; - itr->end = end; - itr->ireg = ireg; - itr->active = 0; - - regitr->seq = list->seq; - regitr->beg = list->reg[ireg].beg; - regitr->end = list->reg[ireg].end; - if ( regidx->payload_size ) - regitr->payload = list->dat + regidx->payload_size*ireg; - - return 1; -} - -int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, hts_pos_t *beg, hts_pos_t *end, void *payload, void *usr) -{ - char *ss = (char*) line; - while ( *ss && isspace_c(*ss) ) ss++; - if ( !*ss ) return -1; // skip blank lines - if ( *ss=='#' ) return -1; // skip comments - - char *se = ss; - while ( *se && !isspace_c(*se) ) se++; - - *chr_beg = ss; - *chr_end = se-1; - - if ( !*se ) - { - // just the chromosome name - *beg = 0; - *end = MAX_COOR_0; - return 0; - } - - ss = se+1; - *beg = hts_parse_decimal(ss, &se, 0); - if ( ss==se ) { hts_log_error("Could not parse bed line: %s", line); return -2; } - - ss = se+1; - *end = hts_parse_decimal(ss, &se, 0) - 1; - if ( ss==se ) { hts_log_error("Could not parse bed line: %s", line); return -2; } - - return 0; -} - -int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, hts_pos_t *beg, hts_pos_t *end, void *payload, void *usr) -{ - char *ss = (char*) line; - while ( *ss && isspace_c(*ss) ) ss++; - if ( !*ss ) return -1; // skip blank lines - if ( *ss=='#' ) return -1; // skip comments - - char *se = ss; - while ( *se && !isspace_c(*se) ) se++; - - *chr_beg = ss; - *chr_end = se-1; - - if ( !*se ) - { - // just the chromosome name - *beg = 0; - *end = MAX_COOR_0; - return 0; - } - - ss = se+1; - *beg = hts_parse_decimal(ss, &se, 0); - if ( ss==se ) { hts_log_error("Could not parse tab line: %s", line); return -2; } - if ( *beg==0 ) { hts_log_error("Could not parse tab line, expected 1-based coordinate: %s", line); return -2; } - (*beg)--; - - if ( !se[0] || !se[1] ) - *end = *beg; - else - { - ss = se+1; - 
*end = hts_parse_decimal(ss, &se, 0); - if ( ss==se || (*se && !isspace_c(*se)) ) *end = *beg; - else if ( *end==0 ) { hts_log_error("Could not parse tab line, expected 1-based coordinate: %s", line); return -2; } - else (*end)--; - } - return 0; -} - -int regidx_parse_vcf(const char *line, char **chr_beg, char **chr_end, hts_pos_t *beg, hts_pos_t *end, void *payload, void *usr) -{ - int ret = regidx_parse_tab(line, chr_beg, chr_end, beg, end, payload, usr); - if ( !ret ) *end = *beg; - return ret; -} - -int regidx_parse_reg(const char *line, char **chr_beg, char **chr_end, hts_pos_t *beg, hts_pos_t *end, void *payload, void *usr) -{ - char *ss = (char*) line; - while ( *ss && isspace_c(*ss) ) ss++; - if ( !*ss ) return -1; // skip blank lines - if ( *ss=='#' ) return -1; // skip comments - - char *se = ss; - while ( *se && *se!=':' ) se++; - - *chr_beg = ss; - *chr_end = se-1; - - if ( !*se ) - { - *beg = 0; - *end = MAX_COOR_0; - return 0; - } - - ss = se+1; - *beg = hts_parse_decimal(ss, &se, 0); - if ( ss==se ) { hts_log_error("Could not parse reg line: %s", line); return -2; } - if ( *beg==0 ) { hts_log_error("Could not parse reg line, expected 1-based coordinate: %s", line); return -2; } - (*beg)--; - - if ( !se[0] || !se[1] ) - *end = se[0]=='-' ? 
MAX_COOR_0 : *beg; - else - { - ss = se+1; - *end = hts_parse_decimal(ss, &se, 0); - if ( ss==se ) *end = *beg; - else if ( *end==0 ) { hts_log_error("Could not parse reg line, expected 1-based coordinate: %s", line); return -2; } - else (*end)--; - } - return 0; -} - -regitr_t *regitr_init(regidx_t *regidx) -{ - regitr_t *regitr = (regitr_t*) calloc(1,sizeof(regitr_t)); - if (!regitr) return NULL; - regitr->itr = (itr_t_*) calloc(1,sizeof(itr_t_)); - if (!regitr->itr) { - free(regitr); - return NULL; - } - itr_t_ *itr = (itr_t_*) regitr->itr; - itr->ridx = regidx; - itr->list = NULL; - return regitr; -} - -void regitr_reset(regidx_t *regidx, regitr_t *regitr) -{ - itr_t_ *itr = (itr_t_*) regitr->itr; - memset(itr,0,sizeof(itr_t_)); - itr->ridx = regidx; -} - -void regitr_destroy(regitr_t *regitr) -{ - free(regitr->itr); - free(regitr); -} - -int regitr_overlap(regitr_t *regitr) -{ - if ( !regitr || !regitr->seq || !regitr->itr ) return 0; - - itr_t_ *itr = (itr_t_*) regitr->itr; - if ( !itr->active ) - { - // is this the first call after regidx_overlap? 
- itr->active = 1; - itr->ireg++; - return 1; - } - - reglist_t *list = itr->list; - - int i; - for (i=itr->ireg; inreg; i++) - { - if ( list->reg[i].beg > itr->end ) return 0; // no match, past the query region - if ( list->reg[i].end >= itr->beg && list->reg[i].beg <= itr->end ) break; // found - } - - if ( i >= list->nreg ) return 0; // no match - - itr->ireg = i + 1; - regitr->seq = list->seq; - regitr->beg = list->reg[i].beg; - regitr->end = list->reg[i].end; - if ( itr->ridx->payload_size ) - regitr->payload = (char *)list->dat + itr->ridx->payload_size*i; - - return 1; -} - -int regitr_loop(regitr_t *regitr) -{ - if ( !regitr || !regitr->itr ) return 0; - - itr_t_ *itr = (itr_t_*) regitr->itr; - regidx_t *regidx = itr->ridx; - - if ( !itr->list ) // first time here - { - itr->list = regidx->seq; - itr->ireg = 0; - } - - size_t iseq = itr->list - regidx->seq; - if ( iseq >= regidx->nseq ) return 0; - - if ( itr->ireg >= itr->list->nreg ) - { - iseq++; - if ( iseq >= regidx->nseq ) return 0; // no more sequences, done - itr->ireg = 0; - itr->list = ®idx->seq[iseq]; - } - - regitr->seq = itr->list->seq; - regitr->beg = itr->list->reg[itr->ireg].beg; - regitr->end = itr->list->reg[itr->ireg].end; - if ( regidx->payload_size ) - regitr->payload = (char *)itr->list->dat + regidx->payload_size*itr->ireg; - itr->ireg++; - - return 1; -} - - -void regitr_copy(regitr_t *dst, regitr_t *src) -{ - itr_t_ *dst_itr = (itr_t_*) dst->itr; - itr_t_ *src_itr = (itr_t_*) src->itr; - *dst_itr = *src_itr; - *dst = *src; - dst->itr = dst_itr; -} diff --git a/src/htslib-1.18/sam.c b/src/htslib-1.18/sam.c deleted file mode 100644 index 48623e0..0000000 --- a/src/htslib-1.18/sam.c +++ /dev/null @@ -1,6107 +0,0 @@ -/* sam.c -- SAM and BAM file I/O and manipulation. - - Copyright (C) 2008-2010, 2012-2023 Genome Research Ltd. - Copyright (C) 2010, 2012, 2013 Broad Institute. 
- - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Suppress deprecation message for cigar_tab, which we initialise -#include "htslib/hts_defs.h" -#undef HTS_DEPRECATED -#define HTS_DEPRECATED(message) - -#include "htslib/sam.h" -#include "htslib/bgzf.h" -#include "cram/cram.h" -#include "hts_internal.h" -#include "sam_internal.h" -#include "htslib/hfile.h" -#include "htslib/hts_endian.h" -#include "htslib/hts_expr.h" -#include "header.h" - -#include "htslib/khash.h" -KHASH_DECLARE(s2i, kh_cstr_t, int64_t) -KHASH_SET_INIT_INT(tag) - -#ifndef EFTYPE -#define EFTYPE ENOEXEC -#endif -#ifndef EOVERFLOW -#define EOVERFLOW ERANGE -#endif - -/********************** - *** BAM header I/O *** - **********************/ - -HTSLIB_EXPORT -const int8_t bam_cigar_table[256] = { - // 0 .. 
47 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - // 48 .. 63 (including =) - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, BAM_CEQUAL, -1, -1, - - // 64 .. 79 (including MIDNHB) - -1, -1, BAM_CBACK, -1, BAM_CDEL, -1, -1, -1, - BAM_CHARD_CLIP, BAM_CINS, -1, -1, -1, BAM_CMATCH, BAM_CREF_SKIP, -1, - - // 80 .. 95 (including SPX) - BAM_CPAD, -1, -1, BAM_CSOFT_CLIP, -1, -1, -1, -1, - BAM_CDIFF, -1, -1, -1, -1, -1, -1, -1, - - // 96 .. 127 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - // 128 .. 255 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -}; - -sam_hdr_t *sam_hdr_init(void) -{ - sam_hdr_t *bh = (sam_hdr_t*)calloc(1, sizeof(sam_hdr_t)); - if (bh == NULL) return NULL; - - bh->cigar_tab = bam_cigar_table; - return bh; -} - -void sam_hdr_destroy(sam_hdr_t *bh) -{ - int32_t i; - - if (bh == NULL) return; - - if (bh->ref_count > 0) { - --bh->ref_count; - return; - } - - if (bh->target_name) { - for (i = 0; i < bh->n_targets; ++i) - free(bh->target_name[i]); - free(bh->target_name); - free(bh->target_len); - } - free(bh->text); - if (bh->hrecs) - sam_hrecs_free(bh->hrecs); - if (bh->sdict) - kh_destroy(s2i, (khash_t(s2i) *) bh->sdict); - free(bh); -} - -// Copy the sam_hdr_t::sdict hash, used to store the real lengths of long -// references before 
sam_hdr_t::hrecs is populated -int sam_hdr_dup_sdict(const sam_hdr_t *h0, sam_hdr_t *h) -{ - const khash_t(s2i) *src_long_refs = (khash_t(s2i) *) h0->sdict; - khash_t(s2i) *dest_long_refs = kh_init(s2i); - int i; - if (!dest_long_refs) return -1; - - for (i = 0; i < h->n_targets; i++) { - int ret; - khiter_t ksrc, kdest; - if (h->target_len[i] < UINT32_MAX) continue; - ksrc = kh_get(s2i, src_long_refs, h->target_name[i]); - if (ksrc == kh_end(src_long_refs)) continue; - kdest = kh_put(s2i, dest_long_refs, h->target_name[i], &ret); - if (ret < 0) { - kh_destroy(s2i, dest_long_refs); - return -1; - } - kh_val(dest_long_refs, kdest) = kh_val(src_long_refs, ksrc); - } - - h->sdict = dest_long_refs; - return 0; -} - -sam_hdr_t *sam_hdr_dup(const sam_hdr_t *h0) -{ - if (h0 == NULL) return NULL; - sam_hdr_t *h; - if ((h = sam_hdr_init()) == NULL) return NULL; - // copy the simple data - h->n_targets = 0; - h->ignore_sam_err = h0->ignore_sam_err; - h->l_text = 0; - - // Then the pointery stuff - - if (!h0->hrecs) { - h->target_len = (uint32_t*)calloc(h0->n_targets, sizeof(uint32_t)); - if (!h->target_len) goto fail; - h->target_name = (char**)calloc(h0->n_targets, sizeof(char*)); - if (!h->target_name) goto fail; - - int i; - for (i = 0; i < h0->n_targets; ++i) { - h->target_len[i] = h0->target_len[i]; - h->target_name[i] = strdup(h0->target_name[i]); - if (!h->target_name[i]) break; - } - h->n_targets = i; - if (i < h0->n_targets) goto fail; - - if (h0->sdict) { - if (sam_hdr_dup_sdict(h0, h) < 0) goto fail; - } - } - - if (h0->hrecs) { - kstring_t tmp = { 0, 0, NULL }; - if (sam_hrecs_rebuild_text(h0->hrecs, &tmp) != 0) { - free(ks_release(&tmp)); - goto fail; - } - - h->l_text = tmp.l; - h->text = ks_release(&tmp); - - if (sam_hdr_update_target_arrays(h, h0->hrecs, 0) != 0) - goto fail; - } else { - h->l_text = h0->l_text; - h->text = malloc(h->l_text + 1); - if (!h->text) goto fail; - memcpy(h->text, h0->text, h->l_text); - h->text[h->l_text] = '\0'; - } - - return h; 
- - fail: - sam_hdr_destroy(h); - return NULL; -} - -sam_hdr_t *bam_hdr_read(BGZF *fp) -{ - sam_hdr_t *h; - uint8_t buf[4]; - int magic_len, has_EOF; - int32_t i, name_len, num_names = 0; - size_t bufsize; - ssize_t bytes; - // check EOF - has_EOF = bgzf_check_EOF(fp); - if (has_EOF < 0) { - perror("[W::bam_hdr_read] bgzf_check_EOF"); - } else if (has_EOF == 0) { - hts_log_warning("EOF marker is absent. The input is probably truncated"); - } - // read "BAM1" - magic_len = bgzf_read(fp, buf, 4); - if (magic_len != 4 || memcmp(buf, "BAM\1", 4)) { - hts_log_error("Invalid BAM binary header"); - return 0; - } - h = sam_hdr_init(); - if (!h) goto nomem; - - // read plain text and the number of reference sequences - bytes = bgzf_read(fp, buf, 4); - if (bytes != 4) goto read_err; - h->l_text = le_to_u32(buf); - - bufsize = h->l_text + 1; - if (bufsize < h->l_text) goto nomem; // so large that adding 1 overflowed - h->text = (char*)malloc(bufsize); - if (!h->text) goto nomem; - h->text[h->l_text] = 0; // make sure it is NULL terminated - bytes = bgzf_read(fp, h->text, h->l_text); - if (bytes != h->l_text) goto read_err; - - bytes = bgzf_read(fp, &h->n_targets, 4); - if (bytes != 4) goto read_err; - if (fp->is_be) ed_swap_4p(&h->n_targets); - - if (h->n_targets < 0) goto invalid; - - // read reference sequence names and lengths - if (h->n_targets > 0) { - h->target_name = (char**)calloc(h->n_targets, sizeof(char*)); - if (!h->target_name) goto nomem; - h->target_len = (uint32_t*)calloc(h->n_targets, sizeof(uint32_t)); - if (!h->target_len) goto nomem; - } - else { - h->target_name = NULL; - h->target_len = NULL; - } - - for (i = 0; i != h->n_targets; ++i) { - bytes = bgzf_read(fp, &name_len, 4); - if (bytes != 4) goto read_err; - if (fp->is_be) ed_swap_4p(&name_len); - if (name_len <= 0) goto invalid; - - h->target_name[i] = (char*)malloc(name_len); - if (!h->target_name[i]) goto nomem; - num_names++; - - bytes = bgzf_read(fp, h->target_name[i], name_len); - if (bytes != 
name_len) goto read_err; - - if (h->target_name[i][name_len - 1] != '\0') { - /* Fix missing NUL-termination. Is this being too nice? - We could alternatively bail out with an error. */ - char *new_name; - if (name_len == INT32_MAX) goto invalid; - new_name = realloc(h->target_name[i], name_len + 1); - if (new_name == NULL) goto nomem; - h->target_name[i] = new_name; - h->target_name[i][name_len] = '\0'; - } - - bytes = bgzf_read(fp, &h->target_len[i], 4); - if (bytes != 4) goto read_err; - if (fp->is_be) ed_swap_4p(&h->target_len[i]); - } - return h; - - nomem: - hts_log_error("Out of memory"); - goto clean; - - read_err: - if (bytes < 0) { - hts_log_error("Error reading BGZF stream"); - } else { - hts_log_error("Truncated BAM header"); - } - goto clean; - - invalid: - hts_log_error("Invalid BAM binary header"); - - clean: - if (h != NULL) { - h->n_targets = num_names; // ensure we free only allocated target_names - sam_hdr_destroy(h); - } - return NULL; -} - -int bam_hdr_write(BGZF *fp, const sam_hdr_t *h) -{ - int32_t i, name_len, x; - kstring_t hdr_ks = { 0, 0, NULL }; - char *text; - uint32_t l_text; - - if (!h) return -1; - - if (h->hrecs) { - if (sam_hrecs_rebuild_text(h->hrecs, &hdr_ks) != 0) return -1; - if (hdr_ks.l > UINT32_MAX) { - hts_log_error("Header too long for BAM format"); - free(hdr_ks.s); - return -1; - } else if (hdr_ks.l > INT32_MAX) { - hts_log_warning("Header too long for BAM specification (>2GB)"); - hts_log_warning("Output file may not be portable"); - } - text = hdr_ks.s; - l_text = hdr_ks.l; - } else { - if (h->l_text > UINT32_MAX) { - hts_log_error("Header too long for BAM format"); - return -1; - } else if (h->l_text > INT32_MAX) { - hts_log_warning("Header too long for BAM specification (>2GB)"); - hts_log_warning("Output file may not be portable"); - } - text = h->text; - l_text = h->l_text; - } - // write "BAM1" - if (bgzf_write(fp, "BAM\1", 4) < 0) { free(hdr_ks.s); return -1; } - // write plain text and the number of reference 
sequences - if (fp->is_be) { - x = ed_swap_4(l_text); - if (bgzf_write(fp, &x, 4) < 0) { free(hdr_ks.s); return -1; } - if (l_text) { - if (bgzf_write(fp, text, l_text) < 0) { free(hdr_ks.s); return -1; } - } - x = ed_swap_4(h->n_targets); - if (bgzf_write(fp, &x, 4) < 0) { free(hdr_ks.s); return -1; } - } else { - if (bgzf_write(fp, &l_text, 4) < 0) { free(hdr_ks.s); return -1; } - if (l_text) { - if (bgzf_write(fp, text, l_text) < 0) { free(hdr_ks.s); return -1; } - } - if (bgzf_write(fp, &h->n_targets, 4) < 0) { free(hdr_ks.s); return -1; } - } - free(hdr_ks.s); - // write sequence names and lengths - for (i = 0; i != h->n_targets; ++i) { - char *p = h->target_name[i]; - name_len = strlen(p) + 1; - if (fp->is_be) { - x = ed_swap_4(name_len); - if (bgzf_write(fp, &x, 4) < 0) return -1; - } else { - if (bgzf_write(fp, &name_len, 4) < 0) return -1; - } - if (bgzf_write(fp, p, name_len) < 0) return -1; - if (fp->is_be) { - x = ed_swap_4(h->target_len[i]); - if (bgzf_write(fp, &x, 4) < 0) return -1; - } else { - if (bgzf_write(fp, &h->target_len[i], 4) < 0) return -1; - } - } - if (bgzf_flush(fp) < 0) return -1; - return 0; -} - -const char *sam_parse_region(sam_hdr_t *h, const char *s, int *tid, - hts_pos_t *beg, hts_pos_t *end, int flags) { - return hts_parse_region(s, tid, beg, end, (hts_name2id_f)bam_name2id, h, flags); -} - -/************************* - *** BAM alignment I/O *** - *************************/ - -bam1_t *bam_init1(void) -{ - return (bam1_t*)calloc(1, sizeof(bam1_t)); -} - -int sam_realloc_bam_data(bam1_t *b, size_t desired) -{ - uint32_t new_m_data; - uint8_t *new_data; - new_m_data = desired; - kroundup32(new_m_data); - if (new_m_data < desired) { - errno = ENOMEM; // Not strictly true but we can't store the size - return -1; - } - if ((bam_get_mempolicy(b) & BAM_USER_OWNS_DATA) == 0) { - new_data = realloc(b->data, new_m_data); - } else { - if ((new_data = malloc(new_m_data)) != NULL) { - if (b->l_data > 0) - memcpy(new_data, b->data, - b->l_data 
< b->m_data ? b->l_data : b->m_data); - bam_set_mempolicy(b, bam_get_mempolicy(b) & (~BAM_USER_OWNS_DATA)); - } - } - if (!new_data) return -1; - b->data = new_data; - b->m_data = new_m_data; - return 0; -} - -void bam_destroy1(bam1_t *b) -{ - if (b == 0) return; - if ((bam_get_mempolicy(b) & BAM_USER_OWNS_DATA) == 0) { - free(b->data); - if ((bam_get_mempolicy(b) & BAM_USER_OWNS_STRUCT) != 0) { - // In case of reuse - b->data = NULL; - b->m_data = 0; - b->l_data = 0; - } - } - - if ((bam_get_mempolicy(b) & BAM_USER_OWNS_STRUCT) == 0) - free(b); -} - -bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) -{ - if (realloc_bam_data(bdst, bsrc->l_data) < 0) return NULL; - memcpy(bdst->data, bsrc->data, bsrc->l_data); // copy var-len data - memcpy(&bdst->core, &bsrc->core, sizeof(bsrc->core)); // copy the rest - bdst->l_data = bsrc->l_data; - bdst->id = bsrc->id; - return bdst; -} - -bam1_t *bam_dup1(const bam1_t *bsrc) -{ - if (bsrc == NULL) return NULL; - bam1_t *bdst = bam_init1(); - if (bdst == NULL) return NULL; - if (bam_copy1(bdst, bsrc) == NULL) { - bam_destroy1(bdst); - return NULL; - } - return bdst; -} - -static void bam_cigar2rqlens(int n_cigar, const uint32_t *cigar, - hts_pos_t *rlen, hts_pos_t *qlen) -{ - int k; - *rlen = *qlen = 0; - for (k = 0; k < n_cigar; ++k) { - int type = bam_cigar_type(bam_cigar_op(cigar[k])); - int len = bam_cigar_oplen(cigar[k]); - if (type & 1) *qlen += len; - if (type & 2) *rlen += len; - } -} - -static int subtract_check_underflow(size_t length, size_t *limit) -{ - if (length <= *limit) { - *limit -= length; - return 0; - } - - return -1; -} - -int bam_set1(bam1_t *bam, - size_t l_qname, const char *qname, - uint16_t flag, int32_t tid, hts_pos_t pos, uint8_t mapq, - size_t n_cigar, const uint32_t *cigar, - int32_t mtid, hts_pos_t mpos, hts_pos_t isize, - size_t l_seq, const char *seq, const char *qual, - size_t l_aux) -{ - // use a default qname "*" if none is provided - if (l_qname == 0) { - l_qname = 1; - qname = "*"; - } - 
- // note: the qname is stored nul terminated and padded as described in the - // documentation for the bam1_t struct. - size_t qname_nuls = 4 - l_qname % 4; - - // the aligment length, needed for bam_reg2bin(), is calculated as in bam_endpos(). - // can't use bam_endpos() directly as some fields not yet set up. - hts_pos_t rlen = 0, qlen = 0; - if (!(flag & BAM_FUNMAP)) { - bam_cigar2rqlens((int)n_cigar, cigar, &rlen, &qlen); - } - if (rlen == 0) { - rlen = 1; - } - - // validate parameters - if (l_qname > 254) { - hts_log_error("Query name too long"); - errno = EINVAL; - return -1; - } - if (HTS_POS_MAX - rlen <= pos) { - hts_log_error("Read ends beyond highest supported position"); - errno = EINVAL; - return -1; - } - if (!(flag & BAM_FUNMAP) && l_seq > 0 && n_cigar == 0) { - hts_log_error("Mapped query must have a CIGAR"); - errno = EINVAL; - return -1; - } - if (!(flag & BAM_FUNMAP) && l_seq > 0 && l_seq != qlen) { - hts_log_error("CIGAR and query sequence are of different length"); - errno = EINVAL; - return -1; - } - - size_t limit = INT32_MAX; - int u = subtract_check_underflow(l_qname + qname_nuls, &limit); - u += subtract_check_underflow(n_cigar * 4, &limit); - u += subtract_check_underflow((l_seq + 1) / 2, &limit); - u += subtract_check_underflow(l_seq, &limit); - u += subtract_check_underflow(l_aux, &limit); - if (u != 0) { - hts_log_error("Size overflow"); - errno = EINVAL; - return -1; - } - - // re-allocate the data buffer as needed. 
- size_t data_len = l_qname + qname_nuls + n_cigar * 4 + (l_seq + 1) / 2 + l_seq; - if (realloc_bam_data(bam, data_len + l_aux) < 0) { - return -1; - } - - bam->l_data = (int)data_len; - bam->core.pos = pos; - bam->core.tid = tid; - bam->core.bin = bam_reg2bin(pos, pos + rlen); - bam->core.qual = mapq; - bam->core.l_extranul = (uint8_t)(qname_nuls - 1); - bam->core.flag = flag; - bam->core.l_qname = (uint16_t)(l_qname + qname_nuls); - bam->core.n_cigar = (uint32_t)n_cigar; - bam->core.l_qseq = (int32_t)l_seq; - bam->core.mtid = mtid; - bam->core.mpos = mpos; - bam->core.isize = isize; - - uint8_t *cp = bam->data; - strncpy((char *)cp, qname, l_qname); - int i; - for (i = 0; i < qname_nuls; i++) { - cp[l_qname + i] = '\0'; - } - cp += l_qname + qname_nuls; - - if (n_cigar > 0) { - memcpy(cp, cigar, n_cigar * 4); - } - cp += n_cigar * 4; - -#define NN 16 - const uint8_t *useq = (uint8_t *)seq; - for (i = 0; i + NN < l_seq; i += NN) { - int j; - const uint8_t *u2 = useq+i; - for (j = 0; j < NN/2; j++) - cp[j] = (seq_nt16_table[u2[j*2]]<<4) | seq_nt16_table[u2[j*2+1]]; - cp += NN/2; - } - for (; i + 1 < l_seq; i += 2) { - *cp++ = (seq_nt16_table[useq[i]] << 4) | seq_nt16_table[useq[i + 1]]; - } - - for (; i < l_seq; i++) { - *cp++ = seq_nt16_table[(unsigned char)seq[i]] << 4; - } - - if (qual) { - memcpy(cp, qual, l_seq); - } - else { - memset(cp, '\xff', l_seq); - } - - return (int)data_len; -} - -hts_pos_t bam_cigar2qlen(int n_cigar, const uint32_t *cigar) -{ - int k; - hts_pos_t l; - for (k = l = 0; k < n_cigar; ++k) - if (bam_cigar_type(bam_cigar_op(cigar[k]))&1) - l += bam_cigar_oplen(cigar[k]); - return l; -} - -hts_pos_t bam_cigar2rlen(int n_cigar, const uint32_t *cigar) -{ - int k; - hts_pos_t l; - for (k = l = 0; k < n_cigar; ++k) - if (bam_cigar_type(bam_cigar_op(cigar[k]))&2) - l += bam_cigar_oplen(cigar[k]); - return l; -} - -hts_pos_t bam_endpos(const bam1_t *b) -{ - hts_pos_t rlen = (b->core.flag & BAM_FUNMAP)? 
0 : bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); - if (rlen == 0) rlen = 1; - return b->core.pos + rlen; -} - -static int bam_tag2cigar(bam1_t *b, int recal_bin, int give_warning) // return 0 if CIGAR is untouched; 1 if CIGAR is updated with CG -{ - bam1_core_t *c = &b->core; - uint32_t cigar_st, n_cigar4, CG_st, CG_en, ori_len = b->l_data, *cigar0, CG_len, fake_bytes; - uint8_t *CG; - - // test where there is a real CIGAR in the CG tag to move - if (c->n_cigar == 0 || c->tid < 0 || c->pos < 0) return 0; - cigar0 = bam_get_cigar(b); - if (bam_cigar_op(cigar0[0]) != BAM_CSOFT_CLIP || bam_cigar_oplen(cigar0[0]) != c->l_qseq) return 0; - fake_bytes = c->n_cigar * 4; - int saved_errno = errno; - CG = bam_aux_get(b, "CG"); - if (!CG) { - if (errno != ENOENT) return -1; // Bad aux data - errno = saved_errno; // restore errno on expected no-CG-tag case - return 0; - } - if (CG[0] != 'B' || !(CG[1] == 'I' || CG[1] == 'i')) - return 0; // not of type B,I - CG_len = le_to_u32(CG + 2); - if (CG_len < c->n_cigar || CG_len >= 1U<<29) return 0; // don't move if the real CIGAR length is shorter than the fake cigar length - - // move from the CG tag to the right position - cigar_st = (uint8_t*)cigar0 - b->data; - c->n_cigar = CG_len; - n_cigar4 = c->n_cigar * 4; - CG_st = CG - b->data - 2; - CG_en = CG_st + 8 + n_cigar4; - if (possibly_expand_bam_data(b, n_cigar4 - fake_bytes) < 0) return -1; - b->l_data = b->l_data - fake_bytes + n_cigar4; // we need c->n_cigar-fake_bytes bytes to swap CIGAR to the right place - memmove(b->data + cigar_st + n_cigar4, b->data + cigar_st + fake_bytes, ori_len - (cigar_st + fake_bytes)); // insert c->n_cigar-fake_bytes empty space to make room - memcpy(b->data + cigar_st, b->data + (n_cigar4 - fake_bytes) + CG_st + 8, n_cigar4); // copy the real CIGAR to the right place; -fake_bytes for the fake CIGAR - if (ori_len > CG_en) // move data after the CG tag - memmove(b->data + CG_st + n_cigar4 - fake_bytes, b->data + CG_en + n_cigar4 - fake_bytes, 
ori_len - CG_en); - b->l_data -= n_cigar4 + 8; // 8: CGBI (4 bytes) and CGBI length (4) - if (recal_bin) - b->core.bin = hts_reg2bin(b->core.pos, bam_endpos(b), 14, 5); - if (give_warning) - hts_log_error("%s encodes a CIGAR with %d operators at the CG tag", bam_get_qname(b), c->n_cigar); - return 1; -} - -static inline int aux_type2size(uint8_t type) -{ - switch (type) { - case 'A': case 'c': case 'C': - return 1; - case 's': case 'S': - return 2; - case 'i': case 'I': case 'f': - return 4; - case 'd': - return 8; - case 'Z': case 'H': case 'B': - return type; - default: - return 0; - } -} - -static void swap_data(const bam1_core_t *c, int l_data, uint8_t *data, int is_host) -{ - uint32_t *cigar = (uint32_t*)(data + c->l_qname); - uint32_t i; - for (i = 0; i < c->n_cigar; ++i) ed_swap_4p(&cigar[i]); -} - -// Fix bad records where qname is not terminated correctly. -static int fixup_missing_qname_nul(bam1_t *b) { - bam1_core_t *c = &b->core; - - // Note this is called before c->l_extranul is added to c->l_qname - if (c->l_extranul > 0) { - b->data[c->l_qname++] = '\0'; - c->l_extranul--; - } else { - if (b->l_data > INT_MAX - 4) return -1; - if (realloc_bam_data(b, b->l_data + 4) < 0) return -1; - b->l_data += 4; - b->data[c->l_qname++] = '\0'; - c->l_extranul = 3; - } - return 0; -} - -/* - * Note a second interface that returns a bam pointer instead would avoid bam_copy1 - * in multi-threaded handling. This may be worth considering for htslib2. 
- */ -int bam_read1(BGZF *fp, bam1_t *b) -{ - bam1_core_t *c = &b->core; - int32_t block_len, ret, i; - uint32_t x[8], new_l_data; - - b->l_data = 0; - - if ((ret = bgzf_read(fp, &block_len, 4)) != 4) { - if (ret == 0) return -1; // normal end-of-file - else return -2; // truncated - } - if (fp->is_be) - ed_swap_4p(&block_len); - if (block_len < 32) return -4; // block_len includes core data - if (bgzf_read(fp, x, 32) != 32) return -3; - if (fp->is_be) { - for (i = 0; i < 8; ++i) ed_swap_4p(x + i); - } - c->tid = x[0]; c->pos = (int32_t)x[1]; - c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff; - c->l_extranul = (c->l_qname%4 != 0)? (4 - c->l_qname%4) : 0; - c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff; - c->l_qseq = x[4]; - c->mtid = x[5]; c->mpos = (int32_t)x[6]; c->isize = (int32_t)x[7]; - - new_l_data = block_len - 32 + c->l_extranul; - if (new_l_data > INT_MAX || c->l_qseq < 0 || c->l_qname < 1) return -4; - if (((uint64_t) c->n_cigar << 2) + c->l_qname + c->l_extranul - + (((uint64_t) c->l_qseq + 1) >> 1) + c->l_qseq > (uint64_t) new_l_data) - return -4; - if (realloc_bam_data(b, new_l_data) < 0) return -4; - b->l_data = new_l_data; - - if (bgzf_read(fp, b->data, c->l_qname) != c->l_qname) return -4; - if (b->data[c->l_qname - 1] != '\0') { // Try to fix missing NUL termination - if (fixup_missing_qname_nul(b) < 0) return -4; - } - for (i = 0; i < c->l_extranul; ++i) b->data[c->l_qname+i] = '\0'; - c->l_qname += c->l_extranul; - if (b->l_data < c->l_qname || - bgzf_read(fp, b->data + c->l_qname, b->l_data - c->l_qname) != b->l_data - c->l_qname) - return -4; - if (fp->is_be) swap_data(c, b->l_data, b->data, 0); - if (bam_tag2cigar(b, 0, 0) < 0) - return -4; - - if (c->n_cigar > 0) { // recompute "bin" and check CIGAR-qlen consistency - hts_pos_t rlen, qlen; - bam_cigar2rqlens(c->n_cigar, bam_get_cigar(b), &rlen, &qlen); - if ((b->core.flag & BAM_FUNMAP) || rlen == 0) rlen = 1; - b->core.bin = hts_reg2bin(b->core.pos, b->core.pos + rlen, 14, 
5); - // Sanity check for broken CIGAR alignments - if (c->l_qseq > 0 && !(c->flag & BAM_FUNMAP) && qlen != c->l_qseq) { - hts_log_error("CIGAR and query sequence lengths differ for %s", - bam_get_qname(b)); - return -4; - } - } - - return 4 + block_len; -} - -int bam_write1(BGZF *fp, const bam1_t *b) -{ - const bam1_core_t *c = &b->core; - uint32_t x[8], block_len = b->l_data - c->l_extranul + 32, y; - int i, ok; - if (c->l_qname - c->l_extranul > 255) { - hts_log_error("QNAME \"%s\" is longer than 254 characters", bam_get_qname(b)); - errno = EOVERFLOW; - return -1; - } - if (c->n_cigar > 0xffff) block_len += 16; // "16" for "CGBI", 4-byte tag length and 8-byte fake CIGAR - if (c->pos > INT_MAX || - c->mpos > INT_MAX || - c->isize < INT_MIN || c->isize > INT_MAX) { - hts_log_error("Positional data is too large for BAM format"); - return -1; - } - x[0] = c->tid; - x[1] = c->pos; - x[2] = (uint32_t)c->bin<<16 | c->qual<<8 | (c->l_qname - c->l_extranul); - if (c->n_cigar > 0xffff) x[3] = (uint32_t)c->flag << 16 | 2; - else x[3] = (uint32_t)c->flag << 16 | (c->n_cigar & 0xffff); - x[4] = c->l_qseq; - x[5] = c->mtid; - x[6] = c->mpos; - x[7] = c->isize; - ok = (bgzf_flush_try(fp, 4 + block_len) >= 0); - if (fp->is_be) { - for (i = 0; i < 8; ++i) ed_swap_4p(x + i); - y = block_len; - if (ok) ok = (bgzf_write(fp, ed_swap_4p(&y), 4) >= 0); - swap_data(c, b->l_data, b->data, 1); - } else { - if (ok) ok = (bgzf_write(fp, &block_len, 4) >= 0); - } - if (ok) ok = (bgzf_write(fp, x, 32) >= 0); - if (ok) ok = (bgzf_write(fp, b->data, c->l_qname - c->l_extranul) >= 0); - if (c->n_cigar <= 0xffff) { // no long CIGAR; write normally - if (ok) ok = (bgzf_write(fp, b->data + c->l_qname, b->l_data - c->l_qname) >= 0); - } else { // with long CIGAR, insert a fake CIGAR record and move the real CIGAR to the CG:B,I tag - uint8_t buf[8]; - uint32_t cigar_st, cigar_en, cigar[2]; - hts_pos_t cigreflen = bam_cigar2rlen(c->n_cigar, bam_get_cigar(b)); - if (cigreflen >= (1<<28)) { - // 
Length of reference covered is greater than the biggest - // CIGAR operation currently allowed. - hts_log_error("Record %s with %d CIGAR ops and ref length %"PRIhts_pos - " cannot be written in BAM. Try writing SAM or CRAM instead.\n", - bam_get_qname(b), c->n_cigar, cigreflen); - return -1; - } - cigar_st = (uint8_t*)bam_get_cigar(b) - b->data; - cigar_en = cigar_st + c->n_cigar * 4; - cigar[0] = (uint32_t)c->l_qseq << 4 | BAM_CSOFT_CLIP; - cigar[1] = (uint32_t)cigreflen << 4 | BAM_CREF_SKIP; - u32_to_le(cigar[0], buf); - u32_to_le(cigar[1], buf + 4); - if (ok) ok = (bgzf_write(fp, buf, 8) >= 0); // write cigar: SN - if (ok) ok = (bgzf_write(fp, &b->data[cigar_en], b->l_data - cigar_en) >= 0); // write data after CIGAR - if (ok) ok = (bgzf_write(fp, "CGBI", 4) >= 0); // write CG:B,I - u32_to_le(c->n_cigar, buf); - if (ok) ok = (bgzf_write(fp, buf, 4) >= 0); // write the true CIGAR length - if (ok) ok = (bgzf_write(fp, &b->data[cigar_st], c->n_cigar * 4) >= 0); // write the real CIGAR - } - if (fp->is_be) swap_data(c, b->l_data, b->data, 0); - return ok? 4 + block_len : -1; -} - -/* - * Write a BAM file and append to the in-memory index simultaneously. 
- */ -static int bam_write_idx1(htsFile *fp, const sam_hdr_t *h, const bam1_t *b) { - BGZF *bfp = fp->fp.bgzf; - - if (!fp->idx) - return bam_write1(bfp, b); - - uint32_t block_len = b->l_data - b->core.l_extranul + 32; - if (bgzf_flush_try(bfp, 4 + block_len) < 0) - return -1; - if (!bfp->mt) - hts_idx_amend_last(fp->idx, bgzf_tell(bfp)); - else - bgzf_idx_amend_last(bfp, fp->idx, bgzf_tell(bfp)); - - int ret = bam_write1(bfp, b); - if (ret < 0) - return -1; - - if (bgzf_idx_push(bfp, fp->idx, b->core.tid, b->core.pos, bam_endpos(b), bgzf_tell(bfp), !(b->core.flag&BAM_FUNMAP)) < 0) { - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); - ret = -1; - } - - return ret; -} - -/* - * Set the qname in a BAM record - */ -int bam_set_qname(bam1_t *rec, const char *qname) -{ - if (!rec) return -1; - if (!qname || !*qname) return -1; - - size_t old_len = rec->core.l_qname; - size_t new_len = strlen(qname) + 1; - if (new_len < 1 || new_len > 255) return -1; - - int extranul = (new_len%4 != 0) ? 
(4 - new_len%4) : 0; - - size_t new_data_len = rec->l_data - old_len + new_len + extranul; - if (realloc_bam_data(rec, new_data_len) < 0) return -1; - - // Make room - if (new_len + extranul != rec->core.l_qname) - memmove(rec->data + new_len + extranul, rec->data + rec->core.l_qname, rec->l_data - rec->core.l_qname); - // Copy in new name and pad if needed - memcpy(rec->data, qname, new_len); - int n; - for (n = 0; n < extranul; n++) rec->data[new_len + n] = '\0'; - - rec->l_data = new_data_len; - rec->core.l_qname = new_len + extranul; - rec->core.l_extranul = extranul; - - return 0; -} - -/******************** - *** BAM indexing *** - ********************/ - -static hts_idx_t *sam_index(htsFile *fp, int min_shift) -{ - int n_lvls, i, fmt, ret; - bam1_t *b; - hts_idx_t *idx; - sam_hdr_t *h; - h = sam_hdr_read(fp); - if (h == NULL) return NULL; - if (min_shift > 0) { - hts_pos_t max_len = 0, s; - for (i = 0; i < h->n_targets; ++i) { - hts_pos_t len = sam_hdr_tid2len(h, i); - if (max_len < len) max_len = len; - } - max_len += 256; - for (n_lvls = 0, s = 1< s; ++n_lvls, s <<= 3); - fmt = HTS_FMT_CSI; - } else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_BAI; - idx = hts_idx_init(h->n_targets, fmt, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - b = bam_init1(); - while ((ret = sam_read1(fp, h, b)) >= 0) { - ret = hts_idx_push(idx, b->core.tid, b->core.pos, bam_endpos(b), bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)); - if (ret < 0) { // unsorted or doesn't fit - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); - goto err; - } - } - if (ret < -1) goto err; // corrupted BAM file - - hts_idx_finish(idx, bgzf_tell(fp->fp.bgzf)); - sam_hdr_destroy(h); - bam_destroy1(b); - return idx; - -err: - bam_destroy1(b); - hts_idx_destroy(idx); - return NULL; -} - -int sam_index_build3(const char 
*fn, const char *fnidx, int min_shift, int nthreads) -{ - hts_idx_t *idx; - htsFile *fp; - int ret = 0; - - if ((fp = hts_open(fn, "r")) == 0) return -2; - if (nthreads) - hts_set_threads(fp, nthreads); - - switch (fp->format.format) { - case cram: - - ret = cram_index_build(fp->fp.cram, fn, fnidx); - break; - - case bam: - case sam: - if (fp->format.compression != bgzf) { - hts_log_error("%s file \"%s\" not BGZF compressed", - fp->format.format == bam ? "BAM" : "SAM", fn); - ret = -1; - break; - } - idx = sam_index(fp, min_shift); - if (idx) { - ret = hts_idx_save_as(idx, fn, fnidx, (min_shift > 0)? HTS_FMT_CSI : HTS_FMT_BAI); - if (ret < 0) ret = -4; - hts_idx_destroy(idx); - } - else ret = -1; - break; - - default: - ret = -3; - break; - } - hts_close(fp); - - return ret; -} - -int sam_index_build2(const char *fn, const char *fnidx, int min_shift) -{ - return sam_index_build3(fn, fnidx, min_shift, 0); -} - -int sam_index_build(const char *fn, int min_shift) -{ - return sam_index_build3(fn, NULL, min_shift, 0); -} - -// Provide bam_index_build() symbol for binary compatibility with earlier HTSlib -#undef bam_index_build -int bam_index_build(const char *fn, int min_shift) -{ - return sam_index_build2(fn, NULL, min_shift); -} - -// Initialise fp->idx for the current format type. -// This must be called after the header has been written but no other data. 
-int sam_idx_init(htsFile *fp, sam_hdr_t *h, int min_shift, const char *fnidx) { - fp->fnidx = fnidx; - if (fp->format.format == bam || fp->format.format == bcf || - (fp->format.format == sam && fp->format.compression == bgzf)) { - int n_lvls, fmt = HTS_FMT_CSI; - if (min_shift > 0) { - int64_t max_len = 0, s; - int i; - for (i = 0; i < h->n_targets; ++i) - if (max_len < h->target_len[i]) max_len = h->target_len[i]; - max_len += 256; - for (n_lvls = 0, s = 1< s; ++n_lvls, s <<= 3); - - } else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_BAI; - - fp->idx = hts_idx_init(h->n_targets, fmt, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - return fp->idx ? 0 : -1; - } - - if (fp->format.format == cram) { - fp->fp.cram->idxfp = bgzf_open(fnidx, "wg"); - return fp->fp.cram->idxfp ? 0 : -1; - } - - return -1; -} - -// Finishes an index. Call after the last record has been written. -// Returns 0 on success, <0 on failure. -int sam_idx_save(htsFile *fp) { - if (fp->format.format == bam || fp->format.format == bcf || - fp->format.format == vcf || fp->format.format == sam) { - int ret; - if ((ret = sam_state_destroy(fp)) < 0) { - errno = -ret; - return -1; - } - if (!fp->is_bgzf || bgzf_flush(fp->fp.bgzf) < 0) - return -1; - hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf)); - - if (hts_idx_finish(fp->idx, bgzf_tell(fp->fp.bgzf)) < 0) - return -1; - - return hts_idx_save_as(fp->idx, NULL, fp->fnidx, hts_idx_fmt(fp->idx)); - - } else if (fp->format.format == cram) { - // flushed and closed by cram_close - } - - return 0; -} - -static int sam_readrec(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - htsFile *fp = (htsFile *)fpv; - bam1_t *b = bv; - fp->line.l = 0; - int ret = sam_read1(fp, fp->bam_header, b); - if (ret >= 0) { - *tid = b->core.tid; - *beg = b->core.pos; - *end = bam_endpos(b); - } - return ret; -} - -// This is used only with read_rest=1 iterators, so need not set tid/beg/end. 
-static int sam_readrec_rest(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - htsFile *fp = (htsFile *)fpv; - bam1_t *b = bv; - fp->line.l = 0; - int ret = sam_read1(fp, fp->bam_header, b); - return ret; -} - -// Internal (for now) func used by bam_sym_lookup. This is copied from -// samtools/bam.c. -static const char *bam_get_library(const bam_hdr_t *h, const bam1_t *b) -{ - const char *rg; - kstring_t lib = { 0, 0, NULL }; - rg = (char *)bam_aux_get(b, "RG"); - - if (!rg) - return NULL; - else - rg++; - - if (sam_hdr_find_tag_id((bam_hdr_t *)h, "RG", "ID", rg, "LB", &lib) < 0) - return NULL; - - static char LB_text[1024]; - int len = lib.l < sizeof(LB_text) - 1 ? lib.l : sizeof(LB_text) - 1; - - memcpy(LB_text, lib.s, len); - LB_text[len] = 0; - - free(lib.s); - - return LB_text; -} - - -// Bam record pointer and SAM header combined -typedef struct { - const sam_hdr_t *h; - const bam1_t *b; -} hb_pair; - -// Looks up variable names in str and replaces them with their value. -// Also supports aux tags. -// -// Note the expression parser deliberately overallocates str size so it -// is safe to use memcmp over strcmp. -static int bam_sym_lookup(void *data, char *str, char **end, - hts_expr_val_t *res) { - hb_pair *hb = (hb_pair *)data; - const bam1_t *b = hb->b; - - res->is_str = 0; - switch(*str) { - case 'c': - if (memcmp(str, "cigar", 5) == 0) { - *end = str+5; - res->is_str = 1; - ks_clear(&res->s); - uint32_t *cigar = bam_get_cigar(b); - int i, n = b->core.n_cigar, r = 0; - if (n) { - for (i = 0; i < n; i++) { - r |= kputw (bam_cigar_oplen(cigar[i]), &res->s) < 0; - r |= kputc_(bam_cigar_opchr(cigar[i]), &res->s) < 0; - } - r |= kputs("", &res->s) < 0; - } else { - r |= kputs("*", &res->s) < 0; - } - return r ? 
-1 : 0; - } - break; - - case 'e': - if (memcmp(str, "endpos", 6) == 0) { - *end = str+6; - res->d = bam_endpos(b); - return 0; - } - break; - - case 'f': - if (memcmp(str, "flag", 4) == 0) { - str = *end = str+4; - if (*str != '.') { - res->d = b->core.flag; - return 0; - } else { - str++; - if (!memcmp(str, "paired", 6)) { - *end = str+6; - res->d = b->core.flag & BAM_FPAIRED; - return 0; - } else if (!memcmp(str, "proper_pair", 11)) { - *end = str+11; - res->d = b->core.flag & BAM_FPROPER_PAIR; - return 0; - } else if (!memcmp(str, "unmap", 5)) { - *end = str+5; - res->d = b->core.flag & BAM_FUNMAP; - return 0; - } else if (!memcmp(str, "munmap", 6)) { - *end = str+6; - res->d = b->core.flag & BAM_FMUNMAP; - return 0; - } else if (!memcmp(str, "reverse", 7)) { - *end = str+7; - res->d = b->core.flag & BAM_FREVERSE; - return 0; - } else if (!memcmp(str, "mreverse", 8)) { - *end = str+8; - res->d = b->core.flag & BAM_FMREVERSE; - return 0; - } else if (!memcmp(str, "read1", 5)) { - *end = str+5; - res->d = b->core.flag & BAM_FREAD1; - return 0; - } else if (!memcmp(str, "read2", 5)) { - *end = str+5; - res->d = b->core.flag & BAM_FREAD2; - return 0; - } else if (!memcmp(str, "secondary", 9)) { - *end = str+9; - res->d = b->core.flag & BAM_FSECONDARY; - return 0; - } else if (!memcmp(str, "qcfail", 6)) { - *end = str+6; - res->d = b->core.flag & BAM_FQCFAIL; - return 0; - } else if (!memcmp(str, "dup", 3)) { - *end = str+3; - res->d = b->core.flag & BAM_FDUP; - return 0; - } else if (!memcmp(str, "supplementary", 13)) { - *end = str+13; - res->d = b->core.flag & BAM_FSUPPLEMENTARY; - return 0; - } else { - hts_log_error("Unrecognised flag string"); - return -1; - } - } - } - break; - - case 'l': - if (memcmp(str, "library", 7) == 0) { - *end = str+7; - res->is_str = 1; - const char *lib = bam_get_library(hb->h, b); - kputs(lib ? 
lib : "", ks_clear(&res->s)); - return 0; - } - break; - - case 'm': - if (memcmp(str, "mapq", 4) == 0) { - *end = str+4; - res->d = b->core.qual; - return 0; - } else if (memcmp(str, "mpos", 4) == 0) { - *end = str+4; - res->d = b->core.mpos+1; - return 0; - } else if (memcmp(str, "mrname", 6) == 0) { - *end = str+6; - res->is_str = 1; - const char *rn = sam_hdr_tid2name(hb->h, b->core.mtid); - kputs(rn ? rn : "*", ks_clear(&res->s)); - return 0; - } else if (memcmp(str, "mrefid", 6) == 0) { - *end = str+6; - res->d = b->core.mtid; - return 0; - } - break; - - case 'n': - if (memcmp(str, "ncigar", 6) == 0) { - *end = str+6; - res->d = b->core.n_cigar; - return 0; - } - break; - - case 'p': - if (memcmp(str, "pos", 3) == 0) { - *end = str+3; - res->d = b->core.pos+1; - return 0; - } else if (memcmp(str, "pnext", 5) == 0) { - *end = str+5; - res->d = b->core.mpos+1; - return 0; - } - break; - - case 'q': - if (memcmp(str, "qlen", 4) == 0) { - *end = str+4; - res->d = bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)); - return 0; - } else if (memcmp(str, "qname", 5) == 0) { - *end = str+5; - res->is_str = 1; - kputs(bam_get_qname(b), ks_clear(&res->s)); - return 0; - } else if (memcmp(str, "qual", 4) == 0) { - *end = str+4; - ks_clear(&res->s); - if (ks_resize(&res->s, b->core.l_qseq+1) < 0) - return -1; - memcpy(res->s.s, bam_get_qual(b), b->core.l_qseq); - res->s.l = b->core.l_qseq; - res->is_str = 1; - return 0; - } - break; - - case 'r': - if (memcmp(str, "rlen", 4) == 0) { - *end = str+4; - res->d = bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); - return 0; - } else if (memcmp(str, "rname", 5) == 0) { - *end = str+5; - res->is_str = 1; - const char *rn = sam_hdr_tid2name(hb->h, b->core.tid); - kputs(rn ? rn : "*", ks_clear(&res->s)); - return 0; - } else if (memcmp(str, "rnext", 5) == 0) { - *end = str+5; - res->is_str = 1; - const char *rn = sam_hdr_tid2name(hb->h, b->core.mtid); - kputs(rn ? 
rn : "*", ks_clear(&res->s)); - return 0; - } else if (memcmp(str, "refid", 5) == 0) { - *end = str+5; - res->d = b->core.tid; - return 0; - } - break; - - case 's': - if (memcmp(str, "seq", 3) == 0) { - *end = str+3; - ks_clear(&res->s); - if (ks_resize(&res->s, b->core.l_qseq+1) < 0) - return -1; - nibble2base(bam_get_seq(b), res->s.s, b->core.l_qseq); - res->s.s[b->core.l_qseq] = 0; - res->s.l = b->core.l_qseq; - res->is_str = 1; - return 0; - } else if (memcmp(str, "sclen", 5) == 0) { - int sclen = 0; - uint32_t *cigar = bam_get_cigar(b); - int ncigar = b->core.n_cigar; - int left = 0; - - // left - if (ncigar > 0 - && bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) - left = 0, sclen += bam_cigar_oplen(cigar[0]); - else if (ncigar > 1 - && bam_cigar_op(cigar[0]) == BAM_CHARD_CLIP - && bam_cigar_op(cigar[1]) == BAM_CSOFT_CLIP) - left = 1, sclen += bam_cigar_oplen(cigar[1]); - - // right - if (ncigar-1 > left - && bam_cigar_op(cigar[ncigar-1]) == BAM_CSOFT_CLIP) - sclen += bam_cigar_oplen(cigar[ncigar-1]); - else if (ncigar-2 > left - && bam_cigar_op(cigar[ncigar-1]) == BAM_CHARD_CLIP - && bam_cigar_op(cigar[ncigar-2]) == BAM_CSOFT_CLIP) - sclen += bam_cigar_oplen(cigar[ncigar-2]); - - *end = str+5; - res->d = sclen; - return 0; - } - break; - - case 't': - if (memcmp(str, "tlen", 4) == 0) { - *end = str+4; - res->d = b->core.isize; - return 0; - } - break; - - case '[': - if (*str == '[' && str[1] && str[2] && str[3] == ']') { - /* aux tags */ - *end = str+4; - - uint8_t *aux = bam_aux_get(b, str+1); - if (aux) { - // we define the truth of a tag to be its presence, even if 0. 
- res->is_true = 1; - switch (*aux) { - case 'Z': - case 'H': - res->is_str = 1; - kputs((char *)aux+1, ks_clear(&res->s)); - break; - - case 'A': - res->is_str = 1; - kputsn((char *)aux+1, 1, ks_clear(&res->s)); - break; - - case 'i': case 'I': - case 's': case 'S': - case 'c': case 'C': - res->is_str = 0; - res->d = bam_aux2i(aux); - break; - - case 'f': - case 'd': - res->is_str = 0; - res->d = bam_aux2f(aux); - break; - - default: - hts_log_error("Aux type '%c not yet supported by filters", - *aux); - return -1; - } - return 0; - - } else { - // hence absent tags are always false (and strings) - res->is_str = 1; - res->s.l = 0; - res->d = 0; - res->is_true = 0; - return 0; - } - } - break; - } - - // All successful matches in switch should return 0. - // So if we didn't match, it's a parse error. - return -1; -} - -// Returns 1 when accepted by the filter, 0 if not, -1 on error. -int sam_passes_filter(const sam_hdr_t *h, const bam1_t *b, hts_filter_t *filt) -{ - hb_pair hb = {h, b}; - hts_expr_val_t res = HTS_EXPR_VAL_INIT; - if (hts_filter_eval2(filt, &hb, bam_sym_lookup, &res)) { - hts_log_error("Couldn't process filter expression"); - hts_expr_val_free(&res); - return -1; - } - - int t = res.is_true; - hts_expr_val_free(&res); - - return t; -} - -static int cram_readrec(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - htsFile *fp = fpv; - bam1_t *b = bv; - int pass_filter, ret; - - do { - ret = cram_get_bam_seq(fp->fp.cram, &b); - if (ret < 0) - return cram_eof(fp->fp.cram) ? 
-1 : -2; - - if (bam_tag2cigar(b, 1, 1) < 0) - return -2; - - *tid = b->core.tid; - *beg = b->core.pos; - *end = bam_endpos(b); - - if (fp->filter) { - pass_filter = sam_passes_filter(fp->bam_header, b, fp->filter); - if (pass_filter < 0) - return -2; - } else { - pass_filter = 1; - } - } while (pass_filter == 0); - - return ret; -} - -static int cram_pseek(void *fp, int64_t offset, int whence) -{ - cram_fd *fd = (cram_fd *)fp; - - if ((0 != cram_seek(fd, offset, SEEK_SET)) - && (0 != cram_seek(fd, offset - fd->first_container, SEEK_CUR))) - return -1; - - fd->curr_position = offset; - - if (fd->ctr) { - cram_free_container(fd->ctr); - if (fd->ctr_mt && fd->ctr_mt != fd->ctr) - cram_free_container(fd->ctr_mt); - - fd->ctr = NULL; - fd->ctr_mt = NULL; - fd->ooc = 0; - } - - return 0; -} - -/* - * cram_ptell is a pseudo-tell function, because it matches the position of the disk cursor only - * after a fresh seek call. Otherwise it indicates that the read takes place inside the buffered - * container previously fetched. It was designed like this to integrate with the functionality - * of the iterator stepping logic. 
- */ - -static int64_t cram_ptell(void *fp) -{ - cram_fd *fd = (cram_fd *)fp; - cram_container *c; - cram_slice *s; - int64_t ret = -1L; - - if (fd) { - if ((c = fd->ctr) != NULL) { - if ((s = c->slice) != NULL && s->max_rec) { - if ((c->curr_slice + s->curr_rec/s->max_rec) >= (c->max_slice + 1)) - fd->curr_position += c->offset + c->length; - } - } - ret = fd->curr_position; - } - - return ret; -} - -static int bam_pseek(void *fp, int64_t offset, int whence) -{ - BGZF *fd = (BGZF *)fp; - - return bgzf_seek(fd, offset, whence); -} - -static int64_t bam_ptell(void *fp) -{ - BGZF *fd = (BGZF *)fp; - if (!fd) - return -1L; - - return bgzf_tell(fd); -} - - - -static hts_idx_t *index_load(htsFile *fp, const char *fn, const char *fnidx, int flags) -{ - switch (fp->format.format) { - case bam: - case sam: - return hts_idx_load3(fn, fnidx, HTS_FMT_BAI, flags); - - case cram: { - if (cram_index_load(fp->fp.cram, fn, fnidx) < 0) return NULL; - - // Cons up a fake "index" just pointing at the associated cram_fd: - hts_cram_idx_t *idx = malloc(sizeof (hts_cram_idx_t)); - if (idx == NULL) return NULL; - idx->fmt = HTS_FMT_CRAI; - idx->cram = fp->fp.cram; - return (hts_idx_t *) idx; - } - - default: - return NULL; // TODO Would use tbx_index_load if it returned hts_idx_t - } -} - -hts_idx_t *sam_index_load3(htsFile *fp, const char *fn, const char *fnidx, int flags) -{ - return index_load(fp, fn, fnidx, flags); -} - -hts_idx_t *sam_index_load2(htsFile *fp, const char *fn, const char *fnidx) { - return index_load(fp, fn, fnidx, HTS_IDX_SAVE_REMOTE); -} - -hts_idx_t *sam_index_load(htsFile *fp, const char *fn) -{ - return index_load(fp, fn, NULL, HTS_IDX_SAVE_REMOTE); -} - -static hts_itr_t *cram_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - hts_itr_t *iter = (hts_itr_t *) calloc(1, sizeof(hts_itr_t)); - if (iter == NULL) return NULL; - - // Cons up a dummy iterator 
for which hts_itr_next() will simply invoke - // the readrec function: - iter->is_cram = 1; - iter->read_rest = 1; - iter->off = NULL; - iter->bins.a = NULL; - iter->readrec = readrec; - - if (tid >= 0 || tid == HTS_IDX_NOCOOR || tid == HTS_IDX_START) { - cram_range r = { tid, beg+1, end }; - int ret = cram_set_option(cidx->cram, CRAM_OPT_RANGE, &r); - - iter->curr_off = 0; - // The following fields are not required by hts_itr_next(), but are - // filled in in case user code wants to look at them. - iter->tid = tid; - iter->beg = beg; - iter->end = end; - - switch (ret) { - case 0: - break; - - case -2: - // No data vs this ref, so mark iterator as completed. - // Same as HTS_IDX_NONE. - iter->finished = 1; - break; - - default: - free(iter); - return NULL; - } - } - else switch (tid) { - case HTS_IDX_REST: - iter->curr_off = 0; - break; - case HTS_IDX_NONE: - iter->curr_off = 0; - iter->finished = 1; - break; - default: - hts_log_error("Query with tid=%d not implemented for CRAM files", tid); - abort(); - break; - } - - return iter; -} - -hts_itr_t *sam_itr_queryi(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - if (idx == NULL) - return hts_itr_query(NULL, tid, beg, end, sam_readrec_rest); - else if (cidx->fmt == HTS_FMT_CRAI) - return cram_itr_query(idx, tid, beg, end, sam_readrec); - else - return hts_itr_query(idx, tid, beg, end, sam_readrec); -} - -static int cram_name2id(void *fdv, const char *ref) -{ - cram_fd *fd = (cram_fd *) fdv; - return sam_hdr_name2tid(fd->header, ref); -} - -hts_itr_t *sam_itr_querys(const hts_idx_t *idx, sam_hdr_t *hdr, const char *region) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - return hts_itr_querys(idx, region, (hts_name2id_f)(bam_name2id), hdr, - cidx->fmt == HTS_FMT_CRAI ? 
cram_itr_query : hts_itr_query, - sam_readrec); -} - -hts_itr_t *sam_itr_regarray(const hts_idx_t *idx, sam_hdr_t *hdr, char **regarray, unsigned int regcount) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - hts_reglist_t *r_list = NULL; - int r_count = 0; - - if (!cidx || !hdr) - return NULL; - - hts_itr_t *itr = NULL; - if (cidx->fmt == HTS_FMT_CRAI) { - r_list = hts_reglist_create(regarray, regcount, &r_count, cidx->cram, cram_name2id); - if (!r_list) - return NULL; - itr = hts_itr_regions(idx, r_list, r_count, cram_name2id, cidx->cram, - hts_itr_multi_cram, cram_readrec, cram_pseek, cram_ptell); - } else { - r_list = hts_reglist_create(regarray, regcount, &r_count, hdr, (hts_name2id_f)(bam_name2id)); - if (!r_list) - return NULL; - itr = hts_itr_regions(idx, r_list, r_count, (hts_name2id_f)(bam_name2id), hdr, - hts_itr_multi_bam, sam_readrec, bam_pseek, bam_ptell); - } - - if (!itr) - hts_reglist_free(r_list, r_count); - - return itr; -} - -hts_itr_t *sam_itr_regions(const hts_idx_t *idx, sam_hdr_t *hdr, hts_reglist_t *reglist, unsigned int regcount) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - - if(!cidx || !hdr || !reglist) - return NULL; - - if (cidx->fmt == HTS_FMT_CRAI) - return hts_itr_regions(idx, reglist, regcount, cram_name2id, cidx->cram, - hts_itr_multi_cram, cram_readrec, cram_pseek, cram_ptell); - else - return hts_itr_regions(idx, reglist, regcount, (hts_name2id_f)(bam_name2id), hdr, - hts_itr_multi_bam, sam_readrec, bam_pseek, bam_ptell); -} - -/********************** - *** SAM header I/O *** - **********************/ - -#include "htslib/kseq.h" -#include "htslib/kstring.h" - -sam_hdr_t *sam_hdr_parse(size_t l_text, const char *text) -{ - sam_hdr_t *bh = sam_hdr_init(); - if (!bh) return NULL; - - if (sam_hdr_add_lines(bh, text, l_text) != 0) { - sam_hdr_destroy(bh); - return NULL; - } - - return bh; -} - -static int valid_sam_header_type(const char *s) { - if (s[0] != '@') return 0; - switch (s[1]) { - case 
'H': - return s[2] == 'D' && s[3] == '\t'; - case 'S': - return s[2] == 'Q' && s[3] == '\t'; - case 'R': - case 'P': - return s[2] == 'G' && s[3] == '\t'; - case 'C': - return s[2] == 'O'; - } - return 0; -} - -// Minimal sanitisation of a header to ensure. -// - null terminated string. -// - all lines start with @ (also implies no blank lines). -// -// Much more could be done, but currently is not, including: -// - checking header types are known (HD, SQ, etc). -// - syntax (eg checking tab separated fields). -// - validating n_targets matches @SQ records. -// - validating target lengths against @SQ records. -static sam_hdr_t *sam_hdr_sanitise(sam_hdr_t *h) { - if (!h) - return NULL; - - // Special case for empty headers. - if (h->l_text == 0) - return h; - - size_t i; - unsigned int lnum = 0; - char *cp = h->text, last = '\n'; - for (i = 0; i < h->l_text; i++) { - // NB: l_text excludes terminating nul. This finds early ones. - if (cp[i] == 0) - break; - - // Error on \n[^@], including duplicate newlines - if (last == '\n') { - lnum++; - if (cp[i] != '@') { - hts_log_error("Malformed SAM header at line %u", lnum); - sam_hdr_destroy(h); - return NULL; - } - } - - last = cp[i]; - } - - if (i < h->l_text) { // Early nul found. Complain if not just padding. - size_t j = i; - while (j < h->l_text && cp[j] == '\0') j++; - if (j < h->l_text) - hts_log_warning("Unexpected NUL character in header. Possibly truncated"); - } - - // Add trailing newline and/or trailing nul if required. - if (last != '\n') { - hts_log_warning("Missing trailing newline on SAM header. 
Possibly truncated"); - - if (h->l_text < 2 || i >= h->l_text - 2) { - if (h->l_text >= SIZE_MAX - 2) { - hts_log_error("No room for extra newline"); - sam_hdr_destroy(h); - return NULL; - } - - cp = realloc(h->text, (size_t) h->l_text+2); - if (!cp) { - sam_hdr_destroy(h); - return NULL; - } - h->text = cp; - } - cp[i++] = '\n'; - - // l_text may be larger already due to multiple nul padding - if (h->l_text < i) - h->l_text = i; - cp[h->l_text] = '\0'; - } - - return h; -} - -static void known_stderr(const char *tool, const char *advice) { - hts_log_warning("SAM file corrupted by embedded %s error/log message", tool); - hts_log_warning("%s", advice); -} - -static void warn_if_known_stderr(const char *line) { - if (strstr(line, "M::bwa_idx_load_from_disk") != NULL) - known_stderr("bwa", "Use `bwa mem -o file.sam ...` or `bwa sampe -f file.sam ...` instead of `bwa ... > file.sam`"); - else if (strstr(line, "M::mem_pestat") != NULL) - known_stderr("bwa", "Use `bwa mem -o file.sam ...` instead of `bwa mem ... > file.sam`"); - else if (strstr(line, "loaded/built the index") != NULL) - known_stderr("minimap2", "Use `minimap2 -o file.sam ...` instead of `minimap2 ... 
> file.sam`"); -} - -static sam_hdr_t *sam_hdr_create(htsFile* fp) { - kstring_t str = { 0, 0, NULL }; - khint_t k; - sam_hdr_t* h = sam_hdr_init(); - const char *q, *r; - char* sn = NULL; - khash_t(s2i) *d = kh_init(s2i); - khash_t(s2i) *long_refs = NULL; - if (!h || !d) - goto error; - - int ret, has_SQ = 0; - int next_c = '@'; - while (next_c == '@' && (ret = hts_getline(fp, KS_SEP_LINE, &fp->line)) >= 0) { - if (fp->line.s[0] != '@') - break; - - if (fp->line.l > 3 && strncmp(fp->line.s, "@SQ", 3) == 0) { - has_SQ = 1; - hts_pos_t ln = -1; - for (q = fp->line.s + 4;; ++q) { - if (strncmp(q, "SN:", 3) == 0) { - q += 3; - for (r = q;*r != '\t' && *r != '\n' && *r != '\0';++r); - - if (sn) { - hts_log_warning("SQ header line has more than one SN: tag"); - free(sn); - } - sn = (char*)calloc(r - q + 1, 1); - if (!sn) - goto error; - - strncpy(sn, q, r - q); - q = r; - } else { - if (strncmp(q, "LN:", 3) == 0) - ln = strtoll(q + 3, (char**)&q, 10); - } - - while (*q != '\t' && *q != '\n' && *q != '\0') - ++q; - if (*q == '\0' || *q == '\n') - break; - } - if (sn) { - if (ln >= 0) { - int absent; - k = kh_put(s2i, d, sn, &absent); - if (absent < 0) - goto error; - - if (!absent) { - hts_log_warning("Duplicated sequence \"%s\" in file \"%s\"", sn, fp->fn); - free(sn); - } else { - sn = NULL; - if (ln >= UINT32_MAX) { - // Stash away ref length that - // doesn't fit in target_len array - int k2; - if (!long_refs) { - long_refs = kh_init(s2i); - if (!long_refs) - goto error; - } - k2 = kh_put(s2i, long_refs, kh_key(d, k), &absent); - if (absent < 0) - goto error; - kh_val(long_refs, k2) = ln; - kh_val(d, k) = ((int64_t) (kh_size(d) - 1) << 32 - | UINT32_MAX); - } else { - kh_val(d, k) = (int64_t) (kh_size(d) - 1) << 32 | ln; - } - } - } else { - hts_log_warning("Ignored @SQ SN:%s : bad or missing LN tag", sn); - warn_if_known_stderr(fp->line.s); - free(sn); - } - } else { - hts_log_warning("Ignored @SQ line with missing SN: tag"); - warn_if_known_stderr(fp->line.s); - } 
- sn = NULL; - } - else if (!valid_sam_header_type(fp->line.s)) { - hts_log_error("Invalid header line: must start with @HD/@SQ/@RG/@PG/@CO"); - warn_if_known_stderr(fp->line.s); - goto error; - } - - if (kputsn(fp->line.s, fp->line.l, &str) < 0) - goto error; - - if (kputc('\n', &str) < 0) - goto error; - - if (fp->is_bgzf) { - next_c = bgzf_peek(fp->fp.bgzf); - } else { - unsigned char nc; - ssize_t pret = hpeek(fp->fp.hfile, &nc, 1); - next_c = pret > 0 ? nc : pret - 1; - } - if (next_c < -1) - goto error; - } - if (next_c != '@') - fp->line.l = 0; - - if (ret < -1) - goto error; - - if (!has_SQ && fp->fn_aux) { - kstring_t line = { 0, 0, NULL }; - - /* The reference index (.fai) is actually needed here */ - char *fai_fn = fp->fn_aux; - char *fn_delim = strstr(fp->fn_aux, HTS_IDX_DELIM); - if (fn_delim) - fai_fn = fn_delim + strlen(HTS_IDX_DELIM); - - hFILE* f = hopen(fai_fn, "r"); - int e = 0, absent; - if (f == NULL) - goto error; - - while (line.l = 0, kgetline(&line, (kgets_func*) hgets, f) >= 0) { - char* tab = strchr(line.s, '\t'); - hts_pos_t ln; - - if (tab == NULL) - continue; - - sn = (char*)calloc(tab-line.s+1, 1); - if (!sn) { - e = 1; - break; - } - memcpy(sn, line.s, tab-line.s); - k = kh_put(s2i, d, sn, &absent); - if (absent < 0) { - e = 1; - break; - } - - ln = strtoll(tab, NULL, 10); - - if (!absent) { - hts_log_warning("Duplicated sequence \"%s\" in the file \"%s\"", sn, fai_fn); - free(sn); - sn = NULL; - } else { - sn = NULL; - if (ln >= UINT32_MAX) { - // Stash away ref length that - // doesn't fit in target_len array - khint_t k2; - int absent = -1; - if (!long_refs) { - long_refs = kh_init(s2i); - if (!long_refs) { - e = 1; - break; - } - } - k2 = kh_put(s2i, long_refs, kh_key(d, k), &absent); - if (absent < 0) { - e = 1; - break; - } - kh_val(long_refs, k2) = ln; - kh_val(d, k) = ((int64_t) (kh_size(d) - 1) << 32 - | UINT32_MAX); - } else { - kh_val(d, k) = (int64_t) (kh_size(d) - 1) << 32 | ln; - } - has_SQ = 1; - } - - e |= 
kputs("@SQ\tSN:", &str) < 0; - e |= kputsn(line.s, tab - line.s, &str) < 0; - e |= kputs("\tLN:", &str) < 0; - e |= kputll(ln, &str) < 0; - e |= kputc('\n', &str) < 0; - if (e) - break; - } - - ks_free(&line); - if (hclose(f) != 0) { - hts_log_error("Error on closing %s", fai_fn); - e = 1; - } - if (e) - goto error; - } - - if (has_SQ) { - // Populate the targets array - h->n_targets = kh_size(d); - - h->target_name = (char**) malloc(sizeof(char*) * h->n_targets); - if (!h->target_name) { - h->n_targets = 0; - goto error; - } - - h->target_len = (uint32_t*) malloc(sizeof(uint32_t) * h->n_targets); - if (!h->target_len) { - h->n_targets = 0; - goto error; - } - - for (k = kh_begin(d); k != kh_end(d); ++k) { - if (!kh_exist(d, k)) - continue; - - h->target_name[kh_val(d, k) >> 32] = (char*) kh_key(d, k); - h->target_len[kh_val(d, k) >> 32] = kh_val(d, k) & 0xffffffffUL; - kh_val(d, k) >>= 32; - } - } - - // Repurpose sdict to hold any references longer than UINT32_MAX - h->sdict = long_refs; - - kh_destroy(s2i, d); - - if (str.l == 0) - kputsn("", 0, &str); - h->l_text = str.l; - h->text = ks_release(&str); - fp->bam_header = sam_hdr_sanitise(h); - fp->bam_header->ref_count = 1; - - return fp->bam_header; - - error: - if (h && d && (!h->target_name || !h->target_len)) { - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((void *)kh_key(d, k)); - } - sam_hdr_destroy(h); - ks_free(&str); - kh_destroy(s2i, d); - kh_destroy(s2i, long_refs); - if (sn) free(sn); - return NULL; -} - -sam_hdr_t *sam_hdr_read(htsFile *fp) -{ - if (!fp) { - errno = EINVAL; - return NULL; - } - - switch (fp->format.format) { - case bam: - return sam_hdr_sanitise(bam_hdr_read(fp->fp.bgzf)); - - case cram: - return sam_hdr_sanitise(sam_hdr_dup(fp->fp.cram->header)); - - case sam: - return sam_hdr_create(fp); - - case fastq_format: - case fasta_format: - return sam_hdr_init(); - - case empty_format: - errno = EPIPE; - return NULL; - - default: - errno = EFTYPE; - return NULL; - 
} -} - -int sam_hdr_write(htsFile *fp, const sam_hdr_t *h) -{ - if (!fp || !h) { - errno = EINVAL; - return -1; - } - - switch (fp->format.format) { - case binary_format: - fp->format.category = sequence_data; - fp->format.format = bam; - /* fall-through */ - case bam: - if (bam_hdr_write(fp->fp.bgzf, h) < 0) return -1; - break; - - case cram: { - cram_fd *fd = fp->fp.cram; - if (cram_set_header2(fd, h) < 0) return -1; - if (fp->fn_aux) - cram_load_reference(fd, fp->fn_aux); - if (cram_write_SAM_hdr(fd, fd->header) < 0) return -1; - } - break; - - case text_format: - fp->format.category = sequence_data; - fp->format.format = sam; - /* fall-through */ - case sam: { - if (!h->hrecs && !h->text) - return 0; - char *text; - kstring_t hdr_ks = { 0, 0, NULL }; - size_t l_text; - ssize_t bytes; - int r = 0, no_sq = 0; - - if (h->hrecs) { - if (sam_hrecs_rebuild_text(h->hrecs, &hdr_ks) != 0) - return -1; - text = hdr_ks.s; - l_text = hdr_ks.l; - } else { - const char *p = NULL; - do { - const char *q = p == NULL ? 
h->text : p + 4; - p = strstr(q, "@SQ\t"); - } while (!(p == NULL || p == h->text || *(p - 1) == '\n')); - no_sq = p == NULL; - text = h->text; - l_text = h->l_text; - } - - if (fp->is_bgzf) { - bytes = bgzf_write(fp->fp.bgzf, text, l_text); - } else { - bytes = hwrite(fp->fp.hfile, text, l_text); - } - free(hdr_ks.s); - if (bytes != l_text) - return -1; - - if (no_sq) { - int i; - for (i = 0; i < h->n_targets; ++i) { - fp->line.l = 0; - r |= kputsn("@SQ\tSN:", 7, &fp->line) < 0; - r |= kputs(h->target_name[i], &fp->line) < 0; - r |= kputsn("\tLN:", 4, &fp->line) < 0; - r |= kputw(h->target_len[i], &fp->line) < 0; - r |= kputc('\n', &fp->line) < 0; - if (r != 0) - return -1; - - if (fp->is_bgzf) { - bytes = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l); - } else { - bytes = hwrite(fp->fp.hfile, fp->line.s, fp->line.l); - } - if (bytes != fp->line.l) - return -1; - } - } - if (fp->is_bgzf) { - if (bgzf_flush(fp->fp.bgzf) != 0) return -1; - } else { - if (hflush(fp->fp.hfile) != 0) return -1; - } - } - break; - - case fastq_format: - case fasta_format: - // Nothing to output; FASTQ has no file headers. - break; - - default: - errno = EBADF; - return -1; - } - return 0; -} - -static int old_sam_hdr_change_HD(sam_hdr_t *h, const char *key, const char *val) -{ - char *p, *q, *beg = NULL, *end = NULL, *newtext; - size_t new_l_text; - if (!h || !key) - return -1; - - if (h->l_text > 3) { - if (strncmp(h->text, "@HD", 3) == 0) { //@HD line exists - if ((p = strchr(h->text, '\n')) == 0) return -1; - *p = '\0'; // for strstr call - - char tmp[5] = { '\t', key[0], key[0] ? 
key[1] : '\0', ':', '\0' }; - - if ((q = strstr(h->text, tmp)) != 0) { // key exists - *p = '\n'; // change back - - // mark the key:val - beg = q; - for (q += 4; *q != '\n' && *q != '\t'; ++q); - end = q; - - if (val && (strncmp(beg + 4, val, end - beg - 4) == 0) - && strlen(val) == end - beg - 4) - return 0; // val is the same, no need to change - - } else { - beg = end = p; - *p = '\n'; - } - } - } - if (beg == NULL) { // no @HD - new_l_text = h->l_text; - if (new_l_text > SIZE_MAX - strlen(SAM_FORMAT_VERSION) - 9) - return -1; - new_l_text += strlen(SAM_FORMAT_VERSION) + 8; - if (val) { - if (new_l_text > SIZE_MAX - strlen(val) - 5) - return -1; - new_l_text += strlen(val) + 4; - } - newtext = (char*)malloc(new_l_text + 1); - if (!newtext) return -1; - - if (val) - snprintf(newtext, new_l_text + 1, - "@HD\tVN:%s\t%s:%s\n%s", SAM_FORMAT_VERSION, key, val, h->text); - else - snprintf(newtext, new_l_text + 1, - "@HD\tVN:%s\n%s", SAM_FORMAT_VERSION, h->text); - } else { // has @HD but different or no key - new_l_text = (beg - h->text) + (h->text + h->l_text - end); - if (val) { - if (new_l_text > SIZE_MAX - strlen(val) - 5) - return -1; - new_l_text += strlen(val) + 4; - } - newtext = (char*)malloc(new_l_text + 1); - if (!newtext) return -1; - - if (val) { - snprintf(newtext, new_l_text + 1, "%.*s\t%s:%s%s", - (int) (beg - h->text), h->text, key, val, end); - } else { //delete key - snprintf(newtext, new_l_text + 1, "%.*s%s", - (int) (beg - h->text), h->text, end); - } - } - free(h->text); - h->text = newtext; - h->l_text = new_l_text; - return 0; -} - - -int sam_hdr_change_HD(sam_hdr_t *h, const char *key, const char *val) -{ - if (!h || !key) - return -1; - - if (!h->hrecs) - return old_sam_hdr_change_HD(h, key, val); - - if (val) { - if (sam_hdr_update_line(h, "HD", NULL, NULL, key, val, NULL) != 0) - return -1; - } else { - if (sam_hdr_remove_tag_id(h, "HD", NULL, NULL, key) != 0) - return -1; - } - return sam_hdr_rebuild(h); -} -/********************** - *** 
SAM record I/O *** - **********************/ - -static int sam_parse_B_vals(char type, uint32_t n, char *in, char **end, - char *r, bam1_t *b) -{ - int orig_l = b->l_data; - char *q = in; - int32_t size; - size_t bytes; - int overflow = 0; - - size = aux_type2size(type); - if (size <= 0 || size > 4) { - hts_log_error("Unrecognized type B:%c", type); - return -1; - } - - // Ensure space for type + values - bytes = (size_t) n * (size_t) size; - if (bytes / size != n - || possibly_expand_bam_data(b, bytes + 2 + sizeof(uint32_t))) { - hts_log_error("Out of memory"); - return -1; - } - - b->data[b->l_data++] = 'B'; - b->data[b->l_data++] = type; - i32_to_le(n, b->data + b->l_data); - b->l_data += sizeof(uint32_t); - // This ensures that q always ends up at the next comma after - // reading a number even if it's followed by junk. It - // prevents the possibility of trying to read more than n items. -#define skip_to_comma_(q) do { while (*(q) > '\t' && *(q) != ',') (q)++; } while (0) - if (type == 'c') { - while (q < r) { - *(b->data + b->l_data) = hts_str2int(q + 1, &q, 8, &overflow); - b->l_data++; - skip_to_comma_(q); - } - } else if (type == 'C') { - while (q < r) { - if (*q != '-') { - *(b->data + b->l_data) = hts_str2uint(q + 1, &q, 8, &overflow); - b->l_data++; - } else { - overflow = 1; - } - skip_to_comma_(q); - } - } else if (type == 's') { - while (q < r) { - i16_to_le(hts_str2int(q + 1, &q, 16, &overflow), b->data + b->l_data); - b->l_data += 2; - skip_to_comma_(q); - } - } else if (type == 'S') { - while (q < r) { - if (*q != '-') { - u16_to_le(hts_str2uint(q + 1, &q, 16, &overflow), b->data + b->l_data); - b->l_data += 2; - } else { - overflow = 1; - } - skip_to_comma_(q); - } - } else if (type == 'i') { - while (q < r) { - i32_to_le(hts_str2int(q + 1, &q, 32, &overflow), b->data + b->l_data); - b->l_data += 4; - skip_to_comma_(q); - } - } else if (type == 'I') { - while (q < r) { - if (*q != '-') { - u32_to_le(hts_str2uint(q + 1, &q, 32, &overflow), b->data 
+ b->l_data); - b->l_data += 4; - } else { - overflow = 1; - } - skip_to_comma_(q); - } - } else if (type == 'f') { - while (q < r) { - float_to_le(strtod(q + 1, &q), b->data + b->l_data); - b->l_data += 4; - skip_to_comma_(q); - } - } else { - hts_log_error("Unrecognized type B:%c", type); - return -1; - } - - if (!overflow) { - *end = q; - return 0; - } else { - int64_t max = 0, min = 0, val; - // Given type was incorrect. Try to rescue the situation. - q = in; - overflow = 0; - b->l_data = orig_l; - // Find out what range of values is present - while (q < r) { - val = hts_str2int(q + 1, &q, 64, &overflow); - if (max < val) max = val; - if (min > val) min = val; - skip_to_comma_(q); - } - // Retry with appropriate type - if (!overflow) { - if (min < 0) { - if (min >= INT8_MIN && max <= INT8_MAX) { - return sam_parse_B_vals('c', n, in, end, r, b); - } else if (min >= INT16_MIN && max <= INT16_MAX) { - return sam_parse_B_vals('s', n, in, end, r, b); - } else if (min >= INT32_MIN && max <= INT32_MAX) { - return sam_parse_B_vals('i', n, in, end, r, b); - } - } else { - if (max < UINT8_MAX) { - return sam_parse_B_vals('C', n, in, end, r, b); - } else if (max <= UINT16_MAX) { - return sam_parse_B_vals('S', n, in, end, r, b); - } else if (max <= UINT32_MAX) { - return sam_parse_B_vals('I', n, in, end, r, b); - } - } - } - // If here then at least one of the values is too big to store - hts_log_error("Numeric value in B array out of allowed range"); - return -1; - } -#undef skip_to_comma_ -} - -static inline unsigned int parse_sam_flag(char *v, char **rv, int *overflow) { - if (*v >= '1' && *v <= '9') { - return hts_str2uint(v, rv, 16, overflow); - } - else if (*v == '0') { - // handle single-digit "0" directly; otherwise it's hex or octal - if (v[1] == '\t') { *rv = v+1; return 0; } - else { - unsigned long val = strtoul(v, rv, 0); - if (val > 65535) { *overflow = 1; return 65535; } - return val; - } - } - else { - // TODO implement symbolic flag letters - *rv = v; - 
return 0; - } -} - -// Parse tag line and append to bam object b. -// Shared by both SAM and FASTQ parsers. -// -// The difference between the two is how lenient we are to recognising -// non-compliant strings. The FASTQ parser glosses over arbitrary -// non-SAM looking strings. -static inline int aux_parse(char *start, char *end, bam1_t *b, int lenient, - khash_t(tag) *tag_whitelist) { - int overflow = 0; - int checkpoint; - char logbuf[40]; - char *q = start, *p = end; - -#define _parse_err(cond, ...) \ - do { \ - if (cond) { \ - if (lenient) { \ - while (q < p && !isspace_c(*q)) \ - q++; \ - while (q < p && isspace_c(*q)) \ - q++; \ - b->l_data = checkpoint; \ - goto loop; \ - } else { \ - hts_log_error(__VA_ARGS__); \ - goto err_ret; \ - } \ - } \ - } while (0) - - while (q < p) loop: { - char type; - checkpoint = b->l_data; - if (p - q < 5) { - if (lenient) { - break; - } else { - hts_log_error("Incomplete aux field"); - goto err_ret; - } - } - _parse_err(q[0] < '!' || q[1] < '!', "invalid aux tag id"); - - if (lenient && (q[2] | q[4]) != ':') { - while (q < p && !isspace_c(*q)) - q++; - while (q < p && isspace_c(*q)) - q++; - continue; - } - - if (tag_whitelist) { - int tt = q[0]*256 + q[1]; - if (kh_get(tag, tag_whitelist, tt) == kh_end(tag_whitelist)) { - while (q < p && *q != '\t') - q++; - continue; - } - } - - // Copy over id - if (possibly_expand_bam_data(b, 2) < 0) goto err_ret; - memcpy(b->data + b->l_data, q, 2); b->l_data += 2; - q += 3; type = *q++; ++q; // q points to value - if (type != 'Z' && type != 'H') // the only zero length acceptable fields - _parse_err(*q <= '\t', "incomplete aux field"); - - // Ensure enough space for a double + type allocated. 
- if (possibly_expand_bam_data(b, 16) < 0) goto err_ret; - - if (type == 'A' || type == 'a' || type == 'c' || type == 'C') { - b->data[b->l_data++] = 'A'; - b->data[b->l_data++] = *q++; - } else if (type == 'i' || type == 'I') { - if (*q == '-') { - int32_t x = hts_str2int(q, &q, 32, &overflow); - if (x >= INT8_MIN) { - b->data[b->l_data++] = 'c'; - b->data[b->l_data++] = x; - } else if (x >= INT16_MIN) { - b->data[b->l_data++] = 's'; - i16_to_le(x, b->data + b->l_data); - b->l_data += 2; - } else { - b->data[b->l_data++] = 'i'; - i32_to_le(x, b->data + b->l_data); - b->l_data += 4; - } - } else { - uint32_t x = hts_str2uint(q, &q, 32, &overflow); - if (x <= UINT8_MAX) { - b->data[b->l_data++] = 'C'; - b->data[b->l_data++] = x; - } else if (x <= UINT16_MAX) { - b->data[b->l_data++] = 'S'; - u16_to_le(x, b->data + b->l_data); - b->l_data += 2; - } else { - b->data[b->l_data++] = 'I'; - u32_to_le(x, b->data + b->l_data); - b->l_data += 4; - } - } - } else if (type == 'f') { - b->data[b->l_data++] = 'f'; - float_to_le(strtod(q, &q), b->data + b->l_data); - b->l_data += sizeof(float); - } else if (type == 'd') { - b->data[b->l_data++] = 'd'; - double_to_le(strtod(q, &q), b->data + b->l_data); - b->l_data += sizeof(double); - } else if (type == 'Z' || type == 'H') { - char *end = strchr(q, '\t'); - if (!end) end = q + strlen(q); - _parse_err(type == 'H' && ((end-q)&1) != 0, - "hex field does not have an even number of digits"); - b->data[b->l_data++] = type; - if (possibly_expand_bam_data(b, end - q + 1) < 0) goto err_ret; - memcpy(b->data + b->l_data, q, end - q); - b->l_data += end - q; - b->data[b->l_data++] = '\0'; - q = end; - } else if (type == 'B') { - uint32_t n; - char *r; - type = *q++; // q points to the first ',' following the typing byte - _parse_err(*q && *q != ',' && *q != '\t', - "B aux field type not followed by ','"); - - for (r = q, n = 0; *r > '\t'; ++r) - if (*r == ',') ++n; - - if (sam_parse_B_vals(type, n, q, &q, r, b) < 0) - goto err_ret; - } 
else _parse_err(1, "unrecognized type %s", hts_strprint(logbuf, sizeof logbuf, '\'', &type, 1)); - - while (*q > '\t') { q++; } // Skip any junk to next tab - q++; - } - - _parse_err(!lenient && overflow != 0, "numeric value out of allowed range"); -#undef _parse_err - - return 0; - -err_ret: - return -2; -} - -int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) -{ -#define _read_token(_p) (_p); do { char *tab = strchr((_p), '\t'); if (!tab) goto err_ret; *tab = '\0'; (_p) = tab + 1; } while (0) - -#if HTS_ALLOW_UNALIGNED != 0 && ULONG_MAX == 0xffffffffffffffff - -// Macro that operates on 64-bits at a time. -#define COPY_MINUS_N(to,from,n,l,failed) \ - do { \ - uint64_u *from8 = (uint64_u *)(from); \ - uint64_u *to8 = (uint64_u *)(to); \ - uint64_t uflow = 0; \ - size_t l8 = (l)>>3, i; \ - for (i = 0; i < l8; i++) { \ - to8[i] = from8[i] - (n)*0x0101010101010101UL; \ - uflow |= to8[i]; \ - } \ - for (i<<=3; i < (l); ++i) { \ - to[i] = from[i] - (n); \ - uflow |= to[i]; \ - } \ - failed = (uflow & 0x8080808080808080UL) > 0; \ - } while (0) - -#else - -// Basic version which operates a byte at a time -#define COPY_MINUS_N(to,from,n,l,failed) do { \ - uint8_t uflow = 0; \ - for (i = 0; i < (l); ++i) { \ - (to)[i] = (from)[i] - (n); \ - uflow |= (uint8_t) (to)[i]; \ - } \ - failed = (uflow & 0x80) > 0; \ - } while (0) - -#endif - -#define _get_mem(type_t, x, b, l) if (possibly_expand_bam_data((b), (l)) < 0) goto err_ret; *(x) = (type_t*)((b)->data + (b)->l_data); (b)->l_data += (l) -#define _parse_err(cond, ...) do { if (cond) { hts_log_error(__VA_ARGS__); goto err_ret; } } while (0) -#define _parse_warn(cond, ...) 
do { if (cond) { hts_log_warning(__VA_ARGS__); } } while (0) - - uint8_t *t; - - char *p = s->s, *q; - int i, overflow = 0; - char logbuf[40]; - hts_pos_t cigreflen; - bam1_core_t *c = &b->core; - - b->l_data = 0; - memset(c, 0, 32); - - // qname - q = _read_token(p); - - _parse_warn(p - q <= 1, "empty query name"); - _parse_err(p - q > 255, "query name too long"); - // resize large enough for name + extranul - if (possibly_expand_bam_data(b, (p - q) + 4) < 0) goto err_ret; - memcpy(b->data + b->l_data, q, p-q); b->l_data += p-q; - - c->l_extranul = (4 - (b->l_data & 3)) & 3; - memcpy(b->data + b->l_data, "\0\0\0\0", c->l_extranul); - b->l_data += c->l_extranul; - - c->l_qname = p - q + c->l_extranul; - - // flag - c->flag = parse_sam_flag(p, &p, &overflow); - if (*p++ != '\t') goto err_ret; // malformated flag - - // chr - q = _read_token(p); - if (strcmp(q, "*")) { - _parse_err(h->n_targets == 0, "no SQ lines present in the header"); - c->tid = bam_name2id(h, q); - _parse_err(c->tid < -1, "failed to parse header"); - _parse_warn(c->tid < 0, "unrecognized reference name %s; treated as unmapped", hts_strprint(logbuf, sizeof logbuf, '"', q, SIZE_MAX)); - } else c->tid = -1; - - // pos - c->pos = hts_str2uint(p, &p, 63, &overflow) - 1; - if (*p++ != '\t') goto err_ret; - if (c->pos < 0 && c->tid >= 0) { - _parse_warn(1, "mapped query cannot have zero coordinate; treated as unmapped"); - c->tid = -1; - } - if (c->tid < 0) c->flag |= BAM_FUNMAP; - - // mapq - c->qual = hts_str2uint(p, &p, 8, &overflow); - if (*p++ != '\t') goto err_ret; - // cigar - if (*p != '*') { - uint32_t *cigar = NULL; - int old_l_data = b->l_data; - int n_cigar = bam_parse_cigar(p, &p, b); - if (n_cigar < 1 || *p++ != '\t') goto err_ret; - cigar = (uint32_t *)(b->data + old_l_data); - c->n_cigar = n_cigar; - - // can't use bam_endpos() directly as some fields not yet set up - cigreflen = (!(c->flag&BAM_FUNMAP))? 
bam_cigar2rlen(c->n_cigar, cigar) : 1; - if (cigreflen == 0) cigreflen = 1; - } else { - _parse_warn(!(c->flag&BAM_FUNMAP), "mapped query must have a CIGAR; treated as unmapped"); - c->flag |= BAM_FUNMAP; - q = _read_token(p); - cigreflen = 1; - } - _parse_err(HTS_POS_MAX - cigreflen <= c->pos, - "read ends beyond highest supported position"); - c->bin = hts_reg2bin(c->pos, c->pos + cigreflen, 14, 5); - // mate chr - q = _read_token(p); - if (strcmp(q, "=") == 0) { - c->mtid = c->tid; - } else if (strcmp(q, "*") == 0) { - c->mtid = -1; - } else { - c->mtid = bam_name2id(h, q); - _parse_err(c->mtid < -1, "failed to parse header"); - _parse_warn(c->mtid < 0, "unrecognized mate reference name %s; treated as unmapped", hts_strprint(logbuf, sizeof logbuf, '"', q, SIZE_MAX)); - } - // mpos - c->mpos = hts_str2uint(p, &p, 63, &overflow) - 1; - if (*p++ != '\t') goto err_ret; - if (c->mpos < 0 && c->mtid >= 0) { - _parse_warn(1, "mapped mate cannot have zero coordinate; treated as unmapped"); - c->mtid = -1; - } - // tlen - c->isize = hts_str2int(p, &p, 64, &overflow); - if (*p++ != '\t') goto err_ret; - // seq - q = _read_token(p); - if (strcmp(q, "*")) { - _parse_err(p - q - 1 > INT32_MAX, "read sequence is too long"); - c->l_qseq = p - q - 1; - hts_pos_t ql = bam_cigar2qlen(c->n_cigar, (uint32_t*)(b->data + c->l_qname)); - _parse_err(c->n_cigar && ql != c->l_qseq, "CIGAR and query sequence are of different length"); - i = (c->l_qseq + 1) >> 1; - _get_mem(uint8_t, &t, b, i); - - unsigned int lqs2 = c->l_qseq&~1, i; - for (i = 0; i < lqs2; i+=2) - t[i>>1] = (seq_nt16_table[(unsigned char)q[i]] << 4) | seq_nt16_table[(unsigned char)q[i+1]]; - for (; i < c->l_qseq; ++i) - t[i>>1] = seq_nt16_table[(unsigned char)q[i]] << ((~i&1)<<2); - } else c->l_qseq = 0; - // qual - _get_mem(uint8_t, &t, b, c->l_qseq); - if (p[0] == '*' && (p[1] == '\t' || p[1] == '\0')) { - memset(t, 0xff, c->l_qseq); - p += 2; - } else { - int failed = 0; - _parse_err(s->l - (p - s->s) < c->l_qseq - || 
(p[c->l_qseq] != '\t' && p[c->l_qseq] != '\0'), - "SEQ and QUAL are of different length"); - COPY_MINUS_N(t, p, 33, c->l_qseq, failed); - _parse_err(failed, "invalid QUAL character"); - p += c->l_qseq + 1; - } - - // aux - if (aux_parse(p, s->s + s->l, b, 0, NULL) < 0) - goto err_ret; - - if (bam_tag2cigar(b, 1, 1) < 0) - return -2; - return 0; - -#undef _parse_warn -#undef _parse_err -#undef _get_mem -#undef _read_token -err_ret: - return -2; -} - -static uint32_t read_ncigar(const char *q) { - uint32_t n_cigar = 0; - for (; *q && *q != '\t'; ++q) - if (!isdigit_c(*q)) ++n_cigar; - if (!n_cigar) { - hts_log_error("No CIGAR operations"); - return 0; - } - if (n_cigar >= 2147483647) { - hts_log_error("Too many CIGAR operations"); - return 0; - } - - return n_cigar; -} - -/*! @function - @abstract Parse a CIGAR string into preallocated a uint32_t array - @param in [in] pointer to the source string - @param a_cigar [out] address of the destination uint32_t buffer - @return number of processed input characters; 0 on error - */ -static int parse_cigar(const char *in, uint32_t *a_cigar, uint32_t n_cigar) { - int i, overflow = 0; - const char *p = in; - for (i = 0; i < n_cigar; i++) { - uint32_t len; - int op; - char *q; - len = hts_str2uint(p, &q, 28, &overflow)< *a_mem) { - uint32_t *a_tmp = realloc(*a_cigar, n_cigar*sizeof(**a_cigar)); - if (a_tmp) { - *a_cigar = a_tmp; - *a_mem = n_cigar; - } else { - hts_log_error("Memory allocation error"); - return -1; - } - } - - if (!(diff = parse_cigar(in, *a_cigar, n_cigar))) return -1; - if (end) *end = (char *)in+diff; - - return n_cigar; -} - -ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b) { - size_t n_cigar = 0; - int diff; - - if (!in || !b) { - hts_log_error("NULL pointer arguments"); - return -1; - } - if (end) *end = (char *)in; - - if (*in == '*') { - if (end) (*end)++; - return 0; - } - n_cigar = read_ncigar(in); - if (!n_cigar) return 0; - if (possibly_expand_bam_data(b, n_cigar * sizeof(uint32_t)) < 
0) { - hts_log_error("Memory allocation error"); - return -1; - } - - if (!(diff = parse_cigar(in, (uint32_t *)(b->data + b->l_data), n_cigar))) return -1; - b->l_data += (n_cigar * sizeof(uint32_t)); - if (end) *end = (char *)in+diff; - - return n_cigar; -} - -/* - * ----------------------------------------------------------------------------- - * SAM threading - */ -// Size of SAM text block (reading) -#define SAM_NBYTES 240000 - -// Number of BAM records (writing, up to NB_mem in size) -#define SAM_NBAM 1000 - -struct SAM_state; - -// Output job - a block of BAM records -typedef struct sp_bams { - struct sp_bams *next; - int serial; - - bam1_t *bams; - int nbams, abams; // used and alloc for bams[] array - size_t bam_mem; // very approximate total size - - struct SAM_state *fd; -} sp_bams; - -// Input job - a block of SAM text -typedef struct sp_lines { - struct sp_lines *next; - int serial; - - char *data; - int data_size; - int alloc; - - struct SAM_state *fd; - sp_bams *bams; -} sp_lines; - -enum sam_cmd { - SAM_NONE = 0, - SAM_CLOSE, - SAM_CLOSE_DONE, -}; - -typedef struct SAM_state { - sam_hdr_t *h; - - hts_tpool *p; - int own_pool; - pthread_mutex_t lines_m; - hts_tpool_process *q; - pthread_t dispatcher; - int dispatcher_set; - - sp_lines *lines; - sp_bams *bams; - - sp_bams *curr_bam; - int curr_idx; - int serial; - - // Be warned: moving these mutexes around in this struct can reduce - // threading performance by up to 70%! - pthread_mutex_t command_m; - pthread_cond_t command_c; - enum sam_cmd command; - - // One of the E* errno codes - int errcode; - - htsFile *fp; -} SAM_state; - -// Returns a SAM_state struct from a generic hFILE. -// -// Returns NULL on failure. -static SAM_state *sam_state_create(htsFile *fp) { - // Ideally sam_open wouldn't be a #define to hts_open but instead would - // be a redirect call with an additional 'S' mode. This in turn would - // correctly set the designed format to sam instead of a generic - // text_format. 
- if (fp->format.format != sam && fp->format.format != text_format) - return NULL; - - SAM_state *fd = calloc(1, sizeof(*fd)); - if (!fd) - return NULL; - - fp->state = fd; - fd->fp = fp; - - return fd; -} - -static int sam_format1_append(const bam_hdr_t *h, const bam1_t *b, kstring_t *str); -static void *sam_format_worker(void *arg); - -static void sam_state_err(SAM_state *fd, int errcode) { - pthread_mutex_lock(&fd->command_m); - if (!fd->errcode) - fd->errcode = errcode; - pthread_mutex_unlock(&fd->command_m); -} - -static void sam_free_sp_bams(sp_bams *b) { - if (!b) - return; - - if (b->bams) { - int i; - for (i = 0; i < b->abams; i++) { - if (b->bams[i].data) - free(b->bams[i].data); - } - free(b->bams); - } - free(b); -} - -// Destroys the state produce by sam_state_create. -int sam_state_destroy(htsFile *fp) { - int ret = 0; - - if (!fp->state) - return 0; - - SAM_state *fd = fp->state; - if (fd->p) { - if (fd->h) { - // Notify sam_dispatcher we're closing - pthread_mutex_lock(&fd->command_m); - if (fd->command != SAM_CLOSE_DONE) - fd->command = SAM_CLOSE; - pthread_cond_signal(&fd->command_c); - ret = -fd->errcode; - if (fd->q) - hts_tpool_wake_dispatch(fd->q); // unstick the reader - - if (!fp->is_write && fd->q && fd->dispatcher_set) { - for (;;) { - // Avoid deadlocks with dispatcher - if (fd->command == SAM_CLOSE_DONE) - break; - hts_tpool_wake_dispatch(fd->q); - pthread_mutex_unlock(&fd->command_m); - usleep(10000); - pthread_mutex_lock(&fd->command_m); - } - } - pthread_mutex_unlock(&fd->command_m); - - if (fp->is_write) { - // Dispatch the last partial block. 
- sp_bams *gb = fd->curr_bam; - if (!ret && gb && gb->nbams > 0 && fd->q) - ret = hts_tpool_dispatch(fd->p, fd->q, sam_format_worker, gb); - - // Flush and drain output - if (fd->q) - hts_tpool_process_flush(fd->q); - pthread_mutex_lock(&fd->command_m); - if (!ret) ret = -fd->errcode; - pthread_mutex_unlock(&fd->command_m); - - while (!ret && fd->q && !hts_tpool_process_empty(fd->q)) { - usleep(10000); - pthread_mutex_lock(&fd->command_m); - ret = -fd->errcode; - // not empty but shutdown implies error - if (hts_tpool_process_is_shutdown(fd->q) && !ret) - ret = EIO; - pthread_mutex_unlock(&fd->command_m); - } - if (fd->q) - hts_tpool_process_shutdown(fd->q); - } - - // Wait for it to acknowledge - if (fd->dispatcher_set) - pthread_join(fd->dispatcher, NULL); - if (!ret) ret = -fd->errcode; - } - - // Tidy up memory - if (fd->q) - hts_tpool_process_destroy(fd->q); - - if (fd->own_pool && fp->format.compression == no_compression) { - hts_tpool_destroy(fd->p); - fd->p = NULL; - } - pthread_mutex_destroy(&fd->lines_m); - pthread_mutex_destroy(&fd->command_m); - pthread_cond_destroy(&fd->command_c); - - sp_lines *l = fd->lines; - while (l) { - sp_lines *n = l->next; - free(l->data); - free(l); - l = n; - } - - sp_bams *b = fd->bams; - while (b) { - if (fd->curr_bam == b) - fd->curr_bam = NULL; - sp_bams *n = b->next; - sam_free_sp_bams(b); - b = n; - } - - if (fd->curr_bam) - sam_free_sp_bams(fd->curr_bam); - - // Decrement counter by one, maybe destroying too. - // This is to permit the caller using bam_hdr_destroy - // before sam_close without triggering decode errors - // in the background threads. - bam_hdr_destroy(fd->h); - } - - free(fp->state); - fp->state = NULL; - return ret; -} - -// Cleanup function - job for sam_parse_worker; result for sam_format_worker -static void cleanup_sp_lines(void *arg) { - sp_lines *gl = (sp_lines *)arg; - if (!gl) return; - - // Should always be true for lines passed to / from thread workers. 
- assert(gl->next == NULL); - - free(gl->data); - sam_free_sp_bams(gl->bams); - free(gl); -} - -// Run from one of the worker threads. -// Convert a passed in array of lines to array of BAMs, returning -// the result back to the thread queue. -static void *sam_parse_worker(void *arg) { - sp_lines *gl = (sp_lines *)arg; - sp_bams *gb = NULL; - char *lines = gl->data; - int i; - bam1_t *b; - SAM_state *fd = gl->fd; - - // Use a block of BAM structs we had earlier if available. - pthread_mutex_lock(&fd->lines_m); - if (fd->bams) { - gb = fd->bams; - fd->bams = gb->next; - } - pthread_mutex_unlock(&fd->lines_m); - - if (gb == NULL) { - gb = calloc(1, sizeof(*gb)); - if (!gb) { - return NULL; - } - gb->abams = 100; - gb->bams = b = calloc(gb->abams, sizeof(*b)); - if (!gb->bams) { - sam_state_err(fd, ENOMEM); - goto err; - } - gb->nbams = 0; - gb->bam_mem = 0; - } - gb->serial = gl->serial; - gb->next = NULL; - - b = (bam1_t *)gb->bams; - if (!b) { - sam_state_err(fd, ENOMEM); - goto err; - } - - i = 0; - char *cp = lines, *cp_end = lines + gl->data_size; - while (cp < cp_end) { - if (i >= gb->abams) { - int old_abams = gb->abams; - gb->abams *= 2; - b = (bam1_t *)realloc(gb->bams, gb->abams*sizeof(bam1_t)); - if (!b) { - gb->abams /= 2; - sam_state_err(fd, ENOMEM); - goto err; - } - memset(&b[old_abams], 0, (gb->abams - old_abams)*sizeof(*b)); - gb->bams = b; - } - - // Ideally we'd get sam_parse1 to return the number of - // bytes decoded and to be able to stop on newline as - // well as \0. - // - // We can then avoid the additional strchr loop. - // It's around 6% of our CPU cost, albeit threadable. - // - // However this is an API change so for now we copy. 
- - char *nl = strchr(cp, '\n'); - char *line_end; - if (nl) { - line_end = nl; - if (line_end > cp && *(line_end - 1) == '\r') - line_end--; - nl++; - } else { - nl = line_end = cp_end; - } - *line_end = '\0'; - kstring_t ks = { line_end - cp, gl->alloc, cp }; - if (sam_parse1(&ks, fd->h, &b[i]) < 0) { - sam_state_err(fd, errno ? errno : EIO); - cleanup_sp_lines(gl); - goto err; - } - - cp = nl; - i++; - } - gb->nbams = i; - - pthread_mutex_lock(&fd->lines_m); - gl->next = fd->lines; - fd->lines = gl; - pthread_mutex_unlock(&fd->lines_m); - return gb; - - err: - sam_free_sp_bams(gb); - return NULL; -} - -static void *sam_parse_eof(void *arg) { - return NULL; -} - -// Cleanup function - result for sam_parse_worker; job for sam_format_worker -static void cleanup_sp_bams(void *arg) { - sam_free_sp_bams((sp_bams *) arg); -} - -// Runs in its own thread. -// Reads a block of text (SAM) and sends a new job to the thread queue to -// translate this to BAM. -static void *sam_dispatcher_read(void *vp) { - htsFile *fp = vp; - kstring_t line = {0}; - int line_frag = 0; - SAM_state *fd = fp->state; - sp_lines *l = NULL; - - // Pre-allocate buffer for left-over bits of line (exact size doesn't - // matter as it will grow if necessary). 
- if (ks_resize(&line, 1000) < 0) - goto err; - - for (;;) { - // Check for command - pthread_mutex_lock(&fd->command_m); - switch (fd->command) { - - case SAM_CLOSE: - pthread_cond_signal(&fd->command_c); - pthread_mutex_unlock(&fd->command_m); - hts_tpool_process_shutdown(fd->q); - goto tidyup; - - default: - break; - } - pthread_mutex_unlock(&fd->command_m); - - pthread_mutex_lock(&fd->lines_m); - if (fd->lines) { - // reuse existing line buffer - l = fd->lines; - fd->lines = l->next; - } - pthread_mutex_unlock(&fd->lines_m); - - if (l == NULL) { - // none to reuse, to create a new one - l = calloc(1, sizeof(*l)); - if (!l) - goto err; - l->alloc = SAM_NBYTES; - l->data = malloc(l->alloc+8); // +8 for optimisation in sam_parse1 - if (!l->data) { - free(l); - l = NULL; - goto err; - } - l->fd = fd; - } - l->next = NULL; - - if (l->alloc < line_frag+SAM_NBYTES/2) { - char *rp = realloc(l->data, line_frag+SAM_NBYTES/2 +8); - if (!rp) - goto err; - l->alloc = line_frag+SAM_NBYTES/2; - l->data = rp; - } - memcpy(l->data, line.s, line_frag); - - l->data_size = line_frag; - ssize_t nbytes; - longer_line: - if (fp->is_bgzf) - nbytes = bgzf_read(fp->fp.bgzf, l->data + line_frag, l->alloc - line_frag); - else - nbytes = hread(fp->fp.hfile, l->data + line_frag, l->alloc - line_frag); - if (nbytes < 0) { - sam_state_err(fd, errno ? errno : EIO); - goto err; - } else if (nbytes == 0) - break; // EOF - l->data_size += nbytes; - - // trim to last \n. 
Maybe \r\n, but that's still fine - if (nbytes == l->alloc - line_frag) { - char *cp_end = l->data + l->data_size; - char *cp = cp_end-1; - - while (cp > (char *)l->data && *cp != '\n') - cp--; - - // entire buffer is part of a single line - if (cp == l->data) { - line_frag = l->data_size; - char *rp = realloc(l->data, l->alloc * 2 + 8); - if (!rp) - goto err; - l->alloc *= 2; - l->data = rp; - assert(l->alloc >= l->data_size); - assert(l->alloc >= line_frag); - assert(l->alloc >= l->alloc - line_frag); - goto longer_line; - } - cp++; - - // line holds the remainder of our line. - if (ks_resize(&line, cp_end - cp) < 0) - goto err; - memcpy(line.s, cp, cp_end - cp); - line_frag = cp_end - cp; - l->data_size = l->alloc - line_frag; - } else { - // out of buffer - line_frag = 0; - } - - l->serial = fd->serial++; - //fprintf(stderr, "Dispatching %p, %d bytes, serial %d\n", l, l->data_size, l->serial); - if (hts_tpool_dispatch3(fd->p, fd->q, sam_parse_worker, l, - cleanup_sp_lines, cleanup_sp_bams, 0) < 0) - goto err; - pthread_mutex_lock(&fd->command_m); - if (fd->command == SAM_CLOSE) { - pthread_mutex_unlock(&fd->command_m); - l = NULL; - goto tidyup; - } - l = NULL; // Now "owned" by sam_parse_worker() - pthread_mutex_unlock(&fd->command_m); - } - - if (hts_tpool_dispatch(fd->p, fd->q, sam_parse_eof, NULL) < 0) - goto err; - - // At EOF, wait for close request. - // (In future if we add support for seek, this is where we need to catch it.) 
- for (;;) { - pthread_mutex_lock(&fd->command_m); - if (fd->command == SAM_NONE) - pthread_cond_wait(&fd->command_c, &fd->command_m); - switch (fd->command) { - case SAM_CLOSE: - pthread_cond_signal(&fd->command_c); - pthread_mutex_unlock(&fd->command_m); - hts_tpool_process_shutdown(fd->q); - goto tidyup; - - default: - pthread_mutex_unlock(&fd->command_m); - break; - } - } - - tidyup: - pthread_mutex_lock(&fd->command_m); - fd->command = SAM_CLOSE_DONE; - pthread_cond_signal(&fd->command_c); - pthread_mutex_unlock(&fd->command_m); - - if (l) { - pthread_mutex_lock(&fd->lines_m); - l->next = fd->lines; - fd->lines = l; - pthread_mutex_unlock(&fd->lines_m); - } - free(line.s); - - return NULL; - - err: - sam_state_err(fd, errno ? errno : ENOMEM); - hts_tpool_process_shutdown(fd->q); - goto tidyup; -} - -// Runs in its own thread. -// Takes encoded blocks of SAM off the thread results queue and writes them -// to our output stream. -static void *sam_dispatcher_write(void *vp) { - htsFile *fp = vp; - SAM_state *fd = fp->state; - hts_tpool_result *r; - - // Iterates until result queue is shutdown, where it returns NULL. - while ((r = hts_tpool_next_result_wait(fd->q))) { - sp_lines *gl = (sp_lines *)hts_tpool_result_data(r); - if (!gl) { - sam_state_err(fd, ENOMEM); - goto err; - } - - if (fp->idx) { - sp_bams *gb = gl->bams; - int i = 0, count = 0; - while (i < gl->data_size) { - int j = i; - while (i < gl->data_size && gl->data[i] != '\n') - i++; - if (i < gl->data_size) - i++; - - if (fp->is_bgzf) { - if (bgzf_flush_try(fp->fp.bgzf, i-j) < 0) - goto err; - if (bgzf_write(fp->fp.bgzf, &gl->data[j], i-j) != i-j) - goto err; - } else { - if (hwrite(fp->fp.hfile, &gl->data[j], i-j) != i-j) - goto err; - } - - bam1_t *b = &gb->bams[count++]; - if (fp->format.compression == bgzf) { - if (bgzf_idx_push(fp->fp.bgzf, fp->idx, - b->core.tid, b->core.pos, bam_endpos(b), - bgzf_tell(fp->fp.bgzf), - !(b->core.flag&BAM_FUNMAP)) < 0) { - sam_state_err(fd, errno ? 
errno : ENOMEM); - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(fd->h, b->core.tid), sam_hdr_tid2len(fd->h, b->core.tid), b->core.flag, b->core.pos+1); - goto err; - } - } else { - if (hts_idx_push(fp->idx, b->core.tid, b->core.pos, bam_endpos(b), - bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { - sam_state_err(fd, errno ? errno : ENOMEM); - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(fd->h, b->core.tid), sam_hdr_tid2len(fd->h, b->core.tid), b->core.flag, b->core.pos+1); - goto err; - } - } - } - - assert(count == gb->nbams); - - // Add bam array to free-list - pthread_mutex_lock(&fd->lines_m); - gb->next = fd->bams; - fd->bams = gl->bams; - gl->bams = NULL; - pthread_mutex_unlock(&fd->lines_m); - } else { - if (fp->is_bgzf) { - // We keep track of how much in the current block we have - // remaining => R. We look for the last newline in input - // [i] to [i+R], backwards => position N. - // - // If we find a newline, we write out bytes i to N. - // We know we cannot fit the next record in this bgzf block, - // so we flush what we have and copy input N to i+R into - // the start of a new block, and recompute a new R for that. - // - // If we don't find a newline (i==N) then we cannot extend - // the current block at all, so flush whatever is in it now - // if it ends on a newline. - // We still copy i(==N) to i+R to the next block and - // continue as before with a new R. - // - // The only exception on the flush is when we run out of - // data in the input. In that case we skip it as we don't - // yet know if the next record will fit. 
- // - // Both conditions share the same code here: - // - Look for newline (pos N) - // - Write i to N (which maybe 0) - // - Flush if block ends on newline and not end of input - // - write N to i+R - - int i = 0; - BGZF *fb = fp->fp.bgzf; - while (i < gl->data_size) { - // remaining space in block - int R = BGZF_BLOCK_SIZE - fb->block_offset; - int eod = 0; - if (R > gl->data_size-i) - R = gl->data_size-i, eod = 1; - - // Find last newline in input data - int N = i + R; - while (--N > i) { - if (gl->data[N] == '\n') - break; - } - - if (N != i) { - // Found a newline - N++; - if (bgzf_write(fb, &gl->data[i], N-i) != N-i) - goto err; - } - - // Flush bgzf block - int b_off = fb->block_offset; - if (!eod && b_off && - ((char *)fb->uncompressed_block)[b_off-1] == '\n') - if (bgzf_flush_try(fb, BGZF_BLOCK_SIZE) < 0) - goto err; - - // Copy from N onwards into next block - if (i+R > N) - if (bgzf_write(fb, &gl->data[N], i+R - N) - != i+R - N) - goto err; - - i = i+R; - } - } else { - if (hwrite(fp->fp.hfile, gl->data, gl->data_size) != gl->data_size) - goto err; - } - } - - hts_tpool_delete_result(r, 0); - - // Also updated by main thread - pthread_mutex_lock(&fd->lines_m); - gl->next = fd->lines; - fd->lines = gl; - pthread_mutex_unlock(&fd->lines_m); - } - - sam_state_err(fd, 0); // success - hts_tpool_process_shutdown(fd->q); - return NULL; - - err: - sam_state_err(fd, errno ? errno : EIO); - return (void *)-1; -} - -// Run from one of the worker threads. -// Convert a passed in array of BAMs (sp_bams) and converts to a block -// of text SAM records (sp_lines). -static void *sam_format_worker(void *arg) { - sp_bams *gb = (sp_bams *)arg; - sp_lines *gl = NULL; - int i; - SAM_state *fd = gb->fd; - htsFile *fp = fd->fp; - - // Use a block of SAM strings we had earlier if available. 
- pthread_mutex_lock(&fd->lines_m); - if (fd->lines) { - gl = fd->lines; - fd->lines = gl->next; - } - pthread_mutex_unlock(&fd->lines_m); - - if (gl == NULL) { - gl = calloc(1, sizeof(*gl)); - if (!gl) { - sam_state_err(fd, ENOMEM); - return NULL; - } - gl->alloc = gl->data_size = 0; - gl->data = NULL; - } - gl->serial = gb->serial; - gl->next = NULL; - - kstring_t ks = {0, gl->alloc, gl->data}; - - for (i = 0; i < gb->nbams; i++) { - if (sam_format1_append(fd->h, &gb->bams[i], &ks) < 0) { - sam_state_err(fd, errno ? errno : EIO); - goto err; - } - kputc('\n', &ks); - } - - pthread_mutex_lock(&fd->lines_m); - gl->data_size = ks.l; - gl->alloc = ks.m; - gl->data = ks.s; - - if (fp->idx) { - // Keep hold of the bam array a little longer as - // sam_dispatcher_write needs to use them for building the index. - gl->bams = gb; - } else { - // Add bam array to free-list - gb->next = fd->bams; - fd->bams = gb; - } - pthread_mutex_unlock(&fd->lines_m); - - return gl; - - err: - // Possible race between this and fd->curr_bam. - // Easier to not free and leave it on the input list so it - // gets freed there instead? 
- // sam_free_sp_bams(gb); - if (gl) { - free(gl->data); - free(gl); - } - return NULL; -} - -int sam_set_thread_pool(htsFile *fp, htsThreadPool *p) { - if (fp->state) - return 0; - - if (!(fp->state = sam_state_create(fp))) - return -1; - SAM_state *fd = (SAM_state *)fp->state; - - pthread_mutex_init(&fd->lines_m, NULL); - pthread_mutex_init(&fd->command_m, NULL); - pthread_cond_init(&fd->command_c, NULL); - fd->p = p->pool; - int qsize = p->qsize; - if (!qsize) - qsize = 2*hts_tpool_size(fd->p); - fd->q = hts_tpool_process_init(fd->p, qsize, 0); - if (!fd->q) { - sam_state_destroy(fp); - return -1; - } - - if (fp->format.compression == bgzf) - return bgzf_thread_pool(fp->fp.bgzf, p->pool, p->qsize); - - return 0; -} - -int sam_set_threads(htsFile *fp, int nthreads) { - if (nthreads <= 0) - return 0; - - htsThreadPool p; - p.pool = hts_tpool_init(nthreads); - p.qsize = nthreads*2; - - int ret = sam_set_thread_pool(fp, &p); - if (ret < 0) - return ret; - - SAM_state *fd = (SAM_state *)fp->state; - fd->own_pool = 1; - - return 0; -} - -typedef struct { - kstring_t name; - kstring_t comment; // NB: pointer into name, do not free - kstring_t seq; - kstring_t qual; - int casava; - int aux; - int rnum; - char BC[3]; // aux tag ID for barcode - khash_t(tag) *tags; // which aux tags to use (if empty, use all). - char nprefix; - int sra_names; -} fastq_state; - -// Initialise fastq state. -// Name char of '@' or '>' distinguishes fastq vs fasta variant -static fastq_state *fastq_state_init(int name_char) { - fastq_state *x = (fastq_state *)calloc(1, sizeof(*x)); - if (!x) - return NULL; - strcpy(x->BC, "BC"); - x->nprefix = name_char; - - return x; -} - -void fastq_state_destroy(htsFile *fp) { - if (fp->state) { - fastq_state *x = (fastq_state *)fp->state; - if (x->tags) - kh_destroy(tag, x->tags); - ks_free(&x->name); - ks_free(&x->seq); - ks_free(&x->qual); - free(fp->state); - } -} - -int fastq_state_set(samFile *fp, enum hts_fmt_option opt, ...) 
{ - va_list args; - - if (!fp) - return -1; - if (!fp->state) - if (!(fp->state = fastq_state_init(fp->format.format == fastq_format - ? '@' : '>'))) - return -1; - - fastq_state *x = (fastq_state *)fp->state; - - switch (opt) { - case FASTQ_OPT_CASAVA: - x->casava = 1; - break; - - case FASTQ_OPT_NAME2: - x->sra_names = 1; - break; - - case FASTQ_OPT_AUX: { - va_start(args, opt); - x->aux = 1; - char *tag = va_arg(args, char *); - va_end(args); - if (tag && strcmp(tag, "1") != 0) { - if (!x->tags) - if (!(x->tags = kh_init(tag))) - return -1; - - size_t i, tlen = strlen(tag); - for (i = 0; i+3 <= tlen+1; i += 3) { - if (tag[i+0] == ',' || tag[i+1] == ',' || - !(tag[i+2] == ',' || tag[i+2] == '\0')) { - hts_log_warning("Bad tag format '%.3s'; skipping option", tag+i); - break; - } - int ret, tcode = tag[i+0]*256 + tag[i+1]; - kh_put(tag, x->tags, tcode, &ret); - if (ret < 0) - return -1; - } - } - break; - } - - case FASTQ_OPT_BARCODE: { - va_start(args, opt); - char *bc = va_arg(args, char *); - va_end(args); - strncpy(x->BC, bc, 2); - x->BC[2] = 0; - break; - } - - case FASTQ_OPT_RNUM: - x->rnum = 1; - break; - - default: - break; - } - return 0; -} - -static int fastq_parse1(htsFile *fp, bam1_t *b) { - fastq_state *x = (fastq_state *)fp->state; - size_t i, l; - int ret = 0; - - if (fp->format.format == fasta_format && fp->line.s) { - // For FASTA we've already read the >name line; steal it - // Not the most efficient, but we don't optimise for fasta reading. - if (fp->line.l == 0) - return -1; // EOF - - free(x->name.s); - x->name = fp->line; - fp->line.l = fp->line.m = 0; - fp->line.s = NULL; - } else { - // Read a FASTQ format entry. - ret = hts_getline(fp, KS_SEP_LINE, &x->name); - if (ret == -1) - return -1; // EOF - else if (ret < -1) - return ret; // ERR - } - - // Name - if (*x->name.s != x->nprefix) - return -2; - - // Reverse the SRA strangeness of putting the run_name.number before - // the read name. 
- i = 0; - char *name = x->name.s+1; - if (x->sra_names) { - char *cp = strpbrk(x->name.s, " \t"); - if (cp) { - while (*cp == ' ' || *cp == '\t') - cp++; - *--cp = '@'; - i = cp - x->name.s; - name = cp+1; - } - } - - l = x->name.l; - char *s = x->name.s; - while (i < l && !isspace_c(s[i])) - i++; - if (i < l) { - s[i] = 0; - x->name.l = i++; - } - - // Comment; a kstring struct, but pointer into name line. (Do not free) - while (i < l && isspace_c(s[i])) - i++; - x->comment.s = s+i; - x->comment.l = l - i; - - // Seq - x->seq.l = 0; - for (;;) { - if ((ret = hts_getline(fp, KS_SEP_LINE, &fp->line)) < 0) - if (fp->format.format == fastq_format || ret < -1) - return -2; - if (ret == -1 || - *fp->line.s == (fp->format.format == fastq_format ? '+' : '>')) - break; - if (kputsn(fp->line.s, fp->line.l, &x->seq) < 0) - return -2; - } - - // Qual - if (fp->format.format == fastq_format) { - size_t remainder = x->seq.l; - x->qual.l = 0; - do { - if (hts_getline(fp, KS_SEP_LINE, &fp->line) < 0) - return -2; - if (fp->line.l > remainder) - return -2; - if (kputsn(fp->line.s, fp->line.l, &x->qual) < 0) - return -2; - remainder -= fp->line.l; - } while (remainder > 0); - - // Decr qual - for (i = 0; i < x->qual.l; i++) - x->qual.s[i] -= '!'; - } - - int flag = BAM_FUNMAP; int pflag = BAM_FMUNMAP | BAM_FPAIRED; - if (x->name.l > 2 && - x->name.s[x->name.l-2] == '/' && - isdigit_c(x->name.s[x->name.l-1])) { - switch(x->name.s[x->name.l-1]) { - case '1': flag |= BAM_FREAD1 | pflag; break; - case '2': flag |= BAM_FREAD2 | pflag; break; - default : flag |= BAM_FREAD1 | BAM_FREAD2 | pflag; break; - } - x->name.s[x->name.l-=2] = 0; - } - - // Convert to BAM - ret = bam_set1(b, - x->name.s + x->name.l - name, name, - flag, - -1, -1, 0, // ref '*', pos, mapq, - 0, NULL, // no cigar, - -1, -1, 0, // mate - x->seq.l, x->seq.s, x->qual.s, - 0); - - // Identify Illumina CASAVA strings. 
- // ::: - char *barcode = NULL; - int barcode_len = 0; - kstring_t *kc = &x->comment; - char *endptr; - if (x->casava && - // \d:[YN]:\d+:[ACGTN]+ - kc->l > 6 && (kc->s[1] | kc->s[3]) == ':' && isdigit_c(kc->s[0]) && - strtol(kc->s+4, &endptr, 10) >= 0 && endptr != kc->s+4 - && *endptr == ':') { - - // read num - switch(kc->s[0]) { - case '1': b->core.flag |= BAM_FREAD1 | pflag; break; - case '2': b->core.flag |= BAM_FREAD2 | pflag; break; - default : b->core.flag |= BAM_FREAD1 | BAM_FREAD2 | pflag; break; - } - - if (kc->s[2] == 'Y') - b->core.flag |= BAM_FQCFAIL; - - // Barcode, maybe numeric in which case we skip it - if (!isdigit_c(endptr[1])) { - barcode = endptr+1; - for (i = barcode - kc->s; i < kc->l; i++) - if (isspace_c(kc->s[i])) - break; - - kc->s[i] = 0; - barcode_len = i+1-(barcode - kc->s); - } - } - - if (ret >= 0 && barcode_len) - if (bam_aux_append(b, x->BC, 'Z', barcode_len, (uint8_t *)barcode) < 0) - ret = -2; - - if (!x->aux) - return ret; - - // Identify any SAM style aux tags in comments too. - if (aux_parse(&kc->s[barcode_len], kc->s + kc->l, b, 1, x->tags) < 0) - ret = -2; - - return ret; -} - -// Internal component of sam_read1 below -static inline int sam_read1_bam(htsFile *fp, sam_hdr_t *h, bam1_t *b) { - int ret = bam_read1(fp->fp.bgzf, b); - if (h && ret >= 0) { - if (b->core.tid >= h->n_targets || b->core.tid < -1 || - b->core.mtid >= h->n_targets || b->core.mtid < -1) { - errno = ERANGE; - return -3; - } - } - return ret; -} - -// Internal component of sam_read1 below -static inline int sam_read1_cram(htsFile *fp, sam_hdr_t *h, bam1_t **b) { - int ret = cram_get_bam_seq(fp->fp.cram, b); - if (ret < 0) - return cram_eof(fp->fp.cram) ? 
-1 : -2; - - if (bam_tag2cigar(*b, 1, 1) < 0) - return -2; - - return ret; -} - -// Internal component of sam_read1 below -static inline int sam_read1_sam(htsFile *fp, sam_hdr_t *h, bam1_t *b) { - int ret; - - // Consume 1st line after header parsing as it wasn't using peek - if (fp->line.l != 0) { - ret = sam_parse1(&fp->line, h, b); - fp->line.l = 0; - return ret; - } - - if (fp->state) { - SAM_state *fd = (SAM_state *)fp->state; - - if (fp->format.compression == bgzf && fp->fp.bgzf->seeked) { - // We don't support multi-threaded SAM parsing with seeks yet. - int ret; - if ((ret = sam_state_destroy(fp)) < 0) { - errno = -ret; - return -2; - } - if (bgzf_seek(fp->fp.bgzf, fp->fp.bgzf->seeked, SEEK_SET) < 0) - return -1; - fp->fp.bgzf->seeked = 0; - goto err_recover; - } - - if (!fd->h) { - fd->h = h; - fd->h->ref_count++; - // Ensure hrecs is initialised now as we don't want multiple - // threads trying to do this simultaneously. - if (!fd->h->hrecs && sam_hdr_fill_hrecs(fd->h) < 0) - return -2; - - // We can only do this once we've got a header - if (pthread_create(&fd->dispatcher, NULL, sam_dispatcher_read, - fp) != 0) - return -2; - fd->dispatcher_set = 1; - } - - if (fd->h != h) { - hts_log_error("SAM multi-threaded decoding does not support changing header"); - return -1; - } - - sp_bams *gb = fd->curr_bam; - if (!gb) { - if (fd->errcode) { - // In case reader failed - errno = fd->errcode; - return -2; - } - hts_tpool_result *r = hts_tpool_next_result_wait(fd->q); - if (!r) - return -2; - fd->curr_bam = gb = (sp_bams *)hts_tpool_result_data(r); - hts_tpool_delete_result(r, 0); - } - if (!gb) - return fd->errcode ? 
-2 : -1; - bam1_t *b_array = (bam1_t *)gb->bams; - if (fd->curr_idx < gb->nbams) - if (!bam_copy1(b, &b_array[fd->curr_idx++])) - return -2; - if (fd->curr_idx == gb->nbams) { - pthread_mutex_lock(&fd->lines_m); - gb->next = fd->bams; - fd->bams = gb; - pthread_mutex_unlock(&fd->lines_m); - - fd->curr_bam = NULL; - fd->curr_idx = 0; - } - - ret = 0; - - } else { - err_recover: - ret = hts_getline(fp, KS_SEP_LINE, &fp->line); - if (ret < 0) return ret; - - ret = sam_parse1(&fp->line, h, b); - fp->line.l = 0; - if (ret < 0) { - hts_log_warning("Parse error at line %lld", (long long)fp->lineno); - if (h && h->ignore_sam_err) goto err_recover; - } - } - - return ret; -} - -// Returns 0 on success, -// -1 on EOF, -// <-1 on error -int sam_read1(htsFile *fp, sam_hdr_t *h, bam1_t *b) -{ - int ret, pass_filter; - - do { - switch (fp->format.format) { - case bam: - ret = sam_read1_bam(fp, h, b); - break; - - case cram: - ret = sam_read1_cram(fp, h, &b); - break; - - case sam: - ret = sam_read1_sam(fp, h, b); - break; - - case fasta_format: - case fastq_format: { - fastq_state *x = (fastq_state *)fp->state; - if (!x) { - if (!(fp->state = fastq_state_init(fp->format.format - == fastq_format ? '@' : '>'))) - return -2; - } - - return fastq_parse1(fp, b); - } - - case empty_format: - errno = EPIPE; - return -3; - - default: - errno = EFTYPE; - return -3; - } - - pass_filter = (ret >= 0 && fp->filter) - ? sam_passes_filter(h, b, fp->filter) - : 1; - } while (pass_filter == 0); - - return pass_filter < 0 ? 
-2 : ret; -} - - -static int sam_format1_append(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) -{ - int i, r = 0; - uint8_t *s, *end; - const bam1_core_t *c = &b->core; - - if (c->l_qname == 0) - return -1; - r |= kputsn_(bam_get_qname(b), c->l_qname-1-c->l_extranul, str); - r |= kputc_('\t', str); // query name - r |= kputw(c->flag, str); r |= kputc_('\t', str); // flag - if (c->tid >= 0) { // chr - r |= kputs(h->target_name[c->tid] , str); - r |= kputc_('\t', str); - } else r |= kputsn_("*\t", 2, str); - r |= kputll(c->pos + 1, str); r |= kputc_('\t', str); // pos - r |= kputw(c->qual, str); r |= kputc_('\t', str); // qual - if (c->n_cigar) { // cigar - uint32_t *cigar = bam_get_cigar(b); - for (i = 0; i < c->n_cigar; ++i) { - r |= kputw(bam_cigar_oplen(cigar[i]), str); - r |= kputc_(bam_cigar_opchr(cigar[i]), str); - } - } else r |= kputc_('*', str); - r |= kputc_('\t', str); - if (c->mtid < 0) r |= kputsn_("*\t", 2, str); // mate chr - else if (c->mtid == c->tid) r |= kputsn_("=\t", 2, str); - else { - r |= kputs(h->target_name[c->mtid], str); - r |= kputc_('\t', str); - } - r |= kputll(c->mpos + 1, str); r |= kputc_('\t', str); // mate pos - r |= kputll(c->isize, str); r |= kputc_('\t', str); // template len - if (c->l_qseq) { // seq and qual - uint8_t *s = bam_get_seq(b); - if (ks_resize(str, str->l+2+2*c->l_qseq) < 0) goto mem_err; - char *cp = str->s + str->l; - - // Sequence, 2 bases at a time - nibble2base(s, cp, c->l_qseq); - cp[c->l_qseq] = '\t'; - cp += c->l_qseq+1; - - // Quality - s = bam_get_qual(b); - i = 0; - if (s[0] == 0xff) { - cp[i++] = '*'; - } else { - // local copy of c->l_qseq to aid unrolling - uint32_t lqseq = c->l_qseq; - for (i = 0; i < lqseq; ++i) - cp[i]=s[i]+33; - } - cp[i] = 0; - cp += i; - str->l = cp - str->s; - } else r |= kputsn_("*\t*", 3, str); - - s = bam_get_aux(b); // aux - end = b->data + b->l_data; - - while (end - s >= 4) { - r |= kputc_('\t', str); - if ((s = (uint8_t *)sam_format_aux1(s, s[2], s+3, end, str)) == 
NULL) - goto bad_aux; - } - r |= kputsn("", 0, str); // nul terminate - if (r < 0) goto mem_err; - - return str->l; - - bad_aux: - hts_log_error("Corrupted aux data for read %.*s", - b->core.l_qname, bam_get_qname(b)); - errno = EINVAL; - return -1; - - mem_err: - hts_log_error("Out of memory"); - errno = ENOMEM; - return -1; -} - -int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) -{ - str->l = 0; - return sam_format1_append(h, b, str); -} - -static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end); -int fastq_format1(fastq_state *x, const bam1_t *b, kstring_t *str) -{ - unsigned flag = b->core.flag; - int i, e = 0, len = b->core.l_qseq; - uint8_t *seq, *qual; - - str->l = 0; - - // Name - if (kputc(x->nprefix, str) == EOF || kputs(bam_get_qname(b), str) == EOF) - return -1; - - // /1 or /2 suffix - if (x && x->rnum && (flag & BAM_FPAIRED)) { - int r12 = flag & (BAM_FREAD1 | BAM_FREAD2); - if (r12 == BAM_FREAD1) { - if (kputs("/1", str) == EOF) - return -1; - } else if (r12 == BAM_FREAD2) { - if (kputs("/2", str) == EOF) - return -1; - } - } - - // Illumina CASAVA tag. - // This is ::: - if (x && x->casava) { - int rnum = (flag & BAM_FREAD1)? 1 : (flag & BAM_FREAD2)? 2 : 0; - char filtered = (flag & BAM_FQCFAIL)? 'Y' : 'N'; - uint8_t *bc = bam_aux_get(b, x->BC); - if (ksprintf(str, " %d:%c:0:%s", rnum, filtered, - bc ? (char *)bc+1 : "0") < 0) - return -1; - - if (bc && (*bc != 'Z' || (!isupper_c(bc[1]) && !islower_c(bc[1])))) { - hts_log_warning("BC tag starts with non-sequence base; using '0'"); - str->l -= strlen((char *)bc)-2; // limit to 1 char - str->s[str->l-1] = '0'; - str->s[str->l] = 0; - bc = NULL; - } - - // Replace any non-alpha with '+'. 
Ie seq-seq to seq+seq - if (bc) { - int l = strlen((char *)bc+1); - char *c = (char *)str->s + str->l - l; - for (i = 0; i < l; i++) { - if (!isalpha_c(c[i])) - c[i] = '+'; - else if (islower_c(c[i])) - c[i] = toupper_c(c[i]); - } - } - } - - // Aux tags - if (x && x->aux) { - uint8_t *s = bam_get_aux(b), *end = b->data + b->l_data; - while (s && end - s >= 4) { - int tt = s[0]*256 + s[1]; - if (x->tags == NULL || - kh_get(tag, x->tags, tt) != kh_end(x->tags)) { - e |= kputc_('\t', str) < 0; - if (!(s = (uint8_t *)sam_format_aux1(s, s[2], s+3, end, str))) - return -1; - } else { - s = skip_aux(s+2, end); - } - } - e |= kputsn("", 0, str) < 0; // nul terminate - } - - if (ks_resize(str, str->l + 1 + len+1 + 2 + len+1 + 1) < 0) return -1; - e |= kputc_('\n', str) < 0; - - // Seq line - seq = bam_get_seq(b); - if (flag & BAM_FREVERSE) - for (i = len-1; i >= 0; i--) - e |= kputc_("!TGKCYSBAWRDMHVN"[bam_seqi(seq, i)], str) < 0; - else - for (i = 0; i < len; i++) - e |= kputc_(seq_nt16_str[bam_seqi(seq, i)], str) < 0; - - - // Qual line - if (x->nprefix == '@') { - kputsn("\n+\n", 3, str); - qual = bam_get_qual(b); - if (qual[0] == 0xff) - for (i = 0; i < len; i++) - e |= kputc_('B', str) < 0; - else if (flag & BAM_FREVERSE) - for (i = len-1; i >= 0; i--) - e |= kputc_(33 + qual[i], str) < 0; - else - for (i = 0; i < len; i++) - e |= kputc_(33 + qual[i], str) < 0; - - } - e |= kputc('\n', str) < 0; - - return e ? -1 : str->l; -} - -// Sadly we need to be able to modify the bam_hdr here so we can -// reference count the structure. 
-int sam_write1(htsFile *fp, const sam_hdr_t *h, const bam1_t *b) -{ - switch (fp->format.format) { - case binary_format: - fp->format.category = sequence_data; - fp->format.format = bam; - /* fall-through */ - case bam: - return bam_write_idx1(fp, h, b); - - case cram: - return cram_put_bam_seq(fp->fp.cram, (bam1_t *)b); - - case text_format: - fp->format.category = sequence_data; - fp->format.format = sam; - /* fall-through */ - case sam: - if (fp->state) { - SAM_state *fd = (SAM_state *)fp->state; - - // Threaded output - if (!fd->h) { - // NB: discard const. We don't actually modify sam_hdr_t here, - // just data pointed to by it (which is a bit weasely still), - // but out cached pointer must be non-const as we want to - // destroy it later on and sam_hdr_destroy takes non-const. - // - // We do this because some tools do sam_hdr_destroy; sam_close - // while others do sam_close; sam_hdr_destroy. The former is - // an issue as we need the header still when flushing. - fd->h = (sam_hdr_t *)h; - fd->h->ref_count++; - - if (pthread_create(&fd->dispatcher, NULL, sam_dispatcher_write, - fp) != 0) - return -2; - fd->dispatcher_set = 1; - } - - if (fd->h != h) { - hts_log_error("SAM multi-threaded decoding does not support changing header"); - return -2; - } - - // Find a suitable BAM array to copy to - sp_bams *gb = fd->curr_bam; - if (!gb) { - pthread_mutex_lock(&fd->lines_m); - if (fd->bams) { - fd->curr_bam = gb = fd->bams; - fd->bams = gb->next; - gb->next = NULL; - gb->nbams = 0; - gb->bam_mem = 0; - pthread_mutex_unlock(&fd->lines_m); - } else { - pthread_mutex_unlock(&fd->lines_m); - if (!(gb = calloc(1, sizeof(*gb)))) return -1; - if (!(gb->bams = calloc(SAM_NBAM, sizeof(*gb->bams)))) { - free(gb); - return -1; - } - gb->nbams = 0; - gb->abams = SAM_NBAM; - gb->bam_mem = 0; - gb->fd = fd; - fd->curr_idx = 0; - fd->curr_bam = gb; - } - } - - if (!bam_copy1(&gb->bams[gb->nbams++], b)) - return -2; - gb->bam_mem += b->l_data + sizeof(*b); - - // Dispatch if 
full - if (gb->nbams == SAM_NBAM || gb->bam_mem > SAM_NBYTES*0.8) { - gb->serial = fd->serial++; - pthread_mutex_lock(&fd->command_m); - if (fd->errcode != 0) { - pthread_mutex_unlock(&fd->command_m); - return -fd->errcode; - } - if (hts_tpool_dispatch3(fd->p, fd->q, sam_format_worker, gb, - cleanup_sp_bams, - cleanup_sp_lines, 0) < 0) { - pthread_mutex_unlock(&fd->command_m); - return -1; - } - pthread_mutex_unlock(&fd->command_m); - fd->curr_bam = NULL; - } - - // Dummy value as we don't know how long it really is. - // We could track file sizes via a SAM_state field, but I don't think - // it is necessary. - return 1; - } else { - if (sam_format1(h, b, &fp->line) < 0) return -1; - kputc('\n', &fp->line); - if (fp->is_bgzf) { - if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) - return -1; - if ( bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l) != fp->line.l ) return -1; - } else { - if ( hwrite(fp->fp.hfile, fp->line.s, fp->line.l) != fp->line.l ) return -1; - } - - if (fp->idx) { - if (fp->format.compression == bgzf) { - if (bgzf_idx_push(fp->fp.bgzf, fp->idx, b->core.tid, b->core.pos, bam_endpos(b), - bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); - return -1; - } - } else { - if (hts_idx_push(fp->idx, b->core.tid, b->core.pos, bam_endpos(b), - bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); - return -1; - } - } - } - - return fp->line.l; - } - - - case fasta_format: - case fastq_format: { - fastq_state *x = (fastq_state *)fp->state; - if (!x) { - if (!(fp->state = 
fastq_state_init(fp->format.format - == fastq_format ? '@' : '>'))) - return -2; - } - - if (fastq_format1(fp->state, b, &fp->line) < 0) - return -1; - if (fp->is_bgzf) { - if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) - return -1; - if (bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l) != fp->line.l) - return -1; - } else { - if (hwrite(fp->fp.hfile, fp->line.s, fp->line.l) != fp->line.l) - return -1; - } - return fp->line.l; - } - - default: - errno = EBADF; - return -1; - } -} - -/************************ - *** Auxiliary fields *** - ************************/ -#ifndef HTS_LITTLE_ENDIAN -static int aux_to_le(char type, uint8_t *out, const uint8_t *in, size_t len) { - int tsz = aux_type2size(type); - - if (tsz >= 2 && tsz <= 8 && (len & (tsz - 1)) != 0) return -1; - - switch (tsz) { - case 'H': case 'Z': case 1: // Trivial - memcpy(out, in, len); - break; - -#define aux_val_to_le(type_t, store_le) do { \ - type_t v; \ - size_t i; \ - for (i = 0; i < len; i += sizeof(type_t), out += sizeof(type_t)) { \ - memcpy(&v, in + i, sizeof(type_t)); \ - store_le(v, out); \ - } \ - } while (0) - - case 2: aux_val_to_le(uint16_t, u16_to_le); break; - case 4: aux_val_to_le(uint32_t, u32_to_le); break; - case 8: aux_val_to_le(uint64_t, u64_to_le); break; - -#undef aux_val_to_le - - case 'B': { // Recurse! 
- uint32_t n; - if (len < 5) return -1; - memcpy(&n, in + 1, 4); - out[0] = in[0]; - u32_to_le(n, out + 1); - return aux_to_le(in[0], out + 5, in + 5, len - 5); - } - - default: // Unknown type code - return -1; - } - - - - return 0; -} -#endif - -int bam_aux_append(bam1_t *b, const char tag[2], char type, int len, const uint8_t *data) -{ - uint32_t new_len; - - assert(b->l_data >= 0); - new_len = b->l_data + 3 + len; - if (new_len > INT32_MAX || new_len < b->l_data) goto nomem; - - if (realloc_bam_data(b, new_len) < 0) return -1; - - b->data[b->l_data] = tag[0]; - b->data[b->l_data + 1] = tag[1]; - b->data[b->l_data + 2] = type; - -#ifdef HTS_LITTLE_ENDIAN - memcpy(b->data + b->l_data + 3, data, len); -#else - if (aux_to_le(type, b->data + b->l_data + 3, data, len) != 0) { - errno = EINVAL; - return -1; - } -#endif - - b->l_data = new_len; - - return 0; - - nomem: - errno = ENOMEM; - return -1; -} - -static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end) -{ - int size; - uint32_t n; - if (s >= end) return end; - size = aux_type2size(*s); ++s; // skip type - switch (size) { - case 'Z': - case 'H': - while (s < end && *s) ++s; - return s < end ? s + 1 : end; - case 'B': - if (end - s < 5) return NULL; - size = aux_type2size(*s); ++s; - n = le_to_u32(s); - s += 4; - if (size == 0 || end - s < size * n) return NULL; - return s + size * n; - case 0: - return NULL; - default: - if (end - s < size) return NULL; - return s + size; - } -} - -uint8_t *bam_aux_first(const bam1_t *b) -{ - uint8_t *s = bam_get_aux(b); - uint8_t *end = b->data + b->l_data; - if (s >= end) { errno = ENOENT; return NULL; } - return s+2; -} - -uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s) -{ - uint8_t *end = b->data + b->l_data; - uint8_t *next = s? 
skip_aux((uint8_t *) s, end) : end; - if (next == NULL) goto bad_aux; - if (next >= end) { errno = ENOENT; return NULL; } - return next+2; - - bad_aux: - hts_log_error("Corrupted aux data for read %s", bam_get_qname(b)); - errno = EINVAL; - return NULL; -} - -uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) -{ - uint8_t *s; - for (s = bam_aux_first(b); s; s = bam_aux_next(b, s)) - if (s[-2] == tag[0] && s[-1] == tag[1]) { - // Check the tag value is valid and complete - uint8_t *e = skip_aux(s, b->data + b->l_data); - if (e == NULL) goto bad_aux; - if ((*s == 'Z' || *s == 'H') && *(e - 1) != '\0') goto bad_aux; - - return s; - } - - // errno now as set by bam_aux_first()/bam_aux_next() - return NULL; - - bad_aux: - hts_log_error("Corrupted aux data for read %s", bam_get_qname(b)); - errno = EINVAL; - return NULL; -} - -int bam_aux_del(bam1_t *b, uint8_t *s) -{ - s = bam_aux_remove(b, s); - return (s || errno == ENOENT)? 0 : -1; -} - -uint8_t *bam_aux_remove(bam1_t *b, uint8_t *s) -{ - uint8_t *end = b->data + b->l_data; - uint8_t *next = skip_aux(s, end); - if (next == NULL) goto bad_aux; - - b->l_data -= next - (s-2); - if (next >= end) { errno = ENOENT; return NULL; } - - memmove(s-2, next, end - next); - return s; - - bad_aux: - hts_log_error("Corrupted aux data for read %s", bam_get_qname(b)); - errno = EINVAL; - return NULL; -} - -int bam_aux_update_str(bam1_t *b, const char tag[2], int len, const char *data) -{ - // FIXME: This is not at all efficient! - size_t ln = len >= 0 ? len : strlen(data) + 1; - size_t old_ln = 0; - int need_nul = ln == 0 || data[ln - 1] != '\0'; - int save_errno = errno; - int new_tag = 0; - uint8_t *s = bam_aux_get(b,tag), *e; - - if (s) { // Replacing existing tag - char type = *s; - if (type != 'Z') { - hts_log_error("Called bam_aux_update_str for type '%c' instead of 'Z'", type); - errno = EINVAL; - return -1; - } - s++; - e = memchr(s, '\0', b->data + b->l_data - s); - old_ln = (e ? 
e - s : b->data + b->l_data - s) + 1; - s -= 3; - } else { - if (errno != ENOENT) { // Invalid aux data, give up - return -1; - } else { // Tag doesn't exist - put it on the end - errno = save_errno; - s = b->data + b->l_data; - new_tag = 3; - } - } - - if (old_ln < ln + need_nul + new_tag) { - ptrdiff_t s_offset = s - b->data; - if (possibly_expand_bam_data(b, ln + need_nul + new_tag - old_ln) < 0) - return -1; - s = b->data + s_offset; - } - if (!new_tag) { - memmove(s + 3 + ln + need_nul, - s + 3 + old_ln, - b->l_data - (s + 3 - b->data) - old_ln); - } - b->l_data += new_tag + ln + need_nul - old_ln; - - s[0] = tag[0]; - s[1] = tag[1]; - s[2] = 'Z'; - memmove(s+3,data,ln); - if (need_nul) s[3 + ln] = '\0'; - return 0; -} - -int bam_aux_update_int(bam1_t *b, const char tag[2], int64_t val) -{ - uint32_t sz, old_sz = 0, new = 0; - uint8_t *s, type; - - if (val < INT32_MIN || val > UINT32_MAX) { - errno = EOVERFLOW; - return -1; - } - if (val < INT16_MIN) { type = 'i'; sz = 4; } - else if (val < INT8_MIN) { type = 's'; sz = 2; } - else if (val < 0) { type = 'c'; sz = 1; } - else if (val < UINT8_MAX) { type = 'C'; sz = 1; } - else if (val < UINT16_MAX) { type = 'S'; sz = 2; } - else { type = 'I'; sz = 4; } - - s = bam_aux_get(b, tag); - if (s) { // Tag present - how big was the old one? - switch (*s) { - case 'c': case 'C': old_sz = 1; break; - case 's': case 'S': old_sz = 2; break; - case 'i': case 'I': old_sz = 4; break; - default: errno = EINVAL; return -1; // Not an integer - } - } else { - if (errno == ENOENT) { // Tag doesn't exist - add a new one - s = b->data + b->l_data; - new = 1; - } else { // Invalid aux data, give up. - return -1; - } - } - - if (new || old_sz < sz) { - // Make room for new tag - ptrdiff_t s_offset = s - b->data; - if (possibly_expand_bam_data(b, (new ? 
3 : 0) + sz - old_sz) < 0) - return -1; - s = b->data + s_offset; - if (new) { // Add tag id - *s++ = tag[0]; - *s++ = tag[1]; - } else { // Shift following data so we have space - memmove(s + sz, s + old_sz, b->l_data - s_offset - old_sz); - } - } else { - // Reuse old space. Data value may be bigger than necessary but - // we avoid having to move everything else - sz = old_sz; - type = (val < 0 ? "\0cs\0i" : "\0CS\0I")[old_sz]; - assert(type > 0); - } - *s++ = type; -#ifdef HTS_LITTLE_ENDIAN - memcpy(s, &val, sz); -#else - switch (sz) { - case 4: u32_to_le(val, s); break; - case 2: u16_to_le(val, s); break; - default: *s = val; break; - } -#endif - b->l_data += (new ? 3 : 0) + sz - old_sz; - return 0; -} - -int bam_aux_update_float(bam1_t *b, const char tag[2], float val) -{ - uint8_t *s = bam_aux_get(b, tag); - int shrink = 0, new = 0; - - if (s) { // Tag present - what was it? - switch (*s) { - case 'f': break; - case 'd': shrink = 1; break; - default: errno = EINVAL; return -1; // Not a float - } - } else { - if (errno == ENOENT) { // Tag doesn't exist - add a new one - new = 1; - } else { // Invalid aux data, give up. 
- return -1; - } - } - - if (new) { // Ensure there's room - if (possibly_expand_bam_data(b, 3 + 4) < 0) - return -1; - s = b->data + b->l_data; - *s++ = tag[0]; - *s++ = tag[1]; - } else if (shrink) { // Convert non-standard double tag to float - memmove(s + 5, s + 9, b->l_data - ((s + 9) - b->data)); - b->l_data -= 4; - } - *s++ = 'f'; - float_to_le(val, s); - if (new) b->l_data += 7; - - return 0; -} - -int bam_aux_update_array(bam1_t *b, const char tag[2], - uint8_t type, uint32_t items, void *data) -{ - uint8_t *s = bam_aux_get(b, tag); - size_t old_sz = 0, new_sz; - int new = 0; - - if (s) { // Tag present - if (*s != 'B') { errno = EINVAL; return -1; } - old_sz = aux_type2size(s[1]); - if (old_sz < 1 || old_sz > 4) { errno = EINVAL; return -1; } - old_sz *= le_to_u32(s + 2); - } else { - if (errno == ENOENT) { // Tag doesn't exist - add a new one - s = b->data + b->l_data; - new = 1; - } else { // Invalid aux data, give up. - return -1; - } - } - - new_sz = aux_type2size(type); - if (new_sz < 1 || new_sz > 4) { errno = EINVAL; return -1; } - if (items > INT32_MAX / new_sz) { errno = ENOMEM; return -1; } - new_sz *= items; - - if (new || old_sz < new_sz) { - // Make room for new tag - ptrdiff_t s_offset = s - b->data; - if (possibly_expand_bam_data(b, (new ? 
8 : 0) + new_sz - old_sz) < 0) - return -1; - s = b->data + s_offset; - } - if (new) { // Add tag id and type - *s++ = tag[0]; - *s++ = tag[1]; - *s = 'B'; - b->l_data += 8 + new_sz; - } else if (old_sz != new_sz) { // shift following data if necessary - memmove(s + 6 + new_sz, s + 6 + old_sz, - b->l_data - ((s + 6 + old_sz) - b->data)); - b->l_data -= old_sz; - b->l_data += new_sz; - } - - s[1] = type; - u32_to_le(items, s + 2); -#ifdef HTS_LITTLE_ENDIAN - memcpy(s + 6, data, new_sz); - return 0; -#else - return aux_to_le(type, s + 6, data, new_sz); -#endif -} - -static inline int64_t get_int_aux_val(uint8_t type, const uint8_t *s, - uint32_t idx) -{ - switch (type) { - case 'c': return le_to_i8(s + idx); - case 'C': return s[idx]; - case 's': return le_to_i16(s + 2 * idx); - case 'S': return le_to_u16(s + 2 * idx); - case 'i': return le_to_i32(s + 4 * idx); - case 'I': return le_to_u32(s + 4 * idx); - default: - errno = EINVAL; - return 0; - } -} - -int64_t bam_aux2i(const uint8_t *s) -{ - int type; - type = *s++; - return get_int_aux_val(type, s, 0); -} - -double bam_aux2f(const uint8_t *s) -{ - int type; - type = *s++; - if (type == 'd') return le_to_double(s); - else if (type == 'f') return le_to_float(s); - else return get_int_aux_val(type, s, 0); -} - -char bam_aux2A(const uint8_t *s) -{ - int type; - type = *s++; - if (type == 'A') return *(char*)s; - errno = EINVAL; - return 0; -} - -char *bam_aux2Z(const uint8_t *s) -{ - int type; - type = *s++; - if (type == 'Z' || type == 'H') return (char*)s; - errno = EINVAL; - return 0; -} - -uint32_t bam_auxB_len(const uint8_t *s) -{ - if (s[0] != 'B') { - errno = EINVAL; - return 0; - } - return le_to_u32(s + 2); -} - -int64_t bam_auxB2i(const uint8_t *s, uint32_t idx) -{ - uint32_t len = bam_auxB_len(s); - if (idx >= len) { - errno = ERANGE; - return 0; - } - return get_int_aux_val(s[1], s + 6, idx); -} - -double bam_auxB2f(const uint8_t *s, uint32_t idx) -{ - uint32_t len = bam_auxB_len(s); - if (idx >= len) { - 
errno = ERANGE; - return 0.0; - } - if (s[1] == 'f') return le_to_float(s + 6 + 4 * idx); - else return get_int_aux_val(s[1], s + 6, idx); -} - -int sam_open_mode(char *mode, const char *fn, const char *format) -{ - // TODO Parse "bam5" etc for compression level - if (format == NULL) { - // Try to pick a format based on the filename extension - char extension[HTS_MAX_EXT_LEN]; - if (find_file_extension(fn, extension) < 0) return -1; - return sam_open_mode(mode, fn, extension); - } - else if (strcasecmp(format, "bam") == 0) strcpy(mode, "b"); - else if (strcasecmp(format, "cram") == 0) strcpy(mode, "c"); - else if (strcasecmp(format, "sam") == 0) strcpy(mode, ""); - else if (strcasecmp(format, "sam.gz") == 0) strcpy(mode, "z"); - else if (strcasecmp(format, "fastq") == 0 || - strcasecmp(format, "fq") == 0) strcpy(mode, "f"); - else if (strcasecmp(format, "fastq.gz") == 0 || - strcasecmp(format, "fq.gz") == 0) strcpy(mode, "fz"); - else if (strcasecmp(format, "fasta") == 0 || - strcasecmp(format, "fa") == 0) strcpy(mode, "F"); - else if (strcasecmp(format, "fasta.gz") == 0 || - strcasecmp(format, "fa.gz") == 0) strcpy(mode, "Fz"); - else return -1; - - return 0; -} - -// A version of sam_open_mode that can handle ,key=value options. -// The format string is allocated and returned, to be freed by the caller. -// Prefix should be "r" or "w", -char *sam_open_mode_opts(const char *fn, - const char *mode, - const char *format) -{ - char *mode_opts = malloc((format ? strlen(format) : 1) + - (mode ? strlen(mode) : 1) + 12); - char *opts, *cp; - int format_len; - - if (!mode_opts) - return NULL; - - strcpy(mode_opts, mode ? 
mode : "r"); - cp = mode_opts + strlen(mode_opts); - - if (format == NULL) { - // Try to pick a format based on the filename extension - char extension[HTS_MAX_EXT_LEN]; - if (find_file_extension(fn, extension) < 0) { - free(mode_opts); - return NULL; - } - if (sam_open_mode(cp, fn, extension) == 0) { - return mode_opts; - } else { - free(mode_opts); - return NULL; - } - } - - if ((opts = strchr(format, ','))) { - format_len = opts-format; - } else { - opts=""; - format_len = strlen(format); - } - - if (strncmp(format, "bam", format_len) == 0) { - *cp++ = 'b'; - } else if (strncmp(format, "cram", format_len) == 0) { - *cp++ = 'c'; - } else if (strncmp(format, "cram2", format_len) == 0) { - *cp++ = 'c'; - strcpy(cp, ",VERSION=2.1"); - cp += 12; - } else if (strncmp(format, "cram3", format_len) == 0) { - *cp++ = 'c'; - strcpy(cp, ",VERSION=3.0"); - cp += 12; - } else if (strncmp(format, "sam", format_len) == 0) { - ; // format mode="" - } else if (strncmp(format, "sam.gz", format_len) == 0) { - *cp++ = 'z'; - } else if (strncmp(format, "fastq", format_len) == 0 || - strncmp(format, "fq", format_len) == 0) { - *cp++ = 'f'; - } else if (strncmp(format, "fastq.gz", format_len) == 0 || - strncmp(format, "fq.gz", format_len) == 0) { - *cp++ = 'f'; - *cp++ = 'z'; - } else if (strncmp(format, "fasta", format_len) == 0 || - strncmp(format, "fa", format_len) == 0) { - *cp++ = 'F'; - } else if (strncmp(format, "fasta.gz", format_len) == 0 || - strncmp(format, "fa", format_len) == 0) { - *cp++ = 'F'; - *cp++ = 'z'; - } else { - free(mode_opts); - return NULL; - } - - strcpy(cp, opts); - - return mode_opts; -} - -#define STRNCMP(a,b,n) (strncasecmp((a),(b),(n)) || strlen(a)!=(n)) -int bam_str2flag(const char *str) -{ - char *end, *beg = (char*) str; - long int flag = strtol(str, &end, 0); - if ( end!=str ) return flag; // the conversion was successful - flag = 0; - while ( *str ) - { - end = beg; - while ( *end && *end!=',' ) end++; - if ( !STRNCMP("PAIRED",beg,end-beg) ) flag 
|= BAM_FPAIRED; - else if ( !STRNCMP("PROPER_PAIR",beg,end-beg) ) flag |= BAM_FPROPER_PAIR; - else if ( !STRNCMP("UNMAP",beg,end-beg) ) flag |= BAM_FUNMAP; - else if ( !STRNCMP("MUNMAP",beg,end-beg) ) flag |= BAM_FMUNMAP; - else if ( !STRNCMP("REVERSE",beg,end-beg) ) flag |= BAM_FREVERSE; - else if ( !STRNCMP("MREVERSE",beg,end-beg) ) flag |= BAM_FMREVERSE; - else if ( !STRNCMP("READ1",beg,end-beg) ) flag |= BAM_FREAD1; - else if ( !STRNCMP("READ2",beg,end-beg) ) flag |= BAM_FREAD2; - else if ( !STRNCMP("SECONDARY",beg,end-beg) ) flag |= BAM_FSECONDARY; - else if ( !STRNCMP("QCFAIL",beg,end-beg) ) flag |= BAM_FQCFAIL; - else if ( !STRNCMP("DUP",beg,end-beg) ) flag |= BAM_FDUP; - else if ( !STRNCMP("SUPPLEMENTARY",beg,end-beg) ) flag |= BAM_FSUPPLEMENTARY; - else return -1; - if ( !*end ) break; - beg = end + 1; - } - return flag; -} - -char *bam_flag2str(int flag) -{ - kstring_t str = {0,0,0}; - if ( flag&BAM_FPAIRED ) ksprintf(&str,"%s%s", str.l?",":"","PAIRED"); - if ( flag&BAM_FPROPER_PAIR ) ksprintf(&str,"%s%s", str.l?",":"","PROPER_PAIR"); - if ( flag&BAM_FUNMAP ) ksprintf(&str,"%s%s", str.l?",":"","UNMAP"); - if ( flag&BAM_FMUNMAP ) ksprintf(&str,"%s%s", str.l?",":"","MUNMAP"); - if ( flag&BAM_FREVERSE ) ksprintf(&str,"%s%s", str.l?",":"","REVERSE"); - if ( flag&BAM_FMREVERSE ) ksprintf(&str,"%s%s", str.l?",":"","MREVERSE"); - if ( flag&BAM_FREAD1 ) ksprintf(&str,"%s%s", str.l?",":"","READ1"); - if ( flag&BAM_FREAD2 ) ksprintf(&str,"%s%s", str.l?",":"","READ2"); - if ( flag&BAM_FSECONDARY ) ksprintf(&str,"%s%s", str.l?",":"","SECONDARY"); - if ( flag&BAM_FQCFAIL ) ksprintf(&str,"%s%s", str.l?",":"","QCFAIL"); - if ( flag&BAM_FDUP ) ksprintf(&str,"%s%s", str.l?",":"","DUP"); - if ( flag&BAM_FSUPPLEMENTARY ) ksprintf(&str,"%s%s", str.l?",":"","SUPPLEMENTARY"); - if ( str.l == 0 ) kputsn("", 0, &str); - return str.s; -} - - -/************************** - *** Pileup and Mpileup *** - **************************/ - -#if !defined(BAM_NO_PILEUP) - -#include - 
-/******************* - *** Memory pool *** - *******************/ - -typedef struct { - int k, y; - hts_pos_t x, end; -} cstate_t; - -static cstate_t g_cstate_null = { -1, 0, 0, 0 }; - -typedef struct __linkbuf_t { - bam1_t b; - hts_pos_t beg, end; - cstate_t s; - struct __linkbuf_t *next; - bam_pileup_cd cd; -} lbnode_t; - -typedef struct { - int cnt, n, max; - lbnode_t **buf; -} mempool_t; - -static mempool_t *mp_init(void) -{ - mempool_t *mp; - mp = (mempool_t*)calloc(1, sizeof(mempool_t)); - return mp; -} -static void mp_destroy(mempool_t *mp) -{ - int k; - for (k = 0; k < mp->n; ++k) { - free(mp->buf[k]->b.data); - free(mp->buf[k]); - } - free(mp->buf); - free(mp); -} -static inline lbnode_t *mp_alloc(mempool_t *mp) -{ - ++mp->cnt; - if (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t)); - else return mp->buf[--mp->n]; -} -static inline void mp_free(mempool_t *mp, lbnode_t *p) -{ - --mp->cnt; p->next = 0; // clear lbnode_t::next here - if (mp->n == mp->max) { - mp->max = mp->max? mp->max<<1 : 256; - mp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max); - } - mp->buf[mp->n++] = p; -} - -/********************** - *** CIGAR resolver *** - **********************/ - -/* s->k: the index of the CIGAR operator that has just been processed. 
- s->x: the reference coordinate of the start of s->k - s->y: the query coordinate of the start of s->k - */ -static inline int resolve_cigar2(bam_pileup1_t *p, hts_pos_t pos, cstate_t *s) -{ -#define _cop(c) ((c)&BAM_CIGAR_MASK) -#define _cln(c) ((c)>>BAM_CIGAR_SHIFT) - - bam1_t *b = p->b; - bam1_core_t *c = &b->core; - uint32_t *cigar = bam_get_cigar(b); - int k; - // determine the current CIGAR operation - //fprintf(stderr, "%s\tpos=%ld\tend=%ld\t(%d,%ld,%d)\n", bam_get_qname(b), pos, s->end, s->k, s->x, s->y); - if (s->k == -1) { // never processed - p->qpos = 0; - if (c->n_cigar == 1) { // just one operation, save a loop - if (_cop(cigar[0]) == BAM_CMATCH || _cop(cigar[0]) == BAM_CEQUAL || _cop(cigar[0]) == BAM_CDIFF) s->k = 0, s->x = c->pos, s->y = 0; - } else { // find the first match or deletion - for (k = 0, s->x = c->pos, s->y = 0; k < c->n_cigar; ++k) { - int op = _cop(cigar[k]); - int l = _cln(cigar[k]); - if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || - op == BAM_CEQUAL || op == BAM_CDIFF) break; - else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l; - } - assert(k < c->n_cigar); - s->k = k; - } - } else { // the read has been processed before - int op, l = _cln(cigar[s->k]); - if (pos - s->x >= l) { // jump to the next operation - assert(s->k < c->n_cigar); // otherwise a bug: this function should not be called in this case - op = _cop(cigar[s->k+1]); - if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) { // jump to the next without a loop - if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l; - s->x += l; - ++s->k; - } else { // find the next M/D/N/=/X - if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l; - s->x += l; - for (k = s->k + 1; k < c->n_cigar; ++k) { - op = _cop(cigar[k]), l = _cln(cigar[k]); - if (op == BAM_CMATCH || op == BAM_CDEL || op 
== BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) break; - else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l; - } - s->k = k; - } - assert(s->k < c->n_cigar); // otherwise a bug - } // else, do nothing - } - { // collect pileup information - int op, l; - op = _cop(cigar[s->k]); l = _cln(cigar[s->k]); - p->is_del = p->indel = p->is_refskip = 0; - if (s->x + l - 1 == pos && s->k + 1 < c->n_cigar) { // peek the next operation - int op2 = _cop(cigar[s->k+1]); - int l2 = _cln(cigar[s->k+1]); - if (op2 == BAM_CDEL && op != BAM_CDEL) { - // At start of a new deletion, merge e.g. 1D2D to 3D. - // Within a deletion (the 2D in 1D2D) we keep p->indel=0 - // and rely on is_del=1 as we would for 3D. - p->indel = -(int)l2; - for (k = s->k+2; k < c->n_cigar; ++k) { - op2 = _cop(cigar[k]); l2 = _cln(cigar[k]); - if (op2 == BAM_CDEL) p->indel -= l2; - else break; - } - } else if (op2 == BAM_CINS) { - p->indel = l2; - for (k = s->k+2; k < c->n_cigar; ++k) { - op2 = _cop(cigar[k]); l2 = _cln(cigar[k]); - if (op2 == BAM_CINS) p->indel += l2; - else if (op2 != BAM_CPAD) break; - } - } else if (op2 == BAM_CPAD && s->k + 2 < c->n_cigar) { - int l3 = 0; - for (k = s->k + 2; k < c->n_cigar; ++k) { - op2 = _cop(cigar[k]); l2 = _cln(cigar[k]); - if (op2 == BAM_CINS) l3 += l2; - else if (op2 == BAM_CDEL || op2 == BAM_CMATCH || op2 == BAM_CREF_SKIP || op2 == BAM_CEQUAL || op2 == BAM_CDIFF) break; - } - if (l3 > 0) p->indel = l3; - } - } - if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { - p->qpos = s->y + (pos - s->x); - } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) { - p->is_del = 1; p->qpos = s->y; // FIXME: distinguish D and N!!!!! 
- p->is_refskip = (op == BAM_CREF_SKIP); - } // cannot be other operations; otherwise a bug - p->is_head = (pos == c->pos); p->is_tail = (pos == s->end); - } - p->cigar_ind = s->k; - return 1; -} - -/******************************* - *** Expansion of insertions *** - *******************************/ - -/* - * Fills out the kstring with the padded insertion sequence for the current - * location in 'p'. If this is not an insertion site, the string is blank. - * - * This variant handles base modifications, but only when "m" is non-NULL. - * - * Returns the number of inserted base on success, with string length being - * accessable via ins->l; - * -1 on failure. - */ -int bam_plp_insertion_mod(const bam_pileup1_t *p, - hts_base_mod_state *m, - kstring_t *ins, int *del_len) { - int j, k, indel, nb = 0; - uint32_t *cigar; - - if (p->indel <= 0) { - if (ks_resize(ins, 1) < 0) - return -1; - ins->l = 0; - ins->s[0] = '\0'; - return 0; - } - - if (del_len) - *del_len = 0; - - // Measure indel length including pads - indel = 0; - k = p->cigar_ind+1; - cigar = bam_get_cigar(p->b); - while (k < p->b->core.n_cigar) { - switch (cigar[k] & BAM_CIGAR_MASK) { - case BAM_CPAD: - case BAM_CINS: - indel += (cigar[k] >> BAM_CIGAR_SHIFT); - break; - default: - k = p->b->core.n_cigar; - break; - } - k++; - } - nb = ins->l = indel; - - // Produce sequence - if (ks_resize(ins, indel+1) < 0) - return -1; - indel = 0; - k = p->cigar_ind+1; - j = 1; - while (k < p->b->core.n_cigar) { - int l, c; - switch (cigar[k] & BAM_CIGAR_MASK) { - case BAM_CPAD: - for (l = 0; l < (cigar[k]>>BAM_CIGAR_SHIFT); l++) - ins->s[indel++] = '*'; - break; - case BAM_CINS: - for (l = 0; l < (cigar[k]>>BAM_CIGAR_SHIFT); l++, j++) { - c = p->qpos + j - p->is_del < p->b->core.l_qseq - ? 
seq_nt16_str[bam_seqi(bam_get_seq(p->b), - p->qpos + j - p->is_del)] - : 'N'; - ins->s[indel++] = c; - int nm; - hts_base_mod mod[256]; - if (m && (nm = bam_mods_at_qpos(p->b, p->qpos + j - p->is_del, - m, mod, 256)) > 0) { - int o_indel = indel; - if (ks_resize(ins, ins->l + nm*16+3) < 0) - return -1; - ins->s[indel++] = '['; - int j; - for (j = 0; j < nm; j++) { - char qual[20]; - if (mod[j].qual >= 0) - snprintf(qual, sizeof(qual), "%d", mod[j].qual); - else - *qual=0; - if (mod[j].modified_base < 0) - // ChEBI - indel += snprintf(&ins->s[indel], ins->m - indel, - "%c(%d)%s", - "+-"[mod[j].strand], - -mod[j].modified_base, - qual); - else - indel += snprintf(&ins->s[indel], ins->m - indel, - "%c%c%s", - "+-"[mod[j].strand], - mod[j].modified_base, - qual); - } - ins->s[indel++] = ']'; - ins->l += indel - o_indel; // grow by amount we used - } - } - break; - case BAM_CDEL: - // eg cigar 1M2I1D gives mpileup output in T+2AA-1C style - if (del_len) - *del_len = cigar[k]>>BAM_CIGAR_SHIFT; - // fall through - default: - k = p->b->core.n_cigar; - break; - } - k++; - } - ins->s[indel] = '\0'; - ins->l = indel; // string length - - return nb; // base length -} - -/* - * Fills out the kstring with the padded insertion sequence for the current - * location in 'p'. If this is not an insertion site, the string is blank. - * - * This is the original interface with no capability for reporting base - * modifications. - * - * Returns the length of insertion string on success; - * -1 on failure. 
- */ -int bam_plp_insertion(const bam_pileup1_t *p, kstring_t *ins, int *del_len) { - return bam_plp_insertion_mod(p, NULL, ins, del_len); -} - -/*********************** - *** Pileup iterator *** - ***********************/ - -// Dictionary of overlapping reads -KHASH_MAP_INIT_STR(olap_hash, lbnode_t *) -typedef khash_t(olap_hash) olap_hash_t; - -struct bam_plp_s { - mempool_t *mp; - lbnode_t *head, *tail; - int32_t tid, max_tid; - hts_pos_t pos, max_pos; - int is_eof, max_plp, error, maxcnt; - uint64_t id; - bam_pileup1_t *plp; - // for the "auto" interface only - bam1_t *b; - bam_plp_auto_f func; - void *data; - olap_hash_t *overlaps; - - // For notification of creation and destruction events - // and associated client-owned pointer. - int (*plp_construct)(void *data, const bam1_t *b, bam_pileup_cd *cd); - int (*plp_destruct )(void *data, const bam1_t *b, bam_pileup_cd *cd); -}; - -bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data) -{ - bam_plp_t iter; - iter = (bam_plp_t)calloc(1, sizeof(struct bam_plp_s)); - iter->mp = mp_init(); - iter->head = iter->tail = mp_alloc(iter->mp); - iter->max_tid = iter->max_pos = -1; - iter->maxcnt = 8000; - if (func) { - iter->func = func; - iter->data = data; - iter->b = bam_init1(); - } - return iter; -} - -int bam_plp_init_overlaps(bam_plp_t iter) -{ - iter->overlaps = kh_init(olap_hash); // hash for tweaking quality of bases in overlapping reads - return iter->overlaps ? 
0 : -1; -} - -void bam_plp_destroy(bam_plp_t iter) -{ - lbnode_t *p, *pnext; - if ( iter->overlaps ) kh_destroy(olap_hash, iter->overlaps); - for (p = iter->head; p != NULL; p = pnext) { - pnext = p->next; - mp_free(iter->mp, p); - } - mp_destroy(iter->mp); - if (iter->b) bam_destroy1(iter->b); - free(iter->plp); - free(iter); -} - -void bam_plp_constructor(bam_plp_t plp, - int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)) { - plp->plp_construct = func; -} - -void bam_plp_destructor(bam_plp_t plp, - int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)) { - plp->plp_destruct = func; -} - -//--------------------------------- -//--- Tweak overlapping reads -//--------------------------------- - -/** - * cigar_iref2iseq_set() - find the first CMATCH setting the ref and the read index - * cigar_iref2iseq_next() - get the next CMATCH base - * @cigar: pointer to current cigar block (rw) - * @cigar_max: pointer just beyond the last cigar block - * @icig: position within the current cigar block (rw) - * @iseq: position in the sequence (rw) - * @iref: position with respect to the beginning of the read (iref_pos - b->core.pos) (rw) - * - * Returns BAM_CMATCH, -1 when there is no more cigar to process or the requested position is not covered, - * or -2 on error. 
- */ -static inline int cigar_iref2iseq_set(const uint32_t **cigar, - const uint32_t *cigar_max, - hts_pos_t *icig, - hts_pos_t *iseq, - hts_pos_t *iref) -{ - hts_pos_t pos = *iref; - if ( pos < 0 ) return -1; - *icig = 0; - *iseq = 0; - *iref = 0; - while ( *cigar> BAM_CIGAR_SHIFT; - - if ( cig==BAM_CSOFT_CLIP ) { (*cigar)++; *iseq += ncig; *icig = 0; continue; } - if ( cig==BAM_CHARD_CLIP || cig==BAM_CPAD ) { (*cigar)++; *icig = 0; continue; } - if ( cig==BAM_CMATCH || cig==BAM_CEQUAL || cig==BAM_CDIFF ) - { - pos -= ncig; - if ( pos < 0 ) { *icig = ncig + pos; *iseq += *icig; *iref += *icig; return BAM_CMATCH; } - (*cigar)++; *iseq += ncig; *icig = 0; *iref += ncig; - continue; - } - if ( cig==BAM_CINS ) { (*cigar)++; *iseq += ncig; *icig = 0; continue; } - if ( cig==BAM_CDEL || cig==BAM_CREF_SKIP ) - { - pos -= ncig; - if ( pos<0 ) pos = 0; - (*cigar)++; *icig = 0; *iref += ncig; - continue; - } - hts_log_error("Unexpected cigar %d", cig); - return -2; - } - *iseq = -1; - return -1; -} -static inline int cigar_iref2iseq_next(const uint32_t **cigar, - const uint32_t *cigar_max, - hts_pos_t *icig, - hts_pos_t *iseq, - hts_pos_t *iref) -{ - while ( *cigar < cigar_max ) - { - int cig = (**cigar) & BAM_CIGAR_MASK; - int ncig = (**cigar) >> BAM_CIGAR_SHIFT; - - if ( cig==BAM_CMATCH || cig==BAM_CEQUAL || cig==BAM_CDIFF ) - { - if ( *icig >= ncig - 1 ) { *icig = -1; (*cigar)++; continue; } - (*iseq)++; (*icig)++; (*iref)++; - return BAM_CMATCH; - } - if ( cig==BAM_CDEL || cig==BAM_CREF_SKIP ) { (*cigar)++; (*iref) += ncig; *icig = -1; continue; } - if ( cig==BAM_CINS ) { (*cigar)++; *iseq += ncig; *icig = -1; continue; } - if ( cig==BAM_CSOFT_CLIP ) { (*cigar)++; *iseq += ncig; *icig = -1; continue; } - if ( cig==BAM_CHARD_CLIP || cig==BAM_CPAD ) { (*cigar)++; *icig = -1; continue; } - hts_log_error("Unexpected cigar %d", cig); - return -2; - } - *iseq = -1; - *iref = -1; - return -1; -} - -// Given overlapping read 'a' (left) and 'b' (right) on the same -// template, 
adjust quality values to zero for either a or b. -// Note versions 1.12 and earlier always removed quality from 'b' for -// matching bases. Now we select a or b semi-randomly based on name hash. -// Returns 0 on success, -// -1 on failure -static int tweak_overlap_quality(bam1_t *a, bam1_t *b) -{ - const uint32_t *a_cigar = bam_get_cigar(a), - *a_cigar_max = a_cigar + a->core.n_cigar; - const uint32_t *b_cigar = bam_get_cigar(b), - *b_cigar_max = b_cigar + b->core.n_cigar; - hts_pos_t a_icig = 0, a_iseq = 0; - hts_pos_t b_icig = 0, b_iseq = 0; - uint8_t *a_qual = bam_get_qual(a), *b_qual = bam_get_qual(b); - uint8_t *a_seq = bam_get_seq(a), *b_seq = bam_get_seq(b); - - hts_pos_t iref = b->core.pos; - hts_pos_t a_iref = iref - a->core.pos; - hts_pos_t b_iref = iref - b->core.pos; - - int a_ret = cigar_iref2iseq_set(&a_cigar, a_cigar_max, - &a_icig, &a_iseq, &a_iref); - if ( a_ret<0 ) - // no overlap or error - return a_ret<-1 ? -1:0; - - int b_ret = cigar_iref2iseq_set(&b_cigar, b_cigar_max, - &b_icig, &b_iseq, &b_iref); - if ( b_ret<0 ) - // no overlap or error - return b_ret<-1 ? -1:0; - - // Determine which seq is the one getting modified qualities. - uint8_t amul, bmul; - if (__ac_Wang_hash(__ac_X31_hash_string(bam_get_qname(a))) & 1) { - amul = 1; - bmul = 0; - } else { - amul = 0; - bmul = 1; - } - - // Loop over the overlapping region nulling qualities in either - // seq a or b. 
- int err = 0; - while ( 1 ) - { - // Step to next matching reference position in a and b - while ( a_ret >= 0 && a_iref>=0 && a_iref < iref - a->core.pos ) - a_ret = cigar_iref2iseq_next(&a_cigar, a_cigar_max, - &a_icig, &a_iseq, &a_iref); - if ( a_ret<0 ) { // done - err = a_ret<-1?-1:0; - break; - } - if ( iref < a_iref + a->core.pos ) - iref = a_iref + a->core.pos; - - while ( b_ret >= 0 && b_iref>=0 && b_iref < iref - b->core.pos ) - b_ret = cigar_iref2iseq_next(&b_cigar, b_cigar_max, &b_icig, - &b_iseq, &b_iref); - if ( b_ret<0 ) { // done - err = b_ret<-1?-1:0; - break; - } - if ( iref < b_iref + b->core.pos ) - iref = b_iref + b->core.pos; - - iref++; - - if ( a_iref+a->core.pos != b_iref+b->core.pos ) - // only CMATCH positions, don't know what to do with indels - continue; - - if (a_iseq > a->core.l_qseq || b_iseq > b->core.l_qseq) - // Fell off end of sequence, bad CIGAR? - return -1; - - // We're finally at the same ref base in both a and b. - // Check if the bases match (confident) or mismatch - // (not so confident). - if ( bam_seqi(a_seq,a_iseq) == bam_seqi(b_seq,b_iseq) ) { - // We are very confident about this base. Use sum of quals - int qual = a_qual[a_iseq] + b_qual[b_iseq]; - a_qual[a_iseq] = amul * (qual>200 ? 200 : qual); - b_qual[b_iseq] = bmul * (qual>200 ? 200 : qual);; - } else { - // Not so confident about anymore given the mismatch. - // Reduce qual for lowest quality base. - if ( a_qual[a_iseq] > b_qual[b_iseq] ) { - // A highest qual base; keep - a_qual[a_iseq] = 0.8 * a_qual[a_iseq]; - b_qual[b_iseq] = 0; - } else if (a_qual[a_iseq] < b_qual[b_iseq] ) { - // B highest qual base; keep - b_qual[b_iseq] = 0.8 * b_qual[b_iseq]; - a_qual[a_iseq] = 0; - } else { - // Both equal, so pick randomly - a_qual[a_iseq] = amul * 0.8 * a_qual[a_iseq]; - b_qual[b_iseq] = bmul * 0.8 * b_qual[b_iseq]; - } - } - } - - return err; -} - -// Fix overlapping reads. Simple soft-clipping did not give good results. 
-// Lowering qualities of unwanted bases is more selective and works better. -// -// Returns 0 on success, -1 on failure -static int overlap_push(bam_plp_t iter, lbnode_t *node) -{ - if ( !iter->overlaps ) return 0; - - // mapped mates and paired reads only - if ( node->b.core.flag&BAM_FMUNMAP || !(node->b.core.flag&BAM_FPROPER_PAIR) ) return 0; - - // no overlap possible, unless some wild cigar - if ( (node->b.core.mtid >= 0 && node->b.core.tid != node->b.core.mtid) - || (llabs(node->b.core.isize) >= 2*node->b.core.l_qseq - && node->b.core.mpos >= node->end) // for those wild cigars - ) return 0; - - khiter_t kitr = kh_get(olap_hash, iter->overlaps, bam_get_qname(&node->b)); - if ( kitr==kh_end(iter->overlaps) ) - { - // Only add reads where the mate is still to arrive - if (node->b.core.mpos >= node->b.core.pos || - ((node->b.core.flag & BAM_FPAIRED) && node->b.core.mpos == -1)) { - int ret; - kitr = kh_put(olap_hash, iter->overlaps, bam_get_qname(&node->b), &ret); - if (ret < 0) return -1; - kh_value(iter->overlaps, kitr) = node; - } - } - else - { - lbnode_t *a = kh_value(iter->overlaps, kitr); - int err = tweak_overlap_quality(&a->b, &node->b); - kh_del(olap_hash, iter->overlaps, kitr); - assert(a->end-1 == a->s.end); - return err; - } - return 0; -} - -static void overlap_remove(bam_plp_t iter, const bam1_t *b) -{ - if ( !iter->overlaps ) return; - - khiter_t kitr; - if ( b ) - { - kitr = kh_get(olap_hash, iter->overlaps, bam_get_qname(b)); - if ( kitr!=kh_end(iter->overlaps) ) - kh_del(olap_hash, iter->overlaps, kitr); - } - else - { - // remove all - for (kitr = kh_begin(iter->overlaps); kitroverlaps); kitr++) - if ( kh_exist(iter->overlaps, kitr) ) kh_del(olap_hash, iter->overlaps, kitr); - } -} - - - -// Prepares next pileup position in bam records collected by bam_plp_auto -> user func -> bam_plp_push. 
Returns -// pointer to the piled records if next position is ready or NULL if there is not enough records in the -// buffer yet (the current position is still the maximum position across all buffered reads). -const bam_pileup1_t *bam_plp64_next(bam_plp_t iter, int *_tid, hts_pos_t *_pos, int *_n_plp) -{ - if (iter->error) { *_n_plp = -1; return NULL; } - *_n_plp = 0; - if (iter->is_eof && iter->head == iter->tail) return NULL; - while (iter->is_eof || iter->max_tid > iter->tid || (iter->max_tid == iter->tid && iter->max_pos > iter->pos)) { - int n_plp = 0; - // write iter->plp at iter->pos - lbnode_t **pptr = &iter->head; - while (*pptr != iter->tail) { - lbnode_t *p = *pptr; - if (p->b.core.tid < iter->tid || (p->b.core.tid == iter->tid && p->end <= iter->pos)) { // then remove - overlap_remove(iter, &p->b); - if (iter->plp_destruct) - iter->plp_destruct(iter->data, &p->b, &p->cd); - *pptr = p->next; mp_free(iter->mp, p); - } - else { - if (p->b.core.tid == iter->tid && p->beg <= iter->pos) { // here: p->end > pos; then add to pileup - if (n_plp == iter->max_plp) { // then double the capacity - iter->max_plp = iter->max_plp? iter->max_plp<<1 : 256; - iter->plp = (bam_pileup1_t*)realloc(iter->plp, sizeof(bam_pileup1_t) * iter->max_plp); - } - iter->plp[n_plp].b = &p->b; - iter->plp[n_plp].cd = p->cd; - if (resolve_cigar2(iter->plp + n_plp, iter->pos, &p->s)) ++n_plp; // actually always true... - } - pptr = &(*pptr)->next; - } - } - *_n_plp = n_plp; *_tid = iter->tid; *_pos = iter->pos; - // update iter->tid and iter->pos - if (iter->head != iter->tail) { - if (iter->tid > iter->head->b.core.tid) { - hts_log_error("Unsorted input. 
Pileup aborts"); - iter->error = 1; - *_n_plp = -1; - return NULL; - } - } - if (iter->tid < iter->head->b.core.tid) { // come to a new reference sequence - iter->tid = iter->head->b.core.tid; iter->pos = iter->head->beg; // jump to the next reference - } else if (iter->pos < iter->head->beg) { // here: tid == head->b.core.tid - iter->pos = iter->head->beg; // jump to the next position - } else ++iter->pos; // scan contiguously - // return - if (n_plp) return iter->plp; - if (iter->is_eof && iter->head == iter->tail) break; - } - return NULL; -} - -const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) -{ - hts_pos_t pos64 = 0; - const bam_pileup1_t *p = bam_plp64_next(iter, _tid, &pos64, _n_plp); - if (pos64 < INT_MAX) { - *_pos = pos64; - } else { - hts_log_error("Position %"PRId64" too large", pos64); - *_pos = INT_MAX; - iter->error = 1; - *_n_plp = -1; - return NULL; - } - return p; -} - -int bam_plp_push(bam_plp_t iter, const bam1_t *b) -{ - if (iter->error) return -1; - if (b) { - if (b->core.tid < 0) { overlap_remove(iter, b); return 0; } - // Skip only unmapped reads here, any additional filtering must be done in iter->func - if (b->core.flag & BAM_FUNMAP) { overlap_remove(iter, b); return 0; } - if (iter->tid == b->core.tid && iter->pos == b->core.pos && iter->mp->cnt > iter->maxcnt) - { - overlap_remove(iter, b); - return 0; - } - if (bam_copy1(&iter->tail->b, b) == NULL) - return -1; - iter->tail->b.id = iter->id++; - iter->tail->beg = b->core.pos; - // Use raw rlen rather than bam_endpos() which adjusts rlen=0 to rlen=1 - iter->tail->end = b->core.pos + bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); - iter->tail->s = g_cstate_null; iter->tail->s.end = iter->tail->end - 1; // initialize cstate_t - if (b->core.tid < iter->max_tid) { - hts_log_error("The input is not sorted (chromosomes out of order)"); - iter->error = 1; - return -1; - } - if ((b->core.tid == iter->max_tid) && (iter->tail->beg < iter->max_pos)) { - 
hts_log_error("The input is not sorted (reads out of order)"); - iter->error = 1; - return -1; - } - iter->max_tid = b->core.tid; iter->max_pos = iter->tail->beg; - if (iter->tail->end > iter->pos || iter->tail->b.core.tid > iter->tid) { - lbnode_t *next = mp_alloc(iter->mp); - if (!next) { - iter->error = 1; - return -1; - } - if (iter->plp_construct) { - if (iter->plp_construct(iter->data, &iter->tail->b, - &iter->tail->cd) < 0) { - mp_free(iter->mp, next); - iter->error = 1; - return -1; - } - } - if (overlap_push(iter, iter->tail) < 0) { - mp_free(iter->mp, next); - iter->error = 1; - return -1; - } - iter->tail->next = next; - iter->tail = iter->tail->next; - } - } else iter->is_eof = 1; - return 0; -} - -const bam_pileup1_t *bam_plp64_auto(bam_plp_t iter, int *_tid, hts_pos_t *_pos, int *_n_plp) -{ - const bam_pileup1_t *plp; - if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; } - if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; - else { // no pileup line can be obtained; read alignments - *_n_plp = 0; - if (iter->is_eof) return 0; - int ret; - while ( (ret=iter->func(iter->data, iter->b)) >= 0) { - if (bam_plp_push(iter, iter->b) < 0) { - *_n_plp = -1; - return 0; - } - if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; - // otherwise no pileup line can be returned; read the next alignment. 
- } - if ( ret < -1 ) { iter->error = ret; *_n_plp = -1; return 0; } - if (bam_plp_push(iter, 0) < 0) { - *_n_plp = -1; - return 0; - } - if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; - return 0; - } -} - -const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) -{ - hts_pos_t pos64 = 0; - const bam_pileup1_t *p = bam_plp64_auto(iter, _tid, &pos64, _n_plp); - if (pos64 < INT_MAX) { - *_pos = pos64; - } else { - hts_log_error("Position %"PRId64" too large", pos64); - *_pos = INT_MAX; - iter->error = 1; - *_n_plp = -1; - return NULL; - } - return p; -} - -void bam_plp_reset(bam_plp_t iter) -{ - overlap_remove(iter, NULL); - iter->max_tid = iter->max_pos = -1; - iter->tid = iter->pos = 0; - iter->is_eof = 0; - while (iter->head != iter->tail) { - lbnode_t *p = iter->head; - iter->head = p->next; - mp_free(iter->mp, p); - } -} - -void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt) -{ - iter->maxcnt = maxcnt; -} - -/************************ - *** Mpileup iterator *** - ************************/ - -struct bam_mplp_s { - int n; - int32_t min_tid, *tid; - hts_pos_t min_pos, *pos; - bam_plp_t *iter; - int *n_plp; - const bam_pileup1_t **plp; -}; - -bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data) -{ - int i; - bam_mplp_t iter; - iter = (bam_mplp_t)calloc(1, sizeof(struct bam_mplp_s)); - iter->pos = (hts_pos_t*)calloc(n, sizeof(hts_pos_t)); - iter->tid = (int32_t*)calloc(n, sizeof(int32_t)); - iter->n_plp = (int*)calloc(n, sizeof(int)); - iter->plp = (const bam_pileup1_t**)calloc(n, sizeof(bam_pileup1_t*)); - iter->iter = (bam_plp_t*)calloc(n, sizeof(bam_plp_t)); - iter->n = n; - iter->min_pos = HTS_POS_MAX; - iter->min_tid = (uint32_t)-1; - for (i = 0; i < n; ++i) { - iter->iter[i] = bam_plp_init(func, data[i]); - iter->pos[i] = iter->min_pos; - iter->tid[i] = iter->min_tid; - } - return iter; -} - -int bam_mplp_init_overlaps(bam_mplp_t iter) -{ - int i, r = 0; - for (i = 0; i < iter->n; ++i) - r |= 
bam_plp_init_overlaps(iter->iter[i]); - return r == 0 ? 0 : -1; -} - -void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt) -{ - int i; - for (i = 0; i < iter->n; ++i) - iter->iter[i]->maxcnt = maxcnt; -} - -void bam_mplp_destroy(bam_mplp_t iter) -{ - int i; - for (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]); - free(iter->iter); free(iter->pos); free(iter->tid); - free(iter->n_plp); free(iter->plp); - free(iter); -} - -int bam_mplp64_auto(bam_mplp_t iter, int *_tid, hts_pos_t *_pos, int *n_plp, const bam_pileup1_t **plp) -{ - int i, ret = 0; - hts_pos_t new_min_pos = HTS_POS_MAX; - uint32_t new_min_tid = (uint32_t)-1; - for (i = 0; i < iter->n; ++i) { - if (iter->pos[i] == iter->min_pos && iter->tid[i] == iter->min_tid) { - int tid; - hts_pos_t pos; - iter->plp[i] = bam_plp64_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]); - if ( iter->iter[i]->error ) return -1; - if (iter->plp[i]) { - iter->tid[i] = tid; - iter->pos[i] = pos; - } else { - iter->tid[i] = 0; - iter->pos[i] = 0; - } - } - if (iter->plp[i]) { - if (iter->tid[i] < new_min_tid) { - new_min_tid = iter->tid[i]; - new_min_pos = iter->pos[i]; - } else if (iter->tid[i] == new_min_tid && iter->pos[i] < new_min_pos) { - new_min_pos = iter->pos[i]; - } - } - } - iter->min_pos = new_min_pos; - iter->min_tid = new_min_tid; - if (new_min_pos == HTS_POS_MAX) return 0; - *_tid = new_min_tid; *_pos = new_min_pos; - for (i = 0; i < iter->n; ++i) { - if (iter->pos[i] == iter->min_pos && iter->tid[i] == iter->min_tid) { - n_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i]; - ++ret; - } else n_plp[i] = 0, plp[i] = 0; - } - return ret; -} - -int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp) -{ - hts_pos_t pos64 = 0; - int ret = bam_mplp64_auto(iter, _tid, &pos64, n_plp, plp); - if (ret >= 0) { - if (pos64 < INT_MAX) { - *_pos = pos64; - } else { - hts_log_error("Position %"PRId64" too large", pos64); - *_pos = INT_MAX; - return -1; - } - } - return ret; -} - -void 
bam_mplp_reset(bam_mplp_t iter) -{ - int i; - iter->min_pos = HTS_POS_MAX; - iter->min_tid = (uint32_t)-1; - for (i = 0; i < iter->n; ++i) { - bam_plp_reset(iter->iter[i]); - iter->pos[i] = HTS_POS_MAX; - iter->tid[i] = (uint32_t)-1; - iter->n_plp[i] = 0; - iter->plp[i] = NULL; - } -} - -void bam_mplp_constructor(bam_mplp_t iter, - int (*func)(void *arg, const bam1_t *b, bam_pileup_cd *cd)) { - int i; - for (i = 0; i < iter->n; ++i) - bam_plp_constructor(iter->iter[i], func); -} - -void bam_mplp_destructor(bam_mplp_t iter, - int (*func)(void *arg, const bam1_t *b, bam_pileup_cd *cd)) { - int i; - for (i = 0; i < iter->n; ++i) - bam_plp_destructor(iter->iter[i], func); -} - -#endif // ~!defined(BAM_NO_PILEUP) diff --git a/src/htslib-1.18/sam_internal.h b/src/htslib-1.18/sam_internal.h deleted file mode 100644 index b1fce9f..0000000 --- a/src/htslib-1.18/sam_internal.h +++ /dev/null @@ -1,105 +0,0 @@ -/* sam_internal.h -- internal functions; not part of the public API. - - Copyright (C) 2019-2020 Genome Research Ltd. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_SAM_INTERNAL_H -#define HTSLIB_SAM_INTERNAL_H - -#include -#include -#include "htslib/sam.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Used internally in the SAM format multi-threading. -int sam_state_destroy(samFile *fp); -int sam_set_thread_pool(htsFile *fp, htsThreadPool *p); -int sam_set_threads(htsFile *fp, int nthreads); - -// Fastq state -int fastq_state_set(samFile *fp, enum hts_fmt_option opt, ...); -void fastq_state_destroy(samFile *fp); - -// bam1_t data (re)allocation -int sam_realloc_bam_data(bam1_t *b, size_t desired); - -static inline int realloc_bam_data(bam1_t *b, size_t desired) -{ - if (desired <= b->m_data) return 0; - return sam_realloc_bam_data(b, desired); -} - -static inline int possibly_expand_bam_data(bam1_t *b, size_t bytes) { - size_t new_len = (size_t) b->l_data + bytes; - - if (new_len > INT32_MAX || new_len < bytes) { // Too big or overflow - errno = ENOMEM; - return -1; - } - if (new_len <= b->m_data) return 0; - return sam_realloc_bam_data(b, new_len); -} - -/* - * Convert a nibble encoded BAM sequence to a string of bases. - * - * We do this 2 bp at a time for speed. 
Equiv to: - * - * for (i = 0; i < len; i++) - * seq[i] = seq_nt16_str[bam_seqi(nib, i)]; - */ -static inline void nibble2base(uint8_t *nib, char *seq, int len) { - static const char code2base[512] = - "===A=C=M=G=R=S=V=T=W=Y=H=K=D=B=N" - "A=AAACAMAGARASAVATAWAYAHAKADABAN" - "C=CACCCMCGCRCSCVCTCWCYCHCKCDCBCN" - "M=MAMCMMMGMRMSMVMTMWMYMHMKMDMBMN" - "G=GAGCGMGGGRGSGVGTGWGYGHGKGDGBGN" - "R=RARCRMRGRRRSRVRTRWRYRHRKRDRBRN" - "S=SASCSMSGSRSSSVSTSWSYSHSKSDSBSN" - "V=VAVCVMVGVRVSVVVTVWVYVHVKVDVBVN" - "T=TATCTMTGTRTSTVTTTWTYTHTKTDTBTN" - "W=WAWCWMWGWRWSWVWTWWWYWHWKWDWBWN" - "Y=YAYCYMYGYRYSYVYTYWYYYHYKYDYBYN" - "H=HAHCHMHGHRHSHVHTHWHYHHHKHDHBHN" - "K=KAKCKMKGKRKSKVKTKWKYKHKKKDKBKN" - "D=DADCDMDGDRDSDVDTDWDYDHDKDDDBDN" - "B=BABCBMBGBRBSBVBTBWBYBHBKBDBBBN" - "N=NANCNMNGNRNSNVNTNWNYNHNKNDNBNN"; - - int i, len2 = len/2; - seq[0] = 0; - - for (i = 0; i < len2; i++) - // Note size_t cast helps gcc optimiser. - memcpy(&seq[i*2], &code2base[(size_t)nib[i]*2], 2); - - if ((i *= 2) < len) - seq[i] = seq_nt16_str[bam_seqi(nib, i)]; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.18/sam_mods.c b/src/htslib-1.18/sam_mods.c deleted file mode 100644 index fe8db85..0000000 --- a/src/htslib-1.18/sam_mods.c +++ /dev/null @@ -1,683 +0,0 @@ -/* sam_mods.c -- Base modification handling in SAM and BAM. - - Copyright (C) 2020-2023 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include "htslib/sam.h" -#include "textutils_internal.h" - -// --------------------------- -// Base Modification retrieval -// -// These operate by recording state in an opaque type, allocated and freed -// via the functions below. -// -// Initially we call bam_parse_basemod to process the tags and record the -// modifications in the state structure, and then functions such as -// bam_next_basemod can iterate over this cached state. - -/* Overview of API. - -We start by allocating an hts_base_mod_state and parsing the MM, ML and MN -tags into it. This has optional flags controlling how we report base -modifications in "explicit" coordinates. See below - - hts_base_mod_state *m = hts_base_mod_state_alloc(); - bam_parse_basemod2(b, m, HTS_MOD_REPORT_UNCHECKED); - // Or: bam_parse_basemod(b, m), which is equiv to flags==0 - //... do something ... - hts_base_mod_state_free(m); - -In the default implicit MM coordinate system, any location not -reported is implicitly assumed to contain no modification. We only -report the places we think are likely modified. - -Some tools however only look for base modifications in particular -contexts, eg CpG islands. Here we need to distinguish between -not-looked-for and looked-for-but-didn't-find. These calls have an -explicit coordinate system, where we only know information about the -coordinates explicitly listed and everything else is considered to be -unverified. 
- -By default we don't get reports on the other coordinates in an -explicit MM tag, but the HTS_MOD_REPORT_UNCHECKED flag will report -them (with quality HTS_MOD_UNCHECKED) meaning we can do consensus -modification analysis with accurate counting when dealing with a -mixture of explicit and implicit records. - - -We have different ways of processing the base modifications. We can -iterate either mod-by-mod or position-by-position, or we can simply -query a specific coordinate as may be done when processing a pileup. - -To check for base modifications as a specific location within a -sequence we can use bam_mods_at_qpos. This provides complete random -access within the MM string. However currently this is inefficiently -implemented so should only be used for occasional analysis or as a way -to start iterating at a specific location. It modifies the state -position, so after the first use we can then switch to -bam_mods_at_next_pos to iterate position by position from then on. - - hts_base_mod mods[10]; - int n = bam_mods_at_qpos(b, pos, m, mods, 10); - -For base by base, we have bam_mods_at_next_pos. This strictly starts -at the first base and reports entries one at a time. It's more -efficient than a loop repeatedly calling ...at-pos. - - hts_base_mod mods[10]; - int n = bam_mods_at_next_pos(b, m, mods, 10); - for (int i = 0; i < n; i++) { - // report mod i of n - } - -Iterating over modifications instead of coordinates is simpler and -more efficient as it skips reporting of unmodified bases. This is -done with bam_next_basemod. - - hts_base_mod mods[10]; - while ((n=bam_next_basemod(b, m, mods, 10, &pos)) > 0) { - for (j = 0; j < n; j++) { - // Report 'n'th mod at sequence position 'pos' - } - } - -There are also functions that query meta-data about the MM line rather -than per-site information. - -bam_mods_recorded returns an array of ints holding the +ve code ('m') -or -ve CHEBI numeric values. 
- - int ntypes, *types = bam_mods_recorded(m, &ntype); - -We can then query a specific modification type to get further -information on the strand it is operating on, whether it has implicit -or explicit coordinates, and what it's corresponding canonical base it -is (The "C" in "C+m"). bam_mods_query_type does this by code name, -while bam_mods_queryi does this by numeric i^{th} type (from 0 to ntype-1). - - bam_mods_query_type(m, 'c', &strand, &implicit, &canonical); - bam_mods_queryi(m, 2, &strand, &implicit, &canonical); - -*/ - -/* - * Base modification are stored in MM/Mm tags as defined as - * - * ::= | "" - * ::= - * - * ::= "A" | "C" | "G" | "T" | "N". - * - * ::= "+" | "-". - * - * ::= | - * ::= | - * ::= - * ::= - * - * ::= "," | ";" - * - * We do not allocate additional memory other than the fixed size - * state, thus we track up to 256 pointers to different locations - * within the MM and ML tags. Each pointer is for a distinct - * modification code (simple or ChEBI), meaning some may point to the - * same delta-list when multiple codes are combined together - * (e.g. "C+mh,1,5,18,3;"). This is the MM[] array. - * - * Each numeric in the delta-list is tracked in MMcount[], counted - * down until it hits zero in which case the next delta is fetched. - * - * ML array similarly holds the locations in the quality (ML) tag per - * type, but these are interleaved so C+mhfc,10,15 will have 4 types - * all pointing to the same delta position, but in ML we store - * Q(m0)Q(h0)Q(f0)Q(c0) followed by Q(m1)Q(h1)Q(f1)Q(c1). This ML - * also has MLstride indicating how many positions along ML to jump - * each time we consume a base. (4 in our above example, but usually 1 - * for the simple case). - * - * One complexity of the base modification system is that mods are - * always stored in the original DNA orientation. 
This is so that - * tools that may reverse-complement a sequence (eg "samtools fastq -T - * MM,ML") can pass through these modification tags irrespective of - * whether they have any knowledge of their internal workings. - * - * Because we don't wish to allocate extra memory, we cannot simply - * reverse the MM and ML tags. Sadly this means we have to manage the - * reverse complementing ourselves on-the-fly. - * For reversed reads we start at the right end of MM and no longer - * stop at the semicolon. Instead we use MMend[] array to mark the - * termination point. - */ -#define MAX_BASE_MOD 256 -struct hts_base_mod_state { - int type[MAX_BASE_MOD]; // char or minus-CHEBI - int canonical[MAX_BASE_MOD];// canonical base, as seqi (1,2,4,8,15) - char strand[MAX_BASE_MOD]; // strand of modification; + or - - int MMcount[MAX_BASE_MOD]; // no. canonical bases left until next mod - char *MM[MAX_BASE_MOD]; // next pos delta (string) - char *MMend[MAX_BASE_MOD]; // end of pos-delta string - uint8_t *ML[MAX_BASE_MOD]; // next qual - int MLstride[MAX_BASE_MOD]; // bytes between quals for this type - int implicit[MAX_BASE_MOD]; // treat unlisted positions as non-modified? - int seq_pos; // current position along sequence - int nmods; // used array size (0 to MAX_BASE_MOD-1). 
- uint32_t flags; // Bit-field: see HTS_MOD_REPORT_UNCHECKED -}; - -hts_base_mod_state *hts_base_mod_state_alloc(void) { - return calloc(1, sizeof(hts_base_mod_state)); -} - -void hts_base_mod_state_free(hts_base_mod_state *state) { - free(state); -} - -/* - * Count frequency of A, C, G, T and N canonical bases in the sequence - */ -static void seq_freq(const bam1_t *b, int freq[16]) { - int i; - - memset(freq, 0, 16*sizeof(*freq)); - uint8_t *seq = bam_get_seq(b); - for (i = 0; i < b->core.l_qseq; i++) - freq[bam_seqi(seq, i)]++; - freq[15] = b->core.l_qseq; // all bases count as N for base mods -} - -//0123456789ABCDEF -//=ACMGRSVTWYHKDBN aka seq_nt16_str[] -//=TGKCYSBAWRDMHVN comp1ement of seq_nt16_str -//084C2A6E195D3B7F -static int seqi_rc[] = { 0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15 }; - -/* - * Parse the MM and ML tags to populate the base mod state. - * This structure will have been previously allocated via - * hts_base_mod_state_alloc, but it does not need to be repeatedly - * freed and allocated for each new bam record. (Although obviously - * it requires a new call to this function.) - * - * Flags are copied into the state and used to control reporting functions. - * Currently the only flag is HTS_MOD_REPORT_UNCHECKED, to control whether - * explicit "C+m?" mods report quality HTS_MOD_UNCHECKED for the bases - * outside the explicitly reported region. 
- */ -int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state, - uint32_t flags) { - // Reset position, else upcoming calls may fail on - // seq pos - length comparison - state->seq_pos = 0; - state->nmods = 0; - state->flags = flags; - - // Read MM and ML tags - uint8_t *mm = bam_aux_get(b, "MM"); - if (!mm) mm = bam_aux_get(b, "Mm"); - if (!mm) - return 0; - if (mm[0] != 'Z') { - hts_log_error("%s: MM tag is not of type Z", bam_get_qname(b)); - return -1; - } - - uint8_t *mi = bam_aux_get(b, "MN"); - if (mi && bam_aux2i(mi) != b->core.l_qseq) { - // bam_aux2i with set errno = EINVAL and return 0 if the tag - // isn't integer, but 0 will be a seq-length mismatch anyway so - // triggers an error here too. - hts_log_error("%s: MM/MN data length is incompatible with" - " SEQ length", bam_get_qname(b)); - return -1; - } - - uint8_t *ml = bam_aux_get(b, "ML"); - if (!ml) ml = bam_aux_get(b, "Ml"); - if (ml && (ml[0] != 'B' || ml[1] != 'C')) { - hts_log_error("%s: ML tag is not of type B,C", bam_get_qname(b)); - return -1; - } - uint8_t *ml_end = ml ? 
ml+6 + le_to_u32(ml+2) : NULL; - if (ml) ml += 6; - - // Aggregate freqs of ACGTN if reversed, to get final-delta (later) - int freq[16]; - if (b->core.flag & BAM_FREVERSE) - seq_freq(b, freq); - - char *cp = (char *)mm+1; - int mod_num = 0; - int implicit = 1; - while (*cp) { - for (; *cp; cp++) { - // cp should be [ACGTNU][+-]([a-zA-Z]+|[0-9]+)[.?]?(,\d+)*; - unsigned char btype = *cp++; - - if (btype != 'A' && btype != 'C' && - btype != 'G' && btype != 'T' && - btype != 'U' && btype != 'N') - return -1; - if (btype == 'U') btype = 'T'; - - btype = seq_nt16_table[btype]; - - // Strand - if (*cp != '+' && *cp != '-') - return -1; // malformed - char strand = *cp++; - - // List of modification types - char *ms = cp, *me; // mod code start and end - char *cp_end = NULL; - int chebi = 0; - if (isdigit_c(*cp)) { - chebi = strtol(cp, &cp_end, 10); - cp = cp_end; - ms = cp-1; - } else { - while (*cp && isalpha_c(*cp)) - cp++; - if (*cp == '\0') - return -1; - } - - me = cp; - - // Optional explicit vs implicit marker - implicit = 1; - if (*cp == '.') { - // default is implicit = 1; - cp++; - } else if (*cp == '?') { - implicit = 0; - cp++; - } else if (*cp != ',' && *cp != ';') { - // parse error - return -1; - } - - long delta; - int n = 0; // nth symbol in a multi-mod string - int stride = me-ms; - int ndelta = 0; - - if (b->core.flag & BAM_FREVERSE) { - // We process the sequence in left to right order, - // but delta is successive count of bases to skip - // counting right to left. This also means the number - // of bases to skip at left edge is unrecorded (as it's - // the remainder). - // - // To output mods in left to right, we step through the - // MM list in reverse and need to identify the left-end - // "remainder" delta. - int total_seq = 0; - for (;;) { - cp += (*cp == ','); - if (*cp == 0 || *cp == ';') - break; - - delta = strtol(cp, &cp_end, 10); - if (cp_end == cp) { - hts_log_error("%s: Hit end of MM tag. 
Missing " - "semicolon?", bam_get_qname(b)); - return -1; - } - - cp = cp_end; - total_seq += delta+1; - ndelta++; - } - delta = freq[seqi_rc[btype]] - total_seq; // remainder - } else { - delta = *cp == ',' - ? strtol(cp+1, &cp_end, 10) - : 0; - if (!cp_end) { - // empty list - delta = INT_MAX; - cp_end = cp+1; - } - } - // Now delta is first in list or computed remainder, - // and cp_end is either start or end of the MM list. - while (ms < me) { - state->type [mod_num] = chebi ? -chebi : *ms; - state->strand [mod_num] = (strand == '-'); - state->canonical[mod_num] = btype; - state->MLstride [mod_num] = stride; - state->implicit [mod_num] = implicit; - - if (delta < 0) { - hts_log_error("%s: MM tag refers to bases beyond sequence " - "length", bam_get_qname(b)); - return -1; - } - state->MMcount [mod_num] = delta; - if (b->core.flag & BAM_FREVERSE) { - state->MM [mod_num] = cp+1; - state->MMend[mod_num] = cp_end; - state->ML [mod_num] = ml ? ml+n +(ndelta-1)*stride: NULL; - } else { - state->MM [mod_num] = cp_end; - state->MMend[mod_num] = NULL; - state->ML [mod_num] = ml ? ml+n : NULL; - } - - if (++mod_num >= MAX_BASE_MOD) { - hts_log_error("%s: Too many base modification types", - bam_get_qname(b)); - return -1; - } - ms++; n++; - } - - // Skip modification deltas - if (ml) { - if (b->core.flag & BAM_FREVERSE) { - ml += ndelta*stride; - } else { - while (*cp && *cp != ';') { - if (*cp == ',') - ml+=stride; - cp++; - } - } - if (ml > ml_end) { - hts_log_error("%s: Insufficient number of entries in ML " - "tag", bam_get_qname(b)); - return -1; - } - } else { - // cp_end already known if FREVERSE - if (cp_end && (b->core.flag & BAM_FREVERSE)) - cp = cp_end; - else - while (*cp && *cp != ';') - cp++; - } - if (!*cp) { - hts_log_error("%s: Hit end of MM tag. 
Missing semicolon?", - bam_get_qname(b)); - return -1; - } - } - } - - state->nmods = mod_num; - - return 0; -} - -int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { - return bam_parse_basemod2(b, state, 0); -} - -/* - * Fills out mods[] with the base modifications found. - * Returns the number found (0 if none), which may be more than - * the size of n_mods if more were found than reported. - * Returns <= -1 on error. - * - * This always marches left to right along sequence, irrespective of - * reverse flag or modification strand. - */ -int bam_mods_at_next_pos(const bam1_t *b, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods) { - if (b->core.flag & BAM_FREVERSE) { - if (state->seq_pos < 0) - return -1; - } else { - if (state->seq_pos >= b->core.l_qseq) - return -1; - } - - int i, j, n = 0; - unsigned char base = bam_seqi(bam_get_seq(b), state->seq_pos); - state->seq_pos++; - if (b->core.flag & BAM_FREVERSE) - base = seqi_rc[base]; - - for (i = 0; i < state->nmods; i++) { - int unchecked = 0; - if (state->canonical[i] != base && state->canonical[i] != 15/*N*/) - continue; - - if (state->MMcount[i]-- > 0) { - if (!state->implicit[i] && - (state->flags & HTS_MOD_REPORT_UNCHECKED)) - unchecked = 1; - else - continue; - } - - char *MMptr = state->MM[i]; - if (n < n_mods) { - mods[n].modified_base = state->type[i]; - mods[n].canonical_base = seq_nt16_str[state->canonical[i]]; - mods[n].strand = state->strand[i]; - mods[n].qual = unchecked - ? HTS_MOD_UNCHECKED - : (state->ML[i] ? *state->ML[i] : HTS_MOD_UNKNOWN); - } - n++; - - if (unchecked) - continue; - - if (state->ML[i]) - state->ML[i] += (b->core.flag & BAM_FREVERSE) - ? 
-state->MLstride[i] - : +state->MLstride[i]; - - if (b->core.flag & BAM_FREVERSE) { - // process MM list backwards - char *cp; - for (cp = state->MMend[i]-1; cp != state->MM[i]; cp--) - if (*cp == ',') - break; - state->MMend[i] = cp; - if (cp != state->MM[i]) - state->MMcount[i] = strtol(cp+1, NULL, 10); - else - state->MMcount[i] = INT_MAX; - } else { - if (*state->MM[i] == ',') - state->MMcount[i] = strtol(state->MM[i]+1, &state->MM[i], 10); - else - state->MMcount[i] = INT_MAX; - } - - // Multiple mods at the same coords. - for (j=i+1; j < state->nmods && state->MM[j] == MMptr; j++) { - if (n < n_mods) { - mods[n].modified_base = state->type[j]; - mods[n].canonical_base = seq_nt16_str[state->canonical[j]]; - mods[n].strand = state->strand[j]; - mods[n].qual = state->ML[j] ? *state->ML[j] : -1; - } - n++; - state->MMcount[j] = state->MMcount[i]; - state->MM[j] = state->MM[i]; - if (state->ML[j]) - state->ML[j] += (b->core.flag & BAM_FREVERSE) - ? -state->MLstride[j] - : +state->MLstride[j]; - } - i = j-1; - } - - return n; -} - -/* - * Return data at the next modified location. - * - * bam_mods_at_next_pos does quite a bit of work, so we don't want to - * repeatedly call it for every location until we find a mod. Instead - * we check how many base types we can consume before the next mod, - * and scan through the sequence looking for them. Once we're at that - * site, we defer back to bam_mods_at_next_pos for the return values. 
- */ -int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods, int *pos) { - if (state->seq_pos >= b->core.l_qseq) - return 0; - - // Look through state->MMcount arrays to see when the next lowest is - // per base type; - int next[16], freq[16] = {0}, i; - memset(next, 0x7f, 16*sizeof(*next)); - const int unchecked = state->flags & HTS_MOD_REPORT_UNCHECKED; - if (b->core.flag & BAM_FREVERSE) { - for (i = 0; i < state->nmods; i++) { - if (unchecked && !state->implicit[i]) - next[seqi_rc[state->canonical[i]]] = 1; - else if (next[seqi_rc[state->canonical[i]]] > state->MMcount[i]) - next[seqi_rc[state->canonical[i]]] = state->MMcount[i]; - } - } else { - for (i = 0; i < state->nmods; i++) { - if (unchecked && !state->implicit[i]) - next[state->canonical[i]] = 0; - else if (next[state->canonical[i]] > state->MMcount[i]) - next[state->canonical[i]] = state->MMcount[i]; - } - } - - // Now step through the sequence counting off base types. - for (i = state->seq_pos; i < b->core.l_qseq; i++) { - unsigned char bc = bam_seqi(bam_get_seq(b), i); - if (next[bc] <= freq[bc] || next[15] <= freq[15]) - break; - freq[bc]++; - if (bc != 15) // N - freq[15]++; - } - *pos = state->seq_pos = i; - - if (i >= b->core.l_qseq) { - // Check for more MM elements than bases present. - for (i = 0; i < state->nmods; i++) { - if (!(b->core.flag & BAM_FREVERSE) && - state->MMcount[i] < 0x7f000000) { - hts_log_warning("MM tag refers to bases beyond sequence length"); - return -1; - } - } - return 0; - } - - if (b->core.flag & BAM_FREVERSE) { - for (i = 0; i < state->nmods; i++) - state->MMcount[i] -= freq[seqi_rc[state->canonical[i]]]; - } else { - for (i = 0; i < state->nmods; i++) - state->MMcount[i] -= freq[state->canonical[i]]; - } - - int r = bam_mods_at_next_pos(b, state, mods, n_mods); - return r > 0 ? r : 0; -} - -/* - * As per bam_mods_at_next_pos, but at a specific qpos >= the previous qpos. 
- * This can only march forwards along the read, but can do so by more than - * one base-pair. - * - * This makes it useful for calling from pileup iterators where qpos may - * start part way through a read for the first occurrence of that record. - */ -int bam_mods_at_qpos(const bam1_t *b, int qpos, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods) { - // FIXME: for now this is inefficient in implementation. - int r = 0; - while (state->seq_pos <= qpos) - if ((r = bam_mods_at_next_pos(b, state, mods, n_mods)) < 0) - break; - - return r; -} - -/* - * Returns the list of base modification codes provided for this - * alignment record as an array of character codes (+ve) or ChEBI numbers - * (negative). - * - * Returns the array, with *ntype filled out with the size. - * The array returned should not be freed. - * It is a valid pointer until the state is freed using - * hts_base_mod_free(). - */ -int *bam_mods_recorded(hts_base_mod_state *state, int *ntype) { - *ntype = state->nmods; - return state->type; -} - -/* - * Returns data about a specific modification type for the alignment record. - * Code is either positive (eg 'm') or negative for ChEBI numbers. - * - * Return 0 on success or -1 if not found. The strand, implicit and canonical - * fields are filled out if passed in as non-NULL pointers. - */ -int bam_mods_query_type(hts_base_mod_state *state, int code, - int *strand, int *implicit, char *canonical) { - // Find code entry - int i; - for (i = 0; i < state->nmods; i++) { - if (state->type[i] == code) - break; - } - if (i == state->nmods) - return -1; - - // Return data - if (strand) *strand = state->strand[i]; - if (implicit) *implicit = state->implicit[i]; - if (canonical) *canonical = "?AC?G???T??????N"[state->canonical[i]]; - - return 0; -} - -/* - * Returns data about the ith modification type for the alignment record. - * - * Return 0 on success or -1 if not found. 
The strand, implicit and canonical - * fields are filled out if passed in as non-NULL pointers. - */ -int bam_mods_queryi(hts_base_mod_state *state, int i, - int *strand, int *implicit, char *canonical) { - if (i < 0 || i >= state->nmods) - return -1; - - // Return data - if (strand) *strand = state->strand[i]; - if (implicit) *implicit = state->implicit[i]; - if (canonical) *canonical = "?AC?G???T??????N"[state->canonical[i]]; - - return 0; -} diff --git a/src/htslib-1.18/samples/DEMO.md b/src/htslib-1.18/samples/DEMO.md deleted file mode 100644 index 9117928..0000000 --- a/src/htslib-1.18/samples/DEMO.md +++ /dev/null @@ -1,1437 +0,0 @@ -# HTS API - -## HTSLib APIs and samtools - -HTSLib is a C library implementation used to access and process the genome -sequence data. HTSLib implements multiple API interfaces, HTS API, VCF API and -SAM API. HTS API provides a framework for use by other APIs and applications, -implements bgzf compression, htscodecs and provides CRAM format support. VCF -APIs work with variant data in VCF and BCF format. - -SAM API works with sequence data of different formats, SAM / BAM / CRAM / -FASTA / FASTQ, and provides methods to do operations on the data. It uses -methods from HTS API. - -'samtools' is the utility used to read and modify sequence data. It uses SAM -APIs from HTSLib to work on the sequence data. - - -## About this document - -There are a number of demonstration utilities and their source code in -'samples' directory of HTSLib and this document gives the description of them -and the usage of API of HTSLib. The samples are for demonstration -purposes only and proper error handling is required for actual usage. This -document is based on HTSLib version 1.17. - -Updates to this document may be made along with later releases when required. - - -## The sample apps - -Flags - This application showcases the basic read of alignment files and flag -access. It reads and shows the count of read1 and read2 alignments. 
- -Split - This application showcases the basic read and write of alignment data. -It saves the read1 and read2 as separate files in given directory, one as sam -and other as bam. - -Split2 - This application showcases the output file format selection. It saves -the read1 and read2 as separate files in given directory, both as compressed -sam though the extensions are different. - -Cram - This application showcases the different way in which cram reference -data is used for cram output creation. - -Read_fast - This application showcases the fasta/fastq data read. - -Read_header - This application showcases the read and access of header data. -It can show all header line of given type, data of a given tag on a specific -header line or for all lines of given type. - -Read_ref - This application showcases the read and access of header data. -It shows all reference names which has length equal or greater to given input. - -Read_bam - This application showcases read of different alignment data fields. -It shows contents of each alignment. - -Read_aux - This application showcases read of specific auxiliary tag data in -alignment. It shows the data retrieved using 2 APIs, one as a string with tag -data and other as raw data alternatively. - -Dump_aux - This application showcases read of all auxiliary tag data one by one -in an alignment. It shows the data retrieved. - -Add_header - This application showcases the write of header lines to a file. -It adds header line of types, SQ, RG, PG and CO and writes to standard output. - -Remove_header - This application showcases removal of header line from a file. -It removes either all header lines of given type or one specific line of given -type with given unique identifier. Modified header is written on standard -output. - -Update_header - This application shows the update of header line fields, where -update is allowed. It takes the header line type, unique identifier for the -line, tag to be modified and the new value. 
Updated data is written on standard -output. - -Mod_bam - This application showcases the update of alignment data. It takes -alignment name, position of field to be modified and new value of it. -Modified data is written on standard output. - -Mod_aux - This application showcases the update of auxiliary data in alignment. -It takes alignment name, tag to be modified, its type and new value. Modified -data is written on standard output. - -Mod_aux_ba - This application showcases the update of auxiliary array data in -alignment. It adds count of ATCGN base as an array in auxiliary data, BA:I. -Modified data is written on standard output. - -Write_fast - This application showcases the fasta/fastq data write. It appends -a dummy data to given file. - -Index_write - This application showcases the creation of index along with -output creation. Based on file type and shift, it creates bai, csi or crai -files. - -Read_reg - This application showcases the usage of region specification in -alignment read. - -Read_multireg - This application showcases the usage of mulitple regionn -specification in alignment read. - -Pileup - This application showcases the pileup api, where all alignments -covering a reference position are accessed together. It displays the bases -covering each position on standard output. - -Mpileup - This application showcases the mpileup api, which supports multiple -input files for pileup and gives a side by side view of them in pileup format. -It displays the bases covering each position on standard output. - -Modstate - This application showcases the access of base modifications in -alignment. It shows the modifications present in an alignment and accesses them -using available APIs. There are 2 APIs and which one to be used can be selected -through input. - -Pileup_mod - This application showcases the base modification access in pileup -mode. It shows the pileup display with base modifications. 
- -Flags_field - This application showcases the read of selected fields alone, -reducing the overhead / increasing the performance. It reads the flag field -alone and shows the count of read1 and read2. This has impact only on CRAM -files. - -Split_thread1 - This application showcases the use of threads in file handling. -It saves the read1 and read2 as separate files in given directory, one as sam -and other as bam. 2 threads are used for read and 1 each dedicated for each -output file. - -Split_thread2 - This application showcases the use of thread pool in file -handling. It saves the read1 and read2 as separate files in given directory, -one as sam and other as bam. A pool of 4 threads is created and shared for both -read and write. - - -## Building the sample apps - -The samples expect the HTSLib is installed, libraries and header file path are -part of the PATH environment variable. If not, these paths need to be explicitly -passed during the build time. - -Gcc and compatible compilers can be used to build the samples. - -These applications can be linked statically or dynamically to HTSLib. -For static linking, along with htslib other libraries and/or headers required -to build are, math, pthread, curl, lzma, z and bz2 libraries. - -A makefile is available along with source files which links statically to -htslib. To use dynamic linking, update the makefile's 'LDFLAGS' and 'rpath' -path. The 'rpath' path to be set as the path to lib directory of htslib -installation. - - -## Usage of HTS APIs -### Sequence data file access for read - -The sequence data file for read may be opened using the sam_open method. It -opens the file and returns samFile (htsFile) pointer on success or NULL on -failure. The input can be path to a file in disk, network, cloud or '-' -designating the standard input. - -SAM, BAM and CRAM file formats are supported and the input file format is -detected from the file content. - -Once done with the file, it needs to be closed with sam_close. 
- -Many times, header details would be required and can be read using -sam_hdr_read api. It returns sam_hdr_t pointer or NULL. The returned header -needs to be destroyed using sam_hdr_destroy when no longer required. - -The sequence data may be compressed or uncompressed on disk and on memory it -is read and kept as uncompressed BAM format. It can be read from a file using -sam_read1 api. samFile pointer, header and bam storage are to be passed as -argument and it returns 0 on success, -1 on end of file and < -1 in case of -errors. - -The bam storage has to be initialised using bam_init1 api before the call and -can be reused for successive reads. Once done, it needs to be destroyed using -bam_destroy1. The member field named core - bam1_core_t - in bam storage, -bam1_t, has the sequence data in an easily accessible way. Using the fields -and macros, data can easily be read from it. - - #include - - int main(int argc, char *argv[]) - { - ... - //initialize - if (!(bamdata = bam_init1())) { - ... - //open input files - r reading - if (!(infile = sam_open(inname, "r"))) { - ... - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - ... - //read data, check flags and update count - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - cntread1++; - } - ... - //clean up - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; - } -Refer: flags_demo.c - -This shows the count of read1 and read2 alignments. - - ./flags /tmp/sample.sam.gz - -To read CRAM files, reference data is required and if it is not available, based -on configuration, library may try to download it from external repositories. - - -### Sequence data file access for write - -File access for write is similar to read with a few additional optional steps. - -The output file can be opened using sam_open api as in read, with "w" instead -of "r" as mode. 
This opens the file for writing and uses mode to select the -output file type. "w" alone denotes SAM, "wb" denotes BAM and "wc" denotes CRAM. - -Another way is to use sam_open_mode method, which sets the output file type and -compression based on the file name and explicit textual format specification. -This method expects a buffer to append type and compression flags. Usually a -buffer with standard file open flag is used, the buffer past the flag is passed -to the method to ensure existing flags and updates from this method are present -in the same buffer without being overwritten. This method will add more flags -indicating file type and compression based on name. If explicit format detail -given, then extension is ignored and the explicit specification is used. This -updated buffer can be used with sam_open to select the file format. - -sam_open_format method may also be used to open the file for output as more -information on the output file can be specified using this. Can use -mode buffer from sam_open_mode api or explicit format structure for this. - -The header data can be written using the sam_hdr_write api. When the header -data is copied to another variable and has different lifetime, it is good to -increase the reference count of the header using sam_hdr_incr_ref and -sam_hdr_destroy called as many times as required. - -The alignment data can be written using the sam_write1 api. It takes a samFile -pointer, header pointer and the alignment data. The header data is required to -set the reference name in the alignment. It returns -ve value on error. - - int main(int argc, char *argv[]) - { - ... - if (!(infile = sam_open(inname, "r"))) { - ... - outfile1 = sam_open(file1, "w"); //as SAM - outfile2 = sam_open(file2, "wb"); //as BAM - ... - if (!(in_samhdr = sam_hdr_read(infile))) { - ... - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || - (sam_hdr_write(outfile2, in_samhdr) == -1)) { - ... 
- while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - ... - } -Refer: split.c - -This creates 1.sam and 2.bam in /tmp/ containing read1 and read2 respectively. - - ./split /tmp/sample.sam.gz /tmp/ - -Below code excerpt shows sam_open_mode api usage. - - int main(int argc, char *argv[]) - { - ... - //set file open mode based on file name for 1st and as explicit for 2nd - if ((sam_open_mode(mode1+1, file1, NULL) == -1) || - (sam_open_mode(mode2+1, file2, "sam.gz") == -1)) { - ... - if (!(infile = sam_open(inname, "r"))) { - ... - //open output files - outfile1 = sam_open(file1, mode1); //as compressed SAM through sam_open - outfile2 = sam_open_format(file2, mode2, NULL); //as compressed SAM through sam_open_format - ... - } -Refer: split2.c - -This creates 1.sam.gz and 2.sam in /tmp/ both having compressed data. - - ./split2 /tmp/sample.sam.gz /tmp/ - -An htsFormat structure filled appropriately can also be used to specify output -file format while using sam_open_format api. - - -### CRAM writing - -CRAM files uses reference data and compresses alignment data. A CRAM file may -be created with external reference data file - most appropriate, with embedded -reference in it or with no reference data at all. It can also be created using -an autogenerated reference, based on consensus with-in the alignment data. -The reference detail can be set to an htsFormat structure using hts_parse_format -api and used with sam_open_format api to create appropriate CRAM file. - - ... - snprintf(reffmt1, size1, "cram,reference=%s", reffile); - snprintf(reffmt2, size2, "cram,embed_ref=1,reference=%s", reffile); - ... 
- if (hts_parse_format(&fmt1, reffmt1) == -1 || //using external reference - uses the M5/UR tags to get - reference data during read - hts_parse_format(&fmt2, reffmt2) == -1 || //embed the reference internally - hts_parse_format(&fmt3, "cram,embed_ref=2") == -1 || //embed autogenerated reference - hts_parse_format(&fmt4, "cram,no_ref=1") == -1) { //no reference data encoding at all - ... - outfile1 = sam_open_format(file1, "wc", &fmt1); outfile2 = sam_open_format(file2, "wc", &fmt2); - ... -Refer: cram.c - - -### FASTA/FASTQ data access - -FASTA/FASTQ files have the raw sequence data and the data can be read one by -one using sam_read1 or a selected range using a region. The data can be written -similar to alignment data using sam_write1 api. To write the file, format -can be set by updating mode buffer using sam_open_mode with file name -or explicit format text. This mode buffer can be used with sam_open or can be -used with sam_open_format with explicit format information in htsFormat -structure. - - ... - if (!(bamdata = bam_init1())) { - ... - if (!(infile = sam_open(inname, "r"))) { - ... - if (infile->format.format != fasta_format && infile->format.format != fastq_format) { - ... - if (!(in_samhdr = sam_hdr_read(infile))) { - ... - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - printf("\nsequence: "); - for (c = 0; c < bamdata->core.l_qseq; ++c) { - printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]); - } - if (infile->format.format == fastq_format) { - printf("\nquality: "); - for (c = 0; c < bamdata->core.l_qseq; ++c) { - printf("%c", bam_get_qual(bamdata)[c]); - ... -Refer: read_fast.c - - ... - char mode[4] = "a"; - ... - if (sam_open_mode(mode + 1, outname, NULL) < 0) { - ... - if (!(outfile = sam_open(outname, mode))) { - ... - if (bam_set1(bamdata, sizeof("test"), "test", BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, 10, "AACTGACTGA", "1234567890", 0) - < 0) { - ... 
- if (sam_write1(outfile, out_samhdr, bamdata) < 0) { - printf("Failed to write data\n"); - ... -Refer: write_fast.c - - -### Header data read - -The header gives the version, reference details, read group, change history -and comments. These data are stored inside the sam_hdr_t. Each of these -entries, except comments, have their unique identifier and it is required to -access different fields of them. The api sam_hdr_count_lines gives the count -of the specified type of header line. The value of a unique identifier to a -specific type of header line can be retrieved with sam_hdr_line_name api. The -api sam_hdr_find_tag_id and sam_hdr_find_tag_pos can get the field data from a -header line using unique identifier values or using position. The full header -line can be retrieved using sam_hdr_find_line_pos or sam_hdr_line_id with -position and unique identifier values respectively. - - ... - if (!(in_samhdr = sam_hdr_read(infile))) { - ... - ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data); - ... - ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data); - ... - linecnt = sam_hdr_count_lines(in_samhdr, header); - ... - ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data); - ... - ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data); - ... -Refer: read_header.c - -This will show the VN tag's value from HD header. - - ./read_header /tmp/sample.sam.gz HD VN - -Shows the 2nd SQ line's LN field value. - - ./read_header /tmp/sample.sam.gz SQ SN T2 LN - -Below code excerpt shows the reference names which has length above given value. - - ... - linecnt = sam_hdr_count_lines(in_samhdr, "SQ"); //get reference count - ... - //iterate and check each reference's length - for (pos = 1, c = 0; c < linecnt; ++c) { - if ((ret = sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "LN", &data) == -2)) { - ... 
- size = atoll(data.s); - if (size < minsize) { - //not required - continue; - } - if (!(id = sam_hdr_line_name(in_samhdr, "SQ", c))) { - //sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "SN", &data) can also do the same! - ... - printf("%d,%s,%s\n", pos, id, data.s); - ... -Refer: read_refname.c - - -### Alignment data read - -The alignment / sequence data contains many fields. Mainly the read/query -name, flags indicating the properties of the read, reference sequence name, -position in reference to which it matches, quality of the read, CIGAR string -indicating the match status, position of mate / reverse strand, name of -reference sequence to which mate matches, the insert length, base sequence, -quality value of each base and auxiliary fields. - -Header data would be required to retrieve the reference names as alignment -contains the position of the reference in the header. - -A few of the data are directly visible in bam1_t and the rest are hidden -inside data member of bam1_t and can easily be retrieved using macros. -bam_get_qname gives the name of the read, sam_hdr_tid2name gives the reference -name. bam_get_cigar retrieves the cigar operation array, which can be decoded -using bam_cigar_oplen to get count of bases to which that operation applicable -and bam_cigar_opchr to get the cigar operation. bam_seqi retrieves the base -data at a given position in alignment and it can be converted to character by -indexing the seq_nt16_str array. - - ... - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE] - printf("NAME: %s\n", bam_get_qname(bamdata)); //get the query name using the macro - flags = bam_flag2str(bamdata->core.flag); //flags as string - ... - tidname = sam_hdr_tid2name(in_samhdr, bamdata->core.tid); - ... 
- printf("MQUAL: %d\n", bamdata->core.qual); //map quality value - cigar = bam_get_cigar(bamdata); //retrieves the cigar data - for (i = 0; i < bamdata->core.n_cigar; ++i) { //no. of cigar data entries - printf("%d%c", bam_cigar_oplen(cigar[i]), bam_cigar_opchr(cigar[i])); //the macros gives the count of operation - and the symbol of operation for given cigar entry - } - printf("\nTLEN/ISIZE: %"PRIhts_pos"\n", bamdata->core.isize); - data = bam_get_seq(bamdata); - //get the sequence data - if (bamdata->core.l_qseq != bam_cigar2qlen(bamdata->core.n_cigar, cigar)) { //checks the length with CIGAR and query - ... - for (i = 0; i < bamdata->core.l_qseq ; ++i) { //sequence length - printf("%c", seq_nt16_str[bam_seqi(data, i)]); //retrieves the base from (internal compressed) sequence data - ... - printf("%c", bam_get_qual(bamdata)[i]+33); //retrives the quality value - ... -Refer: read_bam.c - -Shows the data from alignments. - - ./read_bam /tmp/sample.sam.gz - - -### Aux data read - -Auxiliary data gives extra information about the alignment. There can be a -number of such data and can be accessed by specifying required tag or by -iterating one by one through them once the alignment is read as bam1_t. The -auxiliary data are stored along with the variable length data in the data -field of bam1_t. There are macros defined to retrieve information about -auxiliary data from the data field of bam1_t. - -Data for a specific tag can be retrieved as a string or can be retrieved as raw -data. bam_aux_get_str retrieves as a string, with tag name, tag type and data. -bam_aux_get can get raw data and with bam_aux_type and bam_aux2A, bam_aux2f etc. -the raw data can be extracted. - -To iterate through all data, the start of aux data is retrieved using macro -bam_aux_first and successive ones using bam_aux_next. Macro bam_aux_tag gives -the tag of the aux field and bam_aux_type gives the information about type of -the aux field. 
The bam_aux2i, bam_aux2f and bam_aux2Z macros retrieve the aux data's value as
A single header line can be added -using sam_hdr_add_line api where the header type, tag and value pair are passed -as arguments, terminated with a NULL argument. The PG header lines are special -that they have a kind of linkage to previous PG lines. This linkage can be auto -generated by using sam_hdr_add_pg api which sets the 'PP' field used in linkage. -sam_hdr_write api does the write of the header data to file. - - ... - //add SQ line with SN as TR1 and TR2 - if (sam_hdr_add_lines(in_samhdr, &sq[0], 0)) { //length as 0 for NULL terminated data - ... - //add RG line with ID as RG1 - if (sam_hdr_add_line(in_samhdr, "RG", "ID", "RG1", "LB", "Test", "SM", "S1", NULL)) { - ... - //add pg line - if (sam_hdr_add_pg(in_samhdr, "add_header", "VN", "Test", "CL", data.s, NULL)) { //NULL is to indicate end of args - ... - if (sam_hdr_add_line(in_samhdr, "CO", "Test data", NULL)) { //NULL is to indicate end of args - ... - //write output - if (sam_hdr_write(outfile, in_samhdr) < 0) { - ... -Refer: add_header.c - -Not all type of header data can be removed but where it is possible, either a -specific header line can be removed or all of a header type can be removed. To -remove a specific line, header type, unique identifier field tag and its value -to be used. To remove all lines of a type, header type and unique identifier -field tag are to be used. - - ... - //remove specific line - if (sam_hdr_remove_line_id(in_samhdr, header, id, idval)) { - ... - //remove multiple lines of a header type - if (sam_hdr_remove_lines(in_samhdr, header, id, NULL)) { - ... - if (sam_hdr_write(outfile, in_samhdr) < 0) { - ... -Refer: rem_header.c - -Shows the file content after removing SQ line with SN 2. - ./rem_header ../../samtools/test/mpileup/mpileup.1.bam SQ 2 - -The unique identifier for the line needs to be found to update a field, though -not all types in the header may be modifiable. 
The api sam_hdr_update_line -takes the unique identifier for the header line type, its value, the field -which needs to be modified and the new value with which to modify it, followed -by a NULL. -e.g. To change LN field from 2000 to 2250 in SQ line with unique identifier SN -as 'chr1', sam_hdr_update_line( header, "SQ", "SN", "chr1", "LN", "2250", -NULL). To change PP field from ABC to DEF in PG line with ID APP.10, -sam_hdr_update_line( header, "PG", "ID", "APP.10", "PP", "DEF", NULL). - - ... - //update with new data - if (sam_hdr_update_line(in_samhdr, header, id, idval, tag, val, NULL) < 0) { - printf("Failed to update data\n"); - goto end; - } - ... -Refer: update_header.c - -Shows new sam file with 2nd SQ line having length as 38. - - ./update_header /tmp/sample.sam.gz SQ T1 LN 38 - - -### Update alignment data - -Many of the bam data fields may be updated by setting new value to appropriate -field in bam1_core_t structure and for a few, creating a new bam1_t record would -be easier than update of existing record. - - ... - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - ... - case 1:// QNAME - ret = bam_set_qname(bamdata, val); - break; - case 2:// FLAG - bamdata->core.flag = atol(val) & 0xFFFF; - break; - case 3:// RNAME - case 7:// RNEXT - if ((ret = sam_hdr_name2tid(in_samhdr, val)) < 0) { - ... - if (field == 3) { - //reference - bamdata->core.tid = ret; - } - else { - //mate reference - bamdata->core.mtid = ret; - } - break; - case 4:// POS - bamdata->core.pos = atoll(val); - break; - case 5:// MAPQ - bamdata->core.qual = atoi(val) & 0x0FF; - break; - case 6:// CIGAR - { - ... - //get cigar array and set all data in new bam record - if ((ncigar = sam_parse_cigar(val, NULL, &cigar, &size)) < 0) { - ... 
- if (bam_set1(newbam, bamdata->core.l_qname, bam_get_qname(bamdata), bamdata->core.flag, bamdata->core.tid, - bamdata->core.pos, bamdata->core.qual, ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos, - bamdata->core.isize, bamdata->core.l_qseq, (const char*)bam_get_seq(bamdata), - (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) { - ... - //correct sequence data as input is expected in ascii format and not as compressed inside bam! - memcpy(bam_get_seq(newbam), bam_get_seq(bamdata), (bamdata->core.l_qseq + 1) / 2); - //copy the aux data - memcpy(bam_get_aux(newbam), bam_get_aux(bamdata), bam_get_l_aux(bamdata)); - ... - break; - case 8:// PNEXT - bamdata->core.mpos = atoll(val); - break; - case 9:// TLEN - bamdata->core.isize = atoll(val); - break; - case 10:// SEQ - ... - for( c = 0; c < i; ++c) { - bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[(unsigned char)val[c]]); - } - break; - case 11:// QUAL - ... - for (c = 0; c < i; ++c) { - val[c] -= 33; //phred score from ascii value - } - memcpy(bam_get_qual(bamdata), val, i); - ... -Refer: mod_bam.c - -Shows data with RNAME modified to T2. - - ./mod_bam /tmp/sample.sam ITR1 3 T2 - -The auxiliary data in bam1_t structure can be modified using -bam_aux_update_float, bam_aux_update_int etc. apis. If the aux field is not -present at all, it can be appended using bam_aux_append. - - ... - //matched to qname, update aux - if (!(data = bam_aux_get(bamdata, tag))) { - //tag not present append - ... - if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val)) { - ... - else { - char auxtype = bam_aux_type(data); - //update the tag with newer value - switch (type) { - case 'f': - case 'd': - ... - if (bam_aux_update_float(bamdata, tag, atof(val))) { - ... - case 'C': - case 'S': - case 'I': - ... - if (bam_aux_update_int(bamdata, tag, atoll(val))) { - ... - case 'Z': - ... - if (bam_aux_update_str(bamdata, tag, length, val)) { - ... - case 'A': - ... 
- //update the char data directly on buffer - *(data+1) = val[0]; - ... -Refer: mod_aux.c - -Shows the given record's MD tag set to Test. - - ./mod_aux samtools/test/mpileup/mpileup.1.bam ERR013140.6157908 MD Z Test - -The array aux fields can be updated using bam_aux_update_array api. - - ... - if (bam_aux_update_array(bamdata, "BA", 'I', sizeof(cnt)/sizeof(cnt[0]), cnt)) { - ... -Refer: mod_aux_ba.c - -Shows the records updated with an array of integers, containing count of ACGT -and N in that order. - - ./mod_aux_ba samtools/test/mpileup/mpileup.1.bam - - -### Create an index - -Indexes help to read data faster without iterating sequentially through the -file. Indexes contain the position information about alignments and that they -can be read easily. There are different type of indices, BAI, CSI, CRAI, TBI, -FAI etc. and are usually used with iterators. - -Indexing of plain/textual files are not supported, compressed SAM&FASTA/Q, BAM, -and CRAM files can be indexed. CRAM files are indexed as .crai and the other two -can be indexed as .bai or .csi files. Each of these types have different -internal representations of the index information. Bai uses a fixed -configuration values where as csi has them dynamically updated based on the -alignment data. - -Indexes can be created either with save of alignment data or explicitly by -read of existing alignment file. - -To create index along with alignment write, the sam_idx_init api need to be -invoked before the start of alignment data write. This api takes the output -samFile pointer, header pointer, minimum shift and index file path. For BAI -index, the min shift has to be 0. - -At the end of write, sam_idx_save api need to be invoked to save the index. - - //write header - if (sam_hdr_write(outfile, in_samhdr)) { - ... - // initialize indexing, before start of write - if (sam_idx_init(outfile, in_samhdr, size, fileidx)) { - ... - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - ... 
- if (sam_idx_save(outfile)) { - ... -Refer:index_write.c - -Creates mpileup.1.bam and mpileup.1.bam.bai in /tmp/. - - ./idx_on_write ../../samtools/test/mpileup/mpileup.1.bam 0 /tmp/ - -To create index explicitly on an existing alignment data file, the -sam_index_build api or its alike can be used. sam_index_build takes the -alignment file path, min shift for the index and creates the index file in -same path. The output name will be based on the alignment file format and min -shift passed. - -The sam_index_build2 api takes the index file path as well and gives more -control than the previous one. The sam_index_build3 api provides an option to -configure the number of threads in index creation. - - -### Read with iterators - -Index file helps to read required data without sequentially accessing the file -and are required to use iterators. The interested reference, start and end -position etc. are required to read data with iterators. With index and these -information, an iterator is created and relevant alignments can be accessed by -iterating it. - -The api sam_index_load and the like does the index loading. It takes input -samFile pointer and file path. It loads the index file based on the input file -name, from the same path and with implicit index file extension - cram file -with .crai and others with .bai. The sam_index_load2 api accepts explicit path -to index file, which allows loading it from a different location and explicit -extensions. The sam_index_load3 api supports download/save of the index -locally from a remote location. These apis returns NULL on failure and index -pointer on success. - -The index file path can be appended to alignment file path and used as well. -In this case the paths are expected to be separated by '##idx##'. - -The sam_iter_queryi or sam_iter_querys apis may be used to create an iterator -and sam_itr_next api does the alignment data retrieval. 
Along with retrieval -of current data, it advances the iterator to next relevant data. The -sam_iter_queryi takes the interested positions as numeric values and -sam_iter_querys takes the interested position as a string. - -With sam_iter_queryi, the reference id can be the 0 based index of reference -data, -2 for unmapped alignments, -3 to start read from beginning of file, -4 -to continue from current position, -5 to return nothing. Based on the -reference id given, alignment covering the given start and end positions will -be read with sam_iter_next api. - -With sam_iter_querys, the reference sequence is identified with the name and -interested positions can be described with start and end separated by '-' as -string. When sequence is identified as '.', it begins from the start of file -and when it is '*', unmapped alignments are read. Reference with [:], -:S, :S-E, :-E retrieves all data, all data covering position -S onwards, all data covering position S to E, all data covering upto position -E of reference with ID respectively on read using sam_iter_next. - -The index and iterator created are to be destroyed once the need is over. -sam_itr_destroy and hts_idx_destroy apis does this. - - ... - //load index file - if (!(idx = sam_index_load2(infile, inname, idxfile))) { - ... - //create iterator - if (!(iter = sam_itr_querys(idx, in_samhdr, region))) { - ... - //read using iterator - while ((c = sam_itr_next(infile, iter, bamdata)) >= 0) { - ... - if (iter) { - sam_itr_destroy(iter); - } - if (idx) { - hts_idx_destroy(idx); - ... -Refer:index_reg_read.c - -With sample.sam, region as \* will show alignments with name UNMAP2 and UNMAP3 - - ./read_reg /tmp/sample.sam.gz \* - -With region as \., it shows all alignments - - ./read_reg /tmp/sample.sam.gz \. - -With region as T1:1-4, start 1 and end 4 it shows nothing and with T1:1-5 it -shows alignment with name ITR1. 
- - ./read_reg /tmp/sample.sam.gz T1:1-5 - -With region as T2:30-100, it shows alignment with name ITR2M which refers the -reference data T2. - - ./read_reg /tmp/sample.sam.gz T2:30-100 - - -Multiple interested regions can be specified for read using sam_itr_regarray. -It takes index path, header, count of regions and region descriptions as array -of char array / string. This array passed need to be released by the user -itself. - - ... - //load index file, assume it to be present in same location - if (!(idx = sam_index_load(infile, inname))) { - ... - //create iterator - if (!(iter = sam_itr_regarray(idx, in_samhdr, regions, regcnt))) { - ... - if (regions) { - //can be freed as it is no longer required - free(regions); - regions = NULL; - } - //get required area - while ((c = sam_itr_multi_next(infile, iter, bamdata) >= 0)) { - ... -Refer:index_multireg_read.c - -With compressed sample.sam and 2 regions from reference T1 (30 to 32) and 1 -region from T2 (34 onwards), alignments with name A1, B1, A2 and ITR2M would -be shown. - - ./read_multireg /tmp/sample.sam.gz 2 T1:30-32,T2:34 - -To use numeric indices instead of textual regions, sam_itr_regions can be used. -It takes index file path, header, count of regions and an array of region -description (hts_reglist_t*), which has the start end positions as numerals. - -The index and iterators are to be destroyed using the sam_itr_destroy and -hts_idx_destroy. The hts_reglist_t* array passed is destroyed by the library -on iterator destroy. The regions array (array of char array/string) needs to be -destroyed by the user itself. - - -### Pileup and MPileup - -Pileup shows the transposed view of the SAM alignment data, i.e. it shows the -the reference positions and bases which cover that position through different -reads side by side. MPileup facilitates the piling up of multiple sam files -against each other and same reference at the same time. - -Mpileup has replaced the pileup. 
The input expects the data to be sorted by -position. - -Pileup needs to be initialized with bam_pileup_init method which takes pointer -to a method, which will be called by pileup to read data from required files, -and pointer to data which might be required for this read method to do the -read operation. It returns a pointer to the pileup iterator. - -User can specify methods which need to be invoked during the load and unload -of an alignment, like constructor and destructor of objects. -Bam_plp_constructor and bam_plp_destructor methods does the setup of -these methods in the pileup iterator. During invocation of these methods, the -pointer to data passed in the initialization is passed as well. If user want -to do any custom status handling or actions during load or unload, it can be -done in these methods. Alignment specific data can be created and stored in -an argument passed to the constructor and the same will be accessible during -pileup status return. The same will be accessible during destructor as well -where any deallocation can be made. - -User is expected to invoke bam_plp_auto api to get the pileup status. It -returns the pileup status or NULL on end. During this all alignments are read -one by one, using the method given in initialization for data read, until one -for a new reference is found or all alignment covering a position is read. On -such condition, the pileup status is returned and the same continuous on next -bam_plp_auto call. The pileup status returned is an array for all positions -for which the processing is completed. Along with the result, the reference -index, position in reference data and number of alignments which covers this -position are passed. User can iterate the result array and get bases from each -alignment which covers the given reference position. The alignment specific -custom data which were created in constructor function will also be available -in the result. 
- -The bam_plp_auto api invokes the data read method to load an alignment and the -constructor method is invoked during the load. Once the end of alignment is -passed, it is removed from the processing and destructor method is invoked, -that user could do deallocations and custom actions as in load during this -time. The custom data passed during the initialization is passed to the -constructor and destructor methods during invocation. - -Once the forward and reverse strands are identified, the better of the quality -is identified and used. Both reads are required for this and hence reads are -cached until its mate is read. The maximum number of reads that can be cached -is controlled by bam_plp_set_maxcnt. Reads covering a position are cached and -as soon as mate is found, quality is adjusted and is removed from cache. Reads -above the cache limit are discarded. - -Once done, the pileup iterator to be discarded by sam_plp_destroy api. - - ... - if (!(plpiter = bam_plp_init(readdata, &conf))) { - ... - //set constructor destructor callbacks - bam_plp_constructor(plpiter, plpconstructor); - bam_plp_destructor(plpiter, plpdestructor); - - while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &n))) { - printf("%d\t%d\t", tid+1, refpos+1); - for (j = 0; j < n; ++j) { - //doesnt detect succeeding insertion and deletion together here, only insertion is identified - //deletion is detected in plp->is_del as and when pos reaches the position - //if detection ahead is required, use bam_plp_insertion here which gives deletion length along with insertion - if (plp[j].is_del || plp[j].is_refskip) { - printf("*"); - continue; - } - //start and end are displayed in UPPER and rest on LOWER - printf("%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - (plp[j].is_tail ? 
toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]))); - if (plp[j].indel > 0) { - //insertions, anyway not start or end - printf("+%d", plp[j].indel); - for (k = 0; k < plp[j].indel; ++k) { - printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos + k + 1)])); - } - } - else if (plp[j].indel < 0) { - printf("%d", plp[j].indel); - for (k = 0; k < -plp[j].indel; ++k) { - printf("?"); - } - ... - if (plpiter) { - bam_plp_destroy(plpiter); - ... -Refer:pileup.c - -The read method may use a simple read or it could be an advanced read using -indices, iterators and region specifications based on the need. The constructor -method may create any custom data and store it in the pointer passed to it. The -same need to be released by use on destructor method. - -MPileup works same as the pileup and supports multiple inputs against the same -reference, giving side by side view of reference and alignments from different -inputs. - -MPileup needs to be initialized with bam_mpileup_init method which takes -pointer to a method, which will be called by pileup to read data from required -files, and an array of pointer to data which might be required for this read -method to do the read operation. It returns a pointer to the mpileup iterator. - -User can specify methods which need to be invoked during the load and unload -of an alignment, like constructor and destructor of objects. -bam_mplp_constructor and bam_mplp_destructor methods does the setup -of these methods in the pileup iterator. During invocation of these methods, -the pointer to data passed in the initialization is passed as well. If user -want to do any custom status handling or actions during load or unload, it can -be done on these methods. Alignment specific data can be created and -stored in the custom data pointer and the same will be accessible during -return of pileup status. 
The same will be accessible during destructor as well -where any deallocation can be made. - -User is expected to invoke bam_mplp_auto api to get the pileup status. It -returns the pileup status. During this all alignments are read one by one, -using the method given in initialization for data read, until one for a new -reference is found or all alignment covering a position is read. On such -condition, the pileup status is returned and the same continuous on next -bam_mplp_auto call. - -The pileup status is returned through a parameter in the method itself, is an -array for all inputs, each containing array for positions on which the -processing is completed. Along with the result, the reference index, position -in reference data and number of alignments which covers this position are -passed. User can iterate the result array and get bases from each alignment -which covers the given reference position. The alignment specific custom data -which were created in constructor function will also be available in the -result. - -Once the forward and reverse strands are identified, the better of the quality -is identified and used. Both reads are required for this and hence reads are -cached until its mate is read. The maximum number of reads that can be cached -is controlled by bam_mplp_set_maxcnt. Reads covering a position are cached and -as soon as mate is found, quality is adjusted and is removed from cache. Reads -above the cache limit are discarded. - -Once done, the pileup iterator to be discarded by sam_mplp_destroy api. - - ... - if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf))) { - ... 
- //set constructor destructor callbacks - bam_mplp_constructor(mplpiter, plpconstructor); - bam_mplp_destructor(mplpiter, plpdestructor); - - while (bam_mplp64_auto(mplpiter, &tid, &refpos, depth, plp) > 0) { - printf("%d\t%"PRIhts_pos"\t", tid+1, refpos+1); - - for (input = 0; input < argc - 1; ++input) { - for (dpt = 0; dpt < depth[input]; ++dpt) { - if (plp[input][dpt].is_del || plp[input][dpt].is_refskip) { - printf("*"); - continue; - } - //start and end are displayed in UPPER and rest on LOWER - printf("%c", plp[input][dpt].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), - plp[input][dpt].qpos)]) : (plp[input]->is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), - plp[input][dpt].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), - plp[input][dpt].qpos)]))); - if (plp[input][dpt].indel > 0) { - //insertions, anyway not start or end - printf("+%d", plp[input][dpt].indel); - for (k = 0; k < plp[input][dpt].indel; ++k) { - printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), - plp[input][dpt].qpos + k + 1)])); - } - } - else if (plp[input][dpt].indel < 0) { - printf("%d", plp[input][dpt].indel); - for (k = 0; k < -plp[input][dpt].indel; ++k) { - printf("?"); - ... - if (mplpiter) { - bam_mplp_destroy(mplpiter); - } - ... - if (plp) { - free(plp); - ... -Refer:mpileup.c - -This sample takes multiple sam files and shows the pileup of data side by side. - - ./mpileup /tmp/mp.bam /tmp/mp.sam - - -### Base modifications - -The alignment data may contain base modification information as well. This -gives the base, modifications found, orientation in which it was found and the -quality for the modification. The base modification can be identified using -hts_parse_basemod api. It stores the modification details on hts_base_mod_state -and this has to be initialized using hts_base_mod_state_alloc api. 
- -Once the modifications are identified, they can be accessed through different -ways. bam_mods_recorded api gives the modifications identified for an alignment. -Modifications can be queried for each base position iteratively using -bam_mods_at_next_pos api. Check the returned value with buffer size to see -whether the buffer is big enough to retrieve all modifications. -Instead of querying for each position, the next modified position can be -directly retrieved directly using bam_next_basemod api. An alignment can be -queried to have a specific modification using bam_mods_query_type api. At the -end of processing, the state need to be released using hts_base_mod_state_free -api. - - ... - if (!(ms = hts_base_mod_state_alloc())) { - ... - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - ... - if (bam_parse_basemod(bamdata, ms)) { - ... - bm = bam_mods_recorded(ms, &cnt); - for (k = 0; k < cnt; ++k) { - printf("%c", bm[k]); - } - printf("\n"); - hts_base_mod mod[5] = {0}; //for ATCGN - if (opt) { - //option 1 - for (; i < bamdata->core.l_qseq; ++i) { - if ((r = bam_mods_at_next_pos(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]))) <= -1) { - printf("Failed to get modifications\n"); - goto end; - } - else if (r > (sizeof(mod) / sizeof(mod[0]))) { - printf("More modifications than this app can handle, update the app\n"); - goto end; - } - else if (!r) { - //no modification at this pos - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - //modifications - for (j = 0; j < r; ++j) { - printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); - ... - else { - //option 2 - while ((r = bam_next_basemod(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]), &pos)) >= 0) { - for (; i < bamdata->core.l_qseq && i < pos; ++i) { - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - //modifications - for (j = 0; j < r; ++j) { - printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); - } - ... 
- //check last alignment's base modification - int strand = 0, impl = 0; - char canonical = 0, modification[] = "mhfcgebaon"; //possible modifications - printf("\n\nLast alignment has \n"); - for (k = 0; k < sizeof(modification) - 1; ++k) { //avoiding NUL termination - if (bam_mods_query_type(ms, modification[k], &strand, &impl, &canonical)) { - printf ("No modification of %c type\n", modification[k]); - } - else { - printf("%s strand has %c modified with %c, can %sassume unlisted as unmodified\n", strand ? "-/bottom/reverse" : - "+/top/forward", canonical, modification[k], impl?"" : "not " ); - } - } - ... - if (ms) { - hts_base_mod_state_free(ms); - ... -Refer:modstate.c - -The modification can be accessed in pileup mode as well. bam_mods_at_qpos gives -the modification at given pileup position. Insertion and deletion to the given -position with possible modification can be retrieved using bam_plp_insertion_mod -api. - - ... - int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - //when using cd, initialize and use as it will be reused after destructor - cd->p = hts_base_mod_state_alloc(); - //parse the bam data and gather modification data from MM tags - return (-1 == bam_parse_basemod(b, (hts_base_mod_state*)cd->p)) ? 1 : 0; - } - - int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - if (cd->p) { - hts_base_mod_state_free((hts_base_mod_state *)cd->p); - cd->p = NULL; - } - return 0; - } - - int main(int argc, char *argv[]) - { - ... - if (!(plpiter = bam_plp_init(readdata, &conf))) { - ... 
- //set constructor destructor callbacks - bam_plp_constructor(plpiter, plpconstructor); - bam_plp_destructor(plpiter, plpdestructor); - - while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &depth))) { - memset(&mods, 0, sizeof(mods)); - printf("%d\t%d\t", tid+1, refpos+1); - - for (j = 0; j < depth; ++j) { - dellen = 0; - if (plp[j].is_del || plp[j].is_refskip) { - printf("*"); - continue; - } - /*invoke bam mods_mods_at_qpos before bam_plp_insertion_mod that the base modification - is retrieved before change in pileup pos thr' plp_insertion_mod call*/ - if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1) { - ... - //use plp_insertion/_mod to get insertion and del at the same position - if ((inslen = bam_plp_insertion_mod(&plp[j], (hts_base_mod_state*)plp[j].cd.p, &insdata, &dellen)) == -1) { - ... - //start and end are displayed in UPPER and rest on LOWER, only 1st modification considered - //base and modification - printf("%c%c%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)])), - modlen > 0 ? mods[0].strand ? '-' : '+' : '\0', modlen > 0 ? mods[0].modified_base : '\0'); - //insertion and deletions - if (plp[j].indel > 0) { - //insertion - /*insertion data from plp_insertion_mod, note this shows the quality value as well - which is different from base and modification above;the lower case display is not attempted either*/ - printf("+%d%s", plp[j].indel, insdata.s); - //handle deletion if any - if (dellen) { - printf("-%d", dellen); - for (k = 0; k < dellen; ++k) { - printf("?"); - ... - else if (plp[j].indel < 0) { - //deletion - printf("%d", plp[j].indel); - for (k = 0; k < -plp[j].indel; ++k) { - printf("?"); - } - } - ... 
-Refer:pileup_mod.c - - -### Read selected fields - -At times the whole alignment data may not be of interest and it would be -better to read required fields alone from the alignment data. CRAM file format -supports such specific data read and HTSLib provides an option to use this. -This can improve the performance on read operation. - -The hts_set_opt method does the selection of specified fields. There are flags -indicating specific fields, like SAM_FLAG, SAM_SEQ, SAM_QNAME, in alignment -data and a combination of flags for the required fields can be passed with -CRAM_OPT_REQUIRED_FIELDS to this api. - - ... - //select required field alone, this is useful for CRAM alone - if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0) { - ... - //read header - in_samhdr = sam_hdr_read(infile); - ... - //read data, check flags and update count - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - cntread1++; - ... -Refer: flags_htsopt_field.c - - -### Thread-pool to read / write - -The HTSLib api supports thread pooling for better performance. There are a few -ways in which this can be used. The pool can be made specific for a file or a -generic pool can be created and shared across multiple files. Another way to -use thread pool is to schedule tasks explicitly to queues which gets executed -using threads in pool. - -To have a thread pool specific for a file, hts_set_opt api can be used with the -file pointer, HTS_OPT_NTHREADS and the number of threads to use in the pool. -Closure of file releases the thread pool as well. To have a thread pool which -can be shared across different files, it needs to be initialized using -hts_tpool_init api, passing number of threads as argument. This thread pool can -be associated with a file using hts_set_opt api. The file pointer, -HTS_OPT_THREAD_POOL and the thread pool address are to be passed as arguments -to api. The thread pool has to be released with hts_tpool_destroy. 
- -Below excerpt shows file specific thread pool, - - ... - //create file specific threads - if (hts_set_opt(infile, HTS_OPT_NTHREADS, 2) < 0 || //2 thread specific for reading - hts_set_opt(outfile1, HTS_OPT_NTHREADS, 1) < 0 || //1 thread specific for sam write - hts_set_opt(outfile2, HTS_OPT_NTHREADS, 1) < 0) { //1 thread specific for bam write - printf("Failed to set thread options\n"); - goto end; - } -Refer: split_thread1.c - -Below excerpt shows thread pool shared across files, - - ... - //create a pool of 4 threads - if (!(tpool.pool = hts_tpool_init(4))) { - ... - //share the pool with all the 3 files - if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 || - hts_set_opt(outfile1, HTS_OPT_THREAD_POOL, &tpool) < 0 || - hts_set_opt(outfile2, HTS_OPT_THREAD_POOL, &tpool) < 0) { - ... - if (tpool.pool) { - hts_tpool_destroy(tpool.pool); - } - ... -Refer: split_thread2.c - - -## More Information - -### CRAM reference files - -The cram reference data is required for the read of sequence data in CRAM -format. The sequence data file may have it as embedded or as a reference to -the actual file. When it is a reference, it is downloaded locally, in the -cache directory for later usage. It will be stored in a directory structure -based on the MD5 checksum in the cache directory. - -Each chromosome in a reference file gets saved as a separate file with md5sum -as its path and name. The initial 4 numerals make the directory name and rest -as the file name (/<1st 2 of md5sum>/<2nd 2 of md5sum>/). - -The download would be attempted from standard location, EBI ENA -(https://www.ebi.ac.uk/ena). - - -### Bam1_t - -This structure holds the sequence data in BAM format. There are fixed and -variable size fields, basic and extended information on sequence -data. Variable size data and extended information are kept together in a -buffer, named data in bam1_t. 
Fields in the member named core, bam1_core_t, -and a few macros together support the storage and handling of the whole -sequence data. - -- core has a link to reference as a 0 based index in field tid. The mate / - reverse strand's link to reference is given by mtid. - -- Field pos and mpos gives the position in reference to which the sequence and - its mate / reverse strand match. - -- Field flag gives the properties of the given alignment. It shows the - alignment's orientation, mate status, read order etc. - -- Field qual gives the quality of the alignment read. - -- l_qname gives the length of the name of the alignment / read, l_extranul gives - the extra space used internally in the data field. - -- l_qseq gives the length of the alignment / read in the data field. - --- n_cigar gives the number of CIGAR operations for the given alignment. - -- isize gives the insert size of the read / alignment. - -The bases in sequence data are stored by compressing 2 bases together in a -byte. When the reverse flag is set, the base data is reversed and -complemented from the actual read (i.e. if the forward read is ACTG, the -reverse read to be CAGT; it will be stored in SAM format with reversed and -complemented format as ACTG with reverse flag set). - -Macros bam_get_qname, bam_get_seq, bam_get_qual, bam_get_aux, bam_get_l_aux, -bam_seqi etc access the data field and retrieve the required data. The aux -macros support the retrieval of auxiliary data from the data field. - - -### Sam_hdr_t - -This structure holds the header information. This holds the number of targets -/ SQ lines in the file, each one's length, name and reference count to this -structure. It also has this information in an internal data structure for -easier access of each field of this data. - -When this data is shared or assigned to another variable of a different scope -or purpose, the reference count needs to be incremented to ensure that it is -valid till the end of the variable's scope. 
sam_hdr_incr_ref and it needs to -be destroyed as many times with sam_hdr_destroy api. - - -### Index - -Indices need the data to be sorted by position. They can be of different -types with extension .bai, .csi or .tbi for compressed SAM/BAM files and .crai -for CRAM files. The index name can be passed along with the alignment file -itself by appending a specific character sequence. The apis can detect this -sequence and extract the index path. ##idx## is the sequence which separates -the file path and index path. - - -### Data files - -The data files can be a local file, a network file, a file accessible through -the web or in cloud storage like google and amazon. The data files can be -represented with URIs like file://, file://localhost/.., ,ftp://.., -gs+http[s].., s3+http[s]:// - diff --git a/src/htslib-1.18/samples/Makefile b/src/htslib-1.18/samples/Makefile deleted file mode 100644 index 40991d7..0000000 --- a/src/htslib-1.18/samples/Makefile +++ /dev/null @@ -1,106 +0,0 @@ -HTS_DIR = ../ -include $(HTS_DIR)/htslib_static.mk - -CC = gcc -CFLAGS = -Wall -g -O0 - -#to statically link to libhts -LDFLAGS = $(HTS_DIR)/libhts.a -L$(HTS_DIR) $(HTSLIB_static_LDFLAGS) $(HTSLIB_static_LIBS) - -#to dynamically link to libhts -#LDFLAGS = -L $(HTS_DIR) -lhts -Wl,-rpath, - -PRGS = flags split split2 cram read_fast read_header read_ref read_bam \ - read_aux dump_aux add_header rem_header update_header mod_bam mod_aux \ - mod_aux_ba write_fast idx_on_write read_reg read_multireg pileup \ - mpileup modstate pileup_mod flags_field split_t1 split_t2 - -all: $(PRGS) - -flags: - $(CC) $(CFLAGS) -I $(HTS_DIR) flags_demo.c -o $@ $(LDFLAGS) - -split: - $(CC) $(CFLAGS) -I $(HTS_DIR) split.c -o $@ $(LDFLAGS) - -split2: - $(CC) $(CFLAGS) -I $(HTS_DIR) split2.c -o $@ $(LDFLAGS) - -cram: - $(CC) $(CFLAGS) -I $(HTS_DIR) cram.c -o $@ $(LDFLAGS) - -read_fast: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_fast.c -o $@ $(LDFLAGS) - -read_header: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_header.c -o $@ 
$(LDFLAGS) - -read_ref: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_refname.c -o $@ $(LDFLAGS) - -read_bam: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_bam.c -o $@ $(LDFLAGS) - -read_aux: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_aux.c -o $@ $(LDFLAGS) - -dump_aux: - $(CC) $(CFLAGS) -I $(HTS_DIR) dump_aux.c -o $@ $(LDFLAGS) - -add_header: - $(CC) $(CFLAGS) -I $(HTS_DIR) add_header.c -o $@ $(LDFLAGS) - -rem_header: - $(CC) $(CFLAGS) -I $(HTS_DIR) rem_header.c -o $@ $(LDFLAGS) - -update_header: - $(CC) $(CFLAGS) -I $(HTS_DIR) update_header.c -o $@ $(LDFLAGS) - -mod_bam: - $(CC) $(CFLAGS) -I $(HTS_DIR) mod_bam.c -o $@ $(LDFLAGS) - -mod_aux: - $(CC) $(CFLAGS) -I $(HTS_DIR) mod_aux.c -o $@ $(LDFLAGS) - -mod_aux_ba: - $(CC) $(CFLAGS) -I $(HTS_DIR) mod_aux_ba.c -o $@ $(LDFLAGS) - -write_fast: - $(CC) $(CFLAGS) -I $(HTS_DIR) write_fast.c -o $@ $(LDFLAGS) - -idx_on_write: - $(CC) $(CFLAGS) -I $(HTS_DIR) index_write.c -o $@ $(LDFLAGS) - -read_reg: - $(CC) $(CFLAGS) -I $(HTS_DIR) index_reg_read.c -o $@ $(LDFLAGS) - -read_multireg: - $(CC) $(CFLAGS) -I $(HTS_DIR) index_multireg_read.c -o $@ $(LDFLAGS) - -pileup: - $(CC) $(CFLAGS) -I $(HTS_DIR) pileup.c -o $@ $(LDFLAGS) - -mpileup: - $(CC) $(CFLAGS) -I $(HTS_DIR) mpileup.c -o $@ $(LDFLAGS) - -modstate: - $(CC) $(CFLAGS) -I $(HTS_DIR) modstate.c -o $@ $(LDFLAGS) - -pileup_mod: - $(CC) $(CFLAGS) -I $(HTS_DIR) pileup_mod.c -o $@ $(LDFLAGS) - -flags_field: - $(CC) $(CFLAGS) -I $(HTS_DIR) flags_htsopt_field.c -o $@ $(LDFLAGS) - -split_t1: - $(CC) $(CFLAGS) -I $(HTS_DIR) split_thread1.c -o $@ $(LDFLAGS) - -split_t2: - $(CC) $(CFLAGS) -I $(HTS_DIR) split_thread2.c -o $@ $(LDFLAGS) - -clean: - find . -name "*.o" | xargs rm -rf - find . 
-name "*.dSYM" | xargs rm -rf - rm $(PRGS) - - diff --git a/src/htslib-1.18/samples/add_header.c b/src/htslib-1.18/samples/add_header.c deleted file mode 100644 index d1a2fc1..0000000 --- a/src/htslib-1.18/samples/add_header.c +++ /dev/null @@ -1,128 +0,0 @@ -/* add_header.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: add_header infile\n\ -Adds new header lines of SQ, RG, PG and CO typs\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, sq[] = "@SQ\tSN:TR1\tLN:100\n@SQ\tSN:TR2\tLN:50"; - int c = 0, ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - kstring_t data = KS_INITIALIZE; - - //update_header infile header idval tag value - if (argc != 2) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update - printf("Could not open stdout\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - //dump command line arguments for PG line - for (c = 0; c < argc; ++c) { - kputs(argv[c], &data); - kputc(' ', &data); - } - - //add SQ line with SN as TR1 and TR2 - if (sam_hdr_add_lines(in_samhdr, &sq[0], 0)) { //length as 0 for NULL terminated data - printf("Failed to add SQ lines\n"); - goto end; - } - - //add RG line with ID as RG1 - if 
(sam_hdr_add_line(in_samhdr, "RG", "ID", "RG1", "LB", "Test", "SM", "S1", NULL)) { - printf("Failed to add RG line\n"); - goto end; - } - - //add pg line - if (sam_hdr_add_pg(in_samhdr, "add_header", "VN", "Test", "CL", data.s, NULL)) { //NULL is to indicate end of args - printf("Failed to add PG line\n"); - goto end; - } - - if (sam_hdr_add_line(in_samhdr, "CO", "Test data", NULL)) { //NULL is to indicate end of args - printf("Failed to add PG line\n"); - goto end; - } - - //write output - if (sam_hdr_write(outfile, in_samhdr) < 0) { - printf("Failed to write output\n"); - goto end; - } - ret = EXIT_SUCCESS; - //bam data write to follow.... -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - ks_free(&data); - return ret; -} diff --git a/src/htslib-1.18/samples/cram.c b/src/htslib-1.18/samples/cram.c deleted file mode 100644 index 5f55e65..0000000 --- a/src/htslib-1.18/samples/cram.c +++ /dev/null @@ -1,168 +0,0 @@ -/* cram.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: cram infile reffile outdir\n\ -Dumps the input file alignments in cram format in given directory\n\ -1.cram has external reference\n\ -2.cram has reference embedded\n\ -3.cram has autogenerated reference\n\ -4.cram has no reference data in it\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL, *reffile = NULL; - char *file1 = NULL, *file2 = NULL, *file3 = NULL, *file4 = NULL, *reffmt1 = NULL, *reffmt2 = NULL; - int c = 0, ret = EXIT_FAILURE, size1 = 0, size2 = 0, size3 = 0; - samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL, *outfile3 = NULL, *outfile4 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - htsFormat fmt1 = {0}, fmt2 = {0}, fmt3 = {0}, fmt4 = {0}; - - //cram infile reffile outdir - if (argc != 4) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - reffile = argv[2]; - outdir = argv[3]; - - //allocate space for option string and output file names - size1 = sizeof(char) * (strlen(reffile) + sizeof("cram,reference=") + 1); - size2 = sizeof(char) * (strlen(reffile) + sizeof("cram,embed_ref=1,reference=") + 1); - size3 = sizeof(char) * (strlen(outdir) + sizeof("/1.cram") + 1); - - reffmt1 = malloc(size1); 
reffmt2 = malloc(size2); - file1 = malloc(size3); file2 = malloc(size3); - file3 = malloc(size3); file4 = malloc(size3); - - if (!file1 || !file2 || !file3 || !file4 || !reffmt1 || !reffmt2) { - printf("Failed to create buffers\n"); - goto end; - } - - snprintf(reffmt1, size1, "cram,reference=%s", reffile); - snprintf(reffmt2, size2, "cram,embed_ref=1,reference=%s", reffile); - snprintf(file1, size3, "%s/1.cram", outdir); snprintf(file2, size3, "%s/2.cram", outdir); - snprintf(file3, size3, "%s/3.cram", outdir); snprintf(file4, size3, "%s/4.cram", outdir); - - if (hts_parse_format(&fmt1, reffmt1) == -1 || //using external reference - uses the M5/UR tags to get reference data during read - hts_parse_format(&fmt2, reffmt2) == -1 || //embed the reference internally - hts_parse_format(&fmt3, "cram,embed_ref=2") == -1 || //embed autogenerated reference - hts_parse_format(&fmt4, "cram,no_ref=1") == -1) { //no reference data encoding at all - printf("Failed to set output option\n"); - goto end; - } - - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input file - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //open output files - w write as SAM, wb write as BAM, wc as CRAM (equivalent to fmt3) - outfile1 = sam_open_format(file1, "wc", &fmt1); outfile2 = sam_open_format(file2, "wc", &fmt2); - outfile3 = sam_open_format(file3, "wc", &fmt3); outfile4 = sam_open_format(file4, "wc", &fmt4); - if (!outfile1 || !outfile2 || !outfile3 || !outfile4) { - printf("Could not open output file\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1) || - (sam_hdr_write(outfile3, in_samhdr) == -1) || 
(sam_hdr_write(outfile4, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0 || - sam_write1(outfile2, in_samhdr, bamdata) < 0 || - sam_write1(outfile3, in_samhdr, bamdata) < 0 || - sam_write1(outfile4, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: -#define IF_OL(X,Y) if((X)) {(Y);} //if one liner - //cleanup - IF_OL(in_samhdr, sam_hdr_destroy(in_samhdr)); - IF_OL(infile, sam_close(infile)); - IF_OL(outfile1, sam_close(outfile1)); - IF_OL(outfile2, sam_close(outfile2)); - IF_OL(outfile3, sam_close(outfile3)); - IF_OL(outfile4, sam_close(outfile4)); - IF_OL(file1, free(file1)); - IF_OL(file2, free(file2)); - IF_OL(file3, free(file3)); - IF_OL(file4, free(file4)); - IF_OL(reffmt1, free(reffmt1)); - IF_OL(reffmt2, free(reffmt2)); - IF_OL(fmt1.specific, hts_opt_free(fmt1.specific)); - IF_OL(fmt2.specific, hts_opt_free(fmt2.specific)); - IF_OL(fmt3.specific, hts_opt_free(fmt3.specific)); - IF_OL(fmt4.specific, hts_opt_free(fmt4.specific)); - IF_OL(bamdata, bam_destroy1(bamdata)); - - return ret; -} diff --git a/src/htslib-1.18/samples/dump_aux.c b/src/htslib-1.18/samples/dump_aux.c deleted file mode 100644 index 49251fe..0000000 --- a/src/htslib-1.18/samples/dump_aux.c +++ /dev/null @@ -1,188 +0,0 @@ -/* dump_aux.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: dump_aux infile\n\ -Dump the aux tags from alignments\n"); -} - -/// printauxdata - prints aux data -/** @param fp - file to which it to be printed - stdout or null - * @param type - aux type - * @param idx - index in array, -1 when not an array type - * @param data - data - * recurses when the data is array type -returns 1 on failure 0 on success -*/ -int printauxdata(FILE *fp, char type, int32_t idx, const uint8_t *data) -{ - uint32_t auxBcnt = 0; - int i = 0; - char auxBType = 'Z'; - - //the tag is already queried and ensured to exist and the type is retrieved from 
the tag data, also iterated within index for arrays, so no error is expected here. - //when these apis are used explicitly, these error conditions needs to be handled based on return value and errno - switch(type) { - case 'A': - fprintf(fp, "%c", bam_aux2A(data)); //byte data - break; - case 'c': - fprintf(fp, "%d", (int8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 1 byte data; bam_auxB2i - from array or bam_aux2i - non array data - break; - case 'C': - fprintf(fp, "%u", (uint8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 1 byte data - break; - case 's': - fprintf(fp, "%d", (int16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 2 byte data - break; - case 'S': - fprintf(fp, "%u", (uint16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 2 byte data - break; - case 'i': - fprintf(fp, "%d", (int32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 4 byte data - break; - case 'I': - fprintf(fp, "%u", (uint32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 4 byte data - break; - case 'f': - case 'd': - fprintf(fp, "%g", (float)(idx > -1 ? 
bam_auxB2f(data, idx) : bam_aux2f(data))); //floating point data, 4 bytes - break; - case 'H': - case 'Z': - fprintf(fp, "%s", bam_aux2Z(data)); //array of char or hex data - break; - case 'B': //array of char/int/float - auxBcnt = bam_auxB_len(data); //length of array - auxBType = bam_aux_type(data + 1); //type of element in array - fprintf(fp, "%c", auxBType); - for (i = 0; i < auxBcnt; ++i) { //iterate the array - fprintf(fp, ","); - //calling recurssively with index to reuse a few lines - if (printauxdata(fp, auxBType, i, data) == EXIT_FAILURE) { - return EXIT_FAILURE; - } - } - break; - default: - printf("Invalid aux tag?\n"); - return EXIT_FAILURE; - break; - } - return EXIT_SUCCESS; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; - int ret = EXIT_FAILURE; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL; - int ret_r = 0; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - - //dump_aux infile - if (argc != 2) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - errno = 0; - data = NULL; - data = bam_aux_first(bamdata); //get the first aux data - while (data) { - printf("%.2s:%c:", bam_aux_tag(data), NULL != strchr("cCsSiI", bam_aux_type(data)) ? 
'i' : bam_aux_type(data)); //macros gets the tag and type of aux data - //dump the data - if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) { - printf("Failed to dump aux data\n"); - goto end; - } - else { - printf(" "); - } - data = bam_aux_next(bamdata, data); //get the next aux data - } - if (ENOENT != errno) { - printf("\nFailed to get aux data\n"); - goto end; - } - printf("\n"); - } - if (ret_r < -1) { - //read error - printf("Failed to read data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/samples/flags_demo.c b/src/htslib-1.18/samples/flags_demo.c deleted file mode 100644 index e03fc6c..0000000 --- a/src/htslib-1.18/samples/flags_demo.c +++ /dev/null @@ -1,110 +0,0 @@ -/* flags_demo.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: flags \n\ -Shows the count of read1 and read2 alignments\n\ -This shows basic reading and alignment flag access\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; //input file name - int c = 0, ret = EXIT_FAILURE; - int64_t cntread1 = 0, cntread2 = 0; //count - samFile *infile = NULL; //sam file - sam_hdr_t *in_samhdr = NULL; //header of file - bam1_t *bamdata = NULL; //to hold the read data - - if (argc != 2) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf( "Failed to read header from file\n"); - goto end; - } - - //read data, check flags and update count - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - cntread1++; - } - if (bamdata->core.flag & BAM_FREAD2) { - cntread2++; - } - } - if (c != -1) { - //error - printf("Failed to get data\n"); - goto end; - } - //else -1 / EOF - printf("File %s 
has %"PRIhts_pos" read1 and %"PRIhts_pos" read2 alignments\n", inname, cntread1, cntread2); - ret = EXIT_SUCCESS; -end: - //clean up - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/samples/flags_htsopt_field.c b/src/htslib-1.18/samples/flags_htsopt_field.c deleted file mode 100644 index 4b64445..0000000 --- a/src/htslib-1.18/samples/flags_htsopt_field.c +++ /dev/null @@ -1,115 +0,0 @@ -/* flags_htsopt_field.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: flags_field \n\ -Shows the count of read1 and read2 alignments\n\ -This shows reading selected fields from CRAM file\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; //input file name - int c = 0, ret = EXIT_FAILURE; - int64_t cntread1 = 0, cntread2 = 0; //count - samFile *infile = NULL; //sam file - sam_hdr_t *in_samhdr = NULL; //header of file - bam1_t *bamdata = NULL; //to hold the read data - - if (argc != 2) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //select required field alone, this is useful for CRAM alone - if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0) { - printf("Failed to set htsoption\n"); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file\n"); - goto end; - } - - //read data, check flags and update count - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - 
cntread1++; - } - if (bamdata->core.flag & BAM_FREAD2) { - cntread2++; - } - } - if (c != -1) { - //error - printf("Failed to get data\n"); - goto end; - } - //else -1 / EOF - printf("File %s has %"PRIhts_pos" read1 and %"PRIhts_pos" read2 alignments\n", inname, cntread1, cntread2); - ret = EXIT_SUCCESS; -end: - //clean up - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/samples/index_multireg_read.c b/src/htslib-1.18/samples/index_multireg_read.c deleted file mode 100644 index dbe8f15..0000000 --- a/src/htslib-1.18/samples/index_multireg_read.c +++ /dev/null @@ -1,150 +0,0 @@ -/* index_multireg_read.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the print_usage -/** @param fp pointer to the file / terminal to which print_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_multireg infile count regspec_csv\n\ - Reads alignment of a target matching to given region specifications\n\ - read_multireg infile.sam 2 R1:10-100,R2:200"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; - char *ptr = NULL; - int c = 0, ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - hts_idx_t *idx = NULL; - hts_itr_t *iter = NULL; - unsigned int regcnt = 0; - char **regions = NULL; - - //read_multireg infile count regspec_csv - if (argc != 4) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - regcnt = atoi(argv[2]); - regions = calloc(regcnt, sizeof(char*)); - //set each regspec as separate entry in region array - ptr = argv[3]; - for (c = 0; ptr && (c < regcnt); ++c) { - regions[c] = ptr; - ptr = strchr(ptr, ','); - if (ptr) { *ptr = '\0'; ++ptr; } - } - - if (regcnt == 0) { - printf("Region count can not be 0\n"); - goto end; - } - //initialize bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open files, use stdout as output SAM file for ease of display - infile = sam_open(inname, "r"); - outfile = 
sam_open("-", "w"); - if (!outfile || !infile) { - printf("Could not open in/out files\n"); - goto end; - } - //load index file, assume it to be present in same location - if (!(idx = sam_index_load(infile, inname))) { - printf("Failed to load the index\n"); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //create iterator - if (!(iter = sam_itr_regarray(idx, in_samhdr, regions, regcnt))) { - printf("Failed to get iterator\n"); - goto end; - } - if (regions) { - //can be freed as it is no longer required - free(regions); - regions = NULL; - } - - //get required area - while ((c = sam_itr_multi_next(infile, iter, bamdata) >= 0)) { - //write to output - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - } - if (c != -1) { - printf("Error during read\n"); - goto end; - } - ret = EXIT_SUCCESS; - -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (iter) { - sam_itr_destroy(iter); - } - if (idx) - hts_idx_destroy(idx); - return ret; -} diff --git a/src/htslib-1.18/samples/index_reg_read.c b/src/htslib-1.18/samples/index_reg_read.c deleted file mode 100644 index 346d542..0000000 --- a/src/htslib-1.18/samples/index_reg_read.c +++ /dev/null @@ -1,143 +0,0 @@ -/* index_reg_read.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the print_usage -/** @param fp pointer to the file / terminal to which print_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: readreg infile idxfile region\n\ -Reads alignments matching to a specific region\n\ -\\. 
from start of file\n\ -\\* only unmapped reads\n\ -REFNAME all reads referring REFNAME\n\ -REFNAME:S all reads referring REFNAME and overlapping from S onwards\n\ -REFNAME:S-E all reads referring REFNAME overlapping from S to E\n\ -REFNAME:-E all reads referring REFNAME overlapping upto E\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *region = NULL; - char *idxfile = NULL; - int c = 0, ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - hts_idx_t *idx = NULL; - hts_itr_t *iter = NULL; - - //readreg infile indexfile region - if (argc != 4) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - idxfile = argv[2]; - region = argv[3]; - - //initialize bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - - //open files - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open input file\n"); - goto end; - } - //using stdout as output file for ease of dumping data - if (!(outfile = sam_open("-", "w"))) { - printf("Could not open out file\n"); - goto end; - } - //load index file - if (!(idx = sam_index_load2(infile, inname, idxfile))) { - printf("Failed to load the index\n"); - goto end; - } - //can use sam_index_load if the index file is present in same location and follows standard naming conventions (i.e. .) 
- - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //create iterator - if (!(iter = sam_itr_querys(idx, in_samhdr, region))) { - printf("Failed to get iterator\n"); - goto end; - } - //read using iterator - while ((c = sam_itr_next(infile, iter, bamdata)) >= 0) { - //write to output - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - } - if (c != -1) { - printf("Error during read\n"); - goto end; - } - ret = EXIT_SUCCESS; - -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (iter) { - sam_itr_destroy(iter); - } - if (idx) { - hts_idx_destroy(idx); - } - return ret; -} diff --git a/src/htslib-1.18/samples/index_write.c b/src/htslib-1.18/samples/index_write.c deleted file mode 100644 index 8fd2bc9..0000000 --- a/src/htslib-1.18/samples/index_write.c +++ /dev/null @@ -1,166 +0,0 @@ -/* index_write.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: idx_on_write infile shiftsize outdir\n\ -Creates compressed sam file and index file for it in given directory\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *outdir = NULL; - char *inname = NULL, *fileidx = NULL, *outname = NULL, outmode[4] = "w"; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - - //idx_on_write infile sizeshift outputdirectory - if (argc != 4) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - size = atoi(argv[2]); - outdir = argv[3]; - - //allocate space for output name - outdir/filename.ext.idxextNUL - c = strlen(basename(inname)) + strlen(outdir) + 10; - fileidx = malloc(sizeof(char) * c); - outname = malloc(sizeof(char) * c); - if (!fileidx || !outname) { - printf("Couldnt allocate memory\n"); - goto end; - } - //initialize bam storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - - //open files - if ((infile = sam_open(inname, "r"))) { - //get file type and create output names - if (infile->format.format == cram) { - //set as crai - snprintf(fileidx, c, "%s/%s.crai", outdir, 
basename(inname)); - snprintf(outname, c, "%s/%s", outdir, basename(inname)); - } - else { - //set as either bai or csi based on interval - if (infile->format.format == sam && infile->format.compression == no_compression) { - //create as gzip compressed - snprintf(outname, c, "%s/%s.gz", outdir, basename(inname)); - snprintf(fileidx, c, "%s/%s.gz.%s", outdir, basename(inname), !size ? "bai" : "csi"); - } - else { - //with same name as input - snprintf(outname, c, "%s/%s", outdir, basename(inname)); - snprintf(fileidx, c, "%s/%s.%s", outdir, basename(inname), !size ? "bai" : "csi"); - } - } - } - c = 0; - sam_open_mode(outmode + 1, outname, NULL); //set extra write options based on name - outfile = sam_open(outname, outmode); - if (!outfile || !infile) { - printf("Could not open files\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if (sam_hdr_write(outfile, in_samhdr)) { - printf("Failed to write header\n"); - goto end; - } - - // initialize indexing, before start of write - if (sam_idx_init(outfile, in_samhdr, size, fileidx)) { - printf("idx initialization failed\n"); - goto end; - } - //read and write alignments - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write data\n"); - goto end; - } - } - if (c != -1) { - printf("Error in reading data\n"); - goto end; - } - //else EOF, save index - if (sam_idx_save(outfile)) { - printf("Could not save index\n"); - goto end; - } - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (fileidx) { - free(fileidx); - } - if (outname) { - free(outname); - } - if (outfile) { - sam_close(outfile); - } - return ret; -} diff --git a/src/htslib-1.18/samples/mod_aux.c b/src/htslib-1.18/samples/mod_aux.c 
deleted file mode 100644 index d5ed18c..0000000 --- a/src/htslib-1.18/samples/mod_aux.c +++ /dev/null @@ -1,221 +0,0 @@ -/* mod_aux.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: mod_aux infile QNAME tag type val\n\ -Add/update the given aux tag to all alignments\n\ -type A-char C-int F-float Z-string\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *tag = NULL, *qname = NULL, *val = NULL; - char type = '\0'; - int ret = EXIT_FAILURE, ret_r = 0, length = 0; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL, *outfile = NULL; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - - //mod_aux infile QNAME tag type val - if (argc != 6) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - qname = argv[2]; - tag = argv[3]; - type = argv[4][0]; - val = argv[5]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //open output file - if (!(outfile = sam_open("-", "w"))) { - printf("Could not open std output\n"); - goto end; - } - - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (sam_hdr_write(outfile, in_samhdr) == -1) { - printf("Failed to write header\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, 
in_samhdr, bamdata)) >= 0) { - if (strcasecmp(bam_get_qname(bamdata), qname)) { - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - continue; //not matching - } - - errno = 0; - //matched to qname, update aux - if (!(data = bam_aux_get(bamdata, tag))) { - int i = 0; float f = 0; - //tag not present append - switch (type) { - case 'f': - case 'd': - length = sizeof(float); - f = atof(val); - val = (const char*) &f; - type = 'f'; - break; - case 'C': - case 'S': - case 'I': - length = sizeof(int); - i = atoi(val); - val = (const char*) &i; - break; - case 'Z': - length = strlen(val) + 1; //1 for NUL termination - break; - case 'A': - length = 1; - break; - default: - printf("Invalid type mentioned\n"); - goto end; - break; - } - if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val)) { - printf("Failed to append aux data, errno: %d\n", errno); - goto end; - } - } - else { - char auxtype = bam_aux_type(data); - //update the tag with newer value - switch (type) { - case 'f': - case 'd': - if (auxtype != 'f' && auxtype != 'd') { - printf("Invalid aux type passed\n"); - goto end; - } - if (bam_aux_update_float(bamdata, tag, atof(val))) { - printf("Failed to update float data, errno: %d\n", errno); - goto end; - } - break; - case 'C': - case 'S': - case 'I': - if (auxtype != 'c' && auxtype != 'C' && auxtype != 's' && auxtype != 'S' && auxtype != 'i' && auxtype != 'I') { - printf("Invalid aux type passed\n"); - goto end; - } - if (bam_aux_update_int(bamdata, tag, atoll(val))) { - printf("Failed to update int data, errno: %d\n", errno); - goto end; - } - break; - case 'Z': - if (auxtype != 'Z') { - printf("Invalid aux type passed\n"); - goto end; - } - length = strlen(val) + 1; //1 for NUL termination - if (bam_aux_update_str(bamdata, tag, length, val)) { - //with length as -1, length will be detected based on null terminated val data - printf("Failed to update string data, errno: %d\n", errno); - goto 
end; - } - break; - case 'A': - if (auxtype != 'A') { - printf("Invalid aux type passed\n"); - goto end; - } - //update the char data directly on buffer - *(data+1) = val[0]; - break; - default: - printf("Invalid data type\n"); - goto end; - break; - } - } - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - } - if (ret_r < -1) { - //read error - printf("Failed to read data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/samples/mod_aux_ba.c b/src/htslib-1.18/samples/mod_aux_ba.c deleted file mode 100644 index 8ef90ee..0000000 --- a/src/htslib-1.18/samples/mod_aux_ba.c +++ /dev/null @@ -1,147 +0,0 @@ -/* mod_aux_ba.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: mod_aux_ba infile\n\ -Updates the count of bases as an aux array on all alignments\n\ -BA:B:I,count of ACTGN\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; - int i = 0, ret = EXIT_FAILURE, ret_r = 0; - uint32_t cnt[5] = {0}; //A C G T N - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL, *outfile = NULL; - bam1_t *bamdata = NULL; - - //mod_aux infile - if (argc != 2) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - //open output file - if (!(outfile = sam_open("-", "w"))) { - printf("Could not open std output\n"); - goto end; - } - - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (sam_hdr_write(outfile, in_samhdr) == -1) { - printf("Failed to write header\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - errno = 0; - memset(cnt, 0, sizeof(cnt)); - for (i = 0; i < bamdata->core.l_qseq; ++i) { - switch 
(seq_nt16_str[bam_seqi(bam_get_seq(bamdata),i)]) { - case 'A': - ++cnt[0]; - break; - case 'C': - ++cnt[1]; - break; - case 'G': - ++cnt[2]; - break; - case 'T': - ++cnt[3]; - break; - default: //N - ++cnt[4]; - break; - } - } - - if (bam_aux_update_array(bamdata, "BA", 'I', sizeof(cnt)/sizeof(cnt[0]), cnt)) { - printf("Failed to update base array, errno %d", errno); - goto end; - } - - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - } - if (ret_r < -1) { - //read error - printf("Failed to read data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/samples/mod_bam.c b/src/htslib-1.18/samples/mod_bam.c deleted file mode 100644 index 9f1eb32..0000000 --- a/src/htslib-1.18/samples/mod_bam.c +++ /dev/null @@ -1,229 +0,0 @@ -/* mod_bam.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: mod_bam infile QNAME fieldpos newval\n\ -Modifies the alignment data field\n\ -fieldpos - 1 QNAME 2 FLAG 3 RNAME 4 POS 5 MAPQ 6 CIGAR 7 RNEXT 8 PNEXT 9 TLEN 10 SEQ 11 QUAL\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *qname = NULL; - char *val = NULL; - int c = 0, ret = EXIT_FAILURE, field = 0; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL, *outfile = NULL; - int ret_r = 0, i = 0; - bam1_t *bamdata = NULL; - - //mod_bam infile QNAME fieldpos newval - if (argc != 5) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - qname = argv[2]; - //1 QNAME 2 FLAG 3 RNAME 4 POS 5 MAPQ 6 CIGAR 7 RNEXT 8 PNEXT 9 TLEN 10 SEQ 11 QUAL - field = atoi(argv[3]); - val = argv[4]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r")) || !(outfile = sam_open("-", "w"))) { - printf("Could not open input/output\n"); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (sam_hdr_write(outfile, in_samhdr) == -1) { - printf("Failed to write header\n"); - goto end; - } - - while 
((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… - ret = 0; - if (!strcasecmp(qname, bam_get_qname(bamdata))) { - //the required one - switch(field) { - case 1:// QNAME - ret = bam_set_qname(bamdata, val); - break; - case 2:// FLAG - bamdata->core.flag = atol(val) & 0xFFFF; - break; - case 3:// RNAME - case 7:// RNEXT - if ((ret = sam_hdr_name2tid(in_samhdr, val)) < 0) { - printf("Invalid reference name\n"); - ret = -1; - break; - } - if (field == 3) { - //reference - bamdata->core.tid = ret; - } - else { - //mate reference - bamdata->core.mtid = ret; - } - break; - case 4:// POS - bamdata->core.pos = atoll(val); - break; - case 5:// MAPQ - bamdata->core.qual = atoi(val) & 0x0FF; - break; - case 6:// CIGAR - { - uint32_t *cigar = NULL; - size_t size = 0; - ssize_t ncigar = 0; - bam1_t *newbam = bam_init1(); - if (!newbam) { - printf("Failed to create new bam data\n"); - ret = -1; - break; - } - //get cigar array and set all data in new bam record - if ((ncigar = sam_parse_cigar(val, NULL, &cigar, &size)) < 0) { - printf("Failed to parse cigar\n"); - ret = -1; - break; - } - if (bam_set1(newbam, bamdata->core.l_qname, bam_get_qname(bamdata), bamdata->core.flag, bamdata->core.tid, bamdata->core.pos, bamdata->core.qual, - ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos, bamdata->core.isize, bamdata->core.l_qseq, (const char*)bam_get_seq(bamdata), (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) { - printf("Failed to set bamdata\n"); - ret = -1; - break; - } - //correct sequence data as input is expected in ascii format and not as compressed inside bam! 
- memcpy(bam_get_seq(newbam), bam_get_seq(bamdata), (bamdata->core.l_qseq + 1) / 2); - //copy the aux data - memcpy(bam_get_aux(newbam), bam_get_aux(bamdata), bam_get_l_aux(bamdata)); - - bam_destroy1(bamdata); - bamdata = newbam; - } - break; - case 8:// PNEXT - bamdata->core.mpos = atoll(val); - break; - case 9:// TLEN - bamdata->core.isize = atoll(val); - break; - case 10:// SEQ - i = strlen(val); - if (bamdata->core.l_qseq != i) { - printf("SEQ length different\n"); - ret = -1; - //as it is different, have to update quality data and cigar data as well and more info is required for it, which is not handled in this sample - //accessing raw memory and moving is one option; creating and using new bam1_t object is another option. - break; - } - for( c = 0; c < i; ++c) { - bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[(unsigned char)val[c]]); - } - break; - case 11:// QUAL - i = strlen(val); - if (i != bamdata->core.l_qseq) { - printf("Qual length different than sequence\n"); - ret = -1; - break; - } - for (c = 0; c < i; ++c) { - val[c] -= 33; //phred score from ascii value - } - memcpy(bam_get_qual(bamdata), val, i); - break; - default: - printf("Invalid input\n"); - goto end; - break; - } - if (ret < 0) { - printf("Failed to set new data\n"); - ret = EXIT_FAILURE; - goto end; - } - } - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write bam data\n"); - ret = EXIT_FAILURE; - goto end; - } - } - - if (ret_r == -1 || ret != EXIT_FAILURE) { - // no error! 
- ret = EXIT_SUCCESS; - } - else { - printf("Failed to read data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/samples/modstate.c b/src/htslib-1.18/samples/modstate.c deleted file mode 100644 index 9763916..0000000 --- a/src/htslib-1.18/samples/modstate.c +++ /dev/null @@ -1,190 +0,0 @@ -/* modstate.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: modstate infile option\n\ -Shows the base modifications on the alignment\n\ -Option can be 1 or 2 to select the api to use\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; - int ret = EXIT_FAILURE; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL; - - int ret_r = 0, i = 0 , r = 0, j = 0, pos = 0, opt = 0, k = 0, cnt = 0, *bm = NULL; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - hts_base_mod_state *ms = NULL; - - - //modstate infile 1/2 - if (argc != 3) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - opt = atoi(argv[2]) - 1; //option 1 or 2? 
- - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - if (!(ms = hts_base_mod_state_alloc())) { - printf("Failed to allocate state memory\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - i = 0; - data = bam_get_seq(bamdata); - if (bam_parse_basemod(bamdata, ms)) { - printf("Failed to parse the base mods\n"); - goto end; - } - //dump the modifications - printf("Modifications:"); - bm = bam_mods_recorded(ms, &cnt); - for (k = 0; k < cnt; ++k) { - printf("%c", bm[k]); - } - printf("\n"); - hts_base_mod mod[5] = {0}; //for ATCGN - if (opt) { - //option 1 - for (; i < bamdata->core.l_qseq; ++i) { - if ((r = bam_mods_at_next_pos(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]))) <= -1) { - printf("Failed to get modifications\n"); - goto end; - } - else if (r > (sizeof(mod) / sizeof(mod[0]))) { - printf("More modifications than this app can handle, update the app\n"); - goto end; - } - else if (!r) { - //no modification at this pos - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - //modifications - for (j = 0; j < r; ++j) { - printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); - } - } - } - else { - //option 2 - while ((r = bam_next_basemod(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]), &pos)) >= 0) { - for (; i < bamdata->core.l_qseq && i < pos; ++i) { - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - //modifications - for (j = 0; j < r; ++j) { - printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? 
'-' : '+', mod[j].modified_base); - } - if (i == pos) - i++; //skip the modification already displayed - if (!r) { - for (; i < bamdata->core.l_qseq; ++i) { - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - break; - } - } - if (r <= -1) { - printf("Failed to get modifications\n"); - goto end; - } - } - printf("\n"); - } - - if (ret_r == -1) { - //check last alignment's base modification - int strand = 0, impl = 0; - char canonical = 0, modification[] = "mhfcgebaon"; //possible modifications - printf("\n\nLast alignment has \n"); - for (k = 0; k < sizeof(modification) - 1; ++k) { //avoiding NUL termination - if (bam_mods_query_type(ms, modification[k], &strand, &impl, &canonical)) { - printf ("No modification of %c type\n", modification[k]); - } - else { - printf("%s strand has %c modified with %c, can %sassume unlisted as unmodified\n", strand?"-/bottom/reverse":"+/top/forward", canonical, modification[k], impl?"" : "not " ); - } - } - // no error! - ret = EXIT_SUCCESS; - } - else { - printf("Failed to read data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - - if (ms) { - hts_base_mod_state_free(ms); - } - return ret; -} diff --git a/src/htslib-1.18/samples/mpileup.c b/src/htslib-1.18/samples/mpileup.c deleted file mode 100644 index fe93374..0000000 --- a/src/htslib-1.18/samples/mpileup.c +++ /dev/null @@ -1,204 +0,0 @@ -/* mpileup.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: mpileup infile ...\n\ -Shows the mpileup api usage.\n"); - return; -} - -typedef struct plpconf { - char *inname; - samFile *infile; - sam_hdr_t *in_samhdr; -} plpconf; - -/// @brief plpconstructor -/// @param data client data? 
-/// @param b bam being loaded -/// @param cd client data -/// @return -int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - return 0; -} - -int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - return 0; -} - -/// @brief bam_plp_auto_f reads alignment data for pileup operation -/// @param data client callback data holding alignment file handle -/// @param b bamdata read -/// @return same as sam_read1 -int readdata(void *data, bam1_t *b) -{ - plpconf *conf = (plpconf*)data; - if (!conf || !conf->infile) { - return -2; //cant read data - } - - //read alignment and send - return sam_read1(conf->infile, conf->infile->bam_header, b); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - int ret = EXIT_FAILURE; - bam1_t *bamdata = NULL; - plpconf** conf = NULL; - bam_mplp_t mplpiter = NULL; - int tid = -1, input = 0, k = 0, dpt = 0, *depth = NULL; - hts_pos_t refpos = -1; - const bam_pileup1_t **plp = NULL; - - //infile ... 
- if (argc < 2) { - print_usage(stderr); - goto end; - } - if ((conf = calloc(argc - 1, sizeof(plpconf*)))) { - for (input = 0; input < argc - 1; ++input) { - conf[input] = calloc(1, sizeof(plpconf)); - } - } - depth = calloc(argc - 1, sizeof(int)); - plp = calloc(argc - 1, sizeof(bam_pileup1_t*)); - if (!conf || !depth || !plp) { - printf("Failed to allocate memory\n"); - goto end; - } - for (input = 0; input < argc - 1; ++input) { - conf[input]->inname = argv[input+1]; - } - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - for(input = 0; input < argc - 1; ++input) { - if (!(conf[input]->infile = sam_open(conf[input]->inname, "r"))) { - printf("Could not open %s\n", conf[input]->inname); - goto end; - } - //read header - if (!(conf[input]->in_samhdr = sam_hdr_read(conf[input]->infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - } - - if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf))) { - printf("Failed to initialize mpileup data\n"); - goto end; - } - - //set constructor destructor callbacks - bam_mplp_constructor(mplpiter, plpconstructor); - bam_mplp_destructor(mplpiter, plpdestructor); - - while (bam_mplp64_auto(mplpiter, &tid, &refpos, depth, plp) > 0) { - printf("%d\t%"PRIhts_pos"\t", tid+1, refpos+1); - - for (input = 0; input < argc - 1; ++input) { - for (dpt = 0; dpt < depth[input]; ++dpt) { - if (plp[input][dpt].is_del || plp[input][dpt].is_refskip) { - printf("*"); - continue; - } - //start and end are displayed in UPPER and rest on LOWER - printf("%c", plp[input][dpt].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]) : - (plp[input]->is_tail ? 
toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]))); - if (plp[input][dpt].indel > 0) { - //insertions, anyway not start or end - printf("+%d", plp[input][dpt].indel); - for (k = 0; k < plp[input][dpt].indel; ++k) { - printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos + k + 1)])); - } - } - else if (plp[input][dpt].indel < 0) { - printf("%d", plp[input][dpt].indel); - for (k = 0; k < -plp[input][dpt].indel; ++k) { - printf("?"); - } - } - } - printf(" "); - } - printf("\n"); - fflush(stdout); - } - - ret = EXIT_SUCCESS; -end: - //clean up - if (conf) { - for (input = 0; input < argc - 1; ++input) { - if (conf[input] && conf[input]->in_samhdr) { - sam_hdr_destroy(conf[input]->in_samhdr); - } - if (conf[input] && conf[input]->infile) { - sam_close(conf[input]->infile); - } - if (conf[input]) { - free(conf[input]); - } - } - free(conf); - } - - if (bamdata) { - bam_destroy1(bamdata); - } - if (mplpiter) { - bam_mplp_destroy(mplpiter); - } - if (depth) { - free(depth); - } - if (plp) { - free(plp); - } - return ret; -} diff --git a/src/htslib-1.18/samples/pileup.c b/src/htslib-1.18/samples/pileup.c deleted file mode 100644 index 11e2fb0..0000000 --- a/src/htslib-1.18/samples/pileup.c +++ /dev/null @@ -1,183 +0,0 @@ -/* pileup.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: pileup infile\n\ -Shows the pileup api usage.\n"); - return; -} - -typedef struct plpconf { - char *inname; - samFile *infile; - sam_hdr_t *in_samhdr; -} plpconf; - -/// @brief plpconstructor -/// @param data client data? 
-/// @param b bam being loaded -/// @param cd client data -/// @return -int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - /*plpconf *conf= (plpconf*)data; - can access the data passed to pileup init from data - can do any alignment specific allocation / data storage here in param cd - it can hold either a float, 64 bit int or a pointer - when using cd, initialize and use as it will be reused after destructor*/ - return 0; -} - -int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - /*plpconf *conf= (plpconf*)data; - can access the data passed to pileup init from data - deallocate any alignment specific allocation made in constructor and stored in param cd*/ - return 0; -} - -/// @brief bam_plp_auto_f reads alignment data for pileup operation -/// @param data client callback data holding alignment file handle -/// @param b bamdata read -/// @return same as sam_read1 -int readdata(void *data, bam1_t *b) -{ - plpconf *conf = (plpconf*)data; - if (!conf || !conf->infile) { - return -2; //cant read data - } - - //read alignment and send - return sam_read1(conf->infile, conf->infile->bam_header, b); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - int ret = EXIT_FAILURE; - bam1_t *bamdata = NULL; - plpconf conf = {0}; - bam_plp_t plpiter = NULL; - int tid = -1, n = -1, j = 0, k = 0; - int refpos = -1; - const bam_pileup1_t *plp = NULL; - - //infile - if (argc != 2) { - print_usage(stderr); - goto end; - } - conf.inname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - if (!(conf.infile = sam_open(conf.inname, "r"))) { - printf("Could not open %s\n", conf.inname); - goto end; - } - //read header - if (!(conf.in_samhdr = sam_hdr_read(conf.infile))) { - printf("Failed to read header from 
file!\n"); - goto end; - } - - if (!(plpiter = bam_plp_init(readdata, &conf))) { - printf("Failed to initialize pileup data\n"); - goto end; - } - - //set constructor destructor callbacks - bam_plp_constructor(plpiter, plpconstructor); - bam_plp_destructor(plpiter, plpdestructor); - - while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &n))) { - printf("%d\t%d\t", tid+1, refpos+1); - - for (j = 0; j < n; ++j) { - //doesnt detect succeeding insertion and deletion together here, only insertion is identified - //deletion is detected in plp->is_del as and when pos reaches the position - //if detection ahead is required, use bam_plp_insertion here which gives deletion length along with insertion - if (plp[j].is_del || plp[j].is_refskip) { - printf("*"); - continue; - } - //start and end are displayed in UPPER and rest on LOWER - printf("%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]))); - if (plp[j].indel > 0) { - //insertions, anyway not start or end - printf("+%d", plp[j].indel); - for (k = 0; k < plp[j].indel; ++k) { - printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos + k + 1)])); - } - } - else if (plp[j].indel < 0) { - printf("%d", plp[j].indel); - for (k = 0; k < -plp[j].indel; ++k) { - printf("?"); - } - } - printf(" "); - } - printf("\n"); - fflush(stdout); - } - - ret = EXIT_SUCCESS; -end: - //clean up - if (conf.in_samhdr) { - sam_hdr_destroy(conf.in_samhdr); - } - if (conf.infile) { - sam_close(conf.infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (plpiter) { - bam_plp_destroy(plpiter); - } - return ret; -} diff --git a/src/htslib-1.18/samples/pileup_mod.c b/src/htslib-1.18/samples/pileup_mod.c deleted file mode 100644 index 24d6cf5..0000000 --- a/src/htslib-1.18/samples/pileup_mod.c +++ /dev/null @@ -1,218 +0,0 @@ -/* 
pileup_mod.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: pileup_mod infile\n\ -Shows the pileup api usage with base modification.\n"); - return; -} - -typedef struct plpconf { - char *inname; - samFile *infile; - sam_hdr_t *in_samhdr; -} plpconf; - -/// @brief plpconstructor -/// @param data client data? 
-/// @param b bam being loaded -/// @param cd client data -/// @return -int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - //plpconf *conf= (plpconf*)data; can use this to access anything required from the data in pileup init - - //when using cd, initialize and use as it will be reused after destructor - cd->p = hts_base_mod_state_alloc(); - if (!cd->p) { - printf("Failed to allocate base modification state\n"); - return 1; - } - - //parse the bam data and gather modification data from MM tags - return (-1 == bam_parse_basemod(b, (hts_base_mod_state*)cd->p)) ? 1 : 0; -} - -int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - if (cd->p) { - hts_base_mod_state_free((hts_base_mod_state *)cd->p); - cd->p = NULL; - } - return 0; -} - -/// @brief bam_plp_auto_f reads alignment data for pileup operation -/// @param data client callback data holding alignment file handle -/// @param b bamdata read -/// @return same as sam_read1 -int readdata(void *data, bam1_t *b) -{ - plpconf *conf = (plpconf*)data; - if (!conf || !conf->infile) { - return -2; //cant read data - } - - //read alignment and send - return sam_read1(conf->infile, conf->infile->bam_header, b); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - int ret = EXIT_FAILURE; - bam1_t *bamdata = NULL; - plpconf conf = {0}; - bam_plp_t plpiter = NULL; - int tid = -1, depth = -1, j = 0, k = 0, inslen = 0, dellen = 0, modlen = 0; - #define NMODS 5 - hts_base_mod mods[NMODS] = {0}; //ACGT N - int refpos = -1; - const bam_pileup1_t *plp = NULL; - kstring_t insdata = KS_INITIALIZE; - - //infile - if (argc != 2) { - print_usage(stderr); - goto end; - } - conf.inname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - if (!(conf.infile = 
sam_open(conf.inname, "r"))) { - printf("Could not open %s\n", conf.inname); - goto end; - } - //read header - if (!(conf.in_samhdr = sam_hdr_read(conf.infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (!(plpiter = bam_plp_init(readdata, &conf))) { - printf("Failed to initialize pileup data\n"); - goto end; - } - - //set constructor destructor callbacks - bam_plp_constructor(plpiter, plpconstructor); - bam_plp_destructor(plpiter, plpdestructor); - - while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &depth))) { - memset(&mods, 0, sizeof(mods)); - printf("%d\t%d\t", tid+1, refpos+1); - - for (j = 0; j < depth; ++j) { - dellen = 0; - - if (plp[j].is_del || plp[j].is_refskip) { - printf("*"); - continue; - } - /*invoke bam_mods_at_qpos before bam_plp_insertion_mod that the base modification - is retrieved before change in pileup pos thr' plp_insertion_mod call*/ - if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1) { - printf("Failed to get modifications\n"); - goto end; - } - - //use plp_insertion/_mod to get insertion and del at the same position - if ((inslen = bam_plp_insertion_mod(&plp[j], (hts_base_mod_state*)plp[j].cd.p, &insdata, &dellen)) == -1) { - printf("Failed to get insertion status\n"); - goto end; - } - - //start and end are displayed in UPPER and rest on LOWER, only 1st modification considered - //base and modification - printf("%c%c%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)])), - modlen > 0 ? mods[0].strand ? '-' : '+' : '\0', - modlen > 0 ? 
mods[0].modified_base : '\0'); - //insertion and deletions - if (plp[j].indel > 0) { - //insertion - /*insertion data from plp_insertion_mod, note this shows the quality value as well - which is different from base and modification above;the lower case display is not attempted either*/ - printf("+%d%s", plp[j].indel, insdata.s); - //handle deletion if any - if (dellen) { - printf("-%d", dellen); - for (k = 0; k < dellen; ++k) { - printf("?"); - } - } - } - else if (plp[j].indel < 0) { - //deletion - printf("%d", plp[j].indel); - for (k = 0; k < -plp[j].indel; ++k) { - printf("?"); - } - } - printf(" "); - } - printf("\n"); - fflush(stdout); - } - - ret = EXIT_SUCCESS; -end: - //clean up - if (conf.in_samhdr) { - sam_hdr_destroy(conf.in_samhdr); - } - if (conf.infile) { - sam_close(conf.infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (plpiter) { - bam_plp_destroy(plpiter); - } - ks_free(&insdata); - return ret; -} diff --git a/src/htslib-1.18/samples/read_aux.c b/src/htslib-1.18/samples/read_aux.c deleted file mode 100644 index cbf972b..0000000 --- a/src/htslib-1.18/samples/read_aux.c +++ /dev/null @@ -1,207 +0,0 @@ -/* read_aux.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_aux infile tag\n\ -Read the given aux tag from alignments either as SAM string or as raw data\n"); -} - -/// printauxdata - prints aux data -/** @param fp - file to which it to be printed - stdout or null - * @param type - aux type - * @param idx - index in array, -1 when not an array type - * @param data - data - * recurses when the data is array type -returns 1 on failure 0 on success -*/ -int printauxdata(FILE *fp, char type, int32_t idx, const uint8_t *data) -{ - uint32_t auxBcnt = 0; - int i = 0; - char auxBType = 'Z'; - - //the tag is already queried and ensured to exist and the type is retrieved from the tag data, also iterated within index for arrays, so no error is expected here. - //when these apis are used explicitly, these error conditions needs to be handled based on return value and errno - switch(type) { - case 'A': - fprintf(fp, "%c", bam_aux2A(data)); //byte data - break; - case 'c': - fprintf(fp, "%d", (int8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 1 byte data; bam_auxB2i - from array or bam_aux2i - non array data - break; - case 'C': - fprintf(fp, "%u", (uint8_t)(idx > -1 ? 
bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 1 byte data - break; - case 's': - fprintf(fp, "%d", (int16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 2 byte data - break; - case 'S': - fprintf(fp, "%u", (uint16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 2 byte data - break; - case 'i': - fprintf(fp, "%d", (int32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 4 byte data - break; - case 'I': - fprintf(fp, "%u", (uint32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 4 byte data - break; - case 'f': - case 'd': - fprintf(fp, "%g", (float)(idx > -1 ? bam_auxB2f(data, idx) : bam_aux2f(data))); //floating point data, 4 bytes - break; - case 'H': - case 'Z': - fprintf(fp, "%s", bam_aux2Z(data)); //array of char or hex data - break; - case 'B': //array of char/int/float - auxBcnt = bam_auxB_len(data); //length of array - auxBType = bam_aux_type(data + 1); //type of element in array - fprintf(fp, "%c", auxBType); - for (i = 0; i < auxBcnt; ++i) { //iterate the array - fprintf(fp, ","); - //calling recurssively with index to reuse a few lines - if (printauxdata(fp, auxBType, i, data) == EXIT_FAILURE) { - return EXIT_FAILURE; - } - } - break; - default: - printf("Invalid aux tag?\n"); - return EXIT_FAILURE; - break; - } - return EXIT_SUCCESS; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *tag = NULL; - int c = 0, ret = EXIT_FAILURE, ret_r = 0, i = 0; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - kstring_t sdata = KS_INITIALIZE; - - //read_aux infile tag - if (argc != 3) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - tag = argv[2]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; 
- } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - errno = 0; i++; - ks_clear(&sdata); - if (i % 2) { //use options alternatively to demonstrate both - //option 1 - get data as string with tag and type - if ((c = bam_aux_get_str(bamdata, tag, &sdata)) == 1) { - printf("%s\n",sdata.s); - } - else if (c == 0 && errno == ENOENT) { - //tag not present - printf("Tag not present\n"); - } - else { - //error - printf("Failed to get tag\n"); - goto end; - } - } - else { - //option 2 - get raw data - if (!(data = bam_aux_get(bamdata, tag))) { - //tag data not returned, errono gives the reason - if (errno == ENOENT) { - printf("Tag not present\n"); - } - else { - printf("Invalid aux data\n"); - } - } - else { - //got the tag, read and print - if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) { - printf("Failed to read aux data\n"); - goto end; - } - printf("\n"); - } - } - } - if (ret_r < -1) { - //read error - printf("Failed to read data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - ks_free(&sdata); - return ret; -} diff --git a/src/htslib-1.18/samples/read_bam.c b/src/htslib-1.18/samples/read_bam.c deleted file mode 100644 index 7fca8c5..0000000 --- a/src/htslib-1.18/samples/read_bam.c +++ /dev/null @@ -1,139 +0,0 @@ -/* read_bam.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_bam infile\n\ -Shows the alignment data from file\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *tidname = NULL, *flags = NULL; - int ret = EXIT_FAILURE; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL; - - int ret_r = 0, i = 0; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - uint32_t *cigar = NULL; - - - //read_bam infile - if (argc != 2) { - print_usage(stderr); - 
goto end; - } - inname = argv[1]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… - printf("NAME: %s\n", bam_get_qname(bamdata)); //get the query name using the macro - flags = bam_flag2str(bamdata->core.flag); //flags as string - printf("FLG: %d - %s\n", bamdata->core.flag, flags); //flag is available in core structure - free((void*)flags); - tidname = sam_hdr_tid2name(in_samhdr, bamdata->core.tid); - printf("RNAME/TID: %d - %s\n", bamdata->core.tid, tidname? tidname: "" ); //retrieves the target name using the value in bam and by referring the header - printf("POS: %"PRIhts_pos"\n", bamdata->core.pos + 1); //internally position is 0 based and on text output / SAM it is 1 based - printf("MQUAL: %d\n", bamdata->core.qual); //map quality value - - cigar = bam_get_cigar(bamdata); //retrieves the cigar data - printf("CGR: "); - for (i = 0; i < bamdata->core.n_cigar; ++i) { //no. 
of cigar data entries - printf("%d%c", bam_cigar_oplen(cigar[i]), bam_cigar_opchr(cigar[i])); //the macros gives the count of operation and the symbol of operation for given cigar entry - } - printf("\nTLEN/ISIZE: %"PRIhts_pos"\n", bamdata->core.isize); - - data = bam_get_seq(bamdata); //get the sequence data - if (bamdata->core.l_qseq != bam_cigar2qlen(bamdata->core.n_cigar, cigar)) { //checks the length with CIGAR and query - printf("\nLength doesnt matches to cigar data\n"); - goto end; - } - - printf("SEQ: "); - for (i = 0; i < bamdata->core.l_qseq ; ++i) { //sequence length - printf("%c", seq_nt16_str[bam_seqi(data, i)]); //retrieves the base from (internal compressed) sequence data - } - printf("\nQUAL: "); - for (int i = 0; i < bamdata->core.l_qseq ; ++i) { - printf("%c", bam_get_qual(bamdata)[i]+33); //retrives the quality value - } - printf("\n\n"); - } - - if (ret_r == -1) { - // no error! - ret = EXIT_SUCCESS; - } - else { - printf("Failed to read data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/samples/read_fast.c b/src/htslib-1.18/samples/read_fast.c deleted file mode 100644 index f74b255..0000000 --- a/src/htslib-1.18/samples/read_fast.c +++ /dev/null @@ -1,116 +0,0 @@ -/* read_fast.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_fast \n\ -Reads the fasta/fastq file and shows the content.\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; //input file name - int c = 0, ret = EXIT_FAILURE; - samFile *infile = NULL; //sam file - sam_hdr_t *in_samhdr = NULL; //header of file - bam1_t *bamdata = NULL; //to hold the read data - - if (argc != 2) { - print_usage(stdout); - goto end; - } - inname = 
argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - if (infile->format.format != fasta_format && infile->format.format != fastq_format) { - printf("Invalid file specified\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf( "Failed to read header from file\n"); - goto end; - } - - //read data - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - printf("\nsequence: "); - for (c = 0; c < bamdata->core.l_qseq; ++c) { - printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]); - } - if (infile->format.format == fastq_format) { - printf("\nquality: "); - for (c = 0; c < bamdata->core.l_qseq; ++c) { - printf("%c", bam_get_qual(bamdata)[c]); - } - } - } - if (c != -1) { - //error - printf("Failed to get data\n"); - goto end; - } - //else -1 / EOF - ret = EXIT_SUCCESS; -end: - //clean up - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/samples/read_header.c b/src/htslib-1.18/samples/read_header.c deleted file mode 100644 index eb14dae..0000000 --- a/src/htslib-1.18/samples/read_header.c +++ /dev/null @@ -1,173 +0,0 @@ -/* read_header.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_header infile header [id val] [tag]\n\ -This shows given tag from given header or the whole line\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *header = NULL, *tag = NULL, *idval = NULL; - char *id = NULL; - int c = 0, ret = EXIT_FAILURE, linecnt = 0; - samFile *infile = NULL; - sam_hdr_t *in_samhdr = NULL; - kstring_t data = KS_INITIALIZE; - - //read_header infile 
header tag - if (argc < 3 || argc > 6) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - header = argv[2]; - if (argc == 4) { //header and tag - tag = argv[3]; - //find unique identifier field name for requested header type - if (header[0] == 'H' && header[1] == 'D') { - id = NULL; - } - else if (header[0] == 'S' && header[1] == 'Q') { - id = "SN"; - } - else if (header[0] == 'R' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'P' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'C' && header[1] == 'O') { - id = ""; - } - else { - printf("Invalid header type\n"); - goto end; - } - } - else if (argc == 5) { //header id val - id = argv[3]; - idval = argv[4]; - } - else if (argc == 6) { //header id val tag - id = argv[3]; - idval = argv[4]; - tag = argv[5]; - } - - //open input files - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (id && idval) { - if (tag) { - ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data); - } - else { - ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data); - } - - if (ret == 0) { - printf("%s\n", data.s); - } - else if (ret == -1) { - printf("No matching tag found\n"); - goto end; - } - else { - printf("Failed to find header line\n"); - goto end; - } - } - else { - //get count of given header type - linecnt = sam_hdr_count_lines(in_samhdr, header); - if (linecnt == 0) { - printf("No matching line found\n"); - goto end; - } - for (c = 0; c < linecnt; ++c ) { - if (tag) { - //non CO, get the tag requested - ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data); - } - else { - //CO header, there are no tags but the whole line - ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data); - } - - if (ret == 0) { - printf("%s\n", data.s); - continue; - } - else if (ret == -1) { - printf("Tag 
not present\n"); - continue; - } - else { - printf("Failed to get tag\n"); - goto end; - } - } - } - ret = EXIT_SUCCESS; - -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - ks_free(&data); - return ret; -} diff --git a/src/htslib-1.18/samples/read_refname.c b/src/htslib-1.18/samples/read_refname.c deleted file mode 100644 index adbc711..0000000 --- a/src/htslib-1.18/samples/read_refname.c +++ /dev/null @@ -1,125 +0,0 @@ -/* read_refname.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_refname infile minsize\n\ -This shows name of references which has length above the given size\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *id = NULL; - int c = 0, ret = EXIT_FAILURE, linecnt = 0, pos = 0; - samFile *infile = NULL; - sam_hdr_t *in_samhdr = NULL; - kstring_t data = KS_INITIALIZE; - int64_t minsize = 0, size = 0; - - if (argc != 3 && argc != 2) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - if (argc == 3) { - minsize = atoll(argv[2]); - } - - //open input files - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - linecnt = sam_hdr_count_lines(in_samhdr, "SQ"); //get reference count - if (linecnt <= 0) { - if (!linecnt) { - printf("No reference line present\n"); - } - else { - printf("Failed to get reference line count\n"); - } - goto end; - } - //iterate and check each reference's length - for (pos = 1, c = 0; c < linecnt; ++c) { - if ((ret = sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "LN", &data) == -2)) { - printf("Failed to get length\n"); - goto 
end; - } - else if (ret == -1) { - //length not present, ignore - continue; - } - //else have length - size = atoll(data.s); - if (size < minsize) { - //not required - continue; - } - if (!(id = sam_hdr_line_name(in_samhdr, "SQ", c))) { //sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "SN", &data) can also do the same! - printf("Failed to get id for reference data\n"); - goto end; - } - printf("%d,%s,%s\n", pos, id, data.s); - pos++; - } - - ret = EXIT_SUCCESS; - -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - ks_free(&data); - return ret; -} diff --git a/src/htslib-1.18/samples/rem_header.c b/src/htslib-1.18/samples/rem_header.c deleted file mode 100644 index a0b6510..0000000 --- a/src/htslib-1.18/samples/rem_header.c +++ /dev/null @@ -1,138 +0,0 @@ -/* rem_header.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: rem_header infile header [id]\n\ -Removes header line of given type and id\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *header = NULL, *idval = NULL; - char *id = NULL; - int ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - - //update_header infile header idval tag value - if (argc <3 || argc > 4) { - //3 & 4 are ok, 3-> all of given header type, 4->given id of given header type to be removed - print_usage(stderr); - goto end; - } - inname = argv[1]; - header = argv[2]; - if (argc == 4) { - idval = argv[3]; - } - - //unique identifier for each of the header types - if (header[0] == 'H' && header[1] == 'D') { - id = NULL; - } - else if (header[0] == 'S' && header[1] == 'Q') { - id = "SN"; - } - else if (header[0] == 'R' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'P' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'C' && header[1] == 'O') { - //CO field can be removed using the position of it using sam_hdr_remove_line_pos - id = ""; - } - else { - printf("Invalid header type\n"); - goto end; - } - - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); 
- goto end; - } - if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update - printf("Could not open stdout\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - if (idval) { - //remove specific line - if (sam_hdr_remove_line_id(in_samhdr, header, id, idval)) { - printf("Failed to remove header line\n"); - goto end; - } - } - else { - //remove multiple lines of a header type - if (sam_hdr_remove_lines(in_samhdr, header, id, NULL)) { - printf("Failed to remove header line\n"); - goto end; - } - } - //write output - if (sam_hdr_write(outfile, in_samhdr) < 0) { - printf("Failed to write output\n"); - goto end; - } - ret = EXIT_SUCCESS; - //bam data write to follow.... -end: - //cleanupq - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - return ret; -} diff --git a/src/htslib-1.18/samples/sample.sam b/src/htslib-1.18/samples/sample.sam deleted file mode 100644 index e56efd6..0000000 --- a/src/htslib-1.18/samples/sample.sam +++ /dev/null @@ -1,29 +0,0 @@ -@HD VN:1.17 SO:unknown -@SQ SN:T1 LN:40 -@SQ SN:T2 LN:40 -@CO @SQ SN* LN* AH AN AS DS M5 SP TP UR -@CO @RG ID* BC CN DS DT FO KS LB PG PI PL PM PU SM -@CO @PG ID* PN CL PP DS VN -@CO this is a dummy alignment file to demonstrate different abilities of hts apis -@CO QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… -@CO 1234567890123456789012345678901234567890 -@CO AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT T1 -@CO TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT T2 -@CO ITR1-ITR2M, ITR2-ITR2M are proper pairs in T1 and T2, UNMP1 is partly mapped and pair is unmapped, UNMP2 & 3 are unmappped -@CO A1-A2, A4-A3 are proper pairs with A4-A3 in different read order. 
A5 is secondary alignment -ITR1 99 T1 5 40 4M = 33 10 ACTG ()() -ITR2 147 T2 23 49 2M = 35 -10 TT ** -ITR2M 99 T2 35 51 2M = 23 10 AA && -ITR1M 147 T1 33 37 4M = 5 -10 ACTG $$$$ -UNMP1 73 T1 21 40 3M * 0 5 GGG &&1 -UNMP2 141 * 0 0 * * 0 7 AA && -UNMP3 77 * 0 0 * * 0 5 GGG &&2 -A1 99 T1 25 35 6M = 31 8 ACTGTT ****** -A2 147 T1 31 33 6M = 25 -8 ACTGTT ()()() -A3 147 T2 23 47 2M1X = 12 -5 TTG ((( -A4 99 T2 12 50 3M = 23 5 GAA ()( -A5 355 T1 25 35 4M = 33 5 ACTG PPPP -B1 99 T1 25 35 6M = 31 8 GCTATT ****** -B3 147 T2 23 47 2M1X = 12 -5 TAG ((( -B4 99 T2 12 50 3M = 23 5 GAT ()( -B5 355 T1 25 35 4M = 33 5 AGTG PPPP diff --git a/src/htslib-1.18/samples/split.c b/src/htslib-1.18/samples/split.c deleted file mode 100644 index 2eb9e6b..0000000 --- a/src/htslib-1.18/samples/split.c +++ /dev/null @@ -1,153 +0,0 @@ -/* split.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: split infile outdir\n\ -Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ -Shows the basic writing of output\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL; - char *file1 = NULL, *file2 = NULL; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - - if (argc != 3) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - outdir = argv[2]; - - //allocate space for output - size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination - file1 = malloc(size); - file2 = malloc(size); - if (!file1 || !file2) { - printf("Failed to set output path\n"); - goto end; - } - - //output file names - snprintf(file1, size, "%s/1.sam", outdir); //for SAM output - snprintf(file2, size, "%s/2.bam", outdir); //for BAM output - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input file - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - 
goto end; - } - //open output files - w write as SAM, wb write as BAM - outfile1 = sam_open(file1, "w"); //as SAM - outfile2 = sam_open(file2, "wb"); //as BAM - if (!outfile1 || !outfile2) { - printf("Could not open output file\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - else if (bamdata->core.flag & BAM_FREAD2) { - if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (file1) { - free(file1); - } - if (file2) { - free(file2); - } - if (outfile1) { - sam_close(outfile1); - } - if (outfile2) { - sam_close(outfile2); - } - return ret; -} diff --git a/src/htslib-1.18/samples/split2.c b/src/htslib-1.18/samples/split2.c deleted file mode 100644 index 2354abf..0000000 --- a/src/htslib-1.18/samples/split2.c +++ /dev/null @@ -1,158 +0,0 @@ -/* split2.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: split infile outdir\n\ -Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ -Shows file type selection through name and format api\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL; - char *file1 = NULL, *file2 = NULL, mode1[5] = "w", mode2[5] = "w"; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = 
NULL, *outfile1 = NULL, *outfile2 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - - if (argc != 3) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - outdir = argv[2]; - - //allocate space for output - size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam.gz") + 1); //space for output file name and null termination - file1 = malloc(size); - file2 = malloc(size); - if (!file1 || !file2) { - printf("Failed to set output path\n"); - goto end; - } - - //output file names - snprintf(file1, size, "%s/1.sam.gz", outdir); //name of Read1 file - snprintf(file2, size, "%s/2.sam", outdir); //name of Read2 file - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //set file open mode based on file name for 1st and as explicit for 2nd - if ((sam_open_mode(mode1+1, file1, NULL) == -1) || (sam_open_mode(mode2+1, file2, "sam.gz") == -1)) { - printf("Failed to set open mode\n"); - goto end; - } - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //open output files - outfile1 = sam_open(file1, mode1); //as compressed SAM through sam_open - outfile2 = sam_open_format(file2, mode2, NULL); //as compressed SAM through sam_open_format - if (!outfile1 || !outfile2) { - printf("Could not open output file\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - else if 
(bamdata->core.flag & BAM_FREAD2) { - if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (file1) { - free(file1); - } - if (file2) { - free(file2); - } - if (outfile1) { - sam_close(outfile1); - } - if (outfile2) { - sam_close(outfile2); - } - return ret; -} diff --git a/src/htslib-1.18/samples/split_thread1.c b/src/htslib-1.18/samples/split_thread1.c deleted file mode 100644 index 40d2dfd..0000000 --- a/src/htslib-1.18/samples/split_thread1.c +++ /dev/null @@ -1,161 +0,0 @@ -/* split_thread1.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: split_t1 infile outdir\n\ -Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ -Shows the usage of basic thread in htslib\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL; - char *file1 = NULL, *file2 = NULL; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - - if (argc != 3) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - outdir = argv[2]; - - //allocate space for output - size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination - file1 = malloc(size); - file2 = malloc(size); - if (!file1 || !file2) { - printf("Failed to set output path\n"); - goto end; - } - - //output file names - snprintf(file1, size, "%s/1.sam", outdir); //for SAM output - snprintf(file2, size, "%s/2.bam", outdir); //for BAM output - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input file - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", 
inname); - goto end; - } - //open output files - w write as SAM, wb write as BAM - outfile1 = sam_open(file1, "w"); //as SAM - outfile2 = sam_open(file2, "wb"); //as BAM - if (!outfile1 || !outfile2) { - printf("Could not open output file\n"); - goto end; - } - - //create file specific threads - if (hts_set_opt(infile, HTS_OPT_NTHREADS, 2) < 0 || //2 thread specific for reading - hts_set_opt(outfile1, HTS_OPT_NTHREADS, 1) < 0 || //1 thread specific for sam write - hts_set_opt(outfile2, HTS_OPT_NTHREADS, 1) < 0) { //1 thread specific for bam write - printf("Failed to set thread options\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - else if (bamdata->core.flag & BAM_FREAD2) { - if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (file1) { - free(file1); - } - if (file2) { - free(file2); - } - if (outfile1) { - sam_close(outfile1); - } - if (outfile2) { - sam_close(outfile2); - } - return ret; -} diff --git a/src/htslib-1.18/samples/split_thread2.c b/src/htslib-1.18/samples/split_thread2.c deleted file mode 100644 index dab897b..0000000 --- a/src/htslib-1.18/samples/split_thread2.c 
+++ /dev/null @@ -1,171 +0,0 @@ -/* split_thread2.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: split_t2 infile outdir\n\ -Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ -Shows the usage of thread pool\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL; - char 
*file1 = NULL, *file2 = NULL; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - htsThreadPool tpool = {NULL, 0}; - - if (argc != 3) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - outdir = argv[2]; - - //allocate space for output - size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination - file1 = malloc(size); - file2 = malloc(size); - if (!file1 || !file2) { - printf("Failed to set output path\n"); - goto end; - } - - //output file names - snprintf(file1, size, "%s/1.sam", outdir); //for SAM output - snprintf(file2, size, "%s/2.bam", outdir); //for BAM output - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input file - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //open output files - w write as SAM, wb write as BAM - outfile1 = sam_open(file1, "w"); //as SAM - outfile2 = sam_open(file2, "wb"); //as BAM - if (!outfile1 || !outfile2) { - printf("Could not open output file\n"); - goto end; - } - - //create a pool of 4 threads - if (!(tpool.pool = hts_tpool_init(4))) { - printf("Failed to initialize the thread pool\n"); - goto end; - } - //share the pool with all the 3 files - if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 || - hts_set_opt(outfile1, HTS_OPT_THREAD_POOL, &tpool) < 0 || - hts_set_opt(outfile2, HTS_OPT_THREAD_POOL, &tpool) < 0) { - printf("Failed to set thread options\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto 
end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - else if (bamdata->core.flag & BAM_FREAD2) { - if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (file1) { - free(file1); - } - if (file2) { - free(file2); - } - if (outfile1) { - sam_close(outfile1); - } - if (outfile2) { - sam_close(outfile2); - } - if (tpool.pool) { - hts_tpool_destroy(tpool.pool); - } - return ret; -} diff --git a/src/htslib-1.18/samples/update_header.c b/src/htslib-1.18/samples/update_header.c deleted file mode 100644 index f6b1680..0000000 --- a/src/htslib-1.18/samples/update_header.c +++ /dev/null @@ -1,131 +0,0 @@ -/* update_header.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: update_header infile header idval tag value\n\ -Updates the tag's value on line given in id on header of given type\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *tag = NULL, *idval = NULL, *val = NULL, *header = NULL; - char *id = NULL; - int ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - - //update_header infile header idval tag value - if (argc != 6) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - header = argv[2]; - idval = argv[3]; - tag = argv[4]; - val = argv[5]; - - //unique identifier for each of the header types - if (header[0] == 'H' && header[1] == 'D') { - id = NULL; - printf("This sample doesnt not support modifying HD fields\n"); - } - else if (header[0] == 'S' && header[1] == 'Q') { - id = "SN"; - } - else if (header[0] == 'R' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'P' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'C' && header[1] == 'O') 
{ - tag = NULL; - id = ""; - printf("This sample doesnt not support modifying CO fields\n"); - } - else { - printf("Invalid header type\n"); - goto end; - } - - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update - printf("Could not open stdout\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - //update with new data - if (sam_hdr_update_line(in_samhdr, header, id, idval, tag, val, NULL) < 0) { - printf("Failed to update data\n"); - goto end; - } - //write output - if (sam_hdr_write(outfile, in_samhdr) < 0) { - printf("Failed to write output\n"); - goto end; - } - ret = EXIT_SUCCESS; - //bam data write to follow.... -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - return ret; -} diff --git a/src/htslib-1.18/samples/write_fast.c b/src/htslib-1.18/samples/write_fast.c deleted file mode 100644 index ef78176..0000000 --- a/src/htslib-1.18/samples/write_fast.c +++ /dev/null @@ -1,101 +0,0 @@ -/* write_fast.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: write_fast \n\ -Appends a fasta/fastq file.\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *outname = NULL; //output file name - int ret = EXIT_FAILURE; - samFile *outfile = NULL; //sam file - sam_hdr_t *out_samhdr = NULL; //header of file - bam1_t *bamdata = NULL; //to hold the read data - char mode[4] = "a"; - - if (argc != 2) { - print_usage(stdout); - goto end; - } - outname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - if (sam_open_mode(mode + 1, outname, NULL) < 0) { - printf("Invalid file name\n"); - goto end; - } - //open output file - if (!(outfile = sam_open(outname, mode))) { - printf("Could not open %s\n", outname); - goto end; - } - //dummy data - if (bam_set1(bamdata, sizeof("test"), "test", BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, 10, "AACTGACTGA", "1234567890", 0) < 0) { - printf("Failed to set data\n"); - goto end; - } - if (sam_write1(outfile, out_samhdr, bamdata) < 0) { - 
printf("Failed to write data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //clean up - if (out_samhdr) { - sam_hdr_destroy(out_samhdr); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.18/synced_bcf_reader.c b/src/htslib-1.18/synced_bcf_reader.c deleted file mode 100644 index a43ab15..0000000 --- a/src/htslib-1.18/synced_bcf_reader.c +++ /dev/null @@ -1,1500 +0,0 @@ -/* synced_bcf_reader.c -- stream through multiple VCF files. - - Copyright (C) 2012-2023 Genome Research Ltd. - - Author: Petr Danecek - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "htslib/synced_bcf_reader.h" -#include "htslib/kseq.h" -#include "htslib/khash_str2int.h" -#include "htslib/bgzf.h" -#include "htslib/thread_pool.h" -#include "bcf_sr_sort.h" - -#define REQUIRE_IDX_ 1 -#define ALLOW_NO_IDX_ 2 - -// Maximum indexable coordinate of .csi, for default min_shift of 14. -// This comes out to about 17 Tbp. Limiting factor is the bin number, -// which is a uint32_t in CSI. The highest number of levels compatible -// with this is 10 (needs 31 bits). -#define MAX_CSI_COOR ((1LL << (14 + 30)) - 1) - -typedef struct -{ - hts_pos_t start, end; // records are marked for skipping have start>end -} -region1_t; - -typedef struct bcf_sr_region_t -{ - region1_t *regs; // regions will sorted and merged, redundant records marked for skipping have start>end - int nregs, mregs, creg; // creg: the current active region -} -region_t; - -#define BCF_SR_AUX(x) ((aux_t*)((x)->aux)) -typedef struct -{ - sr_sort_t sort; - int regions_overlap, targets_overlap; -} -aux_t; - -static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start, hts_pos_t end); -static bcf_sr_regions_t *_regions_init_string(const char *str); -static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec); -static void _regions_sort_and_merge(bcf_sr_regions_t *reg); -static int _bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end, int missed_reg_handler); -static void bcf_sr_seek_start(bcf_srs_t *readers); - -char *bcf_sr_strerror(int errnum) -{ - switch (errnum) - { - case open_failed: - return strerror(errno); - case not_bgzf: - return "not compressed with bgzip"; - case idx_load_failed: - return "could not load index"; - case file_type_error: - return "unknown file type"; - case api_usage_error: - return "API usage 
error"; - case header_error: - return "could not parse header"; - case no_eof: - return "no BGZF EOF marker; file may be truncated"; - case no_memory: - return "Out of memory"; - case vcf_parse_error: - return "VCF parse error"; - case bcf_read_error: - return "BCF read error"; - case noidx_error: - return "merge of unindexed files failed"; - default: return ""; - } -} - -int bcf_sr_set_opt(bcf_srs_t *readers, bcf_sr_opt_t opt, ...) -{ - va_list args; - switch (opt) - { - case BCF_SR_REQUIRE_IDX: - readers->require_index = REQUIRE_IDX_; - return 0; - - case BCF_SR_ALLOW_NO_IDX: - readers->require_index = ALLOW_NO_IDX_; - return 0; - - case BCF_SR_PAIR_LOGIC: - va_start(args, opt); - BCF_SR_AUX(readers)->sort.pair = va_arg(args, int); - return 0; - - case BCF_SR_REGIONS_OVERLAP: - va_start(args, opt); - BCF_SR_AUX(readers)->regions_overlap = va_arg(args, int); - if ( readers->regions ) readers->regions->overlap = BCF_SR_AUX(readers)->regions_overlap; - return 0; - - case BCF_SR_TARGETS_OVERLAP: - va_start(args, opt); - BCF_SR_AUX(readers)->targets_overlap = va_arg(args, int); - if ( readers->targets ) readers->targets->overlap = BCF_SR_AUX(readers)->targets_overlap; - return 0; - - default: - break; - } - return 1; -} - -static int *init_filters(bcf_hdr_t *hdr, const char *filters, int *nfilters) -{ - kstring_t str = {0,0,0}; - const char *tmp = filters, *prev = filters; - int nout = 0, *out = NULL; - while ( 1 ) - { - if ( *tmp==',' || !*tmp ) - { - int *otmp = (int*) realloc(out, (nout+1)*sizeof(int)); - if (!otmp) - goto err; - out = otmp; - if ( tmp-prev==1 && *prev=='.' 
) - { - out[nout] = -1; - nout++; - } - else - { - str.l = 0; - kputsn(prev, tmp-prev, &str); - out[nout] = bcf_hdr_id2int(hdr, BCF_DT_ID, str.s); - if ( out[nout]>=0 ) nout++; - } - if ( !*tmp ) break; - prev = tmp+1; - } - tmp++; - } - if ( str.m ) free(str.s); - *nfilters = nout; - return out; - - err: - if (str.m) free(str.s); - free(out); - return NULL; -} - -int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file) -{ - if ( readers->nreaders || readers->regions ) - { - if ( readers->regions ) bcf_sr_regions_destroy(readers->regions); - readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2); - bcf_sr_seek_start(readers); - return 0; - } - - readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2); - if ( !readers->regions ) return -1; - readers->explicit_regs = 1; - readers->require_index = REQUIRE_IDX_; - readers->regions->overlap = BCF_SR_AUX(readers)->regions_overlap; - return 0; -} - -int bcf_sr_set_targets(bcf_srs_t *readers, const char *targets, int is_file, int alleles) -{ - if ( readers->nreaders || readers->targets ) - { - hts_log_error("Must call bcf_sr_set_targets() before bcf_sr_add_reader()"); - return -1; - } - if ( targets[0]=='^' ) - { - readers->targets_exclude = 1; - targets++; - } - readers->targets = bcf_sr_regions_init(targets,is_file,0,1,-2); - if ( !readers->targets ) return -1; - readers->targets_als = alleles; - readers->targets->overlap = BCF_SR_AUX(readers)->targets_overlap; - return 0; -} - -int bcf_sr_set_threads(bcf_srs_t *files, int n_threads) -{ - if (!(files->n_threads = n_threads)) - return 0; - - files->p = calloc(1, sizeof(*files->p)); - if (!files->p) { - files->errnum = no_memory; - return -1; - } - if (!(files->p->pool = hts_tpool_init(n_threads))) - return -1; - - return 0; -} - -void bcf_sr_destroy_threads(bcf_srs_t *files) { - if (!files->p) - return; - - if (files->p->pool) - hts_tpool_destroy(files->p->pool); - free(files->p); -} - -int bcf_sr_add_reader(bcf_srs_t *files, const char 
*fname) -{ - char fmode[5]; - strcpy(fmode, "r"); - vcf_open_mode(fmode+1, fname, NULL); - htsFile* file_ptr = hts_open(fname, fmode); - if ( ! file_ptr ) { - files->errnum = open_failed; - return 0; - } - - files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1)); - files->has_line[files->nreaders] = 0; - files->readers = (bcf_sr_t*) realloc(files->readers, sizeof(bcf_sr_t)*(files->nreaders+1)); - bcf_sr_t *reader = &files->readers[files->nreaders++]; - memset(reader,0,sizeof(bcf_sr_t)); - - reader->file = file_ptr; - - files->errnum = 0; - - if ( reader->file->format.compression==bgzf ) - { - BGZF *bgzf = hts_get_bgzfp(reader->file); - if ( bgzf && bgzf_check_EOF(bgzf) == 0 ) { - files->errnum = no_eof; - hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", fname); - } - if (files->p) - bgzf_thread_pool(bgzf, files->p->pool, files->p->qsize); - } - - if ( files->require_index==REQUIRE_IDX_ ) - { - if ( reader->file->format.format==vcf ) - { - if ( reader->file->format.compression!=bgzf ) - { - files->errnum = not_bgzf; - return 0; - } - - reader->tbx_idx = tbx_index_load(fname); - if ( !reader->tbx_idx ) - { - files->errnum = idx_load_failed; - return 0; - } - - reader->header = bcf_hdr_read(reader->file); - } - else if ( reader->file->format.format==bcf ) - { - if ( reader->file->format.compression!=bgzf ) - { - files->errnum = not_bgzf; - return 0; - } - - reader->header = bcf_hdr_read(reader->file); - - reader->bcf_idx = bcf_index_load(fname); - if ( !reader->bcf_idx ) - { - files->errnum = idx_load_failed; - return 0; - } - } - else - { - files->errnum = file_type_error; - return 0; - } - } - else - { - if ( reader->file->format.format==bcf || reader->file->format.format==vcf ) - { - reader->header = bcf_hdr_read(reader->file); - } - else - { - files->errnum = file_type_error; - return 0; - } - files->streaming = 1; - } - if ( files->streaming && files->nreaders>1 ) - { - static int no_index_warned = 0; - if ( 
files->require_index==ALLOW_NO_IDX_ && !no_index_warned ) - { - hts_log_warning("Using multiple unindexed files may produce errors, make sure chromosomes are in the same order!"); - no_index_warned = 1; - } - if ( files->require_index!=ALLOW_NO_IDX_ ) - { - files->errnum = api_usage_error; - hts_log_error("Must set require_index when the number of readers is greater than one"); - return 0; - } - } - if ( files->streaming && files->regions ) - { - files->errnum = api_usage_error; - hts_log_error("Cannot tabix-jump in streaming mode"); - return 0; - } - if ( !reader->header ) - { - files->errnum = header_error; - return 0; - } - - reader->fname = strdup(fname); - if ( files->apply_filters ) - reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids); - - // Update list of chromosomes - if ( !files->explicit_regs && !files->streaming ) - { - int n = 0, i; - const char **names = reader->tbx_idx ? tbx_seqnames(reader->tbx_idx, &n) : bcf_hdr_seqnames(reader->header, &n); - for (i=0; iregions ) - files->regions = _regions_init_string(names[i]); - else - _regions_add(files->regions, names[i], -1, -1); - } - free(names); - _regions_sort_and_merge(files->regions); - } - - if ( files->require_index==ALLOW_NO_IDX_ && files->nreaders > 1 ) - { - bcf_hdr_t *hdr0 = files->readers[0].header; - bcf_hdr_t *hdr1 = reader->header; - if ( hdr0->n[BCF_DT_CTG]!=hdr1->n[BCF_DT_CTG] ) - { - files->errnum = noidx_error; - hts_log_error("Different number of sequences in the header, refusing to stream multiple unindexed files"); - return 0; - } - int i; - for (i=0; in[BCF_DT_CTG]; i++) - { - if ( strcmp(bcf_hdr_id2name(hdr0,i),bcf_hdr_id2name(hdr1,i)) ) - { - files->errnum = noidx_error; - hts_log_error("Sequences in the header appear in different order, refusing to stream multiple unindexed files"); - return 0; - } - } - } - - return 1; -} - -bcf_srs_t *bcf_sr_init(void) -{ - bcf_srs_t *files = (bcf_srs_t*) calloc(1,sizeof(bcf_srs_t)); - files->aux = 
(aux_t*) calloc(1,sizeof(aux_t)); - bcf_sr_sort_init(&BCF_SR_AUX(files)->sort); - bcf_sr_set_opt(files,BCF_SR_REGIONS_OVERLAP,1); - bcf_sr_set_opt(files,BCF_SR_TARGETS_OVERLAP,0); - return files; -} - -static void bcf_sr_destroy1(bcf_sr_t *reader) -{ - free(reader->fname); - if ( reader->tbx_idx ) tbx_destroy(reader->tbx_idx); - if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx); - bcf_hdr_destroy(reader->header); - hts_close(reader->file); - if ( reader->itr ) tbx_itr_destroy(reader->itr); - int j; - for (j=0; jmbuffer; j++) - bcf_destroy1(reader->buffer[j]); - free(reader->buffer); - free(reader->samples); - free(reader->filter_ids); -} - -void bcf_sr_destroy(bcf_srs_t *files) -{ - int i; - for (i=0; inreaders; i++) - bcf_sr_destroy1(&files->readers[i]); - free(files->has_line); - free(files->readers); - for (i=0; in_smpl; i++) free(files->samples[i]); - free(files->samples); - if (files->targets) bcf_sr_regions_destroy(files->targets); - if (files->regions) bcf_sr_regions_destroy(files->regions); - if (files->tmps.m) free(files->tmps.s); - if (files->n_threads) bcf_sr_destroy_threads(files); - bcf_sr_sort_destroy(&BCF_SR_AUX(files)->sort); - free(files->aux); - free(files); -} - -void bcf_sr_remove_reader(bcf_srs_t *files, int i) -{ - assert( !files->samples ); // not ready for this yet - bcf_sr_sort_remove_reader(files, &BCF_SR_AUX(files)->sort, i); - bcf_sr_destroy1(&files->readers[i]); - if ( i+1 < files->nreaders ) - { - memmove(&files->readers[i], &files->readers[i+1], (files->nreaders-i-1)*sizeof(bcf_sr_t)); - memmove(&files->has_line[i], &files->has_line[i+1], (files->nreaders-i-1)*sizeof(int)); - } - files->nreaders--; -} - -#if DEBUG_SYNCED_READER -void debug_buffer(FILE *fp, bcf_sr_t *reader) -{ - int j; - for (j=0; j<=reader->nbuffer; j++) - { - bcf1_t *line = reader->buffer[j]; - fprintf(fp,"\t%p\t%s%s\t%s:%"PRIhts_pos"\t%s ", (void*)line,reader->fname,j==0?"*":" 
",reader->header->id[BCF_DT_CTG][line->rid].key,line->pos+1,line->n_allele?line->d.allele[0]:""); - int k; - for (k=1; kn_allele; k++) fprintf(fp," %s", line->d.allele[k]); - fprintf(fp,"\n"); - } -} - -void debug_buffers(FILE *fp, bcf_srs_t *files) -{ - int i; - for (i=0; inreaders; i++) - { - fprintf(fp, "has_line: %d\t%s\n", bcf_sr_has_line(files,i),files->readers[i].fname); - debug_buffer(fp, &files->readers[i]); - } - fprintf(fp,"\n"); -} -#endif - -static inline int has_filter(bcf_sr_t *reader, bcf1_t *line) -{ - int i, j; - if ( !line->d.n_flt ) - { - for (j=0; jnfilter_ids; j++) - if ( reader->filter_ids[j]<0 ) return 1; - return 0; - } - for (i=0; id.n_flt; i++) - { - for (j=0; jnfilter_ids; j++) - if ( line->d.flt[i]==reader->filter_ids[j] ) return 1; - } - return 0; -} - -static int _reader_seek(bcf_sr_t *reader, const char *seq, hts_pos_t start, hts_pos_t end) -{ - if ( end>=MAX_CSI_COOR ) - { - hts_log_error("The coordinate is out of csi index limit: %"PRIhts_pos, end+1); - exit(1); - } - if ( reader->itr ) - { - hts_itr_destroy(reader->itr); - reader->itr = NULL; - } - reader->nbuffer = 0; - if ( reader->tbx_idx ) - { - int tid = tbx_name2id(reader->tbx_idx, seq); - if ( tid==-1 ) return -1; // the sequence not present in this file - reader->itr = tbx_itr_queryi(reader->tbx_idx,tid,start,end+1); - } - else - { - int tid = bcf_hdr_name2id(reader->header, seq); - if ( tid==-1 ) return -1; // the sequence not present in this file - reader->itr = bcf_itr_queryi(reader->bcf_idx,tid,start,end+1); - } - if (!reader->itr) { - hts_log_error("Could not seek: %s:%"PRIhts_pos"-%"PRIhts_pos, seq, start + 1, end + 1); - assert(0); - } - return 0; -} - -/* - * _readers_next_region() - jumps to next region if necessary - * Returns 0 on success or -1 when there are no more regions left - */ -static int _readers_next_region(bcf_srs_t *files) -{ - // Need to open new chromosome? 
Check number of lines in all readers' buffers - int i, eos = 0; - for (i=0; inreaders; i++) - if ( !files->readers[i].itr && !files->readers[i].nbuffer ) eos++; - - if ( eos!=files->nreaders ) - { - // Some of the readers still has buffered lines - return 0; - } - - // No lines in the buffer, need to open new region or quit. - int prev_iseq = files->regions->iseq; - hts_pos_t prev_end = files->regions->end; - if ( bcf_sr_regions_next(files->regions)<0 ) return -1; - files->regions->prev_end = prev_iseq==files->regions->iseq ? prev_end : -1; - - for (i=0; inreaders; i++) - _reader_seek(&files->readers[i],files->regions->seq_names[files->regions->iseq],files->regions->start,files->regions->end); - - return 0; -} - -static void _set_variant_boundaries(bcf1_t *rec, hts_pos_t *beg, hts_pos_t *end) -{ - hts_pos_t off; - if ( rec->n_allele ) - { - off = rec->rlen; - bcf_unpack(rec, BCF_UN_STR); - int i; - for (i=1; in_allele; i++) - { - // Make symbolic alleles start at POS, although this is not strictly true for - // , where POS should be the position BEFORE the deletion/insertion. - // However, since arbitrary symbolic alleles can be defined by the user, we - // will simplify the interpretation of --targets-overlap and --region-overlap. 
- int j = 0; - char *ref = rec->d.allele[0]; - char *alt = rec->d.allele[i]; - while ( ref[j] && alt[j] && ref[j]==alt[j] ) j++; - if ( off > j ) off = j; - if ( !off ) break; - } - } - else - off = 0; - - *beg = rec->pos + off; - *end = rec->pos + rec->rlen - 1; -} - -/* - * _reader_fill_buffer() - buffers all records with the same coordinate - */ -static int _reader_fill_buffer(bcf_srs_t *files, bcf_sr_t *reader) -{ - // Return if the buffer is full: the coordinate of the last buffered record differs - if ( reader->nbuffer && reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) return 0; - - // No iterator (sequence not present in this file) and not streaming - if ( !reader->itr && !files->streaming ) return 0; - - // Fill the buffer with records starting at the same position - int i, ret = 0; - while (1) - { - if ( reader->nbuffer+1 >= reader->mbuffer ) - { - // Increase buffer size - reader->mbuffer += 8; - reader->buffer = (bcf1_t**) realloc(reader->buffer, sizeof(bcf1_t*)*reader->mbuffer); - for (i=8; i>0; i--) // initialize - { - reader->buffer[reader->mbuffer-i] = bcf_init1(); - reader->buffer[reader->mbuffer-i]->max_unpack = files->max_unpack; - reader->buffer[reader->mbuffer-i]->pos = -1; // for rare cases when VCF starts from 1 - } - } - if ( files->streaming ) - { - if ( reader->file->format.format==vcf ) - { - ret = hts_getline(reader->file, KS_SEP_LINE, &files->tmps); - if ( ret < -1 ) files->errnum = bcf_read_error; - if ( ret < 0 ) break; // no more lines or an error - ret = vcf_parse1(&files->tmps, reader->header, reader->buffer[reader->nbuffer+1]); - if ( ret<0 ) { files->errnum = vcf_parse_error; break; } - } - else if ( reader->file->format.format==bcf ) - { - ret = bcf_read1(reader->file, reader->header, reader->buffer[reader->nbuffer+1]); - if ( ret < -1 ) files->errnum = bcf_read_error; - if ( ret < 0 ) break; // no more lines or an error - } - else - { - hts_log_error("Fixme: not ready for this"); - exit(1); - } - } - else if ( 
reader->tbx_idx ) - { - ret = tbx_itr_next(reader->file, reader->tbx_idx, reader->itr, &files->tmps); - if ( ret < -1 ) files->errnum = bcf_read_error; - if ( ret < 0 ) break; // no more lines or an error - ret = vcf_parse1(&files->tmps, reader->header, reader->buffer[reader->nbuffer+1]); - if ( ret<0 ) { files->errnum = vcf_parse_error; break; } - } - else - { - ret = bcf_itr_next(reader->file, reader->itr, reader->buffer[reader->nbuffer+1]); - if ( ret < -1 ) files->errnum = bcf_read_error; - if ( ret < 0 ) break; // no more lines or an error - bcf_subset_format(reader->header,reader->buffer[reader->nbuffer+1]); - } - - // Prevent creation of duplicates from records overlapping multiple regions - // and recognize true variant overlaps vs record overlaps (e.g. TA>T vs A>-) - if ( files->regions ) - { - hts_pos_t beg, end; - if ( BCF_SR_AUX(files)->regions_overlap==0 ) - beg = end = reader->buffer[reader->nbuffer+1]->pos; - else if ( BCF_SR_AUX(files)->regions_overlap==1 ) - { - beg = reader->buffer[reader->nbuffer+1]->pos; - end = reader->buffer[reader->nbuffer+1]->pos + reader->buffer[reader->nbuffer+1]->rlen - 1; - } - else if ( BCF_SR_AUX(files)->regions_overlap==2 ) - _set_variant_boundaries(reader->buffer[reader->nbuffer+1], &beg,&end); - else - { - hts_log_error("This should never happen, just to keep clang compiler happy: %d",BCF_SR_AUX(files)->targets_overlap); - exit(1); - } - if ( beg <= files->regions->prev_end || end < files->regions->start || beg > files->regions->end ) continue; - } - - // apply filter - if ( !reader->nfilter_ids ) - bcf_unpack(reader->buffer[reader->nbuffer+1], BCF_UN_STR); - else - { - bcf_unpack(reader->buffer[reader->nbuffer+1], BCF_UN_STR|BCF_UN_FLT); - if ( !has_filter(reader, reader->buffer[reader->nbuffer+1]) ) continue; - } - reader->nbuffer++; - - if ( reader->buffer[reader->nbuffer]->rid != reader->buffer[1]->rid ) break; - if ( reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) break; // the buffer is full - 
} - if ( ret<0 ) - { - // done for this region - tbx_itr_destroy(reader->itr); - reader->itr = NULL; - } - if ( files->require_index==ALLOW_NO_IDX_ && reader->buffer[reader->nbuffer]->rid < reader->buffer[1]->rid ) - { - hts_log_error("Sequences out of order, cannot stream multiple unindexed files: %s", reader->fname); - exit(1); - } - return 0; // FIXME: Check for more errs in this function -} - -/* - * _readers_shift_buffer() - removes the first line - */ -static void _reader_shift_buffer(bcf_sr_t *reader) -{ - if ( !reader->nbuffer ) return; - int i; - bcf1_t *tmp = reader->buffer[1]; - for (i=2; i<=reader->nbuffer; i++) - reader->buffer[i-1] = reader->buffer[i]; - if ( reader->nbuffer > 1 ) - reader->buffer[reader->nbuffer] = tmp; - reader->nbuffer--; -} - -static int next_line(bcf_srs_t *files) -{ - const char *chr = NULL; - hts_pos_t min_pos = HTS_POS_MAX; - - // Loop until next suitable line is found or all readers have finished - while ( 1 ) - { - // Get all readers ready for the next region. 
- if ( files->regions && _readers_next_region(files)<0 ) break; - - // Fill buffers and find the minimum chromosome - int i, min_rid = INT32_MAX; - for (i=0; inreaders; i++) - { - _reader_fill_buffer(files, &files->readers[i]); - if ( files->require_index==ALLOW_NO_IDX_ ) - { - if ( !files->readers[i].nbuffer ) continue; - if ( min_rid > files->readers[i].buffer[1]->rid ) min_rid = files->readers[i].buffer[1]->rid; - } - } - - for (i=0; inreaders; i++) - { - if ( !files->readers[i].nbuffer ) continue; - if ( files->require_index==ALLOW_NO_IDX_ && min_rid != files->readers[i].buffer[1]->rid ) continue; - - // Update the minimum coordinate - if ( min_pos > files->readers[i].buffer[1]->pos ) - { - min_pos = files->readers[i].buffer[1]->pos; - chr = bcf_seqname(files->readers[i].header, files->readers[i].buffer[1]); - assert(chr); - bcf_sr_sort_set_active(&BCF_SR_AUX(files)->sort, i); - } - else if ( min_pos==files->readers[i].buffer[1]->pos ) - bcf_sr_sort_add_active(&BCF_SR_AUX(files)->sort, i); - } - if ( min_pos==HTS_POS_MAX ) - { - if ( !files->regions ) break; - continue; - } - - // Skip this position if not present in targets - if ( files->targets ) - { - int match = 0; - for (i=0; inreaders; i++) - { - if ( !files->readers[i].nbuffer || files->readers[i].buffer[1]->pos!=min_pos ) continue; - hts_pos_t beg, end; - if ( BCF_SR_AUX(files)->targets_overlap==0 ) - beg = end = min_pos; - else if ( BCF_SR_AUX(files)->targets_overlap==1 ) - { - beg = min_pos; - end = min_pos + files->readers[i].buffer[1]->rlen - 1; - } - else if ( BCF_SR_AUX(files)->targets_overlap==2 ) - _set_variant_boundaries(files->readers[i].buffer[1], &beg,&end); - else - { - hts_log_error("This should never happen, just to keep clang compiler happy: %d",BCF_SR_AUX(files)->targets_overlap); - exit(1); - } - int overlap = bcf_sr_regions_overlap(files->targets, chr, beg, end)==0 ? 
1 : 0; - if ( (!files->targets_exclude && !overlap) || (files->targets_exclude && overlap) ) - _reader_shift_buffer(&files->readers[i]); - else - match = 1; - } - if ( !match ) - { - min_pos = HTS_POS_MAX; - chr = NULL; - continue; - } - } - break; // done: chr and min_pos are set - } - if ( !chr ) return 0; - - return bcf_sr_sort_next(files, &BCF_SR_AUX(files)->sort, chr, min_pos); -} - -int bcf_sr_next_line(bcf_srs_t *files) -{ - if ( !files->targets_als ) - return next_line(files); - - while (1) - { - int i, ret = next_line(files); - if ( !ret ) return ret; - - for (i=0; inreaders; i++) - if ( files->has_line[i] ) break; - - if ( _regions_match_alleles(files->targets, files->targets_als-1, files->readers[i].buffer[0]) ) return ret; - - // Check if there are more duplicate lines in the buffers. If not, return this line as if it - // matched the targets, even if there is a type mismatch - for (i=0; inreaders; i++) - { - if ( !files->has_line[i] ) continue; - if ( files->readers[i].nbuffer==0 || files->readers[i].buffer[1]->pos!=files->readers[i].buffer[0]->pos ) continue; - break; - } - if ( i==files->nreaders ) return ret; // no more lines left, output even if target alleles are not of the same type - } -} - -static void bcf_sr_seek_start(bcf_srs_t *readers) -{ - bcf_sr_regions_t *reg = readers->regions; - int i; - for (i=0; inseqs; i++) - reg->regs[i].creg = -1; - reg->iseq = 0; - reg->start = -1; - reg->end = -1; - reg->prev_seq = -1; - reg->prev_start = -1; - reg->prev_end = -1; -} - - -int bcf_sr_seek(bcf_srs_t *readers, const char *seq, hts_pos_t pos) -{ - if ( !readers->regions ) return 0; - bcf_sr_sort_reset(&BCF_SR_AUX(readers)->sort); - if ( !seq && !pos ) - { - // seek to start - bcf_sr_seek_start(readers); - return 0; - } - - int i, nret = 0; - - // Need to position both the readers and the regions. The latter is a bit of a mess - // because we can have in memory or external regions. 
The safe way is: - // - reset all regions as if they were not read from at all (bcf_sr_seek_start) - // - find the requested iseq (stored in the seq_hash) - // - position regions to the requested position (bcf_sr_regions_overlap) - bcf_sr_seek_start(readers); - if ( khash_str2int_get(readers->regions->seq_hash, seq, &i)>=0 ) readers->regions->iseq = i; - _bcf_sr_regions_overlap(readers->regions, seq, pos, pos, 0); - - for (i=0; inreaders; i++) - { - nret += _reader_seek(&readers->readers[i],seq,pos,MAX_CSI_COOR-1); - } - return nret; -} - -int bcf_sr_set_samples(bcf_srs_t *files, const char *fname, int is_file) -{ - int i, j, nsmpl, free_smpl = 0; - char **smpl = NULL; - - void *exclude = (fname[0]=='^') ? khash_str2int_init() : NULL; - if ( exclude || strcmp("-",fname) ) // "-" stands for all samples - { - smpl = hts_readlist(fname, is_file, &nsmpl); - if ( !smpl ) - { - hts_log_error("Could not read the file: \"%s\"", fname); - return 0; - } - if ( exclude ) - { - for (i=0; ireaders[0].header->samples; // intersection of all samples - nsmpl = bcf_hdr_nsamples(files->readers[0].header); - } - - files->samples = NULL; - files->n_smpl = 0; - for (i=0; inreaders; j++) - { - if ( bcf_hdr_id2int(files->readers[j].header, BCF_DT_SAMPLE, smpl[i])<0 ) break; - n_isec++; - } - if ( n_isec!=files->nreaders ) - { - hts_log_warning("The sample \"%s\" was not found in %s, skipping", - smpl[i], files->readers[n_isec].fname); - continue; - } - - files->samples = (char**) realloc(files->samples, (files->n_smpl+1)*sizeof(const char*)); - files->samples[files->n_smpl++] = strdup(smpl[i]); - } - - if ( exclude ) khash_str2int_destroy(exclude); - if ( free_smpl ) - { - for (i=0; in_smpl ) - { - if ( files->nreaders>1 ) - hts_log_warning("No samples in common"); - return 0; - } - for (i=0; inreaders; i++) - { - bcf_sr_t *reader = &files->readers[i]; - reader->samples = (int*) malloc(sizeof(int)*files->n_smpl); - reader->n_smpl = files->n_smpl; - for (j=0; jn_smpl; j++) - 
reader->samples[j] = bcf_hdr_id2int(reader->header, BCF_DT_SAMPLE, files->samples[j]); - } - return 1; -} - -// Add a new region into a list. On input the coordinates are 1-based, inclusive, then stored 0-based, -// inclusive. Sorting and merging step needed afterwards: qsort(..,cmp_regions) and merge_regions(). -static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start, hts_pos_t end) -{ - if ( start==-1 && end==-1 ) - { - start = 0; end = MAX_CSI_COOR-1; - } - else - { - start--; end--; // store 0-based coordinates - } - - if ( !reg->seq_hash ) - reg->seq_hash = khash_str2int_init(); - - int iseq; - if ( khash_str2int_get(reg->seq_hash, chr, &iseq)<0 ) - { - // the chromosome block does not exist - iseq = reg->nseqs++; - reg->seq_names = (char**) realloc(reg->seq_names,sizeof(char*)*reg->nseqs); - reg->regs = (region_t*) realloc(reg->regs,sizeof(region_t)*reg->nseqs); - memset(®->regs[reg->nseqs-1],0,sizeof(region_t)); - reg->seq_names[iseq] = strdup(chr); - reg->regs[iseq].creg = -1; - khash_str2int_set(reg->seq_hash,reg->seq_names[iseq],iseq); - } - - region_t *creg = ®->regs[iseq]; - hts_expand(region1_t,creg->nregs+1,creg->mregs,creg->regs); - creg->regs[creg->nregs].start = start; - creg->regs[creg->nregs].end = end; - creg->nregs++; - - return 0; // FIXME: check for errs in this function -} - -static int regions_cmp(const void *aptr, const void *bptr) -{ - region1_t *a = (region1_t*)aptr; - region1_t *b = (region1_t*)bptr; - if ( a->start < b->start ) return -1; - if ( a->start > b->start ) return 1; - if ( a->end < b->end ) return -1; - if ( a->end > b->end ) return 1; - return 0; -} -static void regions_merge(region_t *reg) -{ - int i = 0, j; - while ( inregs ) - { - j = i + 1; - while ( jnregs && reg->regs[i].end >= reg->regs[j].start ) - { - if ( reg->regs[i].end < reg->regs[j].end ) reg->regs[i].end = reg->regs[j].end; - reg->regs[j].start = 1; reg->regs[j].end = 0; // if beg>end, this region marked for skipping - j++; - } - i = 
j; - } -} -void _regions_sort_and_merge(bcf_sr_regions_t *reg) -{ - if ( !reg ) return; - - int i; - for (i=0; inseqs; i++) - { - qsort(reg->regs[i].regs, reg->regs[i].nregs, sizeof(*reg->regs[i].regs), regions_cmp); - regions_merge(®->regs[i]); - } -} - -// File name or a list of genomic locations. If file name, NULL is returned. -// Recognises regions in the form chr, chr:pos, chr:beg-end, chr:beg-, {weird-chr-name}:pos. -// Cannot use hts_parse_region() as that requires the header and if header is not present, -// wouldn't learn the chromosome name. -static bcf_sr_regions_t *_regions_init_string(const char *str) -{ - bcf_sr_regions_t *reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t)); - reg->start = reg->end = -1; - reg->prev_start = reg->prev_end = reg->prev_seq = -1; - - kstring_t tmp = {0,0,0}; - const char *sp = str, *ep = str; - hts_pos_t from, to; - while ( 1 ) - { - tmp.l = 0; - if ( *ep=='{' ) - { - while ( *ep && *ep!='}' ) ep++; - if ( !*ep ) - { - hts_log_error("Could not parse the region, mismatching braces in: \"%s\"", str); - goto exit_nicely; - } - ep++; - kputsn(sp+1,ep-sp-2,&tmp); - } - else - { - while ( *ep && *ep!=',' && *ep!=':' ) ep++; - kputsn(sp,ep-sp,&tmp); - } - if ( *ep==':' ) - { - sp = ep+1; - from = hts_parse_decimal(sp,(char**)&ep,0); - if ( sp==ep ) - { - hts_log_error("Could not parse the region(s): %s", str); - goto exit_nicely; - } - if ( !*ep || *ep==',' ) - { - _regions_add(reg, tmp.s, from, from); - sp = ep; - continue; - } - if ( *ep!='-' ) - { - hts_log_error("Could not parse the region(s): %s", str); - goto exit_nicely; - } - ep++; - sp = ep; - to = hts_parse_decimal(sp,(char**)&ep,0); - if ( *ep && *ep!=',' ) - { - hts_log_error("Could not parse the region(s): %s", str); - goto exit_nicely; - } - if ( sp==ep ) to = MAX_CSI_COOR-1; - _regions_add(reg, tmp.s, from, to); - if ( !*ep ) break; - sp = ep; - } - else if ( !*ep || *ep==',' ) - { - if ( tmp.l ) _regions_add(reg, tmp.s, -1, -1); - if ( !*ep ) break; - 
sp = ++ep; - } - else - { - hts_log_error("Could not parse the region(s): %s", str); - goto exit_nicely; - } - } - free(tmp.s); - return reg; - -exit_nicely: - bcf_sr_regions_destroy(reg); - free(tmp.s); - return NULL; -} - -// ichr,ifrom,ito are 0-based; -// returns -1 on error, 0 if the line is a comment line, 1 on success -static int _regions_parse_line(char *line, int ichr, int ifrom, int ito, char **chr, char **chr_end, hts_pos_t *from, hts_pos_t *to) -{ - if (ifrom < 0 || ito < 0) return -1; - *chr_end = NULL; - - if ( line[0]=='#' ) return 0; - - int k,l; // index of the start and end column of the tab-delimited file - if ( ifrom <= ito ) - k = ifrom, l = ito; - else - l = ifrom, k = ito; - - int i; - char *se = line, *ss = NULL; // start and end - char *tmp; - for (i=0; i<=k && *se; i++) - { - ss = i==0 ? se++ : ++se; - while (*se && *se!='\t') se++; - } - if ( i<=k ) return -1; - if ( k==l ) - { - *from = *to = hts_parse_decimal(ss, &tmp, 0); - if ( tmp==ss || (*tmp && *tmp!='\t') ) return -1; - } - else - { - if ( k==ifrom ) - *from = hts_parse_decimal(ss, &tmp, 0); - else - *to = hts_parse_decimal(ss, &tmp, 0); - if ( ss==tmp || (*tmp && *tmp!='\t') ) return -1; - - for (i=k; i0 ) ss = ++se; - while (*se && *se!='\t') se++; - } - if ( i<=ichr ) return -1; - *chr_end = se; - *chr = ss; - return 1; -} - -bcf_sr_regions_t *bcf_sr_regions_init(const char *regions, int is_file, int ichr, int ifrom, int ito) -{ - bcf_sr_regions_t *reg; - if ( !is_file ) - { - reg = _regions_init_string(regions); - _regions_sort_and_merge(reg); - return reg; - } - - reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t)); - reg->start = reg->end = -1; - reg->prev_start = reg->prev_end = reg->prev_seq = -1; - - reg->file = hts_open(regions, "rb"); - if ( !reg->file ) - { - hts_log_error("Could not open file: %s", regions); - free(reg); - return NULL; - } - - reg->tbx = tbx_index_load3(regions, NULL, HTS_IDX_SAVE_REMOTE|HTS_IDX_SILENT_FAIL); - if ( !reg->tbx ) - { - size_t 
iline = 0; - int len = strlen(regions); - int is_bed = strcasecmp(".bed",regions+len-4) ? 0 : 1; - if ( !is_bed && !strcasecmp(".bed.gz",regions+len-7) ) is_bed = 1; - - if ( reg->file->format.format==vcf ) ito = 1; - - // read the whole file, tabix index is not present - while ( hts_getline(reg->file, KS_SEP_LINE, ®->line) > 0 ) - { - iline++; - char *chr, *chr_end; - hts_pos_t from, to; - int ret; - ret = _regions_parse_line(reg->line.s, ichr,ifrom,abs(ito), &chr,&chr_end,&from,&to); - if ( ret < 0 ) - { - if ( ito<0 ) - ret = _regions_parse_line(reg->line.s, ichr,ifrom,ifrom, &chr,&chr_end,&from,&to); - if ( ret<0 ) - { - hts_log_error("Could not parse %zu-th line of file %s, using the columns %d,%d[,%d]", - iline, regions,ichr+1,ifrom+1,ito+1); - hts_close(reg->file); reg->file = NULL; free(reg); - return NULL; - } - ito = ifrom; - } - else if ( ito<0 ) - ito = abs(ito); - if ( !ret ) continue; - if ( is_bed ) from++; - *chr_end = 0; - _regions_add(reg, chr, from, to); - *chr_end = '\t'; - } - hts_close(reg->file); reg->file = NULL; - if ( !reg->nseqs ) { free(reg); return NULL; } - _regions_sort_and_merge(reg); - return reg; - } - - reg->seq_names = (char**) tbx_seqnames(reg->tbx, ®->nseqs); - if ( !reg->seq_hash ) - reg->seq_hash = khash_str2int_init(); - int i; - for (i=0; inseqs; i++) - { - khash_str2int_set(reg->seq_hash,reg->seq_names[i],i); - } - reg->fname = strdup(regions); - reg->is_bin = 1; - return reg; -} - -void bcf_sr_regions_destroy(bcf_sr_regions_t *reg) -{ - int i; - free(reg->fname); - if ( reg->itr ) tbx_itr_destroy(reg->itr); - if ( reg->tbx ) tbx_destroy(reg->tbx); - if ( reg->file ) hts_close(reg->file); - if ( reg->als ) free(reg->als); - if ( reg->als_str.s ) free(reg->als_str.s); - free(reg->line.s); - if ( reg->regs ) - { - // free only in-memory names, tbx names are const - for (i=0; inseqs; i++) - { - free(reg->seq_names[i]); - free(reg->regs[i].regs); - } - } - free(reg->regs); - free(reg->seq_names); - 
khash_str2int_destroy(reg->seq_hash); - free(reg); -} - -int bcf_sr_regions_seek(bcf_sr_regions_t *reg, const char *seq) -{ - reg->iseq = reg->start = reg->end = -1; - if ( khash_str2int_get(reg->seq_hash, seq, ®->iseq) < 0 ) return -1; // sequence seq not in regions - - // using in-memory regions - if ( reg->regs ) - { - reg->regs[reg->iseq].creg = -1; - return 0; - } - - // reading regions from tabix - if ( reg->itr ) tbx_itr_destroy(reg->itr); - reg->itr = tbx_itr_querys(reg->tbx, seq); - if ( reg->itr ) return 0; - - return -1; -} - -// Returns 0 on success, -1 when done -static int advance_creg(region_t *reg) -{ - int i = reg->creg + 1; - while ( inregs && reg->regs[i].start > reg->regs[i].end ) i++; // regions with start>end are marked to skip by merge_regions() - reg->creg = i; - if ( i>=reg->nregs ) return -1; - return 0; -} - -int bcf_sr_regions_next(bcf_sr_regions_t *reg) -{ - if ( reg->iseq<0 ) return -1; - reg->start = reg->end = -1; - reg->nals = 0; - - // using in-memory regions - if ( reg->regs ) - { - while ( reg->iseq < reg->nseqs ) - { - if ( advance_creg(®->regs[reg->iseq])==0 ) break; // a valid record was found - reg->iseq++; - } - if ( reg->iseq >= reg->nseqs ) { reg->iseq = -1; return -1; } // no more regions left - region1_t *creg = ®->regs[reg->iseq].regs[reg->regs[reg->iseq].creg]; - reg->start = creg->start; - reg->end = creg->end; - return 0; - } - - // reading from tabix - char *chr, *chr_end; - int ichr = 0, ifrom = 1, ito = 2, is_bed = 0; - hts_pos_t from, to; - if ( reg->tbx ) - { - ichr = reg->tbx->conf.sc-1; - ifrom = reg->tbx->conf.bc-1; - ito = reg->tbx->conf.ec-1; - if ( ito<0 ) ito = ifrom; - is_bed = reg->tbx->conf.preset==TBX_UCSC ? 
1 : 0; - } - - int ret = 0; - while ( !ret ) - { - if ( reg->itr ) - { - // tabix index present, reading a chromosome block - ret = tbx_itr_next(reg->file, reg->tbx, reg->itr, ®->line); - if ( ret<0 ) { reg->iseq = -1; return -1; } - } - else - { - if ( reg->is_bin ) - { - // Waited for seek which never came. Reopen in text mode and stream - // through the regions, otherwise hts_getline would fail - hts_close(reg->file); - reg->file = hts_open(reg->fname, "r"); - if ( !reg->file ) - { - hts_log_error("Could not open file: %s", reg->fname); - reg->file = NULL; - bcf_sr_regions_destroy(reg); - return -1; - } - reg->is_bin = 0; - } - - // tabix index absent, reading the whole file - ret = hts_getline(reg->file, KS_SEP_LINE, ®->line); - if ( ret<0 ) { reg->iseq = -1; return -1; } - } - ret = _regions_parse_line(reg->line.s, ichr,ifrom,ito, &chr,&chr_end,&from,&to); - if ( ret<0 ) - { - hts_log_error("Could not parse the file %s, using the columns %d,%d,%d", - reg->fname,ichr+1,ifrom+1,ito+1); - return -1; - } - } - if ( is_bed ) from++; - - *chr_end = 0; - if ( khash_str2int_get(reg->seq_hash, chr, ®->iseq)<0 ) - { - hts_log_error("Broken tabix index? 
The sequence \"%s\" not in dictionary [%s]", - chr, reg->line.s); - exit(1); - } - *chr_end = '\t'; - - reg->start = from - 1; - reg->end = to - 1; - return 0; -} - -static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec) -{ - if ( reg->regs ) - { - // payload is not supported for in-memory regions, switch to regidx instead in future - hts_log_error("Compressed and indexed targets file is required"); - exit(1); - } - - int i = 0, max_len = 0; - if ( !reg->nals ) - { - char *ss = reg->line.s; - while ( inals = 1; - while ( *se && *se!='\t' ) - { - if ( *se==',' ) reg->nals++; - se++; - } - ks_resize(®->als_str, se-ss+1+reg->nals); - reg->als_str.l = 0; - hts_expand(char*,reg->nals,reg->mals,reg->als); - reg->nals = 0; - - se = ss; - while ( *(++se) ) - { - if ( *se=='\t' ) break; - if ( *se!=',' ) continue; - reg->als[reg->nals] = ®->als_str.s[reg->als_str.l]; - kputsn(ss,se-ss,®->als_str); - if ( ®->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = ®->als_str.s[reg->als_str.l] - reg->als[reg->nals]; - reg->als_str.l++; - reg->nals++; - ss = ++se; - } - reg->als[reg->nals] = ®->als_str.s[reg->als_str.l]; - kputsn(ss,se-ss,®->als_str); - if ( ®->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = ®->als_str.s[reg->als_str.l] - reg->als[reg->nals]; - reg->nals++; - reg->als_type = max_len > 1 ? VCF_INDEL : VCF_SNP; // this is a simplified check, see vcf.c:bcf_set_variant_types - } - int type = bcf_get_variant_types(rec); - if ( reg->als_type & VCF_INDEL ) - return type & VCF_INDEL ? 1 : 0; - return !(type & VCF_INDEL) ? 
1 : 0; -} - -int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end) -{ - return _bcf_sr_regions_overlap(reg,seq,start,end,1); -} - -static int _bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end, int missed_reg_handler) -{ - int iseq; - if ( khash_str2int_get(reg->seq_hash, seq, &iseq)<0 ) return -1; // no such sequence - if ( missed_reg_handler && !reg->missed_reg_handler ) missed_reg_handler = 0; - - if ( reg->prev_seq==-1 || iseq!=reg->prev_seq || reg->prev_start > start ) // new chromosome or after a seek - { - // flush regions left on previous chromosome - if ( missed_reg_handler && reg->prev_seq!=-1 && reg->iseq!=-1 ) - bcf_sr_regions_flush(reg); - - bcf_sr_regions_seek(reg, seq); - reg->start = reg->end = -1; - } - if ( reg->prev_seq==iseq && reg->iseq!=iseq ) return -2; // no more regions on this chromosome - reg->prev_seq = reg->iseq; - reg->prev_start = start; - - while ( iseq==reg->iseq && reg->end < start ) - { - if ( bcf_sr_regions_next(reg) < 0 ) return -2; // no more regions left - if ( reg->iseq != iseq ) return -1; // does not overlap any regions - if ( missed_reg_handler && reg->end < start ) reg->missed_reg_handler(reg, reg->missed_reg_data); - } - if ( reg->start <= end ) return 0; // region overlap - return -1; // no overlap -} - -int bcf_sr_regions_flush(bcf_sr_regions_t *reg) -{ - if ( !reg->missed_reg_handler || reg->prev_seq==-1 ) return 0; - while ( !bcf_sr_regions_next(reg) ) reg->missed_reg_handler(reg, reg->missed_reg_data); - return 0; // FIXME: check for errs in this function -} - diff --git a/src/htslib-1.18/tabix.1 b/src/htslib-1.18/tabix.1 deleted file mode 100644 index b069a51..0000000 --- a/src/htslib-1.18/tabix.1 +++ /dev/null @@ -1,203 +0,0 @@ -.TH tabix 1 "25 July 2023" "htslib-1.18" "Bioinformatics tools" -.SH NAME -.PP -tabix \- Generic indexer for TAB-delimited genome position files -.\" -.\" Copyright (C) 2009-2011 Broad Institute. 
-.\" Copyright (C) 2014, 2016, 2018, 2020, 2022 Genome Research Ltd. -.\" -.\" Author: Heng Li -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -.SH SYNOPSIS -.PP -.B tabix -.RB [ -0lf ] -.RB [ -p -gff|bed|sam|vcf] -.RB [ -s -.IR seqCol ] -.RB [ -b -.IR begCol ] -.RB [ -e -.IR endCol ] -.RB [ -S -.IR lineSkip ] -.RB [ -c -.IR metaChar ] -.I in.tab.bgz -.RI [ "region1 " [ "region2 " [ ... "]]]" - -.SH DESCRIPTION -.PP -Tabix indexes a TAB-delimited genome position file -.I in.tab.bgz -and creates an index file -.RI ( in.tab.bgz.tbi -or -.IR in.tab.bgz.csi ) -when -.I region -is absent from the command-line. The input data file must be position -sorted and compressed by -.B bgzip -which has a -.BR gzip (1) -like interface. - -After indexing, tabix is able to quickly retrieve data lines overlapping -.I regions -specified in the format "chr:beginPos-endPos". 
-(Coordinates specified in this region format are 1-based and inclusive.) - -Fast data retrieval also -works over network if URI is given as a file name and in this case the -index file will be downloaded if it is not present locally. - -The tabix -.RI ( .tbi ) -and BAI index formats can handle individual chromosomes up to 512 Mbp -(2^29 bases) in length. -If your input file might contain data lines with begin or end positions -greater than that, you will need to use a CSI index. - -.SH INDEXING OPTIONS -.TP 10 -.B -0, --zero-based -Specify that the position in the data file is 0-based half-open -(e.g. UCSC files) rather than 1-based. -.TP -.BI "-b, --begin " INT -Column of start chromosomal position. [4] -.TP -.BI "-c, --comment " CHAR -Skip lines started with character CHAR. [#] -.TP -.BI "-C, --csi" -Produce CSI format index instead of classical tabix or BAI style indices. -.TP -.BI "-e, --end " INT -Column of end chromosomal position. The end column can be the same as the -start column. [5] -.TP -.B "-f, --force " -Force to overwrite the index file if it is present. -.TP -.BI "-m, --min-shift " INT -Set minimal interval size for CSI indices to 2^INT [14] -.TP -.BI "-p, --preset " STR -Input format for indexing. Valid values are: gff, bed, sam, vcf. -This option should not be applied together with any of -.BR -s ", " -b ", " -e ", " -c " and " -0 ; -it is not used for data retrieval because this setting is stored in -the index file. [gff] -.TP -.BI "-s, --sequence " INT -Column of sequence name. Option -.BR -s ", " -b ", " -e ", " -S ", " -c " and " -0 -are all stored in the index file and thus not used in data retrieval. [1] -.TP -.BI "-S, --skip-lines " INT -Skip first INT lines in the data file. [0] - -.SH QUERYING AND OTHER OPTIONS -.TP -.B "-h, --print-header " -Print also the header/meta lines. -.TP -.B "-H, --only-header " -Print only the header/meta lines. -.TP -.B "-l, --list-chroms " -List the sequence names stored in the index file. 
-.TP -.BI "-r, --reheader " FILE -Replace the header with the content of FILE -.TP -.BI "-R, --regions " FILE -Restrict to regions listed in the FILE. The FILE can be BED file (requires .bed, .bed.gz, .bed.bgz -file name extension) or a TAB-delimited file with CHROM, POS, and, optionally, -POS_TO columns, where positions are 1-based and inclusive. When this option is in use, the input -file may not be sorted. -.TP -.BI "-T, --targets " FILE -Similar to -.B -R -but the entire input will be read sequentially and regions not listed in FILE will be skipped. -.TP -.BI "-D " -Do not download the index file before opening it. Valid for remote files only. -.TP -.BI "--cache " INT -Set the BGZF block cache size to INT megabytes. [10] - -This is of most benefit when the -.B -R -option is used, which can cause blocks to be read more than once. -Setting the size to 0 will disable the cache. -.TP -.B --separate-regions -This option can be used when multiple regions are supplied in the command line -and the user needs to quickly see which file records belong to which region. -For this, a line with the name of the region, preceded by the file specific -comment symbol, is inserted in the output before its corresponding group of -records. -.TP -.BI "--verbosity " INT -Set verbosity of logging messages printed to stderr. -The default is 3, which turns on error and warning messages; -2 reduces warning messages; -1 prints only error messages and 0 is mostly silent. -Values higher than 3 produce additional informational and debugging messages. -.PP -.SH EXAMPLE -(grep "^#" in.gff; grep -v "^#" in.gff | sort -t"`printf '\(rst'`" -k1,1 -k4,4n) | bgzip > sorted.gff.gz; - -tabix -p gff sorted.gff.gz; - -tabix sorted.gff.gz chr1:10,000,000-20,000,000; - -.SH NOTES -It is straightforward to achieve overlap queries using the standard -B-tree index (with or without binning) implemented in all SQL databases, -or the R-tree index in PostgreSQL and Oracle. 
But there are still many -reasons to use tabix. Firstly, tabix directly works with a lot of widely -used TAB-delimited formats such as GFF/GTF and BED. We do not need to -design database schema or specialized binary formats. Data do not need -to be duplicated in different formats, either. Secondly, tabix works on -compressed data files while most SQL databases do not. The GenCode -annotation GTF can be compressed down to 4%. Thirdly, tabix is -fast. The same indexing algorithm is known to work efficiently for an -alignment with a few billion short reads. SQL databases probably cannot -easily handle data at this scale. Last but not the least, tabix supports -remote data retrieval. One can put the data file and the index at an FTP -or HTTP server, and other users or even web services will be able to get -a slice without downloading the entire file. - -.SH AUTHOR -.PP -Tabix was written by Heng Li. The BGZF library was originally -implemented by Bob Handsaker and modified by Heng Li for remote file -access and in-memory caching. - -.SH SEE ALSO -.IR bgzip (1), -.IR samtools (1) diff --git a/src/htslib-1.18/tabix.c b/src/htslib-1.18/tabix.c deleted file mode 100644 index 0798b27..0000000 --- a/src/htslib-1.18/tabix.c +++ /dev/null @@ -1,720 +0,0 @@ -/* tabix.c -- Generic indexer for TAB-delimited genome position files. - - Copyright (C) 2009-2011 Broad Institute. - Copyright (C) 2010-2012, 2014-2020 Genome Research Ltd. 
- - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "htslib/tbx.h" -#include "htslib/sam.h" -#include "htslib/vcf.h" -#include "htslib/kseq.h" -#include "htslib/bgzf.h" -#include "htslib/hts.h" -#include "htslib/regidx.h" -#include "htslib/hts_defs.h" -#include "htslib/hts_log.h" - -typedef struct -{ - char *regions_fname, *targets_fname; - int print_header, header_only, cache_megs, download_index, separate_regs; -} -args_t; - -static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) HTS_NORETURN -error(const char *format, ...) -{ - va_list ap; - fflush(stdout); - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); - fflush(stderr); - exit(EXIT_FAILURE); -} - -static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) HTS_NORETURN -error_errno(const char *format, ...) 
-{ - va_list ap; - int eno = errno; - fflush(stdout); - if (format) { - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); - } - if (eno) { - fprintf(stderr, "%s%s\n", format ? ": " : "", strerror(eno)); - } else { - fprintf(stderr, "\n"); - } - fflush(stderr); - exit(EXIT_FAILURE); -} - - -#define IS_GFF (1<<0) -#define IS_BED (1<<1) -#define IS_SAM (1<<2) -#define IS_VCF (1<<3) -#define IS_BCF (1<<4) -#define IS_BAM (1<<5) -#define IS_CRAM (1<<6) -#define IS_TXT (IS_GFF|IS_BED|IS_SAM|IS_VCF) - -int file_type(const char *fname) -{ - int l = strlen(fname); - if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF; - else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED; - else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM; - else if (l>=7 && strcasecmp(fname+l-7, ".vcf.gz") == 0) return IS_VCF; - else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF; - else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM; - else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM; - - htsFile *fp = hts_open(fname,"r"); - if (!fp) { - if (errno == ENOEXEC) { - // hts_open() uses this to report that it didn't understand the - // file format. - error("Couldn't understand format of \"%s\"\n", fname); - } else { - error_errno("Couldn't open \"%s\"", fname); - } - } - enum htsExactFormat format = hts_get_format(fp)->format; - hts_close(fp); - if ( format == bcf ) return IS_BCF; - if ( format == bam ) return IS_BAM; - if ( format == cram ) return IS_CRAM; - if ( format == vcf ) return IS_VCF; - - return 0; -} - -static char **parse_regions(char *regions_fname, char **argv, int argc, int *nregs) -{ - kstring_t str = {0,0,0}; - int iseq = 0, ireg = 0; - char **regs = NULL; - *nregs = argc; - - if ( regions_fname ) - { - // improve me: this is a too heavy machinery for parsing regions... 
- - regidx_t *idx = regidx_init(regions_fname, NULL, NULL, 0, NULL); - if ( !idx ) { - error_errno("Could not build region list for \"%s\"", regions_fname); - } - regitr_t *itr = regitr_init(idx); - if ( !itr ) { - error_errno("Could not initialize an iterator over \"%s\"", - regions_fname); - } - - (*nregs) += regidx_nregs(idx); - regs = (char**) malloc(sizeof(char*)*(*nregs)); - if (!regs) error_errno(NULL); - - int nseq; - char **seqs = regidx_seq_names(idx, &nseq); - for (iseq=0; iseqbeg+1, itr->end+1) < 0) { - error_errno(NULL); - } - regs[ireg] = strdup(str.s); - if (!regs[ireg]) error_errno(NULL); - ireg++; - } - } - regidx_destroy(idx); - regitr_destroy(itr); - } - free(str.s); - - if ( !ireg ) - { - if ( argc ) - { - regs = (char**) malloc(sizeof(char*)*argc); - if (!regs) error_errno(NULL); - } - else - { - regs = (char**) malloc(sizeof(char*)); - if (!regs) error_errno(NULL); - regs[0] = strdup("."); - if (!regs[0]) error_errno(NULL); - *nregs = 1; - } - } - - for (iseq=0; iseqformat; - - if (args->cache_megs) - hts_set_cache_size(fp, args->cache_megs * 1048576); - - regidx_t *reg_idx = NULL; - if ( args->targets_fname ) - { - reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL); - if (!reg_idx) - error_errno("Could not build region list for \"%s\"", - args->targets_fname); - } - - if ( format == bcf ) - { - htsFile *out = hts_open("-","w"); - if ( !out ) error_errno("Could not open stdout"); - hts_idx_t *idx = bcf_index_load3(fname, NULL, args->download_index ? 
HTS_IDX_SAVE_REMOTE : 0); - if ( !idx ) error_errno("Could not load .csi index of \"%s\"", fname); - - bcf_hdr_t *hdr = bcf_hdr_read(fp); - if ( !hdr ) error_errno("Could not read the header from \"%s\"", fname); - - if ( args->print_header ) { - if ( bcf_hdr_write(out,hdr)!=0 ) - error_errno("Failed to write to stdout"); - } - if ( !args->header_only ) - { - assert(regs != NULL); - bcf1_t *rec = bcf_init(); - if (!rec) error_errno(NULL); - for (i=0; i=0 ) - { - if ( reg_idx ) - { - const char *chr = bcf_seqname(hdr,rec); - if (!chr) { - error("Bad BCF record in \"%s\" : " - "Invalid CONTIG id %d\n", - fname, rec->rid); - } - if ( !regidx_overlap(reg_idx,chr,rec->pos,rec->pos+rec->rlen-1, NULL) ) continue; - } - if (!found) { - if (args->separate_regs) printf("%c%s\n", conf->meta_char, regs[i]); - found = 1; - } - if ( bcf_write(out,hdr,rec)!=0 ) { - error_errno("Failed to write to stdout"); - } - } - - if (ret < -1) { - error_errno("Reading \"%s\" failed", fname); - } - bcf_itr_destroy(itr); - } - bcf_destroy(rec); - } - if ( hts_close(out) ) - error_errno("hts_close returned non-zero status for stdout"); - - bcf_hdr_destroy(hdr); - hts_idx_destroy(idx); - } - else if ( format==vcf || format==sam || format==bed || format==text_format || format==unknown_format ) - { - tbx_t *tbx = tbx_index_load3(fname, NULL, args->download_index ? 
HTS_IDX_SAVE_REMOTE : 0); - if ( !tbx ) error_errno("Could not load .tbi/.csi index of %s", fname); - kstring_t str = {0,0,0}; - if ( args->print_header ) - { - int ret; - while ((ret = hts_getline(fp, KS_SEP_LINE, &str)) >= 0) - { - if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break; - if (puts(str.s) < 0) - error_errno("Error writing to stdout"); - } - if (ret < -1) error_errno("Reading \"%s\" failed", fname); - } - if ( !args->header_only ) - { - int nseq; - const char **seq = NULL; - if ( reg_idx ) { - seq = tbx_seqnames(tbx, &nseq); - if (!seq) error_errno("Failed to get sequence names list"); - } - for (i=0; i= 0) - { - if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end-1, NULL) ) continue; - if (!found) { - if (args->separate_regs) printf("%c%s\n", conf->meta_char, regs[i]); - found = 1; - } - if (puts(str.s) < 0) - error_errno("Failed to write to stdout"); - } - if (ret < -1) error_errno("Reading \"%s\" failed", fname); - tbx_itr_destroy(itr); - } - free(seq); - } - free(str.s); - tbx_destroy(tbx); - } - else if ( format==bam ) - error("Please use \"samtools view\" for querying BAM files.\n"); - - if ( reg_idx ) regidx_destroy(reg_idx); - if ( hts_close(fp) ) - error_errno("hts_close returned non-zero status: %s", fname); - - for (i=0; iblock_length ) return -1; - - char *buffer = fp->uncompressed_block; - int skip_until = 0; - - // Skip the header: find out the position of the data block - if ( buffer[0]==conf->meta_char ) - { - skip_until = 1; - while (1) - { - if ( buffer[skip_until]=='\n' ) - { - skip_until++; - if ( skip_until>=fp->block_length ) - { - if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", fname); - skip_until = 0; - } - // The header has finished - if ( buffer[skip_until]!=conf->meta_char ) break; - } - skip_until++; - if ( skip_until>=fp->block_length ) - { - if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", 
fname); - skip_until = 0; - } - } - } - - // Output the new header - FILE *hdr = fopen(header,"r"); - if ( !hdr ) error("%s: %s", header,strerror(errno)); - const size_t page_size = 32768; - char *buf = malloc(page_size); - BGZF *bgzf_out = bgzf_open("-", "w"); - ssize_t nread; - - if (!buf) error("%s\n", strerror(errno)); - if (!bgzf_out) - error_errno("Couldn't open output stream"); - while ( (nread=fread(buf,1,page_size-1,hdr))>0 ) - { - if ( nreaderrcode); - } - if ( ferror(hdr) ) error_errno("Failed to read \"%s\"", header); - if ( fclose(hdr) ) error_errno("Closing \"%s\" failed", header); - - // Output all remaining data read with the header block - if ( fp->block_length - skip_until > 0 ) - { - if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error_errno("Write error %d",fp->errcode); - } - if (bgzf_flush(bgzf_out) < 0) - error_errno("Write error %d", bgzf_out->errcode); - - while (1) - { - nread = bgzf_raw_read(fp, buf, page_size); - if ( nread<=0 ) break; - - int count = bgzf_raw_write(bgzf_out, buf, nread); - if (count != nread) error_errno("Write failed, wrote %d instead of %d bytes", count,(int)nread); - } - if (nread < 0) error_errno("Error reading \"%s\"", fname); - if (bgzf_close(bgzf_out) < 0) - error_errno("Error %d closing output", bgzf_out->errcode); - if (bgzf_close(fp) < 0) - error_errno("Error %d closing \"%s\"", bgzf_out->errcode, fname); - free(buf); - } - else - error("todo: reheader BCF, BAM\n"); // BCF is difficult, records contain pointers to the header. 
- return 0; -} - -static int usage(FILE *fp, int status) -{ - fprintf(fp, "\n"); - fprintf(fp, "Version: %s\n", hts_version()); - fprintf(fp, "Usage: tabix [OPTIONS] [FILE] [REGION [...]]\n"); - fprintf(fp, "\n"); - fprintf(fp, "Indexing Options:\n"); - fprintf(fp, " -0, --zero-based coordinates are zero-based\n"); - fprintf(fp, " -b, --begin INT column number for region start [4]\n"); - fprintf(fp, " -c, --comment CHAR skip comment lines starting with CHAR [null]\n"); - fprintf(fp, " -C, --csi generate CSI index for VCF (default is TBI)\n"); - fprintf(fp, " -e, --end INT column number for region end (if no end, set INT to -b) [5]\n"); - fprintf(fp, " -f, --force overwrite existing index without asking\n"); - fprintf(fp, " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n"); - fprintf(fp, " -p, --preset STR gff, bed, sam, vcf\n"); - fprintf(fp, " -s, --sequence INT column number for sequence names (suppressed by -p) [1]\n"); - fprintf(fp, " -S, --skip-lines INT skip first INT lines [0]\n"); - fprintf(fp, "\n"); - fprintf(fp, "Querying and other options:\n"); - fprintf(fp, " -h, --print-header print also the header lines\n"); - fprintf(fp, " -H, --only-header print only the header lines\n"); - fprintf(fp, " -l, --list-chroms list chromosome names\n"); - fprintf(fp, " -r, --reheader FILE replace the header with the content of FILE\n"); - fprintf(fp, " -R, --regions FILE restrict to regions listed in the file\n"); - fprintf(fp, " -T, --targets FILE similar to -R but streams rather than index-jumps\n"); - fprintf(fp, " -D do not download the index file\n"); - fprintf(fp, " --cache INT set cache size to INT megabytes (0 disables) [10]\n"); - fprintf(fp, " --separate-regions separate the output by corresponding regions\n"); - fprintf(fp, " --verbosity INT set verbosity [3]\n"); - fprintf(fp, "\n"); - return status; -} - -int main(int argc, char *argv[]) -{ - int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0; - 
tbx_conf_t conf = tbx_conf_gff; - char *reheader = NULL; - args_t args; - memset(&args,0,sizeof(args_t)); - args.cache_megs = 10; - args.download_index = 1; - int32_t new_line_skip = -1; - - static const struct option loptions[] = - { - {"help", no_argument, NULL, 2}, - {"regions", required_argument, NULL, 'R'}, - {"targets", required_argument, NULL, 'T'}, - {"csi", no_argument, NULL, 'C'}, - {"zero-based", no_argument, NULL, '0'}, - {"print-header", no_argument, NULL, 'h'}, - {"only-header", no_argument, NULL, 'H'}, - {"begin", required_argument, NULL, 'b'}, - {"comment", required_argument, NULL, 'c'}, - {"end", required_argument, NULL, 'e'}, - {"force", no_argument, NULL, 'f'}, - {"min-shift", required_argument, NULL, 'm'}, - {"preset", required_argument, NULL, 'p'}, - {"sequence", required_argument, NULL, 's'}, - {"skip-lines", required_argument, NULL, 'S'}, - {"list-chroms", no_argument, NULL, 'l'}, - {"reheader", required_argument, NULL, 'r'}, - {"version", no_argument, NULL, 1}, - {"verbosity", required_argument, NULL, 3}, - {"cache", required_argument, NULL, 4}, - {"separate-regions", no_argument, NULL, 5}, - {NULL, 0, NULL, 0} - }; - - char *tmp; - while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:D", loptions,NULL)) >= 0) - { - switch (c) - { - case 'R': args.regions_fname = optarg; break; - case 'T': args.targets_fname = optarg; break; - case 'C': do_csi = 1; break; - case 'r': reheader = optarg; break; - case 'h': args.print_header = 1; break; - case 'H': args.print_header = 1; args.header_only = 1; break; - case 'l': list_chroms = 1; break; - case '0': conf.preset |= TBX_UCSC; detect = 0; break; - case 'b': - conf.bc = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -b %s\n", optarg); - detect = 0; - break; - case 'e': - conf.ec = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -e %s\n", optarg); - detect = 0; - break; - case 'c': conf.meta_char = *optarg; detect = 0; break; - case 'f': 
is_force = 1; break; - case 'm': - min_shift = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -m %s\n", optarg); - break; - case 'p': - detect = 0; - if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff; - else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed; - else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam; - else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf; - else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed - else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf - else error("The preset string not recognised: '%s'\n", optarg); - break; - case 's': - conf.sc = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -s %s\n", optarg); - detect = 0; - break; - case 'S': - new_line_skip = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -S %s\n", optarg); - detect = 0; - break; - case 'D': - args.download_index = 0; - break; - case 1: - printf( -"tabix (htslib) %s\n" -"Copyright (C) 2023 Genome Research Ltd.\n", hts_version()); - return EXIT_SUCCESS; - case 2: - return usage(stdout, EXIT_SUCCESS); - case 3: { - int v = atoi(optarg); - if (v < 0) v = 0; - hts_set_log_level(v); - break; - } - case 4: - args.cache_megs = atoi(optarg); - if (args.cache_megs < 0) { - args.cache_megs = 0; - } else if (args.cache_megs >= INT_MAX / 1048576) { - args.cache_megs = INT_MAX / 1048576; - } - break; - case 5: - args.separate_regs = 1; - break; - default: return usage(stderr, EXIT_FAILURE); - } - } - - if (new_line_skip >= 0) - conf.line_skip = new_line_skip; - - if ( optind==argc ) return usage(stderr, EXIT_FAILURE); - - if ( list_chroms ) - return query_chroms(argv[optind], args.download_index); - - char *fname = argv[optind]; - int ftype = file_type(fname); - if ( detect ) // no preset given - { - if ( ftype==IS_GFF ) conf = tbx_conf_gff; - else if ( ftype==IS_BED ) conf = tbx_conf_bed; - else if ( ftype==IS_SAM ) conf = tbx_conf_sam; - else if 
( ftype==IS_VCF ) - { - conf = tbx_conf_vcf; - if ( !min_shift && do_csi ) min_shift = 14; - } - else if ( ftype==IS_BCF ) - { - if ( !min_shift ) min_shift = 14; - } - else if ( ftype==IS_BAM ) - { - if ( !min_shift ) min_shift = 14; - } - } - if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname ) - { - int nregs = 0; - char **regs = NULL; - if ( !args.header_only ) - regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs); - return query_regions(&args, &conf, fname, regs, nregs); - } - if ( do_csi ) - { - if ( !min_shift ) min_shift = 14; - min_shift *= do_csi; // positive for CSIv2, negative for CSIv1 - } - if ( min_shift!=0 && !do_csi ) do_csi = 1; - - if ( reheader ) - return reheader_file(fname, reheader, ftype, &conf); - - char *suffix = ".tbi"; - if ( do_csi ) suffix = ".csi"; - else if ( ftype==IS_BAM ) suffix = ".bai"; - else if ( ftype==IS_CRAM ) suffix = ".crai"; - - char *idx_fname = calloc(strlen(fname) + 6, 1); - if (!idx_fname) error("%s\n", strerror(errno)); - strcat(strcpy(idx_fname, fname), suffix); - - struct stat stat_tbi, stat_file; - if ( !is_force && stat(idx_fname, &stat_tbi)==0 ) - { - // Before complaining about existing index, check if the VCF file isn't - // newer. This is a common source of errors, people tend not to notice - // that tabix failed - stat(fname, &stat_file); - if ( stat_file.st_mtime <= stat_tbi.st_mtime ) - error("[tabix] the index file exists. 
Please use '-f' to overwrite.\n"); - } - free(idx_fname); - - int ret; - if ( ftype==IS_CRAM ) - { - if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); - return 0; - } - else if ( do_csi ) - { - if ( ftype==IS_BCF ) - { - if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname); - return 0; - } - if ( ftype==IS_BAM ) - { - if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); - return 0; - } - - switch (ret = tbx_index_build(fname, min_shift, &conf)) - { - case 0: - return 0; - case -2: - error("[tabix] the compression of '%s' is not BGZF\n", fname); - default: - error("tbx_index_build failed: %s\n", fname); - } - } - else // TBI index - { - switch (ret = tbx_index_build(fname, min_shift, &conf)) - { - case 0: - return 0; - case -2: - error("[tabix] the compression of '%s' is not BGZF\n", fname); - default: - error("tbx_index_build failed: %s\n", fname); - } - } - - return 0; -} diff --git a/src/htslib-1.18/tbx.c b/src/htslib-1.18/tbx.c deleted file mode 100644 index c2c5c6f..0000000 --- a/src/htslib-1.18/tbx.c +++ /dev/null @@ -1,496 +0,0 @@ -/* tbx.c -- tabix API functions. - - Copyright (C) 2009, 2010, 2012-2015, 2017-2020, 2022-2023 Genome Research Ltd. - Copyright (C) 2010-2012 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include "htslib/tbx.h" -#include "htslib/bgzf.h" -#include "htslib/hts_endian.h" -#include "hts_internal.h" - -#include "htslib/khash.h" -KHASH_DECLARE(s2i, kh_cstr_t, int64_t) - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_gff = { 0, 1, 4, 5, '#', 0 }; - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_bed = { TBX_UCSC, 1, 2, 3, '#', 0 }; - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_psltbl = { TBX_UCSC, 15, 17, 18, '#', 0 }; - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_sam = { TBX_SAM, 3, 4, 0, '@', 0 }; - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_vcf = { TBX_VCF, 1, 2, 0, '#', 0 }; - -typedef struct { - int64_t beg, end; - char *ss, *se; - int tid; -} tbx_intv_t; - -static inline int get_tid(tbx_t *tbx, const char *ss, int is_add) -{ - khint_t k; - khash_t(s2i) *d; - if (tbx->dict == 0) tbx->dict = kh_init(s2i); - if (!tbx->dict) return -1; // Out of memory - d = (khash_t(s2i)*)tbx->dict; - if (is_add) { - int absent; - k = kh_put(s2i, d, ss, &absent); - if (absent < 0) { - return -1; // Out of memory - } else if (absent) { - char *ss_dup = strdup(ss); - if (ss_dup) { - kh_key(d, k) = ss_dup; - kh_val(d, k) = kh_size(d) - 1; - } else { - kh_del(s2i, d, k); - return -1; // Out of memory - } - } - } else k = kh_get(s2i, d, ss); - return k == kh_end(d)? 
-1 : kh_val(d, k); -} - -int tbx_name2id(tbx_t *tbx, const char *ss) -{ - return get_tid(tbx, ss, 0); -} - -int tbx_parse1(const tbx_conf_t *conf, size_t len, char *line, tbx_intv_t *intv) -{ - size_t i, b = 0; - int id = 1; - char *s; - intv->ss = intv->se = 0; intv->beg = intv->end = -1; - for (i = 0; i <= len; ++i) { - if (line[i] == '\t' || line[i] == 0) { - if (id == conf->sc) { - intv->ss = line + b; intv->se = line + i; - } else if (id == conf->bc) { - // here ->beg is 0-based. - intv->beg = strtoll(line + b, &s, 0); - - if (conf->bc <= conf->ec) // don't overwrite an already set end point - intv->end = intv->beg; - - if ( s==line+b ) return -1; // expected int - - if (!(conf->preset&TBX_UCSC)) - --intv->beg; - else if (conf->bc <= conf->ec) - ++intv->end; - - if (intv->beg < 0) { - hts_log_warning("Coordinate <= 0 detected. " - "Did you forget to use the -0 option?"); - intv->beg = 0; - } - if (intv->end < 1) intv->end = 1; - } else { - if ((conf->preset&0xffff) == TBX_GENERIC) { - if (id == conf->ec) - { - intv->end = strtoll(line + b, &s, 0); - if ( s==line+b ) return -1; // expected int - } - } else if ((conf->preset&0xffff) == TBX_SAM) { - if (id == 6) { // CIGAR - int l = 0; - char *t; - for (s = line + b; s < line + i;) { - long x = strtol(s, &t, 10); - char op = toupper_c(*t); - if (op == 'M' || op == 'D' || op == 'N') l += x; - s = t + 1; - } - if (l == 0) l = 1; - intv->end = intv->beg + l; - } - } else if ((conf->preset&0xffff) == TBX_VCF) { - if (id == 4) { - if (b < i) intv->end = intv->beg + (i - b); - } else if (id == 8) { // look for "END=" - int c = line[i]; - line[i] = 0; - s = strstr(line + b, "END="); - if (s == line + b) s += 4; - else if (s) { - s = strstr(line + b, ";END="); - if (s) s += 5; - } - if (s && *s != '.') { - long long end = strtoll(s, &s, 0); - if (end <= intv->beg) { - static int reported = 0; - if (!reported) { - int l = intv->ss ? 
(int) (intv->se - intv->ss) : 0; - hts_log_warning("VCF INFO/END=%lld is smaller than POS at %.*s:%"PRIhts_pos"\n" - "This tag will be ignored. " - "Note: only one invalid END tag will be reported.", - end, l >= 0 ? l : 0, - intv->ss ? intv->ss : "", - intv->beg); - reported = 1; - } - } else { - intv->end = end; - } - } - line[i] = c; - } - } - } - b = i + 1; - ++id; - } - } - if (intv->ss == 0 || intv->se == 0 || intv->beg < 0 || intv->end < 0) return -1; - return 0; -} - -static inline int get_intv(tbx_t *tbx, kstring_t *str, tbx_intv_t *intv, int is_add) -{ - if (tbx_parse1(&tbx->conf, str->l, str->s, intv) == 0) { - int c = *intv->se; - *intv->se = '\0'; intv->tid = get_tid(tbx, intv->ss, is_add); *intv->se = c; - if (intv->tid < 0) return -2; // get_tid out of memory - return (intv->beg >= 0 && intv->end >= 0)? 0 : -1; - } else { - char *type = NULL; - switch (tbx->conf.preset&0xffff) - { - case TBX_SAM: type = "TBX_SAM"; break; - case TBX_VCF: type = "TBX_VCF"; break; - case TBX_UCSC: type = "TBX_UCSC"; break; - default: type = "TBX_GENERIC"; break; - } - hts_log_error("Failed to parse %s, was wrong -p [type] used?\nThe offending line was: \"%s\"", - type, str->s); - return -1; - } -} - -/* - * Called by tabix iterator to read the next record. 
- * Returns >= 0 on success - * -1 on EOF - * <= -2 on error - */ -int tbx_readrec(BGZF *fp, void *tbxv, void *sv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - tbx_t *tbx = (tbx_t *) tbxv; - kstring_t *s = (kstring_t *) sv; - int ret; - if ((ret = bgzf_getline(fp, '\n', s)) >= 0) { - tbx_intv_t intv; - if (get_intv(tbx, s, &intv, 0) < 0) - return -2; - *tid = intv.tid; *beg = intv.beg; *end = intv.end; - } - return ret; -} - -static int tbx_set_meta(tbx_t *tbx) -{ - int i, l = 0, l_nm; - uint32_t x[7]; - char **name; - uint8_t *meta; - khint_t k; - khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; - - memcpy(x, &tbx->conf, 24); - name = (char**)malloc(sizeof(char*) * kh_size(d)); - if (!name) return -1; - for (k = kh_begin(d), l = 0; k != kh_end(d); ++k) { - if (!kh_exist(d, k)) continue; - name[kh_val(d, k)] = (char*)kh_key(d, k); - l += strlen(kh_key(d, k)) + 1; // +1 to include '\0' - } - l_nm = x[6] = l; - meta = (uint8_t*)malloc(l_nm + 28); - if (!meta) { free(name); return -1; } - if (ed_is_big()) - for (i = 0; i < 7; ++i) - x[i] = ed_swap_4(x[i]); - memcpy(meta, x, 28); - for (l = 28, i = 0; i < (int)kh_size(d); ++i) { - int x = strlen(name[i]) + 1; - memcpy(meta + l, name[i], x); - l += x; - } - free(name); - hts_idx_set_meta(tbx->idx, l, meta, 0); - return 0; -} - -// Minimal effort parser to extract reference length out of VCF header line -// This is used only used to adjust the number of levels if necessary, -// so not a major problem if it doesn't always work. 
-static void adjust_max_ref_len_vcf(const char *str, int64_t *max_ref_len) -{ - const char *ptr; - int64_t len; - if (strncmp(str, "##contig", 8) != 0) return; - ptr = strstr(str + 8, "length"); - if (!ptr) return; - for (ptr += 6; *ptr == ' ' || *ptr == '='; ptr++) {} - len = strtoll(ptr, NULL, 10); - if (*max_ref_len < len) *max_ref_len = len; -} - -// Same for sam files -static void adjust_max_ref_len_sam(const char *str, int64_t *max_ref_len) -{ - const char *ptr; - int64_t len; - if (strncmp(str, "@SQ", 3) != 0) return; - ptr = strstr(str + 3, "\tLN:"); - if (!ptr) return; - ptr += 4; - len = strtoll(ptr, NULL, 10); - if (*max_ref_len < len) *max_ref_len = len; -} - -// Adjusts number of levels if not big enough. This can happen for -// files with very large contigs. -static int adjust_n_lvls(int min_shift, int n_lvls, int64_t max_len) -{ - int64_t s = 1LL << (min_shift + n_lvls * 3); - max_len += 256; - for (; max_len > s; ++n_lvls, s <<= 3) {} - return n_lvls; -} - -tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf) -{ - tbx_t *tbx; - kstring_t str; - int ret, first = 0, n_lvls, fmt; - int64_t lineno = 0; - uint64_t last_off = 0; - tbx_intv_t intv; - int64_t max_ref_len = 0; - - str.s = 0; str.l = str.m = 0; - tbx = (tbx_t*)calloc(1, sizeof(tbx_t)); - if (!tbx) return NULL; - tbx->conf = *conf; - if (min_shift > 0) n_lvls = (TBX_MAX_SHIFT - min_shift + 2) / 3, fmt = HTS_FMT_CSI; - else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_TBI; - while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) { - ++lineno; - if (str.s[0] == tbx->conf.meta_char && fmt == HTS_FMT_CSI) { - switch (tbx->conf.preset) { - case TBX_SAM: - adjust_max_ref_len_sam(str.s, &max_ref_len); break; - case TBX_VCF: - adjust_max_ref_len_vcf(str.s, &max_ref_len); break; - default: - break; - } - } - if (lineno <= tbx->conf.line_skip || str.s[0] == tbx->conf.meta_char) { - last_off = bgzf_tell(fp); - continue; - } - if (first == 0) { - if (fmt == HTS_FMT_CSI) { - if (!max_ref_len) - 
max_ref_len = (int64_t)100*1024*1024*1024; // 100G default - n_lvls = adjust_n_lvls(min_shift, n_lvls, max_ref_len); - } - tbx->idx = hts_idx_init(0, fmt, last_off, min_shift, n_lvls); - if (!tbx->idx) goto fail; - first = 1; - } - ret = get_intv(tbx, &str, &intv, 1); - if (ret < -1) goto fail; // Out of memory - if (ret < 0) continue; // Skip unparsable lines - if (hts_idx_push(tbx->idx, intv.tid, intv.beg, intv.end, - bgzf_tell(fp), 1) < 0) { - goto fail; - } - } - if (ret < -1) goto fail; - if ( !tbx->idx ) tbx->idx = hts_idx_init(0, fmt, last_off, min_shift, n_lvls); // empty file - if (!tbx->idx) goto fail; - if ( !tbx->dict ) tbx->dict = kh_init(s2i); - if (!tbx->dict) goto fail; - if (hts_idx_finish(tbx->idx, bgzf_tell(fp)) != 0) goto fail; - if (tbx_set_meta(tbx) != 0) goto fail; - free(str.s); - return tbx; - - fail: - free(str.s); - tbx_destroy(tbx); - return NULL; -} - -void tbx_destroy(tbx_t *tbx) -{ - khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; - if (d != NULL) - { - khint_t k; - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((char*)kh_key(d, k)); - } - hts_idx_destroy(tbx->idx); - kh_destroy(s2i, d); - free(tbx); -} - -int tbx_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads, const tbx_conf_t *conf) -{ - tbx_t *tbx; - BGZF *fp; - int ret; - if ((fp = bgzf_open(fn, "r")) == 0) return -1; - if ( n_threads ) bgzf_mt(fp, n_threads, 256); - if ( bgzf_compression(fp) != bgzf ) { bgzf_close(fp); return -2; } - tbx = tbx_index(fp, min_shift, conf); - bgzf_close(fp); - if ( !tbx ) return -1; - ret = hts_idx_save_as(tbx->idx, fn, fnidx, min_shift > 0? 
HTS_FMT_CSI : HTS_FMT_TBI); - tbx_destroy(tbx); - return ret; -} - -int tbx_index_build2(const char *fn, const char *fnidx, int min_shift, const tbx_conf_t *conf) -{ - return tbx_index_build3(fn, fnidx, min_shift, 0, conf); -} - -int tbx_index_build(const char *fn, int min_shift, const tbx_conf_t *conf) -{ - return tbx_index_build3(fn, NULL, min_shift, 0, conf); -} - -static tbx_t *index_load(const char *fn, const char *fnidx, int flags) -{ - tbx_t *tbx; - uint8_t *meta; - char *nm, *p; - uint32_t l_meta, l_nm; - tbx = (tbx_t*)calloc(1, sizeof(tbx_t)); - if (!tbx) - return NULL; - tbx->idx = hts_idx_load3(fn, fnidx, HTS_FMT_TBI, flags); - if ( !tbx->idx ) - { - free(tbx); - return NULL; - } - meta = hts_idx_get_meta(tbx->idx, &l_meta); - if ( !meta || l_meta < 28) goto invalid; - - tbx->conf.preset = le_to_i32(&meta[0]); - tbx->conf.sc = le_to_i32(&meta[4]); - tbx->conf.bc = le_to_i32(&meta[8]); - tbx->conf.ec = le_to_i32(&meta[12]); - tbx->conf.meta_char = le_to_i32(&meta[16]); - tbx->conf.line_skip = le_to_i32(&meta[20]); - l_nm = le_to_u32(&meta[24]); - if (l_nm > l_meta - 28) goto invalid; - - p = nm = (char*)meta + 28; - // This assumes meta is NUL-terminated, so we can merrily strlen away. - // hts_idx_load_local() assures this for us by adding a NUL on the end - // of whatever it reads. - for (; p - nm < l_nm; p += strlen(p) + 1) { - if (get_tid(tbx, p, 1) < 0) { - hts_log_error("%s", strerror(errno)); - goto fail; - } - } - return tbx; - - invalid: - hts_log_error("Invalid index header for %s", fnidx ? 
fnidx : fn); - - fail: - tbx_destroy(tbx); - return NULL; -} - -tbx_t *tbx_index_load3(const char *fn, const char *fnidx, int flags) -{ - return index_load(fn, fnidx, flags); -} - -tbx_t *tbx_index_load2(const char *fn, const char *fnidx) -{ - return index_load(fn, fnidx, 1); -} - -tbx_t *tbx_index_load(const char *fn) -{ - return index_load(fn, NULL, 1); -} - -const char **tbx_seqnames(tbx_t *tbx, int *n) -{ - khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; - if (d == NULL) - { - *n = 0; - return calloc(1, sizeof(char *)); - } - int tid, m = kh_size(d); - const char **names = (const char**) calloc(m,sizeof(const char*)); - khint_t k; - if (!names) { - *n = 0; - return NULL; - } - for (k=kh_begin(d); k - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_TEXTUTILS_INTERNAL_H -#define HTSLIB_TEXTUTILS_INTERNAL_H - -/* N.B. 
These interfaces may be used by plug-ins */ - -#include -#include -#include "htslib/kstring.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/// Decode percent-encoded (URL-encoded) text -/** On input, _dest_ should be a buffer at least the same size as _s_, - and may be equal to _s_ to decode in place. On output, _dest_ will be - NUL-terminated and the number of characters written (not including the - NUL) is stored in _destlen_. -*/ -int hts_decode_percent(char *dest, size_t *destlen, const char *s); - -/// Return decoded data length given length of base64-encoded text -/** This gives an upper bound, as it overestimates by a byte or two when - the encoded text ends with (possibly omitted) `=` padding characters. -*/ -size_t hts_base64_decoded_length(size_t len); - -/// Decode base64-encoded data -/** On input, _dest_ should be a sufficient buffer (see `hts_base64_length()`), - and may be equal to _s_ to decode in place. On output, the number of - bytes written is stored in _destlen_. -*/ -int hts_decode_base64(char *dest, size_t *destlen, const char *s); - -/// Token structure returned by JSON lexing functions -/** Structure is defined in hts_internal.h - */ - -typedef struct hts_json_token hts_json_token; - -/// Allocate an empty JSON token structure, for use with hts_json_* functions -/** @return An empty token on success; NULL on failure - */ -HTSLIB_EXPORT -hts_json_token *hts_json_alloc_token(void); - -/// Free a JSON token -HTSLIB_EXPORT -void hts_json_free_token(hts_json_token *token); - -/// Accessor function to get JSON token type -/** @param token Pointer to JSON token - @return Character indicating the token type - -Token types correspond to scalar JSON values and selected punctuation -as follows: - - `s` string - - `n` number - - `b` boolean literal - - `.` null literal - - `{`, `}`, `[`, `]` object and array delimiters - - `?` lexing error - - `!` other errors (e.g. 
out of memory) - - `\0` terminator at end of input -*/ -HTSLIB_EXPORT -char hts_json_token_type(hts_json_token *token); - -/// Accessor function to get JSON token in string form -/** @param token Pointer to JSON token - @return String representation of the JSON token; NULL if unset - -If the token was parsed from a string using hts_json_snext(), the return value -will point into the string passed as the first parameter to hts_json_snext(). -If the token was parsed from a file using hts_json_fnext(), the return value -will point at the kstring_t buffer passed as the third parameter to -hts_json_fnext(). In that case, the value will only be valid until the -next call to hts_json_fnext(). - */ -HTSLIB_EXPORT -char *hts_json_token_str(hts_json_token *token); - -/// Read one JSON token from a string -/** @param str The input C string - @param state The input string state - @param token On return, filled in with the token read - @return The type of the token read - -On return, `token->str` points into the supplied input string, which -is modified by having token-terminating characters overwritten as NULs. -The `state` argument records the current position within `str` after each -`hts_json_snext()` call, and should be set to 0 before the first call. -*/ -HTSLIB_EXPORT -char hts_json_snext(char *str, size_t *state, hts_json_token *token); - -/// Read and discard a complete JSON value from a string -/** @param str The input C string - @param state The input string state, as per `hts_json_snext()` - @param type If the first token of the value to be discarded has already - been read, provide its type; otherwise `'\0'` - @return One of `v` (success), `\0` (end of string), and `?` (lexing error) - -Skips a complete JSON value, which may be a single token or an entire object -or array. 
-*/ -HTSLIB_EXPORT -char hts_json_sskip_value(char *str, size_t *state, char type); - -struct hFILE; - -/// Read one JSON token from a file -/** @param fp The file stream - @param token On return, filled in with the token read - @param kstr Buffer used to store the token string returned - @return The type of the token read - -The `kstr` buffer is used to store the string value of the token read, -so `token->str` is only valid until the next time `hts_json_fnext()` is -called with the same `kstr` argument. -*/ -HTSLIB_EXPORT -char hts_json_fnext(struct hFILE *fp, hts_json_token *token, kstring_t *kstr); - -/// Read and discard a complete JSON value from a file -/** @param fp The file stream - @param type If the first token of the value to be discarded has already - been read, provide its type; otherwise `'\0'` - @return One of `v` (success), `\0` (EOF), and `?` (lexing error) - -Skips a complete JSON value, which may be a single token or an entire object -or array. -*/ -HTSLIB_EXPORT -char hts_json_fskip_value(struct hFILE *fp, char type); - -// The functions operate on ints such as are returned by fgetc(), -// i.e., characters represented as unsigned-char-valued ints, or EOF. -// To operate on plain chars (and to avoid warnings on some platforms), -// technically one must cast to unsigned char everywhere (see CERT STR37-C) -// or less painfully use these *_c() functions that operate on plain chars -// (but not EOF, which must be considered separately where it is applicable). -// TODO We may eventually wish to implement these functions directly without -// using their equivalents, and thus make them immune to locales. 
-static inline int isalnum_c(char c) { return isalnum((unsigned char) c); } -static inline int isalpha_c(char c) { return isalpha((unsigned char) c); } -static inline int isdigit_c(char c) { return isdigit((unsigned char) c); } -static inline int isgraph_c(char c) { return isgraph((unsigned char) c); } -static inline int islower_c(char c) { return islower((unsigned char) c); } -static inline int isprint_c(char c) { return isprint((unsigned char) c); } -static inline int ispunct_c(char c) { return ispunct((unsigned char) c); } -static inline int isspace_c(char c) { return isspace((unsigned char) c); } -static inline int isupper_c(char c) { return isupper((unsigned char) c); } -static inline int isxdigit_c(char c) { return isxdigit((unsigned char) c); } -static inline char tolower_c(char c) { return tolower((unsigned char) c); } -static inline char toupper_c(char c) { return toupper((unsigned char) c); } - -/// Copy possibly malicious text data to a buffer -/** @param buf Destination buffer - @param buflen Size of the destination buffer (>= 4; >= 6 when quotes used) - @param quote Quote character (or '\0' for no quoting of the output) - @param s String to be copied - @param len Length of the input string, or SIZE_MAX to copy until '\0' - @return The destination buffer, @a buf. - -Copies the source text string (escaping any unprintable characters) to the -destination buffer. The destination buffer will always be NUL-terminated; -the text will be truncated (and "..." appended) if necessary to make it fit. - */ -const char *hts_strprint(char *buf, size_t buflen, char quote, - const char *s, size_t len); - -// Faster replacements for strtol, for use when parsing lots of numbers. 
-// Note that these only handle base 10 and do not skip leading whitespace - -/// Convert a string to a signed integer, with overflow detection -/** @param[in] in Input string - @param[out] end Returned end pointer - @param[in] bits Bits available for the converted value - @param[out] failed Location of overflow flag - @return String value converted to an int64_t - -Converts a signed decimal string to an int64_t. The string should -consist of an optional '+' or '-' sign followed by one or more of -the digits 0 to 9. The output value will be limited to fit in the -given number of bits (including the sign bit). If the value is too big, -the largest possible value will be returned and *failed will be set to 1. - -The address of the first character following the converted number will -be stored in *end. - -Both end and failed must be non-NULL. - */ -static inline int64_t hts_str2int(const char *in, char **end, int bits, - int *failed) { - uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1; - uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322 - const unsigned char *v = (const unsigned char *) in; - const unsigned int ascii_zero = '0'; // Prevents conversion to signed - unsigned char d; - int neg = 1; - - switch(*v) { - case '-': - neg=-1; - limit++; /* fall through */ - case '+': - v++; - break; - default: - break; - } - - while (--fast && *v>='0' && *v<='9') - n = n*10 + *v++ - ascii_zero; - - if (!fast) { - uint64_t limit_d_10 = limit / 10; - uint64_t limit_m_10 = limit - 10 * limit_d_10; - while ((d = *v - ascii_zero) < 10) { - if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) { - n = n*10 + d; - v++; - } else { - do { v++; } while (*v - ascii_zero < 10); - n = limit; - *failed = 1; - break; - } - } - } - - *end = (char *)v; - - return (n && neg < 0) ? 
-((int64_t) (n - 1)) - 1 : (int64_t) n; -} - -/// Convert a string to an unsigned integer, with overflow detection -/** @param[in] in Input string - @param[out] end Returned end pointer - @param[in] bits Bits available for the converted value - @param[out] failed Location of overflow flag - @return String value converted to a uint64_t - -Converts an unsigned decimal string to a uint64_t. The string should -consist of an optional '+' sign followed by one or more of the digits 0 -to 9. The output value will be limited to fit in the given number of bits. -If the value is too big, the largest possible value will be returned -and *failed will be set to 1. - -The address of the first character following the converted number will -be stored in *end. - -Both end and failed must be non-NULL. - */ - -static inline uint64_t hts_str2uint(const char *in, char **end, int bits, - int *failed) { - uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1; - const unsigned char *v = (const unsigned char *) in; - const unsigned int ascii_zero = '0'; // Prevents conversion to signed - uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322 - unsigned char d; - - if (*v == '+') - v++; - - while (--fast && *v>='0' && *v<='9') - n = n*10 + *v++ - ascii_zero; - - if (!fast) { - uint64_t limit_d_10 = limit / 10; - uint64_t limit_m_10 = limit - 10 * limit_d_10; - while ((d = *v - ascii_zero) < 10) { - if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) { - n = n*10 + d; - v++; - } else { - do { v++; } while (*v - ascii_zero < 10); - n = limit; - *failed = 1; - break; - } - } - } - - *end = (char *)v; - return n; -} - -/// Convert a string to a double, with overflow detection -/** @param[in] in Input string - @param[out] end Returned end pointer - @param[out] failed Location of overflow flag - @return String value converted to a double - -Converts a floating point value string to a double. 
The string should -have the format [+-]?[0-9]*[.]?[0-9]* with at least one and no more than 15 -digits. Strings that do not match (inf, nan, values with exponents) will -be passed on to strtod() for processing. - -If the value is too big, the largest possible value will be returned; -if it is too small to be represented in a double zero will be returned. -In both cases errno will be set to ERANGE. - -If no characters could be converted, *failed will be set to 1. - -The address of the first character following the converted number will -be stored in *end. - -Both end and failed must be non-NULL. - */ - -static inline double hts_str2dbl(const char *in, char **end, int *failed) { - uint64_t n = 0; - int max_len = 15; - const unsigned char *v = (const unsigned char *) in; - const unsigned int ascii_zero = '0'; // Prevents conversion to signed - int neg = 0, point = -1; - double d; - static double D[] = {1,1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, - 1e8, 1e9, 1e10,1e11,1e12,1e13,1e14,1e15, - 1e16,1e17,1e18,1e19,1e20}; - - while (isspace(*v)) - v++; - - if (*v == '-') { - neg = 1; - v++; - } else if (*v == '+') { - v++; - } - - switch(*v) { - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - break; - - case '0': - if (v[1] != 'x' && v[1] != 'X') break; - // else fall through - hex number - - default: - // Non numbers, like NaN, Inf - d = strtod(in, end); - if (*end == in) - *failed = 1; - return d; - } - - while (*v == '0') ++v; - - const unsigned char *start = v; - - while (--max_len && *v>='0' && *v<='9') - n = n*10 + *v++ - ascii_zero; - if (max_len && *v == '.') { - point = v - start; - v++; - while (--max_len && *v>='0' && *v<='9') - n = n*10 + *v++ - ascii_zero; - } - if (point < 0) - point = v - start; - - // Outside the scope of this quick and dirty parser. 
- if (!max_len || *v == 'e' || *v == 'E') { - d = strtod(in, end); - if (*end == in) - *failed = 1; - return d; - } - - *end = (char *)v; - d = n / D[v - start - point]; - - return neg ? -d : d; -} - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.18/vcf.c b/src/htslib-1.18/vcf.c deleted file mode 100644 index 032d8af..0000000 --- a/src/htslib-1.18/vcf.c +++ /dev/null @@ -1,5336 +0,0 @@ -/* vcf.c -- VCF/BCF API functions. - - Copyright (C) 2012, 2013 Broad Institute. - Copyright (C) 2012-2023 Genome Research Ltd. - Portions copyright (C) 2014 Intel Corporation. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htslib/vcf.h" -#include "htslib/bgzf.h" -#include "htslib/tbx.h" -#include "htslib/hfile.h" -#include "hts_internal.h" -#include "htslib/hts_endian.h" -#include "htslib/khash_str2int.h" -#include "htslib/kstring.h" -#include "htslib/sam.h" - -#include "htslib/khash.h" -KHASH_MAP_INIT_STR(vdict, bcf_idinfo_t) -typedef khash_t(vdict) vdict_t; - -KHASH_MAP_INIT_STR(hdict, bcf_hrec_t*) -typedef khash_t(hdict) hdict_t; - - -#include "htslib/kseq.h" -HTSLIB_EXPORT -uint32_t bcf_float_missing = 0x7F800001; - -HTSLIB_EXPORT -uint32_t bcf_float_vector_end = 0x7F800002; - -HTSLIB_EXPORT -uint8_t bcf_type_shift[] = { 0, 0, 1, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - -static bcf_idinfo_t bcf_idinfo_def = { .info = { 15, 15, 15 }, .hrec = { NULL, NULL, NULL}, .id = -1 }; - -/* - Partial support for 64-bit POS and Number=1 INFO tags. - Notes: - - the support for 64-bit values is motivated by POS and INFO/END for large genomes - - the use of 64-bit values does not conform to the specification - - cannot output 64-bit BCF and if it does, it is not compatible with anything - - experimental, use at your risk -*/ -#ifdef VCF_ALLOW_INT64 - #define BCF_MAX_BT_INT64 (0x7fffffffffffffff) /* INT64_MAX, for internal use only */ - #define BCF_MIN_BT_INT64 -9223372036854775800LL /* INT64_MIN + 8, for internal use only */ -#endif - -#define BCF_IS_64BIT (1<<30) - - -// Opaque structure with auxilary data which allows to extend bcf_hdr_t without breaking ABI. -// Note that this preserving API and ABI requires that the first element is vdict_t struct -// rather than a pointer, as user programs may (and in some cases do) access the dictionary -// directly as (vdict_t*)hdr->dict. 
-typedef struct -{ - vdict_t dict; // bcf_hdr_t.dict[0] vdict_t dictionary which keeps bcf_idinfo_t for BCF_HL_FLT,BCF_HL_INFO,BCF_HL_FMT - hdict_t *gen; // hdict_t dictionary which keeps bcf_hrec_t* pointers for generic and structured fields -} -bcf_hdr_aux_t; - -static inline bcf_hdr_aux_t *get_hdr_aux(const bcf_hdr_t *hdr) -{ - return (bcf_hdr_aux_t *)hdr->dict[0]; -} - -static char *find_chrom_header_line(char *s) -{ - char *nl; - if (strncmp(s, "#CHROM\t", 7) == 0) return s; - else if ((nl = strstr(s, "\n#CHROM\t")) != NULL) return nl+1; - else return NULL; -} - -/************************* - *** VCF header parser *** - *************************/ - -static int bcf_hdr_add_sample_len(bcf_hdr_t *h, const char *s, size_t len) -{ - const char *ss = s; - while ( *ss && isspace_c(*ss) && ss - s < len) ss++; - if ( !*ss || ss - s == len) - { - hts_log_error("Empty sample name: trailing spaces/tabs in the header line?"); - return -1; - } - - vdict_t *d = (vdict_t*)h->dict[BCF_DT_SAMPLE]; - int ret; - char *sdup = malloc(len + 1); - if (!sdup) return -1; - memcpy(sdup, s, len); - sdup[len] = 0; - - // Ensure space is available in h->samples - size_t n = kh_size(d); - char **new_samples = realloc(h->samples, sizeof(char*) * (n + 1)); - if (!new_samples) { - free(sdup); - return -1; - } - h->samples = new_samples; - - int k = kh_put(vdict, d, sdup, &ret); - if (ret < 0) { - free(sdup); - return -1; - } - if (ret) { // absent - kh_val(d, k) = bcf_idinfo_def; - kh_val(d, k).id = n; - } else { - hts_log_error("Duplicated sample name '%s'", sdup); - free(sdup); - return -1; - } - h->samples[n] = sdup; - h->dirty = 1; - return 0; -} - -int bcf_hdr_add_sample(bcf_hdr_t *h, const char *s) -{ - if (!s) { - // Allowed for backwards-compatibility, calling with s == NULL - // used to trigger bcf_hdr_sync(h); - return 0; - } - return bcf_hdr_add_sample_len(h, s, strlen(s)); -} - -int HTS_RESULT_USED bcf_hdr_parse_sample_line(bcf_hdr_t *hdr, const char *str) -{ - const char *mandatory 
= "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"; - if ( strncmp(str,mandatory,strlen(mandatory)) ) - { - hts_log_error("Could not parse the \"#CHROM..\" line, either the fields are incorrect or spaces are present instead of tabs:\n\t%s",str); - return -1; - } - - const char *beg = str + strlen(mandatory), *end; - if ( !*beg || *beg=='\n' ) return 0; - if ( strncmp(beg,"\tFORMAT\t",8) ) - { - hts_log_error("Could not parse the \"#CHROM..\" line, either FORMAT is missing or spaces are present instead of tabs:\n\t%s",str); - return -1; - } - beg += 8; - - int ret = 0; - while ( *beg ) - { - end = beg; - while ( *end && *end!='\t' && *end!='\n' ) end++; - if ( bcf_hdr_add_sample_len(hdr, beg, end-beg) < 0 ) ret = -1; - if ( !*end || *end=='\n' || ret<0 ) break; - beg = end + 1; - } - return ret; -} - -int bcf_hdr_sync(bcf_hdr_t *h) -{ - int i; - for (i = 0; i < 3; i++) - { - vdict_t *d = (vdict_t*)h->dict[i]; - khint_t k; - if ( h->n[i] < kh_size(d) ) - { - bcf_idpair_t *new_idpair; - // this should be true only for i=2, BCF_DT_SAMPLE - new_idpair = (bcf_idpair_t*) realloc(h->id[i], kh_size(d)*sizeof(bcf_idpair_t)); - if (!new_idpair) return -1; - h->n[i] = kh_size(d); - h->id[i] = new_idpair; - } - for (k=kh_begin(d); kid[i][kh_val(d,k).id].key = kh_key(d,k); - h->id[i][kh_val(d,k).id].val = &kh_val(d,k); - } - } - h->dirty = 0; - return 0; -} - -void bcf_hrec_destroy(bcf_hrec_t *hrec) -{ - if (!hrec) return; - free(hrec->key); - if ( hrec->value ) free(hrec->value); - int i; - for (i=0; inkeys; i++) - { - free(hrec->keys[i]); - free(hrec->vals[i]); - } - free(hrec->keys); - free(hrec->vals); - free(hrec); -} - -// Copies all fields except IDX. 
-bcf_hrec_t *bcf_hrec_dup(bcf_hrec_t *hrec) -{ - int save_errno; - bcf_hrec_t *out = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t)); - if (!out) return NULL; - - out->type = hrec->type; - if ( hrec->key ) { - out->key = strdup(hrec->key); - if (!out->key) goto fail; - } - if ( hrec->value ) { - out->value = strdup(hrec->value); - if (!out->value) goto fail; - } - out->nkeys = hrec->nkeys; - out->keys = (char**) malloc(sizeof(char*)*hrec->nkeys); - if (!out->keys) goto fail; - out->vals = (char**) malloc(sizeof(char*)*hrec->nkeys); - if (!out->vals) goto fail; - int i, j = 0; - for (i=0; inkeys; i++) - { - if ( hrec->keys[i] && !strcmp("IDX",hrec->keys[i]) ) continue; - if ( hrec->keys[i] ) { - out->keys[j] = strdup(hrec->keys[i]); - if (!out->keys[j]) goto fail; - } - if ( hrec->vals[i] ) { - out->vals[j] = strdup(hrec->vals[i]); - if (!out->vals[j]) goto fail; - } - j++; - } - if ( i!=j ) out->nkeys -= i-j; // IDX was omitted - return out; - - fail: - save_errno = errno; - hts_log_error("%s", strerror(errno)); - bcf_hrec_destroy(out); - errno = save_errno; - return NULL; -} - -void bcf_hrec_debug(FILE *fp, bcf_hrec_t *hrec) -{ - fprintf(fp, "key=[%s] value=[%s]", hrec->key, hrec->value?hrec->value:""); - int i; - for (i=0; inkeys; i++) - fprintf(fp, "\t[%s]=[%s]", hrec->keys[i],hrec->vals[i]); - fprintf(fp, "\n"); -} - -void bcf_header_debug(bcf_hdr_t *hdr) -{ - int i, j; - for (i=0; inhrec; i++) - { - if ( !hdr->hrec[i]->value ) - { - fprintf(stderr, "##%s=<", hdr->hrec[i]->key); - fprintf(stderr,"%s=%s", hdr->hrec[i]->keys[0], hdr->hrec[i]->vals[0]); - for (j=1; jhrec[i]->nkeys; j++) - fprintf(stderr,",%s=%s", hdr->hrec[i]->keys[j], hdr->hrec[i]->vals[j]); - fprintf(stderr,">\n"); - } - else - fprintf(stderr,"##%s=%s\n", hdr->hrec[i]->key,hdr->hrec[i]->value); - } -} - -int bcf_hrec_add_key(bcf_hrec_t *hrec, const char *str, size_t len) -{ - char **tmp; - size_t n = hrec->nkeys + 1; - assert(len > 0 && len < SIZE_MAX); - tmp = realloc(hrec->keys, sizeof(char*)*n); - 
if (!tmp) return -1; - hrec->keys = tmp; - tmp = realloc(hrec->vals, sizeof(char*)*n); - if (!tmp) return -1; - hrec->vals = tmp; - - hrec->keys[hrec->nkeys] = (char*) malloc((len+1)*sizeof(char)); - if (!hrec->keys[hrec->nkeys]) return -1; - memcpy(hrec->keys[hrec->nkeys],str,len); - hrec->keys[hrec->nkeys][len] = 0; - hrec->vals[hrec->nkeys] = NULL; - hrec->nkeys = n; - return 0; -} - -int bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const char *str, size_t len, int is_quoted) -{ - if ( hrec->vals[i] ) { - free(hrec->vals[i]); - hrec->vals[i] = NULL; - } - if ( !str ) return 0; - if ( is_quoted ) - { - if (len >= SIZE_MAX - 3) { - errno = ENOMEM; - return -1; - } - hrec->vals[i] = (char*) malloc((len+3)*sizeof(char)); - if (!hrec->vals[i]) return -1; - hrec->vals[i][0] = '"'; - memcpy(&hrec->vals[i][1],str,len); - hrec->vals[i][len+1] = '"'; - hrec->vals[i][len+2] = 0; - } - else - { - if (len == SIZE_MAX) { - errno = ENOMEM; - return -1; - } - hrec->vals[i] = (char*) malloc((len+1)*sizeof(char)); - if (!hrec->vals[i]) return -1; - memcpy(hrec->vals[i],str,len); - hrec->vals[i][len] = 0; - } - return 0; -} - -int hrec_add_idx(bcf_hrec_t *hrec, int idx) -{ - int n = hrec->nkeys + 1; - char **tmp = (char**) realloc(hrec->keys, sizeof(char*)*n); - if (!tmp) return -1; - hrec->keys = tmp; - - tmp = (char**) realloc(hrec->vals, sizeof(char*)*n); - if (!tmp) return -1; - hrec->vals = tmp; - - hrec->keys[hrec->nkeys] = strdup("IDX"); - if (!hrec->keys[hrec->nkeys]) return -1; - - kstring_t str = {0,0,0}; - if (kputw(idx, &str) < 0) { - free(hrec->keys[hrec->nkeys]); - return -1; - } - hrec->vals[hrec->nkeys] = str.s; - hrec->nkeys = n; - return 0; -} - -int bcf_hrec_find_key(bcf_hrec_t *hrec, const char *key) -{ - int i; - for (i=0; inkeys; i++) - if ( !strcasecmp(key,hrec->keys[i]) ) return i; - return -1; -} - -static void bcf_hrec_set_type(bcf_hrec_t *hrec) -{ - if ( !strcmp(hrec->key, "contig") ) hrec->type = BCF_HL_CTG; - else if ( !strcmp(hrec->key, "INFO") ) 
hrec->type = BCF_HL_INFO; - else if ( !strcmp(hrec->key, "FILTER") ) hrec->type = BCF_HL_FLT; - else if ( !strcmp(hrec->key, "FORMAT") ) hrec->type = BCF_HL_FMT; - else if ( hrec->nkeys>0 ) hrec->type = BCF_HL_STR; - else hrec->type = BCF_HL_GEN; -} - - -/** - The arrays were generated with - - valid_ctg: - perl -le '@v = (split(//,q[!#$%&*+./:;=?@^_|~-]),"a"..."z","A"..."Z","0"..."9"); @a = (0) x 256; foreach $c (@v) { $a[ord($c)] = 1; } print join(", ",@a)' | fold -w 48 - - valid_tag: - perl -le '@v = (split(//,q[_.]),"a"..."z","A"..."Z","0"..."9"); @a = (0) x 256; foreach $c (@v) { $a[ord($c)] = 1; } print join(", ",@a)' | fold -w 48 -*/ -static const uint8_t valid_ctg[256] = -{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; -static const uint8_t valid_tag[256] = -{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/** - bcf_hrec_check() - check the validity of structured header lines - - Returns 0 on success or negative value on error. - - Currently the return status is not checked by the caller - and only a warning is printed on stderr. This should be improved - to propagate the error all the way up to the caller and let it - decide what to do: throw an error or proceed anyway. - */ -static int bcf_hrec_check(bcf_hrec_t *hrec) -{ - int i; - bcf_hrec_set_type(hrec); - - if ( hrec->type==BCF_HL_CTG ) - { - i = bcf_hrec_find_key(hrec,"ID"); - if ( i<0 ) goto err_missing_id; - char *val = hrec->vals[i]; - if ( val[0]=='*' || val[0]=='=' || !valid_ctg[(uint8_t)val[0]] ) goto err_invalid_ctg; - while ( *(++val) ) - if ( !valid_ctg[(uint8_t)*val] ) goto err_invalid_ctg; - return 0; - } - if ( hrec->type==BCF_HL_INFO ) - { - i = bcf_hrec_find_key(hrec,"ID"); - if ( i<0 ) goto err_missing_id; - char *val = hrec->vals[i]; - if ( !strcmp(val,"1000G") ) return 0; - if ( val[0]=='.' || (val[0]>='0' && val[0]<='9') || !valid_tag[(uint8_t)val[0]] ) goto err_invalid_tag; - while ( *(++val) ) - if ( !valid_tag[(uint8_t)*val] ) goto err_invalid_tag; - return 0; - } - if ( hrec->type==BCF_HL_FMT ) - { - i = bcf_hrec_find_key(hrec,"ID"); - if ( i<0 ) goto err_missing_id; - char *val = hrec->vals[i]; - if ( val[0]=='.' 
|| (val[0]>='0' && val[0]<='9') || !valid_tag[(uint8_t)val[0]] ) goto err_invalid_tag; - while ( *(++val) ) - if ( !valid_tag[(uint8_t)*val] ) goto err_invalid_tag; - return 0; - } - return 0; - - err_missing_id: - hts_log_warning("Missing ID attribute in one or more header lines"); - return -1; - - err_invalid_ctg: - hts_log_warning("Invalid contig name: \"%s\"", hrec->vals[i]); - return -1; - - err_invalid_tag: - hts_log_warning("Invalid tag name: \"%s\"", hrec->vals[i]); - return -1; -} - -static inline int is_escaped(const char *min, const char *str) -{ - int n = 0; - while ( --str>=min && *str=='\\' ) n++; - return n%2; -} - -bcf_hrec_t *bcf_hdr_parse_line(const bcf_hdr_t *h, const char *line, int *len) -{ - bcf_hrec_t *hrec = NULL; - const char *p = line; - if (p[0] != '#' || p[1] != '#') { *len = 0; return NULL; } - p += 2; - - const char *q = p; - while ( *q && *q!='=' && *q != '\n' ) q++; - ptrdiff_t n = q-p; - if ( *q!='=' || !n ) // wrong format - goto malformed_line; - - hrec = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t)); - if (!hrec) { *len = -1; return NULL; } - hrec->key = (char*) malloc(sizeof(char)*(n+1)); - if (!hrec->key) goto fail; - memcpy(hrec->key,p,n); - hrec->key[n] = 0; - hrec->type = -1; - - p = ++q; - if ( *p!='<' ) // generic field, e.g. ##samtoolsVersion=0.1.18-r579 - { - while ( *q && *q!='\n' ) q++; - hrec->value = (char*) malloc((q-p+1)*sizeof(char)); - if (!hrec->value) goto fail; - memcpy(hrec->value, p, q-p); - hrec->value[q-p] = 0; - *len = q - line + (*q ? 1 : 0); // Skip \n but not \0 - return hrec; - } - - // structured line, e.g. 
- // ##INFO= - // ##PEDIGREE= - int nopen = 1; - while ( *q && *q!='\n' && nopen>0 ) - { - p = ++q; - while ( *q && *q==' ' ) { p++; q++; } - // ^[A-Za-z_][0-9A-Za-z_.]*$ - if (p==q && *q && (isalpha_c(*q) || *q=='_')) - { - q++; - while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; - } - n = q-p; - int m = 0; - while ( *q && *q==' ' ) { q++; m++; } - if ( *q!='=' || !n ) - goto malformed_line; - - if (bcf_hrec_add_key(hrec, p, q-p-m) < 0) goto fail; - p = ++q; - while ( *q && *q==' ' ) { p++; q++; } - - int quoted = 0; - char ending = '\0'; - switch (*p) { - case '"': - quoted = 1; - ending = '"'; - p++; - break; - case '[': - quoted = 1; - ending = ']'; - break; - } - if ( quoted ) q++; - while ( *q && *q != '\n' ) - { - if ( quoted ) { if ( *q==ending && !is_escaped(p,q) ) break; } - else - { - if ( *q=='<' ) nopen++; - if ( *q=='>' ) nopen--; - if ( !nopen ) break; - if ( *q==',' && nopen==1 ) break; - } - q++; - } - const char *r = q; - if (quoted && ending == ']') { - if (*q == ending) { - r++; - q++; - quoted = 0; - } else { - char buffer[320]; - hts_log_error("Missing ']' in header line %s", - hts_strprint(buffer, sizeof(buffer), '"', - line, q-line)); - goto fail; - } - } - while ( r > p && r[-1] == ' ' ) r--; - if (bcf_hrec_set_val(hrec, hrec->nkeys-1, p, r-p, quoted) < 0) - goto fail; - if ( quoted && *q==ending ) q++; - if ( *q=='>' ) - { - if (nopen) nopen--; // this can happen with nested angle brackets <> - q++; - } - } - if ( nopen ) - hts_log_warning("Incomplete header line, trying to proceed anyway:\n\t[%s]\n\t[%d]",line,q[0]); - - // Skip to end of line - int nonspace = 0; - p = q; - while ( *q && *q!='\n' ) { nonspace |= !isspace_c(*q); q++; } - if (nonspace) { - char buffer[320]; - hts_log_warning("Dropped trailing junk from header line '%s'", - hts_strprint(buffer, sizeof(buffer), - '"', line, q - line)); - } - - *len = q - line + (*q ? 
1 : 0); - return hrec; - - fail: - *len = -1; - bcf_hrec_destroy(hrec); - return NULL; - - malformed_line: - { - char buffer[320]; - while ( *q && *q!='\n' ) q++; // Ensure *len includes full line - hts_log_error("Could not parse the header line: %s", - hts_strprint(buffer, sizeof(buffer), - '"', line, q - line)); - *len = q - line + (*q ? 1 : 0); - bcf_hrec_destroy(hrec); - return NULL; - } -} - -static int bcf_hdr_set_idx(bcf_hdr_t *hdr, int dict_type, const char *tag, bcf_idinfo_t *idinfo) -{ - size_t new_n; - - // If available, preserve existing IDX - if ( idinfo->id==-1 ) - idinfo->id = hdr->n[dict_type]; - else if ( idinfo->id < hdr->n[dict_type] && hdr->id[dict_type][idinfo->id].key ) - { - hts_log_error("Conflicting IDX=%d lines in the header dictionary, the new tag is %s", - idinfo->id, tag); - errno = EINVAL; - return -1; - } - - new_n = idinfo->id >= hdr->n[dict_type] ? idinfo->id+1 : hdr->n[dict_type]; - if (hts_resize(bcf_idpair_t, new_n, &hdr->m[dict_type], - &hdr->id[dict_type], HTS_RESIZE_CLEAR)) { - return -1; - } - hdr->n[dict_type] = new_n; - - // NB: the next kh_put call can invalidate the idinfo pointer, therefore - // we leave it unassigned here. It must be set explicitly in bcf_hdr_sync. 
- hdr->id[dict_type][idinfo->id].key = tag; - - return 0; -} - -// returns: 1 when hdr needs to be synced, -1 on error, 0 otherwise -static int bcf_hdr_register_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) -{ - // contig - int i, ret, replacing = 0; - khint_t k; - char *str = NULL; - - bcf_hrec_set_type(hrec); - - if ( hrec->type==BCF_HL_CTG ) - { - hts_pos_t len = 0; - - // Get the contig ID ($str) and length ($j) - i = bcf_hrec_find_key(hrec,"length"); - if ( i<0 ) len = 0; - else { - char *end = hrec->vals[i]; - len = strtoll(hrec->vals[i], &end, 10); - if (end == hrec->vals[i] || len < 0) return 0; - } - - i = bcf_hrec_find_key(hrec,"ID"); - if ( i<0 ) return 0; - str = strdup(hrec->vals[i]); - if (!str) return -1; - - // Register in the dictionary - vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_CTG]; - khint_t k = kh_get(vdict, d, str); - if ( k != kh_end(d) ) { // already present - free(str); str=NULL; - if (kh_val(d, k).hrec[0] != NULL) // and not removed - return 0; - replacing = 1; - } else { - k = kh_put(vdict, d, str, &ret); - if (ret < 0) { free(str); return -1; } - } - - int idx = bcf_hrec_find_key(hrec,"IDX"); - if ( idx!=-1 ) - { - char *tmp = hrec->vals[idx]; - idx = strtol(hrec->vals[idx], &tmp, 10); - if ( *tmp || idx < 0 || idx >= INT_MAX - 1) - { - if (!replacing) { - kh_del(vdict, d, k); - free(str); - } - hts_log_warning("Error parsing the IDX tag, skipping"); - return 0; - } - } - - kh_val(d, k) = bcf_idinfo_def; - kh_val(d, k).id = idx; - kh_val(d, k).info[0] = len; - kh_val(d, k).hrec[0] = hrec; - if (bcf_hdr_set_idx(hdr, BCF_DT_CTG, kh_key(d,k), &kh_val(d,k)) < 0) { - if (!replacing) { - kh_del(vdict, d, k); - free(str); - } - return -1; - } - if ( idx==-1 ) { - if (hrec_add_idx(hrec, kh_val(d,k).id) < 0) { - return -1; - } - } - - return 1; - } - - if ( hrec->type==BCF_HL_STR ) return 1; - if ( hrec->type!=BCF_HL_INFO && hrec->type!=BCF_HL_FLT && hrec->type!=BCF_HL_FMT ) return 0; - - // INFO/FILTER/FORMAT - char *id = NULL; - uint32_t type = 
UINT32_MAX, var = UINT32_MAX; - int num = -1, idx = -1; - for (i=0; inkeys; i++) - { - if ( !strcmp(hrec->keys[i], "ID") ) id = hrec->vals[i]; - else if ( !strcmp(hrec->keys[i], "IDX") ) - { - char *tmp = hrec->vals[i]; - idx = strtol(hrec->vals[i], &tmp, 10); - if ( *tmp || idx < 0 || idx >= INT_MAX - 1) - { - hts_log_warning("Error parsing the IDX tag, skipping"); - return 0; - } - } - else if ( !strcmp(hrec->keys[i], "Type") ) - { - if ( !strcmp(hrec->vals[i], "Integer") ) type = BCF_HT_INT; - else if ( !strcmp(hrec->vals[i], "Float") ) type = BCF_HT_REAL; - else if ( !strcmp(hrec->vals[i], "String") ) type = BCF_HT_STR; - else if ( !strcmp(hrec->vals[i], "Character") ) type = BCF_HT_STR; - else if ( !strcmp(hrec->vals[i], "Flag") ) type = BCF_HT_FLAG; - else - { - hts_log_warning("The type \"%s\" is not supported, assuming \"String\"", hrec->vals[i]); - type = BCF_HT_STR; - } - } - else if ( !strcmp(hrec->keys[i], "Number") ) - { - if ( !strcmp(hrec->vals[i],"A") ) var = BCF_VL_A; - else if ( !strcmp(hrec->vals[i],"R") ) var = BCF_VL_R; - else if ( !strcmp(hrec->vals[i],"G") ) var = BCF_VL_G; - else if ( !strcmp(hrec->vals[i],".") ) var = BCF_VL_VAR; - else - { - sscanf(hrec->vals[i],"%d",&num); - var = BCF_VL_FIXED; - } - if (var != BCF_VL_FIXED) num = 0xfffff; - } - } - if (hrec->type == BCF_HL_INFO || hrec->type == BCF_HL_FMT) { - if (type == -1) { - hts_log_warning("%s %s field has no Type defined. Assuming String", - *hrec->key == 'I' ? "An" : "A", hrec->key); - type = BCF_HT_STR; - } - if (var == -1) { - hts_log_warning("%s %s field has no Number defined. Assuming '.'", - *hrec->key == 'I' ? 
"An" : "A", hrec->key); - var = BCF_VL_VAR; - } - if ( type==BCF_HT_FLAG && (var!=BCF_VL_FIXED || num!=0) ) - { - hts_log_warning("The definition of Flag \"%s/%s\" is invalid, forcing Number=0", hrec->key,id); - var = BCF_VL_FIXED; - num = 0; - } - } - uint32_t info = ((((uint32_t)num) & 0xfffff)<<12 | - (var & 0xf) << 8 | - (type & 0xf) << 4 | - (((uint32_t) hrec->type) & 0xf)); - - if ( !id ) return 0; - str = strdup(id); - if (!str) return -1; - - vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_ID]; - k = kh_get(vdict, d, str); - if ( k != kh_end(d) ) - { - // already present - free(str); - if ( kh_val(d, k).hrec[info&0xf] ) return 0; - kh_val(d, k).info[info&0xf] = info; - kh_val(d, k).hrec[info&0xf] = hrec; - if ( idx==-1 ) { - if (hrec_add_idx(hrec, kh_val(d, k).id) < 0) { - return -1; - } - } - return 1; - } - k = kh_put(vdict, d, str, &ret); - if (ret < 0) { - free(str); - return -1; - } - kh_val(d, k) = bcf_idinfo_def; - kh_val(d, k).info[info&0xf] = info; - kh_val(d, k).hrec[info&0xf] = hrec; - kh_val(d, k).id = idx; - if (bcf_hdr_set_idx(hdr, BCF_DT_ID, kh_key(d,k), &kh_val(d,k)) < 0) { - kh_del(vdict, d, k); - free(str); - return -1; - } - if ( idx==-1 ) { - if (hrec_add_idx(hrec, kh_val(d,k).id) < 0) { - return -1; - } - } - - return 1; -} - -static void bcf_hdr_unregister_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) -{ - if (hrec->type == BCF_HL_FLT || - hrec->type == BCF_HL_INFO || - hrec->type == BCF_HL_FMT || - hrec->type == BCF_HL_CTG) { - int id = bcf_hrec_find_key(hrec, "ID"); - if (id < 0 || !hrec->vals[id]) - return; - vdict_t *dict = (hrec->type == BCF_HL_CTG - ? (vdict_t*)hdr->dict[BCF_DT_CTG] - : (vdict_t*)hdr->dict[BCF_DT_ID]); - khint_t k = kh_get(vdict, dict, hrec->vals[id]); - if (k != kh_end(dict)) - kh_val(dict, k).hrec[hrec->type==BCF_HL_CTG ? 
0 : hrec->type] = NULL; - } -} - -static void bcf_hdr_remove_from_hdict(bcf_hdr_t *hdr, bcf_hrec_t *hrec) -{ - kstring_t str = KS_INITIALIZE; - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - khint_t k; - int id; - - switch (hrec->type) { - case BCF_HL_GEN: - if (ksprintf(&str, "##%s=%s", hrec->key,hrec->value) < 0) - str.l = 0; - break; - case BCF_HL_STR: - id = bcf_hrec_find_key(hrec, "ID"); - if (id < 0) - return; - if (!hrec->vals[id] || - ksprintf(&str, "##%s=", hrec->key, hrec->vals[id]) < 0) - str.l = 0; - break; - default: - return; - } - if (str.l) { - k = kh_get(hdict, aux->gen, str.s); - } else { - // Couldn't get a string for some reason, so try the hard way... - for (k = kh_begin(aux->gen); k < kh_end(aux->gen); k++) { - if (kh_exist(aux->gen, k) && kh_val(aux->gen, k) == hrec) - break; - } - } - if (k != kh_end(aux->gen) && kh_val(aux->gen, k) == hrec) { - kh_val(aux->gen, k) = NULL; - free((char *) kh_key(aux->gen, k)); - kh_key(aux->gen, k) = NULL; - kh_del(hdict, aux->gen, k); - } - free(str.s); -} - -int bcf_hdr_update_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec, const bcf_hrec_t *tmp) -{ - // currently only for bcf_hdr_set_version - assert( hrec->type==BCF_HL_GEN ); - int ret; - khint_t k; - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - for (k=kh_begin(aux->gen); kgen); k++) - { - if ( !kh_exist(aux->gen,k) ) continue; - if ( hrec!=(bcf_hrec_t*)kh_val(aux->gen,k) ) continue; - break; - } - assert( kgen) ); // something went wrong, should never happen - free((char*)kh_key(aux->gen,k)); - kh_del(hdict,aux->gen,k); - kstring_t str = {0,0,0}; - if ( ksprintf(&str, "##%s=%s", tmp->key,tmp->value) < 0 ) - { - free(str.s); - return -1; - } - k = kh_put(hdict, aux->gen, str.s, &ret); - if ( ret<0 ) - { - free(str.s); - return -1; - } - free(hrec->value); - hrec->value = strdup(tmp->value); - if ( !hrec->value ) return -1; - return 0; -} - -int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) -{ - kstring_t str = {0,0,0}; - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - - int 
res; - if ( !hrec ) return 0; - - bcf_hrec_check(hrec); // todo: check return status and propagate errors up - - res = bcf_hdr_register_hrec(hdr,hrec); - if (res < 0) return -1; - if ( !res ) - { - // If one of the hashed field, then it is already present - if ( hrec->type != BCF_HL_GEN ) - { - bcf_hrec_destroy(hrec); - return 0; - } - - // Is one of the generic fields and already present? - if ( ksprintf(&str, "##%s=%s", hrec->key,hrec->value) < 0 ) - { - free(str.s); - return -1; - } - khint_t k = kh_get(hdict, aux->gen, str.s); - if ( k != kh_end(aux->gen) ) - { - // duplicate record - bcf_hrec_destroy(hrec); - free(str.s); - return 0; - } - } - - int i; - if ( hrec->type==BCF_HL_STR && (i=bcf_hrec_find_key(hrec,"ID"))>=0 ) - { - if ( ksprintf(&str, "##%s=", hrec->key,hrec->vals[i]) < 0 ) - { - free(str.s); - return -1; - } - khint_t k = kh_get(hdict, aux->gen, str.s); - if ( k != kh_end(aux->gen) ) - { - // duplicate record - bcf_hrec_destroy(hrec); - free(str.s); - return 0; - } - } - - // New record, needs to be added - int n = hdr->nhrec + 1; - bcf_hrec_t **new_hrec = realloc(hdr->hrec, n*sizeof(bcf_hrec_t*)); - if (!new_hrec) { - free(str.s); - bcf_hdr_unregister_hrec(hdr, hrec); - return -1; - } - hdr->hrec = new_hrec; - - if ( str.s ) - { - khint_t k = kh_put(hdict, aux->gen, str.s, &res); - if ( res<0 ) - { - free(str.s); - return -1; - } - kh_val(aux->gen,k) = hrec; - } - - hdr->hrec[hdr->nhrec] = hrec; - hdr->dirty = 1; - hdr->nhrec = n; - - return hrec->type==BCF_HL_GEN ? 0 : 1; -} - -bcf_hrec_t *bcf_hdr_get_hrec(const bcf_hdr_t *hdr, int type, const char *key, const char *value, const char *str_class) -{ - int i; - if ( type==BCF_HL_GEN ) - { - // e.g. 
##fileformat=VCFv4.2 - // ##source=GenomicsDBImport - // ##bcftools_viewVersion=1.16-80-gdfdb0923+htslib-1.16-34-g215d364 - if ( value ) - { - kstring_t str = {0,0,0}; - ksprintf(&str, "##%s=%s", key,value); - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - khint_t k = kh_get(hdict, aux->gen, str.s); - free(str.s); - if ( k == kh_end(aux->gen) ) return NULL; - return kh_val(aux->gen, k); - } - for (i=0; inhrec; i++) - { - if ( hdr->hrec[i]->type!=type ) continue; - if ( strcmp(hdr->hrec[i]->key,key) ) continue; - return hdr->hrec[i]; - } - return NULL; - } - else if ( type==BCF_HL_STR ) - { - // e.g. ##GATKCommandLine= - // ##ALT= - if (!str_class) return NULL; - if ( !strcmp("ID",key) ) - { - kstring_t str = {0,0,0}; - ksprintf(&str, "##%s=<%s=%s>",str_class,key,value); - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - khint_t k = kh_get(hdict, aux->gen, str.s); - free(str.s); - if ( k == kh_end(aux->gen) ) return NULL; - return kh_val(aux->gen, k); - } - for (i=0; inhrec; i++) - { - if ( hdr->hrec[i]->type!=type ) continue; - if ( strcmp(hdr->hrec[i]->key,str_class) ) continue; - int j = bcf_hrec_find_key(hdr->hrec[i],key); - if ( j>=0 && !strcmp(hdr->hrec[i]->vals[j],value) ) return hdr->hrec[i]; - } - return NULL; - } - vdict_t *d = type==BCF_HL_CTG ? 
(vdict_t*)hdr->dict[BCF_DT_CTG] : (vdict_t*)hdr->dict[BCF_DT_ID]; - khint_t k = kh_get(vdict, d, value); - if ( k == kh_end(d) ) return NULL; - return kh_val(d, k).hrec[type==BCF_HL_CTG?0:type]; -} - -void bcf_hdr_check_sanity(bcf_hdr_t *hdr) -{ - static int PL_warned = 0, GL_warned = 0; - - if ( !PL_warned ) - { - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, "PL"); - if ( bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) && bcf_hdr_id2length(hdr,BCF_HL_FMT,id)!=BCF_VL_G ) - { - hts_log_warning("PL should be declared as Number=G"); - PL_warned = 1; - } - } - if ( !GL_warned ) - { - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, "GL"); - if ( bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) && bcf_hdr_id2length(hdr,BCF_HL_FMT,id)!=BCF_VL_G ) - { - hts_log_warning("GL should be declared as Number=G"); - GL_warned = 1; - } - } -} - -int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt) -{ - int len, done = 0; - char *p = htxt; - - // Check sanity: "fileformat" string must come as first - bcf_hrec_t *hrec = bcf_hdr_parse_line(hdr,p,&len); - if ( !hrec || !hrec->key || strcasecmp(hrec->key,"fileformat") ) - hts_log_warning("The first line should be ##fileformat; is the VCF/BCF header broken?"); - if (bcf_hdr_add_hrec(hdr, hrec) < 0) { - bcf_hrec_destroy(hrec); - return -1; - } - - // The filter PASS must appear first in the dictionary - hrec = bcf_hdr_parse_line(hdr,"##FILTER=",&len); - if (!hrec || bcf_hdr_add_hrec(hdr, hrec) < 0) { - bcf_hrec_destroy(hrec); - return -1; - } - - // Parse the whole header - do { - while (NULL != (hrec = bcf_hdr_parse_line(hdr, p, &len))) { - if (bcf_hdr_add_hrec(hdr, hrec) < 0) { - bcf_hrec_destroy(hrec); - return -1; - } - p += len; - } - assert(hrec == NULL); - if (len < 0) { - // len < 0 indicates out-of-memory, or similar error - hts_log_error("Could not parse header line: %s", strerror(errno)); - return -1; - } else if (len > 0) { - // Bad header line. bcf_hdr_parse_line() will have logged it. 
- // Skip and try again on the next line (p + len will be the start - // of the next one). - p += len; - continue; - } - - // Next should be the sample line. If not, it was a malformed - // header, in which case print a warning and skip (many VCF - // operations do not really care about a few malformed lines). - // In the future we may want to add a strict mode that errors in - // this case. - if ( strncmp("#CHROM\t",p,7) && strncmp("#CHROM ",p,7) ) { - char *eol = strchr(p, '\n'); - if (*p != '\0') { - char buffer[320]; - hts_log_warning("Could not parse header line: %s", - hts_strprint(buffer, sizeof(buffer), - '"', p, - eol ? (eol - p) : SIZE_MAX)); - } - if (eol) { - p = eol + 1; // Try from the next line. - } else { - done = -1; // No more lines left, give up. - } - } else { - done = 1; // Sample line found - } - } while (!done); - - if (done < 0) { - // No sample line is fatal. - hts_log_error("Could not parse the header, sample line not found"); - return -1; - } - - if (bcf_hdr_parse_sample_line(hdr,p) < 0) - return -1; - if (bcf_hdr_sync(hdr) < 0) - return -1; - bcf_hdr_check_sanity(hdr); - return 0; -} - -int bcf_hdr_append(bcf_hdr_t *hdr, const char *line) -{ - int len; - bcf_hrec_t *hrec = bcf_hdr_parse_line(hdr, (char*) line, &len); - if ( !hrec ) return -1; - if (bcf_hdr_add_hrec(hdr, hrec) < 0) - return -1; - return 0; -} - -void bcf_hdr_remove(bcf_hdr_t *hdr, int type, const char *key) -{ - int i = 0; - bcf_hrec_t *hrec; - if ( !key ) - { - // no key, remove all entries of this type - while ( inhrec ) - { - if ( hdr->hrec[i]->type!=type ) { i++; continue; } - hrec = hdr->hrec[i]; - bcf_hdr_unregister_hrec(hdr, hrec); - bcf_hdr_remove_from_hdict(hdr, hrec); - hdr->dirty = 1; - hdr->nhrec--; - if ( i < hdr->nhrec ) - memmove(&hdr->hrec[i],&hdr->hrec[i+1],(hdr->nhrec-i)*sizeof(bcf_hrec_t*)); - bcf_hrec_destroy(hrec); - } - return; - } - while (1) - { - if ( type==BCF_HL_FLT || type==BCF_HL_INFO || type==BCF_HL_FMT || type== BCF_HL_CTG ) - { - hrec = 
bcf_hdr_get_hrec(hdr, type, "ID", key, NULL); - if ( !hrec ) return; - - for (i=0; inhrec; i++) - if ( hdr->hrec[i]==hrec ) break; - assert( inhrec ); - - vdict_t *d = type==BCF_HL_CTG ? (vdict_t*)hdr->dict[BCF_DT_CTG] : (vdict_t*)hdr->dict[BCF_DT_ID]; - khint_t k = kh_get(vdict, d, key); - kh_val(d, k).hrec[type==BCF_HL_CTG?0:type] = NULL; - } - else - { - for (i=0; inhrec; i++) - { - if ( hdr->hrec[i]->type!=type ) continue; - if ( type==BCF_HL_GEN ) - { - if ( !strcmp(hdr->hrec[i]->key,key) ) break; - } - else - { - // not all structured lines have ID, we could be more sophisticated as in bcf_hdr_get_hrec() - int j = bcf_hrec_find_key(hdr->hrec[i], "ID"); - if ( j>=0 && !strcmp(hdr->hrec[i]->vals[j],key) ) break; - } - } - if ( i==hdr->nhrec ) return; - hrec = hdr->hrec[i]; - bcf_hdr_remove_from_hdict(hdr, hrec); - } - - hdr->nhrec--; - if ( i < hdr->nhrec ) - memmove(&hdr->hrec[i],&hdr->hrec[i+1],(hdr->nhrec-i)*sizeof(bcf_hrec_t*)); - bcf_hrec_destroy(hrec); - hdr->dirty = 1; - } -} - -int bcf_hdr_printf(bcf_hdr_t *hdr, const char *fmt, ...) 
-{ - char tmp[256], *line = tmp; - va_list ap; - va_start(ap, fmt); - int n = vsnprintf(line, sizeof(tmp), fmt, ap); - va_end(ap); - - if (n >= sizeof(tmp)) { - n++; // For trailing NUL - line = (char*)malloc(n); - if (!line) - return -1; - - va_start(ap, fmt); - vsnprintf(line, n, fmt, ap); - va_end(ap); - } - - int ret = bcf_hdr_append(hdr, line); - - if (line != tmp) free(line); - return ret; -} - - -/********************** - *** BCF header I/O *** - **********************/ - -const char *bcf_hdr_get_version(const bcf_hdr_t *hdr) -{ - bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_GEN, "fileformat", NULL, NULL); - if ( !hrec ) - { - hts_log_warning("No version string found, assuming VCFv4.2"); - return "VCFv4.2"; - } - return hrec->value; -} - -int bcf_hdr_set_version(bcf_hdr_t *hdr, const char *version) -{ - bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_GEN, "fileformat", NULL, NULL); - if ( !hrec ) - { - int len; - kstring_t str = {0,0,0}; - if ( ksprintf(&str,"##fileformat=%s", version) < 0 ) return -1; - hrec = bcf_hdr_parse_line(hdr, str.s, &len); - free(str.s); - } - else - { - bcf_hrec_t *tmp = bcf_hrec_dup(hrec); - if ( !tmp ) return -1; - free(tmp->value); - tmp->value = strdup(version); - if ( !tmp->value ) return -1; - bcf_hdr_update_hrec(hdr, hrec, tmp); - bcf_hrec_destroy(tmp); - } - hdr->dirty = 1; - return 0; // FIXME: check for errs in this function (return < 0 if so) -} - -bcf_hdr_t *bcf_hdr_init(const char *mode) -{ - int i; - bcf_hdr_t *h; - h = (bcf_hdr_t*)calloc(1, sizeof(bcf_hdr_t)); - if (!h) return NULL; - for (i = 0; i < 3; ++i) - if ((h->dict[i] = kh_init(vdict)) == NULL) goto fail; - - bcf_hdr_aux_t *aux = (bcf_hdr_aux_t*)calloc(1,sizeof(bcf_hdr_aux_t)); - if ( !aux ) goto fail; - if ( (aux->gen = kh_init(hdict))==NULL ) { free(aux); goto fail; } - aux->dict = *((vdict_t*)h->dict[0]); - free(h->dict[0]); - h->dict[0] = aux; - - if ( strchr(mode,'w') ) - { - bcf_hdr_append(h, "##fileformat=VCFv4.2"); - // The filter PASS must appear 
first in the dictionary - bcf_hdr_append(h, "##FILTER="); - } - return h; - - fail: - for (i = 0; i < 3; ++i) - kh_destroy(vdict, h->dict[i]); - free(h); - return NULL; -} - -void bcf_hdr_destroy(bcf_hdr_t *h) -{ - int i; - khint_t k; - if (!h) return; - for (i = 0; i < 3; ++i) { - vdict_t *d = (vdict_t*)h->dict[i]; - if (d == 0) continue; - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((char*)kh_key(d, k)); - if ( i==0 ) - { - bcf_hdr_aux_t *aux = get_hdr_aux(h); - for (k=kh_begin(aux->gen); kgen); k++) - if ( kh_exist(aux->gen,k) ) free((char*)kh_key(aux->gen,k)); - kh_destroy(hdict, aux->gen); - } - kh_destroy(vdict, d); - free(h->id[i]); - } - for (i=0; inhrec; i++) - bcf_hrec_destroy(h->hrec[i]); - if (h->nhrec) free(h->hrec); - if (h->samples) free(h->samples); - free(h->keep_samples); - free(h->transl[0]); free(h->transl[1]); - free(h->mem.s); - free(h); -} - -bcf_hdr_t *bcf_hdr_read(htsFile *hfp) -{ - if (hfp->format.format == vcf) - return vcf_hdr_read(hfp); - if (hfp->format.format != bcf) { - hts_log_error("Input is not detected as bcf or vcf format"); - return NULL; - } - - assert(hfp->is_bgzf); - - BGZF *fp = hfp->fp.bgzf; - uint8_t magic[5]; - bcf_hdr_t *h; - h = bcf_hdr_init("r"); - if (!h) { - hts_log_error("Failed to allocate bcf header"); - return NULL; - } - if (bgzf_read(fp, magic, 5) != 5) - { - hts_log_error("Failed to read the header (reading BCF in text mode?)"); - bcf_hdr_destroy(h); - return NULL; - } - if (strncmp((char*)magic, "BCF\2\2", 5) != 0) - { - if (!strncmp((char*)magic, "BCF", 3)) - hts_log_error("Invalid BCF2 magic string: only BCFv2.2 is supported"); - else - hts_log_error("Invalid BCF2 magic string"); - bcf_hdr_destroy(h); - return NULL; - } - uint8_t buf[4]; - size_t hlen; - char *htxt = NULL; - if (bgzf_read(fp, buf, 4) != 4) goto fail; - hlen = buf[0] | (buf[1] << 8) | (buf[2] << 16) | ((size_t) buf[3] << 24); - if (hlen >= SIZE_MAX) { errno = ENOMEM; goto fail; } - htxt = (char*)malloc(hlen + 1); - 
if (!htxt) goto fail; - if (bgzf_read(fp, htxt, hlen) != hlen) goto fail; - htxt[hlen] = '\0'; // Ensure htxt is terminated - if ( bcf_hdr_parse(h, htxt) < 0 ) goto fail; - free(htxt); - return h; - fail: - hts_log_error("Failed to read BCF header"); - free(htxt); - bcf_hdr_destroy(h); - return NULL; -} - -int bcf_hdr_write(htsFile *hfp, bcf_hdr_t *h) -{ - if (!h) { - errno = EINVAL; - return -1; - } - if ( h->dirty ) { - if (bcf_hdr_sync(h) < 0) return -1; - } - hfp->format.category = variant_data; - if (hfp->format.format == vcf || hfp->format.format == text_format) { - hfp->format.format = vcf; - return vcf_hdr_write(hfp, h); - } - - if (hfp->format.format == binary_format) - hfp->format.format = bcf; - - kstring_t htxt = {0,0,0}; - if (bcf_hdr_format(h, 1, &htxt) < 0) { - free(htxt.s); - return -1; - } - kputc('\0', &htxt); // include the \0 byte - - BGZF *fp = hfp->fp.bgzf; - if ( bgzf_write(fp, "BCF\2\2", 5) !=5 ) return -1; - uint8_t hlen[4]; - u32_to_le(htxt.l, hlen); - if ( bgzf_write(fp, hlen, 4) !=4 ) return -1; - if ( bgzf_write(fp, htxt.s, htxt.l) != htxt.l ) return -1; - - free(htxt.s); - return 0; -} - -/******************** - *** BCF site I/O *** - ********************/ - -bcf1_t *bcf_init(void) -{ - bcf1_t *v; - v = (bcf1_t*)calloc(1, sizeof(bcf1_t)); - return v; -} - -void bcf_clear(bcf1_t *v) -{ - int i; - for (i=0; id.m_info; i++) - { - if ( v->d.info[i].vptr_free ) - { - free(v->d.info[i].vptr - v->d.info[i].vptr_off); - v->d.info[i].vptr_free = 0; - } - } - for (i=0; id.m_fmt; i++) - { - if ( v->d.fmt[i].p_free ) - { - free(v->d.fmt[i].p - v->d.fmt[i].p_off); - v->d.fmt[i].p_free = 0; - } - } - v->rid = v->pos = v->rlen = v->unpacked = 0; - bcf_float_set_missing(v->qual); - v->n_info = v->n_allele = v->n_fmt = v->n_sample = 0; - v->shared.l = v->indiv.l = 0; - v->d.var_type = -1; - v->d.shared_dirty = 0; - v->d.indiv_dirty = 0; - v->d.n_flt = 0; - v->errcode = 0; - if (v->d.m_als) v->d.als[0] = 0; - if (v->d.m_id) v->d.id[0] = 0; -} - -void 
bcf_empty(bcf1_t *v) -{ - bcf_clear1(v); - free(v->d.id); - free(v->d.als); - free(v->d.allele); free(v->d.flt); free(v->d.info); free(v->d.fmt); - if (v->d.var ) free(v->d.var); - free(v->shared.s); free(v->indiv.s); - memset(&v->d,0,sizeof(v->d)); - memset(&v->shared,0,sizeof(v->shared)); - memset(&v->indiv,0,sizeof(v->indiv)); -} - -void bcf_destroy(bcf1_t *v) -{ - if (!v) return; - bcf_empty1(v); - free(v); -} - -static inline int bcf_read1_core(BGZF *fp, bcf1_t *v) -{ - uint8_t x[32]; - ssize_t ret; - uint32_t shared_len, indiv_len; - if ((ret = bgzf_read(fp, x, 32)) != 32) { - if (ret == 0) return -1; - return -2; - } - bcf_clear1(v); - shared_len = le_to_u32(x); - if (shared_len < 24) return -2; - shared_len -= 24; // to exclude six 32-bit integers - if (ks_resize(&v->shared, shared_len ? shared_len : 1) != 0) return -2; - indiv_len = le_to_u32(x + 4); - if (ks_resize(&v->indiv, indiv_len ? indiv_len : 1) != 0) return -2; - v->rid = le_to_i32(x + 8); - v->pos = le_to_u32(x + 12); - if ( v->pos==UINT32_MAX ) v->pos = -1; // this is for telomere coordinate, e.g. 
MT:0 - v->rlen = le_to_i32(x + 16); - v->qual = le_to_float(x + 20); - v->n_info = le_to_u16(x + 24); - v->n_allele = le_to_u16(x + 26); - v->n_sample = le_to_u32(x + 28) & 0xffffff; - v->n_fmt = x[31]; - v->shared.l = shared_len; - v->indiv.l = indiv_len; - // silent fix of broken BCFs produced by earlier versions of bcf_subset, prior to and including bd6ed8b4 - if ( (!v->indiv.l || !v->n_sample) && v->n_fmt ) v->n_fmt = 0; - - if (bgzf_read(fp, v->shared.s, v->shared.l) != v->shared.l) return -2; - if (bgzf_read(fp, v->indiv.s, v->indiv.l) != v->indiv.l) return -2; - return 0; -} - -#define bit_array_size(n) ((n)/8+1) -#define bit_array_set(a,i) ((a)[(i)/8] |= 1 << ((i)%8)) -#define bit_array_clear(a,i) ((a)[(i)/8] &= ~(1 << ((i)%8))) -#define bit_array_test(a,i) ((a)[(i)/8] & (1 << ((i)%8))) - -static int bcf_dec_typed_int1_safe(uint8_t *p, uint8_t *end, uint8_t **q, - int32_t *val) { - uint32_t t; - if (end - p < 2) return -1; - t = *p++ & 0xf; - /* Use if .. else if ... else instead of switch to force order. Assumption - is that small integers are more frequent than big ones. */ - if (t == BCF_BT_INT8) { - *val = *(int8_t *) p++; - } else { - if (end - p < (1<= end) return -1; - *type = *p & 0xf; - if (*p>>4 != 15) { - *q = p + 1; - *num = *p >> 4; - return 0; - } - r = bcf_dec_typed_int1_safe(p + 1, end, q, num); - if (r) return r; - return *num >= 0 ? 0 : -1; -} - -static const char *get_type_name(int type) { - const char *types[9] = { - "null", "int (8-bit)", "int (16 bit)", "int (32 bit)", - "unknown", "float", "unknown", "char", "unknown" - }; - int t = (type >= 0 && type < 8) ? 
type : 8; - return types[t]; -} - -static void bcf_record_check_err(const bcf_hdr_t *hdr, bcf1_t *rec, - char *type, uint32_t *reports, int i) { - if (*reports == 0 || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos - ": Invalid FORMAT %s %d", - bcf_seqname_safe(hdr,rec), rec->pos+1, type, i); - (*reports)++; -} - -static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) { - uint8_t *ptr, *end; - size_t bytes; - uint32_t err = 0; - int type = 0; - int num = 0; - int reflen = 0; - uint32_t i, reports; - const uint32_t is_integer = ((1 << BCF_BT_INT8) | - (1 << BCF_BT_INT16) | -#ifdef VCF_ALLOW_INT64 - (1 << BCF_BT_INT64) | -#endif - (1 << BCF_BT_INT32)); - const uint32_t is_valid_type = (is_integer | - (1 << BCF_BT_NULL) | - (1 << BCF_BT_FLOAT) | - (1 << BCF_BT_CHAR)); - int32_t max_id = hdr ? hdr->n[BCF_DT_ID] : 0; - - // Check for valid contig ID - if (rec->rid < 0 - || (hdr && (rec->rid >= hdr->n[BCF_DT_CTG] - || hdr->id[BCF_DT_CTG][rec->rid].key == NULL))) { - hts_log_warning("Bad BCF record at %"PRIhts_pos": Invalid %s id %d", rec->pos+1, "CONTIG", rec->rid); - err |= BCF_ERR_CTG_INVALID; - } - - // Check ID - ptr = (uint8_t *) rec->shared.s; - end = ptr + rec->shared.l; - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; - if (type != BCF_BT_CHAR) { - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "ID", type, get_type_name(type)); - err |= BCF_ERR_TAG_INVALID; - } - bytes = (size_t) num << bcf_type_shift[type]; - if (end - ptr < bytes) goto bad_shared; - ptr += bytes; - - // Check REF and ALT - if (rec->n_allele < 1) { - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": No REF allele", - bcf_seqname_safe(hdr,rec), rec->pos+1); - err |= BCF_ERR_TAG_UNDEF; - } - - reports = 0; - for (i = 0; i < rec->n_allele; i++) { - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; - if (type != BCF_BT_CHAR) { - if 
(!reports++ || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "REF/ALT", type, get_type_name(type)); - err |= BCF_ERR_CHAR; - } - if (i == 0) reflen = num; - bytes = (size_t) num << bcf_type_shift[type]; - if (end - ptr < bytes) goto bad_shared; - ptr += bytes; - } - - // Check FILTER - reports = 0; - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; - if (num > 0) { - bytes = (size_t) num << bcf_type_shift[type]; - if (((1 << type) & is_integer) == 0) { - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "FILTER", type, get_type_name(type)); - err |= BCF_ERR_TAG_INVALID; - if (end - ptr < bytes) goto bad_shared; - ptr += bytes; - } else { - if (end - ptr < bytes) goto bad_shared; - for (i = 0; i < num; i++) { - int32_t key = bcf_dec_int1(ptr, type, &ptr); - if (key < 0 - || (hdr && (key >= max_id - || hdr->id[BCF_DT_ID][key].key == NULL))) { - if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s id %d", bcf_seqname_safe(hdr,rec), rec->pos+1, "FILTER", key); - err |= BCF_ERR_TAG_UNDEF; - } - } - } - } - - // Check INFO - reports = 0; - bcf_idpair_t *id_tmp = hdr ? 
hdr->id[BCF_DT_ID] : NULL; - for (i = 0; i < rec->n_info; i++) { - int32_t key = -1; - if (bcf_dec_typed_int1_safe(ptr, end, &ptr, &key) != 0) goto bad_shared; - if (key < 0 || (hdr && (key >= max_id - || id_tmp[key].key == NULL))) { - if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s id %d", bcf_seqname_safe(hdr,rec), rec->pos+1, "INFO", key); - err |= BCF_ERR_TAG_UNDEF; - } - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; - if (((1 << type) & is_valid_type) == 0 - || (type == BCF_BT_NULL && num > 0)) { - if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "INFO", type, get_type_name(type)); - err |= BCF_ERR_TAG_INVALID; - } - bytes = (size_t) num << bcf_type_shift[type]; - if (end - ptr < bytes) goto bad_shared; - ptr += bytes; - } - - // Check FORMAT and individual information - ptr = (uint8_t *) rec->indiv.s; - end = ptr + rec->indiv.l; - reports = 0; - for (i = 0; i < rec->n_fmt; i++) { - int32_t key = -1; - if (bcf_dec_typed_int1_safe(ptr, end, &ptr, &key) != 0) goto bad_indiv; - if (key < 0 - || (hdr && (key >= max_id - || id_tmp[key].key == NULL))) { - bcf_record_check_err(hdr, rec, "id", &reports, key); - err |= BCF_ERR_TAG_UNDEF; - } - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_indiv; - if (((1 << type) & is_valid_type) == 0 - || (type == BCF_BT_NULL && num > 0)) { - bcf_record_check_err(hdr, rec, "type", &reports, type); - err |= BCF_ERR_TAG_INVALID; - } - bytes = ((size_t) num << bcf_type_shift[type]) * rec->n_sample; - if (end - ptr < bytes) goto bad_indiv; - ptr += bytes; - } - - if (!err && rec->rlen < 0) { - // Treat bad rlen as a warning instead of an error, and try to - // fix up by using the length of the stored REF allele. 
- static int warned = 0; - if (!warned) { - hts_log_warning("BCF record at %s:%"PRIhts_pos" has invalid RLEN (%"PRIhts_pos"). " - "Only one invalid RLEN will be reported.", - bcf_seqname_safe(hdr,rec), rec->pos+1, rec->rlen); - warned = 1; - } - rec->rlen = reflen >= 0 ? reflen : 0; - } - - rec->errcode |= err; - - return err ? -2 : 0; // Return -2 so bcf_read() reports an error - - bad_shared: - hts_log_error("Bad BCF record at %s:%"PRIhts_pos" - shared section malformed or too short", bcf_seqname_safe(hdr,rec), rec->pos+1); - return -2; - - bad_indiv: - hts_log_error("Bad BCF record at %s:%"PRIhts_pos" - individuals section malformed or too short", bcf_seqname_safe(hdr,rec), rec->pos+1); - return -2; -} - -static inline uint8_t *bcf_unpack_fmt_core1(uint8_t *ptr, int n_sample, bcf_fmt_t *fmt); -int bcf_subset_format(const bcf_hdr_t *hdr, bcf1_t *rec) -{ - if ( !hdr->keep_samples ) return 0; - if ( !bcf_hdr_nsamples(hdr) ) - { - rec->indiv.l = rec->n_sample = 0; - return 0; - } - - int i, j; - uint8_t *ptr = (uint8_t*)rec->indiv.s, *dst = NULL, *src; - bcf_dec_t *dec = &rec->d; - hts_expand(bcf_fmt_t, rec->n_fmt, dec->m_fmt, dec->fmt); - for (i=0; im_fmt; ++i) dec->fmt[i].p_free = 0; - - for (i=0; in_fmt; i++) - { - ptr = bcf_unpack_fmt_core1(ptr, rec->n_sample, &dec->fmt[i]); - src = dec->fmt[i].p - dec->fmt[i].size; - if ( dst ) - { - memmove(dec->fmt[i-1].p + dec->fmt[i-1].p_len, dec->fmt[i].p - dec->fmt[i].p_off, dec->fmt[i].p_off); - dec->fmt[i].p = dec->fmt[i-1].p + dec->fmt[i-1].p_len + dec->fmt[i].p_off; - } - dst = dec->fmt[i].p; - for (j=0; jnsamples_ori; j++) - { - src += dec->fmt[i].size; - if ( !bit_array_test(hdr->keep_samples,j) ) continue; - memmove(dst, src, dec->fmt[i].size); - dst += dec->fmt[i].size; - } - rec->indiv.l -= dec->fmt[i].p_len - (dst - dec->fmt[i].p); - dec->fmt[i].p_len = dst - dec->fmt[i].p; - } - rec->unpacked |= BCF_UN_FMT; - - rec->n_sample = bcf_hdr_nsamples(hdr); - return 0; -} - -int bcf_read(htsFile *fp, const bcf_hdr_t 
*h, bcf1_t *v) -{ - if (fp->format.format == vcf) return vcf_read(fp,h,v); - int ret = bcf_read1_core(fp->fp.bgzf, v); - if (ret == 0) ret = bcf_record_check(h, v); - if ( ret!=0 || !h->keep_samples ) return ret; - return bcf_subset_format(h,v); -} - -int bcf_readrec(BGZF *fp, void *null, void *vv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - bcf1_t *v = (bcf1_t *) vv; - int ret = bcf_read1_core(fp, v); - if (ret == 0) ret = bcf_record_check(NULL, v); - if (ret >= 0) - *tid = v->rid, *beg = v->pos, *end = v->pos + v->rlen; - return ret; -} - -static inline int bcf1_sync_id(bcf1_t *line, kstring_t *str) -{ - // single typed string - if ( line->d.id && strcmp(line->d.id, ".") ) { - return bcf_enc_vchar(str, strlen(line->d.id), line->d.id); - } else { - return bcf_enc_size(str, 0, BCF_BT_CHAR); - } -} -static inline int bcf1_sync_alleles(bcf1_t *line, kstring_t *str) -{ - // list of typed strings - int i; - for (i=0; in_allele; i++) { - if (bcf_enc_vchar(str, strlen(line->d.allele[i]), line->d.allele[i]) < 0) - return -1; - } - if ( !line->rlen && line->n_allele ) line->rlen = strlen(line->d.allele[0]); - return 0; -} -static inline int bcf1_sync_filter(bcf1_t *line, kstring_t *str) -{ - // typed vector of integers - if ( line->d.n_flt ) { - return bcf_enc_vint(str, line->d.n_flt, line->d.flt, -1); - } else { - return bcf_enc_vint(str, 0, 0, -1); - } -} - -static inline int bcf1_sync_info(bcf1_t *line, kstring_t *str) -{ - // pairs of typed vectors - int i, irm = -1, e = 0; - for (i=0; in_info; i++) - { - bcf_info_t *info = &line->d.info[i]; - if ( !info->vptr ) - { - // marked for removal - if ( irm < 0 ) irm = i; - continue; - } - e |= kputsn_(info->vptr - info->vptr_off, info->vptr_len + info->vptr_off, str) < 0; - if ( irm >=0 ) - { - bcf_info_t tmp = line->d.info[irm]; line->d.info[irm] = line->d.info[i]; line->d.info[i] = tmp; - while ( irm<=i && line->d.info[irm].vptr ) irm++; - } - } - if ( irm>=0 ) line->n_info = irm; - return e == 0 ? 
0 : -1; -} - -static int bcf1_sync(bcf1_t *line) -{ - char *shared_ori = line->shared.s; - size_t prev_len; - - kstring_t tmp = {0,0,0}; - if ( !line->shared.l ) - { - // New line created via API, BCF data blocks do not exist. Get it ready for BCF output - tmp = line->shared; - bcf1_sync_id(line, &tmp); - line->unpack_size[0] = tmp.l; prev_len = tmp.l; - - bcf1_sync_alleles(line, &tmp); - line->unpack_size[1] = tmp.l - prev_len; prev_len = tmp.l; - - bcf1_sync_filter(line, &tmp); - line->unpack_size[2] = tmp.l - prev_len; - - bcf1_sync_info(line, &tmp); - line->shared = tmp; - } - else if ( line->d.shared_dirty ) - { - // The line was edited, update the BCF data block. - - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line,BCF_UN_STR); - - // ptr_ori points to the original unchanged BCF data. - uint8_t *ptr_ori = (uint8_t *) line->shared.s; - - // ID: single typed string - if ( line->d.shared_dirty & BCF1_DIRTY_ID ) - bcf1_sync_id(line, &tmp); - else - kputsn_(ptr_ori, line->unpack_size[0], &tmp); - ptr_ori += line->unpack_size[0]; - line->unpack_size[0] = tmp.l; prev_len = tmp.l; - - // REF+ALT: list of typed strings - if ( line->d.shared_dirty & BCF1_DIRTY_ALS ) - bcf1_sync_alleles(line, &tmp); - else - { - kputsn_(ptr_ori, line->unpack_size[1], &tmp); - if ( !line->rlen && line->n_allele ) line->rlen = strlen(line->d.allele[0]); - } - ptr_ori += line->unpack_size[1]; - line->unpack_size[1] = tmp.l - prev_len; prev_len = tmp.l; - - if ( line->unpacked & BCF_UN_FLT ) - { - // FILTER: typed vector of integers - if ( line->d.shared_dirty & BCF1_DIRTY_FLT ) - bcf1_sync_filter(line, &tmp); - else if ( line->d.n_flt ) - kputsn_(ptr_ori, line->unpack_size[2], &tmp); - else - bcf_enc_vint(&tmp, 0, 0, -1); - ptr_ori += line->unpack_size[2]; - line->unpack_size[2] = tmp.l - prev_len; - - if ( line->unpacked & BCF_UN_INFO ) - { - // INFO: pairs of typed vectors - if ( line->d.shared_dirty & BCF1_DIRTY_INF ) - { - bcf1_sync_info(line, &tmp); - ptr_ori = 
(uint8_t*)line->shared.s + line->shared.l; - } - } - } - - int size = line->shared.l - (size_t)ptr_ori + (size_t)line->shared.s; - if ( size ) kputsn_(ptr_ori, size, &tmp); - - free(line->shared.s); - line->shared = tmp; - } - if ( line->shared.s != shared_ori && line->unpacked & BCF_UN_INFO ) - { - // Reallocated line->shared.s block invalidated line->d.info[].vptr pointers - size_t off_new = line->unpack_size[0] + line->unpack_size[1] + line->unpack_size[2]; - int i; - for (i=0; in_info; i++) - { - uint8_t *vptr_free = line->d.info[i].vptr_free ? line->d.info[i].vptr - line->d.info[i].vptr_off : NULL; - line->d.info[i].vptr = (uint8_t*) line->shared.s + off_new + line->d.info[i].vptr_off; - off_new += line->d.info[i].vptr_len + line->d.info[i].vptr_off; - if ( vptr_free ) - { - free(vptr_free); - line->d.info[i].vptr_free = 0; - } - } - } - - if ( line->n_sample && line->n_fmt && (!line->indiv.l || line->d.indiv_dirty) ) - { - // The genotype fields changed or are not present - tmp.l = tmp.m = 0; tmp.s = NULL; - int i, irm = -1; - for (i=0; in_fmt; i++) - { - bcf_fmt_t *fmt = &line->d.fmt[i]; - if ( !fmt->p ) - { - // marked for removal - if ( irm < 0 ) irm = i; - continue; - } - kputsn_(fmt->p - fmt->p_off, fmt->p_len + fmt->p_off, &tmp); - if ( irm >=0 ) - { - bcf_fmt_t tfmt = line->d.fmt[irm]; line->d.fmt[irm] = line->d.fmt[i]; line->d.fmt[i] = tfmt; - while ( irm<=i && line->d.fmt[irm].p ) irm++; - } - - } - if ( irm>=0 ) line->n_fmt = irm; - free(line->indiv.s); - line->indiv = tmp; - - // Reallocated line->indiv.s block invalidated line->d.fmt[].p pointers - size_t off_new = 0; - for (i=0; in_fmt; i++) - { - uint8_t *p_free = line->d.fmt[i].p_free ? 
line->d.fmt[i].p - line->d.fmt[i].p_off : NULL; - line->d.fmt[i].p = (uint8_t*) line->indiv.s + off_new + line->d.fmt[i].p_off; - off_new += line->d.fmt[i].p_len + line->d.fmt[i].p_off; - if ( p_free ) - { - free(p_free); - line->d.fmt[i].p_free = 0; - } - } - } - if ( !line->n_sample ) line->n_fmt = 0; - line->d.shared_dirty = line->d.indiv_dirty = 0; - return 0; -} - -bcf1_t *bcf_copy(bcf1_t *dst, bcf1_t *src) -{ - bcf1_sync(src); - - bcf_clear(dst); - dst->rid = src->rid; - dst->pos = src->pos; - dst->rlen = src->rlen; - dst->qual = src->qual; - dst->n_info = src->n_info; dst->n_allele = src->n_allele; - dst->n_fmt = src->n_fmt; dst->n_sample = src->n_sample; - - if ( dst->shared.m < src->shared.l ) - { - dst->shared.s = (char*) realloc(dst->shared.s, src->shared.l); - dst->shared.m = src->shared.l; - } - dst->shared.l = src->shared.l; - memcpy(dst->shared.s,src->shared.s,dst->shared.l); - - if ( dst->indiv.m < src->indiv.l ) - { - dst->indiv.s = (char*) realloc(dst->indiv.s, src->indiv.l); - dst->indiv.m = src->indiv.l; - } - dst->indiv.l = src->indiv.l; - memcpy(dst->indiv.s,src->indiv.s,dst->indiv.l); - - return dst; -} -bcf1_t *bcf_dup(bcf1_t *src) -{ - bcf1_t *out = bcf_init1(); - return bcf_copy(out, src); -} - -int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v) -{ - if ( h->dirty ) { - if (bcf_hdr_sync(h) < 0) return -1; - } - if ( bcf_hdr_nsamples(h)!=v->n_sample ) - { - hts_log_error("Broken VCF record, the number of columns at %s:%"PRIhts_pos" does not match the number of samples (%d vs %d)", - bcf_seqname_safe(h,v), v->pos+1, v->n_sample, bcf_hdr_nsamples(h)); - return -1; - } - - if ( hfp->format.format == vcf || hfp->format.format == text_format ) - return vcf_write(hfp,h,v); - - if ( v->errcode ) - { - // vcf_parse1() encountered a new contig or tag, undeclared in the - // header. At this point, the header must have been printed, - // proceeding would lead to a broken BCF file. 
Errors must be checked - // and cleared by the caller before we can proceed. - char errdescription[1024] = ""; - hts_log_error("Unchecked error (%d %s) at %s:%"PRIhts_pos, v->errcode, bcf_strerror(v->errcode, errdescription, sizeof(errdescription)), bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - bcf1_sync(v); // check if the BCF record was modified - - if ( v->unpacked & BCF_IS_64BIT ) - { - hts_log_error("Data at %s:%"PRIhts_pos" contains 64-bit values not representable in BCF. Please use VCF instead", bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - - BGZF *fp = hfp->fp.bgzf; - uint8_t x[32]; - u32_to_le(v->shared.l + 24, x); // to include six 32-bit integers - u32_to_le(v->indiv.l, x + 4); - i32_to_le(v->rid, x + 8); - u32_to_le(v->pos, x + 12); - u32_to_le(v->rlen, x + 16); - float_to_le(v->qual, x + 20); - u16_to_le(v->n_info, x + 24); - u16_to_le(v->n_allele, x + 26); - u32_to_le((uint32_t)v->n_fmt<<24 | (v->n_sample & 0xffffff), x + 28); - if ( bgzf_write(fp, x, 32) != 32 ) return -1; - if ( bgzf_write(fp, v->shared.s, v->shared.l) != v->shared.l ) return -1; - if ( bgzf_write(fp, v->indiv.s, v->indiv.l) != v->indiv.l ) return -1; - - if (hfp->idx) { - if (hts_idx_push(hfp->idx, v->rid, v->pos, v->pos + v->rlen, bgzf_tell(fp), 1) < 0) - return -1; - } - - return 0; -} - -/********************** - *** VCF header I/O *** - **********************/ - -static int add_missing_contig_hrec(bcf_hdr_t *h, const char *name) { - bcf_hrec_t *hrec = calloc(1, sizeof(bcf_hrec_t)); - int save_errno; - if (!hrec) goto fail; - - hrec->key = strdup("contig"); - if (!hrec->key) goto fail; - - if (bcf_hrec_add_key(hrec, "ID", strlen("ID")) < 0) goto fail; - if (bcf_hrec_set_val(hrec, hrec->nkeys-1, name, strlen(name), 0) < 0) - goto fail; - if (bcf_hdr_add_hrec(h, hrec) < 0) - goto fail; - return 0; - - fail: - save_errno = errno; - hts_log_error("%s", strerror(errno)); - if (hrec) bcf_hrec_destroy(hrec); - errno = save_errno; - return -1; -} - -bcf_hdr_t 
*vcf_hdr_read(htsFile *fp) -{ - kstring_t txt, *s = &fp->line; - int ret; - bcf_hdr_t *h; - tbx_t *idx = NULL; - const char **names = NULL; - h = bcf_hdr_init("r"); - if (!h) { - hts_log_error("Failed to allocate bcf header"); - return NULL; - } - txt.l = txt.m = 0; txt.s = 0; - while ((ret = hts_getline(fp, KS_SEP_LINE, s)) >= 0) { - int e = 0; - if (s->l == 0) continue; - if (s->s[0] != '#') { - hts_log_error("No sample line"); - goto error; - } - if (s->s[1] != '#' && fp->fn_aux) { // insert contigs here - kstring_t tmp = { 0, 0, NULL }; - hFILE *f = hopen(fp->fn_aux, "r"); - if (f == NULL) { - hts_log_error("Couldn't open \"%s\"", fp->fn_aux); - goto error; - } - while (tmp.l = 0, kgetline(&tmp, (kgets_func *) hgets, f) >= 0) { - char *tab = strchr(tmp.s, '\t'); - if (tab == NULL) continue; - e |= (kputs("##contig=\n", 2, &txt) < 0); - } - free(tmp.s); - if (hclose(f) != 0) { - hts_log_error("Error on closing %s", fp->fn_aux); - goto error; - } - if (e) goto error; - } - if (kputsn(s->s, s->l, &txt) < 0) goto error; - if (kputc('\n', &txt) < 0) goto error; - if (s->s[1] != '#') break; - } - if ( ret < -1 ) goto error; - if ( !txt.s ) - { - hts_log_error("Could not read the header"); - goto error; - } - if ( bcf_hdr_parse(h, txt.s) < 0 ) goto error; - - // check tabix index, are all contigs listed in the header? add the missing ones - idx = tbx_index_load3(fp->fn, NULL, HTS_IDX_SILENT_FAIL); - if ( idx ) - { - int i, n, need_sync = 0; - names = tbx_seqnames(idx, &n); - if (!names) goto error; - for (i=0; ivalue ) - { - int j, nout = 0; - e |= ksprintf(str, "##%s=<", hrec->key) < 0; - for (j=0; jnkeys; j++) - { - // do not output IDX if output is VCF - if ( !is_bcf && !strcmp("IDX",hrec->keys[j]) ) continue; - if ( nout ) e |= kputc(',',str) < 0; - e |= ksprintf(str,"%s=%s", hrec->keys[j], hrec->vals[j]) < 0; - nout++; - } - e |= ksprintf(str,">\n") < 0; - } - else - e |= ksprintf(str,"##%s=%s\n", hrec->key,hrec->value) < 0; - - return e == 0 ? 
0 : -1; -} - -int bcf_hrec_format(const bcf_hrec_t *hrec, kstring_t *str) -{ - return _bcf_hrec_format(hrec,0,str); -} - -int bcf_hdr_format(const bcf_hdr_t *hdr, int is_bcf, kstring_t *str) -{ - int i, r = 0; - for (i=0; inhrec; i++) - r |= _bcf_hrec_format(hdr->hrec[i], is_bcf, str) < 0; - - r |= ksprintf(str, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO") < 0; - if ( bcf_hdr_nsamples(hdr) ) - { - r |= ksprintf(str, "\tFORMAT") < 0; - for (i=0; isamples[i]) < 0; - } - r |= ksprintf(str, "\n") < 0; - - return r ? -1 : 0; -} - -char *bcf_hdr_fmt_text(const bcf_hdr_t *hdr, int is_bcf, int *len) -{ - kstring_t txt = {0,0,0}; - if (bcf_hdr_format(hdr, is_bcf, &txt) < 0) - return NULL; - if ( len ) *len = txt.l; - return txt.s; -} - -const char **bcf_hdr_seqnames(const bcf_hdr_t *h, int *n) -{ - vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG]; - int i, tid, m = kh_size(d); - const char **names = (const char**) calloc(m,sizeof(const char*)); - if ( !names ) - { - hts_log_error("Failed to allocate memory"); - *n = 0; - return NULL; - } - khint_t k; - for (k=kh_begin(d); k= m ) - { - // This can happen after a contig has been removed from BCF header via bcf_hdr_remove() - if ( hts_resize(const char*, tid + 1, &m, &names, HTS_RESIZE_CLEAR)<0 ) - { - hts_log_error("Failed to allocate memory"); - *n = 0; - free(names); - return NULL; - } - m = tid + 1; - } - names[tid] = kh_key(d,k); - } - // ensure there are no gaps - for (i=0,tid=0; tidformat.compression!=no_compression ) { - ret = bgzf_write(fp->fp.bgzf, htxt.s, htxt.l); - if (bgzf_flush(fp->fp.bgzf) != 0) return -1; - } else { - ret = hwrite(fp->fp.hfile, htxt.s, htxt.l); - } - free(htxt.s); - return ret<0 ? 
-1 : 0; -} - -/*********************** - *** Typed value I/O *** - ***********************/ - -int bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize) -{ - int32_t max = INT32_MIN, min = INT32_MAX; - int i; - if (n <= 0) bcf_enc_size(s, 0, BCF_BT_NULL); - else if (n == 1) bcf_enc_int1(s, a[0]); - else { - if (wsize <= 0) wsize = n; - for (i = 0; i < n; ++i) { - if (a[i] == bcf_int32_missing || a[i] == bcf_int32_vector_end ) continue; - if (max < a[i]) max = a[i]; - if (min > a[i]) min = a[i]; - } - if (max <= BCF_MAX_BT_INT8 && min >= BCF_MIN_BT_INT8) { - bcf_enc_size(s, wsize, BCF_BT_INT8); - for (i = 0; i < n; ++i) - if ( a[i]==bcf_int32_vector_end ) kputc(bcf_int8_vector_end, s); - else if ( a[i]==bcf_int32_missing ) kputc(bcf_int8_missing, s); - else kputc(a[i], s); - } else if (max <= BCF_MAX_BT_INT16 && min >= BCF_MIN_BT_INT16) { - uint8_t *p; - bcf_enc_size(s, wsize, BCF_BT_INT16); - ks_resize(s, s->l + n * sizeof(int16_t)); - p = (uint8_t *) s->s + s->l; - for (i = 0; i < n; ++i) - { - int16_t x; - if ( a[i]==bcf_int32_vector_end ) x = bcf_int16_vector_end; - else if ( a[i]==bcf_int32_missing ) x = bcf_int16_missing; - else x = a[i]; - i16_to_le(x, p); - p += sizeof(int16_t); - } - s->l += n * sizeof(int16_t); - } else { - uint8_t *p; - bcf_enc_size(s, wsize, BCF_BT_INT32); - ks_resize(s, s->l + n * sizeof(int32_t)); - p = (uint8_t *) s->s + s->l; - for (i = 0; i < n; ++i) { - i32_to_le(a[i], p); - p += sizeof(int32_t); - } - s->l += n * sizeof(int32_t); - } - } - - return 0; // FIXME: check for errs in this function -} - -#ifdef VCF_ALLOW_INT64 -static int bcf_enc_long1(kstring_t *s, int64_t x) { - uint32_t e = 0; - if (x <= BCF_MAX_BT_INT32 && x >= BCF_MIN_BT_INT32) - return bcf_enc_int1(s, x); - if (x == bcf_int64_vector_end) { - e |= bcf_enc_size(s, 1, BCF_BT_INT8); - e |= kputc(bcf_int8_vector_end, s) < 0; - } else if (x == bcf_int64_missing) { - e |= bcf_enc_size(s, 1, BCF_BT_INT8); - e |= kputc(bcf_int8_missing, s) < 0; - } else { - e |= 
bcf_enc_size(s, 1, BCF_BT_INT64); - e |= ks_expand(s, 8); - if (e == 0) { u64_to_le(x, (uint8_t *) s->s + s->l); s->l += 8; } - } - return e == 0 ? 0 : -1; -} -#endif - -static inline int serialize_float_array(kstring_t *s, size_t n, const float *a) { - uint8_t *p; - size_t i; - size_t bytes = n * sizeof(float); - - if (bytes / sizeof(float) != n) return -1; - if (ks_resize(s, s->l + bytes) < 0) return -1; - - p = (uint8_t *) s->s + s->l; - for (i = 0; i < n; i++) { - float_to_le(a[i], p); - p += sizeof(float); - } - s->l += bytes; - - return 0; -} - -int bcf_enc_vfloat(kstring_t *s, int n, float *a) -{ - assert(n >= 0); - bcf_enc_size(s, n, BCF_BT_FLOAT); - serialize_float_array(s, n, a); - return 0; // FIXME: check for errs in this function -} - -int bcf_enc_vchar(kstring_t *s, int l, const char *a) -{ - bcf_enc_size(s, l, BCF_BT_CHAR); - kputsn(a, l, s); - return 0; // FIXME: check for errs in this function -} - -int bcf_fmt_array(kstring_t *s, int n, int type, void *data) -{ - int j = 0; - uint32_t e = 0; - if (n == 0) { - return kputc('.', s) >= 0 ? 0 : -1; - } - if (type == BCF_BT_CHAR) - { - char *p = (char*)data; - for (j = 0; j < n && *p; ++j, ++p) - { - if ( *p==bcf_str_missing ) e |= kputc('.', s) < 0; - else e |= kputc(*p, s) < 0; - } - } - else - { - #define BRANCH(type_t, convert, is_missing, is_vector_end, kprint) { \ - uint8_t *p = (uint8_t *) data; \ - for (j=0; jl&7) { - uint64_t zero = 0; - e = kputsn((char*)&zero, 8 - (s->l&7), s) < 0; - } - return e == 0 ? 
0 : -1; -} - -// p,q is the start and the end of the FORMAT field -#define MAX_N_FMT 255 /* Limited by size of bcf1_t n_fmt field */ -static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, char *p, char *q) -{ - if ( !bcf_hdr_nsamples(h) ) return 0; - - static int extreme_val_warned = 0; - char *r, *t; - int j, l, m, g, overflow = 0; - khint_t k; - ks_tokaux_t aux1; - vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; - kstring_t *mem = (kstring_t*)&h->mem; - fmt_aux_t fmt[MAX_N_FMT]; - mem->l = 0; - - char *end = s->s + s->l; - if ( q>=end ) - { - hts_log_error("FORMAT column with no sample columns starting at %s:%"PRIhts_pos"", bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_NCOLS; - return -1; - } - - v->n_fmt = 0; - if ( p[0]=='.' && p[1]==0 ) // FORMAT field is empty "." - { - v->n_sample = bcf_hdr_nsamples(h); - return 0; - } - - // get format information from the dictionary - for (j = 0, t = kstrtok(p, ":", &aux1); t; t = kstrtok(0, 0, &aux1), ++j) { - if (j >= MAX_N_FMT) { - v->errcode |= BCF_ERR_LIMITS; - hts_log_error("FORMAT column at %s:%"PRIhts_pos" lists more identifiers than htslib can handle", - bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - - *(char*)aux1.p = 0; - k = kh_get(vdict, d, t); - if (k == kh_end(d) || kh_val(d, k).info[BCF_HL_FMT] == 15) { - if ( t[0]=='.' && t[1]==0 ) - { - hts_log_error("Invalid FORMAT tag name '.' at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - hts_log_warning("FORMAT '%s' at %s:%"PRIhts_pos" is not defined in the header, assuming Type=String", t, bcf_seqname_safe(h,v), v->pos+1); - kstring_t tmp = {0,0,0}; - int l; - ksprintf(&tmp, "##FORMAT=", t); - bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); - free(tmp.s); - int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; - if (res < 0) bcf_hrec_destroy(hrec); - if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); - - k = kh_get(vdict, d, t); - v->errcode |= BCF_ERR_TAG_UNDEF; - if (res || k == kh_end(d)) { - hts_log_error("Could not add dummy header for FORMAT '%s' at %s:%"PRIhts_pos, t, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - } - fmt[j].max_l = fmt[j].max_m = fmt[j].max_g = 0; - fmt[j].key = kh_val(d, k).id; - fmt[j].is_gt = !strcmp(t, "GT"); - fmt[j].y = h->id[0][fmt[j].key].val->info[BCF_HL_FMT]; - v->n_fmt++; - } - // compute max - int n_sample_ori = -1; - r = q + 1; // r: position in the format string - l = 0, m = g = 1, v->n_sample = 0; // m: max vector size, l: max field len, g: max number of alleles - while ( rkeep_samples ) - { - n_sample_ori++; - if ( !bit_array_test(h->keep_samples,n_sample_ori) ) - { - while ( *r!='\t' && ris_gt) g++; - break; - - case '\t': - *r = 0; // fall through - - case '\0': - case ':': - if (f->max_m < m) f->max_m = m; - if (f->max_l < l) f->max_l = l; - if (f->is_gt && f->max_g < g) f->max_g = g; - l = 0, m = g = 1; - if ( *r==':' ) { - j++; f++; - if ( j>=v->n_fmt ) { - hts_log_error("Incorrect number of FORMAT fields at %s:%"PRIhts_pos"", - h->id[BCF_DT_CTG][v->rid].key, v->pos+1); - v->errcode |= BCF_ERR_NCOLS; - return -1; - } - } else goto end_for; - break; - } - if ( r>=end ) break; - r++; l++; - } - end_for: - v->n_sample++; - if ( v->n_sample == bcf_hdr_nsamples(h) ) break; - r++; - } - - // allocate memory for arrays - for (j = 0; j < v->n_fmt; ++j) { - fmt_aux_t *f = &fmt[j]; - if ( !f->max_m ) f->max_m = 1; // omitted trailing format field - if ((f->y>>4&0xf) == BCF_HT_STR) { - f->size = f->is_gt? 
f->max_g << 2 : f->max_l; - } else if ((f->y>>4&0xf) == BCF_HT_REAL || (f->y>>4&0xf) == BCF_HT_INT) { - f->size = f->max_m << 2; - } else - { - hts_log_error("The format type %d at %s:%"PRIhts_pos" is currently not supported", f->y>>4&0xf, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - if (align_mem(mem) < 0) { - hts_log_error("Memory allocation failure at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - return -1; - } - - // Limit the total memory to ~2Gb per VCF row. This should mean - // malformed VCF data is less likely to take excessive memory and/or - // time. - if ((uint64_t) mem->l + v->n_sample * (uint64_t)f->size > INT_MAX) { - hts_log_error("Excessive memory required by FORMAT fields at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - return -1; - } - - f->offset = mem->l; - if (ks_resize(mem, mem->l + v->n_sample * (size_t)f->size) < 0) { - hts_log_error("Memory allocation failure at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - return -1; - } - mem->l += v->n_sample * f->size; - } - for (j = 0; j < v->n_fmt; ++j) - fmt[j].buf = (uint8_t*)mem->s + fmt[j].offset; - // fill the sample fields; at beginning of the loop, t points to the first char of a format - n_sample_ori = -1; - t = q + 1; m = 0; // m: sample id - while ( tkeep_samples ) - { - n_sample_ori++; - if ( !bit_array_test(h->keep_samples,n_sample_ori) ) - { - while ( *t && tbuf) { - hts_log_error("Memory allocation failure for FORMAT field type %d at %s:%"PRIhts_pos, - z->y>>4&0xf, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - return -1; - } - if ((z->y>>4&0xf) == BCF_HT_STR) { - if (z->is_gt) { // genotypes - int32_t is_phased = 0; - uint32_t *x = (uint32_t*)(z->buf + z->size * (size_t)m); - uint32_t unreadable = 0; - uint32_t max = 0; - overflow = 0; - for (l = 0;; ++t) { - if (*t == '.') { - ++t, x[l++] = is_phased; - } 
else { - char *tt = t; - uint32_t val = hts_str2uint(t, &t, sizeof(val) * CHAR_MAX - 2, &overflow); - unreadable |= tt == t; - if (max < val) max = val; - x[l++] = (val + 1) << 1 | is_phased; - } - is_phased = (*t == '|'); - if (*t != '|' && *t != '/') break; - } - // Possibly check max against v->n_allele instead? - if (overflow || max > (INT32_MAX >> 1) - 1) { - hts_log_error("Couldn't read GT data: value too large at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - if (unreadable) { - hts_log_error("Couldn't read GT data: value not a number or '.' at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - if ( !l ) x[l++] = 0; // An empty field, insert missing value - for (; l < z->size>>2; ++l) x[l] = bcf_int32_vector_end; - } else { - char *x = (char*)z->buf + z->size * (size_t)m; - for (r = t, l = 0; *t != ':' && *t; ++t) x[l++] = *t; - for (; l < z->size; ++l) x[l] = 0; - } - } else if ((z->y>>4&0xf) == BCF_HT_INT) { - int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); - for (l = 0;; ++t) { - if (*t == '.') { - x[l++] = bcf_int32_missing, ++t; // ++t to skip "." - } else { - overflow = 0; - char *te; - long int tmp_val = hts_str2int(t, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); - if ( te==t || overflow || tmp_valBCF_MAX_BT_INT32 ) - { - if ( !extreme_val_warned ) - { - hts_log_warning("Extreme FORMAT/%s value encountered and set to missing at %s:%"PRIhts_pos, h->id[BCF_DT_ID][fmt[j-1].key].key, bcf_seqname_safe(h,v), v->pos+1); - extreme_val_warned = 1; - } - tmp_val = bcf_int32_missing; - } - x[l++] = tmp_val; - t = te; - } - if (*t != ',') break; - } - if ( !l ) x[l++] = bcf_int32_missing; - for (; l < z->size>>2; ++l) x[l] = bcf_int32_vector_end; - } else if ((z->y>>4&0xf) == BCF_HT_REAL) { - float *x = (float*)(z->buf + z->size * (size_t)m); - for (l = 0;; ++t) { - if (*t == '.' && !isdigit_c(t[1])) { - bcf_float_set_missing(x[l++]), ++t; // ++t to skip "." 
- } else { - overflow = 0; - char *te; - float tmp_val = hts_str2dbl(t, &te, &overflow); - if ( (te==t || overflow) && !extreme_val_warned ) - { - hts_log_warning("Extreme FORMAT/%s value encountered at %s:%"PRIhts_pos, h->id[BCF_DT_ID][fmt[j-1].key].key, bcf_seqname(h,v), v->pos+1); - extreme_val_warned = 1; - } - x[l++] = tmp_val; - t = te; - } - if (*t != ',') break; - } - if ( !l ) bcf_float_set_missing(x[l++]); // An empty field, insert missing value - for (; l < z->size>>2; ++l) bcf_float_set_vector_end(x[l]); - } else { - hts_log_error("Unknown FORMAT field type %d at %s:%"PRIhts_pos, z->y>>4&0xf, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - - if (*t == '\0') { - break; - } - else if (*t == ':') { - t++; - } - else { - char buffer[8]; - hts_log_error("Invalid character %s in '%s' FORMAT field at %s:%"PRIhts_pos"", - hts_strprint(buffer, sizeof buffer, '\'', t, 1), - h->id[BCF_DT_ID][z->key].key, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_CHAR; - return -1; - } - } - - for (; j < v->n_fmt; ++j) { // fill end-of-vector values - fmt_aux_t *z = &fmt[j]; - if ((z->y>>4&0xf) == BCF_HT_STR) { - if (z->is_gt) { - int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); - if (z->size) x[0] = bcf_int32_missing; - for (l = 1; l < z->size>>2; ++l) x[l] = bcf_int32_vector_end; - } else { - char *x = (char*)z->buf + z->size * (size_t)m; - if ( z->size ) x[0] = '.'; - for (l = 1; l < z->size; ++l) x[l] = 0; - } - } else if ((z->y>>4&0xf) == BCF_HT_INT) { - int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); - x[0] = bcf_int32_missing; - for (l = 1; l < z->size>>2; ++l) x[l] = bcf_int32_vector_end; - } else if ((z->y>>4&0xf) == BCF_HT_REAL) { - float *x = (float*)(z->buf + z->size * (size_t)m); - bcf_float_set_missing(x[0]); - for (l = 1; l < z->size>>2; ++l) bcf_float_set_vector_end(x[l]); - } - } - - m++; t++; - } - - // write individual genotype information - kstring_t *str = &v->indiv; - int i; - if 
(v->n_sample > 0) { - for (i = 0; i < v->n_fmt; ++i) { - fmt_aux_t *z = &fmt[i]; - bcf_enc_int1(str, z->key); - if ((z->y>>4&0xf) == BCF_HT_STR && !z->is_gt) { - bcf_enc_size(str, z->size, BCF_BT_CHAR); - kputsn((char*)z->buf, z->size * (size_t)v->n_sample, str); - } else if ((z->y>>4&0xf) == BCF_HT_INT || z->is_gt) { - bcf_enc_vint(str, (z->size>>2) * v->n_sample, (int32_t*)z->buf, z->size>>2); - } else { - bcf_enc_size(str, z->size>>2, BCF_BT_FLOAT); - if (serialize_float_array(str, (z->size>>2) * (size_t)v->n_sample, - (float *) z->buf) != 0) { - v->errcode |= BCF_ERR_LIMITS; - hts_log_error("Out of memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - } - } - } - - if ( v->n_sample!=bcf_hdr_nsamples(h) ) - { - hts_log_error("Number of columns at %s:%"PRIhts_pos" does not match the number of samples (%d vs %d)", - bcf_seqname_safe(h,v), v->pos+1, v->n_sample, bcf_hdr_nsamples(h)); - v->errcode |= BCF_ERR_NCOLS; - return -1; - } - if ( v->indiv.l > 0xffffffff ) - { - hts_log_error("The FORMAT at %s:%"PRIhts_pos" is too long", bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - - // Error recovery: return -1 if this is a critical error or 0 if we want to ignore the FORMAT and proceed - v->n_fmt = 0; - return -1; - } - - return 0; -} - -static khint_t fix_chromosome(const bcf_hdr_t *h, vdict_t *d, const char *p) { - // Simple error recovery for chromosomes not defined in the header. It will not help when VCF header has - // been already printed, but will enable tools like vcfcheck to proceed. - - kstring_t tmp = {0,0,0}; - khint_t k; - int l; - if (ksprintf(&tmp, "##contig=", p) < 0) - return kh_end(d); - bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); - free(tmp.s); - int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; - if (res < 0) bcf_hrec_destroy(hrec); - if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); - k = kh_get(vdict, d, p); - - return k; -} - -static int vcf_parse_filter(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p, char *q) { - int i, n_flt = 1, max_n_flt = 0; - char *r, *t; - int32_t *a_flt = NULL; - ks_tokaux_t aux1; - khint_t k; - vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; - // count the number of filters - if (*(q-1) == ';') *(q-1) = 0; - for (r = p; *r; ++r) - if (*r == ';') ++n_flt; - if (n_flt > max_n_flt) { - a_flt = malloc(n_flt * sizeof(*a_flt)); - if (!a_flt) { - hts_log_error("Could not allocate memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; // No appropriate code? - return -1; - } - max_n_flt = n_flt; - } - // add filters - for (t = kstrtok(p, ";", &aux1), i = 0; t; t = kstrtok(0, 0, &aux1)) { - *(char*)aux1.p = 0; - k = kh_get(vdict, d, t); - if (k == kh_end(d)) - { - // Simple error recovery for FILTERs not defined in the header. It will not help when VCF header has - // been already printed, but will enable tools like vcfcheck to proceed. - hts_log_warning("FILTER '%s' is not defined in the header", t); - kstring_t tmp = {0,0,0}; - int l; - ksprintf(&tmp, "##FILTER=", t); - bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); - free(tmp.s); - int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; - if (res < 0) bcf_hrec_destroy(hrec); - if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); - k = kh_get(vdict, d, t); - v->errcode |= BCF_ERR_TAG_UNDEF; - if (res || k == kh_end(d)) { - hts_log_error("Could not add dummy header for FILTER '%s' at %s:%"PRIhts_pos, t, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - free(a_flt); - return -1; - } - } - a_flt[i++] = kh_val(d, k).id; - } - - bcf_enc_vint(str, n_flt, a_flt, -1); - free(a_flt); - - return 0; -} - -static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p, char *q) { - static int extreme_int_warned = 0, negative_rlen_warned = 0; - int max_n_val = 0, overflow = 0; - char *r, *key; - khint_t k; - vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; - int32_t *a_val = NULL; - - v->n_info = 0; - if (*(q-1) == ';') *(q-1) = 0; - for (r = key = p;; ++r) { - int c; - char *val, *end; - if (*r != ';' && *r != '=' && *r != 0) continue; - if (v->n_info == UINT16_MAX) { - hts_log_error("Too many INFO entries at %s:%"PRIhts_pos, - bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - goto fail; - } - val = end = 0; - c = *r; *r = 0; - if (c == '=') { - val = r + 1; - for (end = val; *end != ';' && *end != 0; ++end); - c = *end; *end = 0; - } else end = r; - if ( !*key ) { if (c==0) break; r = end; key = r + 1; continue; } // faulty VCF, ";;" in the INFO - k = kh_get(vdict, d, key); - if (k == kh_end(d) || kh_val(d, k).info[BCF_HL_INFO] == 15) - { - hts_log_warning("INFO '%s' is not defined in the header, assuming Type=String", key); - kstring_t tmp = {0,0,0}; - int l; - ksprintf(&tmp, "##INFO=", key); - bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); - free(tmp.s); - int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; - if (res < 0) bcf_hrec_destroy(hrec); - if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); - k = kh_get(vdict, d, key); - v->errcode |= BCF_ERR_TAG_UNDEF; - if (res || k == kh_end(d)) { - hts_log_error("Could not add dummy header for INFO '%s' at %s:%"PRIhts_pos, key, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - goto fail; - } - } - uint32_t y = kh_val(d, k).info[BCF_HL_INFO]; - ++v->n_info; - bcf_enc_int1(str, kh_val(d, k).id); - if (val == 0) { - bcf_enc_size(str, 0, BCF_BT_NULL); - } else if ((y>>4&0xf) == BCF_HT_FLAG || (y>>4&0xf) == BCF_HT_STR) { // if Flag has a value, treat it as a string - bcf_enc_vchar(str, end - val, val); - } else { // int/float value/array - int i, n_val; - char *t, *te; - for (t = val, n_val = 1; *t; ++t) // count the number of values - if (*t == ',') ++n_val; - // Check both int and float size in one step for simplicity - if (n_val > max_n_val) { - int32_t *a_tmp = (int32_t *)realloc(a_val, n_val * sizeof(*a_val)); - if (!a_tmp) { - hts_log_error("Could not allocate memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; // No appropriate code? - goto fail; - } - a_val = a_tmp; - max_n_val = n_val; - } - if ((y>>4&0xf) == BCF_HT_INT) { - i = 0, t = val; - int64_t val1; - int is_int64 = 0; -#ifdef VCF_ALLOW_INT64 - if ( n_val==1 ) - { - overflow = 0; - long long int tmp_val = hts_str2int(val, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); - if ( te==val ) tmp_val = bcf_int32_missing; - else if ( overflow || tmp_valBCF_MAX_BT_INT64 ) - { - if ( !extreme_int_warned ) - { - hts_log_warning("Extreme INFO/%s value encountered and set to missing at %s:%"PRIhts_pos,key,bcf_seqname_safe(h,v), v->pos+1); - extreme_int_warned = 1; - } - tmp_val = bcf_int32_missing; - } - else - is_int64 = 1; - val1 = tmp_val; - t = te; - i = 1; // this is just to avoid adding another nested block... 
- } -#endif - for (; i < n_val; ++i, ++t) - { - overflow = 0; - long int tmp_val = hts_str2int(t, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); - if ( te==t ) tmp_val = bcf_int32_missing; - else if ( overflow || tmp_valBCF_MAX_BT_INT32 ) - { - if ( !extreme_int_warned ) - { - hts_log_warning("Extreme INFO/%s value encountered and set to missing at %s:%"PRIhts_pos,key,bcf_seqname_safe(h,v), v->pos+1); - extreme_int_warned = 1; - } - tmp_val = bcf_int32_missing; - } - a_val[i] = tmp_val; - for (t = te; *t && *t != ','; t++); - } - if (n_val == 1) { -#ifdef VCF_ALLOW_INT64 - if ( is_int64 ) - { - v->unpacked |= BCF_IS_64BIT; - bcf_enc_long1(str, val1); - } - else - bcf_enc_int1(str, (int32_t)val1); -#else - val1 = a_val[0]; - bcf_enc_int1(str, (int32_t)val1); -#endif - } else { - bcf_enc_vint(str, n_val, a_val, -1); - } - if (n_val==1 && (val1!=bcf_int32_missing || is_int64) && strcmp(key, "END") == 0) - { - if ( val1 <= v->pos ) - { - if ( !negative_rlen_warned ) - { - hts_log_warning("INFO/END=%"PRIhts_pos" is smaller than POS at %s:%"PRIhts_pos,val1,bcf_seqname_safe(h,v),v->pos+1); - negative_rlen_warned = 1; - } - } - else - v->rlen = val1 - v->pos; - } - } else if ((y>>4&0xf) == BCF_HT_REAL) { - float *val_f = (float *)a_val; - for (i = 0, t = val; i < n_val; ++i, ++t) - { - overflow = 0; - val_f[i] = hts_str2dbl(t, &te, &overflow); - if ( te==t || overflow ) // conversion failed - bcf_float_set_missing(val_f[i]); - for (t = te; *t && *t != ','; t++); - } - bcf_enc_vfloat(str, n_val, val_f); - } - } - if (c == 0) break; - r = end; - key = r + 1; - } - - free(a_val); - return 0; - - fail: - free(a_val); - return -1; -} - -int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v) -{ - int i = 0, ret = -2, overflow = 0; - char *p, *q, *r, *t; - kstring_t *str; - khint_t k; - ks_tokaux_t aux; - - if (!s || !h || !v || !(s->s)) - return ret; - - // Assumed in lots of places, but we may as well spot this early - assert(sizeof(float) == sizeof(int32_t)); - - 
bcf_clear1(v); - str = &v->shared; - memset(&aux, 0, sizeof(ks_tokaux_t)); - for (p = kstrtok(s->s, "\t", &aux), i = 0; p; p = kstrtok(0, 0, &aux), ++i) { - q = (char*)aux.p; - *q = 0; - if (i == 0) { // CHROM - vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG]; - k = kh_get(vdict, d, p); - if (k == kh_end(d)) - { - hts_log_warning("Contig '%s' is not defined in the header. (Quick workaround: index the file with tabix.)", p); - v->errcode = BCF_ERR_CTG_UNDEF; - if ((k = fix_chromosome(h, d, p)) == kh_end(d)) { - hts_log_error("Could not add dummy header for contig '%s'", p); - v->errcode |= BCF_ERR_CTG_INVALID; - goto err; - } - } - v->rid = kh_val(d, k).id; - } else if (i == 1) { // POS - overflow = 0; - char *tmp = p; - v->pos = hts_str2uint(p, &p, 63, &overflow); - if (overflow) { - hts_log_error("Position value '%s' is too large", tmp); - goto err; - } else if ( *p ) { - hts_log_error("Could not parse the position '%s'", tmp); - goto err; - } else { - v->pos -= 1; - } - if (v->pos >= INT32_MAX) - v->unpacked |= BCF_IS_64BIT; - } else if (i == 2) { // ID - if (strcmp(p, ".")) bcf_enc_vchar(str, q - p, p); - else bcf_enc_size(str, 0, BCF_BT_CHAR); - } else if (i == 3) { // REF - bcf_enc_vchar(str, q - p, p); - v->n_allele = 1, v->rlen = q - p; - } else if (i == 4) { // ALT - if (strcmp(p, ".")) { - for (r = t = p;; ++r) { - if (*r == ',' || *r == 0) { - if (v->n_allele == UINT16_MAX) { - hts_log_error("Too many ALT alleles at %s:%"PRIhts_pos, - bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - goto err; - } - bcf_enc_vchar(str, r - t, t); - t = r + 1; - ++v->n_allele; - } - if (r == q) break; - } - } - } else if (i == 5) { // QUAL - if (strcmp(p, ".")) v->qual = atof(p); - else bcf_float_set_missing(v->qual); - if ( v->max_unpack && !(v->max_unpack>>1) ) goto end; // BCF_UN_STR - } else if (i == 6) { // FILTER - if (strcmp(p, ".")) { - if (vcf_parse_filter(str, h, v, p, q)) goto err; - } else bcf_enc_vint(str, 0, 0, -1); - if ( v->max_unpack && 
!(v->max_unpack>>2) ) goto end; // BCF_UN_FLT - } else if (i == 7) { // INFO - if (strcmp(p, ".")) { - if (vcf_parse_info(str, h, v, p, q)) goto err; - } - if ( v->max_unpack && !(v->max_unpack>>3) ) goto end; - } else if (i == 8) {// FORMAT - return vcf_parse_format(s, h, v, p, q) == 0 ? 0 : -2; - } - } - - end: - ret = 0; - - err: - return ret; -} - -int vcf_open_mode(char *mode, const char *fn, const char *format) -{ - if (format == NULL) { - // Try to pick a format based on the filename extension - char extension[HTS_MAX_EXT_LEN]; - if (find_file_extension(fn, extension) < 0) return -1; - return vcf_open_mode(mode, fn, extension); - } - else if (strcasecmp(format, "bcf") == 0) strcpy(mode, "b"); - else if (strcasecmp(format, "vcf") == 0) strcpy(mode, ""); - else if (strcasecmp(format, "vcf.gz") == 0 || strcasecmp(format, "vcf.bgz") == 0) strcpy(mode, "z"); - else return -1; - - return 0; -} - -int vcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) -{ - int ret; - ret = hts_getline(fp, KS_SEP_LINE, &fp->line); - if (ret < 0) return ret; - return vcf_parse1(&fp->line, h, v); -} - -static inline uint8_t *bcf_unpack_fmt_core1(uint8_t *ptr, int n_sample, bcf_fmt_t *fmt) -{ - uint8_t *ptr_start = ptr; - fmt->id = bcf_dec_typed_int1(ptr, &ptr); - fmt->n = bcf_dec_size(ptr, &ptr, &fmt->type); - fmt->size = fmt->n << bcf_type_shift[fmt->type]; - fmt->p = ptr; - fmt->p_off = ptr - ptr_start; - fmt->p_free = 0; - ptr += n_sample * fmt->size; - fmt->p_len = ptr - fmt->p; - return ptr; -} - -static inline uint8_t *bcf_unpack_info_core1(uint8_t *ptr, bcf_info_t *info) -{ - uint8_t *ptr_start = ptr; - info->key = bcf_dec_typed_int1(ptr, &ptr); - info->len = bcf_dec_size(ptr, &ptr, &info->type); - info->vptr = ptr; - info->vptr_off = ptr - ptr_start; - info->vptr_free = 0; - info->v1.i = 0; - if (info->len == 1) { - if (info->type == BCF_BT_INT8 || info->type == BCF_BT_CHAR) info->v1.i = *(int8_t*)ptr; - else if (info->type == BCF_BT_INT32) info->v1.i = le_to_i32(ptr); - 
else if (info->type == BCF_BT_FLOAT) info->v1.f = le_to_float(ptr); - else if (info->type == BCF_BT_INT16) info->v1.i = le_to_i16(ptr); - else if (info->type == BCF_BT_INT64) info->v1.i = le_to_i64(ptr); - } - ptr += info->len << bcf_type_shift[info->type]; - info->vptr_len = ptr - info->vptr; - return ptr; -} - -int bcf_unpack(bcf1_t *b, int which) -{ - if ( !b->shared.l ) return 0; // Building a new BCF record from scratch - uint8_t *ptr = (uint8_t*)b->shared.s, *ptr_ori; - int i; - bcf_dec_t *d = &b->d; - if (which & BCF_UN_FLT) which |= BCF_UN_STR; - if (which & BCF_UN_INFO) which |= BCF_UN_SHR; - if ((which&BCF_UN_STR) && !(b->unpacked&BCF_UN_STR)) - { - kstring_t tmp; - - // ID - tmp.l = 0; tmp.s = d->id; tmp.m = d->m_id; - ptr_ori = ptr; - ptr = bcf_fmt_sized_array(&tmp, ptr); - b->unpack_size[0] = ptr - ptr_ori; - kputc('\0', &tmp); - d->id = tmp.s; d->m_id = tmp.m; - - // REF and ALT are in a single block (d->als) and d->alleles are pointers into this block - hts_expand(char*, b->n_allele, d->m_allele, d->allele); // NM: hts_expand() is a macro - tmp.l = 0; tmp.s = d->als; tmp.m = d->m_als; - ptr_ori = ptr; - for (i = 0; i < b->n_allele; ++i) { - // Use offset within tmp.s as realloc may change pointer - d->allele[i] = (char *)(intptr_t)tmp.l; - ptr = bcf_fmt_sized_array(&tmp, ptr); - kputc('\0', &tmp); - } - b->unpack_size[1] = ptr - ptr_ori; - d->als = tmp.s; d->m_als = tmp.m; - - // Convert our offsets within tmp.s back to pointers again - for (i = 0; i < b->n_allele; ++i) - d->allele[i] = d->als + (ptrdiff_t)d->allele[i]; - b->unpacked |= BCF_UN_STR; - } - if ((which&BCF_UN_FLT) && !(b->unpacked&BCF_UN_FLT)) { // FILTER - ptr = (uint8_t*)b->shared.s + b->unpack_size[0] + b->unpack_size[1]; - ptr_ori = ptr; - if (*ptr>>4) { - int type; - d->n_flt = bcf_dec_size(ptr, &ptr, &type); - hts_expand(int, d->n_flt, d->m_flt, d->flt); - for (i = 0; i < d->n_flt; ++i) - d->flt[i] = bcf_dec_int1(ptr, type, &ptr); - } else ++ptr, d->n_flt = 0; - b->unpack_size[2] = 
ptr - ptr_ori; - b->unpacked |= BCF_UN_FLT; - } - if ((which&BCF_UN_INFO) && !(b->unpacked&BCF_UN_INFO)) { // INFO - ptr = (uint8_t*)b->shared.s + b->unpack_size[0] + b->unpack_size[1] + b->unpack_size[2]; - hts_expand(bcf_info_t, b->n_info, d->m_info, d->info); - for (i = 0; i < d->m_info; ++i) d->info[i].vptr_free = 0; - for (i = 0; i < b->n_info; ++i) - ptr = bcf_unpack_info_core1(ptr, &d->info[i]); - b->unpacked |= BCF_UN_INFO; - } - if ((which&BCF_UN_FMT) && b->n_sample && !(b->unpacked&BCF_UN_FMT)) { // FORMAT - ptr = (uint8_t*)b->indiv.s; - hts_expand(bcf_fmt_t, b->n_fmt, d->m_fmt, d->fmt); - for (i = 0; i < d->m_fmt; ++i) d->fmt[i].p_free = 0; - for (i = 0; i < b->n_fmt; ++i) - ptr = bcf_unpack_fmt_core1(ptr, b->n_sample, &d->fmt[i]); - b->unpacked |= BCF_UN_FMT; - } - return 0; -} - -int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s) -{ - int i; - int32_t max_dt_id = h->n[BCF_DT_ID]; - const char *chrom = bcf_seqname(h, v); - if (!chrom) { - hts_log_error("Invalid BCF, CONTIG id=%d not present in the header", - v->rid); - errno = EINVAL; - return -1; - } - bcf_unpack((bcf1_t*)v, BCF_UN_ALL); - kputs(chrom, s); // CHROM - kputc('\t', s); kputll(v->pos + 1, s); // POS - kputc('\t', s); kputs(v->d.id ? 
v->d.id : ".", s); // ID - kputc('\t', s); // REF - if (v->n_allele > 0) kputs(v->d.allele[0], s); - else kputc('.', s); - kputc('\t', s); // ALT - if (v->n_allele > 1) { - for (i = 1; i < v->n_allele; ++i) { - if (i > 1) kputc(',', s); - kputs(v->d.allele[i], s); - } - } else kputc('.', s); - kputc('\t', s); // QUAL - if ( bcf_float_is_missing(v->qual) ) kputc('.', s); // QUAL - else kputd(v->qual, s); - kputc('\t', s); // FILTER - if (v->d.n_flt) { - for (i = 0; i < v->d.n_flt; ++i) { - int32_t idx = v->d.flt[i]; - if (idx < 0 || idx >= max_dt_id - || h->id[BCF_DT_ID][idx].key == NULL) { - hts_log_error("Invalid BCF, the FILTER tag id=%d at %s:%"PRIhts_pos" not present in the header", - idx, bcf_seqname_safe(h, v), v->pos + 1); - errno = EINVAL; - return -1; - } - if (i) kputc(';', s); - kputs(h->id[BCF_DT_ID][idx].key, s); - } - } else kputc('.', s); - kputc('\t', s); // INFO - if (v->n_info) { - int first = 1; - for (i = 0; i < v->n_info; ++i) { - bcf_info_t *z = &v->d.info[i]; - if ( !z->vptr ) continue; - if ( !first ) kputc(';', s); - first = 0; - if (z->key < 0 || z->key >= max_dt_id - || h->id[BCF_DT_ID][z->key].key == NULL) { - hts_log_error("Invalid BCF, the INFO tag id=%d is %s at %s:%"PRIhts_pos, - z->key, - z->key < 0 ? "negative" - : (z->key >= max_dt_id ? 
"too large" : "not present in the header"), - bcf_seqname_safe(h, v), v->pos+1); - errno = EINVAL; - return -1; - } - kputs(h->id[BCF_DT_ID][z->key].key, s); - if (z->len <= 0) continue; - kputc('=', s); - if (z->len == 1) - { - switch (z->type) - { - case BCF_BT_INT8: if ( z->v1.i==bcf_int8_missing ) kputc('.', s); else kputw(z->v1.i, s); break; - case BCF_BT_INT16: if ( z->v1.i==bcf_int16_missing ) kputc('.', s); else kputw(z->v1.i, s); break; - case BCF_BT_INT32: if ( z->v1.i==bcf_int32_missing ) kputc('.', s); else kputw(z->v1.i, s); break; - case BCF_BT_INT64: if ( z->v1.i==bcf_int64_missing ) kputc('.', s); else kputll(z->v1.i, s); break; - case BCF_BT_FLOAT: if ( bcf_float_is_missing(z->v1.f) ) kputc('.', s); else kputd(z->v1.f, s); break; - case BCF_BT_CHAR: kputc(z->v1.i, s); break; - default: - hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, z->type, bcf_seqname_safe(h, v), v->pos+1); - errno = EINVAL; - return -1; - } - } - else bcf_fmt_array(s, z->len, z->type, z->vptr); - } - if ( first ) kputc('.', s); - } else kputc('.', s); - // FORMAT and individual information - if (v->n_sample) - { - int i,j; - if ( v->n_fmt) - { - int gt_i = -1; - bcf_fmt_t *fmt = v->d.fmt; - int first = 1; - for (i = 0; i < (int)v->n_fmt; ++i) { - if ( !fmt[i].p ) continue; - kputc(!first ? 
':' : '\t', s); first = 0; - if (fmt[i].id < 0 || fmt[i].id >= max_dt_id - || h->id[BCF_DT_ID][fmt[i].id].key == NULL) //!bcf_hdr_idinfo_exists(h,BCF_HL_FMT,fmt[i].id) ) - { - hts_log_error("Invalid BCF, the FORMAT tag id=%d at %s:%"PRIhts_pos" not present in the header", fmt[i].id, bcf_seqname_safe(h, v), v->pos+1); - errno = EINVAL; - return -1; - } - kputs(h->id[BCF_DT_ID][fmt[i].id].key, s); - if (strcmp(h->id[BCF_DT_ID][fmt[i].id].key, "GT") == 0) gt_i = i; - } - if ( first ) kputs("\t.", s); - for (j = 0; j < v->n_sample; ++j) { - kputc('\t', s); - first = 1; - for (i = 0; i < (int)v->n_fmt; ++i) { - bcf_fmt_t *f = &fmt[i]; - if ( !f->p ) continue; - if (!first) kputc(':', s); - first = 0; - if (gt_i == i) - bcf_format_gt(f,j,s); - else - bcf_fmt_array(s, f->n, f->type, f->p + j * (size_t)f->size); - } - if ( first ) kputc('.', s); - } - } - else - for (j=0; j<=v->n_sample; j++) - kputs("\t.", s); - } - kputc('\n', s); - return 0; -} - -int vcf_write_line(htsFile *fp, kstring_t *line) -{ - int ret; - if ( line->s[line->l-1]!='\n' ) kputc('\n',line); - if ( fp->format.compression!=no_compression ) - ret = bgzf_write(fp->fp.bgzf, line->s, line->l); - else - ret = hwrite(fp->fp.hfile, line->s, line->l); - return ret==line->l ? 0 : -1; -} - -int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) -{ - ssize_t ret; - fp->line.l = 0; - if (vcf_format1(h, v, &fp->line) != 0) - return -1; - if ( fp->format.compression!=no_compression ) { - if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) - return -1; - if (fp->idx) - hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf)); - ret = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l); - } else { - ret = hwrite(fp->fp.hfile, fp->line.s, fp->line.l); - } - - if (fp->idx) { - int tid; - if ((tid = hts_idx_tbi_name(fp->idx, v->rid, bcf_seqname_safe(h, v))) < 0) - return -1; - - if (hts_idx_push(fp->idx, tid, v->pos, v->pos + v->rlen, bgzf_tell(fp->fp.bgzf), 1) < 0) - return -1; - } - - return ret==fp->line.l ? 
0 : -1; -} - -/************************ - * Data access routines * - ************************/ - -int bcf_hdr_id2int(const bcf_hdr_t *h, int which, const char *id) -{ - khint_t k; - vdict_t *d = (vdict_t*)h->dict[which]; - k = kh_get(vdict, d, id); - return k == kh_end(d)? -1 : kh_val(d, k).id; -} - - -/******************** - *** BCF indexing *** - ********************/ - -// Calculate number of index levels given min_shift and the header contig -// list. Also returns number of contigs in *nids_out. -static int idx_calc_n_lvls_ids(const bcf_hdr_t *h, int min_shift, - int starting_n_lvls, int *nids_out) -{ - int n_lvls, i, nids = 0; - int64_t max_len = 0, s; - - for (i = 0; i < h->n[BCF_DT_CTG]; ++i) - { - if ( !h->id[BCF_DT_CTG][i].val ) continue; - if ( max_len < h->id[BCF_DT_CTG][i].val->info[0] ) - max_len = h->id[BCF_DT_CTG][i].val->info[0]; - nids++; - } - if ( !max_len ) max_len = (1LL<<31) - 1; // In case contig line is broken. - max_len += 256; - s = 1LL << (min_shift + starting_n_lvls * 3); - for (n_lvls = starting_n_lvls; max_len > s; ++n_lvls, s <<= 3); - - if (nids_out) *nids_out = nids; - return n_lvls; -} - -hts_idx_t *bcf_index(htsFile *fp, int min_shift) -{ - int n_lvls; - bcf1_t *b = NULL; - hts_idx_t *idx = NULL; - bcf_hdr_t *h; - int r; - h = bcf_hdr_read(fp); - if ( !h ) return NULL; - int nids = 0; - n_lvls = idx_calc_n_lvls_ids(h, min_shift, 0, &nids); - idx = hts_idx_init(nids, HTS_FMT_CSI, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - if (!idx) goto fail; - b = bcf_init1(); - if (!b) goto fail; - while ((r = bcf_read1(fp,h, b)) >= 0) { - int ret; - ret = hts_idx_push(idx, b->rid, b->pos, b->pos + b->rlen, bgzf_tell(fp->fp.bgzf), 1); - if (ret < 0) goto fail; - } - if (r < -1) goto fail; - hts_idx_finish(idx, bgzf_tell(fp->fp.bgzf)); - bcf_destroy1(b); - bcf_hdr_destroy(h); - return idx; - - fail: - hts_idx_destroy(idx); - bcf_destroy1(b); - bcf_hdr_destroy(h); - return NULL; -} - -hts_idx_t *bcf_index_load2(const char *fn, const char *fnidx) 
-{ - return fnidx? hts_idx_load2(fn, fnidx) : bcf_index_load(fn); -} - -hts_idx_t *bcf_index_load3(const char *fn, const char *fnidx, int flags) -{ - return hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags); -} - -int bcf_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads) -{ - htsFile *fp; - hts_idx_t *idx; - tbx_t *tbx; - int ret; - if ((fp = hts_open(fn, "rb")) == 0) return -2; - if (n_threads) - hts_set_threads(fp, n_threads); - if ( fp->format.compression!=bgzf ) { hts_close(fp); return -3; } - switch (fp->format.format) { - case bcf: - if (!min_shift) { - hts_log_error("TBI indices for BCF files are not supported"); - ret = -1; - } else { - idx = bcf_index(fp, min_shift); - if (idx) { - ret = hts_idx_save_as(idx, fn, fnidx, HTS_FMT_CSI); - if (ret < 0) ret = -4; - hts_idx_destroy(idx); - } - else ret = -1; - } - break; - - case vcf: - tbx = tbx_index(hts_get_bgzfp(fp), min_shift, &tbx_conf_vcf); - if (tbx) { - ret = hts_idx_save_as(tbx->idx, fn, fnidx, min_shift > 0 ? HTS_FMT_CSI : HTS_FMT_TBI); - if (ret < 0) ret = -4; - tbx_destroy(tbx); - } - else ret = -1; - break; - - default: - ret = -3; - break; - } - hts_close(fp); - return ret; -} - -int bcf_index_build2(const char *fn, const char *fnidx, int min_shift) -{ - return bcf_index_build3(fn, fnidx, min_shift, 0); -} - -int bcf_index_build(const char *fn, int min_shift) -{ - return bcf_index_build3(fn, NULL, min_shift, 0); -} - -// Initialise fp->idx for the current format type. -// This must be called after the header has been written but no other data. 
-static int vcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx) { - int n_lvls, fmt; - - if (min_shift == 0) { - min_shift = 14; - n_lvls = 5; - fmt = HTS_FMT_TBI; - } else { - // Set initial n_lvls to match tbx_index() - int starting_n_lvls = (TBX_MAX_SHIFT - min_shift + 2) / 3; - // Increase if necessary - n_lvls = idx_calc_n_lvls_ids(h, min_shift, starting_n_lvls, NULL); - fmt = HTS_FMT_CSI; - } - - fp->idx = hts_idx_init(0, fmt, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - if (!fp->idx) return -1; - - // Tabix meta data, added even in CSI for VCF - uint8_t conf[4*7]; - u32_to_le(TBX_VCF, conf+0); // fmt - u32_to_le(1, conf+4); // name col - u32_to_le(2, conf+8); // beg col - u32_to_le(0, conf+12); // end col - u32_to_le('#', conf+16); // comment - u32_to_le(0, conf+20); // n.skip - u32_to_le(0, conf+24); // ref name len - if (hts_idx_set_meta(fp->idx, sizeof(conf)*sizeof(*conf), (uint8_t *)conf, 1) < 0) { - hts_idx_destroy(fp->idx); - fp->idx = NULL; - return -1; - } - fp->fnidx = fnidx; - - return 0; -} - -// Initialise fp->idx for the current format type. -// This must be called after the header has been written but no other data. -int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx) { - int n_lvls, nids = 0; - - if (fp->format.format == vcf) - return vcf_idx_init(fp, h, min_shift, fnidx); - - if (!min_shift) - min_shift = 14; - - n_lvls = idx_calc_n_lvls_ids(h, min_shift, 0, &nids); - - fp->idx = hts_idx_init(nids, HTS_FMT_CSI, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - if (!fp->idx) return -1; - fp->fnidx = fnidx; - - return 0; -} - -// Finishes an index. Call after the last record has been written. -// Returns 0 on success, <0 on failure. -// -// NB: same format as SAM/BAM as it uses bgzf. 
-int bcf_idx_save(htsFile *fp) { - return sam_idx_save(fp); -} - -/***************** - *** Utilities *** - *****************/ - -int bcf_hdr_combine(bcf_hdr_t *dst, const bcf_hdr_t *src) -{ - int i, ndst_ori = dst->nhrec, need_sync = 0, ret = 0, res; - for (i=0; inhrec; i++) - { - if ( src->hrec[i]->type==BCF_HL_GEN && src->hrec[i]->value ) - { - int j; - for (j=0; jhrec[j]->type!=BCF_HL_GEN ) continue; - - // Checking only the key part of generic lines, otherwise - // the VCFs are too verbose. Should we perhaps add a flag - // to bcf_hdr_combine() and make this optional? - if ( !strcmp(src->hrec[i]->key,dst->hrec[j]->key) ) break; - } - if ( j>=ndst_ori ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return -1; - need_sync += res; - } - } - else if ( src->hrec[i]->type==BCF_HL_STR ) - { - // NB: we are ignoring fields without ID - int j = bcf_hrec_find_key(src->hrec[i],"ID"); - if ( j>=0 ) - { - bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], src->hrec[i]->key); - if ( !rec ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return -1; - need_sync += res; - } - } - } - else - { - int j = bcf_hrec_find_key(src->hrec[i],"ID"); - assert( j>=0 ); // this should always be true for valid VCFs - - bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], NULL); - if ( !rec ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return -1; - need_sync += res; - } else if ( src->hrec[i]->type==BCF_HL_INFO || src->hrec[i]->type==BCF_HL_FMT ) - { - // Check that both records are of the same type. The bcf_hdr_id2length - // macro cannot be used here because dst header is not synced yet. 
- vdict_t *d_src = (vdict_t*)src->dict[BCF_DT_ID]; - vdict_t *d_dst = (vdict_t*)dst->dict[BCF_DT_ID]; - khint_t k_src = kh_get(vdict, d_src, src->hrec[i]->vals[0]); - khint_t k_dst = kh_get(vdict, d_dst, src->hrec[i]->vals[0]); - if ( (kh_val(d_src,k_src).info[rec->type]>>8 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>8 & 0xf) ) - { - hts_log_warning("Trying to combine \"%s\" tag definitions of different lengths", - src->hrec[i]->vals[0]); - ret |= 1; - } - if ( (kh_val(d_src,k_src).info[rec->type]>>4 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>4 & 0xf) ) - { - hts_log_warning("Trying to combine \"%s\" tag definitions of different types", - src->hrec[i]->vals[0]); - ret |= 1; - } - } - } - } - if ( need_sync ) { - if (bcf_hdr_sync(dst) < 0) return -1; - } - return ret; -} - -bcf_hdr_t *bcf_hdr_merge(bcf_hdr_t *dst, const bcf_hdr_t *src) -{ - if ( !dst ) - { - // this will effectively strip existing IDX attributes from src to become dst - dst = bcf_hdr_init("r"); - kstring_t htxt = {0,0,0}; - if (bcf_hdr_format(src, 0, &htxt) < 0) { - free(htxt.s); - return NULL; - } - if ( bcf_hdr_parse(dst, htxt.s) < 0 ) { - bcf_hdr_destroy(dst); - dst = NULL; - } - free(htxt.s); - return dst; - } - - int i, ndst_ori = dst->nhrec, need_sync = 0, res; - for (i=0; inhrec; i++) - { - if ( src->hrec[i]->type==BCF_HL_GEN && src->hrec[i]->value ) - { - int j; - for (j=0; jhrec[j]->type!=BCF_HL_GEN ) continue; - - // Checking only the key part of generic lines, otherwise - // the VCFs are too verbose. Should we perhaps add a flag - // to bcf_hdr_combine() and make this optional? 
- if ( !strcmp(src->hrec[i]->key,dst->hrec[j]->key) ) break; - } - if ( j>=ndst_ori ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return NULL; - need_sync += res; - } - } - else if ( src->hrec[i]->type==BCF_HL_STR ) - { - // NB: we are ignoring fields without ID - int j = bcf_hrec_find_key(src->hrec[i],"ID"); - if ( j>=0 ) - { - bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], src->hrec[i]->key); - if ( !rec ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return NULL; - need_sync += res; - } - } - } - else - { - int j = bcf_hrec_find_key(src->hrec[i],"ID"); - assert( j>=0 ); // this should always be true for valid VCFs - - bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], NULL); - if ( !rec ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return NULL; - need_sync += res; - } else if ( src->hrec[i]->type==BCF_HL_INFO || src->hrec[i]->type==BCF_HL_FMT ) - { - // Check that both records are of the same type. The bcf_hdr_id2length - // macro cannot be used here because dst header is not synced yet. 
- vdict_t *d_src = (vdict_t*)src->dict[BCF_DT_ID]; - vdict_t *d_dst = (vdict_t*)dst->dict[BCF_DT_ID]; - khint_t k_src = kh_get(vdict, d_src, src->hrec[i]->vals[0]); - khint_t k_dst = kh_get(vdict, d_dst, src->hrec[i]->vals[0]); - if ( (kh_val(d_src,k_src).info[rec->type]>>8 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>8 & 0xf) ) - { - hts_log_warning("Trying to combine \"%s\" tag definitions of different lengths", - src->hrec[i]->vals[0]); - } - if ( (kh_val(d_src,k_src).info[rec->type]>>4 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>4 & 0xf) ) - { - hts_log_warning("Trying to combine \"%s\" tag definitions of different types", - src->hrec[i]->vals[0]); - } - } - } - } - if ( need_sync ) { - if (bcf_hdr_sync(dst) < 0) return NULL; - } - return dst; -} - -int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *line) -{ - int i; - if ( line->errcode ) - { - char errordescription[1024] = ""; - hts_log_error("Unchecked error (%d %s) at %s:%"PRIhts_pos", exiting", line->errcode, bcf_strerror(line->errcode, errordescription, sizeof(errordescription)), bcf_seqname_safe(src_hdr,line), line->pos+1); - exit(1); - } - if ( src_hdr->ntransl==-1 ) return 0; // no need to translate, all tags have the same id - if ( !src_hdr->ntransl ) // called for the first time, see what needs translating - { - int dict; - for (dict=0; dict<2; dict++) // BCF_DT_ID and BCF_DT_CTG - { - src_hdr->transl[dict] = (int*) malloc(src_hdr->n[dict]*sizeof(int)); - for (i=0; in[dict]; i++) - { - if ( !src_hdr->id[dict][i].key ) // gap left after removed BCF header lines - { - src_hdr->transl[dict][i] = -1; - continue; - } - src_hdr->transl[dict][i] = bcf_hdr_id2int(dst_hdr,dict,src_hdr->id[dict][i].key); - if ( src_hdr->transl[dict][i]!=-1 && i!=src_hdr->transl[dict][i] ) src_hdr->ntransl++; - } - } - if ( !src_hdr->ntransl ) - { - free(src_hdr->transl[0]); src_hdr->transl[0] = NULL; - free(src_hdr->transl[1]); src_hdr->transl[1] = NULL; - src_hdr->ntransl = -1; - } - if ( 
src_hdr->ntransl==-1 ) return 0; - } - bcf_unpack(line,BCF_UN_ALL); - - // CHROM - if ( src_hdr->transl[BCF_DT_CTG][line->rid] >=0 ) line->rid = src_hdr->transl[BCF_DT_CTG][line->rid]; - - // FILTER - for (i=0; id.n_flt; i++) - { - int src_id = line->d.flt[i]; - if ( src_hdr->transl[BCF_DT_ID][src_id] >=0 ) - line->d.flt[i] = src_hdr->transl[BCF_DT_ID][src_id]; - line->d.shared_dirty |= BCF1_DIRTY_FLT; - } - - // INFO - for (i=0; in_info; i++) - { - int src_id = line->d.info[i].key; - int dst_id = src_hdr->transl[BCF_DT_ID][src_id]; - if ( dst_id<0 ) continue; - line->d.info[i].key = dst_id; - if ( !line->d.info[i].vptr ) continue; // skip deleted - int src_size = src_id>>7 ? ( src_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; - int dst_size = dst_id>>7 ? ( dst_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; - if ( src_size==dst_size ) // can overwrite - { - uint8_t *vptr = line->d.info[i].vptr - line->d.info[i].vptr_off; - if ( dst_size==BCF_BT_INT8 ) { vptr[1] = (uint8_t)dst_id; } - else if ( dst_size==BCF_BT_INT16 ) { *(uint16_t*)vptr = (uint16_t)dst_id; } - else { *(uint32_t*)vptr = (uint32_t)dst_id; } - } - else // must realloc - { - bcf_info_t *info = &line->d.info[i]; - kstring_t str = {0,0,0}; - bcf_enc_int1(&str, dst_id); - bcf_enc_size(&str, info->len,info->type); - uint32_t vptr_off = str.l; - kputsn((char*)info->vptr, info->vptr_len, &str); - if( info->vptr_free ) free(info->vptr - info->vptr_off); - info->vptr_off = vptr_off; - info->vptr = (uint8_t*)str.s + info->vptr_off; - info->vptr_free = 1; - line->d.shared_dirty |= BCF1_DIRTY_INF; - } - } - - // FORMAT - for (i=0; in_fmt; i++) - { - int src_id = line->d.fmt[i].id; - int dst_id = src_hdr->transl[BCF_DT_ID][src_id]; - if ( dst_id<0 ) continue; - line->d.fmt[i].id = dst_id; - if( !line->d.fmt[i].p ) continue; // skip deleted - int src_size = src_id>>7 ? ( src_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; - int dst_size = dst_id>>7 ? ( dst_id>>15 ? 
BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; - if ( src_size==dst_size ) // can overwrite - { - uint8_t *p = line->d.fmt[i].p - line->d.fmt[i].p_off; // pointer to the vector size (4bits) and BT type (4bits) - if ( dst_size==BCF_BT_INT8 ) { p[1] = dst_id; } - else if ( dst_size==BCF_BT_INT16 ) { i16_to_le(dst_id, p + 1); } - else { i32_to_le(dst_id, p + 1); } - } - else // must realloc - { - bcf_fmt_t *fmt = &line->d.fmt[i]; - kstring_t str = {0,0,0}; - bcf_enc_int1(&str, dst_id); - bcf_enc_size(&str, fmt->n, fmt->type); - uint32_t p_off = str.l; - kputsn((char*)fmt->p, fmt->p_len, &str); - if( fmt->p_free ) free(fmt->p - fmt->p_off); - fmt->p_off = p_off; - fmt->p = (uint8_t*)str.s + fmt->p_off; - fmt->p_free = 1; - line->d.indiv_dirty = 1; - } - } - return 0; -} - -bcf_hdr_t *bcf_hdr_dup(const bcf_hdr_t *hdr) -{ - bcf_hdr_t *hout = bcf_hdr_init("r"); - if (!hout) { - hts_log_error("Failed to allocate bcf header"); - return NULL; - } - kstring_t htxt = {0,0,0}; - if (bcf_hdr_format(hdr, 1, &htxt) < 0) { - free(htxt.s); - return NULL; - } - if ( bcf_hdr_parse(hout, htxt.s) < 0 ) { - bcf_hdr_destroy(hout); - hout = NULL; - } - free(htxt.s); - return hout; -} - -bcf_hdr_t *bcf_hdr_subset(const bcf_hdr_t *h0, int n, char *const* samples, int *imap) -{ - void *names_hash = khash_str2int_init(); - kstring_t htxt = {0,0,0}; - kstring_t str = {0,0,0}; - bcf_hdr_t *h = bcf_hdr_init("w"); - int r = 0; - if (!h || !names_hash) { - hts_log_error("Failed to allocate bcf header"); - goto err; - } - if (bcf_hdr_format(h0, 1, &htxt) < 0) { - hts_log_error("Failed to get header text"); - goto err; - } - bcf_hdr_set_version(h,bcf_hdr_get_version(h0)); - int j; - for (j=0; j 0) { - char *p = find_chrom_header_line(htxt.s); - int i = 0, end = n? 
8 : 7; - while ((p = strchr(p, '\t')) != 0 && i < end) ++i, ++p; - if (i != end) { - hts_log_error("Wrong number of columns in header #CHROM line"); - goto err; - } - r |= kputsn(htxt.s, p - htxt.s, &str) < 0; - for (i = 0; i < n; ++i) { - if ( khash_str2int_has_key(names_hash,samples[i]) ) - { - hts_log_error("Duplicate sample name \"%s\"", samples[i]); - goto err; - } - imap[i] = bcf_hdr_id2int(h0, BCF_DT_SAMPLE, samples[i]); - if (imap[i] < 0) continue; - r |= kputc('\t', &str) < 0; - r |= kputs(samples[i], &str) < 0; - r |= khash_str2int_inc(names_hash,samples[i]) < 0; - } - } else r |= kputsn(htxt.s, htxt.l, &str) < 0; - while (str.l && (!str.s[str.l-1] || str.s[str.l-1]=='\n') ) str.l--; // kill trailing zeros and newlines - r |= kputc('\n',&str) < 0; - if (r) { - hts_log_error("%s", strerror(errno)); - goto err; - } - if ( bcf_hdr_parse(h, str.s) < 0 ) { - bcf_hdr_destroy(h); - h = NULL; - } - free(str.s); - free(htxt.s); - khash_str2int_destroy(names_hash); - return h; - - err: - ks_free(&str); - ks_free(&htxt); - khash_str2int_destroy(names_hash); - bcf_hdr_destroy(h); - return NULL; -} - -int bcf_hdr_set_samples(bcf_hdr_t *hdr, const char *samples, int is_file) -{ - if ( samples && !strcmp("-",samples) ) return 0; // keep all samples - - int i, narr = bit_array_size(bcf_hdr_nsamples(hdr)); - hdr->keep_samples = (uint8_t*) calloc(narr,1); - if (!hdr->keep_samples) return -1; - - hdr->nsamples_ori = bcf_hdr_nsamples(hdr); - if ( !samples ) - { - // exclude all samples - khint_t k; - vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_SAMPLE], *new_dict; - new_dict = kh_init(vdict); - if (!new_dict) return -1; - - bcf_hdr_nsamples(hdr) = 0; - - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((char*)kh_key(d, k)); - kh_destroy(vdict, d); - hdr->dict[BCF_DT_SAMPLE] = new_dict; - if (bcf_hdr_sync(hdr) < 0) return -1; - - return 0; - } - - if ( samples[0]=='^' ) - for (i=0; ikeep_samples,i); - - int idx, n, ret = 0; - char **smpls = 
hts_readlist(samples[0]=='^'?samples+1:samples, is_file, &n); - if ( !smpls ) return -1; - for (i=0; ikeep_samples, idx); - else - bit_array_set(hdr->keep_samples, idx); - } - for (i=0; insamples_ori; i++) - if ( bit_array_test(hdr->keep_samples,i) ) bcf_hdr_nsamples(hdr)++; - - if ( !bcf_hdr_nsamples(hdr) ) { free(hdr->keep_samples); hdr->keep_samples=NULL; } - else - { - // Make new list and dictionary with desired samples - char **samples = (char**) malloc(sizeof(char*)*bcf_hdr_nsamples(hdr)); - vdict_t *new_dict, *d; - int k, res; - if (!samples) return -1; - - new_dict = kh_init(vdict); - if (!new_dict) { - free(samples); - return -1; - } - idx = 0; - for (i=0; insamples_ori; i++) { - if ( bit_array_test(hdr->keep_samples,i) ) { - samples[idx] = hdr->samples[i]; - k = kh_put(vdict, new_dict, hdr->samples[i], &res); - if (res < 0) { - free(samples); - kh_destroy(vdict, new_dict); - return -1; - } - kh_val(new_dict, k) = bcf_idinfo_def; - kh_val(new_dict, k).id = idx; - idx++; - } - } - - // Delete desired samples from old dictionary, so we don't free them - d = (vdict_t*)hdr->dict[BCF_DT_SAMPLE]; - for (i=0; i < idx; i++) { - int k = kh_get(vdict, d, samples[i]); - if (k < kh_end(d)) kh_del(vdict, d, k); - } - - // Free everything else - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((char*)kh_key(d, k)); - kh_destroy(vdict, d); - hdr->dict[BCF_DT_SAMPLE] = new_dict; - - free(hdr->samples); - hdr->samples = samples; - - if (bcf_hdr_sync(hdr) < 0) - return -1; - } - - return ret; -} - -int bcf_subset(const bcf_hdr_t *h, bcf1_t *v, int n, int *imap) -{ - kstring_t ind; - ind.s = 0; ind.l = ind.m = 0; - if (n) { - bcf_fmt_t fmt[MAX_N_FMT]; - int i, j; - uint8_t *ptr = (uint8_t*)v->indiv.s; - for (i = 0; i < v->n_fmt; ++i) - ptr = bcf_unpack_fmt_core1(ptr, v->n_sample, &fmt[i]); - for (i = 0; i < (int)v->n_fmt; ++i) { - bcf_fmt_t *f = &fmt[i]; - bcf_enc_int1(&ind, f->id); - bcf_enc_size(&ind, f->n, f->type); - for (j = 0; j < n; ++j) - if 
(imap[j] >= 0) kputsn((char*)(f->p + imap[j] * f->size), f->size, &ind); - } - for (i = j = 0; j < n; ++j) if (imap[j] >= 0) ++i; - v->n_sample = i; - } else v->n_sample = 0; - if ( !v->n_sample ) v->n_fmt = 0; - free(v->indiv.s); - v->indiv = ind; - v->unpacked &= ~BCF_UN_FMT; // only BCF is ready for output, VCF will need to unpack again - return 0; -} - -int bcf_is_snp(bcf1_t *v) -{ - int i; - bcf_unpack(v, BCF_UN_STR); - for (i = 0; i < v->n_allele; ++i) - { - if ( v->d.allele[i][1]==0 && v->d.allele[i][0]!='*' ) continue; - - // mpileup's allele, see also below. This is not completely satisfactory, - // a general library is here narrowly tailored to fit samtools. - if ( v->d.allele[i][0]=='<' && v->d.allele[i][1]=='X' && v->d.allele[i][2]=='>' ) continue; - if ( v->d.allele[i][0]=='<' && v->d.allele[i][1]=='*' && v->d.allele[i][2]=='>' ) continue; - - break; - } - return i == v->n_allele; -} - -static void bcf_set_variant_type(const char *ref, const char *alt, bcf_variant_t *var) -{ - if ( *alt == '*' && !alt[1] ) { var->n = 0; var->type = VCF_OVERLAP; return; } // overlapping variant - - // The most frequent case - if ( !ref[1] && !alt[1] ) - { - if ( *alt == '.' 
|| *ref==*alt ) { var->n = 0; var->type = VCF_REF; return; } - if ( *alt == 'X' ) { var->n = 0; var->type = VCF_REF; return; } // mpileup's X allele shouldn't be treated as variant - var->n = 1; var->type = VCF_SNP; return; - } - if ( alt[0]=='<' ) - { - if ( alt[1]=='X' && alt[2]=='>' ) { var->n = 0; var->type = VCF_REF; return; } // mpileup's X allele shouldn't be treated as variant - if ( alt[1]=='*' && alt[2]=='>' ) { var->n = 0; var->type = VCF_REF; return; } - if ( !strcmp("NON_REF>",alt+1) ) { var->n = 0; var->type = VCF_REF; return; } - var->type = VCF_OTHER; - return; - } - - // Catch "joined before" breakend case - if ( alt[0]==']' || alt[0] == '[' ) - { - var->type = VCF_BND; return; - } - - // Iterate through alt characters that match the reference - const char *r = ref, *a = alt; - while (*r && *a && toupper_c(*r)==toupper_c(*a) ) { r++; a++; } // unfortunately, matching REF,ALT case is not guaranteed - - if ( *a && !*r ) - { - if ( *a==']' || *a=='[' ) { var->type = VCF_BND; return; } // "joined after" breakend - while ( *a ) a++; - var->n = (a-alt)-(r-ref); var->type = VCF_INDEL | VCF_INS; return; - } - else if ( *r && !*a ) - { - while ( *r ) r++; - var->n = (a-alt)-(r-ref); var->type = VCF_INDEL | VCF_DEL; return; - } - else if ( !*r && !*a ) - { - var->n = 0; var->type = VCF_REF; return; - } - - const char *re = r, *ae = a; - while ( re[1] ) re++; - while ( ae[1] ) ae++; - while ( re>r && ae>a && toupper_c(*re)==toupper_c(*ae) ) { re--; ae--; } - if ( ae==a ) - { - if ( re==r ) { var->n = 1; var->type = VCF_SNP; return; } - var->n = -(re-r); - if ( toupper_c(*re)==toupper_c(*ae) ) { var->type = VCF_INDEL | VCF_DEL; return; } - var->type = VCF_OTHER; return; - } - else if ( re==r ) - { - var->n = ae-a; - if ( toupper_c(*re)==toupper_c(*ae) ) { var->type = VCF_INDEL | VCF_INS; return; } - var->type = VCF_OTHER; return; - } - - var->type = ( re-r == ae-a ) ? VCF_MNP : VCF_OTHER; - var->n = ( re-r > ae-a ) ? 
-(re-r+1) : ae-a+1; - - // should do also complex events, SVs, etc... -} - -static int bcf_set_variant_types(bcf1_t *b) -{ - if ( !(b->unpacked & BCF_UN_STR) ) bcf_unpack(b, BCF_UN_STR); - bcf_dec_t *d = &b->d; - if ( d->n_var < b->n_allele ) - { - bcf_variant_t *new_var = realloc(d->var, sizeof(bcf_variant_t)*b->n_allele); - if (!new_var) - return -1; - d->var = new_var; - d->n_var = b->n_allele; - } - int i; - b->d.var_type = 0; - d->var[0].type = VCF_REF; - d->var[0].n = 0; - for (i=1; in_allele; i++) - { - bcf_set_variant_type(d->allele[0],d->allele[i], &d->var[i]); - b->d.var_type |= d->var[i].type; - //fprintf(stderr,"[set_variant_type] %d %s %s -> %d %d .. %d\n", b->pos+1,d->allele[0],d->allele[i],d->var[i].type,d->var[i].n, b->d.var_type); - } - return 0; -} - -// bcf_get_variant_type/bcf_get_variant_types should only return the following, -// to be compatible with callers that are not expecting newer values -// like VCF_INS, VCF_DEL. The full set is available from the newer -// vcf_has_variant_type* interfaces. 
-#define ORIG_VAR_TYPES (VCF_SNP|VCF_MNP|VCF_INDEL|VCF_OTHER|VCF_BND|VCF_OVERLAP) -int bcf_get_variant_types(bcf1_t *rec) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) { - hts_log_error("Couldn't get variant types: %s", strerror(errno)); - exit(1); // Due to legacy API having no way to report failures - } - } - return rec->d.var_type & ORIG_VAR_TYPES; -} - -int bcf_get_variant_type(bcf1_t *rec, int ith_allele) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) { - hts_log_error("Couldn't get variant types: %s", strerror(errno)); - exit(1); // Due to legacy API having no way to report failures - } - } - if (ith_allele < 0 || ith_allele >= rec->n_allele) { - hts_log_error("Requested allele outside valid range"); - exit(1); - } - return rec->d.var[ith_allele].type & ORIG_VAR_TYPES; -} -#undef ORIG_VAR_TYPES - -int bcf_has_variant_type(bcf1_t *rec, int ith_allele, uint32_t bitmask) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) return -1; - } - if (ith_allele < 0 || ith_allele >= rec->n_allele) return -1; - if (bitmask == VCF_REF) { // VCF_REF is 0, so handled as a special case - return rec->d.var[ith_allele].type == VCF_REF; - } - return bitmask & rec->d.var[ith_allele].type; -} - -int bcf_variant_length(bcf1_t *rec, int ith_allele) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) return bcf_int32_missing; - } - if (ith_allele < 0 || ith_allele >= rec->n_allele) return bcf_int32_missing; - return rec->d.var[ith_allele].n; -} - -int bcf_has_variant_types(bcf1_t *rec, uint32_t bitmask, - enum bcf_variant_match mode) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) return -1; - } - uint32_t type = rec->d.var_type; - if ( mode==bcf_match_overlap ) return bitmask & type; - - // VCF_INDEL is always set with VCF_INS and VCF_DEL by bcf_set_variant_type[s], but the bitmask may - // ask for say `VCF_INS` or `VCF_INDEL` only - if ( bitmask&(VCF_INS|VCF_DEL) 
&& !(bitmask&VCF_INDEL) ) type &= ~VCF_INDEL; - else if ( bitmask&VCF_INDEL && !(bitmask&(VCF_INS|VCF_DEL)) ) type &= ~(VCF_INS|VCF_DEL); - - if ( mode==bcf_match_subset ) - { - if ( ~bitmask & type ) return 0; - else return bitmask & type; - } - // mode == bcf_match_exact - return type==bitmask ? type : 0; -} - -int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type) -{ - static int negative_rlen_warned = 0; - int is_end_tag; - - // Is the field already present? - int i, inf_id = bcf_hdr_id2int(hdr,BCF_DT_ID,key); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,inf_id) ) return -1; // No such INFO field in the header - if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); - - is_end_tag = strcmp(key, "END") == 0; - - for (i=0; in_info; i++) - if ( inf_id==line->d.info[i].key ) break; - bcf_info_t *inf = i==line->n_info ? NULL : &line->d.info[i]; - - if ( !n || (type==BCF_HT_STR && !values) ) - { - if ( n==0 && is_end_tag ) - line->rlen = line->n_allele ? 
strlen(line->d.allele[0]) : 0; - if ( inf ) - { - // Mark the tag for removal, free existing memory if necessary - if ( inf->vptr_free ) - { - free(inf->vptr - inf->vptr_off); - inf->vptr_free = 0; - } - line->d.shared_dirty |= BCF1_DIRTY_INF; - inf->vptr = NULL; - inf->vptr_off = inf->vptr_len = 0; - } - return 0; - } - - if (is_end_tag) - { - if (n != 1) - { - hts_log_error("END info tag should only have one value at %s:%"PRIhts_pos, bcf_seqname_safe(hdr,line), line->pos+1); - line->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - if (type != BCF_HT_INT && type != BCF_HT_LONG) - { - hts_log_error("Wrong type (%d) for END info tag at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); - line->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - } - - // Encode the values and determine the size required to accommodate the values - kstring_t str = {0,0,0}; - bcf_enc_int1(&str, inf_id); - if ( type==BCF_HT_INT ) - bcf_enc_vint(&str, n, (int32_t*)values, -1); - else if ( type==BCF_HT_REAL ) - bcf_enc_vfloat(&str, n, (float*)values); - else if ( type==BCF_HT_FLAG || type==BCF_HT_STR ) - { - if ( values==NULL ) - bcf_enc_size(&str, 0, BCF_BT_NULL); - else - bcf_enc_vchar(&str, strlen((char*)values), (char*)values); - } -#ifdef VCF_ALLOW_INT64 - else if ( type==BCF_HT_LONG ) - { - if (n != 1) { - hts_log_error("Only storing a single BCF_HT_LONG value is supported at %s:%"PRIhts_pos, bcf_seqname_safe(hdr,line), line->pos+1); - abort(); - } - bcf_enc_long1(&str, *(int64_t *) values); - } -#endif - else - { - hts_log_error("The type %d not implemented yet at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); - abort(); - } - - // Is the INFO tag already present - if ( inf ) - { - // Is it big enough to accommodate new block? 
- if ( inf->vptr && str.l <= inf->vptr_len + inf->vptr_off ) - { - if ( str.l != inf->vptr_len + inf->vptr_off ) line->d.shared_dirty |= BCF1_DIRTY_INF; - uint8_t *ptr = inf->vptr - inf->vptr_off; - memcpy(ptr, str.s, str.l); - free(str.s); - int vptr_free = inf->vptr_free; - bcf_unpack_info_core1(ptr, inf); - inf->vptr_free = vptr_free; - } - else - { - if ( inf->vptr_free ) - free(inf->vptr - inf->vptr_off); - bcf_unpack_info_core1((uint8_t*)str.s, inf); - inf->vptr_free = 1; - line->d.shared_dirty |= BCF1_DIRTY_INF; - } - } - else - { - // The tag is not present, create new one - line->n_info++; - hts_expand0(bcf_info_t, line->n_info, line->d.m_info , line->d.info); - inf = &line->d.info[line->n_info-1]; - bcf_unpack_info_core1((uint8_t*)str.s, inf); - inf->vptr_free = 1; - line->d.shared_dirty |= BCF1_DIRTY_INF; - } - line->unpacked |= BCF_UN_INFO; - - if ( n==1 && is_end_tag) { - hts_pos_t end = type == BCF_HT_INT ? *(int32_t *) values : *(int64_t *) values; - if ( (type == BCF_HT_INT && end!=bcf_int32_missing) || (type == BCF_HT_LONG && end!=bcf_int64_missing) ) - { - if ( end <= line->pos ) - { - if ( !negative_rlen_warned ) - { - hts_log_warning("INFO/END=%"PRIhts_pos" is smaller than POS at %s:%"PRIhts_pos,end,bcf_seqname_safe(hdr,line),line->pos+1); - negative_rlen_warned = 1; - } - line->rlen = line->n_allele ? strlen(line->d.allele[0]) : 0; - } - else - line->rlen = end - line->pos; - } - } - return 0; -} - -int bcf_update_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const char **values, int n) -{ - if ( !n ) - return bcf_update_format(hdr,line,key,NULL,0,BCF_HT_STR); - - int i, max_len = 0; - for (i=0; i max_len ) max_len = len; - } - char *out = (char*) malloc(max_len*n); - if ( !out ) return -2; - for (i=0; iunpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); - - for (i=0; in_fmt; i++) - if ( line->d.fmt[i].id==fmt_id ) break; - bcf_fmt_t *fmt = i==line->n_fmt ? 
NULL : &line->d.fmt[i]; - - if ( !n ) - { - if ( fmt ) - { - // Mark the tag for removal, free existing memory if necessary - if ( fmt->p_free ) - { - free(fmt->p - fmt->p_off); - fmt->p_free = 0; - } - line->d.indiv_dirty = 1; - fmt->p = NULL; - } - return 0; - } - - line->n_sample = bcf_hdr_nsamples(hdr); - int nps = n / line->n_sample; // number of values per sample - assert( nps && nps*line->n_sample==n ); // must be divisible by n_sample - - // Encode the values and determine the size required to accommodate the values - kstring_t str = {0,0,0}; - bcf_enc_int1(&str, fmt_id); - assert(values != NULL); - if ( type==BCF_HT_INT ) - bcf_enc_vint(&str, n, (int32_t*)values, nps); - else if ( type==BCF_HT_REAL ) - { - bcf_enc_size(&str, nps, BCF_BT_FLOAT); - serialize_float_array(&str, nps*line->n_sample, (float *) values); - } - else if ( type==BCF_HT_STR ) - { - bcf_enc_size(&str, nps, BCF_BT_CHAR); - kputsn((char*)values, nps*line->n_sample, &str); - } - else - { - hts_log_error("The type %d not implemented yet at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); - abort(); - } - - if ( !fmt ) - { - // Not present, new format field - line->n_fmt++; - hts_expand0(bcf_fmt_t, line->n_fmt, line->d.m_fmt, line->d.fmt); - - // Special case: VCF specification requires that GT is always first - if ( line->n_fmt > 1 && key[0]=='G' && key[1]=='T' && !key[2] ) - { - for (i=line->n_fmt-1; i>0; i--) - line->d.fmt[i] = line->d.fmt[i-1]; - fmt = &line->d.fmt[0]; - } - else - fmt = &line->d.fmt[line->n_fmt-1]; - bcf_unpack_fmt_core1((uint8_t*)str.s, line->n_sample, fmt); - line->d.indiv_dirty = 1; - fmt->p_free = 1; - } - else - { - // The tag is already present, check if it is big enough to accommodate the new block - if ( fmt->p && str.l <= fmt->p_len + fmt->p_off ) - { - // good, the block is big enough - if ( str.l != fmt->p_len + fmt->p_off ) line->d.indiv_dirty = 1; - uint8_t *ptr = fmt->p - fmt->p_off; - memcpy(ptr, str.s, str.l); - free(str.s); - int p_free 
= fmt->p_free; - bcf_unpack_fmt_core1(ptr, line->n_sample, fmt); - fmt->p_free = p_free; - } - else - { - if ( fmt->p_free ) - free(fmt->p - fmt->p_off); - bcf_unpack_fmt_core1((uint8_t*)str.s, line->n_sample, fmt); - fmt->p_free = 1; - line->d.indiv_dirty = 1; - } - } - line->unpacked |= BCF_UN_FMT; - return 0; -} - - -int bcf_update_filter(const bcf_hdr_t *hdr, bcf1_t *line, int *flt_ids, int n) -{ - if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); - line->d.shared_dirty |= BCF1_DIRTY_FLT; - line->d.n_flt = n; - if ( !n ) return 0; - hts_expand(int, line->d.n_flt, line->d.m_flt, line->d.flt); - int i; - for (i=0; id.flt[i] = flt_ids[i]; - return 0; -} - -int bcf_add_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id) -{ - if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); - int i; - for (i=0; id.n_flt; i++) - if ( flt_id==line->d.flt[i] ) break; - if ( id.n_flt ) return 0; // this filter is already set - line->d.shared_dirty |= BCF1_DIRTY_FLT; - if ( flt_id==0 ) // set to PASS - line->d.n_flt = 1; - else if ( line->d.n_flt==1 && line->d.flt[0]==0 ) - line->d.n_flt = 1; - else - line->d.n_flt++; - hts_expand(int, line->d.n_flt, line->d.m_flt, line->d.flt); - line->d.flt[line->d.n_flt-1] = flt_id; - return 1; -} -int bcf_remove_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id, int pass) -{ - if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); - int i; - for (i=0; id.n_flt; i++) - if ( flt_id==line->d.flt[i] ) break; - if ( i==line->d.n_flt ) return 0; // the filter is not present - line->d.shared_dirty |= BCF1_DIRTY_FLT; - if ( i!=line->d.n_flt-1 ) memmove(line->d.flt+i,line->d.flt+i+1,(line->d.n_flt-i-1)*sizeof(*line->d.flt)); - line->d.n_flt--; - if ( !line->d.n_flt && pass ) bcf_add_filter(hdr,line,0); - return 0; -} - -int bcf_has_filter(const bcf_hdr_t *hdr, bcf1_t *line, char *filter) -{ - if ( filter[0]=='.' 
&& !filter[1] ) filter = "PASS"; - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, filter); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FLT,id) ) return -1; // not defined in the header - - if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); - if ( id==0 && !line->d.n_flt) return 1; // PASS - - int i; - for (i=0; id.n_flt; i++) - if ( line->d.flt[i]==id ) return 1; - return 0; -} - -static inline int _bcf1_sync_alleles(const bcf_hdr_t *hdr, bcf1_t *line, int nals) -{ - line->d.shared_dirty |= BCF1_DIRTY_ALS; - - line->n_allele = nals; - hts_expand(char*, line->n_allele, line->d.m_allele, line->d.allele); - - char *als = line->d.als; - int n = 0; - while (nd.allele[n] = als; - while ( *als ) als++; - als++; - n++; - } - - // Update REF length. Note that END is 1-based while line->pos 0-based - bcf_info_t *end_info = bcf_get_info(hdr,line,"END"); - if ( end_info ) - { - if ( end_info->type==BCF_HT_INT && end_info->v1.i==bcf_int32_missing ) end_info = NULL; - else if ( end_info->type==BCF_HT_LONG && end_info->v1.i==bcf_int64_missing ) end_info = NULL; - } - if ( end_info && end_info->v1.i > line->pos ) - line->rlen = end_info->v1.i - line->pos; - else if ( nals > 0 ) - line->rlen = strlen(line->d.allele[0]); - else - line->rlen = 0; - - return 0; -} -int bcf_update_alleles(const bcf_hdr_t *hdr, bcf1_t *line, const char **alleles, int nals) -{ - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); - char *free_old = NULL; - char buffer[256]; - size_t used = 0; - - // The pointers in alleles may point into the existing line->d.als memory, - // so care needs to be taken not to clobber them while updating. Usually - // they will be short so we can copy through an intermediate buffer. - // If they're longer, or won't fit in the existing allocation we - // can allocate a new buffer to write into. Note that in either case - // pointers to line->d.als memory in alleles may not be valid when we've - // finished. 
- int i; - size_t avail = line->d.m_als < sizeof(buffer) ? line->d.m_als : sizeof(buffer); - for (i=0; id.m_als) // Don't shrink the buffer - needed = line->d.m_als; - if (needed > INT_MAX) { - hts_log_error("REF + alleles too long to fit in a BCF record"); - return -1; - } - new_als = malloc(needed); - if (!new_als) - return -1; - free_old = line->d.als; - line->d.als = new_als; - line->d.m_als = needed; - } - - // Copy from the temp buffer to the destination - if (used) { - assert(used <= line->d.m_als); - memcpy(line->d.als, buffer, used); - } - - // Add in any remaining entries - if this happens we will always be - // writing to a newly-allocated buffer. - for (; i < nals; i++) { - size_t sz = strlen(alleles[i]) + 1; - memcpy(line->d.als + used, alleles[i], sz); - used += sz; - } - - if (free_old) - free(free_old); - return _bcf1_sync_alleles(hdr,line,nals); -} - -int bcf_update_alleles_str(const bcf_hdr_t *hdr, bcf1_t *line, const char *alleles_string) -{ - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); - kstring_t tmp; - tmp.l = 0; tmp.s = line->d.als; tmp.m = line->d.m_als; - kputs(alleles_string, &tmp); - line->d.als = tmp.s; line->d.m_als = tmp.m; - - int nals = 1; - char *t = line->d.als; - while (*t) - { - if ( *t==',' ) { *t = 0; nals++; } - t++; - } - return _bcf1_sync_alleles(hdr, line, nals); -} - -int bcf_update_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id) -{ - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); - kstring_t tmp; - tmp.l = 0; tmp.s = line->d.id; tmp.m = line->d.m_id; - if ( id ) - kputs(id, &tmp); - else - kputs(".", &tmp); - line->d.id = tmp.s; line->d.m_id = tmp.m; - line->d.shared_dirty |= BCF1_DIRTY_ID; - return 0; -} - -int bcf_add_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id) -{ - if ( !id ) return 0; - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); - - kstring_t tmp; - tmp.l = 0; tmp.s = line->d.id; tmp.m = line->d.m_id; - - int len = strlen(id); - 
char *dst = line->d.id; - while ( *dst && (dst=strstr(dst,id)) ) - { - if ( dst[len]!=0 && dst[len]!=';' ) dst++; // a prefix, not a match - else if ( dst==line->d.id || dst[-1]==';' ) return 0; // already present - dst++; // a suffix, not a match - } - if ( line->d.id && (line->d.id[0]!='.' || line->d.id[1]) ) - { - tmp.l = strlen(line->d.id); - kputc(';',&tmp); - } - kputs(id,&tmp); - - line->d.id = tmp.s; line->d.m_id = tmp.m; - line->d.shared_dirty |= BCF1_DIRTY_ID; - return 0; - -} - -bcf_fmt_t *bcf_get_fmt(const bcf_hdr_t *hdr, bcf1_t *line, const char *key) -{ - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, key); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) ) return NULL; // no such FMT field in the header - return bcf_get_fmt_id(line, id); -} - -bcf_info_t *bcf_get_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key) -{ - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, key); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,id) ) return NULL; // no such INFO field in the header - return bcf_get_info_id(line, id); -} - -bcf_fmt_t *bcf_get_fmt_id(bcf1_t *line, const int id) -{ - int i; - if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); - for (i=0; in_fmt; i++) - { - if ( line->d.fmt[i].id==id ) return &line->d.fmt[i]; - } - return NULL; -} - -bcf_info_t *bcf_get_info_id(bcf1_t *line, const int id) -{ - int i; - if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); - for (i=0; in_info; i++) - { - if ( line->d.info[i].key==id ) return &line->d.info[i]; - } - return NULL; -} - - -int bcf_get_info_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type) -{ - int i, ret = -4, tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,tag_id) ) return -1; // no such INFO field in the header - if ( bcf_hdr_id2type(hdr,BCF_HL_INFO,tag_id)!=(type & 0xff) ) return -2; // expected different type - - if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); - - for 
(i=0; in_info; i++) - if ( line->d.info[i].key==tag_id ) break; - if ( i==line->n_info ) return ( type==BCF_HT_FLAG ) ? 0 : -3; // the tag is not present in this record - if ( type==BCF_HT_FLAG ) return 1; - - bcf_info_t *info = &line->d.info[i]; - if ( !info->vptr ) return -3; // the tag was marked for removal - if ( type==BCF_HT_STR ) - { - if ( *ndst < info->len+1 ) - { - *ndst = info->len + 1; - *dst = realloc(*dst, *ndst); - } - memcpy(*dst,info->vptr,info->len); - ((uint8_t*)*dst)[info->len] = 0; - return info->len; - } - - // Make sure the buffer is big enough - int size1; - switch (type) { - case BCF_HT_INT: size1 = sizeof(int32_t); break; - case BCF_HT_LONG: size1 = sizeof(int64_t); break; - case BCF_HT_REAL: size1 = sizeof(float); break; - default: - hts_log_error("Unexpected output type %d at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); - return -2; - } - if ( *ndst < info->len ) - { - *ndst = info->len; - *dst = realloc(*dst, *ndst * size1); - } - - #define BRANCH(type_t, convert, is_missing, is_vector_end, set_missing, set_regular, out_type_t) do { \ - out_type_t *tmp = (out_type_t *) *dst; \ - int j; \ - for (j=0; jlen; j++) \ - { \ - type_t p = convert(info->vptr + j * sizeof(type_t)); \ - if ( is_vector_end ) break; \ - if ( is_missing ) set_missing; \ - else set_regular; \ - tmp++; \ - } \ - ret = j; \ - } while (0) - switch (info->type) { - case BCF_BT_INT8: - if (type == BCF_HT_LONG) { - BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); - } else { - BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); - } - break; - case BCF_BT_INT16: - if (type == BCF_HT_LONG) { - BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); - } else { - BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); - 
} - break; - case BCF_BT_INT32: - if (type == BCF_HT_LONG) { - BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); break; - } else { - BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); break; - } - case BCF_BT_FLOAT: BRANCH(uint32_t, le_to_u32, p==bcf_float_missing, p==bcf_float_vector_end, bcf_float_set_missing(*tmp), bcf_float_set(tmp, p), float); break; - default: hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, info->type, bcf_seqname_safe(hdr,line), line->pos+1); return -2; - } - #undef BRANCH - return ret; // set by BRANCH -} - -int bcf_get_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, char ***dst, int *ndst) -{ - int i,tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,tag_id) ) return -1; // no such FORMAT field in the header - if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=BCF_HT_STR ) return -2; // expected different type - - if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); - - for (i=0; in_fmt; i++) - if ( line->d.fmt[i].id==tag_id ) break; - if ( i==line->n_fmt ) return -3; // the tag is not present in this record - bcf_fmt_t *fmt = &line->d.fmt[i]; - if ( !fmt->p ) return -3; // the tag was marked for removal - - int nsmpl = bcf_hdr_nsamples(hdr); - if ( !*dst ) - { - *dst = (char**) malloc(sizeof(char*)*nsmpl); - if ( !*dst ) return -4; // could not alloc - (*dst)[0] = NULL; - } - int n = (fmt->n+1)*nsmpl; - if ( *ndst < n ) - { - (*dst)[0] = realloc((*dst)[0], n); - if ( !(*dst)[0] ) return -4; // could not alloc - *ndst = n; - } - for (i=0; ip + i*fmt->n; - uint8_t *tmp = (uint8_t*)(*dst)[0] + i*(fmt->n+1); - memcpy(tmp,src,fmt->n); - tmp[fmt->n] = 0; - (*dst)[i] = (char*) tmp; - } - return n; -} - -int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type) -{ - int i,j, tag_id = 
bcf_hdr_id2int(hdr, BCF_DT_ID, tag); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,tag_id) ) return -1; // no such FORMAT field in the header - if ( tag[0]=='G' && tag[1]=='T' && tag[2]==0 ) - { - // Ugly: GT field is considered to be a string by the VCF header but BCF represents it as INT. - if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=BCF_HT_STR ) return -2; - } - else if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=type ) return -2; // expected different type - - if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); - - for (i=0; in_fmt; i++) - if ( line->d.fmt[i].id==tag_id ) break; - if ( i==line->n_fmt ) return -3; // the tag is not present in this record - bcf_fmt_t *fmt = &line->d.fmt[i]; - if ( !fmt->p ) return -3; // the tag was marked for removal - - if ( type==BCF_HT_STR ) - { - int n = fmt->n*bcf_hdr_nsamples(hdr); - if ( *ndst < n ) - { - *dst = realloc(*dst, n); - if ( !*dst ) return -4; // could not alloc - *ndst = n; - } - memcpy(*dst,fmt->p,n); - return n; - } - - // Make sure the buffer is big enough - int nsmpl = bcf_hdr_nsamples(hdr); - int size1 = type==BCF_HT_INT ? 
sizeof(int32_t) : sizeof(float); - if ( *ndst < fmt->n*nsmpl ) - { - *ndst = fmt->n*nsmpl; - *dst = realloc(*dst, *ndst*size1); - if ( !*dst ) return -4; // could not alloc - } - - #define BRANCH(type_t, convert, is_missing, is_vector_end, set_missing, set_vector_end, set_regular, out_type_t) { \ - out_type_t *tmp = (out_type_t *) *dst; \ - uint8_t *fmt_p = fmt->p; \ - for (i=0; in; j++) \ - { \ - type_t p = convert(fmt_p + j * sizeof(type_t)); \ - if ( is_missing ) set_missing; \ - else if ( is_vector_end ) { set_vector_end; break; } \ - else set_regular; \ - tmp++; \ - } \ - for (; jn; j++) { set_vector_end; tmp++; } \ - fmt_p += fmt->size; \ - } \ - } - switch (fmt->type) { - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; - case BCF_BT_FLOAT: BRANCH(uint32_t, le_to_u32, p==bcf_float_missing, p==bcf_float_vector_end, bcf_float_set_missing(*tmp), bcf_float_set_vector_end(*tmp), bcf_float_set(tmp, p), float); break; - default: hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, fmt->type, bcf_seqname_safe(hdr,line), line->pos+1); exit(1); - } - #undef BRANCH - return nsmpl*fmt->n; -} - -//error description structure definition -typedef struct err_desc { - int errorcode; - const char *description; -}err_desc; - -// error descriptions -static const err_desc errdesc_bcf[] = { - { BCF_ERR_CTG_UNDEF, "Contig not defined in header"}, - { BCF_ERR_TAG_UNDEF, "Tag not defined in header" }, - { BCF_ERR_NCOLS, "Incorrect number of columns" }, - { BCF_ERR_LIMITS, "Limits reached" }, - { BCF_ERR_CHAR, "Invalid character" }, - 
{ BCF_ERR_CTG_INVALID, "Invalid contig" }, - { BCF_ERR_TAG_INVALID, "Invalid tag" }, -}; - -/// append given description to buffer based on available size and add ... when not enough space - /** @param buffer buffer to which description to be appended - @param offset offset at which to be appended - @param maxbuffer maximum size of the buffer - @param description the description to be appended -on failure returns -1 - when buffer is not big enough; returns -1 on invalid params and on too small buffer which are improbable due to validation at caller site -on success returns 0 - */ -static int add_desc_to_buffer(char *buffer, size_t *offset, size_t maxbuffer, const char *description) { - - if (!description || !buffer || !offset || (maxbuffer < 4)) - return -1; - - size_t rembuffer = maxbuffer - *offset; - if (rembuffer > (strlen(description) + (rembuffer == maxbuffer ? 0 : 1))) { //add description with optionally required ',' - *offset += snprintf(buffer + *offset, rembuffer, "%s%s", (rembuffer == maxbuffer)? "": ",", description); - } else { //not enough space for description, put ... - size_t tmppos = (rembuffer <= 4) ? maxbuffer - 4 : *offset; - snprintf(buffer + tmppos, 4, "..."); //ignore offset update - return -1; - } - return 0; -} - -//get description for given error code. return NULL on error -const char *bcf_strerror(int errorcode, char *buffer, size_t maxbuffer) { - size_t usedup = 0; - int ret = 0; - int idx; - - if (!buffer || maxbuffer < 4) - return NULL; //invalid / insufficient buffer - - if (!errorcode) { - buffer[0] = '\0'; //no error, set null - return buffer; - } - - for (idx = 0; idx < sizeof(errdesc_bcf) / sizeof(err_desc); ++idx) { - if (errorcode & errdesc_bcf[idx].errorcode) { //error is set, add description - ret = add_desc_to_buffer(buffer, &usedup, maxbuffer, errdesc_bcf[idx].description); - if (ret < 0) - break; //not enough space, ... 
added, no need to continue - - errorcode &= ~errdesc_bcf[idx].errorcode; //reset the error - } - } - - if (errorcode && (ret >= 0)) { //undescribed error is present in error code and had enough buffer, try to add unkonwn error as well§ - add_desc_to_buffer(buffer, &usedup, maxbuffer, "Unknown error"); - } - return buffer; -} - diff --git a/src/htslib-1.18/version.sh b/src/htslib-1.18/version.sh deleted file mode 100755 index 7cb5c17..0000000 --- a/src/htslib-1.18/version.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/sh -# version.sh -- Script to build the htslib version string -# -# Author : James Bonfield -# -# Copyright (C) 2017-2018, 2021 Genome Research Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -# Master version, for use in tarballs or non-git source copies -VERSION=1.18 - -# If we have a git clone, then check against the current tag -srcdir=${0%/version.sh} -if [ -e $srcdir/.git ] -then - # If we ever get to 10.x this will need to be more liberal - v=`cd $srcdir && git describe --always --match '[0-9].[0-9]*' --dirty` - case $v in - [0-9]*.[0-9]*) VERSION="$v" ;; - [0-9a-f][0-9a-f]*) VERSION="$VERSION-1-g$v" ;; - esac -fi - -# Numeric version is for use in .dylib or .so libraries -# -# Follows the same logic from the Makefile commit c2e93911 -# as non-numeric versions get bumped to patch level 255 to indicate -# an unknown value. -if [ "$1" = "numeric" ] -then - v1=`expr "$VERSION" : '\([0-9]*\)'` - v2=`expr "$VERSION" : '[0-9]*.\([0-9]*\)'` - v3=`expr "$VERSION" : '[0-9]*.[0-9]*.\([0-9]*\)'` - if [ -z "`expr "$VERSION" : '\([0-9.]*\)$'`" ] - then - VERSION="$v1.$v2.255" - else - VERSION="$v1.$v2${v3:+.}$v3" - fi -fi - -echo $VERSION diff --git a/src/htslib-1.19.1/INSTALL b/src/htslib-1.19.1/INSTALL deleted file mode 100644 index e0fddd9..0000000 --- a/src/htslib-1.19.1/INSTALL +++ /dev/null @@ -1,309 +0,0 @@ - Building and Installing HTSlib - ============================== - -Requirements -============ - -Building HTSlib requires a few programs and libraries to be present. -See the "System Specific Details" below for guidance on how to install -these. - -At least the following are required: - - GNU make - C compiler (e.g. gcc or clang) - -In addition, building the configure script requires: - - autoheader - autoconf - autoreconf - -Running the configure script uses awk, along with a number of -standard UNIX tools (cat, cp, grep, mv, rm, sed, among others). Almost -all installations will have these already. - -Running the test harness (make test) uses: - - bash - perl - -HTSlib uses the following external libraries. Building requires both the -library itself, and include files needed to compile code that uses functions -from the library. 
Note that some Linux distributions put include files in -a development ('-dev' or '-devel') package separate from the main library. - - zlib (required) - libbz2 (required, unless configured with --disable-bz2) - liblzma (required, unless configured with --disable-lzma) - libcurl (optional, but strongly recommended) - libcrypto (optional for Amazon S3 support; not needed on MacOS) - -Disabling libbzip2 and liblzma will make some CRAM files unreadable, so -is not recommended. - -Using libcurl provides HTSlib with network protocol support, for -example it enables the use of ftp://, http://, and https:// URLs. -It is also required if direct access to Amazon S3 or Google Cloud -Storage is enabled. - -Amazon S3 support requires an HMAC function to calculate a message -authentication code. On MacOS, the CCHmac function from the standard -library is used. Systems that do not have CCHmac will get this from -libcrypto. libcrypto is part of OpenSSL or one of its derivatives (LibreSSL -or BoringSSL). - -On Microsoft Windows we recommend use of Mingw64/Msys2. Whilst the -code may work on Windows with other environments, these have not been -verified. Use of the configure script is a requirement too. - -Update htscodecs submodule -========================== - -Note that this section only applies to git checkouts. If you're building -from a release tar file, you can skip this section. - -Some parts of HTSlib are provided by the external "htscodecs" project. This -is included as a submodule. When building from the git repository, -either clone the project using "git clone --recurse-submodules", or run: - - git submodule update --init --recursive - -to ensure the correct version of the submodule is present. - -It is also possible to link against an external libhtscodecs library -by using the '--with-external-htscodecs' configure option. When -this is used, the submodule files will be ignored. 
- -Building Configure -================== - -This step is only needed if configure.ac has been changed, or if configure -does not exist (for example, when building from a git clone). The -configure script and config.h.in can be built by running: - - autoreconf -i - -Basic Installation -================== - -To build and install HTSlib, 'cd' to the htslib-1.x directory containing -the package's source and type the following commands: - - ./configure - make - make install - -The './configure' command checks your build environment and allows various -optional functionality to be enabled (see Configuration below). If you -don't want to select any optional functionality, you may wish to omit -configure and just type 'make; make install' as for previous versions -of HTSlib. However if the build fails you should run './configure' as -it can diagnose the common reasons for build failures. - -The 'make' command builds the HTSlib library and various useful -utilities: bgzip, htsfile, and tabix. If compilation fails you should -run './configure' as it can diagnose problems with your build environment -that cause build failures. - -The 'make install' command installs the libraries, library header files, -utilities, several manual pages, and a pkgconfig file to /usr/local. -The installation location can be changed by configuring with --prefix=DIR -or via 'make prefix=DIR install' (see Installation Locations below). -Shared library permissions can be set via e.g. 'make install LIB_PERM=755'. - - -Configuration -============= - -By default, './configure' examines your build environment, checking for -requirements such as the zlib development files, and arranges for a plain -HTSlib build. The following configure options can be used to enable -various features and specify further optional external requirements: - ---enable-plugins - Use plugins to implement exotic file access protocols and other - specialised facilities. 
This enables such facilities to be developed - and packaged outwith HTSlib, and somewhat isolates HTSlib-using programs - from their library dependencies. By default (or with --disable-plugins), - any enabled pluggable facilities (such as libcurl file access) are built - directly within HTSlib. - - Programs that are statically linked to a libhts.a with plugins enabled - need to be linked using -rdynamic or a similar linker option. - - The repository contains - several additional plugins, including the iRODS () - file access plugin previously distributed with HTSlib. - ---with-plugin-dir=DIR - Specifies the directory into which plugins built while building HTSlib - should be installed; by default, LIBEXECDIR/htslib. - ---with-plugin-path=DIR:DIR:DIR... - Specifies the list of directories that HTSlib will search for plugins. - By default, only the directory specified via --with-plugin-dir will be - searched; you can use --with-plugin-path='DIR:$(plugindir):DIR' and so - on to cause additional directories to be searched. - ---with-external-htscodecs - Build and link against an external copy of the htscodecs library - instead of using the source files in the htscodecs directory. - ---enable-libcurl - Use libcurl () to implement network access to - remote files via FTP, HTTP, HTTPS, etc. By default or with - --enable-libcurl=check, configure will probe for libcurl and include - this functionality if libcurl is available. Use --disable-libcurl - to prevent this. - ---enable-gcs - Implement network access to Google Cloud Storage. By default or with - --enable-gcs=check, this is enabled when libcurl is enabled. - ---enable-s3 - Implement network access to Amazon AWS S3. By default or with - --enable-s3=check, this is enabled when libcurl is enabled. - ---disable-bz2 - Bzip2 is an optional compression codec format for CRAM, included - in HTSlib by default. It can be disabled with --disable-bz2, but - be aware that not all CRAM files may be possible to decode. 
- ---disable-lzma - LZMA is an optional compression codec for CRAM, included in HTSlib - by default. It can be disabled with --disable-lzma, but be aware - that not all CRAM files may be possible to decode. - ---with-libdeflate - Libdeflate is a heavily optimized library for DEFLATE-based compression - and decompression. It also includes a fast crc32 implementation. - By default, ./configure will probe for libdeflate and use it if - available. To prevent this, use --without-libdeflate. - -Each --enable-FEATURE/--disable-FEATURE/--with-PACKAGE/--without-PACKAGE -option listed also has an opposite, e.g., --without-external-htscodecs -or --disable-plugins. However, apart from those options for which the -default is to probe for related facilities, using these opposite options -is mostly unnecessary as they just select the default configure behaviour. - -The configure script also accepts the usual options and environment variables -for tuning installation locations and compilers: type './configure --help' -for details. For example, - - ./configure CC=icc --prefix=/opt/icc-compiled - -would specify that HTSlib is to be built with icc and installed into bin, -lib, etc subdirectories under /opt/icc-compiled. - -If dependencies have been installed in non-standard locations (i.e. not on -the normal include and library search paths) then the CPPFLAGS and LDFLAGS -environment variables can be used to set the options needed to find them. -For example, NetBSD users may use: - - ./configure CPPFLAGS=-I/usr/pkg/include \ - LDFLAGS='-L/usr/pkg/lib -Wl,-R/usr/pkg/lib' - -to allow compiling and linking against dependencies installed via the ports -collection. - -Installation Locations -====================== - -By default, 'make install' installs HTSlib libraries under /usr/local/lib, -HTSlib header files under /usr/local/include, utility programs under -/usr/local/bin, etc. 
(To be precise, the header files are installed within -a fixed 'htslib' subdirectory under the specified .../include location.) - -You can specify a different location to install HTSlib by configuring -with --prefix=DIR or specify locations for particular parts of HTSlib by -configuring with --libdir=DIR and so on. Type './configure --help' for -the full list of such install directory options. - -Alternatively you can specify different locations at install time by -typing 'make prefix=DIR install' or 'make libdir=DIR install' and so on. -Consult the list of prefix/exec_prefix/etc variables near the top of the -Makefile for the full list of such variables that can be overridden. - -You can also specify a staging area by typing 'make DESTDIR=DIR install', -possibly in conjunction with other --prefix or prefix=DIR settings. -For example, - - make DESTDIR=/tmp/staging prefix=/opt - -would install into bin, lib, etc subdirectories under /tmp/staging/opt. - - -System Specific Details -======================= - -Installing the prerequisites is system dependent and there is more -than one correct way of satisfying these, including downloading them -from source, compiling and installing them yourself. - -For people with super-user access, we provide an example set of commands -below for installing the dependencies on a variety of operating system -distributions. Note these are not specific recommendations on distribution, -compiler or SSL implementation. It is assumed you already have the core set -of packages for the given distribution - the lists may be incomplete if -this is not the case. - -Debian / Ubuntu ---------------- - -sudo apt-get update # Ensure the package list is up to date -sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev - -Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev. 
- -RedHat / CentOS ---------------- - -sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel - -Note: On some versions perl FindBin will need to be installed to make the tests work. - -sudo yum install perl-FindBin - -Alpine Linux ------------- - -doas apk update # Ensure the package list is up to date -doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev openssl-dev - -Note: some older Alpine versions use libressl-dev rather than openssl-dev. - -OpenSUSE --------- - -sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel - -Windows MSYS2/MINGW64 ---------------------- - -The configure script must be used as without it the compilation will -likely fail. - -Follow MSYS2 installation instructions at -https://www.msys2.org/wiki/MSYS2-installation/ - -Then relaunch to MSYS2 shell using the "MSYS2 MinGW x64" executable. -Once in that environment (check $MSYSTEM equals "MINGW64") install the -compilers using pacman -S and the following package list: - -base-devel mingw-w64-x86_64-toolchain -mingw-w64-x86_64-libdeflate mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 -mingw-w64-x86_64-xz mingw-w64-x86_64-curl mingw-w64-x86_64-autotools -mingw-w64-x86_64-tools-git - -(The last is only needed for building libraries compatible with MSVC.) - -HP-UX ------ - -HP-UX requires that shared libraries have execute permission. The -default for HTSlib is to install with permission 644 (read-write for -owner and read-only for group / other). This can be overridden by -setting the LIB_PERM variable at install time with: - - make install LIB_PERM=755 diff --git a/src/htslib-1.19.1/Makefile b/src/htslib-1.19.1/Makefile deleted file mode 100644 index c5aa217..0000000 --- a/src/htslib-1.19.1/Makefile +++ /dev/null @@ -1,972 +0,0 @@ -# Makefile for htslib, a C library for high-throughput sequencing data formats. 
-# -# Copyright (C) 2013-2023 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -CC = gcc -AR = ar -RANLIB = ranlib - -# Default libraries to link if configure is not used -htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl - -CPPFLAGS = -# TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64 -# to CFLAGS manually, here or in config.mk if the latter exists. 
-# TODO: probably update cram code to make it compile cleanly with -Wc++-compat -# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600 -#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600 -CFLAGS = -g -Wall -O2 -fvisibility=hidden -EXTRA_CFLAGS_PIC = -fpic -TARGET_CFLAGS = -LDFLAGS = -fvisibility=hidden -VERSION_SCRIPT_LDFLAGS = -Wl,-version-script,$(srcprefix)htslib.map -LIBS = $(htslib_default_libs) - -prefix = /usr/local -exec_prefix = $(prefix) -bindir = $(exec_prefix)/bin -includedir = $(prefix)/include -libdir = $(exec_prefix)/lib -libexecdir = $(exec_prefix)/libexec -datarootdir = $(prefix)/share -mandir = $(datarootdir)/man -man1dir = $(mandir)/man1 -man5dir = $(mandir)/man5 -man7dir = $(mandir)/man7 -pkgconfigdir= $(libdir)/pkgconfig - -MKDIR_P = mkdir -p -INSTALL = install -p -INSTALL_DATA = $(INSTALL) -m 644 -INSTALL_DIR = $(MKDIR_P) -m 755 -LIB_PERM = 644 -INSTALL_LIB = $(INSTALL) -m $(LIB_PERM) -INSTALL_MAN = $(INSTALL_DATA) -INSTALL_PROGRAM = $(INSTALL) - -# Set by config.mk if plugins are enabled -plugindir = - -BUILT_PROGRAMS = \ - annot-tsv \ - bgzip \ - htsfile \ - tabix - -BUILT_TEST_PROGRAMS = \ - test/hts_endian \ - test/fieldarith \ - test/hfile \ - test/pileup \ - test/pileup_mod \ - test/plugins-dlhts \ - test/sam \ - test/test_bgzf \ - test/test_expr \ - test/test_faidx \ - test/test_kfunc \ - test/test_kstring \ - test/test_mod \ - test/test_realn \ - test/test-regidx \ - test/test_str2int \ - test/test_time_funcs \ - test/test_view \ - test/test_index \ - test/test-vcf-api \ - test/test-vcf-sweep \ - test/test-bcf-sr \ - test/fuzz/hts_open_fuzzer.o \ - test/test-bcf-translate \ - test/test-parse-reg \ - test/test_introspection \ - test/test-bcf_set_variant_type - -BUILT_THRASH_PROGRAMS = \ - test/thrash_threads1 \ - test/thrash_threads2 \ - test/thrash_threads3 \ - test/thrash_threads4 \ - test/thrash_threads5 \ - test/thrash_threads6 \ - test/thrash_threads7 - -all: lib-static lib-shared $(BUILT_PROGRAMS) plugins 
$(BUILT_TEST_PROGRAMS) \ - htslib_static.mk htslib-uninstalled.pc - -ALL_CPPFLAGS = -I. $(CPPFLAGS) - -# Usually htscodecs.mk is generated by running configure or config.status, -# but if those aren't used create a default here. -htscodecs.mk: - echo '# Default htscodecs.mk generated by Makefile' > $@ - echo 'include $$(HTSPREFIX)htscodecs_bundled.mk' >> $@ - $(srcdir)/hts_probe_cc.sh '$(CC)' '$(CFLAGS) $(CPPFLAGS)' '$(LDFLAGS)' >> $@ - -srcdir = . -srcprefix = -HTSPREFIX = - -# Flags for SIMD code -HTS_CFLAGS_AVX2 = -HTS_CFLAGS_AVX512 = -HTS_CFLAGS_SSE4 = - -# Control building of SIMD code. Not used if configure has been run. -HTS_BUILD_AVX2 = -HTS_BUILD_AVX512 = -HTS_BUILD_SSE4 = - -include htslib_vars.mk -include htscodecs.mk - -# If not using GNU make, you need to copy the version number from version.sh -# into here. -PACKAGE_VERSION := $(shell $(srcdir)/version.sh) - -LIBHTS_SOVERSION = 3 - -# Version numbers for the Mac dynamic library. Note that the leading 3 -# is not strictly necessary and should be removed the next time -# LIBHTS_SOVERSION is bumped (see #1144 and -# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23) -MACH_O_COMPATIBILITY_VERSION = 3.1.19 -MACH_O_CURRENT_VERSION = 3.1.19 - -# Force version.h to be remade if $(PACKAGE_VERSION) has changed. 
-version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force)) - -version.h: - echo '#define HTS_VERSION_TEXT "$(PACKAGE_VERSION)"' > $@ - -print-version: - @echo $(PACKAGE_VERSION) - -show-version: - @echo PACKAGE_VERSION = $(PACKAGE_VERSION) - -config_vars.h: override escape=$(subst ',\x27,$(subst ",\",$(subst \,\\,$(1)))) -config_vars.h: override hts_cc_escaped=$(call escape,$(CC)) -config_vars.h: override hts_cppflags_escaped=$(call escape,$(CPPFLAGS)) -config_vars.h: override hts_cflags_escaped=$(call escape,$(CFLAGS)) -config_vars.h: override hts_ldflags_escaped=$(call escape,$(LDFLAGS)) -config_vars.h: override hts_libs_escaped=$(call escape,$(LIBS)) - -config_vars.h: - printf '#define HTS_CC "%s"\n#define HTS_CPPFLAGS "%s"\n#define HTS_CFLAGS "%s"\n#define HTS_LDFLAGS "%s"\n#define HTS_LIBS "%s"\n' \ - '$(hts_cc_escaped)' \ - '$(hts_cppflags_escaped)' \ - '$(hts_cflags_escaped)' \ - '$(hts_ldflags_escaped)' \ - '$(hts_libs_escaped)' > $@ - -.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so - -.c.o: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $< - -.c.pico: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $< - - -LIBHTS_OBJS = \ - kfunc.o \ - kstring.o \ - bcf_sr_sort.o \ - bgzf.o \ - errmod.o \ - faidx.o \ - header.o \ - hfile.o \ - hts.o \ - hts_expr.o \ - hts_os.o\ - md5.o \ - multipart.o \ - probaln.o \ - realn.o \ - regidx.o \ - region.o \ - sam.o \ - sam_mods.o \ - synced_bcf_reader.o \ - vcf_sweep.o \ - tbx.o \ - textutils.o \ - thread_pool.o \ - vcf.o \ - vcfutils.o \ - cram/cram_codecs.o \ - cram/cram_decode.o \ - cram/cram_encode.o \ - cram/cram_external.o \ - cram/cram_index.o \ - cram/cram_io.o \ - cram/cram_stats.o \ - cram/mFILE.o \ - cram/open_trace_file.o \ - cram/pooled_alloc.o \ - cram/string_alloc.o \ - $(HTSCODECS_OBJS) \ - $(NONCONFIGURE_OBJS) - -# Without configure we wish to have a rich set of default figures, -# but we still need conditional 
inclusion as we wish to still -# support ./configure --disable-blah. -NONCONFIGURE_OBJS = hfile_libcurl.o - -PLUGIN_EXT = -PLUGIN_OBJS = - -cram_h = cram/cram.h $(cram_samtools_h) $(header_h) $(cram_structs_h) $(cram_io_h) cram/cram_encode.h cram/cram_decode.h cram/cram_stats.h cram/cram_codecs.h cram/cram_index.h $(htslib_cram_h) -cram_io_h = cram/cram_io.h $(cram_misc_h) -cram_misc_h = cram/misc.h -cram_os_h = cram/os.h $(htslib_hts_endian_h) -cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h) -cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) $(htslib_cram_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h) -cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h -bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h) -fuzz_settings_h = fuzz_settings.h -header_h = header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h) $(htslib_sam_h) -hfile_internal_h = hfile_internal.h $(htslib_hts_defs_h) $(htslib_hfile_h) $(textutils_internal_h) -hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h) -hts_time_funcs_h = hts_time_funcs.h -sam_internal_h = sam_internal.h $(htslib_sam_h) -textutils_internal_h = textutils_internal.h $(htslib_kstring_h) -thread_pool_internal_h = thread_pool_internal.h $(htslib_thread_pool_h) - -# To be effective, config.mk needs to appear after most Makefile variables are -# set but before most rules appear, so that it can both use previously-set -# variables in its own rules' prerequisites and also update variables for use -# in later rules' prerequisites. - -# If your make doesn't accept -include, change this to 'include' if you are -# using the configure script or just comment the line out if you are not. --include config.mk - -# Usually config.h is generated by running configure or config.status, -# but if those aren't used create a default config.h here. 
-config.h: - echo '/* Default config.h generated by Makefile */' > $@ - echo '#ifndef _XOPEN_SOURCE' >> $@ - echo '#define _XOPEN_SOURCE 600' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_LIBBZ2 1' >> $@ - echo '#define HAVE_LIBLZMA 1' >> $@ - echo '#ifndef __APPLE__' >> $@ - echo '#define HAVE_LZMA_H 1' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_DRAND48 1' >> $@ - echo '#define HAVE_LIBCURL 1' >> $@ - if [ "x$(HTS_BUILD_SSE4)" != "x" ]; then \ - echo '#define HAVE_POPCNT 1' >> $@ ; \ - echo '#define HAVE_SSE4_1 1' >> $@ ; \ - echo '#define HAVE_SSSE3 1' >> $@ ; \ - echo '#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0' >> $@ ; \ - echo '#define UBSAN 1' >> $@ ; \ - echo '#endif' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \ - echo '#define HAVE_AVX2 1' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \ - echo '#define HAVE_AVX512 1' >> $@ ; \ - fi - -# And similarly for htslib.pc.tmp ("pkg-config template"). No dependency -# on htslib.pc.in listed, as if that file is newer the usual way to regenerate -# this target is via configure or config.status rather than this rule. -htslib.pc.tmp: - sed -e '/^static_libs=/s/@static_LIBS@/$(htslib_default_libs)/;s#@[^-][^@]*@##g' $(srcprefix)htslib.pc.in > $@ - -# Create a makefile fragment listing the libraries and LDFLAGS needed for -# static linking. This can be included by projects that want to build -# and link against the htslib source tree instead of an installed library. -htslib_static.mk: htslib.pc.tmp - sed -n '/^static_libs=/s/[^=]*=/HTSLIB_static_LIBS = /p;/^static_ldflags=/s/[^=]*=/HTSLIB_static_LDFLAGS = /p' $< > $@ - - -lib-static: libhts.a - -# $(shell), :=, and ifeq/.../endif are GNU Make-specific. If you don't have -# GNU Make, comment out the parts of these conditionals that don't apply. 
-ifneq "$(origin PLATFORM)" "file" -PLATFORM := $(shell uname -s) -endif -ifeq "$(PLATFORM)" "Darwin" -SHLIB_FLAVOUR = dylib -lib-shared: libhts.dylib -else ifeq "$(findstring CYGWIN,$(PLATFORM))" "CYGWIN" -SHLIB_FLAVOUR = cygdll -lib-shared: cyghts-$(LIBHTS_SOVERSION).dll -else ifeq "$(findstring MSYS,$(PLATFORM))" "MSYS" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else ifeq "$(findstring MINGW,$(PLATFORM))" "MINGW" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else -SHLIB_FLAVOUR = so -lib-shared: libhts.so -endif - -BUILT_PLUGINS = $(PLUGIN_OBJS:.o=$(PLUGIN_EXT)) - -ifneq "$(BUILT_PLUGINS)" "" -plugins: lib-shared -endif -plugins: $(BUILT_PLUGINS) - - -libhts.a: $(LIBHTS_OBJS) - @-rm -f $@ - $(AR) -rc $@ $(LIBHTS_OBJS) - -$(RANLIB) $@ - -print-config: - @echo HTS_CFLAGS_AVX2 = $(HTS_CFLAGS_AVX2) - @echo HTS_CFLAGS_AVX512 = $(HTS_CFLAGS_AVX512) - @echo HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSE4) - @echo LDFLAGS = $(LDFLAGS) - @echo LIBHTS_OBJS = $(LIBHTS_OBJS) - @echo LIBS = $(LIBS) - @echo PLATFORM = $(PLATFORM) - -# The target here is libhts.so, as that is the built file that other rules -# depend upon and that is used when -lhts appears in other program's recipes. -# As a byproduct invisible to make, libhts.so.NN is also created, as it is the -# file used at runtime (when $LD_LIBRARY_PATH includes the build directory). - -libhts.so: $(LIBHTS_OBJS:.o=.pico) - $(CC) -shared -Wl,-soname,libhts.so.$(LIBHTS_SOVERSION) $(VERSION_SCRIPT_LDFLAGS) $(LDFLAGS) -o $@ $(LIBHTS_OBJS:.o=.pico) $(LIBS) -lpthread - ln -sf $@ libhts.so.$(LIBHTS_SOVERSION) - -# Similarly this also creates libhts.NN.dylib as a byproduct, so that programs -# when run can find this uninstalled shared library (when $DYLD_LIBRARY_PATH -# includes this project's build directory). 
- -libhts.dylib: $(LIBHTS_OBJS) - $(CC) -dynamiclib -install_name $(libdir)/libhts.$(LIBHTS_SOVERSION).dylib -current_version $(MACH_O_CURRENT_VERSION) -compatibility_version $(MACH_O_COMPATIBILITY_VERSION) $(LDFLAGS) -o $@ $(LIBHTS_OBJS) $(LIBS) - ln -sf $@ libhts.$(LIBHTS_SOVERSION).dylib - -cyghts-$(LIBHTS_SOVERSION).dll libhts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=libhts.dll.a -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).dll hts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=hts.dll.a -Wl,--enable-auto-import -Wl,--exclude-all-symbols $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).def: hts-$(LIBHTS_SOVERSION).dll - gendef hts-$(LIBHTS_SOVERSION).dll - -hts-$(LIBHTS_SOVERSION).lib: hts-$(LIBHTS_SOVERSION).def - dlltool -m i386:x86-64 -d hts-$(LIBHTS_SOVERSION).def -l hts-$(LIBHTS_SOVERSION).lib - -# Bundling libraries, binaries, dll dependencies, and licenses into a -# single directory. NB: This is not needed for end-users, but a test bed -# for maintainers building binary distributions. -# -# NOTE: only tested on the supported MSYS2/MINGW64 environment. 
-dist-windows: DESTDIR= -dist-windows: prefix=dist-windows -dist-windows: install - cp hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib dist-windows/lib - cp `ldd hts-$(LIBHTS_SOVERSION).dll| awk '/mingw64/ {print $$3}'` dist-windows/bin - mkdir -p dist-windows/share/licenses/htslib - -cp -r /mingw64/share/licenses/mingw-w64-libraries \ - /mingw64/share/licenses/brotli \ - /mingw64/share/licenses/bzip2 \ - /mingw64/share/licenses/gcc-libs \ - /mingw64/share/licenses/libdeflate \ - /mingw64/share/licenses/libpsl \ - /mingw64/share/licenses/libtre \ - /mingw64/share/licenses/libwinpthread \ - /mingw64/share/licenses/openssl \ - /mingw64/share/licenses/xz \ - /mingw64/share/licenses/zlib \ - /mingw64/share/licenses/zstd \ - dist-windows/share/licenses/ - -cp -r /usr/share/licenses/curl \ - dist-windows/share/licenses/ - cp LICENSE dist-windows/share/licenses/htslib/ - - -# Target to allow htslib.mk to build all the object files before it -# links the shared and static libraries. -hts-object-files: $(LIBHTS_OBJS) - touch $@ - -# On Unix dlopen("libhts.so.NN", RTLD_LAZY) may default to RTLD_LOCAL. -# Hence plugins need to link to (shared) libhts.so.NN themselves, as they -# may not be able to access libhts symbols via the main program's libhts -# if that was dynamically loaded without an explicit RTLD_GLOBAL. -%.so: %.pico libhts.so - $(CC) -shared -Wl,-E $(LDFLAGS) -o $@ $< libhts.so $(LIBS) -lpthread - -# For programs *statically* linked to libhts.a, on macOS loading a plugin -# linked to a shared libhts.NN.dylib would lead to conflicting duplicate -# symbols. Fortunately macOS dlopen() defaults to RTLD_GLOBAL so there -# is less need for plugins to link back to libhts themselves. 
-%.bundle: %.o - $(CC) -bundle -Wl,-undefined,dynamic_lookup $(LDFLAGS) -o $@ $< $(LIBS) - -%.cygdll: %.o libhts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< libhts.dll.a $(LIBS) - -%.dll: %.o hts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS) - - -bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h) -errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h) -kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h) -header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h) -hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h) -hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h) -hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(hts_time_funcs_h) -hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h) -hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h) -hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c -vcf.o vcf.pico: vcf.c config.h $(fuzz_settings_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) 
$(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h) -sam.o sam.pico: sam.c config.h $(fuzz_settings_h) $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) -sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h) -tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h) -faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h) -bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h $(bcf_sr_sort_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h) -synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(bcf_sr_sort_h) -vcf_sweep.o vcf_sweep.pico: vcf_sweep.c config.h $(htslib_vcf_sweep_h) $(htslib_bgzf_h) -vcfutils.o vcfutils.pico: vcfutils.c config.h $(htslib_vcfutils_h) $(htslib_kbitset_h) -kfunc.o kfunc.pico: kfunc.c config.h $(htslib_kfunc_h) -regidx.o regidx.pico: regidx.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_regidx_h) $(hts_internal_h) -region.o region.pico: region.c config.h $(htslib_hts_h) $(htslib_khash_h) -md5.o md5.pico: md5.c config.h $(htslib_hts_h) $(htslib_hts_endian_h) -multipart.o multipart.pico: multipart.c config.h $(htslib_kstring_h) $(hts_internal_h) $(hfile_internal_h) -plugin.o plugin.pico: plugin.c config.h $(hts_internal_h) $(htslib_kstring_h) -probaln.o probaln.pico: probaln.c config.h $(htslib_hts_h) -realn.o realn.pico: realn.c config.h $(htslib_hts_h) $(htslib_sam_h) -textutils.o textutils.pico: textutils.c config.h 
$(htslib_hfile_h) $(htslib_kstring_h) $(htslib_sam_h) $(hts_internal_h) - -cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(fuzz_settings_h) $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h) -cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) -cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(sam_internal_h) $(htslib_hts_h) $(htslib_hts_endian_h) $(textutils_internal_h) -cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htscodecs_rANS_static4x16_h) $(htslib_hfile_h) $(cram_h) -cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) -cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(fuzz_settings_h) $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h) -cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h) -cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h -cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(htslib_hts_h) -cram/pooled_alloc.o cram/pooled_alloc.pico: cram/pooled_alloc.c config.h cram/pooled_alloc.h $(cram_misc_h) -cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h -thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h) $(htslib_hts_log_h) - -htscodecs/htscodecs/arith_dynamic.o htscodecs/htscodecs/arith_dynamic.pico: 
htscodecs/htscodecs/arith_dynamic.c config.h $(htscodecs_arith_dynamic_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/fqzcomp_qual.o htscodecs/htscodecs/fqzcomp_qual.pico: htscodecs/htscodecs/fqzcomp_qual.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: htscodecs/htscodecs/htscodecs.c $(htscodecs_htscodecs_h) $(htscodecs_version_h) -htscodecs/htscodecs/pack.o htscodecs/htscodecs/pack.pico: htscodecs/htscodecs/pack.c config.h $(htscodecs_pack_h) -htscodecs/htscodecs/rANS_static32x16pr.o htscodecs/htscodecs/rANS_static32x16pr.pico: htscodecs/htscodecs/rANS_static32x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: htscodecs/htscodecs/rANS_static32x16pr_avx2.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_permute_h) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: htscodecs/htscodecs/rANS_static32x16pr_avx512.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_neon.o htscodecs/htscodecs/rANS_static32x16pr_neon.pico: htscodecs/htscodecs/rANS_static32x16pr_neon.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: htscodecs/htscodecs/rANS_static32x16pr_sse4.c config.h $(htscodecs_rANS_word_h) 
$(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static4x16pr.o htscodecs/htscodecs/rANS_static4x16pr.pico: htscodecs/htscodecs/rANS_static4x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(htscodecs_utils_h) $(htscodecs_rANS_static32x16pr_h) -htscodecs/htscodecs/rANS_static.o htscodecs/htscodecs/rANS_static.pico: htscodecs/htscodecs/rANS_static.c config.h $(htscodecs_rANS_byte_h) $(htscodecs_utils_h) $(htscodecs_rANS_static_h) -htscodecs/htscodecs/rle.o htscodecs/htscodecs/rle.pico: htscodecs/htscodecs/rle.c config.h $(htscodecs_varint_h) $(htscodecs_rle_h) -htscodecs/htscodecs/tokenise_name3.o htscodecs/htscodecs/tokenise_name3.pico: htscodecs/htscodecs/tokenise_name3.c config.h $(htscodecs_pooled_alloc_h) $(htscodecs_arith_dynamic_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_tokenise_name3_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/utils.o htscodecs/htscodecs/utils.pico: htscodecs/htscodecs/utils.c config.h $(htscodecs_utils_h) - -# Extra CFLAGS for specific files -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX2) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX512) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: TARGET_CFLAGS = $(HTS_CFLAGS_SSE4) - -annot-tsv: annot-tsv.o libhts.a - $(CC) $(LDFLAGS) -o $@ annot-tsv.o libhts.a $(LIBS) -lpthread - -bgzip: bgzip.o libhts.a - $(CC) $(LDFLAGS) -o $@ bgzip.o libhts.a $(LIBS) -lpthread - -htsfile: htsfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ htsfile.o libhts.a $(LIBS) -lpthread - -tabix: tabix.o libhts.a - $(CC) $(LDFLAGS) -o $@ tabix.o libhts.a $(LIBS) -lpthread - -annot-tsv.o: annot-tsv.c config.h 
$(htslib_hts_h) $(htslib_hts_defs_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_regidx_h) -bgzip.o: bgzip.c config.h $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_hfile_h) -htsfile.o: htsfile.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -tabix.o: tabix.c config.h $(htslib_tbx_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(htslib_hts_log_h) - -# Runes to check that the htscodecs submodule is present -ifdef HTSCODECS_SOURCES -htscodecs/htscodecs/%.c: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs/%.h: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs: - @if test -e .git ; then \ - printf "\\n\\nError: htscodecs submodule files not present for htslib.\\n\ - Try running: \\n\ - git submodule update --init --recursive\\n\ - in the top-level htslib directory and then re-run make.\\n\\n\\n" ; \ - else \ - printf "\\n\\nError: htscodecs submodule files not present and this is not a git checkout.\\n\ - You have an incomplete distribution. 
Please try downloading one of the\\n\ - official releases from https://www.htslib.org/\\n" ; \ - fi - @false - -# Build the htscodecs/htscodecs/version.h file if necessary -htscodecs/htscodecs/version.h: force - @if test -e $(srcdir)/htscodecs/.git && test -e $(srcdir)/htscodecs/configure.ac ; then \ - vers=`cd $(srcdir)/htscodecs && git describe --always --dirty --match 'v[0-9]\.[0-9]*'` && \ - case "$$vers" in \ - v*) vers=$${vers#v} ;; \ - *) iv=`awk '/^AC_INIT/ { match($$0, /^AC_INIT\(htscodecs, *([0-9](\.[0-9])*)\)/, m); print substr($$0, m[1, "start"], m[1, "length"]) }' $(srcdir)/htscodecs/configure.ac` ; vers="$$iv$${vers:+-g$$vers}" ;; \ - esac ; \ - if ! grep -s -q '"'"$$vers"'"' $@ ; then \ - echo 'Updating $@ : #define HTSCODECS_VERSION_TEXT "'"$$vers"'"' ; \ - echo '#define HTSCODECS_VERSION_TEXT "'"$$vers"'"' > $@ ; \ - fi ; \ - fi -endif - -# Maintainer extra targets built -# - compile public headers as C++ -# Maintainer source code checks -# - copyright boilerplate presence -# - tab and trailing space detection -maintainer-check: test/usepublic.o - test/maintainer/check_copyright.pl . - test/maintainer/check_spaces.pl . - -# Look for untracked files in the git repository. -check-untracked: - @if test -e .git && git status --porcelain | grep '^\?'; then \ - echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \ - false ; \ - fi - -# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/ -# for brevity in test and install rules, and so that build logs do not have -# ./ sprinkled throughout. -SRC = $(srcprefix) - -# For tests that might use it, set $REF_PATH explicitly to use only reference -# areas within the test suite (or set it to ':' to use no reference areas). 
-# -# If using MSYS, avoid poor shell expansion via: -# MSYS2_ARG_CONV_EXCL="*" make check -check test: all $(HTSCODECS_TEST_TARGETS) - test/hts_endian - test/test_expr - test/test_kfunc - test/test_kstring - test/test_str2int - test/test_time_funcs - test/fieldarith test/fieldarith.sam - test/hfile - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -g ./libhts.$(SHLIB_FLAVOUR); \ - fi - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -l ./libhts.$(SHLIB_FLAVOUR); \ - fi - test/test_bgzf test/bgziptest.txt - test/test-parse-reg -t test/colons.bam - cd test/faidx && ./test-faidx.sh faidx.tst - cd test/sam_filter && ./filter.sh filter.tst - cd test/tabix && ./test-tabix.sh tabix.tst - cd test/mpileup && ./test-pileup.sh mpileup.tst - cd test/fastq && ./test-fastq.sh - cd test/base_mods && ./base-mods.sh base-mods.tst - REF_PATH=: test/sam test/ce.fa test/faidx/faidx.fa test/faidx/fastqs.fq - test/test-regidx - cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-} - -test/hts_endian: test/hts_endian.o - $(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS) - -# To build the fuzzer, try: -# make CC="clang16 -fsanitize=address,undefined,fuzzer" \ -# CFLAGS="-g -O3 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION" \ -# test/fuzz/hts_open_fuzzer -test/fuzz/hts_open_fuzzer: test/fuzz/hts_open_fuzzer.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/fuzz/hts_open_fuzzer.o libhts.a $(LIBS) -lpthread - -test/fieldarith: test/fieldarith.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/fieldarith.o libhts.a $(LIBS) -lpthread - -test/hfile: test/hfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/hfile.o libhts.a $(LIBS) -lpthread - -test/pileup: test/pileup.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread - -test/pileup_mod: test/pileup_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread - -test/plugins-dlhts: test/plugins-dlhts.o - $(CC) $(LDFLAGS) -o $@ 
test/plugins-dlhts.o $(LIBS) - -test/sam: test/sam.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/sam.o libhts.a $(LIBS) -lpthread - -test/test_bgzf: test/test_bgzf.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a -lz $(LIBS) -lpthread - -test/test_expr: test/test_expr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_expr.o libhts.a -lz $(LIBS) -lpthread - -test/test_faidx: test/test_faidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_faidx.o libhts.a -lz $(LIBS) -lpthread - -test/test_kfunc: test/test_kfunc.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a -lz $(LIBS) -lpthread - -test/test_kstring: test/test_kstring.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_kstring.o libhts.a -lz $(LIBS) -lpthread - -test/test_mod: test/test_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread - -test/test_realn: test/test_realn.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread - -test/test-regidx: test/test-regidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread - -test/test-parse-reg: test/test-parse-reg.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-parse-reg.o libhts.a $(LIBS) -lpthread - -test/test_str2int: test/test_str2int.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_str2int.o libhts.a $(LIBS) -lpthread - -test/test_time_funcs: test/test_time_funcs.o - $(CC) $(LDFLAGS) -o $@ test/test_time_funcs.o - -test/test_view: test/test_view.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LIBS) -lpthread - -test/test_index: test/test_index.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_index.o libhts.a $(LIBS) -lpthread - -test/test-vcf-api: test/test-vcf-api.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-api.o libhts.a $(LIBS) -lpthread - -test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-sweep.o libhts.a $(LIBS) -lpthread - -test/test-bcf-sr: test/test-bcf-sr.o libhts.a - $(CC) $(LDFLAGS) -o $@ 
test/test-bcf-sr.o libhts.a -lz $(LIBS) -lpthread - -test/test-bcf-translate: test/test-bcf-translate.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a -lz $(LIBS) -lpthread - -test/test_introspection: test/test_introspection.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_introspection.o libhts.a $(LIBS) -lpthread - -test/test-bcf_set_variant_type: test/test-bcf_set_variant_type.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf_set_variant_type.o libhts.a $(LIBS) -lpthread - -# Extra tests for bundled htscodecs -test_htscodecs_rans4x8: htscodecs/tests/rans4x8 - cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x8.test - -test_htscodecs_rans4x16: htscodecs/tests/rans4x16pr - cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x16.test - -test_htscodecs_arith: htscodecs/tests/arith_dynamic - cd htscodecs/tests && srcdir=. && export srcdir && ./arith.test - -test_htscodecs_tok3: htscodecs/tests/tokenise_name3 - cd htscodecs/tests && srcdir=. && export srcdir && ./tok3.test - -test_htscodecs_fqzcomp: htscodecs/tests/fqzcomp_qual - cd htscodecs/tests && srcdir=. 
&& export srcdir && ./fqzcomp.test - -test_htscodecs_varint: htscodecs/tests/varint - cd htscodecs/tests && ./varint - -htscodecs/tests/arith_dynamic: htscodecs/tests/arith_dynamic_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/fqzcomp_qual: htscodecs/tests/fqzcomp_qual_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x16pr: htscodecs/tests/rANS_static4x16pr_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x8: htscodecs/tests/rANS_static_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/tokenise_name3: htscodecs/tests/tokenise_name3_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/varint: htscodecs/tests/varint_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/arith_dynamic_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/arith_dynamic_test.o: htscodecs/tests/arith_dynamic_test.c config.h $(htscodecs_arith_dynamic_h) -htscodecs/tests/fqzcomp_qual_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/fqzcomp_qual_test.o: htscodecs/tests/fqzcomp_qual_test.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) -htscodecs/tests/rANS_static4x16pr_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static4x16pr_test.o: htscodecs/tests/rANS_static4x16pr_test.c config.h $(htscodecs_rANS_static4x16_h) -htscodecs/tests/rANS_static_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static_test.o: htscodecs/tests/rANS_static_test.c config.h $(htscodecs_rANS_static_h) -htscodecs/tests/tokenise_name3_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/tokenise_name3_test.o: htscodecs/tests/tokenise_name3_test.c config.h $(htscodecs_tokenise_name3_h) -htscodecs/tests/varint_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/varint_test.o: htscodecs/tests/varint_test.c config.h $(htscodecs_varint_h) - 
-test/hts_endian.o: test/hts_endian.c config.h $(htslib_hts_endian_h) -test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h) -test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h) -test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h) -test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h) -test/plugins-dlhts.o: test/plugins-dlhts.c config.h -test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h) -test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(hfile_internal_h) -test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h) -test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h) -test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h) -test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h) -test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h) -test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) -test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h) -test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h) -test/test_time_funcs.o: test/test_time_funcs.c config.h $(hts_time_funcs_h) -test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h) -test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h) -test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h) -test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h) -test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h) -test/test-bcf-sr.o: 
test/test-bcf-sr.c config.h $(htslib_synced_bcf_reader_h) $(htslib_hts_h) $(htslib_vcf_h) -test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h) -test/test_introspection.o: test/test_introspection.c config.h $(htslib_hts_h) $(htslib_hfile_h) -test/test-bcf_set_variant_type.o: test/test-bcf_set_variant_type.c config.h $(htslib_hts_h) vcf.c - -# Standalone target not added to $(BUILT_TEST_PROGRAMS) as some may not -# have a compiler that compiles as C++ when given a .cpp source file. -test/usepublic.o: test/usepublic.cpp config.h $(htslib_bgzf_h) $(htslib_cram_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_hts_h) $(htslib_hts_defs_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(htslib_hts_log_h) $(htslib_hts_os_h) $(htslib_kbitset_h) $(htslib_kfunc_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_klist_h) $(HTSPREFIX)htslib/knetfile.h $(htslib_kroundup_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_sam_h) $(htslib_synced_bcf_reader_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(htslib_vcf_h) $(htslib_vcf_sweep_h) $(htslib_vcfutils_h) - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ test/usepublic.cpp - - -test/thrash_threads1: test/thrash_threads1.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads1.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads2: test/thrash_threads2.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads2.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads3: test/thrash_threads3.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads3.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads4: test/thrash_threads4.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads4.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads5: test/thrash_threads5.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads5.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads6: test/thrash_threads6.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads6.o libhts.a -lz 
$(LIBS) -lpthread - -test/thrash_threads7: test/thrash_threads7.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads7.o libhts.a -lz $(LIBS) -lpthread - -test_thrash: $(BUILT_THRASH_PROGRAMS) - -# Test to ensure the functions in the header files are exported by the shared -# library. This currently works by comparing the output from ctags on -# the headers with the list of functions exported by the shared library. -# Note that functions marked as exported in the .c files and not the public -# headers will be missed by this test. -test-shlib-exports: header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - @echo "Checking shared library exports" - @if test ! -s header-exports.txt ; then echo "Error: header-exports.txt empty" ; false ; fi - @if test ! -s shlib-exports-$(SHLIB_FLAVOUR).txt ; then echo "Error: shlib-exports-$(SHLIB_FLAVOUR).txt empty" ; false ; fi - @! comm -23 header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt | grep . || \ - ( echo "Error: Found unexported symbols (listed above)" ; false ) - -# Extract symbols that should be exported from public headers using ctags -# Filter out macros in htslib/hts_defs.h. 
-header-exports.txt: test/header_syms.pl htslib/*.h - test/header_syms.pl htslib/*.h | sort -u -o $@ - -shlib-exports-so.txt: libhts.so - nm -D -g libhts.so | awk '$$2 == "T" { sub("@.*", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dylib.txt: libhts.dylib - nm -Ug libhts.dylib | awk '$$2 == "T" { sub("^_", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dll.txt: hts.dll.a - nm -g hts.dll.a | awk '$$2 == "T" { print $$3 }' | sort -u -o $@ - -$(srcprefix)htslib.map: libhts.so - LC_ALL=C ; export LC_ALL; \ - curr_vers=`expr 'X$(PACKAGE_VERSION)' : 'X\([0-9]*\.[0-9.]*\)'` ; \ - last_vers=`awk '/^HTSLIB_[0-9](\.[0-9]+)+/ { lv = $$1 } END { print lv }' htslib.map` ; \ - if test "x$$curr_vers" = 'x' || test "x$$last_vers" = 'x' ; then \ - echo "Version check failed : $$curr_vers / $$las_vers" 1>&2 ; \ - exit 1 ; \ - fi && \ - if test "HTSLIB_$$curr_vers" = "$$last_vers" ; then \ - echo "Refusing to update $@ - HTSlib version not changed" 1>&2 ; \ - exit 1 ; \ - fi && \ - nm --with-symbol-versions -D -g libhts.so | awk '$$2 ~ /^[DGRT]$$/ && $$3 ~ /@@Base$$/ && $$3 !~ /^(_init|_fini|_edata)@@/ { sub(/@@Base$$/, ";", $$3); print " " $$3 }' > $@.tmp && \ - if [ -s $@.tmp ] ; then \ - cat $@ > $@.new.tmp && \ - printf '\n%s {\n' "HTSLIB_$$curr_vers" >> $@.new.tmp && \ - cat $@.tmp >> $@.new.tmp && \ - printf '} %s;\n' "$$last_vers" >> $@.new.tmp && \ - rm -f $@.tmp && \ - mv $@.new.tmp $@ ; \ - fi ; \ - else \ - rm -f $@.tmp ; \ - fi - -install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB_FLAVOUR) install-pkgconfig - $(INSTALL_PROGRAM) $(BUILT_PROGRAMS) $(DESTDIR)$(bindir) - if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi - $(INSTALL_DATA) $(SRC)htslib/*.h $(DESTDIR)$(includedir)/htslib - $(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a - $(INSTALL_MAN) $(SRC)annot-tsv.1 $(SRC)bgzip.1 $(SRC)htsfile.1 $(SRC)tabix.1 $(DESTDIR)$(man1dir) - $(INSTALL_MAN) $(SRC)faidx.5 
$(SRC)sam.5 $(SRC)vcf.5 $(DESTDIR)$(man5dir) - $(INSTALL_MAN) $(SRC)htslib-s3-plugin.7 $(DESTDIR)$(man7dir) - -installdirs: - $(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(includedir) $(DESTDIR)$(includedir)/htslib $(DESTDIR)$(libdir) $(DESTDIR)$(man1dir) $(DESTDIR)$(man5dir) $(DESTDIR)$(man7dir) $(DESTDIR)$(pkgconfigdir) - if test -n "$(plugindir)"; then $(INSTALL_DIR) $(DESTDIR)$(plugindir); fi - -# After installation, the real file in $(libdir) will be libhts.so.X.Y.Z, -# with symlinks libhts.so (used via -lhts during linking of client programs) -# and libhts.so.NN (used by client executables at runtime). - -install-so: libhts.so installdirs - $(INSTALL_LIB) libhts.so $(DESTDIR)$(libdir)/libhts.so.$(PACKAGE_VERSION) - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so.$(LIBHTS_SOVERSION) - -install-cygdll: cyghts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) cyghts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/cyghts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) libhts.dll.a $(DESTDIR)$(libdir)/libhts.dll.a - -install-dll: hts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) hts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/hts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) hts.dll.a $(DESTDIR)$(libdir)/hts.dll.a - -install-dylib: libhts.dylib installdirs - $(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.$(LIBHTS_SOVERSION).dylib - -# Substitute these pseudo-autoconf variables only at install time -# so that "make install prefix=/prefix/path" etc continue to work. 
-install-pkgconfig: htslib.pc.tmp installdirs - sed -e 's#@-includedir@#$(includedir)#g;s#@-libdir@#$(libdir)#g;s#@-PACKAGE_VERSION@#$(PACKAGE_VERSION)#g' htslib.pc.tmp > $(DESTDIR)$(pkgconfigdir)/htslib.pc - chmod 644 $(DESTDIR)$(pkgconfigdir)/htslib.pc - -# A pkg-config file (suitable for copying to $PKG_CONFIG_PATH) that provides -# flags for building against the uninstalled library in this build directory. -htslib-uninstalled.pc: htslib.pc.tmp - sed -e 's#@-includedir@#'`pwd`'#g;s#@-libdir@#'`pwd`'#g' htslib.pc.tmp > $@ - - -testclean: - -rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* test/faidx/FAIL* \ - test/longrefs/*.tmp.* test/tabix/*.tmp.* test/tabix/FAIL* \ - header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - -rm -rf htscodecs/tests/test.out - -# Only remove this in git checkouts -DEL_HTSCODECS_VERSION := $(if $(wildcard htscodecs/.git),htscodecs/htscodecs/version.h) - -mostlyclean: testclean - -rm -f *.o *.pico cram/*.o cram/*.pico test/*.o test/*.dSYM config_vars.h version.h - -rm -f htscodecs/htscodecs/*.o htscodecs/htscodecs/*.pico $(DEL_HTSCODECS_VERSION) - -rm -f hts-object-files - -rm -f htscodecs/tests/*.o - -clean: mostlyclean clean-$(SHLIB_FLAVOUR) - -rm -f libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) $(BUILT_TEST_PROGRAMS) $(BUILT_THRASH_PROGRAMS) - -rm -f htscodecs/tests/rans4x8 htscodecs/tests/rans4x16pr htscodecs/tests/arith_dynamic htscodecs/tests/tokenise_name3 htscodecs/tests/fqzcomp_qual htscodecs/tests/varint - -distclean maintainer-clean: clean - -rm -f config.cache config.h config.log config.mk config.status - -rm -f TAGS *.pc.tmp *-uninstalled.pc htslib_static.mk htscodecs.mk - -rm -rf autom4te.cache - -clean-so: - -rm -f libhts.so libhts.so.* - -clean-cygdll: - -rm -f cyghts-*.dll libhts.dll.a - -clean-dll: - -rm -f hts-*.dll hts.dll.a - -clean-dylib: - -rm -f libhts.dylib libhts.*.dylib - - -tags TAGS: - ctags -f TAGS *.[ch] cram/*.[ch] htslib/*.h - -# We recommend libhts-using programs be built against a separate htslib -# 
installation. However if you feel that you must bundle htslib source -# code with your program, this hook enables Automake-style "make dist" -# for this subdirectory. If you do bundle an htslib snapshot, please -# add identifying information to $(PACKAGE_VERSION) as appropriate. -# (The wildcards attempt to omit non-exported files (.git*, README.md, -# etc) and other detritus that might be in the top-level directory.) -distdir: - @if [ -z "$(distdir)" ]; then echo "Please supply a distdir=DIR argument."; false; fi - tar -c *.[ch15] [ILMNRchtv]*[ELSbcekmnth] | (cd $(distdir) && tar -x) - +cd $(distdir) && $(MAKE) distclean - -force: - - -.PHONY: all check check-untracked clean distclean distdir force -.PHONY: install install-pkgconfig installdirs lib-shared lib-static -.PHONY: maintainer-check maintainer-clean mostlyclean plugins -.PHONY: print-config print-version show-version tags -.PHONY: test test-shlib-exports test_thrash testclean -.PHONY: clean-so install-so -.PHONY: clean-cygdll install-cygdll -.PHONY: clean-dll install-dll -.PHONY: clean-dylib install-dylib -.PHONY: test_htscodecs_rans4x8 test_htscodecs_rans4x16 test_htscodecs_arith -.PHONY: test_htscodecs_tok3 test_htscodecs_fqzcomp test_htscodecs_varint diff --git a/src/htslib-1.19.1/Makefile.vcfppR b/src/htslib-1.19.1/Makefile.vcfppR deleted file mode 100644 index f535348..0000000 --- a/src/htslib-1.19.1/Makefile.vcfppR +++ /dev/null @@ -1,963 +0,0 @@ -# Makefile for htslib, a C library for high-throughput sequencing data formats. -# -# Copyright (C) 2013-2023 Genome Research Ltd. 
-# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -CC := $(shell ${R_HOME}/bin/R CMD config CC) -AR := $(shell ${R_HOME}/bin/R CMD config AR) -RANLIB := $(shell ${R_HOME}/bin/R CMD config RANLIB) -CFLAGS := $(shell ${R_HOME}/bin/R CMD config CFLAGS) -CPPFLAGS := $(shell ${R_HOME}/bin/R CMD config CPPFLAGS) -LDFLAGS := $(shell ${R_HOME}/bin/R CMD config LDFLAGS) - -# Default libraries to link if configure is not used -htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl - -CPPFLAGS = -# TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64 -# to CFLAGS manually, here or in config.mk if the latter exists. 
-# TODO: probably update cram code to make it compile cleanly with -Wc++-compat -# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600 -#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600 -CFLAGS += -fpic -fvisibility=hidden -Wstrict-prototypes -EXTRA_CFLAGS_PIC = -TARGET_CFLAGS = -LDFLAGS += -fvisibility=hidden -VERSION_SCRIPT_LDFLAGS = -Wl,-version-script,$(srcprefix)htslib.map -LIBS = $(htslib_default_libs) - -prefix = /usr/local -exec_prefix = $(prefix) -bindir = $(exec_prefix)/bin -includedir = $(prefix)/include -libdir = $(exec_prefix)/lib -libexecdir = $(exec_prefix)/libexec -datarootdir = $(prefix)/share -mandir = $(datarootdir)/man -man1dir = $(mandir)/man1 -man5dir = $(mandir)/man5 -man7dir = $(mandir)/man7 -pkgconfigdir= $(libdir)/pkgconfig - -MKDIR_P = mkdir -p -INSTALL = install -p -INSTALL_DATA = $(INSTALL) -m 644 -INSTALL_DIR = $(MKDIR_P) -m 755 -LIB_PERM = 644 -INSTALL_LIB = $(INSTALL) -m $(LIB_PERM) -INSTALL_MAN = $(INSTALL_DATA) -INSTALL_PROGRAM = $(INSTALL) - -# Set by config.mk if plugins are enabled -plugindir = - -BUILT_PROGRAMS = \ - bgzip \ - htsfile \ - tabix - -BUILT_TEST_PROGRAMS = \ - test/hts_endian \ - test/fieldarith \ - test/hfile \ - test/pileup \ - test/pileup_mod \ - test/plugins-dlhts \ - test/sam \ - test/test_bgzf \ - test/test_expr \ - test/test_faidx \ - test/test_kfunc \ - test/test_kstring \ - test/test_mod \ - test/test_realn \ - test/test-regidx \ - test/test_str2int \ - test/test_time_funcs \ - test/test_view \ - test/test_index \ - test/test-vcf-api \ - test/test-vcf-sweep \ - test/test-bcf-sr \ - test/fuzz/hts_open_fuzzer.o \ - test/test-bcf-translate \ - test/test-parse-reg \ - test/test_introspection \ - test/test-bcf_set_variant_type - -BUILT_THRASH_PROGRAMS = \ - test/thrash_threads1 \ - test/thrash_threads2 \ - test/thrash_threads3 \ - test/thrash_threads4 \ - test/thrash_threads5 \ - test/thrash_threads6 \ - test/thrash_threads7 - -all: lib-static lib-shared $(BUILT_PROGRAMS) plugins 
$(BUILT_TEST_PROGRAMS) \ - htslib_static.mk htslib-uninstalled.pc - -ALL_CPPFLAGS = -I. $(CPPFLAGS) - -# Usually htscodecs.mk is generated by running configure or config.status, -# but if those aren't used create a default here. -htscodecs.mk: - echo '# Default htscodecs.mk generated by Makefile' > $@ - echo 'include $$(HTSPREFIX)htscodecs_bundled.mk' >> $@ - $(srcdir)/hts_probe_cc.sh '$(CC)' '$(CFLAGS) $(CPPFLAGS)' '$(LDFLAGS)' >> $@ - -srcdir = . -srcprefix = -HTSPREFIX = - -# Flags for SIMD code -HTS_CFLAGS_AVX2 = -HTS_CFLAGS_AVX512 = -HTS_CFLAGS_SSE4 = - -# Control building of SIMD code. Not used if configure has been run. -HTS_BUILD_AVX2 = -HTS_BUILD_AVX512 = -HTS_BUILD_SSSE3 = -HTS_BUILD_POPCNT = -HTS_BUILD_SSE4_1 = - -include htslib_vars.mk -include htscodecs.mk - -# If not using GNU make, you need to copy the version number from version.sh -# into here. -PACKAGE_VERSION := $(shell $(srcdir)/version.sh) - -LIBHTS_SOVERSION = 3 - -# Version numbers for the Mac dynamic library. Note that the leading 3 -# is not strictly necessary and should be removed the next time -# LIBHTS_SOVERSION is bumped (see #1144 and -# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23) -MACH_O_COMPATIBILITY_VERSION = 3.1.18 -MACH_O_CURRENT_VERSION = 3.1.18 - -# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string -# even if this is a dirty or untagged Git working tree. -NUMERIC_VERSION := $(shell $(srcdir)/version.sh numeric) - -# Force version.h to be remade if $(PACKAGE_VERSION) has changed. 
-version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force)) - -version.h: - echo '#define HTS_VERSION_TEXT "$(PACKAGE_VERSION)"' > $@ - -print-version: - @echo $(PACKAGE_VERSION) - -show-version: - @echo PACKAGE_VERSION = $(PACKAGE_VERSION) - @echo NUMERIC_VERSION = $(NUMERIC_VERSION) - -config_vars.h: override escape=$(subst ',\x27,$(subst ",\",$(subst \,\\,$(1)))) -config_vars.h: override hts_cc_escaped=$(call escape,$(CC)) -config_vars.h: override hts_cppflags_escaped=$(call escape,$(CPPFLAGS)) -config_vars.h: override hts_cflags_escaped=$(call escape,$(CFLAGS)) -config_vars.h: override hts_ldflags_escaped=$(call escape,$(LDFLAGS)) -config_vars.h: override hts_libs_escaped=$(call escape,$(LIBS)) - -config_vars.h: - printf '#define HTS_CC "%s"\n#define HTS_CPPFLAGS "%s"\n#define HTS_CFLAGS "%s"\n#define HTS_LDFLAGS "%s"\n#define HTS_LIBS "%s"\n' \ - '$(hts_cc_escaped)' \ - '$(hts_cppflags_escaped)' \ - '$(hts_cflags_escaped)' \ - '$(hts_ldflags_escaped)' \ - '$(hts_libs_escaped)' > $@ - -.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so - -.c.o: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $< - -.c.pico: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $< - - -LIBHTS_OBJS = \ - kfunc.o \ - kstring.o \ - bcf_sr_sort.o \ - bgzf.o \ - errmod.o \ - faidx.o \ - header.o \ - hfile.o \ - hts.o \ - hts_expr.o \ - hts_os.o\ - md5.o \ - multipart.o \ - probaln.o \ - realn.o \ - regidx.o \ - region.o \ - sam.o \ - sam_mods.o \ - synced_bcf_reader.o \ - vcf_sweep.o \ - tbx.o \ - textutils.o \ - thread_pool.o \ - vcf.o \ - vcfutils.o \ - cram/cram_codecs.o \ - cram/cram_decode.o \ - cram/cram_encode.o \ - cram/cram_external.o \ - cram/cram_index.o \ - cram/cram_io.o \ - cram/cram_stats.o \ - cram/mFILE.o \ - cram/open_trace_file.o \ - cram/pooled_alloc.o \ - cram/string_alloc.o \ - $(HTSCODECS_OBJS) \ - $(NONCONFIGURE_OBJS) - -# Without configure we wish to have a rich set of 
default figures, -# but we still need conditional inclusion as we wish to still -# support ./configure --disable-blah. -NONCONFIGURE_OBJS = hfile_libcurl.o - -PLUGIN_EXT = -PLUGIN_OBJS = - -cram_h = cram/cram.h $(cram_samtools_h) $(header_h) $(cram_structs_h) $(cram_io_h) cram/cram_encode.h cram/cram_decode.h cram/cram_stats.h cram/cram_codecs.h cram/cram_index.h $(htslib_cram_h) -cram_io_h = cram/cram_io.h $(cram_misc_h) -cram_misc_h = cram/misc.h -cram_os_h = cram/os.h $(htslib_hts_endian_h) -cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h) -cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) $(htslib_cram_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h) -cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h -bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h) -header_h = header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h) $(htslib_sam_h) -hfile_internal_h = hfile_internal.h $(htslib_hts_defs_h) $(htslib_hfile_h) $(textutils_internal_h) -hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h) -hts_time_funcs_h = hts_time_funcs.h -sam_internal_h = sam_internal.h $(htslib_sam_h) -textutils_internal_h = textutils_internal.h $(htslib_kstring_h) -thread_pool_internal_h = thread_pool_internal.h $(htslib_thread_pool_h) - -# To be effective, config.mk needs to appear after most Makefile variables are -# set but before most rules appear, so that it can both use previously-set -# variables in its own rules' prerequisites and also update variables for use -# in later rules' prerequisites. - -# If your make doesn't accept -include, change this to 'include' if you are -# using the configure script or just comment the line out if you are not. --include config.mk - -# Usually config.h is generated by running configure or config.status, -# but if those aren't used create a default config.h here. 
-config.h: - echo '/* Default config.h generated by Makefile */' > $@ - echo '#ifndef _XOPEN_SOURCE' >> $@ - echo '#define _XOPEN_SOURCE 600' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_LIBBZ2 1' >> $@ - echo '#define HAVE_LIBLZMA 1' >> $@ - echo '#ifndef __APPLE__' >> $@ - echo '#define HAVE_LZMA_H 1' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_DRAND48 1' >> $@ - echo '#define HAVE_LIBCURL 1' >> $@ - if [ "x$(HTS_BUILD_SSE4)" != "x" ]; then \ - echo '#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0' >> $@ ; \ - echo '#define UBSAN 1' >> $@ ; \ - echo '#endif' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \ - echo '#define HAVE_AVX2 1' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \ - echo '#define HAVE_AVX512 1' >> $@ ; \ - fi - -# And similarly for htslib.pc.tmp ("pkg-config template"). No dependency -# on htslib.pc.in listed, as if that file is newer the usual way to regenerate -# this target is via configure or config.status rather than this rule. -htslib.pc.tmp: - sed -e '/^static_libs=/s/@static_LIBS@/$(htslib_default_libs)/;s#@[^-][^@]*@##g' $(srcprefix)htslib.pc.in > $@ - -# Create a makefile fragment listing the libraries and LDFLAGS needed for -# static linking. This can be included by projects that want to build -# and link against the htslib source tree instead of an installed library. -htslib_static.mk: htslib.pc.tmp - sed -n '/^static_libs=/s/[^=]*=/HTSLIB_static_LIBS = /p;/^static_ldflags=/s/[^=]*=/HTSLIB_static_LDFLAGS = /p' $< > $@ - - -lib-static: libhts.a - -# $(shell), :=, and ifeq/.../endif are GNU Make-specific. If you don't have -# GNU Make, comment out the parts of these conditionals that don't apply. 
-ifneq "$(origin PLATFORM)" "file" -PLATFORM := $(shell uname -s) -endif -ifeq "$(PLATFORM)" "Darwin" -SHLIB_FLAVOUR = dylib -lib-shared: libhts.dylib -else ifeq "$(findstring CYGWIN,$(PLATFORM))" "CYGWIN" -SHLIB_FLAVOUR = cygdll -lib-shared: cyghts-$(LIBHTS_SOVERSION).dll -else ifeq "$(findstring MSYS,$(PLATFORM))" "MSYS" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else ifeq "$(findstring MINGW,$(PLATFORM))" "MINGW" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else -SHLIB_FLAVOUR = so -lib-shared: libhts.so -endif - -BUILT_PLUGINS = $(PLUGIN_OBJS:.o=$(PLUGIN_EXT)) - -ifneq "$(BUILT_PLUGINS)" "" -plugins: lib-shared -endif -plugins: $(BUILT_PLUGINS) - - -libhts.a: $(LIBHTS_OBJS) - @-rm -f $@ - $(AR) -rc $@ $(LIBHTS_OBJS) - -$(RANLIB) $@ - -print-config: - @echo HTS_CFLAGS_AVX2 = $(HTS_CFLAGS_AVX2) - @echo HTS_CFLAGS_AVX512 = $(HTS_CFLAGS_AVX512) - @echo HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSE4) - @echo HTS_HAVE_NEON = $(HTS_HAVE_NEON) - @echo LDFLAGS = $(LDFLAGS) - @echo LIBHTS_OBJS = $(LIBHTS_OBJS) - @echo LIBS = $(LIBS) - @echo PLATFORM = $(PLATFORM) - -# The target here is libhts.so, as that is the built file that other rules -# depend upon and that is used when -lhts appears in other program's recipes. -# As a byproduct invisible to make, libhts.so.NN is also created, as it is the -# file used at runtime (when $LD_LIBRARY_PATH includes the build directory). - -libhts.so: $(LIBHTS_OBJS:.o=.pico) - $(CC) -shared -Wl,-soname,libhts.so.$(LIBHTS_SOVERSION) $(VERSION_SCRIPT_LDFLAGS) $(LDFLAGS) -o $@ $(LIBHTS_OBJS:.o=.pico) $(LIBS) -lpthread - ln -sf $@ libhts.so.$(LIBHTS_SOVERSION) - -# Similarly this also creates libhts.NN.dylib as a byproduct, so that programs -# when run can find this uninstalled shared library (when $DYLD_LIBRARY_PATH -# includes this project's build directory). 
- -libhts.dylib: $(LIBHTS_OBJS) - $(CC) -dynamiclib -install_name $(libdir)/libhts.$(LIBHTS_SOVERSION).dylib -current_version $(MACH_O_CURRENT_VERSION) -compatibility_version $(MACH_O_COMPATIBILITY_VERSION) $(LDFLAGS) -o $@ $(LIBHTS_OBJS) $(LIBS) - ln -sf $@ libhts.$(LIBHTS_SOVERSION).dylib - -cyghts-$(LIBHTS_SOVERSION).dll libhts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=libhts.dll.a -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).dll hts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=hts.dll.a -Wl,--enable-auto-import -Wl,--exclude-all-symbols $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).def: hts-$(LIBHTS_SOVERSION).dll - gendef hts-$(LIBHTS_SOVERSION).dll - -hts-$(LIBHTS_SOVERSION).lib: hts-$(LIBHTS_SOVERSION).def - dlltool -m i386:x86-64 -d hts-$(LIBHTS_SOVERSION).def -l hts-$(LIBHTS_SOVERSION).lib - -# Bundling libraries, binaries, dll dependencies, and licenses into a -# single directory. NB: This is not needed for end-users, but a test bed -# for maintainers building binary distributions. -# -# NOTE: only tested on the supported MSYS2/MINGW64 environment. 
-dist-windows: DESTDIR= -dist-windows: prefix=dist-windows -dist-windows: install - cp hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib dist-windows/lib - cp `ldd hts-$(LIBHTS_SOVERSION).dll| awk '/mingw64/ {print $$3}'` dist-windows/bin - mkdir -p dist-windows/share/licenses/htslib - -cp -r /mingw64/share/licenses/mingw-w64-libraries \ - /mingw64/share/licenses/brotli \ - /mingw64/share/licenses/bzip2 \ - /mingw64/share/licenses/gcc-libs \ - /mingw64/share/licenses/libdeflate \ - /mingw64/share/licenses/libpsl \ - /mingw64/share/licenses/libtre \ - /mingw64/share/licenses/libwinpthread \ - /mingw64/share/licenses/openssl \ - /mingw64/share/licenses/xz \ - /mingw64/share/licenses/zlib \ - /mingw64/share/licenses/zstd \ - dist-windows/share/licenses/ - -cp -r /usr/share/licenses/curl \ - dist-windows/share/licenses/ - cp LICENSE dist-windows/share/licenses/htslib/ - - -# Target to allow htslib.mk to build all the object files before it -# links the shared and static libraries. -hts-object-files: $(LIBHTS_OBJS) - touch $@ - -# On Unix dlopen("libhts.so.NN", RTLD_LAZY) may default to RTLD_LOCAL. -# Hence plugins need to link to (shared) libhts.so.NN themselves, as they -# may not be able to access libhts symbols via the main program's libhts -# if that was dynamically loaded without an explicit RTLD_GLOBAL. -%.so: %.pico libhts.so - $(CC) -shared -Wl,-E $(LDFLAGS) -o $@ $< libhts.so $(LIBS) -lpthread - -# For programs *statically* linked to libhts.a, on macOS loading a plugin -# linked to a shared libhts.NN.dylib would lead to conflicting duplicate -# symbols. Fortunately macOS dlopen() defaults to RTLD_GLOBAL so there -# is less need for plugins to link back to libhts themselves. 
-%.bundle: %.o - $(CC) -bundle -Wl,-undefined,dynamic_lookup $(LDFLAGS) -o $@ $< $(LIBS) - -%.cygdll: %.o libhts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< libhts.dll.a $(LIBS) - -%.dll: %.o hts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS) - - -bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h) -errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h) -kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h) -header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h) -hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h) -hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h) -hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(hts_time_funcs_h) -hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h) -hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h) -hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c -vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) 
$(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h) -sam.o sam.pico: sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) -sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h) -tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h) -faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h) -bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h $(bcf_sr_sort_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h) -synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(bcf_sr_sort_h) -vcf_sweep.o vcf_sweep.pico: vcf_sweep.c config.h $(htslib_vcf_sweep_h) $(htslib_bgzf_h) -vcfutils.o vcfutils.pico: vcfutils.c config.h $(htslib_vcfutils_h) $(htslib_kbitset_h) -kfunc.o kfunc.pico: kfunc.c config.h $(htslib_kfunc_h) -regidx.o regidx.pico: regidx.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_regidx_h) $(hts_internal_h) -region.o region.pico: region.c config.h $(htslib_hts_h) $(htslib_khash_h) -md5.o md5.pico: md5.c config.h $(htslib_hts_h) $(htslib_hts_endian_h) -multipart.o multipart.pico: multipart.c config.h $(htslib_kstring_h) $(hts_internal_h) $(hfile_internal_h) -plugin.o plugin.pico: plugin.c config.h $(hts_internal_h) $(htslib_kstring_h) -probaln.o probaln.pico: probaln.c config.h $(htslib_hts_h) -realn.o realn.pico: realn.c config.h $(htslib_hts_h) $(htslib_sam_h) -textutils.o textutils.pico: textutils.c config.h $(htslib_hfile_h) $(htslib_kstring_h) $(htslib_sam_h) 
$(hts_internal_h) - -cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h) -cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) -cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(sam_internal_h) $(htslib_hts_h) $(htslib_hts_endian_h) $(textutils_internal_h) -cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htscodecs_rANS_static4x16_h) $(htslib_hfile_h) $(cram_h) -cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) -cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h) -cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h) -cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h -cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(htslib_hts_h) -cram/pooled_alloc.o cram/pooled_alloc.pico: cram/pooled_alloc.c config.h cram/pooled_alloc.h $(cram_misc_h) -cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h -thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h) $(htslib_hts_log_h) - -htscodecs/htscodecs/arith_dynamic.o htscodecs/htscodecs/arith_dynamic.pico: htscodecs/htscodecs/arith_dynamic.c config.h $(htscodecs_arith_dynamic_h) $(htscodecs_varint_h) 
$(htscodecs_pack_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/fqzcomp_qual.o htscodecs/htscodecs/fqzcomp_qual.pico: htscodecs/htscodecs/fqzcomp_qual.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: htscodecs/htscodecs/htscodecs.c $(htscodecs_htscodecs_h) $(htscodecs_version_h) -htscodecs/htscodecs/pack.o htscodecs/htscodecs/pack.pico: htscodecs/htscodecs/pack.c config.h $(htscodecs_pack_h) -htscodecs/htscodecs/rANS_static32x16pr.o htscodecs/htscodecs/rANS_static32x16pr.pico: htscodecs/htscodecs/rANS_static32x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: htscodecs/htscodecs/rANS_static32x16pr_avx2.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_permute_h) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: htscodecs/htscodecs/rANS_static32x16pr_avx512.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_neon.o htscodecs/htscodecs/rANS_static32x16pr_neon.pico: htscodecs/htscodecs/rANS_static32x16pr_neon.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: htscodecs/htscodecs/rANS_static32x16pr_sse4.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) 
-htscodecs/htscodecs/rANS_static4x16pr.o htscodecs/htscodecs/rANS_static4x16pr.pico: htscodecs/htscodecs/rANS_static4x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(htscodecs_utils_h) $(htscodecs_rANS_static32x16pr_h) -htscodecs/htscodecs/rANS_static.o htscodecs/htscodecs/rANS_static.pico: htscodecs/htscodecs/rANS_static.c config.h $(htscodecs_rANS_byte_h) $(htscodecs_utils_h) $(htscodecs_rANS_static_h) -htscodecs/htscodecs/rle.o htscodecs/htscodecs/rle.pico: htscodecs/htscodecs/rle.c config.h $(htscodecs_varint_h) $(htscodecs_rle_h) -htscodecs/htscodecs/tokenise_name3.o htscodecs/htscodecs/tokenise_name3.pico: htscodecs/htscodecs/tokenise_name3.c config.h $(htscodecs_pooled_alloc_h) $(htscodecs_arith_dynamic_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_tokenise_name3_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/utils.o htscodecs/htscodecs/utils.pico: htscodecs/htscodecs/utils.c config.h $(htscodecs_utils_h) - -# Extra CFLAGS for specific files -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX2) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX512) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: TARGET_CFLAGS = $(HTS_CFLAGS_SSE4) - -bgzip: bgzip.o libhts.a - $(CC) $(LDFLAGS) -o $@ bgzip.o libhts.a $(LIBS) -lpthread - -htsfile: htsfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ htsfile.o libhts.a $(LIBS) -lpthread - -tabix: tabix.o libhts.a - $(CC) $(LDFLAGS) -o $@ tabix.o libhts.a $(LIBS) -lpthread - -bgzip.o: bgzip.c config.h $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_hfile_h) -htsfile.o: htsfile.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -tabix.o: tabix.c config.h $(htslib_tbx_h) $(htslib_sam_h) 
$(htslib_vcf_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(htslib_hts_log_h) - -# Runes to check that the htscodecs submodule is present -ifdef HTSCODECS_SOURCES -htscodecs/htscodecs/%.c: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs/%.h: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs: - @if test -e .git ; then \ - printf "\\n\\nError: htscodecs submodule files not present for htslib.\\n\ - Try running: \\n\ - git submodule update --init --recursive\\n\ - in the top-level htslib directory and then re-run make.\\n\\n\\n" ; \ - else \ - printf "\\n\\nError: htscodecs submodule files not present and this is not a git checkout.\\n\ - You have an incomplete distribution. Please try downloading one of the\\n\ - official releases from https://www.htslib.org/\\n" ; \ - fi - @false - -# Build the htscodecs/htscodecs/version.h file if necessary -htscodecs/htscodecs/version.h: force - @if test -e $(srcdir)/htscodecs/.git && test -e $(srcdir)/htscodecs/configure.ac ; then \ - vers=`cd $(srcdir)/htscodecs && git describe --always --dirty --match 'v[0-9]\.[0-9]*'` && \ - case "$$vers" in \ - v*) vers=$${vers#v} ;; \ - *) iv=`awk '/^AC_INIT/ { match($$0, /^AC_INIT\(htscodecs, *([0-9](\.[0-9])*)\)/, m); print substr($$0, m[1, "start"], m[1, "length"]) }' $(srcdir)/htscodecs/configure.ac` ; vers="$$iv$${vers:+-g$$vers}" ;; \ - esac ; \ - if ! 
grep -s -q '"'"$$vers"'"' $@ ; then \ - echo 'Updating $@ : #define HTSCODECS_VERSION_TEXT "'"$$vers"'"' ; \ - echo '#define HTSCODECS_VERSION_TEXT "'"$$vers"'"' > $@ ; \ - fi ; \ - fi -endif - -# Maintainer source code checks -# - copyright boilerplate presence -# - tab and trailing space detection -maintainer-check: - test/maintainer/check_copyright.pl . - test/maintainer/check_spaces.pl . - -# Look for untracked files in the git repository. -check-untracked: - @if test -e .git && git status --porcelain | grep '^\?'; then \ - echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \ - false ; \ - fi - -# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/ -# for brevity in test and install rules, and so that build logs do not have -# ./ sprinkled throughout. -SRC = $(srcprefix) - -# For tests that might use it, set $REF_PATH explicitly to use only reference -# areas within the test suite (or set it to ':' to use no reference areas). -# -# If using MSYS, avoid poor shell expansion via: -# MSYS2_ARG_CONV_EXCL="*" make check -check test: all $(HTSCODECS_TEST_TARGETS) - test/hts_endian - test/test_expr - test/test_kfunc - test/test_kstring - test/test_str2int - test/test_time_funcs - test/fieldarith test/fieldarith.sam - test/hfile - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -g ./libhts.$(SHLIB_FLAVOUR); \ - fi - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. 
test/with-shlib.sh test/plugins-dlhts -l ./libhts.$(SHLIB_FLAVOUR); \ - fi - test/test_bgzf test/bgziptest.txt - test/test-parse-reg -t test/colons.bam - cd test/faidx && ./test-faidx.sh faidx.tst - cd test/sam_filter && ./filter.sh filter.tst - cd test/tabix && ./test-tabix.sh tabix.tst - cd test/mpileup && ./test-pileup.sh mpileup.tst - cd test/fastq && ./test-fastq.sh - cd test/base_mods && ./base-mods.sh base-mods.tst - REF_PATH=: test/sam test/ce.fa test/faidx/faidx.fa test/faidx/fastqs.fq - test/test-regidx - cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-} - -test/hts_endian: test/hts_endian.o - $(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS) - -test/fuzz/hts_open_fuzzer: test/fuzz/hts_open_fuzzer.o - $(CC) $(LDFLAGS) -o $@ test/fuzz/hts_open_fuzzer.o libhts.a $(LIBS) -lpthread - -test/fieldarith: test/fieldarith.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/fieldarith.o libhts.a $(LIBS) -lpthread - -test/hfile: test/hfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/hfile.o libhts.a $(LIBS) -lpthread - -test/pileup: test/pileup.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread - -test/pileup_mod: test/pileup_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread - -test/plugins-dlhts: test/plugins-dlhts.o - $(CC) $(LDFLAGS) -o $@ test/plugins-dlhts.o $(LIBS) - -test/sam: test/sam.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/sam.o libhts.a $(LIBS) -lpthread - -test/test_bgzf: test/test_bgzf.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a -lz $(LIBS) -lpthread - -test/test_expr: test/test_expr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_expr.o libhts.a -lz $(LIBS) -lpthread - -test/test_faidx: test/test_faidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_faidx.o libhts.a -lz $(LIBS) -lpthread - -test/test_kfunc: test/test_kfunc.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a -lz $(LIBS) -lpthread - -test/test_kstring: test/test_kstring.o libhts.a - $(CC) $(LDFLAGS) -o $@ 
test/test_kstring.o libhts.a -lz $(LIBS) -lpthread - -test/test_mod: test/test_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread - -test/test_realn: test/test_realn.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread - -test/test-regidx: test/test-regidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread - -test/test-parse-reg: test/test-parse-reg.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-parse-reg.o libhts.a $(LIBS) -lpthread - -test/test_str2int: test/test_str2int.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_str2int.o libhts.a $(LIBS) -lpthread - -test/test_time_funcs: test/test_time_funcs.o - $(CC) $(LDFLAGS) -o $@ test/test_time_funcs.o - -test/test_view: test/test_view.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LIBS) -lpthread - -test/test_index: test/test_index.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_index.o libhts.a $(LIBS) -lpthread - -test/test-vcf-api: test/test-vcf-api.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-api.o libhts.a $(LIBS) -lpthread - -test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-sweep.o libhts.a $(LIBS) -lpthread - -test/test-bcf-sr: test/test-bcf-sr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-sr.o libhts.a -lz $(LIBS) -lpthread - -test/test-bcf-translate: test/test-bcf-translate.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a -lz $(LIBS) -lpthread - -test/test_introspection: test/test_introspection.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_introspection.o libhts.a $(LIBS) -lpthread - -test/test-bcf_set_variant_type: test/test-bcf_set_variant_type.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf_set_variant_type.o libhts.a $(LIBS) -lpthread - -# Extra tests for bundled htscodecs -test_htscodecs_rans4x8: htscodecs/tests/rans4x8 - cd htscodecs/tests && srcdir=. 
&& export srcdir && ./rans4x8.test - -test_htscodecs_rans4x16: htscodecs/tests/rans4x16pr - cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x16.test - -test_htscodecs_arith: htscodecs/tests/arith_dynamic - cd htscodecs/tests && srcdir=. && export srcdir && ./arith.test - -test_htscodecs_tok3: htscodecs/tests/tokenise_name3 - cd htscodecs/tests && srcdir=. && export srcdir && ./tok3.test - -test_htscodecs_fqzcomp: htscodecs/tests/fqzcomp_qual - cd htscodecs/tests && srcdir=. && export srcdir && ./fqzcomp.test - -test_htscodecs_varint: htscodecs/tests/varint - cd htscodecs/tests && ./varint - -htscodecs/tests/arith_dynamic: htscodecs/tests/arith_dynamic_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/fqzcomp_qual: htscodecs/tests/fqzcomp_qual_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x16pr: htscodecs/tests/rANS_static4x16pr_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x8: htscodecs/tests/rANS_static_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/tokenise_name3: htscodecs/tests/tokenise_name3_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/varint: htscodecs/tests/varint_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/arith_dynamic_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/arith_dynamic_test.o: htscodecs/tests/arith_dynamic_test.c config.h $(htscodecs_arith_dynamic_h) -htscodecs/tests/fqzcomp_qual_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/fqzcomp_qual_test.o: htscodecs/tests/fqzcomp_qual_test.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) -htscodecs/tests/rANS_static4x16pr_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static4x16pr_test.o: htscodecs/tests/rANS_static4x16pr_test.c config.h $(htscodecs_rANS_static4x16_h) 
-htscodecs/tests/rANS_static_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static_test.o: htscodecs/tests/rANS_static_test.c config.h $(htscodecs_rANS_static_h) -htscodecs/tests/tokenise_name3_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/tokenise_name3_test.o: htscodecs/tests/tokenise_name3_test.c config.h $(htscodecs_tokenise_name3_h) -htscodecs/tests/varint_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/varint_test.o: htscodecs/tests/varint_test.c config.h $(htscodecs_varint_h) - -test/hts_endian.o: test/hts_endian.c config.h $(htslib_hts_endian_h) -test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h) -test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h) -test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h) -test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h) -test/plugins-dlhts.o: test/plugins-dlhts.c config.h -test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h) -test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(hfile_internal_h) -test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h) -test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h) -test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h) -test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h) -test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h) -test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) -test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h) -test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h) -test/test_time_funcs.o: 
test/test_time_funcs.c config.h $(hts_time_funcs_h) -test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h) -test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h) -test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h) -test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h) -test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h) -test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_synced_bcf_reader_h) $(htslib_hts_h) $(htslib_vcf_h) -test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h) -test/test_introspection.o: test/test_introspection.c config.h $(htslib_hts_h) $(htslib_hfile_h) -test/test-bcf_set_variant_type.o: test/test-bcf_set_variant_type.c config.h $(htslib_hts_h) vcf.c - - -test/thrash_threads1: test/thrash_threads1.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads1.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads2: test/thrash_threads2.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads2.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads3: test/thrash_threads3.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads3.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads4: test/thrash_threads4.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads4.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads5: test/thrash_threads5.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads5.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads6: test/thrash_threads6.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads6.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads7: test/thrash_threads7.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads7.o libhts.a -lz $(LIBS) -lpthread - -test_thrash: $(BUILT_THRASH_PROGRAMS) - -# Test to ensure the functions in the header files are exported by the shared -# library. 
This currently works by comparing the output from ctags on -# the headers with the list of functions exported by the shared library. -# Note that functions marked as exported in the .c files and not the public -# headers will be missed by this test. -test-shlib-exports: header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - @echo "Checking shared library exports" - @if test ! -s header-exports.txt ; then echo "Error: header-exports.txt empty" ; false ; fi - @if test ! -s shlib-exports-$(SHLIB_FLAVOUR).txt ; then echo "Error: shlib-exports-$(SHLIB_FLAVOUR).txt empty" ; false ; fi - @! comm -23 header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt | grep . || \ - ( echo "Error: Found unexported symbols (listed above)" ; false ) - -# Extract symbols that should be exported from public headers using ctags -# Filter out macros in htslib/hts_defs.h. -header-exports.txt: test/header_syms.pl htslib/*.h - test/header_syms.pl htslib/*.h | sort -u -o $@ - -shlib-exports-so.txt: libhts.so - nm -D -g libhts.so | awk '$$2 == "T" { sub("@.*", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dylib.txt: libhts.dylib - nm -Ug libhts.dylib | awk '$$2 == "T" { sub("^_", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dll.txt: hts.dll.a - nm -g hts.dll.a | awk '$$2 == "T" { print $$3 }' | sort -u -o $@ - -$(srcprefix)htslib.map: libhts.so - LC_ALL=C ; export LC_ALL; \ - curr_vers=`expr 'X$(PACKAGE_VERSION)' : 'X\([0-9]*\.[0-9.]*\)'` ; \ - last_vers=`awk '/^HTSLIB_[0-9](\.[0-9]+)+/ { lv = $$1 } END { print lv }' htslib.map` ; \ - if test "x$$curr_vers" = 'x' || test "x$$last_vers" = 'x' ; then \ - echo "Version check failed : $$curr_vers / $$las_vers" 1>&2 ; \ - exit 1 ; \ - fi && \ - if test "HTSLIB_$$curr_vers" = "$$last_vers" ; then \ - echo "Refusing to update $@ - HTSlib version not changed" 1>&2 ; \ - exit 1 ; \ - fi && \ - nm --with-symbol-versions -D -g libhts.so | awk '$$2 ~ /^[DGRT]$$/ && $$3 ~ /@@Base$$/ && $$3 !~ /^(_init|_fini|_edata)@@/ { sub(/@@Base$$/, 
";", $$3); print " " $$3 }' > $@.tmp && \ - if [ -s $@.tmp ] ; then \ - cat $@ > $@.new.tmp && \ - printf '\n%s {\n' "HTSLIB_$$curr_vers" >> $@.new.tmp && \ - cat $@.tmp >> $@.new.tmp && \ - printf '} %s;\n' "$$last_vers" >> $@.new.tmp && \ - rm -f $@.tmp && \ - mv $@.new.tmp $@ ; \ - fi ; \ - else \ - rm -f $@.tmp ; \ - fi - -install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB_FLAVOUR) install-pkgconfig - $(INSTALL_PROGRAM) $(BUILT_PROGRAMS) $(DESTDIR)$(bindir) - if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi - $(INSTALL_DATA) $(SRC)htslib/*.h $(DESTDIR)$(includedir)/htslib - $(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a - $(INSTALL_MAN) $(SRC)bgzip.1 $(SRC)htsfile.1 $(SRC)tabix.1 $(DESTDIR)$(man1dir) - $(INSTALL_MAN) $(SRC)faidx.5 $(SRC)sam.5 $(SRC)vcf.5 $(DESTDIR)$(man5dir) - $(INSTALL_MAN) $(SRC)htslib-s3-plugin.7 $(DESTDIR)$(man7dir) - -installdirs: - $(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(includedir) $(DESTDIR)$(includedir)/htslib $(DESTDIR)$(libdir) $(DESTDIR)$(man1dir) $(DESTDIR)$(man5dir) $(DESTDIR)$(man7dir) $(DESTDIR)$(pkgconfigdir) - if test -n "$(plugindir)"; then $(INSTALL_DIR) $(DESTDIR)$(plugindir); fi - -# After installation, the real file in $(libdir) will be libhts.so.X.Y.Z, -# with symlinks libhts.so (used via -lhts during linking of client programs) -# and libhts.so.NN (used by client executables at runtime). 
- -install-so: libhts.so installdirs - $(INSTALL_LIB) libhts.so $(DESTDIR)$(libdir)/libhts.so.$(PACKAGE_VERSION) - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so.$(LIBHTS_SOVERSION) - -install-cygdll: cyghts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) cyghts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/cyghts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) libhts.dll.a $(DESTDIR)$(libdir)/libhts.dll.a - -install-dll: hts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) hts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/hts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) hts.dll.a $(DESTDIR)$(libdir)/hts.dll.a - -install-dylib: libhts.dylib installdirs - $(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.$(LIBHTS_SOVERSION).dylib - -# Substitute these pseudo-autoconf variables only at install time -# so that "make install prefix=/prefix/path" etc continue to work. -install-pkgconfig: htslib.pc.tmp installdirs - sed -e 's#@-includedir@#$(includedir)#g;s#@-libdir@#$(libdir)#g;s#@-PACKAGE_VERSION@#$(PACKAGE_VERSION)#g' htslib.pc.tmp > $(DESTDIR)$(pkgconfigdir)/htslib.pc - chmod 644 $(DESTDIR)$(pkgconfigdir)/htslib.pc - -# A pkg-config file (suitable for copying to $PKG_CONFIG_PATH) that provides -# flags for building against the uninstalled library in this build directory. 
-htslib-uninstalled.pc: htslib.pc.tmp - sed -e 's#@-includedir@#'`pwd`'#g;s#@-libdir@#'`pwd`'#g' htslib.pc.tmp > $@ - - -testclean: - -rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* test/faidx/FAIL* \ - test/longrefs/*.tmp.* test/tabix/*.tmp.* test/tabix/FAIL* \ - header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - -rm -rf htscodecs/tests/test.out - -# Only remove this in git checkouts -DEL_HTSCODECS_VERSION := $(if $(wildcard htscodecs/.git),htscodecs/htscodecs/version.h) - -mostlyclean: testclean - -rm -f *.o *.pico cram/*.o cram/*.pico test/*.o test/*.dSYM config_vars.h version.h - -rm -f htscodecs/htscodecs/*.o htscodecs/htscodecs/*.pico $(DEL_HTSCODECS_VERSION) - -rm -f hts-object-files - -rm -f htscodecs/tests/*.o - -clean: mostlyclean clean-$(SHLIB_FLAVOUR) - -rm -f libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) $(BUILT_TEST_PROGRAMS) $(BUILT_THRASH_PROGRAMS) - -rm -f htscodecs/tests/rans4x8 htscodecs/tests/rans4x16pr htscodecs/tests/arith_dynamic htscodecs/tests/tokenise_name3 htscodecs/tests/fqzcomp_qual htscodecs/tests/varint - -distclean maintainer-clean: clean - -rm -f config.cache config.h config.log config.mk config.status - -rm -f TAGS *.pc.tmp *-uninstalled.pc htslib_static.mk htscodecs.mk - -rm -rf autom4te.cache - -clean-so: - -rm -f libhts.so libhts.so.* - -clean-cygdll: - -rm -f cyghts-*.dll libhts.dll.a - -clean-dll: - -rm -f hts-*.dll hts.dll.a - -clean-dylib: - -rm -f libhts.dylib libhts.*.dylib - - -tags TAGS: - ctags -f TAGS *.[ch] cram/*.[ch] htslib/*.h - -# We recommend libhts-using programs be built against a separate htslib -# installation. However if you feel that you must bundle htslib source -# code with your program, this hook enables Automake-style "make dist" -# for this subdirectory. If you do bundle an htslib snapshot, please -# add identifying information to $(PACKAGE_VERSION) as appropriate. 
-# (The wildcards attempt to omit non-exported files (.git*, README.md, -# etc) and other detritus that might be in the top-level directory.) -distdir: - @if [ -z "$(distdir)" ]; then echo "Please supply a distdir=DIR argument."; false; fi - tar -c *.[ch15] [ILMNRchtv]*[ELSbcekmnth] | (cd $(distdir) && tar -x) - +cd $(distdir) && $(MAKE) distclean - -force: - - -.PHONY: all check check-untracked clean distclean distdir force -.PHONY: install install-pkgconfig installdirs lib-shared lib-static -.PHONY: maintainer-check maintainer-clean mostlyclean plugins -.PHONY: print-config print-version show-version tags -.PHONY: test test-shlib-exports test_thrash testclean -.PHONY: clean-so install-so -.PHONY: clean-cygdll install-cygdll -.PHONY: clean-dll install-dll -.PHONY: clean-dylib install-dylib -.PHONY: test_htscodecs_rans4x8 test_htscodecs_rans4x16 test_htscodecs_arith -.PHONY: test_htscodecs_tok3 test_htscodecs_fqzcomp test_htscodecs_varint diff --git a/src/htslib-1.19.1/Makefile.win b/src/htslib-1.19.1/Makefile.win deleted file mode 100644 index e82c351..0000000 --- a/src/htslib-1.19.1/Makefile.win +++ /dev/null @@ -1,970 +0,0 @@ -# Makefile for htslib, a C library for high-throughput sequencing data formats. -# -# Copyright (C) 2013-2023 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# Get CC, AR, RANLIB, CFLAGS, CPPFLAGS, and LDFLAGS values from -# ${R_HOME}/etc/Makeconf or from a customized Makevars file (site-wide -# or user-specified). -CC := $(shell ${R_HOME}/bin/R CMD config CC) -AR := $(shell ${R_HOME}/bin/R CMD config AR) -RANLIB := $(shell ${R_HOME}/bin/R CMD config RANLIB) -CFLAGS := $(shell ${R_HOME}/bin/R CMD config CFLAGS) -CPPFLAGS := $(shell ${R_HOME}/bin/R CMD config CPPFLAGS) -LDFLAGS := $(shell ${R_HOME}/bin/R CMD config LDFLAGS) - -# Default libraries to link if configure is not used -htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl -lbcrypt -lidn2 -lunistring -liconv -lssl -lcrypto -lcrypt32 -lwsock32 -lwldap32 -lssh2 -lgcrypt -lgpg-error -lws2_32 -lzstd -lregex - -CPPFLAGS += -D_FILE_OFFSET_BITS=64 -DCURL_STATICLIB -# TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64 -# to CFLAGS manually, here or in config.mk if the latter exists. 
-# TODO: probably update cram code to make it compile cleanly with -Wc++-compat -# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600 -#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600 -CFLAGS += -fpic -fvisibility=hidden -Wstrict-prototypes -EXTRA_CFLAGS_PIC = -TARGET_CFLAGS = -LDFLAGS = -fvisibility=hidden -VERSION_SCRIPT_LDFLAGS = -Wl,-version-script,$(srcprefix)htslib.map -LIBS = $(htslib_default_libs) - -prefix = /usr/local -exec_prefix = $(prefix) -bindir = $(exec_prefix)/bin -includedir = $(prefix)/include -libdir = $(exec_prefix)/lib -libexecdir = $(exec_prefix)/libexec -datarootdir = $(prefix)/share -mandir = $(datarootdir)/man -man1dir = $(mandir)/man1 -man5dir = $(mandir)/man5 -man7dir = $(mandir)/man7 -pkgconfigdir= $(libdir)/pkgconfig - -MKDIR_P = mkdir -p -INSTALL = install -p -INSTALL_DATA = $(INSTALL) -m 644 -INSTALL_DIR = $(MKDIR_P) -m 755 -LIB_PERM = 644 -INSTALL_LIB = $(INSTALL) -m $(LIB_PERM) -INSTALL_MAN = $(INSTALL_DATA) -INSTALL_PROGRAM = $(INSTALL) - -# Set by config.mk if plugins are enabled -plugindir = - -BUILT_PROGRAMS = \ - bgzip \ - htsfile \ - tabix - -BUILT_TEST_PROGRAMS = \ - test/hts_endian \ - test/fieldarith \ - test/hfile \ - test/pileup \ - test/pileup_mod \ - test/plugins-dlhts \ - test/sam \ - test/test_bgzf \ - test/test_expr \ - test/test_faidx \ - test/test_kfunc \ - test/test_kstring \ - test/test_mod \ - test/test_realn \ - test/test-regidx \ - test/test_str2int \ - test/test_time_funcs \ - test/test_view \ - test/test_index \ - test/test-vcf-api \ - test/test-vcf-sweep \ - test/test-bcf-sr \ - test/fuzz/hts_open_fuzzer.o \ - test/test-bcf-translate \ - test/test-parse-reg \ - test/test_introspection \ - test/test-bcf_set_variant_type - -BUILT_THRASH_PROGRAMS = \ - test/thrash_threads1 \ - test/thrash_threads2 \ - test/thrash_threads3 \ - test/thrash_threads4 \ - test/thrash_threads5 \ - test/thrash_threads6 \ - test/thrash_threads7 - -all: lib-static $(BUILT_PROGRAMS) plugins 
$(BUILT_TEST_PROGRAMS) \ - htslib_static.mk htslib-uninstalled.pc - -ALL_CPPFLAGS = -I. $(CPPFLAGS) - -# Usually htscodecs.mk is generated by running configure or config.status, -# but if those aren't used create a default here. -htscodecs.mk: - echo '# Default htscodecs.mk generated by Makefile' > $@ - echo 'include $$(HTSPREFIX)htscodecs_bundled.mk' >> $@ - $(srcdir)/hts_probe_cc.sh '$(CC)' '$(CFLAGS) $(CPPFLAGS)' '$(LDFLAGS)' >> $@ - -srcdir = . -srcprefix = -HTSPREFIX = - -# Flags for SIMD code -HTS_CFLAGS_AVX2 = -HTS_CFLAGS_AVX512 = -HTS_CFLAGS_SSE4 = - -# Control building of SIMD code. Not used if configure has been run. -HTS_BUILD_AVX2 = -HTS_BUILD_AVX512 = -HTS_BUILD_SSSE3 = -HTS_BUILD_POPCNT = -HTS_BUILD_SSE4_1 = - -include htslib_vars.mk -include htscodecs.mk - -# If not using GNU make, you need to copy the version number from version.sh -# into here. -PACKAGE_VERSION := $(shell $(srcdir)/version.sh) - -LIBHTS_SOVERSION = 3 - -# Version numbers for the Mac dynamic library. Note that the leading 3 -# is not strictly necessary and should be removed the next time -# LIBHTS_SOVERSION is bumped (see #1144 and -# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23) -MACH_O_COMPATIBILITY_VERSION = 3.1.18 -MACH_O_CURRENT_VERSION = 3.1.18 - -# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string -# even if this is a dirty or untagged Git working tree. -NUMERIC_VERSION := $(shell $(srcdir)/version.sh numeric) - -# Force version.h to be remade if $(PACKAGE_VERSION) has changed. 
-version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force)) - -version.h: - echo '#define HTS_VERSION_TEXT "$(PACKAGE_VERSION)"' > $@ - -print-version: - @echo $(PACKAGE_VERSION) - -show-version: - @echo PACKAGE_VERSION = $(PACKAGE_VERSION) - @echo NUMERIC_VERSION = $(NUMERIC_VERSION) - -config_vars.h: override escape=$(subst ',\x27,$(subst ",\",$(subst \,\\,$(1)))) -config_vars.h: override hts_cc_escaped=$(call escape,$(CC)) -config_vars.h: override hts_cppflags_escaped=$(call escape,$(CPPFLAGS)) -config_vars.h: override hts_cflags_escaped=$(call escape,$(CFLAGS)) -config_vars.h: override hts_ldflags_escaped=$(call escape,$(LDFLAGS)) -config_vars.h: override hts_libs_escaped=$(call escape,$(LIBS)) - -config_vars.h: - printf '#define HTS_CC "%s"\n#define HTS_CPPFLAGS "%s"\n#define HTS_CFLAGS "%s"\n#define HTS_LDFLAGS "%s"\n#define HTS_LIBS "%s"\n' \ - '$(hts_cc_escaped)' \ - '$(hts_cppflags_escaped)' \ - '$(hts_cflags_escaped)' \ - '$(hts_ldflags_escaped)' \ - '$(hts_libs_escaped)' > $@ - -.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so - -.c.o: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $< - -.c.pico: - $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $< - - -LIBHTS_OBJS = \ - kfunc.o \ - kstring.o \ - bcf_sr_sort.o \ - bgzf.o \ - errmod.o \ - faidx.o \ - header.o \ - hfile.o \ - hts.o \ - hts_expr.o \ - hts_os.o\ - md5.o \ - multipart.o \ - probaln.o \ - realn.o \ - regidx.o \ - region.o \ - sam.o \ - sam_mods.o \ - synced_bcf_reader.o \ - vcf_sweep.o \ - tbx.o \ - textutils.o \ - thread_pool.o \ - vcf.o \ - vcfutils.o \ - cram/cram_codecs.o \ - cram/cram_decode.o \ - cram/cram_encode.o \ - cram/cram_external.o \ - cram/cram_index.o \ - cram/cram_io.o \ - cram/cram_stats.o \ - cram/mFILE.o \ - cram/open_trace_file.o \ - cram/pooled_alloc.o \ - cram/string_alloc.o \ - $(HTSCODECS_OBJS) \ - $(NONCONFIGURE_OBJS) - -# Without configure we wish to have a rich set of 
default figures, -# but we still need conditional inclusion as we wish to still -# support ./configure --disable-blah. -NONCONFIGURE_OBJS = hfile_libcurl.o - -PLUGIN_EXT = -PLUGIN_OBJS = - -cram_h = cram/cram.h $(cram_samtools_h) $(header_h) $(cram_structs_h) $(cram_io_h) cram/cram_encode.h cram/cram_decode.h cram/cram_stats.h cram/cram_codecs.h cram/cram_index.h $(htslib_cram_h) -cram_io_h = cram/cram_io.h $(cram_misc_h) -cram_misc_h = cram/misc.h -cram_os_h = cram/os.h $(htslib_hts_endian_h) -cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h) -cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) $(htslib_cram_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h) -cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h -bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h) -header_h = header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h) $(htslib_sam_h) -hfile_internal_h = hfile_internal.h $(htslib_hts_defs_h) $(htslib_hfile_h) $(textutils_internal_h) -hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h) -hts_time_funcs_h = hts_time_funcs.h -sam_internal_h = sam_internal.h $(htslib_sam_h) -textutils_internal_h = textutils_internal.h $(htslib_kstring_h) -thread_pool_internal_h = thread_pool_internal.h $(htslib_thread_pool_h) - -# To be effective, config.mk needs to appear after most Makefile variables are -# set but before most rules appear, so that it can both use previously-set -# variables in its own rules' prerequisites and also update variables for use -# in later rules' prerequisites. - -# If your make doesn't accept -include, change this to 'include' if you are -# using the configure script or just comment the line out if you are not. --include config.mk - -# Usually config.h is generated by running configure or config.status, -# but if those aren't used create a default config.h here. 
-config.h: - echo '/* Default config.h generated by Makefile */' > $@ - echo '#ifndef _XOPEN_SOURCE' >> $@ - echo '#define _XOPEN_SOURCE 600' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_LIBBZ2 1' >> $@ - echo '#define HAVE_LIBLZMA 1' >> $@ - echo '#ifndef __APPLE__' >> $@ - echo '#define HAVE_LZMA_H 1' >> $@ - echo '#endif' >> $@ - echo '#define HAVE_LIBCURL 1' >> $@ - if [ "x$(HTS_BUILD_POPCNT)" != "x" ] && \ - [ "x$(HTS_BUILD_SSE4_1)" != "x" ] && \ - [ "x$(HTS_BUILD_SSSE3)" != "x" ]; then \ - echo '#define HAVE_POPCNT 1' >> $@ ; \ - echo '#define HAVE_SSE4_1 1' >> $@ ; \ - echo '#define HAVE_SSSE3 1' >> $@ ; \ - echo '#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0' >> $@ ; \ - echo '#define UBSAN 1' >> $@ ; \ - echo '#endif' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \ - echo '#define HAVE_AVX2 1' >> $@ ; \ - fi - if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \ - echo '#define HAVE_AVX512 1' >> $@ ; \ - fi - -# And similarly for htslib.pc.tmp ("pkg-config template"). No dependency -# on htslib.pc.in listed, as if that file is newer the usual way to regenerate -# this target is via configure or config.status rather than this rule. -htslib.pc.tmp: - sed -e '/^static_libs=/s/@static_LIBS@/$(htslib_default_libs)/;s#@[^-][^@]*@##g' $(srcprefix)htslib.pc.in > $@ - -# Create a makefile fragment listing the libraries and LDFLAGS needed for -# static linking. This can be included by projects that want to build -# and link against the htslib source tree instead of an installed library. -htslib_static.mk: htslib.pc.tmp - sed -n '/^static_libs=/s/[^=]*=/HTSLIB_static_LIBS = /p;/^static_ldflags=/s/[^=]*=/HTSLIB_static_LDFLAGS = /p' $< > $@ - - -lib-static: libhts.a - -# $(shell), :=, and ifeq/.../endif are GNU Make-specific. If you don't have -# GNU Make, comment out the parts of these conditionals that don't apply. 
-ifneq "$(origin PLATFORM)" "file" -PLATFORM := $(shell uname -s) -endif -ifeq "$(PLATFORM)" "Darwin" -SHLIB_FLAVOUR = dylib -lib-shared: libhts.dylib -else ifeq "$(findstring CYGWIN,$(PLATFORM))" "CYGWIN" -SHLIB_FLAVOUR = cygdll -lib-shared: cyghts-$(LIBHTS_SOVERSION).dll -else ifeq "$(findstring MSYS,$(PLATFORM))" "MSYS" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else ifeq "$(findstring MINGW,$(PLATFORM))" "MINGW" -SHLIB_FLAVOUR = dll -lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib -else -SHLIB_FLAVOUR = so -lib-shared: libhts.so -endif - -BUILT_PLUGINS = $(PLUGIN_OBJS:.o=$(PLUGIN_EXT)) - -ifneq "$(BUILT_PLUGINS)" "" -plugins: lib-shared -endif -plugins: $(BUILT_PLUGINS) - - -libhts.a: $(LIBHTS_OBJS) - @-rm -f $@ - $(AR) -rc $@ $(LIBHTS_OBJS) - -$(RANLIB) $@ - -print-config: - @echo HTS_CFLAGS_AVX2 = $(HTS_CFLAGS_AVX2) - @echo HTS_CFLAGS_AVX512 = $(HTS_CFLAGS_AVX512) - @echo HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSE4) - @echo HTS_HAVE_NEON = $(HTS_HAVE_NEON) - @echo LDFLAGS = $(LDFLAGS) - @echo LIBHTS_OBJS = $(LIBHTS_OBJS) - @echo LIBS = $(LIBS) - @echo PLATFORM = $(PLATFORM) - -# The target here is libhts.so, as that is the built file that other rules -# depend upon and that is used when -lhts appears in other program's recipes. -# As a byproduct invisible to make, libhts.so.NN is also created, as it is the -# file used at runtime (when $LD_LIBRARY_PATH includes the build directory). - -libhts.so: $(LIBHTS_OBJS:.o=.pico) - $(CC) -shared -Wl,-soname,libhts.so.$(LIBHTS_SOVERSION) $(VERSION_SCRIPT_LDFLAGS) $(LDFLAGS) -o $@ $(LIBHTS_OBJS:.o=.pico) $(LIBS) -lpthread - ln -sf $@ libhts.so.$(LIBHTS_SOVERSION) - -# Similarly this also creates libhts.NN.dylib as a byproduct, so that programs -# when run can find this uninstalled shared library (when $DYLD_LIBRARY_PATH -# includes this project's build directory). 
- -libhts.dylib: $(LIBHTS_OBJS) - $(CC) -dynamiclib -install_name $(libdir)/libhts.$(LIBHTS_SOVERSION).dylib -current_version $(MACH_O_CURRENT_VERSION) -compatibility_version $(MACH_O_COMPATIBILITY_VERSION) $(LDFLAGS) -o $@ $(LIBHTS_OBJS) $(LIBS) - ln -sf $@ libhts.$(LIBHTS_SOVERSION).dylib - -cyghts-$(LIBHTS_SOVERSION).dll libhts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=libhts.dll.a -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).dll hts.dll.a: $(LIBHTS_OBJS) - $(CC) -shared -Wl,--out-implib=hts.dll.a -Wl,--enable-auto-import -Wl,--exclude-all-symbols $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread - -hts-$(LIBHTS_SOVERSION).def: hts-$(LIBHTS_SOVERSION).dll - gendef hts-$(LIBHTS_SOVERSION).dll - -hts-$(LIBHTS_SOVERSION).lib: hts-$(LIBHTS_SOVERSION).def - dlltool -m i386:x86-64 -d hts-$(LIBHTS_SOVERSION).def -l hts-$(LIBHTS_SOVERSION).lib - -# Bundling libraries, binaries, dll dependencies, and licenses into a -# single directory. NB: This is not needed for end-users, but a test bed -# for maintainers building binary distributions. -# -# NOTE: only tested on the supported MSYS2/MINGW64 environment. 
-dist-windows: DESTDIR= -dist-windows: prefix=dist-windows -dist-windows: install - cp hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib dist-windows/lib - cp `ldd hts-$(LIBHTS_SOVERSION).dll| awk '/mingw64/ {print $$3}'` dist-windows/bin - mkdir -p dist-windows/share/licenses/htslib - -cp -r /mingw64/share/licenses/mingw-w64-libraries \ - /mingw64/share/licenses/brotli \ - /mingw64/share/licenses/bzip2 \ - /mingw64/share/licenses/gcc-libs \ - /mingw64/share/licenses/libdeflate \ - /mingw64/share/licenses/libpsl \ - /mingw64/share/licenses/libtre \ - /mingw64/share/licenses/libwinpthread \ - /mingw64/share/licenses/openssl \ - /mingw64/share/licenses/xz \ - /mingw64/share/licenses/zlib \ - /mingw64/share/licenses/zstd \ - dist-windows/share/licenses/ - -cp -r /usr/share/licenses/curl \ - dist-windows/share/licenses/ - cp LICENSE dist-windows/share/licenses/htslib/ - - -# Target to allow htslib.mk to build all the object files before it -# links the shared and static libraries. -hts-object-files: $(LIBHTS_OBJS) - touch $@ - -# On Unix dlopen("libhts.so.NN", RTLD_LAZY) may default to RTLD_LOCAL. -# Hence plugins need to link to (shared) libhts.so.NN themselves, as they -# may not be able to access libhts symbols via the main program's libhts -# if that was dynamically loaded without an explicit RTLD_GLOBAL. -%.so: %.pico libhts.so - $(CC) -shared -Wl,-E $(LDFLAGS) -o $@ $< libhts.so $(LIBS) -lpthread - -# For programs *statically* linked to libhts.a, on macOS loading a plugin -# linked to a shared libhts.NN.dylib would lead to conflicting duplicate -# symbols. Fortunately macOS dlopen() defaults to RTLD_GLOBAL so there -# is less need for plugins to link back to libhts themselves. 
-%.bundle: %.o - $(CC) -bundle -Wl,-undefined,dynamic_lookup $(LDFLAGS) -o $@ $< $(LIBS) - -%.cygdll: %.o libhts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< libhts.dll.a $(LIBS) - -%.dll: %.o hts.dll.a - $(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS) - - -bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h) -errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h) -kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h) -header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h) -hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h) -hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h) -hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) -hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(hts_time_funcs_h) -hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h) -hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h) -hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c -vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) 
$(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h) -sam.o sam.pico: sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) -sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h) -tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h) -faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h) -bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h $(bcf_sr_sort_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h) -synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(bcf_sr_sort_h) -vcf_sweep.o vcf_sweep.pico: vcf_sweep.c config.h $(htslib_vcf_sweep_h) $(htslib_bgzf_h) -vcfutils.o vcfutils.pico: vcfutils.c config.h $(htslib_vcfutils_h) $(htslib_kbitset_h) -kfunc.o kfunc.pico: kfunc.c config.h $(htslib_kfunc_h) -regidx.o regidx.pico: regidx.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_regidx_h) $(hts_internal_h) -region.o region.pico: region.c config.h $(htslib_hts_h) $(htslib_khash_h) -md5.o md5.pico: md5.c config.h $(htslib_hts_h) $(htslib_hts_endian_h) -multipart.o multipart.pico: multipart.c config.h $(htslib_kstring_h) $(hts_internal_h) $(hfile_internal_h) -plugin.o plugin.pico: plugin.c config.h $(hts_internal_h) $(htslib_kstring_h) -probaln.o probaln.pico: probaln.c config.h $(htslib_hts_h) -realn.o realn.pico: realn.c config.h $(htslib_hts_h) $(htslib_sam_h) -textutils.o textutils.pico: textutils.c config.h $(htslib_hfile_h) $(htslib_kstring_h) $(htslib_sam_h) 
$(hts_internal_h) - -cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h) -cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) -cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(sam_internal_h) $(htslib_hts_h) $(htslib_hts_endian_h) $(textutils_internal_h) -cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htscodecs_rANS_static4x16_h) $(htslib_hfile_h) $(cram_h) -cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) -cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h) -cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h) -cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h -cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(htslib_hts_h) -cram/pooled_alloc.o cram/pooled_alloc.pico: cram/pooled_alloc.c config.h cram/pooled_alloc.h $(cram_misc_h) -cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h -thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h) $(htslib_hts_log_h) - -htscodecs/htscodecs/arith_dynamic.o htscodecs/htscodecs/arith_dynamic.pico: htscodecs/htscodecs/arith_dynamic.c config.h $(htscodecs_arith_dynamic_h) $(htscodecs_varint_h) 
$(htscodecs_pack_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/fqzcomp_qual.o htscodecs/htscodecs/fqzcomp_qual.pico: htscodecs/htscodecs/fqzcomp_qual.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) -htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: htscodecs/htscodecs/htscodecs.c $(htscodecs_htscodecs_h) $(htscodecs_version_h) -htscodecs/htscodecs/pack.o htscodecs/htscodecs/pack.pico: htscodecs/htscodecs/pack.c config.h $(htscodecs_pack_h) -htscodecs/htscodecs/rANS_static32x16pr.o htscodecs/htscodecs/rANS_static32x16pr.pico: htscodecs/htscodecs/rANS_static32x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: htscodecs/htscodecs/rANS_static32x16pr_avx2.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_permute_h) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: htscodecs/htscodecs/rANS_static32x16pr_avx512.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_neon.o htscodecs/htscodecs/rANS_static32x16pr_neon.pico: htscodecs/htscodecs/rANS_static32x16pr_neon.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: htscodecs/htscodecs/rANS_static32x16pr_sse4.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) 
-htscodecs/htscodecs/rANS_static4x16pr.o htscodecs/htscodecs/rANS_static4x16pr.pico: htscodecs/htscodecs/rANS_static4x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(htscodecs_utils_h) $(htscodecs_rANS_static32x16pr_h) -htscodecs/htscodecs/rANS_static.o htscodecs/htscodecs/rANS_static.pico: htscodecs/htscodecs/rANS_static.c config.h $(htscodecs_rANS_byte_h) $(htscodecs_utils_h) $(htscodecs_rANS_static_h) -htscodecs/htscodecs/rle.o htscodecs/htscodecs/rle.pico: htscodecs/htscodecs/rle.c config.h $(htscodecs_varint_h) $(htscodecs_rle_h) -htscodecs/htscodecs/tokenise_name3.o htscodecs/htscodecs/tokenise_name3.pico: htscodecs/htscodecs/tokenise_name3.c config.h $(htscodecs_pooled_alloc_h) $(htscodecs_arith_dynamic_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_tokenise_name3_h) $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs/htscodecs/utils.o htscodecs/htscodecs/utils.pico: htscodecs/htscodecs/utils.c config.h $(htscodecs_utils_h) - -# Extra CFLAGS for specific files -htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX2) -htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX512) -htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: TARGET_CFLAGS = $(HTS_CFLAGS_SSE4) - -bgzip: bgzip.o libhts.a - $(CC) $(LDFLAGS) -o $@ bgzip.o libhts.a $(LIBS) -lpthread - -htsfile: htsfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ htsfile.o libhts.a $(LIBS) -lpthread - -tabix: tabix.o libhts.a - $(CC) $(LDFLAGS) -o $@ tabix.o libhts.a $(LIBS) -lpthread - -bgzip.o: bgzip.c config.h $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_hfile_h) -htsfile.o: htsfile.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -tabix.o: tabix.c config.h $(htslib_tbx_h) $(htslib_sam_h) 
$(htslib_vcf_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(htslib_hts_log_h) - -# Runes to check that the htscodecs submodule is present -ifdef HTSCODECS_SOURCES -htscodecs/htscodecs/%.c: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs/%.h: | htscodecs/htscodecs - @if test -e htscodecs/.git && test ! -e "$@" ; then \ - echo "Missing file '$@'" ; \ - echo " - Do you need to update the htscodecs submodule?" ; \ - false ; \ - fi - -htscodecs/htscodecs: - @if test -e .git ; then \ - printf "\\n\\nError: htscodecs submodule files not present for htslib.\\n\ - Try running: \\n\ - git submodule update --init --recursive\\n\ - in the top-level htslib directory and then re-run make.\\n\\n\\n" ; \ - else \ - printf "\\n\\nError: htscodecs submodule files not present and this is not a git checkout.\\n\ - You have an incomplete distribution. Please try downloading one of the\\n\ - official releases from https://www.htslib.org/\\n" ; \ - fi - @false - -# Build the htscodecs/htscodecs/version.h file if necessary -htscodecs/htscodecs/version.h: force - @if test -e $(srcdir)/htscodecs/.git && test -e $(srcdir)/htscodecs/configure.ac ; then \ - vers=`cd $(srcdir)/htscodecs && git describe --always --dirty --match 'v[0-9]\.[0-9]*'` && \ - case "$$vers" in \ - v*) vers=$${vers#v} ;; \ - *) iv=`awk '/^AC_INIT/ { match($$0, /^AC_INIT\(htscodecs, *([0-9](\.[0-9])*)\)/, m); print substr($$0, m[1, "start"], m[1, "length"]) }' $(srcdir)/htscodecs/configure.ac` ; vers="$$iv$${vers:+-g$$vers}" ;; \ - esac ; \ - if ! 
grep -s -q '"'"$$vers"'"' $@ ; then \ - echo 'Updating $@ : #define HTSCODECS_VERSION_TEXT "'"$$vers"'"' ; \ - echo '#define HTSCODECS_VERSION_TEXT "'"$$vers"'"' > $@ ; \ - fi ; \ - fi -endif - -# Maintainer source code checks -# - copyright boilerplate presence -# - tab and trailing space detection -maintainer-check: - test/maintainer/check_copyright.pl . - test/maintainer/check_spaces.pl . - -# Look for untracked files in the git repository. -check-untracked: - @if test -e .git && git status --porcelain | grep '^\?'; then \ - echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \ - false ; \ - fi - -# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/ -# for brevity in test and install rules, and so that build logs do not have -# ./ sprinkled throughout. -SRC = $(srcprefix) - -# For tests that might use it, set $REF_PATH explicitly to use only reference -# areas within the test suite (or set it to ':' to use no reference areas). -# -# If using MSYS, avoid poor shell expansion via: -# MSYS2_ARG_CONV_EXCL="*" make check -check test: all $(HTSCODECS_TEST_TARGETS) - test/hts_endian - test/test_expr - test/test_kfunc - test/test_kstring - test/test_str2int - test/test_time_funcs - test/fieldarith test/fieldarith.sam - test/hfile - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -g ./libhts.$(SHLIB_FLAVOUR); \ - fi - if test "x$(BUILT_PLUGINS)" != "x"; then \ - HTS_PATH=. 
test/with-shlib.sh test/plugins-dlhts -l ./libhts.$(SHLIB_FLAVOUR); \ - fi - test/test_bgzf test/bgziptest.txt - test/test-parse-reg -t test/colons.bam - cd test/faidx && ./test-faidx.sh faidx.tst - cd test/sam_filter && ./filter.sh filter.tst - cd test/tabix && ./test-tabix.sh tabix.tst - cd test/mpileup && ./test-pileup.sh mpileup.tst - cd test/fastq && ./test-fastq.sh - cd test/base_mods && ./base-mods.sh base-mods.tst - REF_PATH=: test/sam test/ce.fa test/faidx/faidx.fa test/faidx/fastqs.fq - test/test-regidx - cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-} - -test/hts_endian: test/hts_endian.o - $(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS) - -test/fuzz/hts_open_fuzzer: test/fuzz/hts_open_fuzzer.o - $(CC) $(LDFLAGS) -o $@ test/fuzz/hts_open_fuzzer.o libhts.a $(LIBS) -lpthread - -test/fieldarith: test/fieldarith.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/fieldarith.o libhts.a $(LIBS) -lpthread - -test/hfile: test/hfile.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/hfile.o libhts.a $(LIBS) -lpthread - -test/pileup: test/pileup.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread - -test/pileup_mod: test/pileup_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread - -test/plugins-dlhts: test/plugins-dlhts.o - $(CC) $(LDFLAGS) -o $@ test/plugins-dlhts.o $(LIBS) - -test/sam: test/sam.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/sam.o libhts.a $(LIBS) -lpthread - -test/test_bgzf: test/test_bgzf.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a -lz $(LIBS) -lpthread - -test/test_expr: test/test_expr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_expr.o libhts.a -lz $(LIBS) -lpthread - -test/test_faidx: test/test_faidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_faidx.o libhts.a -lz $(LIBS) -lpthread - -test/test_kfunc: test/test_kfunc.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a -lz $(LIBS) -lpthread - -test/test_kstring: test/test_kstring.o libhts.a - $(CC) $(LDFLAGS) -o $@ 
test/test_kstring.o libhts.a -lz $(LIBS) -lpthread - -test/test_mod: test/test_mod.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread - -test/test_realn: test/test_realn.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread - -test/test-regidx: test/test-regidx.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread - -test/test-parse-reg: test/test-parse-reg.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-parse-reg.o libhts.a $(LIBS) -lpthread - -test/test_str2int: test/test_str2int.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_str2int.o libhts.a $(LIBS) -lpthread - -test/test_time_funcs: test/test_time_funcs.o - $(CC) $(LDFLAGS) -o $@ test/test_time_funcs.o - -test/test_view: test/test_view.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LIBS) -lpthread - -test/test_index: test/test_index.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_index.o libhts.a $(LIBS) -lpthread - -test/test-vcf-api: test/test-vcf-api.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-api.o libhts.a $(LIBS) -lpthread - -test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-vcf-sweep.o libhts.a $(LIBS) -lpthread - -test/test-bcf-sr: test/test-bcf-sr.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-sr.o libhts.a -lz $(LIBS) -lpthread - -test/test-bcf-translate: test/test-bcf-translate.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a -lz $(LIBS) -lpthread - -test/test_introspection: test/test_introspection.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test_introspection.o libhts.a $(LIBS) -lpthread - -test/test-bcf_set_variant_type: test/test-bcf_set_variant_type.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/test-bcf_set_variant_type.o libhts.a $(LIBS) -lpthread - -# Extra tests for bundled htscodecs -test_htscodecs_rans4x8: htscodecs/tests/rans4x8 - cd htscodecs/tests && srcdir=. 
&& export srcdir && ./rans4x8.test - -test_htscodecs_rans4x16: htscodecs/tests/rans4x16pr - cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x16.test - -test_htscodecs_arith: htscodecs/tests/arith_dynamic - cd htscodecs/tests && srcdir=. && export srcdir && ./arith.test - -test_htscodecs_tok3: htscodecs/tests/tokenise_name3 - cd htscodecs/tests && srcdir=. && export srcdir && ./tok3.test - -test_htscodecs_fqzcomp: htscodecs/tests/fqzcomp_qual - cd htscodecs/tests && srcdir=. && export srcdir && ./fqzcomp.test - -test_htscodecs_varint: htscodecs/tests/varint - cd htscodecs/tests && ./varint - -htscodecs/tests/arith_dynamic: htscodecs/tests/arith_dynamic_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/fqzcomp_qual: htscodecs/tests/fqzcomp_qual_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x16pr: htscodecs/tests/rANS_static4x16pr_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/rans4x8: htscodecs/tests/rANS_static_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/tokenise_name3: htscodecs/tests/tokenise_name3_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/varint: htscodecs/tests/varint_test.o $(HTSCODECS_OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread - -htscodecs/tests/arith_dynamic_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/arith_dynamic_test.o: htscodecs/tests/arith_dynamic_test.c config.h $(htscodecs_arith_dynamic_h) -htscodecs/tests/fqzcomp_qual_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/fqzcomp_qual_test.o: htscodecs/tests/fqzcomp_qual_test.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) -htscodecs/tests/rANS_static4x16pr_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static4x16pr_test.o: htscodecs/tests/rANS_static4x16pr_test.c config.h $(htscodecs_rANS_static4x16_h) 
-htscodecs/tests/rANS_static_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/rANS_static_test.o: htscodecs/tests/rANS_static_test.c config.h $(htscodecs_rANS_static_h) -htscodecs/tests/tokenise_name3_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/tokenise_name3_test.o: htscodecs/tests/tokenise_name3_test.c config.h $(htscodecs_tokenise_name3_h) -htscodecs/tests/varint_test.o: CPPFLAGS += -Ihtscodecs -htscodecs/tests/varint_test.o: htscodecs/tests/varint_test.c config.h $(htscodecs_varint_h) - -test/hts_endian.o: test/hts_endian.c config.h $(htslib_hts_endian_h) -test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) -test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h) -test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h) -test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h) -test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h) -test/plugins-dlhts.o: test/plugins-dlhts.c config.h -test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h) -test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(hfile_internal_h) -test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h) -test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h) -test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h) -test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h) -test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h) -test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) -test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h) -test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h) -test/test_time_funcs.o: 
test/test_time_funcs.c config.h $(hts_time_funcs_h) -test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h) -test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h) -test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h) -test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h) -test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h) -test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_synced_bcf_reader_h) $(htslib_hts_h) $(htslib_vcf_h) -test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h) -test/test_introspection.o: test/test_introspection.c config.h $(htslib_hts_h) $(htslib_hfile_h) -test/test-bcf_set_variant_type.o: test/test-bcf_set_variant_type.c config.h $(htslib_hts_h) vcf.c - - -test/thrash_threads1: test/thrash_threads1.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads1.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads2: test/thrash_threads2.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads2.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads3: test/thrash_threads3.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads3.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads4: test/thrash_threads4.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads4.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads5: test/thrash_threads5.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads5.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads6: test/thrash_threads6.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads6.o libhts.a -lz $(LIBS) -lpthread - -test/thrash_threads7: test/thrash_threads7.o libhts.a - $(CC) $(LDFLAGS) -o $@ test/thrash_threads7.o libhts.a -lz $(LIBS) -lpthread - -test_thrash: $(BUILT_THRASH_PROGRAMS) - -# Test to ensure the functions in the header files are exported by the shared -# library. 
This currently works by comparing the output from ctags on -# the headers with the list of functions exported by the shared library. -# Note that functions marked as exported in the .c files and not the public -# headers will be missed by this test. -test-shlib-exports: header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - @echo "Checking shared library exports" - @if test ! -s header-exports.txt ; then echo "Error: header-exports.txt empty" ; false ; fi - @if test ! -s shlib-exports-$(SHLIB_FLAVOUR).txt ; then echo "Error: shlib-exports-$(SHLIB_FLAVOUR).txt empty" ; false ; fi - @! comm -23 header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt | grep . || \ - ( echo "Error: Found unexported symbols (listed above)" ; false ) - -# Extract symbols that should be exported from public headers using ctags -# Filter out macros in htslib/hts_defs.h. -header-exports.txt: test/header_syms.pl htslib/*.h - test/header_syms.pl htslib/*.h | sort -u -o $@ - -shlib-exports-so.txt: libhts.so - nm -D -g libhts.so | awk '$$2 == "T" { sub("@.*", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dylib.txt: libhts.dylib - nm -Ug libhts.dylib | awk '$$2 == "T" { sub("^_", "", $$3); print $$3 }' | sort -u -o $@ - -shlib-exports-dll.txt: hts.dll.a - nm -g hts.dll.a | awk '$$2 == "T" { print $$3 }' | sort -u -o $@ - -$(srcprefix)htslib.map: libhts.so - LC_ALL=C ; export LC_ALL; \ - curr_vers=`expr 'X$(PACKAGE_VERSION)' : 'X\([0-9]*\.[0-9.]*\)'` ; \ - last_vers=`awk '/^HTSLIB_[0-9](\.[0-9]+)+/ { lv = $$1 } END { print lv }' htslib.map` ; \ - if test "x$$curr_vers" = 'x' || test "x$$last_vers" = 'x' ; then \ - echo "Version check failed : $$curr_vers / $$las_vers" 1>&2 ; \ - exit 1 ; \ - fi && \ - if test "HTSLIB_$$curr_vers" = "$$last_vers" ; then \ - echo "Refusing to update $@ - HTSlib version not changed" 1>&2 ; \ - exit 1 ; \ - fi && \ - nm --with-symbol-versions -D -g libhts.so | awk '$$2 ~ /^[DGRT]$$/ && $$3 ~ /@@Base$$/ && $$3 !~ /^(_init|_fini|_edata)@@/ { sub(/@@Base$$/, 
";", $$3); print " " $$3 }' > $@.tmp && \ - if [ -s $@.tmp ] ; then \ - cat $@ > $@.new.tmp && \ - printf '\n%s {\n' "HTSLIB_$$curr_vers" >> $@.new.tmp && \ - cat $@.tmp >> $@.new.tmp && \ - printf '} %s;\n' "$$last_vers" >> $@.new.tmp && \ - rm -f $@.tmp && \ - mv $@.new.tmp $@ ; \ - fi ; \ - else \ - rm -f $@.tmp ; \ - fi - -install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB_FLAVOUR) install-pkgconfig - $(INSTALL_PROGRAM) $(BUILT_PROGRAMS) $(DESTDIR)$(bindir) - if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi - $(INSTALL_DATA) $(SRC)htslib/*.h $(DESTDIR)$(includedir)/htslib - $(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a - $(INSTALL_MAN) $(SRC)bgzip.1 $(SRC)htsfile.1 $(SRC)tabix.1 $(DESTDIR)$(man1dir) - $(INSTALL_MAN) $(SRC)faidx.5 $(SRC)sam.5 $(SRC)vcf.5 $(DESTDIR)$(man5dir) - $(INSTALL_MAN) $(SRC)htslib-s3-plugin.7 $(DESTDIR)$(man7dir) - -installdirs: - $(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(includedir) $(DESTDIR)$(includedir)/htslib $(DESTDIR)$(libdir) $(DESTDIR)$(man1dir) $(DESTDIR)$(man5dir) $(DESTDIR)$(man7dir) $(DESTDIR)$(pkgconfigdir) - if test -n "$(plugindir)"; then $(INSTALL_DIR) $(DESTDIR)$(plugindir); fi - -# After installation, the real file in $(libdir) will be libhts.so.X.Y.Z, -# with symlinks libhts.so (used via -lhts during linking of client programs) -# and libhts.so.NN (used by client executables at runtime). 
- -install-so: libhts.so installdirs - $(INSTALL_LIB) libhts.so $(DESTDIR)$(libdir)/libhts.so.$(PACKAGE_VERSION) - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so - ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so.$(LIBHTS_SOVERSION) - -install-cygdll: cyghts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) cyghts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/cyghts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) libhts.dll.a $(DESTDIR)$(libdir)/libhts.dll.a - -install-dll: hts-$(LIBHTS_SOVERSION).dll installdirs - $(INSTALL_PROGRAM) hts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/hts-$(LIBHTS_SOVERSION).dll - $(INSTALL_PROGRAM) hts.dll.a $(DESTDIR)$(libdir)/hts.dll.a - -install-dylib: libhts.dylib installdirs - $(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib - ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.$(LIBHTS_SOVERSION).dylib - -# Substitute these pseudo-autoconf variables only at install time -# so that "make install prefix=/prefix/path" etc continue to work. -install-pkgconfig: htslib.pc.tmp installdirs - sed -e 's#@-includedir@#$(includedir)#g;s#@-libdir@#$(libdir)#g;s#@-PACKAGE_VERSION@#$(PACKAGE_VERSION)#g' htslib.pc.tmp > $(DESTDIR)$(pkgconfigdir)/htslib.pc - chmod 644 $(DESTDIR)$(pkgconfigdir)/htslib.pc - -# A pkg-config file (suitable for copying to $PKG_CONFIG_PATH) that provides -# flags for building against the uninstalled library in this build directory. 
-htslib-uninstalled.pc: htslib.pc.tmp - sed -e 's#@-includedir@#'`pwd`'#g;s#@-libdir@#'`pwd`'#g' htslib.pc.tmp > $@ - - -testclean: - -rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* test/faidx/FAIL* \ - test/longrefs/*.tmp.* test/tabix/*.tmp.* test/tabix/FAIL* \ - header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt - -rm -rf htscodecs/tests/test.out - -# Only remove this in git checkouts -DEL_HTSCODECS_VERSION := $(if $(wildcard htscodecs/.git),htscodecs/htscodecs/version.h) - -mostlyclean: testclean - -rm -f *.o *.pico cram/*.o cram/*.pico test/*.o test/*.dSYM config_vars.h version.h - -rm -f htscodecs/htscodecs/*.o htscodecs/htscodecs/*.pico $(DEL_HTSCODECS_VERSION) - -rm -f hts-object-files - -rm -f htscodecs/tests/*.o - -clean: mostlyclean clean-$(SHLIB_FLAVOUR) - -rm -f libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) $(BUILT_TEST_PROGRAMS) $(BUILT_THRASH_PROGRAMS) - -rm -f htscodecs/tests/rans4x8 htscodecs/tests/rans4x16pr htscodecs/tests/arith_dynamic htscodecs/tests/tokenise_name3 htscodecs/tests/fqzcomp_qual htscodecs/tests/varint - -distclean maintainer-clean: clean - -rm -f config.cache config.h config.log config.mk config.status - -rm -f TAGS *.pc.tmp *-uninstalled.pc htslib_static.mk htscodecs.mk - -rm -rf autom4te.cache - -clean-so: - -rm -f libhts.so libhts.so.* - -clean-cygdll: - -rm -f cyghts-*.dll libhts.dll.a - -clean-dll: - -rm -f hts-*.dll hts.dll.a - -clean-dylib: - -rm -f libhts.dylib libhts.*.dylib - - -tags TAGS: - ctags -f TAGS *.[ch] cram/*.[ch] htslib/*.h - -# We recommend libhts-using programs be built against a separate htslib -# installation. However if you feel that you must bundle htslib source -# code with your program, this hook enables Automake-style "make dist" -# for this subdirectory. If you do bundle an htslib snapshot, please -# add identifying information to $(PACKAGE_VERSION) as appropriate. 
-# (The wildcards attempt to omit non-exported files (.git*, README.md, -# etc) and other detritus that might be in the top-level directory.) -distdir: - @if [ -z "$(distdir)" ]; then echo "Please supply a distdir=DIR argument."; false; fi - tar -c *.[ch15] [ILMNRchtv]*[ELSbcekmnth] | (cd $(distdir) && tar -x) - +cd $(distdir) && $(MAKE) distclean - -force: - - -.PHONY: all check check-untracked clean distclean distdir force -.PHONY: install install-pkgconfig installdirs lib-shared lib-static -.PHONY: maintainer-check maintainer-clean mostlyclean plugins -.PHONY: print-config print-version show-version tags -.PHONY: test test-shlib-exports test_thrash testclean -.PHONY: clean-so install-so -.PHONY: clean-cygdll install-cygdll -.PHONY: clean-dll install-dll -.PHONY: clean-dylib install-dylib -.PHONY: test_htscodecs_rans4x8 test_htscodecs_rans4x16 test_htscodecs_arith -.PHONY: test_htscodecs_tok3 test_htscodecs_fqzcomp test_htscodecs_varint diff --git a/src/htslib-1.19.1/NEWS b/src/htslib-1.19.1/NEWS deleted file mode 100644 index e9de8b9..0000000 --- a/src/htslib-1.19.1/NEWS +++ /dev/null @@ -1,2299 +0,0 @@ -Noteworthy changes in release 1.19.1 (22nd January 2024) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Fixed a regression in release 1.19 that caused all aux records to - be stored uncompressed in CRAM files. The resulting files were - correctly formatted, but bigger than they needed to be. - (PR#1729, fixes samtools#1968. Reported by Clockris) - -* Fixed possible out-of-bounds reads due to an incorrect check on - B tag lengths in cram_encode_aux(). (PR#1725) - -* Fixed an incorrect check on tag length which could fail to catch a - two byte out-of-bounds read in bam_get_aux(). (PR#1728) - -* Made errors reported by hts_open_format() less confusing when it can't - open the reference file. (PR#1724, fixes #1723. 
Reported by - Alex Leonard) - -* Made hts_close() fail more gracefully if it's passed a NULL pointer - (PR#1724) - -Noteworthy changes in release 1.19 (12th December 2023) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Updates -------- - -* A temporary work-around has been put in the VCF parser so that it is - less likely to fail on rows with a large number of ALT alleles, - where Number=G tags like PL can expand beyond the 2Gb limit enforced - by HTSlib. For now, where this happens the offending tag will be dropped - so the data can be processed, albeit without the likelihood data. - - In future work, the library will instead convert such tags into their - local alternatives (see https://github.com/samtools/hts-specs/pull/434). - -* New program. Adds annot-tsv which annotates regions in a destination file with - texts from overlapping regions in a source file. - (PR#1619) - -* Change bam_parse_cigar() so that it can modify existing BAM records. This - makes more useful as public API. Previously it could only handle partially - formed BAM records. - (PR#1651, fixes #1650. Reported by Oleksii Nikolaienko) - -* Add "uncompressed" to hts_format_description() where appropriate. This adds - an "uncompressed" description to uncompressed files that would normally be - compressed, such as BAM and BCF. - (PR#1656, in relation to samtools#1884. Thanks to John Marshall) - -* Speed up to the VCF parser and writer. - (PR#1644 and PR#1663) - -* Add an hclen (hard clip length) SAM filter function. - (PR#1660, with reference to samtools#813) - -* Avoid really closing stdin/stdout in hclose()/hts_close()/et al. - See discussion in PR for details. - (PR#1665. Thanks to John Marshall) - -* Add support to handle multiple files in bgzip. - (PR#1658, fixes #1642. Requested by bw2) - -* Enable auto-vectorisation in CRAM 3.1 codecs. Speeds decoding on some - sequencing platform data. - (PR#1669) - -* Speed up removal of lines in large headers. - (PR#1662, fixes #1460. 
Reported by Anže Starič) - -* Apply seqtk PR to improve kseq.h parsing performance. Port of - Fabian Klötzl's (kloetzl) lh3/seqtk#123 and attractivechaos/klib#173 to - HTSlib. - (PR#1674. Thanks to John Marshall) - -Build changes -------------- - -* Updated htscodecs submodule to 1.6.0. - (PR#1685, PR#1717, PR#1719) - -* Apply the packed attribute to uint*_u types for Clang to prevent - -fsanitize=alignment failures. - (PR#1667. Thanks to Fangrui Song) - -* Fuzz testing improvements. - (PR#1664) - -* Add C++ casts for external headers in klist.h and kseq.h. - (PR#1683. See also PR#1674 and PR#1682) - -* Add test case compiling the public headers as C++. - (PR#1682. Thanks to John Marshall) - -* Enable optimisation level -O3 for SAM QUAL+33 formatting. - (PR#1679) - -* Make compiler flag detection work with zig cc. - (PR#1687) - -* Fix unused value warnings when built with NDEBUG. - (PR#1688) - -* Remove some disused Makefile variables, fix typos and a warning. Improve - bam_parse_basemod() documentation. - (PR#1705, Thanks to John Marshall) - -Bug fixes ---------- - -* Fail bgzf_useek() when offset is above block limits. - (PR#1668) - -* Fix multi-threaded on-the-fly indexing problems. - (PR#1672, fixes samtools#1861 and bcftools#1985. Reported by Mark Ebbert and - lacek) - -* Fix hfile_libcurl small seek bug. - (PR#1676, fixes samtools#1918. Also may fix #1037, #1625 and samtools#1622. - Reported by Alex Reynolds, Mark Walker, Arthur Gilly and skatragadda-nygc. - Thanks to John Marshall) - -* Fix a minor memory leak in malformed CRAM EXTERNAL blocks. [fuzz] - (PR#1671) - -* Fix a cram decode hang from block_resize(). - (PR#1680. Reported by Sebastian Deorowicz) - -* Cram fuzzing improvements. Fixes a number of cram errors. - (PR#1701, fixes #1691, #1692, #1693, #1696, #1697, #1698, #1699 and #1700. - Thanks to Octavio Galland for finding and reporting all these) - -* Fix crypt4gh redirection. - (PR#1675, fixes grbot/crypt4gh-tutorial#2. 
Reported by hth4) - -* Fix PG header linking when records make a loop. - (PR#1702, fixes #1694. Reported by Octavio Galland) - -* Prevent issues with no-stored-sequence records in CRAM files, by ensuring - they are accounted for properly in block size calculations, and by limiting - the maximum query length in the CIGAR data. Originally seen as an overflow - by OSS-Fuzz / UBSAN, it turned out this could lead to excessive time and - memory use by HTSlib, and could result in it writing out unreadable CRAM - files. - (PR#1710) - -* Fix some illegal shifts and integer overflows found by OSS-Fuzz / UBSAN. - (PR#1707, PR#1712, PR#1713) - -Noteworthy changes in release 1.18 (25th July 2023) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Updates -------- - -* Using CRAM 3.1 no longer gives a warning about the specification - being draft. Note CRAM 3.0 is still the default output format. - (PR#1583) - -* Replaced use of sprintf with snprintf, to silence potential warnings - from Apple's compilers and those who implement similar checks. - (PR#1594, fixes #1586. Reported by Oleksii Nikolaienko) - -* Fastq output will now generate empty records for reads with no - sequence data (i.e. sequence is "*" in SAM format). (PR#1576, - fixes samtools/samtools#1576. Reported by Nils Homer) - -* CRAM decoding speed-ups. (PR#1580) - -* A new MN aux tag can now be used to verify that MM/ML base modification - data has not been broken by hard clipping. (PR#1590, PR#1612. See also - PR samtools/hts-specs#714 and issue samtools/hts-specs#646. - Reported by Jared Simpson) - -* The base modification API has been improved to make it easier for callers - to tell unchecked bases from unmodified ones. (PR#1636, fixes #1550. - Requested by Chris Wright) - -* A new bam_mods_queryi() API has been added to return additional - data about the i-th base modification returned by bam_mods_recorded(). - (PR#1636, fixes #1550 and #1635. 
Requested by Jared Simpson) - -* Speed up index look-ups for whole-chromosome queries. (PR#1596) - -* Mpileup now merges adjacent (mis)match CIGAR operations, so CIGARs - using the X/= operators give the same results as if the M operator - was used. (PR#1607, fixes #1597. Reported by Marcel Martin) - -* It's now possible to call bcf_sr_set_regions() after adding readers - using bcf_sr_add_reader() (previously this returned an error). Doing so - will discard any unread data, and reset the readers so they iterate over - the new regions. (PR#1624, fixes samtools/bcftools#1918. Reported by - Gregg Thomas) - -* The synced BCF reader can now accept regions with reference names including - colons and hyphens, by enclosing them in curly braces. For example, - {chr_part:1-1001}:10-20 will return bases 10 to 20 from reference - "chr_part:1-1001". (PR#1630, fixes #1620. Reported by Bren) - -* Add a "samples" directory with code demonstrating usage of HTSlib plus - a tutorial document. (PR#1589) - -Build changes -------------- - -* Htscodecs has been updated to 1.5.1 (PR#1654) - -* Htscodecs SIMD code now works with Apple multiarch binaries. - (PR#1587, HTSlib fix for samtools/htscodecs#76. Reported by John Marshall) - -* Improve portability of "expr" usage in version.sh. - (PR#1593, fixes #1592. Reported by John Marshall) - -* Improve portability to *BSD targets by ensuring _XOPEN_SOURCE is defined - correctly and that source files properly include "config.h". Perl - scripts also now all use #!/usr/bin/env instead of assuming that - it's in /usr/bin/perl. (PR#1628, fixes #1606. - Reported by Robert Clausecker) - -* Fixed NAME entry in htslib-s3-plugin man page so the whatis and apropos - commands find it. (PR#1634, thanks to Étienne Mollier) - -* Assorted dependency tracking fixes. (PR#1653, thanks to John Marshall) - -Documentation updates ---------------------- - -* Changed Alpine build instructions as they've switched back to using openssl. 
- (PR#1609) - -* Recommend using -rdynamic when statically linking a libhts.a with - plugins enabled. (PR#1611, thanks to John Marshall. Fixes #1600, - reported by Jack Wimberley) - -* Fixed example in docs for sam_hdr_add_line(). (PR#1618, thanks to kojix2) - -* Improved test harness for base modifications API. (PR#1648) - -Bug fixes ---------- - -* Fix a major bug when searching against a CRAM index where one container - has start and end coordinates entirely contained within the previous - container. This would occasionally miss data, and sometimes return much - more than required. The bug affected versions 1.11 to 1.17, although the - change in 1.11 was bug-fixing multi-threaded index queries. This bug did - not affect index building. There is no need to reindex your CRAM files. - (PR#1574, PR#1640. Fixes #1569, #1639, samtools/samtools#1808, - samtools/samtools#1819. Reported by xuxif, Jens Reeder and Jared Simpson) - -* Prevent CRAM blocks from becoming too big in files with short - sequences but very long aux tags. (PR #1613) - -* Fix bug where the CRAM decoder for CONST_INT and CONST_BYTE - codecs may incorrectly look for extra data in the CORE block. - Note that this bug only affected the experimental CRAM v4.0 decoder. - (PR#1614) - -* Fix crypt4gh redirection so it works in conjunction with non-file - IO, such as using htsget. (PR#1577) - -* Improve error checking for the VCF POS column, when facing invalid - data. (PR#1575, replaces #1570 originally reported and fixed - by Colin Nolan.) - -* Improved error checking on VCF indexing to validate the data is BGZF - compressed. (PR#1581) - -* Fix bug where bin number calculation could overflow when making iterators - over regions that go to the end of a chromosome. (PR#1595) - -* Backport attractivechaos/klib#78 (by Pall Melsted) to HTSlib. - Prevents infinite loops in kseq_read() when reading broken gzip files. - (PR#1582, fixes #1579. 
Reported by Goran Vinterhalter) - -* Backport attractivechaos/klib@384277a (by innoink) to HTSlib. - Fixes the kh_int_hash_func2() macro definition. - (PR#1599, fixes #1598. Reported by fanxinping) - -* Remove a compilation warning on systems with newer libcurl releases. - (PR#1572) - -* Windows: Fixed BGZF EOF check for recent MinGW releases. (PR#1601, - fixes samtools/bcftools#1901) - -* Fixed bug where tabix would not return the correct regions for files - where the column ordering is end, ..., begin instead of begin, ..., end. - (PR#1626, fixes #1622. Reported by Hiruna Samarakoon) - -* sam_format_aux1() now always NUL-terminates Z/H tags. (PR#1631) - -* Ensure base modification iterator is reset when no MM tag is present. - (PR#1631, PR#1647) - -* Fix segfault when attempting to write an uncompressed BAM file opened using - hts_open(name, "wbu"). This was attempting to write BAM data without - wrapping it in BGZF blocks, which is invalid according to the BAM - specification. "wbu" is now internally converted to "wb0" to output - uncompressed data wrapped in BGZF blocks. (PR#1632, fixes #1617. - Reported by Joyjit Daw) - -* Fixed over-strict bounds check in probaln_glocal() which caused it to make - sub-optimal alignments when the requested band width was greater than the - query length. (PR#1616, fixes #1605. Reported by Jared Simpson) - -* Fixed possible double frees when handling errors in bcf_hdr_add_hrec(), - if particular memory allocations fail. (PR#1637) - -* Ensure that bcf_hdr_remove() clears up all pointers to the items removed - from dictionaries. Failing to do this could have resulted in a call - requesting a deleted item via bcf_hdr_get_hrec() returning a stale pointer. - (PR#1637) - -* Stop the gzip decompresser from finishing prematurely when an empty - gzip block is followed by more data. 
(PR#1643, PR#1646) - -Noteworthy changes in release 1.17 (21st February 2023) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* A new API for iterating through a BAM record's aux field. - (PR#1354, addresses #1319. Thanks to John Marshall) - -* Text mode for bgzip. Allows bgzip to compress lines of text with block breaks - at newlines. - (PR#1493, thanks to Mike Lin for the initial version PR#1369) - -* Make tabix support CSI indices with large positions. Unlike SAM and VCF - files, BED files do not set a maximum reference length which hindered CSI - support. This change sets an arbitrary large size of 100G to enable it to - work. - (PR#1506) - -* Add a fai_line_length function. Exposes the internal line-wrap length. - (PR#1516) - -* Check for invalid barcode tags in fastq output. - (PR#1518, fixes samtools#1728. Reported by Poshi) - -* Warn if reference found in a CRAM file is not contained in the specified - reference file. - (PR#1517 and PR#1521, adds diagnostics for #1515. Reported by Wei WeiDeng) - -* Add a faidx_seq_len64 function that can return sequence lengths longer than - INT_MAX. At the same time limit faidx_seq_len to INT_MAX output. Also add a - fai_adjust_region to ensure given ranges do not go beyond the end of the - requested sequence. - (PR#1519) - -* Add a bcf_strerror function to give text descriptions of BCF errors. - (PR#1510) - -* Add CRAM SQ/M5 header checking when specifying a fasta file. This is to - prevent creating a CRAM that cannot be decoded again. - (PR#1522. In response to samtools#1748 though not a direct fix) - -* Improve support for very long input lines (> 2Gbyte). This is mostly useful - for tabix which does not do much interpretation of its input. - (PR#1542, a partial fix for #1539) - -* Speed up load_ref_portion. This function has been sped up by about 7x, which - speeds up low-depth CRAM decoding by about 10%. - (PR#1551) - -* Expand CRAM API to cope with new samtools cram_size command. 
- (PR#1546) - -* Merges neighbouring I and D ops into one op within pileup. This means - 4M1D1D1D3M is reported as 4M3D3M. Fixing this in sam.c means not only is - samtools mpileup now looking better, but any tool using the mpileup API will - be getting consistent results. - (PR#1552, fixes the last remaining part of samtools#139) - -* Update the API documentation for bgzf_mt as it refered to a previous - iteration. - (PR#1556, fixes #1553. Reported by Raghavendra Padmanabhan) - - -Build changes -------------- - -* Use POSIX grep in testing as egrep and fgrep are considered obsolete. - (PR#1509, thanks to David Seifert) - -* Switch to building libdefalte with cmake for Cirris CI. - (PR#1511) - -* Ensure strings in config_vars.h are escaped correctly. - (PR#1530, fixes #1527. Reported by Lucas Czech) - -* Easier modification of shared library permissions during install. - (PR#1532, fixes #1525. Reported by StephDC) - -* Fix build on ancient compilers. Added -std=gnu90 to build tests so older - C compilers will still be happy. - (PR#1524, fixes #1523. Reported by Martin Jakt) - -* Switch MacOS CI tests to an ARM-based image. - (PR#1536) - -* Cut down the number of embed_ref=2 tests that get run. - (PR#1537) - -* Add symbol versions to libhts.so. This is to aid package developers. - (PR#1560 addresses #1505, thanks to John Marshall. Reported by Stefan Bruens) - -* htscodecs now updated to v1.4.0. - (PR#1563) - -* Cleaned up misleading system error reports in test_bgzf. - (PR#1565) - -Bug fixes ---------- - -* VCF. Fix n-squared complexity in sample line with many adjacent tabs [fuzz]. - (PR#1503) - -* Improved bcftools detection and reporting of bgzf decode errors. - (PR#1504, thanks to Lilian Janin. PR#1529 thanks to Bergur Ragnarsson, fixes - #1528. PR#1554) - -* Prevent crash when the only FASTA entry has no sequence [fuzz]. - (PR#1507) - -* Fixed typo in sam.h documentation. - (PR#1512, thanks to kojix2) - -* Fix buffer read-overrun in bam_plp_insertion_mod. 
- (PR#1520) - -* Fix hash keys being left behind by bcf_hdr_remove. - (PR#1535, fixes #1533. Reported by Giulio Genovese in #842) - -* Make bcf_hdr_idinfo_exists more robust by checking id value exists. - (PR#1544, fixes #1538. Reported by Giulio Genovese) - -* CRAM improvements. Fixed crash with multi-threaded CRAM. Fixed a bug in the - codec parameter learning for CRAM 3.1 name tokeniser. Fixed Cram compression - container substitution matrix generation, - (PR#1558, PR#1559 and PR#1562) - -Noteworthy changes in release 1.16 (18th August 2022) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Make hfile_s3 refresh AWS credentials on expiry in order to make HTSlib work - better with AWS IAM credentials, which have a limited lifespan. - (PR#1462 and PR#1474, addresses #344) - -* Allow BAM headers between 2GB and 4GB in size once more. This is not - permitted in the BAM specification but was allowed in an earlier version of - HTSlib. There is now a warning at 2GB and a hard failure at 4GB. - (PR#1421, fixes #1420 and samtools#1613. Reported by John Marshall and - R C Mueller) - -* Improve error message when failing to load an index. - (PR#1468, example of the problem samtools#1637) - -* Permit MM (base modification) tags containing "." and "?" suffixes. These - define implicit vs explicit coordinates. See the SAM tags specification for - details. - (PR#1423 and PR#1426, fixes #1418. PR#1469, fixes #1466. Reported - by cjw85) - -* Warn if spaces instead of tabs are detected in a VCF file to prevent - confusion. - (PR#1328, fixes bcftools#1575. Reported by ketkijoshi278) - -* Add an "sclen" filter expression keyword. This is the length of a soft-clip, - both left and right end. It may be combined with qlen (qlen-sclen) to obtain - the number of bases in the query sequence that have been aligned to the genome - ie it provides a way to compare local-alignment vs global-alignment length. - (PR#1441 and PR/samtools#1661, fixes #1436. 
Requested by Chang Y) - -* Improve error messages for CRAM reference mismatches. If the user specifies - the wrong reference, the CRAM slice header MD5sum checks fail. We now report - the SQ line M5 string too so it is possible to validate against the whole - chr in the ref.fa file. The error message has also been improved to report - the reference name instead of #num. Finally, we now hint at the likely cause, - which counters the misleading samtools supplied error of "truncated or - corrupt" file. - (PR#1427, fixes samtools#1640. Reported by Jian-Guo Zhou) - -* Expose more of the CRAM API and add new functionality to extract the reference - from a CRAM file. - (PR#1429 and PR#1442) - -* Improvements to the implementation of embedded references in CRAM where no - external reference is specified. - (PR#1449, addresses some of the issues in #1445) - -* The CRAM writer now allows alignment records with RG:Z: aux tags that - don't have a corresponding @RG ID in the file header. Previously these - tags would have been silently dropped. HTSlib will complain whenever it - has to add one though, as such tags do not conform to recommended practice - for the SAM, BAM and CRAM formats. - (PR#1480, fixes #1479. Reported by Alex Leonard) - -* Set tab delimiter in man page for tabix GFF3 sort. - (PR#1457. Thanks to Colin Diesh) - -* When using libdeflate, the 1...9 scale of BGZF compression levels is - now remapped to the 1...12 range used by libdeflate instead of being - passed directly. In particular, HTSlib levels 8 and 9 now map to - libdeflate levels 10 and 12, so it is possible to select the highest (but - slowest) compression offered by libdeflate. - (PR#1488, fixes #1477. Reported by Gert Hulselmans) - -* The VCF variant API has been extended so that it can return separate flags - for INS and DEL variants as well as the existing INDEL one. These flags - have not been added to the old bcf_get_variant_types() interface as - it could break existing users. 
To access them, it is necessary to use new - functions bcf_has_variant_type() and bcf_has_variant_types(). - (PR#1467) - -* The missing, but trivial, `le_to_u8()` function has been added to hts_endian. - (PR#1494, Thanks to John Marshall) - -* bcf_format_gt() now works properly on big-endian platforms. - (PR#1495, Thanks to John Marshall) - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* Update htscodecs to version 1.3.0 for new SIMD code + various fixes. - Updates the htscodecs submodule and adds changes necessary to make HTSlib - build the new SIMD codec implementations. - (PR#1438, PR#1489, PR#1500) - -* Fix clang builds under mingw. Under mingw, clang requires dllexport to be - applied to both function declarations and function definitions. - (PR#1435, PR#1497, PR#1498 fixes #1433. Reported by teepean) - -* Fix curl type warning with gcc 12.1 on Windows. - (PR#1443) - -* Detect ARM Neon support and only build appropriate SIMD object files. - (PR#1451, fixes #1450. Thanks to John Marshall) - -* `make print-config` now reports extra CFLAGS that are needed to build the - SIMD parts of htscodecs. These may be of use to third-party build - systems that don't use HTSlib's or htscodecs' build infrastructure. (PR#1485. - Thanks to John Marshall) - -* Fixed some Makefile dependency issues for the "check"/"test" targets - and plugins. In particular, "make check" will now build the "all" target, - if not done already, before running the tests. - (PR#1496) - -Bug fixes ---------- - -* Fix bug when reading position -1 in BCF (0 in VCF), which is used to indicate - telomeric regions. The BCF reader was incorrectly assuming the value stored - in the file was unsigned, so a VCF->BCF->VCF round-trip would change it - from 0 to 4294967296. - (PR#1476, fixes #1475 and bcftools#1753. 
Reported by Rodrigo Martin) - -* Various bugs and quirks have been fixed in the filter expression engine, - mostly related to the handling of absent tags, and the is_true flag. - Note that as a result of these fixes, some filter expressions may give - different results: - - Fixed and-expressions including aux tag values which could give an invalid - true result depending on the order of terms. - - The expression `![NM]` is now true if only `NM` does not exist. In - earlier versions it would also report true for tags like `NM:i:0` which - exist but have a value of zero. - - The expression `[X1] != 0` is now false when `X1` does not exist. Earlier - versions would return true for this comparison when the tag was missing. - - NULL values due to missing tags now propagate through string, bitwise - and mathematical operations. Logical operations always treat them as - false. - (PR#1463, fixes samtools#1670. Reported by Gert Hulselmans; - PR#1478, fixes samtools#1677. Reported by johnsonzcode) - -* Fix buffer overrun in bam_plp_insertion_mod. Memory now grows to the proper - size needed for base modification data. - (PR#1430, fixes samtools#1652. Reported by hd2326) - -* Remove limit of returned size from fai_retrieve(). - (PR#1446, fixes samtools#1660. Reported by Shane McCarthy) - -* Cap hts_getline() return value at INT_MAX. Prevents hts_getline() from - returning a negative number (a fail) for very long string length values. - (PR#1448. Thanks to John Marshall) - -* Fix breakend detection and test bcf_set_variant_type(). - (PR#1456, fixes #1455. Thanks to Martin Pollard) - -* Prevent arrays of BCF_BT_NULL values found in BCF files from causing - bcf_fmt_array() to call exit() as the type is unsupported. These are - now tested for and caught by bcf_record_check(), which returns an - error code instead. (PR#1486) - -* Improved detection of fasta and fastq files that have very long comments - following identifiers. (PR#1491, thanks to John Marshall. 
- Fixes samtools/samtools#1689, reported by cjw85) - -* Fixed a SEGV triggered by giving a SAM file to `samtools import`. - (PR#1492) - -Noteworthy changes in release 1.15.1 (7th April 2022) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Security fix: Fixed broken error reporting in the sam_prob_realn() - function, due to a missing hts_log() parameter. Prior to this fix - (i.e., in HTSlib versions 1.8 to 1.15) it was possible to abuse - the log message format string by passing a specially crafted - alignment record to this function. (PR#1406) - -* HTSlib now uses libhtscodecs release 1.2.2. This fixes a number - of bugs where invalid compressed data could trigger usage of - uninitialised values. (PR#1416) - -* Fixed excessive memory used by multi-threaded SAM output on - long reads. (Part of PR#1384) - -* Fixed a bug where tabix would misinterpret region specifiers - starting at position 0. It will also now warn if the file - being indexed is supposed to be 1-based but has positions - less than or equal to 0. (PR#1411) - -* The VCF header parser will now issue a warning if it finds an - INFO header with Type=Flag but Number not equal to 0. It will - also ignore the incorrect Number so the flag can be used. (PR#1415) - -Noteworthy changes in release 1.15 (21st February 2022) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* Bgzip now has a --keep option to not remove the input file after - compressing. (PR#1331) - -* Improved file format detection so some BED files are no longer - detected as FASTQ or FASTA. (PR#1350, thanks to John Marshall) - -* Added xz (lzma), zstd and D4 formats to the file type detection - functions. We don't actively support reading these data types, but - function calls and htsfile can detect them. (PR#1340, thanks to - John Marshall) - -* CRAM now also uses libdeflate for read-names if the libdeflate - version is new enough (1.9 onwards). 
Previously we used zlib for - this due to poor performance of libdeflate. This gives a slight - speed up and reduction in file size. (PR#1383) - -* The VCF and BCF readers will now issue a warning if contig, INFO - or FORMAT IDs do not match the formats described in the VCFv4.3 - specification. Note that while the invalid names will mostly still - be accepted, future updates will convert the warnings to errors - causing files including invalid names to be rejected. (PR#1389) - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* HTSlib now uses libhtscodecs release 1.2.1. - -* Improved support for compiling and linking against HTSlib with - Microsoft Visual Studio. (PR#1380, #1377, #1375. Thanks to - Aidan Bickford and John Marshall) - -* Various internal CI improvements. - -Bug fixes ---------- - -* Fixed CRAM index queries for HTSJDK output (PR#1388, reported by - Chris Norman). Note this also fixes writing CRAM writing, to match - the specification (and HTSJDK), from version 3.1 onwards. - -* Fixed CRAM index queries when required-fields settings are selected - to ignore CIGARs (PR#1372, reported by Giulio Genovese). - -* Unmapped but placed (having chr/pos) are now included in the BAM - indices. (PR#1352, thanks to John Marshall) - -* CRAM now honours the filename##idx##index nomenclature for - specifying non-standard index locations. (PR#1360, reported by - Michael Cariaso) - -* Minor CRAM v1.0 read-group fix (PR#1349, thanks to John Marshall) - -* Permit .fa and .fq file type detection as synonyms for FASTA and - FASTQ. (PR#1386). - -* Empty VCF format fields are now output ":.:" as instead of "::". - (PR#1370) - -* Repeated bcf_sr_seek calls now work. (PR#1363, reported by - Giulio Genovese) - -* Bcf_remove_allele_set now works on unpacked BCF records. (PR#1358, - reported by Brent Pedersen). 
- -* The hts_parse_decimal() function used to read numbers in region lists - is now better at rejecting non-numeric values. In particular it - now rejects a lone 'G' instead of interpreting it as '0G', i.e. zero. - (PR#1396, PR#1400, reported by SSSimon Yang; thanks to John Marshall). - -* Improve support for GPU issues listed by -Wdouble-promotion. - (PR#1365, reported by David Seisert) - -* Fix example code in header file documentation. (PR#1381, Thanks to - Aidan Bickford) - -Noteworthy changes in release 1.14 (22nd October 2021) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* Added a keep option to bgzip to leave the original file untouched. This - brings bgzip into line with gzip. (PR #1331, thanks to Alex Petty) - -* "endpos" has been added to the filter language, giving the position - of the rightmost mapped base as measured by the CIGAR string. For - unmapped reads it is the same as "pos". (PR #1307, thanks to John Marshall) - -* Interfaces have been added to interpret the new base modification tags - added to the SAMtags document in samtools/hts-specs#418. (PR #1132) - -* New API functions hts_flush()/sam_flush()/bcf_flush() for flushing output - htsFile/samFile/vcfFile streams. (PR #1326, thanks to John Marshall) - -* The synced_bcf_reader now sorts lines with symbolic alleles by END tag as - well as POS. (PR #1321) - -* Added synced_bcf_reader options BCF_SR_REGIONS_OVERLAP and - BCF_SR_TARGETS_OVERLAP for better control of how records that start outside - the desired region but overlap it are handled. Fixes samtools/bcftools#1420 - and samtools/bcftools#1421 raised by John Marshall. (PR #1327) - -* HTSlib will now accept long-cigar CG:B: tags made by htsjdk which don't - quite follow the specification properly (using signed values instead of - unsigned). Thanks to Colin Diesh for reporting an example file. 
(PR #1317) - -* The warning printed when the BGZF reader finds a file with no EOF block - has been changed to be less alarming. Unfortunately some third-party - BGZF encoders don't write EOF blocks at the end of files. Thanks to - Keiran Raine for reporting an example file. (PR #1323) - -* The FASTA and FASTQ readers get an option to skip over the first item on - the header line, and use the second as the read name. It allows the original - name to be restored on some of the fastq files served from the European - Nucleotide Archive (ENA). (PR #1325) - -* HTSlib is now more strict when parsing the VCF samples line (beginning - #CHROM). It will only accept tabs between the mandatory field names and - sample names must be separated with tabs. (PR #1328) - -* HTSlib will now warn if it looks like the header has been corrupted - by diagnostic messages from the program that made it. This can happen when - using `nohup`, which by default mixes stdout and stderr into the same - stream. (PR#1339, thanks to John Marshall) - -* File format detection will now recognise signatures for XZ, Zstd and D4 - files (note that HTSlib will not read them yet). (PR #1340, thanks to - John Marshall) - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* Some redundant tests have been removed from the test harness, speeding it up. - (PR #1308) - -* The version.sh script now works better on shallow checkouts. (PR #1324) - -* A check-untracked Makefile target has been added to catch untracked files - (mostly) left by the test harness. (PR #1324) - -Bug fixes ---------- - -* Fixed a case where flushing the thread pool could very occasionally cause - a deadlock. (PR #1309) - -* Fixed a bug where some CRAM files could fail to decode if the required_fields - option was in use. Thanks to Matt Sexton for reporting the issue. 
- (PR #1314, fixes samtools/samtools#1475) - -* Fixed a regression where the S3 plugin could not read public files unless - you supplied some Amazon credentials. Thanks to Chris Saunders for reporting. - (PR #1332, fixes samtools/samtools#1491) - -* Fixed a possible CRAM thread deadlock discovered by @ryancaicse. - (PR #1330, fixes #1329) - -* Some set-but-unused variables have been removed. (PR #1334) - -* Fixed a bug which prevented "flag.read2" from working in the filter - language unless it was at the end of the expression. Thanks to Vamsi Kodali - for reporting the issue. (PR #1342) - -* Fixed a memory leak that could happen if CRAM fails to inflate a LZMA - block. (PR #1340, thanks to John Marshall) - -Noteworthy changes in release 1.13 (7th July 2021) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* In case a PG header line has multiple ID tags supplied by other applications, - the header API now selects the first one encountered as the identifying tag - and issues a warning when detecting subsequent ID tags. - (#1256; fixed samtools/samtools#1393) - -* VCF header reading function (vcf_hdr_read) no longer tries to download a - remote index file by default. - (#1266; fixes #380) - -* Support reading and writing FASTQ format in the same way as SAM, BAM or CRAM. - Records read from a FASTQ file will be treated as unmapped data. - (#1156) - -* Added GCP requester pays bucket access. Thanks to @indraniel. - (#1255) - -* Made mpileup's overlap removal choose which copy to remove at random instead - of always removing the second one. This avoids strand bias in experiments - where the +ve and -ve strand reads always appear in the same order. - (#1273; fixes samtools/bcftools#1459) - -* It is now possible to use platform specific BAQ parameters. This also - selects long-read parameters for read lengths bigger than 1kb, which helps - bcftools mpileup call SNPs on PacBio CCS reads. 
- (#1275) - -* Improved bcf_remove_allele_set. This fixes a bug that stopped iteration over - alleles prematurely, marks removed alleles as 'missing' and does automatic - lazy unpacking. - (#1288; fixes #1259) - -* Improved compression metrics for unsorted CRAM files. This improves the - choice of codecs when handling unsorted data. - (#1291) - -* Linear index entries for empty intervals are now initialised with the file - offset in the next non-empty interval instead of the previous one. This - may reduce the amount of data iterators have to discard before reaching - the desired region, when the starting location is in a sequence gap. - Thanks to @carsonh for reporting the issue. - (#1286; fixes #486) - -* A new hts_bin_level API function has been added, to compute the level of a - given bin in the binning index. - (#1286) - -* Related to the above, a new API method, hts_idx_nseq, now returns the total - number of contigs from an index. - (#1295 and #1299) - -* Added bracket handling to bcf_hdr_parse_line, for use with ##META lines. - Thanks to Alberto Casas Ortiz. - (#1240) - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* HTSlib now uses libhtscodecs release 1.1.1. - -* Added a curl/curl.h check to configure and improved INSTALL documentation on - build options. Thanks to Melanie Kirsche and John Marshall. - (#1265; fixes #1261) - -* Some fixes to address GCC 11.1 warnings. - (#1280, #1284, #1285; fixes #1283) - -* Supports building HTSlib in a separate directory. Thanks to John Marshall. - (#1277; fixes #231) - -* Supports building HTSlib on MinGW 32-bit environments. Thanks to - John Marshall. - (#1301) - -Bug fixes ---------- - -* Fixed hts_itr_query() et al region queries: fixed bug introduced in - HTSlib 1.12, which led to iterators producing very few reads for some - queries (especially for larger target regions) when unmapped reads were - present. 
HTSlib 1.11 had a related problem in which iterators would omit - a few unmapped reads that should have been produced; cf #1142. - Thanks to Daniel Cooke for reporting the issue. - (#1281; fixes #1279) - -* Removed compressBound assertions on opening bgzf files. Thanks to - Gurt Hulselmans for reporting the issue. - (#1258; fixed #1257) - -* Duplicate sample name error message for a VCF file now only displays the - duplicated name rather than the entire sample name list. - (#1262; fixes samtools/bcftools#1451) - -* Fix to make samtools cat work on CRAMs again. - (#1276; fixes samtools/samtools#1420) - -* Fix for a double memory free in SAM header creation. Thanks to @ihsineme. - (#1274) - -* Prevent assert in bcf_sr_set_regions. Thanks to Dr K D Murray. - (#1270) - -* Fixed crash in knet_open() etc stubs. Thanks to John Marshall. - (#1289) - -* Fixed filter expression "cigar" on unmapped reads. Stop treating an empty - CIGAR string as an error. Thanks to Chang Y for reporting the issue. - (#1298, fixes samtools/samtools#1445) - -* Bug fixes in the bundled copy of htscodecs: - - - Fixed an uninitialized access in the name tokeniser decoder. - (samtools/htscodecs#23) - - - Fixed a bug with name tokeniser and variable number of names per slice, - causing it to incorrectly report an error on certain valid inputs. - (samtools/htscodecs#24) - - -Noteworthy changes in release 1.12 (17th March 2021) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* Added experimental CRAM 3.1 and 4.0 support. (#929) - - These should not be used for long term data storage as the - specification still needs to be ratified by GA4GH and may be subject - to changes in format. (This is highly likely for 4.0). However it - may be tested using: - - test/test_view -t ref.fa -C -o version=3.1 in.bam -p out31.cram - - For smaller but slower files, try varying the compression profile - with an additional "-o small". 
Profile choices are fast, normal, - small and archive, and can be applied to all CRAM versions. - -* Added a general filtering syntax for alignment records in SAM/BAM/CRAM - readers. (#1181, #1203) - - An example to find chromosome spanning read-pairs with high mapping - quality: 'mqual >= 30 && mrname != rname' - - To find significant sized deletions: - 'cigar =~ "[0-9]{2}D"' or 'rlen - qlen > 10'. - - To report duplicates that aren't part of a "proper pair": - 'flag.dup && !flag.proper_pair' - - More details are in the samtools.1 man page under "FILTER EXPRESSIONS". - -* The knet networking code has been removed. It only supported the http - and ftp protocols, and a better and safer alternative using libcurl - has been available since release 1.3. If you need access to ftp:// and - http:// URLs, HTSlib should be built with libcurl support. (#1200) - -* The old htslib/knetfile.h interfaces have been marked as deprecated. Any - code still using them should be updated to use hFILE instead. (#1200) - -* Added an introspection API for checking some of the capabilities provided - by HTSlib. (#1170) Thanks also to John Marshall for contributions. (#1222) - - `hfile_list_schemes`: returns the number of schemes found - - `hfile_list_plugins`: returns the number of plugins found - - `hfile_has_plugin`: checks if a specific plugin is available - - `hts_features`: returns a bit mask with all available features - - `hts_test_feature`: test if a feature is available - - `hts_feature_string`: return a string summary of enabled features - -* Made performance improvements to `probaln_glocal` method, which - speeds up mpileup BAQ calculations. (#1188) - - Caching of reused loop variables and removal of loop invariants - - Code reordering to remove instruction latency. - - Other refactoring and tidyups. - -* Added a public method for constructing a BAM record from the - component pieces. Thanks to Anders Kaplan. 
(#1159, #1164) - -* Added two public methods, `sam_parse_cigar` and `bam_parse_cigar`, as part of - a small CIGAR API (#1169, #1182). Thanks to Daniel Cameron for input. (#1147) - -* HTSlib, and the included htsfile program, will now recognise the old - RAZF compressed file format. Note that while the format is detected, - HTSlib is unable to read it. It is recommended that RAZF files are - uncompressed with `gunzip` before using them with HTSlib. Thanks to - John Marshall (#1244); and Matthew J. Oldach who reported problems - with uncompressing some RAZF files (samtools/samtools#1387). - -* The S3 plugin now has options to force the address style. It will recognise - the addressing_style and host_bucket entries in the respective aws - .credentials and s3cmd .s3cfg files. There is also a new HTS_S3_ADDRESS_STYLE - environment variable. Details are in the htslib-s3-plugin.7 man file (#1249). - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* Added new Makefile targets for the applications that embed HTSlib and - want to run its test suite or clean its generated artefacts. (#1230, #1238) - -* The CRAM codecs are now obtained via the htscodecs submodule, hence - when cloning it is now best to use "git clone --recursive". In an - existing clone, you may use "git submodule update --init" to obtain - the htscodecs submodule checkout. - -* Updated CI test configuration to recurse HTSlib submodules. (#1359) - -* Added Cirrus-CI integration as a replacement for Travis, which was - phased out. (#1175; #1212) - -* Updated the Windows image used by Appveyor to 'Visual Studio 2019'. (#1172; - fixed #1166) - -* Fixed a buglet in configure.ac, exposed by the release 2.70 of autoconf. - Thanks to John Marshall. (#1198) - -* Fixed plugin linking on macOS, to prevent symbol conflict when linking - with a static HTSlib. Thanks to John Marshall. (#1184) - -* Fixed a clang++9 error in `cram_io.h`. Thanks to Pjotr Prins. 
(#1190) - -* Introduced $(ALL_CPPFLAGS) to allow for more flexibility in setting the - compiler flags. Thanks to John Marshall. (#1187) - -* Added 'fall through' comments to prevent warnings issued by Clang on - intentional fall through case statements, when building with - `-Wextra flag`. Thanks to John Marshall. (#1163) - -* Non-configure builds now define _XOPEN_SOURCE=600 to allow them to work - when the `gcc -std=c99` option is used. Thanks to John Marshall. (#1246) - -Bug fixes ---------- - -* Fixed VCF `#CHROM` header parsing to only separate columns at tab characters. - Thanks to Sam Morris for reporting the issue. - (#1237; fixed samtools/bcftools#1408) - -* Fixed a crash reported in `bcf_sr_sort_set`, which expects REF to be present. - (#1204; fixed samtools/bcftools#1361) - -* Fixed a bcf synced reader bug when filtering with a region list, and - the first record for a chromosome had the same position as the last - record for the previous chromosome. (#1254; fixed samtools/bcftools#1441) - -* Fixed a bug in the overlapping logic of mpileup, dealing with iterating over - CIGAR segments. Thanks to `@wulj2` for the analysis. (#1202; fixed #1196) - -* Fixed a tabix bug that prevented setting the correct number of lines to be - skipped in a region file. Thanks to Jim Robinson for reporting it. (#1189; - fixed #1186) - -* Made `bam_itr_next` an alias for `sam_itr_next`, to prevent it from crashing - when working with htsFile pointers. Thanks to Torbjörn Klatt for - reporting it. (#1180; fixed #1179) - -* Fixed once per outgoing multi-threaded block `bgzf_idx_flush` assertion, to - accommodate situations when a single record could span multiple blocks. - Thanks to `@lacek`. (#1168; fixed samtools/samtools#1328) - -* Fixed assumption of pthread_t being a non-structure, as permitted by POSIX. - Thanks also to John Marshall and Anders Kaplan. (#1167, #1153, #1153) - -* Fixed the minimum offset of a BAI index bin, to account for unmapped reads. 
- Thanks to John Marshall for spotting the issue. (#1158; fixed #1142) - -* Fixed the CRLF handling in `sam_parse_worker` method. Thanks to - Anders Kaplan. (#1149; fixed #1148) - -* Included unistd.h and errno.h directly in HTSlib files, as opposed to - including them indirectly, via third party code. Thanks to - Andrew Patterson (#1143) and John Marshall (#1145). - - -Noteworthy changes in release 1.11 (22nd September 2020) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Features and Updates --------------------- - -* Support added for remote reference files. fai_path() can take a remote - reference file and will return the corresponding index file. Remote indexes - can be handled by refs_load_fai(). UR tags in @SQ lines can now be set to - remote URIs. (#1017) - -* Added tabix --separate-regions option, which adds header comment lines - separating different regions' output records when multiple target regions - are supplied on the command line. (#1108) - -* Added tabix --cache option to set a BGZF block cache size. Most beneficial - when the -R option is used and the same blocks need to be re-read multiple - times. (#1053) - -* Improved error checking in tabix and added a --verbosity option so - it is possible to change the amount of logging when it runs. (#1040) - -* A note about the maximum chromosome length usable with TBI indexes has been - added to the tabix manual page. Thanks to John Marshall. (#1070) - -* New method vcf_open_mode() changes the opening mode of a variant file - based on its file extension. Similar to sam_open_mode(). (#1096) - -* The VCF parser has been made faster and easier to maintain. (#1057) - -* bcf_record_check() has been made faster, giving a 15% speed increase when - reading an uncompressed BCF file. (#1130) - -* The VCF parser now recognises the "<NON_REF>" symbolic allele produced - by GATK. (#1045) - -* Support has been added for simultaneous reading of unindexed VCF/BCF files - when using the synced_bcf_reader interface. 
Input files must have the - chromosomes in the same order as each other and be consistent with the order - of sequences in the header. (#1089) - -* The VCF and BCF readers will now attempt to fix up invalid INFO/END tags - where the stored END value is less than POS, resulting in an apparently - negative record length. Such files have been generated by programs which - used END incorrectly, and by broken lift-over processes that failed to - update any END tags present. (#1021; fixed samtools/bcftools#1154) - -* The htsFile interface can now detect the crypt4gh encrypted format (see - https://samtools.github.io/hts-specs/crypt4gh.pdf). If HTSlib is - built with external plug-in support, and the hfile_crypt4gh plug-in is - present, the file will be passed to it for decryption. The plug-in - can be obtained from https://github.com/samtools/htslib-crypt4gh. (#1046) - -* hts_srand48() now seeds the same POSIX-standard sequences of pseudo-random - numbers regardless of platform, including on OpenBSD where plain srand48() - produces a different cryptographically-strong non-deterministic sequence. - Thanks to John Marshall. (#1002) - -* Iterators now work with 64 bit positions. (#1018) - -* Improved the speed of range queries when using BAI indexes by - making better use of the linear index data included in the file. - The best improvement is on low-coverage data. (#1031) - -* Alignments which consume no reference bases are now considered to have - length 1. This would make such alignments cover 1 reference position in - the same manner as alignments that are unmapped or have no CIGAR strings. - These alignments can now be returned by iterator-based queries. Thanks - to John Marshall. (#1063; fixed samtools/samtools#1240, see also - samtools/hts-specs#521). - -* A bam_set_seqi() function to modify a single base in the BAM structure - has been added. This is a companion function to bam_seqi(). (#1022) - -* Writing SAM format is around 30% faster. 
(#1035) - -* Added sam_format_aux1() which converts a BAM aux tag to a SAM format string. - (#1134) - -* bam_aux_update_str() no longer requires NUL-terminated strings. It - is also now possible to create tags containing part of a longer string. - (#1088) - -* It is now possible to use external plug-ins in language bindings that - dynamically load HTSlib. Note that a side-effect of this change is that - some plug-ins now link against libhts.so, which means that they have to be - able to find the shared library when they are started up. Thanks to - John Marshall. (#1072) - -* bgzf_close(), and therefore hts_close(), will now return non-zero when - closing a BGZF handle on which errors have been detected. (Part of #1117) - -* Added a special case to the kt_fisher_exact() test for when the table - probability is too small to be represented in a double. This fixes a - bug where it would, for some inputs, fail to correctly determine which - side of the distribution the table was on resulting in swapped p-values - being returned for the left- and right-tailed tests. The two-tailed - test value was not affected by this problem. (#1126) - -* Improved error diagnostics in the CRAM decoder (#1042), BGZF (#1049), - the VCF and BCF readers (#1059), and the SAM parser (#1073). - -* ks_resize() now allocates 1.5 times the requested size when it needs - to expand a kstring instead of rounding up to the next power of two. - This has been done mainly to make the inlined function smaller, but it - also reduces the overhead of storing data in kstrings at the expense of - possibly needing a few more reallocations. (#1129) - -CRAM improvements ------------------ - -* Delay CRAM crc32 checks until the data actually needs to be used. With - other changes this leads to a 20x speed up in indexing and other sub-query - based actions. (#988) - -* CRAM now handles the transition from mapped to unmapped data in a better - way, improving compression of the unmapped data. 
(#961) - -* CRAM can now use libdeflate. (#961) - -* Fixed bug in MD tag generation with "b" read feature codes, causing the - numbers in the tag to be too large. Note that HTSlib never uses this - feature code so it is unlikely that this bug would be seen on real data. - The problem was found when testing against hand-crafted CRAM files. (#1086) - -* Fixed a regression where the CRAM multi-region iterator became much less - efficient when using threads. It now works more like the single iterator - and does not preemptively decode the next container unless it will be used. - (#1061) - -* Set CRAM default quality in lossy quality modes. If lossy quality is enabled - and 'B', 'q' or 'Q' features are used, CRAM starts off with QUAL being all 255 - (as per BAM spec and "*" quality) and then modifies individual qualities as - dictated by the specific features. - - However that then produces ASCII quality " " (space, q=-1) for the unmodified - bases. Instead ASCII quality "?" (q=30) is used, as per HTSJDK. Quality 255 - is still used for sequences with no modifications at all. (#1094) - - -Build changes -------------- - -These are compiler, configuration and makefile based changes. - -* `make all` now also builds htslib_static.mk and htslib-uninstalled.pc. - Thanks to John Marshall. (#1011) - -* Various cppcheck-1.90 warnings have been fixed. (#995, #1011) - -* HTSlib now prefers its own headers when being compiled, fixing build - failures on machines that already had a system-installed HTSlib. Thanks to - John Marshall. (#1078; fixed #347) - -* Define HTSLIB_EXPORT without using a helper macro to reduce the length of - compiler diagnostics that mention exported functions. Thanks to - John Marshall. (#1029) - -* Fix dirty default build by including latest pkg.m4 instead of using - aclocal.m4. Thanks to Damien Zammit. (#1091) - -* Struct tags have been added to htslib/*.h public typedefs. This makes it - possible to forward declare htsFile without including htslib/hts.h. 
Thanks - to Lucas Czech and John Marshall. (#1115; fixed #1106) - -* Fixed compiler warnings emitted by the latest gcc and clang releases - when compiling HTSlib, along with some -Wextra warnings in the public - include files. Thanks to John Marshall. (#1066, #1063, #1083) - -Bug fixes ---------- - -* Fixed hfile_libcurl breakage when using libcurl 7.69.1 or later. Thanks to - John Marshall for tracking down the exact libcurl change that caused the - incompatibility. (#1105; fixed samtools/samtools#1254 and - samtools/samtools#1284) - -* Fixed overflows kroundup32() and kroundup_size_t() which caused them to - return zero when rounding up values where the most significant bit was - set. When this happens they now return the highest value that can - be stored (#1044). All of the kroundup macro definitions have also been - gathered together into a unified implementation (#1051). - -* Fixed missing return parameter value in idx_test_and_fetch(). Thanks to - Lilian Janin. (#1014) - -* Fixed crashes due to inconsistent selection between BGZF and plain (hFILE) - interfaces when reading files. [fuzz] (#1019) - -* Added and/or fixed byte swapping code for big-endian platforms. Thanks - to Jun Aruga, John Marshall, Michael R Crusoe and Gianfranco Costamagna - for their help. (#1023; fixed #119 and #355) - -* Fixed a problem with multi-threaded on-the-fly indexes which would - occasionally write virtual offsets pointing at the end of a BGZF block. - Attempting to read from such an offset caused EOF to be incorrectly - reported. These offsets are now handled correctly, and the indexer - has been updated to avoid generating them. (#1028; fixed - samtools/samtools#1197) - -* In sam_hdr_create(), free newly allocated SN strings when encountering an - error. [fuzz] (#1034) - -* Prevent double free in case of idx_test_and_fetch() failure. Thanks to - @fanwayne for the bug report. (#1047; fixed #1033) - -* In the header, link a new PG line only to valid chains. 
Prevents an - explosive growth of PG lines on headers where PG lines are already present - but not linked together correctly. (#1062; fixed samtools/samtools#1235) - -* Also in the header, when calling sam_hdr_update_line(), update target arrays - only when the name or length is changed. (#1007) - -* Fixed buffer overflows in CRAM MD5 calculation triggered by - files with invalid compression headers, or files with embedded - references that were one byte too short. [fuzz] (#1024, #1068) - -* Fix mpileup regression between 1.9 and 1.10 where overlap detection - was incorrectly skipped on reads where RNEXT, PNEXT and TLEN were - set to the "unavailable" values ("*", 0, 0 in SAM). (#1097) - -* kputs() now checks for null pointer in source string. [fuzz] (#1087) - -* Fix potential bcf_update_alleles() crash on 0 alleles. Thanks to - John Marshall. (#994) - -* Added bcf_unpack() calls to some bcf_update functions to fix a bug - where updates made after a call to bcf_dup() could be lost. (#1032; - fixed #1030) - -* Error message typo "Number=R" instead of "Number=G" fixed in - bcf_remove_allele_set(). Thanks to Ilya Vorontsov. (#1100) - -* Fixed crashes that could occur in BCF files that use IDX= header annotations - to create a sparse set of CHROM, FILTER or FORMAT indexes, and - include records that use one of the missing index values. [fuzz] (#1092) - -* Fixed potential integer overflows in the VCF parser and ensured that - the total length of FORMAT fields cannot go over 2Gbytes. [fuzz] (#1044, - #1104; latter is CVE-2020-36403 affecting all HTSlib versions up to 1.10.2) - -* Download index files atomically in idx_test_and_fetch(). This prevents - corruption when running parallel jobs on S3 files. Thanks to John Marshall. - (#1112; samtools/samtools#1242). - -* The pileup constructor callback is now given the copy of the bam1_t struct - made by pileup instead of the original one passed to bam_plp_push(). 
This - makes it the same as the one passed to the destructor and ensures that - cached data, for example the location of an aux tag, will remain valid. - (#1127) - -* Fixed possible error in code_sort() on negative CRAM Huffman code - length. (#1008) - -* Fixed possible undefined shift in cram_byte_array_stop_decode_init(). (#1009) - -* Fixed a bug where range queries to the end of a given reference - would return incorrect results on CRAM files. (#1016; - fixed samtools/samtools#1173) - -* Fixed an integer overflow in cram_read_slice(). [fuzz] (#1026) - -* Fixed a memory leak on failure in cram_decode_slice(). [fuzz] (#1054) - -* Fixed a regression which caused cram_transcode_rg() to fail, resulting - in a crash in "samtools cat" on CRAM files. (#1093; - fixed samtools/samtools#1276) - -* Fixed an undersized string reallocation in the threaded SAM reader which - caused it to crash when reading SAM files with very long lines. Numerous - memory allocation checks have also been added. (#1117) - - -Noteworthy changes in release 1.10.2 (19th December 2019) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a release fix that corrects minor inconsistencies discovered in -previous deliverables. - - -Noteworthy changes in release 1.10.1 (17th December 2019) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The support for 64-bit coordinates in VCF brought problems for files -not conforming to VCF/BCF specification. While previous versions would -make out-of-range values silently overflow creating nonsense values -but parseable file, the version 1.10 would silently create an invalid BCF. - - -Noteworthy changes in release 1.10 (6th December 2019) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Brief summary -------------- - -There are many changes in this release, so the executive summary is: - -* Addition of support for references longer than 2Gb (NB: SAM and VCF - formats only, not their binary counterparts). 
This may need changes - in code using HTSlib. See README.large_positions.md for more information. - -* Added a SAM header API. - -* Major speed up to SAM reading and writing. This also now supports - multi-threading. - -* We can now auto-index on-the-fly while writing a file. This also - includes to bgzipped SAM.gz. - -* Overhaul of the S3 interface, which now supports version 4 - signatures. This also makes writing to S3 work. - -These also required some ABI changes. See below for full details. - - -Features / updates ------------------- - -* A new SAM/BAM/CRAM header API has been added to HTSlib, allowing header - data to be updated without having to parse or rewrite large parts of the - header text. See htslib/sam.h for function definitions and - documentation. (#812) - - The header typedef and several pre-existing functions have been renamed - to have a sam_hdr_ prefix: sam_hdr_t, sam_hdr_init(), sam_hdr_destroy(), - and sam_hdr_dup(). (The existing bam_hdr_-prefixed names are still - provided for compatibility with existing code.) (#887, thanks to - John Marshall) - -* Changes to hfile_s3, which provides support for the AWS S3 API. (#839) - - - hfile_s3 now uses version 4 signatures by default. Attempting to write to - an S3 bucket will also now work correctly. It is possible to force - version 2 signatures by creating environment variable HTS_S3_V2 (the exact - value does not matter, it just has to exist). Note that writing depends - on features that need version 4 signatures, so forcing version 2 will - disable writes. - - - hfile_s3 will automatically retry requests where the region endpoint - was not specified correctly, either by following the 301 redirect (when - using path-style requests) or reading the 400 response (when using - virtual-hosted style requests and version 4 signatures). 
The first - region to try can be set by using the AWS_DEFAULT_REGION environment - variable, by setting "region" in ".aws/credentials" or by setting - "bucket_location" in ".s3cfg". - - - hfile_s3 now percent-escapes the path component of s3:// URLs. For - backwards-compatibility it will ignore any paths that have already - been escaped (detected by looking for '%' followed by two hexadecimal - digits.) - - - New environment variables HTS_S3_V2, HTS_S3_HOST, HTS_S3_S3CFG - and HTS_S3_PART_SIZE to force version-2 signatures, control the - S3 server hostname, the configuration file and upload chunk - sizes respectively. - -* Numerous SAM format improvements. - - - Bgzipped SAM files can now be indexed and queried. The library now - recognises sam.gz as a format name to ease this usage. (#718, #916) - - - The SAM reader and writer now supports multi-threading via the - thread-pool. (#916) - - Note that the multi-threaded SAM reader does not currently support seek - operations. Trying to do this (for example with an iterator range request) - will result in the SAM readers dropping back to single-threaded mode. - - - Major speed up of SAM decoding and encoding, by around 2x. (#722) - - - SAM format can now handle 64-bit coordinates and references. This - has implications for the ABI too (see below). Note BAM and CRAM - currently cannot handle references longer than 2Gb, however given - the speed and threading improvements SAM.gz is a viable workaround. (#709) - -* We can now automatically build indices on-the-fly while writing - SAM, BAM, CRAM, VCF and BCF files. (Note for SAM and VCF this only - works when bgzipped.) (#718) - -* HTSlib now supports the @SQ-AN header field, which lists alternative names - for reference sequences. This means given "@SQ SN:1 AN:chr1", tools like - samtools can accept requests for "1" or "chr1" equivalently. (#931) - -* Zero-length files are no longer considered to be valid SAM files - (with no header and no alignments). 
This has been changed so that pipelines - such as `somecmd | samtools ...` with `somecmd` aborting before outputting - anything will now propagate the error to the second command. (#721, thanks - to John Marshall; #261 reported by Adrian Tan) - -* Added support for use of non-standard index names by pasting the - data filename and index filename with ##idx##. For example - "/path1/my_data.bam##idx##/path2/my_index.csi" will open bam file - "/path1/my_data.bam" and index file "/path2/my_index.csi". (#884) - - This affects hts_idx_load() and hts_open() functions. - -* Improved the region parsing code to handle colons in reference - names. Strings can be disambiguated by the use of braces, so for - example when reference sequences called "chr1" and "chr1:100-200" - are both present, the regions "{chr1}:100-200" and "{chr1:100-200}" - unambiguously indicate which reference is being used. (#708) - - A new function hts_parse_region() has been added along with - specialisations for sam_parse_region() and fai_parse_region(). - -* CRAM encoding now has additional checks for MD/NM validity. If - they are incorrect, it stores the (incorrect copy) verbatim so - round-trips "work". (#792) - -* Sped up decoding of CRAM by around 10% when the MD tag is being - generated. (#874) - -* CRAM REF_PATH now supports %Ns (where N is a single digit) - expansion in http URLs, similar to how it already supported this - for directories. (#791) - -* BGZF now permits indexing and seeking using virtual offsets in - completely uncompressed streams. (#904, thanks to Adam Novak) - -* bgzip now asks for extra confirmation before decompressing files - that don't have a known compression extension (e.g. .gz). This avoids - `bgzip -d foo.bam.bai` producing a foo.bam file that is very much not - a BAM-formatted file. 
(#927, thanks to John Marshall) - -* The htsfile utility can now copy files (including to/from URLs using - HTSlib's remote access facilities) with the --copy option, in - addition to its existing uses of identifying file formats and - displaying sequence or variant data. (#756, thanks to John Marshall) - -* Added tabix --min-shift option. (#752, thanks to Garrett Stevens) - -* Tabix now has an -D option to disable storing a local copy of a - remote index. (#870) - -* Improved support for MSYS Windows compiler environment. (#966) - -* External htslib plugins are now supported on Windows. (#966) - - -API additions and improvements ------------------------------- - -* New API functions bam_set_mempolicy() and bam_get_mempolicy() have - been added. These allow more control over the ownership of bam1_t - alignment record data; see documentation in htslib/sam.h for more - information. (#922) - -* Added more HTS_RESULT_USED checks, this time for VCF I/O. (#805) - -* khash can now hash kstrings. This makes it easier to hash - non-NUL-terminated strings. (#713) - -* New haddextension() filename extension API function. (#788, thanks to - John Marshall) - -* New hts_resize() macro, designed to replace uses of hts_expand() - and hts_expand0(). (#805) - -* Added way of cleaning up unused jobs in the thread pool via the new - hts_tpool_dispatch3() function. (#830) - -* New API functions hts_reglist_create() and sam_itr_regarray() are added - to create hts_reglist_t region lists from `chr:-` type region - specifiers. (#836) - -* Ksort has been improved to facilitate library use. See KSORT_INIT2 - (adds scope / namespace capabilities) and KSORT_INIT_STATIC interfaces. 
- (#851, thanks to John Marshall) - -* New kstring functions (#879): - KS_INITIALIZE - Initializer for structure assignment - ks_initialize() - Initializer for pointed-to kstrings - ks_expand() - Increase kstring capacity by a given amount - ks_clear() - Set kstring length to zero - ks_free() - Free the underlying buffer - ks_c_str() - Returns the kstring buffer as a const char *, - or an empty string if the length is zero. - -* New API functions hts_idx_load3(), sam_index_load3(), tbx_index_load3() - and bcf_index_load3() have been added. These allow control of whether - remote indexes should be cached locally, and allow the error message - printed when the index does not exist to be suppressed. (#870) - -* Improved hts_detect_format() so it no longer assumes all text is - SAM unless positively identified otherwise. It also makes a stab - at detecting bzip2 format and identifying BED, FASTA and FASTQ - files. (#721, thanks to John Marshall; #200, #719 both reported by - Torsten Seemann) - -* File format errors now set errno to EFTYPE (BSD, MacOS) when - available instead of ENOEXEC. (#721) - -* New API function bam_set_qname (#942) - -* In addition to the existing hts_version() function, which reflects the - HTSlib version being used at runtime, now also provides - HTS_VERSION, a preprocessor macro reflecting the HTSlib version that - a program is being compiled against. (#951, thanks to John Marshall; #794) - - -ABI changes ------------ - -This release contains a number of things which change the Application -Binary Interface (ABI). This means code compiled against an earlier -library will require recompiling. The shared library soversion has -been bumped. - -* On systems that support it, the default symbol visibility has been - changed to hidden and the only exported symbols are ones that form part - of the officially supported ABI. This is to make clear exactly which - symbols are considered parts of the library interface. 
It also - helps packagers who want to check compatibility between HTSlib versions. - (#946; see for example issues #311, #616, and #695) - -* HTSlib now supports 64 bit reference positions. This means several - structures, function parameters, and return values have been made bigger - to allow larger values to be stored. While most code that uses - HTSlib interfaces should still build after this change, some alterations - may be needed - notably to printf() formats where the values of structure - members are being printed. (#709) - - Due to file format limitations, large positions are only supported - when reading and writing SAM and VCF files. - - See README.large_positions.md for more information. - -* An extra field has been added to the kbitset_t struct so bitsets can - be made smaller (and later enlarged) without involving memory allocation. - (#710, thanks to John Marshall) - -* A new field has been added to the bam_pileup1_t structure to keep track - of which CIGAR operator is being processed. This is used by a new - bam_plp_insertion() function which can be used to return the sequence of - any inserted bases at a given pileup location. If the alignment includes - CIGAR P operators, the returned sequence will include pads. (#699) - -* The hts_itr_t and hts_itr_multi_t structures have been merged and can be - used interchangeably. Extra fields have been added to hts_itr_t to support - this. hts_itr_multi_t is now a typedef for hts_itr_t; sam_itr_multi_next() - is now an alias for sam_itr_next() and hts_itr_multi_destroy() is an alias - for hts_itr_destroy(). (#836) - -* An improved regidx interface has been added. To allow this, struct - reg_t has been removed, regitr_t has been modified and various new - API functions have been added to htslib/regidx.h. While parts of - the old regidx API have been retained for backwards compatibility, - it is recommended that all code using regidx should be changed to use - the new interface. 
(#761) - -* Elements in the hts_reglist_t structure have been reordered slightly - so that they pack together better. (#761) - -* bgzf_utell() and bgzf_useek() now use type off_t instead of long for - the offset. This allows them to work correctly on files longer than - 2G bytes on Windows and 32-bit Linux. (#868) - -* A number of functions that used to return void now return int so that - they can report problems like memory allocation failures. Callers - should take care to check the return values from these functions. (#834) - - The affected functions are: - ksort.h: ks_introsort(), ks_mergesort() - sam.h: bam_mplp_init_overlaps() - synced_bcf_reader.h: bcf_sr_regions_flush() - vcf.h: bcf_format_gt(), bcf_fmt_array(), - bcf_enc_int1(), bcf_enc_size(), - bcf_enc_vchar(), bcf_enc_vfloat(), bcf_enc_vint(), - bcf_hdr_set_version(), bcf_hrec_format() - vcfutils.h: bcf_remove_alleles() - -* bcf_set_variant_type() now outputs VCF_OVERLAP for spanning - deletions (ALT=*). (#726) - -* A new field (hrecs) has been added to the bam_hdr_t structure for - use by the new header API. The old sdict field is now not used and - marked as deprecated. The l_text field has been changed from uint32_t - to size_t, to allow for very large headers in SAM files. The text - and l_text fields have been left for backwards compatibility, but - should not be accessed directly in code that uses the new header API. - To access the header text, the new functions sam_hdr_length() and - sam_hdr_str() should be used instead. (#812) - -* The old cigar_tab field is now marked as deprecated; use the new - bam_cigar_table[] instead. (#891, thanks to John Marshall) - -* The bam1_core_t structure's l_qname and l_extranul fields have been - rearranged and enlarged; l_qname still includes the extra NULs. - (Almost all code should use bam_get_qname(), bam_get_cigar(), etc, - and has no need to use these fields directly.) HTSlib now supports - the SAM specification's full 254 QNAME length again. 
(#900, thanks - to John Marshall; #520) - -* bcf_index_load() no longer tries the '.tbi' suffix when looking for - BCF index files (.tbi indexes are for text files, not binary BCF). (#870) - -* htsFile has a new 'state' member to support SAM multi-threading. (#916) - -* A new field has been added to the bam1_t structure, and others - have been rearranged to remove structure holes. (#709; #922) - - -Bug fixes ---------- - -* Several BGZF format fixes: - - - Support for multi-member gzip files. (#744, thanks to Adam Novak; #742) - - - Fixed error handling code for native gzip formatted files. (64c4927) - - - CRCs checked when threading too (previously only when non-threaded). (#745) - - - Made bgzf_useek function work with threads. (#818) - - - Fixed rare threading deadlocks. (#831) - - - Reading of very short files (<28 bytes) that do not contain an EOF block. - (#910) - -* Fixed some thread pool deadlocks caused by race conditions. (#746, #906) - -* Many additional memory allocation checks in VCF, BCF, SAM and CRAM - code. This also changes the return type of some functions. See ABI - changes above. (#920 amongst others) - -* Replace some sam parsing abort() calls with proper errors. - (#721, thanks to John Marshall; #576) - -* Fixed to permit SAM read names of length 252 to 254 (the maximum - specified by the SAM specification). (#900, thanks to John Marshall) - -* Fixed mpileup overlap detection heuristic to work with BAMs having - long CIGARs (more than 65536 operations). (#802) - -* Security fix: CIGAR strings starting with the "N" operation can no - longer cause underflow on the bam CIGAR structure. Similarly CIGAR - strings that are entirely "D" ops could leak the contents of - uninitialised variables. (#699) - -* Fixed bug where alignments starting 0M could cause an invalid - memory access in sam_prob_realn(). (#699) - -* Fixed out of bounds memory access in mpileup when given a reference - with binary characters (top-bit set). 
(#808, thanks to John Marshall) - -* Fixed crash in mpileup overlap_push() function. (#882; #852 reported - by Pierre Lindenbaum) - -* Fixed various potential CRAM memory leaks when recovering from - error cases. - -* Fixed CRAM index queries for unmapped reads (#911; samtools/samtools#958 - reported by @acorvelo) - -* Fixed the combination of CRAM embedded references and multiple - slices per container. This was incorrectly setting the header - MD5sum. (No impact on default CRAM behaviour.) (b2552fd) - -* Removed unwanted explicit data flushing in CRAM writing, which on - some OSes caused major slowdowns. (#883) - -* Fixed inefficiencies in CRAM encoding when many small references - occur within the middle of large chromosomes. Previously it - switched into multi-ref mode, but not back out of it which caused - the read POS field to be stored poorly. (#896) - -* Fixed CRAM handling of references when the order of sequences in a - supplied fasta file differs to the order of the @SQ headers. (#935) - -* Fixed BAM and CRAM multi-threaded decoding when used in conjunction - with the multi-region iterator. (#830; #577, #822, #926 all reported by - Brent Pedersen) - -* Removed some unaligned memory accesses in CRAM encoder and - undefined behaviour in BCF reading (#867, thanks to David Seifert) - -* Repeated calling of bcf_empty() no longer crashes. (#741) - -* Fixed bug where some 8 or 16-bit negative integers were stored using values - reserved by the BCF specification. These numbers are now promoted to the - next size up, so -121 to -128 are stored using at least 16 bits, and -32761 - to -32768 are stored using 32 bits. - - Note that while BCF files affected by this bug are technically incorrect, - it is still possible to read them. When converting to VCF format, - HTSlib (and therefore bcftools) will interpret the values as intended - and write out the correct negative numbers. 
(#766, thanks to John Marshall; - samtools/bcftools#874) - -* Allow repeated invocations of bcf_update_info() and bcf_update_format_*() - functions. (#856, thanks to John Marshall; #813 reported by Steffen Möller) - -* Memory leak removed in knetfile's kftp_parse_url() function. (#759, thanks - to David Alexander) - -* Fixed various crashes found by libfuzzer (invalid data leading to - errors), mostly but not exclusively in CRAM, VCF and BCF decoding. (#805) - -* Improved robustness of BAI and CSI index creation and loading. (#870; #967) - -* Prevent (invalid) creation of TBI indices for BCF files. - (#837; samtools/bcftools#707) - -* Better parsing of handling of remote URLs with ?param=val - components and their interaction with remote index URLs. (#790; #784 - reported by Mark Ebbert) - -* hts_idx_load() now checks locally for all possible index names before - attempting to download a remote index. It also checks that the remote - file it downloads is actually an index before trying to save and use - it. (#870; samtools/samtools#1045 reported by Albert Vilella) - -* hts_open_format() now honours the compression field, no longer also - requiring an explicit "z" in the mode string. Also fixed a 1 byte - buffer overrun. (#880) - -* Removed duplicate hts_tpool_process_flush prototype. (#816, reported by - James S Blachly) - -* Deleted defunct cram_tell declaration. (66c41e2; #915 reported by - Martin Morgan) - -* Fixed overly aggressive filename suffix checking in bgzip. (#927, thanks to - John Marshall; #129, reported by @hguturu) - -* Tabix and bgzip --help output now goes to standard output. (#754, thanks to - John Marshall) - -* Fixed bgzip index creation when using multiple threads. (#817) - -* Made bgzip -b option honour -I (index filename). (#817) - -* Bgzip -d no longer attempts to unlink(NULL) when decompressing stdin. 
(#718) - - -Miscellaneous other changes ---------------------------- - -* Integration with Google OSS fuzzing for automatic detection of - more bugs. (Thanks to Google for their assistance and the bugs it - has found.) (#796, thanks to Markus Kusano) - -* aclocal.m4 now has the pkg-config macros. (6ec3b94d; #733 reported by - Thomas Hickman) - -* Improved C++ compatibility of some header files. (#772; #771 reported - by @cwrussell) - -* Improved strict C99 compatibility. (#860, thanks to John Marshall) - -* Travis and AppVeyor improvements to aid testing. (#747; #773 thanks to - Lennard Berger; #781; #809; #804; #860; #909) - -* Various minor compiler warnings fixed. (#708; #765; #846, #860, thanks to - John Marshall; #865; #966; #973) - -* Various new and improved error messages. - -* Documentation updates (mostly in the header files). - -* Even more testing with "make check". - -* Corrected many copyright dates. (#979) - -* The default non-configure Makefile now uses libcurl instead of - knet, so it can support https. (#895) - - - - - - -Noteworthy changes in release 1.9 (18th July 2018) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* If `./configure` fails, `make` will stop working until either configure - is re-run successfully, or `make distclean` is used. This makes - configuration failures more obvious. (#711, thanks to John Marshall) - -* The default SAM version has been changed to 1.6. This is in line with the - latest version specification and indicates that HTSlib supports the - CG tag used to store long CIGAR data in BAM format. - -* bgzip integrity check option '--test' (#682, thanks to @sd4B75bJ, @jrayner) - -* Faidx can now index fastq files as well as fasta. The fastq index adds - an extra column to the `.fai` index which gives the offset to the quality - values. New interfaces have been added to `htslib/faidx.h` to read the - fastq index and retrieve the quality values. 
It is possible to open - a fastq index as if fasta (only sequences will be returned), but not - the other way round. (#701) - -* New API interfaces to add or update integer, float and array aux tags. (#694) - -* Add `level=` option to `hts_set_opt()` to allow the compression - level to be set. Setting `level=0` enables uncompressed output. (#715) - -* Improved bgzip error reporting. - -* Better error reporting when CRAM reference files can't be opened. (#706) - -* Fixes to make tests work properly on Windows/MinGW - mainly to handle - line ending differences. (#716) - -* Efficiency improvements: - - - Small speed-up for CRAM indexing. - - - Reduce the number of unnecessary wake-ups in the thread pool. (#703) - - - Avoid some memory copies when writing data, notably for uncompressed - BGZF output. (#703) - -* Bug fixes: - - - Fix multi-region iterator bugs on CRAM files. (#684) - - - Fixed multi-region iterator bug that caused some reads to be skipped - incorrectly when reading BAM files. (#687) - - - Fixed synced_bcf_reader() bug when reading contigs multiple times. (#691, - reported by @freeseek) - - - Fixed bug where bcf_hdr_set_samples() did not update the sample dictionary - when removing samples. (#692, reported by @freeseek) - - - Fixed bug where the VCF record ref length was calculated incorrectly - if an INFO END tag was present. (71b00a) - - - Fixed warnings found when compiling with gcc 8.1.0. (#700) - - - sam_hdr_read() and sam_hdr_write() will now return an error code - if passed a NULL file pointer, instead of crashing. - - - Fixed possible negative array look-up in sam_parse1() that somehow escaped - previous fuzz testing. (CVE-2018-13845, #731, reported by @fCorleone) - - - Fixed bug where cram range queries could incorrectly report an error - when using multiple threads. (#734, reported by Brent Pedersen) - - - Fixed very rare rANS normalisation bug that could cause an assertion - failure when writing CRAM files. 
(#739, reported by @carsonhh) - -Noteworthy changes in release 1.8 (3rd April 2018) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* The URL to get sequences from the EBI reference server has been changed - to https://. This is because the EBI no longer serve sequences via - plain HTTP - requests to the http:// endpoint just get redirected. - HTSlib needs to be linked against libcurl to download https:// URLs, - so CRAM users who want to get references from the EBI will need to - run configure and ensure libcurl support is enabled using the - --enable-libcurl option. - -* Added libdeflate as a build option for alternative faster compression and - decompression. Results vary by CPU but compression should be twice as fast - and decompression faster. - -* It is now possible to set the compression level in bgzip. (#675; thanks - to Nathan Weeks). - -* bgzip now gets its own manual page. - -* CRAM encoding now stores MD and NM tags verbatim where the reference - contains 'N' characters, to work around ambiguities in the SAM - specification (samtools #717/762). - Also added "store_md" and "store_nm" cram-options for forcing these - tags to be stored at all locations. This is best when combined with - a subsequent decode_md=0 option while reading CRAM. - -* Multiple CRAM bug fixes, including a fix to free and the subsequent reuse of - references with `-T ref.fa`. (#654; reported by Chris Saunders) - -* CRAM multi-threading bugs fixed: don't try to call flush on reading; - processing of multiple range queries; problems with multi-slice containers. - -* Fixed crashes caused when decoding some cramtools produced CRAM files. - -* Fixed a couple of minor rANS issues with handling invalid data. - -* Fixed bug where probaln_glocal() tried to allocate far more memory than - needed when the query sequence was much longer than the reference. This - caused crashes in samtools and bcftools mpileup when used on data with very - long reads.
(#572, problem reported by Felix Bemm via minimap2). - -* sam_prop_realn() now returns -1 (the same value as for unmapped reads) - on reads that do not include at least one 'M', 'X' or '=' CIGAR operator, - and no longer adds BQ or ZQ tags. BAQ adjustments are only made to bases - covered by these operators so there is no point in trying to align - reads that do not have them. (#572) - -Noteworthy changes in release 1.7 (26th January 2018) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* BAM: HTSlib now supports BAMs which include CIGARs with more than - 65535 operations as per HTS-Specs 18th November (dab57f4 and 2f915a8). - -* BCF/VCF: - - Removed the need for long double in pileup calculations. - - Sped up the synced reader in some situations. - - Bug fixing: removed memory leak in bcf_copy. - -* CRAM: - - Added support for HTS_IDX_START in cram iterators. - - Easier to build when lzma header files are absent. - - Bug fixing: a region query with REQUIRED_FIELDS option to - disable sequence retrieval now gives correct results. - - Bug fixing: stop queries to regions starting after the last - read on a chromosome from incorrectly reporting errors - (#651, #653; reported by Imran Haque and @egafni via pysam). - -* Multi-region iterator: The new structure takes a list of regions and - iterates over all, deduplicating reads in the process, and producing a - full list of file offset intervals. This is usually much faster than - repeatedly using the old single-region iterator on a series of regions. - -* Curl improvements: - - Add Bearer token support via HTS_AUTH_LOCATION env (#600). - - Use CURL_CA_BUNDLE environment variable to override the CA (#622; - thanks to Garret Kelly & David Alexander). - - Speed up (removal of excessive waiting) for both http(s) and ftp. - - Avoid repeatedly reconnecting by removal of unnecessary seeks. - - Bug fixing: double free when libcurl_open fails. 
- -* BGZF block caching, if enabled, now performs far better (#629; reported - by Ram Yalamanchili). - -* Added an hFILE layer for in-memory I/O buffers (#590; thanks to Thomas - Hickman). - -* Tidied up the drand48 support (intended for systems that do not - provide this function). - -Noteworthy changes in release 1.6 (28th September 2017) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Fixed bug where iterators on CRAM files did not propagate error return - values to the caller correctly. Thanks go to Chris Saunders. - -* Overhauled Windows builds. Building with msys2/mingw64 now works - correctly and passes all tests. - -* More improvements to logging output (thanks again to Anders Kaplan). - -* Return codes from sam_read1() when reading cram have been made - consistent with those returned when reading sam/bam. Thanks to - Chris Saunders (#575). - -* BGZF CRC32 checksums are now always verified. - -* It's now possible to set nthreads = 1 for cram files. - -* hfile_libcurl has been modified to make it thread-safe. It's also - better at handling web servers that do not honour byte range requests - when attempting to seek - it now sets errno to ESPIPE and keeps - the existing connection open so callers can revert to streaming mode - if they want to. - -* hfile_s3 now recalculates access tokens if they have become stale. This - fixes a reported problem where authentication failed after a file - had been in use for more than 15 minutes. - -* Fixed bug where remote index fetches would fail to notice errors when - writing files. - -* bam_read1() now checks that the query sequence length derived from the - CIGAR alignment matches the sequence length in the BAM record. - -Noteworthy changes in release 1.5 (21st June 2017) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* Added a new logging API: hts_log(), along with hts_log_error(), - hts_log_warn() etc. convenience macros. Thanks go to Anders Kaplan - for the implementation.
(#499, #543, #551) - -* Added a new file I/O option "block_size" (HTS_OPT_BLOCK_SIZE) to - alter the hFILE buffer size. - -* Fixed various bugs, including compilation issues samtools/bcftools#610, - samtools/bcftools#611 and robustness to corrupted data #537, #538, - #541, #546, #548, #549, #554. - - -Noteworthy changes in release 1.4.1 (8th May 2017) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is primarily a security bug fix update. - -* Fixed SECURITY (CVE-2017-1000206) issue with buffer overruns with - malicious data. (#514) - -* S3 support for non Amazon AWS endpoints. (#506) - -* Support for variant breakpoints in bcftools. (#516) - -* Improved handling of BCF NaNs. (#485) - -* Compilation / portability improvements. (#255, #423, #498, #488) - -* Miscellaneous bug fixes (#482, #521, #522, #523, #524). - -* Sanitise headers (#509) - - -Release 1.4 (13 March 2017) - -* Incompatible changes: several functions and data types have been changed - in this release, and the shared library soversion has been bumped to 2. - - - bam_pileup1_t has an additional field (which holds user data) - - bam1_core_t has been modified to allow for >64K CIGAR operations - and (along with bam1_t) so that CIGAR entries are aligned in memory - - hopen() has vararg arguments for setting URL scheme-dependent options - - the various tbx_conf_* presets are now const - - auxiliary fields in bam1_t are now always stored in little-endian byte - order (previously this depended on if you read a bam, sam or cram file) - - index metadata (accessible via hts_idx_get_meta()) is now always - stored in little-endian byte order (previously this depended on if - the index was in tbi or csi format) - - bam_aux2i() now returns an int64_t value - - fai_load() will no longer save local copies of remote fasta indexes - - hts_idx_get_meta() now takes a uint32_t * for l_meta (was int32_t *) - -* HTSlib now links against libbz2 and liblzma by default. 
To remove these - dependencies, run configure with options --disable-bz2 and --disable-lzma, - but note that this may make some CRAM files produced elsewhere unreadable. - -* Added a thread pool interface and replaced the bgzf multi-threading - code to use this pool. BAM and CRAM decoding is now multi-threaded - too, using the pool to automatically balance the number of threads - between decode, encode and any data processing jobs. - -* New errmod_cal(), probaln_glocal(), sam_cap_mapq(), and sam_prob_realn() - functions, previously internal to SAMtools, have been added to HTSlib. - -* Files can now be accessed via Google Cloud Storage using gs: URLs, when - HTSlib is configured to use libcurl for network file access rather than - the included basic knetfile networking. - -* S3 file access now also supports the "host_base" setting in the - $HOME/.s3cfg configuration file. - -* Data URLs ("data:,text") now follow the standard RFC 2397 format and may - be base64-encoded (when written as "data:;base64,text") or may include - percent-encoded characters. HTSlib's previous over-simplified "data:text" - format is no longer supported -- you will need to add an initial comma. - -* When plugins are enabled, S3 support is now provided by a separate - hfile_s3 plugin rather than by hfile_libcurl itself as previously. - When --enable-libcurl is used, by default both GCS and S3 support - and plugins will also be built; they can be individually disabled - via --disable-gcs and --disable-s3. - -* The iRODS file access plugin has been moved to a separate repository. - Configure no longer has a --with-irods option; instead build the plugin - found at <https://github.com/samtools/htslib-plugins>. - -* APIs to portably read and write (possibly unaligned) data in little-endian - byte order have been added. - -* New functions bam_auxB_len(), bam_auxB2i() and bam_auxB2f() have been - added to make accessing array-type auxiliary data easier.
bam_aux2i() - can now return the full range of values that can be stored in an integer - tag (including unsigned 32 bit tags). bam_aux2f() will return the value - of integer tags (as a double) as well as floating-point ones. All of - the bam_aux2 and bam_auxB2 functions will set errno if the requested - conversion is not valid. - -* New functions fai_load3() and fai_build3() allow fasta indexes to be - stored in a different location to the indexed fasta file. - -* New functions bgzf_index_dump_hfile() and bgzf_index_load_hfile() - allow bgzf index files (.gzi) to be written to / read from an existing - hFILE handle. - -* hts_idx_push() will report when trying to add a range to an index that - is beyond the limits that the given index can handle. This means trying - to index chromosomes longer than 2^29 bases with a .bai or .tbi index - will report an error instead of apparently working but creating an invalid - index entry. - -* VCF formatting is now approximately 4x faster. (Whether this is - noticeable depends on what was creating the VCF.) - -* CRAM lossy_names mode now works with TLEN of 0 or TLEN within +/- 1 - of the computed value. Note in these situations TLEN will be - generated / fixed during CRAM decode. - -* CRAM now supports bzip2 and lzma codecs. Within htslib these are - disabled by default, but can be enabled by specifying "use_bzip2" or - "use_lzma" in an hts_opt_add() call or via the mode string of the - hts_open_format() function. - -Noteworthy changes in release 1.3.2 (13 September 2016) - -* Corrected bin calculation when converting directly from CRAM to BAM. - Previously a small fraction of converted reads would fail Picard's - validation with "bin field of BAM record does not equal value computed" - (SAMtools issue #574). - -* Plugins can now signal to HTSlib which of RTLD_LOCAL and RTLD_GLOBAL - they wish to be opened with -- previously they were always RTLD_LOCAL. 
- - -Noteworthy changes in release 1.3.1 (22 April 2016) - -* Improved error checking and reporting, especially of I/O errors when - writing output files (#17, #315, PR #271, PR #317). - -* Build fixes for 32-bit systems; be sure to run configure to enable - large file support and access to 2GiB+ files. - -* Numerous VCF parsing fixes (#321, #322, #323, #324, #325; PR #370). - Particular thanks to Kostya Kortchinsky of the Google Security Team - for testing and numerous input parsing bug reports. - -* HTSlib now prints an informational message when initially creating a - CRAM reference cache in the default location under your $HOME directory. - (No message is printed if you are using $REF_CACHE to specify a location.) - -* Avoided rare race condition when caching downloaded CRAM reference sequence - files, by using distinctive names for temporary files (in addition to O_EXCL, - which has always been used). Occasional corruption would previously occur - when multiple tools were simultaneously caching the same reference sequences - on an NFS filesystem that did not support O_EXCL (PR #320). - -* Prevented race condition in file access plugin loading (PR #341). - -* Fixed mpileup memory leak, so no more "[bam_plp_destroy] memory leak [...] - Continue anyway" warning messages (#299). - -* Various minor CRAM fixes. - -* Fixed documentation problems #348 and #358. - - -Noteworthy changes in release 1.3 (15 December 2015) - -* Files can now be accessed via HTTPS and Amazon S3 in addition to HTTP - and FTP, when HTSlib is configured to use libcurl for network file access - rather than the included basic knetfile networking. - -* HTSlib can be built to use remote access hFILE backends (such as iRODS - and libcurl) via a plugin mechanism. This allows other backends to be - easily added and facilitates building tools that use HTSlib, as they - don't need to be linked with the backends' various required libraries. 
- -* When writing CRAM output, sam_open() etc now default to writing CRAM v3.0 - rather than v2.1. - -* fai_build() and samtools faidx now accept initial whitespace in ">" - headers (e.g., "> chr1 description" is taken to refer to "chr1"). - -* tabix --only-header works again (was broken in 1.2.x; #249). - -* HTSlib's configure script and Makefile now fully support the standard - convention of allowing CC/CPPFLAGS/CFLAGS/LDFLAGS/LIBS to be overridden - as needed. Previously the Makefile listened to $(LDLIBS) instead; if you - were overriding that, you should now override LIBS rather than LDLIBS. - -* Fixed bugs #168, #172, #176, #197, #206, #225, #245, #265, #295, and #296. - - -Noteworthy changes in release 1.2.1 (3 February 2015) - -* Reinstated hts_file_type() and FT_* macros, which were available until 1.1 - but briefly removed in 1.2. This function is deprecated and will be removed - in a future release -- you should use hts_detect_format() etc instead - - -Noteworthy changes in release 1.2 (2 February 2015) - -* HTSlib now has a configure script which checks your build environment - and allows for selection of optional extras. See INSTALL for details - -* By default, reference sequences are fetched from the EBI CRAM Reference - Registry and cached in your $HOME cache directory. 
This behaviour can - be controlled by setting REF_PATH and REF_CACHE environment variables - (see the samtools(1) man page for details) - -* Numerous CRAM improvements: - - Support for CRAM v3.0, an upcoming revision to CRAM supporting - better compression and per-container checksums - - EOF checking for v2.1 and v3.0 (similar to checking BAM EOF blocks) - - Non-standard values for PNEXT and TLEN fields are now preserved - - hts_set_fai_filename() now provides a reference file when encoding - - Generated read names are now numbered from 1, rather than being - labelled 'slice:record-in-slice' - - Multi-threading and speed improvements - -* New htsfile command for identifying file formats, and corresponding - file format detection APIs - -* New tabix --regions FILE, --targets FILE options for filtering via BED files - -* Optional iRODS file access, disabled by default. Configure with --with-irods - to enable accessing iRODS data objects directly via 'irods:DATAOBJ' - -* All occurrences of 2^29 in the source have been eliminated, so indexing - and querying against reference sequences larger than 512Mbp works (when - using CSI indices) - -* Support for plain GZIP compression in various places - -* VCF header editing speed improvements - -* Added seq_nt16_int[] (equivalent to the samtools API's bam_nt16_nt4_table) - -* Reinstated faidx_fetch_nseq(), which was accidentally removed from 1.1. - Now faidx_fetch_nseq() and faidx_nseq() are equivalent; eventually - faidx_fetch_nseq() will be deprecated and removed [#156] - -* Fixed bugs #141, #152, #155, #158, #159, and various memory leaks diff --git a/src/htslib-1.19.1/README b/src/htslib-1.19.1/README deleted file mode 100644 index db368af..0000000 --- a/src/htslib-1.19.1/README +++ /dev/null @@ -1,27 +0,0 @@ -HTSlib is an implementation of a unified C library for accessing common file -formats, such as SAM, CRAM, VCF, and BCF, used for high-throughput sequencing -data. It is the core library used by samtools and bcftools. 
- -See INSTALL for building and installation instructions. - -Please cite this paper when using HTSlib for your publications: - -HTSlib: C library for reading/writing high-throughput sequencing data -James K Bonfield, John Marshall, Petr Danecek, Heng Li, Valeriu Ohan, Andrew Whitwham, Thomas Keane, Robert M Davies -GigaScience, Volume 10, Issue 2, February 2021, giab007, https://doi.org/10.1093/gigascience/giab007 - -@article{10.1093/gigascience/giab007, - author = {Bonfield, James K and Marshall, John and Danecek, Petr and Li, Heng and Ohan, Valeriu and Whitwham, Andrew and Keane, Thomas and Davies, Robert M}, - title = "{HTSlib: C library for reading/writing high-throughput sequencing data}", - journal = {GigaScience}, - volume = {10}, - number = {2}, - year = {2021}, - month = {02}, - abstract = "{Since the original publication of the VCF and SAM formats, an explosion of software tools have been created to process these data files. To facilitate this a library was produced out of the original SAMtools implementation, with a focus on performance and robustness. The file formats themselves have become international standards under the jurisdiction of the Global Alliance for Genomics and Health.We present a software library for providing programmatic access to sequencing alignment and variant formats. It was born out of the widely used SAMtools and BCFtools applications. Considerable improvements have been made to the original code plus many new features including newer access protocols, the addition of the CRAM file format, better indexing and iterators, and better use of threading.Since the original Samtools release, performance has been considerably improved, with a BAM read-write loop running 5 times faster and BAM to SAM conversion 13 times faster (both using 16 threads, compared to Samtools 0.1.19). Widespread adoption has seen HTSlib downloaded \\>1 million times from GitHub and conda. 
The C library has been used directly by an estimated 900 GitHub projects and has been incorporated into Perl, Python, Rust, and R, significantly expanding the number of uses via other languages. HTSlib is open source and is freely available from htslib.org under MIT/BSD license.}", - issn = {2047-217X}, - doi = {10.1093/gigascience/giab007}, - url = {https://doi.org/10.1093/gigascience/giab007}, - note = {giab007}, - eprint = {https://academic.oup.com/gigascience/article-pdf/10/2/giab007/36332285/giab007.pdf}, -} diff --git a/src/htslib-1.19.1/README.large_positions.md b/src/htslib-1.19.1/README.large_positions.md deleted file mode 100644 index 3e2b2c9..0000000 --- a/src/htslib-1.19.1/README.large_positions.md +++ /dev/null @@ -1,234 +0,0 @@ -# HTSlib 64 bit reference positions - -HTSlib version 1.10 onwards internally use 64 bit reference positions. This -is to support analysis of species like axolotl, tulip and marbled lungfish -which have, or are expected to have, chromosomes longer than two gigabases. - -# File format support - -Currently 64 bit positions can only be stored in SAM and VCF format files. -Binary BAM, CRAM and BCF cannot be used due to limitations in the formats -themselves. As SAM and VCF are text formats, they have no limit on the -size of numeric values. Note that while 64 bit positions are supported by -default for SAM, for VCF they must be enabled explicitly at compile time -by editing Makefile and adding -DVCF_ALLOW_INT64=1 to CFLAGS. - -# Compatibility issues to check - -Various data structure members, function parameters, and return values have -been expanded from 32 to 64 bits. As a result, some changes may be needed to -code that uses the library, even if it does not support long references. - -## Variadic functions taking format strings - -The type of various structure members (e.g. `bam1_core_t::pos`) and return -values from some functions (e.g. `bam_cigar2rlen()`) have been changed to -`hts_pos_t`, which is a 64-bit signed integer. 
Using these in 32-bit -code will generally work (as long as the stored positions are within range), -however care needs to be taken when these values are passed directly -to functions like `printf()` which take a variable-length argument list and -a format string. - -Header file `htslib/hts.h` defines macro `PRIhts_pos` which can be -used in `printf()` format strings to get the correct format specifier for -an `hts_pos_t` value. Code that needs to print positions should be -changed from: - -```c -printf("Position is %d\n", bam->core.pos); -``` - -to: - -```c -printf("Position is %"PRIhts_pos"\n", bam->core.pos); -``` - -If for some reason compatibility with older versions of HTSlib (which do -not have `hts_pos_t` or `PRIhts_pos`) is needed, the value can be cast to -`int64_t` and printed as an explicitly 64-bit value: - -```c -#include // For PRId64 and int64_t - -printf("Position is %" PRId64 "\n", (int64_t) bam->core.pos); -``` - -Passing incorrect types to variadic functions like `printf()` can lead -to incorrect behaviour and security risks, so it important to track down -and fix all of the places where this may happen. Modern C compilers like -gcc (version 3.0 onwards) and clang can check `printf()` and `scanf()` -parameter types for compatibility against the format string. To -enable this, build code with `-Wall` or `-Wformat` and fix all the -reported warnings. - -Where functions that take `printf`-style format strings are implemented, -they should use the appropriate gcc attributes to enable format string -checking. `htslib/hts_defs.h` includes macros `HTS_FORMAT` and -`HTS_PRINTF_FMT` which can be used to provide the attribute declaration -in a portable way. For example, `test/sam.c` uses them for a function -that prints error messages: - -``` -void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) fail(const char *fmt, ...) { /* ... */ } -``` - -## Implicit type conversions - -Conversion of signed `int` or `int32_t` to `hts_pos_t` will always work. 
- -Conversion of `hts_pos_t` to `int` or `int32_t` will work as long as the value -converted is within the range that can be stored in the destination. - -Code that casts unsigned `uint32_t` values to signed with the expectation -that the result may be negative will no longer work as `hts_pos_t` can store -values over UINT32_MAX. Such code should be changed to use signed values. - -Functions hts_parse_region() and hts_parse_reg64() return special value -`HTS_POS_MAX` for regions which extend to the end of the reference. -This value is slightly smaller than INT64_MAX, but should be larger than -any reference that is likely to be used. When cast to `int32_t` the -result should be `INT32_MAX`. - -# Upgrading code to work with 64 bit positions - -Variables used to store reference positions should be changed to -type `hts_pos_t`. Use `PRIhts_pos` in format strings when printing them. - -When converting positions stored in strings, use `strtoll()` in place of -`atoi()` or `strtol()` (which produces a 32 bit value on 64-bit Windows and -all 32-bit platforms). - -Programs which need to look up a reference sequence length from a `sam_hdr_t` -structure should use `sam_hdr_tid2len()` instead of the old -`sam_hdr_t::target_len` array (which is left as 32-bit for reasons of -compatibility). `sam_hdr_tid2len()` returns `hts_pos_t`, so works correctly -for large references. - -Various functions which take pointer arguments have new versions which -support `hts_pos_t *` arguments. Code supporting 64-bit positions should -use the new versions. 
These are: - -Original function | 64-bit version ------------------- | -------------------- -fai_fetch() | fai_fetch64() -fai_fetchqual() | fai_fetchqual64() -faidx_fetch_seq() | faidx_fetch_seq64() -faidx_fetch_qual() | faidx_fetch_qual64() -hts_parse_reg() | hts_parse_reg64() or hts_parse_region() -bam_plp_auto() | bam_plp64_auto() -bam_plp_next() | bam_plp64_next() -bam_mplp_auto() | bam_mplp64_auto() - -Limited support has been added for 64-bit INFO values in VCF files, for large -values in structural variant END tags. New functions `bcf_update_info_int64()` -and `bcf_get_info_int64()` can be used to set and fetch 64-bit INFO values. -They both take arrays of `int64_t`. `bcf_int64_missing` and -`bcf_int64_vector_end` can be used to set missing and vector end values in -these arrays. The INFO data is stored in the minimum size needed, so there -is no harm in using these functions to store smaller integer values. - -# Structure members that have changed size - -``` -File htslib/hts.h: - hts_pair32_t::begin - hts_pair32_t::end - - (typedef hts_pair_pos_t is provided as a better-named replacement for hts_pair32_t) - - hts_reglist_t::min_beg - hts_reglist_t::max_end - - hts_itr_t::beg - hts_itr_t::end - hts_itr_t::curr_beg - hts_itr_t::curr_end - -File htslib/regidx.h: - reg_t::start - reg_t::end - -File htslib/sam.h: - bam1_core_t::pos - bam1_core_t::mpos - bam1_core_t::isize - -File htslib/synced_bcf_reader.h: - bcf_sr_regions_t::start - bcf_sr_regions_t::end - bcf_sr_regions_t::prev_start - -File htslib/vcf.h: - bcf_idinfo_t::info - - bcf_info_t::v1::i - - bcf1_t::pos - bcf1_t::rlen -``` - -# Functions where parameters or the return value have changed size - -Functions are annotated as follows: - -* `[new]` The function has been added since version 1.9 -* `[parameters]` Function parameters have changed size -* `[return]` Function return value has changed size - -``` -File htslib/faidx.h: - - [new] fai_fetch64() - [new] fai_fetchqual64() - [new] 
faidx_fetch_seq64() - [new] faidx_fetch_qual64() - [new] fai_parse_region() - -File htslib/hts.h: - - [parameters] hts_idx_push() - [new] hts_parse_reg64() - [parameters] hts_itr_query() - [parameters] hts_reg2bin() - -File htslib/kstring.h: - - [new] kputll() - -File htslib/regidx.h: - - [parameters] regidx_overlap() - -File htslib/sam.h: - - [new] sam_hdr_tid2len() - [return] bam_cigar2qlen() - [return] bam_cigar2rlen() - [return] bam_endpos() - [parameters] bam_itr_queryi() - [parameters] sam_itr_queryi() - [new] bam_plp64_next() - [new] bam_plp64_auto() - [new] bam_mplp64_auto() - [parameters] sam_cap_mapq() - [parameters] sam_prob_realn() - -File htslib/synced_bcf_reader.h: - - [parameters] bcf_sr_seek() - [parameters] bcf_sr_regions_overlap() - -File htslib/tbx.h: - - [parameters] tbx_readrec() - -File htslib/vcf.h: - - [parameters] bcf_readrec() - [new] bcf_update_info_int64() - [new] bcf_get_info_int64() - [return] bcf_dec_int1() - [return] bcf_dec_typed_int1() - -``` diff --git a/src/htslib-1.19.1/bcf_sr_sort.c b/src/htslib-1.19.1/bcf_sr_sort.c deleted file mode 100644 index 01e98bb..0000000 --- a/src/htslib-1.19.1/bcf_sr_sort.c +++ /dev/null @@ -1,707 +0,0 @@ -/* - Copyright (C) 2017-2021 Genome Research Ltd. - - Author: Petr Danecek - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include - -#include "bcf_sr_sort.h" -#include "htslib/khash_str2int.h" -#include "htslib/kbitset.h" - -#define SR_REF 1 -#define SR_SNP 2 -#define SR_INDEL 4 -#define SR_OTHER 8 -#define SR_SCORE(srt,a,b) (srt)->score[((a)<<4)|(b)] - -// Logical AND -static inline int kbs_logical_and(kbitset_t *bs1, kbitset_t *bs2) -{ - // General case, bitsets of unequal size: - // int i, n = bs1->n < bs2->n ? bs1->n : bs2->n; - int i, n = bs1->n; - - for (i=0; ib[i] & bs2->b[i] ) return 1; - return 0; -} - -// Bitwise OR, dst will be modified, src will be left unchanged -static inline void kbs_bitwise_or(kbitset_t *dst, kbitset_t *src) -{ - int i; - for (i=0; in; i++) dst->b[i] |= src->b[i]; -} - - -static void bcf_sr_init_scores(sr_sort_t *srt) -{ - int i,jbit,kbit; - - // lower number = lower priority, zero means forbidden - - if ( srt->pair & BCF_SR_PAIR_ANY ) srt->pair |= (BCF_SR_PAIR_SNPS | BCF_SR_PAIR_INDELS | BCF_SR_PAIR_SNP_REF | BCF_SR_PAIR_INDEL_REF); - if ( srt->pair & BCF_SR_PAIR_SNPS ) SR_SCORE(srt,SR_SNP,SR_SNP) = 3; - if ( srt->pair & BCF_SR_PAIR_INDELS ) SR_SCORE(srt,SR_INDEL,SR_INDEL) = 3; - if ( srt->pair & BCF_SR_PAIR_SNP_REF ) - { - SR_SCORE(srt,SR_SNP,SR_REF) = 2; - SR_SCORE(srt,SR_REF,SR_SNP) = 2; - } - if ( srt->pair & BCF_SR_PAIR_INDEL_REF ) - { - SR_SCORE(srt,SR_INDEL,SR_REF) = 2; - SR_SCORE(srt,SR_REF,SR_INDEL) = 2; - } - if ( srt->pair & BCF_SR_PAIR_ANY ) - { - for (i=0; i<256; 
i++) - if ( !srt->score[i] ) srt->score[i] = 1; - } - - // set all combinations - for (i=0; i<256; i++) - { - if ( srt->score[i] ) continue; // already set - int max = 0; - for (jbit=0; jbit<4; jbit++) // high bits - { - int j = 1<score[i] = max; - } -} -static int multi_is_exact(var_t *avar, var_t *bvar) -{ - if ( avar->nalt != bvar->nalt ) return 0; - - int alen = strlen(avar->str); - int blen = strlen(bvar->str); - if ( alen != blen ) return 0; - - char *abeg = avar->str; - while ( *abeg ) - { - char *aend = abeg; - while ( *aend && *aend!=',' ) aend++; - - char *bbeg = bvar->str; - while ( *bbeg ) - { - char *bend = bbeg; - while ( *bend && *bend!=',' ) bend++; - if ( bend - bbeg == aend - abeg && !strncasecmp(abeg,bbeg,bend-bbeg) ) break; - bbeg = *bend ? bend+1 : bend; - } - if ( !*bbeg ) return 0; - - abeg = *aend ? aend+1 : aend; - } - return 1; -} -static int multi_is_subset(var_t *avar, var_t *bvar) -{ - char *abeg = avar->str; - while ( *abeg ) - { - char *aend = abeg; - while ( *aend && *aend!=',' ) aend++; - - char *bbeg = bvar->str; - while ( *bbeg ) - { - char *bend = bbeg; - while ( *bend && *bend!=',' ) bend++; - if ( bend - bbeg == aend - abeg && !strncasecmp(abeg,bbeg,bend-bbeg) ) return 1; - bbeg = *bend ? bend+1 : bend; - } - abeg = *aend ? 
aend+1 : aend; - } - return 0; -} -static uint32_t pairing_score(sr_sort_t *srt, int ivset, int jvset) -{ - varset_t *iv = &srt->vset[ivset]; - varset_t *jv = &srt->vset[jvset]; - - // Restrictive logic: the strictest type from a group is selected, - // so that, for example, snp+ref does not lead to the inclusion of an indel - int i,j; - uint32_t min = UINT32_MAX; - for (i=0; invar; i++) - { - var_t *ivar = &srt->var[iv->var[i]]; - for (j=0; jnvar; j++) - { - var_t *jvar = &srt->var[jv->var[j]]; - if ( srt->pair & BCF_SR_PAIR_EXACT ) - { - if ( ivar->type != jvar->type ) continue; - if ( !strcmp(ivar->str,jvar->str) ) return UINT32_MAX; // exact match, best possibility - if ( multi_is_exact(ivar,jvar) ) return UINT32_MAX; // identical alleles - continue; - } - if ( ivar->type==jvar->type && !strcmp(ivar->str,jvar->str) ) return UINT32_MAX; // exact match, best possibility - if ( ivar->type & jvar->type && multi_is_subset(ivar,jvar) ) return UINT32_MAX; // one of the alleles is identical - - uint32_t score = SR_SCORE(srt,ivar->type,jvar->type); - if ( !score ) return 0; // some of the varsets in the two groups are not compatible, will not pair - if ( min>score ) min = score; - } - } - if ( srt->pair & BCF_SR_PAIR_EXACT ) return 0; - - assert( min!=UINT32_MAX ); - - uint32_t cnt = 0; - for (i=0; invar; i++) cnt += srt->var[iv->var[i]].nvcf; - for (j=0; jnvar; j++) cnt += srt->var[jv->var[j]].nvcf; - - return (1u<<(28+min)) + cnt; -} -static void remove_vset(sr_sort_t *srt, int jvset) -{ - if ( jvset+1 < srt->nvset ) - { - varset_t tmp = srt->vset[jvset]; - memmove(&srt->vset[jvset], &srt->vset[jvset+1], sizeof(varset_t)*(srt->nvset - jvset - 1)); - srt->vset[srt->nvset-1] = tmp; - - int *jmat = srt->pmat + jvset*srt->ngrp; - memmove(jmat, &jmat[srt->ngrp],sizeof(int)*(srt->nvset - jvset - 1)*srt->ngrp); - - memmove(&srt->cnt[jvset], &srt->cnt[jvset+1], sizeof(int)*(srt->nvset - jvset - 1)); - } - srt->nvset--; -} -static int merge_vsets(sr_sort_t *srt, int ivset, int 
jvset) -{ - int i,j; - if ( ivset > jvset ) { i = ivset; ivset = jvset; jvset = i; } - - varset_t *iv = &srt->vset[ivset]; - varset_t *jv = &srt->vset[jvset]; - - kbs_bitwise_or(iv->mask,jv->mask); - - i = iv->nvar; - iv->nvar += jv->nvar; - hts_expand(int, iv->nvar, iv->mvar, iv->var); - for (j=0; jnvar; j++,i++) iv->var[i] = jv->var[j]; - - int *imat = srt->pmat + ivset*srt->ngrp; - int *jmat = srt->pmat + jvset*srt->ngrp; - for (i=0; ingrp; i++) imat[i] += jmat[i]; - srt->cnt[ivset] += srt->cnt[jvset]; - - remove_vset(srt, jvset); - - return ivset; -} - -static int push_vset(sr_sort_t *srt, int ivset) -{ - varset_t *iv = &srt->vset[ivset]; - int i,j; - for (i=0; isr->nreaders; i++) - { - vcf_buf_t *buf = &srt->vcf_buf[i]; - buf->nrec++; - hts_expand(bcf1_t*,buf->nrec,buf->mrec,buf->rec); - buf->rec[buf->nrec-1] = NULL; - } - for (i=0; invar; i++) - { - var_t *var = &srt->var[ iv->var[i] ]; - for (j=0; jnvcf; j++) - { - int jvcf = var->vcf[j]; - vcf_buf_t *buf = &srt->vcf_buf[jvcf]; - buf->rec[buf->nrec-1] = var->rec[j]; - } - } - remove_vset(srt, ivset); - return 0; // FIXME: check for errs in this function -} - -static int cmpstringp(const void *p1, const void *p2) -{ - return strcmp(* (char * const *) p1, * (char * const *) p2); -} - -#define DEBUG_VSETS 0 -#if DEBUG_VSETS -void debug_vsets(sr_sort_t *srt) -{ - int i,j,k; - for (i=0; invset; i++) - { - fprintf(stderr,"dbg_vset %d:", i); - for (j=0; jvset[i].mask->n; j++) fprintf(stderr,"%c%lu",j==0?' 
':':',srt->vset[i].mask->b[j]); - fprintf(stderr,"\t"); - for (j=0; jvset[i].nvar; j++) - { - var_t *var = &srt->var[srt->vset[i].var[j]]; - fprintf(stderr,"\t%s",var->str); - for (k=0; knvcf; k++) - fprintf(stderr,"%c%d", k==0?':':',',var->vcf[k]); - } - fprintf(stderr,"\n"); - } -} -#endif - -#define DEBUG_VBUF 0 -#if DEBUG_VBUF -void debug_vbuf(sr_sort_t *srt) -{ - int i, j; - for (j=0; jvcf_buf[0].nrec; j++) - { - fprintf(stderr,"dbg_vbuf %d:\t", j); - for (i=0; isr->nreaders; i++) - { - vcf_buf_t *buf = &srt->vcf_buf[i]; - fprintf(stderr,"\t%"PRIhts_pos, buf->rec[j] ? buf->rec[j]->pos+1 : 0); - } - fprintf(stderr,"\n"); - } -} -#endif - -static char *grp_create_key(sr_sort_t *srt) -{ - if ( !srt->str.l ) return strdup(""); - int i; - hts_expand(char*,srt->noff,srt->mcharp,srt->charp); - for (i=0; inoff; i++) - { - srt->charp[i] = srt->str.s + srt->off[i]; - if ( i>0 ) srt->charp[i][-1] = 0; - } - qsort(srt->charp, srt->noff, sizeof(*srt->charp), cmpstringp); - char *ret = (char*) malloc(srt->str.l + 1), *ptr = ret; - for (i=0; inoff; i++) - { - int len = strlen(srt->charp[i]); - memcpy(ptr, srt->charp[i], len); - ptr += len + 1; - ptr[-1] = i+1==srt->noff ? 
0 : ';'; - } - return ret; -} -int bcf_sr_sort_set_active(sr_sort_t *srt, int idx) -{ - hts_expand(int,idx+1,srt->mactive,srt->active); - srt->nactive = 1; - srt->active[srt->nactive - 1] = idx; - return 0; // FIXME: check for errs in this function -} -int bcf_sr_sort_add_active(sr_sort_t *srt, int idx) -{ - hts_expand(int,idx+1,srt->mactive,srt->active); - srt->nactive++; - srt->active[srt->nactive - 1] = idx; - return 0; // FIXME: check for errs in this function -} -static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, hts_pos_t min_pos) -{ - if ( !srt->grp_str2int ) - { - // first time here, initialize - if ( !srt->pair ) - { - if ( readers->collapse==COLLAPSE_NONE ) readers->collapse = BCF_SR_PAIR_EXACT; - bcf_sr_set_opt(readers, BCF_SR_PAIR_LOGIC, readers->collapse); - } - bcf_sr_init_scores(srt); - srt->grp_str2int = khash_str2int_init(); - srt->var_str2int = khash_str2int_init(); - } - int k; - khash_t(str2int) *hash; - hash = srt->grp_str2int; - for (k=0; k < kh_end(hash); k++) - if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k)); - hash = srt->var_str2int; - for (k=0; k < kh_end(hash); k++) - if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k)); - kh_clear(str2int, srt->grp_str2int); - kh_clear(str2int, srt->var_str2int); - srt->ngrp = srt->nvar = srt->nvset = 0; - - grp_t grp; - memset(&grp,0,sizeof(grp_t)); - - // group VCFs into groups, each with a unique combination of variants in the duplicate lines - int ireader,ivar,irec,igrp,ivset,iact; - for (ireader=0; ireadernreaders; ireader++) srt->vcf_buf[ireader].nrec = 0; - for (iact=0; iactnactive; iact++) - { - ireader = srt->active[iact]; - bcf_sr_t *reader = &readers->readers[ireader]; - int rid = bcf_hdr_name2id(reader->header, chr); - grp.nvar = 0; - hts_expand(int,reader->nbuffer,srt->moff,srt->off); - srt->noff = 0; - srt->str.l = 0; - for (irec=1; irec<=reader->nbuffer; irec++) - { - bcf1_t *line = reader->buffer[irec]; - if ( line->rid!=rid || line->pos!=min_pos ) break; 
- - if ( srt->str.l ) kputc(';',&srt->str); - srt->off[srt->noff++] = srt->str.l; - size_t beg = srt->str.l; - int end_pos = -1; - for (ivar=1; ivarn_allele; ivar++) - { - if ( ivar>1 ) kputc(',',&srt->str); - kputs(line->d.allele[0],&srt->str); - kputc('>',&srt->str); - kputs(line->d.allele[ivar],&srt->str); - - // If symbolic allele, check also the END tag in case there are multiple events, - // such as s, starting at the same positions - if ( line->d.allele[ivar][0]=='<' ) - { - if ( end_pos==-1 ) - { - bcf_info_t *end_info = bcf_get_info(reader->header,line,"END"); - if ( end_info ) - end_pos = (int)end_info->v1.i; // this is only to create a unique id, we don't mind a potential int64 overflow - else - end_pos = 0; - } - if ( end_pos ) - { - kputc('/',&srt->str); - kputw(end_pos, &srt->str); - } - } - } - if ( line->n_allele==1 ) - { - kputs(line->d.allele[0],&srt->str); - kputsn(">.",2,&srt->str); - } - - // Create new variant or attach to existing one. But careful, there can be duplicate - // records with the same POS,REF,ALT (e.g. 
in dbSNP-b142) - char *var_str = beg + srt->str.s; - int ret, var_idx = 0, var_end = srt->str.l; - while ( 1 ) - { - ret = khash_str2int_get(srt->var_str2int, var_str, &ivar); - if ( ret==-1 ) break; - - var_t *var = &srt->var[ivar]; - if ( var->vcf[var->nvcf-1] != ireader ) break; - - srt->str.l = var_end; - kputw(var_idx, &srt->str); - var_str = beg + srt->str.s; - var_idx++; - } - if ( ret==-1 ) - { - ivar = srt->nvar++; - hts_expand0(var_t,srt->nvar,srt->mvar,srt->var); - srt->var[ivar].nvcf = 0; - khash_str2int_set(srt->var_str2int, strdup(var_str), ivar); - free(srt->var[ivar].str); // possible left-over from the previous position - } - var_t *var = &srt->var[ivar]; - var->nalt = line->n_allele - 1; - var->type = bcf_get_variant_types(line); - srt->str.s[var_end] = 0; - if ( ret==-1 ) - var->str = strdup(var_str); - - int mvcf = var->mvcf; - var->nvcf++; - hts_expand0(int*, var->nvcf, var->mvcf, var->vcf); - if ( mvcf != var->mvcf ) var->rec = (bcf1_t **) realloc(var->rec,sizeof(bcf1_t*)*var->mvcf); - var->vcf[var->nvcf-1] = ireader; - var->rec[var->nvcf-1] = line; - - grp.nvar++; - hts_expand(var_t,grp.nvar,grp.mvar,grp.var); - grp.var[grp.nvar-1] = ivar; - } - char *grp_key = grp_create_key(srt); - int ret = khash_str2int_get(srt->grp_str2int, grp_key, &igrp); - if ( ret==-1 ) - { - igrp = srt->ngrp++; - hts_expand0(grp_t, srt->ngrp, srt->mgrp, srt->grp); - free(srt->grp[igrp].var); - srt->grp[igrp] = grp; - srt->grp[igrp].key = grp_key; - khash_str2int_set(srt->grp_str2int, grp_key, igrp); - memset(&grp,0,sizeof(grp_t)); - } - else - free(grp_key); - srt->grp[igrp].nvcf++; - } - free(grp.var); - - // initialize bitmask - which groups is the variant present in - for (ivar=0; ivarnvar; ivar++) - { - if ( kbs_resize(&srt->var[ivar].mask, srt->ngrp) < 0 ) - { - fprintf(stderr, "[%s:%d %s] kbs_resize failed\n", __FILE__,__LINE__,__func__); - exit(1); - } - kbs_clear(srt->var[ivar].mask); - } - for (igrp=0; igrpngrp; igrp++) - { - for (ivar=0; 
ivargrp[igrp].nvar; ivar++) - { - int i = srt->grp[igrp].var[ivar]; - kbs_insert(srt->var[i].mask, igrp); - } - } - - // create the initial list of variant sets - for (ivar=0; ivarnvar; ivar++) - { - ivset = srt->nvset++; - hts_expand0(varset_t, srt->nvset, srt->mvset, srt->vset); - - varset_t *vset = &srt->vset[ivset]; - vset->nvar = 1; - hts_expand0(var_t, vset->nvar, vset->mvar, vset->var); - vset->var[vset->nvar-1] = ivar; - var_t *var = &srt->var[ivar]; - vset->cnt = var->nvcf; - if ( kbs_resize(&vset->mask, srt->ngrp) < 0 ) - { - fprintf(stderr, "[%s:%d %s] kbs_resize failed\n", __FILE__,__LINE__,__func__); - exit(1); - } - kbs_clear(vset->mask); - kbs_bitwise_or(vset->mask, var->mask); - - int type = 0; - if ( var->type==VCF_REF ) type |= SR_REF; - else - { - if ( var->type & VCF_SNP ) type |= SR_SNP; - if ( var->type & VCF_MNP ) type |= SR_SNP; - if ( var->type & VCF_INDEL ) type |= SR_INDEL; - if ( var->type & VCF_OTHER ) type |= SR_OTHER; - } - var->type = type; - } -#if DEBUG_VSETS - debug_vsets(srt); -#endif - - // initialize the pairing matrix - hts_expand(int, srt->ngrp*srt->nvset, srt->mpmat, srt->pmat); - hts_expand(int, srt->nvset, srt->mcnt, srt->cnt); - memset(srt->pmat, 0, sizeof(*srt->pmat)*srt->ngrp*srt->nvset); - for (ivset=0; ivsetnvset; ivset++) - { - varset_t *vset = &srt->vset[ivset]; - for (igrp=0; igrpngrp; igrp++) srt->pmat[ivset*srt->ngrp+igrp] = 0; - srt->cnt[ivset] = vset->cnt; - } - - // pair the lines - while ( srt->nvset ) - { -#if DEBUG_VSETS - fprintf(stderr,"\n"); - debug_vsets(srt); -#endif - - int imax = 0; - for (ivset=1; ivsetnvset; ivset++) - if ( srt->cnt[imax] < srt->cnt[ivset] ) imax = ivset; - - int ipair = -1; - uint32_t max_score = 0; - for (ivset=0; ivsetnvset; ivset++) - { - if ( kbs_logical_and(srt->vset[imax].mask,srt->vset[ivset].mask) ) continue; // cannot be merged - uint32_t score = pairing_score(srt, imax, ivset); - // fprintf(stderr,"score: %d %d, logic=%d \t..\t %u\n", imax,ivset,srt->pair,score); - if ( 
max_score < score ) { max_score = score; ipair = ivset; } - } - - // merge rows creating a new variant set this way - if ( ipair!=-1 && ipair!=imax ) - { - imax = merge_vsets(srt, imax, ipair); - continue; - } - - push_vset(srt, imax); - } - - srt->chr = chr; - srt->pos = min_pos; - - return 0; // FIXME: check for errs in this function -} - -int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, hts_pos_t min_pos) -{ - int i,j; - assert( srt->nactive>0 ); - - if ( srt->nsr != readers->nreaders ) - { - srt->sr = readers; - if ( srt->nsr < readers->nreaders ) - { - srt->vcf_buf = (vcf_buf_t*) realloc(srt->vcf_buf,readers->nreaders*sizeof(vcf_buf_t)); - memset(srt->vcf_buf + srt->nsr, 0, sizeof(vcf_buf_t)*(readers->nreaders - srt->nsr)); - if ( srt->msr < srt->nsr ) srt->msr = srt->nsr; - } - srt->nsr = readers->nreaders; - srt->chr = NULL; - } - if ( srt->nactive == 1 ) - { - if ( readers->nreaders>1 ) - memset(readers->has_line, 0, readers->nreaders*sizeof(*readers->has_line)); - bcf_sr_t *reader = &readers->readers[srt->active[0]]; - assert( reader->buffer[1]->pos==min_pos ); - bcf1_t *tmp = reader->buffer[0]; - for (j=1; j<=reader->nbuffer; j++) reader->buffer[j-1] = reader->buffer[j]; - reader->buffer[ reader->nbuffer ] = tmp; - reader->nbuffer--; - readers->has_line[srt->active[0]] = 1; - return 1; - } - if ( !srt->chr || srt->pos!=min_pos || strcmp(srt->chr,chr) ) bcf_sr_sort_set(readers, srt, chr, min_pos); - - if ( !srt->vcf_buf[0].nrec ) return 0; - -#if DEBUG_VBUF - debug_vbuf(srt); -#endif - - int nret = 0; - for (i=0; isr->nreaders; i++) - { - vcf_buf_t *buf = &srt->vcf_buf[i]; - - if ( buf->rec[0] ) - { - bcf_sr_t *reader = &srt->sr->readers[i]; - for (j=1; j<=reader->nbuffer; j++) - if ( reader->buffer[j] == buf->rec[0] ) break; - - assert( j<=reader->nbuffer ); - - bcf1_t *tmp = reader->buffer[0]; - reader->buffer[0] = reader->buffer[j++]; - for (; j<=reader->nbuffer; j++) reader->buffer[j-1] = reader->buffer[j]; - reader->buffer[ 
reader->nbuffer ] = tmp; - reader->nbuffer--; - - nret++; - srt->sr->has_line[i] = 1; - } - else - srt->sr->has_line[i] = 0; - - buf->nrec--; - if ( buf->nrec > 0 ) - memmove(buf->rec, &buf->rec[1], buf->nrec*sizeof(bcf1_t*)); - } - return nret; -} -void bcf_sr_sort_remove_reader(bcf_srs_t *readers, sr_sort_t *srt, int i) -{ - //vcf_buf is allocated only in bcf_sr_sort_next - //So, a call to bcf_sr_add_reader() followed immediately by bcf_sr_remove_reader() - //would cause the program to crash in this segment - if (srt->vcf_buf) - { - free(srt->vcf_buf[i].rec); - if ( i+1 < srt->nsr ) - memmove(&srt->vcf_buf[i], &srt->vcf_buf[i+1], (srt->nsr - i - 1)*sizeof(vcf_buf_t)); - memset(srt->vcf_buf + srt->nsr - 1, 0, sizeof(vcf_buf_t)); - } -} -sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt) -{ - if ( !srt ) return calloc(1,sizeof(sr_sort_t)); - memset(srt,0,sizeof(sr_sort_t)); - return srt; -} -void bcf_sr_sort_reset(sr_sort_t *srt) -{ - srt->chr = NULL; -} -void bcf_sr_sort_destroy(sr_sort_t *srt) -{ - free(srt->active); - if ( srt->var_str2int ) khash_str2int_destroy_free(srt->var_str2int); - if ( srt->grp_str2int ) khash_str2int_destroy_free(srt->grp_str2int); - int i; - for (i=0; insr; i++) free(srt->vcf_buf[i].rec); - free(srt->vcf_buf); - for (i=0; imvar; i++) - { - free(srt->var[i].str); - free(srt->var[i].vcf); - free(srt->var[i].rec); - kbs_destroy(srt->var[i].mask); - } - free(srt->var); - for (i=0; imgrp; i++) - free(srt->grp[i].var); - free(srt->grp); - for (i=0; imvset; i++) - { - kbs_destroy(srt->vset[i].mask); - free(srt->vset[i].var); - } - free(srt->vset); - free(srt->str.s); - free(srt->off); - free(srt->charp); - free(srt->cnt); - free(srt->pmat); - memset(srt,0,sizeof(*srt)); -} - diff --git a/src/htslib-1.19.1/bcf_sr_sort.h b/src/htslib-1.19.1/bcf_sr_sort.h deleted file mode 100644 index c8bd787..0000000 --- a/src/htslib-1.19.1/bcf_sr_sort.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - Copyright (C) 2017 Genome Research Ltd. 
- - Author: Petr Danecek - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -/* - Reorder duplicate lines so that compatible variant types are - returned together by bcf_sr_next_line() - - - readers grouped by variants. Even with many readers there will be - typically only several groups - -*/ - -#ifndef BCF_SR_SORT_H -#define BCF_SR_SORT_H - -#include "htslib/synced_bcf_reader.h" -#include "htslib/kbitset.h" - -typedef struct -{ - int nrec, mrec; - bcf1_t **rec; -} -vcf_buf_t; - -typedef struct -{ - char *str; // "A>C" for biallelic records or "A>C,A>CC" for multiallelic records - int type; // VCF_SNP, VCF_REF, etc. 
- int nalt; // number of alternate alleles in this record - int nvcf, mvcf, *vcf; // the list of readers with the same variants - bcf1_t **rec; // list of VCF records in the readers - kbitset_t *mask; // which groups contain the variant -} -var_t; - -typedef struct -{ - char *key; // only for debugging - int nvar, mvar, *var; // the variants and their type - int nvcf; // number of readers with the same variants -} -grp_t; - -typedef struct -{ - int nvar, mvar, *var; // list of compatible variants that can be output together - int cnt; // number of readers in this group - kbitset_t *mask; // which groups are populated in this set (replace with expandable bitmask) -} -varset_t; - -typedef struct -{ - uint8_t score[256]; - int nvar, mvar; - var_t *var; // list of all variants from all readers - int nvset, mvset; - int mpmat, *pmat; // pairing matrix, i-th vset and j-th group accessible as i*ngrp+j - int ngrp, mgrp; - int mcnt, *cnt; // number of VCF covered by a varset - grp_t *grp; // list of VCF representatives, each with a unique combination of duplicate lines - varset_t *vset; // list of variant sets - combinations of compatible variants across multiple groups ready for output - vcf_buf_t *vcf_buf; // records sorted in output order, for each VCF - bcf_srs_t *sr; - void *grp_str2int; - void *var_str2int; - kstring_t str; - int moff, noff, *off, mcharp; - char **charp; - const char *chr; - hts_pos_t pos; - int nsr, msr; - int pair; - int nactive, mactive, *active; // list of readers with lines at the current pos -} -sr_sort_t; - -sr_sort_t *bcf_sr_sort_init(sr_sort_t *srt); -void bcf_sr_sort_reset(sr_sort_t *srt); -int bcf_sr_sort_next(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, hts_pos_t pos); -int bcf_sr_sort_set_active(sr_sort_t *srt, int i); -int bcf_sr_sort_add_active(sr_sort_t *srt, int i); -void bcf_sr_sort_destroy(sr_sort_t *srt); -void bcf_sr_sort_remove_reader(bcf_srs_t *readers, sr_sort_t *srt, int i); - -#endif diff --git 
a/src/htslib-1.19.1/bgzf.c b/src/htslib-1.19.1/bgzf.c deleted file mode 100644 index 45fbd3d..0000000 --- a/src/htslib-1.19.1/bgzf.c +++ /dev/null @@ -1,2597 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology - 2011, 2012 Attractive Chaos - Copyright (C) 2009, 2013-2022 Genome Research Ltd - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
-*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_LIBDEFLATE -#include -#endif - -#include "htslib/hts.h" -#include "htslib/bgzf.h" -#include "htslib/hfile.h" -#include "htslib/thread_pool.h" -#include "htslib/hts_endian.h" -#include "cram/pooled_alloc.h" -#include "hts_internal.h" - -#ifndef EFTYPE -#define EFTYPE ENOEXEC -#endif - -#define BGZF_CACHE -#define BGZF_MT - -#define BLOCK_HEADER_LENGTH 18 -#define BLOCK_FOOTER_LENGTH 8 - - -/* BGZF/GZIP header (specialized from RFC 1952; little endian): - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - BGZF extension: - ^ ^ ^ ^ - | | | | - FLG.EXTRA XLEN B C - - BGZF format is compatible with GZIP. It limits the size of each compressed - block to 2^16 bytes and adds and an extra "BC" field in the gzip header which - records the size. - -*/ -static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0"; - -#ifdef BGZF_CACHE -typedef struct { - int size; - uint8_t *block; - int64_t end_offset; -} cache_t; - -#include "htslib/khash.h" -KHASH_MAP_INIT_INT64(cache, cache_t) -#endif - -struct bgzf_cache_t { - khash_t(cache) *h; - khint_t last_pos; -}; - -#ifdef BGZF_MT - -typedef struct bgzf_job { - BGZF *fp; - unsigned char comp_data[BGZF_MAX_BLOCK_SIZE]; - size_t comp_len; - unsigned char uncomp_data[BGZF_MAX_BLOCK_SIZE]; - size_t uncomp_len; - int errcode; - int64_t block_address; - int hit_eof; -} bgzf_job; - -enum mtaux_cmd { - NONE = 0, - SEEK, - SEEK_DONE, - HAS_EOF, - HAS_EOF_DONE, - CLOSE, -}; - -// When multi-threaded bgzf_tell won't work, so we delay the hts_idx_push -// until we've written the last block. 
-typedef struct { - hts_pos_t beg, end; - int tid, is_mapped; // args for hts_idx_push - uint64_t offset, block_number; -} hts_idx_cache_entry; - -typedef struct { - int nentries, mentries; // used and allocated - hts_idx_cache_entry *e; // hts_idx elements -} hts_idx_cache_t; - -typedef struct bgzf_mtaux_t { - // Memory pool for bgzf_job structs, to avoid many malloc/free - pool_alloc_t *job_pool; - bgzf_job *curr_job; - - // Thread pool - int n_threads; - int own_pool; - hts_tpool *pool; - - // Output queue holding completed bgzf_jobs - hts_tpool_process *out_queue; - - // I/O thread. - pthread_t io_task; - pthread_mutex_t job_pool_m; - int jobs_pending; // number of jobs waiting - int flush_pending; - void *free_block; - int hit_eof; // r/w entirely within main thread - - // Message passing to the reader thread; eg seek requests - int errcode; - uint64_t block_address; - int eof; - pthread_mutex_t command_m; // Set whenever fp is being updated - pthread_cond_t command_c; - enum mtaux_cmd command; - - // For multi-threaded on-the-fly indexing. See bgzf_idx_push below. - pthread_mutex_t idx_m; - hts_idx_t *hts_idx; - uint64_t block_number, block_written; - hts_idx_cache_t idx_cache; -} mtaux_t; -#endif - -typedef struct -{ - uint64_t uaddr; // offset w.r.t. uncompressed data - uint64_t caddr; // offset w.r.t. compressed data -} -bgzidx1_t; - -struct bgzidx_t -{ - int noffs, moffs; // the size of the index, n:used, m:allocated - bgzidx1_t *offs; // offsets - uint64_t ublock_addr; // offset of the current block (uncompressed data) -}; - -/* - * Buffers up arguments to hts_idx_push for later use, once we've written all bar - * this block. This is necessary when multiple blocks are in flight (threading) - * and fp->block_address isn't known at the time of call as we have in-flight - * blocks that haven't yet been compressed. - * - * NB: this only matters when we're indexing on the fly (writing). 
- * Normal indexing is threaded reads, but we already know block sizes - * so it's a simpler process - * - * Returns 0 on success, - * -1 on failure - */ -int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped) { - hts_idx_cache_entry *e; - mtaux_t *mt = fp->mt; - - if (!mt) - return hts_idx_push(hidx, tid, beg, end, offset, is_mapped); - - // Early check for out of range positions which would fail in hts_idx_push() - if (hts_idx_check_range(hidx, tid, beg, end) < 0) - return -1; - - pthread_mutex_lock(&mt->idx_m); - - mt->hts_idx = hidx; - hts_idx_cache_t *ic = &mt->idx_cache; - - if (ic->nentries >= ic->mentries) { - int new_sz = ic->mentries ? ic->mentries*2 : 1024; - if (!(e = realloc(ic->e, new_sz * sizeof(*ic->e)))) { - pthread_mutex_unlock(&mt->idx_m); - return -1; - } - ic->e = e; - ic->mentries = new_sz; - } - - e = &ic->e[ic->nentries++]; - e->tid = tid; - e->beg = beg; - e->end = end; - e->is_mapped = is_mapped; - e->offset = offset & 0xffff; - e->block_number = mt->block_number; - - pthread_mutex_unlock(&mt->idx_m); - - return 0; -} - -static int bgzf_idx_flush(BGZF *fp, - size_t block_uncomp_len, size_t block_comp_len) { - mtaux_t *mt = fp->mt; - - if (!mt->idx_cache.e) { - mt->block_written++; - return 0; - } - - pthread_mutex_lock(&mt->idx_m); - - hts_idx_cache_entry *e = mt->idx_cache.e; - int i; - - assert(mt->idx_cache.nentries == 0 || mt->block_written <= e[0].block_number); - - for (i = 0; i < mt->idx_cache.nentries && e[i].block_number == mt->block_written; i++) { - if (block_uncomp_len > 0 && e[i].offset == block_uncomp_len) { - /* - * If the virtual offset is at the end of the current block, - * adjust it to point to the start of the next one. This - * is needed when on-the-fly indexing has recorded a virtual - * offset just before a new block has been started, and makes - * on-the-fly and standard indexing give exactly the same results. 
- * - * In theory the two virtual offsets are equivalent, but pointing - * to the end of a block is inefficient, and caused problems with - * versions of HTSlib before 1.11 where bgzf_read() would - * incorrectly return EOF. - */ - - // Assert that this is the last entry for the current block_number - assert(i == mt->idx_cache.nentries - 1 - || e[i].block_number < e[i + 1].block_number); - - // Work out where the next block starts. For this entry, the - // offset will be zero. - uint64_t next_block_addr = mt->block_address + block_comp_len; - if (hts_idx_push(mt->hts_idx, e[i].tid, e[i].beg, e[i].end, - next_block_addr << 16, e[i].is_mapped) < 0) { - pthread_mutex_unlock(&mt->idx_m); - return -1; - } - // Count this entry and drop out of the loop - i++; - break; - } - - if (hts_idx_push(mt->hts_idx, e[i].tid, e[i].beg, e[i].end, - (mt->block_address << 16) + e[i].offset, - e[i].is_mapped) < 0) { - pthread_mutex_unlock(&mt->idx_m); - return -1; - } - } - - memmove(&e[0], &e[i], (mt->idx_cache.nentries - i) * sizeof(*e)); - mt->idx_cache.nentries -= i; - mt->block_written++; - - pthread_mutex_unlock(&mt->idx_m); - return 0; -} - -void bgzf_index_destroy(BGZF *fp); -int bgzf_index_add_block(BGZF *fp); -static int mt_destroy(mtaux_t *mt); - -static inline void packInt16(uint8_t *buffer, uint16_t value) -{ - buffer[0] = value; - buffer[1] = value >> 8; -} - -static inline int unpackInt16(const uint8_t *buffer) -{ - return buffer[0] | buffer[1] << 8; -} - -static inline void packInt32(uint8_t *buffer, uint32_t value) -{ - buffer[0] = value; - buffer[1] = value >> 8; - buffer[2] = value >> 16; - buffer[3] = value >> 24; -} - -static void razf_info(hFILE *hfp, const char *filename) -{ - uint64_t usize, csize; - off_t sizes_pos; - - if (filename == NULL || strcmp(filename, "-") == 0) filename = "FILE"; - - // RAZF files end with USIZE,CSIZE stored as big-endian uint64_t - if ((sizes_pos = hseek(hfp, -16, SEEK_END)) < 0) goto no_sizes; - if (hread(hfp, &usize, 8) != 8 || 
hread(hfp, &csize, 8) != 8) goto no_sizes; - if (!ed_is_big()) ed_swap_8p(&usize), ed_swap_8p(&csize); - if (csize >= sizes_pos) goto no_sizes; // Very basic validity check - - hts_log_error( -"To decompress this file, use the following commands:\n" -" truncate -s %" PRIu64 " %s\n" -" gunzip %s\n" -"The resulting uncompressed file should be %" PRIu64 " bytes in length.\n" -"If you do not have a truncate command, skip that step (though gunzip will\n" -"likely produce a \"trailing garbage ignored\" message, which can be ignored).", - csize, filename, filename, usize); - return; - -no_sizes: - hts_log_error( -"To decompress this file, use the following command:\n" -" gunzip %s\n" -"This will likely produce a \"trailing garbage ignored\" message, which can\n" -"usually be safely ignored.", filename); -} - -static const char *bgzf_zerr(int errnum, z_stream *zs) -{ - static char buffer[32]; - - /* Return zs->msg if available. - zlib doesn't set this very reliably. Looking at the source suggests - that it may get set to a useful message for deflateInit2, inflateInit2 - and inflate when it returns Z_DATA_ERROR. For inflate with other - return codes, deflate, deflateEnd and inflateEnd it doesn't appear - to be useful. For the likely non-useful cases, the caller should - pass NULL into zs. 
*/ - - if (zs && zs->msg) return zs->msg; - - // gzerror OF((gzFile file, int *errnum) - switch (errnum) { - case Z_ERRNO: - return strerror(errno); - case Z_STREAM_ERROR: - return "invalid parameter/compression level, or inconsistent stream state"; - case Z_DATA_ERROR: - return "invalid or incomplete IO"; - case Z_MEM_ERROR: - return "out of memory"; - case Z_BUF_ERROR: - return "progress temporarily not possible, or in() / out() returned an error"; - case Z_VERSION_ERROR: - return "zlib version mismatch"; - case Z_NEED_DICT: - return "data was compressed using a dictionary"; - case Z_OK: // 0: maybe gzgets error Z_NULL - default: - snprintf(buffer, sizeof(buffer), "[%d] unknown", errnum); - return buffer; // FIXME: Not thread-safe. - } -} - -static BGZF *bgzf_read_init(hFILE *hfpr, const char *filename) -{ - BGZF *fp; - uint8_t magic[18]; - ssize_t n = hpeek(hfpr, magic, 18); - if (n < 0) return NULL; - - fp = (BGZF*)calloc(1, sizeof(BGZF)); - if (fp == NULL) return NULL; - - fp->is_write = 0; - fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); - if (fp->uncompressed_block == NULL) { free(fp); return NULL; } - fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; - fp->is_compressed = (n==18 && magic[0]==0x1f && magic[1]==0x8b); - fp->is_gzip = ( !fp->is_compressed || ((magic[3]&4) && memcmp(&magic[12], "BC\2\0",4)==0) ) ? 
0 : 1; - if (fp->is_compressed && (magic[3]&4) && memcmp(&magic[12], "RAZF", 4)==0) { - hts_log_error("Cannot decompress legacy RAZF format"); - razf_info(hfpr, filename); - free(fp->uncompressed_block); - free(fp); - errno = EFTYPE; - return NULL; - } -#ifdef BGZF_CACHE - if (!(fp->cache = malloc(sizeof(*fp->cache)))) { - free(fp->uncompressed_block); - free(fp); - return NULL; - } - if (!(fp->cache->h = kh_init(cache))) { - free(fp->uncompressed_block); - free(fp->cache); - free(fp); - return NULL; - } - fp->cache->last_pos = 0; -#endif - return fp; -} - -// get the compress level from the mode string: compress_level==-1 for the default level, -2 plain uncompressed -static int mode2level(const char *mode) -{ - int i, compress_level = -1; - for (i = 0; mode[i]; ++i) - if (mode[i] >= '0' && mode[i] <= '9') break; - if (mode[i]) compress_level = (int)mode[i] - '0'; - if (strchr(mode, 'u')) compress_level = -2; - return compress_level; -} -static BGZF *bgzf_write_init(const char *mode) -{ - BGZF *fp; - fp = (BGZF*)calloc(1, sizeof(BGZF)); - if (fp == NULL) goto mem_fail; - fp->is_write = 1; - int compress_level = mode2level(mode); - if ( compress_level==-2 ) - { - fp->is_compressed = 0; - return fp; - } - fp->is_compressed = 1; - - fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); - if (fp->uncompressed_block == NULL) goto mem_fail; - fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; - - fp->compress_level = compress_level < 0? 
Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 - if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; - if ( strchr(mode,'g') ) - { - // gzip output - fp->is_gzip = 1; - fp->gz_stream = (z_stream*)calloc(1,sizeof(z_stream)); - if (fp->gz_stream == NULL) goto mem_fail; - fp->gz_stream->zalloc = NULL; - fp->gz_stream->zfree = NULL; - fp->gz_stream->msg = NULL; - - int ret = deflateInit2(fp->gz_stream, fp->compress_level, Z_DEFLATED, 15|16, 8, Z_DEFAULT_STRATEGY); - if (ret!=Z_OK) { - hts_log_error("Call to deflateInit2 failed: %s", bgzf_zerr(ret, fp->gz_stream)); - goto fail; - } - } - return fp; - -mem_fail: - hts_log_error("%s", strerror(errno)); - -fail: - if (fp != NULL) { - free(fp->uncompressed_block); - free(fp->gz_stream); - free(fp); - } - return NULL; -} - -BGZF *bgzf_open(const char *path, const char *mode) -{ - BGZF *fp = 0; - if (strchr(mode, 'r')) { - hFILE *fpr; - if ((fpr = hopen(path, mode)) == 0) return 0; - fp = bgzf_read_init(fpr, path); - if (fp == 0) { hclose_abruptly(fpr); return NULL; } - fp->fp = fpr; - } else if (strchr(mode, 'w') || strchr(mode, 'a')) { - hFILE *fpw; - if ((fpw = hopen(path, mode)) == 0) return 0; - fp = bgzf_write_init(mode); - if (fp == NULL) return NULL; - fp->fp = fpw; - } - else { errno = EINVAL; return 0; } - - fp->is_be = ed_is_big(); - return fp; -} - -BGZF *bgzf_dopen(int fd, const char *mode) -{ - BGZF *fp = 0; - if (strchr(mode, 'r')) { - hFILE *fpr; - if ((fpr = hdopen(fd, mode)) == 0) return 0; - fp = bgzf_read_init(fpr, NULL); - if (fp == 0) { hclose_abruptly(fpr); return NULL; } // FIXME this closes fd - fp->fp = fpr; - } else if (strchr(mode, 'w') || strchr(mode, 'a')) { - hFILE *fpw; - if ((fpw = hdopen(fd, mode)) == 0) return 0; - fp = bgzf_write_init(mode); - if (fp == NULL) return NULL; - fp->fp = fpw; - } - else { errno = EINVAL; return 0; } - - fp->is_be = ed_is_big(); - return fp; -} - -BGZF *bgzf_hopen(hFILE *hfp, const char *mode) -{ - BGZF *fp = NULL; - if 
(strchr(mode, 'r')) { - fp = bgzf_read_init(hfp, NULL); - if (fp == NULL) return NULL; - } else if (strchr(mode, 'w') || strchr(mode, 'a')) { - fp = bgzf_write_init(mode); - if (fp == NULL) return NULL; - } - else { errno = EINVAL; return 0; } - - fp->fp = hfp; - fp->is_be = ed_is_big(); - return fp; -} - -#ifdef HAVE_LIBDEFLATE -int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) -{ - if (slen == 0) { - // EOF block - if (*dlen < 28) return -1; - memcpy(_dst, "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", 28); - *dlen = 28; - return 0; - } - - uint8_t *dst = (uint8_t*)_dst; - - if (level == 0) { - // Uncompressed data - if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1; - dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 - u16_to_le(slen, &dst[BLOCK_HEADER_LENGTH+1]); // length - u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length - memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen); - *dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - - } else { - level = level > 0 ? level : 6; // libdeflate doesn't honour -1 as default - // NB levels go up to 12 here. 
- int lvl_map[] = {0,1,2,3,5,6,7,8,10,12}; - level = lvl_map[level>9 ?9 :level]; - struct libdeflate_compressor *z = libdeflate_alloc_compressor(level); - if (!z) return -1; - - // Raw deflate - size_t clen = - libdeflate_deflate_compress(z, src, slen, - dst + BLOCK_HEADER_LENGTH, - *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH); - - if (clen <= 0) { - hts_log_error("Call to libdeflate_deflate_compress failed"); - libdeflate_free_compressor(z); - return -1; - } - - *dlen = clen + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - - libdeflate_free_compressor(z); - } - - // write the header - memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block - packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes - - // write the footer - uint32_t crc = libdeflate_crc32(0, src, slen); - packInt32((uint8_t*)&dst[*dlen - 8], crc); - packInt32((uint8_t*)&dst[*dlen - 4], slen); - return 0; -} - -#else - -int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) -{ - uint32_t crc; - z_stream zs; - uint8_t *dst = (uint8_t*)_dst; - - if (level == 0) { - uncomp: - // Uncompressed data - if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1; - dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 - u16_to_le(slen, &dst[BLOCK_HEADER_LENGTH+1]); // length - u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length - memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen); - *dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - } else { - // compress the body - zs.zalloc = NULL; zs.zfree = NULL; - zs.msg = NULL; - zs.next_in = (Bytef*)src; - zs.avail_in = slen; - zs.next_out = dst + BLOCK_HEADER_LENGTH; - zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; - int ret = deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); // -15 to disable zlib header/footer - if (ret!=Z_OK) { - hts_log_error("Call to 
deflateInit2 failed: %s", bgzf_zerr(ret, &zs)); - return -1; - } - if ((ret = deflate(&zs, Z_FINISH)) != Z_STREAM_END) { - if (ret == Z_OK && zs.avail_out == 0) { - deflateEnd(&zs); - goto uncomp; - } else { - hts_log_error("Deflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? &zs : NULL)); - } - return -1; - } - // If we used up the entire output buffer, then we either ran out of - // room or we *just* fitted, but either way we may as well store - // uncompressed for faster decode. - if (zs.avail_out == 0) { - deflateEnd(&zs); - goto uncomp; - } - if ((ret = deflateEnd(&zs)) != Z_OK) { - hts_log_error("Call to deflateEnd failed: %s", bgzf_zerr(ret, NULL)); - return -1; - } - *dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - } - - // write the header - memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block - packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes - // write the footer - crc = crc32(crc32(0L, NULL, 0L), (Bytef*)src, slen); - packInt32((uint8_t*)&dst[*dlen - 8], crc); - packInt32((uint8_t*)&dst[*dlen - 4], slen); - return 0; -} -#endif // HAVE_LIBDEFLATE - -static int bgzf_gzip_compress(BGZF *fp, void *_dst, size_t *dlen, const void *src, size_t slen, int level) -{ - uint8_t *dst = (uint8_t*)_dst; - z_stream *zs = fp->gz_stream; - int flush = slen ? Z_PARTIAL_FLUSH : Z_FINISH; - zs->next_in = (Bytef*)src; - zs->avail_in = slen; - zs->next_out = dst; - zs->avail_out = *dlen; - int ret = deflate(zs, flush); - if (ret == Z_STREAM_ERROR) { - hts_log_error("Deflate operation failed: %s", bgzf_zerr(ret, NULL)); - return -1; - } - if (zs->avail_in != 0) { - hts_log_error("Deflate block too large for output buffer"); - return -1; - } - *dlen = *dlen - zs->avail_out; - return 0; -} - -// Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length. 
-static int deflate_block(BGZF *fp, int block_length) -{ - size_t comp_size = BGZF_MAX_BLOCK_SIZE; - int ret; - if ( !fp->is_gzip ) - ret = bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level); - else - ret = bgzf_gzip_compress(fp, fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level); - - if ( ret != 0 ) - { - hts_log_debug("Compression error %d", ret); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->block_offset = 0; - return comp_size; -} - -#ifdef HAVE_LIBDEFLATE - -static int bgzf_uncompress(uint8_t *dst, size_t *dlen, - const uint8_t *src, size_t slen, - uint32_t expected_crc) { - struct libdeflate_decompressor *z = libdeflate_alloc_decompressor(); - if (!z) { - hts_log_error("Call to libdeflate_alloc_decompressor failed"); - return -1; - } - - int ret = libdeflate_deflate_decompress(z, src, slen, dst, *dlen, dlen); - libdeflate_free_decompressor(z); - - if (ret != LIBDEFLATE_SUCCESS) { - hts_log_error("Inflate operation failed: %d", ret); - return -1; - } - - uint32_t crc = libdeflate_crc32(0, (unsigned char *)dst, *dlen); -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Pretend the CRC was OK so the fuzzer doesn't have to get it right - crc = expected_crc; -#endif - if (crc != expected_crc) { - hts_log_error("CRC32 checksum mismatch"); - return -2; - } - - return 0; -} - -#else - -static int bgzf_uncompress(uint8_t *dst, size_t *dlen, - const uint8_t *src, size_t slen, - uint32_t expected_crc) { - z_stream zs = { - .zalloc = NULL, - .zfree = NULL, - .msg = NULL, - .next_in = (Bytef*)src, - .avail_in = slen, - .next_out = (Bytef*)dst, - .avail_out = *dlen - }; - - int ret = inflateInit2(&zs, -15); - if (ret != Z_OK) { - hts_log_error("Call to inflateInit2 failed: %s", bgzf_zerr(ret, &zs)); - return -1; - } - if ((ret = inflate(&zs, Z_FINISH)) != Z_STREAM_END) { - hts_log_error("Inflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? 
&zs : NULL)); - if ((ret = inflateEnd(&zs)) != Z_OK) { - hts_log_warning("Call to inflateEnd failed: %s", bgzf_zerr(ret, NULL)); - } - return -1; - } - if ((ret = inflateEnd(&zs)) != Z_OK) { - hts_log_error("Call to inflateEnd failed: %s", bgzf_zerr(ret, NULL)); - return -1; - } - *dlen = *dlen - zs.avail_out; - - uint32_t crc = crc32(crc32(0L, NULL, 0L), (unsigned char *)dst, *dlen); -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Pretend the CRC was OK so the fuzzer doesn't have to get it right - crc = expected_crc; -#endif - if (crc != expected_crc) { - hts_log_error("CRC32 checksum mismatch"); - return -2; - } - - return 0; -} -#endif // HAVE_LIBDEFLATE - -// Inflate the block in fp->compressed_block into fp->uncompressed_block -static int inflate_block(BGZF* fp, int block_length) -{ - size_t dlen = BGZF_MAX_BLOCK_SIZE; - uint32_t crc = le_to_u32((uint8_t *)fp->compressed_block + block_length-8); - int ret = bgzf_uncompress(fp->uncompressed_block, &dlen, - (Bytef*)fp->compressed_block + 18, - block_length - 18, crc); - if (ret < 0) { - if (ret == -2) - fp->errcode |= BGZF_ERR_CRC; - else - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - - return dlen; -} - -// Decompress the next part of a non-blocked GZIP file. -// Return the number of uncompressed bytes read, 0 on EOF, or a negative number on error. -// Will fill the output buffer unless the end of the GZIP file is reached. -static int inflate_gzip_block(BGZF *fp) -{ - // we will set this to true when we detect EOF, so we don't bang against the EOF more than once per call - int input_eof = 0; - - // write to the part of the output buffer after block_offset - fp->gz_stream->next_out = (Bytef*)fp->uncompressed_block + fp->block_offset; - fp->gz_stream->avail_out = BGZF_MAX_BLOCK_SIZE - fp->block_offset; - - while ( fp->gz_stream->avail_out != 0 ) { - // until we fill the output buffer (or hit EOF) - - if ( !input_eof && fp->gz_stream->avail_in == 0 ) { - // we are out of input data in the buffer. 
Get more. - fp->gz_stream->next_in = fp->compressed_block; - int ret = hread(fp->fp, fp->compressed_block, BGZF_BLOCK_SIZE); - if ( ret < 0 ) { - // hread had an error. Pass it on. - return ret; - } - fp->gz_stream->avail_in = ret; - if ( fp->gz_stream->avail_in < BGZF_BLOCK_SIZE ) { - // we have reached EOF but the decompressor hasn't necessarily - input_eof = 1; - } - } - - fp->gz_stream->msg = NULL; - // decompress as much data as we can - int ret = inflate(fp->gz_stream, Z_SYNC_FLUSH); - - if ( (ret < 0 && ret != Z_BUF_ERROR) || ret == Z_NEED_DICT ) { - // an error occurred, other than running out of space - hts_log_error("Inflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? fp->gz_stream : NULL)); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } else if ( ret == Z_STREAM_END ) { - // we finished a GZIP member - - // scratch for peeking to see if the file is over - char c; - if (fp->gz_stream->avail_in > 0 || hpeek(fp->fp, &c, 1) == 1) { - // there is more data; try and read another GZIP member in the remaining data - int reset_ret = inflateReset(fp->gz_stream); - if (reset_ret != Z_OK) { - hts_log_error("Call to inflateReset failed: %s", bgzf_zerr(reset_ret, NULL)); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - } else { - // we consumed all the input data and hit Z_STREAM_END - // so stop looping, even if we never fill the output buffer - break; - } - } else if ( ret == Z_BUF_ERROR && input_eof && fp->gz_stream->avail_out > 0 ) { - // the gzip file has ended prematurely - hts_log_error("Gzip file truncated"); - fp->errcode |= BGZF_ERR_IO; - return -1; - } - } - - // when we get here, the buffer is full or there is an EOF after a complete gzip member - return BGZF_MAX_BLOCK_SIZE - fp->gz_stream->avail_out; -} - -// Returns: 0 on success (BGZF header); -1 on non-BGZF GZIP header; -2 on error -static int check_header(const uint8_t *header) -{ - if ( header[0] != 31 || header[1] != 139 || header[2] != 8 ) return -2; - return ((header[3] & 4) != 
0 - && unpackInt16((uint8_t*)&header[10]) == 6 - && header[12] == 'B' && header[13] == 'C' - && unpackInt16((uint8_t*)&header[14]) == 2) ? 0 : -1; -} - -#ifdef BGZF_CACHE -static void free_cache(BGZF *fp) -{ - khint_t k; - if (fp->is_write) return; - khash_t(cache) *h = fp->cache->h; - for (k = kh_begin(h); k < kh_end(h); ++k) - if (kh_exist(h, k)) free(kh_val(h, k).block); - kh_destroy(cache, h); - free(fp->cache); -} - -static int load_block_from_cache(BGZF *fp, int64_t block_address) -{ - khint_t k; - cache_t *p; - - khash_t(cache) *h = fp->cache->h; - k = kh_get(cache, h, block_address); - if (k == kh_end(h)) return 0; - p = &kh_val(h, k); - if (fp->block_length != 0) fp->block_offset = 0; - fp->block_address = block_address; - fp->block_length = p->size; - memcpy(fp->uncompressed_block, p->block, p->size); - if ( hseek(fp->fp, p->end_offset, SEEK_SET) < 0 ) - { - // todo: move the error up - hts_log_error("Could not hseek to %" PRId64, p->end_offset); - exit(1); - } - return p->size; -} - -static void cache_block(BGZF *fp, int size) -{ - int ret; - khint_t k, k_orig; - uint8_t *block = NULL; - cache_t *p; - //fprintf(stderr, "Cache block at %llx\n", (int)fp->block_address); - khash_t(cache) *h = fp->cache->h; - if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return; - if (fp->block_length < 0 || fp->block_length > BGZF_MAX_BLOCK_SIZE) return; - if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > (uint32_t)fp->cache_size) { - /* Remove uniformly from any position in the hash by a simple - * round-robin approach. 
An alternative strategy would be to - * remove the least recently accessed block, but the round-robin - * removal is simpler and is not expected to have a big impact - * on performance */ - if (fp->cache->last_pos >= kh_end(h)) fp->cache->last_pos = kh_begin(h); - k_orig = k = fp->cache->last_pos; - if (++k >= kh_end(h)) k = kh_begin(h); - while (k != k_orig) { - if (kh_exist(h, k)) - break; - if (++k == kh_end(h)) - k = kh_begin(h); - } - fp->cache->last_pos = k; - - if (k != k_orig) { - block = kh_val(h, k).block; - kh_del(cache, h, k); - } - } else { - block = (uint8_t*)malloc(BGZF_MAX_BLOCK_SIZE); - } - if (!block) return; - k = kh_put(cache, h, fp->block_address, &ret); - if (ret <= 0) { // kh_put failed, or in there already (shouldn't happen) - free(block); - return; - } - p = &kh_val(h, k); - p->size = fp->block_length; - p->end_offset = fp->block_address + size; - p->block = block; - memcpy(p->block, fp->uncompressed_block, p->size); -} -#else -static void free_cache(BGZF *fp) {} -static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;} -static void cache_block(BGZF *fp, int size) {} -#endif - -/* - * Absolute htell in this compressed file. - * - * Do not confuse with the external bgzf_tell macro which returns the virtual - * offset. - */ -static off_t bgzf_htell(BGZF *fp) { - if (fp->mt) { - pthread_mutex_lock(&fp->mt->job_pool_m); - off_t pos = fp->block_address + fp->block_clength; - pthread_mutex_unlock(&fp->mt->job_pool_m); - return pos; - } else { - return htell(fp->fp); - } -} - -int bgzf_read_block(BGZF *fp) -{ - hts_tpool_result *r; - - if (fp->errcode) return -1; - - if (fp->mt) { - again: - if (fp->mt->hit_eof) { - // Further reading at EOF will always return 0 - fp->block_length = 0; - return 0; - } - r = hts_tpool_next_result_wait(fp->mt->out_queue); - bgzf_job *j = r ? 
(bgzf_job *)hts_tpool_result_data(r) : NULL; - - if (!j || j->errcode == BGZF_ERR_MT) { - if (!fp->mt->free_block) { - fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); - if (fp->uncompressed_block == NULL) return -1; - fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; - } // else it's already allocated with malloc, maybe even in-use. - if (mt_destroy(fp->mt) < 0) { - fp->errcode = BGZF_ERR_IO; - } - fp->mt = NULL; - hts_tpool_delete_result(r, 0); - if (fp->errcode) { - return -1; - } - goto single_threaded; - } - - if (j->errcode) { - fp->errcode = j->errcode; - hts_log_error("BGZF decode jobs returned error %d " - "for block offset %"PRId64, - j->errcode, j->block_address); - hts_tpool_delete_result(r, 0); - return -1; - } - - if (j->hit_eof) { - if (!fp->last_block_eof && !fp->no_eof_block) { - fp->no_eof_block = 1; - hts_log_warning("EOF marker is absent. The input may be truncated"); - } - fp->mt->hit_eof = 1; - } - - // Zero length blocks in the middle of a file are (wrongly) - // considered as EOF by many callers. We work around this by - // trying again to see if we hit a genuine EOF. - if (!j->hit_eof && j->uncomp_len == 0) { - fp->last_block_eof = 1; - hts_tpool_delete_result(r, 0); - goto again; - } - - // block_length=0 and block_offset set by bgzf_seek. - if (fp->block_length != 0) fp->block_offset = 0; - if (!j->hit_eof) fp->block_address = j->block_address; - fp->block_clength = j->comp_len; - fp->block_length = j->uncomp_len; - // bgzf_read() can change fp->block_length - fp->last_block_eof = (fp->block_length == 0); - - if ( j->uncomp_len && j->fp->idx_build_otf ) - { - bgzf_index_add_block(j->fp); - j->fp->idx->ublock_addr += j->uncomp_len; - } - - // Steal the data block as it's quicker than a memcpy. - // We just need to make sure we delay the pool free. 
- if (fp->mt->curr_job) { - pthread_mutex_lock(&fp->mt->job_pool_m); - pool_free(fp->mt->job_pool, fp->mt->curr_job); - pthread_mutex_unlock(&fp->mt->job_pool_m); - } - fp->uncompressed_block = j->uncomp_data; - fp->mt->curr_job = j; - if (fp->mt->free_block) { - free(fp->mt->free_block); // clear up last non-mt block - fp->mt->free_block = NULL; - } - - hts_tpool_delete_result(r, 0); - return 0; - } - - uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; - int count, size, block_length, remaining; - - single_threaded: - size = 0; - - int64_t block_address; - block_address = bgzf_htell(fp); - - // Reading an uncompressed file - if ( !fp->is_compressed ) - { - count = hread(fp->fp, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE); - if (count < 0) // Error - { - hts_log_error("Failed to read uncompressed data " - "at offset %"PRId64"%s%s", - block_address, errno ? ": " : "", strerror(errno)); - fp->errcode |= BGZF_ERR_IO; - return -1; - } - else if (count == 0) // EOF - { - fp->block_length = 0; - return 0; - } - if (fp->block_length != 0) fp->block_offset = 0; - fp->block_address = block_address; - fp->block_length = count; - return 0; - } - - // Reading compressed file - if ( fp->is_gzip && fp->gz_stream ) // is this is an initialized gzip stream? - { - count = inflate_gzip_block(fp); - if ( count<0 ) - { - hts_log_error("Reading GZIP stream failed at offset %"PRId64, - block_address); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->block_length = count; - fp->block_address = block_address; - return 0; - } - if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; - - // loop to skip empty bgzf blocks - while (1) - { - count = hread(fp->fp, header, sizeof(header)); - if (count == 0) { // no data read - if (!fp->last_block_eof && !fp->no_eof_block && !fp->is_gzip) { - fp->no_eof_block = 1; - hts_log_warning("EOF marker is absent. 
The input may be truncated"); - } - fp->block_length = 0; - return 0; - } - int ret = 0; - if ( count != sizeof(header) || (ret=check_header(header))==-2 ) - { - fp->errcode |= BGZF_ERR_HEADER; - hts_log_error("%s BGZF header at offset %"PRId64, - ret ? "Invalid" : "Failed to read", - block_address); - return -1; - } - if ( ret==-1 ) - { - // GZIP, not BGZF - uint8_t *cblock = (uint8_t*)fp->compressed_block; - memcpy(cblock, header, sizeof(header)); - count = hread(fp->fp, cblock+sizeof(header), BGZF_BLOCK_SIZE - sizeof(header)) + sizeof(header); - - fp->is_gzip = 1; - fp->gz_stream = (z_stream*) calloc(1,sizeof(z_stream)); - // Set up zlib, using a window size of 15, and its built-in GZIP header processing (+16). - int ret = inflateInit2(fp->gz_stream, 15 + 16); - if (ret != Z_OK) - { - hts_log_error("Call to inflateInit2 failed: %s", bgzf_zerr(ret, fp->gz_stream)); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->gz_stream->avail_in = count; - fp->gz_stream->next_in = cblock; - count = inflate_gzip_block(fp); - if ( count<0 ) - { - hts_log_error("Reading GZIP stream failed at offset %"PRId64, - block_address); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->block_length = count; - fp->block_address = block_address; - if ( fp->idx_build_otf ) return -1; // cannot build index for gzip - return 0; - } - size = count; - block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" - if (block_length < BLOCK_HEADER_LENGTH) - { - hts_log_error("Invalid BGZF block length at offset %"PRId64, - block_address); - fp->errcode |= BGZF_ERR_HEADER; - return -1; - } - compressed_block = (uint8_t*)fp->compressed_block; - memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); - remaining = block_length - BLOCK_HEADER_LENGTH; - count = hread(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); - if (count != remaining) { - hts_log_error("Failed to read BGZF block data at offset %"PRId64 - " expected %d bytes; hread 
returned %d", - block_address, remaining, count); - fp->errcode |= BGZF_ERR_IO; - return -1; - } - size += count; - if ((count = inflate_block(fp, block_length)) < 0) { - hts_log_debug("Inflate block operation failed for " - "block at offset %"PRId64": %s", - block_address, bgzf_zerr(count, NULL)); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - fp->last_block_eof = (count == 0); - if ( count ) break; // otherwise an empty bgzf block - block_address = bgzf_htell(fp); // update for new block start - } - if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek. - fp->block_address = block_address; - fp->block_length = count; - if ( fp->idx_build_otf ) - { - bgzf_index_add_block(fp); - fp->idx->ublock_addr += count; - } - cache_block(fp, size); - return 0; -} - -ssize_t bgzf_read(BGZF *fp, void *data, size_t length) -{ - ssize_t bytes_read = 0; - uint8_t *output = (uint8_t*)data; - if (length <= 0) return 0; - assert(fp->is_write == 0); - while (bytes_read < length) { - int copy_length, available = fp->block_length - fp->block_offset; - uint8_t *buffer; - if (available <= 0) { - int ret = bgzf_read_block(fp); - if (ret != 0) { - hts_log_error("Read block operation failed with error %d after %zd of %zu bytes", fp->errcode, bytes_read, length); - fp->errcode |= BGZF_ERR_ZLIB; - return -1; - } - available = fp->block_length - fp->block_offset; - if (available == 0) { - if (fp->block_length == 0) - break; // EOF - - // Offset was at end of block (see commit e9863a0) - fp->block_address = bgzf_htell(fp); - fp->block_offset = fp->block_length = 0; - continue; - } else if (available < 0) { - // Block offset was set to an invalid coordinate - hts_log_error("BGZF block offset %d set beyond block size %d", - fp->block_offset, fp->block_length); - fp->errcode |= BGZF_ERR_MISUSE; - return -1; - } - } - copy_length = length - bytes_read < available? 
length - bytes_read : available; - buffer = (uint8_t*)fp->uncompressed_block; - memcpy(output, buffer + fp->block_offset, copy_length); - fp->block_offset += copy_length; - output += copy_length; - bytes_read += copy_length; - - // For raw gzip streams this avoids short reads. - if (fp->block_offset == fp->block_length) { - fp->block_address = bgzf_htell(fp); - fp->block_offset = fp->block_length = 0; - } - } - - fp->uncompressed_address += bytes_read; - - return bytes_read; -} - -// -1 for EOF, -2 for error, 0-255 for byte. -int bgzf_peek(BGZF *fp) { - int available = fp->block_length - fp->block_offset; - if (available <= 0) { - if (bgzf_read_block(fp) < 0) { - hts_log_error("Read block operation failed with error %d", fp->errcode); - fp->errcode = BGZF_ERR_ZLIB; - return -2; - } - } - available = fp->block_length - fp->block_offset; - if (available) - return ((unsigned char *)fp->uncompressed_block)[fp->block_offset]; - - return -1; -} - -ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length) -{ - ssize_t ret = hread(fp->fp, data, length); - if (ret < 0) fp->errcode |= BGZF_ERR_IO; - return ret; -} - -#ifdef BGZF_MT - -/* Function to clean up when jobs are discarded (e.g. during seek) - * This works for results too, as results are the same struct with - * decompressed data stored in it. */ -static void job_cleanup(void *arg) { - bgzf_job *j = (bgzf_job *)arg; - mtaux_t *mt = j->fp->mt; - pthread_mutex_lock(&mt->job_pool_m); - pool_free(mt->job_pool, j); - pthread_mutex_unlock(&mt->job_pool_m); -} - -static void *bgzf_encode_func(void *arg) { - bgzf_job *j = (bgzf_job *)arg; - - j->comp_len = BGZF_MAX_BLOCK_SIZE; - int ret = bgzf_compress(j->comp_data, &j->comp_len, - j->uncomp_data, j->uncomp_len, - j->fp->compress_level); - if (ret != 0) - j->errcode |= BGZF_ERR_ZLIB; - - return arg; -} - -// Optimisation for compression level 0 (uncompressed deflate blocks) -// Avoids memcpy of the data from uncompressed to compressed buffer. 
-static void *bgzf_encode_level0_func(void *arg) { - bgzf_job *j = (bgzf_job *)arg; - uint32_t crc; - j->comp_len = j->uncomp_len + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH + 5; - - // Data will have already been copied in to - // j->comp_data + BLOCK_HEADER_LENGTH + 5 - - // Add preamble - memcpy(j->comp_data, g_magic, BLOCK_HEADER_LENGTH); - u16_to_le(j->comp_len-1, j->comp_data + 16); - - // Deflate uncompressed data header - j->comp_data[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 - u16_to_le(j->uncomp_len, j->comp_data + BLOCK_HEADER_LENGTH + 1); - u16_to_le(~j->uncomp_len, j->comp_data + BLOCK_HEADER_LENGTH + 3); - - // Trailer (CRC, uncompressed length) -#ifdef HAVE_LIBDEFLATE - crc = libdeflate_crc32(0, j->comp_data + BLOCK_HEADER_LENGTH + 5, - j->uncomp_len); -#else - crc = crc32(crc32(0L, NULL, 0L), - (Bytef*)j->comp_data + BLOCK_HEADER_LENGTH + 5, j->uncomp_len); -#endif - u32_to_le(crc, j->comp_data + j->comp_len - 8); - u32_to_le(j->uncomp_len, j->comp_data + j->comp_len - 4); - - return arg; -} - -// Our input block has already been decoded by bgzf_mt_read_block(). -// We need to split that into a fetch block (compressed) and make this -// do the actual decompression step. -static void *bgzf_decode_func(void *arg) { - bgzf_job *j = (bgzf_job *)arg; - - j->uncomp_len = BGZF_MAX_BLOCK_SIZE; - uint32_t crc = le_to_u32((uint8_t *)j->comp_data + j->comp_len-8); - int ret = bgzf_uncompress(j->uncomp_data, &j->uncomp_len, - j->comp_data+18, j->comp_len-18, crc); - if (ret != 0) - j->errcode |= BGZF_ERR_ZLIB; - - return arg; -} - -/* - * Nul function so we can dispatch a job with the correct serial - * to mark failure or to indicate an empty read (EOF). - */ -static void *bgzf_nul_func(void *arg) { return arg; } - -/* - * Takes compressed blocks off the results queue and calls hwrite to - * punt them to the output stream. 
- * - * Returns NULL when no more are left, or -1 on error - */ -static void *bgzf_mt_writer(void *vp) { - BGZF *fp = (BGZF *)vp; - mtaux_t *mt = fp->mt; - hts_tpool_result *r; - - if (fp->idx_build_otf) { - fp->idx->moffs = fp->idx->noffs = 1; - fp->idx->offs = (bgzidx1_t*) calloc(fp->idx->moffs, sizeof(bgzidx1_t)); - if (!fp->idx->offs) goto err; - } - - // Iterates until result queue is shutdown, where it returns NULL. - while ((r = hts_tpool_next_result_wait(mt->out_queue))) { - bgzf_job *j = (bgzf_job *)hts_tpool_result_data(r); - assert(j); - - if (fp->idx_build_otf) { - fp->idx->noffs++; - if ( fp->idx->noffs > fp->idx->moffs ) - { - fp->idx->moffs = fp->idx->noffs; - kroundup32(fp->idx->moffs); - fp->idx->offs = (bgzidx1_t*) realloc(fp->idx->offs, fp->idx->moffs*sizeof(bgzidx1_t)); - if ( !fp->idx->offs ) goto err; - } - fp->idx->offs[ fp->idx->noffs-1 ].uaddr = fp->idx->offs[ fp->idx->noffs-2 ].uaddr + j->uncomp_len; - fp->idx->offs[ fp->idx->noffs-1 ].caddr = fp->idx->offs[ fp->idx->noffs-2 ].caddr + j->comp_len; - } - - // Flush any cached hts_idx_push calls - if (bgzf_idx_flush(fp, j->uncomp_len, j->comp_len) < 0) - goto err; - - if (hwrite(fp->fp, j->comp_data, j->comp_len) != j->comp_len) - goto err; - - // Update our local block_address. Cannot be fp->block_address due to no - // locking in bgzf_tell. - pthread_mutex_lock(&mt->idx_m); - mt->block_address += j->comp_len; - pthread_mutex_unlock(&mt->idx_m); - - /* - * Periodically call hflush (which calls fsync when on a file). - * This avoids the fsync being done at the bgzf_close stage, - * which can sometimes cause significant delays. As this is in - * a separate thread, spreading the sync delays throughout the - * program execution seems better. - * Frequency of 1/512 has been chosen by experimentation - * across local XFS, NFS and Lustre tests. 
- */ - if (++mt->flush_pending % 512 == 0) - if (hflush(fp->fp) != 0) - goto err; - - - hts_tpool_delete_result(r, 0); - - // Also updated by main thread - pthread_mutex_lock(&mt->job_pool_m); - pool_free(mt->job_pool, j); - mt->jobs_pending--; - pthread_mutex_unlock(&mt->job_pool_m); - } - - if (hflush(fp->fp) != 0) - goto err; - - hts_tpool_process_destroy(mt->out_queue); - - return NULL; - - err: - hts_tpool_process_destroy(mt->out_queue); - return (void *)-1; -} - - -/* - * Reads a compressed block of data using hread and dispatches it to - * the thread pool for decompression. This is the analogue of the old - * non-threaded bgzf_read_block() function, but without modifying fp - * in any way (except for the read offset). All output goes via the - * supplied bgzf_job struct. - * - * Returns NULL when no more are left, or -1 on error - */ -int bgzf_mt_read_block(BGZF *fp, bgzf_job *j) -{ - uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; - int count, block_length, remaining; - - // NOTE: Guaranteed to be compressed as we block multi-threading in - // uncompressed mode. However it may be gzip compression instead - // of bgzf. 
- - // Reading compressed file - int64_t block_address; - block_address = htell(fp->fp); - - j->block_address = block_address; // in case we exit with j->errcode - - if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; - count = hpeek(fp->fp, header, sizeof(header)); - if (count == 0) // no data read - return -1; - int ret; - if ( count != sizeof(header) || (ret=check_header(header))==-2 ) - { - j->errcode |= BGZF_ERR_HEADER; - return -1; - } - if (ret == -1) { - j->errcode |= BGZF_ERR_MT; - return -1; - } - - count = hread(fp->fp, header, sizeof(header)); - if (count != sizeof(header)) // no data read - return -1; - - block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" - if (block_length < BLOCK_HEADER_LENGTH) { - j->errcode |= BGZF_ERR_HEADER; - return -1; - } - compressed_block = (uint8_t*)j->comp_data; - memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); - remaining = block_length - BLOCK_HEADER_LENGTH; - count = hread(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); - if (count != remaining) { - j->errcode |= BGZF_ERR_IO; - return -1; - } - j->comp_len = block_length; - j->uncomp_len = BGZF_MAX_BLOCK_SIZE; - j->block_address = block_address; - j->fp = fp; - j->errcode = 0; - - return 0; -} - - -static int bgzf_check_EOF_common(BGZF *fp) -{ - uint8_t buf[28]; - off_t offset = htell(fp->fp); - if (hseek(fp->fp, -28, SEEK_END) < 0) { - if (errno == ESPIPE) { hclearerr(fp->fp); return 2; } -#ifdef _WIN32 - if (errno == EINVAL) { hclearerr(fp->fp); return 2; } -#else - // Assume that EINVAL was due to the file being less than 28 bytes - // long, rather than being a random error return from an hfile backend. - // This should be reported as "no EOF block" rather than an error. 
- if (errno == EINVAL) { hclearerr(fp->fp); return 0; } -#endif - return -1; - } - if ( hread(fp->fp, buf, 28) != 28 ) return -1; - if ( hseek(fp->fp, offset, SEEK_SET) < 0 ) return -1; - return (memcmp("\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", buf, 28) == 0)? 1 : 0; -} - -/* - * Checks EOF from the reader thread. - */ -static void bgzf_mt_eof(BGZF *fp) { - mtaux_t *mt = fp->mt; - - pthread_mutex_lock(&mt->job_pool_m); - mt->eof = bgzf_check_EOF_common(fp); - pthread_mutex_unlock(&mt->job_pool_m); - mt->command = HAS_EOF_DONE; - pthread_cond_signal(&mt->command_c); -} - - -/* - * Performs the seek (called by reader thread). - * - * This simply drains the entire queue, throwing away blocks, seeks, - * and starts it up again. Brute force, but maybe sufficient. - */ -static void bgzf_mt_seek(BGZF *fp) { - mtaux_t *mt = fp->mt; - - hts_tpool_process_reset(mt->out_queue, 0); - pthread_mutex_lock(&mt->job_pool_m); - mt->errcode = 0; - - if (hseek(fp->fp, mt->block_address, SEEK_SET) < 0) - mt->errcode = BGZF_ERR_IO; - - pthread_mutex_unlock(&mt->job_pool_m); - mt->command = SEEK_DONE; - pthread_cond_signal(&mt->command_c); -} - -static void *bgzf_mt_reader(void *vp) { - BGZF *fp = (BGZF *)vp; - mtaux_t *mt = fp->mt; - -restart: - pthread_mutex_lock(&mt->job_pool_m); - bgzf_job *j = pool_alloc(mt->job_pool); - pthread_mutex_unlock(&mt->job_pool_m); - if (!j) goto err; - j->errcode = 0; - j->comp_len = 0; - j->uncomp_len = 0; - j->hit_eof = 0; - j->fp = fp; - - while (bgzf_mt_read_block(fp, j) == 0) { - // Dispatch - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_decode_func, j, - job_cleanup, job_cleanup, 0) < 0) { - job_cleanup(j); - goto err; - } - - // Check for command - pthread_mutex_lock(&mt->command_m); - switch (mt->command) { - case SEEK: - bgzf_mt_seek(fp); // Sets mt->command to SEEK_DONE - pthread_mutex_unlock(&mt->command_m); - goto restart; - - case HAS_EOF: - bgzf_mt_eof(fp); // Sets mt->command to HAS_EOF_DONE - 
break; - - case SEEK_DONE: - case HAS_EOF_DONE: - pthread_cond_signal(&mt->command_c); - break; - - case CLOSE: - pthread_cond_signal(&mt->command_c); - pthread_mutex_unlock(&mt->command_m); - hts_tpool_process_destroy(mt->out_queue); - return NULL; - - default: - break; - } - pthread_mutex_unlock(&mt->command_m); - - // Allocate buffer for next block - pthread_mutex_lock(&mt->job_pool_m); - j = pool_alloc(mt->job_pool); - pthread_mutex_unlock(&mt->job_pool_m); - if (!j) { - hts_tpool_process_destroy(mt->out_queue); - return NULL; - } - j->errcode = 0; - j->comp_len = 0; - j->uncomp_len = 0; - j->hit_eof = 0; - j->fp = fp; - } - - if (j->errcode == BGZF_ERR_MT) { - // Attempt to multi-thread decode a raw gzip stream cannot be done. - // We tear down the multi-threaded decoder and revert to the old code. - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_nul_func, j, - job_cleanup, job_cleanup, 0) < 0) { - job_cleanup(j); - hts_tpool_process_destroy(mt->out_queue); - return NULL; - } - hts_tpool_process_ref_decr(mt->out_queue); - return &j->errcode; - } - - // Dispatch an empty block so EOF is spotted. - // We also use this mechanism for returning errors, in which case - // j->errcode is set already. - - j->hit_eof = 1; - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_nul_func, j, - job_cleanup, job_cleanup, 0) < 0) { - job_cleanup(j); - hts_tpool_process_destroy(mt->out_queue); - return NULL; - } - if (j->errcode != 0) { - hts_tpool_process_destroy(mt->out_queue); - return &j->errcode; - } - - // We hit EOF so can stop reading, but we may get a subsequent - // seek request. In this case we need to restart the reader. - // - // To handle this we wait on a condition variable and then - // monitor the command. (This could be either seek or close.) 
- for (;;) { - pthread_mutex_lock(&mt->command_m); - if (mt->command == NONE) - pthread_cond_wait(&mt->command_c, &mt->command_m); - switch(mt->command) { - default: - pthread_mutex_unlock(&mt->command_m); - break; - - case SEEK: - bgzf_mt_seek(fp); - pthread_mutex_unlock(&mt->command_m); - goto restart; - - case HAS_EOF: - bgzf_mt_eof(fp); // Sets mt->command to HAS_EOF_DONE - pthread_mutex_unlock(&mt->command_m); - break; - - case SEEK_DONE: - case HAS_EOF_DONE: - pthread_cond_signal(&mt->command_c); - pthread_mutex_unlock(&mt->command_m); - break; - - case CLOSE: - pthread_cond_signal(&mt->command_c); - pthread_mutex_unlock(&mt->command_m); - hts_tpool_process_destroy(mt->out_queue); - return NULL; - } - } - - err: - pthread_mutex_lock(&mt->command_m); - mt->command = CLOSE; - pthread_cond_signal(&mt->command_c); - pthread_mutex_unlock(&mt->command_m); - hts_tpool_process_destroy(mt->out_queue); - return NULL; -} - -int bgzf_thread_pool(BGZF *fp, hts_tpool *pool, int qsize) { - // No gain from multi-threading when not compressed - if (!fp->is_compressed) - return 0; - - mtaux_t *mt; - mt = (mtaux_t*)calloc(1, sizeof(mtaux_t)); - if (!mt) return -1; - fp->mt = mt; - - mt->pool = pool; - mt->n_threads = hts_tpool_size(pool); - if (!qsize) - qsize = mt->n_threads*2; - if (!(mt->out_queue = hts_tpool_process_init(mt->pool, qsize, 0))) - goto err; - hts_tpool_process_ref_incr(mt->out_queue); - - mt->job_pool = pool_create(sizeof(bgzf_job)); - if (!mt->job_pool) - goto err; - - pthread_mutex_init(&mt->job_pool_m, NULL); - pthread_mutex_init(&mt->command_m, NULL); - pthread_mutex_init(&mt->idx_m, NULL); - pthread_cond_init(&mt->command_c, NULL); - mt->flush_pending = 0; - mt->jobs_pending = 0; - mt->free_block = fp->uncompressed_block; // currently in-use block - mt->block_address = fp->block_address; - pthread_create(&mt->io_task, NULL, - fp->is_write ? 
bgzf_mt_writer : bgzf_mt_reader, fp); - - return 0; - - err: - free(mt); - fp->mt = NULL; - return -1; -} - -int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) -{ - // No gain from multi-threading when not compressed - if (!fp->is_compressed || fp->is_gzip) - return 0; - - if (n_threads < 1) return -1; - hts_tpool *p = hts_tpool_init(n_threads); - if (!p) - return -1; - - if (bgzf_thread_pool(fp, p, 0) != 0) { - hts_tpool_destroy(p); - return -1; - } - - fp->mt->own_pool = 1; - - return 0; -} - -static int mt_destroy(mtaux_t *mt) -{ - int ret = 0; - - // Tell the reader to shut down - pthread_mutex_lock(&mt->command_m); - mt->command = CLOSE; - pthread_cond_signal(&mt->command_c); - hts_tpool_wake_dispatch(mt->out_queue); // unstick the reader - pthread_mutex_unlock(&mt->command_m); - - // Check for thread worker failure, indicated by is_shutdown returning 2 - // It's possible really late errors might be missed, but we can live with - // that. - ret = -(hts_tpool_process_is_shutdown(mt->out_queue) > 1); - // Destroying the queue first forces the writer to exit. - // mt->out_queue is reference counted, so destroy gets called in both - // this and the IO threads. The last to do it will clean up. - hts_tpool_process_destroy(mt->out_queue); - - // IO thread will now exit. Wait for it and perform final clean-up. - // If it returned non-NULL, it was not happy. - void *retval = NULL; - pthread_join(mt->io_task, &retval); - ret = retval != NULL ? 
-1 : ret; - - pthread_mutex_destroy(&mt->job_pool_m); - pthread_mutex_destroy(&mt->command_m); - pthread_mutex_destroy(&mt->idx_m); - pthread_cond_destroy(&mt->command_c); - if (mt->curr_job) - pool_free(mt->job_pool, mt->curr_job); - - if (mt->own_pool) - hts_tpool_destroy(mt->pool); - - pool_destroy(mt->job_pool); - - if (mt->idx_cache.e) - free(mt->idx_cache.e); - - free(mt); - fflush(stderr); - - return ret; -} - -static int mt_queue(BGZF *fp) -{ - mtaux_t *mt = fp->mt; - - mt->block_number++; - - // Also updated by writer thread - pthread_mutex_lock(&mt->job_pool_m); - bgzf_job *j = pool_alloc(mt->job_pool); - if (j) mt->jobs_pending++; - pthread_mutex_unlock(&mt->job_pool_m); - if (!j) return -1; - - j->fp = fp; - j->errcode = 0; - j->uncomp_len = fp->block_offset; - if (fp->compress_level == 0) { - memcpy(j->comp_data + BLOCK_HEADER_LENGTH + 5, fp->uncompressed_block, - j->uncomp_len); - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, - bgzf_encode_level0_func, j, - job_cleanup, job_cleanup, 0) < 0) { - goto fail; - } - } else { - memcpy(j->uncomp_data, fp->uncompressed_block, j->uncomp_len); - - // Need non-block vers & job_pending? - if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_encode_func, j, - job_cleanup, job_cleanup, 0) < 0) { - goto fail; - } - } - - fp->block_offset = 0; - return 0; - - fail: - job_cleanup(j); - pthread_mutex_lock(&mt->job_pool_m); - mt->jobs_pending--; - pthread_mutex_unlock(&mt->job_pool_m); - return -1; -} - -static int mt_flush_queue(BGZF *fp) -{ - mtaux_t *mt = fp->mt; - - // Drain the encoder jobs. - // We cannot use hts_tpool_flush here as it can cause deadlock if - // the queue is full up of decoder tasks. The best solution would - // be to have one input queue per type of job, but we don't right now. 
- //hts_tpool_flush(mt->pool); - pthread_mutex_lock(&mt->job_pool_m); - int shutdown = 0; - while (mt->jobs_pending != 0) { - if ((shutdown = hts_tpool_process_is_shutdown(mt->out_queue))) - break; - pthread_mutex_unlock(&mt->job_pool_m); - usleep(10000); // FIXME: replace by condition variable - pthread_mutex_lock(&mt->job_pool_m); - } - pthread_mutex_unlock(&mt->job_pool_m); - - if (shutdown) - return -1; - - // Wait on bgzf_mt_writer to drain the queue - if (hts_tpool_process_flush(mt->out_queue) != 0) - return -1; - - return (fp->errcode == 0)? 0 : -1; -} - -static int lazy_flush(BGZF *fp) -{ - if (fp->mt) - return fp->block_offset ? mt_queue(fp) : 0; - else - return bgzf_flush(fp); -} - -#else // ~ #ifdef BGZF_MT - -int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) -{ - return 0; -} - -static inline int lazy_flush(BGZF *fp) -{ - return bgzf_flush(fp); -} - -#endif // ~ #ifdef BGZF_MT - -int bgzf_flush(BGZF *fp) -{ - if (!fp->is_write) return 0; -#ifdef BGZF_MT - if (fp->mt) { - int ret = 0; - if (fp->block_offset) ret = mt_queue(fp); - if (!ret) ret = mt_flush_queue(fp); - - // We maintain mt->block_address when threading as the - // main code can call bgzf_tell without any locks. - // (The result from tell are wrong, but we only care about the last - // 16-bits worth except for the final flush process. 
- pthread_mutex_lock(&fp->mt->idx_m); - fp->block_address = fp->mt->block_address; - pthread_mutex_unlock(&fp->mt->idx_m); - - return ret; - } -#endif - while (fp->block_offset > 0) { - int block_length; - if ( fp->idx_build_otf ) - { - bgzf_index_add_block(fp); - fp->idx->ublock_addr += fp->block_offset; - } - block_length = deflate_block(fp, fp->block_offset); - if (block_length < 0) { - hts_log_debug("Deflate block operation failed: %s", bgzf_zerr(block_length, NULL)); - return -1; - } - if (hwrite(fp->fp, fp->compressed_block, block_length) != block_length) { - hts_log_error("File write failed (wrong size)"); - fp->errcode |= BGZF_ERR_IO; // possibly truncated file - return -1; - } - fp->block_address += block_length; - } - return 0; -} - -int bgzf_flush_try(BGZF *fp, ssize_t size) -{ - if (fp->block_offset + size > BGZF_BLOCK_SIZE) return lazy_flush(fp); - return 0; -} - -ssize_t bgzf_write(BGZF *fp, const void *data, size_t length) -{ - if ( !fp->is_compressed ) { - size_t push = length + (size_t) fp->block_offset; - fp->block_offset = push % BGZF_MAX_BLOCK_SIZE; - fp->block_address += (push - fp->block_offset); - return hwrite(fp->fp, data, length); - } - - const uint8_t *input = (const uint8_t*)data; - ssize_t remaining = length; - assert(fp->is_write); - while (remaining > 0) { - uint8_t* buffer = (uint8_t*)fp->uncompressed_block; - int copy_length = BGZF_BLOCK_SIZE - fp->block_offset; - if (copy_length > remaining) copy_length = remaining; - memcpy(buffer + fp->block_offset, input, copy_length); - fp->block_offset += copy_length; - input += copy_length; - remaining -= copy_length; - if (fp->block_offset == BGZF_BLOCK_SIZE) { - if (lazy_flush(fp) != 0) return -1; - } - } - return length - remaining; -} - -ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length) -{ - if ( !fp->is_compressed ) { - size_t push = length + (size_t) fp->block_offset; - fp->block_offset = push % BGZF_MAX_BLOCK_SIZE; - fp->block_address += (push - fp->block_offset); - 
return hwrite(fp->fp, data, length); - } - - const uint8_t *input = (const uint8_t*)data; - ssize_t remaining = length; - assert(fp->is_write); - uint64_t current_block; //keep track of current block - uint64_t ublock_size; // amount of uncompressed data to be fed into next block - while (remaining > 0) { - current_block = fp->idx->moffs - fp->idx->noffs; - ublock_size = current_block + 1 < fp->idx->moffs ? fp->idx->offs[current_block+1].uaddr-fp->idx->offs[current_block].uaddr : BGZF_MAX_BLOCK_SIZE; - uint8_t* buffer = (uint8_t*)fp->uncompressed_block; - int copy_length = ublock_size - fp->block_offset; - if (copy_length > remaining) copy_length = remaining; - memcpy(buffer + fp->block_offset, input, copy_length); - fp->block_offset += copy_length; - input += copy_length; - remaining -= copy_length; - if (fp->block_offset == ublock_size) { - if (lazy_flush(fp) != 0) return -1; - if (fp->idx->noffs > 0) - fp->idx->noffs--; // decrement noffs to track the blocks - } - } - return length - remaining; -} - - -ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length) -{ - ssize_t ret = hwrite(fp->fp, data, length); - if (ret < 0) fp->errcode |= BGZF_ERR_IO; - return ret; -} - -// Helper function for tidying up fp->mt and setting errcode -static void bgzf_close_mt(BGZF *fp) { - if (fp->mt) { - if (!fp->mt->free_block) - fp->uncompressed_block = NULL; - if (mt_destroy(fp->mt) < 0) - fp->errcode = BGZF_ERR_IO; - } -} - -int bgzf_close(BGZF* fp) -{ - int ret, block_length; - if (fp == 0) return -1; - if (fp->is_write && fp->is_compressed) { - if (bgzf_flush(fp) != 0) { - bgzf_close_mt(fp); - return -1; - } - fp->compress_level = -1; - block_length = deflate_block(fp, 0); // write an empty block - if (block_length < 0) { - hts_log_debug("Deflate block operation failed: %s", bgzf_zerr(block_length, NULL)); - bgzf_close_mt(fp); - return -1; - } - if (hwrite(fp->fp, fp->compressed_block, block_length) < 0 - || hflush(fp->fp) != 0) { - hts_log_error("File write failed"); 
- fp->errcode |= BGZF_ERR_IO; - return -1; - } - } - - bgzf_close_mt(fp); - - if ( fp->is_gzip ) - { - if (fp->gz_stream == NULL) ret = Z_OK; - else if (!fp->is_write) ret = inflateEnd(fp->gz_stream); - else ret = deflateEnd(fp->gz_stream); - if (ret != Z_OK) { - hts_log_error("Call to inflateEnd/deflateEnd failed: %s", bgzf_zerr(ret, NULL)); - } - free(fp->gz_stream); - } - ret = hclose(fp->fp); - if (ret != 0) return -1; - bgzf_index_destroy(fp); - free(fp->uncompressed_block); - free_cache(fp); - ret = fp->errcode ? -1 : 0; - free(fp); - return ret; -} - -void bgzf_set_cache_size(BGZF *fp, int cache_size) -{ - if (fp && fp->mt) return; // Not appropriate when multi-threading - if (fp && fp->cache) fp->cache_size = cache_size; -} - -int bgzf_check_EOF(BGZF *fp) { - int has_eof; - - if (fp->mt) { - pthread_mutex_lock(&fp->mt->command_m); - // fp->mt->command state transitions should be: - // NONE -> HAS_EOF -> HAS_EOF_DONE -> NONE - // (HAS_EOF -> HAS_EOF_DONE happens in bgzf_mt_reader thread) - if (fp->mt->command != CLOSE) - fp->mt->command = HAS_EOF; - pthread_cond_signal(&fp->mt->command_c); - hts_tpool_wake_dispatch(fp->mt->out_queue); - do { - if (fp->mt->command == CLOSE) { - // possible error in bgzf_mt_reader - pthread_mutex_unlock(&fp->mt->command_m); - return 0; - } - pthread_cond_wait(&fp->mt->command_c, &fp->mt->command_m); - switch (fp->mt->command) { - case HAS_EOF_DONE: break; - case HAS_EOF: - // Resend signal intended for bgzf_mt_reader() - pthread_cond_signal(&fp->mt->command_c); - break; - case CLOSE: - continue; - default: - abort(); // Should not get to any other state - } - } while (fp->mt->command != HAS_EOF_DONE); - fp->mt->command = NONE; - has_eof = fp->mt->eof; - pthread_mutex_unlock(&fp->mt->command_m); - } else { - has_eof = bgzf_check_EOF_common(fp); - } - - fp->no_eof_block = (has_eof == 0); - - return has_eof; -} - -static inline int64_t bgzf_seek_common(BGZF* fp, - int64_t block_address, int block_offset) -{ - if (fp->mt) { - // 
The reader runs asynchronous and does loops of: - // Read block - // Check & process command - // Dispatch decode job - // - // Once at EOF it then switches to loops of - // Wait for command - // Process command (possibly switching back to above loop). - // - // To seek we therefore send the reader thread a SEEK command, - // waking it up if blocked in dispatch and signalling if - // waiting for a command. We then wait for the response so we - // know the seek succeeded. - pthread_mutex_lock(&fp->mt->command_m); - fp->mt->hit_eof = 0; - // fp->mt->command state transitions should be: - // NONE -> SEEK -> SEEK_DONE -> NONE - // (SEEK -> SEEK_DONE happens in bgzf_mt_reader thread) - fp->mt->command = SEEK; - fp->mt->block_address = block_address; - pthread_cond_signal(&fp->mt->command_c); - hts_tpool_wake_dispatch(fp->mt->out_queue); - do { - pthread_cond_wait(&fp->mt->command_c, &fp->mt->command_m); - switch (fp->mt->command) { - case SEEK_DONE: break; - case SEEK: - // Resend signal intended for bgzf_mt_reader() - pthread_cond_signal(&fp->mt->command_c); - break; - default: - abort(); // Should not get to any other state - } - } while (fp->mt->command != SEEK_DONE); - fp->mt->command = NONE; - - fp->block_length = 0; // indicates current block has not been loaded - fp->block_address = block_address; - fp->block_offset = block_offset; - - pthread_mutex_unlock(&fp->mt->command_m); - } else { - if (hseek(fp->fp, block_address, SEEK_SET) < 0) { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - fp->block_length = 0; // indicates current block has not been loaded - fp->block_address = block_address; - fp->block_offset = block_offset; - } - - return 0; -} - -int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) -{ - if (fp->is_write || where != SEEK_SET || fp->is_gzip) { - fp->errcode |= BGZF_ERR_MISUSE; - return -1; - } - - // This is a flag to indicate we've jumped elsewhere in the stream, to act - // as a hint to any other code which is wrapping up bgzf for its own - // 
purposes. We may not be able to tell when seek happens as it can be - // done on our behalf, eg by the iterator. - // - // This is never cleared here. Any tool that needs to handle it is also - // responsible for clearing it. - fp->seeked = pos; - - return bgzf_seek_common(fp, pos >> 16, pos & 0xFFFF); -} - -int bgzf_is_bgzf(const char *fn) -{ - uint8_t buf[16]; - int n; - hFILE *fp; - if ((fp = hopen(fn, "r")) == 0) return 0; - n = hread(fp, buf, 16); - if (hclose(fp) < 0) return 0; - if (n != 16) return 0; - return check_header(buf) == 0? 1 : 0; -} - -int bgzf_compression(BGZF *fp) -{ - return (!fp->is_compressed)? no_compression : (fp->is_gzip)? gzip : bgzf; -} - -int bgzf_getc(BGZF *fp) -{ - if (fp->block_offset+1 < fp->block_length) { - fp->uncompressed_address++; - return ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; - } - - int c; - if (fp->block_offset >= fp->block_length) { - if (bgzf_read_block(fp) != 0) return -2; /* error */ - if (fp->block_length == 0) return -1; /* end-of-file */ - } - c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; - if (fp->block_offset == fp->block_length) { - fp->block_address = bgzf_htell(fp); - fp->block_offset = 0; - fp->block_length = 0; - } - fp->uncompressed_address++; - return c; -} - -int bgzf_getline(BGZF *fp, int delim, kstring_t *str) -{ - int l, state = 0; - str->l = 0; - do { - if (fp->block_offset >= fp->block_length) { - if (bgzf_read_block(fp) != 0) { state = -2; break; } - if (fp->block_length == 0) { state = -1; break; } - } - unsigned char *buf = fp->uncompressed_block; - - // Equivalent to a naive byte by byte search from - // buf + block_offset to buf + block_length. - void *e = memchr(&buf[fp->block_offset], delim, - fp->block_length - fp->block_offset); - l = e ? 
(unsigned char *)e - buf : fp->block_length; - - if (l < fp->block_length) state = 1; - l -= fp->block_offset; - if (ks_expand(str, l + 2) < 0) { state = -3; break; } - memcpy(str->s + str->l, buf + fp->block_offset, l); - str->l += l; - fp->block_offset += l + 1; - if (fp->block_offset >= fp->block_length) { - fp->block_address = bgzf_htell(fp); - fp->block_offset = 0; - fp->block_length = 0; - } - } while (state == 0); - if (state < -1) return state; - if (str->l == 0 && state < 0) return state; - fp->uncompressed_address += str->l + 1; - if ( delim=='\n' && str->l>0 && str->s[str->l-1]=='\r' ) str->l--; - str->s[str->l] = 0; - return str->l <= INT_MAX ? (int) str->l : INT_MAX; -} - -void bgzf_index_destroy(BGZF *fp) -{ - if ( !fp->idx ) return; - free(fp->idx->offs); - free(fp->idx); - fp->idx = NULL; - fp->idx_build_otf = 0; -} - -int bgzf_index_build_init(BGZF *fp) -{ - bgzf_index_destroy(fp); - fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t)); - if ( !fp->idx ) return -1; - fp->idx_build_otf = 1; // build index on the fly - return 0; -} - -int bgzf_index_add_block(BGZF *fp) -{ - fp->idx->noffs++; - if ( fp->idx->noffs > fp->idx->moffs ) - { - fp->idx->moffs = fp->idx->noffs; - kroundup32(fp->idx->moffs); - fp->idx->offs = (bgzidx1_t*) realloc(fp->idx->offs, fp->idx->moffs*sizeof(bgzidx1_t)); - if ( !fp->idx->offs ) return -1; - } - fp->idx->offs[ fp->idx->noffs-1 ].uaddr = fp->idx->ublock_addr; - fp->idx->offs[ fp->idx->noffs-1 ].caddr = fp->block_address; - return 0; -} - -static inline int hwrite_uint64(uint64_t x, hFILE *f) -{ - if (ed_is_big()) x = ed_swap_8(x); - if (hwrite(f, &x, sizeof(x)) != sizeof(x)) return -1; - return 0; -} - -static char * get_name_suffix(const char *bname, const char *suffix) -{ - size_t len = strlen(bname) + strlen(suffix) + 1; - char *buff = malloc(len); - if (!buff) return NULL; - snprintf(buff, len, "%s%s", bname, suffix); - return buff; -} - -int bgzf_index_dump_hfile(BGZF *fp, struct hFILE *idx, const char *name) -{ - // 
Note that the index contains one extra record when indexing files opened - // for reading. The terminating record is not present when opened for writing. - // This is not a bug. - - int i; - - if (!fp->idx) { - hts_log_error("Called for BGZF handle with no index"); - errno = EINVAL; - return -1; - } - - if (bgzf_flush(fp) != 0) return -1; - - // discard the entry marking the end of the file - if (fp->mt && fp->idx) - fp->idx->noffs--; - - if (hwrite_uint64(fp->idx->noffs - 1, idx) < 0) goto fail; - for (i=1; iidx->noffs; i++) - { - if (hwrite_uint64(fp->idx->offs[i].caddr, idx) < 0) goto fail; - if (hwrite_uint64(fp->idx->offs[i].uaddr, idx) < 0) goto fail; - } - return 0; - - fail: - hts_log_error("Error writing to %s : %s", name ? name : "index", strerror(errno)); - return -1; -} - -int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix) -{ - const char *name = bname, *msg = NULL; - char *tmp = NULL; - hFILE *idx = NULL; - - if (!fp->idx) { - hts_log_error("Called for BGZF handle with no index"); - errno = EINVAL; - return -1; - } - - if ( suffix ) - { - tmp = get_name_suffix(bname, suffix); - if ( !tmp ) return -1; - name = tmp; - } - - idx = hopen(name, "wb"); - if ( !idx ) { - msg = "Error opening"; - goto fail; - } - - if (bgzf_index_dump_hfile(fp, idx, name) != 0) goto fail; - - if (hclose(idx) < 0) - { - idx = NULL; - msg = "Error on closing"; - goto fail; - } - - free(tmp); - return 0; - - fail: - if (msg != NULL) { - hts_log_error("%s %s : %s", msg, name, strerror(errno)); - } - if (idx) hclose_abruptly(idx); - free(tmp); - return -1; -} - -static inline int hread_uint64(uint64_t *xptr, hFILE *f) -{ - if (hread(f, xptr, sizeof(*xptr)) != sizeof(*xptr)) return -1; - if (ed_is_big()) ed_swap_8p(xptr); - return 0; -} - -int bgzf_index_load_hfile(BGZF *fp, struct hFILE *idx, const char *name) -{ - fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t)); - if (fp->idx == NULL) goto fail; - uint64_t x; - if (hread_uint64(&x, idx) < 0) goto fail; - - 
fp->idx->noffs = fp->idx->moffs = x + 1; - fp->idx->offs = (bgzidx1_t*) malloc(fp->idx->moffs*sizeof(bgzidx1_t)); - if (fp->idx->offs == NULL) goto fail; - fp->idx->offs[0].caddr = fp->idx->offs[0].uaddr = 0; - - int i; - for (i=1; iidx->noffs; i++) - { - if (hread_uint64(&fp->idx->offs[i].caddr, idx) < 0) goto fail; - if (hread_uint64(&fp->idx->offs[i].uaddr, idx) < 0) goto fail; - } - - return 0; - - fail: - hts_log_error("Error reading %s : %s", name ? name : "index", strerror(errno)); - if (fp->idx) { - free(fp->idx->offs); - free(fp->idx); - fp->idx = NULL; - } - return -1; -} - -int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix) -{ - const char *name = bname, *msg = NULL; - char *tmp = NULL; - hFILE *idx = NULL; - if ( suffix ) - { - tmp = get_name_suffix(bname, suffix); - if ( !tmp ) return -1; - name = tmp; - } - - idx = hopen(name, "rb"); - if ( !idx ) { - msg = "Error opening"; - goto fail; - } - - if (bgzf_index_load_hfile(fp, idx, name) != 0) goto fail; - - if (hclose(idx) != 0) { - idx = NULL; - msg = "Error closing"; - goto fail; - } - - free(tmp); - return 0; - - fail: - if (msg != NULL) { - hts_log_error("%s %s : %s", msg, name, strerror(errno)); - } - if (idx) hclose_abruptly(idx); - free(tmp); - return -1; -} - -int bgzf_useek(BGZF *fp, off_t uoffset, int where) -{ - if (fp->is_write || where != SEEK_SET || fp->is_gzip) { - fp->errcode |= BGZF_ERR_MISUSE; - return -1; - } - if (uoffset >= fp->uncompressed_address - fp->block_offset && - uoffset < fp->uncompressed_address + fp->block_length - fp->block_offset) { - // Can seek into existing data - fp->block_offset += uoffset - fp->uncompressed_address; - fp->uncompressed_address = uoffset; - return 0; - } - if ( !fp->is_compressed ) - { - if (hseek(fp->fp, uoffset, SEEK_SET) < 0) - { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - fp->block_length = 0; // indicates current block has not been loaded - fp->block_address = uoffset; - fp->block_offset = 0; - if (bgzf_read_block(fp) < 
0) { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - fp->uncompressed_address = uoffset; - return 0; - } - - if ( !fp->idx ) - { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - - // binary search - int ilo = 0, ihi = fp->idx->noffs - 1; - while ( ilo<=ihi ) - { - int i = (ilo+ihi)*0.5; - if ( uoffset < fp->idx->offs[i].uaddr ) ihi = i - 1; - else if ( uoffset >= fp->idx->offs[i].uaddr ) ilo = i + 1; - else break; - } - int i = ilo-1; - off_t offset = 0; - if (bgzf_seek_common(fp, fp->idx->offs[i].caddr, 0) < 0) - return -1; - - if ( bgzf_read_block(fp) < 0 ) { - fp->errcode |= BGZF_ERR_IO; - return -1; - } - offset = uoffset - fp->idx->offs[i].uaddr; - if ( offset > 0 ) - { - if (offset > fp->block_length) { - fp->errcode |= BGZF_ERR_IO; - return -1; //offset outside the available data - } - fp->block_offset = offset; - assert( fp->block_offset <= fp->block_length ); // todo: skipped, unindexed, blocks - } - fp->uncompressed_address = uoffset; - return 0; -} - -off_t bgzf_utell(BGZF *fp) -{ - return fp->uncompressed_address; // currently maintained only when reading -} - -/* prototype is in hfile_internal.h */ -struct hFILE *bgzf_hfile(struct BGZF *fp) { - return fp->fp; -} diff --git a/src/htslib-1.19.1/bgzip.1 b/src/htslib-1.19.1/bgzip.1 deleted file mode 100644 index 3be9a5b..0000000 --- a/src/htslib-1.19.1/bgzip.1 +++ /dev/null @@ -1,198 +0,0 @@ -.TH bgzip 1 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools" -.SH NAME -.PP -bgzip \- Block compression/decompression utility -.\" -.\" Copyright (C) 2009-2011 Broad Institute. -.\" Copyright (C) 2018, 2021-2023 Genome Research Limited. 
-.\" -.\" Author: Heng Li -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -. -.\" For code blocks and examples (cf groff's Ultrix-specific man macros) -.de EX - -. in +\\$1 -. nf -. ft CR -.. -.de EE -. ft -. fi -. in - -.. -.SH SYNOPSIS -.PP -.B bgzip -.RB [ -cdfhikrt ] -.RB [ -b -.IR virtualOffset ] -.RB [ -I -.IR index_name ] -.RB [ -l -.IR compression_level ] -.RB [ -s -.IR size ] -.RB [ -@ -.IR threads ] -.RI [ file " ...]" -.PP -.SH DESCRIPTION -.PP -Bgzip compresses files in a similar manner to, and compatible with, gzip(1). -The file is compressed into a series of small (less than 64K) 'BGZF' blocks. -This allows indexes to be built against the compressed file and used to -retrieve portions of the data without having to decompress the entire file. - -If no files are specified on the command line, bgzip will compress (or -decompress if the -d option is used) standard input to standard output. 
-If a file is specified, it will be compressed (or decompressed with -d). -If the -c option is used, the result will be written to standard output, -otherwise when compressing bgzip will write to a new file with a .gz -suffix and remove the original. When decompressing the input file must -have a .gz suffix, which will be removed to make the output name. Again -after decompression completes the input file will be removed. When multiple -files are given as input, the operation is performed on all of them. - -.SH OPTIONS -.TP 10 -.B "--binary" -Bgzip will attempt to ensure BGZF blocks end on a newline when the -input is a text file. The exception to this is where a single line is -larger than a BGZF block (64Kb). This can aid tools that use the -index to perform random access on the compressed stream, as the start -of a block is likely to also be the start of a text record. - -This option processes text files as if they were binary content, -ignoring the location of newlines. This also restores the behaviour -for text files to bgzip version 1.15 and earlier. -.TP -.BI "-b, --offset " INT -Decompress to standard output from virtual file position (0-based uncompressed -offset). -Implies -c and -d. -.TP -.B "-c, --stdout" -Write to standard output, keep original files unchanged. -.TP -.B "-d, --decompress" -Decompress. -.TP -.B "-f, --force" -Overwrite files without asking, or decompress files that don't have a known -compression filename extension (e.g., \fI.gz\fR) without asking. -Use \fB--force\fR twice to do both without asking. -.TP -.B "-g, --rebgzip" -Try to use an existing index to create a compressed file with matching -block offsets. The index must be specified using the \fB-I -\fIfile.gzi\fR option. -Note that this assumes that the same compression library and level are in use -as when making the original file. -Don't use it unless you know what you're doing. -.TP -.B "-h, --help" -Displays a help message. 
-.TP -.B "-i, --index" -Create a BGZF index while compressing. -Unless the -I option is used, this will have the name of the compressed -file with .gzi appended to it. -.TP -.BI "-I, --index-name " FILE -Index file name. -.TP -.B "-k, --keep" -Do not delete input file during operation. -.TP -.BI "-l, --compress-level " INT -Compression level to use when compressing. -From 0 to 9, or -1 for the default level set by the compression library. [-1] -.TP -.B "-r, --reindex" -Rebuild the index on an existing compressed file. -.TP -.BI "-s, --size " INT -Decompress INT bytes (uncompressed size) to standard output. -Implies -c. -.TP -.B "-t, --test" -Test the intregrity of the compressed file. -.TP -.BI "-@, --threads " INT -Number of threads to use [1]. -.PP - -.SH BGZF FORMAT -The BGZF format written by bgzip is described in the SAM format specification -available from http://samtools.github.io/hts-specs/SAMv1.pdf. - -It makes use of a gzip feature which allows compressed files to be -concatenated. -The input data is divided into blocks which are no larger than 64 kilobytes -both before and after compression (including compression headers). -Each block is compressed into a gzip file. -The gzip header includes an extra sub-field with identifier 'BC' and the length -of the compressed block, including all headers. - -.SH GZI FORMAT -The index format is a binary file listing pairs of compressed and -uncompressed offsets in a BGZF file. -Each compressed offset points to the start of a BGZF block. -The uncompressed offset is the corresponding location in the uncompressed -data stream. - -All values are stored as little-endian 64-bit unsigned integers. 
- -The file contents are: -.EX 4 -uint64_t number_entries -.EE -followed by number_entries pairs of: -.EX 4 -uint64_t compressed_offset -uint64_t uncompressed_offset -.EE - -.SH EXAMPLES -.EX 4 -# Compress stdin to stdout -bgzip < /usr/share/dict/words > /tmp/words.gz - -# Make a .gzi index -bgzip -r /tmp/words.gz - -# Extract part of the data using the index -bgzip -b 367635 -s 4 /tmp/words.gz - -# Uncompress the whole file, removing the compressed copy -bgzip -d /tmp/words.gz -.EE - -.SH AUTHOR -.PP -The BGZF library was originally implemented by Bob Handsaker and modified -by Heng Li for remote file access and in-memory caching. - -.SH SEE ALSO -.IR gzip (1), -.IR tabix (1) diff --git a/src/htslib-1.19.1/bgzip.c b/src/htslib-1.19.1/bgzip.c deleted file mode 100644 index e7caa78..0000000 --- a/src/htslib-1.19.1/bgzip.c +++ /dev/null @@ -1,567 +0,0 @@ -/* bgzip.c -- Block compression/decompression utility. - - Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology - Copyright (C) 2010, 2013-2019, 2021-2023 Genome Research Ltd. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notices and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "htslib/bgzf.h" -#include "htslib/hts.h" -#include "htslib/hfile.h" - -#ifdef _WIN32 -# define WIN32_LEAN_AND_MEAN -# include -#endif - -static const int WINDOW_SIZE = BGZF_BLOCK_SIZE; - -static void error(const char *format, ...) -{ - va_list ap; - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); - exit(EXIT_FAILURE); -} - -static int ask_yn() -{ - char line[1024]; - if (fgets(line, sizeof line, stdin) == NULL) - return 0; - return line[0] == 'Y' || line[0] == 'y'; -} - -static int confirm_overwrite(const char *fn) -{ - int save_errno = errno; - int ret = 0; - - if (isatty(STDIN_FILENO)) { - fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn); - if (ask_yn()) ret = 1; - } - - errno = save_errno; - return ret; -} - -static int known_extension(const char *ext) -{ - static const char *known[] = { - "gz", "bgz", "bgzf", - NULL - }; - - const char **p; - for (p = known; *p; p++) - if (strcasecmp(ext, *p) == 0) return 1; - return 0; -} - -static int confirm_filename(int *is_forced, const char *name, const char *ext) -{ - if (*is_forced) { - (*is_forced)--; - return 1; - } - - if (!isatty(STDIN_FILENO)) - return 0; - - fprintf(stderr, "[bgzip] .%s is not a known extension; do you wish to decompress to %s (y or n)? 
", ext, name); - return ask_yn(); -} - -static int bgzip_main_usage(FILE *fp, int status) -{ - fprintf(fp, "\n"); - fprintf(fp, "Version: %s\n", hts_version()); - fprintf(fp, "Usage: bgzip [OPTIONS] [FILE] ...\n"); - fprintf(fp, "Options:\n"); - fprintf(fp, " -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)\n"); - fprintf(fp, " -c, --stdout write on standard output, keep original files unchanged\n"); - fprintf(fp, " -d, --decompress decompress\n"); - fprintf(fp, " -f, --force overwrite files without asking\n"); - fprintf(fp, " -g, --rebgzip use an index file to bgzip a file\n"); - fprintf(fp, " -h, --help give this help\n"); - fprintf(fp, " -i, --index compress and create BGZF index\n"); - fprintf(fp, " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\n"); - fprintf(fp, " -k, --keep don't delete input files during operation\n"); - fprintf(fp, " -l, --compress-level INT Compression level to use when compressing; 0 to 9, or -1 for default [-1]\n"); - fprintf(fp, " -r, --reindex (re)index compressed file\n"); - fprintf(fp, " -s, --size INT decompress INT bytes (uncompressed size)\n"); - fprintf(fp, " -t, --test test integrity of compressed file\n"); - fprintf(fp, " --binary Don't align blocks with text lines\n"); - fprintf(fp, " -@, --threads INT number of compression threads to use [1]\n"); - return status; -} - -int main(int argc, char **argv) -{ - int c, compress, compress_level = -1, pstdout, is_forced, test, index = 0, rebgzip = 0, reindex = 0, keep, binary; - BGZF *fp; - char *buffer; - long start, end, size; - char *index_fname = NULL; - int threads = 1, isstdin = 0, usedstdout = 0, ret = 0; - - static const struct option loptions[] = - { - {"help", no_argument, NULL, 'h'}, - {"offset", required_argument, NULL, 'b'}, - {"stdout", no_argument, NULL, 'c'}, - {"decompress", no_argument, NULL, 'd'}, - {"force", no_argument, NULL, 'f'}, - {"index", no_argument, NULL, 'i'}, - {"index-name", required_argument, NULL, 'I'}, - 
{"compress-level", required_argument, NULL, 'l'}, - {"reindex", no_argument, NULL, 'r'}, - {"rebgzip",no_argument,NULL,'g'}, - {"size", required_argument, NULL, 's'}, - {"threads", required_argument, NULL, '@'}, - {"test", no_argument, NULL, 't'}, - {"version", no_argument, NULL, 1}, - {"keep", no_argument, NULL, 'k'}, - {"binary", no_argument, NULL, 2}, - {NULL, 0, NULL, 0} - }; - - compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; test = 0; keep = 0; binary = 0; - while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:grtk",loptions,NULL)) >= 0){ - switch(c){ - case 'd': compress = 0; break; - case 'c': pstdout = 1; break; - case 'b': start = atol(optarg); compress = 0; pstdout = 1; break; - case 's': size = atol(optarg); pstdout = 1; break; - case 'f': is_forced++; break; - case 'i': index = 1; break; - case 'I': index_fname = optarg; break; - case 'l': compress_level = atol(optarg); break; - case 'g': rebgzip = 1; break; - case 'r': reindex = 1; compress = 0; break; - case '@': threads = atoi(optarg); break; - case 't': test = 1; compress = 0; reindex = 0; break; - case 'k': keep = 1; break; - case 1: - printf( -"bgzip (htslib) %s\n" -"Copyright (C) 2024 Genome Research Ltd.\n", hts_version()); - return EXIT_SUCCESS; - case 2: binary = 1; break; - case 'h': return bgzip_main_usage(stdout, EXIT_SUCCESS); - case '?': return bgzip_main_usage(stderr, EXIT_FAILURE); - } - } - if (size >= 0) end = start + size; - if (end >= 0 && end < start) { - fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); - return 1; - } - - if ( (index || reindex) && rebgzip ) - { - fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n"); - return 1; - } - if ( rebgzip && !index_fname ) - { - fprintf(stderr, "[bgzip] Index file name expected with rebgzip. See -I option.\n"); - return 1; - } - /* avoid -I / indexfile with multiple inputs while index/reindex. 
these wont be set during - read/decompress and are not considered even if set */ - if ( (index || reindex) && index_fname && argc - optind > 1) { - fprintf(stderr, "[bgzip] Cannot specify index filename with multiple data file on index, reindex.\n"); - return 1; - } - - do { - isstdin = optind >= argc ? 1 : !strcmp("-", argv[optind]); //using stdin or not? - /*stdout is in use when explicitly selected or when stdin in is in use, it need to be closed - explicitly to get all io errors*/ - usedstdout |= isstdin || pstdout || test; - - if (compress == 1) { - hFILE* f_src = NULL; - char out_mode[3] = "w\0"; - char out_mode_exclusive[4] = "wx\0"; - - if (compress_level < -1 || compress_level > 9) { - fprintf(stderr, "[bgzip] Invalid compress-level: %d\n", compress_level); - return 1; - } - if (compress_level >= 0) { - out_mode[1] = compress_level + '0'; - out_mode_exclusive[2] = compress_level + '0'; - } - if (!(f_src = hopen(!isstdin ? argv[optind] : "-", "r"))) { - fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), isstdin ? "stdin" : argv[optind]); - return 1; - } - - if ( argc>optind && !isstdin ) //named input file that isn't an explicit "-" - { - if (pstdout) - fp = bgzf_open("-", out_mode); - else - { - char *name = malloc(strlen(argv[optind]) + 5); - strcpy(name, argv[optind]); - strcat(name, ".gz"); - fp = bgzf_open(name, is_forced? 
out_mode : out_mode_exclusive); - if (fp == NULL && errno == EEXIST) { - if (confirm_overwrite(name)) { - fp = bgzf_open(name, out_mode); - } - else { - ret = 2; //explicit N - no overwrite, continue and return 2 - if (hclose(f_src) < 0) - ; //ignoring return value - free(name); - continue; - } - } - if (fp == NULL) { - fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); - free(name); - return 1; - } - free(name); - } - } - else if (!pstdout && isatty(fileno((FILE *)stdout)) ) - return bgzip_main_usage(stderr, EXIT_FAILURE); - else if ( index && !index_fname ) - { - fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); - return 1; - } - else - fp = bgzf_open("-", out_mode); - - if ( index ) bgzf_index_build_init(fp); - if (threads > 1) - bgzf_mt(fp, threads, 256); - - buffer = malloc(WINDOW_SIZE); - if (!buffer) - return 1; - if (rebgzip){ - if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.%s\n", !isstdin ? argv[optind] : index_fname, !isstdin ? 
"gzi" : ""); - - while ((c = hread(f_src, buffer, WINDOW_SIZE)) > 0) - if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); - } - else { - htsFormat fmt; - int textual = 0; - if (!binary - && hts_detect_format(f_src, &fmt) == 0 - && fmt.compression == no_compression) { - switch(fmt.format) { - case text_format: - case sam: - case vcf: - case bed: - case fasta_format: - case fastq_format: - case fai_format: - case fqi_format: - textual = 1; - break; - default: break; // silence clang warnings - } - } - - if (binary || !textual) { - // Binary data, either detected or explicit - while ((c = hread(f_src, buffer, WINDOW_SIZE)) > 0) - if (bgzf_write(fp, buffer, c) < 0) - error("Could not write %d bytes: Error %d\n", - c, fp->errcode); - } else { - /* Text mode, try a flush after a newline */ - int in_header = 1, n = 0, long_line = 0; - while ((c = hread(f_src, buffer+n, WINDOW_SIZE-n)) > 0) { - int c2 = c+n; - int flush = 0; - if (in_header && - (long_line || buffer[0] == '@' || buffer[0] == '#')) { - // Scan forward to find the last header line. - int last_start = 0; - n = 0; - while (n < c2) { - if (buffer[n++] != '\n') - continue; - - last_start = n; - if (n < c2 && - !(buffer[n] == '@' || buffer[n] == '#')) { - in_header = 0; - break; - } - } - if (!last_start) { - n = c2; - long_line = 1; - } else { - n = last_start; - flush = 1; - long_line = 0; - } - } else { - // Scan backwards to find the last newline. - n += c; // c read plus previous n overflow - while (--n >= 0 && ((char *)buffer)[n] != '\n') - ; - - if (n >= 0) { - flush = 1; - n++; - } else { - n = c2; - } - } - - // Pos n is either at the end of the buffer with flush==0, - // or the first byte after a newline and a flush point. 
- if (bgzf_write(fp, buffer, n) < 0) - error("Could not write %d bytes: Error %d\n", - n, fp->errcode); - if (flush) - if (bgzf_flush_try(fp, 65536) < 0) // force - return -1; - - memmove(buffer, buffer+n, c2-n); - n = c2-n; - } - - // Trailing data. - if (bgzf_write(fp, buffer, n) < 0) - error("Could not write %d bytes: Error %d\n", - n, fp->errcode); - } - } - if ( index ) - { - if (index_fname) { - if (bgzf_index_dump(fp, index_fname, NULL) < 0) - error("Could not write index to '%s'\n", index_fname); - } else if (!isstdin) { - if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0) - error("Could not write index to '%s.gz.gzi'\n", argv[optind]); - } - else { - //stdin, cant create index file as name is not present "-.gz.gzi" not a valid one! - error("Can not write index for stdin data without index filename, use -I option to set index file.\n"); - } - } - if (bgzf_close(fp) < 0) - error("Output close failed: Error %d\n", fp->errcode); - if (hclose(f_src) < 0) - error("Input close failed\n"); - if (argc > optind && !pstdout && !keep && !isstdin) unlink(argv[optind]); - free(buffer); - } - else if ( reindex ) - { - if ( argc>optind && !isstdin ) - { - fp = bgzf_open(argv[optind], "r"); - if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]); - } - else - { - if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n"); - fp = bgzf_open("-", "r"); - if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno)); - } - - buffer = malloc(BGZF_BLOCK_SIZE); - bgzf_index_build_init(fp); - int ret; - while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ; - free(buffer); - if ( ret<0 ) error("Is the file gzipped or bgzipped? 
The latter is required for indexing.\n"); - - if ( index_fname ) { - if (bgzf_index_dump(fp, index_fname, NULL) < 0) - error("Could not write index to '%s'\n", index_fname); - } else if (!isstdin) { - if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0) - error("Could not write index to '%s.gzi'\n", argv[optind]); - } - else { - //stdin, cant create index file as name is not present "-.gzi" not a valid one! - error("Can not write index for stdin data without index filename, use -I option to set index file.\n"); - } - - if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode); - } - else - { - int f_dst, is_forced_tmp = is_forced; - - if ( argc>optind && !isstdin ) - { - fp = bgzf_open(argv[optind], "r"); - if (fp == NULL) { - fprintf(stderr, "[bgzip] Could not open %s: %s\n", argv[optind], strerror(errno)); - return 1; - } - if (bgzf_compression(fp) == no_compression) { - fprintf(stderr, "[bgzip] %s: not a compressed file -- ignored\n", argv[optind]); - bgzf_close(fp); - return 1; - } - - if (pstdout || test) { - f_dst = fileno(stdout); - } - else { - const int wrflags = O_WRONLY | O_CREAT | O_TRUNC; - char *name = argv[optind], *ext; - size_t pos; - for (pos = strlen(name); pos > 0; --pos) - if (name[pos] == '.' || name[pos] == '/') break; - if (pos == 0 || name[pos] != '.') { - fprintf(stderr, "[bgzip] can't remove an extension from %s -- please rename\n", argv[optind]); - bgzf_close(fp); - return 1; - } - name = strdup(argv[optind]); - name[pos] = '\0'; - ext = &name[pos+1]; - if (! (known_extension(ext) || confirm_filename(&is_forced_tmp, name, ext))) { - fprintf(stderr, "[bgzip] unknown extension .%s -- declining to decompress to %s\n", ext, name); - bgzf_close(fp); - free(name); - ret = 2; //explicit N, continue and return 2 - continue; - } - f_dst = open(name, is_forced_tmp? 
wrflags : wrflags|O_EXCL, 0666); - if (f_dst < 0 && errno == EEXIST) { - if (confirm_overwrite(name)) { - f_dst = open(name, wrflags, 0666); - } - else { - ret = 2; //explicit N - no overwrite, continue and return 2 - free(name); - bgzf_close(fp); - continue; - } - } - if (f_dst < 0) { - fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); - free(name); - return 1; - } - free(name); - } - } - else if (!pstdout && isatty(fileno((FILE *)stdin)) ) - return bgzip_main_usage(stderr, EXIT_FAILURE); - else - { - f_dst = fileno(stdout); - fp = bgzf_open("-", "r"); - if (fp == NULL) { - fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); - return 1; - } - if (bgzf_compression(fp) == no_compression) { - fprintf(stderr, "[bgzip] stdin is not compressed -- ignored\n"); - bgzf_close(fp); - return 1; - } - } - - buffer = malloc(WINDOW_SIZE); - if ( start>0 ) - { - if (index_fname) { - if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) - error("Could not load index: %s\n", index_fname); - } else { - if (optind >= argc || isstdin) { - error("The -b option requires -I when reading from stdin " - "(and stdin must be seekable)\n"); - } - if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) - error("Could not load index: %s.gzi\n", argv[optind]); - } - if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start); - } - - if (threads > 1) - bgzf_mt(fp, threads, 256); - - #ifdef _WIN32 - _setmode(f_dst, O_BINARY); - #endif - long start_reg = start, end_reg = end; - while (1) { - if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); - else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? 
WINDOW_SIZE:(end - start)); - if (c == 0) break; - if (c < 0) error("Error %d in block starting at offset %" PRId64 "(%" PRIX64 ")\n", fp->errcode, fp->block_address, fp->block_address); - start += c; - if ( !test && write(f_dst, buffer, c) != c ) { - #ifdef _WIN32 - if (GetLastError() != ERROR_NO_DATA) - #endif - error("Could not write %d bytes\n", c); - } - if (end >= 0 && start >= end) break; - } - start = start_reg; - end = end_reg; - free(buffer); - if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode); - if (argc > optind && !pstdout && !test && !keep && !isstdin) unlink(argv[optind]); - if (!isstdin && !pstdout && !test) { - close(f_dst); //close output file when it is not stdout - } - } - } while (++optind < argc); - - if (usedstdout && !reindex) { - //stdout in use, have to close explicitly to get any pending write errors - if (fclose(stdout) != 0 && errno != EBADF) { - fprintf(stderr, "[bgzip] Failed to close stdout, errno %d", errno); - ret = 1; - } - } - return ret; -} diff --git a/src/htslib-1.19.1/builddir_vars.mk.in b/src/htslib-1.19.1/builddir_vars.mk.in deleted file mode 100644 index 09bb20f..0000000 --- a/src/htslib-1.19.1/builddir_vars.mk.in +++ /dev/null @@ -1,58 +0,0 @@ -# Separate build directory Makefile overrides for htslib. -# -# Copyright (C) 2021 University of Glasgow. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# This is @configure_input@ -# -# When building from a separate build directory, this file is included by -# HTSlib's Makefile or htslib.mk instead of htslib_vars.mk. It adjusts -# variables to account for a separate source directory and then includes -# the real makefile fragment. - -ifneq "$(HTSPREFIX)" "" -# When included externally via htslib.mk, just update $(HTSSRCDIR) and hence -# $(HTSPREFIX) to point to the source directory (without using any extra -# unprefixed variables, which would be in the external project's namespace). - -# Set to $(HTSDIR)/$(srcdir) (or just $(srcdir) if that's absolute) -HTSSRCDIR = @HTSDIRslash_if_relsrcdir@@srcdir@ - -include $(HTSSRCDIR)/htslib_vars.mk - -else -# When included from HTSlib's Makefile, override $(srcdir) and set VPATH, -# and make any other adjustments required. ($(HTSPREFIX) remains empty as -# the items it prefixes will be found via VPATH instead.) - -srcdir = @srcdir@ -VPATH = @srcdir@ - -srcprefix = $(srcdir)/ - -# Ensure that htscodecs.c can include its version.h. This -I option must come -# before -I. so that these targets get this version.h rather than HTSlib's. -htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: ALL_CPPFLAGS = -Ihtscodecs/htscodecs -I. 
$(CPPFLAGS) - -include $(srcdir)/htslib_vars.mk - -endif diff --git a/src/htslib-1.19.1/config.h.in b/src/htslib-1.19.1/config.h.in deleted file mode 100644 index 1ca09d3..0000000 --- a/src/htslib-1.19.1/config.h.in +++ /dev/null @@ -1,160 +0,0 @@ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* If you use configure, this file provides #defines reflecting your - configuration choices. If you have not run configure, suitable - conservative defaults will be used. - - Autoheader adds a number of items to this template file that are not - used by HTSlib: STDC_HEADERS and most HAVE_*_H header file defines - are immaterial, as we assume standard ISO C headers and facilities; - the PACKAGE_* defines are unused and are overridden by the more - accurate PACKAGE_VERSION as computed by the Makefile. */ - -/* Define if HTSlib should enable GCS support. */ -#undef ENABLE_GCS - -/* Define if HTSlib should enable plugins. */ -#undef ENABLE_PLUGINS - -/* Define if HTSlib should enable S3 support. */ -#undef ENABLE_S3 - -/* Defined to 1 if rANS source using AVX2 can be compiled. */ -#undef HAVE_AVX2 - -/* Defined to 1 if rANS source using AVX512F can be compiled. */ -#undef HAVE_AVX512 - -/* Define if you have the Common Crypto library. */ -#undef HAVE_COMMONCRYPTO - -/* Define to 1 if you have the 'drand48' function. */ -#undef HAVE_DRAND48 - -/* Define if using an external libhtscodecs */ -#undef HAVE_EXTERNAL_LIBHTSCODECS - -/* Define to 1 if you have the 'fdatasync' function. */ -#undef HAVE_FDATASYNC - -/* Define to 1 if you have the 'fsync' function. */ -#undef HAVE_FSYNC - -/* Define to 1 if you have the 'getpagesize' function. */ -#undef HAVE_GETPAGESIZE - -/* Define to 1 if you have the 'gmtime_r' function. */ -#undef HAVE_GMTIME_R - -/* Define if you have libcrypto-style HMAC(). */ -#undef HAVE_HMAC - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the 'bz2' library (-lbz2). 
*/ -#undef HAVE_LIBBZ2 - -/* Define if libcurl file access is enabled. */ -#undef HAVE_LIBCURL - -/* Define if libdeflate is available. */ -#undef HAVE_LIBDEFLATE - -/* Define to 1 if you have the 'lzma' library (-llzma). */ -#undef HAVE_LIBLZMA - -/* Define to 1 if you have the 'z' library (-lz). */ -#undef HAVE_LIBZ - -/* Define to 1 if you have the header file. */ -#undef HAVE_LZMA_H - -/* Define to 1 if you have a working 'mmap' system call. */ -#undef HAVE_MMAP - -/* Defined to 1 if rANS source using popcnt can be compiled. */ -#undef HAVE_POPCNT - -/* Define to 1 if you have the 'srand48_deterministic' function. */ -#undef HAVE_SRAND48_DETERMINISTIC - -/* Defined to 1 if rANS source using SSE4.1 can be compiled. */ -#undef HAVE_SSE4_1 - -/* Defined to 1 if rANS source using SSSE3 can be compiled. */ -#undef HAVE_SSSE3 - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDIO_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_PARAM_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the home page for this package. */ -#undef PACKAGE_URL - -/* Define to the version of this package. 
*/ -#undef PACKAGE_VERSION - -/* Platform-dependent plugin filename extension. */ -#undef PLUGIN_EXT - -/* Define to 1 if all of the C89 standard headers exist (not just the ones - required in a freestanding environment). This macro is provided for - backward compatibility; new code need not use it. */ -#undef STDC_HEADERS - - -/* Prevent unaligned access in htscodecs SSE4 rANS codec */ -#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0 -#undef UBSAN -#endif - -/* Number of bits in a file offset, on hosts where this is settable. */ -#undef _FILE_OFFSET_BITS - -/* Define to 1 on platforms where this makes off_t a 64-bit type. */ -#undef _LARGE_FILES - -/* Number of bits in time_t, on hosts where this is settable. */ -#undef _TIME_BITS - -/* Specify X/Open requirements */ -#undef _XOPEN_SOURCE - -/* Define to 1 on platforms where this makes time_t a 64-bit type. */ -#undef __MINGW_USE_VC2005_COMPAT diff --git a/src/htslib-1.19.1/configure b/src/htslib-1.19.1/configure deleted file mode 100755 index a68325a..0000000 --- a/src/htslib-1.19.1/configure +++ /dev/null @@ -1,8121 +0,0 @@ -#! /bin/sh -# Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.72 for HTSlib 1.19.1. -# -# Report bugs to . -# -# -# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation, -# Inc. -# -# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. -# -# Portions copyright (C) 2020-2023 Genome Research Ltd. -# -# This configure script is free software: you are free to change and -# redistribute it. There is NO WARRANTY, to the extent permitted by law. -## -------------------- ## -## M4sh Initialization. 
## -## -------------------- ## - -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh -if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 -then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else case e in #( - e) case `(set -o) 2>/dev/null` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac ;; -esac -fi - - - -# Reset variables that may have inherited troublesome values from -# the environment. - -# IFS needs to be set, to space, tab, and newline, in precisely that order. -# (If _AS_PATH_WALK were called with IFS unset, it would have the -# side effect of setting IFS to empty, thus disabling word splitting.) -# Quoting is to prevent editors from complaining about space-tab. -as_nl=' -' -export as_nl -IFS=" "" $as_nl" - -PS1='$ ' -PS2='> ' -PS4='+ ' - -# Ensure predictable behavior from utilities with locale-dependent output. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# We cannot yet rely on "unset" to work, but we need these variables -# to be unset--not just set to an empty or harmless value--now, to -# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct -# also avoids known problems related to "unset" and subshell syntax -# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). -for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH -do eval test \${$as_var+y} \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done - -# Ensure that fds 0, 1, and 2 are open. -if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi -if (exec 3>&2) ; then :; else exec 2>/dev/null; fi - -# The user is always right. 
-if ${PATH_SEPARATOR+false} :; then - PATH_SEPARATOR=: - (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { - (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || - PATH_SEPARATOR=';' - } -fi - - -# Find who we are. Look in the path if we contain no directory separator. -as_myself= -case $0 in #(( - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - test -r "$as_dir$0" && as_myself=$as_dir$0 && break - done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as 'sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - exit 1 -fi - - -# Use a proper internal environment variable to ensure we don't fall - # into an infinite loop, continuously re-executing ourselves. - if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then - _as_can_reexec=no; export _as_can_reexec; - # We cannot yet assume a decent shell, so we have to provide a -# neutralization value for shells without unset; and this also -# works around shells that cannot unset nonexistent variables. -# Preserve -v and -x to the replacement shell. -BASH_ENV=/dev/null -ENV=/dev/null -(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV -case $- in # (((( - *v*x* | *x*v* ) as_opts=-vx ;; - *v* ) as_opts=-v ;; - *x* ) as_opts=-x ;; - * ) as_opts= ;; -esac -exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} -# Admittedly, this is quite paranoid, since all the known shells bail -# out after a failed 'exec'. -printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 -exit 255 - fi - # We don't want this to propagate to other subprocesses. 
- { _as_can_reexec=; unset _as_can_reexec;} -if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 -then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which - # is contrary to our usage. Disable this feature. - alias -g '\${1+\"\$@\"}'='\"\$@\"' - setopt NO_GLOB_SUBST -else case e in #( - e) case \`(set -o) 2>/dev/null\` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac ;; -esac -fi -" - as_required="as_fn_return () { (exit \$1); } -as_fn_success () { as_fn_return 0; } -as_fn_failure () { as_fn_return 1; } -as_fn_ret_success () { return 0; } -as_fn_ret_failure () { return 1; } - -exitcode=0 -as_fn_success || { exitcode=1; echo as_fn_success failed.; } -as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } -as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } -as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } -if ( set x; as_fn_ret_success y && test x = \"\$1\" ) -then : - -else case e in #( - e) exitcode=1; echo positional parameters were not saved. 
;; -esac -fi -test x\$exitcode = x0 || exit 1 -blah=\$(echo \$(echo blah)) -test x\"\$blah\" = xblah || exit 1 -test -x / || exit 1" - as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO - as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO - eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && - test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 -test \$(( 1 + 1 )) = 2 || exit 1" - if (eval "$as_required") 2>/dev/null -then : - as_have_required=yes -else case e in #( - e) as_have_required=no ;; -esac -fi - if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null -then : - -else case e in #( - e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -as_found=false -for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - as_found=: - case $as_dir in #( - /*) - for as_base in sh bash ksh sh5; do - # Try only shells that exist, to save several forks. 
- as_shell=$as_dir$as_base - if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null -then : - CONFIG_SHELL=$as_shell as_have_required=yes - if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null -then : - break 2 -fi -fi - done;; - esac - as_found=false -done -IFS=$as_save_IFS -if $as_found -then : - -else case e in #( - e) if { test -f "$SHELL" || test -f "$SHELL.exe"; } && - as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null -then : - CONFIG_SHELL=$SHELL as_have_required=yes -fi ;; -esac -fi - - - if test "x$CONFIG_SHELL" != x -then : - export CONFIG_SHELL - # We cannot yet assume a decent shell, so we have to provide a -# neutralization value for shells without unset; and this also -# works around shells that cannot unset nonexistent variables. -# Preserve -v and -x to the replacement shell. -BASH_ENV=/dev/null -ENV=/dev/null -(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV -case $- in # (((( - *v*x* | *x*v* ) as_opts=-vx ;; - *v* ) as_opts=-v ;; - *x* ) as_opts=-x ;; - * ) as_opts= ;; -esac -exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} -# Admittedly, this is quite paranoid, since all the known shells bail -# out after a failed 'exec'. -printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 -exit 255 -fi - - if test x$as_have_required = xno -then : - printf "%s\n" "$0: This script requires a shell more modern than all" - printf "%s\n" "$0: the shells that I found on your system." - if test ${ZSH_VERSION+y} ; then - printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" - printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." - else - printf "%s\n" "$0: Please tell bug-autoconf@gnu.org and -$0: samtools-help@lists.sourceforge.net about your system, -$0: including any error possibly output before this -$0: message. 
Then install a modern shell, or manually run -$0: the script under such a shell if you do have one." - fi - exit 1 -fi ;; -esac -fi -fi -SHELL=${CONFIG_SHELL-/bin/sh} -export SHELL -# Unset more variables known to interfere with behavior of common tools. -CLICOLOR_FORCE= GREP_OPTIONS= -unset CLICOLOR_FORCE GREP_OPTIONS - -## --------------------- ## -## M4sh Shell Functions. ## -## --------------------- ## -# as_fn_unset VAR -# --------------- -# Portably unset VAR. -as_fn_unset () -{ - { eval $1=; unset $1;} -} -as_unset=as_fn_unset - - -# as_fn_set_status STATUS -# ----------------------- -# Set $? to STATUS, without forking. -as_fn_set_status () -{ - return $1 -} # as_fn_set_status - -# as_fn_exit STATUS -# ----------------- -# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. -as_fn_exit () -{ - set +e - as_fn_set_status $1 - exit $1 -} # as_fn_exit - -# as_fn_mkdir_p -# ------------- -# Create "$as_dir" as a directory, including parents if necessary. -as_fn_mkdir_p () -{ - - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || eval $as_mkdir_p || { - as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -printf "%s\n" X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break - done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" - - -} # as_fn_mkdir_p - -# as_fn_executable_p FILE -# ----------------------- -# Test if FILE is an executable regular file. 
-as_fn_executable_p () -{ - test -f "$1" && test -x "$1" -} # as_fn_executable_p -# as_fn_append VAR VALUE -# ---------------------- -# Append the text in VALUE to the end of the definition contained in VAR. Take -# advantage of any shell optimizations that allow amortized linear growth over -# repeated appends, instead of the typical quadratic growth present in naive -# implementations. -if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null -then : - eval 'as_fn_append () - { - eval $1+=\$2 - }' -else case e in #( - e) as_fn_append () - { - eval $1=\$$1\$2 - } ;; -esac -fi # as_fn_append - -# as_fn_arith ARG... -# ------------------ -# Perform arithmetic evaluation on the ARGs, and store the result in the -# global $as_val. Take advantage of shells that can avoid forks. The arguments -# must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null -then : - eval 'as_fn_arith () - { - as_val=$(( $* )) - }' -else case e in #( - e) as_fn_arith () - { - as_val=`expr "$@" || test $? -eq 1` - } ;; -esac -fi # as_fn_arith - - -# as_fn_error STATUS ERROR [LINENO LOG_FD] -# ---------------------------------------- -# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are -# provided, also output the error to LOG_FD, referencing LINENO. Then exit the -# script with STATUS, using 1 if that was 0. 
-as_fn_error () -{ - as_status=$1; test $as_status -eq 0 && as_status=1 - if test "$4"; then - as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 - fi - printf "%s\n" "$as_me: error: $2" >&2 - as_fn_exit $as_status -} # as_fn_error - -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -printf "%s\n" X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# Avoid depending upon Character Ranges. -as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - - - as_lineno_1=$LINENO as_lineno_1a=$LINENO - as_lineno_2=$LINENO as_lineno_2a=$LINENO - eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && - test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { - # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | - sed ' - t clear - :clear - s/[$]LINENO.*/&-/ - t lineno - b - :lineno - N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ - t loop - s/-\n.*// - ' >$as_me.lineno && - chmod +x "$as_me.lineno" || - { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } - - # If we had to re-execute with $CONFIG_SHELL, we're ensured to have - # already done that, so ensure we don't try to do so again and fall - # in an infinite loop. This has already happened in practice. - _as_can_reexec=no; export _as_can_reexec - # Don't try to exec as it changes $[0], causing all sort of problems - # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" - # Exit status is that of the last command. - exit -} - - -# Determine whether it's possible to make 'echo' print without a newline. -# These variables are no longer used directly by Autoconf, but are AC_SUBSTed -# for compatibility with existing Makefiles. -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in #((((( --n*) - case `echo 'xy\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - xy) ECHO_C='\c';; - *) echo `echo ksh88 bug on AIX 6.1` > /dev/null - ECHO_T=' ';; - esac;; -*) - ECHO_N='-n';; -esac - -# For backward compatibility with old third-party macros, we provide -# the shell variables $as_echo and $as_echo_n. New code should use -# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. -as_echo='printf %s\n' -as_echo_n='printf %s' - -rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir 2>/dev/null -fi -if (echo >conf$$.file) 2>/dev/null; then - if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. 
- # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. - # In both cases, we have to default to 'cp -pR'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || - as_ln_s='cp -pR' - elif ln conf$$.file conf$$ 2>/dev/null; then - as_ln_s=ln - else - as_ln_s='cp -pR' - fi -else - as_ln_s='cp -pR' -fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null - -if mkdir -p . 2>/dev/null; then - as_mkdir_p='mkdir -p "$as_dir"' -else - test -d ./-p && rmdir ./-p - as_mkdir_p=false -fi - -as_test_x='test -x' -as_executable_p=as_fn_executable_p - -# Sed expression to map a string onto a valid CPP name. -as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" -as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated - -# Sed expression to map a string onto a valid variable name. -as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" -as_tr_sh="eval sed '$as_sed_sh'" # deprecated - - -test -n "$DJDIR" || exec 7<&0 &1 - -# Name of the host. -# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, -# so uname gets run too. -ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` - -# -# Initializations. -# -ac_default_prefix=/usr/local -ac_clean_files= -ac_config_libobj_dir=. -LIBOBJS= -cross_compiling=no -subdirs= -MFLAGS= -MAKEFLAGS= - -# Identity of this package. -PACKAGE_NAME='HTSlib' -PACKAGE_TARNAME='htslib' -PACKAGE_VERSION='1.19.1' -PACKAGE_STRING='HTSlib 1.19.1' -PACKAGE_BUGREPORT='samtools-help@lists.sourceforge.net' -PACKAGE_URL='http://www.htslib.org/' - -ac_unique_file="hts.c" -# Factoring default headers for most tests. 
-ac_includes_default="\ -#include -#ifdef HAVE_STDIO_H -# include -#endif -#ifdef HAVE_STDLIB_H -# include -#endif -#ifdef HAVE_STRING_H -# include -#endif -#ifdef HAVE_INTTYPES_H -# include -#endif -#ifdef HAVE_STDINT_H -# include -#endif -#ifdef HAVE_STRINGS_H -# include -#endif -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#ifdef HAVE_SYS_STAT_H -# include -#endif -#ifdef HAVE_UNISTD_H -# include -#endif" - -ac_header_c_list= -ac_func_c_list= -enable_year2038=no -ac_subst_vars='LTLIBOBJS -LIBOBJS -HTSDIRslash_if_relsrcdir -static_LIBS -static_LDFLAGS -private_LIBS -pc_requires -CRYPTO_LIBS -s3 -gcs -libcurl -PLUGIN_EXT -host_os -host_vendor -host_cpu -host -build_os -build_vendor -build_cpu -build -VERSION_SCRIPT_LDFLAGS -PLATFORM -pluginpath -plugindir -with_external_htscodecs -enable_plugins -PKG_CONFIG_LIBDIR -PKG_CONFIG_PATH -PKG_CONFIG -hts_cflags_avx512 -hts_cflags_avx2 -hts_cflags_sse4 -GREP -RANLIB -OBJEXT -EXEEXT -ac_ct_CC -CPPFLAGS -LDFLAGS -CFLAGS -CC -target_alias -host_alias -build_alias -LIBS -ECHO_T -ECHO_N -ECHO_C -DEFS -mandir -localedir -libdir -psdir -pdfdir -dvidir -htmldir -infodir -docdir -oldincludedir -includedir -runstatedir -localstatedir -sharedstatedir -sysconfdir -datadir -datarootdir -libexecdir -sbindir -bindir -program_transform_name -prefix -exec_prefix -PACKAGE_URL -PACKAGE_BUGREPORT -PACKAGE_STRING -PACKAGE_VERSION -PACKAGE_TARNAME -PACKAGE_NAME -PATH_SEPARATOR -SHELL' -ac_subst_files='' -ac_user_opts=' -enable_option_checking -enable_warnings -enable_werror -enable_versioned_symbols -enable_bz2 -enable_gcs -enable_largefile -enable_libcurl -enable_lzma -enable_plugins -with_external_htscodecs -with_libdeflate -with_plugin_dir -with_plugin_path -enable_s3 -enable_year2038 -' - ac_precious_vars='build_alias -host_alias -target_alias -CC -CFLAGS -LDFLAGS -LIBS -CPPFLAGS -PKG_CONFIG -PKG_CONFIG_PATH -PKG_CONFIG_LIBDIR' - - -# Initialize some variables set by options. 
-ac_init_help= -ac_init_version=false -ac_unrecognized_opts= -ac_unrecognized_sep= -# The variables have the same names as the options, with -# dashes changed to underlines. -cache_file=/dev/null -exec_prefix=NONE -no_create= -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -verbose= -x_includes=NONE -x_libraries=NONE - -# Installation directory options. -# These are left unexpanded so users can "make install exec_prefix=/foo" -# and all the variables that are supposed to be based on exec_prefix -# by default will actually change. -# Use braces instead of parens because sh, perl, etc. also accept them. -# (The list follows the same order as the GNU Coding Standards.) -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datarootdir='${prefix}/share' -datadir='${datarootdir}' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -runstatedir='${localstatedir}/run' -includedir='${prefix}/include' -oldincludedir='/usr/include' -docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' -infodir='${datarootdir}/info' -htmldir='${docdir}' -dvidir='${docdir}' -pdfdir='${docdir}' -psdir='${docdir}' -libdir='${exec_prefix}/lib' -localedir='${datarootdir}/locale' -mandir='${datarootdir}/man' - -ac_prev= -ac_dashdash= -for ac_option -do - # If the previous option needs an argument, assign it. 
- if test -n "$ac_prev"; then - eval $ac_prev=\$ac_option - ac_prev= - continue - fi - - case $ac_option in - *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; - *=) ac_optarg= ;; - *) ac_optarg=yes ;; - esac - - case $ac_dashdash$ac_option in - --) - ac_dashdash=yes ;; - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir=$ac_optarg ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build_alias ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build_alias=$ac_optarg ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file=$ac_optarg ;; - - --config-cache | -C) - cache_file=config.cache ;; - - -datadir | --datadir | --datadi | --datad) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=*) - datadir=$ac_optarg ;; - - -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ - | --dataroo | --dataro | --datar) - ac_prev=datarootdir ;; - -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ - | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) - datarootdir=$ac_optarg ;; - - -disable-* | --disable-*) - ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? 
"invalid feature name: '$ac_useropt'" - ac_useropt_orig=$ac_useropt - ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"enable_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval enable_$ac_useropt=no ;; - - -docdir | --docdir | --docdi | --doc | --do) - ac_prev=docdir ;; - -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) - docdir=$ac_optarg ;; - - -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) - ac_prev=dvidir ;; - -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) - dvidir=$ac_optarg ;; - - -enable-* | --enable-*) - ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: '$ac_useropt'" - ac_useropt_orig=$ac_useropt - ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"enable_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval enable_$ac_useropt=\$ac_optarg ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix=$ac_optarg ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. 
- with_gas=yes ;; - - -help | --help | --hel | --he | -h) - ac_init_help=long ;; - -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) - ac_init_help=recursive ;; - -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) - ac_init_help=short ;; - - -host | --host | --hos | --ho) - ac_prev=host_alias ;; - -host=* | --host=* | --hos=* | --ho=*) - host_alias=$ac_optarg ;; - - -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) - ac_prev=htmldir ;; - -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ - | --ht=*) - htmldir=$ac_optarg ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir=$ac_optarg ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir=$ac_optarg ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir=$ac_optarg ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir=$ac_optarg ;; - - -localedir | --localedir | --localedi | --localed | --locale) - ac_prev=localedir ;; - -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) - localedir=$ac_optarg ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | --localst | --locals) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) - localstatedir=$ac_optarg ;; - - 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir=$ac_optarg ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. - with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c | -n) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir=$ac_optarg ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix=$ac_optarg ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix=$ac_optarg ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | --program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix=$ac_optarg ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | 
--program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name=$ac_optarg ;; - - -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) - ac_prev=pdfdir ;; - -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) - pdfdir=$ac_optarg ;; - - -psdir | --psdir | --psdi | --psd | --ps) - ac_prev=psdir ;; - -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) - psdir=$ac_optarg ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -runstatedir | --runstatedir | --runstatedi | --runstated \ - | --runstate | --runstat | --runsta | --runst | --runs \ - | --run | --ru | --r) - ac_prev=runstatedir ;; - -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ - | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ - | --run=* | --ru=* | --r=*) - runstatedir=$ac_optarg ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir=$ac_optarg ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | 
--shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir=$ac_optarg ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site=$ac_optarg ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - srcdir=$ac_optarg ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir=$ac_optarg ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target_alias ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target_alias=$ac_optarg ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers | -V) - ac_init_version=: ;; - - -with-* | --with-*) - ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: '$ac_useropt'" - ac_useropt_orig=$ac_useropt - ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"with_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval with_$ac_useropt=\$ac_optarg ;; - - -without-* | --without-*) - ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? 
"invalid package name: '$ac_useropt'" - ac_useropt_orig=$ac_useropt - ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"with_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval with_$ac_useropt=no ;; - - --x) - # Obsolete; use --with-x. - with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes=$ac_optarg ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries=$ac_optarg ;; - - -*) as_fn_error $? "unrecognized option: '$ac_option' -Try '$0 --help' for more information" - ;; - - *=*) - ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` - # Reject names that are not valid shell variable names. - case $ac_envvar in #( - '' | [0-9]* | *[!_$as_cr_alnum]* ) - as_fn_error $? "invalid variable name: '$ac_envvar'" ;; - esac - eval $ac_envvar=\$ac_optarg - export $ac_envvar ;; - - *) - # FIXME: should be removed in autoconf 3.0. - printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 - expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && - printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 - : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" - ;; - - esac -done - -if test -n "$ac_prev"; then - ac_option=--`echo $ac_prev | sed 's/_/-/g'` - as_fn_error $? 
"missing argument to $ac_option" -fi - -if test -n "$ac_unrecognized_opts"; then - case $enable_option_checking in - no) ;; - fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; - *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; - esac -fi - -# Check all directory arguments for consistency. -for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ - datadir sysconfdir sharedstatedir localstatedir includedir \ - oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir runstatedir -do - eval ac_val=\$$ac_var - # Remove trailing slashes. - case $ac_val in - */ ) - ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` - eval $ac_var=\$ac_val;; - esac - # Be sure to have absolute directory names. - case $ac_val in - [\\/$]* | ?:[\\/]* ) continue;; - NONE | '' ) case $ac_var in *prefix ) continue;; esac;; - esac - as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" -done - -# There might be people who depend on the old broken behavior: '$host' -# used to hold the argument of --host etc. -# FIXME: To remove some day. -build=$build_alias -host=$host_alias -target=$target_alias - -# FIXME: To remove some day. -if test "x$host_alias" != x; then - if test "x$build_alias" = x; then - cross_compiling=maybe - elif test "x$build_alias" != "x$host_alias"; then - cross_compiling=yes - fi -fi - -ac_tool_prefix= -test -n "$host_alias" && ac_tool_prefix=$host_alias- - -test "$silent" = yes && exec 6>/dev/null - - -ac_pwd=`pwd` && test -n "$ac_pwd" && -ac_ls_di=`ls -di .` && -ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || - as_fn_error $? "working directory cannot be determined" -test "X$ac_ls_di" = "X$ac_pwd_ls_di" || - as_fn_error $? "pwd does not report name of working directory" - - -# Find the source files, if location was not specified. 
-if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then the parent directory. - ac_confdir=`$as_dirname -- "$as_myself" || -$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_myself" : 'X\(//\)[^/]' \| \ - X"$as_myself" : 'X\(//\)$' \| \ - X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || -printf "%s\n" X"$as_myself" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - srcdir=$ac_confdir - if test ! -r "$srcdir/$ac_unique_file"; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r "$srcdir/$ac_unique_file"; then - test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." - as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" -fi -ac_msg="sources are in $srcdir, but 'cd $srcdir' does not work" -ac_abs_confdir=`( - cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" - pwd)` -# When building in place, set srcdir=. -if test "$ac_abs_confdir" = "$ac_pwd"; then - srcdir=. -fi -# Remove unnecessary trailing slashes from srcdir. -# Double slashes in file names in object file debugging info -# mess up M-x gdb in Emacs. -case $srcdir in -*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; -esac -for ac_var in $ac_precious_vars; do - eval ac_env_${ac_var}_set=\${${ac_var}+set} - eval ac_env_${ac_var}_value=\$${ac_var} - eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} - eval ac_cv_env_${ac_var}_value=\$${ac_var} -done - -# -# Report the --help message. -# -if test "$ac_init_help" = "long"; then - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat <<_ACEOF -'configure' configures HTSlib 1.19.1 to adapt to many kinds of systems. - -Usage: $0 [OPTION]... [VAR=VALUE]... 
- -To assign environment variables (e.g., CC, CFLAGS...), specify them as -VAR=VALUE. See below for descriptions of some of the useful variables. - -Defaults for the options are specified in brackets. - -Configuration: - -h, --help display this help and exit - --help=short display options specific to this package - --help=recursive display the short help of all the included packages - -V, --version display version information and exit - -q, --quiet, --silent do not print 'checking ...' messages - --cache-file=FILE cache test results in FILE [disabled] - -C, --config-cache alias for '--cache-file=config.cache' - -n, --no-create do not create output files - --srcdir=DIR find the sources in DIR [configure dir or '..'] - -Installation directories: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [PREFIX] - -By default, 'make install' will install all the files in -'$ac_default_prefix/bin', '$ac_default_prefix/lib' etc. You can specify -an installation prefix other than '$ac_default_prefix' using '--prefix', -for instance '--prefix=\$HOME'. - -For better control, use the options below. 
- -Fine tuning of the installation directories: - --bindir=DIR user executables [EPREFIX/bin] - --sbindir=DIR system admin executables [EPREFIX/sbin] - --libexecdir=DIR program executables [EPREFIX/libexec] - --sysconfdir=DIR read-only single-machine data [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] - --localstatedir=DIR modifiable single-machine data [PREFIX/var] - --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] - --libdir=DIR object code libraries [EPREFIX/lib] - --includedir=DIR C header files [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc [/usr/include] - --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] - --datadir=DIR read-only architecture-independent data [DATAROOTDIR] - --infodir=DIR info documentation [DATAROOTDIR/info] - --localedir=DIR locale-dependent data [DATAROOTDIR/locale] - --mandir=DIR man documentation [DATAROOTDIR/man] - --docdir=DIR documentation root [DATAROOTDIR/doc/htslib] - --htmldir=DIR html documentation [DOCDIR] - --dvidir=DIR dvi documentation [DOCDIR] - --pdfdir=DIR pdf documentation [DOCDIR] - --psdir=DIR ps documentation [DOCDIR] -_ACEOF - - cat <<\_ACEOF - -System types: - --build=BUILD configure for building on BUILD [guessed] - --host=HOST cross-compile to build programs to run on HOST [BUILD] -_ACEOF -fi - -if test -n "$ac_init_help"; then - case $ac_init_help in - short | recursive ) echo "Configuration of HTSlib 1.19.1:";; - esac - cat <<\_ACEOF - -Optional Features: - --disable-option-checking ignore unrecognized --enable/--with options - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --disable-warnings turn off compiler warnings - --enable-werror change warnings into errors, where supported - --disable-versioned-symbols - disable versioned symbols in shared library - --disable-bz2 omit support for BZ2-compressed CRAM files - --enable-gcs support 
Google Cloud Storage URLs - --disable-largefile omit support for large files - --enable-libcurl enable libcurl-based support for http/https/etc URLs - --disable-lzma omit support for LZMA-compressed CRAM files - --enable-plugins enable separately-compiled plugins for file access - --enable-s3 support Amazon AWS S3 URLs - --enable-year2038 support timestamps after 2038 - -Optional Packages: - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --with-external-htscodecs - get htscodecs functions from a shared library - --with-libdeflate use libdeflate for faster crc and deflate algorithms - --with-plugin-dir=DIR plugin installation location [LIBEXECDIR/htslib] - --with-plugin-path=PATH default HTS_PATH plugin search path [PLUGINDIR] - -Some influential environment variables: - CC C compiler command - CFLAGS C compiler flags - LDFLAGS linker flags, e.g. -L if you have libraries in a - nonstandard directory - LIBS libraries to pass to the linker, e.g. -l - CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if - you have headers in a nonstandard directory - PKG_CONFIG path to pkg-config utility - PKG_CONFIG_PATH - directories to add to pkg-config's search path - PKG_CONFIG_LIBDIR - path overriding pkg-config's built-in search path - -Use these variables to override the choices made by 'configure' or to help -it to find libraries and programs with nonstandard names/locations. - -Report bugs to . -HTSlib home page: . -_ACEOF -ac_status=$? -fi - -if test "$ac_init_help" = "recursive"; then - # If there are subdirs, report their specific --help. - for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue - test -d "$ac_dir" || - { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || - continue - ac_builddir=. - -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) - ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` - # A ".." 
for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix - -case $srcdir in - .) # We are building in place. - ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; -esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - cd "$ac_dir" || { ac_status=$?; continue; } - # Check for configure.gnu first; this name is used for a wrapper for - # Metaconfig's "Configure" on case-insensitive file systems. - if test -f "$ac_srcdir/configure.gnu"; then - echo && - $SHELL "$ac_srcdir/configure.gnu" --help=recursive - elif test -f "$ac_srcdir/configure"; then - echo && - $SHELL "$ac_srcdir/configure" --help=recursive - else - printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 - fi || ac_status=$? - cd "$ac_pwd" || { ac_status=$?; break; } - done -fi - -test -n "$ac_init_help" && exit $ac_status -if $ac_init_version; then - cat <<\_ACEOF -HTSlib configure 1.19.1 -generated by GNU Autoconf 2.72 - -Copyright (C) 2023 Free Software Foundation, Inc. -This configure script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it. - -Portions copyright (C) 2020-2023 Genome Research Ltd. - -This configure script is free software: you are free to change and -redistribute it. There is NO WARRANTY, to the extent permitted by law. 
-_ACEOF - exit -fi - -## ------------------------ ## -## Autoconf initialization. ## -## ------------------------ ## - -# ac_fn_c_try_compile LINENO -# -------------------------- -# Try to compile conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest.beam - if { { ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext -then : - ac_retval=0 -else case e in #( - e) printf "%s\n" "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 ;; -esac -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_compile - -# ac_fn_check_decl LINENO SYMBOL VAR INCLUDES EXTRA-OPTIONS FLAG-VAR -# ------------------------------------------------------------------ -# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR -# accordingly. Pass EXTRA-OPTIONS to the compiler, using FLAG-VAR. -ac_fn_check_decl () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - as_decl_name=`echo $2|sed 's/ *(.*//'` - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 -printf %s "checking whether $as_decl_name is declared... 
" >&6; } -if eval test \${$3+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` - eval ac_save_FLAGS=\$$6 - as_fn_append $6 " $5" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main (void) -{ -#ifndef $as_decl_name -#ifdef __cplusplus - (void) $as_decl_use; -#else - (void) $as_decl_name; -#endif -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - eval "$3=yes" -else case e in #( - e) eval "$3=no" ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - eval $6=\$ac_save_FLAGS - ;; -esac -fi -eval ac_res=\$$3 - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -printf "%s\n" "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_check_decl - -# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES -# ------------------------------------------------------- -# Tests whether HEADER exists and can be compiled using the include files in -# INCLUDES, setting the cache variable VAR accordingly. -ac_fn_c_check_header_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -printf %s "checking for $2... " >&6; } -if eval test \${$3+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - eval "$3=yes" -else case e in #( - e) eval "$3=no" ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi -eval ac_res=\$$3 - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -printf "%s\n" "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_compile - -# ac_fn_c_try_link LINENO -# ----------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_link () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext - if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && { - test "$cross_compiling" = yes || - test -x conftest$ac_exeext - } -then : - ac_retval=0 -else case e in #( - e) printf "%s\n" "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 ;; -esac -fi - # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information - # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would - # interfere with the next link command; also delete a directory that is - # left behind by Apple's compiler. We do this before executing the actions. 
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_link - -# ac_fn_c_check_func LINENO FUNC VAR -# ---------------------------------- -# Tests whether FUNC exists, setting the cache variable VAR accordingly -ac_fn_c_check_func () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -printf %s "checking for $2... " >&6; } -if eval test \${$3+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -/* Define $2 to an innocuous variant, in case declares $2. - For example, HP-UX 11i declares gettimeofday. */ -#define $2 innocuous_$2 - -/* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $2 (void); below. */ - -#include -#undef $2 - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char $2 (void); -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined __stub_$2 || defined __stub___$2 -choke me -#endif - -int -main (void) -{ -return $2 (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - eval "$3=yes" -else case e in #( - e) eval "$3=no" ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext ;; -esac -fi -eval ac_res=\$$3 - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -printf "%s\n" "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_func - -# ac_fn_c_try_run LINENO -# ---------------------- -# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that -# executables *can* be run. -ac_fn_c_try_run () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 - test $ac_status = 0; }; } -then : - ac_retval=0 -else case e in #( - e) printf "%s\n" "$as_me: program exited with status $ac_status" >&5 - printf "%s\n" "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=$ac_status ;; -esac -fi - rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_run -ac_configure_args_raw= -for ac_arg -do - case $ac_arg in - *\'*) - ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; - esac - as_fn_append ac_configure_args_raw " '$ac_arg'" -done - -case $ac_configure_args_raw in - *$as_nl*) - ac_safe_unquote= ;; - *) - ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. - ac_unsafe_a="$ac_unsafe_z#~" - ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" - ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; -esac - -cat >config.log <<_ACEOF -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. - -It was created by HTSlib $as_me 1.19.1, which was -generated by GNU Autoconf 2.72. Invocation command line was - - $ $0$ac_configure_args_raw - -_ACEOF -exec 5>>config.log -{ -cat <<_ASUNAME -## --------- ## -## Platform. 
## -## --------- ## - -hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` - -/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` -/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` -/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` -/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` - -_ASUNAME - -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - printf "%s\n" "PATH: $as_dir" - done -IFS=$as_save_IFS - -} >&5 - -cat >&5 <<_ACEOF - - -## ----------- ## -## Core tests. ## -## ----------- ## - -_ACEOF - - -# Keep a trace of the command line. -# Strip out --no-create and --no-recursion so they do not pile up. -# Strip out --silent because we don't want to record it for future runs. -# Also quote any args containing shell meta-characters. -# Make two passes to allow for proper duplicate-argument suppression. 
-ac_configure_args= -ac_configure_args0= -ac_configure_args1= -ac_must_keep_next=false -for ac_pass in 1 2 -do - for ac_arg - do - case $ac_arg in - -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - continue ;; - *\'*) - ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; - esac - case $ac_pass in - 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; - 2) - as_fn_append ac_configure_args1 " '$ac_arg'" - if test $ac_must_keep_next = true; then - ac_must_keep_next=false # Got value, back to normal. - else - case $ac_arg in - *=* | --config-cache | -C | -disable-* | --disable-* \ - | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ - | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ - | -with-* | --with-* | -without-* | --without-* | --x) - case "$ac_configure_args0 " in - "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; - esac - ;; - -* ) ac_must_keep_next=true ;; - esac - fi - as_fn_append ac_configure_args " '$ac_arg'" - ;; - esac - done -done -{ ac_configure_args0=; unset ac_configure_args0;} -{ ac_configure_args1=; unset ac_configure_args1;} - -# When interrupted or exit'd, cleanup temporary files, and complete -# config.log. We remove comments because anyway the quotes in there -# would cause problems or look ugly. -# WARNING: Use '\'' to represent an apostrophe within the trap. -# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. -trap 'exit_status=$? - # Sanitize IFS. - IFS=" "" $as_nl" - # Save into config.log some information that might help in debugging. - { - echo - - printf "%s\n" "## ---------------- ## -## Cache variables. 
## -## ---------------- ##" - echo - # The following way of writing the cache mishandles newlines in values, -( - for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( - *) { eval $ac_var=; unset $ac_var;} ;; - esac ;; - esac - done - (set) 2>&1 | - case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( - *${as_nl}ac_space=\ *) - sed -n \ - "s/'\''/'\''\\\\'\'''\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" - ;; #( - *) - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" - ;; - esac | - sort -) - echo - - printf "%s\n" "## ----------------- ## -## Output variables. ## -## ----------------- ##" - echo - for ac_var in $ac_subst_vars - do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - printf "%s\n" "$ac_var='\''$ac_val'\''" - done | sort - echo - - if test -n "$ac_subst_files"; then - printf "%s\n" "## ------------------- ## -## File substitutions. ## -## ------------------- ##" - echo - for ac_var in $ac_subst_files - do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - printf "%s\n" "$ac_var='\''$ac_val'\''" - done | sort - echo - fi - - if test -s confdefs.h; then - printf "%s\n" "## ----------- ## -## confdefs.h. 
## -## ----------- ##" - echo - cat confdefs.h - echo - fi - test "$ac_signal" != 0 && - printf "%s\n" "$as_me: caught signal $ac_signal" - printf "%s\n" "$as_me: exit $exit_status" - } >&5 - rm -f core *.core core.conftest.* && - rm -f -r conftest* confdefs* conf$$* $ac_clean_files && - exit $exit_status -' 0 -for ac_signal in 1 2 13 15; do - trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal -done -ac_signal=0 - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -f -r conftest* confdefs.h - -printf "%s\n" "/* confdefs.h */" > confdefs.h - -# Predefined preprocessor variables. - -printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h - -printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h - -printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h - -printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h - -printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h - -printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h - - -# Let the site file select an alternate cache file if it wants to. -# Prefer an explicitly selected file to automatically selected ones. -if test -n "$CONFIG_SITE"; then - ac_site_files="$CONFIG_SITE" -elif test "x$prefix" != xNONE; then - ac_site_files="$prefix/share/config.site $prefix/etc/config.site" -else - ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" -fi - -for ac_site_file in $ac_site_files -do - case $ac_site_file in #( - */*) : - ;; #( - *) : - ac_site_file=./$ac_site_file ;; -esac - if test -f "$ac_site_file" && test -r "$ac_site_file"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 -printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} - sed 's/^/| /' "$ac_site_file" >&5 - . 
"$ac_site_file" \ - || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "failed to load site script $ac_site_file -See 'config.log' for more details" "$LINENO" 5; } - fi -done - -if test -r "$cache_file"; then - # Some versions of bash will fail to source /dev/null (special files - # actually), so we avoid doing that. DJGPP emulates it as a regular file. - if test /dev/null != "$cache_file" && test -f "$cache_file"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 -printf "%s\n" "$as_me: loading cache $cache_file" >&6;} - case $cache_file in - [\\/]* | ?:[\\/]* ) . "$cache_file";; - *) . "./$cache_file";; - esac - fi -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 -printf "%s\n" "$as_me: creating cache $cache_file" >&6;} - >$cache_file -fi - -# Test code for whether the C compiler supports C89 (global declarations) -ac_c_conftest_c89_globals=' -/* Does the compiler advertise C89 conformance? - Do not test the value of __STDC__, because some compilers set it to 0 - while being otherwise adequately conformant. */ -#if !defined __STDC__ -# error "Compiler does not advertise C89 conformance" -#endif - -#include -#include -struct stat; -/* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ -struct buf { int x; }; -struct buf * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (char **p, int i) -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* C89 style stringification. */ -#define noexpand_stringify(a) #a -const char *stringified = noexpand_stringify(arbitrary+token=sequence); - -/* C89 style token pasting. Exercises some of the corner cases that - e.g. old MSVC gets wrong, but not very hard. 
*/ -#define noexpand_concat(a,b) a##b -#define expand_concat(a,b) noexpand_concat(a,b) -extern int vA; -extern int vbee; -#define aye A -#define bee B -int *pvA = &expand_concat(v,aye); -int *pvbee = &noexpand_concat(v,bee); - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not \xHH hex character constants. - These do not provoke an error unfortunately, instead are silently treated - as an "x". The following induces an error, until -std is added to get - proper ANSI mode. Curiously \x00 != x always comes out true, for an - array size at least. It is necessary to write \x00 == 0 to get something - that is true only with -std. */ -int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1]; - -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) '\''x'\'' -int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; - -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), - int, int);' - -# Test code for whether the C compiler supports C89 (body of main). -ac_c_conftest_c89_main=' -ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); -' - -# Test code for whether the C compiler supports C99 (global declarations) -ac_c_conftest_c99_globals=' -/* Does the compiler advertise C99 conformance? */ -#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L -# error "Compiler does not advertise C99 conformance" -#endif - -// See if C++-style comments work. - -#include -extern int puts (const char *); -extern int printf (const char *, ...); -extern int dprintf (int, const char *, ...); -extern void *malloc (size_t); -extern void free (void *); - -// Check varargs macros. These examples are taken from C99 6.10.3.5. -// dprintf is used instead of fprintf to avoid needing to declare -// FILE and stderr. -#define debug(...) 
dprintf (2, __VA_ARGS__) -#define showlist(...) puts (#__VA_ARGS__) -#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) -static void -test_varargs_macros (void) -{ - int x = 1234; - int y = 5678; - debug ("Flag"); - debug ("X = %d\n", x); - showlist (The first, second, and third items.); - report (x>y, "x is %d but y is %d", x, y); -} - -// Check long long types. -#define BIG64 18446744073709551615ull -#define BIG32 4294967295ul -#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) -#if !BIG_OK - #error "your preprocessor is broken" -#endif -#if BIG_OK -#else - #error "your preprocessor is broken" -#endif -static long long int bignum = -9223372036854775807LL; -static unsigned long long int ubignum = BIG64; - -struct incomplete_array -{ - int datasize; - double data[]; -}; - -struct named_init { - int number; - const wchar_t *name; - double average; -}; - -typedef const char *ccp; - -static inline int -test_restrict (ccp restrict text) -{ - // Iterate through items via the restricted pointer. - // Also check for declarations in for loops. - for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) - continue; - return 0; -} - -// Check varargs and va_copy. -static bool -test_varargs (const char *format, ...) -{ - va_list args; - va_start (args, format); - va_list args_copy; - va_copy (args_copy, args); - - const char *str = ""; - int number = 0; - float fnumber = 0; - - while (*format) - { - switch (*format++) - { - case '\''s'\'': // string - str = va_arg (args_copy, const char *); - break; - case '\''d'\'': // int - number = va_arg (args_copy, int); - break; - case '\''f'\'': // float - fnumber = va_arg (args_copy, double); - break; - default: - break; - } - } - va_end (args_copy); - va_end (args); - - return *str && number && fnumber; -} -' - -# Test code for whether the C compiler supports C99 (body of main). -ac_c_conftest_c99_main=' - // Check bool. - _Bool success = false; - success |= (argc != 0); - - // Check restrict. 
- if (test_restrict ("String literal") == 0) - success = true; - char *restrict newvar = "Another string"; - - // Check varargs. - success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); - test_varargs_macros (); - - // Check flexible array members. - struct incomplete_array *ia = - malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); - ia->datasize = 10; - for (int i = 0; i < ia->datasize; ++i) - ia->data[i] = i * 1.234; - // Work around memory leak warnings. - free (ia); - - // Check named initializers. - struct named_init ni = { - .number = 34, - .name = L"Test wide string", - .average = 543.34343, - }; - - ni.number = 58; - - int dynamic_array[ni.number]; - dynamic_array[0] = argv[0][0]; - dynamic_array[ni.number - 1] = 543; - - // work around unused variable warnings - ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' - || dynamic_array[ni.number - 1] != 543); -' - -# Test code for whether the C compiler supports C11 (global declarations) -ac_c_conftest_c11_globals=' -/* Does the compiler advertise C11 conformance? */ -#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L -# error "Compiler does not advertise C11 conformance" -#endif - -// Check _Alignas. -char _Alignas (double) aligned_as_double; -char _Alignas (0) no_special_alignment; -extern char aligned_as_int; -char _Alignas (0) _Alignas (int) aligned_as_int; - -// Check _Alignof. -enum -{ - int_alignment = _Alignof (int), - int_array_alignment = _Alignof (int[100]), - char_alignment = _Alignof (char) -}; -_Static_assert (0 < -_Alignof (int), "_Alignof is signed"); - -// Check _Noreturn. -int _Noreturn does_not_return (void) { for (;;) continue; } - -// Check _Static_assert. -struct test_static_assert -{ - int x; - _Static_assert (sizeof (int) <= sizeof (long int), - "_Static_assert does not work in struct"); - long int y; -}; - -// Check UTF-8 literals. -#define u8 syntax error! 
-char const utf8_literal[] = u8"happens to be ASCII" "another string"; - -// Check duplicate typedefs. -typedef long *long_ptr; -typedef long int *long_ptr; -typedef long_ptr long_ptr; - -// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. -struct anonymous -{ - union { - struct { int i; int j; }; - struct { int k; long int l; } w; - }; - int m; -} v1; -' - -# Test code for whether the C compiler supports C11 (body of main). -ac_c_conftest_c11_main=' - _Static_assert ((offsetof (struct anonymous, i) - == offsetof (struct anonymous, w.k)), - "Anonymous union alignment botch"); - v1.i = 2; - v1.w.k = 5; - ok |= v1.i != 5; -' - -# Test code for whether the C compiler supports C11 (complete). -ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} -${ac_c_conftest_c99_globals} -${ac_c_conftest_c11_globals} - -int -main (int argc, char **argv) -{ - int ok = 0; - ${ac_c_conftest_c89_main} - ${ac_c_conftest_c99_main} - ${ac_c_conftest_c11_main} - return ok; -} -" - -# Test code for whether the C compiler supports C99 (complete). -ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} -${ac_c_conftest_c99_globals} - -int -main (int argc, char **argv) -{ - int ok = 0; - ${ac_c_conftest_c89_main} - ${ac_c_conftest_c99_main} - return ok; -} -" - -# Test code for whether the C compiler supports C89 (complete). 
-ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} - -int -main (int argc, char **argv) -{ - int ok = 0; - ${ac_c_conftest_c89_main} - return ok; -} -" - -as_fn_append ac_header_c_list " stdio.h stdio_h HAVE_STDIO_H" -as_fn_append ac_header_c_list " stdlib.h stdlib_h HAVE_STDLIB_H" -as_fn_append ac_header_c_list " string.h string_h HAVE_STRING_H" -as_fn_append ac_header_c_list " inttypes.h inttypes_h HAVE_INTTYPES_H" -as_fn_append ac_header_c_list " stdint.h stdint_h HAVE_STDINT_H" -as_fn_append ac_header_c_list " strings.h strings_h HAVE_STRINGS_H" -as_fn_append ac_header_c_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H" -as_fn_append ac_header_c_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H" -as_fn_append ac_header_c_list " unistd.h unistd_h HAVE_UNISTD_H" -as_fn_append ac_header_c_list " sys/param.h sys_param_h HAVE_SYS_PARAM_H" -as_fn_append ac_func_c_list " getpagesize HAVE_GETPAGESIZE" - -# Auxiliary files required by this configure script. -ac_aux_files="config.guess config.sub" - -# Locations in which to look for auxiliary files. -ac_aux_dir_candidates="${srcdir}${PATH_SEPARATOR}${srcdir}/..${PATH_SEPARATOR}${srcdir}/../.." - -# Search for a directory containing all of the required auxiliary files, -# $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates. -# If we don't find one directory that contains all the files we need, -# we report the set of missing files from the *first* directory in -# $ac_aux_dir_candidates and give up. 
-ac_missing_aux_files="" -ac_first_candidate=: -printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: $ac_aux_files" >&5 -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -as_found=false -for as_dir in $ac_aux_dir_candidates -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - as_found=: - - printf "%s\n" "$as_me:${as_lineno-$LINENO}: trying $as_dir" >&5 - ac_aux_dir_found=yes - ac_install_sh= - for ac_aux in $ac_aux_files - do - # As a special case, if "install-sh" is required, that requirement - # can be satisfied by any of "install-sh", "install.sh", or "shtool", - # and $ac_install_sh is set appropriately for whichever one is found. - if test x"$ac_aux" = x"install-sh" - then - if test -f "${as_dir}install-sh"; then - printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install-sh found" >&5 - ac_install_sh="${as_dir}install-sh -c" - elif test -f "${as_dir}install.sh"; then - printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install.sh found" >&5 - ac_install_sh="${as_dir}install.sh -c" - elif test -f "${as_dir}shtool"; then - printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}shtool found" >&5 - ac_install_sh="${as_dir}shtool install -c" - else - ac_aux_dir_found=no - if $ac_first_candidate; then - ac_missing_aux_files="${ac_missing_aux_files} install-sh" - else - break - fi - fi - else - if test -f "${as_dir}${ac_aux}"; then - printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}${ac_aux} found" >&5 - else - ac_aux_dir_found=no - if $ac_first_candidate; then - ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}" - else - break - fi - fi - fi - done - if test "$ac_aux_dir_found" = yes; then - ac_aux_dir="$as_dir" - break - fi - ac_first_candidate=false - - as_found=false -done -IFS=$as_save_IFS -if $as_found -then : - -else case e in #( - e) as_fn_error $? 
"cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 ;; -esac -fi - - -# These three variables are undocumented and unsupported, -# and are intended to be withdrawn in a future Autoconf release. -# They can cause serious problems if a builder's source tree is in a directory -# whose full name contains unusual characters. -if test -f "${ac_aux_dir}config.guess"; then - ac_config_guess="$SHELL ${ac_aux_dir}config.guess" -fi -if test -f "${ac_aux_dir}config.sub"; then - ac_config_sub="$SHELL ${ac_aux_dir}config.sub" -fi -if test -f "$ac_aux_dir/configure"; then - ac_configure="$SHELL ${ac_aux_dir}configure" -fi - -# Check that the precious variables saved in the cache have kept the same -# value. -ac_cache_corrupted=false -for ac_var in $ac_precious_vars; do - eval ac_old_set=\$ac_cv_env_${ac_var}_set - eval ac_new_set=\$ac_env_${ac_var}_set - eval ac_old_val=\$ac_cv_env_${ac_var}_value - eval ac_new_val=\$ac_env_${ac_var}_value - case $ac_old_set,$ac_new_set in - set,) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&5 -printf "%s\n" "$as_me: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,set) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was not set in the previous run" >&5 -printf "%s\n" "$as_me: error: '$ac_var' was not set in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,);; - *) - if test "x$ac_old_val" != "x$ac_new_val"; then - # differences in whitespace do not lead to failure. 
- ac_old_val_w=`echo x $ac_old_val` - ac_new_val_w=`echo x $ac_new_val` - if test "$ac_old_val_w" != "$ac_new_val_w"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' has changed since the previous run:" >&5 -printf "%s\n" "$as_me: error: '$ac_var' has changed since the previous run:" >&2;} - ac_cache_corrupted=: - else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&5 -printf "%s\n" "$as_me: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&2;} - eval $ac_var=\$ac_old_val - fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: '$ac_old_val'" >&5 -printf "%s\n" "$as_me: former value: '$ac_old_val'" >&2;} - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: '$ac_new_val'" >&5 -printf "%s\n" "$as_me: current value: '$ac_new_val'" >&2;} - fi;; - esac - # Pass precious variables to config.status. - if test "$ac_new_set" = set; then - case $ac_new_val in - *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; - *) ac_arg=$ac_var=$ac_new_val ;; - esac - case " $ac_configure_args " in - *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. - *) as_fn_append ac_configure_args " '$ac_arg'" ;; - esac - fi -done -if $ac_cache_corrupted; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 -printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run '${MAKE-make} distclean' and/or 'rm $cache_file' - and start over" "$LINENO" 5 -fi -## -------------------- ## -## Main body of script. 
## -## -------------------- ## - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - - -ac_config_headers="$ac_config_headers config.h" - - - - -# SYNOPSIS -# -# HTS_PROG_CC_WERROR(FLAGS_VAR) -# -# Set FLAGS_VAR to the flags needed to make the C compiler treat warnings -# as errors. - - -# hts_check_compile_flags_needed.m4 -# -# SYNOPSIS -# -# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) -# -# DESCRIPTION -# -# Check whether the given FLAGS are required to build and link INPUT with -# the current language's compiler. Compilation and linking are first -# tries without FLAGS. If that fails it then tries to compile and -# link again with FLAGS. -# -# FEATURE describes the feature being tested, and is used when printing -# messages and to name the cache entry (along with the tested flags). -# -# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on -# success/failure. In ACTION-SUCCESS, $flags_needed will be set to -# either an empty string or FLAGS depending on the test results. -# -# If EXTRA-FLAGS is defined, it is added to the current language's default -# flags (e.g. CFLAGS) when the check is done. The check is thus made with -# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to -# force the compiler to issue an error when a bad flag is given. -# -# If omitted, INPUT defaults to AC_LANG_PROGRAM(), although that probably -# isn't very useful. -# -# NOTE: Implementation based on AX_CHECK_COMPILE_FLAG. -# -# LICENSE -# -# Copyright (c) 2008 Guido U. 
Draheim -# Copyright (c) 2011 Maarten Bosmans -# Copyright (c) 2023 Robert Davies -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) - - - -# SYNOPSIS -# -# HTS_TEST_CC_C_LD_FLAG(FLAG, FOUND_VAR) -# -# Test if FLAG can be used on both CFLAGS and LDFLAGS. It it works, -# variable FOUND_VAR is set to FLAG. - - - - - -# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- -# serial 12 (pkg-config-0.29.2) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. -set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_CC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}gcc" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -printf "%s\n" "$CC" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_CC"; then - ac_ct_CC=$CC - # Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_ac_ct_CC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="gcc" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -printf "%s\n" "$ac_ct_CC" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -else - CC="$ac_cv_prog_CC" -fi - -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. -set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_CC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}cc" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -printf "%s\n" "$CC" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - - fi -fi -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_CC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - ac_prog_rejected=no -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# != 0; then - # We chose a different compiler from the bogus one. 
- # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" - fi -fi -fi ;; -esac -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -printf "%s\n" "$CC" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - for ac_prog in cl.exe - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_CC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -printf "%s\n" "$CC" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - - test -n "$CC" && break - done -fi -if test -z "$CC"; then - ac_ct_CC=$CC - for ac_prog in cl.exe -do - # Extract the first word of "$ac_prog", so it can be a program name with args. 
-set dummy $ac_prog; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_ac_ct_CC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="$ac_prog" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -printf "%s\n" "$ac_ct_CC" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - - test -n "$ac_ct_CC" && break -done - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -fi - -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. -set dummy ${ac_tool_prefix}clang; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_CC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}clang" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -printf "%s\n" "$CC" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_CC"; then - ac_ct_CC=$CC - # Extract the first word of "clang", so it can be a program name with args. -set dummy clang; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_ac_ct_CC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="clang" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -printf "%s\n" "$ac_ct_CC" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -else - CC="$ac_cv_prog_CC" -fi - -fi - - -test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "no acceptable C compiler found in \$PATH -See 'config.log' for more details" "$LINENO" 5; } - -# Provide some information about the compiler. -printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 -set X $ac_compile -ac_compiler=$2 -for ac_option in --version -v -V -qversion -version; do - { { ac_try="$ac_compiler $ac_option >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_compiler $ac_option >&5") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - sed '10a\ -... rest of stderr output deleted ... 
- 10q' conftest.err >conftest.er1 - cat conftest.er1 >&5 - fi - rm -f conftest.er1 conftest.err - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } -done - -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" -# Try to create an executable without -o first, disregard a.out. -# It will help us diagnose broken compilers, and finding out an intuition -# of exeext. -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -printf %s "checking whether the C compiler works... " >&6; } -ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` - -# The possible output files: -ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" - -ac_rmfiles= -for ac_file in $ac_files -do - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; - * ) ac_rmfiles="$ac_rmfiles $ac_file";; - esac -done -rm -f $ac_rmfiles - -if { { ac_try="$ac_link_default" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_link_default") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } -then : - # Autoconf-2.13 could set the ac_cv_exeext variable to 'no'. -# So ignore a value of 'no', otherwise this would lead to 'EXEEXT = no' -# in a Makefile. We should not override ac_cv_exeext if it was cached, -# so that the user can short-circuit this test for compilers unknown to -# Autoconf. 
-for ac_file in $ac_files '' -do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) - ;; - [ab].out ) - # We found the default executable, but exeext='' is most - # certainly right. - break;; - *.* ) - if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; - then :; else - ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - fi - # We set ac_cv_exeext here because the later test for it is not - # safe: cross compilers may not add the suffix if given an '-o' - # argument, so we may need to know it at that point already. - # Even if this section looks crufty: it has the advantage of - # actually working. - break;; - * ) - break;; - esac -done -test "$ac_cv_exeext" = no && ac_cv_exeext= - -else case e in #( - e) ac_file='' ;; -esac -fi -if test -z "$ac_file" -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -printf "%s\n" "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error 77 "C compiler cannot create executables -See 'config.log' for more details" "$LINENO" 5; } -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -printf %s "checking for C compiler default output file name... " >&6; } -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -printf "%s\n" "$ac_file" >&6; } -ac_exeext=$ac_cv_exeext - -rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out -ac_clean_files=$ac_clean_files_save -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 -printf %s "checking for suffix of executables... 
" >&6; } -if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } -then : - # If both 'conftest.exe' and 'conftest' are 'present' (well, observable) -# catch 'conftest.exe'. For instance with Cygwin, 'ls conftest' will -# work properly (i.e., refer to 'conftest.exe'), while it won't with -# 'rm'. -for ac_file in conftest.exe conftest conftest.*; do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; - *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - break;; - * ) break;; - esac -done -else case e in #( - e) { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "cannot compute suffix of executables: cannot compile and link -See 'config.log' for more details" "$LINENO" 5; } ;; -esac -fi -rm -f conftest conftest$ac_cv_exeext -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 -printf "%s\n" "$ac_cv_exeext" >&6; } - -rm -f conftest.$ac_ext -EXEEXT=$ac_cv_exeext -ac_exeext=$EXEEXT -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main (void) -{ -FILE *f = fopen ("conftest.out", "w"); - if (!f) - return 1; - return ferror (f) || fclose (f) != 0; - - ; - return 0; -} -_ACEOF -ac_clean_files="$ac_clean_files conftest.out" -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -printf %s "checking whether we are cross compiling... 
" >&6; } -if test "$cross_compiling" != yes; then - { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - if { ac_try='./conftest$ac_cv_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - cross_compiling=no - else - if test "$cross_compiling" = maybe; then - cross_compiling=yes - else - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error 77 "cannot run C compiled programs. -If you meant to cross compile, use '--host'. -See 'config.log' for more details" "$LINENO" 5; } - fi - fi -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -printf "%s\n" "$cross_compiling" >&6; } - -rm -f conftest.$ac_ext conftest$ac_cv_exeext \ - conftest.o conftest.obj conftest.out -ac_clean_files=$ac_clean_files_save -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 -printf %s "checking for suffix of object files... " >&6; } -if test ${ac_cv_objext+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.o conftest.obj -if { { ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } -then : - for ac_file in conftest.o conftest.obj conftest.*; do - test -f "$ac_file" || continue; - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; - *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` - break;; - esac -done -else case e in #( - e) printf "%s\n" "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "cannot compute suffix of object files: cannot compile -See 'config.log' for more details" "$LINENO" 5; } ;; -esac -fi -rm -f conftest.$ac_cv_objext conftest.$ac_ext ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 -printf "%s\n" "$ac_cv_objext" >&6; } -OBJEXT=$ac_cv_objext -ac_objext=$OBJEXT -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 -printf %s "checking whether the compiler supports GNU C... " >&6; } -if test ${ac_cv_c_compiler_gnu+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ -#ifndef __GNUC__ - choke me -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_compiler_gnu=yes -else case e in #( - e) ac_compiler_gnu=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -ac_cv_c_compiler_gnu=$ac_compiler_gnu - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 -printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -if test $ac_compiler_gnu = yes; then - GCC=yes -else - GCC= -fi -ac_test_CFLAGS=${CFLAGS+y} -ac_save_CFLAGS=$CFLAGS -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 -printf %s "checking whether $CC accepts -g... " >&6; } -if test ${ac_cv_prog_cc_g+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_cv_prog_cc_g=yes -else case e in #( - e) CFLAGS="" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - -else case e in #( - e) ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_cv_prog_cc_g=yes -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 -printf "%s\n" "$ac_cv_prog_cc_g" >&6; } -if test $ac_test_CFLAGS; then - CFLAGS=$ac_save_CFLAGS -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi -ac_prog_cc_stdc=no -if test x$ac_prog_cc_stdc = xno -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 -printf %s "checking for $CC option to enable C11 features... " >&6; } -if test ${ac_cv_prog_cc_c11+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_cv_prog_cc_c11=no -ac_save_CC=$CC -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$ac_c_conftest_c11_program -_ACEOF -for ac_arg in '' -std=gnu11 -do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO" -then : - ac_cv_prog_cc_c11=$ac_arg -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam - test "x$ac_cv_prog_cc_c11" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC ;; -esac -fi - -if test "x$ac_cv_prog_cc_c11" = xno -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -printf "%s\n" "unsupported" >&6; } -else case e in #( - e) if test "x$ac_cv_prog_cc_c11" = x -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -printf "%s\n" "none needed" >&6; } -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 -printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } - CC="$CC $ac_cv_prog_cc_c11" ;; -esac -fi - ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 - ac_prog_cc_stdc=c11 ;; -esac -fi -fi -if test x$ac_prog_cc_stdc = xno -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 -printf %s "checking for $CC option to enable C99 features... " >&6; } -if test ${ac_cv_prog_cc_c99+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_cv_prog_cc_c99=no -ac_save_CC=$CC -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$ac_c_conftest_c99_program -_ACEOF -for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= -do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO" -then : - ac_cv_prog_cc_c99=$ac_arg -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam - test "x$ac_cv_prog_cc_c99" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC ;; -esac -fi - -if test "x$ac_cv_prog_cc_c99" = xno -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -printf "%s\n" "unsupported" >&6; } -else case e in #( - e) if test "x$ac_cv_prog_cc_c99" = x -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -printf "%s\n" "none needed" >&6; } -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 -printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } - CC="$CC $ac_cv_prog_cc_c99" ;; -esac -fi - ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 - ac_prog_cc_stdc=c99 ;; -esac -fi -fi -if test x$ac_prog_cc_stdc = xno -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 -printf %s "checking for $CC option to enable C89 features... " >&6; } -if test ${ac_cv_prog_cc_c89+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_cv_prog_cc_c89=no -ac_save_CC=$CC -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$ac_c_conftest_c89_program -_ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" -do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO" -then : - ac_cv_prog_cc_c89=$ac_arg -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam - test "x$ac_cv_prog_cc_c89" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC ;; -esac -fi - -if test "x$ac_cv_prog_cc_c89" = xno -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -printf "%s\n" "unsupported" >&6; } -else case e in #( - e) if test "x$ac_cv_prog_cc_c89" = x -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -printf "%s\n" "none needed" >&6; } -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 -printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } - CC="$CC $ac_cv_prog_cc_c89" ;; -esac -fi - ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 - ac_prog_cc_stdc=c89 ;; -esac -fi -fi - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. -set dummy ${ac_tool_prefix}ranlib; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_RANLIB+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$RANLIB"; then - ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -RANLIB=$ac_cv_prog_RANLIB -if test -n "$RANLIB"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 -printf "%s\n" "$RANLIB" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_RANLIB"; then - ac_ct_RANLIB=$RANLIB - # Extract the first word of "ranlib", so it can be a program name with args. -set dummy ranlib; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_ac_ct_RANLIB+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_RANLIB"; then - ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_RANLIB="ranlib" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB -if test -n "$ac_ct_RANLIB"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 -printf "%s\n" "$ac_ct_RANLIB" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - if test "x$ac_ct_RANLIB" = x; then - RANLIB=":" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - RANLIB=$ac_ct_RANLIB - fi -else - RANLIB="$ac_cv_prog_RANLIB" -fi - - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 -printf %s "checking for grep that handles long lines and -e... " >&6; } -if test ${ac_cv_path_GREP+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -z "$GREP"; then - ac_path_GREP_found=false - # Loop through the user's path and test for each of PROGNAME-LIST - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_prog in grep ggrep - do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_GREP="$as_dir$ac_prog$ac_exec_ext" - as_fn_executable_p "$ac_path_GREP" || continue -# Check for GNU ac_path_GREP and select it if it is found. 
- # Check for GNU $ac_path_GREP -case `"$ac_path_GREP" --version 2>&1` in #( -*GNU*) - ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; -#( -*) - ac_count=0 - printf %s 0123456789 >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - printf "%s\n" 'GREP' >> "conftest.nl" - "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - as_fn_arith $ac_count + 1 && ac_count=$as_val - if test $ac_count -gt ${ac_path_GREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_GREP="$ac_path_GREP" - ac_path_GREP_max=$ac_count - fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - $ac_path_GREP_found && break 3 - done - done - done -IFS=$as_save_IFS - if test -z "$ac_cv_path_GREP"; then - as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 - fi -else - ac_cv_path_GREP=$GREP -fi - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 -printf "%s\n" "$ac_cv_path_GREP" >&6; } - GREP="$ac_cv_path_GREP" - - - - # Check whether --enable-warnings was given. -if test ${enable_warnings+y} -then : - enableval=$enable_warnings; -else case e in #( - e) enable_warnings=yes ;; -esac -fi - - - if test "x$enable_warnings" != xno -then : - - - - ansi="" - if test "x$ansi" = "x" -then : - msg="for C compiler warning flags" -else case e in #( - e) msg="for C compiler warning and ANSI conformance flags" ;; -esac -fi - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking $msg" >&5 -printf %s "checking $msg... 
" >&6; } - if test ${hts_cv_prog_cc_warnings+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) hts_cv_prog_cc_warnings="" - if test "x$CC" != "x" -then : - - cat > conftest.c < /dev/null 2>&1 && - test -f conftest.o -then : - if test "x$ansi" = "x" -then : - hts_cv_prog_cc_warnings="-Wall" -else case e in #( - e) hts_cv_prog_cc_warnings="-Wall -ansi -pedantic" ;; -esac -fi - -elif # Sun Studio or Solaris C compiler - "$CC" -V 2>&1 | $GREP -i -E "WorkShop|Sun C" > /dev/null 2>&1 && - "$CC" -c -v -Xc conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - if test "x$ansi" = "x" -then : - hts_cv_prog_cc_warnings="-v" -else case e in #( - e) hts_cv_prog_cc_warnings="-v -Xc" ;; -esac -fi - -elif # Digital Unix C compiler - "$CC" -V 2>&1 | $GREP -i "Digital UNIX Compiler" > /dev/null 2>&1 && - "$CC" -c -verbose -w0 -warnprotos -std1 conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - if test "x$ansi" = "x" -then : - hts_cv_prog_cc_warnings="-verbose -w0 -warnprotos" -else case e in #( - e) hts_cv_prog_cc_warnings="-verbose -w0 -warnprotos -std1" ;; -esac -fi - -elif # C for AIX Compiler - "$CC" 2>&1 | $GREP -i "C for AIX Compiler" > /dev/null 2>&1 && - "$CC" -c -qlanglvl=ansi -qinfo=all conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - if test "x$ansi" = "x" -then : - hts_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd" -else case e in #( - e) hts_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd -qlanglvl=ansi" ;; -esac -fi - -elif # IRIX C compiler - "$CC" -version 2>&1 | $GREP -i "MIPSpro Compilers" > /dev/null 2>&1 && - "$CC" -c -fullwarn -ansi -ansiE conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - if test "x$ansi" = "x" -then : - hts_cv_prog_cc_warnings="-fullwarn" -else case e in #( - e) hts_cv_prog_cc_warnings="-fullwarn -ansi -ansiE" ;; -esac -fi - -elif # HP-UX C compiler - what "$CC" 2>&1 | $GREP -i "HP C Compiler" > /dev/null 2>&1 && - "$CC" -c -Aa +w1 conftest.c > /dev/null 
2>&1 && - test -f conftest.o -then : - if test "x$ansi" = "x" -then : - hts_cv_prog_cc_warnings="+w1" -else case e in #( - e) hts_cv_prog_cc_warnings="+w1 -Aa" ;; -esac -fi - -elif # The NEC SX series (Super-UX 10) C compiler - "$CC" -V 2>&1 | $GREP "/SX" > /dev/null 2>&1 && - "$CC" -c -pvctl,fullmsg -Xc conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - - if test "x$ansi" = "x" -then : - hts_cv_prog_cc_warnings="-pvctl,fullmsg" -else case e in #( - e) hts_cv_prog_cc_warnings="-pvctl,fullmsg -Xc" ;; -esac -fi - -elif # The Cray C compiler (Unicos) - "$CC" -V 2>&1 | $GREP -i "Cray" > /dev/null 2>&1 && - "$CC" -c -h msglevel_2 conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - if test "x$ansi" = "x" -then : - hts_cv_prog_cc_warnings="-h#msglevel_2" -else case e in #( - e) hts_cv_prog_cc_warnings="-h#msglevel_2,conform" ;; -esac -fi - -elif # The Tiny C Compiler - "$CC" -v 2>&1 | $GREP "tcc version" > /dev/null && - "$CC" -Wall -c conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - hts_cv_prog_cc_warnings="-Wall" - -fi - rm -f conftest.* - -fi - ;; -esac -fi - - - if test "x$hts_cv_prog_cc_warnings" != "x" -then : - -ac_arg_result=`echo "$hts_cv_prog_cc_warnings" | tr '#' ' '` -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_arg_result" >&5 -printf "%s\n" "$ac_arg_result" >&6; } - -ac_arg_needed="" -for ac_arg in $hts_cv_prog_cc_warnings -do - ac_arg_sp=`echo "$ac_arg" | tr '#' ' '` - case " $CFLAGS " in #( - *" $ac_arg_sp "*) : - ;; #( - *) : - ac_arg_needed="$ac_arg_all $ac_arg_sp" ;; -esac -done -CFLAGS="$ac_arg_needed $CFLAGS" -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unknown" >&5 -printf "%s\n" "unknown" >&6; } - ;; -esac -fi - -fi - - - # Check whether --enable-werror was given. 
-if test ${enable_werror+y} -then : - enableval=$enable_werror; -else case e in #( - e) enable_werror=no ;; -esac -fi - - - if test "x$enable_werror" != xno -then : - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler flags to error on warnings" >&5 -printf %s "checking for C compiler flags to error on warnings... " >&6; } - if test ${hts_cv_prog_cc_werror+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) hts_cv_prog_cc_werror="" - if test "x$CC" != "x" -then : - - cat > conftest.c < /dev/null 2>&1 && - test -f conftest.o -then : - hts_cv_prog_cc_werror="-Werror" -elif # Sun Studio or Solaris C compiler - "$CC" -V 2>&1 | $GREP -i -E "WorkShop|Sun C" > /dev/null 2>&1 && - "$CC" -c -errwarn=%all conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - hts_cv_prog_cc_werror="-errwarn=%all" -elif # The Tiny C Compiler - "$CC" -v 2>&1 | $GREP "tcc version" > /dev/null && - "$CC" -Wall -c conftest.c > /dev/null 2>&1 && - test -f conftest.o -then : - hts_cv_prog_cc_werror="-Werror" - -fi - rm -f conftest.* - -fi - ;; -esac -fi - - if test "x$hts_cv_prog_cc_werror" != x -then : - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_prog_cc_werror" >&5 -printf "%s\n" "$hts_cv_prog_cc_werror" >&6; } - if test "xhts_late_cflags" != x -then : - eval hts_late_cflags="$hts_cv_prog_cc_werror" -fi - -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unknown" >&5 -printf "%s\n" "unknown" >&6; } - ;; -esac -fi - -fi - - -# HTSlib uses X/Open-only facilities (M_SQRT2 etc, drand48() etc), and -# various POSIX functions that are provided by various _POSIX_C_SOURCE values -# or by _XOPEN_SOURCE >= 500. It also uses usleep(), which is removed when -# _XOPEN_SOURCE >= 700. Additionally, some definitions may require -# _XOPEN_SOURCE >= 600 on some platforms (snprintf on MinGW, -# PTHREAD_MUTEX_RECURSIVE on some Linux distributions). Hence we set it to 600. 
- -# Define _XOPEN_SOURCE unless the user has already done so via $CPPFLAGS etc. - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC options needed to detect all undeclared functions" >&5 -printf %s "checking for $CC options needed to detect all undeclared functions... " >&6; } -if test ${ac_cv_c_undeclared_builtin_options+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_save_CFLAGS=$CFLAGS - ac_cv_c_undeclared_builtin_options='cannot detect' - for ac_arg in '' -fno-builtin; do - CFLAGS="$ac_save_CFLAGS $ac_arg" - # This test program should *not* compile successfully. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ -(void) strchr; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - -else case e in #( - e) # This test program should compile successfully. - # No library function is consistently available on - # freestanding implementations, so test against a dummy - # declaration. Include always-available headers on the - # off chance that they somehow elicit warnings. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -#include -#include -#include -#include -extern void ac_decl (int, char *); - -int -main (void) -{ -(void) ac_decl (0, (char *) 0); - (void) ac_decl; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - if test x"$ac_arg" = x -then : - ac_cv_c_undeclared_builtin_options='none needed' -else case e in #( - e) ac_cv_c_undeclared_builtin_options=$ac_arg ;; -esac -fi - break -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - done - CFLAGS=$ac_save_CFLAGS - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_undeclared_builtin_options" >&5 -printf "%s\n" "$ac_cv_c_undeclared_builtin_options" >&6; } - case $ac_cv_c_undeclared_builtin_options in #( - 'cannot detect') : - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "cannot make $CC report undeclared builtins -See 'config.log' for more details" "$LINENO" 5; } ;; #( - 'none needed') : - ac_c_undeclared_builtin_options='' ;; #( - *) : - ac_c_undeclared_builtin_options=$ac_cv_c_undeclared_builtin_options ;; -esac - -ac_header= ac_cache= -for ac_item in $ac_header_c_list -do - if test $ac_cache; then - ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" - if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then - printf "%s\n" "#define $ac_item 1" >> confdefs.h - fi - ac_header= ac_cache= - elif test $ac_header; then - ac_cache=$ac_item - else - ac_header=$ac_item - fi -done - - - - - - - - -if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes -then : - -printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h - -fi -ac_fn_check_decl "$LINENO" "_XOPEN_SOURCE" "ac_cv_have_decl__XOPEN_SOURCE" "$ac_includes_default" "$ac_c_undeclared_builtin_options" "CFLAGS" -if test "x$ac_cv_have_decl__XOPEN_SOURCE" = xyes 
-then : - -else case e in #( - e) -printf "%s\n" "#define _XOPEN_SOURCE 600" >>confdefs.h - ;; -esac -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for sse4.1" >&5 -printf %s "checking C compiler flags needed for sse4.1... " >&6; } -if test ${hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main (void) -{ - - #ifdef __x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b); - return _mm_popcnt_u32(*((char *) &c)); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt=none -else case e in #( - e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse4.1 -mssse3 -mpopcnt" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main (void) -{ - - #ifdef __x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b); - return _mm_popcnt_u32(*((char *) &c)); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt="-msse4.1 -mssse3 -mpopcnt" -else case e in #( - e) hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt=unsupported ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" >&5 -printf "%s\n" "$hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" >&6; } -if test "x$hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" = xunsupported -then : - - : - -else case e in #( - e) - if test "x$hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" = xnone -then : - flags_needed="" -else case e in #( - e) flags_needed="$hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" ;; -esac -fi - - hts_cflags_sse4="$flags_needed" - -printf "%s\n" "#define HAVE_SSSE3 1" >>confdefs.h - - -printf "%s\n" "#define HAVE_POPCNT 1" >>confdefs.h - - -printf "%s\n" "#define HAVE_SSE4_1 1" >>confdefs.h - - - printf "%s\n" "#define UBSAN 1" >>confdefs.h - - - ;; -esac -fi - - - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for avx2" >&5 -printf %s "checking C compiler flags needed for avx2... " >&6; } -if test ${hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main (void) -{ - - #ifdef __x86_64__ - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); - long long c = _mm256_extract_epi64(b, 0); - return _mm_popcnt_u32((int) c); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt=none -else case e in #( - e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx2 -mpopcnt" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main (void) -{ - - #ifdef __x86_64__ - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); - long long c = _mm256_extract_epi64(b, 0); - return _mm_popcnt_u32((int) c); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt="-mavx2 -mpopcnt" -else case e in #( - e) hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt=unsupported ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" >&5 -printf "%s\n" "$hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" >&6; } -if test "x$hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" = xunsupported -then : - - : - -else case e in #( - e) - if test "x$hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" = xnone -then : - flags_needed="" -else case e in #( - e) flags_needed="$hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" ;; -esac -fi - - hts_cflags_avx2="$flags_needed" - - -printf "%s\n" "#define HAVE_POPCNT 1" >>confdefs.h - - -printf "%s\n" "#define HAVE_AVX2 1" >>confdefs.h - - - ;; -esac 
-fi - - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for avx512f" >&5 -printf %s "checking C compiler flags needed for avx512f... " >&6; } -if test ${hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main (void) -{ - - #ifdef __x86_64__ - __m512i a = _mm512_set1_epi32(1); - __m512i b = _mm512_add_epi32(a, a); - return _mm_popcnt_u32(*((char *) &b)); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt=none -else case e in #( - e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512f -mpopcnt" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - -int -main (void) -{ - - #ifdef __x86_64__ - __m512i a = _mm512_set1_epi32(1); - __m512i b = _mm512_add_epi32(a, a); - return _mm_popcnt_u32(*((char *) &b)); - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt="-mavx512f -mpopcnt" -else case e in #( - e) hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt=unsupported ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" >&5 -printf "%s\n" "$hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" >&6; } -if test "x$hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" = xunsupported -then : - - : - -else case e in #( - e) - if test 
"x$hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" = xnone -then : - flags_needed="" -else case e in #( - e) flags_needed="$hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" ;; -esac -fi - - hts_cflags_avx512="$flags_needed" - - -printf "%s\n" "#define HAVE_POPCNT 1" >>confdefs.h - - -printf "%s\n" "#define HAVE_AVX512 1" >>confdefs.h - - - ;; -esac -fi - - - - - - - - - -if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. -set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_path_PKG_CONFIG+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) case $PKG_CONFIG in - [\\/]* | ?:[\\/]*) - ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_path_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - ;; -esac ;; -esac -fi -PKG_CONFIG=$ac_cv_path_PKG_CONFIG -if test -n "$PKG_CONFIG"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 -printf "%s\n" "$PKG_CONFIG" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - -fi -if test -z "$ac_cv_path_PKG_CONFIG"; then - ac_pt_PKG_CONFIG=$PKG_CONFIG - # Extract the first word of "pkg-config", so it can be a program name with args. 
-set dummy pkg-config; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_path_ac_pt_PKG_CONFIG+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) case $ac_pt_PKG_CONFIG in - [\\/]* | ?:[\\/]*) - ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_path_ac_pt_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - ;; -esac ;; -esac -fi -ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG -if test -n "$ac_pt_PKG_CONFIG"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 -printf "%s\n" "$ac_pt_PKG_CONFIG" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - if test "x$ac_pt_PKG_CONFIG" = x; then - PKG_CONFIG="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - PKG_CONFIG=$ac_pt_PKG_CONFIG - fi -else - PKG_CONFIG="$ac_cv_path_PKG_CONFIG" -fi - -fi -if test -n "$PKG_CONFIG"; then - _pkg_min_version=0.9.0 - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 -printf %s "checking pkg-config is at least version $_pkg_min_version... 
" >&6; } - if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } - else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - PKG_CONFIG="" - fi -fi - -need_crypto=no -pc_requires= -static_LDFLAGS=$LDFLAGS -static_LIBS='-lpthread -lz -lm' -private_LIBS=$LDFLAGS - -# Check whether --enable-versioned-symbols was given. -if test ${enable_versioned_symbols+y} -then : - enableval=$enable_versioned_symbols; -else case e in #( - e) enable_versioned_symbols=yes ;; -esac -fi - - -# Check whether --enable-bz2 was given. -if test ${enable_bz2+y} -then : - enableval=$enable_bz2; -else case e in #( - e) enable_bz2=yes ;; -esac -fi - - -# Check whether --enable-gcs was given. -if test ${enable_gcs+y} -then : - enableval=$enable_gcs; -else case e in #( - e) enable_gcs=check ;; -esac -fi - - -# Check whether --enable-largefile was given. -if test ${enable_largefile+y} -then : - enableval=$enable_largefile; -fi -if test "$enable_largefile,$enable_year2038" != no,no -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable large file support" >&5 -printf %s "checking for $CC option to enable large file support... " >&6; } -if test ${ac_cv_sys_largefile_opts+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_save_CC="$CC" - ac_opt_found=no - for ac_opt in "none needed" "-D_FILE_OFFSET_BITS=64" "-D_LARGE_FILES=1" "-n32"; do - if test x"$ac_opt" != x"none needed" -then : - CC="$ac_save_CC $ac_opt" -fi - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#ifndef FTYPE -# define FTYPE off_t -#endif - /* Check that FTYPE can represent 2**63 - 1 correctly. - We can't simply define LARGE_FTYPE to be 9223372036854775807, - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. 
*/ -#define LARGE_FTYPE (((FTYPE) 1 << 31 << 31) - 1 + ((FTYPE) 1 << 31 << 31)) - int FTYPE_is_large[(LARGE_FTYPE % 2147483629 == 721 - && LARGE_FTYPE % 2147483647 == 1) - ? 1 : -1]; -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - if test x"$ac_opt" = x"none needed" -then : - # GNU/Linux s390x and alpha need _FILE_OFFSET_BITS=64 for wide ino_t. - CC="$CC -DFTYPE=ino_t" - if ac_fn_c_try_compile "$LINENO" -then : - -else case e in #( - e) CC="$CC -D_FILE_OFFSET_BITS=64" - if ac_fn_c_try_compile "$LINENO" -then : - ac_opt='-D_FILE_OFFSET_BITS=64' -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam -fi - ac_cv_sys_largefile_opts=$ac_opt - ac_opt_found=yes -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - test $ac_opt_found = no || break - done - CC="$ac_save_CC" - - test $ac_opt_found = yes || ac_cv_sys_largefile_opts="support not detected" ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_opts" >&5 -printf "%s\n" "$ac_cv_sys_largefile_opts" >&6; } - -ac_have_largefile=yes -case $ac_cv_sys_largefile_opts in #( - "none needed") : - ;; #( - "supported through gnulib") : - ;; #( - "support not detected") : - ac_have_largefile=no ;; #( - "-D_FILE_OFFSET_BITS=64") : - -printf "%s\n" "#define _FILE_OFFSET_BITS 64" >>confdefs.h - ;; #( - "-D_LARGE_FILES=1") : - -printf "%s\n" "#define _LARGE_FILES 1" >>confdefs.h - ;; #( - "-n32") : - CC="$CC -n32" ;; #( - *) : - as_fn_error $? "internal error: bad value for \$ac_cv_sys_largefile_opts" "$LINENO" 5 ;; -esac - -if test "$enable_year2038" != no -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option for timestamps after 2038" >&5 -printf %s "checking for $CC option for timestamps after 2038... 
" >&6; } -if test ${ac_cv_sys_year2038_opts+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_save_CPPFLAGS="$CPPFLAGS" - ac_opt_found=no - for ac_opt in "none needed" "-D_TIME_BITS=64" "-D__MINGW_USE_VC2005_COMPAT" "-U_USE_32_BIT_TIME_T -D__MINGW_USE_VC2005_COMPAT"; do - if test x"$ac_opt" != x"none needed" -then : - CPPFLAGS="$ac_save_CPPFLAGS $ac_opt" -fi - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #include - /* Check that time_t can represent 2**32 - 1 correctly. */ - #define LARGE_TIME_T \\ - ((time_t) (((time_t) 1 << 30) - 1 + 3 * ((time_t) 1 << 30))) - int verify_time_t_range[(LARGE_TIME_T / 65537 == 65535 - && LARGE_TIME_T % 65537 == 0) - ? 1 : -1]; - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_cv_sys_year2038_opts="$ac_opt" - ac_opt_found=yes -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - test $ac_opt_found = no || break - done - CPPFLAGS="$ac_save_CPPFLAGS" - test $ac_opt_found = yes || ac_cv_sys_year2038_opts="support not detected" ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_year2038_opts" >&5 -printf "%s\n" "$ac_cv_sys_year2038_opts" >&6; } - -ac_have_year2038=yes -case $ac_cv_sys_year2038_opts in #( - "none needed") : - ;; #( - "support not detected") : - ac_have_year2038=no ;; #( - "-D_TIME_BITS=64") : - -printf "%s\n" "#define _TIME_BITS 64" >>confdefs.h - ;; #( - "-D__MINGW_USE_VC2005_COMPAT") : - -printf "%s\n" "#define __MINGW_USE_VC2005_COMPAT 1" >>confdefs.h - ;; #( - "-U_USE_32_BIT_TIME_T"*) : - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "the 'time_t' type is currently forced to be 32-bit. It -will stop working after mid-January 2038. Remove -_USE_32BIT_TIME_T from the compiler flags. -See 'config.log' for more details" "$LINENO" 5; } ;; #( - *) : - as_fn_error $? 
"internal error: bad value for \$ac_cv_sys_year2038_opts" "$LINENO" 5 ;; -esac - -fi - -fi - -# Check whether --enable-libcurl was given. -if test ${enable_libcurl+y} -then : - enableval=$enable_libcurl; -else case e in #( - e) enable_libcurl=check ;; -esac -fi - - -# Check whether --enable-lzma was given. -if test ${enable_lzma+y} -then : - enableval=$enable_lzma; -else case e in #( - e) enable_lzma=yes ;; -esac -fi - - -# Check whether --enable-plugins was given. -if test ${enable_plugins+y} -then : - enableval=$enable_plugins; -else case e in #( - e) enable_plugins=no ;; -esac -fi - - - - -# Check whether --with-external-htscodecs was given. -if test ${with_external_htscodecs+y} -then : - withval=$with_external_htscodecs; -else case e in #( - e) with_external_htscodecs=no ;; -esac -fi - - - - -# Check whether --with-libdeflate was given. -if test ${with_libdeflate+y} -then : - withval=$with_libdeflate; -else case e in #( - e) with_libdeflate=check ;; -esac -fi - - - -# Check whether --with-plugin-dir was given. -if test ${with_plugin_dir+y} -then : - withval=$with_plugin_dir; case $withval in - yes|no) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "no directory specified for --with-plugin-dir" "$LINENO" 5 ;; - esac -else case e in #( - e) with_plugin_dir='$(libexecdir)/htslib' ;; -esac -fi - -plugindir=$with_plugin_dir - - - -# Check whether --with-plugin-path was given. -if test ${with_plugin_path+y} -then : - withval=$with_plugin_path; case $withval in - yes) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "no path specified for --with-plugin-path" "$LINENO" 5 ;; - no) with_plugin_path= ;; - esac -else case e in #( - e) with_plugin_path=$with_plugin_dir ;; -esac -fi - -pluginpath=$with_plugin_path - - -# Check whether --enable-s3 was given. 
-if test ${enable_s3+y} -then : - enableval=$enable_s3; -else case e in #( - e) enable_s3=check ;; -esac -fi - - -basic_host=${host_alias:-unknown-`uname -s`} -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking shared library type for $basic_host" >&5 -printf %s "checking shared library type for $basic_host... " >&6; } -case $basic_host in - *-cygwin* | *-CYGWIN*) - host_result="Cygwin DLL" - PLATFORM=CYGWIN - PLUGIN_EXT=.cygdll - ;; - *-darwin* | *-Darwin*) - host_result="Darwin dylib" - PLATFORM=Darwin - PLUGIN_EXT=.bundle - ;; - *-msys* | *-MSYS* | *-mingw* | *-MINGW*) - host_result="MSYS dll" - PLATFORM=MSYS - PLUGIN_EXT=.dll - # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64, - # %lld and %z printf formats work. It also enforces the snprintf to - # be C99 compliant so it returns the correct values (in kstring.c). - - # Now set by default, so no need to do it here. - # CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE=600" - ;; - *) - host_result="plain .so" - PLATFORM=default - PLUGIN_EXT=.so - ;; -esac -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $host_result" >&5 -printf "%s\n" "$host_result" >&6; } - - -if test x"$PLATFORM" = xdefault && test x"$enable_versioned_symbols" = xyes -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the linker supports versioned symbols" >&5 -printf %s "checking whether the linker supports versioned symbols... " >&6; } -if test ${hts_cv_have_versioned_symbols+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - save_LDFLAGS=$LDFLAGS - LDFLAGS="-Wl,-version-script,$srcdir/htslib.map $LDFLAGS" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_have_versioned_symbols=yes -else case e in #( - e) hts_cv_have_versioned_symbols=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - LDFLAGS=$save_LDFLAGS - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_have_versioned_symbols" >&5 -printf "%s\n" "$hts_cv_have_versioned_symbols" >&6; } - if test "x$hts_cv_have_versioned_symbols" = xyes -then : - - VERSION_SCRIPT_LDFLAGS='-Wl,-version-script,$(srcprefix)htslib.map' - - -fi - -fi - - - # Test for flags to set default shared library visibility to hidden - # -fvisibility=hidden : GCC compatible - # -xldscope=hidden : SunStudio - ac_opt_found=no - if test "x$ac_opt_found" = "xno" -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -fvisibility=hidden" >&5 -printf %s "checking whether the compiler accepts -fvisibility=hidden... " >&6; } -if test ${hts_cv_check__fvisibility_hidden+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_save_cflags=$CFLAGS - ac_check_save_ldflags=$LDFLAGS - CFLAGS="$CFLAGS -fvisibility=hidden" - LDFLAGS="$LDFLAGS -fvisibility=hidden" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check__fvisibility_hidden=yes - if test "xac_opt_found" != x -then : - eval ac_opt_found="-fvisibility=hidden" -fi -else case e in #( - e) hts_cv_check__fvisibility_hidden=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ac_check_save_cflags - LDFLAGS=$ac_check_save_ldflags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__fvisibility_hidden" >&5 -printf "%s\n" "$hts_cv_check__fvisibility_hidden" >&6; } - -fi - if test "x$ac_opt_found" = "xno" -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -xldscope=hidden" >&5 -printf %s "checking whether the compiler accepts -xldscope=hidden... " >&6; } -if test ${hts_cv_check__xldscope_hidden+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_save_cflags=$CFLAGS - ac_check_save_ldflags=$LDFLAGS - CFLAGS="$CFLAGS -xldscope=hidden" - LDFLAGS="$LDFLAGS -xldscope=hidden" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check__xldscope_hidden=yes - if test "xac_opt_found" != x -then : - eval ac_opt_found="-xldscope=hidden" -fi -else case e in #( - e) hts_cv_check__xldscope_hidden=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ac_check_save_cflags - LDFLAGS=$ac_check_save_ldflags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__xldscope_hidden" >&5 -printf "%s\n" "$hts_cv_check__xldscope_hidden" >&6; } - -fi - - if test "x$ac_opt_found" != "xno" -then : - CFLAGS="$CFLAGS $ac_opt_found" - LDFLAGS="$LDFLAGS $ac_opt_found" -fi - - - - - - # Make sure we can run config.sub. 
-$SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 || - as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5 - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 -printf %s "checking build system type... " >&6; } -if test ${ac_cv_build+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_build_alias=$build_alias -test "x$ac_build_alias" = x && - ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"` -test "x$ac_build_alias" = x && - as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 -ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` || - as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5 - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 -printf "%s\n" "$ac_cv_build" >&6; } -case $ac_cv_build in -*-*-*) ;; -*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; -esac -build=$ac_cv_build -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_build -shift -build_cpu=$1 -build_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -build_os=$* -IFS=$ac_save_IFS -case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac - - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 -printf %s "checking host system type... " >&6; } -if test ${ac_cv_host+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test "x$host_alias" = x; then - ac_cv_host=$ac_cv_build -else - ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` || - as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5 -fi - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 -printf "%s\n" "$ac_cv_host" >&6; } -case $ac_cv_host in -*-*-*) ;; -*) as_fn_error $? 
"invalid value of canonical host" "$LINENO" 5;; -esac -host=$ac_cv_host -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_host -shift -host_cpu=$1 -host_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -host_os=$* -IFS=$ac_save_IFS -case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac - - - -ac_func= -for ac_item in $ac_func_c_list -do - if test $ac_func; then - ac_fn_c_check_func "$LINENO" $ac_func ac_cv_func_$ac_func - if eval test \"x\$ac_cv_func_$ac_func\" = xyes; then - echo "#define $ac_item 1" >> confdefs.h - fi - ac_func= - else - ac_func=$ac_item - fi -done - - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for working mmap" >&5 -printf %s "checking for working mmap... " >&6; } -if test ${ac_cv_func_mmap_fixed_mapped+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test "$cross_compiling" = yes -then : - case "$host_os" in # (( - # Guess yes on platforms where we know the result. - linux*) ac_cv_func_mmap_fixed_mapped=yes ;; - # If we don't know, assume the worst. - *) ac_cv_func_mmap_fixed_mapped=no ;; - esac -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$ac_includes_default -/* malloc might have been renamed as rpl_malloc. */ -#undef malloc - -/* Thanks to Mike Haertel and Jim Avera for this test. - Here is a matrix of mmap possibilities: - mmap private not fixed - mmap private fixed at somewhere currently unmapped - mmap private fixed at somewhere already mapped - mmap shared not fixed - mmap shared fixed at somewhere currently unmapped - mmap shared fixed at somewhere already mapped - For private mappings, we should verify that changes cannot be read() - back from the file, nor mmap's back from the file at a different - address. 
(There have been systems where private was not correctly - implemented like the infamous i386 svr4.0, and systems where the - VM page cache was not coherent with the file system buffer cache - like early versions of FreeBSD and possibly contemporary NetBSD.) - For shared mappings, we should conversely verify that changes get - propagated back to all the places they're supposed to be. */ - -#include -#include - -#ifndef getpagesize -/* Prefer sysconf to the legacy getpagesize function, as getpagesize has - been removed from POSIX and is limited to page sizes that fit in 'int'. */ -# ifdef _SC_PAGESIZE -# define getpagesize() sysconf (_SC_PAGESIZE) -# elif defined _SC_PAGE_SIZE -# define getpagesize() sysconf (_SC_PAGE_SIZE) -# elif HAVE_GETPAGESIZE -int getpagesize (); -# else -# ifdef HAVE_SYS_PARAM_H -# include -# ifdef EXEC_PAGESIZE -# define getpagesize() EXEC_PAGESIZE -# else /* no EXEC_PAGESIZE */ -# ifdef NBPG -# define getpagesize() NBPG * CLSIZE -# ifndef CLSIZE -# define CLSIZE 1 -# endif /* no CLSIZE */ -# else /* no NBPG */ -# ifdef NBPC -# define getpagesize() NBPC -# else /* no NBPC */ -# ifdef PAGESIZE -# define getpagesize() PAGESIZE -# endif /* PAGESIZE */ -# endif /* no NBPC */ -# endif /* no NBPG */ -# endif /* no EXEC_PAGESIZE */ -# else /* no HAVE_SYS_PARAM_H */ -# define getpagesize() 8192 /* punt totally */ -# endif /* no HAVE_SYS_PARAM_H */ -# endif -#endif - -int -main (void) -{ - char *data, *data2, *data3; - const char *cdata2; - long i, pagesize; - int fd, fd2; - - pagesize = getpagesize (); - - /* First, make a file with some known garbage in it. */ - data = (char *) malloc (pagesize); - if (!data) - return 1; - for (i = 0; i < pagesize; ++i) - *(data + i) = rand (); - umask (0); - fd = creat ("conftest.mmap", 0600); - if (fd < 0) - return 2; - if (write (fd, data, pagesize) != pagesize) - return 3; - close (fd); - - /* Next, check that the tail of a page is zero-filled. 
File must have - non-zero length, otherwise we risk SIGBUS for entire page. */ - fd2 = open ("conftest.txt", O_RDWR | O_CREAT | O_TRUNC, 0600); - if (fd2 < 0) - return 4; - cdata2 = ""; - if (write (fd2, cdata2, 1) != 1) - return 5; - data2 = (char *) mmap (0, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd2, 0L); - if (data2 == MAP_FAILED) - return 6; - for (i = 0; i < pagesize; ++i) - if (*(data2 + i)) - return 7; - close (fd2); - /* 'return 8;' not currently used. */ - - /* Next, try to mmap the file at a fixed address which already has - something else allocated at it. If we can, also make sure that - we see the same garbage. */ - fd = open ("conftest.mmap", O_RDWR); - if (fd < 0) - return 9; - if (data2 != mmap (data2, pagesize, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED, fd, 0L)) - return 10; - for (i = 0; i < pagesize; ++i) - if (*(data + i) != *(data2 + i)) - return 11; - - /* Finally, make sure that changes to the mapped area do not - percolate back to the file as seen by read(). (This is a bug on - some variants of i386 svr4.0.) 
*/ - for (i = 0; i < pagesize; ++i) - *(data2 + i) = *(data2 + i) + 1; - data3 = (char *) malloc (pagesize); - if (!data3) - return 12; - if (read (fd, data3, pagesize) != pagesize) - return 13; - for (i = 0; i < pagesize; ++i) - if (*(data + i) != *(data3 + i)) - return 14; - close (fd); - free (data); - free (data3); - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO" -then : - ac_cv_func_mmap_fixed_mapped=yes -else case e in #( - e) ac_cv_func_mmap_fixed_mapped=no ;; -esac -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_mmap_fixed_mapped" >&5 -printf "%s\n" "$ac_cv_func_mmap_fixed_mapped" >&6; } -if test $ac_cv_func_mmap_fixed_mapped = yes; then - -printf "%s\n" "#define HAVE_MMAP 1" >>confdefs.h - -fi -rm -f conftest.mmap conftest.txt - -ac_fn_c_check_func "$LINENO" "gmtime_r" "ac_cv_func_gmtime_r" -if test "x$ac_cv_func_gmtime_r" = xyes -then : - printf "%s\n" "#define HAVE_GMTIME_R 1" >>confdefs.h - -fi -ac_fn_c_check_func "$LINENO" "fsync" "ac_cv_func_fsync" -if test "x$ac_cv_func_fsync" = xyes -then : - printf "%s\n" "#define HAVE_FSYNC 1" >>confdefs.h - -fi -ac_fn_c_check_func "$LINENO" "drand48" "ac_cv_func_drand48" -if test "x$ac_cv_func_drand48" = xyes -then : - printf "%s\n" "#define HAVE_DRAND48 1" >>confdefs.h - -fi -ac_fn_c_check_func "$LINENO" "srand48_deterministic" "ac_cv_func_srand48_deterministic" -if test "x$ac_cv_func_srand48_deterministic" = xyes -then : - printf "%s\n" "#define HAVE_SRAND48_DETERMINISTIC 1" >>confdefs.h - -fi - - -# Darwin has a dubious fdatasync() symbol, but no declaration in -as_ac_Symbol=`printf "%s\n" "ac_cv_have_decl_fdatasync(int)" | sed "$as_sed_sh"` -ac_fn_check_decl "$LINENO" "fdatasync(int)" "$as_ac_Symbol" "$ac_includes_default" "$ac_c_undeclared_builtin_options" "CFLAGS" -if eval test \"x\$"$as_ac_Symbol"\" = x"yes" -then : - 
ac_fn_c_check_func "$LINENO" "fdatasync" "ac_cv_func_fdatasync" -if test "x$ac_cv_func_fdatasync" = xyes -then : - printf "%s\n" "#define HAVE_FDATASYNC 1" >>confdefs.h - -fi - -fi - -if test $enable_plugins != no; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing dlsym" >&5 -printf %s "checking for library containing dlsym... " >&6; } -if test ${ac_cv_search_dlsym+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char dlsym (void); -int -main (void) -{ -return dlsym (); - ; - return 0; -} -_ACEOF -for ac_lib in '' dl -do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO" -then : - ac_cv_search_dlsym=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext - if test ${ac_cv_search_dlsym+y} -then : - break -fi -done -if test ${ac_cv_search_dlsym+y} -then : - -else case e in #( - e) ac_cv_search_dlsym=no ;; -esac -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_dlsym" >&5 -printf "%s\n" "$ac_cv_search_dlsym" >&6; } -ac_res=$ac_cv_search_dlsym -if test "$ac_res" != no -then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -else case e in #( - e) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? 
"dlsym() not found - -Plugin support requires dynamic linking facilities from the operating system. -Either configure with --disable-plugins or resolve this error to build HTSlib." "$LINENO" 5 ;; -esac -fi - - # Check if the compiler understands -rdynamic - # TODO Test whether this is required and/or needs tweaking per-platform - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -rdynamic" >&5 -printf %s "checking whether the compiler accepts -rdynamic... " >&6; } -if test ${hts_cv_check__rdynamic+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_save_cflags=$CFLAGS - ac_check_save_ldflags=$LDFLAGS - CFLAGS="$CFLAGS -rdynamic" - LDFLAGS="$LDFLAGS -rdynamic" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - hts_cv_check__rdynamic=yes - if test "xrdynamic_flag" != x -then : - eval rdynamic_flag="-rdynamic" -fi -else case e in #( - e) hts_cv_check__rdynamic=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ac_check_save_cflags - LDFLAGS=$ac_check_save_ldflags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__rdynamic" >&5 -printf "%s\n" "$hts_cv_check__rdynamic" >&6; } - - if test x"$rdynamic_flag" != "xno" -then : - LDFLAGS="$LDFLAGS $rdynamic_flag" - static_LDFLAGS="$static_LDFLAGS $rdynamic_flag" -fi - case "$ac_cv_search_dlsym" in - -l*) static_LIBS="$static_LIBS $ac_cv_search_dlsym" ;; - esac - -printf "%s\n" "#define ENABLE_PLUGINS 1" >>confdefs.h - - - -printf "%s\n" "#define PLUGIN_EXT \"$PLUGIN_EXT\"" >>confdefs.h - -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing log" >&5 -printf %s "checking for library containing log... 
" >&6; } -if test ${ac_cv_search_log+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char log (void); -int -main (void) -{ -return log (); - ; - return 0; -} -_ACEOF -for ac_lib in '' m -do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO" -then : - ac_cv_search_log=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext - if test ${ac_cv_search_log+y} -then : - break -fi -done -if test ${ac_cv_search_log+y} -then : - -else case e in #( - e) ac_cv_search_log=no ;; -esac -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_log" >&5 -printf "%s\n" "$ac_cv_search_log" >&6; } -ac_res=$ac_cv_search_log -if test "$ac_res" != no -then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -else case e in #( - e) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "log() not found - -HTSLIB requires a working floating-point math library. -FAILED. This error must be resolved in order to build HTSlib successfully." 
"$LINENO" 5 ;; -esac -fi - - -zlib_devel=ok -ac_fn_c_check_header_compile "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "; -" -if test "x$ac_cv_header_zlib_h" = xyes -then : - -else case e in #( - e) zlib_devel=missing ;; -esac -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for inflate in -lz" >&5 -printf %s "checking for inflate in -lz... " >&6; } -if test ${ac_cv_lib_z_inflate+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS -LIBS="-lz $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char inflate (void); -int -main (void) -{ -return inflate (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - ac_cv_lib_z_inflate=yes -else case e in #( - e) ac_cv_lib_z_inflate=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_inflate" >&5 -printf "%s\n" "$ac_cv_lib_z_inflate" >&6; } -if test "x$ac_cv_lib_z_inflate" = xyes -then : - printf "%s\n" "#define HAVE_LIBZ 1" >>confdefs.h - - LIBS="-lz $LIBS" - -else case e in #( - e) zlib_devel=missing ;; -esac -fi - - -if test $zlib_devel != ok; then - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "zlib development files not found - -HTSlib uses compression routines from the zlib library . 
-Building HTSlib requires zlib development files to be installed on the build -machine; you may need to ensure a package such as zlib1g-dev (on Debian or -Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions or Cygwin) -is installed. - -FAILED. This error must be resolved in order to build HTSlib successfully." "$LINENO" 5 -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing recv" >&5 -printf %s "checking for library containing recv... " >&6; } -if test ${ac_cv_search_recv+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). 
*/ -#ifdef __cplusplus -extern "C" -#endif -char recv (void); -int -main (void) -{ -return recv (); - ; - return 0; -} -_ACEOF -for ac_lib in '' socket ws2_32 -do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO" -then : - ac_cv_search_recv=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext - if test ${ac_cv_search_recv+y} -then : - break -fi -done -if test ${ac_cv_search_recv+y} -then : - -else case e in #( - e) ac_cv_search_recv=no ;; -esac -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_recv" >&5 -printf "%s\n" "$ac_cv_search_recv" >&6; } -ac_res=$ac_cv_search_recv -if test "$ac_res" != no -then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -if test "$ac_cv_search_recv" != "none required" -then - static_LIBS="$static_LIBS $ac_cv_search_recv" -fi -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing recv using declaration" >&5 -printf %s "checking for library containing recv using declaration... " >&6; } - LIBS="-lws2_32 $LIBS" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main (void) -{ -recv(0, 0, 0, 0); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: -lws2_32" >&5 -printf "%s\n" "-lws2_32" >&6; } - static_LIBS="$static_LIBS -lws2_32" -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? 
"unable to find the recv() function" "$LINENO" 5 ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext ;; -esac -fi - - -if test "$enable_bz2" != no; then - bz2_devel=ok - ac_fn_c_check_header_compile "$LINENO" "bzlib.h" "ac_cv_header_bzlib_h" "; -" -if test "x$ac_cv_header_bzlib_h" = xyes -then : - -else case e in #( - e) bz2_devel=missing ;; -esac -fi - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BZ2_bzBuffToBuffCompress in -lbz2" >&5 -printf %s "checking for BZ2_bzBuffToBuffCompress in -lbz2... " >&6; } -if test ${ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS -LIBS="-lbz2 $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). 
*/ -#ifdef __cplusplus -extern "C" -#endif -char BZ2_bzBuffToBuffCompress (void); -int -main (void) -{ -return BZ2_bzBuffToBuffCompress (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress=yes -else case e in #( - e) ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" >&5 -printf "%s\n" "$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" >&6; } -if test "x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" = xyes -then : - printf "%s\n" "#define HAVE_LIBBZ2 1" >>confdefs.h - - LIBS="-lbz2 $LIBS" - -else case e in #( - e) bz2_devel=missing ;; -esac -fi - - if test $bz2_devel != ok; then - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "libbzip2 development files not found - -The CRAM format may use bzip2 compression, which is implemented in HTSlib -by using compression routines from libbzip2 . - -Building HTSlib requires libbzip2 development files to be installed on the -build machine; you may need to ensure a package such as libbz2-dev (on Debian -or Ubuntu Linux) or bzip2-devel (on RPM-based Linux distributions or Cygwin) -is installed. - -Either configure with --disable-bz2 (which will make some CRAM files -produced elsewhere unreadable) or resolve this error to build HTSlib." 
"$LINENO" 5 - fi - if test -n "$PKG_CONFIG" && "$PKG_CONFIG" --exists bzip2; then - pc_requires="$pc_requires bzip2" - else - private_LIBS="$private_LIBS -lbz2" - fi - static_LIBS="$static_LIBS -lbz2" -fi - -if test "$enable_lzma" != no; then - lzma_devel=ok - for ac_header in lzma.h -do : - ac_fn_c_check_header_compile "$LINENO" "lzma.h" "ac_cv_header_lzma_h" "; -" -if test "x$ac_cv_header_lzma_h" = xyes -then : - printf "%s\n" "#define HAVE_LZMA_H 1" >>confdefs.h - -else case e in #( - e) lzma_devel=header-missing ;; -esac -fi - -done - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for lzma_easy_buffer_encode in -llzma" >&5 -printf %s "checking for lzma_easy_buffer_encode in -llzma... " >&6; } -if test ${ac_cv_lib_lzma_lzma_easy_buffer_encode+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS -LIBS="-llzma $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). 
*/ -#ifdef __cplusplus -extern "C" -#endif -char lzma_easy_buffer_encode (void); -int -main (void) -{ -return lzma_easy_buffer_encode (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - ac_cv_lib_lzma_lzma_easy_buffer_encode=yes -else case e in #( - e) ac_cv_lib_lzma_lzma_easy_buffer_encode=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lzma_lzma_easy_buffer_encode" >&5 -printf "%s\n" "$ac_cv_lib_lzma_lzma_easy_buffer_encode" >&6; } -if test "x$ac_cv_lib_lzma_lzma_easy_buffer_encode" = xyes -then : - printf "%s\n" "#define HAVE_LIBLZMA 1" >>confdefs.h - - LIBS="-llzma $LIBS" - -else case e in #( - e) lzma_devel=missing ;; -esac -fi - - if test $lzma_devel = missing; then - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "liblzma development files not found - -The CRAM format may use LZMA2 compression, which is implemented in HTSlib -by using compression routines from liblzma . - -Building HTSlib requires liblzma development files to be installed on the -build machine; you may need to ensure a package such as liblzma-dev (on Debian -or Ubuntu Linux), xz-devel (on RPM-based Linux distributions or Cygwin), or -xz (via Homebrew on macOS) is installed; or build XZ Utils from source. - -Either configure with --disable-lzma (which will make some CRAM files -produced elsewhere unreadable) or resolve this error to build HTSlib." 
"$LINENO" 5 - fi - pc_requires="$pc_requires liblzma" - static_LIBS="$static_LIBS -llzma" -fi - -if test "x$with_external_htscodecs" != "xno" -then : - libhtscodecs=ok - ac_fn_c_check_header_compile "$LINENO" "htscodecs/rANS_static4x16.h" "ac_cv_header_htscodecs_rANS_static4x16_h" "; -" -if test "x$ac_cv_header_htscodecs_rANS_static4x16_h" = xyes -then : - -else case e in #( - e) libhtscodecs='missing header' ;; -esac -fi - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for rans_compress_bound_4x16 in -lhtscodecs" >&5 -printf %s "checking for rans_compress_bound_4x16 in -lhtscodecs... " >&6; } -if test ${ac_cv_lib_htscodecs_rans_compress_bound_4x16+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS -LIBS="-lhtscodecs $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). 
*/ -#ifdef __cplusplus -extern "C" -#endif -char rans_compress_bound_4x16 (void); -int -main (void) -{ -return rans_compress_bound_4x16 (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - ac_cv_lib_htscodecs_rans_compress_bound_4x16=yes -else case e in #( - e) ac_cv_lib_htscodecs_rans_compress_bound_4x16=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_htscodecs_rans_compress_bound_4x16" >&5 -printf "%s\n" "$ac_cv_lib_htscodecs_rans_compress_bound_4x16" >&6; } -if test "x$ac_cv_lib_htscodecs_rans_compress_bound_4x16" = xyes -then : - : -else case e in #( - e) libhtscodecs='missing library' ;; -esac -fi - - if test "$libhtscodecs" = "ok" -then : - -printf "%s\n" "#define HAVE_EXTERNAL_LIBHTSCODECS 1" >>confdefs.h - - LIBS="-lhtscodecs $LIBS" - private_LIBS="-lhtscodecs $private_LIBS" - static_LIBS="-lhtscodecs $static_LIBS" - selected_htscodecs_mk="htscodecs_external.mk" -else case e in #( - e) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "libhtscodecs development files not found: $libhtscodecs - -You asked to use an external htscodecs library, but do not have the -required header / library files. You either need to supply these and -if necessary set CPPFLAGS and LDFLAGS so the compiler can find them; -or configure using --without-external-htscodecs to build the required -functions from the htscodecs submodule. -" "$LINENO" 5 ;; -esac -fi -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether htscodecs files are present" >&5 -printf %s "checking whether htscodecs files are present... 
" >&6; } - if test -e "$srcdir/htscodecs/htscodecs/rANS_static4x16.h" -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } - selected_htscodecs_mk="htscodecs_bundled.mk" -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - if test -e "$srcdir/.git" -then : - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "htscodecs submodule files not present. - -HTSlib uses some functions from the htscodecs project, which is normally -included as a submodule. Try running: - - git submodule update --init --recursive - -in the top-level htslib directory to update it, and then re-run configure. -" "$LINENO" 5 -else case e in #( - e) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "htscodecs submodule files not present. - -You have an incomplete distribution. Please try downloading one of the -official releases from https://www.htslib.org -" "$LINENO" 5 ;; -esac -fi ;; -esac -fi ;; -esac -fi - -if test "x$with_libdeflate" != "xno" -then : - libdeflate=ok - ac_fn_c_check_header_compile "$LINENO" "libdeflate.h" "ac_cv_header_libdeflate_h" "; -" -if test "x$ac_cv_header_libdeflate_h" = xyes -then : - -else case e in #( - e) libdeflate='missing header' ;; -esac -fi - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libdeflate_deflate_compress in -ldeflate" >&5 -printf %s "checking for libdeflate_deflate_compress in -ldeflate... " >&6; } -if test ${ac_cv_lib_deflate_libdeflate_deflate_compress+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS -LIBS="-ldeflate $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. 
- Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char libdeflate_deflate_compress (void); -int -main (void) -{ -return libdeflate_deflate_compress (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - ac_cv_lib_deflate_libdeflate_deflate_compress=yes -else case e in #( - e) ac_cv_lib_deflate_libdeflate_deflate_compress=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_deflate_libdeflate_deflate_compress" >&5 -printf "%s\n" "$ac_cv_lib_deflate_libdeflate_deflate_compress" >&6; } -if test "x$ac_cv_lib_deflate_libdeflate_deflate_compress" = xyes -then : - : -else case e in #( - e) libdeflate='missing library' ;; -esac -fi - - if test "$libdeflate" = "ok" -then : - -printf "%s\n" "#define HAVE_LIBDEFLATE 1" >>confdefs.h - - LIBS="-ldeflate $LIBS" - private_LIBS="$private_LIBS -ldeflate" - static_LIBS="$static_LIBS -ldeflate" -else case e in #( - e) if test "x$with_libdeflate" != "xcheck" -then : - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "libdeflate development files not found: $libdeflate - -You requested libdeflate, but do not have the required header / library -files. The source for libdeflate is available from -. You may have to adjust -search paths in CPPFLAGS and/or LDFLAGS if the header and library -are not currently on them. - -Either configure with --without-libdeflate or resolve this error to build -HTSlib." 
"$LINENO" 5 -fi ;; -esac -fi -fi - -libcurl=disabled -if test "$enable_libcurl" != no; then - libcurl_devel=ok - ac_fn_c_check_header_compile "$LINENO" "curl/curl.h" "ac_cv_header_curl_curl_h" "; -" -if test "x$ac_cv_header_curl_curl_h" = xyes -then : - -else case e in #( - e) libcurl_devel="headers not found" ;; -esac -fi - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for curl_easy_pause in -lcurl" >&5 -printf %s "checking for curl_easy_pause in -lcurl... " >&6; } -if test ${ac_cv_lib_curl_curl_easy_pause+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS -LIBS="-lcurl $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char curl_easy_pause (void); -int -main (void) -{ -return curl_easy_pause (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - ac_cv_lib_curl_curl_easy_pause=yes -else case e in #( - e) ac_cv_lib_curl_curl_easy_pause=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curl_curl_easy_pause" >&5 -printf "%s\n" "$ac_cv_lib_curl_curl_easy_pause" >&6; } -if test "x$ac_cv_lib_curl_curl_easy_pause" = xyes -then : - : -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for curl_easy_init in -lcurl" >&5 -printf %s "checking for curl_easy_init in -lcurl... 
" >&6; } -if test ${ac_cv_lib_curl_curl_easy_init+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS -LIBS="-lcurl $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char curl_easy_init (void); -int -main (void) -{ -return curl_easy_init (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - ac_cv_lib_curl_curl_easy_init=yes -else case e in #( - e) ac_cv_lib_curl_curl_easy_init=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curl_curl_easy_init" >&5 -printf "%s\n" "$ac_cv_lib_curl_curl_easy_init" >&6; } -if test "x$ac_cv_lib_curl_curl_easy_init" = xyes -then : - libcurl_devel="library is too old (7.18+ required)" -else case e in #( - e) libcurl_devel="library not found" ;; -esac -fi - ;; -esac -fi - - - if test "$libcurl_devel" = ok; then - -printf "%s\n" "#define HAVE_LIBCURL 1" >>confdefs.h - - libcurl=enabled - elif test "$enable_libcurl" = check; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: libcurl not enabled: $libcurl_devel" >&5 -printf "%s\n" "$as_me: WARNING: libcurl not enabled: $libcurl_devel" >&2;} - else - cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "libcurl $libcurl_devel - -Support for HTTPS and other SSL-based URLs requires routines from the libcurl -library . 
Building HTSlib with libcurl enabled -requires libcurl development files to be installed on the build machine; you -may need to ensure a package such as libcurl4-{gnutls,nss,openssl}-dev (on -Debian or Ubuntu Linux) or libcurl-devel (on RPM-based Linux distributions -or Cygwin) is installed. - -Either configure with --disable-libcurl or resolve this error to build HTSlib." "$LINENO" 5 - fi - - if test "$libcurl" = enabled ; then - if test "$enable_plugins" != yes ; then - static_LIBS="$static_LIBS -lcurl" - fi - fi -fi - - -gcs=disabled -if test "$enable_gcs" != no; then - if test $libcurl = enabled; then - -printf "%s\n" "#define ENABLE_GCS 1" >>confdefs.h - - gcs=enabled - else - case "$enable_gcs" in - check) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: GCS support not enabled: requires libcurl support" >&5 -printf "%s\n" "$as_me: WARNING: GCS support not enabled: requires libcurl support" >&2;} ;; - *) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "GCS support not enabled - -Support for Google Cloud Storage URLs requires libcurl support to be enabled -in HTSlib. Configure with --enable-libcurl in order to use GCS URLs." "$LINENO" 5 - ;; - esac - fi -fi - - -s3=disabled -if test "$enable_s3" != no; then - if test $libcurl = enabled; then - s3=enabled - need_crypto="$enable_s3" - else - case "$enable_s3" in - check) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: S3 support not enabled: requires libcurl support" >&5 -printf "%s\n" "$as_me: WARNING: S3 support not enabled: requires libcurl support" >&2;} ;; - *) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "S3 support not enabled - -Support for Amazon AWS S3 URLs requires libcurl support to be enabled -in HTSlib. Configure with --enable-libcurl in order to use S3 URLs." 
"$LINENO" 5 - ;; - esac - fi -fi - -CRYPTO_LIBS= -if test $need_crypto != no; then - ac_fn_c_check_func "$LINENO" "CCHmac" "ac_cv_func_CCHmac" -if test "x$ac_cv_func_CCHmac" = xyes -then : - -printf "%s\n" "#define HAVE_COMMONCRYPTO 1" >>confdefs.h - -else case e in #( - e) save_LIBS=$LIBS - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing HMAC" >&5 -printf %s "checking for library containing HMAC... " >&6; } -if test ${ac_cv_search_HMAC+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char HMAC (void); -int -main (void) -{ -return HMAC (); - ; - return 0; -} -_ACEOF -for ac_lib in '' crypto -do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO" -then : - ac_cv_search_HMAC=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext - if test ${ac_cv_search_HMAC+y} -then : - break -fi -done -if test ${ac_cv_search_HMAC+y} -then : - -else case e in #( - e) ac_cv_search_HMAC=no ;; -esac -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_HMAC" >&5 -printf "%s\n" "$ac_cv_search_HMAC" >&6; } -ac_res=$ac_cv_search_HMAC -if test "$ac_res" != no -then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -printf "%s\n" "#define HAVE_HMAC 1" >>confdefs.h - - case "$ac_cv_search_HMAC" in - -l*) 
CRYPTO_LIBS=$ac_cv_search_HMAC ;; - esac -else case e in #( - e) case "$need_crypto" in - check) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: S3 support not enabled: requires SSL development files" >&5 -printf "%s\n" "$as_me: WARNING: S3 support not enabled: requires SSL development files" >&2;} - s3=disabled ;; - *) cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - as_fn_error $? "SSL development files not found - -Support for AWS S3 URLs requires routines from an SSL library. Building -HTSlib with libcurl enabled requires SSL development files to be installed -on the build machine; you may need to ensure a package such as libgnutls-dev, -libnss3-dev, or libssl-dev (on Debian or Ubuntu Linux, corresponding to the -libcurl4-*-dev package installed), or openssl-devel (on RPM-based Linux -distributions or Cygwin) is installed. - -Either configure with --disable-s3 or resolve this error to build HTSlib." "$LINENO" 5 ;; - esac ;; -esac -fi - - LIBS=$save_LIBS ;; -esac -fi - - if test "$enable_plugins" != yes ; then - static_LIBS="$static_LIBS $CRYPTO_LIBS" - fi -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing regcomp" >&5 -printf %s "checking for library containing regcomp... " >&6; } -if test ${ac_cv_search_regcomp+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). 
*/ -#ifdef __cplusplus -extern "C" -#endif -char regcomp (void); -int -main (void) -{ -return regcomp (); - ; - return 0; -} -_ACEOF -for ac_lib in '' regex -do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO" -then : - ac_cv_search_regcomp=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext - if test ${ac_cv_search_regcomp+y} -then : - break -fi -done -if test ${ac_cv_search_regcomp+y} -then : - -else case e in #( - e) ac_cv_search_regcomp=no ;; -esac -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_regcomp" >&5 -printf "%s\n" "$ac_cv_search_regcomp" >&6; } -ac_res=$ac_cv_search_regcomp -if test "$ac_res" != no -then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - libregex=needed -fi - - - -if test "$s3" = enabled ; then - -printf "%s\n" "#define ENABLE_S3 1" >>confdefs.h - -fi - -if test "x$hts_late_cflags" != x -then : - CFLAGS="$CFLAGS $hts_late_cflags" -fi - - - - - - - - - -ac_config_files="$ac_config_files config.mk htslib.pc.tmp:htslib.pc.in" - -ac_config_links="$ac_config_links htscodecs.mk:$selected_htscodecs_mk" - - -if test "$srcdir" != .; then - # Set up for a separate build directory. As HTSlib uses a non-recursive - # makefile, we need to create additional build subdirectories explicitly. 
- ac_config_links="$ac_config_links Makefile:Makefile htslib.mk:htslib.mk" - - ac_config_files="$ac_config_files htslib_vars.mk:builddir_vars.mk.in" - - ac_config_commands="$ac_config_commands mkdir" - -fi - -# @HTSDIRslash_if_relsrcdir@ will be empty when $srcdir is absolute -case "$srcdir" in - /*) HTSDIRslash_if_relsrcdir= ;; - *) HTSDIRslash_if_relsrcdir='$(HTSDIR)/' ;; -esac - - -cat >confcache <<\_ACEOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs, see configure's option --config-cache. -# It is not useful on other systems. If it contains results you don't -# want to keep, you may remove or edit it. -# -# config.status only pays attention to the cache file if you give it -# the --recheck option to rerun configure. -# -# 'ac_cv_env_foo' variables (set or unset) will be overridden when -# loading this file, other *unset* 'ac_cv_foo' will be assigned the -# following values. - -_ACEOF - -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, we kill variables containing newlines. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. 
-( - for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( - *) { eval $ac_var=; unset $ac_var;} ;; - esac ;; - esac - done - - (set) 2>&1 | - case $as_nl`(ac_space=' '; set) 2>&1` in #( - *${as_nl}ac_space=\ *) - # 'set' does not quote correctly, so add quotes: double-quote - # substitution turns \\\\ into \\, and sed turns \\ into \. - sed -n \ - "s/'/'\\\\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" - ;; #( - *) - # 'set' quotes correctly as required by POSIX, so do not add quotes. - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" - ;; - esac | - sort -) | - sed ' - /^ac_cv_env_/b end - t clear - :clear - s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ - t end - s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ - :end' >>confcache -if diff "$cache_file" confcache >/dev/null 2>&1; then :; else - if test -w "$cache_file"; then - if test "x$cache_file" != "x/dev/null"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 -printf "%s\n" "$as_me: updating cache $cache_file" >&6;} - if test ! 
-f "$cache_file" || test -h "$cache_file"; then - cat confcache >"$cache_file" - else - case $cache_file in #( - */* | ?:*) - mv -f confcache "$cache_file"$$ && - mv -f "$cache_file"$$ "$cache_file" ;; #( - *) - mv -f confcache "$cache_file" ;; - esac - fi - fi - else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 -printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} - fi -fi -rm -f confcache - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. -test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -DEFS=-DHAVE_CONFIG_H - -ac_libobjs= -ac_ltlibobjs= -U= -for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue - # 1. Remove the extension, and $U if already installed. - ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` - # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR - # will be set to the directory where LIBOBJS objects are built. - as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" - as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' -done -LIBOBJS=$ac_libobjs - -LTLIBOBJS=$ac_ltlibobjs - - -# Check whether --enable-year2038 was given. -if test ${enable_year2038+y} -then : - enableval=$enable_year2038; -fi - - -: "${CONFIG_STATUS=./config.status}" -ac_write_fail=0 -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files $CONFIG_STATUS" -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 -printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} -as_write_fail=0 -cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 -#! $SHELL -# Generated by $as_me. -# Run this file to recreate the current configuration. -# Compiler output produced by configure, useful for debugging -# configure, is in config.log if it exists. 
- -debug=false -ac_cs_recheck=false -ac_cs_silent=false - -SHELL=\${CONFIG_SHELL-$SHELL} -export SHELL -_ASEOF -cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 -## -------------------- ## -## M4sh Initialization. ## -## -------------------- ## - -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh -if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 -then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else case e in #( - e) case `(set -o) 2>/dev/null` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac ;; -esac -fi - - - -# Reset variables that may have inherited troublesome values from -# the environment. - -# IFS needs to be set, to space, tab, and newline, in precisely that order. -# (If _AS_PATH_WALK were called with IFS unset, it would have the -# side effect of setting IFS to empty, thus disabling word splitting.) -# Quoting is to prevent editors from complaining about space-tab. -as_nl=' -' -export as_nl -IFS=" "" $as_nl" - -PS1='$ ' -PS2='> ' -PS4='+ ' - -# Ensure predictable behavior from utilities with locale-dependent output. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# We cannot yet rely on "unset" to work, but we need these variables -# to be unset--not just set to an empty or harmless value--now, to -# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct -# also avoids known problems related to "unset" and subshell syntax -# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). -for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH -do eval test \${$as_var+y} \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done - -# Ensure that fds 0, 1, and 2 are open. 
-if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi -if (exec 3>&2) ; then :; else exec 2>/dev/null; fi - -# The user is always right. -if ${PATH_SEPARATOR+false} :; then - PATH_SEPARATOR=: - (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { - (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || - PATH_SEPARATOR=';' - } -fi - - -# Find who we are. Look in the path if we contain no directory separator. -as_myself= -case $0 in #(( - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - test -r "$as_dir$0" && as_myself=$as_dir$0 && break - done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as 'sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - exit 1 -fi - - - -# as_fn_error STATUS ERROR [LINENO LOG_FD] -# ---------------------------------------- -# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are -# provided, also output the error to LOG_FD, referencing LINENO. Then exit the -# script with STATUS, using 1 if that was 0. -as_fn_error () -{ - as_status=$1; test $as_status -eq 0 && as_status=1 - if test "$4"; then - as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 - fi - printf "%s\n" "$as_me: error: $2" >&2 - as_fn_exit $as_status -} # as_fn_error - - -# as_fn_set_status STATUS -# ----------------------- -# Set $? to STATUS, without forking. -as_fn_set_status () -{ - return $1 -} # as_fn_set_status - -# as_fn_exit STATUS -# ----------------- -# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. 
-as_fn_exit () -{ - set +e - as_fn_set_status $1 - exit $1 -} # as_fn_exit - -# as_fn_unset VAR -# --------------- -# Portably unset VAR. -as_fn_unset () -{ - { eval $1=; unset $1;} -} -as_unset=as_fn_unset - -# as_fn_append VAR VALUE -# ---------------------- -# Append the text in VALUE to the end of the definition contained in VAR. Take -# advantage of any shell optimizations that allow amortized linear growth over -# repeated appends, instead of the typical quadratic growth present in naive -# implementations. -if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null -then : - eval 'as_fn_append () - { - eval $1+=\$2 - }' -else case e in #( - e) as_fn_append () - { - eval $1=\$$1\$2 - } ;; -esac -fi # as_fn_append - -# as_fn_arith ARG... -# ------------------ -# Perform arithmetic evaluation on the ARGs, and store the result in the -# global $as_val. Take advantage of shells that can avoid forks. The arguments -# must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null -then : - eval 'as_fn_arith () - { - as_val=$(( $* )) - }' -else case e in #( - e) as_fn_arith () - { - as_val=`expr "$@" || test $? -eq 1` - } ;; -esac -fi # as_fn_arith - - -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -printf "%s\n" X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# Avoid depending upon Character Ranges. 
-as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - - -# Determine whether it's possible to make 'echo' print without a newline. -# These variables are no longer used directly by Autoconf, but are AC_SUBSTed -# for compatibility with existing Makefiles. -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in #((((( --n*) - case `echo 'xy\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - xy) ECHO_C='\c';; - *) echo `echo ksh88 bug on AIX 6.1` > /dev/null - ECHO_T=' ';; - esac;; -*) - ECHO_N='-n';; -esac - -# For backward compatibility with old third-party macros, we provide -# the shell variables $as_echo and $as_echo_n. New code should use -# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. -as_echo='printf %s\n' -as_echo_n='printf %s' - -rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir 2>/dev/null -fi -if (echo >conf$$.file) 2>/dev/null; then - if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. - # In both cases, we have to default to 'cp -pR'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || - as_ln_s='cp -pR' - elif ln conf$$.file conf$$ 2>/dev/null; then - as_ln_s=ln - else - as_ln_s='cp -pR' - fi -else - as_ln_s='cp -pR' -fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null - - -# as_fn_mkdir_p -# ------------- -# Create "$as_dir" as a directory, including parents if necessary. 
-as_fn_mkdir_p () -{ - - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || eval $as_mkdir_p || { - as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -printf "%s\n" X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break - done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" - - -} # as_fn_mkdir_p -if mkdir -p . 2>/dev/null; then - as_mkdir_p='mkdir -p "$as_dir"' -else - test -d ./-p && rmdir ./-p - as_mkdir_p=false -fi - - -# as_fn_executable_p FILE -# ----------------------- -# Test if FILE is an executable regular file. -as_fn_executable_p () -{ - test -f "$1" && test -x "$1" -} # as_fn_executable_p -as_test_x='test -x' -as_executable_p=as_fn_executable_p - -# Sed expression to map a string onto a valid CPP name. -as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" -as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated - -# Sed expression to map a string onto a valid variable name. -as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" -as_tr_sh="eval sed '$as_sed_sh'" # deprecated - - -exec 6>&1 -## ----------------------------------- ## -## Main body of $CONFIG_STATUS script. ## -## ----------------------------------- ## -_ASEOF -test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# Save the log message, to keep $0 and so on meaningful, and to -# report actual input values of CONFIG_FILES etc. 
instead of their -# values after options handling. -ac_log=" -This file was extended by HTSlib $as_me 1.19.1, which was -generated by GNU Autoconf 2.72. Invocation command line was - - CONFIG_FILES = $CONFIG_FILES - CONFIG_HEADERS = $CONFIG_HEADERS - CONFIG_LINKS = $CONFIG_LINKS - CONFIG_COMMANDS = $CONFIG_COMMANDS - $ $0 $@ - -on `(hostname || uname -n) 2>/dev/null | sed 1q` -" - -_ACEOF - -case $ac_config_files in *" -"*) set x $ac_config_files; shift; ac_config_files=$*;; -esac - -case $ac_config_headers in *" -"*) set x $ac_config_headers; shift; ac_config_headers=$*;; -esac - - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -# Files that config.status was made for. -config_files="$ac_config_files" -config_headers="$ac_config_headers" -config_links="$ac_config_links" -config_commands="$ac_config_commands" - -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -ac_cs_usage="\ -'$as_me' instantiates files and other configuration actions -from templates according to the current configuration. Unless the files -and actions are specified as TAGs, all are instantiated by default. - -Usage: $0 [OPTION]... [TAG]... - - -h, --help print this help, then exit - -V, --version print version number and configuration settings, then exit - --config print configuration, then exit - -q, --quiet, --silent - do not print progress messages - -d, --debug don't remove temporary files - --recheck update $as_me by reconfiguring in the same conditions - --file=FILE[:TEMPLATE] - instantiate the configuration file FILE - --header=FILE[:TEMPLATE] - instantiate the configuration header FILE - -Configuration files: -$config_files - -Configuration headers: -$config_headers - -Configuration links: -$config_links - -Configuration commands: -$config_commands - -Report bugs to . -HTSlib home page: ." 
- -_ACEOF -ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` -ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_cs_config='$ac_cs_config_escaped' -ac_cs_version="\\ -HTSlib config.status 1.19.1 -configured by $0, generated by GNU Autoconf 2.72, - with options \\"\$ac_cs_config\\" - -Copyright (C) 2023 Free Software Foundation, Inc. -This config.status script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it." - -ac_pwd='$ac_pwd' -srcdir='$srcdir' -test -n "\$AWK" || AWK=awk -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# The default lists apply if the user does not specify any file. -ac_need_defaults=: -while test $# != 0 -do - case $1 in - --*=?*) - ac_option=`expr "X$1" : 'X\([^=]*\)='` - ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` - ac_shift=: - ;; - --*=) - ac_option=`expr "X$1" : 'X\([^=]*\)='` - ac_optarg= - ac_shift=: - ;; - *) - ac_option=$1 - ac_optarg=$2 - ac_shift=shift - ;; - esac - - case $ac_option in - # Handling of the options. - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - ac_cs_recheck=: ;; - --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - printf "%s\n" "$ac_cs_version"; exit ;; - --config | --confi | --conf | --con | --co | --c ) - printf "%s\n" "$ac_cs_config"; exit ;; - --debug | --debu | --deb | --de | --d | -d ) - debug=: ;; - --file | --fil | --fi | --f ) - $ac_shift - case $ac_optarg in - *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; - '') as_fn_error $? 
"missing file argument" ;; - esac - as_fn_append CONFIG_FILES " '$ac_optarg'" - ac_need_defaults=false;; - --header | --heade | --head | --hea ) - $ac_shift - case $ac_optarg in - *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; - esac - as_fn_append CONFIG_HEADERS " '$ac_optarg'" - ac_need_defaults=false;; - --he | --h) - # Conflict between --help and --header - as_fn_error $? "ambiguous option: '$1' -Try '$0 --help' for more information.";; - --help | --hel | -h ) - printf "%s\n" "$ac_cs_usage"; exit ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil | --si | --s) - ac_cs_silent=: ;; - - # This is an error. - -*) as_fn_error $? "unrecognized option: '$1' -Try '$0 --help' for more information." ;; - - *) as_fn_append ac_config_targets " $1" - ac_need_defaults=false ;; - - esac - shift -done - -ac_configure_extra_args= - -if $ac_cs_silent; then - exec 6>/dev/null - ac_configure_extra_args="$ac_configure_extra_args --silent" -fi - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -if \$ac_cs_recheck; then - set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion - shift - \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 - CONFIG_SHELL='$SHELL' - export CONFIG_SHELL - exec "\$@" -fi - -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -exec 5>>config.log -{ - echo - sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX -## Running $as_me. ## -_ASBOX - printf "%s\n" "$ac_log" -} >&5 - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 - -# Handling of arguments. 
-for ac_config_target in $ac_config_targets -do - case $ac_config_target in - "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; - "config.mk") CONFIG_FILES="$CONFIG_FILES config.mk" ;; - "htslib.pc.tmp") CONFIG_FILES="$CONFIG_FILES htslib.pc.tmp:htslib.pc.in" ;; - "htscodecs.mk") CONFIG_LINKS="$CONFIG_LINKS htscodecs.mk:$selected_htscodecs_mk" ;; - "Makefile") CONFIG_LINKS="$CONFIG_LINKS Makefile:Makefile" ;; - "htslib.mk") CONFIG_LINKS="$CONFIG_LINKS htslib.mk:htslib.mk" ;; - "htslib_vars.mk") CONFIG_FILES="$CONFIG_FILES htslib_vars.mk:builddir_vars.mk.in" ;; - "mkdir") CONFIG_COMMANDS="$CONFIG_COMMANDS mkdir" ;; - - *) as_fn_error $? "invalid argument: '$ac_config_target'" "$LINENO" 5;; - esac -done - - -# If the user did not use the arguments to specify the items to instantiate, -# then the envvar interface is used. Set only those that are not. -# We use the long form for the default assignment because of an extremely -# bizarre bug on SunOS 4.1.3. -if $ac_need_defaults; then - test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files - test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers - test ${CONFIG_LINKS+y} || CONFIG_LINKS=$config_links - test ${CONFIG_COMMANDS+y} || CONFIG_COMMANDS=$config_commands -fi - -# Have a temporary directory for convenience. Make it in the build tree -# simply because there is no reason against having it here, and in addition, -# creating and moving files from /tmp can sometimes cause problems. -# Hook for its removal unless debugging. -# Note that there is a small window in which the directory will not be cleaned: -# after its creation but before its name has been assigned to '$tmp'. -$debug || -{ - tmp= ac_tmp= - trap 'exit_status=$? - : "${ac_tmp:=$tmp}" - { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status -' 0 - trap 'as_fn_exit 1' 1 2 13 15 -} -# Create a (secure) tmp directory for tmp files. 
- -{ - tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && - test -d "$tmp" -} || -{ - tmp=./conf$$-$RANDOM - (umask 077 && mkdir "$tmp") -} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 -ac_tmp=$tmp - -# Set up the scripts for CONFIG_FILES section. -# No need to generate them if there are no CONFIG_FILES. -# This happens for instance with './config.status config.h'. -if test -n "$CONFIG_FILES"; then - - -ac_cr=`echo X | tr X '\015'` -# On cygwin, bash can eat \r inside `` if the user requested igncr. -# But we know of no other shell where ac_cr would be empty at this -# point, so we can use a bashism as a fallback. -if test "x$ac_cr" = x; then - eval ac_cr=\$\'\\r\' -fi -ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` -if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then - ac_cs_awk_cr='\\r' -else - ac_cs_awk_cr=$ac_cr -fi - -echo 'BEGIN {' >"$ac_tmp/subs1.awk" && -_ACEOF - - -{ - echo "cat >conf$$subs.awk <<_ACEOF" && - echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && - echo "_ACEOF" -} >conf$$subs.sh || - as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 -ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` -ac_delim='%!_!# ' -for ac_last_try in false false false false false :; do - . ./conf$$subs.sh || - as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 - - ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` - if test $ac_delim_n = $ac_delim_num; then - break - elif $ac_last_try; then - as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" - fi -done -rm -f conf$$subs.sh - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && -_ACEOF -sed -n ' -h -s/^/S["/; s/!.*/"]=/ -p -g -s/^[^!]*!// -:repl -t repl -s/'"$ac_delim"'$// -t delim -:nl -h -s/\(.\{148\}\)..*/\1/ -t more1 -s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ -p -n -b repl -:more1 -s/["\\]/\\&/g; s/^/"/; s/$/"\\/ -p -g -s/.\{148\}// -t nl -:delim -h -s/\(.\{148\}\)..*/\1/ -t more2 -s/["\\]/\\&/g; s/^/"/; s/$/"/ -p -b -:more2 -s/["\\]/\\&/g; s/^/"/; s/$/"\\/ -p -g -s/.\{148\}// -t delim -' >$CONFIG_STATUS || ac_write_fail=1 -rm -f conf$$subs.awk -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -_ACAWK -cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && - for (key in S) S_is_set[key] = 1 - FS = "" - -} -{ - line = $ 0 - nfields = split(line, field, "@") - substed = 0 - len = length(field[1]) - for (i = 2; i < nfields; i++) { - key = field[i] - keylen = length(key) - if (S_is_set[key]) { - value = S[key] - line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) - len += length(value) + length(field[++i]) - substed = 1 - } else - len += 1 + keylen - } - - print line -} - -_ACAWK -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then - sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" -else - cat -fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ - || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 -_ACEOF - -# VPATH may cause trouble with some makes, so we remove sole $(srcdir), -# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and -# trailing colons and then remove the whole line if VPATH becomes empty -# (actually we leave an empty line to preserve line numbers). 
-if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ -h -s/// -s/^/:/ -s/[ ]*$/:/ -s/:\$(srcdir):/:/g -s/:\${srcdir}:/:/g -s/:@srcdir@:/:/g -s/^:*// -s/:*$// -x -s/\(=[ ]*\).*/\1/ -G -s/\n// -s/^[^=]*=[ ]*$// -}' -fi - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -fi # test -n "$CONFIG_FILES" - -# Set up the scripts for CONFIG_HEADERS section. -# No need to generate them if there are no CONFIG_HEADERS. -# This happens for instance with './config.status Makefile'. -if test -n "$CONFIG_HEADERS"; then -cat >"$ac_tmp/defines.awk" <<\_ACAWK || -BEGIN { -_ACEOF - -# Transform confdefs.h into an awk script 'defines.awk', embedded as -# here-document in config.status, that substitutes the proper values into -# config.h.in to produce config.h. - -# Create a delimiter string that does not exist in confdefs.h, to ease -# handling of long lines. -ac_delim='%!_!# ' -for ac_last_try in false false :; do - ac_tt=`sed -n "/$ac_delim/p" confdefs.h` - if test -z "$ac_tt"; then - break - elif $ac_last_try; then - as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! " - fi -done - -# For the awk script, D is an array of macro values keyed by name, -# likewise P contains macro parameters if any. Preserve backslash -# newline sequences. 
- -ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* -sed -n ' -s/.\{148\}/&'"$ac_delim"'/g -t rset -:rset -s/^[ ]*#[ ]*define[ ][ ]*/ / -t def -d -:def -s/\\$// -t bsnl -s/["\\]/\\&/g -s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ -D["\1"]=" \3"/p -s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p -d -:bsnl -s/["\\]/\\&/g -s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ -D["\1"]=" \3\\\\\\n"\\/p -t cont -s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p -t cont -d -:cont -n -s/.\{148\}/&'"$ac_delim"'/g -t clear -:clear -s/\\$// -t bsnlc -s/["\\]/\\&/g; s/^/"/; s/$/"/p -d -:bsnlc -s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p -b cont -' >$CONFIG_STATUS || ac_write_fail=1 - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 - for (key in D) D_is_set[key] = 1 - FS = "" -} -/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { - line = \$ 0 - split(line, arg, " ") - if (arg[1] == "#") { - defundef = arg[2] - mac1 = arg[3] - } else { - defundef = substr(arg[1], 2) - mac1 = arg[2] - } - split(mac1, mac2, "(") #) - macro = mac2[1] - prefix = substr(line, 1, index(line, defundef) - 1) - if (D_is_set[macro]) { - # Preserve the white space surrounding the "#". - print prefix "define", macro P[macro] D[macro] - next - } else { - # Replace #undef with comments. This is necessary, for example, - # in the case of _POSIX_SOURCE, which is predefined and required - # on some systems where configure will not decide to define it. - if (defundef == "undef") { - print "/*", prefix defundef, macro, "*/" - next - } - } -} -{ print } -_ACAWK -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 - as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 -fi # test -n "$CONFIG_HEADERS" - - -eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :L $CONFIG_LINKS :C $CONFIG_COMMANDS" -shift -for ac_tag -do - case $ac_tag in - :[FHLC]) ac_mode=$ac_tag; continue;; - esac - case $ac_mode$ac_tag in - :[FHL]*:*);; - :L* | :C*:*) as_fn_error $? 
"invalid tag '$ac_tag'" "$LINENO" 5;; - :[FH]-) ac_tag=-:-;; - :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; - esac - ac_save_IFS=$IFS - IFS=: - set x $ac_tag - IFS=$ac_save_IFS - shift - ac_file=$1 - shift - - case $ac_mode in - :L) ac_source=$1;; - :[FH]) - ac_file_inputs= - for ac_f - do - case $ac_f in - -) ac_f="$ac_tmp/stdin";; - *) # Look for the file first in the build tree, then in the source tree - # (if the path is not absolute). The absolute path cannot be DOS-style, - # because $ac_f cannot contain ':'. - test -f "$ac_f" || - case $ac_f in - [\\/$]*) false;; - *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; - esac || - as_fn_error 1 "cannot find input file: '$ac_f'" "$LINENO" 5;; - esac - case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac - as_fn_append ac_file_inputs " '$ac_f'" - done - - # Let's still pretend it is 'configure' which instantiates (i.e., don't - # use $as_me), people would be surprised to read: - # /* config.h. Generated by config.status. */ - configure_input='Generated from '` - printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' - `' by configure.' - if test x"$ac_file" != x-; then - configure_input="$ac_file. $configure_input" - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 -printf "%s\n" "$as_me: creating $ac_file" >&6;} - fi - # Neutralize special characters interpreted by sed in replacement strings. - case $configure_input in #( - *\&* | *\|* | *\\* ) - ac_sed_conf_input=`printf "%s\n" "$configure_input" | - sed 's/[\\\\&|]/\\\\&/g'`;; #( - *) ac_sed_conf_input=$configure_input;; - esac - - case $ac_tag in - *:-:* | *:-) cat >"$ac_tmp/stdin" \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; - esac - ;; - esac - - ac_dir=`$as_dirname -- "$ac_file" || -$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$ac_file" : 'X\(//\)[^/]' \| \ - X"$ac_file" : 'X\(//\)$' \| \ - X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || -printf "%s\n" X"$ac_file" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - as_dir="$ac_dir"; as_fn_mkdir_p - ac_builddir=. - -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) - ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` - # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix - -case $srcdir in - .) # We are building in place. - ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; -esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - - case $ac_mode in - :F) - # - # CONFIG_FILE - # - -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# If the template does not know about datarootdir, expand it. -# FIXME: This hack should be removed a few years after 2.60. 
-ac_datarootdir_hack=; ac_datarootdir_seen= -ac_sed_dataroot=' -/datarootdir/ { - p - q -} -/@datadir@/p -/@docdir@/p -/@infodir@/p -/@localedir@/p -/@mandir@/p' -case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in -*datarootdir*) ac_datarootdir_seen=yes;; -*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 - ac_datarootdir_hack=' - s&@datadir@&$datadir&g - s&@docdir@&$docdir&g - s&@infodir@&$infodir&g - s&@localedir@&$localedir&g - s&@mandir@&$mandir&g - s&\\\${datarootdir}&$datarootdir&g' ;; -esac -_ACEOF - -# Neutralize VPATH when '$srcdir' = '.'. -# Shell code in configure.ac might set extrasub. -# FIXME: do we really want to maintain this feature? -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_sed_extra="$ac_vpsub -$extrasub -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -:t -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b -s|@configure_input@|$ac_sed_conf_input|;t t -s&@top_builddir@&$ac_top_builddir_sub&;t t -s&@top_build_prefix@&$ac_top_build_prefix&;t t -s&@srcdir@&$ac_srcdir&;t t -s&@abs_srcdir@&$ac_abs_srcdir&;t t -s&@top_srcdir@&$ac_top_srcdir&;t t -s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t -s&@builddir@&$ac_builddir&;t t -s&@abs_builddir@&$ac_abs_builddir&;t t -s&@abs_top_builddir@&$ac_abs_top_builddir&;t t -$ac_datarootdir_hack -" -eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ - >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 - -test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && - { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && - { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ - "$ac_tmp/out"`; test -z "$ac_out"; } && - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable 'datarootdir' -which seems to be undefined. Please make sure it is defined" >&5 -printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable 'datarootdir' -which seems to be undefined. Please make sure it is defined" >&2;} - - rm -f "$ac_tmp/stdin" - case $ac_file in - -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; - *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; - esac \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - ;; - :H) - # - # CONFIG_HEADER - # - if test x"$ac_file" != x-; then - { - printf "%s\n" "/* $configure_input */" >&1 \ - && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" - } >"$ac_tmp/config.h" \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 -printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} - else - rm -f "$ac_file" - mv "$ac_tmp/config.h" "$ac_file" \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - fi - else - printf "%s\n" "/* $configure_input */" >&1 \ - && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ - || as_fn_error $? "could not create -" "$LINENO" 5 - fi - ;; - :L) - # - # CONFIG_LINK - # - - if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then - : - else - # Prefer the file from the source tree if names are identical. - if test "$ac_source" = "$ac_file" || test ! 
-r "$ac_source"; then - ac_source=$srcdir/$ac_source - fi - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: linking $ac_source to $ac_file" >&5 -printf "%s\n" "$as_me: linking $ac_source to $ac_file" >&6;} - - if test ! -r "$ac_source"; then - as_fn_error $? "$ac_source: file not found" "$LINENO" 5 - fi - rm -f "$ac_file" - - # Try a relative symlink, then a hard link, then a copy. - case $ac_source in - [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;; - *) ac_rel_source=$ac_top_build_prefix$ac_source ;; - esac - ln -s "$ac_rel_source" "$ac_file" 2>/dev/null || - ln "$ac_source" "$ac_file" 2>/dev/null || - cp -p "$ac_source" "$ac_file" || - as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5 - fi - ;; - :C) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 -printf "%s\n" "$as_me: executing $ac_file commands" >&6;} - ;; - esac - - - case $ac_file$ac_mode in - "mkdir":C) as_dir=cram; as_fn_mkdir_p - as_dir=htscodecs/htscodecs; as_fn_mkdir_p - as_dir=htscodecs/tests; as_fn_mkdir_p - as_dir=test/fuzz; as_fn_mkdir_p - as_dir=test/longrefs; as_fn_mkdir_p - as_dir=test/tabix; as_fn_mkdir_p ;; - - esac -done # for ac_tag - - -as_fn_exit 0 -_ACEOF -ac_clean_files=$ac_clean_files_save - -test $ac_write_fail = 0 || - as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 - - -# configure is writing to config.log, and then calls config.status. -# config.status does its own redirection, appending to config.log. -# Unfortunately, on DOS this fails, as config.log is still kept open -# by configure, so config.status won't be able to write to it; its -# output is simply discarded. So we exec the FD to /dev/null, -# effectively closing config.log, so it can be properly (re)opened and -# appended to by config.status. When coming back to configure, we -# need to make the FD available again. 
-if test "$no_create" != yes; then - ac_cs_success=: - ac_config_status_args= - test "$silent" = yes && - ac_config_status_args="$ac_config_status_args --quiet" - exec 5>/dev/null - $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false - exec 5>>config.log - # Use ||, not &&, to avoid exiting from the if with $? = 1, which - # would make configure fail if this is the last instruction. - $ac_cs_success || as_fn_exit 1 -fi -if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 -printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} -fi - - diff --git a/src/htslib-1.19.1/configure.ac b/src/htslib-1.19.1/configure.ac deleted file mode 100644 index 49f2cbc..0000000 --- a/src/htslib-1.19.1/configure.ac +++ /dev/null @@ -1,614 +0,0 @@ -# Configure script for htslib, a C library for high-throughput sequencing data. -# -# Copyright (C) 2015-2023 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -dnl Process this file with autoconf to produce a configure script -AC_INIT([HTSlib], m4_esyscmd_s([./version.sh 2>/dev/null]), - [samtools-help@lists.sourceforge.net], [], [http://www.htslib.org/]) -AC_PREREQ(2.63) dnl This version introduced 4-argument AC_CHECK_HEADER -AC_CONFIG_SRCDIR(hts.c) -AC_CONFIG_HEADERS(config.h) - -m4_include([m4/hts_prog_cc_warnings.m4]) -m4_include([m4/hts_check_compile_flags_needed.m4]) -m4_include([m4/hts_hide_dynamic_syms.m4]) -m4_include([m4/pkg.m4]) - -dnl Copyright notice to be copied into the generated configure script -AC_COPYRIGHT([Portions copyright (C) 2020-2023 Genome Research Ltd. - -This configure script is free software: you are free to change and -redistribute it. There is NO WARRANTY, to the extent permitted by law.]) - -dnl Notes to be copied (by autoheader) into the generated config.h.in -AH_TOP([/* If you use configure, this file provides @%:@defines reflecting your - configuration choices. If you have not run configure, suitable - conservative defaults will be used. - - Autoheader adds a number of items to this template file that are not - used by HTSlib: STDC_HEADERS and most HAVE_*_H header file defines - are immaterial, as we assume standard ISO C headers and facilities; - the PACKAGE_* defines are unused and are overridden by the more - accurate PACKAGE_VERSION as computed by the Makefile. */]) - -dnl Variant of AC_MSG_ERROR that ensures subsequent make(1) invocations fail -dnl until the configuration error is resolved and configure is run again. 
-AC_DEFUN([MSG_ERROR], - [cat > config.mk <<'EOF' -ifneq ($(MAKECMDGOALS),distclean) -$(error Resolve configure error first) -endif -EOF - AC_MSG_ERROR([$1], [$2])]) - -AC_PROG_CC -AC_PROG_RANLIB - -dnl Turn on compiler warnings, if possible -HTS_PROG_CC_WARNINGS -dnl Flags to treat warnings as errors. These need to be applied to CFLAGS -dnl later as they can interfere with some of the tests (notably AC_SEARCH_LIBS) -HTS_PROG_CC_WERROR(hts_late_cflags) - -# HTSlib uses X/Open-only facilities (M_SQRT2 etc, drand48() etc), and -# various POSIX functions that are provided by various _POSIX_C_SOURCE values -# or by _XOPEN_SOURCE >= 500. It also uses usleep(), which is removed when -# _XOPEN_SOURCE >= 700. Additionally, some definitions may require -# _XOPEN_SOURCE >= 600 on some platforms (snprintf on MinGW, -# PTHREAD_MUTEX_RECURSIVE on some Linux distributions). Hence we set it to 600. - -# Define _XOPEN_SOURCE unless the user has already done so via $CPPFLAGS etc. -AC_CHECK_DECL([_XOPEN_SOURCE], [], - [AC_DEFINE([_XOPEN_SOURCE], [600], [Specify X/Open requirements])], - []) - -dnl Options for rANS32x16 sse4.1 version - sse4.1 -HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1 -mssse3 -mpopcnt], - [AC_LANG_PROGRAM([[ - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - ]],[[ - #ifdef __x86_64__ - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b); - return _mm_popcnt_u32(*((char *) &c)); - #endif - ]])], [ - hts_cflags_sse4="$flags_needed" - AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.]) - AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.]) - AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if rANS source using SSE4.1 can be compiled. 
-]) -dnl Propagate HTSlib's unaligned access preference to htscodecs - AH_VERBATIM([UBSAN],[ -/* Prevent unaligned access in htscodecs SSE4 rANS codec */ -#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0 -#undef UBSAN -#endif]) - AC_DEFINE([UBSAN],1,[]) -]) -AC_SUBST([hts_cflags_sse4]) - -dnl Options for rANS32x16 avx2 version -HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2 -mpopcnt], [AC_LANG_PROGRAM([[ - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - ]],[[ - #ifdef __x86_64__ - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); - long long c = _mm256_extract_epi64(b, 0); - return _mm_popcnt_u32((int) c); - #endif - ]])], [ - hts_cflags_avx2="$flags_needed" - AC_SUBST([hts_cflags_avx2]) - AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.]) - AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if rANS source using AVX2 can be compiled.]) -]) - -dnl Options for rANS32x16 avx512 version -HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f -mpopcnt], - [AC_LANG_PROGRAM([[ - #ifdef __x86_64__ - #include "x86intrin.h" - #endif - ]],[[ - #ifdef __x86_64__ - __m512i a = _mm512_set1_epi32(1); - __m512i b = _mm512_add_epi32(a, a); - return _mm_popcnt_u32(*((char *) &b)); - #endif - ]])], [ - hts_cflags_avx512="$flags_needed" - AC_SUBST([hts_cflags_avx512]) - AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.]) - AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if rANS source using AVX512F can be compiled.]) -]) - -dnl Avoid chicken-and-egg problem where pkg-config supplies the -dnl PKG_PROG_PKG_CONFIG macro, but we want to use it to check -dnl for pkg-config... 
-m4_ifdef([PKG_PROG_PKG_CONFIG], [PKG_PROG_PKG_CONFIG], [PKG_CONFIG=""]) - -need_crypto=no -pc_requires= -static_LDFLAGS=$LDFLAGS -static_LIBS='-lpthread -lz -lm' -private_LIBS=$LDFLAGS - -AC_ARG_ENABLE([versioned-symbols], - [AS_HELP_STRING([--disable-versioned-symbols], - [disable versioned symbols in shared library])], - [], [enable_versioned_symbols=yes]) - -AC_ARG_ENABLE([bz2], - [AS_HELP_STRING([--disable-bz2], - [omit support for BZ2-compressed CRAM files])], - [], [enable_bz2=yes]) - -AC_ARG_ENABLE([gcs], - [AS_HELP_STRING([--enable-gcs], - [support Google Cloud Storage URLs])], - [], [enable_gcs=check]) - -AC_SYS_LARGEFILE - -AC_ARG_ENABLE([libcurl], - [AS_HELP_STRING([--enable-libcurl], - [enable libcurl-based support for http/https/etc URLs])], - [], [enable_libcurl=check]) - -AC_ARG_ENABLE([lzma], - [AS_HELP_STRING([--disable-lzma], - [omit support for LZMA-compressed CRAM files])], - [], [enable_lzma=yes]) - -AC_ARG_ENABLE([plugins], - [AS_HELP_STRING([--enable-plugins], - [enable separately-compiled plugins for file access])], - [], [enable_plugins=no]) -AC_SUBST(enable_plugins) - -AC_ARG_WITH([external-htscodecs], - [AS_HELP_STRING([--with-external-htscodecs], - [get htscodecs functions from a shared library])], - [], [with_external_htscodecs=no]) -AC_SUBST(with_external_htscodecs) - -AC_ARG_WITH([libdeflate], - [AS_HELP_STRING([--with-libdeflate], - [use libdeflate for faster crc and deflate algorithms])], - [], [with_libdeflate=check]) - -AC_ARG_WITH([plugin-dir], - [AS_HELP_STRING([--with-plugin-dir=DIR], - [plugin installation location [LIBEXECDIR/htslib]])], - [case $withval in - yes|no) MSG_ERROR([no directory specified for --with-plugin-dir]) ;; - esac], - [with_plugin_dir='$(libexecdir)/htslib']) -AC_SUBST([plugindir], $with_plugin_dir) - -AC_ARG_WITH([plugin-path], - [AS_HELP_STRING([--with-plugin-path=PATH], - [default HTS_PATH plugin search path [PLUGINDIR]])], - [case $withval in - yes) MSG_ERROR([no path specified for 
--with-plugin-path]) ;; - no) with_plugin_path= ;; - esac], - [with_plugin_path=$with_plugin_dir]) -AC_SUBST([pluginpath], $with_plugin_path) - -AC_ARG_ENABLE([s3], - [AS_HELP_STRING([--enable-s3], - [support Amazon AWS S3 URLs])], - [], [enable_s3=check]) - -basic_host=${host_alias:-unknown-`uname -s`} -AC_MSG_CHECKING([shared library type for $basic_host]) -case $basic_host in - *-cygwin* | *-CYGWIN*) - host_result="Cygwin DLL" - PLATFORM=CYGWIN - PLUGIN_EXT=.cygdll - ;; - *-darwin* | *-Darwin*) - host_result="Darwin dylib" - PLATFORM=Darwin - PLUGIN_EXT=.bundle - ;; - *-msys* | *-MSYS* | *-mingw* | *-MINGW*) - host_result="MSYS dll" - PLATFORM=MSYS - PLUGIN_EXT=.dll - # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64, - # %lld and %z printf formats work. It also enforces the snprintf to - # be C99 compliant so it returns the correct values (in kstring.c). - - # Now set by default, so no need to do it here. - # CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE=600" - ;; - *) - host_result="plain .so" - PLATFORM=default - PLUGIN_EXT=.so - ;; -esac -AC_MSG_RESULT([$host_result]) -AC_SUBST([PLATFORM]) - -dnl Check for versioned symbol support -dnl Only try for .so shared libraries as other types won't work -AS_IF([test x"$PLATFORM" = xdefault && test x"$enable_versioned_symbols" = xyes], - [AC_CACHE_CHECK([whether the linker supports versioned symbols], - [hts_cv_have_versioned_symbols], [ - save_LDFLAGS=$LDFLAGS - LDFLAGS="-Wl,-version-script,$srcdir/htslib.map $LDFLAGS" - AC_LINK_IFELSE([AC_LANG_PROGRAM()], - [hts_cv_have_versioned_symbols=yes], - [hts_cv_have_versioned_symbols=no]) - LDFLAGS=$save_LDFLAGS - ]) - AS_IF([test "x$hts_cv_have_versioned_symbols" = xyes],[ - VERSION_SCRIPT_LDFLAGS='-Wl,-version-script,$(srcprefix)htslib.map' - AC_SUBST([VERSION_SCRIPT_LDFLAGS]) - ]) -]) - -dnl Try to get more control over which symbols are exported in the shared -dnl library. 
-HTS_HIDE_DYNAMIC_SYMBOLS - -dnl FIXME This pulls in dozens of standard header checks -AC_FUNC_MMAP -AC_CHECK_FUNCS([gmtime_r fsync drand48 srand48_deterministic]) - -# Darwin has a dubious fdatasync() symbol, but no declaration in -AC_CHECK_DECL([fdatasync(int)], [AC_CHECK_FUNCS(fdatasync)]) - -if test $enable_plugins != no; then - AC_SEARCH_LIBS([dlsym], [dl], [], - [MSG_ERROR([dlsym() not found - -Plugin support requires dynamic linking facilities from the operating system. -Either configure with --disable-plugins or resolve this error to build HTSlib.])]) - # Check if the compiler understands -rdynamic - # TODO Test whether this is required and/or needs tweaking per-platform - HTS_TEST_CC_C_LD_FLAG([-rdynamic],[rdynamic_flag]) - AS_IF([test x"$rdynamic_flag" != "xno"], - [LDFLAGS="$LDFLAGS $rdynamic_flag" - static_LDFLAGS="$static_LDFLAGS $rdynamic_flag"]) - case "$ac_cv_search_dlsym" in - -l*) static_LIBS="$static_LIBS $ac_cv_search_dlsym" ;; - esac - AC_DEFINE([ENABLE_PLUGINS], 1, [Define if HTSlib should enable plugins.]) - AC_SUBST([PLUGIN_EXT]) - AC_DEFINE_UNQUOTED([PLUGIN_EXT], ["$PLUGIN_EXT"], - [Platform-dependent plugin filename extension.]) -fi - -AC_SEARCH_LIBS([log], [m], [], - [MSG_ERROR([log() not found - -HTSLIB requires a working floating-point math library. -FAILED. This error must be resolved in order to build HTSlib successfully.])]) - -zlib_devel=ok -dnl Set a trivial non-empty INCLUDES to avoid excess default includes tests -AC_CHECK_HEADER([zlib.h], [], [zlib_devel=missing], [;]) -AC_CHECK_LIB(z, inflate, [], [zlib_devel=missing]) - -if test $zlib_devel != ok; then - MSG_ERROR([zlib development files not found - -HTSlib uses compression routines from the zlib library . -Building HTSlib requires zlib development files to be installed on the build -machine; you may need to ensure a package such as zlib1g-dev (on Debian or -Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions or Cygwin) -is installed. - -FAILED. 
This error must be resolved in order to build HTSlib successfully.]) -fi - -dnl connect() etc. fns are in libc on linux, but libsocket on illumos/Solaris -AC_SEARCH_LIBS([recv], [socket ws2_32], [ -if test "$ac_cv_search_recv" != "none required" -then - static_LIBS="$static_LIBS $ac_cv_search_recv" -fi], - dnl on MinGW-i686, checking recv() linking requires an annotated declaration - [AC_MSG_CHECKING([for library containing recv using declaration]) - LIBS="-lws2_32 $LIBS" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], [[recv(0, 0, 0, 0);]])], - [AC_MSG_RESULT([-lws2_32]) - static_LIBS="$static_LIBS -lws2_32"], - [AC_MSG_RESULT([no]) - MSG_ERROR([unable to find the recv() function])])]) - -if test "$enable_bz2" != no; then - bz2_devel=ok - AC_CHECK_HEADER([bzlib.h], [], [bz2_devel=missing], [;]) - AC_CHECK_LIB([bz2], [BZ2_bzBuffToBuffCompress], [], [bz2_devel=missing]) - if test $bz2_devel != ok; then - MSG_ERROR([libbzip2 development files not found - -The CRAM format may use bzip2 compression, which is implemented in HTSlib -by using compression routines from libbzip2 . - -Building HTSlib requires libbzip2 development files to be installed on the -build machine; you may need to ensure a package such as libbz2-dev (on Debian -or Ubuntu Linux) or bzip2-devel (on RPM-based Linux distributions or Cygwin) -is installed. - -Either configure with --disable-bz2 (which will make some CRAM files -produced elsewhere unreadable) or resolve this error to build HTSlib.]) - fi -dnl Unfortunately the 'bzip2' package-cfg module is not standard. -dnl Redhat/Fedora has it; Debian/Ubuntu does not. 
- if test -n "$PKG_CONFIG" && "$PKG_CONFIG" --exists bzip2; then - pc_requires="$pc_requires bzip2" - else - private_LIBS="$private_LIBS -lbz2" - fi - static_LIBS="$static_LIBS -lbz2" -fi - -if test "$enable_lzma" != no; then - lzma_devel=ok - AC_CHECK_HEADERS([lzma.h], [], [lzma_devel=header-missing], [;]) - AC_CHECK_LIB([lzma], [lzma_easy_buffer_encode], [], [lzma_devel=missing]) - if test $lzma_devel = missing; then - MSG_ERROR([liblzma development files not found - -The CRAM format may use LZMA2 compression, which is implemented in HTSlib -by using compression routines from liblzma . - -Building HTSlib requires liblzma development files to be installed on the -build machine; you may need to ensure a package such as liblzma-dev (on Debian -or Ubuntu Linux), xz-devel (on RPM-based Linux distributions or Cygwin), or -xz (via Homebrew on macOS) is installed; or build XZ Utils from source. - -Either configure with --disable-lzma (which will make some CRAM files -produced elsewhere unreadable) or resolve this error to build HTSlib.]) - fi - pc_requires="$pc_requires liblzma" - static_LIBS="$static_LIBS -llzma" -fi - -AS_IF([test "x$with_external_htscodecs" != "xno"], - [libhtscodecs=ok - AC_CHECK_HEADER([htscodecs/rANS_static4x16.h],[], - [libhtscodecs='missing header'],[;]) - AC_CHECK_LIB([htscodecs],[rans_compress_bound_4x16], - [:],[libhtscodecs='missing library']) - AS_IF([test "$libhtscodecs" = "ok"], - [AC_DEFINE([HAVE_EXTERNAL_LIBHTSCODECS], 1, [Define if using an external libhtscodecs]) - LIBS="-lhtscodecs $LIBS" - private_LIBS="-lhtscodecs $private_LIBS" - static_LIBS="-lhtscodecs $static_LIBS" - selected_htscodecs_mk="htscodecs_external.mk"], - [MSG_ERROR([libhtscodecs development files not found: $libhtscodecs - -You asked to use an external htscodecs library, but do not have the -required header / library files. 
You either need to supply these and -if necessary set CPPFLAGS and LDFLAGS so the compiler can find them; -or configure using --without-external-htscodecs to build the required -functions from the htscodecs submodule. -])])], - [AC_MSG_CHECKING([whether htscodecs files are present]) - AS_IF([test -e "$srcdir/htscodecs/htscodecs/rANS_static4x16.h"], - [AC_MSG_RESULT([yes]) - selected_htscodecs_mk="htscodecs_bundled.mk"], - [AC_MSG_RESULT([no]) - AS_IF([test -e "$srcdir/.git"], - [MSG_ERROR([htscodecs submodule files not present. - -HTSlib uses some functions from the htscodecs project, which is normally -included as a submodule. Try running: - - git submodule update --init --recursive - -in the top-level htslib directory to update it, and then re-run configure. -])], - [MSG_ERROR([htscodecs submodule files not present. - -You have an incomplete distribution. Please try downloading one of the -official releases from https://www.htslib.org -])])])]) - -AS_IF([test "x$with_libdeflate" != "xno"], - [libdeflate=ok - AC_CHECK_HEADER([libdeflate.h],[],[libdeflate='missing header'],[;]) - AC_CHECK_LIB([deflate], [libdeflate_deflate_compress],[:],[libdeflate='missing library']) - AS_IF([test "$libdeflate" = "ok"], - [AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define if libdeflate is available.]) - LIBS="-ldeflate $LIBS" - private_LIBS="$private_LIBS -ldeflate" - static_LIBS="$static_LIBS -ldeflate"], - [AS_IF([test "x$with_libdeflate" != "xcheck"], - [MSG_ERROR([libdeflate development files not found: $libdeflate - -You requested libdeflate, but do not have the required header / library -files. The source for libdeflate is available from -. You may have to adjust -search paths in CPPFLAGS and/or LDFLAGS if the header and library -are not currently on them. 
- -Either configure with --without-libdeflate or resolve this error to build -HTSlib.])])])]) - -libcurl=disabled -if test "$enable_libcurl" != no; then - libcurl_devel=ok - AC_CHECK_HEADER([curl/curl.h], [], [libcurl_devel="headers not found"], [;]) - AC_CHECK_LIB([curl], [curl_easy_pause], [:], - [AC_CHECK_LIB([curl], [curl_easy_init], - [libcurl_devel="library is too old (7.18+ required)"], - [libcurl_devel="library not found"])]) - - if test "$libcurl_devel" = ok; then - AC_DEFINE([HAVE_LIBCURL], 1, [Define if libcurl file access is enabled.]) - libcurl=enabled - elif test "$enable_libcurl" = check; then - AC_MSG_WARN([libcurl not enabled: $libcurl_devel]) - else - MSG_ERROR([libcurl $libcurl_devel - -Support for HTTPS and other SSL-based URLs requires routines from the libcurl -library . Building HTSlib with libcurl enabled -requires libcurl development files to be installed on the build machine; you -may need to ensure a package such as libcurl4-{gnutls,nss,openssl}-dev (on -Debian or Ubuntu Linux) or libcurl-devel (on RPM-based Linux distributions -or Cygwin) is installed. - -Either configure with --disable-libcurl or resolve this error to build HTSlib.]) - fi - -dnl -lcurl is only needed for static linking if hfile_libcurl is not a plugin - if test "$libcurl" = enabled ; then - if test "$enable_plugins" != yes ; then - static_LIBS="$static_LIBS -lcurl" - fi - fi -fi -AC_SUBST([libcurl]) - -gcs=disabled -if test "$enable_gcs" != no; then - if test $libcurl = enabled; then - AC_DEFINE([ENABLE_GCS], 1, [Define if HTSlib should enable GCS support.]) - gcs=enabled - else - case "$enable_gcs" in - check) AC_MSG_WARN([GCS support not enabled: requires libcurl support]) ;; - *) MSG_ERROR([GCS support not enabled - -Support for Google Cloud Storage URLs requires libcurl support to be enabled -in HTSlib. 
Configure with --enable-libcurl in order to use GCS URLs.]) - ;; - esac - fi -fi -AC_SUBST([gcs]) - -s3=disabled -if test "$enable_s3" != no; then - if test $libcurl = enabled; then - s3=enabled - need_crypto="$enable_s3" - else - case "$enable_s3" in - check) AC_MSG_WARN([S3 support not enabled: requires libcurl support]) ;; - *) MSG_ERROR([S3 support not enabled - -Support for Amazon AWS S3 URLs requires libcurl support to be enabled -in HTSlib. Configure with --enable-libcurl in order to use S3 URLs.]) - ;; - esac - fi -fi - -CRYPTO_LIBS= -if test $need_crypto != no; then - AC_CHECK_FUNC([CCHmac], - [AC_DEFINE([HAVE_COMMONCRYPTO], 1, - [Define if you have the Common Crypto library.])], - [save_LIBS=$LIBS - AC_SEARCH_LIBS([HMAC], [crypto], - [AC_DEFINE([HAVE_HMAC], 1, [Define if you have libcrypto-style HMAC().]) - case "$ac_cv_search_HMAC" in - -l*) CRYPTO_LIBS=$ac_cv_search_HMAC ;; - esac], - [case "$need_crypto" in - check) AC_MSG_WARN([S3 support not enabled: requires SSL development files]) - s3=disabled ;; - *) MSG_ERROR([SSL development files not found - -Support for AWS S3 URLs requires routines from an SSL library. Building -HTSlib with libcurl enabled requires SSL development files to be installed -on the build machine; you may need to ensure a package such as libgnutls-dev, -libnss3-dev, or libssl-dev (on Debian or Ubuntu Linux, corresponding to the -libcurl4-*-dev package installed), or openssl-devel (on RPM-based Linux -distributions or Cygwin) is installed. - -Either configure with --disable-s3 or resolve this error to build HTSlib.]) ;; - esac]) - LIBS=$save_LIBS]) -dnl Only need to add to static_LIBS if not building as a plugin - if test "$enable_plugins" != yes ; then - static_LIBS="$static_LIBS $CRYPTO_LIBS" - fi -fi - -dnl Look for regcomp in various libraries (needed on windows/mingw). -AC_SEARCH_LIBS(regcomp, regex, [libregex=needed], []) - -dnl Look for PTHREAD_MUTEX_RECURSIVE. 
-dnl This is normally in pthread.h except on some broken glibc implementations. -dnl Now set by default -dnl AC_CHECK_DECL(PTHREAD_MUTEX_RECURSIVE, [], [AC_DEFINE([_XOPEN_SOURCE],[600], [Needed for PTHREAD_MUTEX_RECURSIVE])], [[#include ]]) - -if test "$s3" = enabled ; then - AC_DEFINE([ENABLE_S3], 1, [Define if HTSlib should enable S3 support.]) -fi - -dnl Apply value from HTS_PROG_CC_WERROR (if set) -AS_IF([test "x$hts_late_cflags" != x],[CFLAGS="$CFLAGS $hts_late_cflags"]) - -AC_SUBST([s3]) -AC_SUBST([CRYPTO_LIBS]) - -AC_SUBST([pc_requires]) -AC_SUBST([private_LIBS]) -AC_SUBST([static_LDFLAGS]) -AC_SUBST([static_LIBS]) - -AC_CONFIG_FILES([config.mk htslib.pc.tmp:htslib.pc.in]) -AC_CONFIG_LINKS([htscodecs.mk:$selected_htscodecs_mk]) - -if test "$srcdir" != .; then - # Set up for a separate build directory. As HTSlib uses a non-recursive - # makefile, we need to create additional build subdirectories explicitly. - AC_CONFIG_LINKS([Makefile:Makefile htslib.mk:htslib.mk]) - AC_CONFIG_FILES([htslib_vars.mk:builddir_vars.mk.in]) - AC_CONFIG_COMMANDS([mkdir], - [AS_MKDIR_P([cram]) - AS_MKDIR_P([htscodecs/htscodecs]) - AS_MKDIR_P([htscodecs/tests]) - AS_MKDIR_P([test/fuzz]) - AS_MKDIR_P([test/longrefs]) - AS_MKDIR_P([test/tabix])]) -fi - -# @HTSDIRslash_if_relsrcdir@ will be empty when $srcdir is absolute -case "$srcdir" in - /*) HTSDIRslash_if_relsrcdir= ;; - *) HTSDIRslash_if_relsrcdir='$(HTSDIR)/' ;; -esac -AC_SUBST([HTSDIRslash_if_relsrcdir]) - -AC_OUTPUT diff --git a/src/htslib-1.19.1/cram/README b/src/htslib-1.19.1/cram/README deleted file mode 100644 index 1354382..0000000 --- a/src/htslib-1.19.1/cram/README +++ /dev/null @@ -1,214 +0,0 @@ -CRAM encoding internals -======================= - -A quick summary of functions involved. 
- -The encoder works by accumulating a bunch of BAM records (via the -cram_put_bam_seq function), and at a certain point (eg counter of -records, or switching reference) the array of BAM records it turned -into a container, which in turn creates slices, holding CRAM -data-series in blocks. The function that turns an array of BAM -objects into the container is below. - -cram_encode_container func: - Validate references MD5 against header, unless no_ref mode - If embed_ref <= 1, fetch ref - Switch to embed_ref=2 if failed - - Foreach slice: - If embed_ref == 2 - call cram_generate_reference - if failed switch to no_ref mode - Foreach sequence - call process_one_read to append BAM onto each data series (DS) - call cram_stats_add for each DS to gather metrics - call cram_encode_aux - - # We now have cram DS, per slice - call cram_encoder_init, per DS (based on cram_stats_add data) - - Foreach slice: - call cram_encode_slice to turn DS to blocks - call cram_compess_slice - - call cram_encode_compression_header - -Threading ---------- - -CRAM can be multi-threaded, but this brings complications. - -The above function is the main CPU user, so it is this bit which can -be executed in parallel from multiple threads. 
To understand this we -need to now look at how the primary loop works when writing a CRAM: - -Encoding main thread: - repeatedly calls cram_put_bam_seq - calls cram_new_container on first time through to initialise - calls cram_next_container when current is full or we need to flush - calls cram_flush_container_mt to flush last container - pushes BAM object onto current container - -If non-threaded, cram_flush_container_mt does: - call cram_flush_container - call cram_encode_container to go from BAM to CRAM data-series - call cram_flush_container2 (writes it out) - -If threaded, cram_flush_container_mt does: - Main: Dispatch cram_flush_thread job - Thread: call cram_encode_container to go from BAM to CRAM data-series - Main: Call cram_flush_result to drain queue of encoded containers - Main: Call cram_flush_container2 (writes it out); - - - -Decisions on when to create new containers, detection of sorted vs unsorted, -switching to multi-seq mode, etc occur at the main thread in -cram_put_bam_seq. - -We can change our mind on container parameters at any point up until -the cram_encode_container call. At that point these parameters get -baked into a container compression header and all data-series -generated need to be in sync with the parameters. - -It is possible that some parameter changes can get detected while -encoding the container, as it is there where we fetch references. Eg -the need to enable embedded reference or switch to non-ref mode. - -While encoding a container, we can change the parameters for *this* -container, and we can also set the default parameter for subsequent -new parameters via the global cram fd to avoid spamming attempts to -load a reference which doesn't exist, but we cannot change other -containers that are being processed in parallel. They'll fend for -themselves. 
- -References ----------- - -To avoid spamming the reference servers, there is a shared cache of -references being currently used by all the worker threads (leading to -confusing terminology of reference-counting of references). So each -container fetches its section of reference, but the memory for that is -handled via its own layer. - -The shared references and ref meta-data is held in cram_fd -> refs (a -refs_t pointer): - - // References structure. - struct refs_t { - string_alloc_t *pool; // String pool for holding filenames and SN vals - - khash_t(refs) *h_meta; // ref_entry*, index by name - ref_entry **ref_id; // ref_entry*, index by ID - int nref; // number of ref_entry - - char *fn; // current file opened - BGZF *fp; // and the hFILE* to go with it. - - int count; // how many cram_fd sharing this refs struct - - pthread_mutex_t lock; // Mutex for multi-threaded updating - ref_entry *last; // Last queried sequence - int last_id; // Used in cram_ref_decr_locked to delay free - }; - -Within this, ref_entry is the per-reference information: - - typedef struct ref_entry { - char *name; - char *fn; - int64_t length; - int64_t offset; - int bases_per_line; - int line_length; - int64_t count; // for shared references so we know to dealloc seq - char *seq; - mFILE *mf; - int is_md5; // Reference comes from a raw seq found by MD5 - int validated_md5; - } ref_entry; - -Sharing of references to track use between threads is via -cram_ref_incr* and cram_ref_decr* (which locked and unlocked -variants). We free a reference when the usage count hits zero. To -avoid spamming discard and reload in single-thread creation of a -pos-sorted CRAM, we keep track of the last reference in cram_fd and -delay discard by one loop iteration. - -There are complexities here around whether the references come from a -single ref.fa file, are from a local MD5sum cache with one file per -reference (mmapped), or whether they're fetched from some remote -REF_PATH query such as the EBI. 
(This later case typically downloads -to a local md5 based ref-cache first and mmaps from there.) - -The refs struct start off by being populated from the SAM header. We -have M5 tag and name known, maybe a filename, but length is 0 and seq -is NULL. This is done by cram_load_reference: - -cram_load_reference (cram_fd, filename): - if filename non-NULL - call refs_load_fai - Populates ref_entry with filename, name, length, line-len, etc - sanitise_SQ_lines - If no refs loaded - call refs_from_header - populates ref_entry with name. - Sets length=0 as marker for not-yet-loaded - -The main interface used from the code is cram_get_ref(). It takes a -reference ID, start and end coordinate and returns a pointer to the -relevant sub-sequence. - -cram_get_ref: - r = fd->refs->ref_id[id]; // current ref - call cram_populate_ref if stored length is 0 (ie ref.fa set) - search REF_PATH / REF_CACHE - call bgzf_open if local_path - call open_path_mfile otherwise - copy to local REF_CACHE if required (eg remote fetch) - - If start = 1 and end = ref-length - If ref seq unknown - call cram_ref_load to load entire ref and use that - - If ref seq now known, return it - - // Otherwise known via .fai or we've errored by now. - call load_ref_portion to return a sub-seq from index fasta - -The encoder asks for the entire reference rather than a small portion -of it as we're usually encoding a large amount. The decoder may be -dealing with small range queries, so it only asks for the relevant -sub-section of reference as specified in the cram slice headers. - - -TODO -==== - -- Multi-ref mode is enabled when we have too many small containers in - a row. - - Instead of firing off new containers when we switch reference, we - could always make a new container after N records, separating off - M <= N to make the container such that all M are the same reference, - and shuffling any remaining N-M down as the start of the next. 
- - This means we can detect how many new containers we would create, - and enable multi-ref mode straight away rather than keeping a recent - history of how many small containers we've emitted. - -- The cache of references currently being used is a better place to - track the global embed-ref and non-ref logic. Better than cram_fd. - Cram_fd is a one-way change, as once we enable non-ref we'll stick - with it. - - However if it was per-ref in the ref-cache then we'd probe and try - each reference once, and then all new containers for that ref would - honour the per-ref parameters. So a single missing reference in the - middle of a large file wouldn't change behaviour for all subsequence - references. - - Optionally we could still do meta-analysis on how many references - are failing, and switch the global cram_fd params to avoid repeated - testing of reference availability if it's becoming obvious that none - of them are known. diff --git a/src/htslib-1.19.1/cram/cram.h b/src/htslib-1.19.1/cram/cram.h deleted file mode 100644 index ba7b130..0000000 --- a/src/htslib-1.19.1/cram/cram.h +++ /dev/null @@ -1,61 +0,0 @@ -/* -Copyright (c) 2012-2013, 2015, 2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * CRAM interface. - * - * Consider using the higher level hts_*() API for programs that wish to - * be file format agnostic (see htslib/hts.h). - * - * This API should be used for CRAM specific code. The specifics of the - * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h - * although these should not be included directly (use this file instead). - */ - -#ifndef CRAM_ALL_H -#define CRAM_ALL_H - -#include "cram_samtools.h" -#include "../header.h" -#include "cram_structs.h" -#include "cram_io.h" -#include "cram_encode.h" -#include "cram_decode.h" -#include "cram_stats.h" -#include "cram_codecs.h" -#include "cram_index.h" - -// Validate against the external cram.h, -// -// This contains duplicated portions from cram_io.h and cram_structs.h, -// so we want to ensure that the prototypes match. -#include "../htslib/cram.h" - -#endif diff --git a/src/htslib-1.19.1/cram/cram_codecs.h b/src/htslib-1.19.1/cram/cram_codecs.h deleted file mode 100644 index d93d995..0000000 --- a/src/htslib-1.19.1/cram/cram_codecs.h +++ /dev/null @@ -1,264 +0,0 @@ -/* -Copyright (c) 2012-2015, 2018, 2020, 2023 Genome Research Ltd. 
-Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CRAM_CODECS_H -#define CRAM_CODECS_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct cram_codec; - -/* - * Slow but simple huffman decoder to start with. - * Read a bit at a time, keeping track of {length, value} - * eg. 1 1 0 1 => {1,1}, {2,3}, {3,6}, {4,13} - * - * Keep track of this through the huffman code table. - * For fast scanning we have an index of where the first code of length X - * appears. 
- */ -typedef struct { - int64_t symbol; - int32_t p; // next code start value, minus index to codes[] - int32_t code; - int32_t len; -} cram_huffman_code; - -typedef struct { - int ncodes; - cram_huffman_code *codes; - int option; -} cram_huffman_decoder; - -#define MAX_HUFF 128 -typedef struct { - cram_huffman_code *codes; - int nvals; - int val2code[MAX_HUFF+1]; // value to code lookup for small values - int option; -} cram_huffman_encoder; - -typedef struct { - int32_t offset; - int32_t nbits; -} cram_beta_decoder; - -// A PACK transform, packing multiple values into a single byte -typedef struct { - int32_t nbits; - enum cram_encoding sub_encoding; - void *sub_codec_dat; - struct cram_codec *sub_codec; - int nval; // number of items in maps - uint32_t rmap[256]; // 0,1,2,3 -> P,A,C,K - int map[256]; // P,A,C,K -> 0,1,2,3 // NB: max input is uint8_tb? Or use hash? -} cram_xpack_decoder; -typedef cram_xpack_decoder cram_xpack_encoder; - -// Transforms symbols X,Y,Z to bytes 0,1,2. -typedef struct { - enum cram_encoding len_encoding; - enum cram_encoding lit_encoding; - void *len_dat; - void *lit_dat; - struct cram_codec *len_codec; - struct cram_codec *lit_codec; - int cur_len; - int cur_lit; - int rep_score[256]; - char *to_flush; - size_t to_flush_size; -} cram_xrle_decoder; -typedef cram_xrle_decoder cram_xrle_encoder; - -// DELTA + zigzag + varint encoding -typedef struct { - // FIXME: define endian here too. Require little endian? 
- int64_t last; - uint8_t word_size; // 1, 2, 4, 8 - //uint8_t sign; // true if input data is already signed - enum cram_encoding sub_encoding; - void *sub_codec_dat; - struct cram_codec *sub_codec; -} cram_xdelta_decoder; -typedef cram_xdelta_decoder cram_xdelta_encoder; - -typedef struct { - int32_t offset; -} cram_gamma_decoder; - -typedef struct { - int32_t offset; - int32_t k; -} cram_subexp_decoder; - -typedef struct { - int32_t content_id; - enum cram_external_type type; -} cram_external_decoder; - -typedef struct { - int32_t content_id; - int64_t offset; - enum cram_external_type type; -} cram_varint_decoder; - -typedef struct { - struct cram_codec *len_codec; - struct cram_codec *val_codec; -} cram_byte_array_len_decoder; - -typedef struct { - unsigned char stop; - int32_t content_id; -} cram_byte_array_stop_decoder; - -typedef struct { - enum cram_encoding len_encoding; - enum cram_encoding val_encoding; - void *len_dat; - void *val_dat; - struct cram_codec *len_codec; - struct cram_codec *val_codec; -} cram_byte_array_len_encoder; - -typedef struct { - int64_t val; -} cram_const_codec; - -/* - * A generic codec structure. 
- */ -struct cram_codec { - enum cram_encoding codec; - cram_block *out; - varint_vec *vv; - int codec_id; - void (*free)(struct cram_codec *codec); - int (*decode)(cram_slice *slice, struct cram_codec *codec, - cram_block *in, char *out, int *out_size); - int (*encode)(cram_slice *slice, struct cram_codec *codec, - char *in, int in_size); - int (*store)(struct cram_codec *codec, cram_block *b, char *prefix, - int version); - int (*size)(cram_slice *slice, struct cram_codec *codec); - int (*flush)(struct cram_codec *codec); - cram_block *(*get_block)(cram_slice *slice, struct cram_codec *codec); - int (*describe)(struct cram_codec *codec, kstring_t *ks); - - union { - cram_huffman_decoder huffman; - cram_external_decoder external; - cram_beta_decoder beta; - cram_gamma_decoder gamma; - cram_subexp_decoder subexp; - cram_byte_array_len_decoder byte_array_len; - cram_byte_array_stop_decoder byte_array_stop; - cram_xpack_decoder xpack; - cram_xrle_decoder xrle; - cram_xdelta_decoder xdelta; - cram_const_codec xconst; - cram_varint_decoder varint; - - cram_huffman_encoder e_huffman; - cram_external_decoder e_external; - cram_byte_array_stop_decoder e_byte_array_stop; - cram_byte_array_len_encoder e_byte_array_len; - cram_beta_decoder e_beta; - cram_xpack_decoder e_xpack; - cram_xrle_decoder e_xrle; - cram_xdelta_decoder e_xdelta; - cram_const_codec e_xconst; - cram_varint_decoder e_varint; - } u; -}; - -const char *cram_encoding2str(enum cram_encoding t); - -cram_codec *cram_decoder_init(cram_block_compression_hdr *hdr, - enum cram_encoding codec, char *data, int size, - enum cram_external_type option, - int version, varint_vec *vv); -cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st, - enum cram_external_type option, void *dat, - int version, varint_vec *vv); - -//int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size); -//void cram_decoder_free(void *codes); - -//#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> 
b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++)) - -#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, b->byte += (--b->bit<0), b->bit&=7) - -/* - * Check that enough bits are left in a block to satisy a bit-based decoder. - * Return 0 if there are enough - * 1 if not. - */ - -static inline int cram_not_enough_bits(cram_block *blk, int nbits) { - if (nbits < 0 || - (blk->byte >= blk->uncomp_size && nbits > 0) || - (blk->uncomp_size - blk->byte <= INT32_MAX / 8 + 1 && - (blk->uncomp_size - blk->byte) * 8 + blk->bit - 7 < nbits)) { - return 1; - } - return 0; -} - -/* - * Returns the content_id used by this codec, also in id2 if byte_array_len. - * Returns -1 for the CORE block and -2 for unneeded. - * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs. - */ -int cram_codec_to_id(cram_codec *c, int *id2); - -/* - * cram_codec structures are specialised for decoding or encoding. - * Unfortunately this makes turning a decoder into an encoder (such as - * when transcoding files) problematic. - * - * This function converts a cram decoder codec into an encoder version - * in-place (ie it modifiers the codec itself). - * - * Returns 0 on success; - * -1 on failure. - */ -int cram_codec_decoder2encoder(cram_fd *fd, cram_codec *c); - -#ifdef __cplusplus -} -#endif - -#endif /* CRAM_CODECS_H */ diff --git a/src/htslib-1.19.1/cram/cram_decode.c b/src/htslib-1.19.1/cram/cram_decode.c deleted file mode 100644 index 86e2ef9..0000000 --- a/src/htslib-1.19.1/cram/cram_decode.c +++ /dev/null @@ -1,3587 +0,0 @@ -/* -Copyright (c) 2012-2020, 2022-2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * - In-memory decoding of CRAM data structures. - * - Iterator for reading CRAM record by record. - */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cram.h" -#include "os.h" -#include "../htslib/hts.h" - -//Whether CIGAR has just M or uses = and X to indicate match and mismatch -//#define USE_X - -/* ---------------------------------------------------------------------- - * CRAM compression headers - */ - -/* - * Decodes the Tag Dictionary record in the preservation map - * Updates the cram compression header. 
- * - * Returns number of bytes decoded on success - * -1 on failure - */ -int cram_decode_TD(cram_fd *fd, char *cp, const char *endp, - cram_block_compression_hdr *h) { - char *op = cp; - unsigned char *dat; - cram_block *b; - int32_t blk_size = 0; - int nTL, i, sz, err = 0; - - if (!(b = cram_new_block(0, 0))) - return -1; - - if (h->TD_blk || h->TL) { - hts_log_warning("More than one TD block found in compression header"); - cram_free_block(h->TD_blk); - free(h->TL); - h->TD_blk = NULL; - h->TL = NULL; - } - - /* Decode */ - blk_size = fd->vv.varint_get32(&cp, endp, &err); - if (!blk_size) { - h->nTL = 0; - cram_free_block(b); - return cp - op; - } - - if (err || blk_size < 0 || endp - cp < blk_size) { - cram_free_block(b); - return -1; - } - - BLOCK_APPEND(b, cp, blk_size); - cp += blk_size; - sz = cp - op; - // Force nul termination if missing - if (BLOCK_DATA(b)[BLOCK_SIZE(b)-1]) - BLOCK_APPEND_CHAR(b, '\0'); - - /* Set up TL lookup table */ - dat = BLOCK_DATA(b); - - // Count - for (nTL = i = 0; i < BLOCK_SIZE(b); i++) { - nTL++; - while (dat[i]) - i++; - } - - // Copy - if (!(h->TL = calloc(nTL, sizeof(*h->TL)))) { - cram_free_block(b); - return -1; - } - for (nTL = i = 0; i < BLOCK_SIZE(b); i++) { - h->TL[nTL++] = &dat[i]; - while (dat[i]) - i++; - } - h->TD_blk = b; - h->nTL = nTL; - - return sz; - - block_err: - cram_free_block(b); - return -1; -} - -/* - * Decodes a CRAM block compression header. 
- * Returns header ptr on success - * NULL on failure - */ -cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, - cram_block *b) { - char *cp, *endp, *cp_copy; - cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr)); - int i, err = 0; - int32_t map_size = 0, map_count = 0; - - if (!hdr) - return NULL; - - if (b->method != RAW) { - if (cram_uncompress_block(b)) { - free(hdr); - return NULL; - } - } - - cp = (char *)b->data; - endp = cp + b->uncomp_size; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - hdr->ref_seq_id = fd->vv.varint_get32(&cp, endp, &err); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - hdr->ref_seq_start = fd->vv.varint_get64(&cp, endp, &err); - hdr->ref_seq_span = fd->vv.varint_get64(&cp, endp, &err); - } else { - hdr->ref_seq_start = fd->vv.varint_get32(&cp, endp, &err); - hdr->ref_seq_span = fd->vv.varint_get32(&cp, endp, &err); - } - hdr->num_records = fd->vv.varint_get32(&cp, endp, &err); - hdr->num_landmarks = fd->vv.varint_get32(&cp, endp, &err); - if (hdr->num_landmarks < 0 || - hdr->num_landmarks >= SIZE_MAX / sizeof(int32_t) || - endp - cp < hdr->num_landmarks) { - free(hdr); - return NULL; - } - if (!(hdr->landmark = malloc(hdr->num_landmarks * sizeof(int32_t)))) { - free(hdr); - return NULL; - } - for (i = 0; i < hdr->num_landmarks; i++) - hdr->landmark[i] = fd->vv.varint_get32(&cp, endp, &err);; - } - - hdr->preservation_map = kh_init(map); - - memset(hdr->rec_encoding_map, 0, - CRAM_MAP_HASH * sizeof(hdr->rec_encoding_map[0])); - memset(hdr->tag_encoding_map, 0, - CRAM_MAP_HASH * sizeof(hdr->tag_encoding_map[0])); - - if (!hdr->preservation_map) { - cram_free_compression_header(hdr); - return NULL; - } - - /* Initialise defaults for preservation map */ - hdr->read_names_included = 0; - hdr->AP_delta = 1; - hdr->qs_seq_orient = 1; - memcpy(hdr->substitution_matrix, "CGTNAGTNACTNACGNACGT", 20); - - /* Preservation map */ - map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp; - map_count = 
fd->vv.varint_get32(&cp, endp, &err); - for (i = 0; i < map_count; i++) { - pmap_t hd; - khint_t k; - int r; - - if (endp - cp < 3) { - cram_free_compression_header(hdr); - return NULL; - } - cp += 2; - switch(CRAM_KEY(cp[-2],cp[-1])) { - case CRAM_KEY('M','I'): // was mapped QS included in V1.0 - case CRAM_KEY('U','I'): // was unmapped QS included in V1.0 - case CRAM_KEY('P','I'): // was unmapped placed in V1.0 - hd.i = *cp++; - break; - - case CRAM_KEY('R','N'): - hd.i = *cp++; - k = kh_put(map, hdr->preservation_map, "RN", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - - kh_val(hdr->preservation_map, k) = hd; - hdr->read_names_included = hd.i; - break; - - case CRAM_KEY('A','P'): - hd.i = *cp++; - k = kh_put(map, hdr->preservation_map, "AP", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - - kh_val(hdr->preservation_map, k) = hd; - hdr->AP_delta = hd.i; - break; - - case CRAM_KEY('R','R'): - hd.i = *cp++; - k = kh_put(map, hdr->preservation_map, "RR", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - - kh_val(hdr->preservation_map, k) = hd; - hdr->no_ref = !hd.i; - break; - - case CRAM_KEY('Q','O'): - hd.i = *cp++; - k = kh_put(map, hdr->preservation_map, "QO", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - - kh_val(hdr->preservation_map, k) = hd; - hdr->qs_seq_orient = hd.i; - break; - - case CRAM_KEY('S','M'): - if (endp - cp < 5) { - cram_free_compression_header(hdr); - return NULL; - } - hdr->substitution_matrix[0][(cp[0]>>6)&3] = 'C'; - hdr->substitution_matrix[0][(cp[0]>>4)&3] = 'G'; - hdr->substitution_matrix[0][(cp[0]>>2)&3] = 'T'; - hdr->substitution_matrix[0][(cp[0]>>0)&3] = 'N'; - - hdr->substitution_matrix[1][(cp[1]>>6)&3] = 'A'; - hdr->substitution_matrix[1][(cp[1]>>4)&3] = 'G'; - hdr->substitution_matrix[1][(cp[1]>>2)&3] = 'T'; - hdr->substitution_matrix[1][(cp[1]>>0)&3] = 'N'; - - hdr->substitution_matrix[2][(cp[2]>>6)&3] 
= 'A'; - hdr->substitution_matrix[2][(cp[2]>>4)&3] = 'C'; - hdr->substitution_matrix[2][(cp[2]>>2)&3] = 'T'; - hdr->substitution_matrix[2][(cp[2]>>0)&3] = 'N'; - - hdr->substitution_matrix[3][(cp[3]>>6)&3] = 'A'; - hdr->substitution_matrix[3][(cp[3]>>4)&3] = 'C'; - hdr->substitution_matrix[3][(cp[3]>>2)&3] = 'G'; - hdr->substitution_matrix[3][(cp[3]>>0)&3] = 'N'; - - hdr->substitution_matrix[4][(cp[4]>>6)&3] = 'A'; - hdr->substitution_matrix[4][(cp[4]>>4)&3] = 'C'; - hdr->substitution_matrix[4][(cp[4]>>2)&3] = 'G'; - hdr->substitution_matrix[4][(cp[4]>>0)&3] = 'T'; - - hd.p = cp; - cp += 5; - - k = kh_put(map, hdr->preservation_map, "SM", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - kh_val(hdr->preservation_map, k) = hd; - break; - - case CRAM_KEY('T','D'): { - int sz = cram_decode_TD(fd, cp, endp, hdr); // tag dictionary - if (sz < 0) { - cram_free_compression_header(hdr); - return NULL; - } - - hd.p = cp; - cp += sz; - - k = kh_put(map, hdr->preservation_map, "TD", &r); - if (-1 == r) { - cram_free_compression_header(hdr); - return NULL; - } - kh_val(hdr->preservation_map, k) = hd; - break; - } - - default: - hts_log_warning("Unrecognised preservation map key %c%c", cp[-2], cp[-1]); - // guess byte; - cp++; - break; - } - } - if (cp - cp_copy != map_size) { - cram_free_compression_header(hdr); - return NULL; - } - - /* Record encoding map */ - map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp; - map_count = fd->vv.varint_get32(&cp, endp, &err); - int is_v4 = CRAM_MAJOR_VERS(fd->version) >= 4 ? 
1 : 0; - for (i = 0; i < map_count; i++) { - char *key = cp; - int32_t encoding = E_NULL; - int32_t size = 0; - ptrdiff_t offset; - cram_map *m; - enum cram_DS_ID ds_id; - enum cram_external_type type; - - if (endp - cp < 4) { - cram_free_compression_header(hdr); - return NULL; - } - - cp += 2; - encoding = fd->vv.varint_get32(&cp, endp, &err); - size = fd->vv.varint_get32(&cp, endp, &err); - - offset = cp - (char *)b->data; - - if (encoding == E_NULL) - continue; - - if (size < 0 || endp - cp < size) { - cram_free_compression_header(hdr); - return NULL; - } - - //printf("%s codes for %.2s\n", cram_encoding2str(encoding), key); - - /* - * For CRAM1.0 CF and BF are Byte and not Int. - * Practically speaking it makes no difference unless we have a - * 1.0 format file that stores these in EXTERNAL as only then - * does Byte vs Int matter. - * - * Neither this C code nor Java reference implementations did this, - * so we gloss over it and treat them as int. - */ - ds_id = DS_CORE; - if (key[0] == 'B' && key[1] == 'F') { - ds_id = DS_BF; type = E_INT; - } else if (key[0] == 'C' && key[1] == 'F') { - ds_id = DS_CF; type = E_INT; - } else if (key[0] == 'R' && key[1] == 'I') { - ds_id = DS_RI; type = E_INT; - } else if (key[0] == 'R' && key[1] == 'L') { - ds_id = DS_RL; type = E_INT; - } else if (key[0] == 'A' && key[1] == 'P') { - ds_id = DS_AP; - type = is_v4 ? E_SLONG : E_INT; - } else if (key[0] == 'R' && key[1] == 'G') { - ds_id = DS_RG; - type = E_INT; - } else if (key[0] == 'M' && key[1] == 'F') { - ds_id = DS_MF; type = E_INT; - } else if (key[0] == 'N' && key[1] == 'S') { - ds_id = DS_NS; type = E_INT; - } else if (key[0] == 'N' && key[1] == 'P') { - ds_id = DS_NP; - type = is_v4 ? E_LONG : E_INT; - } else if (key[0] == 'T' && key[1] == 'S') { - ds_id = DS_TS; - type = is_v4 ? 
E_SLONG : E_INT; - } else if (key[0] == 'N' && key[1] == 'F') { - ds_id = DS_NF; type = E_INT; - } else if (key[0] == 'T' && key[1] == 'C') { - ds_id = DS_TC; type = E_BYTE; - } else if (key[0] == 'T' && key[1] == 'N') { - ds_id = DS_TN; type = E_INT; - } else if (key[0] == 'F' && key[1] == 'N') { - ds_id = DS_FN; type = E_INT; - } else if (key[0] == 'F' && key[1] == 'C') { - ds_id = DS_FC; type = E_BYTE; - } else if (key[0] == 'F' && key[1] == 'P') { - ds_id = DS_FP; type = E_INT; - } else if (key[0] == 'B' && key[1] == 'S') { - ds_id = DS_BS; type = E_BYTE; - } else if (key[0] == 'I' && key[1] == 'N') { - ds_id = DS_IN; type = E_BYTE_ARRAY; - } else if (key[0] == 'S' && key[1] == 'C') { - ds_id = DS_SC; type = E_BYTE_ARRAY; - } else if (key[0] == 'D' && key[1] == 'L') { - ds_id = DS_DL; type = E_INT; - } else if (key[0] == 'B' && key[1] == 'A') { - ds_id = DS_BA; type = E_BYTE; - } else if (key[0] == 'B' && key[1] == 'B') { - ds_id = DS_BB; type = E_BYTE_ARRAY; - } else if (key[0] == 'R' && key[1] == 'S') { - ds_id = DS_RS; type = E_INT; - } else if (key[0] == 'P' && key[1] == 'D') { - ds_id = DS_PD; type = E_INT; - } else if (key[0] == 'H' && key[1] == 'C') { - ds_id = DS_HC; type = E_INT; - } else if (key[0] == 'M' && key[1] == 'Q') { - ds_id = DS_MQ; type = E_INT; - } else if (key[0] == 'R' && key[1] == 'N') { - ds_id = DS_RN; type = E_BYTE_ARRAY_BLOCK; - } else if (key[0] == 'Q' && key[1] == 'S') { - ds_id = DS_QS; type = E_BYTE; - } else if (key[0] == 'Q' && key[1] == 'Q') { - ds_id = DS_QQ; type = E_BYTE_ARRAY; - } else if (key[0] == 'T' && key[1] == 'L') { - ds_id = DS_TL; type = E_INT; - } else if (key[0] == 'T' && key[1] == 'M') { - } else if (key[0] == 'T' && key[1] == 'V') { - } else { - hts_log_warning("Unrecognised key: %.2s", key); - } - - if (ds_id != DS_CORE) { - if (hdr->codecs[ds_id] != NULL) { - hts_log_warning("Codec for key %.2s defined more than once", - key); - hdr->codecs[ds_id]->free(hdr->codecs[ds_id]); - } - hdr->codecs[ds_id] = 
cram_decoder_init(hdr, encoding, cp, size, - type, fd->version, &fd->vv); - if (!hdr->codecs[ds_id]) { - cram_free_compression_header(hdr); - return NULL; - } - } - - cp += size; - - // Fill out cram_map purely for cram_dump to dump out. - m = malloc(sizeof(*m)); - if (!m) { - cram_free_compression_header(hdr); - return NULL; - } - m->key = CRAM_KEY(key[0], key[1]); - m->encoding = encoding; - m->size = size; - m->offset = offset; - m->codec = NULL; - - m->next = hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])]; - hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])] = m; - } - if (cp - cp_copy != map_size) { - cram_free_compression_header(hdr); - return NULL; - } - - /* Tag encoding map */ - map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp; - map_count = fd->vv.varint_get32(&cp, endp, &err); - for (i = 0; i < map_count; i++) { - int32_t encoding = E_NULL; - int32_t size = 0; - cram_map *m = malloc(sizeof(*m)); // FIXME: use pooled_alloc - uint8_t key[3]; - - if (!m || endp - cp < 6) { - free(m); - cram_free_compression_header(hdr); - return NULL; - } - - m->key = fd->vv.varint_get32(&cp, endp, &err); - key[0] = m->key>>16; - key[1] = m->key>>8; - key[2] = m->key; - encoding = fd->vv.varint_get32(&cp, endp, &err); - size = fd->vv.varint_get32(&cp, endp, &err); - - m->encoding = encoding; - m->size = size; - m->offset = cp - (char *)b->data; - if (size < 0 || endp - cp < size || - !(m->codec = cram_decoder_init(hdr, encoding, cp, size, - E_BYTE_ARRAY_BLOCK, fd->version, &fd->vv))) { - cram_free_compression_header(hdr); - free(m); - return NULL; - } - - cp += size; - - m->next = hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])]; - hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])] = m; - } - if (err || cp - cp_copy != map_size) { - cram_free_compression_header(hdr); - return NULL; - } - - return hdr; -} - -/* - * Note we also need to scan through the record encoding map to - * see which data series share the same block, either external or - * CORE. 
For example if we need the BF data series but MQ and CF - * are also encoded in the same block then we need to add those in - * as a dependency in order to correctly decode BF. - * - * Returns 0 on success - * -1 on failure - */ -int cram_dependent_data_series(cram_fd *fd, - cram_block_compression_hdr *hdr, - cram_slice *s) { - int *block_used; - int core_used = 0; - int i; - static int i_to_id[] = { - DS_BF, DS_AP, DS_FP, DS_RL, DS_DL, DS_NF, DS_BA, DS_QS, - DS_FC, DS_FN, DS_BS, DS_IN, DS_RG, DS_MQ, DS_TL, DS_RN, - DS_NS, DS_NP, DS_TS, DS_MF, DS_CF, DS_RI, DS_RS, DS_PD, - DS_HC, DS_SC, DS_BB, DS_QQ, - }; - uint32_t orig_ds; - - /* - * Set the data_series bit field based on fd->required_fields - * contents. - */ - if (fd->required_fields && fd->required_fields != INT_MAX) { - s->data_series = 0; - - if (fd->required_fields & SAM_QNAME) - s->data_series |= CRAM_RN; - - if (fd->required_fields & SAM_FLAG) - s->data_series |= CRAM_BF; - - if (fd->required_fields & SAM_RNAME) - s->data_series |= CRAM_RI | CRAM_BF; - - if (fd->required_fields & SAM_POS) - s->data_series |= CRAM_AP | CRAM_BF; - - if (fd->required_fields & SAM_MAPQ) - s->data_series |= CRAM_MQ; - - if (fd->required_fields & SAM_CIGAR) - s->data_series |= CRAM_CIGAR; - - if (fd->required_fields & SAM_RNEXT) - s->data_series |= CRAM_CF | CRAM_NF | CRAM_RI | CRAM_NS |CRAM_BF; - - if (fd->required_fields & SAM_PNEXT) - s->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_NP | CRAM_BF; - - if (fd->required_fields & SAM_TLEN) - s->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_TS | - CRAM_BF | CRAM_MF | CRAM_RI | CRAM_CIGAR; - - if (fd->required_fields & SAM_SEQ) - s->data_series |= CRAM_SEQ; - - if (!(fd->required_fields & SAM_AUX)) - // No easy way to get MD/NM without other tags at present - s->decode_md = 0; - - if (fd->required_fields & SAM_QUAL) - s->data_series |= CRAM_QUAL; - - if (fd->required_fields & SAM_AUX) - s->data_series |= CRAM_RG | CRAM_TL | CRAM_aux; - - if (fd->required_fields & 
SAM_RGAUX) - s->data_series |= CRAM_RG | CRAM_BF; - - // Always uncompress CORE block - if (cram_uncompress_block(s->block[0])) - return -1; - } else { - s->data_series = CRAM_ALL; - - for (i = 0; i < s->hdr->num_blocks; i++) { - if (cram_uncompress_block(s->block[i])) - return -1; - } - - return 0; - } - - block_used = calloc(s->hdr->num_blocks+1, sizeof(int)); - if (!block_used) - return -1; - - do { - /* - * Also set data_series based on code prerequisites. Eg if we need - * CRAM_QS then we also need to know CRAM_RL so we know how long it - * is, or if we need FC/FP then we also need FN (number of features). - * - * It's not reciprocal though. We may be needing to decode FN - * but have no need to decode FC, FP and cigar ops. - */ - if (s->data_series & CRAM_RS) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_PD) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_HC) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_QS) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_IN) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_SC) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_BS) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_DL) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_BA) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_BB) s->data_series |= CRAM_FC|CRAM_FP; - if (s->data_series & CRAM_QQ) s->data_series |= CRAM_FC|CRAM_FP; - - // cram_decode_seq() needs seq[] array - if (s->data_series & (CRAM_SEQ|CRAM_CIGAR)) s->data_series |= CRAM_RL; - - if (s->data_series & CRAM_FP) s->data_series |= CRAM_FC; - if (s->data_series & CRAM_FC) s->data_series |= CRAM_FN; - if (s->data_series & CRAM_aux) s->data_series |= CRAM_TL; - if (s->data_series & CRAM_MF) s->data_series |= CRAM_CF; - if (s->data_series & CRAM_MQ) s->data_series |= CRAM_BF; - if (s->data_series & CRAM_BS) s->data_series |= CRAM_RI; - if (s->data_series & (CRAM_MF |CRAM_NS 
|CRAM_NP |CRAM_TS |CRAM_NF)) - s->data_series |= CRAM_CF; - if (!hdr->read_names_included && s->data_series & CRAM_RN) - s->data_series |= CRAM_CF | CRAM_NF; - if (s->data_series & (CRAM_BA | CRAM_QS | CRAM_BB | CRAM_QQ)) - s->data_series |= CRAM_BF | CRAM_CF | CRAM_RL; - if (s->data_series & CRAM_FN) { - // The CRAM_FN loop checks for reference length boundaries, - // which needs a working seq_pos. Some fields are fixed size - // irrespective of if we decode (BS), but others need to know - // the size of the string fetched back (SC, IN, BB). - s->data_series |= CRAM_SC | CRAM_IN | CRAM_BB; - } - - orig_ds = s->data_series; - - // Find which blocks are in use. - for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) { - int bnum1, bnum2, j; - cram_codec *c = hdr->codecs[i_to_id[i]]; - - if (!(s->data_series & (1<hdr->num_blocks; j++) { - if (s->block[j]->content_type == EXTERNAL && - s->block[j]->content_id == bnum1) { - block_used[j] = 1; - if (cram_uncompress_block(s->block[j])) { - free(block_used); - return -1; - } - } - } - break; - } - - if (bnum2 == -2 || bnum1 == bnum2) - break; - - bnum1 = bnum2; // 2nd pass - } - } - - // Tags too - if ((fd->required_fields & SAM_AUX) || - (s->data_series & CRAM_aux)) { - for (i = 0; i < CRAM_MAP_HASH; i++) { - int bnum1, bnum2, j; - cram_map *m = hdr->tag_encoding_map[i]; - - while (m) { - cram_codec *c = m->codec; - if (!c) { - m = m->next; - continue; - } - - bnum1 = cram_codec_to_id(c, &bnum2); - - for (;;) { - switch (bnum1) { - case -2: - break; - - case -1: - core_used = 1; - break; - - default: - for (j = 0; j < s->hdr->num_blocks; j++) { - if (s->block[j]->content_type == EXTERNAL && - s->block[j]->content_id == bnum1) { - block_used[j] = 1; - if (cram_uncompress_block(s->block[j])) { - free(block_used); - return -1; - } - } - } - break; - } - - if (bnum2 == -2 || bnum1 == bnum2) - break; - - bnum1 = bnum2; // 2nd pass - } - - m = m->next; - } - } - } - - // We now know which blocks are in used, so repeat and 
find - // which other data series need to be added. - for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) { - int bnum1, bnum2, j; - cram_codec *c = hdr->codecs[i_to_id[i]]; - - if (!c) - continue; - - bnum1 = cram_codec_to_id(c, &bnum2); - - for (;;) { - switch (bnum1) { - case -2: - break; - - case -1: - if (core_used) { - //printf(" + data series %08x:\n", 1<data_series |= 1<hdr->num_blocks; j++) { - if (s->block[j]->content_type == EXTERNAL && - s->block[j]->content_id == bnum1) { - if (block_used[j]) { - //printf(" + data series %08x:\n", 1<data_series |= 1<tag_encoding_map[i]; - - while (m) { - cram_codec *c = m->codec; - if (!c) { - m = m->next; - continue; - } - - bnum1 = cram_codec_to_id(c, &bnum2); - - for (;;) { - switch (bnum1) { - case -2: - break; - - case -1: - //printf(" + data series %08x:\n", CRAM_aux); - s->data_series |= CRAM_aux; - break; - - default: - for (j = 0; j < s->hdr->num_blocks; j++) { - if (s->block[j]->content_type == EXTERNAL && - s->block[j]->content_id == bnum1) { - if (block_used[j]) { - //printf(" + data series %08x:\n", - // CRAM_aux); - s->data_series |= CRAM_aux; - } - } - } - break; - } - - if (bnum2 == -2 || bnum1 == bnum2) - break; - - bnum1 = bnum2; // 2nd pass - } - - m = m->next; - } - } - } while (orig_ds != s->data_series); - - free(block_used); - return 0; -} - -/* - * Checks whether an external block is used solely by a single data series. - * Returns the codec type if so (EXTERNAL, BYTE_ARRAY_LEN, BYTE_ARRAY_STOP) - * or 0 if not (E_NULL). 
- */ -static int cram_ds_unique(cram_block_compression_hdr *hdr, cram_codec *c, - int id) { - int i, n_id = 0; - enum cram_encoding e_type = 0; - - for (i = 0; i < DS_END; i++) { - cram_codec *c; - int bnum1, bnum2, old_n_id; - - if (!(c = hdr->codecs[i])) - continue; - - bnum1 = cram_codec_to_id(c, &bnum2); - - old_n_id = n_id; - if (bnum1 == id) { - n_id++; - e_type = c->codec; - } - if (bnum2 == id) { - n_id++; - e_type = c->codec; - } - - if (n_id == old_n_id+2) - n_id--; // len/val in same place counts once only. - } - - return n_id == 1 ? e_type : 0; -} - -/* - * Attempts to estimate the size of some blocks so we can preallocate them - * before decoding. Although decoding will automatically grow the blocks, - * it is typically more efficient to preallocate. - */ -void cram_decode_estimate_sizes(cram_block_compression_hdr *hdr, cram_slice *s, - int *qual_size, int *name_size, - int *q_id) { - int bnum1, bnum2; - cram_codec *cd; - - *qual_size = 0; - *name_size = 0; - - /* Qual */ - cd = hdr->codecs[DS_QS]; - if (cd == NULL) return; - bnum1 = cram_codec_to_id(cd, &bnum2); - if (bnum1 < 0 && bnum2 >= 0) bnum1 = bnum2; - if (cram_ds_unique(hdr, cd, bnum1)) { - cram_block *b = cram_get_block_by_id(s, bnum1); - if (b) *qual_size = b->uncomp_size; - if (q_id && cd->codec == E_EXTERNAL) - *q_id = bnum1; - } - - /* Name */ - cd = hdr->codecs[DS_RN]; - if (cd == NULL) return; - bnum1 = cram_codec_to_id(cd, &bnum2); - if (bnum1 < 0 && bnum2 >= 0) bnum1 = bnum2; - if (cram_ds_unique(hdr, cd, bnum1)) { - cram_block *b = cram_get_block_by_id(s, bnum1); - if (b) *name_size = b->uncomp_size; - } -} - - -/* ---------------------------------------------------------------------- - * CRAM slices - */ - -/* - * Decodes a CRAM (un)mapped slice header block. 
- * Returns slice header ptr on success - * NULL on failure - */ -cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b) { - cram_block_slice_hdr *hdr; - unsigned char *cp; - unsigned char *cp_end; - int i, err = 0; - - if (b->method != RAW) { - /* Spec. says slice header should be RAW, but we can future-proof - by trying to decode it if it isn't. */ - if (cram_uncompress_block(b) < 0) - return NULL; - } - cp = (unsigned char *)BLOCK_DATA(b); - cp_end = cp + b->uncomp_size; - - if (b->content_type != MAPPED_SLICE && - b->content_type != UNMAPPED_SLICE) - return NULL; - - if (!(hdr = calloc(1, sizeof(*hdr)))) - return NULL; - - hdr->content_type = b->content_type; - - if (b->content_type == MAPPED_SLICE) { - hdr->ref_seq_id = fd->vv.varint_get32s((char **)&cp, (char *)cp_end, &err); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - hdr->ref_seq_start = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err); - hdr->ref_seq_span = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err); - } else { - hdr->ref_seq_start = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - hdr->ref_seq_span = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - } - if (hdr->ref_seq_start < 0 || hdr->ref_seq_span < 0) { - free(hdr); - hts_log_error("Negative values not permitted for header " - "sequence start or span fields"); - return NULL; - } - } - hdr->num_records = fd->vv.varint_get32((char **)&cp, (char *) cp_end, &err); - hdr->record_counter = 0; - if (CRAM_MAJOR_VERS(fd->version) == 2) { - hdr->record_counter = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - } else if (CRAM_MAJOR_VERS(fd->version) >= 3) { - hdr->record_counter = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err); - } - hdr->num_blocks = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - hdr->num_content_ids = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err); - if (hdr->num_content_ids < 1 || - hdr->num_content_ids >= 10000) { - // Slice must have at 
least one data block, and there is no need - // for more than 2 per possible aux-tag plus ancillary. - free(hdr); - return NULL; - } - hdr->block_content_ids = malloc(hdr->num_content_ids * sizeof(int32_t)); - if (!hdr->block_content_ids) { - free(hdr); - return NULL; - } - - for (i = 0; i < hdr->num_content_ids; i++) - hdr->block_content_ids[i] = fd->vv.varint_get32((char **)&cp, - (char *)cp_end, - &err); - if (err) { - free(hdr->block_content_ids); - free(hdr); - return NULL; - } - - if (b->content_type == MAPPED_SLICE) - hdr->ref_base_id = fd->vv.varint_get32((char **)&cp, (char *) cp_end, &err); - - if (CRAM_MAJOR_VERS(fd->version) != 1) { - if (cp_end - cp < 16) { - free(hdr->block_content_ids); - free(hdr); - return NULL; - } - memcpy(hdr->md5, cp, 16); - } else { - memset(hdr->md5, 0, 16); - } - - if (!err) - return hdr; - - free(hdr->block_content_ids); - free(hdr); - return NULL; -} - - -#if 0 -/* Returns the number of bits set in val; it the highest bit used */ -static int nbits(int v) { - static const int MultiplyDeBruijnBitPosition[32] = { - 1, 10, 2, 11, 14, 22, 3, 30, 12, 15, 17, 19, 23, 26, 4, 31, - 9, 13, 21, 29, 16, 18, 25, 8, 20, 28, 24, 7, 27, 6, 5, 32 - }; - - v |= v >> 1; // first up to set all bits 1 after the first 1 */ - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - - // DeBruijn magic to find top bit - return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27]; -} -#endif - -#if 0 -static int sort_freqs(const void *vp1, const void *vp2) { - const int i1 = *(const int *)vp1; - const int i2 = *(const int *)vp2; - return i1-i2; -} -#endif - -/* ---------------------------------------------------------------------- - * Primary CRAM sequence decoder - */ - -static inline int add_md_char(cram_slice *s, int decode_md, char c, int32_t *md_dist) { - if (decode_md) { - BLOCK_APPEND_UINT(s->aux_blk, *md_dist); - BLOCK_APPEND_CHAR(s->aux_blk, c); - *md_dist = 0; - } - return 0; - - block_err: - return -1; -} - -/* - * 
Internal part of cram_decode_slice(). - * Generates the sequence, quality and cigar components. - */ -static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s, - cram_block *blk, cram_record *cr, sam_hdr_t *sh, - int cf, char *seq, char *qual, - int has_MD, int has_NM) { - int prev_pos = 0, f, r = 0, out_sz = 1; - int seq_pos = 1; - int cig_len = 0; - int64_t ref_pos = cr->apos; - int32_t fn, i32; - enum cigar_op cig_op = BAM_CMATCH; - uint32_t *cigar = s->cigar; - uint32_t ncigar = s->ncigar; - uint32_t cigar_alloc = s->cigar_alloc; - uint32_t nm = 0; - int32_t md_dist = 0; - int orig_aux = 0; - // CRAM < 4.0 decode_md is off/on - // CRAM >= 4.0 decode_md is auto/on (auto=on if MD* present, off otherwise) - int do_md = CRAM_MAJOR_VERS(fd->version) >= 4 - ? (s->decode_md > 0) - : (s->decode_md != 0); - int decode_md = s->ref && cr->ref_id >= 0 && ((do_md && !has_MD) || has_MD < 0); - int decode_nm = s->ref && cr->ref_id >= 0 && ((do_md && !has_NM) || has_NM < 0); - uint32_t ds = s->data_series; - sam_hrecs_t *bfd = sh->hrecs; - - cram_codec **codecs = c->comp_hdr->codecs; - - if ((ds & CRAM_QS) && !(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { - memset(qual, 255, cr->len); - } - - if (cr->cram_flags & CRAM_FLAG_NO_SEQ) - decode_md = decode_nm = 0; - - if (decode_md) { - orig_aux = BLOCK_SIZE(s->aux_blk); - if (has_MD == 0) - BLOCK_APPEND(s->aux_blk, "MDZ", 3); - } - - if (ds & CRAM_FN) { - if (!codecs[DS_FN]) return -1; - r |= codecs[DS_FN]->decode(s,codecs[DS_FN], - blk, (char *)&fn, &out_sz); - if (r) return r; - } else { - fn = 0; - } - - ref_pos--; // count from 0 - cr->cigar = ncigar; - - if (!(ds & (CRAM_FC | CRAM_FP))) - goto skip_cigar; - - if (fn) { - if ((ds & CRAM_FC) && !codecs[DS_FC]) - return -1; - if ((ds & CRAM_FP) && !codecs[DS_FP]) - return -1; - } - - for (f = 0; f < fn; f++) { - int32_t pos = 0; - char op; - - if (ncigar+2 >= cigar_alloc) { - cigar_alloc = cigar_alloc ? 
cigar_alloc*2 : 1024; - if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) - return -1; - s->cigar = cigar; - } - - if (ds & CRAM_FC) { - r |= codecs[DS_FC]->decode(s, - codecs[DS_FC], - blk, - &op, &out_sz); - if (r) return r; - } - - if (!(ds & CRAM_FP)) - continue; - - r |= codecs[DS_FP]->decode(s, - codecs[DS_FP], - blk, - (char *)&pos, &out_sz); - if (r) return r; - pos += prev_pos; - - if (pos <= 0) { - hts_log_error("Feature position %d before start of read", pos); - return -1; - } - - if (pos > seq_pos) { - if (pos > cr->len+1) - return -1; - - if (s->ref && cr->ref_id >= 0) { - if (ref_pos + pos - seq_pos > bfd->ref[cr->ref_id].len) { - static int whinged = 0; - int rlen; - if (!whinged) - hts_log_warning("Ref pos outside of ref sequence boundary"); - whinged = 1; - rlen = bfd->ref[cr->ref_id].len - ref_pos; - // May miss MD/NM cases where both seq/ref are N, but this is a - // malformed cram file anyway. - if (rlen > 0) { - if (ref_pos + rlen > s->ref_end) - goto beyond_slice; - - memcpy(&seq[seq_pos-1], - &s->ref[ref_pos - s->ref_start +1], rlen); - if ((pos - seq_pos) - rlen > 0) - memset(&seq[seq_pos-1+rlen], 'N', - (pos - seq_pos) - rlen); - } else { - memset(&seq[seq_pos-1], 'N', cr->len - seq_pos + 1); - } - if (md_dist >= 0) - md_dist += pos - seq_pos; - } else { - // 'N' in both ref and seq is also mismatch for NM/MD - if (ref_pos + pos-seq_pos > s->ref_end) - goto beyond_slice; - - const char *refp = s->ref + ref_pos - s->ref_start + 1; - const int frag_len = pos - seq_pos; - int do_cpy = 1; - if (decode_md || decode_nm) { - char *N = memchr(refp, 'N', frag_len); - if (N) { - int i; - for (i = 0; i < frag_len; i++) { - char base = refp[i]; - if (base == 'N') { - if (add_md_char(s, decode_md, - 'N', &md_dist) < 0) - return -1; - nm++; - } else { - md_dist++; - } - seq[seq_pos-1+i] = base; - } - do_cpy = 0; - } else { - md_dist += frag_len; - } - } - if (do_cpy) - memcpy(&seq[seq_pos-1], refp, frag_len); - } - } -#ifdef USE_X - if 
(cig_len && cig_op != BAM_CBASE_MATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - cig_op = BAM_CBASE_MATCH; -#else - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - cig_op = BAM_CMATCH; -#endif - cig_len += pos - seq_pos; - ref_pos += pos - seq_pos; - seq_pos = pos; - } - - prev_pos = pos; - - if (!(ds & CRAM_FC)) - goto skip_cigar; - - switch(op) { - case 'S': { // soft clip: IN - int32_t out_sz2 = 1; - int have_sc = 0; - - if (cig_len) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - switch (CRAM_MAJOR_VERS(fd->version)) { - case 1: - if (ds & CRAM_IN) { - r |= codecs[DS_IN] - ? codecs[DS_IN]->decode(s, codecs[DS_IN], - blk, - cr->len ? &seq[pos-1] : NULL, - &out_sz2) - : (seq[pos-1] = 'N', out_sz2 = 1, 0); - have_sc = 1; - } - break; - case 2: - default: - if (ds & CRAM_SC) { - r |= codecs[DS_SC] - ? codecs[DS_SC]->decode(s, codecs[DS_SC], - blk, - cr->len ? &seq[pos-1] : NULL, - &out_sz2) - : (seq[pos-1] = 'N', out_sz2 = 1, 0); - have_sc = 1; - } - break; - - //default: - // r |= codecs[DS_BB] - // ? 
codecs[DS_BB]->decode(s, codecs[DS_BB], - // blk, &seq[pos-1], &out_sz2) - // : (seq[pos-1] = 'N', out_sz2 = 1, 0); - } - if (have_sc) { - if (r) return r; - cigar[ncigar++] = (out_sz2<<4) + BAM_CSOFT_CLIP; - cig_op = BAM_CSOFT_CLIP; - seq_pos += out_sz2; - } - break; - } - - case 'X': { // Substitution; BS - unsigned char base; -#ifdef USE_X - if (cig_len && cig_op != BAM_CBASE_MISMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_BS) { - if (!codecs[DS_BS]) return -1; - r |= codecs[DS_BS]->decode(s, codecs[DS_BS], blk, - (char *)&base, &out_sz); - if (pos-1 < cr->len) - seq[pos-1] = 'N'; // FIXME look up BS=base value - } - cig_op = BAM_CBASE_MISMATCH; -#else - int ref_base; - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_BS) { - if (!codecs[DS_BS]) return -1; - r |= codecs[DS_BS]->decode(s, codecs[DS_BS], blk, - (char *)&base, &out_sz); - if (r) return -1; - if (cr->ref_id < 0 || ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) { - if (pos-1 < cr->len) - seq[pos-1] = c->comp_hdr-> - substitution_matrix[fd->L1['N']][base]; - if (decode_md || decode_nm) { - if (md_dist >= 0 && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - md_dist = -1; - nm--; - } - } else { - unsigned char ref_call = ref_pos < s->ref_end - ? 
(uc)s->ref[ref_pos - s->ref_start +1] - : 'N'; - ref_base = fd->L1[ref_call]; - if (pos-1 < cr->len) - seq[pos-1] = c->comp_hdr-> - substitution_matrix[ref_base][base]; - if (add_md_char(s, decode_md, ref_call, &md_dist) < 0) - return -1; - } - } - cig_op = BAM_CMATCH; -#endif - nm++; - cig_len++; - seq_pos++; - ref_pos++; - break; - } - - case 'D': { // Deletion; DL - if (cig_len && cig_op != BAM_CDEL) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_DL) { - if (!codecs[DS_DL]) return -1; - r |= codecs[DS_DL]->decode(s, codecs[DS_DL], blk, - (char *)&i32, &out_sz); - if (r) return r; - if (decode_md || decode_nm) { - if (ref_pos + i32 > s->ref_end) - goto beyond_slice; - if (md_dist >= 0 && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - if (ref_pos + i32 <= bfd->ref[cr->ref_id].len) { - if (decode_md) { - BLOCK_APPEND_CHAR(s->aux_blk, '^'); - BLOCK_APPEND(s->aux_blk, - &s->ref[ref_pos - s->ref_start +1], - i32); - md_dist = 0; - } - nm += i32; - } else { - uint32_t dlen; - if (bfd->ref[cr->ref_id].len >= ref_pos) { - if (decode_md) { - BLOCK_APPEND_CHAR(s->aux_blk, '^'); - BLOCK_APPEND(s->aux_blk, - &s->ref[ref_pos - s->ref_start+1], - bfd->ref[cr->ref_id].len-ref_pos); - BLOCK_APPEND_UINT(s->aux_blk, 0); - } - dlen = i32 - (bfd->ref[cr->ref_id].len - ref_pos); - nm += i32 - dlen; - } else { - dlen = i32; - } - - md_dist = -1; - } - } - cig_op = BAM_CDEL; - cig_len += i32; - ref_pos += i32; - //printf(" %d: DL = %d (ret %d)\n", f, i32, r); - } - break; - } - - case 'I': { // Insertion (several bases); IN - int32_t out_sz2 = 1; - - if (cig_len && cig_op != BAM_CINS) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - - if (ds & CRAM_IN) { - if (!codecs[DS_IN]) return -1; - r |= codecs[DS_IN]->decode(s, codecs[DS_IN], blk, - cr->len ? 
&seq[pos-1] : NULL, - &out_sz2); - if (r) return r; - cig_op = BAM_CINS; - cig_len += out_sz2; - seq_pos += out_sz2; - nm += out_sz2; - //printf(" %d: IN(I) = %.*s (ret %d, out_sz %d)\n", f, out_sz2, dat, r, out_sz2); - } - break; - } - - case 'i': { // Insertion (single base); BA - if (cig_len && cig_op != BAM_CINS) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_BA) { - if (!codecs[DS_BA]) return -1; - r |= codecs[DS_BA]->decode(s, codecs[DS_BA], blk, - cr->len ? &seq[pos-1] : NULL, - &out_sz); - if (r) return r; - } - cig_op = BAM_CINS; - cig_len++; - seq_pos++; - nm++; - break; - } - - case 'b': { // Several bases - int32_t len = 1; - - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - - if (ds & CRAM_BB) { - if (!codecs[DS_BB]) return -1; - r |= codecs[DS_BB]->decode(s, codecs[DS_BB], blk, - cr->len ? &seq[pos-1] : NULL, - &len); - if (r) return r; - - if (decode_md || decode_nm) { - int x; - if (md_dist >= 0 && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - - for (x = 0; x < len; x++) { - if (x && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, 0); - if (ref_pos+x >= bfd->ref[cr->ref_id].len || !s->ref) { - md_dist = -1; - break; - } else { - if (decode_md) { - if (ref_pos + x > s->ref_end) - goto beyond_slice; - char r = s->ref[ref_pos+x-s->ref_start +1]; - BLOCK_APPEND_CHAR(s->aux_blk, r); - } - } - } - - nm += x; - md_dist = 0; - } - } - - cig_op = BAM_CMATCH; - - cig_len+=len; - seq_pos+=len; - ref_pos+=len; - //prev_pos+=len; - break; - } - - case 'q': { // Several quality values - int32_t len = 1; - - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - - if (ds & CRAM_QQ) { - if (!codecs[DS_QQ]) return -1; - if ((ds & CRAM_QS) && !(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) - && (unsigned char)*qual == 255) - memset(qual, 30, cr->len); // ? 
- r |= codecs[DS_QQ]->decode(s, codecs[DS_QQ], blk, - (char *)&qual[pos-1], &len); - if (r) return r; - } - - cig_op = BAM_CMATCH; - - //prev_pos+=len; - break; - } - - case 'B': { // Read base; BA, QS -#ifdef USE_X - if (cig_len && cig_op != BAM_CBASE_MISMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } -#else - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } -#endif - if (ds & CRAM_BA) { - if (!codecs[DS_BA]) return -1; - r |= codecs[DS_BA]->decode(s, codecs[DS_BA], blk, - cr->len ? &seq[pos-1] : NULL, - &out_sz); - - if (decode_md || decode_nm) { - if (md_dist >= 0 && decode_md) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - if (ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) { - md_dist = -1; - } else { - if (decode_md) { - if (ref_pos > s->ref_end) - goto beyond_slice; - BLOCK_APPEND_CHAR(s->aux_blk, - s->ref[ref_pos-s->ref_start +1]); - } - nm++; - md_dist = 0; - } - } - } - if (ds & CRAM_QS) { - if (!codecs[DS_QS]) return -1; - if (!(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) - && (unsigned char)*qual == 255) - memset(qual, 30, cr->len); // ASCII ?. Same as htsjdk - r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, - (char *)&qual[pos-1], &out_sz); - } -#ifdef USE_X - cig_op = BAM_CBASE_MISMATCH; -#else - cig_op = BAM_CMATCH; -#endif - cig_len++; - seq_pos++; - ref_pos++; - //printf(" %d: BA/QS(B) = %c/%d (ret %d)\n", f, i32, qc, r); - break; - } - - case 'Q': { // Quality score; QS - if (ds & CRAM_QS) { - if (!codecs[DS_QS]) return -1; - if (!(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) && - (unsigned char)*qual == 255) - memset(qual, 30, cr->len); // ? 
- r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, - (char *)&qual[pos-1], &out_sz); - //printf(" %d: QS = %d (ret %d)\n", f, qc, r); - } - break; - } - - case 'H': { // hard clip; HC - if (cig_len && cig_op != BAM_CHARD_CLIP) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_HC) { - if (!codecs[DS_HC]) return -1; - r |= codecs[DS_HC]->decode(s, codecs[DS_HC], blk, - (char *)&i32, &out_sz); - if (r) return r; - cig_op = BAM_CHARD_CLIP; - cig_len += i32; - } - break; - } - - case 'P': { // padding; PD - if (cig_len && cig_op != BAM_CPAD) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_PD) { - if (!codecs[DS_PD]) return -1; - r |= codecs[DS_PD]->decode(s, codecs[DS_PD], blk, - (char *)&i32, &out_sz); - if (r) return r; - cig_op = BAM_CPAD; - cig_len += i32; - } - break; - } - - case 'N': { // Ref skip; RS - if (cig_len && cig_op != BAM_CREF_SKIP) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - if (ds & CRAM_RS) { - if (!codecs[DS_RS]) return -1; - r |= codecs[DS_RS]->decode(s, codecs[DS_RS], blk, - (char *)&i32, &out_sz); - if (r) return r; - cig_op = BAM_CREF_SKIP; - cig_len += i32; - ref_pos += i32; - } - break; - } - - default: - hts_log_error("Unknown feature code '%c'", op); - return -1; - } - } - - if (!(ds & CRAM_FC)) - goto skip_cigar; - - /* An implicit match op for any unaccounted for bases */ - if ((ds & CRAM_FN) && cr->len >= seq_pos) { - if (s->ref && cr->ref_id >= 0) { - if (ref_pos + cr->len - seq_pos + 1 > bfd->ref[cr->ref_id].len) { - static int whinged = 0; - int rlen; - if (!whinged) - hts_log_warning("Ref pos outside of ref sequence boundary"); - whinged = 1; - rlen = bfd->ref[cr->ref_id].len - ref_pos; - // May miss MD/NM cases where both seq/ref are N, but this is a - // malformed cram file anyway. 
- if (rlen > 0) { - if (seq_pos-1 + rlen < cr->len) - memcpy(&seq[seq_pos-1], - &s->ref[ref_pos - s->ref_start +1], rlen); - if ((cr->len - seq_pos + 1) - rlen > 0) - memset(&seq[seq_pos-1+rlen], 'N', - (cr->len - seq_pos + 1) - rlen); - } else { - if (cr->len - seq_pos + 1 > 0) - memset(&seq[seq_pos-1], 'N', cr->len - seq_pos + 1); - } - if (md_dist >= 0) - md_dist += cr->len - seq_pos + 1; - } else { - if (cr->len - seq_pos + 1 > 0) { - if (ref_pos + cr->len-seq_pos +1 > s->ref_end) - goto beyond_slice; - int remainder = cr->len - (seq_pos-1); - int j = ref_pos - s->ref_start + 1; - if (decode_md || decode_nm) { - int i; - char *N = memchr(&s->ref[j], 'N', remainder); - if (!N) { - // short cut the common case - md_dist += cr->len - (seq_pos-1); - } else { - char *refp = &s->ref[j-(seq_pos-1)]; - md_dist += N-&s->ref[j]; - int i_start = seq_pos-1 + (N - &s->ref[j]); - for (i = i_start; i < cr->len; i++) { - char base = refp[i]; - if (base == 'N') { - if (add_md_char(s, decode_md, 'N', - &md_dist) < 0) - return -1; - nm++; - } else { - md_dist++; - } - } - } - } - memcpy(&seq[seq_pos-1], &s->ref[j], remainder); - } - ref_pos += cr->len - seq_pos + 1; - } - } else if (cr->ref_id >= 0) { - // So alignment end can be computed even when not decoding sequence - ref_pos += cr->len - seq_pos + 1; - } - - if (ncigar+1 >= cigar_alloc) { - cigar_alloc = cigar_alloc ? 
cigar_alloc*2 : 1024; - if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) - return -1; - s->cigar = cigar; - } -#ifdef USE_X - if (cig_len && cig_op != BAM_CBASE_MATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - cig_op = BAM_CBASE_MATCH; -#else - if (cig_len && cig_op != BAM_CMATCH) { - cigar[ncigar++] = (cig_len<<4) + cig_op; - cig_len = 0; - } - cig_op = BAM_CMATCH; -#endif - cig_len += cr->len - seq_pos+1; - } - - skip_cigar: - - if ((ds & CRAM_FN) && decode_md) { - if (md_dist >= 0) - BLOCK_APPEND_UINT(s->aux_blk, md_dist); - } - - if (cig_len) { - if (ncigar >= cigar_alloc) { - cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024; - if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) - return -1; - s->cigar = cigar; - } - - cigar[ncigar++] = (cig_len<<4) + cig_op; - } - - cr->ncigar = ncigar - cr->cigar; - cr->aend = ref_pos > cr->apos ? ref_pos : cr->apos; - - //printf("2: %.*s %d .. %d\n", cr->name_len, DSTRING_STR(name_ds) + cr->name, cr->apos, ref_pos); - - if (ds & CRAM_MQ) { - if (!codecs[DS_MQ]) return -1; - r |= codecs[DS_MQ]->decode(s, codecs[DS_MQ], blk, - (char *)&cr->mqual, &out_sz); - } else { - cr->mqual = 40; - } - - if ((ds & CRAM_QS) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { - int32_t out_sz2 = cr->len; - - if (!codecs[DS_QS]) return -1; - r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, - qual, &out_sz2); - } - - s->cigar = cigar; - s->cigar_alloc = cigar_alloc; - s->ncigar = ncigar; - - if (cr->cram_flags & CRAM_FLAG_NO_SEQ) - cr->len = 0; - - if (decode_md) { - BLOCK_APPEND_CHAR(s->aux_blk, '\0'); // null terminate MD:Z: - size_t sz = BLOCK_SIZE(s->aux_blk) - orig_aux; - if (has_MD < 0) { - // has_MD < 0; already have MDZ allocated in aux at -has_MD, - // but wrote MD to end of aux (at orig_aux). - // We need some memmoves to shuffle it around. 
- char tmp_MD_[1024], *tmp_MD = tmp_MD_; - unsigned char *orig_aux_p = BLOCK_DATA(s->aux_blk) + orig_aux; - if (sz > 1024) { - tmp_MD = malloc(sz); - if (!tmp_MD) - return -1; - } - memcpy(tmp_MD, orig_aux_p, sz); - memmove(&BLOCK_DATA(s->aux_blk)[-has_MD] + sz, - &BLOCK_DATA(s->aux_blk)[-has_MD], - orig_aux_p - &BLOCK_DATA(s->aux_blk)[-has_MD]); - memcpy(&BLOCK_DATA(s->aux_blk)[-has_MD], tmp_MD, sz); - if (tmp_MD != tmp_MD_) - free(tmp_MD); - - if (-has_NM > -has_MD) - // we inserted before NM, so move it up a bit - has_NM -= sz; - } - // else has_MD == 0 and we've already appended MD to the end. - - cr->aux_size += sz; - } - - if (decode_nm) { - if (has_NM == 0) { - char buf[7]; - size_t buf_size; - buf[0] = 'N'; buf[1] = 'M'; - if (nm <= UINT8_MAX) { - buf_size = 4; - buf[2] = 'C'; - buf[3] = (nm>> 0) & 0xff; - } else if (nm <= UINT16_MAX) { - buf_size = 5; - buf[2] = 'S'; - buf[3] = (nm>> 0) & 0xff; - buf[4] = (nm>> 8) & 0xff; - } else { - buf_size = 7; - buf[2] = 'I'; - buf[3] = (nm>> 0) & 0xff; - buf[4] = (nm>> 8) & 0xff; - buf[5] = (nm>>16) & 0xff; - buf[6] = (nm>>24) & 0xff; - } - BLOCK_APPEND(s->aux_blk, buf, buf_size); - cr->aux_size += buf_size; - } else { - // Preallocated space for NM at -has_NM into aux block - unsigned char *buf = BLOCK_DATA(s->aux_blk) + -has_NM; - buf[0] = (nm>> 0) & 0xff; - buf[1] = (nm>> 8) & 0xff; - buf[2] = (nm>>16) & 0xff; - buf[3] = (nm>>24) & 0xff; - } - } - - return r; - - beyond_slice: - // Cramtools can create CRAMs that have sequence features outside the - // stated range of the container & slice reference extents (start + span). - // We have to check for these in many places, but for brevity have the - // error reporting in only one. 
- hts_log_error("CRAM CIGAR extends beyond slice reference extents"); - return -1; - - block_err: - return -1; -} - -/* - * Quick and simple hash lookup for cram_map arrays - */ -static cram_map *map_find(cram_map **map, unsigned char *key, int id) { - cram_map *m; - - m = map[CRAM_MAP(key[0],key[1])]; - while (m && m->key != id) - m= m->next; - - return m; -} - -//#define map_find(M,K,I) M[CRAM_MAP(K[0],K[1])];while (m && m->key != I);m= m->next - - -static int cram_decode_aux_1_0(cram_container *c, cram_slice *s, - cram_block *blk, cram_record *cr) { - int i, r = 0, out_sz = 1; - unsigned char ntags; - - if (!c->comp_hdr->codecs[DS_TC]) return -1; - r |= c->comp_hdr->codecs[DS_TC]->decode(s, c->comp_hdr->codecs[DS_TC], blk, - (char *)&ntags, &out_sz); - cr->ntags = ntags; - - //printf("TC=%d\n", cr->ntags); - cr->aux_size = 0; - cr->aux = BLOCK_SIZE(s->aux_blk); - - for (i = 0; i < cr->ntags; i++) { - int32_t id, out_sz = 1; - unsigned char tag_data[3]; - cram_map *m; - - //printf("Tag %d/%d\n", i+1, cr->ntags); - if (!c->comp_hdr->codecs[DS_TN]) return -1; - r |= c->comp_hdr->codecs[DS_TN]->decode(s, c->comp_hdr->codecs[DS_TN], - blk, (char *)&id, &out_sz); - if (out_sz == 3) { - // Tag name stored as 3 chars instead of an int? - memcpy(tag_data, &id, 3); - } else { - tag_data[0] = (id>>16) & 0xff; - tag_data[1] = (id>>8) & 0xff; - tag_data[2] = id & 0xff; - } - - m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id); - if (!m) - return -1; - BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3); - - if (!m->codec) return -1; - r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz); - - cr->aux_size += out_sz + 3; - } - - return r; - - block_err: - return -1; -} - -// has_MD and has_NM are filled out with 0 for none present, -// 1 for present and verbatim, and -pos for present as placeholder -// (MD*, NM*) to be generated and filled out at offset +pos. 
-static int cram_decode_aux(cram_fd *fd, - cram_container *c, cram_slice *s, - cram_block *blk, cram_record *cr, - int *has_MD, int *has_NM) { - int i, r = 0, out_sz = 1; - int32_t TL = 0; - unsigned char *TN; - uint32_t ds = s->data_series; - - if (!(ds & (CRAM_TL|CRAM_aux))) { - cr->aux = 0; - cr->aux_size = 0; - return 0; - } - - if (!c->comp_hdr->codecs[DS_TL]) return -1; - r |= c->comp_hdr->codecs[DS_TL]->decode(s, c->comp_hdr->codecs[DS_TL], blk, - (char *)&TL, &out_sz); - if (r || TL < 0 || TL >= c->comp_hdr->nTL) - return -1; - - TN = c->comp_hdr->TL[TL]; - cr->ntags = strlen((char *)TN)/3; // optimise to remove strlen - - //printf("TC=%d\n", cr->ntags); - cr->aux_size = 0; - cr->aux = BLOCK_SIZE(s->aux_blk); - - if (!(ds & CRAM_aux)) - return 0; - - for (i = 0; i < cr->ntags; i++) { - int32_t id, out_sz = 1; - unsigned char tag_data[7]; - cram_map *m; - - if (TN[0] == 'M' && TN[1] == 'D' && has_MD) - *has_MD = (BLOCK_SIZE(s->aux_blk)+3) * (TN[2] == '*' ? -1 : 1); - if (TN[0] == 'N' && TN[1] == 'M' && has_NM) - *has_NM = (BLOCK_SIZE(s->aux_blk)+3) * (TN[2] == '*' ? -1 : 1);; - - //printf("Tag %d/%d\n", i+1, cr->ntags); - tag_data[0] = TN[0]; - tag_data[1] = TN[1]; - tag_data[2] = TN[2]; - id = (tag_data[0]<<16) | (tag_data[1]<<8) | tag_data[2]; - - if (CRAM_MAJOR_VERS(fd->version) >= 4 && TN[2] == '*') { - // Place holder, fill out contents later. - int tag_data_size; - if (TN[0] == 'N' && TN[1] == 'M') { - // Use a fixed size, so we can allocate room for it now. - memcpy(&tag_data[2], "I\0\0\0\0", 5); - tag_data_size = 7; - } else if (TN[0] == 'R' && TN[1] == 'G') { - // RG is variable size, but known already. Insert now - TN += 3; - // Equiv to fd->header->hrecs->rg[cr->rg], but this is the - // new header API equivalent. 
- const char *rg = sam_hdr_line_name(fd->header, "RG", cr->rg); - if (!rg) - continue; - - size_t rg_len = strlen(rg); - tag_data[2] = 'Z'; - BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3); - BLOCK_APPEND(s->aux_blk, rg, rg_len); - BLOCK_APPEND_CHAR(s->aux_blk, '\0'); - cr->aux_size += 3 + rg_len + 1; - cr->rg = -1; // prevents auto-add later - continue; - } else { - // Unknown size. We'll insert MD into stream later. - tag_data[2] = 'Z'; - tag_data_size = 3; - } - BLOCK_APPEND(s->aux_blk, (char *)tag_data, tag_data_size); - cr->aux_size += tag_data_size; - TN += 3; - } else { - TN += 3; - m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id); - if (!m) - return -1; - - BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3); - - if (!m->codec) return -1; - r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz); - if (r) break; - cr->aux_size += out_sz + 3; - - // cF CRAM flags. - if (TN[-3]=='c' && TN[-2]=='F' && TN[-1]=='C' && out_sz == 1) { - // Remove cF tag - uint8_t cF = BLOCK_END(s->aux_blk)[-1]; - BLOCK_SIZE(s->aux_blk) -= out_sz+3; - cr->aux_size -= out_sz+3; - - // bit 1 => don't auto-decode MD. - // Pretend MD is present verbatim, so we don't auto-generate - if ((cF & 1) && has_MD && *has_MD == 0) - *has_MD = 1; - - // bit 1 => don't auto-decode NM - if ((cF & 2) && has_NM && *has_NM == 0) - *has_NM = 1; - } - } - - // We could go to 2^32 fine, but we shouldn't be hitting this anyway, - // and it's protecting against memory hogs too. 
- if (BLOCK_SIZE(s->aux_blk) > (1u<<31)) { - hts_log_error("CRAM->BAM aux block size overflow"); - goto block_err; - } - } - - return r; - - block_err: - return -1; -} - -/* Resolve mate pair cross-references between recs within this slice */ -static int cram_decode_slice_xref(cram_slice *s, int required_fields) { - int rec; - - if (!(required_fields & (SAM_RNEXT | SAM_PNEXT | SAM_TLEN))) { - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - - cr->tlen = 0; - cr->mate_pos = 0; - cr->mate_ref_id = -1; - } - - return 0; - } - - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - - if (cr->mate_line >= 0) { - if (cr->mate_line < s->hdr->num_records) { - /* - * On the first read, loop through computing lengths. - * It's not perfect as we have one slice per reference so we - * cannot detect when TLEN should be zero due to seqs that - * map to multiple references. - * - * We also cannot set tlen correct when it spans a slice for - * other reasons. This may make tlen too small. Should we - * fix this by forcing TLEN to be stored verbatim in such cases? - * - * Or do we just admit defeat and output 0 for tlen? It's the - * safe option... - */ - if (cr->tlen == INT64_MIN) { - int id1 = rec, id2 = rec; - int64_t aleft = cr->apos, aright = cr->aend; - int64_t tlen; - int ref = cr->ref_id; - - // number of segments starting at the same point. 
- int left_cnt = 0; - - do { - if (aleft > s->crecs[id2].apos) - aleft = s->crecs[id2].apos, left_cnt = 1; - else if (aleft == s->crecs[id2].apos) - left_cnt++; - if (aright < s->crecs[id2].aend) - aright = s->crecs[id2].aend; - if (s->crecs[id2].mate_line == -1) { - s->crecs[id2].mate_line = rec; - break; - } - if (s->crecs[id2].mate_line <= id2 || - s->crecs[id2].mate_line >= s->hdr->num_records) - return -1; - id2 = s->crecs[id2].mate_line; - - if (s->crecs[id2].ref_id != ref) - ref = -1; - } while (id2 != id1); - - if (ref != -1) { - tlen = aright - aleft + 1; - id1 = id2 = rec; - - /* - * When we have two seqs with identical start and - * end coordinates, set +/- tlen based on 1st/last - * bit flags instead, as a tie breaker. - */ - if (s->crecs[id2].apos == aleft) { - if (left_cnt == 1 || - (s->crecs[id2].flags & BAM_FREAD1)) - s->crecs[id2].tlen = tlen; - else - s->crecs[id2].tlen = -tlen; - } else { - s->crecs[id2].tlen = -tlen; - } - - id2 = s->crecs[id2].mate_line; - while (id2 != id1) { - if (s->crecs[id2].apos == aleft) { - if (left_cnt == 1 || - (s->crecs[id2].flags & BAM_FREAD1)) - s->crecs[id2].tlen = tlen; - else - s->crecs[id2].tlen = -tlen; - } else { - s->crecs[id2].tlen = -tlen; - } - id2 = s->crecs[id2].mate_line; - } - } else { - id1 = id2 = rec; - - s->crecs[id2].tlen = 0; - id2 = s->crecs[id2].mate_line; - while (id2 != id1) { - s->crecs[id2].tlen = 0; - id2 = s->crecs[id2].mate_line; - } - } - } - - cr->mate_pos = s->crecs[cr->mate_line].apos; - cr->mate_ref_id = s->crecs[cr->mate_line].ref_id; - - // paired - cr->flags |= BAM_FPAIRED; - - // set mate unmapped if needed - if (s->crecs[cr->mate_line].flags & BAM_FUNMAP) { - cr->flags |= BAM_FMUNMAP; - cr->tlen = 0; - } - if (cr->flags & BAM_FUNMAP) { - cr->tlen = 0; - } - - // set mate reversed if needed - if (s->crecs[cr->mate_line].flags & BAM_FREVERSE) - cr->flags |= BAM_FMREVERSE; - } else { - hts_log_error("Mate line out of bounds: %d vs [0, %d]", - cr->mate_line, 
s->hdr->num_records-1); - } - - /* FIXME: construct read names here too if needed */ - } else { - if (cr->mate_flags & CRAM_M_REVERSE) { - cr->flags |= BAM_FPAIRED | BAM_FMREVERSE; - } - if (cr->mate_flags & CRAM_M_UNMAP) { - cr->flags |= BAM_FMUNMAP; - //cr->mate_ref_id = -1; - } - if (!(cr->flags & BAM_FPAIRED)) - cr->mate_ref_id = -1; - } - - if (cr->tlen == INT64_MIN) - cr->tlen = 0; // Just incase - } - - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - if (cr->explicit_tlen != INT64_MIN) - cr->tlen = cr->explicit_tlen; - } - - return 0; -} - -static char *md5_print(unsigned char *md5, char *out) { - int i; - for (i = 0; i < 16; i++) { - out[i*2+0] = "0123456789abcdef"[md5[i]>>4]; - out[i*2+1] = "0123456789abcdef"[md5[i]&15]; - } - out[32] = 0; - - return out; -} - -/* - * Utility function to decode tlen (ISIZE), as it's called - * in multiple places. - * - * Returns codec return value (0 on success). - */ -static int cram_decode_tlen(cram_fd *fd, cram_container *c, cram_slice *s, - cram_block *blk, int64_t *tlen) { - int out_sz = 1, r = 0; - - if (!c->comp_hdr->codecs[DS_TS]) return -1; - if (CRAM_MAJOR_VERS(fd->version) < 4) { - int32_t i32; - r |= c->comp_hdr->codecs[DS_TS] - ->decode(s, c->comp_hdr->codecs[DS_TS], blk, - (char *)&i32, &out_sz); - *tlen = i32; - } else { - r |= c->comp_hdr->codecs[DS_TS] - ->decode(s, c->comp_hdr->codecs[DS_TS], blk, - (char *)tlen, &out_sz); - } - return r; -} - -/* - * Decode an entire slice from container blocks. Fills out s->crecs[] array. 
- * Returns 0 on success - * -1 on failure - */ -int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s, - sam_hdr_t *sh) { - cram_block *blk = s->block[0]; - int32_t bf, ref_id; - unsigned char cf; - int out_sz, r = 0; - int rec; - char *seq = NULL, *qual = NULL; - int unknown_rg = -1; - int embed_ref; - char **refs = NULL; - uint32_t ds; - sam_hrecs_t *bfd = sh->hrecs; - - if (cram_dependent_data_series(fd, c->comp_hdr, s) != 0) - return -1; - - ds = s->data_series; - - blk->bit = 7; // MSB first - - // Study the blocks and estimate approx sizes to preallocate. - // This looks to speed up decoding by around 8-9%. - // We can always shrink back down at the end if we overestimated. - // However it's likely that this also saves memory as own growth - // factor (*=1.5) is never applied. - { - int qsize, nsize, q_id; - cram_decode_estimate_sizes(c->comp_hdr, s, &qsize, &nsize, &q_id); - //fprintf(stderr, "qsize=%d nsize=%d\n", qsize, nsize); - - if (qsize && (ds & CRAM_RL)) BLOCK_RESIZE_EXACT(s->seqs_blk, qsize+1); - if (qsize && (ds & CRAM_RL)) BLOCK_RESIZE_EXACT(s->qual_blk, qsize+1); - if (nsize && (ds & CRAM_NS)) BLOCK_RESIZE_EXACT(s->name_blk, nsize+1); - - // To do - consider using q_id here to usurp the quality block and - // avoid a memcpy during decode. - // Specifically when quality is an external block uniquely used by - // DS_QS only, then we can set s->qual_blk directly to this - // block and save the codec->decode() calls. (Approx 3% cpu saving) - } - - /* Look for unknown RG, added as last by Java CRAM? */ - if (bfd->nrg > 0 && - bfd->rg[bfd->nrg-1].name != NULL && - !strcmp(bfd->rg[bfd->nrg-1].name, "UNKNOWN")) - unknown_rg = bfd->nrg-1; - - if (blk->content_type != CORE) - return -1; - - if (s->crecs) - free(s->crecs); - if (!(s->crecs = malloc(s->hdr->num_records * sizeof(*s->crecs)))) - return -1; - - ref_id = s->hdr->ref_seq_id; - if (CRAM_MAJOR_VERS(fd->version) < 4) - embed_ref = s->hdr->ref_base_id >= 0 ? 
1 : 0; - else - embed_ref = s->hdr->ref_base_id > 0 ? 1 : 0; - - if (ref_id >= 0) { - if (embed_ref) { - cram_block *b; - if (s->hdr->ref_base_id < 0) { - hts_log_error("No reference specified and no embedded reference is available" - " at #%d:%"PRId64"-%"PRId64, ref_id, s->hdr->ref_seq_start, - s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); - return -1; - } - b = cram_get_block_by_id(s, s->hdr->ref_base_id); - if (!b) - return -1; - if (cram_uncompress_block(b) != 0) - return -1; - s->ref = (char *)BLOCK_DATA(b); - s->ref_start = s->hdr->ref_seq_start; - s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1; - if (s->hdr->ref_seq_span > b->uncomp_size) { - hts_log_error("Embedded reference is too small at #%d:%"PRIhts_pos"-%"PRIhts_pos, - ref_id, s->ref_start, s->ref_end); - return -1; - } - } else if (!c->comp_hdr->no_ref) { - //// Avoid Java cramtools bug by loading entire reference seq - //s->ref = cram_get_ref(fd, s->hdr->ref_seq_id, 1, 0); - //s->ref_start = 1; - - if (fd->required_fields & SAM_SEQ) { - s->ref = - cram_get_ref(fd, s->hdr->ref_seq_id, - s->hdr->ref_seq_start, - s->hdr->ref_seq_start + s->hdr->ref_seq_span -1); - } - s->ref_start = s->hdr->ref_seq_start; - s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1; - - /* Sanity check */ - if (s->ref_start < 0) { - hts_log_warning("Slice starts before base 1" - " at #%d:%"PRId64"-%"PRId64, ref_id, s->hdr->ref_seq_start, - s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); - s->ref_start = 0; - } - pthread_mutex_lock(&fd->ref_lock); - pthread_mutex_lock(&fd->refs->lock); - if ((fd->required_fields & SAM_SEQ) && - ref_id < fd->refs->nref && fd->refs->ref_id && - s->ref_end > fd->refs->ref_id[ref_id]->length) { - s->ref_end = fd->refs->ref_id[ref_id]->length; - } - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - } - } - - if ((fd->required_fields & SAM_SEQ) && - s->ref == NULL && s->hdr->ref_seq_id >= 0 && !c->comp_hdr->no_ref) { - hts_log_error("Unable to 
fetch reference #%d:%"PRId64"-%"PRId64"\n", - ref_id, s->hdr->ref_seq_start, - s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); - return -1; - } - - if (CRAM_MAJOR_VERS(fd->version) != 1 - && (fd->required_fields & SAM_SEQ) - && s->hdr->ref_seq_id >= 0 - && !fd->ignore_md5 - && memcmp(s->hdr->md5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) { - hts_md5_context *md5; - unsigned char digest[16]; - - if (s->ref && s->hdr->ref_seq_id >= 0) { - int start, len; - - if (s->hdr->ref_seq_start >= s->ref_start) { - start = s->hdr->ref_seq_start - s->ref_start; - } else { - hts_log_warning("Slice starts before base 1 at #%d:%"PRIhts_pos"-%"PRIhts_pos, - ref_id, s->ref_start, s->ref_end); - start = 0; - } - - if (s->hdr->ref_seq_span <= s->ref_end - s->ref_start + 1) { - len = s->hdr->ref_seq_span; - } else { - hts_log_warning("Slice ends beyond reference end at #%d:%"PRIhts_pos"-%"PRIhts_pos, - ref_id, s->ref_start, s->ref_end); - len = s->ref_end - s->ref_start + 1; - } - - if (!(md5 = hts_md5_init())) - return -1; - if (start + len > s->ref_end - s->ref_start + 1) - len = s->ref_end - s->ref_start + 1 - start; - if (len >= 0) - hts_md5_update(md5, s->ref + start, len); - hts_md5_final(digest, md5); - hts_md5_destroy(md5); - } else if (!s->ref && s->hdr->ref_base_id >= 0) { - cram_block *b = cram_get_block_by_id(s, s->hdr->ref_base_id); - if (b) { - if (!(md5 = hts_md5_init())) - return -1; - hts_md5_update(md5, b->data, b->uncomp_size); - hts_md5_final(digest, md5); - hts_md5_destroy(md5); - } - } - - if (!c->comp_hdr->no_ref && - ((!s->ref && s->hdr->ref_base_id < 0) - || memcmp(digest, s->hdr->md5, 16) != 0)) { - char M[33]; - const char *rname = sam_hdr_tid2name(sh, ref_id); - if (!rname) rname="?"; // cannot happen normally - hts_log_error("MD5 checksum reference mismatch at %s:%"PRIhts_pos"-%"PRIhts_pos, - rname, s->ref_start, s->ref_end); - hts_log_error("CRAM : %s", md5_print(s->hdr->md5, M)); - hts_log_error("Ref : %s", md5_print(digest, M)); - kstring_t ks = 
KS_INITIALIZE; - if (sam_hdr_find_tag_id(sh, "SQ", "SN", rname, "M5", &ks) == 0) - hts_log_error("@SQ M5: %s", ks.s); - hts_log_error("Please check the reference given is correct"); - ks_free(&ks); - return -1; - } - } - - if (ref_id == -2) { - pthread_mutex_lock(&fd->ref_lock); - pthread_mutex_lock(&fd->refs->lock); - refs = calloc(fd->refs->nref, sizeof(char *)); - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - if (!refs) - return -1; - } - - int last_ref_id = -9; // Arbitrary -ve marker for not-yet-set - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - int has_MD, has_NM; - - //fprintf(stderr, "Decode seq %d, %d/%d\n", rec, blk->byte, blk->bit); - - cr->s = s; - - out_sz = 1; /* decode 1 item */ - if (ds & CRAM_BF) { - if (!c->comp_hdr->codecs[DS_BF]) goto block_err; - r |= c->comp_hdr->codecs[DS_BF] - ->decode(s, c->comp_hdr->codecs[DS_BF], blk, - (char *)&bf, &out_sz); - if (r || bf < 0 || - bf >= sizeof(fd->bam_flag_swap)/sizeof(*fd->bam_flag_swap)) - goto block_err; - bf = fd->bam_flag_swap[bf]; - cr->flags = bf; - } else { - cr->flags = bf = 0x4; // unmapped - } - - if (ds & CRAM_CF) { - if (CRAM_MAJOR_VERS(fd->version) == 1) { - /* CF is byte in 1.0, int32 in 2.0 */ - if (!c->comp_hdr->codecs[DS_CF]) goto block_err; - r |= c->comp_hdr->codecs[DS_CF] - ->decode(s, c->comp_hdr->codecs[DS_CF], blk, - (char *)&cf, &out_sz); - if (r) goto block_err; - cr->cram_flags = cf; - } else { - if (!c->comp_hdr->codecs[DS_CF]) goto block_err; - r |= c->comp_hdr->codecs[DS_CF] - ->decode(s, c->comp_hdr->codecs[DS_CF], blk, - (char *)&cr->cram_flags, &out_sz); - if (r) goto block_err; - cf = cr->cram_flags; - } - } else { - cf = cr->cram_flags = 0; - } - - if (CRAM_MAJOR_VERS(fd->version) != 1 && ref_id == -2) { - if (ds & CRAM_RI) { - if (!c->comp_hdr->codecs[DS_RI]) goto block_err; - r |= c->comp_hdr->codecs[DS_RI] - ->decode(s, c->comp_hdr->codecs[DS_RI], blk, - (char *)&cr->ref_id, &out_sz); - if 
(r) goto block_err; - if ((fd->required_fields & (SAM_SEQ|SAM_TLEN)) - && cr->ref_id >= 0 - && cr->ref_id != last_ref_id) { - if (!c->comp_hdr->no_ref) { - // Range(fd): seq >= 0, unmapped -1, unspecified -2 - // Slice(s): seq >= 0, unmapped -1, multiple refs -2 - // Record(cr): seq >= 0, unmapped -1 - pthread_mutex_lock(&fd->range_lock); - int need_ref = (fd->range.refid == -2 || cr->ref_id == fd->range.refid); - pthread_mutex_unlock(&fd->range_lock); - if (need_ref) { - if (!refs[cr->ref_id]) - refs[cr->ref_id] = cram_get_ref(fd, cr->ref_id, 1, 0); - if (!(s->ref = refs[cr->ref_id])) - goto block_err; - } else { - // For multi-ref containers, we don't need to fetch all - // refs if we're only querying one. - s->ref = NULL; - } - - pthread_mutex_lock(&fd->range_lock); - int discard_last_ref = (last_ref_id >= 0 && - refs[last_ref_id] && - (fd->range.refid == -2 || - last_ref_id == fd->range.refid)); - pthread_mutex_unlock(&fd->range_lock); - if (discard_last_ref) { - pthread_mutex_lock(&fd->ref_lock); - discard_last_ref = !fd->unsorted; - pthread_mutex_unlock(&fd->ref_lock); - } - if (discard_last_ref) { - cram_ref_decr(fd->refs, last_ref_id); - refs[last_ref_id] = NULL; - } - } - s->ref_start = 1; - pthread_mutex_lock(&fd->ref_lock); - pthread_mutex_lock(&fd->refs->lock); - s->ref_end = fd->refs->ref_id[cr->ref_id]->length; - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - - last_ref_id = cr->ref_id; - } - } else { - cr->ref_id = -1; - } - } else { - cr->ref_id = ref_id; // Forced constant in CRAM 1.0 - } - if (cr->ref_id < -1 || cr->ref_id >= bfd->nref) { - hts_log_error("Requested unknown reference ID %d", cr->ref_id); - goto block_err; - } - - if (ds & CRAM_RL) { - if (!c->comp_hdr->codecs[DS_RL]) goto block_err; - r |= c->comp_hdr->codecs[DS_RL] - ->decode(s, c->comp_hdr->codecs[DS_RL], blk, - (char *)&cr->len, &out_sz); - if (r) goto block_err; - if (cr->len < 0) { - hts_log_error("Read has negative length"); - goto block_err; 
- } - } - - if (ds & CRAM_AP) { - if (!c->comp_hdr->codecs[DS_AP]) goto block_err; - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - r |= c->comp_hdr->codecs[DS_AP] - ->decode(s, c->comp_hdr->codecs[DS_AP], blk, - (char *)&cr->apos, &out_sz); - } else { - int32_t i32; - r |= c->comp_hdr->codecs[DS_AP] - ->decode(s, c->comp_hdr->codecs[DS_AP], blk, - (char *)&i32, &out_sz); - cr->apos = i32; - } - if (r) goto block_err;; - if (c->comp_hdr->AP_delta) { - if (cr->apos < 0 && c->unsorted == 0) { - // cache locally in c->unsorted so we don't have an - // excessive number of locks - pthread_mutex_lock(&fd->ref_lock); - c->unsorted = fd->unsorted = 1; - pthread_mutex_unlock(&fd->ref_lock); - } - cr->apos += s->last_apos; - } - s->last_apos= cr->apos; - } else { - cr->apos = c->ref_seq_start; - } - - if (ds & CRAM_RG) { - if (!c->comp_hdr->codecs[DS_RG]) goto block_err; - r |= c->comp_hdr->codecs[DS_RG] - ->decode(s, c->comp_hdr->codecs[DS_RG], blk, - (char *)&cr->rg, &out_sz); - if (r) goto block_err; - if (cr->rg == unknown_rg) - cr->rg = -1; - } else { - cr->rg = -1; - } - - cr->name_len = 0; - - if (c->comp_hdr->read_names_included) { - int32_t out_sz2 = 1; - - // Read directly into name cram_block - cr->name = BLOCK_SIZE(s->name_blk); - if (ds & CRAM_RN) { - if (!c->comp_hdr->codecs[DS_RN]) goto block_err; - r |= c->comp_hdr->codecs[DS_RN] - ->decode(s, c->comp_hdr->codecs[DS_RN], blk, - (char *)s->name_blk, &out_sz2); - if (r) goto block_err; - cr->name_len = out_sz2; - } - } - - cr->mate_pos = 0; - cr->mate_line = -1; - cr->mate_ref_id = -1; - cr->explicit_tlen = INT64_MIN; - if ((ds & CRAM_CF) && (cf & CRAM_FLAG_DETACHED)) { - if (ds & CRAM_MF) { - if (CRAM_MAJOR_VERS(fd->version) == 1) { - /* MF is byte in 1.0, int32 in 2.0 */ - unsigned char mf; - if (!c->comp_hdr->codecs[DS_MF]) goto block_err; - r |= c->comp_hdr->codecs[DS_MF] - ->decode(s, c->comp_hdr->codecs[DS_MF], - blk, (char *)&mf, &out_sz); - if (r) goto block_err; - cr->mate_flags = mf; - } else { - if 
(!c->comp_hdr->codecs[DS_MF]) goto block_err; - r |= c->comp_hdr->codecs[DS_MF] - ->decode(s, c->comp_hdr->codecs[DS_MF], - blk, - (char *)&cr->mate_flags, - &out_sz); - if (r) goto block_err; - } - } else { - cr->mate_flags = 0; - } - - if (!c->comp_hdr->read_names_included) { - int32_t out_sz2 = 1; - - // Read directly into name cram_block - cr->name = BLOCK_SIZE(s->name_blk); - if (ds & CRAM_RN) { - if (!c->comp_hdr->codecs[DS_RN]) goto block_err; - r |= c->comp_hdr->codecs[DS_RN] - ->decode(s, c->comp_hdr->codecs[DS_RN], - blk, (char *)s->name_blk, - &out_sz2); - if (r) goto block_err; - cr->name_len = out_sz2; - } - } - - if (ds & CRAM_NS) { - if (!c->comp_hdr->codecs[DS_NS]) goto block_err; - r |= c->comp_hdr->codecs[DS_NS] - ->decode(s, c->comp_hdr->codecs[DS_NS], blk, - (char *)&cr->mate_ref_id, &out_sz); - if (r) goto block_err; - } - - // Skip as mate_ref of "*" is legit. It doesn't mean unmapped, just unknown. - // if (cr->mate_ref_id == -1 && cr->flags & 0x01) { - // /* Paired, but unmapped */ - // cr->flags |= BAM_FMUNMAP; - // } - - if (ds & CRAM_NP) { - if (!c->comp_hdr->codecs[DS_NP]) goto block_err;; - if (CRAM_MAJOR_VERS(fd->version) < 4) { - int32_t i32; - r |= c->comp_hdr->codecs[DS_NP] - ->decode(s, c->comp_hdr->codecs[DS_NP], blk, - (char *)&i32, &out_sz); - cr->mate_pos = i32; - } else { - r |= c->comp_hdr->codecs[DS_NP] - ->decode(s, c->comp_hdr->codecs[DS_NP], blk, - (char *)&cr->mate_pos, &out_sz); - } - if (r) goto block_err; - } - - if (ds & CRAM_TS) { - if (!c->comp_hdr->codecs[DS_TS]) goto block_err; - r = cram_decode_tlen(fd, c, s, blk, &cr->tlen); - if (r) goto block_err; - } else { - cr->tlen = INT64_MIN; - } - } else if ((ds & CRAM_CF) && (cf & CRAM_FLAG_MATE_DOWNSTREAM)) { - // else not detached - if (ds & CRAM_NF) { - if (!c->comp_hdr->codecs[DS_NF]) goto block_err; - r |= c->comp_hdr->codecs[DS_NF] - ->decode(s, c->comp_hdr->codecs[DS_NF], blk, - (char *)&cr->mate_line, &out_sz); - if (r) goto block_err; - cr->mate_line += rec + 
1; - - //cr->name_len = sprintf(name, "%d", name_id++); - //cr->name = DSTRING_LEN(name_ds); - //dstring_nappend(name_ds, name, cr->name_len); - - cr->mate_ref_id = -1; - cr->tlen = INT64_MIN; - cr->mate_pos = 0; - } else { - cr->mate_flags = 0; - cr->tlen = INT64_MIN; - } - if ((ds & CRAM_CF) && (cf & CRAM_FLAG_EXPLICIT_TLEN)) { - if (ds & CRAM_TS) { - r = cram_decode_tlen(fd, c, s, blk, &cr->explicit_tlen); - if (r) return r; - } else { - cr->mate_flags = 0; - cr->tlen = INT64_MIN; - } - } - } else if ((ds & CRAM_CF) && (cf & CRAM_FLAG_EXPLICIT_TLEN)) { - if (ds & CRAM_TS) { - r = cram_decode_tlen(fd, c, s, blk, &cr->explicit_tlen); - if (r) return r; - } else { - cr->mate_flags = 0; - cr->tlen = INT64_MIN; - } - } else { - cr->mate_flags = 0; - cr->tlen = INT64_MIN; - } - /* - else if (!name[0]) { - //name[0] = '?'; name[1] = 0; - //cr->name_len = 1; - //cr->name= DSTRING_LEN(s->name_ds); - //dstring_nappend(s->name_ds, "?", 1); - - cr->mate_ref_id = -1; - cr->tlen = 0; - cr->mate_pos = 0; - } - */ - - /* Auxiliary tags */ - has_MD = has_NM = 0; - if (CRAM_MAJOR_VERS(fd->version) == 1) - r |= cram_decode_aux_1_0(c, s, blk, cr); - else - r |= cram_decode_aux(fd, c, s, blk, cr, &has_MD, &has_NM); - if (r) goto block_err; - - /* Fake up dynamic string growth and appending */ - if (ds & CRAM_RL) { - cr->seq = BLOCK_SIZE(s->seqs_blk); - BLOCK_GROW(s->seqs_blk, cr->len); - seq = (char *)BLOCK_END(s->seqs_blk); - BLOCK_SIZE(s->seqs_blk) += cr->len; - - if (!seq) - goto block_err; - - cr->qual = BLOCK_SIZE(s->qual_blk); - BLOCK_GROW(s->qual_blk, cr->len); - qual = (char *)BLOCK_END(s->qual_blk); - BLOCK_SIZE(s->qual_blk) += cr->len; - - if (!s->ref) - memset(seq, '=', cr->len); - } - - if (!(bf & BAM_FUNMAP)) { - if ((ds & CRAM_AP) && cr->apos <= 0) { - hts_log_error("Read has alignment position %"PRId64 - " but no unmapped flag", - cr->apos); - goto block_err; - } - /* Decode sequence and generate CIGAR */ - if (ds & (CRAM_SEQ | CRAM_MQ)) { - r |= cram_decode_seq(fd, 
c, s, blk, cr, sh, cf, seq, qual, - has_MD, has_NM); - if (r) goto block_err; - } else { - cr->cigar = 0; - cr->ncigar = 0; - cr->aend = cr->apos; - cr->mqual = 0; - } - } else { - int out_sz2 = cr->len; - - //puts("Unmapped"); - cr->cigar = 0; - cr->ncigar = 0; - cr->aend = cr->apos; - cr->mqual = 0; - - if (ds & CRAM_BA && cr->len) { - if (!c->comp_hdr->codecs[DS_BA]) goto block_err; - r |= c->comp_hdr->codecs[DS_BA] - ->decode(s, c->comp_hdr->codecs[DS_BA], blk, - (char *)seq, &out_sz2); - if (r) goto block_err; - } - - if ((ds & CRAM_CF) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { - out_sz2 = cr->len; - if (ds & CRAM_QS && cr->len >= 0) { - if (!c->comp_hdr->codecs[DS_QS]) goto block_err; - r |= c->comp_hdr->codecs[DS_QS] - ->decode(s, c->comp_hdr->codecs[DS_QS], - blk, qual, &out_sz2); - if (r) goto block_err; - } - } else { - if (ds & CRAM_RL) - memset(qual, 255, cr->len); - } - } - - if (!c->comp_hdr->qs_seq_orient && (ds & CRAM_QS) && (cr->flags & BAM_FREVERSE)) { - int i, j; - for (i = 0, j = cr->len-1; i < j; i++, j--) { - unsigned char c; - c = qual[i]; - qual[i] = qual[j]; - qual[j] = c; - } - } - } - - pthread_mutex_lock(&fd->ref_lock); - if (refs) { - int i; - for (i = 0; i < fd->refs->nref; i++) { - if (refs[i]) - cram_ref_decr(fd->refs, i); - } - free(refs); - refs = NULL; - } else if (ref_id >= 0 && s->ref != fd->ref_free && !embed_ref) { - cram_ref_decr(fd->refs, ref_id); - } - pthread_mutex_unlock(&fd->ref_lock); - - /* Resolve mate pair cross-references between recs within this slice */ - r |= cram_decode_slice_xref(s, fd->required_fields); - - // Free the original blocks as we no longer need these. - { - int i; - for (i = 0; i < s->hdr->num_blocks; i++) { - cram_block *b = s->block[i]; - cram_free_block(b); - s->block[i] = NULL; - } - } - - // Also see initial BLOCK_RESIZE_EXACT at top of function. - // As we grow blocks we overallocate by up to 50%. So shrink - // back to their final sizes here. 
- // - //fprintf(stderr, "%d %d // %d %d // %d %d // %d %d\n", - // (int)s->seqs_blk->byte, (int)s->seqs_blk->alloc, - // (int)s->qual_blk->byte, (int)s->qual_blk->alloc, - // (int)s->name_blk->byte, (int)s->name_blk->alloc, - // (int)s->aux_blk->byte, (int)s->aux_blk->alloc); - BLOCK_RESIZE_EXACT(s->seqs_blk, BLOCK_SIZE(s->seqs_blk)+1); - BLOCK_RESIZE_EXACT(s->qual_blk, BLOCK_SIZE(s->qual_blk)+1); - BLOCK_RESIZE_EXACT(s->name_blk, BLOCK_SIZE(s->name_blk)+1); - BLOCK_RESIZE_EXACT(s->aux_blk, BLOCK_SIZE(s->aux_blk)+1); - - return r; - - block_err: - if (refs) { - int i; - pthread_mutex_lock(&fd->ref_lock); - for (i = 0; i < fd->refs->nref; i++) { - if (refs[i]) - cram_ref_decr(fd->refs, i); - } - free(refs); - pthread_mutex_unlock(&fd->ref_lock); - } - - return -1; -} - -typedef struct { - cram_fd *fd; - cram_container *c; - cram_slice *s; - sam_hdr_t *h; - int exit_code; -} cram_decode_job; - -void *cram_decode_slice_thread(void *arg) { - cram_decode_job *j = (cram_decode_job *)arg; - - j->exit_code = cram_decode_slice(j->fd, j->c, j->s, j->h); - - return j; -} - -/* - * Spawn a multi-threaded version of cram_decode_slice(). - */ -int cram_decode_slice_mt(cram_fd *fd, cram_container *c, cram_slice *s, - sam_hdr_t *bfd) { - cram_decode_job *j; - int nonblock; - - if (!fd->pool) - return cram_decode_slice(fd, c, s, bfd); - - if (!(j = malloc(sizeof(*j)))) - return -1; - - j->fd = fd; - j->c = c; - j->s = s; - j->h = bfd; - - nonblock = hts_tpool_process_sz(fd->rqueue) ? 1 : 0; - - int saved_errno = errno; - errno = 0; - if (-1 == hts_tpool_dispatch2(fd->pool, fd->rqueue, cram_decode_slice_thread, - j, nonblock)) { - /* Would block */ - if (errno != EAGAIN) - return -1; - fd->job_pending = j; - } else { - fd->job_pending = NULL; - } - errno = saved_errno; - - // flush too - return 0; -} - - -/* ---------------------------------------------------------------------- - * CRAM sequence iterators. - */ - -/* - * Converts a cram in-memory record into a bam in-memory record. 
We - * pass a pointer to a bam_seq_t pointer along with the a pointer to - * the allocated size. These can initially be pointers to NULL and zero. - * - * This function will reallocate the bam buffer as required and update - * (*bam)->alloc accordingly, allowing it to be used within a loop - * efficiently without needing to allocate new bam objects over and - * over again. - * - * Returns the used size of the bam record on success - * -1 on failure. - */ -static int cram_to_bam(sam_hdr_t *sh, cram_fd *fd, cram_slice *s, - cram_record *cr, int rec, bam_seq_t **bam) { - int ret, rg_len; - char name_a[1024], *name; - int name_len; - char *aux; - char *seq, *qual; - sam_hrecs_t *bfd = sh->hrecs; - - /* Assign names if not explicitly set */ - if (fd->required_fields & SAM_QNAME) { - if (cr->name_len) { - name = (char *)BLOCK_DATA(s->name_blk) + cr->name; - name_len = cr->name_len; - } else { - name = name_a; - if (cr->mate_line >= 0 && cr->mate_line < s->max_rec && - s->crecs[cr->mate_line].name_len > 0) { - // Copy our mate if non-zero. - memcpy(name_a, BLOCK_DATA(s->name_blk)+s->crecs[cr->mate_line].name, - s->crecs[cr->mate_line].name_len); - name = name_a + s->crecs[cr->mate_line].name_len; - } else { - // Otherwise generate a name based on prefix - name_len = strlen(fd->prefix); - memcpy(name, fd->prefix, name_len); - name += name_len; - *name++ = ':'; - if (cr->mate_line >= 0 && cr->mate_line < rec) { - name = (char *)append_uint64((unsigned char *)name, - s->hdr->record_counter + - cr->mate_line + 1); - } else { - name = (char *)append_uint64((unsigned char *)name, - s->hdr->record_counter + - rec + 1); - } - } - name_len = name - name_a; - name = name_a; - } - } else { - name = "?"; - name_len = 1; - } - - /* Generate BAM record */ - if (cr->rg < -1 || cr->rg >= bfd->nrg) - return -1; - rg_len = (cr->rg != -1) ? 
bfd->rg[cr->rg].name_len + 4 : 0; - - if (fd->required_fields & (SAM_SEQ | SAM_QUAL)) { - if (!BLOCK_DATA(s->seqs_blk)) - return -1; - seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq; - } else { - seq = "*"; - cr->len = 0; - } - - if (fd->required_fields & SAM_QUAL) { - if (!BLOCK_DATA(s->qual_blk)) - return -1; - qual = (char *)BLOCK_DATA(s->qual_blk) + cr->qual; - } else { - qual = NULL; - } - - ret = bam_set1(*bam, - name_len, name, - cr->flags, cr->ref_id, cr->apos - 1, cr->mqual, - cr->ncigar, &s->cigar[cr->cigar], - cr->mate_ref_id, cr->mate_pos - 1, cr->tlen, - cr->len, seq, qual, - cr->aux_size + rg_len); - if (ret < 0) { - return ret; - } - - aux = (char *)bam_aux(*bam); - - /* Auxiliary strings */ - if (cr->aux_size != 0) { - memcpy(aux, BLOCK_DATA(s->aux_blk) + cr->aux, cr->aux_size); - aux += cr->aux_size; - (*bam)->l_data += cr->aux_size; - } - - /* RG:Z: */ - if (rg_len > 0) { - *aux++ = 'R'; *aux++ = 'G'; *aux++ = 'Z'; - int len = bfd->rg[cr->rg].name_len; - memcpy(aux, bfd->rg[cr->rg].name, len); - aux += len; - *aux++ = 0; - (*bam)->l_data += rg_len; - } - - return (*bam)->l_data; -} - -/* - * Here be dragons! The multi-threading code in this is crufty beyond belief. - */ - -/* - * Load first container. - * Called when fd->ctr is NULL> - * - * Returns container on success - * NULL on failure. - */ -static cram_container *cram_first_slice(cram_fd *fd) { - cram_container *c; - - do { - if (fd->ctr) - cram_free_container(fd->ctr); - - if (!(c = fd->ctr = cram_read_container(fd))) - return NULL; - c->curr_slice_mt = c->curr_slice; - } while (c->length == 0); - - /* - * The first container may be a result of a sub-range query. - * In which case it may still not be the optimal starting point - * due to skipped containers/slices in the index. - */ - // No need for locks here as we're in the main thread. 
- if (fd->range.refid != -2) { - while (c->ref_seq_id != -2 && - (c->ref_seq_id < fd->range.refid || - (fd->range.refid >= 0 && c->ref_seq_id == fd->range.refid - && c->ref_seq_start + c->ref_seq_span-1 < fd->range.start))) { - if (0 != cram_seek(fd, c->length, SEEK_CUR)) - return NULL; - cram_free_container(fd->ctr); - do { - if (!(c = fd->ctr = cram_read_container(fd))) - return NULL; - } while (c->length == 0); - } - - if (c->ref_seq_id != -2 && c->ref_seq_id != fd->range.refid) { - fd->eof = 1; - return NULL; - } - } - - if (!(c->comp_hdr_block = cram_read_block(fd))) - return NULL; - if (c->comp_hdr_block->content_type != COMPRESSION_HEADER) - return NULL; - - c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block); - if (!c->comp_hdr) - return NULL; - if (!c->comp_hdr->AP_delta && - sam_hrecs_sort_order(fd->header->hrecs) != ORDER_COORD) { - pthread_mutex_lock(&fd->ref_lock); - fd->unsorted = 1; - pthread_mutex_unlock(&fd->ref_lock); - } - - return c; -} - -static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) { - cram_container *c_curr; // container being consumed via cram_get_seq() - cram_slice *s_curr = NULL; - - // Populate the first container if unknown. - if (!(c_curr = fd->ctr)) { - if (!(c_curr = cram_first_slice(fd))) - return NULL; - } - - // Discard previous slice - if ((s_curr = c_curr->slice)) { - c_curr->slice = NULL; - cram_free_slice(s_curr); - s_curr = NULL; - } - - // If we've consumed all slices in this container, also discard - // the container too. - if (c_curr->curr_slice == c_curr->max_slice) { - if (fd->ctr == c_curr) - fd->ctr = NULL; - if (fd->ctr_mt == c_curr) - fd->ctr_mt = NULL; - cram_free_container(c_curr); - c_curr = NULL; - } - - if (!fd->ctr_mt) - fd->ctr_mt = c_curr; - - // Fetch the next slice (and the container if necessary). - // - // If single threaded this loop bails out as soon as it finds - // a slice in range. In this case c_next and c_curr end up being - // the same thing. 
- // - // If multi-threaded, we loop until we have filled out - // thread pool input queue. Here c_next and c_curr *may* differ, as - // can fd->ctr and fd->ctr_mt. - for (;;) { - cram_container *c_next = fd->ctr_mt; - cram_slice *s_next = NULL; - - // Next slice; either from the last job we failed to push - // to the input queue or via more I/O. - if (fd->job_pending) { - cram_decode_job *j = (cram_decode_job *)fd->job_pending; - c_next = j->c; - s_next = j->s; - free(fd->job_pending); - fd->job_pending = NULL; - } else if (!fd->ooc) { - empty_container: - if (!c_next || c_next->curr_slice_mt == c_next->max_slice) { - // new container - for(;;) { - if (!(c_next = cram_read_container(fd))) { - if (fd->pool) { - fd->ooc = 1; - break; - } - - return NULL; - } - c_next->curr_slice_mt = c_next->curr_slice; - - if (c_next->length != 0) - break; - - cram_free_container(c_next); - } - if (fd->ooc) - break; - - /* Skip containers not yet spanning our range */ - if (fd->range.refid != -2 && c_next->ref_seq_id != -2) { - // ref_id beyond end of range; bail out - if (c_next->ref_seq_id != fd->range.refid) { - cram_free_container(c_next); - fd->ctr_mt = NULL; - fd->ooc = 1; - break; - } - - // position beyond end of range; bail out - if (fd->range.refid != -1 && - c_next->ref_seq_start > fd->range.end) { - cram_free_container(c_next); - fd->ctr_mt = NULL; - fd->ooc = 1; - break; - } - - // before start of range; skip to next container - if (fd->range.refid != -1 && - c_next->ref_seq_start + c_next->ref_seq_span-1 < - fd->range.start) { - c_next->curr_slice_mt = c_next->max_slice; - cram_seek(fd, c_next->length, SEEK_CUR); - cram_free_container(c_next); - c_next = NULL; - continue; - } - } - - // Container is valid range, so remember it for restarting - // this function. 
- fd->ctr_mt = c_next; - - if (!(c_next->comp_hdr_block = cram_read_block(fd))) - return NULL; - if (c_next->comp_hdr_block->content_type != COMPRESSION_HEADER) - return NULL; - - c_next->comp_hdr = - cram_decode_compression_header(fd, c_next->comp_hdr_block); - if (!c_next->comp_hdr) - return NULL; - - if (!c_next->comp_hdr->AP_delta && - sam_hrecs_sort_order(fd->header->hrecs) != ORDER_COORD) { - pthread_mutex_lock(&fd->ref_lock); - fd->unsorted = 1; - pthread_mutex_unlock(&fd->ref_lock); - } - } - - if (c_next->num_records == 0) { - if (fd->ctr == c_next) - fd->ctr = NULL; - if (c_curr == c_next) - c_curr = NULL; - if (fd->ctr_mt == c_next) - fd->ctr_mt = NULL; - cram_free_container(c_next); - c_next = NULL; - goto empty_container; - } - - if (!(s_next = c_next->slice = cram_read_slice(fd))) - return NULL; - - s_next->slice_num = ++c_next->curr_slice_mt; - s_next->curr_rec = 0; - s_next->max_rec = s_next->hdr->num_records; - - s_next->last_apos = s_next->hdr->ref_seq_start; - - // We know the container overlaps our range, but with multi-slice - // containers we may have slices that do not. Skip these also. 
- if (fd->range.refid != -2 && s_next->hdr->ref_seq_id != -2) { - // ref_id beyond end of range; bail out - if (s_next->hdr->ref_seq_id != fd->range.refid) { - fd->ooc = 1; - cram_free_slice(s_next); - c_next->slice = s_next = NULL; - break; - } - - // position beyond end of range; bail out - if (fd->range.refid != -1 && - s_next->hdr->ref_seq_start > fd->range.end) { - fd->ooc = 1; - cram_free_slice(s_next); - c_next->slice = s_next = NULL; - break; - } - - // before start of range; skip to next slice - if (fd->range.refid != -1 && - s_next->hdr->ref_seq_start + s_next->hdr->ref_seq_span-1 < - fd->range.start) { - cram_free_slice(s_next); - c_next->slice = s_next = NULL; - continue; - } - } - } // end: if (!fd->ooc) - - if (!c_next || !s_next) - break; - - // Decode the slice, either right now (non-threaded) or by pushing - // it to the a decode queue (threaded). - if (cram_decode_slice_mt(fd, c_next, s_next, fd->header) != 0) { - hts_log_error("Failure to decode slice"); - cram_free_slice(s_next); - c_next->slice = NULL; - return NULL; - } - - // No thread pool, so don't loop again - if (!fd->pool) { - c_curr = c_next; - s_curr = s_next; - break; - } - - // With thread pool, but we have a job pending so our decode queue - // is full. - if (fd->job_pending) - break; - - // Otherwise we're threaded with room in the decode input queue, so - // keep reading slices for decode. - // Push it a bit far, to qsize in queue rather than pending arrival, - // as cram tends to be a bit bursty in decode timings. - if (hts_tpool_process_len(fd->rqueue) > - hts_tpool_process_qsize(fd->rqueue)) - break; - } // end of for(;;) - - - // When not threaded we've already have c_curr and s_curr. - // Otherwise we need get them by pulling off the decode output queue. 
- if (fd->pool) { - hts_tpool_result *res; - cram_decode_job *j; - - if (fd->ooc && hts_tpool_process_empty(fd->rqueue)) { - fd->eof = 1; - return NULL; - } - - res = hts_tpool_next_result_wait(fd->rqueue); - - if (!res || !hts_tpool_result_data(res)) { - hts_log_error("Call to hts_tpool_next_result failed"); - return NULL; - } - - j = (cram_decode_job *)hts_tpool_result_data(res); - c_curr = j->c; - s_curr = j->s; - - if (j->exit_code != 0) { - hts_log_error("Slice decode failure"); - fd->eof = 0; - hts_tpool_delete_result(res, 1); - return NULL; - } - - hts_tpool_delete_result(res, 1); - } - - *cp = c_curr; - - // Update current slice being processed (as opposed to current - // slice in the multi-threaded reahead. - fd->ctr = c_curr; - if (c_curr) { - c_curr->slice = s_curr; - if (s_curr) - c_curr->curr_slice = s_curr->slice_num; - } - if (s_curr) - s_curr->curr_rec = 0; - else - fd->eof = 1; - - return s_curr; -} - -/* - * Read the next cram record and return it. - * Note that to decode cram_record the caller will need to look up some data - * in the current slice, pointed to by fd->ctr->slice. This is valid until - * the next call to cram_get_seq (which may invalidate it). - * - * Returns record pointer on success (do not free) - * NULL on failure - */ -cram_record *cram_get_seq(cram_fd *fd) { - cram_container *c; - cram_slice *s; - - for (;;) { - c = fd->ctr; - if (c && c->slice && c->slice->curr_rec < c->slice->max_rec) { - s = c->slice; - } else { - if (!(s = cram_next_slice(fd, &c))) - return NULL; - continue; /* In case slice contains no records */ - } - - // No need to lock here as get_seq is running in the main thread, - // which is also the same one that does the range modifications. - if (fd->range.refid != -2) { - if (fd->range.refid == -1 && s->crecs[s->curr_rec].ref_id != -1) { - // Special case when looking for unmapped blocks at end. 
- // If these are mixed in with mapped data (c->ref_id == -2) - // then we need skip until we find the unmapped data, if at all - s->curr_rec++; - continue; - } - if (s->crecs[s->curr_rec].ref_id < fd->range.refid && - s->crecs[s->curr_rec].ref_id != -1) { - // Looking for a mapped read, but not there yet. Special case - // as -1 (unmapped) shouldn't be considered < refid. - s->curr_rec++; - continue; - } - - if (s->crecs[s->curr_rec].ref_id != fd->range.refid) { - fd->eof = 1; - cram_free_slice(s); - c->slice = NULL; - return NULL; - } - - if (fd->range.refid != -1 && s->crecs[s->curr_rec].apos > fd->range.end) { - fd->eof = 1; - cram_free_slice(s); - c->slice = NULL; - return NULL; - } - - if (fd->range.refid != -1 && s->crecs[s->curr_rec].aend < fd->range.start) { - s->curr_rec++; - continue; - } - } - - break; - } - - fd->ctr = c; - c->slice = s; - return &s->crecs[s->curr_rec++]; -} - -/* - * Read the next cram record and convert it to a bam_seq_t struct. - * - * Returns >= 0 success (number of bytes written to *bam) - * -1 on EOF or failure (check fd->err) - */ -int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam) { - cram_record *cr; - cram_container *c; - cram_slice *s; - - if (!(cr = cram_get_seq(fd))) - return -1; - - c = fd->ctr; - s = c->slice; - - return cram_to_bam(fd->header, fd, s, cr, s->curr_rec-1, bam); -} - -/* - * Drains and frees the decode read-queue for a multi-threaded reader. 
- */ -void cram_drain_rqueue(cram_fd *fd) { - cram_container *lc = NULL; - - if (!fd->pool || !fd->rqueue) - return; - - // drain queue of any in-flight decode jobs - while (!hts_tpool_process_empty(fd->rqueue)) { - hts_tpool_result *r = hts_tpool_next_result_wait(fd->rqueue); - if (!r) - break; - cram_decode_job *j = (cram_decode_job *)hts_tpool_result_data(r); - if (j->c->slice == j->s) - j->c->slice = NULL; - if (j->c != lc) { - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } - lc = j->c; - } - cram_free_slice(j->s); - hts_tpool_delete_result(r, 1); - } - - // Also tidy up any pending decode job that we didn't submit to the workers - // due to the input queue being full. - if (fd->job_pending) { - cram_decode_job *j = (cram_decode_job *)fd->job_pending; - if (j->c->slice == j->s) - j->c->slice = NULL; - if (j->c != lc) { - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } - lc = j->c; - } - cram_free_slice(j->s); - free(j); - fd->job_pending = NULL; - } - - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } -} diff --git a/src/htslib-1.19.1/cram/cram_decode.h b/src/htslib-1.19.1/cram/cram_decode.h deleted file mode 100644 index 400eb6b..0000000 --- a/src/htslib-1.19.1/cram/cram_decode.h +++ /dev/null @@ -1,117 +0,0 @@ -/* -Copyright (c) 2012-2013, 2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * Include cram.h instead. - * - * This is an internal part of the CRAM system and is automatically included - * when you #include cram.h. - * - * Implements the decoding portion of CRAM I/O. Also see - * cram_codecs.[ch] for the actual encoding functions themselves. - */ - -#ifndef CRAM_DECODE_H -#define CRAM_DECODE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* ---------------------------------------------------------------------- - * CRAM sequence iterators. - */ - -/*! Read the next cram record and return it as a cram_record. - * - * Note that to decode cram_record the caller will need to look up some data - * in the current slice, pointed to by fd->ctr->slice. This is valid until - * the next call to cram_get_seq (which may invalidate it). 
- * - * @return - * Returns record pointer on success (do not free); - * NULL on failure - */ -cram_record *cram_get_seq(cram_fd *fd); - -/*! Read the next cram record and convert it to a bam_seq_t struct. - * - * @return - * Returns 0 on success; - * -1 on EOF or failure (check fd->err) - */ -int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam); - - -/* ---------------------------------------------------------------------- - * Internal functions - */ - -/*! INTERNAL: - * Decodes a CRAM block compression header. - * - * @return - * Returns header ptr on success; - * NULL on failure - */ -cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, - cram_block *b); - -/*! INTERNAL: - * Decodes a CRAM (un)mapped slice header block. - * - * @return - * Returns slice header ptr on success; - * NULL on failure - */ -cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b); - - -/*! INTERNAL: - * Decode an entire slice from container blocks. Fills out s->crecs[] array. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s, - sam_hdr_t *hdr); - - -/* - * Drains and frees the decode read-queue for a multi-threaded reader. - */ -void cram_drain_rqueue(cram_fd *fd); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/cram/cram_encode.c b/src/htslib-1.19.1/cram/cram_encode.c deleted file mode 100644 index 9651abd..0000000 --- a/src/htslib-1.19.1/cram/cram_encode.c +++ /dev/null @@ -1,4155 +0,0 @@ -/* -Copyright (c) 2012-2020, 2022-2024 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cram.h" -#include "os.h" -#include "../sam_internal.h" // for nibble2base -#include "../htslib/hts.h" -#include "../htslib/hts_endian.h" -#include "../textutils_internal.h" - -KHASH_MAP_INIT_STR(m_s2u64, uint64_t) - -#define Z_CRAM_STRAT Z_FILTERED -//#define Z_CRAM_STRAT Z_RLE -//#define Z_CRAM_STRAT Z_HUFFMAN_ONLY -//#define Z_CRAM_STRAT Z_DEFAULT_STRATEGY - -static int process_one_read(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *cr, - bam_seq_t *b, int rnum, kstring_t *MD, - int embed_ref, int no_ref); - -/* - * Returns index of val into key. 
- * Basically strchr(key, val)-key; - */ -static int sub_idx(char *key, char val) { - int i; - - for (i = 0; i < 4 && *key++ != val; i++); - return i; -} - -/* - * Encodes a compression header block into a generic cram_block structure. - * - * Returns cram_block ptr on success - * NULL on failure - */ -cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c, - cram_block_compression_hdr *h, - int embed_ref) { - cram_block *cb = cram_new_block(COMPRESSION_HEADER, 0); - cram_block *map = cram_new_block(COMPRESSION_HEADER, 0); - int i, mc, r = 0; - - int no_ref = c->no_ref; - - if (!cb || !map) - return NULL; - - /* - * This is a concatenation of several blocks of data: - * header + landmarks, preservation map, read encoding map, and the tag - * encoding map. - * All 4 are variable sized and we need to know how large these are - * before creating the compression header itself as this starts with - * the total size (stored as a variable length string). - */ - - // Duplicated from container itself, and removed in 1.1 - if (CRAM_MAJOR_VERS(fd->version) == 1) { - r |= itf8_put_blk(cb, h->ref_seq_id); - r |= itf8_put_blk(cb, h->ref_seq_start); - r |= itf8_put_blk(cb, h->ref_seq_span); - r |= itf8_put_blk(cb, h->num_records); - r |= itf8_put_blk(cb, h->num_landmarks); - for (i = 0; i < h->num_landmarks; i++) { - r |= itf8_put_blk(cb, h->landmark[i]); - } - } - - if (h->preservation_map) { - kh_destroy(map, h->preservation_map); - h->preservation_map = NULL; - } - - /* Create in-memory preservation map */ - /* FIXME: should create this when we create the container */ - if (c->num_records > 0) { - khint_t k; - int r; - - if (!(h->preservation_map = kh_init(map))) - return NULL; - - k = kh_put(map, h->preservation_map, "RN", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = !fd->lossy_read_names; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - k = kh_put(map, h->preservation_map, "PI", &r); - if (-1 == r) return NULL; - 
kh_val(h->preservation_map, k).i = 0; - - k = kh_put(map, h->preservation_map, "UI", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 1; - - k = kh_put(map, h->preservation_map, "MI", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 1; - - } else { - // Technically SM was in 1.0, but wasn't in Java impl. - k = kh_put(map, h->preservation_map, "SM", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 0; - - k = kh_put(map, h->preservation_map, "TD", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 0; - - k = kh_put(map, h->preservation_map, "AP", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = h->AP_delta; - - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - k = kh_put(map, h->preservation_map, "QO", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = h->qs_seq_orient; - } - - if (no_ref || embed_ref>0) { - // Reference Required == No - k = kh_put(map, h->preservation_map, "RR", &r); - if (-1 == r) return NULL; - kh_val(h->preservation_map, k).i = 0; - } - } - } - - /* Encode preservation map; could collapse this and above into one */ - mc = 0; - BLOCK_SIZE(map) = 0; - if (h->preservation_map) { - khint_t k; - - for (k = kh_begin(h->preservation_map); - k != kh_end(h->preservation_map); - k++) { - const char *key; - khash_t(map) *pmap = h->preservation_map; - - - if (!kh_exist(pmap, k)) - continue; - - key = kh_key(pmap, k); - BLOCK_APPEND(map, key, 2); - - switch(CRAM_KEY(key[0], key[1])) { - case CRAM_KEY('M','I'): - case CRAM_KEY('U','I'): - case CRAM_KEY('P','I'): - case CRAM_KEY('A','P'): - case CRAM_KEY('R','N'): - case CRAM_KEY('R','R'): - case CRAM_KEY('Q','O'): - BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i); - break; - - case CRAM_KEY('S','M'): { - char smat[5], *mp = smat; - // Output format is for order ACGTN (minus ref base) - // to store the code value 0-3 for each symbol. 
- // - // Note this is different to storing the symbols in order - // that the codes occur from 0-3, which is what we used to - // do. (It didn't matter as we always had a fixed table in - // the order.) - *mp++ = - (sub_idx(h->substitution_matrix[0], 'C') << 6) | - (sub_idx(h->substitution_matrix[0], 'G') << 4) | - (sub_idx(h->substitution_matrix[0], 'T') << 2) | - (sub_idx(h->substitution_matrix[0], 'N') << 0); - *mp++ = - (sub_idx(h->substitution_matrix[1], 'A') << 6) | - (sub_idx(h->substitution_matrix[1], 'G') << 4) | - (sub_idx(h->substitution_matrix[1], 'T') << 2) | - (sub_idx(h->substitution_matrix[1], 'N') << 0); - *mp++ = - (sub_idx(h->substitution_matrix[2], 'A') << 6) | - (sub_idx(h->substitution_matrix[2], 'C') << 4) | - (sub_idx(h->substitution_matrix[2], 'T') << 2) | - (sub_idx(h->substitution_matrix[2], 'N') << 0); - *mp++ = - (sub_idx(h->substitution_matrix[3], 'A') << 6) | - (sub_idx(h->substitution_matrix[3], 'C') << 4) | - (sub_idx(h->substitution_matrix[3], 'G') << 2) | - (sub_idx(h->substitution_matrix[3], 'N') << 0); - *mp++ = - (sub_idx(h->substitution_matrix[4], 'A') << 6) | - (sub_idx(h->substitution_matrix[4], 'C') << 4) | - (sub_idx(h->substitution_matrix[4], 'G') << 2) | - (sub_idx(h->substitution_matrix[4], 'T') << 0); - BLOCK_APPEND(map, smat, 5); - break; - } - - case CRAM_KEY('T','D'): { - r |= (fd->vv.varint_put32_blk(map, BLOCK_SIZE(h->TD_blk)) <= 0); - BLOCK_APPEND(map, - BLOCK_DATA(h->TD_blk), - BLOCK_SIZE(h->TD_blk)); - break; - } - - default: - hts_log_warning("Unknown preservation key '%.2s'", key); - break; - } - - mc++; - } - } - r |= (fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); - r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); - BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); - - /* rec encoding map */ - mc = 0; - BLOCK_SIZE(map) = 0; - if (h->codecs[DS_BF]) { - if (-1 == h->codecs[DS_BF]->store(h->codecs[DS_BF], map, "BF", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_CF]) 
{ - if (-1 == h->codecs[DS_CF]->store(h->codecs[DS_CF], map, "CF", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RL]) { - if (-1 == h->codecs[DS_RL]->store(h->codecs[DS_RL], map, "RL", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_AP]) { - if (-1 == h->codecs[DS_AP]->store(h->codecs[DS_AP], map, "AP", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RG]) { - if (-1 == h->codecs[DS_RG]->store(h->codecs[DS_RG], map, "RG", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_MF]) { - if (-1 == h->codecs[DS_MF]->store(h->codecs[DS_MF], map, "MF", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_NS]) { - if (-1 == h->codecs[DS_NS]->store(h->codecs[DS_NS], map, "NS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_NP]) { - if (-1 == h->codecs[DS_NP]->store(h->codecs[DS_NP], map, "NP", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TS]) { - if (-1 == h->codecs[DS_TS]->store(h->codecs[DS_TS], map, "TS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_NF]) { - if (-1 == h->codecs[DS_NF]->store(h->codecs[DS_NF], map, "NF", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TC]) { - if (-1 == h->codecs[DS_TC]->store(h->codecs[DS_TC], map, "TC", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TN]) { - if (-1 == h->codecs[DS_TN]->store(h->codecs[DS_TN], map, "TN", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TL]) { - if (-1 == h->codecs[DS_TL]->store(h->codecs[DS_TL], map, "TL", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_FN]) { - if (-1 == h->codecs[DS_FN]->store(h->codecs[DS_FN], map, "FN", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_FC]) { - if (-1 == h->codecs[DS_FC]->store(h->codecs[DS_FC], map, "FC", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_FP]) { - if (-1 == h->codecs[DS_FP]->store(h->codecs[DS_FP], map, "FP", - fd->version)) - return NULL; - mc++; - } - if 
(h->codecs[DS_BS]) { - if (-1 == h->codecs[DS_BS]->store(h->codecs[DS_BS], map, "BS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_IN]) { - if (-1 == h->codecs[DS_IN]->store(h->codecs[DS_IN], map, "IN", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_DL]) { - if (-1 == h->codecs[DS_DL]->store(h->codecs[DS_DL], map, "DL", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_BA]) { - if (-1 == h->codecs[DS_BA]->store(h->codecs[DS_BA], map, "BA", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_BB]) { - if (-1 == h->codecs[DS_BB]->store(h->codecs[DS_BB], map, "BB", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_MQ]) { - if (-1 == h->codecs[DS_MQ]->store(h->codecs[DS_MQ], map, "MQ", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RN]) { - if (-1 == h->codecs[DS_RN]->store(h->codecs[DS_RN], map, "RN", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_QS]) { - if (-1 == h->codecs[DS_QS]->store(h->codecs[DS_QS], map, "QS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_QQ]) { - if (-1 == h->codecs[DS_QQ]->store(h->codecs[DS_QQ], map, "QQ", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RI]) { - if (-1 == h->codecs[DS_RI]->store(h->codecs[DS_RI], map, "RI", - fd->version)) - return NULL; - mc++; - } - if (CRAM_MAJOR_VERS(fd->version) != 1) { - if (h->codecs[DS_SC]) { - if (-1 == h->codecs[DS_SC]->store(h->codecs[DS_SC], map, "SC", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_RS]) { - if (-1 == h->codecs[DS_RS]->store(h->codecs[DS_RS], map, "RS", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_PD]) { - if (-1 == h->codecs[DS_PD]->store(h->codecs[DS_PD], map, "PD", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_HC]) { - if (-1 == h->codecs[DS_HC]->store(h->codecs[DS_HC], map, "HC", - fd->version)) - return NULL; - mc++; - } - } - if (h->codecs[DS_TM]) { - if (-1 == h->codecs[DS_TM]->store(h->codecs[DS_TM], 
map, "TM", - fd->version)) - return NULL; - mc++; - } - if (h->codecs[DS_TV]) { - if (-1 == h->codecs[DS_TV]->store(h->codecs[DS_TV], map, "TV", - fd->version)) - return NULL; - mc++; - } - r |= (fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); - r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); - BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); - - /* tag encoding map */ - mc = 0; - BLOCK_SIZE(map) = 0; - if (c->tags_used) { - khint_t k; - - for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { - int key; - if (!kh_exist(c->tags_used, k)) - continue; - - key = kh_key(c->tags_used, k); - cram_codec *cd = kh_val(c->tags_used, k)->codec; - - r |= (fd->vv.varint_put32_blk(map, key) <= 0); - if (-1 == cd->store(cd, map, NULL, fd->version)) - return NULL; - - mc++; - } - } - - r |= (fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); - r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); - BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); - - hts_log_info("Wrote compression block header in %d bytes", (int)BLOCK_SIZE(cb)); - - BLOCK_UPLEN(cb); - - cram_free_block(map); - - if (r >= 0) - return cb; - - block_err: - return NULL; -} - - -/* - * Encodes a slice compression header. 
- * - * Returns cram_block on success - * NULL on failure - */ -cram_block *cram_encode_slice_header(cram_fd *fd, cram_slice *s) { - char *buf; - char *cp; - cram_block *b = cram_new_block(MAPPED_SLICE, 0); - int j; - - if (!b) - return NULL; - - cp = buf = malloc(22+16+5*(8+s->hdr->num_blocks)); - if (NULL == buf) { - cram_free_block(b); - return NULL; - } - - cp += fd->vv.varint_put32s(cp, NULL, s->hdr->ref_seq_id); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - cp += fd->vv.varint_put64(cp, NULL, s->hdr->ref_seq_start); - cp += fd->vv.varint_put64(cp, NULL, s->hdr->ref_seq_span); - } else { - if (s->hdr->ref_seq_start < 0 || s->hdr->ref_seq_start > INT_MAX) { - hts_log_error("Reference position too large for CRAM 3"); - cram_free_block(b); - free(buf); - return NULL; - } - cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_seq_start); - cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_seq_span); - } - cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_records); - if (CRAM_MAJOR_VERS(fd->version) == 2) - cp += fd->vv.varint_put32(cp, NULL, s->hdr->record_counter); - else if (CRAM_MAJOR_VERS(fd->version) >= 3) - cp += fd->vv.varint_put64(cp, NULL, s->hdr->record_counter); - cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_blocks); - cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_content_ids); - for (j = 0; j < s->hdr->num_content_ids; j++) { - cp += fd->vv.varint_put32(cp, NULL, s->hdr->block_content_ids[j]); - } - if (s->hdr->content_type == MAPPED_SLICE) - cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_base_id); - - if (CRAM_MAJOR_VERS(fd->version) != 1) { - memcpy(cp, s->hdr->md5, 16); cp += 16; - } - - assert(cp-buf <= 22+16+5*(8+s->hdr->num_blocks)); - - b->data = (unsigned char *)buf; - b->comp_size = b->uncomp_size = cp-buf; - - return b; -} - - -/* - * Encodes a single read. 
- * - * Returns 0 on success - * -1 on failure - */ -static int cram_encode_slice_read(cram_fd *fd, - cram_container *c, - cram_block_compression_hdr *h, - cram_slice *s, - cram_record *cr, - int64_t *last_pos) { - int r = 0; - int32_t i32; - int64_t i64; - unsigned char uc; - - //fprintf(stderr, "Encode seq %d, %d/%d FN=%d, %s\n", rec, core->byte, core->bit, cr->nfeature, s->name_ds->str + cr->name); - - //printf("BF=0x%x\n", cr->flags); - // bf = cram_flag_swap[cr->flags]; - i32 = fd->cram_flag_swap[cr->flags & 0xfff]; - r |= h->codecs[DS_BF]->encode(s, h->codecs[DS_BF], (char *)&i32, 1); - - i32 = cr->cram_flags & CRAM_FLAG_MASK; - r |= h->codecs[DS_CF]->encode(s, h->codecs[DS_CF], (char *)&i32, 1); - - if (CRAM_MAJOR_VERS(fd->version) != 1 && s->hdr->ref_seq_id == -2) - r |= h->codecs[DS_RI]->encode(s, h->codecs[DS_RI], (char *)&cr->ref_id, 1); - - r |= h->codecs[DS_RL]->encode(s, h->codecs[DS_RL], (char *)&cr->len, 1); - - if (c->pos_sorted) { - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - i64 = cr->apos - *last_pos; - r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i64, 1); - } else { - i32 = cr->apos - *last_pos; - r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1); - } - *last_pos = cr->apos; - } else { - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - i64 = cr->apos; - r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i64, 1); - } else { - i32 = cr->apos; - r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1); - } - } - - r |= h->codecs[DS_RG]->encode(s, h->codecs[DS_RG], (char *)&cr->rg, 1); - - if (cr->cram_flags & CRAM_FLAG_DETACHED) { - i32 = cr->mate_flags; - r |= h->codecs[DS_MF]->encode(s, h->codecs[DS_MF], (char *)&i32, 1); - - r |= h->codecs[DS_NS]->encode(s, h->codecs[DS_NS], - (char *)&cr->mate_ref_id, 1); - - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - r |= h->codecs[DS_NP]->encode(s, h->codecs[DS_NP], - (char *)&cr->mate_pos, 1); - r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], - (char 
*)&cr->tlen, 1); - } else { - i32 = cr->mate_pos; - r |= h->codecs[DS_NP]->encode(s, h->codecs[DS_NP], - (char *)&i32, 1); - i32 = cr->tlen; - r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], - (char *)&i32, 1); - } - } else { - if (cr->cram_flags & CRAM_FLAG_MATE_DOWNSTREAM) { - r |= h->codecs[DS_NF]->encode(s, h->codecs[DS_NF], - (char *)&cr->mate_line, 1); - } - if (cr->cram_flags & CRAM_FLAG_EXPLICIT_TLEN) { - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], - (char *)&cr->tlen, 1); - } - } - } - - /* Aux tags */ - if (CRAM_MAJOR_VERS(fd->version) == 1) { - int j; - uc = cr->ntags; - r |= h->codecs[DS_TC]->encode(s, h->codecs[DS_TC], (char *)&uc, 1); - - for (j = 0; j < cr->ntags; j++) { - uint32_t i32 = s->TN[cr->TN_idx + j]; // id - r |= h->codecs[DS_TN]->encode(s, h->codecs[DS_TN], (char *)&i32, 1); - } - } else { - r |= h->codecs[DS_TL]->encode(s, h->codecs[DS_TL], (char *)&cr->TL, 1); - } - - // qual - // QS codec : Already stored in block[2]. 
- - // features (diffs) - if (!(cr->flags & BAM_FUNMAP)) { - int prev_pos = 0, j; - - r |= h->codecs[DS_FN]->encode(s, h->codecs[DS_FN], - (char *)&cr->nfeature, 1); - for (j = 0; j < cr->nfeature; j++) { - cram_feature *f = &s->features[cr->feature + j]; - - uc = f->X.code; - r |= h->codecs[DS_FC]->encode(s, h->codecs[DS_FC], (char *)&uc, 1); - i32 = f->X.pos - prev_pos; - r |= h->codecs[DS_FP]->encode(s, h->codecs[DS_FP], (char *)&i32, 1); - prev_pos = f->X.pos; - - switch(f->X.code) { - //char *seq; - - case 'X': - //fprintf(stderr, " FC=%c FP=%d base=%d\n", f->X.code, i32, f->X.base); - - uc = f->X.base; - r |= h->codecs[DS_BS]->encode(s, h->codecs[DS_BS], - (char *)&uc, 1); - break; - case 'S': - // Already done - //r |= h->codecs[DS_SC]->encode(s, h->codecs[DS_SC], - // BLOCK_DATA(s->soft_blk) + f->S.seq_idx, - // f->S.len); - - //if (CRAM_MAJOR_VERS(fd->version) >= 3) { - // r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], - // BLOCK_DATA(s->seqs_blk) + f->S.seq_idx, - // f->S.len); - //} - break; - case 'I': - //seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx; - //r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN], - // seq, f->S.len); - //if (CRAM_MAJOR_VERS(fd->version) >= 3) { - // r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], - // BLOCK_DATA(s->seqs_blk) + f->I.seq_idx, - // f->I.len); - //} - break; - case 'i': - uc = f->i.base; - r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], - (char *)&uc, 1); - //seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx; - //r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN], - // seq, 1); - break; - case 'D': - i32 = f->D.len; - r |= h->codecs[DS_DL]->encode(s, h->codecs[DS_DL], - (char *)&i32, 1); - break; - - case 'B': - // // Used when we try to store a non ACGTN base or an N - // // that aligns against a non ACGTN reference - - uc = f->B.base; - r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], - (char *)&uc, 1); - - // Already added - // uc = f->B.qual; - // r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS], - // 
(char *)&uc, 1); - break; - - case 'b': - // string of bases - r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], - (char *)BLOCK_DATA(s->seqs_blk) - + f->b.seq_idx, - f->b.len); - break; - - case 'Q': - // Already added - // uc = f->B.qual; - // r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS], - // (char *)&uc, 1); - break; - - case 'N': - i32 = f->N.len; - r |= h->codecs[DS_RS]->encode(s, h->codecs[DS_RS], - (char *)&i32, 1); - break; - - case 'P': - i32 = f->P.len; - r |= h->codecs[DS_PD]->encode(s, h->codecs[DS_PD], - (char *)&i32, 1); - break; - - case 'H': - i32 = f->H.len; - r |= h->codecs[DS_HC]->encode(s, h->codecs[DS_HC], - (char *)&i32, 1); - break; - - - default: - hts_log_error("Unhandled feature code %c", f->X.code); - return -1; - } - } - - r |= h->codecs[DS_MQ]->encode(s, h->codecs[DS_MQ], - (char *)&cr->mqual, 1); - } else { - char *seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq; - if (cr->len) - r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], seq, cr->len); - } - - return r ? -1 : 0; -} - - -/* - * Applies various compression methods to specific blocks, depending on - * known observations of how data series compress. - * - * Returns 0 on success - * -1 on failure - */ -static int cram_compress_slice(cram_fd *fd, cram_container *c, cram_slice *s) { - int level = fd->level, i; - int method = 1<version >= (3<<8)+1); - - /* Compress the CORE Block too, with minimal zlib level */ - if (level > 5 && s->block[0]->uncomp_size > 500) - cram_compress_block2(fd, s, s->block[0], NULL, 1<use_bz2) - method |= 1<use_rans) { - method_ranspr = (1< 1) - method_ranspr |= - (1< 5) - method_ranspr |= (1<use_rans) { - methodF |= v31_or_above ? method_ranspr : method_rans; - method |= v31_or_above ? 
method_ranspr : method_rans; - } - - int method_arith = 0; - if (fd->use_arith) { - method_arith = (1< 1) - method_arith |= - (1<use_arith && v31_or_above) { - methodF |= method_arith; - method |= method_arith; - } - - if (fd->use_lzma) - method |= (1<= 5) { - method |= 1<use_fqz) { - qmethod |= 1<level > 4) { - qmethod |= 1<level > 6) { - qmethod |= (1<metrics_lock); - for (i = 0; i < DS_END; i++) - if (c->stats[i] && c->stats[i]->nvals > 16) - fd->m[i]->unpackable = 1; - pthread_mutex_unlock(&fd->metrics_lock); - - /* Specific compression methods for certain block types */ - if (cram_compress_block2(fd, s, s->block[DS_IN], fd->m[DS_IN], //IN (seq) - method, level)) - return -1; - - if (fd->level == 0) { - /* Do nothing */ - } else if (fd->level == 1) { - if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], - qmethodF, 1)) - return -1; - for (i = DS_aux; i <= DS_aux_oz; i++) { - if (s->block[i]) - if (cram_compress_block2(fd, s, s->block[i], fd->m[i], - method, 1)) - return -1; - } - } else if (fd->level < 3) { - if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], - qmethod, 1)) - return -1; - if (cram_compress_block2(fd, s, s->block[DS_BA], fd->m[DS_BA], - method, 1)) - return -1; - if (s->block[DS_BB]) - if (cram_compress_block2(fd, s, s->block[DS_BB], fd->m[DS_BB], - method, 1)) - return -1; - for (i = DS_aux; i <= DS_aux_oz; i++) { - if (s->block[i]) - if (cram_compress_block2(fd, s, s->block[i], fd->m[i], - method, level)) - return -1; - } - } else { - if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], - qmethod, level)) - return -1; - if (cram_compress_block2(fd, s, s->block[DS_BA], fd->m[DS_BA], - method, level)) - return -1; - if (s->block[DS_BB]) - if (cram_compress_block2(fd, s, s->block[DS_BB], fd->m[DS_BB], - method, level)) - return -1; - for (i = DS_aux; i <= DS_aux_oz; i++) { - if (s->block[i]) - if (cram_compress_block2(fd, s, s->block[i], fd->m[i], - method, level)) - return -1; - } - } - - // NAME: best is 
generally xz, bzip2, zlib then rans1 - int method_rn = method & ~(method_rans | method_ranspr | 1<version >= (3<<8)+1 && fd->use_tok) - method_rn |= fd->use_arith ? (1<block[DS_RN], fd->m[DS_RN], - method_rn, level)) - return -1; - - // NS shows strong local correlation as rearrangements are localised - if (s->block[DS_NS] && s->block[DS_NS] != s->block[0]) - if (cram_compress_block2(fd, s, s->block[DS_NS], fd->m[DS_NS], - method, level)) - return -1; - - - /* - * Compress any auxiliary tags with their own per-tag metrics - */ - { - int i; - for (i = DS_END /*num_blk - naux_blk*/; i < s->hdr->num_blocks; i++) { - if (!s->block[i] || s->block[i] == s->block[0]) - continue; - - if (s->block[i]->method != RAW) - continue; - - if (cram_compress_block2(fd, s, s->block[i], s->block[i]->m, - method, level)) - return -1; - } - } - - /* - * Minimal compression of any block still uncompressed, bar CORE - */ - { - int i; - for (i = 1; i < s->hdr->num_blocks && i < DS_END; i++) { - if (!s->block[i] || s->block[i] == s->block[0]) - continue; - - if (s->block[i]->method != RAW) - continue; - - if (cram_compress_block2(fd, s, s->block[i], fd->m[i], - methodF, level)) - return -1; - } - } - - return 0; -} - -/* - * Allocates a block associated with the cram codec associated with - * data series ds_id or the internal codec_id (depending on codec - * type). - * - * The ds_ids are what end up written to disk as an external block. - * The c_ids are internal and used when daisy-chaining transforms - * such as MAP and RLE. These blocks are also allocated, but - * are ephemeral in nature. (The codecs themselves cannot allocate - * these as the same codec pointer may be operating on multiple slices - * if we're using a multi-slice container.) 
- * - * Returns 0 on success - * -1 on failure - */ -static int cram_allocate_block(cram_codec *codec, cram_slice *s, int ds_id) { - if (!codec) - return 0; - - switch(codec->codec) { - // Codecs which are hard-coded to use the CORE block - case E_GOLOMB: - case E_HUFFMAN: - case E_BETA: - case E_SUBEXP: - case E_GOLOMB_RICE: - case E_GAMMA: - codec->out = s->block[0]; - break; - - // Codecs which don't use external blocks - case E_CONST_BYTE: - case E_CONST_INT: - codec->out = NULL; - break; - - // Codecs that emit directly to external blocks - case E_EXTERNAL: - case E_VARINT_UNSIGNED: - case E_VARINT_SIGNED: - if (!(s->block[ds_id] = cram_new_block(EXTERNAL, ds_id))) - return -1; - codec->u.external.content_id = ds_id; - codec->out = s->block[ds_id]; - break; - - case E_BYTE_ARRAY_STOP: // Why no sub-codec? - if (!(s->block[ds_id] = cram_new_block(EXTERNAL, ds_id))) - return -1; - codec->u.byte_array_stop.content_id = ds_id; - codec->out = s->block[ds_id]; - break; - - - // Codecs that contain sub-codecs which may in turn emit to external blocks - case E_BYTE_ARRAY_LEN: { - cram_codec *bal = codec->u.e_byte_array_len.len_codec; - if (cram_allocate_block(bal, s, bal->u.external.content_id)) - return -1; - bal = codec->u.e_byte_array_len.val_codec; - if (cram_allocate_block(bal, s, bal->u.external.content_id)) - return -1; - - break; - } - - case E_XRLE: - if (cram_allocate_block(codec->u.e_xrle.len_codec, s, ds_id)) - //ds_id == DS_QS ? 
DS_QS_len : ds_id)) - return -1; - if (cram_allocate_block(codec->u.e_xrle.lit_codec, s, ds_id)) - return -1; - - break; - - case E_XPACK: - if (cram_allocate_block(codec->u.e_xpack.sub_codec, s, ds_id)) - return -1; - codec->out = cram_new_block(0, 0); // ephemeral - if (!codec->out) - return -1; - - break; - - case E_XDELTA: - if (cram_allocate_block(codec->u.e_xdelta.sub_codec, s, ds_id)) - return -1; - codec->out = cram_new_block(0, 0); // ephemeral - if (!codec->out) - return -1; - - break; - - default: - break; - } - - return 0; -} - -/* - * Encodes a single slice from a container - * - * Returns 0 on success - * -1 on failure - */ -static int cram_encode_slice(cram_fd *fd, cram_container *c, - cram_block_compression_hdr *h, cram_slice *s, - int embed_ref) { - int rec, r = 0; - int64_t last_pos; - enum cram_DS_ID id; - - /* - * Slice external blocks: - * ID 0 => base calls (insertions, soft-clip) - * ID 1 => qualities - * ID 2 => names - * ID 3 => TS (insert size), NP (next frag) - * ID 4 => tag values - * ID 6 => tag IDs (TN), if CRAM_V1.0 - * ID 7 => TD tag dictionary, if !CRAM_V1.0 - */ - - /* Create cram slice header */ - s->hdr->ref_base_id = embed_ref>0 && s->hdr->ref_seq_span > 0 - ? DS_ref - : (CRAM_MAJOR_VERS(fd->version) >= 4 ? 0 : -1); - s->hdr->record_counter = c->num_records + c->record_counter; - c->num_records += s->hdr->num_records; - - int ntags = c->tags_used ? c->tags_used->n_occupied : 0; - s->block = calloc(DS_END + ntags*2, sizeof(s->block[0])); - s->hdr->block_content_ids = malloc(DS_END * sizeof(int32_t)); - if (!s->block || !s->hdr->block_content_ids) - return -1; - - // Create first fixed blocks, always external. 
- // CORE - if (!(s->block[0] = cram_new_block(CORE, 0))) - return -1; - - // TN block for CRAM v1 - if (CRAM_MAJOR_VERS(fd->version) == 1) { - if (h->codecs[DS_TN]->codec == E_EXTERNAL) { - if (!(s->block[DS_TN] = cram_new_block(EXTERNAL,DS_TN))) return -1; - h->codecs[DS_TN]->u.external.content_id = DS_TN; - } else { - s->block[DS_TN] = s->block[0]; - } - } - - // Embedded reference - if (embed_ref>0) { - if (!(s->block[DS_ref] = cram_new_block(EXTERNAL, DS_ref))) - return -1; - s->ref_id = DS_ref; // needed? - BLOCK_APPEND(s->block[DS_ref], - c->ref + s->hdr->ref_seq_start - c->ref_start, - s->hdr->ref_seq_span); - } - - /* - * All the data-series blocks if appropriate. - */ - for (id = DS_QS; id < DS_TN; id++) { - if (cram_allocate_block(h->codecs[id], s, id) < 0) - return -1; - } - - /* - * Add in the external tag blocks too. - */ - if (c->tags_used) { - int n; - s->hdr->num_blocks = DS_END; - for (n = 0; n < s->naux_block; n++) { - s->block[s->hdr->num_blocks++] = s->aux_block[n]; - s->aux_block[n] = NULL; - } - } - - /* Encode reads */ - last_pos = s->hdr->ref_seq_start; - for (rec = 0; rec < s->hdr->num_records; rec++) { - cram_record *cr = &s->crecs[rec]; - if (cram_encode_slice_read(fd, c, h, s, cr, &last_pos) == -1) - return -1; - } - - s->block[0]->uncomp_size = s->block[0]->byte + (s->block[0]->bit < 7); - s->block[0]->comp_size = s->block[0]->uncomp_size; - - // Make sure the fixed blocks point to the correct sources - if (s->block[DS_IN]) cram_free_block(s->block[DS_IN]); - s->block[DS_IN] = s->base_blk; s->base_blk = NULL; - if (s->block[DS_QS]) cram_free_block(s->block[DS_QS]); - s->block[DS_QS] = s->qual_blk; s->qual_blk = NULL; - if (s->block[DS_RN]) cram_free_block(s->block[DS_RN]); - s->block[DS_RN] = s->name_blk; s->name_blk = NULL; - if (s->block[DS_SC]) cram_free_block(s->block[DS_SC]); - s->block[DS_SC] = s->soft_blk; s->soft_blk = NULL; - - // Finalise any data transforms. 
- for (id = DS_QS; id < DS_TN; id++) { - if (h->codecs[id] && h->codecs[id]->flush) - h->codecs[id]->flush(h->codecs[id]); - } - - // Ensure block sizes are up to date. - for (id = 1; id < s->hdr->num_blocks; id++) { - if (!s->block[id] || s->block[id] == s->block[0]) - continue; - - if (s->block[id]->uncomp_size == 0) - BLOCK_UPLEN(s->block[id]); - } - - // Compress it all - if (cram_compress_slice(fd, c, s) == -1) - return -1; - - // Collapse empty blocks and create hdr_block - { - int i, j; - - s->hdr->block_content_ids = realloc(s->hdr->block_content_ids, - s->hdr->num_blocks * sizeof(int32_t)); - if (!s->hdr->block_content_ids) - return -1; - - for (i = j = 1; i < s->hdr->num_blocks; i++) { - if (!s->block[i] || s->block[i] == s->block[0]) - continue; - if (s->block[i]->uncomp_size == 0) { - cram_free_block(s->block[i]); - s->block[i] = NULL; - continue; - } - s->block[j] = s->block[i]; - s->hdr->block_content_ids[j-1] = s->block[i]->content_id; - j++; - } - s->hdr->num_content_ids = j-1; - s->hdr->num_blocks = j; - - if (!(s->hdr_block = cram_encode_slice_header(fd, s))) - return -1; - } - - return r ? -1 : 0; - - block_err: - return -1; -} - -static inline const char *bam_data_end(bam1_t *b) { - return (const char *)b->data + b->l_data; -} - -/* - * A bounds checking version of bam_aux2i. 
- */ -static inline int bam_aux2i_end(const uint8_t *aux, const uint8_t *aux_end) { - int type = *aux++; - switch (type) { - case 'c': - if (aux_end - aux < 1) { - errno = EINVAL; - return 0; - } - return *(int8_t *)aux; - case 'C': - if (aux_end - aux < 1) { - errno = EINVAL; - return 0; - } - return *aux; - case 's': - if (aux_end - aux < 2) { - errno = EINVAL; - return 0; - } - return le_to_i16(aux); - case 'S': - if (aux_end - aux < 2) { - errno = EINVAL; - return 0; - } - return le_to_u16(aux); - case 'i': - if (aux_end - aux < 4) { - errno = EINVAL; - return 0; - } - return le_to_i32(aux); - case 'I': - if (aux_end - aux < 4) { - errno = EINVAL; - return 0; - } - return le_to_u32(aux); - default: - errno = EINVAL; - } - return 0; -} - -/* - * Returns the number of expected read names for this record. - */ -static int expected_template_count(bam_seq_t *b) { - int expected = bam_flag(b) & BAM_FPAIRED ? 2 : 1; - - uint8_t *TC = (uint8_t *)bam_aux_get(b, "TC"); - if (TC) { - int n = bam_aux2i_end(TC, (uint8_t *)bam_data_end(b)); - if (expected < n) - expected = n; - } - - if (!TC && bam_aux_get(b, "SA")) { - // We could count the semicolons, but we'd have to do this for - // read1, read2 and read(not-1-or-2) combining the results - // together. This is a cheap and safe alternative for now. - expected = INT_MAX; - } - - return expected; -} - -/* - * Lossily reject read names. - * - * The rule here is that if all reads for this template reside in the - * same slice then we can lose the name. Otherwise we keep them as we - * do not know when (or if) the other reads will turn up. - * - * Note there may be only 1 read (non-paired library) or more than 2 - * reads (paired library with supplementary reads), or other weird - * setups. We need to know how many are expected. Ways to guess: - * - * - Flags (0x1 - has > 1 read) - * - TC aux field (not mandatory) - * - SA tags (count semicolons, NB per fragment so sum - hard) - * - RNEXT/PNEXT uniqueness count. 
(not implemented, tricky) - * - * Returns 0 on success - * -1 on failure - */ -static int lossy_read_names(cram_fd *fd, cram_container *c, cram_slice *s, - int bam_start) { - int r1, r2, ret = -1; - - // Initialise cram_flags - for (r2 = 0; r2 < s->hdr->num_records; r2++) - s->crecs[r2].cram_flags = 0; - - if (!fd->lossy_read_names) - return 0; - - khash_t(m_s2u64) *names = kh_init(m_s2u64); - if (!names) - goto fail; - - // 1: Iterate through names to count frequency - for (r1 = bam_start, r2 = 0; r2 < s->hdr->num_records; r1++, r2++) { - //cram_record *cr = &s->crecs[r2]; - bam_seq_t *b = c->bams[r1]; - khint_t k; - int n; - uint64_t e; - union { - uint64_t i64; - struct { - int32_t e,c; // expected & observed counts. - } counts; - } u; - - e = expected_template_count(b); - u.counts.e = e; u.counts.c = 1; - - k = kh_put(m_s2u64, names, bam_name(b), &n); - if (n == -1) - goto fail; - - if (n == 0) { - // not a new name - u.i64 = kh_val(names, k); - if (u.counts.e != e) { - // different expectation or already hit the max - //fprintf(stderr, "Err computing no. %s recs\n", bam_name(b)); - kh_val(names, k) = 0; - } else { - u.counts.c++; - if (u.counts.e == u.counts.c) { - // Reached expected count. - kh_val(names, k) = -1; - } else { - kh_val(names, k) = u.i64; - } - } - } else { - // new name - kh_val(names, k) = u.i64; - } - } - - // 2: Remove names if all present (hd.i == -1) - for (r1 = bam_start, r2 = 0; r2 < s->hdr->num_records; r1++, r2++) { - cram_record *cr = &s->crecs[r2]; - bam_seq_t *b = c->bams[r1]; - khint_t k; - - k = kh_get(m_s2u64, names, bam_name(b)); - - if (k == kh_end(names)) - goto fail; - - if (kh_val(names, k) == -1) - cr->cram_flags = CRAM_FLAG_DISCARD_NAME; - } - - ret = 0; - fail: // ret==-1 - - if (names) - kh_destroy(m_s2u64, names); - - return ret; -} - -/* - * Adds the reading names. 
We do this here as a separate pass rather - * than per record in the process_one_read calls as that function can - * go back and change the CRAM_FLAG_DETACHED status of a previously - * processed read if it subsequently determines the TLEN field is - * incorrect. Given DETACHED reads always try to decode read names, - * we need to know their status before generating the read-name block. - * - * Output is an update s->name_blk, and cr->name / cr->name_len - * fields. - */ -static int add_read_names(cram_fd *fd, cram_container *c, cram_slice *s, - int bam_start) { - int r1, r2; - int keep_names = !fd->lossy_read_names; - - for (r1 = bam_start, r2 = 0; - r1 < c->curr_c_rec && r2 < s->hdr->num_records; - r1++, r2++) { - cram_record *cr = &s->crecs[r2]; - bam_seq_t *b = c->bams[r1]; - - cr->name = BLOCK_SIZE(s->name_blk); - if ((cr->cram_flags & CRAM_FLAG_DETACHED) || keep_names) { - if (CRAM_MAJOR_VERS(fd->version) >= 4 - && (cr->cram_flags & CRAM_FLAG_MATE_DOWNSTREAM) - && cr->mate_line) { - // Dedup read names in V4 - BLOCK_APPEND(s->name_blk, "\0", 1); - cr->name_len = 1; - } else { - BLOCK_APPEND(s->name_blk, bam_name(b), bam_name_len(b)); - cr->name_len = bam_name_len(b); - } - } else { - // Can only discard duplicate names if not detached - cr->name_len = 0; - } - - if (cram_stats_add(c->stats[DS_RN], cr->name_len) < 0) - goto block_err; - } - - return 0; - - block_err: - return -1; -} - -// CRAM version >= 3.1 -#define CRAM_ge31(v) ((v) >= 0x301) - -// Returns the next cigar op code: one of the BAM_C* codes, -// or -1 if no more are present. 
-static inline -int next_cigar_op(uint32_t *cigar, uint32_t ncigar, int *skip, int *spos, - uint32_t *cig_ind, uint32_t *cig_op, uint32_t *cig_len) { - for(;;) { - while (*cig_len == 0) { - if (*cig_ind < ncigar) { - *cig_op = cigar[*cig_ind] & BAM_CIGAR_MASK; - *cig_len = cigar[*cig_ind] >> BAM_CIGAR_SHIFT; - (*cig_ind)++; - } else { - return -1; - } - } - - if (skip[*cig_op]) { - *spos += (bam_cigar_type(*cig_op)&1) * *cig_len; - *cig_len = 0; - continue; - } - - (*cig_len)--; - break; - } - - return *cig_op; -} - -// Ensure ref and hist are large enough. -static inline int extend_ref(char **ref, uint32_t (**hist)[5], hts_pos_t pos, - hts_pos_t ref_start, hts_pos_t *ref_end) { - if (pos < ref_start) - return -1; - if (pos < *ref_end) - return 0; - - // realloc - if (pos - ref_start > UINT_MAX) - return -2; // protect overflow in new_end calculation - - hts_pos_t old_end = *ref_end ? *ref_end : ref_start; - hts_pos_t new_end = ref_start + 1000 + (pos-ref_start)*1.5; - - // Refuse to work on excessively large blocks. - // We'll just switch to referenceless encoding, which is probably better - // here as this must be very sparse data anyway. 
- if (new_end - ref_start > UINT_MAX/sizeof(**hist)/2) - return -2; - - char *tmp = realloc(*ref, new_end-ref_start+1); - if (!tmp) - return -1; - *ref = tmp; - - uint32_t (*tmp5)[5] = realloc(**hist, - (new_end - ref_start)*sizeof(**hist)); - if (!tmp5) - return -1; - *hist = tmp5; - *ref_end = new_end; - - // initialise - old_end -= ref_start; - new_end -= ref_start; - memset(&(*ref)[old_end], 0, new_end-old_end); - memset(&(*hist)[old_end], 0, (new_end-old_end)*sizeof(**hist)); - - return 0; -} - -// Walk through MD + seq to generate ref -// Returns 1 on success, <0 on failure -static int cram_add_to_ref_MD(bam1_t *b, char **ref, uint32_t (**hist)[5], - hts_pos_t ref_start, hts_pos_t *ref_end, - const uint8_t *MD) { - uint8_t *seq = bam_get_seq(b); - uint32_t *cigar = bam_get_cigar(b); - uint32_t ncigar = b->core.n_cigar; - uint32_t cig_op = 0, cig_len = 0, cig_ind = 0; - - int iseq = 0, next_op; - hts_pos_t iref = b->core.pos - ref_start; - - // Skip INS, REF_SKIP, *CLIP, PAD. and BACK. - static int cig_skip[16] = {0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1}; - while (iseq < b->core.l_qseq && *MD) { - if (isdigit(*MD)) { - // match - int overflow = 0; - int len = hts_str2uint((char *)MD, (char **)&MD, 31, &overflow); - if (overflow || - extend_ref(ref, hist, iref+ref_start + len, - ref_start, ref_end) < 0) - return -1; - while (iseq < b->core.l_qseq && len) { - // rewrite to have internal loops? 
- if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, - &iseq, &cig_ind, &cig_op, - &cig_len)) < 0) - return -1; - - if (next_op != BAM_CMATCH && - next_op != BAM_CEQUAL) { - hts_log_info("MD:Z and CIGAR are incompatible for " - "record %s", bam_get_qname(b)); - return -1; - } - - // Short-cut loop over same cigar op for efficiency - cig_len++; - do { - cig_len--; - (*ref)[iref++] = seq_nt16_str[bam_seqi(seq, iseq)]; - iseq++; - len--; - } while (cig_len && iseq < b->core.l_qseq && len); - } - if (len > 0) - return -1; // MD is longer than seq - } else if (*MD == '^') { - // deletion - MD++; - while (isalpha(*MD)) { - if (extend_ref(ref, hist, iref+ref_start, ref_start, - ref_end) < 0) - return -1; - if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, - &iseq, &cig_ind, &cig_op, - &cig_len)) < 0) - return -1; - - if (next_op != BAM_CDEL) { - hts_log_info("MD:Z and CIGAR are incompatible"); - return -1; - } - - (*ref)[iref++] = *MD++ & ~0x20; - } - } else { - // substitution - if (extend_ref(ref, hist, iref+ref_start, ref_start, ref_end) < 0) - return -1; - if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, - &iseq, &cig_ind, &cig_op, - &cig_len)) < 0) - return -1; - - if (next_op != BAM_CMATCH && next_op != BAM_CDIFF) { - hts_log_info("MD:Z and CIGAR are incompatible"); - return -1; - } - - (*ref)[iref++] = *MD++ & ~0x20; - iseq++; - } - } - - return 1; -} - -// Append a sequence to a ref/consensus structure. -// We maintain both an absolute refefence (ACGTN where MD:Z is -// present) and a 5-way frequency array for when no MD:Z is known. -// We then subsequently convert the 5-way frequencies to a consensus -// ref in a second pass. 
-// -// Returns >=0 on success, -// -1 on failure (eg inconsistent data) -static int cram_add_to_ref(bam1_t *b, char **ref, uint32_t (**hist)[5], - hts_pos_t ref_start, hts_pos_t *ref_end) { - const uint8_t *MD = bam_aux_get(b, "MD"); - int ret = 0; - if (MD && *MD == 'Z') { - // We can use MD to directly compute the reference - int ret = cram_add_to_ref_MD(b, ref, hist, ref_start, ref_end, MD+1); - - if (ret > 0) - return ret; - } - - // Otherwise we just use SEQ+CIGAR and build a consensus which we later - // turn into a fake reference - uint32_t *cigar = bam_get_cigar(b); - uint32_t ncigar = b->core.n_cigar; - uint32_t i, j; - hts_pos_t iseq = 0, iref = b->core.pos - ref_start; - uint8_t *seq = bam_get_seq(b); - for (i = 0; i < ncigar; i++) { - switch (bam_cigar_op(cigar[i])) { - case BAM_CSOFT_CLIP: - case BAM_CINS: - iseq += bam_cigar_oplen(cigar[i]); - break; - - case BAM_CMATCH: - case BAM_CEQUAL: - case BAM_CDIFF: { - int len = bam_cigar_oplen(cigar[i]); - // Maps an nt16 (A=1 C=2 G=4 T=8 bits) to 0123 plus N=4 - static uint8_t L16[16] = {4,0,1,4, 2,4,4,4, 3,4,4,4, 4,4,4,4}; - - if (extend_ref(ref, hist, iref+ref_start + len, - ref_start, ref_end) < 0) - return -1; - if (iseq + len <= b->core.l_qseq) { - // Nullify failed MD:Z if appropriate - if (ret < 0) - memset(&(*ref)[iref], 0, len); - - for (j = 0; j < len; j++, iref++, iseq++) - (*hist)[iref][L16[bam_seqi(seq, iseq)]]++; - } else { - // Probably a 2ndary read with seq "*" - iseq += len; - iref += len; - } - break; - } - - case BAM_CDEL: - case BAM_CREF_SKIP: - iref += bam_cigar_oplen(cigar[i]); - } - } - - return 1; -} - -// Automatically generates the reference and stashed it in c->ref, also -// setting c->ref_start and c->ref_end. -// -// If we have MD:Z tags then we use them to directly infer the reference, -// along with SEQ + CIGAR. Otherwise we use SEQ/CIGAR only to build up -// a consensus and then assume the reference as the majority rule. 
-// -// In this latter scenario we need to be wary of auto-generating MD and NM -// during decode, but that's handled elsewhere via an additional aux tag. -// -// Returns 0 on success, -// -1 on failure -static int cram_generate_reference(cram_container *c, cram_slice *s, int r1) { - // TODO: if we can find an external reference then use it, even if the - // user told us to do embed_ref=2. - char *ref = NULL; - uint32_t (*hist)[5] = NULL; - hts_pos_t ref_start = c->bams[r1]->core.pos, ref_end = 0; - if (ref_start < 0) - return -1; // cannot build consensus from unmapped data - - // initial allocation - if (extend_ref(&ref, &hist, - c->bams[r1 + s->hdr->num_records-1]->core.pos + - c->bams[r1 + s->hdr->num_records-1]->core.l_qseq, - ref_start, &ref_end) < 0) - return -1; - - // Add each bam file to the reference/consensus arrays - int r2; - hts_pos_t last_pos = -1; - for (r2 = 0; r1 < c->curr_c_rec && r2 < s->hdr->num_records; r1++, r2++) { - if (c->bams[r1]->core.pos < last_pos) { - hts_log_error("Cannot build reference with unsorted data"); - goto err; - } - last_pos = c->bams[r1]->core.pos; - if (cram_add_to_ref(c->bams[r1], &ref, &hist, ref_start, &ref_end) < 0) - goto err; - } - - // Compute the consensus - hts_pos_t i; - for (i = 0; i < ref_end-ref_start; i++) { - if (!ref[i]) { - int max_v = 0, max_j = 4, j; - for (j = 0; j < 4; j++) - // don't call N (j==4) unless no coverage - if (max_v < hist[i][j]) - max_v = hist[i][j], max_j = j; - ref[i] = "ACGTN"[max_j]; - } - } - free(hist); - - // Put the reference in place so it appears to be an external - // ref file. - c->ref = ref; - c->ref_start = ref_start+1; - c->ref_end = ref_end+1; - c->ref_free = 1; - - return 0; - - err: - free(ref); - free(hist); - return -1; -} - -// Check if the SQ M5 tag matches the reference we've loaded. -static int validate_md5(cram_fd *fd, int ref_id) { - if (fd->ignore_md5 || ref_id < 0 || ref_id >= fd->refs->nref) - return 0; - - // Have we already checked this ref? 
- if (fd->refs->ref_id[ref_id]->validated_md5) - return 0; - - // Check if we have the MD5 known. - // We should, but maybe we're using embedded references? - sam_hrecs_t *hrecs = fd->header->hrecs; - sam_hrec_type_t *ty = sam_hrecs_find_type_id(hrecs, "SQ", "SN", - hrecs->ref[ref_id].name); - if (!ty) - return 0; - - sam_hrec_tag_t *m5tag = sam_hrecs_find_key(ty, "M5", NULL); - if (!m5tag) - return 0; - - // It's known, so compute md5 on the loaded reference sequence. - char *ref = fd->refs->ref_id[ref_id]->seq; - int64_t len = fd->refs->ref_id[ref_id]->length; - hts_md5_context *md5; - char unsigned buf[16]; - char buf2[33]; - - if (!(md5 = hts_md5_init())) - return -1; - hts_md5_update(md5, ref, len); - hts_md5_final(buf, md5); - hts_md5_destroy(md5); - hts_md5_hex(buf2, buf); - - // Compare it to header @SQ M5 tag - if (strcmp(m5tag->str+3, buf2)) { - hts_log_error("SQ header M5 tag discrepancy for reference '%s'", - hrecs->ref[ref_id].name); - hts_log_error("Please use the correct reference, or " - "consider using embed_ref=2"); - return -1; - } - fd->refs->ref_id[ref_id]->validated_md5 = 1; - - return 0; -} - -/* - * Encodes all slices in a container into blocks. 
- * Returns 0 on success - * -1 on failure - */ -int cram_encode_container(cram_fd *fd, cram_container *c) { - int i, j, slice_offset; - cram_block_compression_hdr *h = c->comp_hdr; - cram_block *c_hdr; - int multi_ref = 0; - int r1, r2, sn, nref, embed_ref, no_ref; - spare_bams *spares; - - if (!c->bams) - goto err; - - if (CRAM_MAJOR_VERS(fd->version) == 1) - goto err; - -//#define goto_err {fprintf(stderr, "ERR at %s:%d\n", __FILE__, __LINE__);goto err;} -#define goto_err goto err - - // Don't try embed ref if we repeatedly fail - pthread_mutex_lock(&fd->ref_lock); - int failed_embed = (fd->no_ref_counter >= 5); // maximum 5 tries - if (!failed_embed && c->embed_ref == -2) { - hts_log_warning("Retrying embed_ref=2 mode for #%d/5", fd->no_ref_counter); - fd->no_ref = c->no_ref = 0; - fd->embed_ref = c->embed_ref = 2; - } else if (failed_embed && c->embed_ref == -2) { - // We've tried several times, so this time give up for good - hts_log_warning("Keeping non-ref mode from now on"); - fd->embed_ref = c->embed_ref = 0; - } - pthread_mutex_unlock(&fd->ref_lock); - - restart: - /* Cache references up-front if we have unsorted access patterns */ - pthread_mutex_lock(&fd->ref_lock); - nref = fd->refs->nref; - pthread_mutex_unlock(&fd->ref_lock); - embed_ref = c->embed_ref; - no_ref = c->no_ref; - - /* To create M5 strings */ - /* Fetch reference sequence */ - if (!no_ref) { - if (!c->bams || !c->curr_c_rec || !c->bams[0]) - goto_err; - bam_seq_t *b = c->bams[0]; - - if (embed_ref <= 1) { - char *ref = cram_get_ref(fd, bam_ref(b), 1, 0); - if (!ref && bam_ref(b) >= 0) { - if (!c->pos_sorted) { - // TODO: maybe also check fd->no_ref? 
- hts_log_warning("Failed to load reference #%d", - bam_ref(b)); - hts_log_warning("Switching to non-ref mode"); - - pthread_mutex_lock(&fd->ref_lock); - c->embed_ref = fd->embed_ref = 0; - c->no_ref = fd->no_ref = 1; - pthread_mutex_unlock(&fd->ref_lock); - goto restart; - } - - if (c->multi_seq || embed_ref == 0) { - hts_log_error("Failed to load reference #%d", bam_ref(b)); - return -1; - } - hts_log_warning("Failed to load reference #%d", bam_ref(b)); - hts_log_warning("Enabling embed_ref=2 mode to auto-generate" - " reference"); - if (embed_ref <= 0) - hts_log_warning("NOTE: the CRAM file will be bigger than" - " using an external reference"); - pthread_mutex_lock(&fd->ref_lock); - embed_ref = c->embed_ref = fd->embed_ref = 2; - pthread_mutex_unlock(&fd->ref_lock); - goto auto_ref; - } else if (ref) { - if (validate_md5(fd, c->ref_seq_id) < 0) - goto_err; - } - if ((c->ref_id = bam_ref(b)) >= 0) { - c->ref_seq_id = c->ref_id; - c->ref = fd->refs->ref_id[c->ref_seq_id]->seq; - c->ref_start = 1; - c->ref_end = fd->refs->ref_id[c->ref_seq_id]->length; - } - } else { - auto_ref: - // Auto-embed ref. - // This starts as 'N' and is amended on-the-fly as we go - // based on MD:Z tags. - if ((c->ref_id = bam_ref(b)) >= 0) { - c->ref = NULL; - // c->ref_free is boolean; whether to free c->ref. In this - // case c->ref will be our auto-embedded sequence instead of - // a "global" portion of reference from fd->refs. - // Do not confuse with fd->ref_free which is a pointer to a - // reference string to free. 
- c->ref_free = 1; - } - } - c->ref_seq_id = c->ref_id; - } else { - c->ref_id = bam_ref(c->bams[0]); - cram_ref_incr(fd->refs, c->ref_id); - c->ref_seq_id = c->ref_id; - } - - if (!no_ref && c->refs_used) { - for (i = 0; i < nref; i++) { - if (c->refs_used[i]) { - if (cram_get_ref(fd, i, 1, 0)) { - if (validate_md5(fd, i) < 0) - goto_err; - } else { - hts_log_warning("Failed to find reference, " - "switching to non-ref mode"); - no_ref = c->no_ref = 1; - } - } - } - } - - /* Turn bams into cram_records and gather basic stats */ - for (r1 = sn = 0; r1 < c->curr_c_rec; sn++) { - cram_slice *s = c->slices[sn]; - int64_t first_base = INT64_MAX, last_base = INT64_MIN; - - int r1_start = r1; - - assert(sn < c->curr_slice); - - // Discover which read names *may* be safely removed. - // Ie which ones have all their records in this slice. - if (lossy_read_names(fd, c, s, r1_start) != 0) - return -1; - - // Tracking of MD tags so we can spot when the auto-generated values - // will differ from the current stored ones. The kstring here is - // simply to avoid excessive malloc and free calls. All initialisation - // is done within process_one_read(). - kstring_t MD = {0}; - - // Embed consensus / MD-generated ref - if (embed_ref == 2) { - if (cram_generate_reference(c, s, r1) < 0) { - // Should this be a permanent thing via fd->no_ref? - // Doing so means we cannot easily switch back again should - // things fix themselves later on. This is likely not a - // concern though as failure to generate a reference implies - // unsorted data which is rarely recovered from. - - // Only if sn == 0. We're hosed if we're on the 2nd slice and - // the first worked, as no-ref is a container global param. 
- if (sn > 0) { - hts_log_error("Failed to build reference, " - "switching to non-ref mode"); - return -1; - } else { - hts_log_warning("Failed to build reference, " - "switching to non-ref mode"); - } - pthread_mutex_lock(&fd->ref_lock); - c->embed_ref = fd->embed_ref = -2; // was previously embed_ref - c->no_ref = fd->no_ref = 1; - fd->no_ref_counter++; // more likely to keep permanent action - pthread_mutex_unlock(&fd->ref_lock); - failed_embed = 1; - goto restart; - } else { - pthread_mutex_lock(&fd->ref_lock); - fd->no_ref_counter -= (fd->no_ref_counter > 0); - pthread_mutex_unlock(&fd->ref_lock); - } - } - - // Iterate through records creating the cram blocks for some - // fields and just gathering stats for others. - for (r2 = 0; r1 < c->curr_c_rec && r2 < s->hdr->num_records; r1++, r2++) { - cram_record *cr = &s->crecs[r2]; - bam_seq_t *b = c->bams[r1]; - - /* If multi-ref we need to cope with changing reference per seq */ - if (c->multi_seq && !no_ref) { - if (bam_ref(b) != c->ref_seq_id && bam_ref(b) >= 0) { - if (c->ref_seq_id >= 0) - cram_ref_decr(fd->refs, c->ref_seq_id); - - if (!cram_get_ref(fd, bam_ref(b), 1, 0)) { - hts_log_error("Failed to load reference #%d", bam_ref(b)); - free(MD.s); - return -1; - } - if (validate_md5(fd, bam_ref(b)) < 0) - return -1; - - c->ref_seq_id = bam_ref(b); // overwritten later by -2 - if (!fd->refs->ref_id[c->ref_seq_id]->seq) - return -1; - c->ref = fd->refs->ref_id[c->ref_seq_id]->seq; - c->ref_start = 1; - c->ref_end = fd->refs->ref_id[c->ref_seq_id]->length; - } - } - - if (process_one_read(fd, c, s, cr, b, r2, &MD, embed_ref, - no_ref) != 0) { - free(MD.s); - return -1; - } - - if (first_base > cr->apos) - first_base = cr->apos; - - if (last_base < cr->aend) - last_base = cr->aend; - } - - free(MD.s); - - // Process_one_read doesn't add read names as it can change - // its mind during the loop on the CRAM_FLAG_DETACHED setting - // of earlier records (if it detects the auto-generation of - // TLEN is incorrect). 
This affects which read-names can be - // lossily compressed, so we do these in another pass. - if (add_read_names(fd, c, s, r1_start) < 0) - return -1; - - if (c->multi_seq) { - s->hdr->ref_seq_id = -2; - s->hdr->ref_seq_start = 0; - s->hdr->ref_seq_span = 0; - } else if (c->ref_id == -1 && CRAM_ge31(fd->version)) { - // Spec states span=0, but it broke our range queries. - // See commit message for this and prior. - s->hdr->ref_seq_id = -1; - s->hdr->ref_seq_start = 0; - s->hdr->ref_seq_span = 0; - } else { - s->hdr->ref_seq_id = c->ref_id; - s->hdr->ref_seq_start = first_base; - s->hdr->ref_seq_span = MAX(0, last_base - first_base + 1); - } - s->hdr->num_records = r2; - - // Processed a slice, now stash the aux blocks so the next - // slice can start aggregating them from the start again. - if (c->tags_used->n_occupied) { - int ntags = c->tags_used->n_occupied; - s->aux_block = calloc(ntags*2, sizeof(*s->aux_block)); - if (!s->aux_block) - return -1; - - khint_t k; - - s->naux_block = 0; - for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { - if (!kh_exist(c->tags_used, k)) - continue; - - cram_tag_map *tm = kh_val(c->tags_used, k); - if (!tm) goto_err; - if (!tm->blk) continue; - s->aux_block[s->naux_block++] = tm->blk; - tm->blk = NULL; - if (!tm->blk2) continue; - s->aux_block[s->naux_block++] = tm->blk2; - tm->blk2 = NULL; - } - assert(s->naux_block <= 2*c->tags_used->n_occupied); - } - } - - if (c->multi_seq && !no_ref) { - if (c->ref_seq_id >= 0) - cram_ref_decr(fd->refs, c->ref_seq_id); - } - - /* Link our bams[] array onto the spare bam list for reuse */ - spares = malloc(sizeof(*spares)); - if (!spares) goto_err; - pthread_mutex_lock(&fd->bam_list_lock); - spares->bams = c->bams; - spares->next = fd->bl; - fd->bl = spares; - pthread_mutex_unlock(&fd->bam_list_lock); - c->bams = NULL; - - /* Detect if a multi-seq container */ - cram_stats_encoding(fd, c->stats[DS_RI]); - multi_ref = c->stats[DS_RI]->nvals > 1; - 
pthread_mutex_lock(&fd->metrics_lock); - fd->last_RI_count = c->stats[DS_RI]->nvals; - pthread_mutex_unlock(&fd->metrics_lock); - - - if (multi_ref) { - hts_log_info("Multi-ref container"); - c->ref_seq_id = -2; - c->ref_seq_start = 0; - c->ref_seq_span = 0; - } - - - /* Compute MD5s */ - no_ref = c->no_ref; - int is_v4 = CRAM_MAJOR_VERS(fd->version) >= 4 ? 1 : 0; - - for (i = 0; i < c->curr_slice; i++) { - cram_slice *s = c->slices[i]; - - if (CRAM_MAJOR_VERS(fd->version) != 1) { - if (s->hdr->ref_seq_id >= 0 && c->multi_seq == 0 && !no_ref) { - hts_md5_context *md5 = hts_md5_init(); - if (!md5) - return -1; - hts_md5_update(md5, - c->ref + s->hdr->ref_seq_start - c->ref_start, - s->hdr->ref_seq_span); - hts_md5_final(s->hdr->md5, md5); - hts_md5_destroy(md5); - } else { - memset(s->hdr->md5, 0, 16); - } - } - } - - c->num_records = 0; - c->num_blocks = 1; // cram_block_compression_hdr - c->length = 0; - - //fprintf(stderr, "=== BF ===\n"); - h->codecs[DS_BF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BF]), - c->stats[DS_BF], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_BF]->nvals && !h->codecs[DS_BF]) goto_err; - - //fprintf(stderr, "=== CF ===\n"); - h->codecs[DS_CF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_CF]), - c->stats[DS_CF], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_CF]->nvals && !h->codecs[DS_CF]) goto_err; - - //fprintf(stderr, "=== RN ===\n"); - //h->codecs[DS_RN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RN]), - // c->stats[DS_RN], E_BYTE_ARRAY, NULL, - // fd->version); - - //fprintf(stderr, "=== AP ===\n"); - if (c->pos_sorted || CRAM_MAJOR_VERS(fd->version) >= 4) { - if (c->pos_sorted) - h->codecs[DS_AP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_AP]), - c->stats[DS_AP], - is_v4 ? E_LONG : E_INT, - NULL, fd->version, &fd->vv); - else - // Unsorted data has no stats, but hard-code VARINT_SIGNED / EXT. - h->codecs[DS_AP] = cram_encoder_init(is_v4 ? 
E_VARINT_SIGNED - : E_EXTERNAL, - NULL, - is_v4 ? E_LONG : E_INT, - NULL, fd->version, &fd->vv); - } else { - // Removed BETA in v4.0. - // Should we consider dropping use of it for 3.0 too? - hts_pos_t p[2] = {0, c->max_apos}; - h->codecs[DS_AP] = cram_encoder_init(E_BETA, NULL, - is_v4 ? E_LONG : E_INT, - p, fd->version, &fd->vv); -// cram_xdelta_encoder e; -// e.word_size = is_v4 ? 8 : 4; -// e.sub_encoding = E_EXTERNAL; -// e.sub_codec_dat = (void *)DS_AP; -// -// h->codecs[DS_AP] = cram_encoder_init(E_XDELTA, NULL, -// is_v4 ? E_LONG : E_INT, -// &e, fd->version, &fd->vv); - } - if (!h->codecs[DS_AP]) goto_err; - - //fprintf(stderr, "=== RG ===\n"); - h->codecs[DS_RG] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RG]), - c->stats[DS_RG], - E_INT, - NULL, - fd->version, &fd->vv); - if (c->stats[DS_RG]->nvals && !h->codecs[DS_RG]) goto_err; - - //fprintf(stderr, "=== MQ ===\n"); - h->codecs[DS_MQ] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MQ]), - c->stats[DS_MQ], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_MQ]->nvals && !h->codecs[DS_MQ]) goto_err; - - //fprintf(stderr, "=== NS ===\n"); - h->codecs[DS_NS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NS]), - c->stats[DS_NS], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_NS]->nvals && !h->codecs[DS_NS]) goto_err; - - //fprintf(stderr, "=== MF ===\n"); - h->codecs[DS_MF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MF]), - c->stats[DS_MF], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_MF]->nvals && !h->codecs[DS_MF]) goto_err; - - //fprintf(stderr, "=== TS ===\n"); - h->codecs[DS_TS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TS]), - c->stats[DS_TS], - is_v4 ? E_LONG : E_INT, - NULL, fd->version, &fd->vv); - if (c->stats[DS_TS]->nvals && !h->codecs[DS_TS]) goto_err; - - //fprintf(stderr, "=== NP ===\n"); - h->codecs[DS_NP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NP]), - c->stats[DS_NP], - is_v4 ? 
E_LONG : E_INT, - NULL, fd->version, &fd->vv); - if (c->stats[DS_NP]->nvals && !h->codecs[DS_NP]) goto_err; - - //fprintf(stderr, "=== NF ===\n"); - h->codecs[DS_NF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NF]), - c->stats[DS_NF], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_NF]->nvals && !h->codecs[DS_NF]) goto_err; - - //fprintf(stderr, "=== RL ===\n"); - h->codecs[DS_RL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RL]), - c->stats[DS_RL], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_RL]->nvals && !h->codecs[DS_RL]) goto_err; - - //fprintf(stderr, "=== FN ===\n"); - h->codecs[DS_FN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FN]), - c->stats[DS_FN], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_FN]->nvals && !h->codecs[DS_FN]) goto_err; - - //fprintf(stderr, "=== FC ===\n"); - h->codecs[DS_FC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FC]), - c->stats[DS_FC], E_BYTE, NULL, - fd->version, &fd->vv); - if (c->stats[DS_FC]->nvals && !h->codecs[DS_FC]) goto_err; - - //fprintf(stderr, "=== FP ===\n"); - h->codecs[DS_FP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FP]), - c->stats[DS_FP], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_FP]->nvals && !h->codecs[DS_FP]) goto_err; - - //fprintf(stderr, "=== DL ===\n"); - h->codecs[DS_DL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_DL]), - c->stats[DS_DL], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_DL]->nvals && !h->codecs[DS_DL]) goto_err; - - //fprintf(stderr, "=== BA ===\n"); - h->codecs[DS_BA] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BA]), - c->stats[DS_BA], E_BYTE, NULL, - fd->version, &fd->vv); - if (c->stats[DS_BA]->nvals && !h->codecs[DS_BA]) goto_err; - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - cram_byte_array_len_encoder e; - - e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 - ? 
E_VARINT_UNSIGNED - : E_EXTERNAL; - e.len_dat = (void *)DS_BB_len; - //e.len_dat = (void *)DS_BB; - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)DS_BB; - - h->codecs[DS_BB] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - if (!h->codecs[DS_BB]) goto_err; - } else { - h->codecs[DS_BB] = NULL; - } - - //fprintf(stderr, "=== BS ===\n"); - h->codecs[DS_BS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BS]), - c->stats[DS_BS], E_BYTE, NULL, - fd->version, &fd->vv); - if (c->stats[DS_BS]->nvals && !h->codecs[DS_BS]) goto_err; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - h->codecs[DS_TL] = NULL; - h->codecs[DS_RI] = NULL; - h->codecs[DS_RS] = NULL; - h->codecs[DS_PD] = NULL; - h->codecs[DS_HC] = NULL; - h->codecs[DS_SC] = NULL; - - //fprintf(stderr, "=== TC ===\n"); - h->codecs[DS_TC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TC]), - c->stats[DS_TC], E_BYTE, NULL, - fd->version, &fd->vv); - if (c->stats[DS_TC]->nvals && !h->codecs[DS_TC]) goto_err; - - //fprintf(stderr, "=== TN ===\n"); - h->codecs[DS_TN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TN]), - c->stats[DS_TN], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_TN]->nvals && !h->codecs[DS_TN]) goto_err; - } else { - h->codecs[DS_TC] = NULL; - h->codecs[DS_TN] = NULL; - - //fprintf(stderr, "=== TL ===\n"); - h->codecs[DS_TL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TL]), - c->stats[DS_TL], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_TL]->nvals && !h->codecs[DS_TL]) goto_err; - - - //fprintf(stderr, "=== RI ===\n"); - h->codecs[DS_RI] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RI]), - c->stats[DS_RI], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_RI]->nvals && !h->codecs[DS_RI]) goto_err; - - //fprintf(stderr, "=== RS ===\n"); - h->codecs[DS_RS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RS]), - c->stats[DS_RS], E_INT, NULL, - fd->version, &fd->vv); - 
if (c->stats[DS_RS]->nvals && !h->codecs[DS_RS]) goto_err; - - //fprintf(stderr, "=== PD ===\n"); - h->codecs[DS_PD] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_PD]), - c->stats[DS_PD], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_PD]->nvals && !h->codecs[DS_PD]) goto_err; - - //fprintf(stderr, "=== HC ===\n"); - h->codecs[DS_HC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_HC]), - c->stats[DS_HC], E_INT, NULL, - fd->version, &fd->vv); - if (c->stats[DS_HC]->nvals && !h->codecs[DS_HC]) goto_err; - - //fprintf(stderr, "=== SC ===\n"); - if (1) { - int i2[2] = {0, DS_SC}; - - h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, - E_BYTE_ARRAY, (void *)i2, - fd->version, &fd->vv); - } else { - // Appears to be no practical benefit to using this method, - // but it may work better if we start mixing SC, IN and BB - // elements into the same external block. - cram_byte_array_len_encoder e; - - e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 - ? E_VARINT_UNSIGNED - : E_EXTERNAL; - e.len_dat = (void *)DS_SC_len; - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)DS_SC; - - h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - } - if (!h->codecs[DS_SC]) goto_err; - } - - //fprintf(stderr, "=== IN ===\n"); - { - int i2[2] = {0, DS_IN}; - h->codecs[DS_IN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, - E_BYTE_ARRAY, (void *)i2, - fd->version, &fd->vv); - if (!h->codecs[DS_IN]) goto_err; - } - - h->codecs[DS_QS] = cram_encoder_init(E_EXTERNAL, NULL, E_BYTE, - (void *)DS_QS, - fd->version, &fd->vv); - if (!h->codecs[DS_QS]) goto_err; - { - int i2[2] = {0, DS_RN}; - h->codecs[DS_RN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, - E_BYTE_ARRAY, (void *)i2, - fd->version, &fd->vv); - if (!h->codecs[DS_RN]) goto_err; - } - - - /* Encode slices */ - for (i = 0; i < c->curr_slice; i++) { - hts_log_info("Encode slice %d", i); - - int local_embed_ref = - embed_ref>0 && 
c->slices[i]->hdr->ref_seq_id != -1 ? 1 : 0; - if (cram_encode_slice(fd, c, h, c->slices[i], local_embed_ref) != 0) - return -1; - } - - /* Create compression header */ - { - h->ref_seq_id = c->ref_seq_id; - h->ref_seq_start = c->ref_seq_start; - h->ref_seq_span = c->ref_seq_span; - h->num_records = c->num_records; - h->qs_seq_orient = c->qs_seq_orient; - // slight misnomer - sorted or treat as-if sorted (ap_delta force to 1) - h->AP_delta = c->pos_sorted; - memcpy(h->substitution_matrix, CRAM_SUBST_MATRIX, 20); - - if (!(c_hdr = cram_encode_compression_header(fd, c, h, embed_ref))) - return -1; - } - - /* Compute landmarks */ - /* Fill out slice landmarks */ - c->num_landmarks = c->curr_slice; - c->landmark = malloc(c->num_landmarks * sizeof(*c->landmark)); - if (!c->landmark) - return -1; - - /* - * Slice offset starts after the first block, so we need to simulate - * writing it to work out the correct offset - */ - { - slice_offset = c_hdr->method == RAW - ? c_hdr->uncomp_size - : c_hdr->comp_size; - slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(c_hdr->content_id) + - fd->vv.varint_size(c_hdr->comp_size) + - fd->vv.varint_size(c_hdr->uncomp_size); - } - - c->ref_seq_id = c->slices[0]->hdr->ref_seq_id; - if (c->ref_seq_id == -1 && CRAM_ge31(fd->version)) { - // Spec states span=0, but it broke our range queries. - // See commit message for this and prior. - c->ref_seq_start = 0; - c->ref_seq_span = 0; - } else { - c->ref_seq_start = c->slices[0]->hdr->ref_seq_start; - c->ref_seq_span = c->slices[0]->hdr->ref_seq_span; - } - for (i = 0; i < c->curr_slice; i++) { - cram_slice *s = c->slices[i]; - - c->num_blocks += s->hdr->num_blocks + 1; // slice header - c->landmark[i] = slice_offset; - - if (s->hdr->ref_seq_start + s->hdr->ref_seq_span > - c->ref_seq_start + c->ref_seq_span) { - c->ref_seq_span = s->hdr->ref_seq_start + s->hdr->ref_seq_span - - c->ref_seq_start; - } - - slice_offset += s->hdr_block->method == RAW - ? 
s->hdr_block->uncomp_size - : s->hdr_block->comp_size; - - slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(s->hdr_block->content_id) + - fd->vv.varint_size(s->hdr_block->comp_size) + - fd->vv.varint_size(s->hdr_block->uncomp_size); - - for (j = 0; j < s->hdr->num_blocks; j++) { - slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(s->block[j]->content_id) + - fd->vv.varint_size(s->block[j]->comp_size) + - fd->vv.varint_size(s->block[j]->uncomp_size); - - slice_offset += s->block[j]->method == RAW - ? s->block[j]->uncomp_size - : s->block[j]->comp_size; - } - } - c->length += slice_offset; // just past the final slice - - c->comp_hdr_block = c_hdr; - - if (c->ref_seq_id >= 0) { - if (c->ref_free) { - free(c->ref); - c->ref = NULL; - } else { - cram_ref_decr(fd->refs, c->ref_seq_id); - } - } - - /* Cache references up-front if we have unsorted access patterns */ - if (!no_ref && c->refs_used) { - for (i = 0; i < fd->refs->nref; i++) { - if (c->refs_used[i]) - cram_ref_decr(fd->refs, i); - } - } - - return 0; - - err: - return -1; -} - - -/* - * Adds a feature code to a read within a slice. For purposes of minimising - * memory allocations and fragmentation we have one array of features for all - * reads within the slice. We return the index into this array for this new - * feature. - * - * Returns feature index on success - * -1 on failure. - */ -static int cram_add_feature(cram_container *c, cram_slice *s, - cram_record *r, cram_feature *f) { - if (s->nfeatures >= s->afeatures) { - s->afeatures = s->afeatures ? 
s->afeatures*2 : 1024; - s->features = realloc(s->features, s->afeatures*sizeof(*s->features)); - if (!s->features) - return -1; - } - - if (!r->nfeature++) { - r->feature = s->nfeatures; - if (cram_stats_add(c->stats[DS_FP], f->X.pos) < 0) - return -1; - } else { - if (cram_stats_add(c->stats[DS_FP], - f->X.pos - s->features[r->feature + r->nfeature-2].X.pos) < 0) - return -1; - - } - if (cram_stats_add(c->stats[DS_FC], f->X.code) < 0) - return -1; - - s->features[s->nfeatures++] = *f; - - return 0; -} - -static int cram_add_substitution(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *r, - int pos, char base, char qual, char ref) { - cram_feature f; - - // seq=ACGTN vs ref=ACGT or seq=ACGT vs ref=ACGTN - if (fd->L2[(uc)base]<4 || (fd->L2[(uc)base]<5 && fd->L2[(uc)ref]<4)) { - f.X.pos = pos+1; - f.X.code = 'X'; - f.X.base = fd->cram_sub_matrix[ref&0x1f][base&0x1f]; - if (cram_stats_add(c->stats[DS_BS], f.X.base) < 0) - return -1; - } else { - f.B.pos = pos+1; - f.B.code = 'B'; - f.B.base = base; - f.B.qual = qual; - if (cram_stats_add(c->stats[DS_BA], f.B.base) < 0) return -1; - if (cram_stats_add(c->stats[DS_QS], f.B.qual) < 0) return -1; - BLOCK_APPEND_CHAR(s->qual_blk, qual); - } - return cram_add_feature(c, s, r, &f); - - block_err: - return -1; -} - -static int cram_add_bases(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - - f.b.pos = pos+1; - f.b.code = 'b'; - f.b.seq_idx = base - (char *)BLOCK_DATA(s->seqs_blk); - f.b.len = len; - - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_base(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *r, - int pos, char base, char qual) { - cram_feature f; - f.B.pos = pos+1; - f.B.code = 'B'; - f.B.base = base; - f.B.qual = qual; - if (cram_stats_add(c->stats[DS_BA], base) < 0) return -1; - if (cram_stats_add(c->stats[DS_QS], qual) < 0) return -1; - BLOCK_APPEND_CHAR(s->qual_blk, qual); - return cram_add_feature(c, 
s, r, &f); - - block_err: - return -1; -} - -static int cram_add_quality(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *r, - int pos, char qual) { - cram_feature f; - f.Q.pos = pos+1; - f.Q.code = 'Q'; - f.Q.qual = qual; - if (cram_stats_add(c->stats[DS_QS], qual) < 0) return -1; - BLOCK_APPEND_CHAR(s->qual_blk, qual); - return cram_add_feature(c, s, r, &f); - - block_err: - return -1; -} - -static int cram_add_deletion(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.D.pos = pos+1; - f.D.code = 'D'; - f.D.len = len; - if (cram_stats_add(c->stats[DS_DL], len) < 0) return -1; - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_softclip(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base, int version) { - cram_feature f; - f.S.pos = pos+1; - f.S.code = 'S'; - f.S.len = len; - switch (CRAM_MAJOR_VERS(version)) { - case 1: - f.S.seq_idx = BLOCK_SIZE(s->base_blk); - BLOCK_APPEND(s->base_blk, base, len); - BLOCK_APPEND_CHAR(s->base_blk, '\0'); - break; - - case 2: - default: - f.S.seq_idx = BLOCK_SIZE(s->soft_blk); - if (base) { - BLOCK_APPEND(s->soft_blk, base, len); - } else { - int i; - for (i = 0; i < len; i++) - BLOCK_APPEND_CHAR(s->soft_blk, 'N'); - } - BLOCK_APPEND_CHAR(s->soft_blk, '\0'); - break; - - //default: - // // v3.0 onwards uses BB data-series - // f.S.seq_idx = BLOCK_SIZE(s->soft_blk); - } - return cram_add_feature(c, s, r, &f); - - block_err: - return -1; -} - -static int cram_add_hardclip(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.S.pos = pos+1; - f.S.code = 'H'; - f.S.len = len; - if (cram_stats_add(c->stats[DS_HC], len) < 0) return -1; - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_skip(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.S.pos = pos+1; - f.S.code = 'N'; - f.S.len = len; - if 
(cram_stats_add(c->stats[DS_RS], len) < 0) return -1; - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_pad(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.S.pos = pos+1; - f.S.code = 'P'; - f.S.len = len; - if (cram_stats_add(c->stats[DS_PD], len) < 0) return -1; - return cram_add_feature(c, s, r, &f); -} - -static int cram_add_insertion(cram_container *c, cram_slice *s, cram_record *r, - int pos, int len, char *base) { - cram_feature f; - f.I.pos = pos+1; - if (len == 1) { - char b = base ? *base : 'N'; - f.i.code = 'i'; - f.i.base = b; - if (cram_stats_add(c->stats[DS_BA], b) < 0) return -1; - } else { - f.I.code = 'I'; - f.I.len = len; - f.S.seq_idx = BLOCK_SIZE(s->base_blk); - if (base) { - BLOCK_APPEND(s->base_blk, base, len); - } else { - int i; - for (i = 0; i < len; i++) - BLOCK_APPEND_CHAR(s->base_blk, 'N'); - } - BLOCK_APPEND_CHAR(s->base_blk, '\0'); - } - return cram_add_feature(c, s, r, &f); - - block_err: - return -1; -} - -/* - * Encodes auxiliary data. Largely duplicated from above, but done so to - * keep it simple and avoid a myriad of version ifs. - * - * Returns the RG header line pointed to by the BAM aux fields on success, - * NULL on failure or no rg present, also sets "*err" to non-zero - */ -static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, - cram_container *c, - cram_slice *s, cram_record *cr, - int verbatim_NM, int verbatim_MD, - int NM, kstring_t *MD, int cf_tag, - int no_ref, int *err) { - char *aux, *orig; - sam_hrec_rg_t *brg = NULL; - int aux_size = bam_get_l_aux(b); - const char *aux_end = bam_data_end(b); - cram_block *td_b = c->comp_hdr->TD_blk; - int TD_blk_size = BLOCK_SIZE(td_b), new; - char *key; - khint_t k; - - if (err) *err = 1; - - orig = aux = (char *)bam_aux(b); - - - // cF:i => Extra CRAM bit flags. 
- // 1: Don't auto-decode MD (may be invalid) - // 2: Don't auto-decode NM (may be invalid) - if (cf_tag && CRAM_MAJOR_VERS(fd->version) < 4) { - // Temporary copy of aux so we can ammend it. - aux = malloc(aux_size+4); - if (!aux) - return NULL; - - memcpy(aux, orig, aux_size); - aux[aux_size++] = 'c'; - aux[aux_size++] = 'F'; - aux[aux_size++] = 'C'; - aux[aux_size++] = cf_tag; - orig = aux; - aux_end = aux + aux_size; - } - - // Copy aux keys to td_b and aux values to slice aux blocks - while (aux_end - aux >= 1 && aux[0] != 0) { - int r; - - // Room for code + type + at least 1 byte of data - if (aux - orig >= aux_size - 3) - goto err; - - // RG:Z - if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') { - char *rg = &aux[3]; - brg = sam_hrecs_find_rg(fd->header->hrecs, rg); - if (brg) { - while (aux < aux_end && *aux++); - if (CRAM_MAJOR_VERS(fd->version) >= 4) - BLOCK_APPEND(td_b, "RG*", 3); - continue; - } else { - // RG:Z tag will be stored verbatim - hts_log_warning("Missing @RG header for RG \"%s\"", rg); - } - } - - // MD:Z - if (aux[0] == 'M' && aux[1] == 'D' && aux[2] == 'Z') { - if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_MD) { - if (MD && MD->s && strncasecmp(MD->s, aux+3, orig + aux_size - (aux+3)) == 0) { - while (aux < aux_end && *aux++); - if (CRAM_MAJOR_VERS(fd->version) >= 4) - BLOCK_APPEND(td_b, "MD*", 3); - continue; - } - } - } - - // NM:i - if (aux[0] == 'N' && aux[1] == 'M') { - if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_NM) { - int NM_ = bam_aux2i_end((uint8_t *)aux+2, (uint8_t *)aux_end); - if (NM_ == NM) { - switch(aux[2]) { - case 'A': case 'C': case 'c': aux+=4; break; - case 'S': case 's': aux+=5; break; - case 'I': case 'i': case 'f': aux+=7; break; - default: - hts_log_error("Unhandled type code for NM tag"); - goto err; - } - if (CRAM_MAJOR_VERS(fd->version) >= 4) - BLOCK_APPEND(td_b, "NM*", 3); - continue; - } - } - } - - BLOCK_APPEND(td_b, aux, 3); - - // Container level tags_used, for 
TD series - // Maps integer key ('X0i') to cram_tag_map struct. - int key = (((unsigned char *) aux)[0]<<16 | - ((unsigned char *) aux)[1]<<8 | - ((unsigned char *) aux)[2]); - k = kh_put(m_tagmap, c->tags_used, key, &r); - if (-1 == r) - goto err; - else if (r != 0) - kh_val(c->tags_used, k) = NULL; - - if (r == 1) { - khint_t k_global; - - // Global tags_used for cram_metrics support - pthread_mutex_lock(&fd->metrics_lock); - k_global = kh_put(m_metrics, fd->tags_used, key, &r); - if (-1 == r) { - pthread_mutex_unlock(&fd->metrics_lock); - goto err; - } - if (r >= 1) { - kh_val(fd->tags_used, k_global) = cram_new_metrics(); - if (!kh_val(fd->tags_used, k_global)) { - kh_del(m_metrics, fd->tags_used, k_global); - pthread_mutex_unlock(&fd->metrics_lock); - goto err; - } - } - - pthread_mutex_unlock(&fd->metrics_lock); - - int i2[2] = {'\t',key}; - size_t sk = key; - cram_tag_map *m = calloc(1, sizeof(*m)); - if (!m) - goto_err; - kh_val(c->tags_used, k) = m; - - cram_codec *c; - - // Use a block content id based on the tag id. - // Codec type depends on tag data type. 
- switch(aux[2]) { - case 'Z': case 'H': - // string as byte_array_stop - c = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, - E_BYTE_ARRAY, (void *)i2, - fd->version, &fd->vv); - break; - - case 'A': case 'c': case 'C': { - // byte array len, 1 byte - cram_byte_array_len_encoder e; - cram_stats st; - - if (CRAM_MAJOR_VERS(fd->version) <= 3) { - e.len_encoding = E_HUFFMAN; - e.len_dat = NULL; // will get codes from st - } else { - e.len_encoding = E_CONST_INT; - e.len_dat = NULL; // will get codes from st - } - memset(&st, 0, sizeof(st)); - if (cram_stats_add(&st, 1) < 0) goto block_err; - cram_stats_encoding(fd, &st); - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)sk; - - c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - break; - } - - case 's': case 'S': { - // byte array len, 2 byte - cram_byte_array_len_encoder e; - cram_stats st; - - if (CRAM_MAJOR_VERS(fd->version) <= 3) { - e.len_encoding = E_HUFFMAN; - e.len_dat = NULL; // will get codes from st - } else { - e.len_encoding = E_CONST_INT; - e.len_dat = NULL; // will get codes from st - } - memset(&st, 0, sizeof(st)); - if (cram_stats_add(&st, 2) < 0) goto block_err; - cram_stats_encoding(fd, &st); - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)sk; - - c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - break; - } - case 'i': case 'I': case 'f': { - // byte array len, 4 byte - cram_byte_array_len_encoder e; - cram_stats st; - - if (CRAM_MAJOR_VERS(fd->version) <= 3) { - e.len_encoding = E_HUFFMAN; - e.len_dat = NULL; // will get codes from st - } else { - e.len_encoding = E_CONST_INT; - e.len_dat = NULL; // will get codes from st - } - memset(&st, 0, sizeof(st)); - if (cram_stats_add(&st, 4) < 0) goto block_err; - cram_stats_encoding(fd, &st); - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)sk; - - c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); 
- break; - } - - case 'B': { - // Byte array of variable size, but we generate our tag - // byte stream at the wrong stage (during reading and not - // after slice header construction). So we use - // BYTE_ARRAY_LEN with the length codec being external - // too. - cram_byte_array_len_encoder e; - - e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 - ? E_VARINT_UNSIGNED - : E_EXTERNAL; - e.len_dat = (void *)sk; // or key+128 for len? - - e.val_encoding = E_EXTERNAL; - e.val_dat = (void *)sk; - - c = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, - E_BYTE_ARRAY, (void *)&e, - fd->version, &fd->vv); - break; - } - - default: - hts_log_error("Unsupported SAM aux type '%c'", aux[2]); - c = NULL; - } - - if (!c) - goto_err; - - m->codec = c; - - // Link to fd-global tag metrics - pthread_mutex_lock(&fd->metrics_lock); - m->m = k_global ? (cram_metrics *)kh_val(fd->tags_used, k_global) : NULL; - pthread_mutex_unlock(&fd->metrics_lock); - } - - cram_tag_map *tm = (cram_tag_map *)kh_val(c->tags_used, k); - if (!tm) goto_err; - cram_codec *codec = tm->codec; - if (!tm->codec) goto_err; - - switch(aux[2]) { - case 'A': case 'C': case 'c': - if (aux_end - aux < 3+1) - goto err; - - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - goto err; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - - aux+=3; - //codec->encode(s, codec, aux, 1); - // Functionally equivalent, but less code. 
- BLOCK_APPEND_CHAR(tm->blk, *aux); - aux++; - break; - - case 'S': case 's': - if (aux_end - aux < 3+2) - goto err; - - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - goto err; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - - aux+=3; - //codec->encode(s, codec, aux, 2); - BLOCK_APPEND(tm->blk, aux, 2); - aux+=2; - break; - - case 'I': case 'i': case 'f': - if (aux_end - aux < 3+4) - goto err; - - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - goto err; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - - aux+=3; - //codec->encode(s, codec, aux, 4); - BLOCK_APPEND(tm->blk, aux, 4); - aux+=4; - break; - - case 'd': - if (aux_end - aux < 3+8) - goto err; - - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - goto err; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - - aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; - //codec->encode(s, codec, aux, 8); - BLOCK_APPEND(tm->blk, aux, 8); - aux+=8; - break; - - case 'Z': case 'H': { - if (aux_end - aux < 3) - goto err; - - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - goto err; - codec->out = tm->blk; - } - - char *aux_s; - aux += 3; - aux_s = aux; - while (aux < aux_end && *aux++); - if (codec->encode(s, codec, aux_s, aux - aux_s) < 0) - goto err; - break; - } - - case 'B': { - if (aux_end - aux < 4+4) - goto err; - - int type = aux[3]; - uint64_t count = (((uint64_t)((unsigned char *)aux)[4]) << 0 | - ((uint64_t)((unsigned char *)aux)[5]) << 8 | - ((uint64_t)((unsigned char *)aux)[6]) <<16 | - ((uint64_t)((unsigned char *)aux)[7]) <<24); - uint64_t blen; - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - goto err; - if (codec->u.e_byte_array_len.val_codec->codec == E_XDELTA) { - if (!(tm->blk2 = cram_new_block(EXTERNAL, key+128))) - goto err; - codec->u.e_byte_array_len.len_codec->out = tm->blk2; - codec->u.e_byte_array_len.val_codec->u.e_xdelta.sub_codec->out = tm->blk; - } else { 
- codec->u.e_byte_array_len.len_codec->out = tm->blk; - codec->u.e_byte_array_len.val_codec->out = tm->blk; - } - } - - // skip TN field - aux+=3; - - // We use BYTE_ARRAY_LEN with external length, so store that first - switch (type) { - case 'c': case 'C': - blen = count; - break; - case 's': case 'S': - blen = 2*count; - break; - case 'i': case 'I': case 'f': - blen = 4*count; - break; - default: - hts_log_error("Unknown sub-type '%c' for aux type 'B'", type); - goto err; - } - - blen += 5; // sub-type & length - if (aux_end - aux < blen || blen > INT_MAX) - goto err; - - if (codec->encode(s, codec, aux, (int) blen) < 0) - goto err; - aux += blen; - break; - } - default: - hts_log_error("Unknown aux type '%c'", aux_end - aux < 2 ? '?' : aux[2]); - goto err; - } - tm->blk->m = tm->m; - } - - // FIXME: sort BLOCK_DATA(td_b) by char[3] triples - - // And and increment TD hash entry - BLOCK_APPEND_CHAR(td_b, 0); - - // Duplicate key as BLOCK_DATA() can be realloced to a new pointer. - key = string_ndup(c->comp_hdr->TD_keys, - (char *)BLOCK_DATA(td_b) + TD_blk_size, - BLOCK_SIZE(td_b) - TD_blk_size); - if (!key) - goto block_err; - k = kh_put(m_s2i, c->comp_hdr->TD_hash, key, &new); - if (new < 0) { - goto err; - } else if (new == 0) { - BLOCK_SIZE(td_b) = TD_blk_size; - } else { - kh_val(c->comp_hdr->TD_hash, k) = c->comp_hdr->nTL; - c->comp_hdr->nTL++; - } - - cr->TL = kh_val(c->comp_hdr->TD_hash, k); - if (cram_stats_add(c->stats[DS_TL], cr->TL) < 0) - goto block_err; - - if (orig != (char *)bam_aux(b)) - free(orig); - - if (err) *err = 0; - - return brg; - - err: - block_err: - if (orig != (char *)bam_aux(b)) - free(orig); - return NULL; -} - -/* - * During cram_next_container or before the final flush at end of - * file, we update the current slice headers and increment the slice - * number to the next slice. - * - * See cram_next_container() and cram_close(). 
- */ -void cram_update_curr_slice(cram_container *c, int version) { - cram_slice *s = c->slice; - if (c->multi_seq) { - s->hdr->ref_seq_id = -2; - s->hdr->ref_seq_start = 0; - s->hdr->ref_seq_span = 0; - } else if (c->curr_ref == -1 && CRAM_ge31(version)) { - // Spec states span=0, but it broke our range queries. - // See commit message for this and prior. - s->hdr->ref_seq_id = -1; - s->hdr->ref_seq_start = 0; - s->hdr->ref_seq_span = 0; - } else { - s->hdr->ref_seq_id = c->curr_ref; - s->hdr->ref_seq_start = c->first_base; - s->hdr->ref_seq_span = MAX(0, c->last_base - c->first_base + 1); - } - s->hdr->num_records = c->curr_rec; - - if (c->curr_slice == 0) { - if (c->ref_seq_id != s->hdr->ref_seq_id) - c->ref_seq_id = s->hdr->ref_seq_id; - c->ref_seq_start = c->first_base; - } - - c->curr_slice++; -} - -/* - * Handles creation of a new container or new slice, flushing any - * existing containers when appropriate. - * - * Really this is next slice, which may or may not lead to a new container. - * - * Returns cram_container pointer on success - * NULL on failure. - */ -static cram_container *cram_next_container(cram_fd *fd, bam_seq_t *b) { - cram_container *c = fd->ctr; - int i; - - /* First occurrence */ - if (c->curr_ref == -2) - c->curr_ref = bam_ref(b); - - if (c->slice) - cram_update_curr_slice(c, fd->version); - - /* Flush container */ - if (c->curr_slice == c->max_slice || - (bam_ref(b) != c->curr_ref && !c->multi_seq)) { - c->ref_seq_span = fd->last_base - c->ref_seq_start + 1; - hts_log_info("Flush container %d/%"PRId64"..%"PRId64, - c->ref_seq_id, c->ref_seq_start, - c->ref_seq_start + c->ref_seq_span -1); - - /* Encode slices */ - if (-1 == cram_flush_container_mt(fd, c)) - return NULL; - if (!fd->pool) { - // Move to sep func, as we need cram_flush_container for - // the closing phase to flush the partial container. 
- for (i = 0; i < c->max_slice; i++) { - cram_free_slice(c->slices[i]); - c->slices[i] = NULL; - } - - c->slice = NULL; - c->curr_slice = 0; - - /* Easy approach for purposes of freeing stats */ - cram_free_container(c); - } - - c = fd->ctr = cram_new_container(fd->seqs_per_slice, - fd->slices_per_container); - if (!c) - return NULL; - - pthread_mutex_lock(&fd->ref_lock); - c->no_ref = fd->no_ref; - c->embed_ref = fd->embed_ref; - c->record_counter = fd->record_counter; - pthread_mutex_unlock(&fd->ref_lock); - c->curr_ref = bam_ref(b); - } - - c->last_pos = c->first_base = c->last_base = bam_pos(b)+1; - - /* New slice */ - c->slice = c->slices[c->curr_slice] = - cram_new_slice(MAPPED_SLICE, c->max_rec); - if (!c->slice) - return NULL; - - if (c->multi_seq) { - c->slice->hdr->ref_seq_id = -2; - c->slice->hdr->ref_seq_start = 0; - c->slice->last_apos = 1; - } else { - c->slice->hdr->ref_seq_id = bam_ref(b); - // wrong for unsorted data, will fix during encoding. - c->slice->hdr->ref_seq_start = bam_pos(b)+1; - c->slice->last_apos = bam_pos(b)+1; - } - - c->curr_rec = 0; - c->s_num_bases = 0; - c->n_mapped = 0; - - // QO field: 0 implies original orientation, 1 implies sequence orientation - // 1 is often preferable for NovaSeq, but impact is slight. ~0.5% diff. - // Conversely other data sets it's often better than 1% saving for 0. - // Short of trying both and learning, for now we use use 0 for V4, 1 for V3. - c->qs_seq_orient = CRAM_MAJOR_VERS(fd->version) >= 4 ? 0 : 1; - - return c; -} - - -/* - * Converts a single bam record into a cram record. - * Possibly used within a thread. 
- * - * Returns 0 on success; - * -1 on failure - */ -static int process_one_read(cram_fd *fd, cram_container *c, - cram_slice *s, cram_record *cr, - bam_seq_t *b, int rnum, kstring_t *MD, - int embed_ref, int no_ref) { - int i, fake_qual = -1, NM = 0; - char *cp; - char *ref, *seq, *qual; - - // Any places with N in seq and/or reference can lead to ambiguous - // interpretation of the SAM NM:i tag. So we store these verbatim - // to ensure valid data round-trips the same regardless of who - // defines it as valid. - // Similarly when alignments go beyond end of the reference. - int verbatim_NM = fd->store_nm; - int verbatim_MD = fd->store_md; - - // FIXME: multi-ref containers - - cr->flags = bam_flag(b); - cr->len = bam_seq_len(b); - uint8_t *md; - if (!(md = bam_aux_get(b, "MD"))) - MD = NULL; - else - MD->l = 0; - - int cf_tag = 0; - - if (embed_ref == 2) { - cf_tag = MD ? 0 : 1; // No MD - cf_tag |= bam_aux_get(b, "NM") ? 0 : 2; // No NM - } - - //fprintf(stderr, "%s => %d\n", rg ? rg : "\"\"", cr->rg); - - ref = c->ref ? c->ref - (c->ref_start-1) : NULL; - cr->ref_id = bam_ref(b); - if (cram_stats_add(c->stats[DS_RI], cr->ref_id) < 0) - goto block_err; - if (cram_stats_add(c->stats[DS_BF], fd->cram_flag_swap[cr->flags & 0xfff]) < 0) - goto block_err; - - // Non reference based encoding means storing the bases verbatim as features, which in - // turn means every base also has a quality already stored. 
- if (!no_ref || CRAM_MAJOR_VERS(fd->version) >= 3) - cr->cram_flags |= CRAM_FLAG_PRESERVE_QUAL_SCORES; - - if (cr->len <= 0 && CRAM_MAJOR_VERS(fd->version) >= 3) - cr->cram_flags |= CRAM_FLAG_NO_SEQ; - //cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK); - - c->num_bases += cr->len; - cr->apos = bam_pos(b)+1; - if (c->pos_sorted) { - if (cr->apos < s->last_apos && !fd->ap_delta) { - c->pos_sorted = 0; - } else { - if (cram_stats_add(c->stats[DS_AP], cr->apos - s->last_apos) < 0) - goto block_err; - s->last_apos = cr->apos; - } - } else { - //cram_stats_add(c->stats[DS_AP], cr->apos); - } - c->max_apos += (cr->apos > c->max_apos) * (cr->apos - c->max_apos); - - /* - * This seqs_ds is largely pointless and it could reuse the same memory - * over and over. - * s->base_blk is what we need for encoding. - */ - cr->seq = BLOCK_SIZE(s->seqs_blk); - cr->qual = BLOCK_SIZE(s->qual_blk); - BLOCK_GROW(s->seqs_blk, cr->len+1); - BLOCK_GROW(s->qual_blk, cr->len); - - // Convert BAM nibble encoded sequence to string of base pairs - seq = cp = (char *)BLOCK_END(s->seqs_blk); - *seq = 0; - nibble2base(bam_seq(b), cp, cr->len); - BLOCK_SIZE(s->seqs_blk) += cr->len; - - qual = cp = (char *)bam_qual(b); - - - /* Copy and parse */ - if (!(cr->flags & BAM_FUNMAP)) { - uint32_t *cig_to, *cig_from; - int64_t apos = cr->apos-1, spos = 0; - int64_t MD_last = apos; // last position of edit in MD tag - - cr->cigar = s->ncigar; - cr->ncigar = bam_cigar_len(b); - while (cr->cigar + cr->ncigar >= s->cigar_alloc) { - s->cigar_alloc = s->cigar_alloc ? 
s->cigar_alloc*2 : 1024; - s->cigar = realloc(s->cigar, s->cigar_alloc * sizeof(*s->cigar)); - if (!s->cigar) - return -1; - } - - cig_to = (uint32_t *)s->cigar; - cig_from = (uint32_t *)bam_cigar(b); - - cr->feature = 0; - cr->nfeature = 0; - for (i = 0; i < cr->ncigar; i++) { - enum cigar_op cig_op = cig_from[i] & BAM_CIGAR_MASK; - uint32_t cig_len = cig_from[i] >> BAM_CIGAR_SHIFT; - cig_to[i] = cig_from[i]; - - /* Can also generate events from here for CRAM diffs */ - - switch (cig_op) { - int l; - - // Don't trust = and X ops to be correct. - case BAM_CMATCH: - case BAM_CBASE_MATCH: - case BAM_CBASE_MISMATCH: - //fprintf(stderr, "\nBAM_CMATCH\nR: %.*s\nS: %.*s\n", - // cig_len, &ref[apos], cig_len, &seq[spos]); - l = 0; - if (!no_ref && cr->len) { - int end = cig_len+apos < c->ref_end - ? cig_len : c->ref_end - apos; - char *sp = &seq[spos]; - char *rp = &ref[apos]; - char *qp = &qual[spos]; - if (end > cr->len) { - hts_log_error("CIGAR and query sequence are of different length"); - return -1; - } - for (l = 0; l < end; l++) { - // This case is just too disputed and different tools - // interpret these in different ways. We give up and - // store verbatim. - if (rp[l] == 'N' && sp[l] == 'N') - verbatim_NM = verbatim_MD = 1; - if (rp[l] != sp[l]) { - // Build our own MD tag if one is on the sequence, so - // we can ensure it matches and thus can be discarded. - if (MD && ref) { - if (kputuw(apos+l - MD_last, MD) < 0) goto err; - if (kputc(rp[l], MD) < 0) goto err; - MD_last = apos+l+1; - } - NM++; - if (!sp[l]) - break; - if (0 && CRAM_MAJOR_VERS(fd->version) >= 3) { -#if 0 - // Disabled for the time being as it doesn't - // seem to gain us much. 
- int ol=l; - while (l 1) { - if (cram_add_bases(fd, c, s, cr, spos+ol, - l-ol, &seq[spos+ol])) - return -1; - l--; - } else { - l = ol; - if (cram_add_substitution(fd, c, s, cr, - spos+l, sp[l], - qp[l], rp[l])) - return -1; - } -#else - // With urmap pushed to the limit and lots - // of unaligned data (should be soft-clipped) - // this saves ~2-7%. Worth it? - int nl = l; - int max_end = nl, max_score = 0, score = 0; - while (nl < end) { - if (rp[nl] != sp[nl]) { - score += 3; - if (max_score < score) { - max_score = score; - max_end = nl; - } - } else { - score--; - if (score < -2 || - max_score - score > 7) - break; - } - nl++; - } - if (max_score > 20) { - cram_add_bases(fd, c, s, cr, spos+l, - max_end-l, &seq[spos+l]); - l = max_end-1; - } else { - while (l < nl) { - if (rp[l] != sp[l]) - cram_add_substitution(fd, c, s, - cr, spos+l, - sp[l], qp[l], - rp[l]); - l++; - } - l--; - } -#endif - } else { - if (cram_add_substitution(fd, c, s, cr, spos+l, - sp[l], qp[l], rp[l])) - return -1; - } - } - } - spos += l; - apos += l; - } - - if (l < cig_len && cr->len) { - if (no_ref) { - if (CRAM_MAJOR_VERS(fd->version) == 3) { - if (cram_add_bases(fd, c, s, cr, spos, - cig_len-l, &seq[spos])) - return -1; - spos += cig_len-l; - } else { - for (; l < cig_len && seq[spos]; l++, spos++) { - if (cram_add_base(fd, c, s, cr, spos, - seq[spos], qual[spos])) - return -1; - } - } - } else { - /* off end of sequence or non-ref based output */ - verbatim_NM = verbatim_MD = 1; - for (; l < cig_len && seq[spos]; l++, spos++) { - if (cram_add_base(fd, c, s, cr, spos, - seq[spos], qual[spos])) - return -1; - } - } - apos += cig_len; - } else if (!cr->len) { - /* Seq "*" */ - verbatim_NM = verbatim_MD = 1; - apos += cig_len; - spos += cig_len; - } - break; - - case BAM_CDEL: - if (MD && ref) { - if (kputuw(apos - MD_last, MD) < 0) goto err; - if (apos < c->ref_end) { - if (kputc_('^', MD) < 0) goto err; - if (kputsn(&ref[apos], MIN(c->ref_end - apos, cig_len), MD) < 0) - goto err; - } 
- } - NM += cig_len; - - if (cram_add_deletion(c, s, cr, spos, cig_len, &seq[spos])) - return -1; - apos += cig_len; - MD_last = apos; - break; - - case BAM_CREF_SKIP: - if (cram_add_skip(c, s, cr, spos, cig_len, &seq[spos])) - return -1; - apos += cig_len; - MD_last += cig_len; - break; - - case BAM_CINS: - if (cram_add_insertion(c, s, cr, spos, cig_len, - cr->len ? &seq[spos] : NULL)) - return -1; - if (no_ref && cr->len) { - for (l = 0; l < cig_len; l++, spos++) { - cram_add_quality(fd, c, s, cr, spos, qual[spos]); - } - } else { - spos += cig_len; - } - NM += cig_len; - break; - - case BAM_CSOFT_CLIP: - if (cram_add_softclip(c, s, cr, spos, cig_len, - cr->len ? &seq[spos] : NULL, - fd->version)) - return -1; - - if (no_ref && - !(cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { - if (cr->len) { - for (l = 0; l < cig_len; l++, spos++) { - cram_add_quality(fd, c, s, cr, spos, qual[spos]); - } - } else { - for (l = 0; l < cig_len; l++, spos++) { - cram_add_quality(fd, c, s, cr, spos, -1); - } - } - } else { - spos += cig_len; - } - break; - - case BAM_CHARD_CLIP: - if (cram_add_hardclip(c, s, cr, spos, cig_len, &seq[spos])) - return -1; - break; - - case BAM_CPAD: - if (cram_add_pad(c, s, cr, spos, cig_len, &seq[spos])) - return -1; - break; - - default: - hts_log_error("Unknown CIGAR op code %d", cig_op); - return -1; - } - } - if (cr->len && spos != cr->len) { - hts_log_error("CIGAR and query sequence are of different length"); - return -1; - } - fake_qual = spos; - cr->aend = no_ref ? 
apos : MIN(apos, c->ref_end); - if (cram_stats_add(c->stats[DS_FN], cr->nfeature) < 0) - goto block_err; - - if (MD && ref) - if (kputuw(apos - MD_last, MD) < 0) goto err; - } else { - // Unmapped - cr->cram_flags |= CRAM_FLAG_PRESERVE_QUAL_SCORES; - cr->cigar = 0; - cr->ncigar = 0; - cr->nfeature = 0; - cr->aend = MIN(cr->apos, c->ref_end); - for (i = 0; i < cr->len; i++) - if (cram_stats_add(c->stats[DS_BA], seq[i]) < 0) - goto block_err; - fake_qual = 0; - } - - cr->ntags = 0; //cram_stats_add(c->stats[DS_TC], cr->ntags); - int err = 0; - sam_hrec_rg_t *brg = - cram_encode_aux(fd, b, c, s, cr, verbatim_NM, verbatim_MD, NM, MD, - cf_tag, no_ref, &err); - if (err) - goto block_err; - - /* Read group, identified earlier */ - if (brg) { - cr->rg = brg->id; - } else if (CRAM_MAJOR_VERS(fd->version) == 1) { - sam_hrec_rg_t *brg = sam_hrecs_find_rg(fd->header->hrecs, "UNKNOWN"); - if (!brg) goto block_err; - cr->rg = brg->id; - } else { - cr->rg = -1; - } - if (cram_stats_add(c->stats[DS_RG], cr->rg) < 0) - goto block_err; - - /* - * Append to the qual block now. We do this here as - * cram_add_substitution() can generate BA/QS events which need to - * be in the qual block before we append the rest of the data. - */ - if (cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES) { - /* Special case of seq "*" */ - if (cr->len == 0) { - cr->len = fake_qual; - BLOCK_GROW(s->qual_blk, cr->len); - cp = (char *)BLOCK_END(s->qual_blk); - memset(cp, 255, cr->len); - } else { - BLOCK_GROW(s->qual_blk, cr->len); - cp = (char *)BLOCK_END(s->qual_blk); - char *from = (char *)&bam_qual(b)[0]; - char *to = &cp[0]; - memcpy(to, from, cr->len); - - // Store quality in original orientation for better compression. 
- if (!c->qs_seq_orient) { - if (cr->flags & BAM_FREVERSE) { - int i, j; - for (i = 0, j = cr->len-1; i < j; i++, j--) { - unsigned char c; - c = to[i]; - to[i] = to[j]; - to[j] = c; - } - } - } - } - BLOCK_SIZE(s->qual_blk) += cr->len; - } else { - if (cr->len == 0) - cr->len = fake_qual >= 0 ? fake_qual : cr->aend - cr->apos + 1; - } - - if (cram_stats_add(c->stats[DS_RL], cr->len) < 0) - goto block_err; - - /* Now we know apos and aend both, update mate-pair information */ - { - int new; - khint_t k; - int sec = (cr->flags & BAM_FSECONDARY) ? 1 : 0; - - //fprintf(stderr, "Checking %"PRId64"/%.*s\t", rnum, - // cr->name_len, DSTRING_STR(s->name_ds)+cr->name); - if (cr->flags & BAM_FPAIRED) { - char *key = string_ndup(s->pair_keys, bam_name(b), bam_name_len(b)); - if (!key) - return -1; - - k = kh_put(m_s2i, s->pair[sec], key, &new); - if (-1 == new) - return -1; - else if (new > 0) - kh_val(s->pair[sec], k) = rnum; - } else { - new = 1; - k = 0; // Prevents false-positive warning from gcc -Og - } - - if (new == 0) { - cram_record *p = &s->crecs[kh_val(s->pair[sec], k)]; - int64_t aleft, aright; - int sign; - - aleft = MIN(cr->apos, p->apos); - aright = MAX(cr->aend, p->aend); - if (cr->apos < p->apos) { - sign = 1; - } else if (cr->apos > p->apos) { - sign = -1; - } else if (cr->flags & BAM_FREAD1) { - sign = 1; - } else { - sign = -1; - } - - // This vs p: tlen, matepos, flags. Permit TLEN 0 and/or TLEN +/- - // a small amount, if appropriate options set. 
- if ((!fd->tlen_zero && MAX(bam_mate_pos(b)+1, 0) != p->apos) && - !(fd->tlen_zero && bam_mate_pos(b) == 0)) - goto detached; - - if (((bam_flag(b) & BAM_FMUNMAP) != 0) != - ((p->flags & BAM_FUNMAP) != 0)) - goto detached; - - if (((bam_flag(b) & BAM_FMREVERSE) != 0) != - ((p->flags & BAM_FREVERSE) != 0)) - goto detached; - - - // p vs this: tlen, matepos, flags - if (p->ref_id != cr->ref_id && - !(fd->tlen_zero && p->ref_id == -1)) - goto detached; - - if (p->mate_pos != cr->apos && - !(fd->tlen_zero && p->mate_pos == 0)) - goto detached; - - if (((p->flags & BAM_FMUNMAP) != 0) != - ((p->mate_flags & CRAM_M_UNMAP) != 0)) - goto detached; - - if (((p->flags & BAM_FMREVERSE) != 0) != - ((p->mate_flags & CRAM_M_REVERSE) != 0)) - goto detached; - - // Supplementary reads are just too ill defined - if ((cr->flags & BAM_FSUPPLEMENTARY) || - (p->flags & BAM_FSUPPLEMENTARY)) - goto detached; - - // When in lossy name mode, if a read isn't detached we - // cannot store the name. The corollary is that when we - // must store the name, it must be detached (inefficient). - if (fd->lossy_read_names && - (!(cr->cram_flags & CRAM_FLAG_DISCARD_NAME) || - !((p->cram_flags & CRAM_FLAG_DISCARD_NAME)))) - goto detached; - - // Now check TLEN. We do this last as sometimes it's the - // only thing that differs. In CRAM4 we have a better way - // of handling this that doesn't break detached status - int explicit_tlen = 0; - int tflag1 = ((bam_ins_size(b) && - llabs(bam_ins_size(b) - sign*(aright-aleft+1)) - > fd->tlen_approx) - || (!bam_ins_size(b) && !fd->tlen_zero)); - - int tflag2 = ((p->tlen && llabs(p->tlen - -sign*(aright-aleft+1)) - > fd->tlen_approx) - || (!p->tlen && !fd->tlen_zero)); - - if (tflag1 || tflag2) { - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - explicit_tlen = CRAM_FLAG_EXPLICIT_TLEN; - } else { - // Stil do detached for unmapped data in CRAM4 as this - // also impacts RNEXT calculation. 
- goto detached; - } - } - - /* - * The fields below are unused when encoding this read as it is - * no longer detached. In theory they may get referred to when - * processing a 3rd or 4th read in this template?, so we set them - * here just to be sure. - * - * They do not need cram_stats_add() calls those as they are - * not emitted. - */ - cr->mate_pos = p->apos; - cram_stats_add(c->stats[DS_NP], cr->mate_pos); - cr->tlen = explicit_tlen ? bam_ins_size(b) : sign*(aright-aleft+1); - cram_stats_add(c->stats[DS_TS], cr->tlen); - cr->mate_flags = - ((p->flags & BAM_FMUNMAP) == BAM_FMUNMAP) * CRAM_M_UNMAP + - ((p->flags & BAM_FMREVERSE) == BAM_FMREVERSE) * CRAM_M_REVERSE; - - // Decrement statistics aggregated earlier - if (p->cram_flags & CRAM_FLAG_STATS_ADDED) { - cram_stats_del(c->stats[DS_NP], p->mate_pos); - cram_stats_del(c->stats[DS_MF], p->mate_flags); - if (!(p->cram_flags & CRAM_FLAG_EXPLICIT_TLEN)) - cram_stats_del(c->stats[DS_TS], p->tlen); - cram_stats_del(c->stats[DS_NS], p->mate_ref_id); - } - - /* Similarly we could correct the p-> values too, but these will no - * longer have any code that refers back to them as the new 'p' - * for this template is our current 'cr'. 
- */ - //p->mate_pos = cr->apos; - //p->mate_flags = - // ((cr->flags & BAM_FMUNMAP) == BAM_FMUNMAP) * CRAM_M_UNMAP + - // ((cr->flags & BAM_FMREVERSE) == BAM_FMREVERSE)* CRAM_M_REVERSE; - //p->tlen = p->apos - cr->aend; - - // Clear detached from cr flags - cr->cram_flags &= ~CRAM_FLAG_DETACHED; - cr->cram_flags |= explicit_tlen; - if (cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK) < 0) - goto block_err; - - // Clear detached from p flags and set downstream - if (p->cram_flags & CRAM_FLAG_STATS_ADDED) { - cram_stats_del(c->stats[DS_CF], p->cram_flags & CRAM_FLAG_MASK); - p->cram_flags &= ~CRAM_FLAG_STATS_ADDED; - } - - p->cram_flags &= ~CRAM_FLAG_DETACHED; - p->cram_flags |= CRAM_FLAG_MATE_DOWNSTREAM | explicit_tlen;; - if (cram_stats_add(c->stats[DS_CF], p->cram_flags & CRAM_FLAG_MASK) < 0) - goto block_err; - - p->mate_line = rnum - (kh_val(s->pair[sec], k) + 1); - if (cram_stats_add(c->stats[DS_NF], p->mate_line) < 0) - goto block_err; - - kh_val(s->pair[sec], k) = rnum; - } else { - detached: - //fprintf(stderr, "unpaired\n"); - - /* Derive mate flags from this flag */ - cr->mate_flags = 0; - if (bam_flag(b) & BAM_FMUNMAP) - cr->mate_flags |= CRAM_M_UNMAP; - if (bam_flag(b) & BAM_FMREVERSE) - cr->mate_flags |= CRAM_M_REVERSE; - - if (cram_stats_add(c->stats[DS_MF], cr->mate_flags) < 0) - goto block_err; - - cr->mate_pos = MAX(bam_mate_pos(b)+1, 0); - if (cram_stats_add(c->stats[DS_NP], cr->mate_pos) < 0) - goto block_err; - - cr->tlen = bam_ins_size(b); - if (cram_stats_add(c->stats[DS_TS], cr->tlen) < 0) - goto block_err; - - cr->cram_flags |= CRAM_FLAG_DETACHED; - if (cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK) < 0) - goto block_err; - if (cram_stats_add(c->stats[DS_NS], bam_mate_ref(b)) < 0) - goto block_err; - - cr->cram_flags |= CRAM_FLAG_STATS_ADDED; - } - } - - cr->mqual = bam_map_qual(b); - if (cram_stats_add(c->stats[DS_MQ], cr->mqual) < 0) - goto block_err; - - cr->mate_ref_id = bam_mate_ref(b); - - if 
(!(bam_flag(b) & BAM_FUNMAP)) { - if (c->first_base > cr->apos) - c->first_base = cr->apos; - - if (c->last_base < cr->aend) - c->last_base = cr->aend; - } - - return 0; - - block_err: - err: - return -1; -} - -/* - * Write iterator: put BAM format sequences into a CRAM file. - * We buffer up a containers worth of data at a time. - * - * Returns 0 on success - * -1 on failure - */ -int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) { - cram_container *c; - - if (!fd->ctr) { - fd->ctr = cram_new_container(fd->seqs_per_slice, - fd->slices_per_container); - if (!fd->ctr) - return -1; - fd->ctr->record_counter = fd->record_counter; - - pthread_mutex_lock(&fd->ref_lock); - fd->ctr->no_ref = fd->no_ref; - fd->ctr->embed_ref = fd->embed_ref; - pthread_mutex_unlock(&fd->ref_lock); - } - c = fd->ctr; - - int embed_ref = c->embed_ref; - - if (!c->slice || c->curr_rec == c->max_rec || - (bam_ref(b) != c->curr_ref && c->curr_ref >= -1) || - (c->s_num_bases + c->s_aux_bytes >= fd->bases_per_slice)) { - int slice_rec, curr_rec, multi_seq = fd->multi_seq == 1; - int curr_ref = c->slice ? c->curr_ref : bam_ref(b); - - /* - * Start packing slices when we routinely have under 1/4tr full. - * - * This option isn't available if we choose to embed references - * since we can only have one per slice. - * - * The multi_seq var here refers to our intention for the next slice. - * This slice has already been encoded so we output as-is. 
- */ - if (fd->multi_seq == -1 && c->curr_rec < c->max_rec/4+10 && - fd->last_slice && fd->last_slice < c->max_rec/4+10 && - embed_ref<=0) { - if (!c->multi_seq) - hts_log_info("Multi-ref enabled for next container"); - multi_seq = 1; - } else if (fd->multi_seq == 1) { - pthread_mutex_lock(&fd->metrics_lock); - if (fd->last_RI_count <= c->max_slice && fd->multi_seq_user != 1) { - multi_seq = 0; - hts_log_info("Multi-ref disabled for next container"); - } - pthread_mutex_unlock(&fd->metrics_lock); - } - - slice_rec = c->slice_rec; - curr_rec = c->curr_rec; - - if (CRAM_MAJOR_VERS(fd->version) == 1 || - c->curr_rec == c->max_rec || fd->multi_seq != 1 || !c->slice || - c->s_num_bases + c->s_aux_bytes >= fd->bases_per_slice) { - if (NULL == (c = cram_next_container(fd, b))) { - if (fd->ctr) { - // prevent cram_close attempting to flush - fd->ctr_mt = fd->ctr; // delay free when threading - fd->ctr = NULL; - } - return -1; - } - } - - /* - * Due to our processing order, some things we've already done we - * cannot easily undo. So when we first notice we should be packing - * multiple sequences per container we emit the small partial - * container as-is and then start a fresh one in a different mode. - */ - if (multi_seq == 0 && fd->multi_seq == 1 && fd->multi_seq_user == -1) { - // User selected auto-mode, we're currently using multi-seq, but - // have detected we don't need to. Switch back to auto. - fd->multi_seq = -1; - } else if (multi_seq) { - // We detected we need multi-seq - fd->multi_seq = 1; - c->multi_seq = 1; - c->pos_sorted = 0; - - // Cram_next_container may end up flushing an existing one and - // triggering fd->embed_ref=2 if no reference is found. - // Embedded refs are incompatible with multi-seq, so we bail - // out and switch to no_ref in this scenario. We do this - // within the container only, as multi_seq may be temporary - // and we switch back away from it again. 
- pthread_mutex_lock(&fd->ref_lock); - if (fd->embed_ref > 0 && c->curr_rec == 0 && c->curr_slice == 0) { - hts_log_warning("Changing from embed_ref to no_ref mode"); - // Should we update fd->embed_ref and no_ref here too? - // Doing so means if we go into multi-seq and back out - // again, eg due a cluster of tiny refs in the middle of - // much larger ones, then we bake in no-ref mode. - // - // However for unsorted data we're realistically not - // going to switch back. - c->embed_ref = fd->embed_ref = 0; // or -1 for auto? - c->no_ref = fd->no_ref = 1; - } - pthread_mutex_unlock(&fd->ref_lock); - - if (!c->refs_used) { - pthread_mutex_lock(&fd->ref_lock); - c->refs_used = calloc(fd->refs->nref, sizeof(int)); - pthread_mutex_unlock(&fd->ref_lock); - if (!c->refs_used) - return -1; - } - } - - fd->last_slice = curr_rec - slice_rec; - c->slice_rec = c->curr_rec; - - // Have we seen this reference before? - if (bam_ref(b) >= 0 && curr_ref >= 0 && bam_ref(b) != curr_ref && - embed_ref<=0 && !fd->unsorted && multi_seq) { - - if (!c->refs_used) { - pthread_mutex_lock(&fd->ref_lock); - c->refs_used = calloc(fd->refs->nref, sizeof(int)); - pthread_mutex_unlock(&fd->ref_lock); - if (!c->refs_used) - return -1; - } else if (c->refs_used && c->refs_used[bam_ref(b)]) { - pthread_mutex_lock(&fd->ref_lock); - fd->unsorted = 1; - fd->multi_seq = 1; - pthread_mutex_unlock(&fd->ref_lock); - } - } - - c->curr_ref = bam_ref(b); - if (c->refs_used && c->curr_ref >= 0) c->refs_used[c->curr_ref]++; - } - - if (!c->bams) { - /* First time through, allocate a set of bam pointers */ - pthread_mutex_lock(&fd->bam_list_lock); - if (fd->bl) { - spare_bams *spare = fd->bl; - c->bams = spare->bams; - fd->bl = spare->next; - free(spare); - } else { - c->bams = calloc(c->max_c_rec, sizeof(bam_seq_t *)); - if (!c->bams) { - pthread_mutex_unlock(&fd->bam_list_lock); - return -1; - } - } - pthread_mutex_unlock(&fd->bam_list_lock); - } - - /* Copy or alloc+copy the bam record, for later encoding 
*/ - if (c->bams[c->curr_c_rec]) { - if (bam_copy1(c->bams[c->curr_c_rec], b) == NULL) - return -1; - } else { - c->bams[c->curr_c_rec] = bam_dup1(b); - if (c->bams[c->curr_c_rec] == NULL) - return -1; - } - if (bam_seq_len(b)) { - c->s_num_bases += bam_seq_len(b); - } else { - // No sequence in BAM record. CRAM doesn't directly support this - // case, it ends up being stored as a string of N's for each query - // consuming CIGAR operation. As this can become very inefficient - // in time and memory, data where the query length is excessively - // long are rejected. - hts_pos_t qlen = bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)); - if (qlen > 100000000) { - hts_log_error("CIGAR query length %"PRIhts_pos - " for read \"%s\" is too long", - qlen, bam_get_qname(b)); - return -1; - } - c->s_num_bases += qlen; - } - c->curr_rec++; - c->curr_c_rec++; - c->s_aux_bytes += bam_get_l_aux(b); - c->n_mapped += (bam_flag(b) & BAM_FUNMAP) ? 0 : 1; - fd->record_counter++; - - return 0; -} diff --git a/src/htslib-1.19.1/cram/cram_encode.h b/src/htslib-1.19.1/cram/cram_encode.h deleted file mode 100644 index 03b8054..0000000 --- a/src/htslib-1.19.1/cram/cram_encode.h +++ /dev/null @@ -1,116 +0,0 @@ -/* -Copyright (c) 2012-2013, 2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * Include cram.h instead. - * - * This is an internal part of the CRAM system and is automatically included - * when you #include cram.h. - * - * Implements the encoding portion of CRAM I/O. Also see - * cram_codecs.[ch] for the actual encoding functions themselves. - */ - -#ifndef CRAM_ENCODE_H -#define CRAM_ENCODE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* ---------------------------------------------------------------------- - * CRAM sequence iterators. - */ - -/*! Write iterator: put BAM format sequences into a CRAM file. - * - * We buffer up a containers worth of data at a time. - * - * FIXME: break this into smaller pieces. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b); - - -/* ---------------------------------------------------------------------- - * Internal functions - */ - -/*! INTERNAL: - * Encodes a compression header block into a generic cram_block structure. 
- * - * @return - * Returns cram_block ptr on success; - * NULL on failure - */ -cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c, - cram_block_compression_hdr *h, - int embed_ref); - -/*! INTERNAL: - * Encodes a slice compression header. - * - * @return - * Returns cram_block on success; - * NULL on failure - */ -cram_block *cram_encode_slice_header(cram_fd *fd, cram_slice *s); - -/*! INTERNAL: - * Encodes all slices in a container into blocks. - * - * @return - * Returns 0 on success; - * -1 on failure - * - * FIXME: separate into encode_container and write_container. Ideally - * we should be able to do read_container / write_container or - * decode_container / encode_container. - */ -int cram_encode_container(cram_fd *fd, cram_container *c); - -/*! INTERNAL: - * - * During cram_next_container or before the final flush at end of - * file, we update the current slice headers and increment the slice - * number to the next slice. - * - * See cram_next_container() and cram_close(). - */ -void cram_update_curr_slice(cram_container *c, int version); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/cram/cram_external.c b/src/htslib-1.19.1/cram/cram_external.c deleted file mode 100644 index 7455185..0000000 --- a/src/htslib-1.19.1/cram/cram_external.c +++ /dev/null @@ -1,836 +0,0 @@ -/* -Copyright (c) 2015, 2018-2020, 2022-2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * External CRAM interface. - * - * Internally we're happy to use macros and to grub around in the cram - * structures. This isn't very sustainable for an externally usable - * ABI though, so we have anonymous structs and accessor functions too - * to permit software such as samtools reheader to manipulate cram - * containers and blocks in a robust manner. 
- */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include -#include - -#if defined(HAVE_EXTERNAL_LIBHTSCODECS) -#include -#else -#include "../htscodecs/htscodecs/rANS_static4x16.h" -#endif - -#include "../htslib/hfile.h" -#include "cram.h" - -/* - *----------------------------------------------------------------------------- - * cram_fd - */ -sam_hdr_t *cram_fd_get_header(cram_fd *fd) { return fd->header; } -void cram_fd_set_header(cram_fd *fd, sam_hdr_t *hdr) { fd->header = hdr; } - -int cram_fd_get_version(cram_fd *fd) { return fd->version; } -void cram_fd_set_version(cram_fd *fd, int vers) { fd->version = vers; } - -int cram_major_vers(cram_fd *fd) { return CRAM_MAJOR_VERS(fd->version); } -int cram_minor_vers(cram_fd *fd) { return CRAM_MINOR_VERS(fd->version); } - -hFILE *cram_fd_get_fp(cram_fd *fd) { return fd->fp; } -void cram_fd_set_fp(cram_fd *fd, hFILE *fp) { fd->fp = fp; } - - -/* - *----------------------------------------------------------------------------- - * cram_container - */ -int32_t cram_container_get_length(cram_container *c) { - return c->length; -} - -void cram_container_set_length(cram_container *c, int32_t length) { - c->length = length; -} - - -int32_t cram_container_get_num_blocks(cram_container *c) { - return c->num_blocks; -} - -void cram_container_set_num_blocks(cram_container *c, int32_t num_blocks) { - c->num_blocks = num_blocks; -} - -int32_t cram_container_get_num_records(cram_container *c) { - return c->num_records; -} - -int64_t cram_container_get_num_bases(cram_container *c) { - return c->num_bases; -} - - -/* Returns the landmarks[] array and the number of elements - * in num_landmarks. - */ -int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks) { - *num_landmarks = c->num_landmarks; - return c->landmark; -} - -/* Sets the landmarks[] array (pointer copy, not a memory dup) and - * num_landmarks value. 
- */ -void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks, - int32_t *landmarks) { - c->num_landmarks = num_landmarks; - c->landmark = landmarks; -} - - -/* Returns true if the container is empty (EOF marker) */ -int cram_container_is_empty(cram_fd *fd) { - return fd->empty_container; -} - - -/* - *----------------------------------------------------------------------------- - * cram_block_compression_hdr - */ - -/* - * Utility function to edit an RG id. - * This is only possible if there is one single RG value used and it - * is in the container compression header using HUFFMAN or BETA - * codec. In this case it is essentially hard coded and needs no - * editing of external (or worse, CORE) blocks. - * - * Returns 0 on success - * -1 on failure - */ -// Or arbitrary set compression header constant? - -static int cram_block_compression_hdr_set_DS(cram_block_compression_hdr *ch, - int ds, int new_rg) { - if (!ch || !ch->codecs[ds]) - return -1; - - switch (ch->codecs[ds]->codec) { - case E_HUFFMAN: - if (ch->codecs[ds]->u.huffman.ncodes != 1) - return -1; - ch->codecs[ds]->u.huffman.codes[0].symbol = new_rg; - return 0; - - case E_BETA: - if (ch->codecs[ds]->u.beta.nbits != 0) - return -1; - ch->codecs[ds]->u.beta.offset = -new_rg; - return 0; - - default: - break; - } - - return -1; -} - -int cram_block_compression_hdr_set_rg(cram_block_compression_hdr *ch, int new_rg) { - return cram_block_compression_hdr_set_DS(ch, DS_RG, new_rg); -} - -/* - * Converts a cram_block_compression_hdr struct used for decoding to - * one used for encoding. Maybe this should be a transparent - * operation applied on-demand. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_block_compression_hdr_decoder2encoder(cram_fd *fd, - cram_block_compression_hdr *ch) { - int i; - - if (!ch) - return -1; - - for (i = 0; i < DS_END; i++) { - cram_codec *co = ch->codecs[i]; - if (!co) - continue; - - if (-1 == cram_codec_decoder2encoder(fd, co)) - return -1; - } - - return 0; -} - -typedef struct { - cram_block_compression_hdr *hdr; - cram_map *curr_map; - int idx; - int is_tag; // phase 2 using tag_encoding_map -} cram_codec_iter; - -static void cram_codec_iter_init(cram_block_compression_hdr *hdr, - cram_codec_iter *iter) { - iter->hdr = hdr; - iter->curr_map = NULL; - iter->idx = 0; - iter->is_tag = 0; -} - -// See enum cram_DS_ID in cram/cram_structs -static int cram_ds_to_key(enum cram_DS_ID ds) { - switch(ds) { - case DS_RN: return 256*'R'+'N'; - case DS_QS: return 256*'Q'+'S'; - case DS_IN: return 256*'I'+'N'; - case DS_SC: return 256*'S'+'C'; - case DS_BF: return 256*'B'+'F'; - case DS_CF: return 256*'C'+'F'; - case DS_AP: return 256*'A'+'P'; - case DS_RG: return 256*'R'+'G'; - case DS_MQ: return 256*'M'+'Q'; - case DS_NS: return 256*'N'+'S'; - case DS_MF: return 256*'M'+'F'; - case DS_TS: return 256*'T'+'S'; - case DS_NP: return 256*'N'+'P'; - case DS_NF: return 256*'N'+'F'; - case DS_RL: return 256*'R'+'L'; - case DS_FN: return 256*'F'+'N'; - case DS_FC: return 256*'F'+'C'; - case DS_FP: return 256*'F'+'P'; - case DS_DL: return 256*'D'+'L'; - case DS_BA: return 256*'B'+'A'; - case DS_BS: return 256*'B'+'S'; - case DS_TL: return 256*'T'+'L'; - case DS_RI: return 256*'R'+'I'; - case DS_RS: return 256*'R'+'S'; - case DS_PD: return 256*'P'+'D'; - case DS_HC: return 256*'H'+'C'; - case DS_BB: return 256*'B'+'B'; - case DS_QQ: return 256*'Q'+'Q'; - case DS_TN: return 256*'T'+'N'; - case DS_TC: return 256*'T'+'C'; - case DS_TM: return 256*'T'+'M'; - case DS_TV: return 256*'T'+'V'; - default: break; - } - - return -1; // unknown -} - -static cram_codec 
*cram_codec_iter_next(cram_codec_iter *iter, - int *key) { - cram_codec *cc = NULL; - cram_block_compression_hdr *hdr = iter->hdr; - - if (!iter->is_tag) { - // 1: Iterating through main data-series - do { - cc = hdr->codecs[iter->idx++]; - } while(!cc && iter->idx < DS_END); - if (cc) { - *key = cram_ds_to_key(iter->idx-1); - return cc; - } - - // Reset index for phase 2 - iter->idx = 0; - iter->is_tag = 1; - } - - do { - if (!iter->curr_map) - iter->curr_map = hdr->tag_encoding_map[iter->idx++]; - - cc = iter->curr_map ? iter->curr_map->codec : NULL; - if (cc) { - *key = iter->curr_map->key; - iter->curr_map = iter->curr_map->next; - return cc; - } - } while (iter->idx <= CRAM_MAP_HASH); - - // End of codecs - return NULL; -} - -/* - * A list of data-series, used to create a linked list threaded through - * a single array. - */ -typedef struct ds_list { - int data_series; - int next; -} ds_list; - -KHASH_MAP_INIT_INT(cid, int64_t) - -// Opaque struct for the CRAM block content-id -> data-series map. -struct cram_cid2ds_t { - ds_list *ds; // array of data-series with linked lists threading through it - int ds_size; - int ds_idx; - khash_t(cid) *hash; // key=content_id, value=index to ds array - int *ds_a; // serialised array of data-series returned by queries. -}; - -void cram_cid2ds_free(cram_cid2ds_t *cid2ds) { - if (cid2ds) { - if (cid2ds->hash) - kh_destroy(cid, cid2ds->hash); - free(cid2ds->ds); - free(cid2ds->ds_a); - free(cid2ds); - } -} - -/* - * Map cram block numbers to data-series. It's normally a 1:1 mapping, - * but in rare cases it can be 1:many (or even many:many). - * The key is the block number and the value is an index into the data-series - * array, which we iterate over until reaching a negative value. - * - * Provide cid2ds as NULL to allocate a new map or pass in an existing one - * to append to this map. The new (or existing) map is returned. - * - * Returns the cid2ds (newly allocated or as provided) on success, - * NULL on failure. 
- */ -cram_cid2ds_t *cram_update_cid2ds_map(cram_block_compression_hdr *hdr, - cram_cid2ds_t *cid2ds) { - cram_cid2ds_t *c2d = cid2ds; - if (!c2d) { - c2d = calloc(1, sizeof(*c2d)); - if (!c2d) - return NULL; - - c2d->hash = kh_init(cid); - if (!c2d->hash) - goto err; - } - - // Iterate through codecs. Initially primary two-left ones in - // rec_encoding_map, and then the three letter in tag_encoding_map. - cram_codec_iter citer; - cram_codec_iter_init(hdr, &citer); - cram_codec *codec; - int key; - - while ((codec = cram_codec_iter_next(&citer, &key))) { - // Having got a codec, we can then use cram_codec_to_id to get - // the block IDs utilised by that codec. This is then our - // map for allocating data blocks to data series, but for shared - // blocks we can't separate out how much is used by each DS. - int bnum[2]; - cram_codec_get_content_ids(codec, bnum); - - khiter_t k; - int ret, i; - for (i = 0; i < 2; i++) { - if (bnum[i] > -2) { - k = kh_put(cid, c2d->hash, bnum[i], &ret); - if (ret < 0) - goto err; - - if (c2d->ds_idx >= c2d->ds_size) { - c2d->ds_size += 100; - c2d->ds_size *= 2; - ds_list *ds_new = realloc(c2d->ds, - c2d->ds_size * sizeof(*ds_new)); - if (!ds_new) - goto err; - c2d->ds = ds_new; - } - - if (ret == 0) { - // Shared content_id, so add to list of DS - - // Maybe data-series should be part of the hash key? - // - // So top-32 bit is content-id, bot-32 bit is key. - // Sort hash by key and then can group all the data-series - // known together. ?? - // - // Brute force for now, scan to see if recorded. - // Typically this is minimal effort as we almost always - // have 1 data-series per block content-id, so the list to - // search is of size 1. 
- int dsi = kh_value(c2d->hash, k); - while (dsi >= 0) { - if (c2d->ds[dsi].data_series == key) - break; - dsi = c2d->ds[dsi].next; - } - - if (dsi == -1) { - // Block content_id seen before, but not with this DS - c2d->ds[c2d->ds_idx].data_series = key; - c2d->ds[c2d->ds_idx].next = kh_value(c2d->hash, k); - kh_value(c2d->hash, k) = c2d->ds_idx; - c2d->ds_idx++; - } - } else { - // First time this content id has been used - c2d->ds[c2d->ds_idx].data_series = key; - c2d->ds[c2d->ds_idx].next = -1; - kh_value(c2d->hash, k) = c2d->ds_idx; - c2d->ds_idx++; - } - } - } - } - - return c2d; - - err: - if (c2d != cid2ds) - cram_cid2ds_free(c2d); - return NULL; -} - -/* - * Return a list of data series observed as belonging to a block with - * the specified content_id. *n is the number of data series - * returned, or 0 if block is unused. - * Block content_id of -1 is used to indicate the CORE block. - * - * The pointer returned is owned by the cram_cid2ds state and should - * not be freed by the caller. - */ -int *cram_cid2ds_query(cram_cid2ds_t *c2d, int content_id, int *n) { - *n = 0; - if (!c2d || !c2d->hash) - return NULL; - - khiter_t k = kh_get(cid, c2d->hash, content_id); - if (k == kh_end(c2d->hash)) - return NULL; - - if (!c2d->ds_a) { - c2d->ds_a = malloc(c2d->ds_idx * sizeof(int)); - if (!c2d->ds_a) - return NULL; - } - - int dsi = kh_value(c2d->hash, k); // initial ds array index from hash - int idx = 0; - while (dsi >= 0) { - c2d->ds_a[idx++] = c2d->ds[dsi].data_series; - dsi = c2d->ds[dsi].next; // iterate over list within ds array - } - - *n = idx; - return c2d->ds_a; -} - -/* - * Produces a description of the record and tag encodings held within - * a compression header and appends to 'ks'. - * - * Returns 0 on success, - * <0 on failure. 
- */ -int cram_describe_encodings(cram_block_compression_hdr *hdr, kstring_t *ks) { - cram_codec_iter citer; - cram_codec_iter_init(hdr, &citer); - cram_codec *codec; - int key, r = 0; - - while ((codec = cram_codec_iter_next(&citer, &key))) { - char key_s[4] = {0}; - int key_i = 0; - if (key>>16) key_s[key_i++] = key>>16; - key_s[key_i++] = (key>>8)&0xff; - key_s[key_i++] = key&0xff; - r |= ksprintf(ks, "\t%s\t", key_s) < 0; - r |= cram_codec_describe(codec, ks) < 0; - r |= kputc('\n', ks) < 0; - } - - return r ? -1 : 0; -} - -/* - *----------------------------------------------------------------------------- - * cram_slice - */ -int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr) { - return hdr->num_blocks; -} - -int cram_slice_hdr_get_embed_ref_id(cram_block_slice_hdr *h) { - return h->ref_base_id; -} - -void cram_slice_hdr_get_coords(cram_block_slice_hdr *h, - int *refid, hts_pos_t *start, hts_pos_t *span) { - if (refid) - *refid = h->ref_seq_id; - if (start) - *start = h->ref_seq_start; - if (span) - *span = h->ref_seq_span; -} - -/* - *----------------------------------------------------------------------------- - * cram_block - */ -int32_t cram_block_get_content_id(cram_block *b) { - return b->content_type == CORE ? 
-1 : b->content_id; -} -int32_t cram_block_get_comp_size(cram_block *b) { return b->comp_size; } -int32_t cram_block_get_uncomp_size(cram_block *b) { return b->uncomp_size; } -int32_t cram_block_get_crc32(cram_block *b) { return b->crc32; } -void * cram_block_get_data(cram_block *b) { return BLOCK_DATA(b); } -int32_t cram_block_get_size(cram_block *b) { return BLOCK_SIZE(b); } -enum cram_block_method cram_block_get_method(cram_block *b) { - return (enum cram_block_method)b->orig_method; -} -enum cram_content_type cram_block_get_content_type(cram_block *b) { - return b->content_type; -} - -void cram_block_set_content_id(cram_block *b, int32_t id) { b->content_id = id; } -void cram_block_set_comp_size(cram_block *b, int32_t size) { b->comp_size = size; } -void cram_block_set_uncomp_size(cram_block *b, int32_t size) { b->uncomp_size = size; } -void cram_block_set_crc32(cram_block *b, int32_t crc) { b->crc32 = crc; } -void cram_block_set_data(cram_block *b, void *data) { BLOCK_DATA(b) = data; } -void cram_block_set_size(cram_block *b, int32_t size) { BLOCK_SIZE(b) = size; } - -int cram_block_append(cram_block *b, const void *data, int size) { - BLOCK_APPEND(b, data, size); - return 0; - - block_err: - return -1; -} -void cram_block_update_size(cram_block *b) { BLOCK_UPLEN(b); } - -// Offset is known as "size" internally, but it can be confusing. -size_t cram_block_get_offset(cram_block *b) { return BLOCK_SIZE(b); } -void cram_block_set_offset(cram_block *b, size_t offset) { BLOCK_SIZE(b) = offset; } - -/* - * Given a compressed block of data in a specified compression method, - * fill out the 'cm' field with meta-data gleaned from the compressed - * block. - * - * If comp is CRAM_COMP_UNKNOWN, we attempt to auto-detect the compression - * format, but this doesn't work for all methods. - * - * Retuns the detected or specified comp method, and fills out *cm - * if non-NULL. 
- */ -cram_method_details *cram_expand_method(uint8_t *data, int32_t size, - enum cram_block_method comp) { - cram_method_details *cm = calloc(1, sizeof(*cm)); - if (!cm) - return NULL; - - const char *xz_header = "\xFD""7zXZ"; // including nul - - if (comp == CRAM_COMP_UNKNOWN) { - // Auto-detect - if (size > 1 && data[0] == 0x1f && data[1] == 0x8b) - comp = CRAM_COMP_GZIP; - else if (size > 3 && data[1] == 'B' && data[2] == 'Z' - && data[3] == 'h') - comp = CRAM_COMP_BZIP2; - else if (size > 6 && memcmp(xz_header, data, 6) == 0) - comp = CRAM_COMP_LZMA; - else - comp = CRAM_COMP_UNKNOWN; - } - cm->method = comp; - - // Interrogate the compressed data stream to fill out additional fields. - switch (comp) { - case CRAM_COMP_GZIP: - if (size > 8) { - if (data[8] == 4) - cm->level = 1; - else if (data[8] == 2) - cm->level = 9; - else - cm->level = 5; - } - break; - - case CRAM_COMP_BZIP2: - if (size > 3 && data[3] >= '1' && data[3] <= '9') - cm->level = data[3]-'0'; - break; - - case CRAM_COMP_RANS4x8: - cm->Nway = 4; - if (size > 0 && data[0] == 1) - cm->order = 1; - else - cm->order = 0; - break; - - case CRAM_COMP_RANSNx16: - if (size > 0) { - cm->order = data[0] & 1; - cm->Nway = data[0] & RANS_ORDER_X32 ? 32 : 4; - cm->rle = data[0] & RANS_ORDER_RLE ? 1 : 0; - cm->pack = data[0] & RANS_ORDER_PACK ? 1 : 0; - cm->cat = data[0] & RANS_ORDER_CAT ? 1 : 0; - cm->stripe = data[0] & RANS_ORDER_STRIPE ? 1 : 0; - cm->nosz = data[0] & RANS_ORDER_NOSZ ? 1 : 0; - } - break; - - case CRAM_COMP_ARITH: - if (size > 0) { - // Not in a public header, but the same transforms as rANSNx16 - cm->order = data[0] & 3; - cm->rle = data[0] & RANS_ORDER_RLE ? 1 : 0; - cm->pack = data[0] & RANS_ORDER_PACK ? 1 : 0; - cm->cat = data[0] & RANS_ORDER_CAT ? 1 : 0; - cm->stripe = data[0] & RANS_ORDER_STRIPE ? 1 : 0; - cm->nosz = data[0] & RANS_ORDER_NOSZ ? 1 : 0; - cm->ext = data[0] & 4 /*external*/ ? 
1 : 0; - } - break; - - case CRAM_COMP_TOK3: - if (size > 8) { - if (data[8] == 1) - cm->level = 11; - else if (data[8] == 0) - cm->level = 1; - } - break; - - default: - break; - } - - return cm; -} - -/* - *----------------------------------------------------------------------------- - * cram_codecs - */ - -// -2 is unused. -// -1 is CORE -// >= 0 is the block with that Content ID -void cram_codec_get_content_ids(cram_codec *c, int ids[2]) { - ids[0] = cram_codec_to_id(c, &ids[1]); -} - -/* - *----------------------------------------------------------------------------- - * Utility functions - */ - -/* - * Copies the blocks representing the next num_slice slices from a - * container from 'in' to 'out'. It is expected that the file pointer - * is just after the read of the cram_container and cram compression - * header. - * - * Returns 0 on success - * -1 on failure - */ -int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice) { - int32_t i, j; - - for (i = 0; i < num_slice; i++) { - cram_block *blk; - cram_block_slice_hdr *hdr; - - if (!(blk = cram_read_block(in))) - return -1; - if (!(hdr = cram_decode_slice_header(in, blk))) { - cram_free_block(blk); - return -1; - } - if (cram_write_block(out, blk) != 0) { - cram_free_block(blk); - return -1; - } - cram_free_block(blk); - - int num_blocks = cram_slice_hdr_get_num_blocks(hdr); - for (j = 0; j < num_blocks; j++) { - blk = cram_read_block(in); - if (!blk || cram_write_block(out, blk) != 0) { - if (blk) cram_free_block(blk); - return -1; - } - cram_free_block(blk); - } - cram_free_slice_header(hdr); - } - - return 0; -} - -/* - * Renumbers RG numbers in a cram compression header. - * - * CRAM stores RG as the Nth number in the header, rather than a - * string holding the ID: tag. This is smaller in space, but means - * "samtools cat" to join files together that contain single but - * different RG lines needs a way of renumbering them. 
- * - * The file descriptor is expected to be immediately after the - * cram_container structure (ie before the cram compression header). - * Due to the nature of the CRAM format, this needs to read and write - * the blocks itself. Note that there may be multiple slices within - * the container, meaning multiple compression headers to manipulate. - * Changing RG may change the size of the compression header and - * therefore the length field in the container. Hence we rewrite all - * blocks just in case and also emit the adjusted container. - * - * The current implementation can only cope with renumbering a single - * RG (and only then if it is using HUFFMAN or BETA codecs). In - * theory it *may* be possible to renumber multiple RGs if they use - * HUFFMAN to the CORE block or use an external block unshared by any - * other data series. So we have an API that can be upgraded to - * support this, but do not implement it for now. An example - * implementation of RG as an EXTERNAL block would be to find that - * block and rewrite it, returning the number of blocks consumed. - * - * Returns 0 on success; - * -1 if unable to edit; - * -2 on other errors (eg I/O). - */ -int cram_transcode_rg(cram_fd *in, cram_fd *out, - cram_container *c, - int nrg, int *in_rg, int *out_rg) { - int new_rg = *out_rg, old_size, new_size; - cram_block *o_blk, *n_blk; - cram_block_compression_hdr *ch; - - if (nrg != 1) { - hts_log_error("CRAM transcode supports only a single RG"); - return -2; - } - - // Produce a new block holding the updated compression header, - // with RG transcoded to a new value. (Single only supported.) 
- o_blk = cram_read_block(in); - old_size = cram_block_size(o_blk); - ch = cram_decode_compression_header(in, o_blk); - if (cram_block_compression_hdr_set_rg(ch, new_rg) != 0) - return -1; - if (cram_block_compression_hdr_decoder2encoder(in, ch) != 0) - return -1; - n_blk = cram_encode_compression_header(in, c, ch, in->embed_ref); - cram_free_compression_header(ch); - - /* - * Warning: this has internal knowledge of the cram compression - * header format. - * - * The decoder doesn't set c->tags_used, so the encoder puts a two - * byte blank segment. This means n_blk is too short. We skip - * through the decoded old block (o_blk) and copy from there. - */ - char *cp = cram_block_get_data(o_blk); - char *op = cp; - char *endp = cp + cram_block_get_uncomp_size(o_blk); - //fprintf(stderr, "sz = %d\n", (int)(endp-cp)); - int32_t i32, err = 0; - - i32 = in->vv.varint_get32(&cp, endp, &err); - cp += i32; - i32 = in->vv.varint_get32(&cp, endp, &err); - cp += i32; - op = cp; - i32 = in->vv.varint_get32(&cp, endp, &err); - i32 += (cp-op); - if (err) - return -2; - - //fprintf(stderr, "remaining %d bytes\n", i32); - cram_block_set_size(n_blk, cram_block_get_size(n_blk)-2); - cram_block_append(n_blk, op, i32); - cram_block_update_size(n_blk); - - new_size = cram_block_size(n_blk); - - //fprintf(stderr, "size %d -> %d\n", old_size, new_size); - - // Now we've constructedthe updated compression header, - // amend the container too (it may have changed size). - int32_t *landmarks, num_landmarks; - landmarks = cram_container_get_landmarks(c, &num_landmarks); - - if (old_size != new_size) { - int diff = new_size - old_size, j; - - for (j = 0; j < num_landmarks; j++) - landmarks[j] += diff; - //cram_container_set_landmarks(c, num_landmarks, landmarks); - cram_container_set_length(c, cram_container_get_length(c) + diff); - } - - // Finally write it all out; container, compression header, - // and then all the remaining slice blocks. 
- if (cram_write_container(out, c) != 0) - return -2; - - cram_write_block(out, n_blk); - cram_free_block(o_blk); - cram_free_block(n_blk); - - // Container num_blocks can be invalid, due to a bug. - // Instead we iterate in slice context instead. - return cram_copy_slice(in, out, num_landmarks); -} - - -/*! - * Returns the refs_t structure used by a cram file handle. - * - * This may be used in conjunction with option CRAM_OPT_SHARED_REF to - * share reference memory between multiple file handles. - * - * @return - * Returns NULL if none exists or the file handle is not a CRAM file. - */ -refs_t *cram_get_refs(htsFile *fd) { - return fd->format.format == cram - ? fd->fp.cram->refs - : NULL; -} diff --git a/src/htslib-1.19.1/cram/cram_index.c b/src/htslib-1.19.1/cram/cram_index.c deleted file mode 100644 index 0908736..0000000 --- a/src/htslib-1.19.1/cram/cram_index.c +++ /dev/null @@ -1,846 +0,0 @@ -/* -Copyright (c) 2013-2020, 2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * The index is a gzipped tab-delimited text file with one line per slice. - * The columns are: - * 1: reference number (0 to N-1, as per BAM ref_id) - * 2: reference position of 1st read in slice (1..?) - * 3: number of reads in slice - * 4: offset of container start (relative to end of SAM header, so 1st - * container is offset 0). - * 5: slice number within container (ie which landmark). - * - * In memory, we hold this in a nested containment list. Each list element is - * a cram_index struct. Each element in turn can contain its own list of - * cram_index structs. - * - * Any start..end range which is entirely contained within another (and - * earlier as it is sorted) range will be held within it. This ensures that - * the outer list will never have containments and we can safely do a - * binary search to find the first range which overlaps any given coordinate. - */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../htslib/bgzf.h" -#include "../htslib/hfile.h" -#include "../hts_internal.h" -#include "cram.h" -#include "os.h" - -#if 0 -static void dump_index_(cram_index *e, int level) { - int i, n; - n = printf("%*s%d / %d .. 
%d, ", level*4, "", e->refid, e->start, e->end); - printf("%*soffset %"PRId64" %p %p\n", MAX(0,50-n), "", e->offset, e, e->e_next); - for (i = 0; i < e->nslice; i++) { - dump_index_(&e->e[i], level+1); - } -} - -static void dump_index(cram_fd *fd) { - int i; - for (i = 0; i < fd->index_sz; i++) { - dump_index_(&fd->index[i], 0); - } -} -#endif - -// Thread a linked list through the nested containment list. -// This makes navigating it and finding the "next" index entry -// trivial. -static cram_index *link_index_(cram_index *e, cram_index *e_last) { - int i; - if (e_last) - e_last->e_next = e; - - // We don't want to link in the top-level cram_index with - // offset=0 and start/end = INT_MIN/INT_MAX. - if (e->offset) - e_last = e; - - for (i = 0; i < e->nslice; i++) - e_last = link_index_(&e->e[i], e_last); - - return e_last; -} - -static void link_index(cram_fd *fd) { - int i; - cram_index *e_last = NULL; - - for (i = 0; i < fd->index_sz; i++) { - e_last = link_index_(&fd->index[i], e_last); - } - - if (e_last) - e_last->e_next = NULL; -} - -static int kget_int32(kstring_t *k, size_t *pos, int32_t *val_p) { - int sign = 1; - int32_t val = 0; - size_t p = *pos; - - while (p < k->l && (k->s[p] == ' ' || k->s[p] == '\t')) - p++; - - if (p < k->l && k->s[p] == '-') - sign = -1, p++; - - if (p >= k->l || !(k->s[p] >= '0' && k->s[p] <= '9')) - return -1; - - while (p < k->l && k->s[p] >= '0' && k->s[p] <= '9') { - int digit = k->s[p++]-'0'; - val = val*10 + digit; - } - - *pos = p; - *val_p = sign*val; - - return 0; -} - -static int kget_int64(kstring_t *k, size_t *pos, int64_t *val_p) { - int sign = 1; - int64_t val = 0; - size_t p = *pos; - - while (p < k->l && (k->s[p] == ' ' || k->s[p] == '\t')) - p++; - - if (p < k->l && k->s[p] == '-') - sign = -1, p++; - - if (p >= k->l || !(k->s[p] >= '0' && k->s[p] <= '9')) - return -1; - - while (p < k->l && k->s[p] >= '0' && k->s[p] <= '9') { - int digit = k->s[p++]-'0'; - val = val*10 + digit; - } - - *pos = p; - *val_p = 
sign*val; - - return 0; -} - -/* - * Loads a CRAM .crai index into memory. - * - * Returns 0 for success - * -1 for failure - */ -int cram_index_load(cram_fd *fd, const char *fn, const char *fn_idx) { - - char *tfn_idx = NULL; - char buf[65536]; - ssize_t len; - kstring_t kstr = {0}; - hFILE *fp; - cram_index *idx; - cram_index **idx_stack = NULL, *ep, e; - int idx_stack_alloc = 0, idx_stack_ptr = 0; - size_t pos = 0; - - /* Check if already loaded */ - if (fd->index) - return 0; - - fd->index = calloc((fd->index_sz = 1), sizeof(*fd->index)); - if (!fd->index) - return -1; - - idx = &fd->index[0]; - idx->refid = -1; - idx->start = INT_MIN; - idx->end = INT_MAX; - - idx_stack = calloc(++idx_stack_alloc, sizeof(*idx_stack)); - if (!idx_stack) - goto fail; - - idx_stack[idx_stack_ptr] = idx; - - // Support pathX.cram##idx##pathY.crai - const char *fn_delim = strstr(fn, HTS_IDX_DELIM); - if (fn_delim && !fn_idx) - fn_idx = fn_delim + strlen(HTS_IDX_DELIM); - - if (!fn_idx) { - if (hts_idx_check_local(fn, HTS_FMT_CRAI, &tfn_idx) == 0 && hisremote(fn)) - tfn_idx = hts_idx_getfn(fn, ".crai"); - - if (!tfn_idx) { - hts_log_error("Could not retrieve index file for '%s'", fn); - goto fail; - } - fn_idx = tfn_idx; - } - - if (!(fp = hopen(fn_idx, "r"))) { - hts_log_error("Could not open index file '%s'", fn_idx); - goto fail; - } - - // Load the file into memory - while ((len = hread(fp, buf, sizeof(buf))) > 0) { - if (kputsn(buf, len, &kstr) < 0) - goto fail; - } - - if (len < 0 || kstr.l < 2) - goto fail; - - if (hclose(fp) < 0) - goto fail; - - // Uncompress if required - if (kstr.s[0] == 31 && (uc)kstr.s[1] == 139) { - size_t l = 0; - char *s = zlib_mem_inflate(kstr.s, kstr.l, &l); - if (!s) - goto fail; - - free(kstr.s); - kstr.s = s; - kstr.l = l; - kstr.m = l; // conservative estimate of the size allocated - if (kputsn("", 0, &kstr) < 0) // ensure kstr.s is NUL-terminated - goto fail; - } - - - // Parse it line at a time - while (pos < kstr.l) { - /* 1.1 layout */ - if 
(kget_int32(&kstr, &pos, &e.refid) == -1) - goto fail; - - if (kget_int32(&kstr, &pos, &e.start) == -1) - goto fail; - - if (kget_int32(&kstr, &pos, &e.end) == -1) - goto fail; - - if (kget_int64(&kstr, &pos, &e.offset) == -1) - goto fail; - - if (kget_int32(&kstr, &pos, &e.slice) == -1) - goto fail; - - if (kget_int32(&kstr, &pos, &e.len) == -1) - goto fail; - - e.end += e.start-1; - //printf("%d/%d..%d-offset=%" PRIu64 ",len=%d,slice=%d\n", e.refid, e.start, e.end, e.offset, e.len, e.slice); - - if (e.refid < -1) { - hts_log_error("Malformed index file, refid %d", e.refid); - goto fail; - } - - if (e.refid != idx->refid) { - if (fd->index_sz < e.refid+2) { - cram_index *new_idx; - int new_sz = e.refid+2; - size_t index_end = fd->index_sz * sizeof(*fd->index); - new_idx = realloc(fd->index, - new_sz * sizeof(*fd->index)); - if (!new_idx) - goto fail; - - fd->index = new_idx; - fd->index_sz = new_sz; - memset(((char *)fd->index) + index_end, 0, - fd->index_sz * sizeof(*fd->index) - index_end); - } - idx = &fd->index[e.refid+1]; - idx->refid = e.refid; - idx->start = INT_MIN; - idx->end = INT_MAX; - idx->nslice = idx->nalloc = 0; - idx->e = NULL; - idx_stack[(idx_stack_ptr = 0)] = idx; - } - - while (!(e.start >= idx->start && e.end <= idx->end) || - (idx->start == 0 && idx->refid == -1)) { - idx = idx_stack[--idx_stack_ptr]; - } - - // Now contains, so append - if (idx->nslice+1 >= idx->nalloc) { - cram_index *new_e; - idx->nalloc = idx->nalloc ? 
idx->nalloc*2 : 16; - new_e = realloc(idx->e, idx->nalloc * sizeof(*idx->e)); - if (!new_e) - goto fail; - - idx->e = new_e; - } - - e.nalloc = e.nslice = 0; e.e = NULL; - *(ep = &idx->e[idx->nslice++]) = e; - idx = ep; - - if (++idx_stack_ptr >= idx_stack_alloc) { - cram_index **new_stack; - idx_stack_alloc *= 2; - new_stack = realloc(idx_stack, idx_stack_alloc*sizeof(*idx_stack)); - if (!new_stack) - goto fail; - idx_stack = new_stack; - } - idx_stack[idx_stack_ptr] = idx; - - while (pos < kstr.l && kstr.s[pos] != '\n') - pos++; - pos++; - } - - free(idx_stack); - free(kstr.s); - free(tfn_idx); - - // Convert NCList to linear linked list - link_index(fd); - - //dump_index(fd); - - return 0; - - fail: - free(kstr.s); - free(idx_stack); - free(tfn_idx); - cram_index_free(fd); // Also sets fd->index = NULL - return -1; -} - -static void cram_index_free_recurse(cram_index *e) { - if (e->e) { - int i; - for (i = 0; i < e->nslice; i++) { - cram_index_free_recurse(&e->e[i]); - } - free(e->e); - } -} - -void cram_index_free(cram_fd *fd) { - int i; - - if (!fd->index) - return; - - for (i = 0; i < fd->index_sz; i++) { - cram_index_free_recurse(&fd->index[i]); - } - free(fd->index); - - fd->index = NULL; -} - -/* - * Searches the index for the first slice overlapping a reference ID - * and position, or one immediately preceding it if none is found in - * the index to overlap this position. (Our index may have missing - * entries, but we require at least one per reference.) - * - * If the index finds multiple slices overlapping this position we - * return the first one only. Subsequent calls should specify - * "from" as the last slice we checked to find the next one. Otherwise - * set "from" to be NULL to find the first one. - * - * Refid can also be any of the special HTS_IDX_ values. - * For backwards compatibility, refid -1 is equivalent to HTS_IDX_NOCOOR. 
- * - * Returns the cram_index pointer on success - * NULL on failure - */ -cram_index *cram_index_query(cram_fd *fd, int refid, hts_pos_t pos, - cram_index *from) { - int i, j, k; - cram_index *e; - - if (from) { - // Continue from a previous search. - // We switch to just scanning the linked list, as the nested - // lists are typically short. - e = from->e_next; - if (e && e->refid == refid && e->start <= pos) - return e; - else - return NULL; - } - - switch(refid) { - case HTS_IDX_NONE: - case HTS_IDX_REST: - // fail, or already there, dealt with elsewhere. - return NULL; - - case HTS_IDX_NOCOOR: - refid = -1; - pos = 0; - break; - - case HTS_IDX_START: { - int64_t min_idx = INT64_MAX; - for (i = 0, j = -1; i < fd->index_sz; i++) { - if (fd->index[i].e && fd->index[i].e[0].offset < min_idx) { - min_idx = fd->index[i].e[0].offset; - j = i; - } - } - if (j < 0) - return NULL; - return fd->index[j].e; - } - - default: - if (refid < HTS_IDX_NONE || refid+1 >= fd->index_sz) - return NULL; - } - - from = &fd->index[refid+1]; - - // Ref with nothing aligned against it. - if (!from->e) - return NULL; - - // This sequence is covered by the index, so binary search to find - // the optimal starting block. - i = 0, j = fd->index[refid+1].nslice-1; - for (k = j/2; k != i; k = (j-i)/2 + i) { - if (from->e[k].refid > refid) { - j = k; - continue; - } - - if (from->e[k].refid < refid) { - i = k; - continue; - } - - if (from->e[k].start >= pos) { - j = k; - continue; - } - - if (from->e[k].start < pos) { - i = k; - continue; - } - } - // i==j or i==j-1. Check if j is better. 
- if (j >= 0 && from->e[j].start < pos && from->e[j].refid == refid) - i = j; - - /* The above found *a* bin overlapping, but not necessarily the first */ - while (i > 0 && from->e[i-1].end >= pos) - i--; - - /* We may be one bin before the optimum, so check */ - while (i+1 < from->nslice && - (from->e[i].refid < refid || - from->e[i].end < pos)) - i++; - - e = &from->e[i]; - - return e; -} - -// Return the index entry for last slice on a specific reference. -cram_index *cram_index_last(cram_fd *fd, int refid, cram_index *from) { - int slice; - - if (refid+1 < 0 || refid+1 >= fd->index_sz) - return NULL; - - if (!from) - from = &fd->index[refid+1]; - - // Ref with nothing aligned against it. - if (!from->e) - return NULL; - - slice = fd->index[refid+1].nslice - 1; - - // e is the last entry in the nested containment list, but it may - // contain further slices within it. - cram_index *e = &from->e[slice]; - while (e->e_next) - e = e->e_next; - - return e; -} - -/* - * Find the last container overlapping pos 'end', and the file offset of - * its end (equivalent to the start offset of the container following it). - */ -cram_index *cram_index_query_last(cram_fd *fd, int refid, hts_pos_t end) { - cram_index *e = NULL, *prev_e; - do { - prev_e = e; - e = cram_index_query(fd, refid, end, prev_e); - } while (e); - - if (!prev_e) - return NULL; - e = prev_e; - - // Note: offset of e and e->e_next may be the same if we're using a - // multi-ref container where a single container generates multiple - // index entries. - // - // We need to keep iterating until offset differs in order to find - // the genuine file offset for the end of container. - do { - prev_e = e; - e = e->e_next; - } while (e && e->offset == prev_e->offset); - - return prev_e; -} - -/* - * Skips to a container overlapping the start coordinate listed in - * cram_range. - * - * In theory we call cram_index_query multiple times, once per slice - * overlapping the range. 
However slices may be absent from the index - * which makes this problematic. Instead we find the left-most slice - * and then read from then on, skipping decoding of slices and/or - * whole containers when they don't overlap the specified cram_range. - * - * This function also updates the cram_fd range field. - * - * Returns 0 on success - * -1 on general failure - * -2 on no-data (empty chromosome) - */ -int cram_seek_to_refpos(cram_fd *fd, cram_range *r) { - int ret = 0; - cram_index *e; - - if (r->refid == HTS_IDX_NONE) { - ret = -2; goto err; - } - - // Ideally use an index, so see if we have one. - if ((e = cram_index_query(fd, r->refid, r->start, NULL))) { - if (0 != cram_seek(fd, e->offset, SEEK_SET)) { - if (0 != cram_seek(fd, e->offset - fd->first_container, SEEK_CUR)) { - ret = -1; goto err; - } - } - } else { - // Absent from index, but this most likely means it simply has no data. - ret = -2; goto err; - } - - pthread_mutex_lock(&fd->range_lock); - fd->range = *r; - if (r->refid == HTS_IDX_NOCOOR) { - fd->range.refid = -1; - fd->range.start = 0; - } else if (r->refid == HTS_IDX_START || r->refid == HTS_IDX_REST) { - fd->range.refid = -2; // special case in cram_next_slice - } - pthread_mutex_unlock(&fd->range_lock); - - if (fd->ctr) { - cram_free_container(fd->ctr); - if (fd->ctr_mt && fd->ctr_mt != fd->ctr) - cram_free_container(fd->ctr_mt); - fd->ctr = NULL; - fd->ctr_mt = NULL; - fd->ooc = 0; - fd->eof = 0; - } - - return 0; - - err: - // It's unlikely fd->range will be accessed after EOF or error, - // but this maintains identical behaviour to the previous code. - pthread_mutex_lock(&fd->range_lock); - fd->range = *r; - pthread_mutex_unlock(&fd->range_lock); - return ret; -} - - -/* - * A specialised form of cram_index_build (below) that deals with slices - * having multiple references in this (ref_id -2). In this scenario we - * decode the slice to look at the RI data series instead. 
- * - * Returns 0 on success - * -1 on read failure - * -2 on wrong sort order - * -4 on write failure - */ -static int cram_index_build_multiref(cram_fd *fd, - cram_container *c, - cram_slice *s, - BGZF *fp, - off_t cpos, - int32_t landmark, - int sz) { - int i, ref = -2; - int64_t ref_start = 0, ref_end; - char buf[1024]; - - if (fd->mode != 'w') { - if (0 != cram_decode_slice(fd, c, s, fd->header)) - return -1; - } - - ref_end = INT_MIN; - - int32_t last_ref = -9; - int32_t last_pos = -9; - for (i = 0; i < s->hdr->num_records; i++) { - if (s->crecs[i].ref_id == last_ref && s->crecs[i].apos < last_pos) { - hts_log_error("CRAM file is not sorted by chromosome / position"); - return -2; - } - last_ref = s->crecs[i].ref_id; - last_pos = s->crecs[i].apos; - - if (s->crecs[i].ref_id == ref) { - if (ref_end < s->crecs[i].aend) - ref_end = s->crecs[i].aend; - continue; - } - - if (ref != -2) { - snprintf(buf, sizeof(buf), - "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", - ref, ref_start, ref_end - ref_start + 1, - (int64_t)cpos, landmark, sz); - if (bgzf_write(fp, buf, strlen(buf)) < 0) - return -4; - } - - ref = s->crecs[i].ref_id; - ref_start = s->crecs[i].apos; - ref_end = s->crecs[i].aend; - } - - if (ref != -2) { - snprintf(buf, sizeof(buf), - "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", - ref, ref_start, ref_end - ref_start + 1, - (int64_t)cpos, landmark, sz); - if (bgzf_write(fp, buf, strlen(buf)) < 0) - return -4; - } - - return 0; -} - -/* - * Adds a single slice to the index. 
- */ -int cram_index_slice(cram_fd *fd, - cram_container *c, - cram_slice *s, - BGZF *fp, - off_t cpos, - off_t spos, // relative to cpos - off_t sz) { - int ret; - char buf[1024]; - - if (sz > INT_MAX) { - hts_log_error("CRAM slice is too big (%"PRId64" bytes)", - (int64_t) sz); - return -1; - } - - if (s->hdr->ref_seq_id == -2) { - ret = cram_index_build_multiref(fd, c, s, fp, cpos, spos, sz); - } else { - snprintf(buf, sizeof(buf), - "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", - s->hdr->ref_seq_id, s->hdr->ref_seq_start, - s->hdr->ref_seq_span, (int64_t)cpos, (int)spos, (int)sz); - ret = (bgzf_write(fp, buf, strlen(buf)) >= 0)? 0 : -4; - } - - return ret; -} - -/* - * Adds a single container to the index. - */ -static -int cram_index_container(cram_fd *fd, - cram_container *c, - BGZF *fp, - off_t cpos) { - int j; - off_t spos; - - // 2.0 format - for (j = 0; j < c->num_landmarks; j++) { - cram_slice *s; - off_t sz; - int ret; - - spos = htell(fd->fp); - if (spos - cpos - (off_t) c->offset != c->landmark[j]) { - hts_log_error("CRAM slice offset %"PRId64" does not match" - " landmark %d in container header (%"PRId32")", - (int64_t) (spos - cpos - (off_t) c->offset), - j, c->landmark[j]); - return -1; - } - - if (!(s = cram_read_slice(fd))) { - return -1; - } - - sz = htell(fd->fp) - spos; - ret = cram_index_slice(fd, c, s, fp, cpos, c->landmark[j], sz); - - cram_free_slice(s); - - if (ret < 0) { - return ret; - } - } - - return 0; -} - - -/* - * Builds an index file. - * - * fd is a newly opened cram file that we wish to index. - * fn_base is the filename of the associated CRAM file. - * fn_idx is the filename of the index file to be written; - * if NULL, we add ".crai" to fn_base to get the index filename. 
- * - * Returns 0 on success, - * negative on failure (-1 for read failure, -4 for write failure) - */ -int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx) { - cram_container *c; - off_t cpos, hpos; - BGZF *fp; - kstring_t fn_idx_str = {0}; - int64_t last_ref = -9, last_start = -9; - - // Useful for cram_index_build_multiref - cram_set_option(fd, CRAM_OPT_REQUIRED_FIELDS, SAM_RNAME | SAM_POS | SAM_CIGAR); - - if (! fn_idx) { - kputs(fn_base, &fn_idx_str); - kputs(".crai", &fn_idx_str); - fn_idx = fn_idx_str.s; - } - - if (!(fp = bgzf_open(fn_idx, "wg"))) { - perror(fn_idx); - free(fn_idx_str.s); - return -4; - } - - free(fn_idx_str.s); - - cpos = htell(fd->fp); - while ((c = cram_read_container(fd))) { - if (fd->err) { - perror("Cram container read"); - return -1; - } - - hpos = htell(fd->fp); - - if (!(c->comp_hdr_block = cram_read_block(fd))) - return -1; - assert(c->comp_hdr_block->content_type == COMPRESSION_HEADER); - - c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block); - if (!c->comp_hdr) - return -1; - - if (c->ref_seq_id == last_ref && c->ref_seq_start < last_start) { - hts_log_error("CRAM file is not sorted by chromosome / position"); - return -2; - } - last_ref = c->ref_seq_id; - last_start = c->ref_seq_start; - - if (cram_index_container(fd, c, fp, cpos) < 0) { - bgzf_close(fp); - return -1; - } - - off_t next_cpos = htell(fd->fp); - if (next_cpos != hpos + c->length) { - hts_log_error("Length %"PRId32" in container header at offset %lld does not match block lengths (%lld)", - c->length, (long long) cpos, (long long) next_cpos - hpos); - return -1; - } - cpos = next_cpos; - - cram_free_container(c); - } - if (fd->err) { - bgzf_close(fp); - return -1; - } - - return (bgzf_close(fp) >= 0)? 
0 : -4; -} diff --git a/src/htslib-1.19.1/cram/cram_index.h b/src/htslib-1.19.1/cram/cram_index.h deleted file mode 100644 index 5fa1154..0000000 --- a/src/htslib-1.19.1/cram/cram_index.h +++ /dev/null @@ -1,115 +0,0 @@ -/* -Copyright (c) 2013, 2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CRAM_INDEX_H -#define CRAM_INDEX_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Loads a CRAM .crai index into memory. 
- * Returns 0 for success - * -1 for failure - */ -int cram_index_load(cram_fd *fd, const char *fn, const char *fn_idx); - -void cram_index_free(cram_fd *fd); - -/* - * Searches the index for the first slice overlapping a reference ID - * and position. - * - * Returns the cram_index pointer on success - * NULL on failure - */ -cram_index *cram_index_query(cram_fd *fd, int refid, hts_pos_t pos, cram_index *frm); -cram_index *cram_index_last(cram_fd *fd, int refid, cram_index *from); -cram_index *cram_index_query_last(cram_fd *fd, int refid, hts_pos_t end); - -/* - * Skips to a container overlapping the start coordinate listed in - * cram_range. - * - * Returns 0 on success - * -1 on failure - */ -int cram_seek_to_refpos(cram_fd *fd, cram_range *r); - -void cram_index_free(cram_fd *fd); - -/* - * Skips to a container overlapping the start coordinate listed in - * cram_range. - * - * In theory we call cram_index_query multiple times, once per slice - * overlapping the range. However slices may be absent from the index - * which makes this problematic. Instead we find the left-most slice - * and then read from then on, skipping decoding of slices and/or - * whole containers when they don't overlap the specified cram_range. - * - * Returns 0 on success - * -1 on failure - */ -int cram_seek_to_refpos(cram_fd *fd, cram_range *r); - -/* - * Builds an index file. - * - * fd is a newly opened cram file that we wish to index. - * fn_base is the filename of the associated CRAM file. - * fn_idx is the filename of the index file to be written; - * if NULL, we add ".crai" to fn_base to get the index filename. - * - * Returns 0 on success, - * negative on failure (-1 for read failure, -4 for write failure) - */ -int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx); - -/* - * Adds a single slice to the index. 
- * - * Returns 0 on success, - * -1 on failure - */ -int cram_index_slice(cram_fd *fd, - cram_container *c, - cram_slice *s, - BGZF *fp, - off_t cpos, - off_t spos, // relative to cpos - off_t sz); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/cram/cram_io.c b/src/htslib-1.19.1/cram/cram_io.c deleted file mode 100644 index c3efb73..0000000 --- a/src/htslib-1.19.1/cram/cram_io.c +++ /dev/null @@ -1,6008 +0,0 @@ -/* -Copyright (c) 2012-2023 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * CRAM I/O primitives. - * - * - ITF8 encoding and decoding. - * - Block based I/O - * - Zlib inflating and deflating (memory) - * - CRAM basic data structure reading and writing - * - File opening / closing - * - Reference sequence handling - */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_LIBBZ2 -#include -#endif -#ifdef HAVE_LIBLZMA -#ifdef HAVE_LZMA_H -#include -#else -#include "../os/lzma_stub.h" -#endif -#endif -#include -#include -#include -#include - -#ifdef HAVE_LIBDEFLATE -#include -#define crc32(a,b,c) libdeflate_crc32((a),(b),(c)) -#endif - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -#include "../fuzz_settings.h" -#endif - -#include "cram.h" -#include "os.h" -#include "../htslib/hts.h" -#include "open_trace_file.h" - -#if defined(HAVE_EXTERNAL_LIBHTSCODECS) -#include -#include -#include -#include -#include -#include // CRAM v4.0 variable-size integers -#else -#include "../htscodecs/htscodecs/rANS_static.h" -#include "../htscodecs/htscodecs/rANS_static4x16.h" -#include "../htscodecs/htscodecs/arith_dynamic.h" -#include "../htscodecs/htscodecs/tokenise_name3.h" -#include "../htscodecs/htscodecs/fqzcomp_qual.h" -#include "../htscodecs/htscodecs/varint.h" -#endif - -//#define REF_DEBUG - -#ifdef REF_DEBUG -#include -#define gettid() (int)syscall(SYS_gettid) - -#define RP(...) 
fprintf (stderr, __VA_ARGS__) -#else -#define RP(...) -#endif - -#include "../htslib/hfile.h" -#include "../htslib/bgzf.h" -#include "../htslib/faidx.h" -#include "../hts_internal.h" - -#ifndef PATH_MAX -#define PATH_MAX FILENAME_MAX -#endif - -#define TRIAL_SPAN 70 -#define NTRIALS 3 - -#define CRAM_DEFAULT_LEVEL 5 - -/* ---------------------------------------------------------------------- - * ITF8 encoding and decoding. - * - * Also see the itf8_get and itf8_put macros in cram_io.h - */ - -/* - * LEGACY: consider using itf8_decode_crc. - * - * Reads an integer in ITF-8 encoding from 'cp' and stores it in - * *val. - * - * Returns the number of bytes read on success - * -1 on failure - */ -int itf8_decode(cram_fd *fd, int32_t *val_p) { - static int nbytes[16] = { - 0,0,0,0, 0,0,0,0, // 0000xxxx - 0111xxxx - 1,1,1,1, // 1000xxxx - 1011xxxx - 2,2, // 1100xxxx - 1101xxxx - 3, // 1110xxxx - 4, // 1111xxxx - }; - - static int nbits[16] = { - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx - 0x3f, 0x3f, 0x3f, 0x3f, // 1000xxxx - 1011xxxx - 0x1f, 0x1f, // 1100xxxx - 1101xxxx - 0x0f, // 1110xxxx - 0x0f, // 1111xxxx - }; - - int32_t val = hgetc(fd->fp); - if (val == -1) - return -1; - - int i = nbytes[val>>4]; - val &= nbits[val>>4]; - - switch(i) { - case 0: - *val_p = val; - return 1; - - case 1: - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val; - return 2; - - case 2: - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val; - return 3; - - case 3: - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val; - return 4; - - case 4: // really 3.5 more, why make it different? 
- val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<4) | (((unsigned char)hgetc(fd->fp)) & 0x0f); - *val_p = val; - } - - return 5; -} - -int itf8_decode_crc(cram_fd *fd, int32_t *val_p, uint32_t *crc) { - static int nbytes[16] = { - 0,0,0,0, 0,0,0,0, // 0000xxxx - 0111xxxx - 1,1,1,1, // 1000xxxx - 1011xxxx - 2,2, // 1100xxxx - 1101xxxx - 3, // 1110xxxx - 4, // 1111xxxx - }; - - static int nbits[16] = { - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx - 0x3f, 0x3f, 0x3f, 0x3f, // 1000xxxx - 1011xxxx - 0x1f, 0x1f, // 1100xxxx - 1101xxxx - 0x0f, // 1110xxxx - 0x0f, // 1111xxxx - }; - unsigned char c[5]; - - int32_t val = hgetc(fd->fp); - if (val == -1) - return -1; - - c[0]=val; - - int i = nbytes[val>>4]; - val &= nbits[val>>4]; - - if (i > 0) { - if (hread(fd->fp, &c[1], i) < i) - return -1; - } - - switch(i) { - case 0: - *val_p = val; - *crc = crc32(*crc, c, 1); - return 1; - - case 1: - val = (val<<8) | c[1]; - *val_p = val; - *crc = crc32(*crc, c, 2); - return 2; - - case 2: - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - *val_p = val; - *crc = crc32(*crc, c, 3); - return 3; - - case 3: - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - *val_p = val; - *crc = crc32(*crc, c, 4); - return 4; - - case 4: // really 3.5 more, why make it different? - { - uint32_t uv = val; - uv = (uv<<8) | c[1]; - uv = (uv<<8) | c[2]; - uv = (uv<<8) | c[3]; - uv = (uv<<4) | (c[4] & 0x0f); - // Avoid implementation-defined behaviour on negative values - *val_p = uv < 0x80000000UL ? (int32_t) uv : -((int32_t) (0xffffffffUL - uv)) - 1; - *crc = crc32(*crc, c, 5); - } - } - - return 5; -} - -/* - * Stores a value to memory in ITF-8 format. - * - * Returns the number of bytes required to store the number. - * This is a maximum of 5 bytes. 
- */ -static inline int itf8_put(char *cp, int32_t val) { - unsigned char *up = (unsigned char *)cp; - if (!(val & ~0x00000007f)) { // 1 byte - *up = val; - return 1; - } else if (!(val & ~0x00003fff)) { // 2 byte - *up++ = (val >> 8 ) | 0x80; - *up = val & 0xff; - return 2; - } else if (!(val & ~0x01fffff)) { // 3 byte - *up++ = (val >> 16) | 0xc0; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 3; - } else if (!(val & ~0x0fffffff)) { // 4 byte - *up++ = (val >> 24) | 0xe0; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 4; - } else { // 5 byte - *up++ = 0xf0 | ((val>>28) & 0xff); - *up++ = (val >> 20) & 0xff; - *up++ = (val >> 12) & 0xff; - *up++ = (val >> 4 ) & 0xff; - *up = val & 0x0f; - return 5; - } -} - - -/* 64-bit itf8 variant */ -static inline int ltf8_put(char *cp, int64_t val) { - unsigned char *up = (unsigned char *)cp; - if (!(val & ~((1LL<<7)-1))) { - *up = val; - return 1; - } else if (!(val & ~((1LL<<(6+8))-1))) { - *up++ = (val >> 8 ) | 0x80; - *up = val & 0xff; - return 2; - } else if (!(val & ~((1LL<<(5+2*8))-1))) { - *up++ = (val >> 16) | 0xc0; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 3; - } else if (!(val & ~((1LL<<(4+3*8))-1))) { - *up++ = (val >> 24) | 0xe0; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 4; - } else if (!(val & ~((1LL<<(3+4*8))-1))) { - *up++ = (val >> 32) | 0xf0; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 5; - } else if (!(val & ~((1LL<<(2+5*8))-1))) { - *up++ = (val >> 40) | 0xf8; - *up++ = (val >> 32) & 0xff; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 6; - } else if (!(val & ~((1LL<<(1+6*8))-1))) { - *up++ = (val >> 48) | 0xfc; - *up++ = (val >> 40) & 0xff; - *up++ = (val >> 32) & 0xff; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 
8 ) & 0xff; - *up = val & 0xff; - return 7; - } else if (!(val & ~((1LL<<(7*8))-1))) { - *up++ = (val >> 56) | 0xfe; - *up++ = (val >> 48) & 0xff; - *up++ = (val >> 40) & 0xff; - *up++ = (val >> 32) & 0xff; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 8; - } else { - *up++ = 0xff; - *up++ = (val >> 56) & 0xff; - *up++ = (val >> 48) & 0xff; - *up++ = (val >> 40) & 0xff; - *up++ = (val >> 32) & 0xff; - *up++ = (val >> 24) & 0xff; - *up++ = (val >> 16) & 0xff; - *up++ = (val >> 8 ) & 0xff; - *up = val & 0xff; - return 9; - } -} - -/* - * Encodes and writes a single integer in ITF-8 format. - * Returns 0 on success - * -1 on failure - */ -int itf8_encode(cram_fd *fd, int32_t val) { - char buf[5]; - int len = itf8_put(buf, val); - return hwrite(fd->fp, buf, len) == len ? 0 : -1; -} - -const int itf8_bytes[16] = { - 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 3, 3, 4, 5 -}; - -const int ltf8_bytes[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - - 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9 -}; - -/* - * LEGACY: consider using ltf8_decode_crc. 
- */ -int ltf8_decode(cram_fd *fd, int64_t *val_p) { - int c = hgetc(fd->fp); - int64_t val = (unsigned char)c; - if (c == -1) - return -1; - - if (val < 0x80) { - *val_p = val; - return 1; - - } else if (val < 0xc0) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & (((1LL<<(6+8)))-1); - return 2; - - } else if (val < 0xe0) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(5+2*8))-1); - return 3; - - } else if (val < 0xf0) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(4+3*8))-1); - return 4; - - } else if (val < 0xf8) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(3+4*8))-1); - return 5; - - } else if (val < 0xfc) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(2+5*8))-1); - return 6; - - } else if (val < 0xfe) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(1+6*8))-1); - return 7; - - } else if (val < 0xff) { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned 
char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val & ((1LL<<(7*8))-1); - return 8; - - } else { - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - val = (val<<8) | (unsigned char)hgetc(fd->fp); - *val_p = val; - } - - return 9; -} - -int ltf8_decode_crc(cram_fd *fd, int64_t *val_p, uint32_t *crc) { - unsigned char c[9]; - int64_t val = hgetc(fd->fp); - if (val < 0) - return -1; - - c[0] = val; - - if (val < 0x80) { - *val_p = val; - *crc = crc32(*crc, c, 1); - return 1; - - } else if (val < 0xc0) { - int v = hgetc(fd->fp); - if (v < 0) - return -1; - val = (val<<8) | (c[1]=v); - *val_p = val & (((1LL<<(6+8)))-1); - *crc = crc32(*crc, c, 2); - return 2; - - } else if (val < 0xe0) { - if (hread(fd->fp, &c[1], 2) < 2) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - *val_p = val & ((1LL<<(5+2*8))-1); - *crc = crc32(*crc, c, 3); - return 3; - - } else if (val < 0xf0) { - if (hread(fd->fp, &c[1], 3) < 3) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - *val_p = val & ((1LL<<(4+3*8))-1); - *crc = crc32(*crc, c, 4); - return 4; - - } else if (val < 0xf8) { - if (hread(fd->fp, &c[1], 4) < 4) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - val = (val<<8) | c[4]; - *val_p = val & ((1LL<<(3+4*8))-1); - *crc = crc32(*crc, c, 5); - return 5; - - } else if (val < 0xfc) { - if (hread(fd->fp, &c[1], 5) < 5) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - val = (val<<8) | c[4]; - val = (val<<8) | c[5]; - *val_p = val & ((1LL<<(2+5*8))-1); - *crc = crc32(*crc, c, 6); - return 6; - - } else if (val < 0xfe) { - if 
(hread(fd->fp, &c[1], 6) < 6) - return -1; - val = (val<<8) | c[1]; - val = (val<<8) | c[2]; - val = (val<<8) | c[3]; - val = (val<<8) | c[4]; - val = (val<<8) | c[5]; - val = (val<<8) | c[6]; - *val_p = val & ((1LL<<(1+6*8))-1); - *crc = crc32(*crc, c, 7); - return 7; - - } else if (val < 0xff) { - uint64_t uval = val; - if (hread(fd->fp, &c[1], 7) < 7) - return -1; - uval = (uval<<8) | c[1]; - uval = (uval<<8) | c[2]; - uval = (uval<<8) | c[3]; - uval = (uval<<8) | c[4]; - uval = (uval<<8) | c[5]; - uval = (uval<<8) | c[6]; - uval = (uval<<8) | c[7]; - *val_p = uval & ((1ULL<<(7*8))-1); - *crc = crc32(*crc, c, 8); - return 8; - - } else { - uint64_t uval; - if (hread(fd->fp, &c[1], 8) < 8) - return -1; - uval = c[1]; - uval = (uval<<8) | c[2]; - uval = (uval<<8) | c[3]; - uval = (uval<<8) | c[4]; - uval = (uval<<8) | c[5]; - uval = (uval<<8) | c[6]; - uval = (uval<<8) | c[7]; - uval = (uval<<8) | c[8]; - *crc = crc32(*crc, c, 9); - // Avoid implementation-defined behaviour on negative values - *val_p = c[1] < 0x80 ? (int64_t) uval : -((int64_t) (0xffffffffffffffffULL - uval)) - 1; - } - - return 9; -} - -/* - * Pushes a value in ITF8 format onto the end of a block. - * This shouldn't be used for high-volume data as it is not the fastest - * method. 
- * - * Returns the number of bytes written - */ -int itf8_put_blk(cram_block *blk, int32_t val) { - char buf[5]; - int sz; - - sz = itf8_put(buf, val); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -int ltf8_put_blk(cram_block *blk, int64_t val) { - char buf[9]; - int sz; - - sz = ltf8_put(buf, val); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -static int64_t safe_itf8_get(char **cp, const char *endp, int *err) { - const unsigned char *up = (unsigned char *)*cp; - - if (endp && endp - *cp < 5 && - (*cp >= endp || endp - *cp < itf8_bytes[up[0]>>4])) { - if (err) *err = 1; - return 0; - } - - if (up[0] < 0x80) { - (*cp)++; - return up[0]; - } else if (up[0] < 0xc0) { - (*cp)+=2; - return ((up[0] <<8) | up[1]) & 0x3fff; - } else if (up[0] < 0xe0) { - (*cp)+=3; - return ((up[0]<<16) | (up[1]<< 8) | up[2]) & 0x1fffff; - } else if (up[0] < 0xf0) { - (*cp)+=4; - uint32_t uv = (((uint32_t)up[0]<<24) | (up[1]<<16) | (up[2]<<8) | up[3]) & 0x0fffffff; - return (int32_t)uv; - } else { - (*cp)+=5; - uint32_t uv = (((uint32_t)up[0] & 0x0f)<<28) | (up[1]<<20) | (up[2]<<12) | (up[3]<<4) | (up[4] & 0x0f); - return (int32_t)uv; - } -} - -static int64_t safe_ltf8_get(char **cp, const char *endp, int *err) { - unsigned char *up = (unsigned char *)*cp; - - if (endp && endp - *cp < 9 && - (*cp >= endp || endp - *cp < ltf8_bytes[up[0]])) { - if (err) *err = 1; - return 0; - } - - if (up[0] < 0x80) { - (*cp)++; - return up[0]; - } else if (up[0] < 0xc0) { - (*cp)+=2; - return (((uint64_t)up[0]<< 8) | - (uint64_t)up[1]) & (((1LL<<(6+8)))-1); - } else if (up[0] < 0xe0) { - (*cp)+=3; - return (((uint64_t)up[0]<<16) | - ((uint64_t)up[1]<< 8) | - (uint64_t)up[2]) & ((1LL<<(5+2*8))-1); - } else if (up[0] < 0xf0) { - (*cp)+=4; - return (((uint64_t)up[0]<<24) | - ((uint64_t)up[1]<<16) | - ((uint64_t)up[2]<< 8) | - (uint64_t)up[3]) & ((1LL<<(4+3*8))-1); - } else if (up[0] < 0xf8) { - (*cp)+=5; - return (((uint64_t)up[0]<<32) | - 
((uint64_t)up[1]<<24) | - ((uint64_t)up[2]<<16) | - ((uint64_t)up[3]<< 8) | - (uint64_t)up[4]) & ((1LL<<(3+4*8))-1); - } else if (up[0] < 0xfc) { - (*cp)+=6; - return (((uint64_t)up[0]<<40) | - ((uint64_t)up[1]<<32) | - ((uint64_t)up[2]<<24) | - ((uint64_t)up[3]<<16) | - ((uint64_t)up[4]<< 8) | - (uint64_t)up[5]) & ((1LL<<(2+5*8))-1); - } else if (up[0] < 0xfe) { - (*cp)+=7; - return (((uint64_t)up[0]<<48) | - ((uint64_t)up[1]<<40) | - ((uint64_t)up[2]<<32) | - ((uint64_t)up[3]<<24) | - ((uint64_t)up[4]<<16) | - ((uint64_t)up[5]<< 8) | - (uint64_t)up[6]) & ((1LL<<(1+6*8))-1); - } else if (up[0] < 0xff) { - (*cp)+=8; - return (((uint64_t)up[1]<<48) | - ((uint64_t)up[2]<<40) | - ((uint64_t)up[3]<<32) | - ((uint64_t)up[4]<<24) | - ((uint64_t)up[5]<<16) | - ((uint64_t)up[6]<< 8) | - (uint64_t)up[7]) & ((1LL<<(7*8))-1); - } else { - (*cp)+=9; - return (((uint64_t)up[1]<<56) | - ((uint64_t)up[2]<<48) | - ((uint64_t)up[3]<<40) | - ((uint64_t)up[4]<<32) | - ((uint64_t)up[5]<<24) | - ((uint64_t)up[6]<<16) | - ((uint64_t)up[7]<< 8) | - (uint64_t)up[8]); - } -} - -// Wrapper for now -static int safe_itf8_put(char *cp, char *cp_end, int32_t val) { - return itf8_put(cp, val); -} - -static int safe_ltf8_put(char *cp, char *cp_end, int64_t val) { - return ltf8_put(cp, val); -} - -static int itf8_size(int64_t v) { - return ((!((v)&~0x7f))?1:(!((v)&~0x3fff))?2:(!((v)&~0x1fffff))?3:(!((v)&~0xfffffff))?4:5); -} - -//----------------------------------------------------------------------------- - -// CRAM v4.0 onwards uses a different variable sized integer encoding -// that is size agnostic. - -// Local interface to varint.h inline version, so we can use in func ptr. -// Note a lot of these use the unsigned interface but take signed int64_t. -// This is because the old CRAM ITF8 inteface had signed -1 as unsigned -// 0xffffffff. 
-static int uint7_size(int64_t v) { - return var_size_u64(v); -} - -static int64_t uint7_get_32(char **cp, const char *endp, int *err) { - uint32_t val; - int nb = var_get_u32((uint8_t *)(*cp), (const uint8_t *)endp, &val); - (*cp) += nb; - if (!nb && err) *err = 1; - return val; -} - -static int64_t sint7_get_32(char **cp, const char *endp, int *err) { - int32_t val; - int nb = var_get_s32((uint8_t *)(*cp), (const uint8_t *)endp, &val); - (*cp) += nb; - if (!nb && err) *err = 1; - return val; -} - -static int64_t uint7_get_64(char **cp, const char *endp, int *err) { - uint64_t val; - int nb = var_get_u64((uint8_t *)(*cp), (const uint8_t *)endp, &val); - (*cp) += nb; - if (!nb && err) *err = 1; - return val; -} - -static int64_t sint7_get_64(char **cp, const char *endp, int *err) { - int64_t val; - int nb = var_get_s64((uint8_t *)(*cp), (const uint8_t *)endp, &val); - (*cp) += nb; - if (!nb && err) *err = 1; - return val; -} - -static int uint7_put_32(char *cp, char *endp, int32_t val) { - return var_put_u32((uint8_t *)cp, (uint8_t *)endp, val); -} - -static int sint7_put_32(char *cp, char *endp, int32_t val) { - return var_put_s32((uint8_t *)cp, (uint8_t *)endp, val); -} - -static int uint7_put_64(char *cp, char *endp, int64_t val) { - return var_put_u64((uint8_t *)cp, (uint8_t *)endp, val); -} - -static int sint7_put_64(char *cp, char *endp, int64_t val) { - return var_put_s64((uint8_t *)cp, (uint8_t *)endp, val); -} - -// Put direct to to cram_block -static int uint7_put_blk_32(cram_block *blk, int32_t v) { - uint8_t buf[10]; - int sz = var_put_u32(buf, buf+10, v); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -static int sint7_put_blk_32(cram_block *blk, int32_t v) { - uint8_t buf[10]; - int sz = var_put_s32(buf, buf+10, v); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -static int uint7_put_blk_64(cram_block *blk, int64_t v) { - uint8_t buf[10]; - int sz = var_put_u64(buf, buf+10, v); - 
BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -static int sint7_put_blk_64(cram_block *blk, int64_t v) { - uint8_t buf[10]; - int sz = var_put_s64(buf, buf+10, v); - BLOCK_APPEND(blk, buf, sz); - return sz; - - block_err: - return -1; -} - -// Decode 32-bits with CRC update from cram_fd -static int uint7_decode_crc32(cram_fd *fd, int32_t *val_p, uint32_t *crc) { - uint8_t b[5], i = 0; - int c; - uint32_t v = 0; - -#ifdef VARINT2 - b[0] = hgetc(fd->fp); - if (b[0] < 177) { - } else if (b[0] < 241) { - b[1] = hgetc(fd->fp); - } else if (b[0] < 249) { - b[1] = hgetc(fd->fp); - b[2] = hgetc(fd->fp); - } else { - int n = b[0]+2, z = 1; - while (n-- >= 249) - b[z++] = hgetc(fd->fp); - } - i = var_get_u32(b, NULL, &v); -#else -// // Little endian -// int s = 0; -// do { -// b[i++] = c = hgetc(fd->fp); -// if (c < 0) -// return -1; -// v |= (c & 0x7f) << s; -// s += 7; -// } while (i < 5 && (c & 0x80)); - - // Big endian, see also htscodecs/varint.h - do { - b[i++] = c = hgetc(fd->fp); - if (c < 0) - return -1; - v = (v<<7) | (c & 0x7f); - } while (i < 5 && (c & 0x80)); -#endif - *crc = crc32(*crc, b, i); - - *val_p = v; - return i; -} - -// Decode 32-bits with CRC update from cram_fd -static int sint7_decode_crc32(cram_fd *fd, int32_t *val_p, uint32_t *crc) { - uint8_t b[5], i = 0; - int c; - uint32_t v = 0; - -#ifdef VARINT2 - b[0] = hgetc(fd->fp); - if (b[0] < 177) { - } else if (b[0] < 241) { - b[1] = hgetc(fd->fp); - } else if (b[0] < 249) { - b[1] = hgetc(fd->fp); - b[2] = hgetc(fd->fp); - } else { - int n = b[0]+2, z = 1; - while (n-- >= 249) - b[z++] = hgetc(fd->fp); - } - i = var_get_u32(b, NULL, &v); -#else -// // Little endian -// int s = 0; -// do { -// b[i++] = c = hgetc(fd->fp); -// if (c < 0) -// return -1; -// v |= (c & 0x7f) << s; -// s += 7; -// } while (i < 5 && (c & 0x80)); - - // Big endian, see also htscodecs/varint.h - do { - b[i++] = c = hgetc(fd->fp); - if (c < 0) - return -1; - v = (v<<7) | (c & 0x7f); - } while (i < 5 
&& (c & 0x80)); -#endif - *crc = crc32(*crc, b, i); - - *val_p = (v>>1) ^ -(v&1); - return i; -} - - -// Decode 64-bits with CRC update from cram_fd -static int uint7_decode_crc64(cram_fd *fd, int64_t *val_p, uint32_t *crc) { - uint8_t b[10], i = 0; - int c; - uint64_t v = 0; - -#ifdef VARINT2 - b[0] = hgetc(fd->fp); - if (b[0] < 177) { - } else if (b[0] < 241) { - b[1] = hgetc(fd->fp); - } else if (b[0] < 249) { - b[1] = hgetc(fd->fp); - b[2] = hgetc(fd->fp); - } else { - int n = b[0]+2, z = 1; - while (n-- >= 249) - b[z++] = hgetc(fd->fp); - } - i = var_get_u64(b, NULL, &v); -#else -// // Little endian -// int s = 0; -// do { -// b[i++] = c = hgetc(fd->fp); -// if (c < 0) -// return -1; -// v |= (c & 0x7f) << s; -// s += 7; -// } while (i < 10 && (c & 0x80)); - - // Big endian, see also htscodecs/varint.h - do { - b[i++] = c = hgetc(fd->fp); - if (c < 0) - return -1; - v = (v<<7) | (c & 0x7f); - } while (i < 5 && (c & 0x80)); -#endif - *crc = crc32(*crc, b, i); - - *val_p = v; - return i; -} - -//----------------------------------------------------------------------------- - -/* - * Decodes a 32-bit little endian value from fd and stores in val. - * - * Returns the number of bytes read on success - * -1 on failure - */ -static int int32_decode(cram_fd *fd, int32_t *val) { - int32_t i; - if (4 != hread(fd->fp, &i, 4)) - return -1; - - *val = le_int4(i); - return 4; -} - -/* - * Encodes a 32-bit little endian value 'val' and writes to fd. 
- * - * Returns the number of bytes written on success - * -1 on failure - */ -static int int32_encode(cram_fd *fd, int32_t val) { - uint32_t v = le_int4(val); - if (4 != hwrite(fd->fp, &v, 4)) - return -1; - - return 4; -} - -/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ -int int32_get_blk(cram_block *b, int32_t *val) { - if (b->uncomp_size - BLOCK_SIZE(b) < 4) - return -1; - - uint32_t v = - ((uint32_t) b->data[b->byte ]) | - (((uint32_t) b->data[b->byte+1]) << 8) | - (((uint32_t) b->data[b->byte+2]) << 16) | - (((uint32_t) b->data[b->byte+3]) << 24); - // Avoid implementation-defined behaviour on negative values - *val = v < 0x80000000U ? (int32_t) v : -((int32_t) (0xffffffffU - v)) - 1; - BLOCK_SIZE(b) += 4; - return 4; -} - -/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ -int int32_put_blk(cram_block *b, int32_t val) { - unsigned char cp[4]; - uint32_t v = val; - cp[0] = ( v & 0xff); - cp[1] = ((v>>8) & 0xff); - cp[2] = ((v>>16) & 0xff); - cp[3] = ((v>>24) & 0xff); - - BLOCK_APPEND(b, cp, 4); - return 0; - - block_err: - return -1; -} - -#ifdef HAVE_LIBDEFLATE -/* ---------------------------------------------------------------------- - * libdeflate compression code, with interface to match - * zlib_mem_{in,de}flate for simplicity elsewhere. - */ - -// Named the same as the version that uses zlib as we always use libdeflate for -// decompression when available. 
-char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) { - struct libdeflate_decompressor *z = libdeflate_alloc_decompressor(); - if (!z) { - hts_log_error("Call to libdeflate_alloc_decompressor failed"); - return NULL; - } - - uint8_t *data = NULL, *new_data; - if (!*size) - *size = csize*2; - for(;;) { - new_data = realloc(data, *size); - if (!new_data) { - hts_log_error("Memory allocation failure"); - goto fail; - } - data = new_data; - - int ret = libdeflate_gzip_decompress(z, cdata, csize, data, *size, size); - - // Auto grow output buffer size if needed and try again. - // Fortunately for all bar one call of this we know the size already. - if (ret == LIBDEFLATE_INSUFFICIENT_SPACE) { - (*size) *= 1.5; - continue; - } - - if (ret != LIBDEFLATE_SUCCESS) { - hts_log_error("Inflate operation failed: %d", ret); - goto fail; - } else { - break; - } - } - - libdeflate_free_decompressor(z); - return (char *)data; - - fail: - libdeflate_free_decompressor(z); - free(data); - return NULL; -} - -// Named differently as we use both zlib/libdeflate for compression. -static char *libdeflate_deflate(char *data, size_t size, size_t *cdata_size, - int level, int strat) { - level = level > 0 ? 
level : 6; // libdeflate doesn't honour -1 as default - level *= 1.23; // NB levels go up to 12 here; 5 onwards is +1 - level += level>=8; // 5,6,7->6,7,8 8->10 9->12 - if (level > 12) level = 12; - - if (strat == Z_RLE) // not supported by libdeflate - level = 1; - - struct libdeflate_compressor *z = libdeflate_alloc_compressor(level); - if (!z) { - hts_log_error("Call to libdeflate_alloc_compressor failed"); - return NULL; - } - - unsigned char *cdata = NULL; /* Compressed output */ - size_t cdata_alloc; - cdata = malloc(cdata_alloc = size*1.05+100); - if (!cdata) { - hts_log_error("Memory allocation failure"); - libdeflate_free_compressor(z); - return NULL; - } - - *cdata_size = libdeflate_gzip_compress(z, data, size, cdata, cdata_alloc); - libdeflate_free_compressor(z); - - if (*cdata_size == 0) { - hts_log_error("Call to libdeflate_gzip_compress failed"); - free(cdata); - return NULL; - } - - return (char *)cdata; -} - -#else - -/* ---------------------------------------------------------------------- - * zlib compression code - from Gap5's tg_iface_g.c - * They're static here as they're only used within the cram_compress_block - * and cram_uncompress_block functions, which are the external interface. 
- */ -char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) { - z_stream s; - unsigned char *data = NULL; /* Uncompressed output */ - int data_alloc = 0; - int err; - - /* Starting point at uncompressed size, and scale after that */ - data = malloc(data_alloc = csize*1.2+100); - if (!data) - return NULL; - - /* Initialise zlib stream */ - s.zalloc = Z_NULL; /* use default allocation functions */ - s.zfree = Z_NULL; - s.opaque = Z_NULL; - s.next_in = (unsigned char *)cdata; - s.avail_in = csize; - s.total_in = 0; - s.next_out = data; - s.avail_out = data_alloc; - s.total_out = 0; - - //err = inflateInit(&s); - err = inflateInit2(&s, 15 + 32); - if (err != Z_OK) { - hts_log_error("Call to zlib inflateInit failed: %s", s.msg); - free(data); - return NULL; - } - - /* Decode to 'data' array */ - for (;s.avail_in;) { - unsigned char *data_tmp; - int alloc_inc; - - s.next_out = &data[s.total_out]; - err = inflate(&s, Z_NO_FLUSH); - if (err == Z_STREAM_END) - break; - - if (err != Z_OK) { - hts_log_error("Call to zlib inflate failed: %s", s.msg); - free(data); - inflateEnd(&s); - return NULL; - } - - /* More to come, so realloc based on growth so far */ - alloc_inc = (double)s.avail_in/s.total_in * s.total_out + 100; - data = realloc((data_tmp = data), data_alloc += alloc_inc); - if (!data) { - free(data_tmp); - inflateEnd(&s); - return NULL; - } - s.avail_out += alloc_inc; - } - inflateEnd(&s); - - *size = s.total_out; - return (char *)data; -} -#endif - -#if !defined(HAVE_LIBDEFLATE) || LIBDEFLATE_VERSION_MAJOR < 1 || (LIBDEFLATE_VERSION_MAJOR == 1 && LIBDEFLATE_VERSION_MINOR <= 8) -static char *zlib_mem_deflate(char *data, size_t size, size_t *cdata_size, - int level, int strat) { - z_stream s; - unsigned char *cdata = NULL; /* Compressed output */ - int cdata_alloc = 0; - int cdata_pos = 0; - int err; - - cdata = malloc(cdata_alloc = size*1.05+100); - if (!cdata) - return NULL; - cdata_pos = 0; - - /* Initialise zlib stream */ - s.zalloc = Z_NULL; /* use 
default allocation functions */ - s.zfree = Z_NULL; - s.opaque = Z_NULL; - s.next_in = (unsigned char *)data; - s.avail_in = size; - s.total_in = 0; - s.next_out = cdata; - s.avail_out = cdata_alloc; - s.total_out = 0; - s.data_type = Z_BINARY; - - err = deflateInit2(&s, level, Z_DEFLATED, 15|16, 9, strat); - if (err != Z_OK) { - hts_log_error("Call to zlib deflateInit2 failed: %s", s.msg); - return NULL; - } - - /* Encode to 'cdata' array */ - for (;s.avail_in;) { - s.next_out = &cdata[cdata_pos]; - s.avail_out = cdata_alloc - cdata_pos; - if (cdata_alloc - cdata_pos <= 0) { - hts_log_error("Deflate produced larger output than expected"); - return NULL; - } - err = deflate(&s, Z_NO_FLUSH); - cdata_pos = cdata_alloc - s.avail_out; - if (err != Z_OK) { - hts_log_error("Call to zlib deflate failed: %s", s.msg); - break; - } - } - if (deflate(&s, Z_FINISH) != Z_STREAM_END) { - hts_log_error("Call to zlib deflate failed: %s", s.msg); - } - *cdata_size = s.total_out; - - if (deflateEnd(&s) != Z_OK) { - hts_log_error("Call to zlib deflate failed: %s", s.msg); - } - return (char *)cdata; -} -#endif - -#ifdef HAVE_LIBLZMA -/* ------------------------------------------------------------------------ */ -/* - * Data compression routines using liblzma (xz) - * - * On a test set this shrunk the main db from 136157104 bytes to 114796168, but - * caused tg_index to grow from 2m43.707s to 15m3.961s. Exporting as bfastq - * went from 18.3s to 36.3s. So decompression suffers too, but not as bad - * as compression times. - * - * For now we disable this functionality. If it's to be reenabled make sure you - * improve the mem_inflate implementation as it's just a test hack at the - * moment. 
- */ - -static char *lzma_mem_deflate(char *data, size_t size, size_t *cdata_size, - int level) { - char *out; - size_t out_size = lzma_stream_buffer_bound(size); - *cdata_size = 0; - - out = malloc(out_size); - - /* Single call compression */ - if (LZMA_OK != lzma_easy_buffer_encode(level, LZMA_CHECK_CRC32, NULL, - (uint8_t *)data, size, - (uint8_t *)out, cdata_size, - out_size)) - return NULL; - - return out; -} - -static char *lzma_mem_inflate(char *cdata, size_t csize, size_t *size) { - lzma_stream strm = LZMA_STREAM_INIT; - size_t out_size = 0, out_pos = 0; - char *out = NULL, *new_out; - int r; - - /* Initiate the decoder */ - if (LZMA_OK != lzma_stream_decoder(&strm, lzma_easy_decoder_memusage(9), 0)) - return NULL; - - /* Decode loop */ - strm.avail_in = csize; - strm.next_in = (uint8_t *)cdata; - - for (;strm.avail_in;) { - if (strm.avail_in > out_size - out_pos) { - out_size += strm.avail_in * 4 + 32768; - new_out = realloc(out, out_size); - if (!new_out) - goto fail; - out = new_out; - } - strm.avail_out = out_size - out_pos; - strm.next_out = (uint8_t *)&out[out_pos]; - - r = lzma_code(&strm, LZMA_RUN); - if (LZMA_OK != r && LZMA_STREAM_END != r) { - hts_log_error("LZMA decode failure (error %d)", r); - goto fail; - } - - out_pos = strm.total_out; - - if (r == LZMA_STREAM_END) - break; - } - - /* finish up any unflushed data; necessary? */ - r = lzma_code(&strm, LZMA_FINISH); - if (r != LZMA_OK && r != LZMA_STREAM_END) { - hts_log_error("Call to lzma_code failed with error %d", r); - goto fail; - } - - new_out = realloc(out, strm.total_out > 0 ? strm.total_out : 1); - if (new_out) - out = new_out; - *size = strm.total_out; - - lzma_end(&strm); - - return out; - - fail: - lzma_end(&strm); - free(out); - return NULL; -} -#endif - -/* ---------------------------------------------------------------------- - * CRAM blocks - the dynamically growable data block. We have code to - * create, update, (un)compress and read/write. 
- * - * These are derived from the deflate_interlaced.c blocks, but with the - * CRAM extension of content types and IDs. - */ - -/* - * Allocates a new cram_block structure with a specified content_type and - * id. - * - * Returns block pointer on success - * NULL on failure - */ -cram_block *cram_new_block(enum cram_content_type content_type, - int content_id) { - cram_block *b = malloc(sizeof(*b)); - if (!b) - return NULL; - b->method = b->orig_method = RAW; - b->content_type = content_type; - b->content_id = content_id; - b->comp_size = 0; - b->uncomp_size = 0; - b->data = NULL; - b->alloc = 0; - b->byte = 0; - b->bit = 7; // MSB - b->crc32 = 0; - b->idx = 0; - b->m = NULL; - - return b; -} - -/* - * Reads a block from a cram file. - * Returns cram_block pointer on success. - * NULL on failure - */ -cram_block *cram_read_block(cram_fd *fd) { - cram_block *b = malloc(sizeof(*b)); - unsigned char c; - uint32_t crc = 0; - if (!b) - return NULL; - - //fprintf(stderr, "Block at %d\n", (int)ftell(fd->fp)); - - if (-1 == (b->method = hgetc(fd->fp))) { free(b); return NULL; } - c = b->method; crc = crc32(crc, &c, 1); - if (-1 == (b->content_type= hgetc(fd->fp))) { free(b); return NULL; } - c = b->content_type; crc = crc32(crc, &c, 1); - if (-1 == fd->vv.varint_decode32_crc(fd, &b->content_id, &crc)) { free(b); return NULL; } - if (-1 == fd->vv.varint_decode32_crc(fd, &b->comp_size, &crc)) { free(b); return NULL; } - if (-1 == fd->vv.varint_decode32_crc(fd, &b->uncomp_size, &crc)) { free(b); return NULL; } - - //fprintf(stderr, " method %d, ctype %d, cid %d, csize %d, ucsize %d\n", - // b->method, b->content_type, b->content_id, b->comp_size, b->uncomp_size); - - if (b->method == RAW) { - if (b->uncomp_size < 0 || b->comp_size != b->uncomp_size) { - free(b); - return NULL; - } - b->alloc = b->uncomp_size; - if (!(b->data = malloc(b->uncomp_size))){ free(b); return NULL; } - if (b->uncomp_size != hread(fd->fp, b->data, b->uncomp_size)) { - free(b->data); - free(b); - 
return NULL; - } - } else { - if (b->comp_size < 0 || b->uncomp_size < 0) { - free(b); - return NULL; - } - b->alloc = b->comp_size; - if (!(b->data = malloc(b->comp_size))) { free(b); return NULL; } - if (b->comp_size != hread(fd->fp, b->data, b->comp_size)) { - free(b->data); - free(b); - return NULL; - } - } - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - if (-1 == int32_decode(fd, (int32_t *)&b->crc32)) { - free(b->data); - free(b); - return NULL; - } - - b->crc32_checked = fd->ignore_md5; - b->crc_part = crc; - } else { - b->crc32_checked = 1; // CRC not present - } - - b->orig_method = b->method; - b->idx = 0; - b->byte = 0; - b->bit = 7; // MSB - - return b; -} - - -/* - * Computes the size of a cram block, including the block - * header itself. - */ -uint32_t cram_block_size(cram_block *b) { - unsigned char dat[100], *cp = dat;; - uint32_t sz; - - *cp++ = b->method; - *cp++ = b->content_type; - cp += itf8_put((char*)cp, b->content_id); - cp += itf8_put((char*)cp, b->comp_size); - cp += itf8_put((char*)cp, b->uncomp_size); - - sz = cp-dat + 4; - sz += b->method == RAW ? b->uncomp_size : b->comp_size; - - return sz; -} - -/* - * Writes a CRAM block. 
- * Returns 0 on success - * -1 on failure - */ -int cram_write_block(cram_fd *fd, cram_block *b) { - char vardata[100]; - int vardata_o = 0; - - assert(b->method != RAW || (b->comp_size == b->uncomp_size)); - - if (hputc(b->method, fd->fp) == EOF) return -1; - if (hputc(b->content_type, fd->fp) == EOF) return -1; - vardata_o += fd->vv.varint_put32(vardata , vardata+100, b->content_id); - vardata_o += fd->vv.varint_put32(vardata+vardata_o, vardata+100, b->comp_size); - vardata_o += fd->vv.varint_put32(vardata+vardata_o, vardata+100, b->uncomp_size); - if (vardata_o != hwrite(fd->fp, vardata, vardata_o)) - return -1; - - if (b->data) { - if (b->method == RAW) { - if (b->uncomp_size != hwrite(fd->fp, b->data, b->uncomp_size)) - return -1; - } else { - if (b->comp_size != hwrite(fd->fp, b->data, b->comp_size)) - return -1; - } - } else { - // Absent blocks should be size 0 - assert(b->method == RAW && b->uncomp_size == 0); - } - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - char dat[100], *cp = (char *)dat; - uint32_t crc; - - *cp++ = b->method; - *cp++ = b->content_type; - cp += fd->vv.varint_put32(cp, dat+100, b->content_id); - cp += fd->vv.varint_put32(cp, dat+100, b->comp_size); - cp += fd->vv.varint_put32(cp, dat+100, b->uncomp_size); - crc = crc32(0L, (uc *)dat, cp-dat); - - if (b->method == RAW) { - b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->uncomp_size); - } else { - b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->comp_size); - } - - if (-1 == int32_encode(fd, b->crc32)) - return -1; - } - - return 0; -} - -/* - * Frees a CRAM block, deallocating internal data too. - */ -void cram_free_block(cram_block *b) { - if (!b) - return; - if (b->data) - free(b->data); - free(b); -} - -/* - * Uncompresses a CRAM block, if compressed. 
- */ -int cram_uncompress_block(cram_block *b) { - char *uncomp; - size_t uncomp_size = 0; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Pretend the CRC was OK so the fuzzer doesn't have to get it right - b->crc32_checked = 1; -#endif - - if (b->crc32_checked == 0) { - uint32_t crc = crc32(b->crc_part, b->data ? b->data : (uc *)"", b->alloc); - b->crc32_checked = 1; - if (crc != b->crc32) { - hts_log_error("Block CRC32 failure"); - return -1; - } - } - - if (b->uncomp_size == 0) { - // blank block - b->method = RAW; - return 0; - } - assert(b->uncomp_size >= 0); // cram_read_block should ensure this - - switch (b->method) { - case RAW: - return 0; - - case GZIP: - uncomp_size = b->uncomp_size; - uncomp = zlib_mem_inflate((char *)b->data, b->comp_size, &uncomp_size); - - if (!uncomp) - return -1; - if (uncomp_size != b->uncomp_size) { - free(uncomp); - return -1; - } - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = uncomp_size; - b->method = RAW; - break; - -#ifdef HAVE_LIBBZ2 - case BZIP2: { - unsigned int usize = b->uncomp_size; - if (!(uncomp = malloc(usize))) - return -1; - if (BZ_OK != BZ2_bzBuffToBuffDecompress(uncomp, &usize, - (char *)b->data, b->comp_size, - 0, 0)) { - free(uncomp); - return -1; - } - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = usize; - b->method = RAW; - b->uncomp_size = usize; // Just in case it differs - break; - } -#else - case BZIP2: - hts_log_error("Bzip2 compression is not compiled into this version. Please rebuild and try again"); - return -1; -#endif - -#ifdef HAVE_LIBLZMA - case LZMA: - uncomp = lzma_mem_inflate((char *)b->data, b->comp_size, &uncomp_size); - if (!uncomp) - return -1; - if (uncomp_size != b->uncomp_size) { - free(uncomp); - return -1; - } - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = uncomp_size; - b->method = RAW; - break; -#else - case LZMA: - hts_log_error("Lzma compression is not compiled into this version. 
Please rebuild and try again"); - return -1; - break; -#endif - - case RANS: { - unsigned int usize = b->uncomp_size, usize2; - uncomp = (char *)rans_uncompress(b->data, b->comp_size, &usize2); - if (!uncomp) - return -1; - if (usize != usize2) { - free(uncomp); - return -1; - } - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = usize2; - b->method = RAW; - b->uncomp_size = usize2; // Just in case it differs - //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); - break; - } - - case FQZ: { - uncomp_size = b->uncomp_size; - uncomp = fqz_decompress((char *)b->data, b->comp_size, &uncomp_size, NULL, 0); - if (!uncomp) - return -1; - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = uncomp_size; - b->method = RAW; - b->uncomp_size = uncomp_size; - break; - } - - case RANS_PR0: { - unsigned int usize = b->uncomp_size, usize2; - uncomp = (char *)rans_uncompress_4x16(b->data, b->comp_size, &usize2); - if (!uncomp) - return -1; - if (usize != usize2) { - free(uncomp); - return -1; - } - b->orig_method = RANS_PR0 + (b->data[0]&1) - + 2*((b->data[0]&0x40)>0) + 4*((b->data[0]&0x80)>0); - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = usize2; - b->method = RAW; - b->uncomp_size = usize2; // Just incase it differs - //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); - break; - } - - case ARITH_PR0: { - unsigned int usize = b->uncomp_size, usize2; - uncomp = (char *)arith_uncompress_to(b->data, b->comp_size, NULL, &usize2); - if (!uncomp) - return -1; - if (usize != usize2) { - free(uncomp); - return -1; - } - b->orig_method = ARITH_PR0 + (b->data[0]&1) - + 2*((b->data[0]&0x40)>0) + 4*((b->data[0]&0x80)>0); - free(b->data); - b->data = (unsigned char *)uncomp; - b->alloc = usize2; - b->method = RAW; - b->uncomp_size = usize2; // Just incase it differs - //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); - break; - } - - case TOK3: { - uint32_t out_len; - uint8_t *cp = 
tok3_decode_names(b->data, b->comp_size, &out_len); - if (!cp) - return -1; - b->orig_method = TOK3; - b->method = RAW; - free(b->data); - b->data = cp; - b->alloc = out_len; - b->uncomp_size = out_len; - break; - } - - default: - return -1; - } - - return 0; -} - -static char *cram_compress_by_method(cram_slice *s, char *in, size_t in_size, - int content_id, size_t *out_size, - enum cram_block_method_int method, - int level, int strat) { - switch (method) { - case GZIP: - case GZIP_RLE: - case GZIP_1: - // Read names bizarrely benefit from zlib over libdeflate for - // mid-range compression levels. Focusing purely of ratio or - // speed, libdeflate still wins. It also seems to win for - // other data series too. - // - // Eg RN at level 5; libdeflate=55.9MB zlib=51.6MB -#ifdef HAVE_LIBDEFLATE -# if (LIBDEFLATE_VERSION_MAJOR < 1 || (LIBDEFLATE_VERSION_MAJOR == 1 && LIBDEFLATE_VERSION_MINOR <= 8)) - if (content_id == DS_RN && level >= 4 && level <= 7) - return zlib_mem_deflate(in, in_size, out_size, level, strat); - else -# endif - return libdeflate_deflate(in, in_size, out_size, level, strat); -#else - return zlib_mem_deflate(in, in_size, out_size, level, strat); -#endif - - case BZIP2: { -#ifdef HAVE_LIBBZ2 - unsigned int comp_size = in_size*1.01 + 600; - char *comp = malloc(comp_size); - if (!comp) - return NULL; - - if (BZ_OK != BZ2_bzBuffToBuffCompress(comp, &comp_size, - in, in_size, - level, 0, 30)) { - free(comp); - return NULL; - } - *out_size = comp_size; - return comp; -#else - return NULL; -#endif - } - - case FQZ: - case FQZ_b: - case FQZ_c: - case FQZ_d: { - // Extract the necessary portion of the slice into an fqz_slice struct. - // These previously were the same thing, but this permits us to detach - // the codec from the rest of this CRAM implementation. 
- fqz_slice *f = malloc(2*s->hdr->num_records * sizeof(uint32_t) + sizeof(fqz_slice)); - if (!f) - return NULL; - f->num_records = s->hdr->num_records; - f->len = (uint32_t *)(((char *)f) + sizeof(fqz_slice)); - f->flags = f->len + s->hdr->num_records; - int i; - for (i = 0; i < s->hdr->num_records; i++) { - f->flags[i] = s->crecs[i].flags; - f->len[i] = (i+1 < s->hdr->num_records - ? s->crecs[i+1].qual - s->crecs[i].qual - : s->block[DS_QS]->uncomp_size - s->crecs[i].qual); - } - char *comp = fqz_compress(strat & 0xff /* cram vers */, f, - in, in_size, out_size, strat >> 8, NULL); - free(f); - return comp; - } - - case LZMA: -#ifdef HAVE_LIBLZMA - return lzma_mem_deflate(in, in_size, out_size, level); -#else - return NULL; -#endif - - case RANS0: - case RANS1: { - unsigned int out_size_i; - unsigned char *cp; - cp = rans_compress((unsigned char *)in, in_size, &out_size_i, - method == RANS0 ? 0 : 1); - *out_size = out_size_i; - return (char *)cp; - } - - case RANS_PR0: - case RANS_PR1: - case RANS_PR64: - case RANS_PR9: - case RANS_PR128: - case RANS_PR129: - case RANS_PR192: - case RANS_PR193: { - unsigned int out_size_i; - unsigned char *cp; - - // see enum cram_block. We map RANS_* methods to order bit-fields - static int methmap[] = { 1, 64,9, 128,129, 192,193 }; - - int m = method == RANS_PR0 ? 0 : methmap[method - RANS_PR1]; - cp = rans_compress_4x16((unsigned char *)in, in_size, &out_size_i, - m | RANS_ORDER_SIMD_AUTO); - *out_size = out_size_i; - return (char *)cp; - } - - case ARITH_PR0: - case ARITH_PR1: - case ARITH_PR64: - case ARITH_PR9: - case ARITH_PR128: - case ARITH_PR129: - case ARITH_PR192: - case ARITH_PR193: { - unsigned int out_size_i; - unsigned char *cp; - - // see enum cram_block. We map ARITH_* methods to order bit-fields - static int methmap[] = { 1, 64,9, 128,129, 192,193 }; - - cp = arith_compress_to((unsigned char *)in, in_size, NULL, &out_size_i, - method == ARITH_PR0 ? 
0 : methmap[method - ARITH_PR1]); - *out_size = out_size_i; - return (char *)cp; - } - - case TOK3: - case TOKA: { - int out_len; - int lev = level; - if (method == TOK3 && lev > 3) - lev = 3; - uint8_t *cp = tok3_encode_names(in, in_size, lev, strat, &out_len, NULL); - *out_size = out_len; - return (char *)cp; - } - - case RAW: - break; - - default: - return NULL; - } - - return NULL; -} - - -/* - * Compresses a block using one of two different zlib strategies. If we only - * want one choice set strat2 to be -1. - * - * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED - * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is - * significantly faster. - * - * Method and level -1 implies defaults, as specified in cram_fd. - */ -int cram_compress_block2(cram_fd *fd, cram_slice *s, - cram_block *b, cram_metrics *metrics, - int method, int level) { - - if (!b) - return 0; - - char *comp = NULL; - size_t comp_size = 0; - int strat; - - // Internally we have parameterised methods that externally map - // to the same CRAM method value. - // See enum_cram_block_method_int in cram_structs.h. - int methmap[] = { - // Externally defined values - RAW, GZIP, BZIP2, LZMA, RANS, RANSPR, ARITH, FQZ, TOK3, - - // Reserved for possible expansion - 0, 0, - - // Internally parameterised versions matching back to above - // external values - GZIP, GZIP, - FQZ, FQZ, FQZ, - RANS, - RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, - TOK3, - ARITH, ARITH, ARITH, ARITH, ARITH, ARITH, ARITH, - }; - - if (b->method != RAW) { - // Maybe already compressed if s->block[0] was compressed and - // we have e.g. s->block[DS_BA] set to s->block[0] due to only - // one base type present and hence using E_HUFFMAN on block 0. - // A second explicit attempt to compress the same block then - // occurs. 
- return 0; - } - - if (method == -1) { - method = 1<use_bz2) - method |= 1<use_lzma) - method |= 1<level; - - //fprintf(stderr, "IN: block %d, sz %d\n", b->content_id, b->uncomp_size); - - if (method == RAW || level == 0 || b->uncomp_size == 0) { - b->method = RAW; - b->comp_size = b->uncomp_size; - //fprintf(stderr, "Skip block id %d\n", b->content_id); - return 0; - } - -#ifndef ABS -# define ABS(a) ((a)>=0?(a):-(a)) -#endif - - if (metrics) { - pthread_mutex_lock(&fd->metrics_lock); - // Sudden changes in size trigger a retrial. These are mainly - // triggered when switching to sorted / unsorted, where the number - // of elements in a slice radically changes. - // - // We also get large fluctuations based on genome coordinate for - // e.g. SA:Z and SC series, but we consider the typical scale of - // delta between blocks and use this to look for abnormality. - if (metrics->input_avg_sz && - (b->uncomp_size + 1000 > 4*(metrics->input_avg_sz+1000) || - b->uncomp_size + 1000 < (metrics->input_avg_sz+1000)/4) && - ABS(b->uncomp_size-metrics->input_avg_sz) - > 10*metrics->input_avg_delta) { - metrics->next_trial = 0; - } - - if (metrics->trial > 0 || --metrics->next_trial <= 0) { - int m, unpackable = metrics->unpackable; - size_t sz_best = b->uncomp_size; - size_t sz[CRAM_MAX_METHOD] = {0}; - int method_best = 0; // RAW - char *c_best = NULL, *c = NULL; - - metrics->input_avg_delta = - 0.9 * (metrics->input_avg_delta + - ABS(b->uncomp_size - metrics->input_avg_sz)); - - metrics->input_avg_sz += b->uncomp_size*.2; - metrics->input_avg_sz *= 0.8; - - if (metrics->revised_method) - method = metrics->revised_method; - else - metrics->revised_method = method; - - if (metrics->next_trial <= 0) { - metrics->next_trial = TRIAL_SPAN; - metrics->trial = NTRIALS; - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] /= 2; - metrics->unpackable = 0; - } - - // Compress this block using the best method - if (unpackable && CRAM_MAJOR_VERS(fd->version) > 3) { - // No point 
trying bit-pack if 17+ symbols. - if (method & (1<metrics_lock); - - for (m = 0; m < CRAM_MAX_METHOD; m++) { - if (method & (1u<version); break; - case FQZ_b: strat = CRAM_MAJOR_VERS(fd->version)+256; break; - case FQZ_c: strat = CRAM_MAJOR_VERS(fd->version)+2*256; break; - case FQZ_d: strat = CRAM_MAJOR_VERS(fd->version)+3*256; break; - case TOK3: strat = 0; break; - case TOKA: strat = 1; break; - default: strat = 0; - } - - c = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, - b->content_id, &sz[m], m, lvl, strat); - - if (c && sz_best > sz[m]) { - sz_best = sz[m]; - method_best = m; - if (c_best) - free(c_best); - c_best = c; - } else if (c) { - free(c); - } else { - sz[m] = UINT_MAX; // arbitrarily worse than raw - } - } else { - sz[m] = UINT_MAX; // arbitrarily worse than raw - } - } - - if (c_best) { - free(b->data); - b->data = (unsigned char *)c_best; - b->method = method_best; // adjusted to methmap[method_best] later - b->comp_size = sz_best; - } - - // Accumulate stats for all methods tried - pthread_mutex_lock(&fd->metrics_lock); - for (m = 0; m < CRAM_MAX_METHOD; m++) - // don't be overly sure on small blocks. - // +2000 means eg bzip2 vs gzip (1.07 to 1.04) or gz vs rans1 - // needs to be at least 60 bytes smaller to overcome the - // fixed size addition. - metrics->sz[m] += sz[m]+2000; - - // When enough trials performed, find the best on average - if (--metrics->trial == 0) { - int best_method = RAW; - int best_sz = INT_MAX; - - // Relative costs of methods. 
See enum_cram_block_method_int - // and methmap - double meth_cost[32] = { - // Externally defined methods - 1, // 0 raw - 1.04, // 1 gzip (Z_FILTERED) - 1.07, // 2 bzip2 - 1.08, // 3 lzma - 1.00, // 4 rans (O0) - 1.00, // 5 ranspr (O0) - 1.04, // 6 arithpr (O0) - 1.05, // 7 fqz - 1.05, // 8 tok3 (rans) - 1.00, 1.00, // 9,10 reserved - - // Paramterised versions of above - 1.01, // gzip rle - 1.01, // gzip -1 - - 1.05, 1.05, 1.05, // FQZ_b,c,d - - 1.01, // rans O1 - - 1.01, // rans_pr1 - 1.00, // rans_pr64; if smaller, usually fast - 1.03, // rans_pr65/9 - 1.00, // rans_pr128 - 1.01, // rans_pr129 - 1.00, // rans_pr192 - 1.01, // rans_pr193 - - 1.07, // tok3 arith - - 1.04, // arith_pr1 - 1.04, // arith_pr64 - 1.04, // arith_pr9 - 1.03, // arith_pr128 - 1.04, // arith_pr129 - 1.04, // arith_pr192 - 1.04, // arith_pr193 - }; - - // Scale methods by cost based on compression level - if (fd->level <= 1) { - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] *= 1+(meth_cost[m]-1)*4; - } else if (fd->level <= 3) { - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] *= 1+(meth_cost[m]-1); - } else if (fd->level <= 6) { - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] *= 1+(meth_cost[m]-1)/2; - } else if (fd->level <= 7) { - for (m = 0; m < CRAM_MAX_METHOD; m++) - metrics->sz[m] *= 1+(meth_cost[m]-1)/3; - } // else cost is ignored - - // Ensure these are never used; BSC and ZSTD - metrics->sz[9] = metrics->sz[10] = INT_MAX; - - for (m = 0; m < CRAM_MAX_METHOD; m++) { - if ((!metrics->sz[m]) || (!(method & (1u< metrics->sz[m]) - best_sz = metrics->sz[m], best_method = m; - } - - if (best_method != metrics->method) { - //metrics->trial = (NTRIALS+1)/2; // be sure - //metrics->next_trial /= 1.5; - metrics->consistency = 0; - } else { - metrics->next_trial *= MIN(2, 1+metrics->consistency/4.0); - metrics->consistency++; - } - - metrics->method = best_method; - switch (best_method) { - case GZIP: strat = Z_FILTERED; break; - case GZIP_1: strat = 
Z_DEFAULT_STRATEGY; break; - case GZIP_RLE: strat = Z_RLE; break; - case FQZ: strat = CRAM_MAJOR_VERS(fd->version); break; - case FQZ_b: strat = CRAM_MAJOR_VERS(fd->version)+256; break; - case FQZ_c: strat = CRAM_MAJOR_VERS(fd->version)+2*256; break; - case FQZ_d: strat = CRAM_MAJOR_VERS(fd->version)+3*256; break; - case TOK3: strat = 0; break; - case TOKA: strat = 1; break; - default: strat = 0; - } - metrics->strat = strat; - - // If we see at least MAXFAIL trials in a row for a specific - // compression method with more than MAXDELTA aggregate - // size then we drop this from the list of methods used - // for this block type. -#define MAXDELTA 0.20 -#define MAXFAILS 4 - for (m = 0; m < CRAM_MAX_METHOD; m++) { - if (best_method == m) { - metrics->cnt[m] = 0; - metrics->extra[m] = 0; - } else if (best_sz < metrics->sz[m]) { - double r = (double)metrics->sz[m] / best_sz - 1; - int mul = 1+(fd->level>=7); - if (++metrics->cnt[m] >= MAXFAILS*mul && - (metrics->extra[m] += r) >= MAXDELTA*mul) - method &= ~(1u<sz[m] > best_sz) - method &= ~(1u<verbose > 1 && method != metrics->revised_method) - // fprintf(stderr, "%d: revising method from %x to %x\n", - // b->content_id, metrics->revised_method, method); - metrics->revised_method = method; - } - pthread_mutex_unlock(&fd->metrics_lock); - } else { - metrics->input_avg_delta = - 0.9 * (metrics->input_avg_delta + - ABS(b->uncomp_size - metrics->input_avg_sz)); - - metrics->input_avg_sz += b->uncomp_size*.2; - metrics->input_avg_sz *= 0.8; - - strat = metrics->strat; - method = metrics->method; - - pthread_mutex_unlock(&fd->metrics_lock); - comp = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, - b->content_id, &comp_size, method, - method == GZIP_1 ? 
1 : level, - strat); - if (!comp) - return -1; - - if (comp_size < b->uncomp_size) { - free(b->data); - b->data = (unsigned char *)comp; - b->comp_size = comp_size; - b->method = method; - } else { - free(comp); - } - } - - } else { - // no cached metrics, so just do zlib? - comp = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, - b->content_id, &comp_size, GZIP, level, Z_FILTERED); - if (!comp) { - hts_log_error("Compression failed!"); - return -1; - } - - if (comp_size < b->uncomp_size) { - free(b->data); - b->data = (unsigned char *)comp; - b->comp_size = comp_size; - b->method = GZIP; - } else { - free(comp); - } - strat = Z_FILTERED; - } - - hts_log_info("Compressed block ID %d from %d to %d by method %s", - b->content_id, b->uncomp_size, b->comp_size, - cram_block_method2str(b->method)); - - b->method = methmap[b->method]; - - return 0; -} -int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, - int method, int level) { - return cram_compress_block2(fd, NULL, b, metrics, method, level); -} - -cram_metrics *cram_new_metrics(void) { - cram_metrics *m = calloc(1, sizeof(*m)); - if (!m) - return NULL; - m->trial = NTRIALS-1; - m->next_trial = TRIAL_SPAN/2; // learn quicker at start - m->method = RAW; - m->strat = 0; - m->revised_method = 0; - m->unpackable = 0; - - return m; -} - -char *cram_block_method2str(enum cram_block_method_int m) { - switch(m) { - case RAW: return "RAW"; - case GZIP: return "GZIP"; - case BZIP2: return "BZIP2"; - case LZMA: return "LZMA"; - case RANS0: return "RANS0"; - case RANS1: return "RANS1"; - case GZIP_RLE: return "GZIP_RLE"; - case GZIP_1: return "GZIP_1"; - case FQZ: return "FQZ"; - case FQZ_b: return "FQZ_b"; - case FQZ_c: return "FQZ_c"; - case FQZ_d: return "FQZ_d"; - case RANS_PR0: return "RANS_PR0"; - case RANS_PR1: return "RANS_PR1"; - case RANS_PR64: return "RANS_PR64"; - case RANS_PR9: return "RANS_PR9"; - case RANS_PR128: return "RANS_PR128"; - case RANS_PR129: return "RANS_PR129"; - case 
RANS_PR192: return "RANS_PR192"; - case RANS_PR193: return "RANS_PR193"; - case TOK3: return "TOK3_R"; - case TOKA: return "TOK3_A"; - case ARITH_PR0: return "ARITH_PR0"; - case ARITH_PR1: return "ARITH_PR1"; - case ARITH_PR64: return "ARITH_PR64"; - case ARITH_PR9: return "ARITH_PR9"; - case ARITH_PR128: return "ARITH_PR128"; - case ARITH_PR129: return "ARITH_PR129"; - case ARITH_PR192: return "ARITH_PR192"; - case ARITH_PR193: return "ARITH_PR193"; - case BM_ERROR: break; - } - return "?"; -} - -char *cram_content_type2str(enum cram_content_type t) { - switch (t) { - case FILE_HEADER: return "FILE_HEADER"; - case COMPRESSION_HEADER: return "COMPRESSION_HEADER"; - case MAPPED_SLICE: return "MAPPED_SLICE"; - case UNMAPPED_SLICE: return "UNMAPPED_SLICE"; - case EXTERNAL: return "EXTERNAL"; - case CORE: return "CORE"; - case CT_ERROR: break; - } - return "?"; -} - -/* ---------------------------------------------------------------------- - * Reference sequence handling - * - * These revolve around the refs_t structure, which may potentially be - * shared between multiple cram_fd. - * - * We start with refs_create() to allocate an empty refs_t and then - * populate it with @SQ line data using refs_from_header(). This is done on - * cram_open(). Also at start up we can call cram_load_reference() which - * is used with "scramble -r foo.fa". This replaces the fd->refs with the - * new one specified. In either case refs2id() is then called which - * maps ref_entry names to @SQ ids (refs_t->ref_id[]). - * - * Later, possibly within a thread, we will want to know the actual ref - * seq itself, obtained by calling cram_get_ref(). This may use the - * UR: or M5: fields or the filename specified in the original - * cram_load_reference() call. - * - * Given the potential for multi-threaded reference usage, we have - * reference counting (sorry for the confusing double use of "ref") to - * track the number of callers interested in any specific reference. 
- */ - -/* - * Frees/unmaps a reference sequence and associated file handles. - */ -static void ref_entry_free_seq(ref_entry *e) { - if (e->mf) - mfclose(e->mf); - if (e->seq && !e->mf) - free(e->seq); - - e->seq = NULL; - e->mf = NULL; -} - -void refs_free(refs_t *r) { - RP("refs_free()\n"); - - if (--r->count > 0) - return; - - if (!r) - return; - - if (r->pool) - string_pool_destroy(r->pool); - - if (r->h_meta) { - khint_t k; - - for (k = kh_begin(r->h_meta); k != kh_end(r->h_meta); k++) { - ref_entry *e; - - if (!kh_exist(r->h_meta, k)) - continue; - if (!(e = kh_val(r->h_meta, k))) - continue; - ref_entry_free_seq(e); - free(e); - } - - kh_destroy(refs, r->h_meta); - } - - if (r->ref_id) - free(r->ref_id); - - if (r->fp) - bgzf_close(r->fp); - - pthread_mutex_destroy(&r->lock); - - free(r); -} - -static refs_t *refs_create(void) { - refs_t *r = calloc(1, sizeof(*r)); - - RP("refs_create()\n"); - - if (!r) - return NULL; - - if (!(r->pool = string_pool_create(8192))) - goto err; - - r->ref_id = NULL; // see refs2id() to populate. - r->count = 1; - r->last = NULL; - r->last_id = -1; - - if (!(r->h_meta = kh_init(refs))) - goto err; - - pthread_mutex_init(&r->lock, NULL); - - return r; - - err: - refs_free(r); - return NULL; -} - -/* - * Opens a reference fasta file as a BGZF stream, allowing for - * compressed files. It automatically builds a .fai file if - * required and if compressed a .gzi bgzf index too. - * - * Returns a BGZF handle on success; - * NULL on failure. 
- */ -static BGZF *bgzf_open_ref(char *fn, char *mode, int is_md5) { - BGZF *fp; - - if (!is_md5 && !hisremote(fn)) { - char fai_file[PATH_MAX]; - - snprintf(fai_file, PATH_MAX, "%s.fai", fn); - if (access(fai_file, R_OK) != 0) - if (fai_build(fn) != 0) - return NULL; - } - - if (!(fp = bgzf_open(fn, mode))) { - perror(fn); - return NULL; - } - - if (fp->is_compressed == 1 && bgzf_index_load(fp, fn, ".gzi") < 0) { - hts_log_error("Unable to load .gzi index '%s.gzi'", fn); - bgzf_close(fp); - return NULL; - } - - return fp; -} - -/* - * Loads a FAI file for a reference.fasta. - * "is_err" indicates whether failure to load is worthy of emitting an - * error message. In some cases (eg with embedded references) we - * speculatively load, just in case, and silently ignore errors. - * - * Returns the refs_t struct on success (maybe newly allocated); - * NULL on failure - */ -static refs_t *refs_load_fai(refs_t *r_orig, const char *fn, int is_err) { - hFILE *fp = NULL; - char fai_fn[PATH_MAX]; - char line[8192]; - refs_t *r = r_orig; - size_t fn_l = strlen(fn); - int id = 0, id_alloc = 0; - - RP("refs_load_fai %s\n", fn); - - if (!r) - if (!(r = refs_create())) - goto err; - - if (r->fp) - if (bgzf_close(r->fp) != 0) - goto err; - r->fp = NULL; - - /* Look for a FASTA##idx##FAI format */ - char *fn_delim = strstr(fn, HTS_IDX_DELIM); - if (fn_delim) { - if (!(r->fn = string_ndup(r->pool, fn, fn_delim - fn))) - goto err; - fn_delim += strlen(HTS_IDX_DELIM); - snprintf(fai_fn, PATH_MAX, "%s", fn_delim); - } else { - /* An index file was provided, instead of the actual reference file */ - if (fn_l > 4 && strcmp(&fn[fn_l-4], ".fai") == 0) { - if (!r->fn) { - if (!(r->fn = string_ndup(r->pool, fn, fn_l-4))) - goto err; - } - snprintf(fai_fn, PATH_MAX, "%s", fn); - } else { - /* Only the reference file provided. 
Get the index file name from it */ - if (!(r->fn = string_dup(r->pool, fn))) - goto err; - snprintf(fai_fn, PATH_MAX, "%.*s.fai", PATH_MAX-5, fn); - } - } - - if (!(r->fp = bgzf_open_ref(r->fn, "r", 0))) { - hts_log_error("Failed to open reference file '%s'", r->fn); - goto err; - } - - if (!(fp = hopen(fai_fn, "r"))) { - hts_log_error("Failed to open index file '%s'", fai_fn); - if (is_err) - perror(fai_fn); - goto err; - } - while (hgets(line, 8192, fp) != NULL) { - ref_entry *e = malloc(sizeof(*e)); - char *cp; - int n; - khint_t k; - - if (!e) - return NULL; - - // id - for (cp = line; *cp && !isspace_c(*cp); cp++) - ; - *cp++ = 0; - e->name = string_dup(r->pool, line); - - // length - while (*cp && isspace_c(*cp)) - cp++; - e->length = strtoll(cp, &cp, 10); - - // offset - while (*cp && isspace_c(*cp)) - cp++; - e->offset = strtoll(cp, &cp, 10); - - // bases per line - while (*cp && isspace_c(*cp)) - cp++; - e->bases_per_line = strtol(cp, &cp, 10); - - // line length - while (*cp && isspace_c(*cp)) - cp++; - e->line_length = strtol(cp, &cp, 10); - - // filename - e->fn = r->fn; - - e->count = 0; - e->seq = NULL; - e->mf = NULL; - e->is_md5 = 0; - e->validated_md5 = 0; - - k = kh_put(refs, r->h_meta, e->name, &n); - if (-1 == n) { - free(e); - return NULL; - } - - if (n) { - kh_val(r->h_meta, k) = e; - } else { - ref_entry *re = kh_val(r->h_meta, k); - if (re && (re->count != 0 || re->length != 0)) { - /* Keep old */ - free(e); - } else { - /* Replace old */ - if (re) - free(re); - kh_val(r->h_meta, k) = e; - } - } - - if (id >= id_alloc) { - ref_entry **new_refs; - int x; - - id_alloc = id_alloc ?id_alloc*2 : 16; - new_refs = realloc(r->ref_id, id_alloc * sizeof(*r->ref_id)); - if (!new_refs) - goto err; - r->ref_id = new_refs; - - for (x = id; x < id_alloc; x++) - r->ref_id[x] = NULL; - } - r->ref_id[id] = e; - r->nref = ++id; - } - - if(hclose(fp) < 0) - goto err; - return r; - - err: - if (fp) - hclose_abruptly(fp); - - if (!r_orig) - refs_free(r); - - 
return NULL; -} - -/* - * Verifies that the CRAM @SQ lines and .fai files match. - */ -static void sanitise_SQ_lines(cram_fd *fd) { - int i; - - if (!fd->header || !fd->header->hrecs) - return; - - if (!fd->refs || !fd->refs->h_meta) - return; - - for (i = 0; i < fd->header->hrecs->nref; i++) { - const char *name = fd->header->hrecs->ref[i].name; - khint_t k = kh_get(refs, fd->refs->h_meta, name); - ref_entry *r; - - // We may have @SQ lines which have no known .fai, but do not - // in themselves pose a problem because they are unused in the file. - if (k == kh_end(fd->refs->h_meta)) - continue; - - if (!(r = (ref_entry *)kh_val(fd->refs->h_meta, k))) - continue; - - if (r->length && r->length != fd->header->hrecs->ref[i].len) { - assert(strcmp(r->name, fd->header->hrecs->ref[i].name) == 0); - - // Should we also check MD5sums here to ensure the correct - // reference was given? - hts_log_warning("Header @SQ length mismatch for ref %s, %"PRIhts_pos" vs %d", - r->name, fd->header->hrecs->ref[i].len, (int)r->length); - - // Fixing the parsed @SQ header will make MD:Z: strings work - // and also stop it producing N for the sequence. - fd->header->hrecs->ref[i].len = r->length; - } - } -} - -/* - * Indexes references by the order they appear in a BAM file. This may not - * necessarily be the same order they appear in the fasta reference file. 
- * - * Returns 0 on success - * -1 on failure - */ -int refs2id(refs_t *r, sam_hdr_t *hdr) { - int i; - sam_hrecs_t *h = hdr->hrecs; - - if (r->ref_id) - free(r->ref_id); - if (r->last) - r->last = NULL; - - r->ref_id = calloc(h->nref, sizeof(*r->ref_id)); - if (!r->ref_id) - return -1; - - r->nref = h->nref; - for (i = 0; i < h->nref; i++) { - khint_t k = kh_get(refs, r->h_meta, h->ref[i].name); - if (k != kh_end(r->h_meta)) { - r->ref_id[i] = kh_val(r->h_meta, k); - } else { - hts_log_warning("Unable to find ref name '%s'", h->ref[i].name); - } - } - - return 0; -} - -/* - * Generates refs_t entries based on @SQ lines in the header. - * Returns 0 on success - * -1 on failure - */ -static int refs_from_header(cram_fd *fd) { - if (!fd) - return -1; - - refs_t *r = fd->refs; - if (!r) - return -1; - - sam_hdr_t *h = fd->header; - if (!h) - return 0; - - if (!h->hrecs) { - if (-1 == sam_hdr_fill_hrecs(h)) - return -1; - } - - if (h->hrecs->nref == 0) - return 0; - - //fprintf(stderr, "refs_from_header for %p mode %c\n", fd, fd->mode); - - /* Existing refs are fine, as long as they're compatible with the hdr. 
*/ - ref_entry **new_ref_id = realloc(r->ref_id, (r->nref + h->hrecs->nref) * sizeof(*r->ref_id)); - if (!new_ref_id) - return -1; - r->ref_id = new_ref_id; - - int i, j; - /* Copy info from h->ref[i] over to r */ - for (i = 0, j = r->nref; i < h->hrecs->nref; i++) { - sam_hrec_type_t *ty; - sam_hrec_tag_t *tag; - khint_t k; - int n; - - k = kh_get(refs, r->h_meta, h->hrecs->ref[i].name); - if (k != kh_end(r->h_meta)) - // Ref already known about - continue; - - if (!(r->ref_id[j] = calloc(1, sizeof(ref_entry)))) - return -1; - - if (!h->hrecs->ref[i].name) - return -1; - - r->ref_id[j]->name = string_dup(r->pool, h->hrecs->ref[i].name); - if (!r->ref_id[j]->name) return -1; - r->ref_id[j]->length = 0; // marker for not yet loaded - - /* Initialise likely filename if known */ - if ((ty = sam_hrecs_find_type_id(h->hrecs, "SQ", "SN", h->hrecs->ref[i].name))) { - if ((tag = sam_hrecs_find_key(ty, "M5", NULL))) { - r->ref_id[j]->fn = string_dup(r->pool, tag->str+3); - //fprintf(stderr, "Tagging @SQ %s / %s\n", r->ref_id[h]->name, r->ref_id[h]->fn); - } - } - - k = kh_put(refs, r->h_meta, r->ref_id[j]->name, &n); - if (n <= 0) // already exists or error - return -1; - kh_val(r->h_meta, k) = r->ref_id[j]; - - j++; - } - r->nref = j; - - return 0; -} - -/* - * Attaches a header to a cram_fd. - * - * This should be used when creating a new cram_fd for writing where - * we have a header already constructed (eg from a file we've read - * in). - */ -int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr) { - if (!fd || !hdr ) - return -1; - - if (fd->header != hdr) { - if (fd->header) - sam_hdr_destroy(fd->header); - fd->header = sam_hdr_dup(hdr); - if (!fd->header) - return -1; - } - return refs_from_header(fd); -} - -int cram_set_header(cram_fd *fd, sam_hdr_t *hdr) { - return cram_set_header2(fd, hdr); -} - -/* - * Returns whether the path refers to a directory. 
- */ -static int is_directory(char *fn) { - struct stat buf; - if ( stat(fn,&buf) ) return 0; - return S_ISDIR(buf.st_mode); -} - -/* - * Converts a directory and a filename into an expanded path, replacing %s - * in directory with the filename and %[0-9]+s with portions of the filename - * Any remaining parts of filename are added to the end with /%s. - */ -static int expand_cache_path(char *path, char *dir, const char *fn) { - char *cp, *start = path; - size_t len; - size_t sz = PATH_MAX; - - while ((cp = strchr(dir, '%'))) { - if (cp-dir >= sz) return -1; - strncpy(path, dir, cp-dir); - path += cp-dir; - sz -= cp-dir; - - if (*++cp == 's') { - len = strlen(fn); - if (len >= sz) return -1; - strcpy(path, fn); - path += len; - sz -= len; - fn += len; - cp++; - } else if (*cp >= '0' && *cp <= '9') { - char *endp; - long l; - - l = strtol(cp, &endp, 10); - l = MIN(l, strlen(fn)); - if (*endp == 's') { - if (l >= sz) return -1; - strncpy(path, fn, l); - path += l; - fn += l; - sz -= l; - *path = 0; - cp = endp+1; - } else { - if (sz < 3) return -1; - *path++ = '%'; - *path++ = *cp++; - } - } else { - if (sz < 3) return -1; - *path++ = '%'; - *path++ = *cp++; - } - dir = cp; - } - - len = strlen(dir); - if (len >= sz) return -1; - strcpy(path, dir); - path += len; - sz -= len; - - len = strlen(fn) + ((*fn && path > start && path[-1] != '/') ? 1 : 0); - if (len >= sz) return -1; - if (*fn && path > start && path[-1] != '/') - *path++ = '/'; - strcpy(path, fn); - return 0; -} - -/* - * Make the directory containing path and any prefix directories. 
- */ -static void mkdir_prefix(char *path, int mode) { - char *cp = strrchr(path, '/'); - if (!cp) - return; - - *cp = 0; - if (is_directory(path)) { - *cp = '/'; - return; - } - - if (mkdir(path, mode) == 0) { - chmod(path, mode); - *cp = '/'; - return; - } - - mkdir_prefix(path, mode); - mkdir(path, mode); - chmod(path, mode); - *cp = '/'; -} - -/* - * Return the cache directory to use, based on the first of these - * environment variables to be set to a non-empty value. - */ -static const char *get_cache_basedir(const char **extra) { - char *base; - - *extra = ""; - - base = getenv("XDG_CACHE_HOME"); - if (base && *base) return base; - - base = getenv("HOME"); - if (base && *base) { *extra = "/.cache"; return base; } - - base = getenv("TMPDIR"); - if (base && *base) return base; - - base = getenv("TEMP"); - if (base && *base) return base; - - return "/tmp"; -} - -/* - * Queries the M5 string from the header and attempts to populate the - * reference from this using the REF_PATH environment. - * - * Returns 0 on success - * -1 on failure - */ -static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) { - char *ref_path = getenv("REF_PATH"); - sam_hrec_type_t *ty; - sam_hrec_tag_t *tag; - char path[PATH_MAX]; - kstring_t path_tmp = KS_INITIALIZE; - char cache[PATH_MAX], cache_root[PATH_MAX]; - char *local_cache = getenv("REF_CACHE"); - mFILE *mf; - int local_path = 0; - - hts_log_info("Running cram_populate_ref on fd %p, id %d", (void *)fd, id); - - cache_root[0] = '\0'; - - if (!ref_path || *ref_path == '\0') { - /* - * If we have no ref path, we use the EBI server. - * However to avoid spamming it we require a local ref cache too. 
- */ - ref_path = "https://www.ebi.ac.uk/ena/cram/md5/%s"; - if (!local_cache || *local_cache == '\0') { - const char *extra; - const char *base = get_cache_basedir(&extra); - snprintf(cache_root, PATH_MAX, "%s%s/hts-ref", base, extra); - snprintf(cache,PATH_MAX, "%s%s/hts-ref/%%2s/%%2s/%%s", base, extra); - local_cache = cache; - hts_log_info("Populating local cache: %s", local_cache); - } - } - - if (!r->name) - return -1; - - if (!(ty = sam_hrecs_find_type_id(fd->header->hrecs, "SQ", "SN", r->name))) - return -1; - - if (!(tag = sam_hrecs_find_key(ty, "M5", NULL))) - goto no_M5; - - hts_log_info("Querying ref %s", tag->str+3); - - /* Use cache if available */ - if (local_cache && *local_cache) { - if (expand_cache_path(path, local_cache, tag->str+3) == 0) - local_path = 1; - } - -#ifndef HAVE_MMAP - char *path2; - /* Search local files in REF_PATH; we can open them and return as above */ - if (!local_path && (path2 = find_path(tag->str+3, ref_path))) { - int len = snprintf(path, PATH_MAX, "%s", path2); - free(path2); - if (len > 0 && len < PATH_MAX) // in case it's too long - local_path = 1; - } -#endif - - /* Found via REF_CACHE or local REF_PATH file */ - if (local_path) { - struct stat sb; - BGZF *fp; - - if (0 == stat(path, &sb) - && S_ISREG(sb.st_mode) - && (fp = bgzf_open(path, "r"))) { - r->length = sb.st_size; - r->offset = r->line_length = r->bases_per_line = 0; - - r->fn = string_dup(fd->refs->pool, path); - - if (fd->refs->fp) - if (bgzf_close(fd->refs->fp) != 0) - return -1; - fd->refs->fp = fp; - fd->refs->fn = r->fn; - r->is_md5 = 1; - r->validated_md5 = 1; - - // Fall back to cram_get_ref() where it'll do the actual - // reading of the file. 
- return 0; - } - } - - - /* Otherwise search full REF_PATH; slower as loads entire file */ - if ((mf = open_path_mfile(tag->str+3, ref_path, NULL))) { - size_t sz; - r->seq = mfsteal(mf, &sz); - if (r->seq) { - r->mf = NULL; - } else { - // keep mf around as we couldn't detach - r->seq = mf->data; - r->mf = mf; - } - r->length = sz; - r->is_md5 = 1; - r->validated_md5 = 1; - } else { - refs_t *refs; - const char *fn; - - no_M5: - /* Failed to find in search path or M5 cache, see if @SQ UR: tag? */ - if (!(tag = sam_hrecs_find_key(ty, "UR", NULL))) - return -1; - - fn = (strncmp(tag->str+3, "file:", 5) == 0) - ? tag->str+8 - : tag->str+3; - - if (fd->refs->fp) { - if (bgzf_close(fd->refs->fp) != 0) - return -1; - fd->refs->fp = NULL; - } - if (!(refs = refs_load_fai(fd->refs, fn, 0))) - return -1; - sanitise_SQ_lines(fd); - - fd->refs = refs; - if (fd->refs->fp) { - if (bgzf_close(fd->refs->fp) != 0) - return -1; - fd->refs->fp = NULL; - } - - if (!fd->refs->fn) - return -1; - - if (-1 == refs2id(fd->refs, fd->header)) - return -1; - if (!fd->refs->ref_id || !fd->refs->ref_id[id]) - return -1; - - // Local copy already, so fall back to cram_get_ref(). - return 0; - } - - /* Populate the local disk cache if required */ - if (local_cache && *local_cache) { - hFILE *fp; - - if (*cache_root && !is_directory(cache_root)) { - hts_log_warning("Creating reference cache directory %s\n" - "This may become large; see the samtools(1) manual page REF_CACHE discussion", - cache_root); - } - - if (expand_cache_path(path, local_cache, tag->str+3) < 0) { - return 0; // Not fatal - we have the data already so keep going. - } - hts_log_info("Writing cache file '%s'", path); - mkdir_prefix(path, 01777); - - fp = hts_open_tmpfile(path, "wx", &path_tmp); - if (!fp) { - perror(path_tmp.s); - free(path_tmp.s); - - // Not fatal - we have the data already so keep going. 
- return 0; - } - - // Check md5sum - hts_md5_context *md5; - char unsigned md5_buf1[16]; - char md5_buf2[33]; - - if (!(md5 = hts_md5_init())) { - hclose_abruptly(fp); - unlink(path_tmp.s); - free(path_tmp.s); - return -1; - } - hts_md5_update(md5, r->seq, r->length); - hts_md5_final(md5_buf1, md5); - hts_md5_destroy(md5); - hts_md5_hex(md5_buf2, md5_buf1); - - if (strncmp(tag->str+3, md5_buf2, 32) != 0) { - hts_log_error("Mismatching md5sum for downloaded reference"); - hclose_abruptly(fp); - unlink(path_tmp.s); - free(path_tmp.s); - return -1; - } - - ssize_t length_written = hwrite(fp, r->seq, r->length); - if (hclose(fp) < 0 || length_written != r->length || - chmod(path_tmp.s, 0444) < 0 || - rename(path_tmp.s, path) < 0) { - hts_log_error("Creating reference at %s failed: %s", - path, strerror(errno)); - unlink(path_tmp.s); - } - } - - free(path_tmp.s); - return 0; -} - -static void cram_ref_incr_locked(refs_t *r, int id) { - RP("%d INC REF %d, %d %p\n", gettid(), id, - (int)(id>=0 && r->ref_id[id]?r->ref_id[id]->count+1:-999), - id>=0 && r->ref_id[id]?r->ref_id[id]->seq:(char *)1); - - if (id < 0 || !r->ref_id[id] || !r->ref_id[id]->seq) - return; - - if (r->last_id == id) - r->last_id = -1; - - ++r->ref_id[id]->count; -} - -void cram_ref_incr(refs_t *r, int id) { - pthread_mutex_lock(&r->lock); - cram_ref_incr_locked(r, id); - pthread_mutex_unlock(&r->lock); -} - -static void cram_ref_decr_locked(refs_t *r, int id) { - RP("%d DEC REF %d, %d %p\n", gettid(), id, - (int)(id>=0 && r->ref_id[id]?r->ref_id[id]->count-1:-999), - id>=0 && r->ref_id[id]?r->ref_id[id]->seq:(char *)1); - - if (id < 0 || !r->ref_id[id] || !r->ref_id[id]->seq) { - return; - } - - if (--r->ref_id[id]->count <= 0) { - assert(r->ref_id[id]->count == 0); - if (r->last_id >= 0) { - if (r->ref_id[r->last_id]->count <= 0 && - r->ref_id[r->last_id]->seq) { - RP("%d FREE REF %d (%p)\n", gettid(), - r->last_id, r->ref_id[r->last_id]->seq); - ref_entry_free_seq(r->ref_id[r->last_id]); - if 
(r->ref_id[r->last_id]->is_md5) r->ref_id[r->last_id]->length = 0; - } - } - r->last_id = id; - } -} - -void cram_ref_decr(refs_t *r, int id) { - pthread_mutex_lock(&r->lock); - cram_ref_decr_locked(r, id); - pthread_mutex_unlock(&r->lock); -} - -/* - * Used by cram_ref_load and cram_get_ref. The file handle will have - * already been opened, so we can catch it. The ref_entry *e informs us - * of whether this is a multi-line fasta file or a raw MD5 style file. - * Either way we create a single contiguous sequence. - * - * Returns all or part of a reference sequence on success (malloced); - * NULL on failure. - */ -static char *load_ref_portion(BGZF *fp, ref_entry *e, int start, int end) { - off_t offset, len; - char *seq; - - if (end < start) - end = start; - - /* - * Compute locations in file. This is trivial for the MD5 files, but - * is still necessary for the fasta variants. - * - * Note the offset here, as with faidx, has the assumption that white- - * space (the diff between line_length and bases_per_line) only occurs - * at the end of a line of text. - */ - offset = e->line_length - ? e->offset + (start-1)/e->bases_per_line * e->line_length + - (start-1) % e->bases_per_line - : start-1; - - len = (e->line_length - ? e->offset + (end-1)/e->bases_per_line * e->line_length + - (end-1) % e->bases_per_line - : end-1) - offset + 1; - - if (bgzf_useek(fp, offset, SEEK_SET) < 0) { - perror("bgzf_useek() on reference file"); - return NULL; - } - - if (len == 0 || !(seq = malloc(len))) { - return NULL; - } - - if (len != bgzf_read(fp, seq, len)) { - perror("bgzf_read() on reference file"); - free(seq); - return NULL; - } - - /* Strip white-space if required. */ - if (len != end-start+1) { - hts_pos_t i, j; - char *cp = seq; - char *cp_to; - - // Copy up to the first white-space, and then repeatedly just copy - // bases_per_line verbatim, and use the slow method to end again. 
- // - // This may seem excessive, but this code can be a significant - // portion of total CRAM decode CPU time for shallow data sets. - for (i = j = 0; i < len; i++) { - if (!isspace_c(cp[i])) - cp[j++] = cp[i] & ~0x20; - else - break; - } - while (i < len && isspace_c(cp[i])) - i++; - while (i < len - e->line_length) { - hts_pos_t j_end = j + e->bases_per_line; - while (j < j_end) - cp[j++] = cp[i++] & ~0x20; // toupper equiv - i += e->line_length - e->bases_per_line; - } - for (; i < len; i++) { - if (!isspace_c(cp[i])) - cp[j++] = cp[i] & ~0x20; - } - - cp_to = cp+j; - - if (cp_to - seq != end-start+1) { - hts_log_error("Malformed reference file"); - free(seq); - return NULL; - } - } else { - int i; - for (i = 0; i < len; i++) { - seq[i] = toupper_c(seq[i]); - } - } - - return seq; -} - -/* - * Load the entire reference 'id'. - * This also increments the reference count by 1. - * - * Returns ref_entry on success; - * NULL on failure - */ -ref_entry *cram_ref_load(refs_t *r, int id, int is_md5) { - ref_entry *e = r->ref_id[id]; - int start = 1, end = e->length; - char *seq; - - if (e->seq) { - return e; - } - - assert(e->count == 0); - - if (r->last) { -#ifdef REF_DEBUG - int idx = 0; - for (idx = 0; idx < r->nref; idx++) - if (r->last == r->ref_id[idx]) - break; - RP("%d cram_ref_load DECR %d\n", gettid(), idx); -#endif - assert(r->last->count > 0); - if (--r->last->count <= 0) { - RP("%d FREE REF %d (%p)\n", gettid(), id, r->ref_id[id]->seq); - if (r->last->seq) - ref_entry_free_seq(r->last); - } - } - - if (!r->fn) - return NULL; - - /* Open file if it's not already the current open reference */ - if (strcmp(r->fn, e->fn) || r->fp == NULL) { - if (r->fp) - if (bgzf_close(r->fp) != 0) - return NULL; - r->fn = e->fn; - if (!(r->fp = bgzf_open_ref(r->fn, "r", is_md5))) - return NULL; - } - - RP("%d Loading ref %d (%d..%d)\n", gettid(), id, start, end); - - if (!(seq = load_ref_portion(r->fp, e, start, end))) { - return NULL; - } - - RP("%d Loaded ref %d 
(%d..%d) = %p\n", gettid(), id, start, end, seq); - - RP("%d INC REF %d, %"PRId64"\n", gettid(), id, (e->count+1)); - e->seq = seq; - e->mf = NULL; - e->count++; - - /* - * Also keep track of last used ref so incr/decr loops on the same - * sequence don't cause load/free loops. - */ - RP("%d cram_ref_load INCR %d => %"PRId64"\n", gettid(), id, e->count+1); - r->last = e; - e->count++; - - return e; -} - -/* - * Returns a portion of a reference sequence from start to end inclusive. - * The returned pointer is owned by either the cram_file fd or by the - * internal refs_t structure and should not be freed by the caller. - * - * The difference is whether or not this refs_t is in use by just the one - * cram_fd or by multiples, or whether we have multiple threads accessing - * references. In either case fd->shared will be true and we start using - * reference counting to track the number of users of a specific reference - * sequence. - * - * Otherwise the ref seq returned is allocated as part of cram_fd itself - * and will be freed up on the next call to cram_get_ref or cram_close. - * - * To return the entire reference sequence, specify start as 1 and end - * as 0. - * - * To cease using a reference, call cram_ref_decr(). - * - * Returns reference on success, - * NULL on failure - */ -char *cram_get_ref(cram_fd *fd, int id, int start, int end) { - ref_entry *r; - char *seq; - int ostart = start; - - if (id == -1 || start < 1) - return NULL; - - /* FIXME: axiomatic query of r->seq being true? - * Or shortcut for unsorted data where we load once and never free? - */ - - //fd->shared_ref = 1; // hard code for now to simplify things - - pthread_mutex_lock(&fd->ref_lock); - - RP("%d cram_get_ref on fd %p, id %d, range %d..%d\n", gettid(), fd, id, start, end); - - /* - * Unsorted data implies we want to fetch an entire reference at a time. - * We just deal with this at the moment by claiming we're sharing - * references instead, which has the same requirement. 
- */ - if (fd->unsorted) - fd->shared_ref = 1; - - - /* Sanity checking: does this ID exist? */ - if (id >= fd->refs->nref) { - hts_log_error("No reference found for id %d", id); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - if (!fd->refs || !fd->refs->ref_id[id]) { - hts_log_error("No reference found for id %d", id); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - if (!(r = fd->refs->ref_id[id])) { - hts_log_error("No reference found for id %d", id); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - - /* - * It has an entry, but may not have been populated yet. - * Any manually loaded .fai files have their lengths known. - * A ref entry computed from @SQ lines (M5 or UR field) will have - * r->length == 0 unless it's been loaded once and verified that we have - * an on-disk filename for it. - * - * 19 Sep 2013: Moved the lock here as the cram_populate_ref code calls - * open_path_mfile and libcurl, which isn't multi-thread safe unless I - * rewrite my code to have one curl handle per thread. - */ - pthread_mutex_lock(&fd->refs->lock); - if (r->length == 0) { - if (fd->ref_fn) - hts_log_warning("Reference file given, but ref '%s' not present", - r->name); - if (cram_populate_ref(fd, id, r) == -1) { - hts_log_warning("Failed to populate reference for id %d", id); - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - r = fd->refs->ref_id[id]; - if (fd->unsorted) - cram_ref_incr_locked(fd->refs, id); - } - - - /* - * We now know that we the filename containing the reference, so check - * for limits. If it's over half the reference we'll load all of it in - * memory as this will speed up subsequent calls. - */ - if (end < 1) - end = r->length; - if (end >= r->length) - end = r->length; - - if (end - start >= 0.5*r->length || fd->shared_ref) { - start = 1; - end = r->length; - } - - /* - * Maybe we have it cached already? If so use it. 
- * - * Alternatively if we don't have the sequence but we're sharing - * references and/or are asking for the entire length of it, then - * load the full reference into the refs structure and return - * a pointer to that one instead. - */ - if (fd->shared_ref || r->seq || (start == 1 && end == r->length)) { - char *cp; - - if (id >= 0) { - if (r->seq) { - cram_ref_incr_locked(fd->refs, id); - } else { - ref_entry *e; - if (!(e = cram_ref_load(fd->refs, id, r->is_md5))) { - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - /* unsorted data implies cache ref indefinitely, to avoid - * continually loading and unloading. - */ - if (fd->unsorted) - cram_ref_incr_locked(fd->refs, id); - } - - fd->ref = NULL; /* We never access it directly */ - fd->ref_start = 1; - fd->ref_end = r->length; - fd->ref_id = id; - - cp = fd->refs->ref_id[id]->seq + ostart-1; - } else { - fd->ref = NULL; - cp = NULL; - } - - RP("%d cram_get_ref returning for id %d, count %d\n", gettid(), id, (int)r->count); - - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return cp; - } - - /* - * Otherwise we're not sharing, we don't have a copy of it already and - * we're only asking for a small portion of it. - * - * In this case load up just that segment ourselves, freeing any old - * small segments in the process. 
- */ - - /* Unmapped ref ID */ - if (id < 0 || !fd->refs->fn) { - if (fd->ref_free) { - free(fd->ref_free); - fd->ref_free = NULL; - } - fd->ref = NULL; - fd->ref_id = id; - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - /* Open file if it's not already the current open reference */ - if (strcmp(fd->refs->fn, r->fn) || fd->refs->fp == NULL) { - if (fd->refs->fp) - if (bgzf_close(fd->refs->fp) != 0) - return NULL; - fd->refs->fn = r->fn; - if (!(fd->refs->fp = bgzf_open_ref(fd->refs->fn, "r", r->is_md5))) { - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - } - - if (!(fd->ref = load_ref_portion(fd->refs->fp, r, start, end))) { - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - return NULL; - } - - if (fd->ref_free) - free(fd->ref_free); - - fd->ref_id = id; - fd->ref_start = start; - fd->ref_end = end; - fd->ref_free = fd->ref; - seq = fd->ref; - - pthread_mutex_unlock(&fd->refs->lock); - pthread_mutex_unlock(&fd->ref_lock); - - return seq ? seq + ostart - start : NULL; -} - -/* - * If fd has been opened for reading, it may be permitted to specify 'fn' - * as NULL and let the code auto-detect the reference by parsing the - * SAM header @SQ lines. - */ -int cram_load_reference(cram_fd *fd, char *fn) { - int ret = 0; - - if (fn) { - fd->refs = refs_load_fai(fd->refs, fn, - !(fd->embed_ref>0 && fd->mode == 'r')); - fn = fd->refs ? 
fd->refs->fn : NULL; - if (!fn) - ret = -1; - sanitise_SQ_lines(fd); - } - fd->ref_fn = fn; - - if ((!fd->refs || (fd->refs->nref == 0 && !fn)) && fd->header) { - if (fd->refs) - refs_free(fd->refs); - if (!(fd->refs = refs_create())) - return -1; - if (-1 == refs_from_header(fd)) - return -1; - } - - if (fd->header) - if (-1 == refs2id(fd->refs, fd->header)) - return -1; - - return ret; -} - -/* ---------------------------------------------------------------------- - * Containers - */ - -/* - * Creates a new container, specifying the maximum number of slices - * and records permitted. - * - * Returns cram_container ptr on success - * NULL on failure - */ -cram_container *cram_new_container(int nrec, int nslice) { - cram_container *c = calloc(1, sizeof(*c)); - enum cram_DS_ID id; - - if (!c) - return NULL; - - c->curr_ref = -2; - - c->max_c_rec = nrec * nslice; - c->curr_c_rec = 0; - - c->max_rec = nrec; - c->record_counter = 0; - c->num_bases = 0; - c->s_num_bases = 0; - - c->max_slice = nslice; - c->curr_slice = 0; - - c->pos_sorted = 1; - c->max_apos = 0; - c->multi_seq = 0; - c->qs_seq_orient = 1; - c->no_ref = 0; - c->embed_ref = -1; // automatic selection - - c->bams = NULL; - - if (!(c->slices = calloc(nslice != 0 ? 
nslice : 1, sizeof(cram_slice *)))) - goto err; - c->slice = NULL; - - if (!(c->comp_hdr = cram_new_compression_header())) - goto err; - c->comp_hdr_block = NULL; - - for (id = DS_RN; id < DS_TN; id++) - if (!(c->stats[id] = cram_stats_create())) goto err; - - //c->aux_B_stats = cram_stats_create(); - - if (!(c->tags_used = kh_init(m_tagmap))) - goto err; - c->refs_used = 0; - c->ref_free = 0; - - return c; - - err: - if (c) { - if (c->slices) - free(c->slices); - free(c); - } - return NULL; -} - -static void free_bam_list(bam_seq_t **bams, int max_rec) { - int i; - for (i = 0; i < max_rec; i++) - bam_free(bams[i]); - - free(bams); -} - -void cram_free_container(cram_container *c) { - enum cram_DS_ID id; - int i; - - if (!c) - return; - - if (c->refs_used) - free(c->refs_used); - - if (c->landmark) - free(c->landmark); - - if (c->comp_hdr) - cram_free_compression_header(c->comp_hdr); - - if (c->comp_hdr_block) - cram_free_block(c->comp_hdr_block); - - // Free the slices; filled out by encoder only - if (c->slices) { - for (i = 0; i < c->max_slice; i++) { - if (c->slices[i]) - cram_free_slice(c->slices[i]); - if (c->slices[i] == c->slice) - c->slice = NULL; - } - free(c->slices); - } - - // Free the current slice; set by both encoder & decoder - if (c->slice) { - cram_free_slice(c->slice); - c->slice = NULL; - } - - for (id = DS_RN; id < DS_TN; id++) - if (c->stats[id]) cram_stats_free(c->stats[id]); - - //if (c->aux_B_stats) cram_stats_free(c->aux_B_stats); - - if (c->tags_used) { - khint_t k; - - for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { - if (!kh_exist(c->tags_used, k)) - continue; - - cram_tag_map *tm = (cram_tag_map *)kh_val(c->tags_used, k); - if (tm) { - cram_codec *c = tm->codec; - - if (c) c->free(c); - - // If tm->blk or tm->blk2 is set, then we haven't yet got to - // cram_encode_container which copies the blocks to s->aux_block - // and NULLifies tm->blk*. 
In this case we failed to complete - // the container construction, so we have to free up our partially - // converted CRAM. - cram_free_block(tm->blk); - cram_free_block(tm->blk2); - free(tm); - } - } - - kh_destroy(m_tagmap, c->tags_used); - } - - if (c->ref_free) - free(c->ref); - - if (c->bams) - free_bam_list(c->bams, c->max_c_rec); - - free(c); -} - -/* - * Reads a container header. - * - * Returns cram_container on success - * NULL on failure or no container left (fd->err == 0). - */ -cram_container *cram_read_container(cram_fd *fd) { - cram_container c2, *c; - int i, s; - size_t rd = 0; - uint32_t crc = 0; - - fd->err = 0; - fd->eof = 0; - - memset(&c2, 0, sizeof(c2)); - if (CRAM_MAJOR_VERS(fd->version) == 1) { - if ((s = fd->vv.varint_decode32_crc(fd, &c2.length, &crc)) == -1) { - fd->eof = fd->empty_container ? 1 : 2; - return NULL; - } else { - rd+=s; - } - } else if (CRAM_MAJOR_VERS(fd->version) < 4) { - uint32_t len; - if ((s = int32_decode(fd, &c2.length)) == -1) { - if (CRAM_MAJOR_VERS(fd->version) == 2 && - CRAM_MINOR_VERS(fd->version) == 0) - fd->eof = 1; // EOF blocks arrived in v2.1 - else - fd->eof = fd->empty_container ? 1 : 2; - return NULL; - } else { - rd+=s; - } - len = le_int4(c2.length); - crc = crc32(0L, (unsigned char *)&len, 4); - } else { - if ((s = fd->vv.varint_decode32_crc(fd, &c2.length, &crc)) == -1) { - fd->eof = fd->empty_container ? 
1 : 2; - return NULL; - } else { - rd+=s; - } - } - if ((s = fd->vv.varint_decode32s_crc(fd, &c2.ref_seq_id, &crc)) == -1) return NULL; else rd+=s; - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - int64_t i64; - if ((s = fd->vv.varint_decode64_crc(fd, &i64, &crc))== -1) return NULL; else rd+=s; - c2.ref_seq_start = i64; - if ((s = fd->vv.varint_decode64_crc(fd, &i64, &crc)) == -1) return NULL; else rd+=s; - c2.ref_seq_span = i64; - } else { - int32_t i32; - if ((s = fd->vv.varint_decode32_crc(fd, &i32, &crc))== -1) return NULL; else rd+=s; - c2.ref_seq_start = i32; - if ((s = fd->vv.varint_decode32_crc(fd, &i32, &crc)) == -1) return NULL; else rd+=s; - c2.ref_seq_span = i32; - } - if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_records, &crc)) == -1) return NULL; else rd+=s; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - c2.record_counter = 0; - c2.num_bases = 0; - } else { - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - if ((s = fd->vv.varint_decode64_crc(fd, &c2.record_counter, &crc)) == -1) - return NULL; - else - rd += s; - } else { - int32_t i32; - if ((s = fd->vv.varint_decode32_crc(fd, &i32, &crc)) == -1) - return NULL; - else - rd += s; - c2.record_counter = i32; - } - - if ((s = fd->vv.varint_decode64_crc(fd, &c2.num_bases, &crc))== -1) - return NULL; - else - rd += s; - } - if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_blocks, &crc)) == -1) - return NULL; - else - rd+=s; - if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_landmarks, &crc))== -1) - return NULL; - else - rd+=s; - - if (c2.num_landmarks < 0 || c2.num_landmarks >= SIZE_MAX / sizeof(int32_t)) - return NULL; - - if (!(c = calloc(1, sizeof(*c)))) - return NULL; - - *c = c2; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (c->num_landmarks > FUZZ_ALLOC_LIMIT/sizeof(int32_t)) { - fd->err = errno = ENOMEM; - cram_free_container(c); - return NULL; - } -#endif - if (c->num_landmarks && !(c->landmark = malloc(c->num_landmarks * sizeof(int32_t)))) { - fd->err = errno; - cram_free_container(c); - return 
NULL; - } - for (i = 0; i < c->num_landmarks; i++) { - if ((s = fd->vv.varint_decode32_crc(fd, &c->landmark[i], &crc)) == -1) { - cram_free_container(c); - return NULL; - } else { - rd += s; - } - } - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - if (-1 == int32_decode(fd, (int32_t *)&c->crc32)) { - cram_free_container(c); - return NULL; - } else { - rd+=4; - } - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Pretend the CRC was OK so the fuzzer doesn't have to get it right - crc = c->crc32; -#endif - - if (crc != c->crc32) { - hts_log_error("Container header CRC32 failure"); - cram_free_container(c); - return NULL; - } - } - - c->offset = rd; - c->slices = NULL; - c->slice = NULL; - c->curr_slice = 0; - c->max_slice = c->num_landmarks; - c->slice_rec = 0; - c->curr_rec = 0; - c->max_rec = 0; - - if (c->ref_seq_id == -2) { - c->multi_seq = 1; - fd->multi_seq = 1; - } - - fd->empty_container = - (c->num_records == 0 && - c->ref_seq_id == -1 && - c->ref_seq_start == 0x454f46 /* EOF */) ? 1 : 0; - - return c; -} - - -/* MAXIMUM storage size needed for the container. */ -int cram_container_size(cram_container *c) { - return 55 + 5*c->num_landmarks; -} - - -/* - * Stores the container structure in dat and returns *size as the - * number of bytes written to dat[]. The input size of dat is also - * held in *size and should be initialised to cram_container_size(c). - * - * Returns 0 on success; - * -1 on failure - */ -int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size) -{ - char *cp = (char *)dat; - int i; - - // Check the input buffer is large enough according to our stated - // requirements. (NOTE: it may actually take less.) 
- if (cram_container_size(c) > *size) - return -1; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - cp += itf8_put(cp, c->length); - } else { - *(int32_t *)cp = le_int4(c->length); - cp += 4; - } - if (c->multi_seq) { - cp += fd->vv.varint_put32(cp, NULL, -2); - cp += fd->vv.varint_put32(cp, NULL, 0); - cp += fd->vv.varint_put32(cp, NULL, 0); - } else { - cp += fd->vv.varint_put32s(cp, NULL, c->ref_seq_id); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_start); - cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_span); - } else { - cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_start); - cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_span); - } - } - cp += fd->vv.varint_put32(cp, NULL, c->num_records); - if (CRAM_MAJOR_VERS(fd->version) == 2) { - cp += fd->vv.varint_put64(cp, NULL, c->record_counter); - } else if (CRAM_MAJOR_VERS(fd->version) >= 3) { - cp += fd->vv.varint_put32(cp, NULL, c->record_counter); - } - cp += fd->vv.varint_put64(cp, NULL, c->num_bases); - cp += fd->vv.varint_put32(cp, NULL, c->num_blocks); - cp += fd->vv.varint_put32(cp, NULL, c->num_landmarks); - for (i = 0; i < c->num_landmarks; i++) - cp += fd->vv.varint_put32(cp, NULL, c->landmark[i]); - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - c->crc32 = crc32(0L, (uc *)dat, cp-dat); - cp[0] = c->crc32 & 0xff; - cp[1] = (c->crc32 >> 8) & 0xff; - cp[2] = (c->crc32 >> 16) & 0xff; - cp[3] = (c->crc32 >> 24) & 0xff; - cp += 4; - } - - *size = cp-dat; // actual used size - - return 0; -} - - -/* - * Writes a container structure. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_write_container(cram_fd *fd, cram_container *c) { - char buf_a[1024], *buf = buf_a, *cp; - int i; - - if (61 + c->num_landmarks * 10 >= 1024) { - buf = malloc(61 + c->num_landmarks * 10); - if (!buf) - return -1; - } - cp = buf; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - cp += itf8_put(cp, c->length); - } else if (CRAM_MAJOR_VERS(fd->version) <= 3) { - *(int32_t *)cp = le_int4(c->length); - cp += 4; - } else { - cp += fd->vv.varint_put32(cp, NULL, c->length); - } - if (c->multi_seq) { - cp += fd->vv.varint_put32(cp, NULL, (uint32_t)-2); - cp += fd->vv.varint_put32(cp, NULL, 0); - cp += fd->vv.varint_put32(cp, NULL, 0); - } else { - cp += fd->vv.varint_put32s(cp, NULL, c->ref_seq_id); - if (CRAM_MAJOR_VERS(fd->version) >= 4) { - cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_start); - cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_span); - } else { - cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_start); - cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_span); - } - } - cp += fd->vv.varint_put32(cp, NULL, c->num_records); - if (CRAM_MAJOR_VERS(fd->version) >= 3) - cp += fd->vv.varint_put64(cp, NULL, c->record_counter); - else - cp += fd->vv.varint_put32(cp, NULL, c->record_counter); - cp += fd->vv.varint_put64(cp, NULL, c->num_bases); - cp += fd->vv.varint_put32(cp, NULL, c->num_blocks); - cp += fd->vv.varint_put32(cp, NULL, c->num_landmarks); - for (i = 0; i < c->num_landmarks; i++) - cp += fd->vv.varint_put32(cp, NULL, c->landmark[i]); - - if (CRAM_MAJOR_VERS(fd->version) >= 3) { - c->crc32 = crc32(0L, (uc *)buf, cp-buf); - cp[0] = c->crc32 & 0xff; - cp[1] = (c->crc32 >> 8) & 0xff; - cp[2] = (c->crc32 >> 16) & 0xff; - cp[3] = (c->crc32 >> 24) & 0xff; - cp += 4; - } - - if (cp-buf != hwrite(fd->fp, buf, cp-buf)) { - if (buf != buf_a) - free(buf); - return -1; - } - - if (buf != buf_a) - free(buf); - - return 0; -} - -// common component shared by cram_flush_container{,_mt} -static int 
cram_flush_container2(cram_fd *fd, cram_container *c) { - int i, j; - - if (c->curr_slice > 0 && !c->slices) - return -1; - - //fprintf(stderr, "Writing container %d, sum %u\n", c->record_counter, sum); - - off_t c_offset = htell(fd->fp); // File offset of container - - /* Write the container struct itself */ - if (0 != cram_write_container(fd, c)) - return -1; - - off_t hdr_size = htell(fd->fp) - c_offset; - - /* And the compression header */ - if (0 != cram_write_block(fd, c->comp_hdr_block)) - return -1; - - /* Followed by the slice blocks */ - off_t file_offset = htell(fd->fp); - for (i = 0; i < c->curr_slice; i++) { - cram_slice *s = c->slices[i]; - off_t spos = file_offset - c_offset - hdr_size; - - if (0 != cram_write_block(fd, s->hdr_block)) - return -1; - - for (j = 0; j < s->hdr->num_blocks; j++) { - if (0 != cram_write_block(fd, s->block[j])) - return -1; - } - - file_offset = htell(fd->fp); - off_t sz = file_offset - c_offset - hdr_size - spos; - - if (fd->idxfp) { - if (cram_index_slice(fd, c, s, fd->idxfp, c_offset, spos, sz) < 0) - return -1; - } - } - - return 0; -} - -/* - * Flushes a completely or partially full container to disk, writing - * container structure, header and blocks. This also calls the encoder - * functions. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_flush_container(cram_fd *fd, cram_container *c) { - /* Encode the container blocks and generate compression header */ - if (0 != cram_encode_container(fd, c)) - return -1; - - return cram_flush_container2(fd, c); -} - -typedef struct { - cram_fd *fd; - cram_container *c; -} cram_job; - -void *cram_flush_thread(void *arg) { - cram_job *j = (cram_job *)arg; - - /* Encode the container blocks and generate compression header */ - if (0 != cram_encode_container(j->fd, j->c)) { - hts_log_error("Call to cram_encode_container failed"); - return NULL; - } - - return arg; -} - -static int cram_flush_result(cram_fd *fd) { - int i, ret = 0; - hts_tpool_result *r; - cram_container *lc = NULL; - - // NB: we can have one result per slice, not per container, - // so we need to free the container only after all slices - // within it have been freed. (Automatic via reference counting.) - while ((r = hts_tpool_next_result(fd->rqueue))) { - cram_job *j = (cram_job *)hts_tpool_result_data(r); - cram_container *c; - - if (!j) { - hts_tpool_delete_result(r, 0); - return -1; - } - - fd = j->fd; - c = j->c; - - if (fd->mode == 'w') - if (0 != cram_flush_container2(fd, c)) - return -1; - - // Free the slices; filled out by encoder only - if (c->slices) { - for (i = 0; i < c->max_slice; i++) { - if (c->slices[i]) - cram_free_slice(c->slices[i]); - if (c->slices[i] == c->slice) - c->slice = NULL; - c->slices[i] = NULL; - } - } - - // Free the current slice; set by both encoder & decoder - if (c->slice) { - cram_free_slice(c->slice); - c->slice = NULL; - } - c->curr_slice = 0; - - // Our jobs will be in order, so we free the last - // container when our job has switched to a new one. 
- if (c != lc) { - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } - lc = c; - } - - hts_tpool_delete_result(r, 1); - } - if (lc) { - if (fd->ctr == lc) - fd->ctr = NULL; - if (fd->ctr_mt == lc) - fd->ctr_mt = NULL; - cram_free_container(lc); - } - - return ret; -} - -// Note: called while metrics_lock is held. -// Will be left in this state too, but may temporarily unlock. -void reset_metrics(cram_fd *fd) { - int i; - - if (fd->pool) { - // If multi-threaded we have multiple blocks being - // compressed already and several on the to-do list - // (fd->rqueue->pending). It's tricky to reset the - // metrics exactly the correct point, so instead we - // just flush the pool, reset, and then continue again. - - // Don't bother starting a new trial before then though. - for (i = 0; i < DS_END; i++) { - cram_metrics *m = fd->m[i]; - if (!m) - continue; - m->next_trial = 999; - } - - pthread_mutex_unlock(&fd->metrics_lock); - hts_tpool_process_flush(fd->rqueue); - pthread_mutex_lock(&fd->metrics_lock); - } - - for (i = 0; i < DS_END; i++) { - cram_metrics *m = fd->m[i]; - if (!m) - continue; - - m->trial = NTRIALS; - m->next_trial = TRIAL_SPAN; - m->revised_method = 0; - m->unpackable = 0; - - memset(m->sz, 0, sizeof(m->sz)); - } -} - -int cram_flush_container_mt(cram_fd *fd, cram_container *c) { - cram_job *j; - - // At the junction of mapped to unmapped data the compression - // methods may need to change due to very different statistical - // properties; particularly BA if minhash sorted. - // - // However with threading we'll have several in-flight blocks - // arriving out of order. - // - // So we do one trial reset of NThreads to last for NThreads - // duration to get us over this transition period, followed - // by another retrial of the usual ntrials & trial span. 
- pthread_mutex_lock(&fd->metrics_lock); - if (c->n_mapped < 0.3*c->curr_rec && - fd->last_mapped > 0.7*c->max_rec) { - reset_metrics(fd); - } - fd->last_mapped = c->n_mapped * (c->max_rec+1)/(c->curr_rec+1) ; - pthread_mutex_unlock(&fd->metrics_lock); - - if (!fd->pool) - return cram_flush_container(fd, c); - - if (!(j = malloc(sizeof(*j)))) - return -1; - j->fd = fd; - j->c = c; - - // Flush the job. Note our encoder queue may be full, so we - // either have to keep trying in non-blocking mode (what we do) or - // use a dedicated separate thread for draining the queue. - for (;;) { - errno = 0; - hts_tpool_dispatch2(fd->pool, fd->rqueue, cram_flush_thread, j, 1); - int pending = (errno == EAGAIN); - if (cram_flush_result(fd) != 0) - return -1; - if (!pending) - break; - - usleep(1000); - } - - return 0; -} - -/* ---------------------------------------------------------------------- - * Compression headers; the first part of the container - */ - -/* - * Creates a new blank container compression header - * - * Returns header ptr on success - * NULL on failure - */ -cram_block_compression_hdr *cram_new_compression_header(void) { - cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr)); - if (!hdr) - return NULL; - - if (!(hdr->TD_blk = cram_new_block(CORE, 0))) { - free(hdr); - return NULL; - } - - if (!(hdr->TD_hash = kh_init(m_s2i))) { - cram_free_block(hdr->TD_blk); - free(hdr); - return NULL; - } - - if (!(hdr->TD_keys = string_pool_create(8192))) { - kh_destroy(m_s2i, hdr->TD_hash); - cram_free_block(hdr->TD_blk); - free(hdr); - return NULL; - } - - return hdr; -} - -void cram_free_compression_header(cram_block_compression_hdr *hdr) { - int i; - - if (hdr->landmark) - free(hdr->landmark); - - if (hdr->preservation_map) - kh_destroy(map, hdr->preservation_map); - - for (i = 0; i < CRAM_MAP_HASH; i++) { - cram_map *m, *m2; - for (m = hdr->rec_encoding_map[i]; m; m = m2) { - m2 = m->next; - if (m->codec) - m->codec->free(m->codec); - free(m); - } - } - - for (i 
= 0; i < CRAM_MAP_HASH; i++) { - cram_map *m, *m2; - for (m = hdr->tag_encoding_map[i]; m; m = m2) { - m2 = m->next; - if (m->codec) - m->codec->free(m->codec); - free(m); - } - } - - for (i = 0; i < DS_END; i++) { - if (hdr->codecs[i]) - hdr->codecs[i]->free(hdr->codecs[i]); - } - - if (hdr->TL) - free(hdr->TL); - if (hdr->TD_blk) - cram_free_block(hdr->TD_blk); - if (hdr->TD_hash) - kh_destroy(m_s2i, hdr->TD_hash); - if (hdr->TD_keys) - string_pool_destroy(hdr->TD_keys); - - free(hdr); -} - - -/* ---------------------------------------------------------------------- - * Slices and slice headers - */ - -void cram_free_slice_header(cram_block_slice_hdr *hdr) { - if (!hdr) - return; - - if (hdr->block_content_ids) - free(hdr->block_content_ids); - - free(hdr); - - return; -} - -void cram_free_slice(cram_slice *s) { - if (!s) - return; - - if (s->hdr_block) - cram_free_block(s->hdr_block); - - if (s->block) { - int i; - - if (s->hdr) { - for (i = 0; i < s->hdr->num_blocks; i++) { - if (i > 0 && s->block[i] == s->block[0]) - continue; - cram_free_block(s->block[i]); - } - } - free(s->block); - } - - { - // Normally already copied into s->block[], but potentially still - // here if we error part way through cram_encode_slice. 
- int i; - for (i = 0; i < s->naux_block; i++) - cram_free_block(s->aux_block[i]); - } - - if (s->block_by_id) - free(s->block_by_id); - - if (s->hdr) - cram_free_slice_header(s->hdr); - - if (s->seqs_blk) - cram_free_block(s->seqs_blk); - - if (s->qual_blk) - cram_free_block(s->qual_blk); - - if (s->name_blk) - cram_free_block(s->name_blk); - - if (s->aux_blk) - cram_free_block(s->aux_blk); - - if (s->base_blk) - cram_free_block(s->base_blk); - - if (s->soft_blk) - cram_free_block(s->soft_blk); - - if (s->cigar) - free(s->cigar); - - if (s->crecs) - free(s->crecs); - - if (s->features) - free(s->features); - - if (s->TN) - free(s->TN); - - if (s->pair_keys) - string_pool_destroy(s->pair_keys); - - if (s->pair[0]) - kh_destroy(m_s2i, s->pair[0]); - if (s->pair[1]) - kh_destroy(m_s2i, s->pair[1]); - - if (s->aux_block) - free(s->aux_block); - - free(s); -} - -/* - * Creates a new empty slice in memory, for subsequent writing to - * disk. - * - * Returns cram_slice ptr on success - * NULL on failure - */ -cram_slice *cram_new_slice(enum cram_content_type type, int nrecs) { - cram_slice *s = calloc(1, sizeof(*s)); - if (!s) - return NULL; - - if (!(s->hdr = (cram_block_slice_hdr *)calloc(1, sizeof(*s->hdr)))) - goto err; - s->hdr->content_type = type; - - s->hdr_block = NULL; - s->block = NULL; - s->block_by_id = NULL; - s->last_apos = 0; - if (!(s->crecs = malloc(nrecs * sizeof(cram_record)))) goto err; - s->cigar_alloc = 1024; - if (!(s->cigar = malloc(s->cigar_alloc * sizeof(*s->cigar)))) goto err; - s->ncigar = 0; - - if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0))) goto err; - if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS))) goto err; - if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN))) goto err; - if (!(s->aux_blk = cram_new_block(EXTERNAL, DS_aux))) goto err; - if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN))) goto err; - if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC))) goto err; - - s->features = NULL; - s->nfeatures = s->afeatures = 0; - 
-#ifndef TN_external - s->TN = NULL; - s->nTN = s->aTN = 0; -#endif - - // Volatile keys as we do realloc in dstring - if (!(s->pair_keys = string_pool_create(8192))) goto err; - if (!(s->pair[0] = kh_init(m_s2i))) goto err; - if (!(s->pair[1] = kh_init(m_s2i))) goto err; - -#ifdef BA_external - s->BA_len = 0; -#endif - - return s; - - err: - if (s) - cram_free_slice(s); - - return NULL; -} - -/* - * Loads an entire slice. - * FIXME: In 1.0 the native unit of slices within CRAM is broken - * as slices contain references to objects in other slices. - * To work around this while keeping the slice oriented outer loop - * we read all slices and stitch them together into a fake large - * slice instead. - * - * Returns cram_slice ptr on success - * NULL on failure - */ -cram_slice *cram_read_slice(cram_fd *fd) { - cram_block *b = cram_read_block(fd); - cram_slice *s = calloc(1, sizeof(*s)); - int i, n, max_id, min_id; - - if (!b || !s) - goto err; - - s->hdr_block = b; - switch (b->content_type) { - case MAPPED_SLICE: - case UNMAPPED_SLICE: - if (!(s->hdr = cram_decode_slice_header(fd, b))) - goto err; - break; - - default: - hts_log_error("Unexpected block of type %s", - cram_content_type2str(b->content_type)); - goto err; - } - - if (s->hdr->num_blocks < 1) { - hts_log_error("Slice does not include any data blocks"); - goto err; - } - - s->block = calloc(n = s->hdr->num_blocks, sizeof(*s->block)); - if (!s->block) - goto err; - - for (max_id = i = 0, min_id = INT_MAX; i < n; i++) { - if (!(s->block[i] = cram_read_block(fd))) - goto err; - - if (s->block[i]->content_type == EXTERNAL) { - if (max_id < s->block[i]->content_id) - max_id = s->block[i]->content_id; - if (min_id > s->block[i]->content_id) - min_id = s->block[i]->content_id; - } - } - - if (!(s->block_by_id = calloc(512, sizeof(s->block[0])))) - goto err; - - for (i = 0; i < n; i++) { - if (s->block[i]->content_type != EXTERNAL) - continue; - uint32_t v = s->block[i]->content_id; - if (v >= 256) - v = 256 + v 
% 251; - s->block_by_id[v] = s->block[i]; - } - - /* Initialise encoding/decoding tables */ - s->cigar_alloc = 1024; - if (!(s->cigar = malloc(s->cigar_alloc * sizeof(*s->cigar)))) goto err; - s->ncigar = 0; - - if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0))) goto err; - if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS))) goto err; - if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN))) goto err; - if (!(s->aux_blk = cram_new_block(EXTERNAL, DS_aux))) goto err; - if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN))) goto err; - if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC))) goto err; - - s->crecs = NULL; - - s->last_apos = s->hdr->ref_seq_start; - s->decode_md = fd->decode_md; - - return s; - - err: - if (b) - cram_free_block(b); - if (s) { - s->hdr_block = NULL; - cram_free_slice(s); - } - return NULL; -} - - -/* ---------------------------------------------------------------------- - * CRAM file definition (header) - */ - -/* - * Reads a CRAM file definition structure. - * Returns file_def ptr on success - * NULL on failure - */ -cram_file_def *cram_read_file_def(cram_fd *fd) { - cram_file_def *def = malloc(sizeof(*def)); - if (!def) - return NULL; - - if (26 != hread(fd->fp, &def->magic[0], 26)) { - free(def); - return NULL; - } - - if (memcmp(def->magic, "CRAM", 4) != 0) { - free(def); - return NULL; - } - - if (def->major_version > 4) { - hts_log_error("CRAM version number mismatch. Expected 1.x, 2.x, 3.x or 4.x, got %d.%d", - def->major_version, def->minor_version); - free(def); - return NULL; - } - - fd->first_container += 26; - fd->curr_position = fd->first_container; - fd->last_slice = 0; - - return def; -} - -/* - * Writes a cram_file_def structure to cram_fd. - * Returns 0 on success - * -1 on failure - */ -int cram_write_file_def(cram_fd *fd, cram_file_def *def) { - return (hwrite(fd->fp, &def->magic[0], 26) == 26) ? 
0 : -1; -} - -void cram_free_file_def(cram_file_def *def) { - if (def) free(def); -} - -/* ---------------------------------------------------------------------- - * SAM header I/O - */ - - -/* - * Reads the SAM header from the first CRAM data block. - * Also performs minimal parsing to extract read-group - * and sample information. - - * Returns SAM hdr ptr on success - * NULL on failure - */ -sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd) { - int32_t header_len; - char *header; - sam_hdr_t *hdr; - - /* 1.1 onwards stores the header in the first block of a container */ - if (CRAM_MAJOR_VERS(fd->version) == 1) { - /* Length */ - if (-1 == int32_decode(fd, &header_len)) - return NULL; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (header_len > FUZZ_ALLOC_LIMIT) - return NULL; -#endif - - /* Alloc and read */ - if (header_len < 0 || NULL == (header = malloc((size_t) header_len+1))) - return NULL; - - if (header_len != hread(fd->fp, header, header_len)) { - free(header); - return NULL; - } - header[header_len] = '\0'; - - fd->first_container += 4 + header_len; - } else { - cram_container *c = cram_read_container(fd); - cram_block *b; - int i; - int64_t len; - - if (!c) - return NULL; - - fd->first_container += c->length + c->offset; - fd->curr_position = fd->first_container; - - if (c->num_blocks < 1) { - cram_free_container(c); - return NULL; - } - - if (!(b = cram_read_block(fd))) { - cram_free_container(c); - return NULL; - } - if (cram_uncompress_block(b) != 0) { - cram_free_container(c); - cram_free_block(b); - return NULL; - } - - len = b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(b->uncomp_size) + - fd->vv.varint_size(b->comp_size); - - /* Extract header from 1st block */ - if (-1 == int32_get_blk(b, &header_len) || - header_len < 0 || /* Spec. says signed... why? 
*/ - b->uncomp_size - 4 < header_len) { - cram_free_container(c); - cram_free_block(b); - return NULL; - } - if (NULL == (header = malloc((size_t) header_len+1))) { - cram_free_container(c); - cram_free_block(b); - return NULL; - } - memcpy(header, BLOCK_END(b), header_len); - header[header_len] = '\0'; - cram_free_block(b); - - /* Consume any remaining blocks */ - for (i = 1; i < c->num_blocks; i++) { - if (!(b = cram_read_block(fd))) { - cram_free_container(c); - free(header); - return NULL; - } - len += b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(b->uncomp_size) + - fd->vv.varint_size(b->comp_size); - cram_free_block(b); - } - - if (c->length > 0 && len > 0 && c->length > len) { - // Consume padding - char *pads = malloc(c->length - len); - if (!pads) { - cram_free_container(c); - free(header); - return NULL; - } - - if (c->length - len != hread(fd->fp, pads, c->length - len)) { - cram_free_container(c); - free(header); - free(pads); - return NULL; - } - free(pads); - } - - cram_free_container(c); - } - - /* Parse */ - hdr = sam_hdr_init(); - if (!hdr) { - free(header); - return NULL; - } - - if (-1 == sam_hdr_add_lines(hdr, header, header_len)) { - free(header); - sam_hdr_destroy(hdr); - return NULL; - } - - hdr->l_text = header_len; - hdr->text = header; - - return hdr; - -} - -/* - * Converts 'in' to a full pathname to store in out. - * Out must be at least PATH_MAX bytes long. 
- */ -static void full_path(char *out, char *in) { - size_t in_l = strlen(in); - if (hisremote(in)) { - if (in_l > PATH_MAX) { - hts_log_error("Reference path is longer than %d", PATH_MAX); - return; - } - strncpy(out, in, PATH_MAX-1); - out[PATH_MAX-1] = 0; - return; - } - if (*in == '/' || - // Windows paths - (in_l > 3 && toupper_c(*in) >= 'A' && toupper_c(*in) <= 'Z' && - in[1] == ':' && (in[2] == '/' || in[2] == '\\'))) { - strncpy(out, in, PATH_MAX-1); - out[PATH_MAX-1] = 0; - } else { - size_t len; - - // unable to get dir or out+in is too long - if (!getcwd(out, PATH_MAX) || - (len = strlen(out))+1+strlen(in) >= PATH_MAX) { - strncpy(out, in, PATH_MAX-1); - out[PATH_MAX-1] = 0; - return; - } - - snprintf(out+len, PATH_MAX - len, "/%s", in); - - // FIXME: cope with `pwd`/../../../foo.fa ? - } -} - -/* - * Writes a CRAM SAM header. - * Returns 0 on success - * -1 on failure - */ -int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr) { - size_t header_len; - int blank_block = (CRAM_MAJOR_VERS(fd->version) >= 3); - - /* Write CRAM MAGIC if not yet written. 
*/ - if (fd->file_def->major_version == 0) { - fd->file_def->major_version = CRAM_MAJOR_VERS(fd->version); - fd->file_def->minor_version = CRAM_MINOR_VERS(fd->version); - if (0 != cram_write_file_def(fd, fd->file_def)) - return -1; - } - - /* 1.0 requires an UNKNOWN read-group */ - if (CRAM_MAJOR_VERS(fd->version) == 1) { - if (!sam_hrecs_find_rg(hdr->hrecs, "UNKNOWN")) - if (sam_hdr_add_line(hdr, "RG", - "ID", "UNKNOWN", "SM", "UNKNOWN", NULL)) - return -1; - } - - if (-1 == refs_from_header(fd)) - return -1; - if (-1 == refs2id(fd->refs, fd->header)) - return -1; - - /* Fix M5 strings */ - if (fd->refs && !fd->no_ref && fd->embed_ref <= 1) { - int i; - for (i = 0; i < hdr->hrecs->nref; i++) { - sam_hrec_type_t *ty; - char *ref; - - if (!(ty = sam_hrecs_find_type_id(hdr->hrecs, "SQ", "SN", hdr->hrecs->ref[i].name))) - return -1; - - if (!sam_hrecs_find_key(ty, "M5", NULL)) { - char unsigned buf[16]; - char buf2[33]; - int rlen; - hts_md5_context *md5; - - if (!fd->refs || - !fd->refs->ref_id || - !fd->refs->ref_id[i]) { - return -1; - } - rlen = fd->refs->ref_id[i]->length; - ref = cram_get_ref(fd, i, 1, rlen); - if (NULL == ref) { - if (fd->embed_ref == -1) { - // auto embed-ref - hts_log_warning("No M5 tags present and could not " - "find reference"); - hts_log_warning("Enabling embed_ref=2 option"); - hts_log_warning("NOTE: the CRAM file will be bigger " - "than using an external reference"); - pthread_mutex_lock(&fd->ref_lock); - fd->embed_ref = 2; - pthread_mutex_unlock(&fd->ref_lock); - break; - } - return -1; - } - rlen = fd->refs->ref_id[i]->length; /* In case it just loaded */ - if (!(md5 = hts_md5_init())) - return -1; - hts_md5_update(md5, ref, rlen); - hts_md5_final(buf, md5); - hts_md5_destroy(md5); - cram_ref_decr(fd->refs, i); - - hts_md5_hex(buf2, buf); - fd->refs->ref_id[i]->validated_md5 = 1; - if (sam_hdr_update_line(hdr, "SQ", "SN", hdr->hrecs->ref[i].name, "M5", buf2, NULL)) - return -1; - } - - if (fd->ref_fn) { - char ref_fn[PATH_MAX]; - 
full_path(ref_fn, fd->ref_fn); - if (sam_hdr_update_line(hdr, "SQ", "SN", hdr->hrecs->ref[i].name, "UR", ref_fn, NULL)) - return -1; - } - } - } - - /* Length */ - header_len = sam_hdr_length(hdr); - if (header_len > INT32_MAX) { - hts_log_error("Header is too long for CRAM format"); - return -1; - } - if (CRAM_MAJOR_VERS(fd->version) == 1) { - if (-1 == int32_encode(fd, header_len)) - return -1; - - /* Text data */ - if (header_len != hwrite(fd->fp, sam_hdr_str(hdr), header_len)) - return -1; - } else { - /* Create block(s) inside a container */ - cram_block *b = cram_new_block(FILE_HEADER, 0); - cram_container *c = cram_new_container(0, 0); - int padded_length; - char *pads; - int is_cram_3 = (CRAM_MAJOR_VERS(fd->version) >= 3); - - if (!b || !c) { - if (b) cram_free_block(b); - if (c) cram_free_container(c); - return -1; - } - - if (int32_put_blk(b, header_len) < 0) - return -1; - if (header_len) - BLOCK_APPEND(b, sam_hdr_str(hdr), header_len); - BLOCK_UPLEN(b); - - // Compress header block if V3.0 and above - if (CRAM_MAJOR_VERS(fd->version) >= 3) - if (cram_compress_block(fd, b, NULL, -1, -1) < 0) - return -1; - - if (blank_block) { - c->length = b->comp_size + 2 + 4*is_cram_3 + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(b->uncomp_size) + - fd->vv.varint_size(b->comp_size); - - c->num_blocks = 2; - c->num_landmarks = 2; - if (!(c->landmark = malloc(2*sizeof(*c->landmark)))) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - c->landmark[0] = 0; - c->landmark[1] = c->length; - - // Plus extra storage for uncompressed secondary blank block - padded_length = MIN(c->length*.5, 10000); - c->length += padded_length + 2 + 4*is_cram_3 + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(padded_length)*2; - } else { - // Pad the block instead. 
- c->num_blocks = 1; - c->num_landmarks = 1; - if (!(c->landmark = malloc(sizeof(*c->landmark)))) - return -1; - c->landmark[0] = 0; - - padded_length = MAX(c->length*1.5, 10000) - c->length; - - c->length = b->comp_size + padded_length + - 2 + 4*is_cram_3 + - fd->vv.varint_size(b->content_id) + - fd->vv.varint_size(b->uncomp_size) + - fd->vv.varint_size(b->comp_size); - - if (NULL == (pads = calloc(1, padded_length))) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - BLOCK_APPEND(b, pads, padded_length); - BLOCK_UPLEN(b); - free(pads); - } - - if (-1 == cram_write_container(fd, c)) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - - if (-1 == cram_write_block(fd, b)) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - - if (blank_block) { - BLOCK_RESIZE(b, padded_length); - memset(BLOCK_DATA(b), 0, padded_length); - BLOCK_SIZE(b) = padded_length; - BLOCK_UPLEN(b); - b->method = RAW; - if (-1 == cram_write_block(fd, b)) { - cram_free_block(b); - cram_free_container(c); - return -1; - } - } - - cram_free_block(b); - cram_free_container(c); - } - - if (0 != hflush(fd->fp)) - return -1; - - RP("=== Finishing saving header ===\n"); - - return 0; - - block_err: - return -1; -} - -/* ---------------------------------------------------------------------- - * The top-level cram opening, closing and option handling - */ - -/* - * Sets CRAM variable sized integer decode function tables. - * CRAM 1, 2, and 3.x all used ITF8 for uint32 and UTF8 for uint64. - * CRAM 4.x uses the same encoding mechanism for 32-bit and 64-bit - * (or anything inbetween), but also now supports signed values. - * - * Version is the CRAM major version number. 
- * vv is the vector table (probably &cram_fd->vv) - */ -static void cram_init_varint(varint_vec *vv, int version) { - if (version >= 4) { - vv->varint_get32 = uint7_get_32; // FIXME: varint.h API should be size agnostic - vv->varint_get32s = sint7_get_32; - vv->varint_get64 = uint7_get_64; - vv->varint_get64s = sint7_get_64; - vv->varint_put32 = uint7_put_32; - vv->varint_put32s = sint7_put_32; - vv->varint_put64 = uint7_put_64; - vv->varint_put64s = sint7_put_64; - vv->varint_put32_blk = uint7_put_blk_32; - vv->varint_put32s_blk = sint7_put_blk_32; - vv->varint_put64_blk = uint7_put_blk_64; - vv->varint_put64s_blk = sint7_put_blk_64; - vv->varint_size = uint7_size; - vv->varint_decode32_crc = uint7_decode_crc32; - vv->varint_decode32s_crc = sint7_decode_crc32; - vv->varint_decode64_crc = uint7_decode_crc64; - } else { - vv->varint_get32 = safe_itf8_get; - vv->varint_get32s = safe_itf8_get; - vv->varint_get64 = safe_ltf8_get; - vv->varint_get64s = safe_ltf8_get; - vv->varint_put32 = safe_itf8_put; - vv->varint_put32s = safe_itf8_put; - vv->varint_put64 = safe_ltf8_put; - vv->varint_put64s = safe_ltf8_put; - vv->varint_put32_blk = itf8_put_blk; - vv->varint_put32s_blk = itf8_put_blk; - vv->varint_put64_blk = ltf8_put_blk; - vv->varint_put64s_blk = ltf8_put_blk; - vv->varint_size = itf8_size; - vv->varint_decode32_crc = itf8_decode_crc; - vv->varint_decode32s_crc = itf8_decode_crc; - vv->varint_decode64_crc = ltf8_decode_crc; - } -} - -/* - * Initialises the lookup tables. These could be global statics, but they're - * clumsy to setup in a multi-threaded environment unless we generate - * verbatim code and include that. 
- */ -static void cram_init_tables(cram_fd *fd) { - int i; - - memset(fd->L1, 4, 256); - fd->L1['A'] = 0; fd->L1['a'] = 0; - fd->L1['C'] = 1; fd->L1['c'] = 1; - fd->L1['G'] = 2; fd->L1['g'] = 2; - fd->L1['T'] = 3; fd->L1['t'] = 3; - - memset(fd->L2, 5, 256); - fd->L2['A'] = 0; fd->L2['a'] = 0; - fd->L2['C'] = 1; fd->L2['c'] = 1; - fd->L2['G'] = 2; fd->L2['g'] = 2; - fd->L2['T'] = 3; fd->L2['t'] = 3; - fd->L2['N'] = 4; fd->L2['n'] = 4; - - if (CRAM_MAJOR_VERS(fd->version) == 1) { - for (i = 0; i < 0x200; i++) { - int f = 0; - - if (i & CRAM_FPAIRED) f |= BAM_FPAIRED; - if (i & CRAM_FPROPER_PAIR) f |= BAM_FPROPER_PAIR; - if (i & CRAM_FUNMAP) f |= BAM_FUNMAP; - if (i & CRAM_FREVERSE) f |= BAM_FREVERSE; - if (i & CRAM_FREAD1) f |= BAM_FREAD1; - if (i & CRAM_FREAD2) f |= BAM_FREAD2; - if (i & CRAM_FSECONDARY) f |= BAM_FSECONDARY; - if (i & CRAM_FQCFAIL) f |= BAM_FQCFAIL; - if (i & CRAM_FDUP) f |= BAM_FDUP; - - fd->bam_flag_swap[i] = f; - } - - for (i = 0; i < 0x1000; i++) { - int g = 0; - - if (i & BAM_FPAIRED) g |= CRAM_FPAIRED; - if (i & BAM_FPROPER_PAIR) g |= CRAM_FPROPER_PAIR; - if (i & BAM_FUNMAP) g |= CRAM_FUNMAP; - if (i & BAM_FREVERSE) g |= CRAM_FREVERSE; - if (i & BAM_FREAD1) g |= CRAM_FREAD1; - if (i & BAM_FREAD2) g |= CRAM_FREAD2; - if (i & BAM_FSECONDARY) g |= CRAM_FSECONDARY; - if (i & BAM_FQCFAIL) g |= CRAM_FQCFAIL; - if (i & BAM_FDUP) g |= CRAM_FDUP; - - fd->cram_flag_swap[i] = g; - } - } else { - /* NOP */ - for (i = 0; i < 0x1000; i++) - fd->bam_flag_swap[i] = i; - for (i = 0; i < 0x1000; i++) - fd->cram_flag_swap[i] = i; - } - - memset(fd->cram_sub_matrix, 4, 32*32); - for (i = 0; i < 32; i++) { - fd->cram_sub_matrix[i]['A'&0x1f]=0; - fd->cram_sub_matrix[i]['C'&0x1f]=1; - fd->cram_sub_matrix[i]['G'&0x1f]=2; - fd->cram_sub_matrix[i]['T'&0x1f]=3; - fd->cram_sub_matrix[i]['N'&0x1f]=4; - } - for (i = 0; i < 20; i+=4) { - int j; - for (j = 0; j < 20; j++) { - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; - 
fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; - } - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+0]&0x1f]=0; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+1]&0x1f]=1; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+2]&0x1f]=2; - fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+3]&0x1f]=3; - } - - cram_init_varint(&fd->vv, CRAM_MAJOR_VERS(fd->version)); -} - -// Default version numbers for CRAM -static int major_version = 3; -static int minor_version = 0; - -/* - * Opens a CRAM file for read (mode "rb") or write ("wb"). - * The filename may be "-" to indicate stdin or stdout. - * - * Returns file handle on success - * NULL on failure. - */ -cram_fd *cram_open(const char *filename, const char *mode) { - hFILE *fp; - cram_fd *fd; - char fmode[3]= { mode[0], '\0', '\0' }; - - if (strlen(mode) > 1 && (mode[1] == 'b' || mode[1] == 'c')) { - fmode[1] = 'b'; - } - - fp = hopen(filename, fmode); - if (!fp) - return NULL; - - fd = cram_dopen(fp, filename, mode); - if (!fd) - hclose_abruptly(fp); - - return fd; -} - -/* Opens an existing stream for reading or writing. - * - * Returns file handle on success; - * NULL on failure. 
- */ -cram_fd *cram_dopen(hFILE *fp, const char *filename, const char *mode) { - int i; - char *cp; - cram_fd *fd = calloc(1, sizeof(*fd)); - if (!fd) - return NULL; - - fd->level = CRAM_DEFAULT_LEVEL; - for (i = 0; mode[i]; i++) { - if (mode[i] >= '0' && mode[i] <= '9') { - fd->level = mode[i] - '0'; - break; - } - } - - fd->fp = fp; - fd->mode = *mode; - fd->first_container = 0; - fd->curr_position = 0; - - if (fd->mode == 'r') { - /* Reader */ - - if (!(fd->file_def = cram_read_file_def(fd))) - goto err; - - fd->version = fd->file_def->major_version * 256 + - fd->file_def->minor_version; - - cram_init_tables(fd); - - if (!(fd->header = cram_read_SAM_hdr(fd))) { - cram_free_file_def(fd->file_def); - goto err; - } - - } else { - /* Writer */ - cram_file_def *def = calloc(1, sizeof(*def)); - if (!def) - return NULL; - - fd->file_def = def; - - def->magic[0] = 'C'; - def->magic[1] = 'R'; - def->magic[2] = 'A'; - def->magic[3] = 'M'; - def->major_version = 0; // Indicator to write file def later. - def->minor_version = 0; - memset(def->file_id, 0, 20); - strncpy(def->file_id, filename, 20); - - fd->version = major_version * 256 + minor_version; - cram_init_tables(fd); - - /* SAM header written later along with this file_def */ - } - - fd->prefix = strdup((cp = strrchr(filename, '/')) ? 
cp+1 : filename); - if (!fd->prefix) - goto err; - fd->first_base = fd->last_base = -1; - fd->record_counter = 0; - - fd->ctr = NULL; - fd->ctr_mt = NULL; - fd->refs = refs_create(); - if (!fd->refs) - goto err; - fd->ref_id = -2; - fd->ref = NULL; - - fd->decode_md = 0; - fd->seqs_per_slice = SEQS_PER_SLICE; - fd->bases_per_slice = BASES_PER_SLICE; - fd->slices_per_container = SLICE_PER_CNT; - fd->embed_ref = -1; // automatic selection - fd->no_ref = 0; - fd->no_ref_counter = 0; - fd->ap_delta = 0; - fd->ignore_md5 = 0; - fd->lossy_read_names = 0; - fd->use_bz2 = 0; - fd->use_rans = (CRAM_MAJOR_VERS(fd->version) >= 3); - fd->use_tok = (CRAM_MAJOR_VERS(fd->version) >= 3) && (CRAM_MINOR_VERS(fd->version) >= 1); - fd->use_lzma = 0; - fd->multi_seq = -1; - fd->multi_seq_user = -1; - fd->unsorted = 0; - fd->shared_ref = 0; - fd->store_md = 0; - fd->store_nm = 0; - fd->last_RI_count = 0; - - fd->index = NULL; - fd->own_pool = 0; - fd->pool = NULL; - fd->rqueue = NULL; - fd->job_pending = NULL; - fd->ooc = 0; - fd->required_fields = INT_MAX; - - pthread_mutex_init(&fd->metrics_lock, NULL); - pthread_mutex_init(&fd->ref_lock, NULL); - pthread_mutex_init(&fd->range_lock, NULL); - pthread_mutex_init(&fd->bam_list_lock, NULL); - - for (i = 0; i < DS_END; i++) { - fd->m[i] = cram_new_metrics(); - if (!fd->m[i]) - goto err; - } - - if (!(fd->tags_used = kh_init(m_metrics))) - goto err; - - fd->range.refid = -2; // no ref. - fd->eof = 1; // See samtools issue #150 - fd->ref_fn = NULL; - - fd->bl = NULL; - - /* Initialise dummy refs from the @SQ headers */ - if (-1 == refs_from_header(fd)) - goto err; - - return fd; - - err: - if (fd) - free(fd); - - return NULL; -} - -/* - * Seek within a CRAM file. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_seek(cram_fd *fd, off_t offset, int whence) { - char buf[65536]; - - fd->ooc = 0; - - cram_drain_rqueue(fd); - - if (hseek(fd->fp, offset, whence) >= 0) { - return 0; - } - - if (!(whence == SEEK_CUR && offset >= 0)) - return -1; - - /* Couldn't fseek, but we're in SEEK_CUR mode so read instead */ - while (offset > 0) { - int len = MIN(65536, offset); - if (len != hread(fd->fp, buf, len)) - return -1; - offset -= len; - } - - return 0; -} - -/* - * Flushes a CRAM file. - * Useful for when writing to stdout without wishing to close the stream. - * - * Returns 0 on success - * -1 on failure - */ -int cram_flush(cram_fd *fd) { - if (!fd) - return -1; - - int ret = 0; - - if (fd->mode == 'w' && fd->ctr) { - if(fd->ctr->slice) - cram_update_curr_slice(fd->ctr, fd->version); - - if (-1 == cram_flush_container_mt(fd, fd->ctr)) - ret = -1; - - cram_free_container(fd->ctr); - if (fd->ctr_mt == fd->ctr) - fd->ctr_mt = NULL; - fd->ctr = NULL; - } - - return ret; -} - -/* - * Writes an EOF block to a CRAM file. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_write_eof_block(cram_fd *fd) { - // EOF block is a container with special values to aid detection - if (CRAM_MAJOR_VERS(fd->version) >= 2) { - // Empty container with - // ref_seq_id -1 - // start pos 0x454f46 ("EOF") - // span 0 - // nrec 0 - // counter 0 - // nbases 0 - // 1 block (landmark 0) - // (CRC32) - cram_container c; - memset(&c, 0, sizeof(c)); - c.ref_seq_id = -1; - c.ref_seq_start = 0x454f46; // "EOF" - c.ref_seq_span = 0; - c.record_counter = 0; - c.num_bases = 0; - c.num_blocks = 1; - int32_t land[1] = {0}; - c.landmark = land; - - // An empty compression header block with - // method raw (0) - // type comp header (1) - // content id 0 - // block contents size 6 - // raw size 6 - // empty preservation map (01 00) - // empty data series map (01 00) - // empty tag map (01 00) - // block CRC - cram_block_compression_hdr ch; - memset(&ch, 0, sizeof(ch)); - c.comp_hdr_block = cram_encode_compression_header(fd, &c, &ch, 0); - - c.length = c.comp_hdr_block->byte // Landmark[0] - + 5 // block struct - + 4*(CRAM_MAJOR_VERS(fd->version) >= 3); // CRC - if (cram_write_container(fd, &c) < 0 || - cram_write_block(fd, c.comp_hdr_block) < 0) { - cram_close(fd); - cram_free_block(c.comp_hdr_block); - return -1; - } - if (ch.preservation_map) - kh_destroy(map, ch.preservation_map); - cram_free_block(c.comp_hdr_block); - - // V2.1 bytes - // 0b 00 00 00 ff ff ff ff 0f // Cont HDR: size, ref seq id - // e0 45 4f 46 00 00 00 // Cont HDR: pos, span, nrec, counter - // 00 01 00 // Cont HDR: nbase, nblk, landmark - // 00 01 00 06 06 // Comp.HDR blk - // 01 00 01 00 01 00 // Comp.HDR blk - - // V3.0 bytes: - // 0f 00 00 00 ff ff ff ff 0f // Cont HDR: size, ref seq id - // e0 45 4f 46 00 00 00 // Cont HDR: pos, span, nrec, counter - // 00 01 00 // Cont HDR: nbase, nblk, landmark - // 05 bd d9 4f // CRC32 - // 00 01 00 06 06 // Comp.HDR blk - // 01 00 01 00 01 00 // Comp.HDR blk - // ee 63 01 4b // CRC32 - - // 
V4.0 bytes: - // 0f 00 00 00 8f ff ff ff // Cont HDR: size, ref seq id - // 82 95 9e 46 00 00 00 // Cont HDR: pos, span, nrec, counter - // 00 01 00 // Cont HDR: nbase, nblk, landmark - // ac d6 05 bc // CRC32 - // 00 01 00 06 06 // Comp.HDR blk - // 01 00 01 00 01 00 // Comp.HDR blk - // ee 63 01 4b // CRC32 - } - - return 0; -} - -/* - * Closes a CRAM file. - * Returns 0 on success - * -1 on failure - */ -int cram_close(cram_fd *fd) { - spare_bams *bl, *next; - int i, ret = 0; - - if (!fd) - return -1; - - if (fd->mode == 'w' && fd->ctr) { - if(fd->ctr->slice) - cram_update_curr_slice(fd->ctr, fd->version); - - if (-1 == cram_flush_container_mt(fd, fd->ctr)) - ret = -1; - } - - if (fd->mode != 'w') - cram_drain_rqueue(fd); - - if (fd->pool && fd->eof >= 0 && fd->rqueue) { - hts_tpool_process_flush(fd->rqueue); - - if (0 != cram_flush_result(fd)) - ret = -1; - - if (fd->mode == 'w') - fd->ctr = NULL; // prevent double freeing - - //fprintf(stderr, "CRAM: destroy queue %p\n", fd->rqueue); - - hts_tpool_process_destroy(fd->rqueue); - } - - pthread_mutex_destroy(&fd->metrics_lock); - pthread_mutex_destroy(&fd->ref_lock); - pthread_mutex_destroy(&fd->range_lock); - pthread_mutex_destroy(&fd->bam_list_lock); - - if (ret == 0 && fd->mode == 'w') { - /* Write EOF block */ - if (0 != cram_write_eof_block(fd)) - ret = -1; - } - - for (bl = fd->bl; bl; bl = next) { - int max_rec = fd->seqs_per_slice * fd->slices_per_container; - - next = bl->next; - free_bam_list(bl->bams, max_rec); - free(bl); - } - - if (hclose(fd->fp) != 0) - ret = -1; - - if (fd->file_def) - cram_free_file_def(fd->file_def); - - if (fd->header) - sam_hdr_destroy(fd->header); - - free(fd->prefix); - - if (fd->ctr) - cram_free_container(fd->ctr); - - if (fd->ctr_mt && fd->ctr_mt != fd->ctr) - cram_free_container(fd->ctr_mt); - - if (fd->refs) - refs_free(fd->refs); - if (fd->ref_free) - free(fd->ref_free); - - for (i = 0; i < DS_END; i++) - if (fd->m[i]) - free(fd->m[i]); - - if (fd->tags_used) { - 
khint_t k; - - for (k = kh_begin(fd->tags_used); k != kh_end(fd->tags_used); k++) { - if (kh_exist(fd->tags_used, k)) - free(kh_val(fd->tags_used, k)); - } - - kh_destroy(m_metrics, fd->tags_used); - } - - if (fd->index) - cram_index_free(fd); - - if (fd->own_pool && fd->pool) - hts_tpool_destroy(fd->pool); - - if (fd->idxfp) - if (bgzf_close(fd->idxfp) < 0) - ret = -1; - - free(fd); - - return ret; -} - -/* - * Returns 1 if we hit an EOF while reading. - */ -int cram_eof(cram_fd *fd) { - return fd->eof; -} - - -/* - * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h. - * Use this immediately after opening. - * - * Returns 0 on success - * -1 on failure - */ -int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...) { - int r; - va_list args; - - va_start(args, opt); - r = cram_set_voption(fd, opt, args); - va_end(args); - - return r; -} - -/* - * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h. - * Use this immediately after opening. 
- * - * Returns 0 on success - * -1 on failure - */ -int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args) { - refs_t *refs; - - if (!fd) { - errno = EBADF; - return -1; - } - - switch (opt) { - case CRAM_OPT_DECODE_MD: - fd->decode_md = va_arg(args, int); - break; - - case CRAM_OPT_PREFIX: - if (fd->prefix) - free(fd->prefix); - if (!(fd->prefix = strdup(va_arg(args, char *)))) - return -1; - break; - - case CRAM_OPT_VERBOSITY: - break; - - case CRAM_OPT_SEQS_PER_SLICE: - fd->seqs_per_slice = va_arg(args, int); - if (fd->bases_per_slice == BASES_PER_SLICE) - fd->bases_per_slice = fd->seqs_per_slice * 500; - break; - - case CRAM_OPT_BASES_PER_SLICE: - fd->bases_per_slice = va_arg(args, int); - break; - - case CRAM_OPT_SLICES_PER_CONTAINER: - fd->slices_per_container = va_arg(args, int); - break; - - case CRAM_OPT_EMBED_REF: - fd->embed_ref = va_arg(args, int); - break; - - case CRAM_OPT_NO_REF: - fd->no_ref = va_arg(args, int); - break; - - case CRAM_OPT_POS_DELTA: - fd->ap_delta = va_arg(args, int); - break; - - case CRAM_OPT_IGNORE_MD5: - fd->ignore_md5 = va_arg(args, int); - break; - - case CRAM_OPT_LOSSY_NAMES: - fd->lossy_read_names = va_arg(args, int); - // Currently lossy read names required paired (attached) reads. - // TLEN 0 or being 1 out causes read pairs to be detached, breaking - // the lossy read name compression, so we have extra options to - // slacken the exact TLEN round-trip checks. 
- fd->tlen_approx = fd->lossy_read_names; - fd->tlen_zero = fd->lossy_read_names; - break; - - case CRAM_OPT_USE_BZIP2: - fd->use_bz2 = va_arg(args, int); - break; - - case CRAM_OPT_USE_RANS: - fd->use_rans = va_arg(args, int); - break; - - case CRAM_OPT_USE_TOK: - fd->use_tok = va_arg(args, int); - break; - - case CRAM_OPT_USE_FQZ: - fd->use_fqz = va_arg(args, int); - break; - - case CRAM_OPT_USE_ARITH: - fd->use_arith = va_arg(args, int); - break; - - case CRAM_OPT_USE_LZMA: - fd->use_lzma = va_arg(args, int); - break; - - case CRAM_OPT_SHARED_REF: - fd->shared_ref = 1; - refs = va_arg(args, refs_t *); - if (refs != fd->refs) { - if (fd->refs) - refs_free(fd->refs); - fd->refs = refs; - fd->refs->count++; - } - break; - - case CRAM_OPT_RANGE: { - int r = cram_seek_to_refpos(fd, va_arg(args, cram_range *)); - pthread_mutex_lock(&fd->range_lock); - if (fd->range.refid != -2) - fd->required_fields |= SAM_POS; - pthread_mutex_unlock(&fd->range_lock); - return r; - } - - case CRAM_OPT_RANGE_NOSEEK: { - // As per CRAM_OPT_RANGE, but no seeking - pthread_mutex_lock(&fd->range_lock); - cram_range *r = va_arg(args, cram_range *); - fd->range = *r; - if (r->refid == HTS_IDX_NOCOOR) { - fd->range.refid = -1; - fd->range.start = 0; - } else if (r->refid == HTS_IDX_START || r->refid == HTS_IDX_REST) { - fd->range.refid = -2; // special case in cram_next_slice - } - if (fd->range.refid != -2) - fd->required_fields |= SAM_POS; - fd->ooc = 0; - fd->eof = 0; - pthread_mutex_unlock(&fd->range_lock); - return 0; - } - - case CRAM_OPT_REFERENCE: - return cram_load_reference(fd, va_arg(args, char *)); - - case CRAM_OPT_VERSION: { - int major, minor; - char *s = va_arg(args, char *); - if (2 != sscanf(s, "%d.%d", &major, &minor)) { - hts_log_error("Malformed version string %s", s); - return -1; - } - if (!((major == 1 && minor == 0) || - (major == 2 && (minor == 0 || minor == 1)) || - (major == 3 && (minor == 0 || minor == 1)) || - (major == 4 && minor == 0))) { - 
hts_log_error("Unknown version string; use 1.0, 2.0, 2.1, 3.0, 3.1 or 4.0"); - errno = EINVAL; - return -1; - } - - if (major > 3 || (major == 3 && minor > 1)) { - hts_log_warning( - "CRAM version %s is still a draft and subject to change.\n" - "This is a technology demonstration that should not be " - "used for archival data.", s); - } - - fd->version = major*256 + minor; - - fd->use_rans = (CRAM_MAJOR_VERS(fd->version) >= 3) ? 1 : 0; - - fd->use_tok = ((CRAM_MAJOR_VERS(fd->version) == 3 && - CRAM_MINOR_VERS(fd->version) >= 1) || - CRAM_MAJOR_VERS(fd->version) >= 4) ? 1 : 0; - cram_init_tables(fd); - - break; - } - - case CRAM_OPT_MULTI_SEQ_PER_SLICE: - fd->multi_seq_user = fd->multi_seq = va_arg(args, int); - break; - - case CRAM_OPT_NTHREADS: { - int nthreads = va_arg(args, int); - if (nthreads >= 1) { - if (!(fd->pool = hts_tpool_init(nthreads))) - return -1; - - fd->rqueue = hts_tpool_process_init(fd->pool, nthreads*2, 0); - fd->shared_ref = 1; - fd->own_pool = 1; - } - break; - } - - case CRAM_OPT_THREAD_POOL: { - htsThreadPool *p = va_arg(args, htsThreadPool *); - fd->pool = p ? p->pool : NULL; - if (fd->pool) { - fd->rqueue = hts_tpool_process_init(fd->pool, - p->qsize ? 
p->qsize : hts_tpool_size(fd->pool)*2, - 0); - } - fd->shared_ref = 1; // Needed to avoid clobbering ref between threads - fd->own_pool = 0; - - //fd->qsize = 1; - //fd->decoded = calloc(fd->qsize, sizeof(cram_container *)); - //hts_tpool_dispatch(fd->pool, cram_decoder_thread, fd); - break; - } - - case CRAM_OPT_REQUIRED_FIELDS: - fd->required_fields = va_arg(args, int); - if (fd->range.refid != -2) - fd->required_fields |= SAM_POS; - break; - - case CRAM_OPT_STORE_MD: - fd->store_md = va_arg(args, int); - break; - - case CRAM_OPT_STORE_NM: - fd->store_nm = va_arg(args, int); - break; - - case HTS_OPT_COMPRESSION_LEVEL: - fd->level = va_arg(args, int); - break; - - case HTS_OPT_PROFILE: { - enum hts_profile_option prof = va_arg(args, int); - switch (prof) { - case HTS_PROFILE_FAST: - if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 1; - fd->use_tok = 0; - fd->seqs_per_slice = 10000; - break; - - case HTS_PROFILE_NORMAL: - break; - - case HTS_PROFILE_SMALL: - if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 6; - fd->use_bz2 = 1; - fd->use_fqz = 1; - fd->seqs_per_slice = 25000; - break; - - case HTS_PROFILE_ARCHIVE: - if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 7; - fd->use_bz2 = 1; - fd->use_fqz = 1; - fd->use_arith = 1; - if (fd->level > 7) - fd->use_lzma = 1; - fd->seqs_per_slice = 100000; - break; - } - - if (fd->bases_per_slice == BASES_PER_SLICE) - fd->bases_per_slice = fd->seqs_per_slice * 500; - break; - } - - default: - hts_log_error("Unknown CRAM option code %d", opt); - errno = EINVAL; - return -1; - } - - return 0; -} - -int cram_check_EOF(cram_fd *fd) -{ - // Byte 9 in these templates is & with 0x0f to resolve differences - // between ITF-8 interpretations between early Java and C - // implementations of CRAM - static const unsigned char TEMPLATE_2_1[30] = { - 0x0b, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xe0, - 0x45, 0x4f, 0x46, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, - 0x01, 0x00, 0x06, 0x06, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00 - }; - 
static const unsigned char TEMPLATE_3[38] = { - 0x0f, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xe0, - 0x45, 0x4f, 0x46, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, - 0xbd, 0xd9, 0x4f, 0x00, 0x01, 0x00, 0x06, 0x06, 0x01, 0x00, - 0x01, 0x00, 0x01, 0x00, 0xee, 0x63, 0x01, 0x4b - }; - - unsigned char buf[38]; // max(sizeof TEMPLATE_*) - - uint8_t major = CRAM_MAJOR_VERS(fd->version); - uint8_t minor = CRAM_MINOR_VERS(fd->version); - - const unsigned char *template; - ssize_t template_len; - if ((major < 2) || - (major == 2 && minor == 0)) { - return 3; // No EOF support in cram versions less than 2.1 - } else if (major == 2 && minor == 1) { - template = TEMPLATE_2_1; - template_len = sizeof TEMPLATE_2_1; - } else { - template = TEMPLATE_3; - template_len = sizeof TEMPLATE_3; - } - - off_t offset = htell(fd->fp); - if (hseek(fd->fp, -template_len, SEEK_END) < 0) { - if (errno == ESPIPE) { - hclearerr(fd->fp); - return 2; - } - else { - return -1; - } - } - if (hread(fd->fp, buf, template_len) != template_len) return -1; - if (hseek(fd->fp, offset, SEEK_SET) < 0) return -1; - buf[8] &= 0x0f; - return (memcmp(template, buf, template_len) == 0)? 1 : 0; -} diff --git a/src/htslib-1.19.1/cram/cram_io.h b/src/htslib-1.19.1/cram/cram_io.h deleted file mode 100644 index 11279d4..0000000 --- a/src/htslib-1.19.1/cram/cram_io.h +++ /dev/null @@ -1,648 +0,0 @@ -/* -Copyright (c) 2012-2020 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * Include cram.h instead. - * - * This is an internal part of the CRAM system and is automatically included - * when you #include cram.h. - * - * Implements the low level CRAM I/O primitives. - * This includes basic data types such as byte, int, ITF-8, - * maps, bitwise I/O, etc. - */ - -#ifndef CRAM_IO_H -#define CRAM_IO_H - -#include - -#include "misc.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/**@{ ---------------------------------------------------------------------- - * ITF8 encoding and decoding. - * - * Also see the itf8_get and itf8_put macros. - */ - -/*! INTERNAL: Converts two characters into an integer for use in switch{} */ -#define CRAM_KEY(a,b) ((((unsigned char) a)<<8)|(((unsigned char) b))) - -/*! Reads an integer in ITF-8 encoding from 'fd' and stores it in - * *val. 
- * - * @return - * Returns the number of bytes read on success; - * -1 on failure - */ -int itf8_decode(cram_fd *fd, int32_t *val); - -extern const int itf8_bytes[16]; -extern const int ltf8_bytes[256]; - -/*! Pushes a value in ITF8 format onto the end of a block. - * - * This shouldn't be used for high-volume data as it is not the fastest - * method. - * - * @return - * Returns the number of bytes written - */ -int itf8_put_blk(cram_block *blk, int32_t val); -int ltf8_put_blk(cram_block *blk, int64_t val); - -/*! Pulls a literal 32-bit value from a block. - * - * @returns the number of bytes decoded; - * -1 on failure. - */ -int int32_get_blk(cram_block *b, int32_t *val); - -/*! Pushes a literal 32-bit value onto the end of a block. - * - * @return - * Returns 0 on success; - * -1 on failure. - */ -int int32_put_blk(cram_block *blk, int32_t val); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * CRAM blocks - the dynamically growable data block. We have code to - * create, update, (un)compress and read/write. - * - * These are derived from the deflate_interlaced.c blocks, but with the - * CRAM extension of content types and IDs. - */ - -/*! Allocates a new cram_block structure with a specified content_type and - * id. - * - * @return - * Returns block pointer on success; - * NULL on failure - */ -cram_block *cram_new_block(enum cram_content_type content_type, - int content_id); - -/*! Reads a block from a cram file. - * - * @return - * Returns cram_block pointer on success; - * NULL on failure - */ -cram_block *cram_read_block(cram_fd *fd); - -/*! Writes a CRAM block. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_write_block(cram_fd *fd, cram_block *b); - -/*! Frees a CRAM block, deallocating internal data too. - */ -void cram_free_block(cram_block *b); - -/*! Uncompress a memory block using Zlib. 
- * - * @return - * Returns 0 on success; - * -1 on failure - */ -char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size); - -/*! Uncompresses a CRAM block, if compressed. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_uncompress_block(cram_block *b); - -/*! Compresses a block. - * - * Compresses a block using one of two different zlib strategies. If we only - * want one choice set strat2 to be -1. - * - * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED - * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is - * significantly faster. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, - int method, int level); -int cram_compress_block2(cram_fd *fd, cram_slice *s, - cram_block *b, cram_metrics *metrics, - int method, int level); - -cram_metrics *cram_new_metrics(void); -char *cram_block_method2str(enum cram_block_method_int m); -char *cram_content_type2str(enum cram_content_type t); - -/* - * Find an external block by its content_id - */ - -static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) { - //fprintf(stderr, "%d\t%p\n", id, slice->block_by_id); - uint32_t v = id; - if (slice->block_by_id && v < 256) { - return slice->block_by_id[v]; - } else { - v = 256 + v % 251; - if (slice->block_by_id && - slice->block_by_id[v] && - slice->block_by_id[v]->content_id == id) - return slice->block_by_id[v]; - - // Otherwise a linear search in case of collision - int i; - for (i = 0; i < slice->hdr->num_blocks; i++) { - cram_block *b = slice->block[i]; - if (b && b->content_type == EXTERNAL && b->content_id == id) - return b; - } - } - return NULL; -} - -/* --- Accessor macros for manipulating blocks on a byte by byte basis --- */ - -/* Block size and data pointer. 
*/ -#define BLOCK_SIZE(b) ((b)->byte) -#define BLOCK_DATA(b) ((b)->data) - -/* Returns the address one past the end of the block */ -#define BLOCK_END(b) (&(b)->data[(b)->byte]) - -/* Make block exactly 'l' bytes long */ -static inline int block_resize_exact(cram_block *b, size_t len) { - unsigned char *tmp = realloc(b->data, len); - if (!tmp) - return -1; - b->alloc = len; - b->data = tmp; - return 0; -} - -/* Request block to be at least 'l' bytes long */ -static inline int block_resize(cram_block *b, size_t len) { - if (b->alloc > len) - return 0; - - size_t alloc = b->alloc+800; - alloc = MAX(alloc + (alloc>>2), len); - return block_resize_exact(b, alloc); -} - - -/* Ensure the block can hold at least another 'l' bytes */ -static inline int block_grow(cram_block *b, size_t len) { - return block_resize(b, BLOCK_SIZE(b) + len); -} - -/* Append string 's' of length 'l'. */ -static inline int block_append(cram_block *b, const void *s, size_t len) { - if (block_grow(b, len) < 0) - return -1; - - if (len) { - memcpy(BLOCK_END(b), s, len); - BLOCK_SIZE(b) += len; - } - - return 0; -} - -/* Append as single character 'c' */ -static inline int block_append_char(cram_block *b, char c) { - if (block_grow(b, 1) < 0) - return -1; - - b->data[b->byte++] = c; - return 0; -} - -/* Append a single unsigned integer */ -static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i); -static inline int block_append_uint(cram_block *b, unsigned int i) { - if (block_grow(b, 11) < 0) - return -1; - - unsigned char *cp = &b->data[b->byte]; - b->byte += append_uint32(cp, i) - cp; - return 0; -} - -// Versions of above with built in goto block_err calls. 
-#define BLOCK_RESIZE_EXACT(b,l) if (block_resize_exact((b),(l))<0) goto block_err -#define BLOCK_RESIZE(b,l) if (block_resize((b),(l)) <0) goto block_err -#define BLOCK_GROW(b,l) if (block_grow((b),(l)) <0) goto block_err -#define BLOCK_APPEND(b,s,l) if (block_append((b),(s),(l)) <0) goto block_err -#define BLOCK_APPEND_CHAR(b,c) if (block_append_char((b),(c)) <0) goto block_err -#define BLOCK_APPEND_UINT(b,i) if (block_append_uint((b),(i)) <0) goto block_err - -static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i) { - uint32_t j; - - if (i == 0) { - *cp++ = '0'; - return cp; - } - - if (i < 100) goto b1; - if (i < 10000) goto b3; - if (i < 1000000) goto b5; - if (i < 100000000) goto b7; - - if ((j = i / 1000000000)) {*cp++ = j + '0'; i -= j*1000000000; goto x8;} - if ((j = i / 100000000)) {*cp++ = j + '0'; i -= j*100000000; goto x7;} - b7:if ((j = i / 10000000)) {*cp++ = j + '0'; i -= j*10000000; goto x6;} - if ((j = i / 1000000)) {*cp++ = j + '0', i -= j*1000000; goto x5;} - b5:if ((j = i / 100000)) {*cp++ = j + '0', i -= j*100000; goto x4;} - if ((j = i / 10000)) {*cp++ = j + '0', i -= j*10000; goto x3;} - b3:if ((j = i / 1000)) {*cp++ = j + '0', i -= j*1000; goto x2;} - if ((j = i / 100)) {*cp++ = j + '0', i -= j*100; goto x1;} - b1:if ((j = i / 10)) {*cp++ = j + '0', i -= j*10; goto x0;} - if (i) *cp++ = i + '0'; - return cp; - - x8: *cp++ = i / 100000000 + '0', i %= 100000000; - x7: *cp++ = i / 10000000 + '0', i %= 10000000; - x6: *cp++ = i / 1000000 + '0', i %= 1000000; - x5: *cp++ = i / 100000 + '0', i %= 100000; - x4: *cp++ = i / 10000 + '0', i %= 10000; - x3: *cp++ = i / 1000 + '0', i %= 1000; - x2: *cp++ = i / 100 + '0', i %= 100; - x1: *cp++ = i / 10 + '0', i %= 10; - x0: *cp++ = i + '0'; - - return cp; -} - -static inline unsigned char *append_sub32(unsigned char *cp, uint32_t i) { - *cp++ = i / 100000000 + '0', i %= 100000000; - *cp++ = i / 10000000 + '0', i %= 10000000; - *cp++ = i / 1000000 + '0', i %= 1000000; - *cp++ = i / 
100000 + '0', i %= 100000; - *cp++ = i / 10000 + '0', i %= 10000; - *cp++ = i / 1000 + '0', i %= 1000; - *cp++ = i / 100 + '0', i %= 100; - *cp++ = i / 10 + '0', i %= 10; - *cp++ = i + '0'; - - return cp; -} - -static inline unsigned char *append_uint64(unsigned char *cp, uint64_t i) { - uint64_t j; - - if (i <= 0xffffffff) - return append_uint32(cp, i); - - if ((j = i/1000000000) > 1000000000) { - cp = append_uint32(cp, j/1000000000); - j %= 1000000000; - cp = append_sub32(cp, j); - } else { - cp = append_uint32(cp, i / 1000000000); - } - cp = append_sub32(cp, i % 1000000000); - - return cp; -} - -#define BLOCK_UPLEN(b) \ - (b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b)) - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Reference sequence handling - */ - -/*! Loads a reference set from fn and stores in the cram_fd. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_load_reference(cram_fd *fd, char *fn); - -/*! Generates a lookup table in refs based on the SQ headers in sam_hdr_t. - * - * Indexes references by the order they appear in a BAM file. This may not - * necessarily be the same order they appear in the fasta reference file. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int refs2id(refs_t *r, sam_hdr_t *hdr); - -void refs_free(refs_t *r); - -/*! Returns a portion of a reference sequence from start to end inclusive. - * - * The returned pointer is owned by the cram_file fd and should not be freed - * by the caller. It is valid only until the next cram_get_ref is called - * with the same fd parameter (so is thread-safe if given multiple files). - * - * To return the entire reference sequence, specify start as 1 and end - * as 0. 
- * - * @return - * Returns reference on success; - * NULL on failure - */ -char *cram_get_ref(cram_fd *fd, int id, int start, int end); -void cram_ref_incr(refs_t *r, int id); -void cram_ref_decr(refs_t *r, int id); -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Containers - */ - -/*! Creates a new container, specifying the maximum number of slices - * and records permitted. - * - * @return - * Returns cram_container ptr on success; - * NULL on failure - */ -cram_container *cram_new_container(int nrec, int nslice); -void cram_free_container(cram_container *c); - -/*! Reads a container header. - * - * @return - * Returns cram_container on success; - * NULL on failure or no container left (fd->err == 0). - */ -cram_container *cram_read_container(cram_fd *fd); - -/*! Writes a container structure. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_write_container(cram_fd *fd, cram_container *h); - -/*! Flushes a container to disk. - * - * Flushes a completely or partially full container to disk, writing - * container structure, header and blocks. This also calls the encoder - * functions. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_flush_container(cram_fd *fd, cram_container *c); -int cram_flush_container_mt(cram_fd *fd, cram_container *c); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Compression headers; the first part of the container - */ - -/*! Creates a new blank container compression header - * - * @return - * Returns header ptr on success; - * NULL on failure - */ -cram_block_compression_hdr *cram_new_compression_header(void); - -/*! Frees a cram_block_compression_hdr */ -void cram_free_compression_header(cram_block_compression_hdr *hdr); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Slices and slice headers - */ - -/*! 
Frees a slice header */ -void cram_free_slice_header(cram_block_slice_hdr *hdr); - -/*! Frees a slice */ -void cram_free_slice(cram_slice *s); - -/*! Creates a new empty slice in memory, for subsequent writing to - * disk. - * - * @return - * Returns cram_slice ptr on success; - * NULL on failure - */ -cram_slice *cram_new_slice(enum cram_content_type type, int nrecs); - -/*! Loads an entire slice. - * - * FIXME: In 1.0 the native unit of slices within CRAM is broken - * as slices contain references to objects in other slices. - * To work around this while keeping the slice oriented outer loop - * we read all slices and stitch them together into a fake large - * slice instead. - * - * @return - * Returns cram_slice ptr on success; - * NULL on failure - */ -cram_slice *cram_read_slice(cram_fd *fd); - - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * CRAM file definition (header) - */ - -/*! Reads a CRAM file definition structure. - * - * @return - * Returns file_def ptr on success; - * NULL on failure - */ -cram_file_def *cram_read_file_def(cram_fd *fd); - -/*! Writes a cram_file_def structure to cram_fd. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_write_file_def(cram_fd *fd, cram_file_def *def); - -/*! Frees a cram_file_def structure. */ -void cram_free_file_def(cram_file_def *def); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * SAM header I/O - */ - -/*! Reads the SAM header from the first CRAM data block. - * - * Also performs minimal parsing to extract read-group - * and sample information. - * - * @return - * Returns SAM hdr ptr on success; - * NULL on failure - */ -sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd); - -/*! Writes a CRAM SAM header. 
- * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr); - - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * The top-level cram opening, closing and option handling - */ - -/*! Opens a CRAM file for read (mode "rb") or write ("wb"). - * - * The filename may be "-" to indicate stdin or stdout. - * - * @return - * Returns file handle on success; - * NULL on failure. - */ -cram_fd *cram_open(const char *filename, const char *mode); - -/*! Opens an existing stream for reading or writing. - * - * @return - * Returns file handle on success; - * NULL on failure. - */ -cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode); - -/*! Closes a CRAM file. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_close(cram_fd *fd); - -/* - * Seek within a CRAM file. - * - * Returns 0 on success - * -1 on failure - */ -int cram_seek(cram_fd *fd, off_t offset, int whence); - -/* - * Flushes a CRAM file. - * Useful for when writing to stdout without wishing to close the stream. - * - * Returns 0 on success - * -1 on failure - */ -int cram_flush(cram_fd *fd); - -/*! Checks for end of file on a cram_fd stream. - * - * @return - * Returns 0 if not at end of file - * 1 if we hit an expected EOF (end of range or EOF block) - * 2 for other EOF (end of stream without EOF block) - */ -int cram_eof(cram_fd *fd); - -/*! Sets options on the cram_fd. - * - * See CRAM_OPT_* definitions in cram_structs.h. - * Use this immediately after opening. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...); - -/*! Sets options on the cram_fd. - * - * See CRAM_OPT_* definitions in cram_structs.h. - * Use this immediately after opening. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args); - -/*! 
- * Attaches a header to a cram_fd. - * - * This should be used when creating a new cram_fd for writing where - * we have an sam_hdr_t already constructed (eg from a file we've read - * in). - * - * @return - * Returns 0 on success; - * -1 on failure - */ -int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr); - -/*! - * Returns the hFILE connected to a cram_fd. - */ -static inline struct hFILE *cram_hfile(cram_fd *fd) { - return fd->fp; -} - -#ifdef __cplusplus -} -#endif - -#endif /* CRAM_IO_H */ diff --git a/src/htslib-1.19.1/cram/cram_samtools.h b/src/htslib-1.19.1/cram/cram_samtools.h deleted file mode 100644 index a4c9bf5..0000000 --- a/src/htslib-1.19.1/cram/cram_samtools.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -Copyright (c) 2010-2013, 2018, 2020 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CRAM_SAMTOOLS_H -#define CRAM_SAMTOOLS_H - -/* Samtools compatible API */ -#define bam_blk_size(b) ((b)->l_data) -#define bam_set_blk_size(b,v) ((b)->data_len = (v)) - -#define bam_ref(b) (b)->core.tid -#define bam_pos(b) (b)->core.pos -#define bam_mate_pos(b) (b)->core.mpos -#define bam_mate_ref(b) (b)->core.mtid -#define bam_ins_size(b) (b)->core.isize -#define bam_seq_len(b) (b)->core.l_qseq -#define bam_cigar_len(b) (b)->core.n_cigar -#define bam_flag(b) (b)->core.flag -#define bam_bin(b) (b)->core.bin -#define bam_map_qual(b) (b)->core.qual -#define bam_name_len(b) ((b)->core.l_qname - (b)->core.l_extranul) -#define bam_name(b) bam_get_qname((b)) -#define bam_qual(b) bam_get_qual((b)) -#define bam_seq(b) bam_get_seq((b)) -#define bam_cigar(b) bam_get_cigar((b)) -#define bam_aux(b) bam_get_aux((b)) - -#define bam_free(b) bam_destroy1((b)) - -#define bam_reg2bin(beg,end) hts_reg2bin((beg),(end),14,5) - -#include "../htslib/sam.h" - -enum cigar_op { - BAM_CMATCH_=BAM_CMATCH, - BAM_CINS_=BAM_CINS, - BAM_CDEL_=BAM_CDEL, - BAM_CREF_SKIP_=BAM_CREF_SKIP, - BAM_CSOFT_CLIP_=BAM_CSOFT_CLIP, - BAM_CHARD_CLIP_=BAM_CHARD_CLIP, - BAM_CPAD_=BAM_CPAD, - BAM_CBASE_MATCH=BAM_CEQUAL, - BAM_CBASE_MISMATCH=BAM_CDIFF -}; - -typedef bam1_t bam_seq_t; - -#endif /* CRAM_SAMTOOLS_H */ diff --git a/src/htslib-1.19.1/cram/cram_stats.h b/src/htslib-1.19.1/cram/cram_stats.h deleted file mode 100644 index 5f8cfec..0000000 --- a/src/htslib-1.19.1/cram/cram_stats.h +++ 
/dev/null @@ -1,59 +0,0 @@ -/* -Copyright (c) 2012-2013, 2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef CRAM_STATS_H -#define CRAM_STATS_H - -#ifdef __cplusplus -extern "C" { -#endif - -cram_stats *cram_stats_create(void); -int cram_stats_add(cram_stats *st, int64_t val); -void cram_stats_del(cram_stats *st, int64_t val); -void cram_stats_dump(cram_stats *st); -void cram_stats_free(cram_stats *st); - -/* - * Computes entropy from integer frequencies for various encoding methods and - * picks the best encoding. - * - * FIXME: we could reuse some of the code here for the actual encoding - * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman. - * - * Returns the best codec to use. - */ -enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/cram/mFILE.c b/src/htslib-1.19.1/cram/mFILE.c deleted file mode 100644 index 3ecdca3..0000000 --- a/src/htslib-1.19.1/cram/mFILE.c +++ /dev/null @@ -1,668 +0,0 @@ -/* -Copyright (c) 2005-2006, 2008-2009, 2013, 2015, 2017-2019 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../htslib/hts_log.h" -#include "os.h" -#include "mFILE.h" - -#ifdef HAVE_MMAP -#include -#endif - -/* - * This file contains memory-based versions of the most commonly used - * (by io_lib) stdio functions. - * - * Actual file IO takes place either on opening or closing an mFILE. - * - * Coupled to this are a bunch of rather scary macros which can be obtained - * by including stdio_hack.h. It is recommended though that you use mFILE.h - * instead and replace fopen with mfopen (etc). This is more or less - * mandatory if you wish to use both FILE and mFILE structs in a single file. - */ - -static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */ - -/* - * Reads the entirety of fp into memory. If 'fn' exists it is the filename - * associated with fp. This will be used for more optimal reading (via a - * stat to identify the size and a single read). Otherwise we use successive - * reads until EOF. 
- * - * Returns a malloced buffer on success of length *size - * NULL on failure - */ -static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) { - struct stat sb; - char *data = NULL; - size_t allocated = 0, used = 0; - int bufsize = 8192; - -#ifdef _WIN32 - if (binary) - _setmode(_fileno(fp), _O_BINARY); - else - _setmode(_fileno(fp), _O_TEXT); -#endif - - if (fn && -1 != stat(fn, &sb)) { - data = malloc(allocated = sb.st_size); - if (!data) - return NULL; - bufsize = sb.st_size; - } else { - fn = NULL; - } - - do { - size_t len; - if (used + bufsize > allocated) { - allocated += bufsize; - char *datan = realloc(data, allocated); - if (datan) { - data = datan; - } else { - free(data); - return NULL; - } - } - len = fread(data + used, 1, allocated - used, fp); - if (len > 0) - used += len; - } while (!feof(fp) && (fn == NULL || used < sb.st_size)); - - *size = used; - - return data; -} - - -#ifdef HAVE_MMAP -/* - * mmaps in the file, but only for reading currently. - * - * Returns 0 on success - * -1 on failure - */ -int mfmmap(mFILE *mf, FILE *fp, const char *fn) { - struct stat sb; - - if (stat(fn, &sb) != 0) - return -1; - - mf->size = sb.st_size; - mf->data = mmap(NULL, mf->size, PROT_READ, MAP_SHARED, - fileno(fp), 0); - - if (!mf->data || mf->data == (void *)-1) - return -1; - - mf->alloced = 0; - return 0; -} -#endif - - -/* - * Creates and returns m_channel[0]. - * We initialise this on the first attempted read, which then slurps in - * all of stdin until EOF is met. - */ -mFILE *mstdin(void) { - if (m_channel[0]) - return m_channel[0]; - - m_channel[0] = mfcreate(NULL, 0); - if (NULL == m_channel[0]) return NULL; - m_channel[0]->fp = stdin; - return m_channel[0]; -} - -static void init_mstdin(void) { - static int done_stdin = 0; - if (done_stdin) - return; - - m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1); - m_channel[0]->mode = MF_READ; - done_stdin = 1; -} - -/* - * Creates and returns m_channel[1]. 
This is the fake for stdout. It starts as - * an empty buffer which is physically written out only when mfflush or - * mfclose are called. - */ -mFILE *mstdout(void) { - if (m_channel[1]) - return m_channel[1]; - - m_channel[1] = mfcreate(NULL, 0); - if (NULL == m_channel[1]) return NULL; - m_channel[1]->fp = stdout; - m_channel[1]->mode = MF_WRITE; - return m_channel[1]; -} - -/* - * Stderr as an mFILE. - * The code handles stderr by returning m_channel[2], but also checking - * for stderr in fprintf (the common usage of it) to auto-flush. - */ -mFILE *mstderr(void) { - if (m_channel[2]) - return m_channel[2]; - - m_channel[2] = mfcreate(NULL, 0); - if (NULL == m_channel[2]) return NULL; - m_channel[2]->fp = stderr; - m_channel[2]->mode = MF_WRITE; - return m_channel[2]; -} - - -/* - * For creating existing mFILE pointers directly from memory buffers. - */ -mFILE *mfcreate(char *data, int size) { - mFILE *mf = (mFILE *)malloc(sizeof(*mf)); - if (NULL == mf) return NULL; - mf->fp = NULL; - mf->data = data; - mf->alloced = size; - mf->size = size; - mf->eof = 0; - mf->offset = 0; - mf->flush_pos = 0; - mf->mode = MF_READ | MF_WRITE; - return mf; -} - -/* - * Recreate an existing mFILE to house new data/size. - * It also rewinds the file. - */ -void mfrecreate(mFILE *mf, char *data, int size) { - if (mf->data) - free(mf->data); - mf->data = data; - mf->size = size; - mf->alloced = size; - mf->eof = 0; - mf->offset = 0; - mf->flush_pos = 0; -} - - -/* - * Creates a new mFILE to contain the contents of the FILE pointer. - * This mFILE is purely for in-memory operations and has no links to the - * original FILE* it came from. It also doesn't close the FILE pointer. - * Consider using mfreopen() is you need different behaviour. - * - * Returns mFILE * on success - * NULL on failure. 
- */ -mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) { - mFILE *mf; - - /* Open using mfreopen() */ - if (NULL == (mf = mfreopen(path, mode_str, fp))) - return NULL; - - /* Disassociate from the input stream */ - mf->fp = NULL; - - return mf; -} - -/* - * Converts a FILE * to an mFILE *. - * Use this for wrapper functions to turn external prototypes requiring - * FILE * as an argument into internal code using mFILE *. - */ -mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) { - mFILE *mf; - int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0; - - /* Parse mode: - * r = read file contents (if truncated => don't read) - * w = write on close - * a = position at end of buffer - * x = position at same location as the original fp, don't seek on flush - * + = for update (read and write) - * m = mmap (read only) - */ - if (strchr(mode_str, 'r')) - r = 1, mode |= MF_READ; - if (strchr(mode_str, 'w')) - w = 1, mode |= MF_WRITE | MF_TRUNC; - if (strchr(mode_str, 'a')) - w = a = 1, mode |= MF_WRITE | MF_APPEND; - if (strchr(mode_str, 'b')) - b = 1, mode |= MF_BINARY; - if (strchr(mode_str, 'x')) - x = 1; - if (strchr(mode_str, '+')) { - w = 1, mode |= MF_READ | MF_WRITE; - if (a) - r = 1; - } -#ifdef HAVE_MMAP - if (strchr(mode_str, 'm')) - if (!w) mode |= MF_MMAP; -#endif - - if (r) { - mf = mfcreate(NULL, 0); - if (NULL == mf) return NULL; - if (!(mode & MF_TRUNC)) { -#ifdef HAVE_MMAP - if (mode & MF_MMAP) { - if (mfmmap(mf, fp, path) == -1) { - mf->data = NULL; - mode &= ~MF_MMAP; - } - } -#endif - if (!mf->data) { - mf->data = mfload(fp, path, &mf->size, b); - if (!mf->data) { - free(mf); - return NULL; - } - mf->alloced = mf->size; - if (!a) - fseek(fp, 0, SEEK_SET); - } - } - } else if (w) { - /* Write - initialise the data structures */ - mf = mfcreate(NULL, 0); - if (NULL == mf) return NULL; - } else { - hts_log_error("Must specify either r, w or a for mode"); - return NULL; - } - mf->fp = fp; - mf->mode = mode; - - if (x) { - 
mf->mode |= MF_MODEX; - } - - if (a) { - mf->flush_pos = mf->size; - fseek(fp, 0, SEEK_END); - } - - return mf; -} - -/* - * Opens a file. If we have read access (r or a+) then it loads the entire - * file into memory. If We have write access then the pathname is stored. - * We do not actually write until an mfclose, which then checks this pathname. - */ -mFILE *mfopen(const char *path, const char *mode) { - FILE *fp; - - if (NULL == (fp = fopen(path, mode))) - return NULL; - return mfreopen(path, mode, fp); -} - -/* - * Closes an mFILE. If the filename is known (implying write access) then this - * also writes the data to disk. - * - * Stdout is handled by calling mfflush which writes to stdout if appropriate. - */ -int mfclose(mFILE *mf) { - if (!mf) - return -1; - - mfflush(mf); - -#ifdef HAVE_MMAP - if ((mf->mode & MF_MMAP) && mf->data) { - /* Mmaped */ - munmap(mf->data, mf->size); - mf->data = NULL; - } -#endif - - if (mf->fp) - fclose(mf->fp); - - mfdestroy(mf); - - return 0; -} - -/* - * Closes the file pointer contained within the mFILE without destroying - * the in-memory data. - * - * Attempting to do this on an mmaped buffer is an error. - */ -int mfdetach(mFILE *mf) { - if (!mf) - return -1; - - mfflush(mf); - if (mf->mode & MF_MMAP) - return -1; - - if (mf->fp) { - fclose(mf->fp); - mf->fp = NULL; - } - - return 0; -} - -/* - * Destroys an mFILE structure but does not flush or close it - */ -int mfdestroy(mFILE *mf) { - if (!mf) - return -1; - - if (mf->data) - free(mf->data); - free(mf); - - return 0; -} - -/* - * Steals that data out of an mFILE. The mFILE itself will be closed. - * It is up to the caller to free the stolen buffer. If size_out is - * not NULL, mf->size will be stored in it. - * This is more-or-less the opposite of mfcreate(). - * - * Note, we cannot steal the allocated buffer from an mmaped mFILE. 
- */ - -void *mfsteal(mFILE *mf, size_t *size_out) { - void *data; - - if (!mf) return NULL; - - data = mf->data; - - if (NULL != size_out) *size_out = mf->size; - - if (mfdetach(mf) != 0) - return NULL; - - mf->data = NULL; - mfdestroy(mf); - - return data; -} - -/* - * Seek/tell functions. Nothing more than updating and reporting an - * in-memory index. NB we can seek on stdin or stdout even provided we - * haven't been flushing. - */ -int mfseek(mFILE *mf, long offset, int whence) { - switch (whence) { - case SEEK_SET: - mf->offset = offset; - break; - case SEEK_CUR: - mf->offset += offset; - break; - case SEEK_END: - mf->offset = mf->size + offset; - break; - default: - errno = EINVAL; - return -1; - } - - mf->eof = 0; - return 0; -} - -long mftell(mFILE *mf) { - return mf->offset; -} - -void mrewind(mFILE *mf) { - mf->offset = 0; - mf->eof = 0; -} - -/* - * mftruncate is not directly a translation of ftruncate as the latter - * takes a file descriptor instead of a FILE *. It performs the analogous - * role though. - * - * If offset is -1 then the file is truncated to be the current file - * offset. - */ -void mftruncate(mFILE *mf, long offset) { - mf->size = offset != -1 ? offset : mf->offset; - if (mf->offset > mf->size) - mf->offset = mf->size; -} - -int mfeof(mFILE *mf) { - return mf->eof; -} - -/* - * mFILE read/write functions. Basically these turn fread/fwrite syntax - * into memcpy statements, with appropriate memory handling for writing. - */ -size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) { - size_t len; - char *cptr = (char *)ptr; - - if (mf == m_channel[0]) init_mstdin(); - - if (mf->size <= mf->offset) - return 0; - - len = size * nmemb <= mf->size - mf->offset - ? 
size * nmemb - : mf->size - mf->offset; - if (!size) - return 0; - - memcpy(cptr, &mf->data[mf->offset], len); - mf->offset += len; - - if (len != size * nmemb) { - mf->eof = 1; - } - - return len / size; -} - -size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) { - if (!(mf->mode & MF_WRITE)) - return 0; - - /* Append mode => forced all writes to end of file */ - if (mf->mode & MF_APPEND) - mf->offset = mf->size; - - /* Make sure we have enough room */ - while (size * nmemb + mf->offset > mf->alloced) { - size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024; - void * new_data = realloc(mf->data, new_alloced); - if (NULL == new_data) return 0; - mf->alloced = new_alloced; - mf->data = new_data; - } - - /* Record where we need to reflush from */ - if (mf->offset < mf->flush_pos) - mf->flush_pos = mf->offset; - - /* Copy the data over */ - memcpy(&mf->data[mf->offset], ptr, size * nmemb); - mf->offset += size * nmemb; - if (mf->size < mf->offset) - mf->size = mf->offset; - - return nmemb; -} - -int mfgetc(mFILE *mf) { - if (mf == m_channel[0]) init_mstdin(); - if (mf->offset < mf->size) { - return (unsigned char)mf->data[mf->offset++]; - } - - mf->eof = 1; - return -1; -} - -int mungetc(int c, mFILE *mf) { - if (mf->offset > 0) { - mf->data[--mf->offset] = c; - return c; - } - - mf->eof = 1; - return -1; -} - -char *mfgets(char *s, int size, mFILE *mf) { - int i; - - if (mf == m_channel[0]) init_mstdin(); - *s = 0; - for (i = 0; i < size-1;) { - if (mf->offset < mf->size) { - s[i] = mf->data[mf->offset++]; - if (s[i++] == '\n') - break; - } else { - mf->eof = 1; - break; - } - } - - s[i] = 0; - return i ? s : NULL; -} - -/* - * Flushes an mFILE. If this is a real open of a file in write mode then - * mFILE->fp will be set. We then write out any new data in mFILE since the - * last flush. 
We cannot tell what may have been modified as we don't keep - * track of that, so we typically rewrite out the entire file contents between - * the last flush_pos and the end of file. - * - * For stderr/stdout we also reset the offsets so we cannot modify things - * we've already output. - */ -int mfflush(mFILE *mf) { - if (!mf->fp) - return 0; - - /* FIXME: only do this when opened in write mode */ - if (mf == m_channel[1] || mf == m_channel[2]) { - if (mf->flush_pos < mf->size) { - size_t bytes = mf->size - mf->flush_pos; - if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes) - return -1; - if (0 != fflush(mf->fp)) - return -1; - } - - /* Stdout & stderr are non-seekable streams so throw away the data */ - mf->offset = mf->size = mf->flush_pos = 0; - } - - /* only flush when opened in write mode */ - if (mf->mode & MF_WRITE) { - if (mf->flush_pos < mf->size) { - size_t bytes = mf->size - mf->flush_pos; - if (!(mf->mode & MF_MODEX)) { - fseek(mf->fp, mf->flush_pos, SEEK_SET); - } - if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes) - return -1; - if (0 != fflush(mf->fp)) - return -1; - } - if (ftell(mf->fp) != -1 && - ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1) - return -1; - mf->flush_pos = mf->size; - } - - return 0; -} - -/* - * Converts an mFILE from binary to ascii mode by replacing all - * cr-nl with nl. - * - * Primarily used on windows when we've uncompressed a binary file which - * happens to be a text file (eg Experiment File). Previously we would have - * seeked back to the start and used _setmode(fileno(fp), _O_TEXT). - * - * Side effect: resets offset and flush_pos back to the start. 
- */ -void mfascii(mFILE *mf) { - size_t p1, p2; - - for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) { - if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') { - p2--; /* delete the \r */ - } - mf->data[p2] = mf->data[p1]; - } - mf->size = p2; - - mf->offset = mf->flush_pos = 0; -} diff --git a/src/htslib-1.19.1/cram/mFILE.h b/src/htslib-1.19.1/cram/mFILE.h deleted file mode 100644 index ca7062c..0000000 --- a/src/htslib-1.19.1/cram/mFILE.h +++ /dev/null @@ -1,93 +0,0 @@ -/* -Copyright (c) 2005-2006, 2008-2009, 2013, 2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CRAM_MFILE_H -#define CRAM_MFILE_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - FILE *fp; - char *data; - size_t alloced; - int eof; - int mode; /* open mode in MF_?? define bit pattern */ - size_t size; - size_t offset; - size_t flush_pos; -} mFILE; - -// Work around a clash with winuser.h -#ifdef MF_APPEND -# undef MF_APPEND -#endif - -#define MF_READ 1 -#define MF_WRITE 2 -#define MF_APPEND 4 -#define MF_BINARY 8 -#define MF_TRUNC 16 -#define MF_MODEX 32 -#define MF_MMAP 64 - -mFILE *mfreopen(const char *path, const char *mode, FILE *fp); -mFILE *mfopen(const char *path, const char *mode); -int mfdetach(mFILE *mf); -int mfclose(mFILE *mf); -int mfdestroy(mFILE *mf); -int mfseek(mFILE *mf, long offset, int whence); -long mftell(mFILE *mf); -void mrewind(mFILE *mf); -void mftruncate(mFILE *mf, long offset); -int mfeof(mFILE *mf); -size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf); -size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf); -int mfgetc(mFILE *mf); -int mungetc(int c, mFILE *mf); -mFILE *mfcreate(char *data, int size); -mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp); -void mfrecreate(mFILE *mf, char *data, int size); -void *mfsteal(mFILE *mf, size_t *size_out); -char *mfgets(char *s, int size, mFILE *mf); -int mfflush(mFILE *mf); -mFILE *mstdin(void); -mFILE *mstdout(void); -mFILE *mstderr(void); -void mfascii(mFILE *mf); - -#ifdef __cplusplus -} 
-#endif - -#endif /* CRAM_MFILE_H */ diff --git a/src/htslib-1.19.1/cram/misc.h b/src/htslib-1.19.1/cram/misc.h deleted file mode 100644 index 312dc7d..0000000 --- a/src/htslib-1.19.1/cram/misc.h +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright (c) 1994-1997, 2001-2002 MEDICAL RESEARCH COUNCIL -All rights reserved - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1 Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2 Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF -MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or -promote products derived from this software without specific prior written -permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* -Copyright (c) 2003-2013, 2018-2019 Genome Research Ltd. 
- -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CRAM_MISC_H -#define CRAM_MISC_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define MIN(A,B) ( ( (A) < (B) ) ? (A) : (B) ) -#define MAX(A,B) ( ( (A) > (B) ) ? 
(A) : (B) ) - -#ifdef __cplusplus -} -#endif - -#endif /* CRAM_MISC_H */ diff --git a/src/htslib-1.19.1/cram/open_trace_file.c b/src/htslib-1.19.1/cram/open_trace_file.c deleted file mode 100644 index 4d617b7..0000000 --- a/src/htslib-1.19.1/cram/open_trace_file.c +++ /dev/null @@ -1,438 +0,0 @@ -/* -Author: James Bonfield - -Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL -All rights reserved - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF -MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or -promote products derived from this software without specific prior written -permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* -Copyright (c) 2008, 2009, 2013, 2014-2015, 2018-2020 Genome Research Ltd. 
-Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "os.h" -#ifndef PATH_MAX -# define PATH_MAX 1024 -#endif - -#include "open_trace_file.h" -#include "misc.h" -#include "../htslib/hfile.h" -#include "../htslib/hts_log.h" -#include "../htslib/hts.h" - -/* - * Returns whether the path refers to a regular file. 
- */ -static int is_file(char *fn) { - struct stat buf; - if ( stat(fn,&buf) ) return 0; - return S_ISREG(buf.st_mode); -} - -/* - * Tokenises the search path splitting on colons (unix) or semicolons - * (windows). - * We also explicitly add a "./" to the end of the search path - * - * Returns: A new search path with items separated by nul chars. Two nul - * chars in a row represent the end of the tokenised path. - * Returns NULL for a failure. - * - * The returned data has been malloced. It is up to the caller to free this - * memory. - */ -char *tokenise_search_path(const char *searchpath) { - char *newsearch; - unsigned int i, j; - size_t len; - char path_sep = HTS_PATH_SEPARATOR_CHAR; - - if (!searchpath) - searchpath=""; - - newsearch = (char *)malloc((len = strlen(searchpath))+5); - if (!newsearch) - return NULL; - - for (i = 0, j = 0; i < len; i++) { - /* "::" => ":". Used for escaping colons in http://foo */ - if (i < len-1 && searchpath[i] == ':' && searchpath[i+1] == ':') { - newsearch[j++] = ':'; - i++; - continue; - } - - /* Handle http:// and ftp:// too without :: */ - if (path_sep == ':') { - if ((i == 0 || (i > 0 && searchpath[i-1] == ':')) && - (!strncmp(&searchpath[i], "http:", 5) || - !strncmp(&searchpath[i], "https:", 6) || - !strncmp(&searchpath[i], "ftp:", 4) || - !strncmp(&searchpath[i], "|http:", 6) || - !strncmp(&searchpath[i], "|https:", 7) || - !strncmp(&searchpath[i], "|ftp:", 5) || - !strncmp(&searchpath[i], "URL=http:", 9) || - !strncmp(&searchpath[i], "URL=https:",10)|| - !strncmp(&searchpath[i], "URL=ftp:", 8))) { - do { - newsearch[j++] = searchpath[i]; - } while (i 0) { - if (mfwrite(buf, len, 1, mf) <= 0) { - hclose_abruptly(hf); - goto fail; - } - } - if (hclose(hf) < 0 || len < 0) { - hts_log_warning("Failed to read reference \"%s\": %s", path, strerror(errno)); - goto fail; - } - - free(path); - mrewind(mf); - return mf; - - fail: - mfdestroy(mf); - free(path); - return NULL; -} - -/* - * Takes a dirname possibly including % 
rules and appends the filename - * to it. - * - * Returns expanded pathname or NULL for malloc failure. - */ -static char *expand_path(const char *file, char *dirname, int max_s_digits) { - size_t len = strlen(dirname); - size_t lenf = strlen(file); - char *cp, *path; - - path = malloc(len+lenf+2); // worst expansion DIR/FILE - if (!path) { - hts_log_error("Out of memory"); - return NULL; - } - - if (dirname[len-1] == '/') - len--; - - /* Special case for "./" or absolute filenames */ - if (*file == '/' || (len==1 && *dirname == '.')) { - memcpy(path, file, lenf + 1); - } else { - /* Handle %[0-9]*s expansions, if required */ - char *path_end = path; - *path = 0; - while ((cp = strchr(dirname, '%'))) { - char *endp; - long l = strtol(cp+1, &endp, 10); - if (*endp != 's' || endp - cp - 1 > max_s_digits) { - strncpy(path_end, dirname, (endp+1)-dirname); - path_end += (endp+1)-dirname; - dirname = endp+1; - continue; - } - - strncpy(path_end, dirname, cp-dirname); - path_end += cp-dirname; - if (l) { - strncpy(path_end, file, l); - path_end += MIN(strlen(file), l); - file += MIN(strlen(file), l); - } else { - strcpy(path_end, file); - path_end += strlen(file); - file += strlen(file); - } - len -= (endp+1) - dirname; - dirname = endp+1; - } - strncpy(path_end, dirname, len); - path_end += MIN(strlen(dirname), len); - *path_end = 0; - if (*file) { - *path_end++ = '/'; - strcpy(path_end, file); - } - } - - //fprintf(stderr, "*PATH=\"%s\"\n", path); - return path; -} - -/* - * Searches for file in the directory 'dirname'. If it finds it, it opens - * it. This also searches for compressed versions of the file in dirname - * too. 
- * - * Returns mFILE pointer if found - * NULL if not - */ -static mFILE *find_file_dir(const char *file, char *dirname) { - char *path; - mFILE *mf = NULL; - - path = expand_path(file, dirname, INT_MAX); - if (!path) - return NULL; - - if (is_file(path)) - mf = mfopen(path, "rbm"); - - free(path); - return mf; -} - -/* - * ------------------------------------------------------------------------ - * Public functions below. - */ - -/* - * Opens a trace file named 'file'. This is initially looked for as a - * pathname relative to a file named "relative_to". This may (for - * example) be the name of an experiment file referencing the trace - * file. In this case by passing relative_to as the experiment file - * filename the trace file will be picked up in the same directory as - * the experiment file. Relative_to may be supplied as NULL. - * - * 'file' is looked for at relative_to, then the current directory, and then - * all of the locations listed in 'path' (which is a colon separated list). - * If 'path' is NULL it uses the RAWDATA environment variable instead. - * - * Returns a mFILE pointer when found. - * NULL otherwise. - */ -mFILE *open_path_mfile(const char *file, char *path, char *relative_to) { - char *newsearch; - char *ele; - mFILE *fp; - - /* Use path first */ - if (!path) - path = getenv("RAWDATA"); - if (NULL == (newsearch = tokenise_search_path(path))) - return NULL; - - /* - * Step through the search path testing out each component. - * We now look through each path element treating some prefixes as - * special, otherwise we treat the element as a directory. - */ - for (ele = newsearch; *ele; ele += strlen(ele)+1) { - char *ele2; - - /* - * '|' prefixing a path component indicates that we do not - * wish to perform the compression extension searching in that - * location. - * - * NB: this has been removed from the htslib implementation. 
- */ - if (*ele == '|') { - ele2 = ele+1; - } else { - ele2 = ele; - } - - if (0 == strncmp(ele2, "URL=", 4)) { - if ((fp = find_file_url(file, ele2+4))) { - free(newsearch); - return fp; - } - } else if (!strncmp(ele2, "http:", 5) || - !strncmp(ele2, "https:", 6) || - !strncmp(ele2, "ftp:", 4)) { - if ((fp = find_file_url(file, ele2))) { - free(newsearch); - return fp; - } - } else if ((fp = find_file_dir(file, ele2))) { - free(newsearch); - return fp; - } - } - - free(newsearch); - - /* Look in the same location as the incoming 'relative_to' filename */ - if (relative_to) { - char *cp; - char relative_path[PATH_MAX+1]; - strcpy(relative_path, relative_to); - if ((cp = strrchr(relative_path, '/'))) - *cp = 0; - if ((fp = find_file_dir(file, relative_path))) - return fp; - } - - return NULL; -} - - -/* - * As per open_path_mfile, but searching only for local filenames. - * This is useful as we may avoid doing a full mfopen and loading - * the entire file into memory. - * - * Returns the expanded pathname if found. - * NULL if not - */ -char *find_path(const char *file, const char *path) { - char *newsearch; - char *ele; - char *outpath = NULL; - - /* Use path first */ - if (!path) - path = getenv("RAWDATA"); - if (NULL == (newsearch = tokenise_search_path(path))) - return NULL; - - for (ele = newsearch; *ele; ele += strlen(ele)+1) { - char *ele2 = (*ele == '|') ? 
ele+1 : ele; - - if (!strncmp(ele2, "URL=", 4) || - !strncmp(ele2, "http:", 5) || - !strncmp(ele2, "https:", 6) || - !strncmp(ele2, "ftp:", 4)) { - continue; - } else { - outpath = expand_path(file, ele2, INT_MAX); - if (is_file(outpath)) { - free(newsearch); - return outpath; - } else { - free(outpath); - } - } - } - - free(newsearch); - - return NULL; -} diff --git a/src/htslib-1.19.1/cram/open_trace_file.h b/src/htslib-1.19.1/cram/open_trace_file.h deleted file mode 100644 index 4586098..0000000 --- a/src/htslib-1.19.1/cram/open_trace_file.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -Author: James Bonfield - -Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL -All rights reserved - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - . Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - . Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - . Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF -MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or -promote products derived from this software without specific prior written -permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* -Copyright (c) 2008, 2009, 2013, 2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef OPEN_TRACE_FILE_H -#define OPEN_TRACE_FILE_H - -#include "mFILE.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Tokenises the search path splitting on colons (unix) or semicolons - * (windows). - * We also explicitly add a "./" to the end of the search path - * - * Returns: A new search path with items separated by nul chars. Two nul - * chars in a row represent the end of the tokenised path. - * Returns NULL for a failure. - * - * The returned data has been malloced. It is up to the caller to free this - * memory. - */ -char *tokenise_search_path(const char *searchpath); - -/* - * Opens a trace file named 'file'. This is initially looked for as a - * pathname relative to a file named "relative_to". This may (for - * example) be the name of an experiment file referencing the trace - * file. In this case by passing relative_to as the experiment file - * filename the trace file will be picked up in the same directory as - * the experiment file. Relative_to may be supplied as NULL. - * - * 'file' is looked for at relative_to, then the current directory, and then - * all of the locations listed in 'path' (which is a colon separated list). - * If 'path' is NULL it uses the RAWDATA environment variable instead. - * - * Returns a mFILE pointer when found. - * NULL otherwise. 
- */ -mFILE *open_path_mfile(const char *file, char *path, char *relative_to); - -/* - * Returns a mFILE containing the entire contents of the url; - * NULL on failure. - */ -mFILE *find_file_url(const char *file, char *url); - - -/* - * As per open_path_mfile, but searching only for local filenames. - * This is useful as we may avoid doing a full mfopen and loading - * the entire file into memory. - * - * Returns the expanded pathname if found. - * NULL if not - */ -char *find_path(const char *file, const char *path); - -#ifdef __cplusplus -} -#endif - -#endif /* OPEN_TRACE_FILE_H */ diff --git a/src/htslib-1.19.1/cram/os.h b/src/htslib-1.19.1/cram/os.h deleted file mode 100644 index 1f39887..0000000 --- a/src/htslib-1.19.1/cram/os.h +++ /dev/null @@ -1,205 +0,0 @@ -/* -Copyright (c) 1993, 1995-2002 MEDICAL RESEARCH COUNCIL -All rights reserved - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1 Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2 Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF -MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or -promote products derived from this software without specific prior written -permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* -Copyright (c) 2004, 2006, 2009-2011, 2013, 2017-2018 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * File: os.h - * - * Author: - * MRC Laboratory of Molecular Biology - * Hills Road - * Cambridge CB2 2QH - * United Kingdom - * - * Description: operating system specific type definitions - * - */ - -#ifndef CRAM_OS_H -#define CRAM_OS_H - -#include -#include - -#include "../htslib/hts_endian.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -/*----------------------------------------------------------------------------- - * Byte swapping macros - */ - -/* - * Our new swap runs at the same speed on Ultrix, but substantially faster - * (300% for swap_int4, ~50% for swap_int2) on an Alpha (due to the lack of - * decent 'char' support). - * - * They also have the ability to swap in situ (src == dst). Newer code now - * relies on this so don't change back! - */ -#define iswap_int8(x) \ - (((x & 0x00000000000000ffLL) << 56) + \ - ((x & 0x000000000000ff00LL) << 40) + \ - ((x & 0x0000000000ff0000LL) << 24) + \ - ((x & 0x00000000ff000000LL) << 8) + \ - ((x & 0x000000ff00000000LL) >> 8) + \ - ((x & 0x0000ff0000000000LL) >> 24) + \ - ((x & 0x00ff000000000000LL) >> 40) + \ - ((x & 0xff00000000000000LL) >> 56)) - -#define iswap_int4(x) \ - (((x & 0x000000ff) << 24) + \ - ((x & 0x0000ff00) << 8) + \ - ((x & 0x00ff0000) >> 8) + \ - ((x & 0xff000000) >> 24)) - -#define iswap_int2(x) \ - (((x & 0x00ff) << 8) + \ - ((x & 0xff00) >> 8)) - -/* - * Linux systems may use byteswap.h to get assembly versions of byte-swap - * on intel systems. 
This can be as trivial as the bswap opcode, which works - * out at over 2-times faster than iswap_int4 above. - */ -#if 0 -#if defined(__linux__) -# include -# undef iswap_int8 -# undef iswap_int4 -# undef iswap_int2 -# define iswap_int8 bswap_64 -# define iswap_int4 bswap_32 -# define iswap_int2 bswap_16 -#endif -#endif - - -/* - * Macros to specify that data read in is of a particular endianness. - * The macros here swap to the appropriate order for the particular machine - * running the macro and return the new answer. These may also be used when - * writing to a file to specify that we wish to write in (eg) big endian - * format. - * - * This leads to efficient code as most of the time these macros are - * trivial. - */ -#if defined(HTS_BIG_ENDIAN) -#define le_int4(x) iswap_int4((x)) -#define le_int2(x) iswap_int2((x)) -#elif defined(HTS_LITTLE_ENDIAN) -#define le_int4(x) (x) -#define le_int2(x) (x) -#else -static inline uint32_t le_int4(uint32_t x) { - return le_to_u32((uint8_t *) &x); -} -static inline uint16_t le_int2(uint16_t x) { - return le_to_u16((uint8_t *) &x); -} -#endif - -/*----------------------------------------------------------------------------- - * Operating system specifics. - * These ought to be done by autoconf, but are legacy code. 
- */ -/* - * SunOS 4.x - * Even though we use the ANSI gcc, we make use the the standard SunOS 4.x - * libraries and include files, which are non-ansi - */ -#if defined(__sun__) && !defined(__svr4__) -#define SEEK_SET 0 -#define SEEK_CUR 1 -#define SEEK_END 2 -#endif - -/* - * Microsoft Visual C++ - * Windows - */ -#if defined(_MSC_VER) -#define popen _popen -#define pclose _pclose -#define ftruncate(fd,len) _chsize(fd,len) -#endif - - -/* - * Microsoft Windows running MinGW - */ -#if defined(__MINGW32__) -#include -#define mkdir(filename,mode) mkdir((filename)) -#define sysconf(x) 512 -#ifndef ftruncate -# define ftruncate(fd,len) _chsize(fd,len) -#endif -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* CRAM_OS_H */ diff --git a/src/htslib-1.19.1/cram/pooled_alloc.c b/src/htslib-1.19.1/cram/pooled_alloc.c deleted file mode 100644 index 4601a7f..0000000 --- a/src/htslib-1.19.1/cram/pooled_alloc.c +++ /dev/null @@ -1,205 +0,0 @@ -/* -Copyright (c) 2009, 2013, 2015, 2018-2019 Genome Research Ltd. -Author: Rob Davies - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include - -#include "pooled_alloc.h" -#include "misc.h" - -//#define DISABLE_POOLED_ALLOC -//#define TEST_MAIN - -#define PSIZE 1024*1024 - -// credit to http://graphics.stanford.edu/~seander/bithacks.html -static int next_power_2(unsigned int v) { - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - - return v; -} - -/* - * Creates a pool. - * Pool allocations are approx minimum of 1024*dsize or PSIZE. 
- * (Assumes we're not trying to use pools for >= 2Gb or more) - */ -pool_alloc_t *pool_create(size_t dsize) { - pool_alloc_t *p; - - if (NULL == (p = (pool_alloc_t *)malloc(sizeof(*p)))) - return NULL; - - /* Minimum size is a pointer, for free list */ - dsize = (dsize + sizeof(void *) - 1) & ~(sizeof(void *)-1); - if (dsize < sizeof(void *)) - dsize = sizeof(void *); - p->dsize = dsize; - p->psize = MIN(PSIZE, next_power_2(p->dsize*1024)); - - p->npools = 0; - p->pools = NULL; - p->free = NULL; - - return p; -} - -void pool_destroy(pool_alloc_t *p) { - size_t i; - - for (i = 0; i < p->npools; i++) { - free(p->pools[i].pool); - } - free(p->pools); - free(p); -} - -#ifndef DISABLE_POOLED_ALLOC - -static pool_t *new_pool(pool_alloc_t *p) { - size_t n = p->psize / p->dsize; - pool_t *pool; - - pool = realloc(p->pools, (p->npools + 1) * sizeof(*p->pools)); - if (NULL == pool) return NULL; - p->pools = pool; - pool = &p->pools[p->npools]; - - pool->pool = malloc(n * p->dsize); - if (NULL == pool->pool) return NULL; - - pool->used = 0; - - p->npools++; - - return pool; -} - -void *pool_alloc(pool_alloc_t *p) { - pool_t *pool; - void *ret; - - /* Look on free list */ - if (NULL != p->free) { - ret = p->free; - p->free = *((void **)p->free); - return ret; - } - - /* Look for space in the last pool */ - if (p->npools) { - pool = &p->pools[p->npools - 1]; - if (pool->used + p->dsize < p->psize) { - ret = ((char *) pool->pool) + pool->used; - pool->used += p->dsize; - return ret; - } - } - - /* Need a new pool */ - pool = new_pool(p); - if (NULL == pool) return NULL; - - pool->used = p->dsize; - return pool->pool; -} - -void pool_free(pool_alloc_t *p, void *ptr) { - *(void **)ptr = p->free; - p->free = ptr; -} - -#else - -void *pool_alloc(pool_alloc_t *p) { - return malloc(p->dsize); -} - -void pool_free(pool_alloc_t *p, void *ptr) { - free(ptr); -} - -#endif - -#ifdef TEST_MAIN -typedef struct { - int x, y, z; -} xyz; - -#define NP 10000 -int main(void) { - int i; - xyz 
*item; - xyz **items; - pool_alloc_t *p = pool_create(sizeof(xyz)); - - items = (xyz **)malloc(NP * sizeof(*items)); - - for (i = 0; i < NP; i++) { - item = pool_alloc(p); - item->x = i; - item->y = i+1; - item->z = i+2; - items[i] = item; - } - - for (i = 0; i < NP; i++) { - item = items[i]; - if (i % 3) - pool_free(p, item); - } - - for (i = 0; i < NP; i++) { - item = pool_alloc(p); - item->x = 1000000+i; - item->y = 1000000+i+1; - item->z = 1000000+i+2; - } - - for (i = 0; i < NP; i++) { - item = items[i]; - printf("%d\t%d\t%d\t%d\n", i, item->x, item->y, item->z); - pool_free(p, item); - } - - free(items); - return 0; -} -#endif diff --git a/src/htslib-1.19.1/cram/pooled_alloc.h b/src/htslib-1.19.1/cram/pooled_alloc.h deleted file mode 100644 index bb49d11..0000000 --- a/src/htslib-1.19.1/cram/pooled_alloc.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -Copyright (c) 2009, 2013, 2018 Genome Research Ltd. -Author: Rob Davies - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef POOLED_ALLOC_H -#define POOLED_ALLOC_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Implements a pooled block allocator where all items are the same size, - * but we need many of them. - */ -typedef struct { - void *pool; - size_t used; -} pool_t; - -typedef struct { - size_t dsize; - size_t psize; - size_t npools; - pool_t *pools; - void *free; -} pool_alloc_t; - -pool_alloc_t *pool_create(size_t dsize); -void pool_destroy(pool_alloc_t *p); -void *pool_alloc(pool_alloc_t *p); -void pool_free(pool_alloc_t *p, void *ptr); - -#ifdef __cplusplus -} -#endif - -#endif /* POOLED_ALLOC_H */ diff --git a/src/htslib-1.19.1/cram/string_alloc.c b/src/htslib-1.19.1/cram/string_alloc.c deleted file mode 100644 index c339b10..0000000 --- a/src/htslib-1.19.1/cram/string_alloc.c +++ /dev/null @@ -1,162 +0,0 @@ -/* -Copyright (c) 2010, 2013, 2018-2019 Genome Research Ltd. -Author: Andrew Whitwham - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -/* - A pooled string allocator intended to cut down on the - memory overhead of many small string allocations. - - Andrew Whitwham, September 2010. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include - -#include "string_alloc.h" - -#define MIN_STR_SIZE 1024 - - -/* creates the string pool. max_length is the initial size - a single string can be. 
The max_length can grow as - needed */ - -string_alloc_t *string_pool_create(size_t max_length) { - string_alloc_t *a_str; - - if (NULL == (a_str = (string_alloc_t *)malloc(sizeof(*a_str)))) { - return NULL; - } - - if (max_length < MIN_STR_SIZE) max_length = MIN_STR_SIZE; - - a_str->nstrings = 0; - a_str->max_strings = 0; - a_str->max_length = max_length; - a_str->strings = NULL; - - return a_str; -} - - -/* internal function to do the actual memory allocation */ - -static string_t *new_string_pool(string_alloc_t *a_str) { - string_t *str; - - if (a_str->nstrings == a_str->max_strings) { - size_t new_max = (a_str->max_strings | (a_str->max_strings >> 2)) + 1; - str = realloc(a_str->strings, new_max * sizeof(*a_str->strings)); - - if (NULL == str) return NULL; - - a_str->strings = str; - a_str->max_strings = new_max; - } - - str = &a_str->strings[a_str->nstrings]; - - str->str = malloc(a_str->max_length); - - if (NULL == str->str) return NULL; - - str->used = 0; - a_str->nstrings++; - - return str; -} - - -/* free allocated memory */ - -void string_pool_destroy(string_alloc_t *a_str) { - size_t i; - - for (i = 0; i < a_str->nstrings; i++) { - free(a_str->strings[i].str); - } - - free(a_str->strings); - free(a_str); -} - - -/* allocate space for a string */ - -char *string_alloc(string_alloc_t *a_str, size_t length) { - string_t *str; - char *ret; - - if (length <= 0) return NULL; - - // add to last string pool if we have space - if (a_str->nstrings) { - str = &a_str->strings[a_str->nstrings - 1]; - - if (str->used + length < a_str->max_length) { - ret = str->str + str->used; - str->used += length; - return ret; - } - } - - // increase the max length if needs be - if (length > a_str->max_length) a_str->max_length = length; - - // need a new string pool - str = new_string_pool(a_str); - - if (NULL == str) return NULL; - - str->used = length; - return str->str; -} - - -/* equivalent to strdup */ - -char *string_dup(string_alloc_t *a_str, const char *instr) { - return 
string_ndup(a_str, instr, strlen(instr)); -} - -char *string_ndup(string_alloc_t *a_str, const char *instr, size_t len) { - char *str = string_alloc(a_str, len + 1); - - if (NULL == str) return NULL; - - memcpy(str, instr, len); - str[len] = 0; - - return str; -} diff --git a/src/htslib-1.19.1/cram/string_alloc.h b/src/htslib-1.19.1/cram/string_alloc.h deleted file mode 100644 index 42ebb0a..0000000 --- a/src/htslib-1.19.1/cram/string_alloc.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -Copyright (c) 2010, 2013, 2018 Genome Research Ltd. -Author: Andrew Whitwham - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef STRING_ALLOC_H -#define STRING_ALLOC_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * A pooled string allocator intended to cut down on the - * memory overhead of many small string allocations. - * - * Andrew Whitwham, September 2010. - */ - -typedef struct { - char *str; - size_t used; -} string_t; - -typedef struct { - size_t max_length; - size_t nstrings; - size_t max_strings; - string_t *strings; -} string_alloc_t; - -string_alloc_t *string_pool_create(size_t max_length); -void string_pool_destroy(string_alloc_t *a_str); -char *string_alloc(string_alloc_t *a_str, size_t length); -char *string_dup(string_alloc_t *a_str, const char *instr); -char *string_ndup(string_alloc_t *a_str, const char *instr, size_t len); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/errmod.c b/src/htslib-1.19.1/errmod.c deleted file mode 100644 index df708e1..0000000 --- a/src/htslib-1.19.1/errmod.c +++ /dev/null @@ -1,208 +0,0 @@ -/* errmod.c -- revised MAQ error model. - - Copyright (C) 2010 Broad Institute. - Copyright (C) 2012, 2013, 2016-2017, 2019 Genome Research Ltd. 
- - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include "htslib/hts.h" -#include "htslib/ksort.h" -#include "htslib/hts_os.h" // for drand48 - -KSORT_INIT_STATIC_GENERIC(uint16_t) - -struct errmod_t { - double depcorr; - /* table of constants generated for given depcorr and eta */ - double *fk, *beta, *lhet; -}; - -typedef struct { - double fsum[16], bsum[16]; - uint32_t c[16]; -} call_aux_t; - -/* \Gamma(n) = (n-1)! */ -#define lfact(n) lgamma(n+1) - -/* generates a success * trials table of bionomial probability densities (log transformed) */ -static double* logbinomial_table( const int n_size ) -{ - /* prob distribution for binom var is p(k) = {n! \over k! (n-k)! } p^k (1-p)^{n-k} */ - /* this calcs p(k) = {log(n!) - log(k!) - log((n-k)!) 
*/ - int k, n; - double *logbinom = (double*)calloc(n_size * n_size, sizeof(double)); - if (!logbinom) return NULL; - for (n = 1; n < n_size; ++n) { - double lfn = lfact(n); - for (k = 1; k <= n; ++k) - logbinom[n<<8|k] = lfn - lfact(k) - lfact(n-k); - } - return logbinom; -} - -static int cal_coef(errmod_t *em, double depcorr, double eta) -{ - int k, n, q; - double sum, sum1; - double *lC; - - // initialize ->fk - em->fk = (double*)calloc(256, sizeof(double)); - if (!em->fk) return -1; - em->fk[0] = 1.0; - for (n = 1; n < 256; ++n) - em->fk[n] = pow(1. - depcorr, n) * (1.0 - eta) + eta; - - // initialize ->beta - em->beta = (double*)calloc(256 * 256 * 64, sizeof(double)); - if (!em->beta) return -1; - - lC = logbinomial_table( 256 ); - if (!lC) return -1; - - for (q = 1; q < 64; ++q) { - double e = pow(10.0, -q/10.0); - double le = log(e); - double le1 = log(1.0 - e); - for (n = 1; n <= 255; ++n) { - double *beta = em->beta + (q<<16|n<<8); - sum1 = lC[n<<8|n] + n*le; - beta[n] = HUGE_VAL; - for (k = n - 1; k >= 0; --k, sum1 = sum) { - sum = sum1 + log1p(exp(lC[n<<8|k] + k*le + (n-k)*le1 - sum1)); - beta[k] = -10. 
/ M_LN10 * (sum1 - sum); - } - } - } - - // initialize ->lhet - em->lhet = (double*)calloc(256 * 256, sizeof(double)); - if (!em->lhet) { - free(lC); - return -1; - } - for (n = 0; n < 256; ++n) - for (k = 0; k < 256; ++k) - em->lhet[n<<8|k] = lC[n<<8|k] - M_LN2 * n; - free(lC); - return 0; -} - -/** - * Create errmod_t object with obj.depcorr set to depcorr and initialise - */ -errmod_t *errmod_init(double depcorr) -{ - errmod_t *em; - em = (errmod_t*)calloc(1, sizeof(errmod_t)); - if (!em) return NULL; - em->depcorr = depcorr; - cal_coef(em, depcorr, 0.03); - return em; -} - -/** - * Deallocate an errmod_t object - */ -void errmod_destroy(errmod_t *em) -{ - if (em == 0) return; - free(em->lhet); free(em->fk); free(em->beta); - free(em); -} - -// -// em: error model to fit to data -// m: number of alleles across all samples -// n: number of bases observed in sample -// bases[i]: bases observed in pileup [6 bit quality|1 bit strand|4 bit base] -// q[i*m+j]: (Output) phred-scaled likelihood of each genotype (i,j) -int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q) -{ - // Aux - // aux.c is total count of each base observed (ignoring strand) - call_aux_t aux; - // Loop variables - int i, j, k; - // The total count of each base observed per strand - int w[32]; - - memset(q, 0, m * m * sizeof(float)); // initialise q to 0 - if (n == 0) return 0; - // This section randomly downsamples to 255 depth so as not to go beyond our precalculated matrix - if (n > 255) { // if we exceed 255 bases observed then shuffle them to sample and only keep the first 255 - ks_shuffle(uint16_t, n, bases); - n = 255; - } - ks_introsort(uint16_t, n, bases); - /* zero out w and aux */ - memset(w, 0, 32 * sizeof(int)); - memset(&aux, 0, sizeof(call_aux_t)); - - for (j = n - 1; j >= 0; --j) { // calculate esum and fsum - uint16_t b = bases[j]; - /* extract quality and cap at 63 */ - int qual = b>>5 < 4? 
4 : b>>5; - if (qual > 63) qual = 63; - /* extract base ORed with strand */ - int basestrand = b&0x1f; - /* extract base */ - int base = b&0xf; - aux.fsum[base] += em->fk[w[basestrand]]; - aux.bsum[base] += em->fk[w[basestrand]] * em->beta[qual<<16|n<<8|aux.c[base]]; - ++aux.c[base]; - ++w[basestrand]; - } - - // generate likelihood - for (j = 0; j < m; ++j) { - float tmp1, tmp3; - int tmp2; - // homozygous - for (k = 0, tmp1 = tmp3 = 0.0, tmp2 = 0; k < m; ++k) { - if (k == j) continue; - tmp1 += aux.bsum[k]; tmp2 += aux.c[k]; tmp3 += aux.fsum[k]; - } - if (tmp2) { - q[j*m+j] = tmp1; - } - // heterozygous - for (k = j + 1; k < m; ++k) { - int cjk = aux.c[j] + aux.c[k]; - for (i = 0, tmp2 = 0, tmp1 = tmp3 = 0.0; i < m; ++i) { - if (i == j || i == k) continue; - tmp1 += aux.bsum[i]; tmp2 += aux.c[i]; tmp3 += aux.fsum[i]; - } - if (tmp2) { - q[j*m+k] = q[k*m+j] = -4.343 * em->lhet[cjk<<8|aux.c[k]] + tmp1; - } else q[j*m+k] = q[k*m+j] = -4.343 * em->lhet[cjk<<8|aux.c[k]]; // all the bases are either j or k - } - /* clamp to greater than 0 */ - for (k = 0; k < m; ++k) if (q[j*m+k] < 0.0) q[j*m+k] = 0.0; - } - - return 0; -} diff --git a/src/htslib-1.19.1/faidx.5 b/src/htslib-1.19.1/faidx.5 deleted file mode 100644 index fb84fb2..0000000 --- a/src/htslib-1.19.1/faidx.5 +++ /dev/null @@ -1,238 +0,0 @@ -'\" t -.TH faidx 5 "June 2018" "htslib" "Bioinformatics formats" -.SH NAME -faidx \- an index enabling random access to FASTA and FASTQ files -.\" -.\" Copyright (C) 2013, 2015, 2018 Genome Research Ltd. 
-.\" -.\" Author: John Marshall -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -.SH SYNOPSIS -.IR file.fa .fai, -.IR file.fasta .fai, -.IR file.fq .fai, -.IR file.fastq .fai -.SH DESCRIPTION -Using an \fBfai index\fP file in conjunction with a FASTA/FASTQ file containing -reference sequences enables efficient access to arbitrary regions within -those reference sequences. -The index file typically has the same filename as the corresponding FASTA/FASTQ -file, with \fB.fai\fP appended. -.P -An \fBfai index\fP file is a text file consisting of lines each with -five TAB-delimited columns for a FASTA file and six for FASTQ: -.TS -lbl. 
-NAME Name of this reference sequence -LENGTH Total length of this reference sequence, in bases -OFFSET Offset in the FASTA/FASTQ file of this sequence's first base -LINEBASES The number of bases on each line -LINEWIDTH The number of bytes in each line, including the newline -QUALOFFSET Offset of sequence's first quality within the FASTQ file -.TE -.P -The \fBNAME\fP and \fBLENGTH\fP columns contain the same -data as would appear in the \fBSN\fP and \fBLN\fP fields of a -SAM \fB@SQ\fP header for the same reference sequence. -.P -The \fBOFFSET\fP column contains the offset within the FASTA/FASTQ file, in -bytes starting from zero, of the first base of this reference sequence, i.e., of -the character following the newline at the end of the header line (the -"\fB>\fP" line in FASTA, "\fB@\fP" in FASTQ). Typically the lines of a -\fBfai index\fP file appear in the order in which the reference sequences -appear in the FASTA/FASTQ file, so \fB.fai\fP files are typically sorted -according to this column. -.P -The \fBLINEBASES\fP column contains the number of bases in each of the sequence -lines that form the body of this reference sequence, apart from the final line -which may be shorter. -The \fBLINEWIDTH\fP column contains the number of \fIbytes\fP in each of -the sequence lines (except perhaps the final line), thus differing from -\fBLINEBASES\fP in that it also counts the bytes forming the line terminator. -.P -The \fBQUALOFFSET\fP works the same way as \fBOFFSET\fP but for the first -quality score of this reference sequence. This would be the first character -following the newline at the end of the "\fB+\fP" line. For FASTQ files only. -.SS FASTA Files -In order to be indexed with \fBsamtools faidx\fP, a FASTA file must be a text -file of the form -.LP -.RS -.RI > name -.RI [ description ...] -.br -ATGCATGCATGCATGCATGCATGCATGCAT -.br -GCATGCATGCATGCATGCATGCATGCATGC -.br -ATGCAT -.br -.RI > name -.RI [ description ...] 
-.br -ATGCATGCATGCAT -.br -GCATGCATGCATGC -.br -[...] -.RE -.LP -In particular, each reference sequence must be "well-formatted", i.e., all -of its sequence lines must be the same length, apart from the final sequence -line which may be shorter. -(While this sequence line length must be the same within each sequence, -it may vary between different reference sequences in the same FASTA file.) -.P -This also means that although the FASTA file may have Unix- or Windows-style -or other line termination, the newline characters present must be consistent, -at least within each reference sequence. -.P -The \fBsamtools\fP implementation uses the first word of the "\fB>\fP" header -line text (i.e., up to the first whitespace character, having skipped any -initial whitespace after the ">") as the \fBNAME\fP column. -.SS FASTQ Files -FASTQ files for indexing work in the same way as the FASTA files. -.LP -.RS -.RI @ name -.RI [ description...] -.br -ATGCATGCATGCATGCATGCATGCATGCAT -.br -GCATGCATGCATGCATGCATGCATGCATGC -.br -ATGCAT -.br -.RI + -.br -FFFA@@FFFFFFFFFFHHB:::@BFFFFGG -.br -HIHIIIIIIIIIIIIIIIIIIIIIIIFFFF -.br -8011<< -.br -.RI @ name -.RI [ description...] -.br -ATGCATGCATGCAT -.br -GCATGCATGCATGC -.br -.RI + -.br -IIA94445EEII== -.br -=>IIIIIIIIICCC -.br -[...] -.RE -.LP -Quality lines must be wrapped at the same length as the corresponding -sequence lines. -.SH EXAMPLE -For example, given this FASTA file -.LP -.RS ->one -.br -ATGCATGCATGCATGCATGCATGCATGCAT -.br -GCATGCATGCATGCATGCATGCATGCATGC -.br -ATGCAT -.br ->two another chromosome -.br -ATGCATGCATGCAT -.br -GCATGCATGCATGC -.br -.RE -.LP -formatted with Unix-style (LF) line termination, the corresponding fai index -would be -.RS -.TS -lnnnn. -one 66 5 30 31 -two 28 98 14 15 -.TE -.RE -.LP -If the FASTA file were formatted with Windows-style (CR-LF) line termination, -the fai index would be -.RS -.TS -lnnnn. 
-one 66 6 30 32 -two 28 103 14 16 -.TE -.RE -.LP -An example FASTQ file -.LP -.RS -@fastq1 -.br -ATGCATGCATGCATGCATGCATGCATGCAT -.br -GCATGCATGCATGCATGCATGCATGCATGC -.br -ATGCAT -.br -+ -.br -FFFA@@FFFFFFFFFFHHB:::@BFFFFGG -.br -HIHIIIIIIIIIIIIIIIIIIIIIIIFFFF -.br -8011<< -.br -@fastq2 -.br -ATGCATGCATGCAT -.br -GCATGCATGCATGC -.br -+ -.br -IIA94445EEII== -.br -=>IIIIIIIIICCC -.br -.RE -.LP -Formatted with Unix-style line termination would give this fai index -.RS -.TS -lnnnnn. -fastq1 66 8 30 31 79 -fastq2 28 156 14 15 188 -.TE -.RE -.SH SEE ALSO -.IR samtools (1) -.TP -https://en.wikipedia.org/wiki/FASTA_format -.TP -https://en.wikipedia.org/wiki/FASTQ_format - -Further description of the FASTA and FASTQ formats diff --git a/src/htslib-1.19.1/faidx.c b/src/htslib-1.19.1/faidx.c deleted file mode 100644 index de58514..0000000 --- a/src/htslib-1.19.1/faidx.c +++ /dev/null @@ -1,1012 +0,0 @@ -/* faidx.c -- FASTA and FASTQ random access. - - Copyright (C) 2008, 2009, 2013-2020, 2022 Genome Research Ltd. - Portions copyright (C) 2011 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htslib/bgzf.h" -#include "htslib/faidx.h" -#include "htslib/hfile.h" -#include "htslib/khash.h" -#include "htslib/kstring.h" -#include "hts_internal.h" - -typedef struct { - int id; // faidx_t->name[id] is for this struct. - uint32_t line_len, line_blen; - uint64_t len; - uint64_t seq_offset; - uint64_t qual_offset; -} faidx1_t; -KHASH_MAP_INIT_STR(s, faidx1_t) - -struct faidx_t { - BGZF *bgzf; - int n, m; - char **name; - khash_t(s) *hash; - enum fai_format_options format; -}; - -static int fai_name2id(void *v, const char *ref) -{ - faidx_t *fai = (faidx_t *)v; - khint_t k = kh_get(s, fai->hash, ref); - return k == kh_end(fai->hash) ? -1 : kh_val(fai->hash, k).id; -} - -static inline int fai_insert_index(faidx_t *idx, const char *name, uint64_t len, uint32_t line_len, uint32_t line_blen, uint64_t seq_offset, uint64_t qual_offset) -{ - if (!name) { - hts_log_error("Malformed line"); - return -1; - } - - char *name_key = strdup(name); - int absent; - khint_t k = kh_put(s, idx->hash, name_key, &absent); - faidx1_t *v = &kh_value(idx->hash, k); - - if (! absent) { - hts_log_warning("Ignoring duplicate sequence \"%s\" at byte offset %" PRIu64, name, seq_offset); - free(name_key); - return 0; - } - - if (idx->n == idx->m) { - char **tmp; - idx->m = idx->m? 
idx->m<<1 : 16; - if (!(tmp = (char**)realloc(idx->name, sizeof(char*) * idx->m))) { - hts_log_error("Out of memory"); - return -1; - } - idx->name = tmp; - } - v->id = idx->n; - idx->name[idx->n++] = name_key; - v->len = len; - v->line_len = line_len; - v->line_blen = line_blen; - v->seq_offset = seq_offset; - v->qual_offset = qual_offset; - - return 0; -} - - -static faidx_t *fai_build_core(BGZF *bgzf) { - kstring_t name = { 0, 0, NULL }; - int c, read_done, line_num; - faidx_t *idx; - uint64_t seq_offset, qual_offset; - uint64_t seq_len, qual_len; - uint64_t char_len, cl, line_len, ll; - enum read_state {OUT_READ, IN_NAME, IN_SEQ, SEQ_END, IN_QUAL} state; - - idx = (faidx_t*)calloc(1, sizeof(faidx_t)); - idx->hash = kh_init(s); - idx->format = FAI_NONE; - - state = OUT_READ, read_done = 0, line_num = 1; - seq_offset = qual_offset = seq_len = qual_len = char_len = cl = line_len = ll = 0; - - while ((c = bgzf_getc(bgzf)) >= 0) { - switch (state) { - case OUT_READ: - switch (c) { - case '>': - if (idx->format == FAI_FASTQ) { - hts_log_error("Found '>' in a FASTQ file, error at line %d", line_num); - goto fail; - } - - idx->format = FAI_FASTA; - state = IN_NAME; - break; - - case '@': - if (idx->format == FAI_FASTA) { - hts_log_error("Found '@' in a FASTA file, error at line %d", line_num); - goto fail; - } - - idx->format = FAI_FASTQ; - state = IN_NAME; - break; - - case '\r': - // Blank line with cr-lf ending? - if ((c = bgzf_getc(bgzf)) == '\n') { - line_num++; - } else { - hts_log_error("Format error, carriage return not followed by new line at line %d", line_num); - goto fail; - } - break; - - case '\n': - // just move onto the next line - line_num++; - break; - - default: { - char s[4] = { '"', c, '"', '\0' }; - hts_log_error("Format error, unexpected %s at line %d", isprint(c) ? 
s : "character", line_num); - goto fail; - } - } - break; - - case IN_NAME: - if (read_done) { - if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) - goto fail; - - read_done = 0; - } - - name.l = 0; - - do { - if (!isspace(c)) { - kputc(c, &name); - } else if (name.l > 0 || c == '\n') { - break; - } - } while ((c = bgzf_getc(bgzf)) >= 0); - - kputsn("", 0, &name); - - if (c < 0) { - hts_log_error("The last entry '%s' has no sequence", name.s); - goto fail; - } - - // read the rest of the line if necessary - if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - - state = IN_SEQ; seq_len = qual_len = char_len = line_len = 0; - seq_offset = bgzf_utell(bgzf); - line_num++; - break; - - case IN_SEQ: - if (idx->format == FAI_FASTA) { - if (c == '\n') { - state = OUT_READ; - line_num++; - continue; - } else if (c == '>') { - state = IN_NAME; - continue; - } - } else if (idx->format == FAI_FASTQ) { - if (c == '+') { - state = IN_QUAL; - if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - qual_offset = bgzf_utell(bgzf); - line_num++; - continue; - } else if (c == '\n') { - hts_log_error("Inlined empty line is not allowed in sequence '%s' at line %d", name.s, line_num); - goto fail; - } - } - - ll = cl = 0; - - if (idx->format == FAI_FASTA) read_done = 1; - - do { - ll++; - if (isgraph(c)) cl++; - } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - - ll++; seq_len += cl; - - if (line_len == 0) { - line_len = ll; - char_len = cl; - } else if (line_len > ll) { - - if (idx->format == FAI_FASTA) - state = OUT_READ; - else - state = SEQ_END; - - } else if (line_len < ll) { - hts_log_error("Different line length in sequence '%s'", name.s); - goto fail; - } - - line_num++; - break; - - case SEQ_END: - if (c == '+') { - state = IN_QUAL; - while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - qual_offset = bgzf_utell(bgzf); - line_num++; - } else { - hts_log_error("Format error, expecting '+', got '%c' at line %d", 
c, line_num); - goto fail; - } - break; - - case IN_QUAL: - if (c == '\n') { - if (!read_done) { - hts_log_error("Inlined empty line is not allowed in quality of sequence '%s'", name.s); - goto fail; - } - - state = OUT_READ; - line_num++; - continue; - } else if (c == '@' && read_done) { - state = IN_NAME; - continue; - } - - ll = cl = 0; - - do { - ll++; - if (isgraph(c)) cl++; - } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); - - ll++; qual_len += cl; - - if (line_len < ll) { - hts_log_error("Quality line length too long in '%s' at line %d", name.s, line_num); - goto fail; - } else if (qual_len == seq_len) { - read_done = 1; - } else if (qual_len > seq_len) { - hts_log_error("Quality length longer than sequence in '%s' at line %d", name.s, line_num); - goto fail; - } else if (line_len > ll) { - hts_log_error("Quality line length too short in '%s' at line %d", name.s, line_num); - goto fail; - } - - line_num++; - break; - } - } - - if (read_done) { - if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) - goto fail; - } else { - goto fail; - } - - free(name.s); - return idx; - -fail: - free(name.s); - fai_destroy(idx); - return NULL; -} - - -static int fai_save(const faidx_t *fai, hFILE *fp) { - khint_t k; - int i; - char buf[96]; // Must be big enough for format below. 
- - for (i = 0; i < fai->n; ++i) { - faidx1_t x; - k = kh_get(s, fai->hash, fai->name[i]); - assert(k < kh_end(fai->hash)); - x = kh_value(fai->hash, k); - - if (fai->format == FAI_FASTA) { - snprintf(buf, sizeof(buf), - "\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\n", - x.len, x.seq_offset, x.line_blen, x.line_len); - } else { - snprintf(buf, sizeof(buf), - "\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\t%"PRIu64"\n", - x.len, x.seq_offset, x.line_blen, x.line_len, x.qual_offset); - } - - if (hputs(fai->name[i], fp) != 0) return -1; - if (hputs(buf, fp) != 0) return -1; - } - return 0; -} - - -static faidx_t *fai_read(hFILE *fp, const char *fname, int format) -{ - faidx_t *fai; - char *buf = NULL, *p; - ssize_t l, lnum = 1; - - fai = (faidx_t*)calloc(1, sizeof(faidx_t)); - if (!fai) return NULL; - - fai->hash = kh_init(s); - if (!fai->hash) goto fail; - - buf = (char*)calloc(0x10000, 1); - if (!buf) goto fail; - - while ((l = hgetln(buf, 0x10000, fp)) > 0) { - uint32_t line_len, line_blen, n; - uint64_t len; - uint64_t seq_offset; - uint64_t qual_offset = 0; - - for (p = buf; *p && !isspace_c(*p); ++p); - - if (p - buf < l) { - *p = 0; ++p; - } - - if (format == FAI_FASTA) { - n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32, &len, &seq_offset, &line_blen, &line_len); - - if (n != 4) { - hts_log_error("Could not understand FASTA index %s line %zd", fname, lnum); - goto fail; - } - } else { - n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32"%"SCNu64, &len, &seq_offset, &line_blen, &line_len, &qual_offset); - - if (n != 5) { - if (n == 4) { - hts_log_error("Possibly this is a FASTA index, try using faidx. 
Problem in %s line %zd", fname, lnum); - } else { - hts_log_error("Could not understand FASTQ index %s line %zd", fname, lnum); - } - - goto fail; - } - } - - if (fai_insert_index(fai, buf, len, line_len, line_blen, seq_offset, qual_offset) != 0) { - goto fail; - } - - if (buf[l - 1] == '\n') ++lnum; - } - - if (l < 0) { - hts_log_error("Error while reading %s: %s", fname, strerror(errno)); - goto fail; - } - free(buf); - return fai; - - fail: - free(buf); - fai_destroy(fai); - return NULL; -} - -void fai_destroy(faidx_t *fai) -{ - int i; - if (!fai) return; - for (i = 0; i < fai->n; ++i) free(fai->name[i]); - free(fai->name); - kh_destroy(s, fai->hash); - if (fai->bgzf) bgzf_close(fai->bgzf); - free(fai); -} - - -static int fai_build3_core(const char *fn, const char *fnfai, const char *fngzi) -{ - kstring_t fai_kstr = { 0, 0, NULL }; - kstring_t gzi_kstr = { 0, 0, NULL }; - BGZF *bgzf = NULL; - hFILE *fp = NULL; - faidx_t *fai = NULL; - int save_errno, res; - char *file_type; - - bgzf = bgzf_open(fn, "r"); - - if ( !bgzf ) { - hts_log_error("Failed to open the file %s : %s", fn, strerror(errno)); - goto fail; - } - - if ( bgzf->is_compressed ) { - if (bgzf_index_build_init(bgzf) != 0) { - hts_log_error("Failed to allocate bgzf index"); - goto fail; - } - } - - fai = fai_build_core(bgzf); - - if ( !fai ) { - if (bgzf->is_compressed && bgzf->is_gzip) { - hts_log_error("Cannot index files compressed with gzip, please use bgzip"); - } - goto fail; - } - - if (fai->format == FAI_FASTA) { - file_type = "FASTA"; - } else { - file_type = "FASTQ"; - } - - if (!fnfai) { - if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; - fnfai = fai_kstr.s; - } - - if (!fngzi) { - if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; - fngzi = gzi_kstr.s; - } - - if ( bgzf->is_compressed ) { - if (bgzf_index_dump(bgzf, fngzi, NULL) < 0) { - hts_log_error("Failed to make bgzf index %s", fngzi); - goto fail; - } - } - - res = bgzf_close(bgzf); - bgzf = NULL; - - if (res < 0) { - 
hts_log_error("Error on closing %s : %s", fn, strerror(errno)); - goto fail; - } - - fp = hopen(fnfai, "wb"); - - if ( !fp ) { - hts_log_error("Failed to open %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - if (fai_save(fai, fp) != 0) { - hts_log_error("Failed to write %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - if (hclose(fp) != 0) { - hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - free(fai_kstr.s); - free(gzi_kstr.s); - fai_destroy(fai); - return 0; - - fail: - save_errno = errno; - free(fai_kstr.s); - free(gzi_kstr.s); - bgzf_close(bgzf); - fai_destroy(fai); - errno = save_errno; - return -1; -} - - -int fai_build3(const char *fn, const char *fnfai, const char *fngzi) { - return fai_build3_core(fn, fnfai, fngzi); -} - - -int fai_build(const char *fn) { - return fai_build3(fn, NULL, NULL); -} - - -static faidx_t *fai_load3_core(const char *fn, const char *fnfai, const char *fngzi, - int flags, int format) -{ - kstring_t fai_kstr = { 0, 0, NULL }; - kstring_t gzi_kstr = { 0, 0, NULL }; - hFILE *fp = NULL; - faidx_t *fai = NULL; - int res, gzi_index_needed = 0; - char *file_type; - - if (format == FAI_FASTA) { - file_type = "FASTA"; - } else { - file_type = "FASTQ"; - } - - if (fn == NULL) - return NULL; - - if (fnfai == NULL) { - if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; - fnfai = fai_kstr.s; - } - if (fngzi == NULL) { - if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; - fngzi = gzi_kstr.s; - } - - fp = hopen(fnfai, "rb"); - - if (fp) { - // index file present, check if a compressed index is needed - hFILE *gz = NULL; - BGZF *bgzf = bgzf_open(fn, "rb"); - - if (bgzf == 0) { - hts_log_error("Failed to open %s file %s", file_type, fn); - goto fail; - } - - if (bgzf_compression(bgzf) == 2) { // BGZF compression - if ((gz = hopen(fngzi, "rb")) == 0) { - - if (!(flags & FAI_CREATE) || errno != ENOENT) { - hts_log_error("Failed to 
open %s index %s: %s", file_type, fngzi, strerror(errno)); - bgzf_close(bgzf); - goto fail; - } - - gzi_index_needed = 1; - res = hclose(fp); // closed as going to be re-indexed - - if (res < 0) { - hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - } else { - res = hclose(gz); - - if (res < 0) { - hts_log_error("Failed on closing %s index %s : %s", file_type, fngzi, strerror(errno)); - goto fail; - } - } - } - - bgzf_close(bgzf); - } - - if (fp == 0 || gzi_index_needed) { - if (!(flags & FAI_CREATE) || errno != ENOENT) { - hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - hts_log_info("Build %s index", file_type); - - if (fai_build3_core(fn, fnfai, fngzi) < 0) { - goto fail; - } - - fp = hopen(fnfai, "rb"); - if (fp == 0) { - hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); - goto fail; - } - } - - fai = fai_read(fp, fnfai, format); - if (fai == NULL) { - hts_log_error("Failed to read %s index %s", file_type, fnfai); - goto fail; - } - - res = hclose(fp); - fp = NULL; - if (res < 0) { - hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); - goto fail; - } - - fai->bgzf = bgzf_open(fn, "rb"); - if (fai->bgzf == 0) { - hts_log_error("Failed to open %s file %s", file_type, fn); - goto fail; - } - - if ( fai->bgzf->is_compressed==1 ) { - if ( bgzf_index_load(fai->bgzf, fngzi, NULL) < 0 ) { - hts_log_error("Failed to load .gzi index: %s", fngzi); - goto fail; - } - } - free(fai_kstr.s); - free(gzi_kstr.s); - return fai; - - fail: - if (fai) fai_destroy(fai); - if (fp) hclose_abruptly(fp); - free(fai_kstr.s); - free(gzi_kstr.s); - return NULL; -} - - -faidx_t *fai_load3(const char *fn, const char *fnfai, const char *fngzi, - int flags) { - return fai_load3_core(fn, fnfai, fngzi, flags, FAI_FASTA); -} - - -faidx_t *fai_load(const char *fn) -{ - return fai_load3(fn, NULL, NULL, FAI_CREATE); -} 
- - -faidx_t *fai_load3_format(const char *fn, const char *fnfai, const char *fngzi, - int flags, enum fai_format_options format) { - return fai_load3_core(fn, fnfai, fngzi, flags, format); -} - - -faidx_t *fai_load_format(const char *fn, enum fai_format_options format) { - return fai_load3_format(fn, NULL, NULL, FAI_CREATE, format); -} - - -static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val, - uint64_t offset, hts_pos_t beg, hts_pos_t end, hts_pos_t *len) { - char *s; - size_t l; - int c = 0; - int ret; - - if ((uint64_t) end - (uint64_t) beg >= SIZE_MAX - 2) { - hts_log_error("Range %"PRId64"..%"PRId64" too big", beg, end); - *len = -1; - return NULL; - } - - if (val->line_blen <= 0) { - hts_log_error("Invalid line length in index: %d", val->line_blen); - *len = -1; - return NULL; - } - - ret = bgzf_useek(fai->bgzf, - offset - + beg / val->line_blen * val->line_len - + beg % val->line_blen, SEEK_SET); - - if (ret < 0) { - *len = -1; - hts_log_error("Failed to retrieve block. (Seeking in a compressed, .gzi unindexed, file?)"); - return NULL; - } - - l = 0; - s = (char*)malloc((size_t) end - beg + 2); - if (!s) { - *len = -1; - return NULL; - } - - while ( l < end - beg && (c=bgzf_getc(fai->bgzf))>=0 ) - if (isgraph(c)) s[l++] = c; - if (c < 0) { - hts_log_error("Failed to retrieve block: %s", - c == -1 ? 
"unexpected end of file" : "error reading file"); - free(s); - *len = -1; - return NULL; - } - - s[l] = '\0'; - *len = l; - return s; -} - -static int fai_get_val(const faidx_t *fai, const char *str, - hts_pos_t *len, faidx1_t *val, hts_pos_t *fbeg, hts_pos_t *fend) { - khiter_t iter; - khash_t(s) *h; - int id; - hts_pos_t beg, end; - - if (!fai_parse_region(fai, str, &id, &beg, &end, 0)) { - hts_log_warning("Reference %s not found in FASTA file, returning empty sequence", str); - *len = -2; - return 1; - } - - h = fai->hash; - iter = kh_get(s, h, faidx_iseq(fai, id)); - if (iter >= kh_end(h)) { - // should have already been caught above - abort(); - } - *val = kh_value(h, iter); - - if (beg >= val->len) beg = val->len; - if (end >= val->len) end = val->len; - if (beg > end) beg = end; - - *fbeg = beg; - *fend = end; - - return 0; -} - -/* - * The internal still has line_blen as uint32_t, but our references - * can be longer, so for future proofing we use hts_pos_t. We also needed - * a signed value so we can return negatives as an error. - */ -hts_pos_t fai_line_length(const faidx_t *fai, const char *str) -{ - faidx1_t val; - int64_t beg, end; - hts_pos_t len; - - if (fai_get_val(fai, str, &len, &val, &beg, &end)) - return -1; - else - return val.line_blen; -} - -char *fai_fetch64(const faidx_t *fai, const char *str, hts_pos_t *len) -{ - faidx1_t val; - int64_t beg, end; - - if (fai_get_val(fai, str, len, &val, &beg, &end)) { - return NULL; - } - - // now retrieve the sequence - return fai_retrieve(fai, &val, val.seq_offset, beg, end, len); -} - -char *fai_fetch(const faidx_t *fai, const char *str, int *len) -{ - hts_pos_t len64; - char *ret = fai_fetch64(fai, str, &len64); - *len = len64 < INT_MAX ? 
len64 : INT_MAX; // trunc - return ret; -} - -char *fai_fetchqual64(const faidx_t *fai, const char *str, hts_pos_t *len) { - faidx1_t val; - int64_t beg, end; - - if (fai_get_val(fai, str, len, &val, &beg, &end)) { - return NULL; - } - - // now retrieve the sequence - return fai_retrieve(fai, &val, val.qual_offset, beg, end, len); -} - -char *fai_fetchqual(const faidx_t *fai, const char *str, int *len) { - hts_pos_t len64; - char *ret = fai_fetchqual64(fai, str, &len64); - *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc - return ret; -} - -int faidx_fetch_nseq(const faidx_t *fai) -{ - return fai->n; -} - -int faidx_nseq(const faidx_t *fai) -{ - return fai->n; -} - -const char *faidx_iseq(const faidx_t *fai, int i) -{ - return fai->name[i]; -} - -hts_pos_t faidx_seq_len64(const faidx_t *fai, const char *seq) -{ - khint_t k = kh_get(s, fai->hash, seq); - if ( k == kh_end(fai->hash) ) return -1; - return kh_val(fai->hash, k).len; -} - -int faidx_seq_len(const faidx_t *fai, const char *seq) -{ - hts_pos_t len = faidx_seq_len64(fai, seq); - return len < INT_MAX ? 
len : INT_MAX; -} - -static int faidx_adjust_position(const faidx_t *fai, int end_adjust, - faidx1_t *val_out, const char *c_name, - hts_pos_t *p_beg_i, hts_pos_t *p_end_i, - hts_pos_t *len) { - khiter_t iter; - faidx1_t *val; - - // Adjust position - iter = kh_get(s, fai->hash, c_name); - - if (iter == kh_end(fai->hash)) { - if (len) - *len = -2; - hts_log_error("The sequence \"%s\" was not found", c_name); - return 1; - } - - val = &kh_value(fai->hash, iter); - - if (val_out) - *val_out = *val; - - if(*p_end_i < *p_beg_i) - *p_beg_i = *p_end_i; - - if(*p_beg_i < 0) - *p_beg_i = 0; - else if(val->len <= *p_beg_i) - *p_beg_i = val->len; - - if(*p_end_i < 0) - *p_end_i = 0; - else if(val->len <= *p_end_i) - *p_end_i = val->len - end_adjust; - - return 0; -} - -int fai_adjust_region(const faidx_t *fai, int tid, - hts_pos_t *beg, hts_pos_t *end) -{ - hts_pos_t orig_beg, orig_end; - - if (!fai || !beg || !end || tid < 0 || tid >= fai->n) - return -1; - - orig_beg = *beg; - orig_end = *end; - if (faidx_adjust_position(fai, 0, NULL, fai->name[tid], beg, end, NULL) != 0) { - hts_log_error("Inconsistent faidx internal state - couldn't find \"%s\"", - fai->name[tid]); - return -1; - } - - return ((orig_beg != *beg ? 1 : 0) | - (orig_end != *end && orig_end < HTS_POS_MAX ? 2 : 0)); -} - -char *faidx_fetch_seq64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) -{ - faidx1_t val; - - // Adjust position - if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { - return NULL; - } - - // Now retrieve the sequence - return fai_retrieve(fai, &val, val.seq_offset, p_beg_i, p_end_i + 1, len); -} - -char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) -{ - hts_pos_t len64; - char *ret = faidx_fetch_seq64(fai, c_name, p_beg_i, p_end_i, &len64); - *len = len64 < INT_MAX ? 
len64 : INT_MAX; // trunc - return ret; -} - -char *faidx_fetch_qual64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) -{ - faidx1_t val; - - // Adjust position - if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { - return NULL; - } - - // Now retrieve the sequence - return fai_retrieve(fai, &val, val.qual_offset, p_beg_i, p_end_i + 1, len); -} - -char *faidx_fetch_qual(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) -{ - hts_pos_t len64; - char *ret = faidx_fetch_qual64(fai, c_name, p_beg_i, p_end_i, &len64); - *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc - return ret; -} - -int faidx_has_seq(const faidx_t *fai, const char *seq) -{ - khiter_t iter = kh_get(s, fai->hash, seq); - if (iter == kh_end(fai->hash)) return 0; - return 1; -} - -const char *fai_parse_region(const faidx_t *fai, const char *s, - int *tid, hts_pos_t *beg, hts_pos_t *end, - int flags) -{ - return hts_parse_region(s, tid, beg, end, (hts_name2id_f)fai_name2id, (void *)fai, flags); -} - -void fai_set_cache_size(faidx_t *fai, int cache_size) { - bgzf_set_cache_size(fai->bgzf, cache_size); -} - -// Adds a thread pool to the underlying BGZF layer. 
-int fai_thread_pool(faidx_t *fai, struct hts_tpool *pool, int qsize) { - return bgzf_thread_pool(fai->bgzf, pool, qsize); -} - -char *fai_path(const char *fa) { - char *fai = NULL; - if (!fa) { - hts_log_error("No reference file specified"); - } else { - char *fai_tmp = strstr(fa, HTS_IDX_DELIM); - if (fai_tmp) { - fai_tmp += strlen(HTS_IDX_DELIM); - fai = strdup(fai_tmp); - if (!fai) - hts_log_error("Failed to allocate memory"); - } else { - if (hisremote(fa)) { - fai = hts_idx_locatefn(fa, ".fai"); // get the remote fai file name, if any, but do not download the file - if (!fai) - hts_log_error("Failed to locate index file for remote reference file '%s'", fa); - } else{ - if (hts_idx_check_local(fa, HTS_FMT_FAI, &fai) == 0 && fai) { - if (fai_build3(fa, fai, NULL) == -1) { // create local fai file by indexing local fasta - hts_log_error("Failed to build index file for reference file '%s'", fa); - free(fai); - fai = NULL; - } - } - } - } - } - - return fai; -} diff --git a/src/htslib-1.19.1/header.h b/src/htslib-1.19.1/header.h deleted file mode 100644 index a98d306..0000000 --- a/src/htslib-1.19.1/header.h +++ /dev/null @@ -1,319 +0,0 @@ -/* -Copyright (c) 2013-2019 Genome Research Ltd. -Authors: James Bonfield , Valeriu Ohan - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*! \file - * SAM header parsing. - * - * These functions can be shared between SAM, BAM and CRAM file - * formats as all three internally use the same string encoding for - * header fields. - */ - - -#ifndef HEADER_H_ -#define HEADER_H_ - -#include - -#include "cram/string_alloc.h" -#include "cram/pooled_alloc.h" - -#include "htslib/khash.h" -#include "htslib/kstring.h" -#include "htslib/sam.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*! Make a single integer out of a two-letter type code */ -static inline khint32_t TYPEKEY(const char *type) { - unsigned int u0 = (unsigned char) type[0]; - unsigned int u1 = (unsigned char) type[1]; - return (u0 << 8) | u1; -} - -/* - * Proposed new SAM header parsing - -1 @SQ ID:foo LN:100 -2 @SQ ID:bar LN:200 -3 @SQ ID:ram LN:300 UR:xyz -4 @RG ID:r ... -5 @RG ID:s ... - -Hash table for 2-char @keys without dup entries. -If dup lines, we form a circular linked list. Ie hash keys = {RG, SQ}. - -HASH("SQ")--\ - | - (3) <-> 1 <-> 2 <-> 3 <-> (1) - -HASH("RG")--\ - | - (5) <-> 4 <-> 5 <-> (4) - -Items stored in the hash values also form their own linked lists: -Ie SQ->ID(foo)->LN(100) - SQ->ID(bar)->LN(200) - SQ->ID(ram)->LN(300)->UR(xyz) - RG->ID(r) - */ - -/*! 
A single key:value pair on a header line - * - * These form a linked list and hold strings. The strings are - * allocated from a string_alloc_t pool referenced in the master - * sam_hrecs_t structure. Do not attempt to free, malloc or manipulate - * these strings directly. - */ -typedef struct sam_hrec_tag_s { - struct sam_hrec_tag_s *next; - const char *str; - int len; -} sam_hrec_tag_t; - -/*! The parsed version of the SAM header string. - * - * Each header type (SQ, RG, HD, etc) points to its own sam_hdr_type - * struct via the main hash table h in the sam_hrecs_t struct. - * - * These in turn consist of circular bi-directional linked lists (ie - * rings) to hold the multiple instances of the same header type - * code. For example if we have 5 \@SQ lines the primary hash table - * will key on \@SQ pointing to the first sam_hdr_type and that in turn - * will be part of a ring of 5 elements. - * - * For each sam_hdr_type structure we also point to a sam_hdr_tag - * structure which holds the tokenised attributes; the tab separated - * key:value pairs per line. - */ -typedef struct sam_hrec_type_s { - struct sam_hrec_type_s *next; // circular list of this type - struct sam_hrec_type_s *prev; // circular list of this type - struct sam_hrec_type_s *global_next; // circular list of all lines - struct sam_hrec_type_s *global_prev; // circular list of all lines - sam_hrec_tag_t *tag; // first tag - khint32_t type; // Two-letter type code as an int -} sam_hrec_type_t; - -/*! Parsed \@SQ lines */ -typedef struct { - const char *name; - hts_pos_t len; - sam_hrec_type_t *ty; -} sam_hrec_sq_t; - -/*! Parsed \@RG lines */ -typedef struct { - const char *name; - sam_hrec_type_t *ty; - int name_len; - int id; // numerical ID -} sam_hrec_rg_t; - -/*! Parsed \@PG lines */ -typedef struct { - const char *name; - sam_hrec_type_t *ty; - int name_len; - int id; // numerical ID - int prev_id; // -1 if none -} sam_hrec_pg_t; - - -/*! 
Sort order parsed from @HD line */ -enum sam_sort_order { - ORDER_UNKNOWN =-1, - ORDER_UNSORTED = 0, - ORDER_NAME = 1, - ORDER_COORD = 2 - //ORDER_COLLATE = 3 // maybe one day! -}; - -enum sam_group_order { - ORDER_NONE =-1, - ORDER_QUERY = 0, - ORDER_REFERENCE = 1 -}; - -KHASH_MAP_INIT_INT(sam_hrecs_t, sam_hrec_type_t*) -KHASH_MAP_INIT_STR(m_s2i, int) - -/*! Primary structure for header manipulation - * - * The initial header text is held in the text kstring_t, but is also - * parsed out into SQ, RG and PG arrays. These have a hash table - * associated with each to allow lookup by ID or SN fields instead of - * their numeric array indices. Additionally PG has an array to hold - * the linked list start points (the last in a PP chain). - * - * Use the appropriate sam_hdr_* functions to edit the header, and - * call sam_hdr_rebuild() any time the textual form needs to be - * updated again. - */ -struct sam_hrecs_t { - khash_t(sam_hrecs_t) *h; - sam_hrec_type_t *first_line; //!< First line (usually @HD) - string_alloc_t *str_pool; //!< Pool of sam_hdr_tag->str strings - pool_alloc_t *type_pool;//!< Pool of sam_hdr_type structs - pool_alloc_t *tag_pool; //!< Pool of sam_hdr_tag structs - - // @SQ lines / references - int nref; //!< Number of \@SQ lines - int ref_sz; //!< Number of entries available in ref[] - sam_hrec_sq_t *ref; //!< Array of parsed \@SQ lines - khash_t(m_s2i) *ref_hash; //!< Maps SQ SN field to ref[] index - - // @RG lines / read-groups - int nrg; //!< Number of \@RG lines - int rg_sz; //!< number of entries available in rg[] - sam_hrec_rg_t *rg; //!< Array of parsed \@RG lines - khash_t(m_s2i) *rg_hash; //!< Maps RG ID field to rg[] index - - // @PG lines / programs - int npg; //!< Number of \@PG lines - int pg_sz; //!< Number of entries available in pg[] - int npg_end; //!< Number of terminating \@PG lines - int npg_end_alloc; //!< Size of pg_end field - sam_hrec_pg_t *pg; //!< Array of parsed \@PG lines - khash_t(m_s2i) *pg_hash; //!< Maps PG ID 
field to pg[] index - int *pg_end; //!< \@PG chain termination IDs - - // @cond internal - char *ID_buf; // temporary buffer for sam_hdr_pg_id - uint32_t ID_buf_sz; - int ID_cnt; - // @endcond - - int dirty; // marks the header as modified, so it can be rebuilt - int refs_changed; // Index of first changed ref (-1 if unchanged) - int pgs_changed; // New PG line added - int type_count; - char (*type_order)[3]; -}; - -/*! - * Method for parsing the header text and populating the - * internal hash tables. After calling this method, the - * parsed representation becomes the single source of truth. - * - * @param bh Header structure, previously initialised by a - * sam_hdr_init call - * @return 0 on success, -1 on failure - */ -int sam_hdr_fill_hrecs(sam_hdr_t *bh); - -/*! - * Reconstructs the text representation of the header from - * the hash table data after a change has been performed on - * the header. - * - * @return 0 on success, -1 on failure - */ -int sam_hdr_rebuild(sam_hdr_t *bh); - -/*! Creates an empty SAM header, ready to be populated. - * - * @return - * Returns a sam_hrecs_t struct on success (free with sam_hrecs_free()) - * NULL on failure - */ -sam_hrecs_t *sam_hrecs_new(void); - -/*! Produces a duplicate copy of hrecs and returns it. - * @return - * Returns NULL on failure - */ -sam_hrecs_t *sam_hrecs_dup(sam_hrecs_t *hrecs); - -/*! Update sam_hdr_t target_name and target_len arrays - * - * sam_hdr_t and sam_hrecs_t are specified separately so that sam_hdr_dup - * can use it to construct target arrays from the source header. - * - * @return 0 on success; -1 on failure - */ -int sam_hdr_update_target_arrays(sam_hdr_t *bh, const sam_hrecs_t *hrecs, - int refs_changed); - -/*! Reconstructs a kstring from the header hash table. - * - * @return - * Returns 0 on success - * -1 on failure - */ -int sam_hrecs_rebuild_text(const sam_hrecs_t *hrecs, kstring_t *ks); - -/*! Deallocates all storage used by a sam_hrecs_t struct. 
- * - * This also decrements the header reference count. If after decrementing - * it is still non-zero then the header is assumed to be in use by another - * caller and the free is not done. - */ -void sam_hrecs_free(sam_hrecs_t *hrecs); - -/*! - * @return - * Returns the first header item matching 'type'. If ID is non-NULL it checks - * for the tag ID: and compares against the specified ID. - * - * Returns NULL if no type/ID is found - */ -sam_hrec_type_t *sam_hrecs_find_type_id(sam_hrecs_t *hrecs, const char *type, - const char *ID_key, const char *ID_value); - -sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type, - const char *key, - sam_hrec_tag_t **prev); - -int sam_hrecs_remove_key(sam_hrecs_t *hrecs, - sam_hrec_type_t *type, - const char *key); - -/*! Looks up a read-group by name and returns a pointer to the start of the - * associated tag list. - * - * @return - * Returns NULL on failure - */ -sam_hrec_rg_t *sam_hrecs_find_rg(sam_hrecs_t *hrecs, const char *rg); - -/*! Returns the sort order from the @HD SO: field */ -enum sam_sort_order sam_hrecs_sort_order(sam_hrecs_t *hrecs); - -/*! Returns the group order from the @HD SO: field */ -enum sam_group_order sam_hrecs_group_order(sam_hrecs_t *hrecs); - -#ifdef __cplusplus -} -#endif - -#endif /* HEADER_H_ */ diff --git a/src/htslib-1.19.1/hfile.c b/src/htslib-1.19.1/hfile.c deleted file mode 100644 index 3f9407f..0000000 --- a/src/htslib-1.19.1/hfile.c +++ /dev/null @@ -1,1429 +0,0 @@ -/* hfile.c -- buffered low-level input/output streams. - - Copyright (C) 2013-2021 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include - -#include - -#ifdef ENABLE_PLUGINS -#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MSYS__) -#define USING_WINDOWS_PLUGIN_DLLS -#include -#endif -#endif - -#include "htslib/hfile.h" -#include "hfile_internal.h" -#include "htslib/kstring.h" - -#ifndef ENOTSUP -#define ENOTSUP EINVAL -#endif -#ifndef EOVERFLOW -#define EOVERFLOW ERANGE -#endif -#ifndef EPROTONOSUPPORT -#define EPROTONOSUPPORT ENOSYS -#endif - -#ifndef SSIZE_MAX /* SSIZE_MAX is POSIX 1 */ -#define SSIZE_MAX LONG_MAX -#endif - -/* hFILE fields are used as follows: - - char *buffer; // Pointer to the start of the I/O buffer - char *begin; // First not-yet-read character / unused position - char *end; // First unfilled/unfillable position - char *limit; // Pointer to the first position past the buffer - - const hFILE_backend *backend; // Methods to refill/flush I/O buffer - - off_t offset; // Offset within the stream of buffer position 0 - unsigned at_eof:1;// For reading, whether EOF has been seen - unsigned mobile:1;// Buffer is a mobile window or fixed full contents - unsigned readonly:1;// Whether opened as "r" rather than "r+"/"w"/"a" - int has_errno; // Error number from the last failure on this stream - -For reading, begin is the first unread character in the buffer and end is the -first unfilled position: - - -----------ABCDEFGHIJKLMNO--------------- - ^buffer ^begin ^end ^limit - -For writing, begin is the first unused position and end is unused so remains -equal to buffer: - - ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------- - ^buffer ^begin ^limit - ^end - -Thus if begin > end then there is a non-empty write buffer, if begin < end -then there is a non-empty read buffer, and if begin == end then both buffers -are empty. In all cases, the stream's file position indicator corresponds -to the position pointed to by begin. 
- -The above is the normal scenario of a mobile window. For in-memory -streams (eg via hfile_init_fixed) the buffer can be used as the full -contents without any separate backend behind it. These always have at_eof -set, offset set to 0, need no read() method, and should just return EINVAL -for seek(): - - abcdefghijkLMNOPQRSTUVWXYZ------ - ^buffer ^begin ^end ^limit -*/ -HTSLIB_EXPORT -hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity) -{ - hFILE *fp = (hFILE *) malloc(struct_size); - if (fp == NULL) goto error; - - if (capacity == 0) capacity = 32768; - // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory - if (strchr(mode, 'r') && capacity > 32768) capacity = 32768; - - fp->buffer = (char *) malloc(capacity); - if (fp->buffer == NULL) goto error; - - fp->begin = fp->end = fp->buffer; - fp->limit = &fp->buffer[capacity]; - - fp->offset = 0; - fp->at_eof = 0; - fp->mobile = 1; - fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+')); - fp->preserve = 0; - fp->has_errno = 0; - return fp; - -error: - hfile_destroy(fp); - return NULL; -} - -hFILE *hfile_init_fixed(size_t struct_size, const char *mode, - char *buffer, size_t buf_filled, size_t buf_size) -{ - hFILE *fp = (hFILE *) malloc(struct_size); - if (fp == NULL) return NULL; - - fp->buffer = fp->begin = buffer; - fp->end = &fp->buffer[buf_filled]; - fp->limit = &fp->buffer[buf_size]; - - fp->offset = 0; - fp->at_eof = 1; - fp->mobile = 0; - fp->readonly = (strchr(mode, 'r') && ! 
strchr(mode, '+')); - fp->preserve = 0; - fp->has_errno = 0; - return fp; -} - -static const struct hFILE_backend mem_backend; - -HTSLIB_EXPORT -void hfile_destroy(hFILE *fp) -{ - int save = errno; - if (fp) free(fp->buffer); - free(fp); - errno = save; -} - -static inline int writebuffer_is_nonempty(hFILE *fp) -{ - return fp->begin > fp->end; -} - -/* Refills the read buffer from the backend (once, so may only partially - fill the buffer), returning the number of additional characters read - (which might be 0), or negative when an error occurred. */ -static ssize_t refill_buffer(hFILE *fp) -{ - ssize_t n; - - // Move any unread characters to the start of the buffer - if (fp->mobile && fp->begin > fp->buffer) { - fp->offset += fp->begin - fp->buffer; - memmove(fp->buffer, fp->begin, fp->end - fp->begin); - fp->end = &fp->buffer[fp->end - fp->begin]; - fp->begin = fp->buffer; - } - - // Read into the available buffer space at fp->[end,limit) - if (fp->at_eof || fp->end == fp->limit) n = 0; - else { - n = fp->backend->read(fp, fp->end, fp->limit - fp->end); - if (n < 0) { fp->has_errno = errno; return n; } - else if (n == 0) fp->at_eof = 1; - } - - fp->end += n; - return n; -} - -/* - * Changes the buffer size for an hFILE. Ideally this is done - * immediately after opening. If performed later, this function may - * fail if we are reducing the buffer size and the current offset into - * the buffer is beyond the new capacity. - * - * Returns 0 on success; - * -1 on failure. - */ -HTSLIB_EXPORT -int hfile_set_blksize(hFILE *fp, size_t bufsiz) { - char *buffer; - ptrdiff_t curr_used; - if (!fp) return -1; - curr_used = (fp->begin > fp->end ? 
fp->begin : fp->end) - fp->buffer; - if (bufsiz == 0) bufsiz = 32768; - - // Ensure buffer resize will not erase live data - if (bufsiz < curr_used) - return -1; - - if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1; - - fp->begin = buffer + (fp->begin - fp->buffer); - fp->end = buffer + (fp->end - fp->buffer); - fp->buffer = buffer; - fp->limit = &fp->buffer[bufsiz]; - - return 0; -} - -/* Called only from hgetc(), when our buffer is empty. */ -HTSLIB_EXPORT -int hgetc2(hFILE *fp) -{ - return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF; -} - -ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp) -{ - char *found; - size_t n, copied = 0; - ssize_t got; - - if (size < 1 || size > SSIZE_MAX) { - fp->has_errno = errno = EINVAL; - return -1; - } - if (writebuffer_is_nonempty(fp)) { - fp->has_errno = errno = EBADF; - return -1; - } - - --size; /* to allow space for the NUL terminator */ - - do { - n = fp->end - fp->begin; - if (n > size - copied) n = size - copied; - - /* Look in the hFILE buffer for the delimiter */ - found = memchr(fp->begin, delim, n); - if (found != NULL) { - n = found - fp->begin + 1; - memcpy(buffer + copied, fp->begin, n); - buffer[n + copied] = '\0'; - fp->begin += n; - return n + copied; - } - - /* No delimiter yet, copy as much as we can and refill if necessary */ - memcpy(buffer + copied, fp->begin, n); - fp->begin += n; - copied += n; - - if (copied == size) { /* Output buffer full */ - buffer[copied] = '\0'; - return copied; - } - - got = refill_buffer(fp); - } while (got > 0); - - if (got < 0) return -1; /* Error on refill. */ - - buffer[copied] = '\0'; /* EOF, return anything that was copied. */ - return copied; -} - -char *hgets(char *buffer, int size, hFILE *fp) -{ - if (size < 1) { - fp->has_errno = errno = EINVAL; - return NULL; - } - return hgetln(buffer, size, fp) > 0 ? 
buffer : NULL; -} - -ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) -{ - size_t n = fp->end - fp->begin; - while (n < nbytes) { - ssize_t ret = refill_buffer(fp); - if (ret < 0) return ret; - else if (ret == 0) break; - else n += ret; - } - - if (n > nbytes) n = nbytes; - memcpy(buffer, fp->begin, n); - return n; -} - -/* Called only from hread(); when called, our buffer is empty and nread bytes - have already been placed in the destination buffer. */ -HTSLIB_EXPORT -ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread) -{ - const size_t capacity = fp->limit - fp->buffer; - int buffer_invalidated = 0; - char *dest = (char *) destv; - dest += nread, nbytes -= nread; - - // Read large requests directly into the destination buffer - while (nbytes * 2 >= capacity && !fp->at_eof) { - ssize_t n = fp->backend->read(fp, dest, nbytes); - if (n < 0) { fp->has_errno = errno; return n; } - else if (n == 0) fp->at_eof = 1; - else buffer_invalidated = 1; - fp->offset += n; - dest += n, nbytes -= n; - nread += n; - } - - if (buffer_invalidated) { - // Our unread buffer is empty, so begin == end, but our already-read - // buffer [buffer,begin) is likely non-empty and is no longer valid as - // its contents are no longer adjacent to the file position indicator. - // Discard it so that hseek() can't try to take advantage of it. - fp->offset += fp->begin - fp->buffer; - fp->begin = fp->end = fp->buffer; - } - - while (nbytes > 0 && !fp->at_eof) { - size_t n; - ssize_t ret = refill_buffer(fp); - if (ret < 0) return ret; - - n = fp->end - fp->begin; - if (n > nbytes) n = nbytes; - memcpy(dest, fp->begin, n); - fp->begin += n; - dest += n, nbytes -= n; - nread += n; - } - - return nread; -} - -/* Flushes the write buffer, fp->[buffer,begin), out through the backend - returning 0 on success or negative if an error occurred. 
*/ -static ssize_t flush_buffer(hFILE *fp) -{ - const char *buffer = fp->buffer; - while (buffer < fp->begin) { - ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer); - if (n < 0) { fp->has_errno = errno; return n; } - buffer += n; - fp->offset += n; - } - - fp->begin = fp->buffer; // Leave the buffer empty - return 0; -} - -int hflush(hFILE *fp) -{ - if (flush_buffer(fp) < 0) return EOF; - if (fp->backend->flush) { - if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; } - } - return 0; -} - -/* Called only from hputc(), when our buffer is already full. */ -HTSLIB_EXPORT -int hputc2(int c, hFILE *fp) -{ - if (flush_buffer(fp) < 0) return EOF; - *(fp->begin++) = c; - return c; -} - -/* Called only from hwrite() and hputs2(); when called, our buffer is either - full and ncopied bytes from the source have already been copied to our - buffer; or completely empty, ncopied is zero and totalbytes is greater than - the buffer size. */ -HTSLIB_EXPORT -ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied) -{ - const char *src = (const char *) srcv; - ssize_t ret; - const size_t capacity = fp->limit - fp->buffer; - size_t remaining = totalbytes - ncopied; - src += ncopied; - - ret = flush_buffer(fp); - if (ret < 0) return ret; - - // Write large blocks out directly from the source buffer - while (remaining * 2 >= capacity) { - ssize_t n = fp->backend->write(fp, src, remaining); - if (n < 0) { fp->has_errno = errno; return n; } - fp->offset += n; - src += n, remaining -= n; - } - - // Just buffer any remaining characters - memcpy(fp->begin, src, remaining); - fp->begin += remaining; - - return totalbytes; -} - -/* Called only from hputs(), when our buffer is already full. */ -HTSLIB_EXPORT -int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp) -{ - return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 
0 : EOF; -} - -off_t hseek(hFILE *fp, off_t offset, int whence) -{ - off_t curpos, pos; - - if (writebuffer_is_nonempty(fp) && fp->mobile) { - int ret = flush_buffer(fp); - if (ret < 0) return ret; - } - - curpos = htell(fp); - - // Relative offsets are given relative to the hFILE's stream position, - // which may differ from the backend's physical position due to buffering - // read-ahead. Correct for this by converting to an absolute position. - if (whence == SEEK_CUR) { - if (curpos + offset < 0) { - // Either a negative offset resulted in a position before the - // start of the file, or we overflowed when given a positive offset - fp->has_errno = errno = (offset < 0)? EINVAL : EOVERFLOW; - return -1; - } - - whence = SEEK_SET; - offset = curpos + offset; - } - // For fixed immobile buffers, convert everything else to SEEK_SET too - // so that seeking can be avoided for all (within range) requests. - else if (! fp->mobile && whence == SEEK_END) { - size_t length = fp->end - fp->buffer; - if (offset > 0 || -offset > length) { - fp->has_errno = errno = EINVAL; - return -1; - } - - whence = SEEK_SET; - offset = length + offset; - } - - // Avoid seeking if the desired position is within our read buffer. - // (But not when the next operation may be a write on a mobile buffer.) - if (whence == SEEK_SET && (! 
fp->mobile || fp->readonly) && - offset >= fp->offset && offset - fp->offset <= fp->end - fp->buffer) { - fp->begin = &fp->buffer[offset - fp->offset]; - return offset; - } - - pos = fp->backend->seek(fp, offset, whence); - if (pos < 0) { fp->has_errno = errno; return pos; } - - // Seeking succeeded, so discard any non-empty read buffer - fp->begin = fp->end = fp->buffer; - fp->at_eof = 0; - - fp->offset = pos; - return pos; -} - -int hclose(hFILE *fp) -{ - int err = fp->has_errno; - - if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno; - if (!fp->preserve) { - if (fp->backend->close(fp) < 0) err = errno; - hfile_destroy(fp); - } - - if (err) { - errno = err; - return EOF; - } - else return 0; -} - -void hclose_abruptly(hFILE *fp) -{ - int save = errno; - if (fp->preserve) - return; - if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ } - hfile_destroy(fp); - errno = save; -} - - -/*************************** - * File descriptor backend * - ***************************/ - -#ifndef _WIN32 -#include -#include -#define HAVE_STRUCT_STAT_ST_BLKSIZE -#else -#include -#define HAVE_CLOSESOCKET -#define HAVE_SETMODE -#endif -#include -#include - -/* For Unix, it doesn't matter whether a file descriptor is a socket. - However Windows insists on send()/recv() and its own closesocket() - being used when fd happens to be a socket. */ - -typedef struct { - hFILE base; - int fd; - unsigned is_socket:1, is_shared:1; -} hFILE_fd; - -static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes) -{ - hFILE_fd *fp = (hFILE_fd *) fpv; - ssize_t n; - do { - n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0) - : read(fp->fd, buffer, nbytes); - } while (n < 0 && errno == EINTR); - return n; -} - -static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes) -{ - hFILE_fd *fp = (hFILE_fd *) fpv; - ssize_t n; - do { - n = fp->is_socket? 
send(fp->fd, buffer, nbytes, 0) - : write(fp->fd, buffer, nbytes); - } while (n < 0 && errno == EINTR); -#ifdef _WIN32 - // On windows we have no SIGPIPE. Instead write returns - // EINVAL. We check for this and our fd being a pipe. - // If so, we raise SIGTERM instead of SIGPIPE. It's not - // ideal, but I think the only alternative is extra checking - // in every single piece of code. - if (n < 0 && errno == EINVAL && - GetLastError() == ERROR_NO_DATA && - GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) { - raise(SIGTERM); - } -#endif - return n; -} - -static off_t fd_seek(hFILE *fpv, off_t offset, int whence) -{ - hFILE_fd *fp = (hFILE_fd *) fpv; -#ifdef _WIN32 - // On windows lseek can return non-zero values even on a pipe. Instead - // it's likely to seek somewhere within the pipe memory buffer. - // This breaks bgzf_check_EOF among other things. - if (GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) { - errno = ESPIPE; - return -1; - } -#endif - - return lseek(fp->fd, offset, whence); -} - -static int fd_flush(hFILE *fpv) -{ - int ret = 0; - do { -#ifdef HAVE_FDATASYNC - hFILE_fd *fp = (hFILE_fd *) fpv; - ret = fdatasync(fp->fd); -#elif defined(HAVE_FSYNC) - hFILE_fd *fp = (hFILE_fd *) fpv; - ret = fsync(fp->fd); -#endif - // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe, - // and operation-not-supported errors (Mac OS X) - if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0; - } while (ret < 0 && errno == EINTR); - return ret; -} - -static int fd_close(hFILE *fpv) -{ - hFILE_fd *fp = (hFILE_fd *) fpv; - int ret; - - // If we don't own the fd, return successfully without actually closing it - if (fp->is_shared) return 0; - - do { -#ifdef HAVE_CLOSESOCKET - ret = fp->is_socket? 
closesocket(fp->fd) : close(fp->fd); -#else - ret = close(fp->fd); -#endif - } while (ret < 0 && errno == EINTR); - return ret; -} - -static const struct hFILE_backend fd_backend = -{ - fd_read, fd_write, fd_seek, fd_flush, fd_close -}; - -static size_t blksize(int fd) -{ -#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE - struct stat sbuf; - if (fstat(fd, &sbuf) != 0) return 0; - return sbuf.st_blksize; -#else - return 0; -#endif -} - -static hFILE *hopen_fd(const char *filename, const char *mode) -{ - hFILE_fd *fp = NULL; - int fd = open(filename, hfile_oflags(mode), 0666); - if (fd < 0) goto error; - - fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); - if (fp == NULL) goto error; - - fp->fd = fd; - fp->is_socket = 0; - fp->is_shared = 0; - fp->base.backend = &fd_backend; - return &fp->base; - -error: - if (fd >= 0) { int save = errno; (void) close(fd); errno = save; } - hfile_destroy((hFILE *) fp); - return NULL; -} - -// Loads the contents of filename to produced a read-only, in memory, -// immobile hfile. fp is the already opened file. We always close this -// input fp, irrespective of whether we error or whether we return a new -// immobile hfile. 
-static hFILE *hpreload(hFILE *fp) { - hFILE *mem_fp; - char *buf = NULL; - off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len; - - for (;;) { - if (buf_a - buf_sz < 5000) { - buf_a += buf_inc; - char *t = realloc(buf, buf_a); - if (!t) goto err; - buf = t; - if (buf_inc < 1000000) buf_inc *= 1.3; - } - len = hread(fp, buf+buf_sz, buf_a-buf_sz); - if (len > 0) - buf_sz += len; - else - break; - } - - if (len < 0) goto err; - mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a); - if (!mem_fp) goto err; - mem_fp->backend = &mem_backend; - - if (hclose(fp) < 0) { - hclose_abruptly(mem_fp); - goto err; - } - return mem_fp; - - err: - free(buf); - hclose_abruptly(fp); - return NULL; -} - -static int is_preload_url_remote(const char *url){ - return hisremote(url + 8); // len("preload:") = 8 -} - -static hFILE *hopen_preload(const char *url, const char *mode){ - hFILE* fp = hopen(url + 8, mode); - return hpreload(fp); -} - -hFILE *hdopen(int fd, const char *mode) -{ - hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); - if (fp == NULL) return NULL; - - fp->fd = fd; - fp->is_socket = (strchr(mode, 's') != NULL); - fp->is_shared = (strchr(mode, 'S') != NULL); - fp->base.backend = &fd_backend; - return &fp->base; -} - -static hFILE *hopen_fd_fileuri(const char *url, const char *mode) -{ - if (strncmp(url, "file://localhost/", 17) == 0) url += 16; - else if (strncmp(url, "file:///", 8) == 0) url += 7; - else { errno = EPROTONOSUPPORT; return NULL; } - -#if defined(_WIN32) || defined(__MSYS__) - // For cases like C:/foo - if (url[0] == '/' && url[1] && url[2] == ':' && url[3] == '/') url++; -#endif - - return hopen_fd(url, mode); -} - -static hFILE *hopen_fd_stdinout(const char *mode) -{ - int fd = (strchr(mode, 'r') != NULL)? 
STDIN_FILENO : STDOUT_FILENO; - char mode_shared[101]; - snprintf(mode_shared, sizeof mode_shared, "S%s", mode); -#if defined HAVE_SETMODE && defined O_BINARY - if (setmode(fd, O_BINARY) < 0) return NULL; -#endif - return hdopen(fd, mode_shared); -} - -HTSLIB_EXPORT -int hfile_oflags(const char *mode) -{ - int rdwr = 0, flags = 0; - const char *s; - for (s = mode; *s; s++) - switch (*s) { - case 'r': rdwr = O_RDONLY; break; - case 'w': rdwr = O_WRONLY; flags |= O_CREAT | O_TRUNC; break; - case 'a': rdwr = O_WRONLY; flags |= O_CREAT | O_APPEND; break; - case '+': rdwr = O_RDWR; break; -#ifdef O_CLOEXEC - case 'e': flags |= O_CLOEXEC; break; -#endif -#ifdef O_EXCL - case 'x': flags |= O_EXCL; break; -#endif - default: break; - } - -#ifdef O_BINARY - flags |= O_BINARY; -#endif - - return rdwr | flags; -} - - -/********************* - * In-memory backend * - *********************/ - -#include "hts_internal.h" - -typedef struct { - hFILE base; -} hFILE_mem; - -static off_t mem_seek(hFILE *fpv, off_t offset, int whence) -{ - errno = EINVAL; - return -1; -} - -static int mem_close(hFILE *fpv) -{ - return 0; -} - -static const struct hFILE_backend mem_backend = -{ - NULL, NULL, mem_seek, NULL, mem_close -}; - -static int cmp_prefix(const char *key, const char *s) -{ - while (*key) - if (tolower_c(*s) != *key) return +1; - else s++, key++; - - return 0; -} - -static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size) -{ - hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size); - if (fp == NULL) - return NULL; - - fp->base.backend = &mem_backend; - return &fp->base; -} - -static hFILE *hopen_mem(const char *url, const char *mode) -{ - size_t length, size; - char *buffer; - const char *data, *comma = strchr(url, ','); - if (comma == NULL) { errno = EINVAL; return NULL; } - data = comma+1; - - // TODO Implement write modes - if (strchr(mode, 'r') == NULL) { errno = EROFS; return NULL; } - - 
if (comma - url >= 7 && cmp_prefix(";base64", &comma[-7]) == 0) { - size = hts_base64_decoded_length(strlen(data)); - buffer = malloc(size); - if (buffer == NULL) return NULL; - hts_decode_base64(buffer, &length, data); - } - else { - size = strlen(data) + 1; - buffer = malloc(size); - if (buffer == NULL) return NULL; - hts_decode_percent(buffer, &length, data); - } - hFILE* hf; - - if(!(hf = create_hfile_mem(buffer, mode, length, size))){ - free(buffer); - return NULL; - } - - return hf; -} - -static hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) -{ - char* buffer = va_arg(args, char*); - size_t sz = va_arg(args, size_t); - va_end(args); - - hFILE* hf; - - if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){ - free(buffer); - return NULL; - } - - return hf; -} - -char *hfile_mem_get_buffer(hFILE *file, size_t *length) { - if (file->backend != &mem_backend) { - errno = EINVAL; - return NULL; - } - - if (length) - *length = file->buffer - file->limit; - - return file->buffer; -} - -char *hfile_mem_steal_buffer(hFILE *file, size_t *length) { - char *buf = hfile_mem_get_buffer(file, length); - if (buf) - file->buffer = NULL; - return buf; -} - -int hfile_plugin_init_mem(struct hFILE_plugin *self) -{ - // mem files are declared remote so they work with a tabix index - static const struct hFILE_scheme_handler handler = - {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem}; - self->name = "mem"; - hfile_add_scheme_handler("mem", &handler); - return 0; -} - -/********************************************************************** - * Dummy crypt4gh plug-in. Does nothing apart from advise how to get * - * the real one. It will be overridden by the actual plug-in. * - **********************************************************************/ - -static hFILE *crypt4gh_needed(const char *url, const char *mode) -{ - const char *u = strncmp(url, "crypt4gh:", 9) == 0 ? 
url + 9 : url; -#if defined(ENABLE_PLUGINS) - const char *enable_plugins = ""; -#else - const char *enable_plugins = "You also need to rebuild HTSlib with plug-ins enabled.\n"; -#endif - - hts_log_error("Accessing \"%s\" needs the crypt4gh plug-in.\n" - "It can be found at " - "https://github.com/samtools/htslib-crypt4gh\n" - "%s" - "If you have the plug-in, please ensure it can be " - "found on your HTS_PATH.", - u, enable_plugins); - - errno = EPROTONOSUPPORT; - return NULL; -} - -int hfile_plugin_init_crypt4gh_needed(struct hFILE_plugin *self) -{ - static const struct hFILE_scheme_handler handler = - { crypt4gh_needed, NULL, "crypt4gh-needed", 0, NULL }; - self->name = "crypt4gh-needed"; - hfile_add_scheme_handler("crypt4gh", &handler); - return 0; -} - - -/***************************************** - * Plugin and hopen() backend dispatcher * - *****************************************/ - -#include "htslib/khash.h" - -KHASH_MAP_INIT_STR(scheme_string, const struct hFILE_scheme_handler *) -static khash_t(scheme_string) *schemes = NULL; - -struct hFILE_plugin_list { - struct hFILE_plugin plugin; - struct hFILE_plugin_list *next; -}; - -static struct hFILE_plugin_list *plugins = NULL; -static pthread_mutex_t plugins_lock = PTHREAD_MUTEX_INITIALIZER; - -void hfile_shutdown(int do_close_plugin) -{ - pthread_mutex_lock(&plugins_lock); - - if (schemes) { - kh_destroy(scheme_string, schemes); - schemes = NULL; - } - - while (plugins != NULL) { - struct hFILE_plugin_list *p = plugins; - if (p->plugin.destroy) p->plugin.destroy(); -#ifdef ENABLE_PLUGINS - if (p->plugin.obj && do_close_plugin) close_plugin(p->plugin.obj); -#endif - plugins = p->next; - free(p); - } - - pthread_mutex_unlock(&plugins_lock); -} - -static void hfile_exit(void) -{ - hfile_shutdown(0); - pthread_mutex_destroy(&plugins_lock); -} - -static inline int priority(const struct hFILE_scheme_handler *handler) -{ - return handler->priority % 1000; -} - -#ifdef USING_WINDOWS_PLUGIN_DLLS -/* - * Work-around 
for Windows plug-in dlls where the plug-in could be - * using a different HTSlib library to the executable (for example - * because the latter was build against a static libhts.a). When this - * happens, the plug-in can call the wrong copy of hfile_add_scheme_handler(). - * If this is detected, it calls this function which attempts to fix the - * problem by redirecting to the hfile_add_scheme_handler() in the main - * executable. - */ -static int try_exe_add_scheme_handler(const char *scheme, - const struct hFILE_scheme_handler *handler) -{ - static void (*add_scheme_handler)(const char *scheme, - const struct hFILE_scheme_handler *handler); - if (!add_scheme_handler) { - // dlopen the main executable and resolve hfile_add_scheme_handler - void *exe_handle = dlopen(NULL, RTLD_LAZY); - if (!exe_handle) return -1; - *(void **) (&add_scheme_handler) = dlsym(exe_handle, "hfile_add_scheme_handler"); - dlclose(exe_handle); - } - // Check that the symbol was obtained and isn't the one in this copy - // of the library (to avoid infinite recursion) - if (!add_scheme_handler || add_scheme_handler == hfile_add_scheme_handler) - return -1; - add_scheme_handler(scheme, handler); - return 0; -} -#else -static int try_exe_add_scheme_handler(const char *scheme, - const struct hFILE_scheme_handler *handler) -{ - return -1; -} -#endif - -HTSLIB_EXPORT -void hfile_add_scheme_handler(const char *scheme, - const struct hFILE_scheme_handler *handler) -{ - int absent; - if (!schemes) { - if (try_exe_add_scheme_handler(scheme, handler) != 0) { - hts_log_warning("Couldn't register scheme handler for %s", scheme); - } - return; - } - khint_t k = kh_put(scheme_string, schemes, scheme, &absent); - if (absent < 0) { - hts_log_warning("Couldn't register scheme handler for %s : %s", - scheme, strerror(errno)); - return; - } - if (absent || priority(handler) > priority(kh_value(schemes, k))) { - kh_value(schemes, k) = handler; - } -} - -static int init_add_plugin(void *obj, int (*init)(struct 
hFILE_plugin *), - const char *pluginname) -{ - struct hFILE_plugin_list *p = malloc (sizeof (struct hFILE_plugin_list)); - if (p == NULL) { - hts_log_debug("Failed to allocate memory for plugin \"%s\"", pluginname); - return -1; - } - - p->plugin.api_version = 1; - p->plugin.obj = obj; - p->plugin.name = NULL; - p->plugin.destroy = NULL; - - int ret = (*init)(&p->plugin); - - if (ret != 0) { - hts_log_debug("Initialisation failed for plugin \"%s\": %d", pluginname, ret); - free(p); - return ret; - } - - hts_log_debug("Loaded \"%s\"", pluginname); - - p->next = plugins, plugins = p; - return 0; -} - -/* - * Returns 0 on success, - * <0 on failure - */ -static int load_hfile_plugins(void) -{ - static const struct hFILE_scheme_handler - data = { hopen_mem, hfile_always_local, "built-in", 80 }, - file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 }, - preload = { hopen_preload, is_preload_url_remote, "built-in", 80 }; - - schemes = kh_init(scheme_string); - if (schemes == NULL) - return -1; - - hfile_add_scheme_handler("data", &data); - hfile_add_scheme_handler("file", &file); - hfile_add_scheme_handler("preload", &preload); - init_add_plugin(NULL, hfile_plugin_init_mem, "mem"); - init_add_plugin(NULL, hfile_plugin_init_crypt4gh_needed, "crypt4gh-needed"); - -#ifdef ENABLE_PLUGINS - struct hts_path_itr path; - const char *pluginname; - hts_path_itr_setup(&path, NULL, NULL, "hfile_", 6, NULL, 0); - while ((pluginname = hts_path_itr_next(&path)) != NULL) { - void *obj; - int (*init)(struct hFILE_plugin *) = (int (*)(struct hFILE_plugin *)) - load_plugin(&obj, pluginname, "hfile_plugin_init"); - - if (init) { - if (init_add_plugin(obj, init, pluginname) != 0) - close_plugin(obj); - } - } -#else - -#ifdef HAVE_LIBCURL - init_add_plugin(NULL, hfile_plugin_init_libcurl, "libcurl"); -#endif -#ifdef ENABLE_GCS - init_add_plugin(NULL, hfile_plugin_init_gcs, "gcs"); -#endif -#ifdef ENABLE_S3 - init_add_plugin(NULL, hfile_plugin_init_s3, "s3"); - init_add_plugin(NULL, 
hfile_plugin_init_s3_write, "s3w"); -#endif - -#endif - - // In the unlikely event atexit() fails, it's better to succeed here and - // carry on; then eventually when the program exits, we'll merely close - // down the plugins uncleanly, as if we had aborted. - (void) atexit(hfile_exit); - - return 0; -} - -/* A filename like "foo:bar" in which we don't recognise the scheme is - either an ordinary file or an indication of a missing or broken plugin. - Try to open it as an ordinary file; but if there's no such file, set - errno distinctively to make the plugin issue apparent. */ -static hFILE *hopen_unknown_scheme(const char *fname, const char *mode) -{ - hFILE *fp = hopen_fd(fname, mode); - if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT; - return fp; -} - -/* Returns the appropriate handler, or NULL if the string isn't an URL. */ -static const struct hFILE_scheme_handler *find_scheme_handler(const char *s) -{ - static const struct hFILE_scheme_handler unknown_scheme = - { hopen_unknown_scheme, hfile_always_local, "built-in", 0 }; - - char scheme[12]; - int i; - - for (i = 0; i < sizeof scheme; i++) - if (isalnum_c(s[i]) || s[i] == '+' || s[i] == '-' || s[i] == '.') - scheme[i] = tolower_c(s[i]); - else if (s[i] == ':') break; - else return NULL; - - // 1 byte schemes are likely windows C:/foo pathnames - if (i <= 1 || i >= sizeof scheme) return NULL; - scheme[i] = '\0'; - - pthread_mutex_lock(&plugins_lock); - if (!schemes && load_hfile_plugins() < 0) { - pthread_mutex_unlock(&plugins_lock); - return NULL; - } - pthread_mutex_unlock(&plugins_lock); - - khint_t k = kh_get(scheme_string, schemes, scheme); - return (k != kh_end(schemes))? kh_value(schemes, k) : &unknown_scheme; -} - - -/*************************** - * Library introspection functions - ***************************/ - -/* - * Fills out sc_list[] with the list of known URL schemes. - * This can be restricted to just ones from a specific plugin, - * or all (plugin == NULL). 
- * - * Returns number of schemes found on success; - * -1 on failure. - */ -HTSLIB_EXPORT -int hfile_list_schemes(const char *plugin, const char *sc_list[], int *nschemes) -{ - pthread_mutex_lock(&plugins_lock); - if (!schemes && load_hfile_plugins() < 0) { - pthread_mutex_unlock(&plugins_lock); - return -1; - } - pthread_mutex_unlock(&plugins_lock); - - khiter_t k; - int ns = 0; - - for (k = kh_begin(schemes); k != kh_end(schemes); k++) { - if (!kh_exist(schemes, k)) - continue; - - const struct hFILE_scheme_handler *s = kh_value(schemes, k); - if (plugin && strcmp(s->provider, plugin) != 0) - continue; - - if (ns < *nschemes) - sc_list[ns] = kh_key(schemes, k); - ns++; - } - - if (*nschemes > ns) - *nschemes = ns; - - return ns; -} - - -/* - * Fills out plist[] with the list of known hFILE plugins. - * - * Returns number of schemes found on success; - * -1 on failure - */ -HTSLIB_EXPORT -int hfile_list_plugins(const char *plist[], int *nplugins) -{ - pthread_mutex_lock(&plugins_lock); - if (!schemes && load_hfile_plugins() < 0) { - pthread_mutex_unlock(&plugins_lock); - return -1; - } - pthread_mutex_unlock(&plugins_lock); - - int np = 0; - if (*nplugins) - plist[np++] = "built-in"; - - struct hFILE_plugin_list *p = plugins; - while (p) { - if (np < *nplugins) - plist[np] = p->plugin.name; - - p = p->next; - np++; - } - - if (*nplugins > np) - *nplugins = np; - - return np; -} - - -/* - * Tests for the presence of a specific hFILE plugin. 
- * - * Returns 1 if true - * 0 otherwise - */ -HTSLIB_EXPORT -int hfile_has_plugin(const char *name) -{ - pthread_mutex_lock(&plugins_lock); - if (!schemes && load_hfile_plugins() < 0) { - pthread_mutex_unlock(&plugins_lock); - return -1; - } - pthread_mutex_unlock(&plugins_lock); - - struct hFILE_plugin_list *p = plugins; - while (p) { - if (strcmp(p->plugin.name, name) == 0) - return 1; - p = p->next; - } - - return 0; -} - -/*************************** - * hFILE interface proper - ***************************/ - -hFILE *hopen(const char *fname, const char *mode, ...) -{ - const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); - if (handler) { - if (strchr(mode, ':') == NULL - || handler->priority < 2000 - || handler->vopen == NULL) { - return handler->open(fname, mode); - } - else { - hFILE *fp; - va_list arg; - va_start(arg, mode); - fp = handler->vopen(fname, mode, arg); - va_end(arg); - return fp; - } - } - else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode); - else return hopen_fd(fname, mode); -} - -HTSLIB_EXPORT -int hfile_always_local (const char *fname) { return 0; } - -HTSLIB_EXPORT -int hfile_always_remote(const char *fname) { return 1; } - -int hisremote(const char *fname) -{ - const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); - return handler? handler->isremote(fname) : 0; -} - -// Remove an extension, if any, from the basename part of [start,limit). -// Note: Doesn't notice percent-encoded '.' and '/' characters. Don't do that. 
-static const char *strip_extension(const char *start, const char *limit) -{ - const char *s = limit; - while (s > start) { - --s; - if (*s == '.') return s; - else if (*s == '/') break; - } - return limit; -} - -char *haddextension(struct kstring_t *buffer, const char *filename, - int replace, const char *new_extension) -{ - const char *trailing, *end; - - if (find_scheme_handler(filename)) { - // URL, so alter extensions before any trailing query or fragment parts - // Allow # symbols in s3 URLs - trailing = filename + ((strncmp(filename, "s3://", 5) && strncmp(filename, "s3+http://", 10) && strncmp(filename, "s3+https://", 11)) ? strcspn(filename, "?#") : strcspn(filename, "?")); - } - else { - // Local path, so alter extensions at the end of the filename - trailing = strchr(filename, '\0'); - } - - end = replace? strip_extension(filename, trailing) : trailing; - - buffer->l = 0; - if (kputsn(filename, end - filename, buffer) >= 0 && - kputs(new_extension, buffer) >= 0 && - kputs(trailing, buffer) >= 0) return buffer->s; - else return NULL; -} - - -/* - * ---------------------------------------------------------------------- - * Minimal stub functions for knet, added after the removal of - * hfile_net.c and knetfile.c. - * - * They exist purely for ABI compatibility, but are simply wrappers to - * hFILE. API should be compatible except knet_fileno (unused?). - * - * CULL THESE and knetfile.h at the next .so version bump. - */ -typedef struct knetFile_s { - // As per htslib/knetfile.h. Duplicated here as we don't wish to - // have any dependence on the deprecated knetfile.h interface, plus - // it's hopefully only temporary. 
- int type, fd; - int64_t offset; - char *host, *port; - int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; - char *response, *retr, *size_cmd; - int64_t seek_offset; - int64_t file_size; - char *path, *http_host; - - // Our local addition - hFILE *hf; -} knetFile; - -HTSLIB_EXPORT -knetFile *knet_open(const char *fn, const char *mode) { - knetFile *fp = calloc(1, sizeof(*fp)); - if (!fp) return NULL; - if (!(fp->hf = hopen(fn, mode))) { - free(fp); - return NULL; - } - - // FD backend is the only one implementing knet_fileno - fp->fd = fp->hf->backend == &fd_backend - ? ((hFILE_fd *)fp->hf)->fd - : -1; - - return fp; -} - -HTSLIB_EXPORT -knetFile *knet_dopen(int fd, const char *mode) { - knetFile *fp = calloc(1, sizeof(*fp)); - if (!fp) return NULL; - if (!(fp->hf = hdopen(fd, mode))) { - free(fp); - return NULL; - } - fp->fd = fd; - return fp; -} - -HTSLIB_EXPORT -ssize_t knet_read(knetFile *fp, void *buf, size_t len) { - ssize_t r = hread(fp->hf, buf, len); - fp->offset += r>0?r:0; - return r; -} - -HTSLIB_EXPORT -off_t knet_seek(knetFile *fp, off_t off, int whence) { - off_t r = hseek(fp->hf, off, whence); - if (r >= 0) - fp->offset = r; - return r; -} - -HTSLIB_EXPORT -int knet_close(knetFile *fp) { - int r = hclose(fp->hf); - free(fp); - return r; -} diff --git a/src/htslib-1.19.1/hfile_gcs.c b/src/htslib-1.19.1/hfile_gcs.c deleted file mode 100644 index 2f01a20..0000000 --- a/src/htslib-1.19.1/hfile_gcs.c +++ /dev/null @@ -1,160 +0,0 @@ -/* hfile_gcs.c -- Google Cloud Storage backend for low-level file streams. - - Copyright (C) 2016, 2021 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include - -#include "htslib/hts.h" -#include "htslib/kstring.h" -#include "hfile_internal.h" -#ifdef ENABLE_PLUGINS -#include "version.h" -#endif - -static hFILE * -gcs_rewrite(const char *gsurl, const char *mode, int mode_has_colon, - va_list *argsp) -{ - const char *bucket, *path, *access_token, *requester_pays_project; - kstring_t mode_colon = { 0, 0, NULL }; - kstring_t url = { 0, 0, NULL }; - kstring_t auth_hdr = { 0, 0, NULL }; - kstring_t requester_pays_hdr = { 0, 0, NULL }; - hFILE *fp = NULL; - - // GCS URL format is gs[+SCHEME]://BUCKET/PATH - - if (gsurl[2] == '+') { - bucket = strchr(gsurl, ':') + 1; - kputsn(&gsurl[3], bucket - &gsurl[3], &url); - } - else { - kputs("https:", &url); - bucket = &gsurl[3]; - } - while (*bucket == '/') kputc(*bucket++, &url); - - path = bucket + strcspn(bucket, "/?#"); - - kputsn(bucket, path - bucket, &url); - if (strchr(mode, 'r')) kputs(".storage-download", &url); - else if (strchr(mode, 'w')) kputs(".storage-upload", &url); - else kputs(".storage", &url); - kputs(".googleapis.com", &url); - - kputs(path, &url); - - if (hts_verbose >= 8) - fprintf(stderr, "[M::gcs_open] rewrote URL as %s\n", url.s); - - // TODO Find the access token in a more standard way - access_token = getenv("GCS_OAUTH_TOKEN"); - - if (access_token) { - kputs("Authorization: Bearer ", &auth_hdr); - kputs(access_token, &auth_hdr); - } - - requester_pays_project = getenv("GCS_REQUESTER_PAYS_PROJECT"); - - if (requester_pays_project) { - kputs("X-Goog-User-Project: ", &requester_pays_hdr); - kputs(requester_pays_project, &requester_pays_hdr); - } - - if (argsp || mode_has_colon || auth_hdr.l > 0 || requester_pays_hdr.l > 0) { - if (! 
mode_has_colon) { - kputs(mode, &mode_colon); - kputc(':', &mode_colon); - mode = mode_colon.s; - } - - if (auth_hdr.l > 0 && requester_pays_hdr.l > 0) { - fp = hopen( - url.s, mode, "va_list", argsp, - "httphdr:l", - auth_hdr.s, - requester_pays_hdr.s, - NULL, - NULL - ); - - } - else { - fp = hopen(url.s, mode, "va_list", argsp, - "httphdr", (auth_hdr.l > 0)? auth_hdr.s : NULL, NULL); - } - } - else - fp = hopen(url.s, mode); - - free(mode_colon.s); - free(url.s); - free(auth_hdr.s); - free(requester_pays_hdr.s); - return fp; -} - -static hFILE *gcs_open(const char *url, const char *mode) -{ - return gcs_rewrite(url, mode, 0, NULL); -} - -static hFILE *gcs_vopen(const char *url, const char *mode_colon, va_list args0) -{ - // Need to use va_copy() as we can only take the address of an actual - // va_list object, not that of a parameter as its type may have decayed. - va_list args; - va_copy(args, args0); - hFILE *fp = gcs_rewrite(url, mode_colon, 1, &args); - va_end(args); - return fp; -} - -int PLUGIN_GLOBAL(hfile_plugin_init,_gcs)(struct hFILE_plugin *self) -{ - static const struct hFILE_scheme_handler handler = - { gcs_open, hfile_always_remote, "Google Cloud Storage", - 2000 + 50, gcs_vopen - }; - -#ifdef ENABLE_PLUGINS - // Embed version string for examination via strings(1) or what(1) - static const char id[] = "@(#)hfile_gcs plugin (htslib)\t" HTS_VERSION_TEXT; - if (hts_verbose >= 9) - fprintf(stderr, "[M::hfile_gcs.init] version %s\n", strchr(id, '\t')+1); -#endif - - self->name = "Google Cloud Storage"; - hfile_add_scheme_handler("gs", &handler); - hfile_add_scheme_handler("gs+http", &handler); - hfile_add_scheme_handler("gs+https", &handler); - return 0; -} diff --git a/src/htslib-1.19.1/hfile_internal.h b/src/htslib-1.19.1/hfile_internal.h deleted file mode 100644 index 2e365ae..0000000 --- a/src/htslib-1.19.1/hfile_internal.h +++ /dev/null @@ -1,209 +0,0 @@ -/* hfile_internal.h -- internal parts of low-level input/output streams. 
- - Copyright (C) 2013-2016, 2019 Genome Research Ltd. - - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HFILE_INTERNAL_H -#define HFILE_INTERNAL_H - -#include - -#include "htslib/hts_defs.h" -#include "htslib/hfile.h" - -#include "textutils_internal.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*! - @abstract Resizes the buffer within an hFILE. - - @notes Changes the buffer size for an hFILE. Ideally this is done - immediately after opening. If performed later, this function may - fail if we are reducing the buffer size and the current offset into - the buffer is beyond the new capacity. - - @param fp The file stream - @param bufsiz The size of the new buffer - - @return Returns 0 on success, -1 on failure. - */ -int hfile_set_blksize(hFILE *fp, size_t bufsiz); - -struct BGZF; -/*! - @abstract Return the hFILE connected to a BGZF - */ -struct hFILE *bgzf_hfile(struct BGZF *fp); - -/*! 
- @abstract Closes all hFILE plugins that have been loaded -*/ -void hfile_shutdown(int do_close_plugin); - -struct hFILE_backend { - /* As per read(2), returning the number of bytes read (possibly 0) or - negative (and setting errno) on errors. Front-end code will call this - repeatedly if necessary to attempt to get the desired byte count. */ - ssize_t (*read)(hFILE *fp, void *buffer, size_t nbytes) HTS_RESULT_USED; - - /* As per write(2), returning the number of bytes written or negative (and - setting errno) on errors. Front-end code will call this repeatedly if - necessary until the desired block is written or an error occurs. */ - ssize_t (*write)(hFILE *fp, const void *buffer, size_t nbytes) - HTS_RESULT_USED; - - /* As per lseek(2), returning the resulting offset within the stream or - negative (and setting errno) on errors. */ - off_t (*seek)(hFILE *fp, off_t offset, int whence) HTS_RESULT_USED; - - /* Performs low-level flushing, if any, e.g., fsync(2); for writing streams - only. Returns 0 for success or negative (and sets errno) on errors. */ - int (*flush)(hFILE *fp) HTS_RESULT_USED; - - /* Closes the underlying stream (for output streams, the buffer will - already have been flushed), returning 0 for success or negative (and - setting errno) on errors, as per close(2). */ - int (*close)(hFILE *fp) HTS_RESULT_USED; -}; - -/* May be called by hopen_*() functions to decode a fopen()-style mode into - open(2)-style flags. */ -HTSLIB_EXPORT -int hfile_oflags(const char *mode); - -/* Must be called by hopen_*() functions to allocate the hFILE struct and set - up its base. Capacity is a suggested buffer size (e.g., via fstat(2)) - or 0 for a default-sized buffer. */ -HTSLIB_EXPORT -hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity); - -/* Alternative to hfile_init() for in-memory backends for which the base - buffer is the only storage. Buffer is already allocated via malloc(2) - of size buf_size and with buf_filled bytes already filled. 
Ownership - of the buffer is transferred to the resulting hFILE. */ -hFILE *hfile_init_fixed(size_t struct_size, const char *mode, - char *buffer, size_t buf_filled, size_t buf_size); - -/* May be called by hopen_*() functions to undo the effects of hfile_init() - in the event opening the stream subsequently fails. (This is safe to use - even if fp is NULL. This takes care to preserve errno.) */ -HTSLIB_EXPORT -void hfile_destroy(hFILE *fp); - - -struct hFILE_scheme_handler { - /* Opens a stream when dispatched by hopen(); should call hfile_init() - to malloc a struct "derived" from hFILE and initialise it appropriately, - including setting base.backend to its own backend vector. */ - hFILE *(*open)(const char *filename, const char *mode) HTS_RESULT_USED; - - /* Returns whether the URL denotes remote storage when dispatched by - hisremote(). For simple cases, use one of hfile_always_*() below. */ - int (*isremote)(const char *filename) HTS_RESULT_USED; - - /* The name of the plugin or other code providing this handler. */ - const char *provider; - - /* If multiple handlers are registered for the same scheme, the one with - the highest priority is used; range is 0 (lowest) to 100 (highest). - This field is used modulo 1000 as a priority; thousands indicate - later revisions to this structure, as noted below. */ - int priority; - - /* Fields below are present when priority >= 2000. */ - - /* Same as the open() method, used when extra arguments have been given - to hopen(). */ - hFILE *(*vopen)(const char *filename, const char *mode, va_list args) - HTS_RESULT_USED; -}; - -/* May be used as an isremote() function in simple cases. */ -HTSLIB_EXPORT -extern int hfile_always_local (const char *fname); -HTSLIB_EXPORT -extern int hfile_always_remote(const char *fname); - -/* Should be called by plugins for each URL scheme they wish to handle. 
*/ -HTSLIB_EXPORT -void hfile_add_scheme_handler(const char *scheme, - const struct hFILE_scheme_handler *handler); - -struct hFILE_plugin { - /* On entry, HTSlib's plugin API version (currently 1). */ - int api_version; - - /* On entry, the plugin's handle as returned by dlopen() etc. */ - void *obj; - - /* The plugin should fill this in with its (human-readable) name. */ - const char *name; - - /* The plugin may wish to fill in a function to be called on closing. */ - void (*destroy)(void); -}; - -#ifdef ENABLE_PLUGINS -#define PLUGIN_GLOBAL(identifier,suffix) identifier - -/* Plugins must define an entry point with this signature. */ -HTSLIB_EXPORT -extern int hfile_plugin_init(struct hFILE_plugin *self); - -#else -#define PLUGIN_GLOBAL(identifier,suffix) identifier##suffix - -/* Only plugins distributed within the HTSlib source that might be built - even with --disable-plugins need to use PLUGIN_GLOBAL and be listed here; - others can simply define hfile_plugin_init(). */ - -extern int hfile_plugin_init_gcs(struct hFILE_plugin *self); -extern int hfile_plugin_init_libcurl(struct hFILE_plugin *self); -extern int hfile_plugin_init_s3(struct hFILE_plugin *self); -extern int hfile_plugin_init_s3_write(struct hFILE_plugin *self); -#endif - -// Callback to allow headers to be set in http connections. Currently used -// to allow s3 to renew tokens when seeking. Kept internal for now, -// although we may consider exposing it in the API later. -typedef int (* hts_httphdr_callback) (void *cb_data, char ***hdrs); - -/** Callback for handling 3xx redirect responses from http connections. - - @param data is passed to the callback - @param response http response code (e.g. 301) - @param headers http response headers - @param new_url the callback should write the url to switch to in here - - Currently used by s3 to handle switching region endpoints. 
-*/ -typedef int (*redirect_callback) (void *data, long response, - kstring_t *headers, kstring_t *new_url); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/hfile_s3.c b/src/htslib-1.19.1/hfile_s3.c deleted file mode 100644 index e2718f6..0000000 --- a/src/htslib-1.19.1/hfile_s3.c +++ /dev/null @@ -1,1408 +0,0 @@ -/* hfile_s3.c -- Amazon S3 backend for low-level file streams. - - Copyright (C) 2015-2017, 2019-2023 Genome Research Ltd. - - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include "hfile_internal.h" -#ifdef ENABLE_PLUGINS -#include "version.h" -#endif -#include "htslib/hts.h" // for hts_version() and hts_verbose -#include "htslib/kstring.h" -#include "hts_time_funcs.h" - -typedef struct s3_auth_data { - kstring_t id; - kstring_t token; - kstring_t secret; - kstring_t region; - kstring_t canonical_query_string; - kstring_t user_query_string; - kstring_t host; - kstring_t profile; - time_t creds_expiry_time; - char *bucket; - kstring_t auth_hdr; - time_t auth_time; - char date[40]; - char date_long[17]; - char date_short[9]; - kstring_t date_html; - char mode; - char *headers[5]; - int refcount; -} s3_auth_data; - -#define AUTH_LIFETIME 60 // Regenerate auth headers if older than this -#define CREDENTIAL_LIFETIME 60 // Seconds before expiry to reread credentials - -#if defined HAVE_COMMONCRYPTO - -#include - -#define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH -#define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH -#define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 - -static size_t -s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message) -{ - CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest); - return CC_SHA1_DIGEST_LENGTH; -} - - -static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { - CC_SHA256(in, length, out); -} - - -static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { - CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md); - *md_len = CC_SHA256_DIGEST_LENGTH; -} - - -#elif defined HAVE_HMAC - -#include -#include - -#define DIGEST_BUFSIZ EVP_MAX_MD_SIZE -#define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH -#define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 - -static size_t -s3_sign(unsigned char *digest, kstring_t *key, 
kstring_t *message) -{ - unsigned int len; - HMAC(EVP_sha1(), key->s, key->l, - (unsigned char *) message->s, message->l, digest, &len); - return len; -} - - -static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { - SHA256(in, length, out); -} - - -static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { - HMAC(EVP_sha256(), key, key_len, d, n, md, md_len); -} - -#else -#error No HMAC() routine found by configure -#endif - -static void -urldecode_kput(const char *s, int len, kstring_t *str) -{ - char buf[3]; - int i = 0; - - while (i < len) - if (s[i] == '%' && i+2 < len) { - buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0'; - kputc(strtol(buf, NULL, 16), str); - i += 3; - } - else kputc(s[i++], str); -} - -static void base64_kput(const unsigned char *data, size_t len, kstring_t *str) -{ - static const char base64[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - size_t i = 0; - unsigned x = 0; - int bits = 0, pad = 0; - - while (bits || i < len) { - if (bits < 6) { - x <<= 8, bits += 8; - if (i < len) x |= data[i++]; - else pad++; - } - - bits -= 6; - kputc(base64[(x >> bits) & 63], str); - } - - str->l -= pad; - kputsn("==", pad, str); -} - -static int is_dns_compliant(const char *s0, const char *slim, int is_https) -{ - int has_nondigit = 0, len = 0; - const char *s; - - for (s = s0; s < slim; len++, s++) - if (islower_c(*s)) - has_nondigit = 1; - else if (*s == '-') { - has_nondigit = 1; - if (s == s0 || s+1 == slim) return 0; - } - else if (isdigit_c(*s)) - ; - else if (*s == '.') { - if (is_https) return 0; - if (s == s0 || ! isalnum_c(s[-1])) return 0; - if (s+1 == slim || ! 
isalnum_c(s[1])) return 0; - } - else return 0; - - return has_nondigit && len >= 3 && len <= 63; -} - -static FILE *expand_tilde_open(const char *fname, const char *mode) -{ - FILE *fp; - - if (strncmp(fname, "~/", 2) == 0) { - kstring_t full_fname = { 0, 0, NULL }; - const char *home = getenv("HOME"); - if (! home) return NULL; - - kputs(home, &full_fname); - kputs(&fname[1], &full_fname); - - fp = fopen(full_fname.s, mode); - free(full_fname.s); - } - else - fp = fopen(fname, mode); - - return fp; -} - -static void parse_ini(const char *fname, const char *section, ...) -{ - kstring_t line = { 0, 0, NULL }; - int active = 1; // Start active, so global properties are accepted - char *s; - - FILE *fp = expand_tilde_open(fname, "r"); - if (fp == NULL) return; - - while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0) - if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) { - *s = '\0'; - active = (strcmp(&line.s[1], section) == 0); - } - else if (active && (s = strpbrk(line.s, ":=")) != NULL) { - const char *key = line.s, *value = &s[1], *akey; - va_list args; - - while (isspace_c(*key)) key++; - while (s > key && isspace_c(s[-1])) s--; - *s = '\0'; - - while (isspace_c(*value)) value++; - while (line.l > 0 && isspace_c(line.s[line.l-1])) - line.s[--line.l] = '\0'; - - va_start(args, section); - while ((akey = va_arg(args, const char *)) != NULL) { - kstring_t *avar = va_arg(args, kstring_t *); - if (strcmp(key, akey) == 0) { - avar->l = 0; - kputs(value, avar); - break; } - } - va_end(args); - } - - fclose(fp); - free(line.s); -} - -static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret) -{ - kstring_t text = { 0, 0, NULL }; - char *s; - size_t len; - - FILE *fp = expand_tilde_open(fname, "r"); - if (fp == NULL) return; - - while (kgetline(&text, (kgets_func *) fgets, fp) >= 0) - kputc(' ', &text); - fclose(fp); - - s = text.s; - while (isspace_c(*s)) s++; - kputsn(s, len = strcspn(s, " \t"), id); - - s += len; - while 
(isspace_c(*s)) s++; - kputsn(s, strcspn(s, " \t"), secret); - - free(text.s); -} - -static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) { - char **hdr = &ad->headers[0]; - int idx = 0; - *hdrs = hdr; - - hdr[idx] = strdup(ad->date); - if (!hdr[idx]) return -1; - idx++; - - if (ad->token.l) { - kstring_t token_hdr = KS_INITIALIZE; - kputs("X-Amz-Security-Token: ", &token_hdr); - kputs(ad->token.s, &token_hdr); - if (token_hdr.s) { - hdr[idx++] = token_hdr.s; - } else { - goto fail; - } - } - - if (ad->auth_hdr.l) { - hdr[idx] = strdup(ad->auth_hdr.s); - if (!hdr[idx]) goto fail; - idx++; - } - - hdr[idx] = NULL; - return 0; - - fail: - for (--idx; idx >= 0; --idx) - free(hdr[idx]); - return -1; -} - -static void free_auth_data(s3_auth_data *ad) { - if (ad->refcount > 0) { - --ad->refcount; - return; - } - free(ad->profile.s); - free(ad->id.s); - free(ad->token.s); - free(ad->secret.s); - free(ad->region.s); - free(ad->canonical_query_string.s); - free(ad->user_query_string.s); - free(ad->host.s); - free(ad->bucket); - free(ad->auth_hdr.s); - free(ad->date_html.s); - free(ad); -} - -static time_t parse_rfc3339_date(kstring_t *datetime) -{ - int offset = 0; - time_t when; - int num; - char should_be_t = '\0', timezone[10] = { '\0' }; - unsigned int year, mon, day, hour, min, sec; - - if (!datetime->s) - return 0; - - // It should be possible to do this with strptime(), but it seems - // to not get on with our feature definitions. 
- num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s", - &year, &mon, &day, &should_be_t, &hour, &min, &sec, timezone); - if (num < 8) - return 0; - if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ') - return 0; - struct tm parsed = { sec, min, hour, day, mon - 1, year - 1900, 0, 0, 0 }; - - switch (timezone[0]) { - case 'Z': - case 'z': - case '\0': - break; - case '+': - case '-': { - unsigned hr_off, min_off; - if (sscanf(timezone + 1, "%2u:%2u", &hr_off, &min_off)) { - if (hr_off < 24 && min_off <= 60) { - offset = ((hr_off * 60 + min_off) - * (timezone[0] == '+' ? -60 : 60)); - } - } - break; - } - default: - return 0; - } - - when = hts_time_gm(&parsed); - return when >= 0 ? when + offset : 0; -} - -static void refresh_auth_data(s3_auth_data *ad) { - // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of - // setup_auth_data(), but this only reads the authorisation parts. - const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); - kstring_t expiry_time = KS_INITIALIZE; - parse_ini(v? 
v : "~/.aws/credentials", ad->profile.s, - "aws_access_key_id", &ad->id, - "aws_secret_access_key", &ad->secret, - "aws_session_token", &ad->token, - "expiry_time", &expiry_time); - if (expiry_time.l) { - ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); - } - ks_free(&expiry_time); -} - -static int auth_header_callback(void *ctx, char ***hdrs) { - s3_auth_data *ad = (s3_auth_data *) ctx; - - time_t now = time(NULL); -#ifdef HAVE_GMTIME_R - struct tm tm_buffer; - struct tm *tm = gmtime_r(&now, &tm_buffer); -#else - struct tm *tm = gmtime(&now); -#endif - kstring_t message = { 0, 0, NULL }; - unsigned char digest[DIGEST_BUFSIZ]; - size_t digest_len; - - if (!hdrs) { // Closing connection - free_auth_data(ad); - return 0; - } - - if (ad->creds_expiry_time > 0 - && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { - refresh_auth_data(ad); - } else if (now - ad->auth_time < AUTH_LIFETIME) { - // Last auth string should still be valid - *hdrs = NULL; - return 0; - } - - strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm); - if (!ad->id.l || !ad->secret.l) { - ad->auth_time = now; - return copy_auth_headers(ad, hdrs); - } - - if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s", - ad->mode == 'r' ? "GET" : "PUT", ad->date + 6, - ad->token.l ? "x-amz-security-token:" : "", - ad->token.l ? ad->token.s : "", - ad->token.l ? 
"\n" : "", - ad->bucket) < 0) { - return -1; - } - - digest_len = s3_sign(digest, &ad->secret, &message); - ad->auth_hdr.l = 0; - if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0) - goto fail; - base64_kput(digest, digest_len, &ad->auth_hdr); - - free(message.s); - ad->auth_time = now; - return copy_auth_headers(ad, hdrs); - - fail: - free(message.s); - return -1; -} - - -/* like a escape path but for query strings '=' and '&' are untouched */ -static char *escape_query(const char *qs) { - size_t i, j = 0, length, alloced; - char *escaped; - - length = strlen(qs); - alloced = length * 3 + 1; - if ((escaped = malloc(alloced)) == NULL) { - return NULL; - } - - for (i = 0; i < length; i++) { - int c = qs[i]; - - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || - c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') { - escaped[j++] = c; - } else { - snprintf(escaped + j, alloced - j, "%%%02X", c); - j += 3; - } - } - - escaped[j] = '\0'; - - return escaped; -} - - -static char *escape_path(const char *path) { - size_t i, j = 0, length, alloced; - char *escaped; - - length = strlen(path); - alloced = length * 3 + 1; - - if ((escaped = malloc(alloced)) == NULL) { - return NULL; - } - - for (i = 0; i < length; i++) { - int c = path[i]; - - if (c == '?') break; // don't escape ? or beyond - - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || - c == '_' || c == '-' || c == '~' || c == '.' || c == '/') { - escaped[j++] = c; - } else { - snprintf(escaped + j, alloced - j, "%%%02X", c); - j += 3; - } - } - - if (i != length) { - // in the case of a '?' 
copy the rest of the path across unchanged - strcpy(escaped + j, path + i); - } else { - escaped[j] = '\0'; - } - - return escaped; -} - - -static int is_escaped(const char *str) { - const char *c = str; - int escaped = 0; - int needs_escape = 0; - - while (*c != '\0') { - if (*c == '%' && c[1] != '\0' && c[2] != '\0') { - if (isxdigit_c(c[1]) && isxdigit_c(c[2])) { - escaped = 1; - c += 3; - continue; - } else { - // only escaped if all % signs are escaped - escaped = 0; - } - } - if (!((*c >= '0' && *c <= '9') || (*c >= 'A' && *c <= 'Z') - || (*c >= 'a' && *c <= 'z') || - *c == '_' || *c == '-' || *c == '~' || *c == '.' || *c == '/')) { - needs_escape = 1; - } - c++; - } - - return escaped || !needs_escape; -} - -static int redirect_endpoint_callback(void *auth, long response, - kstring_t *header, kstring_t *url) { - s3_auth_data *ad = (s3_auth_data *)auth; - char *new_region; - char *end; - int ret = -1; - - // get the new region from the reply header - if ((new_region = strstr(header->s, "x-amz-bucket-region: "))) { - - new_region += strlen("x-amz-bucket-region: "); - end = new_region; - - while (isalnum_c(*end) || ispunct_c(*end)) end++; - - *end = 0; - - if (strstr(ad->host.s, "amazonaws.com")) { - ad->region.l = 0; - kputs(new_region, &ad->region); - - ad->host.l = 0; - ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region); - - if (ad->region.l && ad->host.l) { - url->l = 0; - kputs(ad->host.s, url); - kputsn(ad->bucket, strlen(ad->bucket), url); - if (ad->user_query_string.l) { - kputc('?', url); - kputsn(ad->user_query_string.s, ad->user_query_string.l, url); - } - ret = 0; - } - } - } - - return ret; -} - -static s3_auth_data * setup_auth_data(const char *s3url, const char *mode, - int sigver, kstring_t *url) -{ - s3_auth_data *ad = calloc(1, sizeof(*ad)); - const char *bucket, *path; - char *escaped = NULL; - size_t url_path_pos; - ptrdiff_t bucket_len; - int is_https = 1, dns_compliant; - char *query_start; - enum {s3_auto, s3_virtual, s3_path} 
address_style = s3_auto; - - if (!ad) - return NULL; - ad->mode = strchr(mode, 'r') ? 'r' : 'w'; - - // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH - - if (s3url[2] == '+') { - bucket = strchr(s3url, ':') + 1; - if (bucket == NULL) { - free(ad); - return NULL; - } - kputsn(&s3url[3], bucket - &s3url[3], url); - is_https = strncmp(url->s, "https:", 6) == 0; - } - else { - kputs("https:", url); - bucket = &s3url[3]; - } - while (*bucket == '/') kputc(*bucket++, url); - - path = bucket + strcspn(bucket, "/?#@"); - - if (*path == '@') { - const char *colon = strpbrk(bucket, ":@"); - if (*colon != ':') { - urldecode_kput(bucket, colon - bucket, &ad->profile); - } - else { - const char *colon2 = strpbrk(&colon[1], ":@"); - urldecode_kput(bucket, colon - bucket, &ad->id); - urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret); - if (*colon2 == ':') - urldecode_kput(&colon2[1], path - &colon2[1], &ad->token); - } - - bucket = &path[1]; - path = bucket + strcspn(bucket, "/?#"); - } - else { - // If the URL has no ID[:SECRET]@, consider environment variables. 
- const char *v; - if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id); - if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret); - if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token); - if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region); - if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host); - - if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile); - else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile); - else kputs("default", &ad->profile); - - if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) { - if (strcasecmp(v, "virtual") == 0) { - address_style = s3_virtual; - } else if (strcasecmp(v, "path") == 0) { - address_style = s3_path; - } - } - } - - if (ad->id.l == 0) { - kstring_t url_style = KS_INITIALIZE; - kstring_t expiry_time = KS_INITIALIZE; - const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); - parse_ini(v? v : "~/.aws/credentials", ad->profile.s, - "aws_access_key_id", &ad->id, - "aws_secret_access_key", &ad->secret, - "aws_session_token", &ad->token, - "region", &ad->region, - "addressing_style", &url_style, - "expiry_time", &expiry_time, - NULL); - - if (url_style.l) { - if (strcmp(url_style.s, "virtual") == 0) { - address_style = s3_virtual; - } else if (strcmp(url_style.s, "path") == 0) { - address_style = s3_path; - } else { - address_style = s3_auto; - } - } - if (expiry_time.l) { - // Not a real part of the AWS configuration file, but it allows - // support for short-term credentials like those for the IAM - // service. The botocore library uses the key "expiry_time" - // internally for this purpose. - // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py - ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); - } - - ks_free(&url_style); - ks_free(&expiry_time); - } - - if (ad->id.l == 0) { - kstring_t url_style = KS_INITIALIZE; - const char *v = getenv("HTS_S3_S3CFG"); - parse_ini(v? 
v : "~/.s3cfg", ad->profile.s, "access_key", &ad->id, - "secret_key", &ad->secret, "access_token", &ad->token, - "host_base", &ad->host, - "bucket_location", &ad->region, - "host_bucket", &url_style, - NULL); - - if (url_style.l) { - // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string - // indicates use of path style adressing. - if (strstr(url_style.s, "%(bucket)s") == NULL) { - address_style = s3_path; - } else { - address_style = s3_auto; - } - } - - ks_free(&url_style); - } - - if (ad->id.l == 0) - parse_simple("~/.awssecret", &ad->id, &ad->secret); - - - // if address_style is set, force the dns_compliant setting - if (address_style == s3_virtual) { - dns_compliant = 1; - } else if (address_style == s3_path) { - dns_compliant = 0; - } else { - dns_compliant = is_dns_compliant(bucket, path, is_https); - } - - if (ad->host.l == 0) - kputs("s3.amazonaws.com", &ad->host); - - if (!dns_compliant && ad->region.l > 0 - && strcmp(ad->host.s, "s3.amazonaws.com") == 0) { - // Can avoid a redirection by including the region in the host name - // (assuming the right one has been specified) - ad->host.l = 0; - ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s); - } - - if (ad->region.l == 0) - kputs("us-east-1", &ad->region); - - if (!is_escaped(path)) { - escaped = escape_path(path); - if (escaped == NULL) { - goto error; - } - } - - bucket_len = path - bucket; - - // Use virtual hosted-style access if possible, otherwise path-style. 
- if (dns_compliant) { - size_t url_host_pos = url->l; - // Append "bucket.host" to url - kputsn_(bucket, bucket_len, url); - kputc('.', url); - kputsn(ad->host.s, ad->host.l, url); - url_path_pos = url->l; - - if (sigver == 4) { - // Copy back to ad->host to use when making the signature - ad->host.l = 0; - kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host); - } - } - else { - // Append "host/bucket" to url - kputsn(ad->host.s, ad->host.l, url); - url_path_pos = url->l; - kputc('/', url); - kputsn(bucket, bucket_len, url); - } - - kputs(escaped == NULL ? path : escaped, url); - - if (sigver == 4 || !dns_compliant) { - ad->bucket = malloc(url->l - url_path_pos + 1); - if (ad->bucket == NULL) { - goto error; - } - memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1); - } - else { - ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2); - if (ad->bucket == NULL) { - goto error; - } - ad->bucket[0] = '/'; - memcpy(ad->bucket + 1, bucket, bucket_len); - memcpy(ad->bucket + bucket_len + 1, - url->s + url_path_pos, url->l - url_path_pos + 1); - } - - // write any query strings to its own place to use later - if ((query_start = strchr(ad->bucket, '?'))) { - kputs(query_start + 1, &ad->user_query_string); - *query_start = 0; - } - - free(escaped); - - return ad; - - error: - free(escaped); - free_auth_data(ad); - return NULL; -} - -static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp) -{ - kstring_t url = { 0, 0, NULL }; - s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url); - - if (!ad) - return NULL; - - hFILE *fp = hopen(url.s, mode, "va_list", argsp, - "httphdr_callback", auth_header_callback, - "httphdr_callback_data", ad, - "redirect_callback", redirect_endpoint_callback, - "redirect_callback_data", ad, - NULL); - if (!fp) goto fail; - - free(url.s); - return fp; - - fail: - free(url.s); - free_auth_data(ad); - return NULL; -} - -/*************************************************************** - -AWS S3 
sig version 4 writing code - -****************************************************************/ - -static void hash_string(char *in, size_t length, char *out, size_t out_len) { - unsigned char hashed[SHA256_DIGEST_BUFSIZE]; - int i, j; - - s3_sha256((const unsigned char *)in, length, hashed); - - for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) { - snprintf(out + j, out_len - j, "%02x", hashed[i]); - } -} - -static void ksinit(kstring_t *s) { - s->l = 0; - s->m = 0; - s->s = NULL; -} - - -static void ksfree(kstring_t *s) { - free(s->s); - ksinit(s); -} - - -static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) { - unsigned char date_key[SHA256_DIGEST_BUFSIZE]; - unsigned char date_region_key[SHA256_DIGEST_BUFSIZE]; - unsigned char date_region_service_key[SHA256_DIGEST_BUFSIZE]; - unsigned char signing_key[SHA256_DIGEST_BUFSIZE]; - unsigned char signature[SHA256_DIGEST_BUFSIZE]; - - const unsigned char service[] = "s3"; - const unsigned char request[] = "aws4_request"; - - kstring_t secret_access_key = {0, 0, NULL}; - unsigned int len; - unsigned int i, j; - - ksprintf(&secret_access_key, "AWS4%s", ad->secret.s); - - if (secret_access_key.l == 0) { - return -1; - } - - s3_sign_sha256(secret_access_key.s, secret_access_key.l, (const unsigned char *)ad->date_short, strlen(ad->date_short), date_key, &len); - s3_sign_sha256(date_key, len, (const unsigned char *)ad->region.s, ad->region.l, date_region_key, &len); - s3_sign_sha256(date_region_key, len, service, 2, date_region_service_key, &len); - s3_sign_sha256(date_region_service_key, len, request, 12, signing_key, &len); - s3_sign_sha256(signing_key, len, (const unsigned char *)string_to_sign->s, string_to_sign->l, signature, &len); - - for (i = 0, j = 0; i < len; i++, j+= 2) { - snprintf(signature_string + j, sig_string_len - j, "%02x", signature[i]); - } - - ksfree(&secret_access_key); - - return 0; -} - - -static int 
make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) { - kstring_t signed_headers = {0, 0, NULL}; - kstring_t canonical_headers = {0, 0, NULL}; - kstring_t canonical_request = {0, 0, NULL}; - kstring_t scope = {0, 0, NULL}; - kstring_t string_to_sign = {0, 0, NULL}; - char cr_hash[HASH_LENGTH_SHA256]; - char signature_string[HASH_LENGTH_SHA256]; - int ret = -1; - - - if (!ad->token.l) { - kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers); - } else { - kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers); - } - - if (signed_headers.l == 0) { - return -1; - } - - - if (!ad->token.l) { - ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n", - ad->host.s, content, ad->date_long); - } else { - ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n", - ad->host.s, content, ad->date_long, ad->token.s); - } - - if (canonical_headers.l == 0) { - goto cleanup; - } - - // bucket == canonical_uri - ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s", - http_request, ad->bucket, ad->canonical_query_string.s, - canonical_headers.s, signed_headers.s, content); - - if (canonical_request.l == 0) { - goto cleanup; - } - - hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash)); - - ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s); - - if (scope.l == 0) { - goto cleanup; - } - - ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash); - - if (string_to_sign.l == 0) { - goto cleanup; - } - - if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string))) { - goto cleanup; - } - - ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s", - ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string); - - if (auth->l == 0) { - goto cleanup; - } - - ret = 0; 
- - cleanup: - ksfree(&signed_headers); - ksfree(&canonical_headers); - ksfree(&canonical_request); - ksfree(&scope); - ksfree(&string_to_sign); - - return ret; -} - - -static int update_time(s3_auth_data *ad, time_t now) { - int ret = -1; -#ifdef HAVE_GMTIME_R - struct tm tm_buffer; - struct tm *tm = gmtime_r(&now, &tm_buffer); -#else - struct tm *tm = gmtime(&now); -#endif - - if (now - ad->auth_time > AUTH_LIFETIME) { - // update timestamp - ad->auth_time = now; - - if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) { - return -1; - } - - if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) { - return -1;; - } - - ad->date_html.l = 0; - ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long); - } - - if (ad->date_html.l) ret = 0; - - return ret; -} - - -static int query_cmp(const void *p1, const void *p2) { - char **q1 = (char **)p1; - char **q2 = (char **)p2; - - return strcmp(*q1, *q2); -} - - -/* Query strings must be in alphabetical order for authorisation */ - -static int order_query_string(kstring_t *qs) { - int *query_offset = NULL; - int num_queries, i; - char **queries = NULL; - kstring_t ordered = {0, 0, NULL}; - char *escaped = NULL; - int ret = -1; - - if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) { - return -1; - } - - if ((queries = malloc(num_queries * sizeof(char*))) == NULL) - goto err; - - for (i = 0; i < num_queries; i++) { - queries[i] = qs->s + query_offset[i]; - } - - qsort(queries, num_queries, sizeof(char *), query_cmp); - - for (i = 0; i < num_queries; i++) { - if (i) { - kputs("&", &ordered); - } - - kputs(queries[i], &ordered); - } - - if ((escaped = escape_query(ordered.s)) == NULL) - goto err; - - qs->l = 0; - kputs(escaped, qs); - - ret = 0; - err: - free(ordered.s); - free(queries); - free(query_offset); - free(escaped); - - return ret; -} - - -static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs, - kstring_t *hash, kstring_t *auth_str, kstring_t *date, - 
kstring_t *token, int uqs) { - s3_auth_data *ad = (s3_auth_data *)auth; - char content_hash[HASH_LENGTH_SHA256]; - time_t now; - - if (request == NULL) { - // signal to free auth data - free_auth_data(ad); - return 0; - } - - now = time(NULL); - - if (update_time(ad, now)) { - return -1; - } - if (ad->creds_expiry_time > 0 - && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { - refresh_auth_data(ad); - } - - if (content) { - hash_string(content->s, content->l, content_hash, sizeof(content_hash)); - } else { - // empty hash - hash_string("", 0, content_hash, sizeof(content_hash)); - } - - ad->canonical_query_string.l = 0; - kputs(cqs, &ad->canonical_query_string); - - if (ad->canonical_query_string.l == 0) { - return -1; - } - - /* add a user provided query string, normally only useful on upload initiation */ - if (uqs) { - kputs("&", &ad->canonical_query_string); - kputs(ad->user_query_string.s, &ad->canonical_query_string); - - if (order_query_string(&ad->canonical_query_string)) { - return -1; - } - } - - if (make_authorisation(ad, request, content_hash, auth_str)) { - return -1; - } - - kputs(ad->date_html.s, date); - kputsn(content_hash, HASH_LENGTH_SHA256, hash); - - if (date->l == 0 || hash->l == 0) { - return -1; - } - - if (ad->token.l) { - ksprintf(token, "x-amz-security-token: %s", ad->token.s); - } - - return 0; -} - - -static int v4_auth_header_callback(void *ctx, char ***hdrs) { - s3_auth_data *ad = (s3_auth_data *) ctx; - char content_hash[HASH_LENGTH_SHA256]; - kstring_t content = KS_INITIALIZE; - kstring_t authorisation = KS_INITIALIZE; - kstring_t token_hdr = KS_INITIALIZE; - char *date_html = NULL; - time_t now; - int idx; - - if (!hdrs) { // Closing connection - free_auth_data(ad); - return 0; - } - - now = time(NULL); - - if (update_time(ad, now)) { - return -1; - } - - if (ad->creds_expiry_time > 0 - && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { - refresh_auth_data(ad); - } - - if (!ad->id.l || !ad->secret.l) { - return 
copy_auth_headers(ad, hdrs); - } - - hash_string("", 0, content_hash, sizeof(content_hash)); // empty hash - - ad->canonical_query_string.l = 0; - - if (ad->user_query_string.l > 0) { - kputs(ad->user_query_string.s, &ad->canonical_query_string); - - if (order_query_string(&ad->canonical_query_string)) { - return -1; - } - } else { - kputs("", &ad->canonical_query_string); - } - - if (make_authorisation(ad, "GET", content_hash, &authorisation)) { - return -1; - } - - ksprintf(&content, "x-amz-content-sha256: %s", content_hash); - date_html = strdup(ad->date_html.s); - - if (ad->token.l > 0) { - kputs("X-Amz-Security-Token: ", &token_hdr); - kputs(ad->token.s, &token_hdr); - } - - if (content.l == 0 || date_html == NULL) { - ksfree(&authorisation); - ksfree(&content); - ksfree(&token_hdr); - free(date_html); - return -1; - } - - *hdrs = &ad->headers[0]; - idx = 0; - ad->headers[idx++] = ks_release(&authorisation); - ad->headers[idx++] = date_html; - ad->headers[idx++] = ks_release(&content); - if (token_hdr.s) - ad->headers[idx++] = ks_release(&token_hdr); - ad->headers[idx++] = NULL; - - return 0; -} - -static int handle_400_response(hFILE *fp, s3_auth_data *ad) { - // v4 signatures in virtual hosted mode return 400 Bad Request if the - // wrong region is used to make the signature. The response is an xml - // document which includes the name of the correct region. This can - // be extracted and used to generate a corrected signature. - // As the xml is fairly simple, go with something "good enough" instead - // of trying to parse it properly. 
- - char buffer[1024], *region, *reg_end; - ssize_t bytes; - - bytes = hread(fp, buffer, sizeof(buffer) - 1); - if (bytes < 0) { - return -1; - } - buffer[bytes] = '\0'; - region = strstr(buffer, ""); - if (region == NULL) { - return -1; - } - region += 8; - while (isspace((unsigned char) *region)) ++region; - reg_end = strchr(region, '<'); - if (reg_end == NULL || strncmp(reg_end + 1, "/Region>", 8) != 0) { - return -1; - } - while (reg_end > region && isspace((unsigned char) reg_end[-1])) --reg_end; - ad->region.l = 0; - kputsn(region, reg_end - region, &ad->region); - if (ad->region.l == 0) { - return -1; - } - - return 0; -} - -static int set_region(void *adv, kstring_t *region) { - s3_auth_data *ad = (s3_auth_data *) adv; - - ad->region.l = 0; - return kputsn(region->s, region->l, &ad->region) < 0; -} - -static int http_status_errno(int status) -{ - if (status >= 500) - switch (status) { - case 501: return ENOSYS; - case 503: return EBUSY; - case 504: return ETIMEDOUT; - default: return EIO; - } - else if (status >= 400) - switch (status) { - case 401: return EPERM; - case 403: return EACCES; - case 404: return ENOENT; - case 405: return EROFS; - case 407: return EPERM; - case 408: return ETIMEDOUT; - case 410: return ENOENT; - default: return EINVAL; - } - else return 0; -} - -static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) { - kstring_t url = { 0, 0, NULL }; - - s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url); - hFILE *fp = NULL; - - if (ad == NULL) { - return NULL; - } - - if (ad->mode == 'r') { - long http_response = 0; - - fp = hopen(url.s, mode, "va_list", argsp, - "httphdr_callback", v4_auth_header_callback, - "httphdr_callback_data", ad, - "redirect_callback", redirect_endpoint_callback, - "redirect_callback_data", ad, - "http_response_ptr", &http_response, - "fail_on_error", 0, - NULL); - - if (fp == NULL) goto error; - - if (http_response == 400) { - ad->refcount = 1; - if (handle_400_response(fp, ad) != 0) { - 
goto error; - } - hclose_abruptly(fp); - fp = hopen(url.s, mode, "va_list", argsp, - "httphdr_callback", v4_auth_header_callback, - "httphdr_callback_data", ad, - "redirect_callback", redirect_endpoint_callback, - "redirect_callback_data", ad, - NULL); - } else if (http_response > 400) { - ad->refcount = 1; - errno = http_status_errno(http_response); - goto error; - } - - if (fp == NULL) goto error; - } else { - kstring_t final_url = {0, 0, NULL}; - - // add the scheme marker - ksprintf(&final_url, "s3w+%s", url.s); - - if(final_url.l == 0) goto error; - - fp = hopen(final_url.s, mode, "va_list", argsp, - "s3_auth_callback", write_authorisation_callback, - "s3_auth_callback_data", ad, - "redirect_callback", redirect_endpoint_callback, - "set_region_callback", set_region, - NULL); - free(final_url.s); - - if (fp == NULL) goto error; - } - - free(url.s); - - return fp; - - error: - - if (fp) hclose_abruptly(fp); - free(url.s); - free_auth_data(ad); - - return NULL; -} - - -static hFILE *s3_open(const char *url, const char *mode) -{ - hFILE *fp; - - kstring_t mode_colon = { 0, 0, NULL }; - kputs(mode, &mode_colon); - kputc(':', &mode_colon); - - if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code - fp = s3_open_v4(url, mode_colon.s, NULL); - } else { - fp = s3_rewrite(url, mode_colon.s, NULL); - } - - free(mode_colon.s); - - return fp; -} - -static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0) -{ - hFILE *fp; - // Need to use va_copy() as we can only take the address of an actual - // va_list object, not that of a parameter whose type may have decayed. 
- va_list args; - va_copy(args, args0); - - if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code - fp = s3_open_v4(url, mode_colon, &args); - } else { - fp = s3_rewrite(url, mode_colon, &args); - } - - va_end(args); - return fp; -} - -int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self) -{ - static const struct hFILE_scheme_handler handler = - { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen - }; - -#ifdef ENABLE_PLUGINS - // Embed version string for examination via strings(1) or what(1) - static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT; - if (hts_verbose >= 9) - fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1); -#endif - - self->name = "Amazon S3"; - hfile_add_scheme_handler("s3", &handler); - hfile_add_scheme_handler("s3+http", &handler); - hfile_add_scheme_handler("s3+https", &handler); - return 0; -} diff --git a/src/htslib-1.19.1/hfile_s3_write.c b/src/htslib-1.19.1/hfile_s3_write.c deleted file mode 100644 index d549458..0000000 --- a/src/htslib-1.19.1/hfile_s3_write.c +++ /dev/null @@ -1,896 +0,0 @@ -/* - hfile_s3_write.c - Code to handle multipart uploading to S3. - - Copyright (C) 2019 Genome Research Ltd. - - Author: Andrew Whitwham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - - -S3 Multipart Upload -------------------- - -There are several steps in the Mulitipart upload. - - -1) Initiate Upload ------------------- - -Initiate the upload and get an upload ID. This ID is used in all other steps. - - -2) Upload Part --------------- - -Upload a part of the data. 5Mb minimum part size (except for the last part). -Each part is numbered and a successful upload returns an Etag header value that -needs to used for the completion step. - -Step repeated till all data is uploaded. - - -3) Completion -------------- - -Complete the upload by sending all the part numbers along with their associated -Etag values. - - -Optional - Abort ----------------- - -If something goes wrong this instructs the server to delete all the partial -uploads and abandon the upload process. - - -Andrew Whitwham, January 2019 -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#ifdef __MSYS__ -#include -#endif -#include -#include - -#include "hfile_internal.h" -#ifdef ENABLE_PLUGINS -#include "version.h" -#endif -#include "htslib/hts.h" -#include "htslib/kstring.h" -#include "htslib/khash.h" - -#include - -#define MINIMUM_S3_WRITE_SIZE 5242880 -#define S3_MOVED_PERMANENTLY 301 -#define S3_BAD_REQUEST 400 - -// Lets the part memory size grow to about 1Gb giving a 2.5Tb max file size. -// Max. 
parts allowed by AWS is 10000, so use ceil(10000.0/9.0) -#define EXPAND_ON 1112 - -static struct { - kstring_t useragent; - CURLSH *share; - pthread_mutex_t share_lock; -} curl = { { 0, 0, NULL }, NULL, PTHREAD_MUTEX_INITIALIZER }; - -static void share_lock(CURL *handle, curl_lock_data data, - curl_lock_access access, void *userptr) { - pthread_mutex_lock(&curl.share_lock); -} - -static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) { - pthread_mutex_unlock(&curl.share_lock); -} - -typedef int (*s3_auth_callback) (void *auth_data, char *, kstring_t*, char*, kstring_t*, kstring_t*, kstring_t*, kstring_t*, int); - -typedef int (*set_region_callback) (void *auth_data, kstring_t *region); - -typedef struct { - s3_auth_callback callback; - redirect_callback redirect_callback; - set_region_callback set_region_callback; - void *callback_data; -} s3_authorisation; - -typedef struct { - hFILE base; - CURL *curl; - CURLcode ret; - s3_authorisation *au; - kstring_t buffer; - kstring_t url; - kstring_t upload_id; - kstring_t completion_message; - int part_no; - int aborted; - size_t index; - long verbose; - int part_size; - int expand; -} hFILE_s3_write; - - -static void ksinit(kstring_t *s) { - s->l = 0; - s->m = 0; - s->s = NULL; -} - - -static void ksfree(kstring_t *s) { - free(s->s); - ksinit(s); -} - - -static size_t response_callback(void *contents, size_t size, size_t nmemb, void *userp) { - size_t realsize = size * nmemb; - kstring_t *resp = (kstring_t *)userp; - - if (kputsn((const char *)contents, realsize, resp) == EOF) { - return 0; - } - - return realsize; -} - - -static int get_entry(char *in, char *start_tag, char *end_tag, kstring_t *out) { - char *start; - char *end; - - if (!in) { - return EOF; - } - - start = strstr(in, start_tag); - if (!start) return EOF; - - start += strlen(start_tag); - end = strstr(start, end_tag); - - if (!end) return EOF; - - return kputsn(start, end - start, out); -} - - -static void cleanup_local(hFILE_s3_write 
*fp) { - ksfree(&fp->buffer); - ksfree(&fp->url); - ksfree(&fp->upload_id); - ksfree(&fp->completion_message); - curl_easy_cleanup(fp->curl); - free(fp->au); - -} - - -static void cleanup(hFILE_s3_write *fp) { - // free up authorisation data - fp->au->callback(fp->au->callback_data, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0); - cleanup_local(fp); -} - - -static struct curl_slist *set_html_headers(hFILE_s3_write *fp, kstring_t *auth, kstring_t *date, kstring_t *content, kstring_t *token) { - struct curl_slist *headers = NULL; - - headers = curl_slist_append(headers, "Content-Type:"); // get rid of this - headers = curl_slist_append(headers, "Expect:"); // and this - headers = curl_slist_append(headers, auth->s); - headers = curl_slist_append(headers, date->s); - headers = curl_slist_append(headers, content->s); - - if (token->l) { - headers = curl_slist_append(headers, token->s); - } - - curl_easy_setopt(fp->curl, CURLOPT_HTTPHEADER, headers); - - return headers; -} - - -/* - The partially uploaded file will hang around unless the delete command is sent. 
-*/ -static int abort_upload(hFILE_s3_write *fp) { - kstring_t content_hash = {0, 0, NULL}; - kstring_t authorisation = {0, 0, NULL}; - kstring_t url = {0, 0, NULL}; - kstring_t content = {0, 0, NULL}; - kstring_t canonical_query_string = {0, 0, NULL}; - kstring_t date = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - int ret = -1; - struct curl_slist *headers = NULL; - char http_request[] = "DELETE"; - - if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) { - goto out; - } - - if (fp->au->callback(fp->au->callback_data, http_request, NULL, - canonical_query_string.s, &content_hash, - &authorisation, &date, &token, 0) != 0) { - goto out; - } - - if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { - goto out; - } - - if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { - goto out; - } - - curl_easy_reset(fp->curl); - curl_easy_setopt(fp->curl, CURLOPT_CUSTOMREQUEST, http_request); - curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); - curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); - - curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); - - headers = set_html_headers(fp, &authorisation, &date, &content, &token); - fp->ret = curl_easy_perform(fp->curl); - - if (fp->ret == CURLE_OK) { - ret = 0; - } - - out: - ksfree(&authorisation); - ksfree(&content); - ksfree(&content_hash); - ksfree(&url); - ksfree(&date); - ksfree(&canonical_query_string); - ksfree(&token); - curl_slist_free_all(headers); - - fp->aborted = 1; - cleanup(fp); - - return ret; -} - - -static int complete_upload(hFILE_s3_write *fp, kstring_t *resp) { - kstring_t content_hash = {0, 0, NULL}; - kstring_t authorisation = {0, 0, NULL}; - kstring_t url = {0, 0, NULL}; - kstring_t content = {0, 0, NULL}; - kstring_t canonical_query_string = {0, 0, NULL}; - kstring_t date = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - int ret = -1; - struct curl_slist *headers = NULL; - char http_request[] = "POST"; - - if 
(ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) { - return -1; - } - - // finish off the completion reply - if (kputs("\n", &fp->completion_message) < 0) { - goto out; - } - - if (fp->au->callback(fp->au->callback_data, http_request, - &fp->completion_message, canonical_query_string.s, - &content_hash, &authorisation, &date, &token, 0) != 0) { - goto out; - } - - if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { - goto out; - } - - if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { - goto out; - } - - curl_easy_reset(fp->curl); - curl_easy_setopt(fp->curl, CURLOPT_POST, 1L); - curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, fp->completion_message.s); - curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDSIZE, (long) fp->completion_message.l); - curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback); - curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp); - curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); - curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); - - curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); - - headers = set_html_headers(fp, &authorisation, &date, &content, &token); - fp->ret = curl_easy_perform(fp->curl); - - if (fp->ret == CURLE_OK) { - ret = 0; - } - - out: - ksfree(&authorisation); - ksfree(&content); - ksfree(&content_hash); - ksfree(&url); - ksfree(&date); - ksfree(&token); - ksfree(&canonical_query_string); - curl_slist_free_all(headers); - - return ret; -} - - -static size_t upload_callback(void *ptr, size_t size, size_t nmemb, void *stream) { - size_t realsize = size * nmemb; - hFILE_s3_write *fp = (hFILE_s3_write *)stream; - size_t read_length; - - if (realsize > (fp->buffer.l - fp->index)) { - read_length = fp->buffer.l - fp->index; - } else { - read_length = realsize; - } - - memcpy(ptr, fp->buffer.s + fp->index, read_length); - fp->index += read_length; - - return read_length; -} - - -static int upload_part(hFILE_s3_write *fp, 
kstring_t *resp) { - kstring_t content_hash = {0, 0, NULL}; - kstring_t authorisation = {0, 0, NULL}; - kstring_t url = {0, 0, NULL}; - kstring_t content = {0, 0, NULL}; - kstring_t canonical_query_string = {0, 0, NULL}; - kstring_t date = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - int ret = -1; - struct curl_slist *headers = NULL; - char http_request[] = "PUT"; - - if (ksprintf(&canonical_query_string, "partNumber=%d&uploadId=%s", fp->part_no, fp->upload_id.s) < 0) { - return -1; - } - - if (fp->au->callback(fp->au->callback_data, http_request, &fp->buffer, - canonical_query_string.s, &content_hash, - &authorisation, &date, &token, 0) != 0) { - goto out; - } - - if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { - goto out; - } - - fp->index = 0; - if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { - goto out; - } - - curl_easy_reset(fp->curl); - - curl_easy_setopt(fp->curl, CURLOPT_UPLOAD, 1L); - curl_easy_setopt(fp->curl, CURLOPT_READFUNCTION, upload_callback); - curl_easy_setopt(fp->curl, CURLOPT_READDATA, fp); - curl_easy_setopt(fp->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fp->buffer.l); - curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback); - curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp); - curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); - curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); - - curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); - - headers = set_html_headers(fp, &authorisation, &date, &content, &token); - fp->ret = curl_easy_perform(fp->curl); - - if (fp->ret == CURLE_OK) { - ret = 0; - } - - out: - ksfree(&authorisation); - ksfree(&content); - ksfree(&content_hash); - ksfree(&url); - ksfree(&date); - ksfree(&token); - ksfree(&canonical_query_string); - curl_slist_free_all(headers); - - return ret; -} - - -static ssize_t s3_write(hFILE *fpv, const void *bufferv, size_t nbytes) { - hFILE_s3_write *fp = (hFILE_s3_write *)fpv; - const char 
*buffer = (const char *)bufferv; - - if (kputsn(buffer, nbytes, &fp->buffer) == EOF) { - return -1; - } - - if (fp->buffer.l > fp->part_size) { - // time to write out our data - kstring_t response = {0, 0, NULL}; - int ret; - - ret = upload_part(fp, &response); - - if (!ret) { - long response_code; - kstring_t etag = {0, 0, NULL}; - - curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); - - if (response_code > 200) { - ret = -1; - } else { - if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) { - ret = -1; - } else { - ksprintf(&fp->completion_message, "\t\n\t\t%d\n\t\t%s\n\t\n", - fp->part_no, etag.s); - - ksfree(&etag); - } - } - } - - ksfree(&response); - - if (ret) { - abort_upload(fp); - return -1; - } - - fp->part_no++; - fp->buffer.l = 0; - - if (fp->expand && (fp->part_no % EXPAND_ON == 0)) { - fp->part_size *= 2; - } - } - - return nbytes; -} - - -static int s3_close(hFILE *fpv) { - hFILE_s3_write *fp = (hFILE_s3_write *)fpv; - kstring_t response = {0, 0, NULL}; - int ret = 0; - - if (!fp->aborted) { - - if (fp->buffer.l) { - // write the last part - - ret = upload_part(fp, &response); - - if (!ret) { - long response_code; - kstring_t etag = {0, 0, NULL}; - - curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); - - if (response_code > 200) { - ret = -1; - } else { - if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) { - ret = -1; - } else { - ksprintf(&fp->completion_message, "\t\n\t\t%d\n\t\t%s\n\t\n", - fp->part_no, etag.s); - - ksfree(&etag); - } - } - } - - ksfree(&response); - - if (ret) { - abort_upload(fp); - return -1; - } - - fp->part_no++; - } - - if (fp->part_no > 1) { - ret = complete_upload(fp, &response); - - if (!ret) { - if (strstr(response.s, "CompleteMultipartUploadResult") == NULL) { - ret = -1; - } - } - } else { - ret = -1; - } - - if (ret) { - abort_upload(fp); - } else { - cleanup(fp); - } - } - - ksfree(&response); - - return ret; -} - - -static int redirect_endpoint(hFILE_s3_write *fp, 
kstring_t *head) { - int ret = -1; - - if (fp->au->redirect_callback) { - ret = fp->au->redirect_callback(fp->au->callback_data, 301, head, &fp->url); - } - - return ret; -} - -static int handle_bad_request(hFILE_s3_write *fp, kstring_t *resp) { - kstring_t region = {0, 0, NULL}; - int ret = -1; - - if (fp->au->set_region_callback) { - if (get_entry(resp->s, "", "", ®ion) == EOF) { - return -1; - } - - ret = fp->au->set_region_callback(fp->au->callback_data, ®ion); - - ksfree(®ion); - } - - return ret; -} - -static int initialise_upload(hFILE_s3_write *fp, kstring_t *head, kstring_t *resp, int user_query) { - kstring_t content_hash = {0, 0, NULL}; - kstring_t authorisation = {0, 0, NULL}; - kstring_t url = {0, 0, NULL}; - kstring_t content = {0, 0, NULL}; - kstring_t date = {0, 0, NULL}; - kstring_t token = {0, 0, NULL}; - int ret = -1; - struct curl_slist *headers = NULL; - char http_request[] = "POST"; - char delimiter = '?'; - - if (user_query) { - delimiter = '&'; - } - - if (fp->au->callback(fp->au->callback_data, http_request, NULL, "uploads=", - &content_hash, &authorisation, &date, &token, user_query) != 0) { - goto out; - } - - if (ksprintf(&url, "%s%cuploads", fp->url.s, delimiter) < 0) { - goto out; - } - - if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { - goto out; - } - - curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); - curl_easy_setopt(fp->curl, CURLOPT_POST, 1L); - curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, ""); // send no data - curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback); - curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp); - curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback); - curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)head); - curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); - - curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); - - headers = set_html_headers(fp, &authorisation, &date, &content, &token); - fp->ret = 
curl_easy_perform(fp->curl); - - if (fp->ret == CURLE_OK) { - ret = 0; - } - - out: - ksfree(&authorisation); - ksfree(&content); - ksfree(&content_hash); - ksfree(&url); - ksfree(&date); - ksfree(&token); - curl_slist_free_all(headers); - - return ret; -} - - -static int get_upload_id(hFILE_s3_write *fp, kstring_t *resp) { - int ret = 0; - - ksinit(&fp->upload_id); - - if (get_entry(resp->s, "", "", &fp->upload_id) == EOF) { - ret = -1; - } - - return ret; -} - - -static const struct hFILE_backend s3_write_backend = { - NULL, s3_write, NULL, NULL, s3_close -}; - - -static hFILE *s3_write_open(const char *url, s3_authorisation *auth) { - hFILE_s3_write *fp; - kstring_t response = {0, 0, NULL}; - kstring_t header = {0, 0, NULL}; - int ret, has_user_query = 0; - char *query_start; - const char *env; - - - if (!auth || !auth->callback || !auth->callback_data) { - return NULL; - } - - fp = (hFILE_s3_write *)hfile_init(sizeof(hFILE_s3_write), "w", 0); - - if (fp == NULL) { - return NULL; - } - - if ((fp->curl = curl_easy_init()) == NULL) { - errno = ENOMEM; - goto error; - } - - if ((fp->au = calloc(1, sizeof(s3_authorisation))) == NULL) { - goto error; - } - - memcpy(fp->au, auth, sizeof(s3_authorisation)); - - ksinit(&fp->buffer); - ksinit(&fp->url); - ksinit(&fp->completion_message); - fp->aborted = 0; - - fp->part_size = MINIMUM_S3_WRITE_SIZE; - fp->expand = 1; - - if ((env = getenv("HTS_S3_PART_SIZE")) != NULL) { - int part_size = atoi(env) * 1024 * 1024; - - if (part_size > fp->part_size) - fp->part_size = part_size; - - fp->expand = 0; - } - - if (hts_verbose >= 8) { - fp->verbose = 1L; - } else { - fp->verbose = 0L; - } - - kputs(url + 4, &fp->url); - - if ((query_start = strchr(fp->url.s, '?'))) { - has_user_query = 1;; - } - - ret = initialise_upload(fp, &header, &response, has_user_query); - - if (ret == 0) { - long response_code; - - curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); - - if (response_code == S3_MOVED_PERMANENTLY) { - if 
(redirect_endpoint(fp, &header) == 0) { - ksfree(&response); - ksfree(&header); - - ret = initialise_upload(fp, &header, &response, has_user_query); - } - } else if (response_code == S3_BAD_REQUEST) { - if (handle_bad_request(fp, &response) == 0) { - ksfree(&response); - ksfree(&header); - - ret = initialise_upload(fp, &header, &response, has_user_query); - } - } - - ksfree(&header); // no longer needed - } - - if (ret) goto error; - - if (get_upload_id(fp, &response)) goto error; - - // start the completion message (a formatted list of parts) - ksinit(&fp->completion_message); - - if (kputs("\n", &fp->completion_message) == EOF) { - goto error; - } - - fp->part_no = 1; - - // user query string no longer a useful part of the URL - if (query_start) - *query_start = '\0'; - - fp->base.backend = &s3_write_backend; - ksfree(&response); - - return &fp->base; - -error: - ksfree(&response); - cleanup_local(fp); - hfile_destroy((hFILE *)fp); - return NULL; -} - - -static hFILE *hopen_s3_write(const char *url, const char *mode) { - if (hts_verbose >= 1) { - fprintf(stderr, "[E::%s] s3w:// URLs should not be used directly; use s3:// instead.\n", __func__); - } - return NULL; -} - - -static int parse_va_list(s3_authorisation *auth, va_list args) { - const char *argtype; - - while ((argtype = va_arg(args, const char *)) != NULL) { - if (strcmp(argtype, "s3_auth_callback") == 0) { - auth->callback = va_arg(args, s3_auth_callback); - } else if (strcmp(argtype, "s3_auth_callback_data") == 0) { - auth->callback_data = va_arg(args, void *); - } else if (strcmp(argtype, "redirect_callback") == 0) { - auth->redirect_callback = va_arg(args, redirect_callback); - } else if (strcmp(argtype, "set_region_callback") == 0) { - auth->set_region_callback = va_arg(args, set_region_callback); - } else if (strcmp(argtype, "va_list") == 0) { - va_list *args2 = va_arg(args, va_list *); - - if (args2) { - if (parse_va_list(auth, *args2) < 0) return -1; - } - } else { - errno = EINVAL; - return -1; 
- } - } - - return 0; -} - - -static hFILE *vhopen_s3_write(const char *url, const char *mode, va_list args) { - hFILE *fp = NULL; - s3_authorisation auth = {NULL, NULL, NULL}; - - if (parse_va_list(&auth, args) == 0) { - fp = s3_write_open(url, &auth); - } - - return fp; -} - - -static void s3_write_exit() { - if (curl_share_cleanup(curl.share) == CURLSHE_OK) - curl.share = NULL; - - free(curl.useragent.s); - curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL; - curl_global_cleanup(); -} - - -int PLUGIN_GLOBAL(hfile_plugin_init,_s3_write)(struct hFILE_plugin *self) { - - static const struct hFILE_scheme_handler handler = - { hopen_s3_write, hfile_always_remote, "S3 Multipart Upload", - 2000 + 50, vhopen_s3_write - }; - -#ifdef ENABLE_PLUGINS - // Embed version string for examination via strings(1) or what(1) - static const char id[] = - "@(#)hfile_s3_write plugin (htslib)\t" HTS_VERSION_TEXT; - const char *version = strchr(id, '\t') + 1; - - if (hts_verbose >= 9) - fprintf(stderr, "[M::hfile_s3_write.init] version %s\n", - version); -#else - const char *version = hts_version(); -#endif - - const curl_version_info_data *info; - CURLcode err; - CURLSHcode errsh; - - err = curl_global_init(CURL_GLOBAL_ALL); - - if (err != CURLE_OK) { - // look at putting in an errno here - return -1; - } - - curl.share = curl_share_init(); - - if (curl.share == NULL) { - curl_global_cleanup(); - errno = EIO; - return -1; - } - - errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock); - errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock); - errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS); - - if (errsh != 0) { - curl_share_cleanup(curl.share); - curl_global_cleanup(); - errno = EIO; - return -1; - } - - info = curl_version_info(CURLVERSION_NOW); - ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version); - - self->name = "S3 Multipart Upload"; - self->destroy = s3_write_exit; - - 
hfile_add_scheme_handler("s3w", &handler); - hfile_add_scheme_handler("s3w+http", &handler); - hfile_add_scheme_handler("s3w+https", &handler); - - return 0; -} diff --git a/src/htslib-1.19.1/hts.c b/src/htslib-1.19.1/hts.c deleted file mode 100644 index 72e6d79..0000000 --- a/src/htslib-1.19.1/hts.c +++ /dev/null @@ -1,5012 +0,0 @@ -/* hts.c -- format-neutral I/O, indexing, and iterator API functions. - - Copyright (C) 2008, 2009, 2012-2024 Genome Research Ltd. - Copyright (C) 2012, 2013 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_LIBLZMA -#ifdef HAVE_LZMA_H -#include -#else -#include "os/lzma_stub.h" -#endif -#endif - -#include "htslib/hts.h" -#include "htslib/bgzf.h" -#include "cram/cram.h" -#include "htslib/hfile.h" -#include "htslib/hts_endian.h" -#include "version.h" -#include "config_vars.h" -#include "hts_internal.h" -#include "hfile_internal.h" -#include "sam_internal.h" -#include "htslib/hts_expr.h" -#include "htslib/hts_os.h" // drand48 - -#include "htslib/khash.h" -#include "htslib/kseq.h" -#include "htslib/ksort.h" -#include "htslib/tbx.h" -#if defined(HAVE_EXTERNAL_LIBHTSCODECS) -#include -#else -#include "htscodecs/htscodecs/htscodecs.h" -#endif - -#ifndef EFTYPE -#define EFTYPE ENOEXEC -#endif - -KHASH_INIT2(s2i,, kh_cstr_t, int64_t, 1, kh_str_hash_func, kh_str_hash_equal) - -HTSLIB_EXPORT -int hts_verbose = HTS_LOG_WARNING; - -const char *hts_version(void) -{ - return HTS_VERSION_TEXT; -} - -unsigned int hts_features(void) { - unsigned int feat = HTS_FEATURE_HTSCODECS; // Always present - -#ifdef PACKAGE_URL - feat |= HTS_FEATURE_CONFIGURE; -#endif - -#ifdef ENABLE_PLUGINS - feat |= HTS_FEATURE_PLUGINS; -#endif - -#ifdef HAVE_LIBCURL - feat |= HTS_FEATURE_LIBCURL; -#endif - -#ifdef ENABLE_S3 - feat |= HTS_FEATURE_S3; -#endif - -#ifdef ENABLE_GCS - feat |= HTS_FEATURE_GCS; -#endif - -#ifdef HAVE_LIBDEFLATE - feat |= HTS_FEATURE_LIBDEFLATE; -#endif - -#ifdef HAVE_LIBLZMA - feat |= HTS_FEATURE_LZMA; -#endif - -#ifdef HAVE_LIBBZ2 - feat |= HTS_FEATURE_BZIP2; -#endif - - return feat; -} - -const char *hts_test_feature(unsigned int id) { - unsigned int feat = hts_features(); - - switch (id) { - case HTS_FEATURE_CONFIGURE: - return feat & HTS_FEATURE_CONFIGURE ? 
"yes" : NULL; - case HTS_FEATURE_PLUGINS: - return feat & HTS_FEATURE_PLUGINS ? "yes" : NULL; - case HTS_FEATURE_LIBCURL: - return feat & HTS_FEATURE_LIBCURL ? "yes" : NULL; - case HTS_FEATURE_S3: - return feat & HTS_FEATURE_S3 ? "yes" : NULL; - case HTS_FEATURE_GCS: - return feat & HTS_FEATURE_GCS ? "yes" : NULL; - case HTS_FEATURE_LIBDEFLATE: - return feat & HTS_FEATURE_LIBDEFLATE ? "yes" : NULL; - case HTS_FEATURE_BZIP2: - return feat & HTS_FEATURE_BZIP2 ? "yes" : NULL; - case HTS_FEATURE_LZMA: - return feat & HTS_FEATURE_LZMA ? "yes" : NULL; - - case HTS_FEATURE_HTSCODECS: - return htscodecs_version(); - - case HTS_FEATURE_CC: - return HTS_CC; - case HTS_FEATURE_CFLAGS: - return HTS_CFLAGS; - case HTS_FEATURE_LDFLAGS: - return HTS_LDFLAGS; - case HTS_FEATURE_CPPFLAGS: - return HTS_CPPFLAGS; - - default: - fprintf(stderr, "Unknown feature code: %u\n", id); - } - - return NULL; -} - -// Note this implementation also means we can just "strings" the library -// to find the configuration parameters. 
-const char *hts_feature_string(void) { - static char config[1200]; - const char *flags= - -#ifdef PACKAGE_URL - "build=configure " -#else - "build=Makefile " -#endif - -#ifdef HAVE_LIBCURL - "libcurl=yes " -#else - "libcurl=no " -#endif - -#ifdef ENABLE_S3 - "S3=yes " -#else - "S3=no " -#endif - -#ifdef ENABLE_GCS - "GCS=yes " -#else - "GCS=no " -#endif - -#ifdef HAVE_LIBDEFLATE - "libdeflate=yes " -#else - "libdeflate=no " -#endif - -#ifdef HAVE_LIBLZMA - "lzma=yes " -#else - "lzma=no " -#endif - -#ifdef HAVE_LIBBZ2 - "bzip2=yes " -#else - "bzip2=no " -#endif - -// "plugins=" must stay at the end as it is followed by "plugin-path=" -#ifdef ENABLE_PLUGINS - "plugins=yes"; -#else - "plugins=no"; -#endif - -#ifdef ENABLE_PLUGINS - snprintf(config, sizeof(config), - "%s plugin-path=%.1000s htscodecs=%.40s", - flags, hts_plugin_path(), htscodecs_version()); -#else - snprintf(config, sizeof(config), - "%s htscodecs=%.40s", - flags, htscodecs_version()); -#endif - return config; -} - - -HTSLIB_EXPORT -const unsigned char seq_nt16_table[256] = { - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15, - 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, - 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, - 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, - 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, - - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, - 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15 -}; - -HTSLIB_EXPORT -const char seq_nt16_str[] = "=ACMGRSVTWYHKDBN"; - 
-HTSLIB_EXPORT -const int seq_nt16_int[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; - -/********************** - *** Basic file I/O *** - **********************/ - -static enum htsFormatCategory format_category(enum htsExactFormat fmt) -{ - switch (fmt) { - case bam: - case sam: - case cram: - case fastq_format: - case fasta_format: - return sequence_data; - - case vcf: - case bcf: - return variant_data; - - case bai: - case crai: - case csi: - case fai_format: - case fqi_format: - case gzi: - case tbi: - return index_file; - - case bed: - case d4_format: - return region_list; - - case htsget: - case hts_crypt4gh_format: - return unknown_category; - - case unknown_format: - case binary_format: - case text_format: - case empty_format: - case format_maximum: - break; - } - - return unknown_category; -} - -// Decompress several hundred bytes by peeking at the file, which must be -// positioned at the start of a GZIP block. -static ssize_t -decompress_peek_gz(hFILE *fp, unsigned char *dest, size_t destsize) -{ - unsigned char buffer[2048]; - z_stream zs; - ssize_t npeek = hpeek(fp, buffer, sizeof buffer); - - if (npeek < 0) return -1; - - zs.zalloc = NULL; - zs.zfree = NULL; - zs.next_in = buffer; - zs.avail_in = npeek; - zs.next_out = dest; - zs.avail_out = destsize; - if (inflateInit2(&zs, 31) != Z_OK) return -1; - - int ret; - const unsigned char *last_in = buffer; - while (zs.avail_out > 0) { - ret = inflate(&zs, Z_SYNC_FLUSH); - if (ret == Z_STREAM_END) { - if (last_in == zs.next_in) - break; // Paranoia to avoid potential looping. 
Shouldn't happen - else - last_in = zs.next_in; - inflateReset(&zs); - } else if (ret != Z_OK) { - // eg Z_BUF_ERROR due to avail_in/out becoming zero - break; - } - } - - // NB: zs.total_out is changed by inflateReset, so use pointer diff instead - destsize = zs.next_out - dest; - inflateEnd(&zs); - - return destsize; -} - -#ifdef HAVE_LIBLZMA -// Similarly decompress a portion by peeking at the file, which must be -// positioned at the start of the file. -static ssize_t -decompress_peek_xz(hFILE *fp, unsigned char *dest, size_t destsize) -{ - unsigned char buffer[2048]; - ssize_t npeek = hpeek(fp, buffer, sizeof buffer); - if (npeek < 0) return -1; - - lzma_stream ls = LZMA_STREAM_INIT; - if (lzma_stream_decoder(&ls, lzma_easy_decoder_memusage(9), 0) != LZMA_OK) - return -1; - - ls.next_in = buffer; - ls.avail_in = npeek; - ls.next_out = dest; - ls.avail_out = destsize; - - int r = lzma_code(&ls, LZMA_RUN); - if (! (r == LZMA_OK || r == LZMA_STREAM_END)) { - lzma_end(&ls); - return -1; - } - - destsize = ls.total_out; - lzma_end(&ls); - - return destsize; -} -#endif - -// Parse "x.y" text, taking care because the string is not NUL-terminated -// and filling in major/minor only when the digits are followed by a delimiter, -// so we don't misread "1.10" as "1.1" due to reaching the end of the buffer. 
-static void -parse_version(htsFormat *fmt, const unsigned char *u, const unsigned char *ulim) -{ - const char *s = (const char *) u; - const char *slim = (const char *) ulim; - short v; - - fmt->version.major = fmt->version.minor = -1; - - for (v = 0; s < slim && isdigit_c(*s); s++) - v = 10 * v + *s - '0'; - - if (s < slim) { - fmt->version.major = v; - if (*s == '.') { - s++; - for (v = 0; s < slim && isdigit_c(*s); s++) - v = 10 * v + *s - '0'; - if (s < slim) - fmt->version.minor = v; - } - else - fmt->version.minor = 0; - } -} - -static int -cmp_nonblank(const char *key, const unsigned char *u, const unsigned char *ulim) -{ - const unsigned char *ukey = (const unsigned char *) key; - - while (*ukey) - if (u >= ulim) return +1; - else if (isspace_c(*u)) u++; - else if (*u != *ukey) return (*ukey < *u)? -1 : +1; - else u++, ukey++; - - return 0; -} - -static int is_text_only(const unsigned char *u, const unsigned char *ulim) -{ - for (; u < ulim; u++) - if (! (*u >= ' ' || *u == '\t' || *u == '\r' || *u == '\n')) - return 0; - - return 1; -} - -static int is_fastaq(const unsigned char *u, const unsigned char *ulim) -{ - const unsigned char *eol = memchr(u, '\n', ulim - u); - - // Check that the first line is entirely textual - if (! is_text_only(u, eol? eol : ulim)) return 0; - - // If the first line is very long, consider the file to indeed be FASTA/Q - if (eol == NULL) return 1; - - u = eol+1; // Now points to the first character of the second line - - // Scan over all base-encoding letters (including 'N' but not SEQ's '=') - while (u < ulim && (seq_nt16_table[*u] != 15 || toupper(*u) == 'N')) { - if (*u == '=') return 0; - u++; - } - - return (u == ulim || *u == '\r' || *u == '\n')? 1 : 0; -} - -// Parse tab-delimited text, filling in a string of column types and returning -// the number of columns spotted (within [u,ulim), and up to column_len) or -1 -// if non-printable characters were seen. 
Column types: -// i: integer, s: strand sign, C: CIGAR, O: SAM optional field, Z: anything -static int -parse_tabbed_text(char *columns, int column_len, - const unsigned char *u, const unsigned char *ulim, - int *complete) -{ - const char *str = (const char *) u; - const char *slim = (const char *) ulim; - const char *s; - int ncolumns = 0; - - enum { digit = 1, leading_sign = 2, cigar_operator = 4, other = 8 }; - unsigned seen = 0; - *complete = 0; - - for (s = str; s < slim; s++) - if (*s >= ' ') { - if (isdigit_c(*s)) - seen |= digit; - else if ((*s == '+' || *s == '-') && s == str) - seen |= leading_sign; - else if (strchr(BAM_CIGAR_STR, *s) && s > str && isdigit_c(s[-1])) - seen |= cigar_operator; - else - seen |= other; - } - else if (*s == '\t' || *s == '\r' || *s == '\n') { - size_t len = s - str; - char type; - - if (seen == digit || seen == (leading_sign|digit)) type = 'i'; - else if (seen == (digit|cigar_operator)) type = 'C'; - else if (len == 1) - switch (str[0]) { - case '*': type = 'C'; break; - case '+': case '-': case '.': type = 's'; break; - default: type = 'Z'; break; - } - else if (len >= 5 && str[2] == ':' && str[4] == ':') type = 'O'; - else type = 'Z'; - - columns[ncolumns++] = type; - if (*s != '\t' || ncolumns >= column_len - 1) { - *complete = 1; // finished the line or more columns than needed - break; - } - - str = s + 1; - seen = 0; - } - else return -1; - - columns[ncolumns] = '\0'; - return ncolumns; -} - -// Match COLUMNS as a prefix against PATTERN (so COLUMNS may run out first). -// Returns len(COLUMNS) (modulo '+'), or 0 if there is a mismatched entry. -static int colmatch(const char *columns, const char *pattern) -{ - int i; - for (i = 0; columns[i] != '\0'; i++) { - if (pattern[i] == '+') return i; - if (! 
(columns[i] == pattern[i] || pattern[i] == 'Z')) return 0; - } - - return i; -} - -int hts_detect_format(hFILE *hfile, htsFormat *fmt) -{ - return hts_detect_format2(hfile, NULL, fmt); -} - -int hts_detect_format2(hFILE *hfile, const char *fname, htsFormat *fmt) -{ - char extension[HTS_MAX_EXT_LEN], columns[24]; - unsigned char s[1024]; - int complete = 0; - ssize_t len = hpeek(hfile, s, 18); - if (len < 0) return -1; - - fmt->category = unknown_category; - fmt->format = unknown_format; - fmt->version.major = fmt->version.minor = -1; - fmt->compression = no_compression; - fmt->compression_level = -1; - fmt->specific = NULL; - - if (len >= 2 && s[0] == 0x1f && s[1] == 0x8b) { - // The stream is either gzip-compressed or BGZF-compressed. - // Determine which, and decompress the first few records or lines. - fmt->compression = gzip; - if (len >= 18 && (s[3] & 4)) { - if (memcmp(&s[12], "BC\2\0", 4) == 0) - fmt->compression = bgzf; - else if (memcmp(&s[12], "RAZF", 4) == 0) - fmt->compression = razf_compression; - } - if (len >= 9 && s[2] == 8) - fmt->compression_level = (s[8] == 2)? 9 : (s[8] == 4)? 1 : -1; - - len = decompress_peek_gz(hfile, s, sizeof s); - } - else if (len >= 10 && memcmp(s, "BZh", 3) == 0 && - (memcmp(&s[4], "\x31\x41\x59\x26\x53\x59", 6) == 0 || - memcmp(&s[4], "\x17\x72\x45\x38\x50\x90", 6) == 0)) { - fmt->compression = bzip2_compression; - fmt->compression_level = s[3] - '0'; - // Decompressing via libbz2 produces no output until it has a whole - // block (of size 100Kb x level), which is too large for peeking. - // So unfortunately we can recognise bzip2 but not the contents, - // except that \x1772... magic indicates the stream is empty. - if (s[4] == '\x31') return 0; - else len = 0; - } - else if (len >= 6 && memcmp(s, "\xfd""7zXZ\0", 6) == 0) { - fmt->compression = xz_compression; -#ifdef HAVE_LIBLZMA - len = decompress_peek_xz(hfile, s, sizeof s); -#else - // Without liblzma, we can't recognise the decompressed contents. 
- return 0; -#endif - } - else if (len >= 4 && memcmp(s, "\x28\xb5\x2f\xfd", 4) == 0) { - fmt->compression = zstd_compression; - return 0; - } - else { - len = hpeek(hfile, s, sizeof s); - } - if (len < 0) return -1; - - if (len == 0) { - fmt->format = empty_format; - return 0; - } - - // We avoid using filename extensions wherever possible (as filenames are - // not always available), but in a few cases they must be considered: - // - FASTA/Q indexes are simply tab-separated text; files that match these - // patterns but not the fai/fqi extension are usually generic BED files - // - GZI indexes have no magic numbers so can only be detected by filename - if (fname && strcmp(fname, "-") != 0) { - char *s; - if (find_file_extension(fname, extension) < 0) extension[0] = '\0'; - for (s = extension; *s; s++) *s = tolower_c(*s); - } - else extension[0] = '\0'; - - if (len >= 6 && memcmp(s,"CRAM",4) == 0 && s[4]>=1 && s[4]<=7 && s[5]<=7) { - fmt->category = sequence_data; - fmt->format = cram; - fmt->version.major = s[4], fmt->version.minor = s[5]; - fmt->compression = custom; - return 0; - } - else if (len >= 4 && s[3] <= '\4') { - if (memcmp(s, "BAM\1", 4) == 0) { - fmt->category = sequence_data; - fmt->format = bam; - // TODO Decompress enough to pick version from @HD-VN header - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (memcmp(s, "BAI\1", 4) == 0) { - fmt->category = index_file; - fmt->format = bai; - fmt->version.major = -1, fmt->version.minor = -1; - return 0; - } - else if (memcmp(s, "BCF\4", 4) == 0) { - fmt->category = variant_data; - fmt->format = bcf; - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (memcmp(s, "BCF\2", 4) == 0) { - fmt->category = variant_data; - fmt->format = bcf; - fmt->version.major = s[3]; - fmt->version.minor = (len >= 5 && s[4] <= 2)? 
s[4] : 0; - return 0; - } - else if (memcmp(s, "CSI\1", 4) == 0) { - fmt->category = index_file; - fmt->format = csi; - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (memcmp(s, "TBI\1", 4) == 0) { - fmt->category = index_file; - fmt->format = tbi; - return 0; - } - // GZI indexes have no magic numbers, so must be recognised solely by - // filename extension. - else if (strcmp(extension, "gzi") == 0) { - fmt->category = index_file; - fmt->format = gzi; - return 0; - } - } - else if (len >= 16 && memcmp(s, "##fileformat=VCF", 16) == 0) { - fmt->category = variant_data; - fmt->format = vcf; - if (len >= 21 && s[16] == 'v') - parse_version(fmt, &s[17], &s[len]); - return 0; - } - else if (len >= 4 && s[0] == '@' && - (memcmp(s, "@HD\t", 4) == 0 || memcmp(s, "@SQ\t", 4) == 0 || - memcmp(s, "@RG\t", 4) == 0 || memcmp(s, "@PG\t", 4) == 0 || - memcmp(s, "@CO\t", 4) == 0)) { - fmt->category = sequence_data; - fmt->format = sam; - // @HD-VN is not guaranteed to be the first tag, but then @HD is - // not guaranteed to be present at all... - if (len >= 9 && memcmp(s, "@HD\tVN:", 7) == 0) - parse_version(fmt, &s[7], &s[len]); - else - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (len >= 8 && memcmp(s, "d4\xdd\xdd", 4) == 0) { - fmt->category = region_list; - fmt->format = d4_format; - // How to decode the D4 Format Version bytes is not yet specified - // so we don't try to set fmt->version.{major,minor}. 
- return 0; - } - else if (cmp_nonblank("{\"htsget\":", s, &s[len]) == 0) { - fmt->category = unknown_category; - fmt->format = htsget; - return 0; - } - else if (len > 8 && memcmp(s, "crypt4gh", 8) == 0) { - fmt->category = unknown_category; - fmt->format = hts_crypt4gh_format; - return 0; - } - else if (len >= 1 && s[0] == '>' && is_fastaq(s, &s[len])) { - fmt->category = sequence_data; - fmt->format = fasta_format; - return 0; - } - else if (len >= 1 && s[0] == '@' && is_fastaq(s, &s[len])) { - fmt->category = sequence_data; - fmt->format = fastq_format; - return 0; - } - else if (parse_tabbed_text(columns, sizeof columns, s, - &s[len], &complete) > 0) { - // A complete SAM line is at least 11 columns. On unmapped long reads may - // be missing two. (On mapped long reads we must have an @ header so long - // CIGAR is irrelevant.) - if (colmatch(columns, "ZiZiiCZiiZZOOOOOOOOOOOOOOOOOOOO+") - >= 9 + 2*complete) { - fmt->category = sequence_data; - fmt->format = sam; - fmt->version.major = 1, fmt->version.minor = -1; - return 0; - } - else if (fmt->compression == gzip && colmatch(columns, "iiiiii") == 6) { - fmt->category = index_file; - fmt->format = crai; - return 0; - } - else if (strstr(extension, "fqi") && colmatch(columns, "Ziiiii") == 6) { - fmt->category = index_file; - fmt->format = fqi_format; - return 0; - } - else if (strstr(extension, "fai") && colmatch(columns, "Ziiii") == 5) { - fmt->category = index_file; - fmt->format = fai_format; - return 0; - } - else if (colmatch(columns, "Zii+") >= 3) { - fmt->category = region_list; - fmt->format = bed; - return 0; - } - } - - // Arbitrary text files can be read using hts_getline(). - if (is_text_only(s, &s[len])) fmt->format = text_format; - - // Nothing recognised: leave unset fmt-> fields as unknown. 
- return 0; -} - -char *hts_format_description(const htsFormat *format) -{ - kstring_t str = { 0, 0, NULL }; - - switch (format->format) { - case sam: kputs("SAM", &str); break; - case bam: kputs("BAM", &str); break; - case cram: kputs("CRAM", &str); break; - case fasta_format: kputs("FASTA", &str); break; - case fastq_format: kputs("FASTQ", &str); break; - case vcf: kputs("VCF", &str); break; - case bcf: - if (format->version.major == 1) kputs("Legacy BCF", &str); - else kputs("BCF", &str); - break; - case bai: kputs("BAI", &str); break; - case crai: kputs("CRAI", &str); break; - case csi: kputs("CSI", &str); break; - case fai_format: kputs("FASTA-IDX", &str); break; - case fqi_format: kputs("FASTQ-IDX", &str); break; - case gzi: kputs("GZI", &str); break; - case tbi: kputs("Tabix", &str); break; - case bed: kputs("BED", &str); break; - case d4_format: kputs("D4", &str); break; - case htsget: kputs("htsget", &str); break; - case hts_crypt4gh_format: kputs("crypt4gh", &str); break; - case empty_format: kputs("empty", &str); break; - default: kputs("unknown", &str); break; - } - - if (format->version.major >= 0) { - kputs(" version ", &str); - kputw(format->version.major, &str); - if (format->version.minor >= 0) { - kputc('.', &str); - kputw(format->version.minor, &str); - } - } - - switch (format->compression) { - case bzip2_compression: kputs(" bzip2-compressed", &str); break; - case razf_compression: kputs(" legacy-RAZF-compressed", &str); break; - case xz_compression: kputs(" XZ-compressed", &str); break; - case zstd_compression: kputs(" Zstandard-compressed", &str); break; - case custom: kputs(" compressed", &str); break; - case gzip: kputs(" gzip-compressed", &str); break; - - case bgzf: - switch (format->format) { - case bam: - case bcf: - case csi: - case tbi: - // These are by definition BGZF, so just use the generic term - kputs(" compressed", &str); - break; - default: - kputs(" BGZF-compressed", &str); - break; - } - break; - - case no_compression: - 
switch (format->format) { - case bam: - case bcf: - case cram: - case csi: - case tbi: - // These are normally compressed, so emphasise that this one isn't - kputs(" uncompressed", &str); - break; - default: - break; - } - break; - - default: break; - } - - switch (format->category) { - case sequence_data: kputs(" sequence", &str); break; - case variant_data: kputs(" variant calling", &str); break; - case index_file: kputs(" index", &str); break; - case region_list: kputs(" genomic region", &str); break; - default: break; - } - - if (format->compression == no_compression) - switch (format->format) { - case text_format: - case sam: - case crai: - case vcf: - case bed: - case fai_format: - case fqi_format: - case fasta_format: - case fastq_format: - case htsget: - kputs(" text", &str); - break; - - case empty_format: - break; - - default: - kputs(" data", &str); - break; - } - else - kputs(" data", &str); - - return ks_release(&str); -} - -htsFile *hts_open_format(const char *fn, const char *mode, const htsFormat *fmt) -{ - char smode[101], *cp, *cp2, *mode_c, *uncomp = NULL; - htsFile *fp = NULL; - hFILE *hfile = NULL; - char fmt_code = '\0'; - // see enum htsExactFormat in htslib/hts.h - const char format_to_mode[] = "\0g\0\0b\0c\0\0b\0g\0\0\0\0\0Ff\0\0"; - - strncpy(smode, mode, 99); - smode[99]=0; - if ((cp = strchr(smode, ','))) - *cp = '\0'; - - // Migrate format code (b or c) to the end of the smode buffer. 
- for (cp2 = cp = smode; *cp; cp++) { - if (*cp == 'b') - fmt_code = 'b'; - else if (*cp == 'c') - fmt_code = 'c'; - else { - *cp2++ = *cp; - // Cache the uncompress flag 'u' pos if present - if (!uncomp && (*cp == 'u')) { - uncomp = cp2 - 1; - } - } - } - mode_c = cp2; - *cp2++ = fmt_code; - *cp2++ = 0; - - // Set or reset the format code if opts->format is used - if (fmt && fmt->format > unknown_format - && fmt->format < sizeof(format_to_mode)) { - *mode_c = format_to_mode[fmt->format]; - } - - // Uncompressed bam/bcf is not supported, change 'u' to '0' on write - if (uncomp && *mode_c == 'b' && (strchr(smode, 'w') || strchr(smode, 'a'))) { - *uncomp = '0'; - } - - // If we really asked for a compressed text format then mode_c above will - // point to nul. We set to 'z' to enable bgzf. - if (strchr(mode, 'w') && fmt && fmt->compression == bgzf) { - if (fmt->format == sam || fmt->format == vcf || fmt->format == text_format) - *mode_c = 'z'; - } - - char *rmme = NULL, *fnidx = strstr(fn, HTS_IDX_DELIM); - if ( fnidx ) { - rmme = strdup(fn); - if ( !rmme ) goto error; - rmme[fnidx-fn] = 0; - fn = rmme; - } - - hfile = hopen(fn, smode); - if (hfile == NULL) goto error; - - fp = hts_hopen(hfile, fn, smode); - if (fp == NULL) goto error; - - // Compensate for the loss of exactness in htsExactFormat. - // hts_hopen returns generics such as binary or text, but we - // have been given something explicit here so use that instead. 
- if (fp->is_write && fmt && - (fmt->format == bam || fmt->format == sam || - fmt->format == vcf || fmt->format == bcf || - fmt->format == bed || fmt->format == fasta_format || - fmt->format == fastq_format)) - fp->format.format = fmt->format; - - if (fmt && fmt->specific) { - if (hts_opt_apply(fp, fmt->specific) != 0) { - if (((hts_opt*)fmt->specific)->opt == CRAM_OPT_REFERENCE && - (errno == ENOENT || errno == EIO || errno == EBADF || - errno == EACCES || errno == EISDIR)) { - /* error during reference file operation - for these specific errors, set the error as EINVAL */ - errno = EINVAL; - } - goto error; - } - } - if ( rmme ) free(rmme); - return fp; - -error: - hts_log_error("Failed to open file \"%s\"%s%s", fn, - errno ? " : " : "", errno ? strerror(errno) : ""); - if ( rmme ) free(rmme); - - if (hfile) - hclose_abruptly(hfile); - - return NULL; -} - -htsFile *hts_open(const char *fn, const char *mode) { - return hts_open_format(fn, mode, NULL); -} - -/* - * Splits str into a prefix, delimiter ('\0' or delim), and suffix, writing - * the prefix in lowercase into buf and returning a pointer to the suffix. - * On return, buf is always NUL-terminated; thus assumes that the "keyword" - * prefix should be one of several known values of maximum length buflen-2. - * (If delim is not found, returns a pointer to the '\0'.) - */ -static const char * -scan_keyword(const char *str, char delim, char *buf, size_t buflen) -{ - size_t i = 0; - while (*str && *str != delim) { - if (i < buflen-1) buf[i++] = tolower_c(*str); - str++; - } - - buf[i] = '\0'; - return *str? str+1 : str; -} - -/* - * Parses arg and appends it to the option list. - * - * Returns 0 on success; - * -1 on failure. - */ -int hts_opt_add(hts_opt **opts, const char *c_arg) { - hts_opt *o, *t; - char *val; - - /* - * IMPORTANT!!! - * If you add another string option here, don't forget to also add - * it to the case statement in hts_opt_apply. 
- */ - - if (!c_arg) - return -1; - - if (!(o = malloc(sizeof(*o)))) - return -1; - - if (!(o->arg = strdup(c_arg))) { - free(o); - return -1; - } - - if (!(val = strchr(o->arg, '='))) - val = "1"; // assume boolean - else - *val++ = '\0'; - - if (strcmp(o->arg, "decode_md") == 0 || - strcmp(o->arg, "DECODE_MD") == 0) - o->opt = CRAM_OPT_DECODE_MD, o->val.i = atoi(val); - - else if (strcmp(o->arg, "verbosity") == 0 || - strcmp(o->arg, "VERBOSITY") == 0) - o->opt = CRAM_OPT_VERBOSITY, o->val.i = atoi(val); - - else if (strcmp(o->arg, "seqs_per_slice") == 0 || - strcmp(o->arg, "SEQS_PER_SLICE") == 0) - o->opt = CRAM_OPT_SEQS_PER_SLICE, o->val.i = atoi(val); - - else if (strcmp(o->arg, "bases_per_slice") == 0 || - strcmp(o->arg, "BASES_PER_SLICE") == 0) - o->opt = CRAM_OPT_BASES_PER_SLICE, o->val.i = atoi(val); - - else if (strcmp(o->arg, "slices_per_container") == 0 || - strcmp(o->arg, "SLICES_PER_CONTAINER") == 0) - o->opt = CRAM_OPT_SLICES_PER_CONTAINER, o->val.i = atoi(val); - - else if (strcmp(o->arg, "embed_ref") == 0 || - strcmp(o->arg, "EMBED_REF") == 0) - o->opt = CRAM_OPT_EMBED_REF, o->val.i = atoi(val); - - else if (strcmp(o->arg, "no_ref") == 0 || - strcmp(o->arg, "NO_REF") == 0) - o->opt = CRAM_OPT_NO_REF, o->val.i = atoi(val); - - else if (strcmp(o->arg, "pos_delta") == 0 || - strcmp(o->arg, "POS_DELTA") == 0) - o->opt = CRAM_OPT_POS_DELTA, o->val.i = atoi(val); - - else if (strcmp(o->arg, "ignore_md5") == 0 || - strcmp(o->arg, "IGNORE_MD5") == 0) - o->opt = CRAM_OPT_IGNORE_MD5, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_bzip2") == 0 || - strcmp(o->arg, "USE_BZIP2") == 0) - o->opt = CRAM_OPT_USE_BZIP2, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_rans") == 0 || - strcmp(o->arg, "USE_RANS") == 0) - o->opt = CRAM_OPT_USE_RANS, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_lzma") == 0 || - strcmp(o->arg, "USE_LZMA") == 0) - o->opt = CRAM_OPT_USE_LZMA, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_tok") == 0 || - 
strcmp(o->arg, "USE_TOK") == 0) - o->opt = CRAM_OPT_USE_TOK, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_fqz") == 0 || - strcmp(o->arg, "USE_FQZ") == 0) - o->opt = CRAM_OPT_USE_FQZ, o->val.i = atoi(val); - - else if (strcmp(o->arg, "use_arith") == 0 || - strcmp(o->arg, "USE_ARITH") == 0) - o->opt = CRAM_OPT_USE_ARITH, o->val.i = atoi(val); - - else if (strcmp(o->arg, "fast") == 0 || - strcmp(o->arg, "FAST") == 0) - o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_FAST; - - else if (strcmp(o->arg, "normal") == 0 || - strcmp(o->arg, "NORMAL") == 0) - o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_NORMAL; - - else if (strcmp(o->arg, "small") == 0 || - strcmp(o->arg, "SMALL") == 0) - o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_SMALL; - - else if (strcmp(o->arg, "archive") == 0 || - strcmp(o->arg, "ARCHIVE") == 0) - o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_ARCHIVE; - - else if (strcmp(o->arg, "reference") == 0 || - strcmp(o->arg, "REFERENCE") == 0) - o->opt = CRAM_OPT_REFERENCE, o->val.s = val; - - else if (strcmp(o->arg, "version") == 0 || - strcmp(o->arg, "VERSION") == 0) - o->opt = CRAM_OPT_VERSION, o->val.s =val; - - else if (strcmp(o->arg, "multi_seq_per_slice") == 0 || - strcmp(o->arg, "MULTI_SEQ_PER_SLICE") == 0) - o->opt = CRAM_OPT_MULTI_SEQ_PER_SLICE, o->val.i = atoi(val); - - else if (strcmp(o->arg, "nthreads") == 0 || - strcmp(o->arg, "NTHREADS") == 0) - o->opt = HTS_OPT_NTHREADS, o->val.i = atoi(val); - - else if (strcmp(o->arg, "cache_size") == 0 || - strcmp(o->arg, "CACHE_SIZE") == 0) { - char *endp; - o->opt = HTS_OPT_CACHE_SIZE; - o->val.i = strtol(val, &endp, 0); - // NB: Doesn't support floats, eg 1.5g - // TODO: extend hts_parse_decimal? See also samtools sort. 
- switch (*endp) { - case 'g': case 'G': o->val.i *= 1024; // fall through - case 'm': case 'M': o->val.i *= 1024; // fall through - case 'k': case 'K': o->val.i *= 1024; break; - case '\0': break; - default: - hts_log_error("Unrecognised cache size suffix '%c'", *endp); - free(o->arg); - free(o); - return -1; - } - } - - else if (strcmp(o->arg, "required_fields") == 0 || - strcmp(o->arg, "REQUIRED_FIELDS") == 0) - o->opt = CRAM_OPT_REQUIRED_FIELDS, o->val.i = strtol(val, NULL, 0); - - else if (strcmp(o->arg, "lossy_names") == 0 || - strcmp(o->arg, "LOSSY_NAMES") == 0) - o->opt = CRAM_OPT_LOSSY_NAMES, o->val.i = strtol(val, NULL, 0); - - else if (strcmp(o->arg, "name_prefix") == 0 || - strcmp(o->arg, "NAME_PREFIX") == 0) - o->opt = CRAM_OPT_PREFIX, o->val.s = val; - - else if (strcmp(o->arg, "store_md") == 0 || - strcmp(o->arg, "store_md") == 0) - o->opt = CRAM_OPT_STORE_MD, o->val.i = atoi(val); - - else if (strcmp(o->arg, "store_nm") == 0 || - strcmp(o->arg, "store_nm") == 0) - o->opt = CRAM_OPT_STORE_NM, o->val.i = atoi(val); - - else if (strcmp(o->arg, "block_size") == 0 || - strcmp(o->arg, "BLOCK_SIZE") == 0) - o->opt = HTS_OPT_BLOCK_SIZE, o->val.i = strtol(val, NULL, 0); - - else if (strcmp(o->arg, "level") == 0 || - strcmp(o->arg, "LEVEL") == 0) - o->opt = HTS_OPT_COMPRESSION_LEVEL, o->val.i = strtol(val, NULL, 0); - - else if (strcmp(o->arg, "filter") == 0 || - strcmp(o->arg, "FILTER") == 0) - o->opt = HTS_OPT_FILTER, o->val.s = val; - - else if (strcmp(o->arg, "fastq_aux") == 0 || - strcmp(o->arg, "FASTQ_AUX") == 0) - o->opt = FASTQ_OPT_AUX, o->val.s = val; - - else if (strcmp(o->arg, "fastq_barcode") == 0 || - strcmp(o->arg, "FASTQ_BARCODE") == 0) - o->opt = FASTQ_OPT_BARCODE, o->val.s = val; - - else if (strcmp(o->arg, "fastq_rnum") == 0 || - strcmp(o->arg, "FASTQ_RNUM") == 0) - o->opt = FASTQ_OPT_RNUM, o->val.i = 1; - - else if (strcmp(o->arg, "fastq_casava") == 0 || - strcmp(o->arg, "FASTQ_CASAVA") == 0) - o->opt = FASTQ_OPT_CASAVA, o->val.i = 1; - - 
else if (strcmp(o->arg, "fastq_name2") == 0 || - strcmp(o->arg, "FASTQ_NAME2") == 0) - o->opt = FASTQ_OPT_NAME2, o->val.i = 1; - - else { - hts_log_error("Unknown option '%s'", o->arg); - free(o->arg); - free(o); - return -1; - } - - o->next = NULL; - - // Append; assumes small list. - if (*opts) { - t = *opts; - while (t->next) - t = t->next; - t->next = o; - } else { - *opts = o; - } - - return 0; -} - -/* - * Applies an hts_opt option list to a given htsFile. - * - * Returns 0 on success - * -1 on failure - */ -int hts_opt_apply(htsFile *fp, hts_opt *opts) { - hts_opt *last = NULL; - - for (; opts; opts = (last=opts)->next) { - switch (opts->opt) { - case CRAM_OPT_REFERENCE: - if (!(fp->fn_aux = strdup(opts->val.s))) - return -1; - // fall through - case CRAM_OPT_VERSION: - case CRAM_OPT_PREFIX: - case HTS_OPT_FILTER: - case FASTQ_OPT_AUX: - case FASTQ_OPT_BARCODE: - if (hts_set_opt(fp, opts->opt, opts->val.s) != 0) - return -1; - break; - default: - if (hts_set_opt(fp, opts->opt, opts->val.i) != 0) - return -1; - break; - } - } - - return 0; -} - -/* - * Frees an hts_opt list. - */ -void hts_opt_free(hts_opt *opts) { - hts_opt *last = NULL; - while (opts) { - opts = (last=opts)->next; - free(last->arg); - free(last); - } -} - - -/* - * Tokenise options as (key(=value)?,)*(key(=value)?)? - * NB: No provision for ',' appearing in the value! - * Add backslashing rules? - * - * This could be used as part of a general command line option parser or - * as a string concatenated onto the file open mode. - * - * Returns 0 on success - * -1 on failure. - */ -int hts_parse_opt_list(htsFormat *fmt, const char *str) { - while (str && *str) { - const char *str_start; - int len; - char arg[8001]; - - while (*str && *str == ',') - str++; - - for (str_start = str; *str && *str != ','; str++); - len = str - str_start; - - // Produce a nul terminated copy of the option - strncpy(arg, str_start, len < 8000 ? len : 8000); - arg[len < 8000 ? 
len : 8000] = '\0'; - - if (hts_opt_add((hts_opt **)&fmt->specific, arg) != 0) - return -1; - - if (*str) - str++; - } - - return 0; -} - -/* - * Accepts a string file format (sam, bam, cram, vcf, bam) optionally - * followed by a comma separated list of key=value options and splits - * these up into the fields of htsFormat struct. - * - * format is assumed to be already initialised, either to blank - * "unknown" values or via previous hts_opt_add calls. - * - * Returns 0 on success - * -1 on failure. - */ -int hts_parse_format(htsFormat *format, const char *str) { - char fmt[8]; - const char *cp = scan_keyword(str, ',', fmt, sizeof fmt); - - format->version.minor = 0; // unknown - format->version.major = 0; // unknown - - if (strcmp(fmt, "sam") == 0) { - format->category = sequence_data; - format->format = sam; - format->compression = no_compression; - format->compression_level = 0; - } else if (strcmp(fmt, "sam.gz") == 0) { - format->category = sequence_data; - format->format = sam; - format->compression = bgzf; - format->compression_level = -1; - } else if (strcmp(fmt, "bam") == 0) { - format->category = sequence_data; - format->format = bam; - format->compression = bgzf; - format->compression_level = -1; - } else if (strcmp(fmt, "cram") == 0) { - format->category = sequence_data; - format->format = cram; - format->compression = custom; - format->compression_level = -1; - } else if (strcmp(fmt, "vcf") == 0) { - format->category = variant_data; - format->format = vcf; - format->compression = no_compression; - format->compression_level = 0; - } else if (strcmp(fmt, "bcf") == 0) { - format->category = variant_data; - format->format = bcf; - format->compression = bgzf; - format->compression_level = -1; - } else if (strcmp(fmt, "fastq") == 0 || strcmp(fmt, "fq") == 0) { - format->category = sequence_data; - format->format = fastq_format; - format->compression = no_compression; - format->compression_level = 0; - } else if (strcmp(fmt, "fastq.gz") == 0 || strcmp(fmt, 
"fq.gz") == 0) { - format->category = sequence_data; - format->format = fastq_format; - format->compression = bgzf; - format->compression_level = 0; - } else if (strcmp(fmt, "fasta") == 0 || strcmp(fmt, "fa") == 0) { - format->category = sequence_data; - format->format = fasta_format; - format->compression = no_compression; - format->compression_level = 0; - } else if (strcmp(fmt, "fasta.gz") == 0 || strcmp(fmt, "fa.gz") == 0) { - format->category = sequence_data; - format->format = fasta_format; - format->compression = bgzf; - format->compression_level = 0; - } else { - return -1; - } - - return hts_parse_opt_list(format, cp); -} - - -/* - * Tokenise options as (key(=value)?,)*(key(=value)?)? - * NB: No provision for ',' appearing in the value! - * Add backslashing rules? - * - * This could be used as part of a general command line option parser or - * as a string concatenated onto the file open mode. - * - * Returns 0 on success - * -1 on failure. - */ -static int hts_process_opts(htsFile *fp, const char *opts) { - htsFormat fmt; - - fmt.specific = NULL; - if (hts_parse_opt_list(&fmt, opts) != 0) - return -1; - - if (hts_opt_apply(fp, fmt.specific) != 0) { - hts_opt_free(fmt.specific); - return -1; - } - - hts_opt_free(fmt.specific); - - return 0; -} - -static int hts_crypt4gh_redirect(const char *fn, const char *mode, - hFILE **hfile_ptr, htsFile *fp) { - hFILE *hfile1 = *hfile_ptr; - hFILE *hfile2 = NULL; - char fn_buf[512], *fn2 = fn_buf; - char mode2[102]; // Size set by sizeof(simple_mode) in hts_hopen() - const char *prefix = "crypt4gh:"; - size_t fn2_len = strlen(prefix) + strlen(fn) + 1; - int ret = -1; - - if (fn2_len > sizeof(fn_buf)) { - if (fn2_len >= INT_MAX) // Silence gcc format-truncation warning - return -1; - fn2 = malloc(fn2_len); - if (!fn2) return -1; - } - - // Reopen fn using the crypt4gh plug-in (if available) - snprintf(fn2, fn2_len, "%s%s", prefix, fn); - snprintf(mode2, sizeof(mode2), "%s%s", mode, strchr(mode, ':') ? 
"" : ":"); - hfile2 = hopen(fn2, mode2, "parent", hfile1, NULL); - if (hfile2) { - // Replace original hfile with the new one. The original is now - // enclosed within hfile2 - *hfile_ptr = hfile2; - ret = 0; - } - - if (fn2 != fn_buf) - free(fn2); - return ret; -} - -htsFile *hts_hopen(hFILE *hfile, const char *fn, const char *mode) -{ - hFILE *hfile_orig = hfile; - hFILE *hfile_cleanup = hfile; - htsFile *fp = (htsFile*)calloc(1, sizeof(htsFile)); - char simple_mode[101], *cp, *opts; - simple_mode[100] = '\0'; - - if (fp == NULL) goto error; - - fp->fn = strdup(fn); - fp->is_be = ed_is_big(); - - // Split mode into simple_mode,opts strings - if ((cp = strchr(mode, ','))) { - strncpy(simple_mode, mode, cp-mode <= 100 ? cp-mode : 100); - simple_mode[cp-mode] = '\0'; - opts = cp+1; - } else { - strncpy(simple_mode, mode, 100); - opts = NULL; - } - - if (strchr(simple_mode, 'r')) { - const int max_loops = 5; // Should be plenty - int loops = 0; - if (hts_detect_format2(hfile, fn, &fp->format) < 0) goto error; - - // Deal with formats that re-direct an underlying file via a plug-in. - // Loops as we may have crypt4gh served via htsget, or - // crypt4gh-in-crypt4gh. - - while (fp->format.format == htsget || - fp->format.format == hts_crypt4gh_format) { - // Ensure we don't get stuck in an endless redirect loop - if (++loops > max_loops) { - errno = ELOOP; - goto error; - } - - if (fp->format.format == htsget) { - hFILE *hfile2 = hopen_htsget_redirect(hfile, simple_mode); - if (hfile2 == NULL) goto error; - - if (hfile != hfile_cleanup) { - // Close the result of an earlier redirection - hclose_abruptly(hfile); - } - - hfile = hfile2; - } - else if (fp->format.format == hts_crypt4gh_format) { - int should_preserve = (hfile == hfile_orig); - int update_cleanup = (hfile == hfile_cleanup); - if (hts_crypt4gh_redirect(fn, simple_mode, &hfile, fp) < 0) - goto error; - if (should_preserve) { - // The original hFILE is now contained in a crypt4gh - // wrapper. 
Should we need to close the wrapper due - // to a later error, we need to prevent the wrapped - // handle from being closed as the caller will see - // this function return NULL and try to clean up itself. - hfile_orig->preserve = 1; - } - if (update_cleanup) { - // Update handle to close at the end if redirected by htsget - hfile_cleanup = hfile; - } - } - - // Re-detect format against the result of the redirection - if (hts_detect_format2(hfile, fn, &fp->format) < 0) goto error; - } - } - else if (strchr(simple_mode, 'w') || strchr(simple_mode, 'a')) { - htsFormat *fmt = &fp->format; - fp->is_write = 1; - - if (strchr(simple_mode, 'b')) fmt->format = binary_format; - else if (strchr(simple_mode, 'c')) fmt->format = cram; - else if (strchr(simple_mode, 'f')) fmt->format = fastq_format; - else if (strchr(simple_mode, 'F')) fmt->format = fasta_format; - else fmt->format = text_format; - - if (strchr(simple_mode, 'z')) fmt->compression = bgzf; - else if (strchr(simple_mode, 'g')) fmt->compression = gzip; - else if (strchr(simple_mode, 'u')) fmt->compression = no_compression; - else { - // No compression mode specified, set to the default for the format - switch (fmt->format) { - case binary_format: fmt->compression = bgzf; break; - case cram: fmt->compression = custom; break; - case fastq_format: fmt->compression = no_compression; break; - case fasta_format: fmt->compression = no_compression; break; - case text_format: fmt->compression = no_compression; break; - default: abort(); - } - } - - // Fill in category (if determinable; e.g. 
'b' could be BAM or BCF) - fmt->category = format_category(fmt->format); - - fmt->version.major = fmt->version.minor = -1; - fmt->compression_level = -1; - fmt->specific = NULL; - } - else { errno = EINVAL; goto error; } - - switch (fp->format.format) { - case binary_format: - case bam: - case bcf: - fp->fp.bgzf = bgzf_hopen(hfile, simple_mode); - if (fp->fp.bgzf == NULL) goto error; - fp->is_bin = fp->is_bgzf = 1; - break; - - case cram: - fp->fp.cram = cram_dopen(hfile, fn, simple_mode); - if (fp->fp.cram == NULL) goto error; - if (!fp->is_write) - cram_set_option(fp->fp.cram, CRAM_OPT_DECODE_MD, -1); // auto - fp->is_cram = 1; - break; - - case empty_format: - case text_format: - case bed: - case fasta_format: - case fastq_format: - case sam: - case vcf: - if (fp->format.compression != no_compression) { - fp->fp.bgzf = bgzf_hopen(hfile, simple_mode); - if (fp->fp.bgzf == NULL) goto error; - fp->is_bgzf = 1; - } - else - fp->fp.hfile = hfile; - break; - - default: - errno = EFTYPE; - goto error; - } - - if (opts) - hts_process_opts(fp, opts); - - // Allow original file to close if it was preserved earlier by crypt4gh - hfile_orig->preserve = 0; - - // If redirecting via htsget, close the original hFILE now (pedantically - // we would instead close it in hts_close(), but this a simplifying - // optimisation) - if (hfile != hfile_cleanup) hclose_abruptly(hfile_cleanup); - - return fp; - -error: - hts_log_error("Failed to open file %s", fn); - - // If redirecting, close the failed redirection hFILE that we have opened - if (hfile != hfile_orig) hclose_abruptly(hfile); - hfile_orig->preserve = 0; // Allow caller to close the original hfile - - if (fp) { - free(fp->fn); - free(fp->fn_aux); - free(fp); - } - return NULL; -} - -int hts_close(htsFile *fp) -{ - int ret = 0, save; - if (!fp) { - errno = EINVAL; - return -1; - } - - switch (fp->format.format) { - case binary_format: - case bam: - case bcf: - ret = bgzf_close(fp->fp.bgzf); - break; - - case cram: - if 
(!fp->is_write) { - switch (cram_eof(fp->fp.cram)) { - case 2: - hts_log_warning("EOF marker is absent. The input is probably truncated"); - break; - case 0: /* not at EOF, but may not have wanted all seqs */ - default: /* case 1, expected EOF */ - break; - } - } - ret = cram_close(fp->fp.cram); - break; - - case empty_format: - case text_format: - case bed: - case fasta_format: - case fastq_format: - case sam: - case vcf: - if (fp->format.format == sam) - ret = sam_state_destroy(fp); - else if (fp->format.format == fastq_format || - fp->format.format == fasta_format) - fastq_state_destroy(fp); - - if (fp->format.compression != no_compression) - ret |= bgzf_close(fp->fp.bgzf); - else - ret |= hclose(fp->fp.hfile); - break; - - default: - ret = -1; - break; - } - - save = errno; - sam_hdr_destroy(fp->bam_header); - hts_idx_destroy(fp->idx); - hts_filter_free(fp->filter); - free(fp->fn); - free(fp->fn_aux); - free(fp->line.s); - free(fp); - errno = save; - return ret; -} - -int hts_flush(htsFile *fp) -{ - if (fp == NULL) return 0; - - switch (fp->format.format) { - case binary_format: - case bam: - case bcf: - return bgzf_flush(fp->fp.bgzf); - - case cram: - return cram_flush(fp->fp.cram); - - case empty_format: - case text_format: - case bed: - case fasta_format: - case fastq_format: - case sam: - case vcf: - if (fp->format.compression != no_compression) - return bgzf_flush(fp->fp.bgzf); - else - return hflush(fp->fp.hfile); - - default: - break; - } - - return 0; -} - -const htsFormat *hts_get_format(htsFile *fp) -{ - return fp? 
&fp->format : NULL; -} - -const char *hts_format_file_extension(const htsFormat *format) { - if (!format) - return "?"; - - switch (format->format) { - case sam: return "sam"; - case bam: return "bam"; - case bai: return "bai"; - case cram: return "cram"; - case crai: return "crai"; - case vcf: return "vcf"; - case bcf: return "bcf"; - case csi: return "csi"; - case fai_format: return "fai"; - case fqi_format: return "fqi"; - case gzi: return "gzi"; - case tbi: return "tbi"; - case bed: return "bed"; - case d4_format: return "d4"; - case fasta_format: return "fa"; - case fastq_format: return "fq"; - default: return "?"; - } -} - -static hFILE *hts_hfile(htsFile *fp) { - switch (fp->format.format) { - case binary_format:// fall through - case bcf: // fall through - case bam: return bgzf_hfile(fp->fp.bgzf); - case cram: return cram_hfile(fp->fp.cram); - case text_format: return fp->fp.hfile; - case vcf: // fall through - case fastq_format: // fall through - case fasta_format: // fall through - case sam: return fp->format.compression != no_compression - ? bgzf_hfile(fp->fp.bgzf) - : fp->fp.hfile; - default: return NULL; - } -} - -int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...) { - int r; - va_list args; - - switch (opt) { - case HTS_OPT_NTHREADS: { - va_start(args, opt); - int nthreads = va_arg(args, int); - va_end(args); - return hts_set_threads(fp, nthreads); - } - - case HTS_OPT_BLOCK_SIZE: { - hFILE *hf = hts_hfile(fp); - - if (hf) { - va_start(args, opt); - if (hfile_set_blksize(hf, va_arg(args, int)) != 0) - hts_log_warning("Failed to change block size"); - va_end(args); - } - else { - // To do - implement for vcf/bcf. 
- hts_log_warning("Cannot change block size for this format"); - } - - return 0; - } - - case HTS_OPT_THREAD_POOL: { - va_start(args, opt); - htsThreadPool *p = va_arg(args, htsThreadPool *); - va_end(args); - return hts_set_thread_pool(fp, p); - } - - case HTS_OPT_CACHE_SIZE: { - va_start(args, opt); - int cache_size = va_arg(args, int); - va_end(args); - hts_set_cache_size(fp, cache_size); - return 0; - } - - case FASTQ_OPT_CASAVA: - case FASTQ_OPT_RNUM: - case FASTQ_OPT_NAME2: - if (fp->format.format == fastq_format || - fp->format.format == fasta_format) - return fastq_state_set(fp, opt); - return 0; - - case FASTQ_OPT_AUX: - if (fp->format.format == fastq_format || - fp->format.format == fasta_format) { - va_start(args, opt); - char *list = va_arg(args, char *); - va_end(args); - return fastq_state_set(fp, opt, list); - } - return 0; - - case FASTQ_OPT_BARCODE: - if (fp->format.format == fastq_format || - fp->format.format == fasta_format) { - va_start(args, opt); - char *bc = va_arg(args, char *); - va_end(args); - return fastq_state_set(fp, opt, bc); - } - return 0; - - // Options below here flow through to cram_set_voption - case HTS_OPT_COMPRESSION_LEVEL: { - va_start(args, opt); - int level = va_arg(args, int); - va_end(args); - if (fp->is_bgzf) - fp->fp.bgzf->compress_level = level; - else if (fp->format.format == cram) - return cram_set_option(fp->fp.cram, opt, level); - return 0; - } - - case HTS_OPT_FILTER: { - va_start(args, opt); - char *expr = va_arg(args, char *); - va_end(args); - return hts_set_filter_expression(fp, expr); - } - - case HTS_OPT_PROFILE: { - va_start(args, opt); - enum hts_profile_option prof = va_arg(args, int); - va_end(args); - if (fp->is_bgzf) { - switch (prof) { -#ifdef HAVE_LIBDEFLATE - case HTS_PROFILE_FAST: fp->fp.bgzf->compress_level = 2; break; - case HTS_PROFILE_NORMAL: fp->fp.bgzf->compress_level = -1; break; - case HTS_PROFILE_SMALL: fp->fp.bgzf->compress_level = 10; break; - case HTS_PROFILE_ARCHIVE: 
fp->fp.bgzf->compress_level = 12; break; -#else - case HTS_PROFILE_FAST: fp->fp.bgzf->compress_level = 1; break; - case HTS_PROFILE_NORMAL: fp->fp.bgzf->compress_level = -1; break; - case HTS_PROFILE_SMALL: fp->fp.bgzf->compress_level = 8; break; - case HTS_PROFILE_ARCHIVE: fp->fp.bgzf->compress_level = 9; break; -#endif - } - } // else CRAM manages this in its own way - break; - } - - default: - break; - } - - if (fp->format.format != cram) - return 0; - - va_start(args, opt); - r = cram_set_voption(fp->fp.cram, opt, args); - va_end(args); - - return r; -} - -BGZF *hts_get_bgzfp(htsFile *fp); - -int hts_set_threads(htsFile *fp, int n) -{ - if (fp->format.format == sam) { - return sam_set_threads(fp, n); - } else if (fp->format.compression == bgzf) { - return bgzf_mt(hts_get_bgzfp(fp), n, 256/*unused*/); - } else if (fp->format.format == cram) { - return hts_set_opt(fp, CRAM_OPT_NTHREADS, n); - } - else return 0; -} - -int hts_set_thread_pool(htsFile *fp, htsThreadPool *p) { - if (fp->format.format == sam || fp->format.format == text_format) { - return sam_set_thread_pool(fp, p); - } else if (fp->format.compression == bgzf) { - return bgzf_thread_pool(hts_get_bgzfp(fp), p->pool, p->qsize); - } else if (fp->format.format == cram) { - return hts_set_opt(fp, CRAM_OPT_THREAD_POOL, p); - } - else return 0; -} - -void hts_set_cache_size(htsFile *fp, int n) -{ - if (fp->format.compression == bgzf) - bgzf_set_cache_size(hts_get_bgzfp(fp), n); -} - -int hts_set_fai_filename(htsFile *fp, const char *fn_aux) -{ - free(fp->fn_aux); - if (fn_aux) { - fp->fn_aux = strdup(fn_aux); - if (fp->fn_aux == NULL) return -1; - } - else fp->fn_aux = NULL; - - if (fp->format.format == cram) - if (cram_set_option(fp->fp.cram, CRAM_OPT_REFERENCE, fp->fn_aux)) - return -1; - - return 0; -} - -int hts_set_filter_expression(htsFile *fp, const char *expr) -{ - if (fp->filter) - hts_filter_free(fp->filter); - - if (!expr) - return 0; - - return (fp->filter = hts_filter_init(expr)) - ? 
0 : -1; -} - -hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname) -{ - int pid = (int) getpid(); - unsigned ptr = (uintptr_t) tmpname; - int n = 0; - hFILE *fp = NULL; - - do { - // Attempt to further uniquify the temporary filename - unsigned t = ((unsigned) time(NULL)) ^ ((unsigned) clock()) ^ ptr; - n++; - - ks_clear(tmpname); - if (ksprintf(tmpname, "%s.tmp_%d_%d_%u", fname, pid, n, t) < 0) break; - - fp = hopen(tmpname->s, mode); - } while (fp == NULL && errno == EEXIST && n < 100); - - return fp; -} - -// For VCF/BCF backward sweeper. Not exposing these functions because their -// future is uncertain. Things will probably have to change with hFILE... -BGZF *hts_get_bgzfp(htsFile *fp) -{ - if (fp->is_bgzf) - return fp->fp.bgzf; - else - return NULL; -} -int hts_useek(htsFile *fp, off_t uoffset, int where) -{ - if (fp->is_bgzf) - return bgzf_useek(fp->fp.bgzf, uoffset, where); - else - return (hseek(fp->fp.hfile, uoffset, SEEK_SET) >= 0)? 0 : -1; -} -off_t hts_utell(htsFile *fp) -{ - if (fp->is_bgzf) - return bgzf_utell(fp->fp.bgzf); - else - return htell(fp->fp.hfile); -} - -int hts_getline(htsFile *fp, int delimiter, kstring_t *str) -{ - int ret; - if (! (delimiter == KS_SEP_LINE || delimiter == '\n')) { - hts_log_error("Unexpected delimiter %d", delimiter); - abort(); - } - - switch (fp->format.compression) { - case no_compression: - str->l = 0; - ret = kgetline2(str, (kgets_func2 *) hgetln, fp->fp.hfile); - if (ret >= 0) ret = (str->l <= INT_MAX)? 
(int) str->l : INT_MAX; - else if (herrno(fp->fp.hfile)) ret = -2, errno = herrno(fp->fp.hfile); - else ret = -1; - break; - - case gzip: - case bgzf: - ret = bgzf_getline(fp->fp.bgzf, '\n', str); - break; - - default: - abort(); - } - - ++fp->lineno; - return ret; -} - -char **hts_readlist(const char *string, int is_file, int *_n) -{ - unsigned int m = 0, n = 0; - char **s = 0, **s_new; - if ( is_file ) - { - BGZF *fp = bgzf_open(string, "r"); - if ( !fp ) return NULL; - - kstring_t str; - int ret; - str.s = 0; str.l = str.m = 0; - while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) - { - if (str.l == 0) continue; - if (hts_resize(char*, n + 1, &m, &s, 0) < 0) - goto err; - s[n] = strdup(str.s); - if (!s[n]) - goto err; - n++; - } - if (ret < -1) // Read error - goto err; - bgzf_close(fp); - free(str.s); - } - else - { - const char *q = string, *p = string; - while ( 1 ) - { - if (*p == ',' || *p == 0) - { - if (hts_resize(char*, n + 1, &m, &s, 0) < 0) - goto err; - s[n] = (char*)calloc(p - q + 1, 1); - if (!s[n]) - goto err; - strncpy(s[n++], q, p - q); - q = p + 1; - } - if ( !*p ) break; - p++; - } - } - // Try to shrink s to the minimum size needed - s_new = (char**)realloc(s, n * sizeof(char*)); - if (!s_new) - goto err; - - s = s_new; - assert(n < INT_MAX); // hts_resize() should ensure this - *_n = n; - return s; - - err: - for (m = 0; m < n; m++) - free(s[m]); - free(s); - return NULL; -} - -char **hts_readlines(const char *fn, int *_n) -{ - unsigned int m = 0, n = 0; - char **s = 0, **s_new; - BGZF *fp = bgzf_open(fn, "r"); - if ( fp ) { // read from file - kstring_t str; - int ret; - str.s = 0; str.l = str.m = 0; - while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) { - if (str.l == 0) continue; - if (hts_resize(char *, n + 1, &m, &s, 0) < 0) - goto err; - s[n] = strdup(str.s); - if (!s[n]) - goto err; - n++; - } - if (ret < -1) // Read error - goto err; - bgzf_close(fp); - free(str.s); - } else if (*fn == ':') { // read from string - const char *q, *p; - 
for (q = p = fn + 1;; ++p) - if (*p == ',' || *p == 0) { - if (hts_resize(char *, n + 1, &m, &s, 0) < 0) - goto err; - s[n] = (char*)calloc(p - q + 1, 1); - if (!s[n]) - goto err; - strncpy(s[n++], q, p - q); - q = p + 1; - if (*p == 0) break; - } - } else return 0; - // Try to shrink s to the minimum size needed - s_new = (char**)realloc(s, n * sizeof(char*)); - if (!s_new) - goto err; - - s = s_new; - assert(n < INT_MAX); // hts_resize() should ensure this - *_n = n; - return s; - - err: - for (m = 0; m < n; m++) - free(s[m]); - free(s); - return NULL; -} - -// DEPRECATED: To be removed in a future HTSlib release -int hts_file_type(const char *fname) -{ - int len = strlen(fname); - if ( !strcasecmp(".vcf.gz",fname+len-7) ) return FT_VCF_GZ; - if ( !strcasecmp(".vcf",fname+len-4) ) return FT_VCF; - if ( !strcasecmp(".bcf",fname+len-4) ) return FT_BCF_GZ; - if ( !strcmp("-",fname) ) return FT_STDIN; - - hFILE *f = hopen(fname, "r"); - if (f == NULL) return 0; - - htsFormat fmt; - if (hts_detect_format2(f, fname, &fmt) < 0) { hclose_abruptly(f); return 0; } - if (hclose(f) < 0) return 0; - - switch (fmt.format) { - case vcf: return (fmt.compression == no_compression)? FT_VCF : FT_VCF_GZ; - case bcf: return (fmt.compression == no_compression)? 
FT_BCF : FT_BCF_GZ; - default: return 0; - } -} - -int hts_check_EOF(htsFile *fp) -{ - if (fp->format.compression == bgzf) - return bgzf_check_EOF(hts_get_bgzfp(fp)); - else if (fp->format.format == cram) - return cram_check_EOF(fp->fp.cram); - else - return 3; -} - - -/**************** - *** Indexing *** - ****************/ - -#define HTS_MIN_MARKER_DIST 0x10000 - -// Finds the special meta bin -// ((1<<(3 * n_lvls + 3)) - 1) / 7 + 1 -#define META_BIN(idx) ((idx)->n_bins + 1) - -#define pair64_lt(a,b) ((a).u < (b).u) -#define pair64max_lt(a,b) ((a).u < (b).u || \ - ((a).u == (b).u && (a).max < (b).max)) - -KSORT_INIT_STATIC(_off, hts_pair64_t, pair64_lt) -KSORT_INIT_STATIC(_off_max, hts_pair64_max_t, pair64max_lt) - -typedef struct { - int32_t m, n; - uint64_t loff; - hts_pair64_t *list; -} bins_t; - -KHASH_MAP_INIT_INT(bin, bins_t) -typedef khash_t(bin) bidx_t; - -typedef struct { - hts_pos_t n, m; - uint64_t *offset; -} lidx_t; - -struct hts_idx_t { - int fmt, min_shift, n_lvls, n_bins; - uint32_t l_meta; - int32_t n, m; - uint64_t n_no_coor; - bidx_t **bidx; - lidx_t *lidx; - uint8_t *meta; // MUST have a terminating NUL on the end - int tbi_n, last_tbi_tid; - struct { - uint32_t last_bin, save_bin; - hts_pos_t last_coor; - int last_tid, save_tid, finished; - uint64_t last_off, save_off; - uint64_t off_beg, off_end; - uint64_t n_mapped, n_unmapped; - } z; // keep internal states -}; - -static char * idx_format_name(int fmt) { - switch (fmt) { - case HTS_FMT_CSI: return "csi"; - case HTS_FMT_BAI: return "bai"; - case HTS_FMT_TBI: return "tbi"; - case HTS_FMT_CRAI: return "crai"; - default: return "unknown"; - } -} - -#ifdef DEBUG_INDEX -static void idx_dump(const hts_idx_t *idx) { - int i; - int64_t j; - - if (!idx) fprintf(stderr, "Null index\n"); - - fprintf(stderr, "format='%s', min_shift=%d, n_lvls=%d, n_bins=%d, l_meta=%u ", - idx_format_name(idx->fmt), idx->min_shift, idx->n_lvls, idx->n_bins, idx->l_meta); - fprintf(stderr, "n=%d, m=%d, 
n_no_coor=%"PRIu64"\n", idx->n, idx->m, idx->n_no_coor); - for (i = 0; i < idx->n; i++) { - bidx_t *bidx = idx->bidx[i]; - lidx_t *lidx = &idx->lidx[i]; - if (bidx) { - fprintf(stderr, "======== BIN Index - tid=%d, n_buckets=%d, size=%d\n", i, bidx->n_buckets, bidx->size); - int b; - for (b = 0; b < META_BIN(idx); b++) { - khint_t k; - if ((k = kh_get(bin, bidx, b)) != kh_end(bidx)) { - bins_t *entries = &kh_value(bidx, k); - int l = hts_bin_level(b); - int64_t bin_width = 1LL << ((idx->n_lvls - l) * 3 + idx->min_shift); - fprintf(stderr, "\tbin=%d, level=%d, parent=%d, n_chunks=%d, loff=%"PRIu64", interval=[%"PRId64" - %"PRId64"]\n", - b, l, hts_bin_parent(b), entries->n, entries->loff, (b-hts_bin_first(l))*bin_width+1, (b+1-hts_bin_first(l))*bin_width); - for (j = 0; j < entries->n; j++) - fprintf(stderr, "\t\tchunk=%"PRId64", u=%"PRIu64", v=%"PRIu64"\n", j, entries->list[j].u, entries->list[j].v); - } - } - } - if (lidx) { - fprintf(stderr, "======== LINEAR Index - tid=%d, n_values=%"PRId64"\n", i, lidx->n); - for (j = 0; j < lidx->n; j++) { - fprintf(stderr, "\t\tentry=%"PRId64", offset=%"PRIu64", interval=[%"PRId64" - %"PRId64"]\n", - j, lidx->offset[j], j*(1<min_shift)+1, (j+1)*(1<min_shift)); - } - } - } -} -#endif - -static inline int insert_to_b(bidx_t *b, int bin, uint64_t beg, uint64_t end) -{ - khint_t k; - bins_t *l; - int absent; - k = kh_put(bin, b, bin, &absent); - if (absent < 0) return -1; // Out of memory - l = &kh_value(b, k); - if (absent) { - l->m = 1; l->n = 0; - l->list = (hts_pair64_t*)calloc(l->m, sizeof(hts_pair64_t)); - if (!l->list) { - kh_del(bin, b, k); - return -1; - } - } else if (l->n == l->m) { - uint32_t new_m = l->m ? 
l->m << 1 : 1; - hts_pair64_t *new_list = realloc(l->list, new_m * sizeof(hts_pair64_t)); - if (!new_list) return -1; - l->list = new_list; - l->m = new_m; - } - l->list[l->n].u = beg; - l->list[l->n++].v = end; - return 0; -} - -static inline int insert_to_l(lidx_t *l, int64_t _beg, int64_t _end, uint64_t offset, int min_shift) -{ - int i; - hts_pos_t beg, end; - beg = _beg >> min_shift; - end = (_end - 1) >> min_shift; - if (l->m < end + 1) { - size_t new_m = l->m * 2 > end + 1 ? l->m * 2 : end + 1; - uint64_t *new_offset; - - new_offset = (uint64_t*)realloc(l->offset, new_m * sizeof(uint64_t)); - if (!new_offset) return -1; - - // fill unused memory with (uint64_t)-1 - memset(new_offset + l->m, 0xff, sizeof(uint64_t) * (new_m - l->m)); - l->m = new_m; - l->offset = new_offset; - } - for (i = beg; i <= end; ++i) { - if (l->offset[i] == (uint64_t)-1) l->offset[i] = offset; - } - if (l->n < end + 1) l->n = end + 1; - return 0; -} - -hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls) -{ - hts_idx_t *idx; - idx = (hts_idx_t*)calloc(1, sizeof(hts_idx_t)); - if (idx == NULL) return NULL; - idx->fmt = fmt; - idx->min_shift = min_shift; - idx->n_lvls = n_lvls; - idx->n_bins = ((1<<(3 * n_lvls + 3)) - 1) / 7; - idx->z.save_tid = idx->z.last_tid = -1; - idx->z.save_bin = idx->z.last_bin = 0xffffffffu; - idx->z.save_off = idx->z.last_off = idx->z.off_beg = idx->z.off_end = offset0; - idx->z.last_coor = 0xffffffffu; - if (n) { - idx->n = idx->m = n; - idx->bidx = (bidx_t**)calloc(n, sizeof(bidx_t*)); - if (idx->bidx == NULL) { free(idx); return NULL; } - idx->lidx = (lidx_t*) calloc(n, sizeof(lidx_t)); - if (idx->lidx == NULL) { free(idx->bidx); free(idx); return NULL; } - } - idx->tbi_n = -1; - idx->last_tbi_tid = -1; - return idx; -} - -static void update_loff(hts_idx_t *idx, int i, int free_lidx) -{ - bidx_t *bidx = idx->bidx[i]; - lidx_t *lidx = &idx->lidx[i]; - khint_t k; - int l; - // the last entry is always valid - for (l=lidx->n-2; 
l >= 0; l--) { - if (lidx->offset[l] == (uint64_t)-1) - lidx->offset[l] = lidx->offset[l+1]; - } - if (bidx == 0) return; - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) // set loff - if (kh_exist(bidx, k)) - { - if ( kh_key(bidx, k) < idx->n_bins ) - { - int bot_bin = hts_bin_bot(kh_key(bidx, k), idx->n_lvls); - // disable linear index if bot_bin out of bounds - kh_val(bidx, k).loff = bot_bin < lidx->n ? lidx->offset[bot_bin] : 0; - } - else - kh_val(bidx, k).loff = 0; - } - if (free_lidx) { - free(lidx->offset); - lidx->m = lidx->n = 0; - lidx->offset = 0; - } -} - -static int compress_binning(hts_idx_t *idx, int i) -{ - bidx_t *bidx = idx->bidx[i]; - khint_t k; - int l, m; - if (bidx == 0) return 0; - // merge a bin to its parent if the bin is too small - for (l = idx->n_lvls; l > 0; --l) { - unsigned start = hts_bin_first(l); - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) { - bins_t *p, *q; - if (!kh_exist(bidx, k) || kh_key(bidx, k) >= idx->n_bins || kh_key(bidx, k) < start) continue; - p = &kh_value(bidx, k); - if (l < idx->n_lvls && p->n > 1) ks_introsort(_off, p->n, p->list); - if ((p->list[p->n - 1].v>>16) - (p->list[0].u>>16) < HTS_MIN_MARKER_DIST) { - khint_t kp; - kp = kh_get(bin, bidx, hts_bin_parent(kh_key(bidx, k))); - if (kp == kh_end(bidx)) continue; - q = &kh_val(bidx, kp); - if (q->n + p->n > q->m) { - uint32_t new_m = q->n + p->n; - hts_pair64_t *new_list; - kroundup32(new_m); - if (new_m > INT32_MAX) return -1; // Limited by index format - new_list = realloc(q->list, new_m * sizeof(*new_list)); - if (!new_list) return -1; - q->m = new_m; - q->list = new_list; - } - memcpy(q->list + q->n, p->list, p->n * sizeof(hts_pair64_t)); - q->n += p->n; - free(p->list); - kh_del(bin, bidx, k); - } - } - } - k = kh_get(bin, bidx, 0); - if (k != kh_end(bidx)) ks_introsort(_off, kh_val(bidx, k).n, kh_val(bidx, k).list); - // merge adjacent chunks that start from the same BGZF block - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) { - bins_t *p; - if 
(!kh_exist(bidx, k) || kh_key(bidx, k) >= idx->n_bins) continue; - p = &kh_value(bidx, k); - for (l = 1, m = 0; l < p->n; ++l) { - if (p->list[m].v>>16 >= p->list[l].u>>16) { - if (p->list[m].v < p->list[l].v) p->list[m].v = p->list[l].v; - } else p->list[++m] = p->list[l]; - } - p->n = m + 1; - } - return 0; -} - -int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset) -{ - int i, ret = 0; - if (idx == NULL || idx->z.finished) return 0; // do not run this function on an empty index or multiple times - if (idx->z.save_tid >= 0) { - ret |= insert_to_b(idx->bidx[idx->z.save_tid], idx->z.save_bin, idx->z.save_off, final_offset); - ret |= insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), idx->z.off_beg, final_offset); - ret |= insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), idx->z.n_mapped, idx->z.n_unmapped); - } - for (i = 0; i < idx->n; ++i) { - update_loff(idx, i, (idx->fmt == HTS_FMT_CSI)); - ret |= compress_binning(idx, i); - } - idx->z.finished = 1; - - return ret; -} - -int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end) -{ - int64_t maxpos = (int64_t) 1 << (idx->min_shift + idx->n_lvls * 3); - if (tid < 0 || (beg <= maxpos && end <= maxpos)) - return 0; - - if (idx->fmt == HTS_FMT_CSI) { - hts_log_error("Region %"PRIhts_pos"..%"PRIhts_pos" " - "cannot be stored in a csi index with these parameters. " - "Please use a larger min_shift or depth", - beg, end); - } else { - hts_log_error("Region %"PRIhts_pos"..%"PRIhts_pos - " cannot be stored in a %s index. Try using a csi index", - beg, end, idx_format_name(idx->fmt)); - } - errno = ERANGE; - return -1; -} - -int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped) -{ - int bin; - if (tid<0) beg = -1, end = 0; - if (hts_idx_check_range(idx, tid, beg, end) < 0) - return -1; - if (tid >= idx->m) { // enlarge the index - uint32_t new_m = idx->m * 2 > tid + 1 ? 
idx->m * 2 : tid + 1; - bidx_t **new_bidx; - lidx_t *new_lidx; - - new_bidx = (bidx_t**)realloc(idx->bidx, new_m * sizeof(bidx_t*)); - if (!new_bidx) return -1; - idx->bidx = new_bidx; - - new_lidx = (lidx_t*) realloc(idx->lidx, new_m * sizeof(lidx_t)); - if (!new_lidx) return -1; - idx->lidx = new_lidx; - - memset(&idx->bidx[idx->m], 0, (new_m - idx->m) * sizeof(bidx_t*)); - memset(&idx->lidx[idx->m], 0, (new_m - idx->m) * sizeof(lidx_t)); - idx->m = new_m; - } - if (idx->n < tid + 1) idx->n = tid + 1; - if (idx->z.finished) return 0; - if (idx->z.last_tid != tid || (idx->z.last_tid >= 0 && tid < 0)) { // change of chromosome - if ( tid>=0 && idx->n_no_coor ) - { - hts_log_error("NO_COOR reads not in a single block at the end %d %d", tid, idx->z.last_tid); - return -1; - } - if (tid>=0 && idx->bidx[tid] != 0) - { - hts_log_error("Chromosome blocks not continuous"); - return -1; - } - idx->z.last_tid = tid; - idx->z.last_bin = 0xffffffffu; - } else if (tid >= 0 && idx->z.last_coor > beg) { // test if positions are out of order - hts_log_error("Unsorted positions on sequence #%d: %"PRIhts_pos" followed by %"PRIhts_pos, tid+1, idx->z.last_coor+1, beg+1); - return -1; - } - if (end < beg) { - // Malformed ranges are errors. (Empty ranges (beg==end) are unusual but acceptable.) 
- hts_log_error("Invalid record on sequence #%d: end %"PRId64" < begin %"PRId64, tid+1, end, beg+1); - return -1; - } - if ( tid>=0 ) - { - if (idx->bidx[tid] == 0) idx->bidx[tid] = kh_init(bin); - // shoehorn [-1,0) (VCF POS=0) into the leftmost bottom-level bin - if (beg < 0) beg = 0; - if (end <= 0) end = 1; - // idx->z.last_off points to the start of the current record - if (insert_to_l(&idx->lidx[tid], beg, end, - idx->z.last_off, idx->min_shift) < 0) return -1; - } - else idx->n_no_coor++; - bin = hts_reg2bin(beg, end, idx->min_shift, idx->n_lvls); - if ((int)idx->z.last_bin != bin) { // then possibly write the binning index - if (idx->z.save_bin != 0xffffffffu) { // save_bin==0xffffffffu only happens to the first record - if (insert_to_b(idx->bidx[idx->z.save_tid], idx->z.save_bin, - idx->z.save_off, idx->z.last_off) < 0) return -1; - } - if (idx->z.last_bin == 0xffffffffu && idx->z.save_bin != 0xffffffffu) { // change of chr; keep meta information - idx->z.off_end = idx->z.last_off; - if (insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), - idx->z.off_beg, idx->z.off_end) < 0) return -1; - if (insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), - idx->z.n_mapped, idx->z.n_unmapped) < 0) return -1; - idx->z.n_mapped = idx->z.n_unmapped = 0; - idx->z.off_beg = idx->z.off_end; - } - idx->z.save_off = idx->z.last_off; - idx->z.save_bin = idx->z.last_bin = bin; - idx->z.save_tid = tid; - } - if (is_mapped) ++idx->z.n_mapped; - else ++idx->z.n_unmapped; - idx->z.last_off = offset; - idx->z.last_coor = beg; - return 0; -} - -// Needed for TBI only. Ensure 'tid' with 'name' is in the index meta data. -// idx->meta needs to have been initialised first with an appropriate Tabix -// configuration via hts_idx_set_meta. -// -// NB number of references (first 4 bytes of tabix header) aren't in -// idx->meta, but held in idx->n instead. 
-int hts_idx_tbi_name(hts_idx_t *idx, int tid, const char *name) { - // Horrid - we have to map incoming tid to a tbi alternative tid. - // This is because TBI counts tids by "covered" refs while everything - // else counts by Nth SQ/contig record in header. - if (tid == idx->last_tbi_tid || tid < 0 || !name) - return idx->tbi_n; - - uint32_t len = strlen(name)+1; - uint8_t *tmp = (uint8_t *)realloc(idx->meta, idx->l_meta + len); - if (!tmp) - return -1; - - // Append name - idx->meta = tmp; - strcpy((char *)idx->meta + idx->l_meta, name); - idx->l_meta += len; - - // Update seq length - u32_to_le(le_to_u32(idx->meta+24)+len, idx->meta+24); - - idx->last_tbi_tid = tid; - return ++idx->tbi_n; -} - -// When doing samtools index we have a read_bam / hts_idx_push(bgzf_tell()) -// loop. idx->z.last_off is the previous bzgf_tell location, so we know -// the location the current bam record started at as well as where it ends. -// -// When building an index on the fly via a write_bam / hts_idx_push loop, -// this isn't quite identical as we may amend the virtual coord returned -// by bgzf_tell to the start of a new block if the next bam struct doesn't -// fit. It's essentially the same thing, but for bit-identical indices -// we need to amend the idx->z.last_off when we know we're starting a new -// block. 
-void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset) -{ - idx->z.last_off = offset; -} - -void hts_idx_destroy(hts_idx_t *idx) -{ - khint_t k; - int i; - if (idx == 0) return; - - // For HTS_FMT_CRAI, idx actually points to a different type -- see sam.c - if (idx->fmt == HTS_FMT_CRAI) { - hts_cram_idx_t *cidx = (hts_cram_idx_t *) idx; - cram_index_free(cidx->cram); - free(cidx); - return; - } - - for (i = 0; i < idx->m; ++i) { - bidx_t *bidx = idx->bidx[i]; - free(idx->lidx[i].offset); - if (bidx == 0) continue; - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) - if (kh_exist(bidx, k)) - free(kh_value(bidx, k).list); - kh_destroy(bin, bidx); - } - free(idx->bidx); free(idx->lidx); free(idx->meta); - free(idx); -} - -int hts_idx_fmt(hts_idx_t *idx) { - return idx->fmt; -} - -// The optimizer eliminates these ed_is_big() calls; still it would be good to -// TODO Determine endianness at configure- or compile-time - -static inline ssize_t HTS_RESULT_USED idx_write_int32(BGZF *fp, int32_t x) -{ - if (ed_is_big()) x = ed_swap_4(x); - return bgzf_write(fp, &x, sizeof x); -} - -static inline ssize_t HTS_RESULT_USED idx_write_uint32(BGZF *fp, uint32_t x) -{ - if (ed_is_big()) x = ed_swap_4(x); - return bgzf_write(fp, &x, sizeof x); -} - -static inline ssize_t HTS_RESULT_USED idx_write_uint64(BGZF *fp, uint64_t x) -{ - if (ed_is_big()) x = ed_swap_8(x); - return bgzf_write(fp, &x, sizeof x); -} - -static inline void swap_bins(bins_t *p) -{ - int i; - for (i = 0; i < p->n; ++i) { - ed_swap_8p(&p->list[i].u); - ed_swap_8p(&p->list[i].v); - } -} - -static int idx_save_core(const hts_idx_t *idx, BGZF *fp, int fmt) -{ - int32_t i, j; - - #define check(ret) if ((ret) < 0) return -1 - - // VCF TBI/CSI only writes IDs for non-empty bins (ie covered references) - // - // NOTE: CSI meta is undefined in spec, so this code has an assumption - // that we're only using it for Tabix data. 
- int nids = idx->n; - if (idx->meta && idx->l_meta >= 4 && le_to_u32(idx->meta) == TBX_VCF) { - for (i = nids = 0; i < idx->n; ++i) { - if (idx->bidx[i]) - nids++; - } - } - check(idx_write_int32(fp, nids)); - if (fmt == HTS_FMT_TBI && idx->l_meta) - check(bgzf_write(fp, idx->meta, idx->l_meta)); - - for (i = 0; i < idx->n; ++i) { - khint_t k; - bidx_t *bidx = idx->bidx[i]; - lidx_t *lidx = &idx->lidx[i]; - - // write binning index - if (nids == idx->n || bidx) - check(idx_write_int32(fp, bidx? kh_size(bidx) : 0)); - if (bidx) - for (k = kh_begin(bidx); k != kh_end(bidx); ++k) - if (kh_exist(bidx, k)) { - bins_t *p = &kh_value(bidx, k); - check(idx_write_uint32(fp, kh_key(bidx, k))); - if (fmt == HTS_FMT_CSI) check(idx_write_uint64(fp, p->loff)); - //int j;for(j=0;jn;++j)fprintf(stderr,"%d,%llx,%d,%llx:%llx\n",kh_key(bidx,k),kh_val(bidx, k).loff,j,p->list[j].u,p->list[j].v); - check(idx_write_int32(fp, p->n)); - for (j = 0; j < p->n; ++j) { - //fprintf(stderr, "\t%ld\t%ld\n", p->list[j].u, p->list[j].v); - check(idx_write_uint64(fp, p->list[j].u)); - check(idx_write_uint64(fp, p->list[j].v)); - } - } - - // write linear index - if (fmt != HTS_FMT_CSI) { - check(idx_write_int32(fp, lidx->n)); - for (j = 0; j < lidx->n; ++j) - check(idx_write_uint64(fp, lidx->offset[j])); - } - } - - check(idx_write_uint64(fp, idx->n_no_coor)); -#ifdef DEBUG_INDEX - idx_dump(idx); -#endif - - return 0; - #undef check -} - -int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt) -{ - int ret, save; - if (idx == NULL || fn == NULL) { errno = EINVAL; return -1; } - char *fnidx = (char*)calloc(1, strlen(fn) + 5); - if (fnidx == NULL) return -1; - - strcpy(fnidx, fn); - switch (fmt) { - case HTS_FMT_BAI: strcat(fnidx, ".bai"); break; - case HTS_FMT_CSI: strcat(fnidx, ".csi"); break; - case HTS_FMT_TBI: strcat(fnidx, ".tbi"); break; - default: abort(); - } - - ret = hts_idx_save_as(idx, fn, fnidx, fmt); - save = errno; - free(fnidx); - errno = save; - return ret; -} - -int 
hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt) -{ - BGZF *fp; - - #define check(ret) if ((ret) < 0) goto fail - - if (fnidx == NULL) return hts_idx_save(idx, fn, fmt); - - fp = bgzf_open(fnidx, (fmt == HTS_FMT_BAI)? "wu" : "w"); - if (fp == NULL) return -1; - - if (fmt == HTS_FMT_CSI) { - check(bgzf_write(fp, "CSI\1", 4)); - check(idx_write_int32(fp, idx->min_shift)); - check(idx_write_int32(fp, idx->n_lvls)); - check(idx_write_uint32(fp, idx->l_meta)); - if (idx->l_meta) check(bgzf_write(fp, idx->meta, idx->l_meta)); - } else if (fmt == HTS_FMT_TBI) { - check(bgzf_write(fp, "TBI\1", 4)); - } else if (fmt == HTS_FMT_BAI) { - check(bgzf_write(fp, "BAI\1", 4)); - } else abort(); - - check(idx_save_core(idx, fp, fmt)); - - return bgzf_close(fp); - #undef check - -fail: - bgzf_close(fp); - return -1; -} - -static int idx_read_core(hts_idx_t *idx, BGZF *fp, int fmt) -{ - int32_t i, n, is_be; - is_be = ed_is_big(); - if (idx == NULL) return -4; - for (i = 0; i < idx->n; ++i) { - bidx_t *h; - lidx_t *l = &idx->lidx[i]; - uint32_t key; - int j, absent; - bins_t *p; - h = idx->bidx[i] = kh_init(bin); - if (bgzf_read(fp, &n, 4) != 4) return -1; - if (is_be) ed_swap_4p(&n); - if (n < 0) return -3; - for (j = 0; j < n; ++j) { - khint_t k; - if (bgzf_read(fp, &key, 4) != 4) return -1; - if (is_be) ed_swap_4p(&key); - k = kh_put(bin, h, key, &absent); - if (absent < 0) return -2; // No memory - if (absent == 0) return -3; // Duplicate bin number - p = &kh_val(h, k); - if (fmt == HTS_FMT_CSI) { - if (bgzf_read(fp, &p->loff, 8) != 8) return -1; - if (is_be) ed_swap_8p(&p->loff); - } else p->loff = 0; - if (bgzf_read(fp, &p->n, 4) != 4) return -1; - if (is_be) ed_swap_4p(&p->n); - if (p->n < 0) return -3; - if ((size_t) p->n > SIZE_MAX / sizeof(hts_pair64_t)) return -2; - p->m = p->n; - p->list = (hts_pair64_t*)malloc(p->m * sizeof(hts_pair64_t)); - if (p->list == NULL) return -2; - if (bgzf_read(fp, p->list, ((size_t) p->n)<<4) != ((size_t) 
p->n)<<4) return -1; - if (is_be) swap_bins(p); - } - if (fmt != HTS_FMT_CSI) { // load linear index - int j, k; - uint32_t x; - if (bgzf_read(fp, &x, 4) != 4) return -1; - if (is_be) ed_swap_4p(&x); - l->n = x; - if (l->n < 0) return -3; - if ((size_t) l->n > SIZE_MAX / sizeof(uint64_t)) return -2; - l->m = l->n; - l->offset = (uint64_t*)malloc(l->n * sizeof(uint64_t)); - if (l->offset == NULL) return -2; - if (bgzf_read(fp, l->offset, l->n << 3) != l->n << 3) return -1; - if (is_be) for (j = 0; j < l->n; ++j) ed_swap_8p(&l->offset[j]); - for (k = j = 0; j < l->n && l->offset[j] == 0; k = ++j); // stop at the first non-zero entry - for (j = l->n-1; j > k; j--) // fill missing values; may happen given older samtools and tabix - if (l->offset[j-1] == 0) l->offset[j-1] = l->offset[j]; - update_loff(idx, i, 0); - } - } - if (bgzf_read(fp, &idx->n_no_coor, 8) != 8) idx->n_no_coor = 0; - if (is_be) ed_swap_8p(&idx->n_no_coor); -#ifdef DEBUG_INDEX - idx_dump(idx); -#endif - - return 0; -} - -static hts_idx_t *idx_read(const char *fn) -{ - uint8_t magic[4]; - int i, is_be; - hts_idx_t *idx = NULL; - uint8_t *meta = NULL; - BGZF *fp = bgzf_open(fn, "r"); - if (fp == NULL) return NULL; - is_be = ed_is_big(); - if (bgzf_read(fp, magic, 4) != 4) goto fail; - - if (memcmp(magic, "CSI\1", 4) == 0) { - uint32_t x[3], n; - if (bgzf_read(fp, x, 12) != 12) goto fail; - if (is_be) for (i = 0; i < 3; ++i) ed_swap_4p(&x[i]); - if (x[2]) { - if (SIZE_MAX - x[2] < 1) goto fail; // Prevent possible overflow - if ((meta = (uint8_t*)malloc((size_t) x[2] + 1)) == NULL) goto fail; - if (bgzf_read(fp, meta, x[2]) != x[2]) goto fail; - // Prevent possible strlen past the end in tbx_index_load2 - meta[x[2]] = '\0'; - } - if (bgzf_read(fp, &n, 4) != 4) goto fail; - if (is_be) ed_swap_4p(&n); - if (n > INT32_MAX) goto fail; - if ((idx = hts_idx_init(n, HTS_FMT_CSI, 0, x[0], x[1])) == NULL) goto fail; - idx->l_meta = x[2]; - idx->meta = meta; - meta = NULL; - if (idx_read_core(idx, fp, 
HTS_FMT_CSI) < 0) goto fail; - } - else if (memcmp(magic, "TBI\1", 4) == 0) { - uint8_t x[8 * 4]; - uint32_t n; - // Read file header - if (bgzf_read(fp, x, sizeof(x)) != sizeof(x)) goto fail; - n = le_to_u32(&x[0]); // location of n_ref - if (n > INT32_MAX) goto fail; - if ((idx = hts_idx_init(n, HTS_FMT_TBI, 0, 14, 5)) == NULL) goto fail; - n = le_to_u32(&x[7*4]); // location of l_nm - if (n > UINT32_MAX - 29) goto fail; // Prevent possible overflow - idx->l_meta = 28 + n; - if ((idx->meta = (uint8_t*)malloc(idx->l_meta + 1)) == NULL) goto fail; - // copy format, col_seq, col_beg, col_end, meta, skip, l_nm - // N.B. left in little-endian byte order. - memcpy(idx->meta, &x[1*4], 28); - // Read in sequence names. - if (bgzf_read(fp, idx->meta + 28, n) != n) goto fail; - // Prevent possible strlen past the end in tbx_index_load2 - idx->meta[idx->l_meta] = '\0'; - if (idx_read_core(idx, fp, HTS_FMT_TBI) < 0) goto fail; - } - else if (memcmp(magic, "BAI\1", 4) == 0) { - uint32_t n; - if (bgzf_read(fp, &n, 4) != 4) goto fail; - if (is_be) ed_swap_4p(&n); - if (n > INT32_MAX) goto fail; - if ((idx = hts_idx_init(n, HTS_FMT_BAI, 0, 14, 5)) == NULL) goto fail; - if (idx_read_core(idx, fp, HTS_FMT_BAI) < 0) goto fail; - } - else { errno = EINVAL; goto fail; } - - bgzf_close(fp); - return idx; - -fail: - bgzf_close(fp); - hts_idx_destroy(idx); - free(meta); - return NULL; -} - -int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, - int is_copy) -{ - uint8_t *new_meta = meta; - if (is_copy) { - size_t l = l_meta; - if (l > SIZE_MAX - 1) { - errno = ENOMEM; - return -1; - } - new_meta = malloc(l + 1); - if (!new_meta) return -1; - memcpy(new_meta, meta, l); - // Prevent possible strlen past the end in tbx_index_load2 - new_meta[l] = '\0'; - } - if (idx->meta) free(idx->meta); - idx->l_meta = l_meta; - idx->meta = new_meta; - return 0; -} - -uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta) -{ - *l_meta = idx->l_meta; - return idx->meta; -} - -const 
char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr) -{ - if ( !idx || !idx->n ) - { - *n = 0; - return NULL; - } - - int tid = 0, i; - const char **names = (const char**) calloc(idx->n,sizeof(const char*)); - for (i=0; in; i++) - { - bidx_t *bidx = idx->bidx[i]; - if ( !bidx ) continue; - names[tid++] = getid(hdr,i); - } - *n = tid; - return names; -} - -int hts_idx_nseq(const hts_idx_t *idx) { - if (!idx) return -1; - return idx->n; -} - -int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped) -{ - if (!idx) return -1; - if ( idx->fmt == HTS_FMT_CRAI ) { - *mapped = 0; *unmapped = 0; - return -1; - } - - bidx_t *h = idx->bidx[tid]; - if (!h) return -1; - khint_t k = kh_get(bin, h, META_BIN(idx)); - if (k != kh_end(h)) { - *mapped = kh_val(h, k).list[1].u; - *unmapped = kh_val(h, k).list[1].v; - return 0; - } else { - *mapped = 0; *unmapped = 0; - return -1; - } -} - -uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx) -{ - if (idx->fmt == HTS_FMT_CRAI) return 0; - return idx->n_no_coor; -} - -/**************** - *** Iterator *** - ****************/ - -// Note: even with 32-bit hts_pos_t, end needs to be 64-bit here due to 1LL<>s); e = t + (end>>s); - for (i = b; i <= e; ++i) { - if (kh_get(bin, bidx, i) != kh_end(bidx)) { - assert(itr->bins.n < itr->bins.m); - itr->bins.a[itr->bins.n++] = i; - } - } - } - return itr->bins.n; -} - -static inline int reg2bins_wide(int64_t beg, int64_t end, hts_itr_t *itr, int min_shift, int n_lvls, bidx_t *bidx) -{ - khint_t i; - hts_pos_t max_shift = 3 * n_lvls + min_shift; - --end; - if (beg < 0) beg = 0; - for (i = kh_begin(bidx); i != kh_end(bidx); i++) { - if (!kh_exist(bidx, i)) continue; - hts_pos_t bin = (hts_pos_t) kh_key(bidx, i); - int level = hts_bin_level(bin); - if (level > n_lvls) continue; // Dodgy index? 
- hts_pos_t first = hts_bin_first(level); - hts_pos_t beg_at_level = first + (beg >> (max_shift - 3 * level)); - hts_pos_t end_at_level = first + (end >> (max_shift - 3 * level)); - if (beg_at_level <= bin && bin <= end_at_level) { - assert(itr->bins.n < itr->bins.m); - itr->bins.a[itr->bins.n++] = bin; - } - } - return itr->bins.n; -} - -static inline int reg2bins(int64_t beg, int64_t end, hts_itr_t *itr, int min_shift, int n_lvls, bidx_t *bidx) -{ - int l, t, s = min_shift + (n_lvls<<1) + n_lvls; - size_t reg_bin_count = 0, hash_bin_count = kh_n_buckets(bidx), max_bins; - hts_pos_t end1; - if (end >= 1LL<= end) return 0; - end1 = end - 1; - - // Count bins to see if it's faster to iterate through the hash table - // or the set of bins covering the region - for (l = 0, t = 0; l <= n_lvls; s -= 3, t += 1<<((l<<1)+l), ++l) { - reg_bin_count += (end1 >> s) - (beg >> s) + 1; - } - max_bins = reg_bin_count < kh_size(bidx) ? reg_bin_count : kh_size(bidx); - if (itr->bins.m - itr->bins.n < max_bins) { - // Worst-case memory usage. May be wasteful on very sparse - // data, but the bin list usually won't be too big anyway. 
- size_t new_m = max_bins + itr->bins.n; - if (new_m > INT_MAX || new_m > SIZE_MAX / sizeof(int)) { - errno = ENOMEM; - return -1; - } - int *new_a = realloc(itr->bins.a, new_m * sizeof(*new_a)); - if (!new_a) return -1; - itr->bins.a = new_a; - itr->bins.m = new_m; - } - if (reg_bin_count < hash_bin_count) { - return reg2bins_narrow(beg, end, itr, min_shift, n_lvls, bidx); - } else { - return reg2bins_wide(beg, end, itr, min_shift, n_lvls, bidx); - } -} - -static inline int add_to_interval(hts_itr_t *iter, bins_t *bin, - int tid, uint32_t interval, - uint64_t min_off, uint64_t max_off) -{ - hts_pair64_max_t *off; - int j; - - if (!bin->n) - return 0; - off = realloc(iter->off, (iter->n_off + bin->n) * sizeof(*off)); - if (!off) - return -2; - - iter->off = off; - for (j = 0; j < bin->n; ++j) { - if (bin->list[j].v > min_off && bin->list[j].u < max_off) { - iter->off[iter->n_off].u = min_off > bin->list[j].u - ? min_off : bin->list[j].u; - iter->off[iter->n_off].v = max_off < bin->list[j].v - ? max_off : bin->list[j].v; - // hts_pair64_max_t::max is now used to link - // file offsets to region list entries. - // The iterator can use this to decide if it - // can skip some file regions. 
- iter->off[iter->n_off].max = ((uint64_t) tid << 32) | interval; - iter->n_off++; - } - } - return 0; -} - -static inline int reg2intervals_narrow(hts_itr_t *iter, const bidx_t *bidx, - int tid, int64_t beg, int64_t end, - uint32_t interval, - uint64_t min_off, uint64_t max_off, - int min_shift, int n_lvls) -{ - int l, t, s = min_shift + n_lvls * 3; - hts_pos_t b, e, i; - - for (--end, l = 0, t = 0; l <= n_lvls; s -= 3, t += 1<<((l<<1)+l), ++l) { - b = t + (beg>>s); e = t + (end>>s); - for (i = b; i <= e; ++i) { - khint_t k = kh_get(bin, bidx, i); - if (k != kh_end(bidx)) { - bins_t *bin = &kh_value(bidx, k); - int res = add_to_interval(iter, bin, tid, interval, min_off, max_off); - if (res < 0) - return res; - } - } - } - return 0; -} - -static inline int reg2intervals_wide(hts_itr_t *iter, const bidx_t *bidx, - int tid, int64_t beg, int64_t end, - uint32_t interval, - uint64_t min_off, uint64_t max_off, - int min_shift, int n_lvls) -{ - khint_t i; - hts_pos_t max_shift = 3 * n_lvls + min_shift; - --end; - if (beg < 0) beg = 0; - for (i = kh_begin(bidx); i != kh_end(bidx); i++) { - if (!kh_exist(bidx, i)) continue; - hts_pos_t bin = (hts_pos_t) kh_key(bidx, i); - int level = hts_bin_level(bin); - if (level > n_lvls) continue; // Dodgy index? 
- hts_pos_t first = hts_bin_first(level); - hts_pos_t beg_at_level = first + (beg >> (max_shift - 3 * level)); - hts_pos_t end_at_level = first + (end >> (max_shift - 3 * level)); - if (beg_at_level <= bin && bin <= end_at_level) { - bins_t *bin = &kh_value(bidx, i); - int res = add_to_interval(iter, bin, tid, interval, min_off, max_off); - if (res < 0) - return res; - } - } - return 0; -} - -static inline int reg2intervals(hts_itr_t *iter, const hts_idx_t *idx, int tid, int64_t beg, int64_t end, uint32_t interval, uint64_t min_off, uint64_t max_off, int min_shift, int n_lvls) -{ - int l, t, s; - int i, j; - hts_pos_t end1; - bidx_t *bidx; - int start_n_off; - size_t reg_bin_count = 0, hash_bin_count; - int res; - - if (!iter || !idx || (bidx = idx->bidx[tid]) == NULL || beg >= end) - return -1; - - hash_bin_count = kh_n_buckets(bidx); - - s = min_shift + (n_lvls<<1) + n_lvls; - if (end >= 1LL<> s) - (beg >> s) + 1; - } - - start_n_off = iter->n_off; - - // Populate iter->off with the intervals for this region - if (reg_bin_count < hash_bin_count) { - res = reg2intervals_narrow(iter, bidx, tid, beg, end, interval, - min_off, max_off, min_shift, n_lvls); - } else { - res = reg2intervals_wide(iter, bidx, tid, beg, end, interval, - min_off, max_off, min_shift, n_lvls); - } - if (res < 0) - return res; - - if (iter->n_off - start_n_off > 1) { - ks_introsort(_off_max, iter->n_off - start_n_off, iter->off + start_n_off); - for (i = start_n_off, j = start_n_off + 1; j < iter->n_off; j++) { - if (iter->off[i].v >= iter->off[j].u) { - if (iter->off[i].v < iter->off[j].v) - iter->off[i].v = iter->off[j].v; - } else { - i++; - if (i < j) - iter->off[i] = iter->off[j]; - } - } - iter->n_off = i + 1; - } - - return iter->n_off; -} - -static int compare_regions(const void *r1, const void *r2) { - hts_reglist_t *reg1 = (hts_reglist_t *)r1; - hts_reglist_t *reg2 = (hts_reglist_t *)r2; - - if (reg1->tid < 0 && reg2->tid >= 0) - return 1; - else if (reg1->tid >= 0 && reg2->tid < 0) 
- return -1; - else - return reg1->tid - reg2->tid; -} - -uint64_t hts_itr_off(const hts_idx_t* idx, int tid) { - - int i; - bidx_t* bidx; - uint64_t off0 = (uint64_t) -1; - khint_t k; - switch (tid) { - case HTS_IDX_START: - // Find the smallest offset, note that sequence ids may not be ordered sequentially - for (i = 0; i < idx->n; i++) { - bidx = idx->bidx[i]; - k = kh_get(bin, bidx, META_BIN(idx)); - if (k == kh_end(bidx)) - continue; - - if (off0 > kh_val(bidx, k).list[0].u) - off0 = kh_val(bidx, k).list[0].u; - } - if (off0 == (uint64_t) -1 && idx->n_no_coor) - off0 = 0; - // only no-coor reads in this bam - break; - case HTS_IDX_NOCOOR: - /* No-coor reads sort after all of the mapped reads. The position - is not stored in the index itself, so need to find the end - offset for the last mapped read. A loop is needed here in - case references at the end of the file have no mapped reads, - or sequence ids are not ordered sequentially. - See issue samtools#568 and commits b2aab8, 60c22d and cc207d. 
*/ - for (i = 0; i < idx->n; i++) { - bidx = idx->bidx[i]; - k = kh_get(bin, bidx, META_BIN(idx)); - if (k != kh_end(bidx)) { - if (off0 == (uint64_t) -1 || off0 < kh_val(bidx, k).list[0].v) { - off0 = kh_val(bidx, k).list[0].v; - } - } - } - if (off0 == (uint64_t) -1 && idx->n_no_coor) - off0 = 0; - // only no-coor reads in this bam - break; - case HTS_IDX_REST: - off0 = 0; - break; - case HTS_IDX_NONE: - off0 = 0; - break; - } - - return off0; -} - -hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec) -{ - int i, n_off, l, bin; - hts_pair64_max_t *off; - khint_t k; - bidx_t *bidx; - uint64_t min_off, max_off; - hts_itr_t *iter; - uint32_t unmapped = 0, rel_off; - - // It's possible to call this function with NULL idx iff - // tid is one of the special values HTS_IDX_REST or HTS_IDX_NONE - if (!idx && !(tid == HTS_IDX_REST || tid == HTS_IDX_NONE)) { - errno = EINVAL; - return NULL; - } - - iter = (hts_itr_t*)calloc(1, sizeof(hts_itr_t)); - if (iter) { - if (tid < 0) { - uint64_t off = hts_itr_off(idx, tid); - if (off != (uint64_t) -1) { - iter->read_rest = 1; - iter->curr_off = off; - iter->readrec = readrec; - if (tid == HTS_IDX_NONE) - iter->finished = 1; - } else { - free(iter); - iter = NULL; - } - } else if (tid >= idx->n || (bidx = idx->bidx[tid]) == NULL) { - iter->finished = 1; - } else { - if (beg < 0) beg = 0; - if (end < beg) { - free(iter); - return NULL; - } - - k = kh_get(bin, bidx, META_BIN(idx)); - if (k != kh_end(bidx)) - unmapped = kh_val(bidx, k).list[1].v; - else - unmapped = 1; - - iter->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1; - iter->readrec = readrec; - - if ( !kh_size(bidx) ) { iter->finished = 1; return iter; } - - rel_off = beg>>idx->min_shift; - // compute min_off - bin = hts_bin_first(idx->n_lvls) + rel_off; - do { - int first; - k = kh_get(bin, bidx, bin); - if (k != kh_end(bidx)) break; - first = (hts_bin_parent(bin)<<3) + 1; - if (bin > first) --bin; - 
else bin = hts_bin_parent(bin); - } while (bin); - if (bin == 0) k = kh_get(bin, bidx, bin); - min_off = k != kh_end(bidx)? kh_val(bidx, k).loff : 0; - // min_off can be calculated more accurately if the - // linear index is available - if (idx->lidx[tid].offset - && rel_off < idx->lidx[tid].n) { - if (min_off < idx->lidx[tid].offset[rel_off]) - min_off = idx->lidx[tid].offset[rel_off]; - if (unmapped) { - // unmapped reads are not covered by the linear index, - // so search backwards for a smaller offset - int tmp_off; - for (tmp_off = rel_off-1; tmp_off >= 0; tmp_off--) { - if (idx->lidx[tid].offset[tmp_off] < min_off) { - min_off = idx->lidx[tid].offset[tmp_off]; - break; - } - } - // if the search went too far back or no satisfactory entry - // was found, revert to the bin index loff value - if (k != kh_end(bidx) && (min_off < kh_val(bidx, k).loff || tmp_off < 0)) - min_off = kh_val(bidx, k).loff; - } - } else if (unmapped) { //CSI index - if (k != kh_end(bidx)) - min_off = kh_val(bidx, k).loff; - } - - // compute max_off: a virtual offset from a bin to the right of end - // First check if end lies within the range of the index (it won't - // if it's HTS_POS_MAX) - if (end < 1LL << (idx->min_shift + 3 * idx->n_lvls)) { - bin = hts_bin_first(idx->n_lvls) + ((end-1) >> idx->min_shift) + 1; - if (bin >= idx->n_bins) bin = 0; - while (1) { - // search for an extant bin by moving right, but moving up to the - // parent whenever we get to a first child (which also covers falling - // off the RHS, which wraps around and immediately goes up to bin 0) - while (bin % 8 == 1) bin = hts_bin_parent(bin); - if (bin == 0) { max_off = UINT64_MAX; break; } - k = kh_get(bin, bidx, bin); - if (k != kh_end(bidx) && kh_val(bidx, k).n > 0) { max_off = kh_val(bidx, k).list[0].u; break; } - bin++; - } - } else { - // Searching to end of reference - max_off = UINT64_MAX; - } - - // retrieve bins - if (reg2bins(beg, end, iter, idx->min_shift, idx->n_lvls, bidx) < 0) { - 
hts_itr_destroy(iter); - return NULL; - } - - for (i = n_off = 0; i < iter->bins.n; ++i) - if ((k = kh_get(bin, bidx, iter->bins.a[i])) != kh_end(bidx)) - n_off += kh_value(bidx, k).n; - if (n_off == 0) { - // No overlapping bins means the iterator has already finished. - iter->finished = 1; - return iter; - } - off = calloc(n_off, sizeof(*off)); - for (i = n_off = 0; i < iter->bins.n; ++i) { - if ((k = kh_get(bin, bidx, iter->bins.a[i])) != kh_end(bidx)) { - int j; - bins_t *p = &kh_value(bidx, k); - for (j = 0; j < p->n; ++j) - if (p->list[j].v > min_off && p->list[j].u < max_off) { - off[n_off].u = min_off > p->list[j].u - ? min_off : p->list[j].u; - off[n_off].v = max_off < p->list[j].v - ? max_off : p->list[j].v; - // hts_pair64_max_t::max is now used to link - // file offsets to region list entries. - // The iterator can use this to decide if it - // can skip some file regions. - off[n_off].max = ((uint64_t) tid << 32) | j; - n_off++; - } - } - } - - if (n_off == 0) { - free(off); - iter->finished = 1; - return iter; - } - ks_introsort(_off_max, n_off, off); - // resolve completely contained adjacent blocks - for (i = 1, l = 0; i < n_off; ++i) - if (off[l].v < off[i].v) off[++l] = off[i]; - n_off = l + 1; - // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing - for (i = 1; i < n_off; ++i) - if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u; - // merge adjacent blocks - for (i = 1, l = 0; i < n_off; ++i) { - if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v; - else off[++l] = off[i]; - } - n_off = l + 1; - iter->n_off = n_off; iter->off = off; - } - } - - return iter; -} - -int hts_itr_multi_bam(const hts_idx_t *idx, hts_itr_t *iter) -{ - int i, j, bin; - khint_t k; - bidx_t *bidx; - uint64_t min_off, max_off, t_off = (uint64_t)-1; - int tid; - hts_pos_t beg, end; - hts_reglist_t *curr_reg; - uint32_t unmapped = 0, rel_off; - - if (!idx || !iter || !iter->multi) - return -1; - - iter->i = -1; - for (i=0; in_reg; i++) { 
- - curr_reg = &iter->reg_list[i]; - tid = curr_reg->tid; - - if (tid < 0) { - t_off = hts_itr_off(idx, tid); - if (t_off != (uint64_t)-1) { - switch (tid) { - case HTS_IDX_NONE: - iter->finished = 1; - // fall through - case HTS_IDX_START: - case HTS_IDX_REST: - iter->curr_off = t_off; - iter->n_reg = 0; - iter->reg_list = NULL; - iter->read_rest = 1; - return 0; - case HTS_IDX_NOCOOR: - iter->nocoor = 1; - iter->nocoor_off = t_off; - } - } - } else { - if (tid >= idx->n || (bidx = idx->bidx[tid]) == NULL || !kh_size(bidx)) - continue; - - k = kh_get(bin, bidx, META_BIN(idx)); - if (k != kh_end(bidx)) - unmapped = kh_val(bidx, k).list[1].v; - else - unmapped = 1; - - for(j=0; jcount; j++) { - hts_pair32_t *curr_intv = &curr_reg->intervals[j]; - if (curr_intv->end < curr_intv->beg) - continue; - - beg = curr_intv->beg; - end = curr_intv->end; - rel_off = beg>>idx->min_shift; - - /* Compute 'min_off' by searching the lowest level bin containing 'beg'. - If the computed bin is not in the index, try the next bin to the - left, belonging to the same parent. If it is the first sibling bin, - try the parent bin. */ - bin = hts_bin_first(idx->n_lvls) + rel_off; - do { - int first; - k = kh_get(bin, bidx, bin); - if (k != kh_end(bidx)) break; - first = (hts_bin_parent(bin)<<3) + 1; - if (bin > first) --bin; - else bin = hts_bin_parent(bin); - } while (bin); - if (bin == 0) - k = kh_get(bin, bidx, bin); - min_off = k != kh_end(bidx)? 
kh_val(bidx, k).loff : 0; - // min_off can be calculated more accurately if the - // linear index is available - if (idx->lidx[tid].offset - && rel_off < idx->lidx[tid].n) { - if (min_off < idx->lidx[tid].offset[rel_off]) - min_off = idx->lidx[tid].offset[rel_off]; - if (unmapped) { - int tmp_off; - for (tmp_off = rel_off-1; tmp_off >= 0; tmp_off--) { - if (idx->lidx[tid].offset[tmp_off] < min_off) { - min_off = idx->lidx[tid].offset[tmp_off]; - break; - } - } - - if (k != kh_end(bidx) && (min_off < kh_val(bidx, k).loff || tmp_off < 0)) - min_off = kh_val(bidx, k).loff; - } - } else if (unmapped) { //CSI index - if (k != kh_end(bidx)) - min_off = kh_val(bidx, k).loff; - } - - // compute max_off: a virtual offset from a bin to the right of end - // First check if end lies within the range of the index (it - // won't if it's HTS_POS_MAX) - if (end < 1LL << (idx->min_shift + 3 * idx->n_lvls)) { - bin = hts_bin_first(idx->n_lvls) + ((end-1) >> idx->min_shift) + 1; - if (bin >= idx->n_bins) bin = 0; - while (1) { - // search for an extant bin by moving right, but moving up to the - // parent whenever we get to a first child (which also covers falling - // off the RHS, which wraps around and immediately goes up to bin 0) - while (bin % 8 == 1) bin = hts_bin_parent(bin); - if (bin == 0) { max_off = UINT64_MAX; break; } - k = kh_get(bin, bidx, bin); - if (k != kh_end(bidx) && kh_val(bidx, k).n > 0) { - max_off = kh_val(bidx, k).list[0].u; - break; - } - bin++; - } - } else { - // Searching to end of reference - max_off = UINT64_MAX; - } - - //convert coordinates to file offsets - if (reg2intervals(iter, idx, tid, beg, end, j, - min_off, max_off, - idx->min_shift, idx->n_lvls) < 0) { - return -1; - } - } - } - } - - if (iter->n_off > 1) - ks_introsort(_off_max, iter->n_off, iter->off); - - if(!iter->n_off && !iter->nocoor) - iter->finished = 1; - - return 0; -} - -int hts_itr_multi_cram(const hts_idx_t *idx, hts_itr_t *iter) -{ - const hts_cram_idx_t *cidx = (const 
hts_cram_idx_t *) idx; - int tid, i, n_off = 0; - uint32_t j; - hts_pos_t beg, end; - hts_reglist_t *curr_reg; - hts_pair32_t *curr_intv; - hts_pair64_max_t *off = NULL, *tmp; - cram_index *e = NULL; - - if (!cidx || !iter || !iter->multi) - return -1; - - iter->is_cram = 1; - iter->read_rest = 0; - iter->off = NULL; - iter->n_off = 0; - iter->curr_off = 0; - iter->i = -1; - - for (i=0; in_reg; i++) { - - curr_reg = &iter->reg_list[i]; - tid = curr_reg->tid; - - if (tid >= 0) { - tmp = realloc(off, (n_off + curr_reg->count) * sizeof(*off)); - if (!tmp) - goto err; - off = tmp; - - for (j=0; j < curr_reg->count; j++) { - curr_intv = &curr_reg->intervals[j]; - if (curr_intv->end < curr_intv->beg) - continue; - - beg = curr_intv->beg; - end = curr_intv->end; - -/* First, fetch the container overlapping 'beg' and assign its file offset to u, then - * find the container overlapping 'end' and assign the relative end of the slice to v. - * The cram_ptell function will adjust with the container offset, which is not stored - * in the index. - */ - e = cram_index_query(cidx->cram, tid, beg+1, NULL); - if (e) { - off[n_off].u = e->offset; - // hts_pair64_max_t::max is now used to link - // file offsets to region list entries. - // The iterator can use this to decide if it - // can skip some file regions. - off[n_off].max = ((uint64_t) tid << 32) | j; - - if (end >= HTS_POS_MAX) { - e = cram_index_last(cidx->cram, tid, NULL); - } else { - e = cram_index_query_last(cidx->cram, tid, end+1); - } - - if (e) { - off[n_off++].v = e->e_next - ? e->e_next->offset - : e->offset + e->slice + e->len; - } else { - hts_log_warning("Could not set offset end for region %d:%"PRIhts_pos"-%"PRIhts_pos". 
Skipping", tid, beg, end); - } - } - } - } else { - switch (tid) { - case HTS_IDX_NOCOOR: - e = cram_index_query(cidx->cram, tid, 1, NULL); - if (e) { - iter->nocoor = 1; - iter->nocoor_off = e->offset; - } else { - hts_log_warning("No index entry for NOCOOR region"); - } - break; - case HTS_IDX_START: - e = cram_index_query(cidx->cram, tid, 1, NULL); - if (e) { - iter->read_rest = 1; - tmp = realloc(off, sizeof(*off)); - if (!tmp) - goto err; - off = tmp; - off[0].u = e->offset; - off[0].v = 0; - n_off=1; - } else { - hts_log_warning("No index entries"); - } - break; - case HTS_IDX_REST: - break; - case HTS_IDX_NONE: - iter->finished = 1; - break; - default: - hts_log_error("Query with tid=%d not implemented for CRAM files", tid); - } - } - } - - if (n_off) { - ks_introsort(_off_max, n_off, off); - iter->n_off = n_off; iter->off = off; - } - - if(!n_off && !iter->nocoor) - iter->finished = 1; - - return 0; - - err: - free(off); - return -1; -} - -void hts_itr_destroy(hts_itr_t *iter) -{ - if (iter) { - if (iter->multi) { - hts_reglist_free(iter->reg_list, iter->n_reg); - } else { - free(iter->bins.a); - } - - if (iter->off) - free(iter->off); - free(iter); - } -} - -static inline long long push_digit(long long i, char c) -{ - // ensure subtraction occurs first, avoiding overflow for >= MAX-48 or so - int digit = c - '0'; - return 10 * i + digit; -} - -long long hts_parse_decimal(const char *str, char **strend, int flags) -{ - long long n = 0; - int digits = 0, decimals = 0, e = 0, lost = 0; - char sign = '+', esign = '+'; - const char *s, *str_orig = str; - - while (isspace_c(*str)) str++; - s = str; - - if (*s == '+' || *s == '-') sign = *s++; - while (*s) - if (isdigit_c(*s)) digits++, n = push_digit(n, *s++); - else if (*s == ',' && (flags & HTS_PARSE_THOUSANDS_SEP)) s++; - else break; - - if (*s == '.') { - s++; - while (isdigit_c(*s)) decimals++, digits++, n = push_digit(n, *s++); - } - - switch (*s) { - case 'e': case 'E': - s++; - if (*s == '+' || *s == 
'-') esign = *s++; - while (isdigit_c(*s)) e = push_digit(e, *s++); - if (esign == '-') e = -e; - break; - - case 'k': case 'K': e += 3; s++; break; - case 'm': case 'M': e += 6; s++; break; - case 'g': case 'G': e += 9; s++; break; - } - - e -= decimals; - while (e > 0) n *= 10, e--; - while (e < 0) lost += n % 10, n /= 10, e++; - - if (lost > 0) { - hts_log_warning("Discarding fractional part of %.*s", (int)(s - str), str); - } - - if (strend) { - // Set to the original input str pointer if not valid number syntax - *strend = (digits > 0)? (char *)s : (char *)str_orig; - } else if (digits == 0) { - hts_log_warning("Invalid numeric value %.8s[truncated]", str); - } else if (*s) { - if ((flags & HTS_PARSE_THOUSANDS_SEP) || (!(flags & HTS_PARSE_THOUSANDS_SEP) && *s != ',')) - hts_log_warning("Ignoring unknown characters after %.*s[%s]", (int)(s - str), str, s); - } - - return (sign == '+')? n : -n; -} - -static void *hts_memrchr(const void *s, int c, size_t n) { - size_t i; - unsigned char *u = (unsigned char *)s; - for (i = n; i > 0; i--) { - if (u[i-1] == c) - return u+i-1; - } - - return NULL; -} - -/* - * A variant of hts_parse_reg which is reference-id aware. It uses - * the iterator name2id callbacks to validate the region tokenisation works. - * - * This is necessary due to GRCh38 HLA additions which have reference names - * like "HLA-DRB1*12:17". - * - * All parameters are mandatory. - * - * To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" - * are reference names, we may quote using curly braces. - * Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. - * - * Flags are used to control how parsing works, and can be one of the below. - * - * HTS_PARSE_LIST: - * If present, the region is assmed to be a comma separated list and - * position parsing will not contain commas (this implicitly - * clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal). 
- * On success the return pointer will be the start of the next region, ie - * the character after the comma. (If *ret != '\0' then the caller can - * assume another region is present in the list.) - * - * If not set then positions may contain commas. In this case the return - * value should point to the end of the string, or NULL on failure. - * - * HTS_PARSE_ONE_COORD: - * If present, X:100 is treated as the single base pair region X:100-100. - * In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-. - * (This is the standard bcftools region convention.) - * - * When not set X:100 is considered to be X:100- where is - * the end of chromosome X (set to HTS_POS_MAX here). X:100- and X:-100 - * are invalid. - * (This is the standard samtools region convention.) - * - * Note the supplied string expects 1 based inclusive coordinates, but the - * returned coordinates start from 0 and are half open, so pos0 is valid - * for use in e.g. "for (pos0 = beg; pos0 < end; pos0++) {...}" - * - * On success a pointer to the byte after the end of the entire region - * specifier is returned (plus any trailing comma), and tid, - * beg & end will be set. - * On failure NULL is returned. - */ -const char *hts_parse_region(const char *s, int *tid, hts_pos_t *beg, - hts_pos_t *end, hts_name2id_f getid, void *hdr, - int flags) -{ - if (!s || !tid || !beg || !end || !getid) - return NULL; - - size_t s_len = strlen(s); - kstring_t ks = { 0, 0, NULL }; - - const char *colon = NULL, *comma = NULL; - int quoted = 0; - - if (flags & HTS_PARSE_LIST) - flags &= ~HTS_PARSE_THOUSANDS_SEP; - else - flags |= HTS_PARSE_THOUSANDS_SEP; - - const char *s_end = s + s_len; - - // Braced quoting of references is permitted to resolve ambiguities. 
- if (*s == '{') { - const char *close = memchr(s, '}', s_len); - if (!close) { - hts_log_error("Mismatching braces in \"%s\"", s); - *tid = -1; - return NULL; - } - s++; - s_len--; - if (close[1] == ':') - colon = close+1; - quoted = 1; // number of trailing characters to trim - - // Truncate to this item only, if appropriate. - if (flags & HTS_PARSE_LIST) { - comma = strchr(close, ','); - if (comma) { - s_len = comma-s; - s_end = comma+1; - } - } - } else { - // Truncate to this item only, if appropriate. - if (flags & HTS_PARSE_LIST) { - comma = strchr(s, ','); - if (comma) { - s_len = comma-s; - s_end = comma+1; - } - } - - colon = hts_memrchr(s, ':', s_len); - } - - // No colon is simplest case; just check and return. - if (colon == NULL) { - *beg = 0; *end = HTS_POS_MAX; - kputsn(s, s_len-quoted, &ks); // convert to nul terminated string - if (!ks.s) { - *tid = -2; - return NULL; - } - - *tid = getid(hdr, ks.s); - free(ks.s); - - return *tid >= 0 ? s_end : NULL; - } - - // Has a colon, but check whole name first. - if (!quoted) { - *beg = 0; *end = HTS_POS_MAX; - kputsn(s, s_len, &ks); // convert to nul terminated string - if (!ks.s) { - *tid = -2; - return NULL; - } - if ((*tid = getid(hdr, ks.s)) >= 0) { - // Entire name matches, but also check this isn't - // ambiguous. eg we have ref chr1 and ref chr1:100-200 - // both present. - ks.l = 0; - kputsn(s, colon-s, &ks); // convert to nul terminated string - if (!ks.s) { - *tid = -2; - return NULL; - } - if (getid(hdr, ks.s) >= 0) { - free(ks.s); - *tid = -1; - hts_log_error("Range is ambiguous. " - "Use {%s} or {%.*s}%s instead", - s, (int)(colon-s), s, colon); - return NULL; - } - free(ks.s); - - return s_end; - } - if (*tid < -1) // Failed to parse header - return NULL; - } - - // Quoted, or unquoted and whole string isn't a name. - // Check the pre-colon part is valid. 
- ks.l = 0; - kputsn(s, colon-s-quoted, &ks); // convert to nul terminated string - if (!ks.s) { - *tid = -2; - return NULL; - } - *tid = getid(hdr, ks.s); - free(ks.s); - if (*tid < 0) - return NULL; - - // Finally parse the post-colon coordinates - char *hyphen; - *beg = hts_parse_decimal(colon+1, &hyphen, flags) - 1; - if (*beg < 0) { - if (*beg != -1 && *hyphen == '-' && colon[1] != '\0') { - // User specified zero, but we're 1-based. - hts_log_error("Coordinates must be > 0"); - return NULL; - } - if (isdigit_c(*hyphen) || *hyphen == '\0' || *hyphen == ',') { - // interpret chr:-100 as chr:1-100 - *end = *beg==-1 ? HTS_POS_MAX : -(*beg+1); - *beg = 0; - return s_end; - } else if (*beg < -1) { - hts_log_error("Unexpected string \"%s\" after region", hyphen); - return NULL; - } - } - - if (*hyphen == '\0' || ((flags & HTS_PARSE_LIST) && *hyphen == ',')) { - *end = flags & HTS_PARSE_ONE_COORD ? *beg+1 : HTS_POS_MAX; - } else if (*hyphen == '-') { - *end = hts_parse_decimal(hyphen+1, &hyphen, flags); - if (*hyphen != '\0' && *hyphen != ',') { - hts_log_error("Unexpected string \"%s\" after region", hyphen); - return NULL; - } - } else { - hts_log_error("Unexpected string \"%s\" after region", hyphen); - return NULL; - } - - if (*end == 0) - *end = HTS_POS_MAX; // interpret chr:100- as chr:100- - - if (*beg >= *end) return NULL; - - return s_end; -} - -// Next release we should mark this as deprecated? -// Use hts_parse_region above instead. 
-const char *hts_parse_reg64(const char *s, hts_pos_t *beg, hts_pos_t *end) -{ - char *hyphen; - const char *colon = strrchr(s, ':'); - if (colon == NULL) { - *beg = 0; *end = HTS_POS_MAX; - return s + strlen(s); - } - - *beg = hts_parse_decimal(colon+1, &hyphen, HTS_PARSE_THOUSANDS_SEP) - 1; - if (*beg < 0) *beg = 0; - - if (*hyphen == '\0') *end = HTS_POS_MAX; - else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, NULL, HTS_PARSE_THOUSANDS_SEP); - else return NULL; - - if (*beg >= *end) return NULL; - return colon; -} - -const char *hts_parse_reg(const char *s, int *beg, int *end) -{ - hts_pos_t beg64 = 0, end64 = 0; - const char *colon = hts_parse_reg64(s, &beg64, &end64); - if (beg64 > INT_MAX) { - hts_log_error("Position %"PRId64" too large", beg64); - return NULL; - } - if (end64 > INT_MAX) { - if (end64 == HTS_POS_MAX) { - end64 = INT_MAX; - } else { - hts_log_error("Position %"PRId64" too large", end64); - return NULL; - } - } - *beg = beg64; - *end = end64; - return colon; -} - -hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec) -{ - int tid; - hts_pos_t beg, end; - - if (strcmp(reg, ".") == 0) - return itr_query(idx, HTS_IDX_START, 0, 0, readrec); - else if (strcmp(reg, "*") == 0) - return itr_query(idx, HTS_IDX_NOCOOR, 0, 0, readrec); - - if (!hts_parse_region(reg, &tid, &beg, &end, getid, hdr, HTS_PARSE_THOUSANDS_SEP)) - return NULL; - - return itr_query(idx, tid, beg, end, readrec); -} - -hts_itr_t *hts_itr_regions(const hts_idx_t *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, hts_itr_multi_query_func *itr_specific, hts_readrec_func *readrec, hts_seek_func *seek, hts_tell_func *tell) { - - int i; - - if (!reglist) - return NULL; - - hts_itr_t *itr = (hts_itr_t*)calloc(1, sizeof(hts_itr_t)); - if (itr) { - itr->n_reg = count; - itr->readrec = readrec; - itr->seek = seek; - itr->tell = tell; - itr->reg_list = reglist; - 
itr->finished = 0; - itr->nocoor = 0; - itr->multi = 1; - - for (i = 0; i < itr->n_reg; i++) { - if (itr->reg_list[i].reg) { - if (!strcmp(itr->reg_list[i].reg, ".")) { - itr->reg_list[i].tid = HTS_IDX_START; - continue; - } - - if (!strcmp(itr->reg_list[i].reg, "*")) { - itr->reg_list[i].tid = HTS_IDX_NOCOOR; - continue; - } - - itr->reg_list[i].tid = getid(hdr, reglist[i].reg); - if (itr->reg_list[i].tid < 0) { - if (itr->reg_list[i].tid < -1) { - hts_log_error("Failed to parse header"); - hts_itr_destroy(itr); - return NULL; - } else { - hts_log_warning("Region '%s' specifies an unknown reference name. Continue anyway", reglist[i].reg); - } - } - } - } - - qsort(itr->reg_list, itr->n_reg, sizeof(hts_reglist_t), compare_regions); - if (itr_specific(idx, itr) != 0) { - hts_log_error("Failed to create the multi-region iterator!"); - hts_itr_destroy(itr); - itr = NULL; - } - } - - return itr; -} - -int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) -{ - int ret, tid; - hts_pos_t beg, end; - if (iter == NULL || iter->finished) return -1; - if (iter->read_rest) { - if (iter->curr_off) { // seek to the start - if (bgzf_seek(fp, iter->curr_off, SEEK_SET) < 0) { - hts_log_error("Failed to seek to offset %"PRIu64"%s%s", - iter->curr_off, - errno ? ": " : "", strerror(errno)); - return -2; - } - iter->curr_off = 0; // only seek once - } - ret = iter->readrec(fp, data, r, &tid, &beg, &end); - if (ret < 0) iter->finished = 1; - iter->curr_tid = tid; - iter->curr_beg = beg; - iter->curr_end = end; - return ret; - } - // A NULL iter->off should always be accompanied by iter->finished. 
- assert(iter->off != NULL); - for (;;) { - if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk - if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks - if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek - if (bgzf_seek(fp, iter->off[iter->i+1].u, SEEK_SET) < 0) { - hts_log_error("Failed to seek to offset %"PRIu64"%s%s", - iter->off[iter->i+1].u, - errno ? ": " : "", strerror(errno)); - return -2; - } - iter->curr_off = bgzf_tell(fp); - } - ++iter->i; - } - if ((ret = iter->readrec(fp, data, r, &tid, &beg, &end)) >= 0) { - iter->curr_off = bgzf_tell(fp); - if (tid != iter->tid || beg >= iter->end) { // no need to proceed - ret = -1; break; - } else if (end > iter->beg && iter->end > beg) { - iter->curr_tid = tid; - iter->curr_beg = beg; - iter->curr_end = end; - return ret; - } - } else break; // end of file or error - } - iter->finished = 1; - return ret; -} - -int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r) -{ - void *fp; - int ret, tid, i, cr, ci; - hts_pos_t beg, end; - hts_reglist_t *found_reg; - - if (iter == NULL || iter->finished) return -1; - - if (iter->is_cram) { - fp = fd->fp.cram; - } else { - fp = fd->fp.bgzf; - } - - if (iter->read_rest) { - if (iter->curr_off) { // seek to the start - if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { - hts_log_error("Seek at offset %" PRIu64 " failed.", iter->curr_off); - return -1; - } - iter->curr_off = 0; // only seek once - } - - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); - if (ret < 0) - iter->finished = 1; - - iter->curr_tid = tid; - iter->curr_beg = beg; - iter->curr_end = end; - - return ret; - } - // A NULL iter->off should always be accompanied by iter->finished. 
- assert(iter->off != NULL || iter->nocoor != 0); - - int next_range = 0; - for (;;) { - // Note that due to the way bam indexing works, iter->off may contain - // file chunks that are not actually needed as they contain data - // beyond the end of the requested region. These are filtered out - // by comparing the tid and index into hts_reglist_t::intervals - // (packed for reasons of convenience into iter->off[iter->i].max) - // associated with the file region with iter->curr_tid and - // iter->curr_intv. - - if (next_range - || iter->curr_off == 0 - || iter->i >= iter->n_off - || iter->curr_off >= iter->off[iter->i].v - || (iter->off[iter->i].max >> 32 == iter->curr_tid - && (iter->off[iter->i].max & 0xffffffff) < iter->curr_intv)) { - - // Jump to the next chunk. It may be necessary to skip more - // than one as the iter->off list can include overlapping entries. - do { - iter->i++; - } while (iter->i < iter->n_off - && (iter->curr_off >= iter->off[iter->i].v - || (iter->off[iter->i].max >> 32 == iter->curr_tid - && (iter->off[iter->i].max & 0xffffffff) < iter->curr_intv))); - - if (iter->is_cram && iter->i < iter->n_off) { - // Ensure iter->curr_reg is correct. - // - // We need this for CRAM as we shortcut some of the later - // logic by getting an end-of-range and continuing to the - // next offset. - // - // We cannot do this for BAM (and fortunately do not need to - // either) because in BAM world a query to genomic positions - // GX and GY leading to a seek offsets PX and PY may have - // GX > GY and PX < PY. (This is due to the R-tree and falling - // between intervals, bumping up to a higher bin.) - // CRAM strictly follows PX >= PY if GX >= GY, so this logic - // works. 
- int want_tid = iter->off[iter->i].max >> 32; - if (!(iter->curr_reg < iter->n_reg && - iter->reg_list[iter->curr_reg].tid == want_tid)) { - int j; - for (j = 0; j < iter->n_reg; j++) - if (iter->reg_list[j].tid == want_tid) - break; - if (j == iter->n_reg) - return -1; - iter->curr_reg = j; - iter->curr_tid = iter->reg_list[iter->curr_reg].tid; - }; - iter->curr_intv = iter->off[iter->i].max & 0xffffffff; - } - - if (iter->i >= iter->n_off) { // no more chunks, except NOCOORs - if (iter->nocoor) { - next_range = 0; - if (iter->seek(fp, iter->nocoor_off, SEEK_SET) < 0) { - hts_log_error("Seek at offset %" PRIu64 " failed.", iter->nocoor_off); - return -1; - } - if (iter->is_cram) { - cram_range r = { HTS_IDX_NOCOOR }; - cram_set_option(fp, CRAM_OPT_RANGE_NOSEEK, &r); - } - - // The first slice covering the unmapped reads might - // contain a few mapped reads, so scroll - // forward until finding the first unmapped read. - do { - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); - } while (tid >= 0 && ret >=0); - - if (ret < 0) - iter->finished = 1; - else - iter->read_rest = 1; - - iter->curr_off = 0; // don't seek any more - iter->curr_tid = tid; - iter->curr_beg = beg; - iter->curr_end = end; - - return ret; - } else { - ret = -1; break; - } - } else if (iter->i < iter->n_off) { - // New chunk may overlap the last one, so ensure we - // only seek forwards. - if (iter->curr_off < iter->off[iter->i].u || next_range) { - iter->curr_off = iter->off[iter->i].u; - - // CRAM has the capability of setting an end location. - // This means multi-threaded decodes can stop once they - // reach that point, rather than pointlessly decoding - // more slices than we'll be using. - // - // We have to be careful here. Whenever we set the cram - // range we need a corresponding seek in order to ensure - // we can safely decode at that offset. We use next_range - // var to ensure this is always true; this is set on - // end-of-range condition. It's never modified for BAM. 
- if (iter->is_cram) { - // Next offset.[uv] tuple, but it's already been - // included in our cram range, so don't seek and don't - // reset range so we can efficiently multi-thread. - if (next_range || iter->curr_off >= iter->end) { - if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { - hts_log_error("Seek at offset %" PRIu64 - " failed.", iter->curr_off); - return -1; - } - - // Find the genomic range matching this interval. - int j; - hts_reglist_t *rl = &iter->reg_list[iter->curr_reg]; - cram_range r = { - rl->tid, - rl->intervals[iter->curr_intv].beg, - rl->intervals[iter->curr_intv].end - }; - - // Expand it up to cover neighbouring intervals. - // Note we can only have a single chromosome in a - // range, so if we detect our blocks span chromosomes - // or we have a multi-ref mode slice, we just use - // HTS_IDX_START refid instead. This doesn't actually - // seek (due to CRAM_OPT_RANGE_NOSEEK) and is simply - // and indicator of decoding with no end limit. - // - // That isn't as efficient as it could be, but it's - // no poorer than before and it works. - int tid = r.refid; - int64_t end = r.end; - int64_t v = iter->off[iter->i].v; - j = iter->i+1; - while (j < iter->n_off) { - if (iter->off[j].u > v) - break; - - uint64_t max = iter->off[j].max; - if ((max>>32) != tid) - tid = HTS_IDX_START; // => no range limit - - if (end < rl->intervals[max & 0xffffffff].end) - end = rl->intervals[max & 0xffffffff].end; - if (v < iter->off[j].v) - v = iter->off[j].v; - j++; - } - r.refid = tid; - r.end = end; - - // Remember maximum 'v' here so we don't do - // unnecessary subsequent seeks for the next - // regions. We can't change curr_off, but - // beg/end are used only by single region iterator so - // we cache it there to avoid changing the struct. 
- iter->end = v; - - cram_set_option(fp, CRAM_OPT_RANGE_NOSEEK, &r); - next_range = 0; - } - } else { // Not CRAM - if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { - hts_log_error("Seek at offset %" PRIu64 " failed.", - iter->curr_off); - return -1; - } - } - } - } - } - - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); - if (ret < 0) { - if (iter->is_cram && cram_eof(fp)) { - // Skip to end of range - // - // We should never be adjusting curr_off manually unless - // we also can guarantee we'll be doing a seek after to - // a new location. Otherwise we'll be reading wrong offset - // for the next container. - // - // We ensure this by adjusting our CRAM_OPT_RANGE - // accordingly above, but to double check we also - // set the skipped_block flag to enforce a seek also. - iter->curr_off = iter->off[iter->i].v; - next_range = 1; - - // Next region - if (++iter->curr_intv >= iter->reg_list[iter->curr_reg].count){ - if (++iter->curr_reg >= iter->n_reg) - break; - iter->curr_intv = 0; - iter->curr_tid = iter->reg_list[iter->curr_reg].tid; - } - continue; - } else { - break; - } - } - - iter->curr_off = iter->tell(fp); - - if (tid != iter->curr_tid) { - hts_reglist_t key; - key.tid = tid; - - found_reg = (hts_reglist_t *)bsearch(&key, iter->reg_list, - iter->n_reg, - sizeof(hts_reglist_t), - compare_regions); - if (!found_reg) - continue; - - iter->curr_reg = (found_reg - iter->reg_list); - iter->curr_tid = tid; - iter->curr_intv = 0; - } - - cr = iter->curr_reg; - ci = iter->curr_intv; - - for (i = ci; i < iter->reg_list[cr].count; i++) { - if (end > iter->reg_list[cr].intervals[i].beg && - iter->reg_list[cr].intervals[i].end > beg) { - iter->curr_beg = beg; - iter->curr_end = end; - iter->curr_intv = i; - - return ret; - } - - // Check if the read starts beyond intervals[i].end - // If so, the interval is finished so move on to the next. 
- if (beg > iter->reg_list[cr].intervals[i].end) - iter->curr_intv = i + 1; - - // No need to keep searching if the read ends before intervals[i].beg - if (end < iter->reg_list[cr].intervals[i].beg) - break; - } - } - iter->finished = 1; - - return ret; -} - -/********************** - *** Retrieve index *** - **********************/ -// Local_fn and local_len will return a sub-region of 'fn'. -// Eg http://elsewhere/dir/foo.bam.bai?a=b may return -// foo.bam.bai via local_fn and local_len. -// -// Returns -1 if index couldn't be opened. -// -2 on other errors -static int idx_test_and_fetch(const char *fn, const char **local_fn, int *local_len, int download) -{ - hFILE *remote_hfp = NULL; - hFILE *local_fp = NULL; - int save_errno; - htsFormat fmt; - kstring_t s = KS_INITIALIZE; - kstring_t tmps = KS_INITIALIZE; - - if (hisremote(fn)) { - const int buf_size = 1 * 1024 * 1024; - int l; - const char *p, *e; - // Ignore ?# params: eg any file.fmt?param=val, except for S3 URLs - e = fn + ((strncmp(fn, "s3://", 5) && strncmp(fn, "s3+http://", 10) && strncmp(fn, "s3+https://", 11)) ? strcspn(fn, "?#") : strcspn(fn, "?")); - // Find the previous slash from there. - p = e; - while (p > fn && *p != '/') p--; - if (*p == '/') p++; - - // Attempt to open local file first - kputsn(p, e-p, &s); - if (access(s.s, R_OK) == 0) - { - free(s.s); - *local_fn = p; - *local_len = e-p; - return 0; - } - - // Attempt to open remote file. Stay quiet on failure, it is OK to fail when trying first .csi then .bai or .tbi index. 
- if ((remote_hfp = hopen(fn, "r")) == 0) { - hts_log_info("Failed to open index file '%s'", fn); - free(s.s); - return -1; - } - if (hts_detect_format2(remote_hfp, fn, &fmt)) { - hts_log_error("Failed to detect format of index file '%s'", fn); - goto fail; - } - if (fmt.category != index_file || (fmt.format != bai && fmt.format != csi && fmt.format != tbi - && fmt.format != crai && fmt.format != fai_format)) { - hts_log_error("Format of index file '%s' is not supported", fn); - goto fail; - } - - if (download) { - if ((local_fp = hts_open_tmpfile(s.s, "wx", &tmps)) == NULL) { - hts_log_error("Failed to create file %s in the working directory", p); - goto fail; - } - hts_log_info("Downloading file %s to local directory", fn); - uint8_t *buf = (uint8_t*)calloc(buf_size, 1); - if (!buf) { - hts_log_error("%s", strerror(errno)); - goto fail; - } - while ((l = hread(remote_hfp, buf, buf_size)) > 0) { - if (hwrite(local_fp, buf, l) != l) { - hts_log_error("Failed to write data to %s : %s", - fn, strerror(errno)); - free(buf); - goto fail; - } - } - free(buf); - if (l < 0) { - hts_log_error("Error reading \"%s\"", fn); - goto fail; - } - if (hclose(local_fp) < 0) { - hts_log_error("Error closing %s : %s", fn, strerror(errno)); - local_fp = NULL; - goto fail; - } - local_fp = NULL; - if (rename(tmps.s, s.s) < 0) { - hts_log_error("Error renaming %s : %s", tmps.s, strerror(errno)); - goto fail; - } - ks_clear(&tmps); - - *local_fn = p; - *local_len = e-p; - } else { - *local_fn = fn; - *local_len = e-fn; - } - - if (hclose(remote_hfp) != 0) { - hts_log_error("Failed to close remote file %s", fn); - } - - free(tmps.s); - free(s.s); - return 0; - } else { - hFILE *local_hfp; - if ((local_hfp = hopen(fn, "r")) == 0) return -1; - hclose_abruptly(local_hfp); - *local_fn = fn; - *local_len = strlen(fn); - return 0; - } - - fail: - save_errno = errno; - if (remote_hfp) hclose_abruptly(remote_hfp); - if (local_fp) hclose_abruptly(local_fp); - if (tmps.l > 0) unlink(tmps.s); - 
free(tmps.s); - free(s.s); - errno = save_errno; - return -2; -} - -/* - * Check the existence of a local index file using part of the alignment file name. - * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai - * @param fn - pointer to the file name - * @param fnidx - pointer to the index file name placeholder - * @return 1 for success, 0 for failure - */ -int hts_idx_check_local(const char *fn, int fmt, char **fnidx) { - int i, l_fn, l_ext; - const char *fn_tmp = NULL; - char *fnidx_tmp; - char *csi_ext = ".csi"; - char *bai_ext = ".bai"; - char *tbi_ext = ".tbi"; - char *crai_ext = ".crai"; - char *fai_ext = ".fai"; - - if (!fn) - return 0; - - if (hisremote(fn)) { - for (i = strlen(fn) - 1; i >= 0; --i) - if (fn[i] == '/') { - fn_tmp = (char *)&fn[i+1]; - break; - } - } else { - // Borrowed from hopen_fd_fileuri() - if (strncmp(fn, "file://localhost/", 17) == 0) fn_tmp = fn + 16; - else if (strncmp(fn, "file:///", 8) == 0) fn_tmp = fn + 7; - else fn_tmp = fn; -#if defined(_WIN32) || defined(__MSYS__) - // For cases like C:/foo - if (fn_tmp[0] == '/' && fn_tmp[1] && fn_tmp[2] == ':' && fn_tmp[3] == '/') - fn_tmp++; -#endif - } - - if (!fn_tmp) return 0; - hts_log_info("Using alignment file '%s'", fn_tmp); - l_fn = strlen(fn_tmp); l_ext = 5; - fnidx_tmp = (char*)calloc(l_fn + l_ext + 1, 1); - if (!fnidx_tmp) return 0; - - struct stat sbuf; - - // Try alignment.bam.csi first - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, csi_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } else { // Then try alignment.csi - for (i = l_fn - 1; i > 0; --i) - if (fnidx_tmp[i] == '.') { - strcpy(fnidx_tmp + i, csi_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } - break; - } - } - if (fmt == HTS_FMT_BAI) { - // Next, try alignment.bam.bai - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, bai_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } else { // And 
finally, try alignment.bai - for (i = l_fn - 1; i > 0; --i) - if (fnidx_tmp[i] == '.') { - strcpy(fnidx_tmp + i, bai_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } - break; - } - } - } else if (fmt == HTS_FMT_TBI) { // Or .tbi - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, tbi_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } else { - for (i = l_fn - 1; i > 0; --i) - if (fnidx_tmp[i] == '.') { - strcpy(fnidx_tmp + i, tbi_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } - break; - } - } - } else if (fmt == HTS_FMT_CRAI) { // Or .crai - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, crai_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } else { - for (i = l_fn - 1; i > 0; --i) - if (fnidx_tmp[i] == '.') { - strcpy(fnidx_tmp + i, crai_ext); - if(stat(fnidx_tmp, &sbuf) == 0) { - *fnidx = fnidx_tmp; - return 1; - } - break; - } - } - } else if (fmt == HTS_FMT_FAI) { // Or .fai - strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, fai_ext); - *fnidx = fnidx_tmp; - if(stat(fnidx_tmp, &sbuf) == 0) - return 1; - else - return 0; - } - - free(fnidx_tmp); - return 0; -} - -static char *idx_filename(const char *fn, const char *ext, int download) { - int ret, local_len; - char *fnidx; - const char *local_fn = NULL; - kstring_t buffer = KS_INITIALIZE; - - // First try : append `ext` to `fn` - if (!(fnidx = haddextension(&buffer, fn, 0, ext))) { - free(buffer.s); - return NULL; - } - if ((ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, download)) == -1) { - // Second try : replace suffix of `fn` with `ext` - if (!(fnidx = haddextension(&buffer, fn, 1, ext))) { - free(buffer.s); - return NULL; - } - ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, download); - } - - if (ret < 0) { - free(buffer.s); - return NULL; - } - - memmove(fnidx, local_fn, local_len); - fnidx[local_len] = 0; - return fnidx; -} - -char *hts_idx_getfn(const char 
*fn, const char *ext) -{ - return idx_filename(fn, ext, HTS_IDX_SAVE_REMOTE); -} - -char *hts_idx_locatefn(const char *fn, const char *ext) -{ - return idx_filename(fn, ext, 0); -} - -static hts_idx_t *idx_find_and_load(const char *fn, int fmt, int flags) -{ - char *fnidx = strstr(fn, HTS_IDX_DELIM); - hts_idx_t *idx; - - if ( fnidx ) { - char *fn2 = strdup(fn); - if (!fn2) { - hts_log_error("%s", strerror(errno)); - return NULL; - } - fn2[fnidx - fn] = '\0'; - fnidx += strlen(HTS_IDX_DELIM); - idx = hts_idx_load3(fn2, fnidx, fmt, flags); - free(fn2); - return idx; - } - - if (hts_idx_check_local(fn, fmt, &fnidx) == 0 && hisremote(fn)) { - if (flags & HTS_IDX_SAVE_REMOTE) { - fnidx = idx_filename(fn, ".csi", HTS_IDX_SAVE_REMOTE); - if (!fnidx) { - switch (fmt) { - case HTS_FMT_BAI: fnidx = idx_filename(fn, ".bai", HTS_IDX_SAVE_REMOTE); break; - case HTS_FMT_TBI: fnidx = idx_filename(fn, ".tbi", HTS_IDX_SAVE_REMOTE); break; - default: break; - } - } - } else { - fnidx = idx_filename(fn, ".csi", 0); - if (!fnidx) { - switch (fmt) { - case HTS_FMT_BAI: fnidx = idx_filename(fn, ".bai", 0); break; - case HTS_FMT_TBI: fnidx = idx_filename(fn, ".tbi", 0); break; - default: break; - } - } - } - } - if (!fnidx) { - if (!(flags & HTS_IDX_SILENT_FAIL)) - hts_log_error("Could not retrieve index file for '%s'", fn); - return 0; - } - - if (flags & HTS_IDX_SAVE_REMOTE) - idx = hts_idx_load3(fn, fnidx, fmt, flags); - else - idx = idx_read(fnidx); - free(fnidx); - return idx; -} - -hts_idx_t *hts_idx_load(const char *fn, int fmt) { - return idx_find_and_load(fn, fmt, 1); -} - -hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx) -{ - return hts_idx_load3(fn, fnidx, 0, 0); -} - -hts_idx_t *hts_idx_load3(const char *fn, const char *fnidx, int fmt, int flags) -{ - const char *local_fn = NULL; - char *local_fnidx = NULL; - int local_len; - if (!fnidx) - return idx_find_and_load(fn, fmt, flags); - - // Check that the index file is up to date, the main file might have changed - 
struct stat stat_idx,stat_main; - int remote_fn = hisremote(fn), remote_fnidx = hisremote(fnidx); - if ( !remote_fn && !remote_fnidx - && !stat(fn, &stat_main) && !stat(fnidx, &stat_idx) ) - { - if ( stat_idx.st_mtime < stat_main.st_mtime ) - hts_log_warning("The index file is older than the data file: %s", fnidx); - } - - if (remote_fnidx && (flags & HTS_IDX_SAVE_REMOTE)) - { - int ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, 1); - if (ret == 0) { - local_fnidx = strdup(local_fn); - if (local_fnidx) { - local_fnidx[local_len] = '\0'; - fnidx = local_fnidx; - } - } - } - - hts_idx_t *idx = idx_read(fnidx); - if (!idx && !(flags & HTS_IDX_SILENT_FAIL)) - hts_log_error("Could not load local index file '%s'%s%s", fnidx, - errno ? " : " : "", errno ? strerror(errno) : ""); - - - free(local_fnidx); - - return idx; -} - - - -/********************** - *** Memory *** - **********************/ - -/* For use with hts_expand macros *only* */ -HTSLIB_EXPORT -size_t hts_realloc_or_die(size_t n, size_t m, size_t m_sz, size_t size, - int clear, void **ptr, const char *func) { - /* If new_m and size are both below this limit, multiplying them - together can't overflow */ - const size_t safe = (size_t) 1 << (sizeof(size_t) * 4); - void *new_ptr; - size_t bytes, new_m; - - new_m = n; - kroundup_size_t(new_m); - - bytes = size * new_m; - - /* Check for overflow. Both ensure that new_m will fit in m (we make the - pessimistic assumption that m is signed), and that bytes has not - wrapped around. 
*/ - if (new_m > (((size_t) 1 << (m_sz * 8 - 1)) - 1) - || ((size > safe || new_m > safe) - && bytes / new_m != size)) { - errno = ENOMEM; - goto die; - } - - new_ptr = realloc(*ptr, bytes); - if (new_ptr == NULL) goto die; - - if (clear) { - if (new_m > m) { - memset((char *) new_ptr + m * size, 0, (new_m - m) * size); - } - } - - *ptr = new_ptr; - - return new_m; - - die: - hts_log_error("%s", strerror(errno)); - exit(1); -} - -/* - * Companion to hts_resize() macro that does the actual allocation. - * - * Somewhat complicated as hts_resize() needs to write the new allocated - * size back into *size_in_out, and the value pointed to may either be - * int32_t, uint32_t or size_t depending on which array is being resized. - * This is solved by making `size_in_out` a void pointer, getting the macro - * to pass in the size of the item pointed to (in `size_sz`) and then using - * an appropriate cast (based on the value of size_sz). The function - * ensures that the maximum size will be storable in a signed type of - * the given size so storing to an int32_t should work correctly. - * - * Assumes that sizeof(uint32_t) and sizeof(int32_t) is 4, - * sizeof(uint64_t) and sizeof(int64_t) is 8 and sizeof(size_t) is - * either 4 or 8. It also assumes casting from unsigned to signed will - * work as long as the top bit isn't set. - */ - -int hts_resize_array_(size_t item_size, size_t num, size_t size_sz, - void *size_in_out, void **ptr_in_out, int flags, - const char *func) { - /* If new_size and item_size are both below this limit, multiplying them - together can't overflow */ - const size_t safe = (size_t) 1 << (sizeof(size_t) * 4); - void *new_ptr; - size_t bytes, new_size; - - new_size = num; - kroundup_size_t(new_size); - bytes = item_size * new_size; - - /* Check for overflow. Both ensure that alloc will fit in alloc_in_out (we - make the pessimistic assumption that *alloc_in_out is signed), and that - bytes has not wrapped around. 
*/ - - if ((new_size > (((size_t) 1 << (size_sz * 8 - 1)) - 1)) - || (((item_size > safe) || (new_size > safe)) - && bytes / new_size != item_size)) { - hts_log(HTS_LOG_ERROR, func, "Memory allocation too large"); - errno = ENOMEM; - return -1; - } - - new_ptr = realloc(*ptr_in_out, bytes); - if (new_ptr == NULL) { - int save_errno = errno; - hts_log(HTS_LOG_ERROR, func, "%s", strerror(errno)); - errno = save_errno; - return -1; - } - - if (flags & HTS_RESIZE_CLEAR) { - size_t old_size; - switch (size_sz) { - case 4: old_size = *((uint32_t *) size_in_out); break; - case 8: old_size = *((uint64_t *) size_in_out); break; - default: abort(); - } - if (new_size > old_size) { - memset((char *) new_ptr + old_size * item_size, 0, - (new_size - old_size) * item_size); - } - } - - switch (size_sz) { - case 4: *((uint32_t *) size_in_out) = new_size; break; - case 8: *((uint64_t *) size_in_out) = new_size; break; - default: abort(); - } - - *ptr_in_out = new_ptr; - return 0; -} - -void hts_lib_shutdown(void) -{ - hfile_shutdown(1); -} - -void hts_free(void *ptr) { - free(ptr); -} - -void hts_set_log_level(enum htsLogLevel level) -{ - hts_verbose = level; -} - -enum htsLogLevel hts_get_log_level(void) -{ - return hts_verbose; -} - -static char get_severity_tag(enum htsLogLevel severity) -{ - switch (severity) { - case HTS_LOG_ERROR: - return 'E'; - case HTS_LOG_WARNING: - return 'W'; - case HTS_LOG_INFO: - return 'I'; - case HTS_LOG_DEBUG: - return 'D'; - case HTS_LOG_TRACE: - return 'T'; - default: - break; - } - - return '*'; -} - -void hts_log(enum htsLogLevel severity, const char *context, const char *format, ...) 
-{ - int save_errno = errno; - if (severity <= hts_verbose) { - va_list argptr; - - fprintf(stderr, "[%c::%s] ", get_severity_tag(severity), context); - - va_start(argptr, format); - vfprintf(stderr, format, argptr); - va_end(argptr); - - fprintf(stderr, "\n"); - } - errno = save_errno; -} diff --git a/src/htslib-1.19.1/hts_expr.c b/src/htslib-1.19.1/hts_expr.c deleted file mode 100644 index 5e5a132..0000000 --- a/src/htslib-1.19.1/hts_expr.c +++ /dev/null @@ -1,921 +0,0 @@ -/* hts_expr.c -- filter expression parsing and processing. - - Copyright (C) 2020-2022 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notices and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -// TODO: -// - ?: operator for conditionals? 
- -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "htslib/hts_expr.h" -#include "htslib/hts_log.h" -#include "textutils_internal.h" - -// Could also cache hts_expr_val_t stack here for kstring reuse? -#define MAX_REGEX 10 -struct hts_filter_t { - char *str; - int parsed; - int curr_regex, max_regex; - regex_t preg[MAX_REGEX]; -}; - -/* - * This is designed to be mostly C like with mostly same the precedence rules, - * with the exception of bit operators (widely considered as a mistake in C). - * It's not full C (eg no bit-shifting), but good enough for our purposes. - * - * Supported syntax, in order of precedence: - * - * Grouping: (, ), eg "(1+2)*3" - * Values: integers, floats, strings or variables - * Unary ops: +, -, !, ~ eg -10 +10, !10 (0), ~5 (bitwise not) - * Math ops: *, /, % [TODO: add // for floor division?] - * Math ops: +, - - * Bit-wise: &, ^, | [NB as 3 precedence levels, in that order] - * Conditionals: >, >=, <, <=, - * Equality: ==, !=, =~, !~ - * Boolean: &&, || - */ - -// Skip to start of term -static char *ws(char *str) { - while (*str && (*str == ' ' || *str == '\t')) - str++; - return str; -} - -static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res); - -/* - * Simple functions operating on strings only. - * length, min, max, avg. - * - * All return 0 on success, - * -1 on failure - */ -static int expr_func_length(hts_expr_val_t *res) { - if (!res->is_str) - return -1; - - res->is_str = 0; - res->d = res->s.l; - return 0; -} - -static int expr_func_min(hts_expr_val_t *res) { - if (!res->is_str) - return -1; - - size_t l = res->s.l; - int v = INT_MAX; - const uint8_t *x = (uint8_t *)res->s.s; - for (l = 0; l < res->s.l; l++) - if (v > x[l]) - v = x[l]; - - res->is_str = 0; - res->d = v == INT_MAX ? 
NAN : v; - - return 0; -} - -static int expr_func_max(hts_expr_val_t *res) { - if (!res->is_str) - return -1; - - size_t l = res->s.l; - int v = INT_MIN; - const uint8_t *x = (uint8_t *)res->s.s; - for (l = 0; l < res->s.l; l++) - if (v < x[l]) - v = x[l]; - - res->is_str = 0; - res->d = v == INT_MIN ? NAN : v; - - return 0; -} - -static int expr_func_avg(hts_expr_val_t *res) { - if (!res->is_str) - return -1; - - size_t l = res->s.l; - double v = 0; - const uint8_t *x = (uint8_t *)res->s.s; - for (l = 0; l < res->s.l; l++) - v += x[l]; - if (l) - v /= l; - - res->is_str = 0; - res->d = v; - - return 0; -} - -/* - * functions: FUNC(expr). - * Note for simplicity of parsing, the "(" must immediately follow FUNC, - * so "FUNC (x)" is invalid. - */ -static int func_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - int func_ok = -1; - switch (*str) { - case 'a': - if (strncmp(str, "avg(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = expr_func_avg(res); - } - break; - - case 'd': - if (strncmp(str, "default(", 8) == 0) { - if (expression(filt, data, fn, str+8, end, res)) return -1; - if (**end != ',') - return -1; - (*end)++; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - if (expression(filt, data, fn, ws(*end), end, &val)) return -1; - func_ok = 1; - if (!hts_expr_val_existsT(res)) { - kstring_t swap = res->s; - *res = val; - val.s = swap; - hts_expr_val_free(&val); - } - } - break; - - case 'e': - if (strncmp(str, "exists(", 7) == 0) { - if (expression(filt, data, fn, str+7, end, res)) return -1; - func_ok = 1; - res->is_true = res->d = hts_expr_val_existsT(res); - res->is_str = 0; - } else if (strncmp(str, "exp(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = 1; - res->d = exp(res->d); - res->is_str = 0; - if (isnan(res->d)) - hts_expr_val_undef(res); - } - - break; - - case 'l': - if (strncmp(str, "length(", 7) == 0) { - if 
(expression(filt, data, fn, str+7, end, res)) return -1; - func_ok = expr_func_length(res); - } else if (strncmp(str, "log(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = 1; - res->d = log(res->d); - res->is_str = 0; - if (isnan(res->d)) - hts_expr_val_undef(res); - } - break; - - case 'm': - if (strncmp(str, "min(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = expr_func_min(res); - } else if (strncmp(str, "max(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = expr_func_max(res); - } - break; - - case 'p': - if (strncmp(str, "pow(", 4) == 0) { - if (expression(filt, data, fn, str+4, end, res)) return -1; - func_ok = 1; - - if (**end != ',') - return -1; - (*end)++; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - if (expression(filt, data, fn, ws(*end), end, &val)) return -1; - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else if (res->is_str || val.is_str) { - hts_expr_val_free(&val); // arith on strings - return -1; - } else { - func_ok = 1; - res->d = pow(res->d, val.d); - hts_expr_val_free(&val); - res->is_str = 0; - } - - if (isnan(res->d)) - hts_expr_val_undef(res); - } - break; - - case 's': - if (strncmp(str, "sqrt(", 5) == 0) { - if (expression(filt, data, fn, str+5, end, res)) return -1; - func_ok = 1; - res->d = sqrt(res->d); - res->is_str = 0; - if (isnan(res->d)) - hts_expr_val_undef(res); - } - break; - } - - if (func_ok < 0) - return -1; - - str = ws(*end); - if (*str != ')') { - fprintf(stderr, "Missing ')'\n"); - return -1; - } - *end = str+1; - - return 0; -} - -/* - * simple_expr - * : identifier - * | constant - * | string - * | func_expr - * | '(' expression ')' -*/ -static int simple_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - // Main recursion step - str = ws(str); - if (*str == '(') { - if (expression(filt, data, fn, 
str+1, end, res)) return -1; - str = ws(*end); - if (*str != ')') { - fprintf(stderr, "Missing ')'\n"); - return -1; - } - *end = str+1; - - return 0; - } - - // Otherwise a basic element. - int fail = 0; - double d = hts_str2dbl(str, end, &fail); - if (str != *end) { - res->is_str = 0; - res->d = d; - } else { - // Not valid floating point syntax. - // TODO: add function call names in here; len(), sqrt(), pow(), etc - if (*str == '"') { - res->is_str = 1; - char *e = str+1; - int backslash = 0; - while (*e && *e != '"') { - if (*e == '\\') - backslash=1, e+=1+(e[1]!='\0'); - else - e++; - } - - kputsn(str+1, e-(str+1), ks_clear(&res->s)); - if (backslash) { - size_t i, j; - for (i = j = 0; i < res->s.l; i++) { - res->s.s[j++] = res->s.s[i]; - if (res->s.s[i] == '\\') { - switch (res->s.s[++i]) { - case '"': res->s.s[j-1] = '"'; break; - case '\\':res->s.s[j-1] = '\\'; break; - case 't': res->s.s[j-1] = '\t'; break; - case 'n': res->s.s[j-1] = '\n'; break; - case 'r': res->s.s[j-1] = '\r'; break; - default: res->s.s[j++] = res->s.s[i]; - } - } - } - res->s.s[j] = 0; - res->s.l = j; - } - if (*e != '"') - return -1; - *end = e+1; - } else if (fn) { - // Try lookup as variable, if not as function - if (fn(data, str, end, res) == 0) - return 0; - else - return func_expr(filt, data, fn, str, end, res); - } else { - return -1; - } - } - - return 0; -} - -/* - * unary_expr - * : simple_expr - * | '+' simple_expr - * | '-' simple_expr - * | '!' 
unary_expr // higher precedence - * | '~' unary_expr // higher precedence - */ -static int unary_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - int err; - str = ws(str); - if (*str == '+' || *str == '-') { - err = simple_expr(filt, data, fn, str+1, end, res); - if (!hts_expr_val_exists(res)) { - hts_expr_val_undef(res); - } else { - err |= res->is_str; - if (*str == '-') - res->d = -res->d; - res->is_true = res->d != 0; - } - } else if (*str == '!') { - err = unary_expr(filt, data, fn, str+1, end, res); - if (res->is_true) { - // Any explicitly true value becomes false - res->d = res->is_true = 0; - } else if (!hts_expr_val_exists(res)) { - // We can also still negate undef values by toggling the - // is_true override value. - res->d = res->is_true = !res->is_true; - } else if (res->is_str) { - // !null = true, !"foo" = false, NOTE: !"" = false also - res->d = res->is_true = (res->s.s == NULL); - } else { - res->d = !(int64_t)res->d; - res->is_true = res->d != 0; - } - res->is_str = 0; - } else if (*str == '~') { - err = unary_expr(filt, data, fn, str+1, end, res); - if (!hts_expr_val_exists(res)) { - hts_expr_val_undef(res); - } else { - err |= res->is_str; - if (!hts_expr_val_exists(res)) { - hts_expr_val_undef(res); - } else { - res->d = ~(int64_t)res->d; - res->is_true = res->d != 0; - } - } - } else { - err = simple_expr(filt, data, fn, str, end, res); - } - return err ? 
-1 : 0; -} - - -/* - * mul_expr - * : unary_expr ( - * '*' unary_expr - * | '/' unary_expr - * | '%' unary_expr - * )* - */ -static int mul_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (unary_expr(filt, data, fn, str, end, res)) - return -1; - - str = *end; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - while (*str) { - str = ws(str); - if (*str == '*' || *str == '/' || *str == '%') { - if (unary_expr(filt, data, fn, str+1, end, &val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - hts_expr_val_undef(res); - } else if (val.is_str || res->is_str) { - hts_expr_val_free(&val); - return -1; // arith on strings - } - } - - if (*str == '*') - res->d *= val.d; - else if (*str == '/') - res->d /= val.d; - else if (*str == '%') { - if (val.d) - res->d = (int64_t)res->d % (int64_t)val.d; - else - hts_expr_val_undef(res); - } else - break; - - res->is_true = hts_expr_val_exists(res) && (res->d != 0); - str = *end; - } - - hts_expr_val_free(&val); - - return 0; -} - -/* - * add_expr - * : mul_expr ( - * '+' mul_expr - * | '-' mul_expr - * )* - */ -static int add_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (mul_expr(filt, data, fn, str, end, res)) - return -1; - - str = *end; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - while (*str) { - str = ws(str); - int undef = 0; - if (*str == '+' || *str == '-') { - if (mul_expr(filt, data, fn, str+1, end, &val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - undef = 1; - } else if (val.is_str || res->is_str) { - hts_expr_val_free(&val); - return -1; // arith on strings - } - } - - if (*str == '+') - res->d += val.d; - else if (*str == '-') - res->d -= val.d; - else - break; - - if (undef) - hts_expr_val_undef(res); - else - res->is_true = res->d != 0; - - str = *end; - } - - hts_expr_val_free(&val); - - return 0; -} - -/* - * bitand_expr - * : 
add_expr - * | bitand_expr '&' add_expr - */ -static int bitand_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (add_expr(filt, data, fn, str, end, res)) return -1; - - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - int undef = 0; - for (;;) { - str = ws(*end); - if (*str == '&' && str[1] != '&') { - if (add_expr(filt, data, fn, str+1, end, &val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - undef = 1; - } else if (res->is_str || val.is_str) { - hts_expr_val_free(&val); - return -1; - } - res->is_true = (res->d = ((int64_t)res->d & (int64_t)val.d)) != 0; - } else { - break; - } - } - hts_expr_val_free(&val); - if (undef) - hts_expr_val_undef(res); - - return 0; -} - -/* - * bitxor_expr - * : bitand_expr - * | bitxor_expr '^' bitand_expr - */ -static int bitxor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (bitand_expr(filt, data, fn, str, end, res)) return -1; - - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - int undef = 0; - for (;;) { - str = ws(*end); - if (*str == '^') { - if (bitand_expr(filt, data, fn, str+1, end, &val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - undef = 1; - } else if (res->is_str || val.is_str) { - hts_expr_val_free(&val); - return -1; - } - res->is_true = (res->d = ((int64_t)res->d ^ (int64_t)val.d)) != 0; - } else { - break; - } - } - hts_expr_val_free(&val); - if (undef) - hts_expr_val_undef(res); - - return 0; -} - -/* - * bitor_expr - * : bitxor_expr - * | bitor_expr '|' bitxor_expr - */ -static int bitor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (bitxor_expr(filt, data, fn, str, end, res)) return -1; - - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - int undef = 0; - for (;;) { - str = ws(*end); - if (*str == '|' && str[1] != '|') { - if (bitxor_expr(filt, data, fn, str+1, end, 
&val)) return -1; - if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { - undef = 1; - } else if (res->is_str || val.is_str) { - hts_expr_val_free(&val); - return -1; - } - res->is_true = (res->d = ((int64_t)res->d | (int64_t)val.d)) != 0; - } else { - break; - } - } - hts_expr_val_free(&val); - if (undef) - hts_expr_val_undef(res); - - return 0; -} - -/* - * cmp_expr - * : bitor_expr - * | cmp_expr '<=' bitor_expr - * | cmp_expr '<' bitor_expr - * | cmp_expr '>=' bitor_expr - * | cmp_expr '>' bitor_expr - */ -static int cmp_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (bitor_expr(filt, data, fn, str, end, res)) return -1; - - str = ws(*end); - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - int err = 0, cmp_done = 0; - - if (*str == '>' && str[1] == '=') { - cmp_done = 1; - err = cmp_expr(filt, data, fn, str+2, end, &val); - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true=res->d - = res->is_str && res->s.s && val.is_str && val.s.s - ? strcmp(res->s.s, val.s.s) >= 0 - : !res->is_str && !val.is_str && res->d >= val.d; - res->is_str = 0; - } - } else if (*str == '>') { - cmp_done = 1; - err = cmp_expr(filt, data, fn, str+1, end, &val); - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true=res->d - = res->is_str && res->s.s && val.is_str && val.s.s - ? strcmp(res->s.s, val.s.s) > 0 - : !res->is_str && !val.is_str && res->d > val.d; - res->is_str = 0; - } - } else if (*str == '<' && str[1] == '=') { - cmp_done = 1; - err = cmp_expr(filt, data, fn, str+2, end, &val); - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true=res->d - = res->is_str && res->s.s && val.is_str && val.s.s - ? 
strcmp(res->s.s, val.s.s) <= 0 - : !res->is_str && !val.is_str && res->d <= val.d; - res->is_str = 0; - } - } else if (*str == '<') { - cmp_done = 1; - err = cmp_expr(filt, data, fn, str+1, end, &val); - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true=res->d - = res->is_str && res->s.s && val.is_str && val.s.s - ? strcmp(res->s.s, val.s.s) < 0 - : !res->is_str && !val.is_str && res->d < val.d; - res->is_str = 0; - } - } - - if (cmp_done && (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res))) - hts_expr_val_undef(res); - hts_expr_val_free(&val); - - return err ? -1 : 0; -} - -/* - * eq_expr - * : cmp_expr - * | eq_expr '==' cmp_expr - * | eq_expr '!=' cmp_expr - * | eq_expr '=~' cmp_expr - * | eq_expr '!~' cmp_expr - */ -static int eq_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (cmp_expr(filt, data, fn, str, end, res)) return -1; - - str = ws(*end); - - int err = 0, eq_done = 0; - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - - // numeric vs numeric comparison is as expected - // string vs string comparison is as expected - // numeric vs string is false - if (str[0] == '=' && str[1] == '=') { - eq_done = 1; - if ((err = eq_expr(filt, data, fn, str+2, end, &val))) { - res->is_true = res->d = 0; - } else { - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true = res->d = res->is_str - ? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s)==0 :0) - : !res->is_str && !val.is_str && res->d == val.d; - } - } - res->is_str = 0; - - } else if (str[0] == '!' && str[1] == '=') { - eq_done = 1; - if ((err = eq_expr(filt, data, fn, str+2, end, &val))) { - res->is_true = res->d = 0; - } else { - if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { - hts_expr_val_undef(res); - } else { - res->is_true = res->d = res->is_str - ? 
(res->s.s && val.s.s ?strcmp(res->s.s, val.s.s) != 0 :1) - : res->is_str != val.is_str || res->d != val.d; - } - } - res->is_str = 0; - - } else if ((str[0] == '=' && str[1] == '~') || - (str[0] == '!' && str[1] == '~')) { - eq_done = 1; - err = eq_expr(filt, data, fn, str+2, end, &val); - if (!val.is_str || !res->is_str) { - hts_expr_val_free(&val); - return -1; - } - if (val.s.s && res->s.s && val.is_true >= 0 && res->is_true >= 0) { - regex_t preg_, *preg; - if (filt->curr_regex >= filt->max_regex) { - // Compile regex if not seen before - if (filt->curr_regex >= MAX_REGEX) { - preg = &preg_; - } else { - preg = &filt->preg[filt->curr_regex]; - filt->max_regex++; - } - - int ec = regcomp(preg, val.s.s, REG_EXTENDED | REG_NOSUB); - if (ec != 0) { - char errbuf[1024]; - regerror(ec, preg, errbuf, 1024); - fprintf(stderr, "Failed regex: %.1024s\n", errbuf); - hts_expr_val_free(&val); - return -1; - } - } else { - preg = &filt->preg[filt->curr_regex]; - } - res->is_true = res->d = regexec(preg, res->s.s, 0, NULL, 0) == 0 - ? *str == '=' // matcn - : *str == '!'; // no-match - if (preg == &preg_) - regfree(preg); - - filt->curr_regex++; - } else { - // nul regexp or input is considered false - res->is_true = 0; - } - res->is_str = 0; - } - - if (eq_done && ((!hts_expr_val_exists(&val)) || !hts_expr_val_exists(res))) - hts_expr_val_undef(res); - hts_expr_val_free(&val); - - return err ? 
-1 : 0; -} - -/* - * and_expr - * : eq_expr - * | and_expr 'and' eq_expr - * | and_expr 'or' eq_expr - */ -static int and_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - if (eq_expr(filt, data, fn, str, end, res)) return -1; - - for (;;) { - hts_expr_val_t val = HTS_EXPR_VAL_INIT; - str = ws(*end); - if (str[0] == '&' && str[1] == '&') { - if (eq_expr(filt, data, fn, str+2, end, &val)) return -1; - if (!hts_expr_val_existsT(res) || !hts_expr_val_existsT(&val)) { - hts_expr_val_undef(res); - res->d = 0; - } else { - res->is_true = res->d = - (res->is_true || (res->is_str && res->s.s) || res->d) && - (val.is_true || (val.is_str && val.s.s) || val.d); - res->is_str = 0; - } - } else if (str[0] == '|' && str[1] == '|') { - if (eq_expr(filt, data, fn, str+2, end, &val)) return -1; - if (!hts_expr_val_existsT(res) && !hts_expr_val_existsT(&val)) { - // neither defined - hts_expr_val_undef(res); - res->d = 0; - } else if (!hts_expr_val_existsT(res) && - !(val.is_true || (val.is_str && val.s.s ) || val.d)) { - // LHS undef and RHS false - hts_expr_val_undef(res); - res->d = 0; - } else if (!hts_expr_val_existsT(&val) && - !(res->is_true || (res->is_str && res->s.s) || res->d)){ - // RHS undef and LHS false - hts_expr_val_undef(res); - res->d = 0; - } else { - res->is_true = res->d = - res->is_true || (res->is_str && res->s.s) || res->d || - val.is_true || (val.is_str && val.s.s ) || val.d; - res->is_str = 0; - } - } else { - break; - } - hts_expr_val_free(&val); - } - - return 0; -} - -static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, - char *str, char **end, hts_expr_val_t *res) { - return and_expr(filt, data, fn, str, end, res); -} - -hts_filter_t *hts_filter_init(const char *str) { - hts_filter_t *f = calloc(1, sizeof(*f)); - if (!f) return NULL; - - // Oversize to permit faster comparisons with memcmp over strcmp - size_t len = strlen(str)+100; - if (!(f->str = malloc(len))) { - 
free(f); - return NULL; - } - strcpy(f->str, str); - return f; -} - -void hts_filter_free(hts_filter_t *filt) { - if (!filt) - return; - - int i; - for (i = 0; i < filt->max_regex; i++) - regfree(&filt->preg[i]); - - free(filt->str); - free(filt); -} - -static int hts_filter_eval_(hts_filter_t *filt, - void *data, hts_expr_sym_func *fn, - hts_expr_val_t *res) { - char *end = NULL; - - filt->curr_regex = 0; - if (expression(filt, data, fn, filt->str, &end, res)) - return -1; - - if (end && *ws(end)) { - fprintf(stderr, "Unable to parse expression at %s\n", filt->str); - return -1; - } - - // Strings evaluate to true. An empty string is also true, but an - // absent (null) string is false, unless overriden by is_true. An - // empty string has kstring length of zero, but a pointer as it's - // nul-terminated. - if (res->is_str) { - res->is_true |= res->s.s != NULL; - res->d = res->is_true; - } else if (hts_expr_val_exists(res)) { - res->is_true |= res->d != 0; - } - - return 0; -} - -int hts_filter_eval(hts_filter_t *filt, - void *data, hts_expr_sym_func *fn, - hts_expr_val_t *res) { - if (res->s.l != 0 || res->s.m != 0 || res->s.s != NULL) { - // As *res is cleared below, it's not safe to call this function - // with res->s.s set, as memory would be leaked. It's also not - // possible to know is res was initialised correctly, so in - // either case we fail. 
- hts_log_error("Results structure must be cleared before calling this function"); - return -1; - } - - memset(res, 0, sizeof(*res)); - - return hts_filter_eval_(filt, data, fn, res); -} - -int hts_filter_eval2(hts_filter_t *filt, - void *data, hts_expr_sym_func *fn, - hts_expr_val_t *res) { - ks_free(&res->s); - memset(res, 0, sizeof(*res)); - - return hts_filter_eval_(filt, data, fn, res); -} diff --git a/src/htslib-1.19.1/hts_internal.h b/src/htslib-1.19.1/hts_internal.h deleted file mode 100644 index 61956da..0000000 --- a/src/htslib-1.19.1/hts_internal.h +++ /dev/null @@ -1,153 +0,0 @@ -/* hts_internal.h -- internal functions; not part of the public API. - - Copyright (C) 2015-2016, 2018-2020 Genome Research Ltd. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef HTSLIB_HTS_INTERNAL_H -#define HTSLIB_HTS_INTERNAL_H - -#include -#include - -#include "htslib/hts.h" -#include "textutils_internal.h" - -#define HTS_MAX_EXT_LEN 9 - -#ifdef __cplusplus -extern "C" { -#endif - -struct hFILE; - -struct hts_json_token { - char type; ///< Token type - char *str; ///< Value as a C string (filled in for all token types) - // TODO Add other fields to fill in for particular data types, e.g. - // int inum; - // float fnum; -}; - -struct cram_fd; - -/* - * Check the existence of a local index file using part of the alignment file name. - * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai - * @param fn - pointer to the file name - * @param fnidx - pointer to the index file name placeholder - * @return 1 for success, 0 for failure - */ -int hts_idx_check_local(const char *fn, int fmt, char **fnidx); - -// Retrieve the name of the index file and also download it, if it is remote -char *hts_idx_getfn(const char *fn, const char *ext); - -// Retrieve the name of the index file, but do not download it, if it is remote -char *hts_idx_locatefn(const char *fn, const char *ext); - -// Used for on-the-fly indexing. See the comments in hts.c. -void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset); - -int hts_idx_fmt(hts_idx_t *idx); - -// Construct a unique filename based on fname and open it. -struct hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname); - -// Check that index is capable of storing items in range beg..end -int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end); - -// The CRAM implementation stores the loaded index within the cram_fd rather -// than separately as is done elsewhere in htslib. So if p is a pointer to -// an hts_idx_t with p->fmt == HTS_FMT_CRAI, then it actually points to an -// hts_cram_idx_t and should be cast accordingly. 
-typedef struct hts_cram_idx_t { - int fmt; - struct cram_fd *cram; -} hts_cram_idx_t; - - -// Entry point to hFILE_multipart backend. -struct hFILE *hopen_htsget_redirect(struct hFILE *hfile, const char *mode); - -struct hts_path_itr { - kstring_t path, entry; - void *dirv; // DIR * privately - const char *pathdir, *prefix, *suffix; - size_t prefix_len, suffix_len, entry_dir_l; -}; - -void hts_path_itr_setup(struct hts_path_itr *itr, const char *path, - const char *builtin_path, const char *prefix, size_t prefix_len, - const char *suffix, size_t suffix_len); - -const char *hts_path_itr_next(struct hts_path_itr *itr); - -typedef void plugin_void_func(void); -plugin_void_func *load_plugin(void **pluginp, const char *filename, const char *symbol); -void *plugin_sym(void *plugin, const char *name, const char **errmsg); -plugin_void_func *plugin_func(void *plugin, const char *name, const char **errmsg); -void close_plugin(void *plugin); -const char *hts_plugin_path(void); - -/* - * Buffers up arguments to hts_idx_push for later use, once we've written all bar - * this block. This is necessary when multiple blocks are in flight (threading). - * - * Returns 0 on success, - * -1 on failure - */ -int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); - -/* - * bgzf analogue to hts_idx_amend_last. - * - * This is needed when multi-threading and writing indices on the fly. - * At the point of writing a record we know the virtual offset for start - * and end, but that end virtual offset may be the end of the current - * block. In standard indexing our end virtual offset becomes the start - * of the next block. Thus to ensure bit for bit compatibility we - * detect this boundary case and fix it up here. - */ -void bgzf_idx_amend_last(BGZF *fp, hts_idx_t *hidx, uint64_t offset); - -static inline int find_file_extension(const char *fn, char ext_out[static HTS_MAX_EXT_LEN]) -{ - const char *delim = fn ? 
strstr(fn, HTS_IDX_DELIM) : NULL, *ext; - if (!fn) return -1; - if (!delim) delim = fn + strlen(fn); - for (ext = delim; ext > fn && *ext != '.' && *ext != '/'; --ext) {} - if (*ext == '.' && - ((delim - ext == 3 && ext[1] == 'g' && ext[2] == 'z') || // permit .sam.gz as a valid file extension - (delim - ext == 4 && ext[1] == 'b' && ext[2] == 'g' && ext[3] == 'z'))) // permit .vcf.bgz as a valid file extension - { - for (ext--; ext > fn && *ext != '.' && *ext != '/'; --ext) {} - } - if (*ext != '.' || delim - ext > HTS_MAX_EXT_LEN || delim - ext < 3) - return -1; - memcpy(ext_out, ext + 1, delim - ext - 1); - ext_out[delim - ext - 1] = '\0'; - return 0; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/hts_probe_cc.sh b/src/htslib-1.19.1/hts_probe_cc.sh deleted file mode 100755 index 48d0159..0000000 --- a/src/htslib-1.19.1/hts_probe_cc.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/sh - -# Check compiler options for non-configure builds and create Makefile fragment -# -# Copyright (C) 2022-2023 Genome Research Ltd. -# -# Author: Rob Davies -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# Arguments are: -# 1. C compiler command -# 2. Initial CFLAGS -# 3. LDFLAGS - -CC=$1 -CFLAGS=$2 -LDFLAGS=$3 - -# Try running the compiler. Uses the same contest.* names as -# configure for temporary files. -run_compiler () -{ - $CC $CFLAGS $1 $LDFLAGS -o conftest conftest.c 2> conftest.err - retval=$? - rm -f conftest.err conftest - return $retval -} - -# Run a test. $1 is the flag to try, $2 is the Makefile variable to set -# with the flag probe result, $3 is a Makefile variable which will be -# set to 1 if the code was built successfully. The code to test should -# be passed in via fd 0. -# First try compiling conftest.c without the flag. If that fails, try -# again with it to see if the flag is needed. -run_test () -{ - rm -f conftest conftest.err conftest.c - cat - > conftest.c - if run_compiler ; then - echo "$2 =" - echo "$3 = 1" - elif run_compiler "$1" ; then - echo "$2 = $1" - echo "$3 = 1" - else - echo "$3 =" - fi -} - -echo "# Compiler probe results, generated by $0" - -# Check for sse4.1 etc. 
support -run_test "-msse4.1 -mpopcnt -mssse3" HTS_CFLAGS_SSE4 HTS_BUILD_SSE4 <<'EOF' -#ifdef __x86_64__ -#include "x86intrin.h" -int main(int argc, char **argv) { - __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b); - return _mm_popcnt_u32(*((char *) &c)); -} -#else -int main(int argc, char **argv) { return 0; } -#endif -EOF - -# Check for avx2 - -run_test "-mavx2 -mpopcnt" HTS_CFLAGS_AVX2 HTS_BUILD_AVX2 <<'EOF' -#ifdef __x86_64__ -#include "x86intrin.h" -int main(int argc, char **argv) { - __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - __m256i b = _mm256_add_epi32(a, a); - long long c = _mm256_extract_epi64(b, 0); - return _mm_popcnt_u32((int) c); -} -#else -int main(int argc, char **argv) { return 0; } -#endif -EOF - -# Check for avx512 - -run_test "-mavx512f -mpopcnt" HTS_CFLAGS_AVX512 HTS_BUILD_AVX512 <<'EOF' -#ifdef __x86_64__ -#include "x86intrin.h" -int main(int argc, char **argv) { - __m512i a = _mm512_set1_epi32(1); - __m512i b = _mm512_add_epi32(a, a); - return _mm_popcnt_u32(*((char *) &b)); -} -#else -int main(int argc, char **argv) { return 0; } -#endif -EOF - -rm -f conftest.c diff --git a/src/htslib-1.19.1/hts_time_funcs.h b/src/htslib-1.19.1/hts_time_funcs.h deleted file mode 100644 index 2a05084..0000000 --- a/src/htslib-1.19.1/hts_time_funcs.h +++ /dev/null @@ -1,170 +0,0 @@ -/* hts_time_funcs.h -- Implementations of non-standard time functions - - Copyright (C) 2022 Genome Research Ltd. 
- - Author: Rob Davies - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -/* - This mainly exists because timegm() is not a standard function, and so - Cannot be used in portable code. Unfortunately the standard one (mktime) - always takes the local timezone into accout so doing a UTC conversion - with it involves changing the TZ environment variable, which is rather - messy and not likely to go well with threaded code. - - The code here is a much simplified version of the BSD timegm() implementation. - It currently rejects dates before 1970, avoiding problems with -ve time_t. - It also works strictly in UTC, so doesn't have to worry about tm_isdst - which makes the calculation much easier. - - Some of this is derived from BSD sources, for example - https://github.com/NetBSD/src/blob/trunk/lib/libc/time/localtime.c - which state: - - ** This file is in the public domain, so clarified as of - ** 1996-06-05 by Arthur David Olson. - - Non-derived code is copyright as above. 
-*/ - -#include -#include -#include -#include - -static inline int hts_time_normalise(int *tens, int *units, int base) { - if (*units < 0 || *units >= base) { - int delta = *units >= 0 ? *units / base : (-1 - (-1 - *units) / base); - int64_t tmp = (int64_t) (*tens) + delta; - if (tmp < INT_MIN || tmp > INT_MAX) return 1; - *tens = tmp; - *units -= delta * base; - } - return 0; -} - -static inline int hts_year_is_leap(int64_t year) { - return ((year % 4 == 0) && (year % 100 != 0)) || (year % 400 == 0); -} - -// Number of leap years to start of year -// Only works for year >= 1. -static inline int64_t hts_leaps_to_year_start(int64_t year) { - --year; - return year / 4 - year / 100 + year / 400; -} - -static inline int hts_time_normalise_tm(struct tm *t) -{ - const int days_per_mon[2][12] = { - { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, - { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } - }; - const int year_days[2] = { 365, 366 }; - int overflow = 0; - int64_t year; - - if (t->tm_sec > 62) { - overflow |= hts_time_normalise(&t->tm_min, &t->tm_sec, 60); - } - overflow |= hts_time_normalise(&t->tm_hour, &t->tm_min, 60); - overflow |= hts_time_normalise(&t->tm_mday, &t->tm_hour, 24); - overflow |= hts_time_normalise(&t->tm_year, &t->tm_mon, 12); - if (overflow) - return 1; - - year = (int64_t) t->tm_year + 1900LL; - while (t->tm_mday <= 0) { - --year; - t->tm_mday += year_days[hts_year_is_leap(year + (1 < t->tm_mon))]; - } - while (t->tm_mday > 366) { - t->tm_mday -= year_days[hts_year_is_leap(year + (1 < t->tm_mon))]; - ++year; - } - for (;;) { - int mdays = days_per_mon[hts_year_is_leap(year)][t->tm_mon]; - if (t->tm_mday <= mdays) - break; - t->tm_mday -= mdays; - t->tm_mon++; - if (t->tm_mon >= 12) { - year++; - t->tm_mon = 0; - } - } - year -= 1900; - if (year != t->tm_year) { - if (year < INT_MIN || year > INT_MAX) - return 1; - t->tm_year = year; - } - return 0; -} - -/** - * Convert broken-down time to an equivalent time_t value - * @param target 
Target broken-down time structure - * @return Equivalent time_t value on success; -1 on failure - * - * This function first normalises the time in @p target so that the - * structure members are in the valid range. It then calculates the - * number of seconds (ignoring leap seconds) between midnight Jan 1st 1970 - * and the target date. - * - * If @p target is outside the range that can be represented in a time_t, - * or tm_year is less than 70 (which would return a negative value) then - * it returns -1 and sets errno to EOVERFLOW. - */ - -static inline time_t hts_time_gm(struct tm *target) -{ - int month_start[2][12] = { - { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }, - { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 } - }; - int years_from_epoch, leaps, days; - int64_t secs; - - if (hts_time_normalise_tm(target) != 0) - goto overflow; - - if (target->tm_year < 70) - goto overflow; - - years_from_epoch = target->tm_year - 70; - leaps = (hts_leaps_to_year_start(target->tm_year + 1900) - - hts_leaps_to_year_start(1970)); - days = ((365 * (years_from_epoch - leaps) + 366 * leaps) - + month_start[hts_year_is_leap(target->tm_year + 1900)][target->tm_mon] - + target->tm_mday - 1); - secs = ((int64_t) days * 86400LL - + target->tm_hour * 3600 - + target->tm_min * 60 - + target->tm_sec); - if (sizeof(time_t) < 8 && secs > INT_MAX) - goto overflow; - - return (time_t) secs; - - overflow: - errno = EOVERFLOW; - return (time_t) -1; -} diff --git a/src/htslib-1.19.1/htscodecs.mk b/src/htslib-1.19.1/htscodecs.mk deleted file mode 120000 index 2a91c26..0000000 --- a/src/htslib-1.19.1/htscodecs.mk +++ /dev/null @@ -1 +0,0 @@ -htscodecs_bundled.mk \ No newline at end of file diff --git a/src/htslib-1.19.1/htscodecs/BENCHMARKS.md b/src/htslib-1.19.1/htscodecs/BENCHMARKS.md deleted file mode 100644 index 18d5765..0000000 --- a/src/htslib-1.19.1/htscodecs/BENCHMARKS.md +++ /dev/null @@ -1,146 +0,0 @@ --c option species decode method XX encode method YY where - 
- 00 is scalar - 01 is SSE4 - 02 is AVX2 - 04 is AVX512 - -Input data is 10MB worth of NovaSeq quality values; approx 100k -records. Performance is data specific, so these figures are purely a -snapshot and not indicative of all data types. The test machine -reports as: - - Intel(R) Xeon(R) Gold 6142 CPU @ 2.60GHz - -The -o field is a bit field where. - - 0/1 Order 0 or 1 - 4 32-way variant (permits SIMD) - 64 RLE - 128 Bit packing (4 novaseq quals to a byte) - -Hence -o133-c0202 is pack 4 quals to a bit and order-1 encode with AVX2 -32-way encode/decode. - - - r4x8-o0 10000000 uncomp, 665848 comp 395.3 enc MB/s 718.3 dec MB/s - r4x16-o0 10000000 uncomp, 665415 comp 400.3 enc MB/s 716.1 dec MB/s - arith-o0 10000000 uncomp, 660701 comp 105.0 enc MB/s 86.3 dec MB/s - - r4x8-o1 10000000 uncomp, 615304 comp 274.8 enc MB/s 385.1 dec MB/s - r4x16-o1 10000000 uncomp, 616134 comp 289.6 enc MB/s 536.6 dec MB/s - arith-o1 10000000 uncomp, 613736 comp 75.4 enc MB/s 87.1 dec MB/s - - r4x16-o64 10000000 uncomp, 712335 comp 382.0 enc MB/s 749.2 dec MB/s - arith-o64 10000000 uncomp, 744000 comp 153.1 enc MB/s 112.2 dec MB/s - - r4x16-o65 10000000 uncomp, 591457 comp 360.6 enc MB/s 705.8 dec MB/s - arith-o65 10000000 uncomp, 585233 comp 161.3 enc MB/s 117.7 dec MB/s - - r4x16-o128 10000000 uncomp, 615915 comp 780.2 enc MB/s 2092.5 dec MB/s - arith-o128 10000000 uncomp, 609977 comp 257.4 enc MB/s 219.0 dec MB/s - - r4x16-o129 10000000 uncomp, 553081 comp 645.1 enc MB/s 1394.1 dec MB/s - arith-o129 10000000 uncomp, 550377 comp 165.1 enc MB/s 180.7 dec MB/s - - r4x16-o192 10000000 uncomp, 621771 comp 513.1 enc MB/s 1003.0 dec MB/s - arith-o192 10000000 uncomp, 621415 comp 217.9 enc MB/s 180.8 dec MB/s - - r4x16-o193 10000000 uncomp, 550325 comp 474.1 enc MB/s 920.6 dec MB/s - arith-o193 10000000 uncomp, 543687 comp 195.7 enc MB/s 158.0 dec MB/s - - r32x16-o4-c0000 10000000 uncomp, 665501 comp 399.0 enc MB/s 613.9 dec MB/s - r32x16-o4-c0101 10000000 uncomp, 665501 comp 402.1 enc MB/s 
968.0 dec MB/s - r32x16-o4-c0202 10000000 uncomp, 665501 comp 690.8 enc MB/s 1796.0 dec MB/s - r32x16-o4-c0404 10000000 uncomp, 665501 comp 866.9 enc MB/s 2098.6 dec MB/s - - r32x16-o5-c0000 10000000 uncomp, 616223 comp 274.6 enc MB/s 426.5 dec MB/s - r32x16-o5-c0101 10000000 uncomp, 616223 comp 274.1 enc MB/s 626.8 dec MB/s - r32x16-o5-c0202 10000000 uncomp, 616223 comp 391.8 enc MB/s 1472.8 dec MB/s - r32x16-o5-c0404 10000000 uncomp, 616223 comp 563.5 enc MB/s 1673.9 dec MB/s - - r32x16-o68-c0000 10000000 uncomp, 712513 comp 363.8 enc MB/s 717.4 dec MB/s - r32x16-o68-c0101 10000000 uncomp, 712513 comp 384.7 enc MB/s 836.5 dec MB/s - r32x16-o68-c0202 10000000 uncomp, 712513 comp 438.8 enc MB/s 913.6 dec MB/s - r32x16-o68-c0404 10000000 uncomp, 712513 comp 450.8 enc MB/s 918.0 dec MB/s - - r32x16-o69-c0000 10000000 uncomp, 591639 comp 369.7 enc MB/s 684.2 dec MB/s - r32x16-o69-c0101 10000000 uncomp, 591639 comp 370.2 enc MB/s 780.1 dec MB/s - r32x16-o69-c0202 10000000 uncomp, 591639 comp 408.5 enc MB/s 894.9 dec MB/s - r32x16-o69-c0404 10000000 uncomp, 591639 comp 431.6 enc MB/s 906.5 dec MB/s - - r32x16-o132-c0000 10000000 uncomp, 615999 comp 659.2 enc MB/s 1861.9 dec MB/s - r32x16-o132-c0101 10000000 uncomp, 615999 comp 660.0 enc MB/s 2580.6 dec MB/s - r32x16-o132-c0202 10000000 uncomp, 615999 comp 971.6 enc MB/s 3679.2 dec MB/s - r32x16-o132-c0404 10000000 uncomp, 615999 comp 1050.6 enc MB/s 3947.9 dec MB/s - - r32x16-o133-c0000 10000000 uncomp, 553181 comp 573.2 enc MB/s 848.8 dec MB/s - r32x16-o133-c0101 10000000 uncomp, 553181 comp 566.3 enc MB/s 1517.0 dec MB/s - r32x16-o133-c0202 10000000 uncomp, 553181 comp 759.1 enc MB/s 1923.8 dec MB/s - r32x16-o133-c0404 10000000 uncomp, 553181 comp 914.4 enc MB/s 1981.4 dec MB/s - - r32x16-o194-c0000 10000000 uncomp, 621771 comp 558.0 enc MB/s 1085.0 dec MB/s - r32x16-o194-c0101 10000000 uncomp, 621771 comp 559.2 enc MB/s 1088.6 dec MB/s - r32x16-o194-c0202 10000000 uncomp, 621771 comp 552.9 enc MB/s 1091.2 dec MB/s - 
r32x16-o194-c0404 10000000 uncomp, 621771 comp 550.1 enc MB/s 1070.3 dec MB/s - - r32x16-o197-c0000 10000000 uncomp, 550497 comp 484.2 enc MB/s 791.8 dec MB/s - r32x16-o197-c0101 10000000 uncomp, 550497 comp 487.2 enc MB/s 1004.4 dec MB/s - r32x16-o197-c0202 10000000 uncomp, 550497 comp 488.0 enc MB/s 1033.9 dec MB/s - r32x16-o197-c0404 10000000 uncomp, 550497 comp 502.0 enc MB/s 1027.6 dec MB/s - -For completeness, a couple other tools are also shown below. Note -fqzcomp here is slightly smaller as it has been trimmed to end on a -whole line. - - fqzcomp -s1 9999975 uncomp, 494485 comp 27.4 enc MB/s 27.1 dec MB/s - - bsc -m3e1tT 10000000 uncomp, 553958 comp 43.7 enc MB/s 31.6 dec MB/s - bsc -m0e2tT 10000000 uncomp, 531536 comp 19.0 enc MB/s 25.5 dec MB/s - ------------------------------------------------------------------------------ - -10MB worth of Illumina HiSeq data with 40 distinct quality values. -Note this sequencing run had a few erratic cycles, leading to -unusually good performance from fqzcomp. The bit-packing modes of -rANS are not relevant (nor shown) here due to the cardinality of the -data. 
- - r4x8-o0 10000000 uncomp, 5092977 comp 303.9 enc MB/s 553.3 dec MB/s - r4x16-o0 10000000 uncomp, 5092608 comp 357.4 enc MB/s 579.8 dec MB/s - arith-o0 10000000 uncomp, 5079029 comp 51.9 enc MB/s 33.1 dec MB/s - - r4x8-o1 10000000 uncomp, 4911113 comp 278.1 enc MB/s 356.4 dec MB/s - r4x16-o1 10000000 uncomp, 4918609 comp 290.5 enc MB/s 542.4 dec MB/s - arith-o1 10000000 uncomp, 4911347 comp 42.1 enc MB/s 32.3 dec MB/s - - r4x16-o64 10000000 uncomp, 5092608 comp 215.5 enc MB/s 782.7 dec MB/s - arith-o64 10000000 uncomp, 5194241 comp 36.8 enc MB/s 26.6 dec MB/s - - r4x16-o65 10000000 uncomp, 4918609 comp 167.0 enc MB/s 484.0 dec MB/s - arith-o65 10000000 uncomp, 4909925 comp 33.4 enc MB/s 23.8 dec MB/s - - r32x16-o4-c0000 10000000 uncomp, 5092684 comp 367.2 enc MB/s 642.1 dec MB/s - r32x16-o4-c0101 10000000 uncomp, 5092684 comp 340.7 enc MB/s 1005.1 dec MB/s - r32x16-o4-c0202 10000000 uncomp, 5092684 comp 666.8 enc MB/s 1777.5 dec MB/s - r32x16-o4-c0404 10000000 uncomp, 5092684 comp 827.0 enc MB/s 2158.9 dec MB/s - - r32x16-o5-c0000 10000000 uncomp, 4918685 comp 273.9 enc MB/s 391.5 dec MB/s - r32x16-o5-c0101 10000000 uncomp, 4918685 comp 268.5 enc MB/s 524.0 dec MB/s - r32x16-o5-c0202 10000000 uncomp, 4918685 comp 396.0 enc MB/s 1218.2 dec MB/s - r32x16-o5-c0404 10000000 uncomp, 4918685 comp 553.4 enc MB/s 1418.4 dec MB/s - - r32x16-o68-c0000 10000000 uncomp, 5092684 comp 216.3 enc MB/s 646.6 dec MB/s - r32x16-o68-c0101 10000000 uncomp, 5092684 comp 235.2 enc MB/s 1016.3 dec MB/s - r32x16-o68-c0202 10000000 uncomp, 5092684 comp 336.4 enc MB/s 1804.4 dec MB/s - r32x16-o68-c0404 10000000 uncomp, 5092684 comp 376.5 enc MB/s 2162.2 dec MB/s - - r32x16-o69-c0000 10000000 uncomp, 4918685 comp 194.3 enc MB/s 390.1 dec MB/s - r32x16-o69-c0101 10000000 uncomp, 4918685 comp 195.3 enc MB/s 593.4 dec MB/s - r32x16-o69-c0202 10000000 uncomp, 4918685 comp 251.6 enc MB/s 1212.7 dec MB/s - r32x16-o69-c0404 10000000 uncomp, 4918685 comp 306.3 enc MB/s 1415.6 dec MB/s - - fqzcomp 
-s1 10000000 uncomp, 3196746 comp 16.6 enc MB/s 16.0 dec MB/s - - bsc -m3e1tT 10000000 uncomp, 4762846 comp 12.9 enc MB/s 17.5 dec MB/s - bsc -m0e2tT 10000000 uncomp, 4477056 comp 6.1 enc MB/s 8.8 dec MB/s diff --git a/src/htslib-1.19.1/htscodecs/LICENSE.md b/src/htslib-1.19.1/htscodecs/LICENSE.md deleted file mode 100644 index 14d3778..0000000 --- a/src/htslib-1.19.1/htscodecs/LICENSE.md +++ /dev/null @@ -1,45 +0,0 @@ -All files except those explicitly listed below are copyright Genome -Research Limited and are made available under the BSD license. - -> Redistribution and use in source and binary forms, with or without -> modification, are permitted provided that the following conditions -> are met: -> -> (1) Redistributions of source code must retain the above copyright -> notice, this list of conditions and the following disclaimer. -> -> (2) Redistributions in binary form must reproduce the above copyright -> notice, this list of conditions and the following disclaimer in -> the documentation and/or other materials provided with the distribution. -> -> (3)The name of the author may not be used to endorse or promote -> products derived from this software without specific prior written -> permission. -> -> THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -> IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -> WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -> DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -> INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -> (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -> SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -> HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -> STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -> IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -> POSSIBILITY OF SUCH DAMAGE. 
- -c_range_coder.h is Public Domain, derived from work by Eugene -Shelwien. - -rANS_byte.h and rANS_word.h are derived from Fabien Giesen's work and -is Public Domain. https://github.com/rygorous/ryg_rans This work was -in turn based on the ANS family of entropy encoders as described by -Jarek Duda's paper: http://arxiv.org/abs/1311.2540 - -> To the extent possible under law, Fabian Giesen has waived all -> copyright and related or neighboring rights to ryg_rans, as -> per the terms of the CC0 license: -> -> https://creativecommons.org/publicdomain/zero/1.0 -> -> This work is published from the United States. diff --git a/src/htslib-1.19.1/htscodecs/MAINTAINERS.md b/src/htslib-1.19.1/htscodecs/MAINTAINERS.md deleted file mode 100644 index 7ec161b..0000000 --- a/src/htslib-1.19.1/htscodecs/MAINTAINERS.md +++ /dev/null @@ -1,55 +0,0 @@ -Notes to maintainers for building releases. -This is best done as a release PR so we can check it first. - -1. Places to update the version number include: - - - htscodecs/htscodecs.h (used for program introspection) - - - configure.ac AC_INIT macro - - - configure.ac VERS_CURRENT, VERS_REVISION and VERS_AGE variables. - See the long comment above for instructions of how these change. - - - NEWS files. - - -2. Ensure NEWS and README files are up to date. NEWS is a git log - summary. README likely doesn't change unless something major needs - mentioning. - - - At time of merging, set the date at the top of NEWS. - - -3. Test it all. - - Push to github PR so the CI can validate for us. - - - make distcheck - This also makes the tarball htscodecs-${vers}.tar.gz. - - -4. Merge into master - - -5. Add an annotated tag with minimal message, eg: - - - git tag -a v1.1 -m v1.1 - - -6. Push master and --tags upstream to github - - -7. Make a new release on github. - - - Title: "htscodecs ${vers}" - - - Message: this is just a copy of NEWS. - It's already in Markdown format, but double check the preview panel. 
- - - Upload the tarball produced from distcheck to the assets. - - -8. Finally, consider updating any packages that use this as a - submodule to ensure they have the latest tagged release. - - This will invariably help OS distributions keep their package - dependencies neatly in sync. diff --git a/src/htslib-1.19.1/htscodecs/NEWS.md b/src/htslib-1.19.1/htscodecs/NEWS.md deleted file mode 100644 index ab1c0db..0000000 --- a/src/htslib-1.19.1/htscodecs/NEWS.md +++ /dev/null @@ -1,409 +0,0 @@ -Release 1.6.0: 7th December 2023 --------------------------------- - -This release is primarily bug fixes, mostly spotted through improved fuzz -testing. - -One big change however is the SIMD rANS codecs are now performant on Intel -CPUs with the DownFall mitigation microcode applied. - - -Changes - -- Replaced the rANS codec SIMD gathers with simulated gathers via scalar - memory fetches. This helps AMD Zen4, but importantly it also fixes a - disastrous performance regression caused by Intel's DownFall microcode fix. - - There is an impact on pre-DownFall speeds, but we should focus on patched - CPUs as a priority. - -- A small speed up to the rans_F_to_s3 function used by order-0 rans decode. - -- Small speed up to SIMD rans32x16 order-1 encoder by reducing cache misses. - Also sped up the rans4x8 order-1 encoder, particularly on AMD Zen4. - -- Now supports building with "zig cc" - (Issue #109, reported by David Jackson) - - -Bug fixes - -- Improve robustness of name tokeniser when given non 7-bit ASCII and on - machines where "char" defaults to unsigned. - (Issue #105, reported by Shubham Chandak) - -- Also fixed a 1 byte buffer read-overrun in name tokeniser. - -- Fix name tokeniser encoder failure with some duplicated streams. - -- Fixed rans_set_cpu to work multiple times, as well as reinstating the - ability to change decode and encode side independently (accidentally lost in - commit 958032c). No effect on usage, but it improves the test coverage. 
- -- Added a round-trip fuzz tester to test the ability to encode. The old fuzz - testing was decode streams only. - -- Fixed bounds checking in rans_uncompress_O0_32x16_avx2, fixing buffer read - overruns. - -- Removed undefined behaviour in transpose_and_copy(), fixing zig cc builds. - - -Release 1.5.2: 6th October 2023 -------------------------------- - -*** SECURITY FIXES *** - -This release contains multiple bug fixes, including a couple -buffer overruns that could corrupt memory when used in specific -scenarios. These have not been observed with real data, but could -represent an attack vector for a malicious user. (We know of no -exploit.) - - -Changes - -- The range coder has been extended to do bounds checking if the - new RC_SetOutputEnd() is called. This has a small performance hit - for the encoder, depending on compiler, but tests showed within 10% - at worst. - -Bug fixes - -- Fix write-buffer overruns in fqzcomp and name tokeniser. - - SECURITY ISSUE: FQZComp could overflow the computed maximum growth - size, causing writes beyond the ends of the allocated memory. This - is triggered by many very small 1bp reads. Fixed the maximum - bounds for compressed data. - - SECURITY ISSUE: The name tokeniser using the maximum number of - tokens (128) would erroneously write a 129th token. This is a - restricted overflow of a few bytes. - - (PR#97, reported by Shubham Chandak) - -- Fix an maximum 8-byte read overflow in the AVX2 rans decoder. - SECURITY ISSUE: This was only present when using gcc. - (PR#100, reported by Rob Davies) - -- The rANS Order-1 SSE4 decoder could decode incorrectly. - When a single symbol only occurs and we're using 12-bit freqs, the - frequency of 4096 was interpreted as freq 0. This only happens in - the non-SIMD tidy-up stage at the end of the decode, so at worst the - final 31 bytes may be incorrect. (PR#102) - -- Fixed a 1-byte heap read-buffer overflow. Existed since 6a87ead2 - (Oct 2021). 
Low severity security due to size and high likelihood - it's just malloc meta-data. (PR#95; OSS-Fuzz 62270) - -- rans_compress_4x16 now works on zero length input. - Previously this was giving divide-by-zero errors. - (PR#101, reported by Shubham Chandak) - -- Remove asserts which caused warnings about unused variables when - building with -DNDEBUG. - -- Fix ARM builds when HWCAP_ASIMD is missing (on Conda) (PR#91) - -- Improve FreeBSD CI testing - -- Fix undefined behaviour from signed bit-shifting (PR#90). - - -Release 1.5.1: 19th July 2023 ------------------------------ - -This release is mainly small updates and bug fixes focusing on -specific platforms, with no new features added. - -Changes - -- Be more selective in use of AVX512 on AMD Zen4 processors. This can - be faster (e.g. with 64-way unrolling), but in the current rANS codec - implementations AVX2 is faster for certain operations (PR#85). - -- Add config.h to test programs to help them pick up definitions such - as XOPEN_SOURCE (PR#84) - -- Add FreeBSD to CI testing (PR#83) - -Bug fixes - -- Trivial bug fix to the rans4x16pr test harness when given - incompressible data (PR#86). - -- Make ARM NEON checks specific to AArch64 and exclude AArch32 systems. - (PR#82 to fix issue#81, reported by Robert Clausecker) - - -Release 1.5.0: 14th April 2023 ------------------------------- - -Changes - -- Significant speed ups to the fqzcomp codec via code restructuring - and use of memory prefetch instructions. Encode is 30-40% faster - and decode 5-8% faster. (PR#75 James Bonfield) - -- Improve multiarch builds on MacOS, fixing issues with getting the - various SIMD implementations integrated. (Issue#76 John Marshall, - PR#77/#78 Rob Davies) - -- Remove unused ax_with_libdeflate.m4 file from build system. - - -Release 1.4.0: Februrary 2023 ------------------------------ - -This is almost entirely minor bug fixing with a few small updates. - -Changes - -- Optimise compression / speed of the name tokeniser. 
- - In arithmetic coding mode, it can now utilise bzip2 at higher levels. - - For both rans / arith entropy encoders, the choice of method / order - is now optimised per token type, giving faster compression. - - Culled a pointless zlib check in the configure script. - - Made lack of bzip2 a hard failure in configure, unless an explicit - --disable-bz2 option is given. - (#72, #73) - -- Switch CI to use ARM for MacOS builds - (#69, thanks to Rob Davies) - - -Bug fixes - -- Remove some newer compiler warnings (#61) - -- Improvements for Intel -m32 builds, including better AVX2 validation - (m32 misses _mm256_extract_epi64) and improved data alignment. - (#62. See also samtools/htslib#1500) - -- Detect Neon capability at runtime via operating system APIs. - (#63, thanks to John Marshall) - -- Improve FreeBSD diagnostics when neglecting to use -lpthread / -lthr. - Plus additional extra error checking too. - (#68, #64, thanks to John Marshall) - -- Update hts_pack to operate in line with CRAMcodecs spec, where the - number of symbols > 16. - (#65/#66, reported by Michael Macias) - -- Fixed too-stringent buffer overflow checking in O1 rans decoder. - (#71, reported by Divon Lan) - - -Release 1.3.0: 9th August 2022 ------------------------------- - -The primary change in this release is a new SIMD enabled rANS codec. - -Changes - -- There is a 32-way unrolled rANS implementation. This is accessed - using the existing rans 4x16 API with the RANS_ORDER_X32 bit set. - Implementations exist for SSE4.1, AVX2, AVX512 and ARM Neon, as - well as traditional non-SIMD scalar code in C and JavaScript. See - the commit logs for benchmarks. - -- Improved memory allocation via a new htscodecs_tls_alloc function. - This uses Thread Local Storage (TLS) to avoid multiple malloc/free - calls, reducing system CPU time. - -- Some external functions have been renamed, with the old ones still - existing in a deprecated fashion. 
Every symbol should now start - hts_, rans_, arith_, fqz_ or tok3_*. - -- Improved test framework with an "entropy" tool that iterates over - all entropy encoders. - -- Updated the Appveyor CI image to user a newer gcc. Also added ARM - to the list of processors to test on. - -- Tab vs space code changes. Use "git diff -w" to see through these. - -- Reworked fuzzing infrastructure. - -- Small speed improvements to various rANS encoders and decoders. - These were tested on a broad range of compilers, versions and - systems. The new code may be slightly slower with some combinations, - but is faster overall and removes a few outliers with considerably - degraded performance. - -- Substantial memory reduction to the name tokeniser (tok3). - -Bug fixes - -- Fixed undefined behaviour in our use of _builtin_clz(). - -- Fixed a few redundant #includes. - -- Work around strict aliasing bugs, uncovered with gcc -O2. - -- Fixed an issue with encoding data blocks close to 2GB in size. - (Additionally blocks above 2GB now error, rather than crashing or - returning incorrect results.) - -- Fix encode error with large blocks using RANS_ORDER_STRIPE. - - -Release 1.2.2: 1st April 2022 ------------------------------ - -This release contains some fixes found during fuzzing with Clang's -memory-sanitizer. None of these are involving writing memory so there -is no possibility for code execution vulnerabilities. However some do -could access uninitialised elements in locally allocated memory, which -could leak private data if the library was used in conjunction with -other tools which don't zero sensitive data before freeing. - -Bug fixes: - -- The name tokeniser now validates the stored length in the data - stream matches the actual decoded length. Discovered by Taotao Gu. - -- Fixed an endless loop in arith_dynamic and rans4x16pr involving - X_STRIPE with 0 stripes. - -- Avoid a harmless (and wrong?) 
undefined behaviour sanitizer error - when calling memcpy(ptr, NULL, 0) in the name tokeniser. - -- Fixed possible uninitialised memory access in - rans_uncompress_O1_4x16. If the frequency table didn't add up to - the correct amount, parts of the "fb" table were left unpopulated. - It was then possible to use these array elements in some of the rANS - calculations. - -- Similarly rans_uncompress_O0 could access an uninitialised element - 4095 of the decoder tables if the frequencies summed to 4095 instead - of the expected 4096. - -- Improved error detection from fqzcomp's read_array function. - -- Reject fqzcomp parameters with inconsistent "sel" parameters, which - could lead to uninitialised access to the model.sel range coder. - - -Release 1.2.1: 15th February 2022 ---------------------------------- - -The only change in this release is a minor adjustment to the histogram -code so it works on systems with small stacks. This was detected on -Windows Mingw builds. - - -Release 1.2: 10th February 2022 -------------------------------- - -This release contains the following minor changes. -Please see the "git log" for the full details. - -Improvements / changes: - -- Speed up of rANS4x16 order-0. We now use a branchless encoder - renormalisation step. For complex data it's between 13 and 50% - speed up depending on compiler. - -- Improve rANS4x16 compute_shift estimates. The entropy calculation - is now more accurate. This leads to more frequent use of the 10-bit - frequency mode, at an expense of up to 1% size growth. - -- Speed improvements to the striped rANS mode, both encoding and - decoding. Encoder gains ~8% and decoder ~5%, but varies - considerably by compiler and data. - -- Added new var_put_u64_safe and var_put_u32_safe interfaces. - These are automatically used by var_put_u64 and var_put_u32 when - near the end of the buffer, but may also be called directly. - -- Small speed ups to the hist8 and hist1_4 functions. - -- Minor speed up to RLE decoding. 
- -Bug fixes: - -- Work around an icc-2021 compiler bug, but also speed up the varint - encoding too (#29). - -- Fix an off-by-one error in the initial size check in arith_dynamic. - This meant the very smallest of blocks could fail to decode. - Reported by Divon Lan. - -- Fixed hist1_4 to also count the last byte when computing T0[]. - -- Fixed overly harsh bounds checking in the fqzcomp read_array - function, which meant it failed to decode some configurations. - - -Release 1.1.1: 6th July 2021 ----------------------------- - -This release contains the following minor changes. -Please see the "git log" for the full details. - -Improvements / changes: - -- Modernised autoconf usage to avoid warnings with newer versions. - (John Marshall) - -- Avoid using awk with large records, due to some systems - (e.g. Solaris / OpenIndiana) with line length limits . - (John Marshall) - -- Applied Debian patch to make the library link against -lm. - -Bug fixes: - -- Fixed an issue with the name tokeniser when a slice (name_context) - has exactly 1 more name than the previous call. (James Bonfield) - -- Removed access to an uninitialised variable in the name tokeniser - decode when given malformed data. This occurs when we use delta - encoding for the very first name. (James Bonfield, OSS-Fuzz) - -- Minor fixes to distcheck and distclean targets - - -Release 1.0: 23rd Feb 2021 --------------------------- - -This marks the first non-beta release of htscodecs, following a -perioid of integration with Htslib and automated fuzzing by Google's -OSS-Fuzz program. - -[Note this testing only applies to the C implementation. The -JavaScript code should still be considered as examples of the codecs, -more for purposes of understanding and clarity than as a fully -optimised and tested release.] - -Since the last release (0.5) the key changes are: - -- Improved support for big endian platforms - -- Speed improvements to CRAM 3.0 4x8 rANS order-1 encoding. 
- It's between 10 and 50% faster at encoding, based on input data. - -- Improved autoconf bzip2 checks and tidy up "make test" output. - -- Added some more files into "make install", so that "make distcheck" - now passes. - -- Replaced Travis with Cirrus-CI testing. - -- Removed various C undefined behaviour, such as left shifting of - negative values and integer overflows. As far as we know these were - currently harmless on the supported platforms, but may break future - compiler optimisations. - -- Fixed numerous OSS-Fuzz identified flaws. Some of these were - potential security issues such as small buffer overruns. - -- Tidied up some code to prevent warnings. - -- The name tokeniser now has a limit on the size of data it can encode - (10 million records). This may still be too high given the memory - it will require, so it may be reduced again. - diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/arith_dynamic.h b/src/htslib-1.19.1/htscodecs/htscodecs/arith_dynamic.h deleted file mode 100644 index 2ae2033..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/arith_dynamic.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2019 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ARITH_DYNAMIC_H -#define ARITH_DYNAMIC_H - -#ifdef __cplusplus -extern "C" { -#endif - -unsigned char *arith_compress(unsigned char *in, unsigned int in_size, - unsigned int *out_size, int order); - -unsigned char *arith_uncompress(unsigned char *in, unsigned int in_size, - unsigned int *out_size); - -unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size, - int order); - -unsigned char *arith_uncompress_to(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_sz); - -unsigned int arith_compress_bound(unsigned int size, int order); - -#ifdef __cplusplus -} -#endif - -#endif /* ARITH_DYNAMIC_H */ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/c_simple_model.h b/src/htslib-1.19.1/htscodecs/htscodecs/c_simple_model.h deleted file mode 100644 index 0c81430..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/c_simple_model.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2012, 2018-2019 Genome Research Ltd. 
- * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include "c_range_coder.h" - -/* - *-------------------------------------------------------------------------- - * A simple frequency model. - * - * Define NSYM to be an integer value before including this file. - * It will then generate types and functions specific to that - * maximum number of symbols. 
- * - * This keeps a list of symbols and their frequencies, approximately - * sorted by symbol frequency. We allow for a single symbol to periodically - * move up the list when emitted, effectively doing a single step of - * bubble sort periodically. This means it's largely the same complexity - * irrespective of alphabet size. - * It's more efficient on strongly biased distributions than random data. - * - * There is no escape symbol, so the model is tailored to relatively - * stationary samples (although we do have occasional normalisation to - * avoid frequency counters getting too high). - *-------------------------------------------------------------------------- - */ - -//----------------------------------------------------------------------------- -// Bits we want included once only - constants, types, etc -#ifndef C_SIMPLE_MODEL_H -#define C_SIMPLE_MODEL_H - -#define MAX_FREQ (1<<16)-17 -#define PASTE3(a,b,c) a##b##c -#define SIMPLE_MODEL(a,b) PASTE3(SIMPLE_MODEL,a,b) -#define STEP 16 -typedef struct { - uint16_t Freq; - uint16_t Symbol; -} SymFreqs; -#endif /* C_SIMPLE_MODEL_H */ - - -//----------------------------------------------------------------------------- -// Bits we regenerate for each NSYM value. - -typedef struct { - uint32_t TotFreq; // Total frequency - - // Array of Symbols approximately sorted by Freq. - SymFreqs sentinel, F[NSYM+1], terminal; -} SIMPLE_MODEL(NSYM,_); - - -static inline void SIMPLE_MODEL(NSYM,_init)(SIMPLE_MODEL(NSYM,_) *m, int max_sym) { - int i; - - for (i=0; iF[i].Symbol = i; - m->F[i].Freq = 1; - } - for (; iF[i].Symbol = i; - m->F[i].Freq = 0; - } - - m->TotFreq = max_sym; - m->sentinel.Symbol = 0; - m->sentinel.Freq = MAX_FREQ; // Always first; simplifies sorting. - m->terminal.Symbol = 0; - m->terminal.Freq = MAX_FREQ; - m->F[NSYM].Freq = 0; // terminates normalize() loop. See below. 
-} - - -static inline void SIMPLE_MODEL(NSYM,_normalize)(SIMPLE_MODEL(NSYM,_) *m) { - SymFreqs *s; - - /* Faster than F[i].Freq for 0 <= i < NSYM */ - m->TotFreq=0; - for (s = m->F; s->Freq; s++) { - s->Freq -= s->Freq>>1; - m->TotFreq += s->Freq; - } -} - -static inline void SIMPLE_MODEL(NSYM,_encodeSymbol)(SIMPLE_MODEL(NSYM,_) *m, - RangeCoder *rc, uint16_t sym) { - SymFreqs *s = m->F; - uint32_t AccFreq = 0; - - while (s->Symbol != sym) - AccFreq += s++->Freq; - - RC_Encode(rc, AccFreq, s->Freq, m->TotFreq); - s->Freq += STEP; - m->TotFreq += STEP; - - if (m->TotFreq > MAX_FREQ) - SIMPLE_MODEL(NSYM,_normalize)(m); - - /* Keep approx sorted */ - if (s[0].Freq > s[-1].Freq) { - SymFreqs t = s[0]; - s[0] = s[-1]; - s[-1] = t; - } -} - -static inline uint16_t SIMPLE_MODEL(NSYM,_decodeSymbol)(SIMPLE_MODEL(NSYM,_) *m, RangeCoder *rc) { - SymFreqs* s = m->F; - uint32_t freq = RC_GetFreq(rc, m->TotFreq); - uint32_t AccFreq; - - if (freq > MAX_FREQ) - return 0; // error - - for (AccFreq = 0; (AccFreq += s->Freq) <= freq; s++) - ; - if (s - m->F > NSYM) - return 0; // error - - AccFreq -= s->Freq; - - RC_Decode(rc, AccFreq, s->Freq, m->TotFreq); - s->Freq += STEP; - m->TotFreq += STEP; - - if (m->TotFreq > MAX_FREQ) - SIMPLE_MODEL(NSYM,_normalize)(m); - - /* Keep approx sorted */ - if (s[0].Freq > s[-1].Freq) { - SymFreqs t = s[0]; - s[0] = s[-1]; - s[-1] = t; - return t.Symbol; - } - - return s->Symbol; -} diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/fqzcomp_qual.c b/src/htslib-1.19.1/htscodecs/htscodecs/fqzcomp_qual.c deleted file mode 100644 index 4d96cb9..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/fqzcomp_qual.c +++ /dev/null @@ -1,1630 +0,0 @@ -/* - * Copyright (c) 2011-2013, 2018-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// We use generic maps to turn 0-M into 0-N where N <= M -// before adding these into the context. These are used -// for positions, running-diffs and quality values. -// -// This can be used as a simple divisor, eg pos/24 to get -// 2 bits of positional data for each quarter along a 100bp -// read, or it can be tailored for specific such as noting -// the first 5 cycles are poor, then we have stability and -// a gradual drop off in the last 20 or so. Perhaps we then -// map pos 0-4=0, 5-79=1, 80-89=2, 90-99=3. 
-// -// We don't need to specify how many bits of data we are -// using (2 in the above example), as that is just implicit -// in the values in the map. Specify not to use a map simply -// disables that context type (our map is essentially 0-M -> 0). - -// Example of command line usage: -// -// f=~/scratch/data/q4 -// cc -Wall -DTEST_MAIN -O3 -g fqzcomp_qual2.c -lm -// ./a.out $f > /tmp/_ && ./a.out -d < /tmp/_ > /tmp/__ && cmp /tmp/__ $f - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "fqzcomp_qual.h" -#include "varint.h" -#include "utils.h" - -#define CTX_BITS 16 -#define CTX_SIZE (1<(b)?(a):(b)) -#endif - -#define QMAX 256 -#define QBITS 12 -#define QSIZE (1< 255 therefore means we need to repeatedly read to find -// the actual run length. -// Alternatively we could bit-encode instead of byte encode, eg BETA. -static int store_array(unsigned char *out, unsigned int *array, int size) { - unsigned char tmp[2048]; - - int i, j, k; - for (i = j = k = 0; i < size; j++) { - int run_len = i; - while (i < size && array[i] == j) - i++; - run_len = i-run_len; - - int r; - do { - r = MIN(255, run_len); - tmp[k++] = r; - run_len -= r; - } while (r == 255); - } - while (i < size) - tmp[k++] = 0, j++; - - // RLE on out. - // 1 2 3 3 3 3 3 4 4 5 - // => 1 2 3 3 +3... 
4 4 +0 5 - int last = -1; - for (i = j = 0; j < k; i++) { - out[i] = tmp[j++]; - if (out[i] == last) { - int n = j; - while (j < k && tmp[j] == last) - j++; - out[++i] = j-n; - } else { - last = out[i]; - } - } - k = i; - -// fprintf(stderr, "Store_array %d => %d {", size, k); -// for (i = 0; i < k; i++) -// fprintf(stderr, "%d,", out[i]); -// fprintf(stderr, "}\n"); - return k; -} - -static int read_array(unsigned char *in, size_t in_size, unsigned int *array, int size) { - unsigned char R[1024]; - int i, j, z, last = -1, nb = 0; - - size = MIN(1024, size); - - // Remove level one of run-len encoding - for (i = j = z = 0; z < size && i < in_size; i++) { - int run = in[i]; - R[j++] = run; - z += run; - if (run == last) { - if (i+1 >= in_size) - return -1; - int copy = in[++i]; - z += run * copy; - while (copy-- && z <= size && j < 1024) - R[j++] = run; - } - if (j >= 1024) - return -1; - last = run; - } - nb = i; - - // Now expand inner level of run-length encoding - int R_max = j; - for (i = j = z = 0; j < size; i++) { - int run_len = 0; - int run_part; - if (z >= R_max) - return -1; - do { - run_part = R[z++]; - run_len += run_part; - } while (run_part == 255 && z < R_max); - if (run_part == 255) - return -1; - - while (run_len && j < size) - run_len--, array[j++] = i; - } - - return nb; -} - -// FIXME: how to auto-tune these rather than trial and error? -// r2 = READ2 -// qa = qual avg (0, 2, 4) -static int strat_opts[][12] = { -// qb qs pb ps db ds ql sl pl dl r2 qa - {10, 5, 4,-1, 2, 1, 0, 14, 10, 14, 0,-1}, // basic options (level < 7) - {8, 5, 7, 0, 0, 0, 0, 14, 8, 14, 1,-1}, // e.g. HiSeq 2000 - {12, 6, 2, 0, 2, 3, 0, 9, 12, 14, 0, 0}, // e.g. MiSeq - {12, 6, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0}, // e.g. 
IonTorrent; adaptive O1 - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // custom -}; -static int nstrats = sizeof(strat_opts) / sizeof(*strat_opts); - -#ifdef HAVE_BUILTIN_PREFETCH -static inline void mm_prefetch(void *x) { - __builtin_prefetch(x); -} -#else -static inline void mm_prefetch(void *x) { - // Fetch and discard is quite close to a genuine prefetch - *(volatile int *)x; -} -#endif - -typedef struct { - unsigned int qctx; // quality sub-context - unsigned int p; // pos (bytes remaining) - unsigned int delta; // delta running total - unsigned int prevq; // previous quality - unsigned int s; // selector - unsigned int qtot, qlen; - unsigned int first_len; - unsigned int last_len; - ssize_t rec; - unsigned int ctx; -} fqz_state; - -static void dump_table(unsigned int *tab, int size, char *name) { - int i, last = -99, run = 0; - fprintf(stderr, "\t%s\t{", name); - for (i = 0; i < size; i++) { - if (tab[i] == last) { - run++; - } else if (run == 1 && tab[i] == last+1) { - int first = last; - do { - last = tab[i]; - i++; - } while (i < size && tab[i] == last+1); - i--; - - // Want 0,1,2,3,3,3 as 0..2 3x3, not 0..3 3x2 - if (tab[i] == tab[i+1]) - i--; - if (tab[i] != first) - fprintf(stderr, "..%d", tab[i]); - run = 1; - last = -99; - } else { - if (run > 1) - fprintf(stderr, " x %d%s%d", run, i?", ":"", tab[i]); - else - fprintf(stderr, "%s%d", i?", ":"", tab[i]); - run = 1; - last = tab[i]; - } - } - if (run > 1) - fprintf(stderr, " x %d", run); - fprintf(stderr, "}\n"); -} - -static void dump_map(unsigned int *map, int size, char *name) { - int i, c = 0; - fprintf(stderr, "\t%s\t{", name); - for (i = 0; i < size; i++) - if (map[i] != INT_MAX) - fprintf(stderr, "%s%d=%d", c++?", ":"", i, map[i]); - fprintf(stderr, "}\n"); -} - -/* #pragma GCC diagnostic ignored "-Wunused-function" */ -static void dump_params(fqz_gparams *gp) { - fprintf(stderr, "Global params = {\n"); - fprintf(stderr, "\tvers\t%d\n", gp->vers); - fprintf(stderr, "\tgflags\t0x%02x\n", gp->gflags); - 
fprintf(stderr, "\tnparam\t%d\n", gp->nparam); - fprintf(stderr, "\tmax_sel\t%d\n", gp->max_sel); - fprintf(stderr, "\tmax_sym\t%d\n", gp->max_sym); - if (gp->gflags & GFLAG_HAVE_STAB) - dump_table(gp->stab, 256, "stab"); - fprintf(stderr, "}\n"); - - int i; - for (i = 0; i < gp->nparam; i++) { - fqz_param *pm = &gp->p[i]; - fprintf(stderr, "\nParam[%d] = {\n", i); - fprintf(stderr, "\tcontext\t0x%04x\n", pm->context); - fprintf(stderr, "\tpflags\t0x%02x\n", pm->pflags); - fprintf(stderr, "\tmax_sym\t%d\n", pm->max_sym); - fprintf(stderr, "\tqbits\t%d\n", pm->qbits); - fprintf(stderr, "\tqshift\t%d\n", pm->qshift); - fprintf(stderr, "\tqloc\t%d\n", pm->qloc); - fprintf(stderr, "\tsloc\t%d\n", pm->sloc); - fprintf(stderr, "\tploc\t%d\n", pm->ploc); - fprintf(stderr, "\tdloc\t%d\n", pm->dloc); - - if (pm->pflags & PFLAG_HAVE_QMAP) - dump_map(pm->qmap, 256, "qmap"); - - if (pm->pflags & PFLAG_HAVE_QTAB) - dump_table(pm->qtab, 256, "qtab"); - if (pm->pflags & PFLAG_HAVE_PTAB) - dump_table(pm->ptab, 1024, "ptab"); - if (pm->pflags & PFLAG_HAVE_DTAB) - dump_table(pm->dtab, 256, "dtab"); - fprintf(stderr, "}\n"); - } -} - -typedef struct { - SIMPLE_MODEL(QMAX,_) *qual; - SIMPLE_MODEL(256,_) len[4]; - SIMPLE_MODEL(2,_) revcomp; - SIMPLE_MODEL(256,_) sel; - SIMPLE_MODEL(2,_) dup; -} fqz_model; - -static int fqz_create_models(fqz_model *m, fqz_gparams *gp) { - int i; - - if (!(m->qual = htscodecs_tls_alloc(sizeof(*m->qual) * CTX_SIZE))) - return -1; - - for (i = 0; i < CTX_SIZE; i++) - SIMPLE_MODEL(QMAX,_init)(&m->qual[i], gp->max_sym+1); - - for (i = 0; i < 4; i++) - SIMPLE_MODEL(256,_init)(&m->len[i],256); - - SIMPLE_MODEL(2,_init)(&m->revcomp,2); - SIMPLE_MODEL(2,_init)(&m->dup,2); - if (gp->max_sel > 0) - SIMPLE_MODEL(256,_init)(&m->sel, gp->max_sel+1); - - return 0; -} - -static void fqz_destroy_models(fqz_model *m) { - htscodecs_tls_free(m->qual); -} - -static inline unsigned int fqz_update_ctx(fqz_param *pm, fqz_state *state, int q) { - unsigned int last = 0; // 
pm->context - state->qctx = (state->qctx << pm->qshift) + pm->qtab[q]; - last += (state->qctx & pm->qmask) << pm->qloc; - - // The final shifts have been factored into the tables already. - last += pm->ptab[MIN(1023, state->p)]; // << pm->ploc - last += pm->dtab[MIN(255, state->delta)]; // << pm->dloc - last += state->s << pm->sloc; - - // On the fly average is slow work. - // However it can be slightly better than using a selector bit - // as it's something we can compute on the fly and thus doesn't - // consume output bits for storing the selector itself. - // - // Q4 (novaseq.bam) - // qtot+=q*q -DQ1=8.84 -DQ2=8.51 -DQ3=7.70; 7203598 (-0.7%) - // qtot+=q -DQ1=2.96 -DQ2=2.85 -DQ3=2.69; 7207315 - // vs old delta; 7255614 (default params) - // vs 2 bit selector (no delta) 7203006 (-x 0x8261000e80) - // vs 2 bit selector (no delta) 7199153 (-x 0x7270000e70) -0.8% - // vs 2 bit selector (no delta) 7219668 (-x 0xa243000ea0) - //{ - // double qa = state->qtot / (state->qlen+.01); - // //fprintf(stderr, "%f\n", qa); - // int x = 0; - // if (qa>=Q1) x=3; - // else if (qa>=Q2) x=2; - // else if (qa>=Q3) x=1; - // else x=0; - // last += x << pm->dloc; // tmp reuse of delta pos - // state->qtot += q*q; - // state->qlen++; - //} - - // Only update delta after 1st base. - state->delta += (state->prevq != q); - state->prevq = q; - - state->p--; - - return last & (CTX_SIZE-1); -} - -// Build quality stats for qhist and set nsym, do_dedup and do_sel params. -// One_param is -1 to gather stats on all data, or >= 0 to gather data -// on one specific selector parameter. Used only in TEST_MAIN via -// fqz_manual_parameters at the moment. 
-void fqz_qual_stats(fqz_slice *s, - unsigned char *in, size_t in_size, - fqz_param *pm, - uint32_t qhist[256], - int one_param) { -#define NP 32 - uint32_t qhistb[NP][256] = {{0}}; // both - uint32_t qhist1[NP][256] = {{0}}; // READ1 only - uint32_t qhist2[NP][256] = {{0}}; // READ2 only - uint64_t t1[NP] = {0}; // Count for READ1 - uint64_t t2[NP] = {0}; // COUNT for READ2 - uint32_t avg[2560] = {0}; // Avg qual *and later* avg-to-selector map. - - int dir = 0; - int last_len = 0; - int do_dedup = 0; - size_t rec; - size_t i, j; - int num_rec = 0; - - // See what info we've been given. - // Do we have READ1 / READ2? - // Do we have selector hidden in the top bits of flag? - int max_sel = 0; - int has_r2 = 0; - for (rec = 0; rec < s->num_records; rec++) { - if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) - continue; - num_rec++; - if (max_sel < (s->flags[rec] >> 16)) - max_sel = (s->flags[rec] >> 16); - if (s->flags[rec] & FQZ_FREAD2) - has_r2 = 1; - } - - // Dedup detection and histogram stats gathering - int *avg_qual = calloc((s->num_records+1), sizeof(int)); - if (!avg_qual) - return; - - rec = i = j = 0; - while (i < in_size) { - if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) { - avg_qual[rec] = 0; - i += s->len[rec++]; - continue; - } - if (rec < s->num_records) { - j = s->len[rec]; - dir = s->flags[rec] & FQZ_FREAD2 ? 1 : 0; - if (i > 0 && j == last_len - && !memcmp(in+i-last_len, in+i, j)) - do_dedup++; // cache which records are dup? - } else { - j = in_size - i; - dir = 0; - } - last_len = j; - - uint32_t (*qh)[256] = dir ? qhist2 : qhist1; - uint64_t *th = dir ? t2 : t1; - - uint32_t tot = 0; - for (; i < in_size && j > 0; i++, j--) { - tot += in[i]; - qhist[in[i]]++; - qhistb[j & (NP-1)][in[i]]++; - qh[j & (NP-1)][in[i]]++; - th[j & (NP-1)]++; - } - tot = last_len ? 
(tot*10.0)/last_len+.5 : 0; - - avg_qual[rec] = tot; - avg[MIN(2559, tot)]++; - - rec++; - } - pm->do_dedup = ((rec+1)/(do_dedup+1) < 500); - - last_len = 0; - - // Unique symbol count - for (i = pm->max_sym = pm->nsym = 0; i < 256; i++) { - if (qhist[i]) - pm->max_sym = i, pm->nsym++; - } - - - // Auto tune: does average quality helps us? - if (pm->do_qa != 0) { - // Histogram of average qual in avg[] - // NB: we convert avg[] from count to selector index - - // Few symbols means high compression which means - // selector bits become more significant fraction. - // Reduce selector bits by skewing the distribution - // to not be even binning. - double qf0 = pm->nsym > 8 ? 0.2 : 0.05; - double qf1 = pm->nsym > 8 ? 0.5 : 0.22; - double qf2 = pm->nsym > 8 ? 0.8 : 0.60; - - int total = 0; - i = 0; - while (i < 2560) { - total += avg[i]; - if (total > qf0 * num_rec) { - //fprintf(stderr, "Q1=%d\n", (int)i); - break; - } - avg[i++] = 0; - } - while (i < 2560) { - total += avg[i]; - if (total > qf1 * num_rec) { - //fprintf(stderr, "Q2=%d\n", (int)i); - break; - } - avg[i++] = 1; - } - while (i < 2560) { - total += avg[i]; - if (total > qf2 * num_rec) { - //fprintf(stderr, "Q3=%d\n", (int)i); - break; - } - avg[i++] = 2; - } - while (i < 2560) - avg[i++] = 3; - - // Compute simple entropy of merged signal vs split signal. 
- i = 0; - rec = 0; - - int qbin4[4][NP][256] = {{{0}}}; - int qbin2[2][NP][256] = {{{0}}}; - int qbin1 [NP][256] = {{0}}; - int qcnt4[4][NP] = {{0}}; - int qcnt2[4][NP] = {{0}}; - int qcnt1 [NP] = {0}; - while (i < in_size) { - if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) { - i += s->len[rec++]; - continue; - } - if ((rec & 7) && rec < s->num_records) { - // subsample for speed - i += s->len[rec++]; - continue; - } - if (rec < s->num_records) - j = s->len[rec]; - else - j = in_size - i; - last_len = j; - - uint32_t tot = avg_qual[rec]; - int qb4 = avg[MIN(2559, tot)]; - int qb2 = qb4/2; - - for (; i < in_size && j > 0; i++, j--) { - int x = j & (NP-1); - qbin4[qb4][x][in[i]]++; qcnt4[qb4][x]++; - qbin2[qb2][x][in[i]]++; qcnt2[qb2][x]++; - qbin1 [x][in[i]]++; qcnt1 [x]++; - } - rec++; - } - - double e1 = 0, e2 = 0, e4 = 0; - for (j = 0; j < NP; j++) { - for (i = 0; i < 256; i++) { - if (qbin1 [j][i]) e1 += qbin1 [j][i] * fast_log(qbin1 [j][i] / (double)qcnt1 [j]); - if (qbin2[0][j][i]) e2 += qbin2[0][j][i] * fast_log(qbin2[0][j][i] / (double)qcnt2[0][j]); - if (qbin2[1][j][i]) e2 += qbin2[1][j][i] * fast_log(qbin2[1][j][i] / (double)qcnt2[1][j]); - if (qbin4[0][j][i]) e4 += qbin4[0][j][i] * fast_log(qbin4[0][j][i] / (double)qcnt4[0][j]); - if (qbin4[1][j][i]) e4 += qbin4[1][j][i] * fast_log(qbin4[1][j][i] / (double)qcnt4[1][j]); - if (qbin4[2][j][i]) e4 += qbin4[2][j][i] * fast_log(qbin4[2][j][i] / (double)qcnt4[2][j]); - if (qbin4[3][j][i]) e4 += qbin4[3][j][i] * fast_log(qbin4[3][j][i] / (double)qcnt4[3][j]); - } - } - e1 /= -log(2)/8; - e2 /= -log(2)/8; - e4 /= -log(2)/8; - //fprintf(stderr, "E1=%f E2=%f E4=%f %f\n", e1, e2+s->num_records/8, e4+s->num_records/4, (e4+s->num_records/4)/(e2+s->num_records/8)); - - // Note by using the selector we're robbing bits from elsewhere in - // the context, which may reduce compression better. - // We don't know how much by, so this is basically a guess! - // For now we just say need 5% saving here. 
- double qm = pm->do_qa > 0 ? 1 : 0.98; - if ((pm->do_qa == -1 || pm->do_qa >= 4) && - e4 + s->num_records/4 < e2*qm + s->num_records/8 && - e4 + s->num_records/4 < e1*qm) { - //fprintf(stderr, "do q4\n"); - for (i = 0; i < s->num_records; i++) { - //fprintf(stderr, "%d -> %d -> %d, %d\n", (int)i, avg_qual[i], avg[MIN(2559, avg_qual[i])], s->flags[i]>>16); - s->flags[i] |= avg[MIN(2559, avg_qual[i])] <<16; - } - pm->do_sel = 1; - max_sel = 3; - } else if ((pm->do_qa == -1 || pm->do_qa >= 2) && e2 + s->num_records/8 < e1*qm) { - //fprintf(stderr, "do q2\n"); - for (i = 0; i < s->num_records; i++) - s->flags[i] |= (avg[MIN(2559, avg_qual[i])]>>1) <<16; - pm->do_sel = 1; - max_sel = 1; - } - - if (pm->do_qa == -1) { - // assume qual, pos, delta in that order. - if (pm->pbits > 0 && pm->dbits > 0) { - // 1 from pos/delta - pm->sloc = pm->dloc-1; - pm->pbits--; - pm->dbits--; - pm->dloc++; - } else if (pm->dbits >= 2) { - // 2 from delta - pm->sloc = pm->dloc; - pm->dbits -= 2; - pm->dloc += 2; - } else if (pm->qbits >= 2) { - pm->qbits -= 2; - pm->ploc -= 2; - pm->sloc = 16-2 - pm->do_r2; - if (pm->qbits == 6 && pm->qshift == 5) - pm->qbits--; - } - pm->do_qa = 4; - } - } - - // Auto tune: does splitting up READ1 and READ2 help us? - if (has_r2 || pm->do_r2) { // FIXME: && but debug for now - double e1 = 0, e2 = 0; // entropy sum - - for (j = 0; j < NP; j++) { - if (!t1[j] || !t2[j]) continue; - for (i = 0; i < 256; i++) { - if (!qhistb[j][i]) continue; - e1 -= (qhistb[j][i])*log(qhistb[j][i] / (double)(t1[j]+t2[j])); - if (qhist1[j][i]) - e2 -= qhist1[j][i] * log(qhist1[j][i] / (double)t1[j]); - if (qhist2[j][i]) - e2 -= qhist2[j][i] * log(qhist2[j][i] / (double)t2[j]); - } - } - e1 /= log(2)*8; // bytes - e2 /= log(2)*8; - - //fprintf(stderr, "read1/2 entropy merge %f split %f\n", e1, e2); - - // Note by using the selector we're robbing bits from elsewhere in - // the context, which may reduce compression better. 
- // We don't know how much by, so this is basically a guess! - // For now we just say need 5% saving here. - double qm = pm->do_r2 > 0 ? 1 : 0.95; - if (e2 + (8+s->num_records/8) < e1*qm) { - for (rec = 0; rec < s->num_records; rec++) { - if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) - continue; - int sel = s->flags[rec] >> 16; - s->flags[rec] = (s->flags[rec] & 0xffff) - | ((s->flags[rec] & FQZ_FREAD2) - ? ((sel*2)+1) << 16 - : ((sel*2)+0) << 16); - if (max_sel < (s->flags[rec]>>16)) - max_sel = (s->flags[rec]>>16); - } - } - } - - // We provided explicit selector data or auto-tuned it - if (max_sel > 0) { - pm->do_sel = 1; - pm->max_sel = max_sel; - } - - free(avg_qual); -} - -static inline -int fqz_store_parameters1(fqz_param *pm, unsigned char *comp) { - int comp_idx = 0, i, j; - - // Starting context - comp[comp_idx++] = pm->context; - comp[comp_idx++] = pm->context >> 8; - - comp[comp_idx++] = pm->pflags; - comp[comp_idx++] = pm->max_sym; - - comp[comp_idx++] = (pm->qbits<<4)|pm->qshift; - comp[comp_idx++] = (pm->qloc<<4)|pm->sloc; - comp[comp_idx++] = (pm->ploc<<4)|pm->dloc; - - if (pm->store_qmap) { - for (i = j = 0; i < 256; i++) - if (pm->qmap[i] != INT_MAX) - comp[comp_idx++] = i; - } - - if (pm->qbits && pm->use_qtab) - // custom qtab - comp_idx += store_array(comp+comp_idx, pm->qtab, 256); - - if (pm->pbits && pm->use_ptab) - // custom ptab - comp_idx += store_array(comp+comp_idx, pm->ptab, 1024); - - if (pm->dbits && pm->use_dtab) - // custom dtab - comp_idx += store_array(comp+comp_idx, pm->dtab, 256); - - return comp_idx; -} - -static -int fqz_store_parameters(fqz_gparams *gp, unsigned char *comp) { - int comp_idx = 0; - comp[comp_idx++] = gp->vers; // Format number - - comp[comp_idx++] = gp->gflags; - - if (gp->gflags & GFLAG_MULTI_PARAM) - comp[comp_idx++] = gp->nparam; - - if (gp->gflags & GFLAG_HAVE_STAB) { - comp[comp_idx++] = gp->max_sel; - comp_idx += store_array(comp+comp_idx, gp->stab, 256); - } - - int i; - for (i = 0; i < 
gp->nparam; i++) - comp_idx += fqz_store_parameters1(&gp->p[i], comp+comp_idx); - - //fprintf(stderr, "Encoded %d bytes of param\n", comp_idx); - return comp_idx; -} - -// Choose a set of parameters based on quality statistics and -// some predefined options (selected via "strat"). -static inline -int fqz_pick_parameters(fqz_gparams *gp, - int vers, - int strat, - fqz_slice *s, - unsigned char *in, - size_t in_size) { - //approx sqrt(delta), must be sequential - int dsqr[] = { - 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - }; - uint32_t qhist[256] = {0}; - - if (strat >= nstrats) strat = nstrats-1; - - // Start with 1 set of parameters. - // FIXME: add support for multiple params later. - memset(gp, 0, sizeof(*gp)); - gp->vers = FQZ_VERS; - - if (!(gp->p = calloc(1, sizeof(fqz_param)))) - return -1; - gp->nparam = 1; - gp->max_sel = 0; - - if (vers == 3) // V3.0 doesn't store qual in original orientation - gp->gflags |= GFLAG_DO_REV; - - fqz_param *pm = gp->p; - - // Programmed strategies, which we then amend based on our - // statistical analysis of the quality stream. - pm->qbits = strat_opts[strat][0]; - pm->qshift = strat_opts[strat][1]; - pm->pbits = strat_opts[strat][2]; - pm->pshift = strat_opts[strat][3]; - pm->dbits = strat_opts[strat][4]; - pm->dshift = strat_opts[strat][5]; - pm->qloc = strat_opts[strat][6]; - pm->sloc = strat_opts[strat][7]; - pm->ploc = strat_opts[strat][8]; - pm->dloc = strat_opts[strat][9]; - - // Params for controlling behaviour here. 
- pm->do_r2 = strat_opts[strat][10]; - pm->do_qa = strat_opts[strat][11]; - - // Validity check input lengths and buffer size - size_t tlen = 0, i; - for (i = 0; i < s->num_records; i++) { - if (tlen + s->len[i] > in_size) - // Oversized buffer - s->len[i] = in_size - tlen; - tlen += s->len[i]; - } - if (s->num_records > 0 && tlen < in_size) - // Undersized buffer - s->len[s->num_records-1] += in_size - tlen; - - // Quality metrics, for all recs - fqz_qual_stats(s, in, in_size, pm, qhist, -1); - - pm->store_qmap = (pm->nsym <= 8 && pm->nsym*2 < pm->max_sym); - - // Check for fixed length. - uint32_t first_len = s->len[0]; - for (i = 1; i < s->num_records; i++) { - if (s->len[i] != first_len) - break; - } - pm->fixed_len = (i == s->num_records); - pm->use_qtab = 0; // unused by current encoder - - if (strat >= nstrats-1) - goto manually_set; // used in TEST_MAIN for debugging - - if (pm->pshift < 0) - pm->pshift = MAX(0, log((double)s->len[0]/(1<pbits))/log(2)+.5); - - if (pm->nsym <= 4) { - // NovaSeq - pm->qshift = 2; // qmax 64, although we can store up to 256 if needed - if (in_size < 5000000) { - pm->pbits =2; - pm->pshift=5; - } - } else if (pm->nsym <= 8) { - // HiSeqX - pm->qbits =MIN(pm->qbits,9); - pm->qshift=3; - if (in_size < 5000000) - pm->qbits =6; - } - - if (in_size < 300000) { - pm->qbits=pm->qshift; - pm->dbits=2; - } - - manually_set: -// fprintf(stderr, "-x 0x%x%x%x%x%x%x%x%x%x%x%x%x\n", -// pm->qbits, pm->qshift, -// pm->pbits, pm->pshift, -// pm->dbits, pm->dshift, -// pm->qloc, pm->sloc, pm->ploc, pm->dloc, -// pm->do_r2, pm->do_qa); - - for (i = 0; i < sizeof(dsqr)/sizeof(*dsqr); i++) - if (dsqr[i] > (1<dbits)-1) - dsqr[i] = (1<dbits)-1; - - if (pm->store_qmap) { - int j; - for (i = j = 0; i < 256; i++) - if (qhist[i]) - pm->qmap[i] = j++; - else - pm->qmap[i] = INT_MAX; - pm->max_sym = pm->nsym; - } else { - pm->nsym = 255; - for (i = 0; i < 256; i++) - pm->qmap[i] = i; - } - if (gp->max_sym < pm->max_sym) - gp->max_sym = pm->max_sym; - - // 
Produce ptab from pshift. - if (pm->qbits) { - for (i = 0; i < 256; i++) { - pm->qtab[i] = i; // 1:1 - - // Alternative mappings: - //qtab[i] = i > 30 ? MIN(max_sym,i)-15 : i/2; // eg for 9827 BAM - } - - } - pm->qmask = (1<qbits)-1; - - if (pm->pbits) { - for (i = 0; i < 1024; i++) - pm->ptab[i] = MIN((1<pbits)-1, i>>pm->pshift); - - // Alternatively via analysis of quality distributions we - // may select a bunch of positions that are special and - // have a non-uniform ptab[]. - // Manual experimentation on a NovaSeq run saved 2.8% here. - } - - if (pm->dbits) { - for (i = 0; i < 256; i++) - pm->dtab[i] = dsqr[MIN(sizeof(dsqr)/sizeof(*dsqr)-1, i>>pm->dshift)]; - } - - pm->use_ptab = (pm->pbits > 0); - pm->use_dtab = (pm->dbits > 0); - - pm->pflags = - (pm->use_qtab ?PFLAG_HAVE_QTAB :0)| - (pm->use_dtab ?PFLAG_HAVE_DTAB :0)| - (pm->use_ptab ?PFLAG_HAVE_PTAB :0)| - (pm->do_sel ?PFLAG_DO_SEL :0)| - (pm->fixed_len ?PFLAG_DO_LEN :0)| - (pm->do_dedup ?PFLAG_DO_DEDUP :0)| - (pm->store_qmap ?PFLAG_HAVE_QMAP :0); - - gp->max_sel = 0; - if (pm->do_sel) { - // 2 selectors values, but 1 parameter block. - // We'll use the sloc instead to encode the selector bits into - // the context. - gp->max_sel = 1; // indicator to check recs - gp->gflags |= GFLAG_HAVE_STAB; - // NB: stab is already all zero - } - - if (gp->max_sel && s->num_records) { - int max = 0; - for (i = 0; i < s->num_records; i++) { - if (max < (s->flags[i] >> 16)) - max = (s->flags[i] >> 16); - } - gp->max_sel = max; - } - - return 0; -} - -static void fqz_free_parameters(fqz_gparams *gp) { - if (gp && gp->p) free(gp->p); -} - -static int compress_new_read(fqz_slice *s, - fqz_state *state, - fqz_gparams *gp, - fqz_param *pm, - fqz_model *model, - RangeCoder *rc, - unsigned char *in, - size_t *in_i, // in[in_i], - unsigned int *last) { - ssize_t rec = state->rec; - size_t i = *in_i; - if (pm->do_sel || (gp->gflags & GFLAG_MULTI_PARAM)) { - state->s = rec < s->num_records - ? 
s->flags[rec] >> 16 // reuse spare bits - : 0; - SIMPLE_MODEL(256,_encodeSymbol)(&model->sel, rc, state->s); - } else { - state->s = 0; - } - int x = (gp->gflags & GFLAG_HAVE_STAB) ? gp->stab[state->s] : state->s; - pm = &gp->p[x]; - - int len = s->len[rec]; - if (!pm->fixed_len || state->first_len) { - SIMPLE_MODEL(256,_encodeSymbol)(&model->len[0], rc, (len>> 0) & 0xff); - SIMPLE_MODEL(256,_encodeSymbol)(&model->len[1], rc, (len>> 8) & 0xff); - SIMPLE_MODEL(256,_encodeSymbol)(&model->len[2], rc, (len>>16) & 0xff); - SIMPLE_MODEL(256,_encodeSymbol)(&model->len[3], rc, (len>>24) & 0xff); - state->first_len = 0; - } - - if (gp->gflags & GFLAG_DO_REV) { - // no need to reverse complement for V4.0 as the core format - // already has this feature. - if (s->flags[rec] & FQZ_FREVERSE) - SIMPLE_MODEL(2,_encodeSymbol)(&model->revcomp, rc, 1); - else - SIMPLE_MODEL(2,_encodeSymbol)(&model->revcomp, rc, 0); - } - - state->rec++; - - state->qtot = 0; - state->qlen = 0; - - state->p = len; - state->delta = 0; - state->qctx = 0; - state->prevq = 0; - - *last = pm->context; - - if (pm->do_dedup) { - // Possible dup of previous read? 
- if (i && len == state->last_len && - !memcmp(in+i-state->last_len, in+i, len)) { - SIMPLE_MODEL(2,_encodeSymbol)(&model->dup, rc, 1); - i += len-1; - state->p = 0; - *in_i = i; - return 1; // is a dup - } else { - SIMPLE_MODEL(2,_encodeSymbol)(&model->dup, rc, 0); - } - - state->last_len = len; - } - - *in_i = i; - - return 0; // not dup -} - -static -unsigned char *compress_block_fqz2f(int vers, - int strat, - fqz_slice *s, - unsigned char *in, - size_t in_size, - size_t *out_size, - fqz_gparams *gp) { - fqz_gparams local_gp; - int free_params = 0; - - unsigned int last = 0; - size_t i, j; - ssize_t rec = 0; - - int comp_idx = 0; - RangeCoder rc; - - // Pick and store params - if (!gp) { - gp = &local_gp; - if (fqz_pick_parameters(gp, vers, strat, s, in, in_size) < 0) - return NULL; - free_params = 1; - } - - // Worst case scenario assuming random input data and no way to compress - // is NBytes*growth for some small growth factor (arith_dynamic uses 1.05), - // plus fixed overheads for the header / params. Growth can be high - // here as we're modelling things and pathological cases may trigger a - // bad probability model. - // - // Per read is 4-byte len if not fixed length (but less if avg smaller) - // up to 1 byte for selection state (log2(max_sel) bits) - // 1-bit for reverse flag - // 1-bit for dup-last flag (but then no quals) - // Per qual is 1-byte (assuming QMAX==256) - // - // Header size is total guess, as depends on params, but it's almost - // always tiny, so a few K extra should be sufficient. - // - // => Total of (s->num_records*4.25 + in_size)*growth + hdr - int sel_bits = 0, sel = gp->max_sel; - while (sel) { - sel_bits++; - sel >>= 1; - } - double len_sz = gp->p[0].fixed_len ? 
0.25 : 4.25; - len_sz += sel_bits / 8.0; - size_t comp_sz = (s->num_records*len_sz + in_size)*1.1 + 10000; - - unsigned char *comp = (unsigned char *)malloc(comp_sz); - unsigned char *compe = comp + (size_t)comp_sz; - if (!comp) - return NULL; - - //dump_params(gp); - comp_idx = var_put_u32(comp, compe, in_size); - comp_idx += fqz_store_parameters(gp, comp+comp_idx); - - fqz_param *pm; - - // Optimise tables to remove shifts in loop (NB: cannot do this in next vers) - for (j = 0; j < gp->nparam; j++) { - pm = &gp->p[j]; - - for (i = 0; i < 1024; i++) - pm->ptab[i] <<= pm->ploc; - - for (i = 0; i < 256; i++) - pm->dtab[i] <<= pm->dloc; - } - - // Create models and initialise range coder - fqz_model model; - if (fqz_create_models(&model, gp) < 0) - return NULL; - - RC_SetOutput(&rc, (char *)comp+comp_idx); - RC_SetOutputEnd(&rc, (char *)comp+comp_sz); - RC_StartEncode(&rc); - - // For CRAM3.1, reverse upfront if needed - pm = &gp->p[0]; - if (gp->gflags & GFLAG_DO_REV) { - i = rec = j = 0; - while (i < in_size) { - int len = rec < s->num_records-1 - ? s->len[rec] : in_size - i; - - if (s->flags[rec] & FQZ_FREVERSE) { - // Reverse complement sequence - note: modifies buffer - int I,J; - unsigned char *cp = in+i; - for (I = 0, J = len-1; I < J; I++, J--) { - unsigned char c; - c = cp[I]; - cp[I] = cp[J]; - cp[J] = c; - } - } - - i += len; - rec++; - } - rec = 0; - } - - fqz_state state = {0}; - pm = &gp->p[0]; - state.p = 0; - state.first_len = 1; - state.last_len = 0; - state.rec = rec; - - for (i = 0; i < in_size; i++) { - if (state.p == 0) { - if (state.rec >= s->num_records || s->len[state.rec] <= 0) { - free(comp); - comp = NULL; - goto err; - } - - if (compress_new_read(s, &state, gp, pm, &model, &rc, - in, &i, /*&rec,*/ &last)) - continue; - } - -#if 0 - // fqz_qual_stats imp. 
- // q40 6.876 6.852 5.96 - // q4 6.566 5.07 - // _Q 1.383 1.11 - unsigned char q = in[i]; - unsigned char qm = pm->qmap[q]; - - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[last], &rc, qm); - last = fqz_update_ctx(pm, &state, qm); -#else - // gcc clang gcc+fqz_qual_stats imp. - // q40 5.033 5.026 -27% 4.137 -38% - // q4 5.595 -15% 4.011 -36% - // _Q 1.225 -11% 0.956 - int j = -1; - - while (state.p >= 4 && i+j+4 < in_size) { - int l1 = last, l2, l3, l4; - // Model has symbols sorted by frequency, so most common are at - // start. So while model is approx 1Kb, the first cache line is - // a big win. - mm_prefetch(&model.qual[l1]); - unsigned char qm1 = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm1); l2 = last; - - mm_prefetch(&model.qual[l2]); - unsigned char qm2 = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm2); l3 = last; - - mm_prefetch(&model.qual[l3]); - unsigned char qm3 = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm3); l4 = last; - - mm_prefetch(&model.qual[l4]); - unsigned char qm4 = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm4); - - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l1], &rc, qm1); - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l2], &rc, qm2); - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l3], &rc, qm3); - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l4], &rc, qm4); - } - - while (state.p > 0) { - int l2 = last; - mm_prefetch(&model.qual[last]); - unsigned char qm = pm->qmap[in[i + ++j]]; - last = fqz_update_ctx(pm, &state, qm); - SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l2], &rc, qm); - } - i += j; -#endif - } - - if (RC_FinishEncode(&rc) < 0) { - free(comp); - comp = NULL; - *out_size = 0; - goto err; - } - - // For CRAM3.1, undo our earlier reversal step - rec = state.rec; - if (gp->gflags & GFLAG_DO_REV) { - i = rec = j = 0; - while (i < in_size) { - int len = rec < s->num_records-1 - ? 
s->len[rec] - : in_size - i; - - if (s->flags[rec] & FQZ_FREVERSE) { - // Reverse complement sequence - note: modifies buffer - int I,J; - unsigned char *cp = in+i; - for (I = 0, J = len-1; I < J; I++, J--) { - unsigned char c; - c = cp[I]; - cp[I] = cp[J]; - cp[J] = c; - } - } - - i += len; - rec++; - } - } - - // Clear selector abuse of flags - for (rec = 0; rec < s->num_records; rec++) - s->flags[rec] &= 0xffff; - - *out_size = comp_idx + RC_OutSize(&rc); - //fprintf(stderr, "%d -> %d\n", (int)in_size, (int)*out_size); - - err: - fqz_destroy_models(&model); - if (free_params) - fqz_free_parameters(gp); - - return comp; -} - -// Read fqz paramaters. -// -// FIXME: pass in and check in_size. -// -// Returns number of bytes read on success, -// -1 on failure. -static inline -int fqz_read_parameters1(fqz_param *pm, unsigned char *in, size_t in_size) { - int in_idx = 0; - size_t i; - - if (in_size < 7) - return -1; - - // Starting context - pm->context = in[in_idx] + (in[in_idx+1]<<8); - in_idx += 2; - - // Bit flags - pm->pflags = in[in_idx++]; - pm->use_qtab = pm->pflags & PFLAG_HAVE_QTAB; - pm->use_dtab = pm->pflags & PFLAG_HAVE_DTAB; - pm->use_ptab = pm->pflags & PFLAG_HAVE_PTAB; - pm->do_sel = pm->pflags & PFLAG_DO_SEL; - pm->fixed_len = pm->pflags & PFLAG_DO_LEN; - pm->do_dedup = pm->pflags & PFLAG_DO_DEDUP; - pm->store_qmap = pm->pflags & PFLAG_HAVE_QMAP; - pm->max_sym = in[in_idx++]; - - // Sub-context sizes and locations - pm->qbits = in[in_idx]>>4; - pm->qmask = (1<qbits)-1; - pm->qshift = in[in_idx++]&15; - pm->qloc = in[in_idx]>>4; - pm->sloc = in[in_idx++]&15; - pm->ploc = in[in_idx]>>4; - pm->dloc = in[in_idx++]&15; - - // Maps and tables - if (pm->store_qmap) { - for (i = 0; i < 256; i++) pm->qmap[i] = INT_MAX; // so dump_map works - if (in_idx + pm->max_sym > in_size) - return -1; - for (i = 0; i < pm->max_sym; i++) - pm->qmap[i] = in[in_idx++]; - } else { - for (i = 0; i < 256; i++) - pm->qmap[i] = i; - } - - if (pm->qbits) { - if (pm->use_qtab) { - 
int used = read_array(in+in_idx, in_size-in_idx, pm->qtab, 256); - if (used < 0) - return -1; - in_idx += used; - } else { - for (i = 0; i < 256; i++) - pm->qtab[i] = i; - } - } - - if (pm->use_ptab) { - int used = read_array(in+in_idx, in_size-in_idx, pm->ptab, 1024); - if (used < 0) - return -1; - in_idx += used; - } else { - for (i = 0; i < 1024; i++) - pm->ptab[i] = 0; - } - - if (pm->use_dtab) { - int used = read_array(in+in_idx, in_size-in_idx, pm->dtab, 256); - if (used < 0) - return -1; - in_idx += used; - } else { - for (i = 0; i < 256; i++) - pm->dtab[i] = 0; - } - - return in_idx; -} - -static -int fqz_read_parameters(fqz_gparams *gp, unsigned char *in, size_t in_size) { - int in_idx = 0; - int i; - - if (in_size < 10) - return -1; - - // Format version - gp->vers = in[in_idx++]; - if (gp->vers != FQZ_VERS) - return -1; - - // Global glags - gp->gflags = in[in_idx++]; - - // Number of param blocks and param selector details - gp->nparam = (gp->gflags & GFLAG_MULTI_PARAM) ? in[in_idx++] : 1; - if (gp->nparam <= 0) - return -1; - gp->max_sel = gp->nparam > 1 ? 
gp->nparam : 0; - - if (gp->gflags & GFLAG_HAVE_STAB) { - gp->max_sel = in[in_idx++]; - int used = read_array(in+in_idx, in_size-in_idx, gp->stab, 256); - if (used < 0) - goto err; - in_idx += used; - } else { - for (i = 0; i < gp->nparam; i++) - gp->stab[i] = i; - for (; i < 256; i++) - gp->stab[i] = gp->nparam-1; - } - - // Load the individual parameter locks - if (!(gp->p = malloc(gp->nparam * sizeof(*gp->p)))) - return -1; - - gp->max_sym = 0; - for (i = 0; i < gp->nparam; i++) { - int e = fqz_read_parameters1(&gp->p[i], in + in_idx, in_size-in_idx); - if (e < 0) - goto err; - if (gp->p[i].do_sel && gp->max_sel == 0) - goto err; // Inconsistent - in_idx += e; - - if (gp->max_sym < gp->p[i].max_sym) - gp->max_sym = gp->p[i].max_sym; - } - - //fprintf(stderr, "Decoded %d bytes of param\n", in_idx); - return in_idx; - - err: - fqz_free_parameters(gp); - gp->nparam = 0; - return -1; -} - -// Handles the state.p==0 section of uncompress_block_fqz2f -static int decompress_new_read(fqz_slice *s, - fqz_state *state, - fqz_gparams *gp, - fqz_param *pm, - fqz_model *model, - RangeCoder *rc, - unsigned char *in, ssize_t *in_i, // in[in_i], - unsigned char *uncomp, size_t *out_size, - int *rev, char *rev_a, int *len_a, - int *lengths, int nlengths) { - size_t i = *in_i; - ssize_t rec = state->rec; - - if (pm->do_sel) { - state->s = SIMPLE_MODEL(256,_decodeSymbol)(&model->sel, rc); - } else { - state->s = 0; - } - - int x = (gp->gflags & GFLAG_HAVE_STAB) - ? 
gp->stab[MIN(255, state->s)] - : state->s; - if (x >= gp->nparam) - return -1; - pm = &gp->p[x]; - - unsigned int len = state->last_len; - if (!pm->fixed_len || state->first_len) { - len = SIMPLE_MODEL(256,_decodeSymbol)(&model->len[0], rc); - len |= SIMPLE_MODEL(256,_decodeSymbol)(&model->len[1], rc)<<8; - len |= SIMPLE_MODEL(256,_decodeSymbol)(&model->len[2], rc)<<16; - len |= ((unsigned)SIMPLE_MODEL(256,_decodeSymbol)(&model->len[3], rc))<<24; - state->first_len = 0; - state->last_len = len; - } - if (len > *out_size-i || len <= 0) - return -1; - - if (lengths && rec < nlengths) - lengths[rec] = len; - - if (gp->gflags & GFLAG_DO_REV) { - *rev = SIMPLE_MODEL(2,_decodeSymbol)(&model->revcomp, rc); - rev_a[rec] = *rev; - len_a[rec] = len; - } - - if (pm->do_dedup) { - if (SIMPLE_MODEL(2,_decodeSymbol)(&model->dup, rc)) { - // Dup of last line - if (len > i) - return -1; - memcpy(uncomp+i, uncomp+i-len, len); - i += len; - state->p = 0; - state->rec++; - *in_i = i; - return 1; // dup => continue - } - } - - state->rec++; - state->p = len; - state->delta = 0; - state->prevq = 0; - state->qctx = 0; - state->ctx = pm->context; - - *in_i = i; - - return 0; -} - - -static -unsigned char *uncompress_block_fqz2f(fqz_slice *s, - unsigned char *in, - size_t in_size, - size_t *out_size, - int *lengths, - int nlengths) { - fqz_gparams gp; - fqz_param *pm; - char *rev_a = NULL; - int *len_a = NULL; - memset(&gp, 0, sizeof(gp)); - - uint32_t len; - ssize_t i, rec = 0, in_idx; - in_idx = var_get_u32(in, in+in_size, &len); - *out_size = len; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (len > 100000) - return NULL; -#endif - - unsigned char *uncomp = NULL; - RangeCoder rc; - unsigned int last = 0; - - // Decode parameter blocks - if ((i = fqz_read_parameters(&gp, in+in_idx, in_size-in_idx)) < 0) - return NULL; - //dump_params(&gp); - in_idx += i; - - // Optimisations to remove shifts from main loop - for (i = 0; i < gp.nparam; i++) { - int j; - pm = &gp.p[i]; - for (j 
= 0; j < 1024; j++) - pm->ptab[j] <<= pm->ploc; - for (j = 0; j < 256; j++) - pm->dtab[j] <<= pm->dloc; - } - - // Initialise models and entropy coder - fqz_model model; - if (fqz_create_models(&model, &gp) < 0) - return NULL; - - RC_SetInput(&rc, (char *)in+in_idx, (char *)in+in_size); - RC_StartDecode(&rc); - - - // Allocate buffers - uncomp = (unsigned char *)malloc(*out_size); - if (!uncomp) - goto err; - - int nrec = 1000; - rev_a = malloc(nrec); - len_a = malloc(nrec * sizeof(int)); - if (!rev_a || !len_a) - goto err; - - // Main decode loop - fqz_state state; - state.delta = 0; - state.prevq = 0; - state.qctx = 0; - state.p = 0; - state.s = 0; - state.first_len = 1; - state.last_len = 0; - state.rec = 0; - state.ctx = last; - - int rev = 0; - int x = 0; - pm = &gp.p[x]; - for (i = 0; i < len; ) { - if (state.rec >= nrec) { - nrec *= 2; - rev_a = realloc(rev_a, nrec); - len_a = realloc(len_a, nrec*sizeof(int)); - if (!rev_a || !len_a) - goto err; - } - - if (state.p == 0) { - int r = decompress_new_read(s, &state, &gp, pm, &model, &rc, - in, &i, uncomp, out_size, - &rev, rev_a, len_a, - lengths, nlengths); - if (r < 0) - goto err; - if (r > 0) - continue; - last = state.ctx; - } - - // Decode and update context - do { - unsigned char Q = SIMPLE_MODEL(QMAX,_decodeSymbol) - (&model.qual[last], &rc); - - last = fqz_update_ctx(pm, &state, Q); - uncomp[i++] = pm->qmap[Q]; - } while (state.p != 0 && i < len); - } - - rec = state.rec; - if (rec >= nrec) { - nrec *= 2; - rev_a = realloc(rev_a, nrec); - len_a = realloc(len_a, nrec*sizeof(int)); - if (!rev_a || !len_a) - goto err; - } - rev_a[rec] = rev; - len_a[rec] = len; - - if (gp.gflags & GFLAG_DO_REV) { - for (i = rec = 0; i < len && rec < nrec; i += len_a[rec++]) { - if (!rev_a[rec]) - continue; - - int I, J; - unsigned char *cp = uncomp+i; - for (I = 0, J = len_a[rec]-1; I < J; I++, J--) { - unsigned char c; - c = cp[I]; - cp[I] = cp[J]; - cp[J] = c; - } - } - } - - if (RC_FinishDecode(&rc) < 0) - goto err; - - 
fqz_destroy_models(&model); - free(rev_a); - free(len_a); - fqz_free_parameters(&gp); - -#ifdef TEST_MAIN - s->num_records = rec; -#endif - - return uncomp; - - err: - fqz_destroy_models(&model); - free(rev_a); - free(len_a); - fqz_free_parameters(&gp); - free(uncomp); - - return NULL; -} - -char *fqz_compress(int vers, fqz_slice *s, char *in, size_t uncomp_size, - size_t *comp_size, int strat, fqz_gparams *gp) { - if (uncomp_size > INT_MAX) { - *comp_size = 0; - return NULL; - } - - return (char *)compress_block_fqz2f(vers, strat, s, (unsigned char *)in, - uncomp_size, comp_size, gp); -} - -char *fqz_decompress(char *in, size_t comp_size, size_t *uncomp_size, - int *lengths, int nlengths) { - return (char *)uncompress_block_fqz2f(NULL, (unsigned char *)in, - comp_size, uncomp_size, lengths, nlengths); -} diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/fqzcomp_qual.h b/src/htslib-1.19.1/htscodecs/htscodecs/fqzcomp_qual.h deleted file mode 100644 index d3aa267..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/fqzcomp_qual.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2011-2013, 2018-2019 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef FQZ_COMP_QUAL_H -#define FQZ_COMP_QUAL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* Bit flags, deliberately mirroring BAM ones */ -#define FQZ_FREVERSE 16 -#define FQZ_FREAD2 128 - -/* Current FQZ format version */ -#define FQZ_VERS 5 - -#define FQZ_MAX_STRAT 3 - -/* - * Minimal per-record information taken from a cram slice. - * - * To compress we need to know the junction from one quality string to - * the next (len), whether it is first/second read and whether it is - * reverse complemented (flags). - */ -typedef struct { - int num_records; - uint32_t *len; // of size num_records - uint32_t *flags; // of size num_records -} fqz_slice; - - -// Global flags -static const int GFLAG_MULTI_PARAM = 1; -static const int GFLAG_HAVE_STAB = 2; -static const int GFLAG_DO_REV = 4; - -// Param flags -// Add PFLAG_HAVE_DMAP and a dmap[] for delta incr? -static const int PFLAG_DO_DEDUP = 2; -static const int PFLAG_DO_LEN = 4; -static const int PFLAG_DO_SEL = 8; -static const int PFLAG_HAVE_QMAP = 16; -static const int PFLAG_HAVE_PTAB = 32; -static const int PFLAG_HAVE_DTAB = 64; -static const int PFLAG_HAVE_QTAB = 128; - -/* - * FQZ parameters. 
These may be simply passed in as NULL to fqz_compress - * and it'll automatically choose, but if we wish to have complete control - * then this (long) struct contains all the details. - * - * TODO: document all this! - */ - -// A single parameter block -typedef struct { - // Starting context value - uint16_t context; - - // flags - unsigned int pflags; - unsigned int do_sel, do_dedup, store_qmap, fixed_len; - unsigned char use_qtab, use_dtab, use_ptab; - - // context bits and locations - unsigned int qbits, qloc; - unsigned int pbits, ploc; - unsigned int dbits, dloc; - unsigned int sbits, sloc; - - // models - int max_sym, nsym, max_sel; - - // tables / maps - unsigned int qmap[256]; - unsigned int qtab[256]; - unsigned int ptab[1024]; - unsigned int dtab[256]; - - // Not stored paramters, but computed as part of encoder - // parameterisation. - int qshift; - int pshift; - int dshift; - int sshift; - unsigned int qmask; // (1< -#include -#include -#include - -#include "pack.h" - -//----------------------------------------------------------------------------- - -/* - * Packs multiple symbols into a single byte if the total alphabet of symbols - * used is <= 16. Each new symbol takes up 1, 2, 4 or 8 bits, or 0 if the - * alphabet used is 1 (constant). - * - * If successful, out_meta/out_meta_len are set to hold the mapping table - * to be used during decompression. 
- * - * Returns the packed buffer on success with new length in out_len, - * NULL of failure - */ -uint8_t *hts_pack(uint8_t *data, int64_t len, - uint8_t *out_meta, int *out_meta_len, uint64_t *out_len) { - int p[256] = {0}, n; - uint64_t i, j; - - // count syms - for (i = 0; i < len; i++) - p[data[i]]=1; - - for (i = n = 0; i < 256; i++) { - if (p[i]) { - p[i] = n++; // p[i] is now the code number - out_meta[n] = i; - } - } - out_meta[0] = n; // 256 wraps to 0 - j = n+1; - - // 1 value per byte - if (n > 16) - return NULL; - - uint8_t *out = malloc(len+1); - if (!out) - return NULL; - - // Work out how many values per byte to encode. - int val_per_byte; - if (n > 4) - val_per_byte = 2; - else if (n > 2) - val_per_byte = 4; - else if (n > 1) - val_per_byte = 8; - else - val_per_byte = 0; // infinite - - *out_meta_len = j; - j = 0; - - switch (val_per_byte) { - case 2: - for (i = 0; i < (len & ~1); i+=2) - out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<4); - switch (len-i) { - case 1: out[j++] = p[data[i]]; - } - *out_len = j; - return out; - - case 4: { - for (i = 0; i < (len & ~3); i+=4) - out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<2) | (p[data[i+2]]<<4) | (p[data[i+3]]<<6); - out[j] = 0; - int s = len-i, x = 0; - switch (s) { - case 3: out[j] |= p[data[i++]] << x; x+=2; - case 2: out[j] |= p[data[i++]] << x; x+=2; - case 1: out[j] |= p[data[i++]] << x; x+=2; - j++; - } - *out_len = j; - return out; - } - - case 8: { - for (i = 0; i < (len & ~7); i+=8) - out[j++] = (p[data[i+0]]<<0) | (p[data[i+1]]<<1) | (p[data[i+2]]<<2) | (p[data[i+3]]<<3) - | (p[data[i+4]]<<4) | (p[data[i+5]]<<5) | (p[data[i+6]]<<6) | (p[data[i+7]]<<7); - out[j] = 0; - int s = len-i, x = 0; - switch (s) { - case 7: out[j] |= p[data[i++]] << x++; - case 6: out[j] |= p[data[i++]] << x++; - case 5: out[j] |= p[data[i++]] << x++; - case 4: out[j] |= p[data[i++]] << x++; - case 3: out[j] |= p[data[i++]] << x++; - case 2: out[j] |= p[data[i++]] << x++; - case 1: out[j] |= p[data[i++]] << x++; - j++; - 
} - *out_len = j; - return out; - } - - case 0: - *out_len = j; - return out; - } - - return NULL; -} - - -/* - * Unpacks the meta-data portions of the hts_pack algorithm. - * This consists of the count of symbols and their values. - * - * The "map" array is filled out with the used symbols. - * "nsym" is set to contain the number of symbols per byte; - * 0, 1, 2, 4 or 8. - * - * Returns number of bytes of data[] consumed on success, - * zero on failure. - */ -uint8_t hts_unpack_meta(uint8_t *data, uint32_t data_len, - uint64_t udata_len, uint8_t *map, int *nsym) { - if (data_len == 0) - return 0; - - // Number of symbols used - unsigned int n = data[0]; - if (n == 0) - n = 256; - - // Symbols per byte - if (n <= 1) - *nsym = 0; - else if (n <= 2) - *nsym = 8; - else if (n <= 4) - *nsym = 4; - else if (n <= 16) - *nsym = 2; - else { - *nsym = 1; // no packing - return 1; - } - - if (data_len <= 1) - return 0; - - int j = 1, c = 0; - do { - map[c++] = data[j++]; - } while (c < n && j < data_len); - - return c < n ? 0 : j; -} - -/* - * Unpacks a packed data steam (given the unpacked meta-data). - * - * "map" is the pack map, mapping 0->n to the expanded symbols. - * The "out" buffer must be preallocated by the caller to be the correct - * size. For error checking purposes, out_len is set to the size of - * this buffer. - * - * Returns uncompressed data (out) on success, - * NULL on failure. 
- */ -uint8_t *hts_unpack(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) { - //uint8_t *out; - uint8_t c = 0; - int64_t i, j = 0, olen; - - if (nsym == 1) { - // raw data; FIXME: shortcut the need for malloc & memcpy here - memcpy(out, data, len); - return out; - } - - switch(nsym) { - case 8: { - union { - uint64_t w; - uint8_t c[8]; - } map[256]; - int x; - for (x = 0; x < 256; x++) { - map[x].c[0] = p[x>>0&1]; - map[x].c[1] = p[x>>1&1]; - map[x].c[2] = p[x>>2&1]; - map[x].c[3] = p[x>>3&1]; - map[x].c[4] = p[x>>4&1]; - map[x].c[5] = p[x>>5&1]; - map[x].c[6] = p[x>>6&1]; - map[x].c[7] = p[x>>7&1]; - } - if ((out_len+7)/8 > len) - return NULL; - olen = out_len & ~7; - - for (i = 0; i < olen; i+=8) - memcpy(&out[i], &map[data[j++]].w, 8); - - if (out_len != olen) { - c = data[j++]; - while (i < out_len) { - out[i++] = p[c & 1]; - c >>= 1; - } - } - break; - } - - case 4: { - union { - uint32_t w; - uint8_t c[4]; - } map[256]; - - int x, y, z, _, P=0; - for (x = 0; x < 4; x++) - for (y = 0; y < 4; y++) - for (z = 0; z < 4; z++) - for (_ = 0; _ < 4; _++, P++) { - map[P].c[0] = p[_]; - map[P].c[1] = p[z]; - map[P].c[2] = p[y]; - map[P].c[3] = p[x]; - } - - if ((out_len+3)/4 > len) - return NULL; - olen = out_len & ~3; - - for (i = 0; i < olen-12; i+=16) { - uint32_t w[] = { - map[data[j+0]].w, - map[data[j+1]].w, - map[data[j+2]].w, - map[data[j+3]].w - }; - j += 4; - memcpy(&out[i], &w, 16); - } - - for (; i < olen; i+=4) - memcpy(&out[i], &map[data[j++]].w, 4); - - if (out_len != olen) { - c = data[j++]; - while (i < out_len) { - out[i++] = p[c & 3]; - c >>= 2; - } - } - break; - } - - case 2: { - union { - uint16_t w; - uint8_t c[2]; - } map[256]; - - int x, y; - for (x = 0; x < 16; x++) { - for (y = 0; y < 16; y++) { - map[x*16+y].c[0] = p[y]; - map[x*16+y].c[1] = p[x]; - } - } - - if ((out_len+1)/2 > len) - return NULL; - olen = out_len & ~1; - - for (i = j = 0; i+2 < olen; i+=4) { - uint16_t w[] = { - map[data[j+0]].w, - 
map[data[j+1]].w - }; - memcpy(&out[i], &w, 4); - - j += 2; - } - - for (; i < olen; i+=2) - memcpy(&out[i], &map[data[j++]].w, 2); - - if (out_len != olen) { - c = data[j++]; - out[i+0] = p[c&15]; - } - break; - } - - case 0: - memset(out, p[0], out_len); - break; - - default: - return NULL; - } - - return out; -} - - -uint8_t *hts_unpack_(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) { - //uint8_t *out; - uint8_t c = 0; - int64_t i, j = 0, olen; - - if (nsym == 1) { - // raw data; FIXME: shortcut the need for malloc & memcpy here - memcpy(out, data, len); - return out; - } - - switch(nsym) { - case 2: { - uint16_t map[256], x, y; - for (x = 0; x < 16; x++) - for (y = 0; y < 16; y++) - map[x*16+y] = p[x]*256+p[y]; - - if ((out_len+1)/2 > len) - return NULL; - olen = out_len & ~1; - - uint16_t *o16 = (uint16_t *)out; - for (i = 0; i+4 < olen/2; i+=4) { - int k; - for (k = 0; k < 4; k++) - o16[i+k] = map[data[i+k]]; - } - j = i; i *= 2; - - for (; i < olen; i+=2) { - uint16_t w1 = map[data[j++]]; - *(uint16_t *)&out[i] = w1; - } - - if (out_len != olen) { - c = data[j++]; - out[i+0] = p[c&15]; - } - break; - } - - default: - return NULL; - } - - return out; -} diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/pack.h b/src/htslib-1.19.1/htscodecs/htscodecs/pack.h deleted file mode 100644 index 79b05df..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/pack.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2019 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef HTS_PACK_H -#define HTS_PACK_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Packs multiple symbols into a single byte if the total alphabet of symbols - * used is <= 16. Each new symbol takes up 1, 2, 4 or 8 bits, or 0 if the - * alphabet used is 1 (constant). - * - * If successful, out_meta/out_meta_len are set to hold the mapping table - * to be used during decompression. - * - * Returns the packed buffer on success with new length in out_len, - * NULL of failure - */ -uint8_t *hts_pack(uint8_t *data, int64_t len, - uint8_t *out_meta, int *out_meta_len, uint64_t *out_len); - -/* - * Unpacks the meta-data portions of the hts_pack algorithm. - * This consists of the count of symbols and their values. 
- * - * The "map" array is filled out with the used symbols. - * "nsym" is set to contain the number of symbols per byte; - * 0, 1, 2, 4 or 8. - * - * Returns number of bytes of data[] consumed on success, - * zero on failure. - */ -uint8_t hts_unpack_meta(uint8_t *data, uint32_t data_len, - uint64_t udata_len, uint8_t *map, int *nsym); - -/* - * Unpacks a packed data steam (given the unpacked meta-data). - * - * "map" is the pack map, mapping 0->n to the expanded symbols. - * The "out" buffer must be preallocated by the caller to be the correct - * size. For error checking purposes, out_len is set to the size of - * this buffer. - * - * Returns uncompressed data (out) on success, - * NULL on failure. - */ -uint8_t *hts_unpack(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *map); - -#ifdef __cplusplus -} -#endif - -#endif /* HTS_PACK_H */ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/permute.h b/src/htslib-1.19.1/htscodecs/htscodecs/permute.h deleted file mode 100644 index 71a4b7e..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/permute.h +++ /dev/null @@ -1,605 +0,0 @@ -#ifdef MAIN -#include - -/* - * Shuffle based on input bits. - * So bit N true => keep Nth byte. - * bit N false => skip Nth byte. - */ - -int main(void) { - int i, j; - - FILE *fp = fopen(__FILE__, "r"); - char line[8192]; - while(fgets(line, 8192, fp)) { - printf("%s", line); - } - close(fp); - printf("\n"); - - // Decode table; distributes N adjacent values across lanes - printf("#define _ 9\n"); - printf("static uint32_t permute[256][8] = { // reverse binary bit order\n"); - for (i = 0; i < 256; i++) { - int b = 0; - int v[8] = {0}; - for (j = 0; j < 8; j++) { - if (i & (1<= 0 && v[j]) - printf("%d,", v[j]-1); - else - printf("_,"); - } - printf("},\n"); - } - printf("};\n"); - - return 0; -} -#endif - -/* - * These tables are 8k. On older systems with small L1 cache, this may be - * a problem. 
- * - * #define PM(a,b,c,d,e,f,g,h) ((a<<0)|(b<<4)|(c<<8)|(d<<12)|(e<<16)|(f<<20)|(g<<24)|(h<<28)) - * - * Instead of permute via - * __m256i idx1 = _mm256_load_si256((const __m256i*)permute[imask1]); - * - * we can pack the indices and shift them back again - * __m256i idx1 = _mm256_srlv_epi32(_mm256_set1_epi32(permute2[imask1]), - * _mm256_set_epi32(28,24,20,16,12,8,4,0)); - * - * However on my Haswell system this slows down r32x16b_avx2 from 1440 to - * 1200 MB/s decode speeds. - * It's much closer for order-1 decoder, but still doesn't help. - * - * The encoder side seems to make no difference either way or be very marginal. - */ - -#define _ 9 -static uint32_t permute[256][8] __attribute__((aligned(32))) = { // reverse binary bit order - { _,_,_,_,_,_,_,_,}, - { 0,_,_,_,_,_,_,_,}, - { _,0,_,_,_,_,_,_,}, - { 0,1,_,_,_,_,_,_,}, - { _,_,0,_,_,_,_,_,}, - { 0,_,1,_,_,_,_,_,}, - { _,0,1,_,_,_,_,_,}, - { 0,1,2,_,_,_,_,_,}, - { _,_,_,0,_,_,_,_,}, - { 0,_,_,1,_,_,_,_,}, - { _,0,_,1,_,_,_,_,}, - { 0,1,_,2,_,_,_,_,}, - { _,_,0,1,_,_,_,_,}, - { 0,_,1,2,_,_,_,_,}, - { _,0,1,2,_,_,_,_,}, - { 0,1,2,3,_,_,_,_,}, - { _,_,_,_,0,_,_,_,}, - { 0,_,_,_,1,_,_,_,}, - { _,0,_,_,1,_,_,_,}, - { 0,1,_,_,2,_,_,_,}, - { _,_,0,_,1,_,_,_,}, - { 0,_,1,_,2,_,_,_,}, - { _,0,1,_,2,_,_,_,}, - { 0,1,2,_,3,_,_,_,}, - { _,_,_,0,1,_,_,_,}, - { 0,_,_,1,2,_,_,_,}, - { _,0,_,1,2,_,_,_,}, - { 0,1,_,2,3,_,_,_,}, - { _,_,0,1,2,_,_,_,}, - { 0,_,1,2,3,_,_,_,}, - { _,0,1,2,3,_,_,_,}, - { 0,1,2,3,4,_,_,_,}, - { _,_,_,_,_,0,_,_,}, - { 0,_,_,_,_,1,_,_,}, - { _,0,_,_,_,1,_,_,}, - { 0,1,_,_,_,2,_,_,}, - { _,_,0,_,_,1,_,_,}, - { 0,_,1,_,_,2,_,_,}, - { _,0,1,_,_,2,_,_,}, - { 0,1,2,_,_,3,_,_,}, - { _,_,_,0,_,1,_,_,}, - { 0,_,_,1,_,2,_,_,}, - { _,0,_,1,_,2,_,_,}, - { 0,1,_,2,_,3,_,_,}, - { _,_,0,1,_,2,_,_,}, - { 0,_,1,2,_,3,_,_,}, - { _,0,1,2,_,3,_,_,}, - { 0,1,2,3,_,4,_,_,}, - { _,_,_,_,0,1,_,_,}, - { 0,_,_,_,1,2,_,_,}, - { _,0,_,_,1,2,_,_,}, - { 0,1,_,_,2,3,_,_,}, - { _,_,0,_,1,2,_,_,}, - { 0,_,1,_,2,3,_,_,}, - { 
_,0,1,_,2,3,_,_,}, - { 0,1,2,_,3,4,_,_,}, - { _,_,_,0,1,2,_,_,}, - { 0,_,_,1,2,3,_,_,}, - { _,0,_,1,2,3,_,_,}, - { 0,1,_,2,3,4,_,_,}, - { _,_,0,1,2,3,_,_,}, - { 0,_,1,2,3,4,_,_,}, - { _,0,1,2,3,4,_,_,}, - { 0,1,2,3,4,5,_,_,}, - { _,_,_,_,_,_,0,_,}, - { 0,_,_,_,_,_,1,_,}, - { _,0,_,_,_,_,1,_,}, - { 0,1,_,_,_,_,2,_,}, - { _,_,0,_,_,_,1,_,}, - { 0,_,1,_,_,_,2,_,}, - { _,0,1,_,_,_,2,_,}, - { 0,1,2,_,_,_,3,_,}, - { _,_,_,0,_,_,1,_,}, - { 0,_,_,1,_,_,2,_,}, - { _,0,_,1,_,_,2,_,}, - { 0,1,_,2,_,_,3,_,}, - { _,_,0,1,_,_,2,_,}, - { 0,_,1,2,_,_,3,_,}, - { _,0,1,2,_,_,3,_,}, - { 0,1,2,3,_,_,4,_,}, - { _,_,_,_,0,_,1,_,}, - { 0,_,_,_,1,_,2,_,}, - { _,0,_,_,1,_,2,_,}, - { 0,1,_,_,2,_,3,_,}, - { _,_,0,_,1,_,2,_,}, - { 0,_,1,_,2,_,3,_,}, - { _,0,1,_,2,_,3,_,}, - { 0,1,2,_,3,_,4,_,}, - { _,_,_,0,1,_,2,_,}, - { 0,_,_,1,2,_,3,_,}, - { _,0,_,1,2,_,3,_,}, - { 0,1,_,2,3,_,4,_,}, - { _,_,0,1,2,_,3,_,}, - { 0,_,1,2,3,_,4,_,}, - { _,0,1,2,3,_,4,_,}, - { 0,1,2,3,4,_,5,_,}, - { _,_,_,_,_,0,1,_,}, - { 0,_,_,_,_,1,2,_,}, - { _,0,_,_,_,1,2,_,}, - { 0,1,_,_,_,2,3,_,}, - { _,_,0,_,_,1,2,_,}, - { 0,_,1,_,_,2,3,_,}, - { _,0,1,_,_,2,3,_,}, - { 0,1,2,_,_,3,4,_,}, - { _,_,_,0,_,1,2,_,}, - { 0,_,_,1,_,2,3,_,}, - { _,0,_,1,_,2,3,_,}, - { 0,1,_,2,_,3,4,_,}, - { _,_,0,1,_,2,3,_,}, - { 0,_,1,2,_,3,4,_,}, - { _,0,1,2,_,3,4,_,}, - { 0,1,2,3,_,4,5,_,}, - { _,_,_,_,0,1,2,_,}, - { 0,_,_,_,1,2,3,_,}, - { _,0,_,_,1,2,3,_,}, - { 0,1,_,_,2,3,4,_,}, - { _,_,0,_,1,2,3,_,}, - { 0,_,1,_,2,3,4,_,}, - { _,0,1,_,2,3,4,_,}, - { 0,1,2,_,3,4,5,_,}, - { _,_,_,0,1,2,3,_,}, - { 0,_,_,1,2,3,4,_,}, - { _,0,_,1,2,3,4,_,}, - { 0,1,_,2,3,4,5,_,}, - { _,_,0,1,2,3,4,_,}, - { 0,_,1,2,3,4,5,_,}, - { _,0,1,2,3,4,5,_,}, - { 0,1,2,3,4,5,6,_,}, - { _,_,_,_,_,_,_,0,}, - { 0,_,_,_,_,_,_,1,}, - { _,0,_,_,_,_,_,1,}, - { 0,1,_,_,_,_,_,2,}, - { _,_,0,_,_,_,_,1,}, - { 0,_,1,_,_,_,_,2,}, - { _,0,1,_,_,_,_,2,}, - { 0,1,2,_,_,_,_,3,}, - { _,_,_,0,_,_,_,1,}, - { 0,_,_,1,_,_,_,2,}, - { _,0,_,1,_,_,_,2,}, - { 0,1,_,2,_,_,_,3,}, - { _,_,0,1,_,_,_,2,}, - 
{ 0,_,1,2,_,_,_,3,}, - { _,0,1,2,_,_,_,3,}, - { 0,1,2,3,_,_,_,4,}, - { _,_,_,_,0,_,_,1,}, - { 0,_,_,_,1,_,_,2,}, - { _,0,_,_,1,_,_,2,}, - { 0,1,_,_,2,_,_,3,}, - { _,_,0,_,1,_,_,2,}, - { 0,_,1,_,2,_,_,3,}, - { _,0,1,_,2,_,_,3,}, - { 0,1,2,_,3,_,_,4,}, - { _,_,_,0,1,_,_,2,}, - { 0,_,_,1,2,_,_,3,}, - { _,0,_,1,2,_,_,3,}, - { 0,1,_,2,3,_,_,4,}, - { _,_,0,1,2,_,_,3,}, - { 0,_,1,2,3,_,_,4,}, - { _,0,1,2,3,_,_,4,}, - { 0,1,2,3,4,_,_,5,}, - { _,_,_,_,_,0,_,1,}, - { 0,_,_,_,_,1,_,2,}, - { _,0,_,_,_,1,_,2,}, - { 0,1,_,_,_,2,_,3,}, - { _,_,0,_,_,1,_,2,}, - { 0,_,1,_,_,2,_,3,}, - { _,0,1,_,_,2,_,3,}, - { 0,1,2,_,_,3,_,4,}, - { _,_,_,0,_,1,_,2,}, - { 0,_,_,1,_,2,_,3,}, - { _,0,_,1,_,2,_,3,}, - { 0,1,_,2,_,3,_,4,}, - { _,_,0,1,_,2,_,3,}, - { 0,_,1,2,_,3,_,4,}, - { _,0,1,2,_,3,_,4,}, - { 0,1,2,3,_,4,_,5,}, - { _,_,_,_,0,1,_,2,}, - { 0,_,_,_,1,2,_,3,}, - { _,0,_,_,1,2,_,3,}, - { 0,1,_,_,2,3,_,4,}, - { _,_,0,_,1,2,_,3,}, - { 0,_,1,_,2,3,_,4,}, - { _,0,1,_,2,3,_,4,}, - { 0,1,2,_,3,4,_,5,}, - { _,_,_,0,1,2,_,3,}, - { 0,_,_,1,2,3,_,4,}, - { _,0,_,1,2,3,_,4,}, - { 0,1,_,2,3,4,_,5,}, - { _,_,0,1,2,3,_,4,}, - { 0,_,1,2,3,4,_,5,}, - { _,0,1,2,3,4,_,5,}, - { 0,1,2,3,4,5,_,6,}, - { _,_,_,_,_,_,0,1,}, - { 0,_,_,_,_,_,1,2,}, - { _,0,_,_,_,_,1,2,}, - { 0,1,_,_,_,_,2,3,}, - { _,_,0,_,_,_,1,2,}, - { 0,_,1,_,_,_,2,3,}, - { _,0,1,_,_,_,2,3,}, - { 0,1,2,_,_,_,3,4,}, - { _,_,_,0,_,_,1,2,}, - { 0,_,_,1,_,_,2,3,}, - { _,0,_,1,_,_,2,3,}, - { 0,1,_,2,_,_,3,4,}, - { _,_,0,1,_,_,2,3,}, - { 0,_,1,2,_,_,3,4,}, - { _,0,1,2,_,_,3,4,}, - { 0,1,2,3,_,_,4,5,}, - { _,_,_,_,0,_,1,2,}, - { 0,_,_,_,1,_,2,3,}, - { _,0,_,_,1,_,2,3,}, - { 0,1,_,_,2,_,3,4,}, - { _,_,0,_,1,_,2,3,}, - { 0,_,1,_,2,_,3,4,}, - { _,0,1,_,2,_,3,4,}, - { 0,1,2,_,3,_,4,5,}, - { _,_,_,0,1,_,2,3,}, - { 0,_,_,1,2,_,3,4,}, - { _,0,_,1,2,_,3,4,}, - { 0,1,_,2,3,_,4,5,}, - { _,_,0,1,2,_,3,4,}, - { 0,_,1,2,3,_,4,5,}, - { _,0,1,2,3,_,4,5,}, - { 0,1,2,3,4,_,5,6,}, - { _,_,_,_,_,0,1,2,}, - { 0,_,_,_,_,1,2,3,}, - { _,0,_,_,_,1,2,3,}, - { 0,1,_,_,_,2,3,4,}, 
- { _,_,0,_,_,1,2,3,}, - { 0,_,1,_,_,2,3,4,}, - { _,0,1,_,_,2,3,4,}, - { 0,1,2,_,_,3,4,5,}, - { _,_,_,0,_,1,2,3,}, - { 0,_,_,1,_,2,3,4,}, - { _,0,_,1,_,2,3,4,}, - { 0,1,_,2,_,3,4,5,}, - { _,_,0,1,_,2,3,4,}, - { 0,_,1,2,_,3,4,5,}, - { _,0,1,2,_,3,4,5,}, - { 0,1,2,3,_,4,5,6,}, - { _,_,_,_,0,1,2,3,}, - { 0,_,_,_,1,2,3,4,}, - { _,0,_,_,1,2,3,4,}, - { 0,1,_,_,2,3,4,5,}, - { _,_,0,_,1,2,3,4,}, - { 0,_,1,_,2,3,4,5,}, - { _,0,1,_,2,3,4,5,}, - { 0,1,2,_,3,4,5,6,}, - { _,_,_,0,1,2,3,4,}, - { 0,_,_,1,2,3,4,5,}, - { _,0,_,1,2,3,4,5,}, - { 0,1,_,2,3,4,5,6,}, - { _,_,0,1,2,3,4,5,}, - { 0,_,1,2,3,4,5,6,}, - { _,0,1,2,3,4,5,6,}, - { 0,1,2,3,4,5,6,7,}, -}; - -static uint32_t permutec[256][8] __attribute__((aligned(32))) = { // reverse binary bit order - { _,_,_,_,_,_,_,_,}, - { _,_,_,_,_,_,_,0,}, - { _,_,_,_,_,_,_,1,}, - { _,_,_,_,_,_,0,1,}, - { _,_,_,_,_,_,_,2,}, - { _,_,_,_,_,_,0,2,}, - { _,_,_,_,_,_,1,2,}, - { _,_,_,_,_,0,1,2,}, - { _,_,_,_,_,_,_,3,}, - { _,_,_,_,_,_,0,3,}, - { _,_,_,_,_,_,1,3,}, - { _,_,_,_,_,0,1,3,}, - { _,_,_,_,_,_,2,3,}, - { _,_,_,_,_,0,2,3,}, - { _,_,_,_,_,1,2,3,}, - { _,_,_,_,0,1,2,3,}, - { _,_,_,_,_,_,_,4,}, - { _,_,_,_,_,_,0,4,}, - { _,_,_,_,_,_,1,4,}, - { _,_,_,_,_,0,1,4,}, - { _,_,_,_,_,_,2,4,}, - { _,_,_,_,_,0,2,4,}, - { _,_,_,_,_,1,2,4,}, - { _,_,_,_,0,1,2,4,}, - { _,_,_,_,_,_,3,4,}, - { _,_,_,_,_,0,3,4,}, - { _,_,_,_,_,1,3,4,}, - { _,_,_,_,0,1,3,4,}, - { _,_,_,_,_,2,3,4,}, - { _,_,_,_,0,2,3,4,}, - { _,_,_,_,1,2,3,4,}, - { _,_,_,0,1,2,3,4,}, - { _,_,_,_,_,_,_,5,}, - { _,_,_,_,_,_,0,5,}, - { _,_,_,_,_,_,1,5,}, - { _,_,_,_,_,0,1,5,}, - { _,_,_,_,_,_,2,5,}, - { _,_,_,_,_,0,2,5,}, - { _,_,_,_,_,1,2,5,}, - { _,_,_,_,0,1,2,5,}, - { _,_,_,_,_,_,3,5,}, - { _,_,_,_,_,0,3,5,}, - { _,_,_,_,_,1,3,5,}, - { _,_,_,_,0,1,3,5,}, - { _,_,_,_,_,2,3,5,}, - { _,_,_,_,0,2,3,5,}, - { _,_,_,_,1,2,3,5,}, - { _,_,_,0,1,2,3,5,}, - { _,_,_,_,_,_,4,5,}, - { _,_,_,_,_,0,4,5,}, - { _,_,_,_,_,1,4,5,}, - { _,_,_,_,0,1,4,5,}, - { _,_,_,_,_,2,4,5,}, - { _,_,_,_,0,2,4,5,}, - { 
_,_,_,_,1,2,4,5,}, - { _,_,_,0,1,2,4,5,}, - { _,_,_,_,_,3,4,5,}, - { _,_,_,_,0,3,4,5,}, - { _,_,_,_,1,3,4,5,}, - { _,_,_,0,1,3,4,5,}, - { _,_,_,_,2,3,4,5,}, - { _,_,_,0,2,3,4,5,}, - { _,_,_,1,2,3,4,5,}, - { _,_,0,1,2,3,4,5,}, - { _,_,_,_,_,_,_,6,}, - { _,_,_,_,_,_,0,6,}, - { _,_,_,_,_,_,1,6,}, - { _,_,_,_,_,0,1,6,}, - { _,_,_,_,_,_,2,6,}, - { _,_,_,_,_,0,2,6,}, - { _,_,_,_,_,1,2,6,}, - { _,_,_,_,0,1,2,6,}, - { _,_,_,_,_,_,3,6,}, - { _,_,_,_,_,0,3,6,}, - { _,_,_,_,_,1,3,6,}, - { _,_,_,_,0,1,3,6,}, - { _,_,_,_,_,2,3,6,}, - { _,_,_,_,0,2,3,6,}, - { _,_,_,_,1,2,3,6,}, - { _,_,_,0,1,2,3,6,}, - { _,_,_,_,_,_,4,6,}, - { _,_,_,_,_,0,4,6,}, - { _,_,_,_,_,1,4,6,}, - { _,_,_,_,0,1,4,6,}, - { _,_,_,_,_,2,4,6,}, - { _,_,_,_,0,2,4,6,}, - { _,_,_,_,1,2,4,6,}, - { _,_,_,0,1,2,4,6,}, - { _,_,_,_,_,3,4,6,}, - { _,_,_,_,0,3,4,6,}, - { _,_,_,_,1,3,4,6,}, - { _,_,_,0,1,3,4,6,}, - { _,_,_,_,2,3,4,6,}, - { _,_,_,0,2,3,4,6,}, - { _,_,_,1,2,3,4,6,}, - { _,_,0,1,2,3,4,6,}, - { _,_,_,_,_,_,5,6,}, - { _,_,_,_,_,0,5,6,}, - { _,_,_,_,_,1,5,6,}, - { _,_,_,_,0,1,5,6,}, - { _,_,_,_,_,2,5,6,}, - { _,_,_,_,0,2,5,6,}, - { _,_,_,_,1,2,5,6,}, - { _,_,_,0,1,2,5,6,}, - { _,_,_,_,_,3,5,6,}, - { _,_,_,_,0,3,5,6,}, - { _,_,_,_,1,3,5,6,}, - { _,_,_,0,1,3,5,6,}, - { _,_,_,_,2,3,5,6,}, - { _,_,_,0,2,3,5,6,}, - { _,_,_,1,2,3,5,6,}, - { _,_,0,1,2,3,5,6,}, - { _,_,_,_,_,4,5,6,}, - { _,_,_,_,0,4,5,6,}, - { _,_,_,_,1,4,5,6,}, - { _,_,_,0,1,4,5,6,}, - { _,_,_,_,2,4,5,6,}, - { _,_,_,0,2,4,5,6,}, - { _,_,_,1,2,4,5,6,}, - { _,_,0,1,2,4,5,6,}, - { _,_,_,_,3,4,5,6,}, - { _,_,_,0,3,4,5,6,}, - { _,_,_,1,3,4,5,6,}, - { _,_,0,1,3,4,5,6,}, - { _,_,_,2,3,4,5,6,}, - { _,_,0,2,3,4,5,6,}, - { _,_,1,2,3,4,5,6,}, - { _,0,1,2,3,4,5,6,}, - { _,_,_,_,_,_,_,7,}, - { _,_,_,_,_,_,0,7,}, - { _,_,_,_,_,_,1,7,}, - { _,_,_,_,_,0,1,7,}, - { _,_,_,_,_,_,2,7,}, - { _,_,_,_,_,0,2,7,}, - { _,_,_,_,_,1,2,7,}, - { _,_,_,_,0,1,2,7,}, - { _,_,_,_,_,_,3,7,}, - { _,_,_,_,_,0,3,7,}, - { _,_,_,_,_,1,3,7,}, - { _,_,_,_,0,1,3,7,}, - { _,_,_,_,_,2,3,7,}, - 
{ _,_,_,_,0,2,3,7,}, - { _,_,_,_,1,2,3,7,}, - { _,_,_,0,1,2,3,7,}, - { _,_,_,_,_,_,4,7,}, - { _,_,_,_,_,0,4,7,}, - { _,_,_,_,_,1,4,7,}, - { _,_,_,_,0,1,4,7,}, - { _,_,_,_,_,2,4,7,}, - { _,_,_,_,0,2,4,7,}, - { _,_,_,_,1,2,4,7,}, - { _,_,_,0,1,2,4,7,}, - { _,_,_,_,_,3,4,7,}, - { _,_,_,_,0,3,4,7,}, - { _,_,_,_,1,3,4,7,}, - { _,_,_,0,1,3,4,7,}, - { _,_,_,_,2,3,4,7,}, - { _,_,_,0,2,3,4,7,}, - { _,_,_,1,2,3,4,7,}, - { _,_,0,1,2,3,4,7,}, - { _,_,_,_,_,_,5,7,}, - { _,_,_,_,_,0,5,7,}, - { _,_,_,_,_,1,5,7,}, - { _,_,_,_,0,1,5,7,}, - { _,_,_,_,_,2,5,7,}, - { _,_,_,_,0,2,5,7,}, - { _,_,_,_,1,2,5,7,}, - { _,_,_,0,1,2,5,7,}, - { _,_,_,_,_,3,5,7,}, - { _,_,_,_,0,3,5,7,}, - { _,_,_,_,1,3,5,7,}, - { _,_,_,0,1,3,5,7,}, - { _,_,_,_,2,3,5,7,}, - { _,_,_,0,2,3,5,7,}, - { _,_,_,1,2,3,5,7,}, - { _,_,0,1,2,3,5,7,}, - { _,_,_,_,_,4,5,7,}, - { _,_,_,_,0,4,5,7,}, - { _,_,_,_,1,4,5,7,}, - { _,_,_,0,1,4,5,7,}, - { _,_,_,_,2,4,5,7,}, - { _,_,_,0,2,4,5,7,}, - { _,_,_,1,2,4,5,7,}, - { _,_,0,1,2,4,5,7,}, - { _,_,_,_,3,4,5,7,}, - { _,_,_,0,3,4,5,7,}, - { _,_,_,1,3,4,5,7,}, - { _,_,0,1,3,4,5,7,}, - { _,_,_,2,3,4,5,7,}, - { _,_,0,2,3,4,5,7,}, - { _,_,1,2,3,4,5,7,}, - { _,0,1,2,3,4,5,7,}, - { _,_,_,_,_,_,6,7,}, - { _,_,_,_,_,0,6,7,}, - { _,_,_,_,_,1,6,7,}, - { _,_,_,_,0,1,6,7,}, - { _,_,_,_,_,2,6,7,}, - { _,_,_,_,0,2,6,7,}, - { _,_,_,_,1,2,6,7,}, - { _,_,_,0,1,2,6,7,}, - { _,_,_,_,_,3,6,7,}, - { _,_,_,_,0,3,6,7,}, - { _,_,_,_,1,3,6,7,}, - { _,_,_,0,1,3,6,7,}, - { _,_,_,_,2,3,6,7,}, - { _,_,_,0,2,3,6,7,}, - { _,_,_,1,2,3,6,7,}, - { _,_,0,1,2,3,6,7,}, - { _,_,_,_,_,4,6,7,}, - { _,_,_,_,0,4,6,7,}, - { _,_,_,_,1,4,6,7,}, - { _,_,_,0,1,4,6,7,}, - { _,_,_,_,2,4,6,7,}, - { _,_,_,0,2,4,6,7,}, - { _,_,_,1,2,4,6,7,}, - { _,_,0,1,2,4,6,7,}, - { _,_,_,_,3,4,6,7,}, - { _,_,_,0,3,4,6,7,}, - { _,_,_,1,3,4,6,7,}, - { _,_,0,1,3,4,6,7,}, - { _,_,_,2,3,4,6,7,}, - { _,_,0,2,3,4,6,7,}, - { _,_,1,2,3,4,6,7,}, - { _,0,1,2,3,4,6,7,}, - { _,_,_,_,_,5,6,7,}, - { _,_,_,_,0,5,6,7,}, - { _,_,_,_,1,5,6,7,}, - { _,_,_,0,1,5,6,7,}, 
- { _,_,_,_,2,5,6,7,}, - { _,_,_,0,2,5,6,7,}, - { _,_,_,1,2,5,6,7,}, - { _,_,0,1,2,5,6,7,}, - { _,_,_,_,3,5,6,7,}, - { _,_,_,0,3,5,6,7,}, - { _,_,_,1,3,5,6,7,}, - { _,_,0,1,3,5,6,7,}, - { _,_,_,2,3,5,6,7,}, - { _,_,0,2,3,5,6,7,}, - { _,_,1,2,3,5,6,7,}, - { _,0,1,2,3,5,6,7,}, - { _,_,_,_,4,5,6,7,}, - { _,_,_,0,4,5,6,7,}, - { _,_,_,1,4,5,6,7,}, - { _,_,0,1,4,5,6,7,}, - { _,_,_,2,4,5,6,7,}, - { _,_,0,2,4,5,6,7,}, - { _,_,1,2,4,5,6,7,}, - { _,0,1,2,4,5,6,7,}, - { _,_,_,3,4,5,6,7,}, - { _,_,0,3,4,5,6,7,}, - { _,_,1,3,4,5,6,7,}, - { _,0,1,3,4,5,6,7,}, - { _,_,2,3,4,5,6,7,}, - { _,0,2,3,4,5,6,7,}, - { _,1,2,3,4,5,6,7,}, - { 0,1,2,3,4,5,6,7,}, -}; diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/pooled_alloc.h b/src/htslib-1.19.1/htscodecs/htscodecs/pooled_alloc.h deleted file mode 100644 index fa1218e..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/pooled_alloc.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2009-2010, 2013 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// Defined static here as we only use in one file for now and don't -// want to pollute the library name space (io_lib has the same named -// functions). - -#ifndef _POOLED_ALLOC_H_ -#define _POOLED_ALLOC_H_ - -#include -#include -#include - -/* - * Implements a pooled block allocator where all items are the same size, - * but we need many of them. 
- */ -typedef struct { - void *pool; - size_t used; -} pool_t; - -typedef struct { - size_t dsize; - size_t npools; - pool_t *pools; - void *free; -} pool_alloc_t; - -#define PSIZE 1024*1024 - -static pool_alloc_t *pool_create(size_t dsize) { - pool_alloc_t *p; - - if (NULL == (p = (pool_alloc_t *)malloc(sizeof(*p)))) - return NULL; - - /* Minimum size is a pointer, for free list */ - dsize = (dsize + sizeof(void *) - 1) & ~(sizeof(void *)-1); - if (dsize < sizeof(void *)) - dsize = sizeof(void *); - p->dsize = dsize; - - p->npools = 0; - p->pools = NULL; - p->free = NULL; - - return p; -} - -static pool_t *new_pool(pool_alloc_t *p) { - size_t n = PSIZE / p->dsize; - pool_t *pool; - - pool = realloc(p->pools, (p->npools + 1) * sizeof(*p->pools)); - if (NULL == pool) return NULL; - p->pools = pool; - pool = &p->pools[p->npools]; - - pool->pool = malloc(n * p->dsize); - if (NULL == pool->pool) return NULL; - - pool->used = 0; - - p->npools++; - - return pool; -} - -static void pool_destroy(pool_alloc_t *p) { - size_t i; - - for (i = 0; i < p->npools; i++) { - free(p->pools[i].pool); - } - free(p->pools); - free(p); -} - -static void *pool_alloc(pool_alloc_t *p) { - pool_t *pool; - void *ret; - - /* Look on free list */ - if (NULL != p->free) { - ret = p->free; - p->free = *((void **)p->free); - return ret; - } - - /* Look for space in the last pool */ - if (p->npools) { - pool = &p->pools[p->npools - 1]; - if (pool->used + p->dsize < PSIZE) { - ret = ((char *) pool->pool) + pool->used; - pool->used += p->dsize; - return ret; - } - } - - /* Need a new pool */ - pool = new_pool(p); - if (NULL == pool) return NULL; - - pool->used = p->dsize; - return pool->pool; -} - -// static void pool_free(pool_alloc_t *p, void *ptr) { -// *(void **)ptr = p->free; -// p->free = ptr; -// } - -#endif /*_POOLED_ALLOC_H_*/ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_byte.h b/src/htslib-1.19.1/htscodecs/htscodecs/rANS_byte.h deleted file mode 100644 index 968d157..0000000 --- 
a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_byte.h +++ /dev/null @@ -1,569 +0,0 @@ -/* rans_byte.h originally from https://github.com/rygorous/ryg_rans - * - * This is a public-domain implementation of several rANS variants. rANS is an - * entropy coder from the ANS family, as described in Jarek Duda's paper - * "Asymmetric numeral systems" (http://arxiv.org/abs/1311.2540). - */ - -/*-------------------------------------------------------------------------- */ -/* rans_byte.h from https://github.com/rygorous/ryg_rans */ - -// Simple byte-aligned rANS encoder/decoder - public domain - Fabian 'ryg' Giesen 2014 -// -// Not intended to be "industrial strength"; just meant to illustrate the general -// idea. - -#ifndef RANS_BYTE_HEADER -#define RANS_BYTE_HEADER - -#include -#include -#include - -#include "utils.h" - -#ifdef assert -#define RansAssert assert -#else -#define RansAssert(x) -#endif - -// READ ME FIRST: -// -// This is designed like a typical arithmetic coder API, but there's three -// twists you absolutely should be aware of before you start hacking: -// -// 1. You need to encode data in *reverse* - last symbol first. rANS works -// like a stack: last in, first out. -// 2. Likewise, the encoder outputs bytes *in reverse* - that is, you give -// it a pointer to the *end* of your buffer (exclusive), and it will -// slowly move towards the beginning as more bytes are emitted. -// 3. Unlike basically any other entropy coder implementation you might -// have used, you can interleave data from multiple independent rANS -// encoders into the same bytestream without any extra signaling; -// you can also just write some bytes by yourself in the middle if -// you want to. This is in addition to the usual arithmetic encoder -// property of being able to switch models on the fly. Writing raw -// bytes can be useful when you have some data that you know is -// incompressible, and is cheaper than going through the rANS encode -// function. 
Using multiple rANS coders on the same byte stream wastes -// a few bytes compared to using just one, but execution of two -// independent encoders can happen in parallel on superscalar and -// Out-of-Order CPUs, so this can be *much* faster in tight decoding -// loops. -// -// This is why all the rANS functions take the write pointer as an -// argument instead of just storing it in some context struct. - -// -------------------------------------------------------------------------- - -// L ('l' in the paper) is the lower bound of our normalization interval. -// Between this and our byte-aligned emission, we use 31 (not 32!) bits. -// This is done intentionally because exact reciprocals for 31-bit uints -// fit in 32-bit uints: this permits some optimizations during encoding. -#define RANS_BYTE_L (1u << 23) // lower bound of our normalization interval - -// State for a rANS encoder. Yep, that's all there is to it. -typedef uint32_t RansState; - -// Initialize a rANS encoder. -static inline void RansEncInit(RansState* r) -{ - *r = RANS_BYTE_L; -} - -#if 0 /* Curently unused */ -// Renormalize the encoder. Internal function. -static inline RansState RansEncRenorm(RansState x, uint8_t** pptr, uint32_t freq, uint32_t scale_bits) -{ - uint32_t x_max = ((RANS_BYTE_L >> scale_bits) << 8) * freq; // this turns into a shift. - if (x >= x_max) { - uint8_t* ptr = *pptr; - do { - *--ptr = (uint8_t) (x & 0xff); - x >>= 8; - } while (x >= x_max); - *pptr = ptr; - } - return x; -} - -// Encodes a single symbol with range start "start" and frequency "freq". -// All frequencies are assumed to sum to "1 << scale_bits", and the -// resulting bytes get written to ptr (which is updated). -// -// NOTE: With rANS, you need to encode symbols in *reverse order*, i.e. from -// beginning to end! Likewise, the output bytestream is written *backwards*: -// ptr starts pointing at the end of the output buffer and keeps decrementing. 
-static inline void RansEncPut(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits) -{ - // renormalize - RansState x = RansEncRenorm(*r, pptr, freq, scale_bits); - - // x = C(s,x) - *r = ((x / freq) << scale_bits) + (x % freq) + start; -} -#endif /* Curently unused */ - -// Flushes the rANS encoder. -static inline void RansEncFlush(RansState* r, uint8_t** pptr) -{ - uint32_t x = *r; - uint8_t* ptr = *pptr; - - ptr -= 4; - ptr[0] = (uint8_t) (x >> 0); - ptr[1] = (uint8_t) (x >> 8); - ptr[2] = (uint8_t) (x >> 16); - ptr[3] = (uint8_t) (x >> 24); - - *pptr = ptr; -} - -// Initializes a rANS decoder. -// Unlike the encoder, the decoder works forwards as you'd expect. -static inline void RansDecInit(RansState* r, uint8_t** pptr) -{ - uint32_t x; - uint8_t* ptr = *pptr; - - x = ptr[0] << 0; - x |= ptr[1] << 8; - x |= ptr[2] << 16; - x |= ((uint32_t)ptr[3]) << 24; - ptr += 4; - - *pptr = ptr; - *r = x; -} - -// Returns the current cumulative frequency (map it to a symbol yourself!) -static inline uint32_t RansDecGet(RansState* r, uint32_t scale_bits) -{ - return *r & ((1u << scale_bits) - 1); -} - -// Advances in the bit stream by "popping" a single symbol with range start -// "start" and frequency "freq". All frequencies are assumed to sum to "1 << scale_bits", -// and the resulting bytes get written to ptr (which is updated). -static inline void RansDecAdvance(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits) -{ - uint32_t mask = (1u << scale_bits) - 1; - - // s, x = D(x) - uint32_t x = *r; - x = freq * (x >> scale_bits) + (x & mask) - start; - - // renormalize - if (x < RANS_BYTE_L) { - uint8_t* ptr = *pptr; - do x = (x << 8) | *ptr++; while (x < RANS_BYTE_L); - *pptr = ptr; - } - - *r = x; -} - -// -------------------------------------------------------------------------- - -// That's all you need for a full encoder; below here are some utility -// functions with extra convenience or optimizations. 
- -// Encoder symbol description -// This (admittedly odd) selection of parameters was chosen to make -// RansEncPutSymbol as cheap as possible. -typedef struct { - uint32_t x_max; // (Exclusive) upper bound of pre-normalization interval - uint32_t rcp_freq; // Fixed-point reciprocal frequency - uint32_t bias; // Bias - uint16_t cmpl_freq; // Complement of frequency: (1 << scale_bits) - freq - uint16_t rcp_shift; // Reciprocal shift -} RansEncSymbol; - -// Decoder symbols are straightforward. -// 32-bit means more memory, but oddly faster on old gcc? Why? -// 322MB/s vs 309MB/s for order-1. -typedef struct { - uint16_t freq; // Symbol frequency. - uint16_t start; // Start of range. -} RansDecSymbol; - -typedef struct { - uint32_t freq; // Symbol frequency. - uint32_t start; // Start of range. -} RansDecSymbol32; - -// Initializes an encoder symbol to start "start" and frequency "freq" -static inline void RansEncSymbolInit(RansEncSymbol* s, uint32_t start, uint32_t freq, uint32_t scale_bits) -{ - RansAssert(scale_bits <= 16); - RansAssert(start <= (1u << scale_bits)); - RansAssert(freq <= (1u << scale_bits) - start); - - // Say M := 1 << scale_bits. - // - // The original encoder does: - // x_new = (x/freq)*M + start + (x%freq) - // - // The fast encoder does (schematically): - // q = mul_hi(x, rcp_freq) >> rcp_shift (division) - // r = x - q*freq (remainder) - // x_new = q*M + bias + r (new x) - // plugging in r into x_new yields: - // x_new = bias + x + q*(M - freq) - // =: bias + x + q*cmpl_freq (*) - // - // and we can just precompute cmpl_freq. Now we just need to - // set up our parameters such that the original encoder and - // the fast encoder agree. - - s->x_max = ((RANS_BYTE_L >> scale_bits) << 8) * freq; - s->cmpl_freq = (uint16_t) ((1 << scale_bits) - freq); - if (freq < 2) { - // freq=0 symbols are never valid to encode, so it doesn't matter what - // we set our values to. 
- // - // freq=1 is tricky, since the reciprocal of 1 is 1; unfortunately, - // our fixed-point reciprocal approximation can only multiply by values - // smaller than 1. - // - // So we use the "next best thing": rcp_freq=0xffffffff, rcp_shift=0. - // This gives: - // q = mul_hi(x, rcp_freq) >> rcp_shift - // = mul_hi(x, (1<<32) - 1)) >> 0 - // = floor(x - x/(2^32)) - // = x - 1 if 1 <= x < 2^32 - // and we know that x>0 (x=0 is never in a valid normalization interval). - // - // So we now need to choose the other parameters such that - // x_new = x*M + start - // plug it in: - // x*M + start (desired result) - // = bias + x + q*cmpl_freq (*) - // = bias + x + (x - 1)*(M - 1) (plug in q=x-1, cmpl_freq) - // = bias + 1 + (x - 1)*M - // = x*M + (bias + 1 - M) - // - // so we have start = bias + 1 - M, or equivalently - // bias = start + M - 1. - s->rcp_freq = ~0u; - s->rcp_shift = 0; - s->bias = start + (1 << scale_bits) - 1; - } else { - // Alverson, "Integer Division using reciprocals" - // shift=ceil(log2(freq)) - uint32_t shift = 0; - while (freq > (1u << shift)) - shift++; - - s->rcp_freq = (uint32_t) (((1ull << (shift + 31)) + freq-1) / freq); - s->rcp_shift = shift - 1; - - // With these values, 'q' is the correct quotient, so we - // have bias=start. - s->bias = start; - } - - s->rcp_shift += 32; // Avoid the extra >>32 in RansEncPutSymbol -} - -// Initialize a decoder symbol to start "start" and frequency "freq" -static inline void RansDecSymbolInit(RansDecSymbol* s, uint32_t start, uint32_t freq) -{ - RansAssert(start <= (1 << 16)); - RansAssert(freq <= (1 << 16) - start); - s->start = (uint16_t) start; - s->freq = (uint16_t) freq; -} - -// Encodes a given symbol. This is faster than straight RansEnc since we can do -// multiplications instead of a divide. -// -// See RansEncSymbolInit for a description of how this works. 
-static inline void RansEncPutSymbol(RansState* r, uint8_t** pptr, RansEncSymbol const* sym) -{ - RansAssert(sym->x_max != 0); // can't encode symbol with freq=0 - - // renormalize - uint32_t x = *r; - uint32_t x_max = sym->x_max; - - // This is better for 40-qual illumina (3.7% quicker overall CRAM). - // The old method was better for low complexity data such as NovaSeq - // quals (2.6% quicker overall CRAM). - int o = x >= x_max; - uint8_t* ptr = *pptr; - ptr[-1] = x & 0xff; - ptr -= o; - x >>= o*8; - - if (unlikely(x >= x_max)) { - *--ptr = (uint8_t) (x & 0xff); - x >>= 8; - } - *pptr = ptr; - - //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift); - //*r = q * sym->cmpl_freq + x + sym->bias; - - // x = C(s,x) - // NOTE: written this way so we get a 32-bit "multiply high" when - // available. If you're on a 64-bit platform with cheap multiplies - // (e.g. x64), just bake the +32 into rcp_shift. - //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> 32) >> sym->rcp_shift; - - // The extra >>32 has already been added to RansEncSymbolInit - uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift); - *r = q * sym->cmpl_freq + x + sym->bias; -} - -// A 4-way version of RansEncPutSymbol, renormalising 4 states -// simulatenously with their results written to the same ptr buffer. -// (This is perhaps a failing as it makes optmisation tricky.) 
-static inline void RansEncPutSymbol4(RansState *r0, - RansState *r1, - RansState *r2, - RansState *r3, - uint8_t** pptr, - RansEncSymbol const *sym0, - RansEncSymbol const *sym1, - RansEncSymbol const *sym2, - RansEncSymbol const *sym3) -{ - RansAssert(sym0->x_max != 0); // can't encode symbol with freq=0 - RansAssert(sym1->x_max != 0); // can't encode symbol with freq=0 - RansAssert(sym2->x_max != 0); // can't encode symbol with freq=0 - RansAssert(sym3->x_max != 0); // can't encode symbol with freq=0 - - // renormalize - uint32_t x0, x1, x2, x3; - uint8_t* ptr = *pptr; - - int o; - uint32_t m[4] = { - sym0->x_max, - sym1->x_max, - sym2->x_max, - sym3->x_max - }; - - x0 = *r0; - o = x0 >= m[0]; - ptr[-1] = x0; - ptr -= o; - x0 >>= o*8; - if (x0 >= m[0]) { - *--ptr = x0; - x0 >>= 8; - } - - x1 = *r1; - o = x1 >= m[1]; - ptr[-1] = x1; - ptr -= o; - x1 >>= o*8; - if (x1 >= m[1]) { - *--ptr = x1; - x1 >>= 8; - } - - x2 = *r2; - o = x2 >= m[2]; - ptr[-1] = x2; - ptr -= o; - x2 >>= o*8; - if (x2 >= m[2]) { - *--ptr = x2; - x2 >>= 8; - } - - x3 = *r3; - o = x3 >= m[3]; - ptr[-1] = x3; - ptr -= o; - x3 >>= o*8; - if (x3 >= m[3]) { - *--ptr = x3; - x3 >>= 8; - } - - *pptr = ptr; - - // x = C(s,x) - uint32_t qa, qb; - qa = (uint32_t) (((uint64_t)x0 * sym0->rcp_freq) >> sym0->rcp_shift); - uint32_t X0 = qa * sym0->cmpl_freq; - qb = (uint32_t) (((uint64_t)x1 * sym1->rcp_freq) >> sym1->rcp_shift); - uint32_t X1 = qb * sym1->cmpl_freq; - - *r0 = X0 + x0 + sym0->bias; - *r1 = X1 + x1 + sym1->bias; - - qa = (uint32_t) (((uint64_t)x2 * sym2->rcp_freq) >> sym2->rcp_shift); - uint32_t X2 = qa * sym2->cmpl_freq; - qb = (uint32_t) (((uint64_t)x3 * sym3->rcp_freq) >> sym3->rcp_shift); - uint32_t X3 = qb * sym3->cmpl_freq; - - *r2 = X2 + x2 + sym2->bias; - *r3 = X3 + x3 + sym3->bias; -} - -// Equivalent to RansDecAdvance that takes a symbol. 
-static inline void RansDecAdvanceSymbol(RansState* r, uint8_t** pptr, RansDecSymbol const* sym, uint32_t scale_bits) -{ - RansDecAdvance(r, pptr, sym->start, sym->freq, scale_bits); -} - -// Advances in the bit stream by "popping" a single symbol with range start -// "start" and frequency "freq". All frequencies are assumed to sum to "1 << scale_bits". -// No renormalization or output happens. -static inline void RansDecAdvanceStep(RansState* r, uint32_t start, uint32_t freq, uint32_t scale_bits) -{ - uint32_t mask = (1u << scale_bits) - 1; - - // s, x = D(x) - uint32_t x = *r; - *r = freq * (x >> scale_bits) + (x & mask) - start; -} - -// Equivalent to RansDecAdvanceStep that takes a symbol. -static inline void RansDecAdvanceSymbolStep(RansState* r, RansDecSymbol const* sym, uint32_t scale_bits) -{ - RansDecAdvanceStep(r, sym->start, sym->freq, scale_bits); -} - -// Renormalize. -#if defined(__x86_64) && !defined(__ILP32__) -/* - * Assembly variants of the RansDecRenorm code. - * These are based on joint ideas from Rob Davies and from looking at - * the clang assembly output. - */ -static inline void RansDecRenorm(RansState* r, uint8_t** pptr) { - uint32_t x = *r; - uint8_t *ptr = *pptr; - - __asm__ ("movzbl (%0), %%eax\n\t" - "mov %1, %%edx\n\t" - "shl $0x8,%%edx\n\t" - "or %%eax,%%edx\n\t" - "cmp $0x800000,%1\n\t" - "cmovb %%edx,%1\n\t" - "adc $0x0,%0\n\t" - : "=r" (ptr), "=r" (x) - : "0" (ptr), "1" (x) - : "eax", "edx" - ); - if (x < 0x800000) x = (x << 8) | *ptr++; - *pptr = ptr; - *r = x; -} - -/* - * A variant that normalises two rans states. - * The only minor tweak here is to adjust the reorder a few opcodes - * to reduce dependency delays. 
- */ -static inline void RansDecRenorm2(RansState* r1, RansState* r2, uint8_t** pptr) { - uint32_t x1 = *r1; - uint32_t x2 = *r2; - uint8_t *ptr = *pptr; - - __asm__ ("movzbl (%0), %%eax\n\t" - "mov %1, %%edx\n\t" - "shl $0x8, %%edx\n\t" - "or %%eax, %%edx\n\t" - "cmp $0x800000, %1\n\t" - "cmovb %%edx, %1\n\t" - "adc $0x0, %0\n\t" - "mov %2, %%edx\n\t" - "shl $0x8, %%edx\n\t" - "cmp $0x800000, %1\n\t" - "jae 1f\n\t" - "movzbl (%0), %%eax\n\t" - "shl $0x8, %1\n\t" - "or %%eax, %1\n\t" - "add $0x1, %0\n\t" - "1:\n\t" - "movzbl (%0), %%eax\n\t" - "or %%eax, %%edx\n\t" - "cmp $0x800000, %2\n\t" - "cmovb %%edx, %2\n\t" - "adc $0x0, %0\n\t" - "cmp $0x800000, %2\n\t" - "jae 2f\n\t" - "movzbl (%0), %%eax\n\t" - "shl $0x8, %2\n\t" - "or %%eax, %2\n\t" - "add $0x1, %0\n\t" - "2:\n\t" - : "=r" (ptr), "=r" (x1), "=r" (x2) - : "0" (ptr), "1" (x1), "2" (x2) - : "eax", "edx" - ); - - *pptr = ptr; - *r1 = x1; - *r2 = x2; -} - -#else /* __x86_64 */ - -static inline void RansDecRenorm(RansState* r, uint8_t** pptr) -{ - // renormalize - uint32_t x = *r; - -#ifdef __clang__ - // Generates cmov instructions on clang, but alas not gcc - uint8_t* ptr = *pptr; - uint32_t y = (x << 8) | *ptr; - uint32_t cond = x < RANS_BYTE_L; - x = cond ? y : x; - ptr += cond ? 
1 : 0; - if (x < RANS_BYTE_L) x = (x<<8) | *ptr++; - *pptr = ptr; -#else - if (x >= RANS_BYTE_L) return; - uint8_t* ptr = *pptr; - x = (x << 8) | *ptr++; - if (x < RANS_BYTE_L) x = (x << 8) | *ptr++; - *pptr = ptr; -#endif /* __clang__ */ - - *r = x; -} - -static inline void RansDecRenorm2(RansState* r1, RansState* r2, uint8_t** pptr) { - RansDecRenorm(r1, pptr); - RansDecRenorm(r2, pptr); -} - -#endif /* __x86_64 */ - -static inline void RansDecRenormSafe(RansState* r, uint8_t** pptr, uint8_t *ptr_end) -{ - uint32_t x = *r; - uint8_t* ptr = *pptr; - if (x >= RANS_BYTE_L || ptr >= ptr_end) return; - x = (x << 8) | *ptr++; - if (x < RANS_BYTE_L && ptr < ptr_end) - x = (x << 8) | *ptr++; - *pptr = ptr; - *r = x; -} - -static inline void RansDecSymbolInit32(RansDecSymbol32* s, uint32_t start, uint32_t freq) -{ - RansAssert(start <= (1 << 16)); - RansAssert(freq <= (1 << 16) - start); - s->start = (uint16_t) start; - s->freq = (uint16_t) freq; -} - -static inline void RansDecAdvanceSymbol32(RansState* r, uint8_t** pptr, RansDecSymbol32 const* sym, uint32_t scale_bits) -{ - RansDecAdvance(r, pptr, sym->start, sym->freq, scale_bits); -} - -#endif // RANS_BYTE_HEADER diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static.c b/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static.c deleted file mode 100644 index e3c34a6..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static.c +++ /dev/null @@ -1,844 +0,0 @@ -/* - * Copyright (c) 2014-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -// Use 11 for order-1? -#define TF_SHIFT 12 -#define TOTFREQ (1< -#include -#include -#include -#include -#include -#include -#include -#ifndef NO_THREADS -#include -#endif - -#include "rANS_static.h" - -#define ABS(a) ((a)>0?(a):-(a)) - -/*----------------------------------------------------------------------------- - * Memory to memory compression functions. - * - * These are original versions without any manual loop unrolling. They - * are easier to understand, but can be up to 2x slower. 
- */ - -static -unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9); - unsigned char *cp, *out_end; - RansEncSymbol syms[256]; - RansState rans0; - RansState rans2; - RansState rans1; - RansState rans3; - uint8_t* ptr; - int F[256+MAGIC] = {0}, i, j, tab_size, rle, x, fsum = 0; - int m = 0, M = 0; - uint64_t tr; - - if (!out_buf) - return NULL; - - ptr = out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9; - - // Compute statistics - if (hist8(in, in_size, (uint32_t *)F) < 0) { - free(out_buf); - return NULL; - } - tr = in_size ? ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size : 0; - - normalise_harder: - // Normalise so T[i] == TOTFREQ - for (fsum = m = M = j = 0; j < 256; j++) { - if (!F[j]) - continue; - - if (m < F[j]) - m = F[j], M = j; - - if ((F[j] = (F[j]*tr)>>31) == 0) - F[j] = 1; - fsum += F[j]; - } - - fsum++; - if (fsum < TOTFREQ) { - F[M] += TOTFREQ-fsum; - } else if (fsum-TOTFREQ > F[M]/2) { - // Corner case to avoid excessive frequency reduction - tr = 2104533975; goto normalise_harder; // equiv to *0.98. - } else { - F[M] -= fsum-TOTFREQ; - } - - //printf("F[%d]=%d\n", M, F[M]); - assert(F[M]>0); - - // Encode statistics. 
- cp = out_buf+9; - - for (x = rle = j = 0; j < 256; j++) { - if (F[j]) { - // j - if (rle) { - rle--; - } else { - *cp++ = j; - if (!rle && j && F[j-1]) { - for(rle=j+1; rle<256 && F[rle]; rle++) - ; - rle -= j+1; - *cp++ = rle; - } - //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]); - } - - // F[j] - if (F[j]<128) { - *cp++ = F[j]; - } else { - *cp++ = 128 | (F[j]>>8); - *cp++ = F[j]&0xff; - } - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - x += F[j]; - } - } - *cp++ = 0; - - //write(2, out_buf+4, cp-(out_buf+4)); - tab_size = cp-out_buf; - - RansEncInit(&rans0); - RansEncInit(&rans1); - RansEncInit(&rans2); - RansEncInit(&rans3); - - switch (i=(in_size&3)) { - case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); - case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); - case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); - case 0: - break; - } - for (i=(in_size &~3); likely(i>0); i-=4) { - RansEncSymbol *s3 = &syms[in[i-1]]; - RansEncSymbol *s2 = &syms[in[i-2]]; - RansEncSymbol *s1 = &syms[in[i-3]]; - RansEncSymbol *s0 = &syms[in[i-4]]; - - RansEncPutSymbol(&rans3, &ptr, s3); - RansEncPutSymbol(&rans2, &ptr, s2); - RansEncPutSymbol(&rans1, &ptr, s1); - RansEncPutSymbol(&rans0, &ptr, s0); - } - - RansEncFlush(&rans3, &ptr); - RansEncFlush(&rans2, &ptr); - RansEncFlush(&rans1, &ptr); - RansEncFlush(&rans0, &ptr); - - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - - cp = out_buf; - - *cp++ = 0; // order - *cp++ = ((*out_size-9)>> 0) & 0xff; - *cp++ = ((*out_size-9)>> 8) & 0xff; - *cp++ = ((*out_size-9)>>16) & 0xff; - *cp++ = ((*out_size-9)>>24) & 0xff; - - *cp++ = (in_size>> 0) & 0xff; - *cp++ = (in_size>> 8) & 0xff; - *cp++ = (in_size>>16) & 0xff; - *cp++ = (in_size>>24) & 0xff; - - memmove(out_buf + tab_size, ptr, out_end-ptr); - - return out_buf; -} - -typedef struct { - unsigned char R[TOTFREQ]; -} ari_decoder; - -static -unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int 
in_size, - unsigned int *out_size) { - /* Load in the static tables */ - unsigned char *cp = in + 9; - unsigned char *cp_end = in + in_size; - const uint32_t mask = (1u << TF_SHIFT)-1; - int i, j, rle; - unsigned int x, y; - unsigned int out_sz, in_sz; - char *out_buf; - RansState R[4]; - RansState m[4]; - uint16_t sfreq[TOTFREQ+32]; - uint16_t ssym [TOTFREQ+32]; // faster, but only needs uint8_t - uint32_t sbase[TOTFREQ+16]; // faster, but only needs uint16_t - - if (in_size < 26) // Need at least this many bytes just to start - return NULL; - - if (*in++ != 0) // Order-0 check - return NULL; - - in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24); - out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24); - if (in_sz != in_size-9) - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - - // For speeding up the fuzzer only. - // Small input can lead to large uncompressed data. - // We reject this as it just slows things up instead of testing more code - // paths (once we've verified a few times for large data). -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - out_buf = malloc(out_sz); - if (!out_buf) - return NULL; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // Precompute reverse lookup of frequency. 
- rle = x = y = 0; - j = *cp++; - do { - int F, C; - if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left - if ((F = *cp++) >= 128) { - F &= ~128; - F = ((F & 127) << 8) | *cp++; - } - C = x; - - if (x + F > TOTFREQ) - goto cleanup; - - for (y = 0; y < F; y++) { - ssym [y + C] = j; - sfreq[y + C] = F; - sbase[y + C] = y; - } - x += F; - - if (!rle && j+1 == *cp) { - j = *cp++; - rle = *cp++; - } else if (rle) { - rle--; - j++; - if (j > 255) - goto cleanup; - } else { - j = *cp++; - } - } while(j); - - if (x < TOTFREQ-1 || x > TOTFREQ) - goto cleanup; - if (x != TOTFREQ) { - // Protection against accessing uninitialised memory in the case - // where SUM(freqs) == 4095 and not 4096. - ssym [x] = ssym [x-1]; - sfreq[x] = sfreq[x-1]; - sbase[x] = sbase[x-1]+1; - } - - // 16 bytes of cp here. Also why cp - 16 in above loop. - if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left - - RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto cleanup; - RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto cleanup; - RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto cleanup; - RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto cleanup; - - int out_end = (out_sz&~3); - cp_end -= 8; // within 8 for simplicity of loop below - // 2 x likely() here harms gcc 7.5 by about 8% rate drop, but only in O2 - for (i=0; likely(i < out_end); i+=4) { - // /curr code - // gcc7 O2 513/497 562/556++ 556/547 ok - // gcc7 O3 566/552 569/553 581/563+ - // gcc10 O2 544/538 563/547 541/537-? 
- // gcc10 O3 531/519 546/530 575/546+ - // gcc11 O2 512/490 588/540 540/535 mid - // gcc11 O3 482/471 553/541 549/535 - // gcc12 O2 533/526 544/534 539/535 - // gcc12 O3 548/533 502/497-- 553/527 ok - // clang10 555/542 564/549 560/541 - // clang13 560/553 572/559 556/559 - m[0] = R[0] & mask; - R[0] = sfreq[m[0]] * (R[0] >> TF_SHIFT) + sbase[m[0]]; - - m[1] = R[1] & mask; - R[1] = sfreq[m[1]] * (R[1] >> TF_SHIFT) + sbase[m[1]]; - - m[2] = R[2] & mask; - R[2] = sfreq[m[2]] * (R[2] >> TF_SHIFT) + sbase[m[2]]; - - m[3] = R[3] & mask; - R[3] = sfreq[m[3]] * (R[3] >> TF_SHIFT) + sbase[m[3]]; - - // likely() here harms gcc12 -O3 - if (cp>2)]]++; - F[0][in[2*(in_size>>2)]]++; - F[0][in[3*(in_size>>2)]]++; - T[0]+=3; - - - // Normalise so T[i] == TOTFREQ - for (rle_i = i = 0; i < 256; i++) { - int t2, m, M; - unsigned int x; - - if (T[i] == 0) - continue; - - //uint64_t p = (TOTFREQ * TOTFREQ) / t; - double p = ((double)TOTFREQ)/T[i]; - normalise_harder: - for (t2 = m = M = j = 0; j < 256; j++) { - if (!F[i][j]) - continue; - - if (m < F[i][j]) - m = F[i][j], M = j; - - //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0) - if ((F[i][j] *= p) == 0) - F[i][j] = 1; - t2 += F[i][j]; - } - - t2++; - if (t2 < TOTFREQ) { - F[i][M] += TOTFREQ-t2; - } else if (t2-TOTFREQ >= F[i][M]/2) { - // Corner case to avoid excessive frequency reduction - p = .98; goto normalise_harder; - } else { - F[i][M] -= t2-TOTFREQ; - } - - // Store frequency table - // i - if (rle_i) { - rle_i--; - } else { - *cp++ = i; - // FIXME: could use order-0 statistics to observe which alphabet - // symbols are present and base RLE on that ordering instead. 
- if (i && T[i-1]) { - for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++) - ; - rle_i -= i+1; - *cp++ = rle_i; - } - } - - int *F_i_ = F[i]; - x = 0; - rle_j = 0; - for (j = 0; j < 256; j++) { - if (F_i_[j]) { - //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x); - - // j - if (rle_j) { - rle_j--; - } else { - *cp++ = j; - if (!rle_j && j && F_i_[j-1]) { - for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++) - ; - rle_j -= j+1; - *cp++ = rle_j; - } - } - - // F_i_[j] - if (F_i_[j]<128) { - *cp++ = F_i_[j]; - } else { - *cp++ = 128 | (F_i_[j]>>8); - *cp++ = F_i_[j]&0xff; - } - - RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT); - x += F_i_[j]; - } - } - *cp++ = 0; - } - *cp++ = 0; - - //write(2, out_buf+4, cp-(out_buf+4)); - tab_size = cp - out_buf; - assert(tab_size < 257*257*3); - - RansState rans0, rans1, rans2, rans3; - RansEncInit(&rans0); - RansEncInit(&rans1); - RansEncInit(&rans2); - RansEncInit(&rans3); - - uint8_t* ptr = out_end; - - int isz4 = in_size>>2; - int i0 = 1*isz4-2; - int i1 = 2*isz4-2; - int i2 = 3*isz4-2; - int i3 = 4*isz4-2; - - unsigned char l0 = in[i0+1]; - unsigned char l1 = in[i1+1]; - unsigned char l2 = in[i2+1]; - unsigned char l3 = in[i3+1]; - - // Deal with the remainder - l3 = in[in_size-1]; - for (i3 = in_size-2; i3 > 4*isz4-2; i3--) { - unsigned char c3 = in[i3]; - RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]); - l3 = c3; - } - - for (; likely(i0 >= 0); i0--, i1--, i2--, i3--) { - unsigned char c3 = in[i3]; - unsigned char c2 = in[i2]; - unsigned char c1 = in[i1]; - unsigned char c0 = in[i0]; - - RansEncSymbol *s3 = &syms[c3][l3]; - RansEncSymbol *s2 = &syms[c2][l2]; - RansEncSymbol *s1 = &syms[c1][l1]; - RansEncSymbol *s0 = &syms[c0][l0]; - - RansEncPutSymbol4(&rans3, &rans2, &rans1, &rans0, &ptr, - s3, s2, s1, s0); - - l3 = c3; - l2 = c2; - l1 = c1; - l0 = c0; - } - - RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]); - RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]); - RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]); - 
RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]); - - RansEncFlush(&rans3, &ptr); - RansEncFlush(&rans2, &ptr); - RansEncFlush(&rans1, &ptr); - RansEncFlush(&rans0, &ptr); - - *out_size = (out_end - ptr) + tab_size; - - cp = out_buf; - *cp++ = 1; // order - - *cp++ = ((*out_size-9)>> 0) & 0xff; - *cp++ = ((*out_size-9)>> 8) & 0xff; - *cp++ = ((*out_size-9)>>16) & 0xff; - *cp++ = ((*out_size-9)>>24) & 0xff; - - *cp++ = (in_size>> 0) & 0xff; - *cp++ = (in_size>> 8) & 0xff; - *cp++ = (in_size>>16) & 0xff; - *cp++ = (in_size>>24) & 0xff; - - memmove(out_buf + tab_size, ptr, out_end-ptr); - - cleanup: - htscodecs_tls_free(syms); - - return out_buf; -} - -static -unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - /* Load in the static tables */ - unsigned char *cp = in + 9; - unsigned char *ptr_end = in + in_size; - int i, j = -999, rle_i, rle_j; - unsigned int x; - unsigned int out_sz, in_sz; - char *out_buf = NULL; - - // Sanity checking - if (in_size < 27) // Need at least this many bytes to start - return NULL; - - if (*in++ != 1) // Order-1 check - return NULL; - - in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24); - out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24); - if (in_sz != in_size-9) - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - - // For speeding up the fuzzer only. - // Small input can lead to large uncompressed data. - // We reject this as it just slows things up instead of testing more code - // paths (once we've verified a few times for large data). 
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - // Allocate decoding lookup tables - RansDecSymbol32 (*syms)[256]; - uint8_t *mem = htscodecs_tls_calloc(256, sizeof(ari_decoder) - + sizeof(*syms)); - if (!mem) - return NULL; - ari_decoder *const D = (ari_decoder *)mem; - syms = (RansDecSymbol32 (*)[256])(mem + 256*sizeof(ari_decoder)); - int16_t map[256], map_i = 0; - - memset(map, -1, 256*sizeof(*map)); - - if (!D) goto cleanup; - /* These memsets prevent illegal memory access in syms due to - broken compressed data. As D is calloc'd, all illegal transitions - will end up in either row or column 0 of syms. */ - memset(&syms[0], 0, sizeof(syms[0])); - for (i = 0; i < 256; i++) - memset(&syms[i][0], 0, sizeof(syms[0][0])); - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - //i = *cp++; - rle_i = 0; - i = *cp++; - do { - // Map arbitrary a,b,c to 0,1,2 to improve cache locality. - if (map[i] == -1) - map[i] = map_i++; - int m_i = map[i]; - - rle_j = x = 0; - j = *cp++; - do { - if (map[j] == -1) - map[j] = map_i++; - - int F, C; - if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left - if ((F = *cp++) >= 128) { - F &= ~128; - F = ((F & 127) << 8) | *cp++; - } - C = x; - - //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C); - - if (unlikely(!F)) - F = TOTFREQ; - - RansDecSymbolInit32(&syms[m_i][j], C, F); - - /* Build reverse lookup table */ - //if (!D[i].R) D[i].R = (unsigned char *)malloc(TOTFREQ); - if (x + F > TOTFREQ) - goto cleanup; - - memset(&D[m_i].R[x], j, F); - x += F; - - if (!rle_j && j+1 == *cp) { - j = *cp++; - rle_j = *cp++; - } else if (rle_j) { - rle_j--; - j++; - if (j > 255) - goto cleanup; - } else { - j = *cp++; - } - } while(j); - - if (x < TOTFREQ-1 || x > TOTFREQ) - goto cleanup; - if (x < TOTFREQ) // historically we fill 4095, not 4096 - D[i].R[x] = D[i].R[x-1]; - - if (!rle_i && i+1 == *cp) { - i = *cp++; - rle_i = *cp++; - } else if (rle_i) { - rle_i--; - i++; - if (i > 
255) - goto cleanup; - } else { - i = *cp++; - } - } while (i); - for (i = 0; i < 256; i++) - if (map[i] == -1) - map[i] = 0; - - RansState rans0, rans1, rans2, rans3; - uint8_t *ptr = cp; - if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left - RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup; - RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup; - RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup; - RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup; - - RansState R[4]; - R[0] = rans0; - R[1] = rans1; - R[2] = rans2; - R[3] = rans3; - - unsigned int isz4 = out_sz>>2; - uint32_t l0 = 0; - uint32_t l1 = 0; - uint32_t l2 = 0; - uint32_t l3 = 0; - - unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4}; - - /* Allocate output buffer */ - out_buf = malloc(out_sz); - if (!out_buf) goto cleanup; - - uint8_t cc0 = D[map[l0]].R[R[0] & ((1u << TF_SHIFT)-1)]; - uint8_t cc1 = D[map[l1]].R[R[1] & ((1u << TF_SHIFT)-1)]; - uint8_t cc2 = D[map[l2]].R[R[2] & ((1u << TF_SHIFT)-1)]; - uint8_t cc3 = D[map[l3]].R[R[3] & ((1u << TF_SHIFT)-1)]; - - ptr_end -= 8; - for (; likely(i4[0] < isz4); i4[0]++, i4[1]++, i4[2]++, i4[3]++) { - // seq4-head2: file q40b - // O3 O2 - // gcc7 296/291 290/260 - // gcc10 292/292 290/261 - // gcc11 293/293 290/265 - // gcc12 293/290 291/266 - // clang10 293/290 296/272 - // clang13 300/290 290/266 - out_buf[i4[0]] = cc0; - out_buf[i4[1]] = cc1; - out_buf[i4[2]] = cc2; - out_buf[i4[3]] = cc3; - - RansDecSymbol32 s[4] = { - syms[l0][cc0], - syms[l1][cc1], - syms[l2][cc2], - syms[l3][cc3], - }; - RansDecAdvanceStep(&R[0], s[0].start, s[0].freq, TF_SHIFT); - RansDecAdvanceStep(&R[1], s[1].start, s[1].freq, TF_SHIFT); - RansDecAdvanceStep(&R[2], s[2].start, s[2].freq, TF_SHIFT); - RansDecAdvanceStep(&R[3], s[3].start, s[3].freq, TF_SHIFT); - - // Likely here helps speed of high-entropy data by 10-11%, - // but harms low entropy-data speed by 3-4%. 
- if ((ptr < ptr_end)) { - RansDecRenorm2(&R[0], &R[1], &ptr); - RansDecRenorm2(&R[2], &R[3], &ptr); - } else { - RansDecRenormSafe(&R[0], &ptr, ptr_end+8); - RansDecRenormSafe(&R[1], &ptr, ptr_end+8); - RansDecRenormSafe(&R[2], &ptr, ptr_end+8); - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - } - - l0 = map[cc0]; - l1 = map[cc1]; - l2 = map[cc2]; - l3 = map[cc3]; - - cc0 = D[l0].R[R[0] & ((1u << TF_SHIFT)-1)]; - cc1 = D[l1].R[R[1] & ((1u << TF_SHIFT)-1)]; - cc2 = D[l2].R[R[2] & ((1u << TF_SHIFT)-1)]; - cc3 = D[l3].R[R[3] & ((1u << TF_SHIFT)-1)]; - } - - // Remainder - for (; i4[3] < out_sz; i4[3]++) { - unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)]; - out_buf[i4[3]] = c3; - - uint32_t m = R[3] & ((1u << TF_SHIFT)-1); - R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start; - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - l3 = map[c3]; - } - - *out_size = out_sz; - - cleanup: - htscodecs_tls_free(D); - - return (unsigned char *)out_buf; -} - -/*----------------------------------------------------------------------------- - * Simple interface to the order-0 vs order-1 encoders and decoders. - */ -unsigned char *rans_compress(unsigned char *in, unsigned int in_size, - unsigned int *out_size, int order) { - if (in_size > INT_MAX) { - *out_size = 0; - return NULL; - } - - return order - ? rans_compress_O1(in, in_size, out_size) - : rans_compress_O0(in, in_size, out_size); -} - -unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - /* Both rans_uncompress functions need to be able to read at least 9 - bytes. */ - if (in_size < 9) - return NULL; - return in[0] - ? 
rans_uncompress_O1(in, in_size, out_size) - : rans_uncompress_O0(in, in_size, out_size); -} diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static.h b/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static.h deleted file mode 100644 index 357f46e..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2014-2019 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef RANS_STATIC_H -#define RANS_STATIC_H - -#ifdef __cplusplus -extern "C" { -#endif - -unsigned char *rans_compress(unsigned char *in, unsigned int in_size, - unsigned int *out_size, int order); -unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size, - unsigned int *out_size); - -#ifdef __cplusplus -} -#endif - -#endif /* RANS_STATIC_H */ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr.c b/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr.c deleted file mode 100644 index 51ea554..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr.c +++ /dev/null @@ -1,758 +0,0 @@ -/* - * Copyright (c) 2017-2023 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include - -#include "rANS_word.h" -#include "rANS_static4x16.h" -#include "rANS_static16_int.h" -#include "varint.h" -#include "utils.h" - -#define TF_SHIFT 12 -#define TOTFREQ (1< *out_size) - return NULL; - - // If "out" isn't word aligned, tweak out_end/ptr to ensure it is. - // We already added more round in bound to allow for this. - if (((size_t)out)&1) - bound--; - ptr = out_end = out + bound; - - if (in_size == 0) - goto empty; - - // Compute statistics - double e = hist8e(in, in_size, F); - int low_ent = e < 2; - - // Normalise so frequences sum to power of 2 - uint32_t fsum = in_size; - uint32_t max_val = round2(fsum); - if (max_val > TOTFREQ) - max_val = TOTFREQ; - - if (normalise_freq(F, fsum, max_val) < 0) { - free(out_free); - return NULL; - } - fsum=max_val; - - cp = out; - cp += encode_freq(cp, F); - tab_size = cp-out; - //write(2, out+4, cp-(out+4)); - - if (normalise_freq(F, fsum, TOTFREQ) < 0) { - free(out_free); - return NULL; - } - - // Encode statistics. 
- for (x = j = 0; j < 256; j++) { - if (F[j]) { - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - x += F[j]; - } - } - - for (z = 0; z < NX; z++) - RansEncInit(&ransN[z]); - - z = i = in_size&(NX-1); - while (z-- > 0) - RansEncPutSymbol(&ransN[z], &ptr, &syms[in[in_size-(i-z)]]); - - if (low_ent) { - // orig - // gcc 446 - // clang 427 - for (i=(in_size &~(NX-1)); likely(i>0); i-=NX) { - for (z = NX-1; z >= 0; z-=4) { - RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]]; - RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]]; - RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]]; - RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]]; - RansEncPutSymbol_branched(&ransN[z-0], &ptr, s0); - RansEncPutSymbol_branched(&ransN[z-1], &ptr, s1); - RansEncPutSymbol_branched(&ransN[z-2], &ptr, s2); - RansEncPutSymbol_branched(&ransN[z-3], &ptr, s3); - if (NX%8 == 0) { - z -= 4; - RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]]; - RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]]; - RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]]; - RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]]; - RansEncPutSymbol_branched(&ransN[z-0], &ptr, s0); - RansEncPutSymbol_branched(&ransN[z-1], &ptr, s1); - RansEncPutSymbol_branched(&ransN[z-2], &ptr, s2); - RansEncPutSymbol_branched(&ransN[z-3], &ptr, s3); - } - } - if (z < -1) abort(); - } - } else { - // Branchless version optimises poorly with gcc unless we have - // AVX2 capability, so have a custom rewrite of it. - uint16_t* ptr16 = (uint16_t *)ptr; - for (i=(in_size &~(NX-1)); likely(i>0); i-=NX) { - // Unrolled copy of below, because gcc doesn't optimise this - // well in the original form. 
- // - // Gcc11: 328 MB/s (this) vs 208 MB/s (orig) - // Clang10: 352 MB/s (this) vs 340 MB/s (orig) - // - // for (z = NX-1; z >= 0; z-=4) { - // RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]]; - // RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]]; - // RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]]; - // RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]]; - // RansEncPutSymbol(&ransN[z-0], &ptr, s0); - // RansEncPutSymbol(&ransN[z-1], &ptr, s1); - // RansEncPutSymbol(&ransN[z-2], &ptr, s2); - // RansEncPutSymbol(&ransN[z-3], &ptr, s3); - // } - - for (z = NX-1; z >= 0; z-=4) { - // RansEncPutSymbol added in-situ - RansState *rp = &ransN[z]-3; - RansEncSymbol *sy[4]; - uint8_t *C = &in[i-(NX-z)]-3; - - sy[0] = &syms[C[3]]; - sy[1] = &syms[C[2]]; - - int c0 = rp[3-0] > sy[0]->x_max; - int c1 = rp[3-1] > sy[1]->x_max; - -#ifdef HTSCODECS_LITTLE_ENDIAN - ptr16[-1] = rp[3-0]; ptr16 -= c0; - ptr16[-1] = rp[3-1]; ptr16 -= c1; -#else - ((uint8_t *)&ptr16[-1])[0] = rp[3-0]; - ((uint8_t *)&ptr16[-1])[1] = rp[3-0]>>8; - ptr16 -= c0; - ((uint8_t *)&ptr16[-1])[0] = rp[3-1]; - ((uint8_t *)&ptr16[-1])[1] = rp[3-1]>>8; - ptr16 -= c1; -#endif - - rp[3-0] = c0 ? rp[3-0]>>16 : rp[3-0]; - rp[3-1] = c1 ? rp[3-1]>>16 : rp[3-1]; - - sy[2] = &syms[C[1]]; - sy[3] = &syms[C[0]]; - - int c2 = rp[3-2] > sy[2]->x_max; - int c3 = rp[3-3] > sy[3]->x_max; -#ifdef HTSCODECS_LITTLE_ENDIAN - ptr16[-1] = rp[3-2]; ptr16 -= c2; - ptr16[-1] = rp[3-3]; ptr16 -= c3; -#else - ((uint8_t *)&ptr16[-1])[0] = rp[3-2]; - ((uint8_t *)&ptr16[-1])[1] = rp[3-2]>>8; - ptr16 -= c2; - ((uint8_t *)&ptr16[-1])[0] = rp[3-3]; - ((uint8_t *)&ptr16[-1])[1] = rp[3-3]>>8; - ptr16 -= c3; -#endif - rp[3-2] = c2 ? rp[3-2]>>16 : rp[3-2]; - rp[3-3] = c3 ? 
rp[3-3]>>16 : rp[3-3]; - - int k; - for (k = 0; k < 4; k++) { - uint64_t r64 = (uint64_t)rp[3-k]; - uint32_t q = (r64 * sy[k]->rcp_freq) >> sy[k]->rcp_shift; - rp[3-k] += sy[k]->bias + q*sy[k]->cmpl_freq; - } - } - if (z < -1) abort(); - } - ptr = (uint8_t *)ptr16; - } - for (z = NX-1; z >= 0; z--) - RansEncFlush(&ransN[z], &ptr); - - empty: - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - - memmove(out + tab_size, ptr, out_end-ptr); - - return out; -} - -unsigned char *rans_uncompress_O0_32x16(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < 16) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *out_free = NULL; - unsigned char *cp_end = in + in_size; - int i; - uint32_t s3[TOTFREQ]; // For TF_SHIFT <= 12 - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - // Precompute reverse lookup of frequency. 
- uint32_t F[256] = {0}, fsum; - int fsz = decode_freq(cp, cp_end, F, &fsum); - if (!fsz) - goto err; - cp += fsz; - - normalise_freq_shift(F, fsum, TOTFREQ); - - // Build symbols; fixme, do as part of decode, see the _d variant - if (rans_F_to_s3(F, TF_SHIFT, s3)) - goto err; - - if (cp_end - cp < NX * 4) - goto err; - - int z; - RansState R[NX]; - for (z = 0; z < NX; z++) { - RansDecInit(&R[z], &cp); - if (R[z] < RANS_BYTE_L) - goto err; - } - - int out_end = (out_sz&~(NX-1)); - const uint32_t mask = (1u << TF_SHIFT)-1; - cp_end -= NX*2; // worst case for renorm bytes - - // assume NX is divisible by 4 - assert(NX%4==0); - - // Unsafe loop with no ptr overflow checking within loop itself - for (i=0; likely(i < out_end && cp < cp_end); i+=NX) { - for (z = 0; z < NX; z+=4) { - uint32_t S[4]; - S[0] = s3[R[z+0] & mask]; - S[1] = s3[R[z+1] & mask]; - S[2] = s3[R[z+2] & mask]; - S[3] = s3[R[z+3] & mask]; - - R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT) - + ((S[0]>>8) & mask); - R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT) - + ((S[1]>>8) & mask); - R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT) - + ((S[2]>>8) & mask); - R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT) - + ((S[3]>>8) & mask); - - out[i+z+0] = S[0]; - out[i+z+1] = S[1]; - out[i+z+2] = S[2]; - out[i+z+3] = S[3]; - - RansDecRenorm(&R[z+0], &cp); - RansDecRenorm(&R[z+1], &cp); - RansDecRenorm(&R[z+2], &cp); - RansDecRenorm(&R[z+3], &cp); - - if (NX%8==0) { - z += 4; - S[0] = s3[R[z+0] & mask]; - S[1] = s3[R[z+1] & mask]; - S[2] = s3[R[z+2] & mask]; - S[3] = s3[R[z+3] & mask]; - - R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT) - + ((S[0]>>8) & mask); - R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT) - + ((S[1]>>8) & mask); - R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT) - + ((S[2]>>8) & mask); - R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT) - + ((S[3]>>8) & mask); - - out[i+z+0] = S[0]; - out[i+z+1] = S[1]; - out[i+z+2] = S[2]; - out[i+z+3] = S[3]; - - 
RansDecRenorm(&R[z+0], &cp); - RansDecRenorm(&R[z+1], &cp); - RansDecRenorm(&R[z+2], &cp); - RansDecRenorm(&R[z+3], &cp); - } - } - } - - // Safe loop - for (; i < out_end; i+=NX) { - for (z = 0; z < NX; z+=4) { - uint32_t S[4]; - S[0] = s3[R[z+0] & mask]; - S[1] = s3[R[z+1] & mask]; - S[2] = s3[R[z+2] & mask]; - S[3] = s3[R[z+3] & mask]; - - R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT) - + ((S[0]>>8) & mask); - R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT) - + ((S[1]>>8) & mask); - R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT) - + ((S[2]>>8) & mask); - R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT) - + ((S[3]>>8) & mask); - - out[i+z+0] = S[0]; - out[i+z+1] = S[1]; - out[i+z+2] = S[2]; - out[i+z+3] = S[3]; - - RansDecRenormSafe(&R[z+0], &cp, cp_end+NX*2); - RansDecRenormSafe(&R[z+1], &cp, cp_end+NX*2); - RansDecRenormSafe(&R[z+2], &cp, cp_end+NX*2); - RansDecRenormSafe(&R[z+3], &cp, cp_end+NX*2); - } - } - - for (z = out_sz & (NX-1); z-- > 0; ) - out[out_end + z] = s3[R[z] & mask]; - - //fprintf(stderr, " 0 Decoded %d bytes\n", (int)(cp-in)); //c-size - - return out; - - err: - free(out_free); - return NULL; -} - - -//----------------------------------------------------------------------------- -unsigned char *rans_compress_O1_32x16(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - unsigned char *cp, *out_end, *out_free = NULL; - unsigned int tab_size; - int bound = rans_compress_bound_4x16(in_size,1)-20, z; - RansState ransN[NX]; - - if (in_size < NX) // force O0 instead - return NULL; - - if (!out) { - *out_size = bound; - out_free = out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - if (((size_t)out)&1) - bound--; - out_end = out + bound; - - RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms))); - if (!syms) { - free(out_free); - return NULL; - } - - cp = out; - int shift = encode_freq1(in, in_size, 32, syms, &cp); - if (shift < 0) { - 
free(out_free); - htscodecs_tls_free(syms); - return NULL; - } - tab_size = cp - out; - - for (z = 0; z < NX; z++) - RansEncInit(&ransN[z]); - - uint8_t* ptr = out_end; - - int iN[NX], isz4 = in_size/NX, i; - for (z = 0; z < NX; z++) - iN[z] = (z+1)*isz4-2; - - unsigned char lN[NX]; - for (z = 0; z < NX; z++) - lN[z] = in[iN[z]+1]; - - // Deal with the remainder - z = NX-1; - lN[z] = in[in_size-1]; - for (iN[z] = in_size-2; iN[z] > NX*isz4-2; iN[z]--) { - unsigned char c = in[iN[z]]; - RansEncPutSymbol(&ransN[z], &ptr, &syms[c][lN[z]]); - lN[z] = c; - } - - unsigned char *i32[NX]; - for (i = 0; i < NX; i++) - i32[i] = &in[iN[i]]; - - for (; likely(i32[0] >= in); ) { - uint16_t *ptr16 = (uint16_t *)ptr; - for (z = NX-1; z >= 0; z-=4) { - RansEncSymbol *sy[4]; - int k; - - for (k = 0; k < 4; k++) { - sy[k] = &syms[*i32[z-k]][lN[z-k]]; - lN[z-k] = *i32[z-k]--; - } - - // RansEncPutSymbol added in-situ - for (k = 0; k < 4; k++) { - int c = ransN[z-k] > sy[k]->x_max; -#ifdef HTSCODECS_LITTLE_ENDIAN - ptr16[-1] = ransN[z-k]; -#else - ((uint8_t *)&ptr16[-1])[0] = ransN[z-k]; - ((uint8_t *)&ptr16[-1])[1] = ransN[z-k]>>8; -#endif - ptr16 -= c; - //ransN[z-k] >>= c<<4; - ransN[z-k] = c ? 
ransN[z-k]>>16 : ransN[z-k]; - } - - for (k = 0; k < 4; k++) { - uint64_t r64 = ransN[z-k]; - uint32_t q = (r64 * sy[k]->rcp_freq) >> sy[k]->rcp_shift; - ransN[z-k] += sy[k]->bias + q*sy[k]->cmpl_freq; - } - } - ptr = (uint8_t *)ptr16; - } - - for (z = NX-1; z>=0; z--) - RansEncPutSymbol(&ransN[z], &ptr, &syms[0][lN[z]]); - - for (z = NX-1; z>=0; z--) - RansEncFlush(&ransN[z], &ptr); - - *out_size = (out_end - ptr) + tab_size; - - cp = out; - memmove(out + tab_size, ptr, out_end-ptr); - - htscodecs_tls_free(syms); - return out; -} - -//#define MAGIC2 111 -#define MAGIC2 179 -//#define MAGIC2 0 - -unsigned char *rans_uncompress_O1_32x16(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < NX*4) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL; - unsigned char *c_freq = NULL; - int i; - - /* - * Somewhat complex memory layout. - * With shift==12 (TF_SHIFT_O1) we fill out use both sfb and fb. - * With shift==10 (...O1_FAST) we fill out and use s3 only. - * - * sfb+fb is larger, therefore we allocate this much memory. 
- */ - uint8_t *sfb_ = htscodecs_tls_alloc(256* - ((TOTFREQ_O1+MAGIC2)*sizeof(*sfb_) - +256 * sizeof(fb_t))); - if (!sfb_) - return NULL; - - // sfb and fb are consecutive - uint8_t *sfb[257]; - if ((*cp >> 4) == TF_SHIFT_O1) { - for (i = 0; i <= 256; i++) - sfb[i]= sfb_ + i*(TOTFREQ_O1+MAGIC2); - } else { - for (i = 0; i <= 256; i++) - sfb[i]= sfb_ + i*(TOTFREQ_O1_FAST+MAGIC2); - } - fb_t (*fb)[256] = (fb_t (*)[256]) sfb[256]; - - // NOTE: s3 overlaps sfb/fb - uint32_t (*s3)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])sfb_; - - if (!out) - out_free = out = malloc(out_sz); - - if (!out) - goto err; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // compressed header? If so uncompress it - unsigned char *tab_end = NULL; - unsigned char *c_freq_end = cp_end; - unsigned int shift = *cp >> 4; - if (*cp++ & 1) { - uint32_t u_freq_sz, c_freq_sz; - cp += var_get_u32(cp, cp_end, &u_freq_sz); - cp += var_get_u32(cp, cp_end, &c_freq_sz); - if (c_freq_sz > cp_end - cp) - goto err; - tab_end = cp + c_freq_sz; - if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL,u_freq_sz))) - goto err; - cp = c_freq; - c_freq_end = c_freq + u_freq_sz; - } - - // Decode order-0 symbol list; avoids needing in order-1 tables - cp += decode_freq1(cp, c_freq_end, shift, NULL, s3, sfb, fb); - - if (tab_end) - cp = tab_end; - free(c_freq); - c_freq = NULL; - - if (cp_end - cp < NX * 4) - goto err; - - RansState R[NX]; - uint8_t *ptr = cp, *ptr_end = in + in_size - 2*NX; - int z; - for (z = 0; z < NX; z++) { - RansDecInit(&R[z], &ptr); - if (R[z] < RANS_BYTE_L) - goto err; - } - - int isz4 = out_sz/NX; - int i4[NX], l[NX] = {0}; - for (z = 0; z < NX; z++) - i4[z] = z*isz4; - - const int low_ent = in_size < 0.2 * out_sz; - - // Around 15% faster to specialise for 10/12 than to have one - // loop with shift as a variable. 
- if (shift == TF_SHIFT_O1) { - // TF_SHIFT_O1 = 12 - const uint32_t mask = ((1u << TF_SHIFT_O1)-1); - for (; likely(i4[0] < isz4);) { - for (z = 0; z < NX; z+=4) { - uint16_t m[4], c[4]; - - c[0] = sfb[l[z+0]][m[0] = R[z+0] & mask]; - c[1] = sfb[l[z+1]][m[1] = R[z+1] & mask]; - c[2] = sfb[l[z+2]][m[2] = R[z+2] & mask]; - c[3] = sfb[l[z+3]][m[3] = R[z+3] & mask]; - - R[z+0] = fb[l[z+0]][c[0]].f * (R[z+0]>>TF_SHIFT_O1); - R[z+0] += m[0] - fb[l[z+0]][c[0]].b; - - R[z+1] = fb[l[z+1]][c[1]].f * (R[z+1]>>TF_SHIFT_O1); - R[z+1] += m[1] - fb[l[z+1]][c[1]].b; - - R[z+2] = fb[l[z+2]][c[2]].f * (R[z+2]>>TF_SHIFT_O1); - R[z+2] += m[2] - fb[l[z+2]][c[2]].b; - - R[z+3] = fb[l[z+3]][c[3]].f * (R[z+3]>>TF_SHIFT_O1); - R[z+3] += m[3] - fb[l[z+3]][c[3]].b; - - out[i4[z+0]++] = l[z+0] = c[0]; - out[i4[z+1]++] = l[z+1] = c[1]; - out[i4[z+2]++] = l[z+2] = c[2]; - out[i4[z+3]++] = l[z+3] = c[3]; - - if (!low_ent && likely(ptr < ptr_end)) { - RansDecRenorm(&R[z+0], &ptr); - RansDecRenorm(&R[z+1], &ptr); - RansDecRenorm(&R[z+2], &ptr); - RansDecRenorm(&R[z+3], &ptr); - } else { - RansDecRenormSafe(&R[z+0], &ptr, ptr_end+2*NX); - RansDecRenormSafe(&R[z+1], &ptr, ptr_end+2*NX); - RansDecRenormSafe(&R[z+2], &ptr, ptr_end+2*NX); - RansDecRenormSafe(&R[z+3], &ptr, ptr_end+2*NX); - } - } - } - - // Remainder - for (; i4[NX-1] < out_sz; i4[NX-1]++) { - uint32_t m = R[NX-1] & ((1u<>TF_SHIFT_O1) + - m - fb[l[NX-1]][c].b; - RansDecRenormSafe(&R[NX-1], &ptr, ptr_end + 2*NX); - l[NX-1] = c; - } - } else { - // TF_SHIFT_O1 = 10 - const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); - for (; likely(i4[0] < isz4);) { - for (z = 0; z < NX; z+=4) { - // Merged sfb and fb into single s3 lookup. - // The m[4] array completely vanishes in this method. 
- uint32_t S[4] = { - s3[l[z+0]][R[z+0] & mask], - s3[l[z+1]][R[z+1] & mask], - s3[l[z+2]][R[z+2] & mask], - s3[l[z+3]][R[z+3] & mask], - }; - - l[z+0] = out[i4[z+0]++] = S[0]; - l[z+1] = out[i4[z+1]++] = S[1]; - l[z+2] = out[i4[z+2]++] = S[2]; - l[z+3] = out[i4[z+3]++] = S[3]; - - uint32_t F[4] = { - S[0]>>(TF_SHIFT_O1_FAST+8), - S[1]>>(TF_SHIFT_O1_FAST+8), - S[2]>>(TF_SHIFT_O1_FAST+8), - S[3]>>(TF_SHIFT_O1_FAST+8), - }; - uint32_t B[4] = { - (S[0]>>8) & mask, - (S[1]>>8) & mask, - (S[2]>>8) & mask, - (S[3]>>8) & mask, - }; - - R[z+0] = F[0] * (R[z+0]>>TF_SHIFT_O1_FAST) + B[0]; - R[z+1] = F[1] * (R[z+1]>>TF_SHIFT_O1_FAST) + B[1]; - R[z+2] = F[2] * (R[z+2]>>TF_SHIFT_O1_FAST) + B[2]; - R[z+3] = F[3] * (R[z+3]>>TF_SHIFT_O1_FAST) + B[3]; - - if (!low_ent && (ptr < ptr_end)) { - // branchless & asm - RansDecRenorm(&R[z+0], &ptr); - RansDecRenorm(&R[z+1], &ptr); - RansDecRenorm(&R[z+2], &ptr); - RansDecRenorm(&R[z+3], &ptr); - } else { - // branched, but better when predictable - RansDecRenormSafe(&R[z+0], &ptr, ptr_end+2*NX); - RansDecRenormSafe(&R[z+1], &ptr, ptr_end+2*NX); - RansDecRenormSafe(&R[z+2], &ptr, ptr_end+2*NX); - RansDecRenormSafe(&R[z+3], &ptr, ptr_end+2*NX); - } - } - } - - // Remainder - for (; i4[NX-1] < out_sz; i4[NX-1]++) { - uint32_t S = s3[l[NX-1]][R[NX-1] & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[NX-1]>>TF_SHIFT_O1_FAST) - + ((S>>8) & ((1u< - -#include -#include - -#include "rANS_word.h" -#include "rANS_static4x16.h" -#include "rANS_static16_int.h" -#include "varint.h" -#include "utils.h" - -#define NX 32 - -// TODO: get access to MVE architecture so we can tune for the newer -// SIMD instructions. 
-// -// #if __ARM_FEATURE_MVE & 1 -// #include // Helium, eg for use of vcreateq_u32 -// #endif - -#define _ 99 -static uint8x8_t vtab[16] = { - {_,_, _,_, _,_, _,_ }, - {_,_, _,_, _,_, 12,13}, - {_,_, _,_, _,_, 8,9 }, - {_,_, _,_, 8,9, 12,13}, - {_,_, _,_, _,_, 4,5 }, - {_,_, _,_, 4,5, 12,13}, - {_,_, _,_, 4,5, 8,9 }, - {_,_, 4,5, 8,9, 12,13}, - {_,_, _,_, _,_, 0,1 }, - {_,_, _,_, 0,1, 12,13}, - {_,_, _,_, 0,1, 8,9 }, - {_,_, 0,1 , 8,9, 12,13}, - {_,_, _,_, 0,1, 4,5 }, - {_,_, 0,1, 4,5, 12,13}, - {_,_, 0,1, 4,5, 8,9 }, - {0,1, 4,5, 8,9, 12,13}, -}; -#undef _ - -unsigned char *rans_compress_O0_32x16_neon(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - unsigned char *cp, *out_end; - RansEncSymbol syms[256]; - RansState R[NX]; - uint8_t* ptr; - uint32_t F[256+MAGIC] = {0}; - int i, j, tab_size = 0, x, z; - // -20 for order/size/meta - uint32_t bound = rans_compress_bound_4x16(in_size,0)-20; - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - // If "out" isn't word aligned, tweak out_end/ptr to ensure it is. - // We already added more round in bound to allow for this. - if (((size_t)out)&1) - bound--; - ptr = out_end = out + bound; - - if (in_size == 0) - goto empty; - - // Compute statistics - if (hist8(in, in_size, F) < 0) - return NULL; - - // Normalise so frequences sum to power of 2 - uint32_t fsum = in_size; - uint32_t max_val = round2(fsum); - if (max_val > TOTFREQ) - max_val = TOTFREQ; - - if (normalise_freq(F, fsum, max_val) < 0) - return NULL; - fsum=max_val; - - cp = out; - cp += encode_freq(cp, F); - tab_size = cp-out; - //write(2, out+4, cp-(out+4)); - - if (normalise_freq(F, fsum, TOTFREQ) < 0) - return NULL; - - // Encode statistics. 
- for (x = j = 0; j < 256; j++) { - if (F[j]) { - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - x += F[j]; - } - } - - for (z = 0; z < NX; z++) - RansEncInit(&R[z]); - - z = i = in_size&(NX-1); - while (z-- > 0) - RansEncPutSymbol(&R[z], &ptr, &syms[in[in_size-(i-z)]]); - - for (i=(in_size &~(NX-1)); i>0; i-=NX) { -// // Scalar equivalent -// for (z = NX-1; z >= 0; z-=4) { -// // 327 / 272 -// RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]]; -// RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]]; -// RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]]; -// RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]]; -// -// RansEncPutSymbol(&R[z-0], &ptr, s0); -// RansEncPutSymbol(&R[z-1], &ptr, s1); -// RansEncPutSymbol(&R[z-2], &ptr, s2); -// RansEncPutSymbol(&R[z-3], &ptr, s3); -// } - - // SIMD with 16-way unrolling - for (z = NX-1; z >= 0; z-=8) { - RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]]; - RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]]; - RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]]; - RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]]; - - RansEncSymbol *s4 = &syms[in[i-(NX-z+4)]]; - RansEncSymbol *s5 = &syms[in[i-(NX-z+5)]]; - RansEncSymbol *s6 = &syms[in[i-(NX-z+6)]]; - RansEncSymbol *s7 = &syms[in[i-(NX-z+7)]]; - - uint32x4_t Rv1 = vld1q_u32(&R[z-3]); - uint32x4_t Rv2 = vld1q_u32(&R[z-7]); - - // Sym bit sizes = 128bits - // 32: x_max - // 32: rcp_freq - // 32: bias - // 16: cmpl_freq - // 16: rcp_shift - - // Load and shuffle around - // A <---Xmax---><---RFreq--><---Bias---><-cf-><-rs-> - // B <---Xmax---><---RFreq--><---Bias---><-cf-><-rs-> - // C <---Xmax---><---RFreq--><---Bias---><-cf-><-rs-> - // D <---Xmax---><---RFreq--><---Bias---><-cf-><-rs-> - // vtrn1q_u32 vtrn2q_u32 (A1 = A+B) - // A1 <---Xmax---><---Xmax---><---Bias---><---Bias---> - // C1 <---Xmax---><---Xmax---><---Bias---><---Bias---> - // A2 <---RFreq--><---RFreq--><-cf-><-rs-><-cf-><-rs-> - // C2 <---RFreq--><---RFreq--><-cf-><-rs-><-cf-><-rs-> - // vtrn1q_u64 vtrn2q_u64 (A11 = A1+C1) - // A11 
<---Xmax---><---Xmax---><---Xmax---><---Xmax---> - // A12 <---Bias---><---Bias---><---Bias---><---Bias---> - // A21 <---RFreq--><---RFreq--><---RFreq--><---RFreq--> - // A22 <-cf-><-rs-><-cf-><-rs-><-cf-><-rs-><-cf-><-rs-> - uint32x4_t A_1 = vld1q_u32((void *)s3); - uint32x4_t B_1 = vld1q_u32((void *)s2); - uint32x4_t C_1 = vld1q_u32((void *)s1); - uint32x4_t D_1 = vld1q_u32((void *)s0); - - uint32x4_t A1_1 = vtrn1q_u32(A_1, B_1); - uint32x4_t C1_1 = vtrn1q_u32(C_1, D_1); - uint32x4_t A2_1 = vtrn2q_u32(A_1, B_1); - uint32x4_t C2_1 = vtrn2q_u32(C_1, D_1); - -#define u32_u64(x) vreinterpretq_u32_u64((x)) -#define u64_u32(x) vreinterpretq_u64_u32((x)) - uint32x4_t Xmaxv1=u32_u64(vtrn1q_u64(u64_u32(A1_1),u64_u32(C1_1))); - uint32x4_t Biasv1=u32_u64(vtrn2q_u64(u64_u32(A1_1),u64_u32(C1_1))); - uint32x4_t RFv1 =u32_u64(vtrn1q_u64(u64_u32(A2_1),u64_u32(C2_1))); - uint32x4_t FSv1 =u32_u64(vtrn2q_u64(u64_u32(A2_1),u64_u32(C2_1))); - - uint32x4_t A_2 = vld1q_u32((void *)s7); - uint32x4_t B_2 = vld1q_u32((void *)s6); - uint32x4_t C_2 = vld1q_u32((void *)s5); - uint32x4_t D_2 = vld1q_u32((void *)s4); - - uint32x4_t A1_2 = vtrn1q_u32(A_2, B_2); - uint32x4_t C1_2 = vtrn1q_u32(C_2, D_2); - uint32x4_t A2_2 = vtrn2q_u32(A_2, B_2); - uint32x4_t C2_2 = vtrn2q_u32(C_2, D_2); - - uint32x4_t Xmaxv2=u32_u64(vtrn1q_u64(u64_u32(A1_2),u64_u32(C1_2))); - uint32x4_t Biasv2=u32_u64(vtrn2q_u64(u64_u32(A1_2),u64_u32(C1_2))); - uint32x4_t RFv2 =u32_u64(vtrn1q_u64(u64_u32(A2_2),u64_u32(C2_2))); - uint32x4_t FSv2 =u32_u64(vtrn2q_u64(u64_u32(A2_2),u64_u32(C2_2))); - - // Turn multi R>16 - uint32x4_t Rv1_r = vshrq_n_u32(Rv1, 16); - uint32x4_t Rv2_r = vshrq_n_u32(Rv2, 16); - - // Blend R and R' based on Cv. - Rv1 = vbslq_u32(Cv1, Rv1_r, Rv1); - Rv2 = vbslq_u32(Cv2, Rv2_r, Rv2); - - // R -> R' update - // q = (uint32_t) (((uint64_t)x * rcp_freq) >> rcp_shift); - // R' = R + sym->bias + q * sym->cmpl_freq; - - // Mix SIMD (mul) & scalar (shift). 
365MB/s - - // We do 32 x 32 mul to get 64-bit, but then extract this - // a 64-bit quantity and shift as scalar, before - // recreating the 32x4 result. Despite SIMD-scalar-SIMD reg - // it's slightly quicker. - - uint64x2_t qvl1 = vmull_u32(vget_low_u32(Rv1), vget_low_u32(RFv1)); - uint64x2_t qvh1 = vmull_high_u32(Rv1, RFv1); - - uint64x2_t qvl2 = vmull_u32(vget_low_u32(Rv2), vget_low_u32(RFv2)); - uint64x2_t qvh2 = vmull_high_u32(Rv2, RFv2); - - uint32x2_t qv1a = - vcreate_u32(vgetq_lane_u64(qvl1, 1) >> s2->rcp_shift << 32 | - vgetq_lane_u64(qvl1, 0) >> s3->rcp_shift); - uint32x2_t qv1b = - vcreate_u32(vgetq_lane_u64(qvh1, 1) >> s0->rcp_shift << 32 | - vgetq_lane_u64(qvh1, 0) >> s1->rcp_shift); - - uint32x2_t qv2a = - vcreate_u32(vgetq_lane_u64(qvl2, 1) >> s6->rcp_shift << 32 | - vgetq_lane_u64(qvl2, 0) >> s7->rcp_shift); - uint32x2_t qv2b = - vcreate_u32(vgetq_lane_u64(qvh2, 1) >> s4->rcp_shift << 32 | - vgetq_lane_u64(qvh2, 0) >> s5->rcp_shift); - - uint32x4_t qv1 = vcombine_u32(qv1a, qv1b); - uint32x4_t qv2 = vcombine_u32(qv2a, qv2b); - - FSv1 = vandq_u32(FSv1, vdupq_n_u32(0xffff)); // cmpl_freq - FSv2 = vandq_u32(FSv2, vdupq_n_u32(0xffff)); - - qv1 = vmlaq_u32(Biasv1, qv1, FSv1); - qv2 = vmlaq_u32(Biasv2, qv2, FSv2); - - Rv1 = vaddq_u32(Rv1, qv1); - Rv2 = vaddq_u32(Rv2, qv2); - - vst1q_u32(&R[z-3], Rv1); - vst1q_u32(&R[z-7], Rv2); - } - if (z < -1) abort(); - } - for (z = NX-1; z >= 0; z--) - RansEncFlush(&R[z], &ptr); - - empty: - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - - memmove(out + tab_size, ptr, out_end-ptr); - - return out; -} - -#define _ 99 -static uint8x8_t idx[16] = { - { _,_,_,_,_,_,_,_ }, // 0000 - { _,_,_,_,_,_,0,1 }, // 0001 - { _,_,_,_,0,1,_,_ }, // 0010 - { _,_,_,_,0,1,2,3 }, // 0011 - - { _,_,0,1,_,_,_,_ }, // 0100 - { _,_,0,1,_,_,2,3 }, // 0101 - { _,_,0,1,2,3,_,_ }, // 0110 - { _,_,0,1,2,3,4,5 }, // 0111 - - { 0,1,_,_,_,_,_,_ }, // 1000 - { 0,1,_,_,_,_,2,3 }, // 1001 - { 0,1,_,_,2,3,_,_ }, // 1010 
- { 0,1,_,_,2,3,4,5 }, // 1011 - - { 0,1,2,3,_,_,_,_ }, // 1100 - { 0,1,2,3,_,_,4,5 }, // 1101 - { 0,1,2,3,4,5,_,_ }, // 1110 - { 0,1,2,3,4,5,6,7 }, // 1111 -}; - -// norm2 with norm1 in top 4 bits already consumed -static uint8x8_t idx2[256] = { - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 0, 1, }, - { _, _, _, _, 0, 1, _, _, }, - { _, _, _, _, 0, 1, 2, 3, }, - { _, _, 0, 1, _, _, _, _, }, - { _, _, 0, 1, _, _, 2, 3, }, - { _, _, 0, 1, 2, 3, _, _, }, - { _, _, 0, 1, 2, 3, 4, 5, }, - { 0, 1, _, _, _, _, _, _, }, - { 0, 1, _, _, _, _, 2, 3, }, - { 0, 1, _, _, 2, 3, _, _, }, - { 0, 1, _, _, 2, 3, 4, 5, }, - { 0, 1, 2, 3, _, _, _, _, }, - { 0, 1, 2, 3, _, _, 4, 5, }, - { 0, 1, 2, 3, 4, 5, _, _, }, - { 0, 1, 2, 3, 4, 5, 6, 7, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 2, 3, }, - { _, _, _, _, 2, 3, _, _, }, - { _, _, _, _, 2, 3, 4, 5, }, - { _, _, 2, 3, _, _, _, _, }, - { _, _, 2, 3, _, _, 4, 5, }, - { _, _, 2, 3, 4, 5, _, _, }, - { _, _, 2, 3, 4, 5, 6, 7, }, - { 2, 3, _, _, _, _, _, _, }, - { 2, 3, _, _, _, _, 4, 5, }, - { 2, 3, _, _, 4, 5, _, _, }, - { 2, 3, _, _, 4, 5, 6, 7, }, - { 2, 3, 4, 5, _, _, _, _, }, - { 2, 3, 4, 5, _, _, 6, 7, }, - { 2, 3, 4, 5, 6, 7, _, _, }, - { 2, 3, 4, 5, 6, 7, 8, 9, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 2, 3, }, - { _, _, _, _, 2, 3, _, _, }, - { _, _, _, _, 2, 3, 4, 5, }, - { _, _, 2, 3, _, _, _, _, }, - { _, _, 2, 3, _, _, 4, 5, }, - { _, _, 2, 3, 4, 5, _, _, }, - { _, _, 2, 3, 4, 5, 6, 7, }, - { 2, 3, _, _, _, _, _, _, }, - { 2, 3, _, _, _, _, 4, 5, }, - { 2, 3, _, _, 4, 5, _, _, }, - { 2, 3, _, _, 4, 5, 6, 7, }, - { 2, 3, 4, 5, _, _, _, _, }, - { 2, 3, 4, 5, _, _, 6, 7, }, - { 2, 3, 4, 5, 6, 7, _, _, }, - { 2, 3, 4, 5, 6, 7, 8, 9, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 4, 5, }, - { _, _, _, _, 4, 5, _, _, }, - { _, _, _, _, 4, 5, 6, 7, }, - { _, _, 4, 5, _, _, _, _, }, - { _, _, 4, 5, _, _, 6, 7, }, - { _, _, 4, 5, 6, 7, _, _, }, - { _, _, 4, 5, 6, 7, 8, 9, }, - { 4, 5, _, _, 
_, _, _, _, }, - { 4, 5, _, _, _, _, 6, 7, }, - { 4, 5, _, _, 6, 7, _, _, }, - { 4, 5, _, _, 6, 7, 8, 9, }, - { 4, 5, 6, 7, _, _, _, _, }, - { 4, 5, 6, 7, _, _, 8, 9, }, - { 4, 5, 6, 7, 8, 9, _, _, }, - { 4, 5, 6, 7, 8, 9,10,11, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 2, 3, }, - { _, _, _, _, 2, 3, _, _, }, - { _, _, _, _, 2, 3, 4, 5, }, - { _, _, 2, 3, _, _, _, _, }, - { _, _, 2, 3, _, _, 4, 5, }, - { _, _, 2, 3, 4, 5, _, _, }, - { _, _, 2, 3, 4, 5, 6, 7, }, - { 2, 3, _, _, _, _, _, _, }, - { 2, 3, _, _, _, _, 4, 5, }, - { 2, 3, _, _, 4, 5, _, _, }, - { 2, 3, _, _, 4, 5, 6, 7, }, - { 2, 3, 4, 5, _, _, _, _, }, - { 2, 3, 4, 5, _, _, 6, 7, }, - { 2, 3, 4, 5, 6, 7, _, _, }, - { 2, 3, 4, 5, 6, 7, 8, 9, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 4, 5, }, - { _, _, _, _, 4, 5, _, _, }, - { _, _, _, _, 4, 5, 6, 7, }, - { _, _, 4, 5, _, _, _, _, }, - { _, _, 4, 5, _, _, 6, 7, }, - { _, _, 4, 5, 6, 7, _, _, }, - { _, _, 4, 5, 6, 7, 8, 9, }, - { 4, 5, _, _, _, _, _, _, }, - { 4, 5, _, _, _, _, 6, 7, }, - { 4, 5, _, _, 6, 7, _, _, }, - { 4, 5, _, _, 6, 7, 8, 9, }, - { 4, 5, 6, 7, _, _, _, _, }, - { 4, 5, 6, 7, _, _, 8, 9, }, - { 4, 5, 6, 7, 8, 9, _, _, }, - { 4, 5, 6, 7, 8, 9,10,11, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 4, 5, }, - { _, _, _, _, 4, 5, _, _, }, - { _, _, _, _, 4, 5, 6, 7, }, - { _, _, 4, 5, _, _, _, _, }, - { _, _, 4, 5, _, _, 6, 7, }, - { _, _, 4, 5, 6, 7, _, _, }, - { _, _, 4, 5, 6, 7, 8, 9, }, - { 4, 5, _, _, _, _, _, _, }, - { 4, 5, _, _, _, _, 6, 7, }, - { 4, 5, _, _, 6, 7, _, _, }, - { 4, 5, _, _, 6, 7, 8, 9, }, - { 4, 5, 6, 7, _, _, _, _, }, - { 4, 5, 6, 7, _, _, 8, 9, }, - { 4, 5, 6, 7, 8, 9, _, _, }, - { 4, 5, 6, 7, 8, 9,10,11, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 6, 7, }, - { _, _, _, _, 6, 7, _, _, }, - { _, _, _, _, 6, 7, 8, 9, }, - { _, _, 6, 7, _, _, _, _, }, - { _, _, 6, 7, _, _, 8, 9, }, - { _, _, 6, 7, 8, 9, _, _, }, - { _, _, 6, 7, 8, 9,10,11, }, - { 6, 7, _, _, _, _, _, _, }, 
- { 6, 7, _, _, _, _, 8, 9, }, - { 6, 7, _, _, 8, 9, _, _, }, - { 6, 7, _, _, 8, 9,10,11, }, - { 6, 7, 8, 9, _, _, _, _, }, - { 6, 7, 8, 9, _, _,10,11, }, - { 6, 7, 8, 9,10,11, _, _, }, - { 6, 7, 8, 9,10,11,12,13, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 2, 3, }, - { _, _, _, _, 2, 3, _, _, }, - { _, _, _, _, 2, 3, 4, 5, }, - { _, _, 2, 3, _, _, _, _, }, - { _, _, 2, 3, _, _, 4, 5, }, - { _, _, 2, 3, 4, 5, _, _, }, - { _, _, 2, 3, 4, 5, 6, 7, }, - { 2, 3, _, _, _, _, _, _, }, - { 2, 3, _, _, _, _, 4, 5, }, - { 2, 3, _, _, 4, 5, _, _, }, - { 2, 3, _, _, 4, 5, 6, 7, }, - { 2, 3, 4, 5, _, _, _, _, }, - { 2, 3, 4, 5, _, _, 6, 7, }, - { 2, 3, 4, 5, 6, 7, _, _, }, - { 2, 3, 4, 5, 6, 7, 8, 9, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 4, 5, }, - { _, _, _, _, 4, 5, _, _, }, - { _, _, _, _, 4, 5, 6, 7, }, - { _, _, 4, 5, _, _, _, _, }, - { _, _, 4, 5, _, _, 6, 7, }, - { _, _, 4, 5, 6, 7, _, _, }, - { _, _, 4, 5, 6, 7, 8, 9, }, - { 4, 5, _, _, _, _, _, _, }, - { 4, 5, _, _, _, _, 6, 7, }, - { 4, 5, _, _, 6, 7, _, _, }, - { 4, 5, _, _, 6, 7, 8, 9, }, - { 4, 5, 6, 7, _, _, _, _, }, - { 4, 5, 6, 7, _, _, 8, 9, }, - { 4, 5, 6, 7, 8, 9, _, _, }, - { 4, 5, 6, 7, 8, 9,10,11, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 4, 5, }, - { _, _, _, _, 4, 5, _, _, }, - { _, _, _, _, 4, 5, 6, 7, }, - { _, _, 4, 5, _, _, _, _, }, - { _, _, 4, 5, _, _, 6, 7, }, - { _, _, 4, 5, 6, 7, _, _, }, - { _, _, 4, 5, 6, 7, 8, 9, }, - { 4, 5, _, _, _, _, _, _, }, - { 4, 5, _, _, _, _, 6, 7, }, - { 4, 5, _, _, 6, 7, _, _, }, - { 4, 5, _, _, 6, 7, 8, 9, }, - { 4, 5, 6, 7, _, _, _, _, }, - { 4, 5, 6, 7, _, _, 8, 9, }, - { 4, 5, 6, 7, 8, 9, _, _, }, - { 4, 5, 6, 7, 8, 9,10,11, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 6, 7, }, - { _, _, _, _, 6, 7, _, _, }, - { _, _, _, _, 6, 7, 8, 9, }, - { _, _, 6, 7, _, _, _, _, }, - { _, _, 6, 7, _, _, 8, 9, }, - { _, _, 6, 7, 8, 9, _, _, }, - { _, _, 6, 7, 8, 9,10,11, }, - { 6, 7, _, _, _, _, _, _, }, - { 6, 7, _, _, 
_, _, 8, 9, }, - { 6, 7, _, _, 8, 9, _, _, }, - { 6, 7, _, _, 8, 9,10,11, }, - { 6, 7, 8, 9, _, _, _, _, }, - { 6, 7, 8, 9, _, _,10,11, }, - { 6, 7, 8, 9,10,11, _, _, }, - { 6, 7, 8, 9,10,11,12,13, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 4, 5, }, - { _, _, _, _, 4, 5, _, _, }, - { _, _, _, _, 4, 5, 6, 7, }, - { _, _, 4, 5, _, _, _, _, }, - { _, _, 4, 5, _, _, 6, 7, }, - { _, _, 4, 5, 6, 7, _, _, }, - { _, _, 4, 5, 6, 7, 8, 9, }, - { 4, 5, _, _, _, _, _, _, }, - { 4, 5, _, _, _, _, 6, 7, }, - { 4, 5, _, _, 6, 7, _, _, }, - { 4, 5, _, _, 6, 7, 8, 9, }, - { 4, 5, 6, 7, _, _, _, _, }, - { 4, 5, 6, 7, _, _, 8, 9, }, - { 4, 5, 6, 7, 8, 9, _, _, }, - { 4, 5, 6, 7, 8, 9,10,11, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 6, 7, }, - { _, _, _, _, 6, 7, _, _, }, - { _, _, _, _, 6, 7, 8, 9, }, - { _, _, 6, 7, _, _, _, _, }, - { _, _, 6, 7, _, _, 8, 9, }, - { _, _, 6, 7, 8, 9, _, _, }, - { _, _, 6, 7, 8, 9,10,11, }, - { 6, 7, _, _, _, _, _, _, }, - { 6, 7, _, _, _, _, 8, 9, }, - { 6, 7, _, _, 8, 9, _, _, }, - { 6, 7, _, _, 8, 9,10,11, }, - { 6, 7, 8, 9, _, _, _, _, }, - { 6, 7, 8, 9, _, _,10,11, }, - { 6, 7, 8, 9,10,11, _, _, }, - { 6, 7, 8, 9,10,11,12,13, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 6, 7, }, - { _, _, _, _, 6, 7, _, _, }, - { _, _, _, _, 6, 7, 8, 9, }, - { _, _, 6, 7, _, _, _, _, }, - { _, _, 6, 7, _, _, 8, 9, }, - { _, _, 6, 7, 8, 9, _, _, }, - { _, _, 6, 7, 8, 9,10,11, }, - { 6, 7, _, _, _, _, _, _, }, - { 6, 7, _, _, _, _, 8, 9, }, - { 6, 7, _, _, 8, 9, _, _, }, - { 6, 7, _, _, 8, 9,10,11, }, - { 6, 7, 8, 9, _, _, _, _, }, - { 6, 7, 8, 9, _, _,10,11, }, - { 6, 7, 8, 9,10,11, _, _, }, - { 6, 7, 8, 9,10,11,12,13, }, - { _, _, _, _, _, _, _, _, }, - { _, _, _, _, _, _, 8, 9, }, - { _, _, _, _, 8, 9, _, _, }, - { _, _, _, _, 8, 9,10,11, }, - { _, _, 8, 9, _, _, _, _, }, - { _, _, 8, 9, _, _,10,11, }, - { _, _, 8, 9,10,11, _, _, }, - { _, _, 8, 9,10,11,12,13, }, - { 8, 9, _, _, _, _, _, _, }, - { 8, 9, _, _, _, _,10,11, }, 
- { 8, 9, _, _,10,11, _, _, }, - { 8, 9, _, _,10,11,12,13, }, - { 8, 9,10,11, _, _, _, _, }, - { 8, 9,10,11, _, _,12,13, }, - { 8, 9,10,11,12,13, _, _, }, - { 8, 9,10,11,12,13,14,15, }, -}; - -// SIMD: 650MB/s -unsigned char *rans_uncompress_O0_32x16_neon(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < 16) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - - /* Load in the static tables */ - unsigned char *cp = in, *out_free = NULL; - unsigned char *cp_end = in + in_size; - int i; - uint32_t s3[TOTFREQ]; // For TF_SHIFT <= 12 - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - // Precompute reverse lookup of frequency. - uint32_t F[256] = {0}, fsum; - int fsz = decode_freq(cp, cp_end, F, &fsum); - if (!fsz) - goto err; - cp += fsz; - - normalise_freq_shift(F, fsum, TOTFREQ); - - // Build symbols; fixme, do as part of decode, see the _d variant - if (rans_F_to_s3(F, TF_SHIFT, s3)) - goto err; - - if (cp_end - cp < NX * 4) - goto err; - - int z; - RansState R[NX]; - for (z = 0; z < NX; z++) { - RansDecInit(&R[z], &cp); - if (R[z] < RANS_BYTE_L) - goto err; - } - - int out_end = (out_sz&~(NX-1)); - const uint32_t mask = (1u << TF_SHIFT)-1; - uint32x4_t maskv = vdupq_n_u32((1u << TF_SHIFT)-1); - - // assume NX is divisible by 4 - assert(NX%4==0); - - uint32x4_t Rv1 = vld1q_u32(&R[0]); - uint32x4_t Rv2 = vld1q_u32(&R[4]); - uint32x4_t Rv3 = vld1q_u32(&R[8]); - uint32x4_t Rv4 = vld1q_u32(&R[12]); - uint32x4_t Rv5 = vld1q_u32(&R[16]); - uint32x4_t Rv6 = vld1q_u32(&R[20]); - uint32x4_t Rv7 = vld1q_u32(&R[24]); - uint32x4_t Rv8 = vld1q_u32(&R[28]); - - // Note this has a considerable amount of manual instruction reordering - // to avoid latency. 
We have 8 lanes of 4 rans states, but process 4 - // lanes at a time with the two sets of 4-lane steps interleaved to - // ensure best use of processor pipeline units and latency removal. - // - // Unfortunately this is still poor with Clang-10. Gcc more or less - // honours this order and on my test set operates at ~675MB/s decode. - // Clang without the manual reordering was at 440MB/s and with it at - // 500MB/s. Clang does a lot of reordering of this code, removing some - // of the manual tuning benefits. Short of dropping to assembly, for now - // I would recommend using gcc to compile this file. - uint16_t *sp = (uint16_t *)cp; - uint8_t overflow[64+64] = {0}; - for (i=0; i < out_end; i+=NX) { - // Decode freq, bias and symbol from s3 lookups - uint32x4_t Sv1, Sv2, Sv3, Sv4, Sv5, Sv6, Sv7, Sv8; - uint32x4_t Fv1, Fv2, Fv3, Fv4, Fv5, Fv6, Fv7, Fv8; - uint32x4_t Bv1, Bv2, Bv3, Bv4, Bv5, Bv6, Bv7, Bv8; - - // Note we could check __ARM_FEATURE_MVE & 1 and use - // vcreateq_u32 here, but I don't have a system to test with - // so cannot validate if the code works. 
- uint32x2_t s1a, s1b, s2a, s2b, s3a, s3b, s4a, s4b; - s1a = vcreate_u32((uint64_t)(s3[R[ 1]&mask])<<32 | (s3[R[ 0]&mask])); - s2a = vcreate_u32((uint64_t)(s3[R[ 5]&mask])<<32 | (s3[R[ 4]&mask])); - s1b = vcreate_u32((uint64_t)(s3[R[ 3]&mask])<<32 | (s3[R[ 2]&mask])); - s2b = vcreate_u32((uint64_t)(s3[R[ 7]&mask])<<32 | (s3[R[ 6]&mask])); - s3a = vcreate_u32((uint64_t)(s3[R[ 9]&mask])<<32 | (s3[R[ 8]&mask])); - s3b = vcreate_u32((uint64_t)(s3[R[11]&mask])<<32 | (s3[R[10]&mask])); - s4a = vcreate_u32((uint64_t)(s3[R[13]&mask])<<32 | (s3[R[12]&mask])); - s4b = vcreate_u32((uint64_t)(s3[R[15]&mask])<<32 | (s3[R[14]&mask])); - - Sv1 = vcombine_u32(s1a, s1b); - Sv2 = vcombine_u32(s2a, s2b); - Sv3 = vcombine_u32(s3a, s3b); - Sv4 = vcombine_u32(s4a, s4b); - - Fv1 = vshrq_n_u32(Sv1, TF_SHIFT+8); // Freq = S >> TF_SHIFT+8 - Fv2 = vshrq_n_u32(Sv2, TF_SHIFT+8); - Fv3 = vshrq_n_u32(Sv3, TF_SHIFT+8); - Fv4 = vshrq_n_u32(Sv4, TF_SHIFT+8); - - uint32x2_t s5a, s5b, s6a, s6b, s7a, s7b, s8a, s8b; - s5a = vcreate_u32((uint64_t)(s3[R[17]&mask])<<32 | (s3[R[16]&mask])); - s5b = vcreate_u32((uint64_t)(s3[R[19]&mask])<<32 | (s3[R[18]&mask])); - s6a = vcreate_u32((uint64_t)(s3[R[21]&mask])<<32 | (s3[R[20]&mask])); - s6b = vcreate_u32((uint64_t)(s3[R[23]&mask])<<32 | (s3[R[22]&mask])); - s7a = vcreate_u32((uint64_t)(s3[R[25]&mask])<<32 | (s3[R[24]&mask])); - s7b = vcreate_u32((uint64_t)(s3[R[27]&mask])<<32 | (s3[R[26]&mask])); - s8a = vcreate_u32((uint64_t)(s3[R[29]&mask])<<32 | (s3[R[28]&mask])); - s8b = vcreate_u32((uint64_t)(s3[R[31]&mask])<<32 | (s3[R[30]&mask])); - - Bv1 = vshrq_n_u32(Sv1, 8); // Bias = (S >> 8) - Bv2 = vshrq_n_u32(Sv2, 8); - Bv3 = vshrq_n_u32(Sv3, 8); - Bv4 = vshrq_n_u32(Sv4, 8); - - // R[0] = (freq * (R[0] >> TF_SHIFT) + bias; - Rv1 = vshrq_n_u32(Rv1, TF_SHIFT); // R >> TF_SHIFT - Rv2 = vshrq_n_u32(Rv2, TF_SHIFT); - Rv3 = vshrq_n_u32(Rv3, TF_SHIFT); - Rv4 = vshrq_n_u32(Rv4, TF_SHIFT); - - Sv5 = vcombine_u32(s5a, s5b); - Sv6 = vcombine_u32(s6a, s6b); - Sv7 = 
vcombine_u32(s7a, s7b); - Sv8 = vcombine_u32(s8a, s8b); - - Bv1 = vandq_u32(Bv1, maskv); // & mask - Bv2 = vandq_u32(Bv2, maskv); - Bv3 = vandq_u32(Bv3, maskv); - Bv4 = vandq_u32(Bv4, maskv); - - Fv5 = vshrq_n_u32(Sv5, TF_SHIFT+8); - Fv6 = vshrq_n_u32(Sv6, TF_SHIFT+8); - Fv7 = vshrq_n_u32(Sv7, TF_SHIFT+8); - Fv8 = vshrq_n_u32(Sv8, TF_SHIFT+8); - - // A mix of mul+add and mla instructions seems to win. - //Rv1 = vmulq_u32(Fv1, Rv1); Rv1 = vaddq_u32(Rv1, Bv1); - Rv1 = vmlaq_u32(Bv1, Fv1, Rv1); // R = R*Freq + Bias - Rv2 = vmulq_u32(Fv2, Rv2); Rv2 = vaddq_u32(Rv2, Bv2); - //Rv2 = vmlaq_u32(Bv2, Fv2, Rv2); - //Rv3 = vmulq_u32(Fv3, Rv3); Rv3 = vaddq_u32(Rv3, Bv3); - Rv3 = vmlaq_u32(Bv3, Fv3, Rv3); - Rv4 = vmulq_u32(Fv4, Rv4); Rv4 = vaddq_u32(Rv4, Bv4); - //Rv4 = vmlaq_u32(Bv4, Fv4, Rv4); - - Bv5 = vshrq_n_u32(Sv5, 8); - Bv6 = vshrq_n_u32(Sv6, 8); - Bv7 = vshrq_n_u32(Sv7, 8); - Bv8 = vshrq_n_u32(Sv8, 8); - - // Renorm - uint32x4_t Rlt1 = vcltq_u32(Rv1, vdupq_n_u32(RANS_BYTE_L)); // R cp_end) { - memmove(overflow, sp, cp_end - (uint8_t *)sp); - sp = (uint16_t *)overflow; - cp_end = overflow + sizeof(overflow); - } - - uint16x8_t norm12 = vld1q_u16(sp); - sp += nbits[imask1] + nbits[imask2]; - uint16x8_t norm34 = vld1q_u16(sp); - sp += nbits[imask3] + nbits[imask4]; - - Bv5 = vandq_u32(Bv5, maskv); - Bv6 = vandq_u32(Bv6, maskv); - Bv7 = vandq_u32(Bv7, maskv); - Bv8 = vandq_u32(Bv8, maskv); - - // Shuffle norm to the corresponding R lanes, via imask - //Rv5 = vmulq_u32(Fv5, Rv5); Rv5 = vaddq_u32(Rv5, Bv5); - Rv5 = vmlaq_u32(Bv5, Fv5, Rv5); - Rv6 = vmulq_u32(Fv6, Rv6); Rv6 = vaddq_u32(Rv6, Bv6); - //Rv6 = vmlaq_u32(Bv6, Fv6, Rv6); - //Rv7 = vmulq_u32(Fv7, Rv7); Rv7 = vaddq_u32(Rv7, Bv7); - Rv7 = vmlaq_u32(Bv7, Fv7, Rv7); - Rv8 = vmulq_u32(Fv8, Rv8); Rv8 = vaddq_u32(Rv8, Bv8); - //Rv8 = vmlaq_u32(Bv8, Fv8, Rv8); - - uint32_t imask12 = (imask1<<4)|imask2; - uint32_t imask34 = (imask3<<4)|imask4; - - // #define for brevity and formatting -#define cast_u16_u8 vreinterpret_u16_u8 
-#define cast_u8_u16 vreinterpretq_u8_u16 - uint16x4_t norm1, norm2, norm3, norm4, norm5, norm6, norm7, norm8; - norm1 = cast_u16_u8(vqtbl1_u8(cast_u8_u16(norm12),idx [imask1])); - norm2 = cast_u16_u8(vqtbl1_u8(cast_u8_u16(norm12),idx2[imask12])); - norm3 = cast_u16_u8(vqtbl1_u8(cast_u8_u16(norm34),idx [imask3])); - norm4 = cast_u16_u8(vqtbl1_u8(cast_u8_u16(norm34),idx2[imask34])); - - uint32x4_t Rlt5 = vcltq_u32(Rv5, vdupq_n_u32(RANS_BYTE_L)); - uint32x4_t Rlt6 = vcltq_u32(Rv6, vdupq_n_u32(RANS_BYTE_L)); - uint32x4_t Rlt7 = vcltq_u32(Rv7, vdupq_n_u32(RANS_BYTE_L)); - uint32x4_t Rlt8 = vcltq_u32(Rv8, vdupq_n_u32(RANS_BYTE_L)); - - // Add norm to R<<16 (Rsl) and blend back in with R - uint32x4_t Rsl1 = vshlq_n_u32(Rv1, 16); // Rsl = R << 16 - uint32x4_t Rsl2 = vshlq_n_u32(Rv2, 16); - uint32x4_t Rsl3 = vshlq_n_u32(Rv3, 16); - uint32x4_t Rsl4 = vshlq_n_u32(Rv4, 16); - - uint16x8_t norm56 = vld1q_u16(sp); - uint32_t imask5 = vaddvq_u32(vandq_u32(Rlt5, bit)); - uint32_t imask6 = vaddvq_u32(vandq_u32(Rlt6, bit)); - uint32_t imask7 = vaddvq_u32(vandq_u32(Rlt7, bit)); - uint32_t imask8 = vaddvq_u32(vandq_u32(Rlt8, bit)); - - sp += nbits[imask5] + nbits[imask6]; - uint16x8_t norm78 = vld1q_u16(sp); - sp += nbits[imask7] + nbits[imask8]; - - Rsl1 = vaddw_u16(Rsl1, norm1); // Rsl += norm - Rsl2 = vaddw_u16(Rsl2, norm2); - Rsl3 = vaddw_u16(Rsl3, norm3); - Rsl4 = vaddw_u16(Rsl4, norm4); - - uint32_t imask56 = (imask5<<4)|imask6; - uint32_t imask78 = (imask7<<4)|imask8; - - Rv1 = vbslq_u32(Rlt1, Rsl1, Rv1); // R = R 0; ) - out[out_end + z] = s3[R[z] & mask]; - - //fprintf(stderr, " 0 Decoded %d bytes\n", (int)(cp-in)); //c-size - - return out; - - err: - free(out_free); - return NULL; -} - -//----------------------------------------------------------------------------- - -unsigned char *rans_compress_O1_32x16_neon(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - unsigned char *cp, *out_end, *out_free = NULL; - unsigned int tab_size; 
- uint32_t bound = rans_compress_bound_4x16(in_size,1)-20; - int z; - RansState ransN[NX]; - - if (in_size < NX) // force O0 instead - return NULL; - - if (!out) { - *out_size = bound; - out_free = out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - if (((size_t)out)&1) - bound--; - out_end = out + bound; - - RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms))); - if (!syms) { - free(out_free); - return NULL; - } - - cp = out; - int shift = encode_freq1(in, in_size, 32, syms, &cp); - if (shift < 0) { - free(out_free); - htscodecs_tls_free(syms); - return NULL; - } - tab_size = cp - out; - - for (z = 0; z < NX; z++) - RansEncInit(&ransN[z]); - - uint8_t* ptr = out_end; - - int iN[NX], isz4 = in_size/NX; - for (z = 0; z < NX; z++) - iN[z] = (z+1)*isz4-2; - - unsigned char lN[NX]; - for (z = 0; z < NX; z++) - lN[z] = in[iN[z]+1]; - - // Deal with the remainder - z = NX-1; - lN[z] = in[in_size-1]; - for (iN[z] = in_size-2; iN[z] > NX*isz4-2; iN[z]--) { - unsigned char c = in[iN[z]]; - RansEncPutSymbol(&ransN[z], &ptr, &syms[c][lN[z]]); - lN[z] = c; - } - -#if 0 - // Scalar code equivalent - for (; iN[0] >= 0; ) { - for (z = NX-1; z >= 0; z-=4) { - unsigned char c0; - unsigned char c1; - unsigned char c2; - unsigned char c3; - - RansEncSymbol *s0 = &syms[c0=in[iN[z-0]--]][lN[z-0]]; lN[z-0] = c0; - RansEncSymbol *s1 = &syms[c1=in[iN[z-1]--]][lN[z-1]]; lN[z-1] = c1; - RansEncSymbol *s2 = &syms[c2=in[iN[z-2]--]][lN[z-2]]; lN[z-2] = c2; - RansEncSymbol *s3 = &syms[c3=in[iN[z-3]--]][lN[z-3]]; lN[z-3] = c3; - - RansEncPutSymbol(&ransN[z-0], &ptr, s0); - RansEncPutSymbol(&ransN[z-1], &ptr, s1); - RansEncPutSymbol(&ransN[z-2], &ptr, s2); - RansEncPutSymbol(&ransN[z-3], &ptr, s3); - } - } -#else - // SIMD code - for (; iN[0] >= 0; ) { - for (z = NX-1; z >= 0; z-=16) { - unsigned char c; - - RansEncSymbol *s0 = &syms[c=in[iN[z- 0]--]][lN[z- 0]]; lN[z- 0]=c; - RansEncSymbol *s1 = &syms[c=in[iN[z- 1]--]][lN[z- 1]]; lN[z- 1]=c; - 
RansEncSymbol *s2 = &syms[c=in[iN[z- 2]--]][lN[z- 2]]; lN[z- 2]=c; - RansEncSymbol *s3 = &syms[c=in[iN[z- 3]--]][lN[z- 3]]; lN[z- 3]=c; - - uint32x4_t Rv1 = vld1q_u32(&ransN[z-3]); - uint32x4_t Rv2 = vld1q_u32(&ransN[z-7]); - uint32x4_t Rv3 = vld1q_u32(&ransN[z-11]); - uint32x4_t Rv4 = vld1q_u32(&ransN[z-15]); - - RansEncSymbol *s4 = &syms[c=in[iN[z- 4]--]][lN[z- 4]]; lN[z- 4]=c; - RansEncSymbol *s5 = &syms[c=in[iN[z- 5]--]][lN[z- 5]]; lN[z- 5]=c; - RansEncSymbol *s6 = &syms[c=in[iN[z- 6]--]][lN[z- 6]]; lN[z- 6]=c; - RansEncSymbol *s7 = &syms[c=in[iN[z- 7]--]][lN[z- 7]]; lN[z- 7]=c; - - uint32x4_t A_1 = vld1q_u32((void *)s3); - uint32x4_t B_1 = vld1q_u32((void *)s2); - uint32x4_t C_1 = vld1q_u32((void *)s1); - uint32x4_t D_1 = vld1q_u32((void *)s0); - - uint32x4_t A1_1 = vtrn1q_u32(A_1, B_1); - uint32x4_t C1_1 = vtrn1q_u32(C_1, D_1); - uint32x4_t A2_1 = vtrn2q_u32(A_1, B_1); - uint32x4_t C2_1 = vtrn2q_u32(C_1, D_1); - - uint32x4_t Xmaxv1=u32_u64(vtrn1q_u64(u64_u32(A1_1),u64_u32(C1_1))); - uint32x4_t Biasv1=u32_u64(vtrn2q_u64(u64_u32(A1_1),u64_u32(C1_1))); - uint32x4_t RFv1 =u32_u64(vtrn1q_u64(u64_u32(A2_1),u64_u32(C2_1))); - uint32x4_t FSv1 =u32_u64(vtrn2q_u64(u64_u32(A2_1),u64_u32(C2_1))); - - uint32x4_t A_2 = vld1q_u32((void *)s7); - uint32x4_t B_2 = vld1q_u32((void *)s6); - uint32x4_t C_2 = vld1q_u32((void *)s5); - uint32x4_t D_2 = vld1q_u32((void *)s4); - uint32x4_t A1_2 = vtrn1q_u32(A_2, B_2); - uint32x4_t C1_2 = vtrn1q_u32(C_2, D_2); - uint32x4_t A2_2 = vtrn2q_u32(A_2, B_2); - uint32x4_t C2_2 = vtrn2q_u32(C_2, D_2); - - uint32x4_t Xmaxv2=u32_u64(vtrn1q_u64(u64_u32(A1_2),u64_u32(C1_2))); - uint32x4_t Biasv2=u32_u64(vtrn2q_u64(u64_u32(A1_2),u64_u32(C1_2))); - uint32x4_t RFv2 =u32_u64(vtrn1q_u64(u64_u32(A2_2),u64_u32(C2_2))); - uint32x4_t FSv2 =u32_u64(vtrn2q_u64(u64_u32(A2_2),u64_u32(C2_2))); - - uint32x4_t Cv1 = vcgtq_u32(Rv1, Xmaxv1); - uint32x4_t Cv2 = vcgtq_u32(Rv2, Xmaxv2); - uint32x4_t bit = {8,4,2,1}; - uint32_t imask1 = vaddvq_u32(vandq_u32(Cv1, bit)); 
- uint32_t imask2 = vaddvq_u32(vandq_u32(Cv2, bit)); - - RansEncSymbol *s8 = &syms[c=in[iN[z- 8]--]][lN[z- 8]]; lN[z- 8]=c; - RansEncSymbol *s9 = &syms[c=in[iN[z- 9]--]][lN[z- 9]]; lN[z- 9]=c; - RansEncSymbol *s10= &syms[c=in[iN[z-10]--]][lN[z-10]]; lN[z-10]=c; - RansEncSymbol *s11= &syms[c=in[iN[z-11]--]][lN[z-11]]; lN[z-11]=c; - - RansEncSymbol *s12= &syms[c=in[iN[z-12]--]][lN[z-12]]; lN[z-12]=c; - RansEncSymbol *s13= &syms[c=in[iN[z-13]--]][lN[z-13]]; lN[z-13]=c; - RansEncSymbol *s14= &syms[c=in[iN[z-14]--]][lN[z-14]]; lN[z-14]=c; - RansEncSymbol *s15= &syms[c=in[iN[z-15]--]][lN[z-15]]; lN[z-15]=c; - - uint32x4_t A_3 = vld1q_u32((void *)s11); - uint32x4_t B_3 = vld1q_u32((void *)s10); - uint32x4_t C_3 = vld1q_u32((void *)s9); - uint32x4_t D_3 = vld1q_u32((void *)s8); - - - uint32x4_t A1_3 = vtrn1q_u32(A_3, B_3); - uint32x4_t C1_3 = vtrn1q_u32(C_3, D_3); - uint32x4_t A2_3 = vtrn2q_u32(A_3, B_3); - uint32x4_t C2_3 = vtrn2q_u32(C_3, D_3); - - uint32x4_t Xmaxv3=u32_u64(vtrn1q_u64(u64_u32(A1_3),u64_u32(C1_3))); - uint32x4_t Biasv3=u32_u64(vtrn2q_u64(u64_u32(A1_3),u64_u32(C1_3))); - uint32x4_t RFv3 =u32_u64(vtrn1q_u64(u64_u32(A2_3),u64_u32(C2_3))); - uint32x4_t FSv3 =u32_u64(vtrn2q_u64(u64_u32(A2_3),u64_u32(C2_3))); - - uint32x4_t A_4 = vld1q_u32((void *)s15); - uint32x4_t B_4 = vld1q_u32((void *)s14); - uint32x4_t C_4 = vld1q_u32((void *)s13); - uint32x4_t D_4 = vld1q_u32((void *)s12); - - uint32x4_t A1_4 = vtrn1q_u32(A_4, B_4); - uint32x4_t C1_4 = vtrn1q_u32(C_4, D_4); - uint32x4_t A2_4 = vtrn2q_u32(A_4, B_4); - uint32x4_t C2_4 = vtrn2q_u32(C_4, D_4); - - uint32x4_t Xmaxv4=u32_u64(vtrn1q_u64(u64_u32(A1_4),u64_u32(C1_4))); - uint32x4_t Biasv4=u32_u64(vtrn2q_u64(u64_u32(A1_4),u64_u32(C1_4))); - uint32x4_t RFv4 =u32_u64(vtrn1q_u64(u64_u32(A2_4),u64_u32(C2_4))); - uint32x4_t FSv4 =u32_u64(vtrn2q_u64(u64_u32(A2_4),u64_u32(C2_4))); - - uint32x4_t Cv3 = vcgtq_u32(Rv3, Xmaxv3); - uint32x4_t Cv4 = vcgtq_u32(Rv4, Xmaxv4); - uint32_t imask3 = vaddvq_u32(vandq_u32(Cv3, bit)); - 
uint32_t imask4 = vaddvq_u32(vandq_u32(Cv4, bit)); - - // Select low 16-bits from Rv based on imask, using tbl - uint8x8_t norm1, norm2, norm3, norm4; - static int nbits[16] = { 0,2,2,4, 2,4,4,6, 2,4,4,6, 4,6,6,8 }; - norm1 = vqtbl1_u8(vreinterpretq_u8_u32(Rv1),vtab[imask1]); - norm2 = vqtbl1_u8(vreinterpretq_u8_u32(Rv2),vtab[imask2]); - - vst1_u8(ptr-8, norm1); ptr -= nbits[imask1]; - vst1_u8(ptr-8, norm2); ptr -= nbits[imask2]; - - norm3 = vqtbl1_u8(vreinterpretq_u8_u32(Rv3),vtab[imask3]); - norm4 = vqtbl1_u8(vreinterpretq_u8_u32(Rv4),vtab[imask4]); - - vst1_u8(ptr-8, norm3); ptr -= nbits[imask3]; - vst1_u8(ptr-8, norm4); ptr -= nbits[imask4]; - - // R' = R>>16 - uint32x4_t Rv1_r = vshrq_n_u32(Rv1, 16); - uint32x4_t Rv2_r = vshrq_n_u32(Rv2, 16); - uint32x4_t Rv3_r = vshrq_n_u32(Rv3, 16); - uint32x4_t Rv4_r = vshrq_n_u32(Rv4, 16); - - // Blend R and R' based on Cv. - Rv1 = vbslq_u32(Cv1, Rv1_r, Rv1); - Rv2 = vbslq_u32(Cv2, Rv2_r, Rv2); - Rv3 = vbslq_u32(Cv3, Rv3_r, Rv3); - Rv4 = vbslq_u32(Cv4, Rv4_r, Rv4); - - uint64x2_t qvl1 = vmull_u32(vget_low_u32(Rv1), vget_low_u32(RFv1)); - uint64x2_t qvh1 = vmull_high_u32(Rv1, RFv1); - uint64x2_t qvl2 = vmull_u32(vget_low_u32(Rv2), vget_low_u32(RFv2)); - uint64x2_t qvh2 = vmull_high_u32(Rv2, RFv2); - - int32x4_t RSv1 = vnegq_s32(vreinterpretq_s32_u32( - vshrq_n_u32(FSv1, 16))); - int32x4_t RSv2 = vnegq_s32(vreinterpretq_s32_u32( - vshrq_n_u32(FSv2, 16))); - - uint64x2_t qvl3 = vmull_u32(vget_low_u32(Rv3), vget_low_u32(RFv3)); - uint64x2_t qvh3 = vmull_high_u32(Rv3, RFv3); - uint64x2_t qvl4 = vmull_u32(vget_low_u32(Rv4), vget_low_u32(RFv4)); - uint64x2_t qvh4 = vmull_high_u32(Rv4, RFv4); - - int32x4_t RSv3 = vnegq_s32(vreinterpretq_s32_u32( - vshrq_n_u32(FSv3, 16))); - int32x4_t RSv4 = vnegq_s32(vreinterpretq_s32_u32( - vshrq_n_u32(FSv4, 16))); - - qvl1 = vreinterpretq_u64_s64( - vshlq_s64(vreinterpretq_s64_u64(qvl1), - vmovl_s32(vget_low_s32(RSv1)))); - qvh1 = vreinterpretq_u64_s64( - vshlq_s64(vreinterpretq_s64_u64(qvh1), - 
vmovl_s32(vget_high_s32(RSv1)))); - - qvl2 = vreinterpretq_u64_s64( - vshlq_s64(vreinterpretq_s64_u64(qvl2), - vmovl_s32(vget_low_s32(RSv2)))); - qvh2 = vreinterpretq_u64_s64( - vshlq_s64(vreinterpretq_s64_u64(qvh2), - vmovl_s32(vget_high_s32(RSv2)))); - - uint32x4_t qv1 = vcombine_u32(vmovn_u64(qvl1), - vmovn_u64(qvh1)); - uint32x4_t qv2 = vcombine_u32(vmovn_u64(qvl2), - vmovn_u64(qvh2)); - - qvl3 = vreinterpretq_u64_s64( - vshlq_s64(vreinterpretq_s64_u64(qvl3), - vmovl_s32(vget_low_s32(RSv3)))); - qvh3 = vreinterpretq_u64_s64( - vshlq_s64(vreinterpretq_s64_u64(qvh3), - vmovl_s32(vget_high_s32(RSv3)))); - - qvl4 = vreinterpretq_u64_s64( - vshlq_s64(vreinterpretq_s64_u64(qvl4), - vmovl_s32(vget_low_s32(RSv4)))); - qvh4 = vreinterpretq_u64_s64( - vshlq_s64(vreinterpretq_s64_u64(qvh4), - vmovl_s32(vget_high_s32(RSv4)))); - - uint32x4_t qv3 = vcombine_u32(vmovn_u64(qvl3), - vmovn_u64(qvh3)); - uint32x4_t qv4 = vcombine_u32(vmovn_u64(qvl4), - vmovn_u64(qvh4)); - - FSv1 = vandq_u32(FSv1, vdupq_n_u32(0xffff)); // cmpl_freq - FSv2 = vandq_u32(FSv2, vdupq_n_u32(0xffff)); - FSv3 = vandq_u32(FSv3, vdupq_n_u32(0xffff)); - FSv4 = vandq_u32(FSv4, vdupq_n_u32(0xffff)); - - qv1 = vmlaq_u32(Biasv1, qv1, FSv1); - qv2 = vmlaq_u32(Biasv2, qv2, FSv2); - qv3 = vmlaq_u32(Biasv3, qv3, FSv3); - qv4 = vmlaq_u32(Biasv4, qv4, FSv4); - - Rv1 = vaddq_u32(Rv1, qv1); - Rv2 = vaddq_u32(Rv2, qv2); - Rv3 = vaddq_u32(Rv3, qv3); - Rv4 = vaddq_u32(Rv4, qv4); - - vst1q_u32(&ransN[z-3], Rv1); - vst1q_u32(&ransN[z-7], Rv2); - vst1q_u32(&ransN[z-11],Rv3); - vst1q_u32(&ransN[z-15],Rv4); - } - } -#endif - - for (z = NX-1; z>=0; z--) - RansEncPutSymbol(&ransN[z], &ptr, &syms[0][lN[z]]); - - for (z = NX-1; z>=0; z--) - RansEncFlush(&ransN[z], &ptr); - - *out_size = (out_end - ptr) + tab_size; - - cp = out; - memmove(out + tab_size, ptr, out_end-ptr); - - htscodecs_tls_free(syms); - return out; -} - -//#define MAGIC2 111 -#define MAGIC2 179 -//#define MAGIC2 0 -typedef struct { - union { - struct { - uint16_t 
f; - uint16_t b; - } s; - uint32_t fb; - } u; -} bf_t; - -static inline void transpose_and_copy(uint8_t *out, int iN[32], - uint8_t t[32][32]) { - int z; -// for (z = 0; z < NX; z++) { -// int k; -// for (k = 0; k < 32; k++) -// out[iN[z]+k] = t[k][z]; -// iN[z] += 32; -// } - - for (z = 0; z < NX; z+=4) { - *(uint64_t *)&out[iN[z]] = - ((uint64_t)(t[0][z])<< 0) + - ((uint64_t)(t[1][z])<< 8) + - ((uint64_t)(t[2][z])<<16) + - ((uint64_t)(t[3][z])<<24) + - ((uint64_t)(t[4][z])<<32) + - ((uint64_t)(t[5][z])<<40) + - ((uint64_t)(t[6][z])<<48) + - ((uint64_t)(t[7][z])<<56); - *(uint64_t *)&out[iN[z+1]] = - ((uint64_t)(t[0][z+1])<< 0) + - ((uint64_t)(t[1][z+1])<< 8) + - ((uint64_t)(t[2][z+1])<<16) + - ((uint64_t)(t[3][z+1])<<24) + - ((uint64_t)(t[4][z+1])<<32) + - ((uint64_t)(t[5][z+1])<<40) + - ((uint64_t)(t[6][z+1])<<48) + - ((uint64_t)(t[7][z+1])<<56); - *(uint64_t *)&out[iN[z+2]] = - ((uint64_t)(t[0][z+2])<< 0) + - ((uint64_t)(t[1][z+2])<< 8) + - ((uint64_t)(t[2][z+2])<<16) + - ((uint64_t)(t[3][z+2])<<24) + - ((uint64_t)(t[4][z+2])<<32) + - ((uint64_t)(t[5][z+2])<<40) + - ((uint64_t)(t[6][z+2])<<48) + - ((uint64_t)(t[7][z+2])<<56); - *(uint64_t *)&out[iN[z+3]] = - ((uint64_t)(t[0][z+3])<< 0) + - ((uint64_t)(t[1][z+3])<< 8) + - ((uint64_t)(t[2][z+3])<<16) + - ((uint64_t)(t[3][z+3])<<24) + - ((uint64_t)(t[4][z+3])<<32) + - ((uint64_t)(t[5][z+3])<<40) + - ((uint64_t)(t[6][z+3])<<48) + - ((uint64_t)(t[7][z+3])<<56); - - *(uint64_t *)&out[iN[z]+8] = - ((uint64_t)(t[8+0][z])<< 0) + - ((uint64_t)(t[8+1][z])<< 8) + - ((uint64_t)(t[8+2][z])<<16) + - ((uint64_t)(t[8+3][z])<<24) + - ((uint64_t)(t[8+4][z])<<32) + - ((uint64_t)(t[8+5][z])<<40) + - ((uint64_t)(t[8+6][z])<<48) + - ((uint64_t)(t[8+7][z])<<56); - *(uint64_t *)&out[iN[z+1]+8] = - ((uint64_t)(t[8+0][z+1])<< 0) + - ((uint64_t)(t[8+1][z+1])<< 8) + - ((uint64_t)(t[8+2][z+1])<<16) + - ((uint64_t)(t[8+3][z+1])<<24) + - ((uint64_t)(t[8+4][z+1])<<32) + - ((uint64_t)(t[8+5][z+1])<<40) + - ((uint64_t)(t[8+6][z+1])<<48) + - 
((uint64_t)(t[8+7][z+1])<<56); - *(uint64_t *)&out[iN[z+2]+8] = - ((uint64_t)(t[8+0][z+2])<< 0) + - ((uint64_t)(t[8+1][z+2])<< 8) + - ((uint64_t)(t[8+2][z+2])<<16) + - ((uint64_t)(t[8+3][z+2])<<24) + - ((uint64_t)(t[8+4][z+2])<<32) + - ((uint64_t)(t[8+5][z+2])<<40) + - ((uint64_t)(t[8+6][z+2])<<48) + - ((uint64_t)(t[8+7][z+2])<<56); - *(uint64_t *)&out[iN[z+3]+8] = - ((uint64_t)(t[8+0][z+3])<< 0) + - ((uint64_t)(t[8+1][z+3])<< 8) + - ((uint64_t)(t[8+2][z+3])<<16) + - ((uint64_t)(t[8+3][z+3])<<24) + - ((uint64_t)(t[8+4][z+3])<<32) + - ((uint64_t)(t[8+5][z+3])<<40) + - ((uint64_t)(t[8+6][z+3])<<48) + - ((uint64_t)(t[8+7][z+3])<<56); - - *(uint64_t *)&out[iN[z]+16] = - ((uint64_t)(t[16+0][z])<< 0) + - ((uint64_t)(t[16+1][z])<< 8) + - ((uint64_t)(t[16+2][z])<<16) + - ((uint64_t)(t[16+3][z])<<24) + - ((uint64_t)(t[16+4][z])<<32) + - ((uint64_t)(t[16+5][z])<<40) + - ((uint64_t)(t[16+6][z])<<48) + - ((uint64_t)(t[16+7][z])<<56); - *(uint64_t *)&out[iN[z+1]+16] = - ((uint64_t)(t[16+0][z+1])<< 0) + - ((uint64_t)(t[16+1][z+1])<< 8) + - ((uint64_t)(t[16+2][z+1])<<16) + - ((uint64_t)(t[16+3][z+1])<<24) + - ((uint64_t)(t[16+4][z+1])<<32) + - ((uint64_t)(t[16+5][z+1])<<40) + - ((uint64_t)(t[16+6][z+1])<<48) + - ((uint64_t)(t[16+7][z+1])<<56); - *(uint64_t *)&out[iN[z+2]+16] = - ((uint64_t)(t[16+0][z+2])<< 0) + - ((uint64_t)(t[16+1][z+2])<< 8) + - ((uint64_t)(t[16+2][z+2])<<16) + - ((uint64_t)(t[16+3][z+2])<<24) + - ((uint64_t)(t[16+4][z+2])<<32) + - ((uint64_t)(t[16+5][z+2])<<40) + - ((uint64_t)(t[16+6][z+2])<<48) + - ((uint64_t)(t[16+7][z+2])<<56); - *(uint64_t *)&out[iN[z+3]+16] = - ((uint64_t)(t[16+0][z+3])<< 0) + - ((uint64_t)(t[16+1][z+3])<< 8) + - ((uint64_t)(t[16+2][z+3])<<16) + - ((uint64_t)(t[16+3][z+3])<<24) + - ((uint64_t)(t[16+4][z+3])<<32) + - ((uint64_t)(t[16+5][z+3])<<40) + - ((uint64_t)(t[16+6][z+3])<<48) + - ((uint64_t)(t[16+7][z+3])<<56); - - *(uint64_t *)&out[iN[z]+24] = - ((uint64_t)(t[24+0][z])<< 0) + - ((uint64_t)(t[24+1][z])<< 8) + - 
((uint64_t)(t[24+2][z])<<16) + - ((uint64_t)(t[24+3][z])<<24) + - ((uint64_t)(t[24+4][z])<<32) + - ((uint64_t)(t[24+5][z])<<40) + - ((uint64_t)(t[24+6][z])<<48) + - ((uint64_t)(t[24+7][z])<<56); - *(uint64_t *)&out[iN[z+1]+24] = - ((uint64_t)(t[24+0][z+1])<< 0) + - ((uint64_t)(t[24+1][z+1])<< 8) + - ((uint64_t)(t[24+2][z+1])<<16) + - ((uint64_t)(t[24+3][z+1])<<24) + - ((uint64_t)(t[24+4][z+1])<<32) + - ((uint64_t)(t[24+5][z+1])<<40) + - ((uint64_t)(t[24+6][z+1])<<48) + - ((uint64_t)(t[24+7][z+1])<<56); - *(uint64_t *)&out[iN[z+2]+24] = - ((uint64_t)(t[24+0][z+2])<< 0) + - ((uint64_t)(t[24+1][z+2])<< 8) + - ((uint64_t)(t[24+2][z+2])<<16) + - ((uint64_t)(t[24+3][z+2])<<24) + - ((uint64_t)(t[24+4][z+2])<<32) + - ((uint64_t)(t[24+5][z+2])<<40) + - ((uint64_t)(t[24+6][z+2])<<48) + - ((uint64_t)(t[24+7][z+2])<<56); - *(uint64_t *)&out[iN[z+3]+24] = - ((uint64_t)(t[24+0][z+3])<< 0) + - ((uint64_t)(t[24+1][z+3])<< 8) + - ((uint64_t)(t[24+2][z+3])<<16) + - ((uint64_t)(t[24+3][z+3])<<24) + - ((uint64_t)(t[24+4][z+3])<<32) + - ((uint64_t)(t[24+5][z+3])<<40) + - ((uint64_t)(t[24+6][z+3])<<48) + - ((uint64_t)(t[24+7][z+3])<<56); - - iN[z+0] += 32; - iN[z+1] += 32; - iN[z+2] += 32; - iN[z+3] += 32; - } -} - -unsigned char *rans_uncompress_O1_32x16_neon(unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_sz) { - if (in_size < NX*4) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL; - unsigned char *c_freq = NULL; - int i, j = -999; - unsigned int x; - - uint8_t *sfb_ = htscodecs_tls_alloc(256*(TOTFREQ_O1+MAGIC2)*sizeof(*sfb_)); - uint32_t s3[256][TOTFREQ_O1_FAST]; - - if (!sfb_) - return NULL; - bf_t fb[256][256]; - uint8_t *sfb[256]; - if ((*cp >> 4) == TF_SHIFT_O1) { - for (i = 0; i < 
256; i++) - sfb[i]= sfb_ + i*(TOTFREQ_O1+MAGIC2); - } else { - for (i = 0; i < 256; i++) - sfb[i]= sfb_ + i*(TOTFREQ_O1_FAST+MAGIC2); - } - - if (!out) - out_free = out = malloc(out_sz); - - if (!out) - goto err; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // compressed header? If so uncompress it - unsigned char *tab_end = NULL; - unsigned char *c_freq_end = cp_end; - unsigned int shift = *cp >> 4; - if (*cp++ & 1) { - uint32_t u_freq_sz, c_freq_sz; - cp += var_get_u32(cp, cp_end, &u_freq_sz); - cp += var_get_u32(cp, cp_end, &c_freq_sz); - if (c_freq_sz > cp_end - cp) - goto err; - tab_end = cp + c_freq_sz; - if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL, u_freq_sz))) - goto err; - cp = c_freq; - c_freq_end = c_freq + u_freq_sz; - } - - // Decode order-0 symbol list; avoids needing in order-1 tables -#if 0 - // Disable inline for now as this is ~10% slower under gcc. Why? - cp += decode_freq1(cp, c_freq_end, shift, NULL, s3, sfb, fb); -#else - uint32_t F0[256] = {0}; - int fsz = decode_alphabet(cp, c_freq_end, F0); - if (!fsz) - goto err; - cp += fsz; - - if (cp >= c_freq_end) - goto err; - - for (i = 0; i < 256; i++) { - if (F0[i] == 0) - continue; - - uint32_t F[256] = {0}, T = 0; - fsz = decode_freq_d(cp, c_freq_end, F0, F, &T); - if (!fsz) - goto err; - cp += fsz; - - if (!T) { - //fprintf(stderr, "No freq for F_%d\n", i); - continue; - } - - normalise_freq_shift(F, T, 1< (1<>TF_SHIFT_O1) + m - fb[l[z]][c].u.s.b; - RansDecRenormSafe(&R[z], &ptr, ptr_end); - l[z] = c; - } - } - - - // Remainder - for (; i4[NX-1] < out_sz; i4[NX-1]++) { - uint32_t m = R[NX-1] & ((1u<>TF_SHIFT_O1) + m - fb[l[NX-1]][c].u.s.b; - RansDecRenormSafe(&R[NX-1], &ptr, ptr_end); - l[NX-1] = c; - } - } else { - // TF_SHIFT_O1 = 10 - const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); - uint32x4_t maskv = vdupq_n_u32((1u << TF_SHIFT_O1_FAST)-1); - - // FIXME: plus room for "safe" renorm. - // Follow with 2nd copy doing scalar code instead? 
- unsigned char tbuf[32][32]; - int tidx = 0; - - uint32x4_t RV[8] = { - vld1q_u32(&R[0]), - vld1q_u32(&R[4]), - vld1q_u32(&R[8]), - vld1q_u32(&R[12]), - vld1q_u32(&R[16]), - vld1q_u32(&R[20]), - vld1q_u32(&R[24]), - vld1q_u32(&R[28]), - }; - -// uint32x4_t MV[8] = { -// vandq_u32(RV[0], maskv), -// vandq_u32(RV[1], maskv), -// vandq_u32(RV[2], maskv), -// vandq_u32(RV[3], maskv), -// vandq_u32(RV[4], maskv), -// vandq_u32(RV[5], maskv), -// vandq_u32(RV[6], maskv), -// vandq_u32(RV[7], maskv), -// }; - - uint32_t m[NX]; - for (z = 0; z < NX; z++) - m[z] = l[z]*TOTFREQ_O1_FAST + (R[z] & mask); - - uint32_t *S3 = (uint32_t *)s3; - - for (; i4[0] < isz4 && ptr+64 < ptr_end;) { - int Z = 0; - for (z = 0; z < NX; z+=16, Z+=4) { - // streamline these. Could swap between two banks and pre-load - uint32x4_t Sv1, Sv2, Sv3, Sv4; - uint32x4_t Fv1, Fv2, Fv3, Fv4; - uint32x4_t Bv1, Bv2, Bv3, Bv4; - uint32x2_t s1a, s1b, s2a, s2b, s3a, s3b, s4a, s4b; - - s1a = vcreate_u32((uint64_t)(S3[m[z+1]])<<32 | (S3[m[z+0]])); - s1b = vcreate_u32((uint64_t)(S3[m[z+3]])<<32 | (S3[m[z+2]])); - s2a = vcreate_u32((uint64_t)(S3[m[z+5]])<<32 | (S3[m[z+4]])); - s2b = vcreate_u32((uint64_t)(S3[m[z+7]])<<32 | (S3[m[z+6]])); - s3a = vcreate_u32((uint64_t)(S3[m[z+9]])<<32 | (S3[m[z+8]])); - s3b = vcreate_u32((uint64_t)(S3[m[z+11]])<<32 | (S3[m[z+10]])); - s4a = vcreate_u32((uint64_t)(S3[m[z+13]])<<32 | (S3[m[z+12]])); - s4b = vcreate_u32((uint64_t)(S3[m[z+15]])<<32 | (S3[m[z+14]])); - - Sv1 = vcombine_u32(s1a, s1b); - Sv2 = vcombine_u32(s2a, s2b); - Sv3 = vcombine_u32(s3a, s3b); - Sv4 = vcombine_u32(s4a, s4b); - - uint16x4_t p16_1 = vmovn_u32(Sv1); - uint16x4_t p16_2 = vmovn_u32(Sv2); - uint16x4_t p16_3 = vmovn_u32(Sv3); - uint16x4_t p16_4 = vmovn_u32(Sv4); - - uint8x8_t p8_12 = vmovn_u16(vcombine_u16(p16_1,p16_2)); - uint8x8_t p8_34 = vmovn_u16(vcombine_u16(p16_3,p16_4)); - uint8x16_t p8_a = vcombine_u8(p8_12, p8_34); - vst1q_u8(l+z, p8_a); - - Fv1 = vshrq_n_u32(Sv1, TF_SHIFT_O1_FAST+8); - Fv2 = 
vshrq_n_u32(Sv2, TF_SHIFT_O1_FAST+8); - Fv3 = vshrq_n_u32(Sv3, TF_SHIFT_O1_FAST+8); - Fv4 = vshrq_n_u32(Sv4, TF_SHIFT_O1_FAST+8); - - Bv1 = vandq_u32(vshrq_n_u32(Sv1, 8), maskv); - Bv2 = vandq_u32(vshrq_n_u32(Sv2, 8), maskv); - Bv3 = vandq_u32(vshrq_n_u32(Sv3, 8), maskv); - Bv4 = vandq_u32(vshrq_n_u32(Sv4, 8), maskv); - - // Add in transpose here. - memcpy(&tbuf[tidx][z], &l[z], 16); - - RV[Z+0] = vshrq_n_u32(RV[Z+0], TF_SHIFT_O1_FAST); - RV[Z+1] = vshrq_n_u32(RV[Z+1], TF_SHIFT_O1_FAST); - RV[Z+2] = vshrq_n_u32(RV[Z+2], TF_SHIFT_O1_FAST); - RV[Z+3] = vshrq_n_u32(RV[Z+3], TF_SHIFT_O1_FAST); - - // Ready for use in S3[] offset - Sv1 = vshlq_n_u32(vandq_u32(Sv1, vdupq_n_u32(0xff)), TF_SHIFT_O1_FAST); - Sv2 = vshlq_n_u32(vandq_u32(Sv2, vdupq_n_u32(0xff)), TF_SHIFT_O1_FAST); - Sv3 = vshlq_n_u32(vandq_u32(Sv3, vdupq_n_u32(0xff)), TF_SHIFT_O1_FAST); - Sv4 = vshlq_n_u32(vandq_u32(Sv4, vdupq_n_u32(0xff)), TF_SHIFT_O1_FAST); - - RV[Z+0] = vmlaq_u32(Bv1, Fv1, RV[Z+0]); - RV[Z+1] = vmlaq_u32(Bv2, Fv2, RV[Z+1]); - RV[Z+2] = vmlaq_u32(Bv3, Fv3, RV[Z+2]); - RV[Z+3] = vmlaq_u32(Bv4, Fv4, RV[Z+3]); - - // Renorm - uint32x4_t Rlt1 = vcltq_u32(RV[Z+0], vdupq_n_u32(RANS_BYTE_L)); - uint32x4_t Rlt2 = vcltq_u32(RV[Z+1], vdupq_n_u32(RANS_BYTE_L)); - uint32x4_t Rlt3 = vcltq_u32(RV[Z+2], vdupq_n_u32(RANS_BYTE_L)); - uint32x4_t Rlt4 = vcltq_u32(RV[Z+3], vdupq_n_u32(RANS_BYTE_L)); - - // Compute lookup table index - static int nbits[16] = { 0,2,2,4, 2,4,4,6, 2,4,4,6, 4,6,6,8 }; - uint32x4_t bit = {8,4,2,1}; - uint32_t imask1 = vaddvq_u32(vandq_u32(Rlt1, bit)); - uint32_t imask2 = vaddvq_u32(vandq_u32(Rlt2, bit)); - uint32_t imask3 = vaddvq_u32(vandq_u32(Rlt3, bit)); - uint32_t imask4 = vaddvq_u32(vandq_u32(Rlt4, bit)); - - // load 8 lanes of renorm data - uint16x8_t norm12 = vld1q_u16((uint16_t *)ptr); - // move ptr by no. 
renorm lanes used - ptr += nbits[imask1] + nbits[imask2]; - uint16x8_t norm34 = vld1q_u16((uint16_t *)ptr); - ptr += nbits[imask3] + nbits[imask4]; - - uint32_t imask12 = (imask1<<4)|imask2; - uint32_t imask34 = (imask3<<4)|imask4; - - // Shuffle norm to the corresponding R lanes, via imask - // #define for brevity and formatting - uint16x4_t norm1, norm2, norm3, norm4; - norm1 = cast_u16_u8(vqtbl1_u8(cast_u8_u16(norm12),idx [imask1])); - norm2 = cast_u16_u8(vqtbl1_u8(cast_u8_u16(norm12),idx2[imask12])); - norm3 = cast_u16_u8(vqtbl1_u8(cast_u8_u16(norm34),idx [imask3])); - norm4 = cast_u16_u8(vqtbl1_u8(cast_u8_u16(norm34),idx2[imask34])); - - // Add norm to R<<16 and blend back in with R - uint32x4_t Rsl1 = vshlq_n_u32(RV[Z+0], 16); // Rsl = R << 16 - uint32x4_t Rsl2 = vshlq_n_u32(RV[Z+1], 16); - uint32x4_t Rsl3 = vshlq_n_u32(RV[Z+2], 16); - uint32x4_t Rsl4 = vshlq_n_u32(RV[Z+3], 16); - - Rsl1 = vaddw_u16(Rsl1, norm1); // Rsl += norm - Rsl2 = vaddw_u16(Rsl2, norm2); - Rsl3 = vaddw_u16(Rsl3, norm3); - Rsl4 = vaddw_u16(Rsl4, norm4); - - RV[Z+0] = vbslq_u32(Rlt1, Rsl1, RV[Z+0]); // R = R s3 + l*TOTFREQ_O1_FAST + c. 
- uint32x4_t off1 = vandq_u32(RV[Z+0], maskv); - uint32x4_t off2 = vandq_u32(RV[Z+1], maskv); - uint32x4_t off3 = vandq_u32(RV[Z+2], maskv); - uint32x4_t off4 = vandq_u32(RV[Z+3], maskv); - - off1 = vaddq_u32(off1, Sv1); - off2 = vaddq_u32(off2, Sv2); - off3 = vaddq_u32(off3, Sv3); - off4 = vaddq_u32(off4, Sv4); - - vst1q_u32(&m[z+ 0], off1); - vst1q_u32(&m[z+ 4], off2); - vst1q_u32(&m[z+ 8], off3); - vst1q_u32(&m[z+12], off4); - } - - i4[0]++; - if (++tidx == 32) { - i4[0] -= 32; - - transpose_and_copy(out, i4, tbuf); - tidx = 0; - } - } - - vst1q_u32(&R[ 0], RV[0]); - vst1q_u32(&R[ 4], RV[1]); - vst1q_u32(&R[ 8], RV[2]); - vst1q_u32(&R[12], RV[3]); - vst1q_u32(&R[16], RV[4]); - vst1q_u32(&R[20], RV[5]); - vst1q_u32(&R[24], RV[6]); - vst1q_u32(&R[28], RV[7]); - - i4[0]-=tidx; - int T; - for (z = 0; z < NX; z++) - for (T = 0; T < tidx; T++) - out[i4[z]++] = tbuf[T][z]; - - // Scalar version for close to end of in[] array so we don't do - // SIMD loads beyond the end of the buffer - for (; i4[0] < isz4; ) { - for (z = 0; z < NX; z++) { - uint32_t m = R[z] & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[z]>>TF_SHIFT_O1_FAST) + - ((S>>8) & ((1u<>(TF_SHIFT_O1_FAST+8)) * (R[NX-1]>>TF_SHIFT_O1_FAST) - + ((S>>8) & ((1u< -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef NO_THREADS -#include -#endif - -#include "rANS_word.h" -#include "rANS_static4x16.h" -#include "rANS_static16_int.h" -#include "pack.h" -#include "rle.h" -#include "utils.h" - -#define TF_SHIFT 12 -#define TOTFREQ (1<>8) & 0xff; - if (!N) N=4; - - order &= 0xff; - unsigned int sz = (order == 0 - ? 1.05*size + 257*3 + 4 - : 1.05*size + 257*257*3 + 4 + 257*3+4) + - ((order & RANS_ORDER_PACK) ? 1 : 0) + - ((order & RANS_ORDER_RLE) ? 1 + 257*3+4: 0) + 20 + - ((order & RANS_ORDER_X32) ? (32-4)*4 : 0) + - ((order & RANS_ORDER_STRIPE) ? 
7 + 5*N: 0); - return sz + (sz&1) + 2; // make this even so buffers are word aligned -} - -// Compresses in_size bytes from 'in' to *out_size bytes in 'out'. -// -// NB: The output buffer does not hold the original size, so it is up to -// the caller to store this. -unsigned char *rans_compress_O0_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - unsigned char *cp, *out_end; - RansEncSymbol syms[256]; - RansState rans0; - RansState rans2; - RansState rans1; - RansState rans3; - uint8_t* ptr; - uint32_t F[256+MAGIC] = {0}; - int i, j, tab_size = 0, rle, x; - // -20 for order/size/meta - uint32_t bound = rans_compress_bound_4x16(in_size,0)-20; - - if (!out) { - *out_size = bound; - out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - // If "out" isn't word aligned, tweak out_end/ptr to ensure it is. - // We already added more round in bound to allow for this. - if (((size_t)out)&1) - bound--; - ptr = out_end = out + bound; - - if (in_size == 0) - goto empty; - - // Compute statistics - if (hist8(in, in_size, F) < 0) - return NULL; - - // Normalise so frequences sum to power of 2 - uint32_t fsum = in_size; - uint32_t max_val = round2(fsum); - if (max_val > TOTFREQ) - max_val = TOTFREQ; - - if (normalise_freq(F, fsum, max_val) < 0) - return NULL; - fsum=max_val; - - cp = out; - cp += encode_freq(cp, F); - tab_size = cp-out; - //write(2, out+4, cp-(out+4)); - - if (normalise_freq(F, fsum, TOTFREQ) < 0) - return NULL; - - // Encode statistics. 
- for (x = rle = j = 0; j < 256; j++) { - if (F[j]) { - RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); - x += F[j]; - } - } - - RansEncInit(&rans0); - RansEncInit(&rans1); - RansEncInit(&rans2); - RansEncInit(&rans3); - - switch (i=(in_size&3)) { - case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); - case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); - case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); - case 0: - break; - } - for (i=(in_size &~3); i>0; i-=4) { - RansEncSymbol *s3 = &syms[in[i-1]]; - RansEncSymbol *s2 = &syms[in[i-2]]; - RansEncSymbol *s1 = &syms[in[i-3]]; - RansEncSymbol *s0 = &syms[in[i-4]]; - -#if 1 - RansEncPutSymbol(&rans3, &ptr, s3); - RansEncPutSymbol(&rans2, &ptr, s2); - RansEncPutSymbol(&rans1, &ptr, s1); - RansEncPutSymbol(&rans0, &ptr, s0); -#else - // Slightly beter on gcc, much better on clang - uint16_t *ptr16 = (uint16_t *)ptr; - - if (rans3 >= s3->x_max) *--ptr16 = (uint16_t)rans3, rans3 >>= 16; - if (rans2 >= s2->x_max) *--ptr16 = (uint16_t)rans2, rans2 >>= 16; - uint32_t q3 = (uint32_t) (((uint64_t)rans3 * s3->rcp_freq) >> s3->rcp_shift); - uint32_t q2 = (uint32_t) (((uint64_t)rans2 * s2->rcp_freq) >> s2->rcp_shift); - rans3 += s3->bias + q3 * s3->cmpl_freq; - rans2 += s2->bias + q2 * s2->cmpl_freq; - - if (rans1 >= s1->x_max) *--ptr16 = (uint16_t)rans1, rans1 >>= 16; - if (rans0 >= s0->x_max) *--ptr16 = (uint16_t)rans0, rans0 >>= 16; - uint32_t q1 = (uint32_t) (((uint64_t)rans1 * s1->rcp_freq) >> s1->rcp_shift); - uint32_t q0 = (uint32_t) (((uint64_t)rans0 * s0->rcp_freq) >> s0->rcp_shift); - rans1 += s1->bias + q1 * s1->cmpl_freq; - rans0 += s0->bias + q0 * s0->cmpl_freq; - - ptr = (uint8_t *)ptr16; -#endif - } - - RansEncFlush(&rans3, &ptr); - RansEncFlush(&rans2, &ptr); - RansEncFlush(&rans1, &ptr); - RansEncFlush(&rans0, &ptr); - - empty: - // Finalise block size and return it - *out_size = (out_end - ptr) + tab_size; - - memmove(out + tab_size, ptr, out_end-ptr); - - return out; 
-} - -unsigned char *rans_uncompress_O0_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - if (in_size < 16) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *out_free = NULL; - unsigned char *cp_end = in + in_size - 8; // within 8 => be extra safe - int i, j; - unsigned int x, y; - uint16_t sfreq[TOTFREQ+32]; - uint16_t sbase[TOTFREQ+32]; // faster to use 32-bit on clang - uint8_t ssym [TOTFREQ+64]; // faster to use 16-bit on clang - - if (!out) - out_free = out = malloc(out_sz); - if (!out) - return NULL; - - // Precompute reverse lookup of frequency. - uint32_t F[256] = {0}, fsum; - int fsz = decode_freq(cp, cp_end, F, &fsum); - if (!fsz) - goto err; - cp += fsz; - - normalise_freq_shift(F, fsum, TOTFREQ); - - // Build symbols; fixme, do as part of decode, see the _d variant - for (j = x = 0; j < 256; j++) { - if (F[j]) { - if (F[j] > TOTFREQ - x) - goto err; - for (y = 0; y < F[j]; y++) { - ssym [y + x] = j; - sfreq[y + x] = F[j]; - sbase[y + x] = y; - } - x += F[j]; - } - } - - if (x != TOTFREQ) - goto err; - - if (cp+16 > cp_end+8) - goto err; - - RansState R[4]; - RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto err; - RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto err; - RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto err; - RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto err; - -// Simple version is comparable to below, but only with -O3 -// -// for (i = 0; cp < cp_end-8 && i < (out_sz&~7); i+=8) { -// for(j=0; j<8;j++) { -// RansState m = RansDecGet(&R[j%4], TF_SHIFT); -// R[j%4] = sfreq[m] * (R[j%4] >> TF_SHIFT) + sbase[m]; -// out[i+j] = ssym[m]; -// RansDecRenorm(&R[j%4], &cp); -// } -// } - - for (i = 0; cp < cp_end-8 && i < (out_sz&~7); i+=8) { - for (j = 0; j < 
8; j+=4) { - RansState m0 = RansDecGet(&R[0], TF_SHIFT); - RansState m1 = RansDecGet(&R[1], TF_SHIFT); - out[i+j+0] = ssym[m0]; - out[i+j+1] = ssym[m1]; - - R[0] = sfreq[m0] * (R[0] >> TF_SHIFT) + sbase[m0]; - R[1] = sfreq[m1] * (R[1] >> TF_SHIFT) + sbase[m1]; - - RansState m2 = RansDecGet(&R[2], TF_SHIFT); - RansState m3 = RansDecGet(&R[3], TF_SHIFT); - - RansDecRenorm(&R[0], &cp); - RansDecRenorm(&R[1], &cp); - - R[2] = sfreq[m2] * (R[2] >> TF_SHIFT) + sbase[m2]; - R[3] = sfreq[m3] * (R[3] >> TF_SHIFT) + sbase[m3]; - - RansDecRenorm(&R[2], &cp); - RansDecRenorm(&R[3], &cp); - - out[i+j+2] = ssym[m2]; - out[i+j+3] = ssym[m3]; - } - } - - // remainder - for (; i < out_sz; i++) { - RansState m = RansDecGet(&R[i%4], TF_SHIFT); - R[i%4] = sfreq[m] * (R[i%4] >> TF_SHIFT) + sbase[m]; - out[i] = ssym[m]; - RansDecRenormSafe(&R[i%4], &cp, cp_end+8); - } - - //fprintf(stderr, " 0 Decoded %d bytes\n", (int)(cp-in)); //c-size - - return out; - - err: - free(out_free); - return NULL; -} - -//----------------------------------------------------------------------------- - -// Compute the entropy of 12-bit vs 10-bit frequency tables. -// 10 bit means smaller memory footprint when decoding and -// more speed due to cache hits, but it *may* be a poor -// compression fit. 
-int rans_compute_shift(uint32_t *F0, uint32_t (*F)[256], uint32_t *T, - uint32_t *S) { - int i, j; - - double e10 = 0, e12 = 0; - int max_tot = 0; - for (i = 0; i < 256; i++) { - if (F0[i] == 0) - continue; - unsigned int max_val = round2(T[i]); - int ns = 0; -#define MAX(a,b) ((a)>(b)?(a):(b)) - - // Number of samples that get their freq bumped to 1 - int sm10 = 0, sm12 = 0; - for (j = 0; j < 256; j++) { - if (F[i][j] && max_val / F[i][j] > TOTFREQ_O1_FAST) - sm10++; - if (F[i][j] && max_val / F[i][j] > TOTFREQ_O1) - sm12++; - } - - double l10 = log(TOTFREQ_O1_FAST + sm10); - double l12 = log(TOTFREQ_O1 + sm12); - double T_slow = (double)TOTFREQ_O1/T[i]; - double T_fast = (double)TOTFREQ_O1_FAST/T[i]; - - for (j = 0; j < 256; j++) { - if (F[i][j]) { - ns++; - - e10 -= F[i][j] * (fast_log(MAX(F[i][j]*T_fast,1)) - l10); - e12 -= F[i][j] * (fast_log(MAX(F[i][j]*T_slow,1)) - l12); - - // Estimation of compressed symbol freq table too. - e10 += 1.3; - e12 += 4.7; - } - } - - // Order-1 frequencies often end up totalling under TOTFREQ. - // In this case it's smaller to output the real frequencies - // prior to normalisation and normalise after (with an extra - // normalisation step needed in the decoder too). - // - // Thus we normalise to a power of 2 only, store those, - // and renormalise later here (and in decoder) by bit-shift - // to get to the fixed size. - if (ns < 64 && max_val > 128) max_val /= 2; - if (max_val > 1024) max_val /= 2; - if (max_val > TOTFREQ_O1) max_val = TOTFREQ_O1; - S[i] = max_val; // scale to max this - if (max_tot < max_val) - max_tot = max_val; - } - int shift = e10/e12 < 1.01 || max_tot <= TOTFREQ_O1_FAST - ? 
TF_SHIFT_O1_FAST - : TF_SHIFT_O1; - -// fprintf(stderr, "e10/12 = %f %f %f, shift %d\n", -// e10/log(256), e12/log(256), e10/e12, shift); - - return shift; -} - -static -unsigned char *rans_compress_O1_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - unsigned char *cp, *out_end, *out_free = NULL; - unsigned int tab_size; - - // -20 for order/size/meta - uint32_t bound = rans_compress_bound_4x16(in_size,1)-20; - - if (!out) { - *out_size = bound; - out_free = out = malloc(*out_size); - } - if (!out || bound > *out_size) - return NULL; - - if (((size_t)out)&1) - bound--; - out_end = out + bound; - - RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms))); - if (!syms) { - free(out_free); - return NULL; - } - - cp = out; - int shift = encode_freq1(in, in_size, 4, syms, &cp); - if (shift < 0) { - htscodecs_tls_free(syms); - return NULL; - } - tab_size = cp - out; - - RansState rans0, rans1, rans2, rans3; - RansEncInit(&rans0); - RansEncInit(&rans1); - RansEncInit(&rans2); - RansEncInit(&rans3); - - uint8_t* ptr = out_end; - - int isz4 = in_size>>2; - int i0 = 1*isz4-2; - int i1 = 2*isz4-2; - int i2 = 3*isz4-2; - int i3 = 4*isz4-2; - - unsigned char l0 = in[i0+1]; - unsigned char l1 = in[i1+1]; - unsigned char l2 = in[i2+1]; - unsigned char l3 = in[i3+1]; - - // Deal with the remainder - l3 = in[in_size-1]; - for (i3 = in_size-2; i3 > 4*isz4-2; i3--) { - unsigned char c3 = in[i3]; - RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]); - l3 = c3; - } - - for (; i0 >= 0; i0--, i1--, i2--, i3--) { - unsigned char c0, c1, c2, c3; - RansEncSymbol *s3 = &syms[c3 = in[i3]][l3]; - RansEncSymbol *s2 = &syms[c2 = in[i2]][l2]; - RansEncSymbol *s1 = &syms[c1 = in[i1]][l1]; - RansEncSymbol *s0 = &syms[c0 = in[i0]][l0]; - - RansEncPutSymbol(&rans3, &ptr, s3); - RansEncPutSymbol(&rans2, &ptr, s2); - RansEncPutSymbol(&rans1, &ptr, s1); - RansEncPutSymbol(&rans0, &ptr, s0); - - l0 = c0; - l1 = c1; - l2 = c2; - l3 = c3; - } - - 
RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]); - RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]); - RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]); - RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]); - - RansEncFlush(&rans3, &ptr); - RansEncFlush(&rans2, &ptr); - RansEncFlush(&rans1, &ptr); - RansEncFlush(&rans0, &ptr); - - *out_size = (out_end - ptr) + tab_size; - - cp = out; - memmove(out + tab_size, ptr, out_end-ptr); - - htscodecs_tls_free(syms); - return out; -} - -//#define MAGIC2 111 -#define MAGIC2 179 -//#define MAGIC2 0 - -static -unsigned char *rans_uncompress_O1_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int out_sz) { - if (in_size < 16) // 4-states at least - return NULL; - - if (out_sz >= INT_MAX) - return NULL; // protect against some overflow cases - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (out_sz > 100000) - return NULL; -#endif - - /* Load in the static tables */ - unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL; - unsigned char *c_freq = NULL; - int i, j = -999; - unsigned int x; - - uint8_t *sfb_ = htscodecs_tls_alloc(256*(TOTFREQ_O1+MAGIC2)*sizeof(*sfb_)); - uint32_t (*s3)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])sfb_; - // reuse the same memory for the fast mode lookup, but this only works - // if we're on e.g. 12-bit freqs vs 10-bit freqs as needs 4x larger array. - //uint32_t s3[256][TOTFREQ_O1_FAST]; - - if (!sfb_) - return NULL; - fb_t (*fb)[256] = htscodecs_tls_alloc(256 * sizeof(*fb)); - if (!fb) - goto err; - uint8_t *sfb[256]; - if ((*cp >> 4) == TF_SHIFT_O1) { - for (i = 0; i < 256; i++) - sfb[i]= sfb_ + i*(TOTFREQ_O1+MAGIC2); - } else { - for (i = 0; i < 256; i++) - sfb[i]= sfb_ + i*(TOTFREQ_O1_FAST+MAGIC2); - } - - if (!out) - out_free = out = malloc(out_sz); - - if (!out) - goto err; - - //fprintf(stderr, "out_sz=%d\n", out_sz); - - // compressed header? 
If so uncompress it - unsigned char *tab_end = NULL; - unsigned char *c_freq_end = cp_end; - unsigned int shift = *cp >> 4; - if (*cp++ & 1) { - uint32_t u_freq_sz, c_freq_sz; - cp += var_get_u32(cp, cp_end, &u_freq_sz); - cp += var_get_u32(cp, cp_end, &c_freq_sz); - if (c_freq_sz > cp_end - cp) - goto err; - tab_end = cp + c_freq_sz; - if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL, u_freq_sz))) - goto err; - cp = c_freq; - c_freq_end = c_freq + u_freq_sz; - } - - // Decode order-0 symbol list; avoids needing in order-1 tables - uint32_t F0[256] = {0}; - int fsz = decode_alphabet(cp, c_freq_end, F0); - if (!fsz) - goto err; - cp += fsz; - - if (cp >= c_freq_end) - goto err; - - const int s3_fast_on = in_size >= 100000; - - for (i = 0; i < 256; i++) { - if (F0[i] == 0) - continue; - - uint32_t F[256] = {0}, T = 0; - fsz = decode_freq_d(cp, c_freq_end, F0, F, &T); - if (!fsz) - goto err; - cp += fsz; - - if (!T) { - //fprintf(stderr, "No freq for F_%d\n", i); - continue; - } - - normalise_freq_shift(F, T, 1< (1< cp_end) - goto err; - - RansState rans0, rans1, rans2, rans3; - uint8_t *ptr = cp, *ptr_end = in + in_size - 8; - RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto err; - RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto err; - RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto err; - RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto err; - - unsigned int isz4 = out_sz>>2; - int l0 = 0, l1 = 0, l2 = 0, l3 = 0; - unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4}; - - RansState R[4]; - R[0] = rans0; - R[1] = rans1; - R[2] = rans2; - R[3] = rans3; - - // Around 15% faster to specialise for 10/12 than to have one - // loop with shift as a variable. 
- if (shift == TF_SHIFT_O1) { - // TF_SHIFT_O1 = 12 - - const uint32_t mask = ((1u << TF_SHIFT_O1)-1); - for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { - uint16_t m, c; - c = sfb[l0][m = R[0] & mask]; - R[0] = fb[l0][c].f * (R[0]>>TF_SHIFT_O1) + m - fb[l0][c].b; - out[i4[0]] = l0 = c; - - c = sfb[l1][m = R[1] & mask]; - R[1] = fb[l1][c].f * (R[1]>>TF_SHIFT_O1) + m - fb[l1][c].b; - out[i4[1]] = l1 = c; - - c = sfb[l2][m = R[2] & mask]; - R[2] = fb[l2][c].f * (R[2]>>TF_SHIFT_O1) + m - fb[l2][c].b; - out[i4[2]] = l2 = c; - - c = sfb[l3][m = R[3] & mask]; - R[3] = fb[l3][c].f * (R[3]>>TF_SHIFT_O1) + m - fb[l3][c].b; - out[i4[3]] = l3 = c; - - if (ptr < ptr_end) { - RansDecRenorm(&R[0], &ptr); - RansDecRenorm(&R[1], &ptr); - RansDecRenorm(&R[2], &ptr); - RansDecRenorm(&R[3], &ptr); - } else { - RansDecRenormSafe(&R[0], &ptr, ptr_end+8); - RansDecRenormSafe(&R[1], &ptr, ptr_end+8); - RansDecRenormSafe(&R[2], &ptr, ptr_end+8); - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - } - } - - // Remainder - for (; i4[3] < out_sz; i4[3]++) { - uint32_t m3 = R[3] & ((1u<>TF_SHIFT_O1) + m3 - fb[l3][c3].b; - RansDecRenormSafe(&R[3], &ptr, ptr_end + 8); - l3 = c3; - } - } else if (!s3_fast_on) { - // TF_SHIFT_O1 = 10 with sfb[256][1024] & fb[256]256] array lookup - // Slightly faster for -o193 on q4 (high comp), but also less - // initialisation cost for smaller data - const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); - for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { - uint16_t m, c; - c = sfb[l0][m = R[0] & mask]; - R[0] = fb[l0][c].f * (R[0]>>TF_SHIFT_O1_FAST) + m - fb[l0][c].b; - out[i4[0]] = l0 = c; - - c = sfb[l1][m = R[1] & mask]; - R[1] = fb[l1][c].f * (R[1]>>TF_SHIFT_O1_FAST) + m - fb[l1][c].b; - out[i4[1]] = l1 = c; - - c = sfb[l2][m = R[2] & mask]; - R[2] = fb[l2][c].f * (R[2]>>TF_SHIFT_O1_FAST) + m - fb[l2][c].b; - out[i4[2]] = l2 = c; - - c = sfb[l3][m = R[3] & mask]; - R[3] = fb[l3][c].f * (R[3]>>TF_SHIFT_O1_FAST) + m - fb[l3][c].b; - out[i4[3]] = 
l3 = c; - - if (ptr < ptr_end) { - RansDecRenorm(&R[0], &ptr); - RansDecRenorm(&R[1], &ptr); - RansDecRenorm(&R[2], &ptr); - RansDecRenorm(&R[3], &ptr); - } else { - RansDecRenormSafe(&R[0], &ptr, ptr_end+8); - RansDecRenormSafe(&R[1], &ptr, ptr_end+8); - RansDecRenormSafe(&R[2], &ptr, ptr_end+8); - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - } - } - - // Remainder - for (; i4[3] < out_sz; i4[3]++) { - uint32_t m3 = R[3] & ((1u<>TF_SHIFT_O1_FAST) + m3 - fb[l3][c3].b; - RansDecRenormSafe(&R[3], &ptr, ptr_end + 8); - l3 = c3; - } - } else { - // TF_SHIFT_O1_FAST. - // Significantly faster for -o1 on q40 (low comp). - // Higher initialisation cost, so only use if big blocks. - const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); - for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { - uint32_t S0 = s3[l0][R[0] & mask]; - uint32_t S1 = s3[l1][R[1] & mask]; - l0 = out[i4[0]] = S0; - l1 = out[i4[1]] = S1; - uint16_t F0 = S0>>(TF_SHIFT_O1_FAST+8); - uint16_t F1 = S1>>(TF_SHIFT_O1_FAST+8); - uint16_t B0 = (S0>>8) & mask; - uint16_t B1 = (S1>>8) & mask; - - R[0] = F0 * (R[0]>>TF_SHIFT_O1_FAST) + B0; - R[1] = F1 * (R[1]>>TF_SHIFT_O1_FAST) + B1; - - uint32_t S2 = s3[l2][R[2] & mask]; - uint32_t S3 = s3[l3][R[3] & mask]; - l2 = out[i4[2]] = S2; - l3 = out[i4[3]] = S3; - uint16_t F2 = S2>>(TF_SHIFT_O1_FAST+8); - uint16_t F3 = S3>>(TF_SHIFT_O1_FAST+8); - uint16_t B2 = (S2>>8) & mask; - uint16_t B3 = (S3>>8) & mask; - - R[2] = F2 * (R[2]>>TF_SHIFT_O1_FAST) + B2; - R[3] = F3 * (R[3]>>TF_SHIFT_O1_FAST) + B3; - - if (ptr < ptr_end) { - RansDecRenorm(&R[0], &ptr); - RansDecRenorm(&R[1], &ptr); - RansDecRenorm(&R[2], &ptr); - RansDecRenorm(&R[3], &ptr); - } else { - RansDecRenormSafe(&R[0], &ptr, ptr_end+8); - RansDecRenormSafe(&R[1], &ptr, ptr_end+8); - RansDecRenormSafe(&R[2], &ptr, ptr_end+8); - RansDecRenormSafe(&R[3], &ptr, ptr_end+8); - } - } - - // Remainder - for (; i4[3] < out_sz; i4[3]++) { - uint32_t S = s3[l3][R[3] & ((1u<>(TF_SHIFT_O1_FAST+8)) * 
(R[3]>>TF_SHIFT_O1_FAST) - + ((S>>8) & ((1u< - -#if defined(__clang__) && defined(__has_attribute) -# if __has_attribute(unused) -# define UNUSED __attribute__((unused)) -# else -# define UNUSED -# endif -#elif defined(__GNUC__) && __GNUC__ >= 3 -# define UNUSED __attribute__((unused)) -#else -# define UNUSED -#endif - -// CPU detection is performed once. NB this has an assumption that we're -// not migrating between processes with different instruction stes, but -// to date the only systems I know of that support this don't have different -// capabilities (that we use) per core. -#ifndef NO_THREADS -static pthread_once_t rans_cpu_once = PTHREAD_ONCE_INIT; -#endif - -static int have_ssse3 UNUSED = 0; -static int have_sse4_1 UNUSED = 0; -static int have_popcnt UNUSED = 0; -static int have_avx2 UNUSED = 0; -static int have_avx512f UNUSED = 0; -static int is_amd UNUSED = 0; - -#define HAVE_HTSCODECS_TLS_CPU_INIT -static void htscodecs_tls_cpu_init(void) { - unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; - // These may be unused, depending on HAVE_* config.h macros - - int level = __get_cpuid_max(0, NULL); - __cpuid_count(0, 0, eax, ebx, ecx, edx); - is_amd = (ecx == 0x444d4163); - if (level >= 1) { - __cpuid_count(1, 0, eax, ebx, ecx, edx); -#if defined(bit_SSSE3) - have_ssse3 = ecx & bit_SSSE3; -#endif -#if defined(bit_POPCNT) - have_popcnt = ecx & bit_POPCNT; -#endif -#if defined(bit_SSE4_1) - have_sse4_1 = ecx & bit_SSE4_1; -#endif - } - if (level >= 7) { - __cpuid_count(7, 0, eax, ebx, ecx, edx); -#if defined(bit_AVX2) - have_avx2 = ebx & bit_AVX2; -#endif -#if defined(bit_AVX512F) - have_avx512f = ebx & bit_AVX512F; -#endif - } - - if (!have_popcnt) have_avx512f = have_avx2 = have_sse4_1 = 0; - if (!have_ssse3) have_sse4_1 = 0; -} - -static inline -unsigned char *(*rans_enc_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - - int have_e_sse4_1 = have_sse4_1; - int have_e_avx2 = 
have_avx2; - int have_e_avx512f = have_avx512f; - - if (!(rans_cpu & RANS_CPU_ENC_AVX512)) have_e_avx512f = 0; - if (!(rans_cpu & RANS_CPU_ENC_AVX2)) have_e_avx2 = 0; - if (!(rans_cpu & RANS_CPU_ENC_SSE4)) have_e_sse4_1 = 0; - - if (!do_simd) { // SIMD disabled - return order & 1 - ? rans_compress_O1_4x16 - : rans_compress_O0_4x16; - } - -#ifdef NO_THREADS - htscodecs_tls_cpu_init(); -#else - int err = pthread_once(&rans_cpu_once, htscodecs_tls_cpu_init); - if (err != 0) { - fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n", - strerror(err)); - fprintf(stderr, "Using scalar code only\n"); - } -#endif - - if (order & 1) { - // With simulated gathers, the AVX512 is now slower than AVX2, so - // we avoid using it unless asking for the real avx512 gather. - // Note for testing we do -c 0x0404 to enable AVX512 and disable AVX2. - // We then need to call the avx512 func regardless. - int use_gather; -#ifdef USE_GATHER - use_gather = 1; -#else - use_gather = !have_e_avx2; -#endif - -#if defined(HAVE_AVX512) - if (have_e_avx512f && (!is_amd || !have_e_avx2) && use_gather) - return rans_compress_O1_32x16_avx512; -#endif -#if defined(HAVE_AVX2) - if (have_e_avx2) - return rans_compress_O1_32x16_avx2; -#endif -#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) - if (have_e_sse4_1) - return rans_compress_O1_32x16; -#endif - return rans_compress_O1_32x16; - } else { -#if defined(HAVE_AVX512) - if (have_e_avx512f && (!is_amd || !have_e_avx2)) - return rans_compress_O0_32x16_avx512; -#endif -#if defined(HAVE_AVX2) - if (have_e_avx2) - return rans_compress_O0_32x16_avx2; -#endif -#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) - if (have_e_sse4_1) - return rans_compress_O0_32x16; -#endif - return rans_compress_O0_32x16; - } -} - -static inline -unsigned char *(*rans_dec_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_size) { - - int have_d_sse4_1 = 
have_sse4_1; - int have_d_avx2 = have_avx2; - int have_d_avx512f = have_avx512f; - - if (!(rans_cpu & RANS_CPU_DEC_AVX512)) have_d_avx512f = 0; - if (!(rans_cpu & RANS_CPU_DEC_AVX2)) have_d_avx2 = 0; - if (!(rans_cpu & RANS_CPU_DEC_SSE4)) have_d_sse4_1 = 0; - - if (!do_simd) { // SIMD disabled - return order & 1 - ? rans_uncompress_O1_4x16 - : rans_uncompress_O0_4x16; - } - -#ifdef NO_THREADS - htscodecs_tls_cpu_init(); -#else - int err = pthread_once(&rans_cpu_once, htscodecs_tls_cpu_init); - if (err != 0) { - fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n", - strerror(err)); - fprintf(stderr, "Using scalar code only\n"); - } -#endif - - if (order & 1) { -#if defined(HAVE_AVX512) - if (have_d_avx512f) - return rans_uncompress_O1_32x16_avx512; -#endif -#if defined(HAVE_AVX2) - if (have_d_avx2) - return rans_uncompress_O1_32x16_avx2; -#endif -#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) - if (have_d_sse4_1) - return rans_uncompress_O1_32x16_sse4; -#endif - return rans_uncompress_O1_32x16; - } else { -#if defined(HAVE_AVX512) - if (have_d_avx512f) - return rans_uncompress_O0_32x16_avx512; -#endif -#if defined(HAVE_AVX2) - if (have_d_avx2) - return rans_uncompress_O0_32x16_avx2; -#endif -#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) - if (have_d_sse4_1) - return rans_uncompress_O0_32x16_sse4; -#endif - return rans_uncompress_O0_32x16; - } -} - -#elif defined(__ARM_NEON) && defined(__aarch64__) - -#if defined(__linux__) || defined(__FreeBSD__) -#include -#elif defined(_WIN32) -#include -#endif - -static inline int have_neon(void) { -#if defined(__linux__) && defined(__arm__) - return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0; -#elif defined(__linux__) && defined(__aarch64__) && defined(HWCAP_ASIMD) - return (getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0; -#elif defined(__APPLE__) - return 1; -#elif defined(__FreeBSD__) && defined(__arm__) - u_long cap; - if (elf_aux_info(AT_HWCAP, &cap, sizeof cap) != 0) 
return 0; - return (cap & HWCAP_NEON) != 0; -#elif defined(__FreeBSD__) && defined(__aarch64__) && defined(HWCAP_ASIMD) - u_long cap; - if (elf_aux_info(AT_HWCAP, &cap, sizeof cap) != 0) return 0; - return (cap & HWCAP_ASIMD) != 0; -#elif defined(_WIN32) - return IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE) != 0; -#else - return 0; -#endif -} - -static inline -unsigned char *(*rans_enc_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - - if (do_simd) { - if ((rans_cpu & RANS_CPU_ENC_NEON) && have_neon()) - return order & 1 - ? rans_compress_O1_32x16_neon - : rans_compress_O0_32x16_neon; - else - return order & 1 - ? rans_compress_O1_32x16 - : rans_compress_O0_32x16; - } else { - return order & 1 - ? rans_compress_O1_4x16 - : rans_compress_O0_4x16; - } -} - -static inline -unsigned char *(*rans_dec_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_size) { - - if (do_simd) { - if ((rans_cpu & RANS_CPU_DEC_NEON) && have_neon()) - return order & 1 - ? rans_uncompress_O1_32x16_neon - : rans_uncompress_O0_32x16_neon; - else - return order & 1 - ? rans_uncompress_O1_32x16 - : rans_uncompress_O0_32x16; - } else { - return order & 1 - ? rans_uncompress_O1_4x16 - : rans_uncompress_O0_4x16; - } -} - -#else // !(defined(__GNUC__) && defined(__x86_64__)) && !defined(__ARM_NEON) - -static inline -unsigned char *(*rans_enc_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int *out_size) { - - if (do_simd) { - return order & 1 - ? rans_compress_O1_32x16 - : rans_compress_O0_32x16; - } else { - return order & 1 - ? 
rans_compress_O1_4x16 - : rans_compress_O0_4x16; - } -} - -static inline -unsigned char *(*rans_dec_func(int do_simd, int order)) - (unsigned char *in, - unsigned int in_size, - unsigned char *out, - unsigned int out_size) { - - if (do_simd) { - return order & 1 - ? rans_uncompress_O1_32x16 - : rans_uncompress_O0_32x16; - } else { - return order & 1 - ? rans_uncompress_O1_4x16 - : rans_uncompress_O0_4x16; - } -} - -#endif - -// Test interface for restricting the auto-detection methods so we -// can forcibly compare different implementations on the same machine. -// See RANS_CPU_ defines in rANS_static4x16.h -void rans_set_cpu(int opts) { - rans_cpu = opts; -#ifdef HAVE_HTSCODECS_TLS_CPU_INIT - htscodecs_tls_cpu_init(); -#endif -} - -/*----------------------------------------------------------------------------- - * Simple interface to the order-0 vs order-1 encoders and decoders. - * - * Smallest is method, , so worst case 2 bytes longer. - */ -unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out,unsigned int *out_size, - int order) { - if (in_size > INT_MAX) { - *out_size = 0; - return NULL; - } - - unsigned int c_meta_len; - uint8_t *meta = NULL, *rle = NULL, *packed = NULL; - uint8_t *out_free = NULL; - - if (!out) { - *out_size = rans_compress_bound_4x16(in_size, order); - if (*out_size == 0) - return NULL; - if (!(out_free = out = malloc(*out_size))) - return NULL; - } - - unsigned char *out_end = out + *out_size; - - // Permit 32-way unrolling for large blocks, paving the way for - // AVX2 and AVX512 SIMD variants. 
- if ((order & RANS_ORDER_SIMD_AUTO) && in_size >= 50000 - && !(order & RANS_ORDER_STRIPE)) - order |= X_32; - - if (in_size <= 20) - order &= ~RANS_ORDER_STRIPE; -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (in_size <= 1000) - order &= ~RANS_ORDER_X32; -#endif - if (order & RANS_ORDER_STRIPE) { - int N = (order>>8) & 0xff; - if (N == 0) N = 4; // default for compatibility with old tests - - unsigned char *transposed = malloc(in_size); - unsigned int part_len[256]; - unsigned int idx[256]; - if (!transposed) { - free(out_free); - return NULL; - } - int i, j, x; - - for (i = 0; i < N; i++) { - part_len[i] = in_size / N + ((in_size % N) > i); - idx[i] = i ? idx[i-1] + part_len[i-1] : 0; // cumulative index - } - -#define KN 8 - i = x = 0; - if (in_size >= N*KN) { - for (; i < in_size-N*KN;) { - int k; - unsigned char *ink = in+i; - for (j = 0; j < N; j++) - for (k = 0; k < KN; k++) - transposed[idx[j]+x+k] = ink[j+N*k]; - x += KN; i+=N*KN; - } - } -#undef KN - for (; i < in_size-N; i += N, x++) { - for (j = 0; j < N; j++) - transposed[idx[j]+x] = in[i+j]; - } - - for (; i < in_size; i += N, x++) { - for (j = 0; i+j < in_size; j++) - transposed[idx[j]+x] = in[i+j]; - } - - unsigned int olen2; - unsigned char *out2, *out2_start; - c_meta_len = 1; - *out = order & ~RANS_ORDER_NOSZ; - c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size); - out[c_meta_len++] = N; - - unsigned char *out_best = NULL; - unsigned int out_best_len = 0; - - out2_start = out2 = out+7+5*N; // shares a buffer with c_meta - for (i = 0; i < N; i++) { - // Brute force try all methods. 
- int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10; - for (j = 0; j < sizeof(m)/sizeof(*m); j++) { - if ((order & m[j]) != m[j]) - continue; - - // order-1 *only*; bit check above cannot elide order-0 - if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0) - continue; - olen2 = *out_size - (out2 - out); - rans_compress_to_4x16(transposed+idx[i], part_len[i], - out2, &olen2, - m[j] | RANS_ORDER_NOSZ - | (order&RANS_ORDER_X32)); - if (best_sz > olen2) { - best_sz = olen2; - best_j = j; - if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) { - unsigned char *tmp = realloc(out_best, olen2); - if (!tmp) { - free(out_free); - return NULL; - } - out_best = tmp; - out_best_len = olen2; - } - - // Cache a copy of the best so far - memcpy(out_best, out2, olen2); - } - } - if (best_j < sizeof(m)/sizeof(*m)) { - // Copy the best compression to output buffer if not current - memcpy(out2, out_best, best_sz); - olen2 = best_sz; - } - - out2 += olen2; - c_meta_len += var_put_u32(out+c_meta_len, out_end, olen2); - } - if (out_best) - free(out_best); - - memmove(out+c_meta_len, out2_start, out2-out2_start); - free(transposed); - *out_size = c_meta_len + out2-out2_start; - return out; - } - - if (order & RANS_ORDER_CAT) { - out[0] = RANS_ORDER_CAT; - c_meta_len = 1; - c_meta_len += var_put_u32(&out[1], out_end, in_size); - if (in_size) - memcpy(out+c_meta_len, in, in_size); - *out_size = c_meta_len + in_size; - return out; - } - - int do_pack = order & RANS_ORDER_PACK; - int do_rle = order & RANS_ORDER_RLE; - int no_size = order & RANS_ORDER_NOSZ; - int do_simd = order & RANS_ORDER_X32; - - out[0] = order; - c_meta_len = 1; - - if (!no_size) - c_meta_len += var_put_u32(&out[1], out_end, in_size); - - order &= 3; - - // Format is compressed meta-data, compressed data. - // Meta-data can be empty, pack, rle lengths, or pack + rle lengths. - // Data is either the original data, bit-packed packed, rle literals or - // packed + rle literals. 
- - if (do_pack && in_size) { - // PACK 2, 4 or 8 symbols into one byte. - int pmeta_len; - uint64_t packed_len; - packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len); - if (!packed) { - out[0] &= ~RANS_ORDER_PACK; - do_pack = 0; - free(packed); - packed = NULL; - } else { - in = packed; - in_size = packed_len; - c_meta_len += pmeta_len; - - // Could derive this rather than storing verbatim. - // Orig size * 8/nbits (+1 if not multiple of 8/n) - int sz = var_put_u32(out+c_meta_len, out_end, in_size); - c_meta_len += sz; - *out_size -= sz; - } - } else if (do_pack) { - out[0] &= ~RANS_ORDER_PACK; - } - - if (do_rle && in_size) { - // RLE 'in' -> rle_length + rle_literals arrays - unsigned int rmeta_len, c_rmeta_len; - uint64_t rle_len; - c_rmeta_len = in_size+257; - if (!(meta = malloc(c_rmeta_len))) { - free(out_free); - return NULL; - } - - uint8_t rle_syms[256]; - int rle_nsyms = 0; - uint64_t rmeta_len64; - rle = hts_rle_encode(in, in_size, meta, &rmeta_len64, - rle_syms, &rle_nsyms, NULL, &rle_len); - memmove(meta+1+rle_nsyms, meta, rmeta_len64); - meta[0] = rle_nsyms; - memcpy(meta+1, rle_syms, rle_nsyms); - rmeta_len = rmeta_len64 + rle_nsyms+1; - - if (!rle || rle_len + rmeta_len >= .99*in_size) { - // Not worth the speed hit. 
- out[0] &= ~RANS_ORDER_RLE; - do_rle = 0; - free(rle); - rle = NULL; - } else { - // Compress lengths with O0 and literals with O0/O1 ("order" param) - int sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2), sz2; - sz += var_put_u32(out+c_meta_len+sz, out_end, rle_len); - c_rmeta_len = *out_size - (c_meta_len+sz+5); - rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len); - if (c_rmeta_len < rmeta_len) { - sz2 = var_put_u32(out+c_meta_len+sz, out_end, c_rmeta_len); - memmove(out+c_meta_len+sz+sz2, out+c_meta_len+sz+5, c_rmeta_len); - } else { - // Uncompressed RLE meta-data as too small - sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2+1); - sz2 = var_put_u32(out+c_meta_len+sz, out_end, rle_len); - memcpy(out+c_meta_len+sz+sz2, meta, rmeta_len); - c_rmeta_len = rmeta_len; - } - - c_meta_len += sz + sz2 + c_rmeta_len; - - in = rle; - in_size = rle_len; - } - - free(meta); - } else if (do_rle) { - out[0] &= ~RANS_ORDER_RLE; - } - - *out_size -= c_meta_len; - if (order && in_size < 8) { - out[0] &= ~1; - order &= ~1; - } - - rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size); - - if (*out_size >= in_size) { - out[0] &= ~3; - out[0] |= RANS_ORDER_CAT | no_size; - if (in_size) - memcpy(out+c_meta_len, in, in_size); - *out_size = in_size; - } - - free(rle); - free(packed); - - *out_size += c_meta_len; - - return out; -} - -unsigned char *rans_compress_4x16(unsigned char *in, unsigned int in_size, - unsigned int *out_size, int order) { - return rans_compress_to_4x16(in, in_size, NULL, out_size, order); -} - -unsigned char *rans_uncompress_to_4x16(unsigned char *in, unsigned int in_size, - unsigned char *out, unsigned int *out_size) { - unsigned char *in_end = in + in_size; - unsigned char *out_free = NULL, *tmp_free = NULL, *meta_free = NULL; - - if (in_size == 0) - return NULL; - - if (*in & RANS_ORDER_STRIPE) { - unsigned int ulen, olen, c_meta_len = 1; - int i; - uint64_t clen_tot = 0; - - // Decode lengths - 
c_meta_len += var_get_u32(in+c_meta_len, in_end, &ulen); - if (c_meta_len >= in_size) - return NULL; - unsigned int N = in[c_meta_len++]; - if (N < 1) // Must be at least one stripe - return NULL; - unsigned int clenN[256], ulenN[256], idxN[256]; - if (!out) { - if (ulen >= INT_MAX) - return NULL; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (ulen > 100000) - return NULL; -#endif - if (!(out_free = out = malloc(ulen))) { - return NULL; - } - *out_size = ulen; - } - if (ulen != *out_size) { - free(out_free); - return NULL; - } - - for (i = 0; i < N; i++) { - ulenN[i] = ulen / N + ((ulen % N) > i); - idxN[i] = i ? idxN[i-1] + ulenN[i-1] : 0; - c_meta_len += var_get_u32(in+c_meta_len, in_end, &clenN[i]); - clen_tot += clenN[i]; - if (c_meta_len > in_size || clenN[i] > in_size || clenN[i] < 1) { - free(out_free); - return NULL; - } - } - - // We can call this with a larger buffer, but once we've determined - // how much we really use we limit it so the recursion becomes easier - // to limit. 
- if (c_meta_len + clen_tot > in_size) { - free(out_free); - return NULL; - } - in_size = c_meta_len + clen_tot; - - //fprintf(stderr, " stripe meta %d\n", c_meta_len); //c-size - - // Uncompress the N streams - unsigned char *outN = malloc(ulen); - if (!outN) { - free(out_free); - return NULL; - } - for (i = 0; i < N; i++) { - olen = ulenN[i]; - if (in_size < c_meta_len) { - free(out_free); - free(outN); - return NULL; - } - if (!rans_uncompress_to_4x16(in+c_meta_len, in_size-c_meta_len, outN + idxN[i], &olen) - || olen != ulenN[i]) { - free(out_free); - free(outN); - return NULL; - } - c_meta_len += clenN[i]; - } - - unstripe(out, outN, ulen, N, idxN); - - free(outN); - *out_size = ulen; - return out; - } - - int order = *in++; in_size--; - int do_pack = order & RANS_ORDER_PACK; - int do_rle = order & RANS_ORDER_RLE; - int do_cat = order & RANS_ORDER_CAT; - int no_size = order & RANS_ORDER_NOSZ; - int do_simd = order & RANS_ORDER_X32; - order &= 1; - - int sz = 0; - unsigned int osz; - if (!no_size) { - sz = var_get_u32(in, in_end, &osz); - } else - sz = 0, osz = *out_size; - in += sz; - in_size -= sz; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (osz > 100000) - return NULL; -#endif - - if (no_size && !out) - goto err; // Need one or the other - - if (!out) { - *out_size = osz; - if (!(out = out_free = malloc(*out_size))) - return NULL; - } else { - if (*out_size < osz) - goto err; - *out_size = osz; - } - -// if (do_pack || do_rle) { -// in += sz; // size field not needed when pure rANS -// in_size -= sz; -// } - - uint32_t c_meta_size = 0; - unsigned int tmp1_size = *out_size; - unsigned int tmp2_size = *out_size; - unsigned int tmp3_size = *out_size; - unsigned char *tmp1 = NULL, *tmp2 = NULL, *tmp3 = NULL, *tmp = NULL; - - // Need In, Out and Tmp buffers with temporary buffer of the same size - // as output. All use rANS, but with optional transforms (none, RLE, - // Pack, or both). 
- // - // rans unrle unpack - // If none: in -> out - // If RLE: in -> tmp -> out - // If Pack: in -> tmp -> out - // If RLE+Pack: in -> out -> tmp -> out - // tmp1 tmp2 tmp3 - // - // So rans is in -> tmp1 - // RLE is tmp1 -> tmp2 - // Unpack is tmp2 -> tmp3 - - // Format is meta data (Pack and RLE in that order if present), - // followed by rANS compressed data. - - if (do_pack || do_rle) { - if (!(tmp = tmp_free = malloc(*out_size))) - goto err; - if (do_pack && do_rle) { - tmp1 = out; - tmp2 = tmp; - tmp3 = out; - } else if (do_pack) { - tmp1 = tmp; - tmp2 = tmp1; - tmp3 = out; - } else if (do_rle) { - tmp1 = tmp; - tmp2 = out; - tmp3 = out; - } - } else { - // neither - tmp = NULL; - tmp1 = out; - tmp2 = out; - tmp3 = out; - } - - // Decode the bit-packing map. - uint8_t map[16] = {0}; - int npacked_sym = 0; - uint64_t unpacked_sz = 0; // FIXME: rename to packed_per_byte - if (do_pack) { - c_meta_size = hts_unpack_meta(in, in_size, *out_size, map, &npacked_sym); - if (c_meta_size == 0) - goto err; - - unpacked_sz = osz; - in += c_meta_size; - in_size -= c_meta_size; - - // New unpacked size. We could derive this bit from *out_size - // and npacked_sym. - unsigned int osz; - sz = var_get_u32(in, in_end, &osz); - in += sz; - in_size -= sz; - if (osz > tmp1_size) - goto err; - tmp1_size = osz; - } - - uint8_t *meta = NULL; - uint32_t u_meta_size = 0; - if (do_rle) { - // Uncompress meta data - uint32_t c_meta_size, rle_len, sz; - sz = var_get_u32(in, in_end, &u_meta_size); - sz += var_get_u32(in+sz, in_end, &rle_len); - if (rle_len > tmp1_size) // should never grow - goto err; - if (u_meta_size & 1) { - meta = in + sz; - u_meta_size = u_meta_size/2 > (in_end-meta) ? 
(in_end-meta) : u_meta_size/2; - c_meta_size = u_meta_size; - } else { - sz += var_get_u32(in+sz, in_end, &c_meta_size); - u_meta_size /= 2; - - meta_free = meta = rans_dec_func(do_simd, 0)(in+sz, in_size-sz, NULL, u_meta_size); - if (!meta) - goto err; - } - if (c_meta_size+sz > in_size) - goto err; - in += c_meta_size+sz; - in_size -= c_meta_size+sz; - tmp1_size = rle_len; - } - //fprintf(stderr, " meta_size %d bytes\n", (int)(in - orig_in)); //c-size - - // uncompress RLE data. in -> tmp1 - if (in_size) { - if (do_cat) { - //fprintf(stderr, " CAT %d\n", tmp1_size); //c-size - if (tmp1_size > in_size) - goto err; - if (tmp1_size > *out_size) - goto err; - memcpy(tmp1, in, tmp1_size); - } else { - tmp1 = rans_dec_func(do_simd, order)(in, in_size, tmp1, tmp1_size); - if (!tmp1) - goto err; - } - } else { - tmp1_size = 0; - } - tmp2_size = tmp3_size = tmp1_size; - - if (do_rle) { - // Unpack RLE. tmp1 -> tmp2. - if (u_meta_size == 0) - goto err; - uint64_t unrle_size = *out_size; - int rle_nsyms = *meta ? *meta : 256; - if (u_meta_size < 1+rle_nsyms) - goto err; - if (!hts_rle_decode(tmp1, tmp1_size, - meta+1+rle_nsyms, u_meta_size-(1+rle_nsyms), - meta+1, rle_nsyms, tmp2, &unrle_size)) - goto err; - tmp3_size = tmp2_size = unrle_size; - free(meta_free); - meta_free = NULL; - } - if (do_pack) { - // Unpack bits via pack-map. 
tmp2 -> tmp3 - if (npacked_sym == 1) - unpacked_sz = tmp2_size; - //uint8_t *porig = unpack(tmp2, tmp2_size, unpacked_sz, npacked_sym, map); - //memcpy(tmp3, porig, unpacked_sz); - if (!hts_unpack(tmp2, tmp2_size, tmp3, unpacked_sz, npacked_sym, map)) - goto err; - tmp3_size = unpacked_sz; - } - - if (tmp) - free(tmp); - - *out_size = tmp3_size; - return tmp3; - - err: - free(meta_free); - free(out_free); - free(tmp_free); - return NULL; -} - -unsigned char *rans_uncompress_4x16(unsigned char *in, unsigned int in_size, - unsigned int *out_size) { - return rans_uncompress_to_4x16(in, in_size, NULL, out_size); -} diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_word.h b/src/htslib-1.19.1/htscodecs/htscodecs/rANS_word.h deleted file mode 100644 index db60b04..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_word.h +++ /dev/null @@ -1,478 +0,0 @@ -/* rans_byte.h originally from https://github.com/rygorous/ryg_rans - * - * This is a public-domain implementation of several rANS variants. rANS is an - * entropy coder from the ANS family, as described in Jarek Duda's paper - * "Asymmetric numeral systems" (http://arxiv.org/abs/1311.2540). - */ - -/*-------------------------------------------------------------------------- */ -/* rans_byte.h from https://github.com/rygorous/ryg_rans */ - -// Simple byte-aligned rANS encoder/decoder - public domain - Fabian 'ryg' Giesen 2014 -// -// Not intended to be "industrial strength"; just meant to illustrate the general -// idea. - -#ifndef RANS_WORD_HEADER -#define RANS_WORD_HEADER - -#include -#include -#include -#include -#include "htscodecs_endian.h" - -#ifdef assert -#define RansAssert assert -#else -#define RansAssert(x) -#endif - -// READ ME FIRST: -// -// This is designed like a typical arithmetic coder API, but there's three -// twists you absolutely should be aware of before you start hacking: -// -// 1. You need to encode data in *reverse* - last symbol first. rANS works -// like a stack: last in, first out. 
-// 2. Likewise, the encoder outputs bytes *in reverse* - that is, you give -// it a pointer to the *end* of your buffer (exclusive), and it will -// slowly move towards the beginning as more bytes are emitted. -// 3. Unlike basically any other entropy coder implementation you might -// have used, you can interleave data from multiple independent rANS -// encoders into the same bytestream without any extra signaling; -// you can also just write some bytes by yourself in the middle if -// you want to. This is in addition to the usual arithmetic encoder -// property of being able to switch models on the fly. Writing raw -// bytes can be useful when you have some data that you know is -// incompressible, and is cheaper than going through the rANS encode -// function. Using multiple rANS coders on the same byte stream wastes -// a few bytes compared to using just one, but execution of two -// independent encoders can happen in parallel on superscalar and -// Out-of-Order CPUs, so this can be *much* faster in tight decoding -// loops. -// -// This is why all the rANS functions take the write pointer as an -// argument instead of just storing it in some context struct. - -// -------------------------------------------------------------------------- - -// L ('l' in the paper) is the lower bound of our normalization interval. -// Between this and our byte-aligned emission, we use 31 (not 32!) bits. -// This is done intentionally because exact reciprocals for 31-bit uints -// fit in 32-bit uints: this permits some optimizations during encoding. -#define RANS_BYTE_L (1u << 15) // lower bound of our normalization interval - -// State for a rANS encoder. Yep, that's all there is to it. -typedef uint32_t RansState; - -// Initialize a rANS encoder. -static inline void RansEncInit(RansState* r) -{ - *r = RANS_BYTE_L; -} - -// Renormalize the encoder. Internal function. 
-static inline RansState RansEncRenorm(RansState x, uint8_t** pptr, uint32_t freq, uint32_t scale_bits) -{ - uint32_t x_max = ((RANS_BYTE_L >> scale_bits) << 16) * freq-1; // this turns into a shift. - if (x > x_max) { - uint16_t* ptr = (uint16_t *)*pptr; - *--ptr = (uint16_t) (x & 0xffff); - x >>= 16; - *pptr = (uint8_t *)ptr; - } - return x; -} - -// Encodes a single symbol with range start "start" and frequency "freq". -// All frequencies are assumed to sum to "1 << scale_bits", and the -// resulting bytes get written to ptr (which is updated). -// -// NOTE: With rANS, you need to encode symbols in *reverse order*, i.e. from -// beginning to end! Likewise, the output bytestream is written *backwards*: -// ptr starts pointing at the end of the output buffer and keeps decrementing. -static inline void RansEncPut(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits) -{ - // renormalize - RansState x = RansEncRenorm(*r, pptr, freq, scale_bits); - - // x = C(s,x) - *r = ((x / freq) << scale_bits) + (x % freq) + start; -} - -// Flushes the rANS encoder. -static inline void RansEncFlush(RansState* r, uint8_t** pptr) -{ - uint32_t x = *r; - uint8_t* ptr = *pptr; - - ptr -= 4; - ptr[0] = (uint8_t) (x >> 0); - ptr[1] = (uint8_t) (x >> 8); - ptr[2] = (uint8_t) (x >> 16); - ptr[3] = (uint8_t) (x >> 24); - - *pptr = ptr; -} - -// Initializes a rANS decoder. -// Unlike the encoder, the decoder works forwards as you'd expect. -static inline void RansDecInit(RansState* r, uint8_t** pptr) -{ - uint32_t x; - uint8_t* ptr = *pptr; - - x = ptr[0] << 0; - x |= ptr[1] << 8; - x |= ptr[2] << 16; - x |= ((uint32_t)ptr[3]) << 24; - ptr += 4; - - *pptr = ptr; - *r = x; -} - -// Returns the current cumulative frequency (map it to a symbol yourself!) 
-static inline uint32_t RansDecGet(RansState* r, uint32_t scale_bits) -{ - return *r & ((1u << scale_bits) - 1); -} - -// Advances in the bit stream by "popping" a single symbol with range start -// "start" and frequency "freq". All frequencies are assumed to sum to "1 << scale_bits", -// and the resulting bytes get written to ptr (which is updated). -static inline void RansDecAdvance(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits) -{ - uint32_t mask = (1u << scale_bits) - 1; - - // s, x = D(x) - uint32_t x = *r; - x = freq * (x >> scale_bits) + (x & mask) - start; - - // renormalize - if (x < RANS_BYTE_L) { - uint8_t* ptr = *pptr; - do x = (x << 8) | *ptr++; while (x < RANS_BYTE_L); - *pptr = ptr; - } - - *r = x; -} - -// -------------------------------------------------------------------------- - -// That's all you need for a full encoder; below here are some utility -// functions with extra convenience or optimizations. - -// Encoder symbol description -// This (admittedly odd) selection of parameters was chosen to make -// RansEncPutSymbol as cheap as possible. -typedef struct { - uint32_t x_max; // (Exclusive) upper bound of pre-normalization interval - uint32_t rcp_freq; // Fixed-point reciprocal frequency - uint32_t bias; // Bias - - // NB: This pair are read as a 32-bit value by the SIMD o1 encoder. - uint16_t cmpl_freq; // Complement of frequency: (1 << scale_bits) - freq - uint16_t rcp_shift; // Reciprocal shift -} RansEncSymbol; - -// As above, but with cmpl_freq and rcp_shift combined into -// a single value. This could be done with a cast, but it avoids -// a type punning error. We could use a union, but anonymous unions -// are C11 only (still that's 10 year old!). For now we just cheat -// instead. 
-typedef struct { - uint32_t x_max; // (Exclusive) upper bound of pre-normalization interval - uint32_t rcp_freq; // Fixed-point reciprocal frequency - uint32_t bias; // Bias - - uint32_t cmpl_freq; // cmpl_freq+rcp_shift -} RansEncSymbol_simd; - -// Decoder symbols are straightforward. -typedef struct { - uint16_t start; // Start of range. - uint16_t freq; // Symbol frequency. -} RansDecSymbol; - -// Initializes an encoder symbol to start "start" and frequency "freq" -static inline void RansEncSymbolInit(RansEncSymbol* s, uint32_t start, uint32_t freq, uint32_t scale_bits) -{ - RansAssert(scale_bits <= 16); - RansAssert(start <= (1u << scale_bits)); - RansAssert(freq <= (1u << scale_bits) - start); - - // Say M := 1 << scale_bits. - // - // The original encoder does: - // x_new = (x/freq)*M + start + (x%freq) - // - // The fast encoder does (schematically): - // q = mul_hi(x, rcp_freq) >> rcp_shift (division) - // r = x - q*freq (remainder) - // x_new = q*M + bias + r (new x) - // plugging in r into x_new yields: - // x_new = bias + x + q*(M - freq) - // =: bias + x + q*cmpl_freq (*) - // - // and we can just precompute cmpl_freq. Now we just need to - // set up our parameters such that the original encoder and - // the fast encoder agree. - - s->x_max = ((RANS_BYTE_L >> scale_bits) << 16) * freq -1; - s->cmpl_freq = (uint16_t) ((1 << scale_bits) - freq); - if (freq < 2) { - // freq=0 symbols are never valid to encode, so it doesn't matter what - // we set our values to. - // - // freq=1 is tricky, since the reciprocal of 1 is 1; unfortunately, - // our fixed-point reciprocal approximation can only multiply by values - // smaller than 1. - // - // So we use the "next best thing": rcp_freq=0xffffffff, rcp_shift=0. - // This gives: - // q = mul_hi(x, rcp_freq) >> rcp_shift - // = mul_hi(x, (1<<32) - 1)) >> 0 - // = floor(x - x/(2^32)) - // = x - 1 if 1 <= x < 2^32 - // and we know that x>0 (x=0 is never in a valid normalization interval). 
- // - // So we now need to choose the other parameters such that - // x_new = x*M + start - // plug it in: - // x*M + start (desired result) - // = bias + x + q*cmpl_freq (*) - // = bias + x + (x - 1)*(M - 1) (plug in q=x-1, cmpl_freq) - // = bias + 1 + (x - 1)*M - // = x*M + (bias + 1 - M) - // - // so we have start = bias + 1 - M, or equivalently - // bias = start + M - 1. - s->rcp_freq = ~0u; - s->rcp_shift = 0; - s->bias = start + (1 << scale_bits) - 1; - } else { - // Alverson, "Integer Division using reciprocals" - // shift=ceil(log2(freq)) - uint32_t shift = 0; - while (freq > (1u << shift)) - shift++; - - s->rcp_freq = (uint32_t) (((1ull << (shift + 31)) + freq-1) / freq); - s->rcp_shift = shift - 1; - - // With these values, 'q' is the correct quotient, so we - // have bias=start. - s->bias = start; - } - - s->rcp_shift += 32; // Avoid the extra >>32 in RansEncPutSymbol -} - -// Initialize a decoder symbol to start "start" and frequency "freq" -static inline void RansDecSymbolInit(RansDecSymbol* s, uint32_t start, uint32_t freq) -{ - RansAssert(start <= (1 << 16)); - RansAssert(freq <= (1 << 16) - start); - s->start = (uint16_t) start; - s->freq = (uint16_t) freq; -} - -// Encodes a given symbol. This is faster than straight RansEnc since we can do -// multiplications instead of a divide. -// -// See RansEncSymbolInit for a description of how this works. -static inline void RansEncPutSymbol(RansState* r, uint8_t** pptr, RansEncSymbol const* sym) -{ - //RansAssert(sym->x_max != 0); // can't encode symbol with freq=0 - - // renormalize - uint32_t x = *r; - uint32_t x_max = sym->x_max; - -#ifdef HTSCODECS_LITTLE_ENDIAN - // Branchless renorm. - // - // This works best on high entropy data where branch prediction - // is poor. - // - // Note the bit-packing and RLE modes are more likely to be used on - // low entropy data, making this assertion generally true. See - // RansEncPutSymbol_branched for a low-entropy optimised function. 
- - // NB: "(x > x_max)*2" turns back into branched code with gcc. - int c = (x > x_max); c*=2; - memcpy(*pptr-2, &x, 2); - x >>= c*8; - *pptr = *pptr - c; -#else - if (x > x_max) { - uint8_t* ptr = *pptr; - ptr -= 2; - ptr[0] = x & 0xff; - ptr[1] = (x >> 8) & 0xff; - x >>= 16; - *pptr = ptr; - } -#endif - - // x = C(s,x) - // NOTE: written this way so we get a 32-bit "multiply high" when - // available. If you're on a 64-bit platform with cheap multiplies - // (e.g. x64), just bake the +32 into rcp_shift. - //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> 32) >> sym->rcp_shift; - - // Slow method, but robust -// *r = ((x / sym->freq) << sym->scale_bits) + (x % sym->freq) + sym->start; -// return; - - // The extra >>32 has already been added to RansEncSymbolInit - uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift); - *r = x + sym->bias + q * sym->cmpl_freq; - -// assert(((x / sym->freq) << sym->scale_bits) + (x % sym->freq) + sym->start == *r); -} - -static inline void RansEncPutSymbol_branched(RansState* r, uint8_t** pptr, RansEncSymbol const* sym) -{ - //RansAssert(sym->x_max != 0); // can't encode symbol with freq=0 - - // renormalize - uint32_t x = *r; - uint32_t x_max = sym->x_max; - -#ifdef HTSCODECS_LITTLE_ENDIAN - // The old non-branchless method - if (x > x_max) { - (*pptr) -= 2; - memcpy(*pptr, &x, 2); - x >>= 16; - } -#else - if (x > x_max) { - uint8_t* ptr = *pptr; - ptr -= 2; - ptr[0] = x & 0xff; - ptr[1] = (x >> 8) & 0xff; - x >>= 16; - *pptr = ptr; - } -#endif - - // x = C(s,x) - // NOTE: written this way so we get a 32-bit "multiply high" when - // available. If you're on a 64-bit platform with cheap multiplies - // (e.g. x64), just bake the +32 into rcp_shift. 
- //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> 32) >> sym->rcp_shift; - - // Slow method, but robust -// *r = ((x / sym->freq) << sym->scale_bits) + (x % sym->freq) + sym->start; -// return; - - // The extra >>32 has already been added to RansEncSymbolInit - uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift); - *r = x + sym->bias + q * sym->cmpl_freq; - -// assert(((x / sym->freq) << sym->scale_bits) + (x % sym->freq) + sym->start == *r); -} - -// Equivalent to RansDecAdvance that takes a symbol. -static inline void RansDecAdvanceSymbol(RansState* r, uint8_t** pptr, RansDecSymbol const* sym, uint32_t scale_bits) -{ - RansDecAdvance(r, pptr, sym->start, sym->freq, scale_bits); -} - -// Advances in the bit stream by "popping" a single symbol with range start -// "start" and frequency "freq". All frequencies are assumed to sum to "1 << scale_bits". -// No renormalization or output happens. -static inline void RansDecAdvanceStep(RansState* r, uint32_t start, uint32_t freq, uint32_t scale_bits) -{ - uint32_t mask = (1u << scale_bits) - 1; - - // s, x = D(x) - uint32_t x = *r; - *r = freq * (x >> scale_bits) + (x & mask) - start; -} - -// Equivalent to RansDecAdvanceStep that takes a symbol. -static inline void RansDecAdvanceSymbolStep(RansState* r, RansDecSymbol const* sym, uint32_t scale_bits) -{ - RansDecAdvanceStep(r, sym->start, sym->freq, scale_bits); -} - -// Renormalize. - -#if defined(__x86_64) && !defined(__ILP32__) - -/* - * Assembly variants of the RansDecRenorm code. - * These are based on joint ideas from Rob Davies and from looking at - * the clang assembly output. 
- */ -static inline void RansDecRenorm(RansState* r, uint8_t** pptr) { - // q4 q40 - // clang 730/608 717/467 - // gcc8 733/588 737/458 - uint32_t x = *r; - uint8_t *ptr = *pptr; - __asm__ ("movzwl (%0), %%eax\n\t" - "mov %1, %%edx\n\t" - "shl $0x10, %%edx\n\t" - "or %%eax, %%edx\n\t" - "xor %%eax, %%eax\n\t" - "cmp $0x8000,%1\n\t" - "cmovb %%edx, %1\n\t" - "lea 2(%0), %%rax\n\t" - "cmovb %%rax, %0\n\t" - : "=r" (ptr), "=r" (x) - : "0" (ptr), "1" (x) - : "eax", "edx" - ); - *pptr = (uint8_t *)ptr; - *r = x; -} - -#else /* __x86_64 */ - -static inline void RansDecRenorm(RansState* r, uint8_t** pptr) -{ - // renormalize, branchless - uint32_t x = *r; - int cmp = (x < RANS_BYTE_L)*2; - uint32_t y = (*pptr)[0] + ((*pptr)[1]<<8); - uint32_t x2 = (x << 16) | y; - x = cmp ? x2 : x; - (*pptr) += cmp; - *r = x; - -// // renormalize, branched. Faster on low-complexity data, but generally -// // that is best compressed with PACK and/or RLE which turns it back -// // into high complexity data. -// uint32_t x = *r; -// uint32_t y = (*pptr)[0] | ((*pptr)[1]<<8); -// -// if (x < RANS_BYTE_L) -// (*pptr)+=2; -// if (x < RANS_BYTE_L) -// x = (x << 16) | y; -// -// *r = x; -} -#endif /* __x86_64 */ - -// Note the data may not be word aligned here. -// This function is only used sparingly, for the last few bytes in the buffer, -// so speed isn't critical. -static inline void RansDecRenormSafe(RansState* r, uint8_t** pptr, uint8_t *ptr_end) -{ - uint32_t x = *r; - if (x >= RANS_BYTE_L || *pptr+1 >= ptr_end) return; - uint16_t y = (*pptr)[0] + ((*pptr)[1]<<8); - x = (x << 16) | y; - (*pptr) += 2; - *r = x; -} - -#endif // RANS_WORD_HEADER diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rle.c b/src/htslib-1.19.1/htscodecs/htscodecs/rle.c deleted file mode 100644 index 863dde2..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/rle.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2019-2021 Genome Research Ltd. 
- * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#include -#include -#include -#include - -#include "varint.h" -#include "rle.h" - -#define MAGIC 8 - -//----------------------------------------------------------------------------- -// Auto compute rle_syms / rle_nsyms -static void rle_find_syms(uint8_t *data, uint64_t data_len, - int64_t *saved, // dim >= 256 - uint8_t *rle_syms, int *rle_nsyms) { - int last = -1, n; - uint64_t i; - - if (data_len > 256) { - // 186/450 - // Interleaved buffers to avoid cache collisions - int64_t saved2[256+MAGIC] = {0}; - int64_t saved3[256+MAGIC] = {0}; - int64_t saved4[256+MAGIC] = {0}; - int64_t len4 = data_len&~3; - for (i = 0; i < len4; i+=4) { - int d1 = (data[i+0] == last) <<1; - int d2 = (data[i+1] == data[i+0])<<1; - int d3 = (data[i+2] == data[i+1])<<1; - int d4 = (data[i+3] == data[i+2])<<1; - last = data[i+3]; - saved [data[i+0]] += d1-1; - saved2[data[i+1]] += d2-1; - saved3[data[i+2]] += d3-1; - saved4[data[i+3]] += d4-1; - } - while (i < data_len) { - int d = (data[i] == last)<<1; - saved[data[i]] += d - 1; - last = data[i]; - i++; - } - for (i = 0; i < 256; i++) - saved[i] += saved2[i] + saved3[i] + saved4[i]; - } else { - // 163/391 - for (i = 0; i < data_len; i++) { - if (data[i] == last) { - saved[data[i]]++; - } else { - saved[data[i]]--; - last = data[i]; - } - } - } - - // Map back to a list - for (i = n = 0; i < 256; i++) { - if (saved[i] > 0) - rle_syms[n++] = i; - } - *rle_nsyms = n; -} - -uint8_t *hts_rle_encode(uint8_t *data, uint64_t data_len, - uint8_t *run, uint64_t *run_len, - uint8_t *rle_syms, int *rle_nsyms, - uint8_t *out, uint64_t *out_len) { - uint64_t i, j, k; - if (!out) - if (!(out = malloc(data_len*2))) - return NULL; - - // Two pass: Firstly compute which symbols are worth using RLE on. 
- int64_t saved[256+MAGIC] = {0}; - - if (*rle_nsyms) { - for (i = 0; i < *rle_nsyms; i++) - saved[rle_syms[i]] = 1; - } else { - // Writes back to rle_syms and rle_nsyms - rle_find_syms(data, data_len, saved, rle_syms, rle_nsyms); - } - - // 2nd pass: perform RLE itself to out[] and run[] arrays. - for (i = j = k = 0; i < data_len; i++) { - out[k++] = data[i]; - if (saved[data[i]] > 0) { - int rlen = i; - int last = data[i]; - while (i < data_len && data[i] == last) - i++; - i--; - rlen = i-rlen; - - j += var_put_u32(&run[j], NULL, rlen); - } - } - - *run_len = j; - *out_len = k; - return out; -} - -// On input *out_len holds the allocated size of out[]. -// On output it holds the used size of out[]. -uint8_t *hts_rle_decode(uint8_t *lit, uint64_t lit_len, - uint8_t *run, uint64_t run_len, - uint8_t *rle_syms, int rle_nsyms, - uint8_t *out, uint64_t *out_len) { - uint64_t j; - uint8_t *run_end = run + run_len; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (*out_len > 100000) - return NULL; -#endif - - int saved[256] = {0}; - for (j = 0; j < rle_nsyms; j++) - saved[rle_syms[j]] = 1; - - uint8_t *lit_end = lit + lit_len; - uint8_t *out_end = out + *out_len; - uint8_t *outp = out; - - while (lit < lit_end) { - if (outp >= out_end) - goto err; - - uint8_t b = *lit; - if (saved[b]) { - uint32_t rlen; - run += var_get_u32(run, run_end, &rlen); - if (rlen) { - if (outp + rlen >= out_end) - goto err; - memset(outp, b, rlen+1); - outp += rlen+1; - } else { - *outp++ = b; - } - } else { - *outp++ = b; - } - lit++; - } - - *out_len = outp-out; - return out; - - err: - return NULL; -} - -// Deprecated interface; to remove when we next to an ABI breakage -uint8_t *rle_encode(uint8_t *data, uint64_t data_len, - uint8_t *run, uint64_t *run_len, - uint8_t *rle_syms, int *rle_nsyms, - uint8_t *out, uint64_t *out_len) { - return hts_rle_encode(data, data_len, run, run_len, - rle_syms, rle_nsyms, out, out_len); -} - -// Deprecated interface; to remove when we next to an 
ABI breakage -uint8_t *rle_decode(uint8_t *lit, uint64_t lit_len, - uint8_t *run, uint64_t run_len, - uint8_t *rle_syms, int rle_nsyms, - uint8_t *out, uint64_t *out_len) { - return hts_rle_decode(lit, lit_len, run, run_len, - rle_syms, rle_nsyms, out, out_len); -} diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rle.h b/src/htslib-1.19.1/htscodecs/htscodecs/rle.h deleted file mode 100644 index b2f0671..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/rle.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef HTS_RLE_H -#define HTS_RLE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Performs run length encoding of a byte stream, turning it into a - * list of lengths and a list of literals. - * - * The method used is a bit different to traditional run length - * encoding. It always outputs run-lengths for symbols in the - * 'rle_syms' list (even if that length is +0 more), and never outputs - * lengths for symbols not in that list. - * - * "run" should be preallocated to be large enough; - * e.g at least data_len bytes long as a worse case. - * "rle_syms" should be allocated to be at least 256 bytes. - * - * If *rle_nsyms is zero this function will survey the input data - * first to choose symbols automatically, writing back to rle_syms and - * rle_nsyms. - * - * The "out" buffer may be passed in as NULL in which case it is - * allocated and returned (and is up to the caller to free). - * Otherwise if specified as non-NULL it will be written to, but - * it is up to the caller to ensure the buffer size is large enough. - * A worst case scenario is 2*data_len. - * - * Returns the literal buffer on success with new length in out_len, - * also fills out run buffer and run_len, and potentially - * updates rle_syms / rle_nsyms too. 
- * Returns NULL of failure - */ -uint8_t *hts_rle_encode(uint8_t *data, uint64_t data_len, - uint8_t *run, uint64_t *run_len, - uint8_t *rle_syms, int *rle_nsyms, - uint8_t *out, uint64_t *out_len); - -/* - * Expands a run lengthed data steam from a pair of literal and - * run-length buffers. - * - * On input *out_len holds the length of the supplied out - * buffer. On exit, it holds the used portion of this buffer. - * - * Returns uncompressed data (out) on success, - * NULL on failure. - */ -uint8_t *hts_rle_decode(uint8_t *lit, uint64_t lit_len, - uint8_t *run, uint64_t run_len, - uint8_t *rle_syms, int rle_nsyms, - uint8_t *out, uint64_t *out_len); - -// TODO: Add rle scanning func to compute rle_syms. - -#ifdef __cplusplus -} -#endif - -#endif /* HTS_RLE_H */ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/tokenise_name3.c b/src/htslib-1.19.1/htscodecs/htscodecs/tokenise_name3.c deleted file mode 100644 index b57ccd1..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/tokenise_name3.c +++ /dev/null @@ -1,1818 +0,0 @@ -/* - * Copyright (c) 2016-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// cc -O3 -g -DTEST_TOKENISER tokenise_name3.c arith_dynamic.c rANS_static4x16pr.c pooled_alloc.c -I.. -I. -lbz2 -pthread - -// Name tokeniser. -// It generates a series of byte streams (per token) and compresses these -// either using static rANS or dynamic arithmetic coding. Arith coding is -// typically 1-5% smaller, but around 50-100% slower. We only envisage it -// being used at the higher compression levels. - -// TODO -// -// - Is it better when encoding 1, 2, 3, 3, 4, 5, 5, 6, 7, 9, 9, 10 to encode -// this as a mixture of MATCH and DELTA ops, or as entirely as DELTA ops -// with some delta values being zero? I suspect the latter, but it is -// not implemented here. See "last_token_delta" comments in code. -// -// - Consider variable size string implementations. -// Pascal style strings (length + str), -// C style strings (nul terminated), -// Or split blocks: length block and string contents block. -// -// - Is this one token-block or many serialised token-blocks? -// A) Lots of different models but feeding one bit-buffer emitted to -// by the entropy encoder => one block (fqzcomp). 
-// B) Lots of different models each feeding their own bit-buffers -// => many blocks. -// -// - multiple integer types depending on size; 1, 2, 4 byte long. -// -// - Consider token choice for isalnum instead of isalpha. Sometimes better. -// -// - Consider token synchronisation (eg on matching chr symbols?) incase of -// variable number. Eg consider foo:0999, foo:1000, foo:1001 (the leading -// zero adds an extra token). -// -// - Optimisation of tokens. Eg: -// HS25_09827:2:2102:11274:80442#49 -// HS25_09827:2:2109:12941:31311#49 -// -// We'll have tokens for HS 25 _ 09827 : 2 : that are entirely -// after the initial token. These 7 tokens could be one ALPHA instead -// of 7 distinct tokens, with 1 MATCH instead of 7. This is both a speed -// improvement for decoding as well as a space saving (fewer token-blocks -// and associated overhead). -// -// - XOR. Like ALPHA, but used when previous symbol is ALPHA or XOR -// and string lengths match. Useful when names are similar, eg: -// the sequence in 07.names: -// -// @VP2-06:112:H7LNDMCVY:1:1105:26919:1172 1:N:0:ATTCAGAA+AGGAGAAG -// @VP2-06:112:H7LNDMCVY:1:1105:27100:1172 1:N:0:ATTCAGAA+AGGCGAAG -// @VP2-06:112:H7LNDMCVY:1:1105:27172:1172 1:N:0:ATTCAGAA+AGGCTAAG - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pooled_alloc.h" -#include "arith_dynamic.h" -#include "rANS_static4x16.h" -#include "tokenise_name3.h" -#include "varint.h" -#include "utils.h" - -// 128 is insufficient for SAM names (max 256 bytes) as -// we may alternate a0a0a0a0a0 etc. However if we fail, -// we just give up and switch to another codec, so this -// isn't a serious limit. Maybe up to 256 to permit all -// SAM names? 
-#define MAX_TOKENS 128 -#define MAX_TBLOCKS (MAX_TOKENS<<4) - -// Number of names per block -#define MAX_NAMES 1000000 - -enum name_type {N_ERR = -1, N_TYPE = 0, N_ALPHA, N_CHAR, N_DIGITS0, N_DZLEN, N_DUP, N_DIFF, - N_DIGITS, N_DDELTA, N_DDELTA0, N_MATCH, N_NOP, N_END, N_ALL}; - -typedef struct trie { - struct trie *next, *sibling; - int count; - uint32_t c:8; - uint32_t n:24; // Nth line -} trie_t; - -typedef struct { - enum name_type token_type; - int token_int; - int token_str; -} last_context_tok; - -typedef struct { - char *last_name; - int last_ntok; - last_context_tok *last; // [last_ntok] -} last_context; - -typedef struct { - uint8_t *buf; - size_t buf_a, buf_l; // alloc and used length. - int tnum, ttype; - int dup_from; -} descriptor; - -typedef struct { - last_context *lc; - - // For finding entire line dups - int counter; - - // Trie used in encoder only - trie_t *t_head; - pool_alloc_t *pool; - - // token blocks - descriptor desc[MAX_TBLOCKS]; - - // summary stats per token - int token_dcount[MAX_TOKENS]; - int token_icount[MAX_TOKENS]; - //int token_zcount[MAX_TOKENS]; - - int max_tok; // tracks which desc/[id]count elements have been initialised - int max_names; -} name_context; - -static name_context *create_context(int max_names) { - if (max_names <= 0) - return NULL; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (max_names > 100000) - return NULL; -#endif - - // An arbitrary limit to prevent malformed data from consuming excessive - // amounts of memory. Consider upping this if we have genuine use cases - // for larger blocks. 
- if (max_names > 1e7) { - fprintf(stderr, "Name codec currently has a max of 10 million rec.\n"); - return NULL; - } - - name_context *ctx = htscodecs_tls_alloc(sizeof(*ctx) + - ++max_names*sizeof(*ctx->lc)); - if (!ctx) return NULL; - ctx->max_names = max_names; - - ctx->counter = 0; - ctx->t_head = NULL; - - ctx->lc = (last_context *)(((char *)ctx) + sizeof(*ctx)); - ctx->pool = NULL; - - memset(&ctx->desc[0], 0, 2*16 * sizeof(ctx->desc[0])); - memset(&ctx->token_dcount[0], 0, sizeof(int)); - memset(&ctx->token_icount[0], 0, sizeof(int)); - memset(&ctx->lc[0], 0, max_names*sizeof(ctx->lc[0])); - ctx->max_tok = 1; - - ctx->lc[0].last_ntok = 0; - - return ctx; -} - -static void free_context(name_context *ctx) { - if (!ctx) - return; - - if (ctx->t_head) - free(ctx->t_head); - if (ctx->pool) - pool_destroy(ctx->pool); - - int i; - for (i = 0; i < ctx->max_tok*16; i++) - free(ctx->desc[i].buf); - - for (i = 0; i < ctx->max_names; i++) - free(ctx->lc[i].last); - - htscodecs_tls_free(ctx); -} - -//----------------------------------------------------------------------------- -// Fast unsigned integer printing code. -// Returns number of bytes written. 
-static int append_uint32_fixed(char *cp, uint32_t i, uint8_t l) { - switch (l) { - case 9:*cp++ = i / 100000000 + '0', i %= 100000000; - case 8:*cp++ = i / 10000000 + '0', i %= 10000000; - case 7:*cp++ = i / 1000000 + '0', i %= 1000000; - case 6:*cp++ = i / 100000 + '0', i %= 100000; - case 5:*cp++ = i / 10000 + '0', i %= 10000; - case 4:*cp++ = i / 1000 + '0', i %= 1000; - case 3:*cp++ = i / 100 + '0', i %= 100; - case 2:*cp++ = i / 10 + '0', i %= 10; - case 1:*cp++ = i + '0'; - case 0:break; - } - return l; -} - -static int append_uint32_var(char *cp, uint32_t i) { - char *op = cp; - uint32_t j; - - //if (i < 10) goto b0; - if (i < 100) goto b1; - //if (i < 1000) goto b2; - if (i < 10000) goto b3; - //if (i < 100000) goto b4; - if (i < 1000000) goto b5; - //if (i < 10000000) goto b6; - if (i < 100000000) goto b7; - - if ((j = i / 1000000000)) {*cp++ = j + '0'; i -= j*1000000000; goto x8;} - if ((j = i / 100000000)) {*cp++ = j + '0'; i -= j*100000000; goto x7;} - b7:if ((j = i / 10000000)) {*cp++ = j + '0'; i -= j*10000000; goto x6;} - if ((j = i / 1000000)) {*cp++ = j + '0', i -= j*1000000; goto x5;} - b5:if ((j = i / 100000)) {*cp++ = j + '0', i -= j*100000; goto x4;} - if ((j = i / 10000)) {*cp++ = j + '0', i -= j*10000; goto x3;} - b3:if ((j = i / 1000)) {*cp++ = j + '0', i -= j*1000; goto x2;} - if ((j = i / 100)) {*cp++ = j + '0', i -= j*100; goto x1;} - b1:if ((j = i / 10)) {*cp++ = j + '0', i -= j*10; goto x0;} - if (i) *cp++ = i + '0'; - return cp-op; - - x8:*cp++ = i / 100000000 + '0', i %= 100000000; - x7:*cp++ = i / 10000000 + '0', i %= 10000000; - x6:*cp++ = i / 1000000 + '0', i %= 1000000; - x5:*cp++ = i / 100000 + '0', i %= 100000; - x4:*cp++ = i / 10000 + '0', i %= 10000; - x3:*cp++ = i / 1000 + '0', i %= 1000; - x2:*cp++ = i / 100 + '0', i %= 100; - x1:*cp++ = i / 10 + '0', i %= 10; - x0:*cp++ = i + '0'; - - return cp-op; -} - -//----------------------------------------------------------------------------- -// Example descriptor encoding and IO. 
-// -// Here we just append to a buffer so we can dump out the results. -// These could then be passed through a static entropy encoder that -// encodes the entire buffer. -// -// Alternatively an adaptive entropy encoder could be place inline -// here to encode as it goes using additional knowledge from the -// supplied context. - -// Ensure room for sz more bytes. -static int descriptor_grow(descriptor *fd, uint32_t sz) { - while (fd->buf_l + sz > fd->buf_a) { - size_t buf_a = fd->buf_a ? fd->buf_a*2 : 65536; - unsigned char *buf = realloc(fd->buf, buf_a); - if (!buf) - return -1; - fd->buf = buf; - fd->buf_a = buf_a; - } - - return 0; -} - -static int encode_token_type(name_context *ctx, int ntok, - enum name_type type) { - int id = ntok<<4; - - if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; - - ctx->desc[id].buf[ctx->desc[id].buf_l++] = type; - - return 0; -} - -static int encode_token_match(name_context *ctx, int ntok) { - return encode_token_type(ctx, ntok, N_MATCH); -} - -static int encode_token_end(name_context *ctx, int ntok) { - return encode_token_type(ctx, ntok, N_END); -} - -static enum name_type decode_token_type(name_context *ctx, int ntok) { - int id = ntok<<4; - if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) return -1; - return ctx->desc[id].buf[ctx->desc[id].buf_l++]; -} - -// int stored as 32-bit quantities -static int encode_token_int(name_context *ctx, int ntok, - enum name_type type, uint32_t val) { - int id = (ntok<<4) | type; - - if (encode_token_type(ctx, ntok, type) < 0) return -1; - if (descriptor_grow(&ctx->desc[id], 4) < 0) return -1; - - uint8_t *cp = &ctx->desc[id].buf[ctx->desc[id].buf_l]; - cp[0] = (val >> 0) & 0xff; - cp[1] = (val >> 8) & 0xff; - cp[2] = (val >> 16) & 0xff; - cp[3] = (val >> 24) & 0xff; - ctx->desc[id].buf_l += 4; - - return 0; -} - -// Return 0 on success, -1 on failure; -static int decode_token_int(name_context *ctx, int ntok, - enum name_type type, uint32_t *val) { - int id = (ntok<<4) | type; - - if 
(ctx->desc[id].buf_l + 4 > ctx->desc[id].buf_a) - return -1; - - uint8_t *cp = ctx->desc[id].buf + ctx->desc[id].buf_l; - *val = (cp[0]) + (cp[1]<<8) + (cp[2]<<16) + ((uint32_t)cp[3]<<24); - ctx->desc[id].buf_l += 4; - - return 0; -} - -// 8 bit integer quantity -static int encode_token_int1(name_context *ctx, int ntok, - enum name_type type, uint32_t val) { - int id = (ntok<<4) | type; - - if (encode_token_type(ctx, ntok, type) < 0) return -1; - if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; - - ctx->desc[id].buf[ctx->desc[id].buf_l++] = val; - - return 0; -} - -static int encode_token_int1_(name_context *ctx, int ntok, - enum name_type type, uint32_t val) { - int id = (ntok<<4) | type; - - if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; - - ctx->desc[id].buf[ctx->desc[id].buf_l++] = val; - - return 0; -} - -// Return 0 on success, -1 on failure; -static int decode_token_int1(name_context *ctx, int ntok, - enum name_type type, uint32_t *val) { - int id = (ntok<<4) | type; - - if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) - return -1; - *val = ctx->desc[id].buf[ctx->desc[id].buf_l++]; - - return 0; -} - - -// Basic C-string style for now. -// -// Maybe XOR with previous string as context? -// This permits partial match to be encoded efficiently. -static int encode_token_alpha(name_context *ctx, int ntok, - char *str, int len) { - int id = (ntok<<4) | N_ALPHA; - - if (encode_token_type(ctx, ntok, N_ALPHA) < 0) return -1; - if (descriptor_grow(&ctx->desc[id], len+1) < 0) return -1; - memcpy(&ctx->desc[id].buf[ctx->desc[id].buf_l], str, len); - ctx->desc[id].buf[ctx->desc[id].buf_l+len] = 0; - ctx->desc[id].buf_l += len+1; - - return 0; -} - -// FIXME: need limit on string length for security. 
-// Return length on success, -1 on failure; -static int decode_token_alpha(name_context *ctx, int ntok, char *str, int max_len) { - int id = (ntok<<4) | N_ALPHA; - char c; - int len = 0; - if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) - return -1; - do { - c = ctx->desc[id].buf[ctx->desc[id].buf_l++]; - str[len++] = c; - } while(c && len < max_len && ctx->desc[id].buf_l < ctx->desc[id].buf_a); - - return len-1; -} - -static int encode_token_char(name_context *ctx, int ntok, char c) { - int id = (ntok<<4) | N_CHAR; - - if (encode_token_type(ctx, ntok, N_CHAR) < 0) return -1; - if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; - ctx->desc[id].buf[ctx->desc[id].buf_l++] = c; - - return 0; -} - -// FIXME: need limit on string length for security -// Return length on success, -1 on failure; -static int decode_token_char(name_context *ctx, int ntok, char *str) { - int id = (ntok<<4) | N_CHAR; - - if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) - return -1; - *str = ctx->desc[id].buf[ctx->desc[id].buf_l++]; - - return 1; -} - - -// A duplicated name -static int encode_token_dup(name_context *ctx, uint32_t val) { - return encode_token_int(ctx, 0, N_DUP, val); -} - -// Which read to delta against -static int encode_token_diff(name_context *ctx, uint32_t val) { - return encode_token_int(ctx, 0, N_DIFF, val); -} - - -//----------------------------------------------------------------------------- -// Trie implementation for tracking common name prefixes. 
-static -int build_trie(name_context *ctx, char *data, size_t len, int n) { - int nlines = 0; - size_t i; - trie_t *t; - - if (!ctx->t_head) { - ctx->t_head = calloc(1, sizeof(*ctx->t_head)); - if (!ctx->t_head) - return -1; - } - - // Build our trie, also counting input lines - for (nlines = i = 0; i < len; i++, nlines++) { - t = ctx->t_head; - t->count++; - while (i < len && (unsigned char)data[i] > '\n') { - unsigned char c = data[i++]; - if (c & 0x80) - //fprintf(stderr, "8-bit ASCII is unsupported\n"); - return -1; - c &= 127; - - - trie_t *x = t->next, *l = NULL; - while (x && x->c != c) { - l = x; x = x->sibling; - } - if (!x) { - if (!ctx->pool) - ctx->pool = pool_create(sizeof(trie_t)); - if (!(x = (trie_t *)pool_alloc(ctx->pool))) - return -1; - memset(x, 0, sizeof(*x)); - if (!l) - x = t->next = x; - else - x = l->sibling = x; - x->n = n; - x->c = c; - } - t = x; - t->c = c; - t->count++; - } - } - - return 0; -} - -#if 0 -void dump_trie(trie_t *t, int depth) { - if (depth == 0) { - printf("graph x_%p {\n splines = ortho\n ranksep=2\n", t); - printf(" p_%p [label=\"\"];\n", t); - dump_trie(t, 1); - printf("}\n"); - } else { - int j, k, count;//, cj; - char label[100], *cp; - trie_t *tp = t; - -// patricia: -// for (count = j = 0; j < 128; j++) -// if (t->next[j]) -// count++, cj=j; -// -// if (count == 1) { -// t = t->next[cj]; -// *cp++ = cj; -// goto patricia; -// } - - trie_t *x; - for (x = t->next; x; x = x->sibling) { - printf(" p_%p [label=\"%c\"];\n", x, x->c); - printf(" p_%p -- p_%p [label=\"%d\", penwidth=\"%f\"];\n", tp, x, x->count, MAX((log(x->count)-3)*2,1)); - //if (depth <= 11) - dump_trie(x, depth+1); - } - -#if 0 - for (j = 0; j < 128; j++) { - trie_t *tn; - - if (!t->next[j]) - continue; - - cp = label; - tn = t->next[j]; - *cp++ = j; -// patricia: - - for (count = k = 0; k < 128; k++) - if (tn->next[k]) - count++;//, cj=k; - -// if (count == 1) { -// tn = tn->next[cj]; -// *cp++ = cj; -// goto patricia; -// } - *cp++ = 0; - - printf(" 
p_%p [label=\"%s\"];\n", tn, label); - printf(" p_%p -- p_%p [label=\"%d\", penwidth=\"%f\"];\n", tp, tn, tn->count, MAX((log(tn->count)-3)*2,1)); - if (depth <= 11) - dump_trie(tn, depth+1); - } -#endif - } -} -#endif - -static -int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, int *is_fixed, int *fixed_len) { - int nlines = 0; - size_t i; - trie_t *t; - int from = -1, p3 = -1; - *exact = 0; - *fixed_len = 0; - *is_fixed = 0; - - // Horrid hack for the encoder only. - // We optimise per known name format here. - int prefix_len; - char *d = *data == '@' ? data+1 : data; - int l = *data == '@' ? len-1 : len; - int f = (*data == '>') ? 1 : 0; - if (l > 70 && d[f+0] == 'm' && d[7] == '_' && d[f+14] == '_' && d[f+61] == '/') { - prefix_len = 60; // PacBio - *is_fixed = 0; - } else if (l == 17 && d[f+5] == ':' && d[f+11] == ':') { - prefix_len = 6; // IonTorrent - *fixed_len = 6; - *is_fixed = 1; - } else if (l > 37 && d[f+8] == '-' && d[f+13] == '-' && d[f+18] == '-' && d[f+23] == '-' && - ((d[f+0] >= '0' && d[f+0] <='9') || (d[f+0] >= 'a' && d[f+0] <= 'f')) && - ((d[f+35] >= '0' && d[f+35] <='9') || (d[f+35] >= 'a' && d[f+35] <= 'f'))) { - // ONT: f33d30d5-6eb8-4115-8f46-154c2620a5da_Basecall_1D_template... - prefix_len = 37; - *fixed_len = 37; - *is_fixed = 1; - } else { - // Check Illumina and trim back to lane:tile:x:y. - int colons = 0; - for (i = 0; i < len && data[i] > ' '; i++) - ; - while (i > 0 && colons < 4) - if (data[--i] == ':') - colons++; - - if (colons == 4) { - // Constant illumina prefix - *fixed_len = i+1; - prefix_len = i+1; - *is_fixed = 1; - } else { - // Unknown, don't use a fixed len, but still search - // for any exact matches. 
- prefix_len = INT_MAX; - *is_fixed = 0; - } - } - //prefix_len = INT_MAX; - - if (!ctx->t_head) { - ctx->t_head = calloc(1, sizeof(*ctx->t_head)); - if (!ctx->t_head) - return -1; - } - - // Find an item in the trie - for (nlines = i = 0; i < len; i++, nlines++) { - t = ctx->t_head; - while (i < len && data[i] > '\n') { - unsigned char c = data[i++]; - if (c & 0x80) - //fprintf(stderr, "8-bit ASCII is unsupported\n"); - return -1; - c &= 127; - - trie_t *x = t->next; - while (x && x->c != c) - x = x->sibling; - t = x; - -// t = t->next[c]; - -// if (!t) -// return -1; - - from = t->n; - if (i == prefix_len) p3 = t->n; - //if (t->count >= .0035*ctx->t_head->count && t->n != n) p3 = t->n; // pacbio - //if (i == 60) p3 = t->n; // pacbio - //if (i == 7) p3 = t->n; // iontorrent - t->n = n; - } - } - - //printf("Looked for %d, found %d, prefix %d\n", n, from, p3); - - *exact = (n != from) && len; - return *exact ? from : p3; -} - - -//----------------------------------------------------------------------------- -// Name encoder - -/* - * Tokenises a read name using ctx as context as the previous - * tokenisation. - * - * Parsed elements are then emitted for encoding by calling the - * encode_token() function with the context, token number (Nth token - * in line), token type and token value. - * - * Returns 0 on success; - * -1 on failure. - */ -static int encode_name(name_context *ctx, char *name, int len, int mode) { - int i, is_fixed, fixed_len; - - int exact; - int cnum = ctx->counter++; - int pnum = search_trie(ctx, name, len, cnum, &exact, &is_fixed, &fixed_len); - if (pnum < 0) pnum = cnum ? cnum-1 : 0; - //pnum = pnum & (MAX_NAMES-1); - //cnum = cnum & (MAX_NAMES-1); - //if (pnum == cnum) {pnum = cnum ? cnum-1 : 0;} -#ifdef ENC_DEBUG - fprintf(stderr, "%d: pnum=%d (%d), exact=%d\n%s\n%s\n", - ctx->counter, pnum, cnum-pnum, exact, ctx->lc[pnum].last_name, name); -#endif - - // Return DUP or DIFF switch, plus the distance. 
- if (exact && len == strlen(ctx->lc[pnum].last_name)) { - encode_token_dup(ctx, cnum-pnum); - ctx->lc[cnum].last_name = name; - ctx->lc[cnum].last_ntok = ctx->lc[pnum].last_ntok; - int nc = ctx->lc[cnum].last_ntok ? ctx->lc[cnum].last_ntok : MAX_TOKENS; - ctx->lc[cnum].last = malloc(nc * sizeof(*ctx->lc[cnum].last)); - if (!ctx->lc[cnum].last) - return -1; - memcpy(ctx->lc[cnum].last, ctx->lc[pnum].last, - ctx->lc[cnum].last_ntok * sizeof(*ctx->lc[cnum].last)); - return 0; - } - - ctx->lc[cnum].last = malloc(MAX_TOKENS * sizeof(*ctx->lc[cnum].last)); - if (!ctx->lc[cnum].last) - return -1; - encode_token_diff(ctx, cnum-pnum); - - int ntok = 1; - i = 0; - if (is_fixed) { - if (ntok >= ctx->max_tok) { - memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); - memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); - memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); - ctx->max_tok = ntok+1; - } - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_ALPHA) { - if (ctx->lc[pnum].last[ntok].token_int == fixed_len && memcmp(name, ctx->lc[pnum].last_name, fixed_len) == 0) { - encode_token_match(ctx, ntok); - } else { - encode_token_alpha(ctx, ntok, name, fixed_len); - } - } else { - encode_token_alpha(ctx, ntok, name, fixed_len); - } - ctx->lc[cnum].last[ntok].token_int = fixed_len; - ctx->lc[cnum].last[ntok].token_str = 0; - ctx->lc[cnum].last[ntok++].token_type = N_ALPHA; - i = fixed_len; - } - - for (; i < len; i++) { - if (ntok >= ctx->max_tok) { - if (ctx->max_tok >= MAX_TOKENS) - return -1; - memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); - memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); - memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); - ctx->max_tok = ntok+1; - } - - /* Determine data type of this segment */ - if (isalpha(name[i])) { - int s = i+1; -// int S = i+1; - -// // FIXME: try which of these is best. alnum is good sometimes. 
-// while (s < len && isalpha(name[s])) - while (s < len && (isalpha(name[s]) || ispunct(name[s]))) -// while (s < len && name[s] != ':') -// while (s < len && !isdigit(name[s]) && name[s] != ':') - s++; - -// if (!is_fixed) { -// while (S < len && isalnum(name[S])) -// S++; -// if (s < S) -// s = S; -// } - - // Single byte strings are better encoded as chars. - if (s-i == 1) goto n_char; - - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_ALPHA) { - if (s-i == ctx->lc[pnum].last[ntok].token_int && - memcmp(&name[i], - &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], - s-i) == 0) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (alpha-mat, %.*s)\n", N_MATCH, s-i, &name[i]); -#endif - if (encode_token_match(ctx, ntok) < 0) return -1; - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (alpha, %.*s / %.*s)\n", N_ALPHA, - s-i, &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], s-i, &name[i]); -#endif - // same token/length, but mismatches - if (encode_token_alpha(ctx, ntok, &name[i], s-i) < 0) return -1; - } - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (new alpha, %.*s)\n", N_ALPHA, s-i, &name[i]); -#endif - if (encode_token_alpha(ctx, ntok, &name[i], s-i) < 0) return -1; - } - - ctx->lc[cnum].last[ntok].token_int = s-i; - ctx->lc[cnum].last[ntok].token_str = i; - ctx->lc[cnum].last[ntok].token_type = N_ALPHA; - - i = s-1; - } else if (name[i] == '0') digits0: { - // Digits starting with zero; encode length + value - uint32_t s = i; - uint32_t v = 0; - int d = 0; - - while (s < len && isdigit(name[s]) && s-i < 9) { - v = v*10 + name[s] - '0'; - //putchar(name[s]); - s++; - } - - // TODO: optimise choice over whether to switch from DIGITS to DELTA - // regularly vs all DIGITS, also MATCH vs DELTA 0. 
- if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_DIGITS0) { - d = v - ctx->lc[pnum].last[ntok].token_int; - if (d == 0 && ctx->lc[pnum].last[ntok].token_str == s-i) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig-mat, %d)\n", N_MATCH, v); -#endif - if (encode_token_match(ctx, ntok) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - } else if (mode == 1 && d < 256 && d >= 0 && ctx->lc[pnum].last[ntok].token_str == s-i) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig0-delta, %d / %d)\n", N_DDELTA0, ctx->lc[pnum].last[ntok].token_int, v); -#endif - //if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; - if (encode_token_int1(ctx, ntok, N_DDELTA0, d) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=1; - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig0, %d / %d len %d)\n", N_DIGITS0, ctx->lc[pnum].last[ntok].token_int, v, s-i); -#endif - if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; - if (encode_token_int(ctx, ntok, N_DIGITS0, v) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - } - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (new dig0, %d len %d)\n", N_DIGITS0, v, s-i); -#endif - if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; - if (encode_token_int(ctx, ntok, N_DIGITS0, v) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - } - - ctx->lc[cnum].last[ntok].token_str = s-i; // length - ctx->lc[cnum].last[ntok].token_int = v; - ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; - - i = s-1; - } else if (isdigit(name[i])) { - // digits starting 1-9; encode value - uint32_t s = i; - uint32_t v = 0; - int d = 0; - - while (s < len && isdigit(name[s]) && s-i < 9) { - v = v*10 + name[s] - '0'; - //putchar(name[s]); - s++; - } - - // dataset/10/K562_cytosol_LID8465_TopHat_v2.names - // col 4 is Illumina lane - we don't want match & delta in there - // as it has multiple lanes (so not ALL match) and delta is just - // random 
chance, increasing entropy instead. -// if (ntok == 4 || ntok == 8 || ntok == 10) { -// encode_token_int(ctx, ntok, N_DIGITS, v); -// } else { - - // If the last token was DIGITS0 and we are the same length, then encode - // using that method instead as it seems likely the entire column is fixed - // width, sometimes with leading zeros. - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && - ctx->lc[pnum].last[ntok].token_type == N_DIGITS0 && - ctx->lc[pnum].last[ntok].token_str == s-i) - goto digits0; - - // TODO: optimise choice over whether to switch from DIGITS to DELTA - // regularly vs all DIGITS, also MATCH vs DELTA 0. - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_DIGITS) { - d = v - ctx->lc[pnum].last[ntok].token_int; - if (d == 0) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig-mat, %d)\n", N_MATCH, v); -#endif - if (encode_token_match(ctx, ntok) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - //ctx->token_zcount[ntok]++; - } else if (mode == 1 && d < 256 && d >= 0 - //&& (10+ctx->token_dcount[ntok]) > (ctx->token_icount[ntok]+ctx->token_zcount[ntok]) - && (5+ctx->token_dcount[ntok]) > ctx->token_icount[ntok] - ) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig-delta, %d / %d)\n", N_DDELTA, ctx->lc[pnum].last[ntok].token_int, v); -#endif - if (encode_token_int1(ctx, ntok, N_DDELTA, d) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=1; - ctx->token_dcount[ntok]++; - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (dig, %d / %d)\n", N_DIGITS, ctx->lc[pnum].last[ntok].token_int, v); -#endif - if (encode_token_int(ctx, ntok, N_DIGITS, v) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - ctx->token_icount[ntok]++; - } - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (new dig, %d)\n", N_DIGITS, v); -#endif - if (encode_token_int(ctx, ntok, N_DIGITS, v) < 0) return -1; - //ctx->lc[pnum].last[ntok].token_delta=0; - } -// } - - ctx->lc[cnum].last[ntok].token_int = 
v; - ctx->lc[cnum].last[ntok].token_type = N_DIGITS; - - i = s-1; - } else { - n_char: - //if (!isalpha(name[i])) putchar(name[i]); - if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_CHAR) { - if (name[i] == ctx->lc[pnum].last[ntok].token_int) { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (chr-mat, %c)\n", N_MATCH, name[i]); -#endif - if (encode_token_match(ctx, ntok) < 0) return -1; - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (chr, %c / %c)\n", N_CHAR, ctx->lc[pnum].last[ntok].token_int, name[i]); -#endif - if (encode_token_char(ctx, ntok, name[i]) < 0) return -1; - } - } else { -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (new chr, %c)\n", N_CHAR, name[i]); -#endif - if (encode_token_char(ctx, ntok, name[i]) < 0) return -1; - } - - ctx->lc[cnum].last[ntok].token_int = name[i]; - ctx->lc[cnum].last[ntok].token_type = N_CHAR; - } - - ntok++; - //putchar(' '); - } - -#ifdef ENC_DEBUG - fprintf(stderr, "Tok %d (end)\n", N_END); -#endif - if (ntok >= ctx->max_tok) { - if (ctx->max_tok >= MAX_TOKENS) - return -1; - memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); - memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); - memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); - ctx->max_tok = ntok+1; - } - if (encode_token_end(ctx, ntok) < 0) return -1; -#ifdef ENC_DEBUG - fprintf(stderr, "ntok=%d max_tok=%d\n", ntok, ctx->max_tok); -#endif - - //printf("Encoded %.*s with %d tokens\n", len, name, ntok); - - ctx->lc[cnum].last_name = name; - ctx->lc[cnum].last_ntok = ntok; - last_context_tok *shrunk = realloc(ctx->lc[cnum].last, - (ntok+1) * sizeof(*ctx->lc[cnum].last)); - if (shrunk) - ctx->lc[cnum].last = shrunk; - - if (!ctx->lc[cnum].last) - return -1; - - return 0; -} - -//----------------------------------------------------------------------------- -// Name decoder - -static int decode_name(name_context *ctx, char *name, int name_len) { - int t0 = decode_token_type(ctx, 0); - uint32_t 
dist; - int pnum, cnum = ctx->counter++; - - if (cnum >= ctx->max_names) - return -1; - - if (t0 < 0 || t0 >= ctx->max_tok*16) - return 0; - - if (decode_token_int(ctx, 0, t0, &dist) < 0 || dist > cnum) - return -1; - if ((pnum = cnum - dist) < 0) pnum = 0; - - //fprintf(stderr, "t0=%d, dist=%d, pnum=%d, cnum=%d\n", t0, dist, pnum, cnum); - - if (t0 == N_DUP) { - if (pnum == cnum) - return -1; - - if (strlen(ctx->lc[pnum].last_name) +1 >= name_len) return -1; - strcpy(name, ctx->lc[pnum].last_name); - // FIXME: optimise this - ctx->lc[cnum].last_name = name; - ctx->lc[cnum].last_ntok = ctx->lc[pnum].last_ntok; - - int nc = ctx->lc[cnum].last_ntok ? ctx->lc[cnum].last_ntok : MAX_TOKENS; - ctx->lc[cnum].last = malloc(nc * sizeof(*ctx->lc[cnum].last)); - if (!ctx->lc[cnum].last) - return -1; - memcpy(ctx->lc[cnum].last, ctx->lc[pnum].last, - ctx->lc[cnum].last_ntok * sizeof(*ctx->lc[cnum].last)); - - return strlen(name)+1; - } - - *name = 0; - int ntok, len = 0, len2; - ctx->lc[cnum].last = malloc(MAX_TOKENS * sizeof(*ctx->lc[cnum].last)); - if (!ctx->lc[cnum].last) - return -1; - - for (ntok = 1; ntok < MAX_TOKENS && ntok < ctx->max_tok; ntok++) { - uint32_t v, vl; - enum name_type tok; - tok = decode_token_type(ctx, ntok); - //fprintf(stderr, "Tok %d = %d\n", ntok, tok); - - ctx->lc[cnum].last_ntok = 0; - - switch (tok) { - case N_CHAR: - if (len+1 >= name_len) return -1; - if (decode_token_char(ctx, ntok, &name[len]) < 0) return -1; - //fprintf(stderr, "Tok %d CHAR %c\n", ntok, name[len]); - ctx->lc[cnum].last[ntok].token_type = N_CHAR; - ctx->lc[cnum].last[ntok].token_int = name[len++]; - break; - - case N_ALPHA: - if ((len2 = decode_token_alpha(ctx, ntok, &name[len], name_len - len)) < 0) - return -1; - //fprintf(stderr, "Tok %d ALPHA %.*s\n", ntok, len2, &name[len]); - ctx->lc[cnum].last[ntok].token_type = N_ALPHA; - ctx->lc[cnum].last[ntok].token_str = len; - ctx->lc[cnum].last[ntok].token_int = len2; - len += len2; - break; - - case N_DIGITS0: // [0-9]* - if 
(decode_token_int1(ctx, ntok, N_DZLEN, &vl) < 0) return -1; - if (decode_token_int(ctx, ntok, N_DIGITS0, &v) < 0) return -1; - if (len+20+vl >= name_len) return -1; - len += append_uint32_fixed(&name[len], v, vl); - //fprintf(stderr, "Tok %d DIGITS0 %0*d\n", ntok, vl, v); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; - ctx->lc[cnum].last[ntok].token_int = v; - ctx->lc[cnum].last[ntok].token_str = vl; - break; - - case N_DDELTA0: - if (ntok >= ctx->lc[pnum].last_ntok) return -1; - if (decode_token_int1(ctx, ntok, N_DDELTA0, &v) < 0) return -1; - v += ctx->lc[pnum].last[ntok].token_int; - if (len+ctx->lc[pnum].last[ntok].token_str+1 >= name_len) return -1; - len += append_uint32_fixed(&name[len], v, ctx->lc[pnum].last[ntok].token_str); - //fprintf(stderr, "Tok %d DELTA0 %0*d\n", ntok, ctx->lc[pnum].last[ntok].token_str, v); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; - ctx->lc[cnum].last[ntok].token_int = v; - ctx->lc[cnum].last[ntok].token_str = ctx->lc[pnum].last[ntok].token_str; - break; - - case N_DIGITS: // [1-9][0-9]* - if (decode_token_int(ctx, ntok, N_DIGITS, &v) < 0) return -1; - if (len+20 >= name_len) return -1; - len += append_uint32_var(&name[len], v); - //fprintf(stderr, "Tok %d DIGITS %d\n", ntok, v); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS; - ctx->lc[cnum].last[ntok].token_int = v; - break; - - case N_DDELTA: - if (ntok >= ctx->lc[pnum].last_ntok) return -1; - if (decode_token_int1(ctx, ntok, N_DDELTA, &v) < 0) return -1; - v += ctx->lc[pnum].last[ntok].token_int; - if (len+20 >= name_len) return -1; - len += append_uint32_var(&name[len], v); - //fprintf(stderr, "Tok %d DELTA %d\n", ntok, v); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS; - ctx->lc[cnum].last[ntok].token_int = v; - break; - - case N_NOP: - ctx->lc[cnum].last[ntok].token_type = N_NOP; - break; - - case N_MATCH: - if (ntok >= ctx->lc[pnum].last_ntok) return -1; - switch (ctx->lc[pnum].last[ntok].token_type) { - case N_CHAR: - if (len+1 >= name_len) return -1; - 
name[len++] = ctx->lc[pnum].last[ntok].token_int; - //fprintf(stderr, "Tok %d MATCH CHAR %c\n", ntok, ctx->lc[pnum].last[ntok].token_int); - ctx->lc[cnum].last[ntok].token_type = N_CHAR; - ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; - break; - - case N_ALPHA: - if (ctx->lc[pnum].last[ntok].token_int < 0 || - len+ctx->lc[pnum].last[ntok].token_int >= name_len) return -1; - memcpy(&name[len], - &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], - ctx->lc[pnum].last[ntok].token_int); - //fprintf(stderr, "Tok %d MATCH ALPHA %.*s\n", ntok, ctx->lc[pnum].last[ntok].token_int, &name[len]); - ctx->lc[cnum].last[ntok].token_type = N_ALPHA; - ctx->lc[cnum].last[ntok].token_str = len; - ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; - len += ctx->lc[pnum].last[ntok].token_int; - break; - - case N_DIGITS: - if (len+20 >= name_len) return -1; - len += append_uint32_var(&name[len], ctx->lc[pnum].last[ntok].token_int); - //fprintf(stderr, "Tok %d MATCH DIGITS %d\n", ntok, ctx->lc[pnum].last[ntok].token_int); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS; - ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; - break; - - case N_DIGITS0: - if (len+ctx->lc[pnum].last[ntok].token_str >= name_len) return -1; - len += append_uint32_fixed(&name[len], ctx->lc[pnum].last[ntok].token_int, ctx->lc[pnum].last[ntok].token_str); - //fprintf(stderr, "Tok %d MATCH DIGITS %0*d\n", ntok, ctx->lc[pnum].last[ntok].token_str, ctx->lc[pnum].last[ntok].token_int); - ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; - ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; - ctx->lc[cnum].last[ntok].token_str = ctx->lc[pnum].last[ntok].token_str; - break; - - default: - return -1; - } - break; - - default: // an elided N_END - case N_END: - if (len+1 >= name_len) return -1; - name[len++] = 0; - ctx->lc[cnum].last[ntok].token_type = N_END; - - ctx->lc[cnum].last_name = name; - ctx->lc[cnum].last_ntok 
= ntok; - - last_context_tok *shrunk - = realloc(ctx->lc[cnum].last, - (ntok+1) * sizeof(*ctx->lc[cnum].last)); - if (shrunk) - ctx->lc[cnum].last = shrunk; - - if (!ctx->lc[cnum].last) - return -1; - - return len; - } - } - - - return -1; -} - -//----------------------------------------------------------------------------- -// arith adaptive codec or static rANS 4x16pr codec -static int arith_encode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len, int method) { - unsigned int olen = *out_len-6, nb; - if (arith_compress_to(in, in_len, out+6, &olen, method) == NULL) - return -1; - - nb = var_put_u32(out, out + *out_len, olen); - memmove(out+nb, out+6, olen); - *out_len = olen+nb; - - return 0; -} - -// Returns number of bytes read from 'in' on success, -// -1 on failure. -static int64_t arith_decode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len) { - unsigned int olen = *out_len; - - uint32_t clen; - int nb = var_get_u32(in, in+in_len, &clen); - //fprintf(stderr, "Arith decode %x\n", in[nb]); - if (arith_uncompress_to(in+nb, in_len-nb, out, &olen) == NULL) - return -1; - //fprintf(stderr, " Stored clen=%d\n", (int)clen); - *out_len = olen; - return clen+nb; -} - -static int rans_encode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len, int method) { - unsigned int olen = *out_len-6, nb; - if (rans_compress_to_4x16(in, in_len, out+6, &olen, method) == NULL) - return -1; - - nb = var_put_u32(out, out + *out_len, olen); - memmove(out+nb, out+6, olen); - *out_len = olen+nb; - - return 0; -} - -// Returns number of bytes read from 'in' on success, -// -1 on failure. 
-static int64_t rans_decode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len) { - unsigned int olen = *out_len; - - uint32_t clen; - int nb = var_get_u32(in, in+in_len, &clen); - //fprintf(stderr, "Arith decode %x\n", in[nb]); - if (rans_uncompress_to_4x16(in+nb, in_len-nb, out, &olen) == NULL) - return -1; - //fprintf(stderr, " Stored clen=%d\n", (int)clen); - *out_len = olen; - return clen+nb; -} - -static int compress(uint8_t *in, uint64_t in_len, enum name_type type, - int level, int use_arith, - uint8_t *out, uint64_t *out_len) { - uint64_t best_sz = UINT64_MAX; - uint64_t olen = *out_len; - int ret = -1; - - // Map levels 1-9 to 0-4, for parameter lookup in R[] below - level = (level-1)/2; - if (level<0) level=0; - if (level>4) level=4; - - // rANS4x16pr and arith_dynamic parameters to explore. - // We brute force these, so fast levels test 1 setting and slow test more - int R[5][N_ALL][7] = { - { // -1 - /* TYPE */ {1, 128}, - /* ALPHA */ {1, 129}, - /* CHAR */ {1, 0}, - /* DIGITS0 */ {1, 8}, - /* DZLEN */ {1, 0}, - /* DUP */ {1, 8}, - /* DIFF */ {1, 8}, - /* DIGITS */ {1, 8}, - /* DDELTA */ {1, 0}, - /* DDELTA0 */ {1, 128}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - - { // -3 - /* TYPE */ {2, 192,0}, - /* ALPHA */ {2, 129,1}, - /* CHAR */ {1, 0}, - /* DIGITS0 */ {2, 128+8,0}, // size%4==0 - /* DZLEN */ {1, 0}, - /* DUP */ {1, 192+8}, // size%4==0 - /* DIFF */ {1, 128+8}, // size%4==0 - /* DIGITS */ {1, 192+8}, // size%4==0 - /* DDELTA */ {1, 0}, - /* DDELTA0 */ {1, 128}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - - { // -5 - /* TYPE */ {2, 192,0}, - /* ALPHA */ {4, 1,128,0,129}, - /* CHAR */ {1, 0}, - /* DIGITS0 */ {2, 200,0}, - /* DZLEN */ {1, 0}, - /* DUP */ {1, 200}, - /* DIFF */ {2, 192,200}, - /* DIGITS */ {2, 132,201}, - /* DDELTA */ {1, 0}, - /* DDELTA0 */ {1, 128}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - - { // -7 - /* TYPE */ {3, 193,0,1}, - /* ALPHA */ {5, 
128, 1,128,0,129}, - /* CHAR */ {2, 1,0}, - /* DIGITS0 */ {2, 200,0}, // or 201,0 - /* DZLEN */ {1, 0}, - /* DUP */ {1, 201}, - /* DIFF */ {2, 192,200}, // or 192,201 - /* DIGITS */ {2, 132, 201}, // +bz2 here and -9 - /* DDELTA */ {1, 0}, - /* DDELTA0 */ {1, 128}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - - { // -9 - /* TYPE */ {6, 192,0,1, 65, 193,132}, - /* ALPHA */ {4, 132, 1, 0,129}, - /* CHAR */ {3, 1,0,192}, - /* DIGITS0 */ {4, 201,0, 192,64}, - /* DZLEN */ {3, 0,128,1}, - /* DUP */ {1, 201}, - /* DIFF */ {3, 192, 201,65}, - /* DIGITS */ {6, 132, 201,1, 192,129, 193}, - /* DDELTA */ {3, 1,0, 192}, - /* DDELTA0 */ {3, 192,1, 0}, - /* MATCH */ {1, 0}, - /* NOP */ {1, 0}, - /* END */ {1, 0} - }, - }; - // Minor tweak to level 3 DIGITS if arithmetic, to use O(201) instead. - if (use_arith) R[1][N_DIGITS][1]=201; - - int *meth = R[level][type]; - - int last = 0, m; - uint8_t best_static[8192]; - uint8_t *best_dat = best_static; - for (m = 1; m <= meth[0]; m++) { - *out_len = olen; - - if (!use_arith && (meth[m] & 4)) - meth[m] &= ~4; - - if (in_len % 4 != 0 && (meth[m] & 8)) - continue; - - last = 0; - if (use_arith) { - if (arith_encode(in, in_len, out, out_len, meth[m]) <0) - goto err; - } else { - if (rans_encode(in, in_len, out, out_len, meth[m]) < 0) - goto err; - } - - if (best_sz > *out_len) { - best_sz = *out_len; - last = 1; - - if (m+1 > meth[0]) - // no need to memcpy if we're not going to overwrite out - break; - - if (best_sz > 8192 && best_dat == best_static) { - // No need to realloc as best_sz only ever decreases - best_dat = malloc(best_sz); - if (!best_dat) - return -1; - } - memcpy(best_dat, out, best_sz); - } - } - - if (!last) - memcpy(out, best_dat, best_sz); - *out_len = best_sz; - ret = 0; - - err: - if (best_dat != best_static) - free(best_dat); - - return ret; -} - -static uint64_t uncompressed_size(uint8_t *in, uint64_t in_len) { - uint32_t clen, ulen; - - // in[0] in part of buffer written by us - int nb = 
var_get_u32(in, in+in_len, &clen); - - // in[nb] is part of buffer written to by arith_dynamic. - var_get_u32(in+nb+1, in+in_len, &ulen); - - return ulen; -} - -static int uncompress(int use_arith, uint8_t *in, uint64_t in_len, - uint8_t *out, uint64_t *out_len) { - uint32_t clen; - var_get_u32(in, in+in_len, &clen); - return use_arith - ? arith_decode(in, in_len, out, out_len) - : rans_decode(in, in_len, out, out_len); -} - -//----------------------------------------------------------------------------- - -/* - * Converts a line or \0 separated block of reading names to a compressed buffer. - * The code can only encode whole lines and will not attempt a partial line. - * Use the "last_start_p" return value to identify the partial line start - * offset, for continuation purposes. - * - * Returns a malloced buffer holding compressed data of size *out_len, - * or NULL on failure - */ -uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith, - int *out_len, int *last_start_p) { - int last_start = 0, i, j, nreads; - - if (len < 0) { - *out_len = 0; - return NULL; - } - - // Count lines - for (nreads = i = 0; i < len; i++) - if (blk[i] <= '\n') // \n or \0 separated entries - nreads++; - - name_context *ctx = create_context(nreads); - if (!ctx) - return NULL; - - // Construct trie - int ctr = 0; - for (i = j = 0; i < len; j=++i) { - while (i < len && blk[i] > '\n') - i++; - if (i >= len) - break; - - //blk[i] = '\0'; - last_start = i+1; - if (build_trie(ctx, &blk[j], i-j, ctr++) < 0) { - free_context(ctx); - return NULL; - } - } - if (last_start_p) - *last_start_p = last_start; - - //fprintf(stderr, "Processed %d of %d in block, line %d\n", last_start, len, ctr); - - // Encode name - for (i = j = 0; i < len; j=++i) { - while (i < len && (signed char)blk[i] >= ' ') // non-ASCII check - i++; - if (i >= len) - break; - - if (blk[i] != '\0' && blk[i] != '\n') { - // Names must be 7-bit ASCII printable - free_context(ctx); - return NULL; - } - - blk[i] = 
'\0'; - // try both 0 and 1 and pick best? - if (encode_name(ctx, &blk[j], i-j, 1) < 0) { - free_context(ctx); - return NULL; - } - } - -#if 0 - for (i = 0; i < ctx->max_tok*16; i++) { - char fn[1024]; - if (!ctx->desc[i].buf_l) continue; - sprintf(fn, "_tok.%02d_%02d.%d", i>>4,i&15,i); - FILE *fp = fopen(fn, "w"); - fwrite(ctx->desc[i].buf, 1, ctx->desc[i].buf_l, fp); - fclose(fp); - } -#endif - - //dump_trie(t_head, 0); - - // FIXME: merge descriptors - // - // If we see foo7:1 foo7:12 foo7:7 etc then foo: is constant, - // but it's encoded as alpha+dig<7>+char<:> instead of alpha. - // Any time token type 0 is all match beyond the first location we have - // a candidate for merging in string form. - // - // This saves around .1 to 1.3 percent on varying data sets. - // Cruder hack is dedicated prefix/suffix matching to short-cut this. - - - // Drop N_TYPE blocks if they all contain matches bar the first item, - // as we can regenerate these from the subsequent blocks types during - // decode. 
- for (i = 0; i < ctx->max_tok*16; i+=16) { - if (!ctx->desc[i].buf_l) continue; - - int z; - for (z=1; zdesc[i].buf_l; z++) { - if (ctx->desc[i].buf[z] != N_MATCH) - break; - } - if (z == ctx->desc[i].buf_l) { - int k; - for (k=1; k<16; k++) - if (ctx->desc[i+k].buf_l) - break; - - if (k < 16) { - ctx->desc[i].buf_l = 0; - free(ctx->desc[i].buf); - ctx->desc[i].buf = NULL; - } - } - } - - // Serialise descriptors - uint32_t tot_size = 9; - for (i = 0; i < ctx->max_tok*16; i++) { - if (!ctx->desc[i].buf_l) continue; - - int tnum = i>>4; - int ttype = i&15; - - uint64_t out_len = 1.5 * arith_compress_bound(ctx->desc[i].buf_l, 1); // guesswork - uint8_t *out = malloc(out_len); - if (!out) { - free_context(ctx); - return NULL; - } - - if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level, - use_arith, out, &out_len) < 0) { - free_context(ctx); - return NULL; - } - - free(ctx->desc[i].buf); - ctx->desc[i].buf = out; - ctx->desc[i].buf_l = out_len; - ctx->desc[i].tnum = tnum; - ctx->desc[i].ttype = ttype; - - // Find dups - int j; - for (j = 0; j < i; j++) { - if (!ctx->desc[j].buf) - continue; - if (ctx->desc[i].buf_l != ctx->desc[j].buf_l || ctx->desc[i].buf_l <= 4) - continue; - if (memcmp(ctx->desc[i].buf, ctx->desc[j].buf, ctx->desc[i].buf_l) == 0) - break; - } - if (j < i) { - ctx->desc[i].dup_from = j; - tot_size += 3; // flag, dup_from, ttype - } else { - ctx->desc[i].dup_from = -1; - tot_size += out_len + 1; // ttype - } - } - -#if 0 - for (i = 0; i < ctx->max_tok*16; i++) { - char fn[1024]; - if (!ctx->desc[i].buf_l && ctx->desc[i].dup_from == -1) continue; - sprintf(fn, "_tok.%02d_%02d.%d.comp", i>>4,i&15,i); - FILE *fp = fopen(fn, "w"); - fwrite(ctx->desc[i].buf, 1, ctx->desc[i].buf_l, fp); - fclose(fp); - } -#endif - - // Write - uint8_t *out = malloc(tot_size+13); - if (!out) { - free_context(ctx); - return NULL; - } - - uint8_t *cp = out; - - *out_len = tot_size; -// *(uint32_t *)cp = last_start; cp += 4; -// *(uint32_t *)cp = nreads; cp += 4; - 
*cp++ = (last_start >> 0) & 0xff; - *cp++ = (last_start >> 8) & 0xff; - *cp++ = (last_start >> 16) & 0xff; - *cp++ = (last_start >> 24) & 0xff; - *cp++ = (nreads >> 0) & 0xff; - *cp++ = (nreads >> 8) & 0xff; - *cp++ = (nreads >> 16) & 0xff; - *cp++ = (nreads >> 24) & 0xff; - *cp++ = use_arith; - //write(1, &nreads, 4); - int last_tnum = -1; - for (i = 0; i < ctx->max_tok*16; i++) { - if (!ctx->desc[i].buf_l) continue; - uint8_t ttype8 = ctx->desc[i].ttype; - if (ctx->desc[i].tnum != last_tnum) { - ttype8 |= 128; - last_tnum = ctx->desc[i].tnum; - } - if (ctx->desc[i].dup_from >= 0) { - //fprintf(stderr, "Dup %d from %d, sz %d\n", i, ctx->desc[i].dup_from, ctx->desc[i].buf_l); - *cp++ = ttype8 | 64; - *cp++ = ctx->desc[i].dup_from >> 4; - *cp++ = ctx->desc[i].dup_from & 15; - } else { - *cp++ = ttype8; - memcpy(cp, ctx->desc[i].buf, ctx->desc[i].buf_l); - cp += ctx->desc[i].buf_l; - } - } - - //assert(cp-out == tot_size); - - free_context(ctx); - - return out; -} - -// Deprecated interface; to remove when we next to an ABI breakage -uint8_t *encode_names(char *blk, int len, int level, int use_arith, - int *out_len, int *last_start_p) { - return tok3_encode_names(blk, len, level, use_arith, out_len, - last_start_p); -} - -/* - * Decodes a compressed block of read names into \0 separated names. - * The size of the data returned (malloced) is in *out_len. - * - * Returns NULL on failure. 
- */ -uint8_t *tok3_decode_names(uint8_t *in, uint32_t sz, uint32_t *out_len) { - if (sz < 9) - return NULL; - - int i, o = 9; - //int ulen = *(uint32_t *)in; - int ulen = (in[0]<<0) | (in[1]<<8) | (in[2]<<16) | - (((uint32_t)in[3])<<24); - - if (ulen < 0 || ulen >= INT_MAX-1024) - return NULL; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Speed up fuzzing by blocking excessive sizes - if (ulen > 100000) - return NULL; -#endif - - //int nreads = *(uint32_t *)(in+4); - int nreads = (in[4]<<0) | (in[5]<<8) | (in[6]<<16) | (((uint32_t)in[7])<<24); - int use_arith = in[8]; - name_context *ctx = create_context(nreads); - if (!ctx) - return NULL; - - // Unpack descriptors - int tnum = -1; - while (o < sz) { - uint8_t ttype = in[o++]; - if (ttype & 64) { - if (o+2 > sz) goto err; - int j = in[o++]<<4; - j += in[o++]; - if (ttype & 128) { - tnum++; - if (tnum >= MAX_TOKENS) - goto err; - ctx->max_tok = tnum+1; - memset(&ctx->desc[tnum<<4], 0, 16*sizeof(ctx->desc[tnum])); - } - - if ((ttype & 15) != 0 && (ttype & 128)) { - if (tnum < 0) goto err; - ctx->desc[tnum<<4].buf = malloc(nreads); - if (!ctx->desc[tnum<<4].buf) - goto err; - - ctx->desc[tnum<<4].buf_l = 0; - ctx->desc[tnum<<4].buf_a = nreads; - ctx->desc[tnum<<4].buf[0] = ttype&15; - memset(&ctx->desc[tnum<<4].buf[1], N_MATCH, nreads-1); - } - - if (tnum < 0) goto err; - i = (tnum<<4) | (ttype&15); - if (j >= i) - goto err; - if (!ctx->desc[j].buf) - goto err; // Attempt to copy a non-existent stream - - ctx->desc[i].buf_l = 0; - ctx->desc[i].buf_a = ctx->desc[j].buf_a; - if (ctx->desc[i].buf) free(ctx->desc[i].buf); - ctx->desc[i].buf = malloc(ctx->desc[i].buf_a); - if (!ctx->desc[i].buf) - goto err; - - memcpy(ctx->desc[i].buf, ctx->desc[j].buf, ctx->desc[i].buf_a); - //fprintf(stderr, "Copy ttype %d, i=%d,j=%d, size %d\n", ttype, i, j, (int)ctx->desc[i].buf_a); - continue; - } - - //if (ttype == 0) - if (ttype & 128) { - tnum++; - if (tnum >= MAX_TOKENS) - goto err; - ctx->max_tok = tnum+1; - 
memset(&ctx->desc[tnum<<4], 0, 16*sizeof(ctx->desc[tnum])); - } - - if ((ttype & 15) != 0 && (ttype & 128)) { - if (tnum < 0) goto err; - if (ctx->desc[tnum<<4].buf) free(ctx->desc[tnum<<4].buf); - ctx->desc[tnum<<4].buf = malloc(nreads); - if (!ctx->desc[tnum<<4].buf) - goto err; - ctx->desc[tnum<<4].buf_l = 0; - ctx->desc[tnum<<4].buf_a = nreads; - ctx->desc[tnum<<4].buf[0] = ttype&15; - memset(&ctx->desc[tnum<<4].buf[1], N_MATCH, nreads-1); - } - - //fprintf(stderr, "Read %02x\n", c); - - // Load compressed block - int64_t clen, ulen = uncompressed_size(&in[o], sz-o); - if (ulen < 0 || ulen >= INT_MAX) - goto err; - if (tnum < 0) goto err; - i = (tnum<<4) | (ttype&15); - - if (i >= MAX_TBLOCKS || i < 0) - goto err; - - ctx->desc[i].buf_l = 0; - if (ctx->desc[i].buf) free(ctx->desc[i].buf); - ctx->desc[i].buf = malloc(ulen); - if (!ctx->desc[i].buf) - goto err; - - ctx->desc[i].buf_a = ulen; - uint64_t usz = ctx->desc[i].buf_a; // convert from size_t for 32-bit sys - clen = uncompress(use_arith, &in[o], sz-o, ctx->desc[i].buf, &usz); - ctx->desc[i].buf_a = usz; - if (clen < 0 || ctx->desc[i].buf_a != ulen) - goto err; - - // fprintf(stderr, "%d: Decode tnum %d type %d clen %d ulen %d via %d\n", - // o, tnum, ttype, (int)clen, (int)ctx->desc[i].buf_a, ctx->desc[i].buf[0]); - - o += clen; - - // Encode tnum 0 type 0 ulen 100000 clen 12530 via 2 - // Encode tnum 0 type 6 ulen 196800 clen 43928 via 3 - // Encode tnum 0 type 7 ulen 203200 clen 17531 via 3 - // Encode tnum 1 type 0 ulen 50800 clen 10 via 1 - // Encode tnum 1 type 1 ulen 3 clen 5 via 0 - // Encode tnum 2 type 0 ulen 50800 clen 10 via 1 - // - } - - int ret; - ulen += 1024; // for easy coding in decode_name. - uint8_t *out = malloc(ulen); - if (!out) - goto err; - - size_t out_sz = 0; - while ((ret = decode_name(ctx, (char *)out+out_sz, ulen)) > 0) { - out_sz += ret; - ulen -= ret; - } - - if (ret < 0) - free(out); - - free_context(ctx); - - *out_len = out_sz; - return ret == 0 ? 
out : NULL; - - err: - free_context(ctx); - return NULL; -} - -// Deprecated interface; to remove when we next to an ABI breakage -uint8_t *decode_names(uint8_t *in, uint32_t sz, uint32_t *out_len) { - return tok3_decode_names(in, sz, out_len); -} diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/tokenise_name3.h b/src/htslib-1.19.1/htscodecs/htscodecs/tokenise_name3.h deleted file mode 100644 index ef341df..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/tokenise_name3.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2017, 2019 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _TOKENISE_NAME3_H_ -#define _TOKENISE_NAME3_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Converts a line or \0 separated block of reading names to a compressed buffer. - * The code can only encode whole lines and will not attempt a partial line. - * Use the "last_start_p" return value to identify the partial line start - * offset, for continuation purposes. - * - * Returns a malloced buffer holding compressed data of size *out_len, - * or NULL on failure - */ -uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith, - int *out_len, int *last_start_p); - -/* - * Decodes a compressed block of read names into \0 separated names. - * The size of the data returned (malloced) is in *out_len. - * - * Returns NULL on failure. - */ -uint8_t *tok3_decode_names(uint8_t *in, uint32_t sz, uint32_t *out_len); - -#ifdef __cplusplus -} -#endif - -#endif /* _TOKENISE_NAME3_H_ */ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/utils.c b/src/htslib-1.19.1/htscodecs/htscodecs/utils.c deleted file mode 100644 index 399b055..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/utils.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include - -#include "utils.h" - -#ifndef NO_THREADS -#include -#endif - -//#define TLS_DEBUG - -#ifndef NO_THREADS -/* - * Thread local storage per thread in the pool. - * - * We have some large memory blocks for rANS which we cannot store on the - * stack due to various system limitations. Allocaitng them can be - * expensive as some OSes use mmap and will pass the pages back to the OS - * on each free. This unfortunately then means zeroing the pages out again - * on each new malloc, plus additional switching into the kernel. 
- * - * Instead where available, we use pthread_once to allocate a small arena - * of memory buffers and we continually reuse these same buffers. We don't - * need to memset it (calloc equivalent) either as we're sure that any - * leakage of data is simply an earlier set of precomputed frequency - * lookups, and not something more sinister such as an encryption key. - * - * If we don't have pthreads, then we have to fall back to the slow - * traditional calloc instead. - */ - -#define MAX_TLS_BUFS 10 -typedef struct { - void *bufs[MAX_TLS_BUFS]; - size_t sizes[MAX_TLS_BUFS]; - int used[MAX_TLS_BUFS]; -} tls_pool; - -static pthread_once_t rans_once = PTHREAD_ONCE_INIT; -static pthread_key_t rans_key; - -/* - * Frees all local storage for this thread. - * Note: this isn't a function to free a specific allocated item. - */ -static void htscodecs_tls_free_all(void *ptr) { - tls_pool *tls = (tls_pool *)ptr; - if (!tls) - return; - - int i; - for (i = 0; i < MAX_TLS_BUFS; i++) { -#ifdef TLS_DEBUG - if (tls->bufs[i]) - fprintf(stderr, "Free %ld = %p\n", tls->sizes[i], tls->bufs[i]); -#endif - if (tls->used[i]) { - fprintf(stderr, "Closing thread while TLS data is in use\n"); - } - free(tls->bufs[i]); - } - - free(tls); -} - -static void htscodecs_tls_init(void) { - pthread_key_create(&rans_key, htscodecs_tls_free_all); -} - -/* - * Allocates size bytes from the global Thread Local Storage pool. - * This is shared by all subsequent calls within this thread. - * - * An simpler alternative could be possible where we have a fixed number - * of types of alloc, say 5, and specify the correct label when allocating. - * Eg histogram, name_context, fqzcomp, rans. We can have multiple types - * in use in different stack frames (such name_context + hist + rans), but - * the number is very limited. That then paves the way to simply check and - * realloc without needing to keep track of use status or overflowing - * the maximum number permitted. 
- */ -void *htscodecs_tls_alloc(size_t size) { - int i; - - int err = pthread_once(&rans_once, htscodecs_tls_init); - if (err != 0) { - fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n", - strerror(err)); - return NULL; - } - - // Initialise tls_pool on first usage - tls_pool *tls = pthread_getspecific(rans_key); - if (!tls) { - if (!(tls = calloc(1, sizeof(*tls)))) - return NULL; - pthread_setspecific(rans_key, tls); - } - - // Query pool for size - int avail = -1; - for (i = 0; i < MAX_TLS_BUFS; i++) { - if (!tls->used[i]) { - if (size <= tls->sizes[i]) { - tls->used[i] = 1; -#ifdef TLS_DEBUG - fprintf(stderr, "Reuse %d: %ld/%ld = %p\n", - i, size, tls->sizes[i], tls->bufs[i]); -#endif - return tls->bufs[i]; - } else if (avail == -1) { - avail = i; - } - } - } - - if (i == MAX_TLS_BUFS && avail == -1) { - // Shouldn't happen given our very limited use of this function - fprintf(stderr, "Error: out of rans_tls_alloc slots\n"); - return NULL; - } - - if (tls->bufs[avail]) - free(tls->bufs[avail]); - if (!(tls->bufs[avail] = calloc(1, size))) - return NULL; -#ifdef TLS_DEBUG - fprintf(stderr, "Alloc %d: %ld = %p\n", avail, size, tls->bufs[avail]); -#endif - tls->sizes[avail] = size; - tls->used[avail] = 1; - - return tls->bufs[avail]; -} - -void *htscodecs_tls_calloc(size_t nmemb, size_t size) { -#ifdef TLS_DEBUG - fprintf(stderr, "htscodecs_tls_calloc(%ld)\n", nmemb*size); -#endif - void *ptr = htscodecs_tls_alloc(nmemb * size); - if (ptr) - memset(ptr, 0, nmemb * size); - return ptr; -} - -void htscodecs_tls_free(void *ptr) { - if (!ptr) - return; - - tls_pool *tls = pthread_getspecific(rans_key); - - int i; - for (i = 0; i < MAX_TLS_BUFS; i++) { - if (tls->bufs[i] == ptr) - break; - } -#ifdef TLS_DEBUG - fprintf(stderr, "Fake free %d size %ld ptr %p\n", - i, tls->sizes[i], tls->bufs[i]); -#endif - if (i == MAX_TLS_BUFS) { - fprintf(stderr, "Attempt to htscodecs_tls_free a buffer not allocated" - " with htscodecs_tls_alloc\n"); - return; - } - if 
(!tls->used[i]) { - fprintf(stderr, "Attempt to htscodecs_tls_free a buffer twice\n"); - return; - } - tls->used[i] = 0; -} - -#else -/* - * Calloc/free equivalents instead. - * - * We use calloc instead of malloc as a sufficiently malformed set of input - * frequencies may not sum to the expected total frequency size, leaving - * some elements uninitialised. It's unlikely, but potentially a crafty - * attacker could somehow exploit this to pull out parts of this allocated - * buffer and leak them into the decompressed data stream, potentially - * compromising previous buffers such as encryption keys. (Although - * frankly any well-written crypto library should be zeroing such memory - * before freeing it to ensure it's never visible to a subsequent malloc.) - */ -void *htscodecs_tls_alloc(size_t size) { - return calloc(1, size); -} - -void *htscodecs_tls_calloc(size_t nmemb, size_t size) { - return calloc(nmemb, size); -} - -void htscodecs_tls_free(void *ptr) { - free(ptr); -} -#endif diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/utils.h b/src/htslib-1.19.1/htscodecs/htscodecs/utils.h deleted file mode 100644 index ae3a3dd..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/utils.h +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Copyright (c) 2019-2022 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RANS_UTILS_H -#define RANS_UTILS_H - -#include -#include -#include - -#if defined(__GNUC__) || defined(__clang__) -# if !defined(__clang__) && __GNUC__ >= 100 - // better still on gcc10 for O1 decode of old rans 4x8 - // gcc 10=246/205 11=243/205 12=230/197 -# define likely(x) __builtin_expect_with_probability((x), 1, 0.99) -# else - // gcc 10=193/168 11=195/161 12=199/176 -# define likely(x) __builtin_expect((x), 1) -# endif -# define unlikely(x) __builtin_expect((x), 0) -#else -# define likely(x) (x) -# define unlikely(x) (x) -#endif - -/* - * Allocates size bytes from the global Thread Local Storage pool. - * This is shared by all subsequent calls within this thread. - * - * Note this is NOT a general purpose allocator and usage outside of this - * library is not advised due to assumptions and limitations in the design. 
- */ -void *htscodecs_tls_alloc(size_t size); -void *htscodecs_tls_calloc(size_t nmemb, size_t size); -void htscodecs_tls_free(void *ptr); - - -/* Fast approximate log base 2 */ -static inline double fast_log(double a) { - union { double d; long long x; } u = { a }; - return (u.x - 4606921278410026770) * 1.539095918623324e-16; /* 1 / 6497320848556798.0; */ -} - -/* - * Data transpose by N. Common to rANS4x16 and arith_dynamic decoders. - * - * Tuned for specific common cases of N. - */ -static inline void unstripe(unsigned char *out, unsigned char *outN, - unsigned int ulen, unsigned int N, - unsigned int idxN[256]) { - int j = 0, k; - - if (ulen >= N) { - switch (N) { - case 4: -#define LLN 16 - if (ulen >= 4*LLN) { - while (j < ulen-4*LLN) { - int l; - for (l = 0; l < LLN; l++) { - for (k = 0; k < 4; k++) - out[j+k+l*4] = outN[idxN[k]+l]; - } - for (k = 0; k < 4; k++) - idxN[k] += LLN; - j += 4*LLN; - } - } - while (j < ulen-4) { - for (k = 0; k < 4; k++) - out[j++] = outN[idxN[k]++]; - } -#undef LLN - break; - - case 2: -#define LLN 4 - if (ulen >= 2*LLN) { - while (j < ulen-2*LLN) { - int l; - for (l = 0; l < LLN; l++) { - for (k = 0; k < 2; k++) - out[j++] = outN[idxN[k]+l]; - } - for (k = 0; k < 2; k++) - idxN[k] += l; - } - } - while (j < ulen-2) { - for (k = 0; k < 2; k++) - out[j++] = outN[idxN[k]++]; - } -#undef LLN - break; - - default: - // General case, around 25% slower overall decode - while (j < ulen-N) { - for (k = 0; k < N; k++) - out[j++] = outN[idxN[k]++]; - } - break; - } - } - for (k = 0; j < ulen; k++) - out[j++] = outN[idxN[k]++]; -} - -#define MAGIC 8 - -/* - * Order 0 histogram construction. 8-way unrolled to avoid cache collisions. 
- */ -static inline -int hist8(unsigned char *in, unsigned int in_size, uint32_t F0[256]) { - if (in_size > 500000) { - uint32_t *f0 = htscodecs_tls_calloc((65536+37)*3, sizeof(*f0)); - if (f0 == NULL) - return -1; - uint32_t *f1 = f0 + 65536+37; - uint32_t *f2 = f1 + 65536+37; - - uint32_t i, i8 = in_size & ~15; - - for (i = 0; i < i8; i+=16) { - uint16_t i16a[4], i16b[4]; - memcpy(i16a, in+i, 8); - f0[i16a[0]]++; - f1[i16a[1]]++; - f2[i16a[2]]++; - f0[i16a[3]]++; - - memcpy(i16b, in+i+8, 8); - f1[i16b[0]]++; - f0[i16b[1]]++; - f1[i16b[2]]++; - f2[i16b[3]]++; - } - - while (i < in_size) - F0[in[i++]]++; - - for (i = 0; i < 65536; i++) { - F0[i & 0xff] += f0[i] + f1[i] + f2[i]; - F0[i >> 8 ] += f0[i] + f1[i] + f2[i]; - } - htscodecs_tls_free(f0); - } else { - uint32_t F1[256+MAGIC] = {0}, F2[256+MAGIC] = {0}, F3[256+MAGIC] = {0}; - uint32_t i, i8 = in_size & ~7; - - for (i = 0; i < i8; i+=8) { - F0[in[i+0]]++; - F1[in[i+1]]++; - F2[in[i+2]]++; - F3[in[i+3]]++; - F0[in[i+4]]++; - F1[in[i+5]]++; - F2[in[i+6]]++; - F3[in[i+7]]++; - } - - while (i < in_size) - F0[in[i++]]++; - - for (i = 0; i < 256; i++) - F0[i] += F1[i] + F2[i] + F3[i]; - } - - return 0; -} - -// Hist8 with a crude entropy (bits / byte) estimator. 
-static inline -double hist8e(unsigned char *in, unsigned int in_size, uint32_t F0[256]) { - uint32_t F1[256+MAGIC] = {0}, F2[256+MAGIC] = {0}, F3[256+MAGIC] = {0}; - uint32_t F4[256+MAGIC] = {0}, F5[256+MAGIC] = {0}, F6[256+MAGIC] = {0}; - uint32_t F7[256+MAGIC] = {0}; - -#ifdef __GNUC__ - double e = 0, in_size_r2 = log(1.0/in_size)/log(2); -#else - double e = 0, in_size_r2 = log(1.0/in_size); -#endif - - unsigned int i, i8 = in_size & ~7; - for (i = 0; i < i8; i+=8) { - F0[in[i+0]]++; - F1[in[i+1]]++; - F2[in[i+2]]++; - F3[in[i+3]]++; - F4[in[i+4]]++; - F5[in[i+5]]++; - F6[in[i+6]]++; - F7[in[i+7]]++; - } - while (i < in_size) - F0[in[i++]]++; - - for (i = 0; i < 256; i++) { - F0[i] += F1[i] + F2[i] + F3[i] + F4[i] + F5[i] + F6[i] + F7[i]; -#ifdef __GNUC__ - e -= F0[i] * (32 - __builtin_clz(F0[i]|1) + in_size_r2); -#else - e -= F0[i] * (fast_log(F0[i]) + in_size_r2); -#endif - } - -#ifndef __GNUC__ - e /= log(2); -#endif - return e/in_size; -} - -/* - * A variant of hist8 that simply marks the presence of a symbol rather - * than its frequency. - */ -static inline -void present8(unsigned char *in, unsigned int in_size, - uint32_t F0[256]) { - uint32_t F1[256+MAGIC] = {0}, F2[256+MAGIC] = {0}, F3[256+MAGIC] = {0}; - uint32_t F4[256+MAGIC] = {0}, F5[256+MAGIC] = {0}, F6[256+MAGIC] = {0}; - uint32_t F7[256+MAGIC] = {0}; - - unsigned int i, i8 = in_size & ~7; - for (i = 0; i < i8; i+=8) { - F0[in[i+0]]=1; - F1[in[i+1]]=1; - F2[in[i+2]]=1; - F3[in[i+3]]=1; - F4[in[i+4]]=1; - F5[in[i+5]]=1; - F6[in[i+6]]=1; - F7[in[i+7]]=1; - } - while (i < in_size) - F0[in[i++]]=1; - - for (i = 0; i < 256; i++) - F0[i] += F1[i] + F2[i] + F3[i] + F4[i] + F5[i] + F6[i] + F7[i]; -} - -/* - * Order 1 histogram construction. 4-way unrolled to avoid cache collisions. 
- */ -#if 1 -static inline -int hist1_4(unsigned char *in, unsigned int in_size, - uint32_t F0[256][256], uint32_t *T0) { - unsigned char l = 0, c; - unsigned char *in_end = in + in_size; - - unsigned char cc[5] = {0}; - if (in_size > 500000) { - uint32_t (*F1)[259] = htscodecs_tls_calloc(256, sizeof(*F1)); - if (F1 == NULL) - return -1; - while (in < in_end-8) { - memcpy(cc, in, 4); in += 4; - F0[cc[4]][cc[0]]++; - F1[cc[0]][cc[1]]++; - F0[cc[1]][cc[2]]++; - F1[cc[2]][cc[3]]++; - cc[4] = cc[3]; - - memcpy(cc, in, 4); in += 4; - F0[cc[4]][cc[0]]++; - F1[cc[0]][cc[1]]++; - F0[cc[1]][cc[2]]++; - F1[cc[2]][cc[3]]++; - cc[4] = cc[3]; - } - l = cc[3]; - - while (in < in_end) { - F0[l][c = *in++]++; - l = c; - } - T0[l]++; - - int i, j; - for (i = 0; i < 256; i++) { - int tt = 0; - for (j = 0; j < 256; j++) { - F0[i][j] += F1[i][j]; - tt += F0[i][j]; - } - T0[i]+=tt; - } - htscodecs_tls_free(F1); - } else { - while (in < in_end-8) { - memcpy(cc, in, 4); in += 4; - F0[cc[4]][cc[0]]++; - F0[cc[0]][cc[1]]++; - F0[cc[1]][cc[2]]++; - F0[cc[2]][cc[3]]++; - cc[4] = cc[3]; - - memcpy(cc, in, 4); in += 4; - F0[cc[4]][cc[0]]++; - F0[cc[0]][cc[1]]++; - F0[cc[1]][cc[2]]++; - F0[cc[2]][cc[3]]++; - cc[4] = cc[3]; - } - l = cc[3]; - - while (in < in_end) { - F0[l][c = *in++]++; - l = c; - } - T0[l]++; - - int i, j; - for (i = 0; i < 256; i++) { - int tt = 0; - for (j = 0; j < 256; j++) - tt += F0[i][j]; - T0[i]+=tt; - } - } - - return 0; -} - -#else -// 16 bit mode, similar to O0 freq. -// This is better on some low entropy data, but generally we prefer to do -// bit-packing and/or RLE to turn it into higher-entropy data first. -// -// Kept here for posterity incase we need it again, as it's quick tricky. 
-static inline -int hist1_4(unsigned char *in, unsigned int in_size, - uint32_t F0[256][256], uint32_t *T0) { - uint32_t f0[65536+MAGIC] = {0}; - uint32_t f1[65536+MAGIC] = {0}; - - uint32_t i, i8 = (in_size-1) & ~15; - - T0[0]++; f0[in[0]<<8]++; - for (i = 0; i < i8; i+=16) { - uint16_t i16a[16]; - memcpy(i16a, in+i, 16); // faster in 2 as gcc recognises this - memcpy(i16a+8, in+i+1, 16); // faster in 2 as gcc recognises this - - f0[i16a[0]]++; - f1[i16a[1]]++; - f0[i16a[2]]++; - f1[i16a[3]]++; - f0[i16a[4]]++; - f1[i16a[5]]++; - f0[i16a[6]]++; - f1[i16a[7]]++; - f0[i16a[8]]++; - f1[i16a[9]]++; - f0[i16a[10]]++; - f1[i16a[11]]++; - f0[i16a[12]]++; - f1[i16a[13]]++; - f0[i16a[14]]++; - f1[i16a[15]]++; - } - - while (i < in_size-1) { - F0[in[i]][in[i+1]]++; - T0[in[i+1]]++; - i++; - } - - for (i = 0; i < 65536; i++) { - F0[i&0xff][i>>8] += f0[i] + f1[i]; - T0[i>>8] += f0[i] + f1[i]; - } - - return 0; -} -#endif - -#endif /* RANS_UTILS_H */ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/varint2.h b/src/htslib-1.19.1/htscodecs/htscodecs/varint2.h deleted file mode 100644 index 4d6cc5f..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/varint2.h +++ /dev/null @@ -1,318 +0,0 @@ -//#include - -// FIXME: make get functions const uint8_t * - -/* - * Copyright (c) 2019 Genome Research Ltd. - * Author(s): James Bonfield - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger - * Institute nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH - * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef VARINT2_H -#define VARINT2_H - -#include - -// General API scheme is var_{get,put}_{s,u}{32,64} -// s/u for signed/unsigned; 32/64 for integer size. - -// The ideas here are taken from the vbenc code in TurboPFor -// (https://github.com/powturbo/TurboPFor) with analysis at -// https://github.com/stoklund/varint. - -// Unlike the ITF8 and standard 7-bit at a time encodings, this -// tries to ensure a larger portion of small numbers still fit in 1 byte. -// This trades more space for long integers with less space for short ones, -// which seems like a good tradeoff given the typical distribution curves. -// -// Like ITF8 and LTF8, the first byte also indicates the total number of -// bytes we need to decode, but unlike those it uses the same format for -// both meaning changing data type doesn't change encoding. -// -// Size comparison examples. 
-// -// Max value -// Bytes ITF8/7bit This -// 1 127 176 -// 2 16,383 16,560 -// 3 2,097,151 540,848 -// 4 268,435,455 16,777,215 -// 5 34,359,738,368 4,294,967,296 -// 6 4,398,046,511,104 1,099,511,627,776 -// ... -// -// The format is as follows: -// 0-176 1 byte: 0 + 8 bit -// 177-16560 (14 bit range) 2 bytes: 177 + 6bit, 0 + 8bit, for x-177 -// 16561-540848 (19 bits) 3 bytes: 241 + 3bit, 0+8, 0+8, for x-16561 -// 540849-16777215 (~24 bit) 4 bytes: 249, 0+8, 0+8, 0+8, for x -// 2^24 - 2^32-1 5 bytes: 250, 0+8 x4 -// 2^32 - 2^40-1 6 bytes: 251, 0+8 x5 -// 2^40 - 2^48-1 7 bytes: 252, 0+8 x6 -// 2^48 - 2^56-1 8 bytes: 253, 0+8 x7 -// 2^56 - 2^64-1 9 bytes: 254, 0+8 x8 -// -// Hence first byte value 255 is not possible and permits future -// escape code. - - -// FIXME: consider returning the value and having nbytes passed in by -// reference instead of vice-versa. -// -// ie uint64_t var_get_u64(uint8_t *cp, int *nbytes) -// vs int var_get_u64(uint8_t *cp, uint64_t *val) -// -// The return value can then be assigned to 32-bit or 64-bit type -// without need of a new function name. The cost is we can't then -// do "cp += var_get_u32(cp, endp, &u_freq_sz);". Maybe we can't do -// overflow detection with former? 
(Want 32-bit but got, say, 40 bit) - - -// static inline char *var_dump(const uint8_t *cp, int n) { -// static char buf[1000]; -// int i, o = 0; -// for (i = 0; i < n; i++) -// o += sprintf(&buf[o], " %d", cp[i]); -// return buf; -// } - -static inline int var_put_u64(uint8_t *cp, const uint8_t *endp, uint64_t x) { - uint8_t *op = cp; - - if (x < 177) { - if (endp && endp - cp < 1) return 0; - // 0 to 176 in single byte as-is - *cp++ = x; - } else if (x < 16561) { - if (endp && endp - cp < 2) return 0; - *cp++ = ((x-177)>>8)+177; - *cp++ = x-177; - } else if (x < 540849) { - if (endp && endp - cp < 3) return 0; - *cp++ = ((x-16561)>>16)+241; - *cp++ = (x-16561)>>8; - *cp++ = x-16561; - } else if (x < (1<<24)) { - if (endp && endp - cp < 4) return 0; - *cp++ = 249; - *cp++ = x>>16; - *cp++ = x>>8; - *cp++ = x; - } else if (x < (1LL<<32)) { - if (endp && endp - cp < 5) return 0; - *cp++ = 250; - *cp++ = x>>24; - *cp++ = x>>16; - *cp++ = x>>8; - *cp++ = x; - } else if (x < (1LL<<40)) { - if (endp && endp - cp < 6) return 0; - *cp++ = 251; - *cp++ = x>>32; - *cp++ = x>>24; - *cp++ = x>>16; - *cp++ = x>>8; - *cp++ = x; - } else if (x < (1LL<<48)) { - if (endp && endp - cp < 7) return 0; - *cp++ = 252; - *cp++ = x>>40; - *cp++ = x>>32; - *cp++ = x>>24; - *cp++ = x>>16; - *cp++ = x>>8; - *cp++ = x; - } else if (x < (1LL<<56)) { - if (endp && endp - cp < 8) return 0; - *cp++ = 253; - *cp++ = x>>48; - *cp++ = x>>40; - *cp++ = x>>32; - *cp++ = x>>24; - *cp++ = x>>16; - *cp++ = x>>8; - *cp++ = x; - } else { - if (endp && endp - cp < 9) return 0; - *cp++ = 254; - *cp++ = x>>56; - *cp++ = x>>48; - *cp++ = x>>40; - *cp++ = x>>32; - *cp++ = x>>24; - *cp++ = x>>16; - *cp++ = x>>8; - *cp++ = x; - } - -// fprintf(stderr, "Put64 %d (%s)\n", x, var_dump(op, cp-op)); - - return cp-op; -} - -static inline int var_put_u32(uint8_t *cp, const uint8_t *endp, uint32_t x) { - uint8_t *op = cp; - - if (x < 177) { - if (endp && endp - cp < 1) abort();//return 0; - // 0 to 176 in single byte 
as-is - *cp++ = x; - } else if (x < 16561) { - if (endp && endp - cp < 2) abort();//return 0; - *cp++ = ((x-177)>>8)+177; - *cp++ = x-177; - } else if (x < 540849) { - if (endp && endp - cp < 3) abort();//return 0; - *cp++ = ((x-16561)>>16)+241; - *cp++ = (x-16561)>>8; - *cp++ = x-16561; - } else if (x < (1<<24)) { - if (endp && endp - cp < 4) abort();//return 0; - *cp++ = 249; - *cp++ = x>>16; - *cp++ = x>>8; - *cp++ = x; - } else { - if (endp && endp - cp < 5) abort();//return 0; - *cp++ = 250; - *cp++ = x>>24; - *cp++ = x>>16; - *cp++ = x>>8; - *cp++ = x; - } - -// fprintf(stderr, "Put32 %d (%s)\n", x, var_dump(op, cp-op)); - - return cp-op; -} - -static inline int var_get_u64(uint8_t *cp, const uint8_t *endp, uint64_t *i) { - uint8_t *op = cp; - uint64_t j = 0; - - if (endp && cp >= endp) { - *i = 0; - return 0; - } - if (*cp < 177) { - j = *cp++; - } else if (*cp < 241) { - j = ((cp[0] - 177)<<8) + cp[1] + 177; - cp += 2; - } else if (*cp < 249) { - j = ((cp[0] - 241)<<16) + (cp[1]<<8) + cp[2] + 16561; - cp += 3; - } else { - int n = *cp++ - 249 + 3; - while (n--) - j = (j<<8) + *cp++; - } - -// fprintf(stderr, "Get64 %ld (%s)\n", j, var_dump(op, cp-op)); - - *i = j; - return cp-op; -} - -static inline int var_get_u32(uint8_t *cp, const uint8_t *endp, uint32_t *i) { - uint8_t *op = cp; - uint32_t j = 0; - - if (endp && cp >= endp) { - *i = 0; - return 0; - } - if (*cp < 177) { - j = *cp++; - } else if (*cp < 241) { - j = ((cp[0] - 177)<<8) + cp[1] + 177; - cp += 2; - } else if (*cp < 249) { - j = ((cp[0] - 241)<<16) + (cp[1]<<8) + cp[2] + 16561; - cp += 3; - } else { - int n = *cp++ - 249 + 3; - while (n--) - j = (j<<8) + *cp++; - } - -// fprintf(stderr, "Get32 %d (%s)\n", j, var_dump(op, cp-op)); - - *i = j; - return cp-op; -} - -// Signed versions of the above using zig-zag integer encoding. -// This folds the sign bit into the bottom bit so we iterate -// 0, -1, +1, -2, +2, etc. 
-static inline int var_put_s32(uint8_t *cp, const uint8_t *endp, int32_t i) { - return var_put_u32(cp, endp, (i << 1) ^ (i >> 31)); -} -static inline int var_put_s64(uint8_t *cp, const uint8_t *endp, int64_t i) { - return var_put_u64(cp, endp, (i << 1) ^ (i >> 63)); -} - -static inline int var_get_s32(uint8_t *cp, const uint8_t *endp, int32_t *i) { - int b = var_get_u32(cp, endp, (uint32_t *)i); - *i = (*i >> 1) ^ -(*i & 1); - return b; -} -static inline int var_get_s64(uint8_t *cp, const uint8_t *endp, int64_t *i) { - int b = var_get_u64(cp, endp, (uint64_t *)i); - *i = (*i >> 1) ^ -(*i & 1); - return b; -} - -static inline int var_size_u64(uint64_t v) { - if (v < 177) - return 1; - else if (v < 16561) - return 2; - else if (v < 540849) - return 3; - - int i = 0; - do { - v >>= 8; - i++; - } while (v); - -// fprintf(stderr, "Size %ld (%d)\n", v, i+1); - - return i+1; -} -#define var_size_u32 var_size_u64 - -static inline int var_size_s64(int64_t v) { - return var_size_u64((v >> 63) ^ (v << 1)); -} -#define var_size_s32 var_size_s64 - -#endif /* VARINT2_H */ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/version.h b/src/htslib-1.19.1/htscodecs/htscodecs/version.h deleted file mode 100644 index 827d165..0000000 --- a/src/htslib-1.19.1/htscodecs/htscodecs/version.h +++ /dev/null @@ -1 +0,0 @@ -#define HTSCODECS_VERSION_TEXT "1.6.0" diff --git a/src/htslib-1.19.1/htscodecs_bundled.mk b/src/htslib-1.19.1/htscodecs_bundled.mk deleted file mode 100644 index 6274350..0000000 --- a/src/htslib-1.19.1/htscodecs_bundled.mk +++ /dev/null @@ -1,72 +0,0 @@ -# Makefile fragment to add settings needed when bundling htscodecs functions -# -# Copyright (C) 2021-2022 Genome Research Ltd. 
-# -# Author: Rob Davies -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- - -HTSCODECS_SOURCES = $(HTSPREFIX)htscodecs/htscodecs/arith_dynamic.c \ - $(HTSPREFIX)htscodecs/htscodecs/fqzcomp_qual.c \ - $(HTSPREFIX)htscodecs/htscodecs/htscodecs.c \ - $(HTSPREFIX)htscodecs/htscodecs/pack.c \ - $(HTSPREFIX)htscodecs/htscodecs/rANS_static4x16pr.c \ - $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx2.c \ - $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx512.c \ - $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_sse4.c \ - $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_neon.c \ - $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr.c \ - $(HTSPREFIX)htscodecs/htscodecs/rANS_static.c \ - $(HTSPREFIX)htscodecs/htscodecs/rle.c \ - $(HTSPREFIX)htscodecs/htscodecs/tokenise_name3.c \ - $(HTSPREFIX)htscodecs/htscodecs/utils.c - - -HTSCODECS_OBJS = $(HTSCODECS_SOURCES:.c=.o) - -# htscodecs public headers -htscodecs_arith_dynamic_h = htscodecs/htscodecs/arith_dynamic.h -htscodecs_fqzcomp_qual_h = htscodecs/htscodecs/fqzcomp_qual.h -htscodecs_htscodecs_h = htscodecs/htscodecs/htscodecs.h $(htscodecs_version_h) -htscodecs_pack_h = htscodecs/htscodecs/pack.h -htscodecs_rANS_static_h = htscodecs/htscodecs/rANS_static.h -htscodecs_rANS_static4x16_h = htscodecs/htscodecs/rANS_static4x16.h -htscodecs_rle_h = htscodecs/htscodecs/rle.h -htscodecs_tokenise_name3_h = htscodecs/htscodecs/tokenise_name3.h -htscodecs_varint_h = htscodecs/htscodecs/varint.h - -# htscodecs internal headers -htscodecs_htscodecs_endian_h = htscodecs/htscodecs/htscodecs_endian.h -htscodecs_c_range_coder_h = htscodecs/htscodecs/c_range_coder.h -htscodecs_c_simple_model_h = htscodecs/htscodecs/c_simple_model.h $(htscodecs_c_range_coder_h) -htscodecs_permute_h = htscodecs/htscodecs/permute.h -htscodecs_pooled_alloc_h = htscodecs/htscodecs/pooled_alloc.h -htscodecs_rANS_byte_h = htscodecs/htscodecs/rANS_byte.h -htscodecs_rANS_static16_int_h = htscodecs/htscodecs/rANS_static16_int.h $(htscodecs_varint_h) $(htscodecs_utils_h) -htscodecs_rANS_static32x16pr_h = 
htscodecs/htscodecs/rANS_static32x16pr.h -htscodecs_rANS_word_h = htscodecs/htscodecs/rANS_word.h $(htscodecs_htscodecs_endian_h) -htscodecs_utils_h = htscodecs/htscodecs/utils.h -htscodecs_version_h = htscodecs/htscodecs/version.h - -# Add htscodecs tests into the HTSlib test framework - -HTSCODECS_TEST_TARGETS = test_htscodecs_rans4x8 \ - test_htscodecs_rans4x16 test_htscodecs_arith test_htscodecs_tok3 \ - test_htscodecs_fqzcomp test_htscodecs_varint diff --git a/src/htslib-1.19.1/htscodecs_external.mk b/src/htslib-1.19.1/htscodecs_external.mk deleted file mode 100644 index 3f86811..0000000 --- a/src/htslib-1.19.1/htscodecs_external.mk +++ /dev/null @@ -1,46 +0,0 @@ -# Makefile fragment for use when linking to an external libhtscodecs -# -# Copyright (C) 2021 Genome Research Ltd. -# -# Author: Rob Davies -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -HTSCODECS_SOURCES = -HTSCODECS_OBJS = -HTSCODECS_TEST_TARGETS = - -htscodecs_arith_dynamic_h = -htscodecs_fqzcomp_qual_h = -htscodecs_htscodecs_h = -htscodecs_pack_h = -htscodecs_rANS_static_h = -htscodecs_rANS_static4x16_h = -htscodecs_rle_h = -htscodecs_tokenise_name3_h = -htscodecs_varint_h = - -htscodecs_htscodecs_endian_h = -htscodecs_c_range_coder_h = -htscodecs_c_simple_model_h = -htscodecs_pooled_alloc_h = -htscodecs_rANS_byte_h = -htscodecs_rANS_word_h = -htscodecs_utils_h = -htscodecs_version_h = diff --git a/src/htslib-1.19.1/htsfile.1 b/src/htslib-1.19.1/htsfile.1 deleted file mode 100644 index bb7caf5..0000000 --- a/src/htslib-1.19.1/htsfile.1 +++ /dev/null @@ -1,94 +0,0 @@ -.TH htsfile 1 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools" -.SH NAME -htsfile \- identify high-throughput sequencing data files -.\" -.\" Copyright (C) 2015, 2017-2018 Genome Research Ltd. -.\" -.\" Author: John Marshall -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -.SH SYNOPSIS -.B htsfile -.RB [ -chHv ] -.IR FILE ... -.br -.B htsfile --copy -.RB [ -v ] -.I FILE DESTFILE -.SH DESCRIPTION -The \fBhtsfile\fR utility attempts to identify what kind of high-throughput -sequencing data files the specified files are, and provides minimal viewing -capabilities for some kinds of data file. -.P -It can identify sequencing data files such as SAM, BAM, and CRAM; -variant calling data files such as VCF and BCF; -index files used to index these data files; -and compressed versions of many of them. -.P -For each \fIFILE\fR given, \fBhtsfile\fP prints a description of the file -format determined, using similar keyword conventions to \fBfile\fP(1): -"text" indicates a textual file that can probably be viewed on a terminal; -"data" indicates binary data; -"sequence", "variant calling", and "index" indicate different categories of -data file. -When it can be identified, the name of the particular file format (such as -"BAM" or "VCF") is printed at the start of the description. -.P -When used to view file contents as text, \fBhtsfile\fP can optionally show -only headers or only data records, but has no other filtering capabilities. -Use \fBsamtools\fR or \fBbcftools\fR if you need more extensive viewing or -filtering capabilities. -.P -Alternatively, when \fB--copy\fR is used, \fBhtsfile\fR takes exactly two -arguments and performs a byte-for-byte copy from \fIFILE\fR to \fIDESTFILE\fR. -This is similar to \fBcp\fR(1), but HTSlib's remote file access facilities -are available for both source and destination. 
-.P -The following options are accepted: -.TP 4n -.BR -c ", " --view -Instead of identifying the specified files, display a textual representation -of their contents on standard output. -.IP -By default, \fB--view\fR refuses to display files in unknown formats. -When \fB--verbose\fR is also given, the raw contents of such files are -displayed, with non-printable characters shown via C-style "\\x" hexadecimal -escape sequences. -.TP -.BR -C ", " --copy -Instead of identifying or displaying the specified files, copy the source -\fIFILE\fR to the destination \fIDESTFILE\fR. -Only \fB--verbose\fR may be used in conjunction with \fB--copy\fR. -.TP -.BR -h ", " --header-only -Display data file headers only. -Implies \fB--view\fR. -.TP -.BR -H ", " --no-header -When viewing files, display data records only. -.TP -.BR -v ", " --verbose -Display additional warnings and diagnostic messages. -Using \fB--verbose\fR repeatedly further raises the verbosity. -.PP -.SH SEE ALSO -.IR bcftools (1), -.IR file (1), -.IR samtools (1) diff --git a/src/htslib-1.19.1/htsfile.c b/src/htslib-1.19.1/htsfile.c deleted file mode 100644 index 9af4ae3..0000000 --- a/src/htslib-1.19.1/htsfile.c +++ /dev/null @@ -1,324 +0,0 @@ -/* htsfile.c -- file identifier and minimal viewer. - - Copyright (C) 2014-2019 Genome Research Ltd. - - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "htslib/hfile.h" -#include "htslib/hts.h" -#include "htslib/sam.h" -#include "htslib/vcf.h" - -#ifndef EFTYPE -#define EFTYPE ENOEXEC -#endif - -enum { identify, view_headers, view_all, copy } mode = identify; -int show_headers = 1; -int verbose = 0; -int status = EXIT_SUCCESS; /* Exit status from main */ - -void error(const char *format, ...) -{ - int err = errno; - va_list args; - va_start(args, format); - fflush(stdout); - fprintf(stderr, "htsfile: "); - vfprintf(stderr, format, args); - if (err) fprintf(stderr, ": %s\n", strerror(err)); - else fprintf(stderr, "\n"); - fflush(stderr); - va_end(args); - status = EXIT_FAILURE; -} - -static void view_sam(samFile *in, const char *filename) -{ - bam1_t *b = NULL; - sam_hdr_t *hdr = NULL; - samFile *out = NULL; - - hdr = sam_hdr_read(in); - if (hdr == NULL) { - errno = 0; error("reading headers from \"%s\" failed", filename); - goto clean; - } - - out = hts_open("-", "w"); - if (out == NULL) { error("reopening standard output failed"); goto clean; } - - if (show_headers) { - if (sam_hdr_write(out, hdr) != 0) { - error("writing headers to standard output failed"); - goto clean; - } - } - - if (mode == view_all) { - int ret; - - b = bam_init1(); - if (b == NULL) { error("can't create record"); goto clean; } - - while ((ret = sam_read1(in, hdr, b)) >= 0) { - if (sam_write1(out, hdr, b) < 0) { - error("writing to standard output failed"); - goto clean; - } - } - - 
if (ret < -1) { error("reading \"%s\" failed", filename); goto clean; } - } - - clean: - sam_hdr_destroy(hdr); - bam_destroy1(b); - if (out) hts_close(out); -} - -static void view_vcf(vcfFile *in, const char *filename) -{ - bcf1_t *rec = NULL; - bcf_hdr_t *hdr = NULL; - vcfFile *out = NULL; - - hdr = bcf_hdr_read(in); - if (hdr == NULL) { - errno = 0; error("reading headers from \"%s\" failed", filename); - goto clean; - } - - out = hts_open("-", "w"); - if (out == NULL) { error("reopening standard output failed"); goto clean; } - - if (show_headers) { - if (bcf_hdr_write(out, hdr) != 0) { - error("writing headers to standard output failed"); - goto clean; - } - } - - if (mode == view_all) { - int ret; - - rec = bcf_init(); - if (rec == NULL) { error("can't create record"); goto clean; } - - while ((ret = bcf_read(in, hdr, rec)) >= 0) { - if (bcf_write(out, hdr, rec) < 0) { - error("writing to standard output failed"); - goto clean; - } - } - - if (ret < -1) { error("reading \"%s\" failed", filename); goto clean; } - } - - clean: - if (hdr) bcf_hdr_destroy(hdr); - if (rec) bcf_destroy(rec); - if (out) hts_close(out); -} - -static void view_raw(hFILE *fp, const char *filename) -{ - int c, prev; - for (prev = '\n'; (c = hgetc(fp)) != EOF; prev = c) - if (isprint(c) || c == '\n' || c == '\t') putchar(c); - else if (c == '\r') fputs("\\r", stdout); - else if (c == '\0') fputs("\\0", stdout); - else printf("\\x%02x", c); - - if (prev != '\n') putchar('\n'); - - if (herrno(fp)) { - errno = herrno(fp); - error("reading \"%s\" failed", filename); - } -} - -static void copy_raw(const char *srcfilename, const char *destfilename) -{ - hFILE *src = hopen(srcfilename, "r"); - if (src == NULL) { - error("can't open \"%s\"", srcfilename); - return; - } - - size_t bufsize = 1048576; - char *buffer = malloc(bufsize); - if (buffer == NULL) { - error("can't allocate copy buffer"); - hclose_abruptly(src); - return; - } - - hFILE *dest = hopen(destfilename, "w"); - if (dest == NULL) { 
- error("can't create \"%s\"", destfilename); - hclose_abruptly(src); - free(buffer); - return; - } - - ssize_t n; - while ((n = hread(src, buffer, bufsize)) > 0) - if (hwrite(dest, buffer, n) != n) { - error("writing to \"%s\" failed", destfilename); - hclose_abruptly(dest); - dest = NULL; - break; - } - - if (n < 0) { - error("reading from \"%s\" failed", srcfilename); - hclose_abruptly(src); - src = NULL; - } - - if (dest && hclose(dest) < 0) error("closing \"%s\" failed", destfilename); - if (src && hclose(src) < 0) error("closing \"%s\" failed", srcfilename); - free(buffer); -} - -static void usage(FILE *fp, int status) -{ - fprintf(fp, -"Usage: htsfile [-chHv] FILE...\n" -" htsfile --copy [-v] FILE DESTFILE\n" -"Options:\n" -" -c, --view Write textual form of FILEs to standard output\n" -" -C, --copy Copy the exact contents of FILE to DESTFILE\n" -" -h, --header-only Display only headers in view mode, not records\n" -" -H, --no-header Suppress header display in view mode\n" -" -v, --verbose Increase verbosity of warnings and diagnostics\n"); - exit(status); -} - -int main(int argc, char **argv) -{ - static const struct option options[] = { - { "copy", no_argument, NULL, 'C' }, - { "header-only", no_argument, NULL, 'h' }, - { "no-header", no_argument, NULL, 'H' }, - { "view", no_argument, NULL, 'c' }, - { "verbose", no_argument, NULL, 'v' }, - { "help", no_argument, NULL, 2 }, - { "version", no_argument, NULL, 1 }, - { NULL, 0, NULL, 0 } - }; - - int c, i; - - status = EXIT_SUCCESS; - while ((c = getopt_long(argc, argv, "cChHv", options, NULL)) >= 0) - switch (c) { - case 'c': mode = view_all; break; - case 'C': mode = copy; break; - case 'h': mode = view_headers; show_headers = 1; break; - case 'H': show_headers = 0; break; - case 'v': hts_verbose++; verbose++; break; - case 1: - printf( -"htsfile (htslib) %s\n" -"Copyright (C) 2024 Genome Research Ltd.\n", - hts_version()); - exit(EXIT_SUCCESS); - break; - case 2: usage(stdout, EXIT_SUCCESS); break; - 
default: usage(stderr, EXIT_FAILURE); break; - } - - if (optind == argc) usage(stderr, EXIT_FAILURE); - - if (mode == copy) { - if (optind + 2 != argc) usage(stderr, EXIT_FAILURE); - copy_raw(argv[optind], argv[optind + 1]); - return status; - } - - for (i = optind; i < argc; i++) { - hFILE *fp = hopen(argv[i], "r"); - if (fp == NULL) { - error("can't open \"%s\"", argv[i]); - continue; - } - - if (mode == identify) { - htsFormat fmt; - if (hts_detect_format2(fp, argv[i], &fmt) < 0) { - error("detecting \"%s\" format failed", argv[i]); - hclose_abruptly(fp); - continue; - } - - char *description = hts_format_description(&fmt); - printf("%s:\t%s\n", argv[i], description); - free(description); - } - else { - htsFile *hts = hts_hopen(fp, argv[i], "r"); - if (hts) { - switch (hts_get_format(hts)->category) { - case sequence_data: - view_sam(hts, argv[i]); - break; - case variant_data: - view_vcf(hts, argv[i]); - break; - default: - if (verbose) - view_raw(fp, argv[i]); - else { - errno = 0; - error("can't view \"%s\": unknown format", argv[i]); - } - break; - } - - if (hts_close(hts) < 0) error("closing \"%s\" failed", argv[i]); - fp = NULL; - } - else if ((errno == EFTYPE || errno == ENOEXEC) && verbose) - view_raw(fp, argv[i]); - else - error("can't view \"%s\"", argv[i]); - } - - if (fp && hclose(fp) < 0) error("closing \"%s\" failed", argv[i]); - } - - if (fclose(stdout) != 0 && errno != EBADF) - error("closing standard output failed"); - - return status; -} diff --git a/src/htslib-1.19.1/htslib-s3-plugin.7 b/src/htslib-1.19.1/htslib-s3-plugin.7 deleted file mode 100644 index ffbcd9c..0000000 --- a/src/htslib-1.19.1/htslib-s3-plugin.7 +++ /dev/null @@ -1,215 +0,0 @@ -.TH htslib-s3-plugin 7 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools" -.SH NAME -htslib-s3-plugin \- htslib AWS S3 plugin -.\" -.\" Copyright (C) 2021-2022 Genome Research Ltd. 
-.\" -.\" Author: Andrew Whitwham -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -. -.\" For code blocks and examples (cf groff's Ultrix-specific man macros) -.de EX - -. in +\\$1 -. nf -. ft CR -.. -.de EE -. ft -. fi -. in - -.. - -.SH DESCRIPTION -The S3 plugin allows htslib file functions to communicate with servers that use -the AWS S3 protocol. Files are identified by their bucket and object key in a -URL format e.g. - -.B s3://mybucket/path/to/file - -With \fIpath/to/file\fR being the object key. - -Necessary security information can be provided in as part of the URL, in -environment variables or from configuration files. - -The full URL format is: - -.B s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH - -The elements are: -.TP -.I SCHEME -The protocol used. Defaults to \fIhttps\fR. -.TP -.I ID -The user AWS access key. -.TP -.I SECRET -The secret key for use with the access key. 
-.TP -.I TOKEN -Token used for temporary security credentials. -.TP -.I BUCKET -AWS S3 bucket. -.TP -.I PATH -Path to the object under the bucket. -.LP - -The environment variables below will be used if the user ID is not set. -.TP -.B AWS_ACCESS_KEY_ID -The user AWS access key. -.TP -.B AWS_SECRET_ACCESS_KEY -The secret key for use with the access key. -.TP -.B AWS_DEFAULT_REGION -The region to use. Defaults to -.IR us-east-1 . -.TP -.B AWS_SESSION_TOKEN -Token used for temporary security credentials. -.TP -.B AWS_DEFAULT_PROFILE -The profile to use in \fIcredentials\fR, \fIconfig\fR or \fIs3cfg\fR files. -Defaults to -.IR default . -.TP -.B AWS_PROFILE -Same as above. -.TP -.B AWS_SHARED_CREDENTIALS_FILE -Location of the credentials file. Defaults to -.IR ~/.aws/credentials . -.TP -.B HTS_S3_S3CFG -Location of the s3cfg file. Defaults to -.IR ~/.s3cfg . -.TP -.B HTS_S3_HOST -Sets the host. Defaults to -.IR s3.amazonaws.com . -.TP -.B HTS_S3_V2 -If set use signature v2 rather the default v4. This will limit the plugin to -reading only. -.TP -.B HTS_S3_PART_SIZE -Sets the upload part size in Mb, the minimum being 5Mb. -By default the part size starts at 5Mb and expands at regular intervals to -accommodate bigger files (up to 2.5 Tbytes with the current rate). -Using this setting disables the automatic part size expansion. -.TP -.B HTS_S3_ADDRESS_STYLE -Sets the URL style. Options are auto (default), virtual or path. -.LP -In the absence of an ID from the previous two methods the credential/config -files will be used. The default file locations are either -\fI~/.aws/credentials\fR or \fI~/.s3cfg\fR (in that order). - -Entries used in aws style credentials file are aws_access_key_id, -aws_secret_access_key, aws_session_token, region, addressing_style and -expiry_time (unofficial, see SHORT-LIVED CREDENTIALS below). -Only the first two are usually needed. 
- -Entries used in s3cmd style config files are access_key, secret_key, -access_token, host_base, bucket_location and host_bucket. Again only the first -two are usually needed. The host_bucket option is only used to set a path-style -URL, see below. - -.SH SHORT-LIVED CREDENTIALS - -Some cloud identity and access management (IAM) systems can make short-lived -credentials that allow access to resources. -These credentials will expire after a time and need to be renewed to -give continued access. -To enable this, the S3 plugin allows an \fIexpiry_time\fR entry to be set in the -\fI.aws/credentials\fR file. -The value for this entry should be the time when the token expires, -following the format in RFC3339 section 5.6, which takes the form: - - 2012-04-29T05:20:48Z - -That is, year - month - day, the letter "T", hour : minute : second. -The time can be followed by the letter "Z", indicating the UTC timezone, -or an offset from UTC which is a "+" or "-" sign followed by two digits for -the hours offset, ":", and two digits for the minutes. - -The S3 plugin will attempt to re-read the credentials file up to 1 minute -before the given expiry time, which means the file needs to be updated with -new credentials before then. -As the exact way of doing this can vary between services and IAM providers, -the S3 plugin expects this to be done by an external user-supplied process. -This may be achieved by running a program that replaces the file as new -credentials become available. 
-The following script shows how it might be done for AWS instance credentials: -.EX 2 -#!/bin/sh -instance='http://169.254.169.254' -tok_url="$instance/latest/api/token" -ttl_hdr='X-aws-ec2-metadata-token-ttl-seconds: 10' -creds_url="$instance/latest/meta-data/iam/security-credentials" -key1='aws_access_key_id = \(rs(.AccessKeyId)\(rsn' -key2='aws_secret_access_key = \(rs(.SecretAccessKey)\(rsn' -key3='aws_session_token = \(rs(.Token)\(rsn' -key4='expiry_time = \(rs(.Expiration)\(rsn' -while true; do - token=`curl -X PUT -H "$ttl_hdr" "$tok_url"` - tok_hdr="X-aws-ec2-metadata-token: $token" - role=`curl -H "$tok_hdr" "$creds_url/"` - expires='now' - ( curl -H "$tok_hdr" "$creds_url/$role" \(rs - | jq -r "\(rs"${key1}${key2}${key3}${key4}\(rs"" > credentials.new ) \(rs - && mv -f credentials.new credentials \(rs - && expires=`grep expiry_time credentials | cut -d ' ' -f 3-` - if test $? -ne 0 ; then break ; fi - expiry=`date -d "$expires - 3 minutes" '+%s'` - now=`date '+%s'` - test "$expiry" -gt "$now" && sleep $((($expiry - $now) / 2)) - sleep 30 -done -.EE - -Note that the \fIexpiry_time\fR key is currently only supported for the -\fI.aws/credentials\fR file (or the file referred to in the -.B AWS_SHARED_CREDENTIALS_FILE -environment variable). - -.SH NOTES -In most cases this plugin transforms the given URL into a virtual host-style -format e.g. \fIhttps://bucket.host/path/to/file\fR. A path-style format is used -where the URL is not DNS compliant or the bucket name contains a dot e.g. -\fIhttps://host/bu.cket/path/to/file\fR. - -Path-style can be forced by setting one either HTS_S3_ADDRESS_STYLE, -addressing_style or host_bucket. The first two can be set to \fBpath\fR while -host_bucket must \fBnot\fR include the \fB%(bucket).s\fR string. 
- -.SH "SEE ALSO" -.IR htsfile (1) -.IR samtools (1) -.PP -RFC 3339: -.PP -htslib website: diff --git a/src/htslib-1.19.1/htslib.map b/src/htslib-1.19.1/htslib.map deleted file mode 100644 index 9542861..0000000 --- a/src/htslib-1.19.1/htslib.map +++ /dev/null @@ -1,638 +0,0 @@ -HTSLIB_1.0 { - bam_aux2A; - bam_aux2Z; - bam_aux2f; - bam_aux2i; - bam_aux_append; - bam_aux_del; - bam_aux_get; - bam_cigar2qlen; - bam_cigar2rlen; - bam_copy1; - bam_destroy1; - bam_dup1; - bam_endpos; - bam_flag2str; - bam_hdr_read; - bam_hdr_write; - bam_init1; - bam_mplp_auto; - bam_mplp_destroy; - bam_mplp_init; - bam_mplp_init_overlaps; - bam_mplp_set_maxcnt; - bam_plp_auto; - bam_plp_destroy; - bam_plp_init; - bam_plp_next; - bam_plp_push; - bam_plp_reset; - bam_plp_set_maxcnt; - bam_read1; - bam_str2flag; - bam_write1; - bcf_add_filter; - bcf_calc_ac; - bcf_clear; - bcf_destroy; - bcf_dup; - bcf_enc_vchar; - bcf_enc_vfloat; - bcf_enc_vint; - bcf_float_missing; - bcf_float_vector_end; - bcf_fmt_array; - bcf_fmt_sized_array; - bcf_get_fmt; - bcf_get_format_string; - bcf_get_format_values; - bcf_get_info; - bcf_get_info_values; - bcf_get_variant_type; - bcf_get_variant_types; - bcf_gt_type; - bcf_has_filter; - bcf_hdr_add_hrec; - bcf_hdr_add_sample; - bcf_hdr_append; - bcf_hdr_combine; - bcf_hdr_destroy; - bcf_hdr_dup; - bcf_hdr_fmt_text; - bcf_hdr_get_hrec; - bcf_hdr_get_version; - bcf_hdr_id2int; - bcf_hdr_init; - bcf_hdr_parse; - bcf_hdr_parse_line; - bcf_hdr_printf; - bcf_hdr_read; - bcf_hdr_remove; - bcf_hdr_seqnames; - bcf_hdr_set; - bcf_hdr_set_samples; - bcf_hdr_set_version; - bcf_hdr_subset; - bcf_hdr_sync; - bcf_hdr_write; - bcf_hrec_add_key; - bcf_hrec_destroy; - bcf_hrec_dup; - bcf_hrec_find_key; - bcf_hrec_format; - bcf_hrec_set_val; - bcf_index_build; - bcf_init; - bcf_is_snp; - bcf_read; - bcf_readrec; - bcf_remove_alleles; - bcf_remove_filter; - bcf_sr_add_reader; - bcf_sr_destroy; - bcf_sr_init; - bcf_sr_next_line; - bcf_sr_regions_destroy; - bcf_sr_regions_flush; - 
bcf_sr_regions_init; - bcf_sr_regions_next; - bcf_sr_regions_overlap; - bcf_sr_regions_seek; - bcf_sr_remove_reader; - bcf_sr_seek; - bcf_sr_set_regions; - bcf_sr_set_samples; - bcf_sr_set_targets; - bcf_subset; - bcf_subset_format; - bcf_sweep_bwd; - bcf_sweep_destroy; - bcf_sweep_fwd; - bcf_sweep_hdr; - bcf_sweep_init; - bcf_translate; - bcf_trim_alleles; - bcf_type_shift; - bcf_unpack; - bcf_update_alleles; - bcf_update_alleles_str; - bcf_update_filter; - bcf_update_format; - bcf_update_format_string; - bcf_update_id; - bcf_update_info; - bcf_write; - bgzf_check_EOF; - bgzf_close; - bgzf_dopen; - bgzf_flush; - bgzf_flush_try; - bgzf_getc; - bgzf_getline; - bgzf_hopen; - bgzf_index_build_init; - bgzf_index_dump; - bgzf_index_load; - bgzf_is_bgzf; - bgzf_mt; - bgzf_open; - bgzf_raw_read; - bgzf_raw_write; - bgzf_read; - bgzf_read_block; - bgzf_seek; - bgzf_set_cache_size; - bgzf_useek; - bgzf_utell; - bgzf_write; - cram_close; - cram_compress_block; - cram_dopen; - cram_eof; - cram_flush; - cram_free_block; - cram_free_container; - cram_new_block; - cram_new_container; - cram_open; - cram_read_block; - cram_read_container; - cram_seek; - cram_set_header; - cram_set_option; - cram_set_voption; - cram_uncompress_block; - cram_write_block; - cram_write_container; - fai_build; - fai_destroy; - fai_fetch; - fai_load; - faidx_fetch_nseq; - faidx_fetch_seq; - faidx_has_seq; - hclose; - hclose_abruptly; - hdopen; - hfile_destroy; - hfile_init; - hfile_oflags; - hflush; - hgetc2; - hopen; - hpeek; - hputc2; - hputs2; - hread2; - hrec_add_idx; - hseek; - hts_close; - hts_file_type; - hts_get_bgzfp; - hts_getline; - hts_idx_destroy; - hts_idx_finish; - hts_idx_get_meta; - hts_idx_get_n_no_coor; - hts_idx_get_stat; - hts_idx_init; - hts_idx_load; - hts_idx_push; - hts_idx_save; - hts_idx_seqnames; - hts_idx_set_meta; - hts_itr_destroy; - hts_itr_next; - hts_itr_query; - hts_itr_querys; - hts_open; - hts_parse_reg; - hts_readlines; - hts_readlist; - hts_set_fai_filename; - 
hts_set_threads; - hts_verbose; - hts_version; - hwrite2; - kf_betai; - kf_erfc; - kf_gammap; - kf_gammaq; - kf_lgamma; - kmemmem; - knet_close; - knet_dopen; - knet_open; - knet_read; - knet_seek; - ksplit_core; - ksprintf; - kstrnstr; - kstrstr; - kstrtok; - kt_fisher_exact; - kvsprintf; - sam_format1; - sam_hdr_add_lines; - sam_hdr_dup; - sam_hdr_incr_ref; - sam_hdr_length; - sam_hdr_parse; - sam_hdr_read; - sam_hdr_str; - sam_hdr_write; - sam_index_load; - sam_itr_queryi; - sam_itr_querys; - sam_open_mode; - sam_parse1; - sam_read1; - sam_write1; - seq_nt16_str; - seq_nt16_table; - stringify_argv; - tbx_conf_bed; - tbx_conf_gff; - tbx_conf_psltbl; - tbx_conf_sam; - tbx_conf_vcf; - tbx_destroy; - tbx_index; - tbx_index_build; - tbx_index_load; - tbx_name2id; - tbx_readrec; - tbx_seqnames; - vcf_format; - vcf_hdr_read; - vcf_hdr_write; - vcf_parse; - vcf_read; - vcf_write; - vcf_write_line; -}; - -HTSLIB_1.1 { - bcf_get_fmt_id; - bcf_get_info_id; - faidx_iseq; - faidx_nseq; - faidx_seq_len; -} HTSLIB_1.0; - - -HTSLIB_1.2.1 { - bcf_copy; - bcf_sr_strerror; - hisremote; - hts_detect_format; - hts_format_description; - hts_get_format; - hts_hopen; - hts_set_opt; - regidx_destroy; - regidx_init; - regidx_insert; - regidx_nregs; - regidx_overlap; - regidx_parse_bed; - regidx_parse_tab; - regidx_seq_names; - regidx_seq_nregs; - seq_nt16_int; -} HTSLIB_1.1; - -HTSLIB_1.3 { - bcf_add_id; - bcf_empty; - bcf_hdr_merge; - bcf_index_build2; - bcf_index_load2; - bcf_remove_allele_set; - bgzf_compress; - cram_block_append; - cram_block_get_comp_size; - cram_block_get_content_id; - cram_block_get_content_type; - cram_block_get_crc32; - cram_block_get_data; - cram_block_get_offset; - cram_block_get_uncomp_size; - cram_block_set_comp_size; - cram_block_set_content_id; - cram_block_set_crc32; - cram_block_set_data; - cram_block_set_offset; - cram_block_set_uncomp_size; - cram_block_size; - cram_block_update_size; - cram_container_get_landmarks; - cram_container_get_length; - 
cram_container_get_num_blocks; - cram_container_is_empty; - cram_container_set_landmarks; - cram_container_set_length; - cram_container_set_num_blocks; - cram_container_size; - cram_copy_slice; - cram_fd_get_fp; - cram_fd_get_header; - cram_fd_get_version; - cram_fd_set_fp; - cram_fd_set_header; - cram_fd_set_version; - cram_major_vers; - cram_minor_vers; - cram_store_container; - cram_transcode_rg; - hfile_add_scheme_handler; - hfile_always_local; - hfile_always_remote; - hts_format_file_extension; - hts_idx_load2; - hts_idx_save_as; - hts_md5_destroy; - hts_md5_final; - hts_md5_hex; - hts_md5_init; - hts_md5_reset; - hts_md5_update; - hts_open_format; - hts_opt_add; - hts_opt_apply; - hts_opt_free; - hts_parse_decimal; - hts_parse_format; - hts_parse_opt_list; - int32_put_blk; - kgetline; - sam_index_build; - sam_index_build2; - sam_index_load2; - sam_open_mode_opts; - tbx_index_build2; - tbx_index_load2; -} HTSLIB_1.2.1; - -HTSLIB_1.4 { - bam_auxB2f; - bam_auxB2i; - bam_auxB_len; - bam_aux_update_str; - bam_mplp_constructor; - bam_mplp_destructor; - bam_mplp_reset; - bam_plp_constructor; - bam_plp_destructor; - bcf_hdr_format; - bcf_index_build3; - bcf_sr_destroy_threads; - bcf_sr_set_opt; - bcf_sr_set_threads; - bgzf_block_write; - bgzf_compression; - bgzf_index_dump_hfile; - bgzf_index_load_hfile; - bgzf_thread_pool; - cram_check_EOF; - cram_get_refs; - errmod_cal; - errmod_destroy; - errmod_init; - fai_build3; - fai_load3; - hgetdelim; - hgets; - hts_check_EOF; - hts_json_fnext; - hts_json_fskip_value; - hts_json_snext; - hts_json_sskip_value; - hts_realloc_or_die; - hts_set_cache_size; - hts_set_thread_pool; - hts_tpool_delete_result; - hts_tpool_destroy; - hts_tpool_dispatch; - hts_tpool_dispatch2; - hts_tpool_init; - hts_tpool_kill; - hts_tpool_next_result; - hts_tpool_next_result_wait; - hts_tpool_process_attach; - hts_tpool_process_destroy; - hts_tpool_process_detach; - hts_tpool_process_empty; - hts_tpool_process_flush; - hts_tpool_process_init; - 
hts_tpool_process_len; - hts_tpool_process_qsize; - hts_tpool_process_ref_decr; - hts_tpool_process_ref_incr; - hts_tpool_process_reset; - hts_tpool_process_shutdown; - hts_tpool_process_sz; - hts_tpool_result_data; - hts_tpool_size; - hts_tpool_wake_dispatch; - kputd; - probaln_glocal; - sam_cap_mapq; - sam_index_build3; - sam_prob_realn; - tbx_index_build3; -} HTSLIB_1.3; - -HTSLIB_1.5 { - hfile_set_blksize; - hts_get_log_level; - hts_log; - hts_set_log_level; -} HTSLIB_1.4; - -HTSLIB_1.6 { - hts_drand48; - hts_erand48; - hts_lrand48; - hts_srand48; -} HTSLIB_1.5; - -HTSLIB_1.7 { - hfile_mem_get_buffer; - hfile_mem_steal_buffer; - hts_itr_multi_bam; - hts_itr_multi_cram; - hts_itr_multi_next; - hts_itr_regions; - hts_json_alloc_token; - hts_json_free_token; - hts_json_token_str; - hts_json_token_type; - hts_reglist_free; - sam_hdr_change_HD; - sam_itr_regions; -} HTSLIB_1.6; - -HTSLIB_1.9 { - bam_aux_update_array; - bam_aux_update_float; - bam_aux_update_int; - fai_fetchqual; - fai_load3_format; - fai_load_format; - faidx_fetch_qual; -} HTSLIB_1.7; - -HTSLIB_1.10 { - bam_cigar_table; - bam_mplp64_auto; - bam_plp64_auto; - bam_plp64_next; - bam_plp_insertion; - bam_set_qname; - bcf_idx_init; - bcf_idx_save; - bcf_index_load3; - bgzf_peek; - fai_fetch64; - fai_fetchqual64; - fai_parse_region; - fai_set_cache_size; - faidx_fetch_qual64; - faidx_fetch_seq64; - haddextension; - hts_free; - hts_idx_fmt; - hts_idx_load3; - hts_idx_tbi_name; - hts_parse_reg64; - hts_parse_region; - hts_reglist_create; - hts_resize_array_; - hts_tpool_dispatch3; - kgetline2; - regidx_init_string; - regidx_insert_list; - regidx_parse_reg; - regidx_parse_vcf; - regidx_push; - regitr_copy; - regitr_destroy; - regitr_init; - regitr_loop; - regitr_overlap; - regitr_reset; - sam_hdr_add_line; - sam_hdr_add_pg; - sam_hdr_count_lines; - sam_hdr_destroy; - sam_hdr_find_line_id; - sam_hdr_find_line_pos; - sam_hdr_find_tag_id; - sam_hdr_find_tag_pos; - sam_hdr_init; - sam_hdr_line_index; - 
sam_hdr_line_name; - sam_hdr_name2tid; - sam_hdr_nref; - sam_hdr_pg_id; - sam_hdr_remove_except; - sam_hdr_remove_line_id; - sam_hdr_remove_line_pos; - sam_hdr_remove_lines; - sam_hdr_remove_tag_id; - sam_hdr_tid2len; - sam_hdr_tid2name; - sam_hdr_update_line; - sam_idx_init; - sam_idx_save; - sam_index_load3; - sam_itr_regarray; - sam_parse_region; - tbx_index_load3; -} HTSLIB_1.9; - -HTSLIB_1.11 { - fai_path; - hts_lib_shutdown; - hts_tpool_process_is_shutdown; - vcf_open_mode; -} HTSLIB_1.10; - -HTSLIB_1.12 { - bam_parse_cigar; - bam_set1; - hfile_has_plugin; - hfile_list_plugins; - hfile_list_schemes; - hts_feature_string; - hts_features; - hts_filter_eval; - hts_filter_free; - hts_filter_init; - hts_set_filter_expression; - hts_test_feature; - sam_parse_cigar; - sam_passes_filter; -} HTSLIB_1.11; - -HTSLIB_1.13 { - hts_idx_nseq; -} HTSLIB_1.12; - -HTSLIB_1.14 { - bam_mods_at_next_pos; - bam_mods_at_qpos; - bam_next_basemod; - bam_parse_basemod; - bam_plp_insertion_mod; - hts_base_mod_state_alloc; - hts_base_mod_state_free; - hts_flush; -} HTSLIB_1.13; - -HTSLIB_1.15 { - hts_detect_format2; -} HTSLIB_1.14; - -HTSLIB_1.16 { - bam_mods_query_type; - bam_mods_recorded; - bcf_has_variant_type; - bcf_has_variant_types; - bcf_variant_length; - cram_decode_slice_header; - cram_free_slice_header; - cram_slice_hdr_get_coords; - cram_slice_hdr_get_embed_ref_id; - cram_slice_hdr_get_num_blocks; - hts_filter_eval2; -} HTSLIB_1.15; - -HTSLIB_1.17 { - bam_aux_first; - bam_aux_next; - bam_aux_remove; - bcf_strerror; - cram_block_get_method; - cram_cid2ds_free; - cram_cid2ds_query; - cram_codec_describe; - cram_codec_get_content_ids; - cram_container_get_num_bases; - cram_container_get_num_records; - cram_decode_compression_header; - cram_describe_encodings; - cram_expand_method; - cram_free_compression_header; - cram_update_cid2ds_map; - fai_adjust_region; - fai_line_length; - faidx_seq_len64; -} HTSLIB_1.16; - -HTSLIB_1.18 { - bam_mods_queryi; - bam_parse_basemod2; - 
fai_thread_pool; -} HTSLIB_1.17; diff --git a/src/htslib-1.19.1/htslib.pc.in b/src/htslib-1.19.1/htslib.pc.in deleted file mode 100644 index d969d6b..0000000 --- a/src/htslib-1.19.1/htslib.pc.in +++ /dev/null @@ -1,15 +0,0 @@ -includedir=@-includedir@ -libdir=@-libdir@ - -# Flags and libraries needed when linking against a static libhts.a -# (used by manual and semi-manual pkg-config(1)-style enquiries). -static_ldflags=@static_LDFLAGS@ -static_libs=@static_LIBS@ - -Name: htslib -Description: C library for high-throughput sequencing data formats -Version: @-PACKAGE_VERSION@ -Cflags: -I${includedir} -Libs: -L${libdir} -lhts -Libs.private: -L${libdir} @private_LIBS@ -lhts -lm -lpthread -Requires.private: zlib @pc_requires@ diff --git a/src/htslib-1.19.1/htslib/bgzf.h b/src/htslib-1.19.1/htslib/bgzf.h deleted file mode 100644 index ea4ec3e..0000000 --- a/src/htslib-1.19.1/htslib/bgzf.h +++ /dev/null @@ -1,467 +0,0 @@ -/// @file htslib/bgzf.h -/// Low-level routines for direct BGZF operations. -/* - Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology - 2011, 2012 Attractive Chaos - Copyright (C) 2009, 2013, 2014, 2017, 2018-2019, 2022-2023 Genome Research Ltd - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -/* The BGZF library was originally written by Bob Handsaker from the Broad - * Institute. It was later improved by the SAMtools developers. */ - -#ifndef HTSLIB_BGZF_H -#define HTSLIB_BGZF_H - -#include -#include - -#include "hts_defs.h" - -// Ensure ssize_t exists within this header. All #includes must precede this, -// and ssize_t must be undefined again at the end of this header. -#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t -#define HTSLIB_SSIZE_T -#define ssize_t intptr_t -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#define BGZF_BLOCK_SIZE 0xff00 // make sure compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE -#define BGZF_MAX_BLOCK_SIZE 0x10000 - -#define BGZF_ERR_ZLIB 1 -#define BGZF_ERR_HEADER 2 -#define BGZF_ERR_IO 4 -#define BGZF_ERR_MISUSE 8 -#define BGZF_ERR_MT 16 // stream cannot be multi-threaded -#define BGZF_ERR_CRC 32 - -struct hFILE; -struct hts_tpool; -struct kstring_t; -struct bgzf_mtaux_t; -typedef struct bgzidx_t bgzidx_t; -typedef struct bgzf_cache_t bgzf_cache_t; -struct z_stream_s; - -struct BGZF { - // Reserved bits should be written as 0; read as "don't care" - unsigned errcode:16, reserved:1, is_write:1, no_eof_block:1, is_be:1; - signed compress_level:9; - unsigned last_block_eof:1, is_compressed:1, is_gzip:1; - int cache_size; - int block_length, block_clength, block_offset; - int64_t block_address, uncompressed_address; - void *uncompressed_block, *compressed_block; - bgzf_cache_t *cache; - struct hFILE *fp; // actual file handle - struct bgzf_mtaux_t *mt; // only used for multi-threading - bgzidx_t *idx; // BGZF index - int idx_build_otf; // build index on the fly, set by bgzf_index_build_init() - 
struct z_stream_s *gz_stream; // for gzip-compressed files - int64_t seeked; // virtual offset of last seek -}; -#ifndef HTS_BGZF_TYPEDEF -typedef struct BGZF BGZF; -#define HTS_BGZF_TYPEDEF -#endif - - /****************** - * Basic routines * - ******************/ - - /** - * Open an existing file descriptor for reading or writing. - * - * @param fd file descriptor - * Note that the file must be opened in binary mode, or else - * there will be problems on platforms that make a difference - * between text and binary mode. - * @param mode mode matching /[rwag][u0-9]+/: 'r' for reading, 'w' for - * writing, 'a' for appending, 'g' for gzip rather than BGZF - * compression (with 'w' only), and digit specifies the zlib - * compression level. - * Note that there is a distinction between 'u' and '0': the - * first yields plain uncompressed output whereas the latter - * outputs uncompressed data wrapped in the zlib format. - * @return BGZF file handler; 0 on error - */ - HTSLIB_EXPORT - BGZF* bgzf_dopen(int fd, const char *mode); - - #define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility - - /** - * Open the specified file for reading or writing. - */ - HTSLIB_EXPORT - BGZF* bgzf_open(const char* path, const char *mode); - - /** - * Open an existing hFILE stream for reading or writing. - */ - HTSLIB_EXPORT - BGZF* bgzf_hopen(struct hFILE *fp, const char *mode); - - /** - * Close the BGZF and free all associated resources. - * - * @param fp BGZF file handler - * @return 0 on success and -1 on error - */ - HTSLIB_EXPORT - int bgzf_close(BGZF *fp); - - /** - * Read up to _length_ bytes from the file storing into _data_. 
- * - * @param fp BGZF file handler - * @param data data array to read into - * @param length size of data to read - * @return number of bytes actually read; 0 on end-of-file and -1 on error - */ - HTSLIB_EXPORT - ssize_t bgzf_read(BGZF *fp, void *data, size_t length) HTS_RESULT_USED; - - /** - * Write _length_ bytes from _data_ to the file. If no I/O errors occur, - * the complete _length_ bytes will be written (or queued for writing). - * - * @param fp BGZF file handler - * @param data data array to write - * @param length size of data to write - * @return number of bytes written (i.e., _length_); negative on error - */ - HTSLIB_EXPORT - ssize_t bgzf_write(BGZF *fp, const void *data, size_t length) HTS_RESULT_USED; - - /** - * Write _length_ bytes from _data_ to the file, the index will be used to - * decide the amount of uncompressed data to be written to each bgzip block. - * If no I/O errors occur, the complete _length_ bytes will be written (or - * queued for writing). - * @param fp BGZF file handler - * @param data data array to write - * @param length size of data to write - * @return number of bytes written (i.e., _length_); negative on error - */ - HTSLIB_EXPORT - ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length); - - /** - * Returns the next byte in the file without consuming it. - * @param fp BGZF file handler - * @return -1 on EOF, - * -2 on error, - * otherwise the unsigned byte value. - */ - HTSLIB_EXPORT - int bgzf_peek(BGZF *fp); - - /** - * Read up to _length_ bytes directly from the underlying stream without - * decompressing. Bypasses BGZF blocking, so must be used with care in - * specialised circumstances only. 
- * - * @param fp BGZF file handler - * @param data data array to read into - * @param length number of raw bytes to read - * @return number of bytes actually read; 0 on end-of-file and -1 on error - */ - HTSLIB_EXPORT - ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length) HTS_RESULT_USED; - - /** - * Write _length_ bytes directly to the underlying stream without - * compressing. Bypasses BGZF blocking, so must be used with care - * in specialised circumstances only. - * - * @param fp BGZF file handler - * @param data data array to write - * @param length number of raw bytes to write - * @return number of bytes actually written; -1 on error - */ - HTSLIB_EXPORT - ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length) HTS_RESULT_USED; - - /** - * Write the data in the buffer to the file. - * - * @param fp BGZF file handle - * @return 0 on success and -1 on error - */ - HTSLIB_EXPORT - int bgzf_flush(BGZF *fp) HTS_RESULT_USED; - - /** - * Return a virtual file pointer to the current location in the file. - * No interpretation of the value should be made, other than a subsequent - * call to bgzf_seek can be used to position the file at the same point. - * Return value is non-negative on success. - */ - #define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF)) - - /** - * Set the file to read from the location specified by _pos_. - * - * @param fp BGZF file handler - * @param pos virtual file offset returned by bgzf_tell() - * @param whence must be SEEK_SET - * @return 0 on success and -1 on error - * - * @note It is not permitted to seek on files open for writing, - * or files compressed with gzip (as opposed to bgzip). 
- */ - HTSLIB_EXPORT - int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence) HTS_RESULT_USED; - - /** - * Check if the BGZF end-of-file (EOF) marker is present - * - * @param fp BGZF file handler opened for reading - * @return 1 if the EOF marker is present and correct; - * 2 if it can't be checked, e.g., because fp isn't seekable; - * 0 if the EOF marker is absent; - * -1 (with errno set) on error - */ - HTSLIB_EXPORT - int bgzf_check_EOF(BGZF *fp); - - /** Return the file's compression format - * - * @param fp BGZF file handle - * @return A small integer matching the corresponding - * `enum htsCompression` value: - * - 0 / `no_compression` if the file is uncompressed - * - 1 / `gzip` if the file is plain GZIP-compressed - * - 2 / `bgzf` if the file is BGZF-compressed - * @since 1.4 - */ - HTSLIB_EXPORT - int bgzf_compression(BGZF *fp); - - /** - * Check if a file is in the BGZF format - * - * @param fn file name - * @return 1 if _fn_ is BGZF; 0 if not or on I/O error - */ - HTSLIB_EXPORT - int bgzf_is_bgzf(const char *fn) HTS_DEPRECATED("Use bgzf_compression() or hts_detect_format() instead"); - - /********************* - * Advanced routines * - *********************/ - - /** - * Set the cache size. Only effective when compiled with -DBGZF_CACHE. - * - * @param fp BGZF file handler - * @param size size of cache in bytes; 0 to disable caching (default) - */ - HTSLIB_EXPORT - void bgzf_set_cache_size(BGZF *fp, int size); - - /** - * Flush the file if the remaining buffer size is smaller than _size_ - * @return 0 if flushing succeeded or was not needed; negative on error - */ - HTSLIB_EXPORT - int bgzf_flush_try(BGZF *fp, ssize_t size) HTS_RESULT_USED; - - /** - * Read one byte from a BGZF file. It is faster than bgzf_read() - * @param fp BGZF file handler - * @return byte read; -1 on end-of-file or error - */ - HTSLIB_EXPORT - int bgzf_getc(BGZF *fp); - - /** - * Read one line from a BGZF file. 
It is faster than bgzf_getc() - * - * @param fp BGZF file handler - * @param delim delimiter - * @param str string to write to; must be initialized - * @return length of the string (capped at INT_MAX); - * -1 on end-of-file; <= -2 on error - */ - HTSLIB_EXPORT - int bgzf_getline(BGZF *fp, int delim, struct kstring_t *str); - - /** - * Read the next BGZF block. - */ - HTSLIB_EXPORT - int bgzf_read_block(BGZF *fp) HTS_RESULT_USED; - - /** - * Enable multi-threading via a shared thread pool. This means - * both encoder and decoder can balance usage across a single pool - * of worker jobs. - * - * @param fp BGZF file handler - * @param pool The thread pool (see hts_create_threads) - * @param qsize The size of the job queue. If 0 this is twice the - * number of threads in the pool. - */ - HTSLIB_EXPORT - int bgzf_thread_pool(BGZF *fp, struct hts_tpool *pool, int qsize); - - /** - * Enable multi-threading - * - * @param fp BGZF file handler - * @param n_threads #threads used for reading / writing - * @param n_sub_blks Unused (was #blocks processed by each thread) - */ - HTSLIB_EXPORT - int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks); - - /** - * Compress a single BGZF block. - * - * @param dst output buffer (must have size >= BGZF_MAX_BLOCK_SIZE) - * @param dlen size of output buffer; updated on return to the number - * of bytes actually written to dst - * @param src buffer to be compressed - * @param slen size of data to compress (must be <= BGZF_BLOCK_SIZE) - * @param level compression level - * @return 0 on success and negative on error - */ - HTSLIB_EXPORT - int bgzf_compress(void *dst, size_t *dlen, const void *src, size_t slen, int level); - - /******************* - * bgzidx routines * - *******************/ - - /** - * Position BGZF at the uncompressed offset - * - * @param fp BGZF file handler; must be opened for reading - * @param uoffset file offset in the uncompressed data - * @param where must be SEEK_SET - * - * Returns 0 on success and -1 on error. 
- * - * @note It is not permitted to seek on files open for writing, - * or files compressed with gzip (as opposed to bgzip). - */ - HTSLIB_EXPORT - int bgzf_useek(BGZF *fp, off_t uoffset, int where) HTS_RESULT_USED; - - /** - * Position in uncompressed BGZF - * - * @param fp BGZF file handler; must be opened for reading - * - * Returns the current offset on success and -1 on error. - */ - HTSLIB_EXPORT - off_t bgzf_utell(BGZF *fp); - - /** - * Tell BGZF to build index while compressing. - * - * @param fp BGZF file handler; can be opened for reading or writing. - * - * Returns 0 on success and -1 on error. - * - * @note This function must be called before any data has been read or - * written, and in particular before calling bgzf_mt() on the same - * file handle (as threads may start reading data before the index - * has been set up). - */ - HTSLIB_EXPORT - int bgzf_index_build_init(BGZF *fp); - - /// Load BGZF index - /** - * @param fp BGZF file handler - * @param bname base name - * @param suffix suffix to add to bname (can be NULL) - * @return 0 on success and -1 on error. - */ - HTSLIB_EXPORT - int bgzf_index_load(BGZF *fp, - const char *bname, const char *suffix) HTS_RESULT_USED; - - /// Load BGZF index from an hFILE - /** - * @param fp BGZF file handle - * @param idx hFILE to read from - * @param name file name (for error reporting only; can be NULL) - * @return 0 on success and -1 on error. - * - * Populates @p fp with index data read from the hFILE handle @p idx. - * The file pointer to @idx should point to the start of the index - * data when this function is called. - * - * The file name can optionally be passed in the @p name parameter. This - * is only used for printing error messages; if NULL the word "index" is - * used instead. 
- */ - HTSLIB_EXPORT - int bgzf_index_load_hfile(BGZF *fp, struct hFILE *idx, - const char *name) HTS_RESULT_USED; - - /// Save BGZF index - /** - * @param fp BGZF file handler - * @param bname base name - * @param suffix suffix to add to bname (can be NULL) - * @return 0 on success and -1 on error. - */ - HTSLIB_EXPORT - int bgzf_index_dump(BGZF *fp, - const char *bname, const char *suffix) HTS_RESULT_USED; - - /// Write a BGZF index to an hFILE - /** - * @param fp BGZF file handle - * @param idx hFILE to write to - * @param name file name (for error reporting only, can be NULL) - * @return 0 on success and -1 on error. - * - * Write index data from @p fp to the file @p idx. - * - * The file name can optionally be passed in the @p name parameter. This - * is only used for printing error messages; if NULL the word "index" is - * used instead. - */ - - HTSLIB_EXPORT - int bgzf_index_dump_hfile(BGZF *fp, struct hFILE *idx, - const char *name) HTS_RESULT_USED; - -#ifdef __cplusplus -} -#endif - -#ifdef HTSLIB_SSIZE_T -#undef HTSLIB_SSIZE_T -#undef ssize_t -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/cram.h b/src/htslib-1.19.1/htslib/cram.h deleted file mode 100644 index e0b5183..0000000 --- a/src/htslib-1.19.1/htslib/cram.h +++ /dev/null @@ -1,753 +0,0 @@ -/// @file htslib/cram.h -/// CRAM format-specific API functions. -/* - Copyright (C) 2015, 2016, 2018-2020, 2022-2023 Genome Research Ltd. 
- - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -/** @file - * Consider using the higher level hts_*() API for programs that wish to - * be file format agnostic (see htslib/hts.h). - * - * This API should be used for CRAM specific code. The specifics of the - * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h - * although these should not be included directly (use this file instead). - */ - -#ifndef HTSLIB_CRAM_H -#define HTSLIB_CRAM_H - -#include -#include -#include - -#include "hts_defs.h" -#include "hts.h" -#include "sam.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// see cram/cram_structs.h for an internal more complete copy of this enum - -// Htslib 1.11 had these listed without any hts prefix, and included -// some internal values such as RANS1 and GZIP_RLE (which shouldn't have ever -// been public). 
-// -// We can't find evidence of these being used and the data type occurs -// nowhere in functions or structures meaning using it would be pointless. -// However for safety, if you absolute need the API to not change then -// define HTS_COMPAT to 101100 (XYYYZZ for X.Y[.Z], meaning 1.11). -#if defined(HTS_COMPAT) && HTS_COMPAT <= 101100 -enum cram_block_method { - // Public methods as defined in the CRAM spec. - BM_ERROR = -1, - - // CRAM 2.x and 3.0 - RAW = 0, - GZIP = 1, - BZIP2 = 2, - LZMA = 3, - RANS = 4, - - // NB: the subsequent numbers may change. They're simply here for - // compatibility with the old API, but may have no bearing on the - // internal way htslib works. DO NOT USE - RANS0 = 4, - RANS1 = 10, - GZIP_RLE = 11, -}; -#else - -// Values as defined in the CRAM specifications. -// See cram/cram_structs.h cram_block_method_int for an expanded version of -// this with local specialisations assigned to codes. -enum cram_block_method { - CRAM_COMP_UNKNOWN = -1, - - // CRAM 2.x and 3.0 - CRAM_COMP_RAW = 0, - CRAM_COMP_GZIP = 1, - CRAM_COMP_BZIP2 = 2, - - // CRAM 3.0 - CRAM_COMP_LZMA = 3, - CRAM_COMP_RANS4x8 = 4, // 4-way interleaving, 8-bit renormalisation - - // CRAM 3.1 - CRAM_COMP_RANSNx16 = 5, // both 4x16 and 32x16 variants, plus transforms - CRAM_COMP_ARITH = 6, // aka Range coding - CRAM_COMP_FQZ = 7, // FQZComp - CRAM_COMP_TOK3 = 8, // Name tokeniser -}; -#endif - -/* NOTE this structure may be expanded in future releases by appending - * additional fields. - * - * Do not assume the size is fixed and avoid using arrays of this struct. - */ -typedef struct { - enum cram_block_method method; - - // Generic compression level if known (0 if not). - // 1 or 9 for gzip min/max flag (else 5). 1-9 for bzip2 - // 1 or 11 for for tok3 (rans/arith encoder). 
- int level; - - // For rans* and arith codecs - int order; - - // ransNx16/arith specific - int rle; - int pack; - int stripe; - int cat; - int nosz; - int Nway; - - // Arithmetic coder only - int ext; // external: use gz, xz or bzip2 -} cram_method_details; - -enum cram_content_type { - CT_ERROR = -1, - FILE_HEADER = 0, - COMPRESSION_HEADER = 1, - MAPPED_SLICE = 2, - UNMAPPED_SLICE = 3, // CRAM V1.0 only - EXTERNAL = 4, - CORE = 5, -}; - -// Opaque data types, see cram_structs for the fully fledged versions. -typedef struct cram_file_def cram_file_def; -typedef struct cram_fd cram_fd; -typedef struct cram_container cram_container; -typedef struct cram_block cram_block; -typedef struct cram_slice cram_slice; -typedef struct cram_metrics cram_metrics; -typedef struct cram_block_slice_hdr cram_block_slice_hdr; -typedef struct cram_block_compression_hdr cram_block_compression_hdr; -typedef struct cram_codec cram_codec; -typedef struct refs_t refs_t; - -struct hFILE; - -// Accessor functions - -/* - *----------------------------------------------------------------------------- - * cram_fd - */ -HTSLIB_EXPORT -sam_hdr_t *cram_fd_get_header(cram_fd *fd); - -HTSLIB_EXPORT -void cram_fd_set_header(cram_fd *fd, sam_hdr_t *hdr); - -HTSLIB_EXPORT -int cram_fd_get_version(cram_fd *fd); - -HTSLIB_EXPORT -void cram_fd_set_version(cram_fd *fd, int vers); - -HTSLIB_EXPORT -int cram_major_vers(cram_fd *fd); -HTSLIB_EXPORT -int cram_minor_vers(cram_fd *fd); - -HTSLIB_EXPORT -struct hFILE *cram_fd_get_fp(cram_fd *fd); -HTSLIB_EXPORT -void cram_fd_set_fp(cram_fd *fd, struct hFILE *fp); - - -/* - *----------------------------------------------------------------------------- - * cram_container - */ -HTSLIB_EXPORT -int32_t cram_container_get_length(cram_container *c); -HTSLIB_EXPORT -void cram_container_set_length(cram_container *c, int32_t length); -HTSLIB_EXPORT -int32_t cram_container_get_num_blocks(cram_container *c); -HTSLIB_EXPORT -void cram_container_set_num_blocks(cram_container 
*c, int32_t num_blocks); -HTSLIB_EXPORT -int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks); -HTSLIB_EXPORT -void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks, - int32_t *landmarks); -HTSLIB_EXPORT -int32_t cram_container_get_num_records(cram_container *c); -HTSLIB_EXPORT -int64_t cram_container_get_num_bases(cram_container *c); - -/* Returns true if the container is empty (EOF marker) */ -HTSLIB_EXPORT -int cram_container_is_empty(cram_fd *fd); - - -/* - *----------------------------------------------------------------------------- - * cram_block - */ -HTSLIB_EXPORT -int32_t cram_block_get_content_id(cram_block *b); -HTSLIB_EXPORT -int32_t cram_block_get_comp_size(cram_block *b); -HTSLIB_EXPORT -int32_t cram_block_get_uncomp_size(cram_block *b); -HTSLIB_EXPORT -int32_t cram_block_get_crc32(cram_block *b); -HTSLIB_EXPORT -void * cram_block_get_data(cram_block *b); -HTSLIB_EXPORT -enum cram_content_type cram_block_get_content_type(cram_block *b); -HTSLIB_EXPORT -enum cram_block_method cram_block_get_method(cram_block *b); - -HTSLIB_EXPORT -cram_method_details *cram_expand_method(uint8_t *data, int32_t size, - enum cram_block_method comp); - -HTSLIB_EXPORT -void cram_block_set_content_id(cram_block *b, int32_t id); -HTSLIB_EXPORT -void cram_block_set_comp_size(cram_block *b, int32_t size); -HTSLIB_EXPORT -void cram_block_set_uncomp_size(cram_block *b, int32_t size); -HTSLIB_EXPORT -void cram_block_set_crc32(cram_block *b, int32_t crc); -HTSLIB_EXPORT -void cram_block_set_data(cram_block *b, void *data); - -HTSLIB_EXPORT -int cram_block_append(cram_block *b, const void *data, int size); -HTSLIB_EXPORT -void cram_block_update_size(cram_block *b); - -// Offset is known as "size" internally, but it can be confusing. 
-HTSLIB_EXPORT -size_t cram_block_get_offset(cram_block *b); -HTSLIB_EXPORT -void cram_block_set_offset(cram_block *b, size_t offset); - -/* - * Computes the size of a cram block, including the block - * header itself. - */ -HTSLIB_EXPORT -uint32_t cram_block_size(cram_block *b); - -/* - * Returns the Block Content ID values referred to by a cram_codec in - * ids[2]. - * - * -2 is unused. - * -1 is CORE - * >= 0 is the block with that Content ID - */ -HTSLIB_EXPORT -void cram_codec_get_content_ids(cram_codec *c, int ids[2]); - -/* - * Produces a human readable description of the codec parameters. - * This is appended to an existing kstring 'ks'. - * - * Returns 0 on succes, - * <0 on failure - */ -HTSLIB_EXPORT -int cram_codec_describe(cram_codec *c, kstring_t *ks); - -/* - * Renumbers RG numbers in a cram compression header. - * - * CRAM stores RG as the Nth number in the header, rather than a - * string holding the ID: tag. This is smaller in space, but means - * "samtools cat" to join files together that contain single but - * different RG lines needs a way of renumbering them. - * - * The file descriptor is expected to be immediately after the - * cram_container structure (ie before the cram compression header). - * Due to the nature of the CRAM format, this needs to read and write - * the blocks itself. Note that there may be multiple slices within - * the container, meaning multiple compression headers to manipulate. - * Changing RG may change the size of the compression header and - * therefore the length field in the container. Hence we rewrite all - * blocks just in case and also emit the adjusted container. - * - * The current implementation can only cope with renumbering a single - * RG (and only then if it is using HUFFMAN or BETA codecs). In - * theory it *may* be possible to renumber multiple RGs if they use - * HUFFMAN to the CORE block or use an external block unshared by any - * other data series. 
So we have an API that can be upgraded to - * support this, but do not implement it for now. An example - * implementation of RG as an EXTERNAL block would be to find that - * block and rewrite it, returning the number of blocks consumed. - * - * Returns 0 on success; - * -1 if unable to edit; - * -2 on other errors (eg I/O). - */ -HTSLIB_EXPORT -int cram_transcode_rg(cram_fd *in, cram_fd *out, - cram_container *c, - int nrg, int *in_rg, int *out_rg); - -/* - * Copies the blocks representing the next num_slice slices from a - * container from 'in' to 'out'. It is expected that the file pointer - * is just after the read of the cram_container and cram compression - * header. - * - * Returns 0 on success - * -1 on failure - */ -HTSLIB_EXPORT -int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice); - -/* - * Decodes a CRAM block compression header. - * Returns header ptr on success - * NULL on failure - */ -HTSLIB_EXPORT -cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, - cram_block *b); -/* - * Frees a cram_block_compression_hdr structure. - */ -HTSLIB_EXPORT -void cram_free_compression_header(cram_block_compression_hdr *hdr); - -typedef struct cram_cid2ds_t cram_cid2ds_t; - -/* - * Map cram block numbers to data-series. It's normally a 1:1 mapping, - * but in rare cases it can be 1:many (or even many:many). - * The key is the block number and the value is an index into the data-series - * array, which we iterate over until reaching a negative value. - * - * Provide cid2ds as NULL to allocate a new map or pass in an existing one - * to append to this map. The new (or existing) map is returned. - * - * Returns the cid2ds (newly allocated or as provided) on success, - * NULL on failure. - */ -HTSLIB_EXPORT -cram_cid2ds_t *cram_update_cid2ds_map(cram_block_compression_hdr *hdr, - cram_cid2ds_t *cid2ds); - -/* - * Return a list of data series observed as belonging to a block with - * the specified content_id. 
*n is the number of data series - * returned, or 0 if block is unused. - * Block content_id of -1 is used to indicate the CORE block. - * - * The pointer returned is owned by the cram_cid2ds state and should - * not be freed by the caller. - */ -HTSLIB_EXPORT -int *cram_cid2ds_query(cram_cid2ds_t *c2d, int content_id, int *n); - -/* - * Frees a cram_cid2ds_t allocated by cram_update_cid2ds_map - */ -HTSLIB_EXPORT -void cram_cid2ds_free(cram_cid2ds_t *cid2ds); - -/* - * Produces a description of the record and tag encodings held within - * a compression header and appends to 'ks'. - * - * Returns 0 on success, - * <0 on failure. - */ -HTSLIB_EXPORT -int cram_describe_encodings(cram_block_compression_hdr *hdr, kstring_t *ks); - -/* - *----------------------------------------------------------------------------- - * cram slice interrogation - */ - -/* - * Returns the number of cram blocks within this slice. - */ -HTSLIB_EXPORT -int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr); - -/* - * Returns the block content_id for the block containing an embedded reference - * sequence. If none is present, -1 is returned. - */ -HTSLIB_EXPORT -int cram_slice_hdr_get_embed_ref_id(cram_block_slice_hdr *h); - -/* - * Returns slice reference ID, start and span (length) coordinates. - * Return parameters may be NULL in which case they are ignored. - */ -HTSLIB_EXPORT -void cram_slice_hdr_get_coords(cram_block_slice_hdr *h, - int *refid, hts_pos_t *start, hts_pos_t *span); - -/* - * Decodes a slice header from a cram block. - * Returns the opaque cram_block_slice_hdr pointer on success, - * NULL on failure. - */ -HTSLIB_EXPORT -cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b); - -/* - * Frees a cram_block_slice_hdr structure. 
- */ -HTSLIB_EXPORT -void cram_free_slice_header(cram_block_slice_hdr *hdr); - -/* - *----------------------------------------------------------------------------- - * cram_io basics - */ - -/**@{ ---------------------------------------------------------------------- - * CRAM blocks - the dynamically growable data block. We have code to - * create, update, (un)compress and read/write. - * - * These are derived from the deflate_interlaced.c blocks, but with the - * CRAM extension of content types and IDs. - */ - -/*! Allocates a new cram_block structure with a specified content_type and - * id. - * - * @return - * Returns block pointer on success; - * NULL on failure - * - * The cram_block struct returned by a successful call should be freed - * via cram_free_block() when it is no longer needed. - */ -HTSLIB_EXPORT -cram_block *cram_new_block(enum cram_content_type content_type, - int content_id); - -/*! Reads a block from a cram file. - * - * @return - * Returns cram_block pointer on success; - * NULL on failure - * - * The cram_block struct returned by a successful call should be freed - * via cram_free_block() when it is no longer needed. - */ -HTSLIB_EXPORT -cram_block *cram_read_block(cram_fd *fd); - -/*! Writes a CRAM block. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_write_block(cram_fd *fd, cram_block *b); - -/*! Frees a CRAM block, deallocating internal data too. - */ -HTSLIB_EXPORT -void cram_free_block(cram_block *b); - -/*! Uncompresses a CRAM block, if compressed. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_uncompress_block(cram_block *b); - -/*! Compresses a block. - * - * Compresses a block using one of two different zlib strategies. If we only - * want one choice set strat2 to be -1. - * - * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED - * or Z_DEFAULT_STRATEGY on quality data. 
If so, we'd rather use it as it is - * significantly faster. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, - int method, int level); -int cram_compress_block2(cram_fd *fd, cram_slice *s, - cram_block *b, cram_metrics *metrics, - int method, int level); - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * Containers - */ - -/*! Creates a new container, specifying the maximum number of slices - * and records permitted. - * - * @return - * Returns cram_container ptr on success; - * NULL on failure - * - * The cram_container struct returned by a successful call should be freed - * via cram_free_container() when it is no longer needed. - */ -HTSLIB_EXPORT -cram_container *cram_new_container(int nrec, int nslice); -HTSLIB_EXPORT -void cram_free_container(cram_container *c); - -/*! Reads a container header. - * - * @return - * Returns cram_container on success; - * NULL on failure or no container left (fd->err == 0). - * - * The cram_container struct returned by a successful call should be freed - * via cram_free_container() when it is no longer needed. - */ -HTSLIB_EXPORT -cram_container *cram_read_container(cram_fd *fd); - -/*! Writes a container structure. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_write_container(cram_fd *fd, cram_container *h); - -/* - * Stores the container structure in dat and returns *size as the - * number of bytes written to dat[]. The input size of dat is also - * held in *size and should be initialised to cram_container_size(c). 
- * - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size); - -HTSLIB_EXPORT -int cram_container_size(cram_container *c); - -/**@}*/ -/**@{ ---------------------------------------------------------------------- - * The top-level cram opening, closing and option handling - */ - -/*! Opens a CRAM file for read (mode "rb") or write ("wb"). - * - * The filename may be "-" to indicate stdin or stdout. - * - * @return - * Returns file handle on success; - * NULL on failure. - */ -HTSLIB_EXPORT -cram_fd *cram_open(const char *filename, const char *mode); - -/*! Opens an existing stream for reading or writing. - * - * @return - * Returns file handle on success; - * NULL on failure. - */ -HTSLIB_EXPORT -cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode); - -/*! Closes a CRAM file. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_close(cram_fd *fd); - -/* - * Seek within a CRAM file. - * - * Returns 0 on success - * -1 on failure - */ -HTSLIB_EXPORT -int cram_seek(cram_fd *fd, off_t offset, int whence); - -/* - * Flushes a CRAM file. - * Useful for when writing to stdout without wishing to close the stream. - * - * Returns 0 on success - * -1 on failure - */ -HTSLIB_EXPORT -int cram_flush(cram_fd *fd); - -/*! Checks for end of file on a cram_fd stream. - * - * @return - * Returns 0 if not at end of file - * 1 if we hit an expected EOF (end of range or EOF block) - * 2 for other EOF (end of stream without EOF block) - */ -HTSLIB_EXPORT -int cram_eof(cram_fd *fd); - -/*! Sets options on the cram_fd. - * - * See CRAM_OPT_* definitions in hts.h. - * Use this immediately after opening. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...); - -/*! Sets options on the cram_fd. - * - * See CRAM_OPT_* definitions in hts.h. 
- * Use this immediately after opening. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args); - -/*! - * Attaches a header to a cram_fd. - * - * This should be used when creating a new cram_fd for writing where - * we have an SAM_hdr already constructed (eg from a file we've read - * in). - * - * @return - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int cram_set_header(cram_fd *fd, sam_hdr_t *hdr); - -/*! Check if this file has a proper EOF block - * - * @return - * Returns 3 if the file is a version of CRAM that does not contain EOF blocks - * 2 if the file is a stream and thus unseekable - * 1 if the file contains an EOF block - * 0 if the file does not contain an EOF block - * -1 if an error occurred whilst reading the file or we could not seek back to where we were - * - */ -HTSLIB_EXPORT -int cram_check_EOF(cram_fd *fd); - -/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ -HTSLIB_EXPORT -int int32_put_blk(cram_block *b, int32_t val); - -/**@}*/ -/**@{ ------------------------------------------------------------------- - * Old typedef and function names for compatibility with existing code. - * Header functionality is now provided by sam.h's sam_hdr_t functions. - */ - -typedef sam_hdr_t SAM_hdr; - -/*! Tokenises a SAM header into a hash table. - * - * Also extracts a few bits on specific data types, such as @RG lines. - * - * @return - * Returns a SAM_hdr struct on success (free with sam_hdr_free()); - * NULL on failure - */ -static inline SAM_hdr *sam_hdr_parse_(const char *hdr, size_t len) { return sam_hdr_parse(len, hdr); } - -/*! Deallocates all storage used by a SAM_hdr struct. - * - * This also decrements the header reference count. If after decrementing - * it is still non-zero then the header is assumed to be in use by another - * caller and the free is not done. 
- */ -static inline void sam_hdr_free(SAM_hdr *hdr) { sam_hdr_destroy(hdr); } - -/* sam_hdr_length() and sam_hdr_str() are now provided by sam.h. */ - -/*! Add an @PG line. - * - * If we wish complete control over this use sam_hdr_add_line() directly. This - * function uses that, but attempts to do a lot of tedious house work for - * you too. - * - * - It will generate a suitable ID if the supplied one clashes. - * - It will generate multiple @PG records if we have multiple PG chains. - * - * Call it as per sam_hdr_add_line() with a series of key,value pairs ending - * in NULL. - * - * @return - * Returns 0 on success; - * -1 on failure - */ -#define sam_hdr_add_PG sam_hdr_add_pg - -/**@{ -------------------------------------------------------------------*/ - -/*! - * Returns the refs_t structure used by a cram file handle. - * - * This may be used in conjunction with option CRAM_OPT_SHARED_REF to - * share reference memory between multiple file handles. - * - * @return - * Returns NULL if none exists or the file handle is not a CRAM file. - */ -HTSLIB_EXPORT -refs_t *cram_get_refs(htsFile *fd); - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/faidx.h b/src/htslib-1.19.1/htslib/faidx.h deleted file mode 100644 index 4351b3f..0000000 --- a/src/htslib-1.19.1/htslib/faidx.h +++ /dev/null @@ -1,391 +0,0 @@ -/// @file htslib/faidx.h -/// FASTA random access. -/* - Copyright (C) 2008, 2009, 2013, 2014, 2016, 2017-2020, 2022-2023 Genome Research Ltd. 
- - Author: Heng Li - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef HTSLIB_FAIDX_H -#define HTSLIB_FAIDX_H - -#include -#include "hts_defs.h" -#include "hts.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** @file - - Index FASTA or FASTQ files and extract subsequence. 
- - The fai file index columns for FASTA are: - - chromosome name - - chromosome length: number of bases - - offset: number of bytes to skip to get to the first base - from the beginning of the file, including the length - of the sequence description string (`>chr ..\n`) - - line length: number of bases per line (excluding `\n`) - - binary line length: number of bytes, including `\n` - - The index for FASTQ is similar to above: - - chromosome name - - chromosome length: number of bases - - sequence offset: number of bytes to skip to get to the first base - from the beginning of the file, including the length - of the sequence description string (`@chr ..\n`) - - line length: number of bases per line (excluding `\n`) - - binary line length: number of bytes, including `\n` - - quality offset: number of bytes to skip from the beginning of the file - to get to the first quality value in the indexed entry. - - The FASTQ version of the index uses line length and binary line length - for both the sequence and the quality values, so they must be line - wrapped in the same way. - */ - -struct faidx_t; -/// Opaque structure representing FASTA index -typedef struct faidx_t faidx_t; - -/// Opaque structure; sole item needed from htslib/thread_pool.h -struct hts_tpool; - -/// File format to be dealing with. -enum fai_format_options { - FAI_NONE, - FAI_FASTA, - FAI_FASTQ -}; - -/// Build index for a FASTA or FASTQ or bgzip-compressed FASTA or FASTQ file. -/** @param fn FASTA/FASTQ file name - @param fnfai Name of .fai file to build. - @param fngzi Name of .gzi file to build (if fn is bgzip-compressed). - @return 0 on success; or -1 on failure - -If fnfai is NULL, ".fai" will be appended to fn to make the FAI file name. -If fngzi is NULL, ".gzi" will be appended to fn for the GZI file. The GZI -file will only be built if fn is bgzip-compressed. 
-*/ -HTSLIB_EXPORT -int fai_build3(const char *fn, const char *fnfai, const char *fngzi) HTS_RESULT_USED; - -/// Build index for a FASTA or FASTQ or bgzip-compressed FASTA or FASTQ file. -/** @param fn FASTA/FASTQ file name - @return 0 on success; or -1 on failure - -File "fn.fai" will be generated. This function is equivalent to -fai_build3(fn, NULL, NULL); -*/ -HTSLIB_EXPORT -int fai_build(const char *fn) HTS_RESULT_USED; - -/// Destroy a faidx_t struct -HTSLIB_EXPORT -void fai_destroy(faidx_t *fai); - -enum fai_load_options { - FAI_CREATE = 0x01, -}; - -/// Load FASTA indexes. -/** @param fn File name of the FASTA file (can be compressed with bgzip). - @param fnfai File name of the FASTA index. - @param fngzi File name of the bgzip index. - @param flags Option flags to control index file caching and creation. - @return Pointer to a faidx_t struct on success, NULL on failure. - -If fnfai is NULL, ".fai" will be appended to fn to make the FAI file name. -If fngzi is NULL, ".gzi" will be appended to fn for the bgzip index name. -The bgzip index is only needed if fn is compressed. - -If (flags & FAI_CREATE) is true, the index files will be built using -fai_build3() if they are not already present. - -The struct returned by a successful call should be freed via fai_destroy() -when it is no longer needed. -*/ -HTSLIB_EXPORT -faidx_t *fai_load3(const char *fn, const char *fnfai, const char *fngzi, - int flags); - -/// Load index from "fn.fai". -/** @param fn File name of the FASTA file - @return Pointer to a faidx_t struct on success, NULL on failure. - -This function is equivalent to fai_load3(fn, NULL, NULL, FAI_CREATE|FAI_CACHE); -*/ -HTSLIB_EXPORT -faidx_t *fai_load(const char *fn); - -/// Load FASTA or FASTQ indexes. -/** @param fn File name of the FASTA/FASTQ file (can be compressed with bgzip). - @param fnfai File name of the FASTA/FASTQ index. - @param fngzi File name of the bgzip index. - @param flags Option flags to control index file caching and creation. 
- @param format FASTA or FASTQ file format - @return Pointer to a faidx_t struct on success, NULL on failure. - -If fnfai is NULL, ".fai" will be appended to fn to make the FAI file name. -If fngzi is NULL, ".gzi" will be appended to fn for the bgzip index name. -The bgzip index is only needed if fn is compressed. - -If (flags & FAI_CREATE) is true, the index files will be built using -fai_build3() if they are not already present. - -The struct returned by a successful call should be freed via fai_destroy() -when it is no longer needed. -*/ -HTSLIB_EXPORT -faidx_t *fai_load3_format(const char *fn, const char *fnfai, const char *fngzi, - int flags, enum fai_format_options format); - -/// Load index from "fn.fai". -/** @param fn File name of the FASTA/FASTQ file - @param format FASTA or FASTQ file format - @return Pointer to a faidx_t struct on success, NULL on failure. - -This function is equivalent to fai_load3_format(fn, NULL, NULL, FAI_CREATE|FAI_CACHE, format); -*/ -HTSLIB_EXPORT -faidx_t *fai_load_format(const char *fn, enum fai_format_options format); - -/// Fetch the sequence in a region -/** @param fai Pointer to the faidx_t struct - @param reg Region in the format "chr2:20,000-30,000" - @param len Length of the region; -2 if seq not present, -1 general error - @return Pointer to the sequence; `NULL` on failure - -The returned sequence is allocated by `malloc()` family and should be destroyed -by end users by calling `free()` on it. - -To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" -are reference names, quote using curly braces. -Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. 
-*/ -HTSLIB_EXPORT -char *fai_fetch(const faidx_t *fai, const char *reg, int *len); -HTSLIB_EXPORT -char *fai_fetch64(const faidx_t *fai, const char *reg, hts_pos_t *len); - -/// Query the line-wrap length for a chromosome specified as part of a region -/** @param fai Pointer to the faidx_t struct - @param reg Region in the format "chr2:20,000-30,000" - @return The line length (excluding newline), - negative on error. -*/ -HTSLIB_EXPORT -hts_pos_t fai_line_length(const faidx_t *fai, const char *reg); - -/// Fetch the quality string for a region for FASTQ files -/** @param fai Pointer to the faidx_t struct - @param reg Region in the format "chr2:20,000-30,000" - @param len Length of the region; -2 if seq not present, -1 general error - @return Pointer to the quality string; null on failure - -The returned quality string is allocated by `malloc()` family and should be -destroyed by end users by calling `free()` on it. - -Region names can be quoted with curly braces, as for fai_fetch(). -*/ -HTSLIB_EXPORT -char *fai_fetchqual(const faidx_t *fai, const char *reg, int *len); -HTSLIB_EXPORT -char *fai_fetchqual64(const faidx_t *fai, const char *reg, hts_pos_t *len); - -/// Fetch the number of sequences -/** @param fai Pointer to the faidx_t struct - @return The number of sequences -*/ -HTSLIB_EXPORT -int faidx_fetch_nseq(const faidx_t *fai) HTS_DEPRECATED("Please use faidx_nseq instead"); - -/// Fetch the sequence in a region -/** @param fai Pointer to the faidx_t struct - @param c_name Region name - @param p_beg_i Beginning position number (zero-based) - @param p_end_i End position number (zero-based) - @param len Length of the region; -2 if c_name not present, -1 general error - @return Pointer to the sequence; null on failure - -The returned sequence is allocated by `malloc()` family and should be destroyed -by end users by calling `free()` on it. 
-*/ -HTSLIB_EXPORT -char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len); - -/// Fetch the sequence in a region -/** @param fai Pointer to the faidx_t struct - @param c_name Region name - @param p_beg_i Beginning position number (zero-based) - @param p_end_i End position number (zero-based) - @param len Length of the region; -2 if c_name not present, -1 general error - @return Pointer to the sequence; null on failure - -The returned sequence is allocated by `malloc()` family and should be destroyed -by end users by calling `free()` on it. -*/ -HTSLIB_EXPORT -char *faidx_fetch_seq64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len); - -/// Fetch the quality string in a region for FASTQ files -/** @param fai Pointer to the faidx_t struct - @param c_name Region name - @param p_beg_i Beginning position number (zero-based) - @param p_end_i End position number (zero-based) - @param len Length of the region; -2 if c_name not present, -1 general error - @return Pointer to the sequence; null on failure - -The returned sequence is allocated by `malloc()` family and should be destroyed -by end users by calling `free()` on it. -*/ -HTSLIB_EXPORT -char *faidx_fetch_qual(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len); - -/// Fetch the quality string in a region for FASTQ files -/** @param fai Pointer to the faidx_t struct - @param c_name Region name - @param p_beg_i Beginning position number (zero-based) - @param p_end_i End position number (zero-based) - @param len Length of the region; -2 if c_name not present, -1 general error - @return Pointer to the sequence; null on failure - -The returned sequence is allocated by `malloc()` family and should be destroyed -by end users by calling `free()` on it. 
-*/ -HTSLIB_EXPORT -char *faidx_fetch_qual64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len); - -/// Query if sequence is present -/** @param fai Pointer to the faidx_t struct - @param seq Sequence name - @return 1 if present or 0 if absent -*/ -HTSLIB_EXPORT -int faidx_has_seq(const faidx_t *fai, const char *seq); - -/// Return number of sequences in fai index -HTSLIB_EXPORT -int faidx_nseq(const faidx_t *fai); - -/// Return name of i-th sequence -HTSLIB_EXPORT -const char *faidx_iseq(const faidx_t *fai, int i); - -/// Return sequence length -/** @param fai Pointer to the faidx_t struct - @param seq Name of the sequence - @return Sequence length, or -1 if not present -*/ -HTSLIB_EXPORT -hts_pos_t faidx_seq_len64(const faidx_t *fai, const char *seq); - -/// Return sequence length -/** @param fai Pointer to the faidx_t struct - @param seq Name of the sequence - @return Sequence length, or -1 if not present - - @deprecated This funtion cannot handle very long sequences. - Use faidx_seq_len64() instead. -*/ -HTSLIB_EXPORT -int faidx_seq_len(const faidx_t *fai, const char *seq); - -/// Parses a region string. -/** @param fai Pointer to the faidx_t struct - @param s Region string - @param tid Returns which i-th sequence is described in the region. - @param beg Returns the start of the region (0 based) - @param end Returns the one past last of the region (0 based) - @param flags Parsing method, see HTS_PARSE_* in hts.h. - @return Pointer to end of parsed s if successful, NULL if not. - - To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" - are reference names, quote using curly braces. - Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. 
-*/ -HTSLIB_EXPORT -const char *fai_parse_region(const faidx_t *fai, const char *s, - int *tid, hts_pos_t *beg, hts_pos_t *end, - int flags); - -/// Adjust region to the actual sequence length -/** @param fai Pointer to the faidx_t struct - @param tid Sequence index, as returned by fai_parse_region() - @param beg[in,out] The start of the region (0 based) - @param end[in,out] One past end of the region (0 based) - @return 1, 2, or 3 if @p beg, @p end, or both are adjusted, - 0 if @p beg and @p end are unchanged - -1 on error - - Looks up the length of @p tid, and then adjusts the values of @p beg - and @p end if they fall outside the boundaries of the sequence. - - If @p beg > @p end, it will be set to @p end. - - The return value indicates which, if any, of the inputs have been - adjusted. -1 will be returned if @p tid is not a valid sequence index. -*/ -HTSLIB_EXPORT -int fai_adjust_region(const faidx_t *fai, int tid, - hts_pos_t *beg, hts_pos_t *end); - -/// Sets the cache size of the underlying BGZF compressed file -/** @param fai Pointer to the faidx_t struct - * @param cache_size Selected cache size in bytes - */ -HTSLIB_EXPORT -void fai_set_cache_size(faidx_t *fai, int cache_size); - -/// Adds a thread pool to the underlying BGZF layer. -/** @param fai FAI file handler - * @param pool The thread pool (see hts_create_threads) - * @param qsize The size of the job queue. If 0 this is twice the - * number of threads in the pool. - */ -HTSLIB_EXPORT -int fai_thread_pool(faidx_t *fai, struct hts_tpool *pool, int qsize); - -/// Determines the path to the reference index file -/** @param fa String with the path to the reference file - * @return String with the path to the reference index file, or NULL on failure - - If the reference path has the format reference.fa##idx##index.fa.fai, - the index path is taken directly from it as index.fa.fai. - If the reference file is local and the index file cannot be found, it - will be created alongside the reference file. 
- If the reference file is remote and the index file cannot be found, - the method returns NULL. - - The returned string has to be freed by the user at the end of its scope. - */ -HTSLIB_EXPORT -char *fai_path(const char *fa); -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/hts.h b/src/htslib-1.19.1/htslib/hts.h deleted file mode 100644 index ef39330..0000000 --- a/src/htslib-1.19.1/htslib/hts.h +++ /dev/null @@ -1,1581 +0,0 @@ -/// @file htslib/hts.h -/// Format-neutral I/O, indexing, and iterator API functions. -/* - Copyright (C) 2012-2022 Genome Research Ltd. - Copyright (C) 2010, 2012 Broad Institute. - Portions copyright (C) 2003-2006, 2008-2010 by Heng Li - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef HTSLIB_HTS_H -#define HTSLIB_HTS_H - -#include -#include -#include - -#include "hts_defs.h" -#include "hts_log.h" -#include "kstring.h" -#include "kroundup.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Separator used to split HTS_PATH (for plugins); REF_PATH (cram references) -#if defined(_WIN32) || defined(__MSYS__) -#define HTS_PATH_SEPARATOR_CHAR ';' -#define HTS_PATH_SEPARATOR_STR ";" -#else -#define HTS_PATH_SEPARATOR_CHAR ':' -#define HTS_PATH_SEPARATOR_STR ":" -#endif - -#ifndef HTS_BGZF_TYPEDEF -typedef struct BGZF BGZF; -#define HTS_BGZF_TYPEDEF -#endif -struct cram_fd; -struct hFILE; -struct hts_tpool; -struct sam_hdr_t; - -/** - * @hideinitializer - * Deprecated macro to expand a dynamic array of a given type - * - * @param type_t The type of the array elements - * @param[in] n Requested number of elements of type type_t - * @param[in,out] m Size of memory allocated - * @param[in,out] ptr Pointer to the array - * - * @discussion - * Do not use this macro. Use hts_resize() instead as allows allocation - * failures to be handled more gracefully. - * - * The array *ptr will be expanded if necessary so that it can hold @p n - * or more elements. If the array is expanded then the new size will be - * written to @p m and the value in @p ptr may change. - * - * It must be possible to take the address of @p ptr and @p m must be usable - * as an lvalue. - * - * @bug - * If the memory allocation fails, this will call exit(1). This is - * not ideal behaviour in a library. - */ -#define hts_expand(type_t, n, m, ptr) do { \ - if ((n) > (m)) { \ - size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ - int, void **, const char *); \ - (m) = hts_realloc_or_die((n) >= 1 ? 
(n) : 1, (m), sizeof(m), \ - sizeof(type_t), 0, \ - (void **)&(ptr), __func__); \ - } \ - } while (0) - -/** - * @hideinitializer - * Macro to expand a dynamic array, zeroing any newly-allocated memory - * - * @param type_t The type of the array elements - * @param[in] n Requested number of elements of type type_t - * @param[in,out] m Size of memory allocated - * @param[in,out] ptr Pointer to the array - * - * @discussion - * Do not use this macro. Use hts_resize() instead as allows allocation - * failures to be handled more gracefully. - * - * As for hts_expand(), except the bytes that make up the array elements - * between the old and new values of @p m are set to zero using memset(). - * - * @bug - * If the memory allocation fails, this will call exit(1). This is - * not ideal behaviour in a library. - */ - - -#define hts_expand0(type_t, n, m, ptr) do { \ - if ((n) > (m)) { \ - size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ - int, void **, const char *); \ - (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \ - sizeof(type_t), 1, \ - (void **)&(ptr), __func__); \ - } \ - } while (0) - -// For internal use (by hts_resize()) only -HTSLIB_EXPORT -int hts_resize_array_(size_t, size_t, size_t, void *, void **, int, - const char *); - -#define HTS_RESIZE_CLEAR 1 - -/** - * @hideinitializer - * Macro to expand a dynamic array of a given type - * - * @param type_t The type of the array elements - * @param[in] num Requested number of elements of type type_t - * @param[in,out] size_ptr Pointer to where the size (in elements) of the - array is stored. - * @param[in,out] ptr Location of the pointer to the array - * @param[in] flags Option flags - * - * @return 0 for success, or negative if an error occurred. - * - * @discussion - * The array *ptr will be expanded if necessary so that it can hold @p num - * or more elements. If the array is expanded then the new size will be - * written to @p *size_ptr and the value in @p *ptr may change. 
- * - * If ( @p flags & HTS_RESIZE_CLEAR ) is set, any newly allocated memory will - * be cleared. - */ - -#define hts_resize(type_t, num, size_ptr, ptr, flags) \ - ((num) > (*(size_ptr)) \ - ? hts_resize_array_(sizeof(type_t), (num), \ - sizeof(*(size_ptr)), (size_ptr), \ - (void **)(ptr), (flags), __func__) \ - : 0) - -/// Release resources when dlclosing a dynamically loaded HTSlib -/** @discussion - * Normally HTSlib cleans up automatically when your program exits, - * whether that is via exit(3) or returning from main(). However if you - * have dlopen(3)ed HTSlib and wish to close it before your main program - * exits, you must call hts_lib_shutdown() before dlclose(3). -*/ -HTSLIB_EXPORT -void hts_lib_shutdown(void); - -/** - * Wrapper function for free(). Enables memory deallocation across DLL - * boundary. Should be used by all applications, which are compiled - * with a different standard library than htslib and call htslib - * methods that return dynamically allocated data. - */ -HTSLIB_EXPORT -void hts_free(void *ptr); - -/************ - * File I/O * - ************/ - -// Add new entries only at the end (but before the *_maximum entry) -// of these enums, as their numbering is part of the htslib ABI. 
- -enum htsFormatCategory { - unknown_category, - sequence_data, // Sequence data -- SAM, BAM, CRAM, etc - variant_data, // Variant calling data -- VCF, BCF, etc - index_file, // Index file associated with some data file - region_list, // Coordinate intervals or regions -- BED, etc - category_maximum = 32767 -}; - -enum htsExactFormat { - unknown_format, - binary_format, text_format, - sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed, - htsget, - json HTS_DEPRECATED_ENUM("Use htsExactFormat 'htsget' instead") = htsget, - empty_format, // File is empty (or empty after decompression) - fasta_format, fastq_format, fai_format, fqi_format, - hts_crypt4gh_format, - d4_format, - format_maximum = 32767 -}; - -enum htsCompression { - no_compression, gzip, bgzf, custom, bzip2_compression, razf_compression, - xz_compression, zstd_compression, - compression_maximum = 32767 -}; - -typedef struct htsFormat { - enum htsFormatCategory category; - enum htsExactFormat format; - struct { short major, minor; } version; - enum htsCompression compression; - short compression_level; // currently unused - void *specific; // format specific options; see struct hts_opt. -} htsFormat; - -struct hts_idx_t; -typedef struct hts_idx_t hts_idx_t; -struct hts_filter_t; - -/** - * @brief File handle returned by hts_open() etc. - * This structure should be considered opaque by end users. There should be - * no need to access most fields directly in user code, and in cases where - * it is desirable accessor functions such as hts_get_format() are provided. 
- */ -// Maintainers note htsFile cannot be an incomplete struct because some of its -// fields are part of libhts.so's ABI (hence these fields must not be moved): -// - fp is used in the public sam_itr_next()/etc macros -// - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1 -// - is_write and is_cram are used directly in samtools <= 1.1 -// - fp is used directly in samtools (up to and including current develop) -// - line is used directly in bcftools (up to and including current develop) -// - is_bgzf and is_cram flags indicate which fp union member to use. -// Note is_bgzf being set does not indicate the flag is BGZF compressed, -// nor even whether it is compressed at all (eg on naked BAMs). -typedef struct htsFile { - uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27; - int64_t lineno; - kstring_t line; - char *fn, *fn_aux; - union { - BGZF *bgzf; - struct cram_fd *cram; - struct hFILE *hfile; - } fp; - void *state; // format specific state information - htsFormat format; - hts_idx_t *idx; - const char *fnidx; - struct sam_hdr_t *bam_header; - struct hts_filter_t *filter; -} htsFile; - -// A combined thread pool and queue allocation size. -// The pool should already be defined, but qsize may be zero to -// indicate an appropriate queue size is taken from the pool. -// -// Reasons for explicitly setting it could be where many more file -// descriptors are in use than threads, so keeping memory low is -// important. 
-typedef struct htsThreadPool { - struct hts_tpool *pool; // The shared thread pool itself - int qsize; // Size of I/O queue to use for this fp -} htsThreadPool; - -// REQUIRED_FIELDS -enum sam_fields { - SAM_QNAME = 0x00000001, - SAM_FLAG = 0x00000002, - SAM_RNAME = 0x00000004, - SAM_POS = 0x00000008, - SAM_MAPQ = 0x00000010, - SAM_CIGAR = 0x00000020, - SAM_RNEXT = 0x00000040, - SAM_PNEXT = 0x00000080, - SAM_TLEN = 0x00000100, - SAM_SEQ = 0x00000200, - SAM_QUAL = 0x00000400, - SAM_AUX = 0x00000800, - SAM_RGAUX = 0x00001000, -}; - -// Mostly CRAM only, but this could also include other format options -enum hts_fmt_option { - // CRAM specific - CRAM_OPT_DECODE_MD, - CRAM_OPT_PREFIX, - CRAM_OPT_VERBOSITY, // obsolete, use hts_set_log_level() instead - CRAM_OPT_SEQS_PER_SLICE, - CRAM_OPT_SLICES_PER_CONTAINER, - CRAM_OPT_RANGE, - CRAM_OPT_VERSION, // rename to cram_version? - CRAM_OPT_EMBED_REF, - CRAM_OPT_IGNORE_MD5, - CRAM_OPT_REFERENCE, // make general - CRAM_OPT_MULTI_SEQ_PER_SLICE, - CRAM_OPT_NO_REF, - CRAM_OPT_USE_BZIP2, - CRAM_OPT_SHARED_REF, - CRAM_OPT_NTHREADS, // deprecated, use HTS_OPT_NTHREADS - CRAM_OPT_THREAD_POOL,// make general - CRAM_OPT_USE_LZMA, - CRAM_OPT_USE_RANS, - CRAM_OPT_REQUIRED_FIELDS, - CRAM_OPT_LOSSY_NAMES, - CRAM_OPT_BASES_PER_SLICE, - CRAM_OPT_STORE_MD, - CRAM_OPT_STORE_NM, - CRAM_OPT_RANGE_NOSEEK, // CRAM_OPT_RANGE minus the seek - CRAM_OPT_USE_TOK, - CRAM_OPT_USE_FQZ, - CRAM_OPT_USE_ARITH, - CRAM_OPT_POS_DELTA, // force delta for AP, even on non-pos sorted data - - // General purpose - HTS_OPT_COMPRESSION_LEVEL = 100, - HTS_OPT_NTHREADS, - HTS_OPT_THREAD_POOL, - HTS_OPT_CACHE_SIZE, - HTS_OPT_BLOCK_SIZE, - HTS_OPT_FILTER, - HTS_OPT_PROFILE, - - // Fastq - - // Boolean. - // Read / Write CASAVA 1.8 format. 
- // See https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl2fastq/bcl2fastq_letterbooklet_15038058brpmi.pdf - // - // The CASAVA tag matches \d:[YN]:\d+:[ACGTN]+ - // The first \d is read 1/2 (1 or 2), [YN] is QC-PASS/FAIL flag, - // \d+ is a control number, and the sequence at the end is - // for barcode sequence. Barcodes are read into the aux tag defined - // by FASTQ_OPT_BARCODE ("BC" by default). - FASTQ_OPT_CASAVA = 1000, - - // String. - // Whether to read / write extra SAM format aux tags from the fastq - // identifier line. For reading this can simply be "1" to request - // decoding aux tags. For writing it is a comma separated list of aux - // tag types to be written out. - FASTQ_OPT_AUX, - - // Boolean. - // Whether to add /1 and /2 to read identifiers when writing FASTQ. - // These come from the BAM_FREAD1 or BAM_FREAD2 flags. - // (Detecting the /1 and /2 is automatic when reading fastq.) - FASTQ_OPT_RNUM, - - // Two character string. - // Barcode aux tag for CASAVA; defaults to "BC". - FASTQ_OPT_BARCODE, - - // Process SRA and ENA read names which pointlessly move the original - // name to the second field and insert a constructed . - // name in its place. - FASTQ_OPT_NAME2, -}; - -// Profile options for encoding; primarily used at present in CRAM -// but also usable in BAM as a synonym for deflate compression levels. -enum hts_profile_option { - HTS_PROFILE_FAST, - HTS_PROFILE_NORMAL, - HTS_PROFILE_SMALL, - HTS_PROFILE_ARCHIVE, -}; - -// For backwards compatibility -#define cram_option hts_fmt_option - -typedef struct hts_opt { - char *arg; // string form, strdup()ed - enum hts_fmt_option opt; // tokenised key - union { // ... 
and value - int i; - char *s; - } val; - struct hts_opt *next; -} hts_opt; - -#define HTS_FILE_OPTS_INIT {{0},0} - -/* - * Explicit index file name delimiter, see below - */ -#define HTS_IDX_DELIM "##idx##" - - -/********************** - * Exported functions * - **********************/ - -/* - * Parses arg and appends it to the option list. - * - * Returns 0 on success; - * -1 on failure. - */ -HTSLIB_EXPORT -int hts_opt_add(hts_opt **opts, const char *c_arg); - -/* - * Applies an hts_opt option list to a given htsFile. - * - * Returns 0 on success - * -1 on failure - */ -HTSLIB_EXPORT -int hts_opt_apply(htsFile *fp, hts_opt *opts); - -/* - * Frees an hts_opt list. - */ -HTSLIB_EXPORT -void hts_opt_free(hts_opt *opts); - -/* - * Accepts a string file format (sam, bam, cram, vcf, bam) optionally - * followed by a comma separated list of key=value options and splits - * these up into the fields of htsFormat struct. - * - * Returns 0 on success - * -1 on failure. - */ -HTSLIB_EXPORT -int hts_parse_format(htsFormat *opt, const char *str); - -/* - * Tokenise options as (key(=value)?,)*(key(=value)?)? - * NB: No provision for ',' appearing in the value! - * Add backslashing rules? - * - * This could be used as part of a general command line option parser or - * as a string concatenated onto the file open mode. - * - * Returns 0 on success - * -1 on failure. - */ -HTSLIB_EXPORT -int hts_parse_opt_list(htsFormat *opt, const char *str); - -/*! @abstract Table for converting a nucleotide character to 4-bit encoding. -The input character may be either an IUPAC ambiguity code, '=' for 0, or -'0'/'1'/'2'/'3' for a result of 1/2/4/8. The result is encoded as 1/2/4/8 -for A/C/G/T or combinations of these bits for ambiguous bases. -*/ -HTSLIB_EXPORT -extern const unsigned char seq_nt16_table[256]; - -/*! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC -ambiguity code letter (or '=' when given 0). -*/ -HTSLIB_EXPORT -extern const char seq_nt16_str[]; - -/*! 
@abstract Table for converting a 4-bit encoded nucleotide to about 2 bits. -Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous). -*/ -HTSLIB_EXPORT -extern const int seq_nt16_int[]; - -/*! - @abstract Get the htslib version number - @return For released versions, a string like "N.N[.N]"; or git describe - output if using a library built within a Git repository. -*/ -HTSLIB_EXPORT -const char *hts_version(void); - -/*! - @abstract Compile-time HTSlib version number, for use in #if checks - @return For released versions X.Y[.Z], an integer of the form XYYYZZ; - useful for preprocessor conditionals such as - #if HTS_VERSION >= 101000 // Check for v1.10 or later -*/ -// Maintainers: Bump this in the final stage of preparing a new release. -// Immediately after release, bump ZZ to 90 to distinguish in-development -// Git repository builds from the release; you may wish to increment this -// further when significant features are merged. -#define HTS_VERSION 101901 - -/*! @abstract Introspection on the features enabled in htslib - * - * @return a bitfield of HTS_FEATURE_* macros. - */ -HTSLIB_EXPORT -unsigned int hts_features(void); - -HTSLIB_EXPORT -const char *hts_test_feature(unsigned int id); - -/*! 
@abstract Introspection on the features enabled in htslib, string form - * - * @return a string describing htslib build features - */ -HTSLIB_EXPORT -const char *hts_feature_string(void); - -// Whether ./configure was used or vanilla Makefile -#define HTS_FEATURE_CONFIGURE 1 - -// Whether --enable-plugins was used -#define HTS_FEATURE_PLUGINS 2 - -// Transport specific -#define HTS_FEATURE_LIBCURL (1u<<10) -#define HTS_FEATURE_S3 (1u<<11) -#define HTS_FEATURE_GCS (1u<<12) - -// Compression options -#define HTS_FEATURE_LIBDEFLATE (1u<<20) -#define HTS_FEATURE_LZMA (1u<<21) -#define HTS_FEATURE_BZIP2 (1u<<22) -#define HTS_FEATURE_HTSCODECS (1u<<23) // htscodecs library version - -// Build params -#define HTS_FEATURE_CC (1u<<27) -#define HTS_FEATURE_CFLAGS (1u<<28) -#define HTS_FEATURE_CPPFLAGS (1u<<29) -#define HTS_FEATURE_LDFLAGS (1u<<30) - - -/*! - @abstract Determine format by peeking at the start of a file - @param fp File opened for reading, positioned at the beginning - @param fmt Format structure that will be filled out on return - @return 0 for success, or negative if an error occurred. - - Equivalent to hts_detect_format2(fp, NULL, fmt). -*/ -HTSLIB_EXPORT -int hts_detect_format(struct hFILE *fp, htsFormat *fmt); - -/*! - @abstract Determine format primarily by peeking at the start of a file - @param fp File opened for reading, positioned at the beginning - @param fname Name of the file, or NULL if not available - @param fmt Format structure that will be filled out on return - @return 0 for success, or negative if an error occurred. - @since 1.15 - -Some formats are only recognised if the filename is available and has the -expected extension, as otherwise more generic files may be misrecognised. -In particular: - - FASTA/Q indexes must have .fai/.fqi extensions; without this requirement, - some similar BED files would be misrecognised as indexes. -*/ -HTSLIB_EXPORT -int hts_detect_format2(struct hFILE *fp, const char *fname, htsFormat *fmt); - -/*! 
- @abstract Get a human-readable description of the file format - @param fmt Format structure holding type, version, compression, etc. - @return Description string, to be freed by the caller after use. -*/ -HTSLIB_EXPORT -char *hts_format_description(const htsFormat *format); - -/*! - @abstract Open a sequence data (SAM/BAM/CRAM) or variant data (VCF/BCF) - or possibly-compressed textual line-orientated file - @param fn The file name or "-" for stdin/stdout. For indexed files - with a non-standard naming, the file name can include the - name of the index file delimited with HTS_IDX_DELIM - @param mode Mode matching / [rwa][bcefFguxz0-9]* / - @discussion - With 'r' opens for reading; any further format mode letters are ignored - as the format is detected by checking the first few bytes or BGZF blocks - of the file. With 'w' or 'a' opens for writing or appending, with format - specifier letters: - b binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc) - c CRAM format - f FASTQ format - F FASTA format - g gzip compressed - u uncompressed - z bgzf compressed - [0-9] zlib compression level - and with non-format option letters (for any of 'r'/'w'/'a'): - e close the file on exec(2) (opens with O_CLOEXEC, where supported) - x create the file exclusively (opens with O_EXCL, where supported) - Note that there is a distinction between 'u' and '0': the first yields - plain uncompressed output whereas the latter outputs uncompressed data - wrapped in the zlib format. - @example - [rw]b .. compressed BCF, BAM, FAI - [rw]bu .. uncompressed BCF - [rw]z .. compressed VCF - [rw] .. uncompressed VCF -*/ -HTSLIB_EXPORT -htsFile *hts_open(const char *fn, const char *mode); - -/*! - @abstract Open a SAM/BAM/CRAM/VCF/BCF/etc file - @param fn The file name or "-" for stdin/stdout - @param mode Open mode, as per hts_open() - @param fmt Optional format specific parameters - @discussion - See hts_open() for description of fn and mode. 
- // TODO Update documentation for s/opts/fmt/ - Opts contains a format string (sam, bam, cram, vcf, bcf) which will, - if defined, override mode. Opts also contains a linked list of hts_opt - structures to apply to the open file handle. These can contain things - like pointers to the reference or information on compression levels, - block sizes, etc. -*/ -HTSLIB_EXPORT -htsFile *hts_open_format(const char *fn, const char *mode, const htsFormat *fmt); - -/*! - @abstract Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file - @param fn The already-open file handle - @param mode Open mode, as per hts_open() -*/ -HTSLIB_EXPORT -htsFile *hts_hopen(struct hFILE *fp, const char *fn, const char *mode); - -/*! - @abstract For output streams, flush any buffered data - @param fp The file handle to be flushed - @return 0 for success, or negative if an error occurred. - @since 1.14 -*/ -HTSLIB_EXPORT -int hts_flush(htsFile *fp); - -/*! - @abstract Close a file handle, flushing buffered data for output streams - @param fp The file handle to be closed - @return 0 for success, or negative if an error occurred. -*/ -HTSLIB_EXPORT -int hts_close(htsFile *fp); - -/*! - @abstract Returns the file's format information - @param fp The file handle - @return Read-only pointer to the file's htsFormat. -*/ -HTSLIB_EXPORT -const htsFormat *hts_get_format(htsFile *fp); - -/*! - @ abstract Returns a string containing the file format extension. - @ param format Format structure containing the file type. - @ return A string ("sam", "bam", etc) or "?" for unknown formats. - */ -HTSLIB_EXPORT -const char *hts_format_file_extension(const htsFormat *format); - -/*! - @abstract Sets a specified CRAM option on the open file handle. - @param fp The file handle open the open file. - @param opt The CRAM_OPT_* option. - @param ... Optional arguments, dependent on the option used. - @return 0 for success, or negative if an error occurred. 
-*/ -HTSLIB_EXPORT -int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...); - -/*! - @abstract Read a line (and its \n or \r\n terminator) from a file - @param fp The file handle - @param delimiter Unused, but must be '\n' (or KS_SEP_LINE) - @param str The line (not including the terminator) is written here - @return Length of the string read (capped at INT_MAX); - -1 on end-of-file; <= -2 on error -*/ -HTSLIB_EXPORT -int hts_getline(htsFile *fp, int delimiter, kstring_t *str); - -HTSLIB_EXPORT -char **hts_readlines(const char *fn, int *_n); -/*! - @abstract Parse comma-separated list or read list from a file - @param list File name or comma-separated list - @param is_file - @param _n Size of the output array (number of items read) - @return NULL on failure or pointer to newly allocated array of - strings -*/ -HTSLIB_EXPORT -char **hts_readlist(const char *fn, int is_file, int *_n); - -/*! - @abstract Create extra threads to aid compress/decompression for this file - @param fp The file handle - @param n The number of worker threads to create - @return 0 for success, or negative if an error occurred. - @notes This function creates non-shared threads for use solely by fp. - The hts_set_thread_pool function is the recommended alternative. -*/ -HTSLIB_EXPORT -int hts_set_threads(htsFile *fp, int n); - -/*! - @abstract Create extra threads to aid compress/decompression for this file - @param fp The file handle - @param p A pool of worker threads, previously allocated by hts_create_threads(). - @return 0 for success, or negative if an error occurred. -*/ -HTSLIB_EXPORT -int hts_set_thread_pool(htsFile *fp, htsThreadPool *p); - -/*! - @abstract Adds a cache of decompressed blocks, potentially speeding up seeks. - This may not work for all file types (currently it is bgzf only). - @param fp The file handle - @param n The size of cache, in bytes -*/ -HTSLIB_EXPORT -void hts_set_cache_size(htsFile *fp, int n); - -/*! 
- @abstract Set .fai filename for a file opened for reading - @return 0 for success, negative on failure - @discussion - Called before *_hdr_read(), this provides the name of a .fai file - used to provide a reference list if the htsFile contains no @SQ headers. -*/ -HTSLIB_EXPORT -int hts_set_fai_filename(htsFile *fp, const char *fn_aux); - - -/*! - @abstract Sets a filter expression - @return 0 for success, negative on failure - @discussion - To clear an existing filter, specifying expr as NULL. -*/ -HTSLIB_EXPORT -int hts_set_filter_expression(htsFile *fp, const char *expr); - -/*! - @abstract Determine whether a given htsFile contains a valid EOF block - @return 3 for a non-EOF checkable filetype; - 2 for an unseekable file type where EOF cannot be checked; - 1 for a valid EOF block; - 0 for if the EOF marker is absent when it should be present; - -1 (with errno set) on failure - @discussion - Check if the BGZF end-of-file (EOF) marker is present -*/ -HTSLIB_EXPORT -int hts_check_EOF(htsFile *fp); - -/************ - * Indexing * - ************/ - -/*! -These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc, -producing iterators operating as follows: - - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file - - HTS_IDX_START iterates over the entire file - - HTS_IDX_REST iterates from the current position to the end of the file - - HTS_IDX_NONE always returns "no more alignment records" -When one of these special tid values is used, beg and end are ignored. -When REST or NONE is used, idx is also ignored and may be NULL. -*/ -#define HTS_IDX_NOCOOR (-2) -#define HTS_IDX_START (-3) -#define HTS_IDX_REST (-4) -#define HTS_IDX_NONE (-5) - -#define HTS_FMT_CSI 0 -#define HTS_FMT_BAI 1 -#define HTS_FMT_TBI 2 -#define HTS_FMT_CRAI 3 -#define HTS_FMT_FAI 4 - -// Almost INT64_MAX, but when cast into a 32-bit int it's -// also INT_MAX instead of -1. This avoids bugs with old code -// using the new hts_pos_t data type. 
-#define HTS_POS_MAX ((((int64_t)INT_MAX)<<32)|INT_MAX) -#define HTS_POS_MIN INT64_MIN -#define PRIhts_pos PRId64 -typedef int64_t hts_pos_t; - -// For comparison with previous release: -// -// #define HTS_POS_MAX INT_MAX -// #define HTS_POS_MIN INT_MIN -// #define PRIhts_pos PRId32 -// typedef int32_t hts_pos_t; - -typedef struct hts_pair_pos_t { - hts_pos_t beg, end; -} hts_pair_pos_t; - -typedef hts_pair_pos_t hts_pair32_t; // For backwards compatibility - -typedef struct hts_pair64_t { - uint64_t u, v; -} hts_pair64_t; - -typedef struct hts_pair64_max_t { - uint64_t u, v; - uint64_t max; -} hts_pair64_max_t; - -typedef struct hts_reglist_t { - const char *reg; - hts_pair_pos_t *intervals; - int tid; - uint32_t count; - hts_pos_t min_beg, max_end; -} hts_reglist_t; - -typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, hts_pos_t *beg, hts_pos_t *end); -typedef int hts_seek_func(void *fp, int64_t offset, int where); -typedef int64_t hts_tell_func(void *fp); - -/** - * @brief File iterator that can handle multiple target regions. - * This structure should be considered opaque by end users. - * It does both the stepping inside the file and the filtering of alignments. - * It can operate in single or multi-region mode, and depending on this, - * it uses different fields. 
- * - * read_rest (1) - read everything from the current offset, without filtering - * finished (1) - no more iterations - * is_cram (1) - current file has CRAM format - * nocoor (1) - read all unmapped reads - * - * multi (1) - multi-region moode - * reg_list - List of target regions - * n_reg - Size of the above list - * curr_reg - List index of the current region of search - * curr_intv - Interval index inside the current region; points to a (beg, end) - * end - Used for CRAM files, to preserve the max end coordinate - * - * multi (0) - single-region mode - * tid - Reference id of the target region - * beg - Start position of the target region - * end - End position of the target region - * - * Common fields: - * off - List of file offsets computed from the index - * n_off - Size of the above list - * i - List index of the current file offset - * curr_off - File offset for the next file read - * curr_tid - Reference id of the current alignment - * curr_beg - Start position of the current alignment - * curr_end - End position of the current alignment - * nocoor_off - File offset where the unmapped reads start - * - * readrec - File specific function that reads an alignment - * seek - File specific function for changing the file offset - * tell - File specific function for indicating the file offset - */ - -typedef struct hts_itr_t { - uint32_t read_rest:1, finished:1, is_cram:1, nocoor:1, multi:1, dummy:27; - int tid, n_off, i, n_reg; - hts_pos_t beg, end; - hts_reglist_t *reg_list; - int curr_tid, curr_reg, curr_intv; - hts_pos_t curr_beg, curr_end; - uint64_t curr_off, nocoor_off; - hts_pair64_max_t *off; - hts_readrec_func *readrec; - hts_seek_func *seek; - hts_tell_func *tell; - struct { - int n, m; - int *a; - } bins; -} hts_itr_t; - -typedef hts_itr_t hts_itr_multi_t; - -/// Compute the first bin on a given level -#define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7) -/// Compute the parent bin of a given bin -#define hts_bin_parent(b) (((b) - 1) >> 3) 
- -/////////////////////////////////////////////////////////// -// Low-level API for building indexes. - -/// Create a BAI/CSI/TBI type index structure -/** @param n Initial number of targets - @param fmt Format, one of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI - @param offset0 Initial file offset - @param min_shift Number of bits for the minimal interval - @param n_lvls Number of levels in the binning index - @return An initialised hts_idx_t struct on success; NULL on failure - -The struct returned by a successful call should be freed via hts_idx_destroy() -when it is no longer needed. -*/ -HTSLIB_EXPORT -hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls); - -/// Free a BAI/CSI/TBI type index -/** @param idx Index structure to free - */ -HTSLIB_EXPORT -void hts_idx_destroy(hts_idx_t *idx); - -/// Push an index entry -/** @param idx Index - @param tid Target id - @param beg Range start (zero-based) - @param end Range end (zero-based, half-open) - @param offset File offset - @param is_mapped Range corresponds to a mapped read - @return 0 on success; -1 on failure - -The @p is_mapped parameter is used to update the n_mapped / n_unmapped counts -stored in the meta-data bin. - */ -HTSLIB_EXPORT -int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); - -/// Finish building an index -/** @param idx Index - @param final_offset Last file offset - @return 0 on success; non-zero on failure. -*/ -HTSLIB_EXPORT -int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset); - -/// Returns index format -/** @param idx Index - @return One of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI -*/ -HTSLIB_EXPORT -int hts_idx_fmt(hts_idx_t *idx); - -/// Add name to TBI index meta-data -/** @param idx Index - @param tid Target identifier - @param name Target name - @return Index number of name in names list on success; -1 on failure. 
-*/ -HTSLIB_EXPORT -int hts_idx_tbi_name(hts_idx_t *idx, int tid, const char *name); - -// Index loading and saving - -/// Save an index to a file -/** @param idx Index to be written - @param fn Input BAM/BCF/etc filename, to which .bai/.csi/etc will be added - @param fmt One of the HTS_FMT_* index formats - @return 0 if successful, or negative if an error occurred. -*/ -HTSLIB_EXPORT -int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt) HTS_RESULT_USED; - -/// Save an index to a specific file -/** @param idx Index to be written - @param fn Input BAM/BCF/etc filename - @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn - @param fmt One of the HTS_FMT_* index formats - @return 0 if successful, or negative if an error occurred. -*/ -HTSLIB_EXPORT -int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt) HTS_RESULT_USED; - -/// Load an index file -/** @param fn BAM/BCF/etc filename, to which .bai/.csi/etc will be added or - the extension substituted, to search for an existing index file. - In case of a non-standard naming, the file name can include the - name of the index file delimited with HTS_IDX_DELIM. - @param fmt One of the HTS_FMT_* index formats - @return The index, or NULL if an error occurred. - -If @p fn contains the string "##idx##" (HTS_IDX_DELIM), the part before -the delimiter will be used as the name of the data file and the part after -it will be used as the name of the index. - -Otherwise, this function tries to work out the index name as follows: - - It will try appending ".csi" to @p fn - It will try substituting an existing suffix (e.g. .bam, .vcf) with ".csi" - Then, if @p fmt is HTS_FMT_BAI: - It will try appending ".bai" to @p fn - To will substituting the existing suffix (e.g. .bam) with ".bai" - else if @p fmt is HTS_FMT_TBI: - It will try appending ".tbi" to @p fn - To will substituting the existing suffix (e.g. 
.vcf) with ".tbi" - -If the index file is remote (served over a protocol like https), first a check -is made to see is a locally cached copy is available. This is done for all -of the possible names listed above. If a cached copy is not available then -the index will be downloaded and stored in the current working directory, -with the same name as the remote index. - - Equivalent to hts_idx_load3(fn, NULL, fmt, HTS_IDX_SAVE_REMOTE); -*/ -HTSLIB_EXPORT -hts_idx_t *hts_idx_load(const char *fn, int fmt); - -/// Load a specific index file -/** @param fn Input BAM/BCF/etc filename - @param fnidx The input index filename - @return The index, or NULL if an error occurred. - - Equivalent to hts_idx_load3(fn, fnidx, 0, 0); - - This function will not attempt to save index files locally. -*/ -HTSLIB_EXPORT -hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx); - -/// Load a specific index file -/** @param fn Input BAM/BCF/etc filename - @param fnidx The input index filename - @param fmt One of the HTS_FMT_* index formats - @param flags Flags to alter behaviour (see description) - @return The index, or NULL if an error occurred. - - If @p fnidx is NULL, the index name will be derived from @p fn in the - same way as hts_idx_load(). - - If @p fnidx is not NULL, @p fmt is ignored. - - The @p flags parameter can be set to a combination of the following - values: - - HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes - HTS_IDX_SILENT_FAIL Fail silently if the index is not present - - The index struct returned by a successful call should be freed - via hts_idx_destroy() when it is no longer needed. 
-*/ -HTSLIB_EXPORT -hts_idx_t *hts_idx_load3(const char *fn, const char *fnidx, int fmt, int flags); - -/// Flags for hts_idx_load3() ( and also sam_idx_load3(), tbx_idx_load3() ) -#define HTS_IDX_SAVE_REMOTE 1 -#define HTS_IDX_SILENT_FAIL 2 - -/////////////////////////////////////////////////////////// -// Functions for accessing meta-data stored in indexes - -typedef const char *(*hts_id2name_f)(void*, int); - -/// Get extra index meta-data -/** @param idx The index - @param l_meta Pointer to where the length of the extra data is stored - @return Pointer to the extra data if present; NULL otherwise - - Indexes (both .tbi and .csi) made by tabix include extra data about - the indexed file. The returns a pointer to this data. Note that the - data is stored exactly as it is in the index. Callers need to interpret - the results themselves, including knowing what sort of data to expect; - byte swapping etc. -*/ -HTSLIB_EXPORT -uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta); - -/// Set extra index meta-data -/** @param idx The index - @param l_meta Length of data - @param meta Pointer to the extra data - @param is_copy If not zero, a copy of the data is taken - @return 0 on success; -1 on failure (out of memory). - - Sets the data that is returned by hts_idx_get_meta(). - - If is_copy != 0, a copy of the input data is taken. If not, ownership of - the data pointed to by *meta passes to the index. 
-*/ -HTSLIB_EXPORT -int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, int is_copy); - -/// Get number of mapped and unmapped reads from an index -/** @param idx Index - @param tid Target ID - @param[out] mapped Location to store number of mapped reads - @param[out] unmapped Location to store number of unmapped reads - @return 0 on success; -1 on failure (data not available) - - BAI and CSI indexes store information on the number of reads for each - target that were mapped or unmapped (unmapped reads will generally have - a paired read that is mapped to the target). This function returns this - information if it is available. - - @note Cram CRAI indexes do not include this information. -*/ -HTSLIB_EXPORT -int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped); - -/// Return the number of unplaced reads from an index -/** @param idx Index - @return Unplaced reads count - - Unplaced reads are not linked to any reference (e.g. RNAME is '*' in SAM - files). -*/ -HTSLIB_EXPORT -uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx); - -/// Return a list of target names from an index -/** @param idx Index - @param[out] n Location to store the number of targets - @param getid Callback function to get the name for a target ID - @param hdr Header from indexed file - @return An array of pointers to the names on success; NULL on failure - - @note The names are pointers into the header data structure. When cleaning - up, only the array should be freed, not the names. 
- */ -HTSLIB_EXPORT -const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values - -/// Return the number of targets from an index -/** @param idx Index - @return The number of targets - */ -HTSLIB_EXPORT -int hts_idx_nseq(const hts_idx_t *idx); - -/////////////////////////////////////////////////////////// -// Region parsing - -#define HTS_PARSE_THOUSANDS_SEP 1 ///< Ignore ',' separators within numbers -#define HTS_PARSE_ONE_COORD 2 ///< chr:pos means chr:pos-pos and not chr:pos-end -#define HTS_PARSE_LIST 4 ///< Expect a comma separated list of regions. (Disables HTS_PARSE_THOUSANDS_SEP) - -/// Parse a numeric string -/** The number may be expressed in scientific notation, and optionally may - contain commas in the integer part (before any decimal point or E notation). - @param str String to be parsed - @param strend If non-NULL, set on return to point to the first character - in @a str after those forming the parsed number - @param flags Or'ed-together combination of HTS_PARSE_* flags - @return Integer value of the parsed number, or 0 if no valid number - - The input string is parsed as: optional whitespace; an optional '+' or - '-' sign; decimal digits possibly including ',' characters (if @a flags - includes HTS_PARSE_THOUSANDS_SEP) and a '.' decimal point; and an optional - case-insensitive suffix, which may be either 'k', 'M', 'G', or scientific - notation consisting of 'e'/'E' followed by an optional '+' or '-' sign and - decimal digits. To be considered a valid numeric value, the main part (not - including any suffix or scientific notation) must contain at least one - digit (either before or after the decimal point). - - When @a strend is NULL, @a str is expected to contain only (optional - whitespace followed by) the numeric value. 
A warning will be printed - (if hts_verbose is HTS_LOG_WARNING or more) if no valid parsable number - is found or if there are any unused characters after the number. - - When @a strend is non-NULL, @a str starts with (optional whitespace - followed by) the numeric value. On return, @a strend is set to point - to the first unused character after the numeric value, or to @a str - if no valid parsable number is found. -*/ -HTSLIB_EXPORT -long long hts_parse_decimal(const char *str, char **strend, int flags); - -typedef int (*hts_name2id_f)(void*, const char*); - -/// Parse a "CHR:START-END"-style region string -/** @param str String to be parsed - @param beg Set on return to the 0-based start of the region - @param end Set on return to the 1-based end of the region - @return Pointer to the colon or '\0' after the reference sequence name, - or NULL if @a str could not be parsed. - - NOTE: For compatibility with hts_parse_reg only. - Please use hts_parse_region instead. -*/ -HTSLIB_EXPORT -const char *hts_parse_reg64(const char *str, hts_pos_t *beg, hts_pos_t *end); - -/// Parse a "CHR:START-END"-style region string -/** @param str String to be parsed - @param beg Set on return to the 0-based start of the region - @param end Set on return to the 1-based end of the region - @return Pointer to the colon or '\0' after the reference sequence name, - or NULL if @a str could not be parsed. -*/ -HTSLIB_EXPORT -const char *hts_parse_reg(const char *str, int *beg, int *end); - -/// Parse a "CHR:START-END"-style region string -/** @param str String to be parsed - @param tid Set on return (if not NULL) to be reference index (-1 if invalid) - @param beg Set on return to the 0-based start of the region - @param end Set on return to the 1-based end of the region - @param getid Function pointer. Called if not NULL to set tid. - @param hdr Caller data passed to getid. - @param flags Bitwise HTS_PARSE_* flags listed above. 
- @return Pointer to the byte after the end of the entire region - specifier (including any trailing comma) on success, - or NULL if @a str could not be parsed. - - A variant of hts_parse_reg which is reference-id aware. It uses - the iterator name2id callbacks to validate the region tokenisation works. - - This is necessary due to GRCh38 HLA additions which have reference names - like "HLA-DRB1*12:17". - - To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" - are reference names, quote using curly braces. - Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. - - Flags are used to control how parsing works, and can be one of the below. - - HTS_PARSE_THOUSANDS_SEP: - Ignore commas in numbers. For example with this flag 1,234,567 - is interpreted as 1234567. - - HTS_PARSE_LIST: - If present, the region is assmed to be a comma separated list and - position parsing will not contain commas (this implicitly - clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal). - On success the return pointer will be the start of the next region, ie - the character after the comma. (If *ret != '\0' then the caller can - assume another region is present in the list.) - - If not set then positions may contain commas. In this case the return - value should point to the end of the string, or NULL on failure. - - HTS_PARSE_ONE_COORD: - If present, X:100 is treated as the single base pair region X:100-100. - In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-. - (This is the standard bcftools region convention.) - - When not set X:100 is considered to be X:100- where is - the end of chromosome X (set to INT_MAX here). X:100- and X:-100 are - invalid. - (This is the standard samtools region convention.) - - Note the supplied string expects 1 based inclusive coordinates, but the - returned coordinates start from 0 and are half open, so pos0 is valid - for use in e.g. 
"for (pos0 = beg; pos0 < end; pos0++) {...}" - - If NULL is returned, the value in tid mat give additional information - about the error: - - -2 Failed to parse @p hdr; or out of memory - -1 The reference in @p str has mismatched braces, or does not - exist in @p hdr - >= 0 The specified range in @p str could not be parsed -*/ -HTSLIB_EXPORT -const char *hts_parse_region(const char *s, int *tid, hts_pos_t *beg, - hts_pos_t *end, hts_name2id_f getid, void *hdr, - int flags); - - -/////////////////////////////////////////////////////////// -// Generic iterators -// -// These functions provide the low-level infrastructure for iterators. -// Wrappers around these are used to make iterators for specific file types. -// See: -// htslib/sam.h for SAM/BAM/CRAM iterators -// htslib/vcf.h for VCF/BCF iterators -// htslib/tbx.h for files indexed by tabix - -/// Create a single-region iterator -/** @param idx Index - @param tid Target ID - @param beg Start of region - @param end End of region - @param readrec Callback to read a record from the input file - @return An iterator on success; NULL on failure - - The iterator struct returned by a successful call should be freed - via hts_itr_destroy() when it is no longer needed. 
- */ -HTSLIB_EXPORT -hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec); - -/// Free an iterator -/** @param iter Iterator to free - */ -HTSLIB_EXPORT -void hts_itr_destroy(hts_itr_t *iter); - -typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec); - -/// Create a single-region iterator from a text region specification -/** @param idx Index - @param reg Region specifier - @param getid Callback function to return the target ID for a name - @param hdr Input file header - @param itr_query Callback function returning an iterator for a numeric tid, - start and end position - @param readrec Callback to read a record from the input file - @return An iterator on success; NULL on error - - The iterator struct returned by a successful call should be freed - via hts_itr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec); - -/// Return the next record from an iterator -/** @param fp Input file handle - @param iter Iterator - @param r Pointer to record placeholder - @param data Data passed to the readrec callback - @return >= 0 on success, -1 when there is no more data, < -1 on error - */ -HTSLIB_EXPORT -int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) HTS_RESULT_USED; - -/********************************** - * Iterator with multiple regions * - **********************************/ - -typedef int hts_itr_multi_query_func(const hts_idx_t *idx, hts_itr_t *itr); -HTSLIB_EXPORT -int hts_itr_multi_bam(const hts_idx_t *idx, hts_itr_t *iter); -HTSLIB_EXPORT -int hts_itr_multi_cram(const hts_idx_t *idx, hts_itr_t *iter); - -/// Create a multi-region iterator from a region list -/** @param idx Index - @param reglist Region list - @param count Number of items in region list - 
@param getid Callback to convert names to target IDs - @param hdr Indexed file header (passed to getid) - @param itr_specific Filetype-specific callback function - @param readrec Callback to read an input file record - @param seek Callback to seek in the input file - @param tell Callback to return current input file location - @return An iterator on success; NULL on failure - - The iterator struct returned by a successful call should be freed - via hts_itr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -hts_itr_t *hts_itr_regions(const hts_idx_t *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, hts_itr_multi_query_func *itr_specific, hts_readrec_func *readrec, hts_seek_func *seek, hts_tell_func *tell); - -/// Return the next record from an iterator -/** @param fp Input file handle - @param iter Iterator - @param r Pointer to record placeholder - @return >= 0 on success, -1 when there is no more data, < -1 on error - */ -HTSLIB_EXPORT -int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r); - -/// Create a region list from a char array -/** @param argv Char array of target:interval elements, e.g. chr1:2500-3600, chr1:5100, chr2 - @param argc Number of items in the array - @param r_count Pointer to the number of items in the resulting region list - @param hdr Header for the sam/bam/cram file - @param getid Callback to convert target names to target ids. - @return A region list on success, NULL on failure - - The hts_reglist_t struct returned by a successful call should be freed - via hts_reglist_free() when it is no longer needed. 
- */ -HTSLIB_EXPORT -hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr, hts_name2id_f getid); - -/// Free a region list -/** @param reglist Region list - @param count Number of items in the list - */ -HTSLIB_EXPORT -void hts_reglist_free(hts_reglist_t *reglist, int count); - -/// Free a multi-region iterator -/** @param iter Iterator to free - */ -#define hts_itr_multi_destroy(iter) hts_itr_destroy(iter) - - - /** - * hts_file_type() - Convenience function to determine file type - * DEPRECATED: This function has been replaced by hts_detect_format(). - * It and these FT_* macros will be removed in a future HTSlib release. - */ - #define FT_UNKN 0 - #define FT_GZ 1 - #define FT_VCF 2 - #define FT_VCF_GZ (FT_GZ|FT_VCF) - #define FT_BCF (1<<2) - #define FT_BCF_GZ (FT_GZ|FT_BCF) - #define FT_STDIN (1<<3) - HTSLIB_EXPORT - int hts_file_type(const char *fname); - - -/*************************** - * Revised MAQ error model * - ***************************/ - -struct errmod_t; -typedef struct errmod_t errmod_t; - -HTSLIB_EXPORT -errmod_t *errmod_init(double depcorr); -HTSLIB_EXPORT -void errmod_destroy(errmod_t *em); - -/* - n: number of bases - m: maximum base - bases[i]: qual:6, strand:1, base:4 - q[i*m+j]: phred-scaled likelihood of (i,j) - */ -HTSLIB_EXPORT -int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q); - - -/***************************************************** - * Probabilistic banded glocal alignment * - * See https://doi.org/10.1093/bioinformatics/btr076 * - *****************************************************/ - -typedef struct probaln_par_t { - float d, e; - int bw; -} probaln_par_t; - -/// Perform probabilistic banded glocal alignment -/** @param ref Reference sequence - @param l_ref Length of reference - @param query Query sequence - @param l_query Length of query sequence - @param iqual Query base qualities - @param c Alignment parameters - @param[out] state Output alignment - @param[out] q Phred 
scaled posterior probability of state[i] being wrong - @return Phred-scaled likelihood score, or INT_MIN on failure. - -The reference and query sequences are coded using integers 0,1,2,3,4 for -bases A,C,G,T,N respectively (N here is for any ambiguity code). - -On output, state and q are arrays of length l_query. The higher 30 -bits give the reference position the query base is matched to and the -lower two bits can be 0 (an alignment match) or 1 (an -insertion). q[i] gives the phred scaled posterior probability of -state[i] being wrong. - -On failure, errno will be set to EINVAL if the values of l_ref or l_query -were invalid; or ENOMEM if a memory allocation failed. -*/ - -HTSLIB_EXPORT -int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, int l_query, const uint8_t *iqual, const probaln_par_t *c, int *state, uint8_t *q); - - - /********************** - * MD5 implementation * - **********************/ - - struct hts_md5_context; - typedef struct hts_md5_context hts_md5_context; - - /*! @abstract Initialises an MD5 context. - * @discussion - * The expected use is to allocate an hts_md5_context using - * hts_md5_init(). This pointer is then passed into one or more calls - * of hts_md5_update() to compute successive internal portions of the - * MD5 sum, which can then be externalised as a full 16-byte MD5sum - * calculation by calling hts_md5_final(). This can then be turned - * into ASCII via hts_md5_hex(). - * - * To dealloate any resources created by hts_md5_init() call the - * hts_md5_destroy() function. - * - * @return hts_md5_context pointer on success, NULL otherwise. - */ - HTSLIB_EXPORT - hts_md5_context *hts_md5_init(void); - - /*! @abstract Updates the context with the MD5 of the data. */ - HTSLIB_EXPORT - void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size); - - /*! 
@abstract Computes the final 128-bit MD5 hash from the given context */ - HTSLIB_EXPORT - void hts_md5_final(unsigned char *digest, hts_md5_context *ctx); - - /*! @abstract Resets an md5_context to the initial state, as returned - * by hts_md5_init(). - */ - HTSLIB_EXPORT - void hts_md5_reset(hts_md5_context *ctx); - - /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-termninated - * hex string. - */ - HTSLIB_EXPORT - void hts_md5_hex(char *hex, const unsigned char *digest); - - /*! @abstract Deallocates any memory allocated by hts_md5_init. */ - HTSLIB_EXPORT - void hts_md5_destroy(hts_md5_context *ctx); - -static inline int hts_reg2bin(hts_pos_t beg, hts_pos_t end, int min_shift, int n_lvls) -{ - int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7; - for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l)) - if (beg>>s == end>>s) return t + (beg>>s); - return 0; -} - -/// Compute the level of a bin in a binning index -static inline int hts_bin_level(int bin) { - int l, b; - for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); - return l; -} - -//! Compute the corresponding entry into the linear index of a given bin from -//! a binning index -/*! - * @param bin The bin number - * @param n_lvls The index depth (number of levels - 0 based) - * @return The integer offset into the linear index - * - * Explanation of the return value formula: - * Each bin on level l covers exp(2, (n_lvls - l)*3 + min_shift) base pairs. - * A linear index entry covers exp(2, min_shift) base pairs. 
- */ -static inline int hts_bin_bot(int bin, int n_lvls) -{ - int l = hts_bin_level(bin); - return (bin - hts_bin_first(l)) << (n_lvls - l) * 3; -} - -/************** - * Endianness * - **************/ - -static inline int ed_is_big(void) -{ - long one= 1; - return !(*((char *)(&one))); -} -static inline uint16_t ed_swap_2(uint16_t v) -{ - return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); -} -static inline void *ed_swap_2p(void *x) -{ - *(uint16_t*)x = ed_swap_2(*(uint16_t*)x); - return x; -} -static inline uint32_t ed_swap_4(uint32_t v) -{ - v = ((v & 0x0000FFFFU) << 16) | (v >> 16); - return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); -} -static inline void *ed_swap_4p(void *x) -{ - *(uint32_t*)x = ed_swap_4(*(uint32_t*)x); - return x; -} -static inline uint64_t ed_swap_8(uint64_t v) -{ - v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); - v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); - return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); -} -static inline void *ed_swap_8p(void *x) -{ - *(uint64_t*)x = ed_swap_8(*(uint64_t*)x); - return x; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/hts_defs.h b/src/htslib-1.19.1/htslib/hts_defs.h deleted file mode 100644 index 3576840..0000000 --- a/src/htslib-1.19.1/htslib/hts_defs.h +++ /dev/null @@ -1,129 +0,0 @@ -/* hts_defs.h -- Miscellaneous definitions. - - Copyright (C) 2013-2015,2017, 2019-2020 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_HTS_DEFS_H -#define HTSLIB_HTS_DEFS_H - -#if defined __MINGW32__ -#include // For __MINGW_PRINTF_FORMAT macro -#endif - -#ifdef __clang__ -#ifdef __has_attribute -#define HTS_COMPILER_HAS(attribute) __has_attribute(attribute) -#endif - -#elif defined __GNUC__ -#define HTS_GCC_AT_LEAST(major, minor) \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) -#endif - -#ifndef HTS_COMPILER_HAS -#define HTS_COMPILER_HAS(attribute) 0 -#endif -#ifndef HTS_GCC_AT_LEAST -#define HTS_GCC_AT_LEAST(major, minor) 0 -#endif - -#if HTS_COMPILER_HAS(__nonstring__) || HTS_GCC_AT_LEAST(8,1) -#define HTS_NONSTRING __attribute__ ((__nonstring__)) -#else -#define HTS_NONSTRING -#endif - -#if HTS_COMPILER_HAS(__noreturn__) || HTS_GCC_AT_LEAST(3,0) -#define HTS_NORETURN __attribute__ ((__noreturn__)) -#else -#define HTS_NORETURN -#endif - -// Enable optimisation level 3, especially for gcc. 
To be used -// where we want to force vectorisation in hot loops and the default -O2 -// just doesn't cut it. -#if HTS_COMPILER_HAS(optimize) || HTS_GCC_AT_LEAST(4,4) -#define HTS_OPT3 __attribute__((optimize("O3"))) -#else -#define HTS_OPT3 -#endif - -// GCC introduced warn_unused_result in 3.4 but added -Wno-unused-result later -#if HTS_COMPILER_HAS(__warn_unused_result__) || HTS_GCC_AT_LEAST(4,5) -#define HTS_RESULT_USED __attribute__ ((__warn_unused_result__)) -#else -#define HTS_RESULT_USED -#endif - -#if HTS_COMPILER_HAS(__unused__) || HTS_GCC_AT_LEAST(3,0) -#define HTS_UNUSED __attribute__ ((__unused__)) -#else -#define HTS_UNUSED -#endif - -#if HTS_COMPILER_HAS(__deprecated__) || HTS_GCC_AT_LEAST(4,5) -#define HTS_DEPRECATED(message) __attribute__ ((__deprecated__ (message))) -#elif HTS_GCC_AT_LEAST(3,1) -#define HTS_DEPRECATED(message) __attribute__ ((__deprecated__)) -#else -#define HTS_DEPRECATED(message) -#endif - -#if (HTS_COMPILER_HAS(__deprecated__) || HTS_GCC_AT_LEAST(6,4)) && !defined(__ICC) -#define HTS_DEPRECATED_ENUM(message) __attribute__ ((__deprecated__ (message))) -#else -#define HTS_DEPRECATED_ENUM(message) -#endif - -// On mingw the "printf" format type doesn't work. It needs "gnu_printf" -// in order to check %lld and %z, otherwise it defaults to checking against -// the Microsoft library printf format options despite linking against the -// GNU posix implementation of printf. 
The __MINGW_PRINTF_FORMAT macro -// expands to printf or gnu_printf as required, but obviously may not -// exist -#ifdef __MINGW_PRINTF_FORMAT -#define HTS_PRINTF_FMT __MINGW_PRINTF_FORMAT -#else -#define HTS_PRINTF_FMT printf -#endif - -#if HTS_COMPILER_HAS(__format__) || HTS_GCC_AT_LEAST(3,0) -#define HTS_FORMAT(type, idx, first) __attribute__((__format__ (type, idx, first))) -#else -#define HTS_FORMAT(type, idx, first) -#endif - -#if defined(_WIN32) || defined(__CYGWIN__) -#if defined(HTS_BUILDING_LIBRARY) -#define HTSLIB_EXPORT __declspec(dllexport) -#else -#define HTSLIB_EXPORT -#endif -#elif HTS_COMPILER_HAS(__visibility__) || HTS_GCC_AT_LEAST(4,0) -#define HTSLIB_EXPORT __attribute__((__visibility__("default"))) -#elif defined(__SUNPRO_C) && __SUNPRO_C >= 0x550 -#define HTSLIB_EXPORT __global -#else -#define HTSLIB_EXPORT -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/hts_expr.h b/src/htslib-1.19.1/htslib/hts_expr.h deleted file mode 100644 index 43da89d..0000000 --- a/src/htslib-1.19.1/htslib/hts_expr.h +++ /dev/null @@ -1,152 +0,0 @@ -/* expr.c -- filter expression parsing and processing. - - Copyright (C) 2020, 2022 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notices and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTS_EXPR_H -#define HTS_EXPR_H - -#include -#include "kstring.h" -#include "hts_defs.h" - -/// Holds a filter variable. This is also used to return the results. -/** - * The expression language has 3-states of string, numeric, and unknown. - * The unknown state is either a NaN numeric or a null string, with both - * internally considered to have the same "unknown" meaning. - * - * These largely match the IEE 754 semantics for NaN comparisons: <, >, ==, - * != all fail, (even NaN == NaN). Similarly arithmetic (+,-,/,*,%) with - * unknown values are still unknown (and false). - * - * The departure from NaN semantics though is that our unknown/null state is - * considered to be false while NaN in C is true. Similarly the false nature - * of our unknown state meants !val becomes true, !!val is once again false, - * val && 1 is false, val || 0 is false, and val || 1 is true along with - * !val || 0 and !val && 1. - * - * Note it is possible for empty strings and zero numbers to also be true. - * An example of this is the aux string '[NM]' which returns true if the - * NM tag is found, regardless of whether it is also zero. However the - * better approach added in 1.16 is 'exists([NM])'. - */ -typedef struct hts_expr_val_t { - char is_str; // Use .s vs .d - char is_true; // Force true if even zero - kstring_t s; // is_str and empty s permitted (eval as false) - double d; // otherwise this -} hts_expr_val_t; - -/// Returns true if an hts_expr_val_t is defined. -/* An example usage of this is in the SAM expression filter where an - * [X0] aux tag will be the value of X0 (string or numeric) if set, or - * a false nul-string (not the same as an empty one) when not set. 
- */ -static inline int hts_expr_val_exists(hts_expr_val_t *v) { - return v && !(v->is_str == 1 && v->s.s == NULL) - && !(v->is_str == 0 && isnan(v->d)); -} - -/// Returns true if an hts_expr_val_t is defined or is undef-but-true -static inline int hts_expr_val_existsT(hts_expr_val_t *v) { - return (v && v->is_true) || hts_expr_val_exists(v); -} - -/// Set a value to be undefined (nan). -static inline void hts_expr_val_undef(hts_expr_val_t *v) { - ks_clear(&v->s); - v->is_true = 0; - v->is_str = 0; - v->d = NAN; -} - -/// Frees a hts_expr_val_t type. -static inline void hts_expr_val_free(hts_expr_val_t *f) { - ks_free(&f->s); -} - -/// Opaque hts_filter_t type. Definition in hts_expr.c -typedef struct hts_filter_t hts_filter_t; - -/// For static initialisation of hts_expr_val_t values -#define HTS_EXPR_VAL_INIT {0, 0, KS_INITIALIZE, 0} - -/// Creates a filter for expression "str". -/** @param str The filter expression - * @return A pointer on success, NULL on failure - */ -HTSLIB_EXPORT -hts_filter_t *hts_filter_init(const char *str); - -/// Frees an hts_filter_t created via hts_filter_init -/** @param filt The filter pointer. - */ -HTSLIB_EXPORT -void hts_filter_free(hts_filter_t *filt); - -/// Type for expression symbol lookups; name -> value. -typedef int (hts_expr_sym_func)(void *data, char *str, char **end, - hts_expr_val_t *res); - -/// Evaluates a filter expression and returns the value -/** @param filt The filter, produced by hts_filter_init - * @param data Arbitrary caller data, passed into sym_func - * @param sym_func Callback function to lookup variables. - * @param res Filled out with the result of the filter evaluation - * @return Returns 0 on success, -1 on failure - * - * sym_func and data may be NULL if the caller does not need its own data - * pointer or if it has no variables to lookup. - * - * The type of the returned result may be numeric of string, as defined by - * the is_str member. 
It can also be explicitly defined to be true even - * for a null value. This may be used to check for the existence of - * something, irrespective of whether that something evaluates to zero. - * - * @p res must be initialized using HTS_EXPR_VAL_INIT before passing it - * to this function for the first time. - */ -HTSLIB_EXPORT -int hts_filter_eval2(hts_filter_t *filt, - void *data, hts_expr_sym_func *sym_func, - hts_expr_val_t *res); - -/// Evaluate a filter expression (derecated API) -/** - * @copydetails hts_filter_eval2() - * - * If calling this function more than once with the same @p res - * parameter, hts_expr_val_free(res) must be used between invocations - * to clear any allocated memory prior to reuse. - * - * @deprecated This function has been replaced by hts_filter_eval2(), - * which clears @p res properly itself. - */ -HTSLIB_EXPORT -int hts_filter_eval(hts_filter_t *filt, - void *data, hts_expr_sym_func *sym_func, - hts_expr_val_t *res) - HTS_DEPRECATED("Please use hts_filter_eval2 instead"); - - -#endif /* HTS_EXPR_H */ diff --git a/src/htslib-1.19.1/htslib/hts_log.h b/src/htslib-1.19.1/htslib/hts_log.h deleted file mode 100644 index f6a50b3..0000000 --- a/src/htslib-1.19.1/htslib/hts_log.h +++ /dev/null @@ -1,97 +0,0 @@ -/// \file htslib/hts_log.h -/// Configuration of log levels. -/* The MIT License -Copyright (C) 2017 Genome Research Ltd. - -Author: Anders Kaplan - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -#ifndef HTS_LOG_H -#define HTS_LOG_H - -#include "hts_defs.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/// Log levels. -enum htsLogLevel { - HTS_LOG_OFF, ///< All logging disabled. - HTS_LOG_ERROR, ///< Logging of errors only. - HTS_LOG_WARNING = 3, ///< Logging of errors and warnings. - HTS_LOG_INFO, ///< Logging of errors, warnings, and normal but significant events. - HTS_LOG_DEBUG, ///< Logging of all except the most detailed debug events. - HTS_LOG_TRACE ///< All logging enabled. -}; - -/// Sets the selected log level. -HTSLIB_EXPORT -void hts_set_log_level(enum htsLogLevel level); - -/// Gets the selected log level. -HTSLIB_EXPORT -enum htsLogLevel hts_get_log_level(void); - -/// Selected log level. -/*! - * One of the HTS_LOG_* values. The default is HTS_LOG_WARNING. - * \note Avoid direct use of this variable. Use hts_set_log_level and hts_get_log_level instead. - */ -HTSLIB_EXPORT -extern int hts_verbose; - -/*! Logs an event. -* \param severity Severity of the event: -* - HTS_LOG_ERROR means that something went wrong so that a task could not be completed. -* - HTS_LOG_WARNING means that something unexpected happened, but that execution can continue, perhaps in a degraded mode. -* - HTS_LOG_INFO means that something normal but significant happened. -* - HTS_LOG_DEBUG means that something normal and insignificant happened. -* - HTS_LOG_TRACE means that something happened that might be of interest when troubleshooting. -* \param context Context where the event occurred. 
Typically set to "__func__". -* \param format Format string with placeholders, like printf. -*/ -HTSLIB_EXPORT -void hts_log(enum htsLogLevel severity, const char *context, const char *format, ...) -HTS_FORMAT(HTS_PRINTF_FMT, 3, 4); - -/*! Logs an event with severity HTS_LOG_ERROR and default context. Parameters: format, ... */ -#define hts_log_error(...) hts_log(HTS_LOG_ERROR, __func__, __VA_ARGS__) - -/*! Logs an event with severity HTS_LOG_WARNING and default context. Parameters: format, ... */ -#define hts_log_warning(...) hts_log(HTS_LOG_WARNING, __func__, __VA_ARGS__) - -/*! Logs an event with severity HTS_LOG_INFO and default context. Parameters: format, ... */ -#define hts_log_info(...) hts_log(HTS_LOG_INFO, __func__, __VA_ARGS__) - -/*! Logs an event with severity HTS_LOG_DEBUG and default context. Parameters: format, ... */ -#define hts_log_debug(...) hts_log(HTS_LOG_DEBUG, __func__, __VA_ARGS__) - -/*! Logs an event with severity HTS_LOG_TRACE and default context. Parameters: format, ... */ -#define hts_log_trace(...) hts_log(HTS_LOG_TRACE, __func__, __VA_ARGS__) - -#ifdef __cplusplus -} -#endif - -#endif // #ifndef HTS_LOG_H diff --git a/src/htslib-1.19.1/htslib/hts_os.h b/src/htslib-1.19.1/htslib/hts_os.h deleted file mode 100644 index c715b06..0000000 --- a/src/htslib-1.19.1/htslib/hts_os.h +++ /dev/null @@ -1,86 +0,0 @@ -/// @file hts_os.h -/// Operating System specific tweaks, for compatibility with POSIX. -/* - Copyright (C) 2017, 2019-2020 Genome Research Ltd. 
- - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_HTS_OS_H -#define HTSLIB_HTS_OS_H - -#include "hts_defs.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* This is srand48_deterministic() on platforms that provide it, or srand48() - otherwise (or our own POSIX srand48() on platforms that provide neither). - Hence calling hts_srand48() will always set up the same POSIX-determined - sequence of pseudo-random numbers on any platform, while calling srand48() - may (e.g., on OpenBSD) set up a different non-deterministic sequence. */ -HTSLIB_EXPORT -void hts_srand48(long seed); - -HTSLIB_EXPORT -double hts_erand48(unsigned short xseed[3]); - -HTSLIB_EXPORT -double hts_drand48(void); - -HTSLIB_EXPORT -long hts_lrand48(void); - -#if defined(_WIN32) && !defined(__CYGWIN__) -// Windows usually lacks *rand48(), but cygwin provides them. 
-#define srand48(S) hts_srand48((S)) -#define erand48(X) hts_erand48((X)) -#define drand48() hts_drand48() -#define lrand48() hts_lrand48() -#endif - -#if 0 /* def _WIN32 - disabled for now, not currently used */ -/* Check if the fd is a cygwin/msys's pty. */ -extern int is_cygpty(int fd); -#endif - -#ifdef __cplusplus -} -#endif - -#if defined(__MINGW32__) -#include -#define mkdir(filename,mode) mkdir((filename)) -#endif - -#ifdef _WIN32 -#include -#define srandom srand -#define random rand -#endif - -/* MSVC does not provide ssize_t in its . This ensures the type - is available (unless suppressed by defining HTS_NO_SSIZE_T first). */ -#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined HTS_NO_SSIZE_T && !defined ssize_t -#define ssize_t intptr_t -#endif - -#endif // HTSLIB_HTS_OS_H diff --git a/src/htslib-1.19.1/htslib/kbitset.h b/src/htslib-1.19.1/htslib/kbitset.h deleted file mode 100644 index 0a52958..0000000 --- a/src/htslib-1.19.1/htslib/kbitset.h +++ /dev/null @@ -1,203 +0,0 @@ -/* The MIT License - - Copyright (C) 2015, 2018 Genome Research Ltd. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef KBITSET_H -#define KBITSET_H - -/* Example of using kbitset_t, which represents a subset of {0,..., N-1}, - where N is the size specified in kbs_init(). - - kbitset_t *bset = kbs_init(100); - kbs_insert(bset, 5); - kbs_insert(bset, 68); - kbs_delete(bset, 37); - // ... - - if (kbs_exists(bset, 68)) printf("68 present\n"); - - kbitset_iter_t itr; - int i; - kbs_start(&itr); - while ((i = kbs_next(bset, &itr)) >= 0) - printf("%d present\n", i); - - kbs_destroy(bset); - - Example of declaring a kbitset_t-using function in a header file, so that - only source files that actually use process() need to include : - - struct kbitset_t; - void process(struct kbitset_t *bset); -*/ - -#include -#include -#include - -#define KBS_ELTBITS (CHAR_BIT * sizeof (unsigned long)) -#define KBS_ELT(i) ((i) / KBS_ELTBITS) -#define KBS_MASK(i) (1UL << ((i) % KBS_ELTBITS)) - -typedef struct kbitset_t { - size_t n, n_max; - unsigned long b[1]; -} kbitset_t; - -// (For internal use only.) Returns a mask (like 00011111) showing -// which bits are in use in the last slot (for the given ni) set. -static inline unsigned long kbs_last_mask(size_t ni) -{ - unsigned long mask = KBS_MASK(ni) - 1; - return mask? mask : ~0UL; -} - -// Initialise a bit set capable of holding ni integers, 0 <= i < ni. -// The set returned is empty if fill == 0, or all of [0,ni) otherwise. -static inline kbitset_t *kbs_init2(size_t ni, int fill) -{ - size_t n = (ni + KBS_ELTBITS-1) / KBS_ELTBITS; - kbitset_t *bs = - (kbitset_t *) malloc(sizeof(kbitset_t) + n * sizeof(unsigned long)); - if (bs == NULL) return NULL; - bs->n = bs->n_max = n; - memset(bs->b, fill? 
~0 : 0, n * sizeof (unsigned long)); - // b[n] is always non-zero (a fact used by kbs_next()). - bs->b[n] = kbs_last_mask(ni); - if (fill) bs->b[n-1] &= bs->b[n]; - return bs; -} - -// Initialise an empty bit set capable of holding ni integers, 0 <= i < ni. -static inline kbitset_t *kbs_init(size_t ni) -{ - return kbs_init2(ni, 0); -} - -// Resize an existing bit set to be capable of holding ni_new integers. -// Elements in [ni_old,ni_new) are added to the set if fill != 0. -static inline int kbs_resize2(kbitset_t **bsp, size_t ni_new, int fill) -{ - kbitset_t *bs = *bsp; - size_t n = bs? bs->n : 0; - size_t n_new = (ni_new + KBS_ELTBITS-1) / KBS_ELTBITS; - if (bs == NULL || n_new > bs->n_max) { - bs = (kbitset_t *) - realloc(*bsp, sizeof(kbitset_t) + n_new * sizeof(unsigned long)); - if (bs == NULL) return -1; - - bs->n_max = n_new; - *bsp = bs; - } - - bs->n = n_new; - if (n_new >= n) - memset(&bs->b[n], fill? ~0 : 0, (n_new - n) * sizeof (unsigned long)); - bs->b[n_new] = kbs_last_mask(ni_new); - // Need to clear excess bits when fill!=0 or n_newb[n_new-1] &= bs->b[n_new]; - return 0; -} - -// Resize an existing bit set to be capable of holding ni_new integers. -// Returns negative on error. -static inline int kbs_resize(kbitset_t **bsp, size_t ni_new) -{ - return kbs_resize2(bsp, ni_new, 0); -} - -// Destroy a bit set. -static inline void kbs_destroy(kbitset_t *bs) -{ - free(bs); -} - -// Reset the bit set to empty. -static inline void kbs_clear(kbitset_t *bs) -{ - memset(bs->b, 0, bs->n * sizeof (unsigned long)); -} - -// Reset the bit set to all of [0,ni). -static inline void kbs_insert_all(kbitset_t *bs) -{ - memset(bs->b, ~0, bs->n * sizeof (unsigned long)); - bs->b[bs->n-1] &= bs->b[bs->n]; -} - -// Insert an element into the bit set. -static inline void kbs_insert(kbitset_t *bs, int i) -{ - bs->b[KBS_ELT(i)] |= KBS_MASK(i); -} - -// Remove an element from the bit set. 
-static inline void kbs_delete(kbitset_t *bs, int i) -{ - bs->b[KBS_ELT(i)] &= ~KBS_MASK(i); -} - -// Test whether the bit set contains the element. -static inline int kbs_exists(const kbitset_t *bs, int i) -{ - return (bs->b[KBS_ELT(i)] & KBS_MASK(i)) != 0; -} - -typedef struct kbitset_iter_t { - unsigned long mask; - size_t elt; - int i; -} kbitset_iter_t; - -// Initialise or reset a bit set iterator. -static inline void kbs_start(kbitset_iter_t *itr) -{ - itr->mask = 1; - itr->elt = 0; - itr->i = 0; -} - -// Return the next element contained in the bit set, or -1 if there are no more. -static inline int kbs_next(const kbitset_t *bs, kbitset_iter_t *itr) -{ - unsigned long b = bs->b[itr->elt]; - - for (;;) { - if (itr->mask == 0) { - while ((b = bs->b[++itr->elt]) == 0) itr->i += KBS_ELTBITS; - if (itr->elt == bs->n) return -1; - itr->mask = 1; - } - - if (b & itr->mask) break; - - itr->i++; - itr->mask <<= 1; - } - - itr->mask <<= 1; - return itr->i++; -} - -#endif diff --git a/src/htslib-1.19.1/htslib/kfunc.h b/src/htslib-1.19.1/htslib/kfunc.h deleted file mode 100644 index 34704b1..0000000 --- a/src/htslib-1.19.1/htslib/kfunc.h +++ /dev/null @@ -1,91 +0,0 @@ -/* The MIT License - - Copyright (C) 2010, 2013-2014 Genome Research Ltd. - Copyright (C) 2011 Attractive Chaos - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef HTSLIB_KFUNC_H -#define HTSLIB_KFUNC_H - -#include "hts_defs.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Log gamma function - * \log{\Gamma(z)} - * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 - */ -HTSLIB_EXPORT -double kf_lgamma(double z); - -/* complementary error function - * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt - * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66 - */ -HTSLIB_EXPORT -double kf_erfc(double x); - -/* The following computes regularized incomplete gamma functions. - * Formulas are taken from Wiki, with additional input from Numerical - * Recipes in C (for modified Lentz's algorithm) and AS245 - * (http://lib.stat.cmu.edu/apstat/245). - * - * A good online calculator is available at: - * - * http://www.danielsoper.com/statcalc/calc23.aspx - * - * It calculates upper incomplete gamma function, which equals - * kf_gammaq(s,z)*tgamma(s). - */ - -HTSLIB_EXPORT -double kf_gammap(double s, double z); -HTSLIB_EXPORT -double kf_gammaq(double s, double z); - -/* Regularized incomplete beta function. The method is taken from - * Numerical Recipe in C, 2nd edition, section 6.4. 
The following web - * page calculates the incomplete beta function, which equals - * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b): - * - * http://www.danielsoper.com/statcalc/calc36.aspx - */ -HTSLIB_EXPORT -double kf_betai(double a, double b, double x); - -/* - * n11 n12 | n1_ - * n21 n22 | n2_ - * -----------+---- - * n_1 n_2 | n - */ -HTSLIB_EXPORT -double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/khash.h b/src/htslib-1.19.1/htslib/khash.h deleted file mode 100644 index 4cea910..0000000 --- a/src/htslib-1.19.1/htslib/khash.h +++ /dev/null @@ -1,670 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, 2009, 2011 by Attractive Chaos - Copyright (C) 2014-2015, 2018 Genome Research Ltd. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -/* - An example: - -#include "khash.h" -KHASH_MAP_INIT_INT(32, char) -int main() { - int ret, is_missing; - khiter_t k; - khash_t(32) *h = kh_init(32); - k = kh_put(32, h, 5, &ret); - kh_value(h, k) = 10; - k = kh_get(32, h, 10); - is_missing = (k == kh_end(h)); - k = kh_get(32, h, 5); - kh_del(32, h, k); - for (k = kh_begin(h); k != kh_end(h); ++k) - if (kh_exist(h, k)) kh_value(h, k) = 1; - kh_destroy(32, h); - return 0; -} -*/ - -/* - 2013-05-02 (0.2.8): - - * Use quadratic probing. When the capacity is power of 2, stepping function - i*(i+1)/2 guarantees to traverse each bucket. It is better than double - hashing on cache performance and is more robust than linear probing. - - In theory, double hashing should be more robust than quadratic probing. - However, my implementation is probably not for large hash tables, because - the second hash function is closely tied to the first hash function, - which reduce the effectiveness of double hashing. - - Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php - - 2011-12-29 (0.2.7): - - * Minor code clean up; no actual effect. - - 2011-09-16 (0.2.6): - - * The capacity is a power of 2. This seems to dramatically improve the - speed for simple keys. Thank Zilong Tan for the suggestion. Reference: - - - http://code.google.com/p/ulib/ - - http://nothings.org/computer/judy/ - - * Allow to optionally use linear probing which usually has better - performance for random input. Double hashing is still the default as it - is more robust to certain non-random input. - - * Added Wang's integer hash function (not used by default). This hash - function is more robust to certain non-random input. - - 2011-02-14 (0.2.5): - - * Allow to declare global functions. 
- - 2009-09-26 (0.2.4): - - * Improve portability - - 2008-09-19 (0.2.3): - - * Corrected the example - * Improved interfaces - - 2008-09-11 (0.2.2): - - * Improved speed a little in kh_put() - - 2008-09-10 (0.2.1): - - * Added kh_clear() - * Fixed a compiling error - - 2008-09-02 (0.2.0): - - * Changed to token concatenation which increases flexibility. - - 2008-08-31 (0.1.2): - - * Fixed a bug in kh_get(), which has not been tested previously. - - 2008-08-31 (0.1.1): - - * Added destructor -*/ - - -#ifndef __AC_KHASH_H -#define __AC_KHASH_H - -/*! - @header - - Generic hash table library. - */ - -#define AC_VERSION_KHASH_H "0.2.8" - -#include -#include -#include - -#include "kstring.h" -#include "kroundup.h" - -/* compiler specific configuration */ - -#if UINT_MAX == 0xffffffffu -typedef unsigned int khint32_t; -#elif ULONG_MAX == 0xffffffffu -typedef unsigned long khint32_t; -#endif - -#if ULONG_MAX == ULLONG_MAX -typedef unsigned long khint64_t; -#else -typedef unsigned long long khint64_t; -#endif - -#ifndef kh_inline -#ifdef _MSC_VER -#define kh_inline __inline -#else -#define kh_inline inline -#endif -#endif /* kh_inline */ - -#ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) -#else -#define klib_unused -#endif -#endif /* klib_unused */ - -typedef khint32_t khint_t; -typedef khint_t khiter_t; - -#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) -#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) -#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) -#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) -#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) -#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) -#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) - -#define __ac_fsize(m) ((m) < 16? 
1 : (m)>>4) - -#ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -#ifndef kcalloc -#define kcalloc(N,Z) calloc(N,Z) -#endif -#ifndef kmalloc -#define kmalloc(Z) malloc(Z) -#endif -#ifndef krealloc -#define krealloc(P,Z) realloc(P,Z) -#endif -#ifndef kfree -#define kfree(P) free(P) -#endif - -static const double __ac_HASH_UPPER = 0.77; - -#define __KHASH_TYPE(name, khkey_t, khval_t) \ - typedef struct kh_##name##_s { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; - -#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ - extern kh_##name##_t *kh_init_##name(void); \ - extern void kh_destroy_##name(kh_##name##_t *h); \ - extern void kh_clear_##name(kh_##name##_t *h); \ - extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - extern void kh_del_##name(kh_##name##_t *h, khint_t x); - -#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - SCOPE kh_##name##_t *kh_init_##name(void) { \ - return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ - } \ - SCOPE void kh_destroy_##name(kh_##name##_t *h) \ - { \ - if (h) { \ - kfree((void *)h->keys); kfree(h->flags); \ - kfree((void *)h->vals); \ - kfree(h); \ - } \ - } \ - SCOPE void kh_clear_##name(kh_##name##_t *h) \ - { \ - if (h && h->flags) { \ - memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ - h->size = h->n_occupied = 0; \ - } \ - } \ - SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ - { \ - if (h->n_buckets) { \ - khint_t k, i, last, mask, step = 0; \ - mask = h->n_buckets - 1; \ - k = __hash_func(key); i = k & mask; \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || 
!__hash_equal(h->keys[i], key))) { \ - i = (i + (++step)) & mask; \ - if (i == last) return h->n_buckets; \ - } \ - return __ac_iseither(h->flags, i)? h->n_buckets : i; \ - } else return 0; \ - } \ - SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ - khint32_t *new_flags = 0; \ - khint_t j = 1; \ - { \ - kroundup32(new_n_buckets); \ - if (new_n_buckets < 4) new_n_buckets = 4; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ - else { /* hash table size to be changed (shrink or expand); rehash */ \ - new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (!new_flags) return -1; \ - memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (h->n_buckets < new_n_buckets) { /* expand */ \ - khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (!new_keys) { kfree(new_flags); return -1; } \ - h->keys = new_keys; \ - if (kh_is_map) { \ - khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ - if (!new_vals) { kfree(new_flags); return -1; } \ - h->vals = new_vals; \ - } \ - } /* otherwise shrink */ \ - } \ - } \ - if (j) { /* rehashing is needed */ \ - for (j = 0; j != h->n_buckets; ++j) { \ - if (__ac_iseither(h->flags, j) == 0) { \ - khkey_t key = h->keys[j]; \ - khval_t val; \ - khint_t new_mask; \ - new_mask = new_n_buckets - 1; \ - if (kh_is_map) val = h->vals[j]; \ - __ac_set_isdel_true(h->flags, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t k, i, step = 0; \ - k = __hash_func(key); \ - i = k & new_mask; \ - while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ - __ac_set_isempty_false(new_flags, i); \ - if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the 
existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ - __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ - } else { /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - if (kh_is_map) h->vals[i] = val; \ - break; \ - } \ - } \ - } \ - } \ - if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ - } \ - kfree(h->flags); /* free the working space */ \ - h->flags = new_flags; \ - h->n_buckets = new_n_buckets; \ - h->n_occupied = h->size; \ - h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ - } \ - return 0; \ - } \ - SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ - { \ - khint_t x; \ - if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) { \ - if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ - *ret = -1; return h->n_buckets; \ - } \ - } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ - *ret = -1; return h->n_buckets; \ - } \ - } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - { \ - khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ - x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ - if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ - else { \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + (++step)) & mask; \ - if (i == last) { x = site; break; } \ - } \ - if (x == h->n_buckets) { \ - if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ - else x 
= i; \ - } \ - } \ - } \ - if (__ac_isempty(h->flags, x)) { /* not present at all */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; ++h->n_occupied; \ - *ret = 1; \ - } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; \ - *ret = 2; \ - } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ - return x; \ - } \ - SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ - { \ - if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ - __ac_set_isdel_true(h->flags, x); \ - --h->size; \ - } \ - } - -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_PROTOTYPES(name, khkey_t, khval_t) - -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -/* --- BEGIN OF HASH FUNCTIONS --- */ - -/*! @function - @abstract Integer hash function - @param key The integer [khint32_t] - @return The hash value [khint_t] - */ -#define kh_int_hash_func(key) (khint32_t)(key) -/*! @function - @abstract Integer comparison function - */ -#define kh_int_hash_equal(a, b) ((a) == (b)) -/*! @function - @abstract 64-bit integer hash function - @param key The integer [khint64_t] - @return The hash value [khint_t] - */ -#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) -/*! @function - @abstract 64-bit integer comparison function - */ -#define kh_int64_hash_equal(a, b) ((a) == (b)) -/*! 
@function - @abstract const char* hash function - @param s Pointer to a null terminated string - @return The hash value - */ -static kh_inline khint_t __ac_X31_hash_string(const char *s) -{ - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; -} -/*! @function - @abstract Another interface to const char* hash function - @param key Pointer to a nul terminated string [const char*] - @return The hash value [khint_t] - */ -#define kh_str_hash_func(key) __ac_X31_hash_string(key) -/*! @function - @abstract Const char* comparison function - */ -#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) - -/*! @function - @abstract Kstring hash function - @param s Pointer to a kstring - @return The hash value - */ -static kh_inline khint_t __ac_X31_hash_kstring(const kstring_t ks) -{ - khint_t h = 0; - size_t i; - for (i = 0; i < ks.l; i++) - h = (h << 5) - h + (khint_t)ks.s[i]; - return h; -} -/*! @function - @abstract Interface to kstring hash function. - @param key Pointer to a khash; permits hashing on non-nul terminated strings. - @return The hash value [khint_t] - */ -#define kh_kstr_hash_func(key) __ac_X31_hash_kstring(key) -/*! @function - @abstract kstring comparison function - */ -#define kh_kstr_hash_equal(a, b) ((a).l == (b).l && strncmp((a).s, (b).s, (a).l) == 0) - -static kh_inline khint_t __ac_Wang_hash(khint_t key) -{ - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; -} -#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)(key)) - -/* --- END OF HASH FUNCTIONS --- */ - -/* Other convenient macros... */ - -/*! - @abstract Type of the hash table. - @param name Name of the hash table [symbol] - */ -#define khash_t(name) kh_##name##_t - -/*! @function - @abstract Initiate a hash table. 
- @param name Name of the hash table [symbol] - @return Pointer to the hash table [khash_t(name)*] - */ -#define kh_init(name) kh_init_##name() - -/*! @function - @abstract Destroy a hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - */ -#define kh_destroy(name, h) kh_destroy_##name(h) - -/*! @function - @abstract Reset a hash table without deallocating memory. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - */ -#define kh_clear(name, h) kh_clear_##name(h) - -/*! @function - @abstract Resize a hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param s New size [khint_t] - */ -#define kh_resize(name, h, s) kh_resize_##name(h, s) - -/*! @function - @abstract Insert a key to the hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param k Key [type of keys] - @param r Extra return code: -1 if the operation failed; - 0 if the key is present in the hash table; - 1 if the bucket is empty (never used); 2 if the element in - the bucket has been deleted [int*] - @return Iterator to the inserted element [khint_t] - */ -#define kh_put(name, h, k, r) kh_put_##name(h, k, r) - -/*! @function - @abstract Retrieve a key from the hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param k Key [type of keys] - @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] - */ -#define kh_get(name, h, k) kh_get_##name(h, k) - -/*! @function - @abstract Remove a key from the hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param k Iterator to the element to be deleted [khint_t] - */ -#define kh_del(name, h, k) kh_del_##name(h, k) - -/*! @function - @abstract Test whether a bucket contains data. 
- @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return 1 if containing data; 0 otherwise [int] - */ -#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) - -/*! @function - @abstract Get key given an iterator - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return Key [type of keys] - */ -#define kh_key(h, x) ((h)->keys[x]) - -/*! @function - @abstract Get value given an iterator - @param h Pointer to the hash table [khash_t(name)*] - @param x Iterator to the bucket [khint_t] - @return Value [type of values] - @discussion For hash sets, calling this results in segfault. - */ -#define kh_val(h, x) ((h)->vals[x]) - -/*! @function - @abstract Alias of kh_val() - */ -#define kh_value(h, x) ((h)->vals[x]) - -/*! @function - @abstract Get the start iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The start iterator [khint_t] - */ -#define kh_begin(h) (khint_t)(0) - -/*! @function - @abstract Get the end iterator - @param h Pointer to the hash table [khash_t(name)*] - @return The end iterator [khint_t] - */ -#define kh_end(h) ((h)->n_buckets) - -/*! @function - @abstract Get the number of elements in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of elements in the hash table [khint_t] - */ -#define kh_size(h) ((h)->size) - -/*! @function - @abstract Get the number of buckets in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @return Number of buckets in the hash table [khint_t] - */ -#define kh_n_buckets(h) ((h)->n_buckets) - -/*! 
@function - @abstract Iterate over the entries in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @param kvar Variable to which key will be assigned - @param vvar Variable to which value will be assigned - @param code Block of code to execute - */ -#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (kvar) = kh_key(h,__i); \ - (vvar) = kh_val(h,__i); \ - code; \ - } } - -/*! @function - @abstract Iterate over the values in the hash table - @param h Pointer to the hash table [khash_t(name)*] - @param vvar Variable to which value will be assigned - @param code Block of code to execute - */ -#define kh_foreach_value(h, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (vvar) = kh_val(h,__i); \ - code; \ - } } - -/* More convenient interfaces */ - -/*! @function - @abstract Instantiate a hash set containing integer keys - @param name Name of the hash table [symbol] - */ -#define KHASH_SET_INIT_INT(name) \ - KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) - -/*! @function - @abstract Instantiate a hash map containing integer keys - @param name Name of the hash table [symbol] - @param khval_t Type of values [type] - */ -#define KHASH_MAP_INIT_INT(name, khval_t) \ - KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) - -/*! @function - @abstract Instantiate a hash set containing 64-bit integer keys - @param name Name of the hash table [symbol] - */ -#define KHASH_SET_INIT_INT64(name) \ - KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) - -/*! 
@function - @abstract Instantiate a hash map containing 64-bit integer keys - @param name Name of the hash table [symbol] - @param khval_t Type of values [type] - */ -#define KHASH_MAP_INIT_INT64(name, khval_t) \ - KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) - -typedef const char *kh_cstr_t; -/*! @function - @abstract Instantiate a hash set containing const char* keys - @param name Name of the hash table [symbol] - */ -#define KHASH_SET_INIT_STR(name) \ - KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) - -/*! @function - @abstract Instantiate a hash map containing const char* keys - @param name Name of the hash table [symbol] - @param khval_t Type of values [type] - */ -#define KHASH_MAP_INIT_STR(name, khval_t) \ - KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) - -/*! @function - @abstract Instantiate a hash set containing kstring_t keys - @param name Name of the hash table [symbol] - */ -#define KHASH_SET_INIT_KSTR(name) \ - KHASH_INIT(name, kstring_t, char, 0, kh_kstr_hash_func, kh_kstr_hash_equal) - -/*! @function - @abstract Instantiate a hash map containing kstring_t keys - @param name Name of the hash table [symbol] - @param khval_t Type of values [type] - */ -#define KHASH_MAP_INIT_KSTR(name, khval_t) \ - KHASH_INIT(name, kstring_t, khval_t, 1, kh_kstr_hash_func, kh_kstr_hash_equal) - -#endif /* __AC_KHASH_H */ diff --git a/src/htslib-1.19.1/htslib/khash_str2int.h b/src/htslib-1.19.1/htslib/khash_str2int.h deleted file mode 100644 index 7a5c28b..0000000 --- a/src/htslib-1.19.1/htslib/khash_str2int.h +++ /dev/null @@ -1,135 +0,0 @@ -/* khash_str2int.h -- C-string to integer hash table. - - Copyright (C) 2013-2014,2020 Genome Research Ltd. 
- - Author: Petr Danecek - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_KHASH_STR2INT_H -#define HTSLIB_KHASH_STR2INT_H - -#include "khash.h" - -KHASH_MAP_INIT_STR(str2int, int) - -/* - * Wrappers for khash dictionaries used by mpileup. - */ - -static inline void *khash_str2int_init(void) -{ - return kh_init(str2int); -} - -/* - * Destroy the hash structure, but not the keys - */ -static inline void khash_str2int_destroy(void *_hash) -{ - khash_t(str2int) *hash = (khash_t(str2int)*)_hash; - if (hash) kh_destroy(str2int, hash); // Note that strings are not freed. 
-} - -/* - * Destroys both the hash structure and the keys - */ -static inline void khash_str2int_destroy_free(void *_hash) -{ - khash_t(str2int) *hash = (khash_t(str2int)*)_hash; - khint_t k; - if (hash == 0) return; - for (k = 0; k < kh_end(hash); ++k) - if (kh_exist(hash, k)) free((char*)kh_key(hash, k)); - kh_destroy(str2int, hash); -} - -/* - * Returns 1 if key exists or 0 if not - */ -static inline int khash_str2int_has_key(void *_hash, const char *str) -{ - khash_t(str2int) *hash = (khash_t(str2int)*)_hash; - khint_t k = kh_get(str2int, hash, str); - if ( k == kh_end(hash) ) return 0; - return 1; -} - -/* - * Returns 0 on success and -1 when the key is not present. On success, - * *value is set, unless NULL is passed. - */ -static inline int khash_str2int_get(void *_hash, const char *str, int *value) -{ - khash_t(str2int) *hash = (khash_t(str2int)*)_hash; - khint_t k; - if ( !hash ) return -1; - k = kh_get(str2int, hash, str); - if ( k == kh_end(hash) ) return -1; - if ( !value ) return 0; - *value = kh_val(hash, k); - return 0; -} - -/* - * Add a new string to the dictionary, auto-incrementing the value. - * On success returns the newly inserted integer id, on error -1 - * is returned. Note that the key must continue to exist throughout - * the whole life of _hash. - */ -static inline int khash_str2int_inc(void *_hash, const char *str) -{ - khint_t k; - int ret; - khash_t(str2int) *hash = (khash_t(str2int)*)_hash; - if ( !hash ) return -1; - k = kh_put(str2int, hash, str, &ret); - if (ret < 0) return -1; - if (ret == 0) return kh_val(hash, k); - kh_val(hash, k) = kh_size(hash) - 1; - return kh_val(hash, k); -} - -/* - * Set a new key,value pair. On success returns the bin index, on - * error -1 is returned. Note that the key must continue to exist - * throughout the whole life of _hash. 
- */ -static inline int khash_str2int_set(void *_hash, const char *str, int value) -{ - khint_t k; - int ret; - khash_t(str2int) *hash = (khash_t(str2int)*)_hash; - if ( !hash ) return -1; - k = kh_put(str2int, hash, str, &ret); - if (ret < 0) return -1; - kh_val(hash,k) = value; - return k; -} - -/* - * Return the number of keys in the hash table. - */ -static inline int khash_str2int_size(void *_hash) -{ - khash_t(str2int) *hash = (khash_t(str2int)*)_hash; - return kh_size(hash); -} - -#endif diff --git a/src/htslib-1.19.1/htslib/knetfile.h b/src/htslib-1.19.1/htslib/knetfile.h deleted file mode 100644 index 0f2adec..0000000 --- a/src/htslib-1.19.1/htslib/knetfile.h +++ /dev/null @@ -1,117 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, 2012, 2014, 2021-2022 Genome Research Ltd (GRL). - 2010 by Attractive Chaos - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#ifndef KNETFILE_H -#define KNETFILE_H - -#include -#include -#include - -#include "hts_defs.h" - -#ifndef _WIN32 -#define netread(fd, ptr, len) read(fd, ptr, len) -#define netwrite(fd, ptr, len) write(fd, ptr, len) -#define netclose(fd) close(fd) -#else -#include -#define netread(fd, ptr, len) recv(fd, ptr, len, 0) -#define netwrite(fd, ptr, len) send(fd, ptr, len, 0) -#define netclose(fd) closesocket(fd) -#endif - -// Ensure ssize_t exists within this header. All #includes must precede this, -// and ssize_t must be undefined again at the end of this header. -#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t -#define HTSLIB_SSIZE_T -#define ssize_t intptr_t -#endif - -// FIXME: currently I/O is unbuffered - -#define KNF_TYPE_LOCAL 1 -#define KNF_TYPE_FTP 2 -#define KNF_TYPE_HTTP 3 - -// Kept for API/ABI compatability only. Do not use directly! -typedef struct knetFile_s { - int type, fd; - int64_t offset; - char *host, *port; - - // the following are for FTP only - int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; - char *response, *retr, *size_cmd; - int64_t seek_offset; // for lazy seek - int64_t file_size; - - // the following are for HTTP only - char *path, *http_host; -} knetFile; - -#define knet_tell(fp) ((fp)->offset) -#define knet_fileno(fp) ((fp)->fd) - -#ifdef __cplusplus -extern "C" { -#endif - - HTSLIB_EXPORT - knetFile *knet_open(const char *fn, const char *mode) HTS_DEPRECATED("Please use hopen instead"); - - /* - This only works with local files. - */ - HTSLIB_EXPORT - knetFile *knet_dopen(int fd, const char *mode) HTS_DEPRECATED("Please use hdopen instead"); - - /* - If ->is_ready==0, this routine updates ->fd; otherwise, it simply - reads from ->fd. - */ - HTSLIB_EXPORT - ssize_t knet_read(knetFile *fp, void *buf, size_t len) HTS_DEPRECATED("Please use hread instead"); - - /* - This routine only sets ->offset and ->is_ready=0. 
It does not - communicate with the FTP server. - */ - HTSLIB_EXPORT - off_t knet_seek(knetFile *fp, off_t off, int whence) HTS_DEPRECATED("Please use hseek instead"); - HTSLIB_EXPORT - int knet_close(knetFile *fp) HTS_DEPRECATED("Please use hclose instead"); - -#ifdef __cplusplus -} -#endif - -#ifdef HTSLIB_SSIZE_T -#undef HTSLIB_SSIZE_T -#undef ssize_t -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/kroundup.h b/src/htslib-1.19.1/htslib/kroundup.h deleted file mode 100644 index 1330a5b..0000000 --- a/src/htslib-1.19.1/htslib/kroundup.h +++ /dev/null @@ -1,76 +0,0 @@ -/* The MIT License - - Copyright (C) 2020 Genome Research Ltd. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#ifndef KROUNDUP_H -#define KROUNDUP_H - -// Value of this macro is 1 if x is a signed type; 0 if unsigned -#define k_signed_type(x) (!(-((x) * 0 + 1) > 0)) - -/* - Macro with value 1 if the highest bit in x is set for any integer type - - This is written avoiding conditionals (?: operator) to reduce the likelihood - of gcc attempting jump thread optimisations for code paths where (x) is - large. These optimisations can cause gcc to issue warnings about excessively - large memory allocations when the kroundup64() macro below is used with - malloc(). Such warnings can be misleading as they imply only the large - allocation happens when it's actually working fine for normal values of (x). - - See https://developers.redhat.com/blog/2019/03/13/understanding-gcc-warnings-part-2/ -*/ -#define k_high_bit_set(x) ((((x) >> (sizeof(x) * 8 - 1 - k_signed_type(x))) & 1)) - -/*! @hideinitializer - @abstract Round up to next power of two - @discussion - This macro will work for unsigned types up to uint64_t. - - If the next power of two does not fit in the given type, it will set - the largest value that does. - */ -#define kroundup64(x) ((x) > 0 ? \ - (--(x), \ - (x)|=(x)>>(sizeof(x)/8), \ - (x)|=(x)>>(sizeof(x)/4), \ - (x)|=(x)>>(sizeof(x)/2), \ - (x)|=(x)>>(sizeof(x)), \ - (x)|=(x)>>(sizeof(x)*2), \ - (x)|=(x)>>(sizeof(x)*4), \ - (x) += !k_high_bit_set(x), \ - (x)) \ - : 0) - -// Historic interfaces for 32-bit and size_t values. The macro above -// works for both (as long as size_t is no more than 64 bits). - -#ifndef kroundup32 -#define kroundup32(x) kroundup64(x) -#endif -#ifndef kroundup_size_t -#define kroundup_size_t(x) kroundup64(x) -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/ksort.h b/src/htslib-1.19.1/htslib/ksort.h deleted file mode 100644 index 7857d4c..0000000 --- a/src/htslib-1.19.1/htslib/ksort.h +++ /dev/null @@ -1,322 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, 2012-2013, 2017-2019 Genome Research Ltd (GRL). 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* Contact: Heng Li */ - -/* - 2012-12-11 (0.1.4): - - * Defined __ks_insertsort_##name as static to compile with C99. - - 2008-11-16 (0.1.4): - - * Fixed a bug in introsort() that happens in rare cases. - - 2008-11-05 (0.1.3): - - * Fixed a bug in introsort() for complex comparisons. - - * Fixed a bug in mergesort(). The previous version is not stable. - - 2008-09-15 (0.1.2): - - * Accelerated introsort. On my Mac (not on another Linux machine), - my implementation is as fast as the C++ standard library's sort() - on random input. - - * Added combsort and in introsort, switch to combsort if the - recursion is too deep. 
- - 2008-09-13 (0.1.1): - - * Added k-small algorithm - - 2008-09-05 (0.1.0): - - * Initial version - -*/ - -#ifndef AC_KSORT_H -#define AC_KSORT_H - -#include -#include -#include "hts_defs.h" - -#ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) -#else -#define klib_unused -#endif -#endif /* klib_unused */ - -#ifdef __cplusplus -extern "C" { -#endif - -// Use our own drand48() symbol (used by ks_shuffle) to avoid portability -// problems on Windows. Don't include htslib/hts_os.h for this as it -// may not get on with older attempts to fix this in code that includes -// this file. -HTSLIB_EXPORT -extern double hts_drand48(void); - -typedef struct { - void *left, *right; - int depth; -} ks_isort_stack_t; - -#define KSORT_SWAP(type_t, a, b) { type_t t=(a); (a)=(b); (b)=t; } - -#define KSORT_INIT(name, type_t, __sort_lt) KSORT_INIT_(_ ## name, , type_t, __sort_lt) -#define KSORT_INIT_STATIC(name, type_t, __sort_lt) KSORT_INIT_(_ ## name, static klib_unused, type_t, __sort_lt) -#define KSORT_INIT2(name, SCOPE, type_t, __sort_lt) KSORT_INIT_(_ ## name, SCOPE, type_t, __sort_lt) - -#define KSORT_INIT_(name, SCOPE, type_t, __sort_lt) \ - SCOPE int ks_mergesort##name(size_t n, type_t array[], type_t temp[]) \ - { \ - type_t *a2[2], *a, *b; \ - int curr, shift; \ - \ - a2[0] = array; \ - a2[1] = temp? 
temp : (type_t*)malloc(sizeof(type_t) * n); \ - for (curr = 0, shift = 0; (1ul<> 1) - 1; i != (size_t)(-1); --i) \ - ks_heapadjust##name(i, lsize, l); \ - } \ - SCOPE void ks_heapsort##name(size_t lsize, type_t l[]) \ - { \ - size_t i; \ - for (i = lsize - 1; i > 0; --i) { \ - type_t tmp; \ - tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust##name(0, i, l); \ - } \ - } \ - static inline void __ks_insertsort##name(type_t *s, type_t *t) \ - { \ - type_t *i, *j, swap_tmp; \ - for (i = s + 1; i < t; ++i) \ - for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \ - swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \ - } \ - } \ - SCOPE void ks_combsort##name(size_t n, type_t a[]) \ - { \ - const double shrink_factor = 1.2473309501039786540366528676643; \ - int do_swap; \ - size_t gap = n; \ - type_t tmp, *i, *j; \ - do { \ - if (gap > 2) { \ - gap = (size_t)(gap / shrink_factor); \ - if (gap == 9 || gap == 10) gap = 11; \ - } \ - do_swap = 0; \ - for (i = a; i < a + n - gap; ++i) { \ - j = i + gap; \ - if (__sort_lt(*j, *i)) { \ - tmp = *i; *i = *j; *j = tmp; \ - do_swap = 1; \ - } \ - } \ - } while (do_swap || gap > 2); \ - if (gap != 1) __ks_insertsort##name(a, a + n); \ - } \ - SCOPE int ks_introsort##name(size_t n, type_t a[]) \ - { \ - int d; \ - ks_isort_stack_t *top, *stack; \ - type_t rp, swap_tmp; \ - type_t *s, *t, *i, *j, *k; \ - \ - if (n < 1) return 0; \ - else if (n == 2) { \ - if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \ - return 0; \ - } \ - for (d = 2; 1ul<>1) + 1; \ - if (__sort_lt(*k, *i)) { \ - if (__sort_lt(*k, *j)) k = j; \ - } else k = __sort_lt(*j, *i)? 
i : j; \ - rp = *k; \ - if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \ - for (;;) { \ - do ++i; while (__sort_lt(*i, rp)); \ - do --j; while (i <= j && __sort_lt(rp, *j)); \ - if (j <= i) break; \ - swap_tmp = *i; *i = *j; *j = swap_tmp; \ - } \ - swap_tmp = *i; *i = *t; *t = swap_tmp; \ - if (i-s > t-i) { \ - if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \ - s = t-i > 16? i+1 : t; \ - } else { \ - if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \ - t = i-s > 16? i-1 : s; \ - } \ - } else { \ - if (top == stack) { \ - free(stack); \ - __ks_insertsort##name(a, a+n); \ - return 0; \ - } else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \ - } \ - } \ - return 0; \ - } \ - /* This function is adapted from: http://ndevilla.free.fr/median/ */ \ - /* 0 <= kk < n */ \ - SCOPE type_t ks_ksmall##name(size_t n, type_t arr[], size_t kk) \ - { \ - type_t *low, *high, *k, *ll, *hh, *mid; \ - low = arr; high = arr + n - 1; k = arr + kk; \ - for (;;) { \ - if (high <= low) return *k; \ - if (high == low + 1) { \ - if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ - return *k; \ - } \ - mid = low + (high - low) / 2; \ - if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \ - if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ - if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \ - KSORT_SWAP(type_t, *mid, *(low+1)); \ - ll = low + 1; hh = high; \ - for (;;) { \ - do ++ll; while (__sort_lt(*ll, *low)); \ - do --hh; while (__sort_lt(*low, *hh)); \ - if (hh < ll) break; \ - KSORT_SWAP(type_t, *ll, *hh); \ - } \ - KSORT_SWAP(type_t, *low, *hh); \ - if (hh <= k) low = ll; \ - if (hh >= k) high = hh - 1; \ - } \ - } \ - SCOPE void ks_shuffle##name(size_t n, type_t a[]) \ - { \ - int i, j; \ - for (i = n; i > 1; --i) { \ - type_t tmp; \ - j = (int)(hts_drand48() * i); \ - tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; \ - } \ - } - -#define ks_mergesort(name, 
n, a, t) ks_mergesort_##name(n, a, t) -#define ks_introsort(name, n, a) ks_introsort_##name(n, a) -#define ks_combsort(name, n, a) ks_combsort_##name(n, a) -#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a) -#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a) -#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a) -#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k) -#define ks_shuffle(name, n, a) ks_shuffle_##name(n, a) - -#define ks_lt_generic(a, b) ((a) < (b)) -#define ks_lt_str(a, b) (strcmp((a), (b)) < 0) - -typedef const char *ksstr_t; - -#define KSORT_INIT_GENERIC(type_t) KSORT_INIT_(_ ## type_t, , type_t, ks_lt_generic) -#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str) - -#define KSORT_INIT_STATIC_GENERIC(type_t) KSORT_INIT_(_ ## type_t, static klib_unused, type_t, ks_lt_generic) -#define KSORT_INIT_STATIC_STR KSORT_INIT_STATIC(str, ksstr_t, ks_lt_str) - -#define KSORT_INIT2_GENERIC(type_t, SCOPE) KSORT_INIT_(_ ## type_t, SCOPE, type_t, ks_lt_generic) -#define KSORT_INIT2_STR KSORT_INIT2(str, SCOPE, ksstr_t, ks_lt_str) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/kstring.h b/src/htslib-1.19.1/htslib/kstring.h deleted file mode 100644 index 53a1980..0000000 --- a/src/htslib-1.19.1/htslib/kstring.h +++ /dev/null @@ -1,411 +0,0 @@ -/* The MIT License - - Copyright (C) 2011 by Attractive Chaos - Copyright (C) 2013-2014, 2016, 2018-2020, 2022 Genome Research Ltd. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef KSTRING_H -#define KSTRING_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "hts_defs.h" -#include "kroundup.h" - -#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)) -#ifdef __MINGW_PRINTF_FORMAT -#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__MINGW_PRINTF_FORMAT, fmt, arg))) -#else -#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg))) -#endif // __MINGW_PRINTF_FORMAT -#else -#define KS_ATTR_PRINTF(fmt, arg) -#endif - -#ifndef HAVE___BUILTIN_CLZ -#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) -#define HAVE___BUILTIN_CLZ 1 -#endif -#endif - -// Ensure ssize_t exists within this header. All #includes must precede this, -// and ssize_t must be undefined again at the end of this header. 
-#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t -#define HTSLIB_SSIZE_T -#define ssize_t intptr_t -#endif - -/* kstring_t is a simple non-opaque type whose fields are likely to be - * used directly by user code (but see also ks_str() and ks_len() below). - * A kstring_t object is initialised by either of - * kstring_t str = KS_INITIALIZE; - * kstring_t str; ...; ks_initialize(&str); - * and either ownership of the underlying buffer should be given away before - * the object disappears (see ks_release() below) or the kstring_t should be - * destroyed with ks_free(&str) or free(str.s) */ -#ifndef KSTRING_T -#define KSTRING_T kstring_t -typedef struct kstring_t { - size_t l, m; - char *s; -} kstring_t; -#endif - -typedef struct ks_tokaux_t { - uint64_t tab[4]; - int sep, finished; - const char *p; // end of the current token -} ks_tokaux_t; - -#ifdef __cplusplus -extern "C" { -#endif - - HTSLIB_EXPORT - int kvsprintf(kstring_t *s, const char *fmt, va_list ap) KS_ATTR_PRINTF(2,0); - - HTSLIB_EXPORT - int ksprintf(kstring_t *s, const char *fmt, ...) KS_ATTR_PRINTF(2,3); - - HTSLIB_EXPORT - int kputd(double d, kstring_t *s); // custom %g only handler - - HTSLIB_EXPORT - int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); - - HTSLIB_EXPORT - char *kstrstr(const char *str, const char *pat, int **_prep); - - HTSLIB_EXPORT - char *kstrnstr(const char *str, const char *pat, int n, int **_prep); - - HTSLIB_EXPORT - void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep); - - /* kstrtok() is similar to strtok_r() except that str is not - * modified and both str and sep can be NULL. For efficiency, it is - * actually recommended to set both to NULL in the subsequent calls - * if sep is not changed. 
*/ - HTSLIB_EXPORT - char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux); - - /* kgetline() uses the supplied fgets()-like function to read a "\n"- - * or "\r\n"-terminated line from fp. The line read is appended to the - * kstring without its terminator and 0 is returned; EOF is returned at - * EOF or on error (determined by querying fp, as per fgets()). */ - typedef char *kgets_func(char *, int, void *); - HTSLIB_EXPORT - int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp); - - /* kgetline2() uses the supplied hgetln()-like function to read a "\n"- - * or "\r\n"-terminated line from fp. The line read is appended to the - * ksring without its terminator and 0 is returned; EOF is returned at - * EOF or on error (determined by querying fp, as per fgets()). */ - typedef ssize_t kgets_func2(char *, size_t, void *); - HTSLIB_EXPORT - int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp); - -#ifdef __cplusplus -} -#endif - -/// kstring initializer for structure assignment -#define KS_INITIALIZE { 0, 0, NULL } - -/// kstring initializer for pointers -/** - @note Not to be used if the buffer has been allocated. Use ks_release() - or ks_clear() instead. -*/ - -static inline void ks_initialize(kstring_t *s) -{ - s->l = s->m = 0; - s->s = NULL; -} - -/// Resize a kstring to a given capacity -static inline int ks_resize(kstring_t *s, size_t size) -{ - if (s->m < size) { - char *tmp; - size = (size > (SIZE_MAX>>2)) ? 
size : size + (size >> 1); - tmp = (char*)realloc(s->s, size); - if (!tmp) - return -1; - s->s = tmp; - s->m = size; - } - return 0; -} - -/// Increase kstring capacity by a given number of bytes -static inline int ks_expand(kstring_t *s, size_t expansion) -{ - size_t new_size = s->l + expansion; - - if (new_size < s->l) // Overflow check - return -1; - return ks_resize(s, new_size); -} - -/// Returns the kstring buffer -static inline char *ks_str(kstring_t *s) -{ - return s->s; -} - -/// Returns the kstring buffer, or an empty string if l == 0 -/** - * Unlike ks_str(), this function will never return NULL. If the kstring is - * empty it will return a read-only empty string. As the returned value - * may be read-only, the caller should not attempt to modify it. - */ -static inline const char *ks_c_str(kstring_t *s) -{ - return s->l && s->s ? s->s : ""; -} - -static inline size_t ks_len(kstring_t *s) -{ - return s->l; -} - -/// Reset kstring length to zero -/** - @return The kstring itself - - Example use: kputsn(string, len, ks_clear(s)) -*/ -static inline kstring_t *ks_clear(kstring_t *s) -{ - s->l = 0; - return s; -} - -// Give ownership of the underlying buffer away to something else (making -// that something else responsible for freeing it), leaving the kstring_t -// empty and ready to be used again, or ready to go out of scope without -// needing free(str.s) to prevent a memory leak. -static inline char *ks_release(kstring_t *s) -{ - char *ss = s->s; - s->l = s->m = 0; - s->s = NULL; - return ss; -} - -/// Safely free the underlying buffer in a kstring. 
-static inline void ks_free(kstring_t *s) -{ - if (s) { - free(s->s); - ks_initialize(s); - } -} - -static inline int kputsn(const char *p, size_t l, kstring_t *s) -{ - size_t new_sz = s->l + l + 2; - if (new_sz <= s->l || ks_resize(s, new_sz) < 0) - return EOF; - memcpy(s->s + s->l, p, l); - s->l += l; - s->s[s->l] = 0; - return l; -} - -static inline int kputs(const char *p, kstring_t *s) -{ - if (!p) { errno = EFAULT; return -1; } - return kputsn(p, strlen(p), s); -} - -static inline int kputc(int c, kstring_t *s) -{ - if (ks_resize(s, s->l + 2) < 0) - return EOF; - s->s[s->l++] = c; - s->s[s->l] = 0; - return (unsigned char)c; -} - -static inline int kputc_(int c, kstring_t *s) -{ - if (ks_resize(s, s->l + 1) < 0) - return EOF; - s->s[s->l++] = c; - return 1; -} - -static inline int kputsn_(const void *p, size_t l, kstring_t *s) -{ - size_t new_sz = s->l + l; - if (new_sz < s->l || ks_resize(s, new_sz ? new_sz : 1) < 0) - return EOF; - memcpy(s->s + s->l, p, l); - s->l += l; - return l; -} - -static inline int kputuw(unsigned x, kstring_t *s) -{ -#if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U - static const unsigned int kputuw_num_digits[32] = { - 10, 10, 10, 9, 9, 9, 8, 8, - 8, 7, 7, 7, 7, 6, 6, 6, - 5, 5, 5, 4, 4, 4, 4, 3, - 3, 3, 2, 2, 2, 1, 1, 1 - }; - static const unsigned int kputuw_thresholds[32] = { - 0, 0, 1000000000U, 0, 0, 100000000U, 0, 0, - 10000000, 0, 0, 0, 1000000, 0, 0, 100000, - 0, 0, 10000, 0, 0, 0, 1000, 0, - 0, 100, 0, 0, 10, 0, 0, 0 - }; -#else - uint64_t m; -#endif - static const char kputuw_dig2r[] = - "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; - unsigned int l, j; - char *cp; - - // Trivial case - also prevents __builtin_clz(0), which is undefined - if (x < 10) { - if (ks_resize(s, s->l + 2) < 0) - return EOF; - s->s[s->l++] = 
'0'+x; - s->s[s->l] = 0; - return 0; - } - - // Find out how many digits are to be printed. -#if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U - /* - * Table method - should be quick if clz can be done in hardware. - * Find the most significant bit of the value to print and look - * up in a table to find out how many decimal digits are needed. - * This number needs to be adjusted by 1 for cases where the decimal - * length could vary for a given number of bits (for example, - * a four bit number could be between 8 and 15). - */ - - l = __builtin_clz(x); - l = kputuw_num_digits[l] - (x < kputuw_thresholds[l]); -#else - // Fallback for when clz is not available - m = 1; - l = 0; - do { - l++; - m *= 10; - } while (x >= m); -#endif - - if (ks_resize(s, s->l + l + 2) < 0) - return EOF; - - // Add digits two at a time - j = l; - cp = s->s + s->l; - while (x >= 10) { - const char *d = &kputuw_dig2r[2*(x%100)]; - x /= 100; - memcpy(&cp[j-=2], d, 2); - } - - // Last one (if necessary). We know that x < 10 by now. - if (j == 1) - cp[0] = x + '0'; - - s->l += l; - s->s[s->l] = 0; - return 0; -} - -static inline int kputw(int c, kstring_t *s) -{ - unsigned int x = c; - if (c < 0) { - x = -x; - if (ks_resize(s, s->l + 3) < 0) - return EOF; - s->s[s->l++] = '-'; - } - - return kputuw(x, s); -} - -static inline int kputll(long long c, kstring_t *s) -{ - char buf[32]; - int i, l = 0; - unsigned long long x = c; - if (c < 0) x = -x; - do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0); - if (c < 0) buf[l++] = '-'; - if (ks_resize(s, s->l + l + 2) < 0) - return EOF; - for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; - s->s[s->l] = 0; - return 0; -} - -static inline int kputl(long c, kstring_t *s) { - return kputll(c, s); -} - -/* - * Returns 's' split by delimiter, with *n being the number of components; - * NULL on failure. 
- */ -static inline int *ksplit(kstring_t *s, int delimiter, int *n) -{ - int max = 0, *offsets = 0; - *n = ksplit_core(s->s, delimiter, &max, &offsets); - return offsets; -} - -#ifdef HTSLIB_SSIZE_T -#undef HTSLIB_SSIZE_T -#undef ssize_t -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/regidx.h b/src/htslib-1.19.1/htslib/regidx.h deleted file mode 100644 index cd14dbc..0000000 --- a/src/htslib-1.19.1/htslib/regidx.h +++ /dev/null @@ -1,242 +0,0 @@ -/// @file htslib/regidx.h -/// Region indexing. -/* - Copyright (C) 2014-2019 Genome Research Ltd. - - Author: Petr Danecek - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -/* - Region indexing with an optional payload. - - Example of usage: - - // Init the parser and print regions. In this example the payload is a - // pointer to a string. For the description of parse_custom and - // free_custom functions, see regidx_parse_f and regidx_free_f below, - // and for working example see test/test-regidx.c. 
- regidx_t *idx = regidx_init(in_fname,parse_custom,free_custom,sizeof(char*),NULL); - - // Query overlap with chr:beg-end (beg,end are 1-based coordinates) - regitr_t *itr = regitr_init(idx); - if ( regidx_overlap(idx, chr,beg-1,end-1, itr) ) printf("There is an overlap!\n"); - - while ( regitr_overlap(itr) ) - { - printf("[%"PRIhts_pos",%"PRIhts_pos"] overlaps with [%"PRIhts_pos",%"PRIhts_pos"], payload=%s\n", - beg, end, itr->beg+1, itr->end+1, regitr_payload(itr,char*)); - } - - regidx_destroy(idx); - regitr_destroy(itr); - - - Another example, loop over all regions: - - regidx_t *idx = regidx_init(in_fname,NULL,NULL,0,NULL); - regitr_t *itr = regitr_init(idx); - - while ( regitr_loop(itr) ) - printf("chr=%s beg=%d end=%d\n", itr->seq, itr->beg+1, itr->end+1); - - regidx_destroy(idx); - regitr_destroy(itr); -*/ - -#ifndef HTSLIB_REGIDX_H -#define HTSLIB_REGIDX_H - -#include "hts.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// maximum regidx position (0-based). Used to represent the end point of -// regions which do not explicitly set one. regidx_push() also limits -// positions passed to it to be no bigger than this. - -// Limit is set to ensure some internal values used by regidx keep within 32 -// bits and to stop the index from getting too big. - -#define REGIDX_MAX (1ULL << 35) - -typedef struct regidx_t regidx_t; -typedef struct regitr_t -{ - hts_pos_t beg,end; - void *payload; - char *seq; - void *itr; -} -regitr_t; - -#define regitr_payload(itr,type_t) (*((type_t*)(itr)->payload)) - -// Old API for backwards compatibility -#define REGITR_START(itr) (itr).beg -#define REGITR_END(itr) (itr).end -#define REGITR_PAYLOAD(itr,type_t) ((type_t*)(itr).payload) -#define REGITR_OVERLAP(itr,from,to) regidx_overlap((itr)); - -/* - * regidx_parse_f - Function to parse one input line, such as regidx_parse_bed - * or regidx_parse_tab below. 
The function is expected to set `chr_from` and - * `chr_to` to point to first and last character of chromosome name and set - * coordinates `beg` and `end` (0-based, inclusive). If regidx_init() was - * called with non-zero payload_size, the `payload` points to a memory - * location of the payload_size and `usr` is the data passed to regidx_init(). - * Any memory allocated by the function will be freed by regidx_free_f called - * by regidx_destroy(). - * - * Return value: 0 on success, -1 to skip a record, -2 on fatal error. - */ -typedef int (*regidx_parse_f)(const char *line, char **chr_beg, char **chr_end, hts_pos_t *beg, hts_pos_t *end, void *payload, void *usr); -typedef void (*regidx_free_f)(void *payload); - -/* - * A note about the parsers: - * - leading spaces are ignored - * - lines starting with "#" are ignored - */ -HTSLIB_EXPORT -int regidx_parse_bed(const char*,char**,char**,hts_pos_t*,hts_pos_t*,void*,void*); // CHROM or whitespace-sepatated CHROM,FROM,TO (0-based,right-open) -HTSLIB_EXPORT -int regidx_parse_tab(const char*,char**,char**,hts_pos_t*,hts_pos_t*,void*,void*); // CHROM or whitespace-separated CHROM,POS (1-based, inclusive) -HTSLIB_EXPORT -int regidx_parse_reg(const char*,char**,char**,hts_pos_t*,hts_pos_t*,void*,void*); // CHROM, CHROM:POS, CHROM:FROM-TO, CHROM:FROM- (1-based, inclusive) -HTSLIB_EXPORT -int regidx_parse_vcf(const char*,char**,char**,hts_pos_t*,hts_pos_t*,void*,void*); - -/* - * regidx_init() - creates new index - * regidx_init_string() - creates new index, from a string rather than from a file - * - * @param fname: input file name or NULL if regions will be added one-by-one via regidx_insert() - * @param parsef: regidx_parse_bed, regidx_parse_tab or see description of regidx_parse_f. If NULL, - * the format will be autodected, currently either regidx_parse_tab (the default) or - * regidx_parse_bed (file must be named 'bed' or 'bed.gz') will be used. Note that - * the exact autodetection algorithm will change. 
- * @param freef: NULL or see description of regidx_parse_f - * @param payload_size: 0 with regidx_parse_bed, regidx_parse_tab or see regidx_parse_f - * @param usr: optional user data passed to regidx_parse_f - * - * Returns index on success or NULL on error. - * - * The regidx_t index struct returned by a successful call should be freed - * via regidx_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -regidx_t *regidx_init(const char *fname, regidx_parse_f parsef, regidx_free_f freef, size_t payload_size, void *usr); -HTSLIB_EXPORT -regidx_t *regidx_init_string(const char *string, regidx_parse_f parsef, regidx_free_f freef, size_t payload_size, void *usr); - -/* - * regidx_destroy() - free memory allocated by regidx_init - */ -HTSLIB_EXPORT -void regidx_destroy(regidx_t *idx); - -/* - * regidx_overlap() - check overlap of the location chr:from-to with regions - * @param beg,end: 0-based start, end coordinate (inclusive) - * @param itr: pointer to iterator, can be NULL if regidx_loop not needed - * - * Returns 0 if there is no overlap or 1 if overlap is found. The overlapping - * regions can be iterated as shown in the example above. - */ -HTSLIB_EXPORT -int regidx_overlap(regidx_t *idx, const char *chr, hts_pos_t beg, hts_pos_t end, regitr_t *itr); - -/* - * regidx_insert() - add a new region. - * regidx_insert_list() - add new regions from a list - * regidx_push() - low level insertion of a new region - * - * Returns 0 on success or -1 on error. 
- */ -HTSLIB_EXPORT -int regidx_insert(regidx_t *idx, char *line); -HTSLIB_EXPORT -int regidx_insert_list(regidx_t *idx, char *line, char delim); -HTSLIB_EXPORT -int regidx_push(regidx_t *idx, char *chr_beg, char *chr_end, hts_pos_t beg, hts_pos_t end, void *payload); - -/* - * regidx_seq_names() - return list of all sequence names - */ -HTSLIB_EXPORT -char **regidx_seq_names(regidx_t *idx, int *n); - -/* - * regidx_seq_nregs() - number of regions - * regidx_nregs() - total number of regions - */ -HTSLIB_EXPORT -int regidx_seq_nregs(regidx_t *idx, const char *seq); - -HTSLIB_EXPORT -int regidx_nregs(regidx_t *idx); - -/* - * regitr_init() - initialize an iterator. The idx parameter is required only - * with regitr_loop. If only regitr_overlap is called, NULL - * can be given. - * - * The regitr_t struct returned by a successful regitr_init() - * call should be freed via regitr_destroy() when it is no - * longer needed. - * - * regitr_reset() - initialize an iterator for a repeated regitr_loop cycle. - * Not required with regitr_overlap. 
- */ -HTSLIB_EXPORT -regitr_t *regitr_init(regidx_t *idx); -HTSLIB_EXPORT -void regitr_destroy(regitr_t *itr); -HTSLIB_EXPORT -void regitr_reset(regidx_t *idx, regitr_t *itr); - -/* - * regitr_overlap() - next overlapping region - * Returns 0 when done or 1 when itr is set to next region - */ -HTSLIB_EXPORT -int regitr_overlap(regitr_t *itr); - -/* - * regitr_loop() - loop over all regions - * Returns 0 when done or 1 when itr is set to next region - */ -HTSLIB_EXPORT -int regitr_loop(regitr_t *itr); - -/* - * regitr_copy() - create a copy of an iterator for a repeated iteration with regitr_loop - */ -HTSLIB_EXPORT -void regitr_copy(regitr_t *dst, regitr_t *src); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/synced_bcf_reader.h b/src/htslib-1.19.1/htslib/synced_bcf_reader.h deleted file mode 100644 index 9a6b484..0000000 --- a/src/htslib-1.19.1/htslib/synced_bcf_reader.h +++ /dev/null @@ -1,396 +0,0 @@ -/// @file htslib/synced_bcf_reader.h -/// Stream through multiple VCF files. -/* - Copyright (C) 2012-2017, 2019-2023 Genome Research Ltd. - - Author: Petr Danecek - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -/* - The synced_bcf_reader allows to keep multiple VCFs open and stream them - using the next_line iterator in a seamless matter without worrying about - chromosomes and synchronizing the sites. This is used by vcfcheck to - compare multiple VCFs simultaneously and is used also for merging, - creating intersections, etc. - - The synced_bcf_reader also provides API for reading indexed BCF/VCF, - hiding differences in BCF/VCF opening, indexing and reading. - - - Example of usage: - - bcf_srs_t *sr = bcf_sr_init(); - bcf_sr_set_opt(sr, BCF_SR_PAIR_LOGIC, BCF_SR_PAIR_BOTH_REF); - bcf_sr_set_opt(sr, BCF_SR_REQUIRE_IDX); - for (i=0; ierrnum ) error("Error: %s\n", bcf_sr_strerror(sr->errnum)); - bcf_sr_destroy(sr); -*/ - -#ifndef HTSLIB_SYNCED_BCF_READER_H -#define HTSLIB_SYNCED_BCF_READER_H - -#include "hts.h" -#include "vcf.h" -#include "tbx.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - When reading multiple files in parallel, duplicate records within each - file will be reordered and offered in intuitive order. For example, - when reading two files, each with unsorted SNP and indel record, the - reader should return the SNP records together and the indel records - together. The logic of compatible records can vary depending on the - application and can be set using the PAIR_* defined below. - - The COLLAPSE_* definitions will be deprecated in future versions, please - use the PAIR_* definitions instead. 
-*/ -#define COLLAPSE_NONE 0 // require the exact same set of alleles in all files -#define COLLAPSE_SNPS 1 // allow different alleles, as long as they all are SNPs -#define COLLAPSE_INDELS 2 // the same as above, but with indels -#define COLLAPSE_ANY 4 // any combination of alleles can be returned by bcf_sr_next_line() -#define COLLAPSE_SOME 8 // at least some of the ALTs must match -#define COLLAPSE_BOTH (COLLAPSE_SNPS|COLLAPSE_INDELS) - -#define BCF_SR_PAIR_SNPS (1<<0) // allow different alleles, as long as they all are SNPs -#define BCF_SR_PAIR_INDELS (1<<1) // the same as above, but with indels -#define BCF_SR_PAIR_ANY (1<<2) // any combination of alleles can be returned by bcf_sr_next_line() -#define BCF_SR_PAIR_SOME (1<<3) // at least some of multiallelic ALTs must match. Implied by all the others with the exception of EXACT -#define BCF_SR_PAIR_SNP_REF (1<<4) // allow REF-only records with SNPs -#define BCF_SR_PAIR_INDEL_REF (1<<5) // allow REF-only records with indels -#define BCF_SR_PAIR_EXACT (1<<6) // require the exact same set of alleles in all files -#define BCF_SR_PAIR_BOTH (BCF_SR_PAIR_SNPS|BCF_SR_PAIR_INDELS) -#define BCF_SR_PAIR_BOTH_REF (BCF_SR_PAIR_SNPS|BCF_SR_PAIR_INDELS|BCF_SR_PAIR_SNP_REF|BCF_SR_PAIR_INDEL_REF) - -typedef enum -{ - BCF_SR_REQUIRE_IDX, - BCF_SR_PAIR_LOGIC, // combination of the PAIR_* values above - BCF_SR_ALLOW_NO_IDX, // allow to proceed even if required index is not present (at the user's risk) - BCF_SR_REGIONS_OVERLAP, // include overlapping records with POS outside the regions: 0=no, 1=VCF line overlap, 2=true variant overlap [1] - BCF_SR_TARGETS_OVERLAP // include overlapping records with POS outside the targets: 0=no, 1=VCF line overlap, 2=true variant overlap [0] -} -bcf_sr_opt_t; - -struct bcf_sr_region_t; - -typedef struct bcf_sr_regions_t -{ - // for reading from tabix-indexed file (big data) - tbx_t *tbx; // tabix index - hts_itr_t *itr; // tabix iterator - kstring_t line; // holder of the current line, set only 
when reading from tabix-indexed files - htsFile *file; - char *fname; - int is_bin; // is open in binary mode (tabix access) - char **als; // parsed alleles if targets_als set and _regions_match_alleles called - kstring_t als_str; // block of parsed alleles - int nals, mals; // number of set alleles and the size of allocated array - int als_type; // alleles type, currently VCF_SNP or VCF_INDEL - - // user handler to deal with skipped regions without a counterpart in VCFs - void (*missed_reg_handler)(struct bcf_sr_regions_t *, void *); - void *missed_reg_data; - - // for in-memory regions (small data) - struct bcf_sr_region_t *regs; // the regions - - // shared by both tabix-index and in-memory regions - void *seq_hash; // keys: sequence names, values: index to seqs - char **seq_names; // sequence names - int nseqs; // number of sequences (chromosomes) in the file - int iseq; // current position: chr name, index to snames - hts_pos_t start, end; // current position: start, end of the region (0-based) - int prev_seq; - hts_pos_t prev_start, prev_end; - int overlap; // see BCF_SR_REGIONS_OVERLAP/BCF_SR_TARGETS_OVERLAP -} -bcf_sr_regions_t; - -typedef struct bcf_sr_t -{ - htsFile *file; - tbx_t *tbx_idx; - hts_idx_t *bcf_idx; - bcf_hdr_t *header; - hts_itr_t *itr; - char *fname; - bcf1_t **buffer; // cached VCF records. 
First is the current record synced across the reader - int nbuffer, mbuffer; // number of cached records (including the current record); number of allocated records - int nfilter_ids, *filter_ids; // -1 for ".", otherwise filter id as returned by bcf_hdr_id2int - int *samples, n_smpl; // list of columns in the order consistent with bcf_srs_t.samples -} -bcf_sr_t; - -typedef enum -{ - open_failed, not_bgzf, idx_load_failed, file_type_error, api_usage_error, - header_error, no_eof, no_memory, vcf_parse_error, bcf_read_error, noidx_error -} -bcf_sr_error; - -typedef struct bcf_srs_t -{ - // Parameters controlling the logic - int collapse; // Do not access directly, use bcf_sr_set_pairing_logic() instead - char *apply_filters; // If set, sites where none of the FILTER strings is listed - // will be skipped. Active only at the time of - // initialization, that is during the add_reader() - // calls. Therefore, each reader can be initialized with different - // filters. - int require_index; // Some tools do not need random access - int max_unpack; // When reading VCFs and knowing some fields will not be needed, boost performance of vcf_parse1 - int *has_line; // Corresponds to return value of bcf_sr_next_line but is not limited by sizeof(int). Use bcf_sr_has_line macro to query. - bcf_sr_error errnum; - - // Auxiliary data - bcf_sr_t *readers; - int nreaders; - int streaming; // reading mode: index-jumping or streaming - int explicit_regs; // was the list of regions se by bcf_sr_set_regions or guessed from tabix index? - char **samples; // List of samples - bcf_sr_regions_t *regions, *targets; // see bcf_sr_set_[targets|regions] for description - int targets_als; // subset to targets not only by position but also by alleles? - int targets_exclude; - kstring_t tmps; - int n_smpl; - - int n_threads; // Simple multi-threaded decoding / encoding. - htsThreadPool *p; // Our pool, but it can be used by others if needed. 
- void *aux; // Opaque auxiliary data -} -bcf_srs_t; - -/** Allocate and initialize a bcf_srs_t struct. - * - * The bcf_srs_t struct returned by a successful call should be freed - * via bcf_sr_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -bcf_srs_t *bcf_sr_init(void); - -/** Destroy a bcf_srs_t struct */ -HTSLIB_EXPORT -void bcf_sr_destroy(bcf_srs_t *readers); - -HTSLIB_EXPORT -char *bcf_sr_strerror(int errnum); - -HTSLIB_EXPORT -int bcf_sr_set_opt(bcf_srs_t *readers, bcf_sr_opt_t opt, ...); - - -/** - * bcf_sr_set_threads() - allocates a thread-pool for use by the synced reader. - * @n_threads: size of thread pool - * - * Returns 0 if the call succeeded, or <0 on error. - */ -HTSLIB_EXPORT -int bcf_sr_set_threads(bcf_srs_t *files, int n_threads); - -/** Deallocates thread memory, if owned by us. */ -HTSLIB_EXPORT -void bcf_sr_destroy_threads(bcf_srs_t *files); - -/** - * bcf_sr_add_reader() - open new reader - * @readers: holder of the open readers - * @fname: the VCF file - * - * Returns 1 if the call succeeded, or 0 on error. - * - * See also the bcf_srs_t data structure for parameters controlling - * the reader's logic. - */ -HTSLIB_EXPORT -int bcf_sr_add_reader(bcf_srs_t *readers, const char *fname); - -HTSLIB_EXPORT -void bcf_sr_remove_reader(bcf_srs_t *files, int i); - -/** - * bcf_sr_next_line() - the iterator - * @readers: holder of the open readers - * - * Returns the number of readers which have the current line - * (bcf_sr_t.buffer[0]) set at this position. Use the bcf_sr_has_line macro to - * determine which of the readers are set. - */ -HTSLIB_EXPORT -int bcf_sr_next_line(bcf_srs_t *readers); - -#define bcf_sr_has_line(readers, i) (readers)->has_line[i] -#define bcf_sr_get_line(_readers, i) ((_readers)->has_line[i] ? 
((_readers)->readers[i].buffer[0]) : (bcf1_t *) NULL) -#define bcf_sr_swap_line(_readers, i, lieu) { bcf1_t *tmp = lieu; lieu = (_readers)->readers[i].buffer[0]; (_readers)->readers[i].buffer[0] = tmp; } -#define bcf_sr_region_done(_readers,i) (!(_readers)->has_line[i] && !(_readers)->readers[i].nbuffer ? 1 : 0) -#define bcf_sr_get_header(_readers, i) (_readers)->readers[i].header -#define bcf_sr_get_reader(_readers, i) &((_readers)->readers[i]) - - -/** - * bcf_sr_seek() - set all readers to selected position - * @seq: sequence name; NULL to seek to start - * @pos: 0-based coordinate - */ -HTSLIB_EXPORT -int bcf_sr_seek(bcf_srs_t *readers, const char *seq, hts_pos_t pos); - -/** - * bcf_sr_set_samples() - sets active samples - * @readers: holder of the open readers - * @samples: this can be one of: file name with one sample per line; - * or column-separated list of samples; or '-' for a list of - * samples shared by all files. If first character is the - * exclamation mark, all but the listed samples are included. - * @is_file: 0: list of samples; 1: file with sample names - * - * Returns 1 if the call succeeded, or 0 on error. - */ -HTSLIB_EXPORT -int bcf_sr_set_samples(bcf_srs_t *readers, const char *samples, int is_file); - -/** - * bcf_sr_set_targets(), bcf_sr_set_regions() - init targets/regions - * @readers: holder of the open readers - * @targets: list of regions, one-based and inclusive. - * @is_fname: 0: targets is a comma-separated list of regions (chr,chr:from-to) - * 1: targets is a tabix indexed file with a list of regions - * ( or ) - * - * Returns 0 if the call succeeded, or -1 on error. - * - * Both functions behave the same way, unlisted positions will be skipped by - * bcf_sr_next_line(). However, there is an important difference: regions use - * index to jump to desired positions while targets streams the whole files - * and merely skip unlisted positions. 
- * - * Moreover, bcf_sr_set_targets() accepts an optional parameter $alleles which - * is interpreted as a 1-based column index in the tab-delimited file where - * alleles are listed. This in principle enables to perform the COLLAPSE_* - * logic also with tab-delimited files. However, the current implementation - * considers the alleles merely as a suggestion for prioritizing one of possibly - * duplicate VCF lines. It is up to the caller to examine targets->als if - * perfect match is sought after. Note that the duplicate positions in targets - * file are currently not supported. - * Targets (but not regions) can be prefixed with "^" to request logical complement, - * for example "^X,Y,MT" indicates that sequences X, Y and MT should be skipped. - * - * API notes: - * - bcf_sr_set_targets MUST be called before the first call to bcf_sr_add_reader() - * - calling bcf_sr_set_regions AFTER readers have been initialized will - * reposition the readers and discard all previous regions. - */ -HTSLIB_EXPORT -int bcf_sr_set_targets(bcf_srs_t *readers, const char *targets, int is_file, int alleles); - -HTSLIB_EXPORT -int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file); - - - -/* - * bcf_sr_regions_init() - * @regions: regions can be either a comma-separated list of regions - * (chr|chr:pos|chr:from-to|chr:from-) or VCF, BED, or - * tab-delimited file (the default). Uncompressed files - * are stored in memory while bgzip-compressed and tabix-indexed - * region files are streamed. - * @is_file: 0: regions is a comma-separated list of regions - * (chr|chr:pos|chr:from-to|chr:from-) - * 1: VCF, BED or tab-delimited file - * @chr, from, to: - * Column indexes of chromosome, start position and end position - * in the tab-delimited file. The positions are 1-based and - * inclusive. - * These parameters are ignored when reading from VCF, BED or - * tabix-indexed files. When end position column is not present, - * supply 'from' in place of 'to'. 
When 'to' is negative, first - * abs(to) will be attempted and if that fails, 'from' will be used - * instead. - * If chromosome name contains the characters ':' or '-', it should - * be put in curly brackets, for example as "{weird-chr-name:1-2}:1000-2000" - * - * The bcf_sr_regions_t struct returned by a successful call should be freed - * via bcf_sr_regions_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -bcf_sr_regions_t *bcf_sr_regions_init(const char *regions, int is_file, int chr, int from, int to); - -HTSLIB_EXPORT -void bcf_sr_regions_destroy(bcf_sr_regions_t *regions); - -/* - * bcf_sr_regions_seek() - seek to the chromosome block - * - * Returns 0 on success or -1 on failure. Sets reg->seq appropriately and - * reg->start,reg->end to -1. - */ -HTSLIB_EXPORT -int bcf_sr_regions_seek(bcf_sr_regions_t *regions, const char *chr); - -/* - * bcf_sr_regions_next() - retrieves next region. Returns 0 on success and -1 - * when all regions have been read. The fields reg->seq, reg->start and - * reg->end are filled with the genomic coordinates on success or with - * NULL,-1,-1 when no region is available. The coordinates are 0-based, - * inclusive. - */ -HTSLIB_EXPORT -int bcf_sr_regions_next(bcf_sr_regions_t *reg); - -/* - * bcf_sr_regions_overlap() - checks if the interval overlaps any of - * the regions, the coordinates are 0-based, inclusive. The coordinate queries - * must come in ascending order. - * - * Returns 0 if the position is in regions; -1 if the position is not in the - * regions and more regions exist; -2 if not in the regions and there are no more - * regions left. - */ -HTSLIB_EXPORT -int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end); - -/* - * bcf_sr_regions_flush() - calls repeatedly regs->missed_reg_handler() until - * all remaining records are processed. - * Returns 0 on success, <0 on error. 
- */ -HTSLIB_EXPORT -int bcf_sr_regions_flush(bcf_sr_regions_t *regs); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/tbx.h b/src/htslib-1.19.1/htslib/tbx.h deleted file mode 100644 index 3d2037c..0000000 --- a/src/htslib-1.19.1/htslib/tbx.h +++ /dev/null @@ -1,143 +0,0 @@ -/// @file htslib/tbx.h -/// Tabix API functions. -/* - Copyright (C) 2009, 2012-2015, 2019 Genome Research Ltd. - Copyright (C) 2010, 2012 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_TBX_H -#define HTSLIB_TBX_H - -#include "hts.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define TBX_MAX_SHIFT 31 - -#define TBX_GENERIC 0 -#define TBX_SAM 1 -#define TBX_VCF 2 -#define TBX_UCSC 0x10000 - -typedef struct tbx_conf_t { - int32_t preset; - int32_t sc, bc, ec; // seq col., beg col. and end col. 
- int32_t meta_char, line_skip; -} tbx_conf_t; - -typedef struct tbx_t { - tbx_conf_t conf; - hts_idx_t *idx; - void *dict; -} tbx_t; - -HTSLIB_EXPORT -extern const tbx_conf_t tbx_conf_gff, tbx_conf_bed, tbx_conf_psltbl, tbx_conf_sam, tbx_conf_vcf; - - #define tbx_itr_destroy(iter) hts_itr_destroy(iter) - #define tbx_itr_queryi(tbx, tid, beg, end) hts_itr_query((tbx)->idx, (tid), (beg), (end), tbx_readrec) - #define tbx_itr_querys(tbx, s) hts_itr_querys((tbx)->idx, (s), (hts_name2id_f)(tbx_name2id), (tbx), hts_itr_query, tbx_readrec) - #define tbx_itr_next(htsfp, tbx, itr, r) hts_itr_next(hts_get_bgzfp(htsfp), (itr), (r), (tbx)) - #define tbx_bgzf_itr_next(bgzfp, tbx, itr, r) hts_itr_next((bgzfp), (itr), (r), (tbx)) - - HTSLIB_EXPORT - int tbx_name2id(tbx_t *tbx, const char *ss); - - /* Internal helper function used by tbx_itr_next() */ - HTSLIB_EXPORT - BGZF *hts_get_bgzfp(htsFile *fp); - - HTSLIB_EXPORT - int tbx_readrec(BGZF *fp, void *tbxv, void *sv, int *tid, hts_pos_t *beg, hts_pos_t *end); - -/// Build an index of the lines in a BGZF-compressed file -/** The index struct returned by a successful call should be freed - via tbx_destroy() when it is no longer needed. 
-*/ - HTSLIB_EXPORT - tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf); -/* - * All tbx_index_build* methods return: 0 (success), -1 (general failure) or -2 (compression not BGZF) - */ - HTSLIB_EXPORT - int tbx_index_build(const char *fn, int min_shift, const tbx_conf_t *conf); - - HTSLIB_EXPORT - int tbx_index_build2(const char *fn, const char *fnidx, int min_shift, const tbx_conf_t *conf); - - HTSLIB_EXPORT - int tbx_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads, const tbx_conf_t *conf); - - -/// Load or stream a .tbi or .csi index -/** @param fn Name of the data file corresponding to the index - - Equivalent to tbx_index_load3(fn, NULL, HTS_IDX_SAVE_REMOTE); -*/ - HTSLIB_EXPORT - tbx_t *tbx_index_load(const char *fn); - -/// Load or stream a .tbi or .csi index -/** @param fn Name of the data file corresponding to the index - @param fnidx Name of the indexed file - @return The index, or NULL if an error occurred - - If @p fnidx is NULL, the index name will be derived from @p fn. - - Equivalent to tbx_index_load3(fn, fnidx, HTS_IDX_SAVE_REMOTE); -*/ - HTSLIB_EXPORT - tbx_t *tbx_index_load2(const char *fn, const char *fnidx); - -/// Load or stream a .tbi or .csi index -/** @param fn Name of the data file corresponding to the index - @param fnidx Name of the indexed file - @param flags Flags to alter behaviour (see description) - @return The index, or NULL if an error occurred - - If @p fnidx is NULL, the index name will be derived from @p fn. - - The @p flags parameter can be set to a combination of the following - values: - - HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes - HTS_IDX_SILENT_FAIL Fail silently if the index is not present - - The index struct returned by a successful call should be freed - via tbx_destroy() when it is no longer needed. 
-*/ - HTSLIB_EXPORT - tbx_t *tbx_index_load3(const char *fn, const char *fnidx, int flags); - - HTSLIB_EXPORT - const char **tbx_seqnames(tbx_t *tbx, int *n); // free the array but not the values - - HTSLIB_EXPORT - void tbx_destroy(tbx_t *tbx); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/thread_pool.h b/src/htslib-1.19.1/htslib/thread_pool.h deleted file mode 100644 index b13ccb7..0000000 --- a/src/htslib-1.19.1/htslib/thread_pool.h +++ /dev/null @@ -1,385 +0,0 @@ -/// @file htslib/thread_pool.h -/// Thread pool for multi-threading applications. -/* - Copyright (c) 2013-2017, 2019, 2020 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -/* - * This file implements a thread pool for multi-threading applications. It - * consists of two distinct interfaces: thread pools and thread process - * queues (a queue of both jobs to-do and of the results of completed jobs). 
- * Do not confuse "process" here with a unix PID; rather it is analogous to a - * program reading a stream of data blocks, processing them in some manner, - * and outputting a stream of new data blocks. - * - * The pool of threads is given a function pointer and void* data to pass in. - * This means the pool can run jobs of multiple types, albeit first come - * first served with no job scheduling except to pick tasks for the - * processes that have room to store the result. - * - * Upon completion, the return value from the function pointer is - * added to back to the process result queue if required. We may have - * multiple "processes" in use for the one pool. - * - * To see example usage, please look at the #ifdef TEST_MAIN code in - * thread_pool.c. - */ - -#ifndef HTSLIB_THREAD_POOL_H -#define HTSLIB_THREAD_POOL_H - -#include "hts_defs.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*----------------------------------------------------------------------------- - * Opaque data types. - * - * Actual definitions are in thread_pool_internal.h, but these should only - * be used by thread_pool.c itself. - */ - -/* - * An hts_tpool_process implements a queue of input jobs to process and a - * queue of resulting output post-processing. Internally it consists of two - * buffered queues, analogous to the pipes in a unix pipeline: - * ...input | process | output... - * - * Both input and output queues have size limits to prevent either queue from - * growing too large and serial numbers to ensure sequential consumption of - * the output. - * - * The thread pool may have many heterogeneous tasks, each using its own - * process mixed into the same thread pool. - */ -typedef struct hts_tpool_process hts_tpool_process; - -/* - * The single pool structure itself. - * - * This knows nothing about the nature of the jobs or where their output is - * going, but it maintains a list of process-queues associated with this pool - * from which the jobs are taken. 
- */ -typedef struct hts_tpool hts_tpool; - -/* - * An output, after job has executed. - */ -typedef struct hts_tpool_result hts_tpool_result; - - -/*----------------------------------------------------------------------------- - * Thread pool external functions - */ - - -/* - * Creates a worker pool with n worker threads. - * - * Returns pool pointer on success; - * NULL on failure - * - * The hts_tpool struct returned by a successful call should be freed - * via hts_tpool_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -hts_tpool *hts_tpool_init(int n); - - -/* - * Returns the number of requested threads for a pool. - */ -HTSLIB_EXPORT -int hts_tpool_size(hts_tpool *p); - - -/// Add an item to the work pool. -/** - * @param p Thread pool - * @param q Process queue - * @param func Function run by the thread pool - * @param arg Data for use by func() - * @return 0 on success - * -1 on failure - */ -// FIXME: should this drop the hts_tpool*p argument? It's just q->p -HTSLIB_EXPORT -int hts_tpool_dispatch(hts_tpool *p, hts_tpool_process *q, - void *(*func)(void *arg), void *arg); - -/// Add an item to the work pool, with nonblocking option. -/** - * @param p Thread pool - * @param q Process queue - * @param func Function run by the thread pool - * @param arg Data for use by func() - * @param nonblock Non-blocking flag (see description) - * @return 0 on success - * -1 on failure - * - * The @p nonblock parameter can take one of the following values: - * 0 => block if input queue is full - * +1 => don't block if input queue is full, but do not add task - * -1 => add task regardless of whether queue is full (over-size) - * - * If @p nonblock is +1 and the queue is full, -1 will be returned and - * `errno` is set to `EAGAIN`. - */ -HTSLIB_EXPORT -int hts_tpool_dispatch2(hts_tpool *p, hts_tpool_process *q, - void *(*func)(void *arg), void *arg, int nonblock); - -/// Add an item to the work pool, with nonblocking and cleanup callbacks. 
-/** - * @param p Thread pool - * @param q Process queue - * @param exec_func Function run by the thread pool - * @param arg Data for use by func() - * @param job_cleanup Callback to clean up when discarding jobs - * @param result_cleanup Callback to clean up when discarding result data - * @param nonblock Non-blocking flag (see description) - * @return 0 on success - * -1 on failure - * - * The @p nonblock parameter can take one of the following values: - * 0 => block if input queue is full - * +1 => don't block if input queue is full, but do not add task - * -1 => add task regardless of whether queue is full (over-size) - * - * If @p nonblock is +1 and the queue is full, -1 will be returned and - * `errno` is set to `EAGAIN`. - * - * The job_cleanup() and result_cleanup() callbacks are used when discarding - * data from a queue, for example when calling hts_tpool_process_reset() - * or hts_tpool_process_destroy(). - * - * If not NULL, job_cleanup() will be called for each pending job with the - * value of @p arg that was set for that job. This can be used to free - * any data associated with @p arg, and also @p arg itself. - * - * Similarly, result_cleanup() can be used to free any results left by - * jobs that had started before hts_tpool_process_reset() was called. - * The argument passed to result_cleanup() is the pointer that would - * have been returned by calling hts_tpool_result_data() on the result - * when pulled from the queue. - * - * job_cleanup() and result_cleanup() are only called when discarding jobs. - * For jobs that are processed normally, it is the responsibility of - * exec_func() and / or consumers of any results to do any cleaning up - * necessary. 
- */ -HTSLIB_EXPORT -int hts_tpool_dispatch3(hts_tpool *p, hts_tpool_process *q, - void *(*exec_func)(void *arg), void *arg, - void (*job_cleanup)(void *arg), - void (*result_cleanup)(void *data), - int nonblock); - -/* - * Wakes up a single thread stuck in dispatch and make it return with - * errno EAGAIN. - */ -HTSLIB_EXPORT -void hts_tpool_wake_dispatch(hts_tpool_process *q); - -/* - * Flushes the process-queue, but doesn't exit. This simply drains the queue - * and ensures all worker threads have finished their current tasks - * associated with this process. - * - * NOT: This does not mean the worker threads are not executing jobs in - * another process-queue. - * - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int hts_tpool_process_flush(hts_tpool_process *q); - -/* - * Resets a process to the initial state. - * - * This removes any queued up input jobs, disables any notification of - * new results/output, flushes what is left and then discards any - * queued output. Anything consumer stuck in a wait on results to - * appear should stay stuck and will only wake up when new data is - * pushed through the queue. - * - * Returns 0 on success; - * -1 on failure - */ -HTSLIB_EXPORT -int hts_tpool_process_reset(hts_tpool_process *q, int free_results); - -/* Returns the process queue size */ -HTSLIB_EXPORT -int hts_tpool_process_qsize(hts_tpool_process *q); - - -/* - * Destroys a thread pool. The threads are joined into the main - * thread so they will finish their current work load. - */ -HTSLIB_EXPORT -void hts_tpool_destroy(hts_tpool *p); - -/* - * Destroys a thread pool without waiting on jobs to complete. - * Use hts_tpool_kill(p) to quickly exit after a fatal error. - */ -HTSLIB_EXPORT -void hts_tpool_kill(hts_tpool *p); - -/* - * Pulls the next item off the process result queue. The caller should free - * it (and any internals as appropriate) after use. This doesn't wait for a - * result to be present. 
- * - * Results will be returned in strict order. - * - * Returns hts_tpool_result pointer if a result is ready. - * NULL if not. - */ -HTSLIB_EXPORT -hts_tpool_result *hts_tpool_next_result(hts_tpool_process *q); - -/* - * Pulls the next item off the process result queue. The caller should free - * it (and any internals as appropriate) after use. This will wait for - * a result to be present if none are currently available. - * - * Results will be returned in strict order. - * - * Returns hts_tpool_result pointer if a result is ready. - * NULL on error or during shutdown. - */ -HTSLIB_EXPORT -hts_tpool_result *hts_tpool_next_result_wait(hts_tpool_process *q); - -/* - * Frees a result 'r' and if free_data is true also frees - * the internal r->data result too. - */ -HTSLIB_EXPORT -void hts_tpool_delete_result(hts_tpool_result *r, int free_data); - -/* - * Returns the data portion of a hts_tpool_result, corresponding - * to the actual "result" itself. - */ -HTSLIB_EXPORT -void *hts_tpool_result_data(hts_tpool_result *r); - -/* - * Initialises a thread process-queue. - * - * In_only, if true, indicates that the process generates does not need to - * hold any output. Otherwise an output queue is used to store the results - * of processing each input job. - * - * Results hts_tpool_process pointer on success; - * NULL on failure - * - * The hts_tpool_process struct returned by a successful call should be freed - * via hts_tpool_process_destroy() when it is no longer needed. - */ -HTSLIB_EXPORT -hts_tpool_process *hts_tpool_process_init(hts_tpool *p, int qsize, int in_only); - - -/* Deallocates memory for a thread process-queue. - * Must be called before the thread pool is destroyed. - */ -HTSLIB_EXPORT -void hts_tpool_process_destroy(hts_tpool_process *q); - -/* - * Returns true if there are no items in the process results queue and - * also none still pending. 
- */ -HTSLIB_EXPORT -int hts_tpool_process_empty(hts_tpool_process *q); - -/* - * Returns the number of completed jobs in the process results queue. - */ -HTSLIB_EXPORT -int hts_tpool_process_len(hts_tpool_process *q); - -/* - * Returns the number of completed jobs in the process results queue plus the - * number running and queued up to run. - */ -HTSLIB_EXPORT -int hts_tpool_process_sz(hts_tpool_process *q); - -/* - * Shutdown a process. - * - * This sets the shutdown flag and wakes any threads waiting on process - * condition variables. - */ -HTSLIB_EXPORT -void hts_tpool_process_shutdown(hts_tpool_process *q); - -/* - * Returns whether this process queue has been shutdown. - * Return value of 1 signifies normal shutdown while >1 signifies it - * was shutdown due to an error condition. - */ -HTSLIB_EXPORT -int hts_tpool_process_is_shutdown(hts_tpool_process *q); - -/* - * Attach and detach a thread process-queue with / from the thread pool - * scheduler. - * - * We need to do attach after making a thread process, but may also wish - * to temporarily detach if we wish to stop running jobs on a specific - * process while permitting other process to continue. - */ -HTSLIB_EXPORT -void hts_tpool_process_attach(hts_tpool *p, hts_tpool_process *q); - -HTSLIB_EXPORT -void hts_tpool_process_detach(hts_tpool *p, hts_tpool_process *q); - -/* - * Increment and decrement the reference count in a process-queue. - * If the queue is being driven from two external (non thread-pool) - * threads, eg "main" and a "reader", this permits each end to - * decrement its use of the process-queue independently. 
- */ -HTSLIB_EXPORT -void hts_tpool_process_ref_incr(hts_tpool_process *q); - -HTSLIB_EXPORT -void hts_tpool_process_ref_decr(hts_tpool_process *q); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/vcf.h b/src/htslib-1.19.1/htslib/vcf.h deleted file mode 100644 index e60911a..0000000 --- a/src/htslib-1.19.1/htslib/vcf.h +++ /dev/null @@ -1,1673 +0,0 @@ -/// @file htslib/vcf.h -/// High-level VCF/BCF variant calling file operations. -/* - Copyright (C) 2012, 2013 Broad Institute. - Copyright (C) 2012-2020, 2022-2023 Genome Research Ltd. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -/* - todo: - - make the function names consistent - - provide calls to abstract away structs as much as possible - */ - -#ifndef HTSLIB_VCF_H -#define HTSLIB_VCF_H - -#include -#include -#include -#include "hts.h" -#include "kstring.h" -#include "hts_defs.h" -#include "hts_endian.h" - -/* Included only for backwards compatibility with e.g. 
bcftools 1.10 */ -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/***************** - * Header struct * - *****************/ - -#define BCF_HL_FLT 0 // header line -#define BCF_HL_INFO 1 -#define BCF_HL_FMT 2 -#define BCF_HL_CTG 3 -#define BCF_HL_STR 4 // structured header line TAG= -#define BCF_HL_GEN 5 // generic header line - -#define BCF_HT_FLAG 0 // header type -#define BCF_HT_INT 1 -#define BCF_HT_REAL 2 -#define BCF_HT_STR 3 -#define BCF_HT_LONG (BCF_HT_INT | 0x100) // BCF_HT_INT, but for int64_t values; VCF only! - -#define BCF_VL_FIXED 0 // variable length -#define BCF_VL_VAR 1 -#define BCF_VL_A 2 -#define BCF_VL_G 3 -#define BCF_VL_R 4 - -/* === Dictionary === - - The header keeps three dictionaries. The first keeps IDs in the - "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths - in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[] - is the actual hash table, which is opaque to the end users. In the hash - table, the key is the ID or sample name as a C string and the value is a - bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash - table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the - size of the hash table or, equivalently, the length of the id[] arrays. -*/ - -#define BCF_DT_ID 0 // dictionary type -#define BCF_DT_CTG 1 -#define BCF_DT_SAMPLE 2 - -// Complete textual representation of a header line -typedef struct bcf_hrec_t { - int type; // One of the BCF_HL_* type - char *key; // The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc. - char *value; // Set only for generic lines, NULL for FILTER/INFO, etc. 
- int nkeys; // Number of structured fields - char **keys, **vals; // The key=value pairs -} bcf_hrec_t; - -typedef struct bcf_idinfo_t { - uint64_t info[3]; // stores Number:20, var:4, Type:4, ColType:4 in info[0..2] - // for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG - bcf_hrec_t *hrec[3]; - int id; -} bcf_idinfo_t; - -typedef struct bcf_idpair_t { - const char *key; - const bcf_idinfo_t *val; -} bcf_idpair_t; - -// Note that bcf_hdr_t structs must always be created via bcf_hdr_init() -typedef struct bcf_hdr_t { - int32_t n[3]; // n:the size of the dictionary block in use, (allocated size, m, is below to preserve ABI) - bcf_idpair_t *id[3]; - void *dict[3]; // ID dictionary, contig dict and sample dict - char **samples; - bcf_hrec_t **hrec; - int nhrec, dirty; - int ntransl, *transl[2]; // for bcf_translate() - int nsamples_ori; // for bcf_hdr_set_samples() - uint8_t *keep_samples; - kstring_t mem; - int32_t m[3]; // m: allocated size of the dictionary block in use (see n above) -} bcf_hdr_t; - -HTSLIB_EXPORT -extern uint8_t bcf_type_shift[]; - -/************** - * VCF record * - **************/ - -#define BCF_BT_NULL 0 -#define BCF_BT_INT8 1 -#define BCF_BT_INT16 2 -#define BCF_BT_INT32 3 -#define BCF_BT_INT64 4 // Unofficial, for internal use only. 
-#define BCF_BT_FLOAT 5 -#define BCF_BT_CHAR 7 - -#define VCF_REF 0 -#define VCF_SNP (1<<0) -#define VCF_MNP (1<<1) -#define VCF_INDEL (1<<2) -#define VCF_OTHER (1<<3) -#define VCF_BND (1<<4) // breakend -#define VCF_OVERLAP (1<<5) // overlapping deletion, ALT=* -#define VCF_INS (1<<6) // implies VCF_INDEL -#define VCF_DEL (1<<7) // implies VCF_INDEL -#define VCF_ANY (VCF_SNP|VCF_MNP|VCF_INDEL|VCF_OTHER|VCF_BND|VCF_OVERLAP|VCF_INS|VCF_DEL) // any variant type (but not VCF_REF) - -typedef struct bcf_variant_t { - int type, n; // variant type and the number of bases affected, negative for deletions -} bcf_variant_t; - -typedef struct bcf_fmt_t { - int id; // id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key - int n, size, type; // n: number of values per-sample; size: number of bytes per-sample; type: one of BCF_BT_* types - uint8_t *p; // same as vptr and vptr_* in bcf_info_t below - uint32_t p_len; - uint32_t p_off:31, p_free:1; -} bcf_fmt_t; - -typedef struct bcf_info_t { - int key; // key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key - int type; // type: one of BCF_BT_* types - union { - int64_t i; // integer value - float f; // float value - } v1; // only set if $len==1; for easier access - uint8_t *vptr; // pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes - uint32_t vptr_len; // length of the vptr block or, when set, of the vptr_mod block, excluding offset - uint32_t vptr_off:31, // vptr offset, i.e., the size of the INFO key plus size+type bytes - vptr_free:1; // indicates that vptr-vptr_off must be freed; set only when modified and the new - // data block is bigger than the original - int len; // vector length, 1 for scalars -} bcf_info_t; - - -#define BCF1_DIRTY_ID 1 -#define BCF1_DIRTY_ALS 2 -#define BCF1_DIRTY_FLT 4 -#define BCF1_DIRTY_INF 8 - -typedef struct bcf_dec_t { - int m_fmt, m_info, m_id, m_als, m_allele, m_flt; // allocated size (high-water mark); 
do not change - int n_flt; // Number of FILTER fields - int *flt; // FILTER keys in the dictionary - char *id, *als; // ID and REF+ALT block (\0-separated) - char **allele; // allele[0] is the REF (allele[] pointers to the als block); all null terminated - bcf_info_t *info; // INFO - bcf_fmt_t *fmt; // FORMAT and individual sample - bcf_variant_t *var; // $var and $var_type set only when set_variant_types called - int n_var, var_type; - int shared_dirty; // if set, shared.s must be recreated on BCF output - int indiv_dirty; // if set, indiv.s must be recreated on BCF output -} bcf_dec_t; - - -#define BCF_ERR_CTG_UNDEF 1 -#define BCF_ERR_TAG_UNDEF 2 -#define BCF_ERR_NCOLS 4 -#define BCF_ERR_LIMITS 8 -#define BCF_ERR_CHAR 16 -#define BCF_ERR_CTG_INVALID 32 -#define BCF_ERR_TAG_INVALID 64 - -/// Get error description for bcf error code -/** @param errorcode The error code which is to be described - @param buffer The buffer in which description to be added - @param maxbuffer The size of buffer passed - @return NULL on invalid buffer; buffer on other cases - -The buffer will be an empty string when @p errorcode is 0. -Description of errors present in code will be appended to @p buffer with ',' separation. -The buffer has to be at least 4 characters long. NULL will be returned if it is smaller or when buffer is NULL. - -'...' will be appended if the description doesn't fit in the given buffer. - */ - -HTSLIB_EXPORT -const char *bcf_strerror(int errorcode, char *buffer, size_t maxbuffer); - -/* - The bcf1_t structure corresponds to one VCF/BCF line. Reading from VCF file - is slower because the string is first to be parsed, packed into BCF line - (done in vcf_parse), then unpacked into internal bcf1_t structure. If it - is known in advance that some of the fields will not be required (notably - the sample columns), parsing of these can be skipped by setting max_unpack - appropriately. 
- Similarly, it is fast to output a BCF line because the columns (kept in - shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF - line must be formatted in vcf_format. - */ -typedef struct bcf1_t { - hts_pos_t pos; // POS - hts_pos_t rlen; // length of REF - int32_t rid; // CHROM - float qual; // QUAL - uint32_t n_info:16, n_allele:16; - uint32_t n_fmt:8, n_sample:24; - kstring_t shared, indiv; - bcf_dec_t d; // lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack() - int max_unpack; // Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed - int unpacked; // remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work - int unpack_size[3]; // the original block size of ID, REF+ALT and FILTER - int errcode; // one of BCF_ERR_* codes -} bcf1_t; - -/******* - * API * - *******/ - - /*********************************************************************** - * BCF and VCF I/O - * - * A note about naming conventions: htslib internally represents VCF - * records as bcf1_t data structures, therefore most functions are - * prefixed with bcf_. There are a few exceptions where the functions must - * be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In - * these cases, functions prefixed with bcf_ are more general and work - * with both BCF and VCF. 
- * - ***********************************************************************/ - - /** These macros are defined only for consistency with other parts of htslib */ - #define bcf_init1() bcf_init() - #define bcf_read1(fp,h,v) bcf_read((fp),(h),(v)) - #define vcf_read1(fp,h,v) vcf_read((fp),(h),(v)) - #define bcf_write1(fp,h,v) bcf_write((fp),(h),(v)) - #define vcf_write1(fp,h,v) vcf_write((fp),(h),(v)) - #define bcf_destroy1(v) bcf_destroy(v) - #define bcf_empty1(v) bcf_empty(v) - #define vcf_parse1(s,h,v) vcf_parse((s),(h),(v)) - #define bcf_clear1(v) bcf_clear(v) - #define vcf_format1(h,v,s) vcf_format((h),(v),(s)) - - /** - * bcf_hdr_init() - create an empty BCF header. - * @param mode "r" or "w" - * - * When opened for writing, the mandatory fileFormat and - * FILTER=PASS lines are added automatically. - * - * The bcf_hdr_t struct returned by a successful call should be freed - * via bcf_hdr_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf_hdr_t *bcf_hdr_init(const char *mode); - - /** Destroy a BCF header struct */ - HTSLIB_EXPORT - void bcf_hdr_destroy(bcf_hdr_t *h); - - /** Allocate and initialize a bcf1_t object. - * - * The bcf1_t struct returned by a successful call should be freed - * via bcf_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf1_t *bcf_init(void); - - /** Deallocate a bcf1_t object */ - HTSLIB_EXPORT - void bcf_destroy(bcf1_t *v); - - /** - * Same as bcf_destroy() but frees only the memory allocated by bcf1_t, - * not the bcf1_t object itself. - */ - HTSLIB_EXPORT - void bcf_empty(bcf1_t *v); - - /** - * Make the bcf1_t object ready for next read. Intended mostly for - * internal use, the user should rarely need to call this function - * directly. 
- */ - HTSLIB_EXPORT - void bcf_clear(bcf1_t *v); - - - /** bcf_open and vcf_open mode: please see hts_open() in hts.h */ - typedef htsFile vcfFile; - #define bcf_open(fn, mode) hts_open((fn), (mode)) - #define vcf_open(fn, mode) hts_open((fn), (mode)) - #define bcf_flush(fp) hts_flush((fp)) - #define bcf_close(fp) hts_close(fp) - #define vcf_close(fp) hts_close(fp) - - /// Read a VCF or BCF header - /** @param fp The file to read the header from - @return Pointer to a populated header structure on success; - NULL on failure - - The bcf_hdr_t struct returned by a successful call should be freed - via bcf_hdr_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf_hdr_t *bcf_hdr_read(htsFile *fp) HTS_RESULT_USED; - - /** - * bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed - * @param samples samples to include or exclude from file or as a comma-separated string. - * LIST|FILE .. select samples in list/file - * ^LIST|FILE .. exclude samples from list/file - * - .. include all samples - * NULL .. exclude all samples - * @param is_file @p samples is a file (1) or a comma-separated list (0) - * - * The bottleneck of VCF reading is parsing of genotype fields. If the - * reader knows in advance that only subset of samples is needed (possibly - * no samples at all), the performance of bcf_read() can be significantly - * improved by calling bcf_hdr_set_samples after bcf_hdr_read(). - * The function bcf_read() will subset the VCF/BCF records automatically - * with the notable exception when reading records via bcf_itr_next(). - * In this case, bcf_subset_format() must be called explicitly, because - * bcf_readrec() does not see the header. - * - * Returns 0 on success, -1 on error or a positive integer if the list - * contains samples not present in the VCF header. In such a case, the - * return value is the index of the offending sample. 
- */ - HTSLIB_EXPORT - int bcf_hdr_set_samples(bcf_hdr_t *hdr, const char *samples, int is_file) HTS_RESULT_USED; - - HTSLIB_EXPORT - int bcf_subset_format(const bcf_hdr_t *hdr, bcf1_t *rec); - - /// Write a VCF or BCF header - /** @param fp Output file - @param h The header to write - @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int bcf_hdr_write(htsFile *fp, bcf_hdr_t *h) HTS_RESULT_USED; - - /** - * Parse VCF line contained in kstring and populate the bcf1_t struct - * The line must not end with \n or \r characters. - */ - HTSLIB_EXPORT - int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v); - - /** - * Complete the file opening mode, according to its extension. - * @param mode Preallocated mode string to be completed. - * @param fn File name to be opened. - * @param format Format string (vcf|bcf|vcf.gz) - * @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int vcf_open_mode(char *mode, const char *fn, const char *format); - - /** The opposite of vcf_parse. It should rarely be called directly, see vcf_write */ - HTSLIB_EXPORT - int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s); - - /// Read next VCF or BCF record - /** @param fp The file to read the record from - @param h The header for the vcf/bcf file - @param v The bcf1_t structure to populate - @return 0 on success; -1 on end of file; < -1 on critical error - -On errors which are not critical for reading, such as missing header -definitions in vcf files, zero will be returned but v->errcode will have been -set to one of BCF_ERR* codes and must be checked before calling bcf_write(). - */ - HTSLIB_EXPORT - int bcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; - - /** - * bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field) - * - * Note that bcf_unpack() must be called even when reading VCF. It is safe - * to call the function repeatedly, it will not unpack the same field - * twice. 
- */ - #define BCF_UN_STR 1 // up to ALT inclusive - #define BCF_UN_FLT 2 // up to FILTER - #define BCF_UN_INFO 4 // up to INFO - #define BCF_UN_SHR (BCF_UN_STR|BCF_UN_FLT|BCF_UN_INFO) // all shared information - #define BCF_UN_FMT 8 // unpack format and each sample - #define BCF_UN_IND BCF_UN_FMT // a synonym of BCF_UN_FMT - #define BCF_UN_ALL (BCF_UN_SHR|BCF_UN_FMT) // everything - HTSLIB_EXPORT - int bcf_unpack(bcf1_t *b, int which); - - /* - * bcf_dup() - create a copy of BCF record. - * - * Note that bcf_unpack() must be called on the returned copy as if it was - * obtained from bcf_read(). Also note that bcf_dup() calls bcf_sync1(src) - * internally to reflect any changes made by bcf_update_* functions. - * - * The bcf1_t struct returned by a successful call should be freed - * via bcf_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf1_t *bcf_dup(bcf1_t *src); - - HTSLIB_EXPORT - bcf1_t *bcf_copy(bcf1_t *dst, bcf1_t *src); - - /// Write one VCF or BCF record. The type is determined at the open() call. - /** @param fp The file to write to - @param h The header for the vcf/bcf file - @param v The bcf1_t structure to write - @return 0 on success; -1 on error - */ - HTSLIB_EXPORT - int bcf_write(htsFile *fp, bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; - - /** - * The following functions work only with VCFs and should rarely be called - * directly. Usually one wants to use their bcf_* alternatives, which work - * transparently with both VCFs and BCFs. - */ - /// Read a VCF format header - /** @param fp The file to read the header from - @return Pointer to a populated header structure on success; - NULL on failure - - Use bcf_hdr_read() instead. - - The bcf_hdr_t struct returned by a successful call should be freed - via bcf_hdr_destroy() when it is no longer needed. 
- */ - HTSLIB_EXPORT - bcf_hdr_t *vcf_hdr_read(htsFile *fp) HTS_RESULT_USED; - - /// Write a VCF format header - /** @param fp Output file - @param h The header to write - @return 0 on success; -1 on failure - - Use bcf_hdr_write() instead - */ - HTSLIB_EXPORT - int vcf_hdr_write(htsFile *fp, const bcf_hdr_t *h) HTS_RESULT_USED; - - /// Read a record from a VCF file - /** @param fp The file to read the record from - @param h The header for the vcf file - @param v The bcf1_t structure to populate - @return 0 on success; -1 on end of file; < -1 on error - - Use bcf_read() instead - */ - HTSLIB_EXPORT - int vcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; - - /// Write a record to a VCF file - /** @param fp The file to write to - @param h The header for the vcf file - @param v The bcf1_t structure to write - @return 0 on success; -1 on error - - Use bcf_write() instead - */ - HTSLIB_EXPORT - int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; - - /** Helper function for the bcf_itr_next() macro; internal use, ignore it */ - HTSLIB_EXPORT - int bcf_readrec(BGZF *fp, void *null, void *v, int *tid, hts_pos_t *beg, hts_pos_t *end); - - /// Write a line to a VCF file - /** @param line Line to write - @param fp File to write it to - @return 0 on success; -1 on failure - - @note No checks are done on the line being added, apart from - ensuring that it ends with a newline. This function - should therefore be used with care. - */ - HTSLIB_EXPORT - int vcf_write_line(htsFile *fp, kstring_t *line); - - /************************************************************************** - * Header querying and manipulation routines - **************************************************************************/ - - /** Create a new header using the supplied template - * - * The bcf_hdr_t struct returned by a successful call should be freed - * via bcf_hdr_destroy() when it is no longer needed. 
- * @return NULL on failure, header otherwise - */ - HTSLIB_EXPORT - bcf_hdr_t *bcf_hdr_dup(const bcf_hdr_t *hdr); - - /** - * Copy header lines from src to dst if not already present in dst. See also bcf_translate(). - * Returns 0 on success or sets a bit on error: - * 1 .. conflicting definitions of tag length - * // todo - */ - HTSLIB_EXPORT - int bcf_hdr_combine(bcf_hdr_t *dst, const bcf_hdr_t *src) HTS_DEPRECATED("Please use bcf_hdr_merge instead"); - - /** - * bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate() - * @param dst: the destination header to be merged into, NULL on the first pass - * @param src: the source header - * @return NULL on failure, header otherwise - * - * Notes: - * - use as: - * bcf_hdr_t *dst = NULL; - * for (i=0; in[BCF_DT_SAMPLE] - - - /** The following functions are for internal use and should rarely be called directly */ - HTSLIB_EXPORT - int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt); - - /// Synchronize internal header structures - /** @param h Header - @return 0 on success, -1 on failure - - This function updates the id, sample and contig arrays in the - bcf_hdr_t structure so that they point to the same locations as - the id, sample and contig dictionaries. - */ - HTSLIB_EXPORT - int bcf_hdr_sync(bcf_hdr_t *h) HTS_RESULT_USED; - - /** - * bcf_hdr_parse_line() - parse a single line of VCF textual header - * @param h BCF header struct - * @param line One or more lines of header text - * @param len Filled out with length data parsed from 'line'. - * @return bcf_hrec_t* on success; - * NULL on error or on end of header text. 
- * NB: to distinguish error from end-of-header, check *len: - * *len == 0 indicates @p line did not start with "##" - * *len == -1 indicates failure, likely due to out of memory - * *len > 0 indicates a malformed header line - * - * If *len > 0 on exit, it will contain the full length of the line - * including any trailing newline (this includes cases where NULL was - * returned due to a malformed line). Callers can use this to skip to - * the next header line. - */ - HTSLIB_EXPORT - bcf_hrec_t *bcf_hdr_parse_line(const bcf_hdr_t *h, const char *line, int *len); - /// Convert a bcf header record to string form - /** - * @param hrec Header record - * @param str Destination kstring - * @return 0 on success; < 0 on error - */ - HTSLIB_EXPORT - int bcf_hrec_format(const bcf_hrec_t *hrec, kstring_t *str); - - /// Add a header record into a header - /** - * @param hdr Destination header - * @param hrec Header record - * @return 0 on success, -1 on failure - * - * If this function returns success, ownership of @p hrec will have - * been transferred to the header structure. It may also have been - * freed if it was a duplicate of a record already in the header. - * Therefore the @p hrec pointer should not be used after a successful - * return from this function. - * - * If this function returns failure, ownership will not have been taken - * and the caller is responsible for cleaning up @p hrec. - */ - - HTSLIB_EXPORT - int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec); - - /** - * bcf_hdr_get_hrec() - get header line info - * @param type: one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN - * @param key: the header key for generic lines (e.g. "fileformat"), any field - * for structured lines, typically "ID". - * @param value: the value which pairs with key. Can be be NULL for BCF_HL_GEN - * @param str_class: the class of BCF_HL_STR line (e.g. 
"ALT" or "SAMPLE"), otherwise NULL - */ - HTSLIB_EXPORT - bcf_hrec_t *bcf_hdr_get_hrec(const bcf_hdr_t *hdr, int type, const char *key, const char *value, const char *str_class); - - /// Duplicate a header record - /** @param hrec Header record to copy - @return A new header record on success; NULL on failure - - The bcf_hrec_t struct returned by a successful call should be freed - via bcf_hrec_destroy() when it is no longer needed. - */ - HTSLIB_EXPORT - bcf_hrec_t *bcf_hrec_dup(bcf_hrec_t *hrec); - - /// Add a new header record key - /** @param hrec Header record - @param str Key name - @param len Length of @p str - @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int bcf_hrec_add_key(bcf_hrec_t *hrec, const char *str, size_t len) HTS_RESULT_USED; - - /// Set a header record value - /** @param hrec Header record - @param i Index of value - @param str Value to set - @param len Length of @p str - @param is_quoted Value should be quoted - @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const char *str, size_t len, int is_quoted) HTS_RESULT_USED; - - HTSLIB_EXPORT - int bcf_hrec_find_key(bcf_hrec_t *hrec, const char *key); - - - /// Add an IDX header record - /** @param hrec Header record - @param idx IDX value to add - @return 0 on success; -1 on failure - */ - HTSLIB_EXPORT - int hrec_add_idx(bcf_hrec_t *hrec, int idx) HTS_RESULT_USED; - - /// Free up a header record and associated structures - /** @param hrec Header record - */ - HTSLIB_EXPORT - void bcf_hrec_destroy(bcf_hrec_t *hrec); - - - - /************************************************************************** - * Individual record querying and manipulation routines - **************************************************************************/ - - /** See the description of bcf_hdr_subset() */ - HTSLIB_EXPORT - int bcf_subset(const bcf_hdr_t *h, bcf1_t *v, int n, int *imap); - - /** - * bcf_translate() - translate tags ids to be consistent 
with different header. This function - * is useful when lines from multiple VCF need to be combined. - * @dst_hdr: the destination header, to be used in bcf_write(), see also bcf_hdr_combine() - * @src_hdr: the source header, used in bcf_read() - * @src_line: line obtained by bcf_read() - */ - HTSLIB_EXPORT - int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *src_line); - - /// Get variant types in a BCF record - /** - * @param rec BCF/VCF record - * @return Types of variant present - * - * The return value will be a bitwise-or of VCF_SNP, VCF_MNP, - * VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP. If will return - * VCF_REF (i.e. 0) if none of the other types is present. - * @deprecated Please use bcf_has_variant_types() instead - */ - HTSLIB_EXPORT - int bcf_get_variant_types(bcf1_t *rec); - - /// Get variant type in a BCF record, for a given allele - /** - * @param rec BCF/VCF record - * @param ith_allele Allele to check - * @return Type of variant present - * - * The return value will be one of VCF_REF, VCF_SNP, VCF_MNP, - * VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP. - * @deprecated Please use bcf_has_variant_type() instead - */ - HTSLIB_EXPORT - int bcf_get_variant_type(bcf1_t *rec, int ith_allele); - - /// Match mode for bcf_has_variant_types() - enum bcf_variant_match { - bcf_match_exact, ///< Types present exactly match tested for - bcf_match_overlap, ///< At least one variant type in common - bcf_match_subset, ///< Test set is a subset of types present - }; - - /// Check for presence of variant types in a BCF record - /** - * @param rec BCF/VCF record - * @param bitmask Set of variant types to test for - * @param mode Match mode - * @return >0 if the variant types are present, - * 0 if not present, - * -1 on error - * - * @p bitmask should be the bitwise-or of the variant types (VCF_SNP, - * VCF_MNP, etc.) to test for. - * - * The return value is the bitwise-and of the set of types present - * and @p bitmask. 
Callers that want to check for the presence of more - * than one type can avoid function call overhead by passing all the - * types to be checked for in a single call to this function, in - * bcf_match_overlap mode, and then check for them individually in the - * returned value. - * - * As VCF_REF is represented by 0 (i.e. the absence of other variants) - * it should be tested for using - * bcf_has_variant_types(rec, VCF_REF, bcf_match_exact) - * which will return 1 if no other variant type is present, otherwise 0. - */ - HTSLIB_EXPORT - int bcf_has_variant_types(bcf1_t *rec, uint32_t bitmask, enum bcf_variant_match mode); - - /// Check for presence of variant types in a BCF record, for a given allele - /** - * @param rec BCF/VCF record - * @param ith_allele Allele to check - * @param bitmask Set of variant types to test for - * @return >0 if one of the variant types is present, - * 0 if not present, - * -1 on error - * - * @p bitmask should be the bitwise-or of the variant types (VCF_SNP, - * VCF_MNP, etc.) to test for, or VCF_REF on its own. - * - * The return value is the bitwise-and of the set of types present - * and @p bitmask. Callers that want to check for the presence of more - * than one type can avoid function call overhead by passing all the - * types to be checked for in a single call to this function, and then - * check for them individually in the returned value. - * - * As a special case, if @p bitmask is VCF_REF (i.e. 0), the function - * tests for an exact match. The return value will be 1 if the - * variant type calculated for the allele is VCF_REF, otherwise if - * any other type is present it will be 0. - */ - HTSLIB_EXPORT - int bcf_has_variant_type(bcf1_t *rec, int ith_allele, uint32_t bitmask); - - /// Return the number of bases affected by a variant, for a given allele - /** - * @param rec BCF/VCF record - * @param ith_allele Allele index - * @return The number of bases affected (negative for deletions), - * or bcf_int32_missing on error. 
- */ - HTSLIB_EXPORT - int bcf_variant_length(bcf1_t *rec, int ith_allele); - - HTSLIB_EXPORT - int bcf_is_snp(bcf1_t *v); - - /** - * bcf_update_filter() - sets the FILTER column - * @flt_ids: The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") - * @n: Number of filters. If n==0, all filters are removed - */ - HTSLIB_EXPORT - int bcf_update_filter(const bcf_hdr_t *hdr, bcf1_t *line, int *flt_ids, int n); - /** - * bcf_add_filter() - adds to the FILTER column - * @flt_id: filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") - * - * If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed. - */ - HTSLIB_EXPORT - int bcf_add_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id); - /** - * bcf_remove_filter() - removes from the FILTER column - * @flt_id: filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") - * @pass: when set to 1 and no filters are present, set to PASS - */ - HTSLIB_EXPORT - int bcf_remove_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id, int pass); - /** - * Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably. 
- */ - HTSLIB_EXPORT - int bcf_has_filter(const bcf_hdr_t *hdr, bcf1_t *line, char *filter); - /** - * bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALT column - * @alleles: Array of alleles - * @nals: Number of alleles - * @alleles_string: Comma-separated alleles, starting with the REF allele - */ - HTSLIB_EXPORT - int bcf_update_alleles(const bcf_hdr_t *hdr, bcf1_t *line, const char **alleles, int nals); - - HTSLIB_EXPORT - int bcf_update_alleles_str(const bcf_hdr_t *hdr, bcf1_t *line, const char *alleles_string); - - /** - * bcf_update_id() - sets new ID string - * bcf_add_id() - adds to the ID string checking for duplicates - */ - HTSLIB_EXPORT - int bcf_update_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id); - - HTSLIB_EXPORT - int bcf_add_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id); - - /** - * bcf_update_info_*() - functions for updating INFO fields - * @param hdr: the BCF header - * @param line: VCF line to be edited - * @param key: the INFO tag to be updated - * @param values: pointer to the array of values. Pass NULL to remove the tag. - * @param n: number of values in the array. When set to 0, the INFO tag is removed - * @return 0 on success or negative value on error. - * - * The @p string in bcf_update_info_flag() is optional, - * @p n indicates whether the flag is set or removed. - * - * Note that updating an END info tag will cause line->rlen to be - * updated as a side-effect (removing the tag will set it to the - * string length of the REF allele). If line->pos is being changed as - * well, it is important that this is done before calling - * bcf_update_info_int32() to update the END tag, otherwise rlen will be - * set incorrectly. If the new END value is less than or equal to - * line->pos, a warning will be printed and line->rlen will be set to - * the length of the REF allele. 
- */ - #define bcf_update_info_int32(hdr,line,key,values,n) bcf_update_info((hdr),(line),(key),(values),(n),BCF_HT_INT) - #define bcf_update_info_float(hdr,line,key,values,n) bcf_update_info((hdr),(line),(key),(values),(n),BCF_HT_REAL) - #define bcf_update_info_flag(hdr,line,key,string,n) bcf_update_info((hdr),(line),(key),(string),(n),BCF_HT_FLAG) - #define bcf_update_info_string(hdr,line,key,string) bcf_update_info((hdr),(line),(key),(string),1,BCF_HT_STR) - HTSLIB_EXPORT - int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type); - - /// Set or update 64-bit integer INFO values - /** - * @param hdr: the BCF header - * @param line: VCF line to be edited - * @param key: the INFO tag to be updated - * @param values: pointer to the array of values. Pass NULL to remove the tag. - * @param n: number of values in the array. When set to 0, the INFO tag is removed - * @return 0 on success or negative value on error. - * - * This function takes an int64_t values array as input. The data - * actually stored will be shrunk to the minimum size that can - * accept all of the values. - * - * INFO values outside of the range BCF_MIN_BT_INT32 to BCF_MAX_BT_INT32 - * can only be written to VCF files. - */ - static inline int bcf_update_info_int64(const bcf_hdr_t *hdr, bcf1_t *line, - const char *key, - const int64_t *values, int n) - { - return bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG); - } - - /* - * bcf_update_format_*() - functions for updating FORMAT fields - * @values: pointer to the array of values, the same number of elements - * is expected for each sample. Missing values must be padded - * with bcf_*_missing or bcf_*_vector_end values. - * @n: number of values in the array. If n==0, existing tag is removed. - * - * The function bcf_update_format_string() is a higher-level (slower) variant of - * bcf_update_format_char(). 
The former accepts array of \0-terminated strings - * whereas the latter requires that the strings are collapsed into a single array - * of fixed-length strings. In case of strings with variable length, shorter strings - * can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char() - * are not \0-terminated. - * - * Returns 0 on success or negative value on error. - */ - #define bcf_update_format_int32(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_INT) - #define bcf_update_format_float(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_REAL) - #define bcf_update_format_char(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_STR) - #define bcf_update_genotypes(hdr,line,gts,n) bcf_update_format((hdr),(line),"GT",(gts),(n),BCF_HT_INT) // See bcf_gt_ macros below - - HTSLIB_EXPORT - int bcf_update_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const char **values, int n); - - HTSLIB_EXPORT - int bcf_update_format(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type); - - // Macros for setting genotypes correctly, for use with bcf_update_genotypes only; idx corresponds - // to VCF's GT (1-based index to ALT or 0 for the reference allele) and val is the opposite, obtained - // from bcf_get_genotypes() below. - #define bcf_gt_phased(idx) (((idx)+1)<<1|1) - #define bcf_gt_unphased(idx) (((idx)+1)<<1) - #define bcf_gt_missing 0 - #define bcf_gt_is_missing(val) ((val)>>1 ? 0 : 1) - #define bcf_gt_is_phased(idx) ((idx)&1) - #define bcf_gt_allele(val) (((val)>>1)-1) - - /** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */ - #define bcf_alleles2gt(a,b) ((a)>(b)?((a)*((a)+1)/2+(b)):((b)*((b)+1)/2+(a))) - static inline void bcf_gt2alleles(int igt, int *a, int *b) - { - int k = 0, dk = 1; - while ( k=0 on success - * -1 .. 
no such INFO tag defined in the header - * -2 .. clash between types defined in the header and encountered in the VCF record - * -3 .. tag is not present in the VCF record - * -4 .. the operation could not be completed (e.g. out of memory) - * - * Returns negative value on error or the number of values (including - * missing values) put in *dst on success. bcf_get_info_string() returns - * on success the number of characters stored excluding the nul- - * terminating byte. bcf_get_info_flag() does not store anything in *dst - * but returns 1 if the flag is set or 0 if not. - * - * *dst will be reallocated if it is not big enough (i.e. *ndst is too - * small) or NULL on entry. The new size will be stored in *ndst. - */ - #define bcf_get_info_int32(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_INT) - #define bcf_get_info_float(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_REAL) - #define bcf_get_info_string(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_STR) - #define bcf_get_info_flag(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_FLAG) - - HTSLIB_EXPORT - int bcf_get_info_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type); - - /// Put integer INFO values into an int64_t array - /** - * @param hdr: BCF header - * @param line: BCF record - * @param tag: INFO tag to retrieve - * @param dst: *dst is pointer to a memory location, can point to NULL - * @param ndst: pointer to the size of allocated memory - * @return >=0 on success - * -1 .. no such INFO tag defined in the header - * -2 .. clash between types defined in the header and encountered in the VCF record - * -3 .. tag is not present in the VCF record - * -4 .. the operation could not be completed (e.g. 
out of memory) - * - * Returns negative value on error or the number of values (including - * missing values) put in *dst on success. - * - * *dst will be reallocated if it is not big enough (i.e. *ndst is too - * small) or NULL on entry. The new size will be stored in *ndst. - */ - static inline int bcf_get_info_int64(const bcf_hdr_t *hdr, bcf1_t *line, - const char *tag, int64_t **dst, - int *ndst) - { - return bcf_get_info_values(hdr, line, tag, - (void **) dst, ndst, BCF_HT_LONG); - } - - /** - * bcf_get_format_*() - same as bcf_get_info*() above - * - * The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char(). - * see the description of bcf_update_format_string() and bcf_update_format_char() above. - * Unlike other bcf_get_format__*() functions, bcf_get_format_string() allocates two arrays: - * a single block of \0-terminated strings collapsed into a single array and an array of pointers - * to these strings. Both arrays must be cleaned by the user. - * - * Returns negative value on error or the number of written values on success. - * - * Use the returned number of written values for accessing valid entries of dst, as ndst is only a - * watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over - * values from previous calls to bcf_get_format_*() on lines with more values per sample. 
- * - * Example: - * int ndst = 0; char **dst = NULL; - * if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 ) - * for (i=0; iid[type][int_id].key) - - /** - * bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID - * bcf_hdr_id2name() - Translates numeric ID to sequence name - */ - static inline int bcf_hdr_name2id(const bcf_hdr_t *hdr, const char *id) { return bcf_hdr_id2int(hdr, BCF_DT_CTG, id); } - static inline const char *bcf_hdr_id2name(const bcf_hdr_t *hdr, int rid) - { - if ( !hdr || rid<0 || rid>=hdr->n[BCF_DT_CTG] ) return NULL; - return hdr->id[BCF_DT_CTG][rid].key; - } - static inline const char *bcf_seqname(const bcf_hdr_t *hdr, const bcf1_t *rec) { - return bcf_hdr_id2name(hdr, rec ? rec->rid : -1); - } - - /** Return CONTIG name, or "(unknown)" - - Like bcf_seqname(), but this function will never return NULL. If - the contig name cannot be found (either because @p hdr was not - supplied or rec->rid was out of range) it returns the string - "(unknown)". - */ - static inline const char *bcf_seqname_safe(const bcf_hdr_t *hdr, const bcf1_t *rec) { - const char *name = bcf_seqname(hdr, rec); - return name ? name : "(unknown)"; - } - - /** - * bcf_hdr_id2*() - Macros for accessing bcf_idinfo_t - * @type: one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT - * @int_id: return value of bcf_hdr_id2int, must be >=0 - * - * The returned values are: - * bcf_hdr_id2length .. whether the number of values is fixed or variable, one of BCF_VL_* - * bcf_hdr_id2number .. the number of values, 0xfffff for variable length fields - * bcf_hdr_id2type .. the field type, one of BCF_HT_* - * bcf_hdr_id2coltype .. the column type, one of BCF_HL_* - * - * Notes: Prior to using the macros, the presence of the info should be - * tested with bcf_hdr_idinfo_exists(). 
- */ - #define bcf_hdr_id2length(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>8 & 0xf) - #define bcf_hdr_id2number(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>12) - #define bcf_hdr_id2type(hdr,type,int_id) (uint32_t)((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>4 & 0xf) - #define bcf_hdr_id2coltype(hdr,type,int_id) (uint32_t)((hdr)->id[BCF_DT_ID][int_id].val->info[type] & 0xf) - #define bcf_hdr_idinfo_exists(hdr,type,int_id) ((int_id)>=0 && (int_id)<(hdr)->n[BCF_DT_ID] && (hdr)->id[BCF_DT_ID][int_id].val && bcf_hdr_id2coltype((hdr),(type),(int_id))!=0xf) - #define bcf_hdr_id2hrec(hdr,dict_type,col_type,int_id) ((hdr)->id[(dict_type)==BCF_DT_CTG?BCF_DT_CTG:BCF_DT_ID][int_id].val->hrec[(dict_type)==BCF_DT_CTG?0:(col_type)]) - /// Convert BCF FORMAT data to string form - /** - * @param s kstring to write into - * @param n number of items in @p data - * @param type type of items in @p data - * @param data BCF format data - * @return 0 on success - * -1 if out of memory - */ - HTSLIB_EXPORT - int bcf_fmt_array(kstring_t *s, int n, int type, void *data); - - HTSLIB_EXPORT - uint8_t *bcf_fmt_sized_array(kstring_t *s, uint8_t *ptr); - - /// Encode a variable-length char array in BCF format - /** - * @param s kstring to write into - * @param l length of input - * @param a input data to encode - * @return 0 on success; < 0 on error - */ - HTSLIB_EXPORT - int bcf_enc_vchar(kstring_t *s, int l, const char *a); - - /// Encode a variable-length integer array in BCF format - /** - * @param s kstring to write into - * @param n total number of items in @p a (<= 0 to encode BCF_BT_NULL) - * @param a input data to encode - * @param wsize vector length (<= 0 is equivalent to @p n) - * @return 0 on success; < 0 on error - * @note @p n should be an exact multiple of @p wsize - */ - HTSLIB_EXPORT - int bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize); - - /// Encode a variable-length float array in BCF format - /** - * @param s kstring to 
write into - * @param n total number of items in @p a (<= 0 to encode BCF_BT_NULL) - * @param a input data to encode - * @return 0 on success; < 0 on error - */ - HTSLIB_EXPORT - int bcf_enc_vfloat(kstring_t *s, int n, float *a); - - - /************************************************************************** - * BCF index - * - * Note that these functions work with BCFs only. See synced_bcf_reader.h - * which provides (amongst other things) an API to work transparently with - * both indexed BCFs and VCFs. - **************************************************************************/ - - #define bcf_itr_destroy(iter) hts_itr_destroy(iter) - #define bcf_itr_queryi(idx, tid, beg, end) hts_itr_query((idx), (tid), (beg), (end), bcf_readrec) - #define bcf_itr_querys(idx, hdr, s) hts_itr_querys((idx), (s), (hts_name2id_f)(bcf_hdr_name2id), (hdr), hts_itr_query, bcf_readrec) - - static inline int bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) { - if (htsfp->is_bgzf) - return hts_itr_next(htsfp->fp.bgzf, itr, r, 0); - - hts_log_error("Only bgzf compressed files can be used with iterators"); - errno = EINVAL; - return -2; - } -/// Load a BCF index -/** @param fn BCF file name - @return The index, or NULL if an error occurred. - @note This only works for BCF files. Consider synced_bcf_reader instead -which works for both BCF and VCF. -*/ - #define bcf_index_load(fn) hts_idx_load(fn, HTS_FMT_CSI) - #define bcf_index_seqnames(idx, hdr, nptr) hts_idx_seqnames((idx),(nptr),(hts_id2name_f)(bcf_hdr_id2name),(hdr)) - -/// Load a BCF index from a given index file name -/** @param fn Input BAM/BCF/etc filename - @param fnidx The input index filename - @return The index, or NULL if an error occurred. - @note This only works for BCF files. Consider synced_bcf_reader instead -which works for both BCF and VCF. 
-*/ - HTSLIB_EXPORT - hts_idx_t *bcf_index_load2(const char *fn, const char *fnidx); - -/// Load a BCF index from a given index file name -/** @param fn Input BAM/BCF/etc filename - @param fnidx The input index filename - @param flags Flags to alter behaviour (see description) - @return The index, or NULL if an error occurred. - @note This only works for BCF files. Consider synced_bcf_reader instead -which works for both BCF and VCF. - - The @p flags parameter can be set to a combination of the following - values: - - HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes - HTS_IDX_SILENT_FAIL Fail silently if the index is not present - - Equivalent to hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags); -*/ - HTSLIB_EXPORT - hts_idx_t *bcf_index_load3(const char *fn, const char *fnidx, int flags); - - /** - * bcf_index_build() - Generate and save an index file - * @fn: Input VCF(compressed)/BCF filename - * @min_shift: log2(width of the smallest bin), e.g. a value of 14 - * imposes a 16k base lower limit on the width of index bins. - * Positive to generate CSI, or 0 to generate TBI. However, a small - * value of min_shift would create a large index, which would lead to - * reduced performance when using the index. A recommended value is 14. - * For BCF files, only the CSI index can be generated. - * - * Returns 0 if successful, or negative if an error occurred. - * - * List of error codes: - * -1 .. indexing failed - * -2 .. opening @fn failed - * -3 .. format not indexable - * -4 .. failed to create and/or save the index - */ - HTSLIB_EXPORT - int bcf_index_build(const char *fn, int min_shift); - - /** - * bcf_index_build2() - Generate and save an index to a specific file - * @fn: Input VCF/BCF filename - * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn - * @min_shift: Positive to generate CSI, or 0 to generate TBI - * - * Returns 0 if successful, or negative if an error occurred. - * - * List of error codes: - * -1 .. indexing failed - * -2 .. 
opening @fn failed - * -3 .. format not indexable - * -4 .. failed to create and/or save the index - */ - HTSLIB_EXPORT - int bcf_index_build2(const char *fn, const char *fnidx, int min_shift); - - /** - * bcf_index_build3() - Generate and save an index to a specific file - * @fn: Input VCF/BCF filename - * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn - * @min_shift: Positive to generate CSI, or 0 to generate TBI - * @n_threads: Number of VCF/BCF decoder threads - * - * Returns 0 if successful, or negative if an error occurred. - * - * List of error codes: - * -1 .. indexing failed - * -2 .. opening @fn failed - * -3 .. format not indexable - * -4 .. failed to create and/or save the index - */ - HTSLIB_EXPORT - int bcf_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads); - - /// Initialise fp->idx for the current format type, for VCF and BCF files. - /** @param fp File handle for the data file being written. - @param h BCF header structured (needed for BAI and CSI). - @param min_shift CSI bin size (CSI default is 14). - @param fnidx Filename to write index to. This pointer must remain valid - until after bcf_idx_save is called. - @return 0 on success, <0 on failure. - @note This must be called after the header has been written, but before - any other data. - */ - HTSLIB_EXPORT - int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx); - - /// Writes the index initialised with bcf_idx_init to disk. - /** @param fp File handle for the data file being written. - @return 0 on success, <0 on failure. - */ - HTSLIB_EXPORT - int bcf_idx_save(htsFile *fp); - -/******************* - * Typed value I/O * - *******************/ - -/* - Note that in contrast with BCFv2.1 specification, HTSlib implementation - allows missing values in vectors. For integer types, the values 0x80, - 0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001, - 0x80000001 as end-of-vector indicators. 
Similarly for floats, the value of - 0x7F800001 is interpreted as a missing value and 0x7F800002 as an - end-of-vector indicator. - Note that the end-of-vector byte is not part of the vector. - - This trial BCF version (v2.2) is compatible with the VCF specification and - enables to handle correctly vectors with different ploidy in presence of - missing values. - */ -#define bcf_int8_vector_end (-127) /* INT8_MIN + 1 */ -#define bcf_int16_vector_end (-32767) /* INT16_MIN + 1 */ -#define bcf_int32_vector_end (-2147483647) /* INT32_MIN + 1 */ -#define bcf_int64_vector_end (-9223372036854775807LL) /* INT64_MIN + 1 */ -#define bcf_str_vector_end 0 -#define bcf_int8_missing (-128) /* INT8_MIN */ -#define bcf_int16_missing (-32767-1) /* INT16_MIN */ -#define bcf_int32_missing (-2147483647-1) /* INT32_MIN */ -#define bcf_int64_missing (-9223372036854775807LL - 1LL) /* INT64_MIN */ - -// All of the above are values, which may occur multiple times in lists of -// integers or lists of floating point. Strings in VCF don't have -// lists - a list of strings is just another (comma-separated) string. -// -// Hence bcf_str_missing is the whole string being missing rather than -// an element of a list. Ie a string of length zero: (0<<4)|BCF_BT_CHAR. -#define bcf_str_missing BCF_BT_CHAR - -// Limits on BCF values stored in given types. Max values are the same -// as for the underlying type. Min values are slightly different as -// the last 8 values for each type were reserved by BCFv2.2. 
-#define BCF_MAX_BT_INT8 (0x7f) /* INT8_MAX */ -#define BCF_MAX_BT_INT16 (0x7fff) /* INT16_MAX */ -#define BCF_MAX_BT_INT32 (0x7fffffff) /* INT32_MAX */ -#define BCF_MIN_BT_INT8 (-120) /* INT8_MIN + 8 */ -#define BCF_MIN_BT_INT16 (-32760) /* INT16_MIN + 8 */ -#define BCF_MIN_BT_INT32 (-2147483640) /* INT32_MIN + 8 */ - -HTSLIB_EXPORT -extern uint32_t bcf_float_vector_end; -HTSLIB_EXPORT -extern uint32_t bcf_float_missing; -static inline void bcf_float_set(float *ptr, uint32_t value) -{ - union { uint32_t i; float f; } u; - u.i = value; - *ptr = u.f; -} -#define bcf_float_set_vector_end(x) bcf_float_set(&(x),bcf_float_vector_end) -#define bcf_float_set_missing(x) bcf_float_set(&(x),bcf_float_missing) -static inline int bcf_float_is_missing(float f) -{ - union { uint32_t i; float f; } u; - u.f = f; - return u.i==bcf_float_missing ? 1 : 0; -} -static inline int bcf_float_is_vector_end(float f) -{ - union { uint32_t i; float f; } u; - u.f = f; - return u.i==bcf_float_vector_end ? 1 : 0; -} - -static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str) -{ - uint32_t e = 0; - #define BRANCH(type_t, convert, missing, vector_end) { \ - uint8_t *ptr = fmt->p + isample*fmt->size; \ - int i; \ - for (i=0; in; i++, ptr += sizeof(type_t)) \ - { \ - type_t val = convert(ptr); \ - if ( val == vector_end ) break; \ - if ( i ) e |= kputc("/|"[val&1], str) < 0; \ - if ( !(val>>1) ) e |= kputc('.', str) < 0; \ - else e |= kputw((val>>1) - 1, str) < 0; \ - } \ - if (i == 0) e |= kputc('.', str) < 0; \ - } - switch (fmt->type) { - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break; - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break; - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break; - case BCF_BT_NULL: e |= kputc('.', str) < 0; break; - default: hts_log_error("Unexpected type %d", fmt->type); return -2; - } - #undef BRANCH - return e == 0 ? 
0 : -1; -} - -static inline int bcf_enc_size(kstring_t *s, int size, int type) -{ - // Most common case is first - if (size < 15) { - if (ks_resize(s, s->l + 1) < 0) - return -1; - uint8_t *p = (uint8_t *)s->s + s->l; - *p++ = (size<<4) | type; - s->l++; - return 0; - } - - if (ks_resize(s, s->l + 6) < 0) - return -1; - uint8_t *p = (uint8_t *)s->s + s->l; - *p++ = 15<<4|type; - - if (size < 128) { - *p++ = 1<<4|BCF_BT_INT8; - *p++ = size; - s->l += 3; - } else { - if (size < 32768) { - *p++ = 1<<4|BCF_BT_INT16; - i16_to_le(size, p); - s->l += 4; - } else { - *p++ = 1<<4|BCF_BT_INT32; - i32_to_le(size, p); - s->l += 6; - } - } - return 0; -} - -static inline int bcf_enc_inttype(long x) -{ - if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) return BCF_BT_INT8; - if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) return BCF_BT_INT16; - return BCF_BT_INT32; -} - -static inline int bcf_enc_int1(kstring_t *s, int32_t x) -{ - if (ks_resize(s, s->l + 5) < 0) - return -1; - uint8_t *p = (uint8_t *)s->s + s->l; - - if (x == bcf_int32_vector_end) { - // An inline implementation of bcf_enc_size with size==1 and - // memory allocation already accounted for. - *p = (1<<4) | BCF_BT_INT8; - p[1] = bcf_int8_vector_end; - s->l+=2; - } else if (x == bcf_int32_missing) { - *p = (1<<4) | BCF_BT_INT8; - p[1] = bcf_int8_missing; - s->l+=2; - } else if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) { - *p = (1<<4) | BCF_BT_INT8; - p[1] = x; - s->l+=2; - } else if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) { - *p = (1<<4) | BCF_BT_INT16; - i16_to_le(x, p+1); - s->l+=3; - } else { - *p = (1<<4) | BCF_BT_INT32; - i32_to_le(x, p+1); - s->l+=5; - } - - return 0; -} - -/// Return the value of a single typed integer. -/** @param p Pointer to input data block. - @param type One of the BCF_BT_INT* type codes - @param[out] q Location to store an updated value for p - @return The integer value, or zero if @p type is not valid. 
- -If @p type is not one of BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or -BCF_BT_INT64, zero will be returned and @p *q will not be updated. -Otherwise, the integer value will be returned and @p *q will be set -to the memory location immediately following the integer value. - -Cautious callers can detect invalid type codes by checking that *q has -actually been updated. -*/ - -static inline int64_t bcf_dec_int1(const uint8_t *p, int type, uint8_t **q) -{ - if (type == BCF_BT_INT8) { - *q = (uint8_t*)p + 1; - return le_to_i8(p); - } else if (type == BCF_BT_INT16) { - *q = (uint8_t*)p + 2; - return le_to_i16(p); - } else if (type == BCF_BT_INT32) { - *q = (uint8_t*)p + 4; - return le_to_i32(p); - } else if (type == BCF_BT_INT64) { - *q = (uint8_t*)p + 8; - return le_to_i64(p); - } else { // Invalid type. - return 0; - } -} - -/// Return the value of a single typed integer from a byte stream. -/** @param p Pointer to input data block. - @param[out] q Location to store an updated value for p - @return The integer value, or zero if the type code was not valid. - -Reads a one-byte type code from @p p, and uses it to decode an integer -value from the following bytes in @p p. - -If the type is not one of BCF_BT_INT8, BCF_BT_INT16 or BCF_BT_INT32, zero -will be returned and @p *q will unchanged. Otherwise, the integer value will -be returned and @p *q will be set to the memory location immediately following -the integer value. - -Cautious callers can detect invalid type codes by checking that *q has -actually been updated. 
-*/ -static inline int64_t bcf_dec_typed_int1(const uint8_t *p, uint8_t **q) -{ - return bcf_dec_int1(p + 1, *p&0xf, q); -} - -static inline int32_t bcf_dec_size(const uint8_t *p, uint8_t **q, int *type) -{ - *type = *p & 0xf; - if (*p>>4 != 15) { - *q = (uint8_t*)p + 1; - return *p>>4; - } else return bcf_dec_typed_int1(p + 1, q); -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/vcf_sweep.h b/src/htslib-1.19.1/htslib/vcf_sweep.h deleted file mode 100644 index 9200554..0000000 --- a/src/htslib-1.19.1/htslib/vcf_sweep.h +++ /dev/null @@ -1,57 +0,0 @@ -/// @file htslib/vcf_sweep.h -/// Forward/reverse sweep API. -/* - Copyright (C) 2013-2015, 2019 Genome Research Ltd. - - Author: Petr Danecek - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef HTSLIB_VCF_SWEEP_H -#define HTSLIB_VCF_SWEEP_H - -#include "hts.h" -#include "vcf.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct bcf_sweep_t bcf_sweep_t; - -HTSLIB_EXPORT -bcf_sweep_t *bcf_sweep_init(const char *fname); - -HTSLIB_EXPORT -void bcf_sweep_destroy(bcf_sweep_t *sw); - -HTSLIB_EXPORT -bcf_hdr_t *bcf_sweep_hdr(bcf_sweep_t *sw); - -HTSLIB_EXPORT -bcf1_t *bcf_sweep_fwd(bcf_sweep_t *sw); - -HTSLIB_EXPORT -bcf1_t *bcf_sweep_bwd(bcf_sweep_t *sw); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib/vcfutils.h b/src/htslib-1.19.1/htslib/vcfutils.h deleted file mode 100644 index dd69edd..0000000 --- a/src/htslib-1.19.1/htslib/vcfutils.h +++ /dev/null @@ -1,143 +0,0 @@ -/// @file htslib/vcfutils.h -/// Allele-related utility functions. -/* - Copyright (C) 2012, 2013, 2015-2016 Genome Research Ltd. - - Author: Petr Danecek - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef HTSLIB_VCFUTILS_H -#define HTSLIB_VCFUTILS_H - -#include "vcf.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct kbitset_t; - -/** - * bcf_trim_alleles() - remove ALT alleles unused in genotype fields - * @header: for access to BCF_DT_ID dictionary - * @line: VCF line obtain from vcf_parse1 - * - * Returns the number of removed alleles on success or negative - * on error: - * -1 .. some allele index is out of bounds - * -2 .. could not remove alleles - */ -HTSLIB_EXPORT -int bcf_trim_alleles(const bcf_hdr_t *header, bcf1_t *line); - -/** - * bcf_remove_alleles() - remove ALT alleles according to bitmask @mask - * @header: for access to BCF_DT_ID dictionary - * @line: VCF line obtained from vcf_parse1 - * @mask: alleles to remove - * - * If you have more than 31 alleles, then the integer bit mask will - * overflow, so use bcf_remove_allele_set instead - * Returns 0 on success, <0 on error - */ -HTSLIB_EXPORT -int bcf_remove_alleles(const bcf_hdr_t *header, bcf1_t *line, int mask) HTS_DEPRECATED("Please use bcf_remove_allele_set instead"); - -/** - * bcf_remove_allele_set() - remove ALT alleles according to bitset @rm_set - * @header: for access to BCF_DT_ID dictionary - * @line: VCF line obtained from vcf_parse1 - * @rm_set: pointer to kbitset_t object with bits set for allele - * indexes to remove - * - * Returns 0 on success or -1 on failure - * - * Number=A,R,G INFO and FORMAT fields will be updated accordingly. - */ -HTSLIB_EXPORT -int bcf_remove_allele_set(const bcf_hdr_t *header, bcf1_t *line, const struct kbitset_t *rm_set); - -/** - * bcf_calc_ac() - calculate the number of REF and ALT alleles - * @header: for access to BCF_DT_ID dictionary - * @line: VCF line obtained from vcf_parse1 - * @ac: array of length line->n_allele - * @which: determine if INFO/AN,AC and indv fields be used - * - * Returns 1 if the call succeeded, or 0 if the value could not - * be determined. 
- * - * The value of @which determines if existing INFO/AC,AN can be - * used (BCF_UN_INFO) and and if indv fields can be split (BCF_UN_FMT). - */ -HTSLIB_EXPORT -int bcf_calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which); - - -/** - * bcf_gt_type() - determines type of the genotype - * @fmt_ptr: the GT format field as set for example by set_fmt_ptr - * @isample: sample index (starting from 0) - * @ial: index of the 1st non-reference allele (starting from 1) - * @jal: index of the 2nd non-reference allele (starting from 1) - * - * Returns the type of the genotype (one of GT_HOM_RR, GT_HET_RA, - * GT_HOM_AA, GT_HET_AA, GT_HAPL_R, GT_HAPL_A or GT_UNKN). If $ial - * is not NULL and the genotype has one or more non-reference - * alleles, $ial will be set. In case of GT_HET_AA, $ial is the - * position of the allele which appeared first in ALT. If $jal is - * not null and the genotype is GT_HET_AA, $jal will be set and is - * the position of the second allele in ALT. - */ -#define GT_HOM_RR 0 // note: the actual value of GT_* matters, used in dosage r2 calculation -#define GT_HOM_AA 1 -#define GT_HET_RA 2 -#define GT_HET_AA 3 -#define GT_HAPL_R 4 -#define GT_HAPL_A 5 -#define GT_UNKN 6 -HTSLIB_EXPORT -int bcf_gt_type(bcf_fmt_t *fmt_ptr, int isample, int *ial, int *jal); - -static inline int bcf_acgt2int(char c) -{ - if ( (int)c>96 ) c -= 32; - if ( c=='A' ) return 0; - if ( c=='C' ) return 1; - if ( c=='G' ) return 2; - if ( c=='T' ) return 3; - return -1; -} - -#define bcf_int2acgt(i) "ACGT"[i] - -/** - * bcf_ij2G() - common task: allele indexes to Number=G index (diploid) - * @i,j: allele indexes, 0-based, i<=j - * - * Returns index to the Number=G diploid array - */ -#define bcf_ij2G(i, j) ((j)*((j)+1)/2+(i)) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/htslib_vars.mk b/src/htslib-1.19.1/htslib_vars.mk deleted file mode 100644 index 6af7186..0000000 --- a/src/htslib-1.19.1/htslib_vars.mk +++ /dev/null @@ -1,54 +0,0 @@ -# 
Makefile variables useful for third-party code using htslib's public API. -# -# Copyright (C) 2013-2017, 2019-2020 Genome Research Ltd. -# -# Author: John Marshall -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# These variables can be used to express dependencies on htslib headers. -# See htslib.mk for details. 
- -htslib_bgzf_h = $(HTSPREFIX)htslib/bgzf.h $(htslib_hts_defs_h) -htslib_cram_h = $(HTSPREFIX)htslib/cram.h $(htslib_hts_defs_h) $(htslib_hts_h) $(htslib_sam_h) -htslib_faidx_h = $(HTSPREFIX)htslib/faidx.h $(htslib_hts_defs_h) $(htslib_hts_h) -htslib_hfile_h = $(HTSPREFIX)htslib/hfile.h $(htslib_hts_defs_h) -htslib_hts_h = $(HTSPREFIX)htslib/hts.h $(htslib_hts_defs_h) $(htslib_hts_log_h) $(htslib_kstring_h) $(htslib_kroundup_h) -htslib_hts_defs_h = $(HTSPREFIX)htslib/hts_defs.h -htslib_hts_endian_h = $(HTSPREFIX)htslib/hts_endian.h -htslib_hts_expr_h = $(HTSPREFIX)htslib/hts_expr.h $(htslib_kstring_h) $(htslib_hts_defs_h) -htslib_hts_log_h = $(HTSPREFIX)htslib/hts_log.h $(htslib_hts_defs_h) -htslib_hts_os_h = $(HTSPREFIX)htslib/hts_os.h $(htslib_hts_defs_h) -htslib_kbitset_h = $(HTSPREFIX)htslib/kbitset.h -htslib_kfunc_h = $(HTSPREFIX)htslib/kfunc.h $(htslib_hts_defs_h) -htslib_khash_h = $(HTSPREFIX)htslib/khash.h $(htslib_kstring_h) $(htslib_kroundup_h) -htslib_khash_str2int_h = $(HTSPREFIX)htslib/khash_str2int.h $(htslib_khash_h) -htslib_klist_h = $(HTSPREFIX)htslib/klist.h -htslib_kroundup_h = $(HTSPREFIX)htslib/kroundup.h -htslib_kseq_h = $(HTSPREFIX)htslib/kseq.h -htslib_ksort_h = $(HTSPREFIX)htslib/ksort.h $(htslib_hts_defs_h) -htslib_kstring_h = $(HTSPREFIX)htslib/kstring.h $(htslib_hts_defs_h) $(htslib_kroundup_h) -htslib_regidx_h = $(HTSPREFIX)htslib/regidx.h $(htslib_hts_h) -htslib_sam_h = $(HTSPREFIX)htslib/sam.h $(htslib_hts_h) $(htslib_hts_endian_h) -htslib_synced_bcf_reader_h = $(HTSPREFIX)htslib/synced_bcf_reader.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_tbx_h) -htslib_tbx_h = $(HTSPREFIX)htslib/tbx.h $(htslib_hts_h) -htslib_thread_pool_h = $(HTSPREFIX)htslib/thread_pool.h $(htslib_hts_defs_h) -htslib_vcf_h = $(HTSPREFIX)htslib/vcf.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_hts_defs_h) $(htslib_hts_endian_h) -htslib_vcf_sweep_h = $(HTSPREFIX)htslib/vcf_sweep.h $(htslib_hts_h) $(htslib_vcf_h) -htslib_vcfutils_h = $(HTSPREFIX)htslib/vcfutils.h 
$(htslib_vcf_h) diff --git a/src/htslib-1.19.1/kfunc.c b/src/htslib-1.19.1/kfunc.c deleted file mode 100644 index bf15cdf..0000000 --- a/src/htslib-1.19.1/kfunc.c +++ /dev/null @@ -1,313 +0,0 @@ -/* The MIT License - - Copyright (C) 2010, 2013-2014, 2020 Genome Research Ltd. - Copyright (C) 2011 Attractive Chaos - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include "htslib/kfunc.h" - -/* Log gamma function - * \log{\Gamma(z)} - * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 - */ -double kf_lgamma(double z) -{ - double x = 0; - x += 0.1659470187408462e-06 / (z+7); - x += 0.9934937113930748e-05 / (z+6); - x -= 0.1385710331296526 / (z+5); - x += 12.50734324009056 / (z+4); - x -= 176.6150291498386 / (z+3); - x += 771.3234287757674 / (z+2); - x -= 1259.139216722289 / (z+1); - x += 676.5203681218835 / z; - x += 0.9999999999995183; - return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5); -} - -/* complementary error function - * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt - * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66 - */ -double kf_erfc(double x) -{ - const double p0 = 220.2068679123761; - const double p1 = 221.2135961699311; - const double p2 = 112.0792914978709; - const double p3 = 33.912866078383; - const double p4 = 6.37396220353165; - const double p5 = .7003830644436881; - const double p6 = .03526249659989109; - const double q0 = 440.4137358247522; - const double q1 = 793.8265125199484; - const double q2 = 637.3336333788311; - const double q3 = 296.5642487796737; - const double q4 = 86.78073220294608; - const double q5 = 16.06417757920695; - const double q6 = 1.755667163182642; - const double q7 = .08838834764831844; - double expntl, z, p; - z = fabs(x) * M_SQRT2; - if (z > 37.) return x > 0.? 0. : 2.; - expntl = exp(z * z * - .5); - if (z < 10. / M_SQRT2) // for small z - p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0) - / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0); - else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65))))); - return x > 0.? 2. * p : 2. * (1. - p); -} - -/* The following computes regularized incomplete gamma functions. 
- * Formulas are taken from Wiki, with additional input from Numerical - * Recipes in C (for modified Lentz's algorithm) and AS245 - * (http://lib.stat.cmu.edu/apstat/245). - * - * A good online calculator is available at: - * - * http://www.danielsoper.com/statcalc/calc23.aspx - * - * It calculates upper incomplete gamma function, which equals - * kf_gammaq(s,z)*tgamma(s). - */ - -#define KF_GAMMA_EPS 1e-14 -#define KF_TINY 1e-290 - -// regularized lower incomplete gamma function, by series expansion -static double _kf_gammap(double s, double z) -{ - double sum, x; - int k; - for (k = 1, sum = x = 1.; k < 100; ++k) { - sum += (x *= z / (s + k)); - if (x / sum < KF_GAMMA_EPS) break; - } - return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum)); -} -// regularized upper incomplete gamma function, by continued fraction -static double _kf_gammaq(double s, double z) -{ - int j; - double C, D, f; - f = 1. + z - s; C = f; D = 0.; - // Modified Lentz's algorithm for computing continued fraction - // See Numerical Recipes in C, 2nd edition, section 5.2 - for (j = 1; j < 100; ++j) { - double a = j * (s - j), b = (j<<1) + 1 + z - s, d; - D = b + a * D; - if (D < KF_TINY) D = KF_TINY; - C = b + a / C; - if (C < KF_TINY) C = KF_TINY; - D = 1. / D; - d = C * D; - f *= d; - if (fabs(d - 1.) < KF_GAMMA_EPS) break; - } - return exp(s * log(z) - z - kf_lgamma(s) - log(f)); -} - -double kf_gammap(double s, double z) -{ - return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z); -} - -double kf_gammaq(double s, double z) -{ - return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z); -} - -/* Regularized incomplete beta function. The method is taken from - * Numerical Recipe in C, 2nd edition, section 6.4. 
The following web - * page calculates the incomplete beta function, which equals - * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b): - * - * http://www.danielsoper.com/statcalc/calc36.aspx - */ -static double kf_betai_aux(double a, double b, double x) -{ - double C, D, f; - int j; - if (x == 0.) return 0.; - if (x == 1.) return 1.; - f = 1.; C = f; D = 0.; - // Modified Lentz's algorithm for computing continued fraction - for (j = 1; j < 200; ++j) { - double aa, d; - int m = j>>1; - aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1)) - : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m)); - D = 1. + aa * D; - if (D < KF_TINY) D = KF_TINY; - C = 1. + aa / C; - if (C < KF_TINY) C = KF_TINY; - D = 1. / D; - d = C * D; - f *= d; - if (fabs(d - 1.) < KF_GAMMA_EPS) break; - } - return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f; -} -double kf_betai(double a, double b, double x) -{ - return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. 
- x); -} - -#ifdef KF_MAIN -#include -int main(int argc, char *argv[]) -{ - double x = 5.5, y = 3; - double a, b; - printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x)); - printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y)); - a = 2; b = 2; x = 0.5; - printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b))); - return 0; -} -#endif - - -// log\binom{n}{k} -static double lbinom(int n, int k) -{ - if (k == 0 || n == k) return 0; - return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1); -} - -// n11 n12 | n1_ -// n21 n22 | n2_ -//-----------+---- -// n_1 n_2 | n - -// hypergeometric distribution -static double hypergeo(int n11, int n1_, int n_1, int n) -{ - return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1)); -} - -typedef struct { - int n11, n1_, n_1, n; - double p; -} hgacc_t; - -// incremental version of hypergenometric distribution -static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux) -{ - if (n1_ || n_1 || n) { - aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n; - } else { // then only n11 changed; the rest fixed - if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) { - if (n11 == aux->n11 + 1) { // incremental - aux->p *= (double)(aux->n1_ - aux->n11) / n11 - * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1); - aux->n11 = n11; - return aux->p; - } - if (n11 == aux->n11 - 1) { // incremental - aux->p *= (double)aux->n11 / (aux->n1_ - n11) - * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11); - aux->n11 = n11; - return aux->p; - } - } - aux->n11 = n11; - } - aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n); - return aux->p; -} - -double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two) -{ - int i, j, max, min; - double p, q, left, right; - hgacc_t aux; - int n1_, n_1, n; - - n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 
and n - max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail - min = n1_ + n_1 - n; // not sure why n11-n22 is used instead of min(n_1,n1_) - if (min < 0) min = 0; // min n11, for left tail - *two = *_left = *_right = 1.; - if (min == max) return 1.; // no need to do test - q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table - - if (q == 0.0) { - /* - If here, the calculated probablility is so small it can't be stored - in a double, which is possible when the table contains fairly large - numbers. If this happens, most of the calculation can be skipped - as 'left', 'right' and '*two' will be (to a good approximation) 0.0. - The returned values '*_left' and '*_right' depend on which side - of the hypergeometric PDF 'n11' sits. This can be found by - comparing with the mode of the distribution, the formula for which - can be found at: - https://en.wikipedia.org/wiki/Hypergeometric_distribution - Note that in the comparison we multiply through by the denominator - of the mode (n + 2) to avoid a division. 
- */ - if ((int64_t) n11 * ((int64_t) n + 2) < ((int64_t) n_1 + 1) * ((int64_t) n1_ + 1)) { - // Peak to right of n11, so probability will be lower for all - // of the region from min to n11 and higher for at least some - // of the region from n11 to max; hence abs(i-n11) will be 0, - // abs(j-n11) will be > 0 and: - *_left = 0.0; *_right = 1.0; *two = 0.0; - return 0.0; - } else { - // Peak to left of n11, so probability will be lower for all - // of the region from n11 to max and higher for at least some - // of the region from min to n11; hence abs(i-n11) will be > 0, - // abs(j-n11) will be 0 and: - *_left = 1.0; *_right = 0.0; *two = 0.0; - return 0.0; - } - } - - // left tail - p = hypergeo_acc(min, 0, 0, 0, &aux); - for (left = 0., i = min + 1; p < 0.99999999 * q && i<=max; ++i) // loop until underflow - left += p, p = hypergeo_acc(i, 0, 0, 0, &aux); - --i; - if (p < 1.00000001 * q) left += p; - else --i; - // right tail - p = hypergeo_acc(max, 0, 0, 0, &aux); - for (right = 0., j = max - 1; p < 0.99999999 * q && j>=0; --j) // loop until underflow - right += p, p = hypergeo_acc(j, 0, 0, 0, &aux); - ++j; - if (p < 1.00000001 * q) right += p; - else ++j; - // two-tail - *two = left + right; - if (*two > 1.) *two = 1.; - // adjust left and right - if (abs(i - n11) < abs(j - n11)) right = 1. - left + q; - else left = 1.0 - right + q; - *_left = left; *_right = right; - return q; -} diff --git a/src/htslib-1.19.1/kstring.c b/src/htslib-1.19.1/kstring.c deleted file mode 100644 index 958d2ef..0000000 --- a/src/htslib-1.19.1/kstring.c +++ /dev/null @@ -1,452 +0,0 @@ -/* The MIT License - - Copyright (C) 2011 by Attractive Chaos - Copyright (C) 2013-2018, 2020-2021 Genome Research Ltd. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include "htslib/kstring.h" - -int kputd(double d, kstring_t *s) { - int len = 0; - char buf[21], *cp = buf+20, *ep; - if (d == 0) { - if (signbit(d)) { - kputsn("-0",2,s); - return 2; - } else { - kputsn("0",1,s); - return 1; - } - } - - if (d < 0) { - kputc('-',s); - len = 1; - d=-d; - } - if (!(d >= 0.0001 && d <= 999999)) { - if (ks_resize(s, s->l + 50) < 0) - return EOF; - // We let stdio handle the exponent cases - int s2 = snprintf(s->s + s->l, s->m - s->l, "%g", d); - len += s2; - s->l += s2; - return len; - } - - // Correction for rounding - rather ugly - // Optimised for small numbers. 
- - uint32_t i; - if (d<0.001) i = rint(d*1000000000), cp -= 1; - else if (d < 0.01) i = rint(d*100000000), cp -= 2; - else if (d < 0.1) i = rint(d*10000000), cp -= 3; - else if (d < 1) i = rint(d*1000000), cp -= 4; - else if (d < 10) i = rint(d*100000), cp -= 5; - else if (d < 100) i = rint(d*10000), cp -= 6; - else if (d < 1000) i = rint(d*1000), cp -= 7; - else if (d < 10000) i = rint(d*100), cp -= 8; - else if (d < 100000) i = rint(d*10), cp -= 9; - else i = rint(d), cp -= 10; - - // integer i is always 6 digits, so print it 2 at a time. - static const char kputuw_dig2r[] = - "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; - - memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100; - memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100; - memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); - - // Except when it rounds up (d=0.009999999 is i=1000000) - if (i >= 100) - *--cp = '0' + (i/100); - - - int p = buf+20-cp; - if (p <= 10) { /* d < 1 */ - // 0.00123 is 123, so add leading zeros and 0. - ep = cp+5; // 6 precision - while (p < 10) { // aka d < 1 - *--cp = '0'; - p++; - } - *--cp = '.'; - *--cp = '0'; - } else { - // 123.001 is 123001 with p==13, so move 123 down and add "." - // Equiv to memmove(cp-1, cp, p-10); cp--; - char *xp = --cp; - ep = cp+6; - while (p > 10) { - xp[0] = xp[1]; - xp++; - p--; - } - xp[0] = '.'; - } - - // Cull trailing zeros - while (*ep == '0' && ep > cp) - ep--; - - // End can be 1 out due to the mostly-6 but occasionally 7 (i==1) case. - // Also code with "123." 
which should be "123" - if (*ep && *ep != '.') - ep++; - *ep = 0; - - int sl = ep-cp; - len += sl; - kputsn(cp, sl, s); - return len; -} - -int kvsprintf(kstring_t *s, const char *fmt, va_list ap) -{ - va_list args; - int l; - va_copy(args, ap); - - if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) { - double d = va_arg(args, double); - l = kputd(d, s); - va_end(args); - return l; - } - - if (!s->s) { - const size_t sz = 64; - s->s = malloc(sz); - if (!s->s) - return -1; - s->m = sz; - s->l = 0; - } - - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'. - va_end(args); - if (l + 1 > s->m - s->l) { - if (ks_resize(s, s->l + l + 2) < 0) - return -1; - va_copy(args, ap); - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); - va_end(args); - } - s->l += l; - return l; -} - -int ksprintf(kstring_t *s, const char *fmt, ...) -{ - va_list ap; - int l; - va_start(ap, fmt); - l = kvsprintf(s, fmt, ap); - va_end(ap); - return l; -} - -char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux) -{ - const unsigned char *p, *start, *sep = (unsigned char *) sep_in; - if (sep) { // set up the table - if (str == 0 && aux->finished) return 0; // no need to set up if we have finished - aux->finished = 0; - if (sep[0] && sep[1]) { - aux->sep = -1; - aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0; - for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f); - } else aux->sep = sep[0]; - } - if (aux->finished) return 0; - else if (str) start = (unsigned char *) str, aux->finished = 0; - else start = (unsigned char *) aux->p + 1; - if (aux->sep < 0) { - for (p = start; *p; ++p) - if (aux->tab[*p>>6]>>(*p&0x3f)&1) break; - } else { - // Using strchr is fast for next token, but slower for - // last token due to extra pass from strlen. Overall - // on a VCF parse this func was 146% faster with // strchr. 
- // Equiv to: - // for (p = start; *p; ++p) if (*p == aux->sep) break; - - // NB: We could use strchrnul() here from glibc if detected, - // which is ~40% faster again, but it's not so portable. - // i.e. p = (uint8_t *)strchrnul((char *)start, aux->sep); - uint8_t *p2 = (uint8_t *)strchr((char *)start, aux->sep); - p = p2 ? p2 : start + strlen((char *)start); - } - aux->p = (const char *) p; // end of token - if (*p == 0) aux->finished = 1; // no more tokens - return (char*)start; -} - -// s MUST BE a null terminated string; l = strlen(s) -int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) -{ - int i, n, max, last_char, last_start, *offsets, l; - n = 0; max = *_max; offsets = *_offsets; - l = strlen(s); - -#define __ksplit_aux do { \ - if (_offsets) { \ - s[i] = 0; \ - if (n == max) { \ - int *tmp; \ - max = max? max<<1 : 2; \ - if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \ - offsets = tmp; \ - } else { \ - free(offsets); \ - *_offsets = NULL; \ - return 0; \ - } \ - } \ - offsets[n++] = last_start; \ - } else ++n; \ - } while (0) - - for (i = 0, last_char = last_start = 0; i <= l; ++i) { - if (delimiter == 0) { - if (isspace((int)((unsigned char) s[i])) || s[i] == 0) { - if (isgraph(last_char)) - __ksplit_aux; // the end of a field - } else { - if (isspace(last_char) || last_char == 0) - last_start = i; - } - } else { - if (s[i] == delimiter || s[i] == 0) { - if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field - } else { - if (last_char == delimiter || last_char == 0) last_start = i; - } - } - last_char = (int)((unsigned char)s[i]); - } - *_max = max; *_offsets = offsets; - return n; -} - -int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp) -{ - size_t l0 = s->l; - - while (s->l == l0 || s->s[s->l-1] != '\n') { - if (s->m - s->l < 200) { - if (ks_resize(s, s->m + 200) < 0) - return EOF; - } - if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break; - s->l += strlen(s->s + s->l); - } - - if (s->l 
== l0) return EOF; - - if (s->l > l0 && s->s[s->l-1] == '\n') { - s->l--; - if (s->l > l0 && s->s[s->l-1] == '\r') s->l--; - } - s->s[s->l] = '\0'; - return 0; -} - -int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp) -{ - size_t l0 = s->l; - - while (s->l == l0 || s->s[s->l-1] != '\n') { - if (s->m - s->l < 200) { - // We return EOF for both EOF and error and the caller - // needs to check for errors in fp, and we haven't - // even got there yet. - // - // The only way of propagating memory errors is to - // deliberately call something that we know triggers - // and error so fp is also set. This works for - // hgets, but not for gets where reading <= 0 bytes - // isn't an error. - if (ks_resize(s, s->m + 200) < 0) { - fgets_fn(s->s + s->l, 0, fp); - return EOF; - } - } - ssize_t len = fgets_fn(s->s + s->l, s->m - s->l, fp); - if (len <= 0) break; - s->l += len; - } - - if (s->l == l0) return EOF; - - if (s->l > l0 && s->s[s->l-1] == '\n') { - s->l--; - if (s->l > l0 && s->s[s->l-1] == '\r') s->l--; - } - s->s[s->l] = '\0'; - return 0; -} - -/********************** - * Boyer-Moore search * - **********************/ - -typedef unsigned char ubyte_t; - -// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html -static int *ksBM_prep(const ubyte_t *pat, int m) -{ - int i, *suff, *prep, *bmGs, *bmBc; - prep = (int*)calloc(m + 256, sizeof(int)); - if (!prep) return NULL; - bmGs = prep; bmBc = prep + m; - { // preBmBc() - for (i = 0; i < 256; ++i) bmBc[i] = m; - for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1; - } - suff = (int*)calloc(m, sizeof(int)); - if (!suff) { free(prep); return NULL; } - { // suffixes() - int f = 0, g; - suff[m - 1] = m; - g = m - 1; - for (i = m - 2; i >= 0; --i) { - if (i > g && suff[i + m - 1 - f] < i - g) - suff[i] = suff[i + m - 1 - f]; - else { - if (i < g) g = i; - f = i; - while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; - suff[i] = f - g; - } - } - } - { // preBmGs() - int j = 0; - for (i = 0; i < m; ++i) 
bmGs[i] = m; - for (i = m - 1; i >= 0; --i) - if (suff[i] == i + 1) - for (; j < m - 1 - i; ++j) - if (bmGs[j] == m) - bmGs[j] = m - 1 - i; - for (i = 0; i <= m - 2; ++i) - bmGs[m - 1 - suff[i]] = m - 1 - i; - } - free(suff); - return prep; -} - -void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep) -{ - int i, j, *prep = 0, *bmGs, *bmBc; - const ubyte_t *str, *pat; - str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat; - prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep; - if (!prep) return NULL; - if (_prep && *_prep == 0) *_prep = prep; - bmGs = prep; bmBc = prep + m; - j = 0; - while (j <= n - m) { - for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); - if (i >= 0) { - int max = bmBc[str[i+j]] - m + 1 + i; - if (max < bmGs[i]) max = bmGs[i]; - j += max; - } else return (void*)(str + j); - } - if (_prep == 0) free(prep); - return 0; -} - -char *kstrstr(const char *str, const char *pat, int **_prep) -{ - return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep); -} - -char *kstrnstr(const char *str, const char *pat, int n, int **_prep) -{ - return (char*)kmemmem(str, n, pat, strlen(pat), _prep); -} - -/*********************** - * The main() function * - ***********************/ - -#ifdef KSTRING_MAIN -#include -int main() -{ - kstring_t *s; - int *fields, n, i; - ks_tokaux_t aux; - char *p; - s = (kstring_t*)calloc(1, sizeof(kstring_t)); - // test ksprintf() - ksprintf(s, " abcdefg: %d ", 100); - printf("'%s'\n", s->s); - // test ksplit() - fields = ksplit(s, 0, &n); - for (i = 0; i < n; ++i) - printf("field[%d] = '%s'\n", i, s->s + fields[i]); - // test kstrtok() - s->l = 0; - for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) { - kputsn(p, aux.p - p, s); - kputc('\n', s); - } - printf("%s", s->s); - // free - free(s->s); free(s); free(fields); - - { - static char *str = "abcdefgcdgcagtcakcdcd"; - static char *pat = "cd"; - char *ret, *s = str; - int *prep = 0; - while ((ret = kstrstr(s, 
pat, &prep)) != 0) { - printf("match: %s\n", ret); - s = ret + prep[0]; - } - free(prep); - } - return 0; -} -#endif diff --git a/src/htslib-1.19.1/m4/hts_hide_dynamic_syms.m4 b/src/htslib-1.19.1/m4/hts_hide_dynamic_syms.m4 deleted file mode 100644 index 62ccb8e..0000000 --- a/src/htslib-1.19.1/m4/hts_hide_dynamic_syms.m4 +++ /dev/null @@ -1,65 +0,0 @@ -dnl @synopsis HTS_HIDE_DYNAMIC_SYMBOLS -dnl -dnl Turn on compiler options that prevent unwanted symbols from being exported -dnl by shared libraries. -dnl -dnl @author Rob Davies -dnl @license MIT/Expat -dnl -dnl Copyright (C) 2018 Genome Research Ltd. -dnl -dnl Permission is hereby granted, free of charge, to any person obtaining a copy -dnl of this software and associated documentation files (the "Software"), to -dnl deal in the Software without restriction, including without limitation the -dnl rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -dnl sell copies of the Software, and to permit persons to whom the Software is -dnl furnished to do so, subject to the following conditions: -dnl -dnl The above copyright notice and this permission notice shall be included in -dnl all copies or substantial portions of the Software. -dnl -dnl THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -dnl IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -dnl FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -dnl THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -dnl LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -dnl FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -dnl DEALINGS IN THE SOFTWARE. - -# SYNOPSIS -# -# HTS_TEST_CC_C_LD_FLAG(FLAG, FOUND_VAR) -# -# Test if FLAG can be used on both CFLAGS and LDFLAGS. It it works, -# variable FOUND_VAR is set to FLAG. 
- -AC_DEFUN([HTS_TEST_CC_C_LD_FLAG], - [AS_VAR_PUSHDEF([hts_cv_check_flag],[hts_cv_check_$1])dnl - AC_CACHE_CHECK([whether the compiler accepts $1], - [hts_cv_check_flag], - [ac_check_save_cflags=$CFLAGS - ac_check_save_ldflags=$LDFLAGS - CFLAGS="$CFLAGS $1" - LDFLAGS="$LDFLAGS $1" - AC_LINK_IFELSE([AC_LANG_PROGRAM()], - [AS_VAR_SET([hts_cv_check_flag],[yes]) - AS_IF([test "x$2" != x],[eval AS_TR_SH([$2])="$1"])], - [AS_VAR_SET([hts_cv_check_flag],[no])]) - CFLAGS=$ac_check_save_cflags - LDFLAGS=$ac_check_save_ldflags]) - AS_VAR_POPDEF([hts_cv_check_flag])dnl -]) - -AC_DEFUN([HTS_HIDE_DYNAMIC_SYMBOLS], [ - # Test for flags to set default shared library visibility to hidden - # -fvisibility=hidden : GCC compatible - # -xldscope=hidden : SunStudio - ac_opt_found=no - m4_foreach_w([ac_opt],[-fvisibility=hidden -xldscope=hidden], - [AS_IF([test "x$ac_opt_found" = "xno"], - [HTS_TEST_CC_C_LD_FLAG(ac_opt,[ac_opt_found])]) - ]) - AS_IF([test "x$ac_opt_found" != "xno"], - [CFLAGS="$CFLAGS $ac_opt_found" - LDFLAGS="$LDFLAGS $ac_opt_found"]) -]) diff --git a/src/htslib-1.19.1/m4/hts_prog_cc_warnings.m4 b/src/htslib-1.19.1/m4/hts_prog_cc_warnings.m4 deleted file mode 100644 index f2aed93..0000000 --- a/src/htslib-1.19.1/m4/hts_prog_cc_warnings.m4 +++ /dev/null @@ -1,208 +0,0 @@ -dnl @synopsis HTS_PROG_CC_WARNINGS([ANSI]) -dnl -dnl Derived from -dnl http://ac-archive.sourceforge.net/ac-archive/vl_prog_cc_warnings.html -dnl -dnl Enables a reasonable set of warnings for the C compiler. -dnl Optionally, if the first argument is nonempty, turns on flags which -dnl enforce and/or enable proper ANSI C if such are known with the -dnl compiler used. -dnl -dnl Currently this macro knows about GCC, Solaris C compiler, Digital -dnl Unix C compiler, C for AIX Compiler, HP-UX C compiler, IRIX C -dnl compiler, NEC SX-5 (Super-UX 10) C compiler, and Cray J90 (Unicos -dnl 10.0.0.8) C compiler. 
-dnl -dnl @category C -dnl @author Ville Laurikari -dnl Updated by Rob Davies for HTSlib -dnl @license AllPermissive -dnl Copying and distribution of this file, with or without modification, -dnl are permitted in any medium without royalty provided the copyright notice -dnl and this notice are preserved. Users of this software should generally -dnl follow the principles of the MIT License including its disclaimer. -dnl Original Copyright (c) Ville Laurikari 2002 -dnl Modifications Copyright (c) Genome Research Limited 2015,2017 - -AC_DEFUN([HTS_PROG_CC_WARNINGS], [ - AC_ARG_ENABLE([warnings], - [AS_HELP_STRING([--disable-warnings], [turn off compiler warnings])], - [], - [enable_warnings=yes]) - - AS_IF([test "x$enable_warnings" != xno],[ - AC_REQUIRE([AC_PROG_GREP]) - - ansi="$1" - AS_IF([test "x$ansi" = "x"], - [msg="for C compiler warning flags"], - [msg="for C compiler warning and ANSI conformance flags"]) - - AC_MSG_CHECKING($msg) - AC_CACHE_VAL(hts_cv_prog_cc_warnings, [dnl - hts_cv_prog_cc_warnings="" - AS_IF([test "x$CC" != "x"],[ - cat > conftest.c < /dev/null 2>&1 && - test -f conftest.o],[dnl - AS_IF([test "x$ansi" = "x"], - [hts_cv_prog_cc_warnings="-Wall"], - [hts_cv_prog_cc_warnings="-Wall -ansi -pedantic"]) - ], - # Sun Studio or Solaris C compiler - ["$CC" -V 2>&1 | $GREP -i -E "WorkShop|Sun C" > /dev/null 2>&1 && - "$CC" -c -v -Xc conftest.c > /dev/null 2>&1 && - test -f conftest.o],[dnl - AS_IF([test "x$ansi" = "x"], - [hts_cv_prog_cc_warnings="-v"], - [hts_cv_prog_cc_warnings="-v -Xc"]) - ], - # Digital Unix C compiler - ["$CC" -V 2>&1 | $GREP -i "Digital UNIX Compiler" > /dev/null 2>&1 && - "$CC" -c -verbose -w0 -warnprotos -std1 conftest.c > /dev/null 2>&1 && - test -f conftest.o], [dnl - AS_IF([test "x$ansi" = "x"], - [hts_cv_prog_cc_warnings="-verbose -w0 -warnprotos"], - [hts_cv_prog_cc_warnings="-verbose -w0 -warnprotos -std1"]) - ], - # C for AIX Compiler - ["$CC" 2>&1 | $GREP -i "C for AIX Compiler" > /dev/null 2>&1 && - "$CC" -c 
-qlanglvl=ansi -qinfo=all conftest.c > /dev/null 2>&1 && - test -f conftest.o],[dnl - AS_IF([test "x$ansi" = "x"], - [hts_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd"], - [hts_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd -qlanglvl=ansi"]) - ], - # IRIX C compiler - ["$CC" -version 2>&1 | $GREP -i "MIPSpro Compilers" > /dev/null 2>&1 && - "$CC" -c -fullwarn -ansi -ansiE conftest.c > /dev/null 2>&1 && - test -f conftest.o],[dnl - AS_IF([test "x$ansi" = "x"], - [hts_cv_prog_cc_warnings="-fullwarn"], - [hts_cv_prog_cc_warnings="-fullwarn -ansi -ansiE"]) - ], - # HP-UX C compiler - [what "$CC" 2>&1 | $GREP -i "HP C Compiler" > /dev/null 2>&1 && - "$CC" -c -Aa +w1 conftest.c > /dev/null 2>&1 && - test -f conftest.o],[dnl - AS_IF([test "x$ansi" = "x"], - [hts_cv_prog_cc_warnings="+w1"], - [hts_cv_prog_cc_warnings="+w1 -Aa"]) - ], - # The NEC SX series (Super-UX 10) C compiler - ["$CC" -V 2>&1 | $GREP "/SX" > /dev/null 2>&1 && - "$CC" -c -pvctl[,]fullmsg -Xc conftest.c > /dev/null 2>&1 && - test -f conftest.o],[ - AS_IF([test "x$ansi" = "x"], - [hts_cv_prog_cc_warnings="-pvctl[,]fullmsg"], - [hts_cv_prog_cc_warnings="-pvctl[,]fullmsg -Xc"]) - ], - # The Cray C compiler (Unicos) - ["$CC" -V 2>&1 | $GREP -i "Cray" > /dev/null 2>&1 && - "$CC" -c -h msglevel_2 conftest.c > /dev/null 2>&1 && - test -f conftest.o],[dnl - AS_IF([test "x$ansi" = "x"], - [hts_cv_prog_cc_warnings="-h#msglevel_2"], - [hts_cv_prog_cc_warnings="-h#msglevel_2,conform"]) - ], - # The Tiny C Compiler - ["$CC" -v 2>&1 | $GREP "tcc version" > /dev/null && - "$CC" -Wall -c conftest.c > /dev/null 2>&1 && - test -f conftest.o],[dnl - hts_cv_prog_cc_warnings="-Wall" - ]) - rm -f conftest.* - ]) - ]) - - AS_IF([test "x$hts_cv_prog_cc_warnings" != "x"],[ -dnl Print result, with underscores as spaces -ac_arg_result=`echo "$hts_cv_prog_cc_warnings" | tr '#' ' '` -AC_MSG_RESULT($ac_arg_result) - -dnl Add options to CFLAGS only if they are not already present -ac_arg_needed="" 
-for ac_arg in $hts_cv_prog_cc_warnings -do - ac_arg_sp=`echo "$ac_arg" | tr '#' ' '` - AS_CASE([" $CFLAGS "], -[*" $ac_arg_sp "*], [], -[ac_arg_needed="$ac_arg_all $ac_arg_sp"]) -done -CFLAGS="$ac_arg_needed $CFLAGS"],[dnl - AC_MSG_RESULT(unknown) - ]) - ]) -])dnl HTS_PROG_CC_WARNINGS - -# SYNOPSIS -# -# HTS_PROG_CC_WERROR(FLAGS_VAR) -# -# Set FLAGS_VAR to the flags needed to make the C compiler treat warnings -# as errors. - -AC_DEFUN([HTS_PROG_CC_WERROR], [ - AC_ARG_ENABLE([werror], - [AS_HELP_STRING([--enable-werror], [change warnings into errors, where supported])], - [], - [enable_werror=no]) - - AS_IF([test "x$enable_werror" != xno],[ - AC_MSG_CHECKING([for C compiler flags to error on warnings]) - AC_CACHE_VAL(hts_cv_prog_cc_werror, [dnl - hts_cv_prog_cc_werror="" - AS_IF([test "x$CC" != "x"],[ - cat > conftest.c < /dev/null 2>&1 && - test -f conftest.o],[hts_cv_prog_cc_werror="-Werror"], - # Sun Studio or Solaris C compiler - ["$CC" -V 2>&1 | $GREP -i -E "WorkShop|Sun C" > /dev/null 2>&1 && - "$CC" -c -errwarn=%all conftest.c > /dev/null 2>&1 && - test -f conftest.o],[hts_cv_prog_cc_werror="-errwarn=%all"], - # The Tiny C Compiler - ["$CC" -v 2>&1 | $GREP "tcc version" > /dev/null && - "$CC" -Wall -c conftest.c > /dev/null 2>&1 && - test -f conftest.o],[hts_cv_prog_cc_werror="-Werror"] - dnl TODO: Add more compilers - ) - rm -f conftest.* - ]) - ]) - AS_IF([test "x$hts_cv_prog_cc_werror" != x],[ - AC_MSG_RESULT($hts_cv_prog_cc_werror) - AS_IF([test "x$1" != x],[eval AS_TR_SH([$1])="$hts_cv_prog_cc_werror"]) - ],[dnl - AC_MSG_RESULT(unknown) - ]) - ]) -])dnl HTS_PROG_CC_WERROR diff --git a/src/htslib-1.19.1/m4/pkg.m4 b/src/htslib-1.19.1/m4/pkg.m4 deleted file mode 100644 index 4b95a02..0000000 --- a/src/htslib-1.19.1/m4/pkg.m4 +++ /dev/null @@ -1,275 +0,0 @@ -# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- -# serial 12 (pkg-config-0.29.2) - -dnl Copyright © 2004 Scott James Remnant . 
-dnl Copyright © 2012-2015 Dan Nicholson -dnl -dnl This program is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU General Public License as published by -dnl the Free Software Foundation; either version 2 of the License, or -dnl (at your option) any later version. -dnl -dnl This program is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -dnl General Public License for more details. -dnl -dnl You should have received a copy of the GNU General Public License -dnl along with this program; if not, write to the Free Software -dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -dnl 02111-1307, USA. -dnl -dnl As a special exception to the GNU General Public License, if you -dnl distribute this file as part of a program that contains a -dnl configuration script generated by Autoconf, you may include it under -dnl the same distribution terms that you use for the rest of that -dnl program. - -dnl PKG_PREREQ(MIN-VERSION) -dnl ----------------------- -dnl Since: 0.29 -dnl -dnl Verify that the version of the pkg-config macros are at least -dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's -dnl installed version of pkg-config, this checks the developer's version -dnl of pkg.m4 when generating configure. -dnl -dnl To ensure that this macro is defined, also add: -dnl m4_ifndef([PKG_PREREQ], -dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) -dnl -dnl See the "Since" comment for each macro you use to see what version -dnl of the macros you require. 
-m4_defun([PKG_PREREQ], -[m4_define([PKG_MACROS_VERSION], [0.29.2]) -m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, - [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) -])dnl PKG_PREREQ - -dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) -dnl ---------------------------------- -dnl Since: 0.16 -dnl -dnl Search for the pkg-config tool and set the PKG_CONFIG variable to -dnl first found in the path. Checks that the version of pkg-config found -dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is -dnl used since that's the first version where most current features of -dnl pkg-config existed. -AC_DEFUN([PKG_PROG_PKG_CONFIG], -[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) -m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) -m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) -AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) -AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) -AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) - -if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then - AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) -fi -if test -n "$PKG_CONFIG"; then - _pkg_min_version=m4_default([$1], [0.9.0]) - AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) - if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - PKG_CONFIG="" - fi -fi[]dnl -])dnl PKG_PROG_PKG_CONFIG - -dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -dnl ------------------------------------------------------------------- -dnl Since: 0.18 -dnl -dnl Check to see whether a particular set of modules exists. Similar to -dnl PKG_CHECK_MODULES(), but does not set variables or print errors. 
-dnl -dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -dnl only at the first occurrence in configure.ac, so if the first place -dnl it's called might be skipped (such as if it is within an "if", you -dnl have to call PKG_CHECK_EXISTS manually -AC_DEFUN([PKG_CHECK_EXISTS], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -if test -n "$PKG_CONFIG" && \ - AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then - m4_default([$2], [:]) -m4_ifvaln([$3], [else - $3])dnl -fi]) - -dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) -dnl --------------------------------------------- -dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting -dnl pkg_failed based on the result. -m4_define([_PKG_CONFIG], -[if test -n "$$1"; then - pkg_cv_[]$1="$$1" - elif test -n "$PKG_CONFIG"; then - PKG_CHECK_EXISTS([$3], - [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` - test "x$?" != "x0" && pkg_failed=yes ], - [pkg_failed=yes]) - else - pkg_failed=untried -fi[]dnl -])dnl _PKG_CONFIG - -dnl _PKG_SHORT_ERRORS_SUPPORTED -dnl --------------------------- -dnl Internal check to see if pkg-config supports short errors. 
-AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then - _pkg_short_errors_supported=yes -else - _pkg_short_errors_supported=no -fi[]dnl -])dnl _PKG_SHORT_ERRORS_SUPPORTED - - -dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], -dnl [ACTION-IF-NOT-FOUND]) -dnl -------------------------------------------------------------- -dnl Since: 0.4.0 -dnl -dnl Note that if there is a possibility the first call to -dnl PKG_CHECK_MODULES might not happen, you should be sure to include an -dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac -AC_DEFUN([PKG_CHECK_MODULES], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl -AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl - -pkg_failed=no -AC_MSG_CHECKING([for $2]) - -_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) -_PKG_CONFIG([$1][_LIBS], [libs], [$2]) - -m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS -and $1[]_LIBS to avoid the need to call pkg-config. -See the pkg-config man page for more details.]) - -if test $pkg_failed = yes; then - AC_MSG_RESULT([no]) - _PKG_SHORT_ERRORS_SUPPORTED - if test $_pkg_short_errors_supported = yes; then - $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` - else - $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` - fi - # Put the nasty error message in config.log where it belongs - echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD - - m4_default([$4], [AC_MSG_ERROR( -[Package requirements ($2) were not met: - -$$1_PKG_ERRORS - -Consider adjusting the PKG_CONFIG_PATH environment variable if you -installed software in a non-standard prefix. - -_PKG_TEXT])[]dnl - ]) -elif test $pkg_failed = untried; then - AC_MSG_RESULT([no]) - m4_default([$4], [AC_MSG_FAILURE( -[The pkg-config script could not be found or is too old. 
Make sure it -is in your PATH or set the PKG_CONFIG environment variable to the full -path to pkg-config. - -_PKG_TEXT - -To get pkg-config, see .])[]dnl - ]) -else - $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS - $1[]_LIBS=$pkg_cv_[]$1[]_LIBS - AC_MSG_RESULT([yes]) - $3 -fi[]dnl -])dnl PKG_CHECK_MODULES - - -dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], -dnl [ACTION-IF-NOT-FOUND]) -dnl --------------------------------------------------------------------- -dnl Since: 0.29 -dnl -dnl Checks for existence of MODULES and gathers its build flags with -dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags -dnl and VARIABLE-PREFIX_LIBS from --libs. -dnl -dnl Note that if there is a possibility the first call to -dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to -dnl include an explicit call to PKG_PROG_PKG_CONFIG in your -dnl configure.ac. -AC_DEFUN([PKG_CHECK_MODULES_STATIC], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -_save_PKG_CONFIG=$PKG_CONFIG -PKG_CONFIG="$PKG_CONFIG --static" -PKG_CHECK_MODULES($@) -PKG_CONFIG=$_save_PKG_CONFIG[]dnl -])dnl PKG_CHECK_MODULES_STATIC - - -dnl PKG_INSTALLDIR([DIRECTORY]) -dnl ------------------------- -dnl Since: 0.27 -dnl -dnl Substitutes the variable pkgconfigdir as the location where a module -dnl should install pkg-config .pc files. By default the directory is -dnl $libdir/pkgconfig, but the default can be changed by passing -dnl DIRECTORY. The user can override through the --with-pkgconfigdir -dnl parameter. 
-AC_DEFUN([PKG_INSTALLDIR], -[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) -m4_pushdef([pkg_description], - [pkg-config installation directory @<:@]pkg_default[@:>@]) -AC_ARG_WITH([pkgconfigdir], - [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, - [with_pkgconfigdir=]pkg_default) -AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) -m4_popdef([pkg_default]) -m4_popdef([pkg_description]) -])dnl PKG_INSTALLDIR - - -dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) -dnl -------------------------------- -dnl Since: 0.27 -dnl -dnl Substitutes the variable noarch_pkgconfigdir as the location where a -dnl module should install arch-independent pkg-config .pc files. By -dnl default the directory is $datadir/pkgconfig, but the default can be -dnl changed by passing DIRECTORY. The user can override through the -dnl --with-noarch-pkgconfigdir parameter. -AC_DEFUN([PKG_NOARCH_INSTALLDIR], -[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) -m4_pushdef([pkg_description], - [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) -AC_ARG_WITH([noarch-pkgconfigdir], - [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, - [with_noarch_pkgconfigdir=]pkg_default) -AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) -m4_popdef([pkg_default]) -m4_popdef([pkg_description]) -])dnl PKG_NOARCH_INSTALLDIR - - -dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, -dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -dnl ------------------------------------------- -dnl Since: 0.28 -dnl -dnl Retrieves the value of the pkg-config variable for the given module. 
-AC_DEFUN([PKG_CHECK_VAR], -[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl -AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl - -_PKG_CONFIG([$1], [variable="][$3]["], [$2]) -AS_VAR_COPY([$1], [pkg_cv_][$1]) - -AS_VAR_IF([$1], [""], [$5], [$4])dnl -])dnl PKG_CHECK_VAR diff --git a/src/htslib-1.19.1/md5.c b/src/htslib-1.19.1/md5.c deleted file mode 100644 index 1a43da5..0000000 --- a/src/htslib-1.19.1/md5.c +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Trivial amendments by James Bonfield to provide an - * HTSlib interface. 2015. - * - * Externally our API uses an opaque hts_md5_context structure. - * - * Internally either this gets defined and used with the routines here - * or it remains incomplete and is cast to the OpenSSL MD5_CTX structure - * and used by routines from OpenSSL. - */ - -/* - * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. - * MD5 Message-Digest Algorithm (RFC 1321). - * - * Homepage: - * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 - * - * Author: - * Alexander Peslyak, better known as Solar Designer - * - * This software was written by Alexander Peslyak in 2001. No copyright is - * claimed, and the software is hereby placed in the public domain. - * In case this attempt to disclaim copyright and place the software in the - * public domain is deemed null and void, then the software is - * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the - * general public under the following terms: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. - * - * There's ABSOLUTELY NO WARRANTY, express or implied. - * - * (This is a heavily cut-down "BSD license".) 
- * - * This differs from Colin Plumb's older public domain implementation in that - * no exactly 32-bit integer data type is required (any 32-bit or wider - * unsigned integer data type will do), there's no compile-time endianness - * configuration, and the function prototypes match OpenSSL's. No code from - * Colin Plumb's implementation has been reused; this comment merely compares - * the properties of the two independent implementations. - * - * The primary goals of this implementation are portability and ease of use. - * It is meant to be fast, but not as fast as possible. Some known - * optimizations are not included to reduce source code size and avoid - * compile-time configuration. - */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include "htslib/hts.h" -#include "htslib/hts_endian.h" - -#ifndef HAVE_OPENSSL - -#include - -/* Any 32-bit or wider unsigned integer data type will do */ -typedef unsigned int hts_md5_u32plus; - -struct hts_md5_context { - hts_md5_u32plus lo, hi; - hts_md5_u32plus a, b, c, d; - unsigned char buffer[64]; - hts_md5_u32plus block[16]; -}; - -/* - * The basic MD5 functions. - * - * F and G are optimized compared to their RFC 1321 definitions for - * architectures that lack an AND-NOT instruction, just like in Colin Plumb's - * implementation. - */ -#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) -#define H(x, y, z) (((x) ^ (y)) ^ (z)) -#define H2(x, y, z) ((x) ^ ((y) ^ (z))) -#define I(x, y, z) ((y) ^ ((x) | ~(z))) - -/* - * The MD5 transformation for all four rounds. - */ -#define STEP(f, a, b, c, d, x, t, s) \ - (a) += f((b), (c), (d)) + (x) + (t); \ - (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ - (a) += (b); - -/* - * SET reads 4 input bytes in little-endian byte order and stores them - * in a properly aligned word in host byte order. 
- * - * The check for little-endian architectures that tolerate unaligned - * memory accesses is just an optimization. Nothing will break if it - * doesn't work. - */ -#if defined(HTS_LITTLE_ENDIAN) && HTS_ALLOW_UNALIGNED != 0 -#define SET(n) \ - (*(hts_md5_u32plus *)&ptr[(n) * 4]) -#define GET(n) \ - SET(n) -#else -#define SET(n) \ - (ctx->block[(n)] = \ - (hts_md5_u32plus)ptr[(n) * 4] | \ - ((hts_md5_u32plus)ptr[(n) * 4 + 1] << 8) | \ - ((hts_md5_u32plus)ptr[(n) * 4 + 2] << 16) | \ - ((hts_md5_u32plus)ptr[(n) * 4 + 3] << 24)) -#define GET(n) \ - (ctx->block[(n)]) -#endif - -/* - * This processes one or more 64-byte data blocks, but does NOT update - * the bit counters. There are no alignment requirements. - */ -static const void *body(hts_md5_context *ctx, const void *data, unsigned long size) -{ - const unsigned char *ptr; - hts_md5_u32plus a, b, c, d; - hts_md5_u32plus saved_a, saved_b, saved_c, saved_d; - - ptr = (const unsigned char *)data; - - a = ctx->a; - b = ctx->b; - c = ctx->c; - d = ctx->d; - - do { - saved_a = a; - saved_b = b; - saved_c = c; - saved_d = d; - -/* Round 1 */ - STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7) - STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12) - STEP(F, c, d, a, b, SET(2), 0x242070db, 17) - STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22) - STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7) - STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12) - STEP(F, c, d, a, b, SET(6), 0xa8304613, 17) - STEP(F, b, c, d, a, SET(7), 0xfd469501, 22) - STEP(F, a, b, c, d, SET(8), 0x698098d8, 7) - STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12) - STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17) - STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22) - STEP(F, a, b, c, d, SET(12), 0x6b901122, 7) - STEP(F, d, a, b, c, SET(13), 0xfd987193, 12) - STEP(F, c, d, a, b, SET(14), 0xa679438e, 17) - STEP(F, b, c, d, a, SET(15), 0x49b40821, 22) - -/* Round 2 */ - STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5) - STEP(G, d, a, b, c, GET(6), 0xc040b340, 9) - STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14) - 
STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20) - STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5) - STEP(G, d, a, b, c, GET(10), 0x02441453, 9) - STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14) - STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20) - STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5) - STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9) - STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14) - STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20) - STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5) - STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9) - STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14) - STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20) - -/* Round 3 */ - STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4) - STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11) - STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16) - STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23) - STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4) - STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11) - STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16) - STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23) - STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4) - STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11) - STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16) - STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23) - STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4) - STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11) - STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16) - STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23) - -/* Round 4 */ - STEP(I, a, b, c, d, GET(0), 0xf4292244, 6) - STEP(I, d, a, b, c, GET(7), 0x432aff97, 10) - STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15) - STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21) - STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6) - STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10) - STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15) - STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21) - STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6) - STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10) - STEP(I, c, d, a, b, GET(6), 0xa3014314, 15) - STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21) - STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6) - STEP(I, d, a, b, c, 
GET(11), 0xbd3af235, 10) - STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15) - STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21) - - a += saved_a; - b += saved_b; - c += saved_c; - d += saved_d; - - ptr += 64; - } while (size -= 64); - - ctx->a = a; - ctx->b = b; - ctx->c = c; - ctx->d = d; - - return ptr; -} - -void hts_md5_reset(hts_md5_context *ctx) -{ - ctx->a = 0x67452301; - ctx->b = 0xefcdab89; - ctx->c = 0x98badcfe; - ctx->d = 0x10325476; - - ctx->lo = 0; - ctx->hi = 0; -} - -void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size) -{ - hts_md5_u32plus saved_lo; - unsigned long used, available; - - saved_lo = ctx->lo; - if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) - ctx->hi++; - ctx->hi += size >> 29; - - used = saved_lo & 0x3f; - - if (used) { - available = 64 - used; - - if (size < available) { - memcpy(&ctx->buffer[used], data, size); - return; - } - - memcpy(&ctx->buffer[used], data, available); - data = (const unsigned char *)data + available; - size -= available; - body(ctx, ctx->buffer, 64); - } - - if (size >= 64) { - data = body(ctx, data, size & ~(unsigned long)0x3f); - size &= 0x3f; - } - - memcpy(ctx->buffer, data, size); -} - -void hts_md5_final(unsigned char *result, hts_md5_context *ctx) -{ - unsigned long used, available; - - used = ctx->lo & 0x3f; - - ctx->buffer[used++] = 0x80; - - available = 64 - used; - - if (available < 8) { - memset(&ctx->buffer[used], 0, available); - body(ctx, ctx->buffer, 64); - used = 0; - available = 64; - } - - memset(&ctx->buffer[used], 0, available - 8); - - ctx->lo <<= 3; - ctx->buffer[56] = ctx->lo; - ctx->buffer[57] = ctx->lo >> 8; - ctx->buffer[58] = ctx->lo >> 16; - ctx->buffer[59] = ctx->lo >> 24; - ctx->buffer[60] = ctx->hi; - ctx->buffer[61] = ctx->hi >> 8; - ctx->buffer[62] = ctx->hi >> 16; - ctx->buffer[63] = ctx->hi >> 24; - - body(ctx, ctx->buffer, 64); - - result[0] = ctx->a; - result[1] = ctx->a >> 8; - result[2] = ctx->a >> 16; - result[3] = ctx->a >> 24; - result[4] = 
ctx->b; - result[5] = ctx->b >> 8; - result[6] = ctx->b >> 16; - result[7] = ctx->b >> 24; - result[8] = ctx->c; - result[9] = ctx->c >> 8; - result[10] = ctx->c >> 16; - result[11] = ctx->c >> 24; - result[12] = ctx->d; - result[13] = ctx->d >> 8; - result[14] = ctx->d >> 16; - result[15] = ctx->d >> 24; - - memset(ctx, 0, sizeof(*ctx)); -} - - -hts_md5_context *hts_md5_init(void) -{ - hts_md5_context *ctx = malloc(sizeof(*ctx)); - if (!ctx) - return NULL; - - hts_md5_reset(ctx); - return ctx; -} - -#else - -#include -#include - -/* - * Wrappers around the OpenSSL libcrypto.so MD5 implementation. - * - * These are here to ensure they end up in the symbol table of the - * library regardless of the static inline in the headers. - */ -hts_md5_context *hts_md5_init(void) -{ - MD5_CTX *ctx = malloc(sizeof(*ctx)); - if (!ctx) - return NULL; - - MD5_Init(ctx); - - return (hts_md5_context *)ctx; -} - -void hts_md5_reset(hts_md5_context *ctx) -{ - MD5_Init((MD5_CTX *)ctx); -} - -void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size) -{ - MD5_Update((MD5_CTX *)ctx, data, size); -} - -void hts_md5_final(unsigned char *result, hts_md5_context *ctx) -{ - MD5_Final(result, (MD5_CTX *)ctx); -} - -#endif - -void hts_md5_destroy(hts_md5_context *ctx) -{ - if (!ctx) - return; - - free(ctx); -} - -void hts_md5_hex(char *hex, const unsigned char *digest) -{ - int i; - for (i = 0; i < 16; i++) { - hex[i*2+0] = "0123456789abcdef"[(digest[i]>>4)&0xf]; - hex[i*2+1] = "0123456789abcdef"[digest[i]&0xf]; - } - hex[32] = 0; -} diff --git a/src/htslib-1.19.1/multipart.c b/src/htslib-1.19.1/multipart.c deleted file mode 100644 index 12d0df2..0000000 --- a/src/htslib-1.19.1/multipart.c +++ /dev/null @@ -1,267 +0,0 @@ -/* multipart.c -- GA4GH redirection and multipart backend for file streams. - - Copyright (C) 2016-2017 Genome Research Ltd. 
- - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include - -#include "htslib/kstring.h" - -#include "hts_internal.h" -#include "hfile_internal.h" - -#ifndef EPROTO -#define EPROTO ENOEXEC -#endif - -typedef struct hfile_part { - char *url; - char **headers; -} hfile_part; - -typedef struct { - hFILE base; - hfile_part *parts; - size_t nparts, maxparts, current; - hFILE *currentfp; -} hFILE_multipart; - -static void free_part(hfile_part *p) -{ - free(p->url); - if (p->headers) { - char **hdr; - for (hdr = p->headers; *hdr; hdr++) free(*hdr); - free(p->headers); - } - - p->url = NULL; - p->headers = NULL; -} - -static void free_all_parts(hFILE_multipart *fp) -{ - size_t i; - for (i = 0; i < fp->nparts; i++) free_part(&fp->parts[i]); - free(fp->parts); -} - -static ssize_t multipart_read(hFILE *fpv, void *buffer, size_t nbytes) -{ - hFILE_multipart *fp = (hFILE_multipart *) fpv; - size_t n; - -open_next: - if (fp->currentfp == NULL) { - if (fp->current < fp->nparts) { - const hfile_part *p = &fp->parts[fp->current]; - hts_log_debug("Opening part #%zu of %zu: \"%.120s%s\"", - fp->current+1, fp->nparts, p->url, - (strlen(p->url) > 120)? "..." : ""); - - fp->currentfp = p->headers? - hopen(p->url, "r:", - "httphdr:v", p->headers, - "auth_token_enabled", "false", NULL) - : hopen(p->url, "r:", "auth_token_enabled", "false", NULL); - - if (fp->currentfp == NULL) return -1; - } - else return 0; // No more parts, so we're truly at EOF - } - - n = fp->currentfp->mobile? 
- fp->currentfp->backend->read(fp->currentfp, buffer, nbytes) - : hread(fp->currentfp, buffer, nbytes); - - if (n == 0) { - // We're at EOF on this part, so set up the next part - hFILE *prevfp = fp->currentfp; - free_part(&fp->parts[fp->current]); - fp->current++; - fp->currentfp = NULL; - if (hclose(prevfp) < 0) return -1; - goto open_next; - } - - return n; // Number of bytes read by (or an error from) fp->currentfp -} - -static ssize_t multipart_write(hFILE *fpv, const void *buffer, size_t nbytes) -{ - errno = EROFS; - return -1; -} - -static off_t multipart_seek(hFILE *fpv, off_t offset, int whence) -{ - errno = ESPIPE; - return -1; -} - -static int multipart_close(hFILE *fpv) -{ - hFILE_multipart *fp = (hFILE_multipart *) fpv; - - free_all_parts(fp); - if (fp->currentfp) { - if (hclose(fp->currentfp) < 0) return -1; - } - - return 0; -} - -static const struct hFILE_backend multipart_backend = -{ - multipart_read, multipart_write, multipart_seek, NULL, multipart_close -}; - -// Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing), -// or upon encountering an unexpected token, that token's type. -// Explicit `return '?'` means a JSON parsing error, typically a member key -// that is not a string. An unexpected token may be a valid token that was -// not the type expected for a particular GA4GH field, or it may be '?' or -// '\0' which should be propagated. 
-static char -parse_ga4gh_body_json(hFILE_multipart *fp, hFILE *json, - kstring_t *b, kstring_t *header) -{ - hts_json_token t; - - if (hts_json_fnext(json, &t, b) != '{') return t.type; - while (hts_json_fnext(json, &t, b) != '}') { - if (t.type != 's') return '?'; - - if (strcmp(t.str, "urls") == 0) { - if (hts_json_fnext(json, &t, b) != '[') return t.type; - - while (hts_json_fnext(json, &t, b) != ']') { - hfile_part *part; - size_t n = 0, max = 0; - - hts_expand(hfile_part, fp->nparts+1, fp->maxparts, fp->parts); - part = &fp->parts[fp->nparts++]; - part->url = NULL; - part->headers = NULL; - - if (t.type != '{') return t.type; - while (hts_json_fnext(json, &t, b) != '}') { - if (t.type != 's') return '?'; - - if (strcmp(t.str, "url") == 0) { - if (hts_json_fnext(json, &t, b) != 's') return t.type; - part->url = ks_release(b); - } - else if (strcmp(t.str, "headers") == 0) { - if (hts_json_fnext(json, &t, b) != '{') return t.type; - - while (hts_json_fnext(json, &t, header) != '}') { - if (t.type != 's') return '?'; - - if (hts_json_fnext(json, &t, b) != 's') - return t.type; - - kputs(": ", header); - kputs(t.str, header); - n++; - hts_expand(char *, n+1, max, part->headers); - part->headers[n-1] = ks_release(header); - part->headers[n] = NULL; - } - } - else if (hts_json_fskip_value(json, '\0') != 'v') - return '?'; - } - - if (! part->url) return 'i'; - } - } - else if (strcmp(t.str, "format") == 0) { - if (hts_json_fnext(json, &t, b) != 's') return t.type; - - hts_log_debug("GA4GH JSON redirection to multipart %s data", t.str); - } - else if (hts_json_fskip_value(json, '\0') != 'v') return '?'; - } - - return 'v'; -} - -// Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing), -// or upon encountering an unexpected token, that token's type. -// Explicit `return '?'` means a JSON parsing error, typically a member key -// that is not a string. 
An unexpected token may be a valid token that was -// not the type expected for a particular GA4GH field, or it may be '?' or -// '\0' which should be propagated. -static char -parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json, - kstring_t *b, kstring_t *header) { - hts_json_token t; - - if (hts_json_fnext(json, &t, b) != '{') return t.type; - while (hts_json_fnext(json, &t, b) != '}') { - if (t.type != 's') return '?'; - - if (strcmp(t.str, "htsget") == 0) { - char ret = parse_ga4gh_body_json(fp, json, b, header); - if (ret != 'v') return ret; - } - else return '?'; - } - - if (hts_json_fnext(json, &t, b) != '\0') return '?'; - - return 'v'; -} - -hFILE *hopen_htsget_redirect(hFILE *hfile, const char *mode) -{ - hFILE_multipart *fp; - kstring_t s1 = { 0, 0, NULL }, s2 = { 0, 0, NULL }; - char ret; - - fp = (hFILE_multipart *) hfile_init(sizeof (hFILE_multipart), mode, 0); - if (fp == NULL) return NULL; - - fp->parts = NULL; - fp->nparts = fp->maxparts = 0; - - ret = parse_ga4gh_redirect_json(fp, hfile, &s1, &s2); - free(s1.s); - free(s2.s); - if (ret != 'v') { - free_all_parts(fp); - hfile_destroy((hFILE *) fp); - errno = (ret == '?' || ret == '\0')? EPROTO : EINVAL; - return NULL; - } - - fp->current = 0; - fp->currentfp = NULL; - fp->base.backend = &multipart_backend; - return &fp->base; -} diff --git a/src/htslib-1.19.1/os/lzma_stub.h b/src/htslib-1.19.1/os/lzma_stub.h deleted file mode 100644 index 5dd9c1a..0000000 --- a/src/htslib-1.19.1/os/lzma_stub.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef LZMA_STUB_H -#define LZMA_STUB_H - -/* Some platforms, notably macOS, ship a usable liblzma shared library but - do not ship any LZMA header files. The and header - files that come with the library contain the following statement: - - * - * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. 
- * - - Accordingly the following declarations have been copied and distilled - from and (primarily) and are sufficient - to compile cram/cram_io.c in the absence of proper LZMA headers. - - This file, lzma_stub.h, remains in the public domain. */ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { LZMA_OK = 0, LZMA_STREAM_END = 1 } lzma_ret; -typedef enum { LZMA_RUN = 0, LZMA_FINISH = 3 } lzma_action; -typedef enum { LZMA_CHECK_CRC32 = 1 } lzma_check; -typedef enum { LZMA_RESERVED_ENUM = 0 } lzma_reserved_enum; - -struct lzma_allocator; -struct lzma_internal; - -typedef struct { - const uint8_t *next_in; - size_t avail_in; - uint64_t total_in; - - uint8_t *next_out; - size_t avail_out; - uint64_t total_out; - - const struct lzma_allocator *allocator; - struct lzma_internal *internal; - - void *reserved_ptr1; - void *reserved_ptr2; - void *reserved_ptr3; - void *reserved_ptr4; - uint64_t reserved_int1; - uint64_t reserved_int2; - size_t reserved_int3; - size_t reserved_int4; - lzma_reserved_enum reserved_enum1; - lzma_reserved_enum reserved_enum2; -} lzma_stream; - -#define LZMA_STREAM_INIT \ - { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ - NULL, NULL, NULL, NULL, 0, 0, 0, 0, \ - LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } - -extern size_t lzma_stream_buffer_bound(size_t uncompressed_size); - -extern lzma_ret lzma_easy_buffer_encode( - uint32_t preset, lzma_check check, - const struct lzma_allocator *allocator, - const uint8_t *in, size_t in_size, - uint8_t *out, size_t *out_pos, size_t out_size); - -extern lzma_ret lzma_stream_decoder( - lzma_stream *strm, uint64_t memlimit, uint32_t flags); - -extern uint64_t lzma_easy_decoder_memusage(uint32_t preset); - -extern lzma_ret lzma_code(lzma_stream *strm, lzma_action action); - -extern void lzma_end(lzma_stream *strm); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/os/rand.c b/src/htslib-1.19.1/os/rand.c deleted file mode 100644 index 7ceafa8..0000000 --- 
a/src/htslib-1.19.1/os/rand.c +++ /dev/null @@ -1,97 +0,0 @@ -/* rand.c -- drand48 implementation from the FreeBSD source tree. */ - -// This file is an amalgamation of the many small files in FreeBSD to do with -// drand48 and friends implementations. -// It comprises _rand48.c, rand48.h, srand48.c, drand48.c, erand48.c, lrand48.c - -/* - * Copyright (c) 1993 Martin Birgmeier - * All rights reserved. - * - * You may redistribute unmodified or modified versions of this source - * code provided that the above copyright notice and this and the - * following conditions are retained. - * - * This software is provided ``as is'', and comes with no warranties - * of any kind. I shall in no event be liable for anything that happens - * to anyone/anything when using this software. - */ - -//#include -//__FBSDID("$FreeBSD: src/lib/libc/gen/_rand48.c,v 1.2 2002/03/22 21:52:05 obrien Exp $"); - -#include - -#define RAND48_SEED_0 (0x330e) -#define RAND48_SEED_1 (0xabcd) -#define RAND48_SEED_2 (0x1234) -#define RAND48_MULT_0 (0xe66d) -#define RAND48_MULT_1 (0xdeec) -#define RAND48_MULT_2 (0x0005) -#define RAND48_ADD (0x000b) - -static unsigned short _rand48_seed[3] = { - RAND48_SEED_0, - RAND48_SEED_1, - RAND48_SEED_2 -}; -static unsigned short _rand48_mult[3] = { - RAND48_MULT_0, - RAND48_MULT_1, - RAND48_MULT_2 -}; -static unsigned short _rand48_add = RAND48_ADD; - -static void -_dorand48(unsigned short xseed[3]) -{ - unsigned long accu; - unsigned short temp[2]; - - accu = (unsigned long) _rand48_mult[0] * (unsigned long) xseed[0] + - (unsigned long) _rand48_add; - temp[0] = (unsigned short) accu; /* lower 16 bits */ - accu >>= sizeof(unsigned short) * 8; - accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[1] + - (unsigned long) _rand48_mult[1] * (unsigned long) xseed[0]; - temp[1] = (unsigned short) accu; /* middle 16 bits */ - accu >>= sizeof(unsigned short) * 8; - accu += _rand48_mult[0] * xseed[2] + _rand48_mult[1] * xseed[1] + _rand48_mult[2] * xseed[0]; - 
xseed[0] = temp[0]; - xseed[1] = temp[1]; - xseed[2] = (unsigned short) accu; -} - -HTSLIB_EXPORT -void hts_srand48(long seed) -{ - _rand48_seed[0] = RAND48_SEED_0; - _rand48_seed[1] = (unsigned short) seed; - _rand48_seed[2] = (unsigned short) (seed >> 16); - _rand48_mult[0] = RAND48_MULT_0; - _rand48_mult[1] = RAND48_MULT_1; - _rand48_mult[2] = RAND48_MULT_2; - _rand48_add = RAND48_ADD; -} - -HTSLIB_EXPORT -double hts_erand48(unsigned short xseed[3]) -{ - _dorand48(xseed); - return ldexp((double) xseed[0], -48) + - ldexp((double) xseed[1], -32) + - ldexp((double) xseed[2], -16); -} - -HTSLIB_EXPORT -double hts_drand48(void) -{ - return hts_erand48(_rand48_seed); -} - -HTSLIB_EXPORT -long hts_lrand48(void) -{ - _dorand48(_rand48_seed); - return ((long) _rand48_seed[2] << 15) + ((long) _rand48_seed[1] >> 1); -} diff --git a/src/htslib-1.19.1/plugin.c b/src/htslib-1.19.1/plugin.c deleted file mode 100644 index 670081f..0000000 --- a/src/htslib-1.19.1/plugin.c +++ /dev/null @@ -1,220 +0,0 @@ -/* plugin.c -- low-level path parsing and plugin functions. - - Copyright (C) 2015-2016, 2020 Genome Research Ltd. - - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#include - -#include -#include -#include -#include - -#include -#include - -#include "hts_internal.h" -#include "htslib/kstring.h" - -#ifndef PLUGINPATH -#define PLUGINPATH "" -#endif - -static DIR *open_nextdir(struct hts_path_itr *itr) -{ - DIR *dir; - - while (1) { - const char *colon = strchr(itr->pathdir, HTS_PATH_SEPARATOR_CHAR); - if (colon == NULL) return NULL; - - itr->entry.l = 0; - kputsn(itr->pathdir, colon - itr->pathdir, &itr->entry); - itr->pathdir = &colon[1]; - if (itr->entry.l == 0) continue; - - dir = opendir(itr->entry.s); - if (dir) break; - - if (hts_verbose >= 4) - fprintf(stderr, - "[W::hts_path_itr] can't scan directory \"%s\": %s\n", - itr->entry.s, strerror(errno)); - } - - if (itr->entry.s[itr->entry.l-1] != '/') kputc('/', &itr->entry); - itr->entry_dir_l = itr->entry.l; - return dir; -} - -void hts_path_itr_setup(struct hts_path_itr *itr, const char *path, - const char *builtin_path, const char *prefix, size_t prefix_len, - const char *suffix, size_t suffix_len) -{ - itr->prefix = prefix; - itr->prefix_len = prefix_len; - - if (suffix) itr->suffix = suffix, itr->suffix_len = suffix_len; - else itr->suffix = PLUGIN_EXT, itr->suffix_len = strlen(PLUGIN_EXT); - - itr->path.l = itr->path.m = 0; itr->path.s = NULL; - itr->entry.l = itr->entry.m = 0; itr->entry.s = NULL; - - if (! builtin_path) builtin_path = PLUGINPATH; - if (! path) { - path = getenv("HTS_PATH"); - if (! 
path) path = ""; - } - - while (1) { - size_t len = strcspn(path, HTS_PATH_SEPARATOR_STR); - if (len == 0) kputs(builtin_path, &itr->path); - else kputsn(path, len, &itr->path); - kputc(HTS_PATH_SEPARATOR_CHAR, &itr->path); - - path += len; - if (*path == HTS_PATH_SEPARATOR_CHAR) path++; - else break; - } - - // Note that ':' now terminates entries rather than separates them - itr->pathdir = itr->path.s; - itr->dirv = open_nextdir(itr); -} - -const char *hts_path_itr_next(struct hts_path_itr *itr) -{ - while (itr->dirv) { - struct dirent *e; - while ((e = readdir((DIR *) itr->dirv)) != NULL) { - size_t d_name_len = strlen(e->d_name); - if (strncmp(e->d_name, itr->prefix, itr->prefix_len) == 0 && - d_name_len >= itr->suffix_len && - strncmp(e->d_name + d_name_len - itr->suffix_len, itr->suffix, - itr->suffix_len) == 0) { - itr->entry.l = itr->entry_dir_l; - kputs(e->d_name, &itr->entry); - return itr->entry.s; - } - } - - closedir((DIR *) itr->dirv); - itr->dirv = open_nextdir(itr); - } - - itr->pathdir = NULL; - free(itr->path.s); itr->path.s = NULL; - free(itr->entry.s); itr->entry.s = NULL; - return NULL; -} - - -#ifndef RTLD_NOLOAD -#define RTLD_NOLOAD 0 -#endif - -plugin_void_func *load_plugin(void **pluginp, const char *filename, const char *symbol) -{ - void *lib = dlopen(filename, RTLD_NOW | RTLD_LOCAL); - if (lib == NULL) goto error; - - plugin_void_func *sym; - *(void **) &sym = dlsym(lib, symbol); - if (sym == NULL) { - // Reopen the plugin with RTLD_GLOBAL and check for uniquified symbol - void *libg = dlopen(filename, RTLD_NOLOAD | RTLD_NOW | RTLD_GLOBAL); - if (libg == NULL) goto error; - dlclose(lib); - lib = libg; - - kstring_t symbolg = { 0, 0, NULL }; - kputs(symbol, &symbolg); - kputc('_', &symbolg); - const char *slash = strrchr(filename, '/'); - const char *basename = slash? 
slash+1 : filename; - kputsn(basename, strcspn(basename, ".-+"), &symbolg); - - *(void **) &sym = dlsym(lib, symbolg.s); - free(symbolg.s); - if (sym == NULL) goto error; - } - - *pluginp = lib; - return sym; - -error: - if (hts_verbose >= 4) - fprintf(stderr, "[W::%s] can't load plugin \"%s\": %s\n", - __func__, filename, dlerror()); - if (lib) dlclose(lib); - return NULL; -} - -void *plugin_sym(void *plugin, const char *name, const char **errmsg) -{ - void *sym = dlsym(plugin, name); - if (sym == NULL) *errmsg = dlerror(); - return sym; -} - -plugin_void_func *plugin_func(void *plugin, const char *name, const char **errmsg) -{ - plugin_void_func *sym; - *(void **) &sym = plugin_sym(plugin, name, errmsg); - return sym; -} - -void close_plugin(void *plugin) -{ - if (dlclose(plugin) != 0) { - if (hts_verbose >= 4) - fprintf(stderr, "[W::%s] dlclose() failed: %s\n", - __func__, dlerror()); - } -} - -const char *hts_plugin_path(void) { -#ifdef ENABLE_PLUGINS - char *path = getenv("HTS_PATH"); - if (!path) path = ""; - - kstring_t ks = {0}; - while(1) { - size_t len = strcspn(path, HTS_PATH_SEPARATOR_STR); - if (len == 0) kputs(PLUGINPATH, &ks); - else kputsn(path, len, &ks); - kputc(HTS_PATH_SEPARATOR_CHAR, &ks); - - path += len; - if (*path == HTS_PATH_SEPARATOR_CHAR) path++; - else break; - } - - static char s_path[1024]; - snprintf(s_path, sizeof(s_path), "%s", ks.s ? ks.s : ""); - free(ks.s); - - return s_path; -#else - return NULL; -#endif -} diff --git a/src/htslib-1.19.1/probaln.c b/src/htslib-1.19.1/probaln.c deleted file mode 100644 index b42f856..0000000 --- a/src/htslib-1.19.1/probaln.c +++ /dev/null @@ -1,468 +0,0 @@ -/* The MIT License - - Copyright (C) 2003-2006, 2008-2010 by Heng Li - Copyright (C) 2016-2017, 2020, 2023 Genome Research Ltd. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include "htslib/hts.h" - -/***************************************** - * Probabilistic banded glocal alignment * - *****************************************/ - -#define EI .25 -#define EM .33333333333 - -static float g_qual2prob[256]; - -#define set_u(u, b, i, k) { int x=(i)-(b); x=x>0?x:0; (u)=((k)-x+1)*3; } - -/* - The topology of the profile HMM: - - /\ /\ /\ /\ - I[1] I[k-1] I[k] I[L] - ^ \ \ ^ \ ^ \ \ ^ - | \ \ | \ | \ \ | - M[0] M[1] -> ... -> M[k-1] -> M[k] -> ... -> M[L] M[L+1] - \ \/ \/ \/ / - \ /\ /\ /\ / - -> D[k-1] -> D[k] -> - - M[0] points to every {M,I}[k] and every {M,I}[k] points to M[L+1]. - - On input, ref is the reference sequence and query is the query - sequence. Both are sequences of 0/1/2/3/4 where 4 stands for an - ambiguous residue. 
iqual is the base quality. c sets the gap open - probability, gap extension probability and band width. - - On output, state and q are arrays of length l_query. The higher 30 - bits give the reference position the query base is matched to and the - lower two bits can be 0 (an alignment match) or 1 (an - insertion). q[i] gives the phred scaled posterior probability of - state[i] being wrong. - - Returns phred-scaled likelihood score, or INT_MIN on failure. - */ -int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, - int l_query, const uint8_t *iqual, const probaln_par_t *c, - int *state, uint8_t *q) -{ - double *f = NULL, *b = NULL, *s = NULL, m[9], sI, sM, bI, bM; - float *qual = NULL; - int bw, bw2, i, k, is_backward = 1, Pr; - - if ( l_ref<0 || l_query<0 || l_query >= INT_MAX - 2) { - errno = EINVAL; - return INT_MIN; - } - if (l_ref==0 || l_query==0) - return 0; // Is this actually invalid?? - - /*** initialization ***/ - is_backward = state && q? 1 : 0; - bw = l_ref > l_query? l_ref : l_query; - if (bw > c->bw) bw = c->bw; - if (bw < abs(l_ref - l_query)) bw = abs(l_ref - l_query); - bw2 = bw * 2 + 1; - size_t i_dim = bw2 < l_ref ? (size_t) bw2*3+6 : (size_t) l_ref*3+6; - - // allocate the forward and backward matrices f[][] and b[][] - // and the scaling array s[] - // Ideally these callocs would be mallocs + initialisation of - // the few bits needed. 
- if (SIZE_MAX / (l_query+1) / i_dim < sizeof(double)) { - errno = ENOMEM; // Allocation would fail - return INT_MIN; - } - f = calloc((l_query+1)*i_dim, sizeof(double)); - if (!f) goto fail; - if (is_backward) { - b = calloc((l_query+1)*i_dim, sizeof(double)); - if (!b) goto fail; - } - - // s[] is the scaling factor to avoid underflow - s = malloc((l_query+2) * sizeof(double)); - if (!s) goto fail; - - // initialize qual - qual = malloc(l_query * sizeof(float)); - if (!qual) goto fail; - if (g_qual2prob[0] == 0) - for (i = 0; i < 256; ++i) - g_qual2prob[i] = pow(10, -i/10.); - qual[0] = 0.0; // Should be unused - for (i = 0; i < l_query; ++i) - qual[i] = g_qual2prob[iqual? iqual[i] : 30]; - - // initialize transition probability - // the value here seems not to affect results; FIXME: need proof - sM = sI = 1. / (2 * l_query + 2); - m[0*3+0] = (1 - c->d - c->d) * (1 - sM); - m[0*3+1] = m[0*3+2] = c->d * (1 - sM); - m[1*3+0] = (1 - c->e) * (1 - sI); - m[1*3+1] = c->e * (1 - sI); - m[1*3+2] = 0.; - m[2*3+0] = 1 - c->e; - m[2*3+1] = 0.; - m[2*3+2] = c->e; - bM = (1 - c->d) / l_ref; // (bM+bI)*l_ref==1 - bI = c->d / l_ref; - - // f[] and b[] are 2-d arrays of three scores, with rows along the - // query and columns across the band. The first query base and - // first band position appear at index 1 allowing edge conditions - // to be stored in index 0. Hence the loops below appear to use - // 1-based indexing instead of 0-based as you'd normally expect in C, - // and the sequences are accessed using query[i - 1] and ref[k - 1]. - - /*** forward ***/ - // f[0] - set_u(k, bw, 0, 0); - f[0*i_dim+k] = s[0] = 1.; - { // f[1] - double *fi = &f[1*i_dim], sum; - int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1; - for (k = beg, sum = 0.; k <= end; ++k) { - int u; - double e = (ref[k - 1] > 3 || query[0] > 3)? 1. : ref[k - 1] == query[0]? 1. 
- qual[0] : qual[0] * EM; - set_u(u, bw, 1, k); - fi[u+0] = e * bM; fi[u+1] = EI * bI; - sum += fi[u] + fi[u+1]; - } - s[1] = sum; - } - // f[2..l_query] - for (i = 2; i <= l_query; ++i) { - double *fi = &f[i*i_dim], *fi1 = &f[(i-1)*i_dim], sum, qli = qual[i-1]; - int beg = 1, end = l_ref, x; - uint8_t qyi = query[i - 1]; - x = i - bw; beg = beg > x? beg : x; // band start - x = i + bw; end = end < x? end : x; // band end - - // NB end-beg is almost always 14 (99.9% of the time) - // Hence not a large volume to parallelise. - // - // Maybe stripe in diagonal doing 14 lines together? - // - // Consider rotation? 150x14 vs 14x150 so inner loop - // takes longer. - - double E[] = { - qli * EM, // 00 - 1. - qli, // 01 - 1., // 10 - 1., // 11 - }; - double M = 1./s[i-1]; - - // Note this code has the original version listed here (albeit - // with improved formatting), but we do not compile using - // -DPROBALN_ORIG. The purpose of this code is to act as an - // easier(?) to understand version of the heavily optimised - // version following it and as an easy validation path in case - // of any differences in results. -#ifdef PROBALN_ORIG - for (k = beg, sum = 0.; k <= end; ++k) { - int u, v11, v01, v10; - double e; - e = E[(ref[k - 1] > 3 || qyi > 3)*2 + (ref[k - 1] == qyi)]; - set_u(u, bw, i, k); - set_u(v11, bw, i-1, k-1); - set_u(v10, bw, i-1, k); - set_u(v01, bw, i, k-1); - fi[u+0] = e * (m[0] * M*fi1[v11+0] + m[3] * M*fi1[v11+1] + m[6] * M*fi1[v11+2]); - fi[u+1] = EI * (m[1] * M*fi1[v10+0] + m[4] * M*fi1[v10+1]); - fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2]; - sum += fi[u] + fi[u+1] + fi[u+2]; - } -#else - // We use EI*(M*m[1]*? + M*m[4]*?) a lot. So factor it out here. - double xm[5]; - xm[0] = M*m[0]; - xm[1] = M*m[3]; - xm[2] = M*m[6]; - xm[3] = EI*M*m[1]; - xm[4] = EI*M*m[4]; - - { - int u, v11; - set_u(u, bw, i, beg); - set_u(v11, bw, i-1, beg-1); - // Rather than recompute k->{u,v01,v10,v11} each loop - // we just increment the pointers. 
- double *xi = &fi[u]; - double *yi = &fi1[v11]; - // Derived from xi[0,2] in previous loop iter. - double l_x0 = m[2]*xi[0]; - double l_x2 = m[8]*xi[2]; - for (k = beg, sum = 0.; k <= end; ++k, xi+=3, yi+=3) { - int cond = (ref[k-1] > 3 || qyi > 3)*2 + (ref[k-1] == qyi); - - double z0 = xm[0]*yi[0]; - double z1 = xm[1]*yi[1]; - double z2 = xm[2]*yi[2]; - double z3 = xm[3]*yi[3]; - double z4 = xm[4]*yi[4]; - - xi[0] = E[cond] * (z0+z1+z2); - xi[1] = z3 + z4; - xi[2] = l_x0 + l_x2; - sum += xi[0] + xi[1] + xi[2]; - - l_x0 = m[2]*xi[0]; - l_x2 = m[8]*xi[2]; - } - } -#endif - s[i] = sum; - } - - { // f[l_query+1] - double sum; - double M = 1./s[l_query]; - // Note that this goes from 1 to l_ref inclusive, but as the - // alignment is banded not all of the values will have been - // calculated (the rest are taken as 0), so the summation - // actually goes over the values set in the last iteration of - // the previous loop (when i = l_query). For some reason lost to - // time this is done by looking for valid values of 'u' instead of - // working out 'beg' and 'end'. - - // From HTSlib 1.8 to 1.17, the endpoint was incorrectly set - // to i_dim - 3. When l_query <= bandwidth, this caused the last - // column to be missed, and if l_ref == l_query then a match at the end - // could incorrectly be reported as an insertion. See #1605. 
- - for (k = 1, sum = 0.; k <= l_ref; ++k) { - int u; - set_u(u, bw, l_query, k); - if (u < 3 || u >= i_dim) continue; - sum += M*f[l_query*i_dim + u+0] * sM + M*f[l_query*i_dim + u+1] * sI; - } - s[l_query+1] = sum; // the last scaling factor - } - { // compute likelihood - double p = 1., Pr1 = 0.; - for (i = 0; i <= l_query + 1; ++i) { - p *= s[i]; - if (p < 1e-100) Pr1 += -4.343 * log(p), p = 1.; - } - Pr1 += -4.343 * log(p * l_ref * l_query); - Pr = (int)(Pr1 + .499); - if (!is_backward) { // skip backward and MAP - free(f); free(s); free(qual); - return Pr; - } - } - /*** backward ***/ - // b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from) - for (k = 1; k <= l_ref; ++k) { - int u; - double *bi = &b[l_query*i_dim]; - set_u(u, bw, l_query, k); - if (u < 3 || u >= i_dim) continue; - bi[u+0] = sM / s[l_query] / s[l_query+1]; bi[u+1] = sI / s[l_query] / s[l_query+1]; - } - // b[l_query-1..1] - for (i = l_query - 1; i >= 1; --i) { - int beg = 1, end = l_ref, x; - double *bi = &b[i*i_dim], *bi1 = &b[(i+1)*i_dim], y = (i > 1), qli1 = qual[i]; - uint8_t qyi1 = query[i]; - x = i - bw; beg = beg > x? beg : x; - x = i + bw; end = end < x? end : x; - double E[] = { - qli1 * EM, //000 - 1. - qli1, //001 - 1., //010 - 1., //011 - //0,0,0,0 //1xx - }; - -#ifdef PROBALN_ORIG - for (k = end; k >= beg; --k) { - int u, v11, v01, v10; - double e; - set_u(u, bw, i, k); - set_u(v11, bw, i+1, k+1); - set_u(v10, bw, i+1, k); - set_u(v01, bw, i, k+1); - e = (k>=l_ref)?0 :E[(ref[k] > 3 || qyi1 > 3)*2 + (ref[k] == qyi1)] * bi1[v11]; - bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e. 
- bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1]; - bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y; -// fprintf(stderr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG - } - // rescale - int _beg, _end; - set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2; - for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y; -#else - { - int u, v10; - set_u(u, bw, i, end); - set_u(v10, bw, i+1, end); - // Rather than recompute k->{u,v01,v10,v11} each loop - // we just increment the pointers. - double *xi = &bi[u]; - double *yi = &bi1[v10]; - // NB xi[5] is equiv to v01+2. - double xi_5 = xi[5]; - // Manual loop invariant removal - double e1 = EI*m[1]; - double e4 = EI*m[4]; - // Do renorm too in the same pass. - double n = 1./s[i]; - for (k = end; k >= beg; --k, xi -= 3, yi -= 3) { - double e = (k>=l_ref) - ? 0 - : E[(ref[k]>3 || qyi1>3)*2 + (ref[k] == qyi1)] * yi[3]; - - xi[1] = e * m[3] + e4 * yi[1]; - xi[0] = e * m[0] + e1 * yi[1] + m[2] * xi_5; - xi[2] = (e * m[6] + m[8] * xi_5) * y; - // bi[u+2] from this iter becomes bi[v01+2] in next iter - xi_5 = xi[2]; - - // rescale - xi[1] *= n; - xi[0] *= n; - xi[2] *= n; - } - } -#endif - } - { // b[0] - int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1; - double sum = 0.; - for (k = end; k >= beg; --k) { - int u; - double e = (ref[k - 1] > 3 || query[0] > 3)? 1. : ref[k - 1] == query[0]? 1. - qual[0] : qual[0] * EM; - set_u(u, bw, 1, k); - if (u < 3 || u >= i_dim) continue; - sum += e * b[1*i_dim + u+0] * bM + EI * b[1*i_dim + u+1] * bI; - } - set_u(k, bw, 0, 0); - b[0*i_dim + k] = sum / s[0]; // if everything works as is expected, b[0][k] == 1.0 - } - /*** MAP ***/ - for (i = 1; i <= l_query; ++i) { - double sum = 0., *fi = &f[i*i_dim], *bi = &b[i*i_dim], max = 0.; - int beg = 1, end = l_ref, x, max_k = -1; - x = i - bw; beg = beg > x? beg : x; - x = i + bw; end = end < x? 
end : x; - double M = 1./s[i]; -#ifdef PROBALN_ORIG - for (k = beg; k <= end; ++k) { - int u; - double z; - set_u(u, bw, i, k); - z = M*fi[u+0] * bi[u+0]; - if (z > max) max = z, max_k = (k-1)<<2 | 0; - sum += z; - z = M*fi[u+1] * bi[u+1]; - if (z > max) max = z, max_k = (k-1)<<2 | 1; - sum += z; - } -#else - { - int u; - set_u(u, bw, i, beg); - for (k = beg; k <= end; ++k, u+=3) { - double z1, z2; - z1 = M*fi[u+0] * bi[u+0]; - z2 = M*fi[u+1] * bi[u+1]; - int which = z2 > z1; // strictly z2 >= z1 matches old code - double zm = which ? z2 : z1; - if (zm > max) { - max = zm; - max_k = (k-1)<<2 | which; - } - sum += z1 + z2; - } - } -#endif - max /= sum; sum *= s[i]; // if everything works as is expected, sum == 1.0 - if (state) state[i-1] = max_k; - if (q) k = (int)(-4.343 * log(1. - max) + .499), q[i-1] = k > 100? 99 : k; -#ifdef PROBALN_MAIN - k = 0; - set_u(k, bw, 0, 0); - fprintf(stderr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", b[0][k], sum, i-1, max_k>>2, - "ACGT"[query[i - 1]], "ACGT"[ref[(max_k>>2)]], max_k&3, max); // DEBUG -#endif - } - - /*** free ***/ - free(f); free(b); free(s); free(qual); - return Pr; - - fail: - free(f); free(b); free(s); free(qual); - return INT_MIN; -} - -#ifdef PROBALN_MAIN -#include -int main(int argc, char *argv[]) -{ - uint8_t conv[256], *iqual, *ref, *query; - probaln_par_t par = { 0.001, 0.1, 10 }; - int c, l_ref, l_query, i, q = 30, b = 10, P; - while ((c = getopt(argc, argv, "b:q:")) >= 0) { - switch (c) { - case 'b': b = atoi(optarg); break; - case 'q': q = atoi(optarg); break; - } - } - if (optind + 2 > argc) { - fprintf(stderr, "Usage: %s [-q %d] [-b %d] \n", argv[0], q, b); // example: acttc attc - return 1; - } - memset(conv, 4, 256); - conv['a'] = conv['A'] = 0; conv['c'] = conv['C'] = 1; - conv['g'] = conv['G'] = 2; conv['t'] = conv['T'] = 3; - ref = (uint8_t*)argv[optind]; query = (uint8_t*)argv[optind+1]; - l_ref = strlen((char*)ref); l_query = strlen((char*)query); - for (i = 0; i < l_ref; ++i) ref[i] = 
conv[ref[i]]; - for (i = 0; i < l_query; ++i) query[i] = conv[query[i]]; - iqual = malloc(l_query); - memset(iqual, q, l_query); - par.bw = b; - P = probaln_glocal(ref, l_ref, query, l_query, iqual, &par, 0, 0); - fprintf(stderr, "%d\n", P); - free(iqual); - return 0; -} -#endif diff --git a/src/htslib-1.19.1/realn.c b/src/htslib-1.19.1/realn.c deleted file mode 100644 index d7e8255..0000000 --- a/src/htslib-1.19.1/realn.c +++ /dev/null @@ -1,331 +0,0 @@ -/* realn.c -- BAQ calculation and realignment. - - Copyright (C) 2009-2011, 2014-2016, 2018, 2021, 2023 Genome Research Ltd. - Portions copyright (C) 2009-2011 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include "htslib/hts.h" -#include "htslib/sam.h" - -int sam_cap_mapq(bam1_t *b, const char *ref, hts_pos_t ref_len, int thres) -{ - uint8_t *seq = bam_get_seq(b), *qual = bam_get_qual(b); - uint32_t *cigar = bam_get_cigar(b); - bam1_core_t *c = &b->core; - int i, y, mm, q, len, clip_l, clip_q; - hts_pos_t x; - double t; - if (thres < 0) thres = 40; // set the default - mm = q = len = clip_l = clip_q = 0; - for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { - int j, l = cigar[i]>>4, op = cigar[i]&0xf; - if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { - for (j = 0; j < l; ++j) { - int c1, c2, z = y + j; - if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds - c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(unsigned char)ref[x+j]]; - if (c2 != 15 && c1 != 15 && qual[z] >= 13) { // not ambiguous - ++len; - if (c1 && c1 != c2 && qual[z] >= 13) { // mismatch - ++mm; - q += qual[z] > 33? 
33 : qual[z]; - } - } - } - if (j < l) break; - x += l; y += l; len += l; - } else if (op == BAM_CDEL) { - for (j = 0; j < l; ++j) - if (x+j >= ref_len || ref[x+j] == '\0') break; - if (j < l) break; - x += l; - } else if (op == BAM_CSOFT_CLIP) { - for (j = 0; j < l; ++j) clip_q += qual[y+j]; - clip_l += l; - y += l; - } else if (op == BAM_CHARD_CLIP) { - clip_q += 13 * l; - clip_l += l; - } else if (op == BAM_CINS) y += l; - else if (op == BAM_CREF_SKIP) x += l; - } - for (i = 0, t = 1; i < mm; ++i) - t *= (double)len / (i+1); - t = q - 4.343 * log(t) + clip_q / 5.; - if (t > thres) return -1; - if (t < 0) t = 0; - t = sqrt((thres - t) / thres) * thres; - //fprintf(stderr, "%s %lf %d\n", bam_get_qname(b), t, q); - return (int)(t + .499); -} - -static int realn_check_tag(const uint8_t *tg, enum htsLogLevel severity, - const char *type, const bam1_t *b) { - if (*tg != 'Z') { - hts_log(severity, __func__, "Incorrect %s tag type (%c) for read %s", - type, *tg, bam_get_qname(b)); - return -1; - } - if (b->core.l_qseq != strlen((const char *) tg + 1)) { - hts_log(severity, __func__, "Read %s %s tag is wrong length", - bam_get_qname(b), type); - return -1; - } - return 0; -} - -int sam_prob_realn(bam1_t *b, const char *ref, hts_pos_t ref_len, int flag) { - int k, bw, y, yb, ye, xb, xe, fix_bq = 0, apply_baq = flag & BAQ_APPLY, - extend_baq = flag & BAQ_EXTEND, redo_baq = flag & BAQ_REDO; - enum htsRealnFlags system = flag & (0xff << 3); - hts_pos_t i, x; - uint32_t *cigar = bam_get_cigar(b); - bam1_core_t *c = &b->core; - - // d(I) e(M) band - probaln_par_t conf = { 0.001, 0.1, 10 }; // Illumina - - if (b->core.l_qseq > 1000 || system > BAQ_ILLUMINA) { - // Params that work well on PacBio CCS 15k. Unknown if they - // help other long-read platforms yet, but likely better than - // the short-read tuned ones. - // - // This function has no access to the SAM header. - // Ideally the calling function would check for e.g. - // @RG PL = "PACBIO" and DS contains "READTYPE=CCS". 
- // - // In the absense of this, we simply auto-detect via a crude - // short vs long strategy. - conf.d = 1e-7; - conf.e = 1e-1; - } - - uint8_t *bq = NULL, *zq = NULL, *qual = bam_get_qual(b); - int *state = NULL; - if ((c->flag & BAM_FUNMAP) || b->core.l_qseq == 0 || qual[0] == (uint8_t)-1) - return -1; // do nothing - - // test if BQ or ZQ is present, and make sanity checks - if ((bq = bam_aux_get(b, "BQ")) != NULL) { - if (!redo_baq) { - if (realn_check_tag(bq, HTS_LOG_WARNING, "BQ", b) < 0) - fix_bq = 1; - } - ++bq; - } - if ((zq = bam_aux_get(b, "ZQ")) != NULL) { - if (realn_check_tag(zq, HTS_LOG_ERROR, "ZQ", b) < 0) - return -4; - ++zq; - } - if (bq && redo_baq) - { - bam_aux_del(b, bq-1); - bq = 0; - } - if (bq && zq) { // remove the ZQ tag - bam_aux_del(b, zq-1); - zq = 0; - } - if (!zq && fix_bq) { // Need to fix invalid BQ tag (by realigning) - assert(bq != NULL); - bam_aux_del(b, bq-1); - bq = 0; - } - - if (bq || zq) { - if ((apply_baq && zq) || (!apply_baq && bq)) return -3; // in both cases, do nothing - if (bq && apply_baq) { // then convert BQ to ZQ - for (i = 0; i < c->l_qseq; ++i) - qual[i] = qual[i] + 64 < bq[i]? 0 : qual[i] - ((int)bq[i] - 64); - *(bq - 3) = 'Z'; - } else if (zq && !apply_baq) { // then convert ZQ to BQ - for (i = 0; i < c->l_qseq; ++i) - qual[i] += (int)zq[i] - 64; - *(zq - 3) = 'B'; - } - return 0; - } - // find the start and end of the alignment - x = c->pos, y = 0, yb = ye = xb = xe = -1; - for (k = 0; k < c->n_cigar; ++k) { - int op, l; - op = cigar[k]&0xf; l = cigar[k]>>4; - if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { - if (yb < 0) yb = y; - if (xb < 0) xb = x; - ye = y + l; xe = x + l; - x += l; y += l; - } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l; - else if (op == BAM_CDEL) x += l; - else if (op == BAM_CREF_SKIP) return -1; // do nothing if there is a reference skip - } - if (xb == -1) // No matches in CIGAR. 
- return -1; - // set bandwidth and the start and the end - bw = 7; - if (abs((xe - xb) - (ye - yb)) > bw) - bw = abs((xe - xb) - (ye - yb)) + 3; - conf.bw = bw; - - xb -= yb + bw/2; if (xb < 0) xb = 0; - xe += c->l_qseq - ye + bw/2; - if (xe - xb - c->l_qseq > bw) - xb += (xe - xb - c->l_qseq - bw) / 2, xe -= (xe - xb - c->l_qseq - bw) / 2; - { // glocal - uint8_t *seq = bam_get_seq(b); - uint8_t *tseq; // translated seq A=>0,C=>1,G=>2,T=>3,other=>4 - uint8_t *tref; // translated ref - uint8_t *q; // Probability of incorrect alignment from probaln_glocal() - size_t lref = xe > xb ? xe - xb : 1; - size_t align_lqseq; - if (extend_baq && lref < c->l_qseq) - lref = c->l_qseq; // So we can recycle tseq,tref for left,rght below - // Try to make q,tref,tseq reasonably well aligned - align_lqseq = ((c->l_qseq + 1) | 0xf) + 1; - // Overflow check - 3 for *bq, sizeof(int) for *state - if ((SIZE_MAX - lref) / (3 + sizeof(int)) < align_lqseq) { - errno = ENOMEM; - goto fail; - } - - assert(bq == NULL); // bq was used above, but should now be NULL - bq = malloc(align_lqseq * 3 + lref); - if (!bq) goto fail; - q = bq + align_lqseq; - tseq = q + align_lqseq; - tref = tseq + align_lqseq; - - memcpy(bq, qual, c->l_qseq); bq[c->l_qseq] = 0; - for (i = 0; i < c->l_qseq; ++i) - tseq[i] = seq_nt16_int[bam_seqi(seq, i)]; - for (i = xb; i < xe; ++i) { - if (i >= ref_len || ref[i] == '\0') { xe = i; break; } - tref[i-xb] = seq_nt16_int[seq_nt16_table[(unsigned char)ref[i]]]; - } - - state = malloc(c->l_qseq * sizeof(int)); - if (!state) goto fail; - if (probaln_glocal(tref, xe-xb, tseq, c->l_qseq, qual, - &conf, state, q) == INT_MIN) { - goto fail; - } - - if (!extend_baq) { // in this block, bq[] is capped by base quality qual[] - for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) { - int op = cigar[k]&0xf, l = cigar[k]>>4; - if (l == 0) continue; - if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { - // Sanity check running off the end of the sequence - // Can only happen 
if the alignment is broken - if (l > c->l_qseq - y) - l = c->l_qseq - y; - for (i = y; i < y + l; ++i) { - if ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y)) bq[i] = 0; - else bq[i] = bq[i] < q[i]? bq[i] : q[i]; - } - x += l; y += l; - } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) { - // Need sanity check here too. - if (l > c->l_qseq - y) - l = c->l_qseq - y; - y += l; - } else if (op == BAM_CDEL) { - x += l; - } - } - for (i = 0; i < c->l_qseq; ++i) bq[i] = qual[i] - bq[i] + 64; // finalize BQ - } else { // in this block, bq[] is BAQ that can be larger than qual[] (different from the above!) - // tseq,tref are no longer needed, so we can steal them to avoid mallocs - uint8_t *left = tseq; - uint8_t *rght = tref; - int len = 0; - - for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) { - int op = cigar[k]&0xf, l = cigar[k]>>4; - - // concatenate alignment matches (including sequence (mis)matches) - // otherwise 50M50M gives a different result to 100M - if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { - if ((k + 1) < c->n_cigar) { - int next_op = bam_cigar_op(cigar[k + 1]); - - if (next_op == BAM_CMATCH || next_op == BAM_CEQUAL || next_op == BAM_CDIFF) { - len += l; - continue; - } - } - - // last of M/X/= ops - l += len; - len = 0; - } - - if (l == 0) continue; - if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { - // Sanity check running off the end of the sequence - // Can only happen if the alignment is broken - if (l > c->l_qseq - y) - l = c->l_qseq - y; - for (i = y; i < y + l; ++i) - bq[i] = ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y))? 0 : q[i]; - for (left[y] = bq[y], i = y + 1; i < y + l; ++i) - left[i] = bq[i] > left[i-1]? bq[i] : left[i-1]; - for (rght[y+l-1] = bq[y+l-1], i = y + l - 2; i >= y; --i) - rght[i] = bq[i] > rght[i+1]? bq[i] : rght[i+1]; - for (i = y; i < y + l; ++i) - bq[i] = left[i] < rght[i]? 
left[i] : rght[i]; - x += l; y += l; - } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) { - // Need sanity check here too. - if (l > c->l_qseq - y) - l = c->l_qseq - y; - y += l; - } else if (op == BAM_CDEL) { - x += l; - } - } - for (i = 0; i < c->l_qseq; ++i) bq[i] = 64 + (qual[i] <= bq[i]? 0 : qual[i] - bq[i]); // finalize BQ - } - if (apply_baq) { - for (i = 0; i < c->l_qseq; ++i) qual[i] -= bq[i] - 64; // modify qual - bam_aux_append(b, "ZQ", 'Z', c->l_qseq + 1, bq); - } else bam_aux_append(b, "BQ", 'Z', c->l_qseq + 1, bq); - free(bq); free(state); - } - - return 0; - - fail: - free(bq); free(state); - return -4; -} diff --git a/src/htslib-1.19.1/region.c b/src/htslib-1.19.1/region.c deleted file mode 100644 index 8b570e0..0000000 --- a/src/htslib-1.19.1/region.c +++ /dev/null @@ -1,276 +0,0 @@ -/* region.c -- Functions to create and free region lists - - Copyright (C) 2019 Genome Research Ltd. - - Author: Valeriu Ohan - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include "htslib/hts.h" -#include "htslib/khash.h" - -typedef struct reglist -{ - uint32_t n, m; - hts_pair_pos_t *a; - int tid; -} reglist_t; - -KHASH_MAP_INIT_INT(reg, reglist_t) -typedef kh_reg_t reghash_t; - -static int compare_hts_pair_pos_t (const void *av, const void *bv) -{ - hts_pair_pos_t *a = (hts_pair_pos_t *) av; - hts_pair_pos_t *b = (hts_pair_pos_t *) bv; - if (a->beg < b->beg) return -1; - if (a->beg > b->beg) return 1; - if (a->end < b->end) return -1; - if (a->end > b->end) return 1; - - return 0; -} - -#if 0 -/** - * Good to have around for debugging - */ -static void reg_print(reghash_t *h) { - reglist_t *p; - khint_t k; - uint32_t i; - khint32_t key; - - if (!h) { - fprintf(stderr, "Hash table is empty!\n"); - return; - } - for (k = kh_begin(h); k < kh_end(h); k++) { - if (kh_exist(h,k)) { - key = kh_key(h,k); - fprintf(stderr, "Region: key %u tid %d\n", key, p->tid); - if ((p = &kh_val(h,k)) != NULL && p->n > 0) { - for (i=0; in; i++) { - fprintf(stderr, "\tinterval[%d]: %"PRIhts_pos"-%"PRIhts_pos"\n", i, - p->a[i].beg, p->a[i].end); - } - } else { - fprintf(stderr, "Region key %u has no intervals!\n", key); - } - } - } -} -#endif - -/** - * Sort and merge overlapping or adjacent intervals. - */ -static int reg_compact(reghash_t *h) { - khint_t i; - uint32_t j, new_n; - reglist_t *p; - int count = 0; - - if (!h) - return 0; - - for (i = kh_begin(h); i < kh_end(h); i++) { - if (!kh_exist(h,i) || !(p = &kh_val(h,i)) || !(p->n)) - continue; - - qsort(p->a, p->n, sizeof(p->a[0]), compare_hts_pair_pos_t); - for (new_n = 0, j = 1; j < p->n; j++) { - if (p->a[new_n].end < p->a[j].beg) { - p->a[++new_n].beg = p->a[j].beg; - p->a[new_n].end = p->a[j].end; - } else { - if (p->a[new_n].end < p->a[j].end) - p->a[new_n].end = p->a[j].end; - } - } - ++new_n; - if (p->n > new_n) { - // Shrink array to required size. 
- hts_pair_pos_t *new_a = realloc(p->a, new_n * sizeof(p->a[0])); - if (new_a) p->a = new_a; - } - p->n = new_n; - count++; - } - - return count; -} - -static int reg_insert(reghash_t *h, int tid, hts_pos_t beg, hts_pos_t end) { - - khint_t k; - reglist_t *p; - - if (!h) - return -1; - - // Put reg in the hash table if not already there - k = kh_get(reg, h, tid); - if (k == kh_end(h)) { // absent from the hash table - int ret; - k = kh_put(reg, h, tid, &ret); - if (-1 == ret) { - return -1; - } - memset(&kh_val(h, k), 0, sizeof(reglist_t)); - kh_val(h, k).tid = tid; - } - p = &kh_val(h, k); - - // Add beg and end to the list - if (p->n == p->m) { - uint32_t new_m = p->m ? p->m<<1 : 4; - if (new_m == 0) return -1; - hts_pair_pos_t *new_a = realloc(p->a, new_m * sizeof(p->a[0])); - if (new_a == NULL) return -1; - p->m = new_m; - p->a = new_a; - } - p->a[p->n].beg = beg; - p->a[p->n++].end = end; - - return 0; -} - -static void reg_destroy(reghash_t *h) { - - khint_t k; - - if (!h) - return; - - for (k = 0; k < kh_end(h); ++k) { - if (kh_exist(h, k)) { - free(kh_val(h, k).a); - } - } - kh_destroy(reg, h); -} - -/** - * Take a char array of reg:interval elements and produce a hts_reglis_t with r_count elements. - */ -hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr, hts_name2id_f getid) { - - if (!argv || argc < 1) - return NULL; - - reghash_t *h = NULL; - reglist_t *p; - hts_reglist_t *h_reglist = NULL; - - khint_t k; - int i, l_count = 0, tid; - const char *q; - hts_pos_t beg, end; - - /* First, transform the char array into a hash table */ - h = kh_init(reg); - if (!h) { - hts_log_error("Error when creating the region hash table"); - return NULL; - } - - for (i=0; itid; - h_reglist[l_count].intervals = p->a; - h_reglist[l_count].count = p->n; - p->a = NULL; // As we stole it. - - // After reg_compact(), list is ordered and non-overlapping, so... 
- if (p->n > 0) { - h_reglist[l_count].min_beg = h_reglist[l_count].intervals[0].beg; - h_reglist[l_count].max_end = h_reglist[l_count].intervals[p->n - 1].end; - } else { - h_reglist[l_count].min_beg = 0; - h_reglist[l_count].max_end = 0; - } - - l_count++; - } - reg_destroy(h); - - return h_reglist; - -fail: - reg_destroy(h); - if(h_reglist) hts_reglist_free(h_reglist, l_count); - - return NULL; -} - -void hts_reglist_free(hts_reglist_t *reglist, int count) { - - int i; - if(reglist) { - for (i = 0; i < count; i++) { - if (reglist[i].intervals) - free(reglist[i].intervals); - } - free(reglist); - } -} diff --git a/src/htslib-1.19.1/sam.5 b/src/htslib-1.19.1/sam.5 deleted file mode 100644 index d44719e..0000000 --- a/src/htslib-1.19.1/sam.5 +++ /dev/null @@ -1,68 +0,0 @@ -'\" t -.TH sam 5 "August 2013" "htslib" "Bioinformatics formats" -.SH NAME -sam \- Sequence Alignment/Map file format -.\" -.\" Copyright (C) 2009, 2013-2014 Genome Research Ltd. -.\" -.\" Author: Heng Li -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -.SH DESCRIPTION -Sequence Alignment/Map (SAM) format is TAB-delimited. Apart from the header lines, which are started -with the `@' symbol, each alignment line consists of: -.TS -nlbl. -1 QNAME Query template/pair NAME -2 FLAG bitwise FLAG -3 RNAME Reference sequence NAME -4 POS 1-based leftmost POSition/coordinate of clipped sequence -5 MAPQ MAPping Quality (Phred-scaled) -6 CIGAR extended CIGAR string -7 MRNM Mate Reference sequence NaMe (`=' if same as RNAME) -8 MPOS 1-based Mate POSition -9 TLEN inferred Template LENgth (insert size) -10 SEQ query SEQuence on the same strand as the reference -11 QUAL query QUALity (ASCII-33 gives the Phred base quality) -12+ OPT variable OPTional fields in the format TAG:VTYPE:VALUE -.TE -.PP -Each bit in the FLAG field is defined as: -.TS -lcbl. -0x0001 p the read is paired in sequencing -0x0002 P the read is mapped in a proper pair -0x0004 u the query sequence itself is unmapped -0x0008 U the mate is unmapped -0x0010 r strand of the query (1 for reverse) -0x0020 R strand of the mate -0x0040 1 the read is the first read in a pair -0x0080 2 the read is the second read in a pair -0x0100 s the alignment is not primary -0x0200 f the read fails platform/vendor quality checks -0x0400 d the read is either a PCR or an optical duplicate -0x0800 S the alignment is supplementary -.TE -.P -where the second column gives the string representation of the FLAG field. 
-.SH SEE ALSO -.TP -https://github.com/samtools/hts-specs -The full SAM/BAM file format specification diff --git a/src/htslib-1.19.1/sam.c b/src/htslib-1.19.1/sam.c deleted file mode 100644 index cc3acb4..0000000 --- a/src/htslib-1.19.1/sam.c +++ /dev/null @@ -1,6164 +0,0 @@ -/* sam.c -- SAM and BAM file I/O and manipulation. - - Copyright (C) 2008-2010, 2012-2023 Genome Research Ltd. - Copyright (C) 2010, 2012, 2013 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -#include "fuzz_settings.h" -#endif - -// Suppress deprecation message for cigar_tab, which we initialise -#include "htslib/hts_defs.h" -#undef HTS_DEPRECATED -#define HTS_DEPRECATED(message) - -#include "htslib/sam.h" -#include "htslib/bgzf.h" -#include "cram/cram.h" -#include "hts_internal.h" -#include "sam_internal.h" -#include "htslib/hfile.h" -#include "htslib/hts_endian.h" -#include "htslib/hts_expr.h" -#include "header.h" - -#include "htslib/khash.h" -KHASH_DECLARE(s2i, kh_cstr_t, int64_t) -KHASH_SET_INIT_INT(tag) - -#ifndef EFTYPE -#define EFTYPE ENOEXEC -#endif -#ifndef EOVERFLOW -#define EOVERFLOW ERANGE -#endif - -/********************** - *** BAM header I/O *** - **********************/ - -HTSLIB_EXPORT -const int8_t bam_cigar_table[256] = { - // 0 .. 47 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - // 48 .. 63 (including =) - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, BAM_CEQUAL, -1, -1, - - // 64 .. 79 (including MIDNHB) - -1, -1, BAM_CBACK, -1, BAM_CDEL, -1, -1, -1, - BAM_CHARD_CLIP, BAM_CINS, -1, -1, -1, BAM_CMATCH, BAM_CREF_SKIP, -1, - - // 80 .. 95 (including SPX) - BAM_CPAD, -1, -1, BAM_CSOFT_CLIP, -1, -1, -1, -1, - BAM_CDIFF, -1, -1, -1, -1, -1, -1, -1, - - // 96 .. 127 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - // 128 .. 
255 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -}; - -sam_hdr_t *sam_hdr_init(void) -{ - sam_hdr_t *bh = (sam_hdr_t*)calloc(1, sizeof(sam_hdr_t)); - if (bh == NULL) return NULL; - - bh->cigar_tab = bam_cigar_table; - return bh; -} - -void sam_hdr_destroy(sam_hdr_t *bh) -{ - int32_t i; - - if (bh == NULL) return; - - if (bh->ref_count > 0) { - --bh->ref_count; - return; - } - - if (bh->target_name) { - for (i = 0; i < bh->n_targets; ++i) - free(bh->target_name[i]); - free(bh->target_name); - free(bh->target_len); - } - free(bh->text); - if (bh->hrecs) - sam_hrecs_free(bh->hrecs); - if (bh->sdict) - kh_destroy(s2i, (khash_t(s2i) *) bh->sdict); - free(bh); -} - -// Copy the sam_hdr_t::sdict hash, used to store the real lengths of long -// references before sam_hdr_t::hrecs is populated -int sam_hdr_dup_sdict(const sam_hdr_t *h0, sam_hdr_t *h) -{ - const khash_t(s2i) *src_long_refs = (khash_t(s2i) *) h0->sdict; - khash_t(s2i) *dest_long_refs = kh_init(s2i); - int i; - if (!dest_long_refs) return -1; - - for (i = 0; i < h->n_targets; i++) { - int ret; - khiter_t ksrc, kdest; - if (h->target_len[i] < UINT32_MAX) continue; - ksrc = kh_get(s2i, src_long_refs, h->target_name[i]); - if (ksrc == kh_end(src_long_refs)) continue; - kdest = kh_put(s2i, dest_long_refs, h->target_name[i], &ret); - if (ret < 0) { - kh_destroy(s2i, dest_long_refs); - return -1; - } - kh_val(dest_long_refs, kdest) = kh_val(src_long_refs, ksrc); - } - - h->sdict = dest_long_refs; - return 0; -} - -sam_hdr_t *sam_hdr_dup(const 
sam_hdr_t *h0) -{ - if (h0 == NULL) return NULL; - sam_hdr_t *h; - if ((h = sam_hdr_init()) == NULL) return NULL; - // copy the simple data - h->n_targets = 0; - h->ignore_sam_err = h0->ignore_sam_err; - h->l_text = 0; - - // Then the pointery stuff - - if (!h0->hrecs) { - h->target_len = (uint32_t*)calloc(h0->n_targets, sizeof(uint32_t)); - if (!h->target_len) goto fail; - h->target_name = (char**)calloc(h0->n_targets, sizeof(char*)); - if (!h->target_name) goto fail; - - int i; - for (i = 0; i < h0->n_targets; ++i) { - h->target_len[i] = h0->target_len[i]; - h->target_name[i] = strdup(h0->target_name[i]); - if (!h->target_name[i]) break; - } - h->n_targets = i; - if (i < h0->n_targets) goto fail; - - if (h0->sdict) { - if (sam_hdr_dup_sdict(h0, h) < 0) goto fail; - } - } - - if (h0->hrecs) { - kstring_t tmp = { 0, 0, NULL }; - if (sam_hrecs_rebuild_text(h0->hrecs, &tmp) != 0) { - free(ks_release(&tmp)); - goto fail; - } - - h->l_text = tmp.l; - h->text = ks_release(&tmp); - - if (sam_hdr_update_target_arrays(h, h0->hrecs, 0) != 0) - goto fail; - } else { - h->l_text = h0->l_text; - h->text = malloc(h->l_text + 1); - if (!h->text) goto fail; - memcpy(h->text, h0->text, h->l_text); - h->text[h->l_text] = '\0'; - } - - return h; - - fail: - sam_hdr_destroy(h); - return NULL; -} - -sam_hdr_t *bam_hdr_read(BGZF *fp) -{ - sam_hdr_t *h; - uint8_t buf[4]; - int magic_len, has_EOF; - int32_t i, name_len, num_names = 0; - size_t bufsize; - ssize_t bytes; - // check EOF - has_EOF = bgzf_check_EOF(fp); - if (has_EOF < 0) { - perror("[W::bam_hdr_read] bgzf_check_EOF"); - } else if (has_EOF == 0) { - hts_log_warning("EOF marker is absent. 
The input is probably truncated"); - } - // read "BAM1" - magic_len = bgzf_read(fp, buf, 4); - if (magic_len != 4 || memcmp(buf, "BAM\1", 4)) { - hts_log_error("Invalid BAM binary header"); - return 0; - } - h = sam_hdr_init(); - if (!h) goto nomem; - - // read plain text and the number of reference sequences - bytes = bgzf_read(fp, buf, 4); - if (bytes != 4) goto read_err; - h->l_text = le_to_u32(buf); - - bufsize = h->l_text + 1; - if (bufsize < h->l_text) goto nomem; // so large that adding 1 overflowed -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (bufsize > FUZZ_ALLOC_LIMIT) goto nomem; -#endif - h->text = (char*)malloc(bufsize); - if (!h->text) goto nomem; - h->text[h->l_text] = 0; // make sure it is NULL terminated - bytes = bgzf_read(fp, h->text, h->l_text); - if (bytes != h->l_text) goto read_err; - - bytes = bgzf_read(fp, &h->n_targets, 4); - if (bytes != 4) goto read_err; - if (fp->is_be) ed_swap_4p(&h->n_targets); - - if (h->n_targets < 0) goto invalid; - - // read reference sequence names and lengths -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (h->n_targets > (FUZZ_ALLOC_LIMIT - bufsize)/(sizeof(char*)+sizeof(uint32_t))) - goto nomem; -#endif - if (h->n_targets > 0) { - h->target_name = (char**)calloc(h->n_targets, sizeof(char*)); - if (!h->target_name) goto nomem; - h->target_len = (uint32_t*)calloc(h->n_targets, sizeof(uint32_t)); - if (!h->target_len) goto nomem; - } - else { - h->target_name = NULL; - h->target_len = NULL; - } - - for (i = 0; i != h->n_targets; ++i) { - bytes = bgzf_read(fp, &name_len, 4); - if (bytes != 4) goto read_err; - if (fp->is_be) ed_swap_4p(&name_len); - if (name_len <= 0) goto invalid; - - h->target_name[i] = (char*)malloc(name_len); - if (!h->target_name[i]) goto nomem; - num_names++; - - bytes = bgzf_read(fp, h->target_name[i], name_len); - if (bytes != name_len) goto read_err; - - if (h->target_name[i][name_len - 1] != '\0') { - /* Fix missing NUL-termination. Is this being too nice? 
- We could alternatively bail out with an error. */ - char *new_name; - if (name_len == INT32_MAX) goto invalid; - new_name = realloc(h->target_name[i], name_len + 1); - if (new_name == NULL) goto nomem; - h->target_name[i] = new_name; - h->target_name[i][name_len] = '\0'; - } - - bytes = bgzf_read(fp, &h->target_len[i], 4); - if (bytes != 4) goto read_err; - if (fp->is_be) ed_swap_4p(&h->target_len[i]); - } - return h; - - nomem: - hts_log_error("Out of memory"); - goto clean; - - read_err: - if (bytes < 0) { - hts_log_error("Error reading BGZF stream"); - } else { - hts_log_error("Truncated BAM header"); - } - goto clean; - - invalid: - hts_log_error("Invalid BAM binary header"); - - clean: - if (h != NULL) { - h->n_targets = num_names; // ensure we free only allocated target_names - sam_hdr_destroy(h); - } - return NULL; -} - -int bam_hdr_write(BGZF *fp, const sam_hdr_t *h) -{ - int32_t i, name_len, x; - kstring_t hdr_ks = { 0, 0, NULL }; - char *text; - uint32_t l_text; - - if (!h) return -1; - - if (h->hrecs) { - if (sam_hrecs_rebuild_text(h->hrecs, &hdr_ks) != 0) return -1; - if (hdr_ks.l > UINT32_MAX) { - hts_log_error("Header too long for BAM format"); - free(hdr_ks.s); - return -1; - } else if (hdr_ks.l > INT32_MAX) { - hts_log_warning("Header too long for BAM specification (>2GB)"); - hts_log_warning("Output file may not be portable"); - } - text = hdr_ks.s; - l_text = hdr_ks.l; - } else { - if (h->l_text > UINT32_MAX) { - hts_log_error("Header too long for BAM format"); - return -1; - } else if (h->l_text > INT32_MAX) { - hts_log_warning("Header too long for BAM specification (>2GB)"); - hts_log_warning("Output file may not be portable"); - } - text = h->text; - l_text = h->l_text; - } - // write "BAM1" - if (bgzf_write(fp, "BAM\1", 4) < 0) { free(hdr_ks.s); return -1; } - // write plain text and the number of reference sequences - if (fp->is_be) { - x = ed_swap_4(l_text); - if (bgzf_write(fp, &x, 4) < 0) { free(hdr_ks.s); return -1; } - if (l_text) { - 
if (bgzf_write(fp, text, l_text) < 0) { free(hdr_ks.s); return -1; } - } - x = ed_swap_4(h->n_targets); - if (bgzf_write(fp, &x, 4) < 0) { free(hdr_ks.s); return -1; } - } else { - if (bgzf_write(fp, &l_text, 4) < 0) { free(hdr_ks.s); return -1; } - if (l_text) { - if (bgzf_write(fp, text, l_text) < 0) { free(hdr_ks.s); return -1; } - } - if (bgzf_write(fp, &h->n_targets, 4) < 0) { free(hdr_ks.s); return -1; } - } - free(hdr_ks.s); - // write sequence names and lengths - for (i = 0; i != h->n_targets; ++i) { - char *p = h->target_name[i]; - name_len = strlen(p) + 1; - if (fp->is_be) { - x = ed_swap_4(name_len); - if (bgzf_write(fp, &x, 4) < 0) return -1; - } else { - if (bgzf_write(fp, &name_len, 4) < 0) return -1; - } - if (bgzf_write(fp, p, name_len) < 0) return -1; - if (fp->is_be) { - x = ed_swap_4(h->target_len[i]); - if (bgzf_write(fp, &x, 4) < 0) return -1; - } else { - if (bgzf_write(fp, &h->target_len[i], 4) < 0) return -1; - } - } - if (bgzf_flush(fp) < 0) return -1; - return 0; -} - -const char *sam_parse_region(sam_hdr_t *h, const char *s, int *tid, - hts_pos_t *beg, hts_pos_t *end, int flags) { - return hts_parse_region(s, tid, beg, end, (hts_name2id_f)bam_name2id, h, flags); -} - -/************************* - *** BAM alignment I/O *** - *************************/ - -bam1_t *bam_init1(void) -{ - return (bam1_t*)calloc(1, sizeof(bam1_t)); -} - -int sam_realloc_bam_data(bam1_t *b, size_t desired) -{ - uint32_t new_m_data; - uint8_t *new_data; - new_m_data = desired; - kroundup32(new_m_data); - if (new_m_data < desired) { - errno = ENOMEM; // Not strictly true but we can't store the size - return -1; - } -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (new_m_data > FUZZ_ALLOC_LIMIT) { - errno = ENOMEM; - return -1; - } -#endif - if ((bam_get_mempolicy(b) & BAM_USER_OWNS_DATA) == 0) { - new_data = realloc(b->data, new_m_data); - } else { - if ((new_data = malloc(new_m_data)) != NULL) { - if (b->l_data > 0) - memcpy(new_data, b->data, - b->l_data < 
b->m_data ? b->l_data : b->m_data); - bam_set_mempolicy(b, bam_get_mempolicy(b) & (~BAM_USER_OWNS_DATA)); - } - } - if (!new_data) return -1; - b->data = new_data; - b->m_data = new_m_data; - return 0; -} - -void bam_destroy1(bam1_t *b) -{ - if (b == 0) return; - if ((bam_get_mempolicy(b) & BAM_USER_OWNS_DATA) == 0) { - free(b->data); - if ((bam_get_mempolicy(b) & BAM_USER_OWNS_STRUCT) != 0) { - // In case of reuse - b->data = NULL; - b->m_data = 0; - b->l_data = 0; - } - } - - if ((bam_get_mempolicy(b) & BAM_USER_OWNS_STRUCT) == 0) - free(b); -} - -bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) -{ - if (realloc_bam_data(bdst, bsrc->l_data) < 0) return NULL; - memcpy(bdst->data, bsrc->data, bsrc->l_data); // copy var-len data - memcpy(&bdst->core, &bsrc->core, sizeof(bsrc->core)); // copy the rest - bdst->l_data = bsrc->l_data; - bdst->id = bsrc->id; - return bdst; -} - -bam1_t *bam_dup1(const bam1_t *bsrc) -{ - if (bsrc == NULL) return NULL; - bam1_t *bdst = bam_init1(); - if (bdst == NULL) return NULL; - if (bam_copy1(bdst, bsrc) == NULL) { - bam_destroy1(bdst); - return NULL; - } - return bdst; -} - -static void bam_cigar2rqlens(int n_cigar, const uint32_t *cigar, - hts_pos_t *rlen, hts_pos_t *qlen) -{ - int k; - *rlen = *qlen = 0; - for (k = 0; k < n_cigar; ++k) { - int type = bam_cigar_type(bam_cigar_op(cigar[k])); - int len = bam_cigar_oplen(cigar[k]); - if (type & 1) *qlen += len; - if (type & 2) *rlen += len; - } -} - -static int subtract_check_underflow(size_t length, size_t *limit) -{ - if (length <= *limit) { - *limit -= length; - return 0; - } - - return -1; -} - -int bam_set1(bam1_t *bam, - size_t l_qname, const char *qname, - uint16_t flag, int32_t tid, hts_pos_t pos, uint8_t mapq, - size_t n_cigar, const uint32_t *cigar, - int32_t mtid, hts_pos_t mpos, hts_pos_t isize, - size_t l_seq, const char *seq, const char *qual, - size_t l_aux) -{ - // use a default qname "*" if none is provided - if (l_qname == 0) { - l_qname = 1; - qname = "*"; - } - - 
// note: the qname is stored nul terminated and padded as described in the - // documentation for the bam1_t struct. - size_t qname_nuls = 4 - l_qname % 4; - - // the aligment length, needed for bam_reg2bin(), is calculated as in bam_endpos(). - // can't use bam_endpos() directly as some fields not yet set up. - hts_pos_t rlen = 0, qlen = 0; - if (!(flag & BAM_FUNMAP)) { - bam_cigar2rqlens((int)n_cigar, cigar, &rlen, &qlen); - } - if (rlen == 0) { - rlen = 1; - } - - // validate parameters - if (l_qname > 254) { - hts_log_error("Query name too long"); - errno = EINVAL; - return -1; - } - if (HTS_POS_MAX - rlen <= pos) { - hts_log_error("Read ends beyond highest supported position"); - errno = EINVAL; - return -1; - } - if (!(flag & BAM_FUNMAP) && l_seq > 0 && n_cigar == 0) { - hts_log_error("Mapped query must have a CIGAR"); - errno = EINVAL; - return -1; - } - if (!(flag & BAM_FUNMAP) && l_seq > 0 && l_seq != qlen) { - hts_log_error("CIGAR and query sequence are of different length"); - errno = EINVAL; - return -1; - } - - size_t limit = INT32_MAX; - int u = subtract_check_underflow(l_qname + qname_nuls, &limit); - u += subtract_check_underflow(n_cigar * 4, &limit); - u += subtract_check_underflow((l_seq + 1) / 2, &limit); - u += subtract_check_underflow(l_seq, &limit); - u += subtract_check_underflow(l_aux, &limit); - if (u != 0) { - hts_log_error("Size overflow"); - errno = EINVAL; - return -1; - } - - // re-allocate the data buffer as needed. 
- size_t data_len = l_qname + qname_nuls + n_cigar * 4 + (l_seq + 1) / 2 + l_seq; - if (realloc_bam_data(bam, data_len + l_aux) < 0) { - return -1; - } - - bam->l_data = (int)data_len; - bam->core.pos = pos; - bam->core.tid = tid; - bam->core.bin = bam_reg2bin(pos, pos + rlen); - bam->core.qual = mapq; - bam->core.l_extranul = (uint8_t)(qname_nuls - 1); - bam->core.flag = flag; - bam->core.l_qname = (uint16_t)(l_qname + qname_nuls); - bam->core.n_cigar = (uint32_t)n_cigar; - bam->core.l_qseq = (int32_t)l_seq; - bam->core.mtid = mtid; - bam->core.mpos = mpos; - bam->core.isize = isize; - - uint8_t *cp = bam->data; - strncpy((char *)cp, qname, l_qname); - int i; - for (i = 0; i < qname_nuls; i++) { - cp[l_qname + i] = '\0'; - } - cp += l_qname + qname_nuls; - - if (n_cigar > 0) { - memcpy(cp, cigar, n_cigar * 4); - } - cp += n_cigar * 4; - -#define NN 16 - const uint8_t *useq = (uint8_t *)seq; - for (i = 0; i + NN < l_seq; i += NN) { - int j; - const uint8_t *u2 = useq+i; - for (j = 0; j < NN/2; j++) - cp[j] = (seq_nt16_table[u2[j*2]]<<4) | seq_nt16_table[u2[j*2+1]]; - cp += NN/2; - } - for (; i + 1 < l_seq; i += 2) { - *cp++ = (seq_nt16_table[useq[i]] << 4) | seq_nt16_table[useq[i + 1]]; - } - - for (; i < l_seq; i++) { - *cp++ = seq_nt16_table[(unsigned char)seq[i]] << 4; - } - - if (qual) { - memcpy(cp, qual, l_seq); - } - else { - memset(cp, '\xff', l_seq); - } - - return (int)data_len; -} - -hts_pos_t bam_cigar2qlen(int n_cigar, const uint32_t *cigar) -{ - int k; - hts_pos_t l; - for (k = l = 0; k < n_cigar; ++k) - if (bam_cigar_type(bam_cigar_op(cigar[k]))&1) - l += bam_cigar_oplen(cigar[k]); - return l; -} - -hts_pos_t bam_cigar2rlen(int n_cigar, const uint32_t *cigar) -{ - int k; - hts_pos_t l; - for (k = l = 0; k < n_cigar; ++k) - if (bam_cigar_type(bam_cigar_op(cigar[k]))&2) - l += bam_cigar_oplen(cigar[k]); - return l; -} - -hts_pos_t bam_endpos(const bam1_t *b) -{ - hts_pos_t rlen = (b->core.flag & BAM_FUNMAP)? 
0 : bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); - if (rlen == 0) rlen = 1; - return b->core.pos + rlen; -} - -static int bam_tag2cigar(bam1_t *b, int recal_bin, int give_warning) // return 0 if CIGAR is untouched; 1 if CIGAR is updated with CG -{ - bam1_core_t *c = &b->core; - uint32_t cigar_st, n_cigar4, CG_st, CG_en, ori_len = b->l_data, *cigar0, CG_len, fake_bytes; - uint8_t *CG; - - // test where there is a real CIGAR in the CG tag to move - if (c->n_cigar == 0 || c->tid < 0 || c->pos < 0) return 0; - cigar0 = bam_get_cigar(b); - if (bam_cigar_op(cigar0[0]) != BAM_CSOFT_CLIP || bam_cigar_oplen(cigar0[0]) != c->l_qseq) return 0; - fake_bytes = c->n_cigar * 4; - int saved_errno = errno; - CG = bam_aux_get(b, "CG"); - if (!CG) { - if (errno != ENOENT) return -1; // Bad aux data - errno = saved_errno; // restore errno on expected no-CG-tag case - return 0; - } - if (CG[0] != 'B' || !(CG[1] == 'I' || CG[1] == 'i')) - return 0; // not of type B,I - CG_len = le_to_u32(CG + 2); - if (CG_len < c->n_cigar || CG_len >= 1U<<29) return 0; // don't move if the real CIGAR length is shorter than the fake cigar length - - // move from the CG tag to the right position - cigar_st = (uint8_t*)cigar0 - b->data; - c->n_cigar = CG_len; - n_cigar4 = c->n_cigar * 4; - CG_st = CG - b->data - 2; - CG_en = CG_st + 8 + n_cigar4; - if (possibly_expand_bam_data(b, n_cigar4 - fake_bytes) < 0) return -1; - b->l_data = b->l_data - fake_bytes + n_cigar4; // we need c->n_cigar-fake_bytes bytes to swap CIGAR to the right place - memmove(b->data + cigar_st + n_cigar4, b->data + cigar_st + fake_bytes, ori_len - (cigar_st + fake_bytes)); // insert c->n_cigar-fake_bytes empty space to make room - memcpy(b->data + cigar_st, b->data + (n_cigar4 - fake_bytes) + CG_st + 8, n_cigar4); // copy the real CIGAR to the right place; -fake_bytes for the fake CIGAR - if (ori_len > CG_en) // move data after the CG tag - memmove(b->data + CG_st + n_cigar4 - fake_bytes, b->data + CG_en + n_cigar4 - fake_bytes, 
ori_len - CG_en); - b->l_data -= n_cigar4 + 8; // 8: CGBI (4 bytes) and CGBI length (4) - if (recal_bin) - b->core.bin = hts_reg2bin(b->core.pos, bam_endpos(b), 14, 5); - if (give_warning) - hts_log_error("%s encodes a CIGAR with %d operators at the CG tag", bam_get_qname(b), c->n_cigar); - return 1; -} - -static inline int aux_type2size(uint8_t type) -{ - switch (type) { - case 'A': case 'c': case 'C': - return 1; - case 's': case 'S': - return 2; - case 'i': case 'I': case 'f': - return 4; - case 'd': - return 8; - case 'Z': case 'H': case 'B': - return type; - default: - return 0; - } -} - -static void swap_data(const bam1_core_t *c, int l_data, uint8_t *data, int is_host) -{ - uint32_t *cigar = (uint32_t*)(data + c->l_qname); - uint32_t i; - for (i = 0; i < c->n_cigar; ++i) ed_swap_4p(&cigar[i]); -} - -// Fix bad records where qname is not terminated correctly. -static int fixup_missing_qname_nul(bam1_t *b) { - bam1_core_t *c = &b->core; - - // Note this is called before c->l_extranul is added to c->l_qname - if (c->l_extranul > 0) { - b->data[c->l_qname++] = '\0'; - c->l_extranul--; - } else { - if (b->l_data > INT_MAX - 4) return -1; - if (realloc_bam_data(b, b->l_data + 4) < 0) return -1; - b->l_data += 4; - b->data[c->l_qname++] = '\0'; - c->l_extranul = 3; - } - return 0; -} - -/* - * Note a second interface that returns a bam pointer instead would avoid bam_copy1 - * in multi-threaded handling. This may be worth considering for htslib2. 
- */ -int bam_read1(BGZF *fp, bam1_t *b) -{ - bam1_core_t *c = &b->core; - int32_t block_len, ret, i; - uint32_t x[8], new_l_data; - - b->l_data = 0; - - if ((ret = bgzf_read(fp, &block_len, 4)) != 4) { - if (ret == 0) return -1; // normal end-of-file - else return -2; // truncated - } - if (fp->is_be) - ed_swap_4p(&block_len); - if (block_len < 32) return -4; // block_len includes core data - if (bgzf_read(fp, x, 32) != 32) return -3; - if (fp->is_be) { - for (i = 0; i < 8; ++i) ed_swap_4p(x + i); - } - c->tid = x[0]; c->pos = (int32_t)x[1]; - c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff; - c->l_extranul = (c->l_qname%4 != 0)? (4 - c->l_qname%4) : 0; - c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff; - c->l_qseq = x[4]; - c->mtid = x[5]; c->mpos = (int32_t)x[6]; c->isize = (int32_t)x[7]; - - new_l_data = block_len - 32 + c->l_extranul; - if (new_l_data > INT_MAX || c->l_qseq < 0 || c->l_qname < 1) return -4; - if (((uint64_t) c->n_cigar << 2) + c->l_qname + c->l_extranul - + (((uint64_t) c->l_qseq + 1) >> 1) + c->l_qseq > (uint64_t) new_l_data) - return -4; - if (realloc_bam_data(b, new_l_data) < 0) return -4; - b->l_data = new_l_data; - - if (bgzf_read(fp, b->data, c->l_qname) != c->l_qname) return -4; - if (b->data[c->l_qname - 1] != '\0') { // Try to fix missing NUL termination - if (fixup_missing_qname_nul(b) < 0) return -4; - } - for (i = 0; i < c->l_extranul; ++i) b->data[c->l_qname+i] = '\0'; - c->l_qname += c->l_extranul; - if (b->l_data < c->l_qname || - bgzf_read(fp, b->data + c->l_qname, b->l_data - c->l_qname) != b->l_data - c->l_qname) - return -4; - if (fp->is_be) swap_data(c, b->l_data, b->data, 0); - if (bam_tag2cigar(b, 0, 0) < 0) - return -4; - - if (c->n_cigar > 0) { // recompute "bin" and check CIGAR-qlen consistency - hts_pos_t rlen, qlen; - bam_cigar2rqlens(c->n_cigar, bam_get_cigar(b), &rlen, &qlen); - if ((b->core.flag & BAM_FUNMAP) || rlen == 0) rlen = 1; - b->core.bin = hts_reg2bin(b->core.pos, b->core.pos + rlen, 14, 
5); - // Sanity check for broken CIGAR alignments - if (c->l_qseq > 0 && !(c->flag & BAM_FUNMAP) && qlen != c->l_qseq) { - hts_log_error("CIGAR and query sequence lengths differ for %s", - bam_get_qname(b)); - return -4; - } - } - - return 4 + block_len; -} - -int bam_write1(BGZF *fp, const bam1_t *b) -{ - const bam1_core_t *c = &b->core; - uint32_t x[8], block_len = b->l_data - c->l_extranul + 32, y; - int i, ok; - if (c->l_qname - c->l_extranul > 255) { - hts_log_error("QNAME \"%s\" is longer than 254 characters", bam_get_qname(b)); - errno = EOVERFLOW; - return -1; - } - if (c->n_cigar > 0xffff) block_len += 16; // "16" for "CGBI", 4-byte tag length and 8-byte fake CIGAR - if (c->pos > INT_MAX || - c->mpos > INT_MAX || - c->isize < INT_MIN || c->isize > INT_MAX) { - hts_log_error("Positional data is too large for BAM format"); - return -1; - } - x[0] = c->tid; - x[1] = c->pos; - x[2] = (uint32_t)c->bin<<16 | c->qual<<8 | (c->l_qname - c->l_extranul); - if (c->n_cigar > 0xffff) x[3] = (uint32_t)c->flag << 16 | 2; - else x[3] = (uint32_t)c->flag << 16 | (c->n_cigar & 0xffff); - x[4] = c->l_qseq; - x[5] = c->mtid; - x[6] = c->mpos; - x[7] = c->isize; - ok = (bgzf_flush_try(fp, 4 + block_len) >= 0); - if (fp->is_be) { - for (i = 0; i < 8; ++i) ed_swap_4p(x + i); - y = block_len; - if (ok) ok = (bgzf_write(fp, ed_swap_4p(&y), 4) >= 0); - swap_data(c, b->l_data, b->data, 1); - } else { - if (ok) ok = (bgzf_write(fp, &block_len, 4) >= 0); - } - if (ok) ok = (bgzf_write(fp, x, 32) >= 0); - if (ok) ok = (bgzf_write(fp, b->data, c->l_qname - c->l_extranul) >= 0); - if (c->n_cigar <= 0xffff) { // no long CIGAR; write normally - if (ok) ok = (bgzf_write(fp, b->data + c->l_qname, b->l_data - c->l_qname) >= 0); - } else { // with long CIGAR, insert a fake CIGAR record and move the real CIGAR to the CG:B,I tag - uint8_t buf[8]; - uint32_t cigar_st, cigar_en, cigar[2]; - hts_pos_t cigreflen = bam_cigar2rlen(c->n_cigar, bam_get_cigar(b)); - if (cigreflen >= (1<<28)) { - // 
Length of reference covered is greater than the biggest - // CIGAR operation currently allowed. - hts_log_error("Record %s with %d CIGAR ops and ref length %"PRIhts_pos - " cannot be written in BAM. Try writing SAM or CRAM instead.\n", - bam_get_qname(b), c->n_cigar, cigreflen); - return -1; - } - cigar_st = (uint8_t*)bam_get_cigar(b) - b->data; - cigar_en = cigar_st + c->n_cigar * 4; - cigar[0] = (uint32_t)c->l_qseq << 4 | BAM_CSOFT_CLIP; - cigar[1] = (uint32_t)cigreflen << 4 | BAM_CREF_SKIP; - u32_to_le(cigar[0], buf); - u32_to_le(cigar[1], buf + 4); - if (ok) ok = (bgzf_write(fp, buf, 8) >= 0); // write cigar: SN - if (ok) ok = (bgzf_write(fp, &b->data[cigar_en], b->l_data - cigar_en) >= 0); // write data after CIGAR - if (ok) ok = (bgzf_write(fp, "CGBI", 4) >= 0); // write CG:B,I - u32_to_le(c->n_cigar, buf); - if (ok) ok = (bgzf_write(fp, buf, 4) >= 0); // write the true CIGAR length - if (ok) ok = (bgzf_write(fp, &b->data[cigar_st], c->n_cigar * 4) >= 0); // write the real CIGAR - } - if (fp->is_be) swap_data(c, b->l_data, b->data, 0); - return ok? 4 + block_len : -1; -} - -/* - * Write a BAM file and append to the in-memory index simultaneously. 
- */ -static int bam_write_idx1(htsFile *fp, const sam_hdr_t *h, const bam1_t *b) { - BGZF *bfp = fp->fp.bgzf; - - if (!fp->idx) - return bam_write1(bfp, b); - - uint32_t block_len = b->l_data - b->core.l_extranul + 32; - if (bgzf_flush_try(bfp, 4 + block_len) < 0) - return -1; - if (!bfp->mt) - hts_idx_amend_last(fp->idx, bgzf_tell(bfp)); - - int ret = bam_write1(bfp, b); - if (ret < 0) - return -1; - - if (bgzf_idx_push(bfp, fp->idx, b->core.tid, b->core.pos, bam_endpos(b), bgzf_tell(bfp), !(b->core.flag&BAM_FUNMAP)) < 0) { - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); - ret = -1; - } - - return ret; -} - -/* - * Set the qname in a BAM record - */ -int bam_set_qname(bam1_t *rec, const char *qname) -{ - if (!rec) return -1; - if (!qname || !*qname) return -1; - - size_t old_len = rec->core.l_qname; - size_t new_len = strlen(qname) + 1; - if (new_len < 1 || new_len > 255) return -1; - - int extranul = (new_len%4 != 0) ? 
(4 - new_len%4) : 0; - - size_t new_data_len = rec->l_data - old_len + new_len + extranul; - if (realloc_bam_data(rec, new_data_len) < 0) return -1; - - // Make room - if (new_len + extranul != rec->core.l_qname) - memmove(rec->data + new_len + extranul, rec->data + rec->core.l_qname, rec->l_data - rec->core.l_qname); - // Copy in new name and pad if needed - memcpy(rec->data, qname, new_len); - int n; - for (n = 0; n < extranul; n++) rec->data[new_len + n] = '\0'; - - rec->l_data = new_data_len; - rec->core.l_qname = new_len + extranul; - rec->core.l_extranul = extranul; - - return 0; -} - -/******************** - *** BAM indexing *** - ********************/ - -static hts_idx_t *sam_index(htsFile *fp, int min_shift) -{ - int n_lvls, i, fmt, ret; - bam1_t *b; - hts_idx_t *idx; - sam_hdr_t *h; - h = sam_hdr_read(fp); - if (h == NULL) return NULL; - if (min_shift > 0) { - hts_pos_t max_len = 0, s; - for (i = 0; i < h->n_targets; ++i) { - hts_pos_t len = sam_hdr_tid2len(h, i); - if (max_len < len) max_len = len; - } - max_len += 256; - for (n_lvls = 0, s = 1< s; ++n_lvls, s <<= 3); - fmt = HTS_FMT_CSI; - } else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_BAI; - idx = hts_idx_init(h->n_targets, fmt, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - b = bam_init1(); - while ((ret = sam_read1(fp, h, b)) >= 0) { - ret = hts_idx_push(idx, b->core.tid, b->core.pos, bam_endpos(b), bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)); - if (ret < 0) { // unsorted or doesn't fit - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); - goto err; - } - } - if (ret < -1) goto err; // corrupted BAM file - - hts_idx_finish(idx, bgzf_tell(fp->fp.bgzf)); - sam_hdr_destroy(h); - bam_destroy1(b); - return idx; - -err: - bam_destroy1(b); - hts_idx_destroy(idx); - return NULL; -} - -int sam_index_build3(const char 
*fn, const char *fnidx, int min_shift, int nthreads) -{ - hts_idx_t *idx; - htsFile *fp; - int ret = 0; - - if ((fp = hts_open(fn, "r")) == 0) return -2; - if (nthreads) - hts_set_threads(fp, nthreads); - - switch (fp->format.format) { - case cram: - - ret = cram_index_build(fp->fp.cram, fn, fnidx); - break; - - case bam: - case sam: - if (fp->format.compression != bgzf) { - hts_log_error("%s file \"%s\" not BGZF compressed", - fp->format.format == bam ? "BAM" : "SAM", fn); - ret = -1; - break; - } - idx = sam_index(fp, min_shift); - if (idx) { - ret = hts_idx_save_as(idx, fn, fnidx, (min_shift > 0)? HTS_FMT_CSI : HTS_FMT_BAI); - if (ret < 0) ret = -4; - hts_idx_destroy(idx); - } - else ret = -1; - break; - - default: - ret = -3; - break; - } - hts_close(fp); - - return ret; -} - -int sam_index_build2(const char *fn, const char *fnidx, int min_shift) -{ - return sam_index_build3(fn, fnidx, min_shift, 0); -} - -int sam_index_build(const char *fn, int min_shift) -{ - return sam_index_build3(fn, NULL, min_shift, 0); -} - -// Provide bam_index_build() symbol for binary compatibility with earlier HTSlib -#undef bam_index_build -int bam_index_build(const char *fn, int min_shift) -{ - return sam_index_build2(fn, NULL, min_shift); -} - -// Initialise fp->idx for the current format type. -// This must be called after the header has been written but no other data. 
-int sam_idx_init(htsFile *fp, sam_hdr_t *h, int min_shift, const char *fnidx) { - fp->fnidx = fnidx; - if (fp->format.format == bam || fp->format.format == bcf || - (fp->format.format == sam && fp->format.compression == bgzf)) { - int n_lvls, fmt = HTS_FMT_CSI; - if (min_shift > 0) { - int64_t max_len = 0, s; - int i; - for (i = 0; i < h->n_targets; ++i) - if (max_len < h->target_len[i]) max_len = h->target_len[i]; - max_len += 256; - for (n_lvls = 0, s = 1< s; ++n_lvls, s <<= 3); - - } else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_BAI; - - fp->idx = hts_idx_init(h->n_targets, fmt, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - return fp->idx ? 0 : -1; - } - - if (fp->format.format == cram) { - fp->fp.cram->idxfp = bgzf_open(fnidx, "wg"); - return fp->fp.cram->idxfp ? 0 : -1; - } - - return -1; -} - -// Finishes an index. Call after the last record has been written. -// Returns 0 on success, <0 on failure. -int sam_idx_save(htsFile *fp) { - if (fp->format.format == bam || fp->format.format == bcf || - fp->format.format == vcf || fp->format.format == sam) { - int ret; - if ((ret = sam_state_destroy(fp)) < 0) { - errno = -ret; - return -1; - } - if (!fp->is_bgzf || bgzf_flush(fp->fp.bgzf) < 0) - return -1; - hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf)); - - if (hts_idx_finish(fp->idx, bgzf_tell(fp->fp.bgzf)) < 0) - return -1; - - return hts_idx_save_as(fp->idx, NULL, fp->fnidx, hts_idx_fmt(fp->idx)); - - } else if (fp->format.format == cram) { - // flushed and closed by cram_close - } - - return 0; -} - -static int sam_readrec(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - htsFile *fp = (htsFile *)fpv; - bam1_t *b = bv; - fp->line.l = 0; - int ret = sam_read1(fp, fp->bam_header, b); - if (ret >= 0) { - *tid = b->core.tid; - *beg = b->core.pos; - *end = bam_endpos(b); - } - return ret; -} - -// This is used only with read_rest=1 iterators, so need not set tid/beg/end. 
-static int sam_readrec_rest(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - htsFile *fp = (htsFile *)fpv; - bam1_t *b = bv; - fp->line.l = 0; - int ret = sam_read1(fp, fp->bam_header, b); - return ret; -} - -// Internal (for now) func used by bam_sym_lookup. This is copied from -// samtools/bam.c. -static const char *bam_get_library(const bam_hdr_t *h, const bam1_t *b) -{ - const char *rg; - kstring_t lib = { 0, 0, NULL }; - rg = (char *)bam_aux_get(b, "RG"); - - if (!rg) - return NULL; - else - rg++; - - if (sam_hdr_find_tag_id((bam_hdr_t *)h, "RG", "ID", rg, "LB", &lib) < 0) - return NULL; - - static char LB_text[1024]; - int len = lib.l < sizeof(LB_text) - 1 ? lib.l : sizeof(LB_text) - 1; - - memcpy(LB_text, lib.s, len); - LB_text[len] = 0; - - free(lib.s); - - return LB_text; -} - - -// Bam record pointer and SAM header combined -typedef struct { - const sam_hdr_t *h; - const bam1_t *b; -} hb_pair; - -// Looks up variable names in str and replaces them with their value. -// Also supports aux tags. -// -// Note the expression parser deliberately overallocates str size so it -// is safe to use memcmp over strcmp. -static int bam_sym_lookup(void *data, char *str, char **end, - hts_expr_val_t *res) { - hb_pair *hb = (hb_pair *)data; - const bam1_t *b = hb->b; - - res->is_str = 0; - switch(*str) { - case 'c': - if (memcmp(str, "cigar", 5) == 0) { - *end = str+5; - res->is_str = 1; - ks_clear(&res->s); - uint32_t *cigar = bam_get_cigar(b); - int i, n = b->core.n_cigar, r = 0; - if (n) { - for (i = 0; i < n; i++) { - r |= kputw (bam_cigar_oplen(cigar[i]), &res->s) < 0; - r |= kputc_(bam_cigar_opchr(cigar[i]), &res->s) < 0; - } - r |= kputs("", &res->s) < 0; - } else { - r |= kputs("*", &res->s) < 0; - } - return r ? 
-1 : 0; - } - break; - - case 'e': - if (memcmp(str, "endpos", 6) == 0) { - *end = str+6; - res->d = bam_endpos(b); - return 0; - } - break; - - case 'f': - if (memcmp(str, "flag", 4) == 0) { - str = *end = str+4; - if (*str != '.') { - res->d = b->core.flag; - return 0; - } else { - str++; - if (!memcmp(str, "paired", 6)) { - *end = str+6; - res->d = b->core.flag & BAM_FPAIRED; - return 0; - } else if (!memcmp(str, "proper_pair", 11)) { - *end = str+11; - res->d = b->core.flag & BAM_FPROPER_PAIR; - return 0; - } else if (!memcmp(str, "unmap", 5)) { - *end = str+5; - res->d = b->core.flag & BAM_FUNMAP; - return 0; - } else if (!memcmp(str, "munmap", 6)) { - *end = str+6; - res->d = b->core.flag & BAM_FMUNMAP; - return 0; - } else if (!memcmp(str, "reverse", 7)) { - *end = str+7; - res->d = b->core.flag & BAM_FREVERSE; - return 0; - } else if (!memcmp(str, "mreverse", 8)) { - *end = str+8; - res->d = b->core.flag & BAM_FMREVERSE; - return 0; - } else if (!memcmp(str, "read1", 5)) { - *end = str+5; - res->d = b->core.flag & BAM_FREAD1; - return 0; - } else if (!memcmp(str, "read2", 5)) { - *end = str+5; - res->d = b->core.flag & BAM_FREAD2; - return 0; - } else if (!memcmp(str, "secondary", 9)) { - *end = str+9; - res->d = b->core.flag & BAM_FSECONDARY; - return 0; - } else if (!memcmp(str, "qcfail", 6)) { - *end = str+6; - res->d = b->core.flag & BAM_FQCFAIL; - return 0; - } else if (!memcmp(str, "dup", 3)) { - *end = str+3; - res->d = b->core.flag & BAM_FDUP; - return 0; - } else if (!memcmp(str, "supplementary", 13)) { - *end = str+13; - res->d = b->core.flag & BAM_FSUPPLEMENTARY; - return 0; - } else { - hts_log_error("Unrecognised flag string"); - return -1; - } - } - } - break; - - case 'h': - if (memcmp(str, "hclen", 5) == 0) { - int hclen = 0; - uint32_t *cigar = bam_get_cigar(b); - uint32_t ncigar = b->core.n_cigar; - - // left - if (ncigar > 0 && bam_cigar_op(cigar[0]) == BAM_CHARD_CLIP) - hclen = bam_cigar_oplen(cigar[0]); - - // right - if (ncigar > 1 && 
bam_cigar_op(cigar[ncigar-1]) == BAM_CHARD_CLIP) - hclen += bam_cigar_oplen(cigar[ncigar-1]); - - *end = str+5; - res->d = hclen; - return 0; - } - break; - - case 'l': - if (memcmp(str, "library", 7) == 0) { - *end = str+7; - res->is_str = 1; - const char *lib = bam_get_library(hb->h, b); - kputs(lib ? lib : "", ks_clear(&res->s)); - return 0; - } - break; - - case 'm': - if (memcmp(str, "mapq", 4) == 0) { - *end = str+4; - res->d = b->core.qual; - return 0; - } else if (memcmp(str, "mpos", 4) == 0) { - *end = str+4; - res->d = b->core.mpos+1; - return 0; - } else if (memcmp(str, "mrname", 6) == 0) { - *end = str+6; - res->is_str = 1; - const char *rn = sam_hdr_tid2name(hb->h, b->core.mtid); - kputs(rn ? rn : "*", ks_clear(&res->s)); - return 0; - } else if (memcmp(str, "mrefid", 6) == 0) { - *end = str+6; - res->d = b->core.mtid; - return 0; - } - break; - - case 'n': - if (memcmp(str, "ncigar", 6) == 0) { - *end = str+6; - res->d = b->core.n_cigar; - return 0; - } - break; - - case 'p': - if (memcmp(str, "pos", 3) == 0) { - *end = str+3; - res->d = b->core.pos+1; - return 0; - } else if (memcmp(str, "pnext", 5) == 0) { - *end = str+5; - res->d = b->core.mpos+1; - return 0; - } - break; - - case 'q': - if (memcmp(str, "qlen", 4) == 0) { - *end = str+4; - res->d = bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)); - return 0; - } else if (memcmp(str, "qname", 5) == 0) { - *end = str+5; - res->is_str = 1; - kputs(bam_get_qname(b), ks_clear(&res->s)); - return 0; - } else if (memcmp(str, "qual", 4) == 0) { - *end = str+4; - ks_clear(&res->s); - if (ks_resize(&res->s, b->core.l_qseq+1) < 0) - return -1; - memcpy(res->s.s, bam_get_qual(b), b->core.l_qseq); - res->s.l = b->core.l_qseq; - res->is_str = 1; - return 0; - } - break; - - case 'r': - if (memcmp(str, "rlen", 4) == 0) { - *end = str+4; - res->d = bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); - return 0; - } else if (memcmp(str, "rname", 5) == 0) { - *end = str+5; - res->is_str = 1; - const char *rn = 
sam_hdr_tid2name(hb->h, b->core.tid); - kputs(rn ? rn : "*", ks_clear(&res->s)); - return 0; - } else if (memcmp(str, "rnext", 5) == 0) { - *end = str+5; - res->is_str = 1; - const char *rn = sam_hdr_tid2name(hb->h, b->core.mtid); - kputs(rn ? rn : "*", ks_clear(&res->s)); - return 0; - } else if (memcmp(str, "refid", 5) == 0) { - *end = str+5; - res->d = b->core.tid; - return 0; - } - break; - - case 's': - if (memcmp(str, "seq", 3) == 0) { - *end = str+3; - ks_clear(&res->s); - if (ks_resize(&res->s, b->core.l_qseq+1) < 0) - return -1; - nibble2base(bam_get_seq(b), res->s.s, b->core.l_qseq); - res->s.s[b->core.l_qseq] = 0; - res->s.l = b->core.l_qseq; - res->is_str = 1; - return 0; - } else if (memcmp(str, "sclen", 5) == 0) { - int sclen = 0; - uint32_t *cigar = bam_get_cigar(b); - int ncigar = b->core.n_cigar; - int left = 0; - - // left - if (ncigar > 0 - && bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) - left = 0, sclen += bam_cigar_oplen(cigar[0]); - else if (ncigar > 1 - && bam_cigar_op(cigar[0]) == BAM_CHARD_CLIP - && bam_cigar_op(cigar[1]) == BAM_CSOFT_CLIP) - left = 1, sclen += bam_cigar_oplen(cigar[1]); - - // right - if (ncigar-1 > left - && bam_cigar_op(cigar[ncigar-1]) == BAM_CSOFT_CLIP) - sclen += bam_cigar_oplen(cigar[ncigar-1]); - else if (ncigar-2 > left - && bam_cigar_op(cigar[ncigar-1]) == BAM_CHARD_CLIP - && bam_cigar_op(cigar[ncigar-2]) == BAM_CSOFT_CLIP) - sclen += bam_cigar_oplen(cigar[ncigar-2]); - - *end = str+5; - res->d = sclen; - return 0; - } - break; - - case 't': - if (memcmp(str, "tlen", 4) == 0) { - *end = str+4; - res->d = b->core.isize; - return 0; - } - break; - - case '[': - if (*str == '[' && str[1] && str[2] && str[3] == ']') { - /* aux tags */ - *end = str+4; - - uint8_t *aux = bam_aux_get(b, str+1); - if (aux) { - // we define the truth of a tag to be its presence, even if 0. 
- res->is_true = 1; - switch (*aux) { - case 'Z': - case 'H': - res->is_str = 1; - kputs((char *)aux+1, ks_clear(&res->s)); - break; - - case 'A': - res->is_str = 1; - kputsn((char *)aux+1, 1, ks_clear(&res->s)); - break; - - case 'i': case 'I': - case 's': case 'S': - case 'c': case 'C': - res->is_str = 0; - res->d = bam_aux2i(aux); - break; - - case 'f': - case 'd': - res->is_str = 0; - res->d = bam_aux2f(aux); - break; - - default: - hts_log_error("Aux type '%c not yet supported by filters", - *aux); - return -1; - } - return 0; - - } else { - // hence absent tags are always false (and strings) - res->is_str = 1; - res->s.l = 0; - res->d = 0; - res->is_true = 0; - return 0; - } - } - break; - } - - // All successful matches in switch should return 0. - // So if we didn't match, it's a parse error. - return -1; -} - -// Returns 1 when accepted by the filter, 0 if not, -1 on error. -int sam_passes_filter(const sam_hdr_t *h, const bam1_t *b, hts_filter_t *filt) -{ - hb_pair hb = {h, b}; - hts_expr_val_t res = HTS_EXPR_VAL_INIT; - if (hts_filter_eval2(filt, &hb, bam_sym_lookup, &res)) { - hts_log_error("Couldn't process filter expression"); - hts_expr_val_free(&res); - return -1; - } - - int t = res.is_true; - hts_expr_val_free(&res); - - return t; -} - -static int cram_readrec(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - htsFile *fp = fpv; - bam1_t *b = bv; - int pass_filter, ret; - - do { - ret = cram_get_bam_seq(fp->fp.cram, &b); - if (ret < 0) - return cram_eof(fp->fp.cram) ? 
-1 : -2; - - if (bam_tag2cigar(b, 1, 1) < 0) - return -2; - - *tid = b->core.tid; - *beg = b->core.pos; - *end = bam_endpos(b); - - if (fp->filter) { - pass_filter = sam_passes_filter(fp->bam_header, b, fp->filter); - if (pass_filter < 0) - return -2; - } else { - pass_filter = 1; - } - } while (pass_filter == 0); - - return ret; -} - -static int cram_pseek(void *fp, int64_t offset, int whence) -{ - cram_fd *fd = (cram_fd *)fp; - - if ((0 != cram_seek(fd, offset, SEEK_SET)) - && (0 != cram_seek(fd, offset - fd->first_container, SEEK_CUR))) - return -1; - - fd->curr_position = offset; - - if (fd->ctr) { - cram_free_container(fd->ctr); - if (fd->ctr_mt && fd->ctr_mt != fd->ctr) - cram_free_container(fd->ctr_mt); - - fd->ctr = NULL; - fd->ctr_mt = NULL; - fd->ooc = 0; - } - - return 0; -} - -/* - * cram_ptell is a pseudo-tell function, because it matches the position of the disk cursor only - * after a fresh seek call. Otherwise it indicates that the read takes place inside the buffered - * container previously fetched. It was designed like this to integrate with the functionality - * of the iterator stepping logic. 
- */ - -static int64_t cram_ptell(void *fp) -{ - cram_fd *fd = (cram_fd *)fp; - cram_container *c; - cram_slice *s; - int64_t ret = -1L; - - if (fd) { - if ((c = fd->ctr) != NULL) { - if ((s = c->slice) != NULL && s->max_rec) { - if ((c->curr_slice + s->curr_rec/s->max_rec) >= (c->max_slice + 1)) - fd->curr_position += c->offset + c->length; - } - } - ret = fd->curr_position; - } - - return ret; -} - -static int bam_pseek(void *fp, int64_t offset, int whence) -{ - BGZF *fd = (BGZF *)fp; - - return bgzf_seek(fd, offset, whence); -} - -static int64_t bam_ptell(void *fp) -{ - BGZF *fd = (BGZF *)fp; - if (!fd) - return -1L; - - return bgzf_tell(fd); -} - - - -static hts_idx_t *index_load(htsFile *fp, const char *fn, const char *fnidx, int flags) -{ - switch (fp->format.format) { - case bam: - case sam: - return hts_idx_load3(fn, fnidx, HTS_FMT_BAI, flags); - - case cram: { - if (cram_index_load(fp->fp.cram, fn, fnidx) < 0) return NULL; - - // Cons up a fake "index" just pointing at the associated cram_fd: - hts_cram_idx_t *idx = malloc(sizeof (hts_cram_idx_t)); - if (idx == NULL) return NULL; - idx->fmt = HTS_FMT_CRAI; - idx->cram = fp->fp.cram; - return (hts_idx_t *) idx; - } - - default: - return NULL; // TODO Would use tbx_index_load if it returned hts_idx_t - } -} - -hts_idx_t *sam_index_load3(htsFile *fp, const char *fn, const char *fnidx, int flags) -{ - return index_load(fp, fn, fnidx, flags); -} - -hts_idx_t *sam_index_load2(htsFile *fp, const char *fn, const char *fnidx) { - return index_load(fp, fn, fnidx, HTS_IDX_SAVE_REMOTE); -} - -hts_idx_t *sam_index_load(htsFile *fp, const char *fn) -{ - return index_load(fp, fn, NULL, HTS_IDX_SAVE_REMOTE); -} - -static hts_itr_t *cram_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - hts_itr_t *iter = (hts_itr_t *) calloc(1, sizeof(hts_itr_t)); - if (iter == NULL) return NULL; - - // Cons up a dummy iterator 
for which hts_itr_next() will simply invoke - // the readrec function: - iter->is_cram = 1; - iter->read_rest = 1; - iter->off = NULL; - iter->bins.a = NULL; - iter->readrec = readrec; - - if (tid >= 0 || tid == HTS_IDX_NOCOOR || tid == HTS_IDX_START) { - cram_range r = { tid, beg+1, end }; - int ret = cram_set_option(cidx->cram, CRAM_OPT_RANGE, &r); - - iter->curr_off = 0; - // The following fields are not required by hts_itr_next(), but are - // filled in in case user code wants to look at them. - iter->tid = tid; - iter->beg = beg; - iter->end = end; - - switch (ret) { - case 0: - break; - - case -2: - // No data vs this ref, so mark iterator as completed. - // Same as HTS_IDX_NONE. - iter->finished = 1; - break; - - default: - free(iter); - return NULL; - } - } - else switch (tid) { - case HTS_IDX_REST: - iter->curr_off = 0; - break; - case HTS_IDX_NONE: - iter->curr_off = 0; - iter->finished = 1; - break; - default: - hts_log_error("Query with tid=%d not implemented for CRAM files", tid); - abort(); - break; - } - - return iter; -} - -hts_itr_t *sam_itr_queryi(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - if (idx == NULL) - return hts_itr_query(NULL, tid, beg, end, sam_readrec_rest); - else if (cidx->fmt == HTS_FMT_CRAI) - return cram_itr_query(idx, tid, beg, end, sam_readrec); - else - return hts_itr_query(idx, tid, beg, end, sam_readrec); -} - -static int cram_name2id(void *fdv, const char *ref) -{ - cram_fd *fd = (cram_fd *) fdv; - return sam_hdr_name2tid(fd->header, ref); -} - -hts_itr_t *sam_itr_querys(const hts_idx_t *idx, sam_hdr_t *hdr, const char *region) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - return hts_itr_querys(idx, region, (hts_name2id_f)(bam_name2id), hdr, - cidx->fmt == HTS_FMT_CRAI ? 
cram_itr_query : hts_itr_query, - sam_readrec); -} - -hts_itr_t *sam_itr_regarray(const hts_idx_t *idx, sam_hdr_t *hdr, char **regarray, unsigned int regcount) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - hts_reglist_t *r_list = NULL; - int r_count = 0; - - if (!cidx || !hdr) - return NULL; - - hts_itr_t *itr = NULL; - if (cidx->fmt == HTS_FMT_CRAI) { - r_list = hts_reglist_create(regarray, regcount, &r_count, cidx->cram, cram_name2id); - if (!r_list) - return NULL; - itr = hts_itr_regions(idx, r_list, r_count, cram_name2id, cidx->cram, - hts_itr_multi_cram, cram_readrec, cram_pseek, cram_ptell); - } else { - r_list = hts_reglist_create(regarray, regcount, &r_count, hdr, (hts_name2id_f)(bam_name2id)); - if (!r_list) - return NULL; - itr = hts_itr_regions(idx, r_list, r_count, (hts_name2id_f)(bam_name2id), hdr, - hts_itr_multi_bam, sam_readrec, bam_pseek, bam_ptell); - } - - if (!itr) - hts_reglist_free(r_list, r_count); - - return itr; -} - -hts_itr_t *sam_itr_regions(const hts_idx_t *idx, sam_hdr_t *hdr, hts_reglist_t *reglist, unsigned int regcount) -{ - const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - - if(!cidx || !hdr || !reglist) - return NULL; - - if (cidx->fmt == HTS_FMT_CRAI) - return hts_itr_regions(idx, reglist, regcount, cram_name2id, cidx->cram, - hts_itr_multi_cram, cram_readrec, cram_pseek, cram_ptell); - else - return hts_itr_regions(idx, reglist, regcount, (hts_name2id_f)(bam_name2id), hdr, - hts_itr_multi_bam, sam_readrec, bam_pseek, bam_ptell); -} - -/********************** - *** SAM header I/O *** - **********************/ - -#include "htslib/kseq.h" -#include "htslib/kstring.h" - -sam_hdr_t *sam_hdr_parse(size_t l_text, const char *text) -{ - sam_hdr_t *bh = sam_hdr_init(); - if (!bh) return NULL; - - if (sam_hdr_add_lines(bh, text, l_text) != 0) { - sam_hdr_destroy(bh); - return NULL; - } - - return bh; -} - -static int valid_sam_header_type(const char *s) { - if (s[0] != '@') return 0; - switch (s[1]) { - case 
'H': - return s[2] == 'D' && s[3] == '\t'; - case 'S': - return s[2] == 'Q' && s[3] == '\t'; - case 'R': - case 'P': - return s[2] == 'G' && s[3] == '\t'; - case 'C': - return s[2] == 'O'; - } - return 0; -} - -// Minimal sanitisation of a header to ensure. -// - null terminated string. -// - all lines start with @ (also implies no blank lines). -// -// Much more could be done, but currently is not, including: -// - checking header types are known (HD, SQ, etc). -// - syntax (eg checking tab separated fields). -// - validating n_targets matches @SQ records. -// - validating target lengths against @SQ records. -static sam_hdr_t *sam_hdr_sanitise(sam_hdr_t *h) { - if (!h) - return NULL; - - // Special case for empty headers. - if (h->l_text == 0) - return h; - - size_t i; - unsigned int lnum = 0; - char *cp = h->text, last = '\n'; - for (i = 0; i < h->l_text; i++) { - // NB: l_text excludes terminating nul. This finds early ones. - if (cp[i] == 0) - break; - - // Error on \n[^@], including duplicate newlines - if (last == '\n') { - lnum++; - if (cp[i] != '@') { - hts_log_error("Malformed SAM header at line %u", lnum); - sam_hdr_destroy(h); - return NULL; - } - } - - last = cp[i]; - } - - if (i < h->l_text) { // Early nul found. Complain if not just padding. - size_t j = i; - while (j < h->l_text && cp[j] == '\0') j++; - if (j < h->l_text) - hts_log_warning("Unexpected NUL character in header. Possibly truncated"); - } - - // Add trailing newline and/or trailing nul if required. - if (last != '\n') { - hts_log_warning("Missing trailing newline on SAM header. 
Possibly truncated"); - - if (h->l_text < 2 || i >= h->l_text - 2) { - if (h->l_text >= SIZE_MAX - 2) { - hts_log_error("No room for extra newline"); - sam_hdr_destroy(h); - return NULL; - } - - cp = realloc(h->text, (size_t) h->l_text+2); - if (!cp) { - sam_hdr_destroy(h); - return NULL; - } - h->text = cp; - } - cp[i++] = '\n'; - - // l_text may be larger already due to multiple nul padding - if (h->l_text < i) - h->l_text = i; - cp[h->l_text] = '\0'; - } - - return h; -} - -static void known_stderr(const char *tool, const char *advice) { - hts_log_warning("SAM file corrupted by embedded %s error/log message", tool); - hts_log_warning("%s", advice); -} - -static void warn_if_known_stderr(const char *line) { - if (strstr(line, "M::bwa_idx_load_from_disk") != NULL) - known_stderr("bwa", "Use `bwa mem -o file.sam ...` or `bwa sampe -f file.sam ...` instead of `bwa ... > file.sam`"); - else if (strstr(line, "M::mem_pestat") != NULL) - known_stderr("bwa", "Use `bwa mem -o file.sam ...` instead of `bwa mem ... > file.sam`"); - else if (strstr(line, "loaded/built the index") != NULL) - known_stderr("minimap2", "Use `minimap2 -o file.sam ...` instead of `minimap2 ... 
> file.sam`"); -} - -static sam_hdr_t *sam_hdr_create(htsFile* fp) { - kstring_t str = { 0, 0, NULL }; - khint_t k; - sam_hdr_t* h = sam_hdr_init(); - const char *q, *r; - char* sn = NULL; - khash_t(s2i) *d = kh_init(s2i); - khash_t(s2i) *long_refs = NULL; - if (!h || !d) - goto error; - - int ret, has_SQ = 0; - int next_c = '@'; - while (next_c == '@' && (ret = hts_getline(fp, KS_SEP_LINE, &fp->line)) >= 0) { - if (fp->line.s[0] != '@') - break; - - if (fp->line.l > 3 && strncmp(fp->line.s, "@SQ", 3) == 0) { - has_SQ = 1; - hts_pos_t ln = -1; - for (q = fp->line.s + 4;; ++q) { - if (strncmp(q, "SN:", 3) == 0) { - q += 3; - for (r = q;*r != '\t' && *r != '\n' && *r != '\0';++r); - - if (sn) { - hts_log_warning("SQ header line has more than one SN: tag"); - free(sn); - } - sn = (char*)calloc(r - q + 1, 1); - if (!sn) - goto error; - - strncpy(sn, q, r - q); - q = r; - } else { - if (strncmp(q, "LN:", 3) == 0) - ln = strtoll(q + 3, (char**)&q, 10); - } - - while (*q != '\t' && *q != '\n' && *q != '\0') - ++q; - if (*q == '\0' || *q == '\n') - break; - } - if (sn) { - if (ln >= 0) { - int absent; - k = kh_put(s2i, d, sn, &absent); - if (absent < 0) - goto error; - - if (!absent) { - hts_log_warning("Duplicated sequence \"%s\" in file \"%s\"", sn, fp->fn); - free(sn); - } else { - sn = NULL; - if (ln >= UINT32_MAX) { - // Stash away ref length that - // doesn't fit in target_len array - int k2; - if (!long_refs) { - long_refs = kh_init(s2i); - if (!long_refs) - goto error; - } - k2 = kh_put(s2i, long_refs, kh_key(d, k), &absent); - if (absent < 0) - goto error; - kh_val(long_refs, k2) = ln; - kh_val(d, k) = ((int64_t) (kh_size(d) - 1) << 32 - | UINT32_MAX); - } else { - kh_val(d, k) = (int64_t) (kh_size(d) - 1) << 32 | ln; - } - } - } else { - hts_log_warning("Ignored @SQ SN:%s : bad or missing LN tag", sn); - warn_if_known_stderr(fp->line.s); - free(sn); - } - } else { - hts_log_warning("Ignored @SQ line with missing SN: tag"); - warn_if_known_stderr(fp->line.s); - } 
- sn = NULL; - } - else if (!valid_sam_header_type(fp->line.s)) { - hts_log_error("Invalid header line: must start with @HD/@SQ/@RG/@PG/@CO"); - warn_if_known_stderr(fp->line.s); - goto error; - } - - if (kputsn(fp->line.s, fp->line.l, &str) < 0) - goto error; - - if (kputc('\n', &str) < 0) - goto error; - - if (fp->is_bgzf) { - next_c = bgzf_peek(fp->fp.bgzf); - } else { - unsigned char nc; - ssize_t pret = hpeek(fp->fp.hfile, &nc, 1); - next_c = pret > 0 ? nc : pret - 1; - } - if (next_c < -1) - goto error; - } - if (next_c != '@') - fp->line.l = 0; - - if (ret < -1) - goto error; - - if (!has_SQ && fp->fn_aux) { - kstring_t line = { 0, 0, NULL }; - - /* The reference index (.fai) is actually needed here */ - char *fai_fn = fp->fn_aux; - char *fn_delim = strstr(fp->fn_aux, HTS_IDX_DELIM); - if (fn_delim) - fai_fn = fn_delim + strlen(HTS_IDX_DELIM); - - hFILE* f = hopen(fai_fn, "r"); - int e = 0, absent; - if (f == NULL) - goto error; - - while (line.l = 0, kgetline(&line, (kgets_func*) hgets, f) >= 0) { - char* tab = strchr(line.s, '\t'); - hts_pos_t ln; - - if (tab == NULL) - continue; - - sn = (char*)calloc(tab-line.s+1, 1); - if (!sn) { - e = 1; - break; - } - memcpy(sn, line.s, tab-line.s); - k = kh_put(s2i, d, sn, &absent); - if (absent < 0) { - e = 1; - break; - } - - ln = strtoll(tab, NULL, 10); - - if (!absent) { - hts_log_warning("Duplicated sequence \"%s\" in the file \"%s\"", sn, fai_fn); - free(sn); - sn = NULL; - } else { - sn = NULL; - if (ln >= UINT32_MAX) { - // Stash away ref length that - // doesn't fit in target_len array - khint_t k2; - int absent = -1; - if (!long_refs) { - long_refs = kh_init(s2i); - if (!long_refs) { - e = 1; - break; - } - } - k2 = kh_put(s2i, long_refs, kh_key(d, k), &absent); - if (absent < 0) { - e = 1; - break; - } - kh_val(long_refs, k2) = ln; - kh_val(d, k) = ((int64_t) (kh_size(d) - 1) << 32 - | UINT32_MAX); - } else { - kh_val(d, k) = (int64_t) (kh_size(d) - 1) << 32 | ln; - } - has_SQ = 1; - } - - e |= 
kputs("@SQ\tSN:", &str) < 0; - e |= kputsn(line.s, tab - line.s, &str) < 0; - e |= kputs("\tLN:", &str) < 0; - e |= kputll(ln, &str) < 0; - e |= kputc('\n', &str) < 0; - if (e) - break; - } - - ks_free(&line); - if (hclose(f) != 0) { - hts_log_error("Error on closing %s", fai_fn); - e = 1; - } - if (e) - goto error; - } - - if (has_SQ) { - // Populate the targets array - h->n_targets = kh_size(d); - - h->target_name = (char**) malloc(sizeof(char*) * h->n_targets); - if (!h->target_name) { - h->n_targets = 0; - goto error; - } - - h->target_len = (uint32_t*) malloc(sizeof(uint32_t) * h->n_targets); - if (!h->target_len) { - h->n_targets = 0; - goto error; - } - - for (k = kh_begin(d); k != kh_end(d); ++k) { - if (!kh_exist(d, k)) - continue; - - h->target_name[kh_val(d, k) >> 32] = (char*) kh_key(d, k); - h->target_len[kh_val(d, k) >> 32] = kh_val(d, k) & 0xffffffffUL; - kh_val(d, k) >>= 32; - } - } - - // Repurpose sdict to hold any references longer than UINT32_MAX - h->sdict = long_refs; - - kh_destroy(s2i, d); - - if (str.l == 0) - kputsn("", 0, &str); - h->l_text = str.l; - h->text = ks_release(&str); - fp->bam_header = sam_hdr_sanitise(h); - fp->bam_header->ref_count = 1; - - return fp->bam_header; - - error: - if (h && d && (!h->target_name || !h->target_len)) { - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((void *)kh_key(d, k)); - } - sam_hdr_destroy(h); - ks_free(&str); - kh_destroy(s2i, d); - kh_destroy(s2i, long_refs); - if (sn) free(sn); - return NULL; -} - -sam_hdr_t *sam_hdr_read(htsFile *fp) -{ - if (!fp) { - errno = EINVAL; - return NULL; - } - - switch (fp->format.format) { - case bam: - return sam_hdr_sanitise(bam_hdr_read(fp->fp.bgzf)); - - case cram: - return sam_hdr_sanitise(sam_hdr_dup(fp->fp.cram->header)); - - case sam: - return sam_hdr_create(fp); - - case fastq_format: - case fasta_format: - return sam_hdr_init(); - - case empty_format: - errno = EPIPE; - return NULL; - - default: - errno = EFTYPE; - return NULL; - 
} -} - -int sam_hdr_write(htsFile *fp, const sam_hdr_t *h) -{ - if (!fp || !h) { - errno = EINVAL; - return -1; - } - - switch (fp->format.format) { - case binary_format: - fp->format.category = sequence_data; - fp->format.format = bam; - /* fall-through */ - case bam: - if (bam_hdr_write(fp->fp.bgzf, h) < 0) return -1; - break; - - case cram: { - cram_fd *fd = fp->fp.cram; - if (cram_set_header2(fd, h) < 0) return -1; - if (fp->fn_aux) - cram_load_reference(fd, fp->fn_aux); - if (cram_write_SAM_hdr(fd, fd->header) < 0) return -1; - } - break; - - case text_format: - fp->format.category = sequence_data; - fp->format.format = sam; - /* fall-through */ - case sam: { - if (!h->hrecs && !h->text) - return 0; - char *text; - kstring_t hdr_ks = { 0, 0, NULL }; - size_t l_text; - ssize_t bytes; - int r = 0, no_sq = 0; - - if (h->hrecs) { - if (sam_hrecs_rebuild_text(h->hrecs, &hdr_ks) != 0) - return -1; - text = hdr_ks.s; - l_text = hdr_ks.l; - } else { - const char *p = NULL; - do { - const char *q = p == NULL ? 
h->text : p + 4; - p = strstr(q, "@SQ\t"); - } while (!(p == NULL || p == h->text || *(p - 1) == '\n')); - no_sq = p == NULL; - text = h->text; - l_text = h->l_text; - } - - if (fp->is_bgzf) { - bytes = bgzf_write(fp->fp.bgzf, text, l_text); - } else { - bytes = hwrite(fp->fp.hfile, text, l_text); - } - free(hdr_ks.s); - if (bytes != l_text) - return -1; - - if (no_sq) { - int i; - for (i = 0; i < h->n_targets; ++i) { - fp->line.l = 0; - r |= kputsn("@SQ\tSN:", 7, &fp->line) < 0; - r |= kputs(h->target_name[i], &fp->line) < 0; - r |= kputsn("\tLN:", 4, &fp->line) < 0; - r |= kputw(h->target_len[i], &fp->line) < 0; - r |= kputc('\n', &fp->line) < 0; - if (r != 0) - return -1; - - if (fp->is_bgzf) { - bytes = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l); - } else { - bytes = hwrite(fp->fp.hfile, fp->line.s, fp->line.l); - } - if (bytes != fp->line.l) - return -1; - } - } - if (fp->is_bgzf) { - if (bgzf_flush(fp->fp.bgzf) != 0) return -1; - } else { - if (hflush(fp->fp.hfile) != 0) return -1; - } - } - break; - - case fastq_format: - case fasta_format: - // Nothing to output; FASTQ has no file headers. - break; - - default: - errno = EBADF; - return -1; - } - return 0; -} - -static int old_sam_hdr_change_HD(sam_hdr_t *h, const char *key, const char *val) -{ - char *p, *q, *beg = NULL, *end = NULL, *newtext; - size_t new_l_text; - if (!h || !key) - return -1; - - if (h->l_text > 3) { - if (strncmp(h->text, "@HD", 3) == 0) { //@HD line exists - if ((p = strchr(h->text, '\n')) == 0) return -1; - *p = '\0'; // for strstr call - - char tmp[5] = { '\t', key[0], key[0] ? 
key[1] : '\0', ':', '\0' }; - - if ((q = strstr(h->text, tmp)) != 0) { // key exists - *p = '\n'; // change back - - // mark the key:val - beg = q; - for (q += 4; *q != '\n' && *q != '\t'; ++q); - end = q; - - if (val && (strncmp(beg + 4, val, end - beg - 4) == 0) - && strlen(val) == end - beg - 4) - return 0; // val is the same, no need to change - - } else { - beg = end = p; - *p = '\n'; - } - } - } - if (beg == NULL) { // no @HD - new_l_text = h->l_text; - if (new_l_text > SIZE_MAX - strlen(SAM_FORMAT_VERSION) - 9) - return -1; - new_l_text += strlen(SAM_FORMAT_VERSION) + 8; - if (val) { - if (new_l_text > SIZE_MAX - strlen(val) - 5) - return -1; - new_l_text += strlen(val) + 4; - } - newtext = (char*)malloc(new_l_text + 1); - if (!newtext) return -1; - - if (val) - snprintf(newtext, new_l_text + 1, - "@HD\tVN:%s\t%s:%s\n%s", SAM_FORMAT_VERSION, key, val, h->text); - else - snprintf(newtext, new_l_text + 1, - "@HD\tVN:%s\n%s", SAM_FORMAT_VERSION, h->text); - } else { // has @HD but different or no key - new_l_text = (beg - h->text) + (h->text + h->l_text - end); - if (val) { - if (new_l_text > SIZE_MAX - strlen(val) - 5) - return -1; - new_l_text += strlen(val) + 4; - } - newtext = (char*)malloc(new_l_text + 1); - if (!newtext) return -1; - - if (val) { - snprintf(newtext, new_l_text + 1, "%.*s\t%s:%s%s", - (int) (beg - h->text), h->text, key, val, end); - } else { //delete key - snprintf(newtext, new_l_text + 1, "%.*s%s", - (int) (beg - h->text), h->text, end); - } - } - free(h->text); - h->text = newtext; - h->l_text = new_l_text; - return 0; -} - - -int sam_hdr_change_HD(sam_hdr_t *h, const char *key, const char *val) -{ - if (!h || !key) - return -1; - - if (!h->hrecs) - return old_sam_hdr_change_HD(h, key, val); - - if (val) { - if (sam_hdr_update_line(h, "HD", NULL, NULL, key, val, NULL) != 0) - return -1; - } else { - if (sam_hdr_remove_tag_id(h, "HD", NULL, NULL, key) != 0) - return -1; - } - return sam_hdr_rebuild(h); -} -/********************** - *** 
SAM record I/O *** - **********************/ - -static int sam_parse_B_vals(char type, uint32_t n, char *in, char **end, - char *r, bam1_t *b) -{ - int orig_l = b->l_data; - char *q = in; - int32_t size; - size_t bytes; - int overflow = 0; - - size = aux_type2size(type); - if (size <= 0 || size > 4) { - hts_log_error("Unrecognized type B:%c", type); - return -1; - } - - // Ensure space for type + values - bytes = (size_t) n * (size_t) size; - if (bytes / size != n - || possibly_expand_bam_data(b, bytes + 2 + sizeof(uint32_t))) { - hts_log_error("Out of memory"); - return -1; - } - - b->data[b->l_data++] = 'B'; - b->data[b->l_data++] = type; - i32_to_le(n, b->data + b->l_data); - b->l_data += sizeof(uint32_t); - // This ensures that q always ends up at the next comma after - // reading a number even if it's followed by junk. It - // prevents the possibility of trying to read more than n items. -#define skip_to_comma_(q) do { while (*(q) > '\t' && *(q) != ',') (q)++; } while (0) - if (type == 'c') { - while (q < r) { - *(b->data + b->l_data) = hts_str2int(q + 1, &q, 8, &overflow); - b->l_data++; - skip_to_comma_(q); - } - } else if (type == 'C') { - while (q < r) { - if (*q != '-') { - *(b->data + b->l_data) = hts_str2uint(q + 1, &q, 8, &overflow); - b->l_data++; - } else { - overflow = 1; - } - skip_to_comma_(q); - } - } else if (type == 's') { - while (q < r) { - i16_to_le(hts_str2int(q + 1, &q, 16, &overflow), b->data + b->l_data); - b->l_data += 2; - skip_to_comma_(q); - } - } else if (type == 'S') { - while (q < r) { - if (*q != '-') { - u16_to_le(hts_str2uint(q + 1, &q, 16, &overflow), b->data + b->l_data); - b->l_data += 2; - } else { - overflow = 1; - } - skip_to_comma_(q); - } - } else if (type == 'i') { - while (q < r) { - i32_to_le(hts_str2int(q + 1, &q, 32, &overflow), b->data + b->l_data); - b->l_data += 4; - skip_to_comma_(q); - } - } else if (type == 'I') { - while (q < r) { - if (*q != '-') { - u32_to_le(hts_str2uint(q + 1, &q, 32, &overflow), b->data 
+ b->l_data); - b->l_data += 4; - } else { - overflow = 1; - } - skip_to_comma_(q); - } - } else if (type == 'f') { - while (q < r) { - float_to_le(strtod(q + 1, &q), b->data + b->l_data); - b->l_data += 4; - skip_to_comma_(q); - } - } else { - hts_log_error("Unrecognized type B:%c", type); - return -1; - } - - if (!overflow) { - *end = q; - return 0; - } else { - int64_t max = 0, min = 0, val; - // Given type was incorrect. Try to rescue the situation. - q = in; - overflow = 0; - b->l_data = orig_l; - // Find out what range of values is present - while (q < r) { - val = hts_str2int(q + 1, &q, 64, &overflow); - if (max < val) max = val; - if (min > val) min = val; - skip_to_comma_(q); - } - // Retry with appropriate type - if (!overflow) { - if (min < 0) { - if (min >= INT8_MIN && max <= INT8_MAX) { - return sam_parse_B_vals('c', n, in, end, r, b); - } else if (min >= INT16_MIN && max <= INT16_MAX) { - return sam_parse_B_vals('s', n, in, end, r, b); - } else if (min >= INT32_MIN && max <= INT32_MAX) { - return sam_parse_B_vals('i', n, in, end, r, b); - } - } else { - if (max < UINT8_MAX) { - return sam_parse_B_vals('C', n, in, end, r, b); - } else if (max <= UINT16_MAX) { - return sam_parse_B_vals('S', n, in, end, r, b); - } else if (max <= UINT32_MAX) { - return sam_parse_B_vals('I', n, in, end, r, b); - } - } - } - // If here then at least one of the values is too big to store - hts_log_error("Numeric value in B array out of allowed range"); - return -1; - } -#undef skip_to_comma_ -} - -static inline unsigned int parse_sam_flag(char *v, char **rv, int *overflow) { - if (*v >= '1' && *v <= '9') { - return hts_str2uint(v, rv, 16, overflow); - } - else if (*v == '0') { - // handle single-digit "0" directly; otherwise it's hex or octal - if (v[1] == '\t') { *rv = v+1; return 0; } - else { - unsigned long val = strtoul(v, rv, 0); - if (val > 65535) { *overflow = 1; return 65535; } - return val; - } - } - else { - // TODO implement symbolic flag letters - *rv = v; - 
return 0; - } -} - -// Parse tag line and append to bam object b. -// Shared by both SAM and FASTQ parsers. -// -// The difference between the two is how lenient we are to recognising -// non-compliant strings. The FASTQ parser glosses over arbitrary -// non-SAM looking strings. -static inline int aux_parse(char *start, char *end, bam1_t *b, int lenient, - khash_t(tag) *tag_whitelist) { - int overflow = 0; - int checkpoint; - char logbuf[40]; - char *q = start, *p = end; - -#define _parse_err(cond, ...) \ - do { \ - if (cond) { \ - if (lenient) { \ - while (q < p && !isspace_c(*q)) \ - q++; \ - while (q < p && isspace_c(*q)) \ - q++; \ - b->l_data = checkpoint; \ - goto loop; \ - } else { \ - hts_log_error(__VA_ARGS__); \ - goto err_ret; \ - } \ - } \ - } while (0) - - while (q < p) loop: { - char type; - checkpoint = b->l_data; - if (p - q < 5) { - if (lenient) { - break; - } else { - hts_log_error("Incomplete aux field"); - goto err_ret; - } - } - _parse_err(q[0] < '!' || q[1] < '!', "invalid aux tag id"); - - if (lenient && (q[2] | q[4]) != ':') { - while (q < p && !isspace_c(*q)) - q++; - while (q < p && isspace_c(*q)) - q++; - continue; - } - - if (tag_whitelist) { - int tt = q[0]*256 + q[1]; - if (kh_get(tag, tag_whitelist, tt) == kh_end(tag_whitelist)) { - while (q < p && *q != '\t') - q++; - continue; - } - } - - // Copy over id - if (possibly_expand_bam_data(b, 2) < 0) goto err_ret; - memcpy(b->data + b->l_data, q, 2); b->l_data += 2; - q += 3; type = *q++; ++q; // q points to value - if (type != 'Z' && type != 'H') // the only zero length acceptable fields - _parse_err(*q <= '\t', "incomplete aux field"); - - // Ensure enough space for a double + type allocated. 
- if (possibly_expand_bam_data(b, 16) < 0) goto err_ret; - - if (type == 'A' || type == 'a' || type == 'c' || type == 'C') { - b->data[b->l_data++] = 'A'; - b->data[b->l_data++] = *q++; - } else if (type == 'i' || type == 'I') { - if (*q == '-') { - int32_t x = hts_str2int(q, &q, 32, &overflow); - if (x >= INT8_MIN) { - b->data[b->l_data++] = 'c'; - b->data[b->l_data++] = x; - } else if (x >= INT16_MIN) { - b->data[b->l_data++] = 's'; - i16_to_le(x, b->data + b->l_data); - b->l_data += 2; - } else { - b->data[b->l_data++] = 'i'; - i32_to_le(x, b->data + b->l_data); - b->l_data += 4; - } - } else { - uint32_t x = hts_str2uint(q, &q, 32, &overflow); - if (x <= UINT8_MAX) { - b->data[b->l_data++] = 'C'; - b->data[b->l_data++] = x; - } else if (x <= UINT16_MAX) { - b->data[b->l_data++] = 'S'; - u16_to_le(x, b->data + b->l_data); - b->l_data += 2; - } else { - b->data[b->l_data++] = 'I'; - u32_to_le(x, b->data + b->l_data); - b->l_data += 4; - } - } - } else if (type == 'f') { - b->data[b->l_data++] = 'f'; - float_to_le(strtod(q, &q), b->data + b->l_data); - b->l_data += sizeof(float); - } else if (type == 'd') { - b->data[b->l_data++] = 'd'; - double_to_le(strtod(q, &q), b->data + b->l_data); - b->l_data += sizeof(double); - } else if (type == 'Z' || type == 'H') { - char *end = strchr(q, '\t'); - if (!end) end = q + strlen(q); - _parse_err(type == 'H' && ((end-q)&1) != 0, - "hex field does not have an even number of digits"); - b->data[b->l_data++] = type; - if (possibly_expand_bam_data(b, end - q + 1) < 0) goto err_ret; - memcpy(b->data + b->l_data, q, end - q); - b->l_data += end - q; - b->data[b->l_data++] = '\0'; - q = end; - } else if (type == 'B') { - uint32_t n; - char *r; - type = *q++; // q points to the first ',' following the typing byte - _parse_err(*q && *q != ',' && *q != '\t', - "B aux field type not followed by ','"); - - for (r = q, n = 0; *r > '\t'; ++r) - if (*r == ',') ++n; - - if (sam_parse_B_vals(type, n, q, &q, r, b) < 0) - goto err_ret; - } 
else _parse_err(1, "unrecognized type %s", hts_strprint(logbuf, sizeof logbuf, '\'', &type, 1)); - - while (*q > '\t') { q++; } // Skip any junk to next tab - q++; - } - - _parse_err(!lenient && overflow != 0, "numeric value out of allowed range"); -#undef _parse_err - - return 0; - -err_ret: - return -2; -} - -int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) -{ -#define _read_token(_p) (_p); do { char *tab = strchr((_p), '\t'); if (!tab) goto err_ret; *tab = '\0'; (_p) = tab + 1; } while (0) - -#if HTS_ALLOW_UNALIGNED != 0 && ULONG_MAX == 0xffffffffffffffff - -// Macro that operates on 64-bits at a time. -#define COPY_MINUS_N(to,from,n,l,failed) \ - do { \ - uint64_u *from8 = (uint64_u *)(from); \ - uint64_u *to8 = (uint64_u *)(to); \ - uint64_t uflow = 0; \ - size_t l8 = (l)>>3, i; \ - for (i = 0; i < l8; i++) { \ - to8[i] = from8[i] - (n)*0x0101010101010101UL; \ - uflow |= to8[i]; \ - } \ - for (i<<=3; i < (l); ++i) { \ - to[i] = from[i] - (n); \ - uflow |= to[i]; \ - } \ - failed = (uflow & 0x8080808080808080UL) > 0; \ - } while (0) - -#else - -// Basic version which operates a byte at a time -#define COPY_MINUS_N(to,from,n,l,failed) do { \ - uint8_t uflow = 0; \ - for (i = 0; i < (l); ++i) { \ - (to)[i] = (from)[i] - (n); \ - uflow |= (uint8_t) (to)[i]; \ - } \ - failed = (uflow & 0x80) > 0; \ - } while (0) - -#endif - -#define _get_mem(type_t, x, b, l) if (possibly_expand_bam_data((b), (l)) < 0) goto err_ret; *(x) = (type_t*)((b)->data + (b)->l_data); (b)->l_data += (l) -#define _parse_err(cond, ...) do { if (cond) { hts_log_error(__VA_ARGS__); goto err_ret; } } while (0) -#define _parse_warn(cond, ...) 
do { if (cond) { hts_log_warning(__VA_ARGS__); } } while (0) - - uint8_t *t; - - char *p = s->s, *q; - int i, overflow = 0; - char logbuf[40]; - hts_pos_t cigreflen; - bam1_core_t *c = &b->core; - - b->l_data = 0; - memset(c, 0, 32); - - // qname - q = _read_token(p); - - _parse_warn(p - q <= 1, "empty query name"); - _parse_err(p - q > 255, "query name too long"); - // resize large enough for name + extranul - if (possibly_expand_bam_data(b, (p - q) + 4) < 0) goto err_ret; - memcpy(b->data + b->l_data, q, p-q); b->l_data += p-q; - - c->l_extranul = (4 - (b->l_data & 3)) & 3; - memcpy(b->data + b->l_data, "\0\0\0\0", c->l_extranul); - b->l_data += c->l_extranul; - - c->l_qname = p - q + c->l_extranul; - - // flag - c->flag = parse_sam_flag(p, &p, &overflow); - if (*p++ != '\t') goto err_ret; // malformated flag - - // chr - q = _read_token(p); - if (strcmp(q, "*")) { - _parse_err(h->n_targets == 0, "no SQ lines present in the header"); - c->tid = bam_name2id(h, q); - _parse_err(c->tid < -1, "failed to parse header"); - _parse_warn(c->tid < 0, "unrecognized reference name %s; treated as unmapped", hts_strprint(logbuf, sizeof logbuf, '"', q, SIZE_MAX)); - } else c->tid = -1; - - // pos - c->pos = hts_str2uint(p, &p, 63, &overflow) - 1; - if (*p++ != '\t') goto err_ret; - if (c->pos < 0 && c->tid >= 0) { - _parse_warn(1, "mapped query cannot have zero coordinate; treated as unmapped"); - c->tid = -1; - } - if (c->tid < 0) c->flag |= BAM_FUNMAP; - - // mapq - c->qual = hts_str2uint(p, &p, 8, &overflow); - if (*p++ != '\t') goto err_ret; - // cigar - if (*p != '*') { - uint32_t *cigar = NULL; - int old_l_data = b->l_data; - int n_cigar = bam_parse_cigar(p, &p, b); - if (n_cigar < 1 || *p++ != '\t') goto err_ret; - cigar = (uint32_t *)(b->data + old_l_data); - - // can't use bam_endpos() directly as some fields not yet set up - cigreflen = (!(c->flag&BAM_FUNMAP))? 
bam_cigar2rlen(c->n_cigar, cigar) : 1; - if (cigreflen == 0) cigreflen = 1; - } else { - _parse_warn(!(c->flag&BAM_FUNMAP), "mapped query must have a CIGAR; treated as unmapped"); - c->flag |= BAM_FUNMAP; - q = _read_token(p); - cigreflen = 1; - } - _parse_err(HTS_POS_MAX - cigreflen <= c->pos, - "read ends beyond highest supported position"); - c->bin = hts_reg2bin(c->pos, c->pos + cigreflen, 14, 5); - // mate chr - q = _read_token(p); - if (strcmp(q, "=") == 0) { - c->mtid = c->tid; - } else if (strcmp(q, "*") == 0) { - c->mtid = -1; - } else { - c->mtid = bam_name2id(h, q); - _parse_err(c->mtid < -1, "failed to parse header"); - _parse_warn(c->mtid < 0, "unrecognized mate reference name %s; treated as unmapped", hts_strprint(logbuf, sizeof logbuf, '"', q, SIZE_MAX)); - } - // mpos - c->mpos = hts_str2uint(p, &p, 63, &overflow) - 1; - if (*p++ != '\t') goto err_ret; - if (c->mpos < 0 && c->mtid >= 0) { - _parse_warn(1, "mapped mate cannot have zero coordinate; treated as unmapped"); - c->mtid = -1; - } - // tlen - c->isize = hts_str2int(p, &p, 64, &overflow); - if (*p++ != '\t') goto err_ret; - // seq - q = _read_token(p); - if (strcmp(q, "*")) { - _parse_err(p - q - 1 > INT32_MAX, "read sequence is too long"); - c->l_qseq = p - q - 1; - hts_pos_t ql = bam_cigar2qlen(c->n_cigar, (uint32_t*)(b->data + c->l_qname)); - _parse_err(c->n_cigar && ql != c->l_qseq, "CIGAR and query sequence are of different length"); - i = (c->l_qseq + 1) >> 1; - _get_mem(uint8_t, &t, b, i); - - unsigned int lqs2 = c->l_qseq&~1, i; - for (i = 0; i < lqs2; i+=2) - t[i>>1] = (seq_nt16_table[(unsigned char)q[i]] << 4) | seq_nt16_table[(unsigned char)q[i+1]]; - for (; i < c->l_qseq; ++i) - t[i>>1] = seq_nt16_table[(unsigned char)q[i]] << ((~i&1)<<2); - } else c->l_qseq = 0; - // qual - _get_mem(uint8_t, &t, b, c->l_qseq); - if (p[0] == '*' && (p[1] == '\t' || p[1] == '\0')) { - memset(t, 0xff, c->l_qseq); - p += 2; - } else { - int failed = 0; - _parse_err(s->l - (p - s->s) < c->l_qseq - || 
(p[c->l_qseq] != '\t' && p[c->l_qseq] != '\0'), - "SEQ and QUAL are of different length"); - COPY_MINUS_N(t, p, 33, c->l_qseq, failed); - _parse_err(failed, "invalid QUAL character"); - p += c->l_qseq + 1; - } - - // aux - if (aux_parse(p, s->s + s->l, b, 0, NULL) < 0) - goto err_ret; - - if (bam_tag2cigar(b, 1, 1) < 0) - return -2; - return 0; - -#undef _parse_warn -#undef _parse_err -#undef _get_mem -#undef _read_token -err_ret: - return -2; -} - -static uint32_t read_ncigar(const char *q) { - uint32_t n_cigar = 0; - for (; *q && *q != '\t'; ++q) - if (!isdigit_c(*q)) ++n_cigar; - if (!n_cigar) { - hts_log_error("No CIGAR operations"); - return 0; - } - if (n_cigar >= 2147483647) { - hts_log_error("Too many CIGAR operations"); - return 0; - } - - return n_cigar; -} - -/*! @function - @abstract Parse a CIGAR string into preallocated a uint32_t array - @param in [in] pointer to the source string - @param a_cigar [out] address of the destination uint32_t buffer - @return number of processed input characters; 0 on error - */ -static int parse_cigar(const char *in, uint32_t *a_cigar, uint32_t n_cigar) { - int i, overflow = 0; - const char *p = in; - for (i = 0; i < n_cigar; i++) { - uint32_t len; - int op; - char *q; - len = hts_str2uint(p, &q, 28, &overflow)< *a_mem) { - uint32_t *a_tmp = realloc(*a_cigar, n_cigar*sizeof(**a_cigar)); - if (a_tmp) { - *a_cigar = a_tmp; - *a_mem = n_cigar; - } else { - hts_log_error("Memory allocation error"); - return -1; - } - } - - if (!(diff = parse_cigar(in, *a_cigar, n_cigar))) return -1; - if (end) *end = (char *)in+diff; - - return n_cigar; -} - -ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b) { - size_t n_cigar = 0; - int diff; - - if (!in || !b) { - hts_log_error("NULL pointer arguments"); - return -1; - } - if (end) *end = (char *)in; - - n_cigar = (*in == '*') ? 
0 : read_ncigar(in); - if (!n_cigar && b->core.n_cigar == 0) { - if (end) *end = (char *)in+1; - return 0; - } - - ssize_t cig_diff = n_cigar - b->core.n_cigar; - if (cig_diff > 0 && - possibly_expand_bam_data(b, cig_diff * sizeof(uint32_t)) < 0) { - hts_log_error("Memory allocation error"); - return -1; - } - - uint32_t *cig = bam_get_cigar(b); - if ((uint8_t *)cig != b->data + b->l_data) { - // Modifying an BAM existing BAM record - uint8_t *seq = bam_get_seq(b); - memmove(cig + n_cigar, seq, (b->data + b->l_data) - seq); - } - - if (n_cigar) { - if (!(diff = parse_cigar(in, cig, n_cigar))) - return -1; - } else { - diff = 1; // handle "*" - } - - b->l_data += cig_diff * sizeof(uint32_t); - b->core.n_cigar = n_cigar; - if (end) *end = (char *)in + diff; - - return n_cigar; -} - -/* - * ----------------------------------------------------------------------------- - * SAM threading - */ -// Size of SAM text block (reading) -#define SAM_NBYTES 240000 - -// Number of BAM records (writing, up to NB_mem in size) -#define SAM_NBAM 1000 - -struct SAM_state; - -// Output job - a block of BAM records -typedef struct sp_bams { - struct sp_bams *next; - int serial; - - bam1_t *bams; - int nbams, abams; // used and alloc for bams[] array - size_t bam_mem; // very approximate total size - - struct SAM_state *fd; -} sp_bams; - -// Input job - a block of SAM text -typedef struct sp_lines { - struct sp_lines *next; - int serial; - - char *data; - int data_size; - int alloc; - - struct SAM_state *fd; - sp_bams *bams; -} sp_lines; - -enum sam_cmd { - SAM_NONE = 0, - SAM_CLOSE, - SAM_CLOSE_DONE, -}; - -typedef struct SAM_state { - sam_hdr_t *h; - - hts_tpool *p; - int own_pool; - pthread_mutex_t lines_m; - hts_tpool_process *q; - pthread_t dispatcher; - int dispatcher_set; - - sp_lines *lines; - sp_bams *bams; - - sp_bams *curr_bam; - int curr_idx; - int serial; - - // Be warned: moving these mutexes around in this struct can reduce - // threading performance by up to 70%! 
- pthread_mutex_t command_m; - pthread_cond_t command_c; - enum sam_cmd command; - - // One of the E* errno codes - int errcode; - - htsFile *fp; -} SAM_state; - -// Returns a SAM_state struct from a generic hFILE. -// -// Returns NULL on failure. -static SAM_state *sam_state_create(htsFile *fp) { - // Ideally sam_open wouldn't be a #define to hts_open but instead would - // be a redirect call with an additional 'S' mode. This in turn would - // correctly set the designed format to sam instead of a generic - // text_format. - if (fp->format.format != sam && fp->format.format != text_format) - return NULL; - - SAM_state *fd = calloc(1, sizeof(*fd)); - if (!fd) - return NULL; - - fp->state = fd; - fd->fp = fp; - - return fd; -} - -static int sam_format1_append(const bam_hdr_t *h, const bam1_t *b, kstring_t *str); -static void *sam_format_worker(void *arg); - -static void sam_state_err(SAM_state *fd, int errcode) { - pthread_mutex_lock(&fd->command_m); - if (!fd->errcode) - fd->errcode = errcode; - pthread_mutex_unlock(&fd->command_m); -} - -static void sam_free_sp_bams(sp_bams *b) { - if (!b) - return; - - if (b->bams) { - int i; - for (i = 0; i < b->abams; i++) { - if (b->bams[i].data) - free(b->bams[i].data); - } - free(b->bams); - } - free(b); -} - -// Destroys the state produce by sam_state_create. 
-int sam_state_destroy(htsFile *fp) { - int ret = 0; - - if (!fp->state) - return 0; - - SAM_state *fd = fp->state; - if (fd->p) { - if (fd->h) { - // Notify sam_dispatcher we're closing - pthread_mutex_lock(&fd->command_m); - if (fd->command != SAM_CLOSE_DONE) - fd->command = SAM_CLOSE; - pthread_cond_signal(&fd->command_c); - ret = -fd->errcode; - if (fd->q) - hts_tpool_wake_dispatch(fd->q); // unstick the reader - - if (!fp->is_write && fd->q && fd->dispatcher_set) { - for (;;) { - // Avoid deadlocks with dispatcher - if (fd->command == SAM_CLOSE_DONE) - break; - hts_tpool_wake_dispatch(fd->q); - pthread_mutex_unlock(&fd->command_m); - usleep(10000); - pthread_mutex_lock(&fd->command_m); - } - } - pthread_mutex_unlock(&fd->command_m); - - if (fp->is_write) { - // Dispatch the last partial block. - sp_bams *gb = fd->curr_bam; - if (!ret && gb && gb->nbams > 0 && fd->q) - ret = hts_tpool_dispatch(fd->p, fd->q, sam_format_worker, gb); - - // Flush and drain output - if (fd->q) - hts_tpool_process_flush(fd->q); - pthread_mutex_lock(&fd->command_m); - if (!ret) ret = -fd->errcode; - pthread_mutex_unlock(&fd->command_m); - - while (!ret && fd->q && !hts_tpool_process_empty(fd->q)) { - usleep(10000); - pthread_mutex_lock(&fd->command_m); - ret = -fd->errcode; - // not empty but shutdown implies error - if (hts_tpool_process_is_shutdown(fd->q) && !ret) - ret = EIO; - pthread_mutex_unlock(&fd->command_m); - } - if (fd->q) - hts_tpool_process_shutdown(fd->q); - } - - // Wait for it to acknowledge - if (fd->dispatcher_set) - pthread_join(fd->dispatcher, NULL); - if (!ret) ret = -fd->errcode; - } - - // Tidy up memory - if (fd->q) - hts_tpool_process_destroy(fd->q); - - if (fd->own_pool && fp->format.compression == no_compression) { - hts_tpool_destroy(fd->p); - fd->p = NULL; - } - pthread_mutex_destroy(&fd->lines_m); - pthread_mutex_destroy(&fd->command_m); - pthread_cond_destroy(&fd->command_c); - - sp_lines *l = fd->lines; - while (l) { - sp_lines *n = l->next; - 
free(l->data); - free(l); - l = n; - } - - sp_bams *b = fd->bams; - while (b) { - if (fd->curr_bam == b) - fd->curr_bam = NULL; - sp_bams *n = b->next; - sam_free_sp_bams(b); - b = n; - } - - if (fd->curr_bam) - sam_free_sp_bams(fd->curr_bam); - - // Decrement counter by one, maybe destroying too. - // This is to permit the caller using bam_hdr_destroy - // before sam_close without triggering decode errors - // in the background threads. - bam_hdr_destroy(fd->h); - } - - free(fp->state); - fp->state = NULL; - return ret; -} - -// Cleanup function - job for sam_parse_worker; result for sam_format_worker -static void cleanup_sp_lines(void *arg) { - sp_lines *gl = (sp_lines *)arg; - if (!gl) return; - - // Should always be true for lines passed to / from thread workers. - assert(gl->next == NULL); - - free(gl->data); - sam_free_sp_bams(gl->bams); - free(gl); -} - -// Run from one of the worker threads. -// Convert a passed in array of lines to array of BAMs, returning -// the result back to the thread queue. -static void *sam_parse_worker(void *arg) { - sp_lines *gl = (sp_lines *)arg; - sp_bams *gb = NULL; - char *lines = gl->data; - int i; - bam1_t *b; - SAM_state *fd = gl->fd; - - // Use a block of BAM structs we had earlier if available. 
- pthread_mutex_lock(&fd->lines_m); - if (fd->bams) { - gb = fd->bams; - fd->bams = gb->next; - } - pthread_mutex_unlock(&fd->lines_m); - - if (gb == NULL) { - gb = calloc(1, sizeof(*gb)); - if (!gb) { - return NULL; - } - gb->abams = 100; - gb->bams = b = calloc(gb->abams, sizeof(*b)); - if (!gb->bams) { - sam_state_err(fd, ENOMEM); - goto err; - } - gb->nbams = 0; - gb->bam_mem = 0; - } - gb->serial = gl->serial; - gb->next = NULL; - - b = (bam1_t *)gb->bams; - if (!b) { - sam_state_err(fd, ENOMEM); - goto err; - } - - i = 0; - char *cp = lines, *cp_end = lines + gl->data_size; - while (cp < cp_end) { - if (i >= gb->abams) { - int old_abams = gb->abams; - gb->abams *= 2; - b = (bam1_t *)realloc(gb->bams, gb->abams*sizeof(bam1_t)); - if (!b) { - gb->abams /= 2; - sam_state_err(fd, ENOMEM); - goto err; - } - memset(&b[old_abams], 0, (gb->abams - old_abams)*sizeof(*b)); - gb->bams = b; - } - - // Ideally we'd get sam_parse1 to return the number of - // bytes decoded and to be able to stop on newline as - // well as \0. - // - // We can then avoid the additional strchr loop. - // It's around 6% of our CPU cost, albeit threadable. - // - // However this is an API change so for now we copy. - - char *nl = strchr(cp, '\n'); - char *line_end; - if (nl) { - line_end = nl; - if (line_end > cp && *(line_end - 1) == '\r') - line_end--; - nl++; - } else { - nl = line_end = cp_end; - } - *line_end = '\0'; - kstring_t ks = { line_end - cp, gl->alloc, cp }; - if (sam_parse1(&ks, fd->h, &b[i]) < 0) { - sam_state_err(fd, errno ? 
errno : EIO); - cleanup_sp_lines(gl); - goto err; - } - - cp = nl; - i++; - } - gb->nbams = i; - - pthread_mutex_lock(&fd->lines_m); - gl->next = fd->lines; - fd->lines = gl; - pthread_mutex_unlock(&fd->lines_m); - return gb; - - err: - sam_free_sp_bams(gb); - return NULL; -} - -static void *sam_parse_eof(void *arg) { - return NULL; -} - -// Cleanup function - result for sam_parse_worker; job for sam_format_worker -static void cleanup_sp_bams(void *arg) { - sam_free_sp_bams((sp_bams *) arg); -} - -// Runs in its own thread. -// Reads a block of text (SAM) and sends a new job to the thread queue to -// translate this to BAM. -static void *sam_dispatcher_read(void *vp) { - htsFile *fp = vp; - kstring_t line = {0}; - int line_frag = 0; - SAM_state *fd = fp->state; - sp_lines *l = NULL; - - // Pre-allocate buffer for left-over bits of line (exact size doesn't - // matter as it will grow if necessary). - if (ks_resize(&line, 1000) < 0) - goto err; - - for (;;) { - // Check for command - pthread_mutex_lock(&fd->command_m); - switch (fd->command) { - - case SAM_CLOSE: - pthread_cond_signal(&fd->command_c); - pthread_mutex_unlock(&fd->command_m); - hts_tpool_process_shutdown(fd->q); - goto tidyup; - - default: - break; - } - pthread_mutex_unlock(&fd->command_m); - - pthread_mutex_lock(&fd->lines_m); - if (fd->lines) { - // reuse existing line buffer - l = fd->lines; - fd->lines = l->next; - } - pthread_mutex_unlock(&fd->lines_m); - - if (l == NULL) { - // none to reuse, to create a new one - l = calloc(1, sizeof(*l)); - if (!l) - goto err; - l->alloc = SAM_NBYTES; - l->data = malloc(l->alloc+8); // +8 for optimisation in sam_parse1 - if (!l->data) { - free(l); - l = NULL; - goto err; - } - l->fd = fd; - } - l->next = NULL; - - if (l->alloc < line_frag+SAM_NBYTES/2) { - char *rp = realloc(l->data, line_frag+SAM_NBYTES/2 +8); - if (!rp) - goto err; - l->alloc = line_frag+SAM_NBYTES/2; - l->data = rp; - } - memcpy(l->data, line.s, line_frag); - - l->data_size = line_frag; - 
ssize_t nbytes; - longer_line: - if (fp->is_bgzf) - nbytes = bgzf_read(fp->fp.bgzf, l->data + line_frag, l->alloc - line_frag); - else - nbytes = hread(fp->fp.hfile, l->data + line_frag, l->alloc - line_frag); - if (nbytes < 0) { - sam_state_err(fd, errno ? errno : EIO); - goto err; - } else if (nbytes == 0) - break; // EOF - l->data_size += nbytes; - - // trim to last \n. Maybe \r\n, but that's still fine - if (nbytes == l->alloc - line_frag) { - char *cp_end = l->data + l->data_size; - char *cp = cp_end-1; - - while (cp > (char *)l->data && *cp != '\n') - cp--; - - // entire buffer is part of a single line - if (cp == l->data) { - line_frag = l->data_size; - char *rp = realloc(l->data, l->alloc * 2 + 8); - if (!rp) - goto err; - l->alloc *= 2; - l->data = rp; - assert(l->alloc >= l->data_size); - assert(l->alloc >= line_frag); - assert(l->alloc >= l->alloc - line_frag); - goto longer_line; - } - cp++; - - // line holds the remainder of our line. - if (ks_resize(&line, cp_end - cp) < 0) - goto err; - memcpy(line.s, cp, cp_end - cp); - line_frag = cp_end - cp; - l->data_size = l->alloc - line_frag; - } else { - // out of buffer - line_frag = 0; - } - - l->serial = fd->serial++; - //fprintf(stderr, "Dispatching %p, %d bytes, serial %d\n", l, l->data_size, l->serial); - if (hts_tpool_dispatch3(fd->p, fd->q, sam_parse_worker, l, - cleanup_sp_lines, cleanup_sp_bams, 0) < 0) - goto err; - pthread_mutex_lock(&fd->command_m); - if (fd->command == SAM_CLOSE) { - pthread_mutex_unlock(&fd->command_m); - l = NULL; - goto tidyup; - } - l = NULL; // Now "owned" by sam_parse_worker() - pthread_mutex_unlock(&fd->command_m); - } - - if (hts_tpool_dispatch(fd->p, fd->q, sam_parse_eof, NULL) < 0) - goto err; - - // At EOF, wait for close request. - // (In future if we add support for seek, this is where we need to catch it.) 
- for (;;) { - pthread_mutex_lock(&fd->command_m); - if (fd->command == SAM_NONE) - pthread_cond_wait(&fd->command_c, &fd->command_m); - switch (fd->command) { - case SAM_CLOSE: - pthread_cond_signal(&fd->command_c); - pthread_mutex_unlock(&fd->command_m); - hts_tpool_process_shutdown(fd->q); - goto tidyup; - - default: - pthread_mutex_unlock(&fd->command_m); - break; - } - } - - tidyup: - pthread_mutex_lock(&fd->command_m); - fd->command = SAM_CLOSE_DONE; - pthread_cond_signal(&fd->command_c); - pthread_mutex_unlock(&fd->command_m); - - if (l) { - pthread_mutex_lock(&fd->lines_m); - l->next = fd->lines; - fd->lines = l; - pthread_mutex_unlock(&fd->lines_m); - } - free(line.s); - - return NULL; - - err: - sam_state_err(fd, errno ? errno : ENOMEM); - hts_tpool_process_shutdown(fd->q); - goto tidyup; -} - -// Runs in its own thread. -// Takes encoded blocks of SAM off the thread results queue and writes them -// to our output stream. -static void *sam_dispatcher_write(void *vp) { - htsFile *fp = vp; - SAM_state *fd = fp->state; - hts_tpool_result *r; - - // Iterates until result queue is shutdown, where it returns NULL. - while ((r = hts_tpool_next_result_wait(fd->q))) { - sp_lines *gl = (sp_lines *)hts_tpool_result_data(r); - if (!gl) { - sam_state_err(fd, ENOMEM); - goto err; - } - - if (fp->idx) { - sp_bams *gb = gl->bams; - int i = 0, count = 0; - while (i < gl->data_size) { - int j = i; - while (i < gl->data_size && gl->data[i] != '\n') - i++; - if (i < gl->data_size) - i++; - - if (fp->is_bgzf) { - if (bgzf_flush_try(fp->fp.bgzf, i-j) < 0) - goto err; - if (bgzf_write(fp->fp.bgzf, &gl->data[j], i-j) != i-j) - goto err; - } else { - if (hwrite(fp->fp.hfile, &gl->data[j], i-j) != i-j) - goto err; - } - - bam1_t *b = &gb->bams[count++]; - if (fp->format.compression == bgzf) { - if (bgzf_idx_push(fp->fp.bgzf, fp->idx, - b->core.tid, b->core.pos, bam_endpos(b), - bgzf_tell(fp->fp.bgzf), - !(b->core.flag&BAM_FUNMAP)) < 0) { - sam_state_err(fd, errno ? 
errno : ENOMEM); - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(fd->h, b->core.tid), sam_hdr_tid2len(fd->h, b->core.tid), b->core.flag, b->core.pos+1); - goto err; - } - } else { - if (hts_idx_push(fp->idx, b->core.tid, b->core.pos, bam_endpos(b), - bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { - sam_state_err(fd, errno ? errno : ENOMEM); - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(fd->h, b->core.tid), sam_hdr_tid2len(fd->h, b->core.tid), b->core.flag, b->core.pos+1); - goto err; - } - } - } - - assert(count == gb->nbams); - - // Add bam array to free-list - pthread_mutex_lock(&fd->lines_m); - gb->next = fd->bams; - fd->bams = gl->bams; - gl->bams = NULL; - pthread_mutex_unlock(&fd->lines_m); - } else { - if (fp->is_bgzf) { - // We keep track of how much in the current block we have - // remaining => R. We look for the last newline in input - // [i] to [i+R], backwards => position N. - // - // If we find a newline, we write out bytes i to N. - // We know we cannot fit the next record in this bgzf block, - // so we flush what we have and copy input N to i+R into - // the start of a new block, and recompute a new R for that. - // - // If we don't find a newline (i==N) then we cannot extend - // the current block at all, so flush whatever is in it now - // if it ends on a newline. - // We still copy i(==N) to i+R to the next block and - // continue as before with a new R. - // - // The only exception on the flush is when we run out of - // data in the input. In that case we skip it as we don't - // yet know if the next record will fit. 
- // - // Both conditions share the same code here: - // - Look for newline (pos N) - // - Write i to N (which maybe 0) - // - Flush if block ends on newline and not end of input - // - write N to i+R - - int i = 0; - BGZF *fb = fp->fp.bgzf; - while (i < gl->data_size) { - // remaining space in block - int R = BGZF_BLOCK_SIZE - fb->block_offset; - int eod = 0; - if (R > gl->data_size-i) - R = gl->data_size-i, eod = 1; - - // Find last newline in input data - int N = i + R; - while (--N > i) { - if (gl->data[N] == '\n') - break; - } - - if (N != i) { - // Found a newline - N++; - if (bgzf_write(fb, &gl->data[i], N-i) != N-i) - goto err; - } - - // Flush bgzf block - int b_off = fb->block_offset; - if (!eod && b_off && - ((char *)fb->uncompressed_block)[b_off-1] == '\n') - if (bgzf_flush_try(fb, BGZF_BLOCK_SIZE) < 0) - goto err; - - // Copy from N onwards into next block - if (i+R > N) - if (bgzf_write(fb, &gl->data[N], i+R - N) - != i+R - N) - goto err; - - i = i+R; - } - } else { - if (hwrite(fp->fp.hfile, gl->data, gl->data_size) != gl->data_size) - goto err; - } - } - - hts_tpool_delete_result(r, 0); - - // Also updated by main thread - pthread_mutex_lock(&fd->lines_m); - gl->next = fd->lines; - fd->lines = gl; - pthread_mutex_unlock(&fd->lines_m); - } - - sam_state_err(fd, 0); // success - hts_tpool_process_shutdown(fd->q); - return NULL; - - err: - sam_state_err(fd, errno ? errno : EIO); - return (void *)-1; -} - -// Run from one of the worker threads. -// Convert a passed in array of BAMs (sp_bams) and converts to a block -// of text SAM records (sp_lines). -static void *sam_format_worker(void *arg) { - sp_bams *gb = (sp_bams *)arg; - sp_lines *gl = NULL; - int i; - SAM_state *fd = gb->fd; - htsFile *fp = fd->fp; - - // Use a block of SAM strings we had earlier if available. 
- pthread_mutex_lock(&fd->lines_m); - if (fd->lines) { - gl = fd->lines; - fd->lines = gl->next; - } - pthread_mutex_unlock(&fd->lines_m); - - if (gl == NULL) { - gl = calloc(1, sizeof(*gl)); - if (!gl) { - sam_state_err(fd, ENOMEM); - return NULL; - } - gl->alloc = gl->data_size = 0; - gl->data = NULL; - } - gl->serial = gb->serial; - gl->next = NULL; - - kstring_t ks = {0, gl->alloc, gl->data}; - - for (i = 0; i < gb->nbams; i++) { - if (sam_format1_append(fd->h, &gb->bams[i], &ks) < 0) { - sam_state_err(fd, errno ? errno : EIO); - goto err; - } - kputc('\n', &ks); - } - - pthread_mutex_lock(&fd->lines_m); - gl->data_size = ks.l; - gl->alloc = ks.m; - gl->data = ks.s; - - if (fp->idx) { - // Keep hold of the bam array a little longer as - // sam_dispatcher_write needs to use them for building the index. - gl->bams = gb; - } else { - // Add bam array to free-list - gb->next = fd->bams; - fd->bams = gb; - } - pthread_mutex_unlock(&fd->lines_m); - - return gl; - - err: - // Possible race between this and fd->curr_bam. - // Easier to not free and leave it on the input list so it - // gets freed there instead? 
- // sam_free_sp_bams(gb); - if (gl) { - free(gl->data); - free(gl); - } - return NULL; -} - -int sam_set_thread_pool(htsFile *fp, htsThreadPool *p) { - if (fp->state) - return 0; - - if (!(fp->state = sam_state_create(fp))) - return -1; - SAM_state *fd = (SAM_state *)fp->state; - - pthread_mutex_init(&fd->lines_m, NULL); - pthread_mutex_init(&fd->command_m, NULL); - pthread_cond_init(&fd->command_c, NULL); - fd->p = p->pool; - int qsize = p->qsize; - if (!qsize) - qsize = 2*hts_tpool_size(fd->p); - fd->q = hts_tpool_process_init(fd->p, qsize, 0); - if (!fd->q) { - sam_state_destroy(fp); - return -1; - } - - if (fp->format.compression == bgzf) - return bgzf_thread_pool(fp->fp.bgzf, p->pool, p->qsize); - - return 0; -} - -int sam_set_threads(htsFile *fp, int nthreads) { - if (nthreads <= 0) - return 0; - - htsThreadPool p; - p.pool = hts_tpool_init(nthreads); - p.qsize = nthreads*2; - - int ret = sam_set_thread_pool(fp, &p); - if (ret < 0) - return ret; - - SAM_state *fd = (SAM_state *)fp->state; - fd->own_pool = 1; - - return 0; -} - -typedef struct { - kstring_t name; - kstring_t comment; // NB: pointer into name, do not free - kstring_t seq; - kstring_t qual; - int casava; - int aux; - int rnum; - char BC[3]; // aux tag ID for barcode - khash_t(tag) *tags; // which aux tags to use (if empty, use all). - char nprefix; - int sra_names; -} fastq_state; - -// Initialise fastq state. -// Name char of '@' or '>' distinguishes fastq vs fasta variant -static fastq_state *fastq_state_init(int name_char) { - fastq_state *x = (fastq_state *)calloc(1, sizeof(*x)); - if (!x) - return NULL; - strcpy(x->BC, "BC"); - x->nprefix = name_char; - - return x; -} - -void fastq_state_destroy(htsFile *fp) { - if (fp->state) { - fastq_state *x = (fastq_state *)fp->state; - if (x->tags) - kh_destroy(tag, x->tags); - ks_free(&x->name); - ks_free(&x->seq); - ks_free(&x->qual); - free(fp->state); - } -} - -int fastq_state_set(samFile *fp, enum hts_fmt_option opt, ...) 
{ - va_list args; - - if (!fp) - return -1; - if (!fp->state) - if (!(fp->state = fastq_state_init(fp->format.format == fastq_format - ? '@' : '>'))) - return -1; - - fastq_state *x = (fastq_state *)fp->state; - - switch (opt) { - case FASTQ_OPT_CASAVA: - x->casava = 1; - break; - - case FASTQ_OPT_NAME2: - x->sra_names = 1; - break; - - case FASTQ_OPT_AUX: { - va_start(args, opt); - x->aux = 1; - char *tag = va_arg(args, char *); - va_end(args); - if (tag && strcmp(tag, "1") != 0) { - if (!x->tags) - if (!(x->tags = kh_init(tag))) - return -1; - - size_t i, tlen = strlen(tag); - for (i = 0; i+3 <= tlen+1; i += 3) { - if (tag[i+0] == ',' || tag[i+1] == ',' || - !(tag[i+2] == ',' || tag[i+2] == '\0')) { - hts_log_warning("Bad tag format '%.3s'; skipping option", tag+i); - break; - } - int ret, tcode = tag[i+0]*256 + tag[i+1]; - kh_put(tag, x->tags, tcode, &ret); - if (ret < 0) - return -1; - } - } - break; - } - - case FASTQ_OPT_BARCODE: { - va_start(args, opt); - char *bc = va_arg(args, char *); - va_end(args); - strncpy(x->BC, bc, 2); - x->BC[2] = 0; - break; - } - - case FASTQ_OPT_RNUM: - x->rnum = 1; - break; - - default: - break; - } - return 0; -} - -static int fastq_parse1(htsFile *fp, bam1_t *b) { - fastq_state *x = (fastq_state *)fp->state; - size_t i, l; - int ret = 0; - - if (fp->format.format == fasta_format && fp->line.s) { - // For FASTA we've already read the >name line; steal it - // Not the most efficient, but we don't optimise for fasta reading. - if (fp->line.l == 0) - return -1; // EOF - - free(x->name.s); - x->name = fp->line; - fp->line.l = fp->line.m = 0; - fp->line.s = NULL; - } else { - // Read a FASTQ format entry. - ret = hts_getline(fp, KS_SEP_LINE, &x->name); - if (ret == -1) - return -1; // EOF - else if (ret < -1) - return ret; // ERR - } - - // Name - if (*x->name.s != x->nprefix) - return -2; - - // Reverse the SRA strangeness of putting the run_name.number before - // the read name. 
- i = 0; - char *name = x->name.s+1; - if (x->sra_names) { - char *cp = strpbrk(x->name.s, " \t"); - if (cp) { - while (*cp == ' ' || *cp == '\t') - cp++; - *--cp = '@'; - i = cp - x->name.s; - name = cp+1; - } - } - - l = x->name.l; - char *s = x->name.s; - while (i < l && !isspace_c(s[i])) - i++; - if (i < l) { - s[i] = 0; - x->name.l = i++; - } - - // Comment; a kstring struct, but pointer into name line. (Do not free) - while (i < l && isspace_c(s[i])) - i++; - x->comment.s = s+i; - x->comment.l = l - i; - - // Seq - x->seq.l = 0; - for (;;) { - if ((ret = hts_getline(fp, KS_SEP_LINE, &fp->line)) < 0) - if (fp->format.format == fastq_format || ret < -1) - return -2; - if (ret == -1 || - *fp->line.s == (fp->format.format == fastq_format ? '+' : '>')) - break; - if (kputsn(fp->line.s, fp->line.l, &x->seq) < 0) - return -2; - } - - // Qual - if (fp->format.format == fastq_format) { - size_t remainder = x->seq.l; - x->qual.l = 0; - do { - if (hts_getline(fp, KS_SEP_LINE, &fp->line) < 0) - return -2; - if (fp->line.l > remainder) - return -2; - if (kputsn(fp->line.s, fp->line.l, &x->qual) < 0) - return -2; - remainder -= fp->line.l; - } while (remainder > 0); - - // Decr qual - for (i = 0; i < x->qual.l; i++) - x->qual.s[i] -= '!'; - } - - int flag = BAM_FUNMAP; int pflag = BAM_FMUNMAP | BAM_FPAIRED; - if (x->name.l > 2 && - x->name.s[x->name.l-2] == '/' && - isdigit_c(x->name.s[x->name.l-1])) { - switch(x->name.s[x->name.l-1]) { - case '1': flag |= BAM_FREAD1 | pflag; break; - case '2': flag |= BAM_FREAD2 | pflag; break; - default : flag |= BAM_FREAD1 | BAM_FREAD2 | pflag; break; - } - x->name.s[x->name.l-=2] = 0; - } - - // Convert to BAM - ret = bam_set1(b, - x->name.s + x->name.l - name, name, - flag, - -1, -1, 0, // ref '*', pos, mapq, - 0, NULL, // no cigar, - -1, -1, 0, // mate - x->seq.l, x->seq.s, x->qual.s, - 0); - - // Identify Illumina CASAVA strings. 
- // ::: - char *barcode = NULL; - int barcode_len = 0; - kstring_t *kc = &x->comment; - char *endptr; - if (x->casava && - // \d:[YN]:\d+:[ACGTN]+ - kc->l > 6 && (kc->s[1] | kc->s[3]) == ':' && isdigit_c(kc->s[0]) && - strtol(kc->s+4, &endptr, 10) >= 0 && endptr != kc->s+4 - && *endptr == ':') { - - // read num - switch(kc->s[0]) { - case '1': b->core.flag |= BAM_FREAD1 | pflag; break; - case '2': b->core.flag |= BAM_FREAD2 | pflag; break; - default : b->core.flag |= BAM_FREAD1 | BAM_FREAD2 | pflag; break; - } - - if (kc->s[2] == 'Y') - b->core.flag |= BAM_FQCFAIL; - - // Barcode, maybe numeric in which case we skip it - if (!isdigit_c(endptr[1])) { - barcode = endptr+1; - for (i = barcode - kc->s; i < kc->l; i++) - if (isspace_c(kc->s[i])) - break; - - kc->s[i] = 0; - barcode_len = i+1-(barcode - kc->s); - } - } - - if (ret >= 0 && barcode_len) - if (bam_aux_append(b, x->BC, 'Z', barcode_len, (uint8_t *)barcode) < 0) - ret = -2; - - if (!x->aux) - return ret; - - // Identify any SAM style aux tags in comments too. - if (aux_parse(&kc->s[barcode_len], kc->s + kc->l, b, 1, x->tags) < 0) - ret = -2; - - return ret; -} - -// Internal component of sam_read1 below -static inline int sam_read1_bam(htsFile *fp, sam_hdr_t *h, bam1_t *b) { - int ret = bam_read1(fp->fp.bgzf, b); - if (h && ret >= 0) { - if (b->core.tid >= h->n_targets || b->core.tid < -1 || - b->core.mtid >= h->n_targets || b->core.mtid < -1) { - errno = ERANGE; - return -3; - } - } - return ret; -} - -// Internal component of sam_read1 below -static inline int sam_read1_cram(htsFile *fp, sam_hdr_t *h, bam1_t **b) { - int ret = cram_get_bam_seq(fp->fp.cram, b); - if (ret < 0) - return cram_eof(fp->fp.cram) ? 
-1 : -2; - - if (bam_tag2cigar(*b, 1, 1) < 0) - return -2; - - return ret; -} - -// Internal component of sam_read1 below -static inline int sam_read1_sam(htsFile *fp, sam_hdr_t *h, bam1_t *b) { - int ret; - - // Consume 1st line after header parsing as it wasn't using peek - if (fp->line.l != 0) { - ret = sam_parse1(&fp->line, h, b); - fp->line.l = 0; - return ret; - } - - if (fp->state) { - SAM_state *fd = (SAM_state *)fp->state; - - if (fp->format.compression == bgzf && fp->fp.bgzf->seeked) { - // We don't support multi-threaded SAM parsing with seeks yet. - int ret; - if ((ret = sam_state_destroy(fp)) < 0) { - errno = -ret; - return -2; - } - if (bgzf_seek(fp->fp.bgzf, fp->fp.bgzf->seeked, SEEK_SET) < 0) - return -1; - fp->fp.bgzf->seeked = 0; - goto err_recover; - } - - if (!fd->h) { - fd->h = h; - fd->h->ref_count++; - // Ensure hrecs is initialised now as we don't want multiple - // threads trying to do this simultaneously. - if (!fd->h->hrecs && sam_hdr_fill_hrecs(fd->h) < 0) - return -2; - - // We can only do this once we've got a header - if (pthread_create(&fd->dispatcher, NULL, sam_dispatcher_read, - fp) != 0) - return -2; - fd->dispatcher_set = 1; - } - - if (fd->h != h) { - hts_log_error("SAM multi-threaded decoding does not support changing header"); - return -1; - } - - sp_bams *gb = fd->curr_bam; - if (!gb) { - if (fd->errcode) { - // In case reader failed - errno = fd->errcode; - return -2; - } - hts_tpool_result *r = hts_tpool_next_result_wait(fd->q); - if (!r) - return -2; - fd->curr_bam = gb = (sp_bams *)hts_tpool_result_data(r); - hts_tpool_delete_result(r, 0); - } - if (!gb) - return fd->errcode ? 
-2 : -1; - bam1_t *b_array = (bam1_t *)gb->bams; - if (fd->curr_idx < gb->nbams) - if (!bam_copy1(b, &b_array[fd->curr_idx++])) - return -2; - if (fd->curr_idx == gb->nbams) { - pthread_mutex_lock(&fd->lines_m); - gb->next = fd->bams; - fd->bams = gb; - pthread_mutex_unlock(&fd->lines_m); - - fd->curr_bam = NULL; - fd->curr_idx = 0; - } - - ret = 0; - - } else { - err_recover: - ret = hts_getline(fp, KS_SEP_LINE, &fp->line); - if (ret < 0) return ret; - - ret = sam_parse1(&fp->line, h, b); - fp->line.l = 0; - if (ret < 0) { - hts_log_warning("Parse error at line %lld", (long long)fp->lineno); - if (h && h->ignore_sam_err) goto err_recover; - } - } - - return ret; -} - -// Returns 0 on success, -// -1 on EOF, -// <-1 on error -int sam_read1(htsFile *fp, sam_hdr_t *h, bam1_t *b) -{ - int ret, pass_filter; - - do { - switch (fp->format.format) { - case bam: - ret = sam_read1_bam(fp, h, b); - break; - - case cram: - ret = sam_read1_cram(fp, h, &b); - break; - - case sam: - ret = sam_read1_sam(fp, h, b); - break; - - case fasta_format: - case fastq_format: { - fastq_state *x = (fastq_state *)fp->state; - if (!x) { - if (!(fp->state = fastq_state_init(fp->format.format - == fastq_format ? '@' : '>'))) - return -2; - } - - return fastq_parse1(fp, b); - } - - case empty_format: - errno = EPIPE; - return -3; - - default: - errno = EFTYPE; - return -3; - } - - pass_filter = (ret >= 0 && fp->filter) - ? sam_passes_filter(h, b, fp->filter) - : 1; - } while (pass_filter == 0); - - return pass_filter < 0 ? -2 : ret; -} - -// With gcc, -O3 or -ftree-loop-vectorize is really key here as otherwise -// this code isn't vectorised and runs far slower than is necessary (even -// with the restrict keyword being used). 
-static inline void HTS_OPT3 -add33(uint8_t *a, const uint8_t * b, int32_t len) { - uint32_t i; - for (i = 0; i < len; i++) - a[i] = b[i]+33; -} - -static int sam_format1_append(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) -{ - int i, r = 0; - uint8_t *s, *end; - const bam1_core_t *c = &b->core; - - if (c->l_qname == 0) - return -1; - r |= kputsn_(bam_get_qname(b), c->l_qname-1-c->l_extranul, str); - r |= kputc_('\t', str); // query name - r |= kputw(c->flag, str); r |= kputc_('\t', str); // flag - if (c->tid >= 0) { // chr - r |= kputs(h->target_name[c->tid] , str); - r |= kputc_('\t', str); - } else r |= kputsn_("*\t", 2, str); - r |= kputll(c->pos + 1, str); r |= kputc_('\t', str); // pos - r |= kputw(c->qual, str); r |= kputc_('\t', str); // qual - if (c->n_cigar) { // cigar - uint32_t *cigar = bam_get_cigar(b); - for (i = 0; i < c->n_cigar; ++i) { - r |= kputw(bam_cigar_oplen(cigar[i]), str); - r |= kputc_(bam_cigar_opchr(cigar[i]), str); - } - } else r |= kputc_('*', str); - r |= kputc_('\t', str); - if (c->mtid < 0) r |= kputsn_("*\t", 2, str); // mate chr - else if (c->mtid == c->tid) r |= kputsn_("=\t", 2, str); - else { - r |= kputs(h->target_name[c->mtid], str); - r |= kputc_('\t', str); - } - r |= kputll(c->mpos + 1, str); r |= kputc_('\t', str); // mate pos - r |= kputll(c->isize, str); r |= kputc_('\t', str); // template len - if (c->l_qseq) { // seq and qual - uint8_t *s = bam_get_seq(b); - if (ks_resize(str, str->l+2+2*c->l_qseq) < 0) goto mem_err; - char *cp = str->s + str->l; - - // Sequence, 2 bases at a time - nibble2base(s, cp, c->l_qseq); - cp[c->l_qseq] = '\t'; - cp += c->l_qseq+1; - - // Quality - s = bam_get_qual(b); - i = 0; - if (s[0] == 0xff) { - cp[i++] = '*'; - } else { - add33((uint8_t *)cp, s, c->l_qseq); // cp[i] = s[i]+33; - i = c->l_qseq; - } - cp[i] = 0; - cp += i; - str->l = cp - str->s; - } else r |= kputsn_("*\t*", 3, str); - - s = bam_get_aux(b); // aux - end = b->data + b->l_data; - - while (end - s >= 4) { - r |= 
kputc_('\t', str); - if ((s = (uint8_t *)sam_format_aux1(s, s[2], s+3, end, str)) == NULL) - goto bad_aux; - } - r |= kputsn("", 0, str); // nul terminate - if (r < 0) goto mem_err; - - return str->l; - - bad_aux: - hts_log_error("Corrupted aux data for read %.*s", - b->core.l_qname, bam_get_qname(b)); - errno = EINVAL; - return -1; - - mem_err: - hts_log_error("Out of memory"); - errno = ENOMEM; - return -1; -} - -int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) -{ - str->l = 0; - return sam_format1_append(h, b, str); -} - -static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end); -int fastq_format1(fastq_state *x, const bam1_t *b, kstring_t *str) -{ - unsigned flag = b->core.flag; - int i, e = 0, len = b->core.l_qseq; - uint8_t *seq, *qual; - - str->l = 0; - - // Name - if (kputc(x->nprefix, str) == EOF || kputs(bam_get_qname(b), str) == EOF) - return -1; - - // /1 or /2 suffix - if (x && x->rnum && (flag & BAM_FPAIRED)) { - int r12 = flag & (BAM_FREAD1 | BAM_FREAD2); - if (r12 == BAM_FREAD1) { - if (kputs("/1", str) == EOF) - return -1; - } else if (r12 == BAM_FREAD2) { - if (kputs("/2", str) == EOF) - return -1; - } - } - - // Illumina CASAVA tag. - // This is ::: - if (x && x->casava) { - int rnum = (flag & BAM_FREAD1)? 1 : (flag & BAM_FREAD2)? 2 : 0; - char filtered = (flag & BAM_FQCFAIL)? 'Y' : 'N'; - uint8_t *bc = bam_aux_get(b, x->BC); - if (ksprintf(str, " %d:%c:0:%s", rnum, filtered, - bc ? (char *)bc+1 : "0") < 0) - return -1; - - if (bc && (*bc != 'Z' || (!isupper_c(bc[1]) && !islower_c(bc[1])))) { - hts_log_warning("BC tag starts with non-sequence base; using '0'"); - str->l -= strlen((char *)bc)-2; // limit to 1 char - str->s[str->l-1] = '0'; - str->s[str->l] = 0; - bc = NULL; - } - - // Replace any non-alpha with '+'. 
Ie seq-seq to seq+seq - if (bc) { - int l = strlen((char *)bc+1); - char *c = (char *)str->s + str->l - l; - for (i = 0; i < l; i++) { - if (!isalpha_c(c[i])) - c[i] = '+'; - else if (islower_c(c[i])) - c[i] = toupper_c(c[i]); - } - } - } - - // Aux tags - if (x && x->aux) { - uint8_t *s = bam_get_aux(b), *end = b->data + b->l_data; - while (s && end - s >= 4) { - int tt = s[0]*256 + s[1]; - if (x->tags == NULL || - kh_get(tag, x->tags, tt) != kh_end(x->tags)) { - e |= kputc_('\t', str) < 0; - if (!(s = (uint8_t *)sam_format_aux1(s, s[2], s+3, end, str))) - return -1; - } else { - s = skip_aux(s+2, end); - } - } - e |= kputsn("", 0, str) < 0; // nul terminate - } - - if (ks_resize(str, str->l + 1 + len+1 + 2 + len+1 + 1) < 0) return -1; - e |= kputc_('\n', str) < 0; - - // Seq line - seq = bam_get_seq(b); - if (flag & BAM_FREVERSE) - for (i = len-1; i >= 0; i--) - e |= kputc_("!TGKCYSBAWRDMHVN"[bam_seqi(seq, i)], str) < 0; - else - for (i = 0; i < len; i++) - e |= kputc_(seq_nt16_str[bam_seqi(seq, i)], str) < 0; - - - // Qual line - if (x->nprefix == '@') { - kputsn("\n+\n", 3, str); - qual = bam_get_qual(b); - if (qual[0] == 0xff) - for (i = 0; i < len; i++) - e |= kputc_('B', str) < 0; - else if (flag & BAM_FREVERSE) - for (i = len-1; i >= 0; i--) - e |= kputc_(33 + qual[i], str) < 0; - else - for (i = 0; i < len; i++) - e |= kputc_(33 + qual[i], str) < 0; - - } - e |= kputc('\n', str) < 0; - - return e ? -1 : str->l; -} - -// Sadly we need to be able to modify the bam_hdr here so we can -// reference count the structure. 
-int sam_write1(htsFile *fp, const sam_hdr_t *h, const bam1_t *b) -{ - switch (fp->format.format) { - case binary_format: - fp->format.category = sequence_data; - fp->format.format = bam; - /* fall-through */ - case bam: - return bam_write_idx1(fp, h, b); - - case cram: - return cram_put_bam_seq(fp->fp.cram, (bam1_t *)b); - - case text_format: - fp->format.category = sequence_data; - fp->format.format = sam; - /* fall-through */ - case sam: - if (fp->state) { - SAM_state *fd = (SAM_state *)fp->state; - - // Threaded output - if (!fd->h) { - // NB: discard const. We don't actually modify sam_hdr_t here, - // just data pointed to by it (which is a bit weasely still), - // but out cached pointer must be non-const as we want to - // destroy it later on and sam_hdr_destroy takes non-const. - // - // We do this because some tools do sam_hdr_destroy; sam_close - // while others do sam_close; sam_hdr_destroy. The former is - // an issue as we need the header still when flushing. - fd->h = (sam_hdr_t *)h; - fd->h->ref_count++; - - if (pthread_create(&fd->dispatcher, NULL, sam_dispatcher_write, - fp) != 0) - return -2; - fd->dispatcher_set = 1; - } - - if (fd->h != h) { - hts_log_error("SAM multi-threaded decoding does not support changing header"); - return -2; - } - - // Find a suitable BAM array to copy to - sp_bams *gb = fd->curr_bam; - if (!gb) { - pthread_mutex_lock(&fd->lines_m); - if (fd->bams) { - fd->curr_bam = gb = fd->bams; - fd->bams = gb->next; - gb->next = NULL; - gb->nbams = 0; - gb->bam_mem = 0; - pthread_mutex_unlock(&fd->lines_m); - } else { - pthread_mutex_unlock(&fd->lines_m); - if (!(gb = calloc(1, sizeof(*gb)))) return -1; - if (!(gb->bams = calloc(SAM_NBAM, sizeof(*gb->bams)))) { - free(gb); - return -1; - } - gb->nbams = 0; - gb->abams = SAM_NBAM; - gb->bam_mem = 0; - gb->fd = fd; - fd->curr_idx = 0; - fd->curr_bam = gb; - } - } - - if (!bam_copy1(&gb->bams[gb->nbams++], b)) - return -2; - gb->bam_mem += b->l_data + sizeof(*b); - - // Dispatch if 
full - if (gb->nbams == SAM_NBAM || gb->bam_mem > SAM_NBYTES*0.8) { - gb->serial = fd->serial++; - pthread_mutex_lock(&fd->command_m); - if (fd->errcode != 0) { - pthread_mutex_unlock(&fd->command_m); - return -fd->errcode; - } - if (hts_tpool_dispatch3(fd->p, fd->q, sam_format_worker, gb, - cleanup_sp_bams, - cleanup_sp_lines, 0) < 0) { - pthread_mutex_unlock(&fd->command_m); - return -1; - } - pthread_mutex_unlock(&fd->command_m); - fd->curr_bam = NULL; - } - - // Dummy value as we don't know how long it really is. - // We could track file sizes via a SAM_state field, but I don't think - // it is necessary. - return 1; - } else { - if (sam_format1(h, b, &fp->line) < 0) return -1; - kputc('\n', &fp->line); - if (fp->is_bgzf) { - if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) - return -1; - if ( bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l) != fp->line.l ) return -1; - } else { - if ( hwrite(fp->fp.hfile, fp->line.s, fp->line.l) != fp->line.l ) return -1; - } - - if (fp->idx) { - if (fp->format.compression == bgzf) { - if (bgzf_idx_push(fp->fp.bgzf, fp->idx, b->core.tid, b->core.pos, bam_endpos(b), - bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); - return -1; - } - } else { - if (hts_idx_push(fp->idx, b->core.tid, b->core.pos, bam_endpos(b), - bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { - hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", - bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); - return -1; - } - } - } - - return fp->line.l; - } - - - case fasta_format: - case fastq_format: { - fastq_state *x = (fastq_state *)fp->state; - if (!x) { - if (!(fp->state = 
fastq_state_init(fp->format.format - == fastq_format ? '@' : '>'))) - return -2; - } - - if (fastq_format1(fp->state, b, &fp->line) < 0) - return -1; - if (fp->is_bgzf) { - if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) - return -1; - if (bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l) != fp->line.l) - return -1; - } else { - if (hwrite(fp->fp.hfile, fp->line.s, fp->line.l) != fp->line.l) - return -1; - } - return fp->line.l; - } - - default: - errno = EBADF; - return -1; - } -} - -/************************ - *** Auxiliary fields *** - ************************/ -#ifndef HTS_LITTLE_ENDIAN -static int aux_to_le(char type, uint8_t *out, const uint8_t *in, size_t len) { - int tsz = aux_type2size(type); - - if (tsz >= 2 && tsz <= 8 && (len & (tsz - 1)) != 0) return -1; - - switch (tsz) { - case 'H': case 'Z': case 1: // Trivial - memcpy(out, in, len); - break; - -#define aux_val_to_le(type_t, store_le) do { \ - type_t v; \ - size_t i; \ - for (i = 0; i < len; i += sizeof(type_t), out += sizeof(type_t)) { \ - memcpy(&v, in + i, sizeof(type_t)); \ - store_le(v, out); \ - } \ - } while (0) - - case 2: aux_val_to_le(uint16_t, u16_to_le); break; - case 4: aux_val_to_le(uint32_t, u32_to_le); break; - case 8: aux_val_to_le(uint64_t, u64_to_le); break; - -#undef aux_val_to_le - - case 'B': { // Recurse! 
- uint32_t n; - if (len < 5) return -1; - memcpy(&n, in + 1, 4); - out[0] = in[0]; - u32_to_le(n, out + 1); - return aux_to_le(in[0], out + 5, in + 5, len - 5); - } - - default: // Unknown type code - return -1; - } - - - - return 0; -} -#endif - -int bam_aux_append(bam1_t *b, const char tag[2], char type, int len, const uint8_t *data) -{ - uint32_t new_len; - - assert(b->l_data >= 0); - new_len = b->l_data + 3 + len; - if (new_len > INT32_MAX || new_len < b->l_data) goto nomem; - - if (realloc_bam_data(b, new_len) < 0) return -1; - - b->data[b->l_data] = tag[0]; - b->data[b->l_data + 1] = tag[1]; - b->data[b->l_data + 2] = type; - -#ifdef HTS_LITTLE_ENDIAN - memcpy(b->data + b->l_data + 3, data, len); -#else - if (aux_to_le(type, b->data + b->l_data + 3, data, len) != 0) { - errno = EINVAL; - return -1; - } -#endif - - b->l_data = new_len; - - return 0; - - nomem: - errno = ENOMEM; - return -1; -} - -static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end) -{ - int size; - uint32_t n; - if (s >= end) return end; - size = aux_type2size(*s); ++s; // skip type - switch (size) { - case 'Z': - case 'H': - while (s < end && *s) ++s; - return s < end ? s + 1 : end; - case 'B': - if (end - s < 5) return NULL; - size = aux_type2size(*s); ++s; - n = le_to_u32(s); - s += 4; - if (size == 0 || end - s < size * n) return NULL; - return s + size * n; - case 0: - return NULL; - default: - if (end - s < size) return NULL; - return s + size; - } -} - -uint8_t *bam_aux_first(const bam1_t *b) -{ - uint8_t *s = bam_get_aux(b); - uint8_t *end = b->data + b->l_data; - if (end - s <= 2) { errno = ENOENT; return NULL; } - return s+2; -} - -uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s) -{ - uint8_t *end = b->data + b->l_data; - uint8_t *next = s? 
skip_aux((uint8_t *) s, end) : end; - if (next == NULL) goto bad_aux; - if (end - next <= 2) { errno = ENOENT; return NULL; } - return next+2; - - bad_aux: - hts_log_error("Corrupted aux data for read %s", bam_get_qname(b)); - errno = EINVAL; - return NULL; -} - -uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) -{ - uint8_t *s; - for (s = bam_aux_first(b); s; s = bam_aux_next(b, s)) - if (s[-2] == tag[0] && s[-1] == tag[1]) { - // Check the tag value is valid and complete - uint8_t *e = skip_aux(s, b->data + b->l_data); - if (e == NULL) goto bad_aux; - if ((*s == 'Z' || *s == 'H') && *(e - 1) != '\0') goto bad_aux; - - return s; - } - - // errno now as set by bam_aux_first()/bam_aux_next() - return NULL; - - bad_aux: - hts_log_error("Corrupted aux data for read %s", bam_get_qname(b)); - errno = EINVAL; - return NULL; -} - -int bam_aux_del(bam1_t *b, uint8_t *s) -{ - s = bam_aux_remove(b, s); - return (s || errno == ENOENT)? 0 : -1; -} - -uint8_t *bam_aux_remove(bam1_t *b, uint8_t *s) -{ - uint8_t *end = b->data + b->l_data; - uint8_t *next = skip_aux(s, end); - if (next == NULL) goto bad_aux; - - b->l_data -= next - (s-2); - if (next >= end) { errno = ENOENT; return NULL; } - - memmove(s-2, next, end - next); - return s; - - bad_aux: - hts_log_error("Corrupted aux data for read %s", bam_get_qname(b)); - errno = EINVAL; - return NULL; -} - -int bam_aux_update_str(bam1_t *b, const char tag[2], int len, const char *data) -{ - // FIXME: This is not at all efficient! - size_t ln = len >= 0 ? len : strlen(data) + 1; - size_t old_ln = 0; - int need_nul = ln == 0 || data[ln - 1] != '\0'; - int save_errno = errno; - int new_tag = 0; - uint8_t *s = bam_aux_get(b,tag), *e; - - if (s) { // Replacing existing tag - char type = *s; - if (type != 'Z') { - hts_log_error("Called bam_aux_update_str for type '%c' instead of 'Z'", type); - errno = EINVAL; - return -1; - } - s++; - e = memchr(s, '\0', b->data + b->l_data - s); - old_ln = (e ? 
e - s : b->data + b->l_data - s) + 1; - s -= 3; - } else { - if (errno != ENOENT) { // Invalid aux data, give up - return -1; - } else { // Tag doesn't exist - put it on the end - errno = save_errno; - s = b->data + b->l_data; - new_tag = 3; - } - } - - if (old_ln < ln + need_nul + new_tag) { - ptrdiff_t s_offset = s - b->data; - if (possibly_expand_bam_data(b, ln + need_nul + new_tag - old_ln) < 0) - return -1; - s = b->data + s_offset; - } - if (!new_tag) { - memmove(s + 3 + ln + need_nul, - s + 3 + old_ln, - b->l_data - (s + 3 - b->data) - old_ln); - } - b->l_data += new_tag + ln + need_nul - old_ln; - - s[0] = tag[0]; - s[1] = tag[1]; - s[2] = 'Z'; - memmove(s+3,data,ln); - if (need_nul) s[3 + ln] = '\0'; - return 0; -} - -int bam_aux_update_int(bam1_t *b, const char tag[2], int64_t val) -{ - uint32_t sz, old_sz = 0, new = 0; - uint8_t *s, type; - - if (val < INT32_MIN || val > UINT32_MAX) { - errno = EOVERFLOW; - return -1; - } - if (val < INT16_MIN) { type = 'i'; sz = 4; } - else if (val < INT8_MIN) { type = 's'; sz = 2; } - else if (val < 0) { type = 'c'; sz = 1; } - else if (val < UINT8_MAX) { type = 'C'; sz = 1; } - else if (val < UINT16_MAX) { type = 'S'; sz = 2; } - else { type = 'I'; sz = 4; } - - s = bam_aux_get(b, tag); - if (s) { // Tag present - how big was the old one? - switch (*s) { - case 'c': case 'C': old_sz = 1; break; - case 's': case 'S': old_sz = 2; break; - case 'i': case 'I': old_sz = 4; break; - default: errno = EINVAL; return -1; // Not an integer - } - } else { - if (errno == ENOENT) { // Tag doesn't exist - add a new one - s = b->data + b->l_data; - new = 1; - } else { // Invalid aux data, give up. - return -1; - } - } - - if (new || old_sz < sz) { - // Make room for new tag - ptrdiff_t s_offset = s - b->data; - if (possibly_expand_bam_data(b, (new ? 
3 : 0) + sz - old_sz) < 0) - return -1; - s = b->data + s_offset; - if (new) { // Add tag id - *s++ = tag[0]; - *s++ = tag[1]; - } else { // Shift following data so we have space - memmove(s + sz, s + old_sz, b->l_data - s_offset - old_sz); - } - } else { - // Reuse old space. Data value may be bigger than necessary but - // we avoid having to move everything else - sz = old_sz; - type = (val < 0 ? "\0cs\0i" : "\0CS\0I")[old_sz]; - assert(type > 0); - } - *s++ = type; -#ifdef HTS_LITTLE_ENDIAN - memcpy(s, &val, sz); -#else - switch (sz) { - case 4: u32_to_le(val, s); break; - case 2: u16_to_le(val, s); break; - default: *s = val; break; - } -#endif - b->l_data += (new ? 3 : 0) + sz - old_sz; - return 0; -} - -int bam_aux_update_float(bam1_t *b, const char tag[2], float val) -{ - uint8_t *s = bam_aux_get(b, tag); - int shrink = 0, new = 0; - - if (s) { // Tag present - what was it? - switch (*s) { - case 'f': break; - case 'd': shrink = 1; break; - default: errno = EINVAL; return -1; // Not a float - } - } else { - if (errno == ENOENT) { // Tag doesn't exist - add a new one - new = 1; - } else { // Invalid aux data, give up. 
- return -1; - } - } - - if (new) { // Ensure there's room - if (possibly_expand_bam_data(b, 3 + 4) < 0) - return -1; - s = b->data + b->l_data; - *s++ = tag[0]; - *s++ = tag[1]; - } else if (shrink) { // Convert non-standard double tag to float - memmove(s + 5, s + 9, b->l_data - ((s + 9) - b->data)); - b->l_data -= 4; - } - *s++ = 'f'; - float_to_le(val, s); - if (new) b->l_data += 7; - - return 0; -} - -int bam_aux_update_array(bam1_t *b, const char tag[2], - uint8_t type, uint32_t items, void *data) -{ - uint8_t *s = bam_aux_get(b, tag); - size_t old_sz = 0, new_sz; - int new = 0; - - if (s) { // Tag present - if (*s != 'B') { errno = EINVAL; return -1; } - old_sz = aux_type2size(s[1]); - if (old_sz < 1 || old_sz > 4) { errno = EINVAL; return -1; } - old_sz *= le_to_u32(s + 2); - } else { - if (errno == ENOENT) { // Tag doesn't exist - add a new one - s = b->data + b->l_data; - new = 1; - } else { // Invalid aux data, give up. - return -1; - } - } - - new_sz = aux_type2size(type); - if (new_sz < 1 || new_sz > 4) { errno = EINVAL; return -1; } - if (items > INT32_MAX / new_sz) { errno = ENOMEM; return -1; } - new_sz *= items; - - if (new || old_sz < new_sz) { - // Make room for new tag - ptrdiff_t s_offset = s - b->data; - if (possibly_expand_bam_data(b, (new ? 
8 : 0) + new_sz - old_sz) < 0) - return -1; - s = b->data + s_offset; - } - if (new) { // Add tag id and type - *s++ = tag[0]; - *s++ = tag[1]; - *s = 'B'; - b->l_data += 8 + new_sz; - } else if (old_sz != new_sz) { // shift following data if necessary - memmove(s + 6 + new_sz, s + 6 + old_sz, - b->l_data - ((s + 6 + old_sz) - b->data)); - b->l_data -= old_sz; - b->l_data += new_sz; - } - - s[1] = type; - u32_to_le(items, s + 2); -#ifdef HTS_LITTLE_ENDIAN - memcpy(s + 6, data, new_sz); - return 0; -#else - return aux_to_le(type, s + 6, data, new_sz); -#endif -} - -static inline int64_t get_int_aux_val(uint8_t type, const uint8_t *s, - uint32_t idx) -{ - switch (type) { - case 'c': return le_to_i8(s + idx); - case 'C': return s[idx]; - case 's': return le_to_i16(s + 2 * idx); - case 'S': return le_to_u16(s + 2 * idx); - case 'i': return le_to_i32(s + 4 * idx); - case 'I': return le_to_u32(s + 4 * idx); - default: - errno = EINVAL; - return 0; - } -} - -int64_t bam_aux2i(const uint8_t *s) -{ - int type; - type = *s++; - return get_int_aux_val(type, s, 0); -} - -double bam_aux2f(const uint8_t *s) -{ - int type; - type = *s++; - if (type == 'd') return le_to_double(s); - else if (type == 'f') return le_to_float(s); - else return get_int_aux_val(type, s, 0); -} - -char bam_aux2A(const uint8_t *s) -{ - int type; - type = *s++; - if (type == 'A') return *(char*)s; - errno = EINVAL; - return 0; -} - -char *bam_aux2Z(const uint8_t *s) -{ - int type; - type = *s++; - if (type == 'Z' || type == 'H') return (char*)s; - errno = EINVAL; - return 0; -} - -uint32_t bam_auxB_len(const uint8_t *s) -{ - if (s[0] != 'B') { - errno = EINVAL; - return 0; - } - return le_to_u32(s + 2); -} - -int64_t bam_auxB2i(const uint8_t *s, uint32_t idx) -{ - uint32_t len = bam_auxB_len(s); - if (idx >= len) { - errno = ERANGE; - return 0; - } - return get_int_aux_val(s[1], s + 6, idx); -} - -double bam_auxB2f(const uint8_t *s, uint32_t idx) -{ - uint32_t len = bam_auxB_len(s); - if (idx >= len) { - 
errno = ERANGE; - return 0.0; - } - if (s[1] == 'f') return le_to_float(s + 6 + 4 * idx); - else return get_int_aux_val(s[1], s + 6, idx); -} - -int sam_open_mode(char *mode, const char *fn, const char *format) -{ - // TODO Parse "bam5" etc for compression level - if (format == NULL) { - // Try to pick a format based on the filename extension - char extension[HTS_MAX_EXT_LEN]; - if (find_file_extension(fn, extension) < 0) return -1; - return sam_open_mode(mode, fn, extension); - } - else if (strcasecmp(format, "bam") == 0) strcpy(mode, "b"); - else if (strcasecmp(format, "cram") == 0) strcpy(mode, "c"); - else if (strcasecmp(format, "sam") == 0) strcpy(mode, ""); - else if (strcasecmp(format, "sam.gz") == 0) strcpy(mode, "z"); - else if (strcasecmp(format, "fastq") == 0 || - strcasecmp(format, "fq") == 0) strcpy(mode, "f"); - else if (strcasecmp(format, "fastq.gz") == 0 || - strcasecmp(format, "fq.gz") == 0) strcpy(mode, "fz"); - else if (strcasecmp(format, "fasta") == 0 || - strcasecmp(format, "fa") == 0) strcpy(mode, "F"); - else if (strcasecmp(format, "fasta.gz") == 0 || - strcasecmp(format, "fa.gz") == 0) strcpy(mode, "Fz"); - else return -1; - - return 0; -} - -// A version of sam_open_mode that can handle ,key=value options. -// The format string is allocated and returned, to be freed by the caller. -// Prefix should be "r" or "w", -char *sam_open_mode_opts(const char *fn, - const char *mode, - const char *format) -{ - char *mode_opts = malloc((format ? strlen(format) : 1) + - (mode ? strlen(mode) : 1) + 12); - char *opts, *cp; - int format_len; - - if (!mode_opts) - return NULL; - - strcpy(mode_opts, mode ? 
mode : "r"); - cp = mode_opts + strlen(mode_opts); - - if (format == NULL) { - // Try to pick a format based on the filename extension - char extension[HTS_MAX_EXT_LEN]; - if (find_file_extension(fn, extension) < 0) { - free(mode_opts); - return NULL; - } - if (sam_open_mode(cp, fn, extension) == 0) { - return mode_opts; - } else { - free(mode_opts); - return NULL; - } - } - - if ((opts = strchr(format, ','))) { - format_len = opts-format; - } else { - opts=""; - format_len = strlen(format); - } - - if (strncmp(format, "bam", format_len) == 0) { - *cp++ = 'b'; - } else if (strncmp(format, "cram", format_len) == 0) { - *cp++ = 'c'; - } else if (strncmp(format, "cram2", format_len) == 0) { - *cp++ = 'c'; - strcpy(cp, ",VERSION=2.1"); - cp += 12; - } else if (strncmp(format, "cram3", format_len) == 0) { - *cp++ = 'c'; - strcpy(cp, ",VERSION=3.0"); - cp += 12; - } else if (strncmp(format, "sam", format_len) == 0) { - ; // format mode="" - } else if (strncmp(format, "sam.gz", format_len) == 0) { - *cp++ = 'z'; - } else if (strncmp(format, "fastq", format_len) == 0 || - strncmp(format, "fq", format_len) == 0) { - *cp++ = 'f'; - } else if (strncmp(format, "fastq.gz", format_len) == 0 || - strncmp(format, "fq.gz", format_len) == 0) { - *cp++ = 'f'; - *cp++ = 'z'; - } else if (strncmp(format, "fasta", format_len) == 0 || - strncmp(format, "fa", format_len) == 0) { - *cp++ = 'F'; - } else if (strncmp(format, "fasta.gz", format_len) == 0 || - strncmp(format, "fa", format_len) == 0) { - *cp++ = 'F'; - *cp++ = 'z'; - } else { - free(mode_opts); - return NULL; - } - - strcpy(cp, opts); - - return mode_opts; -} - -#define STRNCMP(a,b,n) (strncasecmp((a),(b),(n)) || strlen(a)!=(n)) -int bam_str2flag(const char *str) -{ - char *end, *beg = (char*) str; - long int flag = strtol(str, &end, 0); - if ( end!=str ) return flag; // the conversion was successful - flag = 0; - while ( *str ) - { - end = beg; - while ( *end && *end!=',' ) end++; - if ( !STRNCMP("PAIRED",beg,end-beg) ) flag 
|= BAM_FPAIRED; - else if ( !STRNCMP("PROPER_PAIR",beg,end-beg) ) flag |= BAM_FPROPER_PAIR; - else if ( !STRNCMP("UNMAP",beg,end-beg) ) flag |= BAM_FUNMAP; - else if ( !STRNCMP("MUNMAP",beg,end-beg) ) flag |= BAM_FMUNMAP; - else if ( !STRNCMP("REVERSE",beg,end-beg) ) flag |= BAM_FREVERSE; - else if ( !STRNCMP("MREVERSE",beg,end-beg) ) flag |= BAM_FMREVERSE; - else if ( !STRNCMP("READ1",beg,end-beg) ) flag |= BAM_FREAD1; - else if ( !STRNCMP("READ2",beg,end-beg) ) flag |= BAM_FREAD2; - else if ( !STRNCMP("SECONDARY",beg,end-beg) ) flag |= BAM_FSECONDARY; - else if ( !STRNCMP("QCFAIL",beg,end-beg) ) flag |= BAM_FQCFAIL; - else if ( !STRNCMP("DUP",beg,end-beg) ) flag |= BAM_FDUP; - else if ( !STRNCMP("SUPPLEMENTARY",beg,end-beg) ) flag |= BAM_FSUPPLEMENTARY; - else return -1; - if ( !*end ) break; - beg = end + 1; - } - return flag; -} - -char *bam_flag2str(int flag) -{ - kstring_t str = {0,0,0}; - if ( flag&BAM_FPAIRED ) ksprintf(&str,"%s%s", str.l?",":"","PAIRED"); - if ( flag&BAM_FPROPER_PAIR ) ksprintf(&str,"%s%s", str.l?",":"","PROPER_PAIR"); - if ( flag&BAM_FUNMAP ) ksprintf(&str,"%s%s", str.l?",":"","UNMAP"); - if ( flag&BAM_FMUNMAP ) ksprintf(&str,"%s%s", str.l?",":"","MUNMAP"); - if ( flag&BAM_FREVERSE ) ksprintf(&str,"%s%s", str.l?",":"","REVERSE"); - if ( flag&BAM_FMREVERSE ) ksprintf(&str,"%s%s", str.l?",":"","MREVERSE"); - if ( flag&BAM_FREAD1 ) ksprintf(&str,"%s%s", str.l?",":"","READ1"); - if ( flag&BAM_FREAD2 ) ksprintf(&str,"%s%s", str.l?",":"","READ2"); - if ( flag&BAM_FSECONDARY ) ksprintf(&str,"%s%s", str.l?",":"","SECONDARY"); - if ( flag&BAM_FQCFAIL ) ksprintf(&str,"%s%s", str.l?",":"","QCFAIL"); - if ( flag&BAM_FDUP ) ksprintf(&str,"%s%s", str.l?",":"","DUP"); - if ( flag&BAM_FSUPPLEMENTARY ) ksprintf(&str,"%s%s", str.l?",":"","SUPPLEMENTARY"); - if ( str.l == 0 ) kputsn("", 0, &str); - return str.s; -} - - -/************************** - *** Pileup and Mpileup *** - **************************/ - -#if !defined(BAM_NO_PILEUP) - -#include - 
-/******************* - *** Memory pool *** - *******************/ - -typedef struct { - int k, y; - hts_pos_t x, end; -} cstate_t; - -static cstate_t g_cstate_null = { -1, 0, 0, 0 }; - -typedef struct __linkbuf_t { - bam1_t b; - hts_pos_t beg, end; - cstate_t s; - struct __linkbuf_t *next; - bam_pileup_cd cd; -} lbnode_t; - -typedef struct { - int cnt, n, max; - lbnode_t **buf; -} mempool_t; - -static mempool_t *mp_init(void) -{ - mempool_t *mp; - mp = (mempool_t*)calloc(1, sizeof(mempool_t)); - return mp; -} -static void mp_destroy(mempool_t *mp) -{ - int k; - for (k = 0; k < mp->n; ++k) { - free(mp->buf[k]->b.data); - free(mp->buf[k]); - } - free(mp->buf); - free(mp); -} -static inline lbnode_t *mp_alloc(mempool_t *mp) -{ - ++mp->cnt; - if (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t)); - else return mp->buf[--mp->n]; -} -static inline void mp_free(mempool_t *mp, lbnode_t *p) -{ - --mp->cnt; p->next = 0; // clear lbnode_t::next here - if (mp->n == mp->max) { - mp->max = mp->max? mp->max<<1 : 256; - mp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max); - } - mp->buf[mp->n++] = p; -} - -/********************** - *** CIGAR resolver *** - **********************/ - -/* s->k: the index of the CIGAR operator that has just been processed. 
- s->x: the reference coordinate of the start of s->k - s->y: the query coordinate of the start of s->k - */ -static inline int resolve_cigar2(bam_pileup1_t *p, hts_pos_t pos, cstate_t *s) -{ -#define _cop(c) ((c)&BAM_CIGAR_MASK) -#define _cln(c) ((c)>>BAM_CIGAR_SHIFT) - - bam1_t *b = p->b; - bam1_core_t *c = &b->core; - uint32_t *cigar = bam_get_cigar(b); - int k; - // determine the current CIGAR operation - //fprintf(stderr, "%s\tpos=%ld\tend=%ld\t(%d,%ld,%d)\n", bam_get_qname(b), pos, s->end, s->k, s->x, s->y); - if (s->k == -1) { // never processed - p->qpos = 0; - if (c->n_cigar == 1) { // just one operation, save a loop - if (_cop(cigar[0]) == BAM_CMATCH || _cop(cigar[0]) == BAM_CEQUAL || _cop(cigar[0]) == BAM_CDIFF) s->k = 0, s->x = c->pos, s->y = 0; - } else { // find the first match or deletion - for (k = 0, s->x = c->pos, s->y = 0; k < c->n_cigar; ++k) { - int op = _cop(cigar[k]); - int l = _cln(cigar[k]); - if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || - op == BAM_CEQUAL || op == BAM_CDIFF) break; - else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l; - } - assert(k < c->n_cigar); - s->k = k; - } - } else { // the read has been processed before - int op, l = _cln(cigar[s->k]); - if (pos - s->x >= l) { // jump to the next operation - assert(s->k < c->n_cigar); // otherwise a bug: this function should not be called in this case - op = _cop(cigar[s->k+1]); - if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) { // jump to the next without a loop - if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l; - s->x += l; - ++s->k; - } else { // find the next M/D/N/=/X - if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l; - s->x += l; - for (k = s->k + 1; k < c->n_cigar; ++k) { - op = _cop(cigar[k]), l = _cln(cigar[k]); - if (op == BAM_CMATCH || op == BAM_CDEL || op 
== BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) break; - else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l; - } - s->k = k; - } - assert(s->k < c->n_cigar); // otherwise a bug - } // else, do nothing - } - { // collect pileup information - int op, l; - op = _cop(cigar[s->k]); l = _cln(cigar[s->k]); - p->is_del = p->indel = p->is_refskip = 0; - if (s->x + l - 1 == pos && s->k + 1 < c->n_cigar) { // peek the next operation - int op2 = _cop(cigar[s->k+1]); - int l2 = _cln(cigar[s->k+1]); - if (op2 == BAM_CDEL && op != BAM_CDEL) { - // At start of a new deletion, merge e.g. 1D2D to 3D. - // Within a deletion (the 2D in 1D2D) we keep p->indel=0 - // and rely on is_del=1 as we would for 3D. - p->indel = -(int)l2; - for (k = s->k+2; k < c->n_cigar; ++k) { - op2 = _cop(cigar[k]); l2 = _cln(cigar[k]); - if (op2 == BAM_CDEL) p->indel -= l2; - else break; - } - } else if (op2 == BAM_CINS) { - p->indel = l2; - for (k = s->k+2; k < c->n_cigar; ++k) { - op2 = _cop(cigar[k]); l2 = _cln(cigar[k]); - if (op2 == BAM_CINS) p->indel += l2; - else if (op2 != BAM_CPAD) break; - } - } else if (op2 == BAM_CPAD && s->k + 2 < c->n_cigar) { - int l3 = 0; - for (k = s->k + 2; k < c->n_cigar; ++k) { - op2 = _cop(cigar[k]); l2 = _cln(cigar[k]); - if (op2 == BAM_CINS) l3 += l2; - else if (op2 == BAM_CDEL || op2 == BAM_CMATCH || op2 == BAM_CREF_SKIP || op2 == BAM_CEQUAL || op2 == BAM_CDIFF) break; - } - if (l3 > 0) p->indel = l3; - } - } - if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { - p->qpos = s->y + (pos - s->x); - } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) { - p->is_del = 1; p->qpos = s->y; // FIXME: distinguish D and N!!!!! 
- p->is_refskip = (op == BAM_CREF_SKIP); - } // cannot be other operations; otherwise a bug - p->is_head = (pos == c->pos); p->is_tail = (pos == s->end); - } - p->cigar_ind = s->k; - return 1; -} - -/******************************* - *** Expansion of insertions *** - *******************************/ - -/* - * Fills out the kstring with the padded insertion sequence for the current - * location in 'p'. If this is not an insertion site, the string is blank. - * - * This variant handles base modifications, but only when "m" is non-NULL. - * - * Returns the number of inserted base on success, with string length being - * accessable via ins->l; - * -1 on failure. - */ -int bam_plp_insertion_mod(const bam_pileup1_t *p, - hts_base_mod_state *m, - kstring_t *ins, int *del_len) { - int j, k, indel, nb = 0; - uint32_t *cigar; - - if (p->indel <= 0) { - if (ks_resize(ins, 1) < 0) - return -1; - ins->l = 0; - ins->s[0] = '\0'; - return 0; - } - - if (del_len) - *del_len = 0; - - // Measure indel length including pads - indel = 0; - k = p->cigar_ind+1; - cigar = bam_get_cigar(p->b); - while (k < p->b->core.n_cigar) { - switch (cigar[k] & BAM_CIGAR_MASK) { - case BAM_CPAD: - case BAM_CINS: - indel += (cigar[k] >> BAM_CIGAR_SHIFT); - break; - default: - k = p->b->core.n_cigar; - break; - } - k++; - } - nb = ins->l = indel; - - // Produce sequence - if (ks_resize(ins, indel+1) < 0) - return -1; - indel = 0; - k = p->cigar_ind+1; - j = 1; - while (k < p->b->core.n_cigar) { - int l, c; - switch (cigar[k] & BAM_CIGAR_MASK) { - case BAM_CPAD: - for (l = 0; l < (cigar[k]>>BAM_CIGAR_SHIFT); l++) - ins->s[indel++] = '*'; - break; - case BAM_CINS: - for (l = 0; l < (cigar[k]>>BAM_CIGAR_SHIFT); l++, j++) { - c = p->qpos + j - p->is_del < p->b->core.l_qseq - ? 
seq_nt16_str[bam_seqi(bam_get_seq(p->b), - p->qpos + j - p->is_del)] - : 'N'; - ins->s[indel++] = c; - int nm; - hts_base_mod mod[256]; - if (m && (nm = bam_mods_at_qpos(p->b, p->qpos + j - p->is_del, - m, mod, 256)) > 0) { - int o_indel = indel; - if (ks_resize(ins, ins->l + nm*16+3) < 0) - return -1; - ins->s[indel++] = '['; - int j; - for (j = 0; j < nm; j++) { - char qual[20]; - if (mod[j].qual >= 0) - snprintf(qual, sizeof(qual), "%d", mod[j].qual); - else - *qual=0; - if (mod[j].modified_base < 0) - // ChEBI - indel += snprintf(&ins->s[indel], ins->m - indel, - "%c(%d)%s", - "+-"[mod[j].strand], - -mod[j].modified_base, - qual); - else - indel += snprintf(&ins->s[indel], ins->m - indel, - "%c%c%s", - "+-"[mod[j].strand], - mod[j].modified_base, - qual); - } - ins->s[indel++] = ']'; - ins->l += indel - o_indel; // grow by amount we used - } - } - break; - case BAM_CDEL: - // eg cigar 1M2I1D gives mpileup output in T+2AA-1C style - if (del_len) - *del_len = cigar[k]>>BAM_CIGAR_SHIFT; - // fall through - default: - k = p->b->core.n_cigar; - break; - } - k++; - } - ins->s[indel] = '\0'; - ins->l = indel; // string length - - return nb; // base length -} - -/* - * Fills out the kstring with the padded insertion sequence for the current - * location in 'p'. If this is not an insertion site, the string is blank. - * - * This is the original interface with no capability for reporting base - * modifications. - * - * Returns the length of insertion string on success; - * -1 on failure. 
- */ -int bam_plp_insertion(const bam_pileup1_t *p, kstring_t *ins, int *del_len) { - return bam_plp_insertion_mod(p, NULL, ins, del_len); -} - -/*********************** - *** Pileup iterator *** - ***********************/ - -// Dictionary of overlapping reads -KHASH_MAP_INIT_STR(olap_hash, lbnode_t *) -typedef khash_t(olap_hash) olap_hash_t; - -struct bam_plp_s { - mempool_t *mp; - lbnode_t *head, *tail; - int32_t tid, max_tid; - hts_pos_t pos, max_pos; - int is_eof, max_plp, error, maxcnt; - uint64_t id; - bam_pileup1_t *plp; - // for the "auto" interface only - bam1_t *b; - bam_plp_auto_f func; - void *data; - olap_hash_t *overlaps; - - // For notification of creation and destruction events - // and associated client-owned pointer. - int (*plp_construct)(void *data, const bam1_t *b, bam_pileup_cd *cd); - int (*plp_destruct )(void *data, const bam1_t *b, bam_pileup_cd *cd); -}; - -bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data) -{ - bam_plp_t iter; - iter = (bam_plp_t)calloc(1, sizeof(struct bam_plp_s)); - iter->mp = mp_init(); - iter->head = iter->tail = mp_alloc(iter->mp); - iter->max_tid = iter->max_pos = -1; - iter->maxcnt = 8000; - if (func) { - iter->func = func; - iter->data = data; - iter->b = bam_init1(); - } - return iter; -} - -int bam_plp_init_overlaps(bam_plp_t iter) -{ - iter->overlaps = kh_init(olap_hash); // hash for tweaking quality of bases in overlapping reads - return iter->overlaps ? 
0 : -1; -} - -void bam_plp_destroy(bam_plp_t iter) -{ - lbnode_t *p, *pnext; - if ( iter->overlaps ) kh_destroy(olap_hash, iter->overlaps); - for (p = iter->head; p != NULL; p = pnext) { - pnext = p->next; - mp_free(iter->mp, p); - } - mp_destroy(iter->mp); - if (iter->b) bam_destroy1(iter->b); - free(iter->plp); - free(iter); -} - -void bam_plp_constructor(bam_plp_t plp, - int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)) { - plp->plp_construct = func; -} - -void bam_plp_destructor(bam_plp_t plp, - int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)) { - plp->plp_destruct = func; -} - -//--------------------------------- -//--- Tweak overlapping reads -//--------------------------------- - -/** - * cigar_iref2iseq_set() - find the first CMATCH setting the ref and the read index - * cigar_iref2iseq_next() - get the next CMATCH base - * @cigar: pointer to current cigar block (rw) - * @cigar_max: pointer just beyond the last cigar block - * @icig: position within the current cigar block (rw) - * @iseq: position in the sequence (rw) - * @iref: position with respect to the beginning of the read (iref_pos - b->core.pos) (rw) - * - * Returns BAM_CMATCH, -1 when there is no more cigar to process or the requested position is not covered, - * or -2 on error. 
- */ -static inline int cigar_iref2iseq_set(const uint32_t **cigar, - const uint32_t *cigar_max, - hts_pos_t *icig, - hts_pos_t *iseq, - hts_pos_t *iref) -{ - hts_pos_t pos = *iref; - if ( pos < 0 ) return -1; - *icig = 0; - *iseq = 0; - *iref = 0; - while ( *cigar> BAM_CIGAR_SHIFT; - - if ( cig==BAM_CSOFT_CLIP ) { (*cigar)++; *iseq += ncig; *icig = 0; continue; } - if ( cig==BAM_CHARD_CLIP || cig==BAM_CPAD ) { (*cigar)++; *icig = 0; continue; } - if ( cig==BAM_CMATCH || cig==BAM_CEQUAL || cig==BAM_CDIFF ) - { - pos -= ncig; - if ( pos < 0 ) { *icig = ncig + pos; *iseq += *icig; *iref += *icig; return BAM_CMATCH; } - (*cigar)++; *iseq += ncig; *icig = 0; *iref += ncig; - continue; - } - if ( cig==BAM_CINS ) { (*cigar)++; *iseq += ncig; *icig = 0; continue; } - if ( cig==BAM_CDEL || cig==BAM_CREF_SKIP ) - { - pos -= ncig; - if ( pos<0 ) pos = 0; - (*cigar)++; *icig = 0; *iref += ncig; - continue; - } - hts_log_error("Unexpected cigar %d", cig); - return -2; - } - *iseq = -1; - return -1; -} -static inline int cigar_iref2iseq_next(const uint32_t **cigar, - const uint32_t *cigar_max, - hts_pos_t *icig, - hts_pos_t *iseq, - hts_pos_t *iref) -{ - while ( *cigar < cigar_max ) - { - int cig = (**cigar) & BAM_CIGAR_MASK; - int ncig = (**cigar) >> BAM_CIGAR_SHIFT; - - if ( cig==BAM_CMATCH || cig==BAM_CEQUAL || cig==BAM_CDIFF ) - { - if ( *icig >= ncig - 1 ) { *icig = -1; (*cigar)++; continue; } - (*iseq)++; (*icig)++; (*iref)++; - return BAM_CMATCH; - } - if ( cig==BAM_CDEL || cig==BAM_CREF_SKIP ) { (*cigar)++; (*iref) += ncig; *icig = -1; continue; } - if ( cig==BAM_CINS ) { (*cigar)++; *iseq += ncig; *icig = -1; continue; } - if ( cig==BAM_CSOFT_CLIP ) { (*cigar)++; *iseq += ncig; *icig = -1; continue; } - if ( cig==BAM_CHARD_CLIP || cig==BAM_CPAD ) { (*cigar)++; *icig = -1; continue; } - hts_log_error("Unexpected cigar %d", cig); - return -2; - } - *iseq = -1; - *iref = -1; - return -1; -} - -// Given overlapping read 'a' (left) and 'b' (right) on the same -// template, 
adjust quality values to zero for either a or b. -// Note versions 1.12 and earlier always removed quality from 'b' for -// matching bases. Now we select a or b semi-randomly based on name hash. -// Returns 0 on success, -// -1 on failure -static int tweak_overlap_quality(bam1_t *a, bam1_t *b) -{ - const uint32_t *a_cigar = bam_get_cigar(a), - *a_cigar_max = a_cigar + a->core.n_cigar; - const uint32_t *b_cigar = bam_get_cigar(b), - *b_cigar_max = b_cigar + b->core.n_cigar; - hts_pos_t a_icig = 0, a_iseq = 0; - hts_pos_t b_icig = 0, b_iseq = 0; - uint8_t *a_qual = bam_get_qual(a), *b_qual = bam_get_qual(b); - uint8_t *a_seq = bam_get_seq(a), *b_seq = bam_get_seq(b); - - hts_pos_t iref = b->core.pos; - hts_pos_t a_iref = iref - a->core.pos; - hts_pos_t b_iref = iref - b->core.pos; - - int a_ret = cigar_iref2iseq_set(&a_cigar, a_cigar_max, - &a_icig, &a_iseq, &a_iref); - if ( a_ret<0 ) - // no overlap or error - return a_ret<-1 ? -1:0; - - int b_ret = cigar_iref2iseq_set(&b_cigar, b_cigar_max, - &b_icig, &b_iseq, &b_iref); - if ( b_ret<0 ) - // no overlap or error - return b_ret<-1 ? -1:0; - - // Determine which seq is the one getting modified qualities. - uint8_t amul, bmul; - if (__ac_Wang_hash(__ac_X31_hash_string(bam_get_qname(a))) & 1) { - amul = 1; - bmul = 0; - } else { - amul = 0; - bmul = 1; - } - - // Loop over the overlapping region nulling qualities in either - // seq a or b. 
- int err = 0; - while ( 1 ) - { - // Step to next matching reference position in a and b - while ( a_ret >= 0 && a_iref>=0 && a_iref < iref - a->core.pos ) - a_ret = cigar_iref2iseq_next(&a_cigar, a_cigar_max, - &a_icig, &a_iseq, &a_iref); - if ( a_ret<0 ) { // done - err = a_ret<-1?-1:0; - break; - } - if ( iref < a_iref + a->core.pos ) - iref = a_iref + a->core.pos; - - while ( b_ret >= 0 && b_iref>=0 && b_iref < iref - b->core.pos ) - b_ret = cigar_iref2iseq_next(&b_cigar, b_cigar_max, &b_icig, - &b_iseq, &b_iref); - if ( b_ret<0 ) { // done - err = b_ret<-1?-1:0; - break; - } - if ( iref < b_iref + b->core.pos ) - iref = b_iref + b->core.pos; - - iref++; - - if ( a_iref+a->core.pos != b_iref+b->core.pos ) - // only CMATCH positions, don't know what to do with indels - continue; - - if (a_iseq > a->core.l_qseq || b_iseq > b->core.l_qseq) - // Fell off end of sequence, bad CIGAR? - return -1; - - // We're finally at the same ref base in both a and b. - // Check if the bases match (confident) or mismatch - // (not so confident). - if ( bam_seqi(a_seq,a_iseq) == bam_seqi(b_seq,b_iseq) ) { - // We are very confident about this base. Use sum of quals - int qual = a_qual[a_iseq] + b_qual[b_iseq]; - a_qual[a_iseq] = amul * (qual>200 ? 200 : qual); - b_qual[b_iseq] = bmul * (qual>200 ? 200 : qual);; - } else { - // Not so confident about anymore given the mismatch. - // Reduce qual for lowest quality base. - if ( a_qual[a_iseq] > b_qual[b_iseq] ) { - // A highest qual base; keep - a_qual[a_iseq] = 0.8 * a_qual[a_iseq]; - b_qual[b_iseq] = 0; - } else if (a_qual[a_iseq] < b_qual[b_iseq] ) { - // B highest qual base; keep - b_qual[b_iseq] = 0.8 * b_qual[b_iseq]; - a_qual[a_iseq] = 0; - } else { - // Both equal, so pick randomly - a_qual[a_iseq] = amul * 0.8 * a_qual[a_iseq]; - b_qual[b_iseq] = bmul * 0.8 * b_qual[b_iseq]; - } - } - } - - return err; -} - -// Fix overlapping reads. Simple soft-clipping did not give good results. 
-// Lowering qualities of unwanted bases is more selective and works better. -// -// Returns 0 on success, -1 on failure -static int overlap_push(bam_plp_t iter, lbnode_t *node) -{ - if ( !iter->overlaps ) return 0; - - // mapped mates and paired reads only - if ( node->b.core.flag&BAM_FMUNMAP || !(node->b.core.flag&BAM_FPROPER_PAIR) ) return 0; - - // no overlap possible, unless some wild cigar - if ( (node->b.core.mtid >= 0 && node->b.core.tid != node->b.core.mtid) - || (llabs(node->b.core.isize) >= 2*node->b.core.l_qseq - && node->b.core.mpos >= node->end) // for those wild cigars - ) return 0; - - khiter_t kitr = kh_get(olap_hash, iter->overlaps, bam_get_qname(&node->b)); - if ( kitr==kh_end(iter->overlaps) ) - { - // Only add reads where the mate is still to arrive - if (node->b.core.mpos >= node->b.core.pos || - ((node->b.core.flag & BAM_FPAIRED) && node->b.core.mpos == -1)) { - int ret; - kitr = kh_put(olap_hash, iter->overlaps, bam_get_qname(&node->b), &ret); - if (ret < 0) return -1; - kh_value(iter->overlaps, kitr) = node; - } - } - else - { - lbnode_t *a = kh_value(iter->overlaps, kitr); - int err = tweak_overlap_quality(&a->b, &node->b); - kh_del(olap_hash, iter->overlaps, kitr); - assert(a->end-1 == a->s.end); - return err; - } - return 0; -} - -static void overlap_remove(bam_plp_t iter, const bam1_t *b) -{ - if ( !iter->overlaps ) return; - - khiter_t kitr; - if ( b ) - { - kitr = kh_get(olap_hash, iter->overlaps, bam_get_qname(b)); - if ( kitr!=kh_end(iter->overlaps) ) - kh_del(olap_hash, iter->overlaps, kitr); - } - else - { - // remove all - for (kitr = kh_begin(iter->overlaps); kitroverlaps); kitr++) - if ( kh_exist(iter->overlaps, kitr) ) kh_del(olap_hash, iter->overlaps, kitr); - } -} - - - -// Prepares next pileup position in bam records collected by bam_plp_auto -> user func -> bam_plp_push. 
Returns -// pointer to the piled records if next position is ready or NULL if there is not enough records in the -// buffer yet (the current position is still the maximum position across all buffered reads). -const bam_pileup1_t *bam_plp64_next(bam_plp_t iter, int *_tid, hts_pos_t *_pos, int *_n_plp) -{ - if (iter->error) { *_n_plp = -1; return NULL; } - *_n_plp = 0; - if (iter->is_eof && iter->head == iter->tail) return NULL; - while (iter->is_eof || iter->max_tid > iter->tid || (iter->max_tid == iter->tid && iter->max_pos > iter->pos)) { - int n_plp = 0; - // write iter->plp at iter->pos - lbnode_t **pptr = &iter->head; - while (*pptr != iter->tail) { - lbnode_t *p = *pptr; - if (p->b.core.tid < iter->tid || (p->b.core.tid == iter->tid && p->end <= iter->pos)) { // then remove - overlap_remove(iter, &p->b); - if (iter->plp_destruct) - iter->plp_destruct(iter->data, &p->b, &p->cd); - *pptr = p->next; mp_free(iter->mp, p); - } - else { - if (p->b.core.tid == iter->tid && p->beg <= iter->pos) { // here: p->end > pos; then add to pileup - if (n_plp == iter->max_plp) { // then double the capacity - iter->max_plp = iter->max_plp? iter->max_plp<<1 : 256; - iter->plp = (bam_pileup1_t*)realloc(iter->plp, sizeof(bam_pileup1_t) * iter->max_plp); - } - iter->plp[n_plp].b = &p->b; - iter->plp[n_plp].cd = p->cd; - if (resolve_cigar2(iter->plp + n_plp, iter->pos, &p->s)) ++n_plp; // actually always true... - } - pptr = &(*pptr)->next; - } - } - *_n_plp = n_plp; *_tid = iter->tid; *_pos = iter->pos; - // update iter->tid and iter->pos - if (iter->head != iter->tail) { - if (iter->tid > iter->head->b.core.tid) { - hts_log_error("Unsorted input. 
Pileup aborts"); - iter->error = 1; - *_n_plp = -1; - return NULL; - } - } - if (iter->tid < iter->head->b.core.tid) { // come to a new reference sequence - iter->tid = iter->head->b.core.tid; iter->pos = iter->head->beg; // jump to the next reference - } else if (iter->pos < iter->head->beg) { // here: tid == head->b.core.tid - iter->pos = iter->head->beg; // jump to the next position - } else ++iter->pos; // scan contiguously - // return - if (n_plp) return iter->plp; - if (iter->is_eof && iter->head == iter->tail) break; - } - return NULL; -} - -const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) -{ - hts_pos_t pos64 = 0; - const bam_pileup1_t *p = bam_plp64_next(iter, _tid, &pos64, _n_plp); - if (pos64 < INT_MAX) { - *_pos = pos64; - } else { - hts_log_error("Position %"PRId64" too large", pos64); - *_pos = INT_MAX; - iter->error = 1; - *_n_plp = -1; - return NULL; - } - return p; -} - -int bam_plp_push(bam_plp_t iter, const bam1_t *b) -{ - if (iter->error) return -1; - if (b) { - if (b->core.tid < 0) { overlap_remove(iter, b); return 0; } - // Skip only unmapped reads here, any additional filtering must be done in iter->func - if (b->core.flag & BAM_FUNMAP) { overlap_remove(iter, b); return 0; } - if (iter->tid == b->core.tid && iter->pos == b->core.pos && iter->mp->cnt > iter->maxcnt) - { - overlap_remove(iter, b); - return 0; - } - if (bam_copy1(&iter->tail->b, b) == NULL) - return -1; - iter->tail->b.id = iter->id++; - iter->tail->beg = b->core.pos; - // Use raw rlen rather than bam_endpos() which adjusts rlen=0 to rlen=1 - iter->tail->end = b->core.pos + bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); - iter->tail->s = g_cstate_null; iter->tail->s.end = iter->tail->end - 1; // initialize cstate_t - if (b->core.tid < iter->max_tid) { - hts_log_error("The input is not sorted (chromosomes out of order)"); - iter->error = 1; - return -1; - } - if ((b->core.tid == iter->max_tid) && (iter->tail->beg < iter->max_pos)) { - 
hts_log_error("The input is not sorted (reads out of order)"); - iter->error = 1; - return -1; - } - iter->max_tid = b->core.tid; iter->max_pos = iter->tail->beg; - if (iter->tail->end > iter->pos || iter->tail->b.core.tid > iter->tid) { - lbnode_t *next = mp_alloc(iter->mp); - if (!next) { - iter->error = 1; - return -1; - } - if (iter->plp_construct) { - if (iter->plp_construct(iter->data, &iter->tail->b, - &iter->tail->cd) < 0) { - mp_free(iter->mp, next); - iter->error = 1; - return -1; - } - } - if (overlap_push(iter, iter->tail) < 0) { - mp_free(iter->mp, next); - iter->error = 1; - return -1; - } - iter->tail->next = next; - iter->tail = iter->tail->next; - } - } else iter->is_eof = 1; - return 0; -} - -const bam_pileup1_t *bam_plp64_auto(bam_plp_t iter, int *_tid, hts_pos_t *_pos, int *_n_plp) -{ - const bam_pileup1_t *plp; - if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; } - if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; - else { // no pileup line can be obtained; read alignments - *_n_plp = 0; - if (iter->is_eof) return 0; - int ret; - while ( (ret=iter->func(iter->data, iter->b)) >= 0) { - if (bam_plp_push(iter, iter->b) < 0) { - *_n_plp = -1; - return 0; - } - if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; - // otherwise no pileup line can be returned; read the next alignment. 
- } - if ( ret < -1 ) { iter->error = ret; *_n_plp = -1; return 0; } - if (bam_plp_push(iter, 0) < 0) { - *_n_plp = -1; - return 0; - } - if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; - return 0; - } -} - -const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) -{ - hts_pos_t pos64 = 0; - const bam_pileup1_t *p = bam_plp64_auto(iter, _tid, &pos64, _n_plp); - if (pos64 < INT_MAX) { - *_pos = pos64; - } else { - hts_log_error("Position %"PRId64" too large", pos64); - *_pos = INT_MAX; - iter->error = 1; - *_n_plp = -1; - return NULL; - } - return p; -} - -void bam_plp_reset(bam_plp_t iter) -{ - overlap_remove(iter, NULL); - iter->max_tid = iter->max_pos = -1; - iter->tid = iter->pos = 0; - iter->is_eof = 0; - while (iter->head != iter->tail) { - lbnode_t *p = iter->head; - iter->head = p->next; - mp_free(iter->mp, p); - } -} - -void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt) -{ - iter->maxcnt = maxcnt; -} - -/************************ - *** Mpileup iterator *** - ************************/ - -struct bam_mplp_s { - int n; - int32_t min_tid, *tid; - hts_pos_t min_pos, *pos; - bam_plp_t *iter; - int *n_plp; - const bam_pileup1_t **plp; -}; - -bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data) -{ - int i; - bam_mplp_t iter; - iter = (bam_mplp_t)calloc(1, sizeof(struct bam_mplp_s)); - iter->pos = (hts_pos_t*)calloc(n, sizeof(hts_pos_t)); - iter->tid = (int32_t*)calloc(n, sizeof(int32_t)); - iter->n_plp = (int*)calloc(n, sizeof(int)); - iter->plp = (const bam_pileup1_t**)calloc(n, sizeof(bam_pileup1_t*)); - iter->iter = (bam_plp_t*)calloc(n, sizeof(bam_plp_t)); - iter->n = n; - iter->min_pos = HTS_POS_MAX; - iter->min_tid = (uint32_t)-1; - for (i = 0; i < n; ++i) { - iter->iter[i] = bam_plp_init(func, data[i]); - iter->pos[i] = iter->min_pos; - iter->tid[i] = iter->min_tid; - } - return iter; -} - -int bam_mplp_init_overlaps(bam_mplp_t iter) -{ - int i, r = 0; - for (i = 0; i < iter->n; ++i) - r |= 
bam_plp_init_overlaps(iter->iter[i]); - return r == 0 ? 0 : -1; -} - -void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt) -{ - int i; - for (i = 0; i < iter->n; ++i) - iter->iter[i]->maxcnt = maxcnt; -} - -void bam_mplp_destroy(bam_mplp_t iter) -{ - int i; - for (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]); - free(iter->iter); free(iter->pos); free(iter->tid); - free(iter->n_plp); free(iter->plp); - free(iter); -} - -int bam_mplp64_auto(bam_mplp_t iter, int *_tid, hts_pos_t *_pos, int *n_plp, const bam_pileup1_t **plp) -{ - int i, ret = 0; - hts_pos_t new_min_pos = HTS_POS_MAX; - uint32_t new_min_tid = (uint32_t)-1; - for (i = 0; i < iter->n; ++i) { - if (iter->pos[i] == iter->min_pos && iter->tid[i] == iter->min_tid) { - int tid; - hts_pos_t pos; - iter->plp[i] = bam_plp64_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]); - if ( iter->iter[i]->error ) return -1; - if (iter->plp[i]) { - iter->tid[i] = tid; - iter->pos[i] = pos; - } else { - iter->tid[i] = 0; - iter->pos[i] = 0; - } - } - if (iter->plp[i]) { - if (iter->tid[i] < new_min_tid) { - new_min_tid = iter->tid[i]; - new_min_pos = iter->pos[i]; - } else if (iter->tid[i] == new_min_tid && iter->pos[i] < new_min_pos) { - new_min_pos = iter->pos[i]; - } - } - } - iter->min_pos = new_min_pos; - iter->min_tid = new_min_tid; - if (new_min_pos == HTS_POS_MAX) return 0; - *_tid = new_min_tid; *_pos = new_min_pos; - for (i = 0; i < iter->n; ++i) { - if (iter->pos[i] == iter->min_pos && iter->tid[i] == iter->min_tid) { - n_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i]; - ++ret; - } else n_plp[i] = 0, plp[i] = 0; - } - return ret; -} - -int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp) -{ - hts_pos_t pos64 = 0; - int ret = bam_mplp64_auto(iter, _tid, &pos64, n_plp, plp); - if (ret >= 0) { - if (pos64 < INT_MAX) { - *_pos = pos64; - } else { - hts_log_error("Position %"PRId64" too large", pos64); - *_pos = INT_MAX; - return -1; - } - } - return ret; -} - -void 
bam_mplp_reset(bam_mplp_t iter) -{ - int i; - iter->min_pos = HTS_POS_MAX; - iter->min_tid = (uint32_t)-1; - for (i = 0; i < iter->n; ++i) { - bam_plp_reset(iter->iter[i]); - iter->pos[i] = HTS_POS_MAX; - iter->tid[i] = (uint32_t)-1; - iter->n_plp[i] = 0; - iter->plp[i] = NULL; - } -} - -void bam_mplp_constructor(bam_mplp_t iter, - int (*func)(void *arg, const bam1_t *b, bam_pileup_cd *cd)) { - int i; - for (i = 0; i < iter->n; ++i) - bam_plp_constructor(iter->iter[i], func); -} - -void bam_mplp_destructor(bam_mplp_t iter, - int (*func)(void *arg, const bam1_t *b, bam_pileup_cd *cd)) { - int i; - for (i = 0; i < iter->n; ++i) - bam_plp_destructor(iter->iter[i], func); -} - -#endif // ~!defined(BAM_NO_PILEUP) diff --git a/src/htslib-1.19.1/sam_internal.h b/src/htslib-1.19.1/sam_internal.h deleted file mode 100644 index b1fce9f..0000000 --- a/src/htslib-1.19.1/sam_internal.h +++ /dev/null @@ -1,105 +0,0 @@ -/* sam_internal.h -- internal functions; not part of the public API. - - Copyright (C) 2019-2020 Genome Research Ltd. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_SAM_INTERNAL_H -#define HTSLIB_SAM_INTERNAL_H - -#include -#include -#include "htslib/sam.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Used internally in the SAM format multi-threading. -int sam_state_destroy(samFile *fp); -int sam_set_thread_pool(htsFile *fp, htsThreadPool *p); -int sam_set_threads(htsFile *fp, int nthreads); - -// Fastq state -int fastq_state_set(samFile *fp, enum hts_fmt_option opt, ...); -void fastq_state_destroy(samFile *fp); - -// bam1_t data (re)allocation -int sam_realloc_bam_data(bam1_t *b, size_t desired); - -static inline int realloc_bam_data(bam1_t *b, size_t desired) -{ - if (desired <= b->m_data) return 0; - return sam_realloc_bam_data(b, desired); -} - -static inline int possibly_expand_bam_data(bam1_t *b, size_t bytes) { - size_t new_len = (size_t) b->l_data + bytes; - - if (new_len > INT32_MAX || new_len < bytes) { // Too big or overflow - errno = ENOMEM; - return -1; - } - if (new_len <= b->m_data) return 0; - return sam_realloc_bam_data(b, new_len); -} - -/* - * Convert a nibble encoded BAM sequence to a string of bases. - * - * We do this 2 bp at a time for speed. 
Equiv to: - * - * for (i = 0; i < len; i++) - * seq[i] = seq_nt16_str[bam_seqi(nib, i)]; - */ -static inline void nibble2base(uint8_t *nib, char *seq, int len) { - static const char code2base[512] = - "===A=C=M=G=R=S=V=T=W=Y=H=K=D=B=N" - "A=AAACAMAGARASAVATAWAYAHAKADABAN" - "C=CACCCMCGCRCSCVCTCWCYCHCKCDCBCN" - "M=MAMCMMMGMRMSMVMTMWMYMHMKMDMBMN" - "G=GAGCGMGGGRGSGVGTGWGYGHGKGDGBGN" - "R=RARCRMRGRRRSRVRTRWRYRHRKRDRBRN" - "S=SASCSMSGSRSSSVSTSWSYSHSKSDSBSN" - "V=VAVCVMVGVRVSVVVTVWVYVHVKVDVBVN" - "T=TATCTMTGTRTSTVTTTWTYTHTKTDTBTN" - "W=WAWCWMWGWRWSWVWTWWWYWHWKWDWBWN" - "Y=YAYCYMYGYRYSYVYTYWYYYHYKYDYBYN" - "H=HAHCHMHGHRHSHVHTHWHYHHHKHDHBHN" - "K=KAKCKMKGKRKSKVKTKWKYKHKKKDKBKN" - "D=DADCDMDGDRDSDVDTDWDYDHDKDDDBDN" - "B=BABCBMBGBRBSBVBTBWBYBHBKBDBBBN" - "N=NANCNMNGNRNSNVNTNWNYNHNKNDNBNN"; - - int i, len2 = len/2; - seq[0] = 0; - - for (i = 0; i < len2; i++) - // Note size_t cast helps gcc optimiser. - memcpy(&seq[i*2], &code2base[(size_t)nib[i]*2], 2); - - if ((i *= 2) < len) - seq[i] = seq_nt16_str[bam_seqi(nib, i)]; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/sam_mods.c b/src/htslib-1.19.1/sam_mods.c deleted file mode 100644 index fe8db85..0000000 --- a/src/htslib-1.19.1/sam_mods.c +++ /dev/null @@ -1,683 +0,0 @@ -/* sam_mods.c -- Base modification handling in SAM and BAM. - - Copyright (C) 2020-2023 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include "htslib/sam.h" -#include "textutils_internal.h" - -// --------------------------- -// Base Modification retrieval -// -// These operate by recording state in an opaque type, allocated and freed -// via the functions below. -// -// Initially we call bam_parse_basemod to process the tags and record the -// modifications in the state structure, and then functions such as -// bam_next_basemod can iterate over this cached state. - -/* Overview of API. - -We start by allocating an hts_base_mod_state and parsing the MM, ML and MN -tags into it. This has optional flags controlling how we report base -modifications in "explicit" coordinates. See below - - hts_base_mod_state *m = hts_base_mod_state_alloc(); - bam_parse_basemod2(b, m, HTS_MOD_REPORT_UNCHECKED); - // Or: bam_parse_basemod(b, m), which is equiv to flags==0 - //... do something ... - hts_base_mod_state_free(m); - -In the default implicit MM coordinate system, any location not -reported is implicitly assumed to contain no modification. We only -report the places we think are likely modified. - -Some tools however only look for base modifications in particular -contexts, eg CpG islands. Here we need to distinguish between -not-looked-for and looked-for-but-didn't-find. These calls have an -explicit coordinate system, where we only know information about the -coordinates explicitly listed and everything else is considered to be -unverified. 
- -By default we don't get reports on the other coordinates in an -explicit MM tag, but the HTS_MOD_REPORT_UNCHECKED flag will report -them (with quality HTS_MOD_UNCHECKED) meaning we can do consensus -modification analysis with accurate counting when dealing with a -mixture of explicit and implicit records. - - -We have different ways of processing the base modifications. We can -iterate either mod-by-mod or position-by-position, or we can simply -query a specific coordinate as may be done when processing a pileup. - -To check for base modifications as a specific location within a -sequence we can use bam_mods_at_qpos. This provides complete random -access within the MM string. However currently this is inefficiently -implemented so should only be used for occasional analysis or as a way -to start iterating at a specific location. It modifies the state -position, so after the first use we can then switch to -bam_mods_at_next_pos to iterate position by position from then on. - - hts_base_mod mods[10]; - int n = bam_mods_at_qpos(b, pos, m, mods, 10); - -For base by base, we have bam_mods_at_next_pos. This strictly starts -at the first base and reports entries one at a time. It's more -efficient than a loop repeatedly calling ...at-pos. - - hts_base_mod mods[10]; - int n = bam_mods_at_next_pos(b, m, mods, 10); - for (int i = 0; i < n; i++) { - // report mod i of n - } - -Iterating over modifications instead of coordinates is simpler and -more efficient as it skips reporting of unmodified bases. This is -done with bam_next_basemod. - - hts_base_mod mods[10]; - while ((n=bam_next_basemod(b, m, mods, 10, &pos)) > 0) { - for (j = 0; j < n; j++) { - // Report 'n'th mod at sequence position 'pos' - } - } - -There are also functions that query meta-data about the MM line rather -than per-site information. - -bam_mods_recorded returns an array of ints holding the +ve code ('m') -or -ve CHEBI numeric values. 
- - int ntypes, *types = bam_mods_recorded(m, &ntype); - -We can then query a specific modification type to get further -information on the strand it is operating on, whether it has implicit -or explicit coordinates, and what it's corresponding canonical base it -is (The "C" in "C+m"). bam_mods_query_type does this by code name, -while bam_mods_queryi does this by numeric i^{th} type (from 0 to ntype-1). - - bam_mods_query_type(m, 'c', &strand, &implicit, &canonical); - bam_mods_queryi(m, 2, &strand, &implicit, &canonical); - -*/ - -/* - * Base modification are stored in MM/Mm tags as defined as - * - * ::= | "" - * ::= - * - * ::= "A" | "C" | "G" | "T" | "N". - * - * ::= "+" | "-". - * - * ::= | - * ::= | - * ::= - * ::= - * - * ::= "," | ";" - * - * We do not allocate additional memory other than the fixed size - * state, thus we track up to 256 pointers to different locations - * within the MM and ML tags. Each pointer is for a distinct - * modification code (simple or ChEBI), meaning some may point to the - * same delta-list when multiple codes are combined together - * (e.g. "C+mh,1,5,18,3;"). This is the MM[] array. - * - * Each numeric in the delta-list is tracked in MMcount[], counted - * down until it hits zero in which case the next delta is fetched. - * - * ML array similarly holds the locations in the quality (ML) tag per - * type, but these are interleaved so C+mhfc,10,15 will have 4 types - * all pointing to the same delta position, but in ML we store - * Q(m0)Q(h0)Q(f0)Q(c0) followed by Q(m1)Q(h1)Q(f1)Q(c1). This ML - * also has MLstride indicating how many positions along ML to jump - * each time we consume a base. (4 in our above example, but usually 1 - * for the simple case). - * - * One complexity of the base modification system is that mods are - * always stored in the original DNA orientation. 
This is so that - * tools that may reverse-complement a sequence (eg "samtools fastq -T - * MM,ML") can pass through these modification tags irrespective of - * whether they have any knowledge of their internal workings. - * - * Because we don't wish to allocate extra memory, we cannot simply - * reverse the MM and ML tags. Sadly this means we have to manage the - * reverse complementing ourselves on-the-fly. - * For reversed reads we start at the right end of MM and no longer - * stop at the semicolon. Instead we use MMend[] array to mark the - * termination point. - */ -#define MAX_BASE_MOD 256 -struct hts_base_mod_state { - int type[MAX_BASE_MOD]; // char or minus-CHEBI - int canonical[MAX_BASE_MOD];// canonical base, as seqi (1,2,4,8,15) - char strand[MAX_BASE_MOD]; // strand of modification; + or - - int MMcount[MAX_BASE_MOD]; // no. canonical bases left until next mod - char *MM[MAX_BASE_MOD]; // next pos delta (string) - char *MMend[MAX_BASE_MOD]; // end of pos-delta string - uint8_t *ML[MAX_BASE_MOD]; // next qual - int MLstride[MAX_BASE_MOD]; // bytes between quals for this type - int implicit[MAX_BASE_MOD]; // treat unlisted positions as non-modified? - int seq_pos; // current position along sequence - int nmods; // used array size (0 to MAX_BASE_MOD-1). 
- uint32_t flags; // Bit-field: see HTS_MOD_REPORT_UNCHECKED -}; - -hts_base_mod_state *hts_base_mod_state_alloc(void) { - return calloc(1, sizeof(hts_base_mod_state)); -} - -void hts_base_mod_state_free(hts_base_mod_state *state) { - free(state); -} - -/* - * Count frequency of A, C, G, T and N canonical bases in the sequence - */ -static void seq_freq(const bam1_t *b, int freq[16]) { - int i; - - memset(freq, 0, 16*sizeof(*freq)); - uint8_t *seq = bam_get_seq(b); - for (i = 0; i < b->core.l_qseq; i++) - freq[bam_seqi(seq, i)]++; - freq[15] = b->core.l_qseq; // all bases count as N for base mods -} - -//0123456789ABCDEF -//=ACMGRSVTWYHKDBN aka seq_nt16_str[] -//=TGKCYSBAWRDMHVN comp1ement of seq_nt16_str -//084C2A6E195D3B7F -static int seqi_rc[] = { 0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15 }; - -/* - * Parse the MM and ML tags to populate the base mod state. - * This structure will have been previously allocated via - * hts_base_mod_state_alloc, but it does not need to be repeatedly - * freed and allocated for each new bam record. (Although obviously - * it requires a new call to this function.) - * - * Flags are copied into the state and used to control reporting functions. - * Currently the only flag is HTS_MOD_REPORT_UNCHECKED, to control whether - * explicit "C+m?" mods report quality HTS_MOD_UNCHECKED for the bases - * outside the explicitly reported region. 
- */ -int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state, - uint32_t flags) { - // Reset position, else upcoming calls may fail on - // seq pos - length comparison - state->seq_pos = 0; - state->nmods = 0; - state->flags = flags; - - // Read MM and ML tags - uint8_t *mm = bam_aux_get(b, "MM"); - if (!mm) mm = bam_aux_get(b, "Mm"); - if (!mm) - return 0; - if (mm[0] != 'Z') { - hts_log_error("%s: MM tag is not of type Z", bam_get_qname(b)); - return -1; - } - - uint8_t *mi = bam_aux_get(b, "MN"); - if (mi && bam_aux2i(mi) != b->core.l_qseq) { - // bam_aux2i with set errno = EINVAL and return 0 if the tag - // isn't integer, but 0 will be a seq-length mismatch anyway so - // triggers an error here too. - hts_log_error("%s: MM/MN data length is incompatible with" - " SEQ length", bam_get_qname(b)); - return -1; - } - - uint8_t *ml = bam_aux_get(b, "ML"); - if (!ml) ml = bam_aux_get(b, "Ml"); - if (ml && (ml[0] != 'B' || ml[1] != 'C')) { - hts_log_error("%s: ML tag is not of type B,C", bam_get_qname(b)); - return -1; - } - uint8_t *ml_end = ml ? 
ml+6 + le_to_u32(ml+2) : NULL; - if (ml) ml += 6; - - // Aggregate freqs of ACGTN if reversed, to get final-delta (later) - int freq[16]; - if (b->core.flag & BAM_FREVERSE) - seq_freq(b, freq); - - char *cp = (char *)mm+1; - int mod_num = 0; - int implicit = 1; - while (*cp) { - for (; *cp; cp++) { - // cp should be [ACGTNU][+-]([a-zA-Z]+|[0-9]+)[.?]?(,\d+)*; - unsigned char btype = *cp++; - - if (btype != 'A' && btype != 'C' && - btype != 'G' && btype != 'T' && - btype != 'U' && btype != 'N') - return -1; - if (btype == 'U') btype = 'T'; - - btype = seq_nt16_table[btype]; - - // Strand - if (*cp != '+' && *cp != '-') - return -1; // malformed - char strand = *cp++; - - // List of modification types - char *ms = cp, *me; // mod code start and end - char *cp_end = NULL; - int chebi = 0; - if (isdigit_c(*cp)) { - chebi = strtol(cp, &cp_end, 10); - cp = cp_end; - ms = cp-1; - } else { - while (*cp && isalpha_c(*cp)) - cp++; - if (*cp == '\0') - return -1; - } - - me = cp; - - // Optional explicit vs implicit marker - implicit = 1; - if (*cp == '.') { - // default is implicit = 1; - cp++; - } else if (*cp == '?') { - implicit = 0; - cp++; - } else if (*cp != ',' && *cp != ';') { - // parse error - return -1; - } - - long delta; - int n = 0; // nth symbol in a multi-mod string - int stride = me-ms; - int ndelta = 0; - - if (b->core.flag & BAM_FREVERSE) { - // We process the sequence in left to right order, - // but delta is successive count of bases to skip - // counting right to left. This also means the number - // of bases to skip at left edge is unrecorded (as it's - // the remainder). - // - // To output mods in left to right, we step through the - // MM list in reverse and need to identify the left-end - // "remainder" delta. - int total_seq = 0; - for (;;) { - cp += (*cp == ','); - if (*cp == 0 || *cp == ';') - break; - - delta = strtol(cp, &cp_end, 10); - if (cp_end == cp) { - hts_log_error("%s: Hit end of MM tag. 
Missing " - "semicolon?", bam_get_qname(b)); - return -1; - } - - cp = cp_end; - total_seq += delta+1; - ndelta++; - } - delta = freq[seqi_rc[btype]] - total_seq; // remainder - } else { - delta = *cp == ',' - ? strtol(cp+1, &cp_end, 10) - : 0; - if (!cp_end) { - // empty list - delta = INT_MAX; - cp_end = cp+1; - } - } - // Now delta is first in list or computed remainder, - // and cp_end is either start or end of the MM list. - while (ms < me) { - state->type [mod_num] = chebi ? -chebi : *ms; - state->strand [mod_num] = (strand == '-'); - state->canonical[mod_num] = btype; - state->MLstride [mod_num] = stride; - state->implicit [mod_num] = implicit; - - if (delta < 0) { - hts_log_error("%s: MM tag refers to bases beyond sequence " - "length", bam_get_qname(b)); - return -1; - } - state->MMcount [mod_num] = delta; - if (b->core.flag & BAM_FREVERSE) { - state->MM [mod_num] = cp+1; - state->MMend[mod_num] = cp_end; - state->ML [mod_num] = ml ? ml+n +(ndelta-1)*stride: NULL; - } else { - state->MM [mod_num] = cp_end; - state->MMend[mod_num] = NULL; - state->ML [mod_num] = ml ? ml+n : NULL; - } - - if (++mod_num >= MAX_BASE_MOD) { - hts_log_error("%s: Too many base modification types", - bam_get_qname(b)); - return -1; - } - ms++; n++; - } - - // Skip modification deltas - if (ml) { - if (b->core.flag & BAM_FREVERSE) { - ml += ndelta*stride; - } else { - while (*cp && *cp != ';') { - if (*cp == ',') - ml+=stride; - cp++; - } - } - if (ml > ml_end) { - hts_log_error("%s: Insufficient number of entries in ML " - "tag", bam_get_qname(b)); - return -1; - } - } else { - // cp_end already known if FREVERSE - if (cp_end && (b->core.flag & BAM_FREVERSE)) - cp = cp_end; - else - while (*cp && *cp != ';') - cp++; - } - if (!*cp) { - hts_log_error("%s: Hit end of MM tag. 
Missing semicolon?", - bam_get_qname(b)); - return -1; - } - } - } - - state->nmods = mod_num; - - return 0; -} - -int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { - return bam_parse_basemod2(b, state, 0); -} - -/* - * Fills out mods[] with the base modifications found. - * Returns the number found (0 if none), which may be more than - * the size of n_mods if more were found than reported. - * Returns <= -1 on error. - * - * This always marches left to right along sequence, irrespective of - * reverse flag or modification strand. - */ -int bam_mods_at_next_pos(const bam1_t *b, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods) { - if (b->core.flag & BAM_FREVERSE) { - if (state->seq_pos < 0) - return -1; - } else { - if (state->seq_pos >= b->core.l_qseq) - return -1; - } - - int i, j, n = 0; - unsigned char base = bam_seqi(bam_get_seq(b), state->seq_pos); - state->seq_pos++; - if (b->core.flag & BAM_FREVERSE) - base = seqi_rc[base]; - - for (i = 0; i < state->nmods; i++) { - int unchecked = 0; - if (state->canonical[i] != base && state->canonical[i] != 15/*N*/) - continue; - - if (state->MMcount[i]-- > 0) { - if (!state->implicit[i] && - (state->flags & HTS_MOD_REPORT_UNCHECKED)) - unchecked = 1; - else - continue; - } - - char *MMptr = state->MM[i]; - if (n < n_mods) { - mods[n].modified_base = state->type[i]; - mods[n].canonical_base = seq_nt16_str[state->canonical[i]]; - mods[n].strand = state->strand[i]; - mods[n].qual = unchecked - ? HTS_MOD_UNCHECKED - : (state->ML[i] ? *state->ML[i] : HTS_MOD_UNKNOWN); - } - n++; - - if (unchecked) - continue; - - if (state->ML[i]) - state->ML[i] += (b->core.flag & BAM_FREVERSE) - ? 
-state->MLstride[i] - : +state->MLstride[i]; - - if (b->core.flag & BAM_FREVERSE) { - // process MM list backwards - char *cp; - for (cp = state->MMend[i]-1; cp != state->MM[i]; cp--) - if (*cp == ',') - break; - state->MMend[i] = cp; - if (cp != state->MM[i]) - state->MMcount[i] = strtol(cp+1, NULL, 10); - else - state->MMcount[i] = INT_MAX; - } else { - if (*state->MM[i] == ',') - state->MMcount[i] = strtol(state->MM[i]+1, &state->MM[i], 10); - else - state->MMcount[i] = INT_MAX; - } - - // Multiple mods at the same coords. - for (j=i+1; j < state->nmods && state->MM[j] == MMptr; j++) { - if (n < n_mods) { - mods[n].modified_base = state->type[j]; - mods[n].canonical_base = seq_nt16_str[state->canonical[j]]; - mods[n].strand = state->strand[j]; - mods[n].qual = state->ML[j] ? *state->ML[j] : -1; - } - n++; - state->MMcount[j] = state->MMcount[i]; - state->MM[j] = state->MM[i]; - if (state->ML[j]) - state->ML[j] += (b->core.flag & BAM_FREVERSE) - ? -state->MLstride[j] - : +state->MLstride[j]; - } - i = j-1; - } - - return n; -} - -/* - * Return data at the next modified location. - * - * bam_mods_at_next_pos does quite a bit of work, so we don't want to - * repeatedly call it for every location until we find a mod. Instead - * we check how many base types we can consume before the next mod, - * and scan through the sequence looking for them. Once we're at that - * site, we defer back to bam_mods_at_next_pos for the return values. 
- */ -int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods, int *pos) { - if (state->seq_pos >= b->core.l_qseq) - return 0; - - // Look through state->MMcount arrays to see when the next lowest is - // per base type; - int next[16], freq[16] = {0}, i; - memset(next, 0x7f, 16*sizeof(*next)); - const int unchecked = state->flags & HTS_MOD_REPORT_UNCHECKED; - if (b->core.flag & BAM_FREVERSE) { - for (i = 0; i < state->nmods; i++) { - if (unchecked && !state->implicit[i]) - next[seqi_rc[state->canonical[i]]] = 1; - else if (next[seqi_rc[state->canonical[i]]] > state->MMcount[i]) - next[seqi_rc[state->canonical[i]]] = state->MMcount[i]; - } - } else { - for (i = 0; i < state->nmods; i++) { - if (unchecked && !state->implicit[i]) - next[state->canonical[i]] = 0; - else if (next[state->canonical[i]] > state->MMcount[i]) - next[state->canonical[i]] = state->MMcount[i]; - } - } - - // Now step through the sequence counting off base types. - for (i = state->seq_pos; i < b->core.l_qseq; i++) { - unsigned char bc = bam_seqi(bam_get_seq(b), i); - if (next[bc] <= freq[bc] || next[15] <= freq[15]) - break; - freq[bc]++; - if (bc != 15) // N - freq[15]++; - } - *pos = state->seq_pos = i; - - if (i >= b->core.l_qseq) { - // Check for more MM elements than bases present. - for (i = 0; i < state->nmods; i++) { - if (!(b->core.flag & BAM_FREVERSE) && - state->MMcount[i] < 0x7f000000) { - hts_log_warning("MM tag refers to bases beyond sequence length"); - return -1; - } - } - return 0; - } - - if (b->core.flag & BAM_FREVERSE) { - for (i = 0; i < state->nmods; i++) - state->MMcount[i] -= freq[seqi_rc[state->canonical[i]]]; - } else { - for (i = 0; i < state->nmods; i++) - state->MMcount[i] -= freq[state->canonical[i]]; - } - - int r = bam_mods_at_next_pos(b, state, mods, n_mods); - return r > 0 ? r : 0; -} - -/* - * As per bam_mods_at_next_pos, but at a specific qpos >= the previous qpos. 
- * This can only march forwards along the read, but can do so by more than - * one base-pair. - * - * This makes it useful for calling from pileup iterators where qpos may - * start part way through a read for the first occurrence of that record. - */ -int bam_mods_at_qpos(const bam1_t *b, int qpos, hts_base_mod_state *state, - hts_base_mod *mods, int n_mods) { - // FIXME: for now this is inefficient in implementation. - int r = 0; - while (state->seq_pos <= qpos) - if ((r = bam_mods_at_next_pos(b, state, mods, n_mods)) < 0) - break; - - return r; -} - -/* - * Returns the list of base modification codes provided for this - * alignment record as an array of character codes (+ve) or ChEBI numbers - * (negative). - * - * Returns the array, with *ntype filled out with the size. - * The array returned should not be freed. - * It is a valid pointer until the state is freed using - * hts_base_mod_free(). - */ -int *bam_mods_recorded(hts_base_mod_state *state, int *ntype) { - *ntype = state->nmods; - return state->type; -} - -/* - * Returns data about a specific modification type for the alignment record. - * Code is either positive (eg 'm') or negative for ChEBI numbers. - * - * Return 0 on success or -1 if not found. The strand, implicit and canonical - * fields are filled out if passed in as non-NULL pointers. - */ -int bam_mods_query_type(hts_base_mod_state *state, int code, - int *strand, int *implicit, char *canonical) { - // Find code entry - int i; - for (i = 0; i < state->nmods; i++) { - if (state->type[i] == code) - break; - } - if (i == state->nmods) - return -1; - - // Return data - if (strand) *strand = state->strand[i]; - if (implicit) *implicit = state->implicit[i]; - if (canonical) *canonical = "?AC?G???T??????N"[state->canonical[i]]; - - return 0; -} - -/* - * Returns data about the ith modification type for the alignment record. - * - * Return 0 on success or -1 if not found. 
The strand, implicit and canonical - * fields are filled out if passed in as non-NULL pointers. - */ -int bam_mods_queryi(hts_base_mod_state *state, int i, - int *strand, int *implicit, char *canonical) { - if (i < 0 || i >= state->nmods) - return -1; - - // Return data - if (strand) *strand = state->strand[i]; - if (implicit) *implicit = state->implicit[i]; - if (canonical) *canonical = "?AC?G???T??????N"[state->canonical[i]]; - - return 0; -} diff --git a/src/htslib-1.19.1/samples/DEMO.md b/src/htslib-1.19.1/samples/DEMO.md deleted file mode 100644 index 9117928..0000000 --- a/src/htslib-1.19.1/samples/DEMO.md +++ /dev/null @@ -1,1437 +0,0 @@ -# HTS API - -## HTSLib APIs and samtools - -HTSLib is a C library implementation used to access and process the genome -sequence data. HTSLib implements multiple API interfaces, HTS API, VCF API and -SAM API. HTS API provides a framework for use by other APIs and applications, -implements bgzf compression, htscodecs and provides CRAM format support. VCF -APIs work with variant data in VCF and BCF format. - -SAM API works with sequence data of different formats, SAM / BAM / CRAM / -FASTA / FASTQ, and provides methods to do operations on the data. It uses -methods from HTS API. - -'samtools' is the utility used to read and modify sequence data. It uses SAM -APIs from HTSLib to work on the sequence data. - - -## About this document - -There are a number of demonstration utilities and their source code in -'samples' directory of HTSLib and this document gives the description of them -and the usage of API of HTSLib. The samples are for demonstration -purposes only and proper error handling is required for actual usage. This -document is based on HTSLib version 1.17. - -Updates to this document may be made along with later releases when required. - - -## The sample apps - -Flags - This application showcases the basic read of alignment files and flag -access. It reads and shows the count of read1 and read2 alignments. 
- -Split - This application showcases the basic read and write of alignment data. -It saves the read1 and read2 as separate files in given directory, one as sam -and other as bam. - -Split2 - This application showcases the output file format selection. It saves -the read1 and read2 as separate files in given directory, both as compressed -sam though the extensions are different. - -Cram - This application showcases the different way in which cram reference -data is used for cram output creation. - -Read_fast - This application showcases the fasta/fastq data read. - -Read_header - This application showcases the read and access of header data. -It can show all header line of given type, data of a given tag on a specific -header line or for all lines of given type. - -Read_ref - This application showcases the read and access of header data. -It shows all reference names which has length equal or greater to given input. - -Read_bam - This application showcases read of different alignment data fields. -It shows contents of each alignment. - -Read_aux - This application showcases read of specific auxiliary tag data in -alignment. It shows the data retrieved using 2 APIs, one as a string with tag -data and other as raw data alternatively. - -Dump_aux - This application showcases read of all auxiliary tag data one by one -in an alignment. It shows the data retrieved. - -Add_header - This application showcases the write of header lines to a file. -It adds header line of types, SQ, RG, PG and CO and writes to standard output. - -Remove_header - This application showcases removal of header line from a file. -It removes either all header lines of given type or one specific line of given -type with given unique identifier. Modified header is written on standard -output. - -Update_header - This application shows the update of header line fields, where -update is allowed. It takes the header line type, unique identifier for the -line, tag to be modified and the new value. 
Updated data is written on standard -output. - -Mod_bam - This application showcases the update of alignment data. It takes -alignment name, position of field to be modified and new value of it. -Modified data is written on standard output. - -Mod_aux - This application showcases the update of auxiliary data in alignment. -It takes alignment name, tag to be modified, its type and new value. Modified -data is written on standard output. - -Mod_aux_ba - This application showcases the update of auxiliary array data in -alignment. It adds count of ATCGN base as an array in auxiliary data, BA:I. -Modified data is written on standard output. - -Write_fast - This application showcases the fasta/fastq data write. It appends -a dummy data to given file. - -Index_write - This application showcases the creation of index along with -output creation. Based on file type and shift, it creates bai, csi or crai -files. - -Read_reg - This application showcases the usage of region specification in -alignment read. - -Read_multireg - This application showcases the usage of multiple region -specification in alignment read. - -Pileup - This application showcases the pileup api, where all alignments -covering a reference position are accessed together. It displays the bases -covering each position on standard output. - -Mpileup - This application showcases the mpileup api, which supports multiple -input files for pileup and gives a side by side view of them in pileup format. -It displays the bases covering each position on standard output. - -Modstate - This application showcases the access of base modifications in -alignment. It shows the modifications present in an alignment and accesses them -using available APIs. There are 2 APIs and which one to be used can be selected -through input. - -Pileup_mod - This application showcases the base modification access in pileup -mode. It shows the pileup display with base modifications.
- -Flags_field - This application showcases the read of selected fields alone, -reducing the overhead / increasing the performance. It reads the flag field -alone and shows the count of read1 and read2. This has impact only on CRAM -files. - -Split_thread1 - This application showcases the use of threads in file handling. -It saves the read1 and read2 as separate files in given directory, one as sam -and other as bam. 2 threads are used for read and 1 each dedicated for each -output file. - -Split_thread2 - This application showcases the use of thread pool in file -handling. It saves the read1 and read2 as separate files in given directory, -one as sam and other as bam. A pool of 4 threads is created and shared for both -read and write. - - -## Building the sample apps - -The samples expect the HTSLib is installed, libraries and header file path are -part of the PATH environment variable. If not, these paths need to be explicitly -passed during the build time. - -Gcc and compatible compilers can be used to build the samples. - -These applications can be linked statically or dynamically to HTSLib. -For static linking, along with htslib other libraries and/or headers required -to build are, math, pthread, curl, lzma, z and bz2 libraries. - -A makefile is available along with source files which links statically to -htslib. To use dynamic linking, update the makefile's 'LDFLAGS' and 'rpath' -path. The 'rpath' path to be set as the path to lib directory of htslib -installation. - - -## Usage of HTS APIs -### Sequence data file access for read - -The sequence data file for read may be opened using the sam_open method. It -opens the file and returns samFile (htsFile) pointer on success or NULL on -failure. The input can be path to a file in disk, network, cloud or '-' -designating the standard input. - -SAM, BAM and CRAM file formats are supported and the input file format is -detected from the file content. - -Once done with the file, it needs to be closed with sam_close. 
- -Many times, header details would be required and can be read using -sam_hdr_read api. It returns sam_hdr_t pointer or NULL. The returned header -needs to be destroyed using sam_hdr_destroy when no longer required. - -The sequence data may be compressed or uncompressed on disk and on memory it -is read and kept as uncompressed BAM format. It can be read from a file using -sam_read1 api. samFile pointer, header and bam storage are to be passed as -argument and it returns 0 on success, -1 on end of file and < -1 in case of -errors. - -The bam storage has to be initialised using bam_init1 api before the call and -can be reused for successive reads. Once done, it needs to be destroyed using -bam_destroy1. The member field named core - bam1_core_t - in bam storage, -bam1_t, has the sequence data in an easily accessible way. Using the fields -and macros, data can easily be read from it. - - #include - - int main(int argc, char *argv[]) - { - ... - //initialize - if (!(bamdata = bam_init1())) { - ... - //open input files - r reading - if (!(infile = sam_open(inname, "r"))) { - ... - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - ... - //read data, check flags and update count - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - cntread1++; - } - ... - //clean up - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; - } -Refer: flags_demo.c - -This shows the count of read1 and read2 alignments. - - ./flags /tmp/sample.sam.gz - -To read CRAM files, reference data is required and if it is not available, based -on configuration, library may try to download it from external repositories. - - -### Sequence data file access for write - -File access for write is similar to read with a few additional optional steps. - -The output file can be opened using sam_open api as in read, with "w" instead -of "r" as mode. 
This opens the file for writing and uses mode to select the -output file type. "w" alone denotes SAM, "wb" denotes BAM and "wc" denotes CRAM. - -Another way is to use sam_open_mode method, which sets the output file type and -compression based on the file name and explicit textual format specification. -This method expects a buffer to append type and compression flags. Usually a -buffer with standard file open flag is used, the buffer past the flag is passed -to the method to ensure existing flags and updates from this method are present -in the same buffer without being overwritten. This method will add more flags -indicating file type and compression based on name. If explicit format detail -given, then extension is ignored and the explicit specification is used. This -updated buffer can be used with sam_open to select the file format. - -sam_open_format method may also be used to open the file for output as more -information on the output file can be specified using this. Can use -mode buffer from sam_open_mode api or explicit format structure for this. - -The header data can be written using the sam_hdr_write api. When the header -data is copied to another variable and has different lifetime, it is good to -increase the reference count of the header using sam_hdr_incr_ref and -sam_hdr_destroy called as many times as required. - -The alignment data can be written using the sam_write1 api. It takes a samFile -pointer, header pointer and the alignment data. The header data is required to -set the reference name in the alignment. It returns -ve value on error. - - int main(int argc, char *argv[]) - { - ... - if (!(infile = sam_open(inname, "r"))) { - ... - outfile1 = sam_open(file1, "w"); //as SAM - outfile2 = sam_open(file2, "wb"); //as BAM - ... - if (!(in_samhdr = sam_hdr_read(infile))) { - ... - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || - (sam_hdr_write(outfile2, in_samhdr) == -1)) { - ... 
- while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - ... - } -Refer: split.c - -This creates 1.sam and 2.bam in /tmp/ containing read1 and read2 respectively. - - ./split /tmp/sample.sam.gz /tmp/ - -Below code excerpt shows sam_open_mode api usage. - - int main(int argc, char *argv[]) - { - ... - //set file open mode based on file name for 1st and as explicit for 2nd - if ((sam_open_mode(mode1+1, file1, NULL) == -1) || - (sam_open_mode(mode2+1, file2, "sam.gz") == -1)) { - ... - if (!(infile = sam_open(inname, "r"))) { - ... - //open output files - outfile1 = sam_open(file1, mode1); //as compressed SAM through sam_open - outfile2 = sam_open_format(file2, mode2, NULL); //as compressed SAM through sam_open_format - ... - } -Refer: split2.c - -This creates 1.sam.gz and 2.sam in /tmp/ both having compressed data. - - ./split2 /tmp/sample.sam.gz /tmp/ - -An htsFormat structure filled appropriately can also be used to specify output -file format while using sam_open_format api. - - -### CRAM writing - -CRAM files uses reference data and compresses alignment data. A CRAM file may -be created with external reference data file - most appropriate, with embedded -reference in it or with no reference data at all. It can also be created using -an autogenerated reference, based on consensus with-in the alignment data. -The reference detail can be set to an htsFormat structure using hts_parse_format -api and used with sam_open_format api to create appropriate CRAM file. - - ... - snprintf(reffmt1, size1, "cram,reference=%s", reffile); - snprintf(reffmt2, size2, "cram,embed_ref=1,reference=%s", reffile); - ... 
- if (hts_parse_format(&fmt1, reffmt1) == -1 || //using external reference - uses the M5/UR tags to get - reference data during read - hts_parse_format(&fmt2, reffmt2) == -1 || //embed the reference internally - hts_parse_format(&fmt3, "cram,embed_ref=2") == -1 || //embed autogenerated reference - hts_parse_format(&fmt4, "cram,no_ref=1") == -1) { //no reference data encoding at all - ... - outfile1 = sam_open_format(file1, "wc", &fmt1); outfile2 = sam_open_format(file2, "wc", &fmt2); - ... -Refer: cram.c - - -### FASTA/FASTQ data access - -FASTA/FASTQ files have the raw sequence data and the data can be read one by -one using sam_read1 or a selected range using a region. The data can be written -similar to alignment data using sam_write1 api. To write the file, format -can be set by updating mode buffer using sam_open_mode with file name -or explicit format text. This mode buffer can be used with sam_open or can be -used with sam_open_format with explicit format information in htsFormat -structure. - - ... - if (!(bamdata = bam_init1())) { - ... - if (!(infile = sam_open(inname, "r"))) { - ... - if (infile->format.format != fasta_format && infile->format.format != fastq_format) { - ... - if (!(in_samhdr = sam_hdr_read(infile))) { - ... - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - printf("\nsequence: "); - for (c = 0; c < bamdata->core.l_qseq; ++c) { - printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]); - } - if (infile->format.format == fastq_format) { - printf("\nquality: "); - for (c = 0; c < bamdata->core.l_qseq; ++c) { - printf("%c", bam_get_qual(bamdata)[c]); - ... -Refer: read_fast.c - - ... - char mode[4] = "a"; - ... - if (sam_open_mode(mode + 1, outname, NULL) < 0) { - ... - if (!(outfile = sam_open(outname, mode))) { - ... - if (bam_set1(bamdata, sizeof("test"), "test", BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, 10, "AACTGACTGA", "1234567890", 0) - < 0) { - ... 
- if (sam_write1(outfile, out_samhdr, bamdata) < 0) { - printf("Failed to write data\n"); - ... -Refer: write_fast.c - - -### Header data read - -The header gives the version, reference details, read group, change history -and comments. These data are stored inside the sam_hdr_t. Each of these -entries, except comments, have their unique identifier and it is required to -access different fields of them. The api sam_hdr_count_lines gives the count -of the specified type of header line. The value of a unique identifier to a -specific type of header line can be retrieved with sam_hdr_line_name api. The -api sam_hdr_find_tag_id and sam_hdr_find_tag_pos can get the field data from a -header line using unique identifier values or using position. The full header -line can be retrieved using sam_hdr_find_line_pos or sam_hdr_line_id with -position and unique identifier values respectively. - - ... - if (!(in_samhdr = sam_hdr_read(infile))) { - ... - ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data); - ... - ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data); - ... - linecnt = sam_hdr_count_lines(in_samhdr, header); - ... - ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data); - ... - ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data); - ... -Refer: read_header.c - -This will show the VN tag's value from HD header. - - ./read_header /tmp/sample.sam.gz HD VN - -Shows the 2nd SQ line's LN field value. - - ./read_header /tmp/sample.sam.gz SQ SN T2 LN - -Below code excerpt shows the reference names which has length above given value. - - ... - linecnt = sam_hdr_count_lines(in_samhdr, "SQ"); //get reference count - ... - //iterate and check each reference's length - for (pos = 1, c = 0; c < linecnt; ++c) { - if ((ret = sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "LN", &data) == -2)) { - ... 
- size = atoll(data.s); - if (size < minsize) { - //not required - continue; - } - if (!(id = sam_hdr_line_name(in_samhdr, "SQ", c))) { - //sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "SN", &data) can also do the same! - ... - printf("%d,%s,%s\n", pos, id, data.s); - ... -Refer: read_refname.c - - -### Alignment data read - -The alignment / sequence data contains many fields. Mainly the read/query -name, flags indicating the properties of the read, reference sequence name, -position in reference to which it matches, quality of the read, CIGAR string -indicating the match status, position of mate / reverse strand, name of -reference sequence to which mate matches, the insert length, base sequence, -quality value of each base and auxiliary fields. - -Header data would be required to retrieve the reference names as alignment -contains the position of the reference in the header. - -A few of the data are directly visible in bam1_t and the rest are hidden -inside data member of bam1_t and can easily be retrieved using macros. -bam_get_qname gives the name of the read, sam_hdr_tid2name gives the reference -name. bam_get_cigar retrieves the cigar operation array, which can be decoded -using bam_cigar_oplen to get count of bases to which that operation applicable -and bam_cigar_opchr to get the cigar operation. bam_seqi retrieves the base -data at a given position in alignment and it can be converted to character by -indexing the seq_nt16_str array. - - ... - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE] - printf("NAME: %s\n", bam_get_qname(bamdata)); //get the query name using the macro - flags = bam_flag2str(bamdata->core.flag); //flags as string - ... - tidname = sam_hdr_tid2name(in_samhdr, bamdata->core.tid); - ... 
- printf("MQUAL: %d\n", bamdata->core.qual); //map quality value - cigar = bam_get_cigar(bamdata); //retrieves the cigar data - for (i = 0; i < bamdata->core.n_cigar; ++i) { //no. of cigar data entries - printf("%d%c", bam_cigar_oplen(cigar[i]), bam_cigar_opchr(cigar[i])); //the macros gives the count of operation - and the symbol of operation for given cigar entry - } - printf("\nTLEN/ISIZE: %"PRIhts_pos"\n", bamdata->core.isize); - data = bam_get_seq(bamdata); - //get the sequence data - if (bamdata->core.l_qseq != bam_cigar2qlen(bamdata->core.n_cigar, cigar)) { //checks the length with CIGAR and query - ... - for (i = 0; i < bamdata->core.l_qseq ; ++i) { //sequence length - printf("%c", seq_nt16_str[bam_seqi(data, i)]); //retrieves the base from (internal compressed) sequence data - ... - printf("%c", bam_get_qual(bamdata)[i]+33); //retrives the quality value - ... -Refer: read_bam.c - -Shows the data from alignments. - - ./read_bam /tmp/sample.sam.gz - - -### Aux data read - -Auxiliary data gives extra information about the alignment. There can be a -number of such data and can be accessed by specifying required tag or by -iterating one by one through them once the alignment is read as bam1_t. The -auxiliary data are stored along with the variable length data in the data -field of bam1_t. There are macros defined to retrieve information about -auxiliary data from the data field of bam1_t. - -Data for a specific tag can be retrieved as a string or can be retrieved as raw -data. bam_aux_get_str retrieves as a string, with tag name, tag type and data. -bam_aux_get can get raw data and with bam_aux_type and bam_aux2A, bam_aux2f etc. -the raw data can be extracted. - -To iterate through all data, the start of aux data is retrieved using macro -bam_aux_first and successive ones using bam_aux_next. Macro bam_aux_tag gives -the tag of the aux field and bam_aux_type gives the information about type of -the aux field. 
- -Bam_aux2i, bam_aux2f, bam_aux2Z macros retrieve the aux data's value as -integer, float and string respectively. The integer value may be of different -precision / size and the bam_aux_type character indicates how to use the -value. The string/hex data are NULL terminated. - -For array data, bam_aux_type will return 'B' and bam_auxB_len gives the length -of the array. bam_aux_type with the next byte will give the type of data in -the array. bam_auxB2i, bam_auxB2f will give integer and float data from a -given position of the array. - - ... - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (i % 2) { //use options alternatively to demonstrate both - //option 1 - get data as string with tag and type - if ((c = bam_aux_get_str(bamdata, tag, &sdata)) == 1) { - printf("%s\n",sdata.s); - ... - //option 2 - get raw data - if (!(data = bam_aux_get(bamdata, tag))) { - ... - if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) { - ... -Refer: read_aux.c - -Shows the MD aux tag from alignments. - - ./read_aux ../../samtools/test/mpileup/mpileup.1.bam MD - - ... - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - data = bam_aux_first(bamdata); //get the first aux data - while (data) { - printf("%.2s:%c:", bam_aux_tag(data), NULL != strchr("cCsSiI", bam_aux_type(data)) ? 'i' : bam_aux_type(data)); - //macros gets the tag and type of aux data - //dump the data - if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) { - ... - data = bam_aux_next(bamdata, data); //get the next aux data - ... -Refer: dump_aux.c - -Shows all the tags from all alignments. - - ./dump_aux ../../samtools/test/mpileup/mpileup.1.bam - - -### Add/Remove/Update header - -There are specific types of data that can be part of header data. They have -a tag from HD, SQ, RG, PG and CO. Fully formatted header lines, separated by new -line, can be added with sam_hdr_add_lines api. 
A single header line can be added -using sam_hdr_add_line api where the header type, tag and value pair are passed -as arguments, terminated with a NULL argument. The PG header lines are special -that they have a kind of linkage to previous PG lines. This linkage can be auto -generated by using sam_hdr_add_pg api which sets the 'PP' field used in linkage. -sam_hdr_write api does the write of the header data to file. - - ... - //add SQ line with SN as TR1 and TR2 - if (sam_hdr_add_lines(in_samhdr, &sq[0], 0)) { //length as 0 for NULL terminated data - ... - //add RG line with ID as RG1 - if (sam_hdr_add_line(in_samhdr, "RG", "ID", "RG1", "LB", "Test", "SM", "S1", NULL)) { - ... - //add pg line - if (sam_hdr_add_pg(in_samhdr, "add_header", "VN", "Test", "CL", data.s, NULL)) { //NULL is to indicate end of args - ... - if (sam_hdr_add_line(in_samhdr, "CO", "Test data", NULL)) { //NULL is to indicate end of args - ... - //write output - if (sam_hdr_write(outfile, in_samhdr) < 0) { - ... -Refer: add_header.c - -Not all type of header data can be removed but where it is possible, either a -specific header line can be removed or all of a header type can be removed. To -remove a specific line, header type, unique identifier field tag and its value -to be used. To remove all lines of a type, header type and unique identifier -field tag are to be used. - - ... - //remove specific line - if (sam_hdr_remove_line_id(in_samhdr, header, id, idval)) { - ... - //remove multiple lines of a header type - if (sam_hdr_remove_lines(in_samhdr, header, id, NULL)) { - ... - if (sam_hdr_write(outfile, in_samhdr) < 0) { - ... -Refer: rem_header.c - -Shows the file content after removing SQ line with SN 2. - ./rem_header ../../samtools/test/mpileup/mpileup.1.bam SQ 2 - -The unique identifier for the line needs to be found to update a field, though -not all types in the header may be modifiable. 
The api sam_hdr_update_line -takes the unique identifier for the header line type, its value, the field -which needs to be modified and the new value with which to modify it, followed -by a NULL. -e.g. To change LN field from 2000 to 2250 in SQ line with unique identifier SN -as 'chr1', sam_hdr_update_line( header, "SQ", "SN", "chr1", "LN", "2250", -NULL). To change PP field from ABC to DEF in PG line with ID APP.10, -sam_hdr_update_line( header, "PG", "ID", "APP.10", "PP", "DEF", NULL). - - ... - //update with new data - if (sam_hdr_update_line(in_samhdr, header, id, idval, tag, val, NULL) < 0) { - printf("Failed to update data\n"); - goto end; - } - ... -Refer: update_header.c - -Shows new sam file with 2nd SQ line having length as 38. - - ./update_header /tmp/sample.sam.gz SQ T1 LN 38 - - -### Update alignment data - -Many of the bam data fields may be updated by setting new value to appropriate -field in bam1_core_t structure and for a few, creating a new bam1_t record would -be easier than update of existing record. - - ... - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - ... - case 1:// QNAME - ret = bam_set_qname(bamdata, val); - break; - case 2:// FLAG - bamdata->core.flag = atol(val) & 0xFFFF; - break; - case 3:// RNAME - case 7:// RNEXT - if ((ret = sam_hdr_name2tid(in_samhdr, val)) < 0) { - ... - if (field == 3) { - //reference - bamdata->core.tid = ret; - } - else { - //mate reference - bamdata->core.mtid = ret; - } - break; - case 4:// POS - bamdata->core.pos = atoll(val); - break; - case 5:// MAPQ - bamdata->core.qual = atoi(val) & 0x0FF; - break; - case 6:// CIGAR - { - ... - //get cigar array and set all data in new bam record - if ((ncigar = sam_parse_cigar(val, NULL, &cigar, &size)) < 0) { - ... 
- if (bam_set1(newbam, bamdata->core.l_qname, bam_get_qname(bamdata), bamdata->core.flag, bamdata->core.tid, - bamdata->core.pos, bamdata->core.qual, ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos, - bamdata->core.isize, bamdata->core.l_qseq, (const char*)bam_get_seq(bamdata), - (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) { - ... - //correct sequence data as input is expected in ascii format and not as compressed inside bam! - memcpy(bam_get_seq(newbam), bam_get_seq(bamdata), (bamdata->core.l_qseq + 1) / 2); - //copy the aux data - memcpy(bam_get_aux(newbam), bam_get_aux(bamdata), bam_get_l_aux(bamdata)); - ... - break; - case 8:// PNEXT - bamdata->core.mpos = atoll(val); - break; - case 9:// TLEN - bamdata->core.isize = atoll(val); - break; - case 10:// SEQ - ... - for( c = 0; c < i; ++c) { - bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[(unsigned char)val[c]]); - } - break; - case 11:// QUAL - ... - for (c = 0; c < i; ++c) { - val[c] -= 33; //phred score from ascii value - } - memcpy(bam_get_qual(bamdata), val, i); - ... -Refer: mod_bam.c - -Shows data with RNAME modified to T2. - - ./mod_bam /tmp/sample.sam ITR1 3 T2 - -The auxiliary data in bam1_t structure can be modified using -bam_aux_update_float, bam_aux_update_int etc. apis. If the aux field is not -present at all, it can be appended using bam_aux_append. - - ... - //matched to qname, update aux - if (!(data = bam_aux_get(bamdata, tag))) { - //tag not present append - ... - if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val)) { - ... - else { - char auxtype = bam_aux_type(data); - //update the tag with newer value - switch (type) { - case 'f': - case 'd': - ... - if (bam_aux_update_float(bamdata, tag, atof(val))) { - ... - case 'C': - case 'S': - case 'I': - ... - if (bam_aux_update_int(bamdata, tag, atoll(val))) { - ... - case 'Z': - ... - if (bam_aux_update_str(bamdata, tag, length, val)) { - ... - case 'A': - ... 
- //update the char data directly on buffer - *(data+1) = val[0]; - ... -Refer: mod_aux.c - -Shows the given record's MD tag set to Test. - - ./mod_aux samtools/test/mpileup/mpileup.1.bam ERR013140.6157908 MD Z Test - -The array aux fields can be updated using bam_aux_update_array api. - - ... - if (bam_aux_update_array(bamdata, "BA", 'I', sizeof(cnt)/sizeof(cnt[0]), cnt)) { - ... -Refer: mod_aux_ba.c - -Shows the records updated with an array of integers, containing count of ACGT -and N in that order. - - ./mod_aux_ba samtools/test/mpileup/mpileup.1.bam - - -### Create an index - -Indexes help to read data faster without iterating sequentially through the -file. Indexes contain the position information about alignments and that they -can be read easily. There are different type of indices, BAI, CSI, CRAI, TBI, -FAI etc. and are usually used with iterators. - -Indexing of plain/textual files are not supported, compressed SAM&FASTA/Q, BAM, -and CRAM files can be indexed. CRAM files are indexed as .crai and the other two -can be indexed as .bai or .csi files. Each of these types have different -internal representations of the index information. Bai uses a fixed -configuration values where as csi has them dynamically updated based on the -alignment data. - -Indexes can be created either with save of alignment data or explicitly by -read of existing alignment file. - -To create index along with alignment write, the sam_idx_init api need to be -invoked before the start of alignment data write. This api takes the output -samFile pointer, header pointer, minimum shift and index file path. For BAI -index, the min shift has to be 0. - -At the end of write, sam_idx_save api need to be invoked to save the index. - - //write header - if (sam_hdr_write(outfile, in_samhdr)) { - ... - // initialize indexing, before start of write - if (sam_idx_init(outfile, in_samhdr, size, fileidx)) { - ... - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - ... 
- if (sam_idx_save(outfile)) { - ... -Refer:index_write.c - -Creates mpileup.1.bam and mpileup.1.bam.bai in /tmp/. - - ./idx_on_write ../../samtools/test/mpileup/mpileup.1.bam 0 /tmp/ - -To create index explicitly on an existing alignment data file, the -sam_index_build api or its alike can be used. sam_index_build takes the -alignment file path, min shift for the index and creates the index file in -same path. The output name will be based on the alignment file format and min -shift passed. - -The sam_index_build2 api takes the index file path as well and gives more -control than the previous one. The sam_index_build3 api provides an option to -configure the number of threads in index creation. - - -### Read with iterators - -Index file helps to read required data without sequentially accessing the file -and are required to use iterators. The interested reference, start and end -position etc. are required to read data with iterators. With index and these -information, an iterator is created and relevant alignments can be accessed by -iterating it. - -The api sam_index_load and the like does the index loading. It takes input -samFile pointer and file path. It loads the index file based on the input file -name, from the same path and with implicit index file extension - cram file -with .crai and others with .bai. The sam_index_load2 api accepts explicit path -to index file, which allows loading it from a different location and explicit -extensions. The sam_index_load3 api supports download/save of the index -locally from a remote location. These apis return NULL on failure and index -pointer on success. - -The index file path can be appended to alignment file path and used as well. -In this case the paths are expected to be separated by '##idx##'. - -The sam_itr_queryi or sam_itr_querys apis may be used to create an iterator -and sam_itr_next api does the alignment data retrieval.
Along with retrieval -of current data, it advances the iterator to next relevant data. The -sam_itr_queryi takes the interested positions as numeric values and -sam_itr_querys takes the interested position as a string. - -With sam_itr_queryi, the reference id can be the 0 based index of reference -data, -2 for unmapped alignments, -3 to start read from beginning of file, -4 -to continue from current position, -5 to return nothing. Based on the -reference id given, alignment covering the given start and end positions will -be read with sam_itr_next api. - -With sam_itr_querys, the reference sequence is identified with the name and -interested positions can be described with start and end separated by '-' as -string. When sequence is identified as '.', it begins from the start of file -and when it is '*', unmapped alignments are read. Reference with NAME[:], -NAME:S, NAME:S-E, NAME:-E retrieves all data, all data covering position -S onwards, all data covering position S to E, all data covering upto position -E of reference with ID NAME respectively on read using sam_itr_next. - -The index and iterator created are to be destroyed once the need is over. -sam_itr_destroy and hts_idx_destroy apis do this. - - ... - //load index file - if (!(idx = sam_index_load2(infile, inname, idxfile))) { - ... - //create iterator - if (!(iter = sam_itr_querys(idx, in_samhdr, region))) { - ... - //read using iterator - while ((c = sam_itr_next(infile, iter, bamdata)) >= 0) { - ... - if (iter) { - sam_itr_destroy(iter); - } - if (idx) { - hts_idx_destroy(idx); - ... -Refer:index_reg_read.c - -With sample.sam, region as \* will show alignments with name UNMAP2 and UNMAP3 - - ./read_reg /tmp/sample.sam.gz \* - -With region as \., it shows all alignments - - ./read_reg /tmp/sample.sam.gz \. - -With region as T1:1-4, start 1 and end 4 it shows nothing and with T1:1-5 it -shows alignment with name ITR1.
- - ./read_reg /tmp/sample.sam.gz T1:1-5 - -With region as T2:30-100, it shows alignment with name ITR2M which refers the -reference data T2. - - ./read_reg /tmp/sample.sam.gz T2:30-100 - - -Multiple interested regions can be specified for read using sam_itr_regarray. -It takes index path, header, count of regions and region descriptions as array -of char array / string. This array passed needs to be released by the user -itself. - - ... - //load index file, assume it to be present in same location - if (!(idx = sam_index_load(infile, inname))) { - ... - //create iterator - if (!(iter = sam_itr_regarray(idx, in_samhdr, regions, regcnt))) { - ... - if (regions) { - //can be freed as it is no longer required - free(regions); - regions = NULL; - } - //get required area - while ((c = sam_itr_multi_next(infile, iter, bamdata) >= 0)) { - ... -Refer:index_multireg_read.c - -With compressed sample.sam and 2 regions from reference T1 (30 to 32) and 1 -region from T2 (34 onwards), alignments with name A1, B1, A2 and ITR2M would -be shown. - - ./read_multireg /tmp/sample.sam.gz 2 T1:30-32,T2:34 - -To use numeric indices instead of textual regions, sam_itr_regions can be used. -It takes index file path, header, count of regions and an array of region -description (hts_reglist_t*), which has the start end positions as numerals. - -The index and iterators are to be destroyed using the sam_itr_destroy and -hts_idx_destroy. The hts_reglist_t* array passed is destroyed by the library -on iterator destroy. The regions array (array of char array/string) needs to be -destroyed by the user itself. - - -### Pileup and MPileup - -Pileup shows the transposed view of the SAM alignment data, i.e. it shows the -reference positions and bases which cover that position through different -reads side by side. MPileup facilitates the piling up of multiple sam files -against each other and same reference at the same time. - -Mpileup has replaced the pileup.
The input expects the data to be sorted by -position. - -Pileup needs to be initialized with bam_pileup_init method which takes pointer -to a method, which will be called by pileup to read data from required files, -and pointer to data which might be required for this read method to do the -read operation. It returns a pointer to the pileup iterator. - -User can specify methods which need to be invoked during the load and unload -of an alignment, like constructor and destructor of objects. -Bam_plp_constructor and bam_plp_destructor methods does the setup of -these methods in the pileup iterator. During invocation of these methods, the -pointer to data passed in the initialization is passed as well. If user want -to do any custom status handling or actions during load or unload, it can be -done in these methods. Alignment specific data can be created and stored in -an argument passed to the constructor and the same will be accessible during -pileup status return. The same will be accessible during destructor as well -where any deallocation can be made. - -User is expected to invoke bam_plp_auto api to get the pileup status. It -returns the pileup status or NULL on end. During this all alignments are read -one by one, using the method given in initialization for data read, until one -for a new reference is found or all alignment covering a position is read. On -such condition, the pileup status is returned and the same continuous on next -bam_plp_auto call. The pileup status returned is an array for all positions -for which the processing is completed. Along with the result, the reference -index, position in reference data and number of alignments which covers this -position are passed. User can iterate the result array and get bases from each -alignment which covers the given reference position. The alignment specific -custom data which were created in constructor function will also be available -in the result. 
- -The bam_plp_auto api invokes the data read method to load an alignment and the -constructor method is invoked during the load. Once the end of alignment is -passed, it is removed from the processing and destructor method is invoked, -that user could do deallocations and custom actions as in load during this -time. The custom data passed during the initialization is passed to the -constructor and destructor methods during invocation. - -Once the forward and reverse strands are identified, the better of the quality -is identified and used. Both reads are required for this and hence reads are -cached until its mate is read. The maximum number of reads that can be cached -is controlled by bam_plp_set_maxcnt. Reads covering a position are cached and -as soon as mate is found, quality is adjusted and is removed from cache. Reads -above the cache limit are discarded. - -Once done, the pileup iterator to be discarded by sam_plp_destroy api. - - ... - if (!(plpiter = bam_plp_init(readdata, &conf))) { - ... - //set constructor destructor callbacks - bam_plp_constructor(plpiter, plpconstructor); - bam_plp_destructor(plpiter, plpdestructor); - - while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &n))) { - printf("%d\t%d\t", tid+1, refpos+1); - for (j = 0; j < n; ++j) { - //doesnt detect succeeding insertion and deletion together here, only insertion is identified - //deletion is detected in plp->is_del as and when pos reaches the position - //if detection ahead is required, use bam_plp_insertion here which gives deletion length along with insertion - if (plp[j].is_del || plp[j].is_refskip) { - printf("*"); - continue; - } - //start and end are displayed in UPPER and rest on LOWER - printf("%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - (plp[j].is_tail ? 
toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]))); - if (plp[j].indel > 0) { - //insertions, anyway not start or end - printf("+%d", plp[j].indel); - for (k = 0; k < plp[j].indel; ++k) { - printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos + k + 1)])); - } - } - else if (plp[j].indel < 0) { - printf("%d", plp[j].indel); - for (k = 0; k < -plp[j].indel; ++k) { - printf("?"); - } - ... - if (plpiter) { - bam_plp_destroy(plpiter); - ... -Refer:pileup.c - -The read method may use a simple read or it could be an advanced read using -indices, iterators and region specifications based on the need. The constructor -method may create any custom data and store it in the pointer passed to it. The -same need to be released by use on destructor method. - -MPileup works same as the pileup and supports multiple inputs against the same -reference, giving side by side view of reference and alignments from different -inputs. - -MPileup needs to be initialized with bam_mpileup_init method which takes -pointer to a method, which will be called by pileup to read data from required -files, and an array of pointer to data which might be required for this read -method to do the read operation. It returns a pointer to the mpileup iterator. - -User can specify methods which need to be invoked during the load and unload -of an alignment, like constructor and destructor of objects. -bam_mplp_constructor and bam_mplp_destructor methods does the setup -of these methods in the pileup iterator. During invocation of these methods, -the pointer to data passed in the initialization is passed as well. If user -want to do any custom status handling or actions during load or unload, it can -be done on these methods. Alignment specific data can be created and -stored in the custom data pointer and the same will be accessible during -return of pileup status. 
The same will be accessible during destructor as well -where any deallocation can be made. - -User is expected to invoke bam_mplp_auto api to get the pileup status. It -returns the pileup status. During this all alignments are read one by one, -using the method given in initialization for data read, until one for a new -reference is found or all alignment covering a position is read. On such -condition, the pileup status is returned and the same continuous on next -bam_mplp_auto call. - -The pileup status is returned through a parameter in the method itself, is an -array for all inputs, each containing array for positions on which the -processing is completed. Along with the result, the reference index, position -in reference data and number of alignments which covers this position are -passed. User can iterate the result array and get bases from each alignment -which covers the given reference position. The alignment specific custom data -which were created in constructor function will also be available in the -result. - -Once the forward and reverse strands are identified, the better of the quality -is identified and used. Both reads are required for this and hence reads are -cached until its mate is read. The maximum number of reads that can be cached -is controlled by bam_mplp_set_maxcnt. Reads covering a position are cached and -as soon as mate is found, quality is adjusted and is removed from cache. Reads -above the cache limit are discarded. - -Once done, the pileup iterator to be discarded by sam_mplp_destroy api. - - ... - if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf))) { - ... 
- //set constructor destructor callbacks - bam_mplp_constructor(mplpiter, plpconstructor); - bam_mplp_destructor(mplpiter, plpdestructor); - - while (bam_mplp64_auto(mplpiter, &tid, &refpos, depth, plp) > 0) { - printf("%d\t%"PRIhts_pos"\t", tid+1, refpos+1); - - for (input = 0; input < argc - 1; ++input) { - for (dpt = 0; dpt < depth[input]; ++dpt) { - if (plp[input][dpt].is_del || plp[input][dpt].is_refskip) { - printf("*"); - continue; - } - //start and end are displayed in UPPER and rest on LOWER - printf("%c", plp[input][dpt].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), - plp[input][dpt].qpos)]) : (plp[input]->is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), - plp[input][dpt].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), - plp[input][dpt].qpos)]))); - if (plp[input][dpt].indel > 0) { - //insertions, anyway not start or end - printf("+%d", plp[input][dpt].indel); - for (k = 0; k < plp[input][dpt].indel; ++k) { - printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), - plp[input][dpt].qpos + k + 1)])); - } - } - else if (plp[input][dpt].indel < 0) { - printf("%d", plp[input][dpt].indel); - for (k = 0; k < -plp[input][dpt].indel; ++k) { - printf("?"); - ... - if (mplpiter) { - bam_mplp_destroy(mplpiter); - } - ... - if (plp) { - free(plp); - ... -Refer:mpileup.c - -This sample takes multiple sam files and shows the pileup of data side by side. - - ./mpileup /tmp/mp.bam /tmp/mp.sam - - -### Base modifications - -The alignment data may contain base modification information as well. This -gives the base, modifications found, orientation in which it was found and the -quality for the modification. The base modification can be identified using -hts_parse_basemod api. It stores the modification details on hts_base_mod_state -and this has to be initialized using hts_base_mod_state_alloc api. 
- -Once the modifications are identified, they can be accessed through different -ways. bam_mods_recorded api gives the modifications identified for an alignment. -Modifications can be queried for each base position iteratively using -bam_mods_at_next_pos api. Check the returned value with buffer size to see -whether the buffer is big enough to retrieve all modifications. -Instead of querying for each position, the next modified position can be -directly retrieved directly using bam_next_basemod api. An alignment can be -queried to have a specific modification using bam_mods_query_type api. At the -end of processing, the state need to be released using hts_base_mod_state_free -api. - - ... - if (!(ms = hts_base_mod_state_alloc())) { - ... - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - ... - if (bam_parse_basemod(bamdata, ms)) { - ... - bm = bam_mods_recorded(ms, &cnt); - for (k = 0; k < cnt; ++k) { - printf("%c", bm[k]); - } - printf("\n"); - hts_base_mod mod[5] = {0}; //for ATCGN - if (opt) { - //option 1 - for (; i < bamdata->core.l_qseq; ++i) { - if ((r = bam_mods_at_next_pos(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]))) <= -1) { - printf("Failed to get modifications\n"); - goto end; - } - else if (r > (sizeof(mod) / sizeof(mod[0]))) { - printf("More modifications than this app can handle, update the app\n"); - goto end; - } - else if (!r) { - //no modification at this pos - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - //modifications - for (j = 0; j < r; ++j) { - printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); - ... - else { - //option 2 - while ((r = bam_next_basemod(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]), &pos)) >= 0) { - for (; i < bamdata->core.l_qseq && i < pos; ++i) { - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - //modifications - for (j = 0; j < r; ++j) { - printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); - } - ... 
- //check last alignment's base modification - int strand = 0, impl = 0; - char canonical = 0, modification[] = "mhfcgebaon"; //possible modifications - printf("\n\nLast alignment has \n"); - for (k = 0; k < sizeof(modification) - 1; ++k) { //avoiding NUL termination - if (bam_mods_query_type(ms, modification[k], &strand, &impl, &canonical)) { - printf ("No modification of %c type\n", modification[k]); - } - else { - printf("%s strand has %c modified with %c, can %sassume unlisted as unmodified\n", strand ? "-/bottom/reverse" : - "+/top/forward", canonical, modification[k], impl?"" : "not " ); - } - } - ... - if (ms) { - hts_base_mod_state_free(ms); - ... -Refer:modstate.c - -The modification can be accessed in pileup mode as well. bam_mods_at_qpos gives -the modification at given pileup position. Insertion and deletion to the given -position with possible modification can be retrieved using bam_plp_insertion_mod -api. - - ... - int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - //when using cd, initialize and use as it will be reused after destructor - cd->p = hts_base_mod_state_alloc(); - //parse the bam data and gather modification data from MM tags - return (-1 == bam_parse_basemod(b, (hts_base_mod_state*)cd->p)) ? 1 : 0; - } - - int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - if (cd->p) { - hts_base_mod_state_free((hts_base_mod_state *)cd->p); - cd->p = NULL; - } - return 0; - } - - int main(int argc, char *argv[]) - { - ... - if (!(plpiter = bam_plp_init(readdata, &conf))) { - ... 
- //set constructor destructor callbacks - bam_plp_constructor(plpiter, plpconstructor); - bam_plp_destructor(plpiter, plpdestructor); - - while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &depth))) { - memset(&mods, 0, sizeof(mods)); - printf("%d\t%d\t", tid+1, refpos+1); - - for (j = 0; j < depth; ++j) { - dellen = 0; - if (plp[j].is_del || plp[j].is_refskip) { - printf("*"); - continue; - } - /*invoke bam mods_mods_at_qpos before bam_plp_insertion_mod that the base modification - is retrieved before change in pileup pos thr' plp_insertion_mod call*/ - if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1) { - ... - //use plp_insertion/_mod to get insertion and del at the same position - if ((inslen = bam_plp_insertion_mod(&plp[j], (hts_base_mod_state*)plp[j].cd.p, &insdata, &dellen)) == -1) { - ... - //start and end are displayed in UPPER and rest on LOWER, only 1st modification considered - //base and modification - printf("%c%c%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)])), - modlen > 0 ? mods[0].strand ? '-' : '+' : '\0', modlen > 0 ? mods[0].modified_base : '\0'); - //insertion and deletions - if (plp[j].indel > 0) { - //insertion - /*insertion data from plp_insertion_mod, note this shows the quality value as well - which is different from base and modification above;the lower case display is not attempted either*/ - printf("+%d%s", plp[j].indel, insdata.s); - //handle deletion if any - if (dellen) { - printf("-%d", dellen); - for (k = 0; k < dellen; ++k) { - printf("?"); - ... - else if (plp[j].indel < 0) { - //deletion - printf("%d", plp[j].indel); - for (k = 0; k < -plp[j].indel; ++k) { - printf("?"); - } - } - ... 
-Refer:pileup_mod.c - - -### Read selected fields - -At times the whole alignment data may not be of interest and it would be -better to read required fields alone from the alignment data. CRAM file format -supports such specific data read and HTSLib provides an option to use this. -This can improve the performance on read operation. - -The hts_set_opt method does the selection of specified fields. There are flags -indicating specific fields, like SAM_FLAG, SAM_SEQ, SAM_QNAME, in alignment -data and a combination of flags for the required fields can be passed with -CRAM_OPT_REQUIRED_FIELDS to this api. - - ... - //select required field alone, this is useful for CRAM alone - if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0) { - ... - //read header - in_samhdr = sam_hdr_read(infile); - ... - //read data, check flags and update count - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - cntread1++; - ... -Refer: flags_htsopt_field.c - - -### Thread-pool to read / write - -The HTSLib api supports thread pooling for better performance. There are a few -ways in which this can be used. The pool can be made specific for a file or a -generic pool can be created and shared across multiple files. Another way to -use thread pool is to schedule tasks explicitly to queues which gets executed -using threads in pool. - -To have a thread pool specific for a file, hts_set_opt api can be used with the -file pointer, HTS_OPT_NTHREADS and the number of threads to use in the pool. -Closure of file releases the thread pool as well. To have a thread pool which -can be shared across different files, it needs to be initialized using -hts_tpool_init api, passing number of threads as argument. This thread pool can -be associated with a file using hts_set_opt api. The file pointer, -HTS_OPT_THREAD_POOL and the thread pool address are to be passed as arguments -to api. The thread pool has to be released with hts_tpool_destroy. 
- -Below excerpt shows file specific thread pool, - - ... - //create file specific threads - if (hts_set_opt(infile, HTS_OPT_NTHREADS, 2) < 0 || //2 thread specific for reading - hts_set_opt(outfile1, HTS_OPT_NTHREADS, 1) < 0 || //1 thread specific for sam write - hts_set_opt(outfile2, HTS_OPT_NTHREADS, 1) < 0) { //1 thread specific for bam write - printf("Failed to set thread options\n"); - goto end; - } -Refer: split_thread1.c - -Below excerpt shows thread pool shared across files, - - ... - //create a pool of 4 threads - if (!(tpool.pool = hts_tpool_init(4))) { - ... - //share the pool with all the 3 files - if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 || - hts_set_opt(outfile1, HTS_OPT_THREAD_POOL, &tpool) < 0 || - hts_set_opt(outfile2, HTS_OPT_THREAD_POOL, &tpool) < 0) { - ... - if (tpool.pool) { - hts_tpool_destroy(tpool.pool); - } - ... -Refer: split_thread2.c - - -## More Information - -### CRAM reference files - -The cram reference data is required for the read of sequence data in CRAM -format. The sequence data file may have it as embedded or as a reference to -the actual file. When it is a reference, it is downloaded locally, in the -cache directory for later usage. It will be stored in a directory structure -based on the MD5 checksum in the cache directory. - -Each chromosome in a reference file gets saved as a separate file with md5sum -as its path and name. The initial 4 numerals make the directory name and rest -as the file name (/<1st 2 of md5sum>/<2nd 2 of md5sum>/). - -The download would be attempted from standard location, EBI ENA -(https://www.ebi.ac.uk/ena). - - -### Bam1_t - -This structure holds the sequence data in BAM format. There are fixed and -variable size fields, basic and extended information on sequence -data. Variable size data and extended information are kept together in a -buffer, named data in bam1_t. 
Fields in the member named core, bam1_core_t, -and a few macros together support the storage and handling of the whole -sequence data. - -- core has a link to reference as a 0 based index in field tid. The mate / - reverse strand's link to reference is given by mtid. - -- Field pos and mpos gives the position in reference to which the sequence and - its mate / reverse strand match. - -- Field flag gives the properties of the given alignment. It shows the - alignment's orientation, mate status, read order etc. - -- Field qual gives the quality of the alignment read. - -- l_qname gives the length of the name of the alignment / read, l_extranul gives - the extra space used internally in the data field. - -- l_qseq gives the length of the alignment / read in the data field. - --- n_cigar gives the number of CIGAR operations for the given alignment. - -- isize gives the insert size of the read / alignment. - -The bases in sequence data are stored by compressing 2 bases together in a -byte. When the reverse flag is set, the base data is reversed and -complemented from the actual read (i.e. if the forward read is ACTG, the -reverse read to be CAGT; it will be stored in SAM format with reversed and -complemented format as ACTG with reverse flag set). - -Macros bam_get_qname, bam_get_seq, bam_get_qual, bam_get_aux, bam_get_l_aux, -bam_seqi etc access the data field and retrieve the required data. The aux -macros support the retrieval of auxiliary data from the data field. - - -### Sam_hdr_t - -This structure holds the header information. This holds the number of targets -/ SQ lines in the file, each one's length, name and reference count to this -structure. It also has this information in an internal data structure for -easier access of each field of this data. - -When this data is shared or assigned to another variable of a different scope -or purpose, the reference count needs to be incremented to ensure that it is -valid till the end of the variable's scope. 
sam_hdr_incr_ref and it needs to -be destroyed as many times with sam_hdr_destroy api. - - -### Index - -Indices need the data to be sorted by position. They can be of different -types with extension .bai, .csi or .tbi for compressed SAM/BAM files and .crai -for CRAM files. The index name can be passed along with the alignment file -itself by appending a specific character sequence. The apis can detect this -sequence and extract the index path. ##idx## is the sequence which separates -the file path and index path. - - -### Data files - -The data files can be a local file, a network file, a file accessible through -the web or in cloud storage like google and amazon. The data files can be -represented with URIs like file://, file://localhost/.., ,ftp://.., -gs+http[s].., s3+http[s]:// - diff --git a/src/htslib-1.19.1/samples/Makefile b/src/htslib-1.19.1/samples/Makefile deleted file mode 100644 index 40991d7..0000000 --- a/src/htslib-1.19.1/samples/Makefile +++ /dev/null @@ -1,106 +0,0 @@ -HTS_DIR = ../ -include $(HTS_DIR)/htslib_static.mk - -CC = gcc -CFLAGS = -Wall -g -O0 - -#to statically link to libhts -LDFLAGS = $(HTS_DIR)/libhts.a -L$(HTS_DIR) $(HTSLIB_static_LDFLAGS) $(HTSLIB_static_LIBS) - -#to dynamically link to libhts -#LDFLAGS = -L $(HTS_DIR) -lhts -Wl,-rpath, - -PRGS = flags split split2 cram read_fast read_header read_ref read_bam \ - read_aux dump_aux add_header rem_header update_header mod_bam mod_aux \ - mod_aux_ba write_fast idx_on_write read_reg read_multireg pileup \ - mpileup modstate pileup_mod flags_field split_t1 split_t2 - -all: $(PRGS) - -flags: - $(CC) $(CFLAGS) -I $(HTS_DIR) flags_demo.c -o $@ $(LDFLAGS) - -split: - $(CC) $(CFLAGS) -I $(HTS_DIR) split.c -o $@ $(LDFLAGS) - -split2: - $(CC) $(CFLAGS) -I $(HTS_DIR) split2.c -o $@ $(LDFLAGS) - -cram: - $(CC) $(CFLAGS) -I $(HTS_DIR) cram.c -o $@ $(LDFLAGS) - -read_fast: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_fast.c -o $@ $(LDFLAGS) - -read_header: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_header.c -o 
$@ $(LDFLAGS) - -read_ref: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_refname.c -o $@ $(LDFLAGS) - -read_bam: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_bam.c -o $@ $(LDFLAGS) - -read_aux: - $(CC) $(CFLAGS) -I $(HTS_DIR) read_aux.c -o $@ $(LDFLAGS) - -dump_aux: - $(CC) $(CFLAGS) -I $(HTS_DIR) dump_aux.c -o $@ $(LDFLAGS) - -add_header: - $(CC) $(CFLAGS) -I $(HTS_DIR) add_header.c -o $@ $(LDFLAGS) - -rem_header: - $(CC) $(CFLAGS) -I $(HTS_DIR) rem_header.c -o $@ $(LDFLAGS) - -update_header: - $(CC) $(CFLAGS) -I $(HTS_DIR) update_header.c -o $@ $(LDFLAGS) - -mod_bam: - $(CC) $(CFLAGS) -I $(HTS_DIR) mod_bam.c -o $@ $(LDFLAGS) - -mod_aux: - $(CC) $(CFLAGS) -I $(HTS_DIR) mod_aux.c -o $@ $(LDFLAGS) - -mod_aux_ba: - $(CC) $(CFLAGS) -I $(HTS_DIR) mod_aux_ba.c -o $@ $(LDFLAGS) - -write_fast: - $(CC) $(CFLAGS) -I $(HTS_DIR) write_fast.c -o $@ $(LDFLAGS) - -idx_on_write: - $(CC) $(CFLAGS) -I $(HTS_DIR) index_write.c -o $@ $(LDFLAGS) - -read_reg: - $(CC) $(CFLAGS) -I $(HTS_DIR) index_reg_read.c -o $@ $(LDFLAGS) - -read_multireg: - $(CC) $(CFLAGS) -I $(HTS_DIR) index_multireg_read.c -o $@ $(LDFLAGS) - -pileup: - $(CC) $(CFLAGS) -I $(HTS_DIR) pileup.c -o $@ $(LDFLAGS) - -mpileup: - $(CC) $(CFLAGS) -I $(HTS_DIR) mpileup.c -o $@ $(LDFLAGS) - -modstate: - $(CC) $(CFLAGS) -I $(HTS_DIR) modstate.c -o $@ $(LDFLAGS) - -pileup_mod: - $(CC) $(CFLAGS) -I $(HTS_DIR) pileup_mod.c -o $@ $(LDFLAGS) - -flags_field: - $(CC) $(CFLAGS) -I $(HTS_DIR) flags_htsopt_field.c -o $@ $(LDFLAGS) - -split_t1: - $(CC) $(CFLAGS) -I $(HTS_DIR) split_thread1.c -o $@ $(LDFLAGS) - -split_t2: - $(CC) $(CFLAGS) -I $(HTS_DIR) split_thread2.c -o $@ $(LDFLAGS) - -clean: - find . -name "*.o" | xargs rm -rf - find . 
-name "*.dSYM" | xargs rm -rf - rm $(PRGS) - - diff --git a/src/htslib-1.19.1/samples/add_header.c b/src/htslib-1.19.1/samples/add_header.c deleted file mode 100644 index d1a2fc1..0000000 --- a/src/htslib-1.19.1/samples/add_header.c +++ /dev/null @@ -1,128 +0,0 @@ -/* add_header.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: add_header infile\n\ -Adds new header lines of SQ, RG, PG and CO typs\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, sq[] = "@SQ\tSN:TR1\tLN:100\n@SQ\tSN:TR2\tLN:50"; - int c = 0, ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - kstring_t data = KS_INITIALIZE; - - //update_header infile header idval tag value - if (argc != 2) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update - printf("Could not open stdout\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - //dump command line arguments for PG line - for (c = 0; c < argc; ++c) { - kputs(argv[c], &data); - kputc(' ', &data); - } - - //add SQ line with SN as TR1 and TR2 - if (sam_hdr_add_lines(in_samhdr, &sq[0], 0)) { //length as 0 for NULL terminated data - printf("Failed to add SQ lines\n"); - goto end; - } - - //add RG line with ID as RG1 - if 
(sam_hdr_add_line(in_samhdr, "RG", "ID", "RG1", "LB", "Test", "SM", "S1", NULL)) { - printf("Failed to add RG line\n"); - goto end; - } - - //add pg line - if (sam_hdr_add_pg(in_samhdr, "add_header", "VN", "Test", "CL", data.s, NULL)) { //NULL is to indicate end of args - printf("Failed to add PG line\n"); - goto end; - } - - if (sam_hdr_add_line(in_samhdr, "CO", "Test data", NULL)) { //NULL is to indicate end of args - printf("Failed to add PG line\n"); - goto end; - } - - //write output - if (sam_hdr_write(outfile, in_samhdr) < 0) { - printf("Failed to write output\n"); - goto end; - } - ret = EXIT_SUCCESS; - //bam data write to follow.... -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - ks_free(&data); - return ret; -} diff --git a/src/htslib-1.19.1/samples/cram.c b/src/htslib-1.19.1/samples/cram.c deleted file mode 100644 index 5f55e65..0000000 --- a/src/htslib-1.19.1/samples/cram.c +++ /dev/null @@ -1,168 +0,0 @@ -/* cram.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: cram infile reffile outdir\n\ -Dumps the input file alignments in cram format in given directory\n\ -1.cram has external reference\n\ -2.cram has reference embedded\n\ -3.cram has autogenerated reference\n\ -4.cram has no reference data in it\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL, *reffile = NULL; - char *file1 = NULL, *file2 = NULL, *file3 = NULL, *file4 = NULL, *reffmt1 = NULL, *reffmt2 = NULL; - int c = 0, ret = EXIT_FAILURE, size1 = 0, size2 = 0, size3 = 0; - samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL, *outfile3 = NULL, *outfile4 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - htsFormat fmt1 = {0}, fmt2 = {0}, fmt3 = {0}, fmt4 = {0}; - - //cram infile reffile outdir - if (argc != 4) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - reffile = argv[2]; - outdir = argv[3]; - - //allocate space for option string and output file names - size1 = sizeof(char) * (strlen(reffile) + sizeof("cram,reference=") + 1); - size2 = sizeof(char) * (strlen(reffile) + sizeof("cram,embed_ref=1,reference=") + 1); - size3 = sizeof(char) * (strlen(outdir) + sizeof("/1.cram") + 1); - - reffmt1 = malloc(size1); 
reffmt2 = malloc(size2); - file1 = malloc(size3); file2 = malloc(size3); - file3 = malloc(size3); file4 = malloc(size3); - - if (!file1 || !file2 || !file3 || !file4 || !reffmt1 || !reffmt2) { - printf("Failed to create buffers\n"); - goto end; - } - - snprintf(reffmt1, size1, "cram,reference=%s", reffile); - snprintf(reffmt2, size2, "cram,embed_ref=1,reference=%s", reffile); - snprintf(file1, size3, "%s/1.cram", outdir); snprintf(file2, size3, "%s/2.cram", outdir); - snprintf(file3, size3, "%s/3.cram", outdir); snprintf(file4, size3, "%s/4.cram", outdir); - - if (hts_parse_format(&fmt1, reffmt1) == -1 || //using external reference - uses the M5/UR tags to get reference data during read - hts_parse_format(&fmt2, reffmt2) == -1 || //embed the reference internally - hts_parse_format(&fmt3, "cram,embed_ref=2") == -1 || //embed autogenerated reference - hts_parse_format(&fmt4, "cram,no_ref=1") == -1) { //no reference data encoding at all - printf("Failed to set output option\n"); - goto end; - } - - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input file - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //open output files - w write as SAM, wb write as BAM, wc as CRAM (equivalent to fmt3) - outfile1 = sam_open_format(file1, "wc", &fmt1); outfile2 = sam_open_format(file2, "wc", &fmt2); - outfile3 = sam_open_format(file3, "wc", &fmt3); outfile4 = sam_open_format(file4, "wc", &fmt4); - if (!outfile1 || !outfile2 || !outfile3 || !outfile4) { - printf("Could not open output file\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1) || - (sam_hdr_write(outfile3, in_samhdr) == -1) || 
(sam_hdr_write(outfile4, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0 || - sam_write1(outfile2, in_samhdr, bamdata) < 0 || - sam_write1(outfile3, in_samhdr, bamdata) < 0 || - sam_write1(outfile4, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: -#define IF_OL(X,Y) if((X)) {(Y);} //if one liner - //cleanup - IF_OL(in_samhdr, sam_hdr_destroy(in_samhdr)); - IF_OL(infile, sam_close(infile)); - IF_OL(outfile1, sam_close(outfile1)); - IF_OL(outfile2, sam_close(outfile2)); - IF_OL(outfile3, sam_close(outfile3)); - IF_OL(outfile4, sam_close(outfile4)); - IF_OL(file1, free(file1)); - IF_OL(file2, free(file2)); - IF_OL(file3, free(file3)); - IF_OL(file4, free(file4)); - IF_OL(reffmt1, free(reffmt1)); - IF_OL(reffmt2, free(reffmt2)); - IF_OL(fmt1.specific, hts_opt_free(fmt1.specific)); - IF_OL(fmt2.specific, hts_opt_free(fmt2.specific)); - IF_OL(fmt3.specific, hts_opt_free(fmt3.specific)); - IF_OL(fmt4.specific, hts_opt_free(fmt4.specific)); - IF_OL(bamdata, bam_destroy1(bamdata)); - - return ret; -} diff --git a/src/htslib-1.19.1/samples/dump_aux.c b/src/htslib-1.19.1/samples/dump_aux.c deleted file mode 100644 index 49251fe..0000000 --- a/src/htslib-1.19.1/samples/dump_aux.c +++ /dev/null @@ -1,188 +0,0 @@ -/* dump_aux.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: dump_aux infile\n\ -Dump the aux tags from alignments\n"); -} - -/// printauxdata - prints aux data -/** @param fp - file to which it to be printed - stdout or null - * @param type - aux type - * @param idx - index in array, -1 when not an array type - * @param data - data - * recurses when the data is array type -returns 1 on failure 0 on success -*/ -int printauxdata(FILE *fp, char type, int32_t idx, const uint8_t *data) -{ - uint32_t auxBcnt = 0; - int i = 0; - char auxBType = 'Z'; - - //the tag is already queried and ensured to exist and the type is retrieved from 
the tag data, also iterated within index for arrays, so no error is expected here. - //when these apis are used explicitly, these error conditions needs to be handled based on return value and errno - switch(type) { - case 'A': - fprintf(fp, "%c", bam_aux2A(data)); //byte data - break; - case 'c': - fprintf(fp, "%d", (int8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 1 byte data; bam_auxB2i - from array or bam_aux2i - non array data - break; - case 'C': - fprintf(fp, "%u", (uint8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 1 byte data - break; - case 's': - fprintf(fp, "%d", (int16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 2 byte data - break; - case 'S': - fprintf(fp, "%u", (uint16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 2 byte data - break; - case 'i': - fprintf(fp, "%d", (int32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 4 byte data - break; - case 'I': - fprintf(fp, "%u", (uint32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 4 byte data - break; - case 'f': - case 'd': - fprintf(fp, "%g", (float)(idx > -1 ? 
bam_auxB2f(data, idx) : bam_aux2f(data))); //floating point data, 4 bytes - break; - case 'H': - case 'Z': - fprintf(fp, "%s", bam_aux2Z(data)); //array of char or hex data - break; - case 'B': //array of char/int/float - auxBcnt = bam_auxB_len(data); //length of array - auxBType = bam_aux_type(data + 1); //type of element in array - fprintf(fp, "%c", auxBType); - for (i = 0; i < auxBcnt; ++i) { //iterate the array - fprintf(fp, ","); - //calling recurssively with index to reuse a few lines - if (printauxdata(fp, auxBType, i, data) == EXIT_FAILURE) { - return EXIT_FAILURE; - } - } - break; - default: - printf("Invalid aux tag?\n"); - return EXIT_FAILURE; - break; - } - return EXIT_SUCCESS; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; - int ret = EXIT_FAILURE; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL; - int ret_r = 0; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - - //dump_aux infile - if (argc != 2) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - errno = 0; - data = NULL; - data = bam_aux_first(bamdata); //get the first aux data - while (data) { - printf("%.2s:%c:", bam_aux_tag(data), NULL != strchr("cCsSiI", bam_aux_type(data)) ? 
'i' : bam_aux_type(data)); //macros gets the tag and type of aux data - //dump the data - if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) { - printf("Failed to dump aux data\n"); - goto end; - } - else { - printf(" "); - } - data = bam_aux_next(bamdata, data); //get the next aux data - } - if (ENOENT != errno) { - printf("\nFailed to get aux data\n"); - goto end; - } - printf("\n"); - } - if (ret_r < -1) { - //read error - printf("Failed to read data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/flags_demo.c b/src/htslib-1.19.1/samples/flags_demo.c deleted file mode 100644 index e03fc6c..0000000 --- a/src/htslib-1.19.1/samples/flags_demo.c +++ /dev/null @@ -1,110 +0,0 @@ -/* flags_demo.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: flags \n\ -Shows the count of read1 and read2 alignments\n\ -This shows basic reading and alignment flag access\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; //input file name - int c = 0, ret = EXIT_FAILURE; - int64_t cntread1 = 0, cntread2 = 0; //count - samFile *infile = NULL; //sam file - sam_hdr_t *in_samhdr = NULL; //header of file - bam1_t *bamdata = NULL; //to hold the read data - - if (argc != 2) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf( "Failed to read header from file\n"); - goto end; - } - - //read data, check flags and update count - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - cntread1++; - } - if (bamdata->core.flag & BAM_FREAD2) { - cntread2++; - } - } - if (c != -1) { - //error - printf("Failed to get data\n"); - goto end; - } - //else -1 / EOF - printf("File %s 
has %"PRIhts_pos" read1 and %"PRIhts_pos" read2 alignments\n", inname, cntread1, cntread2); - ret = EXIT_SUCCESS; -end: - //clean up - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/flags_htsopt_field.c b/src/htslib-1.19.1/samples/flags_htsopt_field.c deleted file mode 100644 index 4b64445..0000000 --- a/src/htslib-1.19.1/samples/flags_htsopt_field.c +++ /dev/null @@ -1,115 +0,0 @@ -/* flags_htsopt_field.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: flags_field \n\ -Shows the count of read1 and read2 alignments\n\ -This shows reading selected fields from CRAM file\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; //input file name - int c = 0, ret = EXIT_FAILURE; - int64_t cntread1 = 0, cntread2 = 0; //count - samFile *infile = NULL; //sam file - sam_hdr_t *in_samhdr = NULL; //header of file - bam1_t *bamdata = NULL; //to hold the read data - - if (argc != 2) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //select required field alone, this is useful for CRAM alone - if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0) { - printf("Failed to set htsoption\n"); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file\n"); - goto end; - } - - //read data, check flags and update count - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - 
cntread1++; - } - if (bamdata->core.flag & BAM_FREAD2) { - cntread2++; - } - } - if (c != -1) { - //error - printf("Failed to get data\n"); - goto end; - } - //else -1 / EOF - printf("File %s has %"PRIhts_pos" read1 and %"PRIhts_pos" read2 alignments\n", inname, cntread1, cntread2); - ret = EXIT_SUCCESS; -end: - //clean up - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/index_multireg_read.c b/src/htslib-1.19.1/samples/index_multireg_read.c deleted file mode 100644 index dbe8f15..0000000 --- a/src/htslib-1.19.1/samples/index_multireg_read.c +++ /dev/null @@ -1,150 +0,0 @@ -/* index_multireg_read.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the print_usage -/** @param fp pointer to the file / terminal to which print_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_multireg infile count regspec_csv\n\ - Reads alignment of a target matching to given region specifications\n\ - read_multireg infile.sam 2 R1:10-100,R2:200"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; - char *ptr = NULL; - int c = 0, ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - hts_idx_t *idx = NULL; - hts_itr_t *iter = NULL; - unsigned int regcnt = 0; - char **regions = NULL; - - //read_multireg infile count regspec_csv - if (argc != 4) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - regcnt = atoi(argv[2]); - regions = calloc(regcnt, sizeof(char*)); - //set each regspec as separate entry in region array - ptr = argv[3]; - for (c = 0; ptr && (c < regcnt); ++c) { - regions[c] = ptr; - ptr = strchr(ptr, ','); - if (ptr) { *ptr = '\0'; ++ptr; } - } - - if (regcnt == 0) { - printf("Region count can not be 0\n"); - goto end; - } - //initialize bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open files, use stdout as output SAM file for ease of display - infile = sam_open(inname, "r"); - outfile = 
sam_open("-", "w"); - if (!outfile || !infile) { - printf("Could not open in/out files\n"); - goto end; - } - //load index file, assume it to be present in same location - if (!(idx = sam_index_load(infile, inname))) { - printf("Failed to load the index\n"); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //create iterator - if (!(iter = sam_itr_regarray(idx, in_samhdr, regions, regcnt))) { - printf("Failed to get iterator\n"); - goto end; - } - if (regions) { - //can be freed as it is no longer required - free(regions); - regions = NULL; - } - - //get required area - while ((c = sam_itr_multi_next(infile, iter, bamdata) >= 0)) { - //write to output - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - } - if (c != -1) { - printf("Error during read\n"); - goto end; - } - ret = EXIT_SUCCESS; - -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (iter) { - sam_itr_destroy(iter); - } - if (idx) - hts_idx_destroy(idx); - return ret; -} diff --git a/src/htslib-1.19.1/samples/index_reg_read.c b/src/htslib-1.19.1/samples/index_reg_read.c deleted file mode 100644 index 346d542..0000000 --- a/src/htslib-1.19.1/samples/index_reg_read.c +++ /dev/null @@ -1,143 +0,0 @@ -/* index_reg_read.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the print_usage -/** @param fp pointer to the file / terminal to which print_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: readreg infile idxfile region\n\ -Reads alignments matching to a specific region\n\ -\\. 
from start of file\n\ -\\* only unmapped reads\n\ -REFNAME all reads referring REFNAME\n\ -REFNAME:S all reads referring REFNAME and overlapping from S onwards\n\ -REFNAME:S-E all reads referring REFNAME overlapping from S to E\n\ -REFNAME:-E all reads referring REFNAME overlapping upto E\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *region = NULL; - char *idxfile = NULL; - int c = 0, ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - hts_idx_t *idx = NULL; - hts_itr_t *iter = NULL; - - //readreg infile indexfile region - if (argc != 4) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - idxfile = argv[2]; - region = argv[3]; - - //initialize bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - - //open files - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open input file\n"); - goto end; - } - //using stdout as output file for ease of dumping data - if (!(outfile = sam_open("-", "w"))) { - printf("Could not open out file\n"); - goto end; - } - //load index file - if (!(idx = sam_index_load2(infile, inname, idxfile))) { - printf("Failed to load the index\n"); - goto end; - } - //can use sam_index_load if the index file is present in same location and follows standard naming conventions (i.e. .) 
- - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //create iterator - if (!(iter = sam_itr_querys(idx, in_samhdr, region))) { - printf("Failed to get iterator\n"); - goto end; - } - //read using iterator - while ((c = sam_itr_next(infile, iter, bamdata)) >= 0) { - //write to output - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - } - if (c != -1) { - printf("Error during read\n"); - goto end; - } - ret = EXIT_SUCCESS; - -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (iter) { - sam_itr_destroy(iter); - } - if (idx) { - hts_idx_destroy(idx); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/index_write.c b/src/htslib-1.19.1/samples/index_write.c deleted file mode 100644 index 8fd2bc9..0000000 --- a/src/htslib-1.19.1/samples/index_write.c +++ /dev/null @@ -1,166 +0,0 @@ -/* index_write.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: idx_on_write infile shiftsize outdir\n\ -Creates compressed sam file and index file for it in given directory\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *outdir = NULL; - char *inname = NULL, *fileidx = NULL, *outname = NULL, outmode[4] = "w"; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - - //idx_on_write infile sizeshift outputdirectory - if (argc != 4) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - size = atoi(argv[2]); - outdir = argv[3]; - - //allocate space for output name - outdir/filename.ext.idxextNUL - c = strlen(basename(inname)) + strlen(outdir) + 10; - fileidx = malloc(sizeof(char) * c); - outname = malloc(sizeof(char) * c); - if (!fileidx || !outname) { - printf("Couldnt allocate memory\n"); - goto end; - } - //initialize bam storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - - //open files - if ((infile = sam_open(inname, "r"))) { - //get file type and create output names - if (infile->format.format == cram) { - //set as crai - snprintf(fileidx, c, "%s/%s.crai", outdir, 
basename(inname)); - snprintf(outname, c, "%s/%s", outdir, basename(inname)); - } - else { - //set as either bai or csi based on interval - if (infile->format.format == sam && infile->format.compression == no_compression) { - //create as gzip compressed - snprintf(outname, c, "%s/%s.gz", outdir, basename(inname)); - snprintf(fileidx, c, "%s/%s.gz.%s", outdir, basename(inname), !size ? "bai" : "csi"); - } - else { - //with same name as input - snprintf(outname, c, "%s/%s", outdir, basename(inname)); - snprintf(fileidx, c, "%s/%s.%s", outdir, basename(inname), !size ? "bai" : "csi"); - } - } - } - c = 0; - sam_open_mode(outmode + 1, outname, NULL); //set extra write options based on name - outfile = sam_open(outname, outmode); - if (!outfile || !infile) { - printf("Could not open files\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if (sam_hdr_write(outfile, in_samhdr)) { - printf("Failed to write header\n"); - goto end; - } - - // initialize indexing, before start of write - if (sam_idx_init(outfile, in_samhdr, size, fileidx)) { - printf("idx initialization failed\n"); - goto end; - } - //read and write alignments - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write data\n"); - goto end; - } - } - if (c != -1) { - printf("Error in reading data\n"); - goto end; - } - //else EOF, save index - if (sam_idx_save(outfile)) { - printf("Could not save index\n"); - goto end; - } - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (fileidx) { - free(fileidx); - } - if (outname) { - free(outname); - } - if (outfile) { - sam_close(outfile); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/mod_aux.c 
b/src/htslib-1.19.1/samples/mod_aux.c deleted file mode 100644 index d5ed18c..0000000 --- a/src/htslib-1.19.1/samples/mod_aux.c +++ /dev/null @@ -1,221 +0,0 @@ -/* mod_aux.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: mod_aux infile QNAME tag type val\n\ -Add/update the given aux tag to all alignments\n\ -type A-char C-int F-float Z-string\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *tag = NULL, *qname = NULL, *val = NULL; - char type = '\0'; - int ret = EXIT_FAILURE, ret_r = 0, length = 0; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL, *outfile = NULL; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - - //mod_aux infile QNAME tag type val - if (argc != 6) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - qname = argv[2]; - tag = argv[3]; - type = argv[4][0]; - val = argv[5]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //open output file - if (!(outfile = sam_open("-", "w"))) { - printf("Could not open std output\n"); - goto end; - } - - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (sam_hdr_write(outfile, in_samhdr) == -1) { - printf("Failed to write header\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, 
in_samhdr, bamdata)) >= 0) { - if (strcasecmp(bam_get_qname(bamdata), qname)) { - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - continue; //not matching - } - - errno = 0; - //matched to qname, update aux - if (!(data = bam_aux_get(bamdata, tag))) { - int i = 0; float f = 0; - //tag not present append - switch (type) { - case 'f': - case 'd': - length = sizeof(float); - f = atof(val); - val = (const char*) &f; - type = 'f'; - break; - case 'C': - case 'S': - case 'I': - length = sizeof(int); - i = atoi(val); - val = (const char*) &i; - break; - case 'Z': - length = strlen(val) + 1; //1 for NUL termination - break; - case 'A': - length = 1; - break; - default: - printf("Invalid type mentioned\n"); - goto end; - break; - } - if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val)) { - printf("Failed to append aux data, errno: %d\n", errno); - goto end; - } - } - else { - char auxtype = bam_aux_type(data); - //update the tag with newer value - switch (type) { - case 'f': - case 'd': - if (auxtype != 'f' && auxtype != 'd') { - printf("Invalid aux type passed\n"); - goto end; - } - if (bam_aux_update_float(bamdata, tag, atof(val))) { - printf("Failed to update float data, errno: %d\n", errno); - goto end; - } - break; - case 'C': - case 'S': - case 'I': - if (auxtype != 'c' && auxtype != 'C' && auxtype != 's' && auxtype != 'S' && auxtype != 'i' && auxtype != 'I') { - printf("Invalid aux type passed\n"); - goto end; - } - if (bam_aux_update_int(bamdata, tag, atoll(val))) { - printf("Failed to update int data, errno: %d\n", errno); - goto end; - } - break; - case 'Z': - if (auxtype != 'Z') { - printf("Invalid aux type passed\n"); - goto end; - } - length = strlen(val) + 1; //1 for NUL termination - if (bam_aux_update_str(bamdata, tag, length, val)) { - //with length as -1, length will be detected based on null terminated val data - printf("Failed to update string data, errno: %d\n", errno); - goto 
end; - } - break; - case 'A': - if (auxtype != 'A') { - printf("Invalid aux type passed\n"); - goto end; - } - //update the char data directly on buffer - *(data+1) = val[0]; - break; - default: - printf("Invalid data type\n"); - goto end; - break; - } - } - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - } - if (ret_r < -1) { - //read error - printf("Failed to read data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/mod_aux_ba.c b/src/htslib-1.19.1/samples/mod_aux_ba.c deleted file mode 100644 index 8ef90ee..0000000 --- a/src/htslib-1.19.1/samples/mod_aux_ba.c +++ /dev/null @@ -1,147 +0,0 @@ -/* mod_aux_ba.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: mod_aux_ba infile\n\ -Updates the count of bases as an aux array on all alignments\n\ -BA:B:I,count of ACTGN\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; - int i = 0, ret = EXIT_FAILURE, ret_r = 0; - uint32_t cnt[5] = {0}; //A C G T N - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL, *outfile = NULL; - bam1_t *bamdata = NULL; - - //mod_aux infile - if (argc != 2) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - //open output file - if (!(outfile = sam_open("-", "w"))) { - printf("Could not open std output\n"); - goto end; - } - - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (sam_hdr_write(outfile, in_samhdr) == -1) { - printf("Failed to write header\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - errno = 0; - memset(cnt, 0, sizeof(cnt)); - for (i = 0; i < bamdata->core.l_qseq; ++i) { - switch 
(seq_nt16_str[bam_seqi(bam_get_seq(bamdata),i)]) { - case 'A': - ++cnt[0]; - break; - case 'C': - ++cnt[1]; - break; - case 'G': - ++cnt[2]; - break; - case 'T': - ++cnt[3]; - break; - default: //N - ++cnt[4]; - break; - } - } - - if (bam_aux_update_array(bamdata, "BA", 'I', sizeof(cnt)/sizeof(cnt[0]), cnt)) { - printf("Failed to update base array, errno %d", errno); - goto end; - } - - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write output\n"); - goto end; - } - } - if (ret_r < -1) { - //read error - printf("Failed to read data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/mod_bam.c b/src/htslib-1.19.1/samples/mod_bam.c deleted file mode 100644 index 9f1eb32..0000000 --- a/src/htslib-1.19.1/samples/mod_bam.c +++ /dev/null @@ -1,229 +0,0 @@ -/* mod_bam.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: mod_bam infile QNAME fieldpos newval\n\ -Modifies the alignment data field\n\ -fieldpos - 1 QNAME 2 FLAG 3 RNAME 4 POS 5 MAPQ 6 CIGAR 7 RNEXT 8 PNEXT 9 TLEN 10 SEQ 11 QUAL\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *qname = NULL; - char *val = NULL; - int c = 0, ret = EXIT_FAILURE, field = 0; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL, *outfile = NULL; - int ret_r = 0, i = 0; - bam1_t *bamdata = NULL; - - //mod_bam infile QNAME fieldpos newval - if (argc != 5) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - qname = argv[2]; - //1 QNAME 2 FLAG 3 RNAME 4 POS 5 MAPQ 6 CIGAR 7 RNEXT 8 PNEXT 9 TLEN 10 SEQ 11 QUAL - field = atoi(argv[3]); - val = argv[4]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r")) || !(outfile = sam_open("-", "w"))) { - printf("Could not open input/output\n"); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (sam_hdr_write(outfile, in_samhdr) == -1) { - printf("Failed to write header\n"); - goto end; - } - - while 
((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… - ret = 0; - if (!strcasecmp(qname, bam_get_qname(bamdata))) { - //the required one - switch(field) { - case 1:// QNAME - ret = bam_set_qname(bamdata, val); - break; - case 2:// FLAG - bamdata->core.flag = atol(val) & 0xFFFF; - break; - case 3:// RNAME - case 7:// RNEXT - if ((ret = sam_hdr_name2tid(in_samhdr, val)) < 0) { - printf("Invalid reference name\n"); - ret = -1; - break; - } - if (field == 3) { - //reference - bamdata->core.tid = ret; - } - else { - //mate reference - bamdata->core.mtid = ret; - } - break; - case 4:// POS - bamdata->core.pos = atoll(val); - break; - case 5:// MAPQ - bamdata->core.qual = atoi(val) & 0x0FF; - break; - case 6:// CIGAR - { - uint32_t *cigar = NULL; - size_t size = 0; - ssize_t ncigar = 0; - bam1_t *newbam = bam_init1(); - if (!newbam) { - printf("Failed to create new bam data\n"); - ret = -1; - break; - } - //get cigar array and set all data in new bam record - if ((ncigar = sam_parse_cigar(val, NULL, &cigar, &size)) < 0) { - printf("Failed to parse cigar\n"); - ret = -1; - break; - } - if (bam_set1(newbam, bamdata->core.l_qname, bam_get_qname(bamdata), bamdata->core.flag, bamdata->core.tid, bamdata->core.pos, bamdata->core.qual, - ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos, bamdata->core.isize, bamdata->core.l_qseq, (const char*)bam_get_seq(bamdata), (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) { - printf("Failed to set bamdata\n"); - ret = -1; - break; - } - //correct sequence data as input is expected in ascii format and not as compressed inside bam! 
- memcpy(bam_get_seq(newbam), bam_get_seq(bamdata), (bamdata->core.l_qseq + 1) / 2); - //copy the aux data - memcpy(bam_get_aux(newbam), bam_get_aux(bamdata), bam_get_l_aux(bamdata)); - - bam_destroy1(bamdata); - bamdata = newbam; - } - break; - case 8:// PNEXT - bamdata->core.mpos = atoll(val); - break; - case 9:// TLEN - bamdata->core.isize = atoll(val); - break; - case 10:// SEQ - i = strlen(val); - if (bamdata->core.l_qseq != i) { - printf("SEQ length different\n"); - ret = -1; - //as it is different, have to update quality data and cigar data as well and more info is required for it, which is not handled in this sample - //accessing raw memory and moving is one option; creating and using new bam1_t object is another option. - break; - } - for( c = 0; c < i; ++c) { - bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[(unsigned char)val[c]]); - } - break; - case 11:// QUAL - i = strlen(val); - if (i != bamdata->core.l_qseq) { - printf("Qual length different than sequence\n"); - ret = -1; - break; - } - for (c = 0; c < i; ++c) { - val[c] -= 33; //phred score from ascii value - } - memcpy(bam_get_qual(bamdata), val, i); - break; - default: - printf("Invalid input\n"); - goto end; - break; - } - if (ret < 0) { - printf("Failed to set new data\n"); - ret = EXIT_FAILURE; - goto end; - } - } - if (sam_write1(outfile, in_samhdr, bamdata) < 0) { - printf("Failed to write bam data\n"); - ret = EXIT_FAILURE; - goto end; - } - } - - if (ret_r == -1 || ret != EXIT_FAILURE) { - // no error! 
- ret = EXIT_SUCCESS; - } - else { - printf("Failed to read data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/modstate.c b/src/htslib-1.19.1/samples/modstate.c deleted file mode 100644 index 9763916..0000000 --- a/src/htslib-1.19.1/samples/modstate.c +++ /dev/null @@ -1,190 +0,0 @@ -/* modstate.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: modstate infile option\n\ -Shows the base modifications on the alignment\n\ -Option can be 1 or 2 to select the api to use\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; - int ret = EXIT_FAILURE; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL; - - int ret_r = 0, i = 0 , r = 0, j = 0, pos = 0, opt = 0, k = 0, cnt = 0, *bm = NULL; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - hts_base_mod_state *ms = NULL; - - - //modstate infile 1/2 - if (argc != 3) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - opt = atoi(argv[2]) - 1; //option 1 or 2? 
- - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - if (!(ms = hts_base_mod_state_alloc())) { - printf("Failed to allocate state memory\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - i = 0; - data = bam_get_seq(bamdata); - if (bam_parse_basemod(bamdata, ms)) { - printf("Failed to parse the base mods\n"); - goto end; - } - //dump the modifications - printf("Modifications:"); - bm = bam_mods_recorded(ms, &cnt); - for (k = 0; k < cnt; ++k) { - printf("%c", bm[k]); - } - printf("\n"); - hts_base_mod mod[5] = {0}; //for ATCGN - if (opt) { - //option 1 - for (; i < bamdata->core.l_qseq; ++i) { - if ((r = bam_mods_at_next_pos(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]))) <= -1) { - printf("Failed to get modifications\n"); - goto end; - } - else if (r > (sizeof(mod) / sizeof(mod[0]))) { - printf("More modifications than this app can handle, update the app\n"); - goto end; - } - else if (!r) { - //no modification at this pos - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - //modifications - for (j = 0; j < r; ++j) { - printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); - } - } - } - else { - //option 2 - while ((r = bam_next_basemod(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]), &pos)) >= 0) { - for (; i < bamdata->core.l_qseq && i < pos; ++i) { - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - //modifications - for (j = 0; j < r; ++j) { - printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? 
'-' : '+', mod[j].modified_base); - } - if (i == pos) - i++; //skip the modification already displayed - if (!r) { - for (; i < bamdata->core.l_qseq; ++i) { - printf("%c", seq_nt16_str[bam_seqi(data, i)]); - } - break; - } - } - if (r <= -1) { - printf("Failed to get modifications\n"); - goto end; - } - } - printf("\n"); - } - - if (ret_r == -1) { - //check last alignment's base modification - int strand = 0, impl = 0; - char canonical = 0, modification[] = "mhfcgebaon"; //possible modifications - printf("\n\nLast alignment has \n"); - for (k = 0; k < sizeof(modification) - 1; ++k) { //avoiding NUL termination - if (bam_mods_query_type(ms, modification[k], &strand, &impl, &canonical)) { - printf ("No modification of %c type\n", modification[k]); - } - else { - printf("%s strand has %c modified with %c, can %sassume unlisted as unmodified\n", strand?"-/bottom/reverse":"+/top/forward", canonical, modification[k], impl?"" : "not " ); - } - } - // no error! - ret = EXIT_SUCCESS; - } - else { - printf("Failed to read data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - - if (ms) { - hts_base_mod_state_free(ms); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/mpileup.c b/src/htslib-1.19.1/samples/mpileup.c deleted file mode 100644 index fe93374..0000000 --- a/src/htslib-1.19.1/samples/mpileup.c +++ /dev/null @@ -1,204 +0,0 @@ -/* mpileup.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: mpileup infile ...\n\ -Shows the mpileup api usage.\n"); - return; -} - -typedef struct plpconf { - char *inname; - samFile *infile; - sam_hdr_t *in_samhdr; -} plpconf; - -/// @brief plpconstructor -/// @param data client data? 
-/// @param b bam being loaded -/// @param cd client data -/// @return -int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - return 0; -} - -int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - return 0; -} - -/// @brief bam_plp_auto_f reads alignment data for pileup operation -/// @param data client callback data holding alignment file handle -/// @param b bamdata read -/// @return same as sam_read1 -int readdata(void *data, bam1_t *b) -{ - plpconf *conf = (plpconf*)data; - if (!conf || !conf->infile) { - return -2; //cant read data - } - - //read alignment and send - return sam_read1(conf->infile, conf->infile->bam_header, b); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - int ret = EXIT_FAILURE; - bam1_t *bamdata = NULL; - plpconf** conf = NULL; - bam_mplp_t mplpiter = NULL; - int tid = -1, input = 0, k = 0, dpt = 0, *depth = NULL; - hts_pos_t refpos = -1; - const bam_pileup1_t **plp = NULL; - - //infile ... 
- if (argc < 2) { - print_usage(stderr); - goto end; - } - if ((conf = calloc(argc - 1, sizeof(plpconf*)))) { - for (input = 0; input < argc - 1; ++input) { - conf[input] = calloc(1, sizeof(plpconf)); - } - } - depth = calloc(argc - 1, sizeof(int)); - plp = calloc(argc - 1, sizeof(bam_pileup1_t*)); - if (!conf || !depth || !plp) { - printf("Failed to allocate memory\n"); - goto end; - } - for (input = 0; input < argc - 1; ++input) { - conf[input]->inname = argv[input+1]; - } - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - for(input = 0; input < argc - 1; ++input) { - if (!(conf[input]->infile = sam_open(conf[input]->inname, "r"))) { - printf("Could not open %s\n", conf[input]->inname); - goto end; - } - //read header - if (!(conf[input]->in_samhdr = sam_hdr_read(conf[input]->infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - } - - if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf))) { - printf("Failed to initialize mpileup data\n"); - goto end; - } - - //set constructor destructor callbacks - bam_mplp_constructor(mplpiter, plpconstructor); - bam_mplp_destructor(mplpiter, plpdestructor); - - while (bam_mplp64_auto(mplpiter, &tid, &refpos, depth, plp) > 0) { - printf("%d\t%"PRIhts_pos"\t", tid+1, refpos+1); - - for (input = 0; input < argc - 1; ++input) { - for (dpt = 0; dpt < depth[input]; ++dpt) { - if (plp[input][dpt].is_del || plp[input][dpt].is_refskip) { - printf("*"); - continue; - } - //start and end are displayed in UPPER and rest on LOWER - printf("%c", plp[input][dpt].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]) : - (plp[input]->is_tail ? 
toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]))); - if (plp[input][dpt].indel > 0) { - //insertions, anyway not start or end - printf("+%d", plp[input][dpt].indel); - for (k = 0; k < plp[input][dpt].indel; ++k) { - printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos + k + 1)])); - } - } - else if (plp[input][dpt].indel < 0) { - printf("%d", plp[input][dpt].indel); - for (k = 0; k < -plp[input][dpt].indel; ++k) { - printf("?"); - } - } - } - printf(" "); - } - printf("\n"); - fflush(stdout); - } - - ret = EXIT_SUCCESS; -end: - //clean up - if (conf) { - for (input = 0; input < argc - 1; ++input) { - if (conf[input] && conf[input]->in_samhdr) { - sam_hdr_destroy(conf[input]->in_samhdr); - } - if (conf[input] && conf[input]->infile) { - sam_close(conf[input]->infile); - } - if (conf[input]) { - free(conf[input]); - } - } - free(conf); - } - - if (bamdata) { - bam_destroy1(bamdata); - } - if (mplpiter) { - bam_mplp_destroy(mplpiter); - } - if (depth) { - free(depth); - } - if (plp) { - free(plp); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/pileup.c b/src/htslib-1.19.1/samples/pileup.c deleted file mode 100644 index 11e2fb0..0000000 --- a/src/htslib-1.19.1/samples/pileup.c +++ /dev/null @@ -1,183 +0,0 @@ -/* pileup.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: pileup infile\n\ -Shows the pileup api usage.\n"); - return; -} - -typedef struct plpconf { - char *inname; - samFile *infile; - sam_hdr_t *in_samhdr; -} plpconf; - -/// @brief plpconstructor -/// @param data client data? 
-/// @param b bam being loaded -/// @param cd client data -/// @return -int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - /*plpconf *conf= (plpconf*)data; - can access the data passed to pileup init from data - can do any alignment specific allocation / data storage here in param cd - it can hold either a float, 64 bit int or a pointer - when using cd, initialize and use as it will be reused after destructor*/ - return 0; -} - -int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - /*plpconf *conf= (plpconf*)data; - can access the data passed to pileup init from data - deallocate any alignment specific allocation made in constructor and stored in param cd*/ - return 0; -} - -/// @brief bam_plp_auto_f reads alignment data for pileup operation -/// @param data client callback data holding alignment file handle -/// @param b bamdata read -/// @return same as sam_read1 -int readdata(void *data, bam1_t *b) -{ - plpconf *conf = (plpconf*)data; - if (!conf || !conf->infile) { - return -2; //cant read data - } - - //read alignment and send - return sam_read1(conf->infile, conf->infile->bam_header, b); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - int ret = EXIT_FAILURE; - bam1_t *bamdata = NULL; - plpconf conf = {0}; - bam_plp_t plpiter = NULL; - int tid = -1, n = -1, j = 0, k = 0; - int refpos = -1; - const bam_pileup1_t *plp = NULL; - - //infile - if (argc != 2) { - print_usage(stderr); - goto end; - } - conf.inname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - if (!(conf.infile = sam_open(conf.inname, "r"))) { - printf("Could not open %s\n", conf.inname); - goto end; - } - //read header - if (!(conf.in_samhdr = sam_hdr_read(conf.infile))) { - printf("Failed to read header from 
file!\n"); - goto end; - } - - if (!(plpiter = bam_plp_init(readdata, &conf))) { - printf("Failed to initialize pileup data\n"); - goto end; - } - - //set constructor destructor callbacks - bam_plp_constructor(plpiter, plpconstructor); - bam_plp_destructor(plpiter, plpdestructor); - - while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &n))) { - printf("%d\t%d\t", tid+1, refpos+1); - - for (j = 0; j < n; ++j) { - //doesnt detect succeeding insertion and deletion together here, only insertion is identified - //deletion is detected in plp->is_del as and when pos reaches the position - //if detection ahead is required, use bam_plp_insertion here which gives deletion length along with insertion - if (plp[j].is_del || plp[j].is_refskip) { - printf("*"); - continue; - } - //start and end are displayed in UPPER and rest on LOWER - printf("%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]))); - if (plp[j].indel > 0) { - //insertions, anyway not start or end - printf("+%d", plp[j].indel); - for (k = 0; k < plp[j].indel; ++k) { - printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos + k + 1)])); - } - } - else if (plp[j].indel < 0) { - printf("%d", plp[j].indel); - for (k = 0; k < -plp[j].indel; ++k) { - printf("?"); - } - } - printf(" "); - } - printf("\n"); - fflush(stdout); - } - - ret = EXIT_SUCCESS; -end: - //clean up - if (conf.in_samhdr) { - sam_hdr_destroy(conf.in_samhdr); - } - if (conf.infile) { - sam_close(conf.infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (plpiter) { - bam_plp_destroy(plpiter); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/pileup_mod.c b/src/htslib-1.19.1/samples/pileup_mod.c deleted file mode 100644 index 24d6cf5..0000000 --- a/src/htslib-1.19.1/samples/pileup_mod.c +++ /dev/null @@ -1,218 +0,0 
@@ -/* pileup_mod.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: pileup_mod infile\n\ -Shows the pileup api usage with base modification.\n"); - return; -} - -typedef struct plpconf { - char *inname; - samFile *infile; - sam_hdr_t *in_samhdr; -} plpconf; - -/// @brief plpconstructor -/// @param data client data? 
-/// @param b bam being loaded -/// @param cd client data -/// @return -int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - //plpconf *conf= (plpconf*)data; can use this to access anything required from the data in pileup init - - //when using cd, initialize and use as it will be reused after destructor - cd->p = hts_base_mod_state_alloc(); - if (!cd->p) { - printf("Failed to allocate base modification state\n"); - return 1; - } - - //parse the bam data and gather modification data from MM tags - return (-1 == bam_parse_basemod(b, (hts_base_mod_state*)cd->p)) ? 1 : 0; -} - -int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { - if (cd->p) { - hts_base_mod_state_free((hts_base_mod_state *)cd->p); - cd->p = NULL; - } - return 0; -} - -/// @brief bam_plp_auto_f reads alignment data for pileup operation -/// @param data client callback data holding alignment file handle -/// @param b bamdata read -/// @return same as sam_read1 -int readdata(void *data, bam1_t *b) -{ - plpconf *conf = (plpconf*)data; - if (!conf || !conf->infile) { - return -2; //cant read data - } - - //read alignment and send - return sam_read1(conf->infile, conf->infile->bam_header, b); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - int ret = EXIT_FAILURE; - bam1_t *bamdata = NULL; - plpconf conf = {0}; - bam_plp_t plpiter = NULL; - int tid = -1, depth = -1, j = 0, k = 0, inslen = 0, dellen = 0, modlen = 0; - #define NMODS 5 - hts_base_mod mods[NMODS] = {0}; //ACGT N - int refpos = -1; - const bam_pileup1_t *plp = NULL; - kstring_t insdata = KS_INITIALIZE; - - //infile - if (argc != 2) { - print_usage(stderr); - goto end; - } - conf.inname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - if (!(conf.infile = 
sam_open(conf.inname, "r"))) { - printf("Could not open %s\n", conf.inname); - goto end; - } - //read header - if (!(conf.in_samhdr = sam_hdr_read(conf.infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (!(plpiter = bam_plp_init(readdata, &conf))) { - printf("Failed to initialize pileup data\n"); - goto end; - } - - //set constructor destructor callbacks - bam_plp_constructor(plpiter, plpconstructor); - bam_plp_destructor(plpiter, plpdestructor); - - while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &depth))) { - memset(&mods, 0, sizeof(mods)); - printf("%d\t%d\t", tid+1, refpos+1); - - for (j = 0; j < depth; ++j) { - dellen = 0; - - if (plp[j].is_del || plp[j].is_refskip) { - printf("*"); - continue; - } - /*invoke bam_mods_at_qpos before bam_plp_insertion_mod that the base modification - is retrieved before change in pileup pos thr' plp_insertion_mod call*/ - if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1) { - printf("Failed to get modifications\n"); - goto end; - } - - //use plp_insertion/_mod to get insertion and del at the same position - if ((inslen = bam_plp_insertion_mod(&plp[j], (hts_base_mod_state*)plp[j].cd.p, &insdata, &dellen)) == -1) { - printf("Failed to get insertion status\n"); - goto end; - } - - //start and end are displayed in UPPER and rest on LOWER, only 1st modification considered - //base and modification - printf("%c%c%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : - tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)])), - modlen > 0 ? mods[0].strand ? '-' : '+' : '\0', - modlen > 0 ? 
mods[0].modified_base : '\0'); - //insertion and deletions - if (plp[j].indel > 0) { - //insertion - /*insertion data from plp_insertion_mod, note this shows the quality value as well - which is different from base and modification above;the lower case display is not attempted either*/ - printf("+%d%s", plp[j].indel, insdata.s); - //handle deletion if any - if (dellen) { - printf("-%d", dellen); - for (k = 0; k < dellen; ++k) { - printf("?"); - } - } - } - else if (plp[j].indel < 0) { - //deletion - printf("%d", plp[j].indel); - for (k = 0; k < -plp[j].indel; ++k) { - printf("?"); - } - } - printf(" "); - } - printf("\n"); - fflush(stdout); - } - - ret = EXIT_SUCCESS; -end: - //clean up - if (conf.in_samhdr) { - sam_hdr_destroy(conf.in_samhdr); - } - if (conf.infile) { - sam_close(conf.infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (plpiter) { - bam_plp_destroy(plpiter); - } - ks_free(&insdata); - return ret; -} diff --git a/src/htslib-1.19.1/samples/read_aux.c b/src/htslib-1.19.1/samples/read_aux.c deleted file mode 100644 index cbf972b..0000000 --- a/src/htslib-1.19.1/samples/read_aux.c +++ /dev/null @@ -1,207 +0,0 @@ -/* read_aux.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_aux infile tag\n\ -Read the given aux tag from alignments either as SAM string or as raw data\n"); -} - -/// printauxdata - prints aux data -/** @param fp - file to which it to be printed - stdout or null - * @param type - aux type - * @param idx - index in array, -1 when not an array type - * @param data - data - * recurses when the data is array type -returns 1 on failure 0 on success -*/ -int printauxdata(FILE *fp, char type, int32_t idx, const uint8_t *data) -{ - uint32_t auxBcnt = 0; - int i = 0; - char auxBType = 'Z'; - - //the tag is already queried and ensured to exist and the type is retrieved from the tag data, also iterated within index for arrays, so no error is expected here. - //when these apis are used explicitly, these error conditions needs to be handled based on return value and errno - switch(type) { - case 'A': - fprintf(fp, "%c", bam_aux2A(data)); //byte data - break; - case 'c': - fprintf(fp, "%d", (int8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 1 byte data; bam_auxB2i - from array or bam_aux2i - non array data - break; - case 'C': - fprintf(fp, "%u", (uint8_t)(idx > -1 ? 
bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 1 byte data - break; - case 's': - fprintf(fp, "%d", (int16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 2 byte data - break; - case 'S': - fprintf(fp, "%u", (uint16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 2 byte data - break; - case 'i': - fprintf(fp, "%d", (int32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 4 byte data - break; - case 'I': - fprintf(fp, "%u", (uint32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 4 byte data - break; - case 'f': - case 'd': - fprintf(fp, "%g", (float)(idx > -1 ? bam_auxB2f(data, idx) : bam_aux2f(data))); //floating point data, 4 bytes - break; - case 'H': - case 'Z': - fprintf(fp, "%s", bam_aux2Z(data)); //array of char or hex data - break; - case 'B': //array of char/int/float - auxBcnt = bam_auxB_len(data); //length of array - auxBType = bam_aux_type(data + 1); //type of element in array - fprintf(fp, "%c", auxBType); - for (i = 0; i < auxBcnt; ++i) { //iterate the array - fprintf(fp, ","); - //calling recurssively with index to reuse a few lines - if (printauxdata(fp, auxBType, i, data) == EXIT_FAILURE) { - return EXIT_FAILURE; - } - } - break; - default: - printf("Invalid aux tag?\n"); - return EXIT_FAILURE; - break; - } - return EXIT_SUCCESS; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *tag = NULL; - int c = 0, ret = EXIT_FAILURE, ret_r = 0, i = 0; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - kstring_t sdata = KS_INITIALIZE; - - //read_aux infile tag - if (argc != 3) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - tag = argv[2]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; 
- } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - errno = 0; i++; - ks_clear(&sdata); - if (i % 2) { //use options alternatively to demonstrate both - //option 1 - get data as string with tag and type - if ((c = bam_aux_get_str(bamdata, tag, &sdata)) == 1) { - printf("%s\n",sdata.s); - } - else if (c == 0 && errno == ENOENT) { - //tag not present - printf("Tag not present\n"); - } - else { - //error - printf("Failed to get tag\n"); - goto end; - } - } - else { - //option 2 - get raw data - if (!(data = bam_aux_get(bamdata, tag))) { - //tag data not returned, errono gives the reason - if (errno == ENOENT) { - printf("Tag not present\n"); - } - else { - printf("Invalid aux data\n"); - } - } - else { - //got the tag, read and print - if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) { - printf("Failed to read aux data\n"); - goto end; - } - printf("\n"); - } - } - } - if (ret_r < -1) { - //read error - printf("Failed to read data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - ks_free(&sdata); - return ret; -} diff --git a/src/htslib-1.19.1/samples/read_bam.c b/src/htslib-1.19.1/samples/read_bam.c deleted file mode 100644 index 7fca8c5..0000000 --- a/src/htslib-1.19.1/samples/read_bam.c +++ /dev/null @@ -1,139 +0,0 @@ -/* read_bam.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_bam infile\n\ -Shows the alignment data from file\n"); -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *tidname = NULL, *flags = NULL; - int ret = EXIT_FAILURE; - sam_hdr_t *in_samhdr = NULL; - samFile *infile = NULL; - - int ret_r = 0, i = 0; - bam1_t *bamdata = NULL; - uint8_t *data = NULL; - uint32_t *cigar = NULL; - - - //read_bam infile - if (argc != 2) { - print_usage(stderr); - 
goto end; - } - inname = argv[1]; - - if (!(bamdata = bam_init1())) { - printf("Failed to allocate data memory!\n"); - goto end; - } - - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) - { - //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… - printf("NAME: %s\n", bam_get_qname(bamdata)); //get the query name using the macro - flags = bam_flag2str(bamdata->core.flag); //flags as string - printf("FLG: %d - %s\n", bamdata->core.flag, flags); //flag is available in core structure - free((void*)flags); - tidname = sam_hdr_tid2name(in_samhdr, bamdata->core.tid); - printf("RNAME/TID: %d - %s\n", bamdata->core.tid, tidname? tidname: "" ); //retrieves the target name using the value in bam and by referring the header - printf("POS: %"PRIhts_pos"\n", bamdata->core.pos + 1); //internally position is 0 based and on text output / SAM it is 1 based - printf("MQUAL: %d\n", bamdata->core.qual); //map quality value - - cigar = bam_get_cigar(bamdata); //retrieves the cigar data - printf("CGR: "); - for (i = 0; i < bamdata->core.n_cigar; ++i) { //no. 
of cigar data entries - printf("%d%c", bam_cigar_oplen(cigar[i]), bam_cigar_opchr(cigar[i])); //the macros gives the count of operation and the symbol of operation for given cigar entry - } - printf("\nTLEN/ISIZE: %"PRIhts_pos"\n", bamdata->core.isize); - - data = bam_get_seq(bamdata); //get the sequence data - if (bamdata->core.l_qseq != bam_cigar2qlen(bamdata->core.n_cigar, cigar)) { //checks the length with CIGAR and query - printf("\nLength doesnt matches to cigar data\n"); - goto end; - } - - printf("SEQ: "); - for (i = 0; i < bamdata->core.l_qseq ; ++i) { //sequence length - printf("%c", seq_nt16_str[bam_seqi(data, i)]); //retrieves the base from (internal compressed) sequence data - } - printf("\nQUAL: "); - for (int i = 0; i < bamdata->core.l_qseq ; ++i) { - printf("%c", bam_get_qual(bamdata)[i]+33); //retrives the quality value - } - printf("\n\n"); - } - - if (ret_r == -1) { - // no error! - ret = EXIT_SUCCESS; - } - else { - printf("Failed to read data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/read_fast.c b/src/htslib-1.19.1/samples/read_fast.c deleted file mode 100644 index f74b255..0000000 --- a/src/htslib-1.19.1/samples/read_fast.c +++ /dev/null @@ -1,116 +0,0 @@ -/* read_fast.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_fast \n\ -Reads the fasta/fastq file and shows the content.\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL; //input file name - int c = 0, ret = EXIT_FAILURE; - samFile *infile = NULL; //sam file - sam_hdr_t *in_samhdr = NULL; //header of file - bam1_t *bamdata = NULL; //to hold the read data - - if (argc != 2) { - print_usage(stdout); - goto end; - } - inname = 
argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input files - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - if (infile->format.format != fasta_format && infile->format.format != fastq_format) { - printf("Invalid file specified\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf( "Failed to read header from file\n"); - goto end; - } - - //read data - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - printf("\nsequence: "); - for (c = 0; c < bamdata->core.l_qseq; ++c) { - printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]); - } - if (infile->format.format == fastq_format) { - printf("\nquality: "); - for (c = 0; c < bamdata->core.l_qseq; ++c) { - printf("%c", bam_get_qual(bamdata)[c]); - } - } - } - if (c != -1) { - //error - printf("Failed to get data\n"); - goto end; - } - //else -1 / EOF - ret = EXIT_SUCCESS; -end: - //clean up - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/read_header.c b/src/htslib-1.19.1/samples/read_header.c deleted file mode 100644 index eb14dae..0000000 --- a/src/htslib-1.19.1/samples/read_header.c +++ /dev/null @@ -1,173 +0,0 @@ -/* read_header.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_header infile header [id val] [tag]\n\ -This shows given tag from given header or the whole line\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *header = NULL, *tag = NULL, *idval = NULL; - char *id = NULL; - int c = 0, ret = EXIT_FAILURE, linecnt = 0; - samFile *infile = NULL; - sam_hdr_t *in_samhdr = NULL; - kstring_t data = KS_INITIALIZE; - - //read_header infile 
header tag - if (argc < 3 || argc > 6) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - header = argv[2]; - if (argc == 4) { //header and tag - tag = argv[3]; - //find unique identifier field name for requested header type - if (header[0] == 'H' && header[1] == 'D') { - id = NULL; - } - else if (header[0] == 'S' && header[1] == 'Q') { - id = "SN"; - } - else if (header[0] == 'R' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'P' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'C' && header[1] == 'O') { - id = ""; - } - else { - printf("Invalid header type\n"); - goto end; - } - } - else if (argc == 5) { //header id val - id = argv[3]; - idval = argv[4]; - } - else if (argc == 6) { //header id val tag - id = argv[3]; - idval = argv[4]; - tag = argv[5]; - } - - //open input files - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - if (id && idval) { - if (tag) { - ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data); - } - else { - ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data); - } - - if (ret == 0) { - printf("%s\n", data.s); - } - else if (ret == -1) { - printf("No matching tag found\n"); - goto end; - } - else { - printf("Failed to find header line\n"); - goto end; - } - } - else { - //get count of given header type - linecnt = sam_hdr_count_lines(in_samhdr, header); - if (linecnt == 0) { - printf("No matching line found\n"); - goto end; - } - for (c = 0; c < linecnt; ++c ) { - if (tag) { - //non CO, get the tag requested - ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data); - } - else { - //CO header, there are no tags but the whole line - ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data); - } - - if (ret == 0) { - printf("%s\n", data.s); - continue; - } - else if (ret == -1) { - printf("Tag 
not present\n"); - continue; - } - else { - printf("Failed to get tag\n"); - goto end; - } - } - } - ret = EXIT_SUCCESS; - -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - ks_free(&data); - return ret; -} diff --git a/src/htslib-1.19.1/samples/read_refname.c b/src/htslib-1.19.1/samples/read_refname.c deleted file mode 100644 index adbc711..0000000 --- a/src/htslib-1.19.1/samples/read_refname.c +++ /dev/null @@ -1,125 +0,0 @@ -/* read_refname.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: read_refname infile minsize\n\ -This shows name of references which has length above the given size\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *id = NULL; - int c = 0, ret = EXIT_FAILURE, linecnt = 0, pos = 0; - samFile *infile = NULL; - sam_hdr_t *in_samhdr = NULL; - kstring_t data = KS_INITIALIZE; - int64_t minsize = 0, size = 0; - - if (argc != 3 && argc != 2) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - if (argc == 3) { - minsize = atoll(argv[2]); - } - - //open input files - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - linecnt = sam_hdr_count_lines(in_samhdr, "SQ"); //get reference count - if (linecnt <= 0) { - if (!linecnt) { - printf("No reference line present\n"); - } - else { - printf("Failed to get reference line count\n"); - } - goto end; - } - //iterate and check each reference's length - for (pos = 1, c = 0; c < linecnt; ++c) { - if ((ret = sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "LN", &data) == -2)) { - printf("Failed to get length\n"); - goto 
end; - } - else if (ret == -1) { - //length not present, ignore - continue; - } - //else have length - size = atoll(data.s); - if (size < minsize) { - //not required - continue; - } - if (!(id = sam_hdr_line_name(in_samhdr, "SQ", c))) { //sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "SN", &data) can also do the same! - printf("Failed to get id for reference data\n"); - goto end; - } - printf("%d,%s,%s\n", pos, id, data.s); - pos++; - } - - ret = EXIT_SUCCESS; - -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - ks_free(&data); - return ret; -} diff --git a/src/htslib-1.19.1/samples/rem_header.c b/src/htslib-1.19.1/samples/rem_header.c deleted file mode 100644 index a0b6510..0000000 --- a/src/htslib-1.19.1/samples/rem_header.c +++ /dev/null @@ -1,138 +0,0 @@ -/* rem_header.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: rem_header infile header [id]\n\ -Removes header line of given type and id\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *header = NULL, *idval = NULL; - char *id = NULL; - int ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - - //update_header infile header idval tag value - if (argc <3 || argc > 4) { - //3 & 4 are ok, 3-> all of given header type, 4->given id of given header type to be removed - print_usage(stderr); - goto end; - } - inname = argv[1]; - header = argv[2]; - if (argc == 4) { - idval = argv[3]; - } - - //unique identifier for each of the header types - if (header[0] == 'H' && header[1] == 'D') { - id = NULL; - } - else if (header[0] == 'S' && header[1] == 'Q') { - id = "SN"; - } - else if (header[0] == 'R' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'P' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'C' && header[1] == 'O') { - //CO field can be removed using the position of it using sam_hdr_remove_line_pos - id = ""; - } - else { - printf("Invalid header type\n"); - goto end; - } - - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); 
- goto end; - } - if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update - printf("Could not open stdout\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - if (idval) { - //remove specific line - if (sam_hdr_remove_line_id(in_samhdr, header, id, idval)) { - printf("Failed to remove header line\n"); - goto end; - } - } - else { - //remove multiple lines of a header type - if (sam_hdr_remove_lines(in_samhdr, header, id, NULL)) { - printf("Failed to remove header line\n"); - goto end; - } - } - //write output - if (sam_hdr_write(outfile, in_samhdr) < 0) { - printf("Failed to write output\n"); - goto end; - } - ret = EXIT_SUCCESS; - //bam data write to follow.... -end: - //cleanupq - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/sample.ref.fa b/src/htslib-1.19.1/samples/sample.ref.fa deleted file mode 100644 index 5789e8c..0000000 --- a/src/htslib-1.19.1/samples/sample.ref.fa +++ /dev/null @@ -1,4 +0,0 @@ ->T1 T1:1-40 -AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT ->T2 T2:1:40 -TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT diff --git a/src/htslib-1.19.1/samples/sample.sam b/src/htslib-1.19.1/samples/sample.sam deleted file mode 100644 index e56efd6..0000000 --- a/src/htslib-1.19.1/samples/sample.sam +++ /dev/null @@ -1,29 +0,0 @@ -@HD VN:1.17 SO:unknown -@SQ SN:T1 LN:40 -@SQ SN:T2 LN:40 -@CO @SQ SN* LN* AH AN AS DS M5 SP TP UR -@CO @RG ID* BC CN DS DT FO KS LB PG PI PL PM PU SM -@CO @PG ID* PN CL PP DS VN -@CO this is a dummy alignment file to demonstrate different abilities of hts apis -@CO QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… -@CO 1234567890123456789012345678901234567890 -@CO AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT T1 -@CO 
TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT T2 -@CO ITR1-ITR2M, ITR2-ITR2M are proper pairs in T1 and T2, UNMP1 is partly mapped and pair is unmapped, UNMP2 & 3 are unmappped -@CO A1-A2, A4-A3 are proper pairs with A4-A3 in different read order. A5 is secondary alignment -ITR1 99 T1 5 40 4M = 33 10 ACTG ()() -ITR2 147 T2 23 49 2M = 35 -10 TT ** -ITR2M 99 T2 35 51 2M = 23 10 AA && -ITR1M 147 T1 33 37 4M = 5 -10 ACTG $$$$ -UNMP1 73 T1 21 40 3M * 0 5 GGG &&1 -UNMP2 141 * 0 0 * * 0 7 AA && -UNMP3 77 * 0 0 * * 0 5 GGG &&2 -A1 99 T1 25 35 6M = 31 8 ACTGTT ****** -A2 147 T1 31 33 6M = 25 -8 ACTGTT ()()() -A3 147 T2 23 47 2M1X = 12 -5 TTG ((( -A4 99 T2 12 50 3M = 23 5 GAA ()( -A5 355 T1 25 35 4M = 33 5 ACTG PPPP -B1 99 T1 25 35 6M = 31 8 GCTATT ****** -B3 147 T2 23 47 2M1X = 12 -5 TAG ((( -B4 99 T2 12 50 3M = 23 5 GAT ()( -B5 355 T1 25 35 4M = 33 5 AGTG PPPP diff --git a/src/htslib-1.19.1/samples/split.c b/src/htslib-1.19.1/samples/split.c deleted file mode 100644 index 2eb9e6b..0000000 --- a/src/htslib-1.19.1/samples/split.c +++ /dev/null @@ -1,153 +0,0 @@ -/* split.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: split infile outdir\n\ -Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ -Shows the basic writing of output\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL; - char *file1 = NULL, *file2 = NULL; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - - if (argc != 3) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - outdir = argv[2]; - - //allocate space for output - size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination - file1 = malloc(size); - file2 = malloc(size); - if (!file1 || !file2) { - printf("Failed to set output path\n"); - goto end; - } - - //output file names - snprintf(file1, size, "%s/1.sam", outdir); //for SAM output - snprintf(file2, size, "%s/2.bam", outdir); //for BAM output - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input file - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - 
goto end; - } - //open output files - w write as SAM, wb write as BAM - outfile1 = sam_open(file1, "w"); //as SAM - outfile2 = sam_open(file2, "wb"); //as BAM - if (!outfile1 || !outfile2) { - printf("Could not open output file\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - else if (bamdata->core.flag & BAM_FREAD2) { - if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (file1) { - free(file1); - } - if (file2) { - free(file2); - } - if (outfile1) { - sam_close(outfile1); - } - if (outfile2) { - sam_close(outfile2); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/split2.c b/src/htslib-1.19.1/samples/split2.c deleted file mode 100644 index 2354abf..0000000 --- a/src/htslib-1.19.1/samples/split2.c +++ /dev/null @@ -1,158 +0,0 @@ -/* split2.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. 
- - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: split infile outdir\n\ -Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ -Shows file type selection through name and format api\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL; - char *file1 = NULL, *file2 = NULL, mode1[5] = "w", mode2[5] = "w"; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = 
NULL, *outfile1 = NULL, *outfile2 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - - if (argc != 3) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - outdir = argv[2]; - - //allocate space for output - size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam.gz") + 1); //space for output file name and null termination - file1 = malloc(size); - file2 = malloc(size); - if (!file1 || !file2) { - printf("Failed to set output path\n"); - goto end; - } - - //output file names - snprintf(file1, size, "%s/1.sam.gz", outdir); //name of Read1 file - snprintf(file2, size, "%s/2.sam", outdir); //name of Read2 file - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //set file open mode based on file name for 1st and as explicit for 2nd - if ((sam_open_mode(mode1+1, file1, NULL) == -1) || (sam_open_mode(mode2+1, file2, "sam.gz") == -1)) { - printf("Failed to set open mode\n"); - goto end; - } - //open input file - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //open output files - outfile1 = sam_open(file1, mode1); //as compressed SAM through sam_open - outfile2 = sam_open_format(file2, mode2, NULL); //as compressed SAM through sam_open_format - if (!outfile1 || !outfile2) { - printf("Could not open output file\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - else if 
(bamdata->core.flag & BAM_FREAD2) { - if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (file1) { - free(file1); - } - if (file2) { - free(file2); - } - if (outfile1) { - sam_close(outfile1); - } - if (outfile2) { - sam_close(outfile2); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/split_thread1.c b/src/htslib-1.19.1/samples/split_thread1.c deleted file mode 100644 index 40d2dfd..0000000 --- a/src/htslib-1.19.1/samples/split_thread1.c +++ /dev/null @@ -1,161 +0,0 @@ -/* split_thread1.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: split_t1 infile outdir\n\ -Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ -Shows the usage of basic thread in htslib\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *outdir = NULL; - char *file1 = NULL, *file2 = NULL; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - - if (argc != 3) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - outdir = argv[2]; - - //allocate space for output - size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination - file1 = malloc(size); - file2 = malloc(size); - if (!file1 || !file2) { - printf("Failed to set output path\n"); - goto end; - } - - //output file names - snprintf(file1, size, "%s/1.sam", outdir); //for SAM output - snprintf(file2, size, "%s/2.bam", outdir); //for BAM output - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input file - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", 
inname); - goto end; - } - //open output files - w write as SAM, wb write as BAM - outfile1 = sam_open(file1, "w"); //as SAM - outfile2 = sam_open(file2, "wb"); //as BAM - if (!outfile1 || !outfile2) { - printf("Could not open output file\n"); - goto end; - } - - //create file specific threads - if (hts_set_opt(infile, HTS_OPT_NTHREADS, 2) < 0 || //2 thread specific for reading - hts_set_opt(outfile1, HTS_OPT_NTHREADS, 1) < 0 || //1 thread specific for sam write - hts_set_opt(outfile2, HTS_OPT_NTHREADS, 1) < 0) { //1 thread specific for bam write - printf("Failed to set thread options\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { - printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - else if (bamdata->core.flag & BAM_FREAD2) { - if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (file1) { - free(file1); - } - if (file2) { - free(file2); - } - if (outfile1) { - sam_close(outfile1); - } - if (outfile2) { - sam_close(outfile2); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/split_thread2.c b/src/htslib-1.19.1/samples/split_thread2.c deleted file mode 100644 index dab897b..0000000 --- 
a/src/htslib-1.19.1/samples/split_thread2.c +++ /dev/null @@ -1,171 +0,0 @@ -/* split_thread2.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: split_t2 infile outdir\n\ -Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ -Shows the usage of thread pool\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char 
*inname = NULL, *outdir = NULL; - char *file1 = NULL, *file2 = NULL; - int c = 0, ret = EXIT_FAILURE, size = 0; - samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; - sam_hdr_t *in_samhdr = NULL; - bam1_t *bamdata = NULL; - htsThreadPool tpool = {NULL, 0}; - - if (argc != 3) { - print_usage(stdout); - goto end; - } - inname = argv[1]; - outdir = argv[2]; - - //allocate space for output - size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination - file1 = malloc(size); - file2 = malloc(size); - if (!file1 || !file2) { - printf("Failed to set output path\n"); - goto end; - } - - //output file names - snprintf(file1, size, "%s/1.sam", outdir); //for SAM output - snprintf(file2, size, "%s/2.bam", outdir); //for BAM output - //bam data storage - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - //open input file - r reading - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - //open output files - w write as SAM, wb write as BAM - outfile1 = sam_open(file1, "w"); //as SAM - outfile2 = sam_open(file2, "wb"); //as BAM - if (!outfile1 || !outfile2) { - printf("Could not open output file\n"); - goto end; - } - - //create a pool of 4 threads - if (!(tpool.pool = hts_tpool_init(4))) { - printf("Failed to initialize the thread pool\n"); - goto end; - } - //share the pool with all the 3 files - if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 || - hts_set_opt(outfile1, HTS_OPT_THREAD_POOL, &tpool) < 0 || - hts_set_opt(outfile2, HTS_OPT_THREAD_POOL, &tpool) < 0) { - printf("Failed to set thread options\n"); - goto end; - } - - //read header, required to resolve the target names to proper ids - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - //write header - if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { - 
printf("Failed to write header\n"); - goto end; - } - - //check flags and write - while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { - if (bamdata->core.flag & BAM_FREAD1) { - if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - else if (bamdata->core.flag & BAM_FREAD2) { - if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { - printf("Failed to write output data\n"); - goto end; - } - } - } - if (-1 == c) { - //EOF - ret = EXIT_SUCCESS; - } - else { - printf("Error in reading data\n"); - } -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - if (file1) { - free(file1); - } - if (file2) { - free(file2); - } - if (outfile1) { - sam_close(outfile1); - } - if (outfile2) { - sam_close(outfile2); - } - if (tpool.pool) { - hts_tpool_destroy(tpool.pool); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/update_header.c b/src/htslib-1.19.1/samples/update_header.c deleted file mode 100644 index f6b1680..0000000 --- a/src/htslib-1.19.1/samples/update_header.c +++ /dev/null @@ -1,131 +0,0 @@ -/* update_header.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - print the demo_usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: update_header infile header idval tag value\n\ -Updates the tag's value on line given in id on header of given type\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *inname = NULL, *tag = NULL, *idval = NULL, *val = NULL, *header = NULL; - char *id = NULL; - int ret = EXIT_FAILURE; - samFile *infile = NULL, *outfile = NULL; - sam_hdr_t *in_samhdr = NULL; - - //update_header infile header idval tag value - if (argc != 6) { - print_usage(stderr); - goto end; - } - inname = argv[1]; - header = argv[2]; - idval = argv[3]; - tag = argv[4]; - val = argv[5]; - - //unique identifier for each of the header types - if (header[0] == 'H' && header[1] == 'D') { - id = NULL; - printf("This sample doesnt not support modifying HD fields\n"); - } - else if (header[0] == 'S' && header[1] == 'Q') { - id = "SN"; - } - else if (header[0] == 'R' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'P' && header[1] == 'G') { - id = "ID"; - } - else if (header[0] == 'C' && header[1] == 'O') 
{ - tag = NULL; - id = ""; - printf("This sample doesnt not support modifying CO fields\n"); - } - else { - printf("Invalid header type\n"); - goto end; - } - - if (!(infile = sam_open(inname, "r"))) { - printf("Could not open %s\n", inname); - goto end; - } - if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update - printf("Could not open stdout\n"); - goto end; - } - - //read header - if (!(in_samhdr = sam_hdr_read(infile))) { - printf("Failed to read header from file!\n"); - goto end; - } - - //update with new data - if (sam_hdr_update_line(in_samhdr, header, id, idval, tag, val, NULL) < 0) { - printf("Failed to update data\n"); - goto end; - } - //write output - if (sam_hdr_write(outfile, in_samhdr) < 0) { - printf("Failed to write output\n"); - goto end; - } - ret = EXIT_SUCCESS; - //bam data write to follow.... -end: - //cleanup - if (in_samhdr) { - sam_hdr_destroy(in_samhdr); - } - if (infile) { - sam_close(infile); - } - if (outfile) { - sam_close(outfile); - } - return ret; -} diff --git a/src/htslib-1.19.1/samples/write_fast.c b/src/htslib-1.19.1/samples/write_fast.c deleted file mode 100644 index ef78176..0000000 --- a/src/htslib-1.19.1/samples/write_fast.c +++ /dev/null @@ -1,101 +0,0 @@ -/* write_fast.c -- showcases the htslib api usage - - Copyright (C) 2023 Genome Research Ltd. - - Author: Vasudeva Sarma - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE - -*/ - -/* The pupose of this code is to demonstrate the library apis and need proper error handling and optimization */ - -#include -#include -#include - -/// print_usage - show flags_demo usage -/** @param fp pointer to the file / terminal to which demo_usage to be dumped -returns nothing -*/ -static void print_usage(FILE *fp) -{ - fprintf(fp, "Usage: write_fast \n\ -Appends a fasta/fastq file.\n"); - return; -} - -/// main_demo - start of the demo -/** @param argc - count of arguments - * @param argv - pointer to array of arguments -returns 1 on failure 0 on success -*/ -int main(int argc, char *argv[]) -{ - const char *outname = NULL; //output file name - int ret = EXIT_FAILURE; - samFile *outfile = NULL; //sam file - sam_hdr_t *out_samhdr = NULL; //header of file - bam1_t *bamdata = NULL; //to hold the read data - char mode[4] = "a"; - - if (argc != 2) { - print_usage(stdout); - goto end; - } - outname = argv[1]; - - //initialize - if (!(bamdata = bam_init1())) { - printf("Failed to initialize bamdata\n"); - goto end; - } - if (sam_open_mode(mode + 1, outname, NULL) < 0) { - printf("Invalid file name\n"); - goto end; - } - //open output file - if (!(outfile = sam_open(outname, mode))) { - printf("Could not open %s\n", outname); - goto end; - } - //dummy data - if (bam_set1(bamdata, sizeof("test"), "test", BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, 10, "AACTGACTGA", "1234567890", 0) < 0) { - printf("Failed to set data\n"); - goto end; - } - if (sam_write1(outfile, out_samhdr, bamdata) < 0) { - 
printf("Failed to write data\n"); - goto end; - } - - ret = EXIT_SUCCESS; -end: - //clean up - if (out_samhdr) { - sam_hdr_destroy(out_samhdr); - } - if (outfile) { - sam_close(outfile); - } - if (bamdata) { - bam_destroy1(bamdata); - } - return ret; -} diff --git a/src/htslib-1.19.1/synced_bcf_reader.c b/src/htslib-1.19.1/synced_bcf_reader.c deleted file mode 100644 index a43ab15..0000000 --- a/src/htslib-1.19.1/synced_bcf_reader.c +++ /dev/null @@ -1,1500 +0,0 @@ -/* synced_bcf_reader.c -- stream through multiple VCF files. - - Copyright (C) 2012-2023 Genome Research Ltd. - - Author: Petr Danecek - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "htslib/synced_bcf_reader.h" -#include "htslib/kseq.h" -#include "htslib/khash_str2int.h" -#include "htslib/bgzf.h" -#include "htslib/thread_pool.h" -#include "bcf_sr_sort.h" - -#define REQUIRE_IDX_ 1 -#define ALLOW_NO_IDX_ 2 - -// Maximum indexable coordinate of .csi, for default min_shift of 14. -// This comes out to about 17 Tbp. Limiting factor is the bin number, -// which is a uint32_t in CSI. The highest number of levels compatible -// with this is 10 (needs 31 bits). -#define MAX_CSI_COOR ((1LL << (14 + 30)) - 1) - -typedef struct -{ - hts_pos_t start, end; // records are marked for skipping have start>end -} -region1_t; - -typedef struct bcf_sr_region_t -{ - region1_t *regs; // regions will sorted and merged, redundant records marked for skipping have start>end - int nregs, mregs, creg; // creg: the current active region -} -region_t; - -#define BCF_SR_AUX(x) ((aux_t*)((x)->aux)) -typedef struct -{ - sr_sort_t sort; - int regions_overlap, targets_overlap; -} -aux_t; - -static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start, hts_pos_t end); -static bcf_sr_regions_t *_regions_init_string(const char *str); -static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec); -static void _regions_sort_and_merge(bcf_sr_regions_t *reg); -static int _bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end, int missed_reg_handler); -static void bcf_sr_seek_start(bcf_srs_t *readers); - -char *bcf_sr_strerror(int errnum) -{ - switch (errnum) - { - case open_failed: - return strerror(errno); - case not_bgzf: - return "not compressed with bgzip"; - case idx_load_failed: - return "could not load index"; - case file_type_error: - return "unknown file type"; - case api_usage_error: - return "API usage 
error"; - case header_error: - return "could not parse header"; - case no_eof: - return "no BGZF EOF marker; file may be truncated"; - case no_memory: - return "Out of memory"; - case vcf_parse_error: - return "VCF parse error"; - case bcf_read_error: - return "BCF read error"; - case noidx_error: - return "merge of unindexed files failed"; - default: return ""; - } -} - -int bcf_sr_set_opt(bcf_srs_t *readers, bcf_sr_opt_t opt, ...) -{ - va_list args; - switch (opt) - { - case BCF_SR_REQUIRE_IDX: - readers->require_index = REQUIRE_IDX_; - return 0; - - case BCF_SR_ALLOW_NO_IDX: - readers->require_index = ALLOW_NO_IDX_; - return 0; - - case BCF_SR_PAIR_LOGIC: - va_start(args, opt); - BCF_SR_AUX(readers)->sort.pair = va_arg(args, int); - return 0; - - case BCF_SR_REGIONS_OVERLAP: - va_start(args, opt); - BCF_SR_AUX(readers)->regions_overlap = va_arg(args, int); - if ( readers->regions ) readers->regions->overlap = BCF_SR_AUX(readers)->regions_overlap; - return 0; - - case BCF_SR_TARGETS_OVERLAP: - va_start(args, opt); - BCF_SR_AUX(readers)->targets_overlap = va_arg(args, int); - if ( readers->targets ) readers->targets->overlap = BCF_SR_AUX(readers)->targets_overlap; - return 0; - - default: - break; - } - return 1; -} - -static int *init_filters(bcf_hdr_t *hdr, const char *filters, int *nfilters) -{ - kstring_t str = {0,0,0}; - const char *tmp = filters, *prev = filters; - int nout = 0, *out = NULL; - while ( 1 ) - { - if ( *tmp==',' || !*tmp ) - { - int *otmp = (int*) realloc(out, (nout+1)*sizeof(int)); - if (!otmp) - goto err; - out = otmp; - if ( tmp-prev==1 && *prev=='.' 
) - { - out[nout] = -1; - nout++; - } - else - { - str.l = 0; - kputsn(prev, tmp-prev, &str); - out[nout] = bcf_hdr_id2int(hdr, BCF_DT_ID, str.s); - if ( out[nout]>=0 ) nout++; - } - if ( !*tmp ) break; - prev = tmp+1; - } - tmp++; - } - if ( str.m ) free(str.s); - *nfilters = nout; - return out; - - err: - if (str.m) free(str.s); - free(out); - return NULL; -} - -int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file) -{ - if ( readers->nreaders || readers->regions ) - { - if ( readers->regions ) bcf_sr_regions_destroy(readers->regions); - readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2); - bcf_sr_seek_start(readers); - return 0; - } - - readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2); - if ( !readers->regions ) return -1; - readers->explicit_regs = 1; - readers->require_index = REQUIRE_IDX_; - readers->regions->overlap = BCF_SR_AUX(readers)->regions_overlap; - return 0; -} - -int bcf_sr_set_targets(bcf_srs_t *readers, const char *targets, int is_file, int alleles) -{ - if ( readers->nreaders || readers->targets ) - { - hts_log_error("Must call bcf_sr_set_targets() before bcf_sr_add_reader()"); - return -1; - } - if ( targets[0]=='^' ) - { - readers->targets_exclude = 1; - targets++; - } - readers->targets = bcf_sr_regions_init(targets,is_file,0,1,-2); - if ( !readers->targets ) return -1; - readers->targets_als = alleles; - readers->targets->overlap = BCF_SR_AUX(readers)->targets_overlap; - return 0; -} - -int bcf_sr_set_threads(bcf_srs_t *files, int n_threads) -{ - if (!(files->n_threads = n_threads)) - return 0; - - files->p = calloc(1, sizeof(*files->p)); - if (!files->p) { - files->errnum = no_memory; - return -1; - } - if (!(files->p->pool = hts_tpool_init(n_threads))) - return -1; - - return 0; -} - -void bcf_sr_destroy_threads(bcf_srs_t *files) { - if (!files->p) - return; - - if (files->p->pool) - hts_tpool_destroy(files->p->pool); - free(files->p); -} - -int bcf_sr_add_reader(bcf_srs_t *files, const char 
*fname) -{ - char fmode[5]; - strcpy(fmode, "r"); - vcf_open_mode(fmode+1, fname, NULL); - htsFile* file_ptr = hts_open(fname, fmode); - if ( ! file_ptr ) { - files->errnum = open_failed; - return 0; - } - - files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1)); - files->has_line[files->nreaders] = 0; - files->readers = (bcf_sr_t*) realloc(files->readers, sizeof(bcf_sr_t)*(files->nreaders+1)); - bcf_sr_t *reader = &files->readers[files->nreaders++]; - memset(reader,0,sizeof(bcf_sr_t)); - - reader->file = file_ptr; - - files->errnum = 0; - - if ( reader->file->format.compression==bgzf ) - { - BGZF *bgzf = hts_get_bgzfp(reader->file); - if ( bgzf && bgzf_check_EOF(bgzf) == 0 ) { - files->errnum = no_eof; - hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", fname); - } - if (files->p) - bgzf_thread_pool(bgzf, files->p->pool, files->p->qsize); - } - - if ( files->require_index==REQUIRE_IDX_ ) - { - if ( reader->file->format.format==vcf ) - { - if ( reader->file->format.compression!=bgzf ) - { - files->errnum = not_bgzf; - return 0; - } - - reader->tbx_idx = tbx_index_load(fname); - if ( !reader->tbx_idx ) - { - files->errnum = idx_load_failed; - return 0; - } - - reader->header = bcf_hdr_read(reader->file); - } - else if ( reader->file->format.format==bcf ) - { - if ( reader->file->format.compression!=bgzf ) - { - files->errnum = not_bgzf; - return 0; - } - - reader->header = bcf_hdr_read(reader->file); - - reader->bcf_idx = bcf_index_load(fname); - if ( !reader->bcf_idx ) - { - files->errnum = idx_load_failed; - return 0; - } - } - else - { - files->errnum = file_type_error; - return 0; - } - } - else - { - if ( reader->file->format.format==bcf || reader->file->format.format==vcf ) - { - reader->header = bcf_hdr_read(reader->file); - } - else - { - files->errnum = file_type_error; - return 0; - } - files->streaming = 1; - } - if ( files->streaming && files->nreaders>1 ) - { - static int no_index_warned = 0; - if ( 
files->require_index==ALLOW_NO_IDX_ && !no_index_warned ) - { - hts_log_warning("Using multiple unindexed files may produce errors, make sure chromosomes are in the same order!"); - no_index_warned = 1; - } - if ( files->require_index!=ALLOW_NO_IDX_ ) - { - files->errnum = api_usage_error; - hts_log_error("Must set require_index when the number of readers is greater than one"); - return 0; - } - } - if ( files->streaming && files->regions ) - { - files->errnum = api_usage_error; - hts_log_error("Cannot tabix-jump in streaming mode"); - return 0; - } - if ( !reader->header ) - { - files->errnum = header_error; - return 0; - } - - reader->fname = strdup(fname); - if ( files->apply_filters ) - reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids); - - // Update list of chromosomes - if ( !files->explicit_regs && !files->streaming ) - { - int n = 0, i; - const char **names = reader->tbx_idx ? tbx_seqnames(reader->tbx_idx, &n) : bcf_hdr_seqnames(reader->header, &n); - for (i=0; iregions ) - files->regions = _regions_init_string(names[i]); - else - _regions_add(files->regions, names[i], -1, -1); - } - free(names); - _regions_sort_and_merge(files->regions); - } - - if ( files->require_index==ALLOW_NO_IDX_ && files->nreaders > 1 ) - { - bcf_hdr_t *hdr0 = files->readers[0].header; - bcf_hdr_t *hdr1 = reader->header; - if ( hdr0->n[BCF_DT_CTG]!=hdr1->n[BCF_DT_CTG] ) - { - files->errnum = noidx_error; - hts_log_error("Different number of sequences in the header, refusing to stream multiple unindexed files"); - return 0; - } - int i; - for (i=0; in[BCF_DT_CTG]; i++) - { - if ( strcmp(bcf_hdr_id2name(hdr0,i),bcf_hdr_id2name(hdr1,i)) ) - { - files->errnum = noidx_error; - hts_log_error("Sequences in the header appear in different order, refusing to stream multiple unindexed files"); - return 0; - } - } - } - - return 1; -} - -bcf_srs_t *bcf_sr_init(void) -{ - bcf_srs_t *files = (bcf_srs_t*) calloc(1,sizeof(bcf_srs_t)); - files->aux = 
(aux_t*) calloc(1,sizeof(aux_t)); - bcf_sr_sort_init(&BCF_SR_AUX(files)->sort); - bcf_sr_set_opt(files,BCF_SR_REGIONS_OVERLAP,1); - bcf_sr_set_opt(files,BCF_SR_TARGETS_OVERLAP,0); - return files; -} - -static void bcf_sr_destroy1(bcf_sr_t *reader) -{ - free(reader->fname); - if ( reader->tbx_idx ) tbx_destroy(reader->tbx_idx); - if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx); - bcf_hdr_destroy(reader->header); - hts_close(reader->file); - if ( reader->itr ) tbx_itr_destroy(reader->itr); - int j; - for (j=0; jmbuffer; j++) - bcf_destroy1(reader->buffer[j]); - free(reader->buffer); - free(reader->samples); - free(reader->filter_ids); -} - -void bcf_sr_destroy(bcf_srs_t *files) -{ - int i; - for (i=0; inreaders; i++) - bcf_sr_destroy1(&files->readers[i]); - free(files->has_line); - free(files->readers); - for (i=0; in_smpl; i++) free(files->samples[i]); - free(files->samples); - if (files->targets) bcf_sr_regions_destroy(files->targets); - if (files->regions) bcf_sr_regions_destroy(files->regions); - if (files->tmps.m) free(files->tmps.s); - if (files->n_threads) bcf_sr_destroy_threads(files); - bcf_sr_sort_destroy(&BCF_SR_AUX(files)->sort); - free(files->aux); - free(files); -} - -void bcf_sr_remove_reader(bcf_srs_t *files, int i) -{ - assert( !files->samples ); // not ready for this yet - bcf_sr_sort_remove_reader(files, &BCF_SR_AUX(files)->sort, i); - bcf_sr_destroy1(&files->readers[i]); - if ( i+1 < files->nreaders ) - { - memmove(&files->readers[i], &files->readers[i+1], (files->nreaders-i-1)*sizeof(bcf_sr_t)); - memmove(&files->has_line[i], &files->has_line[i+1], (files->nreaders-i-1)*sizeof(int)); - } - files->nreaders--; -} - -#if DEBUG_SYNCED_READER -void debug_buffer(FILE *fp, bcf_sr_t *reader) -{ - int j; - for (j=0; j<=reader->nbuffer; j++) - { - bcf1_t *line = reader->buffer[j]; - fprintf(fp,"\t%p\t%s%s\t%s:%"PRIhts_pos"\t%s ", (void*)line,reader->fname,j==0?"*":" 
",reader->header->id[BCF_DT_CTG][line->rid].key,line->pos+1,line->n_allele?line->d.allele[0]:""); - int k; - for (k=1; kn_allele; k++) fprintf(fp," %s", line->d.allele[k]); - fprintf(fp,"\n"); - } -} - -void debug_buffers(FILE *fp, bcf_srs_t *files) -{ - int i; - for (i=0; inreaders; i++) - { - fprintf(fp, "has_line: %d\t%s\n", bcf_sr_has_line(files,i),files->readers[i].fname); - debug_buffer(fp, &files->readers[i]); - } - fprintf(fp,"\n"); -} -#endif - -static inline int has_filter(bcf_sr_t *reader, bcf1_t *line) -{ - int i, j; - if ( !line->d.n_flt ) - { - for (j=0; jnfilter_ids; j++) - if ( reader->filter_ids[j]<0 ) return 1; - return 0; - } - for (i=0; id.n_flt; i++) - { - for (j=0; jnfilter_ids; j++) - if ( line->d.flt[i]==reader->filter_ids[j] ) return 1; - } - return 0; -} - -static int _reader_seek(bcf_sr_t *reader, const char *seq, hts_pos_t start, hts_pos_t end) -{ - if ( end>=MAX_CSI_COOR ) - { - hts_log_error("The coordinate is out of csi index limit: %"PRIhts_pos, end+1); - exit(1); - } - if ( reader->itr ) - { - hts_itr_destroy(reader->itr); - reader->itr = NULL; - } - reader->nbuffer = 0; - if ( reader->tbx_idx ) - { - int tid = tbx_name2id(reader->tbx_idx, seq); - if ( tid==-1 ) return -1; // the sequence not present in this file - reader->itr = tbx_itr_queryi(reader->tbx_idx,tid,start,end+1); - } - else - { - int tid = bcf_hdr_name2id(reader->header, seq); - if ( tid==-1 ) return -1; // the sequence not present in this file - reader->itr = bcf_itr_queryi(reader->bcf_idx,tid,start,end+1); - } - if (!reader->itr) { - hts_log_error("Could not seek: %s:%"PRIhts_pos"-%"PRIhts_pos, seq, start + 1, end + 1); - assert(0); - } - return 0; -} - -/* - * _readers_next_region() - jumps to next region if necessary - * Returns 0 on success or -1 when there are no more regions left - */ -static int _readers_next_region(bcf_srs_t *files) -{ - // Need to open new chromosome? 
Check number of lines in all readers' buffers - int i, eos = 0; - for (i=0; inreaders; i++) - if ( !files->readers[i].itr && !files->readers[i].nbuffer ) eos++; - - if ( eos!=files->nreaders ) - { - // Some of the readers still has buffered lines - return 0; - } - - // No lines in the buffer, need to open new region or quit. - int prev_iseq = files->regions->iseq; - hts_pos_t prev_end = files->regions->end; - if ( bcf_sr_regions_next(files->regions)<0 ) return -1; - files->regions->prev_end = prev_iseq==files->regions->iseq ? prev_end : -1; - - for (i=0; inreaders; i++) - _reader_seek(&files->readers[i],files->regions->seq_names[files->regions->iseq],files->regions->start,files->regions->end); - - return 0; -} - -static void _set_variant_boundaries(bcf1_t *rec, hts_pos_t *beg, hts_pos_t *end) -{ - hts_pos_t off; - if ( rec->n_allele ) - { - off = rec->rlen; - bcf_unpack(rec, BCF_UN_STR); - int i; - for (i=1; in_allele; i++) - { - // Make symbolic alleles start at POS, although this is not strictly true for - // , where POS should be the position BEFORE the deletion/insertion. - // However, since arbitrary symbolic alleles can be defined by the user, we - // will simplify the interpretation of --targets-overlap and --region-overlap. 
- int j = 0; - char *ref = rec->d.allele[0]; - char *alt = rec->d.allele[i]; - while ( ref[j] && alt[j] && ref[j]==alt[j] ) j++; - if ( off > j ) off = j; - if ( !off ) break; - } - } - else - off = 0; - - *beg = rec->pos + off; - *end = rec->pos + rec->rlen - 1; -} - -/* - * _reader_fill_buffer() - buffers all records with the same coordinate - */ -static int _reader_fill_buffer(bcf_srs_t *files, bcf_sr_t *reader) -{ - // Return if the buffer is full: the coordinate of the last buffered record differs - if ( reader->nbuffer && reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) return 0; - - // No iterator (sequence not present in this file) and not streaming - if ( !reader->itr && !files->streaming ) return 0; - - // Fill the buffer with records starting at the same position - int i, ret = 0; - while (1) - { - if ( reader->nbuffer+1 >= reader->mbuffer ) - { - // Increase buffer size - reader->mbuffer += 8; - reader->buffer = (bcf1_t**) realloc(reader->buffer, sizeof(bcf1_t*)*reader->mbuffer); - for (i=8; i>0; i--) // initialize - { - reader->buffer[reader->mbuffer-i] = bcf_init1(); - reader->buffer[reader->mbuffer-i]->max_unpack = files->max_unpack; - reader->buffer[reader->mbuffer-i]->pos = -1; // for rare cases when VCF starts from 1 - } - } - if ( files->streaming ) - { - if ( reader->file->format.format==vcf ) - { - ret = hts_getline(reader->file, KS_SEP_LINE, &files->tmps); - if ( ret < -1 ) files->errnum = bcf_read_error; - if ( ret < 0 ) break; // no more lines or an error - ret = vcf_parse1(&files->tmps, reader->header, reader->buffer[reader->nbuffer+1]); - if ( ret<0 ) { files->errnum = vcf_parse_error; break; } - } - else if ( reader->file->format.format==bcf ) - { - ret = bcf_read1(reader->file, reader->header, reader->buffer[reader->nbuffer+1]); - if ( ret < -1 ) files->errnum = bcf_read_error; - if ( ret < 0 ) break; // no more lines or an error - } - else - { - hts_log_error("Fixme: not ready for this"); - exit(1); - } - } - else if ( 
reader->tbx_idx ) - { - ret = tbx_itr_next(reader->file, reader->tbx_idx, reader->itr, &files->tmps); - if ( ret < -1 ) files->errnum = bcf_read_error; - if ( ret < 0 ) break; // no more lines or an error - ret = vcf_parse1(&files->tmps, reader->header, reader->buffer[reader->nbuffer+1]); - if ( ret<0 ) { files->errnum = vcf_parse_error; break; } - } - else - { - ret = bcf_itr_next(reader->file, reader->itr, reader->buffer[reader->nbuffer+1]); - if ( ret < -1 ) files->errnum = bcf_read_error; - if ( ret < 0 ) break; // no more lines or an error - bcf_subset_format(reader->header,reader->buffer[reader->nbuffer+1]); - } - - // Prevent creation of duplicates from records overlapping multiple regions - // and recognize true variant overlaps vs record overlaps (e.g. TA>T vs A>-) - if ( files->regions ) - { - hts_pos_t beg, end; - if ( BCF_SR_AUX(files)->regions_overlap==0 ) - beg = end = reader->buffer[reader->nbuffer+1]->pos; - else if ( BCF_SR_AUX(files)->regions_overlap==1 ) - { - beg = reader->buffer[reader->nbuffer+1]->pos; - end = reader->buffer[reader->nbuffer+1]->pos + reader->buffer[reader->nbuffer+1]->rlen - 1; - } - else if ( BCF_SR_AUX(files)->regions_overlap==2 ) - _set_variant_boundaries(reader->buffer[reader->nbuffer+1], &beg,&end); - else - { - hts_log_error("This should never happen, just to keep clang compiler happy: %d",BCF_SR_AUX(files)->targets_overlap); - exit(1); - } - if ( beg <= files->regions->prev_end || end < files->regions->start || beg > files->regions->end ) continue; - } - - // apply filter - if ( !reader->nfilter_ids ) - bcf_unpack(reader->buffer[reader->nbuffer+1], BCF_UN_STR); - else - { - bcf_unpack(reader->buffer[reader->nbuffer+1], BCF_UN_STR|BCF_UN_FLT); - if ( !has_filter(reader, reader->buffer[reader->nbuffer+1]) ) continue; - } - reader->nbuffer++; - - if ( reader->buffer[reader->nbuffer]->rid != reader->buffer[1]->rid ) break; - if ( reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) break; // the buffer is full - 
} - if ( ret<0 ) - { - // done for this region - tbx_itr_destroy(reader->itr); - reader->itr = NULL; - } - if ( files->require_index==ALLOW_NO_IDX_ && reader->buffer[reader->nbuffer]->rid < reader->buffer[1]->rid ) - { - hts_log_error("Sequences out of order, cannot stream multiple unindexed files: %s", reader->fname); - exit(1); - } - return 0; // FIXME: Check for more errs in this function -} - -/* - * _readers_shift_buffer() - removes the first line - */ -static void _reader_shift_buffer(bcf_sr_t *reader) -{ - if ( !reader->nbuffer ) return; - int i; - bcf1_t *tmp = reader->buffer[1]; - for (i=2; i<=reader->nbuffer; i++) - reader->buffer[i-1] = reader->buffer[i]; - if ( reader->nbuffer > 1 ) - reader->buffer[reader->nbuffer] = tmp; - reader->nbuffer--; -} - -static int next_line(bcf_srs_t *files) -{ - const char *chr = NULL; - hts_pos_t min_pos = HTS_POS_MAX; - - // Loop until next suitable line is found or all readers have finished - while ( 1 ) - { - // Get all readers ready for the next region. 
- if ( files->regions && _readers_next_region(files)<0 ) break; - - // Fill buffers and find the minimum chromosome - int i, min_rid = INT32_MAX; - for (i=0; inreaders; i++) - { - _reader_fill_buffer(files, &files->readers[i]); - if ( files->require_index==ALLOW_NO_IDX_ ) - { - if ( !files->readers[i].nbuffer ) continue; - if ( min_rid > files->readers[i].buffer[1]->rid ) min_rid = files->readers[i].buffer[1]->rid; - } - } - - for (i=0; inreaders; i++) - { - if ( !files->readers[i].nbuffer ) continue; - if ( files->require_index==ALLOW_NO_IDX_ && min_rid != files->readers[i].buffer[1]->rid ) continue; - - // Update the minimum coordinate - if ( min_pos > files->readers[i].buffer[1]->pos ) - { - min_pos = files->readers[i].buffer[1]->pos; - chr = bcf_seqname(files->readers[i].header, files->readers[i].buffer[1]); - assert(chr); - bcf_sr_sort_set_active(&BCF_SR_AUX(files)->sort, i); - } - else if ( min_pos==files->readers[i].buffer[1]->pos ) - bcf_sr_sort_add_active(&BCF_SR_AUX(files)->sort, i); - } - if ( min_pos==HTS_POS_MAX ) - { - if ( !files->regions ) break; - continue; - } - - // Skip this position if not present in targets - if ( files->targets ) - { - int match = 0; - for (i=0; inreaders; i++) - { - if ( !files->readers[i].nbuffer || files->readers[i].buffer[1]->pos!=min_pos ) continue; - hts_pos_t beg, end; - if ( BCF_SR_AUX(files)->targets_overlap==0 ) - beg = end = min_pos; - else if ( BCF_SR_AUX(files)->targets_overlap==1 ) - { - beg = min_pos; - end = min_pos + files->readers[i].buffer[1]->rlen - 1; - } - else if ( BCF_SR_AUX(files)->targets_overlap==2 ) - _set_variant_boundaries(files->readers[i].buffer[1], &beg,&end); - else - { - hts_log_error("This should never happen, just to keep clang compiler happy: %d",BCF_SR_AUX(files)->targets_overlap); - exit(1); - } - int overlap = bcf_sr_regions_overlap(files->targets, chr, beg, end)==0 ? 
1 : 0; - if ( (!files->targets_exclude && !overlap) || (files->targets_exclude && overlap) ) - _reader_shift_buffer(&files->readers[i]); - else - match = 1; - } - if ( !match ) - { - min_pos = HTS_POS_MAX; - chr = NULL; - continue; - } - } - break; // done: chr and min_pos are set - } - if ( !chr ) return 0; - - return bcf_sr_sort_next(files, &BCF_SR_AUX(files)->sort, chr, min_pos); -} - -int bcf_sr_next_line(bcf_srs_t *files) -{ - if ( !files->targets_als ) - return next_line(files); - - while (1) - { - int i, ret = next_line(files); - if ( !ret ) return ret; - - for (i=0; inreaders; i++) - if ( files->has_line[i] ) break; - - if ( _regions_match_alleles(files->targets, files->targets_als-1, files->readers[i].buffer[0]) ) return ret; - - // Check if there are more duplicate lines in the buffers. If not, return this line as if it - // matched the targets, even if there is a type mismatch - for (i=0; inreaders; i++) - { - if ( !files->has_line[i] ) continue; - if ( files->readers[i].nbuffer==0 || files->readers[i].buffer[1]->pos!=files->readers[i].buffer[0]->pos ) continue; - break; - } - if ( i==files->nreaders ) return ret; // no more lines left, output even if target alleles are not of the same type - } -} - -static void bcf_sr_seek_start(bcf_srs_t *readers) -{ - bcf_sr_regions_t *reg = readers->regions; - int i; - for (i=0; inseqs; i++) - reg->regs[i].creg = -1; - reg->iseq = 0; - reg->start = -1; - reg->end = -1; - reg->prev_seq = -1; - reg->prev_start = -1; - reg->prev_end = -1; -} - - -int bcf_sr_seek(bcf_srs_t *readers, const char *seq, hts_pos_t pos) -{ - if ( !readers->regions ) return 0; - bcf_sr_sort_reset(&BCF_SR_AUX(readers)->sort); - if ( !seq && !pos ) - { - // seek to start - bcf_sr_seek_start(readers); - return 0; - } - - int i, nret = 0; - - // Need to position both the readers and the regions. The latter is a bit of a mess - // because we can have in memory or external regions. 
The safe way is: - // - reset all regions as if they were not read from at all (bcf_sr_seek_start) - // - find the requested iseq (stored in the seq_hash) - // - position regions to the requested position (bcf_sr_regions_overlap) - bcf_sr_seek_start(readers); - if ( khash_str2int_get(readers->regions->seq_hash, seq, &i)>=0 ) readers->regions->iseq = i; - _bcf_sr_regions_overlap(readers->regions, seq, pos, pos, 0); - - for (i=0; inreaders; i++) - { - nret += _reader_seek(&readers->readers[i],seq,pos,MAX_CSI_COOR-1); - } - return nret; -} - -int bcf_sr_set_samples(bcf_srs_t *files, const char *fname, int is_file) -{ - int i, j, nsmpl, free_smpl = 0; - char **smpl = NULL; - - void *exclude = (fname[0]=='^') ? khash_str2int_init() : NULL; - if ( exclude || strcmp("-",fname) ) // "-" stands for all samples - { - smpl = hts_readlist(fname, is_file, &nsmpl); - if ( !smpl ) - { - hts_log_error("Could not read the file: \"%s\"", fname); - return 0; - } - if ( exclude ) - { - for (i=0; ireaders[0].header->samples; // intersection of all samples - nsmpl = bcf_hdr_nsamples(files->readers[0].header); - } - - files->samples = NULL; - files->n_smpl = 0; - for (i=0; inreaders; j++) - { - if ( bcf_hdr_id2int(files->readers[j].header, BCF_DT_SAMPLE, smpl[i])<0 ) break; - n_isec++; - } - if ( n_isec!=files->nreaders ) - { - hts_log_warning("The sample \"%s\" was not found in %s, skipping", - smpl[i], files->readers[n_isec].fname); - continue; - } - - files->samples = (char**) realloc(files->samples, (files->n_smpl+1)*sizeof(const char*)); - files->samples[files->n_smpl++] = strdup(smpl[i]); - } - - if ( exclude ) khash_str2int_destroy(exclude); - if ( free_smpl ) - { - for (i=0; in_smpl ) - { - if ( files->nreaders>1 ) - hts_log_warning("No samples in common"); - return 0; - } - for (i=0; inreaders; i++) - { - bcf_sr_t *reader = &files->readers[i]; - reader->samples = (int*) malloc(sizeof(int)*files->n_smpl); - reader->n_smpl = files->n_smpl; - for (j=0; jn_smpl; j++) - 
reader->samples[j] = bcf_hdr_id2int(reader->header, BCF_DT_SAMPLE, files->samples[j]); - } - return 1; -} - -// Add a new region into a list. On input the coordinates are 1-based, inclusive, then stored 0-based, -// inclusive. Sorting and merging step needed afterwards: qsort(..,cmp_regions) and merge_regions(). -static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start, hts_pos_t end) -{ - if ( start==-1 && end==-1 ) - { - start = 0; end = MAX_CSI_COOR-1; - } - else - { - start--; end--; // store 0-based coordinates - } - - if ( !reg->seq_hash ) - reg->seq_hash = khash_str2int_init(); - - int iseq; - if ( khash_str2int_get(reg->seq_hash, chr, &iseq)<0 ) - { - // the chromosome block does not exist - iseq = reg->nseqs++; - reg->seq_names = (char**) realloc(reg->seq_names,sizeof(char*)*reg->nseqs); - reg->regs = (region_t*) realloc(reg->regs,sizeof(region_t)*reg->nseqs); - memset(®->regs[reg->nseqs-1],0,sizeof(region_t)); - reg->seq_names[iseq] = strdup(chr); - reg->regs[iseq].creg = -1; - khash_str2int_set(reg->seq_hash,reg->seq_names[iseq],iseq); - } - - region_t *creg = ®->regs[iseq]; - hts_expand(region1_t,creg->nregs+1,creg->mregs,creg->regs); - creg->regs[creg->nregs].start = start; - creg->regs[creg->nregs].end = end; - creg->nregs++; - - return 0; // FIXME: check for errs in this function -} - -static int regions_cmp(const void *aptr, const void *bptr) -{ - region1_t *a = (region1_t*)aptr; - region1_t *b = (region1_t*)bptr; - if ( a->start < b->start ) return -1; - if ( a->start > b->start ) return 1; - if ( a->end < b->end ) return -1; - if ( a->end > b->end ) return 1; - return 0; -} -static void regions_merge(region_t *reg) -{ - int i = 0, j; - while ( inregs ) - { - j = i + 1; - while ( jnregs && reg->regs[i].end >= reg->regs[j].start ) - { - if ( reg->regs[i].end < reg->regs[j].end ) reg->regs[i].end = reg->regs[j].end; - reg->regs[j].start = 1; reg->regs[j].end = 0; // if beg>end, this region marked for skipping - j++; - } - i = 
j; - } -} -void _regions_sort_and_merge(bcf_sr_regions_t *reg) -{ - if ( !reg ) return; - - int i; - for (i=0; inseqs; i++) - { - qsort(reg->regs[i].regs, reg->regs[i].nregs, sizeof(*reg->regs[i].regs), regions_cmp); - regions_merge(®->regs[i]); - } -} - -// File name or a list of genomic locations. If file name, NULL is returned. -// Recognises regions in the form chr, chr:pos, chr:beg-end, chr:beg-, {weird-chr-name}:pos. -// Cannot use hts_parse_region() as that requires the header and if header is not present, -// wouldn't learn the chromosome name. -static bcf_sr_regions_t *_regions_init_string(const char *str) -{ - bcf_sr_regions_t *reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t)); - reg->start = reg->end = -1; - reg->prev_start = reg->prev_end = reg->prev_seq = -1; - - kstring_t tmp = {0,0,0}; - const char *sp = str, *ep = str; - hts_pos_t from, to; - while ( 1 ) - { - tmp.l = 0; - if ( *ep=='{' ) - { - while ( *ep && *ep!='}' ) ep++; - if ( !*ep ) - { - hts_log_error("Could not parse the region, mismatching braces in: \"%s\"", str); - goto exit_nicely; - } - ep++; - kputsn(sp+1,ep-sp-2,&tmp); - } - else - { - while ( *ep && *ep!=',' && *ep!=':' ) ep++; - kputsn(sp,ep-sp,&tmp); - } - if ( *ep==':' ) - { - sp = ep+1; - from = hts_parse_decimal(sp,(char**)&ep,0); - if ( sp==ep ) - { - hts_log_error("Could not parse the region(s): %s", str); - goto exit_nicely; - } - if ( !*ep || *ep==',' ) - { - _regions_add(reg, tmp.s, from, from); - sp = ep; - continue; - } - if ( *ep!='-' ) - { - hts_log_error("Could not parse the region(s): %s", str); - goto exit_nicely; - } - ep++; - sp = ep; - to = hts_parse_decimal(sp,(char**)&ep,0); - if ( *ep && *ep!=',' ) - { - hts_log_error("Could not parse the region(s): %s", str); - goto exit_nicely; - } - if ( sp==ep ) to = MAX_CSI_COOR-1; - _regions_add(reg, tmp.s, from, to); - if ( !*ep ) break; - sp = ep; - } - else if ( !*ep || *ep==',' ) - { - if ( tmp.l ) _regions_add(reg, tmp.s, -1, -1); - if ( !*ep ) break; - 
sp = ++ep; - } - else - { - hts_log_error("Could not parse the region(s): %s", str); - goto exit_nicely; - } - } - free(tmp.s); - return reg; - -exit_nicely: - bcf_sr_regions_destroy(reg); - free(tmp.s); - return NULL; -} - -// ichr,ifrom,ito are 0-based; -// returns -1 on error, 0 if the line is a comment line, 1 on success -static int _regions_parse_line(char *line, int ichr, int ifrom, int ito, char **chr, char **chr_end, hts_pos_t *from, hts_pos_t *to) -{ - if (ifrom < 0 || ito < 0) return -1; - *chr_end = NULL; - - if ( line[0]=='#' ) return 0; - - int k,l; // index of the start and end column of the tab-delimited file - if ( ifrom <= ito ) - k = ifrom, l = ito; - else - l = ifrom, k = ito; - - int i; - char *se = line, *ss = NULL; // start and end - char *tmp; - for (i=0; i<=k && *se; i++) - { - ss = i==0 ? se++ : ++se; - while (*se && *se!='\t') se++; - } - if ( i<=k ) return -1; - if ( k==l ) - { - *from = *to = hts_parse_decimal(ss, &tmp, 0); - if ( tmp==ss || (*tmp && *tmp!='\t') ) return -1; - } - else - { - if ( k==ifrom ) - *from = hts_parse_decimal(ss, &tmp, 0); - else - *to = hts_parse_decimal(ss, &tmp, 0); - if ( ss==tmp || (*tmp && *tmp!='\t') ) return -1; - - for (i=k; i0 ) ss = ++se; - while (*se && *se!='\t') se++; - } - if ( i<=ichr ) return -1; - *chr_end = se; - *chr = ss; - return 1; -} - -bcf_sr_regions_t *bcf_sr_regions_init(const char *regions, int is_file, int ichr, int ifrom, int ito) -{ - bcf_sr_regions_t *reg; - if ( !is_file ) - { - reg = _regions_init_string(regions); - _regions_sort_and_merge(reg); - return reg; - } - - reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t)); - reg->start = reg->end = -1; - reg->prev_start = reg->prev_end = reg->prev_seq = -1; - - reg->file = hts_open(regions, "rb"); - if ( !reg->file ) - { - hts_log_error("Could not open file: %s", regions); - free(reg); - return NULL; - } - - reg->tbx = tbx_index_load3(regions, NULL, HTS_IDX_SAVE_REMOTE|HTS_IDX_SILENT_FAIL); - if ( !reg->tbx ) - { - size_t 
iline = 0; - int len = strlen(regions); - int is_bed = strcasecmp(".bed",regions+len-4) ? 0 : 1; - if ( !is_bed && !strcasecmp(".bed.gz",regions+len-7) ) is_bed = 1; - - if ( reg->file->format.format==vcf ) ito = 1; - - // read the whole file, tabix index is not present - while ( hts_getline(reg->file, KS_SEP_LINE, ®->line) > 0 ) - { - iline++; - char *chr, *chr_end; - hts_pos_t from, to; - int ret; - ret = _regions_parse_line(reg->line.s, ichr,ifrom,abs(ito), &chr,&chr_end,&from,&to); - if ( ret < 0 ) - { - if ( ito<0 ) - ret = _regions_parse_line(reg->line.s, ichr,ifrom,ifrom, &chr,&chr_end,&from,&to); - if ( ret<0 ) - { - hts_log_error("Could not parse %zu-th line of file %s, using the columns %d,%d[,%d]", - iline, regions,ichr+1,ifrom+1,ito+1); - hts_close(reg->file); reg->file = NULL; free(reg); - return NULL; - } - ito = ifrom; - } - else if ( ito<0 ) - ito = abs(ito); - if ( !ret ) continue; - if ( is_bed ) from++; - *chr_end = 0; - _regions_add(reg, chr, from, to); - *chr_end = '\t'; - } - hts_close(reg->file); reg->file = NULL; - if ( !reg->nseqs ) { free(reg); return NULL; } - _regions_sort_and_merge(reg); - return reg; - } - - reg->seq_names = (char**) tbx_seqnames(reg->tbx, ®->nseqs); - if ( !reg->seq_hash ) - reg->seq_hash = khash_str2int_init(); - int i; - for (i=0; inseqs; i++) - { - khash_str2int_set(reg->seq_hash,reg->seq_names[i],i); - } - reg->fname = strdup(regions); - reg->is_bin = 1; - return reg; -} - -void bcf_sr_regions_destroy(bcf_sr_regions_t *reg) -{ - int i; - free(reg->fname); - if ( reg->itr ) tbx_itr_destroy(reg->itr); - if ( reg->tbx ) tbx_destroy(reg->tbx); - if ( reg->file ) hts_close(reg->file); - if ( reg->als ) free(reg->als); - if ( reg->als_str.s ) free(reg->als_str.s); - free(reg->line.s); - if ( reg->regs ) - { - // free only in-memory names, tbx names are const - for (i=0; inseqs; i++) - { - free(reg->seq_names[i]); - free(reg->regs[i].regs); - } - } - free(reg->regs); - free(reg->seq_names); - 
khash_str2int_destroy(reg->seq_hash); - free(reg); -} - -int bcf_sr_regions_seek(bcf_sr_regions_t *reg, const char *seq) -{ - reg->iseq = reg->start = reg->end = -1; - if ( khash_str2int_get(reg->seq_hash, seq, ®->iseq) < 0 ) return -1; // sequence seq not in regions - - // using in-memory regions - if ( reg->regs ) - { - reg->regs[reg->iseq].creg = -1; - return 0; - } - - // reading regions from tabix - if ( reg->itr ) tbx_itr_destroy(reg->itr); - reg->itr = tbx_itr_querys(reg->tbx, seq); - if ( reg->itr ) return 0; - - return -1; -} - -// Returns 0 on success, -1 when done -static int advance_creg(region_t *reg) -{ - int i = reg->creg + 1; - while ( inregs && reg->regs[i].start > reg->regs[i].end ) i++; // regions with start>end are marked to skip by merge_regions() - reg->creg = i; - if ( i>=reg->nregs ) return -1; - return 0; -} - -int bcf_sr_regions_next(bcf_sr_regions_t *reg) -{ - if ( reg->iseq<0 ) return -1; - reg->start = reg->end = -1; - reg->nals = 0; - - // using in-memory regions - if ( reg->regs ) - { - while ( reg->iseq < reg->nseqs ) - { - if ( advance_creg(®->regs[reg->iseq])==0 ) break; // a valid record was found - reg->iseq++; - } - if ( reg->iseq >= reg->nseqs ) { reg->iseq = -1; return -1; } // no more regions left - region1_t *creg = ®->regs[reg->iseq].regs[reg->regs[reg->iseq].creg]; - reg->start = creg->start; - reg->end = creg->end; - return 0; - } - - // reading from tabix - char *chr, *chr_end; - int ichr = 0, ifrom = 1, ito = 2, is_bed = 0; - hts_pos_t from, to; - if ( reg->tbx ) - { - ichr = reg->tbx->conf.sc-1; - ifrom = reg->tbx->conf.bc-1; - ito = reg->tbx->conf.ec-1; - if ( ito<0 ) ito = ifrom; - is_bed = reg->tbx->conf.preset==TBX_UCSC ? 
1 : 0; - } - - int ret = 0; - while ( !ret ) - { - if ( reg->itr ) - { - // tabix index present, reading a chromosome block - ret = tbx_itr_next(reg->file, reg->tbx, reg->itr, ®->line); - if ( ret<0 ) { reg->iseq = -1; return -1; } - } - else - { - if ( reg->is_bin ) - { - // Waited for seek which never came. Reopen in text mode and stream - // through the regions, otherwise hts_getline would fail - hts_close(reg->file); - reg->file = hts_open(reg->fname, "r"); - if ( !reg->file ) - { - hts_log_error("Could not open file: %s", reg->fname); - reg->file = NULL; - bcf_sr_regions_destroy(reg); - return -1; - } - reg->is_bin = 0; - } - - // tabix index absent, reading the whole file - ret = hts_getline(reg->file, KS_SEP_LINE, ®->line); - if ( ret<0 ) { reg->iseq = -1; return -1; } - } - ret = _regions_parse_line(reg->line.s, ichr,ifrom,ito, &chr,&chr_end,&from,&to); - if ( ret<0 ) - { - hts_log_error("Could not parse the file %s, using the columns %d,%d,%d", - reg->fname,ichr+1,ifrom+1,ito+1); - return -1; - } - } - if ( is_bed ) from++; - - *chr_end = 0; - if ( khash_str2int_get(reg->seq_hash, chr, ®->iseq)<0 ) - { - hts_log_error("Broken tabix index? 
The sequence \"%s\" not in dictionary [%s]", - chr, reg->line.s); - exit(1); - } - *chr_end = '\t'; - - reg->start = from - 1; - reg->end = to - 1; - return 0; -} - -static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec) -{ - if ( reg->regs ) - { - // payload is not supported for in-memory regions, switch to regidx instead in future - hts_log_error("Compressed and indexed targets file is required"); - exit(1); - } - - int i = 0, max_len = 0; - if ( !reg->nals ) - { - char *ss = reg->line.s; - while ( inals = 1; - while ( *se && *se!='\t' ) - { - if ( *se==',' ) reg->nals++; - se++; - } - ks_resize(®->als_str, se-ss+1+reg->nals); - reg->als_str.l = 0; - hts_expand(char*,reg->nals,reg->mals,reg->als); - reg->nals = 0; - - se = ss; - while ( *(++se) ) - { - if ( *se=='\t' ) break; - if ( *se!=',' ) continue; - reg->als[reg->nals] = ®->als_str.s[reg->als_str.l]; - kputsn(ss,se-ss,®->als_str); - if ( ®->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = ®->als_str.s[reg->als_str.l] - reg->als[reg->nals]; - reg->als_str.l++; - reg->nals++; - ss = ++se; - } - reg->als[reg->nals] = ®->als_str.s[reg->als_str.l]; - kputsn(ss,se-ss,®->als_str); - if ( ®->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = ®->als_str.s[reg->als_str.l] - reg->als[reg->nals]; - reg->nals++; - reg->als_type = max_len > 1 ? VCF_INDEL : VCF_SNP; // this is a simplified check, see vcf.c:bcf_set_variant_types - } - int type = bcf_get_variant_types(rec); - if ( reg->als_type & VCF_INDEL ) - return type & VCF_INDEL ? 1 : 0; - return !(type & VCF_INDEL) ? 
1 : 0; -} - -int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end) -{ - return _bcf_sr_regions_overlap(reg,seq,start,end,1); -} - -static int _bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end, int missed_reg_handler) -{ - int iseq; - if ( khash_str2int_get(reg->seq_hash, seq, &iseq)<0 ) return -1; // no such sequence - if ( missed_reg_handler && !reg->missed_reg_handler ) missed_reg_handler = 0; - - if ( reg->prev_seq==-1 || iseq!=reg->prev_seq || reg->prev_start > start ) // new chromosome or after a seek - { - // flush regions left on previous chromosome - if ( missed_reg_handler && reg->prev_seq!=-1 && reg->iseq!=-1 ) - bcf_sr_regions_flush(reg); - - bcf_sr_regions_seek(reg, seq); - reg->start = reg->end = -1; - } - if ( reg->prev_seq==iseq && reg->iseq!=iseq ) return -2; // no more regions on this chromosome - reg->prev_seq = reg->iseq; - reg->prev_start = start; - - while ( iseq==reg->iseq && reg->end < start ) - { - if ( bcf_sr_regions_next(reg) < 0 ) return -2; // no more regions left - if ( reg->iseq != iseq ) return -1; // does not overlap any regions - if ( missed_reg_handler && reg->end < start ) reg->missed_reg_handler(reg, reg->missed_reg_data); - } - if ( reg->start <= end ) return 0; // region overlap - return -1; // no overlap -} - -int bcf_sr_regions_flush(bcf_sr_regions_t *reg) -{ - if ( !reg->missed_reg_handler || reg->prev_seq==-1 ) return 0; - while ( !bcf_sr_regions_next(reg) ) reg->missed_reg_handler(reg, reg->missed_reg_data); - return 0; // FIXME: check for errs in this function -} - diff --git a/src/htslib-1.19.1/tabix.1 b/src/htslib-1.19.1/tabix.1 deleted file mode 100644 index 0d852a9..0000000 --- a/src/htslib-1.19.1/tabix.1 +++ /dev/null @@ -1,203 +0,0 @@ -.TH tabix 1 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools" -.SH NAME -.PP -tabix \- Generic indexer for TAB-delimited genome position files -.\" -.\" Copyright (C) 2009-2011 Broad 
Institute. -.\" Copyright (C) 2014, 2016, 2018, 2020, 2022 Genome Research Ltd. -.\" -.\" Author: Heng Li -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -.SH SYNOPSIS -.PP -.B tabix -.RB [ -0lf ] -.RB [ -p -gff|bed|sam|vcf] -.RB [ -s -.IR seqCol ] -.RB [ -b -.IR begCol ] -.RB [ -e -.IR endCol ] -.RB [ -S -.IR lineSkip ] -.RB [ -c -.IR metaChar ] -.I in.tab.bgz -.RI [ "region1 " [ "region2 " [ ... "]]]" - -.SH DESCRIPTION -.PP -Tabix indexes a TAB-delimited genome position file -.I in.tab.bgz -and creates an index file -.RI ( in.tab.bgz.tbi -or -.IR in.tab.bgz.csi ) -when -.I region -is absent from the command-line. The input data file must be position -sorted and compressed by -.B bgzip -which has a -.BR gzip (1) -like interface. - -After indexing, tabix is able to quickly retrieve data lines overlapping -.I regions -specified in the format "chr:beginPos-endPos". 
-(Coordinates specified in this region format are 1-based and inclusive.) - -Fast data retrieval also -works over network if URI is given as a file name and in this case the -index file will be downloaded if it is not present locally. - -The tabix -.RI ( .tbi ) -and BAI index formats can handle individual chromosomes up to 512 Mbp -(2^29 bases) in length. -If your input file might contain data lines with begin or end positions -greater than that, you will need to use a CSI index. - -.SH INDEXING OPTIONS -.TP 10 -.B -0, --zero-based -Specify that the position in the data file is 0-based half-open -(e.g. UCSC files) rather than 1-based. -.TP -.BI "-b, --begin " INT -Column of start chromosomal position. [4] -.TP -.BI "-c, --comment " CHAR -Skip lines started with character CHAR. [#] -.TP -.BI "-C, --csi" -Produce CSI format index instead of classical tabix or BAI style indices. -.TP -.BI "-e, --end " INT -Column of end chromosomal position. The end column can be the same as the -start column. [5] -.TP -.B "-f, --force " -Force to overwrite the index file if it is present. -.TP -.BI "-m, --min-shift " INT -Set minimal interval size for CSI indices to 2^INT [14] -.TP -.BI "-p, --preset " STR -Input format for indexing. Valid values are: gff, bed, sam, vcf. -This option should not be applied together with any of -.BR -s ", " -b ", " -e ", " -c " and " -0 ; -it is not used for data retrieval because this setting is stored in -the index file. [gff] -.TP -.BI "-s, --sequence " INT -Column of sequence name. Option -.BR -s ", " -b ", " -e ", " -S ", " -c " and " -0 -are all stored in the index file and thus not used in data retrieval. [1] -.TP -.BI "-S, --skip-lines " INT -Skip first INT lines in the data file. [0] - -.SH QUERYING AND OTHER OPTIONS -.TP -.B "-h, --print-header " -Print also the header/meta lines. -.TP -.B "-H, --only-header " -Print only the header/meta lines. -.TP -.B "-l, --list-chroms " -List the sequence names stored in the index file. 
-.TP -.BI "-r, --reheader " FILE -Replace the header with the content of FILE -.TP -.BI "-R, --regions " FILE -Restrict to regions listed in the FILE. The FILE can be BED file (requires .bed, .bed.gz, .bed.bgz -file name extension) or a TAB-delimited file with CHROM, POS, and, optionally, -POS_TO columns, where positions are 1-based and inclusive. When this option is in use, the input -file may not be sorted. -.TP -.BI "-T, --targets " FILE -Similar to -.B -R -but the entire input will be read sequentially and regions not listed in FILE will be skipped. -.TP -.BI "-D " -Do not download the index file before opening it. Valid for remote files only. -.TP -.BI "--cache " INT -Set the BGZF block cache size to INT megabytes. [10] - -This is of most benefit when the -.B -R -option is used, which can cause blocks to be read more than once. -Setting the size to 0 will disable the cache. -.TP -.B --separate-regions -This option can be used when multiple regions are supplied in the command line -and the user needs to quickly see which file records belong to which region. -For this, a line with the name of the region, preceded by the file specific -comment symbol, is inserted in the output before its corresponding group of -records. -.TP -.BI "--verbosity " INT -Set verbosity of logging messages printed to stderr. -The default is 3, which turns on error and warning messages; -2 reduces warning messages; -1 prints only error messages and 0 is mostly silent. -Values higher than 3 produce additional informational and debugging messages. -.PP -.SH EXAMPLE -(grep "^#" in.gff; grep -v "^#" in.gff | sort -t"`printf '\(rst'`" -k1,1 -k4,4n) | bgzip > sorted.gff.gz; - -tabix -p gff sorted.gff.gz; - -tabix sorted.gff.gz chr1:10,000,000-20,000,000; - -.SH NOTES -It is straightforward to achieve overlap queries using the standard -B-tree index (with or without binning) implemented in all SQL databases, -or the R-tree index in PostgreSQL and Oracle. 
But there are still many -reasons to use tabix. Firstly, tabix directly works with a lot of widely -used TAB-delimited formats such as GFF/GTF and BED. We do not need to -design database schema or specialized binary formats. Data do not need -to be duplicated in different formats, either. Secondly, tabix works on -compressed data files while most SQL databases do not. The GenCode -annotation GTF can be compressed down to 4%. Thirdly, tabix is -fast. The same indexing algorithm is known to work efficiently for an -alignment with a few billion short reads. SQL databases probably cannot -easily handle data at this scale. Last but not the least, tabix supports -remote data retrieval. One can put the data file and the index at an FTP -or HTTP server, and other users or even web services will be able to get -a slice without downloading the entire file. - -.SH AUTHOR -.PP -Tabix was written by Heng Li. The BGZF library was originally -implemented by Bob Handsaker and modified by Heng Li for remote file -access and in-memory caching. - -.SH SEE ALSO -.IR bgzip (1), -.IR samtools (1) diff --git a/src/htslib-1.19.1/tabix.c b/src/htslib-1.19.1/tabix.c deleted file mode 100644 index e20c0fe..0000000 --- a/src/htslib-1.19.1/tabix.c +++ /dev/null @@ -1,720 +0,0 @@ -/* tabix.c -- Generic indexer for TAB-delimited genome position files. - - Copyright (C) 2009-2011 Broad Institute. - Copyright (C) 2010-2012, 2014-2020 Genome Research Ltd. 
- - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "htslib/tbx.h" -#include "htslib/sam.h" -#include "htslib/vcf.h" -#include "htslib/kseq.h" -#include "htslib/bgzf.h" -#include "htslib/hts.h" -#include "htslib/regidx.h" -#include "htslib/hts_defs.h" -#include "htslib/hts_log.h" - -typedef struct -{ - char *regions_fname, *targets_fname; - int print_header, header_only, cache_megs, download_index, separate_regs; -} -args_t; - -static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) HTS_NORETURN -error(const char *format, ...) -{ - va_list ap; - fflush(stdout); - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); - fflush(stderr); - exit(EXIT_FAILURE); -} - -static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) HTS_NORETURN -error_errno(const char *format, ...) 
-{ - va_list ap; - int eno = errno; - fflush(stdout); - if (format) { - va_start(ap, format); - vfprintf(stderr, format, ap); - va_end(ap); - } - if (eno) { - fprintf(stderr, "%s%s\n", format ? ": " : "", strerror(eno)); - } else { - fprintf(stderr, "\n"); - } - fflush(stderr); - exit(EXIT_FAILURE); -} - - -#define IS_GFF (1<<0) -#define IS_BED (1<<1) -#define IS_SAM (1<<2) -#define IS_VCF (1<<3) -#define IS_BCF (1<<4) -#define IS_BAM (1<<5) -#define IS_CRAM (1<<6) -#define IS_TXT (IS_GFF|IS_BED|IS_SAM|IS_VCF) - -int file_type(const char *fname) -{ - int l = strlen(fname); - if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF; - else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED; - else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM; - else if (l>=7 && strcasecmp(fname+l-7, ".vcf.gz") == 0) return IS_VCF; - else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF; - else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM; - else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM; - - htsFile *fp = hts_open(fname,"r"); - if (!fp) { - if (errno == ENOEXEC) { - // hts_open() uses this to report that it didn't understand the - // file format. - error("Couldn't understand format of \"%s\"\n", fname); - } else { - error_errno("Couldn't open \"%s\"", fname); - } - } - enum htsExactFormat format = hts_get_format(fp)->format; - hts_close(fp); - if ( format == bcf ) return IS_BCF; - if ( format == bam ) return IS_BAM; - if ( format == cram ) return IS_CRAM; - if ( format == vcf ) return IS_VCF; - - return 0; -} - -static char **parse_regions(char *regions_fname, char **argv, int argc, int *nregs) -{ - kstring_t str = {0,0,0}; - int iseq = 0, ireg = 0; - char **regs = NULL; - *nregs = argc; - - if ( regions_fname ) - { - // improve me: this is a too heavy machinery for parsing regions... 
- - regidx_t *idx = regidx_init(regions_fname, NULL, NULL, 0, NULL); - if ( !idx ) { - error_errno("Could not build region list for \"%s\"", regions_fname); - } - regitr_t *itr = regitr_init(idx); - if ( !itr ) { - error_errno("Could not initialize an iterator over \"%s\"", - regions_fname); - } - - (*nregs) += regidx_nregs(idx); - regs = (char**) malloc(sizeof(char*)*(*nregs)); - if (!regs) error_errno(NULL); - - int nseq; - char **seqs = regidx_seq_names(idx, &nseq); - for (iseq=0; iseqbeg+1, itr->end+1) < 0) { - error_errno(NULL); - } - regs[ireg] = strdup(str.s); - if (!regs[ireg]) error_errno(NULL); - ireg++; - } - } - regidx_destroy(idx); - regitr_destroy(itr); - } - free(str.s); - - if ( !ireg ) - { - if ( argc ) - { - regs = (char**) malloc(sizeof(char*)*argc); - if (!regs) error_errno(NULL); - } - else - { - regs = (char**) malloc(sizeof(char*)); - if (!regs) error_errno(NULL); - regs[0] = strdup("."); - if (!regs[0]) error_errno(NULL); - *nregs = 1; - } - } - - for (iseq=0; iseqformat; - - if (args->cache_megs) - hts_set_cache_size(fp, args->cache_megs * 1048576); - - regidx_t *reg_idx = NULL; - if ( args->targets_fname ) - { - reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL); - if (!reg_idx) - error_errno("Could not build region list for \"%s\"", - args->targets_fname); - } - - if ( format == bcf ) - { - htsFile *out = hts_open("-","w"); - if ( !out ) error_errno("Could not open stdout"); - hts_idx_t *idx = bcf_index_load3(fname, NULL, args->download_index ? 
HTS_IDX_SAVE_REMOTE : 0); - if ( !idx ) error_errno("Could not load .csi index of \"%s\"", fname); - - bcf_hdr_t *hdr = bcf_hdr_read(fp); - if ( !hdr ) error_errno("Could not read the header from \"%s\"", fname); - - if ( args->print_header ) { - if ( bcf_hdr_write(out,hdr)!=0 ) - error_errno("Failed to write to stdout"); - } - if ( !args->header_only ) - { - assert(regs != NULL); - bcf1_t *rec = bcf_init(); - if (!rec) error_errno(NULL); - for (i=0; i=0 ) - { - if ( reg_idx ) - { - const char *chr = bcf_seqname(hdr,rec); - if (!chr) { - error("Bad BCF record in \"%s\" : " - "Invalid CONTIG id %d\n", - fname, rec->rid); - } - if ( !regidx_overlap(reg_idx,chr,rec->pos,rec->pos+rec->rlen-1, NULL) ) continue; - } - if (!found) { - if (args->separate_regs) printf("%c%s\n", conf->meta_char, regs[i]); - found = 1; - } - if ( bcf_write(out,hdr,rec)!=0 ) { - error_errno("Failed to write to stdout"); - } - } - - if (ret < -1) { - error_errno("Reading \"%s\" failed", fname); - } - bcf_itr_destroy(itr); - } - bcf_destroy(rec); - } - if ( hts_close(out) ) - error_errno("hts_close returned non-zero status for stdout"); - - bcf_hdr_destroy(hdr); - hts_idx_destroy(idx); - } - else if ( format==vcf || format==sam || format==bed || format==text_format || format==unknown_format ) - { - tbx_t *tbx = tbx_index_load3(fname, NULL, args->download_index ? 
HTS_IDX_SAVE_REMOTE : 0); - if ( !tbx ) error_errno("Could not load .tbi/.csi index of %s", fname); - kstring_t str = {0,0,0}; - if ( args->print_header ) - { - int ret; - while ((ret = hts_getline(fp, KS_SEP_LINE, &str)) >= 0) - { - if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break; - if (puts(str.s) < 0) - error_errno("Error writing to stdout"); - } - if (ret < -1) error_errno("Reading \"%s\" failed", fname); - } - if ( !args->header_only ) - { - int nseq; - const char **seq = NULL; - if ( reg_idx ) { - seq = tbx_seqnames(tbx, &nseq); - if (!seq) error_errno("Failed to get sequence names list"); - } - for (i=0; i= 0) - { - if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end-1, NULL) ) continue; - if (!found) { - if (args->separate_regs) printf("%c%s\n", conf->meta_char, regs[i]); - found = 1; - } - if (puts(str.s) < 0) - error_errno("Failed to write to stdout"); - } - if (ret < -1) error_errno("Reading \"%s\" failed", fname); - tbx_itr_destroy(itr); - } - free(seq); - } - free(str.s); - tbx_destroy(tbx); - } - else if ( format==bam ) - error("Please use \"samtools view\" for querying BAM files.\n"); - - if ( reg_idx ) regidx_destroy(reg_idx); - if ( hts_close(fp) ) - error_errno("hts_close returned non-zero status: %s", fname); - - for (i=0; iblock_length ) return -1; - - char *buffer = fp->uncompressed_block; - int skip_until = 0; - - // Skip the header: find out the position of the data block - if ( buffer[0]==conf->meta_char ) - { - skip_until = 1; - while (1) - { - if ( buffer[skip_until]=='\n' ) - { - skip_until++; - if ( skip_until>=fp->block_length ) - { - if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", fname); - skip_until = 0; - } - // The header has finished - if ( buffer[skip_until]!=conf->meta_char ) break; - } - skip_until++; - if ( skip_until>=fp->block_length ) - { - if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", 
fname); - skip_until = 0; - } - } - } - - // Output the new header - FILE *hdr = fopen(header,"r"); - if ( !hdr ) error("%s: %s", header,strerror(errno)); - const size_t page_size = 32768; - char *buf = malloc(page_size); - BGZF *bgzf_out = bgzf_open("-", "w"); - ssize_t nread; - - if (!buf) error("%s\n", strerror(errno)); - if (!bgzf_out) - error_errno("Couldn't open output stream"); - while ( (nread=fread(buf,1,page_size-1,hdr))>0 ) - { - if ( nreaderrcode); - } - if ( ferror(hdr) ) error_errno("Failed to read \"%s\"", header); - if ( fclose(hdr) ) error_errno("Closing \"%s\" failed", header); - - // Output all remaining data read with the header block - if ( fp->block_length - skip_until > 0 ) - { - if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error_errno("Write error %d",fp->errcode); - } - if (bgzf_flush(bgzf_out) < 0) - error_errno("Write error %d", bgzf_out->errcode); - - while (1) - { - nread = bgzf_raw_read(fp, buf, page_size); - if ( nread<=0 ) break; - - int count = bgzf_raw_write(bgzf_out, buf, nread); - if (count != nread) error_errno("Write failed, wrote %d instead of %d bytes", count,(int)nread); - } - if (nread < 0) error_errno("Error reading \"%s\"", fname); - if (bgzf_close(bgzf_out) < 0) - error_errno("Error %d closing output", bgzf_out->errcode); - if (bgzf_close(fp) < 0) - error_errno("Error %d closing \"%s\"", bgzf_out->errcode, fname); - free(buf); - } - else - error("todo: reheader BCF, BAM\n"); // BCF is difficult, records contain pointers to the header. 
- return 0; -} - -static int usage(FILE *fp, int status) -{ - fprintf(fp, "\n"); - fprintf(fp, "Version: %s\n", hts_version()); - fprintf(fp, "Usage: tabix [OPTIONS] [FILE] [REGION [...]]\n"); - fprintf(fp, "\n"); - fprintf(fp, "Indexing Options:\n"); - fprintf(fp, " -0, --zero-based coordinates are zero-based\n"); - fprintf(fp, " -b, --begin INT column number for region start [4]\n"); - fprintf(fp, " -c, --comment CHAR skip comment lines starting with CHAR [null]\n"); - fprintf(fp, " -C, --csi generate CSI index for VCF (default is TBI)\n"); - fprintf(fp, " -e, --end INT column number for region end (if no end, set INT to -b) [5]\n"); - fprintf(fp, " -f, --force overwrite existing index without asking\n"); - fprintf(fp, " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n"); - fprintf(fp, " -p, --preset STR gff, bed, sam, vcf\n"); - fprintf(fp, " -s, --sequence INT column number for sequence names (suppressed by -p) [1]\n"); - fprintf(fp, " -S, --skip-lines INT skip first INT lines [0]\n"); - fprintf(fp, "\n"); - fprintf(fp, "Querying and other options:\n"); - fprintf(fp, " -h, --print-header print also the header lines\n"); - fprintf(fp, " -H, --only-header print only the header lines\n"); - fprintf(fp, " -l, --list-chroms list chromosome names\n"); - fprintf(fp, " -r, --reheader FILE replace the header with the content of FILE\n"); - fprintf(fp, " -R, --regions FILE restrict to regions listed in the file\n"); - fprintf(fp, " -T, --targets FILE similar to -R but streams rather than index-jumps\n"); - fprintf(fp, " -D do not download the index file\n"); - fprintf(fp, " --cache INT set cache size to INT megabytes (0 disables) [10]\n"); - fprintf(fp, " --separate-regions separate the output by corresponding regions\n"); - fprintf(fp, " --verbosity INT set verbosity [3]\n"); - fprintf(fp, "\n"); - return status; -} - -int main(int argc, char *argv[]) -{ - int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0; - 
tbx_conf_t conf = tbx_conf_gff; - char *reheader = NULL; - args_t args; - memset(&args,0,sizeof(args_t)); - args.cache_megs = 10; - args.download_index = 1; - int32_t new_line_skip = -1; - - static const struct option loptions[] = - { - {"help", no_argument, NULL, 2}, - {"regions", required_argument, NULL, 'R'}, - {"targets", required_argument, NULL, 'T'}, - {"csi", no_argument, NULL, 'C'}, - {"zero-based", no_argument, NULL, '0'}, - {"print-header", no_argument, NULL, 'h'}, - {"only-header", no_argument, NULL, 'H'}, - {"begin", required_argument, NULL, 'b'}, - {"comment", required_argument, NULL, 'c'}, - {"end", required_argument, NULL, 'e'}, - {"force", no_argument, NULL, 'f'}, - {"min-shift", required_argument, NULL, 'm'}, - {"preset", required_argument, NULL, 'p'}, - {"sequence", required_argument, NULL, 's'}, - {"skip-lines", required_argument, NULL, 'S'}, - {"list-chroms", no_argument, NULL, 'l'}, - {"reheader", required_argument, NULL, 'r'}, - {"version", no_argument, NULL, 1}, - {"verbosity", required_argument, NULL, 3}, - {"cache", required_argument, NULL, 4}, - {"separate-regions", no_argument, NULL, 5}, - {NULL, 0, NULL, 0} - }; - - char *tmp; - while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:D", loptions,NULL)) >= 0) - { - switch (c) - { - case 'R': args.regions_fname = optarg; break; - case 'T': args.targets_fname = optarg; break; - case 'C': do_csi = 1; break; - case 'r': reheader = optarg; break; - case 'h': args.print_header = 1; break; - case 'H': args.print_header = 1; args.header_only = 1; break; - case 'l': list_chroms = 1; break; - case '0': conf.preset |= TBX_UCSC; detect = 0; break; - case 'b': - conf.bc = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -b %s\n", optarg); - detect = 0; - break; - case 'e': - conf.ec = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -e %s\n", optarg); - detect = 0; - break; - case 'c': conf.meta_char = *optarg; detect = 0; break; - case 'f': 
is_force = 1; break; - case 'm': - min_shift = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -m %s\n", optarg); - break; - case 'p': - detect = 0; - if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff; - else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed; - else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam; - else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf; - else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed - else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf - else error("The preset string not recognised: '%s'\n", optarg); - break; - case 's': - conf.sc = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -s %s\n", optarg); - detect = 0; - break; - case 'S': - new_line_skip = strtol(optarg,&tmp,10); - if ( *tmp ) error("Could not parse argument: -S %s\n", optarg); - detect = 0; - break; - case 'D': - args.download_index = 0; - break; - case 1: - printf( -"tabix (htslib) %s\n" -"Copyright (C) 2024 Genome Research Ltd.\n", hts_version()); - return EXIT_SUCCESS; - case 2: - return usage(stdout, EXIT_SUCCESS); - case 3: { - int v = atoi(optarg); - if (v < 0) v = 0; - hts_set_log_level(v); - break; - } - case 4: - args.cache_megs = atoi(optarg); - if (args.cache_megs < 0) { - args.cache_megs = 0; - } else if (args.cache_megs >= INT_MAX / 1048576) { - args.cache_megs = INT_MAX / 1048576; - } - break; - case 5: - args.separate_regs = 1; - break; - default: return usage(stderr, EXIT_FAILURE); - } - } - - if (new_line_skip >= 0) - conf.line_skip = new_line_skip; - - if ( optind==argc ) return usage(stderr, EXIT_FAILURE); - - if ( list_chroms ) - return query_chroms(argv[optind], args.download_index); - - char *fname = argv[optind]; - int ftype = file_type(fname); - if ( detect ) // no preset given - { - if ( ftype==IS_GFF ) conf = tbx_conf_gff; - else if ( ftype==IS_BED ) conf = tbx_conf_bed; - else if ( ftype==IS_SAM ) conf = tbx_conf_sam; - else if 
( ftype==IS_VCF ) - { - conf = tbx_conf_vcf; - if ( !min_shift && do_csi ) min_shift = 14; - } - else if ( ftype==IS_BCF ) - { - if ( !min_shift ) min_shift = 14; - } - else if ( ftype==IS_BAM ) - { - if ( !min_shift ) min_shift = 14; - } - } - if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname ) - { - int nregs = 0; - char **regs = NULL; - if ( !args.header_only ) - regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs); - return query_regions(&args, &conf, fname, regs, nregs); - } - if ( do_csi ) - { - if ( !min_shift ) min_shift = 14; - min_shift *= do_csi; // positive for CSIv2, negative for CSIv1 - } - if ( min_shift!=0 && !do_csi ) do_csi = 1; - - if ( reheader ) - return reheader_file(fname, reheader, ftype, &conf); - - char *suffix = ".tbi"; - if ( do_csi ) suffix = ".csi"; - else if ( ftype==IS_BAM ) suffix = ".bai"; - else if ( ftype==IS_CRAM ) suffix = ".crai"; - - char *idx_fname = calloc(strlen(fname) + 6, 1); - if (!idx_fname) error("%s\n", strerror(errno)); - strcat(strcpy(idx_fname, fname), suffix); - - struct stat stat_tbi, stat_file; - if ( !is_force && stat(idx_fname, &stat_tbi)==0 ) - { - // Before complaining about existing index, check if the VCF file isn't - // newer. This is a common source of errors, people tend not to notice - // that tabix failed - stat(fname, &stat_file); - if ( stat_file.st_mtime <= stat_tbi.st_mtime ) - error("[tabix] the index file exists. 
Please use '-f' to overwrite.\n"); - } - free(idx_fname); - - int ret; - if ( ftype==IS_CRAM ) - { - if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); - return 0; - } - else if ( do_csi ) - { - if ( ftype==IS_BCF ) - { - if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname); - return 0; - } - if ( ftype==IS_BAM ) - { - if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); - return 0; - } - - switch (ret = tbx_index_build(fname, min_shift, &conf)) - { - case 0: - return 0; - case -2: - error("[tabix] the compression of '%s' is not BGZF\n", fname); - default: - error("tbx_index_build failed: %s\n", fname); - } - } - else // TBI index - { - switch (ret = tbx_index_build(fname, min_shift, &conf)) - { - case 0: - return 0; - case -2: - error("[tabix] the compression of '%s' is not BGZF\n", fname); - default: - error("tbx_index_build failed: %s\n", fname); - } - } - - return 0; -} diff --git a/src/htslib-1.19.1/tbx.c b/src/htslib-1.19.1/tbx.c deleted file mode 100644 index c2c5c6f..0000000 --- a/src/htslib-1.19.1/tbx.c +++ /dev/null @@ -1,496 +0,0 @@ -/* tbx.c -- tabix API functions. - - Copyright (C) 2009, 2010, 2012-2015, 2017-2020, 2022-2023 Genome Research Ltd. - Copyright (C) 2010-2012 Broad Institute. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include "htslib/tbx.h" -#include "htslib/bgzf.h" -#include "htslib/hts_endian.h" -#include "hts_internal.h" - -#include "htslib/khash.h" -KHASH_DECLARE(s2i, kh_cstr_t, int64_t) - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_gff = { 0, 1, 4, 5, '#', 0 }; - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_bed = { TBX_UCSC, 1, 2, 3, '#', 0 }; - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_psltbl = { TBX_UCSC, 15, 17, 18, '#', 0 }; - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_sam = { TBX_SAM, 3, 4, 0, '@', 0 }; - -HTSLIB_EXPORT -const tbx_conf_t tbx_conf_vcf = { TBX_VCF, 1, 2, 0, '#', 0 }; - -typedef struct { - int64_t beg, end; - char *ss, *se; - int tid; -} tbx_intv_t; - -static inline int get_tid(tbx_t *tbx, const char *ss, int is_add) -{ - khint_t k; - khash_t(s2i) *d; - if (tbx->dict == 0) tbx->dict = kh_init(s2i); - if (!tbx->dict) return -1; // Out of memory - d = (khash_t(s2i)*)tbx->dict; - if (is_add) { - int absent; - k = kh_put(s2i, d, ss, &absent); - if (absent < 0) { - return -1; // Out of memory - } else if (absent) { - char *ss_dup = strdup(ss); - if (ss_dup) { - kh_key(d, k) = ss_dup; - kh_val(d, k) = kh_size(d) - 1; - } else { - kh_del(s2i, d, k); - return -1; // Out of memory - } - } - } else k = kh_get(s2i, d, ss); - return k == kh_end(d)? 
-1 : kh_val(d, k); -} - -int tbx_name2id(tbx_t *tbx, const char *ss) -{ - return get_tid(tbx, ss, 0); -} - -int tbx_parse1(const tbx_conf_t *conf, size_t len, char *line, tbx_intv_t *intv) -{ - size_t i, b = 0; - int id = 1; - char *s; - intv->ss = intv->se = 0; intv->beg = intv->end = -1; - for (i = 0; i <= len; ++i) { - if (line[i] == '\t' || line[i] == 0) { - if (id == conf->sc) { - intv->ss = line + b; intv->se = line + i; - } else if (id == conf->bc) { - // here ->beg is 0-based. - intv->beg = strtoll(line + b, &s, 0); - - if (conf->bc <= conf->ec) // don't overwrite an already set end point - intv->end = intv->beg; - - if ( s==line+b ) return -1; // expected int - - if (!(conf->preset&TBX_UCSC)) - --intv->beg; - else if (conf->bc <= conf->ec) - ++intv->end; - - if (intv->beg < 0) { - hts_log_warning("Coordinate <= 0 detected. " - "Did you forget to use the -0 option?"); - intv->beg = 0; - } - if (intv->end < 1) intv->end = 1; - } else { - if ((conf->preset&0xffff) == TBX_GENERIC) { - if (id == conf->ec) - { - intv->end = strtoll(line + b, &s, 0); - if ( s==line+b ) return -1; // expected int - } - } else if ((conf->preset&0xffff) == TBX_SAM) { - if (id == 6) { // CIGAR - int l = 0; - char *t; - for (s = line + b; s < line + i;) { - long x = strtol(s, &t, 10); - char op = toupper_c(*t); - if (op == 'M' || op == 'D' || op == 'N') l += x; - s = t + 1; - } - if (l == 0) l = 1; - intv->end = intv->beg + l; - } - } else if ((conf->preset&0xffff) == TBX_VCF) { - if (id == 4) { - if (b < i) intv->end = intv->beg + (i - b); - } else if (id == 8) { // look for "END=" - int c = line[i]; - line[i] = 0; - s = strstr(line + b, "END="); - if (s == line + b) s += 4; - else if (s) { - s = strstr(line + b, ";END="); - if (s) s += 5; - } - if (s && *s != '.') { - long long end = strtoll(s, &s, 0); - if (end <= intv->beg) { - static int reported = 0; - if (!reported) { - int l = intv->ss ? 
(int) (intv->se - intv->ss) : 0; - hts_log_warning("VCF INFO/END=%lld is smaller than POS at %.*s:%"PRIhts_pos"\n" - "This tag will be ignored. " - "Note: only one invalid END tag will be reported.", - end, l >= 0 ? l : 0, - intv->ss ? intv->ss : "", - intv->beg); - reported = 1; - } - } else { - intv->end = end; - } - } - line[i] = c; - } - } - } - b = i + 1; - ++id; - } - } - if (intv->ss == 0 || intv->se == 0 || intv->beg < 0 || intv->end < 0) return -1; - return 0; -} - -static inline int get_intv(tbx_t *tbx, kstring_t *str, tbx_intv_t *intv, int is_add) -{ - if (tbx_parse1(&tbx->conf, str->l, str->s, intv) == 0) { - int c = *intv->se; - *intv->se = '\0'; intv->tid = get_tid(tbx, intv->ss, is_add); *intv->se = c; - if (intv->tid < 0) return -2; // get_tid out of memory - return (intv->beg >= 0 && intv->end >= 0)? 0 : -1; - } else { - char *type = NULL; - switch (tbx->conf.preset&0xffff) - { - case TBX_SAM: type = "TBX_SAM"; break; - case TBX_VCF: type = "TBX_VCF"; break; - case TBX_UCSC: type = "TBX_UCSC"; break; - default: type = "TBX_GENERIC"; break; - } - hts_log_error("Failed to parse %s, was wrong -p [type] used?\nThe offending line was: \"%s\"", - type, str->s); - return -1; - } -} - -/* - * Called by tabix iterator to read the next record. 
- * Returns >= 0 on success - * -1 on EOF - * <= -2 on error - */ -int tbx_readrec(BGZF *fp, void *tbxv, void *sv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - tbx_t *tbx = (tbx_t *) tbxv; - kstring_t *s = (kstring_t *) sv; - int ret; - if ((ret = bgzf_getline(fp, '\n', s)) >= 0) { - tbx_intv_t intv; - if (get_intv(tbx, s, &intv, 0) < 0) - return -2; - *tid = intv.tid; *beg = intv.beg; *end = intv.end; - } - return ret; -} - -static int tbx_set_meta(tbx_t *tbx) -{ - int i, l = 0, l_nm; - uint32_t x[7]; - char **name; - uint8_t *meta; - khint_t k; - khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; - - memcpy(x, &tbx->conf, 24); - name = (char**)malloc(sizeof(char*) * kh_size(d)); - if (!name) return -1; - for (k = kh_begin(d), l = 0; k != kh_end(d); ++k) { - if (!kh_exist(d, k)) continue; - name[kh_val(d, k)] = (char*)kh_key(d, k); - l += strlen(kh_key(d, k)) + 1; // +1 to include '\0' - } - l_nm = x[6] = l; - meta = (uint8_t*)malloc(l_nm + 28); - if (!meta) { free(name); return -1; } - if (ed_is_big()) - for (i = 0; i < 7; ++i) - x[i] = ed_swap_4(x[i]); - memcpy(meta, x, 28); - for (l = 28, i = 0; i < (int)kh_size(d); ++i) { - int x = strlen(name[i]) + 1; - memcpy(meta + l, name[i], x); - l += x; - } - free(name); - hts_idx_set_meta(tbx->idx, l, meta, 0); - return 0; -} - -// Minimal effort parser to extract reference length out of VCF header line -// This is used only used to adjust the number of levels if necessary, -// so not a major problem if it doesn't always work. 
-static void adjust_max_ref_len_vcf(const char *str, int64_t *max_ref_len) -{ - const char *ptr; - int64_t len; - if (strncmp(str, "##contig", 8) != 0) return; - ptr = strstr(str + 8, "length"); - if (!ptr) return; - for (ptr += 6; *ptr == ' ' || *ptr == '='; ptr++) {} - len = strtoll(ptr, NULL, 10); - if (*max_ref_len < len) *max_ref_len = len; -} - -// Same for sam files -static void adjust_max_ref_len_sam(const char *str, int64_t *max_ref_len) -{ - const char *ptr; - int64_t len; - if (strncmp(str, "@SQ", 3) != 0) return; - ptr = strstr(str + 3, "\tLN:"); - if (!ptr) return; - ptr += 4; - len = strtoll(ptr, NULL, 10); - if (*max_ref_len < len) *max_ref_len = len; -} - -// Adjusts number of levels if not big enough. This can happen for -// files with very large contigs. -static int adjust_n_lvls(int min_shift, int n_lvls, int64_t max_len) -{ - int64_t s = 1LL << (min_shift + n_lvls * 3); - max_len += 256; - for (; max_len > s; ++n_lvls, s <<= 3) {} - return n_lvls; -} - -tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf) -{ - tbx_t *tbx; - kstring_t str; - int ret, first = 0, n_lvls, fmt; - int64_t lineno = 0; - uint64_t last_off = 0; - tbx_intv_t intv; - int64_t max_ref_len = 0; - - str.s = 0; str.l = str.m = 0; - tbx = (tbx_t*)calloc(1, sizeof(tbx_t)); - if (!tbx) return NULL; - tbx->conf = *conf; - if (min_shift > 0) n_lvls = (TBX_MAX_SHIFT - min_shift + 2) / 3, fmt = HTS_FMT_CSI; - else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_TBI; - while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) { - ++lineno; - if (str.s[0] == tbx->conf.meta_char && fmt == HTS_FMT_CSI) { - switch (tbx->conf.preset) { - case TBX_SAM: - adjust_max_ref_len_sam(str.s, &max_ref_len); break; - case TBX_VCF: - adjust_max_ref_len_vcf(str.s, &max_ref_len); break; - default: - break; - } - } - if (lineno <= tbx->conf.line_skip || str.s[0] == tbx->conf.meta_char) { - last_off = bgzf_tell(fp); - continue; - } - if (first == 0) { - if (fmt == HTS_FMT_CSI) { - if (!max_ref_len) - 
max_ref_len = (int64_t)100*1024*1024*1024; // 100G default - n_lvls = adjust_n_lvls(min_shift, n_lvls, max_ref_len); - } - tbx->idx = hts_idx_init(0, fmt, last_off, min_shift, n_lvls); - if (!tbx->idx) goto fail; - first = 1; - } - ret = get_intv(tbx, &str, &intv, 1); - if (ret < -1) goto fail; // Out of memory - if (ret < 0) continue; // Skip unparsable lines - if (hts_idx_push(tbx->idx, intv.tid, intv.beg, intv.end, - bgzf_tell(fp), 1) < 0) { - goto fail; - } - } - if (ret < -1) goto fail; - if ( !tbx->idx ) tbx->idx = hts_idx_init(0, fmt, last_off, min_shift, n_lvls); // empty file - if (!tbx->idx) goto fail; - if ( !tbx->dict ) tbx->dict = kh_init(s2i); - if (!tbx->dict) goto fail; - if (hts_idx_finish(tbx->idx, bgzf_tell(fp)) != 0) goto fail; - if (tbx_set_meta(tbx) != 0) goto fail; - free(str.s); - return tbx; - - fail: - free(str.s); - tbx_destroy(tbx); - return NULL; -} - -void tbx_destroy(tbx_t *tbx) -{ - khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; - if (d != NULL) - { - khint_t k; - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((char*)kh_key(d, k)); - } - hts_idx_destroy(tbx->idx); - kh_destroy(s2i, d); - free(tbx); -} - -int tbx_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads, const tbx_conf_t *conf) -{ - tbx_t *tbx; - BGZF *fp; - int ret; - if ((fp = bgzf_open(fn, "r")) == 0) return -1; - if ( n_threads ) bgzf_mt(fp, n_threads, 256); - if ( bgzf_compression(fp) != bgzf ) { bgzf_close(fp); return -2; } - tbx = tbx_index(fp, min_shift, conf); - bgzf_close(fp); - if ( !tbx ) return -1; - ret = hts_idx_save_as(tbx->idx, fn, fnidx, min_shift > 0? 
HTS_FMT_CSI : HTS_FMT_TBI); - tbx_destroy(tbx); - return ret; -} - -int tbx_index_build2(const char *fn, const char *fnidx, int min_shift, const tbx_conf_t *conf) -{ - return tbx_index_build3(fn, fnidx, min_shift, 0, conf); -} - -int tbx_index_build(const char *fn, int min_shift, const tbx_conf_t *conf) -{ - return tbx_index_build3(fn, NULL, min_shift, 0, conf); -} - -static tbx_t *index_load(const char *fn, const char *fnidx, int flags) -{ - tbx_t *tbx; - uint8_t *meta; - char *nm, *p; - uint32_t l_meta, l_nm; - tbx = (tbx_t*)calloc(1, sizeof(tbx_t)); - if (!tbx) - return NULL; - tbx->idx = hts_idx_load3(fn, fnidx, HTS_FMT_TBI, flags); - if ( !tbx->idx ) - { - free(tbx); - return NULL; - } - meta = hts_idx_get_meta(tbx->idx, &l_meta); - if ( !meta || l_meta < 28) goto invalid; - - tbx->conf.preset = le_to_i32(&meta[0]); - tbx->conf.sc = le_to_i32(&meta[4]); - tbx->conf.bc = le_to_i32(&meta[8]); - tbx->conf.ec = le_to_i32(&meta[12]); - tbx->conf.meta_char = le_to_i32(&meta[16]); - tbx->conf.line_skip = le_to_i32(&meta[20]); - l_nm = le_to_u32(&meta[24]); - if (l_nm > l_meta - 28) goto invalid; - - p = nm = (char*)meta + 28; - // This assumes meta is NUL-terminated, so we can merrily strlen away. - // hts_idx_load_local() assures this for us by adding a NUL on the end - // of whatever it reads. - for (; p - nm < l_nm; p += strlen(p) + 1) { - if (get_tid(tbx, p, 1) < 0) { - hts_log_error("%s", strerror(errno)); - goto fail; - } - } - return tbx; - - invalid: - hts_log_error("Invalid index header for %s", fnidx ? 
fnidx : fn); - - fail: - tbx_destroy(tbx); - return NULL; -} - -tbx_t *tbx_index_load3(const char *fn, const char *fnidx, int flags) -{ - return index_load(fn, fnidx, flags); -} - -tbx_t *tbx_index_load2(const char *fn, const char *fnidx) -{ - return index_load(fn, fnidx, 1); -} - -tbx_t *tbx_index_load(const char *fn) -{ - return index_load(fn, NULL, 1); -} - -const char **tbx_seqnames(tbx_t *tbx, int *n) -{ - khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; - if (d == NULL) - { - *n = 0; - return calloc(1, sizeof(char *)); - } - int tid, m = kh_size(d); - const char **names = (const char**) calloc(m,sizeof(const char*)); - khint_t k; - if (!names) { - *n = 0; - return NULL; - } - for (k=kh_begin(d); k - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include - -#include "htslib/hfile.h" -#include "htslib/kstring.h" -#include "htslib/sam.h" // For stringify_argv() declaration - -#include "hts_internal.h" - -static int dehex(char c) -{ - if (c >= 'a' && c <= 'f') return c - 'a' + 10; - else if (c >= 'A' && c <= 'F') return c - 'A' + 10; - else if (c >= '0' && c <= '9') return c - '0'; - else return -1; // Hence dehex('\0') = -1 -} - -int hts_decode_percent(char *dest, size_t *destlen, const char *s) -{ - char *d = dest; - int hi, lo; - - while (*s) { - if (*s == '%' && (hi = dehex(s[1])) >= 0 && (lo = dehex(s[2])) >= 0) { - *d++ = (hi << 4) | lo; - s += 3; - } - else *d++ = *s++; - } - - *d = '\0'; - *destlen = d - dest; - return 0; -} - -static int debase64(char c) -{ - if (c >= 'a' && c <= 'z') return c - 'a' + 26; - else if (c >= 'A' && c <= 'Z') return c - 'A'; - else if (c >= '0' && c <= '9') return c - '0' + 52; - else if (c == '/') return 63; - else if (c == '+') return 62; - else return -1; // Hence debase64('\0') = -1 -} - -size_t hts_base64_decoded_length(size_t len) -{ - size_t nquartets = (len + 2) / 4; - return 3 * nquartets; -} - -int hts_decode_base64(char *dest, size_t *destlen, const char *s) -{ - char *d = dest; - int x0, x1, x2, x3; - - while (1) { - x0 = debase64(*s++); - x1 = (x0 >= 0)? debase64(*s++) : -1; - x2 = (x1 >= 0)? debase64(*s++) : -1; - x3 = (x2 >= 0)? 
debase64(*s++) : -1; - if (x3 < 0) break; - - *d++ = (x0 << 2) | (x1 >> 4); - *d++ = (x1 << 4) | (x2 >> 2); - *d++ = (x2 << 6) | x3; - } - - if (x1 >= 0) *d++ = (x0 << 2) | (x1 >> 4); - if (x2 >= 0) *d++ = (x1 << 4) | (x2 >> 2); - - *destlen = d - dest; - return 0; -} - -static char *encode_utf8(char *s, unsigned x) -{ - if (x >= 0x10000) { - *s++ = 0xF0 | (x >> 18); - *s++ = 0x80 | ((x >> 12) & 0x3F); - *s++ = 0x80 | ((x >> 6) & 0x3F); - *s++ = 0x80 | (x & 0x3F); - } - else if (x >= 0x800) { - *s++ = 0xE0 | (x >> 12); - *s++ = 0x80 | ((x >> 6) & 0x3F); - *s++ = 0x80 | (x & 0x3F); - } - else if (x >= 0x80) { - *s++ = 0xC0 | (x >> 6); - *s++ = 0x80 | (x & 0x3F); - } - else *s++ = x; - - return s; -} - -static char *sscan_string(char *s) -{ - char *d = s; - int d1, d2, d3, d4; - - for (;;) switch (*s) { - case '\\': - switch (s[1]) { - case '\0': *d = '\0'; return s+1; - case 'b': *d++ = '\b'; s += 2; break; - case 'f': *d++ = '\f'; s += 2; break; - case 'n': *d++ = '\n'; s += 2; break; - case 'r': *d++ = '\r'; s += 2; break; - case 't': *d++ = '\t'; s += 2; break; - default: *d++ = s[1]; s += 2; break; - case 'u': - if ((d1 = dehex(s[2])) >= 0 && (d2 = dehex(s[3])) >= 0 && - (d3 = dehex(s[4])) >= 0 && (d4 = dehex(s[5])) >= 0) { - d = encode_utf8(d, d1 << 12 | d2 << 8 | d3 << 4 | d4); - s += 6; - } - break; - } - break; - - case '"': - *d = '\0'; - return s+1; - - case '\0': - *d = '\0'; - return s; - - default: - *d++ = *s++; - break; - } -} - -static int fscan_string(hFILE *fp, kstring_t *d) -{ - int c, d1, d2, d3, d4; - uint32_t e = 0; - - while ((c = hgetc(fp)) != EOF) switch (c) { - case '\\': - if ((c = hgetc(fp)) == EOF) return e == 0 ? 
0 : -1; - switch (c) { - case 'b': e |= kputc('\b', d) < 0; break; - case 'f': e |= kputc('\f', d) < 0; break; - case 'n': e |= kputc('\n', d) < 0; break; - case 'r': e |= kputc('\r', d) < 0; break; - case 't': e |= kputc('\t', d) < 0; break; - default: e |= kputc(c, d) < 0; break; - case 'u': - if ((c = hgetc(fp)) != EOF && (d1 = dehex(c)) >= 0 && - (c = hgetc(fp)) != EOF && (d2 = dehex(c)) >= 0 && - (c = hgetc(fp)) != EOF && (d3 = dehex(c)) >= 0 && - (c = hgetc(fp)) != EOF && (d4 = dehex(c)) >= 0) { - char buf[8]; - char *lim = encode_utf8(buf, d1 << 12 | d2 << 8 | d3 << 4 | d4); - e |= kputsn(buf, lim - buf, d) < 0; - } - break; - } - break; - - case '"': - return e == 0 ? 0 : -1; - - default: - e |= kputc(c, d) < 0; - break; - } - return e == 0 ? 0 : -1; -} - -static char token_type(hts_json_token *token) -{ - const char *s = token->str; - - switch (*s) { - case 'f': - return (strcmp(s, "false") == 0)? 'b' : '?'; - case 'n': - return (strcmp(s, "null") == 0)? '.' : '?'; - case 't': - return (strcmp(s, "true") == 0)? 
'b' : '?'; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return 'n'; - default: - return '?'; - } -} - -HTSLIB_EXPORT -hts_json_token * hts_json_alloc_token(void) { - return calloc(1, sizeof(hts_json_token)); -} - -HTSLIB_EXPORT -char hts_json_token_type(hts_json_token *token) { - return token->type; -} - -HTSLIB_EXPORT -void hts_json_free_token(hts_json_token *token) { - free(token); -} - -HTSLIB_EXPORT -char *hts_json_token_str(hts_json_token *token) { - return token->str; -} - -HTSLIB_EXPORT -char hts_json_snext(char *str, size_t *state, hts_json_token *token) -{ - char *s = &str[*state >> 2]; - int hidden = *state & 3; - - if (hidden) { - *state &= ~3; - return token->type = "?}]?"[hidden]; - } - -#define STATE(s,h) (((s) - str) << 2 | (h)) - - for (;;) switch (*s) { - case ' ': - case '\t': - case '\r': - case '\n': - case ',': - case ':': - s++; - continue; - - case '\0': - return token->type = '\0'; - - case '{': - case '[': - case '}': - case ']': - *state = STATE(s+1, 0); - return token->type = *s; - - case '"': - token->str = s+1; - *state = STATE(sscan_string(s+1), 0); - return token->type = 's'; - - default: - token->str = s; - s += strcspn(s, " \t\r\n,]}"); - hidden = (*s == '}')? 1 : (*s == ']')? 
2 : 0; - if (*s != '\0') *s++ = '\0'; - *state = STATE(s, hidden); - return token->type = token_type(token); - } - -#undef STATE -} - -HTSLIB_EXPORT -char hts_json_fnext(struct hFILE *fp, hts_json_token *token, kstring_t *kstr) -{ - char peek; - int c; - - for (;;) switch (c = hgetc(fp)) { - case ' ': - case '\t': - case '\r': - case '\n': - case ',': - case ':': - continue; - - case EOF: - return token->type = '\0'; - - case '{': - case '[': - case '}': - case ']': - return token->type = c; - - case '"': - kstr->l = 0; - fscan_string(fp, kstr); - if (kstr->l == 0) kputsn("", 0, kstr); - token->str = kstr->s; - return token->type = 's'; - - default: - kstr->l = 0; - kputc(c, kstr); - while (hpeek(fp, &peek, 1) == 1 && !strchr(" \t\r\n,]}", peek)) { - if ((c = hgetc(fp)) == EOF) break; - kputc(c, kstr); - } - token->str = kstr->s; - return token->type = token_type(token); - } -} - - -typedef char hts_json_nextfn(void *arg1, void *arg2, hts_json_token *token); - -static char skip_value(char type, hts_json_nextfn *next, void *arg1, void *arg2) -{ - hts_json_token token; - int level; - - switch (type? 
type : next(arg1, arg2, &token)) { - case '\0': - return '\0'; - - case '?': - case '}': - case ']': - return '?'; - - case '{': - case '[': - level = 1; - break; - - default: - return 'v'; - } - - while (level > 0) - switch (next(arg1, arg2, &token)) { - case '\0': - return '\0'; - - case '?': - return '?'; - - case '{': - case '[': - level++; - break; - - case '}': - case ']': - --level; - break; - - default: - break; - } - - return 'v'; -} - -static char snext(void *arg1, void *arg2, hts_json_token *token) -{ - return hts_json_snext(arg1, arg2, token); -} - -HTSLIB_EXPORT -char hts_json_sskip_value(char *str, size_t *state, char type) -{ - return skip_value(type, snext, str, state); -} - -static char fnext(void *arg1, void *arg2, hts_json_token *token) -{ - return hts_json_fnext(arg1, token, arg2); -} - -HTSLIB_EXPORT -char hts_json_fskip_value(struct hFILE *fp, char type) -{ - kstring_t str = { 0, 0, NULL }; - char ret = skip_value(type, fnext, fp, &str); - free(str.s); - return ret; -} - -/* - * A function to help with construction of CL tags in @PG records. - * Takes an argc, argv pair and returns a single space-separated string. - * This string should be deallocated by the calling function. - * - * Returns malloced char * on success - * NULL on failure - */ -char *stringify_argv(int argc, char *argv[]) { - char *str, *cp; - size_t nbytes = 1; - int i, j; - - /* Allocate */ - for (i = 0; i < argc; i++) { - if (i > 0) nbytes += 1; - nbytes += strlen(argv[i]); - } - if (!(str = malloc(nbytes))) - return NULL; - - /* Copy */ - cp = str; - for (i = 0; i < argc; i++) { - if (i > 0) *cp++ = ' '; - j = 0; - while (argv[i][j]) { - if (argv[i][j] == '\t') - *cp++ = ' '; - else - *cp++ = argv[i][j]; - j++; - } - } - *cp++ = 0; - - return str; -} - -/* Utility function for printing possibly malicious text data - */ -const char * -hts_strprint(char *buf, size_t buflen, char quote, const char *s, size_t len) -{ - const char *slim = (len < SIZE_MAX)? 
&s[len] : NULL; - char *t = buf, *bufend = buf + buflen; - - size_t qlen = quote? 1 : 0; - if (quote) *t++ = quote; - - for (; slim? (s < slim) : (*s); s++) { - char c; - size_t clen; - switch (*s) { - case '\n': c = 'n'; clen = 2; break; - case '\r': c = 'r'; clen = 2; break; - case '\t': c = 't'; clen = 2; break; - case '\0': c = '0'; clen = 2; break; - case '\\': c = '\\'; clen = 2; break; - default: - c = *s; - if (c == quote) clen = 2; - else clen = isprint_c(c)? 1 : 4; - break; - } - - if (t-buf + clen + qlen >= buflen) { - while (t-buf + 3 + qlen >= buflen) t--; - if (quote) *t++ = quote; - strcpy(t, "..."); - return buf; - } - - if (clen == 4) { - snprintf(t, bufend - t, "\\x%02X", (unsigned char) c); - t += clen; - } - else { - if (clen == 2) *t++ = '\\'; - *t++ = c; - } - } - - if (quote) *t++ = quote; - *t = '\0'; - return buf; -} diff --git a/src/htslib-1.19.1/textutils_internal.h b/src/htslib-1.19.1/textutils_internal.h deleted file mode 100644 index 1ad0964..0000000 --- a/src/htslib-1.19.1/textutils_internal.h +++ /dev/null @@ -1,410 +0,0 @@ -/* textutils_internal.h -- non-bioinformatics utility routines for text etc. - - Copyright (C) 2016,2018-2020 Genome Research Ltd. - - Author: John Marshall - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef HTSLIB_TEXTUTILS_INTERNAL_H -#define HTSLIB_TEXTUTILS_INTERNAL_H - -/* N.B. These interfaces may be used by plug-ins */ - -#include -#include -#include "htslib/kstring.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/// Decode percent-encoded (URL-encoded) text -/** On input, _dest_ should be a buffer at least the same size as _s_, - and may be equal to _s_ to decode in place. On output, _dest_ will be - NUL-terminated and the number of characters written (not including the - NUL) is stored in _destlen_. -*/ -int hts_decode_percent(char *dest, size_t *destlen, const char *s); - -/// Return decoded data length given length of base64-encoded text -/** This gives an upper bound, as it overestimates by a byte or two when - the encoded text ends with (possibly omitted) `=` padding characters. -*/ -size_t hts_base64_decoded_length(size_t len); - -/// Decode base64-encoded data -/** On input, _dest_ should be a sufficient buffer (see `hts_base64_length()`), - and may be equal to _s_ to decode in place. On output, the number of - bytes written is stored in _destlen_. 
-*/ -int hts_decode_base64(char *dest, size_t *destlen, const char *s); - -/// Token structure returned by JSON lexing functions -/** Structure is defined in hts_internal.h - */ - -typedef struct hts_json_token hts_json_token; - -/// Allocate an empty JSON token structure, for use with hts_json_* functions -/** @return An empty token on success; NULL on failure - */ -HTSLIB_EXPORT -hts_json_token *hts_json_alloc_token(void); - -/// Free a JSON token -HTSLIB_EXPORT -void hts_json_free_token(hts_json_token *token); - -/// Accessor function to get JSON token type -/** @param token Pointer to JSON token - @return Character indicating the token type - -Token types correspond to scalar JSON values and selected punctuation -as follows: - - `s` string - - `n` number - - `b` boolean literal - - `.` null literal - - `{`, `}`, `[`, `]` object and array delimiters - - `?` lexing error - - `!` other errors (e.g. out of memory) - - `\0` terminator at end of input -*/ -HTSLIB_EXPORT -char hts_json_token_type(hts_json_token *token); - -/// Accessor function to get JSON token in string form -/** @param token Pointer to JSON token - @return String representation of the JSON token; NULL if unset - -If the token was parsed from a string using hts_json_snext(), the return value -will point into the string passed as the first parameter to hts_json_snext(). -If the token was parsed from a file using hts_json_fnext(), the return value -will point at the kstring_t buffer passed as the third parameter to -hts_json_fnext(). In that case, the value will only be valid until the -next call to hts_json_fnext(). 
- */ -HTSLIB_EXPORT -char *hts_json_token_str(hts_json_token *token); - -/// Read one JSON token from a string -/** @param str The input C string - @param state The input string state - @param token On return, filled in with the token read - @return The type of the token read - -On return, `token->str` points into the supplied input string, which -is modified by having token-terminating characters overwritten as NULs. -The `state` argument records the current position within `str` after each -`hts_json_snext()` call, and should be set to 0 before the first call. -*/ -HTSLIB_EXPORT -char hts_json_snext(char *str, size_t *state, hts_json_token *token); - -/// Read and discard a complete JSON value from a string -/** @param str The input C string - @param state The input string state, as per `hts_json_snext()` - @param type If the first token of the value to be discarded has already - been read, provide its type; otherwise `'\0'` - @return One of `v` (success), `\0` (end of string), and `?` (lexing error) - -Skips a complete JSON value, which may be a single token or an entire object -or array. -*/ -HTSLIB_EXPORT -char hts_json_sskip_value(char *str, size_t *state, char type); - -struct hFILE; - -/// Read one JSON token from a file -/** @param fp The file stream - @param token On return, filled in with the token read - @param kstr Buffer used to store the token string returned - @return The type of the token read - -The `kstr` buffer is used to store the string value of the token read, -so `token->str` is only valid until the next time `hts_json_fnext()` is -called with the same `kstr` argument. 
-*/ -HTSLIB_EXPORT -char hts_json_fnext(struct hFILE *fp, hts_json_token *token, kstring_t *kstr); - -/// Read and discard a complete JSON value from a file -/** @param fp The file stream - @param type If the first token of the value to be discarded has already - been read, provide its type; otherwise `'\0'` - @return One of `v` (success), `\0` (EOF), and `?` (lexing error) - -Skips a complete JSON value, which may be a single token or an entire object -or array. -*/ -HTSLIB_EXPORT -char hts_json_fskip_value(struct hFILE *fp, char type); - -// The functions operate on ints such as are returned by fgetc(), -// i.e., characters represented as unsigned-char-valued ints, or EOF. -// To operate on plain chars (and to avoid warnings on some platforms), -// technically one must cast to unsigned char everywhere (see CERT STR37-C) -// or less painfully use these *_c() functions that operate on plain chars -// (but not EOF, which must be considered separately where it is applicable). -// TODO We may eventually wish to implement these functions directly without -// using their equivalents, and thus make them immune to locales. 
-static inline int isalnum_c(char c) { return isalnum((unsigned char) c); } -static inline int isalpha_c(char c) { return isalpha((unsigned char) c); } -static inline int isdigit_c(char c) { return isdigit((unsigned char) c); } -static inline int isgraph_c(char c) { return isgraph((unsigned char) c); } -static inline int islower_c(char c) { return islower((unsigned char) c); } -static inline int isprint_c(char c) { return isprint((unsigned char) c); } -static inline int ispunct_c(char c) { return ispunct((unsigned char) c); } -static inline int isspace_c(char c) { return isspace((unsigned char) c); } -static inline int isupper_c(char c) { return isupper((unsigned char) c); } -static inline int isxdigit_c(char c) { return isxdigit((unsigned char) c); } -static inline char tolower_c(char c) { return tolower((unsigned char) c); } -static inline char toupper_c(char c) { return toupper((unsigned char) c); } - -/// Copy possibly malicious text data to a buffer -/** @param buf Destination buffer - @param buflen Size of the destination buffer (>= 4; >= 6 when quotes used) - @param quote Quote character (or '\0' for no quoting of the output) - @param s String to be copied - @param len Length of the input string, or SIZE_MAX to copy until '\0' - @return The destination buffer, @a buf. - -Copies the source text string (escaping any unprintable characters) to the -destination buffer. The destination buffer will always be NUL-terminated; -the text will be truncated (and "..." appended) if necessary to make it fit. - */ -const char *hts_strprint(char *buf, size_t buflen, char quote, - const char *s, size_t len); - -// Faster replacements for strtol, for use when parsing lots of numbers. 
-// Note that these only handle base 10 and do not skip leading whitespace - -/// Convert a string to a signed integer, with overflow detection -/** @param[in] in Input string - @param[out] end Returned end pointer - @param[in] bits Bits available for the converted value - @param[out] failed Location of overflow flag - @return String value converted to an int64_t - -Converts a signed decimal string to an int64_t. The string should -consist of an optional '+' or '-' sign followed by one or more of -the digits 0 to 9. The output value will be limited to fit in the -given number of bits (including the sign bit). If the value is too big, -the largest possible value will be returned and *failed will be set to 1. - -The address of the first character following the converted number will -be stored in *end. - -Both end and failed must be non-NULL. - */ -static inline int64_t hts_str2int(const char *in, char **end, int bits, - int *failed) { - uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1; - uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322 - const unsigned char *v = (const unsigned char *) in; - const unsigned int ascii_zero = '0'; // Prevents conversion to signed - unsigned char d; - int neg = 1; - - switch(*v) { - case '-': - neg=-1; - limit++; /* fall through */ - case '+': - v++; - break; - default: - break; - } - - while (--fast && *v>='0' && *v<='9') - n = n*10 + *v++ - ascii_zero; - - if (!fast) { - uint64_t limit_d_10 = limit / 10; - uint64_t limit_m_10 = limit - 10 * limit_d_10; - while ((d = *v - ascii_zero) < 10) { - if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) { - n = n*10 + d; - v++; - } else { - do { v++; } while (*v - ascii_zero < 10); - n = limit; - *failed = 1; - break; - } - } - } - - *end = (char *)v; - - return (n && neg < 0) ? 
-((int64_t) (n - 1)) - 1 : (int64_t) n; -} - -/// Convert a string to an unsigned integer, with overflow detection -/** @param[in] in Input string - @param[out] end Returned end pointer - @param[in] bits Bits available for the converted value - @param[out] failed Location of overflow flag - @return String value converted to a uint64_t - -Converts an unsigned decimal string to a uint64_t. The string should -consist of an optional '+' sign followed by one or more of the digits 0 -to 9. The output value will be limited to fit in the given number of bits. -If the value is too big, the largest possible value will be returned -and *failed will be set to 1. - -The address of the first character following the converted number will -be stored in *end. - -Both end and failed must be non-NULL. - */ - -static inline uint64_t hts_str2uint(const char *in, char **end, int bits, - int *failed) { - uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1; - const unsigned char *v = (const unsigned char *) in; - const unsigned int ascii_zero = '0'; // Prevents conversion to signed - uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322 - unsigned char d; - - if (*v == '+') - v++; - - while (--fast && *v>='0' && *v<='9') - n = n*10 + *v++ - ascii_zero; - - if (!fast) { - uint64_t limit_d_10 = limit / 10; - uint64_t limit_m_10 = limit - 10 * limit_d_10; - while ((d = *v - ascii_zero) < 10) { - if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) { - n = n*10 + d; - v++; - } else { - do { v++; } while (*v - ascii_zero < 10); - n = limit; - *failed = 1; - break; - } - } - } - - *end = (char *)v; - return n; -} - -/// Convert a string to a double, with overflow detection -/** @param[in] in Input string - @param[out] end Returned end pointer - @param[out] failed Location of overflow flag - @return String value converted to a double - -Converts a floating point value string to a double. 
The string should -have the format [+-]?[0-9]*[.]?[0-9]* with at least one and no more than 15 -digits. Strings that do not match (inf, nan, values with exponents) will -be passed on to strtod() for processing. - -If the value is too big, the largest possible value will be returned; -if it is too small to be represented in a double zero will be returned. -In both cases errno will be set to ERANGE. - -If no characters could be converted, *failed will be set to 1. - -The address of the first character following the converted number will -be stored in *end. - -Both end and failed must be non-NULL. - */ - -static inline double hts_str2dbl(const char *in, char **end, int *failed) { - uint64_t n = 0; - int max_len = 15; - const unsigned char *v = (const unsigned char *) in; - const unsigned int ascii_zero = '0'; // Prevents conversion to signed - int neg = 0, point = -1; - double d; - static double D[] = {1,1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, - 1e8, 1e9, 1e10,1e11,1e12,1e13,1e14,1e15, - 1e16,1e17,1e18,1e19,1e20}; - - while (isspace(*v)) - v++; - - if (*v == '-') { - neg = 1; - v++; - } else if (*v == '+') { - v++; - } - - switch(*v) { - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - break; - - case '0': - if (v[1] != 'x' && v[1] != 'X') break; - // else fall through - hex number - - default: - // Non numbers, like NaN, Inf - d = strtod(in, end); - if (*end == in) - *failed = 1; - return d; - } - - while (*v == '0') ++v; - - const unsigned char *start = v; - - while (--max_len && *v>='0' && *v<='9') - n = n*10 + *v++ - ascii_zero; - if (max_len && *v == '.') { - point = v - start; - v++; - while (--max_len && *v>='0' && *v<='9') - n = n*10 + *v++ - ascii_zero; - } - if (point < 0) - point = v - start; - - // Outside the scope of this quick and dirty parser. 
- if (!max_len || *v == 'e' || *v == 'E') { - d = strtod(in, end); - if (*end == in) - *failed = 1; - return d; - } - - *end = (char *)v; - d = n / D[v - start - point]; - - return neg ? -d : d; -} - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/thread_pool.c b/src/htslib-1.19.1/thread_pool.c deleted file mode 100644 index 252a9d2..0000000 --- a/src/htslib-1.19.1/thread_pool.c +++ /dev/null @@ -1,1535 +0,0 @@ -/* thread_pool.c -- A pool of generic worker threads - - Copyright (c) 2013-2020 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -#ifndef TEST_MAIN -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "thread_pool_internal.h" -#include "htslib/hts_log.h" - -// Minimum stack size for threads. 
Required for some rANS codecs -// that use over 2Mbytes of stack for encoder / decoder state -#define HTS_MIN_THREAD_STACK (3 * 1024 * 1024) - -static void hts_tpool_process_detach_locked(hts_tpool *p, - hts_tpool_process *q); - -//#define DEBUG - -#ifdef DEBUG -static int worker_id(hts_tpool *p) { - int i; - pthread_t s = pthread_self(); - for (i = 0; i < p->tsize; i++) { - if (pthread_equal(s, p->t[i].tid)) - return i; - } - return -1; -} - -void DBG_OUT(FILE *fp, char *fmt, ...) { - va_list args; - va_start(args, fmt); - vfprintf(fp, fmt, args); - va_end(args); -} -#else -#define DBG_OUT(...) do{}while(0) -#endif - -/* ---------------------------------------------------------------------------- - * A process-queue to hold results from the thread pool. - * - * Each thread pool may have jobs of multiple types being queued up and - * interleaved, so we attach several job process-queues to a single pool. - * - * The jobs themselves are expected to push their results onto their - * appropriate results queue. - */ - -/* - * Adds a result to the end of the process result queue. 
- * - * Returns 0 on success; - * -1 on failure - */ -static int hts_tpool_add_result(hts_tpool_job *j, void *data) { - hts_tpool_process *q = j->q; - hts_tpool_result *r; - - pthread_mutex_lock(&q->p->pool_m); - - DBG_OUT(stderr, "%d: Adding result to queue %p, serial %"PRId64", %d of %d\n", - worker_id(j->p), q, j->serial, q->n_output+1, q->qsize); - - if (--q->n_processing == 0) - pthread_cond_signal(&q->none_processing_c); - - /* No results queue is fine if we don't want any results back */ - if (q->in_only) { - pthread_mutex_unlock(&q->p->pool_m); - return 0; - } - - if (!(r = malloc(sizeof(*r)))) { - pthread_mutex_unlock(&q->p->pool_m); - hts_tpool_process_shutdown(q); - return -1; - } - - r->next = NULL; - r->data = data; - r->result_cleanup = j->result_cleanup; - r->serial = j->serial; - - q->n_output++; - if (q->output_tail) { - q->output_tail->next = r; - q->output_tail = r; - } else { - q->output_head = q->output_tail = r; - } - - assert(r->serial >= q->next_serial // Or it will never be dequeued ... - || q->next_serial == INT_MAX); // ... unless flush in progress. 
- if (r->serial == q->next_serial) { - DBG_OUT(stderr, "%d: Broadcasting result_avail (id %"PRId64")\n", - worker_id(j->p), r->serial); - pthread_cond_broadcast(&q->output_avail_c); - DBG_OUT(stderr, "%d: Broadcast complete\n", worker_id(j->p)); - } - - pthread_mutex_unlock(&q->p->pool_m); - - return 0; -} - -static void wake_next_worker(hts_tpool_process *q, int locked); - -/* Core of hts_tpool_next_result() */ -static hts_tpool_result *hts_tpool_next_result_locked(hts_tpool_process *q) { - hts_tpool_result *r, *last; - - if (q->shutdown) - return NULL; - - for (last = NULL, r = q->output_head; r; last = r, r = r->next) { - if (r->serial == q->next_serial) - break; - } - - if (r) { - // Remove r from out linked list - if (q->output_head == r) - q->output_head = r->next; - else - last->next = r->next; - - if (q->output_tail == r) - q->output_tail = last; - - if (!q->output_head) - q->output_tail = NULL; - - q->next_serial++; - q->n_output--; - - if (q->qsize && q->n_output < q->qsize) { - // Not technically input full, but can guarantee there is - // room for the input to go somewhere so we still signal. - // The waiting code will then check the condition again. - if (q->n_input < q->qsize) - pthread_cond_signal(&q->input_not_full_c); - if (!q->shutdown) - wake_next_worker(q, 1); - } - } - - return r; -} - -/* - * Pulls the next item off the process result queue. The caller should free - * it (and any internals as appropriate) after use. This doesn't wait for a - * result to be present. - * - * Results will be returned in strict order. - * - * Returns hts_tpool_result pointer if a result is ready. - * NULL if not. 
- */ -hts_tpool_result *hts_tpool_next_result(hts_tpool_process *q) { - hts_tpool_result *r; - - DBG_OUT(stderr, "Requesting next result on queue %p\n", q); - - pthread_mutex_lock(&q->p->pool_m); - r = hts_tpool_next_result_locked(q); - pthread_mutex_unlock(&q->p->pool_m); - - DBG_OUT(stderr, "(q=%p) Found %p\n", q, r); - - return r; -} - -/* - * Pulls the next item off the process result queue. The caller should free - * it (and any internals as appropriate) after use. This will wait for - * a result to be present if none are currently available. - * - * Results will be returned in strict order. - * - * Returns hts_tpool_result pointer if a result is ready. - * NULL on error or during shutdown. - */ -hts_tpool_result *hts_tpool_next_result_wait(hts_tpool_process *q) { - hts_tpool_result *r; - - pthread_mutex_lock(&q->p->pool_m); - while (!(r = hts_tpool_next_result_locked(q))) { - /* Possible race here now avoided via _locked() call, but in case... */ - struct timeval now; - struct timespec timeout; - - gettimeofday(&now, NULL); - timeout.tv_sec = now.tv_sec + 10; - timeout.tv_nsec = now.tv_usec * 1000; - - q->ref_count++; - if (q->shutdown) { - int rc = --q->ref_count; - pthread_mutex_unlock(&q->p->pool_m); - if (rc == 0) - hts_tpool_process_destroy(q); - return NULL; - } - pthread_cond_timedwait(&q->output_avail_c, &q->p->pool_m, &timeout); - - q->ref_count--; - } - pthread_mutex_unlock(&q->p->pool_m); - - return r; -} - -/* - * Returns true if there are no items in the process results queue and - * also none still pending. 
- */ -int hts_tpool_process_empty(hts_tpool_process *q) { - int empty; - - pthread_mutex_lock(&q->p->pool_m); - empty = q->n_input == 0 && q->n_processing == 0 && q->n_output == 0; - pthread_mutex_unlock(&q->p->pool_m); - - return empty; -} - -void hts_tpool_process_ref_incr(hts_tpool_process *q) { - pthread_mutex_lock(&q->p->pool_m); - q->ref_count++; - pthread_mutex_unlock(&q->p->pool_m); -} - -void hts_tpool_process_ref_decr(hts_tpool_process *q) { - pthread_mutex_lock(&q->p->pool_m); - if (--q->ref_count <= 0) { - pthread_mutex_unlock(&q->p->pool_m); - hts_tpool_process_destroy(q); - return; - } - - // maybe also call destroy here if needed? - pthread_mutex_unlock(&q->p->pool_m); -} - -/* - * Returns the number of completed jobs in the process results queue. - */ -int hts_tpool_process_len(hts_tpool_process *q) { - int len; - - pthread_mutex_lock(&q->p->pool_m); - len = q->n_output; - pthread_mutex_unlock(&q->p->pool_m); - - return len; -} - -/* - * Returns the number of completed jobs in the process results queue plus the - * number running and queued up to run. - */ -int hts_tpool_process_sz(hts_tpool_process *q) { - int len; - - pthread_mutex_lock(&q->p->pool_m); - len = q->n_output + q->n_input + q->n_processing; - pthread_mutex_unlock(&q->p->pool_m); - - return len; -} - -/* - * Shutdown a process. - * - * This sets the shutdown flag and wakes any threads waiting on process - * condition variables. 
- */ -static void hts_tpool_process_shutdown_locked(hts_tpool_process *q) { - q->shutdown = 1; - pthread_cond_broadcast(&q->output_avail_c); - pthread_cond_broadcast(&q->input_not_full_c); - pthread_cond_broadcast(&q->input_empty_c); - pthread_cond_broadcast(&q->none_processing_c); -} - -void hts_tpool_process_shutdown(hts_tpool_process *q) { - pthread_mutex_lock(&q->p->pool_m); - hts_tpool_process_shutdown_locked(q); - pthread_mutex_unlock(&q->p->pool_m); -} - -int hts_tpool_process_is_shutdown(hts_tpool_process *q) { - pthread_mutex_lock(&q->p->pool_m); - int r = q->shutdown; - pthread_mutex_unlock(&q->p->pool_m); - return r; -} - -/* - * Frees a result 'r' and if free_data is true also frees - * the internal r->data result too. - */ -void hts_tpool_delete_result(hts_tpool_result *r, int free_data) { - if (!r) - return; - - if (free_data && r->data) - free(r->data); - - free(r); -} - -/* - * Returns the data portion of a hts_tpool_result, corresponding - * to the actual "result" itself. - */ -void *hts_tpool_result_data(hts_tpool_result *r) { - return r->data; -} - -/* - * Initialises a thread process-queue. - * - * In_only, if true, indicates that the process generates does not need to - * hold any output. Otherwise an output queue is used to store the results - * of processing each input job. 
- * - * Results hts_tpool_process pointer on success; - * NULL on failure - */ -hts_tpool_process *hts_tpool_process_init(hts_tpool *p, int qsize, int in_only) { - hts_tpool_process *q = malloc(sizeof(*q)); - if (!q) - return NULL; - - pthread_cond_init(&q->output_avail_c, NULL); - pthread_cond_init(&q->input_not_full_c, NULL); - pthread_cond_init(&q->input_empty_c, NULL); - pthread_cond_init(&q->none_processing_c,NULL); - - q->p = p; - q->input_head = NULL; - q->input_tail = NULL; - q->output_head = NULL; - q->output_tail = NULL; - q->next_serial = 0; - q->curr_serial = 0; - q->no_more_input = 0; - q->n_input = 0; - q->n_output = 0; - q->n_processing= 0; - q->qsize = qsize; - q->in_only = in_only; - q->shutdown = 0; - q->wake_dispatch = 0; - q->ref_count = 1; - - q->next = NULL; - q->prev = NULL; - - hts_tpool_process_attach(p, q); - - return q; -} - -/* Deallocates memory for a thread process-queue. - * Must be called before the thread pool is destroyed. - */ -void hts_tpool_process_destroy(hts_tpool_process *q) { - DBG_OUT(stderr, "Destroying results queue %p\n", q); - - if (!q) - return; - - // Prevent dispatch from queuing up any more jobs. - // We want to reset (and flush) the queue here, before - // we set the shutdown flag, but we need to avoid races - // with queue more input during reset. 
- pthread_mutex_lock(&q->p->pool_m); - q->no_more_input = 1; - pthread_mutex_unlock(&q->p->pool_m); - - // Ensure it's fully drained before destroying the queue - hts_tpool_process_reset(q, 0); - pthread_mutex_lock(&q->p->pool_m); - hts_tpool_process_detach_locked(q->p, q); - hts_tpool_process_shutdown_locked(q); - - // Maybe a worker is scanning this queue, so delay destruction - if (--q->ref_count > 0) { - pthread_mutex_unlock(&q->p->pool_m); - return; - } - - pthread_cond_destroy(&q->output_avail_c); - pthread_cond_destroy(&q->input_not_full_c); - pthread_cond_destroy(&q->input_empty_c); - pthread_cond_destroy(&q->none_processing_c); - pthread_mutex_unlock(&q->p->pool_m); - - free(q); - - DBG_OUT(stderr, "Destroyed results queue %p\n", q); -} - - -/* - * Attach and detach a thread process-queue with / from the thread pool - * scheduler. - * - * We need to do attach after making a thread process, but may also wish - * to temporarily detach if we wish to stop running jobs on a specific - * process while permitting other process to continue. 
- */ -void hts_tpool_process_attach(hts_tpool *p, hts_tpool_process *q) { - pthread_mutex_lock(&p->pool_m); - if (p->q_head) { - q->next = p->q_head; - q->prev = p->q_head->prev; - p->q_head->prev->next = q; - p->q_head->prev = q; - } else { - q->next = q; - q->prev = q; - } - p->q_head = q; - assert(p->q_head && p->q_head->prev && p->q_head->next); - pthread_mutex_unlock(&p->pool_m); -} - -static void hts_tpool_process_detach_locked(hts_tpool *p, - hts_tpool_process *q) { - if (!p->q_head || !q->prev || !q->next) - return; - - hts_tpool_process *curr = p->q_head, *first = curr; - do { - if (curr == q) { - q->next->prev = q->prev; - q->prev->next = q->next; - p->q_head = q->next; - q->next = q->prev = NULL; - - // Last one - if (p->q_head == q) - p->q_head = NULL; - break; - } - - curr = curr->next; - } while (curr != first); -} - -void hts_tpool_process_detach(hts_tpool *p, hts_tpool_process *q) { - pthread_mutex_lock(&p->pool_m); - hts_tpool_process_detach_locked(p, q); - pthread_mutex_unlock(&p->pool_m); -} - - -/* ---------------------------------------------------------------------------- - * The thread pool. - */ - -#define TDIFF(t2,t1) ((t2.tv_sec-t1.tv_sec)*1000000 + t2.tv_usec-t1.tv_usec) - -/* - * A worker thread. - * - * Once woken, each thread checks each process-queue in the pool in turn, - * looking for input jobs that also have room for the output (if it requires - * storing). If found, we execute it and repeat. - * - * If we checked all input queues and find no such job, then we wait until we - * are signalled to check again. 
- */ -static void *tpool_worker(void *arg) { - hts_tpool_worker *w = (hts_tpool_worker *)arg; - hts_tpool *p = w->p; - hts_tpool_job *j; - - pthread_mutex_lock(&p->pool_m); - while (!p->shutdown) { - // Pop an item off the pool queue - - assert(p->q_head == 0 || (p->q_head->prev && p->q_head->next)); - - int work_to_do = 0; - hts_tpool_process *first = p->q_head, *q = first; - do { - // Iterate over queues, finding one with jobs and also - // room to put the result. - //if (q && q->input_head && !hts_tpool_process_output_full(q)) { - if (q && q->input_head - && q->qsize - q->n_output > q->n_processing - && !q->shutdown) { - work_to_do = 1; - break; - } - - if (q) q = q->next; - } while (q && q != first); - - if (!work_to_do) { - // We scanned all queues and cannot process any, so we wait. - p->nwaiting++; - - // Push this thread to the top of the waiting stack - if (p->t_stack_top == -1 || p->t_stack_top > w->idx) - p->t_stack_top = w->idx; - - p->t_stack[w->idx] = 1; -// printf("%2d: no work. In=%d Proc=%d Out=%d full=%d\n", -// w->idx, p->q_head->n_input, p->q_head->n_processing, p->q_head->n_output, -// hts_tpool_process_output_full(p->q_head)); - pthread_cond_wait(&w->pending_c, &p->pool_m); - p->t_stack[w->idx] = 0; - - /* Find new t_stack_top */ - int i; - p->t_stack_top = -1; - for (i = 0; i < p->tsize; i++) { - if (p->t_stack[i]) { - p->t_stack_top = i; - break; - } - } - - p->nwaiting--; - continue; // To outer loop. - } - - // Otherwise work_to_do, so process as many items in this queue as - // possible before switching to another queue. This means threads - // often end up being dedicated to one type of work. 
- q->ref_count++; - while (q->input_head && q->qsize - q->n_output > q->n_processing) { - if (p->shutdown) - goto shutdown; - - if (q->shutdown) - // Queue shutdown, but there may be other queues - break; - - j = q->input_head; - assert(j->p == p); - - if (!(q->input_head = j->next)) - q->input_tail = NULL; - - // Transitioning from full queue to not-full means we can wake up - // any blocked dispatch threads. We broadcast this as it's only - // happening once (on the transition) rather than every time we - // are below qsize. - // (I wish I could remember why io_lib rev 3660 changed this from - // == to >=, but keeping it just in case!) - q->n_processing++; - if (q->n_input-- >= q->qsize) - pthread_cond_broadcast(&q->input_not_full_c); - - if (q->n_input == 0) - pthread_cond_signal(&q->input_empty_c); - - p->njobs--; // Total number of jobs; used to adjust to CPU scaling - - pthread_mutex_unlock(&p->pool_m); - - DBG_OUT(stderr, "%d: Processing queue %p, serial %"PRId64"\n", - worker_id(j->p), q, j->serial); - - if (hts_tpool_add_result(j, j->func(j->arg)) < 0) - goto err; - //memset(j, 0xbb, sizeof(*j)); - free(j); - - pthread_mutex_lock(&p->pool_m); - } - if (--q->ref_count == 0) { // we were the last user - hts_tpool_process_destroy(q); - } else { - // Out of jobs on this queue, so restart search from next one. - // This is equivalent to "work-stealing". - if (p->q_head) - p->q_head = p->q_head->next; - } - } - - shutdown: - pthread_mutex_unlock(&p->pool_m); -#ifdef DEBUG - fprintf(stderr, "%d: Shutting down\n", worker_id(p)); -#endif - return NULL; - - err: -#ifdef DEBUG - fprintf(stderr, "%d: Failed to add result\n", worker_id(p)); -#endif - // Hard failure, so shutdown all queues - pthread_mutex_lock(&p->pool_m); - hts_tpool_process *first = p->q_head, *q = first; - if (q) { - do { - hts_tpool_process_shutdown_locked(q); - q->shutdown = 2; // signify error. 
- q = q->next; - } while (q != first); - } - pthread_mutex_unlock(&p->pool_m); - return NULL; -} - -static void wake_next_worker(hts_tpool_process *q, int locked) { - if (!q) return; - hts_tpool *p = q->p; - if (!locked) - pthread_mutex_lock(&p->pool_m); - - // Update the q_head to be this queue so we'll start processing - // the queue we know to have results. - assert(q->prev && q->next); // attached - p->q_head = q; - - // Wake up if we have more jobs waiting than CPUs. This partially combats - // CPU frequency scaling effects. Starting too many threads and then - // running out of jobs can cause each thread to have lots of start/stop - // cycles, which then translates often to CPU frequency scaling - // adjustments. Instead it is better to only start as many threads as we - // need to keep the throughput up, meaning some threads run flat out and - // others are idle. - // - // This isn't perfect as we need to know how many can actually start, - // rather than how many are waiting. A limit on output queue size makes - // these two figures different. - assert(p->njobs >= q->n_input); - - int running = p->tsize - p->nwaiting; - int sig = p->t_stack_top >= 0 && p->njobs > p->tsize - p->nwaiting - && (q->n_processing < q->qsize - q->n_output); - -//#define AVG_USAGE -#ifdef AVG_USAGE - // Track average number of running threads and try to keep close. - // We permit this to change, but slowly. This avoids "boom and bust" cycles - // where we read a lot of data, start a lot of jobs, then become idle again. - // This way some threads run steadily and others dormant, which is better - // for throughput. - // - // It's 50:50 if this is a good thing. It helps some tasks quite significantly - // while slightly hindering other (perhaps more usual) jobs. - - if (++p->n_count == 256) { - p->n_count >>= 1; - p->n_running >>= 1; - } - p->n_running += running; - // Built in lag to avoid see-sawing. Is this safe in all cases? 
- if (sig && p->n_count >= 128 && running*p->n_count > p->n_running+1) sig=0; -#endif - - if (0) { - printf("%d waiting, %d running, %d output, %d, arun %d => %d\t", p->njobs, - running, q->n_output, q->qsize - q->n_output, - p->n_running/p->n_count, sig); - int i; - for (i = 0; i < p->tsize; i++) - putchar("x "[p->t_stack[i]]); - putchar('\n'); - } - - if (sig) - pthread_cond_signal(&p->t[p->t_stack_top].pending_c); - - if (!locked) - pthread_mutex_unlock(&p->pool_m); -} - -/* - * Creates a worker pool with n worker threads. - * - * Returns pool pointer on success; - * NULL on failure - */ -hts_tpool *hts_tpool_init(int n) { - int t_idx = 0; - size_t stack_size = 0; - pthread_attr_t pattr; - int pattr_init_done = 0; - hts_tpool *p = malloc(sizeof(*p)); - if (!p) - return NULL; - p->tsize = n; - p->njobs = 0; - p->nwaiting = 0; - p->shutdown = 0; - p->q_head = NULL; - p->t_stack = NULL; - p->n_count = 0; - p->n_running = 0; - p->t = malloc(n * sizeof(p->t[0])); - if (!p->t) { - free(p); - return NULL; - } - p->t_stack = malloc(n * sizeof(*p->t_stack)); - if (!p->t_stack) { - free(p->t); - free(p); - return NULL; - } - p->t_stack_top = -1; - - pthread_mutexattr_t attr; - pthread_mutexattr_init(&attr); - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); - pthread_mutex_init(&p->pool_m, &attr); - pthread_mutexattr_destroy(&attr); - - pthread_mutex_lock(&p->pool_m); - - // Ensure new threads have a reasonably large stack. On some platforms, - // for example MacOS which defaults to 512Kb, this is not big enough - // for some of the rANS codecs. 
- - if (pthread_attr_init(&pattr) < 0) - goto cleanup; - pattr_init_done = 1; - if (pthread_attr_getstacksize(&pattr, &stack_size) < 0) - goto cleanup; - if (stack_size < HTS_MIN_THREAD_STACK) { - if (pthread_attr_setstacksize(&pattr, HTS_MIN_THREAD_STACK) < 0) - goto cleanup; - } - - for (t_idx = 0; t_idx < n; t_idx++) { - hts_tpool_worker *w = &p->t[t_idx]; - p->t_stack[t_idx] = 0; - w->p = p; - w->idx = t_idx; - pthread_cond_init(&w->pending_c, NULL); - if (0 != pthread_create(&w->tid, &pattr, tpool_worker, w)) - goto cleanup; - } - - pthread_mutex_unlock(&p->pool_m); - pthread_attr_destroy(&pattr); - - return p; - - cleanup: { - // Any threads started will be waiting for p->pool_m, so we can - // stop them cleanly by setting p->shutdown, releasing the mutex and - // waiting for them to finish. - int j; - int save_errno = errno; - hts_log_error("Couldn't start thread pool worker : %s", - strerror(errno)); - p->shutdown = 1; - pthread_mutex_unlock(&p->pool_m); - for (j = 0; j < t_idx; j++) { - pthread_join(p->t[j].tid, NULL); - pthread_cond_destroy(&p->t[j].pending_c); - } - pthread_mutex_destroy(&p->pool_m); - if (pattr_init_done) - pthread_attr_destroy(&pattr); - free(p->t_stack); - free(p->t); - free(p); - errno = save_errno; - return NULL; - } -} - -/* - * Returns the number of requested threads for a pool. - */ -int hts_tpool_size(hts_tpool *p) { - return p->tsize; -} - -/* - * Adds an item to the work pool. - * - * Returns 0 on success - * -1 on failure - */ -int hts_tpool_dispatch(hts_tpool *p, hts_tpool_process *q, - void *(*func)(void *arg), void *arg) { - return hts_tpool_dispatch3(p, q, func, arg, NULL, NULL, 0); -} - -/* - * As above but optional non-block flag. 
- * - * nonblock 0 => block if input queue is full - * nonblock +1 => don't block if input queue is full, but do not add task - * nonblock -1 => add task regardless of whether queue is full (over-size) - */ -int hts_tpool_dispatch2(hts_tpool *p, hts_tpool_process *q, - void *(*func)(void *arg), void *arg, int nonblock) { - return hts_tpool_dispatch3(p, q, func, arg, NULL, NULL, nonblock); -} - -int hts_tpool_dispatch3(hts_tpool *p, hts_tpool_process *q, - void *(*exec_func)(void *arg), void *arg, - void (*job_cleanup)(void *arg), - void (*result_cleanup)(void *data), - int nonblock) { - hts_tpool_job *j; - - pthread_mutex_lock(&p->pool_m); - - DBG_OUT(stderr, "Dispatching job for queue %p, serial %"PRId64"\n", - q, q->curr_serial); - - if ((q->no_more_input || q->n_input >= q->qsize) && nonblock == 1) { - pthread_mutex_unlock(&p->pool_m); - errno = EAGAIN; - return -1; - } - - if (!(j = malloc(sizeof(*j)))) { - pthread_mutex_unlock(&p->pool_m); - return -1; - } - j->func = exec_func; - j->arg = arg; - j->job_cleanup = job_cleanup; - j->result_cleanup = result_cleanup; - j->next = NULL; - j->p = p; - j->q = q; - j->serial = q->curr_serial++; - - if (nonblock == 0) { - while ((q->no_more_input || q->n_input >= q->qsize) && - !q->shutdown && !q->wake_dispatch) { - pthread_cond_wait(&q->input_not_full_c, &q->p->pool_m); - } - if (q->no_more_input || q->shutdown) { - free(j); - pthread_mutex_unlock(&p->pool_m); - return -1; - } - if (q->wake_dispatch) { - //fprintf(stderr, "Wake => non-block for this operation\n"); - q->wake_dispatch = 0; - } - } - - p->njobs++; // total across all queues - q->n_input++; // queue specific - - if (q->input_tail) { - q->input_tail->next = j; - q->input_tail = j; - } else { - q->input_head = q->input_tail = j; - } - - DBG_OUT(stderr, "Dispatched (serial %"PRId64")\n", j->serial); - - // Let a worker know we have data. 
- // Keep incoming queue at 1 per running thread, so there is always - // something waiting when they end their current task. If we go above - // this signal to start more threads (if available). This has the effect - // of concentrating jobs to fewer cores when we are I/O bound, which in - // turn benefits systems with auto CPU frequency scaling. - if (!q->shutdown) - wake_next_worker(q, 1); - - pthread_mutex_unlock(&p->pool_m); - - return 0; -} - -/* - * Wakes up a single thread stuck in dispatch and make it return with - * errno EAGAIN. - */ -void hts_tpool_wake_dispatch(hts_tpool_process *q) { - pthread_mutex_lock(&q->p->pool_m); - q->wake_dispatch = 1; - pthread_cond_signal(&q->input_not_full_c); - pthread_mutex_unlock(&q->p->pool_m); -} - -/* - * Flushes the process-queue, but doesn't exit. This simply drains the queue - * and ensures all worker threads have finished their current tasks - * associated with this process. - * - * NOT: This does not mean the worker threads are not executing jobs in - * another process-queue. - * - * Returns 0 on success; - * -1 on failure - */ -int hts_tpool_process_flush(hts_tpool_process *q) { - int i; - hts_tpool *p = q->p; - - DBG_OUT(stderr, "Flushing pool %p\n", p); - - // Drains the queue - pthread_mutex_lock(&p->pool_m); - - // Wake up everything for the final sprint! - for (i = 0; i < p->tsize; i++) - if (p->t_stack[i]) - pthread_cond_signal(&p->t[i].pending_c); - - // Ensure there is room for the final sprint. - // Ideally we shouldn't get here, but the "q->qsize - q->n_output > - // n_processing" check in tpool_worker means we can trigger a - // deadlock there. This negates that possibility. - if (q->qsize < q->n_output + q->n_input + q->n_processing) - q->qsize = q->n_output + q->n_input + q->n_processing; - - // When shutdown, we won't be launching more, but we can still - // wait for any processing jobs complete. 
- if (q->shutdown) { - while (q->n_processing) - pthread_cond_wait(&q->none_processing_c, &p->pool_m); - } - - // Wait for n_input and n_processing to hit zero. - while (!q->shutdown && (q->n_input || q->n_processing)) { - struct timeval now; - struct timespec timeout; - - while (q->n_input && !q->shutdown) { - gettimeofday(&now, NULL); - timeout.tv_sec = now.tv_sec + 1; - timeout.tv_nsec = now.tv_usec * 1000; - pthread_cond_timedwait(&q->input_empty_c, &p->pool_m, &timeout); - } - - // Note: even if q->shutdown is set, we still have to wait until - // q->n_processing is zero as we cannot terminate while things are - // running otherwise we free up the data being worked on. - while (q->n_processing) { - gettimeofday(&now, NULL); - timeout.tv_sec = now.tv_sec + 1; - timeout.tv_nsec = now.tv_usec * 1000; - pthread_cond_timedwait(&q->none_processing_c, &p->pool_m, - &timeout); - } - if (q->shutdown) break; - } - - pthread_mutex_unlock(&p->pool_m); - - DBG_OUT(stderr, "Flushed complete for pool %p, queue %p\n", p, q); - - return 0; -} - -/* - * Resets a process to the initial state. - * - * This removes any queued up input jobs, disables any notification of - * new results/output, flushes what is left and then discards any - * queued output. Anything consumer stuck in a wait on results to - * appear should stay stuck and will only wake up when new data is - * pushed through the queue. - * - * Returns 0 on success; - * -1 on failure - */ -int hts_tpool_process_reset(hts_tpool_process *q, int free_results) { - hts_tpool_job *j, *jn, *j_head; - hts_tpool_result *r, *rn, *r_head; - - pthread_mutex_lock(&q->p->pool_m); - // prevent next_result from returning data during our flush - q->next_serial = INT_MAX; - - // Remove any queued input not yet being acted upon - j_head = q->input_head; - q->input_head = q->input_tail = NULL; - q->n_input = 0; - - // Remove any queued output, thus ensuring we have room to flush. 
- r_head = q->output_head; - q->output_head = q->output_tail = NULL; - q->n_output = 0; - pthread_mutex_unlock(&q->p->pool_m); - - // Release memory. This can be done unlocked now the lists have been - // removed from the queue - for (j = j_head; j; j = jn) { - jn = j->next; - if (j->job_cleanup) j->job_cleanup(j->arg); - free(j); - } - - for (r = r_head; r; r = rn) { - rn = r->next; - if (r->result_cleanup) { - r->result_cleanup(r->data); - r->data = NULL; - } - hts_tpool_delete_result(r, free_results); - } - - // Wait for any jobs being processed to complete. - // (TODO: consider how to cancel any currently processing jobs. - // Probably this is too hard.) - if (hts_tpool_process_flush(q) != 0) - return -1; - - // Remove any new output. - pthread_mutex_lock(&q->p->pool_m); - r_head = q->output_head; - q->output_head = q->output_tail = NULL; - q->n_output = 0; - - // Finally reset the serial back to the starting point. - q->next_serial = q->curr_serial = 0; - pthread_cond_signal(&q->input_not_full_c); - pthread_mutex_unlock(&q->p->pool_m); - - // Discard unwanted output - for (r = r_head; r; r = rn) { - //fprintf(stderr, "Discard output %d\n", r->serial); - rn = r->next; - if (r->result_cleanup) { - r->result_cleanup(r->data); - r->data = NULL; - } - hts_tpool_delete_result(r, free_results); - } - - return 0; -} - -/* Returns the process queue size */ -int hts_tpool_process_qsize(hts_tpool_process *q) { - return q->qsize; -} - -/* - * Destroys a thread pool. The threads are joined into the main - * thread so they will finish their current work load. 
- */ -void hts_tpool_destroy(hts_tpool *p) { - int i; - - DBG_OUT(stderr, "Destroying pool %p\n", p); - - /* Send shutdown message to worker threads */ - pthread_mutex_lock(&p->pool_m); - p->shutdown = 1; - - DBG_OUT(stderr, "Sending shutdown request\n"); - - for (i = 0; i < p->tsize; i++) - pthread_cond_signal(&p->t[i].pending_c); - - pthread_mutex_unlock(&p->pool_m); - - DBG_OUT(stderr, "Shutdown complete\n"); - - for (i = 0; i < p->tsize; i++) - pthread_join(p->t[i].tid, NULL); - - pthread_mutex_destroy(&p->pool_m); - for (i = 0; i < p->tsize; i++) - pthread_cond_destroy(&p->t[i].pending_c); - - if (p->t_stack) - free(p->t_stack); - - free(p->t); - free(p); - - DBG_OUT(stderr, "Destroyed pool %p\n", p); -} - - -/* - * Destroys a thread pool without waiting on jobs to complete. - * Use hts_tpool_kill(p) to quickly exit after a fatal error. - */ -void hts_tpool_kill(hts_tpool *p) { - int i; - - DBG_OUT(stderr, "Destroying pool %p, kill=%d\n", p, kill); - - for (i = 0; i < p->tsize; i++) - pthread_kill(p->t[i].tid, SIGINT); - - pthread_mutex_destroy(&p->pool_m); - for (i = 0; i < p->tsize; i++) - pthread_cond_destroy(&p->t[i].pending_c); - - if (p->t_stack) - free(p->t_stack); - - free(p->t); - free(p); - - DBG_OUT(stderr, "Destroyed pool %p\n", p); -} - - -/*============================================================================= - * Test app. - * - * This can be considered both as a basic test and as a worked example for - * various usage patterns. - *============================================================================= - */ - -#ifdef TEST_MAIN - -#include - -#ifndef TASK_SIZE -#define TASK_SIZE 1000 -#endif - -/*----------------------------------------------------------------------------- - * Unordered x -> x*x test. - * Results arrive in order of completion. 
- */ -void *doit_square_u(void *arg) { - int job = *(int *)arg; - - usleep(random() % 100000); // to coerce job completion out of order - - printf("RESULT: %d\n", job*job); - - free(arg); - return NULL; -} - -int test_square_u(int n) { - hts_tpool *p = hts_tpool_init(n); - hts_tpool_process *q = hts_tpool_process_init(p, n*2, 1); - int i; - - // Dispatch jobs - for (i = 0; i < TASK_SIZE; i++) { - int *ip = malloc(sizeof(*ip)); - *ip = i; - hts_tpool_dispatch(p, q, doit_square_u, ip); - } - - hts_tpool_process_flush(q); - hts_tpool_process_destroy(q); - hts_tpool_destroy(p); - - return 0; -} - - -/*----------------------------------------------------------------------------- - * Ordered x -> x*x test. - * Results arrive in numerical order. - * - * This implementation uses a non-blocking dispatch to avoid dead-locks - * where one job takes too long to complete. - */ -void *doit_square(void *arg) { - int job = *(int *)arg; - int *res; - - // One excessively slow, to stress test output queue filling and - // excessive out of order scenarios. - usleep(500000 * ((job&31)==31) + random() % 10000); - - res = malloc(sizeof(*res)); - *res = (job<0) ? -job*job : job*job; - - free(arg); - return res; -} - -int test_square(int n) { - hts_tpool *p = hts_tpool_init(n); - hts_tpool_process *q = hts_tpool_process_init(p, n*2, 0); - int i; - hts_tpool_result *r; - - // Dispatch jobs - for (i = 0; i < TASK_SIZE; i++) { - int *ip = malloc(sizeof(*ip)); - *ip = i; - int blk; - - do { - // In the situation where some jobs take much longer than - // others, we could end up blocking here as we haven't got - // any room in the output queue to place it. (We don't launch a - // job if the output queue is full.) - - // This happens when the next serial number to fetch is, eg, 50 - // but jobs 51-100 have all executed really fast and appeared in - // the output queue before 50. 
A dispatch & check-results - // alternating loop can fail to find job 50 many times over until - // eventually the dispatch blocks before it arrives. - - // Our solution is to dispatch in non-blocking mode so we are - // always to either dispatch or consume a result. - blk = hts_tpool_dispatch2(p, q, doit_square, ip, 1); - - // Check for results. - if ((r = hts_tpool_next_result(q))) { - printf("RESULT: %d\n", *(int *)hts_tpool_result_data(r)); - hts_tpool_delete_result(r, 1); - } - if (blk == -1) { - // The alternative is a separate thread for dispatching and/or - // consumption of results. See test_squareB. - putchar('.'); fflush(stdout); - usleep(10000); - } - } while (blk == -1); - } - - // Wait for any input-queued up jobs or in-progress jobs to complete. - hts_tpool_process_flush(q); - - while ((r = hts_tpool_next_result(q))) { - printf("RESULT: %d\n", *(int *)hts_tpool_result_data(r)); - hts_tpool_delete_result(r, 1); - } - - hts_tpool_process_destroy(q); - hts_tpool_destroy(p); - - return 0; -} - -/*----------------------------------------------------------------------------- - * Ordered x -> x*x test. - * Results arrive in numerical order. - * - * This implementation uses separate dispatching threads and job consumption - * threads (main thread). This means it can use a blocking calls for - * simplicity elsewhere. 
- */ -struct squareB_opt { - hts_tpool *p; - hts_tpool_process *q; - int n; -}; -static void *test_squareB_dispatcher(void *arg) { - struct squareB_opt *o = (struct squareB_opt *)arg; - int i, *ip; - - for (i = 0; i < o->n; i++) { - ip = malloc(sizeof(*ip)); - *ip = i; - - hts_tpool_dispatch(o->p, o->q, doit_square, ip); - } - - // Dispatch an sentinel job to mark the end - *(ip = malloc(sizeof(*ip))) = -1; - hts_tpool_dispatch(o->p, o->q, doit_square, ip); - pthread_exit(NULL); -} - -int test_squareB(int n) { - hts_tpool *p = hts_tpool_init(n); - hts_tpool_process *q = hts_tpool_process_init(p, n*2, 0); - struct squareB_opt o = {p, q, TASK_SIZE}; - pthread_t tid; - - // Launch our job creation thread. - pthread_create(&tid, NULL, test_squareB_dispatcher, &o); - - // Consume all results until we find the end-of-job marker. - for(;;) { - hts_tpool_result *r = hts_tpool_next_result_wait(q); - int x = *(int *)hts_tpool_result_data(r); - hts_tpool_delete_result(r, 1); - if (x == -1) - break; - printf("RESULT: %d\n", x); - } - - // Wait for any input-queued up jobs or in-progress jobs to complete. - // This should do nothing as we've been executing until the termination - // marker of -1. - hts_tpool_process_flush(q); - assert(hts_tpool_next_result(q) == NULL); - - hts_tpool_process_destroy(q); - hts_tpool_destroy(p); - pthread_join(tid, NULL); - - return 0; -} - - -/*----------------------------------------------------------------------------- - * A simple pipeline test. - * We use a dedicated input thread that does the initial generation of job - * and dispatch, several execution steps running in a shared pool, and a - * dedicated output thread that prints up the final result. It's key that our - * pipeline execution stages can run independently and don't themselves have - * any waits. To achieve this we therefore also use some dedicated threads - * that take the output from one queue and resubmits the job as the input to - * the next queue. 
- * - * More generally this could perhaps be a single pipeline thread that - * marshalls multiple queues and their interactions, but this is simply a - * demonstration of a single pipeline. - * - * Our process fills out the bottom byte of a 32-bit int and then shifts it - * left one byte at a time. Only the final stage needs to be ordered. Each - * stage uses its own queue. - * - * Possible improvement: we only need the last stage to be ordered. By - * allocating our own serial numbers for the first job and manually setting - * these serials in the last job, perhaps we can permit out of order execution - * of all the in-between stages. (I doubt it'll affect speed much though.) - */ - -static void *pipe_input_thread(void *arg); -static void *pipe_stage1(void *arg); -static void *pipe_stage2(void *arg); -static void *pipe_stage3(void *arg); -static void *pipe_output_thread(void *arg); - -typedef struct { - hts_tpool *p; - hts_tpool_process *q1; - hts_tpool_process *q2; - hts_tpool_process *q3; - int n; -} pipe_opt; - -typedef struct { - pipe_opt *o; - unsigned int x; - int eof; // set with last job. 
-} pipe_job; - -static void *pipe_input_thread(void *arg) { - pipe_opt *o = (pipe_opt *)arg; - - int i; - for (i = 1; i <= o->n; i++) { - pipe_job *j = malloc(sizeof(*j)); - j->o = o; - j->x = i; - j->eof = (i == o->n); - - printf("I %08x\n", j->x); - - if (hts_tpool_dispatch(o->p, o->q1, pipe_stage1, j) != 0) { - free(j); - pthread_exit((void *)1); - } - } - - pthread_exit(NULL); -} - -static void *pipe_stage1(void *arg) { - pipe_job *j = (pipe_job *)arg; - - j->x <<= 8; - usleep(random() % 10000); // fast job - printf("1 %08x\n", j->x); - - return j; -} - -static void *pipe_stage1to2(void *arg) { - pipe_opt *o = (pipe_opt *)arg; - hts_tpool_result *r; - - while ((r = hts_tpool_next_result_wait(o->q1))) { - pipe_job *j = (pipe_job *)hts_tpool_result_data(r); - hts_tpool_delete_result(r, 0); - if (hts_tpool_dispatch(j->o->p, j->o->q2, pipe_stage2, j) != 0) - pthread_exit((void *)1); - if (j->eof) - break; - } - - pthread_exit(NULL); -} - -static void *pipe_stage2(void *arg) { - pipe_job *j = (pipe_job *)arg; - - j->x <<= 8; - usleep(random() % 100000); // slow job - printf("2 %08x\n", j->x); - - return j; -} - -static void *pipe_stage2to3(void *arg) { - pipe_opt *o = (pipe_opt *)arg; - hts_tpool_result *r; - - while ((r = hts_tpool_next_result_wait(o->q2))) { - pipe_job *j = (pipe_job *)hts_tpool_result_data(r); - hts_tpool_delete_result(r, 0); - if (hts_tpool_dispatch(j->o->p, j->o->q3, pipe_stage3, j) != 0) - pthread_exit((void *)1); - if (j->eof) - break; - } - - pthread_exit(NULL); -} - -static void *pipe_stage3(void *arg) { - pipe_job *j = (pipe_job *)arg; - - usleep(random() % 10000); // fast job - j->x <<= 8; - return j; -} - -static void *pipe_output_thread(void *arg) { - pipe_opt *o = (pipe_opt *)arg; - hts_tpool_result *r; - - while ((r = hts_tpool_next_result_wait(o->q3))) { - pipe_job *j = (pipe_job *)hts_tpool_result_data(r); - int eof = j->eof; - printf("O %08x\n", j->x); - hts_tpool_delete_result(r, 1); - if (eof) - break; - } - - pthread_exit(NULL); 
-} - -int test_pipe(int n) { - hts_tpool *p = hts_tpool_init(n); - hts_tpool_process *q1 = hts_tpool_process_init(p, n*2, 0); - hts_tpool_process *q2 = hts_tpool_process_init(p, n*2, 0); - hts_tpool_process *q3 = hts_tpool_process_init(p, n*2, 0); - pipe_opt o = {p, q1, q2, q3, TASK_SIZE}; - pthread_t tidIto1, tid1to2, tid2to3, tid3toO; - void *retv; - int ret; - - // Launch our data source and sink threads. - pthread_create(&tidIto1, NULL, pipe_input_thread, &o); - pthread_create(&tid1to2, NULL, pipe_stage1to2, &o); - pthread_create(&tid2to3, NULL, pipe_stage2to3, &o); - pthread_create(&tid3toO, NULL, pipe_output_thread, &o); - - // Wait for tasks to finish. - ret = 0; - pthread_join(tidIto1, &retv); ret |= (retv != NULL); - pthread_join(tid1to2, &retv); ret |= (retv != NULL); - pthread_join(tid2to3, &retv); ret |= (retv != NULL); - pthread_join(tid3toO, &retv); ret |= (retv != NULL); - printf("Return value %d\n", ret); - - hts_tpool_process_destroy(q1); - hts_tpool_process_destroy(q2); - hts_tpool_process_destroy(q3); - hts_tpool_destroy(p); - - return 0; -} - -/*-----------------------------------------------------------------------------*/ -int main(int argc, char **argv) { - int n; - srandom(0); - - if (argc < 3) { - fprintf(stderr, "Usage: %s command n_threads\n", argv[0]); - fprintf(stderr, "Where commands are:\n\n"); - fprintf(stderr, "unordered # Unordered output\n"); - fprintf(stderr, "ordered1 # Main thread with non-block API\n"); - fprintf(stderr, "ordered2 # Dispatch thread, blocking API\n"); - fprintf(stderr, "pipe # Multi-stage pipeline, several queues\n"); - exit(1); - } - - n = atoi(argv[2]); - if (strcmp(argv[1], "unordered") == 0) return test_square_u(n); - if (strcmp(argv[1], "ordered1") == 0) return test_square(n); - if (strcmp(argv[1], "ordered2") == 0) return test_squareB(n); - if (strcmp(argv[1], "pipe") == 0) return test_pipe(n); - - fprintf(stderr, "Unknown sub-command\n"); - exit(1); -} -#endif diff --git 
a/src/htslib-1.19.1/thread_pool_internal.h b/src/htslib-1.19.1/thread_pool_internal.h deleted file mode 100644 index c560614..0000000 --- a/src/htslib-1.19.1/thread_pool_internal.h +++ /dev/null @@ -1,169 +0,0 @@ -/* thread_pool_internal.h -- Internal API for the thread pool. - - Copyright (c) 2013-2016 Genome Research Ltd. - - Author: James Bonfield - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. */ - -/* - * This file implements a thread pool for multi-threading applications. - * It consists of two distinct interfaces: thread pools an thread job queues. - * - * The pool of threads is given a function pointer and void* data to pass in. - * This means the pool can run jobs of multiple types, albeit first come - * first served with no job scheduling except to pick tasks from - * queues that have room to store the result. - * - * Upon completion, the return value from the function pointer is - * added to back to the queue if the result is required. 
We may have - * multiple queues in use for the one pool. - * - * To see example usage, please look at the #ifdef TEST_MAIN code in - * thread_pool.c. - */ - -#ifndef THREAD_POOL_INTERNAL_H -#define THREAD_POOL_INTERNAL_H - -#include -#include -#include "htslib/thread_pool.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * An input job, before execution. - */ -typedef struct hts_tpool_job { - void *(*func)(void *arg); - void *arg; - void (*job_cleanup)(void *arg); - void (*result_cleanup)(void *data); - struct hts_tpool_job *next; - - struct hts_tpool *p; - struct hts_tpool_process *q; - uint64_t serial; -} hts_tpool_job; - -/* - * An output, after job has executed. - */ -struct hts_tpool_result { - struct hts_tpool_result *next; - void (*result_cleanup)(void *data); - uint64_t serial; // sequential number for ordering - void *data; // result itself -}; - -/* - * A per-thread worker struct. - */ -typedef struct { - struct hts_tpool *p; - int idx; - pthread_t tid; - pthread_cond_t pending_c; // when waiting for a job -} hts_tpool_worker; - -/* - * An IO queue consists of a queue of jobs to execute - * (the "input" side) and a queue of job results post- - * execution (the "output" side). - * - * We have size limits to prevent either queue from - * growing too large and serial numbers to ensure - * sequential consumption of the output. - * - * The thread pool may have many hetergeneous tasks, each - * using its own io_queue mixed into the same thread pool. - */ -struct hts_tpool_process { - struct hts_tpool *p; // thread pool - hts_tpool_job *input_head; // input list - hts_tpool_job *input_tail; - hts_tpool_result *output_head; // output list - hts_tpool_result *output_tail; - int qsize; // max size of i/o queues - uint64_t next_serial; // next serial for output - uint64_t curr_serial; // current serial (next input) - - int no_more_input; // disable dispatching of more jobs - int n_input; // no. items in input queue; was njobs - int n_output; // no. 
items in output queue - int n_processing; // no. items being processed (executing) - - int shutdown; // true if pool is being destroyed - int in_only; // if true, don't queue result up. - int wake_dispatch; // unblocks waiting dispatchers - - int ref_count; // used to track safe destruction - - pthread_cond_t output_avail_c; // Signalled on each new output - pthread_cond_t input_not_full_c; // Input queue is no longer full - pthread_cond_t input_empty_c; // Input queue has become empty - pthread_cond_t none_processing_c;// n_processing has hit zero - - struct hts_tpool_process *next, *prev;// to form circular linked list. -}; - -/* - * The single pool structure itself. - * - * This knows nothing about the nature of the jobs or where their - * output is going, but it maintains a list of queues associated with - * this pool from which the jobs are taken. - */ -struct hts_tpool { - int nwaiting; // how many workers waiting for new jobs - int njobs; // how many total jobs are waiting in all queues - int shutdown; // true if pool is being destroyed - - // I/O queues to check for jobs in and to put results. - // Forms a circular linked list. (q_head may be amended - // to point to the most recently updated.) - hts_tpool_process *q_head; - - // threads - int tsize; // maximum number of jobs - hts_tpool_worker *t; - // array of worker IDs free - int *t_stack, t_stack_top; - - // A single mutex used when updating this and any associated structure. - pthread_mutex_t pool_m; - - // Tracking of average number of running jobs. - // This can be used to dampen any hysteresis caused by bursty - // input availability. - int n_count, n_running; - - // Debugging to check wait time. - // FIXME: should we just delete these and cull the associated code? 
- long long total_time, wait_time; -}; - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/htslib-1.19.1/vcf.5 b/src/htslib-1.19.1/vcf.5 deleted file mode 100644 index 35d60c1..0000000 --- a/src/htslib-1.19.1/vcf.5 +++ /dev/null @@ -1,120 +0,0 @@ -'\" t -.TH vcf 5 "August 2013" "htslib" "Bioinformatics formats" -.SH NAME -vcf \- Variant Call Format -.\" -.\" Copyright (C) 2011 Broad Institute. -.\" Copyright (C) 2013-2014 Genome Research Ltd. -.\" -.\" Author: Heng Li -.\" -.\" Permission is hereby granted, free of charge, to any person obtaining a -.\" copy of this software and associated documentation files (the "Software"), -.\" to deal in the Software without restriction, including without limitation -.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, -.\" and/or sell copies of the Software, and to permit persons to whom the -.\" Software is furnished to do so, subject to the following conditions: -.\" -.\" The above copyright notice and this permission notice shall be included in -.\" all copies or substantial portions of the Software. -.\" -.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -.\" DEALINGS IN THE SOFTWARE. -.\" -.SH DESCRIPTION -The Variant Call Format (VCF) is a TAB-delimited format with each data line -consisting of the following fields: -.TS -nlbl. 
-1 CHROM CHROMosome name -2 POS the left-most POSition of the variant -3 ID unique variant IDentifier -4 REF the REFerence allele -5 ALT the ALTernate allele(s) (comma-separated) -6 QUAL variant/reference QUALity -7 FILTER FILTERs applied -8 INFO INFOrmation related to the variant (semicolon-separated) -9 FORMAT FORMAT of the genotype fields (optional; colon-separated) -10+ SAMPLE SAMPLE genotypes and per-sample information (optional) -.TE -.P -The following table gives the \fBINFO\fP tags used by samtools and bcftools. -.TP -.B AF1 -Max-likelihood estimate of the site allele frequency (AF) of the first ALT allele -(double) -.TP -.B DP -Raw read depth (without quality filtering) -(int) -.TP -.B DP4 -# high-quality reference forward bases, ref reverse, alternate for and alt rev bases -(int[4]) -.TP -.B FQ -Consensus quality. Positive: sample genotypes different; negative: otherwise -(int) -.TP -.B MQ -Root-Mean-Square mapping quality of covering reads -(int) -.TP -.B PC2 -Phred probability of AF in group1 samples being larger (,smaller) than in group2 -(int[2]) -.TP -.B PCHI2 -Posterior weighted chi^2 P-value between group1 and group2 samples -(double) -.TP -.B PV4 -P-value for strand bias, baseQ bias, mapQ bias and tail distance bias -(double[4]) -.TP -.B QCHI2 -Phred-scaled PCHI2 -(int) -.TP -.B RP -# permutations yielding a smaller PCHI2 -(int) -.TP -.B CLR -Phred log ratio of genotype likelihoods with and without the trio/pair constraint -(int) -.TP -.B UGT -Most probable genotype configuration without the trio constraint -(string) -.TP -.B CGT -Most probable configuration with the trio constraint -(string) -.TP -.B VDB -Tests variant positions within reads. 
Intended for filtering RNA-seq artifacts around splice sites -(float) -.TP -.B RPB -Mann-Whitney rank-sum test for tail distance bias -(float) -.TP -.B HWE -Hardy-Weinberg equilibrium test (Wigginton et al) -(float) -.P -.SH SEE ALSO -.TP -https://github.com/samtools/hts-specs -The full VCF/BCF file format specification -.TP -.I A note on exact tests of Hardy-Weinberg equilibrium -Wigginton JE et al -PMID:15789306 -.\" (http://www.ncbi.nlm.nih.gov/pubmed/15789306) diff --git a/src/htslib-1.19.1/vcf.c b/src/htslib-1.19.1/vcf.c deleted file mode 100644 index 0e8ca3f..0000000 --- a/src/htslib-1.19.1/vcf.c +++ /dev/null @@ -1,5923 +0,0 @@ -/* vcf.c -- VCF/BCF API functions. - - Copyright (C) 2012, 2013 Broad Institute. - Copyright (C) 2012-2023 Genome Research Ltd. - Portions copyright (C) 2014 Intel Corporation. - - Author: Heng Li - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -#include "fuzz_settings.h" -#endif - -#include "htslib/vcf.h" -#include "htslib/bgzf.h" -#include "htslib/tbx.h" -#include "htslib/hfile.h" -#include "hts_internal.h" -#include "htslib/hts_endian.h" -#include "htslib/khash_str2int.h" -#include "htslib/kstring.h" -#include "htslib/sam.h" -#include "htslib/khash.h" - -#if 0 -// This helps on Intel a bit, often 6-7% faster VCF parsing. -// Conversely sometimes harms AMD Zen4 as ~9% slower. -// Possibly related to IPC differences. However for now it's just a -// curiousity we ignore and stick with the simpler code. -// -// Left here as a hint for future explorers. -static inline int xstreq(const char *a, const char *b) { - while (*a && *a == *b) - a++, b++; - return *a == *b; -} - -#define KHASH_MAP_INIT_XSTR(name, khval_t) \ - KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, xstreq) - -KHASH_MAP_INIT_XSTR(vdict, bcf_idinfo_t) -#else -KHASH_MAP_INIT_STR(vdict, bcf_idinfo_t) -#endif - -typedef khash_t(vdict) vdict_t; - -KHASH_MAP_INIT_STR(hdict, bcf_hrec_t*) -typedef khash_t(hdict) hdict_t; - - -#include "htslib/kseq.h" -HTSLIB_EXPORT -uint32_t bcf_float_missing = 0x7F800001; - -HTSLIB_EXPORT -uint32_t bcf_float_vector_end = 0x7F800002; - -HTSLIB_EXPORT -uint8_t bcf_type_shift[] = { 0, 0, 1, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - -static bcf_idinfo_t bcf_idinfo_def = { .info = { 15, 15, 15 }, .hrec = { NULL, NULL, NULL}, .id = -1 }; - -/* - Partial support for 64-bit POS and Number=1 INFO tags. 
- Notes: - - the support for 64-bit values is motivated by POS and INFO/END for large genomes - - the use of 64-bit values does not conform to the specification - - cannot output 64-bit BCF and if it does, it is not compatible with anything - - experimental, use at your risk -*/ -#ifdef VCF_ALLOW_INT64 - #define BCF_MAX_BT_INT64 (0x7fffffffffffffff) /* INT64_MAX, for internal use only */ - #define BCF_MIN_BT_INT64 -9223372036854775800LL /* INT64_MIN + 8, for internal use only */ -#endif - -#define BCF_IS_64BIT (1<<30) - - -// Opaque structure with auxilary data which allows to extend bcf_hdr_t without breaking ABI. -// Note that this preserving API and ABI requires that the first element is vdict_t struct -// rather than a pointer, as user programs may (and in some cases do) access the dictionary -// directly as (vdict_t*)hdr->dict. -typedef struct -{ - vdict_t dict; // bcf_hdr_t.dict[0] vdict_t dictionary which keeps bcf_idinfo_t for BCF_HL_FLT,BCF_HL_INFO,BCF_HL_FMT - hdict_t *gen; // hdict_t dictionary which keeps bcf_hrec_t* pointers for generic and structured fields - size_t *key_len;// length of h->id[BCF_DT_ID] strings -} -bcf_hdr_aux_t; - -static inline bcf_hdr_aux_t *get_hdr_aux(const bcf_hdr_t *hdr) -{ - return (bcf_hdr_aux_t *)hdr->dict[0]; -} - -static char *find_chrom_header_line(char *s) -{ - char *nl; - if (strncmp(s, "#CHROM\t", 7) == 0) return s; - else if ((nl = strstr(s, "\n#CHROM\t")) != NULL) return nl+1; - else return NULL; -} - -/************************* - *** VCF header parser *** - *************************/ - -static int bcf_hdr_add_sample_len(bcf_hdr_t *h, const char *s, size_t len) -{ - const char *ss = s; - while ( *ss && isspace_c(*ss) && ss - s < len) ss++; - if ( !*ss || ss - s == len) - { - hts_log_error("Empty sample name: trailing spaces/tabs in the header line?"); - return -1; - } - - vdict_t *d = (vdict_t*)h->dict[BCF_DT_SAMPLE]; - int ret; - char *sdup = malloc(len + 1); - if (!sdup) return -1; - memcpy(sdup, s, len); - 
sdup[len] = 0; - - // Ensure space is available in h->samples - size_t n = kh_size(d); - char **new_samples = realloc(h->samples, sizeof(char*) * (n + 1)); - if (!new_samples) { - free(sdup); - return -1; - } - h->samples = new_samples; - - int k = kh_put(vdict, d, sdup, &ret); - if (ret < 0) { - free(sdup); - return -1; - } - if (ret) { // absent - kh_val(d, k) = bcf_idinfo_def; - kh_val(d, k).id = n; - } else { - hts_log_error("Duplicated sample name '%s'", sdup); - free(sdup); - return -1; - } - h->samples[n] = sdup; - h->dirty = 1; - return 0; -} - -int bcf_hdr_add_sample(bcf_hdr_t *h, const char *s) -{ - if (!s) { - // Allowed for backwards-compatibility, calling with s == NULL - // used to trigger bcf_hdr_sync(h); - return 0; - } - return bcf_hdr_add_sample_len(h, s, strlen(s)); -} - -int HTS_RESULT_USED bcf_hdr_parse_sample_line(bcf_hdr_t *hdr, const char *str) -{ - const char *mandatory = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"; - if ( strncmp(str,mandatory,strlen(mandatory)) ) - { - hts_log_error("Could not parse the \"#CHROM..\" line, either the fields are incorrect or spaces are present instead of tabs:\n\t%s",str); - return -1; - } - - const char *beg = str + strlen(mandatory), *end; - if ( !*beg || *beg=='\n' ) return 0; - if ( strncmp(beg,"\tFORMAT\t",8) ) - { - hts_log_error("Could not parse the \"#CHROM..\" line, either FORMAT is missing or spaces are present instead of tabs:\n\t%s",str); - return -1; - } - beg += 8; - - int ret = 0; - while ( *beg ) - { - end = beg; - while ( *end && *end!='\t' && *end!='\n' ) end++; - if ( bcf_hdr_add_sample_len(hdr, beg, end-beg) < 0 ) ret = -1; - if ( !*end || *end=='\n' || ret<0 ) break; - beg = end + 1; - } - return ret; -} - -int bcf_hdr_sync(bcf_hdr_t *h) -{ - int i; - for (i = 0; i < 3; i++) - { - vdict_t *d = (vdict_t*)h->dict[i]; - khint_t k; - if ( h->n[i] < kh_size(d) ) - { - bcf_idpair_t *new_idpair; - // this should be true only for i=2, BCF_DT_SAMPLE - new_idpair = (bcf_idpair_t*) 
realloc(h->id[i], kh_size(d)*sizeof(bcf_idpair_t)); - if (!new_idpair) return -1; - h->n[i] = kh_size(d); - h->id[i] = new_idpair; - } - for (k=kh_begin(d); kid[i][kh_val(d,k).id].key = kh_key(d,k); - h->id[i][kh_val(d,k).id].val = &kh_val(d,k); - } - } - - // Invalidate key length cache - bcf_hdr_aux_t *aux = get_hdr_aux(h); - if (aux && aux->key_len) { - free(aux->key_len); - aux->key_len = NULL; - } - - h->dirty = 0; - return 0; -} - -void bcf_hrec_destroy(bcf_hrec_t *hrec) -{ - if (!hrec) return; - free(hrec->key); - if ( hrec->value ) free(hrec->value); - int i; - for (i=0; inkeys; i++) - { - free(hrec->keys[i]); - free(hrec->vals[i]); - } - free(hrec->keys); - free(hrec->vals); - free(hrec); -} - -// Copies all fields except IDX. -bcf_hrec_t *bcf_hrec_dup(bcf_hrec_t *hrec) -{ - int save_errno; - bcf_hrec_t *out = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t)); - if (!out) return NULL; - - out->type = hrec->type; - if ( hrec->key ) { - out->key = strdup(hrec->key); - if (!out->key) goto fail; - } - if ( hrec->value ) { - out->value = strdup(hrec->value); - if (!out->value) goto fail; - } - out->nkeys = hrec->nkeys; - out->keys = (char**) malloc(sizeof(char*)*hrec->nkeys); - if (!out->keys) goto fail; - out->vals = (char**) malloc(sizeof(char*)*hrec->nkeys); - if (!out->vals) goto fail; - int i, j = 0; - for (i=0; inkeys; i++) - { - if ( hrec->keys[i] && !strcmp("IDX",hrec->keys[i]) ) continue; - if ( hrec->keys[i] ) { - out->keys[j] = strdup(hrec->keys[i]); - if (!out->keys[j]) goto fail; - } - if ( hrec->vals[i] ) { - out->vals[j] = strdup(hrec->vals[i]); - if (!out->vals[j]) goto fail; - } - j++; - } - if ( i!=j ) out->nkeys -= i-j; // IDX was omitted - return out; - - fail: - save_errno = errno; - hts_log_error("%s", strerror(errno)); - bcf_hrec_destroy(out); - errno = save_errno; - return NULL; -} - -void bcf_hrec_debug(FILE *fp, bcf_hrec_t *hrec) -{ - fprintf(fp, "key=[%s] value=[%s]", hrec->key, hrec->value?hrec->value:""); - int i; - for (i=0; inkeys; i++) 
- fprintf(fp, "\t[%s]=[%s]", hrec->keys[i],hrec->vals[i]); - fprintf(fp, "\n"); -} - -void bcf_header_debug(bcf_hdr_t *hdr) -{ - int i, j; - for (i=0; inhrec; i++) - { - if ( !hdr->hrec[i]->value ) - { - fprintf(stderr, "##%s=<", hdr->hrec[i]->key); - fprintf(stderr,"%s=%s", hdr->hrec[i]->keys[0], hdr->hrec[i]->vals[0]); - for (j=1; jhrec[i]->nkeys; j++) - fprintf(stderr,",%s=%s", hdr->hrec[i]->keys[j], hdr->hrec[i]->vals[j]); - fprintf(stderr,">\n"); - } - else - fprintf(stderr,"##%s=%s\n", hdr->hrec[i]->key,hdr->hrec[i]->value); - } -} - -int bcf_hrec_add_key(bcf_hrec_t *hrec, const char *str, size_t len) -{ - char **tmp; - size_t n = hrec->nkeys + 1; - assert(len > 0 && len < SIZE_MAX); - tmp = realloc(hrec->keys, sizeof(char*)*n); - if (!tmp) return -1; - hrec->keys = tmp; - tmp = realloc(hrec->vals, sizeof(char*)*n); - if (!tmp) return -1; - hrec->vals = tmp; - - hrec->keys[hrec->nkeys] = (char*) malloc((len+1)*sizeof(char)); - if (!hrec->keys[hrec->nkeys]) return -1; - memcpy(hrec->keys[hrec->nkeys],str,len); - hrec->keys[hrec->nkeys][len] = 0; - hrec->vals[hrec->nkeys] = NULL; - hrec->nkeys = n; - return 0; -} - -int bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const char *str, size_t len, int is_quoted) -{ - if ( hrec->vals[i] ) { - free(hrec->vals[i]); - hrec->vals[i] = NULL; - } - if ( !str ) return 0; - if ( is_quoted ) - { - if (len >= SIZE_MAX - 3) { - errno = ENOMEM; - return -1; - } - hrec->vals[i] = (char*) malloc((len+3)*sizeof(char)); - if (!hrec->vals[i]) return -1; - hrec->vals[i][0] = '"'; - memcpy(&hrec->vals[i][1],str,len); - hrec->vals[i][len+1] = '"'; - hrec->vals[i][len+2] = 0; - } - else - { - if (len == SIZE_MAX) { - errno = ENOMEM; - return -1; - } - hrec->vals[i] = (char*) malloc((len+1)*sizeof(char)); - if (!hrec->vals[i]) return -1; - memcpy(hrec->vals[i],str,len); - hrec->vals[i][len] = 0; - } - return 0; -} - -int hrec_add_idx(bcf_hrec_t *hrec, int idx) -{ - int n = hrec->nkeys + 1; - char **tmp = (char**) realloc(hrec->keys, 
sizeof(char*)*n); - if (!tmp) return -1; - hrec->keys = tmp; - - tmp = (char**) realloc(hrec->vals, sizeof(char*)*n); - if (!tmp) return -1; - hrec->vals = tmp; - - hrec->keys[hrec->nkeys] = strdup("IDX"); - if (!hrec->keys[hrec->nkeys]) return -1; - - kstring_t str = {0,0,0}; - if (kputw(idx, &str) < 0) { - free(hrec->keys[hrec->nkeys]); - return -1; - } - hrec->vals[hrec->nkeys] = str.s; - hrec->nkeys = n; - return 0; -} - -int bcf_hrec_find_key(bcf_hrec_t *hrec, const char *key) -{ - int i; - for (i=0; inkeys; i++) - if ( !strcasecmp(key,hrec->keys[i]) ) return i; - return -1; -} - -static void bcf_hrec_set_type(bcf_hrec_t *hrec) -{ - if ( !strcmp(hrec->key, "contig") ) hrec->type = BCF_HL_CTG; - else if ( !strcmp(hrec->key, "INFO") ) hrec->type = BCF_HL_INFO; - else if ( !strcmp(hrec->key, "FILTER") ) hrec->type = BCF_HL_FLT; - else if ( !strcmp(hrec->key, "FORMAT") ) hrec->type = BCF_HL_FMT; - else if ( hrec->nkeys>0 ) hrec->type = BCF_HL_STR; - else hrec->type = BCF_HL_GEN; -} - - -/** - The arrays were generated with - - valid_ctg: - perl -le '@v = (split(//,q[!#$%&*+./:;=?@^_|~-]),"a"..."z","A"..."Z","0"..."9"); @a = (0) x 256; foreach $c (@v) { $a[ord($c)] = 1; } print join(", ",@a)' | fold -w 48 - - valid_tag: - perl -le '@v = (split(//,q[_.]),"a"..."z","A"..."Z","0"..."9"); @a = (0) x 256; foreach $c (@v) { $a[ord($c)] = 1; } print join(", ",@a)' | fold -w 48 -*/ -static const uint8_t valid_ctg[256] = -{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; -static const uint8_t valid_tag[256] = -{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/** - bcf_hrec_check() - check the validity of structured header lines - - Returns 0 on success or negative value on error. - - Currently the return status is not checked by the caller - and only a warning is printed on stderr. This should be improved - to propagate the error all the way up to the caller and let it - decide what to do: throw an error or proceed anyway. 
- */ -static int bcf_hrec_check(bcf_hrec_t *hrec) -{ - int i; - bcf_hrec_set_type(hrec); - - if ( hrec->type==BCF_HL_CTG ) - { - i = bcf_hrec_find_key(hrec,"ID"); - if ( i<0 ) goto err_missing_id; - char *val = hrec->vals[i]; - if ( val[0]=='*' || val[0]=='=' || !valid_ctg[(uint8_t)val[0]] ) goto err_invalid_ctg; - while ( *(++val) ) - if ( !valid_ctg[(uint8_t)*val] ) goto err_invalid_ctg; - return 0; - } - if ( hrec->type==BCF_HL_INFO ) - { - i = bcf_hrec_find_key(hrec,"ID"); - if ( i<0 ) goto err_missing_id; - char *val = hrec->vals[i]; - if ( !strcmp(val,"1000G") ) return 0; - if ( val[0]=='.' || (val[0]>='0' && val[0]<='9') || !valid_tag[(uint8_t)val[0]] ) goto err_invalid_tag; - while ( *(++val) ) - if ( !valid_tag[(uint8_t)*val] ) goto err_invalid_tag; - return 0; - } - if ( hrec->type==BCF_HL_FMT ) - { - i = bcf_hrec_find_key(hrec,"ID"); - if ( i<0 ) goto err_missing_id; - char *val = hrec->vals[i]; - if ( val[0]=='.' || (val[0]>='0' && val[0]<='9') || !valid_tag[(uint8_t)val[0]] ) goto err_invalid_tag; - while ( *(++val) ) - if ( !valid_tag[(uint8_t)*val] ) goto err_invalid_tag; - return 0; - } - return 0; - - err_missing_id: - hts_log_warning("Missing ID attribute in one or more header lines"); - return -1; - - err_invalid_ctg: - hts_log_warning("Invalid contig name: \"%s\"", hrec->vals[i]); - return -1; - - err_invalid_tag: - hts_log_warning("Invalid tag name: \"%s\"", hrec->vals[i]); - return -1; -} - -static inline int is_escaped(const char *min, const char *str) -{ - int n = 0; - while ( --str>=min && *str=='\\' ) n++; - return n%2; -} - -bcf_hrec_t *bcf_hdr_parse_line(const bcf_hdr_t *h, const char *line, int *len) -{ - bcf_hrec_t *hrec = NULL; - const char *p = line; - if (p[0] != '#' || p[1] != '#') { *len = 0; return NULL; } - p += 2; - - const char *q = p; - while ( *q && *q!='=' && *q != '\n' ) q++; - ptrdiff_t n = q-p; - if ( *q!='=' || !n ) // wrong format - goto malformed_line; - - hrec = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t)); - if 
(!hrec) { *len = -1; return NULL; } - hrec->key = (char*) malloc(sizeof(char)*(n+1)); - if (!hrec->key) goto fail; - memcpy(hrec->key,p,n); - hrec->key[n] = 0; - hrec->type = -1; - - p = ++q; - if ( *p!='<' ) // generic field, e.g. ##samtoolsVersion=0.1.18-r579 - { - while ( *q && *q!='\n' ) q++; - hrec->value = (char*) malloc((q-p+1)*sizeof(char)); - if (!hrec->value) goto fail; - memcpy(hrec->value, p, q-p); - hrec->value[q-p] = 0; - *len = q - line + (*q ? 1 : 0); // Skip \n but not \0 - return hrec; - } - - // structured line, e.g. - // ##INFO= - // ##PEDIGREE= - int nopen = 1; - while ( *q && *q!='\n' && nopen>0 ) - { - p = ++q; - while ( *q && *q==' ' ) { p++; q++; } - // ^[A-Za-z_][0-9A-Za-z_.]*$ - if (p==q && *q && (isalpha_c(*q) || *q=='_')) - { - q++; - while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; - } - n = q-p; - int m = 0; - while ( *q && *q==' ' ) { q++; m++; } - if ( *q!='=' || !n ) - goto malformed_line; - - if (bcf_hrec_add_key(hrec, p, q-p-m) < 0) goto fail; - p = ++q; - while ( *q && *q==' ' ) { p++; q++; } - - int quoted = 0; - char ending = '\0'; - switch (*p) { - case '"': - quoted = 1; - ending = '"'; - p++; - break; - case '[': - quoted = 1; - ending = ']'; - break; - } - if ( quoted ) q++; - while ( *q && *q != '\n' ) - { - if ( quoted ) { if ( *q==ending && !is_escaped(p,q) ) break; } - else - { - if ( *q=='<' ) nopen++; - if ( *q=='>' ) nopen--; - if ( !nopen ) break; - if ( *q==',' && nopen==1 ) break; - } - q++; - } - const char *r = q; - if (quoted && ending == ']') { - if (*q == ending) { - r++; - q++; - quoted = 0; - } else { - char buffer[320]; - hts_log_error("Missing ']' in header line %s", - hts_strprint(buffer, sizeof(buffer), '"', - line, q-line)); - goto fail; - } - } - while ( r > p && r[-1] == ' ' ) r--; - if (bcf_hrec_set_val(hrec, hrec->nkeys-1, p, r-p, quoted) < 0) - goto fail; - if ( quoted && *q==ending ) q++; - if ( *q=='>' ) - { - if (nopen) nopen--; // this can happen with nested angle brackets <> - q++; 
- } - } - if ( nopen ) - hts_log_warning("Incomplete header line, trying to proceed anyway:\n\t[%s]\n\t[%d]",line,q[0]); - - // Skip to end of line - int nonspace = 0; - p = q; - while ( *q && *q!='\n' ) { nonspace |= !isspace_c(*q); q++; } - if (nonspace) { - char buffer[320]; - hts_log_warning("Dropped trailing junk from header line '%s'", - hts_strprint(buffer, sizeof(buffer), - '"', line, q - line)); - } - - *len = q - line + (*q ? 1 : 0); - return hrec; - - fail: - *len = -1; - bcf_hrec_destroy(hrec); - return NULL; - - malformed_line: - { - char buffer[320]; - while ( *q && *q!='\n' ) q++; // Ensure *len includes full line - hts_log_error("Could not parse the header line: %s", - hts_strprint(buffer, sizeof(buffer), - '"', line, q - line)); - *len = q - line + (*q ? 1 : 0); - bcf_hrec_destroy(hrec); - return NULL; - } -} - -static int bcf_hdr_set_idx(bcf_hdr_t *hdr, int dict_type, const char *tag, bcf_idinfo_t *idinfo) -{ - size_t new_n; - - // If available, preserve existing IDX - if ( idinfo->id==-1 ) - idinfo->id = hdr->n[dict_type]; - else if ( idinfo->id < hdr->n[dict_type] && hdr->id[dict_type][idinfo->id].key ) - { - hts_log_error("Conflicting IDX=%d lines in the header dictionary, the new tag is %s", - idinfo->id, tag); - errno = EINVAL; - return -1; - } - - new_n = idinfo->id >= hdr->n[dict_type] ? idinfo->id+1 : hdr->n[dict_type]; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // hts_resize() can attempt to allocate up to 2 * requested items - if (new_n > FUZZ_ALLOC_LIMIT/(2 * sizeof(bcf_idpair_t))) - return -1; -#endif - if (hts_resize(bcf_idpair_t, new_n, &hdr->m[dict_type], - &hdr->id[dict_type], HTS_RESIZE_CLEAR)) { - return -1; - } - hdr->n[dict_type] = new_n; - - // NB: the next kh_put call can invalidate the idinfo pointer, therefore - // we leave it unassigned here. It must be set explicitly in bcf_hdr_sync. 
- hdr->id[dict_type][idinfo->id].key = tag; - - return 0; -} - -// returns: 1 when hdr needs to be synced, -1 on error, 0 otherwise -static int bcf_hdr_register_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) -{ - // contig - int i, ret, replacing = 0; - khint_t k; - char *str = NULL; - - bcf_hrec_set_type(hrec); - - if ( hrec->type==BCF_HL_CTG ) - { - hts_pos_t len = 0; - - // Get the contig ID ($str) and length ($j) - i = bcf_hrec_find_key(hrec,"length"); - if ( i<0 ) len = 0; - else { - char *end = hrec->vals[i]; - len = strtoll(hrec->vals[i], &end, 10); - if (end == hrec->vals[i] || len < 0) return 0; - } - - i = bcf_hrec_find_key(hrec,"ID"); - if ( i<0 ) return 0; - str = strdup(hrec->vals[i]); - if (!str) return -1; - - // Register in the dictionary - vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_CTG]; - khint_t k = kh_get(vdict, d, str); - if ( k != kh_end(d) ) { // already present - free(str); str=NULL; - if (kh_val(d, k).hrec[0] != NULL) // and not removed - return 0; - replacing = 1; - } else { - k = kh_put(vdict, d, str, &ret); - if (ret < 0) { free(str); return -1; } - } - - int idx = bcf_hrec_find_key(hrec,"IDX"); - if ( idx!=-1 ) - { - char *tmp = hrec->vals[idx]; - idx = strtol(hrec->vals[idx], &tmp, 10); - if ( *tmp || idx < 0 || idx >= INT_MAX - 1) - { - if (!replacing) { - kh_del(vdict, d, k); - free(str); - } - hts_log_warning("Error parsing the IDX tag, skipping"); - return 0; - } - } - - kh_val(d, k) = bcf_idinfo_def; - kh_val(d, k).id = idx; - kh_val(d, k).info[0] = len; - kh_val(d, k).hrec[0] = hrec; - if (bcf_hdr_set_idx(hdr, BCF_DT_CTG, kh_key(d,k), &kh_val(d,k)) < 0) { - if (!replacing) { - kh_del(vdict, d, k); - free(str); - } - return -1; - } - if ( idx==-1 ) { - if (hrec_add_idx(hrec, kh_val(d,k).id) < 0) { - return -1; - } - } - - return 1; - } - - if ( hrec->type==BCF_HL_STR ) return 1; - if ( hrec->type!=BCF_HL_INFO && hrec->type!=BCF_HL_FLT && hrec->type!=BCF_HL_FMT ) return 0; - - // INFO/FILTER/FORMAT - char *id = NULL; - uint32_t type = 
UINT32_MAX, var = UINT32_MAX; - int num = -1, idx = -1; - for (i=0; inkeys; i++) - { - if ( !strcmp(hrec->keys[i], "ID") ) id = hrec->vals[i]; - else if ( !strcmp(hrec->keys[i], "IDX") ) - { - char *tmp = hrec->vals[i]; - idx = strtol(hrec->vals[i], &tmp, 10); - if ( *tmp || idx < 0 || idx >= INT_MAX - 1) - { - hts_log_warning("Error parsing the IDX tag, skipping"); - return 0; - } - } - else if ( !strcmp(hrec->keys[i], "Type") ) - { - if ( !strcmp(hrec->vals[i], "Integer") ) type = BCF_HT_INT; - else if ( !strcmp(hrec->vals[i], "Float") ) type = BCF_HT_REAL; - else if ( !strcmp(hrec->vals[i], "String") ) type = BCF_HT_STR; - else if ( !strcmp(hrec->vals[i], "Character") ) type = BCF_HT_STR; - else if ( !strcmp(hrec->vals[i], "Flag") ) type = BCF_HT_FLAG; - else - { - hts_log_warning("The type \"%s\" is not supported, assuming \"String\"", hrec->vals[i]); - type = BCF_HT_STR; - } - } - else if ( !strcmp(hrec->keys[i], "Number") ) - { - if ( !strcmp(hrec->vals[i],"A") ) var = BCF_VL_A; - else if ( !strcmp(hrec->vals[i],"R") ) var = BCF_VL_R; - else if ( !strcmp(hrec->vals[i],"G") ) var = BCF_VL_G; - else if ( !strcmp(hrec->vals[i],".") ) var = BCF_VL_VAR; - else - { - sscanf(hrec->vals[i],"%d",&num); - var = BCF_VL_FIXED; - } - if (var != BCF_VL_FIXED) num = 0xfffff; - } - } - if (hrec->type == BCF_HL_INFO || hrec->type == BCF_HL_FMT) { - if (type == -1) { - hts_log_warning("%s %s field has no Type defined. Assuming String", - *hrec->key == 'I' ? "An" : "A", hrec->key); - type = BCF_HT_STR; - } - if (var == -1) { - hts_log_warning("%s %s field has no Number defined. Assuming '.'", - *hrec->key == 'I' ? 
"An" : "A", hrec->key); - var = BCF_VL_VAR; - } - if ( type==BCF_HT_FLAG && (var!=BCF_VL_FIXED || num!=0) ) - { - hts_log_warning("The definition of Flag \"%s/%s\" is invalid, forcing Number=0", hrec->key,id); - var = BCF_VL_FIXED; - num = 0; - } - } - uint32_t info = ((((uint32_t)num) & 0xfffff)<<12 | - (var & 0xf) << 8 | - (type & 0xf) << 4 | - (((uint32_t) hrec->type) & 0xf)); - - if ( !id ) return 0; - str = strdup(id); - if (!str) return -1; - - vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_ID]; - k = kh_get(vdict, d, str); - if ( k != kh_end(d) ) - { - // already present - free(str); - if ( kh_val(d, k).hrec[info&0xf] ) return 0; - kh_val(d, k).info[info&0xf] = info; - kh_val(d, k).hrec[info&0xf] = hrec; - if ( idx==-1 ) { - if (hrec_add_idx(hrec, kh_val(d, k).id) < 0) { - return -1; - } - } - return 1; - } - k = kh_put(vdict, d, str, &ret); - if (ret < 0) { - free(str); - return -1; - } - kh_val(d, k) = bcf_idinfo_def; - kh_val(d, k).info[info&0xf] = info; - kh_val(d, k).hrec[info&0xf] = hrec; - kh_val(d, k).id = idx; - if (bcf_hdr_set_idx(hdr, BCF_DT_ID, kh_key(d,k), &kh_val(d,k)) < 0) { - kh_del(vdict, d, k); - free(str); - return -1; - } - if ( idx==-1 ) { - if (hrec_add_idx(hrec, kh_val(d,k).id) < 0) { - return -1; - } - } - - return 1; -} - -static void bcf_hdr_unregister_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) -{ - if (hrec->type == BCF_HL_FLT || - hrec->type == BCF_HL_INFO || - hrec->type == BCF_HL_FMT || - hrec->type == BCF_HL_CTG) { - int id = bcf_hrec_find_key(hrec, "ID"); - if (id < 0 || !hrec->vals[id]) - return; - vdict_t *dict = (hrec->type == BCF_HL_CTG - ? (vdict_t*)hdr->dict[BCF_DT_CTG] - : (vdict_t*)hdr->dict[BCF_DT_ID]); - khint_t k = kh_get(vdict, dict, hrec->vals[id]); - if (k != kh_end(dict)) - kh_val(dict, k).hrec[hrec->type==BCF_HL_CTG ? 
0 : hrec->type] = NULL; - } -} - -static void bcf_hdr_remove_from_hdict(bcf_hdr_t *hdr, bcf_hrec_t *hrec) -{ - kstring_t str = KS_INITIALIZE; - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - khint_t k; - int id; - - switch (hrec->type) { - case BCF_HL_GEN: - if (ksprintf(&str, "##%s=%s", hrec->key,hrec->value) < 0) - str.l = 0; - break; - case BCF_HL_STR: - id = bcf_hrec_find_key(hrec, "ID"); - if (id < 0) - return; - if (!hrec->vals[id] || - ksprintf(&str, "##%s=", hrec->key, hrec->vals[id]) < 0) - str.l = 0; - break; - default: - return; - } - if (str.l) { - k = kh_get(hdict, aux->gen, str.s); - } else { - // Couldn't get a string for some reason, so try the hard way... - for (k = kh_begin(aux->gen); k < kh_end(aux->gen); k++) { - if (kh_exist(aux->gen, k) && kh_val(aux->gen, k) == hrec) - break; - } - } - if (k != kh_end(aux->gen) && kh_val(aux->gen, k) == hrec) { - kh_val(aux->gen, k) = NULL; - free((char *) kh_key(aux->gen, k)); - kh_key(aux->gen, k) = NULL; - kh_del(hdict, aux->gen, k); - } - free(str.s); -} - -int bcf_hdr_update_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec, const bcf_hrec_t *tmp) -{ - // currently only for bcf_hdr_set_version - assert( hrec->type==BCF_HL_GEN ); - int ret; - khint_t k; - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - for (k=kh_begin(aux->gen); kgen); k++) - { - if ( !kh_exist(aux->gen,k) ) continue; - if ( hrec!=(bcf_hrec_t*)kh_val(aux->gen,k) ) continue; - break; - } - assert( kgen) ); // something went wrong, should never happen - free((char*)kh_key(aux->gen,k)); - kh_del(hdict,aux->gen,k); - kstring_t str = {0,0,0}; - if ( ksprintf(&str, "##%s=%s", tmp->key,tmp->value) < 0 ) - { - free(str.s); - return -1; - } - k = kh_put(hdict, aux->gen, str.s, &ret); - if ( ret<0 ) - { - free(str.s); - return -1; - } - free(hrec->value); - hrec->value = strdup(tmp->value); - if ( !hrec->value ) return -1; - return 0; -} - -int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) -{ - kstring_t str = {0,0,0}; - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - - int 
res; - if ( !hrec ) return 0; - - bcf_hrec_check(hrec); // todo: check return status and propagate errors up - - res = bcf_hdr_register_hrec(hdr,hrec); - if (res < 0) return -1; - if ( !res ) - { - // If one of the hashed field, then it is already present - if ( hrec->type != BCF_HL_GEN ) - { - bcf_hrec_destroy(hrec); - return 0; - } - - // Is one of the generic fields and already present? - if ( ksprintf(&str, "##%s=%s", hrec->key,hrec->value) < 0 ) - { - free(str.s); - return -1; - } - khint_t k = kh_get(hdict, aux->gen, str.s); - if ( k != kh_end(aux->gen) ) - { - // duplicate record - bcf_hrec_destroy(hrec); - free(str.s); - return 0; - } - } - - int i; - if ( hrec->type==BCF_HL_STR && (i=bcf_hrec_find_key(hrec,"ID"))>=0 ) - { - if ( ksprintf(&str, "##%s=", hrec->key,hrec->vals[i]) < 0 ) - { - free(str.s); - return -1; - } - khint_t k = kh_get(hdict, aux->gen, str.s); - if ( k != kh_end(aux->gen) ) - { - // duplicate record - bcf_hrec_destroy(hrec); - free(str.s); - return 0; - } - } - - // New record, needs to be added - int n = hdr->nhrec + 1; - bcf_hrec_t **new_hrec = realloc(hdr->hrec, n*sizeof(bcf_hrec_t*)); - if (!new_hrec) { - free(str.s); - bcf_hdr_unregister_hrec(hdr, hrec); - return -1; - } - hdr->hrec = new_hrec; - - if ( str.s ) - { - khint_t k = kh_put(hdict, aux->gen, str.s, &res); - if ( res<0 ) - { - free(str.s); - return -1; - } - kh_val(aux->gen,k) = hrec; - } - - hdr->hrec[hdr->nhrec] = hrec; - hdr->dirty = 1; - hdr->nhrec = n; - - return hrec->type==BCF_HL_GEN ? 0 : 1; -} - -bcf_hrec_t *bcf_hdr_get_hrec(const bcf_hdr_t *hdr, int type, const char *key, const char *value, const char *str_class) -{ - int i; - if ( type==BCF_HL_GEN ) - { - // e.g. 
##fileformat=VCFv4.2 - // ##source=GenomicsDBImport - // ##bcftools_viewVersion=1.16-80-gdfdb0923+htslib-1.16-34-g215d364 - if ( value ) - { - kstring_t str = {0,0,0}; - ksprintf(&str, "##%s=%s", key,value); - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - khint_t k = kh_get(hdict, aux->gen, str.s); - free(str.s); - if ( k == kh_end(aux->gen) ) return NULL; - return kh_val(aux->gen, k); - } - for (i=0; inhrec; i++) - { - if ( hdr->hrec[i]->type!=type ) continue; - if ( strcmp(hdr->hrec[i]->key,key) ) continue; - return hdr->hrec[i]; - } - return NULL; - } - else if ( type==BCF_HL_STR ) - { - // e.g. ##GATKCommandLine= - // ##ALT= - if (!str_class) return NULL; - if ( !strcmp("ID",key) ) - { - kstring_t str = {0,0,0}; - ksprintf(&str, "##%s=<%s=%s>",str_class,key,value); - bcf_hdr_aux_t *aux = get_hdr_aux(hdr); - khint_t k = kh_get(hdict, aux->gen, str.s); - free(str.s); - if ( k == kh_end(aux->gen) ) return NULL; - return kh_val(aux->gen, k); - } - for (i=0; inhrec; i++) - { - if ( hdr->hrec[i]->type!=type ) continue; - if ( strcmp(hdr->hrec[i]->key,str_class) ) continue; - int j = bcf_hrec_find_key(hdr->hrec[i],key); - if ( j>=0 && !strcmp(hdr->hrec[i]->vals[j],value) ) return hdr->hrec[i]; - } - return NULL; - } - vdict_t *d = type==BCF_HL_CTG ? 
(vdict_t*)hdr->dict[BCF_DT_CTG] : (vdict_t*)hdr->dict[BCF_DT_ID]; - khint_t k = kh_get(vdict, d, value); - if ( k == kh_end(d) ) return NULL; - return kh_val(d, k).hrec[type==BCF_HL_CTG?0:type]; -} - -void bcf_hdr_check_sanity(bcf_hdr_t *hdr) -{ - static int PL_warned = 0, GL_warned = 0; - - if ( !PL_warned ) - { - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, "PL"); - if ( bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) && bcf_hdr_id2length(hdr,BCF_HL_FMT,id)!=BCF_VL_G ) - { - hts_log_warning("PL should be declared as Number=G"); - PL_warned = 1; - } - } - if ( !GL_warned ) - { - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, "GL"); - if ( bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) && bcf_hdr_id2length(hdr,BCF_HL_FMT,id)!=BCF_VL_G ) - { - hts_log_warning("GL should be declared as Number=G"); - GL_warned = 1; - } - } -} - -int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt) -{ - int len, done = 0; - char *p = htxt; - - // Check sanity: "fileformat" string must come as first - bcf_hrec_t *hrec = bcf_hdr_parse_line(hdr,p,&len); - if ( !hrec || !hrec->key || strcasecmp(hrec->key,"fileformat") ) - hts_log_warning("The first line should be ##fileformat; is the VCF/BCF header broken?"); - if (bcf_hdr_add_hrec(hdr, hrec) < 0) { - bcf_hrec_destroy(hrec); - return -1; - } - - // The filter PASS must appear first in the dictionary - hrec = bcf_hdr_parse_line(hdr,"##FILTER=",&len); - if (!hrec || bcf_hdr_add_hrec(hdr, hrec) < 0) { - bcf_hrec_destroy(hrec); - return -1; - } - - // Parse the whole header - do { - while (NULL != (hrec = bcf_hdr_parse_line(hdr, p, &len))) { - if (bcf_hdr_add_hrec(hdr, hrec) < 0) { - bcf_hrec_destroy(hrec); - return -1; - } - p += len; - } - assert(hrec == NULL); - if (len < 0) { - // len < 0 indicates out-of-memory, or similar error - hts_log_error("Could not parse header line: %s", strerror(errno)); - return -1; - } else if (len > 0) { - // Bad header line. bcf_hdr_parse_line() will have logged it. 
- // Skip and try again on the next line (p + len will be the start - // of the next one). - p += len; - continue; - } - - // Next should be the sample line. If not, it was a malformed - // header, in which case print a warning and skip (many VCF - // operations do not really care about a few malformed lines). - // In the future we may want to add a strict mode that errors in - // this case. - if ( strncmp("#CHROM\t",p,7) && strncmp("#CHROM ",p,7) ) { - char *eol = strchr(p, '\n'); - if (*p != '\0') { - char buffer[320]; - hts_log_warning("Could not parse header line: %s", - hts_strprint(buffer, sizeof(buffer), - '"', p, - eol ? (eol - p) : SIZE_MAX)); - } - if (eol) { - p = eol + 1; // Try from the next line. - } else { - done = -1; // No more lines left, give up. - } - } else { - done = 1; // Sample line found - } - } while (!done); - - if (done < 0) { - // No sample line is fatal. - hts_log_error("Could not parse the header, sample line not found"); - return -1; - } - - if (bcf_hdr_parse_sample_line(hdr,p) < 0) - return -1; - if (bcf_hdr_sync(hdr) < 0) - return -1; - bcf_hdr_check_sanity(hdr); - return 0; -} - -int bcf_hdr_append(bcf_hdr_t *hdr, const char *line) -{ - int len; - bcf_hrec_t *hrec = bcf_hdr_parse_line(hdr, (char*) line, &len); - if ( !hrec ) return -1; - if (bcf_hdr_add_hrec(hdr, hrec) < 0) - return -1; - return 0; -} - -void bcf_hdr_remove(bcf_hdr_t *hdr, int type, const char *key) -{ - int i = 0; - bcf_hrec_t *hrec; - if ( !key ) - { - // no key, remove all entries of this type - while ( inhrec ) - { - if ( hdr->hrec[i]->type!=type ) { i++; continue; } - hrec = hdr->hrec[i]; - bcf_hdr_unregister_hrec(hdr, hrec); - bcf_hdr_remove_from_hdict(hdr, hrec); - hdr->dirty = 1; - hdr->nhrec--; - if ( i < hdr->nhrec ) - memmove(&hdr->hrec[i],&hdr->hrec[i+1],(hdr->nhrec-i)*sizeof(bcf_hrec_t*)); - bcf_hrec_destroy(hrec); - } - return; - } - while (1) - { - if ( type==BCF_HL_FLT || type==BCF_HL_INFO || type==BCF_HL_FMT || type== BCF_HL_CTG ) - { - hrec = 
bcf_hdr_get_hrec(hdr, type, "ID", key, NULL); - if ( !hrec ) return; - - for (i=0; inhrec; i++) - if ( hdr->hrec[i]==hrec ) break; - assert( inhrec ); - - vdict_t *d = type==BCF_HL_CTG ? (vdict_t*)hdr->dict[BCF_DT_CTG] : (vdict_t*)hdr->dict[BCF_DT_ID]; - khint_t k = kh_get(vdict, d, key); - kh_val(d, k).hrec[type==BCF_HL_CTG?0:type] = NULL; - } - else - { - for (i=0; inhrec; i++) - { - if ( hdr->hrec[i]->type!=type ) continue; - if ( type==BCF_HL_GEN ) - { - if ( !strcmp(hdr->hrec[i]->key,key) ) break; - } - else - { - // not all structured lines have ID, we could be more sophisticated as in bcf_hdr_get_hrec() - int j = bcf_hrec_find_key(hdr->hrec[i], "ID"); - if ( j>=0 && !strcmp(hdr->hrec[i]->vals[j],key) ) break; - } - } - if ( i==hdr->nhrec ) return; - hrec = hdr->hrec[i]; - bcf_hdr_remove_from_hdict(hdr, hrec); - } - - hdr->nhrec--; - if ( i < hdr->nhrec ) - memmove(&hdr->hrec[i],&hdr->hrec[i+1],(hdr->nhrec-i)*sizeof(bcf_hrec_t*)); - bcf_hrec_destroy(hrec); - hdr->dirty = 1; - } -} - -int bcf_hdr_printf(bcf_hdr_t *hdr, const char *fmt, ...) 
-{ - char tmp[256], *line = tmp; - va_list ap; - va_start(ap, fmt); - int n = vsnprintf(line, sizeof(tmp), fmt, ap); - va_end(ap); - - if (n >= sizeof(tmp)) { - n++; // For trailing NUL - line = (char*)malloc(n); - if (!line) - return -1; - - va_start(ap, fmt); - vsnprintf(line, n, fmt, ap); - va_end(ap); - } - - int ret = bcf_hdr_append(hdr, line); - - if (line != tmp) free(line); - return ret; -} - - -/********************** - *** BCF header I/O *** - **********************/ - -const char *bcf_hdr_get_version(const bcf_hdr_t *hdr) -{ - bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_GEN, "fileformat", NULL, NULL); - if ( !hrec ) - { - hts_log_warning("No version string found, assuming VCFv4.2"); - return "VCFv4.2"; - } - return hrec->value; -} - -int bcf_hdr_set_version(bcf_hdr_t *hdr, const char *version) -{ - bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_GEN, "fileformat", NULL, NULL); - if ( !hrec ) - { - int len; - kstring_t str = {0,0,0}; - if ( ksprintf(&str,"##fileformat=%s", version) < 0 ) return -1; - hrec = bcf_hdr_parse_line(hdr, str.s, &len); - free(str.s); - } - else - { - bcf_hrec_t *tmp = bcf_hrec_dup(hrec); - if ( !tmp ) return -1; - free(tmp->value); - tmp->value = strdup(version); - if ( !tmp->value ) return -1; - bcf_hdr_update_hrec(hdr, hrec, tmp); - bcf_hrec_destroy(tmp); - } - hdr->dirty = 1; - return 0; // FIXME: check for errs in this function (return < 0 if so) -} - -bcf_hdr_t *bcf_hdr_init(const char *mode) -{ - int i; - bcf_hdr_t *h; - h = (bcf_hdr_t*)calloc(1, sizeof(bcf_hdr_t)); - if (!h) return NULL; - for (i = 0; i < 3; ++i) { - if ((h->dict[i] = kh_init(vdict)) == NULL) goto fail; - // Supersize the hash to make collisions very unlikely - static int dsize[3] = {16384,16384,2048}; // info, contig, format - if (kh_resize(vdict, h->dict[i], dsize[i]) < 0) goto fail; - } - - bcf_hdr_aux_t *aux = (bcf_hdr_aux_t*)calloc(1,sizeof(bcf_hdr_aux_t)); - if ( !aux ) goto fail; - if ( (aux->gen = kh_init(hdict))==NULL ) { free(aux); goto fail; 
} - aux->key_len = NULL; - aux->dict = *((vdict_t*)h->dict[0]); - free(h->dict[0]); - h->dict[0] = aux; - - if ( strchr(mode,'w') ) - { - bcf_hdr_append(h, "##fileformat=VCFv4.2"); - // The filter PASS must appear first in the dictionary - bcf_hdr_append(h, "##FILTER="); - } - return h; - - fail: - for (i = 0; i < 3; ++i) - kh_destroy(vdict, h->dict[i]); - free(h); - return NULL; -} - -void bcf_hdr_destroy(bcf_hdr_t *h) -{ - int i; - khint_t k; - if (!h) return; - for (i = 0; i < 3; ++i) { - vdict_t *d = (vdict_t*)h->dict[i]; - if (d == 0) continue; - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((char*)kh_key(d, k)); - if ( i==0 ) - { - bcf_hdr_aux_t *aux = get_hdr_aux(h); - for (k=kh_begin(aux->gen); kgen); k++) - if ( kh_exist(aux->gen,k) ) free((char*)kh_key(aux->gen,k)); - kh_destroy(hdict, aux->gen); - free(aux->key_len); // may exist for dict[0] only - } - kh_destroy(vdict, d); - free(h->id[i]); - } - for (i=0; inhrec; i++) - bcf_hrec_destroy(h->hrec[i]); - if (h->nhrec) free(h->hrec); - if (h->samples) free(h->samples); - free(h->keep_samples); - free(h->transl[0]); free(h->transl[1]); - free(h->mem.s); - free(h); -} - -bcf_hdr_t *bcf_hdr_read(htsFile *hfp) -{ - if (hfp->format.format == vcf) - return vcf_hdr_read(hfp); - if (hfp->format.format != bcf) { - hts_log_error("Input is not detected as bcf or vcf format"); - return NULL; - } - - assert(hfp->is_bgzf); - - BGZF *fp = hfp->fp.bgzf; - uint8_t magic[5]; - bcf_hdr_t *h; - h = bcf_hdr_init("r"); - if (!h) { - hts_log_error("Failed to allocate bcf header"); - return NULL; - } - if (bgzf_read(fp, magic, 5) != 5) - { - hts_log_error("Failed to read the header (reading BCF in text mode?)"); - bcf_hdr_destroy(h); - return NULL; - } - if (strncmp((char*)magic, "BCF\2\2", 5) != 0) - { - if (!strncmp((char*)magic, "BCF", 3)) - hts_log_error("Invalid BCF2 magic string: only BCFv2.2 is supported"); - else - hts_log_error("Invalid BCF2 magic string"); - bcf_hdr_destroy(h); - return NULL; - } 
- uint8_t buf[4]; - size_t hlen; - char *htxt = NULL; - if (bgzf_read(fp, buf, 4) != 4) goto fail; - hlen = buf[0] | (buf[1] << 8) | (buf[2] << 16) | ((size_t) buf[3] << 24); - if (hlen >= SIZE_MAX) { errno = ENOMEM; goto fail; } -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (hlen > FUZZ_ALLOC_LIMIT/2) { errno = ENOMEM; goto fail; } -#endif - htxt = (char*)malloc(hlen + 1); - if (!htxt) goto fail; - if (bgzf_read(fp, htxt, hlen) != hlen) goto fail; - htxt[hlen] = '\0'; // Ensure htxt is terminated - if ( bcf_hdr_parse(h, htxt) < 0 ) goto fail; - free(htxt); - return h; - fail: - hts_log_error("Failed to read BCF header"); - free(htxt); - bcf_hdr_destroy(h); - return NULL; -} - -int bcf_hdr_write(htsFile *hfp, bcf_hdr_t *h) -{ - if (!h) { - errno = EINVAL; - return -1; - } - if ( h->dirty ) { - if (bcf_hdr_sync(h) < 0) return -1; - } - hfp->format.category = variant_data; - if (hfp->format.format == vcf || hfp->format.format == text_format) { - hfp->format.format = vcf; - return vcf_hdr_write(hfp, h); - } - - if (hfp->format.format == binary_format) - hfp->format.format = bcf; - - kstring_t htxt = {0,0,0}; - if (bcf_hdr_format(h, 1, &htxt) < 0) { - free(htxt.s); - return -1; - } - kputc('\0', &htxt); // include the \0 byte - - BGZF *fp = hfp->fp.bgzf; - if ( bgzf_write(fp, "BCF\2\2", 5) !=5 ) return -1; - uint8_t hlen[4]; - u32_to_le(htxt.l, hlen); - if ( bgzf_write(fp, hlen, 4) !=4 ) return -1; - if ( bgzf_write(fp, htxt.s, htxt.l) != htxt.l ) return -1; - - free(htxt.s); - return 0; -} - -/******************** - *** BCF site I/O *** - ********************/ - -bcf1_t *bcf_init(void) -{ - bcf1_t *v; - v = (bcf1_t*)calloc(1, sizeof(bcf1_t)); - return v; -} - -void bcf_clear(bcf1_t *v) -{ - int i; - for (i=0; id.m_info; i++) - { - if ( v->d.info[i].vptr_free ) - { - free(v->d.info[i].vptr - v->d.info[i].vptr_off); - v->d.info[i].vptr_free = 0; - } - } - for (i=0; id.m_fmt; i++) - { - if ( v->d.fmt[i].p_free ) - { - free(v->d.fmt[i].p - v->d.fmt[i].p_off); - 
v->d.fmt[i].p_free = 0; - } - } - v->rid = v->pos = v->rlen = v->unpacked = 0; - bcf_float_set_missing(v->qual); - v->n_info = v->n_allele = v->n_fmt = v->n_sample = 0; - v->shared.l = v->indiv.l = 0; - v->d.var_type = -1; - v->d.shared_dirty = 0; - v->d.indiv_dirty = 0; - v->d.n_flt = 0; - v->errcode = 0; - if (v->d.m_als) v->d.als[0] = 0; - if (v->d.m_id) v->d.id[0] = 0; -} - -void bcf_empty(bcf1_t *v) -{ - bcf_clear1(v); - free(v->d.id); - free(v->d.als); - free(v->d.allele); free(v->d.flt); free(v->d.info); free(v->d.fmt); - if (v->d.var ) free(v->d.var); - free(v->shared.s); free(v->indiv.s); - memset(&v->d,0,sizeof(v->d)); - memset(&v->shared,0,sizeof(v->shared)); - memset(&v->indiv,0,sizeof(v->indiv)); -} - -void bcf_destroy(bcf1_t *v) -{ - if (!v) return; - bcf_empty1(v); - free(v); -} - -static inline int bcf_read1_core(BGZF *fp, bcf1_t *v) -{ - uint8_t x[32]; - ssize_t ret; - uint32_t shared_len, indiv_len; - if ((ret = bgzf_read(fp, x, 32)) != 32) { - if (ret == 0) return -1; - return -2; - } - bcf_clear1(v); - shared_len = le_to_u32(x); - if (shared_len < 24) return -2; - shared_len -= 24; // to exclude six 32-bit integers - indiv_len = le_to_u32(x + 4); -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // ks_resize() normally allocates 1.5 * requested size to allow for growth - if ((uint64_t) shared_len + indiv_len > FUZZ_ALLOC_LIMIT / 3 * 2) return -2; -#endif - if (ks_resize(&v->shared, shared_len ? shared_len : 1) != 0) return -2; - if (ks_resize(&v->indiv, indiv_len ? indiv_len : 1) != 0) return -2; - v->rid = le_to_i32(x + 8); - v->pos = le_to_u32(x + 12); - if ( v->pos==UINT32_MAX ) v->pos = -1; // this is for telomere coordinate, e.g. 
MT:0 - v->rlen = le_to_i32(x + 16); - v->qual = le_to_float(x + 20); - v->n_info = le_to_u16(x + 24); - v->n_allele = le_to_u16(x + 26); - v->n_sample = le_to_u32(x + 28) & 0xffffff; - v->n_fmt = x[31]; - v->shared.l = shared_len; - v->indiv.l = indiv_len; - // silent fix of broken BCFs produced by earlier versions of bcf_subset, prior to and including bd6ed8b4 - if ( (!v->indiv.l || !v->n_sample) && v->n_fmt ) v->n_fmt = 0; - - if (bgzf_read(fp, v->shared.s, v->shared.l) != v->shared.l) return -2; - if (bgzf_read(fp, v->indiv.s, v->indiv.l) != v->indiv.l) return -2; - return 0; -} - -#define bit_array_size(n) ((n)/8+1) -#define bit_array_set(a,i) ((a)[(i)/8] |= 1 << ((i)%8)) -#define bit_array_clear(a,i) ((a)[(i)/8] &= ~(1 << ((i)%8))) -#define bit_array_test(a,i) ((a)[(i)/8] & (1 << ((i)%8))) - -static int bcf_dec_typed_int1_safe(uint8_t *p, uint8_t *end, uint8_t **q, - int32_t *val) { - uint32_t t; - if (end - p < 2) return -1; - t = *p++ & 0xf; - /* Use if .. else if ... else instead of switch to force order. Assumption - is that small integers are more frequent than big ones. */ - if (t == BCF_BT_INT8) { - *val = *(int8_t *) p++; - } else { - if (end - p < (1<= end) return -1; - *type = *p & 0xf; - if (*p>>4 != 15) { - *q = p + 1; - *num = *p >> 4; - return 0; - } - r = bcf_dec_typed_int1_safe(p + 1, end, q, num); - if (r) return r; - return *num >= 0 ? 0 : -1; -} - -static const char *get_type_name(int type) { - const char *types[9] = { - "null", "int (8-bit)", "int (16 bit)", "int (32 bit)", - "unknown", "float", "unknown", "char", "unknown" - }; - int t = (type >= 0 && type < 8) ? 
type : 8; - return types[t]; -} - -static void bcf_record_check_err(const bcf_hdr_t *hdr, bcf1_t *rec, - char *type, uint32_t *reports, int i) { - if (*reports == 0 || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos - ": Invalid FORMAT %s %d", - bcf_seqname_safe(hdr,rec), rec->pos+1, type, i); - (*reports)++; -} - -static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) { - uint8_t *ptr, *end; - size_t bytes; - uint32_t err = 0; - int type = 0; - int num = 0; - int reflen = 0; - uint32_t i, reports; - const uint32_t is_integer = ((1 << BCF_BT_INT8) | - (1 << BCF_BT_INT16) | -#ifdef VCF_ALLOW_INT64 - (1 << BCF_BT_INT64) | -#endif - (1 << BCF_BT_INT32)); - const uint32_t is_valid_type = (is_integer | - (1 << BCF_BT_NULL) | - (1 << BCF_BT_FLOAT) | - (1 << BCF_BT_CHAR)); - int32_t max_id = hdr ? hdr->n[BCF_DT_ID] : 0; - - // Check for valid contig ID - if (rec->rid < 0 - || (hdr && (rec->rid >= hdr->n[BCF_DT_CTG] - || hdr->id[BCF_DT_CTG][rec->rid].key == NULL))) { - hts_log_warning("Bad BCF record at %"PRIhts_pos": Invalid %s id %d", rec->pos+1, "CONTIG", rec->rid); - err |= BCF_ERR_CTG_INVALID; - } - - // Check ID - ptr = (uint8_t *) rec->shared.s; - end = ptr + rec->shared.l; - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; - if (type != BCF_BT_CHAR) { - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "ID", type, get_type_name(type)); - err |= BCF_ERR_TAG_INVALID; - } - bytes = (size_t) num << bcf_type_shift[type]; - if (end - ptr < bytes) goto bad_shared; - ptr += bytes; - - // Check REF and ALT - if (rec->n_allele < 1) { - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": No REF allele", - bcf_seqname_safe(hdr,rec), rec->pos+1); - err |= BCF_ERR_TAG_UNDEF; - } - - reports = 0; - for (i = 0; i < rec->n_allele; i++) { - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; - if (type != BCF_BT_CHAR) { - if 
(!reports++ || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "REF/ALT", type, get_type_name(type)); - err |= BCF_ERR_CHAR; - } - if (i == 0) reflen = num; - bytes = (size_t) num << bcf_type_shift[type]; - if (end - ptr < bytes) goto bad_shared; - ptr += bytes; - } - - // Check FILTER - reports = 0; - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; - if (num > 0) { - bytes = (size_t) num << bcf_type_shift[type]; - if (((1 << type) & is_integer) == 0) { - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "FILTER", type, get_type_name(type)); - err |= BCF_ERR_TAG_INVALID; - if (end - ptr < bytes) goto bad_shared; - ptr += bytes; - } else { - if (end - ptr < bytes) goto bad_shared; - for (i = 0; i < num; i++) { - int32_t key = bcf_dec_int1(ptr, type, &ptr); - if (key < 0 - || (hdr && (key >= max_id - || hdr->id[BCF_DT_ID][key].key == NULL))) { - if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s id %d", bcf_seqname_safe(hdr,rec), rec->pos+1, "FILTER", key); - err |= BCF_ERR_TAG_UNDEF; - } - } - } - } - - // Check INFO - reports = 0; - bcf_idpair_t *id_tmp = hdr ? 
hdr->id[BCF_DT_ID] : NULL; - for (i = 0; i < rec->n_info; i++) { - int32_t key = -1; - if (bcf_dec_typed_int1_safe(ptr, end, &ptr, &key) != 0) goto bad_shared; - if (key < 0 || (hdr && (key >= max_id - || id_tmp[key].key == NULL))) { - if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s id %d", bcf_seqname_safe(hdr,rec), rec->pos+1, "INFO", key); - err |= BCF_ERR_TAG_UNDEF; - } - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; - if (((1 << type) & is_valid_type) == 0 - || (type == BCF_BT_NULL && num > 0)) { - if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) - hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "INFO", type, get_type_name(type)); - err |= BCF_ERR_TAG_INVALID; - } - bytes = (size_t) num << bcf_type_shift[type]; - if (end - ptr < bytes) goto bad_shared; - ptr += bytes; - } - - // Check FORMAT and individual information - ptr = (uint8_t *) rec->indiv.s; - end = ptr + rec->indiv.l; - reports = 0; - for (i = 0; i < rec->n_fmt; i++) { - int32_t key = -1; - if (bcf_dec_typed_int1_safe(ptr, end, &ptr, &key) != 0) goto bad_indiv; - if (key < 0 - || (hdr && (key >= max_id - || id_tmp[key].key == NULL))) { - bcf_record_check_err(hdr, rec, "id", &reports, key); - err |= BCF_ERR_TAG_UNDEF; - } - if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_indiv; - if (((1 << type) & is_valid_type) == 0 - || (type == BCF_BT_NULL && num > 0)) { - bcf_record_check_err(hdr, rec, "type", &reports, type); - err |= BCF_ERR_TAG_INVALID; - } - bytes = ((size_t) num << bcf_type_shift[type]) * rec->n_sample; - if (end - ptr < bytes) goto bad_indiv; - ptr += bytes; - } - - if (!err && rec->rlen < 0) { - // Treat bad rlen as a warning instead of an error, and try to - // fix up by using the length of the stored REF allele. 
- static int warned = 0; - if (!warned) { - hts_log_warning("BCF record at %s:%"PRIhts_pos" has invalid RLEN (%"PRIhts_pos"). " - "Only one invalid RLEN will be reported.", - bcf_seqname_safe(hdr,rec), rec->pos+1, rec->rlen); - warned = 1; - } - rec->rlen = reflen >= 0 ? reflen : 0; - } - - rec->errcode |= err; - - return err ? -2 : 0; // Return -2 so bcf_read() reports an error - - bad_shared: - hts_log_error("Bad BCF record at %s:%"PRIhts_pos" - shared section malformed or too short", bcf_seqname_safe(hdr,rec), rec->pos+1); - return -2; - - bad_indiv: - hts_log_error("Bad BCF record at %s:%"PRIhts_pos" - individuals section malformed or too short", bcf_seqname_safe(hdr,rec), rec->pos+1); - return -2; -} - -static inline uint8_t *bcf_unpack_fmt_core1(uint8_t *ptr, int n_sample, bcf_fmt_t *fmt); -int bcf_subset_format(const bcf_hdr_t *hdr, bcf1_t *rec) -{ - if ( !hdr->keep_samples ) return 0; - if ( !bcf_hdr_nsamples(hdr) ) - { - rec->indiv.l = rec->n_sample = 0; - return 0; - } - - int i, j; - uint8_t *ptr = (uint8_t*)rec->indiv.s, *dst = NULL, *src; - bcf_dec_t *dec = &rec->d; - hts_expand(bcf_fmt_t, rec->n_fmt, dec->m_fmt, dec->fmt); - for (i=0; im_fmt; ++i) dec->fmt[i].p_free = 0; - - for (i=0; in_fmt; i++) - { - ptr = bcf_unpack_fmt_core1(ptr, rec->n_sample, &dec->fmt[i]); - src = dec->fmt[i].p - dec->fmt[i].size; - if ( dst ) - { - memmove(dec->fmt[i-1].p + dec->fmt[i-1].p_len, dec->fmt[i].p - dec->fmt[i].p_off, dec->fmt[i].p_off); - dec->fmt[i].p = dec->fmt[i-1].p + dec->fmt[i-1].p_len + dec->fmt[i].p_off; - } - dst = dec->fmt[i].p; - for (j=0; jnsamples_ori; j++) - { - src += dec->fmt[i].size; - if ( !bit_array_test(hdr->keep_samples,j) ) continue; - memmove(dst, src, dec->fmt[i].size); - dst += dec->fmt[i].size; - } - rec->indiv.l -= dec->fmt[i].p_len - (dst - dec->fmt[i].p); - dec->fmt[i].p_len = dst - dec->fmt[i].p; - } - rec->unpacked |= BCF_UN_FMT; - - rec->n_sample = bcf_hdr_nsamples(hdr); - return 0; -} - -int bcf_read(htsFile *fp, const bcf_hdr_t 
*h, bcf1_t *v) -{ - if (fp->format.format == vcf) return vcf_read(fp,h,v); - int ret = bcf_read1_core(fp->fp.bgzf, v); - if (ret == 0) ret = bcf_record_check(h, v); - if ( ret!=0 || !h->keep_samples ) return ret; - return bcf_subset_format(h,v); -} - -int bcf_readrec(BGZF *fp, void *null, void *vv, int *tid, hts_pos_t *beg, hts_pos_t *end) -{ - bcf1_t *v = (bcf1_t *) vv; - int ret = bcf_read1_core(fp, v); - if (ret == 0) ret = bcf_record_check(NULL, v); - if (ret >= 0) - *tid = v->rid, *beg = v->pos, *end = v->pos + v->rlen; - return ret; -} - -static inline int bcf1_sync_id(bcf1_t *line, kstring_t *str) -{ - // single typed string - if ( line->d.id && strcmp(line->d.id, ".") ) { - return bcf_enc_vchar(str, strlen(line->d.id), line->d.id); - } else { - return bcf_enc_size(str, 0, BCF_BT_CHAR); - } -} -static inline int bcf1_sync_alleles(bcf1_t *line, kstring_t *str) -{ - // list of typed strings - int i; - for (i=0; in_allele; i++) { - if (bcf_enc_vchar(str, strlen(line->d.allele[i]), line->d.allele[i]) < 0) - return -1; - } - if ( !line->rlen && line->n_allele ) line->rlen = strlen(line->d.allele[0]); - return 0; -} -static inline int bcf1_sync_filter(bcf1_t *line, kstring_t *str) -{ - // typed vector of integers - if ( line->d.n_flt ) { - return bcf_enc_vint(str, line->d.n_flt, line->d.flt, -1); - } else { - return bcf_enc_vint(str, 0, 0, -1); - } -} - -static inline int bcf1_sync_info(bcf1_t *line, kstring_t *str) -{ - // pairs of typed vectors - int i, irm = -1, e = 0; - for (i=0; in_info; i++) - { - bcf_info_t *info = &line->d.info[i]; - if ( !info->vptr ) - { - // marked for removal - if ( irm < 0 ) irm = i; - continue; - } - e |= kputsn_(info->vptr - info->vptr_off, info->vptr_len + info->vptr_off, str) < 0; - if ( irm >=0 ) - { - bcf_info_t tmp = line->d.info[irm]; line->d.info[irm] = line->d.info[i]; line->d.info[i] = tmp; - while ( irm<=i && line->d.info[irm].vptr ) irm++; - } - } - if ( irm>=0 ) line->n_info = irm; - return e == 0 ? 
0 : -1; -} - -static int bcf1_sync(bcf1_t *line) -{ - char *shared_ori = line->shared.s; - size_t prev_len; - - kstring_t tmp = {0,0,0}; - if ( !line->shared.l ) - { - // New line created via API, BCF data blocks do not exist. Get it ready for BCF output - tmp = line->shared; - bcf1_sync_id(line, &tmp); - line->unpack_size[0] = tmp.l; prev_len = tmp.l; - - bcf1_sync_alleles(line, &tmp); - line->unpack_size[1] = tmp.l - prev_len; prev_len = tmp.l; - - bcf1_sync_filter(line, &tmp); - line->unpack_size[2] = tmp.l - prev_len; - - bcf1_sync_info(line, &tmp); - line->shared = tmp; - } - else if ( line->d.shared_dirty ) - { - // The line was edited, update the BCF data block. - - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line,BCF_UN_STR); - - // ptr_ori points to the original unchanged BCF data. - uint8_t *ptr_ori = (uint8_t *) line->shared.s; - - // ID: single typed string - if ( line->d.shared_dirty & BCF1_DIRTY_ID ) - bcf1_sync_id(line, &tmp); - else - kputsn_(ptr_ori, line->unpack_size[0], &tmp); - ptr_ori += line->unpack_size[0]; - line->unpack_size[0] = tmp.l; prev_len = tmp.l; - - // REF+ALT: list of typed strings - if ( line->d.shared_dirty & BCF1_DIRTY_ALS ) - bcf1_sync_alleles(line, &tmp); - else - { - kputsn_(ptr_ori, line->unpack_size[1], &tmp); - if ( !line->rlen && line->n_allele ) line->rlen = strlen(line->d.allele[0]); - } - ptr_ori += line->unpack_size[1]; - line->unpack_size[1] = tmp.l - prev_len; prev_len = tmp.l; - - if ( line->unpacked & BCF_UN_FLT ) - { - // FILTER: typed vector of integers - if ( line->d.shared_dirty & BCF1_DIRTY_FLT ) - bcf1_sync_filter(line, &tmp); - else if ( line->d.n_flt ) - kputsn_(ptr_ori, line->unpack_size[2], &tmp); - else - bcf_enc_vint(&tmp, 0, 0, -1); - ptr_ori += line->unpack_size[2]; - line->unpack_size[2] = tmp.l - prev_len; - - if ( line->unpacked & BCF_UN_INFO ) - { - // INFO: pairs of typed vectors - if ( line->d.shared_dirty & BCF1_DIRTY_INF ) - { - bcf1_sync_info(line, &tmp); - ptr_ori = 
(uint8_t*)line->shared.s + line->shared.l; - } - } - } - - int size = line->shared.l - (size_t)ptr_ori + (size_t)line->shared.s; - if ( size ) kputsn_(ptr_ori, size, &tmp); - - free(line->shared.s); - line->shared = tmp; - } - if ( line->shared.s != shared_ori && line->unpacked & BCF_UN_INFO ) - { - // Reallocated line->shared.s block invalidated line->d.info[].vptr pointers - size_t off_new = line->unpack_size[0] + line->unpack_size[1] + line->unpack_size[2]; - int i; - for (i=0; in_info; i++) - { - uint8_t *vptr_free = line->d.info[i].vptr_free ? line->d.info[i].vptr - line->d.info[i].vptr_off : NULL; - line->d.info[i].vptr = (uint8_t*) line->shared.s + off_new + line->d.info[i].vptr_off; - off_new += line->d.info[i].vptr_len + line->d.info[i].vptr_off; - if ( vptr_free ) - { - free(vptr_free); - line->d.info[i].vptr_free = 0; - } - } - } - - if ( line->n_sample && line->n_fmt && (!line->indiv.l || line->d.indiv_dirty) ) - { - // The genotype fields changed or are not present - tmp.l = tmp.m = 0; tmp.s = NULL; - int i, irm = -1; - for (i=0; in_fmt; i++) - { - bcf_fmt_t *fmt = &line->d.fmt[i]; - if ( !fmt->p ) - { - // marked for removal - if ( irm < 0 ) irm = i; - continue; - } - kputsn_(fmt->p - fmt->p_off, fmt->p_len + fmt->p_off, &tmp); - if ( irm >=0 ) - { - bcf_fmt_t tfmt = line->d.fmt[irm]; line->d.fmt[irm] = line->d.fmt[i]; line->d.fmt[i] = tfmt; - while ( irm<=i && line->d.fmt[irm].p ) irm++; - } - - } - if ( irm>=0 ) line->n_fmt = irm; - free(line->indiv.s); - line->indiv = tmp; - - // Reallocated line->indiv.s block invalidated line->d.fmt[].p pointers - size_t off_new = 0; - for (i=0; in_fmt; i++) - { - uint8_t *p_free = line->d.fmt[i].p_free ? 
line->d.fmt[i].p - line->d.fmt[i].p_off : NULL; - line->d.fmt[i].p = (uint8_t*) line->indiv.s + off_new + line->d.fmt[i].p_off; - off_new += line->d.fmt[i].p_len + line->d.fmt[i].p_off; - if ( p_free ) - { - free(p_free); - line->d.fmt[i].p_free = 0; - } - } - } - if ( !line->n_sample ) line->n_fmt = 0; - line->d.shared_dirty = line->d.indiv_dirty = 0; - return 0; -} - -bcf1_t *bcf_copy(bcf1_t *dst, bcf1_t *src) -{ - bcf1_sync(src); - - bcf_clear(dst); - dst->rid = src->rid; - dst->pos = src->pos; - dst->rlen = src->rlen; - dst->qual = src->qual; - dst->n_info = src->n_info; dst->n_allele = src->n_allele; - dst->n_fmt = src->n_fmt; dst->n_sample = src->n_sample; - - if ( dst->shared.m < src->shared.l ) - { - dst->shared.s = (char*) realloc(dst->shared.s, src->shared.l); - dst->shared.m = src->shared.l; - } - dst->shared.l = src->shared.l; - memcpy(dst->shared.s,src->shared.s,dst->shared.l); - - if ( dst->indiv.m < src->indiv.l ) - { - dst->indiv.s = (char*) realloc(dst->indiv.s, src->indiv.l); - dst->indiv.m = src->indiv.l; - } - dst->indiv.l = src->indiv.l; - memcpy(dst->indiv.s,src->indiv.s,dst->indiv.l); - - return dst; -} -bcf1_t *bcf_dup(bcf1_t *src) -{ - bcf1_t *out = bcf_init1(); - return bcf_copy(out, src); -} - -int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v) -{ - if ( h->dirty ) { - if (bcf_hdr_sync(h) < 0) return -1; - } - if ( bcf_hdr_nsamples(h)!=v->n_sample ) - { - hts_log_error("Broken VCF record, the number of columns at %s:%"PRIhts_pos" does not match the number of samples (%d vs %d)", - bcf_seqname_safe(h,v), v->pos+1, v->n_sample, bcf_hdr_nsamples(h)); - return -1; - } - - if ( hfp->format.format == vcf || hfp->format.format == text_format ) - return vcf_write(hfp,h,v); - - if ( v->errcode & ~BCF_ERR_LIMITS ) // todo: unsure about the other BCF_ERR_LIMITS branches in vcf_parse_format_alloc4() - { - // vcf_parse1() encountered a new contig or tag, undeclared in the - // header. 
At this point, the header must have been printed, - // proceeding would lead to a broken BCF file. Errors must be checked - // and cleared by the caller before we can proceed. - char errdescription[1024] = ""; - hts_log_error("Unchecked error (%d %s) at %s:%"PRIhts_pos, v->errcode, bcf_strerror(v->errcode, errdescription, sizeof(errdescription)), bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - bcf1_sync(v); // check if the BCF record was modified - - if ( v->unpacked & BCF_IS_64BIT ) - { - hts_log_error("Data at %s:%"PRIhts_pos" contains 64-bit values not representable in BCF. Please use VCF instead", bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - - BGZF *fp = hfp->fp.bgzf; - uint8_t x[32]; - u32_to_le(v->shared.l + 24, x); // to include six 32-bit integers - u32_to_le(v->indiv.l, x + 4); - i32_to_le(v->rid, x + 8); - u32_to_le(v->pos, x + 12); - u32_to_le(v->rlen, x + 16); - float_to_le(v->qual, x + 20); - u16_to_le(v->n_info, x + 24); - u16_to_le(v->n_allele, x + 26); - u32_to_le((uint32_t)v->n_fmt<<24 | (v->n_sample & 0xffffff), x + 28); - if ( bgzf_write(fp, x, 32) != 32 ) return -1; - if ( bgzf_write(fp, v->shared.s, v->shared.l) != v->shared.l ) return -1; - if ( bgzf_write(fp, v->indiv.s, v->indiv.l) != v->indiv.l ) return -1; - - if (hfp->idx) { - if (bgzf_idx_push(fp, hfp->idx, v->rid, v->pos, v->pos + v->rlen, - bgzf_tell(fp), 1) < 0) - return -1; - } - - return 0; -} - -/********************** - *** VCF header I/O *** - **********************/ - -static int add_missing_contig_hrec(bcf_hdr_t *h, const char *name) { - bcf_hrec_t *hrec = calloc(1, sizeof(bcf_hrec_t)); - int save_errno; - if (!hrec) goto fail; - - hrec->key = strdup("contig"); - if (!hrec->key) goto fail; - - if (bcf_hrec_add_key(hrec, "ID", strlen("ID")) < 0) goto fail; - if (bcf_hrec_set_val(hrec, hrec->nkeys-1, name, strlen(name), 0) < 0) - goto fail; - if (bcf_hdr_add_hrec(h, hrec) < 0) - goto fail; - return 0; - - fail: - save_errno = errno; - hts_log_error("%s", 
strerror(errno)); - if (hrec) bcf_hrec_destroy(hrec); - errno = save_errno; - return -1; -} - -bcf_hdr_t *vcf_hdr_read(htsFile *fp) -{ - kstring_t txt, *s = &fp->line; - int ret; - bcf_hdr_t *h; - tbx_t *idx = NULL; - const char **names = NULL; - h = bcf_hdr_init("r"); - if (!h) { - hts_log_error("Failed to allocate bcf header"); - return NULL; - } - txt.l = txt.m = 0; txt.s = 0; - while ((ret = hts_getline(fp, KS_SEP_LINE, s)) >= 0) { - int e = 0; - if (s->l == 0) continue; - if (s->s[0] != '#') { - hts_log_error("No sample line"); - goto error; - } - if (s->s[1] != '#' && fp->fn_aux) { // insert contigs here - kstring_t tmp = { 0, 0, NULL }; - hFILE *f = hopen(fp->fn_aux, "r"); - if (f == NULL) { - hts_log_error("Couldn't open \"%s\"", fp->fn_aux); - goto error; - } - while (tmp.l = 0, kgetline(&tmp, (kgets_func *) hgets, f) >= 0) { - char *tab = strchr(tmp.s, '\t'); - if (tab == NULL) continue; - e |= (kputs("##contig=\n", 2, &txt) < 0); - } - free(tmp.s); - if (hclose(f) != 0) { - hts_log_error("Error on closing %s", fp->fn_aux); - goto error; - } - if (e) goto error; - } - if (kputsn(s->s, s->l, &txt) < 0) goto error; - if (kputc('\n', &txt) < 0) goto error; - if (s->s[1] != '#') break; - } - if ( ret < -1 ) goto error; - if ( !txt.s ) - { - hts_log_error("Could not read the header"); - goto error; - } - if ( bcf_hdr_parse(h, txt.s) < 0 ) goto error; - - // check tabix index, are all contigs listed in the header? 
add the missing ones - idx = tbx_index_load3(fp->fn, NULL, HTS_IDX_SILENT_FAIL); - if ( idx ) - { - int i, n, need_sync = 0; - names = tbx_seqnames(idx, &n); - if (!names) goto error; - for (i=0; ivalue ) - { - int j, nout = 0; - e |= ksprintf(str, "##%s=<", hrec->key) < 0; - for (j=0; jnkeys; j++) - { - // do not output IDX if output is VCF - if ( !is_bcf && !strcmp("IDX",hrec->keys[j]) ) continue; - if ( nout ) e |= kputc(',',str) < 0; - e |= ksprintf(str,"%s=%s", hrec->keys[j], hrec->vals[j]) < 0; - nout++; - } - e |= ksprintf(str,">\n") < 0; - } - else - e |= ksprintf(str,"##%s=%s\n", hrec->key,hrec->value) < 0; - - return e == 0 ? 0 : -1; -} - -int bcf_hrec_format(const bcf_hrec_t *hrec, kstring_t *str) -{ - return _bcf_hrec_format(hrec,0,str); -} - -int bcf_hdr_format(const bcf_hdr_t *hdr, int is_bcf, kstring_t *str) -{ - int i, r = 0; - for (i=0; inhrec; i++) - r |= _bcf_hrec_format(hdr->hrec[i], is_bcf, str) < 0; - - r |= ksprintf(str, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO") < 0; - if ( bcf_hdr_nsamples(hdr) ) - { - r |= ksprintf(str, "\tFORMAT") < 0; - for (i=0; isamples[i]) < 0; - } - r |= ksprintf(str, "\n") < 0; - - return r ? 
-1 : 0; -} - -char *bcf_hdr_fmt_text(const bcf_hdr_t *hdr, int is_bcf, int *len) -{ - kstring_t txt = {0,0,0}; - if (bcf_hdr_format(hdr, is_bcf, &txt) < 0) - return NULL; - if ( len ) *len = txt.l; - return txt.s; -} - -const char **bcf_hdr_seqnames(const bcf_hdr_t *h, int *n) -{ - vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG]; - int i, tid, m = kh_size(d); - const char **names = (const char**) calloc(m,sizeof(const char*)); - if ( !names ) - { - hts_log_error("Failed to allocate memory"); - *n = 0; - return NULL; - } - khint_t k; - for (k=kh_begin(d); k= m ) - { - // This can happen after a contig has been removed from BCF header via bcf_hdr_remove() - if ( hts_resize(const char*, tid + 1, &m, &names, HTS_RESIZE_CLEAR)<0 ) - { - hts_log_error("Failed to allocate memory"); - *n = 0; - free(names); - return NULL; - } - m = tid + 1; - } - names[tid] = kh_key(d,k); - } - // ensure there are no gaps - for (i=0,tid=0; tidformat.compression!=no_compression ) { - ret = bgzf_write(fp->fp.bgzf, htxt.s, htxt.l); - if (bgzf_flush(fp->fp.bgzf) != 0) return -1; - } else { - ret = hwrite(fp->fp.hfile, htxt.s, htxt.l); - } - free(htxt.s); - return ret<0 ? -1 : 0; -} - -/*********************** - *** Typed value I/O *** - ***********************/ - -int bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize) -{ - int32_t max = INT32_MIN, min = INT32_MAX; - int i; - if (n <= 0) { - return bcf_enc_size(s, 0, BCF_BT_NULL); - } else if (n == 1) { - return bcf_enc_int1(s, a[0]); - } else { - if (wsize <= 0) wsize = n; - - // Equivalent to: - // for (i = 0; i < n; ++i) { - // if (a[i] == bcf_int32_missing || a[i] == bcf_int32_vector_end ) - // continue; - // if (max < a[i]) max = a[i]; - // if (min > a[i]) min = a[i]; - // } - int max4[4] = {INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN}; - int min4[4] = {INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX}; - for (i = 0; i < (n&~3); i+=4) { - // bcf_int32_missing == INT32_MIN and - // bcf_int32_vector_end == INT32_MIN+1. 
- // We skip these, but can mostly avoid explicit checking - if (max4[0] < a[i+0]) max4[0] = a[i+0]; - if (max4[1] < a[i+1]) max4[1] = a[i+1]; - if (max4[2] < a[i+2]) max4[2] = a[i+2]; - if (max4[3] < a[i+3]) max4[3] = a[i+3]; - if (min4[0] > a[i+0] && a[i+0] > INT32_MIN+1) min4[0] = a[i+0]; - if (min4[1] > a[i+1] && a[i+1] > INT32_MIN+1) min4[1] = a[i+1]; - if (min4[2] > a[i+2] && a[i+2] > INT32_MIN+1) min4[2] = a[i+2]; - if (min4[3] > a[i+3] && a[i+3] > INT32_MIN+1) min4[3] = a[i+3]; - } - min = min4[0]; - if (min > min4[1]) min = min4[1]; - if (min > min4[2]) min = min4[2]; - if (min > min4[3]) min = min4[3]; - max = max4[0]; - if (max < max4[1]) max = max4[1]; - if (max < max4[2]) max = max4[2]; - if (max < max4[3]) max = max4[3]; - for (; i < n; ++i) { - if (max < a[i]) max = a[i]; - if (min > a[i] && a[i] > INT32_MIN+1) min = a[i]; - } - - if (max <= BCF_MAX_BT_INT8 && min >= BCF_MIN_BT_INT8) { - if (bcf_enc_size(s, wsize, BCF_BT_INT8) < 0 || - ks_resize(s, s->l + n) < 0) - return -1; - uint8_t *p = (uint8_t *) s->s + s->l; - for (i = 0; i < n; ++i, p++) { - if ( a[i]==bcf_int32_vector_end ) *p = bcf_int8_vector_end; - else if ( a[i]==bcf_int32_missing ) *p = bcf_int8_missing; - else *p = a[i]; - } - s->l += n; - } else if (max <= BCF_MAX_BT_INT16 && min >= BCF_MIN_BT_INT16) { - uint8_t *p; - if (bcf_enc_size(s, wsize, BCF_BT_INT16) < 0 || - ks_resize(s, s->l + n * sizeof(int16_t)) < 0) - return -1; - p = (uint8_t *) s->s + s->l; - for (i = 0; i < n; ++i) - { - int16_t x; - if ( a[i]==bcf_int32_vector_end ) x = bcf_int16_vector_end; - else if ( a[i]==bcf_int32_missing ) x = bcf_int16_missing; - else x = a[i]; - i16_to_le(x, p); - p += sizeof(int16_t); - } - s->l += n * sizeof(int16_t); - } else { - uint8_t *p; - if (bcf_enc_size(s, wsize, BCF_BT_INT32) < 0 || - ks_resize(s, s->l + n * sizeof(int32_t)) < 0) - return -1; - p = (uint8_t *) s->s + s->l; - for (i = 0; i < n; ++i) { - i32_to_le(a[i], p); - p += sizeof(int32_t); - } - s->l += n * sizeof(int32_t); - 
} - } - - return 0; -} - -#ifdef VCF_ALLOW_INT64 -static int bcf_enc_long1(kstring_t *s, int64_t x) { - uint32_t e = 0; - if (x <= BCF_MAX_BT_INT32 && x >= BCF_MIN_BT_INT32) - return bcf_enc_int1(s, x); - if (x == bcf_int64_vector_end) { - e |= bcf_enc_size(s, 1, BCF_BT_INT8); - e |= kputc(bcf_int8_vector_end, s) < 0; - } else if (x == bcf_int64_missing) { - e |= bcf_enc_size(s, 1, BCF_BT_INT8); - e |= kputc(bcf_int8_missing, s) < 0; - } else { - e |= bcf_enc_size(s, 1, BCF_BT_INT64); - e |= ks_expand(s, 8); - if (e == 0) { u64_to_le(x, (uint8_t *) s->s + s->l); s->l += 8; } - } - return e == 0 ? 0 : -1; -} -#endif - -static inline int serialize_float_array(kstring_t *s, size_t n, const float *a) { - uint8_t *p; - size_t i; - size_t bytes = n * sizeof(float); - - if (bytes / sizeof(float) != n) return -1; - if (ks_resize(s, s->l + bytes) < 0) return -1; - - p = (uint8_t *) s->s + s->l; - for (i = 0; i < n; i++) { - float_to_le(a[i], p); - p += sizeof(float); - } - s->l += bytes; - - return 0; -} - -int bcf_enc_vfloat(kstring_t *s, int n, float *a) -{ - assert(n >= 0); - bcf_enc_size(s, n, BCF_BT_FLOAT); - serialize_float_array(s, n, a); - return 0; // FIXME: check for errs in this function -} - -int bcf_enc_vchar(kstring_t *s, int l, const char *a) -{ - bcf_enc_size(s, l, BCF_BT_CHAR); - kputsn(a, l, s); - return 0; // FIXME: check for errs in this function -} - -// Special case of n==1 as it also occurs quite often in FORMAT data. -// This version is also small enough to get inlined. -static inline int bcf_fmt_array1(kstring_t *s, int type, void *data) { - uint32_t e = 0; - uint8_t *p = (uint8_t *)data; - int32_t v; - - // helps gcc more than clang here. In billions of cycles: - // bcf_fmt_array1 bcf_fmt_array - // gcc7: 23.2 24.3 - // gcc13: 21.6 23.0 - // clang13: 27.1 27.8 - switch (type) { - case BCF_BT_CHAR: - e |= kputc_(*p == bcf_str_missing ? '.' 
: *p, s) < 0; - break; - - case BCF_BT_INT8: - if (*(int8_t *)p != bcf_int8_vector_end) { - e |= ((*(int8_t *)p == bcf_int8_missing) - ? kputc_('.', s) - : kputw(*(int8_t *)p, s)) < 0; - } - break; - case BCF_BT_INT16: - v = le_to_i16(p); - if (v != bcf_int16_vector_end) { - e |= (v == bcf_int16_missing - ? kputc_('.', s) - : kputw(v, s)) < 0; - } - break; - - case BCF_BT_INT32: - v = le_to_i32(p); - if (v != bcf_int32_vector_end) { - e |= (v == bcf_int32_missing - ? kputc_('.', s) - : kputw(v, s)) < 0; - } - break; - - case BCF_BT_FLOAT: - v = le_to_u32(p); - if (v != bcf_float_vector_end) { - e |= (v == bcf_float_missing - ? kputc_('.', s) - : kputd(le_to_float(p), s)) < 0; - } - break; - - default: - hts_log_error("Unexpected type %d", type); - return -1; - } - - return e == 0 ? 0 : -1; -} - -int bcf_fmt_array(kstring_t *s, int n, int type, void *data) -{ - int j = 0; - uint32_t e = 0; - if (n == 0) { - return kputc_('.', s) >= 0 ? 0 : -1; - } - - if (type == BCF_BT_CHAR) - { - char *p = (char *)data; - - // Note bcf_str_missing is already accounted for in n==0 above. - if (n >= 8) { - char *p_end = memchr(p, 0, n); - e |= kputsn(p, p_end ? p_end-p : n, s) < 0; - } else { - for (j = 0; j < n && *p; ++j, ++p) - e |= kputc(*p, s) < 0; - } - } - else - { - #define BRANCH(type_t, convert, is_missing, is_vector_end, kprint) { \ - uint8_t *p = (uint8_t *) data; \ - for (j=0; jid[BCF_DT_ID][key] vdict - int max_m; // number of elements in field array (ie commas) - int size; // field size (max_l or max_g*4 if is_gt) - int offset; // offset of buf into h->mem - uint32_t is_gt:1, // is genotype - max_g:31; // maximum number of genotypes - uint32_t max_l; // length of field - uint32_t y; // h->id[0][fmt[j].key].val->info[BCF_HL_FMT] - uint8_t *buf; // Pointer into h->mem -} fmt_aux_t; - -// fmt_aux_t field notes: -// max_* are biggest sizes of the various FORMAT fields across all samples. 
-// We use these after pivoting the data to ensure easy random access -// of a specific sample. -// -// max_m is only used for type BCF_HT_REAL or BCF_HT_INT -// max_g is only used for is_gt == 1 (will be BCF_HT_STR) -// max_l is only used for is_gt == 0 (will be BCF_HT_STR) -// -// These are computed in vcf_parse_format_max3 and used in -// vcf_parse_format_alloc4 to get the size. -// -// size is computed from max_g, max_l, max_m and is_gt. Once computed -// the max values are never accessed again. -// -// In theory all 4 vars could be coalesced into a single variable, but this -// significantly harms speed (even if done via a union). It's about 25-30% -// slower. - -static inline int align_mem(kstring_t *s) -{ - int e = 0; - if (s->l&7) { - uint64_t zero = 0; - e = kputsn((char*)&zero, 8 - (s->l&7), s) < 0; - } - return e == 0 ? 0 : -1; -} - -#define MAX_N_FMT 255 /* Limited by size of bcf1_t n_fmt field */ - -// detect FORMAT "." -static int vcf_parse_format_empty1(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, - const char *p, const char *q) { - const char *end = s->s + s->l; - if ( q>=end ) - { - hts_log_error("FORMAT column with no sample columns starting at %s:%"PRIhts_pos"", bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_NCOLS; - return -1; - } - - v->n_fmt = 0; - if ( p[0]=='.' && p[1]==0 ) // FORMAT field is empty "." 
- { - v->n_sample = bcf_hdr_nsamples(h); - return 1; - } - - return 0; -} - -// get format information from the dictionary -static int vcf_parse_format_dict2(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, - const char *p, const char *q, fmt_aux_t *fmt) { - const vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; - char *t; - int j; - ks_tokaux_t aux1; - - for (j = 0, t = kstrtok(p, ":", &aux1); t; t = kstrtok(0, 0, &aux1), ++j) { - if (j >= MAX_N_FMT) { - v->errcode |= BCF_ERR_LIMITS; - hts_log_error("FORMAT column at %s:%"PRIhts_pos" lists more identifiers than htslib can handle", - bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - - *(char*)aux1.p = 0; - khint_t k = kh_get(vdict, d, t); - if (k == kh_end(d) || kh_val(d, k).info[BCF_HL_FMT] == 15) { - if ( t[0]=='.' && t[1]==0 ) - { - hts_log_error("Invalid FORMAT tag name '.' at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - hts_log_warning("FORMAT '%s' at %s:%"PRIhts_pos" is not defined in the header, assuming Type=String", t, bcf_seqname_safe(h,v), v->pos+1); - kstring_t tmp = {0,0,0}; - int l; - ksprintf(&tmp, "##FORMAT=", t); - bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); - free(tmp.s); - int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; - if (res < 0) bcf_hrec_destroy(hrec); - if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); - - k = kh_get(vdict, d, t); - v->errcode |= BCF_ERR_TAG_UNDEF; - if (res || k == kh_end(d)) { - hts_log_error("Could not add dummy header for FORMAT '%s' at %s:%"PRIhts_pos, t, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - } - fmt[j].max_l = fmt[j].max_m = fmt[j].max_g = 0; - fmt[j].key = kh_val(d, k).id; - fmt[j].is_gt = (t[0] == 'G' && t[1] == 'T' && !t[2]); - fmt[j].y = h->id[0][fmt[j].key].val->info[BCF_HL_FMT]; - v->n_fmt++; - } - return 0; -} - -// compute max -static int vcf_parse_format_max3(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, - char *p, char *q, fmt_aux_t *fmt) { - int n_sample_ori = -1; - char *r = q + 1; // r: position in the format string - int l = 0, m = 1, g = 1, j; - v->n_sample = 0; // m: max vector size, l: max field len, g: max number of alleles - const char *end = s->s + s->l; - - while ( rkeep_samples ) - { - n_sample_ori++; - if ( !bit_array_test(h->keep_samples,n_sample_ori) ) - { - while ( *r!='\t' && ris_gt) g++; - break; - - case '\t': - *r = 0; // fall through - - default: // valid due to while loop above. 
- case '\0': - case ':': - l = r - r_start; r_start = r; - if (f->max_m < m) f->max_m = m; - if (f->max_l < l) f->max_l = l; - if (f->is_gt && f->max_g < g) f->max_g = g; - l = 0, m = g = 1; - if ( *r==':' ) { - j++; f++; - if ( j>=v->n_fmt ) { - hts_log_error("Incorrect number of FORMAT fields at %s:%"PRIhts_pos"", - h->id[BCF_DT_CTG][v->rid].key, v->pos+1); - v->errcode |= BCF_ERR_NCOLS; - return -1; - } - } else goto end_for; - break; - } - if ( r>=end ) break; - r++; - } - end_for: - v->n_sample++; - if ( v->n_sample == bcf_hdr_nsamples(h) ) break; - r++; - } - - return 0; -} - -// allocate memory for arrays -static int vcf_parse_format_alloc4(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, - const char *p, const char *q, - fmt_aux_t *fmt) { - kstring_t *mem = (kstring_t*)&h->mem; - - int j; - for (j = 0; j < v->n_fmt; ++j) { - fmt_aux_t *f = &fmt[j]; - if ( !f->max_m ) f->max_m = 1; // omitted trailing format field - - if ((f->y>>4&0xf) == BCF_HT_STR) { - f->size = f->is_gt? f->max_g << 2 : f->max_l; - } else if ((f->y>>4&0xf) == BCF_HT_REAL || (f->y>>4&0xf) == BCF_HT_INT) { - f->size = f->max_m << 2; - } else { - hts_log_error("The format type %d at %s:%"PRIhts_pos" is currently not supported", f->y>>4&0xf, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - - if (align_mem(mem) < 0) { - hts_log_error("Memory allocation failure at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - return -1; - } - - // Limit the total memory to ~2Gb per VCF row. This should mean - // malformed VCF data is less likely to take excessive memory and/or - // time. 
- if ((uint64_t) mem->l + v->n_sample * (uint64_t)f->size > INT_MAX) { - static int warned = 0; - if ( !warned ) hts_log_warning("Excessive memory required by FORMAT fields at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - warned = 1; - v->errcode |= BCF_ERR_LIMITS; - f->size = -1; - f->offset = 0; - continue; - } - - f->offset = mem->l; - if (ks_resize(mem, mem->l + v->n_sample * (size_t)f->size) < 0) { - hts_log_error("Memory allocation failure at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - return -1; - } - mem->l += v->n_sample * f->size; - } - - { - int j; - for (j = 0; j < v->n_fmt; ++j) - fmt[j].buf = (uint8_t*)mem->s + fmt[j].offset; - } - - return 0; -} - -// Fill the sample fields -static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, - const char *p, const char *q, fmt_aux_t *fmt) { - static int extreme_val_warned = 0; - int n_sample_ori = -1; - // At beginning of the loop t points to the first char of a format - const char *t = q + 1; - int m = 0; // m: sample id - const int nsamples = bcf_hdr_nsamples(h); - - const char *end = s->s + s->l; - while ( tkeep_samples ) - { - n_sample_ori++; - if ( !bit_array_test(h->keep_samples,n_sample_ori) ) - { - while ( *t && ty>>4&0xf; - if (!z->buf) { - hts_log_error("Memory allocation failure for FORMAT field type %d at %s:%"PRIhts_pos, - z->y>>4&0xf, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - return -1; - } - - if ( z->size==-1 ) - { - // this field is to be ignored, it's too big - while ( *t != ':' && *t ) t++; - } - else if (htype == BCF_HT_STR) { - int l; - if (z->is_gt) { - // Genotypes. - // ([|/])+... where is [0-9]+ or ".". 
- int32_t is_phased = 0; - uint32_t *x = (uint32_t*)(z->buf + z->size * (size_t)m); - uint32_t unreadable = 0; - uint32_t max = 0; - int overflow = 0; - for (l = 0;; ++t) { - if (*t == '.') { - ++t, x[l++] = is_phased; - } else { - const char *tt = t; - uint32_t val; - // Or "v->n_allele < 10", but it doesn't - // seem to be any faster and this feels safer. - if (*t >= '0' && *t <= '9' && - !(t[1] >= '0' && t[1] <= '9')) { - val = *t++ - '0'; - } else { - val = hts_str2uint(t, (char **)&t, - sizeof(val) * CHAR_MAX - 2, - &overflow); - unreadable |= tt == t; - } - if (max < val) max = val; - x[l++] = (val + 1) << 1 | is_phased; - } - is_phased = (*t == '|'); - if (*t != '|' && *t != '/') break; - } - // Possibly check max against v->n_allele instead? - if (overflow || max > (INT32_MAX >> 1) - 1) { - hts_log_error("Couldn't read GT data: value too large at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - if (unreadable) { - hts_log_error("Couldn't read GT data: value not a number or '.' at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - if ( !l ) x[l++] = 0; // An empty field, insert missing value - for (; l < z->size>>2; ++l) - x[l] = bcf_int32_vector_end; - - } else { - // Otherwise arbitrary strings - char *x = (char*)z->buf + z->size * (size_t)m; - for (l = 0; *t != ':' && *t; ++t) - x[l++] = *t; - if (z->size > l) - memset(&x[l], 0, (z->size-l) * sizeof(*x)); - } - - } else if (htype == BCF_HT_INT) { - // One or more integers in an array - int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); - int l; - for (l = 0;; ++t) { - if (*t == '.') { - x[l++] = bcf_int32_missing, ++t; // ++t to skip "." 
- } else { - int overflow = 0; - char *te; - long int tmp_val = hts_str2int(t, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); - if ( te==t || overflow || tmp_valBCF_MAX_BT_INT32 ) - { - if ( !extreme_val_warned ) - { - hts_log_warning("Extreme FORMAT/%s value encountered and set to missing at %s:%"PRIhts_pos, - h->id[BCF_DT_ID][fmt[j-1].key].key, bcf_seqname_safe(h,v), v->pos+1); - extreme_val_warned = 1; - } - tmp_val = bcf_int32_missing; - } - x[l++] = tmp_val; - t = te; - } - if (*t != ',') break; - } - if ( !l ) - x[l++] = bcf_int32_missing; - for (; l < z->size>>2; ++l) - x[l] = bcf_int32_vector_end; - - } else if (htype == BCF_HT_REAL) { - // One of more floating point values in an array - float *x = (float*)(z->buf + z->size * (size_t)m); - int l; - for (l = 0;; ++t) { - if (*t == '.' && !isdigit_c(t[1])) { - bcf_float_set_missing(x[l++]), ++t; // ++t to skip "." - } else { - int overflow = 0; - char *te; - float tmp_val = hts_str2dbl(t, &te, &overflow); - if ( (te==t || overflow) && !extreme_val_warned ) - { - hts_log_warning("Extreme FORMAT/%s value encountered at %s:%"PRIhts_pos, h->id[BCF_DT_ID][fmt[j-1].key].key, bcf_seqname(h,v), v->pos+1); - extreme_val_warned = 1; - } - x[l++] = tmp_val; - t = te; - } - if (*t != ',') break; - } - if ( !l ) - // An empty field, insert missing value - bcf_float_set_missing(x[l++]); - for (; l < z->size>>2; ++l) - bcf_float_set_vector_end(x[l]); - } else { - hts_log_error("Unknown FORMAT field type %d at %s:%"PRIhts_pos, htype, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - - if (*t == '\0') { - break; - } - else if (*t == ':') { - t++; - } - else { - char buffer[8]; - hts_log_error("Invalid character %s in '%s' FORMAT field at %s:%"PRIhts_pos"", - hts_strprint(buffer, sizeof buffer, '\'', t, 1), - h->id[BCF_DT_ID][z->key].key, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_CHAR; - return -1; - } - } - - // fill end-of-vector values - for (; j < v->n_fmt; ++j) { - 
fmt_aux_t *z = &fmt[j]; - const int htype = z->y>>4&0xf; - int l; - if (htype == BCF_HT_STR) { - if (z->is_gt) { - int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); - if (z->size) x[0] = bcf_int32_missing; - for (l = 1; l < z->size>>2; ++l) x[l] = bcf_int32_vector_end; - } else { - char *x = (char*)z->buf + z->size * (size_t)m; - if ( z->size ) { - x[0] = '.'; - memset(&x[1], 0, (z->size-1) * sizeof(*x)); - } - } - } else if (htype == BCF_HT_INT) { - int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); - x[0] = bcf_int32_missing; - for (l = 1; l < z->size>>2; ++l) x[l] = bcf_int32_vector_end; - } else if (htype == BCF_HT_REAL) { - float *x = (float*)(z->buf + z->size * (size_t)m); - bcf_float_set_missing(x[0]); - for (l = 1; l < z->size>>2; ++l) bcf_float_set_vector_end(x[l]); - } - } - - m++; t++; - } - - return 0; -} - -// write individual genotype information -static int vcf_parse_format_gt6(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, - const char *p, const char *q, fmt_aux_t *fmt) { - kstring_t *str = &v->indiv; - int i, need_downsize = 0; - if (v->n_sample > 0) { - for (i = 0; i < v->n_fmt; ++i) { - fmt_aux_t *z = &fmt[i]; - if ( z->size==-1 ) { - need_downsize = 1; - continue; - } - bcf_enc_int1(str, z->key); - if ((z->y>>4&0xf) == BCF_HT_STR && !z->is_gt) { - bcf_enc_size(str, z->size, BCF_BT_CHAR); - kputsn((char*)z->buf, z->size * (size_t)v->n_sample, str); - } else if ((z->y>>4&0xf) == BCF_HT_INT || z->is_gt) { - bcf_enc_vint(str, (z->size>>2) * v->n_sample, (int32_t*)z->buf, z->size>>2); - } else { - bcf_enc_size(str, z->size>>2, BCF_BT_FLOAT); - if (serialize_float_array(str, (z->size>>2) * (size_t)v->n_sample, - (float *) z->buf) != 0) { - v->errcode |= BCF_ERR_LIMITS; - hts_log_error("Out of memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - return -1; - } - } - } - - } - if ( need_downsize ) { - i = 1; - while ( i < v->n_fmt ) { - if ( fmt[i].size==-1 ) - { - memmove(&fmt[i-1],&fmt[i],sizeof(*fmt)); - v->n_fmt--; - } - else - i++; 
- } - } - - return 0; -} - -// validity checking -static int vcf_parse_format_check7(const bcf_hdr_t *h, bcf1_t *v) { - if ( v->n_sample!=bcf_hdr_nsamples(h) ) - { - hts_log_error("Number of columns at %s:%"PRIhts_pos" does not match the number of samples (%d vs %d)", - bcf_seqname_safe(h,v), v->pos+1, v->n_sample, bcf_hdr_nsamples(h)); - v->errcode |= BCF_ERR_NCOLS; - return -1; - } - if ( v->indiv.l > 0xffffffff ) - { - hts_log_error("The FORMAT at %s:%"PRIhts_pos" is too long", bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - - // Error recovery: return -1 if this is a critical error or 0 if we want to ignore the FORMAT and proceed - v->n_fmt = 0; - return -1; - } - - return 0; -} - -// p,q is the start and the end of the FORMAT field -static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, - char *p, char *q) -{ - if ( !bcf_hdr_nsamples(h) ) return 0; - kstring_t *mem = (kstring_t*)&h->mem; - mem->l = 0; - - fmt_aux_t fmt[MAX_N_FMT]; - - // detect FORMAT "." - int ret; // +ve = ok, -ve = err - if ((ret = vcf_parse_format_empty1(s, h, v, p, q))) - return ret ? 0 : -1; - - // get format information from the dictionary - if (vcf_parse_format_dict2(s, h, v, p, q, fmt) < 0) - return -1; - - // FORMAT data is per-sample A:B:C A:B:C A:B:C ... but in memory it is - // stored as per-type arrays AAA... BBB... CCC... This is basically - // a data rotation or pivot. - - // The size of elements in the array grow to their maximum needed, - // permitting fast random access. This means however we have to first - // scan the whole FORMAT line to find the maximum of each type, and - // then scan it again to find the store the data. - // We break this down into compute-max, allocate, fill-out-buffers - - // TODO: ? - // The alternative would be to pivot on the first pass, with fixed - // size entries for numerics and concatenated strings otherwise, also - // tracking maximum sizes. 
Then on a second pass we reallocate and - // copy the data again to a uniformly sized array. Two passes through - // memory, but without doubling string parsing. - - // compute max - if (vcf_parse_format_max3(s, h, v, p, q, fmt) < 0) - return -1; - - // allocate memory for arrays - if (vcf_parse_format_alloc4(s, h, v, p, q, fmt) < 0) - return -1; - - // fill the sample fields; at beginning of the loop - if (vcf_parse_format_fill5(s, h, v, p, q, fmt) < 0) - return -1; - - // write individual genotype information - if (vcf_parse_format_gt6(s, h, v, p, q, fmt) < 0) - return -1; - - // validity checking - if (vcf_parse_format_check7(h, v) < 0) - return -1; - - return 0; -} - -static khint_t fix_chromosome(const bcf_hdr_t *h, vdict_t *d, const char *p) { - // Simple error recovery for chromosomes not defined in the header. It will not help when VCF header has - // been already printed, but will enable tools like vcfcheck to proceed. - - kstring_t tmp = {0,0,0}; - khint_t k; - int l; - if (ksprintf(&tmp, "##contig=", p) < 0) - return kh_end(d); - bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); - free(tmp.s); - int res = hrec ? bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; - if (res < 0) bcf_hrec_destroy(hrec); - if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); - k = kh_get(vdict, d, p); - - return k; -} - -static int vcf_parse_filter(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p, char *q) { - int i, n_flt = 1, max_n_flt = 0; - char *r, *t; - int32_t *a_flt = NULL; - ks_tokaux_t aux1; - khint_t k; - vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; - // count the number of filters - if (*(q-1) == ';') *(q-1) = 0; - for (r = p; *r; ++r) - if (*r == ';') ++n_flt; - if (n_flt > max_n_flt) { - a_flt = malloc(n_flt * sizeof(*a_flt)); - if (!a_flt) { - hts_log_error("Could not allocate memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; // No appropriate code? 
- return -1; - } - max_n_flt = n_flt; - } - // add filters - for (t = kstrtok(p, ";", &aux1), i = 0; t; t = kstrtok(0, 0, &aux1)) { - *(char*)aux1.p = 0; - k = kh_get(vdict, d, t); - if (k == kh_end(d)) - { - // Simple error recovery for FILTERs not defined in the header. It will not help when VCF header has - // been already printed, but will enable tools like vcfcheck to proceed. - hts_log_warning("FILTER '%s' is not defined in the header", t); - kstring_t tmp = {0,0,0}; - int l; - ksprintf(&tmp, "##FILTER=", t); - bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); - free(tmp.s); - int res = hrec ? bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; - if (res < 0) bcf_hrec_destroy(hrec); - if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); - k = kh_get(vdict, d, t); - v->errcode |= BCF_ERR_TAG_UNDEF; - if (res || k == kh_end(d)) { - hts_log_error("Could not add dummy header for FILTER '%s' at %s:%"PRIhts_pos, t, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - free(a_flt); - return -1; - } - } - a_flt[i++] = kh_val(d, k).id; - } - - bcf_enc_vint(str, n_flt, a_flt, -1); - free(a_flt); - - return 0; -} - -static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p, char *q) { - static int extreme_int_warned = 0, negative_rlen_warned = 0; - int max_n_val = 0, overflow = 0; - char *r, *key; - khint_t k; - vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; - int32_t *a_val = NULL; - - v->n_info = 0; - if (*(q-1) == ';') *(q-1) = 0; - for (r = key = p;; ++r) { - int c; - char *val, *end; - while (*r > '=' || (*r != ';' && *r != '=' && *r != 0)) r++; - if (v->n_info == UINT16_MAX) { - hts_log_error("Too many INFO entries at %s:%"PRIhts_pos, - bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - goto fail; - } - val = end = NULL; - c = *r; *r = 0; - if (c == '=') { - val = r + 1; - - for (end = val; *end != ';' && *end != 0; ++end); - c = *end; *end = 0; - } else end = r; - if ( !*key ) { if (c==0) break; r = end; key = r + 1; 
continue; } // faulty VCF, ";;" in the INFO - k = kh_get(vdict, d, key); - if (k == kh_end(d) || kh_val(d, k).info[BCF_HL_INFO] == 15) - { - hts_log_warning("INFO '%s' is not defined in the header, assuming Type=String", key); - kstring_t tmp = {0,0,0}; - int l; - ksprintf(&tmp, "##INFO=", key); - bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); - free(tmp.s); - int res = hrec ? bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; - if (res < 0) bcf_hrec_destroy(hrec); - if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); - k = kh_get(vdict, d, key); - v->errcode |= BCF_ERR_TAG_UNDEF; - if (res || k == kh_end(d)) { - hts_log_error("Could not add dummy header for INFO '%s' at %s:%"PRIhts_pos, key, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_TAG_INVALID; - goto fail; - } - } - uint32_t y = kh_val(d, k).info[BCF_HL_INFO]; - ++v->n_info; - bcf_enc_int1(str, kh_val(d, k).id); - if (val == 0) { - bcf_enc_size(str, 0, BCF_BT_NULL); - } else if ((y>>4&0xf) == BCF_HT_FLAG || (y>>4&0xf) == BCF_HT_STR) { // if Flag has a value, treat it as a string - bcf_enc_vchar(str, end - val, val); - } else { // int/float value/array - int i, n_val; - char *t, *te; - for (t = val, n_val = 1; *t; ++t) // count the number of values - if (*t == ',') ++n_val; - // Check both int and float size in one step for simplicity - if (n_val > max_n_val) { - int32_t *a_tmp = (int32_t *)realloc(a_val, n_val * sizeof(*a_val)); - if (!a_tmp) { - hts_log_error("Could not allocate memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; // No appropriate code? 
- goto fail; - } - a_val = a_tmp; - max_n_val = n_val; - } - if ((y>>4&0xf) == BCF_HT_INT) { - i = 0, t = val; - int64_t val1; - int is_int64 = 0; -#ifdef VCF_ALLOW_INT64 - if ( n_val==1 ) - { - overflow = 0; - long long int tmp_val = hts_str2int(val, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); - if ( te==val ) tmp_val = bcf_int32_missing; - else if ( overflow || tmp_valBCF_MAX_BT_INT64 ) - { - if ( !extreme_int_warned ) - { - hts_log_warning("Extreme INFO/%s value encountered and set to missing at %s:%"PRIhts_pos,key,bcf_seqname_safe(h,v), v->pos+1); - extreme_int_warned = 1; - } - tmp_val = bcf_int32_missing; - } - else - is_int64 = 1; - val1 = tmp_val; - t = te; - i = 1; // this is just to avoid adding another nested block... - } -#endif - for (; i < n_val; ++i, ++t) - { - overflow = 0; - long int tmp_val = hts_str2int(t, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); - if ( te==t ) tmp_val = bcf_int32_missing; - else if ( overflow || tmp_valBCF_MAX_BT_INT32 ) - { - if ( !extreme_int_warned ) - { - hts_log_warning("Extreme INFO/%s value encountered and set to missing at %s:%"PRIhts_pos,key,bcf_seqname_safe(h,v), v->pos+1); - extreme_int_warned = 1; - } - tmp_val = bcf_int32_missing; - } - a_val[i] = tmp_val; - for (t = te; *t && *t != ','; t++); - } - if (n_val == 1) { -#ifdef VCF_ALLOW_INT64 - if ( is_int64 ) - { - v->unpacked |= BCF_IS_64BIT; - bcf_enc_long1(str, val1); - } - else - bcf_enc_int1(str, (int32_t)val1); -#else - val1 = a_val[0]; - bcf_enc_int1(str, (int32_t)val1); -#endif - } else { - bcf_enc_vint(str, n_val, a_val, -1); - } - if (n_val==1 && (val1!=bcf_int32_missing || is_int64) - && memcmp(key, "END", 4) == 0) - { - if ( val1 <= v->pos ) - { - if ( !negative_rlen_warned ) - { - hts_log_warning("INFO/END=%"PRIhts_pos" is smaller than POS at %s:%"PRIhts_pos,val1,bcf_seqname_safe(h,v),v->pos+1); - negative_rlen_warned = 1; - } - } - else - v->rlen = val1 - v->pos; - } - } else if ((y>>4&0xf) == BCF_HT_REAL) { - float *val_f = (float *)a_val; - for (i = 
0, t = val; i < n_val; ++i, ++t) - { - overflow = 0; - val_f[i] = hts_str2dbl(t, &te, &overflow); - if ( te==t || overflow ) // conversion failed - bcf_float_set_missing(val_f[i]); - for (t = te; *t && *t != ','; t++); - } - bcf_enc_vfloat(str, n_val, val_f); - } - } - if (c == 0) break; - r = end; - key = r + 1; - } - - free(a_val); - return 0; - - fail: - free(a_val); - return -1; -} - -int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v) -{ - int ret = -2, overflow = 0; - char *p, *q, *r, *t; - kstring_t *str; - khint_t k; - ks_tokaux_t aux; - -//#define NOT_DOT(p) strcmp((p), ".") -//#define NOT_DOT(p) (!(*p == '.' && !p[1])) -//#define NOT_DOT(p) ((*p) != '.' || (p)[1]) -//#define NOT_DOT(p) (q-p != 1 || memcmp(p, ".\0", 2)) -#define NOT_DOT(p) (memcmp(p, ".\0", 2)) - - if (!s || !h || !v || !(s->s)) - return ret; - - // Assumed in lots of places, but we may as well spot this early - assert(sizeof(float) == sizeof(int32_t)); - - // Ensure string we parse has space to permit some over-flow when during - // parsing. Eg to do memcmp(key, "END", 4) in vcf_parse_info over - // the more straight forward looking strcmp, giving a speed advantage. - if (ks_resize(s, s->l+4) < 0) - return -1; - - // Force our memory to be initialised so we avoid the technicality of - // undefined behaviour in using a 4-byte memcmp. (The reality is this - // almost certainly is never detected by the compiler so has no impact, - // but equally so this code has minimal (often beneficial) impact on - // performance too.) - s->s[s->l+0] = 0; - s->s[s->l+1] = 0; - s->s[s->l+2] = 0; - s->s[s->l+3] = 0; - - bcf_clear1(v); - str = &v->shared; - memset(&aux, 0, sizeof(ks_tokaux_t)); - - // CHROM - if (!(p = kstrtok(s->s, "\t", &aux))) - goto err; - *(q = (char*)aux.p) = 0; - - vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG]; - k = kh_get(vdict, d, p); - if (k == kh_end(d)) { - hts_log_warning("Contig '%s' is not defined in the header. 
(Quick workaround: index the file with tabix.)", p); - v->errcode = BCF_ERR_CTG_UNDEF; - if ((k = fix_chromosome(h, d, p)) == kh_end(d)) { - hts_log_error("Could not add dummy header for contig '%s'", p); - v->errcode |= BCF_ERR_CTG_INVALID; - goto err; - } - } - v->rid = kh_val(d, k).id; - - // POS - if (!(p = kstrtok(0, 0, &aux))) - goto err; - *(q = (char*)aux.p) = 0; - - overflow = 0; - char *tmp = p; - v->pos = hts_str2uint(p, &p, 63, &overflow); - if (overflow) { - hts_log_error("Position value '%s' is too large", tmp); - goto err; - } else if ( *p ) { - hts_log_error("Could not parse the position '%s'", tmp); - goto err; - } else { - v->pos -= 1; - } - if (v->pos >= INT32_MAX) - v->unpacked |= BCF_IS_64BIT; - - // ID - if (!(p = kstrtok(0, 0, &aux))) - goto err; - *(q = (char*)aux.p) = 0; - - if (NOT_DOT(p)) bcf_enc_vchar(str, q - p, p); - else bcf_enc_size(str, 0, BCF_BT_CHAR); - - // REF - if (!(p = kstrtok(0, 0, &aux))) - goto err; - *(q = (char*)aux.p) = 0; - - bcf_enc_vchar(str, q - p, p); - v->n_allele = 1, v->rlen = q - p; - - // ALT - if (!(p = kstrtok(0, 0, &aux))) - goto err; - *(q = (char*)aux.p) = 0; - - if (NOT_DOT(p)) { - for (r = t = p;; ++r) { - if (*r == ',' || *r == 0) { - if (v->n_allele == UINT16_MAX) { - hts_log_error("Too many ALT alleles at %s:%"PRIhts_pos, - bcf_seqname_safe(h,v), v->pos+1); - v->errcode |= BCF_ERR_LIMITS; - goto err; - } - bcf_enc_vchar(str, r - t, t); - t = r + 1; - ++v->n_allele; - } - if (r == q) break; - } - } - - // QUAL - if (!(p = kstrtok(0, 0, &aux))) - goto err; - *(q = (char*)aux.p) = 0; - - if (NOT_DOT(p)) v->qual = atof(p); - else bcf_float_set_missing(v->qual); - if ( v->max_unpack && !(v->max_unpack>>1) ) goto end; // BCF_UN_STR - - // FILTER - if (!(p = kstrtok(0, 0, &aux))) - goto err; - *(q = (char*)aux.p) = 0; - - if (NOT_DOT(p)) { - if (vcf_parse_filter(str, h, v, p, q)) { - goto err; - } - } else bcf_enc_vint(str, 0, 0, -1); - if ( v->max_unpack && !(v->max_unpack>>2) ) goto end; // BCF_UN_FLT - - 
// INFO - if (!(p = kstrtok(0, 0, &aux))) - goto err; - *(q = (char*)aux.p) = 0; - - if (NOT_DOT(p)) { - if (vcf_parse_info(str, h, v, p, q)) { - goto err; - } - } - if ( v->max_unpack && !(v->max_unpack>>3) ) goto end; - - // FORMAT; optional - p = kstrtok(0, 0, &aux); - if (p) { - *(q = (char*)aux.p) = 0; - - return vcf_parse_format(s, h, v, p, q) == 0 ? 0 : -2; - } else { - return 0; - } - - end: - ret = 0; - - err: - return ret; -} - -int vcf_open_mode(char *mode, const char *fn, const char *format) -{ - if (format == NULL) { - // Try to pick a format based on the filename extension - char extension[HTS_MAX_EXT_LEN]; - if (find_file_extension(fn, extension) < 0) return -1; - return vcf_open_mode(mode, fn, extension); - } - else if (strcasecmp(format, "bcf") == 0) strcpy(mode, "b"); - else if (strcasecmp(format, "vcf") == 0) strcpy(mode, ""); - else if (strcasecmp(format, "vcf.gz") == 0 || strcasecmp(format, "vcf.bgz") == 0) strcpy(mode, "z"); - else return -1; - - return 0; -} - -int vcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) -{ - int ret; - ret = hts_getline(fp, KS_SEP_LINE, &fp->line); - if (ret < 0) return ret; - return vcf_parse1(&fp->line, h, v); -} - -static inline uint8_t *bcf_unpack_fmt_core1(uint8_t *ptr, int n_sample, bcf_fmt_t *fmt) -{ - uint8_t *ptr_start = ptr; - fmt->id = bcf_dec_typed_int1(ptr, &ptr); - fmt->n = bcf_dec_size(ptr, &ptr, &fmt->type); - fmt->size = fmt->n << bcf_type_shift[fmt->type]; - fmt->p = ptr; - fmt->p_off = ptr - ptr_start; - fmt->p_free = 0; - ptr += n_sample * fmt->size; - fmt->p_len = ptr - fmt->p; - return ptr; -} - -static inline uint8_t *bcf_unpack_info_core1(uint8_t *ptr, bcf_info_t *info) -{ - uint8_t *ptr_start = ptr; - int64_t len = 0; - info->key = bcf_dec_typed_int1(ptr, &ptr); - len = info->len = bcf_dec_size(ptr, &ptr, &info->type); - info->vptr = ptr; - info->vptr_off = ptr - ptr_start; - info->vptr_free = 0; - info->v1.i = 0; - if (info->len == 1) { - switch(info->type) { - case BCF_BT_INT8: - case 
BCF_BT_CHAR: - info->v1.i = *(int8_t*)ptr; - break; - case BCF_BT_INT16: - info->v1.i = le_to_i16(ptr); - len <<= 1; - break; - case BCF_BT_INT32: - info->v1.i = le_to_i32(ptr); - len <<= 2; - break; - case BCF_BT_FLOAT: - info->v1.f = le_to_float(ptr); - len <<= 2; - break; - case BCF_BT_INT64: - info->v1.i = le_to_i64(ptr); - len <<= 3; - break; - } - } else { - len <<= bcf_type_shift[info->type]; - } - ptr += len; - - info->vptr_len = ptr - info->vptr; - return ptr; -} - -int bcf_unpack(bcf1_t *b, int which) -{ - if ( !b->shared.l ) return 0; // Building a new BCF record from scratch - uint8_t *ptr = (uint8_t*)b->shared.s, *ptr_ori; - int i; - bcf_dec_t *d = &b->d; - if (which & BCF_UN_FLT) which |= BCF_UN_STR; - if (which & BCF_UN_INFO) which |= BCF_UN_SHR; - if ((which&BCF_UN_STR) && !(b->unpacked&BCF_UN_STR)) - { - kstring_t tmp; - - // ID - tmp.l = 0; tmp.s = d->id; tmp.m = d->m_id; - ptr_ori = ptr; - ptr = bcf_fmt_sized_array(&tmp, ptr); - b->unpack_size[0] = ptr - ptr_ori; - kputc_('\0', &tmp); - d->id = tmp.s; d->m_id = tmp.m; - - // REF and ALT are in a single block (d->als) and d->alleles are pointers into this block - hts_expand(char*, b->n_allele, d->m_allele, d->allele); // NM: hts_expand() is a macro - tmp.l = 0; tmp.s = d->als; tmp.m = d->m_als; - ptr_ori = ptr; - for (i = 0; i < b->n_allele; ++i) { - // Use offset within tmp.s as realloc may change pointer - d->allele[i] = (char *)(intptr_t)tmp.l; - ptr = bcf_fmt_sized_array(&tmp, ptr); - kputc_('\0', &tmp); - } - b->unpack_size[1] = ptr - ptr_ori; - d->als = tmp.s; d->m_als = tmp.m; - - // Convert our offsets within tmp.s back to pointers again - for (i = 0; i < b->n_allele; ++i) - d->allele[i] = d->als + (ptrdiff_t)d->allele[i]; - b->unpacked |= BCF_UN_STR; - } - if ((which&BCF_UN_FLT) && !(b->unpacked&BCF_UN_FLT)) { // FILTER - ptr = (uint8_t*)b->shared.s + b->unpack_size[0] + b->unpack_size[1]; - ptr_ori = ptr; - if (*ptr>>4) { - int type; - d->n_flt = bcf_dec_size(ptr, &ptr, &type); - 
hts_expand(int, d->n_flt, d->m_flt, d->flt); - for (i = 0; i < d->n_flt; ++i) - d->flt[i] = bcf_dec_int1(ptr, type, &ptr); - } else ++ptr, d->n_flt = 0; - b->unpack_size[2] = ptr - ptr_ori; - b->unpacked |= BCF_UN_FLT; - } - if ((which&BCF_UN_INFO) && !(b->unpacked&BCF_UN_INFO)) { // INFO - ptr = (uint8_t*)b->shared.s + b->unpack_size[0] + b->unpack_size[1] + b->unpack_size[2]; - hts_expand(bcf_info_t, b->n_info, d->m_info, d->info); - for (i = 0; i < d->m_info; ++i) d->info[i].vptr_free = 0; - for (i = 0; i < b->n_info; ++i) - ptr = bcf_unpack_info_core1(ptr, &d->info[i]); - b->unpacked |= BCF_UN_INFO; - } - if ((which&BCF_UN_FMT) && b->n_sample && !(b->unpacked&BCF_UN_FMT)) { // FORMAT - ptr = (uint8_t*)b->indiv.s; - hts_expand(bcf_fmt_t, b->n_fmt, d->m_fmt, d->fmt); - for (i = 0; i < d->m_fmt; ++i) d->fmt[i].p_free = 0; - for (i = 0; i < b->n_fmt; ++i) - ptr = bcf_unpack_fmt_core1(ptr, b->n_sample, &d->fmt[i]); - b->unpacked |= BCF_UN_FMT; - } - return 0; -} - -int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s) -{ - int i; - int32_t max_dt_id = h->n[BCF_DT_ID]; - const char *chrom = bcf_seqname(h, v); - if (!chrom) { - hts_log_error("Invalid BCF, CONTIG id=%d not present in the header", - v->rid); - errno = EINVAL; - return -1; - } - - bcf_unpack((bcf1_t*)v, BCF_UN_ALL & ~(BCF_UN_INFO|BCF_UN_FMT)); - - // Cache of key lengths so we don't keep repeatedly using them. - // This assumes we're not modifying the header between successive calls - // to vcf_format, but that would lead to many other forms of breakage - // so it feels like a valid assumption to make. - // - // We cannot just do this in bcf_hdr_sync as some code (eg bcftools - // annotate) manipulates the headers directly without calling sync to - // refresh the data structures. So we must do just-in-time length - // calculation during writes instead. 
- bcf_hdr_aux_t *aux = get_hdr_aux(h); - if (!aux->key_len) { - if (!(aux->key_len = calloc(h->n[BCF_DT_ID]+1, sizeof(*aux->key_len)))) - return -1; - } - size_t *key_len = aux->key_len; - - kputs(chrom, s); // CHROM - kputc_('\t', s); kputll(v->pos + 1, s); // POS - kputc_('\t', s); kputs(v->d.id ? v->d.id : ".", s); // ID - kputc_('\t', s); // REF - if (v->n_allele > 0) kputs(v->d.allele[0], s); - else kputc_('.', s); - kputc_('\t', s); // ALT - if (v->n_allele > 1) { - for (i = 1; i < v->n_allele; ++i) { - if (i > 1) kputc_(',', s); - kputs(v->d.allele[i], s); - } - } else kputc_('.', s); - kputc_('\t', s); // QUAL - if ( bcf_float_is_missing(v->qual) ) kputc_('.', s); // QUAL - else kputd(v->qual, s); - kputc_('\t', s); // FILTER - if (v->d.n_flt) { - for (i = 0; i < v->d.n_flt; ++i) { - int32_t idx = v->d.flt[i]; - if (idx < 0 || idx >= max_dt_id - || h->id[BCF_DT_ID][idx].key == NULL) { - hts_log_error("Invalid BCF, the FILTER tag id=%d at %s:%"PRIhts_pos" not present in the header", - idx, bcf_seqname_safe(h, v), v->pos + 1); - errno = EINVAL; - return -1; - } - if (i) kputc_(';', s); - if (!key_len[idx]) - key_len[idx] = strlen(h->id[BCF_DT_ID][idx].key); - kputsn(h->id[BCF_DT_ID][idx].key, key_len[idx], s); - } - } else kputc_('.', s); - - kputc_('\t', s); // INFO - if (v->n_info) { - uint8_t *ptr = (uint8_t *)v->shared.s + v->unpack_size[0] + v->unpack_size[1] + v->unpack_size[2]; - int first = 1; - bcf_info_t *info = v->d.info; - - // Note if we duplicate this code into custom packed and unpacked - // implementations then we gain a bit more speed, particularly with - // clang 13 (up to 5%). Not sure why this is, but code duplication - // isn't pleasant and it's still faster adding packed support than - // not so it's a win, just not as good as it should be. 
- const int info_packed = !(v->unpacked & BCF_UN_INFO) && v->shared.l; - for (i = 0; i < v->n_info; ++i) { - bcf_info_t in, *z; - if (info_packed) { - // Use a local bcf_info_t when data is packed - z = ∈ - z->key = bcf_dec_typed_int1(ptr, &ptr); - z->len = bcf_dec_size(ptr, &ptr, &z->type); - z->vptr = ptr; - ptr += z->len << bcf_type_shift[z->type]; - } else { - // Else previously unpacked INFO struct - z = &info[i]; - - // Also potentially since deleted - if ( !z->vptr ) continue; - } - - bcf_idpair_t *id = z->key >= 0 && z->key < max_dt_id - ? &h->id[BCF_DT_ID][z->key] - : NULL; - - if (!id || !id->key) { - hts_log_error("Invalid BCF, the INFO tag id=%d is %s at %s:%"PRIhts_pos, - z->key, - z->key < 0 ? "negative" - : (z->key >= max_dt_id ? "too large" : "not present in the header"), - bcf_seqname_safe(h, v), v->pos+1); - errno = EINVAL; - return -1; - } - - // KEY - if (!key_len[z->key]) - key_len[z->key] = strlen(id->key); - size_t id_len = key_len[z->key]; - if (ks_resize(s, s->l + 3 + id_len) < 0) - return -1; - char *sptr = s->s + s->l; - if ( !first ) { - *sptr++ = ';'; - s->l++; - } - first = 0; - memcpy(sptr, id->key, id_len); - s->l += id_len; - - // VALUE - if (z->len <= 0) continue; - sptr[id_len] = '='; - s->l++; - - if (z->len != 1 || info_packed) { - bcf_fmt_array(s, z->len, z->type, z->vptr); - } else { - // Single length vectors are unpacked into their - // own info.v1 union and handled separately. 
- if (z->type == BCF_BT_FLOAT) { - if ( bcf_float_is_missing(z->v1.f) ) - kputc_('.', s); - else - kputd(z->v1.f, s); - } else if (z->type == BCF_BT_CHAR) { - kputc_(z->v1.i, s); - } else if (z->type < BCF_BT_INT64) { - int64_t missing[] = { - 0, // BCF_BT_NULL - bcf_int8_missing, - bcf_int16_missing, - bcf_int32_missing, - }; - if (z->v1.i == missing[z->type]) - kputc_('.', s); - else - kputw(z->v1.i, s); - } else if (z->type == BCF_BT_INT64) { - if (z->v1.i == bcf_int64_missing) - kputc_('.', s); - else - kputll(z->v1.i, s); - } else { - hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, z->type, bcf_seqname_safe(h, v), v->pos+1); - errno = EINVAL; - return -1; - } - } - } - if ( first ) kputc_('.', s); - } else kputc_('.', s); - - // FORMAT and individual information - if (v->n_sample) { - int i,j; - if ( v->n_fmt) { - uint8_t *ptr = (uint8_t *)v->indiv.s; - int gt_i = -1; - bcf_fmt_t *fmt = v->d.fmt; - int first = 1; - int fmt_packed = !(v->unpacked & BCF_UN_FMT); - - if (fmt_packed) { - // Local fmt as we have an array of num FORMAT keys, - // each of which points to N.Sample values. - - // No real gain to be had in handling unpacked data here, - // but it doesn't cost us much in complexity either and - // it gives us flexibility. - fmt = malloc(v->n_fmt * sizeof(*fmt)); - if (!fmt) - return -1; - } - - // KEYS - for (i = 0; i < (int)v->n_fmt; ++i) { - bcf_fmt_t *z; - z = &fmt[i]; - if (fmt_packed) { - z->id = bcf_dec_typed_int1(ptr, &ptr); - z->n = bcf_dec_size(ptr, &ptr, &z->type); - z->p = ptr; - z->size = z->n << bcf_type_shift[z->type]; - ptr += v->n_sample * z->size; - } - if ( !z->p ) continue; - kputc_(!first ? ':' : '\t', s); first = 0; - - bcf_idpair_t *id = z->id >= 0 && z->id < max_dt_id - ? 
&h->id[BCF_DT_ID][z->id] - : NULL; - - if (!id || !id->key) { - hts_log_error("Invalid BCF, the FORMAT tag id=%d at %s:%"PRIhts_pos" not present in the header", z->id, bcf_seqname_safe(h, v), v->pos+1); - errno = EINVAL; - return -1; - } - - if (!key_len[z->id]) - key_len[z->id] = strlen(id->key); - size_t id_len = key_len[z->id]; - kputsn(id->key, id_len, s); - if (id_len == 2 && id->key[0] == 'G' && id->key[1] == 'T') - gt_i = i; - } - if ( first ) kputsn("\t.", 2, s); - - // VALUES per sample - for (j = 0; j < v->n_sample; ++j) { - kputc_('\t', s); - first = 1; - bcf_fmt_t *f = fmt; - for (i = 0; i < (int)v->n_fmt; i++, f++) { - if ( !f->p ) continue; - if (!first) kputc_(':', s); - first = 0; - if (gt_i == i) { - bcf_format_gt(f,j,s); - break; - } - else if (f->n == 1) - bcf_fmt_array1(s, f->type, f->p + j * (size_t)f->size); - else - bcf_fmt_array(s, f->n, f->type, f->p + j * (size_t)f->size); - } - - // Simpler loop post GT and at least 1 iteration - for (i++, f++; i < (int)v->n_fmt; i++, f++) { - if ( !f->p ) continue; - kputc_(':', s); - if (f->n == 1) - bcf_fmt_array1(s, f->type, f->p + j * (size_t)f->size); - else - bcf_fmt_array(s, f->n, f->type, f->p + j * (size_t)f->size); - } - if ( first ) kputc_('.', s); - } - if (fmt_packed) - free(fmt); - } - else - for (j=0; j<=v->n_sample; j++) - kputsn("\t.", 2, s); - } - kputc('\n', s); - return 0; -} - -int vcf_write_line(htsFile *fp, kstring_t *line) -{ - int ret; - if ( line->s[line->l-1]!='\n' ) kputc('\n',line); - if ( fp->format.compression!=no_compression ) - ret = bgzf_write(fp->fp.bgzf, line->s, line->l); - else - ret = hwrite(fp->fp.hfile, line->s, line->l); - return ret==line->l ? 
0 : -1; -} - -int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) -{ - ssize_t ret; - fp->line.l = 0; - if (vcf_format1(h, v, &fp->line) != 0) - return -1; - if ( fp->format.compression!=no_compression ) { - if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) - return -1; - ret = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l); - } else { - ret = hwrite(fp->fp.hfile, fp->line.s, fp->line.l); - } - - if (fp->idx && fp->format.compression == bgzf) { - int tid; - if ((tid = hts_idx_tbi_name(fp->idx, v->rid, bcf_seqname_safe(h, v))) < 0) - return -1; - - if (bgzf_idx_push(fp->fp.bgzf, fp->idx, - tid, v->pos, v->pos + v->rlen, - bgzf_tell(fp->fp.bgzf), 1) < 0) - return -1; - } - - return ret==fp->line.l ? 0 : -1; -} - -/************************ - * Data access routines * - ************************/ - -int bcf_hdr_id2int(const bcf_hdr_t *h, int which, const char *id) -{ - khint_t k; - vdict_t *d = (vdict_t*)h->dict[which]; - k = kh_get(vdict, d, id); - return k == kh_end(d)? -1 : kh_val(d, k).id; -} - - -/******************** - *** BCF indexing *** - ********************/ - -// Calculate number of index levels given min_shift and the header contig -// list. Also returns number of contigs in *nids_out. -static int idx_calc_n_lvls_ids(const bcf_hdr_t *h, int min_shift, - int starting_n_lvls, int *nids_out) -{ - int n_lvls, i, nids = 0; - int64_t max_len = 0, s; - - for (i = 0; i < h->n[BCF_DT_CTG]; ++i) - { - if ( !h->id[BCF_DT_CTG][i].val ) continue; - if ( max_len < h->id[BCF_DT_CTG][i].val->info[0] ) - max_len = h->id[BCF_DT_CTG][i].val->info[0]; - nids++; - } - if ( !max_len ) max_len = (1LL<<31) - 1; // In case contig line is broken. 
- max_len += 256; - s = 1LL << (min_shift + starting_n_lvls * 3); - for (n_lvls = starting_n_lvls; max_len > s; ++n_lvls, s <<= 3); - - if (nids_out) *nids_out = nids; - return n_lvls; -} - -hts_idx_t *bcf_index(htsFile *fp, int min_shift) -{ - int n_lvls; - bcf1_t *b = NULL; - hts_idx_t *idx = NULL; - bcf_hdr_t *h; - int r; - h = bcf_hdr_read(fp); - if ( !h ) return NULL; - int nids = 0; - n_lvls = idx_calc_n_lvls_ids(h, min_shift, 0, &nids); - idx = hts_idx_init(nids, HTS_FMT_CSI, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - if (!idx) goto fail; - b = bcf_init1(); - if (!b) goto fail; - while ((r = bcf_read1(fp,h, b)) >= 0) { - int ret; - ret = hts_idx_push(idx, b->rid, b->pos, b->pos + b->rlen, bgzf_tell(fp->fp.bgzf), 1); - if (ret < 0) goto fail; - } - if (r < -1) goto fail; - hts_idx_finish(idx, bgzf_tell(fp->fp.bgzf)); - bcf_destroy1(b); - bcf_hdr_destroy(h); - return idx; - - fail: - hts_idx_destroy(idx); - bcf_destroy1(b); - bcf_hdr_destroy(h); - return NULL; -} - -hts_idx_t *bcf_index_load2(const char *fn, const char *fnidx) -{ - return fnidx? 
hts_idx_load2(fn, fnidx) : bcf_index_load(fn); -} - -hts_idx_t *bcf_index_load3(const char *fn, const char *fnidx, int flags) -{ - return hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags); -} - -int bcf_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads) -{ - htsFile *fp; - hts_idx_t *idx; - tbx_t *tbx; - int ret; - if ((fp = hts_open(fn, "rb")) == 0) return -2; - if (n_threads) - hts_set_threads(fp, n_threads); - if ( fp->format.compression!=bgzf ) { hts_close(fp); return -3; } - switch (fp->format.format) { - case bcf: - if (!min_shift) { - hts_log_error("TBI indices for BCF files are not supported"); - ret = -1; - } else { - idx = bcf_index(fp, min_shift); - if (idx) { - ret = hts_idx_save_as(idx, fn, fnidx, HTS_FMT_CSI); - if (ret < 0) ret = -4; - hts_idx_destroy(idx); - } - else ret = -1; - } - break; - - case vcf: - tbx = tbx_index(hts_get_bgzfp(fp), min_shift, &tbx_conf_vcf); - if (tbx) { - ret = hts_idx_save_as(tbx->idx, fn, fnidx, min_shift > 0 ? HTS_FMT_CSI : HTS_FMT_TBI); - if (ret < 0) ret = -4; - tbx_destroy(tbx); - } - else ret = -1; - break; - - default: - ret = -3; - break; - } - hts_close(fp); - return ret; -} - -int bcf_index_build2(const char *fn, const char *fnidx, int min_shift) -{ - return bcf_index_build3(fn, fnidx, min_shift, 0); -} - -int bcf_index_build(const char *fn, int min_shift) -{ - return bcf_index_build3(fn, NULL, min_shift, 0); -} - -// Initialise fp->idx for the current format type. -// This must be called after the header has been written but no other data. 
-static int vcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx) { - int n_lvls, fmt; - - if (min_shift == 0) { - min_shift = 14; - n_lvls = 5; - fmt = HTS_FMT_TBI; - } else { - // Set initial n_lvls to match tbx_index() - int starting_n_lvls = (TBX_MAX_SHIFT - min_shift + 2) / 3; - // Increase if necessary - n_lvls = idx_calc_n_lvls_ids(h, min_shift, starting_n_lvls, NULL); - fmt = HTS_FMT_CSI; - } - - fp->idx = hts_idx_init(0, fmt, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - if (!fp->idx) return -1; - - // Tabix meta data, added even in CSI for VCF - uint8_t conf[4*7]; - u32_to_le(TBX_VCF, conf+0); // fmt - u32_to_le(1, conf+4); // name col - u32_to_le(2, conf+8); // beg col - u32_to_le(0, conf+12); // end col - u32_to_le('#', conf+16); // comment - u32_to_le(0, conf+20); // n.skip - u32_to_le(0, conf+24); // ref name len - if (hts_idx_set_meta(fp->idx, sizeof(conf)*sizeof(*conf), (uint8_t *)conf, 1) < 0) { - hts_idx_destroy(fp->idx); - fp->idx = NULL; - return -1; - } - fp->fnidx = fnidx; - - return 0; -} - -// Initialise fp->idx for the current format type. -// This must be called after the header has been written but no other data. -int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx) { - int n_lvls, nids = 0; - - if (fp->format.compression != bgzf) { - hts_log_error("Indexing is only supported on BGZF-compressed files"); - return -3; // Matches no-compression return for bcf_index_build3() - } - - if (fp->format.format == vcf) - return vcf_idx_init(fp, h, min_shift, fnidx); - - if (!min_shift) - min_shift = 14; - - n_lvls = idx_calc_n_lvls_ids(h, min_shift, 0, &nids); - - fp->idx = hts_idx_init(nids, HTS_FMT_CSI, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); - if (!fp->idx) return -1; - fp->fnidx = fnidx; - - return 0; -} - -// Finishes an index. Call after the last record has been written. -// Returns 0 on success, <0 on failure. -// -// NB: same format as SAM/BAM as it uses bgzf. 
-int bcf_idx_save(htsFile *fp) { - return sam_idx_save(fp); -} - -/***************** - *** Utilities *** - *****************/ - -int bcf_hdr_combine(bcf_hdr_t *dst, const bcf_hdr_t *src) -{ - int i, ndst_ori = dst->nhrec, need_sync = 0, ret = 0, res; - for (i=0; inhrec; i++) - { - if ( src->hrec[i]->type==BCF_HL_GEN && src->hrec[i]->value ) - { - int j; - for (j=0; jhrec[j]->type!=BCF_HL_GEN ) continue; - - // Checking only the key part of generic lines, otherwise - // the VCFs are too verbose. Should we perhaps add a flag - // to bcf_hdr_combine() and make this optional? - if ( !strcmp(src->hrec[i]->key,dst->hrec[j]->key) ) break; - } - if ( j>=ndst_ori ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return -1; - need_sync += res; - } - } - else if ( src->hrec[i]->type==BCF_HL_STR ) - { - // NB: we are ignoring fields without ID - int j = bcf_hrec_find_key(src->hrec[i],"ID"); - if ( j>=0 ) - { - bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], src->hrec[i]->key); - if ( !rec ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return -1; - need_sync += res; - } - } - } - else - { - int j = bcf_hrec_find_key(src->hrec[i],"ID"); - assert( j>=0 ); // this should always be true for valid VCFs - - bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], NULL); - if ( !rec ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return -1; - need_sync += res; - } else if ( src->hrec[i]->type==BCF_HL_INFO || src->hrec[i]->type==BCF_HL_FMT ) - { - // Check that both records are of the same type. The bcf_hdr_id2length - // macro cannot be used here because dst header is not synced yet. 
- vdict_t *d_src = (vdict_t*)src->dict[BCF_DT_ID]; - vdict_t *d_dst = (vdict_t*)dst->dict[BCF_DT_ID]; - khint_t k_src = kh_get(vdict, d_src, src->hrec[i]->vals[0]); - khint_t k_dst = kh_get(vdict, d_dst, src->hrec[i]->vals[0]); - if ( (kh_val(d_src,k_src).info[rec->type]>>8 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>8 & 0xf) ) - { - hts_log_warning("Trying to combine \"%s\" tag definitions of different lengths", - src->hrec[i]->vals[0]); - ret |= 1; - } - if ( (kh_val(d_src,k_src).info[rec->type]>>4 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>4 & 0xf) ) - { - hts_log_warning("Trying to combine \"%s\" tag definitions of different types", - src->hrec[i]->vals[0]); - ret |= 1; - } - } - } - } - if ( need_sync ) { - if (bcf_hdr_sync(dst) < 0) return -1; - } - return ret; -} - -bcf_hdr_t *bcf_hdr_merge(bcf_hdr_t *dst, const bcf_hdr_t *src) -{ - if ( !dst ) - { - // this will effectively strip existing IDX attributes from src to become dst - dst = bcf_hdr_init("r"); - kstring_t htxt = {0,0,0}; - if (bcf_hdr_format(src, 0, &htxt) < 0) { - free(htxt.s); - return NULL; - } - if ( bcf_hdr_parse(dst, htxt.s) < 0 ) { - bcf_hdr_destroy(dst); - dst = NULL; - } - free(htxt.s); - return dst; - } - - int i, ndst_ori = dst->nhrec, need_sync = 0, res; - for (i=0; inhrec; i++) - { - if ( src->hrec[i]->type==BCF_HL_GEN && src->hrec[i]->value ) - { - int j; - for (j=0; jhrec[j]->type!=BCF_HL_GEN ) continue; - - // Checking only the key part of generic lines, otherwise - // the VCFs are too verbose. Should we perhaps add a flag - // to bcf_hdr_combine() and make this optional? 
- if ( !strcmp(src->hrec[i]->key,dst->hrec[j]->key) ) break; - } - if ( j>=ndst_ori ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return NULL; - need_sync += res; - } - } - else if ( src->hrec[i]->type==BCF_HL_STR ) - { - // NB: we are ignoring fields without ID - int j = bcf_hrec_find_key(src->hrec[i],"ID"); - if ( j>=0 ) - { - bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], src->hrec[i]->key); - if ( !rec ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return NULL; - need_sync += res; - } - } - } - else - { - int j = bcf_hrec_find_key(src->hrec[i],"ID"); - assert( j>=0 ); // this should always be true for valid VCFs - - bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], NULL); - if ( !rec ) { - res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); - if (res < 0) return NULL; - need_sync += res; - } else if ( src->hrec[i]->type==BCF_HL_INFO || src->hrec[i]->type==BCF_HL_FMT ) - { - // Check that both records are of the same type. The bcf_hdr_id2length - // macro cannot be used here because dst header is not synced yet. 
- vdict_t *d_src = (vdict_t*)src->dict[BCF_DT_ID]; - vdict_t *d_dst = (vdict_t*)dst->dict[BCF_DT_ID]; - khint_t k_src = kh_get(vdict, d_src, src->hrec[i]->vals[0]); - khint_t k_dst = kh_get(vdict, d_dst, src->hrec[i]->vals[0]); - if ( (kh_val(d_src,k_src).info[rec->type]>>8 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>8 & 0xf) ) - { - hts_log_warning("Trying to combine \"%s\" tag definitions of different lengths", - src->hrec[i]->vals[0]); - } - if ( (kh_val(d_src,k_src).info[rec->type]>>4 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>4 & 0xf) ) - { - hts_log_warning("Trying to combine \"%s\" tag definitions of different types", - src->hrec[i]->vals[0]); - } - } - } - } - if ( need_sync ) { - if (bcf_hdr_sync(dst) < 0) return NULL; - } - return dst; -} - -int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *line) -{ - int i; - if ( line->errcode ) - { - char errordescription[1024] = ""; - hts_log_error("Unchecked error (%d %s) at %s:%"PRIhts_pos", exiting", line->errcode, bcf_strerror(line->errcode, errordescription, sizeof(errordescription)), bcf_seqname_safe(src_hdr,line), line->pos+1); - exit(1); - } - if ( src_hdr->ntransl==-1 ) return 0; // no need to translate, all tags have the same id - if ( !src_hdr->ntransl ) // called for the first time, see what needs translating - { - int dict; - for (dict=0; dict<2; dict++) // BCF_DT_ID and BCF_DT_CTG - { - src_hdr->transl[dict] = (int*) malloc(src_hdr->n[dict]*sizeof(int)); - for (i=0; in[dict]; i++) - { - if ( !src_hdr->id[dict][i].key ) // gap left after removed BCF header lines - { - src_hdr->transl[dict][i] = -1; - continue; - } - src_hdr->transl[dict][i] = bcf_hdr_id2int(dst_hdr,dict,src_hdr->id[dict][i].key); - if ( src_hdr->transl[dict][i]!=-1 && i!=src_hdr->transl[dict][i] ) src_hdr->ntransl++; - } - } - if ( !src_hdr->ntransl ) - { - free(src_hdr->transl[0]); src_hdr->transl[0] = NULL; - free(src_hdr->transl[1]); src_hdr->transl[1] = NULL; - src_hdr->ntransl = -1; - } - if ( 
src_hdr->ntransl==-1 ) return 0; - } - bcf_unpack(line,BCF_UN_ALL); - - // CHROM - if ( src_hdr->transl[BCF_DT_CTG][line->rid] >=0 ) line->rid = src_hdr->transl[BCF_DT_CTG][line->rid]; - - // FILTER - for (i=0; id.n_flt; i++) - { - int src_id = line->d.flt[i]; - if ( src_hdr->transl[BCF_DT_ID][src_id] >=0 ) - line->d.flt[i] = src_hdr->transl[BCF_DT_ID][src_id]; - line->d.shared_dirty |= BCF1_DIRTY_FLT; - } - - // INFO - for (i=0; in_info; i++) - { - int src_id = line->d.info[i].key; - int dst_id = src_hdr->transl[BCF_DT_ID][src_id]; - if ( dst_id<0 ) continue; - line->d.info[i].key = dst_id; - if ( !line->d.info[i].vptr ) continue; // skip deleted - int src_size = src_id>>7 ? ( src_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; - int dst_size = dst_id>>7 ? ( dst_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; - if ( src_size==dst_size ) // can overwrite - { - uint8_t *vptr = line->d.info[i].vptr - line->d.info[i].vptr_off; - if ( dst_size==BCF_BT_INT8 ) { vptr[1] = (uint8_t)dst_id; } - else if ( dst_size==BCF_BT_INT16 ) { *(uint16_t*)vptr = (uint16_t)dst_id; } - else { *(uint32_t*)vptr = (uint32_t)dst_id; } - } - else // must realloc - { - bcf_info_t *info = &line->d.info[i]; - kstring_t str = {0,0,0}; - bcf_enc_int1(&str, dst_id); - bcf_enc_size(&str, info->len,info->type); - uint32_t vptr_off = str.l; - kputsn((char*)info->vptr, info->vptr_len, &str); - if( info->vptr_free ) free(info->vptr - info->vptr_off); - info->vptr_off = vptr_off; - info->vptr = (uint8_t*)str.s + info->vptr_off; - info->vptr_free = 1; - line->d.shared_dirty |= BCF1_DIRTY_INF; - } - } - - // FORMAT - for (i=0; in_fmt; i++) - { - int src_id = line->d.fmt[i].id; - int dst_id = src_hdr->transl[BCF_DT_ID][src_id]; - if ( dst_id<0 ) continue; - line->d.fmt[i].id = dst_id; - if( !line->d.fmt[i].p ) continue; // skip deleted - int src_size = src_id>>7 ? ( src_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; - int dst_size = dst_id>>7 ? ( dst_id>>15 ? 
BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; - if ( src_size==dst_size ) // can overwrite - { - uint8_t *p = line->d.fmt[i].p - line->d.fmt[i].p_off; // pointer to the vector size (4bits) and BT type (4bits) - if ( dst_size==BCF_BT_INT8 ) { p[1] = dst_id; } - else if ( dst_size==BCF_BT_INT16 ) { i16_to_le(dst_id, p + 1); } - else { i32_to_le(dst_id, p + 1); } - } - else // must realloc - { - bcf_fmt_t *fmt = &line->d.fmt[i]; - kstring_t str = {0,0,0}; - bcf_enc_int1(&str, dst_id); - bcf_enc_size(&str, fmt->n, fmt->type); - uint32_t p_off = str.l; - kputsn((char*)fmt->p, fmt->p_len, &str); - if( fmt->p_free ) free(fmt->p - fmt->p_off); - fmt->p_off = p_off; - fmt->p = (uint8_t*)str.s + fmt->p_off; - fmt->p_free = 1; - line->d.indiv_dirty = 1; - } - } - return 0; -} - -bcf_hdr_t *bcf_hdr_dup(const bcf_hdr_t *hdr) -{ - bcf_hdr_t *hout = bcf_hdr_init("r"); - if (!hout) { - hts_log_error("Failed to allocate bcf header"); - return NULL; - } - kstring_t htxt = {0,0,0}; - if (bcf_hdr_format(hdr, 1, &htxt) < 0) { - free(htxt.s); - return NULL; - } - if ( bcf_hdr_parse(hout, htxt.s) < 0 ) { - bcf_hdr_destroy(hout); - hout = NULL; - } - free(htxt.s); - return hout; -} - -bcf_hdr_t *bcf_hdr_subset(const bcf_hdr_t *h0, int n, char *const* samples, int *imap) -{ - void *names_hash = khash_str2int_init(); - kstring_t htxt = {0,0,0}; - kstring_t str = {0,0,0}; - bcf_hdr_t *h = bcf_hdr_init("w"); - int r = 0; - if (!h || !names_hash) { - hts_log_error("Failed to allocate bcf header"); - goto err; - } - if (bcf_hdr_format(h0, 1, &htxt) < 0) { - hts_log_error("Failed to get header text"); - goto err; - } - bcf_hdr_set_version(h,bcf_hdr_get_version(h0)); - int j; - for (j=0; j 0) { - char *p = find_chrom_header_line(htxt.s); - int i = 0, end = n? 
8 : 7; - while ((p = strchr(p, '\t')) != 0 && i < end) ++i, ++p; - if (i != end) { - hts_log_error("Wrong number of columns in header #CHROM line"); - goto err; - } - r |= kputsn(htxt.s, p - htxt.s, &str) < 0; - for (i = 0; i < n; ++i) { - if ( khash_str2int_has_key(names_hash,samples[i]) ) - { - hts_log_error("Duplicate sample name \"%s\"", samples[i]); - goto err; - } - imap[i] = bcf_hdr_id2int(h0, BCF_DT_SAMPLE, samples[i]); - if (imap[i] < 0) continue; - r |= kputc('\t', &str) < 0; - r |= kputs(samples[i], &str) < 0; - r |= khash_str2int_inc(names_hash,samples[i]) < 0; - } - } else r |= kputsn(htxt.s, htxt.l, &str) < 0; - while (str.l && (!str.s[str.l-1] || str.s[str.l-1]=='\n') ) str.l--; // kill trailing zeros and newlines - r |= kputc('\n',&str) < 0; - if (r) { - hts_log_error("%s", strerror(errno)); - goto err; - } - if ( bcf_hdr_parse(h, str.s) < 0 ) { - bcf_hdr_destroy(h); - h = NULL; - } - free(str.s); - free(htxt.s); - khash_str2int_destroy(names_hash); - return h; - - err: - ks_free(&str); - ks_free(&htxt); - khash_str2int_destroy(names_hash); - bcf_hdr_destroy(h); - return NULL; -} - -int bcf_hdr_set_samples(bcf_hdr_t *hdr, const char *samples, int is_file) -{ - if ( samples && !strcmp("-",samples) ) return 0; // keep all samples - - int i, narr = bit_array_size(bcf_hdr_nsamples(hdr)); - hdr->keep_samples = (uint8_t*) calloc(narr,1); - if (!hdr->keep_samples) return -1; - - hdr->nsamples_ori = bcf_hdr_nsamples(hdr); - if ( !samples ) - { - // exclude all samples - khint_t k; - vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_SAMPLE], *new_dict; - new_dict = kh_init(vdict); - if (!new_dict) return -1; - - bcf_hdr_nsamples(hdr) = 0; - - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((char*)kh_key(d, k)); - kh_destroy(vdict, d); - hdr->dict[BCF_DT_SAMPLE] = new_dict; - if (bcf_hdr_sync(hdr) < 0) return -1; - - return 0; - } - - if ( samples[0]=='^' ) - for (i=0; ikeep_samples,i); - - int idx, n, ret = 0; - char **smpls = 
hts_readlist(samples[0]=='^'?samples+1:samples, is_file, &n); - if ( !smpls ) return -1; - for (i=0; ikeep_samples, idx); - else - bit_array_set(hdr->keep_samples, idx); - } - for (i=0; insamples_ori; i++) - if ( bit_array_test(hdr->keep_samples,i) ) bcf_hdr_nsamples(hdr)++; - - if ( !bcf_hdr_nsamples(hdr) ) { free(hdr->keep_samples); hdr->keep_samples=NULL; } - else - { - // Make new list and dictionary with desired samples - char **samples = (char**) malloc(sizeof(char*)*bcf_hdr_nsamples(hdr)); - vdict_t *new_dict, *d; - int k, res; - if (!samples) return -1; - - new_dict = kh_init(vdict); - if (!new_dict) { - free(samples); - return -1; - } - idx = 0; - for (i=0; insamples_ori; i++) { - if ( bit_array_test(hdr->keep_samples,i) ) { - samples[idx] = hdr->samples[i]; - k = kh_put(vdict, new_dict, hdr->samples[i], &res); - if (res < 0) { - free(samples); - kh_destroy(vdict, new_dict); - return -1; - } - kh_val(new_dict, k) = bcf_idinfo_def; - kh_val(new_dict, k).id = idx; - idx++; - } - } - - // Delete desired samples from old dictionary, so we don't free them - d = (vdict_t*)hdr->dict[BCF_DT_SAMPLE]; - for (i=0; i < idx; i++) { - int k = kh_get(vdict, d, samples[i]); - if (k < kh_end(d)) kh_del(vdict, d, k); - } - - // Free everything else - for (k = kh_begin(d); k != kh_end(d); ++k) - if (kh_exist(d, k)) free((char*)kh_key(d, k)); - kh_destroy(vdict, d); - hdr->dict[BCF_DT_SAMPLE] = new_dict; - - free(hdr->samples); - hdr->samples = samples; - - if (bcf_hdr_sync(hdr) < 0) - return -1; - } - - return ret; -} - -int bcf_subset(const bcf_hdr_t *h, bcf1_t *v, int n, int *imap) -{ - kstring_t ind; - ind.s = 0; ind.l = ind.m = 0; - if (n) { - bcf_fmt_t fmt[MAX_N_FMT]; - int i, j; - uint8_t *ptr = (uint8_t*)v->indiv.s; - for (i = 0; i < v->n_fmt; ++i) - ptr = bcf_unpack_fmt_core1(ptr, v->n_sample, &fmt[i]); - for (i = 0; i < (int)v->n_fmt; ++i) { - bcf_fmt_t *f = &fmt[i]; - bcf_enc_int1(&ind, f->id); - bcf_enc_size(&ind, f->n, f->type); - for (j = 0; j < n; ++j) - if 
(imap[j] >= 0) kputsn((char*)(f->p + imap[j] * f->size), f->size, &ind); - } - for (i = j = 0; j < n; ++j) if (imap[j] >= 0) ++i; - v->n_sample = i; - } else v->n_sample = 0; - if ( !v->n_sample ) v->n_fmt = 0; - free(v->indiv.s); - v->indiv = ind; - v->unpacked &= ~BCF_UN_FMT; // only BCF is ready for output, VCF will need to unpack again - return 0; -} - -int bcf_is_snp(bcf1_t *v) -{ - int i; - bcf_unpack(v, BCF_UN_STR); - for (i = 0; i < v->n_allele; ++i) - { - if ( v->d.allele[i][1]==0 && v->d.allele[i][0]!='*' ) continue; - - // mpileup's allele, see also below. This is not completely satisfactory, - // a general library is here narrowly tailored to fit samtools. - if ( v->d.allele[i][0]=='<' && v->d.allele[i][1]=='X' && v->d.allele[i][2]=='>' ) continue; - if ( v->d.allele[i][0]=='<' && v->d.allele[i][1]=='*' && v->d.allele[i][2]=='>' ) continue; - - break; - } - return i == v->n_allele; -} - -static void bcf_set_variant_type(const char *ref, const char *alt, bcf_variant_t *var) -{ - if ( *alt == '*' && !alt[1] ) { var->n = 0; var->type = VCF_OVERLAP; return; } // overlapping variant - - // The most frequent case - if ( !ref[1] && !alt[1] ) - { - if ( *alt == '.' 
|| *ref==*alt ) { var->n = 0; var->type = VCF_REF; return; } - if ( *alt == 'X' ) { var->n = 0; var->type = VCF_REF; return; } // mpileup's X allele shouldn't be treated as variant - var->n = 1; var->type = VCF_SNP; return; - } - if ( alt[0]=='<' ) - { - if ( alt[1]=='X' && alt[2]=='>' ) { var->n = 0; var->type = VCF_REF; return; } // mpileup's X allele shouldn't be treated as variant - if ( alt[1]=='*' && alt[2]=='>' ) { var->n = 0; var->type = VCF_REF; return; } - if ( !strcmp("NON_REF>",alt+1) ) { var->n = 0; var->type = VCF_REF; return; } - var->type = VCF_OTHER; - return; - } - - // Catch "joined before" breakend case - if ( alt[0]==']' || alt[0] == '[' ) - { - var->type = VCF_BND; return; - } - - // Iterate through alt characters that match the reference - const char *r = ref, *a = alt; - while (*r && *a && toupper_c(*r)==toupper_c(*a) ) { r++; a++; } // unfortunately, matching REF,ALT case is not guaranteed - - if ( *a && !*r ) - { - if ( *a==']' || *a=='[' ) { var->type = VCF_BND; return; } // "joined after" breakend - while ( *a ) a++; - var->n = (a-alt)-(r-ref); var->type = VCF_INDEL | VCF_INS; return; - } - else if ( *r && !*a ) - { - while ( *r ) r++; - var->n = (a-alt)-(r-ref); var->type = VCF_INDEL | VCF_DEL; return; - } - else if ( !*r && !*a ) - { - var->n = 0; var->type = VCF_REF; return; - } - - const char *re = r, *ae = a; - while ( re[1] ) re++; - while ( ae[1] ) ae++; - while ( re>r && ae>a && toupper_c(*re)==toupper_c(*ae) ) { re--; ae--; } - if ( ae==a ) - { - if ( re==r ) { var->n = 1; var->type = VCF_SNP; return; } - var->n = -(re-r); - if ( toupper_c(*re)==toupper_c(*ae) ) { var->type = VCF_INDEL | VCF_DEL; return; } - var->type = VCF_OTHER; return; - } - else if ( re==r ) - { - var->n = ae-a; - if ( toupper_c(*re)==toupper_c(*ae) ) { var->type = VCF_INDEL | VCF_INS; return; } - var->type = VCF_OTHER; return; - } - - var->type = ( re-r == ae-a ) ? VCF_MNP : VCF_OTHER; - var->n = ( re-r > ae-a ) ? 
-(re-r+1) : ae-a+1; - - // should do also complex events, SVs, etc... -} - -static int bcf_set_variant_types(bcf1_t *b) -{ - if ( !(b->unpacked & BCF_UN_STR) ) bcf_unpack(b, BCF_UN_STR); - bcf_dec_t *d = &b->d; - if ( d->n_var < b->n_allele ) - { - bcf_variant_t *new_var = realloc(d->var, sizeof(bcf_variant_t)*b->n_allele); - if (!new_var) - return -1; - d->var = new_var; - d->n_var = b->n_allele; - } - int i; - b->d.var_type = 0; - d->var[0].type = VCF_REF; - d->var[0].n = 0; - for (i=1; in_allele; i++) - { - bcf_set_variant_type(d->allele[0],d->allele[i], &d->var[i]); - b->d.var_type |= d->var[i].type; - //fprintf(stderr,"[set_variant_type] %d %s %s -> %d %d .. %d\n", b->pos+1,d->allele[0],d->allele[i],d->var[i].type,d->var[i].n, b->d.var_type); - } - return 0; -} - -// bcf_get_variant_type/bcf_get_variant_types should only return the following, -// to be compatible with callers that are not expecting newer values -// like VCF_INS, VCF_DEL. The full set is available from the newer -// vcf_has_variant_type* interfaces. 
-#define ORIG_VAR_TYPES (VCF_SNP|VCF_MNP|VCF_INDEL|VCF_OTHER|VCF_BND|VCF_OVERLAP) -int bcf_get_variant_types(bcf1_t *rec) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) { - hts_log_error("Couldn't get variant types: %s", strerror(errno)); - exit(1); // Due to legacy API having no way to report failures - } - } - return rec->d.var_type & ORIG_VAR_TYPES; -} - -int bcf_get_variant_type(bcf1_t *rec, int ith_allele) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) { - hts_log_error("Couldn't get variant types: %s", strerror(errno)); - exit(1); // Due to legacy API having no way to report failures - } - } - if (ith_allele < 0 || ith_allele >= rec->n_allele) { - hts_log_error("Requested allele outside valid range"); - exit(1); - } - return rec->d.var[ith_allele].type & ORIG_VAR_TYPES; -} -#undef ORIG_VAR_TYPES - -int bcf_has_variant_type(bcf1_t *rec, int ith_allele, uint32_t bitmask) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) return -1; - } - if (ith_allele < 0 || ith_allele >= rec->n_allele) return -1; - if (bitmask == VCF_REF) { // VCF_REF is 0, so handled as a special case - return rec->d.var[ith_allele].type == VCF_REF; - } - return bitmask & rec->d.var[ith_allele].type; -} - -int bcf_variant_length(bcf1_t *rec, int ith_allele) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) return bcf_int32_missing; - } - if (ith_allele < 0 || ith_allele >= rec->n_allele) return bcf_int32_missing; - return rec->d.var[ith_allele].n; -} - -int bcf_has_variant_types(bcf1_t *rec, uint32_t bitmask, - enum bcf_variant_match mode) -{ - if ( rec->d.var_type==-1 ) { - if (bcf_set_variant_types(rec) != 0) return -1; - } - uint32_t type = rec->d.var_type; - if ( mode==bcf_match_overlap ) return bitmask & type; - - // VCF_INDEL is always set with VCF_INS and VCF_DEL by bcf_set_variant_type[s], but the bitmask may - // ask for say `VCF_INS` or `VCF_INDEL` only - if ( bitmask&(VCF_INS|VCF_DEL) 
&& !(bitmask&VCF_INDEL) ) type &= ~VCF_INDEL; - else if ( bitmask&VCF_INDEL && !(bitmask&(VCF_INS|VCF_DEL)) ) type &= ~(VCF_INS|VCF_DEL); - - if ( mode==bcf_match_subset ) - { - if ( ~bitmask & type ) return 0; - else return bitmask & type; - } - // mode == bcf_match_exact - return type==bitmask ? type : 0; -} - -int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type) -{ - static int negative_rlen_warned = 0; - int is_end_tag; - - // Is the field already present? - int i, inf_id = bcf_hdr_id2int(hdr,BCF_DT_ID,key); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,inf_id) ) return -1; // No such INFO field in the header - if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); - - is_end_tag = strcmp(key, "END") == 0; - - for (i=0; in_info; i++) - if ( inf_id==line->d.info[i].key ) break; - bcf_info_t *inf = i==line->n_info ? NULL : &line->d.info[i]; - - if ( !n || (type==BCF_HT_STR && !values) ) - { - if ( n==0 && is_end_tag ) - line->rlen = line->n_allele ? 
strlen(line->d.allele[0]) : 0; - if ( inf ) - { - // Mark the tag for removal, free existing memory if necessary - if ( inf->vptr_free ) - { - free(inf->vptr - inf->vptr_off); - inf->vptr_free = 0; - } - line->d.shared_dirty |= BCF1_DIRTY_INF; - inf->vptr = NULL; - inf->vptr_off = inf->vptr_len = 0; - } - return 0; - } - - if (is_end_tag) - { - if (n != 1) - { - hts_log_error("END info tag should only have one value at %s:%"PRIhts_pos, bcf_seqname_safe(hdr,line), line->pos+1); - line->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - if (type != BCF_HT_INT && type != BCF_HT_LONG) - { - hts_log_error("Wrong type (%d) for END info tag at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); - line->errcode |= BCF_ERR_TAG_INVALID; - return -1; - } - } - - // Encode the values and determine the size required to accommodate the values - kstring_t str = {0,0,0}; - bcf_enc_int1(&str, inf_id); - if ( type==BCF_HT_INT ) - bcf_enc_vint(&str, n, (int32_t*)values, -1); - else if ( type==BCF_HT_REAL ) - bcf_enc_vfloat(&str, n, (float*)values); - else if ( type==BCF_HT_FLAG || type==BCF_HT_STR ) - { - if ( values==NULL ) - bcf_enc_size(&str, 0, BCF_BT_NULL); - else - bcf_enc_vchar(&str, strlen((char*)values), (char*)values); - } -#ifdef VCF_ALLOW_INT64 - else if ( type==BCF_HT_LONG ) - { - if (n != 1) { - hts_log_error("Only storing a single BCF_HT_LONG value is supported at %s:%"PRIhts_pos, bcf_seqname_safe(hdr,line), line->pos+1); - abort(); - } - bcf_enc_long1(&str, *(int64_t *) values); - } -#endif - else - { - hts_log_error("The type %d not implemented yet at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); - abort(); - } - - // Is the INFO tag already present - if ( inf ) - { - // Is it big enough to accommodate new block? 
- if ( inf->vptr && str.l <= inf->vptr_len + inf->vptr_off ) - { - if ( str.l != inf->vptr_len + inf->vptr_off ) line->d.shared_dirty |= BCF1_DIRTY_INF; - uint8_t *ptr = inf->vptr - inf->vptr_off; - memcpy(ptr, str.s, str.l); - free(str.s); - int vptr_free = inf->vptr_free; - bcf_unpack_info_core1(ptr, inf); - inf->vptr_free = vptr_free; - } - else - { - if ( inf->vptr_free ) - free(inf->vptr - inf->vptr_off); - bcf_unpack_info_core1((uint8_t*)str.s, inf); - inf->vptr_free = 1; - line->d.shared_dirty |= BCF1_DIRTY_INF; - } - } - else - { - // The tag is not present, create new one - line->n_info++; - hts_expand0(bcf_info_t, line->n_info, line->d.m_info , line->d.info); - inf = &line->d.info[line->n_info-1]; - bcf_unpack_info_core1((uint8_t*)str.s, inf); - inf->vptr_free = 1; - line->d.shared_dirty |= BCF1_DIRTY_INF; - } - line->unpacked |= BCF_UN_INFO; - - if ( n==1 && is_end_tag) { - hts_pos_t end = type == BCF_HT_INT ? *(int32_t *) values : *(int64_t *) values; - if ( (type == BCF_HT_INT && end!=bcf_int32_missing) || (type == BCF_HT_LONG && end!=bcf_int64_missing) ) - { - if ( end <= line->pos ) - { - if ( !negative_rlen_warned ) - { - hts_log_warning("INFO/END=%"PRIhts_pos" is smaller than POS at %s:%"PRIhts_pos,end,bcf_seqname_safe(hdr,line),line->pos+1); - negative_rlen_warned = 1; - } - line->rlen = line->n_allele ? strlen(line->d.allele[0]) : 0; - } - else - line->rlen = end - line->pos; - } - } - return 0; -} - -int bcf_update_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const char **values, int n) -{ - if ( !n ) - return bcf_update_format(hdr,line,key,NULL,0,BCF_HT_STR); - - int i, max_len = 0; - for (i=0; i max_len ) max_len = len; - } - char *out = (char*) malloc(max_len*n); - if ( !out ) return -2; - for (i=0; iunpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); - - for (i=0; in_fmt; i++) - if ( line->d.fmt[i].id==fmt_id ) break; - bcf_fmt_t *fmt = i==line->n_fmt ? 
NULL : &line->d.fmt[i]; - - if ( !n ) - { - if ( fmt ) - { - // Mark the tag for removal, free existing memory if necessary - if ( fmt->p_free ) - { - free(fmt->p - fmt->p_off); - fmt->p_free = 0; - } - line->d.indiv_dirty = 1; - fmt->p = NULL; - } - return 0; - } - - line->n_sample = bcf_hdr_nsamples(hdr); - int nps = n / line->n_sample; // number of values per sample - assert( nps && nps*line->n_sample==n ); // must be divisible by n_sample - - // Encode the values and determine the size required to accommodate the values - kstring_t str = {0,0,0}; - bcf_enc_int1(&str, fmt_id); - assert(values != NULL); - if ( type==BCF_HT_INT ) - bcf_enc_vint(&str, n, (int32_t*)values, nps); - else if ( type==BCF_HT_REAL ) - { - bcf_enc_size(&str, nps, BCF_BT_FLOAT); - serialize_float_array(&str, nps*line->n_sample, (float *) values); - } - else if ( type==BCF_HT_STR ) - { - bcf_enc_size(&str, nps, BCF_BT_CHAR); - kputsn((char*)values, nps*line->n_sample, &str); - } - else - { - hts_log_error("The type %d not implemented yet at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); - abort(); - } - - if ( !fmt ) - { - // Not present, new format field - line->n_fmt++; - hts_expand0(bcf_fmt_t, line->n_fmt, line->d.m_fmt, line->d.fmt); - - // Special case: VCF specification requires that GT is always first - if ( line->n_fmt > 1 && key[0]=='G' && key[1]=='T' && !key[2] ) - { - for (i=line->n_fmt-1; i>0; i--) - line->d.fmt[i] = line->d.fmt[i-1]; - fmt = &line->d.fmt[0]; - } - else - fmt = &line->d.fmt[line->n_fmt-1]; - bcf_unpack_fmt_core1((uint8_t*)str.s, line->n_sample, fmt); - line->d.indiv_dirty = 1; - fmt->p_free = 1; - } - else - { - // The tag is already present, check if it is big enough to accommodate the new block - if ( fmt->p && str.l <= fmt->p_len + fmt->p_off ) - { - // good, the block is big enough - if ( str.l != fmt->p_len + fmt->p_off ) line->d.indiv_dirty = 1; - uint8_t *ptr = fmt->p - fmt->p_off; - memcpy(ptr, str.s, str.l); - free(str.s); - int p_free 
= fmt->p_free; - bcf_unpack_fmt_core1(ptr, line->n_sample, fmt); - fmt->p_free = p_free; - } - else - { - if ( fmt->p_free ) - free(fmt->p - fmt->p_off); - bcf_unpack_fmt_core1((uint8_t*)str.s, line->n_sample, fmt); - fmt->p_free = 1; - line->d.indiv_dirty = 1; - } - } - line->unpacked |= BCF_UN_FMT; - return 0; -} - - -int bcf_update_filter(const bcf_hdr_t *hdr, bcf1_t *line, int *flt_ids, int n) -{ - if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); - line->d.shared_dirty |= BCF1_DIRTY_FLT; - line->d.n_flt = n; - if ( !n ) return 0; - hts_expand(int, line->d.n_flt, line->d.m_flt, line->d.flt); - int i; - for (i=0; id.flt[i] = flt_ids[i]; - return 0; -} - -int bcf_add_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id) -{ - if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); - int i; - for (i=0; id.n_flt; i++) - if ( flt_id==line->d.flt[i] ) break; - if ( id.n_flt ) return 0; // this filter is already set - line->d.shared_dirty |= BCF1_DIRTY_FLT; - if ( flt_id==0 ) // set to PASS - line->d.n_flt = 1; - else if ( line->d.n_flt==1 && line->d.flt[0]==0 ) - line->d.n_flt = 1; - else - line->d.n_flt++; - hts_expand(int, line->d.n_flt, line->d.m_flt, line->d.flt); - line->d.flt[line->d.n_flt-1] = flt_id; - return 1; -} -int bcf_remove_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id, int pass) -{ - if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); - int i; - for (i=0; id.n_flt; i++) - if ( flt_id==line->d.flt[i] ) break; - if ( i==line->d.n_flt ) return 0; // the filter is not present - line->d.shared_dirty |= BCF1_DIRTY_FLT; - if ( i!=line->d.n_flt-1 ) memmove(line->d.flt+i,line->d.flt+i+1,(line->d.n_flt-i-1)*sizeof(*line->d.flt)); - line->d.n_flt--; - if ( !line->d.n_flt && pass ) bcf_add_filter(hdr,line,0); - return 0; -} - -int bcf_has_filter(const bcf_hdr_t *hdr, bcf1_t *line, char *filter) -{ - if ( filter[0]=='.' 
&& !filter[1] ) filter = "PASS"; - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, filter); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FLT,id) ) return -1; // not defined in the header - - if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); - if ( id==0 && !line->d.n_flt) return 1; // PASS - - int i; - for (i=0; id.n_flt; i++) - if ( line->d.flt[i]==id ) return 1; - return 0; -} - -static inline int _bcf1_sync_alleles(const bcf_hdr_t *hdr, bcf1_t *line, int nals) -{ - line->d.shared_dirty |= BCF1_DIRTY_ALS; - - line->n_allele = nals; - hts_expand(char*, line->n_allele, line->d.m_allele, line->d.allele); - - char *als = line->d.als; - int n = 0; - while (nd.allele[n] = als; - while ( *als ) als++; - als++; - n++; - } - - // Update REF length. Note that END is 1-based while line->pos 0-based - bcf_info_t *end_info = bcf_get_info(hdr,line,"END"); - if ( end_info ) - { - if ( end_info->type==BCF_HT_INT && end_info->v1.i==bcf_int32_missing ) end_info = NULL; - else if ( end_info->type==BCF_HT_LONG && end_info->v1.i==bcf_int64_missing ) end_info = NULL; - } - if ( end_info && end_info->v1.i > line->pos ) - line->rlen = end_info->v1.i - line->pos; - else if ( nals > 0 ) - line->rlen = strlen(line->d.allele[0]); - else - line->rlen = 0; - - return 0; -} -int bcf_update_alleles(const bcf_hdr_t *hdr, bcf1_t *line, const char **alleles, int nals) -{ - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); - char *free_old = NULL; - char buffer[256]; - size_t used = 0; - - // The pointers in alleles may point into the existing line->d.als memory, - // so care needs to be taken not to clobber them while updating. Usually - // they will be short so we can copy through an intermediate buffer. - // If they're longer, or won't fit in the existing allocation we - // can allocate a new buffer to write into. Note that in either case - // pointers to line->d.als memory in alleles may not be valid when we've - // finished. 
- int i; - size_t avail = line->d.m_als < sizeof(buffer) ? line->d.m_als : sizeof(buffer); - for (i=0; id.m_als) // Don't shrink the buffer - needed = line->d.m_als; - if (needed > INT_MAX) { - hts_log_error("REF + alleles too long to fit in a BCF record"); - return -1; - } - new_als = malloc(needed); - if (!new_als) - return -1; - free_old = line->d.als; - line->d.als = new_als; - line->d.m_als = needed; - } - - // Copy from the temp buffer to the destination - if (used) { - assert(used <= line->d.m_als); - memcpy(line->d.als, buffer, used); - } - - // Add in any remaining entries - if this happens we will always be - // writing to a newly-allocated buffer. - for (; i < nals; i++) { - size_t sz = strlen(alleles[i]) + 1; - memcpy(line->d.als + used, alleles[i], sz); - used += sz; - } - - if (free_old) - free(free_old); - return _bcf1_sync_alleles(hdr,line,nals); -} - -int bcf_update_alleles_str(const bcf_hdr_t *hdr, bcf1_t *line, const char *alleles_string) -{ - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); - kstring_t tmp; - tmp.l = 0; tmp.s = line->d.als; tmp.m = line->d.m_als; - kputs(alleles_string, &tmp); - line->d.als = tmp.s; line->d.m_als = tmp.m; - - int nals = 1; - char *t = line->d.als; - while (*t) - { - if ( *t==',' ) { *t = 0; nals++; } - t++; - } - return _bcf1_sync_alleles(hdr, line, nals); -} - -int bcf_update_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id) -{ - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); - kstring_t tmp; - tmp.l = 0; tmp.s = line->d.id; tmp.m = line->d.m_id; - if ( id ) - kputs(id, &tmp); - else - kputs(".", &tmp); - line->d.id = tmp.s; line->d.m_id = tmp.m; - line->d.shared_dirty |= BCF1_DIRTY_ID; - return 0; -} - -int bcf_add_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id) -{ - if ( !id ) return 0; - if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); - - kstring_t tmp; - tmp.l = 0; tmp.s = line->d.id; tmp.m = line->d.m_id; - - int len = strlen(id); - 
char *dst = line->d.id; - while ( *dst && (dst=strstr(dst,id)) ) - { - if ( dst[len]!=0 && dst[len]!=';' ) dst++; // a prefix, not a match - else if ( dst==line->d.id || dst[-1]==';' ) return 0; // already present - dst++; // a suffix, not a match - } - if ( line->d.id && (line->d.id[0]!='.' || line->d.id[1]) ) - { - tmp.l = strlen(line->d.id); - kputc(';',&tmp); - } - kputs(id,&tmp); - - line->d.id = tmp.s; line->d.m_id = tmp.m; - line->d.shared_dirty |= BCF1_DIRTY_ID; - return 0; - -} - -bcf_fmt_t *bcf_get_fmt(const bcf_hdr_t *hdr, bcf1_t *line, const char *key) -{ - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, key); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) ) return NULL; // no such FMT field in the header - return bcf_get_fmt_id(line, id); -} - -bcf_info_t *bcf_get_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key) -{ - int id = bcf_hdr_id2int(hdr, BCF_DT_ID, key); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,id) ) return NULL; // no such INFO field in the header - return bcf_get_info_id(line, id); -} - -bcf_fmt_t *bcf_get_fmt_id(bcf1_t *line, const int id) -{ - int i; - if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); - for (i=0; in_fmt; i++) - { - if ( line->d.fmt[i].id==id ) return &line->d.fmt[i]; - } - return NULL; -} - -bcf_info_t *bcf_get_info_id(bcf1_t *line, const int id) -{ - int i; - if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); - for (i=0; in_info; i++) - { - if ( line->d.info[i].key==id ) return &line->d.info[i]; - } - return NULL; -} - - -int bcf_get_info_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type) -{ - int i, ret = -4, tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,tag_id) ) return -1; // no such INFO field in the header - if ( bcf_hdr_id2type(hdr,BCF_HL_INFO,tag_id)!=(type & 0xff) ) return -2; // expected different type - - if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); - - for 
(i=0; in_info; i++) - if ( line->d.info[i].key==tag_id ) break; - if ( i==line->n_info ) return ( type==BCF_HT_FLAG ) ? 0 : -3; // the tag is not present in this record - if ( type==BCF_HT_FLAG ) return 1; - - bcf_info_t *info = &line->d.info[i]; - if ( !info->vptr ) return -3; // the tag was marked for removal - if ( type==BCF_HT_STR ) - { - if ( *ndst < info->len+1 ) - { - *ndst = info->len + 1; - *dst = realloc(*dst, *ndst); - } - memcpy(*dst,info->vptr,info->len); - ((uint8_t*)*dst)[info->len] = 0; - return info->len; - } - - // Make sure the buffer is big enough - int size1; - switch (type) { - case BCF_HT_INT: size1 = sizeof(int32_t); break; - case BCF_HT_LONG: size1 = sizeof(int64_t); break; - case BCF_HT_REAL: size1 = sizeof(float); break; - default: - hts_log_error("Unexpected output type %d at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); - return -2; - } - if ( *ndst < info->len ) - { - *ndst = info->len; - *dst = realloc(*dst, *ndst * size1); - } - - #define BRANCH(type_t, convert, is_missing, is_vector_end, set_missing, set_regular, out_type_t) do { \ - out_type_t *tmp = (out_type_t *) *dst; \ - int j; \ - for (j=0; jlen; j++) \ - { \ - type_t p = convert(info->vptr + j * sizeof(type_t)); \ - if ( is_vector_end ) break; \ - if ( is_missing ) set_missing; \ - else set_regular; \ - tmp++; \ - } \ - ret = j; \ - } while (0) - switch (info->type) { - case BCF_BT_INT8: - if (type == BCF_HT_LONG) { - BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); - } else { - BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); - } - break; - case BCF_BT_INT16: - if (type == BCF_HT_LONG) { - BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); - } else { - BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); - 
} - break; - case BCF_BT_INT32: - if (type == BCF_HT_LONG) { - BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); break; - } else { - BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); break; - } - case BCF_BT_FLOAT: BRANCH(uint32_t, le_to_u32, p==bcf_float_missing, p==bcf_float_vector_end, bcf_float_set_missing(*tmp), bcf_float_set(tmp, p), float); break; - default: hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, info->type, bcf_seqname_safe(hdr,line), line->pos+1); return -2; - } - #undef BRANCH - return ret; // set by BRANCH -} - -int bcf_get_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, char ***dst, int *ndst) -{ - int i,tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,tag_id) ) return -1; // no such FORMAT field in the header - if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=BCF_HT_STR ) return -2; // expected different type - - if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); - - for (i=0; in_fmt; i++) - if ( line->d.fmt[i].id==tag_id ) break; - if ( i==line->n_fmt ) return -3; // the tag is not present in this record - bcf_fmt_t *fmt = &line->d.fmt[i]; - if ( !fmt->p ) return -3; // the tag was marked for removal - - int nsmpl = bcf_hdr_nsamples(hdr); - if ( !*dst ) - { - *dst = (char**) malloc(sizeof(char*)*nsmpl); - if ( !*dst ) return -4; // could not alloc - (*dst)[0] = NULL; - } - int n = (fmt->n+1)*nsmpl; - if ( *ndst < n ) - { - (*dst)[0] = realloc((*dst)[0], n); - if ( !(*dst)[0] ) return -4; // could not alloc - *ndst = n; - } - for (i=0; ip + i*fmt->n; - uint8_t *tmp = (uint8_t*)(*dst)[0] + i*(fmt->n+1); - memcpy(tmp,src,fmt->n); - tmp[fmt->n] = 0; - (*dst)[i] = (char*) tmp; - } - return n; -} - -int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type) -{ - int i,j, tag_id = 
bcf_hdr_id2int(hdr, BCF_DT_ID, tag); - if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,tag_id) ) return -1; // no such FORMAT field in the header - if ( tag[0]=='G' && tag[1]=='T' && tag[2]==0 ) - { - // Ugly: GT field is considered to be a string by the VCF header but BCF represents it as INT. - if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=BCF_HT_STR ) return -2; - } - else if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=type ) return -2; // expected different type - - if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); - - for (i=0; in_fmt; i++) - if ( line->d.fmt[i].id==tag_id ) break; - if ( i==line->n_fmt ) return -3; // the tag is not present in this record - bcf_fmt_t *fmt = &line->d.fmt[i]; - if ( !fmt->p ) return -3; // the tag was marked for removal - - if ( type==BCF_HT_STR ) - { - int n = fmt->n*bcf_hdr_nsamples(hdr); - if ( *ndst < n ) - { - *dst = realloc(*dst, n); - if ( !*dst ) return -4; // could not alloc - *ndst = n; - } - memcpy(*dst,fmt->p,n); - return n; - } - - // Make sure the buffer is big enough - int nsmpl = bcf_hdr_nsamples(hdr); - int size1 = type==BCF_HT_INT ? 
sizeof(int32_t) : sizeof(float); - if ( *ndst < fmt->n*nsmpl ) - { - *ndst = fmt->n*nsmpl; - *dst = realloc(*dst, *ndst*size1); - if ( !*dst ) return -4; // could not alloc - } - - #define BRANCH(type_t, convert, is_missing, is_vector_end, set_missing, set_vector_end, set_regular, out_type_t) { \ - out_type_t *tmp = (out_type_t *) *dst; \ - uint8_t *fmt_p = fmt->p; \ - for (i=0; in; j++) \ - { \ - type_t p = convert(fmt_p + j * sizeof(type_t)); \ - if ( is_missing ) set_missing; \ - else if ( is_vector_end ) { set_vector_end; break; } \ - else set_regular; \ - tmp++; \ - } \ - for (; jn; j++) { set_vector_end; tmp++; } \ - fmt_p += fmt->size; \ - } \ - } - switch (fmt->type) { - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; - case BCF_BT_FLOAT: BRANCH(uint32_t, le_to_u32, p==bcf_float_missing, p==bcf_float_vector_end, bcf_float_set_missing(*tmp), bcf_float_set_vector_end(*tmp), bcf_float_set(tmp, p), float); break; - default: hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, fmt->type, bcf_seqname_safe(hdr,line), line->pos+1); exit(1); - } - #undef BRANCH - return nsmpl*fmt->n; -} - -//error description structure definition -typedef struct err_desc { - int errorcode; - const char *description; -}err_desc; - -// error descriptions -static const err_desc errdesc_bcf[] = { - { BCF_ERR_CTG_UNDEF, "Contig not defined in header"}, - { BCF_ERR_TAG_UNDEF, "Tag not defined in header" }, - { BCF_ERR_NCOLS, "Incorrect number of columns" }, - { BCF_ERR_LIMITS, "Limits reached" }, - { BCF_ERR_CHAR, "Invalid character" }, - 
{ BCF_ERR_CTG_INVALID, "Invalid contig" }, - { BCF_ERR_TAG_INVALID, "Invalid tag" }, -}; - -/// append given description to buffer based on available size and add ... when not enough space - /** @param buffer buffer to which description to be appended - @param offset offset at which to be appended - @param maxbuffer maximum size of the buffer - @param description the description to be appended -on failure returns -1 - when buffer is not big enough; returns -1 on invalid params and on too small buffer which are improbable due to validation at caller site -on success returns 0 - */ -static int add_desc_to_buffer(char *buffer, size_t *offset, size_t maxbuffer, const char *description) { - - if (!description || !buffer || !offset || (maxbuffer < 4)) - return -1; - - size_t rembuffer = maxbuffer - *offset; - if (rembuffer > (strlen(description) + (rembuffer == maxbuffer ? 0 : 1))) { //add description with optionally required ',' - *offset += snprintf(buffer + *offset, rembuffer, "%s%s", (rembuffer == maxbuffer)? "": ",", description); - } else { //not enough space for description, put ... - size_t tmppos = (rembuffer <= 4) ? maxbuffer - 4 : *offset; - snprintf(buffer + tmppos, 4, "..."); //ignore offset update - return -1; - } - return 0; -} - -//get description for given error code. return NULL on error -const char *bcf_strerror(int errorcode, char *buffer, size_t maxbuffer) { - size_t usedup = 0; - int ret = 0; - int idx; - - if (!buffer || maxbuffer < 4) - return NULL; //invalid / insufficient buffer - - if (!errorcode) { - buffer[0] = '\0'; //no error, set null - return buffer; - } - - for (idx = 0; idx < sizeof(errdesc_bcf) / sizeof(err_desc); ++idx) { - if (errorcode & errdesc_bcf[idx].errorcode) { //error is set, add description - ret = add_desc_to_buffer(buffer, &usedup, maxbuffer, errdesc_bcf[idx].description); - if (ret < 0) - break; //not enough space, ... 
added, no need to continue - - errorcode &= ~errdesc_bcf[idx].errorcode; //reset the error - } - } - - if (errorcode && (ret >= 0)) { //undescribed error is present in error code and had enough buffer, try to add unkonwn error as well§ - add_desc_to_buffer(buffer, &usedup, maxbuffer, "Unknown error"); - } - return buffer; -} - diff --git a/src/htslib-1.19.1/vcf_sweep.c b/src/htslib-1.19.1/vcf_sweep.c deleted file mode 100644 index f3fb5fa..0000000 --- a/src/htslib-1.19.1/vcf_sweep.c +++ /dev/null @@ -1,190 +0,0 @@ -/* vcf_sweep.c -- forward/reverse sweep API. - - Copyright (C) 2013-2014, 2019 Genome Research Ltd. - - Author: Petr Danecek - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include - -#include - -#include "htslib/vcf_sweep.h" -#include "htslib/bgzf.h" - -#define SW_FWD 0 -#define SW_BWD 1 - -struct bcf_sweep_t -{ - htsFile *file; - bcf_hdr_t *hdr; - BGZF *fp; - - int direction; // to tell if the direction has changed - int block_size; // the size of uncompressed data to hold in memory - bcf1_t *rec; // bcf buffer - int nrec, mrec; // number of used records; total size of the buffer - int lrid, lpos, lnals, lals_len, mlals; // to check uniqueness of a record - char *lals; - - uint64_t *idx; // uncompressed offsets of VCF/BCF records - int iidx, nidx, midx; // i: current offset; n: used; m: allocated - int idx_done; // the index is built during the first pass -}; - -BGZF *hts_get_bgzfp(htsFile *fp); -int hts_useek(htsFile *file, off_t uoffset, int where); -off_t hts_utell(htsFile *file); - -static inline int sw_rec_equal(bcf_sweep_t *sw, bcf1_t *rec) -{ - if ( sw->lrid!=rec->rid ) return 0; - if ( sw->lpos!=rec->pos ) return 0; - if ( sw->lnals!=rec->n_allele ) return 0; - - char *t = rec->d.allele[sw->lnals-1]; - int len = t - rec->d.allele[0] + 1; - while ( *t ) { t++; len++; } - if ( sw->lals_len!=len ) return 0; - if ( memcmp(sw->lals,rec->d.allele[0],len) ) return 0; - return 1; -} - -static int sw_rec_save(bcf_sweep_t *sw, bcf1_t *rec) -{ - sw->lrid = rec->rid; - sw->lpos = rec->pos; - sw->lnals = rec->n_allele; - - char *t = rec->d.allele[sw->lnals-1]; - int len = t - rec->d.allele[0] + 1; - while ( *t ) { t++; len++; } - sw->lals_len = len; - hts_expand(char, len, sw->mlals, sw->lals); - memcpy(sw->lals, rec->d.allele[0], len); - - return 0; // FIXME: check for errs in this function -} - -static int sw_fill_buffer(bcf_sweep_t *sw) -{ - if ( !sw->iidx ) return 0; - sw->iidx--; - - int ret = hts_useek(sw->file, sw->idx[sw->iidx], 0); - assert( ret==0 ); - - sw->nrec = 0; - bcf1_t *rec = &sw->rec[sw->nrec]; - while ( (ret=bcf_read1(sw->file, 
sw->hdr, rec))==0 ) - { - bcf_unpack(rec, BCF_UN_STR); - - // if not in the last block, stop at the saved record - if ( sw->iidx+1 < sw->nidx && sw_rec_equal(sw,rec) ) break; - - sw->nrec++; - hts_expand0(bcf1_t, sw->nrec+1, sw->mrec, sw->rec); - rec = &sw->rec[sw->nrec]; - } - sw_rec_save(sw, &sw->rec[0]); - - return 0; // FIXME: check for errs in this function -} - -bcf_sweep_t *bcf_sweep_init(const char *fname) -{ - bcf_sweep_t *sw = (bcf_sweep_t*) calloc(1,sizeof(bcf_sweep_t)); - sw->file = hts_open(fname, "r"); - sw->fp = hts_get_bgzfp(sw->file); - if (sw->fp) bgzf_index_build_init(sw->fp); - sw->hdr = bcf_hdr_read(sw->file); - sw->mrec = 1; - sw->rec = (bcf1_t*) calloc(sw->mrec,(sizeof(bcf1_t))); - sw->block_size = 1024*1024*3; - sw->direction = SW_FWD; - return sw; -} - -void bcf_sweep_destroy(bcf_sweep_t *sw) -{ - int i; - for (i=0; imrec; i++) bcf_empty1(&sw->rec[i]); - free(sw->idx); - free(sw->rec); - free(sw->lals); - bcf_hdr_destroy(sw->hdr); - hts_close(sw->file); - free(sw); -} - -static void sw_seek(bcf_sweep_t *sw, int direction) -{ - sw->direction = direction; - if ( direction==SW_FWD ) - hts_useek(sw->file, sw->idx[0], 0); - else - { - sw->iidx = sw->nidx; - sw->nrec = 0; - } -} - -bcf1_t *bcf_sweep_fwd(bcf_sweep_t *sw) -{ - if ( sw->direction==SW_BWD ) sw_seek(sw, SW_FWD); - - off_t pos = hts_utell(sw->file); - - bcf1_t *rec = &sw->rec[0]; - int ret = bcf_read1(sw->file, sw->hdr, rec); - - if ( ret!=0 ) // last record, get ready for sweeping backwards - { - sw->idx_done = 1; - if (sw->fp) sw->fp->idx_build_otf = 0; - sw_seek(sw, SW_BWD); - return NULL; - } - - if ( !sw->idx_done ) - { - if ( !sw->nidx || pos - sw->idx[sw->nidx-1] > sw->block_size ) - { - sw->nidx++; - hts_expand(uint64_t, sw->nidx, sw->midx, sw->idx); - sw->idx[sw->nidx-1] = pos; - } - } - return rec; -} - -bcf1_t *bcf_sweep_bwd(bcf_sweep_t *sw) -{ - if ( sw->direction==SW_FWD ) sw_seek(sw, SW_BWD); - if ( !sw->nrec ) sw_fill_buffer(sw); - if ( !sw->nrec ) return NULL; - return 
&sw->rec[ --sw->nrec ]; -} - -bcf_hdr_t *bcf_sweep_hdr(bcf_sweep_t *sw) { return sw->hdr; } - diff --git a/src/htslib-1.19.1/vcfutils.c b/src/htslib-1.19.1/vcfutils.c deleted file mode 100644 index 890c50a..0000000 --- a/src/htslib-1.19.1/vcfutils.c +++ /dev/null @@ -1,854 +0,0 @@ -/* vcfutils.c -- allele-related utility functions. - - Copyright (C) 2012-2018, 2020-2022 Genome Research Ltd. - - Author: Petr Danecek - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
*/ - -#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h -#include -#include - -#include "htslib/vcfutils.h" -#include "htslib/kbitset.h" - -int bcf_calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which) -{ - int i; - for (i=0; in_allele; i++) ac[i]=0; - - // Use INFO/AC,AN field only when asked - if ( which&BCF_UN_INFO ) - { - bcf_unpack(line, BCF_UN_INFO); - int an_id = bcf_hdr_id2int(header, BCF_DT_ID, "AN"); - int ac_id = bcf_hdr_id2int(header, BCF_DT_ID, "AC"); - int i, an=-1, ac_len=0, ac_type=0; - uint8_t *ac_ptr=NULL; - if ( an_id>=0 && ac_id>=0 ) - { - for (i=0; in_info; i++) - { - bcf_info_t *z = &line->d.info[i]; - if ( z->key == an_id ) an = z->v1.i; - else if ( z->key == ac_id ) { ac_ptr = z->vptr; ac_len = z->len; ac_type = z->type; } - } - } - if ( an>=0 && ac_ptr ) - { - if ( ac_len != line->n_allele - 1 ) - { - static int warned = 0; - if ( !warned ) - { - hts_log_warning("Incorrect number of AC fields at %s:%"PRIhts_pos". (This message is printed only once.)\n", - header->id[BCF_DT_CTG][line->rid].key, line->pos+1); - warned = 1; - } - return 0; - } - int nac = 0; - #define BRANCH_INT(type_t, convert) { \ - for (i=0; iid[BCF_DT_CTG][line->rid].key, line->pos+1); exit(1); break; - } - #undef BRANCH_INT - if ( anid[BCF_DT_CTG][line->rid].key, line->pos+1); - exit(1); - } - ac[0] = an - nac; - return 1; - } - } - - // Split genotype fields only when asked - if ( which&BCF_UN_FMT ) - { - int i, gt_id = bcf_hdr_id2int(header,BCF_DT_ID,"GT"); - if ( gt_id<0 ) return 0; - bcf_unpack(line, BCF_UN_FMT); - bcf_fmt_t *fmt_gt = NULL; - for (i=0; i<(int)line->n_fmt; i++) - if ( line->d.fmt[i].id==gt_id ) { fmt_gt = &line->d.fmt[i]; break; } - if ( !fmt_gt ) return 0; - #define BRANCH_INT(type_t, convert, vector_end) { \ - for (i=0; in_sample; i++) \ - { \ - uint8_t *p = (fmt_gt->p + i*fmt_gt->size); \ - int ial; \ - for (ial=0; ialn; ial++) \ - { \ - int32_t val = convert(&p[ial * sizeof(type_t)]); \ - if ( val==vector_end 
) break; /* smaller ploidy */ \ - if ( bcf_gt_is_missing(val) ) continue; /* missing allele */ \ - if ( val>>1 > line->n_allele ) \ - { \ - hts_log_error("Incorrect allele (\"%d\") in %s at %s:%"PRIhts_pos, (val>>1)-1, header->samples[i], header->id[BCF_DT_CTG][line->rid].key, line->pos+1); \ - exit(1); \ - } \ - ac[(val>>1)-1]++; \ - } \ - } \ - } - switch (fmt_gt->type) { - case BCF_BT_INT8: BRANCH_INT(int8_t, le_to_i8, bcf_int8_vector_end); break; - case BCF_BT_INT16: BRANCH_INT(int16_t, le_to_i16, bcf_int16_vector_end); break; - case BCF_BT_INT32: BRANCH_INT(int32_t, le_to_i32, bcf_int32_vector_end); break; - default: hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, fmt_gt->type, header->id[BCF_DT_CTG][line->rid].key, line->pos+1); exit(1); break; - } - #undef BRANCH_INT - return 1; - } - return 0; -} - -int bcf_gt_type(bcf_fmt_t *fmt_ptr, int isample, int *_ial, int *_jal) -{ - int i, nals = 0, has_ref = 0, has_alt = 0, ial = 0, jal = 0; - #define BRANCH_INT(type_t, convert, vector_end) { \ - uint8_t *p = fmt_ptr->p + isample*fmt_ptr->size; \ - for (i=0; in; i++) \ - { \ - int32_t val = convert(&p[i * sizeof(type_t)]); \ - if ( val == vector_end ) break; /* smaller ploidy */ \ - if ( bcf_gt_is_missing(val) ) return GT_UNKN; /* missing allele */ \ - int tmp = val>>1; \ - if ( tmp>1 ) \ - { \ - if ( !ial ) { ial = tmp; has_alt = 1; } \ - else if ( tmp!=ial ) \ - { \ - if ( tmptype) { - case BCF_BT_INT8: BRANCH_INT(int8_t, le_to_i8, bcf_int8_vector_end); break; - case BCF_BT_INT16: BRANCH_INT(int16_t, le_to_i16, bcf_int16_vector_end); break; - case BCF_BT_INT32: BRANCH_INT(int32_t, le_to_i32, bcf_int32_vector_end); break; - default: hts_log_error("Unexpected type %d", fmt_ptr->type); exit(1); break; - } - #undef BRANCH_INT - - if ( _ial ) *_ial = ial>0 ? ial-1 : ial; - if ( _jal ) *_jal = jal>0 ? jal-1 : jal; - if ( !nals ) return GT_UNKN; - if ( nals==1 ) - return has_ref ? GT_HAPL_R : GT_HAPL_A; - if ( !has_ref ) - return has_alt==1 ? 
GT_HOM_AA : GT_HET_AA; - if ( !has_alt ) - return GT_HOM_RR; - return GT_HET_RA; -} - -int bcf_trim_alleles(const bcf_hdr_t *header, bcf1_t *line) -{ - int i, ret = 0, nrm = 0; - kbitset_t *rm_set = NULL; - bcf_fmt_t *gt = bcf_get_fmt(header, line, "GT"); - if ( !gt ) return 0; - - int *ac = (int*) calloc(line->n_allele,sizeof(int)); - - // check if all alleles are populated - #define BRANCH(type_t, convert, vector_end) { \ - for (i=0; in_sample; i++) \ - { \ - uint8_t *p = gt->p + i*gt->size; \ - int ial; \ - for (ial=0; ialn; ial++) \ - { \ - int32_t val = convert(&p[ial * sizeof(type_t)]); \ - if ( val==vector_end ) break; /* smaller ploidy */ \ - if ( bcf_gt_is_missing(val) ) continue; /* missing allele */ \ - if ( (val>>1)-1 >= line->n_allele ) { \ - hts_log_error("Allele index is out of bounds at %s:%"PRIhts_pos, header->id[BCF_DT_CTG][line->rid].key, line->pos+1); \ - ret = -1; \ - goto clean; \ - } \ - ac[(val>>1)-1]++; \ - } \ - } \ - } - switch (gt->type) { - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_vector_end); break; - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_vector_end); break; - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_vector_end); break; - default: hts_log_error("Unexpected GT %d at %s:%"PRIhts_pos, - gt->type, header->id[BCF_DT_CTG][line->rid].key, line->pos + 1); - goto clean; - } - #undef BRANCH - - rm_set = kbs_init(line->n_allele); - for (i=1; in_allele; i++) { - if ( !ac[i] ) { kbs_insert(rm_set, i); nrm++; } - } - - if (nrm) { - if (bcf_remove_allele_set(header, line, rm_set)) - ret = -2; - } - -clean: - free(ac); - if (rm_set) kbs_destroy(rm_set); - return ret ? 
ret : nrm; -} - -int bcf_remove_alleles(const bcf_hdr_t *header, bcf1_t *line, int rm_mask) -{ - int i; - kbitset_t *rm_set = kbs_init(line->n_allele); - for (i=1; in_allele; i++) - if ( rm_mask & 1<n_allele, sizeof(int)); - uint8_t *dat = NULL; - - bcf_unpack(line, BCF_UN_ALL); - - // create map of indexes from old to new ALT numbering and modify ALT - kstring_t str = {0,0,0}; - kputs(line->d.allele[0], &str); - - int nrm = 0, i,j; // i: ori alleles, j: new alleles - for (i=1, j=1; in_allele; i++) - { - if ( kbs_exists(rm_set, i) ) - { - // remove this allele - line->d.allele[i] = NULL; - nrm++; - continue; - } - kputc(',', &str); - kputs(line->d.allele[i], &str); - map[i] = j; - j++; - } - if ( !nrm ) goto clean; - - int nR_ori = line->n_allele; - int nR_new = line->n_allele-nrm; - if ( nR_new<=0 ) // should not be able to remove reference allele - { - hts_log_error("Cannot remove reference allele at %s:%"PRIhts_pos" [%d]", - bcf_seqname_safe(header,line), line->pos+1, nR_new); - goto err; - } - int nA_ori = nR_ori-1; - int nA_new = nR_new-1; - - int nG_ori = nR_ori*(nR_ori + 1)/2; - int nG_new = nR_new*(nR_new + 1)/2; - - bcf_update_alleles_str(header, line, str.s); - - // remove from Number=G, Number=R and Number=A INFO fields. 
- int mdat = 0, ndat = 0, mdat_bytes = 0, nret; - for (i=0; in_info; i++) - { - bcf_info_t *info = &line->d.info[i]; - int vlen = bcf_hdr_id2length(header,BCF_HL_INFO,info->key); - - if ( vlen!=BCF_VL_A && vlen!=BCF_VL_G && vlen!=BCF_VL_R ) continue; // no need to change - - int type = bcf_hdr_id2type(header,BCF_HL_INFO,info->key); - if ( type==BCF_HT_FLAG ) continue; - int size = 1; - if ( type==BCF_HT_REAL || type==BCF_HT_INT ) size = 4; - - mdat = mdat_bytes / size; - nret = bcf_get_info_values(header, line, bcf_hdr_int2id(header,BCF_DT_ID,info->key), (void**)&dat, &mdat, type); - mdat_bytes = mdat * size; - if ( nret<0 ) - { - hts_log_error("Could not access INFO/%s at %s:%"PRIhts_pos" [%d]", - bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname_safe(header,line), line->pos+1, nret); - goto err; - } - if ( nret==0 ) continue; // no data for this tag - - if ( type==BCF_HT_STR ) - { - str.l = 0; - char *ss = (char*) dat, *se = (char*) dat, s = ss[0]; - if ( vlen==BCF_VL_A || vlen==BCF_VL_R ) - { - int nexp, inc = 0; - if ( vlen==BCF_VL_A ) - { - nexp = nA_ori; - inc = 1; - } - else - nexp = nR_ori; - for (j=0; jkey), bcf_seqname_safe(header,line), line->pos+1, vlen==BCF_VL_A ? 
'A' : 'R', nexp, j); - goto err; - } - } - else // Number=G, assuming diploid genotype - { - int k = 0, n = 0; - for (j=0; jkey), bcf_seqname_safe(header,line), line->pos+1, nG_ori, n); - goto err; - } - } - - nret = bcf_update_info(header, line, bcf_hdr_int2id(header,BCF_DT_ID,info->key), (void*)str.s, str.l, type); - if ( nret<0 ) - { - hts_log_error("Could not update INFO/%s at %s:%"PRIhts_pos" [%d]", - bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname_safe(header,line), line->pos+1, nret); - goto err; - } - continue; - } - - if (nret==1) // could be missing - check - { - int missing = 0; - #define BRANCH(type_t, convert, is_missing) { \ - type_t val = convert(info->vptr); \ - if ( is_missing ) missing = 1; \ - } - switch (info->type) { - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, val==bcf_int8_missing); break; - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, val==bcf_int16_missing); break; - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, val==bcf_int32_missing); break; - case BCF_BT_FLOAT: BRANCH(float, le_to_float, bcf_float_is_missing(val)); break; - default: hts_log_error("Unexpected type %d", info->type); goto err; - } - #undef BRANCH - if (missing) continue; // could remove this INFO tag? 
- } - - if ( vlen==BCF_VL_A || vlen==BCF_VL_R ) - { - int inc = 0, ntop; - if ( vlen==BCF_VL_A ) - { - if ( nret!=nA_ori ) - { - hts_log_error("Unexpected number of values in INFO/%s at %s:%"PRIhts_pos"; expected Number=A=%d, but found %d", - bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname_safe(header,line), line->pos+1, nA_ori, nret); - goto err; - } - ntop = nA_ori; - ndat = nA_new; - inc = 1; - } - else - { - if ( nret!=nR_ori ) - { - hts_log_error("Unexpected number of values in INFO/%s at %s:%"PRIhts_pos"; expected Number=R=%d, but found %d", - bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname_safe(header,line), line->pos+1, nR_ori, nret); - goto err; - } - ntop = nR_ori; - ndat = nR_new; - } - int k = 0; - - #define BRANCH(type_t,is_vector_end) \ - { \ - type_t *ptr = (type_t*) dat; \ - int size = sizeof(type_t); \ - for (j=0; jkey), bcf_seqname_safe(header,line), line->pos+1, nG_ori, nret); - goto err; - } - int k, l_ori = -1, l_new = 0; - ndat = nG_new; - - #define BRANCH(type_t,is_vector_end) \ - { \ - type_t *ptr = (type_t*) dat; \ - int size = sizeof(type_t); \ - for (j=0; jkey), (void*)dat, ndat, type); - if ( nret<0 ) - { - hts_log_error("Could not update INFO/%s at %s:%"PRIhts_pos" [%d]", - bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname_safe(header,line), line->pos+1, nret); - goto err; - } - } - - // Update GT fields, the allele indexes might have changed - for (i=1; i0 ) - { - nret /= line->n_sample; - int32_t *ptr = (int32_t*) dat; - for (i=0; in_sample; i++) - { - for (j=0; j=0 ) ) - { - hts_log_error("Problem updating genotypes at %s:%"PRIhts_pos" [ al=0 :: al=%d,nR_ori=%d,map[al]=%d ]", - bcf_seqname_safe(header,line), line->pos+1, al, nR_ori, map[al]); - goto err; - } - // if an allele other than the reference is mapped to 0, it has been removed, - // so translate it to 'missing', while preserving the phasing bit - ptr[j] = ((al>0 && !map[al]) ? 
bcf_gt_missing : (map[al]+1)<<1) | (ptr[j]&1); - } - ptr += nret; - } - nret = bcf_update_genotypes(header, line, (void*)dat, nret*line->n_sample); - if ( nret<0 ) - { - hts_log_error("Could not update FORMAT/GT at %s:%"PRIhts_pos" [%d]", - bcf_seqname_safe(header,line), line->pos+1, nret); - goto err; - } - } - } - - // Remove from Number=G, Number=R and Number=A FORMAT fields. - // Assuming haploid or diploid GTs - for (i=0; in_fmt; i++) - { - bcf_fmt_t *fmt = &line->d.fmt[i]; - int vlen = bcf_hdr_id2length(header,BCF_HL_FMT,fmt->id); - - if ( vlen!=BCF_VL_A && vlen!=BCF_VL_G && vlen!=BCF_VL_R ) continue; // no need to change - - int type = bcf_hdr_id2type(header,BCF_HL_FMT,fmt->id); - if ( type==BCF_HT_FLAG ) continue; - - int size = 1; - if ( type==BCF_HT_REAL || type==BCF_HT_INT ) size = 4; - - mdat = mdat_bytes / size; - nret = bcf_get_format_values(header, line, bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), (void**)&dat, &mdat, type); - mdat_bytes = mdat * size; - if ( nret<0 ) - { - hts_log_error("Could not access FORMAT/%s at %s:%"PRIhts_pos" [%d]", - bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname_safe(header,line), line->pos+1, nret); - goto err; - } - if ( nret == 0 ) continue; // no data for this tag - - if ( type==BCF_HT_STR ) - { - int size = nret/line->n_sample; // number of bytes per sample - str.l = 0; - if ( vlen==BCF_VL_A || vlen==BCF_VL_R ) - { - int nexp, inc = 0; - if ( vlen==BCF_VL_A ) - { - nexp = nA_ori; - inc = 1; - } - else - nexp = nR_ori; - for (j=0; jn_sample; j++) - { - char *ss = ((char*)dat) + j*size, *se = ss + size, *ptr = ss, s = ss[0]; - int k_src = 0, k_dst = 0, l = str.l; - for (k_src=0; k_src=se || !*ptr) break; - while ( ptrid), bcf_seqname_safe(header,line), line->pos+1, vlen==BCF_VL_A ? 
'A' : 'R', nexp, k_src); - goto err; - } - l = str.l - l; - for (; ln_sample; j++) - { - char *ss = ((char*)dat) + j*size, *se = ss + size, *ptr = ss, s = ss[0]; - int k_src = 0, k_dst = 0, l = str.l; - int nexp = 0; // diploid or haploid? - while ( ptrid), bcf_seqname_safe(header,line), line->pos+1, nG_ori, nR_ori, nexp); - goto err; - } - ptr = ss; - if ( nexp==nG_ori ) // diploid - { - int ia, ib; - for (ia=0; ia=se || !*ptr ) break; - while ( ptr=se || !*ptr ) break; - } - } - else // haploid - { - for (k_src=0; k_src=se || !*ptr ) break; - while ( ptrid), bcf_seqname_safe(header,line), line->pos+1, nR_ori, k_src); - goto err; - } - l = str.l - l; - for (; lid), (void*)str.s, str.l, type); - if ( nret<0 ) - { - hts_log_error("Could not update FORMAT/%s at %s:%"PRIhts_pos" [%d]", - bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname_safe(header,line), line->pos+1, nret); - goto err; - } - continue; - } - - int nori = nret / line->n_sample; - if ( nori==1 && !(vlen==BCF_VL_A && nori==nA_ori) ) // all values may be missing - check - { - int all_missing = 1; - #define BRANCH(type_t, convert, is_missing) { \ - for (j=0; jn_sample; j++) \ - { \ - type_t val = convert(fmt->p + j*fmt->size); \ - if ( !(is_missing)) { all_missing = 0; break; } \ - } \ - } - switch (fmt->type) { - case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, val==bcf_int8_missing); break; - case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, val==bcf_int16_missing); break; - case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, val==bcf_int32_missing); break; - case BCF_BT_FLOAT: BRANCH(float, le_to_float, bcf_float_is_missing(val)); break; - default: hts_log_error("Unexpected type %d", fmt->type); goto err; - } - #undef BRANCH - if (all_missing) continue; // could remove this FORMAT tag? 
- } - - if ( vlen==BCF_VL_A || vlen==BCF_VL_R || (vlen==BCF_VL_G && nori==nR_ori) ) // Number=A, R or haploid Number=G - { - int inc = 0, nnew; - if ( vlen==BCF_VL_A ) - { - if ( nori!=nA_ori ) - { - hts_log_error("Unexpected number of values in FORMAT/%s at %s:%"PRIhts_pos"; expected Number=A=%d, but found %d", - bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname_safe(header,line), line->pos+1, nA_ori, nori); - goto err; - } - ndat = nA_new*line->n_sample; - nnew = nA_new; - inc = 1; - } - else - { - if ( nori!=nR_ori ) - { - hts_log_error("Unexpected number of values in FORMAT/%s at %s:%"PRIhts_pos"; expected Number=R=%d, but found %d", - bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname_safe(header,line), line->pos+1, nR_ori, nori); - goto err; - } - ndat = nR_new*line->n_sample; - nnew = nR_new; - } - - #define BRANCH(type_t,is_vector_end,set_missing) \ - { \ - for (j=0; jn_sample; j++) \ - { \ - type_t *ptr_src = ((type_t*)dat) + j*nori; \ - type_t *ptr_dst = ((type_t*)dat) + j*nnew; \ - int size = sizeof(type_t); \ - int k_src, k_dst = 0; \ - for (k_src=0; k_srcid), bcf_seqname_safe(header,line), line->pos+1, nG_ori, nori); - goto err; - } - ndat = nG_new*line->n_sample; - - #define BRANCH(type_t,is_vector_end) \ - { \ - for (j=0; jn_sample; j++) \ - { \ - type_t *ptr_src = ((type_t*)dat) + j*nori; \ - type_t *ptr_dst = ((type_t*)dat) + j*nG_new; \ - int size = sizeof(type_t); \ - int ia, ib, k_dst = 0, k_src; \ - int nset = 0; /* haploid or diploid? 
*/ \ - for (k_src=0; k_srcid), (void*)dat, ndat, type); - if ( nret<0 ) - { - hts_log_error("Could not update FORMAT/%s at %s:%"PRIhts_pos" [%d]", - bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname_safe(header,line), line->pos+1, nret); - goto err; - } - } - -clean: - free(str.s); - free(map); - free(dat); - return 0; - -err: - free(str.s); - free(map); - free(dat); - return -1; -} - diff --git a/src/htslib-1.19.1/version.sh b/src/htslib-1.19.1/version.sh deleted file mode 100755 index 39fb49e..0000000 --- a/src/htslib-1.19.1/version.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/sh -# version.sh -- Script to build the htslib version string -# -# Author : James Bonfield -# -# Copyright (C) 2017-2018, 2021 Genome Research Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -# Master version, for use in tarballs or non-git source copies -VERSION=1.19.1 - -# If we have a git clone, then check against the current tag -srcdir=${0%/version.sh} -if [ -e $srcdir/.git ] -then - # If we ever get to 10.x this will need to be more liberal - v=`cd $srcdir && git describe --always --match '[0-9].[0-9]*' --dirty` - case $v in - [0-9]*.[0-9]*) VERSION="$v" ;; - [0-9a-f][0-9a-f]*) VERSION="$VERSION-1-g$v" ;; - esac -fi - -# Numeric version is for use in .dylib or .so libraries -# -# Follows the same logic from the Makefile commit c2e93911 -# as non-numeric versions get bumped to patch level 255 to indicate -# an unknown value. -if [ "$1" = "numeric" ] -then - v1=`expr "$VERSION" : '\([0-9]*\)'` - v2=`expr "$VERSION" : '[0-9]*.\([0-9]*\)'` - v3=`expr "$VERSION" : '[0-9]*.[0-9]*.\([0-9]*\)'` - if [ -z "`expr "$VERSION" : '\([0-9.]*\)$'`" ] - then - VERSION="$v1.$v2.255" - else - VERSION="$v1.$v2${v3:+.}$v3" - fi -fi - -echo $VERSION diff --git a/src/htslib-1.21/INSTALL b/src/htslib-1.21/INSTALL new file mode 100644 index 0000000..eb5048a --- /dev/null +++ b/src/htslib-1.21/INSTALL @@ -0,0 +1,316 @@ + Building and Installing HTSlib + ============================== + +Requirements +============ + +Building HTSlib requires a few programs and libraries to be present. +See the "System Specific Details" below for guidance on how to install +these. + +At least the following are required: + + GNU make + C compiler (e.g. gcc or clang) + +In addition, building the configure script requires: + + autoheader + autoconf + autoreconf + +Running the configure script uses awk, along with a number of +standard UNIX tools (cat, cp, grep, mv, rm, sed, among others). Almost +all installations will have these already. + +Running the test harness (make test) uses: + + bash + perl + +HTSlib uses the following external libraries. Building requires both the +library itself, and include files needed to compile code that uses functions +from the library. 
Note that some Linux distributions put include files in +a development ('-dev' or '-devel') package separate from the main library. + + zlib (required) + libbz2 (required, unless configured with --disable-bz2) + liblzma (required, unless configured with --disable-lzma) + libcurl (optional, but strongly recommended) + libcrypto (optional for Amazon S3 support; not needed on MacOS) + libdeflate (optional, but strongly recommended for faster gzip) + +Disabling libbzip2 and liblzma will make some CRAM files unreadable, so +is not recommended. + +Using libcurl provides HTSlib with network protocol support, for +example it enables the use of ftp://, http://, and https:// URLs. +It is also required if direct access to Amazon S3 or Google Cloud +Storage is enabled. + +Amazon S3 support requires an HMAC function to calculate a message +authentication code. On MacOS, the CCHmac function from the standard +library is used. Systems that do not have CCHmac will get this from +libcrypto. libcrypto is part of OpenSSL or one of its derivatives (LibreSSL +or BoringSSL). + +On Microsoft Windows we recommend use of Mingw64/Msys2. Whilst the +code may work on Windows with other environments, these have not been +verified. Use of the configure script is a requirement too. + +Update htscodecs submodule +========================== + +Note that this section only applies to git checkouts. If you're building +from a release tar file, you can skip this section. + +Some parts of HTSlib are provided by the external "htscodecs" project. This +is included as a submodule. When building from the git repository, +either clone the project using "git clone --recurse-submodules", or run: + + git submodule update --init --recursive + +to ensure the correct version of the submodule is present. + +It is also possible to link against an external libhtscodecs library +by using the '--with-external-htscodecs' configure option. When +this is used, the submodule files will be ignored. 
+ +Building Configure +================== + +This step is only needed if configure.ac has been changed, or if configure +does not exist (for example, when building from a git clone). The +configure script and config.h.in can be built by running: + + autoreconf -i + +Basic Installation +================== + +To build and install HTSlib, 'cd' to the htslib-1.x directory containing +the package's source and type the following commands: + + ./configure + make + make install + +The './configure' command checks your build environment and allows various +optional functionality to be enabled (see Configuration below). If you +don't want to select any optional functionality, you may wish to omit +configure and just type 'make; make install' as for previous versions +of HTSlib. However if the build fails you should run './configure' as +it can diagnose the common reasons for build failures. + +The 'make' command builds the HTSlib library and various useful +utilities: bgzip, htsfile, and tabix. If compilation fails you should +run './configure' as it can diagnose problems with your build environment +that cause build failures. + +The 'make install' command installs the libraries, library header files, +utilities, several manual pages, and a pkgconfig file to /usr/local. +The installation location can be changed by configuring with --prefix=DIR +or via 'make prefix=DIR install' (see Installation Locations below). +Shared library permissions can be set via e.g. 'make install LIB_PERM=755'. + + +Configuration +============= + +By default, './configure' examines your build environment, checking for +requirements such as the zlib development files, and arranges for a plain +HTSlib build. The following configure options can be used to enable +various features and specify further optional external requirements: + +--enable-plugins + Use plugins to implement exotic file access protocols and other + specialised facilities. 
This enables such facilities to be developed + and packaged outwith HTSlib, and somewhat isolates HTSlib-using programs + from their library dependencies. By default (or with --disable-plugins), + any enabled pluggable facilities (such as libcurl file access) are built + directly within HTSlib. + + Programs that are statically linked to a libhts.a with plugins enabled + need to be linked using -rdynamic or a similar linker option. + + The https://github.com/samtools/htslib-plugins repository contains + several additional plugins, including the iRODS (http://irods.org/) + file access plugin previously distributed with HTSlib. + +--with-plugin-dir=DIR + Specifies the directory into which plugins built while building HTSlib + should be installed; by default, LIBEXECDIR/htslib. + +--with-plugin-path=DIR:DIR:DIR... + Specifies the list of directories that HTSlib will search for plugins. + By default, only the directory specified via --with-plugin-dir will be + searched; you can use --with-plugin-path='DIR:$(plugindir):DIR' and so + on to cause additional directories to be searched. + +--with-external-htscodecs + Build and link against an external copy of the htscodecs library + instead of using the source files in the htscodecs directory. + +--enable-libcurl + Use libcurl (https://curl.se/) to implement network access to + remote files via FTP, HTTP, HTTPS, etc. By default or with + --enable-libcurl=check, configure will probe for libcurl and include + this functionality if libcurl is available. Use --disable-libcurl + to prevent this. + +--enable-gcs + Implement network access to Google Cloud Storage. By default or with + --enable-gcs=check, this is enabled when libcurl is enabled. + +--enable-s3 + Implement network access to Amazon AWS S3. By default or with + --enable-s3=check, this is enabled when libcurl is enabled. + +--disable-bz2 + Bzip2 is an optional compression codec format for CRAM, included + in HTSlib by default. It can be disabled with --disable-bz2, but + be aware that not all CRAM files may be possible to decode.
+ +--disable-lzma + LZMA is an optional compression codec for CRAM, included in HTSlib + by default. It can be disabled with --disable-lzma, but be aware + that not all CRAM files may be possible to decode. + +--with-libdeflate + Libdeflate is a heavily optimized library for DEFLATE-based compression + and decompression. It also includes a fast crc32 implementation. + By default, ./configure will probe for libdeflate and use it if + available. To prevent this, use --without-libdeflate. + +Each --enable-FEATURE/--disable-FEATURE/--with-PACKAGE/--without-PACKAGE +option listed also has an opposite, e.g., --without-external-htscodecs +or --disable-plugins. However, apart from those options for which the +default is to probe for related facilities, using these opposite options +is mostly unnecessary as they just select the default configure behaviour. + +The configure script also accepts the usual options and environment variables +for tuning installation locations and compilers: type './configure --help' +for details. For example, + + ./configure CC=icc --prefix=/opt/icc-compiled + +would specify that HTSlib is to be built with icc and installed into bin, +lib, etc subdirectories under /opt/icc-compiled. + +If dependencies have been installed in non-standard locations (i.e. not on +the normal include and library search paths) then the CPPFLAGS and LDFLAGS +environment variables can be used to set the options needed to find them. +For example, NetBSD users may use: + + ./configure CPPFLAGS=-I/usr/pkg/include \ + LDFLAGS='-L/usr/pkg/lib -Wl,-R/usr/pkg/lib' + +to allow compiling and linking against dependencies installed via the ports +collection. + +Installation Locations +====================== + +By default, 'make install' installs HTSlib libraries under /usr/local/lib, +HTSlib header files under /usr/local/include, utility programs under +/usr/local/bin, etc. 
(To be precise, the header files are installed within +a fixed 'htslib' subdirectory under the specified .../include location.) + +You can specify a different location to install HTSlib by configuring +with --prefix=DIR or specify locations for particular parts of HTSlib by +configuring with --libdir=DIR and so on. Type './configure --help' for +the full list of such install directory options. + +Alternatively you can specify different locations at install time by +typing 'make prefix=DIR install' or 'make libdir=DIR install' and so on. +Consult the list of prefix/exec_prefix/etc variables near the top of the +Makefile for the full list of such variables that can be overridden. + +You can also specify a staging area by typing 'make DESTDIR=DIR install', +possibly in conjunction with other --prefix or prefix=DIR settings. +For example, + + make DESTDIR=/tmp/staging prefix=/opt + +would install into bin, lib, etc subdirectories under /tmp/staging/opt. + + +System Specific Details +======================= + +Installing the prerequisites is system dependent and there is more +than one correct way of satisfying these, including downloading them +from source, compiling and installing them yourself. + +For people with super-user access, we provide an example set of commands +below for installing the dependencies on a variety of operating system +distributions. Note these are not specific recommendations on distribution, +compiler or SSL implementation. It is assumed you already have the core set +of packages for the given distribution - the lists may be incomplete if +this is not the case. + +Debian / Ubuntu +--------------- + +sudo apt-get update # Ensure the package list is up to date +sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libdeflate-dev + +Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev. 
+ +RedHat / CentOS +--------------- + +sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel libdeflate-devel + +Note: On some versions perl FindBin will need to be installed to make the tests work. + +sudo yum install perl-FindBin + +Alpine Linux +------------ + +doas apk update # Ensure the package list is up to date +doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev openssl-dev + +Ideally also install a copy of libdeflate-dev for faster (de)compression. +This can be found in the Alpine community repository. + +Note: some older Alpine versions use libressl-dev rather than openssl-dev. + +OpenSUSE +-------- + +sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel + +Also install libdeflate-devel, available on OpenSUSE Leap 15.4 onwards +or directly via git releases above. + +Windows MSYS2/MINGW64 +--------------------- + +The configure script must be used as without it the compilation will +likely fail. + +Follow MSYS2 installation instructions at +https://www.msys2.org/wiki/MSYS2-installation/ + +Then relaunch to MSYS2 shell using the "MSYS2 MinGW x64" executable. +Once in that environment (check $MSYSTEM equals "MINGW64") install the +compilers using pacman -S and the following package list: + +base-devel mingw-w64-x86_64-toolchain +mingw-w64-x86_64-libdeflate mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 +mingw-w64-x86_64-xz mingw-w64-x86_64-curl mingw-w64-x86_64-autotools +mingw-w64-x86_64-tools-git + +(The last is only needed for building libraries compatible with MSVC.) + +HP-UX +----- + +HP-UX requires that shared libraries have execute permission. The +default for HTSlib is to install with permission 644 (read-write for +owner and read-only for group / other). 
This can be overridden by +setting the LIB_PERM variable at install time with: + + make install LIB_PERM=755 diff --git a/src/htslib-1.19.1/LICENSE b/src/htslib-1.21/LICENSE similarity index 100% rename from src/htslib-1.19.1/LICENSE rename to src/htslib-1.21/LICENSE diff --git a/src/htslib-1.21/Makefile b/src/htslib-1.21/Makefile new file mode 100644 index 0000000..ef9b5a9 --- /dev/null +++ b/src/htslib-1.21/Makefile @@ -0,0 +1,1005 @@ +# Makefile for htslib, a C library for high-throughput sequencing data formats. +# +# Copyright (C) 2013-2024 Genome Research Ltd. +# +# Author: John Marshall +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +CC = gcc +AR = ar +RANLIB = ranlib + +# Default libraries to link if configure is not used +htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl + +CPPFLAGS = +# TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64 +# to CFLAGS manually, here or in config.mk if the latter exists. 
+# TODO: probably update cram code to make it compile cleanly with -Wc++-compat +# For testing strict C99 support add -std=c99 -D_XOPEN_SOURCE=600 +#CFLAGS = -g -Wall -O2 -pedantic -std=c99 -D_XOPEN_SOURCE=600 +CFLAGS = -g -Wall -O2 -fvisibility=hidden +EXTRA_CFLAGS_PIC = -fpic +TARGET_CFLAGS = +LDFLAGS = -fvisibility=hidden +VERSION_SCRIPT_LDFLAGS = -Wl,-version-script,$(srcprefix)htslib.map +LIBS = $(htslib_default_libs) + +prefix = /usr/local +exec_prefix = $(prefix) +bindir = $(exec_prefix)/bin +includedir = $(prefix)/include +libdir = $(exec_prefix)/lib +libexecdir = $(exec_prefix)/libexec +datarootdir = $(prefix)/share +mandir = $(datarootdir)/man +man1dir = $(mandir)/man1 +man5dir = $(mandir)/man5 +man7dir = $(mandir)/man7 +pkgconfigdir= $(libdir)/pkgconfig + +MKDIR_P = mkdir -p +INSTALL = install -p +INSTALL_DATA = $(INSTALL) -m 644 +INSTALL_DIR = $(MKDIR_P) -m 755 +LIB_PERM = 644 +INSTALL_LIB = $(INSTALL) -m $(LIB_PERM) +INSTALL_MAN = $(INSTALL_DATA) +INSTALL_PROGRAM = $(INSTALL) + +# Set by config.mk if plugins are enabled +plugindir = + +BUILT_PROGRAMS = \ + annot-tsv \ + bgzip \ + htsfile \ + tabix + +BUILT_TEST_PROGRAMS = \ + test/hts_endian \ + test/fieldarith \ + test/hfile \ + test/pileup \ + test/pileup_mod \ + test/plugins-dlhts \ + test/sam \ + test/test_bgzf \ + test/test_expr \ + test/test_faidx \ + test/test_kfunc \ + test/test_khash \ + test/test_kstring \ + test/test_mod \ + test/test_nibbles \ + test/test_realn \ + test/test-regidx \ + test/test_str2int \ + test/test_time_funcs \ + test/test_view \ + test/test_index \ + test/test-vcf-api \ + test/test-vcf-sweep \ + test/test-bcf-sr \ + test/fuzz/hts_open_fuzzer.o \ + test/test-bcf-translate \ + test/test-parse-reg \ + test/test_introspection \ + test/test-bcf_set_variant_type + +BUILT_THRASH_PROGRAMS = \ + test/thrash_threads1 \ + test/thrash_threads2 \ + test/thrash_threads3 \ + test/thrash_threads4 \ + test/thrash_threads5 \ + test/thrash_threads6 \ + test/thrash_threads7 + +all: 
lib-static lib-shared $(BUILT_PROGRAMS) plugins \ + $(BUILT_TEST_PROGRAMS) htslib_static.mk htslib-uninstalled.pc + +# Report compiler and version +cc-version: + -@$(CC) --version 2>/dev/null || true + -@$(CC) --qversion 2>/dev/null || true + -@$(CC) -V 2>/dev/null || true + +ALL_CPPFLAGS = -I. $(CPPFLAGS) + +# Usually htscodecs.mk is generated by running configure or config.status, +# but if those aren't used create a default here. +htscodecs.mk: + echo '# Default htscodecs.mk generated by Makefile' > $@ + echo 'include $$(HTSPREFIX)htscodecs_bundled.mk' >> $@ + $(srcdir)/hts_probe_cc.sh '$(CC)' '$(CFLAGS) $(CPPFLAGS)' '$(LDFLAGS)' >> $@ + +srcdir = . +srcprefix = +HTSPREFIX = + +# Flags for SIMD code +HTS_CFLAGS_AVX2 = +HTS_CFLAGS_AVX512 = +HTS_CFLAGS_SSE4 = + +# Control building of SIMD code. Not used if configure has been run. +HTS_BUILD_AVX2 = +HTS_BUILD_AVX512 = +HTS_BUILD_SSE4 = + +include htslib_vars.mk +include htscodecs.mk + +# If not using GNU make, you need to copy the version number from version.sh +# into here. +PACKAGE_VERSION := $(shell $(srcdir)/version.sh) + +LIBHTS_SOVERSION = 3 + +# Version numbers for the Mac dynamic library. Note that the leading 3 +# is not strictly necessary and should be removed the next time +# LIBHTS_SOVERSION is bumped (see #1144 and +# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23) +MACH_O_COMPATIBILITY_VERSION = 3.1.21 +MACH_O_CURRENT_VERSION = 3.1.21 + +# Force version.h to be remade if $(PACKAGE_VERSION) has changed. 
+version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force)) + +version.h: + echo '#define HTS_VERSION_TEXT "$(PACKAGE_VERSION)"' > $@ + +print-version: + @echo $(PACKAGE_VERSION) + +show-version: + @echo PACKAGE_VERSION = $(PACKAGE_VERSION) + +config_vars.h: override escape=$(subst ',\x27,$(subst ",\",$(subst \,\\,$(1)))) +config_vars.h: override hts_cc_escaped=$(call escape,$(CC)) +config_vars.h: override hts_cppflags_escaped=$(call escape,$(CPPFLAGS)) +config_vars.h: override hts_cflags_escaped=$(call escape,$(CFLAGS)) +config_vars.h: override hts_ldflags_escaped=$(call escape,$(LDFLAGS)) +config_vars.h: override hts_libs_escaped=$(call escape,$(LIBS)) + +config_vars.h: + printf '#define HTS_CC "%s"\n#define HTS_CPPFLAGS "%s"\n#define HTS_CFLAGS "%s"\n#define HTS_LDFLAGS "%s"\n#define HTS_LIBS "%s"\n' \ + '$(hts_cc_escaped)' \ + '$(hts_cppflags_escaped)' \ + '$(hts_cflags_escaped)' \ + '$(hts_ldflags_escaped)' \ + '$(hts_libs_escaped)' > $@ + +.SUFFIXES: .bundle .c .cygdll .dll .o .pico .so + +.c.o: + $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ $< + +.c.pico: + $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CFLAGS_PIC) -c -o $@ $< + + +LIBHTS_OBJS = \ + kfunc.o \ + kstring.o \ + bcf_sr_sort.o \ + bgzf.o \ + errmod.o \ + faidx.o \ + header.o \ + hfile.o \ + hts.o \ + hts_expr.o \ + hts_os.o\ + md5.o \ + multipart.o \ + probaln.o \ + realn.o \ + regidx.o \ + region.o \ + sam.o \ + sam_mods.o \ + simd.o \ + synced_bcf_reader.o \ + vcf_sweep.o \ + tbx.o \ + textutils.o \ + thread_pool.o \ + vcf.o \ + vcfutils.o \ + cram/cram_codecs.o \ + cram/cram_decode.o \ + cram/cram_encode.o \ + cram/cram_external.o \ + cram/cram_index.o \ + cram/cram_io.o \ + cram/cram_stats.o \ + cram/mFILE.o \ + cram/open_trace_file.o \ + cram/pooled_alloc.o \ + cram/string_alloc.o \ + $(HTSCODECS_OBJS) \ + $(NONCONFIGURE_OBJS) + +# Without configure we wish to have a rich set of default figures, +# but we still need 
conditional inclusion as we wish to still +# support ./configure --disable-blah. +NONCONFIGURE_OBJS = hfile_libcurl.o + +PLUGIN_EXT = +PLUGIN_OBJS = + +cram_h = cram/cram.h $(cram_samtools_h) $(header_h) $(cram_structs_h) $(cram_io_h) cram/cram_encode.h cram/cram_decode.h cram/cram_stats.h cram/cram_codecs.h cram/cram_index.h $(htslib_cram_h) +cram_io_h = cram/cram_io.h $(cram_misc_h) +cram_misc_h = cram/misc.h +cram_os_h = cram/os.h $(htslib_hts_endian_h) +cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h) +cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) $(htslib_cram_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h) +cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h +bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h) +fuzz_settings_h = fuzz_settings.h +header_h = header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h) $(htslib_sam_h) +hfile_internal_h = hfile_internal.h $(htslib_hts_defs_h) $(htslib_hfile_h) $(textutils_internal_h) +hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h) +hts_time_funcs_h = hts_time_funcs.h +sam_internal_h = sam_internal.h $(htslib_sam_h) +textutils_internal_h = textutils_internal.h $(htslib_kstring_h) +thread_pool_internal_h = thread_pool_internal.h $(htslib_thread_pool_h) + +# To be effective, config.mk needs to appear after most Makefile variables are +# set but before most rules appear, so that it can both use previously-set +# variables in its own rules' prerequisites and also update variables for use +# in later rules' prerequisites. + +# If your make doesn't accept -include, change this to 'include' if you are +# using the configure script or just comment the line out if you are not. +-include config.mk + +# Usually config.h is generated by running configure or config.status, +# but if those aren't used create a default config.h here. 
+# The first echo truncates $@ and every later command appends to it, so the
+# lines of config.h appear in recipe order.  The CPUID defines are written
+# only when $(HTS_HAVE_CPUID) expands to a non-empty string, and the SSE4,
+# AVX2 and AVX512 defines only when the matching $(HTS_BUILD_SSE4),
+# $(HTS_BUILD_AVX2) or $(HTS_BUILD_AVX512) variable is non-empty.
+config.h:
+	echo '/* Default config.h generated by Makefile */' > $@
+	echo '#ifndef _XOPEN_SOURCE' >> $@
+	echo '#define _XOPEN_SOURCE 600' >> $@
+	echo '#endif' >> $@
+	echo '#define HAVE_LIBBZ2 1' >> $@
+	echo '#define HAVE_LIBLZMA 1' >> $@
+	echo '#ifndef __APPLE__' >> $@
+	echo '#define HAVE_LZMA_H 1' >> $@
+	echo '#endif' >> $@
+	echo '#define HAVE_DRAND48 1' >> $@
+	echo '#define HAVE_LIBCURL 1' >> $@
+	if [ "x$(HTS_HAVE_CPUID)" != "x" ]; then \
+	    echo '#define HAVE_DECL___CPUID_COUNT 1' >> $@ ; \
+	    echo '#define HAVE_DECL___GET_CPUID_MAX 1' >> $@ ; \
+	fi
+	if [ "x$(HTS_BUILD_SSE4)" != "x" ]; then \
+	    echo '#define HAVE_POPCNT 1' >> $@ ; \
+	    echo '#define HAVE_SSE4_1 1' >> $@ ; \
+	    echo '#define HAVE_SSSE3 1' >> $@ ; \
+	    echo '#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0' >> $@ ; \
+	    echo '#define UBSAN 1' >> $@ ; \
+	    echo '#endif' >> $@ ; \
+	fi
+	if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \
+	    echo '#define HAVE_AVX2 1' >> $@ ; \
+	fi
+	if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \
+	    echo '#define HAVE_AVX512 1' >> $@ ; \
+	fi
+	echo '#if defined __x86_64__ || defined __arm__ || defined __aarch64__' >> $@
+	echo '#define HAVE_ATTRIBUTE_CONSTRUCTOR 1' >> $@
+	echo '#endif' >> $@
+	echo '#if (defined(__x86_64__) || defined(_M_X64))' >> $@
+	echo '#define HAVE_ATTRIBUTE_TARGET 1' >> $@
+	echo '#define HAVE_BUILTIN_CPU_SUPPORT_SSSE3 1' >> $@
+	echo '#endif' >> $@
+
+# And similarly for htslib.pc.tmp ("pkg-config template"). No dependency
+# on htslib.pc.in listed, as if that file is newer the usual way to regenerate
+# this target is via configure or config.status rather than this rule.
+#
+# The sed script first replaces @static_LIBS@ on the static_libs= line with
+# $(htslib_default_libs), then deletes every remaining @name@ placeholder
+# whose first character after the opening '@' is not '-'.
+htslib.pc.tmp:
+	sed -e '/^static_libs=/s/@static_LIBS@/$(htslib_default_libs)/;s#@[^-][^@]*@##g' $(srcprefix)htslib.pc.in > $@
+
+# Create a makefile fragment listing the libraries and LDFLAGS needed for
+# static linking. This can be included by projects that want to build
+# and link against the htslib source tree instead of an installed library.
+# sed -n prints only the static_libs= and static_ldflags= lines of $<, with
+# everything up to the '=' rewritten to "HTSLIB_static_LIBS = " and
+# "HTSLIB_static_LDFLAGS = " respectively.
+htslib_static.mk: htslib.pc.tmp
+	sed -n '/^static_libs=/s/[^=]*=/HTSLIB_static_LIBS = /p;/^static_ldflags=/s/[^=]*=/HTSLIB_static_LDFLAGS = /p' $< > $@
+
+
+# Alias target: building lib-static builds the static archive.
+lib-static: libhts.a
+
+# $(shell), :=, and ifeq/.../endif are GNU Make-specific. If you don't have
+# GNU Make, comment out the parts of these conditionals that don't apply.
+#
+# PLATFORM is set from `uname -s` unless its origin is already "file", i.e.
+# unless a makefile has assigned it before this point.
+ifneq "$(origin PLATFORM)" "file"
+PLATFORM := $(shell uname -s)
+endif
+# Pick the shared-library flavour and the files that lib-shared builds.
+# $(findstring X,$(PLATFORM)) yields X when PLATFORM contains X, so the
+# CYGWIN/MSYS/MINGW branches match on a substring of the uname output;
+# anything that matches no branch falls back to the "so" flavour.
+ifeq "$(PLATFORM)" "Darwin"
+SHLIB_FLAVOUR = dylib
+lib-shared: libhts.dylib
+else ifeq "$(findstring CYGWIN,$(PLATFORM))" "CYGWIN"
+SHLIB_FLAVOUR = cygdll
+lib-shared: cyghts-$(LIBHTS_SOVERSION).dll
+else ifeq "$(findstring MSYS,$(PLATFORM))" "MSYS"
+SHLIB_FLAVOUR = dll
+lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib
+else ifeq "$(findstring MINGW,$(PLATFORM))" "MINGW"
+SHLIB_FLAVOUR = dll
+lib-shared: hts-$(LIBHTS_SOVERSION).dll hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib
+else
+SHLIB_FLAVOUR = so
+lib-shared: libhts.so
+endif
+
+# Plugin file names: each $(PLUGIN_OBJS) entry with its .o suffix replaced
+# by $(PLUGIN_EXT).
+BUILT_PLUGINS = $(PLUGIN_OBJS:.o=$(PLUGIN_EXT))
+
+# The plugins target additionally depends on lib-shared, but only when there
+# is at least one plugin to build.
+ifneq "$(BUILT_PLUGINS)" ""
+plugins: lib-shared
+endif
+plugins: $(BUILT_PLUGINS)
+
+
+# Any existing archive is removed before $(AR) -rc recreates it from
+# $(LIBHTS_OBJS).  The '-' prefixes mean a failing rm or $(RANLIB) does not
+# abort the build; '@' keeps the rm command out of the build log.
+libhts.a: $(LIBHTS_OBJS)
+	@-rm -f $@
+	$(AR) -rc $@ $(LIBHTS_OBJS)
+	-$(RANLIB) $@
+
+# Echo the current values of selected build variables.
+print-config:
+	@echo HTS_CFLAGS_AVX2 = $(HTS_CFLAGS_AVX2)
+	@echo HTS_CFLAGS_AVX512 = $(HTS_CFLAGS_AVX512)
+	@echo HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSE4)
+	@echo LDFLAGS = $(LDFLAGS)
+	@echo LIBHTS_OBJS = $(LIBHTS_OBJS)
+	@echo LIBS = $(LIBS)
+	@echo PLATFORM = $(PLATFORM)
+
+# The target here is libhts.so, as that is the built file that other rules
+# depend upon and that is used when -lhts appears in other program's recipes.
+# As a byproduct invisible to make, libhts.so.NN is also created, as it is the
+# file used at runtime (when $LD_LIBRARY_PATH includes the build directory).
+
+# Linked from the .pico objects (each $(LIBHTS_OBJS) name with .o replaced
+# by .pico).  The second command creates libhts.so.$(LIBHTS_SOVERSION) as a
+# symlink to $@.
+libhts.so: $(LIBHTS_OBJS:.o=.pico)
+	$(CC) -shared -Wl,-soname,libhts.so.$(LIBHTS_SOVERSION) $(VERSION_SCRIPT_LDFLAGS) $(LDFLAGS) -o $@ $(LIBHTS_OBJS:.o=.pico) $(LIBS) -lpthread
+	ln -sf $@ libhts.so.$(LIBHTS_SOVERSION)
+
+# Similarly this also creates libhts.NN.dylib as a byproduct, so that programs
+# when run can find this uninstalled shared library (when $DYLD_LIBRARY_PATH
+# includes this project's build directory).
+
+libhts.dylib: $(LIBHTS_OBJS)
+	$(CC) -dynamiclib -install_name $(libdir)/libhts.$(LIBHTS_SOVERSION).dylib -current_version $(MACH_O_CURRENT_VERSION) -compatibility_version $(MACH_O_COMPATIBILITY_VERSION) $(LDFLAGS) -o $@ $(LIBHTS_OBJS) $(LIBS)
+	ln -sf $@ libhts.$(LIBHTS_SOVERSION).dylib
+
+# Cygwin DLL plus its import library libhts.dll.a (written by the linker via
+# -Wl,--out-implib).
+# NOTE(review): both names are targets of this one rule, so $@ is whichever
+# of them make is currently building.  If libhts.dll.a were requested before
+# the DLL exists, "-o $@" would name the same file as --out-implib -- confirm
+# that the DLL is always built first, or split the rule.
+cyghts-$(LIBHTS_SOVERSION).dll libhts.dll.a: $(LIBHTS_OBJS)
+	$(CC) -shared -Wl,--out-implib=libhts.dll.a -Wl,--enable-auto-import $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread
+
+# DLL plus its import library hts.dll.a for the "dll" flavour; the same
+# two-targets-one-recipe caveat as the rule above applies (NOTE(review)).
+hts-$(LIBHTS_SOVERSION).dll hts.dll.a: $(LIBHTS_OBJS)
+	$(CC) -shared -Wl,--out-implib=hts.dll.a -Wl,--enable-auto-import -Wl,--exclude-all-symbols $(LDFLAGS) -o $@ -Wl,--whole-archive $(LIBHTS_OBJS) -Wl,--no-whole-archive $(LIBS) -lpthread
+
+# gendef is run on the DLL; presumably it writes the .def file named by the
+# target into the current directory -- TODO confirm.
+hts-$(LIBHTS_SOVERSION).def: hts-$(LIBHTS_SOVERSION).dll
+	gendef hts-$(LIBHTS_SOVERSION).dll
+
+# dlltool is given the .def file via -d and the target .lib name via -l.
+hts-$(LIBHTS_SOVERSION).lib: hts-$(LIBHTS_SOVERSION).def
+	dlltool -m i386:x86-64 -d hts-$(LIBHTS_SOVERSION).def -l hts-$(LIBHTS_SOVERSION).lib
+
+# Bundling libraries, binaries, dll dependencies, and licenses into a
+# single directory. NB: This is not needed for end-users, but a test bed
+# for maintainers building binary distributions.
+#
+# NOTE: only tested on the supported MSYS2/MINGW64 environment.
+# DESTDIR and prefix are target-specific here, so they also apply while the
+# "install" prerequisite is built for this goal.  After the install step the
+# recipe copies the .def/.lib files into dist-windows/lib, copies every file
+# whose ldd line for the DLL mentions "mingw64" into dist-windows/bin ($$3
+# reaches awk as $3, the third field), and gathers licence directories.  The
+# two "-cp -r" commands are '-'-prefixed, so a failed copy there does not
+# stop the build.
+dist-windows: DESTDIR=
+dist-windows: prefix=dist-windows
+dist-windows: install
+	cp hts-$(LIBHTS_SOVERSION).def hts-$(LIBHTS_SOVERSION).lib dist-windows/lib
+	cp `ldd hts-$(LIBHTS_SOVERSION).dll| awk '/mingw64/ {print $$3}'` dist-windows/bin
+	mkdir -p dist-windows/share/licenses/htslib
+	-cp -r /mingw64/share/licenses/mingw-w64-libraries \
+	    /mingw64/share/licenses/brotli \
+	    /mingw64/share/licenses/bzip2 \
+	    /mingw64/share/licenses/gcc-libs \
+	    /mingw64/share/licenses/libdeflate \
+	    /mingw64/share/licenses/libpsl \
+	    /mingw64/share/licenses/libtre \
+	    /mingw64/share/licenses/libwinpthread \
+	    /mingw64/share/licenses/openssl \
+	    /mingw64/share/licenses/xz \
+	    /mingw64/share/licenses/zlib \
+	    /mingw64/share/licenses/zstd \
+	    dist-windows/share/licenses/
+	-cp -r /usr/share/licenses/curl \
+	    dist-windows/share/licenses/
+	cp LICENSE dist-windows/share/licenses/htslib/
+
+
+# Target to allow htslib.mk to build all the object files before it
+# links the shared and static libraries.
+# The recipe touches a file named after the target, which therefore acts as
+# a timestamp for "all of $(LIBHTS_OBJS) are built".
+hts-object-files: $(LIBHTS_OBJS)
+	touch $@
+
+# On Unix dlopen("libhts.so.NN", RTLD_LAZY) may default to RTLD_LOCAL.
+# Hence plugins need to link to (shared) libhts.so.NN themselves, as they
+# may not be able to access libhts symbols via the main program's libhts
+# if that was dynamically loaded without an explicit RTLD_GLOBAL.
+# $< is the plugin's .pico object; libhts.so is named explicitly on the
+# link line as well as being a prerequisite.
+%.so: %.pico libhts.so
+	$(CC) -shared -Wl,-E $(LDFLAGS) -o $@ $< libhts.so $(LIBS) -lpthread
+
+# For programs *statically* linked to libhts.a, on macOS loading a plugin
+# linked to a shared libhts.NN.dylib would lead to conflicting duplicate
+# symbols. Fortunately macOS dlopen() defaults to RTLD_GLOBAL so there
+# is less need for plugins to link back to libhts themselves.
+%.bundle: %.o + $(CC) -bundle -Wl,-undefined,dynamic_lookup $(LDFLAGS) -o $@ $< $(LIBS) + +%.cygdll: %.o libhts.dll.a + $(CC) -shared $(LDFLAGS) -o $@ $< libhts.dll.a $(LIBS) + +%.dll: %.o hts.dll.a + $(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS) + + +bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h) +errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h) +kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h) +header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h) +hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(htslib_kstring_h) $(hts_internal_h) $(htslib_khash_h) +hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(hfile_internal_h) +hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) +hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h) +hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(hts_time_funcs_h) +hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h) +hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h) +hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c +vcf.o vcf.pico: vcf.c config.h $(fuzz_settings_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) 
$(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h) +sam.o sam.pico: sam.c config.h $(fuzz_settings_h) $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h) +sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h) +simd.o simd.pico: simd.c config.h $(htslib_sam_h) $(sam_internal_h) +tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h) +faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h) +bcf_sr_sort.o bcf_sr_sort.pico: bcf_sr_sort.c config.h $(bcf_sr_sort_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h) +synced_bcf_reader.o synced_bcf_reader.pico: synced_bcf_reader.c config.h $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(bcf_sr_sort_h) +vcf_sweep.o vcf_sweep.pico: vcf_sweep.c config.h $(htslib_vcf_sweep_h) $(htslib_bgzf_h) +vcfutils.o vcfutils.pico: vcfutils.c config.h $(htslib_vcfutils_h) $(htslib_kbitset_h) +kfunc.o kfunc.pico: kfunc.c config.h $(htslib_kfunc_h) +regidx.o regidx.pico: regidx.c config.h $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(htslib_regidx_h) $(hts_internal_h) +region.o region.pico: region.c config.h $(htslib_hts_h) $(htslib_khash_h) +md5.o md5.pico: md5.c config.h $(htslib_hts_h) $(htslib_hts_endian_h) +multipart.o multipart.pico: multipart.c config.h $(htslib_kstring_h) $(hts_internal_h) $(hfile_internal_h) +plugin.o plugin.pico: plugin.c config.h $(hts_internal_h) $(htslib_kstring_h) +probaln.o probaln.pico: probaln.c config.h $(htslib_hts_h) +realn.o realn.pico: realn.c config.h $(htslib_hts_h) 
$(htslib_sam_h) +textutils.o textutils.pico: textutils.c config.h $(htslib_hfile_h) $(htslib_kstring_h) $(htslib_sam_h) $(hts_internal_h) + +cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(fuzz_settings_h) $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h) +cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) +cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(sam_internal_h) $(htslib_hts_h) $(htslib_hts_endian_h) $(textutils_internal_h) +cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htscodecs_rANS_static4x16_h) $(htslib_hfile_h) $(cram_h) +cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) +cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(fuzz_settings_h) $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h) +cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h) +cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h +cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(htslib_hts_h) +cram/pooled_alloc.o cram/pooled_alloc.pico: cram/pooled_alloc.c config.h cram/pooled_alloc.h $(cram_misc_h) +cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h +thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h) $(htslib_hts_log_h) + 
+htscodecs/htscodecs/arith_dynamic.o htscodecs/htscodecs/arith_dynamic.pico: htscodecs/htscodecs/arith_dynamic.c config.h $(htscodecs_arith_dynamic_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) +htscodecs/htscodecs/fqzcomp_qual.o htscodecs/htscodecs/fqzcomp_qual.pico: htscodecs/htscodecs/fqzcomp_qual.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_c_simple_model_h) +htscodecs/htscodecs/htscodecs.o htscodecs/htscodecs/htscodecs.pico: htscodecs/htscodecs/htscodecs.c $(htscodecs_htscodecs_h) $(htscodecs_version_h) +htscodecs/htscodecs/pack.o htscodecs/htscodecs/pack.pico: htscodecs/htscodecs/pack.c config.h $(htscodecs_pack_h) +htscodecs/htscodecs/rANS_static32x16pr.o htscodecs/htscodecs/rANS_static32x16pr.pico: htscodecs/htscodecs/rANS_static32x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) +htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: htscodecs/htscodecs/rANS_static32x16pr_avx2.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) $(htscodecs_permute_h) +htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: htscodecs/htscodecs/rANS_static32x16pr_avx512.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) +htscodecs/htscodecs/rANS_static32x16pr_neon.o htscodecs/htscodecs/rANS_static32x16pr_neon.pico: htscodecs/htscodecs/rANS_static32x16pr_neon.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) +htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: 
htscodecs/htscodecs/rANS_static32x16pr_sse4.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_varint_h) $(htscodecs_utils_h) +htscodecs/htscodecs/rANS_static4x16pr.o htscodecs/htscodecs/rANS_static4x16pr.pico: htscodecs/htscodecs/rANS_static4x16pr.c config.h $(htscodecs_rANS_word_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_rANS_static16_int_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(htscodecs_utils_h) $(htscodecs_rANS_static32x16pr_h) +htscodecs/htscodecs/rANS_static.o htscodecs/htscodecs/rANS_static.pico: htscodecs/htscodecs/rANS_static.c config.h $(htscodecs_rANS_byte_h) $(htscodecs_utils_h) $(htscodecs_rANS_static_h) +htscodecs/htscodecs/rle.o htscodecs/htscodecs/rle.pico: htscodecs/htscodecs/rle.c config.h $(htscodecs_varint_h) $(htscodecs_rle_h) +htscodecs/htscodecs/tokenise_name3.o htscodecs/htscodecs/tokenise_name3.pico: htscodecs/htscodecs/tokenise_name3.c config.h $(htscodecs_pooled_alloc_h) $(htscodecs_arith_dynamic_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_tokenise_name3_h) $(htscodecs_varint_h) $(htscodecs_utils_h) +htscodecs/htscodecs/utils.o htscodecs/htscodecs/utils.pico: htscodecs/htscodecs/utils.c config.h $(htscodecs_utils_h) + +# Extra CFLAGS for specific files +htscodecs/htscodecs/rANS_static32x16pr_avx2.o htscodecs/htscodecs/rANS_static32x16pr_avx2.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX2) +htscodecs/htscodecs/rANS_static32x16pr_avx512.o htscodecs/htscodecs/rANS_static32x16pr_avx512.pico: TARGET_CFLAGS = $(HTS_CFLAGS_AVX512) +htscodecs/htscodecs/rANS_static32x16pr_sse4.o htscodecs/htscodecs/rANS_static32x16pr_sse4.pico: TARGET_CFLAGS = $(HTS_CFLAGS_SSE4) + +annot-tsv: annot-tsv.o libhts.a + $(CC) $(LDFLAGS) -o $@ annot-tsv.o libhts.a $(LIBS) -lpthread + +bgzip: bgzip.o libhts.a + $(CC) $(LDFLAGS) -o $@ bgzip.o libhts.a $(LIBS) -lpthread + +htsfile: htsfile.o libhts.a + $(CC) $(LDFLAGS) -o $@ htsfile.o libhts.a $(LIBS) -lpthread + +tabix: tabix.o libhts.a + $(CC) $(LDFLAGS) -o 
$@ tabix.o libhts.a $(LIBS) -lpthread + +annot-tsv.o: annot-tsv.c config.h $(htslib_hts_h) $(htslib_hts_defs_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_regidx_h) $(textutils_internal_h) +bgzip.o: bgzip.c config.h $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_hfile_h) +htsfile.o: htsfile.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) +tabix.o: tabix.c config.h $(htslib_tbx_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_hts_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(htslib_hts_log_h) $(htslib_thread_pool_h) + +# Runes to check that the htscodecs submodule is present +ifdef HTSCODECS_SOURCES +htscodecs/htscodecs/%.c: | htscodecs/htscodecs + @if test -e htscodecs/.git && test ! -e "$@" ; then \ + echo "Missing file '$@'" ; \ + echo " - Do you need to update the htscodecs submodule?" ; \ + false ; \ + fi + +htscodecs/htscodecs/%.h: | htscodecs/htscodecs + @if test -e htscodecs/.git && test ! -e "$@" ; then \ + echo "Missing file '$@'" ; \ + echo " - Do you need to update the htscodecs submodule?" ; \ + false ; \ + fi + +htscodecs/htscodecs: + @if test -e .git ; then \ + printf "\\n\\nError: htscodecs submodule files not present for htslib.\\n\ + Try running: \\n\ + git submodule update --init --recursive\\n\ + in the top-level htslib directory and then re-run make.\\n\\n\\n" ; \ + else \ + printf "\\n\\nError: htscodecs submodule files not present and this is not a git checkout.\\n\ + You have an incomplete distribution. 
Please try downloading one of the\\n\ + official releases from https://www.htslib.org/\\n" ; \ + fi + @false + +# Build the htscodecs/htscodecs/version.h file if necessary +htscodecs/htscodecs/version.h: force + @if test -e $(srcdir)/htscodecs/.git && test -e $(srcdir)/htscodecs/configure.ac ; then \ + vers=`cd $(srcdir)/htscodecs && git describe --always --dirty --match 'v[0-9]\.[0-9]*'` && \ + case "$$vers" in \ + v*) vers=$${vers#v} ;; \ + *) iv=`awk '/^AC_INIT\(htscodecs,/ { match($$0, /[0-9]+(\.[0-9]+)*/); print substr($$0, RSTART, RLENGTH) }' $(srcdir)/htscodecs/configure.ac` ; vers="$$iv$${vers:+-g$$vers}" ;; \ + esac ; \ + if ! grep -s -q '"'"$$vers"'"' $@ ; then \ + echo 'Updating $@ : #define HTSCODECS_VERSION_TEXT "'"$$vers"'"' ; \ + echo '#define HTSCODECS_VERSION_TEXT "'"$$vers"'"' > $@ ; \ + fi ; \ + fi +endif + +# Maintainer extra targets built +# - compile public headers as C++ +# Maintainer source code checks +# - copyright boilerplate presence +# - tab and trailing space detection +maintainer-check: test/usepublic.o + test/maintainer/check_copyright.pl . + test/maintainer/check_spaces.pl . + +# Look for untracked files in the git repository. +check-untracked: + @if test -e .git && git status --porcelain | grep '^\?'; then \ + echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \ + false ; \ + fi + +# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/ +# for brevity in test and install rules, and so that build logs do not have +# ./ sprinkled throughout. +SRC = $(srcprefix) + +# For tests that might use it, set $REF_PATH explicitly to use only reference +# areas within the test suite (or set it to ':' to use no reference areas). 
+# +# If using MSYS, avoid poor shell expansion via: +# MSYS2_ARG_CONV_EXCL="*" make check +check test: all $(HTSCODECS_TEST_TARGETS) + test/hts_endian + test/test_expr + test/test_kfunc + test/test_khash + test/test_kstring + test/test_nibbles -v + test/test_str2int + test/test_time_funcs + test/fieldarith test/fieldarith.sam + test/hfile + if test "x$(BUILT_PLUGINS)" != "x"; then \ + HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -g ./libhts.$(SHLIB_FLAVOUR); \ + fi + if test "x$(BUILT_PLUGINS)" != "x"; then \ + HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -l ./libhts.$(SHLIB_FLAVOUR); \ + fi + test/test_bgzf test/bgziptest.txt + test/test-parse-reg -t test/colons.bam + cd test/faidx && ./test-faidx.sh faidx.tst + cd test/sam_filter && ./filter.sh filter.tst + cd test/tabix && ./test-tabix.sh tabix.tst + cd test/mpileup && ./test-pileup.sh mpileup.tst + cd test/fastq && ./test-fastq.sh + cd test/base_mods && ./base-mods.sh base-mods.tst + REF_PATH=: test/sam test/ce.fa test/faidx/faidx.fa test/faidx/fastqs.fq + test/test-regidx + cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-} + +test/hts_endian: test/hts_endian.o + $(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS) + +# To build the fuzzer, try: +# make CC="clang16 -fsanitize=address,undefined,fuzzer" \ +# CFLAGS="-g -O3 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION" \ +# test/fuzz/hts_open_fuzzer +test/fuzz/hts_open_fuzzer: test/fuzz/hts_open_fuzzer.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/fuzz/hts_open_fuzzer.o libhts.a $(LIBS) -lpthread + +test/fieldarith: test/fieldarith.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/fieldarith.o libhts.a $(LIBS) -lpthread + +test/hfile: test/hfile.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/hfile.o libhts.a $(LIBS) -lpthread + +test/pileup: test/pileup.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread + +test/pileup_mod: test/pileup_mod.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread + +test/plugins-dlhts: 
test/plugins-dlhts.o + $(CC) $(LDFLAGS) -o $@ test/plugins-dlhts.o $(LIBS) + +test/sam: test/sam.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/sam.o libhts.a $(LIBS) -lpthread + +test/test_bgzf: test/test_bgzf.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_bgzf.o libhts.a $(LIBS) -lpthread + +test/test_expr: test/test_expr.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_expr.o libhts.a $(LIBS) -lpthread + +test/test_faidx: test/test_faidx.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_faidx.o libhts.a $(LIBS) -lpthread + +test/test_kfunc: test/test_kfunc.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a $(LIBS) -lpthread + +test/test_khash: test/test_khash.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_khash.o libhts.a $(LIBS) -lpthread + +test/test_kstring: test/test_kstring.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_kstring.o libhts.a $(LIBS) -lpthread + +test/test_mod: test/test_mod.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread + +test/test_nibbles: test/test_nibbles.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_nibbles.o libhts.a $(LIBS) -lpthread + +test/test_realn: test/test_realn.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread + +test/test-regidx: test/test-regidx.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LIBS) -lpthread + +test/test-parse-reg: test/test-parse-reg.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test-parse-reg.o libhts.a $(LIBS) -lpthread + +test/test_str2int: test/test_str2int.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_str2int.o libhts.a $(LIBS) -lpthread + +test/test_time_funcs: test/test_time_funcs.o + $(CC) $(LDFLAGS) -o $@ test/test_time_funcs.o + +test/test_view: test/test_view.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LIBS) -lpthread + +test/test_index: test/test_index.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_index.o libhts.a $(LIBS) -lpthread + +test/test-vcf-api: test/test-vcf-api.o libhts.a + $(CC) $(LDFLAGS) -o $@ 
test/test-vcf-api.o libhts.a $(LIBS) -lpthread + +test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test-vcf-sweep.o libhts.a $(LIBS) -lpthread + +test/test-bcf-sr: test/test-bcf-sr.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test-bcf-sr.o libhts.a $(LIBS) -lpthread + +test/test-bcf-translate: test/test-bcf-translate.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test-bcf-translate.o libhts.a $(LIBS) -lpthread + +test/test_introspection: test/test_introspection.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_introspection.o libhts.a $(LIBS) -lpthread + +test/test-bcf_set_variant_type: test/test-bcf_set_variant_type.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test-bcf_set_variant_type.o libhts.a $(LIBS) -lpthread + +# Extra tests for bundled htscodecs +test_htscodecs_rans4x8: htscodecs/tests/rans4x8 + cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x8.test + +test_htscodecs_rans4x16: htscodecs/tests/rans4x16pr + cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x16.test + +test_htscodecs_arith: htscodecs/tests/arith_dynamic + cd htscodecs/tests && srcdir=. && export srcdir && ./arith.test + +test_htscodecs_tok3: htscodecs/tests/tokenise_name3 + cd htscodecs/tests && srcdir=. && export srcdir && ./tok3.test + +test_htscodecs_fqzcomp: htscodecs/tests/fqzcomp_qual + cd htscodecs/tests && srcdir=. 
&& export srcdir && ./fqzcomp.test + +test_htscodecs_varint: htscodecs/tests/varint + cd htscodecs/tests && ./varint + +htscodecs/tests/arith_dynamic: htscodecs/tests/arith_dynamic_test.o $(HTSCODECS_OBJS) + $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread + +htscodecs/tests/fqzcomp_qual: htscodecs/tests/fqzcomp_qual_test.o $(HTSCODECS_OBJS) + $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread + +htscodecs/tests/rans4x16pr: htscodecs/tests/rANS_static4x16pr_test.o $(HTSCODECS_OBJS) + $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread + +htscodecs/tests/rans4x8: htscodecs/tests/rANS_static_test.o $(HTSCODECS_OBJS) + $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread + +htscodecs/tests/tokenise_name3: htscodecs/tests/tokenise_name3_test.o $(HTSCODECS_OBJS) + $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread + +htscodecs/tests/varint: htscodecs/tests/varint_test.o $(HTSCODECS_OBJS) + $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -lm -lpthread + +htscodecs/tests/arith_dynamic_test.o: CPPFLAGS += -Ihtscodecs +htscodecs/tests/arith_dynamic_test.o: htscodecs/tests/arith_dynamic_test.c config.h $(htscodecs_arith_dynamic_h) +htscodecs/tests/fqzcomp_qual_test.o: CPPFLAGS += -Ihtscodecs +htscodecs/tests/fqzcomp_qual_test.o: htscodecs/tests/fqzcomp_qual_test.c config.h $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) +htscodecs/tests/rANS_static4x16pr_test.o: CPPFLAGS += -Ihtscodecs +htscodecs/tests/rANS_static4x16pr_test.o: htscodecs/tests/rANS_static4x16pr_test.c config.h $(htscodecs_rANS_static4x16_h) +htscodecs/tests/rANS_static_test.o: CPPFLAGS += -Ihtscodecs +htscodecs/tests/rANS_static_test.o: htscodecs/tests/rANS_static_test.c config.h $(htscodecs_rANS_static_h) +htscodecs/tests/tokenise_name3_test.o: CPPFLAGS += -Ihtscodecs +htscodecs/tests/tokenise_name3_test.o: htscodecs/tests/tokenise_name3_test.c config.h $(htscodecs_tokenise_name3_h) +htscodecs/tests/varint_test.o: CPPFLAGS += -Ihtscodecs +htscodecs/tests/varint_test.o: htscodecs/tests/varint_test.c config.h $(htscodecs_varint_h) + 
+test/hts_endian.o: test/hts_endian.c config.h $(htslib_hts_endian_h) +test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile_h) $(htslib_hts_h) $(htslib_sam_h) $(htslib_vcf_h) +test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h) +test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h) +test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h) +test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h) +test/plugins-dlhts.o: test/plugins-dlhts.c config.h +test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h) +test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(hfile_internal_h) +test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h) +test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h) +test/test_khash.o: test/test_khash.c config.h $(htslib_khash_h) $(htslib_kroundup_h) +test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h) +test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h) +test/test_nibbles.o: test/test_nibbles.c config.h $(htslib_sam_h) $(sam_internal_h) +test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h) +test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) +test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h) +test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h) +test/test_time_funcs.o: test/test_time_funcs.c config.h $(hts_time_funcs_h) +test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h) +test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h) +test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h) +test/test-vcf-api.o: test/test-vcf-api.c 
config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h) +test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h) +test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_synced_bcf_reader_h) $(htslib_hts_h) $(htslib_vcf_h) +test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h) +test/test_introspection.o: test/test_introspection.c config.h $(htslib_hts_h) $(htslib_hfile_h) +test/test-bcf_set_variant_type.o: test/test-bcf_set_variant_type.c config.h $(htslib_hts_h) vcf.c + +# Standalone target not added to $(BUILT_TEST_PROGRAMS) as some may not +# have a compiler that compiles as C++ when given a .cpp source file. +test/usepublic.o: test/usepublic.cpp config.h $(htslib_bgzf_h) $(htslib_cram_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_hts_h) $(htslib_hts_defs_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(htslib_hts_log_h) $(htslib_hts_os_h) $(htslib_kbitset_h) $(htslib_kfunc_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_klist_h) $(HTSPREFIX)htslib/knetfile.h $(htslib_kroundup_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_sam_h) $(htslib_synced_bcf_reader_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(htslib_vcf_h) $(htslib_vcf_sweep_h) $(htslib_vcfutils_h) + $(CC) $(CFLAGS) $(TARGET_CFLAGS) $(ALL_CPPFLAGS) -c -o $@ test/usepublic.cpp + + +test/thrash_threads1: test/thrash_threads1.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/thrash_threads1.o libhts.a $(LIBS) -lpthread + +test/thrash_threads2: test/thrash_threads2.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/thrash_threads2.o libhts.a $(LIBS) -lpthread + +test/thrash_threads3: test/thrash_threads3.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/thrash_threads3.o libhts.a $(LIBS) -lpthread + +test/thrash_threads4: test/thrash_threads4.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/thrash_threads4.o libhts.a $(LIBS) -lpthread + +test/thrash_threads5: test/thrash_threads5.o libhts.a + $(CC) $(LDFLAGS) -o $@ 
test/thrash_threads5.o libhts.a $(LIBS) -lpthread + +test/thrash_threads6: test/thrash_threads6.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/thrash_threads6.o libhts.a $(LIBS) -lpthread + +test/thrash_threads7: test/thrash_threads7.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/thrash_threads7.o libhts.a $(LIBS) -lpthread + +test_thrash: $(BUILT_THRASH_PROGRAMS) + +# Test to ensure the functions in the header files are exported by the shared +# library. This currently works by comparing the output from ctags on +# the headers with the list of functions exported by the shared library. +# Note that functions marked as exported in the .c files and not the public +# headers will be missed by this test. +test-shlib-exports: header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt + @echo "Checking shared library exports" + @if test ! -s header-exports.txt ; then echo "Error: header-exports.txt empty" ; false ; fi + @if test ! -s shlib-exports-$(SHLIB_FLAVOUR).txt ; then echo "Error: shlib-exports-$(SHLIB_FLAVOUR).txt empty" ; false ; fi + @! comm -23 header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt | grep . || \ + ( echo "Error: Found unexported symbols (listed above)" ; false ) + +# Extract symbols that should be exported from public headers using ctags +# Filter out macros in htslib/hts_defs.h. 
+header-exports.txt: test/header_syms.pl htslib/*.h
+	test/header_syms.pl htslib/*.h | sort -u -o $@
+
+# Exported-symbol lists, one rule per shared-library flavour.  Each keeps the
+# names that nm reports with type "T" and writes them sorted and de-duplicated.
+# The .so variant strips everything from the first "@" in the name; the
+# .dylib variant strips one leading underscore.
+shlib-exports-so.txt: libhts.so
+	nm -D -g libhts.so | awk '$$2 == "T" { sub("@.*", "", $$3); print $$3 }' | sort -u -o $@
+
+shlib-exports-dylib.txt: libhts.dylib
+	nm -Ug libhts.dylib | awk '$$2 == "T" { sub("^_", "", $$3); print $$3 }' | sort -u -o $@
+
+shlib-exports-dll.txt: hts.dll.a
+	nm -g hts.dll.a | awk '$$2 == "T" { print $$3 }' | sort -u -o $@
+
+# Append a new symbol-version node to the linker map.
+#
+# The recipe takes the current version from $(PACKAGE_VERSION) and the name of
+# the last HTSLIB_x.y node found in htslib.map.  It aborts if either is empty,
+# or if the last node already belongs to the current version.  Otherwise the
+# libhts.so symbols that nm tags with @@Base (except _init, _fini and _edata)
+# are written as a new "HTSLIB_<current> { ... } <previous>;" node.
+#
+# The update is staged in $@.tmp and $@.new.tmp and is moved into place only
+# when at least one such symbol was found; otherwise $@.tmp is just removed.
+#
+# Two defects are fixed here: the "Version check failed" message now prints
+# $$last_vers (it used the never-set $$las_vers), and the "else" branch is
+# attached to the "if [ -s $@.tmp ]" test (a stray "fi" used to close that
+# test first, which is a shell syntax error).
+$(srcprefix)htslib.map: libhts.so
+	LC_ALL=C ; export LC_ALL; \
+	curr_vers=`expr 'X$(PACKAGE_VERSION)' : 'X\([0-9]*\.[0-9.]*\)'` ; \
+	last_vers=`awk '/^HTSLIB_[0-9](\.[0-9]+)+/ { lv = $$1 } END { print lv }' htslib.map` ; \
+	if test "x$$curr_vers" = 'x' || test "x$$last_vers" = 'x' ; then \
+	    echo "Version check failed : $$curr_vers / $$last_vers" 1>&2 ; \
+	    exit 1 ; \
+	fi && \
+	if test "HTSLIB_$$curr_vers" = "$$last_vers" ; then \
+	    echo "Refusing to update $@ - HTSlib version not changed" 1>&2 ; \
+	    exit 1 ; \
+	fi && \
+	nm --with-symbol-versions -D -g libhts.so | awk '$$2 ~ /^[DGRT]$$/ && $$3 ~ /@@Base$$/ && $$3 !~ /^(_init|_fini|_edata)@@/ { sub(/@@Base$$/, ";", $$3); print " " $$3 }' > $@.tmp && \
+	if [ -s $@.tmp ] ; then \
+	    cat $@ > $@.new.tmp && \
+	    printf '\n%s {\n' "HTSLIB_$$curr_vers" >> $@.new.tmp && \
+	    cat $@.tmp >> $@.new.tmp && \
+	    printf '} %s;\n' "$$last_vers" >> $@.new.tmp && \
+	    rm -f $@.tmp && \
+	    mv $@.new.tmp $@ ; \
+	else \
+	    rm -f $@.tmp ; \
+	fi
+
+# Install programs, plugins (only when $(BUILT_PLUGINS) is non-empty), public
+# headers, the static library and the man pages under $(DESTDIR).
+install: libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) installdirs install-$(SHLIB_FLAVOUR) install-pkgconfig
+	$(INSTALL_PROGRAM) $(BUILT_PROGRAMS) $(DESTDIR)$(bindir)
+	if test -n "$(BUILT_PLUGINS)"; then $(INSTALL_PROGRAM) $(BUILT_PLUGINS) $(DESTDIR)$(plugindir); fi
+	$(INSTALL_DATA) $(SRC)htslib/*.h $(DESTDIR)$(includedir)/htslib
+	$(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a
+	$(INSTALL_MAN) $(SRC)annot-tsv.1 $(SRC)bgzip.1 $(SRC)htsfile.1 $(SRC)tabix.1 $(DESTDIR)$(man1dir)
+	$(INSTALL_MAN) $(SRC)faidx.5 $(SRC)sam.5 $(SRC)vcf.5 $(DESTDIR)$(man5dir)
+	$(INSTALL_MAN) $(SRC)htslib-s3-plugin.7 $(DESTDIR)$(man7dir)
+
+# Create every destination directory used by the install targets; the plugin
+# directory is created only when $(plugindir) is set.
+installdirs:
+	$(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(includedir) $(DESTDIR)$(includedir)/htslib $(DESTDIR)$(libdir) $(DESTDIR)$(man1dir) $(DESTDIR)$(man5dir) $(DESTDIR)$(man7dir) $(DESTDIR)$(pkgconfigdir)
+	if test -n "$(plugindir)"; then $(INSTALL_DIR) $(DESTDIR)$(plugindir); fi
+
+# After installation, the real file in $(libdir) will be libhts.so.X.Y.Z,
+# with symlinks libhts.so (used via -lhts during linking of client programs)
+# and libhts.so.NN (used by client executables at runtime).
+
+install-so: libhts.so installdirs
+	$(INSTALL_LIB) libhts.so $(DESTDIR)$(libdir)/libhts.so.$(PACKAGE_VERSION)
+	ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so
+	ln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so.$(LIBHTS_SOVERSION)
+
+install-cygdll: cyghts-$(LIBHTS_SOVERSION).dll installdirs
+	$(INSTALL_PROGRAM) cyghts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/cyghts-$(LIBHTS_SOVERSION).dll
+	$(INSTALL_PROGRAM) libhts.dll.a $(DESTDIR)$(libdir)/libhts.dll.a
+
+install-dll: hts-$(LIBHTS_SOVERSION).dll installdirs
+	$(INSTALL_PROGRAM) hts-$(LIBHTS_SOVERSION).dll $(DESTDIR)$(bindir)/hts-$(LIBHTS_SOVERSION).dll
+	$(INSTALL_PROGRAM) hts.dll.a $(DESTDIR)$(libdir)/hts.dll.a
+
+install-dylib: libhts.dylib installdirs
+	$(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib
+	ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib
+	ln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.$(LIBHTS_SOVERSION).dylib
+
+# Substitute these pseudo-autoconf variables only at install time
+# so that "make install prefix=/prefix/path" etc continue to work.
+install-pkgconfig: htslib.pc.tmp installdirs
+	sed -e 's#@-includedir@#$(includedir)#g' \
+	    -e 's#@-libdir@#$(libdir)#g' \
+	    -e 's#@-PACKAGE_VERSION@#$(PACKAGE_VERSION)#g' \
+	    htslib.pc.tmp > $(DESTDIR)$(pkgconfigdir)/htslib.pc
+	chmod 644 $(DESTDIR)$(pkgconfigdir)/htslib.pc
+
+# A pkg-config file (suitable for copying to $PKG_CONFIG_PATH) that provides
+# flags for building against the uninstalled library in this build directory.
+htslib-uninstalled.pc: htslib.pc.tmp
+	sed -e 's#@-includedir@#'`pwd`'#g' -e 's#@-libdir@#'`pwd`'#g' htslib.pc.tmp > $@
+
+
+# Remove files left behind by the test targets.
+testclean:
+	-rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* \
+	    test/longrefs/*.tmp.* test/tabix/*.tmp.* \
+	    test/bgzf_boundaries/*.tmp.* test/*/FAIL* \
+	    header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt
+	-rm -rf htscodecs/tests/test.out
+
+# Only remove this in git checkouts
+DEL_HTSCODECS_VERSION := $(if $(wildcard htscodecs/.git),htscodecs/htscodecs/version.h)
+
+# Remove object files and generated headers, after testclean.
+mostlyclean: testclean
+	-rm -f *.o *.pico cram/*.o cram/*.pico test/*.o test/*.dSYM config_vars.h version.h
+	-rm -f htscodecs/htscodecs/*.o htscodecs/htscodecs/*.pico $(DEL_HTSCODECS_VERSION)
+	-rm -f hts-object-files
+	-rm -f htscodecs/tests/*.o
+
+# Remove the libraries and every built program as well.
+clean: mostlyclean clean-$(SHLIB_FLAVOUR)
+	-rm -f libhts.a $(BUILT_PROGRAMS) $(BUILT_PLUGINS) $(BUILT_TEST_PROGRAMS) $(BUILT_THRASH_PROGRAMS)
+	-rm -f htscodecs/tests/rans4x8 htscodecs/tests/rans4x16pr htscodecs/tests/arith_dynamic htscodecs/tests/tokenise_name3 htscodecs/tests/fqzcomp_qual htscodecs/tests/varint
+
+# Remove configure output, tags and generated pkg-config/make fragments too.
+distclean maintainer-clean: clean
+	-rm -f config.cache config.h config.log config.mk config.status
+	-rm -f TAGS *.pc.tmp *-uninstalled.pc htslib_static.mk htscodecs.mk
+	-rm -rf autom4te.cache
+
+# Per-flavour shared-library cleanup, selected through clean-$(SHLIB_FLAVOUR).
+clean-so:
+	-rm -f libhts.so libhts.so.*
+
+clean-cygdll:
+	-rm -f cyghts-*.dll libhts.dll.a
+
+clean-dll:
+	-rm -f hts-*.dll hts.dll.a
+
+clean-dylib:
+	-rm -f libhts.dylib libhts.*.dylib
+
+
+tags TAGS:
+	ctags -f TAGS *.[ch] cram/*.[ch] htslib/*.h
+
+# We recommend libhts-using programs be built against a separate htslib
+# installation.  However if you feel that you must bundle htslib source
+# code with your program, this hook enables Automake-style "make dist"
+# for this subdirectory.  If you do bundle an htslib snapshot, please
+# add identifying information to $(PACKAGE_VERSION) as appropriate.
+# (The wildcards attempt to omit non-exported files (.git*, README.md,
+# etc) and other detritus that might be in the top-level directory.)
+distdir:
+	@test -n "$(distdir)" || { echo "Please supply a distdir=DIR argument."; false; }
+	tar -c *.[ch15] [ILMNRchtv]*[ELSbcekmnth] | (cd $(distdir) && tar -x)
+	+cd $(distdir) && $(MAKE) distclean
+
+force:
+
+
+.PHONY: all check check-untracked clean distclean distdir force \
+    install install-pkgconfig installdirs lib-shared lib-static \
+    maintainer-check maintainer-clean mostlyclean plugins \
+    print-config print-version show-version tags \
+    test test-shlib-exports test_thrash testclean \
+    clean-so install-so \
+    clean-cygdll install-cygdll \
+    clean-dll install-dll \
+    clean-dylib install-dylib \
+    test_htscodecs_rans4x8 test_htscodecs_rans4x16 test_htscodecs_arith \
+    test_htscodecs_tok3 test_htscodecs_fqzcomp test_htscodecs_varint \
+    cc-version
diff --git a/src/htslib-1.21/NEWS b/src/htslib-1.21/NEWS
new file mode 100644
index 0000000..8825c30
--- /dev/null
+++ b/src/htslib-1.21/NEWS
@@ -0,0 +1,2520 @@
+Noteworthy changes in release 1.21 (12th September 2024)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The primary user-visible changes in this release are updates to the
+annot-tsv tool and some speed improvements.  Full details of other
+changes and bugs fixed are below.
+
+Notice: this is the last SAMtools / HTSlib release where CRAM 3.0 will be
+the default CRAM version.  From the next we will change to CRAM 3.1
+unless the version is explicitly specified, for example using
+"samtools view -O cram,version=3.0".
+ + +Updates +------- + +* Extend annot-tsv with several new command line options. + --delim permits use of other delimiters. + --headers for selection of other header formats. + --no-header-idx to suppress column index numbers in header. + Also removed -h as it is now short for --headers. Note --help + still works. (PR #1779) + +* Allow annot-tsv -a to rename annotations. (PR #1709) + +* Extend annot-tsv --overlap to be able to specify the overlap + fraction separately for source and target. (PR #1811) + +* Added new APIs to facilitate low-level CRAM container manipulations, + used by the new "samtools cat" region filtering code. Functions are: + cram_container_get_coords() + cram_filter_container() + cram_index_extents() + cram_container_num2offset() + cram_container_offset2num() + cram_num_containers() + cram_num_containers_between() + Also improved cram_index_query() to cope with HTS_IDX_NOCOOR regions. + (PR #1771) + +* Bgzip now retains file modification and access times when + compressing and decompressing. (PR #1727, fixes #1718. Requested by + Gert Hulselmans.) + +* Use FNV1a for string hashing in khash. The old algorithm was + particularly weak with base-64 style strings and led to a large + number of collisions. (PR #1806. Fixes samtools/samtools#2066, + reported by Hans-Joachim Ruscheweyh) + +* Improve the speed of the nibble2base() function on Intel (PR + #1667, PR #1764, PR #1786, PR #1802, thanks to Ruben Vorderman) and + ARM (PR #1795, thanks to John Marshall). + +* bgzf_getline() will now warn if it encounters UTF-16 data. + (PR #1487, thanks to John Marshall) + +* Speed up bgzf_read(). While this does not reduce CPU significantly, + it does increase the maximum parallelism available permitting 10-15% + faster decoding. (PR #1772, PR #1800, Issue #1798) + +* Speed up faidx by use of better isgraph methods (PR #1797) and + whole-line reading (PR #1799, thanks to John Marshall). 
+ +* Speed up kputll() function, speeding up BAM -> SAM conversion by + about 5% and also samtools depth. (PR #1805) + +* Added more example code, covering fasta/fastq indexing, tabix + indexing and use of the thread pool. (PR #1666) + +Build Changes +------------- + +* Code warning fixes for pedantic compilers (PR #1777) and avoid + some undefined behaviour (PR #1810, PR #1816, PR #1828). + +* Windows based CI has been migrated from AppVeyor to GitHub Actions. + (PR #1796, PR #1803, PR #1808) + +* Miscellaneous minor build infrastructure and code fixes. + (PR #1807, PR #1829, both thanks to John Marshall) + +* Updated htscodecs submodule to version 1.6.1 (PR #1828) + +* Fixed an awk script in the Makefile that only worked with gawk. (PR #1831) + +Bug fixes +--------- + +* Fix small OSS-Fuzz reported issues with CRAM encoding and long + CIGARS and/or illegal positions. (PR #1775, PR #1801, PR #1817) + +* Fix issues with on-the-fly indexing of VCF/BCF (bcftools --write-index) + when not using multiple threads. (PR #1837. Fixes samtools/bcftools#2267, + reported by Giulio Genovese) + +* Stricter limits on POS / MPOS / TLEN in sam_parse1(). This fixes + a signed overflow reported by OSS-Fuzz and should help prevent other + as-yet undetected bugs. (PR #1812) + +* Check that the underlying file open worked for preload: URLs. Fixes + a NULL pointer dereference reported by OSS-Fuzz. (PR #1821) + +* Fix an infinite loop in hts_itr_query() when given extremely large + positions which cause integer overflow. Also adds hts_bin_maxpos() + and hts_idx_maxpos() functions. + (PR #1774, thanks to John Marshall and reported by Jesus Alberto + Munoz Mesa) + +* Fix an out of bounds read in hts_itr_multi_next() when switching + chromosomes. This bug is present in releases 1.11 to 1.20. + (PR #1788. Fixes samtools/samtools#2063, reported by acorvelo) + +* Work around parsing problems with colons in CHROM names. + Fixes samtools/bcftools#2139. 
(PR #1781, John Marshall / James Bonfield) + +* Correct the CPU detection for Mac OS X 10.7. cpuid is used by + htscodecs (see samtools/htscodecs#116), and the corresponding + changes in htslib are PR #1785. Reported by Ryan Carsten Schmidt. + +* Make BAM zero-length intervals work the same as CRAM; permitted and + returning overlapping records. (PR #1787. Fixes + samtools/samtools#2060, reported by acorvelo) + +* Replace assert() with abort() in BCF synced reader. This is not an + ideal solution, but it gives consistent behaviour when compiling + with or without NDEBUG. (PR #1791, thanks to Martin Pollard) + +* Fixed failure to change the write block size on compressed SAM or VCF + files due to an internal type confusion. (PR #1826) + +* Fixed an out-of-bounds read in cram_codec_iter_next() (PR #1832) + +Noteworthy changes in release 1.20 (15th April 2024) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates +------- + +* When working on named files, bgzip now sets the modified and access times + of the output files it makes to match those of the corresponding input. + (PR #1727, feature request #1718. Requested by Gert Hulselmans) + +* It's now possible to use a -o option to specify the output file name in + bgzip. + (PR #1747, feature request #1726. Requested by Gert Hulselmans) + +* Improved error faidx error messages. + (PR #1743, thanks to Nick Moore) + +* Faster reading of SAM array (type "B") tags. These often turn up + in ONT and PacBio data. + (PR #1741) + +* Improved validity checking of base modification tags. + (PR #1749) + +* mpileup overlap removal now works where one read has a deletion. + (PR #1751, fixes samtools/samtools#1992. Reported by Long Tian) + +* The S3 plugin can now find buckets via S3 access point aliases. + (PR #1756, thanks to Matt Pawelczyk; + fixes samtools/samtools#1984. Reported by Albert Li) + +* Added a --threads option (and -@ short option) to tabix. + (PR #1755, feature request #1735. 
Requested by Dan Bolser) + +* tabix can now index Graph Alignment Format (GAF) files. + (See https://github.com/lh3/gfatools/blob/master/doc/rGFA.md) + (PR #1763, thanks to Adam Novak) + +Bug fixes +--------- + +* Security fix: Prevent possible heap overflow in cram_encode_aux() on + bad RG:Z tags. + (PR #1737) + +* Security fix: Prevent attempts to call a NULL pointer if certain URL + schemes are used in CRAM @SQ UR: tags. + (PR #1757) + +* Security fix: Fixed a bug where following certain AWS S3 redirects could + downgrade the connection from TLS (i.e. https://) to unencrypted http://. + This could happen when using path-based URLs and AWS_DEFAULT_REGION + was set to a region other than the one where the data was stored. + (PR #1762, fixes #1760. Reported by andaca) + +* Fixed arithmetic overflow when loading very long references for CRAM. + (PR #1738, fixes #1738. Reported by Shane McCarthy) + +* Fixed faidx and CRAM reference look-ups on compressed fasta where the .fai + index file was present, but the .gzi index of compressed offsets was not. + (PR #1745, fixes #1744. Reported by Theodore Li) + +* Fixed BCF indexing on-the-fly bug which produced invalid indexes when + using multiple compression threads. + (PR #1742, fixes #1740. Reported by graphenn) + +* Ensure that pileup destructors are called by bam_plp_destroy(), to + prevent memory leaks. + (PR #1749, PR #1754) + +* Ensure on-the-fly index timestamps are always older than the data file. + Previously the files could be closed out of order, leading to warnings + being printed when using the index. + (PR #1753, fixes #1732. Reported by Gert Hulselmans) + +* To prevent data corruption when reading (strictly invalid) VCF files + with duplicated FORMAT tags, all but the first copy of the data + associated with the tag are now dropped with a warning. + (PR #1752, PR #1761, fixes #1733. 
Reported by anthakki) + +* Fixed a bug introduced in release 1.19 (PR #1689) which broke variant + record data if it tried to remove an over-long tag. + (PR #1752, PR #1761) + +* Changed error to warning when complaining about use of the CG tag + in SAM or CRAM files. + (PR #1758, fixes samtools/samtools#2002) + +Noteworthy changes in release 1.19.1 (22nd January 2024) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Fixed a regression in release 1.19 that caused all aux records to + be stored uncompressed in CRAM files. The resulting files were + correctly formatted, but bigger than they needed to be. + (PR#1729, fixes samtools#1968. Reported by Clockris) + +* Fixed possible out-of-bounds reads due to an incorrect check on + B tag lengths in cram_encode_aux(). (PR#1725) + +* Fixed an incorrect check on tag length which could fail to catch a + two byte out-of-bounds read in bam_get_aux(). (PR#1728) + +* Made errors reported by hts_open_format() less confusing when it can't + open the reference file. (PR#1724, fixes #1723. Reported by + Alex Leonard) + +* Made hts_close() fail more gracefully if it's passed a NULL pointer + (PR#1724) + +Noteworthy changes in release 1.19 (12th December 2023) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates +------- + +* A temporary work-around has been put in the VCF parser so that it is + less likely to fail on rows with a large number of ALT alleles, + where Number=G tags like PL can expand beyond the 2Gb limit enforced + by HTSlib. For now, where this happens the offending tag will be dropped + so the data can be processed, albeit without the likelihood data. + + In future work, the library will instead convert such tags into their + local alternatives (see https://github.com/samtools/hts-specs/pull/434). + (PR #1689) + +* New program. Adds annot-tsv which annotates regions in a destination file with + texts from overlapping regions in a source file. 
+ (PR#1619) + +* Change bam_parse_cigar() so that it can modify existing BAM records. This + makes it more useful as a public API. Previously it could only handle partially + formed BAM records. + (PR#1651, fixes #1650. Reported by Oleksii Nikolaienko) + +* Add "uncompressed" to hts_format_description() where appropriate. This adds + an "uncompressed" description to uncompressed files that would normally be + compressed, such as BAM and BCF. + (PR#1656, in relation to samtools#1884. Thanks to John Marshall) + +* Speed-ups to the VCF parser and writer. + (PR#1644 and PR#1663) + +* Add an hclen (hard clip length) SAM filter function. + (PR#1660, with reference to samtools#813) + +* Avoid really closing stdin/stdout in hclose()/hts_close()/et al. + See discussion in PR for details. + (PR#1665. Thanks to John Marshall) + +* Add support to handle multiple files in bgzip. + (PR#1658, fixes #1642. Requested by bw2) + +* Enable auto-vectorisation in CRAM 3.1 codecs. Speeds decoding on some + sequencing platform data. + (PR#1669) + +* Speed up removal of lines in large headers. + (PR#1662, fixes #1460. Reported by Anže Starič) + +* Apply seqtk PR to improve kseq.h parsing performance. Port of + Fabian Klötzl's (kloetzl) lh3/seqtk#123 and attractivechaos/klib#173 to + HTSlib. + (PR#1674. Thanks to John Marshall) + +Build changes +------------- + +* Updated htscodecs submodule to 1.6.0. + (PR#1685, PR#1717, PR#1719) + +* Apply the packed attribute to uint*_u types for Clang to prevent + -fsanitize=alignment failures. + (PR#1667. Thanks to Fangrui Song) + +* Fuzz testing improvements. + (PR#1664) + +* Add C++ casts for external headers in klist.h and kseq.h. + (PR#1683. See also PR#1674 and PR#1682) + +* Add test case compiling the public headers as C++. + (PR#1682. Thanks to John Marshall) + +* Enable optimisation level -O3 for SAM QUAL+33 formatting. + (PR#1679) + +* Make compiler flag detection work with zig cc. + (PR#1687) + +* Fix unused value warnings when built with NDEBUG. 
+ (PR#1688) + +* Remove some disused Makefile variables, fix typos and a warning. Improve + bam_parse_basemod() documentation. + (PR#1705, Thanks to John Marshall) + +Bug fixes +--------- + +* Fail bgzf_useek() when offset is above block limits. + (PR#1668) + +* Fix multi-threaded on-the-fly indexing problems. + (PR#1672, fixes samtools#1861 and bcftools#1985. Reported by Mark Ebbert and + lacek) + +* Fix hfile_libcurl small seek bug. + (PR#1676, fixes samtools#1918. Also may fix #1037, #1625 and samtools#1622. + Reported by Alex Reynolds, Mark Walker, Arthur Gilly and skatragadda-nygc. + Thanks to John Marshall) + +* Fix a minor memory leak in malformed CRAM EXTERNAL blocks. [fuzz] + (PR#1671) + +* Fix a cram decode hang from block_resize(). + (PR#1680. Reported by Sebastian Deorowicz) + +* Cram fuzzing improvements. Fixes a number of cram errors. + (PR#1701, fixes #1691, #1692, #1693, #1696, #1697, #1698, #1699 and #1700. + Thanks to Octavio Galland for finding and reporting all these) + +* Fix crypt4gh redirection. + (PR#1675, fixes grbot/crypt4gh-tutorial#2. Reported by hth4) + +* Fix PG header linking when records make a loop. + (PR#1702, fixes #1694. Reported by Octavio Galland) + +* Prevent issues with no-stored-sequence records in CRAM files, by ensuring + they are accounted for properly in block size calculations, and by limiting + the maximum query length in the CIGAR data. Originally seen as an overflow + by OSS-Fuzz / UBSAN, it turned out this could lead to excessive time and + memory use by HTSlib, and could result in it writing out unreadable CRAM + files. + (PR#1710) + +* Fix some illegal shifts and integer overflows found by OSS-Fuzz / UBSAN. + (PR#1707, PR#1712, PR#1713) + +Noteworthy changes in release 1.18 (25th July 2023) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates +------- + +* Using CRAM 3.1 no longer gives a warning about the specification + being draft. Note CRAM 3.0 is still the default output format. 
+ (PR#1583) + +* Replaced use of sprintf with snprintf, to silence potential warnings + from Apple's compilers and those who implement similar checks. + (PR#1594, fixes #1586. Reported by Oleksii Nikolaienko) + +* Fastq output will now generate empty records for reads with no + sequence data (i.e. sequence is "*" in SAM format). (PR#1576, + fixes samtools/samtools#1576. Reported by Nils Homer) + +* CRAM decoding speed-ups. (PR#1580) + +* A new MN aux tag can now be used to verify that MM/ML base modification + data has not been broken by hard clipping. (PR#1590, PR#1612. See also + PR samtools/hts-specs#714 and issue samtools/hts-specs#646. + Reported by Jared Simpson) + +* The base modification API has been improved to make it easier for callers + to tell unchecked bases from unmodified ones. (PR#1636, fixes #1550. + Requested by Chris Wright) + +* A new bam_mods_queryi() API has been added to return additional + data about the i-th base modification returned by bam_mods_recorded(). + (PR#1636, fixes #1550 and #1635. Requested by Jared Simpson) + +* Speed up index look-ups for whole-chromosome queries. (PR#1596) + +* Mpileup now merges adjacent (mis)match CIGAR operations, so CIGARs + using the X/= operators give the same results as if the M operator + was used. (PR#1607, fixes #1597. Reported by Marcel Martin) + +* It's now possible to call bcf_sr_set_regions() after adding readers + using bcf_sr_add_reader() (previously this returned an error). Doing so + will discard any unread data, and reset the readers so they iterate over + the new regions. (PR#1624, fixes samtools/bcftools#1918. Reported by + Gregg Thomas) + +* The synced BCF reader can now accept regions with reference names including + colons and hyphens, by enclosing them in curly braces. For example, + {chr_part:1-1001}:10-20 will return bases 10 to 20 from reference + "chr_part:1-1001". (PR#1630, fixes #1620. 
Reported by Bren) + +* Add a "samples" directory with code demonstrating usage of HTSlib plus + a tutorial document. (PR#1589) + +Build changes +------------- + +* Htscodecs has been updated to 1.5.1 (PR#1654) + +* Htscodecs SIMD code now works with Apple multiarch binaries. + (PR#1587, HTSlib fix for samtools/htscodecs#76. Reported by John Marshall) + +* Improve portability of "expr" usage in version.sh. + (PR#1593, fixes #1592. Reported by John Marshall) + +* Improve portability to *BSD targets by ensuring _XOPEN_SOURCE is defined + correctly and that source files properly include "config.h". Perl + scripts also now all use #!/usr/bin/env instead of assuming that + it's in /usr/bin/perl. (PR#1628, fixes #1606. + Reported by Robert Clausecker) + +* Fixed NAME entry in htslib-s3-plugin man page so the whatis and apropos + commands find it. (PR#1634, thanks to Étienne Mollier) + +* Assorted dependency tracking fixes. (PR#1653, thanks to John Marshall) + +Documentation updates +--------------------- + +* Changed Alpine build instructions as they've switched back to using openssl. + (PR#1609) + +* Recommend using -rdynamic when statically linking a libhts.a with + plugins enabled. (PR#1611, thanks to John Marshall. Fixes #1600, + reported by Jack Wimberley) + +* Fixed example in docs for sam_hdr_add_line(). (PR#1618, thanks to kojix2) + +* Improved test harness for base modifications API. (PR#1648) + +Bug fixes +--------- + +* Fix a major bug when searching against a CRAM index where one container + has start and end coordinates entirely contained within the previous + container. This would occasionally miss data, and sometimes return much + more than required. The bug affected versions 1.11 to 1.17, although the + change in 1.11 was bug-fixing multi-threaded index queries. This bug did + not affect index building. There is no need to reindex your CRAM files. + (PR#1574, PR#1640. Fixes #1569, #1639, samtools/samtools#1808, + samtools/samtools#1819. 
Reported by xuxif, Jens Reeder and Jared Simpson) + +* Prevent CRAM blocks from becoming too big in files with short + sequences but very long aux tags. (PR #1613) + +* Fix bug where the CRAM decoder for CONST_INT and CONST_BYTE + codecs may incorrectly look for extra data in the CORE block. + Note that this bug only affected the experimental CRAM v4.0 decoder. + (PR#1614) + +* Fix crypt4gh redirection so it works in conjunction with non-file + IO, such as using htsget. (PR#1577) + +* Improve error checking for the VCF POS column, when facing invalid + data. (PR#1575, replaces #1570 originally reported and fixed + by Colin Nolan.) + +* Improved error checking on VCF indexing to validate the data is BGZF + compressed. (PR#1581) + +* Fix bug where bin number calculation could overflow when making iterators + over regions that go to the end of a chromosome. (PR#1595) + +* Backport attractivechaos/klib#78 (by Pall Melsted) to HTSlib. + Prevents infinite loops in kseq_read() when reading broken gzip files. + (PR#1582, fixes #1579. Reported by Goran Vinterhalter) + +* Backport attractivechaos/klib@384277a (by innoink) to HTSlib. + Fixes the kh_int_hash_func2() macro definition. + (PR#1599, fixes #1598. Reported by fanxinping) + +* Remove a compilation warning on systems with newer libcurl releases. + (PR#1572) + +* Windows: Fixed BGZF EOF check for recent MinGW releases. (PR#1601, + fixes samtools/bcftools#1901) + +* Fixed bug where tabix would not return the correct regions for files + where the column ordering is end, ..., begin instead of begin, ..., end. + (PR#1626, fixes #1622. Reported by Hiruna Samarakoon) + +* sam_format_aux1() now always NUL-terminates Z/H tags. (PR#1631) + +* Ensure base modification iterator is reset when no MM tag is present. + (PR#1631, PR#1647) + +* Fix segfault when attempting to write an uncompressed BAM file opened using + hts_open(name, "wbu"). 
This was attempting to write BAM data without + wrapping it in BGZF blocks, which is invalid according to the BAM + specification. "wbu" is now internally converted to "wb0" to output + uncompressed data wrapped in BGZF blocks. (PR#1632, fixes #1617. + Reported by Joyjit Daw) + +* Fixed over-strict bounds check in probaln_glocal() which caused it to make + sub-optimal alignments when the requested band width was greater than the + query length. (PR#1616, fixes #1605. Reported by Jared Simpson) + +* Fixed possible double frees when handling errors in bcf_hdr_add_hrec(), + if particular memory allocations fail. (PR#1637) + +* Ensure that bcf_hdr_remove() clears up all pointers to the items removed + from dictionaries. Failing to do this could have resulted in a call + requesting a deleted item via bcf_hdr_get_hrec() returning a stale pointer. + (PR#1637) + +* Stop the gzip decompresser from finishing prematurely when an empty + gzip block is followed by more data. (PR#1643, PR#1646) + +Noteworthy changes in release 1.17 (21st February 2023) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* A new API for iterating through a BAM record's aux field. + (PR#1354, addresses #1319. Thanks to John Marshall) + +* Text mode for bgzip. Allows bgzip to compress lines of text with block breaks + at newlines. + (PR#1493, thanks to Mike Lin for the initial version PR#1369) + +* Make tabix support CSI indices with large positions. Unlike SAM and VCF + files, BED files do not set a maximum reference length which hindered CSI + support. This change sets an arbitrary large size of 100G to enable it to + work. + (PR#1506) + +* Add a fai_line_length function. Exposes the internal line-wrap length. + (PR#1516) + +* Check for invalid barcode tags in fastq output. + (PR#1518, fixes samtools#1728. Reported by Poshi) + +* Warn if reference found in a CRAM file is not contained in the specified + reference file. + (PR#1517 and PR#1521, adds diagnostics for #1515. 
Reported by Wei WeiDeng) + +* Add a faidx_seq_len64 function that can return sequence lengths longer than + INT_MAX. At the same time limit faidx_seq_len to INT_MAX output. Also add a + fai_adjust_region to ensure given ranges do not go beyond the end of the + requested sequence. + (PR#1519) + +* Add a bcf_strerror function to give text descriptions of BCF errors. + (PR#1510) + +* Add CRAM SQ/M5 header checking when specifying a fasta file. This is to + prevent creating a CRAM that cannot be decoded again. + (PR#1522. In response to samtools#1748 though not a direct fix) + +* Improve support for very long input lines (> 2Gbyte). This is mostly useful + for tabix which does not do much interpretation of its input. + (PR#1542, a partial fix for #1539) + +* Speed up load_ref_portion. This function has been sped up by about 7x, which + speeds up low-depth CRAM decoding by about 10%. + (PR#1551) + +* Expand CRAM API to cope with new samtools cram_size command. + (PR#1546) + +* Merges neighbouring I and D ops into one op within pileup. This means + 4M1D1D1D3M is reported as 4M3D3M. Fixing this in sam.c means not only is + samtools mpileup now looking better, but any tool using the mpileup API will + be getting consistent results. + (PR#1552, fixes the last remaining part of samtools#139) + +* Update the API documentation for bgzf_mt as it referred to a previous + iteration. + (PR#1556, fixes #1553. Reported by Raghavendra Padmanabhan) + + +Build changes +------------- + +* Use POSIX grep in testing as egrep and fgrep are considered obsolete. + (PR#1509, thanks to David Seifert) + +* Switch to building libdeflate with cmake for Cirrus CI. + (PR#1511) + +* Ensure strings in config_vars.h are escaped correctly. + (PR#1530, fixes #1527. Reported by Lucas Czech) + +* Easier modification of shared library permissions during install. + (PR#1532, fixes #1525. Reported by StephDC) + +* Fix build on ancient compilers. 
Added -std=gnu90 to build tests so older + C compilers will still be happy. + (PR#1524, fixes #1523. Reported by Martin Jakt) + +* Switch MacOS CI tests to an ARM-based image. + (PR#1536) + +* Cut down the number of embed_ref=2 tests that get run. + (PR#1537) + +* Add symbol versions to libhts.so. This is to aid package developers. + (PR#1560 addresses #1505, thanks to John Marshall. Reported by Stefan Bruens) + +* htscodecs now updated to v1.4.0. + (PR#1563) + +* Cleaned up misleading system error reports in test_bgzf. + (PR#1565) + +Bug fixes +--------- + +* VCF. Fix n-squared complexity in sample line with many adjacent tabs [fuzz]. + (PR#1503) + +* Improved bcftools detection and reporting of bgzf decode errors. + (PR#1504, thanks to Lilian Janin. PR#1529 thanks to Bergur Ragnarsson, fixes + #1528. PR#1554) + +* Prevent crash when the only FASTA entry has no sequence [fuzz]. + (PR#1507) + +* Fixed typo in sam.h documentation. + (PR#1512, thanks to kojix2) + +* Fix buffer read-overrun in bam_plp_insertion_mod. + (PR#1520) + +* Fix hash keys being left behind by bcf_hdr_remove. + (PR#1535, fixes #1533. Reported by Giulio Genovese in #842) + +* Make bcf_hdr_idinfo_exists more robust by checking id value exists. + (PR#1544, fixes #1538. Reported by Giulio Genovese) + +* CRAM improvements. Fixed crash with multi-threaded CRAM. Fixed a bug in the + codec parameter learning for CRAM 3.1 name tokeniser. Fixed Cram compression + container substitution matrix generation, + (PR#1558, PR#1559 and PR#1562) + +Noteworthy changes in release 1.16 (18th August 2022) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Make hfile_s3 refresh AWS credentials on expiry in order to make HTSlib work + better with AWS IAM credentials, which have a limited lifespan. + (PR#1462 and PR#1474, addresses #344) + +* Allow BAM headers between 2GB and 4GB in size once more. This is not + permitted in the BAM specification but was allowed in an earlier version of + HTSlib. 
There is now a warning at 2GB and a hard failure at 4GB. + (PR#1421, fixes #1420 and samtools#1613. Reported by John Marshall and + R C Mueller) + +* Improve error message when failing to load an index. + (PR#1468, example of the problem samtools#1637) + +* Permit MM (base modification) tags containing "." and "?" suffixes. These + define implicit vs explicit coordinates. See the SAM tags specification for + details. + (PR#1423 and PR#1426, fixes #1418. PR#1469, fixes #1466. Reported + by cjw85) + +* Warn if spaces instead of tabs are detected in a VCF file to prevent + confusion. + (PR#1328, fixes bcftools#1575. Reported by ketkijoshi278) + +* Add an "sclen" filter expression keyword. This is the length of a soft-clip, + both left and right end. It may be combined with qlen (qlen-sclen) to obtain + the number of bases in the query sequence that have been aligned to the genome + ie it provides a way to compare local-alignment vs global-alignment length. + (PR#1441 and PR/samtools#1661, fixes #1436. Requested by Chang Y) + +* Improve error messages for CRAM reference mismatches. If the user specifies + the wrong reference, the CRAM slice header MD5sum checks fail. We now report + the SQ line M5 string too so it is possible to validate against the whole + chr in the ref.fa file. The error message has also been improved to report + the reference name instead of #num. Finally, we now hint at the likely cause, + which counters the misleading samtools supplied error of "truncated or + corrupt" file. + (PR#1427, fixes samtools#1640. Reported by Jian-Guo Zhou) + +* Expose more of the CRAM API and add new functionality to extract the reference + from a CRAM file. + (PR#1429 and PR#1442) + +* Improvements to the implementation of embedded references in CRAM where no + external reference is specified. 
+ (PR#1449, addresses some of the issues in #1445) + +* The CRAM writer now allows alignment records with RG:Z: aux tags that + don't have a corresponding @RG ID in the file header. Previously these + tags would have been silently dropped. HTSlib will complain whenever it + has to add one though, as such tags do not conform to recommended practice + for the SAM, BAM and CRAM formats. + (PR#1480, fixes #1479. Reported by Alex Leonard) + +* Set tab delimiter in man page for tabix GFF3 sort. + (PR#1457. Thanks to Colin Diesh) + +* When using libdeflate, the 1...9 scale of BGZF compression levels is + now remapped to the 1...12 range used by libdeflate instead of being + passed directly. In particular, HTSlib levels 8 and 9 now map to + libdeflate levels 10 and 12, so it is possible to select the highest (but + slowest) compression offered by libdeflate. + (PR#1488, fixes #1477. Reported by Gert Hulselmans) + +* The VCF variant API has been extended so that it can return separate flags + for INS and DEL variants as well as the existing INDEL one. These flags + have not been added to the old bcf_get_variant_types() interface as + it could break existing users. To access them, it is necessary to use new + functions bcf_has_variant_type() and bcf_has_variant_types(). + (PR#1467) + +* The missing, but trivial, `le_to_u8()` function has been added to hts_endian. + (PR#1494, Thanks to John Marshall) + +* bcf_format_gt() now works properly on big-endian platforms. + (PR#1495, Thanks to John Marshall) + +Build changes +------------- + +These are compiler, configuration and makefile based changes. + +* Update htscodecs to version 1.3.0 for new SIMD code + various fixes. + Updates the htscodecs submodule and adds changes necessary to make HTSlib + build the new SIMD codec implementations. + (PR#1438, PR#1489, PR#1500) + +* Fix clang builds under mingw. Under mingw, clang requires dllexport to be + applied to both function declarations and function definitions. 
+ (PR#1435, PR#1497, PR#1498 fixes #1433. Reported by teepean) + +* Fix curl type warning with gcc 12.1 on Windows. + (PR#1443) + +* Detect ARM Neon support and only build appropriate SIMD object files. + (PR#1451, fixes #1450. Thanks to John Marshall) + +* `make print-config` now reports extra CFLAGS that are needed to build the + SIMD parts of htscodecs. These may be of use to third-party build + systems that don't use HTSlib's or htscodecs' build infrastructure. (PR#1485. + Thanks to John Marshall) + +* Fixed some Makefile dependency issues for the "check"/"test" targets + and plugins. In particular, "make check" will now build the "all" target, + if not done already, before running the tests. + (PR#1496) + +Bug fixes +--------- + +* Fix bug when reading position -1 in BCF (0 in VCF), which is used to indicate + telomeric regions. The BCF reader was incorrectly assuming the value stored + in the file was unsigned, so a VCF->BCF->VCF round-trip would change it + from 0 to 4294967296. + (PR#1476, fixes #1475 and bcftools#1753. Reported by Rodrigo Martin) + +* Various bugs and quirks have been fixed in the filter expression engine, + mostly related to the handling of absent tags, and the is_true flag. + Note that as a result of these fixes, some filter expressions may give + different results: + - Fixed and-expressions including aux tag values which could give an invalid + true result depending on the order of terms. + - The expression `![NM]` is now true if only `NM` does not exist. In + earlier versions it would also report true for tags like `NM:i:0` which + exist but have a value of zero. + - The expression `[X1] != 0` is now false when `X1` does not exist. Earlier + versions would return true for this comparison when the tag was missing. + - NULL values due to missing tags now propagate through string, bitwise + and mathematical operations. Logical operations always treat them as + false. + (PR#1463, fixes samtools#1670. 
Reported by Gert Hulselmans; + PR#1478, fixes samtools#1677. Reported by johnsonzcode) + +* Fix buffer overrun in bam_plp_insertion_mod. Memory now grows to the proper + size needed for base modification data. + (PR#1430, fixes samtools#1652. Reported by hd2326) + +* Remove limit of returned size from fai_retrieve(). + (PR#1446, fixes samtools#1660. Reported by Shane McCarthy) + +* Cap hts_getline() return value at INT_MAX. Prevents hts_getline() from + returning a negative number (a fail) for very long string length values. + (PR#1448. Thanks to John Marshall) + +* Fix breakend detection and test bcf_set_variant_type(). + (PR#1456, fixes #1455. Thanks to Martin Pollard) + +* Prevent arrays of BCF_BT_NULL values found in BCF files from causing + bcf_fmt_array() to call exit() as the type is unsupported. These are + now tested for and caught by bcf_record_check(), which returns an + error code instead. (PR#1486) + +* Improved detection of fasta and fastq files that have very long comments + following identifiers. (PR#1491, thanks to John Marshall. + Fixes samtools/samtools#1689, reported by cjw85) + +* Fixed a SEGV triggered by giving a SAM file to `samtools import`. + (PR#1492) + +Noteworthy changes in release 1.15.1 (7th April 2022) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Security fix: Fixed broken error reporting in the sam_prob_realn() + function, due to a missing hts_log() parameter. Prior to this fix + (i.e., in HTSlib versions 1.8 to 1.15) it was possible to abuse + the log message format string by passing a specially crafted + alignment record to this function. (PR#1406) + +* HTSlib now uses libhtscodecs release 1.2.2. This fixes a number + of bugs where invalid compressed data could trigger usage of + uninitialised values. (PR#1416) + +* Fixed excessive memory used by multi-threaded SAM output on + long reads. (Part of PR#1384) + +* Fixed a bug where tabix would misinterpret region specifiers + starting at position 0. 
It will also now warn if the file + being indexed is supposed to be 1-based but has positions + less than or equal to 0. (PR#1411) + +* The VCF header parser will now issue a warning if it finds an + INFO header with Type=Flag but Number not equal to 0. It will + also ignore the incorrect Number so the flag can be used. (PR#1415) + +Noteworthy changes in release 1.15 (21st February 2022) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Features and Updates +-------------------- + +* Bgzip now has a --keep option to not remove the input file after + compressing. (PR#1331) + +* Improved file format detection so some BED files are no longer + detected as FASTQ or FASTA. (PR#1350, thanks to John Marshall) + +* Added xz (lzma), zstd and D4 formats to the file type detection + functions. We don't actively support reading these data types, but + function calls and htsfile can detect them. (PR#1340, thanks to + John Marshall) + +* CRAM now also uses libdeflate for read-names if the libdeflate + version is new enough (1.9 onwards). Previously we used zlib for + this due to poor performance of libdeflate. This gives a slight + speed up and reduction in file size. (PR#1383) + +* The VCF and BCF readers will now issue a warning if contig, INFO + or FORMAT IDs do not match the formats described in the VCFv4.3 + specification. Note that while the invalid names will mostly still + be accepted, future updates will convert the warnings to errors + causing files including invalid names to be rejected. (PR#1389) + +Build changes +------------- + +These are compiler, configuration and makefile based changes. + +* HTSlib now uses libhtscodecs release 1.2.1. + +* Improved support for compiling and linking against HTSlib with + Microsoft Visual Studio. (PR#1380, #1377, #1375. Thanks to + Aidan Bickford and John Marshall) + +* Various internal CI improvements. + +Bug fixes +--------- + +* Fixed CRAM index queries for HTSJDK output (PR#1388, reported by + Chris Norman). 
Note this also fixes CRAM writing, to match + the specification (and HTSJDK), from version 3.1 onwards. + +* Fixed CRAM index queries when required-fields settings are selected + to ignore CIGARs (PR#1372, reported by Giulio Genovese). + +* Unmapped but placed reads (having chr/pos) are now included in the BAM + indices. (PR#1352, thanks to John Marshall) + +* CRAM now honours the filename##idx##index nomenclature for + specifying non-standard index locations. (PR#1360, reported by + Michael Cariaso) + +* Minor CRAM v1.0 read-group fix (PR#1349, thanks to John Marshall) + +* Permit .fa and .fq file type detection as synonyms for FASTA and + FASTQ. (PR#1386). + +* Empty VCF format fields are now output as ":.:" instead of "::". + (PR#1370) + +* Repeated bcf_sr_seek calls now work. (PR#1363, reported by + Giulio Genovese) + +* Bcf_remove_allele_set now works on unpacked BCF records. (PR#1358, + reported by Brent Pedersen). + +* The hts_parse_decimal() function used to read numbers in region lists + is now better at rejecting non-numeric values. In particular it + now rejects a lone 'G' instead of interpreting it as '0G', i.e. zero. + (PR#1396, PR#1400, reported by SSSimon Yang; thanks to John Marshall). + +* Improve support for GPU issues listed by -Wdouble-promotion. + (PR#1365, reported by David Seifert) + +* Fix example code in header file documentation. (PR#1381, Thanks to + Aidan Bickford) + +Noteworthy changes in release 1.14 (22nd October 2021) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Features and Updates +-------------------- + +* Added a keep option to bgzip to leave the original file untouched. This + brings bgzip into line with gzip. (PR #1331, thanks to Alex Petty) + +* "endpos" has been added to the filter language, giving the position + of the rightmost mapped base as measured by the CIGAR string. For + unmapped reads it is the same as "pos". 
(PR #1307, thanks to John Marshall) + +* Interfaces have been added to interpret the new base modification tags + added to the SAMtags document in samtools/hts-specs#418. (PR #1132) + +* New API functions hts_flush()/sam_flush()/bcf_flush() for flushing output + htsFile/samFile/vcfFile streams. (PR #1326, thanks to John Marshall) + +* The synced_bcf_reader now sorts lines with symbolic alleles by END tag as + well as POS. (PR #1321) + +* Added synced_bcf_reader options BCF_SR_REGIONS_OVERLAP and + BCF_SR_TARGETS_OVERLAP for better control of how records that start outside + the desired region but overlap it are handled. Fixes samtools/bcftools#1420 + and samtools/bcftools#1421 raised by John Marshall. (PR #1327) + +* HTSlib will now accept long-cigar CG:B: tags made by htsjdk which don't + quite follow the specification properly (using signed values instead of + unsigned). Thanks to Colin Diesh for reporting an example file. (PR #1317) + +* The warning printed when the BGZF reader finds a file with no EOF block + has been changed to be less alarming. Unfortunately some third-party + BGZF encoders don't write EOF blocks at the end of files. Thanks to + Keiran Raine for reporting an example file. (PR #1323) + +* The FASTA and FASTQ readers get an option to skip over the first item on + the header line, and use the second as the read name. It allows the original + name to be restored on some of the fastq files served from the European + Nucleotide Archive (ENA). (PR #1325) + +* HTSlib is now more strict when parsing the VCF samples line (beginning + #CHROM). It will only accept tabs between the mandatory field names and + sample names must be separated with tabs. (PR #1328) + +* HTSlib will now warn if it looks like the header has been corrupted + by diagnostic messages from the program that made it. This can happen when + using `nohup`, which by default mixes stdout and stderr into the same + stream. 
(PR#1339, thanks to John Marshall) + +* File format detection will now recognise signatures for XZ, Zstd and D4 + files (note that HTSlib will not read them yet). (PR #1340, thanks to + John Marshall) + +Build changes +------------- + +These are compiler, configuration and makefile based changes. + +* Some redundant tests have been removed from the test harness, speeding it up. + (PR #1308) + +* The version.sh script now works better on shallow checkouts. (PR #1324) + +* A check-untracked Makefile target has been added to catch untracked files + (mostly) left by the test harness. (PR #1324) + +Bug fixes +--------- + +* Fixed a case where flushing the thread pool could very occasionally cause + a deadlock. (PR #1309) + +* Fixed a bug where some CRAM files could fail to decode if the required_fields + option was in use. Thanks to Matt Sexton for reporting the issue. + (PR #1314, fixes samtools/samtools#1475) + +* Fixed a regression where the S3 plugin could not read public files unless + you supplied some Amazon credentials. Thanks to Chris Saunders for reporting. + (PR #1332, fixes samtools/samtools#1491) + +* Fixed a possible CRAM thread deadlock discovered by @ryancaicse. + (PR #1330, fixes #1329) + +* Some set-but-unused variables have been removed. (PR #1334) + +* Fixed a bug which prevented "flag.read2" from working in the filter + language unless it was at the end of the expression. Thanks to Vamsi Kodali + for reporting the issue. (PR #1342) + +* Fixed a memory leak that could happen if CRAM fails to inflate a LZMA + block. (PR #1340, thanks to John Marshall) + +Noteworthy changes in release 1.13 (7th July 2021) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Features and Updates +-------------------- + +* In case a PG header line has multiple ID tags supplied by other applications, + the header API now selects the first one encountered as the identifying tag + and issues a warning when detecting subsequent ID tags. 
+ (#1256; fixed samtools/samtools#1393) + +* VCF header reading function (vcf_hdr_read) no longer tries to download a + remote index file by default. + (#1266; fixes #380) + +* Support reading and writing FASTQ format in the same way as SAM, BAM or CRAM. + Records read from a FASTQ file will be treated as unmapped data. + (#1156) + +* Added GCP requester pays bucket access. Thanks to @indraniel. + (#1255) + +* Made mpileup's overlap removal choose which copy to remove at random instead + of always removing the second one. This avoids strand bias in experiments + where the +ve and -ve strand reads always appear in the same order. + (#1273; fixes samtools/bcftools#1459) + +* It is now possible to use platform specific BAQ parameters. This also + selects long-read parameters for read lengths bigger than 1kb, which helps + bcftools mpileup call SNPs on PacBio CCS reads. + (#1275) + +* Improved bcf_remove_allele_set. This fixes a bug that stopped iteration over + alleles prematurely, marks removed alleles as 'missing' and does automatic + lazy unpacking. + (#1288; fixes #1259) + +* Improved compression metrics for unsorted CRAM files. This improves the + choice of codecs when handling unsorted data. + (#1291) + +* Linear index entries for empty intervals are now initialised with the file + offset in the next non-empty interval instead of the previous one. This + may reduce the amount of data iterators have to discard before reaching + the desired region, when the starting location is in a sequence gap. + Thanks to @carsonh for reporting the issue. + (#1286; fixes #486) + +* A new hts_bin_level API function has been added, to compute the level of a + given bin in the binning index. + (#1286) + +* Related to the above, a new API method, hts_idx_nseq, now returns the total + number of contigs from an index. + (#1295 and #1299) + +* Added bracket handling to bcf_hdr_parse_line, for use with ##META lines. + Thanks to Alberto Casas Ortiz. 
+ (#1240) + +Build changes +------------- + +These are compiler, configuration and makefile based changes. + +* HTSlib now uses libhtscodecs release 1.1.1. + +* Added a curl/curl.h check to configure and improved INSTALL documentation on + build options. Thanks to Melanie Kirsche and John Marshall. + (#1265; fixes #1261) + +* Some fixes to address GCC 11.1 warnings. + (#1280, #1284, #1285; fixes #1283) + +* Supports building HTSlib in a separate directory. Thanks to John Marshall. + (#1277; fixes #231) + +* Supports building HTSlib on MinGW 32-bit environments. Thanks to + John Marshall. + (#1301) + +Bug fixes +--------- + +* Fixed hts_itr_query() et al region queries: fixed bug introduced in + HTSlib 1.12, which led to iterators producing very few reads for some + queries (especially for larger target regions) when unmapped reads were + present. HTSlib 1.11 had a related problem in which iterators would omit + a few unmapped reads that should have been produced; cf #1142. + Thanks to Daniel Cooke for reporting the issue. + (#1281; fixes #1279) + +* Removed compressBound assertions on opening bgzf files. Thanks to + Gert Hulselmans for reporting the issue. + (#1258; fixed #1257) + +* Duplicate sample name error message for a VCF file now only displays the + duplicated name rather than the entire sample name list. + (#1262; fixes samtools/bcftools#1451) + +* Fix to make samtools cat work on CRAMs again. + (#1276; fixes samtools/samtools#1420) + +* Fix for a double memory free in SAM header creation. Thanks to @ihsineme. + (#1274) + +* Prevent assert in bcf_sr_set_regions. Thanks to Dr K D Murray. + (#1270) + +* Fixed crash in knet_open() etc stubs. Thanks to John Marshall. + (#1289) + +* Fixed filter expression "cigar" on unmapped reads. Stop treating an empty + CIGAR string as an error. Thanks to Chang Y for reporting the issue. 
+ (#1298, fixes samtools/samtools#1445) + +* Bug fixes in the bundled copy of htscodecs: + + - Fixed an uninitialized access in the name tokeniser decoder. + (samtools/htscodecs#23) + + - Fixed a bug with name tokeniser and variable number of names per slice, + causing it to incorrectly report an error on certain valid inputs. + (samtools/htscodecs#24) + + +Noteworthy changes in release 1.12 (17th March 2021) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Features and Updates +-------------------- + +* Added experimental CRAM 3.1 and 4.0 support. (#929) + + These should not be used for long term data storage as the + specification still needs to be ratified by GA4GH and may be subject + to changes in format. (This is highly likely for 4.0). However it + may be tested using: + + test/test_view -t ref.fa -C -o version=3.1 in.bam -p out31.cram + + For smaller but slower files, try varying the compression profile + with an additional "-o small". Profile choices are fast, normal, + small and archive, and can be applied to all CRAM versions. + +* Added a general filtering syntax for alignment records in SAM/BAM/CRAM + readers. (#1181, #1203) + + An example to find chromosome spanning read-pairs with high mapping + quality: 'mqual >= 30 && mrname != rname' + + To find significant sized deletions: + 'cigar =~ "[0-9]{2}D"' or 'rlen - qlen > 10'. + + To report duplicates that aren't part of a "proper pair": + 'flag.dup && !flag.proper_pair' + + More details are in the samtools.1 man page under "FILTER EXPRESSIONS". + +* The knet networking code has been removed. It only supported the http + and ftp protocols, and a better and safer alternative using libcurl + has been available since release 1.3. If you need access to ftp:// and + http:// URLs, HTSlib should be built with libcurl support. (#1200) + +* The old htslib/knetfile.h interfaces have been marked as deprecated. Any + code still using them should be updated to use hFILE instead. 
(#1200) + +* Added an introspection API for checking some of the capabilities provided + by HTSlib. (#1170) Thanks also to John Marshall for contributions. (#1222) + - `hfile_list_schemes`: returns the number of schemes found + - `hfile_list_plugins`: returns the number of plugins found + - `hfile_has_plugin`: checks if a specific plugin is available + - `hts_features`: returns a bit mask with all available features + - `hts_test_feature`: test if a feature is available + - `hts_feature_string`: return a string summary of enabled features + +* Made performance improvements to `probaln_glocal` method, which + speeds up mpileup BAQ calculations. (#1188) + - Caching of reused loop variables and removal of loop invariants + - Code reordering to remove instruction latency. + - Other refactoring and tidyups. + +* Added a public method for constructing a BAM record from the + component pieces. Thanks to Anders Kaplan. (#1159, #1164) + +* Added two public methods, `sam_parse_cigar` and `bam_parse_cigar`, as part of + a small CIGAR API (#1169, #1182). Thanks to Daniel Cameron for input. (#1147) + +* HTSlib, and the included htsfile program, will now recognise the old + RAZF compressed file format. Note that while the format is detected, + HTSlib is unable to read it. It is recommended that RAZF files are + uncompressed with `gunzip` before using them with HTSlib. Thanks to + John Marshall (#1244); and Matthew J. Oldach who reported problems + with uncompressing some RAZF files (samtools/samtools#1387). + +* The S3 plugin now has options to force the address style. It will recognise + the addressing_style and host_bucket entries in the respective aws + .credentials and s3cmd .s3cfg files. There is also a new HTS_S3_ADDRESS_STYLE + environment variable. Details are in the htslib-s3-plugin.7 man file (#1249). + +Build changes +------------- + +These are compiler, configuration and makefile based changes. 
+ +* Added new Makefile targets for the applications that embed HTSlib and + want to run its test suite or clean its generated artefacts. (#1230, #1238) + +* The CRAM codecs are now obtained via the htscodecs submodule, hence + when cloning it is now best to use "git clone --recursive". In an + existing clone, you may use "git submodule update --init" to obtain + the htscodecs submodule checkout. + +* Updated CI test configuration to recurse HTSlib submodules. (#1359) + +* Added Cirrus-CI integration as a replacement for Travis, which was + phased out. (#1175; #1212) + +* Updated the Windows image used by Appveyor to 'Visual Studio 2019'. (#1172; + fixed #1166) + +* Fixed a buglet in configure.ac, exposed by the release 2.70 of autoconf. + Thanks to John Marshall. (#1198) + +* Fixed plugin linking on macOS, to prevent symbol conflict when linking + with a static HTSlib. Thanks to John Marshall. (#1184) + +* Fixed a clang++9 error in `cram_io.h`. Thanks to Pjotr Prins. (#1190) + +* Introduced $(ALL_CPPFLAGS) to allow for more flexibility in setting the + compiler flags. Thanks to John Marshall. (#1187) + +* Added 'fall through' comments to prevent warnings issued by Clang on + intentional fall through case statements, when building with + `-Wextra flag`. Thanks to John Marshall. (#1163) + +* Non-configure builds now define _XOPEN_SOURCE=600 to allow them to work + when the `gcc -std=c99` option is used. Thanks to John Marshall. (#1246) + +Bug fixes +--------- + +* Fixed VCF `#CHROM` header parsing to only separate columns at tab characters. + Thanks to Sam Morris for reporting the issue. + (#1237; fixed samtools/bcftools#1408) + +* Fixed a crash reported in `bcf_sr_sort_set`, which expects REF to be present. + (#1204; fixed samtools/bcftools#1361) + +* Fixed a bcf synced reader bug when filtering with a region list, and + the first record for a chromosome had the same position as the last + record for the previous chromosome. 
(#1254; fixed samtools/bcftools#1441) + +* Fixed a bug in the overlapping logic of mpileup, dealing with iterating over + CIGAR segments. Thanks to `@wulj2` for the analysis. (#1202; fixed #1196) + +* Fixed a tabix bug that prevented setting the correct number of lines to be + skipped in a region file. Thanks to Jim Robinson for reporting it. (#1189; + fixed #1186) + +* Made `bam_itr_next` an alias for `sam_itr_next`, to prevent it from crashing + when working with htsFile pointers. Thanks to Torbjörn Klatt for + reporting it. (#1180; fixed #1179) + +* Fixed once per outgoing multi-threaded block `bgzf_idx_flush` assertion, to + accommodate situations when a single record could span multiple blocks. + Thanks to `@lacek`. (#1168; fixed samtools/samtools#1328) + +* Fixed assumption of pthread_t being a non-structure, as permitted by POSIX. + Thanks also to John Marshall and Anders Kaplan. (#1167, #1153, #1153) + +* Fixed the minimum offset of a BAI index bin, to account for unmapped reads. + Thanks to John Marshall for spotting the issue. (#1158; fixed #1142) + +* Fixed the CRLF handling in `sam_parse_worker` method. Thanks to + Anders Kaplan. (#1149; fixed #1148) + +* Included unistd.h and errno.h directly in HTSlib files, as opposed to + including them indirectly, via third party code. Thanks to + Andrew Patterson (#1143) and John Marshall (#1145). + + +Noteworthy changes in release 1.11 (22nd September 2020) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Features and Updates +-------------------- + +* Support added for remote reference files. fai_path() can take a remote + reference file and will return the corresponding index file. Remote indexes + can be handled by refs_load_fai(). UR tags in @SQ lines can now be set to + remote URIs. (#1017) + +* Added tabix --separate-regions option, which adds header comment lines + separating different regions' output records when multiple target regions + are supplied on the command line. 
(#1108) + +* Added tabix --cache option to set a BGZF block cache size. Most beneficial + when the -R option is used and the same blocks need to be re-read multiple + times. (#1053) + +* Improved error checking in tabix and added a --verbosity option so + it is possible to change the amount of logging when it runs. (#1040) + +* A note about the maximum chromosome length usable with TBI indexes has been + added to the tabix manual page. Thanks to John Marshall. (#1070) + +* New method vcf_open_mode() changes the opening mode of a variant file + based on its file extension. Similar to sam_open_mode(). (#1096) + +* The VCF parser has been made faster and easier to maintain. (#1057) + +* bcf_record_check() has been made faster, giving a 15% speed increase when + reading an uncompressed BCF file. (#1130) + +* The VCF parser now recognises the "<NON_REF>" symbolic allele produced + by GATK. (#1045) + +* Support has been added for simultaneous reading of unindexed VCF/BCF files + when using the synced_bcf_reader interface. Input files must have the + chromosomes in the same order as each other and be consistent with the order + of sequences in the header. (#1089) + +* The VCF and BCF readers will now attempt to fix up invalid INFO/END tags + where the stored END value is less than POS, resulting in an apparently + negative record length. Such files have been generated by programs which + used END incorrectly, and by broken lift-over processes that failed to + update any END tags present. (#1021; fixed samtools/bcftools#1154) + +* The htsFile interface can now detect the crypt4gh encrypted format (see + https://samtools.github.io/hts-specs/crypt4gh.pdf). If HTSlib is + built with external plug-in support, and the hfile_crypt4gh plug-in is + present, the file will be passed to it for decryption. The plug-in + can be obtained from https://github.com/samtools/htslib-crypt4gh. 
(#1046) + +* hts_srand48() now seeds the same POSIX-standard sequences of pseudo-random + numbers regardless of platform, including on OpenBSD where plain srand48() + produces a different cryptographically-strong non-deterministic sequence. + Thanks to John Marshall. (#1002) + +* Iterators now work with 64 bit positions. (#1018) + +* Improved the speed of range queries when using BAI indexes by + making better use of the linear index data included in the file. + The best improvement is on low-coverage data. (#1031) + +* Alignments which consume no reference bases are now considered to have + length 1. This would make such alignments cover 1 reference position in + the same manner as alignments that are unmapped or have no CIGAR strings. + These alignments can now be returned by iterator-based queries. Thanks + to John Marshall. (#1063; fixed samtools/samtools#1240, see also + samtools/hts-specs#521). + +* A bam_set_seqi() function to modify a single base in the BAM structure + has been added. This is a companion function to bam_seqi(). (#1022) + +* Writing SAM format is around 30% faster. (#1035) + +* Added sam_format_aux1() which converts a BAM aux tag to a SAM format string. + (#1134) + +* bam_aux_update_str() no longer requires NUL-terminated strings. It + is also now possible to create tags containing part of a longer string. + (#1088) + +* It is now possible to use external plug-ins in language bindings that + dynamically load HTSlib. Note that a side-effect of this change is that + some plug-ins now link against libhts.so, which means that they have to be + able to find the shared library when they are started up. Thanks to + John Marshall. (#1072) + +* bgzf_close(), and therefore hts_close(), will now return non-zero when + closing a BGZF handle on which errors have been detected. (Part of #1117) + +* Added a special case to the kt_fisher_exact() test for when the table + probability is too small to be represented in a double. 
This fixes a + bug where it would, for some inputs, fail to correctly determine which + side of the distribution the table was on resulting in swapped p-values + being returned for the left- and right-tailed tests. The two-tailed + test value was not affected by this problem. (#1126) + +* Improved error diagnostics in the CRAM decoder (#1042), BGZF (#1049), + the VCF and BCF readers (#1059), and the SAM parser (#1073). + +* ks_resize() now allocates 1.5 times the requested size when it needs + to expand a kstring instead of rounding up to the next power of two. + This has been done mainly to make the inlined function smaller, but it + also reduces the overhead of storing data in kstrings at the expense of + possibly needing a few more reallocations. (#1129) + +CRAM improvements +----------------- + +* Delay CRAM crc32 checks until the data actually needs to be used. With + other changes this leads to a 20x speed up in indexing and other sub-query + based actions. (#988) + +* CRAM now handles the transition from mapped to unmapped data in a better + way, improving compression of the unmapped data. (#961) + +* CRAM can now use libdeflate. (#961) + +* Fixed bug in MD tag generation with "b" read feature codes, causing the + numbers in the tag to be too large. Note that HTSlib never uses this + feature code so it is unlikely that this bug would be seen on real data. + The problem was found when testing against hand-crafted CRAM files. (#1086) + +* Fixed a regression where the CRAM multi-region iterator became much less + efficient when using threads. It now works more like the single iterator + and does not preemptively decode the next container unless it will be used. + (#1061) + +* Set CRAM default quality in lossy quality modes. If lossy quality is enabled + and 'B', 'q' or 'Q' features are used, CRAM starts off with QUAL being all 255 + (as per BAM spec and "*" quality) and then modifies individual qualities as + dictated by the specific features. 
+ + However that then produces ASCII quality " " (space, q=-1) for the unmodified + bases. Instead ASCII quality "?" (q=30) is used, as per HTSJDK. Quality 255 + is still used for sequences with no modifications at all. (#1094) + + +Build changes +------------- + +These are compiler, configuration and makefile based changes. + +* `make all` now also builds htslib_static.mk and htslib-uninstalled.pc. + Thanks to John Marshall. (#1011) + +* Various cppcheck-1.90 warnings have been fixed. (#995, #1011) + +* HTSlib now prefers its own headers when being compiled, fixing build + failures on machines that already had a system-installed HTSlib. Thanks to + John Marshall. (#1078; fixed #347) + +* Define HTSLIB_EXPORT without using a helper macro to reduce the length of + compiler diagnostics that mention exported functions. Thanks to + John Marshall. (#1029) + +* Fix dirty default build by including latest pkg.m4 instead of using + aclocal.m4. Thanks to Damien Zammit. (#1091) + +* Struct tags have been added to htslib/*.h public typedefs. This makes it + possible to forward declare htsFile without including htslib/hts.h. Thanks + to Lucas Czech and John Marshall. (#1115; fixed #1106) + +* Fixed compiler warnings emitted by the latest gcc and clang releases + when compiling HTSlib, along with some -Wextra warnings in the public + include files. Thanks to John Marshall. (#1066, #1063, #1083) + +Bug fixes +--------- + +* Fixed hfile_libcurl breakage when using libcurl 7.69.1 or later. Thanks to + John Marshall for tracking down the exact libcurl change that caused the + incompatibility. (#1105; fixed samtools/samtools#1254 and + samtools/samtools#1284) + +* Fixed overflows kroundup32() and kroundup_size_t() which caused them to + return zero when rounding up values where the most significant bit was + set. When this happens they now return the highest value that can + be stored (#1044). 
All of the kroundup macro definitions have also been + gathered together into a unified implementation (#1051). + +* Fixed missing return parameter value in idx_test_and_fetch(). Thanks to + Lilian Janin. (#1014) + +* Fixed crashes due to inconsistent selection between BGZF and plain (hFILE) + interfaces when reading files. [fuzz] (#1019) + +* Added and/or fixed byte swapping code for big-endian platforms. Thanks + to Jun Aruga, John Marshall, Michael R Crusoe and Gianfranco Costamagna + for their help. (#1023; fixed #119 and #355) + +* Fixed a problem with multi-threaded on-the-fly indexes which would + occasionally write virtual offsets pointing at the end of a BGZF block. + Attempting to read from such an offset caused EOF to be incorrectly + reported. These offsets are now handled correctly, and the indexer + has been updated to avoid generating them. (#1028; fixed + samtools/samtools#1197) + +* In sam_hdr_create(), free newly allocated SN strings when encountering an + error. [fuzz] (#1034) + +* Prevent double free in case of idx_test_and_fetch() failure. Thanks to + @fanwayne for the bug report. (#1047; fixed #1033) + +* In the header, link a new PG line only to valid chains. Prevents an + explosive growth of PG lines on headers where PG lines are already present + but not linked together correctly. (#1062; fixed samtools/samtools#1235) + +* Also in the header, when calling sam_hdr_update_line(), update target arrays + only when the name or length is changed. (#1007) + +* Fixed buffer overflows in CRAM MD5 calculation triggered by + files with invalid compression headers, or files with embedded + references that were one byte too short. [fuzz] (#1024, #1068) + +* Fix mpileup regression between 1.9 and 1.10 where overlap detection + was incorrectly skipped on reads where RNEXT, PNEXT and TLEN were + set to the "unavailable" values ("*", 0, 0 in SAM). (#1097) + +* kputs() now checks for null pointer in source string. 
[fuzz] (#1087) + +* Fix potential bcf_update_alleles() crash on 0 alleles. Thanks to + John Marshall. (#994) + +* Added bcf_unpack() calls to some bcf_update functions to fix a bug + where updates made after a call to bcf_dup() could be lost. (#1032; + fixed #1030) + +* Error message typo "Number=R" instead of "Number=G" fixed in + bcf_remove_allele_set(). Thanks to Ilya Vorontsov. (#1100) + +* Fixed crashes that could occur in BCF files that use IDX= header annotations + to create a sparse set of CHROM, FILTER or FORMAT indexes, and + include records that use one of the missing index values. [fuzz] (#1092) + +* Fixed potential integer overflows in the VCF parser and ensured that + the total length of FORMAT fields cannot go over 2Gbytes. [fuzz] (#1044, + #1104; latter is CVE-2020-36403 affecting all HTSlib versions up to 1.10.2) + +* Download index files atomically in idx_test_and_fetch(). This prevents + corruption when running parallel jobs on S3 files. Thanks to John Marshall. + (#1112; samtools/samtools#1242). + +* The pileup constructor callback is now given the copy of the bam1_t struct + made by pileup instead of the original one passed to bam_plp_push(). This + makes it the same as the one passed to the destructor and ensures that + cached data, for example the location of an aux tag, will remain valid. + (#1127) + +* Fixed possible error in code_sort() on negative CRAM Huffman code + length. (#1008) + +* Fixed possible undefined shift in cram_byte_array_stop_decode_init(). (#1009) + +* Fixed a bug where range queries to the end of a given reference + would return incorrect results on CRAM files. (#1016; + fixed samtools/samtools#1173) + +* Fixed an integer overflow in cram_read_slice(). [fuzz] (#1026) + +* Fixed a memory leak on failure in cram_decode_slice(). [fuzz] (#1054) + +* Fixed a regression which caused cram_transcode_rg() to fail, resulting + in a crash in "samtools cat" on CRAM files. 
(#1093; + fixed samtools/samtools#1276) + +* Fixed an undersized string reallocation in the threaded SAM reader which + caused it to crash when reading SAM files with very long lines. Numerous + memory allocation checks have also been added. (#1117) + + +Noteworthy changes in release 1.10.2 (19th December 2019) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a release fix that corrects minor inconsistencies discovered in +previous deliverables. + + +Noteworthy changes in release 1.10.1 (17th December 2019) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The support for 64-bit coordinates in VCF brought problems for files +not conforming to VCF/BCF specification. While previous versions would +make out-of-range values silently overflow creating nonsense values +but parseable file, the version 1.10 would silently create an invalid BCF. + + +Noteworthy changes in release 1.10 (6th December 2019) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Brief summary +------------- + +There are many changes in this release, so the executive summary is: + +* Addition of support for references longer than 2Gb (NB: SAM and VCF + formats only, not their binary counterparts). This may need changes + in code using HTSlib. See README.large_positions.md for more information. + +* Added a SAM header API. + +* Major speed up to SAM reading and writing. This also now supports + multi-threading. + +* We can now auto-index on-the-fly while writing a file. This also + includes to bgzipped SAM.gz. + +* Overhaul of the S3 interface, which now supports version 4 + signatures. This also makes writing to S3 work. + +These also required some ABI changes. See below for full details. + + +Features / updates +------------------ + +* A new SAM/BAM/CRAM header API has been added to HTSlib, allowing header + data to be updated without having to parse or rewrite large parts of the + header text. See htslib/sam.h for function definitions and + documentation. 
(#812) + + The header typedef and several pre-existing functions have been renamed + to have a sam_hdr_ prefix: sam_hdr_t, sam_hdr_init(), sam_hdr_destroy(), + and sam_hdr_dup(). (The existing bam_hdr_-prefixed names are still + provided for compatibility with existing code.) (#887, thanks to + John Marshall) + +* Changes to hfile_s3, which provides support for the AWS S3 API. (#839) + + - hfile_s3 now uses version 4 signatures by default. Attempting to write to + an S3 bucket will also now work correctly. It is possible to force + version 2 signatures by creating environment variable HTS_S3_V2 (the exact + value does not matter, it just has to exist). Note that writing depends + on features that need version 4 signatures, so forcing version 2 will + disable writes. + + - hfile_s3 will automatically retry requests where the region endpoint + was not specified correctly, either by following the 301 redirect (when + using path-style requests) or reading the 400 response (when using + virtual-hosted style requests and version 4 signatures). The first + region to try can be set by using the AWS_DEFAULT_REGION environment + variable, by setting "region" in ".aws/credentials" or by setting + "bucket_location" in ".s3cfg". + + - hfile_s3 now percent-escapes the path component of s3:// URLs. For + backwards-compatibility it will ignore any paths that have already + been escaped (detected by looking for '%' followed by two hexadecimal + digits.) + + - New environment variables HTS_S3_V2, HTS_S3_HOST, HTS_S3_S3CFG + and HTS_S3_PART_SIZE to force version-2 signatures, control the + S3 server hostname, the configuration file and upload chunk + sizes respectively. + +* Numerous SAM format improvements. + + - Bgzipped SAM files can now be indexed and queried. The library now + recognises sam.gz as a format name to ease this usage. (#718, #916) + + - The SAM reader and writer now supports multi-threading via the + thread-pool. 
(#916) + + Note that the multi-threaded SAM reader does not currently support seek + operations. Trying to do this (for example with an iterator range request) + will result in the SAM readers dropping back to single-threaded mode. + + - Major speed up of SAM decoding and encoding, by around 2x. (#722) + + - SAM format can now handle 64-bit coordinates and references. This + has implications for the ABI too (see below). Note BAM and CRAM + currently cannot handle references longer than 2Gb, however given + the speed and threading improvements SAM.gz is a viable workaround. (#709) + +* We can now automatically build indices on-the-fly while writing + SAM, BAM, CRAM, VCF and BCF files. (Note for SAM and VCF this only + works when bgzipped.) (#718) + +* HTSlib now supports the @SQ-AN header field, which lists alternative names + for reference sequences. This means given "@SQ SN:1 AN:chr1", tools like + samtools can accept requests for "1" or "chr1" equivalently. (#931) + +* Zero-length files are no longer considered to be valid SAM files + (with no header and no alignments). This has been changed so that pipelines + such as `somecmd | samtools ...` with `somecmd` aborting before outputting + anything will now propagate the error to the second command. (#721, thanks + to John Marshall; #261 reported by Adrian Tan) + +* Added support for use of non-standard index names by pasting the + data filename and index filename with ##idx##. For example + "/path1/my_data.bam##idx##/path2/my_index.csi" will open bam file + "/path1/my_data.bam" and index file "/path2/my_index.csi". (#884) + + This affects hts_idx_load() and hts_open() functions. + +* Improved the region parsing code to handle colons in reference + names. Strings can be disambiguated by the use of braces, so for + example when reference sequences called "chr1" and "chr1:100-200" + are both present, the regions "{chr1}:100-200" and "{chr1:100-200}" + unambiguously indicate which reference is being used. 
(#708) + + A new function hts_parse_region() has been added along with + specialisations for sam_parse_region() and fai_parse_region(). + +* CRAM encoding now has additional checks for MD/NM validity. If + they are incorrect, it stores the (incorrect copy) verbatim so + round-trips "work". (#792) + +* Sped up decoding of CRAM by around 10% when the MD tag is being + generated. (#874) + +* CRAM REF_PATH now supports %Ns (where N is a single digit) + expansion in http URLs, similar to how it already supported this + for directories. (#791) + +* BGZF now permits indexing and seeking using virtual offsets in + completely uncompressed streams. (#904, thanks to Adam Novak) + +* bgzip now asks for extra confirmation before decompressing files + that don't have a known compression extension (e.g. .gz). This avoids + `bgzip -d foo.bam.bai` producing a foo.bam file that is very much not + a BAM-formatted file. (#927, thanks to John Marshall) + +* The htsfile utility can now copy files (including to/from URLs using + HTSlib's remote access facilities) with the --copy option, in + addition to its existing uses of identifying file formats and + displaying sequence or variant data. (#756, thanks to John Marshall) + +* Added tabix --min-shift option. (#752, thanks to Garrett Stevens) + +* Tabix now has an -D option to disable storing a local copy of a + remote index. (#870) + +* Improved support for MSYS Windows compiler environment. (#966) + +* External htslib plugins are now supported on Windows. (#966) + + +API additions and improvements +------------------------------ + +* New API functions bam_set_mempolicy() and bam_get_mempolicy() have + been added. These allow more control over the ownership of bam1_t + alignment record data; see documentation in htslib/sam.h for more + information. (#922) + +* Added more HTS_RESULT_USED checks, this time for VCF I/O. (#805) + +* khash can now hash kstrings. This makes it easier to hash + non-NUL-terminated strings. 
(#713) + +* New haddextension() filename extension API function. (#788, thanks to + John Marshall) + +* New hts_resize() macro, designed to replace uses of hts_expand() + and hts_expand0(). (#805) + +* Added way of cleaning up unused jobs in the thread pool via the new + hts_tpool_dispatch3() function. (#830) + +* New API functions hts_reglist_create() and sam_itr_regarray() are added + to create hts_reglist_t region lists from `chr:-` type region + specifiers. (#836) + +* Ksort has been improved to facilitate library use. See KSORT_INIT2 + (adds scope / namespace capabilities) and KSORT_INIT_STATIC interfaces. + (#851, thanks to John Marshall) + +* New kstring functions (#879): + KS_INITIALIZE - Initializer for structure assignment + ks_initialize() - Initializer for pointed-to kstrings + ks_expand() - Increase kstring capacity by a given amount + ks_clear() - Set kstring length to zero + ks_free() - Free the underlying buffer + ks_c_str() - Returns the kstring buffer as a const char *, + or an empty string if the length is zero. + +* New API functions hts_idx_load3(), sam_index_load3(), tbx_index_load3() + and bcf_index_load3() have been added. These allow control of whether + remote indexes should be cached locally, and allow the error message + printed when the index does not exist to be suppressed. (#870) + +* Improved hts_detect_format() so it no longer assumes all text is + SAM unless positively identified otherwise. It also makes a stab + at detecting bzip2 format and identifying BED, FASTA and FASTQ + files. (#721, thanks to John Marshall; #200, #719 both reported by + Torsten Seemann) + +* File format errors now set errno to EFTYPE (BSD, MacOS) when + available instead of ENOEXEC. 
(#721) + +* New API function bam_set_qname (#942) + +* In addition to the existing hts_version() function, which reflects the + HTSlib version being used at runtime, now also provides + HTS_VERSION, a preprocessor macro reflecting the HTSlib version that + a program is being compiled against. (#951, thanks to John Marshall; #794) + + +ABI changes +----------- + +This release contains a number of things which change the Application +Binary Interface (ABI). This means code compiled against an earlier +library will require recompiling. The shared library soversion has +been bumped. + +* On systems that support it, the default symbol visibility has been + changed to hidden and the only exported symbols are ones that form part + of the officially supported ABI. This is to make clear exactly which + symbols are considered parts of the library interface. It also + helps packagers who want to check compatibility between HTSlib versions. + (#946; see for example issues #311, #616, and #695) + +* HTSlib now supports 64 bit reference positions. This means several + structures, function parameters, and return values have been made bigger + to allow larger values to be stored. While most code that uses + HTSlib interfaces should still build after this change, some alterations + may be needed - notably to printf() formats where the values of structure + members are being printed. (#709) + + Due to file format limitations, large positions are only supported + when reading and writing SAM and VCF files. + + See README.large_positions.md for more information. + +* An extra field has been added to the kbitset_t struct so bitsets can + be made smaller (and later enlarged) without involving memory allocation. + (#710, thanks to John Marshall) + +* A new field has been added to the bam_pileup1_t structure to keep track + of which CIGAR operator is being processed. 
This is used by a new + bam_plp_insertion() function which can be used to return the sequence of + any inserted bases at a given pileup location. If the alignment includes + CIGAR P operators, the returned sequence will include pads. (#699) + +* The hts_itr_t and hts_itr_multi_t structures have been merged and can be + used interchangeably. Extra fields have been added to hts_itr_t to support + this. hts_itr_multi_t is now a typedef for hts_itr_t; sam_itr_multi_next() + is now an alias for sam_itr_next() and hts_itr_multi_destroy() is an alias + for hts_itr_destroy(). (#836) + +* An improved regidx interface has been added. To allow this, struct + reg_t has been removed, regitr_t has been modified and various new + API functions have been added to htslib/regidx.h. While parts of + the old regidx API have been retained for backwards compatibility, + it is recommended that all code using regidx should be changed to use + the new interface. (#761) + +* Elements in the hts_reglist_t structure have been reordered slightly + so that they pack together better. (#761) + +* bgzf_utell() and bgzf_useek() now use type off_t instead of long for + the offset. This allows them to work correctly on files longer than + 2G bytes on Windows and 32-bit Linux. (#868) + +* A number of functions that used to return void now return int so that + they can report problems like memory allocation failures. Callers + should take care to check the return values from these functions. (#834) + + The affected functions are: + ksort.h: ks_introsort(), ks_mergesort() + sam.h: bam_mplp_init_overlaps() + synced_bcf_reader.h: bcf_sr_regions_flush() + vcf.h: bcf_format_gt(), bcf_fmt_array(), + bcf_enc_int1(), bcf_enc_size(), + bcf_enc_vchar(), bcf_enc_vfloat(), bcf_enc_vint(), + bcf_hdr_set_version(), bcf_hrec_format() + vcfutils.h: bcf_remove_alleles() + +* bcf_set_variant_type() now outputs VCF_OVERLAP for spanning + deletions (ALT=*). 
(#726) + +* A new field (hrecs) has been added to the bam_hdr_t structure for + use by the new header API. The old sdict field is now not used and + marked as deprecated. The l_text field has been changed from uint32_t + to size_t, to allow for very large headers in SAM files. The text + and l_text fields have been left for backwards compatibility, but + should not be accessed directly in code that uses the new header API. + To access the header text, the new functions sam_hdr_length() and + sam_hdr_str() should be used instead. (#812) + +* The old cigar_tab field is now marked as deprecated; use the new + bam_cigar_table[] instead. (#891, thanks to John Marshall) + +* The bam1_core_t structure's l_qname and l_extranul fields have been + rearranged and enlarged; l_qname still includes the extra NULs. + (Almost all code should use bam_get_qname(), bam_get_cigar(), etc, + and has no need to use these fields directly.) HTSlib now supports + the SAM specification's full 254 QNAME length again. (#900, thanks + to John Marshall; #520) + +* bcf_index_load() no longer tries the '.tbi' suffix when looking for + BCF index files (.tbi indexes are for text files, not binary BCF). (#870) + +* htsFile has a new 'state' member to support SAM multi-threading. (#916) + +* A new field has been added to the bam1_t structure, and others + have been rearranged to remove structure holes. (#709; #922) + + +Bug fixes +--------- + +* Several BGZF format fixes: + + - Support for multi-member gzip files. (#744, thanks to Adam Novak; #742) + + - Fixed error handling code for native gzip formatted files. (64c4927) + + - CRCs checked when threading too (previously only when non-threaded). (#745) + + - Made bgzf_useek function work with threads. (#818) + + - Fixed rare threading deadlocks. (#831) + + - Reading of very short files (<28 bytes) that do not contain an EOF block. + (#910) + +* Fixed some thread pool deadlocks caused by race conditions. 
(#746, #906) + +* Many additional memory allocation checks in VCF, BCF, SAM and CRAM + code. This also changes the return type of some functions. See ABI + changes above. (#920 amongst others) + +* Replace some sam parsing abort() calls with proper errors. + (#721, thanks to John Marshall; #576) + +* Fixed to permit SAM read names of length 252 to 254 (the maximum + specified by the SAM specification). (#900, thanks to John Marshall) + +* Fixed mpileup overlap detection heuristic to work with BAMs having + long CIGARs (more than 65536 operations). (#802) + +* Security fix: CIGAR strings starting with the "N" operation can no + longer cause underflow on the bam CIGAR structure. Similarly CIGAR + strings that are entirely "D" ops could leak the contents of + uninitialised variables. (#699) + +* Fixed bug where alignments starting 0M could cause an invalid + memory access in sam_prob_realn(). (#699) + +* Fixed out of bounds memory access in mpileup when given a reference + with binary characters (top-bit set). (#808, thanks to John Marshall) + +* Fixed crash in mpileup overlap_push() function. (#882; #852 reported + by Pierre Lindenbaum) + +* Fixed various potential CRAM memory leaks when recovering from + error cases. + +* Fixed CRAM index queries for unmapped reads (#911; samtools/samtools#958 + reported by @acorvelo) + +* Fixed the combination of CRAM embedded references and multiple + slices per container. This was incorrectly setting the header + MD5sum. (No impact on default CRAM behaviour.) (b2552fd) + +* Removed unwanted explicit data flushing in CRAM writing, which on + some OSes caused major slowdowns. (#883) + +* Fixed inefficiencies in CRAM encoding when many small references + occur within the middle of large chromosomes. Previously it + switched into multi-ref mode, but not back out of it which caused + the read POS field to be stored poorly. 
(#896) + +* Fixed CRAM handling of references when the order of sequences in a + supplied fasta file differs to the order of the @SQ headers. (#935) + +* Fixed BAM and CRAM multi-threaded decoding when used in conjunction + with the multi-region iterator. (#830; #577, #822, #926 all reported by + Brent Pedersen) + +* Removed some unaligned memory accesses in CRAM encoder and + undefined behaviour in BCF reading (#867, thanks to David Seifert) + +* Repeated calling of bcf_empty() no longer crashes. (#741) + +* Fixed bug where some 8 or 16-bit negative integers were stored using values + reserved by the BCF specification. These numbers are now promoted to the + next size up, so -121 to -128 are stored using at least 16 bits, and -32761 + to -32768 are stored using 32 bits. + + Note that while BCF files affected by this bug are technically incorrect, + it is still possible to read them. When converting to VCF format, + HTSlib (and therefore bcftools) will interpret the values as intended + and write out the correct negative numbers. (#766, thanks to John Marshall; + samtools/bcftools#874) + +* Allow repeated invocations of bcf_update_info() and bcf_update_format_*() + functions. (#856, thanks to John Marshall; #813 reported by Steffen Möller) + +* Memory leak removed in knetfile's kftp_parse_url() function. (#759, thanks + to David Alexander) + +* Fixed various crashes found by libfuzzer (invalid data leading to + errors), mostly but not exclusively in CRAM, VCF and BCF decoding. (#805) + +* Improved robustness of BAI and CSI index creation and loading. (#870; #967) + +* Prevent (invalid) creation of TBI indices for BCF files. + (#837; samtools/bcftools#707) + +* Better parsing of handling of remote URLs with ?param=val + components and their interaction with remote index URLs. (#790; #784 + reported by Mark Ebbert) + +* hts_idx_load() now checks locally for all possible index names before + attempting to download a remote index. 
It also checks that the remote + file it downloads is actually an index before trying to save and use + it. (#870; samtools/samtools#1045 reported by Albert Vilella) + +* hts_open_format() now honours the compression field, no longer also + requiring an explicit "z" in the mode string. Also fixed a 1 byte + buffer overrun. (#880) + +* Removed duplicate hts_tpool_process_flush prototype. (#816, reported by + James S Blachly) + +* Deleted defunct cram_tell declaration. (66c41e2; #915 reported by + Martin Morgan) + +* Fixed overly aggressive filename suffix checking in bgzip. (#927, thanks to + John Marshall; #129, reported by @hguturu) + +* Tabix and bgzip --help output now goes to standard output. (#754, thanks to + John Marshall) + +* Fixed bgzip index creation when using multiple threads. (#817) + +* Made bgzip -b option honour -I (index filename). (#817) + +* Bgzip -d no longer attempts to unlink(NULL) when decompressing stdin. (#718) + + +Miscellaneous other changes +--------------------------- + +* Integration with Google OSS fuzzing for automatic detection of + more bugs. (Thanks to Google for their assistance and the bugs it + has found.) (#796, thanks to Markus Kusano) + +* aclocal.m4 now has the pkg-config macros. (6ec3b94d; #733 reported by + Thomas Hickman) + +* Improved C++ compatibility of some header files. (#772; #771 reported + by @cwrussell) + +* Improved strict C99 compatibility. (#860, thanks to John Marshall) + +* Travis and AppVeyor improvements to aid testing. (#747; #773 thanks to + Lennard Berger; #781; #809; #804; #860; #909) + +* Various minor compiler warnings fixed. (#708; #765; #846, #860, thanks to + John Marshall; #865; #966; #973) + +* Various new and improved error messages. + +* Documentation updates (mostly in the header files). + +* Even more testing with "make check". + +* Corrected many copyright dates. (#979) + +* The default non-configure Makefile now uses libcurl instead of + knet, so it can support https. 
(#895) + + + + + + +Noteworthy changes in release 1.9 (18th July 2018) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* If `./configure` fails, `make` will stop working until either configure + is re-run successfully, or `make distclean` is used. This makes + configuration failures more obvious. (#711, thanks to John Marshall) + +* The default SAM version has been changed to 1.6. This is in line with the + latest version specification and indicates that HTSlib supports the + CG tag used to store long CIGAR data in BAM format. + +* bgzip integrity check option '--test' (#682, thanks to @sd4B75bJ, @jrayner) + +* Faidx can now index fastq files as well as fasta. The fastq index adds + an extra column to the `.fai` index which gives the offset to the quality + values. New interfaces have been added to `htslib/faidx.h` to read the + fastq index and retrieve the quality values. It is possible to open + a fastq index as if fasta (only sequences will be returned), but not + the other way round. (#701) + +* New API interfaces to add or update integer, float and array aux tags. (#694) + +* Add `level=` option to `hts_set_opt()` to allow the compression + level to be set. Setting `level=0` enables uncompressed output. (#715) + +* Improved bgzip error reporting. + +* Better error reporting when CRAM reference files can't be opened. (#706) + +* Fixes to make tests work properly on Windows/MinGW - mainly to handle + line ending differences. (#716) + +* Efficiency improvements: + + - Small speed-up for CRAM indexing. + + - Reduce the number of unnecessary wake-ups in the thread pool. (#703) + + - Avoid some memory copies when writing data, notably for uncompressed + BGZF output. (#703) + +* Bug fixes: + + - Fix multi-region iterator bugs on CRAM files. (#684) + + - Fixed multi-region iterator bug that caused some reads to be skipped + incorrectly when reading BAM files. (#687) + + - Fixed synced_bcf_reader() bug when reading contigs multiple times. 
(#691, + reported by @freeseek) + + - Fixed bug where bcf_hdr_set_samples() did not update the sample dictionary + when removing samples. (#692, reported by @freeseek) + + - Fixed bug where the VCF record ref length was calculated incorrectly + if an INFO END tag was present. (71b00a) + + - Fixed warnings found when compiling with gcc 8.1.0. (#700) + + - sam_hdr_read() and sam_hdr_write() will now return an error code + if passed a NULL file pointer, instead of crashing. + + - Fixed possible negative array look-up in sam_parse1() that somehow escaped + previous fuzz testing. (CVE-2018-13845, #731, reported by @fCorleone) + + - Fixed bug where cram range queries could incorrectly report an error + when using multiple threads. (#734, reported by Brent Pedersen) + + - Fixed very rare rANS normalisation bug that could cause an assertion + failure when writing CRAM files. (#739, reported by @carsonhh) + +Noteworthy changes in release 1.8 (3rd April 2018) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* The URL to get sequences from the EBI reference server has been changed + to https://. This is because the EBI no longer serve sequences via + plain HTTP - requests to the http:// endpoint just get redirected. + HTSlib needs to be linked against libcurl to download https:// URLs, + so CRAM users who want to get references from the EBI will need to + run configure and ensure libcurl support is enabled using the + --enable-libcurl option. + +* Added libdeflate as a build option for alternative faster compression and + decompression. Results vary by CPU but compression should be twice as fast + and decompression faster. + +* It is now possible to set the compression level in bgzip. (#675; thanks + to Nathan Weeks). + +* bgzip now gets its own manual page. + +* CRAM encoding now stored MD and NM tags verbatim where the reference + contains 'N' characters, to work around ambiguities in the SAM + specification (samtools #717/762). 
+ Also added "store_md" and "store_nm" cram-options for forcing these + tags to be stored at all locations. This is best when combined with + a subsequent decode_md=0 option while reading CRAM. + +* Multiple CRAM bug fixes, including a fix to free and the subsequent reuse of + references with `-T ref.fa`. (#654; reported by Chris Saunders) + +* CRAM multi-threading bugs fixed: don't try to call flush on reading; + processing of multiple range queries; problems with multi-slice containers. + +* Fixed crashes caused when decoding some cramtools produced CRAM files. + +* Fixed a couple of minor rANS issues with handling invalid data. + +* Fixed bug where probaln_glocal() tried to allocate far more memory than + needed when the query sequence was much longer than the reference. This + caused crashes in samtools and bcftools mpileup when used on data with very + long reads. (#572, problem reported by Felix Bemm via minimap2). + +* sam_prop_realn() now returns -1 (the same value as for unmapped reads) + on reads that do not include at least one 'M', 'X' or '=' CIGAR operator, + and no longer adds BQ or ZQ tags. BAQ adjustments are only made to bases + covered by these operators so there is no point in trying to align + reads that do not have them. (#572) + +Noteworthy changes in release 1.7 (26th January 2018) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* BAM: HTSlib now supports BAMs which include CIGARs with more than + 65535 operations as per HTS-Specs 18th November (dab57f4 and 2f915a8). + +* BCF/VCF: + - Removed the need for long double in pileup calculations. + - Sped up the synced reader in some situations. + - Bug fixing: removed memory leak in bcf_copy. + +* CRAM: + - Added support for HTS_IDX_START in cram iterators. + - Easier to build when lzma header files are absent. + - Bug fixing: a region query with REQUIRED_FIELDS option to + disable sequence retrieval now gives correct results. 
+ - Bug fixing: stop queries to regions starting after the last + read on a chromosome from incorrectly reporting errors + (#651, #653; reported by Imran Haque and @egafni via pysam). + +* Multi-region iterator: The new structure takes a list of regions and + iterates over all, deduplicating reads in the process, and producing a + full list of file offset intervals. This is usually much faster than + repeatedly using the old single-region iterator on a series of regions. + +* Curl improvements: + - Add Bearer token support via HTS_AUTH_LOCATION env (#600). + - Use CURL_CA_BUNDLE environment variable to override the CA (#622; + thanks to Garret Kelly & David Alexander). + - Speed up (removal of excessive waiting) for both http(s) and ftp. + - Avoid repeatedly reconnecting by removal of unnecessary seeks. + - Bug fixing: double free when libcurl_open fails. + +* BGZF block caching, if enabled, now performs far better (#629; reported + by Ram Yalamanchili). + +* Added an hFILE layer for in-memory I/O buffers (#590; thanks to Thomas + Hickman). + +* Tidied up the drand48 support (intended for systems that do not + provide this function). + +Noteworthy changes in release 1.6 (28th September 2017) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Fixed bug where iterators on CRAM files did not propagate error return + values to the caller correctly. Thanks go to Chris Saunders. + +* Overhauled Windows builds. Building with msys2/mingw64 now works + correctly and passes all tests. + +* More improvements to logging output (thanks again to Anders Kaplan). + +* Return codes from sam_read1() when reading cram have been made + consistent with those returned when reading sam/bam. Thanks to + Chris Saunders (#575). + +* BGZF CRC32 checksums are now always verified. + +* It's now possible to set nthreads = 1 for cram files. + +* hfile_libcurl has been modified to make it thread-safe. 
It's also + better at handling web servers that do not honour byte range requests + when attempting to seek - it now sets errno to ESPIPE and keeps + the existing connection open so callers can revert to streaming mode + if they want to. + +* hfile_s3 now recalculates access tokens if they have become stale. This + fixes a reported problem where authentication failed after a file + had been in use for more than 15 minutes. + +* Fixed bug where remote index fetches would fail to notice errors when + writing files. + +* bam_read1() now checks that the query sequence length derived from the + CIGAR alignment matches the sequence length in the BAM record. + +Noteworthy changes in release 1.5 (21st June 2017) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Added a new logging API: hts_log(), along with hts_log_error(), + hts_log_warn() etc. convenience macros. Thanks go to Anders Kaplan + for the implementation. (#499, #543, #551) + +* Added a new file I/O option "block_size" (HTS_OPT_BLOCK_SIZE) to + alter the hFILE buffer size. + +* Fixed various bugs, including compilation issues samtools/bcftools#610, + samtools/bcftools#611 and robustness to corrupted data #537, #538, + #541, #546, #548, #549, #554. + + +Noteworthy changes in release 1.4.1 (8th May 2017) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is primarily a security bug fix update. + +* Fixed SECURITY (CVE-2017-1000206) issue with buffer overruns with + malicious data. (#514) + +* S3 support for non Amazon AWS endpoints. (#506) + +* Support for variant breakpoints in bcftools. (#516) + +* Improved handling of BCF NaNs. (#485) + +* Compilation / portability improvements. (#255, #423, #498, #488) + +* Miscellaneous bug fixes (#482, #521, #522, #523, #524). + +* Sanitise headers (#509) + + +Release 1.4 (13 March 2017) + +* Incompatible changes: several functions and data types have been changed + in this release, and the shared library soversion has been bumped to 2.
+ + - bam_pileup1_t has an additional field (which holds user data) + - bam1_core_t has been modified to allow for >64K CIGAR operations + and (along with bam1_t) so that CIGAR entries are aligned in memory + - hopen() has vararg arguments for setting URL scheme-dependent options + - the various tbx_conf_* presets are now const + - auxiliary fields in bam1_t are now always stored in little-endian byte + order (previously this depended on if you read a bam, sam or cram file) + - index metadata (accessible via hts_idx_get_meta()) is now always + stored in little-endian byte order (previously this depended on if + the index was in tbi or csi format) + - bam_aux2i() now returns an int64_t value + - fai_load() will no longer save local copies of remote fasta indexes + - hts_idx_get_meta() now takes a uint32_t * for l_meta (was int32_t *) + +* HTSlib now links against libbz2 and liblzma by default. To remove these + dependencies, run configure with options --disable-bz2 and --disable-lzma, + but note that this may make some CRAM files produced elsewhere unreadable. + +* Added a thread pool interface and replaced the bgzf multi-threading + code to use this pool. BAM and CRAM decoding is now multi-threaded + too, using the pool to automatically balance the number of threads + between decode, encode and any data processing jobs. + +* New errmod_cal(), probaln_glocal(), sam_cap_mapq(), and sam_prob_realn() + functions, previously internal to SAMtools, have been added to HTSlib. + +* Files can now be accessed via Google Cloud Storage using gs: URLs, when + HTSlib is configured to use libcurl for network file access rather than + the included basic knetfile networking. + +* S3 file access now also supports the "host_base" setting in the + $HOME/.s3cfg configuration file. + +* Data URLs ("data:,text") now follow the standard RFC 2397 format and may + be base64-encoded (when written as "data:;base64,text") or may include + percent-encoded characters. 
HTSlib's previous over-simplified "data:text" + format is no longer supported -- you will need to add an initial comma. + +* When plugins are enabled, S3 support is now provided by a separate + hfile_s3 plugin rather than by hfile_libcurl itself as previously. + When --enable-libcurl is used, by default both GCS and S3 support + and plugins will also be built; they can be individually disabled + via --disable-gcs and --disable-s3. + +* The iRODS file access plugin has been moved to a separate repository. + Configure no longer has a --with-irods option; instead build the plugin + found at <https://github.com/samtools/htslib-plugins>. + +* APIs to portably read and write (possibly unaligned) data in little-endian + byte order have been added. + +* New functions bam_auxB_len(), bam_auxB2i() and bam_auxB2f() have been + added to make accessing array-type auxiliary data easier. bam_aux2i() + can now return the full range of values that can be stored in an integer + tag (including unsigned 32 bit tags). bam_aux2f() will return the value + of integer tags (as a double) as well as floating-point ones. All of + the bam_aux2 and bam_auxB2 functions will set errno if the requested + conversion is not valid. + +* New functions fai_load3() and fai_build3() allow fasta indexes to be + stored in a different location to the indexed fasta file. + +* New functions bgzf_index_dump_hfile() and bgzf_index_load_hfile() + allow bgzf index files (.gzi) to be written to / read from an existing + hFILE handle. + +* hts_idx_push() will report when trying to add a range to an index that + is beyond the limits that the given index can handle. This means trying + to index chromosomes longer than 2^29 bases with a .bai or .tbi index + will report an error instead of apparently working but creating an invalid + index entry. + +* VCF formatting is now approximately 4x faster. (Whether this is + noticeable depends on what was creating the VCF.) + +* CRAM lossy_names mode now works with TLEN of 0 or TLEN within +/- 1 + of the computed value.
Note in these situations TLEN will be + generated / fixed during CRAM decode. + +* CRAM now supports bzip2 and lzma codecs. Within htslib these are + disabled by default, but can be enabled by specifying "use_bzip2" or + "use_lzma" in an hts_opt_add() call or via the mode string of the + hts_open_format() function. + +Noteworthy changes in release 1.3.2 (13 September 2016) + +* Corrected bin calculation when converting directly from CRAM to BAM. + Previously a small fraction of converted reads would fail Picard's + validation with "bin field of BAM record does not equal value computed" + (SAMtools issue #574). + +* Plugins can now signal to HTSlib which of RTLD_LOCAL and RTLD_GLOBAL + they wish to be opened with -- previously they were always RTLD_LOCAL. + + +Noteworthy changes in release 1.3.1 (22 April 2016) + +* Improved error checking and reporting, especially of I/O errors when + writing output files (#17, #315, PR #271, PR #317). + +* Build fixes for 32-bit systems; be sure to run configure to enable + large file support and access to 2GiB+ files. + +* Numerous VCF parsing fixes (#321, #322, #323, #324, #325; PR #370). + Particular thanks to Kostya Kortchinsky of the Google Security Team + for testing and numerous input parsing bug reports. + +* HTSlib now prints an informational message when initially creating a + CRAM reference cache in the default location under your $HOME directory. + (No message is printed if you are using $REF_CACHE to specify a location.) + +* Avoided rare race condition when caching downloaded CRAM reference sequence + files, by using distinctive names for temporary files (in addition to O_EXCL, + which has always been used). Occasional corruption would previously occur + when multiple tools were simultaneously caching the same reference sequences + on an NFS filesystem that did not support O_EXCL (PR #320). + +* Prevented race condition in file access plugin loading (PR #341). 
+ +* Fixed mpileup memory leak, so no more "[bam_plp_destroy] memory leak [...] + Continue anyway" warning messages (#299). + +* Various minor CRAM fixes. + +* Fixed documentation problems #348 and #358. + + +Noteworthy changes in release 1.3 (15 December 2015) + +* Files can now be accessed via HTTPS and Amazon S3 in addition to HTTP + and FTP, when HTSlib is configured to use libcurl for network file access + rather than the included basic knetfile networking. + +* HTSlib can be built to use remote access hFILE backends (such as iRODS + and libcurl) via a plugin mechanism. This allows other backends to be + easily added and facilitates building tools that use HTSlib, as they + don't need to be linked with the backends' various required libraries. + +* When writing CRAM output, sam_open() etc now default to writing CRAM v3.0 + rather than v2.1. + +* fai_build() and samtools faidx now accept initial whitespace in ">" + headers (e.g., "> chr1 description" is taken to refer to "chr1"). + +* tabix --only-header works again (was broken in 1.2.x; #249). + +* HTSlib's configure script and Makefile now fully support the standard + convention of allowing CC/CPPFLAGS/CFLAGS/LDFLAGS/LIBS to be overridden + as needed. Previously the Makefile listened to $(LDLIBS) instead; if you + were overriding that, you should now override LIBS rather than LDLIBS. + +* Fixed bugs #168, #172, #176, #197, #206, #225, #245, #265, #295, and #296. + + +Noteworthy changes in release 1.2.1 (3 February 2015) + +* Reinstated hts_file_type() and FT_* macros, which were available until 1.1 + but briefly removed in 1.2. This function is deprecated and will be removed + in a future release -- you should use hts_detect_format() etc instead + + +Noteworthy changes in release 1.2 (2 February 2015) + +* HTSlib now has a configure script which checks your build environment + and allows for selection of optional extras. 
See INSTALL for details + +* By default, reference sequences are fetched from the EBI CRAM Reference + Registry and cached in your $HOME cache directory. This behaviour can + be controlled by setting REF_PATH and REF_CACHE environment variables + (see the samtools(1) man page for details) + +* Numerous CRAM improvements: + - Support for CRAM v3.0, an upcoming revision to CRAM supporting + better compression and per-container checksums + - EOF checking for v2.1 and v3.0 (similar to checking BAM EOF blocks) + - Non-standard values for PNEXT and TLEN fields are now preserved + - hts_set_fai_filename() now provides a reference file when encoding + - Generated read names are now numbered from 1, rather than being + labelled 'slice:record-in-slice' + - Multi-threading and speed improvements + +* New htsfile command for identifying file formats, and corresponding + file format detection APIs + +* New tabix --regions FILE, --targets FILE options for filtering via BED files + +* Optional iRODS file access, disabled by default. Configure with --with-irods + to enable accessing iRODS data objects directly via 'irods:DATAOBJ' + +* All occurrences of 2^29 in the source have been eliminated, so indexing + and querying against reference sequences larger than 512Mbp works (when + using CSI indices) + +* Support for plain GZIP compression in various places + +* VCF header editing speed improvements + +* Added seq_nt16_int[] (equivalent to the samtools API's bam_nt16_nt4_table) + +* Reinstated faidx_fetch_nseq(), which was accidentally removed from 1.1. 
+ Now faidx_fetch_nseq() and faidx_nseq() are equivalent; eventually + faidx_fetch_nseq() will be deprecated and removed [#156] + +* Fixed bugs #141, #152, #155, #158, #159, and various memory leaks diff --git a/src/htslib-1.18/README b/src/htslib-1.21/README similarity index 100% rename from src/htslib-1.18/README rename to src/htslib-1.21/README diff --git a/src/htslib-1.18/README.large_positions.md b/src/htslib-1.21/README.large_positions.md similarity index 100% rename from src/htslib-1.18/README.large_positions.md rename to src/htslib-1.21/README.large_positions.md diff --git a/src/htslib-1.19.1/annot-tsv.1 b/src/htslib-1.21/annot-tsv.1 similarity index 81% rename from src/htslib-1.19.1/annot-tsv.1 rename to src/htslib-1.21/annot-tsv.1 index 22191da..3a6034b 100644 --- a/src/htslib-1.19.1/annot-tsv.1 +++ b/src/htslib-1.21/annot-tsv.1 @@ -1,7 +1,7 @@ '\" t -.TH annot-tsv 1 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools" +.TH annot-tsv 1 "12 September 2024" "htslib-1.21" "Bioinformatics tools" .\" -.\" Copyright (C) 2015, 2017-2018, 2023 Genome Research Ltd. +.\" Copyright (C) 2015, 2017-2018, 2023-2024 Genome Research Ltd. .\" .\" Author: Petr Danecek .\" @@ -108,6 +108,11 @@ Target file to be extend with annotations from Add the same annotations multiple times if multiple overlaps are found .RE .PP +.B \-\-help +.RS 4 +This help message +.RE +.PP .BR \-\-max\-annots " INT" .RS 4 Add at most INT annotations per column to save time when many overlaps are found with a single region @@ -138,18 +143,42 @@ number of source base pairs in the overlap .RE .RE .PP +.BR \-d ", " \-\-delim " SRC:TGT" +.RS 4 +Column delimiter in the source and the target file. For example, if both files are comma-delimited, run with +"--delim ,:," or simply "--delim ,". If the source file is comma-delimited and the target file is tab-delimited, +run with "-d $',:\\t'". +.RE +.PP +.BR \-h ", " \-\-headers " SRC:TGT" +.RS 4 +Line number of the header row with column names. 
By default the first line is interpreted as header if it starts with the comment +character ("#"), otherwise expects numeric indices. However, if the first line does not start with "#" but still +contains the column names, use "--headers 1:1". To ignore existing header (skip comment lines) and use numeric indices, +use "--headers 0:0" which is equivalent to "--ignore-headers". When negative value is given, it is interpreted as the number of +lines from the end of the comment block. Specifically, "--headers -1" takes the column names from the last line of +the comment block (e.g., the "#CHROM" line in the VCF format). +.RE +.PP .BR \-H ", " \-\-ignore\-headers .RS 4 Ignore the headers completely and use numeric indexes even when a header exists .RE .PP -.BR \-O ", " \-\-overlap " FLOAT" +.BR \-I ", " \-\-no\-hdr\-idx +.RS 4 +Suppress index numbers in the printed header. If given twice, drop the entire header. +.RE +.PP +.BR \-O ", " \-\-overlap " FLOAT,[FLOAT]" .RS 4 -Minimum overlap as a fraction of region length in at least one of the overlapping regions. If also +Minimum overlap as a fraction of region length in SRC and TGT, respectively (with two numbers), or in +at least one of the overlapping regions (with a single number). If also .BR \-r ", " \-\-reciprocal is given, require at least .I FLOAT -overlap with respect to both regions +overlap with respect to both regions. Two identical numbers are equivalent to running with +.BR \-r ", " \-\-reciprocal .RE .PP .BR \-r ", " \-\-reciprocal diff --git a/src/htslib-1.19.1/annot-tsv.c b/src/htslib-1.21/annot-tsv.c similarity index 79% rename from src/htslib-1.19.1/annot-tsv.c rename to src/htslib-1.21/annot-tsv.c index 4661e6e..494c437 100644 --- a/src/htslib-1.19.1/annot-tsv.c +++ b/src/htslib-1.21/annot-tsv.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2018-2023 Genome Research Ltd. + Copyright (C) 2018-2024 Genome Research Ltd. 
Author: Petr Danecek @@ -44,6 +44,7 @@ #include "htslib/kseq.h" #include "htslib/bgzf.h" #include "htslib/regidx.h" +#include "textutils_internal.h" #define ANN_NBP 1 #define ANN_FRAC 2 @@ -71,6 +72,7 @@ typedef struct cols_t *core, *match, *transfer, *annots; int *core_idx, *match_idx, *transfer_idx, *annots_idx; int *nannots_added; // for --max-annots: the number of annotations added + char delim; int grow_n; kstring_t line; // one buffered line, a byproduct of reading the header htsFile *fp; @@ -100,11 +102,11 @@ typedef struct { nbp_t *nbp; dat_t dst, src; - char *core_str, *match_str, *transfer_str, *annots_str; + char *core_str, *match_str, *transfer_str, *annots_str, *headers_str, *delim_str; char *temp_dir, *out_fname; BGZF *out_fp; - int allow_dups, reciprocal, ignore_headers, max_annots, mode; - double overlap; + int allow_dups, max_annots, mode, no_write_hdr, overlap_either; + double overlap_src, overlap_dst; regidx_t *idx; regitr_t *itr; kstring_t tmp_kstr; @@ -282,7 +284,7 @@ int parse_tab_with_payload(const char *line, char **chr_beg, char **chr_end, hts dat_t *dat = (dat_t*) usr; - cols_t *cols = cols_split(line, NULL, '\t'); + cols_t *cols = cols_split(line, NULL, dat->delim); *((cols_t**)payload) = cols; if ( cols->n < dat->core_idx[0] ) error("Expected at least %d columns, found %d: %s\n",dat->core_idx[0]+1,cols->n,line); @@ -315,86 +317,136 @@ void free_payload(void *payload) cols_destroy(cols); } -// Parse header if present (first line has a leading #) or create a dummy header with -// numeric column names. If dummy is set, read first data line (without a leading #) -// and create a dummy header. -void parse_header(dat_t *dat, char *fname, int dummy) +// Parse header if present, the parameter irow indicates the header row line number: +// 0 .. ignore headers, create numeric fields names, 1-based indices +// N>0 .. N-th line, all previous lines are discarded +// N<0 .. 
N-th line from the end of the comment block (comment lines are prefixed with #), +// all preceding lines are discarded. +// When autodetect is set, the argument nth_row is ignored. +// Note this makes no attempt to preserve comment lines on output +void parse_header(dat_t *dat, char *fname, int nth_row, int autodetect) { dat->fp = hts_open(fname,"r"); if ( !dat->fp ) error("Failed to open: %s\n", fname); + // buffer comment lines when N<0 + int nbuf = 0; + char **buf = NULL; + if ( nth_row < 0 ) + { + buf = calloc(-nth_row,sizeof(*buf)); + if ( !buf ) error("Out of memory, failed to allocate %zu bytes\n",(-nth_row)*sizeof(*buf)); + } + + int irow = 0; cols_t *cols = NULL; while ( hts_getline(dat->fp, KS_SEP_LINE, &dat->line) > 0 ) { - if ( dat->line.s[0]=='#' ) + if ( autodetect ) + { + // if the first line is comment line, use it as a header. Otherwise go + // with numeric indices + nth_row = dat->line.s[0]=='#' ? 1 : 0; + break; + } + if ( nth_row==0 ) + { + // N=0 .. comment lines to be ignored, read until we get to the first data line + if ( dat->line.s[0]=='#' ) continue; + break; + } + if ( nth_row>0 ) { - // this is a header or comment line - if ( dummy ) continue; - cols = cols_split(dat->line.s, NULL, '\t'); + // N>1 .. regardless of this being a comment or data line, read until Nth line + if ( ++irow < nth_row ) continue; break; } + // N<0 .. keep abs(N) comment lines in a sliding buffer + if ( dat->line.s[0]!='#' ) break; // data line + if ( nbuf == -nth_row ) + { + // one more comment line and the buffer is full. 
We could use round buffer + // for efficiency, but the assumption is abs(nth_row) is small + free(buf[0]); + memmove(buf, &buf[1], (nbuf-1)*sizeof(*buf)); + nbuf--; + } + buf[nbuf++] = strdup(dat->line.s); + } + + int keep_line = 0; + if ( nth_row < 0 ) + { + if ( nbuf!=-nth_row ) + error("Found %d header lines in %s, cannot fetch N=%d from the end\n",nbuf,fname,-nth_row); + cols = cols_split(buf[0], NULL, dat->delim); + keep_line = 1; + } + else + cols = cols_split(dat->line.s, NULL, dat->delim); - // this a data line, we must be in a dummy mode - cols = cols_split(dat->line.s, NULL, '\t'); - assert(cols && cols->n); - assert(cols->off[0][0] != '#'); + if ( !dat->line.l ) error("Failed to read: %s\n", fname); + assert(cols && cols->n); + if ( nth_row == 0 ) // create numeric indices + { // create a dummy header with numeric field names kstring_t str = {0,0,0}; int i, n = cols->n; for (i=0; i0 ) kputc('\t', &str); + if ( i>0 ) kputc(dat->delim, &str); kputw(i+1, &str); } cols_destroy(cols); - cols = cols_split(str.s, NULL, '\t'); + cols = cols_split(str.s, NULL, dat->delim); free(str.s); dat->hdr.dummy = 1; - - break; + keep_line = 1; } - if ( !dat->line.l ) error("Failed to read: %s\n", fname); - assert(cols && cols->n); dat->hdr.name2idx = khash_str2int_init(); int i; for (i=0; in; i++) { char *ss = cols->off[i]; - while ( *ss && (*ss=='#' || isspace(*ss)) ) ss++; + while ( *ss && (*ss=='#' || isspace_c(*ss)) ) ss++; if ( !*ss ) error("Could not parse the header field \"%s\": %s\n", cols->off[i],dat->line.s); if ( *ss=='[' ) { char *se = ss+1; - while ( *se && isdigit(*se) ) se++; + while ( *se && isdigit_c(*se) ) se++; if ( *se==']' ) ss = se + 1; } - while ( *ss && (*ss=='#' || isspace(*ss)) ) ss++; + while ( *ss && (*ss=='#' || isspace_c(*ss)) ) ss++; if ( !*ss ) error("Could not parse the header field \"%s\": %s\n", cols->off[i],dat->line.s); cols->off[i] = ss; khash_str2int_set(dat->hdr.name2idx, cols->off[i], i); } dat->hdr.cols = cols; - if ( 
!dat->hdr.dummy ) dat->line.l = 0; + if ( !keep_line ) dat->line.l = 0; + + for (i=0; ihdr.dummy ) return; + if ( args->no_write_hdr>1 ) return; int i; kstring_t str = {0,0,0}; kputc('#', &str); for (i=0; ihdr.cols->n; i++) { - if ( i>0 ) kputc('\t', &str); - ksprintf(&str,"[%d]", i+1); + if ( i>0 ) kputc(dat->delim, &str); + if ( !args->no_write_hdr ) ksprintf(&str,"[%d]", i+1); kputs(dat->hdr.cols->off[i], &str); } if ( dat->hdr.annots ) { for (i=0; ihdr.annots->n; i++) { - if ( str.l > 1 ) kputc('\t', &str); + if ( str.l > 1 ) kputc(dat->delim, &str); kputs(dat->hdr.annots->off[i], &str); } } @@ -434,8 +486,30 @@ void sanity_check_columns(char *fname, hdr_t *hdr, cols_t *cols, int **col2idx, } void init_data(args_t *args) { - parse_header(&args->dst, args->dst.fname, args->ignore_headers); - parse_header(&args->src, args->src.fname, args->ignore_headers); + if ( !args->delim_str ) + args->dst.delim = args->src.delim = '\t'; + else if ( strlen(args->delim_str)==1 ) + args->dst.delim = args->src.delim = *args->delim_str; + else if ( strlen(args->delim_str)==3 && args->delim_str[1]==':' ) + args->src.delim = args->delim_str[0], args->dst.delim = args->delim_str[2]; + else + error("Could not parse the option --delim %s\n",args->delim_str); + + // --headers, determine header row index + int isrc = 0, idst = 0, autodetect = 1; + if ( args->headers_str ) + { + cols_t *tmp = cols_split(args->headers_str, NULL, ':'); + char *rmme; + isrc = strtol(tmp->off[0],&rmme,10); + if ( *rmme || tmp->off[0]==rmme ) error("Could not parse the option --headers %s\n",args->headers_str); + idst = strtol(tmp->n==2 ? tmp->off[1] : tmp->off[0],&rmme,10); + if ( *rmme || (tmp->n==2 ? 
tmp->off[1] : tmp->off[0])==rmme ) error("Could not parse the option --headers %s\n",args->headers_str); + cols_destroy(tmp); + autodetect = 0; + } + parse_header(&args->dst, args->dst.fname, idst, autodetect); + parse_header(&args->src, args->src.fname, isrc, autodetect); // -c, core columns if ( !args->core_str ) args->core_str = "chr,beg,end:chr,beg,end"; @@ -608,17 +682,17 @@ static void write_annots(args_t *args) { if ( args->dst.annots_idx[i]==ANN_NBP ) { - kputc('\t',&args->tmp_kstr); + kputc(args->dst.delim,&args->tmp_kstr); kputw(len,&args->tmp_kstr); } else if ( args->dst.annots_idx[i]==ANN_FRAC ) { - kputc('\t',&args->tmp_kstr); + kputc(args->dst.delim,&args->tmp_kstr); kputd((double)len/(args->nbp->end - args->nbp->beg + 1),&args->tmp_kstr); } else if ( args->dst.annots_idx[i]==ANN_CNT ) { - kputc('\t',&args->tmp_kstr); + kputc(args->dst.delim,&args->tmp_kstr); kputw(args->nbp->n/2,&args->tmp_kstr); } } @@ -662,18 +736,20 @@ void process_line(args_t *args, char *line, size_t size) int has_match = 0, annot_len = 0; while ( regitr_overlap(args->itr) ) { - if ( args->overlap ) + if ( args->overlap_src || args->overlap_dst ) { - double len1 = end - beg + 1; - double len2 = args->itr->end - args->itr->beg + 1; + double len_dst = end - beg + 1; + double len_src = args->itr->end - args->itr->beg + 1; double isec = (args->itr->end < end ? args->itr->end : end) - (args->itr->beg > beg ? args->itr->beg : beg) + 1; - if ( args->reciprocal ) + int pass_dst = isec/len_dst < args->overlap_dst ? 0 : 1; + int pass_src = isec/len_src < args->overlap_src ? 
0 : 1; + if ( args->overlap_either ) { - if ( isec/len1 < args->overlap || isec/len2 < args->overlap ) continue; + if ( !pass_dst && !pass_src ) continue; } else { - if ( isec/len1 < args->overlap && isec/len2 < args->overlap ) continue; + if ( !pass_dst || !pass_src ) continue; } } cols_t *src_cols = regitr_payload(args->itr,cols_t*); @@ -758,7 +834,7 @@ void process_line(args_t *args, char *line, size_t size) write_string(args, dst_cols->off[0], 0); for (i=1; in; i++) { - write_string(args, "\t", 1); + write_string(args, &args->dst.delim, 1); write_string(args, dst_cols->off[i], 0); } write_annots(args); @@ -796,6 +872,7 @@ static const char *usage_text(void) "\n" "Other options:\n" " --allow-dups Add annotations multiple times\n" + " --help This help message\n" " --max-annots INT Adding at most INT annotations per column to save\n" " time in big regions\n" " --version Print version string and exit\n" @@ -804,9 +881,15 @@ static const char *usage_text(void) " frac .. fraction of the target region with an\n" " overlap\n" " nbp .. number of source base pairs in the overlap\n" - " -H, --ignore-headers Use numeric indexes, ignore the headers completely\n" - " -O, --overlap FLOAT Minimum required overlap (non-reciprocal, unless -r\n" - " is given)\n" + " -d, --delim SRC:TGT Column delimiter in SRC and TGT file\n" + " -h, --headers SRC:TGT Header row line number, 0:0 is equivalent to -H, negative\n" + " value counts from the end of comment line block [1:1]\n" + " -H, --ignore-headers Use numeric indices, ignore the headers completely\n" + " -I, --no-header-idx Suppress index numbers in the printed header. 
If given\n" + " twice, drop the entire header\n" + " -O, --overlap FLOAT[,FLOAT] Minimum required overlap with respect to SRC,TGT.\n" + " If single value, the bigger overlap is considered.\n" + " Identical values are equivalent to running with -r.\n" " -r, --reciprocal Apply the -O requirement to both overlapping\n" " intervals\n" " -x, --drop-overlaps Drop overlapping regions (precludes -f)\n" @@ -847,18 +930,22 @@ int main(int argc, char **argv) {"target-file",required_argument,NULL,'t'}, {"allow-dups",no_argument,NULL,0}, {"max-annots",required_argument,NULL,2}, + {"no-header-idx",required_argument,NULL,'I'}, {"version",no_argument,NULL,1}, {"annotate",required_argument,NULL,'a'}, + {"headers",no_argument,NULL,'h'}, {"ignore-headers",no_argument,NULL,'H'}, {"overlap",required_argument,NULL,'O'}, {"reciprocal",no_argument,NULL,'r'}, {"drop-overlaps",no_argument,NULL,'x'}, - {"help",no_argument,NULL,'h'}, + {"delim",required_argument,NULL,'d'}, + {"help",no_argument,NULL,4}, {NULL,0,NULL,0} }; char *tmp = NULL; int c; - while ((c = getopt_long(argc, argv, "hc:f:m:o:s:t:a:HO:rx",loptions,NULL)) >= 0) + int reciprocal = 0; + while ((c = getopt_long(argc, argv, "c:f:m:o:s:t:a:HO:rxh:Id:",loptions,NULL)) >= 0) { switch (c) { @@ -873,22 +960,33 @@ int main(int argc, char **argv) args->max_annots = strtod(optarg, &tmp); if ( tmp==optarg || *tmp ) error("Could not parse --max-annots %s\n", optarg); break; - case 'H': args->ignore_headers = 1; break; - case 'r': args->reciprocal = 1; break; + case 'I': args->no_write_hdr++; break; + case 'd': args->delim_str = optarg; break; + case 'h': args->headers_str = optarg; break; + case 'H': args->headers_str = "0:0"; break; + case 'r': reciprocal = 1; break; case 'c': args->core_str = optarg; break; case 't': args->dst.fname = optarg; break; case 'm': args->match_str = optarg; break; case 'a': args->annots_str = optarg; break; case 'o': args->out_fname = optarg; break; case 'O': - args->overlap = strtod(optarg, &tmp); - if ( 
tmp==optarg || *tmp ) error("Could not parse --overlap %s\n", optarg); - if ( args->overlap<0 || args->overlap>1 ) error("Expected value from the interval [0,1]: --overlap %s\n", optarg); + args->overlap_src = strtod(optarg, &tmp); + if ( tmp==optarg || (*tmp && *tmp!=',') ) error("Could not parse --overlap %s\n", optarg); + if ( args->overlap_src<0 || args->overlap_src>1 ) error("Expected value(s) from the interval [0,1]: --overlap %s\n", optarg); + if ( *tmp ) + { + args->overlap_dst = strtod(tmp+1, &tmp); + if ( *tmp ) error("Could not parse --overlap %s\n", optarg); + if ( args->overlap_dst<0 || args->overlap_dst>1 ) error("Expected value(s) from the interval [0,1]: --overlap %s\n", optarg); + } + else + args->overlap_either = 1; break; case 's': args->src.fname = optarg; break; case 'f': args->transfer_str = optarg; break; case 'x': args->mode = PRINT_NONMATCHING; break; - case 'h': printf("\nVersion: %s\n%s\n",hts_version(),usage_text()); exit(EXIT_SUCCESS); break; + case 4 : printf("\nVersion: %s\n%s\n",hts_version(),usage_text()); exit(EXIT_SUCCESS); break; case '?': // fall through default: error("\nVersion: %s\n%s\n",hts_version(),usage_text()); break; } @@ -908,13 +1006,27 @@ int main(int argc, char **argv) else args->mode = PRINT_MATCHING|PRINT_NONMATCHING; } if ( (args->transfer_str || args->annots_str) && !(args->mode & PRINT_MATCHING) ) error("The option -x cannot be combined with -f and -a\n"); + if ( reciprocal ) + { + if ( args->overlap_dst && args->overlap_src && args->overlap_dst!=args->overlap_src ) + error("The combination of --reciprocal with --overlap %f,%f makes no sense: expected single value or identical values\n",args->overlap_src,args->overlap_dst); + if ( !args->overlap_src ) + args->overlap_src = args->overlap_dst; + else + args->overlap_dst = args->overlap_src; + args->overlap_either = 0; + } init_data(args); write_header(args, &args->dst); while ( read_next_line(&args->dst) ) { int i; - for (i=0; idst.grow_n; i++) kputs("\t.", 
&args->dst.line); + for (i=0; idst.grow_n; i++) + { + kputc(args->dst.delim, &args->dst.line); + kputc('.', &args->dst.line); + } process_line(args, args->dst.line.s, args->dst.line.l); args->dst.line.l = 0; } diff --git a/src/htslib-1.18/bcf_sr_sort.c b/src/htslib-1.21/bcf_sr_sort.c similarity index 100% rename from src/htslib-1.18/bcf_sr_sort.c rename to src/htslib-1.21/bcf_sr_sort.c diff --git a/src/htslib-1.18/bcf_sr_sort.h b/src/htslib-1.21/bcf_sr_sort.h similarity index 100% rename from src/htslib-1.18/bcf_sr_sort.h rename to src/htslib-1.21/bcf_sr_sort.h diff --git a/src/htslib-1.21/bgzf.c b/src/htslib-1.21/bgzf.c new file mode 100644 index 0000000..8092c7b --- /dev/null +++ b/src/htslib-1.21/bgzf.c @@ -0,0 +1,2602 @@ +/* The MIT License + + Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + 2011, 2012 Attractive Chaos + Copyright (C) 2009, 2013-2023 Genome Research Ltd + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_LIBDEFLATE +#include +#endif + +#include "htslib/hts.h" +#include "htslib/bgzf.h" +#include "htslib/hfile.h" +#include "htslib/thread_pool.h" +#include "htslib/hts_endian.h" +#include "cram/pooled_alloc.h" +#include "hts_internal.h" + +#ifndef EFTYPE +#define EFTYPE ENOEXEC +#endif + +#define BGZF_CACHE +#define BGZF_MT + +#define BLOCK_HEADER_LENGTH 18 +#define BLOCK_FOOTER_LENGTH 8 + + +/* BGZF/GZIP header (specialized from RFC 1952; little endian): + +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| + +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + BGZF extension: + ^ ^ ^ ^ + | | | | + FLG.EXTRA XLEN B C + + BGZF format is compatible with GZIP. It limits the size of each compressed + block to 2^16 bytes and adds and an extra "BC" field in the gzip header which + records the size. + +*/ +static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0"; + +#ifdef BGZF_CACHE +typedef struct { + int size; + uint8_t *block; + int64_t end_offset; +} cache_t; + +#include "htslib/khash.h" +KHASH_MAP_INIT_INT64(cache, cache_t) +#endif + +struct bgzf_cache_t { + khash_t(cache) *h; + khint_t last_pos; +}; + +#ifdef BGZF_MT + +typedef struct bgzf_job { + BGZF *fp; + unsigned char comp_data[BGZF_MAX_BLOCK_SIZE]; + size_t comp_len; + unsigned char uncomp_data[BGZF_MAX_BLOCK_SIZE]; + size_t uncomp_len; + int errcode; + int64_t block_address; + int hit_eof; +} bgzf_job; + +enum mtaux_cmd { + NONE = 0, + SEEK, + SEEK_DONE, + HAS_EOF, + HAS_EOF_DONE, + CLOSE, +}; + +// When multi-threaded bgzf_tell won't work, so we delay the hts_idx_push +// until we've written the last block. 
+typedef struct { + hts_pos_t beg, end; + int tid, is_mapped; // args for hts_idx_push + uint64_t offset, block_number; +} hts_idx_cache_entry; + +typedef struct { + int nentries, mentries; // used and allocated + hts_idx_cache_entry *e; // hts_idx elements +} hts_idx_cache_t; + +typedef struct bgzf_mtaux_t { + // Memory pool for bgzf_job structs, to avoid many malloc/free + pool_alloc_t *job_pool; + bgzf_job *curr_job; + + // Thread pool + int n_threads; + int own_pool; + hts_tpool *pool; + + // Output queue holding completed bgzf_jobs + hts_tpool_process *out_queue; + + // I/O thread. + pthread_t io_task; + pthread_mutex_t job_pool_m; + int jobs_pending; // number of jobs waiting + int flush_pending; + void *free_block; + int hit_eof; // r/w entirely within main thread + + // Message passing to the reader thread; eg seek requests + int errcode; + uint64_t block_address; + int eof; + pthread_mutex_t command_m; // Set whenever fp is being updated + pthread_cond_t command_c; + enum mtaux_cmd command; + + // For multi-threaded on-the-fly indexing. See bgzf_idx_push below. + pthread_mutex_t idx_m; + hts_idx_t *hts_idx; + uint64_t block_number, block_written; + hts_idx_cache_t idx_cache; +} mtaux_t; +#endif + +typedef struct +{ + uint64_t uaddr; // offset w.r.t. uncompressed data + uint64_t caddr; // offset w.r.t. compressed data +} +bgzidx1_t; + +struct bgzidx_t +{ + int noffs, moffs; // the size of the index, n:used, m:allocated + bgzidx1_t *offs; // offsets + uint64_t ublock_addr; // offset of the current block (uncompressed data) +}; + +/* + * Buffers up arguments to hts_idx_push for later use, once we've written all bar + * this block. This is necessary when multiple blocks are in flight (threading) + * and fp->block_address isn't known at the time of call as we have in-flight + * blocks that haven't yet been compressed. + * + * NB: this only matters when we're indexing on the fly (writing). 
+ * Normal indexing is threaded reads, but we already know block sizes + * so it's a simpler process + * + * Returns 0 on success, + * -1 on failure + */ +int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped) { + hts_idx_cache_entry *e; + mtaux_t *mt = fp->mt; + + if (!mt) + return hts_idx_push(hidx, tid, beg, end, offset, is_mapped); + + // Early check for out of range positions which would fail in hts_idx_push() + if (hts_idx_check_range(hidx, tid, beg, end) < 0) + return -1; + + pthread_mutex_lock(&mt->idx_m); + + mt->hts_idx = hidx; + hts_idx_cache_t *ic = &mt->idx_cache; + + if (ic->nentries >= ic->mentries) { + int new_sz = ic->mentries ? ic->mentries*2 : 1024; + if (!(e = realloc(ic->e, new_sz * sizeof(*ic->e)))) { + pthread_mutex_unlock(&mt->idx_m); + return -1; + } + ic->e = e; + ic->mentries = new_sz; + } + + e = &ic->e[ic->nentries++]; + e->tid = tid; + e->beg = beg; + e->end = end; + e->is_mapped = is_mapped; + e->offset = offset & 0xffff; + e->block_number = mt->block_number; + + pthread_mutex_unlock(&mt->idx_m); + + return 0; +} + +static int bgzf_idx_flush(BGZF *fp, + size_t block_uncomp_len, size_t block_comp_len) { + mtaux_t *mt = fp->mt; + + if (!mt->idx_cache.e) { + mt->block_written++; + return 0; + } + + pthread_mutex_lock(&mt->idx_m); + + hts_idx_cache_entry *e = mt->idx_cache.e; + int i; + + assert(mt->idx_cache.nentries == 0 || mt->block_written <= e[0].block_number); + + for (i = 0; i < mt->idx_cache.nentries && e[i].block_number == mt->block_written; i++) { + if (block_uncomp_len > 0 && e[i].offset == block_uncomp_len) { + /* + * If the virtual offset is at the end of the current block, + * adjust it to point to the start of the next one. This + * is needed when on-the-fly indexing has recorded a virtual + * offset just before a new block has been started, and makes + * on-the-fly and standard indexing give exactly the same results. 
+ * + * In theory the two virtual offsets are equivalent, but pointing + * to the end of a block is inefficient, and caused problems with + * versions of HTSlib before 1.11 where bgzf_read() would + * incorrectly return EOF. + */ + + // Assert that this is the last entry for the current block_number + assert(i == mt->idx_cache.nentries - 1 + || e[i].block_number < e[i + 1].block_number); + + // Work out where the next block starts. For this entry, the + // offset will be zero. + uint64_t next_block_addr = mt->block_address + block_comp_len; + if (hts_idx_push(mt->hts_idx, e[i].tid, e[i].beg, e[i].end, + next_block_addr << 16, e[i].is_mapped) < 0) { + pthread_mutex_unlock(&mt->idx_m); + return -1; + } + // Count this entry and drop out of the loop + i++; + break; + } + + if (hts_idx_push(mt->hts_idx, e[i].tid, e[i].beg, e[i].end, + (mt->block_address << 16) + e[i].offset, + e[i].is_mapped) < 0) { + pthread_mutex_unlock(&mt->idx_m); + return -1; + } + } + + memmove(&e[0], &e[i], (mt->idx_cache.nentries - i) * sizeof(*e)); + mt->idx_cache.nentries -= i; + mt->block_written++; + + pthread_mutex_unlock(&mt->idx_m); + return 0; +} + +void bgzf_index_destroy(BGZF *fp); +int bgzf_index_add_block(BGZF *fp); +static int mt_destroy(mtaux_t *mt); + +static inline void packInt16(uint8_t *buffer, uint16_t value) +{ + buffer[0] = value; + buffer[1] = value >> 8; +} + +static inline int unpackInt16(const uint8_t *buffer) +{ + return buffer[0] | buffer[1] << 8; +} + +static inline void packInt32(uint8_t *buffer, uint32_t value) +{ + buffer[0] = value; + buffer[1] = value >> 8; + buffer[2] = value >> 16; + buffer[3] = value >> 24; +} + +static void razf_info(hFILE *hfp, const char *filename) +{ + uint64_t usize, csize; + off_t sizes_pos; + + if (filename == NULL || strcmp(filename, "-") == 0) filename = "FILE"; + + // RAZF files end with USIZE,CSIZE stored as big-endian uint64_t + if ((sizes_pos = hseek(hfp, -16, SEEK_END)) < 0) goto no_sizes; + if (hread(hfp, &usize, 8) != 8 || 
hread(hfp, &csize, 8) != 8) goto no_sizes; + if (!ed_is_big()) ed_swap_8p(&usize), ed_swap_8p(&csize); + if (csize >= sizes_pos) goto no_sizes; // Very basic validity check + + hts_log_error( +"To decompress this file, use the following commands:\n" +" truncate -s %" PRIu64 " %s\n" +" gunzip %s\n" +"The resulting uncompressed file should be %" PRIu64 " bytes in length.\n" +"If you do not have a truncate command, skip that step (though gunzip will\n" +"likely produce a \"trailing garbage ignored\" message, which can be ignored).", + csize, filename, filename, usize); + return; + +no_sizes: + hts_log_error( +"To decompress this file, use the following command:\n" +" gunzip %s\n" +"This will likely produce a \"trailing garbage ignored\" message, which can\n" +"usually be safely ignored.", filename); +} + +static const char *bgzf_zerr(int errnum, z_stream *zs) +{ + static char buffer[32]; + + /* Return zs->msg if available. + zlib doesn't set this very reliably. Looking at the source suggests + that it may get set to a useful message for deflateInit2, inflateInit2 + and inflate when it returns Z_DATA_ERROR. For inflate with other + return codes, deflate, deflateEnd and inflateEnd it doesn't appear + to be useful. For the likely non-useful cases, the caller should + pass NULL into zs. 
*/ + + if (zs && zs->msg) return zs->msg; + + // gzerror OF((gzFile file, int *errnum) + switch (errnum) { + case Z_ERRNO: + return strerror(errno); + case Z_STREAM_ERROR: + return "invalid parameter/compression level, or inconsistent stream state"; + case Z_DATA_ERROR: + return "invalid or incomplete IO"; + case Z_MEM_ERROR: + return "out of memory"; + case Z_BUF_ERROR: + return "progress temporarily not possible, or in() / out() returned an error"; + case Z_VERSION_ERROR: + return "zlib version mismatch"; + case Z_NEED_DICT: + return "data was compressed using a dictionary"; + case Z_OK: // 0: maybe gzgets error Z_NULL + default: + snprintf(buffer, sizeof(buffer), "[%d] unknown", errnum); + return buffer; // FIXME: Not thread-safe. + } +} + +static BGZF *bgzf_read_init(hFILE *hfpr, const char *filename) +{ + BGZF *fp; + uint8_t magic[18]; + ssize_t n = hpeek(hfpr, magic, 18); + if (n < 0) return NULL; + + fp = (BGZF*)calloc(1, sizeof(BGZF)); + if (fp == NULL) return NULL; + + fp->is_write = 0; + fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); + if (fp->uncompressed_block == NULL) { free(fp); return NULL; } + fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; + fp->is_compressed = (n==18 && magic[0]==0x1f && magic[1]==0x8b); + fp->is_gzip = ( !fp->is_compressed || ((magic[3]&4) && memcmp(&magic[12], "BC\2\0",4)==0) ) ? 
0 : 1; + if (fp->is_compressed && (magic[3]&4) && memcmp(&magic[12], "RAZF", 4)==0) { + hts_log_error("Cannot decompress legacy RAZF format"); + razf_info(hfpr, filename); + free(fp->uncompressed_block); + free(fp); + errno = EFTYPE; + return NULL; + } +#ifdef BGZF_CACHE + if (!(fp->cache = malloc(sizeof(*fp->cache)))) { + free(fp->uncompressed_block); + free(fp); + return NULL; + } + if (!(fp->cache->h = kh_init(cache))) { + free(fp->uncompressed_block); + free(fp->cache); + free(fp); + return NULL; + } + fp->cache->last_pos = 0; +#endif + return fp; +} + +// get the compress level from the mode string: compress_level==-1 for the default level, -2 plain uncompressed +static int mode2level(const char *mode) +{ + int i, compress_level = -1; + for (i = 0; mode[i]; ++i) + if (mode[i] >= '0' && mode[i] <= '9') break; + if (mode[i]) compress_level = (int)mode[i] - '0'; + if (strchr(mode, 'u')) compress_level = -2; + return compress_level; +} +static BGZF *bgzf_write_init(const char *mode) +{ + BGZF *fp; + fp = (BGZF*)calloc(1, sizeof(BGZF)); + if (fp == NULL) goto mem_fail; + fp->is_write = 1; + int compress_level = mode2level(mode); + if ( compress_level==-2 ) + { + fp->is_compressed = 0; + return fp; + } + fp->is_compressed = 1; + + fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); + if (fp->uncompressed_block == NULL) goto mem_fail; + fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; + + fp->compress_level = compress_level < 0? 
Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 + if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; + if ( strchr(mode,'g') ) + { + // gzip output + fp->is_gzip = 1; + fp->gz_stream = (z_stream*)calloc(1,sizeof(z_stream)); + if (fp->gz_stream == NULL) goto mem_fail; + fp->gz_stream->zalloc = NULL; + fp->gz_stream->zfree = NULL; + fp->gz_stream->msg = NULL; + + int ret = deflateInit2(fp->gz_stream, fp->compress_level, Z_DEFLATED, 15|16, 8, Z_DEFAULT_STRATEGY); + if (ret!=Z_OK) { + hts_log_error("Call to deflateInit2 failed: %s", bgzf_zerr(ret, fp->gz_stream)); + goto fail; + } + } + return fp; + +mem_fail: + hts_log_error("%s", strerror(errno)); + +fail: + if (fp != NULL) { + free(fp->uncompressed_block); + free(fp->gz_stream); + free(fp); + } + return NULL; +} + +BGZF *bgzf_open(const char *path, const char *mode) +{ + BGZF *fp = 0; + if (strchr(mode, 'r')) { + hFILE *fpr; + if ((fpr = hopen(path, mode)) == 0) return 0; + fp = bgzf_read_init(fpr, path); + if (fp == 0) { hclose_abruptly(fpr); return NULL; } + fp->fp = fpr; + } else if (strchr(mode, 'w') || strchr(mode, 'a')) { + hFILE *fpw; + if ((fpw = hopen(path, mode)) == 0) return 0; + fp = bgzf_write_init(mode); + if (fp == NULL) return NULL; + fp->fp = fpw; + } + else { errno = EINVAL; return 0; } + + fp->is_be = ed_is_big(); + return fp; +} + +BGZF *bgzf_dopen(int fd, const char *mode) +{ + BGZF *fp = 0; + if (strchr(mode, 'r')) { + hFILE *fpr; + if ((fpr = hdopen(fd, mode)) == 0) return 0; + fp = bgzf_read_init(fpr, NULL); + if (fp == 0) { hclose_abruptly(fpr); return NULL; } // FIXME this closes fd + fp->fp = fpr; + } else if (strchr(mode, 'w') || strchr(mode, 'a')) { + hFILE *fpw; + if ((fpw = hdopen(fd, mode)) == 0) return 0; + fp = bgzf_write_init(mode); + if (fp == NULL) return NULL; + fp->fp = fpw; + } + else { errno = EINVAL; return 0; } + + fp->is_be = ed_is_big(); + return fp; +} + +BGZF *bgzf_hopen(hFILE *hfp, const char *mode) +{ + BGZF *fp = NULL; + if 
(strchr(mode, 'r')) { + fp = bgzf_read_init(hfp, NULL); + if (fp == NULL) return NULL; + } else if (strchr(mode, 'w') || strchr(mode, 'a')) { + fp = bgzf_write_init(mode); + if (fp == NULL) return NULL; + } + else { errno = EINVAL; return 0; } + + fp->fp = hfp; + fp->is_be = ed_is_big(); + return fp; +} + +#ifdef HAVE_LIBDEFLATE +int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) +{ + if (slen == 0) { + // EOF block + if (*dlen < 28) return -1; + memcpy(_dst, "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", 28); + *dlen = 28; + return 0; + } + + uint8_t *dst = (uint8_t*)_dst; + + if (level == 0) { + // Uncompressed data + if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1; + dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 + u16_to_le(slen, &dst[BLOCK_HEADER_LENGTH+1]); // length + u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length + memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen); + *dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; + + } else { + level = level > 0 ? level : 6; // libdeflate doesn't honour -1 as default + // NB levels go up to 12 here. 
+ int lvl_map[] = {0,1,2,3,5,6,7,8,10,12}; + level = lvl_map[level>9 ?9 :level]; + struct libdeflate_compressor *z = libdeflate_alloc_compressor(level); + if (!z) return -1; + + // Raw deflate + size_t clen = + libdeflate_deflate_compress(z, src, slen, + dst + BLOCK_HEADER_LENGTH, + *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH); + + if (clen <= 0) { + hts_log_error("Call to libdeflate_deflate_compress failed"); + libdeflate_free_compressor(z); + return -1; + } + + *dlen = clen + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; + + libdeflate_free_compressor(z); + } + + // write the header + memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block + packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes + + // write the footer + uint32_t crc = libdeflate_crc32(0, src, slen); + packInt32((uint8_t*)&dst[*dlen - 8], crc); + packInt32((uint8_t*)&dst[*dlen - 4], slen); + return 0; +} + +#else + +int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) +{ + uint32_t crc; + z_stream zs; + uint8_t *dst = (uint8_t*)_dst; + + if (level == 0) { + uncomp: + // Uncompressed data + if (*dlen < slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) return -1; + dst[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 + u16_to_le(slen, &dst[BLOCK_HEADER_LENGTH+1]); // length + u16_to_le(~slen, &dst[BLOCK_HEADER_LENGTH+3]); // ones-complement length + memcpy(dst + BLOCK_HEADER_LENGTH+5, src, slen); + *dlen = slen+5 + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; + } else { + // compress the body + zs.zalloc = NULL; zs.zfree = NULL; + zs.msg = NULL; + zs.next_in = (Bytef*)src; + zs.avail_in = slen; + zs.next_out = dst + BLOCK_HEADER_LENGTH; + zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; + int ret = deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); // -15 to disable zlib header/footer + if (ret!=Z_OK) { + hts_log_error("Call to 
deflateInit2 failed: %s", bgzf_zerr(ret, &zs)); + return -1; + } + if ((ret = deflate(&zs, Z_FINISH)) != Z_STREAM_END) { + if (ret == Z_OK && zs.avail_out == 0) { + deflateEnd(&zs); + goto uncomp; + } else { + hts_log_error("Deflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? &zs : NULL)); + } + return -1; + } + // If we used up the entire output buffer, then we either ran out of + // room or we *just* fitted, but either way we may as well store + // uncompressed for faster decode. + if (zs.avail_out == 0) { + deflateEnd(&zs); + goto uncomp; + } + if ((ret = deflateEnd(&zs)) != Z_OK) { + hts_log_error("Call to deflateEnd failed: %s", bgzf_zerr(ret, NULL)); + return -1; + } + *dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; + } + + // write the header + memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block + packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes + // write the footer + crc = crc32(crc32(0L, NULL, 0L), (Bytef*)src, slen); + packInt32((uint8_t*)&dst[*dlen - 8], crc); + packInt32((uint8_t*)&dst[*dlen - 4], slen); + return 0; +} +#endif // HAVE_LIBDEFLATE + +static int bgzf_gzip_compress(BGZF *fp, void *_dst, size_t *dlen, const void *src, size_t slen, int level) +{ + uint8_t *dst = (uint8_t*)_dst; + z_stream *zs = fp->gz_stream; + int flush = slen ? Z_PARTIAL_FLUSH : Z_FINISH; + zs->next_in = (Bytef*)src; + zs->avail_in = slen; + zs->next_out = dst; + zs->avail_out = *dlen; + int ret = deflate(zs, flush); + if (ret == Z_STREAM_ERROR) { + hts_log_error("Deflate operation failed: %s", bgzf_zerr(ret, NULL)); + return -1; + } + if (zs->avail_in != 0) { + hts_log_error("Deflate block too large for output buffer"); + return -1; + } + *dlen = *dlen - zs->avail_out; + return 0; +} + +// Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length. 
+static int deflate_block(BGZF *fp, int block_length) +{ + size_t comp_size = BGZF_MAX_BLOCK_SIZE; + int ret; + if ( !fp->is_gzip ) + ret = bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level); + else + ret = bgzf_gzip_compress(fp, fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level); + + if ( ret != 0 ) + { + hts_log_debug("Compression error %d", ret); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + fp->block_offset = 0; + return comp_size; +} + +#ifdef HAVE_LIBDEFLATE + +static int bgzf_uncompress(uint8_t *dst, size_t *dlen, + const uint8_t *src, size_t slen, + uint32_t expected_crc) { + struct libdeflate_decompressor *z = libdeflate_alloc_decompressor(); + if (!z) { + hts_log_error("Call to libdeflate_alloc_decompressor failed"); + return -1; + } + + int ret = libdeflate_deflate_decompress(z, src, slen, dst, *dlen, dlen); + libdeflate_free_decompressor(z); + + if (ret != LIBDEFLATE_SUCCESS) { + hts_log_error("Inflate operation failed: %d", ret); + return -1; + } + + uint32_t crc = libdeflate_crc32(0, (unsigned char *)dst, *dlen); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // Pretend the CRC was OK so the fuzzer doesn't have to get it right + crc = expected_crc; +#endif + if (crc != expected_crc) { + hts_log_error("CRC32 checksum mismatch"); + return -2; + } + + return 0; +} + +#else + +static int bgzf_uncompress(uint8_t *dst, size_t *dlen, + const uint8_t *src, size_t slen, + uint32_t expected_crc) { + z_stream zs = { + .zalloc = NULL, + .zfree = NULL, + .msg = NULL, + .next_in = (Bytef*)src, + .avail_in = slen, + .next_out = (Bytef*)dst, + .avail_out = *dlen + }; + + int ret = inflateInit2(&zs, -15); + if (ret != Z_OK) { + hts_log_error("Call to inflateInit2 failed: %s", bgzf_zerr(ret, &zs)); + return -1; + } + if ((ret = inflate(&zs, Z_FINISH)) != Z_STREAM_END) { + hts_log_error("Inflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? 
&zs : NULL)); + if ((ret = inflateEnd(&zs)) != Z_OK) { + hts_log_warning("Call to inflateEnd failed: %s", bgzf_zerr(ret, NULL)); + } + return -1; + } + if ((ret = inflateEnd(&zs)) != Z_OK) { + hts_log_error("Call to inflateEnd failed: %s", bgzf_zerr(ret, NULL)); + return -1; + } + *dlen = *dlen - zs.avail_out; + + uint32_t crc = crc32(crc32(0L, NULL, 0L), (unsigned char *)dst, *dlen); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // Pretend the CRC was OK so the fuzzer doesn't have to get it right + crc = expected_crc; +#endif + if (crc != expected_crc) { + hts_log_error("CRC32 checksum mismatch"); + return -2; + } + + return 0; +} +#endif // HAVE_LIBDEFLATE + +// Inflate the block in fp->compressed_block into fp->uncompressed_block +static int inflate_block(BGZF* fp, int block_length) +{ + size_t dlen = BGZF_MAX_BLOCK_SIZE; + uint32_t crc = le_to_u32((uint8_t *)fp->compressed_block + block_length-8); + int ret = bgzf_uncompress(fp->uncompressed_block, &dlen, + (Bytef*)fp->compressed_block + 18, + block_length - 18, crc); + if (ret < 0) { + if (ret == -2) + fp->errcode |= BGZF_ERR_CRC; + else + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + + return dlen; +} + +// Decompress the next part of a non-blocked GZIP file. +// Return the number of uncompressed bytes read, 0 on EOF, or a negative number on error. +// Will fill the output buffer unless the end of the GZIP file is reached. +static int inflate_gzip_block(BGZF *fp) +{ + // we will set this to true when we detect EOF, so we don't bang against the EOF more than once per call + int input_eof = 0; + + // write to the part of the output buffer after block_offset + fp->gz_stream->next_out = (Bytef*)fp->uncompressed_block + fp->block_offset; + fp->gz_stream->avail_out = BGZF_MAX_BLOCK_SIZE - fp->block_offset; + + while ( fp->gz_stream->avail_out != 0 ) { + // until we fill the output buffer (or hit EOF) + + if ( !input_eof && fp->gz_stream->avail_in == 0 ) { + // we are out of input data in the buffer. 
Get more. + fp->gz_stream->next_in = fp->compressed_block; + int ret = hread(fp->fp, fp->compressed_block, BGZF_BLOCK_SIZE); + if ( ret < 0 ) { + // hread had an error. Pass it on. + return ret; + } + fp->gz_stream->avail_in = ret; + if ( fp->gz_stream->avail_in < BGZF_BLOCK_SIZE ) { + // we have reached EOF but the decompressor hasn't necessarily + input_eof = 1; + } + } + + fp->gz_stream->msg = NULL; + // decompress as much data as we can + int ret = inflate(fp->gz_stream, Z_SYNC_FLUSH); + + if ( (ret < 0 && ret != Z_BUF_ERROR) || ret == Z_NEED_DICT ) { + // an error occurred, other than running out of space + hts_log_error("Inflate operation failed: %s", bgzf_zerr(ret, ret == Z_DATA_ERROR ? fp->gz_stream : NULL)); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } else if ( ret == Z_STREAM_END ) { + // we finished a GZIP member + + // scratch for peeking to see if the file is over + char c; + if (fp->gz_stream->avail_in > 0 || hpeek(fp->fp, &c, 1) == 1) { + // there is more data; try and read another GZIP member in the remaining data + int reset_ret = inflateReset(fp->gz_stream); + if (reset_ret != Z_OK) { + hts_log_error("Call to inflateReset failed: %s", bgzf_zerr(reset_ret, NULL)); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + } else { + // we consumed all the input data and hit Z_STREAM_END + // so stop looping, even if we never fill the output buffer + break; + } + } else if ( ret == Z_BUF_ERROR && input_eof && fp->gz_stream->avail_out > 0 ) { + // the gzip file has ended prematurely + hts_log_error("Gzip file truncated"); + fp->errcode |= BGZF_ERR_IO; + return -1; + } + } + + // when we get here, the buffer is full or there is an EOF after a complete gzip member + return BGZF_MAX_BLOCK_SIZE - fp->gz_stream->avail_out; +} + +// Returns: 0 on success (BGZF header); -1 on non-BGZF GZIP header; -2 on error +static int check_header(const uint8_t *header) +{ + if ( header[0] != 31 || header[1] != 139 || header[2] != 8 ) return -2; + return ((header[3] & 4) != 
0 + && unpackInt16((uint8_t*)&header[10]) == 6 + && header[12] == 'B' && header[13] == 'C' + && unpackInt16((uint8_t*)&header[14]) == 2) ? 0 : -1; +} + +#ifdef BGZF_CACHE +static void free_cache(BGZF *fp) +{ + khint_t k; + if (fp->is_write) return; + khash_t(cache) *h = fp->cache->h; + for (k = kh_begin(h); k < kh_end(h); ++k) + if (kh_exist(h, k)) free(kh_val(h, k).block); + kh_destroy(cache, h); + free(fp->cache); +} + +static int load_block_from_cache(BGZF *fp, int64_t block_address) +{ + khint_t k; + cache_t *p; + + khash_t(cache) *h = fp->cache->h; + k = kh_get(cache, h, block_address); + if (k == kh_end(h)) return 0; + p = &kh_val(h, k); + if (fp->block_length != 0) fp->block_offset = 0; + fp->block_address = block_address; + fp->block_length = p->size; + memcpy(fp->uncompressed_block, p->block, p->size); + if ( hseek(fp->fp, p->end_offset, SEEK_SET) < 0 ) + { + // todo: move the error up + hts_log_error("Could not hseek to %" PRId64, p->end_offset); + exit(1); + } + return p->size; +} + +static void cache_block(BGZF *fp, int size) +{ + int ret; + khint_t k, k_orig; + uint8_t *block = NULL; + cache_t *p; + //fprintf(stderr, "Cache block at %llx\n", (int)fp->block_address); + khash_t(cache) *h = fp->cache->h; + if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return; + if (fp->block_length < 0 || fp->block_length > BGZF_MAX_BLOCK_SIZE) return; + if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > (uint32_t)fp->cache_size) { + /* Remove uniformly from any position in the hash by a simple + * round-robin approach. 
An alternative strategy would be to + * remove the least recently accessed block, but the round-robin + * removal is simpler and is not expected to have a big impact + * on performance */ + if (fp->cache->last_pos >= kh_end(h)) fp->cache->last_pos = kh_begin(h); + k_orig = k = fp->cache->last_pos; + if (++k >= kh_end(h)) k = kh_begin(h); + while (k != k_orig) { + if (kh_exist(h, k)) + break; + if (++k == kh_end(h)) + k = kh_begin(h); + } + fp->cache->last_pos = k; + + if (k != k_orig) { + block = kh_val(h, k).block; + kh_del(cache, h, k); + } + } else { + block = (uint8_t*)malloc(BGZF_MAX_BLOCK_SIZE); + } + if (!block) return; + k = kh_put(cache, h, fp->block_address, &ret); + if (ret <= 0) { // kh_put failed, or in there already (shouldn't happen) + free(block); + return; + } + p = &kh_val(h, k); + p->size = fp->block_length; + p->end_offset = fp->block_address + size; + p->block = block; + memcpy(p->block, fp->uncompressed_block, p->size); +} +#else +static void free_cache(BGZF *fp) {} +static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;} +static void cache_block(BGZF *fp, int size) {} +#endif + +/* + * Absolute htell in this compressed file. + * + * Do not confuse with the external bgzf_tell macro which returns the virtual + * offset. + */ +static off_t bgzf_htell(BGZF *fp) { + if (fp->mt) { + pthread_mutex_lock(&fp->mt->job_pool_m); + off_t pos = fp->block_address + fp->block_clength; + pthread_mutex_unlock(&fp->mt->job_pool_m); + return pos; + } else { + return htell(fp->fp); + } +} + +int bgzf_read_block(BGZF *fp) +{ + hts_tpool_result *r; + + if (fp->errcode) return -1; + + if (fp->mt) { + again: + if (fp->mt->hit_eof) { + // Further reading at EOF will always return 0 + fp->block_length = 0; + return 0; + } + r = hts_tpool_next_result_wait(fp->mt->out_queue); + bgzf_job *j = r ? 
(bgzf_job *)hts_tpool_result_data(r) : NULL; + + if (!j || j->errcode == BGZF_ERR_MT) { + if (!fp->mt->free_block) { + fp->uncompressed_block = malloc(2 * BGZF_MAX_BLOCK_SIZE); + if (fp->uncompressed_block == NULL) return -1; + fp->compressed_block = (char *)fp->uncompressed_block + BGZF_MAX_BLOCK_SIZE; + } // else it's already allocated with malloc, maybe even in-use. + if (mt_destroy(fp->mt) < 0) { + fp->errcode = BGZF_ERR_IO; + } + fp->mt = NULL; + hts_tpool_delete_result(r, 0); + if (fp->errcode) { + return -1; + } + goto single_threaded; + } + + if (j->errcode) { + fp->errcode = j->errcode; + hts_log_error("BGZF decode jobs returned error %d " + "for block offset %"PRId64, + j->errcode, j->block_address); + hts_tpool_delete_result(r, 0); + return -1; + } + + if (j->hit_eof) { + if (!fp->last_block_eof && !fp->no_eof_block) { + fp->no_eof_block = 1; + hts_log_warning("EOF marker is absent. The input may be truncated"); + } + fp->mt->hit_eof = 1; + } + + // Zero length blocks in the middle of a file are (wrongly) + // considered as EOF by many callers. We work around this by + // trying again to see if we hit a genuine EOF. + if (!j->hit_eof && j->uncomp_len == 0) { + fp->last_block_eof = 1; + hts_tpool_delete_result(r, 0); + goto again; + } + + // block_length=0 and block_offset set by bgzf_seek. + if (fp->block_length != 0) fp->block_offset = 0; + if (!j->hit_eof) fp->block_address = j->block_address; + fp->block_clength = j->comp_len; + fp->block_length = j->uncomp_len; + // bgzf_read() can change fp->block_length + fp->last_block_eof = (fp->block_length == 0); + + if ( j->uncomp_len && j->fp->idx_build_otf ) + { + bgzf_index_add_block(j->fp); + j->fp->idx->ublock_addr += j->uncomp_len; + } + + // Steal the data block as it's quicker than a memcpy. + // We just need to make sure we delay the pool free. 
+ if (fp->mt->curr_job) { + pthread_mutex_lock(&fp->mt->job_pool_m); + pool_free(fp->mt->job_pool, fp->mt->curr_job); + pthread_mutex_unlock(&fp->mt->job_pool_m); + } + fp->uncompressed_block = j->uncomp_data; + fp->mt->curr_job = j; + if (fp->mt->free_block) { + free(fp->mt->free_block); // clear up last non-mt block + fp->mt->free_block = NULL; + } + + hts_tpool_delete_result(r, 0); + return 0; + } + + uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; + int count, size, block_length, remaining; + + single_threaded: + size = 0; + + int64_t block_address; + block_address = bgzf_htell(fp); + + // Reading an uncompressed file + if ( !fp->is_compressed ) + { + count = hread(fp->fp, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE); + if (count < 0) // Error + { + hts_log_error("Failed to read uncompressed data " + "at offset %"PRId64"%s%s", + block_address, errno ? ": " : "", strerror(errno)); + fp->errcode |= BGZF_ERR_IO; + return -1; + } + else if (count == 0) // EOF + { + fp->block_length = 0; + return 0; + } + if (fp->block_length != 0) fp->block_offset = 0; + fp->block_address = block_address; + fp->block_length = count; + return 0; + } + + // Reading compressed file + if ( fp->is_gzip && fp->gz_stream ) // is this is an initialized gzip stream? + { + count = inflate_gzip_block(fp); + if ( count<0 ) + { + hts_log_error("Reading GZIP stream failed at offset %"PRId64, + block_address); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + fp->block_length = count; + fp->block_address = block_address; + return 0; + } + if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; + + // loop to skip empty bgzf blocks + while (1) + { + count = hread(fp->fp, header, sizeof(header)); + if (count == 0) { // no data read + if (!fp->last_block_eof && !fp->no_eof_block && !fp->is_gzip) { + fp->no_eof_block = 1; + hts_log_warning("EOF marker is absent. 
The input may be truncated"); + } + fp->block_length = 0; + return 0; + } + int ret = 0; + if ( count != sizeof(header) || (ret=check_header(header))==-2 ) + { + fp->errcode |= BGZF_ERR_HEADER; + hts_log_error("%s BGZF header at offset %"PRId64, + ret ? "Invalid" : "Failed to read", + block_address); + return -1; + } + if ( ret==-1 ) + { + // GZIP, not BGZF + uint8_t *cblock = (uint8_t*)fp->compressed_block; + memcpy(cblock, header, sizeof(header)); + count = hread(fp->fp, cblock+sizeof(header), BGZF_BLOCK_SIZE - sizeof(header)) + sizeof(header); + + fp->is_gzip = 1; + fp->gz_stream = (z_stream*) calloc(1,sizeof(z_stream)); + // Set up zlib, using a window size of 15, and its built-in GZIP header processing (+16). + int ret = inflateInit2(fp->gz_stream, 15 + 16); + if (ret != Z_OK) + { + hts_log_error("Call to inflateInit2 failed: %s", bgzf_zerr(ret, fp->gz_stream)); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + fp->gz_stream->avail_in = count; + fp->gz_stream->next_in = cblock; + count = inflate_gzip_block(fp); + if ( count<0 ) + { + hts_log_error("Reading GZIP stream failed at offset %"PRId64, + block_address); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + fp->block_length = count; + fp->block_address = block_address; + if ( fp->idx_build_otf ) return -1; // cannot build index for gzip + return 0; + } + size = count; + block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" + if (block_length < BLOCK_HEADER_LENGTH) + { + hts_log_error("Invalid BGZF block length at offset %"PRId64, + block_address); + fp->errcode |= BGZF_ERR_HEADER; + return -1; + } + compressed_block = (uint8_t*)fp->compressed_block; + memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); + remaining = block_length - BLOCK_HEADER_LENGTH; + count = hread(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); + if (count != remaining) { + hts_log_error("Failed to read BGZF block data at offset %"PRId64 + " expected %d bytes; hread 
returned %d", + block_address, remaining, count); + fp->errcode |= BGZF_ERR_IO; + return -1; + } + size += count; + if ((count = inflate_block(fp, block_length)) < 0) { + hts_log_debug("Inflate block operation failed for " + "block at offset %"PRId64": %s", + block_address, bgzf_zerr(count, NULL)); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + fp->last_block_eof = (count == 0); + if ( count ) break; // otherwise an empty bgzf block + block_address = bgzf_htell(fp); // update for new block start + } + if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek. + fp->block_address = block_address; + fp->block_length = count; + if ( fp->idx_build_otf ) + { + bgzf_index_add_block(fp); + fp->idx->ublock_addr += count; + } + cache_block(fp, size); + return 0; +} + +ssize_t bgzf_read(BGZF *fp, void *data, size_t length) +{ + ssize_t bytes_read = 0; + uint8_t *output = (uint8_t*)data; + if (length <= 0) return 0; + assert(fp->is_write == 0); + while (bytes_read < length) { + int copy_length, available = fp->block_length - fp->block_offset; + uint8_t *buffer; + if (available <= 0) { + int ret = bgzf_read_block(fp); + if (ret != 0) { + hts_log_error("Read block operation failed with error %d after %zd of %zu bytes", fp->errcode, bytes_read, length); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + available = fp->block_length - fp->block_offset; + if (available == 0) { + if (fp->block_length == 0) + break; // EOF + + // Offset was at end of block (see commit e9863a0) + fp->block_address = bgzf_htell(fp); + fp->block_offset = fp->block_length = 0; + continue; + } else if (available < 0) { + // Block offset was set to an invalid coordinate + hts_log_error("BGZF block offset %d set beyond block size %d", + fp->block_offset, fp->block_length); + fp->errcode |= BGZF_ERR_MISUSE; + return -1; + } + } + copy_length = length - bytes_read < available? 
length - bytes_read : available; + buffer = (uint8_t*)fp->uncompressed_block; + memcpy(output, buffer + fp->block_offset, copy_length); + fp->block_offset += copy_length; + output += copy_length; + bytes_read += copy_length; + + // For raw gzip streams this avoids short reads. + if (fp->block_offset == fp->block_length) { + fp->block_address = bgzf_htell(fp); + fp->block_offset = fp->block_length = 0; + } + } + + fp->uncompressed_address += bytes_read; + + return bytes_read; +} + +// -1 for EOF, -2 for error, 0-255 for byte. +int bgzf_peek(BGZF *fp) { + int available = fp->block_length - fp->block_offset; + if (available <= 0) { + if (bgzf_read_block(fp) < 0) { + hts_log_error("Read block operation failed with error %d", fp->errcode); + fp->errcode = BGZF_ERR_ZLIB; + return -2; + } + } + available = fp->block_length - fp->block_offset; + if (available) + return ((unsigned char *)fp->uncompressed_block)[fp->block_offset]; + + return -1; +} + +ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length) +{ + ssize_t ret = hread(fp->fp, data, length); + if (ret < 0) fp->errcode |= BGZF_ERR_IO; + return ret; +} + +#ifdef BGZF_MT + +/* Function to clean up when jobs are discarded (e.g. during seek) + * This works for results too, as results are the same struct with + * decompressed data stored in it. */ +static void job_cleanup(void *arg) { + bgzf_job *j = (bgzf_job *)arg; + mtaux_t *mt = j->fp->mt; + pthread_mutex_lock(&mt->job_pool_m); + pool_free(mt->job_pool, j); + pthread_mutex_unlock(&mt->job_pool_m); +} + +static void *bgzf_encode_func(void *arg) { + bgzf_job *j = (bgzf_job *)arg; + + j->comp_len = BGZF_MAX_BLOCK_SIZE; + int ret = bgzf_compress(j->comp_data, &j->comp_len, + j->uncomp_data, j->uncomp_len, + j->fp->compress_level); + if (ret != 0) + j->errcode |= BGZF_ERR_ZLIB; + + return arg; +} + +// Optimisation for compression level 0 (uncompressed deflate blocks) +// Avoids memcpy of the data from uncompressed to compressed buffer. 
+static void *bgzf_encode_level0_func(void *arg) { + bgzf_job *j = (bgzf_job *)arg; + uint32_t crc; + j->comp_len = j->uncomp_len + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH + 5; + + // Data will have already been copied in to + // j->comp_data + BLOCK_HEADER_LENGTH + 5 + + // Add preamble + memcpy(j->comp_data, g_magic, BLOCK_HEADER_LENGTH); + u16_to_le(j->comp_len-1, j->comp_data + 16); + + // Deflate uncompressed data header + j->comp_data[BLOCK_HEADER_LENGTH] = 1; // BFINAL=1, BTYPE=00; see RFC1951 + u16_to_le(j->uncomp_len, j->comp_data + BLOCK_HEADER_LENGTH + 1); + u16_to_le(~j->uncomp_len, j->comp_data + BLOCK_HEADER_LENGTH + 3); + + // Trailer (CRC, uncompressed length) +#ifdef HAVE_LIBDEFLATE + crc = libdeflate_crc32(0, j->comp_data + BLOCK_HEADER_LENGTH + 5, + j->uncomp_len); +#else + crc = crc32(crc32(0L, NULL, 0L), + (Bytef*)j->comp_data + BLOCK_HEADER_LENGTH + 5, j->uncomp_len); +#endif + u32_to_le(crc, j->comp_data + j->comp_len - 8); + u32_to_le(j->uncomp_len, j->comp_data + j->comp_len - 4); + + return arg; +} + +// Our input block has already been decoded by bgzf_mt_read_block(). +// We need to split that into a fetch block (compressed) and make this +// do the actual decompression step. +static void *bgzf_decode_func(void *arg) { + bgzf_job *j = (bgzf_job *)arg; + + j->uncomp_len = BGZF_MAX_BLOCK_SIZE; + uint32_t crc = le_to_u32((uint8_t *)j->comp_data + j->comp_len-8); + int ret = bgzf_uncompress(j->uncomp_data, &j->uncomp_len, + j->comp_data+18, j->comp_len-18, crc); + if (ret != 0) + j->errcode |= BGZF_ERR_ZLIB; + + return arg; +} + +/* + * Nul function so we can dispatch a job with the correct serial + * to mark failure or to indicate an empty read (EOF). + */ +static void *bgzf_nul_func(void *arg) { return arg; } + +/* + * Takes compressed blocks off the results queue and calls hwrite to + * punt them to the output stream. 
+ * + * Returns NULL when no more are left, or -1 on error + */ +static void *bgzf_mt_writer(void *vp) { + BGZF *fp = (BGZF *)vp; + mtaux_t *mt = fp->mt; + hts_tpool_result *r; + + if (fp->idx_build_otf) { + fp->idx->moffs = fp->idx->noffs = 1; + fp->idx->offs = (bgzidx1_t*) calloc(fp->idx->moffs, sizeof(bgzidx1_t)); + if (!fp->idx->offs) goto err; + } + + // Iterates until result queue is shutdown, where it returns NULL. + while ((r = hts_tpool_next_result_wait(mt->out_queue))) { + bgzf_job *j = (bgzf_job *)hts_tpool_result_data(r); + assert(j); + + if (fp->idx_build_otf) { + fp->idx->noffs++; + if ( fp->idx->noffs > fp->idx->moffs ) + { + fp->idx->moffs = fp->idx->noffs; + kroundup32(fp->idx->moffs); + fp->idx->offs = (bgzidx1_t*) realloc(fp->idx->offs, fp->idx->moffs*sizeof(bgzidx1_t)); + if ( !fp->idx->offs ) goto err; + } + fp->idx->offs[ fp->idx->noffs-1 ].uaddr = fp->idx->offs[ fp->idx->noffs-2 ].uaddr + j->uncomp_len; + fp->idx->offs[ fp->idx->noffs-1 ].caddr = fp->idx->offs[ fp->idx->noffs-2 ].caddr + j->comp_len; + } + + // Flush any cached hts_idx_push calls + if (bgzf_idx_flush(fp, j->uncomp_len, j->comp_len) < 0) + goto err; + + if (hwrite(fp->fp, j->comp_data, j->comp_len) != j->comp_len) + goto err; + + // Update our local block_address. Cannot be fp->block_address due to no + // locking in bgzf_tell. + pthread_mutex_lock(&mt->idx_m); + mt->block_address += j->comp_len; + pthread_mutex_unlock(&mt->idx_m); + + /* + * Periodically call hflush (which calls fsync when on a file). + * This avoids the fsync being done at the bgzf_close stage, + * which can sometimes cause significant delays. As this is in + * a separate thread, spreading the sync delays throughout the + * program execution seems better. + * Frequency of 1/512 has been chosen by experimentation + * across local XFS, NFS and Lustre tests. 
+ */ + if (++mt->flush_pending % 512 == 0) + if (hflush(fp->fp) != 0) + goto err; + + + hts_tpool_delete_result(r, 0); + + // Also updated by main thread + pthread_mutex_lock(&mt->job_pool_m); + pool_free(mt->job_pool, j); + mt->jobs_pending--; + pthread_mutex_unlock(&mt->job_pool_m); + } + + if (hflush(fp->fp) != 0) + goto err; + + hts_tpool_process_destroy(mt->out_queue); + + return NULL; + + err: + hts_tpool_process_destroy(mt->out_queue); + return (void *)-1; +} + + +/* + * Reads a compressed block of data using hread and dispatches it to + * the thread pool for decompression. This is the analogue of the old + * non-threaded bgzf_read_block() function, but without modifying fp + * in any way (except for the read offset). All output goes via the + * supplied bgzf_job struct. + * + * Returns NULL when no more are left, or -1 on error + */ +int bgzf_mt_read_block(BGZF *fp, bgzf_job *j) +{ + uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; + int count, block_length, remaining; + + // NOTE: Guaranteed to be compressed as we block multi-threading in + // uncompressed mode. However it may be gzip compression instead + // of bgzf. 
+ + // Reading compressed file + int64_t block_address; + block_address = htell(fp->fp); + + j->block_address = block_address; // in case we exit with j->errcode + + if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; + count = hpeek(fp->fp, header, sizeof(header)); + if (count == 0) // no data read + return -1; + int ret; + if ( count != sizeof(header) || (ret=check_header(header))==-2 ) + { + j->errcode |= BGZF_ERR_HEADER; + return -1; + } + if (ret == -1) { + j->errcode |= BGZF_ERR_MT; + return -1; + } + + count = hread(fp->fp, header, sizeof(header)); + if (count != sizeof(header)) // no data read + return -1; + + block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" + if (block_length < BLOCK_HEADER_LENGTH) { + j->errcode |= BGZF_ERR_HEADER; + return -1; + } + compressed_block = (uint8_t*)j->comp_data; + memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); + remaining = block_length - BLOCK_HEADER_LENGTH; + count = hread(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); + if (count != remaining) { + j->errcode |= BGZF_ERR_IO; + return -1; + } + j->comp_len = block_length; + j->uncomp_len = BGZF_MAX_BLOCK_SIZE; + j->block_address = block_address; + j->fp = fp; + j->errcode = 0; + + return 0; +} + + +static int bgzf_check_EOF_common(BGZF *fp) +{ + uint8_t buf[28]; + off_t offset = htell(fp->fp); + if (hseek(fp->fp, -28, SEEK_END) < 0) { + if (errno == ESPIPE) { hclearerr(fp->fp); return 2; } +#ifdef _WIN32 + if (errno == EINVAL) { hclearerr(fp->fp); return 2; } +#else + // Assume that EINVAL was due to the file being less than 28 bytes + // long, rather than being a random error return from an hfile backend. + // This should be reported as "no EOF block" rather than an error. 
+ if (errno == EINVAL) { hclearerr(fp->fp); return 0; } +#endif + return -1; + } + if ( hread(fp->fp, buf, 28) != 28 ) return -1; + if ( hseek(fp->fp, offset, SEEK_SET) < 0 ) return -1; + return (memcmp("\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", buf, 28) == 0)? 1 : 0; +} + +/* + * Checks EOF from the reader thread. + */ +static void bgzf_mt_eof(BGZF *fp) { + mtaux_t *mt = fp->mt; + + pthread_mutex_lock(&mt->job_pool_m); + mt->eof = bgzf_check_EOF_common(fp); + pthread_mutex_unlock(&mt->job_pool_m); + mt->command = HAS_EOF_DONE; + pthread_cond_signal(&mt->command_c); +} + + +/* + * Performs the seek (called by reader thread). + * + * This simply drains the entire queue, throwing away blocks, seeks, + * and starts it up again. Brute force, but maybe sufficient. + */ +static void bgzf_mt_seek(BGZF *fp) { + mtaux_t *mt = fp->mt; + + hts_tpool_process_reset(mt->out_queue, 0); + pthread_mutex_lock(&mt->job_pool_m); + mt->errcode = 0; + + if (hseek(fp->fp, mt->block_address, SEEK_SET) < 0) + mt->errcode = BGZF_ERR_IO; + + pthread_mutex_unlock(&mt->job_pool_m); + mt->command = SEEK_DONE; + pthread_cond_signal(&mt->command_c); +} + +static void *bgzf_mt_reader(void *vp) { + BGZF *fp = (BGZF *)vp; + mtaux_t *mt = fp->mt; + +restart: + pthread_mutex_lock(&mt->job_pool_m); + bgzf_job *j = pool_alloc(mt->job_pool); + pthread_mutex_unlock(&mt->job_pool_m); + if (!j) goto err; + j->errcode = 0; + j->comp_len = 0; + j->uncomp_len = 0; + j->hit_eof = 0; + j->fp = fp; + + while (bgzf_mt_read_block(fp, j) == 0) { + // Dispatch + if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_decode_func, j, + job_cleanup, job_cleanup, 0) < 0) { + job_cleanup(j); + goto err; + } + + // Check for command + pthread_mutex_lock(&mt->command_m); + switch (mt->command) { + case SEEK: + bgzf_mt_seek(fp); // Sets mt->command to SEEK_DONE + pthread_mutex_unlock(&mt->command_m); + goto restart; + + case HAS_EOF: + bgzf_mt_eof(fp); // Sets mt->command to HAS_EOF_DONE + 
break; + + case SEEK_DONE: + case HAS_EOF_DONE: + pthread_cond_signal(&mt->command_c); + break; + + case CLOSE: + pthread_cond_signal(&mt->command_c); + pthread_mutex_unlock(&mt->command_m); + hts_tpool_process_destroy(mt->out_queue); + return NULL; + + default: + break; + } + pthread_mutex_unlock(&mt->command_m); + + // Allocate buffer for next block + pthread_mutex_lock(&mt->job_pool_m); + j = pool_alloc(mt->job_pool); + pthread_mutex_unlock(&mt->job_pool_m); + if (!j) { + hts_tpool_process_destroy(mt->out_queue); + return NULL; + } + j->errcode = 0; + j->comp_len = 0; + j->uncomp_len = 0; + j->hit_eof = 0; + j->fp = fp; + } + + if (j->errcode == BGZF_ERR_MT) { + // Attempt to multi-thread decode a raw gzip stream cannot be done. + // We tear down the multi-threaded decoder and revert to the old code. + if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_nul_func, j, + job_cleanup, job_cleanup, 0) < 0) { + job_cleanup(j); + hts_tpool_process_destroy(mt->out_queue); + return NULL; + } + hts_tpool_process_ref_decr(mt->out_queue); + return &j->errcode; + } + + // Dispatch an empty block so EOF is spotted. + // We also use this mechanism for returning errors, in which case + // j->errcode is set already. + + j->hit_eof = 1; + if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_nul_func, j, + job_cleanup, job_cleanup, 0) < 0) { + job_cleanup(j); + hts_tpool_process_destroy(mt->out_queue); + return NULL; + } + if (j->errcode != 0) { + hts_tpool_process_destroy(mt->out_queue); + return &j->errcode; + } + + // We hit EOF so can stop reading, but we may get a subsequent + // seek request. In this case we need to restart the reader. + // + // To handle this we wait on a condition variable and then + // monitor the command. (This could be either seek or close.) 
+ for (;;) { + pthread_mutex_lock(&mt->command_m); + if (mt->command == NONE) + pthread_cond_wait(&mt->command_c, &mt->command_m); + switch(mt->command) { + default: + pthread_mutex_unlock(&mt->command_m); + break; + + case SEEK: + bgzf_mt_seek(fp); + pthread_mutex_unlock(&mt->command_m); + goto restart; + + case HAS_EOF: + bgzf_mt_eof(fp); // Sets mt->command to HAS_EOF_DONE + pthread_mutex_unlock(&mt->command_m); + break; + + case SEEK_DONE: + case HAS_EOF_DONE: + pthread_cond_signal(&mt->command_c); + pthread_mutex_unlock(&mt->command_m); + break; + + case CLOSE: + pthread_cond_signal(&mt->command_c); + pthread_mutex_unlock(&mt->command_m); + hts_tpool_process_destroy(mt->out_queue); + return NULL; + } + } + + err: + pthread_mutex_lock(&mt->command_m); + mt->command = CLOSE; + pthread_cond_signal(&mt->command_c); + pthread_mutex_unlock(&mt->command_m); + hts_tpool_process_destroy(mt->out_queue); + return NULL; +} + +int bgzf_thread_pool(BGZF *fp, hts_tpool *pool, int qsize) { + // No gain from multi-threading when not compressed + if (!fp->is_compressed) + return 0; + + mtaux_t *mt; + mt = (mtaux_t*)calloc(1, sizeof(mtaux_t)); + if (!mt) return -1; + fp->mt = mt; + + mt->pool = pool; + mt->n_threads = hts_tpool_size(pool); + if (!qsize) + qsize = mt->n_threads*2; + if (!(mt->out_queue = hts_tpool_process_init(mt->pool, qsize, 0))) + goto err; + hts_tpool_process_ref_incr(mt->out_queue); + + mt->job_pool = pool_create(sizeof(bgzf_job)); + if (!mt->job_pool) + goto err; + + pthread_mutex_init(&mt->job_pool_m, NULL); + pthread_mutex_init(&mt->command_m, NULL); + pthread_mutex_init(&mt->idx_m, NULL); + pthread_cond_init(&mt->command_c, NULL); + mt->flush_pending = 0; + mt->jobs_pending = 0; + mt->free_block = fp->uncompressed_block; // currently in-use block + mt->block_address = fp->block_address; + pthread_create(&mt->io_task, NULL, + fp->is_write ? 
bgzf_mt_writer : bgzf_mt_reader, fp); + + return 0; + + err: + free(mt); + fp->mt = NULL; + return -1; +} + +int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) +{ + // No gain from multi-threading when not compressed + if (!fp->is_compressed || fp->is_gzip) + return 0; + + if (n_threads < 1) return -1; + hts_tpool *p = hts_tpool_init(n_threads); + if (!p) + return -1; + + if (bgzf_thread_pool(fp, p, 0) != 0) { + hts_tpool_destroy(p); + return -1; + } + + fp->mt->own_pool = 1; + + return 0; +} + +static int mt_destroy(mtaux_t *mt) +{ + int ret = 0; + + // Tell the reader to shut down + pthread_mutex_lock(&mt->command_m); + mt->command = CLOSE; + pthread_cond_signal(&mt->command_c); + hts_tpool_wake_dispatch(mt->out_queue); // unstick the reader + pthread_mutex_unlock(&mt->command_m); + + // Check for thread worker failure, indicated by is_shutdown returning 2 + // It's possible really late errors might be missed, but we can live with + // that. + ret = -(hts_tpool_process_is_shutdown(mt->out_queue) > 1); + // Destroying the queue first forces the writer to exit. + // mt->out_queue is reference counted, so destroy gets called in both + // this and the IO threads. The last to do it will clean up. + hts_tpool_process_destroy(mt->out_queue); + + // IO thread will now exit. Wait for it and perform final clean-up. + // If it returned non-NULL, it was not happy. + void *retval = NULL; + pthread_join(mt->io_task, &retval); + ret = retval != NULL ? 
-1 : ret; + + pthread_mutex_destroy(&mt->job_pool_m); + pthread_mutex_destroy(&mt->command_m); + pthread_mutex_destroy(&mt->idx_m); + pthread_cond_destroy(&mt->command_c); + if (mt->curr_job) + pool_free(mt->job_pool, mt->curr_job); + + if (mt->own_pool) + hts_tpool_destroy(mt->pool); + + pool_destroy(mt->job_pool); + + if (mt->idx_cache.e) + free(mt->idx_cache.e); + + free(mt); + fflush(stderr); + + return ret; +} + +static int mt_queue(BGZF *fp) +{ + mtaux_t *mt = fp->mt; + + mt->block_number++; + + // Also updated by writer thread + pthread_mutex_lock(&mt->job_pool_m); + bgzf_job *j = pool_alloc(mt->job_pool); + if (j) mt->jobs_pending++; + pthread_mutex_unlock(&mt->job_pool_m); + if (!j) return -1; + + j->fp = fp; + j->errcode = 0; + j->uncomp_len = fp->block_offset; + if (fp->compress_level == 0) { + memcpy(j->comp_data + BLOCK_HEADER_LENGTH + 5, fp->uncompressed_block, + j->uncomp_len); + if (hts_tpool_dispatch3(mt->pool, mt->out_queue, + bgzf_encode_level0_func, j, + job_cleanup, job_cleanup, 0) < 0) { + goto fail; + } + } else { + memcpy(j->uncomp_data, fp->uncompressed_block, j->uncomp_len); + + // Need non-block vers & job_pending? + if (hts_tpool_dispatch3(mt->pool, mt->out_queue, bgzf_encode_func, j, + job_cleanup, job_cleanup, 0) < 0) { + goto fail; + } + } + + fp->block_offset = 0; + return 0; + + fail: + job_cleanup(j); + pthread_mutex_lock(&mt->job_pool_m); + mt->jobs_pending--; + pthread_mutex_unlock(&mt->job_pool_m); + return -1; +} + +static int mt_flush_queue(BGZF *fp) +{ + mtaux_t *mt = fp->mt; + + // Drain the encoder jobs. + // We cannot use hts_tpool_flush here as it can cause deadlock if + // the queue is full up of decoder tasks. The best solution would + // be to have one input queue per type of job, but we don't right now. 
+ //hts_tpool_flush(mt->pool); + pthread_mutex_lock(&mt->job_pool_m); + int shutdown = 0; + while (mt->jobs_pending != 0) { + if ((shutdown = hts_tpool_process_is_shutdown(mt->out_queue))) + break; + pthread_mutex_unlock(&mt->job_pool_m); + usleep(10000); // FIXME: replace by condition variable + pthread_mutex_lock(&mt->job_pool_m); + } + pthread_mutex_unlock(&mt->job_pool_m); + + if (shutdown) + return -1; + + // Wait on bgzf_mt_writer to drain the queue + if (hts_tpool_process_flush(mt->out_queue) != 0) + return -1; + + return (fp->errcode == 0)? 0 : -1; +} + +static int lazy_flush(BGZF *fp) +{ + if (fp->mt) + return fp->block_offset ? mt_queue(fp) : 0; + else + return bgzf_flush(fp); +} + +#else // ~ #ifdef BGZF_MT + +int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) +{ + return 0; +} + +static inline int lazy_flush(BGZF *fp) +{ + return bgzf_flush(fp); +} + +#endif // ~ #ifdef BGZF_MT + +int bgzf_flush(BGZF *fp) +{ + if (!fp->is_write) return 0; +#ifdef BGZF_MT + if (fp->mt) { + int ret = 0; + if (fp->block_offset) ret = mt_queue(fp); + if (!ret) ret = mt_flush_queue(fp); + + // We maintain mt->block_address when threading as the + // main code can call bgzf_tell without any locks. + // (The result from tell are wrong, but we only care about the last + // 16-bits worth except for the final flush process. 
+ pthread_mutex_lock(&fp->mt->idx_m); + fp->block_address = fp->mt->block_address; + pthread_mutex_unlock(&fp->mt->idx_m); + + return ret; + } +#endif + + if (!fp->is_compressed) { + return hflush(fp->fp); + } + + while (fp->block_offset > 0) { + int block_length; + if ( fp->idx_build_otf ) + { + bgzf_index_add_block(fp); + fp->idx->ublock_addr += fp->block_offset; + } + block_length = deflate_block(fp, fp->block_offset); + if (block_length < 0) { + hts_log_debug("Deflate block operation failed: %s", bgzf_zerr(block_length, NULL)); + return -1; + } + if (hwrite(fp->fp, fp->compressed_block, block_length) != block_length) { + hts_log_error("File write failed (wrong size)"); + fp->errcode |= BGZF_ERR_IO; // possibly truncated file + return -1; + } + fp->block_address += block_length; + } + return 0; +} + +int bgzf_flush_try(BGZF *fp, ssize_t size) +{ + if (fp->block_offset + size > BGZF_BLOCK_SIZE) return lazy_flush(fp); + return 0; +} + +ssize_t bgzf_write(BGZF *fp, const void *data, size_t length) +{ + if ( !fp->is_compressed ) { + size_t push = length + (size_t) fp->block_offset; + fp->block_offset = push % BGZF_MAX_BLOCK_SIZE; + fp->block_address += (push - fp->block_offset); + return hwrite(fp->fp, data, length); + } + + const uint8_t *input = (const uint8_t*)data; + ssize_t remaining = length; + assert(fp->is_write); + while (remaining > 0) { + uint8_t* buffer = (uint8_t*)fp->uncompressed_block; + int copy_length = BGZF_BLOCK_SIZE - fp->block_offset; + if (copy_length > remaining) copy_length = remaining; + memcpy(buffer + fp->block_offset, input, copy_length); + fp->block_offset += copy_length; + input += copy_length; + remaining -= copy_length; + if (fp->block_offset == BGZF_BLOCK_SIZE) { + if (lazy_flush(fp) != 0) return -1; + } + } + return length - remaining; +} + +ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length) +{ + if ( !fp->is_compressed ) { + size_t push = length + (size_t) fp->block_offset; + fp->block_offset = push % 
BGZF_MAX_BLOCK_SIZE; + fp->block_address += (push - fp->block_offset); + return hwrite(fp->fp, data, length); + } + + const uint8_t *input = (const uint8_t*)data; + ssize_t remaining = length; + assert(fp->is_write); + uint64_t current_block; //keep track of current block + uint64_t ublock_size; // amount of uncompressed data to be fed into next block + while (remaining > 0) { + current_block = fp->idx->moffs - fp->idx->noffs; + ublock_size = current_block + 1 < fp->idx->moffs ? fp->idx->offs[current_block+1].uaddr-fp->idx->offs[current_block].uaddr : BGZF_MAX_BLOCK_SIZE; + uint8_t* buffer = (uint8_t*)fp->uncompressed_block; + int copy_length = ublock_size - fp->block_offset; + if (copy_length > remaining) copy_length = remaining; + memcpy(buffer + fp->block_offset, input, copy_length); + fp->block_offset += copy_length; + input += copy_length; + remaining -= copy_length; + if (fp->block_offset == ublock_size) { + if (lazy_flush(fp) != 0) return -1; + if (fp->idx->noffs > 0) + fp->idx->noffs--; // decrement noffs to track the blocks + } + } + return length - remaining; +} + + +ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length) +{ + ssize_t ret = hwrite(fp->fp, data, length); + if (ret < 0) fp->errcode |= BGZF_ERR_IO; + return ret; +} + +// Helper function for tidying up fp->mt and setting errcode +static void bgzf_close_mt(BGZF *fp) { + if (fp->mt) { + if (!fp->mt->free_block) + fp->uncompressed_block = NULL; + if (mt_destroy(fp->mt) < 0) + fp->errcode = BGZF_ERR_IO; + } +} + +int bgzf_close(BGZF* fp) +{ + int ret, block_length; + if (fp == 0) return -1; + if (fp->is_write && fp->is_compressed) { + if (bgzf_flush(fp) != 0) { + bgzf_close_mt(fp); + return -1; + } + fp->compress_level = -1; + block_length = deflate_block(fp, 0); // write an empty block + if (block_length < 0) { + hts_log_debug("Deflate block operation failed: %s", bgzf_zerr(block_length, NULL)); + bgzf_close_mt(fp); + return -1; + } + if (hwrite(fp->fp, fp->compressed_block, 
block_length) < 0 + || hflush(fp->fp) != 0) { + hts_log_error("File write failed"); + fp->errcode |= BGZF_ERR_IO; + return -1; + } + } + + bgzf_close_mt(fp); + + if ( fp->is_gzip ) + { + if (fp->gz_stream == NULL) ret = Z_OK; + else if (!fp->is_write) ret = inflateEnd(fp->gz_stream); + else ret = deflateEnd(fp->gz_stream); + if (ret != Z_OK) { + hts_log_error("Call to inflateEnd/deflateEnd failed: %s", bgzf_zerr(ret, NULL)); + } + free(fp->gz_stream); + } + ret = hclose(fp->fp); + if (ret != 0) return -1; + bgzf_index_destroy(fp); + free(fp->uncompressed_block); + free_cache(fp); + ret = fp->errcode ? -1 : 0; + free(fp); + return ret; +} + +void bgzf_set_cache_size(BGZF *fp, int cache_size) +{ + if (fp && fp->mt) return; // Not appropriate when multi-threading + if (fp && fp->cache) fp->cache_size = cache_size; +} + +int bgzf_check_EOF(BGZF *fp) { + int has_eof; + + if (fp->mt) { + pthread_mutex_lock(&fp->mt->command_m); + // fp->mt->command state transitions should be: + // NONE -> HAS_EOF -> HAS_EOF_DONE -> NONE + // (HAS_EOF -> HAS_EOF_DONE happens in bgzf_mt_reader thread) + if (fp->mt->command != CLOSE) + fp->mt->command = HAS_EOF; + pthread_cond_signal(&fp->mt->command_c); + hts_tpool_wake_dispatch(fp->mt->out_queue); + do { + if (fp->mt->command == CLOSE) { + // possible error in bgzf_mt_reader + pthread_mutex_unlock(&fp->mt->command_m); + return 0; + } + pthread_cond_wait(&fp->mt->command_c, &fp->mt->command_m); + switch (fp->mt->command) { + case HAS_EOF_DONE: break; + case HAS_EOF: + // Resend signal intended for bgzf_mt_reader() + pthread_cond_signal(&fp->mt->command_c); + break; + case CLOSE: + continue; + default: + abort(); // Should not get to any other state + } + } while (fp->mt->command != HAS_EOF_DONE); + fp->mt->command = NONE; + has_eof = fp->mt->eof; + pthread_mutex_unlock(&fp->mt->command_m); + } else { + has_eof = bgzf_check_EOF_common(fp); + } + + fp->no_eof_block = (has_eof == 0); + + return has_eof; +} + +static inline int64_t 
bgzf_seek_common(BGZF* fp, + int64_t block_address, int block_offset) +{ + if (fp->mt) { + // The reader runs asynchronous and does loops of: + // Read block + // Check & process command + // Dispatch decode job + // + // Once at EOF it then switches to loops of + // Wait for command + // Process command (possibly switching back to above loop). + // + // To seek we therefore send the reader thread a SEEK command, + // waking it up if blocked in dispatch and signalling if + // waiting for a command. We then wait for the response so we + // know the seek succeeded. + pthread_mutex_lock(&fp->mt->command_m); + fp->mt->hit_eof = 0; + // fp->mt->command state transitions should be: + // NONE -> SEEK -> SEEK_DONE -> NONE + // (SEEK -> SEEK_DONE happens in bgzf_mt_reader thread) + fp->mt->command = SEEK; + fp->mt->block_address = block_address; + pthread_cond_signal(&fp->mt->command_c); + hts_tpool_wake_dispatch(fp->mt->out_queue); + do { + pthread_cond_wait(&fp->mt->command_c, &fp->mt->command_m); + switch (fp->mt->command) { + case SEEK_DONE: break; + case SEEK: + // Resend signal intended for bgzf_mt_reader() + pthread_cond_signal(&fp->mt->command_c); + break; + default: + abort(); // Should not get to any other state + } + } while (fp->mt->command != SEEK_DONE); + fp->mt->command = NONE; + + fp->block_length = 0; // indicates current block has not been loaded + fp->block_address = block_address; + fp->block_offset = block_offset; + + pthread_mutex_unlock(&fp->mt->command_m); + } else { + if (hseek(fp->fp, block_address, SEEK_SET) < 0) { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + fp->block_length = 0; // indicates current block has not been loaded + fp->block_address = block_address; + fp->block_offset = block_offset; + } + + return 0; +} + +int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) +{ + if (fp->is_write || where != SEEK_SET || fp->is_gzip) { + fp->errcode |= BGZF_ERR_MISUSE; + return -1; + } + + // This is a flag to indicate we've jumped elsewhere in 
the stream, to act + // as a hint to any other code which is wrapping up bgzf for its own + // purposes. We may not be able to tell when seek happens as it can be + // done on our behalf, eg by the iterator. + // + // This is never cleared here. Any tool that needs to handle it is also + // responsible for clearing it. + fp->seeked = pos; + + return bgzf_seek_common(fp, pos >> 16, pos & 0xFFFF); +} + +int bgzf_is_bgzf(const char *fn) +{ + uint8_t buf[16]; + int n; + hFILE *fp; + if ((fp = hopen(fn, "r")) == 0) return 0; + n = hread(fp, buf, 16); + if (hclose(fp) < 0) return 0; + if (n != 16) return 0; + return check_header(buf) == 0? 1 : 0; +} + +int bgzf_compression(BGZF *fp) +{ + return (!fp->is_compressed)? no_compression : (fp->is_gzip)? gzip : bgzf; +} + +int bgzf_getc(BGZF *fp) +{ + if (fp->block_offset+1 < fp->block_length) { + fp->uncompressed_address++; + return ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; + } + + int c; + if (fp->block_offset >= fp->block_length) { + if (bgzf_read_block(fp) != 0) return -2; /* error */ + if (fp->block_length == 0) return -1; /* end-of-file */ + } + c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; + if (fp->block_offset == fp->block_length) { + fp->block_address = bgzf_htell(fp); + fp->block_offset = 0; + fp->block_length = 0; + } + fp->uncompressed_address++; + return c; +} + +int bgzf_getline(BGZF *fp, int delim, kstring_t *str) +{ + int l, state = 0; + str->l = 0; + do { + if (fp->block_offset >= fp->block_length) { + if (bgzf_read_block(fp) != 0) { state = -2; break; } + if (fp->block_length == 0) { state = -1; break; } + } + unsigned char *buf = fp->uncompressed_block; + + // Equivalent to a naive byte by byte search from + // buf + block_offset to buf + block_length. + void *e = memchr(&buf[fp->block_offset], delim, + fp->block_length - fp->block_offset); + l = e ? 
(unsigned char *)e - buf : fp->block_length; + + if (l < fp->block_length) state = 1; + l -= fp->block_offset; + if (ks_expand(str, l + 2) < 0) { state = -3; break; } + memcpy(str->s + str->l, buf + fp->block_offset, l); + str->l += l; + fp->block_offset += l + 1; + if (fp->block_offset >= fp->block_length) { + fp->block_address = bgzf_htell(fp); + fp->block_offset = 0; + fp->block_length = 0; + } + } while (state == 0); + if (state < -1) return state; + if (str->l == 0 && state < 0) return state; + fp->uncompressed_address += str->l + 1; + if ( delim=='\n' && str->l>0 && str->s[str->l-1]=='\r' ) str->l--; + str->s[str->l] = 0; + return str->l <= INT_MAX ? (int) str->l : INT_MAX; +} + +void bgzf_index_destroy(BGZF *fp) +{ + if ( !fp->idx ) return; + free(fp->idx->offs); + free(fp->idx); + fp->idx = NULL; + fp->idx_build_otf = 0; +} + +int bgzf_index_build_init(BGZF *fp) +{ + bgzf_index_destroy(fp); + fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t)); + if ( !fp->idx ) return -1; + fp->idx_build_otf = 1; // build index on the fly + return 0; +} + +/* Append an index record for the current block: uaddr is taken from + fp->idx->ublock_addr and caddr from fp->block_address. + Returns 0 on success, or -1 if the offsets array cannot be grown, in + which case the records already stored are kept. */ +int bgzf_index_add_block(BGZF *fp) +{ + fp->idx->noffs++; + if ( fp->idx->noffs > fp->idx->moffs ) + { + fp->idx->moffs = fp->idx->noffs; + kroundup32(fp->idx->moffs); + // Grow via a temporary: assigning realloc()'s result straight to + // fp->idx->offs would leak the old array on failure. + bgzidx1_t *offs = (bgzidx1_t*) realloc(fp->idx->offs, fp->idx->moffs*sizeof(bgzidx1_t)); + if ( !offs ) { + // Undo the bookkeeping so noffs/moffs still describe the old array. + fp->idx->noffs--; + fp->idx->moffs = fp->idx->noffs; + return -1; + } + fp->idx->offs = offs; + } + fp->idx->offs[ fp->idx->noffs-1 ].uaddr = fp->idx->ublock_addr; + fp->idx->offs[ fp->idx->noffs-1 ].caddr = fp->block_address; + return 0; +} + +static inline int hwrite_uint64(uint64_t x, hFILE *f) +{ + if (ed_is_big()) x = ed_swap_8(x); + if (hwrite(f, &x, sizeof(x)) != sizeof(x)) return -1; + return 0; +} + +static char * get_name_suffix(const char *bname, const char *suffix) +{ + size_t len = strlen(bname) + strlen(suffix) + 1; + char *buff = malloc(len); + if (!buff) return NULL; + snprintf(buff, len, "%s%s", bname, suffix); + return buff; +} + +int bgzf_index_dump_hfile(BGZF *fp, struct hFILE *idx, const char *name) +{ + //
Note that the index contains one extra record when indexing files opened + // for reading. The terminating record is not present when opened for writing. + // This is not a bug. + + int i; + + if (!fp->idx) { + hts_log_error("Called for BGZF handle with no index"); + errno = EINVAL; + return -1; + } + + if (bgzf_flush(fp) != 0) return -1; + + // discard the entry marking the end of the file + if (fp->mt && fp->idx) + fp->idx->noffs--; + + if (hwrite_uint64(fp->idx->noffs - 1, idx) < 0) goto fail; + for (i=1; iidx->noffs; i++) + { + if (hwrite_uint64(fp->idx->offs[i].caddr, idx) < 0) goto fail; + if (hwrite_uint64(fp->idx->offs[i].uaddr, idx) < 0) goto fail; + } + return 0; + + fail: + hts_log_error("Error writing to %s : %s", name ? name : "index", strerror(errno)); + return -1; +} + +int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix) +{ + const char *name = bname, *msg = NULL; + char *tmp = NULL; + hFILE *idx = NULL; + + if (!fp->idx) { + hts_log_error("Called for BGZF handle with no index"); + errno = EINVAL; + return -1; + } + + if ( suffix ) + { + tmp = get_name_suffix(bname, suffix); + if ( !tmp ) return -1; + name = tmp; + } + + idx = hopen(name, "wb"); + if ( !idx ) { + msg = "Error opening"; + goto fail; + } + + if (bgzf_index_dump_hfile(fp, idx, name) != 0) goto fail; + + if (hclose(idx) < 0) + { + idx = NULL; + msg = "Error on closing"; + goto fail; + } + + free(tmp); + return 0; + + fail: + if (msg != NULL) { + hts_log_error("%s %s : %s", msg, name, strerror(errno)); + } + if (idx) hclose_abruptly(idx); + free(tmp); + return -1; +} + +static inline int hread_uint64(uint64_t *xptr, hFILE *f) +{ + if (hread(f, xptr, sizeof(*xptr)) != sizeof(*xptr)) return -1; + if (ed_is_big()) ed_swap_8p(xptr); + return 0; +} + +int bgzf_index_load_hfile(BGZF *fp, struct hFILE *idx, const char *name) +{ + fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t)); + if (fp->idx == NULL) goto fail; + uint64_t x; + if (hread_uint64(&x, idx) < 0) goto fail; + + 
fp->idx->noffs = fp->idx->moffs = x + 1; + fp->idx->offs = (bgzidx1_t*) malloc(fp->idx->moffs*sizeof(bgzidx1_t)); + if (fp->idx->offs == NULL) goto fail; + fp->idx->offs[0].caddr = fp->idx->offs[0].uaddr = 0; + + int i; + for (i=1; iidx->noffs; i++) + { + if (hread_uint64(&fp->idx->offs[i].caddr, idx) < 0) goto fail; + if (hread_uint64(&fp->idx->offs[i].uaddr, idx) < 0) goto fail; + } + + return 0; + + fail: + hts_log_error("Error reading %s : %s", name ? name : "index", strerror(errno)); + if (fp->idx) { + free(fp->idx->offs); + free(fp->idx); + fp->idx = NULL; + } + return -1; +} + +int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix) +{ + const char *name = bname, *msg = NULL; + char *tmp = NULL; + hFILE *idx = NULL; + if ( suffix ) + { + tmp = get_name_suffix(bname, suffix); + if ( !tmp ) return -1; + name = tmp; + } + + idx = hopen(name, "rb"); + if ( !idx ) { + msg = "Error opening"; + goto fail; + } + + if (bgzf_index_load_hfile(fp, idx, name) != 0) goto fail; + + if (hclose(idx) != 0) { + idx = NULL; + msg = "Error closing"; + goto fail; + } + + free(tmp); + return 0; + + fail: + if (msg != NULL) { + hts_log_error("%s %s : %s", msg, name, strerror(errno)); + } + if (idx) hclose_abruptly(idx); + free(tmp); + return -1; +} + +int bgzf_useek(BGZF *fp, off_t uoffset, int where) +{ + if (fp->is_write || where != SEEK_SET || fp->is_gzip) { + fp->errcode |= BGZF_ERR_MISUSE; + return -1; + } + if (uoffset >= fp->uncompressed_address - fp->block_offset && + uoffset < fp->uncompressed_address + fp->block_length - fp->block_offset) { + // Can seek into existing data + fp->block_offset += uoffset - fp->uncompressed_address; + fp->uncompressed_address = uoffset; + return 0; + } + if ( !fp->is_compressed ) + { + if (hseek(fp->fp, uoffset, SEEK_SET) < 0) + { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + fp->block_length = 0; // indicates current block has not been loaded + fp->block_address = uoffset; + fp->block_offset = 0; + if (bgzf_read_block(fp) < 
0) { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + fp->uncompressed_address = uoffset; + return 0; + } + + if ( !fp->idx ) + { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + + // binary search for the last index entry whose uaddr is <= uoffset + int ilo = 0, ihi = fp->idx->noffs - 1; + while ( ilo<=ihi ) + { + int i = ilo + (ihi - ilo) / 2; + if ( uoffset < fp->idx->offs[i].uaddr ) ihi = i - 1; + else ilo = i + 1; + } + int i = ilo-1; + if ( i < 0 ) + { + // Empty index, or no entry at or below uoffset: offs[-1] must not be read + fp->errcode |= BGZF_ERR_IO; + return -1; + } + off_t offset = 0; + if (bgzf_seek_common(fp, fp->idx->offs[i].caddr, 0) < 0) + return -1; + + if ( bgzf_read_block(fp) < 0 ) { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + offset = uoffset - fp->idx->offs[i].uaddr; + if ( offset > 0 ) + { + if (offset > fp->block_length) { + fp->errcode |= BGZF_ERR_IO; + return -1; //offset outside the available data + } + fp->block_offset = offset; + assert( fp->block_offset <= fp->block_length ); // todo: skipped, unindexed, blocks + } + fp->uncompressed_address = uoffset; + return 0; +} + +off_t bgzf_utell(BGZF *fp) +{ + return fp->uncompressed_address; // currently maintained only when reading +} + +/* prototype is in hfile_internal.h */ +struct hFILE *bgzf_hfile(struct BGZF *fp) { + return fp->fp; +} diff --git a/src/htslib-1.21/bgzip.1 b/src/htslib-1.21/bgzip.1 new file mode 100644 index 0000000..1e115d0 --- /dev/null +++ b/src/htslib-1.21/bgzip.1 @@ -0,0 +1,206 @@ +.TH bgzip 1 "12 September 2024" "htslib-1.21" "Bioinformatics tools" +.SH NAME +.PP +bgzip \- Block compression/decompression utility +.\" +.\" Copyright (C) 2009-2011 Broad Institute. +.\" Copyright (C) 2018, 2021-2024 Genome Research Limited.
+.\" +.\" Author: Heng Li +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +. +.\" For code blocks and examples (cf groff's Ultrix-specific man macros) +.de EX + +. in +\\$1 +. nf +. ft CR +.. +.de EE +. ft +. fi +. in + +.. +.SH SYNOPSIS +.PP +.B bgzip +.RB [ -cdfhikrt ] +.RB [ -b +.IR virtualOffset ] +.RB [ -I +.IR index_name ] +.RB [ -l +.IR compression_level ] +.RB [ -o +.IR outfile ] +.RB [ -s +.IR size ] +.RB [ -@ +.IR threads ] +.RI [ file " ...]" +.PP +.SH DESCRIPTION +.PP +Bgzip compresses files in a similar manner to, and compatible with, gzip(1). +The file is compressed into a series of small (less than 64K) 'BGZF' blocks. +This allows indexes to be built against the compressed file and used to +retrieve portions of the data without having to decompress the entire file. 
+ +If no files are specified on the command line, bgzip will compress (or +decompress if the -d option is used) standard input to standard output. +If a file is specified, it will be compressed (or decompressed with -d). +If the -c option is used, the result will be written to standard output, +otherwise when compressing bgzip will write to a new file with a .gz +suffix and remove the original. When decompressing the input file must +have a .gz suffix, which will be removed to make the output name. Again +after decompression completes the input file will be removed. When multiple +files are given as input, the operation is performed on all of them. Access +and modification time of input file from filesystem is set to output file. +Note, access time may get updated by system when it deems appropriate. + +.SH OPTIONS +.TP 10 +.B "--binary" +Bgzip will attempt to ensure BGZF blocks end on a newline when the +input is a text file. The exception to this is where a single line is +larger than a BGZF block (64Kb). This can aid tools that use the +index to perform random access on the compressed stream, as the start +of a block is likely to also be the start of a text record. + +This option processes text files as if they were binary content, +ignoring the location of newlines. This also restores the behaviour +for text files to bgzip version 1.15 and earlier. +.TP +.BI "-b, --offset " INT +Decompress to standard output from virtual file position (0-based uncompressed +offset). +Implies -c and -d. +.TP +.B "-c, --stdout" +Write to standard output, keep original files unchanged. +.TP +.B "-d, --decompress" +Decompress. +.TP +.B "-f, --force" +Overwrite files without asking, or decompress files that don't have a known +compression filename extension (e.g., \fI.gz\fR) without asking. +Use \fB--force\fR twice to do both without asking. +.TP +.B "-g, --rebgzip" +Try to use an existing index to create a compressed file with matching +block offsets. 
The index must be specified using the \fB-I +\fIfile.gzi\fR option. +Note that this assumes that the same compression library and level are in use +as when making the original file. +Don't use it unless you know what you're doing. +.TP +.B "-h, --help" +Displays a help message. +.TP +.B "-i, --index" +Create a BGZF index while compressing. +Unless the -I option is used, this will have the name of the compressed +file with .gzi appended to it. +.TP +.BI "-I, --index-name " FILE +Index file name. +.TP +.B "-k, --keep" +Do not delete input file during operation. +.TP +.BI "-l, --compress-level " INT +Compression level to use when compressing. +From 0 to 9, or -1 for the default level set by the compression library. [-1] +.TP +.BI "-o, --output " FILE +Write to a file, keep original files unchanged, will overwrite an existing +file. +.TP +.B "-r, --reindex" +Rebuild the index on an existing compressed file. +.TP +.BI "-s, --size " INT +Decompress INT bytes (uncompressed size) to standard output. +Implies -c. +.TP +.B "-t, --test" +Test the integrity of the compressed file. +.TP +.BI "-@, --threads " INT +Number of threads to use [1]. +.PP + +.SH BGZF FORMAT +The BGZF format written by bgzip is described in the SAM format specification +available from http://samtools.github.io/hts-specs/SAMv1.pdf. + +It makes use of a gzip feature which allows compressed files to be +concatenated. +The input data is divided into blocks which are no larger than 64 kilobytes +both before and after compression (including compression headers). +Each block is compressed into a gzip file. +The gzip header includes an extra sub-field with identifier 'BC' and the length +of the compressed block, including all headers. + +.SH GZI FORMAT +The index format is a binary file listing pairs of compressed and +uncompressed offsets in a BGZF file. +Each compressed offset points to the start of a BGZF block. +The uncompressed offset is the corresponding location in the uncompressed +data stream. 
+ +All values are stored as little-endian 64-bit unsigned integers. + +The file contents are: +.EX 4 +uint64_t number_entries +.EE +followed by number_entries pairs of: +.EX 4 +uint64_t compressed_offset +uint64_t uncompressed_offset +.EE + +.SH EXAMPLES +.EX 4 +# Compress stdin to stdout +bgzip < /usr/share/dict/words > /tmp/words.gz + +# Make a .gzi index +bgzip -r /tmp/words.gz + +# Extract part of the data using the index +bgzip -b 367635 -s 4 /tmp/words.gz + +# Uncompress the whole file, removing the compressed copy +bgzip -d /tmp/words.gz +.EE + +.SH AUTHOR +.PP +The BGZF library was originally implemented by Bob Handsaker and modified +by Heng Li for remote file access and in-memory caching. + +.SH SEE ALSO +.IR gzip (1), +.IR tabix (1) diff --git a/src/htslib-1.21/bgzip.c b/src/htslib-1.21/bgzip.c new file mode 100644 index 0000000..687b29d --- /dev/null +++ b/src/htslib-1.21/bgzip.c @@ -0,0 +1,771 @@ +/* bgzip.c -- Block compression/decompression utility. + + Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology + Copyright (C) 2010, 2013-2019, 2021-2024 Genome Research Ltd. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notices and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "htslib/bgzf.h" +#include "htslib/hts.h" +#include "htslib/hfile.h" + +#ifdef _WIN32 +# define WIN32_LEAN_AND_MEAN +# include +# include +#endif + +static const int WINDOW_SIZE = BGZF_BLOCK_SIZE; + +static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) error(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static int ask_yn(void) +{ + char line[1024]; + if (fgets(line, sizeof line, stdin) == NULL) + return 0; + return line[0] == 'Y' || line[0] == 'y'; +} + +static int confirm_overwrite(const char *fn) +{ + int save_errno = errno; + int ret = 0; + + if (isatty(STDIN_FILENO)) { + fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn); + if (ask_yn()) ret = 1; + } + + errno = save_errno; + return ret; +} + +static int known_extension(const char *ext) +{ + static const char *known[] = { + "gz", "bgz", "bgzf", + NULL + }; + + const char **p; + for (p = known; *p; p++) + if (strcasecmp(ext, *p) == 0) return 1; + return 0; +} + +static int confirm_filename(int *is_forced, const char *name, const char *ext) +{ + if (*is_forced) { + (*is_forced)--; + return 1; + } + + if (!isatty(STDIN_FILENO)) + return 0; + + fprintf(stderr, "[bgzip] .%s is not a known extension; do you wish to decompress to %s (y or n)? 
", ext, name); + return ask_yn(); +} + +/* getfilespec - get file status data + path - file path for which status to be retrieved + status - pointer to status structure in which the data to be stored + returns 0 on success and -1 on failure +*/ +static int getfilespec(const char *path, struct stat *status) +{ + if (!path || !status) { //invalid + return -1; + } + if (!strcmp(path, "-")) { //cant get / set for stdin/out, return success + return 0; + } + if (stat(path, status) < 0) { + return -1; + } + return 0; +} + +/* setfilespec - set file status data + path - file path for which status to be set + status - pointer to status structure in which the data is present + returns 0 on success and -1 on failure + sets only the time as of now. +*/ +static int setfilespec(const char *path, const struct stat *status) +{ + if (!path || !status) { //invalid + return -1; + } + if (!strcmp(path, "-")) { //cant get / set for stdin/out, return success + return 0; + } + +#ifdef _WIN32 + struct _utimbuf tval; + //time upto sec - access & modification time + tval.actime = status->st_atime; + tval.modtime = status->st_mtime; + if (_utime(path, &tval) < 0) { + fprintf(stderr, "[bgzip] Failed to set file specifications.\n"); + return -1; + } +#else + struct timeval tval[2]; + memset(&tval[0], 0, sizeof(tval)); + //time upto sec - access time + tval[0].tv_sec = status->st_atime; + //time upto sec - modification time + tval[1].tv_sec = status->st_mtime; + if (utimes(path, &tval[0]) < 0) { + fprintf(stderr, "[bgzip] Failed to set file specifications.\n"); + return -1; + } +#endif //_WIN32 + return 0; +} + + +static int check_name_and_extension(char *name, int *forced) { + size_t pos; + char *ext; + + for (pos = strlen(name); pos > 0; --pos) + if (name[pos] == '.' 
|| name[pos] == '/') break; + + if (pos == 0 || name[pos] != '.') { + fprintf(stderr, "[bgzip] can't find an extension in %s -- please rename\n", name); + return 1; + } + + name[pos] = '\0'; + ext = &name[pos+1]; + + if (!(known_extension(ext) || confirm_filename(forced, name, ext))) { + fprintf(stderr, "[bgzip] unknown extension .%s -- declining to decompress to %s\n", ext, name); + return 2; //explicit N, continue and return 2 + } + + return 0; +} + + +static int bgzip_main_usage(FILE *fp, int status) +{ + fprintf(fp, "\n"); + fprintf(fp, "Version: %s\n", hts_version()); + fprintf(fp, "Usage: bgzip [OPTIONS] [FILE] ...\n"); + fprintf(fp, "Options:\n"); + fprintf(fp, " -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)\n"); + fprintf(fp, " -c, --stdout write on standard output, keep original files unchanged\n"); + fprintf(fp, " -d, --decompress decompress\n"); + fprintf(fp, " -f, --force overwrite files without asking\n"); + fprintf(fp, " -g, --rebgzip use an index file to bgzip a file\n"); + fprintf(fp, " -h, --help give this help\n"); + fprintf(fp, " -i, --index compress and create BGZF index\n"); + fprintf(fp, " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\n"); + fprintf(fp, " -k, --keep don't delete input files during operation\n"); + fprintf(fp, " -l, --compress-level INT Compression level to use when compressing; 0 to 9, or -1 for default [-1]\n"); + fprintf(fp, " -o, --output FILE write to file, keep original files unchanged\n"); + fprintf(fp, " -r, --reindex (re)index compressed file\n"); + fprintf(fp, " -s, --size INT decompress INT bytes (uncompressed size)\n"); + fprintf(fp, " -t, --test test integrity of compressed file\n"); + fprintf(fp, " --binary Don't align blocks with text lines\n"); + fprintf(fp, " -@, --threads INT number of compression threads to use [1]\n"); + return status; +} + +int main(int argc, char **argv) +{ + int c, compress, compress_level = -1, pstdout, is_forced, test, index = 0, rebgzip 
= 0, reindex = 0, keep, binary; + BGZF *fp; + char *buffer; + long start, end, size; + struct stat filestat; + char *statfilename = NULL; + char *index_fname = NULL, *write_fname = NULL; + int threads = 1, isstdin = 0, usedstdout = 0, ret = 0, exp_out_open = 0, f_dst = -1; + + static const struct option loptions[] = + { + {"help", no_argument, NULL, 'h'}, + {"offset", required_argument, NULL, 'b'}, + {"stdout", no_argument, NULL, 'c'}, + {"decompress", no_argument, NULL, 'd'}, + {"force", no_argument, NULL, 'f'}, + {"index", no_argument, NULL, 'i'}, + {"index-name", required_argument, NULL, 'I'}, + {"compress-level", required_argument, NULL, 'l'}, + {"reindex", no_argument, NULL, 'r'}, + {"rebgzip",no_argument,NULL,'g'}, + {"size", required_argument, NULL, 's'}, + {"threads", required_argument, NULL, '@'}, + {"test", no_argument, NULL, 't'}, + {"version", no_argument, NULL, 1}, + {"keep", no_argument, NULL, 'k'}, + {"binary", no_argument, NULL, 2}, + {"output", required_argument, NULL, 'o'}, + {NULL, 0, NULL, 0} + }; + + compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; test = 0; keep = 0; binary = 0; + while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:grtko:",loptions,NULL)) >= 0){ + switch(c){ + case 'd': compress = 0; break; + case 'c': pstdout = 1; break; + case 'b': start = atol(optarg); compress = 0; pstdout = 1; break; + case 's': size = atol(optarg); pstdout = 1; break; + case 'f': is_forced++; break; + case 'i': index = 1; break; + case 'I': index_fname = optarg; break; + case 'l': compress_level = atol(optarg); break; + case 'g': rebgzip = 1; break; + case 'r': reindex = 1; compress = 0; break; + case '@': threads = atoi(optarg); break; + case 't': test = 1; compress = 0; reindex = 0; break; + case 'k': keep = 1; break; + case 'o': write_fname = optarg; break; + case 1: + printf( +"bgzip (htslib) %s\n" +"Copyright (C) 2024 Genome Research Ltd.\n", hts_version()); + return EXIT_SUCCESS; + case 2: binary = 1; break; + case 'h': 
return bgzip_main_usage(stdout, EXIT_SUCCESS); + case '?': return bgzip_main_usage(stderr, EXIT_FAILURE); + } + } + if (size >= 0) end = start + size; + if (end >= 0 && end < start) { + fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); + return 1; + } + + if ( (index || reindex) && rebgzip ) + { + fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n"); + return 1; + } + if ( rebgzip && !index_fname ) + { + fprintf(stderr, "[bgzip] Index file name expected with rebgzip. See -I option.\n"); + return 1; + } + /* avoid -I / indexfile with multiple inputs while index/reindex. these wont be set during + read/decompress and are not considered even if set */ + if ( (index || reindex) && !write_fname && index_fname && argc - optind > 1) { + fprintf(stderr, "[bgzip] Cannot specify index filename with multiple data file on index, reindex.\n"); + return 1; + } + + if (write_fname) { + if (pstdout) { + fprintf(stderr, "[bgzip] Cannot write to %s and stdout at the same time.\n", write_fname); + return 1; + } else if (strncmp(write_fname, "-", strlen(write_fname)) == 0) { + // stdout has special handling so treat as -c + pstdout = 1; + write_fname = NULL; + } + } + + do { + isstdin = optind >= argc ? 1 : !strcmp("-", argv[optind]); //using stdin or not? + /* when a named output file is not used, stdout is in use when explicitly + selected or when stdin in is in use, it needs to be closed + explicitly to get all io errors*/ + + if (!write_fname) + usedstdout |= isstdin || pstdout || test; + + statfilename = NULL; + + if (compress == 1) { + hFILE* f_src = NULL; + char out_mode[3] = "w\0"; + char out_mode_exclusive[4] = "wx\0"; + + if (compress_level < -1 || compress_level > 9) { + fprintf(stderr, "[bgzip] Invalid compress-level: %d\n", compress_level); + return 1; + } + if (compress_level >= 0) { + out_mode[1] = compress_level + '0'; + out_mode_exclusive[2] = compress_level + '0'; + } + if (!(f_src = hopen(!isstdin ? 
argv[optind] : "-", "r"))) { + fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), isstdin ? "stdin" : argv[optind]); + return 1; + } + + if (write_fname) { + if (!exp_out_open) { // only open this file once for writing, close at the end + if ((fp = bgzf_open(write_fname, out_mode)) == NULL) { + fprintf(stderr, "[bgzip] can't create %s: %s\n", write_fname, strerror(errno)); + return 1; + } else { + exp_out_open = 1; + } + } + } else if ( argc>optind && !isstdin ) //named input file that isn't an explicit "-" + { + if (pstdout) + fp = bgzf_open("-", out_mode); + else + { + char *name = malloc(strlen(argv[optind]) + 5); + strcpy(name, argv[optind]); + strcat(name, ".gz"); + fp = bgzf_open(name, is_forced? out_mode : out_mode_exclusive); + if (fp == NULL && errno == EEXIST) { + if (confirm_overwrite(name)) { + fp = bgzf_open(name, out_mode); + } + else { + ret = 2; //explicit N - no overwrite, continue and return 2 + hclose_abruptly(f_src); + free(name); + continue; + } + } + if (fp == NULL) { + fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); + free(name); + return 1; + } + statfilename = name; + } + } + else if (!pstdout && isatty(fileno((FILE *)stdout)) ) + return bgzip_main_usage(stderr, EXIT_FAILURE); + else if ( index && !index_fname ) + { + fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); + return 1; + } + else + fp = bgzf_open("-", out_mode); + + if ( index ) bgzf_index_build_init(fp); + if (threads > 1) + bgzf_mt(fp, threads, 256); + + buffer = malloc(WINDOW_SIZE); + if (!buffer) { + if (statfilename) { + free(statfilename); + } + return 1; + } + if (rebgzip){ + if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.%s\n", !isstdin ? argv[optind] : index_fname, !isstdin ? 
"gzi" : ""); + + while ((c = hread(f_src, buffer, WINDOW_SIZE)) > 0) + if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); + } + else { + htsFormat fmt; + int textual = 0; + if (!binary + && hts_detect_format(f_src, &fmt) == 0 + && fmt.compression == no_compression) { + switch(fmt.format) { + case text_format: + case sam: + case vcf: + case bed: + case fasta_format: + case fastq_format: + case fai_format: + case fqi_format: + textual = 1; + break; + default: break; // silence clang warnings + } + } + + if (binary || !textual) { + // Binary data, either detected or explicit + while ((c = hread(f_src, buffer, WINDOW_SIZE)) > 0) + if (bgzf_write(fp, buffer, c) < 0) + error("Could not write %d bytes: Error %d\n", + c, fp->errcode); + } else { + /* Text mode, try a flush after a newline */ + int in_header = 1, n = 0, long_line = 0; + while ((c = hread(f_src, buffer+n, WINDOW_SIZE-n)) > 0) { + int c2 = c+n; + int flush = 0; + if (in_header && + (long_line || buffer[0] == '@' || buffer[0] == '#')) { + // Scan forward to find the last header line. + int last_start = 0; + n = 0; + while (n < c2) { + if (buffer[n++] != '\n') + continue; + + last_start = n; + if (n < c2 && + !(buffer[n] == '@' || buffer[n] == '#')) { + in_header = 0; + break; + } + } + if (!last_start) { + n = c2; + long_line = 1; + } else { + n = last_start; + flush = 1; + long_line = 0; + } + } else { + // Scan backwards to find the last newline. + n += c; // c read plus previous n overflow + while (--n >= 0 && ((char *)buffer)[n] != '\n') + ; + + if (n >= 0) { + flush = 1; + n++; + } else { + n = c2; + } + } + + // Pos n is either at the end of the buffer with flush==0, + // or the first byte after a newline and a flush point. 
+ if (bgzf_write(fp, buffer, n) < 0) + error("Could not write %d bytes: Error %d\n", + n, fp->errcode); + if (flush) + if (bgzf_flush_try(fp, 65536) < 0) {// force + if (statfilename) { + free(statfilename); + } + return -1; + } + + memmove(buffer, buffer+n, c2-n); + n = c2-n; + } + + // Trailing data. + if (bgzf_write(fp, buffer, n) < 0) + error("Could not write %d bytes: Error %d\n", + n, fp->errcode); + } + } + if ( index && !write_fname ) + { + if (index_fname) { + if (bgzf_index_dump(fp, index_fname, NULL) < 0) + error("Could not write index to '%s'\n", index_fname); + } else if (!isstdin) { + if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0) + error("Could not write index to '%s.gz.gzi'\n", argv[optind]); + } + else { + //stdin, cant create index file as name is not present "-.gz.gzi" not a valid one! + error("Can not write index for stdin data without index filename, use -I option to set index file.\n"); + } + } + + if (!write_fname) { + if (bgzf_close(fp) < 0) + error("Output close failed: Error %d\n", fp->errcode); + } + + if (hclose(f_src) < 0) + error("Input close failed\n"); + + if (statfilename) { + //get input file timestamp + if (!getfilespec(argv[optind], &filestat)) { + //set output file timestamp + if (setfilespec(statfilename, &filestat) < 0) { + fprintf(stderr, "[bgzip] Failed to set file specification.\n"); + } + } + else { + fprintf(stderr, "[bgzip] Failed to get file specification.\n"); + } + free(statfilename); + } + + if (argc > optind && !pstdout && !keep && !isstdin && !write_fname) unlink(argv[optind]); + + free(buffer); + } + else if ( reindex ) + { + if ( argc>optind && !isstdin ) + { + fp = bgzf_open(argv[optind], "r"); + if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]); + } + else + { + if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n"); + fp = bgzf_open("-", "r"); + if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno)); + } + + buffer = 
malloc(BGZF_BLOCK_SIZE); + bgzf_index_build_init(fp); + int ret; + while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ; + free(buffer); + if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n"); + + if ( index_fname ) { + if (bgzf_index_dump(fp, index_fname, NULL) < 0) + error("Could not write index to '%s'\n", index_fname); + } else if (!isstdin) { + if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0) + error("Could not write index to '%s.gzi'\n", argv[optind]); + } + else { + //stdin, cant create index file as name is not present "-.gzi" not a valid one! + error("Can not write index for stdin data without index filename, use -I option to set index file.\n"); + } + + if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode); + } + else + { + int is_forced_tmp = is_forced; + + if ( argc>optind && !isstdin ) + { + fp = bgzf_open(argv[optind], "r"); + if (fp == NULL) { + fprintf(stderr, "[bgzip] Could not open %s: %s\n", argv[optind], strerror(errno)); + return 1; + } + if (bgzf_compression(fp) == no_compression) { + fprintf(stderr, "[bgzip] %s: not a compressed file -- ignored\n", argv[optind]); + bgzf_close(fp); + return 1; + } + + if (pstdout || test) { + f_dst = fileno(stdout); + } else { + const int wrflags = O_WRONLY | O_CREAT | O_TRUNC; + char *name; + int check; + + if (!(name = strdup(argv[optind]))) { + fprintf(stderr, "[bgzip] unable to allocate memory for output file name.\n"); + bgzf_close(fp); + return 1; + } + + if ((check = check_name_and_extension(name, &is_forced_tmp))) { + bgzf_close(fp); + + if (check == 1) { + return 1; + } else { + ret = 2; + continue; + } + } + + if (!exp_out_open) { + if (write_fname) { // only open file once and don't care about overwriting + is_forced_tmp = 1; + exp_out_open = 1; + } + + f_dst = open(write_fname ? write_fname : name, is_forced_tmp? 
wrflags : wrflags|O_EXCL, 0666); + + if (f_dst < 0 && errno == EEXIST) { + if (confirm_overwrite(name)) { + f_dst = open(name, wrflags, 0666); + } + else { + ret = 2; //explicit N - no overwrite, continue and return 2 + bgzf_close(fp); + free(name); + continue; + } + } + if (f_dst < 0) { + fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); + free(name); + return 1; + } + } + + statfilename = name; + } + } + else if (!pstdout && isatty(fileno((FILE *)stdin)) ) + return bgzip_main_usage(stderr, EXIT_FAILURE); + else + { + f_dst = fileno(stdout); + fp = bgzf_open("-", "r"); + if (fp == NULL) { + fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); + return 1; + } + if (bgzf_compression(fp) == no_compression) { + fprintf(stderr, "[bgzip] stdin is not compressed -- ignored\n"); + bgzf_close(fp); + return 1; + } + + if (!write_fname) { + f_dst = fileno(stdout); + } else { + if (!exp_out_open) { + exp_out_open = 1; + + f_dst = open(write_fname, O_WRONLY | O_CREAT | O_TRUNC, 0666); + + if (f_dst < 0) { + fprintf(stderr, "[bgzip] can't create %s: %s\n", write_fname, strerror(errno)); + return 1; + } + } + } + } + + buffer = malloc(WINDOW_SIZE); + if ( start>0 ) + { + if (index_fname) { + if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) + error("Could not load index: %s\n", index_fname); + } else { + if (optind >= argc || isstdin) { + error("The -b option requires -I when reading from stdin " + "(and stdin must be seekable)\n"); + } + if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) + error("Could not load index: %s.gzi\n", argv[optind]); + } + if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %ld-th (uncompressd) byte\n", start); + } + + if (threads > 1) + bgzf_mt(fp, threads, 256); + + #ifdef _WIN32 + _setmode(f_dst, O_BINARY); + #endif + long start_reg = start, end_reg = end; + while (1) { + if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); + else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? 
WINDOW_SIZE:(end - start)); + if (c == 0) break; + if (c < 0) error("Error %d in block starting at offset %" PRId64 "(%" PRIX64 ")\n", fp->errcode, fp->block_address, fp->block_address); + start += c; + if ( !test && write(f_dst, buffer, c) != c ) { + #ifdef _WIN32 + if (GetLastError() != ERROR_NO_DATA) + #endif + error("Could not write %d bytes\n", c); + } + if (end >= 0 && start >= end) break; + } + start = start_reg; + end = end_reg; + free(buffer); + if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode); + + if (statfilename) { + if (!write_fname) { + //get input file timestamp + if (!getfilespec(argv[optind], &filestat)) { + //set output file timestamp + if (setfilespec(statfilename, &filestat) < 0) { + fprintf(stderr, "[bgzip] Failed to set file specification.\n"); + } + } + else { + fprintf(stderr, "[bgzip] Failed to get file specification.\n"); + } + } + + free(statfilename); + } + + if (argc > optind && !pstdout && !test && !keep && !isstdin && !write_fname) unlink(argv[optind]); + if (!isstdin && !pstdout && !test && !write_fname) { + close(f_dst); //close output file when it is not stdout + } + } + } while (++optind < argc); + + if (usedstdout && !reindex) { + //stdout in use, have to close explicitly to get any pending write errors + if (fclose(stdout) != 0 && errno != EBADF) { + fprintf(stderr, "[bgzip] Failed to close stdout, errno %d", errno); + ret = 1; + } + } else if (write_fname) { + if (compress == 1) { // close explicit output file (this is for compression) + if (index) { + if (index_fname) { + if (bgzf_index_dump(fp, index_fname, NULL) < 0) + error("Could not write index to '%s'\n", index_fname); + } else { + if (bgzf_index_dump(fp, write_fname, ".gzi") < 0) + error("Could not write index to '%s.gzi'\n", write_fname); + } + } + + if (bgzf_close(fp) < 0) + error("Output close failed: Error %d\n", fp->errcode); + } else { + close(f_dst); + } + } + + + return ret; +} diff --git a/src/htslib-1.18/builddir_vars.mk.in 
b/src/htslib-1.21/builddir_vars.mk.in similarity index 100% rename from src/htslib-1.18/builddir_vars.mk.in rename to src/htslib-1.21/builddir_vars.mk.in diff --git a/src/htslib-1.19.1/config.guess b/src/htslib-1.21/config.guess similarity index 100% rename from src/htslib-1.19.1/config.guess rename to src/htslib-1.21/config.guess diff --git a/src/htslib-1.21/config.h.in b/src/htslib-1.21/config.h.in new file mode 100644 index 0000000..f9d38a4 --- /dev/null +++ b/src/htslib-1.21/config.h.in @@ -0,0 +1,180 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* If you use configure, this file provides #defines reflecting your + configuration choices. If you have not run configure, suitable + conservative defaults will be used. + + Autoheader adds a number of items to this template file that are not + used by HTSlib: STDC_HEADERS and most HAVE_*_H header file defines + are immaterial, as we assume standard ISO C headers and facilities; + the PACKAGE_* defines are unused and are overridden by the more + accurate PACKAGE_VERSION as computed by the Makefile. */ + +/* Define if HTSlib should enable GCS support. */ +#undef ENABLE_GCS + +/* Define if HTSlib should enable plugins. */ +#undef ENABLE_PLUGINS + +/* Define if HTSlib should enable S3 support. */ +#undef ENABLE_S3 + +/* Define if __attribute__((constructor)) is available. */ +#undef HAVE_ATTRIBUTE_CONSTRUCTOR + +/* Define if __attribute__((target(...))) is available. */ +#undef HAVE_ATTRIBUTE_TARGET + +/* Defined to 1 if rANS source using AVX2 can be compiled. */ +#undef HAVE_AVX2 + +/* Defined to 1 if rANS source using AVX512F can be compiled. */ +#undef HAVE_AVX512 + +/* Defined to 1 if __builtin_cpu_supports("ssse3") works */ +#undef HAVE_BUILTIN_CPU_SUPPORT_SSSE3 + +/* Define if clock_gettime exists and accepts CLOCK_PROCESS_CPUTIME_ID. */ +#undef HAVE_CLOCK_GETTIME_CPUTIME + +/* Define if you have the Common Crypto library. 
*/ +#undef HAVE_COMMONCRYPTO + +/* Define to 1 if you have the declaration of '__cpuid_count', and to 0 if you + don't. */ +#undef HAVE_DECL___CPUID_COUNT + +/* Define to 1 if you have the declaration of '__get_cpuid_max', and to 0 if + you don't. */ +#undef HAVE_DECL___GET_CPUID_MAX + +/* Define to 1 if you have the 'drand48' function. */ +#undef HAVE_DRAND48 + +/* Define if using an external libhtscodecs */ +#undef HAVE_EXTERNAL_LIBHTSCODECS + +/* Define to 1 if you have the 'fdatasync' function. */ +#undef HAVE_FDATASYNC + +/* Define to 1 if you have the 'fsync' function. */ +#undef HAVE_FSYNC + +/* Define to 1 if you have the 'getpagesize' function. */ +#undef HAVE_GETPAGESIZE + +/* Define to 1 if you have the 'gmtime_r' function. */ +#undef HAVE_GMTIME_R + +/* Define if you have libcrypto-style HMAC(). */ +#undef HAVE_HMAC + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the 'bz2' library (-lbz2). */ +#undef HAVE_LIBBZ2 + +/* Define if libcurl file access is enabled. */ +#undef HAVE_LIBCURL + +/* Define if libdeflate is available. */ +#undef HAVE_LIBDEFLATE + +/* Define to 1 if you have the 'lzma' library (-llzma). */ +#undef HAVE_LIBLZMA + +/* Define to 1 if you have the 'z' library (-lz). */ +#undef HAVE_LIBZ + +/* Define to 1 if you have the header file. */ +#undef HAVE_LZMA_H + +/* Define to 1 if you have a working 'mmap' system call. */ +#undef HAVE_MMAP + +/* Defined to 1 if rANS source using popcnt can be compiled. */ +#undef HAVE_POPCNT + +/* Define to 1 if you have the 'srand48_deterministic' function. */ +#undef HAVE_SRAND48_DETERMINISTIC + +/* Defined to 1 if rANS source using SSE4.1 can be compiled. */ +#undef HAVE_SSE4_1 + +/* Defined to 1 if rANS source using SSSE3 can be compiled. */ +#undef HAVE_SSSE3 + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDIO_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Platform-dependent plugin filename extension. */ +#undef PLUGIN_EXT + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#undef STDC_HEADERS + + +/* Prevent unaligned access in htscodecs SSE4 rANS codec */ +#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0 +#undef UBSAN +#endif + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +#undef _LARGE_FILES + +/* Number of bits in time_t, on hosts where this is settable. */ +#undef _TIME_BITS + +/* Specify X/Open requirements */ +#undef _XOPEN_SOURCE + +/* Define to 1 on platforms where this makes time_t a 64-bit type. 
*/ +#undef __MINGW_USE_VC2005_COMPAT diff --git a/src/htslib-1.19.1/config.mk.in b/src/htslib-1.21/config.mk.in similarity index 100% rename from src/htslib-1.19.1/config.mk.in rename to src/htslib-1.21/config.mk.in diff --git a/src/htslib-1.19.1/config.sub b/src/htslib-1.21/config.sub similarity index 100% rename from src/htslib-1.19.1/config.sub rename to src/htslib-1.21/config.sub diff --git a/src/htslib-1.21/configure b/src/htslib-1.21/configure new file mode 100755 index 0000000..fc73d48 --- /dev/null +++ b/src/htslib-1.21/configure @@ -0,0 +1,8312 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.72 for HTSlib 1.21. +# +# Report bugs to . +# +# +# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation, +# Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +# +# Portions copyright (C) 2020-2024 Genome Research Ltd. +# +# This configure script is free software: you are free to change and +# redistribute it. There is NO WARRANTY, to the extent permitted by law. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else case e in #( + e) case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. 
+# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. +if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as 'sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! 
-f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed 'exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else case e in #( + e) case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : + +else case e in #( + e) exitcode=1; echo positional parameters were not saved. ;; +esac +fi +test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null +then : + as_have_required=yes +else case e in #( + e) as_have_required=no ;; +esac +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null +then : + +else case e in #( + e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. 
+ as_shell=$as_dir$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$as_shell as_have_required=yes + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : + break 2 +fi +fi + done;; + esac + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else case e in #( + e) if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi ;; +esac +fi + + + if test "x$CONFIG_SHELL" != x +then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed 'exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." + if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." + else + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org and +$0: samtools-help@lists.sourceforge.net about your system, +$0: including any error possibly output before this +$0: message. 
Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi ;; +esac +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. 
+as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else case e in #( + e) as_fn_append () + { + eval $1=\$$1\$2 + } ;; +esac +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else case e in #( + e) as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } ;; +esac +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. 
+as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + t clear + :clear + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. 
+ # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. + # In both cases, we have to default to 'cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" +as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated + +# Sed expression to map a string onto a valid variable name. +as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" +as_tr_sh="eval sed '$as_sed_sh'" # deprecated + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='HTSlib' +PACKAGE_TARNAME='htslib' +PACKAGE_VERSION='1.21' +PACKAGE_STRING='HTSlib 1.21' +PACKAGE_BUGREPORT='samtools-help@lists.sourceforge.net' +PACKAGE_URL='http://www.htslib.org/' + +ac_unique_file="hts.c" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_STDIO_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif +#ifdef HAVE_STRING_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_header_c_list= +ac_func_c_list= +enable_year2038=no +ac_subst_vars='LTLIBOBJS +LIBOBJS +HTSDIRslash_if_relsrcdir +static_LIBS +static_LDFLAGS +private_LIBS +pc_requires +CRYPTO_LIBS +s3 +gcs +libcurl +PLUGIN_EXT +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +VERSION_SCRIPT_LDFLAGS +PLATFORM +pluginpath +plugindir +with_external_htscodecs +enable_plugins +PKG_CONFIG_LIBDIR +PKG_CONFIG_PATH +PKG_CONFIG +hts_cflags_avx512 +hts_cflags_avx2 +hts_cflags_sse4 +GREP +RANLIB +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +runstatedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_warnings +enable_werror +enable_versioned_symbols +enable_bz2 +enable_gcs +enable_largefile +enable_libcurl +enable_lzma +enable_plugins +with_external_htscodecs +with_libdeflate +with_plugin_dir +with_plugin_path +enable_s3 +enable_year2038 +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +PKG_CONFIG +PKG_CONFIG_PATH +PKG_CONFIG_LIBDIR' + + +# Initialize some variables set by options. 
+# Defaults for every variable the command-line parser below may assign.
+# An empty value means "not given"; NONE is the initial placeholder used
+# for the prefixes and the X include/library directories.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+# Walk the positional parameters.
+#   ac_prev      names the variable still waiting for its value when an
+#                option was given as two words (e.g. "--prefix DIR").
+#   ac_dashdash  becomes "yes" after a bare "--"; it is prepended to every
+#                later argument in the main case statement, so those
+#                arguments can no longer match the patterns that start
+#                with "-".
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option
+    ac_prev=
+    continue
+  fi
+
+  # Split off the option value: the text after the first "=", empty for a
+  # trailing "=", and "yes" when the argument contains no "=" at all.
+  case $ac_option in
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *=) ac_optarg= ;;
+  *) ac_optarg=yes ;;
+  esac
+
+  # Each option is listed with all the abbreviations the patterns accept.
+  # The bare form records the target variable in ac_prev; the "=VALUE"
+  # form assigns immediately.
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  # --disable-FEATURE sets enable_FEATURE=no.  '-', '+' and '.' in the
+  # feature name become '_' in the variable name.  A feature not listed
+  # in $ac_user_opts is only remembered for the warning issued after the
+  # loop; it is still assigned.
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: '$ac_useropt'"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+         ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=no ;;
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  # --enable-FEATURE[=ARG] sets enable_FEATURE to ARG ("yes" if omitted).
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: '$ac_useropt'"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+         ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  # NOTE(review): the two pattern lists below contain "--progr-tra" where
+  # the abbreviation sequence would suggest "--program-tra".  The spelling
+  # is kept exactly as found; confirm against upstream Autoconf before
+  # changing it.
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -runstatedir | --runstatedir | --runstatedi | --runstated \
+  | --runstate | --runstat | --runsta | --runst | --runs \
+  | --run | --ru | --r)
+    ac_prev=runstatedir ;;
+  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+  | --run=* | --ru=* | --r=*)
+    runstatedir=$ac_optarg ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  # --with-PACKAGE[=ARG] sets with_PACKAGE to ARG ("yes" if omitted).
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: '$ac_useropt'"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+         ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;
+
+  # --without-PACKAGE sets with_PACKAGE=no.
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: '$ac_useropt'"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+         ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=no ;;
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  # Any other argument starting with "-" is an error.
+  -*) as_fn_error $? "unrecognized option: '$ac_option'
+Try '$0 --help' for more information"
+    ;;
+
+  # VAR=VALUE: assign the variable and export it.
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    case $ac_envvar in #(
+      '' | [0-9]* | *[!_$as_cr_alnum]* )
+      as_fn_error $? "invalid variable name: '$ac_envvar'" ;;
+    esac
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;
+
+  # A bare word: warn, then use it as the default for whichever of
+  # build_alias, host_alias and target_alias is still unset.
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
+    ;;
+
+  esac
+done
+
+# A two-word option was the last argument, so its value is missing.
+if test -n "$ac_prev"; then
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  as_fn_error $? "missing argument to $ac_option"
+fi
+
+# Report unknown --enable/--with options: silently ignored, fatal, or a
+# warning, depending on $enable_option_checking.
+if test -n "$ac_unrecognized_opts"; then
+  case $enable_option_checking in
+    no) ;;
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+    *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+  esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
+              datadir sysconfdir sharedstatedir localstatedir includedir \
+              oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+              libdir localedir mandir runstatedir
+do
+  eval ac_val=\$$ac_var
+  # Remove trailing slashes.
+  case $ac_val in
+    */ )
+      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+      eval $ac_var=\$ac_val;;
+  esac
+  # Be sure to have absolute directory names.
+  # Accepted: values starting with "/", "\" or "$", and drive-letter
+  # paths such as "C:/".  NONE or empty is accepted only for the two
+  # *prefix variables.
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* ) continue;;
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+  esac
+  as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: '$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+# With --silent, file descriptor 6 is redirected to /dev/null.
+test "$silent" = yes && exec 6>/dev/null
+
+
+# Verify that pwd names the directory we are really in by comparing the
+# "ls -di" output of "." with that of the directory pwd reports.
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+  as_fn_error $? "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+  as_fn_error $? "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+# When --srcdir was not given, derive it from the location of this script
+# and fall back to ".." if $ac_unique_file is not readable there.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes
+  # Try the directory containing this script, then the parent directory.
+  ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+         X"$as_myself" : 'X\(//\)[^/]' \| \
+         X"$as_myself" : 'X\(//\)$' \| \
+         X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X"$as_myself" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+            s//\1/
+            q
+          }
+          /^X\(\/\/\)[^/].*/{
+            s//\1/
+            q
+          }
+          /^X\(\/\/\)$/{
+            s//\1/
+            q
+          }
+          /^X\(\/\).*/{
+            s//\1/
+            q
+          }
+          s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r "$srcdir/$ac_unique_file"; then
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+  as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but 'cd $srcdir' does not work"
+ac_abs_confdir=`(
+        cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+        pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+# Snapshot every precious variable: whether it is set and its current
+# value, stored under both the ac_env_* and the ac_cv_env_* names.
+for ac_var in $ac_precious_vars; do
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_env_${ac_var}_value=\$${ac_var}
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+# The first here-document below is unquoted, so $0 and $ac_default_prefix
+# are expanded when the text is printed; the later ones use <<\_ACEOF and
+# are printed literally.
+# NOTE(review): word spacing inside the help here-documents is kept
+# exactly as found in this revision.
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+'configure' configures HTSlib 1.21 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print 'checking ...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for '--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or '..']
+
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, 'make install' will install all the files in
+'$ac_default_prefix/bin', '$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than '$ac_default_prefix' using '--prefix',
+for instance '--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --datarootdir=DIR read-only arch.-independent data root [PREFIX/share]
+ --datadir=DIR read-only architecture-independent data [DATAROOTDIR]
+ --infodir=DIR info documentation [DATAROOTDIR/info]
+ --localedir=DIR locale-dependent data [DATAROOTDIR/locale]
+ --mandir=DIR man documentation [DATAROOTDIR/man]
+ --docdir=DIR documentation root [DATAROOTDIR/doc/htslib]
+ --htmldir=DIR html documentation [DOCDIR]
+ --dvidir=DIR dvi documentation [DOCDIR]
+ --pdfdir=DIR pdf documentation [DOCDIR]
+ --psdir=DIR ps documentation [DOCDIR]
+_ACEOF
+
+  cat <<\_ACEOF
+
+System types:
+ --build=BUILD configure for building on BUILD [guessed]
+ --host=HOST cross-compile to build programs to run on HOST [BUILD]
+_ACEOF
+fi
+
+# Package-specific help, printed for every --help variant.  The heading
+# line is only printed for the short and recursive forms.
+# The <lib dir>, <library> and <include dir> placeholders, the bug-report
+# address and the home-page URL were missing from this text and have been
+# put back.
+# NOTE(review): confirm the address and URL against AC_INIT in HTSlib's
+# configure.ac.
+if test -n "$ac_init_help"; then
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of HTSlib 1.21:";;
+   esac
+  cat <<\_ACEOF
+
+Optional Features:
+ --disable-option-checking ignore unrecognized --enable/--with options
+ --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
+ --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
+ --disable-warnings turn off compiler warnings
+ --enable-werror change warnings into errors, where supported
+ --disable-versioned-symbols
+ disable versioned symbols in shared library
+ --disable-bz2 omit support for BZ2-compressed CRAM files
+ --enable-gcs support Google Cloud Storage URLs
+ --disable-largefile omit support for large files
+ --enable-libcurl enable libcurl-based support for http/https/etc URLs
+ --disable-lzma omit support for LZMA-compressed CRAM files
+ --enable-plugins enable separately-compiled plugins for file access
+ --enable-s3 support Amazon AWS S3 URLs
+ --enable-year2038 support timestamps after 2038
+
+Optional Packages:
+ --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-external-htscodecs
+ get htscodecs functions from a shared library
+ --with-libdeflate use libdeflate for faster crc and deflate algorithms
+ --with-plugin-dir=DIR plugin installation location [LIBEXECDIR/htslib]
+ --with-plugin-path=PATH default HTS_PATH plugin search path [PLUGINDIR]
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ LIBS libraries to pass to the linker, e.g. -l<library>
+ CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+ you have headers in a nonstandard directory <include dir>
+ PKG_CONFIG path to pkg-config utility
+ PKG_CONFIG_PATH
+ directories to add to pkg-config's search path
+ PKG_CONFIG_LIBDIR
+ path overriding pkg-config's built-in search path
+
+Use these variables to override the choices made by 'configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <samtools-help@lists.sourceforge.net>.
+HTSlib home page: <http://www.htslib.org/>.
+_ACEOF
+ac_status=$?
+fi
+
+# --help=recursive: for each directory in $ac_subdirs_all, work out the
+# build/source path variables and run that directory's own configure
+# script with --help=recursive.
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+    test -d "$ac_dir" ||
+      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+      continue
+    ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .) # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* ) # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }
+    # Check for configure.gnu first; this name is used for a wrapper for
+    # Metaconfig's "Configure" on case-insensitive file systems.
+    if test -f "$ac_srcdir/configure.gnu"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi || ac_status=$?
+    cd "$ac_pwd" || { ac_status=$?; break; }
+  done
+fi
+
+# Any help request ends the run here; otherwise print the version banner
+# when --version was given.
+test -n "$ac_init_help" && exit $ac_status
+if $ac_init_version; then
+  cat <<\_ACEOF
+HTSlib configure 1.21
+generated by GNU Autoconf 2.72
+
+Copyright (C) 2023 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+
+Portions copyright (C) 2020-2024 Genome Research Ltd.
+
+This configure script is free software: you are free to change and
+redistribute it. There is NO WARRANTY, to the extent permitted by law.
+_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext +then : + ac_retval=0 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 ;; +esac +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_check_decl LINENO SYMBOL VAR INCLUDES EXTRA-OPTIONS FLAG-VAR +# ------------------------------------------------------------------ +# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR +# accordingly. Pass EXTRA-OPTIONS to the compiler, using FLAG-VAR. +ac_fn_check_decl () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + as_decl_name=`echo $2|sed 's/ *(.*//'` + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 +printf %s "checking whether $as_decl_name is declared... 
" >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` + eval ac_save_FLAGS=\$$6 + as_fn_append $6 " $5" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +#ifndef $as_decl_name +#ifdef __cplusplus + (void) $as_decl_use; +#else + (void) $as_decl_name; +#endif +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else case e in #( + e) eval "$3=no" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + eval $6=\$ac_save_FLAGS + ;; +esac +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_check_decl + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else case e in #( + e) eval "$3=no" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + } +then : + ac_retval=0 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 ;; +esac +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (void); below. */ + +#include +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (void); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main (void) +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + eval "$3=yes" +else case e in #( + e) eval "$3=no" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext ;; +esac +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that +# executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } +then : + ac_retval=0 +else case e in #( + e) printf "%s\n" "$as_me: program exited with status $ac_status" >&5 + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status ;; +esac +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. + ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by HTSlib $as_me 1.21, which was +generated by GNU Autoconf 2.72. Invocation command line was + + $ $0$ac_configure_args_raw + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" + # Save into config.log some information that might help in debugging. + { + echo + + printf "%s\n" "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + printf "%s\n" "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + printf "%s\n" "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + printf "%s\n" "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +printf "%s\n" "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +if test -n "$CONFIG_SITE"; then + ac_site_files="$CONFIG_SITE" +elif test "x$prefix" != xNONE; then + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" +else + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" +fi + +for ac_site_file in $ac_site_files +do + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . 
"$ac_site_file" \ + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See 'config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Test code for whether the C compiler supports C89 (global declarations) +ac_c_conftest_c89_globals=' +/* Does the compiler advertise C89 conformance? + Do not test the value of __STDC__, because some compilers set it to 0 + while being otherwise adequately conformant. */ +#if !defined __STDC__ +# error "Compiler does not advertise C89 conformance" +#endif + +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ +struct buf { int x; }; +struct buf * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (char **p, int i) +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* C89 style stringification. */ +#define noexpand_stringify(a) #a +const char *stringified = noexpand_stringify(arbitrary+token=sequence); + +/* C89 style token pasting. Exercises some of the corner cases that + e.g. old MSVC gets wrong, but not very hard. 
*/ +#define noexpand_concat(a,b) a##b +#define expand_concat(a,b) noexpand_concat(a,b) +extern int vA; +extern int vbee; +#define aye A +#define bee B +int *pvA = &expand_concat(v,aye); +int *pvbee = &noexpand_concat(v,bee); + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not \xHH hex character constants. + These do not provoke an error unfortunately, instead are silently treated + as an "x". The following induces an error, until -std is added to get + proper ANSI mode. Curiously \x00 != x always comes out true, for an + array size at least. It is necessary to write \x00 == 0 to get something + that is true only with -std. */ +int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) '\''x'\'' +int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), + int, int);' + +# Test code for whether the C compiler supports C89 (body of main). +ac_c_conftest_c89_main=' +ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); +' + +# Test code for whether the C compiler supports C99 (global declarations) +ac_c_conftest_c99_globals=' +/* Does the compiler advertise C99 conformance? */ +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L +# error "Compiler does not advertise C99 conformance" +#endif + +// See if C++-style comments work. + +#include +extern int puts (const char *); +extern int printf (const char *, ...); +extern int dprintf (int, const char *, ...); +extern void *malloc (size_t); +extern void free (void *); + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +// dprintf is used instead of fprintf to avoid needing to declare +// FILE and stderr. +#define debug(...) 
dprintf (2, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. +#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + #error "your preprocessor is broken" +#endif +#if BIG_OK +#else + #error "your preprocessor is broken" +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static bool +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str = ""; + int number = 0; + float fnumber = 0; + + while (*format) + { + switch (*format++) + { + case '\''s'\'': // string + str = va_arg (args_copy, const char *); + break; + case '\''d'\'': // int + number = va_arg (args_copy, int); + break; + case '\''f'\'': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); + + return *str && number && fnumber; +} +' + +# Test code for whether the C compiler supports C99 (body of main). +ac_c_conftest_c99_main=' + // Check bool. + _Bool success = false; + success |= (argc != 0); + + // Check restrict. 
+ if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. + struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + // Work around memory leak warnings. + free (ia); + + // Check named initializers. + struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[0] = argv[0][0]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' + || dynamic_array[ni.number - 1] != 543); +' + +# Test code for whether the C compiler supports C11 (global declarations) +ac_c_conftest_c11_globals=' +/* Does the compiler advertise C11 conformance? */ +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif + +// Check _Alignas. +char _Alignas (double) aligned_as_double; +char _Alignas (0) no_special_alignment; +extern char aligned_as_int; +char _Alignas (0) _Alignas (int) aligned_as_int; + +// Check _Alignof. +enum +{ + int_alignment = _Alignof (int), + int_array_alignment = _Alignof (int[100]), + char_alignment = _Alignof (char) +}; +_Static_assert (0 < -_Alignof (int), "_Alignof is signed"); + +// Check _Noreturn. +int _Noreturn does_not_return (void) { for (;;) continue; } + +// Check _Static_assert. +struct test_static_assert +{ + int x; + _Static_assert (sizeof (int) <= sizeof (long int), + "_Static_assert does not work in struct"); + long int y; +}; + +// Check UTF-8 literals. +#define u8 syntax error! 
+char const utf8_literal[] = u8"happens to be ASCII" "another string"; + +// Check duplicate typedefs. +typedef long *long_ptr; +typedef long int *long_ptr; +typedef long_ptr long_ptr; + +// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. +struct anonymous +{ + union { + struct { int i; int j; }; + struct { int k; long int l; } w; + }; + int m; +} v1; +' + +# Test code for whether the C compiler supports C11 (body of main). +ac_c_conftest_c11_main=' + _Static_assert ((offsetof (struct anonymous, i) + == offsetof (struct anonymous, w.k)), + "Anonymous union alignment botch"); + v1.i = 2; + v1.w.k = 5; + ok |= v1.i != 5; +' + +# Test code for whether the C compiler supports C11 (complete). +ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} +${ac_c_conftest_c11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + ${ac_c_conftest_c11_main} + return ok; +} +" + +# Test code for whether the C compiler supports C99 (complete). +ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + return ok; +} +" + +# Test code for whether the C compiler supports C89 (complete). 
+ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + return ok; +} +" + +as_fn_append ac_header_c_list " stdio.h stdio_h HAVE_STDIO_H" +as_fn_append ac_header_c_list " stdlib.h stdlib_h HAVE_STDLIB_H" +as_fn_append ac_header_c_list " string.h string_h HAVE_STRING_H" +as_fn_append ac_header_c_list " inttypes.h inttypes_h HAVE_INTTYPES_H" +as_fn_append ac_header_c_list " stdint.h stdint_h HAVE_STDINT_H" +as_fn_append ac_header_c_list " strings.h strings_h HAVE_STRINGS_H" +as_fn_append ac_header_c_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H" +as_fn_append ac_header_c_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H" +as_fn_append ac_header_c_list " unistd.h unistd_h HAVE_UNISTD_H" +as_fn_append ac_header_c_list " sys/param.h sys_param_h HAVE_SYS_PARAM_H" +as_fn_append ac_func_c_list " getpagesize HAVE_GETPAGESIZE" + +# Auxiliary files required by this configure script. +ac_aux_files="config.guess config.sub" + +# Locations in which to look for auxiliary files. +ac_aux_dir_candidates="${srcdir}${PATH_SEPARATOR}${srcdir}/..${PATH_SEPARATOR}${srcdir}/../.." + +# Search for a directory containing all of the required auxiliary files, +# $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates. +# If we don't find one directory that contains all the files we need, +# we report the set of missing files from the *first* directory in +# $ac_aux_dir_candidates and give up. 
+ac_missing_aux_files="" +ac_first_candidate=: +printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: $ac_aux_files" >&5 +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in $ac_aux_dir_candidates +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + + printf "%s\n" "$as_me:${as_lineno-$LINENO}: trying $as_dir" >&5 + ac_aux_dir_found=yes + ac_install_sh= + for ac_aux in $ac_aux_files + do + # As a special case, if "install-sh" is required, that requirement + # can be satisfied by any of "install-sh", "install.sh", or "shtool", + # and $ac_install_sh is set appropriately for whichever one is found. + if test x"$ac_aux" = x"install-sh" + then + if test -f "${as_dir}install-sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install-sh found" >&5 + ac_install_sh="${as_dir}install-sh -c" + elif test -f "${as_dir}install.sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install.sh found" >&5 + ac_install_sh="${as_dir}install.sh -c" + elif test -f "${as_dir}shtool"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}shtool found" >&5 + ac_install_sh="${as_dir}shtool install -c" + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} install-sh" + else + break + fi + fi + else + if test -f "${as_dir}${ac_aux}"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}${ac_aux} found" >&5 + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}" + else + break + fi + fi + fi + done + if test "$ac_aux_dir_found" = yes; then + ac_aux_dir="$as_dir" + break + fi + ac_first_candidate=false + + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else case e in #( + e) as_fn_error $? 
"cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 ;; +esac +fi + + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +if test -f "${ac_aux_dir}config.guess"; then + ac_config_guess="$SHELL ${ac_aux_dir}config.guess" +fi +if test -f "${ac_aux_dir}config.sub"; then + ac_config_sub="$SHELL ${ac_aux_dir}config.sub" +fi +if test -f "$ac_aux_dir/configure"; then + ac_configure="$SHELL ${ac_aux_dir}configure" +fi + +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: '$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. 
+ ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: '$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: '$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: '$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: '$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: '$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run '${MAKE-make} distclean' and/or 'rm $cache_file' + and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. 
## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_config_headers="$ac_config_headers config.h" + + + + +# SYNOPSIS +# +# HTS_PROG_CC_WERROR(FLAGS_VAR) +# +# Set FLAGS_VAR to the flags needed to make the C compiler treat warnings +# as errors. + + +# hts_check_compile_flags_needed.m4 +# +# SYNOPSIS +# +# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) +# +# DESCRIPTION +# +# Check whether the given FLAGS are required to build and link INPUT with +# the current language's compiler. Compilation and linking are first +# tries without FLAGS. If that fails it then tries to compile and +# link again with FLAGS. +# +# FEATURE describes the feature being tested, and is used when printing +# messages and to name the cache entry (along with the tested flags). +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. In ACTION-SUCCESS, $flags_needed will be set to +# either an empty string or FLAGS depending on the test results. +# +# If EXTRA-FLAGS is defined, it is added to the current language's default +# flags (e.g. CFLAGS) when the check is done. The check is thus made with +# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to +# force the compiler to issue an error when a bad flag is given. +# +# If omitted, INPUT defaults to AC_LANG_PROGRAM(), although that probably +# isn't very useful. +# +# NOTE: Implementation based on AX_CHECK_COMPILE_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. 
Draheim +# Copyright (c) 2011 Maarten Bosmans +# Copyright (c) 2023 Robert Davies +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) + + + +# SYNOPSIS +# +# HTS_TEST_CC_C_LD_FLAG(FLAG, FOUND_VAR) +# +# Test if FLAG can be used on both CFLAGS and LDFLAGS. It it works, +# variable FOUND_VAR is set to FLAG. + + + + + +# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- +# serial 12 (pkg-config-0.29.2) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. 
+ # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" + fi +fi +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. +set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +fi + + +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See 'config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion -version; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +printf %s "checking whether the C compiler works... " >&6; } +ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # Autoconf-2.13 could set the ac_cv_exeext variable to 'no'. +# So ignore a value of 'no', otherwise this would lead to 'EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an '-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else case e in #( + e) ac_file='' ;; +esac +fi +if test -z "$ac_file" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See 'config.log' for more details" "$LINENO" 5; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +printf %s "checking for C compiler default output file name... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf "%s\n" "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # If both 'conftest.exe' and 'conftest' are 'present' (well, observable) +# catch 'conftest.exe'. For instance with Cygwin, 'ls conftest' will +# work properly (i.e., refer to 'conftest.exe'), while it won't with +# 'rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else case e in #( + e) { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See 'config.log' for more details" "$LINENO" 5; } ;; +esac +fi +rm -f conftest conftest$ac_cv_exeext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf "%s\n" "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +FILE *f = fopen ("conftest.out", "w"); + if (!f) + return 1; + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C compiled programs. +If you meant to cross compile, use '--host'. +See 'config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf "%s\n" "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext \ + conftest.o conftest.obj conftest.out +ac_clean_files=$ac_clean_files_save +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See 'config.log' for more details" "$LINENO" 5; } ;; +esac +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf "%s\n" "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... " >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else case e in #( + e) ac_compiler_gnu=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+y} +ac_save_CFLAGS=$CFLAGS +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +else case e in #( + e) CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf "%s\n" "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c11=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 ;; +esac +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c99" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 ;; +esac +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c89_program +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c89" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 ;; +esac +fi +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_RANLIB+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +printf "%s\n" "$RANLIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_RANLIB+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +printf "%s\n" "$ac_ct_RANLIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +printf %s "checking for grep that handles long lines and -e... " >&6; } +if test ${ac_cv_path_GREP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in grep ggrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in #( +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +#( +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +printf "%s\n" "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + + + # Check whether --enable-warnings was given. +if test ${enable_warnings+y} +then : + enableval=$enable_warnings; +else case e in #( + e) enable_warnings=yes ;; +esac +fi + + + if test "x$enable_warnings" != xno +then : + + + + ansi="" + if test "x$ansi" = "x" +then : + msg="for C compiler warning flags" +else case e in #( + e) msg="for C compiler warning and ANSI conformance flags" ;; +esac +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking $msg" >&5 +printf %s "checking $msg... 
" >&6; } + if test ${hts_cv_prog_cc_warnings+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) hts_cv_prog_cc_warnings="" + if test "x$CC" != "x" +then : + + cat > conftest.c < /dev/null 2>&1 && + test -f conftest.o +then : + if test "x$ansi" = "x" +then : + hts_cv_prog_cc_warnings="-Wall" +else case e in #( + e) hts_cv_prog_cc_warnings="-Wall -ansi -pedantic" ;; +esac +fi + +elif # Sun Studio or Solaris C compiler + "$CC" -V 2>&1 | $GREP -i -E "WorkShop|Sun C" > /dev/null 2>&1 && + "$CC" -c -v -Xc conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + if test "x$ansi" = "x" +then : + hts_cv_prog_cc_warnings="-v" +else case e in #( + e) hts_cv_prog_cc_warnings="-v -Xc" ;; +esac +fi + +elif # Digital Unix C compiler + "$CC" -V 2>&1 | $GREP -i "Digital UNIX Compiler" > /dev/null 2>&1 && + "$CC" -c -verbose -w0 -warnprotos -std1 conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + if test "x$ansi" = "x" +then : + hts_cv_prog_cc_warnings="-verbose -w0 -warnprotos" +else case e in #( + e) hts_cv_prog_cc_warnings="-verbose -w0 -warnprotos -std1" ;; +esac +fi + +elif # C for AIX Compiler + "$CC" 2>&1 | $GREP -i "C for AIX Compiler" > /dev/null 2>&1 && + "$CC" -c -qlanglvl=ansi -qinfo=all conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + if test "x$ansi" = "x" +then : + hts_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd" +else case e in #( + e) hts_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd -qlanglvl=ansi" ;; +esac +fi + +elif # IRIX C compiler + "$CC" -version 2>&1 | $GREP -i "MIPSpro Compilers" > /dev/null 2>&1 && + "$CC" -c -fullwarn -ansi -ansiE conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + if test "x$ansi" = "x" +then : + hts_cv_prog_cc_warnings="-fullwarn" +else case e in #( + e) hts_cv_prog_cc_warnings="-fullwarn -ansi -ansiE" ;; +esac +fi + +elif # HP-UX C compiler + what "$CC" 2>&1 | $GREP -i "HP C Compiler" > /dev/null 2>&1 && + "$CC" -c -Aa +w1 conftest.c > /dev/null 
2>&1 && + test -f conftest.o +then : + if test "x$ansi" = "x" +then : + hts_cv_prog_cc_warnings="+w1" +else case e in #( + e) hts_cv_prog_cc_warnings="+w1 -Aa" ;; +esac +fi + +elif # The NEC SX series (Super-UX 10) C compiler + "$CC" -V 2>&1 | $GREP "/SX" > /dev/null 2>&1 && + "$CC" -c -pvctl,fullmsg -Xc conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + + if test "x$ansi" = "x" +then : + hts_cv_prog_cc_warnings="-pvctl,fullmsg" +else case e in #( + e) hts_cv_prog_cc_warnings="-pvctl,fullmsg -Xc" ;; +esac +fi + +elif # The Cray C compiler (Unicos) + "$CC" -V 2>&1 | $GREP -i "Cray" > /dev/null 2>&1 && + "$CC" -c -h msglevel_2 conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + if test "x$ansi" = "x" +then : + hts_cv_prog_cc_warnings="-h#msglevel_2" +else case e in #( + e) hts_cv_prog_cc_warnings="-h#msglevel_2,conform" ;; +esac +fi + +elif # The Tiny C Compiler + "$CC" -v 2>&1 | $GREP "tcc version" > /dev/null && + "$CC" -Wall -c conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + hts_cv_prog_cc_warnings="-Wall" + +fi + rm -f conftest.* + +fi + ;; +esac +fi + + + if test "x$hts_cv_prog_cc_warnings" != "x" +then : + +ac_arg_result=`echo "$hts_cv_prog_cc_warnings" | tr '#' ' '` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_arg_result" >&5 +printf "%s\n" "$ac_arg_result" >&6; } + +ac_arg_needed="" # collects the selected warning flags that CFLAGS does not already contain +for ac_arg in $hts_cv_prog_cc_warnings +do + ac_arg_sp=`echo "$ac_arg" | tr '#' ' '` + case " $CFLAGS " in #( + *" $ac_arg_sp "*) : + ;; #( + *) : + ac_arg_needed="$ac_arg_needed $ac_arg_sp" ;; # append to the running list so flags from earlier iterations are kept +esac +done +CFLAGS="$ac_arg_needed $CFLAGS" +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unknown" >&5 +printf "%s\n" "unknown" >&6; } + ;; +esac +fi + +fi + + + # Check whether --enable-werror was given.
+if test ${enable_werror+y} +then : + enableval=$enable_werror; +else case e in #( + e) enable_werror=no ;; +esac +fi + + + if test "x$enable_werror" != xno +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler flags to error on warnings" >&5 +printf %s "checking for C compiler flags to error on warnings... " >&6; } + if test ${hts_cv_prog_cc_werror+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) hts_cv_prog_cc_werror="" + if test "x$CC" != "x" +then : + + cat > conftest.c < /dev/null 2>&1 && + test -f conftest.o +then : + hts_cv_prog_cc_werror="-Werror" +elif # Sun Studio or Solaris C compiler + "$CC" -V 2>&1 | $GREP -i -E "WorkShop|Sun C" > /dev/null 2>&1 && + "$CC" -c -errwarn=%all conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + hts_cv_prog_cc_werror="-errwarn=%all" +elif # The Tiny C Compiler + "$CC" -v 2>&1 | $GREP "tcc version" > /dev/null && + "$CC" -Wall -c conftest.c > /dev/null 2>&1 && + test -f conftest.o +then : + hts_cv_prog_cc_werror="-Werror" + +fi + rm -f conftest.* + +fi + ;; +esac +fi + + if test "x$hts_cv_prog_cc_werror" != x +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_prog_cc_werror" >&5 +printf "%s\n" "$hts_cv_prog_cc_werror" >&6; } + if test "xhts_late_cflags" != x +then : + eval hts_late_cflags="$hts_cv_prog_cc_werror" +fi + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unknown" >&5 +printf "%s\n" "unknown" >&6; } + ;; +esac +fi + +fi + + +# HTSlib uses X/Open-only facilities (M_SQRT2 etc, drand48() etc), and +# various POSIX functions that are provided by various _POSIX_C_SOURCE values +# or by _XOPEN_SOURCE >= 500. It also uses usleep(), which is removed when +# _XOPEN_SOURCE >= 700. Additionally, some definitions may require +# _XOPEN_SOURCE >= 600 on some platforms (snprintf on MinGW, +# PTHREAD_MUTEX_RECURSIVE on some Linux distributions). Hence we set it to 600. 
+ +# Define _XOPEN_SOURCE unless the user has already done so via $CPPFLAGS etc. + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC options needed to detect all undeclared functions" >&5 +printf %s "checking for $CC options needed to detect all undeclared functions... " >&6; } +if test ${ac_cv_c_undeclared_builtin_options+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_CFLAGS=$CFLAGS + ac_cv_c_undeclared_builtin_options='cannot detect' + for ac_arg in '' -fno-builtin; do + CFLAGS="$ac_save_CFLAGS $ac_arg" + # This test program should *not* compile successfully. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +(void) strchr; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) # This test program should compile successfully. + # No library function is consistently available on + # freestanding implementations, so test against a dummy + # declaration. Include always-available headers on the + # off chance that they somehow elicit warnings. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include +extern void ac_decl (int, char *); + +int +main (void) +{ +(void) ac_decl (0, (char *) 0); + (void) ac_decl; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + if test x"$ac_arg" = x +then : + ac_cv_c_undeclared_builtin_options='none needed' +else case e in #( + e) ac_cv_c_undeclared_builtin_options=$ac_arg ;; +esac +fi + break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + done + CFLAGS=$ac_save_CFLAGS + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_undeclared_builtin_options" >&5 +printf "%s\n" "$ac_cv_c_undeclared_builtin_options" >&6; } + case $ac_cv_c_undeclared_builtin_options in #( + 'cannot detect') : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "cannot make $CC report undeclared builtins +See 'config.log' for more details" "$LINENO" 5; } ;; #( + 'none needed') : + ac_c_undeclared_builtin_options='' ;; #( + *) : + ac_c_undeclared_builtin_options=$ac_cv_c_undeclared_builtin_options ;; +esac + +ac_header= ac_cache= +for ac_item in $ac_header_c_list +do + if test $ac_cache; then + ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf "%s\n" "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item + else + ac_header=$ac_item + fi +done + + + + + + + + +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes +then : + +printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h + +fi +ac_fn_check_decl "$LINENO" "_XOPEN_SOURCE" "ac_cv_have_decl__XOPEN_SOURCE" "$ac_includes_default" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl__XOPEN_SOURCE" = xyes 
+then : + +else case e in #( + e) +printf "%s\n" "#define _XOPEN_SOURCE 600" >>confdefs.h + ;; +esac +fi + +ac_fn_check_decl "$LINENO" "__get_cpuid_max" "ac_cv_have_decl___get_cpuid_max" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl___get_cpuid_max" = xyes +then : + ac_have_decl=1 +else case e in #( + e) ac_have_decl=0 ;; +esac +fi +printf "%s\n" "#define HAVE_DECL___GET_CPUID_MAX $ac_have_decl" >>confdefs.h +if test $ac_have_decl = 1 +then : + + hts_have_cpuid=yes + +else case e in #( + e) + hts_have_cpuid=no + ;; +esac +fi +ac_fn_check_decl "$LINENO" "__cpuid_count" "ac_cv_have_decl___cpuid_count" "#include +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl___cpuid_count" = xyes +then : + ac_have_decl=1 +else case e in #( + e) ac_have_decl=0 ;; +esac +fi +printf "%s\n" "#define HAVE_DECL___CPUID_COUNT $ac_have_decl" >>confdefs.h +if test $ac_have_decl = 1 +then : + + hts_have_cpuid=yes + +else case e in #( + e) + hts_have_cpuid=no + ;; +esac +fi + + +if test "x$hts_have_cpuid" = "xyes" +then : + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for sse4.1" >&5 +printf %s "checking C compiler flags needed for sse4.1... " >&6; } +if test ${hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + +int +main (void) +{ + + #ifdef __x86_64__ + __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); + __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b); + return _mm_popcnt_u32(*((char *) &c)); + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt=none +else case e in #( + e) ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse4.1 -mssse3 -mpopcnt" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + +int +main (void) +{ + + #ifdef __x86_64__ + __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); + __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b); + return _mm_popcnt_u32(*((char *) &c)); + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt="-msse4.1 -mssse3 -mpopcnt" +else case e in #( + e) hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt=unsupported ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" >&5 +printf "%s\n" "$hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" >&6; } +if test "x$hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" = xunsupported +then : + + : + +else case e in #( + e) + if test "x$hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" = xnone +then : + flags_needed="" +else case e in #( + e) flags_needed="$hts_cv_check_cflags_needed_sse4_1___msse4_1__mssse3__mpopcnt" ;; +esac +fi + + hts_cflags_sse4="$flags_needed" + +printf "%s\n" 
"#define HAVE_SSSE3 1" >>confdefs.h + + +printf "%s\n" "#define HAVE_POPCNT 1" >>confdefs.h + + +printf "%s\n" "#define HAVE_SSE4_1 1" >>confdefs.h + + + + printf "%s\n" "#define UBSAN 1" >>confdefs.h + + + ;; +esac +fi + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for avx2" >&5 +printf %s "checking C compiler flags needed for avx2... " >&6; } +if test ${hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + +int +main (void) +{ + + #ifdef __x86_64__ + __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + __m256i b = _mm256_add_epi32(a, a); + long long c = _mm256_extract_epi64(b, 0); + return _mm_popcnt_u32((int) c); + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt=none +else case e in #( + e) ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx2 -mpopcnt" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + +int +main (void) +{ + + #ifdef __x86_64__ + __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + __m256i b = _mm256_add_epi32(a, a); + long long c = _mm256_extract_epi64(b, 0); + return _mm_popcnt_u32((int) c); + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt="-mavx2 -mpopcnt" +else case e in #( + e) hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt=unsupported ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" >&5 +printf "%s\n" "$hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" >&6; } +if test "x$hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" = xunsupported +then : + + : + +else case e in #( + e) + if test "x$hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" = xnone +then : + flags_needed="" +else case e in #( + e) flags_needed="$hts_cv_check_cflags_needed_avx2___mavx2__mpopcnt" ;; +esac +fi + + hts_cflags_avx2="$flags_needed" + + +printf "%s\n" "#define HAVE_POPCNT 1" >>confdefs.h + + +printf "%s\n" "#define HAVE_AVX2 1" >>confdefs.h + + + ;; +esac +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking C compiler flags needed for avx512f" >&5 +printf %s "checking C compiler flags needed for avx512f... " >&6; } +if test ${hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + +int +main (void) +{ + + #ifdef __x86_64__ + __m512i a = _mm512_set1_epi32(1); + __m512i b = _mm512_add_epi32(a, a); + __m256i c = _mm512_castsi512_si256(b); + __m256i d = _mm512_extracti64x4_epi64(a, 1); + return _mm_popcnt_u32(*((char *) &c)) + (*(char *) &d); + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt=none +else case e in #( + e) ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512f -mpopcnt" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + +int +main (void) +{ + + #ifdef __x86_64__ + __m512i a = _mm512_set1_epi32(1); + __m512i b = _mm512_add_epi32(a, a); + __m256i c = _mm512_castsi512_si256(b); + __m256i d = _mm512_extracti64x4_epi64(a, 1); + return _mm_popcnt_u32(*((char *) &c)) + (*(char *) &d); + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt="-mavx512f -mpopcnt" +else case e in #( + e) hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt=unsupported ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" >&5 +printf "%s\n" "$hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" >&6; } +if test "x$hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" = xunsupported +then : + + : + +else case e in #( + e) + if test "x$hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" = xnone +then : + flags_needed="" +else case e in #( + e) flags_needed="$hts_cv_check_cflags_needed_avx512f___mavx512f__mpopcnt" ;; +esac +fi + + 
hts_cflags_avx512="$flags_needed" + + +printf "%s\n" "#define HAVE_POPCNT 1" >>confdefs.h + + +printf "%s\n" "#define HAVE_AVX512 1" >>confdefs.h + + + ;; +esac +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for working __builtin_cpu_supports(\"ssse3\")" >&5 +printf %s "checking for working __builtin_cpu_supports(\"ssse3\")... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + if (__builtin_cpu_supports("ssse3")) { + return 0; + } + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_BUILTIN_CPU_SUPPORT_SSSE3 1" >>confdefs.h + + +else case e in #( + e) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __attribute__((target))" >&5 +printf %s "checking for __attribute__((target))... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + __attribute__((target("ssse3"))) + int zero(void) { + return 0; + } + +int +main (void) +{ +zero(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_ATTRIBUTE_TARGET 1" >>confdefs.h + + +else case e in #( + e) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + + +fi + + + + + + + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac ;; +esac +fi +PKG_CONFIG=$ac_cv_path_PKG_CONFIG +if test -n "$PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 +printf "%s\n" "$PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_PKG_CONFIG"; then + ac_pt_PKG_CONFIG=$PKG_CONFIG + # Extract the first word of "pkg-config", so it can be a program name with args. +set dummy pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_ac_pt_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $ac_pt_PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac ;; +esac +fi +ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG +if test -n "$ac_pt_PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 +printf "%s\n" "$ac_pt_PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_pt_PKG_CONFIG" = x; then + PKG_CONFIG="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + PKG_CONFIG=$ac_pt_PKG_CONFIG + fi +else + PKG_CONFIG="$ac_cv_path_PKG_CONFIG" +fi + +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=0.9.0 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 +printf %s "checking pkg-config is at least version $_pkg_min_version... " >&6; } + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + PKG_CONFIG="" + fi +fi + +need_crypto=no +pc_requires= +static_LDFLAGS=$LDFLAGS +static_LIBS='-lpthread -lz -lm' +private_LIBS=$LDFLAGS + +# Check whether --enable-versioned-symbols was given. 
+if test ${enable_versioned_symbols+y} +then : + enableval=$enable_versioned_symbols; +else case e in #( + e) enable_versioned_symbols=yes ;; +esac +fi + + +# Check whether --enable-bz2 was given. +if test ${enable_bz2+y} +then : + enableval=$enable_bz2; +else case e in #( + e) enable_bz2=yes ;; +esac +fi + + +# Check whether --enable-gcs was given. +if test ${enable_gcs+y} +then : + enableval=$enable_gcs; +else case e in #( + e) enable_gcs=check ;; +esac +fi + + +# Check whether --enable-largefile was given. +if test ${enable_largefile+y} +then : + enableval=$enable_largefile; +fi +if test "$enable_largefile,$enable_year2038" != no,no +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable large file support" >&5 +printf %s "checking for $CC option to enable large file support... " >&6; } +if test ${ac_cv_sys_largefile_opts+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_CC="$CC" + ac_opt_found=no + for ac_opt in "none needed" "-D_FILE_OFFSET_BITS=64" "-D_LARGE_FILES=1" "-n32"; do + if test x"$ac_opt" != x"none needed" +then : + CC="$ac_save_CC $ac_opt" +fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#ifndef FTYPE +# define FTYPE off_t +#endif + /* Check that FTYPE can represent 2**63 - 1 correctly. + We can't simply define LARGE_FTYPE to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_FTYPE (((FTYPE) 1 << 31 << 31) - 1 + ((FTYPE) 1 << 31 << 31)) + int FTYPE_is_large[(LARGE_FTYPE % 2147483629 == 721 + && LARGE_FTYPE % 2147483647 == 1) + ? 1 : -1]; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + if test x"$ac_opt" = x"none needed" +then : + # GNU/Linux s390x and alpha need _FILE_OFFSET_BITS=64 for wide ino_t. 
+ CC="$CC -DFTYPE=ino_t" + if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) CC="$CC -D_FILE_OFFSET_BITS=64" + if ac_fn_c_try_compile "$LINENO" +then : + ac_opt='-D_FILE_OFFSET_BITS=64' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam +fi + ac_cv_sys_largefile_opts=$ac_opt + ac_opt_found=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + test $ac_opt_found = no || break + done + CC="$ac_save_CC" + + test $ac_opt_found = yes || ac_cv_sys_largefile_opts="support not detected" ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_opts" >&5 +printf "%s\n" "$ac_cv_sys_largefile_opts" >&6; } + +ac_have_largefile=yes +case $ac_cv_sys_largefile_opts in #( + "none needed") : + ;; #( + "supported through gnulib") : + ;; #( + "support not detected") : + ac_have_largefile=no ;; #( + "-D_FILE_OFFSET_BITS=64") : + +printf "%s\n" "#define _FILE_OFFSET_BITS 64" >>confdefs.h + ;; #( + "-D_LARGE_FILES=1") : + +printf "%s\n" "#define _LARGE_FILES 1" >>confdefs.h + ;; #( + "-n32") : + CC="$CC -n32" ;; #( + *) : + as_fn_error $? "internal error: bad value for \$ac_cv_sys_largefile_opts" "$LINENO" 5 ;; +esac + +if test "$enable_year2038" != no +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option for timestamps after 2038" >&5 +printf %s "checking for $CC option for timestamps after 2038... " >&6; } +if test ${ac_cv_sys_year2038_opts+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_CPPFLAGS="$CPPFLAGS" + ac_opt_found=no + for ac_opt in "none needed" "-D_TIME_BITS=64" "-D__MINGW_USE_VC2005_COMPAT" "-U_USE_32_BIT_TIME_T -D__MINGW_USE_VC2005_COMPAT"; do + if test x"$ac_opt" != x"none needed" +then : + CPPFLAGS="$ac_save_CPPFLAGS $ac_opt" +fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include + /* Check that time_t can represent 2**32 - 1 correctly. */ + #define LARGE_TIME_T \\ + ((time_t) (((time_t) 1 << 30) - 1 + 3 * ((time_t) 1 << 30))) + int verify_time_t_range[(LARGE_TIME_T / 65537 == 65535 + && LARGE_TIME_T % 65537 == 0) + ? 1 : -1]; + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_year2038_opts="$ac_opt" + ac_opt_found=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + test $ac_opt_found = no || break + done + CPPFLAGS="$ac_save_CPPFLAGS" + test $ac_opt_found = yes || ac_cv_sys_year2038_opts="support not detected" ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_year2038_opts" >&5 +printf "%s\n" "$ac_cv_sys_year2038_opts" >&6; } + +ac_have_year2038=yes +case $ac_cv_sys_year2038_opts in #( + "none needed") : + ;; #( + "support not detected") : + ac_have_year2038=no ;; #( + "-D_TIME_BITS=64") : + +printf "%s\n" "#define _TIME_BITS 64" >>confdefs.h + ;; #( + "-D__MINGW_USE_VC2005_COMPAT") : + +printf "%s\n" "#define __MINGW_USE_VC2005_COMPAT 1" >>confdefs.h + ;; #( + "-U_USE_32_BIT_TIME_T"*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "the 'time_t' type is currently forced to be 32-bit. It +will stop working after mid-January 2038. Remove +_USE_32BIT_TIME_T from the compiler flags. +See 'config.log' for more details" "$LINENO" 5; } ;; #( + *) : + as_fn_error $? "internal error: bad value for \$ac_cv_sys_year2038_opts" "$LINENO" 5 ;; +esac + +fi + +fi + +# Check whether --enable-libcurl was given. +if test ${enable_libcurl+y} +then : + enableval=$enable_libcurl; +else case e in #( + e) enable_libcurl=check ;; +esac +fi + + +# Check whether --enable-lzma was given. 
+if test ${enable_lzma+y} +then : + enableval=$enable_lzma; +else case e in #( + e) enable_lzma=yes ;; +esac +fi + + +# Check whether --enable-plugins was given. +if test ${enable_plugins+y} +then : + enableval=$enable_plugins; +else case e in #( + e) enable_plugins=no ;; +esac +fi + + + + +# Check whether --with-external-htscodecs was given. +if test ${with_external_htscodecs+y} +then : + withval=$with_external_htscodecs; +else case e in #( + e) with_external_htscodecs=no ;; +esac +fi + + + + +# Check whether --with-libdeflate was given. +if test ${with_libdeflate+y} +then : + withval=$with_libdeflate; +else case e in #( + e) with_libdeflate=check ;; +esac +fi + + + +# Check whether --with-plugin-dir was given. +if test ${with_plugin_dir+y} +then : + withval=$with_plugin_dir; case $withval in + yes|no) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "no directory specified for --with-plugin-dir" "$LINENO" 5 ;; + esac +else case e in #( + e) with_plugin_dir='$(libexecdir)/htslib' ;; +esac +fi + +plugindir=$with_plugin_dir + + + +# Check whether --with-plugin-path was given. +if test ${with_plugin_path+y} +then : + withval=$with_plugin_path; case $withval in + yes) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "no path specified for --with-plugin-path" "$LINENO" 5 ;; + no) with_plugin_path= ;; + esac +else case e in #( + e) with_plugin_path=$with_plugin_dir ;; +esac +fi + +pluginpath=$with_plugin_path + + +# Check whether --enable-s3 was given. +if test ${enable_s3+y} +then : + enableval=$enable_s3; +else case e in #( + e) enable_s3=check ;; +esac +fi + + +basic_host=${host_alias:-unknown-`uname -s`} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking shared library type for $basic_host" >&5 +printf %s "checking shared library type for $basic_host... 
" >&6; } +case $basic_host in + *-cygwin* | *-CYGWIN*) + host_result="Cygwin DLL" + PLATFORM=CYGWIN + PLUGIN_EXT=.cygdll + ;; + *-darwin* | *-Darwin*) + host_result="Darwin dylib" + PLATFORM=Darwin + PLUGIN_EXT=.bundle + ;; + *-msys* | *-MSYS* | *-mingw* | *-MINGW*) + host_result="MSYS dll" + PLATFORM=MSYS + PLUGIN_EXT=.dll + # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64, + # %lld and %z printf formats work. It also enforces the snprintf to + # be C99 compliant so it returns the correct values (in kstring.c). + + # Now set by default, so no need to do it here. + # CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE=600" + ;; + *) + host_result="plain .so" + PLATFORM=default + PLUGIN_EXT=.so + ;; +esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $host_result" >&5 +printf "%s\n" "$host_result" >&6; } + + +if test x"$PLATFORM" = xdefault && test x"$enable_versioned_symbols" = xyes +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the linker supports versioned symbols" >&5 +printf %s "checking whether the linker supports versioned symbols... " >&6; } +if test ${hts_cv_have_versioned_symbols+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + save_LDFLAGS=$LDFLAGS + LDFLAGS="-Wl,-version-script,$srcdir/htslib.map $LDFLAGS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_have_versioned_symbols=yes +else case e in #( + e) hts_cv_have_versioned_symbols=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_have_versioned_symbols" >&5 +printf "%s\n" "$hts_cv_have_versioned_symbols" >&6; } + if test "x$hts_cv_have_versioned_symbols" = xyes +then : + + VERSION_SCRIPT_LDFLAGS='-Wl,-version-script,$(srcprefix)htslib.map' + + +fi + +fi + + + # Test for flags to set default shared library visibility to hidden + # -fvisibility=hidden : GCC compatible + # -xldscope=hidden : SunStudio + ac_opt_found=no + if test "x$ac_opt_found" = "xno" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -fvisibility=hidden" >&5 +printf %s "checking whether the compiler accepts -fvisibility=hidden... " >&6; } +if test ${hts_cv_check__fvisibility_hidden+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_save_cflags=$CFLAGS + ac_check_save_ldflags=$LDFLAGS + CFLAGS="$CFLAGS -fvisibility=hidden" + LDFLAGS="$LDFLAGS -fvisibility=hidden" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check__fvisibility_hidden=yes + if test "xac_opt_found" != x +then : + eval ac_opt_found="-fvisibility=hidden" +fi +else case e in #( + e) hts_cv_check__fvisibility_hidden=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ac_check_save_cflags + LDFLAGS=$ac_check_save_ldflags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__fvisibility_hidden" >&5 +printf "%s\n" "$hts_cv_check__fvisibility_hidden" >&6; } + +fi + if test "x$ac_opt_found" = "xno" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -xldscope=hidden" >&5 +printf %s "checking whether the compiler accepts -xldscope=hidden... " >&6; } +if test ${hts_cv_check__xldscope_hidden+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_save_cflags=$CFLAGS + ac_check_save_ldflags=$LDFLAGS + CFLAGS="$CFLAGS -xldscope=hidden" + LDFLAGS="$LDFLAGS -xldscope=hidden" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check__xldscope_hidden=yes + if test "xac_opt_found" != x +then : + eval ac_opt_found="-xldscope=hidden" +fi +else case e in #( + e) hts_cv_check__xldscope_hidden=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ac_check_save_cflags + LDFLAGS=$ac_check_save_ldflags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__xldscope_hidden" >&5 +printf "%s\n" "$hts_cv_check__xldscope_hidden" >&6; } + +fi + + if test "x$ac_opt_found" != "xno" +then : + CFLAGS="$CFLAGS $ac_opt_found" + LDFLAGS="$LDFLAGS $ac_opt_found" +fi + + + + + + # Make sure we can run config.sub. 
+$SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5 + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +printf %s "checking build system type... " >&6; } +if test ${ac_cv_build+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5 + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +printf "%s\n" "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +printf %s "checking host system type... " >&6; } +if test ${ac_cv_host+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5 +fi + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +printf "%s\n" "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? 
"invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + + +ac_func= +for ac_item in $ac_func_c_list +do + if test $ac_func; then + ac_fn_c_check_func "$LINENO" $ac_func ac_cv_func_$ac_func + if eval test \"x\$ac_cv_func_$ac_func\" = xyes; then + echo "#define $ac_item 1" >> confdefs.h + fi + ac_func= + else + ac_func=$ac_item + fi +done + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for working mmap" >&5 +printf %s "checking for working mmap... " >&6; } +if test ${ac_cv_func_mmap_fixed_mapped+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test "$cross_compiling" = yes +then : + case "$host_os" in # (( + # Guess yes on platforms where we know the result. + linux*) ac_cv_func_mmap_fixed_mapped=yes ;; + # If we don't know, assume the worst. + *) ac_cv_func_mmap_fixed_mapped=no ;; + esac +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +/* malloc might have been renamed as rpl_malloc. */ +#undef malloc + +/* Thanks to Mike Haertel and Jim Avera for this test. + Here is a matrix of mmap possibilities: + mmap private not fixed + mmap private fixed at somewhere currently unmapped + mmap private fixed at somewhere already mapped + mmap shared not fixed + mmap shared fixed at somewhere currently unmapped + mmap shared fixed at somewhere already mapped + For private mappings, we should verify that changes cannot be read() + back from the file, nor mmap's back from the file at a different + address. 
(There have been systems where private was not correctly + implemented like the infamous i386 svr4.0, and systems where the + VM page cache was not coherent with the file system buffer cache + like early versions of FreeBSD and possibly contemporary NetBSD.) + For shared mappings, we should conversely verify that changes get + propagated back to all the places they're supposed to be. */ + +#include +#include + +#ifndef getpagesize +/* Prefer sysconf to the legacy getpagesize function, as getpagesize has + been removed from POSIX and is limited to page sizes that fit in 'int'. */ +# ifdef _SC_PAGESIZE +# define getpagesize() sysconf (_SC_PAGESIZE) +# elif defined _SC_PAGE_SIZE +# define getpagesize() sysconf (_SC_PAGE_SIZE) +# elif HAVE_GETPAGESIZE +int getpagesize (); +# else +# ifdef HAVE_SYS_PARAM_H +# include +# ifdef EXEC_PAGESIZE +# define getpagesize() EXEC_PAGESIZE +# else /* no EXEC_PAGESIZE */ +# ifdef NBPG +# define getpagesize() NBPG * CLSIZE +# ifndef CLSIZE +# define CLSIZE 1 +# endif /* no CLSIZE */ +# else /* no NBPG */ +# ifdef NBPC +# define getpagesize() NBPC +# else /* no NBPC */ +# ifdef PAGESIZE +# define getpagesize() PAGESIZE +# endif /* PAGESIZE */ +# endif /* no NBPC */ +# endif /* no NBPG */ +# endif /* no EXEC_PAGESIZE */ +# else /* no HAVE_SYS_PARAM_H */ +# define getpagesize() 8192 /* punt totally */ +# endif /* no HAVE_SYS_PARAM_H */ +# endif +#endif + +int +main (void) +{ + char *data, *data2, *data3; + const char *cdata2; + long i, pagesize; + int fd, fd2; + + pagesize = getpagesize (); + + /* First, make a file with some known garbage in it. */ + data = (char *) malloc (pagesize); + if (!data) + return 1; + for (i = 0; i < pagesize; ++i) + *(data + i) = rand (); + umask (0); + fd = creat ("conftest.mmap", 0600); + if (fd < 0) + return 2; + if (write (fd, data, pagesize) != pagesize) + return 3; + close (fd); + + /* Next, check that the tail of a page is zero-filled. 
File must have + non-zero length, otherwise we risk SIGBUS for entire page. */ + fd2 = open ("conftest.txt", O_RDWR | O_CREAT | O_TRUNC, 0600); + if (fd2 < 0) + return 4; + cdata2 = ""; + if (write (fd2, cdata2, 1) != 1) + return 5; + data2 = (char *) mmap (0, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd2, 0L); + if (data2 == MAP_FAILED) + return 6; + for (i = 0; i < pagesize; ++i) + if (*(data2 + i)) + return 7; + close (fd2); + /* 'return 8;' not currently used. */ + + /* Next, try to mmap the file at a fixed address which already has + something else allocated at it. If we can, also make sure that + we see the same garbage. */ + fd = open ("conftest.mmap", O_RDWR); + if (fd < 0) + return 9; + if (data2 != mmap (data2, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED, fd, 0L)) + return 10; + for (i = 0; i < pagesize; ++i) + if (*(data + i) != *(data2 + i)) + return 11; + + /* Finally, make sure that changes to the mapped area do not + percolate back to the file as seen by read(). (This is a bug on + some variants of i386 svr4.0.) 
*/ + for (i = 0; i < pagesize; ++i) + *(data2 + i) = *(data2 + i) + 1; + data3 = (char *) malloc (pagesize); + if (!data3) + return 12; + if (read (fd, data3, pagesize) != pagesize) + return 13; + for (i = 0; i < pagesize; ++i) + if (*(data + i) != *(data3 + i)) + return 14; + close (fd); + free (data); + free (data3); + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + ac_cv_func_mmap_fixed_mapped=yes +else case e in #( + e) ac_cv_func_mmap_fixed_mapped=no ;; +esac +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_mmap_fixed_mapped" >&5 +printf "%s\n" "$ac_cv_func_mmap_fixed_mapped" >&6; } +if test $ac_cv_func_mmap_fixed_mapped = yes; then + +printf "%s\n" "#define HAVE_MMAP 1" >>confdefs.h + +fi +rm -f conftest.mmap conftest.txt + +ac_fn_c_check_func "$LINENO" "gmtime_r" "ac_cv_func_gmtime_r" +if test "x$ac_cv_func_gmtime_r" = xyes +then : + printf "%s\n" "#define HAVE_GMTIME_R 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "fsync" "ac_cv_func_fsync" +if test "x$ac_cv_func_fsync" = xyes +then : + printf "%s\n" "#define HAVE_FSYNC 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "drand48" "ac_cv_func_drand48" +if test "x$ac_cv_func_drand48" = xyes +then : + printf "%s\n" "#define HAVE_DRAND48 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "srand48_deterministic" "ac_cv_func_srand48_deterministic" +if test "x$ac_cv_func_srand48_deterministic" = xyes +then : + printf "%s\n" "#define HAVE_SRAND48_DETERMINISTIC 1" >>confdefs.h + +fi + + +# Darwin has a dubious fdatasync() symbol, but no declaration in +as_ac_Symbol=`printf "%s\n" "ac_cv_have_decl_fdatasync(int)" | sed "$as_sed_sh"` +ac_fn_check_decl "$LINENO" "fdatasync(int)" "$as_ac_Symbol" "$ac_includes_default" "$ac_c_undeclared_builtin_options" "CFLAGS" +if eval test \"x\$"$as_ac_Symbol"\" = x"yes" +then : + 
ac_fn_c_check_func "$LINENO" "fdatasync" "ac_cv_func_fdatasync" +if test "x$ac_cv_func_fdatasync" = xyes +then : + printf "%s\n" "#define HAVE_FDATASYNC 1" >>confdefs.h + +fi + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __attribute__((constructor))" >&5 +printf %s "checking for __attribute__((constructor))... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + static __attribute__((constructor)) void noop(void) {} + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_ATTRIBUTE_CONSTRUCTOR 1" >>confdefs.h + + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for clock_gettime with CLOCK_PROCESS_CPUTIME_ID" >&5 +printf %s "checking for clock_gettime with CLOCK_PROCESS_CPUTIME_ID... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ + + struct timespec ts; + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_CLOCK_GETTIME_CPUTIME 1" >>confdefs.h + + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +if test $enable_plugins != no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing dlsym" >&5 +printf %s "checking for library containing dlsym... 
" >&6; } +if test ${ac_cv_search_dlsym+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char dlsym (void); +int +main (void) +{ +return dlsym (); + ; + return 0; +} +_ACEOF +for ac_lib in '' dl +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_dlsym=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_dlsym+y} +then : + break +fi +done +if test ${ac_cv_search_dlsym+y} +then : + +else case e in #( + e) ac_cv_search_dlsym=no ;; +esac +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_dlsym" >&5 +printf "%s\n" "$ac_cv_search_dlsym" >&6; } +ac_res=$ac_cv_search_dlsym +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +else case e in #( + e) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "dlsym() not found + +Plugin support requires dynamic linking facilities from the operating system. +Either configure with --disable-plugins or resolve this error to build HTSlib." 
"$LINENO" 5 ;; +esac +fi + + # Check if the compiler understands -rdynamic + # TODO Test whether this is required and/or needs tweaking per-platform + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler accepts -rdynamic" >&5 +printf %s "checking whether the compiler accepts -rdynamic... " >&6; } +if test ${hts_cv_check__rdynamic+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_save_cflags=$CFLAGS + ac_check_save_ldflags=$LDFLAGS + CFLAGS="$CFLAGS -rdynamic" + LDFLAGS="$LDFLAGS -rdynamic" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + hts_cv_check__rdynamic=yes + if test "xrdynamic_flag" != x +then : + eval rdynamic_flag="-rdynamic" +fi +else case e in #( + e) hts_cv_check__rdynamic=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ac_check_save_cflags + LDFLAGS=$ac_check_save_ldflags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hts_cv_check__rdynamic" >&5 +printf "%s\n" "$hts_cv_check__rdynamic" >&6; } + + if test x"$rdynamic_flag" != "xno" +then : + LDFLAGS="$LDFLAGS $rdynamic_flag" + static_LDFLAGS="$static_LDFLAGS $rdynamic_flag" +fi + case "$ac_cv_search_dlsym" in + -l*) static_LIBS="$static_LIBS $ac_cv_search_dlsym" ;; + esac + +printf "%s\n" "#define ENABLE_PLUGINS 1" >>confdefs.h + + + +printf "%s\n" "#define PLUGIN_EXT \"$PLUGIN_EXT\"" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing log" >&5 +printf %s "checking for library containing log... " >&6; } +if test ${ac_cv_search_log+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char log (void); +int +main (void) +{ +return log (); + ; + return 0; +} +_ACEOF +for ac_lib in '' m +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_log=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_log+y} +then : + break +fi +done +if test ${ac_cv_search_log+y} +then : + +else case e in #( + e) ac_cv_search_log=no ;; +esac +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_log" >&5 +printf "%s\n" "$ac_cv_search_log" >&6; } +ac_res=$ac_cv_search_log +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +else case e in #( + e) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "log() not found + +HTSLIB requires a working floating-point math library. +FAILED. This error must be resolved in order to build HTSlib successfully." "$LINENO" 5 ;; +esac +fi + + +zlib_devel=ok +ac_fn_c_check_header_compile "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "; +" +if test "x$ac_cv_header_zlib_h" = xyes +then : + +else case e in #( + e) zlib_devel=missing ;; +esac +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for inflate in -lz" >&5 +printf %s "checking for inflate in -lz... 
" >&6; } +if test ${ac_cv_lib_z_inflate+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lz $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char inflate (void); +int +main (void) +{ +return inflate (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_z_inflate=yes +else case e in #( + e) ac_cv_lib_z_inflate=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_inflate" >&5 +printf "%s\n" "$ac_cv_lib_z_inflate" >&6; } +if test "x$ac_cv_lib_z_inflate" = xyes +then : + printf "%s\n" "#define HAVE_LIBZ 1" >>confdefs.h + + LIBS="-lz $LIBS" + +else case e in #( + e) zlib_devel=missing ;; +esac +fi + + +if test $zlib_devel != ok; then + cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "zlib development files not found + +HTSlib uses compression routines from the zlib library . +Building HTSlib requires zlib development files to be installed on the build +machine; you may need to ensure a package such as zlib1g-dev (on Debian or +Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions or Cygwin) +is installed. + +FAILED. This error must be resolved in order to build HTSlib successfully." 
"$LINENO" 5 +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing recv" >&5 +printf %s "checking for library containing recv... " >&6; } +if test ${ac_cv_search_recv+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char recv (void); +int +main (void) +{ +return recv (); + ; + return 0; +} +_ACEOF +for ac_lib in '' socket ws2_32 +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_recv=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_recv+y} +then : + break +fi +done +if test ${ac_cv_search_recv+y} +then : + +else case e in #( + e) ac_cv_search_recv=no ;; +esac +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_recv" >&5 +printf "%s\n" "$ac_cv_search_recv" >&6; } +ac_res=$ac_cv_search_recv +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +if test "$ac_cv_search_recv" != "none required" +then + static_LIBS="$static_LIBS $ac_cv_search_recv" +fi +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing recv using declaration" >&5 +printf %s "checking for library containing recv using declaration... 
" >&6; } + LIBS="-lws2_32 $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +recv(0, 0, 0, 0); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: -lws2_32" >&5 +printf "%s\n" "-lws2_32" >&6; } + static_LIBS="$static_LIBS -lws2_32" +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "unable to find the recv() function" "$LINENO" 5 ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext ;; +esac +fi + + +if test "$enable_bz2" != no; then + bz2_devel=ok + ac_fn_c_check_header_compile "$LINENO" "bzlib.h" "ac_cv_header_bzlib_h" "; +" +if test "x$ac_cv_header_bzlib_h" = xyes +then : + +else case e in #( + e) bz2_devel=missing ;; +esac +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BZ2_bzBuffToBuffCompress in -lbz2" >&5 +printf %s "checking for BZ2_bzBuffToBuffCompress in -lbz2... " >&6; } +if test ${ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lbz2 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char BZ2_bzBuffToBuffCompress (void); +int +main (void) +{ +return BZ2_bzBuffToBuffCompress (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress=yes +else case e in #( + e) ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" >&5 +printf "%s\n" "$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" >&6; } +if test "x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress" = xyes +then : + printf "%s\n" "#define HAVE_LIBBZ2 1" >>confdefs.h + + LIBS="-lbz2 $LIBS" + +else case e in #( + e) bz2_devel=missing ;; +esac +fi + + if test $bz2_devel != ok; then + cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "libbzip2 development files not found + +The CRAM format may use bzip2 compression, which is implemented in HTSlib +by using compression routines from libbzip2 . + +Building HTSlib requires libbzip2 development files to be installed on the +build machine; you may need to ensure a package such as libbz2-dev (on Debian +or Ubuntu Linux) or bzip2-devel (on RPM-based Linux distributions or Cygwin) +is installed. + +Either configure with --disable-bz2 (which will make some CRAM files +produced elsewhere unreadable) or resolve this error to build HTSlib." 
"$LINENO" 5 + fi + if test -n "$PKG_CONFIG" && "$PKG_CONFIG" --exists bzip2; then + pc_requires="$pc_requires bzip2" + else + private_LIBS="$private_LIBS -lbz2" + fi + static_LIBS="$static_LIBS -lbz2" +fi + +if test "$enable_lzma" != no; then + lzma_devel=ok + for ac_header in lzma.h +do : + ac_fn_c_check_header_compile "$LINENO" "lzma.h" "ac_cv_header_lzma_h" "; +" +if test "x$ac_cv_header_lzma_h" = xyes +then : + printf "%s\n" "#define HAVE_LZMA_H 1" >>confdefs.h + +else case e in #( + e) lzma_devel=header-missing ;; +esac +fi + +done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for lzma_easy_buffer_encode in -llzma" >&5 +printf %s "checking for lzma_easy_buffer_encode in -llzma... " >&6; } +if test ${ac_cv_lib_lzma_lzma_easy_buffer_encode+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-llzma $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char lzma_easy_buffer_encode (void); +int +main (void) +{ +return lzma_easy_buffer_encode (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_lzma_lzma_easy_buffer_encode=yes +else case e in #( + e) ac_cv_lib_lzma_lzma_easy_buffer_encode=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lzma_lzma_easy_buffer_encode" >&5 +printf "%s\n" "$ac_cv_lib_lzma_lzma_easy_buffer_encode" >&6; } +if test "x$ac_cv_lib_lzma_lzma_easy_buffer_encode" = xyes +then : + printf "%s\n" "#define HAVE_LIBLZMA 1" >>confdefs.h + + LIBS="-llzma $LIBS" + +else case e in #( + e) lzma_devel=missing ;; +esac +fi + + if test $lzma_devel = missing; then + cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "liblzma development files not found + +The CRAM format may use LZMA2 compression, which is implemented in HTSlib +by using compression routines from liblzma . + +Building HTSlib requires liblzma development files to be installed on the +build machine; you may need to ensure a package such as liblzma-dev (on Debian +or Ubuntu Linux), xz-devel (on RPM-based Linux distributions or Cygwin), or +xz (via Homebrew on macOS) is installed; or build XZ Utils from source. + +Either configure with --disable-lzma (which will make some CRAM files +produced elsewhere unreadable) or resolve this error to build HTSlib." 
"$LINENO" 5 + fi + pc_requires="$pc_requires liblzma" + static_LIBS="$static_LIBS -llzma" +fi + +if test "x$with_external_htscodecs" != "xno" +then : + libhtscodecs=ok + ac_fn_c_check_header_compile "$LINENO" "htscodecs/rANS_static4x16.h" "ac_cv_header_htscodecs_rANS_static4x16_h" "; +" +if test "x$ac_cv_header_htscodecs_rANS_static4x16_h" = xyes +then : + +else case e in #( + e) libhtscodecs='missing header' ;; +esac +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for rans_compress_bound_4x16 in -lhtscodecs" >&5 +printf %s "checking for rans_compress_bound_4x16 in -lhtscodecs... " >&6; } +if test ${ac_cv_lib_htscodecs_rans_compress_bound_4x16+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lhtscodecs $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char rans_compress_bound_4x16 (void); +int +main (void) +{ +return rans_compress_bound_4x16 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_htscodecs_rans_compress_bound_4x16=yes +else case e in #( + e) ac_cv_lib_htscodecs_rans_compress_bound_4x16=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_htscodecs_rans_compress_bound_4x16" >&5 +printf "%s\n" "$ac_cv_lib_htscodecs_rans_compress_bound_4x16" >&6; } +if test "x$ac_cv_lib_htscodecs_rans_compress_bound_4x16" = xyes +then : + : +else case e in #( + e) libhtscodecs='missing library' ;; +esac +fi + + if test "$libhtscodecs" = "ok" +then : + +printf "%s\n" "#define HAVE_EXTERNAL_LIBHTSCODECS 1" >>confdefs.h + + LIBS="-lhtscodecs $LIBS" + private_LIBS="-lhtscodecs $private_LIBS" + static_LIBS="-lhtscodecs $static_LIBS" + selected_htscodecs_mk="htscodecs_external.mk" +else case e in #( + e) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "libhtscodecs development files not found: $libhtscodecs + +You asked to use an external htscodecs library, but do not have the +required header / library files. You either need to supply these and +if necessary set CPPFLAGS and LDFLAGS so the compiler can find them; +or configure using --without-external-htscodecs to build the required +functions from the htscodecs submodule. +" "$LINENO" 5 ;; +esac +fi +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether htscodecs files are present" >&5 +printf %s "checking whether htscodecs files are present... 
" >&6; } + if test -e "$srcdir/htscodecs/htscodecs/rANS_static4x16.h" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + selected_htscodecs_mk="htscodecs_bundled.mk" +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + if test -e "$srcdir/.git" +then : + cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "htscodecs submodule files not present. + +HTSlib uses some functions from the htscodecs project, which is normally +included as a submodule. Try running: + + git submodule update --init --recursive + +in the top-level htslib directory to update it, and then re-run configure. +" "$LINENO" 5 +else case e in #( + e) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "htscodecs submodule files not present. + +You have an incomplete distribution. Please try downloading one of the +official releases from https://www.htslib.org +" "$LINENO" 5 ;; +esac +fi ;; +esac +fi ;; +esac +fi + +if test "x$with_libdeflate" != "xno" +then : + libdeflate=ok + ac_fn_c_check_header_compile "$LINENO" "libdeflate.h" "ac_cv_header_libdeflate_h" "; +" +if test "x$ac_cv_header_libdeflate_h" = xyes +then : + +else case e in #( + e) libdeflate='missing header' ;; +esac +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libdeflate_deflate_compress in -ldeflate" >&5 +printf %s "checking for libdeflate_deflate_compress in -ldeflate... " >&6; } +if test ${ac_cv_lib_deflate_libdeflate_deflate_compress+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-ldeflate $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char libdeflate_deflate_compress (void); +int +main (void) +{ +return libdeflate_deflate_compress (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_deflate_libdeflate_deflate_compress=yes +else case e in #( + e) ac_cv_lib_deflate_libdeflate_deflate_compress=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_deflate_libdeflate_deflate_compress" >&5 +printf "%s\n" "$ac_cv_lib_deflate_libdeflate_deflate_compress" >&6; } +if test "x$ac_cv_lib_deflate_libdeflate_deflate_compress" = xyes +then : + : +else case e in #( + e) libdeflate='missing library' ;; +esac +fi + + if test "$libdeflate" = "ok" +then : + +printf "%s\n" "#define HAVE_LIBDEFLATE 1" >>confdefs.h + + LIBS="-ldeflate $LIBS" + private_LIBS="$private_LIBS -ldeflate" + static_LIBS="$static_LIBS -ldeflate" +else case e in #( + e) if test "x$with_libdeflate" != "xcheck" +then : + cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "libdeflate development files not found: $libdeflate + +You requested libdeflate, but do not have the required header / library +files. The source for libdeflate is available from +. You may have to adjust +search paths in CPPFLAGS and/or LDFLAGS if the header and library +are not currently on them. + +Either configure with --without-libdeflate or resolve this error to build +HTSlib." 
"$LINENO" 5 +fi ;; +esac +fi +fi + +libcurl=disabled +if test "$enable_libcurl" != no; then + libcurl_devel=ok + ac_fn_c_check_header_compile "$LINENO" "curl/curl.h" "ac_cv_header_curl_curl_h" "; +" +if test "x$ac_cv_header_curl_curl_h" = xyes +then : + +else case e in #( + e) libcurl_devel="headers not found" ;; +esac +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for curl_easy_pause in -lcurl" >&5 +printf %s "checking for curl_easy_pause in -lcurl... " >&6; } +if test ${ac_cv_lib_curl_curl_easy_pause+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lcurl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char curl_easy_pause (void); +int +main (void) +{ +return curl_easy_pause (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_curl_curl_easy_pause=yes +else case e in #( + e) ac_cv_lib_curl_curl_easy_pause=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curl_curl_easy_pause" >&5 +printf "%s\n" "$ac_cv_lib_curl_curl_easy_pause" >&6; } +if test "x$ac_cv_lib_curl_curl_easy_pause" = xyes +then : + : +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for curl_easy_init in -lcurl" >&5 +printf %s "checking for curl_easy_init in -lcurl... 
" >&6; } +if test ${ac_cv_lib_curl_curl_easy_init+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lcurl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char curl_easy_init (void); +int +main (void) +{ +return curl_easy_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_curl_curl_easy_init=yes +else case e in #( + e) ac_cv_lib_curl_curl_easy_init=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curl_curl_easy_init" >&5 +printf "%s\n" "$ac_cv_lib_curl_curl_easy_init" >&6; } +if test "x$ac_cv_lib_curl_curl_easy_init" = xyes +then : + libcurl_devel="library is too old (7.18+ required)" +else case e in #( + e) libcurl_devel="library not found" ;; +esac +fi + ;; +esac +fi + + + if test "$libcurl_devel" = ok; then + +printf "%s\n" "#define HAVE_LIBCURL 1" >>confdefs.h + + libcurl=enabled + elif test "$enable_libcurl" = check; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: libcurl not enabled: $libcurl_devel" >&5 +printf "%s\n" "$as_me: WARNING: libcurl not enabled: $libcurl_devel" >&2;} + else + cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "libcurl $libcurl_devel + +Support for HTTPS and other SSL-based URLs requires routines from the libcurl +library . 
Building HTSlib with libcurl enabled +requires libcurl development files to be installed on the build machine; you +may need to ensure a package such as libcurl4-{gnutls,nss,openssl}-dev (on +Debian or Ubuntu Linux) or libcurl-devel (on RPM-based Linux distributions +or Cygwin) is installed. + +Either configure with --disable-libcurl or resolve this error to build HTSlib." "$LINENO" 5 + fi + + if test "$libcurl" = enabled ; then + if test "$enable_plugins" != yes ; then + static_LIBS="$static_LIBS -lcurl" + fi + fi +fi + + +gcs=disabled +if test "$enable_gcs" != no; then + if test $libcurl = enabled; then + +printf "%s\n" "#define ENABLE_GCS 1" >>confdefs.h + + gcs=enabled + else + case "$enable_gcs" in + check) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: GCS support not enabled: requires libcurl support" >&5 +printf "%s\n" "$as_me: WARNING: GCS support not enabled: requires libcurl support" >&2;} ;; + *) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "GCS support not enabled + +Support for Google Cloud Storage URLs requires libcurl support to be enabled +in HTSlib. Configure with --enable-libcurl in order to use GCS URLs." "$LINENO" 5 + ;; + esac + fi +fi + + +s3=disabled +if test "$enable_s3" != no; then + if test $libcurl = enabled; then + s3=enabled + need_crypto="$enable_s3" + else + case "$enable_s3" in + check) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: S3 support not enabled: requires libcurl support" >&5 +printf "%s\n" "$as_me: WARNING: S3 support not enabled: requires libcurl support" >&2;} ;; + *) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "S3 support not enabled + +Support for Amazon AWS S3 URLs requires libcurl support to be enabled +in HTSlib. Configure with --enable-libcurl in order to use S3 URLs." 
"$LINENO" 5 + ;; + esac + fi +fi + +CRYPTO_LIBS= +if test $need_crypto != no; then + ac_fn_c_check_func "$LINENO" "CCHmac" "ac_cv_func_CCHmac" +if test "x$ac_cv_func_CCHmac" = xyes +then : + +printf "%s\n" "#define HAVE_COMMONCRYPTO 1" >>confdefs.h + +else case e in #( + e) save_LIBS=$LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing HMAC" >&5 +printf %s "checking for library containing HMAC... " >&6; } +if test ${ac_cv_search_HMAC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char HMAC (void); +int +main (void) +{ +return HMAC (); + ; + return 0; +} +_ACEOF +for ac_lib in '' crypto +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_HMAC=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_HMAC+y} +then : + break +fi +done +if test ${ac_cv_search_HMAC+y} +then : + +else case e in #( + e) ac_cv_search_HMAC=no ;; +esac +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_HMAC" >&5 +printf "%s\n" "$ac_cv_search_HMAC" >&6; } +ac_res=$ac_cv_search_HMAC +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +printf "%s\n" "#define HAVE_HMAC 1" >>confdefs.h + + case "$ac_cv_search_HMAC" in + -l*) 
CRYPTO_LIBS=$ac_cv_search_HMAC ;; + esac +else case e in #( + e) case "$need_crypto" in + check) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: S3 support not enabled: requires SSL development files" >&5 +printf "%s\n" "$as_me: WARNING: S3 support not enabled: requires SSL development files" >&2;} + s3=disabled ;; + *) cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + as_fn_error $? "SSL development files not found + +Support for AWS S3 URLs requires routines from an SSL library. Building +HTSlib with libcurl enabled requires SSL development files to be installed +on the build machine; you may need to ensure a package such as libgnutls-dev, +libnss3-dev, or libssl-dev (on Debian or Ubuntu Linux, corresponding to the +libcurl4-*-dev package installed), or openssl-devel (on RPM-based Linux +distributions or Cygwin) is installed. + +Either configure with --disable-s3 or resolve this error to build HTSlib." "$LINENO" 5 ;; + esac ;; +esac +fi + + LIBS=$save_LIBS ;; +esac +fi + + if test "$enable_plugins" != yes ; then + static_LIBS="$static_LIBS $CRYPTO_LIBS" + fi +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing regcomp" >&5 +printf %s "checking for library containing regcomp... " >&6; } +if test ${ac_cv_search_regcomp+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char regcomp (void); +int +main (void) +{ +return regcomp (); + ; + return 0; +} +_ACEOF +for ac_lib in '' regex +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_regcomp=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_regcomp+y} +then : + break +fi +done +if test ${ac_cv_search_regcomp+y} +then : + +else case e in #( + e) ac_cv_search_regcomp=no ;; +esac +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_regcomp" >&5 +printf "%s\n" "$ac_cv_search_regcomp" >&6; } +ac_res=$ac_cv_search_regcomp +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + libregex=needed +fi + + + +if test "$s3" = enabled ; then + +printf "%s\n" "#define ENABLE_S3 1" >>confdefs.h + +fi + +if test "x$hts_late_cflags" != x +then : + CFLAGS="$CFLAGS $hts_late_cflags" +fi + + + + + + + + + +ac_config_files="$ac_config_files config.mk htslib.pc.tmp:htslib.pc.in" + +ac_config_links="$ac_config_links htscodecs.mk:$selected_htscodecs_mk" + + +if test "$srcdir" != .; then + # Set up for a separate build directory. As HTSlib uses a non-recursive + # makefile, we need to create additional build subdirectories explicitly. 
+ ac_config_links="$ac_config_links Makefile:Makefile htslib.mk:htslib.mk" + + ac_config_files="$ac_config_files htslib_vars.mk:builddir_vars.mk.in" + + ac_config_commands="$ac_config_commands mkdir" + +fi + +# @HTSDIRslash_if_relsrcdir@ will be empty when $srcdir is absolute +case "$srcdir" in + /*) HTSDIRslash_if_relsrcdir= ;; + *) HTSDIRslash_if_relsrcdir='$(HTSDIR)/' ;; +esac + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# 'ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* 'ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. 
+( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # 'set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # 'set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +# Check whether --enable-year2038 was given. +if test ${enable_year2038+y} +then : + enableval=$enable_year2038; +fi + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. 
+ +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else case e in #( + e) case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. 
+if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. +if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as 'sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. 
+as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else case e in #( + e) as_fn_append () + { + eval $1=\$$1\$2 + } ;; +esac +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else case e in #( + e) as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } ;; +esac +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. + # In both cases, we have to default to 'cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" +as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated + +# Sed expression to map a string onto a valid variable name. +as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" +as_tr_sh="eval sed '$as_sed_sh'" # deprecated + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. 
instead of their +# values after options handling. +ac_log=" +This file was extended by HTSlib $as_me 1.21, which was +generated by GNU Autoconf 2.72. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_links="$ac_config_links" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +'$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration links: +$config_links + +Configuration commands: +$config_commands + +Report bugs to . +HTSlib home page: ." 
+ +_ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config='$ac_cs_config_escaped' +ac_cs_version="\\ +HTSlib config.status 1.21 +configured by $0, generated by GNU Autoconf 2.72, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2023 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + printf "%s\n" "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + printf "%s\n" "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? 
"missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: '$1' +Try '$0 --help' for more information.";; + --help | --hel | -h ) + printf "%s\n" "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: '$1' +Try '$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + printf "%s\n" "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. 
+for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + "config.mk") CONFIG_FILES="$CONFIG_FILES config.mk" ;; + "htslib.pc.tmp") CONFIG_FILES="$CONFIG_FILES htslib.pc.tmp:htslib.pc.in" ;; + "htscodecs.mk") CONFIG_LINKS="$CONFIG_LINKS htscodecs.mk:$selected_htscodecs_mk" ;; + "Makefile") CONFIG_LINKS="$CONFIG_LINKS Makefile:Makefile" ;; + "htslib.mk") CONFIG_LINKS="$CONFIG_LINKS htslib.mk:htslib.mk" ;; + "htslib_vars.mk") CONFIG_FILES="$CONFIG_FILES htslib_vars.mk:builddir_vars.mk.in" ;; + "mkdir") CONFIG_COMMANDS="$CONFIG_COMMANDS mkdir" ;; + + *) as_fn_error $? "invalid argument: '$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files + test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers + test ${CONFIG_LINKS+y} || CONFIG_LINKS=$config_links + test ${CONFIG_COMMANDS+y} || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to '$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. 
+ +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with './config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with './config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script 'defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :L $CONFIG_LINKS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag '$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain ':'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: '$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is 'configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`printf "%s\n" "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +printf "%s\n" X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when '$srcdir' = '.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable 'datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable 'datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi + ;; + :L) + # + # CONFIG_LINK + # + + if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then + : + else + # Prefer the file from the source tree if names are identical. + if test "$ac_source" = "$ac_file" || test ! 
-r "$ac_source"; then + ac_source=$srcdir/$ac_source + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: linking $ac_source to $ac_file" >&5 +printf "%s\n" "$as_me: linking $ac_source to $ac_file" >&6;} + + if test ! -r "$ac_source"; then + as_fn_error $? "$ac_source: file not found" "$LINENO" 5 + fi + rm -f "$ac_file" + + # Try a relative symlink, then a hard link, then a copy. + case $ac_source in + [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;; + *) ac_rel_source=$ac_top_build_prefix$ac_source ;; + esac + ln -s "$ac_rel_source" "$ac_file" 2>/dev/null || + ln "$ac_source" "$ac_file" 2>/dev/null || + cp -p "$ac_source" "$ac_file" || + as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5 + fi + ;; + :C) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +printf "%s\n" "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "mkdir":C) as_dir=cram; as_fn_mkdir_p + as_dir=htscodecs/htscodecs; as_fn_mkdir_p + as_dir=htscodecs/tests; as_fn_mkdir_p + as_dir=test/fuzz; as_fn_mkdir_p + as_dir=test/longrefs; as_fn_mkdir_p + as_dir=test/tabix; as_fn_mkdir_p ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. 
+if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + + diff --git a/src/htslib-1.21/configure.ac b/src/htslib-1.21/configure.ac new file mode 100644 index 0000000..87e928d --- /dev/null +++ b/src/htslib-1.21/configure.ac @@ -0,0 +1,675 @@ +# Configure script for htslib, a C library for high-throughput sequencing data. +# +# Copyright (C) 2015-2024 Genome Research Ltd. +# +# Author: John Marshall +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +dnl Process this file with autoconf to produce a configure script +AC_INIT([HTSlib], m4_esyscmd_s([./version.sh 2>/dev/null]), + [samtools-help@lists.sourceforge.net], [], [http://www.htslib.org/]) +AC_PREREQ(2.63) dnl This version introduced 4-argument AC_CHECK_HEADER +AC_CONFIG_SRCDIR(hts.c) +AC_CONFIG_HEADERS(config.h) + +m4_include([m4/hts_prog_cc_warnings.m4]) +m4_include([m4/hts_check_compile_flags_needed.m4]) +m4_include([m4/hts_hide_dynamic_syms.m4]) +m4_include([m4/pkg.m4]) + +dnl Copyright notice to be copied into the generated configure script +AC_COPYRIGHT([Portions copyright (C) 2020-2024 Genome Research Ltd. + +This configure script is free software: you are free to change and +redistribute it. There is NO WARRANTY, to the extent permitted by law.]) + +dnl Notes to be copied (by autoheader) into the generated config.h.in +AH_TOP([/* If you use configure, this file provides @%:@defines reflecting your + configuration choices. If you have not run configure, suitable + conservative defaults will be used. + + Autoheader adds a number of items to this template file that are not + used by HTSlib: STDC_HEADERS and most HAVE_*_H header file defines + are immaterial, as we assume standard ISO C headers and facilities; + the PACKAGE_* defines are unused and are overridden by the more + accurate PACKAGE_VERSION as computed by the Makefile. */]) + +dnl Variant of AC_MSG_ERROR that ensures subsequent make(1) invocations fail +dnl until the configuration error is resolved and configure is run again. 
+AC_DEFUN([MSG_ERROR], + [cat > config.mk <<'EOF' +ifneq ($(MAKECMDGOALS),distclean) +$(error Resolve configure error first) +endif +EOF + AC_MSG_ERROR([$1], [$2])]) + +AC_PROG_CC +AC_PROG_RANLIB + +dnl Turn on compiler warnings, if possible +HTS_PROG_CC_WARNINGS +dnl Flags to treat warnings as errors. These need to be applied to CFLAGS +dnl later as they can interfere with some of the tests (notably AC_SEARCH_LIBS) +HTS_PROG_CC_WERROR(hts_late_cflags) + +# HTSlib uses X/Open-only facilities (M_SQRT2 etc, drand48() etc), and +# various POSIX functions that are provided by various _POSIX_C_SOURCE values +# or by _XOPEN_SOURCE >= 500. It also uses usleep(), which is removed when +# _XOPEN_SOURCE >= 700. Additionally, some definitions may require +# _XOPEN_SOURCE >= 600 on some platforms (snprintf on MinGW, +# PTHREAD_MUTEX_RECURSIVE on some Linux distributions). Hence we set it to 600. + +# Define _XOPEN_SOURCE unless the user has already done so via $CPPFLAGS etc. +AC_CHECK_DECL([_XOPEN_SOURCE], [], + [AC_DEFINE([_XOPEN_SOURCE], [600], [Specify X/Open requirements])], + []) + +dnl Check that we have cpuid, and if so run the x86 SIMD checks +AC_CHECK_DECLS([__get_cpuid_max, __cpuid_count], [ + hts_have_cpuid=yes +], [ + hts_have_cpuid=no +], [[#include <cpuid.h>]]) + +AS_IF(test "x$hts_have_cpuid" = "xyes", [ +dnl Options for rANS32x16 sse4.1 version - sse4.1 +HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1 -mssse3 -mpopcnt], + [AC_LANG_PROGRAM([[ + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + ]],[[ + #ifdef __x86_64__ + __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); + __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b); + return _mm_popcnt_u32(*((char *) &c)); + #endif + ]])], [ + hts_cflags_sse4="$flags_needed" + AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.]) + AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.]) + AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if rANS 
source using SSE4.1 can be compiled. +]) + +dnl Propagate HTSlib's unaligned access preference to htscodecs + AH_VERBATIM([UBSAN],[ +/* Prevent unaligned access in htscodecs SSE4 rANS codec */ +#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0 +#undef UBSAN +#endif]) + AC_DEFINE([UBSAN],1,[]) +]) +AC_SUBST([hts_cflags_sse4]) + +dnl Options for rANS32x16 avx2 version +HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2 -mpopcnt], [AC_LANG_PROGRAM([[ + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + ]],[[ + #ifdef __x86_64__ + __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + __m256i b = _mm256_add_epi32(a, a); + long long c = _mm256_extract_epi64(b, 0); + return _mm_popcnt_u32((int) c); + #endif + ]])], [ + hts_cflags_avx2="$flags_needed" + AC_SUBST([hts_cflags_avx2]) + AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.]) + AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if rANS source using AVX2 can be compiled.]) +]) + +dnl Options for rANS32x16 avx512 version +HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f -mpopcnt], + [AC_LANG_PROGRAM([[ + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + ]],[[ + #ifdef __x86_64__ + __m512i a = _mm512_set1_epi32(1); + __m512i b = _mm512_add_epi32(a, a); + __m256i c = _mm512_castsi512_si256(b); + __m256i d = _mm512_extracti64x4_epi64(a, 1); + return _mm_popcnt_u32(*((char *) &c)) + (*(char *) &d); + #endif + ]])], [ + hts_cflags_avx512="$flags_needed" + AC_SUBST([hts_cflags_avx512]) + AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.]) + AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if rANS source using AVX512F can be compiled.]) +]) + +dnl Check for working __builtin_cpu_supports (ssse3 is broken on some clangs) +AC_MSG_CHECKING([for working __builtin_cpu_supports("ssse3")]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([],[ + if (__builtin_cpu_supports("ssse3")) { + return 0; + } +])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_BUILTIN_CPU_SUPPORT_SSSE3], 1, + 
[Defined to 1 if __builtin_cpu_supports("ssse3") works]) +], [ + AC_MSG_RESULT([no]) +]) + +dnl Check for function attribute used in conjunction with __builtin_cpu_supports +AC_MSG_CHECKING([for __attribute__((target))]) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + __attribute__((target("ssse3"))) + int zero(void) { + return 0; + } +]], [[zero();]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_ATTRIBUTE_TARGET], 1, + [Define if __attribute__((target(...))) is available.]) +], [ + AC_MSG_RESULT([no]) +]) + +]) dnl End of AS_IF(hts_have_cpuid) + +dnl Avoid chicken-and-egg problem where pkg-config supplies the +dnl PKG_PROG_PKG_CONFIG macro, but we want to use it to check +dnl for pkg-config... +m4_ifdef([PKG_PROG_PKG_CONFIG], [PKG_PROG_PKG_CONFIG], [PKG_CONFIG=""]) + +need_crypto=no +pc_requires= +static_LDFLAGS=$LDFLAGS +static_LIBS='-lpthread -lz -lm' +private_LIBS=$LDFLAGS + +AC_ARG_ENABLE([versioned-symbols], + [AS_HELP_STRING([--disable-versioned-symbols], + [disable versioned symbols in shared library])], + [], [enable_versioned_symbols=yes]) + +AC_ARG_ENABLE([bz2], + [AS_HELP_STRING([--disable-bz2], + [omit support for BZ2-compressed CRAM files])], + [], [enable_bz2=yes]) + +AC_ARG_ENABLE([gcs], + [AS_HELP_STRING([--enable-gcs], + [support Google Cloud Storage URLs])], + [], [enable_gcs=check]) + +AC_SYS_LARGEFILE + +AC_ARG_ENABLE([libcurl], + [AS_HELP_STRING([--enable-libcurl], + [enable libcurl-based support for http/https/etc URLs])], + [], [enable_libcurl=check]) + +AC_ARG_ENABLE([lzma], + [AS_HELP_STRING([--disable-lzma], + [omit support for LZMA-compressed CRAM files])], + [], [enable_lzma=yes]) + +AC_ARG_ENABLE([plugins], + [AS_HELP_STRING([--enable-plugins], + [enable separately-compiled plugins for file access])], + [], [enable_plugins=no]) +AC_SUBST(enable_plugins) + +AC_ARG_WITH([external-htscodecs], + [AS_HELP_STRING([--with-external-htscodecs], + [get htscodecs functions from a shared library])], + [], [with_external_htscodecs=no]) 
+AC_SUBST(with_external_htscodecs) + +AC_ARG_WITH([libdeflate], + [AS_HELP_STRING([--with-libdeflate], + [use libdeflate for faster crc and deflate algorithms])], + [], [with_libdeflate=check]) + +AC_ARG_WITH([plugin-dir], + [AS_HELP_STRING([--with-plugin-dir=DIR], + [plugin installation location [LIBEXECDIR/htslib]])], + [case $withval in + yes|no) MSG_ERROR([no directory specified for --with-plugin-dir]) ;; + esac], + [with_plugin_dir='$(libexecdir)/htslib']) +AC_SUBST([plugindir], $with_plugin_dir) + +AC_ARG_WITH([plugin-path], + [AS_HELP_STRING([--with-plugin-path=PATH], + [default HTS_PATH plugin search path [PLUGINDIR]])], + [case $withval in + yes) MSG_ERROR([no path specified for --with-plugin-path]) ;; + no) with_plugin_path= ;; + esac], + [with_plugin_path=$with_plugin_dir]) +AC_SUBST([pluginpath], $with_plugin_path) + +AC_ARG_ENABLE([s3], + [AS_HELP_STRING([--enable-s3], + [support Amazon AWS S3 URLs])], + [], [enable_s3=check]) + +basic_host=${host_alias:-unknown-`uname -s`} +AC_MSG_CHECKING([shared library type for $basic_host]) +case $basic_host in + *-cygwin* | *-CYGWIN*) + host_result="Cygwin DLL" + PLATFORM=CYGWIN + PLUGIN_EXT=.cygdll + ;; + *-darwin* | *-Darwin*) + host_result="Darwin dylib" + PLATFORM=Darwin + PLUGIN_EXT=.bundle + ;; + *-msys* | *-MSYS* | *-mingw* | *-MINGW*) + host_result="MSYS dll" + PLATFORM=MSYS + PLUGIN_EXT=.dll + # This also sets __USE_MINGW_ANSI_STDIO which in turn makes PRId64, + # %lld and %z printf formats work. It also enforces the snprintf to + # be C99 compliant so it returns the correct values (in kstring.c). + + # Now set by default, so no need to do it here. 
+ # CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE=600" + ;; + *) + host_result="plain .so" + PLATFORM=default + PLUGIN_EXT=.so + ;; +esac +AC_MSG_RESULT([$host_result]) +AC_SUBST([PLATFORM]) + +dnl Check for versioned symbol support +dnl Only try for .so shared libraries as other types won't work +AS_IF([test x"$PLATFORM" = xdefault && test x"$enable_versioned_symbols" = xyes], + [AC_CACHE_CHECK([whether the linker supports versioned symbols], + [hts_cv_have_versioned_symbols], [ + save_LDFLAGS=$LDFLAGS + LDFLAGS="-Wl,-version-script,$srcdir/htslib.map $LDFLAGS" + AC_LINK_IFELSE([AC_LANG_PROGRAM()], + [hts_cv_have_versioned_symbols=yes], + [hts_cv_have_versioned_symbols=no]) + LDFLAGS=$save_LDFLAGS + ]) + AS_IF([test "x$hts_cv_have_versioned_symbols" = xyes],[ + VERSION_SCRIPT_LDFLAGS='-Wl,-version-script,$(srcprefix)htslib.map' + AC_SUBST([VERSION_SCRIPT_LDFLAGS]) + ]) +]) + +dnl Try to get more control over which symbols are exported in the shared +dnl library. +HTS_HIDE_DYNAMIC_SYMBOLS + +dnl FIXME This pulls in dozens of standard header checks +AC_FUNC_MMAP +AC_CHECK_FUNCS([gmtime_r fsync drand48 srand48_deterministic]) + +# Darwin has a dubious fdatasync() symbol, but no declaration in <unistd.h> +AC_CHECK_DECL([fdatasync(int)], [AC_CHECK_FUNCS(fdatasync)]) + +AC_MSG_CHECKING([for __attribute__((constructor))]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ + static __attribute__((constructor)) void noop(void) {} +]], [])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_ATTRIBUTE_CONSTRUCTOR], 1, + [Define if __attribute__((constructor)) is available.]) +], [AC_MSG_RESULT([no])]) + +AC_MSG_CHECKING([for clock_gettime with CLOCK_PROCESS_CPUTIME_ID]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <time.h>]], [[ + struct timespec ts; + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); +]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_CLOCK_GETTIME_CPUTIME], 1, + [Define if clock_gettime exists and accepts CLOCK_PROCESS_CPUTIME_ID.]) +], [AC_MSG_RESULT([no])]) + +if test $enable_plugins != no; then + 
AC_SEARCH_LIBS([dlsym], [dl], [], + [MSG_ERROR([dlsym() not found + +Plugin support requires dynamic linking facilities from the operating system. +Either configure with --disable-plugins or resolve this error to build HTSlib.])]) + # Check if the compiler understands -rdynamic + # TODO Test whether this is required and/or needs tweaking per-platform + HTS_TEST_CC_C_LD_FLAG([-rdynamic],[rdynamic_flag]) + AS_IF([test x"$rdynamic_flag" != "xno"], + [LDFLAGS="$LDFLAGS $rdynamic_flag" + static_LDFLAGS="$static_LDFLAGS $rdynamic_flag"]) + case "$ac_cv_search_dlsym" in + -l*) static_LIBS="$static_LIBS $ac_cv_search_dlsym" ;; + esac + AC_DEFINE([ENABLE_PLUGINS], 1, [Define if HTSlib should enable plugins.]) + AC_SUBST([PLUGIN_EXT]) + AC_DEFINE_UNQUOTED([PLUGIN_EXT], ["$PLUGIN_EXT"], + [Platform-dependent plugin filename extension.]) +fi + +AC_SEARCH_LIBS([log], [m], [], + [MSG_ERROR([log() not found + +HTSLIB requires a working floating-point math library. +FAILED. This error must be resolved in order to build HTSlib successfully.])]) + +zlib_devel=ok +dnl Set a trivial non-empty INCLUDES to avoid excess default includes tests +AC_CHECK_HEADER([zlib.h], [], [zlib_devel=missing], [;]) +AC_CHECK_LIB(z, inflate, [], [zlib_devel=missing]) + +if test $zlib_devel != ok; then + MSG_ERROR([zlib development files not found + +HTSlib uses compression routines from the zlib library <http://zlib.net>. +Building HTSlib requires zlib development files to be installed on the build +machine; you may need to ensure a package such as zlib1g-dev (on Debian or +Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions or Cygwin) +is installed. + +FAILED. This error must be resolved in order to build HTSlib successfully.]) +fi + +dnl connect() etc. 
fns are in libc on linux, but libsocket on illumos/Solaris +AC_SEARCH_LIBS([recv], [socket ws2_32], [ +if test "$ac_cv_search_recv" != "none required" +then + static_LIBS="$static_LIBS $ac_cv_search_recv" +fi], + dnl on MinGW-i686, checking recv() linking requires an annotated declaration + [AC_MSG_CHECKING([for library containing recv using declaration]) + LIBS="-lws2_32 $LIBS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include <winsock2.h>]], [[recv(0, 0, 0, 0);]])], + [AC_MSG_RESULT([-lws2_32]) + static_LIBS="$static_LIBS -lws2_32"], + [AC_MSG_RESULT([no]) + MSG_ERROR([unable to find the recv() function])])]) + +if test "$enable_bz2" != no; then + bz2_devel=ok + AC_CHECK_HEADER([bzlib.h], [], [bz2_devel=missing], [;]) + AC_CHECK_LIB([bz2], [BZ2_bzBuffToBuffCompress], [], [bz2_devel=missing]) + if test $bz2_devel != ok; then + MSG_ERROR([libbzip2 development files not found + +The CRAM format may use bzip2 compression, which is implemented in HTSlib +by using compression routines from libbzip2 <http://www.bzip.org/>. + +Building HTSlib requires libbzip2 development files to be installed on the +build machine; you may need to ensure a package such as libbz2-dev (on Debian +or Ubuntu Linux) or bzip2-devel (on RPM-based Linux distributions or Cygwin) +is installed. + +Either configure with --disable-bz2 (which will make some CRAM files +produced elsewhere unreadable) or resolve this error to build HTSlib.]) + fi +dnl Unfortunately the 'bzip2' package-cfg module is not standard. +dnl Redhat/Fedora has it; Debian/Ubuntu does not. 
+ if test -n "$PKG_CONFIG" && "$PKG_CONFIG" --exists bzip2; then + pc_requires="$pc_requires bzip2" + else + private_LIBS="$private_LIBS -lbz2" + fi + static_LIBS="$static_LIBS -lbz2" +fi + +if test "$enable_lzma" != no; then + lzma_devel=ok + AC_CHECK_HEADERS([lzma.h], [], [lzma_devel=header-missing], [;]) + AC_CHECK_LIB([lzma], [lzma_easy_buffer_encode], [], [lzma_devel=missing]) + if test $lzma_devel = missing; then + MSG_ERROR([liblzma development files not found + +The CRAM format may use LZMA2 compression, which is implemented in HTSlib +by using compression routines from liblzma <http://tukaani.org/xz/>. + +Building HTSlib requires liblzma development files to be installed on the +build machine; you may need to ensure a package such as liblzma-dev (on Debian +or Ubuntu Linux), xz-devel (on RPM-based Linux distributions or Cygwin), or +xz (via Homebrew on macOS) is installed; or build XZ Utils from source. + +Either configure with --disable-lzma (which will make some CRAM files +produced elsewhere unreadable) or resolve this error to build HTSlib.]) + fi + pc_requires="$pc_requires liblzma" + static_LIBS="$static_LIBS -llzma" +fi + +AS_IF([test "x$with_external_htscodecs" != "xno"], + [libhtscodecs=ok + AC_CHECK_HEADER([htscodecs/rANS_static4x16.h],[], + [libhtscodecs='missing header'],[;]) + AC_CHECK_LIB([htscodecs],[rans_compress_bound_4x16], + [:],[libhtscodecs='missing library']) + AS_IF([test "$libhtscodecs" = "ok"], + [AC_DEFINE([HAVE_EXTERNAL_LIBHTSCODECS], 1, [Define if using an external libhtscodecs]) + LIBS="-lhtscodecs $LIBS" + private_LIBS="-lhtscodecs $private_LIBS" + static_LIBS="-lhtscodecs $static_LIBS" + selected_htscodecs_mk="htscodecs_external.mk"], + [MSG_ERROR([libhtscodecs development files not found: $libhtscodecs + +You asked to use an external htscodecs library, but do not have the +required header / library files. 
You either need to supply these and +if necessary set CPPFLAGS and LDFLAGS so the compiler can find them; +or configure using --without-external-htscodecs to build the required +functions from the htscodecs submodule. +])])], + [AC_MSG_CHECKING([whether htscodecs files are present]) + AS_IF([test -e "$srcdir/htscodecs/htscodecs/rANS_static4x16.h"], + [AC_MSG_RESULT([yes]) + selected_htscodecs_mk="htscodecs_bundled.mk"], + [AC_MSG_RESULT([no]) + AS_IF([test -e "$srcdir/.git"], + [MSG_ERROR([htscodecs submodule files not present. + +HTSlib uses some functions from the htscodecs project, which is normally +included as a submodule. Try running: + + git submodule update --init --recursive + +in the top-level htslib directory to update it, and then re-run configure. +])], + [MSG_ERROR([htscodecs submodule files not present. + +You have an incomplete distribution. Please try downloading one of the +official releases from https://www.htslib.org +])])])]) + +AS_IF([test "x$with_libdeflate" != "xno"], + [libdeflate=ok + AC_CHECK_HEADER([libdeflate.h],[],[libdeflate='missing header'],[;]) + AC_CHECK_LIB([deflate], [libdeflate_deflate_compress],[:],[libdeflate='missing library']) + AS_IF([test "$libdeflate" = "ok"], + [AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define if libdeflate is available.]) + LIBS="-ldeflate $LIBS" + private_LIBS="$private_LIBS -ldeflate" + static_LIBS="$static_LIBS -ldeflate"], + [AS_IF([test "x$with_libdeflate" != "xcheck"], + [MSG_ERROR([libdeflate development files not found: $libdeflate + +You requested libdeflate, but do not have the required header / library +files. The source for libdeflate is available from +<https://github.com/ebiggers/libdeflate>. You may have to adjust +search paths in CPPFLAGS and/or LDFLAGS if the header and library +are not currently on them. 
+ +Either configure with --without-libdeflate or resolve this error to build +HTSlib.])])])]) + +libcurl=disabled +if test "$enable_libcurl" != no; then + libcurl_devel=ok + AC_CHECK_HEADER([curl/curl.h], [], [libcurl_devel="headers not found"], [;]) + AC_CHECK_LIB([curl], [curl_easy_pause], [:], + [AC_CHECK_LIB([curl], [curl_easy_init], + [libcurl_devel="library is too old (7.18+ required)"], + [libcurl_devel="library not found"])]) + + if test "$libcurl_devel" = ok; then + AC_DEFINE([HAVE_LIBCURL], 1, [Define if libcurl file access is enabled.]) + libcurl=enabled + elif test "$enable_libcurl" = check; then + AC_MSG_WARN([libcurl not enabled: $libcurl_devel]) + else + MSG_ERROR([libcurl $libcurl_devel + +Support for HTTPS and other SSL-based URLs requires routines from the libcurl +library <http://curl.se/libcurl/>. Building HTSlib with libcurl enabled +requires libcurl development files to be installed on the build machine; you +may need to ensure a package such as libcurl4-{gnutls,nss,openssl}-dev (on +Debian or Ubuntu Linux) or libcurl-devel (on RPM-based Linux distributions +or Cygwin) is installed. + +Either configure with --disable-libcurl or resolve this error to build HTSlib.]) + fi + +dnl -lcurl is only needed for static linking if hfile_libcurl is not a plugin + if test "$libcurl" = enabled ; then + if test "$enable_plugins" != yes ; then + static_LIBS="$static_LIBS -lcurl" + fi + fi +fi +AC_SUBST([libcurl]) + +gcs=disabled +if test "$enable_gcs" != no; then + if test $libcurl = enabled; then + AC_DEFINE([ENABLE_GCS], 1, [Define if HTSlib should enable GCS support.]) + gcs=enabled + else + case "$enable_gcs" in + check) AC_MSG_WARN([GCS support not enabled: requires libcurl support]) ;; + *) MSG_ERROR([GCS support not enabled + +Support for Google Cloud Storage URLs requires libcurl support to be enabled +in HTSlib. 
Configure with --enable-libcurl in order to use GCS URLs.]) + ;; + esac + fi +fi +AC_SUBST([gcs]) + +s3=disabled +if test "$enable_s3" != no; then + if test $libcurl = enabled; then + s3=enabled + need_crypto="$enable_s3" + else + case "$enable_s3" in + check) AC_MSG_WARN([S3 support not enabled: requires libcurl support]) ;; + *) MSG_ERROR([S3 support not enabled + +Support for Amazon AWS S3 URLs requires libcurl support to be enabled +in HTSlib. Configure with --enable-libcurl in order to use S3 URLs.]) + ;; + esac + fi +fi + +CRYPTO_LIBS= +if test $need_crypto != no; then + AC_CHECK_FUNC([CCHmac], + [AC_DEFINE([HAVE_COMMONCRYPTO], 1, + [Define if you have the Common Crypto library.])], + [save_LIBS=$LIBS + AC_SEARCH_LIBS([HMAC], [crypto], + [AC_DEFINE([HAVE_HMAC], 1, [Define if you have libcrypto-style HMAC().]) + case "$ac_cv_search_HMAC" in + -l*) CRYPTO_LIBS=$ac_cv_search_HMAC ;; + esac], + [case "$need_crypto" in + check) AC_MSG_WARN([S3 support not enabled: requires SSL development files]) + s3=disabled ;; + *) MSG_ERROR([SSL development files not found + +Support for AWS S3 URLs requires routines from an SSL library. Building +HTSlib with libcurl enabled requires SSL development files to be installed +on the build machine; you may need to ensure a package such as libgnutls-dev, +libnss3-dev, or libssl-dev (on Debian or Ubuntu Linux, corresponding to the +libcurl4-*-dev package installed), or openssl-devel (on RPM-based Linux +distributions or Cygwin) is installed. + +Either configure with --disable-s3 or resolve this error to build HTSlib.]) ;; + esac]) + LIBS=$save_LIBS]) +dnl Only need to add to static_LIBS if not building as a plugin + if test "$enable_plugins" != yes ; then + static_LIBS="$static_LIBS $CRYPTO_LIBS" + fi +fi + +dnl Look for regcomp in various libraries (needed on windows/mingw). +AC_SEARCH_LIBS(regcomp, regex, [libregex=needed], []) + +dnl Look for PTHREAD_MUTEX_RECURSIVE. 
+dnl This is normally in pthread.h except on some broken glibc implementations. +dnl Now set by default +dnl AC_CHECK_DECL(PTHREAD_MUTEX_RECURSIVE, [], [AC_DEFINE([_XOPEN_SOURCE],[600], [Needed for PTHREAD_MUTEX_RECURSIVE])], [[#include ]]) + +if test "$s3" = enabled ; then + AC_DEFINE([ENABLE_S3], 1, [Define if HTSlib should enable S3 support.]) +fi + +dnl Apply value from HTS_PROG_CC_WERROR (if set) +AS_IF([test "x$hts_late_cflags" != x],[CFLAGS="$CFLAGS $hts_late_cflags"]) + +AC_SUBST([s3]) +AC_SUBST([CRYPTO_LIBS]) + +AC_SUBST([pc_requires]) +AC_SUBST([private_LIBS]) +AC_SUBST([static_LDFLAGS]) +AC_SUBST([static_LIBS]) + +AC_CONFIG_FILES([config.mk htslib.pc.tmp:htslib.pc.in]) +AC_CONFIG_LINKS([htscodecs.mk:$selected_htscodecs_mk]) + +if test "$srcdir" != .; then + # Set up for a separate build directory. As HTSlib uses a non-recursive + # makefile, we need to create additional build subdirectories explicitly. + AC_CONFIG_LINKS([Makefile:Makefile htslib.mk:htslib.mk]) + AC_CONFIG_FILES([htslib_vars.mk:builddir_vars.mk.in]) + AC_CONFIG_COMMANDS([mkdir], + [AS_MKDIR_P([cram]) + AS_MKDIR_P([htscodecs/htscodecs]) + AS_MKDIR_P([htscodecs/tests]) + AS_MKDIR_P([test/fuzz]) + AS_MKDIR_P([test/longrefs]) + AS_MKDIR_P([test/tabix])]) +fi + +# @HTSDIRslash_if_relsrcdir@ will be empty when $srcdir is absolute +case "$srcdir" in + /*) HTSDIRslash_if_relsrcdir= ;; + *) HTSDIRslash_if_relsrcdir='$(HTSDIR)/' ;; +esac +AC_SUBST([HTSDIRslash_if_relsrcdir]) + +AC_OUTPUT diff --git a/src/htslib-1.18/cram/README b/src/htslib-1.21/cram/README similarity index 100% rename from src/htslib-1.18/cram/README rename to src/htslib-1.21/cram/README diff --git a/src/htslib-1.18/cram/cram.h b/src/htslib-1.21/cram/cram.h similarity index 100% rename from src/htslib-1.18/cram/cram.h rename to src/htslib-1.21/cram/cram.h diff --git a/src/htslib-1.19.1/cram/cram_codecs.c b/src/htslib-1.21/cram/cram_codecs.c similarity index 100% rename from src/htslib-1.19.1/cram/cram_codecs.c rename to 
src/htslib-1.21/cram/cram_codecs.c diff --git a/src/htslib-1.18/cram/cram_codecs.h b/src/htslib-1.21/cram/cram_codecs.h similarity index 100% rename from src/htslib-1.18/cram/cram_codecs.h rename to src/htslib-1.21/cram/cram_codecs.h diff --git a/src/htslib-1.21/cram/cram_decode.c b/src/htslib-1.21/cram/cram_decode.c new file mode 100644 index 0000000..2b2ad60 --- /dev/null +++ b/src/htslib-1.21/cram/cram_decode.c @@ -0,0 +1,3587 @@ +/* +Copyright (c) 2012-2020, 2022-2024 Genome Research Ltd. +Author: James Bonfield + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * - In-memory decoding of CRAM data structures. + * - Iterator for reading CRAM record by record. + */ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cram.h" +#include "os.h" +#include "../htslib/hts.h" + +//Whether CIGAR has just M or uses = and X to indicate match and mismatch +//#define USE_X + +/* ---------------------------------------------------------------------- + * CRAM compression headers + */ + +/* + * Decodes the Tag Dictionary record in the preservation map + * Updates the cram compression header. 
+ * + * Returns number of bytes decoded on success + * -1 on failure + */ +int cram_decode_TD(cram_fd *fd, char *cp, const char *endp, + cram_block_compression_hdr *h) { + char *op = cp; + unsigned char *dat; + cram_block *b; + int32_t blk_size = 0; + int nTL, i, sz, err = 0; + + if (!(b = cram_new_block(0, 0))) + return -1; + + if (h->TD_blk || h->TL) { + hts_log_warning("More than one TD block found in compression header"); + cram_free_block(h->TD_blk); + free(h->TL); + h->TD_blk = NULL; + h->TL = NULL; + } + + /* Decode */ + blk_size = fd->vv.varint_get32(&cp, endp, &err); + if (!blk_size) { + h->nTL = 0; + cram_free_block(b); + return cp - op; + } + + if (err || blk_size < 0 || endp - cp < blk_size) { + cram_free_block(b); + return -1; + } + + BLOCK_APPEND(b, cp, blk_size); + cp += blk_size; + sz = cp - op; + // Force nul termination if missing + if (BLOCK_DATA(b)[BLOCK_SIZE(b)-1]) + BLOCK_APPEND_CHAR(b, '\0'); + + /* Set up TL lookup table */ + dat = BLOCK_DATA(b); + + // Count + for (nTL = i = 0; i < BLOCK_SIZE(b); i++) { + nTL++; + while (dat[i]) + i++; + } + + // Copy + if (!(h->TL = calloc(nTL, sizeof(*h->TL)))) { + cram_free_block(b); + return -1; + } + for (nTL = i = 0; i < BLOCK_SIZE(b); i++) { + h->TL[nTL++] = &dat[i]; + while (dat[i]) + i++; + } + h->TD_blk = b; + h->nTL = nTL; + + return sz; + + block_err: + cram_free_block(b); + return -1; +} + +/* + * Decodes a CRAM block compression header. 
+ * Returns header ptr on success
+ *         NULL on failure
+ *
+ * The block is uncompressed in place if it is not RAW, then parsed in
+ * order: (version 1 only) reference / record / landmark fields, the
+ * preservation map, the record (data series) encoding map and the tag
+ * encoding map.  Each map starts with its byte size and entry count; the
+ * number of bytes consumed is checked against that size once the map has
+ * been read.
+ */
+cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd,
+                                                           cram_block *b) {
+    char *cp, *endp, *cp_copy;
+    cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr));
+    int i, err = 0;
+    int32_t map_size = 0, map_count = 0;
+
+    if (!hdr)
+        return NULL;
+
+    if (b->method != RAW) {
+        if (cram_uncompress_block(b)) {
+            free(hdr);
+            return NULL;
+        }
+    }
+
+    cp = (char *)b->data;
+    endp = cp + b->uncomp_size;
+
+    // Only major version 1 stores these fields ahead of the maps.
+    if (CRAM_MAJOR_VERS(fd->version) == 1) {
+        hdr->ref_seq_id = fd->vv.varint_get32(&cp, endp, &err);
+        // NOTE(review): this >= 4 test sits inside the == 1 branch, so the
+        // 64-bit path looks unreachable here.
+        if (CRAM_MAJOR_VERS(fd->version) >= 4) {
+            hdr->ref_seq_start = fd->vv.varint_get64(&cp, endp, &err);
+            hdr->ref_seq_span = fd->vv.varint_get64(&cp, endp, &err);
+        } else {
+            hdr->ref_seq_start = fd->vv.varint_get32(&cp, endp, &err);
+            hdr->ref_seq_span = fd->vv.varint_get32(&cp, endp, &err);
+        }
+        hdr->num_records = fd->vv.varint_get32(&cp, endp, &err);
+        hdr->num_landmarks = fd->vv.varint_get32(&cp, endp, &err);
+        // Reject negative counts, counts whose byte size would overflow,
+        // and counts larger than the bytes left in the block.
+        if (hdr->num_landmarks < 0 ||
+            hdr->num_landmarks >= SIZE_MAX / sizeof(int32_t) ||
+            endp - cp < hdr->num_landmarks) {
+            free(hdr);
+            return NULL;
+        }
+        if (!(hdr->landmark = malloc(hdr->num_landmarks * sizeof(int32_t)))) {
+            free(hdr);
+            return NULL;
+        }
+        for (i = 0; i < hdr->num_landmarks; i++)
+            hdr->landmark[i] = fd->vv.varint_get32(&cp, endp, &err);;
+    }
+
+    hdr->preservation_map = kh_init(map);
+
+    memset(hdr->rec_encoding_map, 0,
+           CRAM_MAP_HASH * sizeof(hdr->rec_encoding_map[0]));
+    memset(hdr->tag_encoding_map, 0,
+           CRAM_MAP_HASH * sizeof(hdr->tag_encoding_map[0]));
+
+    if (!hdr->preservation_map) {
+        cram_free_compression_header(hdr);
+        return NULL;
+    }
+
+    /* Initialise defaults for preservation map */
+    hdr->read_names_included = 0;
+    hdr->AP_delta = 1;
+    hdr->qs_seq_orient = 1;
+    memcpy(hdr->substitution_matrix, "CGTNAGTNACTNACGNACGT", 20);
+
+    /* Preservation map */
+    // cp_copy marks the first byte after the size field so the bytes
+    // consumed can be compared with map_size after the loop.
+    map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp;
+    map_count = fd->vv.varint_get32(&cp, endp, &err);
+    for (i = 0; i < map_count; i++) {
+        pmap_t hd;
+        khint_t k;
+        int r;
+
+        // Need the two key bytes plus at least one value byte.
+        if (endp - cp < 3) {
+            cram_free_compression_header(hdr);
+            return NULL;
+        }
+        cp += 2;
+        switch(CRAM_KEY(cp[-2],cp[-1])) {
+        case CRAM_KEY('M','I'): // was mapped QS included in V1.0
+        case CRAM_KEY('U','I'): // was unmapped QS included in V1.0
+        case CRAM_KEY('P','I'): // was unmapped placed in V1.0
+            // Value byte is consumed but not stored.
+            hd.i = *cp++;
+            break;
+
+        case CRAM_KEY('R','N'):
+            hd.i = *cp++;
+            k = kh_put(map, hdr->preservation_map, "RN", &r);
+            if (-1 == r) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+            kh_val(hdr->preservation_map, k) = hd;
+            hdr->read_names_included = hd.i;
+            break;
+
+        case CRAM_KEY('A','P'):
+            hd.i = *cp++;
+            k = kh_put(map, hdr->preservation_map, "AP", &r);
+            if (-1 == r) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+            kh_val(hdr->preservation_map, k) = hd;
+            hdr->AP_delta = hd.i;
+            break;
+
+        case CRAM_KEY('R','R'):
+            hd.i = *cp++;
+            k = kh_put(map, hdr->preservation_map, "RR", &r);
+            if (-1 == r) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+            kh_val(hdr->preservation_map, k) = hd;
+            // The stored flag is the inverse of the RR byte.
+            hdr->no_ref = !hd.i;
+            break;
+
+        case CRAM_KEY('Q','O'):
+            hd.i = *cp++;
+            k = kh_put(map, hdr->preservation_map, "QO", &r);
+            if (-1 == r) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+            kh_val(hdr->preservation_map, k) = hd;
+            hdr->qs_seq_orient = hd.i;
+            break;
+
+        case CRAM_KEY('S','M'):
+            // Five bytes, one per matrix row; each byte packs four 2-bit
+            // column indices saying where that row's letters are stored.
+            if (endp - cp < 5) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+            hdr->substitution_matrix[0][(cp[0]>>6)&3] = 'C';
+            hdr->substitution_matrix[0][(cp[0]>>4)&3] = 'G';
+            hdr->substitution_matrix[0][(cp[0]>>2)&3] = 'T';
+            hdr->substitution_matrix[0][(cp[0]>>0)&3] = 'N';
+
+            hdr->substitution_matrix[1][(cp[1]>>6)&3] = 'A';
+            hdr->substitution_matrix[1][(cp[1]>>4)&3] = 'G';
+            hdr->substitution_matrix[1][(cp[1]>>2)&3] = 'T';
+            hdr->substitution_matrix[1][(cp[1]>>0)&3] = 'N';
+
+            hdr->substitution_matrix[2][(cp[2]>>6)&3] = 'A';
+            hdr->substitution_matrix[2][(cp[2]>>4)&3] = 'C';
+            hdr->substitution_matrix[2][(cp[2]>>2)&3] = 'T';
+            hdr->substitution_matrix[2][(cp[2]>>0)&3] = 'N';
+
+            hdr->substitution_matrix[3][(cp[3]>>6)&3] = 'A';
+            hdr->substitution_matrix[3][(cp[3]>>4)&3] = 'C';
+            hdr->substitution_matrix[3][(cp[3]>>2)&3] = 'G';
+            hdr->substitution_matrix[3][(cp[3]>>0)&3] = 'N';
+
+            hdr->substitution_matrix[4][(cp[4]>>6)&3] = 'A';
+            hdr->substitution_matrix[4][(cp[4]>>4)&3] = 'C';
+            hdr->substitution_matrix[4][(cp[4]>>2)&3] = 'G';
+            hdr->substitution_matrix[4][(cp[4]>>0)&3] = 'T';
+
+            hd.p = cp;
+            cp += 5;
+
+            k = kh_put(map, hdr->preservation_map, "SM", &r);
+            if (-1 == r) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+            kh_val(hdr->preservation_map, k) = hd;
+            break;
+
+        case CRAM_KEY('T','D'): {
+            int sz = cram_decode_TD(fd, cp, endp, hdr); // tag dictionary
+            if (sz < 0) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+            hd.p = cp;
+            cp += sz;
+
+            k = kh_put(map, hdr->preservation_map, "TD", &r);
+            if (-1 == r) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+            kh_val(hdr->preservation_map, k) = hd;
+            break;
+        }
+
+        default:
+            hts_log_warning("Unrecognised preservation map key %c%c", cp[-2], cp[-1]);
+            // guess byte;
+            cp++;
+            break;
+        }
+    }
+    if (cp - cp_copy != map_size) {
+        cram_free_compression_header(hdr);
+        return NULL;
+    }
+
+    /* Record encoding map */
+    map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp;
+    map_count = fd->vv.varint_get32(&cp, endp, &err);
+    int is_v4 = CRAM_MAJOR_VERS(fd->version) >= 4 ? 1 : 0;
+    for (i = 0; i < map_count; i++) {
+        char *key = cp;
+        int32_t encoding = E_NULL;
+        int32_t size = 0;
+        ptrdiff_t offset;
+        cram_map *m;
+        enum cram_DS_ID ds_id;
+        enum cram_external_type type;
+
+        if (endp - cp < 4) {
+            cram_free_compression_header(hdr);
+            return NULL;
+        }
+
+        cp += 2;
+        encoding = fd->vv.varint_get32(&cp, endp, &err);
+        size = fd->vv.varint_get32(&cp, endp, &err);
+
+        // Offset of the codec parameters from the start of the block data.
+        offset = cp - (char *)b->data;
+
+        // NOTE(review): this skips the entry without advancing cp by size
+        // and without recording a cram_map for it.
+        if (encoding == E_NULL)
+            continue;
+
+        if (size < 0 || endp - cp < size) {
+            cram_free_compression_header(hdr);
+            return NULL;
+        }
+
+        //printf("%s codes for %.2s\n", cram_encoding2str(encoding), key);
+
+        /*
+         * For CRAM1.0 CF and BF are Byte and not Int.
+         * Practically speaking it makes no difference unless we have a
+         * 1.0 format file that stores these in EXTERNAL as only then
+         * does Byte vs Int matter.
+         *
+         * Neither this C code nor Java reference implementations did this,
+         * so we gloss over it and treat them as int.
+         */
+        // DS_CORE doubles as "no codec to create" for TM, TV and unknown
+        // keys; type is only read when ds_id has been changed below.
+        ds_id = DS_CORE;
+        if (key[0] == 'B' && key[1] == 'F') {
+            ds_id = DS_BF; type = E_INT;
+        } else if (key[0] == 'C' && key[1] == 'F') {
+            ds_id = DS_CF; type = E_INT;
+        } else if (key[0] == 'R' && key[1] == 'I') {
+            ds_id = DS_RI; type = E_INT;
+        } else if (key[0] == 'R' && key[1] == 'L') {
+            ds_id = DS_RL; type = E_INT;
+        } else if (key[0] == 'A' && key[1] == 'P') {
+            ds_id = DS_AP;
+            type = is_v4 ? E_SLONG : E_INT;
+        } else if (key[0] == 'R' && key[1] == 'G') {
+            ds_id = DS_RG;
+            type = E_INT;
+        } else if (key[0] == 'M' && key[1] == 'F') {
+            ds_id = DS_MF; type = E_INT;
+        } else if (key[0] == 'N' && key[1] == 'S') {
+            ds_id = DS_NS; type = E_INT;
+        } else if (key[0] == 'N' && key[1] == 'P') {
+            ds_id = DS_NP;
+            type = is_v4 ? E_LONG : E_INT;
+        } else if (key[0] == 'T' && key[1] == 'S') {
+            ds_id = DS_TS;
+            type = is_v4 ? E_SLONG : E_INT;
+        } else if (key[0] == 'N' && key[1] == 'F') {
+            ds_id = DS_NF; type = E_INT;
+        } else if (key[0] == 'T' && key[1] == 'C') {
+            ds_id = DS_TC; type = E_BYTE;
+        } else if (key[0] == 'T' && key[1] == 'N') {
+            ds_id = DS_TN; type = E_INT;
+        } else if (key[0] == 'F' && key[1] == 'N') {
+            ds_id = DS_FN; type = E_INT;
+        } else if (key[0] == 'F' && key[1] == 'C') {
+            ds_id = DS_FC; type = E_BYTE;
+        } else if (key[0] == 'F' && key[1] == 'P') {
+            ds_id = DS_FP; type = E_INT;
+        } else if (key[0] == 'B' && key[1] == 'S') {
+            ds_id = DS_BS; type = E_BYTE;
+        } else if (key[0] == 'I' && key[1] == 'N') {
+            ds_id = DS_IN; type = E_BYTE_ARRAY;
+        } else if (key[0] == 'S' && key[1] == 'C') {
+            ds_id = DS_SC; type = E_BYTE_ARRAY;
+        } else if (key[0] == 'D' && key[1] == 'L') {
+            ds_id = DS_DL; type = E_INT;
+        } else if (key[0] == 'B' && key[1] == 'A') {
+            ds_id = DS_BA; type = E_BYTE;
+        } else if (key[0] == 'B' && key[1] == 'B') {
+            ds_id = DS_BB; type = E_BYTE_ARRAY;
+        } else if (key[0] == 'R' && key[1] == 'S') {
+            ds_id = DS_RS; type = E_INT;
+        } else if (key[0] == 'P' && key[1] == 'D') {
+            ds_id = DS_PD; type = E_INT;
+        } else if (key[0] == 'H' && key[1] == 'C') {
+            ds_id = DS_HC; type = E_INT;
+        } else if (key[0] == 'M' && key[1] == 'Q') {
+            ds_id = DS_MQ; type = E_INT;
+        } else if (key[0] == 'R' && key[1] == 'N') {
+            ds_id = DS_RN; type = E_BYTE_ARRAY_BLOCK;
+        } else if (key[0] == 'Q' && key[1] == 'S') {
+            ds_id = DS_QS; type = E_BYTE;
+        } else if (key[0] == 'Q' && key[1] == 'Q') {
+            ds_id = DS_QQ; type = E_BYTE_ARRAY;
+        } else if (key[0] == 'T' && key[1] == 'L') {
+            ds_id = DS_TL; type = E_INT;
+        } else if (key[0] == 'T' && key[1] == 'M') {
+        } else if (key[0] == 'T' && key[1] == 'V') {
+        } else {
+            hts_log_warning("Unrecognised key: %.2s", key);
+        }
+
+        if (ds_id != DS_CORE) {
+            // A repeated key replaces the earlier codec.
+            if (hdr->codecs[ds_id] != NULL) {
+                hts_log_warning("Codec for key %.2s defined more than once",
+                                key);
+                hdr->codecs[ds_id]->free(hdr->codecs[ds_id]);
+            }
+            hdr->codecs[ds_id] = cram_decoder_init(hdr, encoding, cp, size,
+                                                   type, fd->version, &fd->vv);
+            if (!hdr->codecs[ds_id]) {
+                cram_free_compression_header(hdr);
+                return NULL;
+            }
+        }
+
+        cp += size;
+
+        // Fill out cram_map purely for cram_dump to dump out.
+        m = malloc(sizeof(*m));
+        if (!m) {
+            cram_free_compression_header(hdr);
+            return NULL;
+        }
+        m->key = CRAM_KEY(key[0], key[1]);
+        m->encoding = encoding;
+        m->size = size;
+        m->offset = offset;
+        m->codec = NULL;
+
+        // Push onto the head of this key's hash bucket.
+        m->next = hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])];
+        hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])] = m;
+    }
+    if (cp - cp_copy != map_size) {
+        cram_free_compression_header(hdr);
+        return NULL;
+    }
+
+    /* Tag encoding map */
+    map_size = fd->vv.varint_get32(&cp, endp, &err); cp_copy = cp;
+    map_count = fd->vv.varint_get32(&cp, endp, &err);
+    for (i = 0; i < map_count; i++) {
+        int32_t encoding = E_NULL;
+        int32_t size = 0;
+        cram_map *m = malloc(sizeof(*m)); // FIXME: use pooled_alloc
+        uint8_t key[3];
+
+        if (!m || endp - cp < 6) {
+            free(m);
+            cram_free_compression_header(hdr);
+            return NULL;
+        }
+
+        // The integer key is split into three bytes, high byte first; only
+        // key[0] and key[1] choose the hash bucket below.
+        m->key = fd->vv.varint_get32(&cp, endp, &err);
+        key[0] = m->key>>16;
+        key[1] = m->key>>8;
+        key[2] = m->key;
+        encoding = fd->vv.varint_get32(&cp, endp, &err);
+        size = fd->vv.varint_get32(&cp, endp, &err);
+
+        m->encoding = encoding;
+        m->size = size;
+        m->offset = cp - (char *)b->data;
+        // m is not linked into hdr yet, so it is freed separately on failure.
+        if (size < 0 || endp - cp < size ||
+            !(m->codec = cram_decoder_init(hdr, encoding, cp, size,
+                                           E_BYTE_ARRAY_BLOCK, fd->version, &fd->vv))) {
+            cram_free_compression_header(hdr);
+            free(m);
+            return NULL;
+        }
+
+        cp += size;
+
+        m->next = hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])];
+        hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])] = m;
+    }
+    // This is the only place err is tested; presumably the varint readers
+    // leave it set once any read has failed -- TODO confirm.
+    if (err || cp - cp_copy != map_size) {
+        cram_free_compression_header(hdr);
+        return NULL;
+    }
+
+    return hdr;
+}
+
+/*
+ * Note we also need to scan through the record encoding map to
+ * see which data series share the same block, either external or
+ * CORE.
For example if we need the BF data series but MQ and CF + * are also encoded in the same block then we need to add those in + * as a dependency in order to correctly decode BF. + * + * Returns 0 on success + * -1 on failure + */ +int cram_dependent_data_series(cram_fd *fd, + cram_block_compression_hdr *hdr, + cram_slice *s) { + int *block_used; + int core_used = 0; + int i; + static int i_to_id[] = { + DS_BF, DS_AP, DS_FP, DS_RL, DS_DL, DS_NF, DS_BA, DS_QS, + DS_FC, DS_FN, DS_BS, DS_IN, DS_RG, DS_MQ, DS_TL, DS_RN, + DS_NS, DS_NP, DS_TS, DS_MF, DS_CF, DS_RI, DS_RS, DS_PD, + DS_HC, DS_SC, DS_BB, DS_QQ, + }; + uint32_t orig_ds; + + /* + * Set the data_series bit field based on fd->required_fields + * contents. + */ + if (fd->required_fields && fd->required_fields != INT_MAX) { + s->data_series = 0; + + if (fd->required_fields & SAM_QNAME) + s->data_series |= CRAM_RN; + + if (fd->required_fields & SAM_FLAG) + s->data_series |= CRAM_BF; + + if (fd->required_fields & SAM_RNAME) + s->data_series |= CRAM_RI | CRAM_BF; + + if (fd->required_fields & SAM_POS) + s->data_series |= CRAM_AP | CRAM_BF; + + if (fd->required_fields & SAM_MAPQ) + s->data_series |= CRAM_MQ; + + if (fd->required_fields & SAM_CIGAR) + s->data_series |= CRAM_CIGAR; + + if (fd->required_fields & SAM_RNEXT) + s->data_series |= CRAM_CF | CRAM_NF | CRAM_RI | CRAM_NS |CRAM_BF; + + if (fd->required_fields & SAM_PNEXT) + s->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_NP | CRAM_BF; + + if (fd->required_fields & SAM_TLEN) + s->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_TS | + CRAM_BF | CRAM_MF | CRAM_RI | CRAM_CIGAR; + + if (fd->required_fields & SAM_SEQ) + s->data_series |= CRAM_SEQ; + + if (!(fd->required_fields & SAM_AUX)) + // No easy way to get MD/NM without other tags at present + s->decode_md = 0; + + if (fd->required_fields & SAM_QUAL) + s->data_series |= CRAM_QUAL; + + if (fd->required_fields & SAM_AUX) + s->data_series |= CRAM_RG | CRAM_TL | CRAM_aux; + + if (fd->required_fields & 
SAM_RGAUX) + s->data_series |= CRAM_RG | CRAM_BF; + + // Always uncompress CORE block + if (cram_uncompress_block(s->block[0])) + return -1; + } else { + s->data_series = CRAM_ALL; + + for (i = 0; i < s->hdr->num_blocks; i++) { + if (cram_uncompress_block(s->block[i])) + return -1; + } + + return 0; + } + + block_used = calloc(s->hdr->num_blocks+1, sizeof(int)); + if (!block_used) + return -1; + + do { + /* + * Also set data_series based on code prerequisites. Eg if we need + * CRAM_QS then we also need to know CRAM_RL so we know how long it + * is, or if we need FC/FP then we also need FN (number of features). + * + * It's not reciprocal though. We may be needing to decode FN + * but have no need to decode FC, FP and cigar ops. + */ + if (s->data_series & CRAM_RS) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_PD) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_HC) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_QS) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_IN) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_SC) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_BS) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_DL) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_BA) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_BB) s->data_series |= CRAM_FC|CRAM_FP; + if (s->data_series & CRAM_QQ) s->data_series |= CRAM_FC|CRAM_FP; + + // cram_decode_seq() needs seq[] array + if (s->data_series & (CRAM_SEQ|CRAM_CIGAR)) s->data_series |= CRAM_RL; + + if (s->data_series & CRAM_FP) s->data_series |= CRAM_FC; + if (s->data_series & CRAM_FC) s->data_series |= CRAM_FN; + if (s->data_series & CRAM_aux) s->data_series |= CRAM_TL; + if (s->data_series & CRAM_MF) s->data_series |= CRAM_CF; + if (s->data_series & CRAM_MQ) s->data_series |= CRAM_BF; + if (s->data_series & CRAM_BS) s->data_series |= CRAM_RI; + if (s->data_series & (CRAM_MF |CRAM_NS 
|CRAM_NP |CRAM_TS |CRAM_NF)) + s->data_series |= CRAM_CF; + if (!hdr->read_names_included && s->data_series & CRAM_RN) + s->data_series |= CRAM_CF | CRAM_NF; + if (s->data_series & (CRAM_BA | CRAM_QS | CRAM_BB | CRAM_QQ)) + s->data_series |= CRAM_BF | CRAM_CF | CRAM_RL; + if (s->data_series & CRAM_FN) { + // The CRAM_FN loop checks for reference length boundaries, + // which needs a working seq_pos. Some fields are fixed size + // irrespective of if we decode (BS), but others need to know + // the size of the string fetched back (SC, IN, BB). + s->data_series |= CRAM_SC | CRAM_IN | CRAM_BB; + } + + orig_ds = s->data_series; + + // Find which blocks are in use. + for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) { + int bnum1, bnum2, j; + cram_codec *c = hdr->codecs[i_to_id[i]]; + + if (!(s->data_series & (1<hdr->num_blocks; j++) { + if (s->block[j]->content_type == EXTERNAL && + s->block[j]->content_id == bnum1) { + block_used[j] = 1; + if (cram_uncompress_block(s->block[j])) { + free(block_used); + return -1; + } + } + } + break; + } + + if (bnum2 == -2 || bnum1 == bnum2) + break; + + bnum1 = bnum2; // 2nd pass + } + } + + // Tags too + if ((fd->required_fields & SAM_AUX) || + (s->data_series & CRAM_aux)) { + for (i = 0; i < CRAM_MAP_HASH; i++) { + int bnum1, bnum2, j; + cram_map *m = hdr->tag_encoding_map[i]; + + while (m) { + cram_codec *c = m->codec; + if (!c) { + m = m->next; + continue; + } + + bnum1 = cram_codec_to_id(c, &bnum2); + + for (;;) { + switch (bnum1) { + case -2: + break; + + case -1: + core_used = 1; + break; + + default: + for (j = 0; j < s->hdr->num_blocks; j++) { + if (s->block[j]->content_type == EXTERNAL && + s->block[j]->content_id == bnum1) { + block_used[j] = 1; + if (cram_uncompress_block(s->block[j])) { + free(block_used); + return -1; + } + } + } + break; + } + + if (bnum2 == -2 || bnum1 == bnum2) + break; + + bnum1 = bnum2; // 2nd pass + } + + m = m->next; + } + } + } + + // We now know which blocks are in used, so repeat and 
find + // which other data series need to be added. + for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) { + int bnum1, bnum2, j; + cram_codec *c = hdr->codecs[i_to_id[i]]; + + if (!c) + continue; + + bnum1 = cram_codec_to_id(c, &bnum2); + + for (;;) { + switch (bnum1) { + case -2: + break; + + case -1: + if (core_used) { + //printf(" + data series %08x:\n", 1<data_series |= 1<hdr->num_blocks; j++) { + if (s->block[j]->content_type == EXTERNAL && + s->block[j]->content_id == bnum1) { + if (block_used[j]) { + //printf(" + data series %08x:\n", 1<data_series |= 1<tag_encoding_map[i]; + + while (m) { + cram_codec *c = m->codec; + if (!c) { + m = m->next; + continue; + } + + bnum1 = cram_codec_to_id(c, &bnum2); + + for (;;) { + switch (bnum1) { + case -2: + break; + + case -1: + //printf(" + data series %08x:\n", CRAM_aux); + s->data_series |= CRAM_aux; + break; + + default: + for (j = 0; j < s->hdr->num_blocks; j++) { + if (s->block[j]->content_type == EXTERNAL && + s->block[j]->content_id == bnum1) { + if (block_used[j]) { + //printf(" + data series %08x:\n", + // CRAM_aux); + s->data_series |= CRAM_aux; + } + } + } + break; + } + + if (bnum2 == -2 || bnum1 == bnum2) + break; + + bnum1 = bnum2; // 2nd pass + } + + m = m->next; + } + } + } while (orig_ds != s->data_series); + + free(block_used); + return 0; +} + +/* + * Checks whether an external block is used solely by a single data series. + * Returns the codec type if so (EXTERNAL, BYTE_ARRAY_LEN, BYTE_ARRAY_STOP) + * or 0 if not (E_NULL). 
+ */ +static int cram_ds_unique(cram_block_compression_hdr *hdr, cram_codec *c, + int id) { + int i, n_id = 0; + enum cram_encoding e_type = 0; + + for (i = 0; i < DS_END; i++) { + cram_codec *c; + int bnum1, bnum2, old_n_id; + + if (!(c = hdr->codecs[i])) + continue; + + bnum1 = cram_codec_to_id(c, &bnum2); + + old_n_id = n_id; + if (bnum1 == id) { + n_id++; + e_type = c->codec; + } + if (bnum2 == id) { + n_id++; + e_type = c->codec; + } + + if (n_id == old_n_id+2) + n_id--; // len/val in same place counts once only. + } + + return n_id == 1 ? e_type : 0; +} + +/* + * Attempts to estimate the size of some blocks so we can preallocate them + * before decoding. Although decoding will automatically grow the blocks, + * it is typically more efficient to preallocate. + */ +void cram_decode_estimate_sizes(cram_block_compression_hdr *hdr, cram_slice *s, + int *qual_size, int *name_size, + int *q_id) { + int bnum1, bnum2; + cram_codec *cd; + + *qual_size = 0; + *name_size = 0; + + /* Qual */ + cd = hdr->codecs[DS_QS]; + if (cd == NULL) return; + bnum1 = cram_codec_to_id(cd, &bnum2); + if (bnum1 < 0 && bnum2 >= 0) bnum1 = bnum2; + if (cram_ds_unique(hdr, cd, bnum1)) { + cram_block *b = cram_get_block_by_id(s, bnum1); + if (b) *qual_size = b->uncomp_size; + if (q_id && cd->codec == E_EXTERNAL) + *q_id = bnum1; + } + + /* Name */ + cd = hdr->codecs[DS_RN]; + if (cd == NULL) return; + bnum1 = cram_codec_to_id(cd, &bnum2); + if (bnum1 < 0 && bnum2 >= 0) bnum1 = bnum2; + if (cram_ds_unique(hdr, cd, bnum1)) { + cram_block *b = cram_get_block_by_id(s, bnum1); + if (b) *name_size = b->uncomp_size; + } +} + + +/* ---------------------------------------------------------------------- + * CRAM slices + */ + +/* + * Decodes a CRAM (un)mapped slice header block. 
+ * Returns slice header ptr on success
+ *         NULL on failure
+ *
+ * The block is uncompressed first if it is not RAW.  Blocks whose content
+ * type is neither MAPPED_SLICE nor UNMAPPED_SLICE are rejected.  The
+ * returned header and its block_content_ids array are heap allocated.
+ */
+cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b) {
+    cram_block_slice_hdr *hdr;
+    unsigned char *cp;
+    unsigned char *cp_end;
+    int i, err = 0;
+
+    if (b->method != RAW) {
+        /* Spec. says slice header should be RAW, but we can future-proof
+           by trying to decode it if it isn't. */
+        if (cram_uncompress_block(b) < 0)
+            return NULL;
+    }
+    cp = (unsigned char *)BLOCK_DATA(b);
+    cp_end = cp + b->uncomp_size;
+
+    if (b->content_type != MAPPED_SLICE &&
+        b->content_type != UNMAPPED_SLICE)
+        return NULL;
+
+    if (!(hdr = calloc(1, sizeof(*hdr))))
+        return NULL;
+
+    hdr->content_type = b->content_type;
+
+    // Reference id / start / span are only read for mapped slices; start
+    // and span use the 64-bit reader from major version 4 onwards.
+    if (b->content_type == MAPPED_SLICE) {
+        hdr->ref_seq_id = fd->vv.varint_get32s((char **)&cp, (char *)cp_end, &err);
+        if (CRAM_MAJOR_VERS(fd->version) >= 4) {
+            hdr->ref_seq_start = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err);
+            hdr->ref_seq_span = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err);
+        } else {
+            hdr->ref_seq_start = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err);
+            hdr->ref_seq_span = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err);
+        }
+        if (hdr->ref_seq_start < 0 || hdr->ref_seq_span < 0) {
+            free(hdr);
+            hts_log_error("Negative values not permitted for header "
+                          "sequence start or span fields");
+            return NULL;
+        }
+    }
+    hdr->num_records = fd->vv.varint_get32((char **)&cp, (char *) cp_end, &err);
+    // The record counter is not read for version 1; it is 32-bit in
+    // version 2 and 64-bit from version 3.
+    hdr->record_counter = 0;
+    if (CRAM_MAJOR_VERS(fd->version) == 2) {
+        hdr->record_counter = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err);
+    } else if (CRAM_MAJOR_VERS(fd->version) >= 3) {
+        hdr->record_counter = fd->vv.varint_get64((char **)&cp, (char *)cp_end, &err);
+    }
+    hdr->num_blocks = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err);
+    hdr->num_content_ids = fd->vv.varint_get32((char **)&cp, (char *)cp_end, &err);
+    if (hdr->num_content_ids < 1 ||
+        hdr->num_content_ids >= 10000) {
+        // Slice must have at least one data block, and there is no need
+        // for more than 2 per possible aux-tag plus ancillary.
+        free(hdr);
+        return NULL;
+    }
+    hdr->block_content_ids = malloc(hdr->num_content_ids * sizeof(int32_t));
+    if (!hdr->block_content_ids) {
+        free(hdr);
+        return NULL;
+    }
+
+    for (i = 0; i < hdr->num_content_ids; i++)
+        hdr->block_content_ids[i] = fd->vv.varint_get32((char **)&cp,
+                                                        (char *)cp_end,
+                                                        &err);
+    // First point at which the err flag from the reads above is tested.
+    if (err) {
+        free(hdr->block_content_ids);
+        free(hdr);
+        return NULL;
+    }
+
+    if (b->content_type == MAPPED_SLICE)
+        hdr->ref_base_id = fd->vv.varint_get32((char **)&cp, (char *) cp_end, &err);
+
+    // Versions other than 1 carry a 16-byte MD5; version 1 gets zeros.
+    if (CRAM_MAJOR_VERS(fd->version) != 1) {
+        if (cp_end - cp < 16) {
+            free(hdr->block_content_ids);
+            free(hdr);
+            return NULL;
+        }
+        memcpy(hdr->md5, cp, 16);
+    } else {
+        memset(hdr->md5, 0, 16);
+    }
+
+    // Catches a failure in the ref_base_id read above.
+    if (!err)
+        return hdr;
+
+    free(hdr->block_content_ids);
+    free(hdr);
+    return NULL;
+}
+
+
+#if 0
+/* Returns the number of bits needed to hold val; i.e. the highest bit used */
+static int nbits(int v) {
+    static const int MultiplyDeBruijnBitPosition[32] = {
+        1, 10, 2, 11, 14, 22, 3, 30, 12, 15, 17, 19, 23, 26, 4, 31,
+        9, 13, 21, 29, 16, 18, 25, 8, 20, 28, 24, 7, 27, 6, 5, 32
+    };
+
+    v |= v >> 1; // first set all bits to 1 after the first 1
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+
+    // DeBruijn magic to find top bit
+    return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27];
+}
+#endif
+
+#if 0
+/* qsort-style comparator ordering ints ascending (compiled out). */
+static int sort_freqs(const void *vp1, const void *vp2) {
+    const int i1 = *(const int *)vp1;
+    const int i2 = *(const int *)vp2;
+    return i1-i2;
+}
+#endif
+
+/* ----------------------------------------------------------------------
+ * Primary CRAM sequence decoder
+ */
+
+/*
+ * When decode_md is non-zero, appends the pending match count *md_dist and
+ * then the character c to s->aux_blk, and resets *md_dist to 0.  Does
+ * nothing when decode_md is zero.
+ *
+ * Returns 0 on success
+ *        -1 on failure (presumably reached via the BLOCK_APPEND_* macros
+ *           jumping to block_err -- confirm against their definition)
+ */
+static inline int add_md_char(cram_slice *s, int decode_md, char c, int32_t *md_dist) {
+    if (decode_md) {
+        BLOCK_APPEND_UINT(s->aux_blk, *md_dist);
+        BLOCK_APPEND_CHAR(s->aux_blk, c);
+        *md_dist = 0;
+    }
+    return 0;
+
+ block_err:
+    return -1;
+}
+
+/*
+ * 
Internal part of cram_decode_slice(). + * Generates the sequence, quality and cigar components. + */ +static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s, + cram_block *blk, cram_record *cr, sam_hdr_t *sh, + int cf, char *seq, char *qual, + int has_MD, int has_NM) { + int prev_pos = 0, f, r = 0, out_sz = 1; + int seq_pos = 1; + int cig_len = 0; + int64_t ref_pos = cr->apos; + int32_t fn, i32; + enum cigar_op cig_op = BAM_CMATCH; + uint32_t *cigar = s->cigar; + uint32_t ncigar = s->ncigar; + uint32_t cigar_alloc = s->cigar_alloc; + uint32_t nm = 0; + int32_t md_dist = 0; + int orig_aux = 0; + // CRAM < 4.0 decode_md is off/on + // CRAM >= 4.0 decode_md is auto/on (auto=on if MD* present, off otherwise) + int do_md = CRAM_MAJOR_VERS(fd->version) >= 4 + ? (s->decode_md > 0) + : (s->decode_md != 0); + int decode_md = s->ref && cr->ref_id >= 0 && ((do_md && !has_MD) || has_MD < 0); + int decode_nm = s->ref && cr->ref_id >= 0 && ((do_md && !has_NM) || has_NM < 0); + uint32_t ds = s->data_series; + sam_hrecs_t *bfd = sh->hrecs; + + cram_codec **codecs = c->comp_hdr->codecs; + + if ((ds & CRAM_QS) && !(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { + memset(qual, 255, cr->len); + } + + if (cr->cram_flags & CRAM_FLAG_NO_SEQ) + decode_md = decode_nm = 0; + + if (decode_md) { + orig_aux = BLOCK_SIZE(s->aux_blk); + if (has_MD == 0) + BLOCK_APPEND(s->aux_blk, "MDZ", 3); + } + + if (ds & CRAM_FN) { + if (!codecs[DS_FN]) return -1; + r |= codecs[DS_FN]->decode(s,codecs[DS_FN], + blk, (char *)&fn, &out_sz); + if (r) return r; + } else { + fn = 0; + } + + ref_pos--; // count from 0 + cr->cigar = ncigar; + + if (!(ds & (CRAM_FC | CRAM_FP))) + goto skip_cigar; + + if (fn) { + if ((ds & CRAM_FC) && !codecs[DS_FC]) + return -1; + if ((ds & CRAM_FP) && !codecs[DS_FP]) + return -1; + } + + for (f = 0; f < fn; f++) { + int32_t pos = 0; + char op; + + if (ncigar+2 >= cigar_alloc) { + cigar_alloc = cigar_alloc ? 
cigar_alloc*2 : 1024; + if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) + return -1; + s->cigar = cigar; + } + + if (ds & CRAM_FC) { + r |= codecs[DS_FC]->decode(s, + codecs[DS_FC], + blk, + &op, &out_sz); + if (r) return r; + } + + if (!(ds & CRAM_FP)) + continue; + + r |= codecs[DS_FP]->decode(s, + codecs[DS_FP], + blk, + (char *)&pos, &out_sz); + if (r) return r; + pos += prev_pos; + + if (pos <= 0) { + hts_log_error("Feature position %d before start of read", pos); + return -1; + } + + if (pos > seq_pos) { + if (pos > cr->len+1) + return -1; + + if (s->ref && cr->ref_id >= 0) { + if (ref_pos + pos - seq_pos > bfd->ref[cr->ref_id].len) { + static int whinged = 0; + int rlen; + if (!whinged) + hts_log_warning("Ref pos outside of ref sequence boundary"); + whinged = 1; + rlen = bfd->ref[cr->ref_id].len - ref_pos; + // May miss MD/NM cases where both seq/ref are N, but this is a + // malformed cram file anyway. + if (rlen > 0) { + if (ref_pos + rlen > s->ref_end) + goto beyond_slice; + + memcpy(&seq[seq_pos-1], + &s->ref[ref_pos - s->ref_start +1], rlen); + if ((pos - seq_pos) - rlen > 0) + memset(&seq[seq_pos-1+rlen], 'N', + (pos - seq_pos) - rlen); + } else { + memset(&seq[seq_pos-1], 'N', cr->len - seq_pos + 1); + } + if (md_dist >= 0) + md_dist += pos - seq_pos; + } else { + // 'N' in both ref and seq is also mismatch for NM/MD + if (ref_pos + pos-seq_pos > s->ref_end) + goto beyond_slice; + + const char *refp = s->ref + ref_pos - s->ref_start + 1; + const int frag_len = pos - seq_pos; + int do_cpy = 1; + if (decode_md || decode_nm) { + char *N = memchr(refp, 'N', frag_len); + if (N) { + int i; + for (i = 0; i < frag_len; i++) { + char base = refp[i]; + if (base == 'N') { + if (add_md_char(s, decode_md, + 'N', &md_dist) < 0) + return -1; + nm++; + } else { + md_dist++; + } + seq[seq_pos-1+i] = base; + } + do_cpy = 0; + } else { + md_dist += frag_len; + } + } + if (do_cpy) + memcpy(&seq[seq_pos-1], refp, frag_len); + } + } +#ifdef USE_X + if 
(cig_len && cig_op != BAM_CBASE_MATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + cig_op = BAM_CBASE_MATCH; +#else + if (cig_len && cig_op != BAM_CMATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + cig_op = BAM_CMATCH; +#endif + cig_len += pos - seq_pos; + ref_pos += pos - seq_pos; + seq_pos = pos; + } + + prev_pos = pos; + + if (!(ds & CRAM_FC)) + goto skip_cigar; + + switch(op) { + case 'S': { // soft clip: IN + int32_t out_sz2 = 1; + int have_sc = 0; + + if (cig_len) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + switch (CRAM_MAJOR_VERS(fd->version)) { + case 1: + if (ds & CRAM_IN) { + r |= codecs[DS_IN] + ? codecs[DS_IN]->decode(s, codecs[DS_IN], + blk, + cr->len ? &seq[pos-1] : NULL, + &out_sz2) + : (seq[pos-1] = 'N', out_sz2 = 1, 0); + have_sc = 1; + } + break; + case 2: + default: + if (ds & CRAM_SC) { + r |= codecs[DS_SC] + ? codecs[DS_SC]->decode(s, codecs[DS_SC], + blk, + cr->len ? &seq[pos-1] : NULL, + &out_sz2) + : (seq[pos-1] = 'N', out_sz2 = 1, 0); + have_sc = 1; + } + break; + + //default: + // r |= codecs[DS_BB] + // ? 
codecs[DS_BB]->decode(s, codecs[DS_BB], + // blk, &seq[pos-1], &out_sz2) + // : (seq[pos-1] = 'N', out_sz2 = 1, 0); + } + if (have_sc) { + if (r) return r; + cigar[ncigar++] = (out_sz2<<4) + BAM_CSOFT_CLIP; + cig_op = BAM_CSOFT_CLIP; + seq_pos += out_sz2; + } + break; + } + + case 'X': { // Substitution; BS + unsigned char base; +#ifdef USE_X + if (cig_len && cig_op != BAM_CBASE_MISMATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + if (ds & CRAM_BS) { + if (!codecs[DS_BS]) return -1; + r |= codecs[DS_BS]->decode(s, codecs[DS_BS], blk, + (char *)&base, &out_sz); + if (pos-1 < cr->len) + seq[pos-1] = 'N'; // FIXME look up BS=base value + } + cig_op = BAM_CBASE_MISMATCH; +#else + int ref_base; + if (cig_len && cig_op != BAM_CMATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + if (ds & CRAM_BS) { + if (!codecs[DS_BS]) return -1; + r |= codecs[DS_BS]->decode(s, codecs[DS_BS], blk, + (char *)&base, &out_sz); + if (r) return -1; + if (cr->ref_id < 0 || ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) { + if (pos-1 < cr->len) + seq[pos-1] = c->comp_hdr-> + substitution_matrix[fd->L1['N']][base]; + if (decode_md || decode_nm) { + if (md_dist >= 0 && decode_md) + BLOCK_APPEND_UINT(s->aux_blk, md_dist); + md_dist = -1; + nm--; + } + } else { + unsigned char ref_call = ref_pos < s->ref_end + ? 
(uc)s->ref[ref_pos - s->ref_start +1] + : 'N'; + ref_base = fd->L1[ref_call]; + if (pos-1 < cr->len) + seq[pos-1] = c->comp_hdr-> + substitution_matrix[ref_base][base]; + if (add_md_char(s, decode_md, ref_call, &md_dist) < 0) + return -1; + } + } + cig_op = BAM_CMATCH; +#endif + nm++; + cig_len++; + seq_pos++; + ref_pos++; + break; + } + + case 'D': { // Deletion; DL + if (cig_len && cig_op != BAM_CDEL) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + if (ds & CRAM_DL) { + if (!codecs[DS_DL]) return -1; + r |= codecs[DS_DL]->decode(s, codecs[DS_DL], blk, + (char *)&i32, &out_sz); + if (r) return r; + if (decode_md || decode_nm) { + if (ref_pos + i32 > s->ref_end) + goto beyond_slice; + if (md_dist >= 0 && decode_md) + BLOCK_APPEND_UINT(s->aux_blk, md_dist); + if (ref_pos + i32 <= bfd->ref[cr->ref_id].len) { + if (decode_md) { + BLOCK_APPEND_CHAR(s->aux_blk, '^'); + BLOCK_APPEND(s->aux_blk, + &s->ref[ref_pos - s->ref_start +1], + i32); + md_dist = 0; + } + nm += i32; + } else { + uint32_t dlen; + if (bfd->ref[cr->ref_id].len >= ref_pos) { + if (decode_md) { + BLOCK_APPEND_CHAR(s->aux_blk, '^'); + BLOCK_APPEND(s->aux_blk, + &s->ref[ref_pos - s->ref_start+1], + bfd->ref[cr->ref_id].len-ref_pos); + BLOCK_APPEND_UINT(s->aux_blk, 0); + } + dlen = i32 - (bfd->ref[cr->ref_id].len - ref_pos); + nm += i32 - dlen; + } else { + dlen = i32; + } + + md_dist = -1; + } + } + cig_op = BAM_CDEL; + cig_len += i32; + ref_pos += i32; + //printf(" %d: DL = %d (ret %d)\n", f, i32, r); + } + break; + } + + case 'I': { // Insertion (several bases); IN + int32_t out_sz2 = 1; + + if (cig_len && cig_op != BAM_CINS) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + + if (ds & CRAM_IN) { + if (!codecs[DS_IN]) return -1; + r |= codecs[DS_IN]->decode(s, codecs[DS_IN], blk, + cr->len ? 
&seq[pos-1] : NULL, + &out_sz2); + if (r) return r; + cig_op = BAM_CINS; + cig_len += out_sz2; + seq_pos += out_sz2; + nm += out_sz2; + //printf(" %d: IN(I) = %.*s (ret %d, out_sz %d)\n", f, out_sz2, dat, r, out_sz2); + } + break; + } + + case 'i': { // Insertion (single base); BA + if (cig_len && cig_op != BAM_CINS) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + if (ds & CRAM_BA) { + if (!codecs[DS_BA]) return -1; + r |= codecs[DS_BA]->decode(s, codecs[DS_BA], blk, + cr->len ? &seq[pos-1] : NULL, + &out_sz); + if (r) return r; + } + cig_op = BAM_CINS; + cig_len++; + seq_pos++; + nm++; + break; + } + + case 'b': { // Several bases + int32_t len = 1; + + if (cig_len && cig_op != BAM_CMATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + + if (ds & CRAM_BB) { + if (!codecs[DS_BB]) return -1; + r |= codecs[DS_BB]->decode(s, codecs[DS_BB], blk, + cr->len ? &seq[pos-1] : NULL, + &len); + if (r) return r; + + if (decode_md || decode_nm) { + int x; + if (md_dist >= 0 && decode_md) + BLOCK_APPEND_UINT(s->aux_blk, md_dist); + + for (x = 0; x < len; x++) { + if (x && decode_md) + BLOCK_APPEND_UINT(s->aux_blk, 0); + if (ref_pos+x >= bfd->ref[cr->ref_id].len || !s->ref) { + md_dist = -1; + break; + } else { + if (decode_md) { + if (ref_pos + x > s->ref_end) + goto beyond_slice; + char r = s->ref[ref_pos+x-s->ref_start +1]; + BLOCK_APPEND_CHAR(s->aux_blk, r); + } + } + } + + nm += x; + md_dist = 0; + } + } + + cig_op = BAM_CMATCH; + + cig_len+=len; + seq_pos+=len; + ref_pos+=len; + //prev_pos+=len; + break; + } + + case 'q': { // Several quality values + int32_t len = 1; + + if (cig_len && cig_op != BAM_CMATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + + if (ds & CRAM_QQ) { + if (!codecs[DS_QQ]) return -1; + if ((ds & CRAM_QS) && !(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) + && (unsigned char)*qual == 255) + memset(qual, 30, cr->len); // ? 
+ r |= codecs[DS_QQ]->decode(s, codecs[DS_QQ], blk, + (char *)&qual[pos-1], &len); + if (r) return r; + } + + cig_op = BAM_CMATCH; + + //prev_pos+=len; + break; + } + + case 'B': { // Read base; BA, QS +#ifdef USE_X + if (cig_len && cig_op != BAM_CBASE_MISMATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } +#else + if (cig_len && cig_op != BAM_CMATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } +#endif + if (ds & CRAM_BA) { + if (!codecs[DS_BA]) return -1; + r |= codecs[DS_BA]->decode(s, codecs[DS_BA], blk, + cr->len ? &seq[pos-1] : NULL, + &out_sz); + + if (decode_md || decode_nm) { + if (md_dist >= 0 && decode_md) + BLOCK_APPEND_UINT(s->aux_blk, md_dist); + if (ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) { + md_dist = -1; + } else { + if (decode_md) { + if (ref_pos > s->ref_end) + goto beyond_slice; + BLOCK_APPEND_CHAR(s->aux_blk, + s->ref[ref_pos-s->ref_start +1]); + } + nm++; + md_dist = 0; + } + } + } + if (ds & CRAM_QS) { + if (!codecs[DS_QS]) return -1; + if (!(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) + && (unsigned char)*qual == 255) + memset(qual, 30, cr->len); // ASCII ?. Same as htsjdk + r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, + (char *)&qual[pos-1], &out_sz); + } +#ifdef USE_X + cig_op = BAM_CBASE_MISMATCH; +#else + cig_op = BAM_CMATCH; +#endif + cig_len++; + seq_pos++; + ref_pos++; + //printf(" %d: BA/QS(B) = %c/%d (ret %d)\n", f, i32, qc, r); + break; + } + + case 'Q': { // Quality score; QS + if (ds & CRAM_QS) { + if (!codecs[DS_QS]) return -1; + if (!(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES) && + (unsigned char)*qual == 255) + memset(qual, 30, cr->len); // ? 
+ r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, + (char *)&qual[pos-1], &out_sz); + //printf(" %d: QS = %d (ret %d)\n", f, qc, r); + } + break; + } + + case 'H': { // hard clip; HC + if (cig_len && cig_op != BAM_CHARD_CLIP) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + if (ds & CRAM_HC) { + if (!codecs[DS_HC]) return -1; + r |= codecs[DS_HC]->decode(s, codecs[DS_HC], blk, + (char *)&i32, &out_sz); + if (r) return r; + cig_op = BAM_CHARD_CLIP; + cig_len += i32; + } + break; + } + + case 'P': { // padding; PD + if (cig_len && cig_op != BAM_CPAD) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + if (ds & CRAM_PD) { + if (!codecs[DS_PD]) return -1; + r |= codecs[DS_PD]->decode(s, codecs[DS_PD], blk, + (char *)&i32, &out_sz); + if (r) return r; + cig_op = BAM_CPAD; + cig_len += i32; + } + break; + } + + case 'N': { // Ref skip; RS + if (cig_len && cig_op != BAM_CREF_SKIP) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + if (ds & CRAM_RS) { + if (!codecs[DS_RS]) return -1; + r |= codecs[DS_RS]->decode(s, codecs[DS_RS], blk, + (char *)&i32, &out_sz); + if (r) return r; + cig_op = BAM_CREF_SKIP; + cig_len += i32; + ref_pos += i32; + } + break; + } + + default: + hts_log_error("Unknown feature code '%c'", op); + return -1; + } + } + + if (!(ds & CRAM_FC)) + goto skip_cigar; + + /* An implicit match op for any unaccounted for bases */ + if ((ds & CRAM_FN) && cr->len >= seq_pos) { + if (s->ref && cr->ref_id >= 0) { + if (ref_pos + cr->len - seq_pos + 1 > bfd->ref[cr->ref_id].len) { + static int whinged = 0; + int rlen; + if (!whinged) + hts_log_warning("Ref pos outside of ref sequence boundary"); + whinged = 1; + rlen = bfd->ref[cr->ref_id].len - ref_pos; + // May miss MD/NM cases where both seq/ref are N, but this is a + // malformed cram file anyway. 
+ if (rlen > 0) { + if (seq_pos-1 + rlen < cr->len) + memcpy(&seq[seq_pos-1], + &s->ref[ref_pos - s->ref_start +1], rlen); + if ((cr->len - seq_pos + 1) - rlen > 0) + memset(&seq[seq_pos-1+rlen], 'N', + (cr->len - seq_pos + 1) - rlen); + } else { + if (cr->len - seq_pos + 1 > 0) + memset(&seq[seq_pos-1], 'N', cr->len - seq_pos + 1); + } + if (md_dist >= 0) + md_dist += cr->len - seq_pos + 1; + } else { + if (cr->len - seq_pos + 1 > 0) { + if (ref_pos + cr->len-seq_pos +1 > s->ref_end) + goto beyond_slice; + int remainder = cr->len - (seq_pos-1); + int j = ref_pos - s->ref_start + 1; + if (decode_md || decode_nm) { + int i; + char *N = memchr(&s->ref[j], 'N', remainder); + if (!N) { + // short cut the common case + md_dist += cr->len - (seq_pos-1); + } else { + char *refp = &s->ref[j-(seq_pos-1)]; + md_dist += N-&s->ref[j]; + int i_start = seq_pos-1 + (N - &s->ref[j]); + for (i = i_start; i < cr->len; i++) { + char base = refp[i]; + if (base == 'N') { + if (add_md_char(s, decode_md, 'N', + &md_dist) < 0) + return -1; + nm++; + } else { + md_dist++; + } + } + } + } + memcpy(&seq[seq_pos-1], &s->ref[j], remainder); + } + ref_pos += cr->len - seq_pos + 1; + } + } else if (cr->ref_id >= 0) { + // So alignment end can be computed even when not decoding sequence + ref_pos += cr->len - seq_pos + 1; + } + + if (ncigar+1 >= cigar_alloc) { + cigar_alloc = cigar_alloc ? 
cigar_alloc*2 : 1024; + if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) + return -1; + s->cigar = cigar; + } +#ifdef USE_X + if (cig_len && cig_op != BAM_CBASE_MATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + cig_op = BAM_CBASE_MATCH; +#else + if (cig_len && cig_op != BAM_CMATCH) { + cigar[ncigar++] = (cig_len<<4) + cig_op; + cig_len = 0; + } + cig_op = BAM_CMATCH; +#endif + cig_len += cr->len - seq_pos+1; + } + + skip_cigar: + + if ((ds & CRAM_FN) && decode_md) { + if (md_dist >= 0) + BLOCK_APPEND_UINT(s->aux_blk, md_dist); + } + + if (cig_len) { + if (ncigar >= cigar_alloc) { + cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024; + if (!(cigar = realloc(s->cigar, cigar_alloc * sizeof(*cigar)))) + return -1; + s->cigar = cigar; + } + + cigar[ncigar++] = (cig_len<<4) + cig_op; + } + + cr->ncigar = ncigar - cr->cigar; + cr->aend = ref_pos > cr->apos ? ref_pos : cr->apos; + + //printf("2: %.*s %d .. %d\n", cr->name_len, DSTRING_STR(name_ds) + cr->name, cr->apos, ref_pos); + + if (ds & CRAM_MQ) { + if (!codecs[DS_MQ]) return -1; + r |= codecs[DS_MQ]->decode(s, codecs[DS_MQ], blk, + (char *)&cr->mqual, &out_sz); + } else { + cr->mqual = 40; + } + + if ((ds & CRAM_QS) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { + int32_t out_sz2 = cr->len; + + if (!codecs[DS_QS]) return -1; + r |= codecs[DS_QS]->decode(s, codecs[DS_QS], blk, + qual, &out_sz2); + } + + s->cigar = cigar; + s->cigar_alloc = cigar_alloc; + s->ncigar = ncigar; + + if (cr->cram_flags & CRAM_FLAG_NO_SEQ) + cr->len = 0; + + if (decode_md) { + BLOCK_APPEND_CHAR(s->aux_blk, '\0'); // null terminate MD:Z: + size_t sz = BLOCK_SIZE(s->aux_blk) - orig_aux; + if (has_MD < 0) { + // has_MD < 0; already have MDZ allocated in aux at -has_MD, + // but wrote MD to end of aux (at orig_aux). + // We need some memmoves to shuffle it around. 
+ char tmp_MD_[1024], *tmp_MD = tmp_MD_; + unsigned char *orig_aux_p = BLOCK_DATA(s->aux_blk) + orig_aux; + if (sz > 1024) { + tmp_MD = malloc(sz); + if (!tmp_MD) + return -1; + } + memcpy(tmp_MD, orig_aux_p, sz); + memmove(&BLOCK_DATA(s->aux_blk)[-has_MD] + sz, + &BLOCK_DATA(s->aux_blk)[-has_MD], + orig_aux_p - &BLOCK_DATA(s->aux_blk)[-has_MD]); + memcpy(&BLOCK_DATA(s->aux_blk)[-has_MD], tmp_MD, sz); + if (tmp_MD != tmp_MD_) + free(tmp_MD); + + if (-has_NM > -has_MD) + // we inserted before NM, so move it up a bit + has_NM -= sz; + } + // else has_MD == 0 and we've already appended MD to the end. + + cr->aux_size += sz; + } + + if (decode_nm) { + if (has_NM == 0) { + char buf[7]; + size_t buf_size; + buf[0] = 'N'; buf[1] = 'M'; + if (nm <= UINT8_MAX) { + buf_size = 4; + buf[2] = 'C'; + buf[3] = (nm>> 0) & 0xff; + } else if (nm <= UINT16_MAX) { + buf_size = 5; + buf[2] = 'S'; + buf[3] = (nm>> 0) & 0xff; + buf[4] = (nm>> 8) & 0xff; + } else { + buf_size = 7; + buf[2] = 'I'; + buf[3] = (nm>> 0) & 0xff; + buf[4] = (nm>> 8) & 0xff; + buf[5] = (nm>>16) & 0xff; + buf[6] = (nm>>24) & 0xff; + } + BLOCK_APPEND(s->aux_blk, buf, buf_size); + cr->aux_size += buf_size; + } else { + // Preallocated space for NM at -has_NM into aux block + unsigned char *buf = BLOCK_DATA(s->aux_blk) + -has_NM; + buf[0] = (nm>> 0) & 0xff; + buf[1] = (nm>> 8) & 0xff; + buf[2] = (nm>>16) & 0xff; + buf[3] = (nm>>24) & 0xff; + } + } + + return r; + + beyond_slice: + // Cramtools can create CRAMs that have sequence features outside the + // stated range of the container & slice reference extents (start + span). + // We have to check for these in many places, but for brevity have the + // error reporting in only one. 
+ hts_log_error("CRAM CIGAR extends beyond slice reference extents"); + return -1; + + block_err: + return -1; +} + +/* + * Quick and simple hash lookup for cram_map arrays + */ +static cram_map *map_find(cram_map **map, unsigned char *key, int id) { + cram_map *m; + + m = map[CRAM_MAP(key[0],key[1])]; + while (m && m->key != id) + m= m->next; + + return m; +} + +//#define map_find(M,K,I) M[CRAM_MAP(K[0],K[1])];while (m && m->key != I);m= m->next + + +static int cram_decode_aux_1_0(cram_container *c, cram_slice *s, + cram_block *blk, cram_record *cr) { + int i, r = 0, out_sz = 1; + unsigned char ntags; + + if (!c->comp_hdr->codecs[DS_TC]) return -1; + r |= c->comp_hdr->codecs[DS_TC]->decode(s, c->comp_hdr->codecs[DS_TC], blk, + (char *)&ntags, &out_sz); + cr->ntags = ntags; + + //printf("TC=%d\n", cr->ntags); + cr->aux_size = 0; + cr->aux = BLOCK_SIZE(s->aux_blk); + + for (i = 0; i < cr->ntags; i++) { + int32_t id, out_sz = 1; + unsigned char tag_data[3]; + cram_map *m; + + //printf("Tag %d/%d\n", i+1, cr->ntags); + if (!c->comp_hdr->codecs[DS_TN]) return -1; + r |= c->comp_hdr->codecs[DS_TN]->decode(s, c->comp_hdr->codecs[DS_TN], + blk, (char *)&id, &out_sz); + if (out_sz == 3) { + // Tag name stored as 3 chars instead of an int? + memcpy(tag_data, &id, 3); + } else { + tag_data[0] = (id>>16) & 0xff; + tag_data[1] = (id>>8) & 0xff; + tag_data[2] = id & 0xff; + } + + m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id); + if (!m) + return -1; + BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3); + + if (!m->codec) return -1; + r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz); + + cr->aux_size += out_sz + 3; + } + + return r; + + block_err: + return -1; +} + +// has_MD and has_NM are filled out with 0 for none present, +// 1 for present and verbatim, and -pos for present as placeholder +// (MD*, NM*) to be generated and filled out at offset +pos. 
/*
 * Decodes the auxiliary tags of one record using the TL (tag list) data
 * series and appends them to s->aux_blk.
 *
 * The TL value indexes c->comp_hdr->TL[], whose entry is a string of
 * 3-byte tag identifiers (two name bytes plus a type byte).  For CRAM
 * version 4 and above a type byte of '*' marks a placeholder whose value
 * is produced by the decoder instead of being read from the stream.
 *
 * has_MD / has_NM may be NULL.  When non-NULL and the tag is met, they
 * receive the aux block offset of the byte following the 3-byte tag
 * header, negated for a '*' placeholder.
 *
 * Returns 0 on success (also when neither TL nor aux data is wanted);
 *         non-zero on failure.
 */
static int cram_decode_aux(cram_fd *fd,
                           cram_container *c, cram_slice *s,
                           cram_block *blk, cram_record *cr,
                           int *has_MD, int *has_NM) {
    int i, r = 0, out_sz = 1;
    int32_t TL = 0;
    unsigned char *TN;
    uint32_t ds = s->data_series;

    if (!(ds & (CRAM_TL|CRAM_aux))) {
        cr->aux = 0;
        cr->aux_size = 0;
        return 0;
    }

    if (!c->comp_hdr->codecs[DS_TL]) return -1;
    r |= c->comp_hdr->codecs[DS_TL]->decode(s, c->comp_hdr->codecs[DS_TL], blk,
                                            (char *)&TL, &out_sz);
    if (r || TL < 0 || TL >= c->comp_hdr->nTL)
        return -1;

    TN = c->comp_hdr->TL[TL];
    cr->ntags = strlen((char *)TN)/3; // optimise to remove strlen

    //printf("TC=%d\n", cr->ntags);
    cr->aux_size = 0;
    cr->aux = BLOCK_SIZE(s->aux_blk);

    // TL alone was wanted: the tag count is known, values are not decoded.
    if (!(ds & CRAM_aux))
        return 0;

    for (i = 0; i < cr->ntags; i++) {
        int32_t id, out_sz = 1;
        unsigned char tag_data[7];
        cram_map *m;

        // Record where the MD / NM value will start (current size plus the
        // 3-byte header); the sign flags a '*' placeholder.
        if (TN[0] == 'M' && TN[1] == 'D' && has_MD)
            *has_MD = (BLOCK_SIZE(s->aux_blk)+3) * (TN[2] == '*' ? -1 : 1);
        if (TN[0] == 'N' && TN[1] == 'M' && has_NM)
            *has_NM = (BLOCK_SIZE(s->aux_blk)+3) * (TN[2] == '*' ? -1 : 1);;

        //printf("Tag %d/%d\n", i+1, cr->ntags);
        tag_data[0] = TN[0];
        tag_data[1] = TN[1];
        tag_data[2] = TN[2];
        id = (tag_data[0]<<16) | (tag_data[1]<<8) | tag_data[2];

        if (CRAM_MAJOR_VERS(fd->version) >= 4 && TN[2] == '*') {
            // Place holder, fill out contents later.
            int tag_data_size;
            if (TN[0] == 'N' && TN[1] == 'M') {
                // Use a fixed size, so we can allocate room for it now.
                memcpy(&tag_data[2], "I\0\0\0\0", 5);
                tag_data_size = 7;
            } else if (TN[0] == 'R' && TN[1] == 'G') {
                // RG is variable size, but known already.  Insert now
                TN += 3;
                // Equiv to fd->header->hrecs->rg[cr->rg], but this is the
                // new header API equivalent.
                const char *rg = sam_hdr_line_name(fd->header, "RG", cr->rg);
                if (!rg)
                    continue;

                size_t rg_len = strlen(rg);
                tag_data[2] = 'Z';
                BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3);
                BLOCK_APPEND(s->aux_blk, rg, rg_len);
                BLOCK_APPEND_CHAR(s->aux_blk, '\0');
                cr->aux_size += 3 + rg_len + 1;
                cr->rg = -1; // prevents auto-add later
                continue;
            } else {
                // Unknown size.  We'll insert MD into stream later.
                tag_data[2] = 'Z';
                tag_data_size = 3;
            }
            BLOCK_APPEND(s->aux_blk, (char *)tag_data, tag_data_size);
            cr->aux_size += tag_data_size;
            TN += 3;
        } else {
            // TN is advanced first, so the current tag is TN[-3..-1] below.
            TN += 3;
            m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id);
            if (!m)
                return -1;

            BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3);

            if (!m->codec) return -1;
            r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz);
            if (r) break;
            cr->aux_size += out_sz + 3;

            // cF CRAM flags.
            if (TN[-3]=='c' && TN[-2]=='F' && TN[-1]=='C' && out_sz == 1) {
                // Remove cF tag
                uint8_t cF = BLOCK_END(s->aux_blk)[-1];
                BLOCK_SIZE(s->aux_blk) -= out_sz+3;
                cr->aux_size -= out_sz+3;

                // value 1 set => don't auto-decode MD.
                // Pretend MD is present verbatim, so we don't auto-generate
                if ((cF & 1) && has_MD && *has_MD == 0)
                    *has_MD = 1;

                // value 2 set => don't auto-decode NM
                if ((cF & 2) && has_NM && *has_NM == 0)
                    *has_NM = 1;
            }
        }

        // We could go to 2^32 fine, but we shouldn't be hitting this anyway,
        // and it's protecting against memory hogs too.
        if (BLOCK_SIZE(s->aux_blk) > (1u<<31)) {
            hts_log_error("CRAM->BAM aux block size overflow");
            goto block_err;
        }
    }

    return r;

 block_err:
    return -1;
}

/*
 * Resolve mate pair cross-references between recs within this slice
 *
 * required_fields is a SAM_* bit mask.  When none of SAM_RNEXT, SAM_PNEXT
 * or SAM_TLEN is wanted, the mate fields of every record are simply
 * cleared.  Otherwise records linked through mate_line have their tlen,
 * mate_pos, mate_ref_id and mate-related flag bits filled in, and an
 * explicitly stored tlen finally overrides any computed value.
 *
 * Returns 0 on success;
 *        -1 if a mate_line chain is malformed.
 */
static int cram_decode_slice_xref(cram_slice *s, int required_fields) {
    int rec;

    if (!(required_fields & (SAM_RNEXT | SAM_PNEXT | SAM_TLEN))) {
        for (rec = 0; rec < s->hdr->num_records; rec++) {
            cram_record *cr = &s->crecs[rec];

            cr->tlen = 0;
            cr->mate_pos = 0;
            cr->mate_ref_id = -1;
        }

        return 0;
    }

    for (rec = 0; rec < s->hdr->num_records; rec++) {
        cram_record *cr = &s->crecs[rec];

        if (cr->mate_line >= 0) {
            if (cr->mate_line < s->hdr->num_records) {
                /*
                 * On the first read, loop through computing lengths.
                 * It's not perfect as we have one slice per reference so we
                 * cannot detect when TLEN should be zero due to seqs that
                 * map to multiple references.
                 *
                 * We also cannot set tlen correct when it spans a slice for
                 * other reasons. This may make tlen too small. Should we
                 * fix this by forcing TLEN to be stored verbatim in such cases?
                 *
                 * Or do we just admit defeat and output 0 for tlen? It's the
                 * safe option...
                 */
                // INT64_MIN is the value tested here for "tlen not yet set".
                if (cr->tlen == INT64_MIN) {
                    int id1 = rec, id2 = rec;
                    int64_t aleft = cr->apos, aright = cr->aend;
                    int64_t tlen;
                    int ref = cr->ref_id;

                    // number of segments starting at the same point.
                    int left_cnt = 0;

                    // Pass 1: follow the chain to find the template extent;
                    // the last record is linked back to 'rec' to close it.
                    do {
                        if (aleft > s->crecs[id2].apos)
                            aleft = s->crecs[id2].apos, left_cnt = 1;
                        else if (aleft == s->crecs[id2].apos)
                            left_cnt++;
                        if (aright < s->crecs[id2].aend)
                            aright = s->crecs[id2].aend;
                        if (s->crecs[id2].mate_line == -1) {
                            s->crecs[id2].mate_line = rec;
                            break;
                        }
                        // Links must point strictly forwards and stay in range.
                        if (s->crecs[id2].mate_line <= id2 ||
                            s->crecs[id2].mate_line >= s->hdr->num_records)
                            return -1;
                        id2 = s->crecs[id2].mate_line;

                        if (s->crecs[id2].ref_id != ref)
                            ref = -1;
                    } while (id2 != id1);

                    if (ref != -1) {
                        tlen = aright - aleft + 1;
                        id1 = id2 = rec;

                        /*
                         * When we have two seqs with identical start and
                         * end coordinates, set +/- tlen based on 1st/last
                         * bit flags instead, as a tie breaker.
                         */
                        if (s->crecs[id2].apos == aleft) {
                            if (left_cnt == 1 ||
                                (s->crecs[id2].flags & BAM_FREAD1))
                                s->crecs[id2].tlen = tlen;
                            else
                                s->crecs[id2].tlen = -tlen;
                        } else {
                            s->crecs[id2].tlen = -tlen;
                        }

                        // Pass 2: apply the same rule to the rest of the chain.
                        id2 = s->crecs[id2].mate_line;
                        while (id2 != id1) {
                            if (s->crecs[id2].apos == aleft) {
                                if (left_cnt == 1 ||
                                    (s->crecs[id2].flags & BAM_FREAD1))
                                    s->crecs[id2].tlen = tlen;
                                else
                                    s->crecs[id2].tlen = -tlen;
                            } else {
                                s->crecs[id2].tlen = -tlen;
                            }
                            id2 = s->crecs[id2].mate_line;
                        }
                    } else {
                        // Chain spans more than one reference: tlen is zero.
                        id1 = id2 = rec;

                        s->crecs[id2].tlen = 0;
                        id2 = s->crecs[id2].mate_line;
                        while (id2 != id1) {
                            s->crecs[id2].tlen = 0;
                            id2 = s->crecs[id2].mate_line;
                        }
                    }
                }

                cr->mate_pos = s->crecs[cr->mate_line].apos;
                cr->mate_ref_id = s->crecs[cr->mate_line].ref_id;

                // paired
                cr->flags |= BAM_FPAIRED;

                // set mate unmapped if needed
                if (s->crecs[cr->mate_line].flags & BAM_FUNMAP) {
                    cr->flags |= BAM_FMUNMAP;
                    cr->tlen = 0;
                }
                if (cr->flags & BAM_FUNMAP) {
                    cr->tlen = 0;
                }

                // set mate reversed if needed
                if (s->crecs[cr->mate_line].flags & BAM_FREVERSE)
                    cr->flags |= BAM_FMREVERSE;
            } else {
                // Logged only; processing of the slice continues.
                hts_log_error("Mate line out of bounds: %d vs [0, %d]",
                              cr->mate_line, s->hdr->num_records-1);
            }

            /* FIXME: construct read names here too if needed */
        } else {
            if (cr->mate_flags & CRAM_M_REVERSE) {
                cr->flags |= BAM_FPAIRED | BAM_FMREVERSE;
            }
            if (cr->mate_flags & CRAM_M_UNMAP) {
                cr->flags |= BAM_FMUNMAP;
                //cr->mate_ref_id = -1;
            }
            if (!(cr->flags & BAM_FPAIRED))
                cr->mate_ref_id = -1;
        }

        if (cr->tlen == INT64_MIN)
            cr->tlen = 0; // Just in case
    }

    // An explicitly stored tlen takes precedence over the computed one.
    for (rec = 0; rec < s->hdr->num_records; rec++) {
        cram_record *cr = &s->crecs[rec];
        if (cr->explicit_tlen != INT64_MIN)
            cr->tlen = cr->explicit_tlen;
    }

    return 0;
}

/*
 * Formats a 16-byte MD5 digest as 32 lower-case hexadecimal characters.
 *
 * md5  the 16 digest bytes.
 * out  destination buffer; 33 bytes are written (32 digits plus NUL).
 *
 * Returns out.
 */
static char *md5_print(unsigned char *md5, char *out) {
    int i;
    for (i = 0; i < 16; i++) {
        out[i*2+0] = "0123456789abcdef"[md5[i]>>4];
        out[i*2+1] = "0123456789abcdef"[md5[i]&15];
    }
    out[32] = 0;

    return out;
}

/*
 * Utility function to decode tlen (ISIZE), as it's called
 * in multiple places.
 *
 * For CRAM versions below 4 the value is decoded into a 32-bit integer
 * and widened; from version 4 it is decoded straight into *tlen.
 *
 * Returns codec return value (0 on success).
 *         -1 if there is no TS codec.
 */
static int cram_decode_tlen(cram_fd *fd, cram_container *c, cram_slice *s,
                            cram_block *blk, int64_t *tlen) {
    int out_sz = 1, r = 0;

    if (!c->comp_hdr->codecs[DS_TS]) return -1;
    if (CRAM_MAJOR_VERS(fd->version) < 4) {
        int32_t i32;
        r |= c->comp_hdr->codecs[DS_TS]
            ->decode(s, c->comp_hdr->codecs[DS_TS], blk,
                     (char *)&i32, &out_sz);
        *tlen = i32;
    } else {
        r |= c->comp_hdr->codecs[DS_TS]
            ->decode(s, c->comp_hdr->codecs[DS_TS], blk,
                     (char *)tlen, &out_sz);
    }
    return r;
}

/*
 * Decode an entire slice from container blocks. Fills out s->crecs[] array.
+ * Returns 0 on success + * -1 on failure + */ +int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s, + sam_hdr_t *sh) { + cram_block *blk = s->block[0]; + int32_t bf, ref_id; + unsigned char cf; + int out_sz, r = 0; + int rec; + char *seq = NULL, *qual = NULL; + int unknown_rg = -1; + int embed_ref; + char **refs = NULL; + uint32_t ds; + sam_hrecs_t *bfd = sh->hrecs; + + if (cram_dependent_data_series(fd, c->comp_hdr, s) != 0) + return -1; + + ds = s->data_series; + + blk->bit = 7; // MSB first + + // Study the blocks and estimate approx sizes to preallocate. + // This looks to speed up decoding by around 8-9%. + // We can always shrink back down at the end if we overestimated. + // However it's likely that this also saves memory as own growth + // factor (*=1.5) is never applied. + { + int qsize, nsize, q_id; + cram_decode_estimate_sizes(c->comp_hdr, s, &qsize, &nsize, &q_id); + //fprintf(stderr, "qsize=%d nsize=%d\n", qsize, nsize); + + if (qsize && (ds & CRAM_RL)) BLOCK_RESIZE_EXACT(s->seqs_blk, qsize+1); + if (qsize && (ds & CRAM_RL)) BLOCK_RESIZE_EXACT(s->qual_blk, qsize+1); + if (nsize && (ds & CRAM_NS)) BLOCK_RESIZE_EXACT(s->name_blk, nsize+1); + + // To do - consider using q_id here to usurp the quality block and + // avoid a memcpy during decode. + // Specifically when quality is an external block uniquely used by + // DS_QS only, then we can set s->qual_blk directly to this + // block and save the codec->decode() calls. (Approx 3% cpu saving) + } + + /* Look for unknown RG, added as last by Java CRAM? */ + if (bfd->nrg > 0 && + bfd->rg[bfd->nrg-1].name != NULL && + !strcmp(bfd->rg[bfd->nrg-1].name, "UNKNOWN")) + unknown_rg = bfd->nrg-1; + + if (blk->content_type != CORE) + return -1; + + if (s->crecs) + free(s->crecs); + if (!(s->crecs = malloc(s->hdr->num_records * sizeof(*s->crecs)))) + return -1; + + ref_id = s->hdr->ref_seq_id; + if (CRAM_MAJOR_VERS(fd->version) < 4) + embed_ref = s->hdr->ref_base_id >= 0 ? 
1 : 0; + else + embed_ref = s->hdr->ref_base_id > 0 ? 1 : 0; + + if (ref_id >= 0) { + if (embed_ref) { + cram_block *b; + if (s->hdr->ref_base_id < 0) { + hts_log_error("No reference specified and no embedded reference is available" + " at #%d:%"PRId64"-%"PRId64, ref_id, s->hdr->ref_seq_start, + s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); + return -1; + } + b = cram_get_block_by_id(s, s->hdr->ref_base_id); + if (!b) + return -1; + if (cram_uncompress_block(b) != 0) + return -1; + s->ref = (char *)BLOCK_DATA(b); + s->ref_start = s->hdr->ref_seq_start; + s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1; + if (s->hdr->ref_seq_span > b->uncomp_size) { + hts_log_error("Embedded reference is too small at #%d:%"PRIhts_pos"-%"PRIhts_pos, + ref_id, s->ref_start, s->ref_end); + return -1; + } + } else if (!c->comp_hdr->no_ref) { + //// Avoid Java cramtools bug by loading entire reference seq + //s->ref = cram_get_ref(fd, s->hdr->ref_seq_id, 1, 0); + //s->ref_start = 1; + + if (fd->required_fields & SAM_SEQ) { + s->ref = + cram_get_ref(fd, s->hdr->ref_seq_id, + s->hdr->ref_seq_start, + s->hdr->ref_seq_start + s->hdr->ref_seq_span -1); + } + s->ref_start = s->hdr->ref_seq_start; + s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1; + + /* Sanity check */ + if (s->ref_start < 0) { + hts_log_warning("Slice starts before base 1" + " at #%d:%"PRId64"-%"PRId64, ref_id, s->hdr->ref_seq_start, + s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); + s->ref_start = 0; + } + pthread_mutex_lock(&fd->ref_lock); + pthread_mutex_lock(&fd->refs->lock); + if ((fd->required_fields & SAM_SEQ) && + ref_id < fd->refs->nref && fd->refs->ref_id && + s->ref_end > fd->refs->ref_id[ref_id]->length) { + s->ref_end = fd->refs->ref_id[ref_id]->length; + } + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + } + } + + if ((fd->required_fields & SAM_SEQ) && + s->ref == NULL && s->hdr->ref_seq_id >= 0 && !c->comp_hdr->no_ref) { + hts_log_error("Unable to 
fetch reference #%d:%"PRId64"-%"PRId64"\n", + ref_id, s->hdr->ref_seq_start, + s->hdr->ref_seq_start + s->hdr->ref_seq_span-1); + return -1; + } + + if (CRAM_MAJOR_VERS(fd->version) != 1 + && (fd->required_fields & SAM_SEQ) + && s->hdr->ref_seq_id >= 0 + && !fd->ignore_md5 + && memcmp(s->hdr->md5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) { + hts_md5_context *md5; + unsigned char digest[16]; + + if (s->ref && s->hdr->ref_seq_id >= 0) { + int start, len; + + if (s->hdr->ref_seq_start >= s->ref_start) { + start = s->hdr->ref_seq_start - s->ref_start; + } else { + hts_log_warning("Slice starts before base 1 at #%d:%"PRIhts_pos"-%"PRIhts_pos, + ref_id, s->ref_start, s->ref_end); + start = 0; + } + + if (s->hdr->ref_seq_span <= s->ref_end - s->ref_start + 1) { + len = s->hdr->ref_seq_span; + } else { + hts_log_warning("Slice ends beyond reference end at #%d:%"PRIhts_pos"-%"PRIhts_pos, + ref_id, s->ref_start, s->ref_end); + len = s->ref_end - s->ref_start + 1; + } + + if (!(md5 = hts_md5_init())) + return -1; + if (start + len > s->ref_end - s->ref_start + 1) + len = s->ref_end - s->ref_start + 1 - start; + if (len >= 0) + hts_md5_update(md5, s->ref + start, len); + hts_md5_final(digest, md5); + hts_md5_destroy(md5); + } else if (!s->ref && s->hdr->ref_base_id >= 0) { + cram_block *b = cram_get_block_by_id(s, s->hdr->ref_base_id); + if (b) { + if (!(md5 = hts_md5_init())) + return -1; + hts_md5_update(md5, b->data, b->uncomp_size); + hts_md5_final(digest, md5); + hts_md5_destroy(md5); + } + } + + if (!c->comp_hdr->no_ref && + ((!s->ref && s->hdr->ref_base_id < 0) + || memcmp(digest, s->hdr->md5, 16) != 0)) { + char M[33]; + const char *rname = sam_hdr_tid2name(sh, ref_id); + if (!rname) rname="?"; // cannot happen normally + hts_log_error("MD5 checksum reference mismatch at %s:%"PRIhts_pos"-%"PRIhts_pos, + rname, s->ref_start, s->ref_end); + hts_log_error("CRAM : %s", md5_print(s->hdr->md5, M)); + hts_log_error("Ref : %s", md5_print(digest, M)); + kstring_t ks = 
KS_INITIALIZE; + if (sam_hdr_find_tag_id(sh, "SQ", "SN", rname, "M5", &ks) == 0) + hts_log_error("@SQ M5: %s", ks.s); + hts_log_error("Please check the reference given is correct"); + ks_free(&ks); + return -1; + } + } + + if (ref_id == -2) { + pthread_mutex_lock(&fd->ref_lock); + pthread_mutex_lock(&fd->refs->lock); + refs = calloc(fd->refs->nref, sizeof(char *)); + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + if (!refs) + return -1; + } + + int last_ref_id = -9; // Arbitrary -ve marker for not-yet-set + for (rec = 0; rec < s->hdr->num_records; rec++) { + cram_record *cr = &s->crecs[rec]; + int has_MD, has_NM; + + //fprintf(stderr, "Decode seq %d, %d/%d\n", rec, blk->byte, blk->bit); + + cr->s = s; + + out_sz = 1; /* decode 1 item */ + if (ds & CRAM_BF) { + if (!c->comp_hdr->codecs[DS_BF]) goto block_err; + r |= c->comp_hdr->codecs[DS_BF] + ->decode(s, c->comp_hdr->codecs[DS_BF], blk, + (char *)&bf, &out_sz); + if (r || bf < 0 || + bf >= sizeof(fd->bam_flag_swap)/sizeof(*fd->bam_flag_swap)) + goto block_err; + bf = fd->bam_flag_swap[bf]; + cr->flags = bf; + } else { + cr->flags = bf = 0x4; // unmapped + } + + if (ds & CRAM_CF) { + if (CRAM_MAJOR_VERS(fd->version) == 1) { + /* CF is byte in 1.0, int32 in 2.0 */ + if (!c->comp_hdr->codecs[DS_CF]) goto block_err; + r |= c->comp_hdr->codecs[DS_CF] + ->decode(s, c->comp_hdr->codecs[DS_CF], blk, + (char *)&cf, &out_sz); + if (r) goto block_err; + cr->cram_flags = cf; + } else { + if (!c->comp_hdr->codecs[DS_CF]) goto block_err; + r |= c->comp_hdr->codecs[DS_CF] + ->decode(s, c->comp_hdr->codecs[DS_CF], blk, + (char *)&cr->cram_flags, &out_sz); + if (r) goto block_err; + cf = cr->cram_flags; + } + } else { + cf = cr->cram_flags = 0; + } + + if (CRAM_MAJOR_VERS(fd->version) != 1 && ref_id == -2) { + if (ds & CRAM_RI) { + if (!c->comp_hdr->codecs[DS_RI]) goto block_err; + r |= c->comp_hdr->codecs[DS_RI] + ->decode(s, c->comp_hdr->codecs[DS_RI], blk, + (char *)&cr->ref_id, &out_sz); + if 
(r) goto block_err; + if ((fd->required_fields & (SAM_SEQ|SAM_TLEN)) + && cr->ref_id >= 0 + && cr->ref_id != last_ref_id) { + if (!c->comp_hdr->no_ref) { + // Range(fd): seq >= 0, unmapped -1, unspecified -2 + // Slice(s): seq >= 0, unmapped -1, multiple refs -2 + // Record(cr): seq >= 0, unmapped -1 + pthread_mutex_lock(&fd->range_lock); + int need_ref = (fd->range.refid == -2 || cr->ref_id == fd->range.refid); + pthread_mutex_unlock(&fd->range_lock); + if (need_ref) { + if (!refs[cr->ref_id]) + refs[cr->ref_id] = cram_get_ref(fd, cr->ref_id, 1, 0); + if (!(s->ref = refs[cr->ref_id])) + goto block_err; + } else { + // For multi-ref containers, we don't need to fetch all + // refs if we're only querying one. + s->ref = NULL; + } + + pthread_mutex_lock(&fd->range_lock); + int discard_last_ref = (last_ref_id >= 0 && + refs[last_ref_id] && + (fd->range.refid == -2 || + last_ref_id == fd->range.refid)); + pthread_mutex_unlock(&fd->range_lock); + if (discard_last_ref) { + pthread_mutex_lock(&fd->ref_lock); + discard_last_ref = !fd->unsorted; + pthread_mutex_unlock(&fd->ref_lock); + } + if (discard_last_ref) { + cram_ref_decr(fd->refs, last_ref_id); + refs[last_ref_id] = NULL; + } + } + s->ref_start = 1; + pthread_mutex_lock(&fd->ref_lock); + pthread_mutex_lock(&fd->refs->lock); + s->ref_end = fd->refs->ref_id[cr->ref_id]->length; + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + + last_ref_id = cr->ref_id; + } + } else { + cr->ref_id = -1; + } + } else { + cr->ref_id = ref_id; // Forced constant in CRAM 1.0 + } + if (cr->ref_id < -1 || cr->ref_id >= bfd->nref) { + hts_log_error("Requested unknown reference ID %d", cr->ref_id); + goto block_err; + } + + if (ds & CRAM_RL) { + if (!c->comp_hdr->codecs[DS_RL]) goto block_err; + r |= c->comp_hdr->codecs[DS_RL] + ->decode(s, c->comp_hdr->codecs[DS_RL], blk, + (char *)&cr->len, &out_sz); + if (r) goto block_err; + if (cr->len < 0) { + hts_log_error("Read has negative length"); + goto block_err; 
+ } + } + + if (ds & CRAM_AP) { + if (!c->comp_hdr->codecs[DS_AP]) goto block_err; + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + r |= c->comp_hdr->codecs[DS_AP] + ->decode(s, c->comp_hdr->codecs[DS_AP], blk, + (char *)&cr->apos, &out_sz); + } else { + int32_t i32; + r |= c->comp_hdr->codecs[DS_AP] + ->decode(s, c->comp_hdr->codecs[DS_AP], blk, + (char *)&i32, &out_sz); + cr->apos = i32; + } + if (r) goto block_err;; + if (c->comp_hdr->AP_delta) { + if (cr->apos < 0 && c->unsorted == 0) { + // cache locally in c->unsorted so we don't have an + // excessive number of locks + pthread_mutex_lock(&fd->ref_lock); + c->unsorted = fd->unsorted = 1; + pthread_mutex_unlock(&fd->ref_lock); + } + cr->apos += s->last_apos; + } + s->last_apos= cr->apos; + } else { + cr->apos = c->ref_seq_start; + } + + if (ds & CRAM_RG) { + if (!c->comp_hdr->codecs[DS_RG]) goto block_err; + r |= c->comp_hdr->codecs[DS_RG] + ->decode(s, c->comp_hdr->codecs[DS_RG], blk, + (char *)&cr->rg, &out_sz); + if (r) goto block_err; + if (cr->rg == unknown_rg) + cr->rg = -1; + } else { + cr->rg = -1; + } + + cr->name_len = 0; + + if (c->comp_hdr->read_names_included) { + int32_t out_sz2 = 1; + + // Read directly into name cram_block + cr->name = BLOCK_SIZE(s->name_blk); + if (ds & CRAM_RN) { + if (!c->comp_hdr->codecs[DS_RN]) goto block_err; + r |= c->comp_hdr->codecs[DS_RN] + ->decode(s, c->comp_hdr->codecs[DS_RN], blk, + (char *)s->name_blk, &out_sz2); + if (r) goto block_err; + cr->name_len = out_sz2; + } + } + + cr->mate_pos = 0; + cr->mate_line = -1; + cr->mate_ref_id = -1; + cr->explicit_tlen = INT64_MIN; + if ((ds & CRAM_CF) && (cf & CRAM_FLAG_DETACHED)) { + if (ds & CRAM_MF) { + if (CRAM_MAJOR_VERS(fd->version) == 1) { + /* MF is byte in 1.0, int32 in 2.0 */ + unsigned char mf; + if (!c->comp_hdr->codecs[DS_MF]) goto block_err; + r |= c->comp_hdr->codecs[DS_MF] + ->decode(s, c->comp_hdr->codecs[DS_MF], + blk, (char *)&mf, &out_sz); + if (r) goto block_err; + cr->mate_flags = mf; + } else { + if 
(!c->comp_hdr->codecs[DS_MF]) goto block_err; + r |= c->comp_hdr->codecs[DS_MF] + ->decode(s, c->comp_hdr->codecs[DS_MF], + blk, + (char *)&cr->mate_flags, + &out_sz); + if (r) goto block_err; + } + } else { + cr->mate_flags = 0; + } + + if (!c->comp_hdr->read_names_included) { + int32_t out_sz2 = 1; + + // Read directly into name cram_block + cr->name = BLOCK_SIZE(s->name_blk); + if (ds & CRAM_RN) { + if (!c->comp_hdr->codecs[DS_RN]) goto block_err; + r |= c->comp_hdr->codecs[DS_RN] + ->decode(s, c->comp_hdr->codecs[DS_RN], + blk, (char *)s->name_blk, + &out_sz2); + if (r) goto block_err; + cr->name_len = out_sz2; + } + } + + if (ds & CRAM_NS) { + if (!c->comp_hdr->codecs[DS_NS]) goto block_err; + r |= c->comp_hdr->codecs[DS_NS] + ->decode(s, c->comp_hdr->codecs[DS_NS], blk, + (char *)&cr->mate_ref_id, &out_sz); + if (r) goto block_err; + } + + // Skip as mate_ref of "*" is legit. It doesn't mean unmapped, just unknown. + // if (cr->mate_ref_id == -1 && cr->flags & 0x01) { + // /* Paired, but unmapped */ + // cr->flags |= BAM_FMUNMAP; + // } + + if (ds & CRAM_NP) { + if (!c->comp_hdr->codecs[DS_NP]) goto block_err;; + if (CRAM_MAJOR_VERS(fd->version) < 4) { + int32_t i32; + r |= c->comp_hdr->codecs[DS_NP] + ->decode(s, c->comp_hdr->codecs[DS_NP], blk, + (char *)&i32, &out_sz); + cr->mate_pos = i32; + } else { + r |= c->comp_hdr->codecs[DS_NP] + ->decode(s, c->comp_hdr->codecs[DS_NP], blk, + (char *)&cr->mate_pos, &out_sz); + } + if (r) goto block_err; + } + + if (ds & CRAM_TS) { + if (!c->comp_hdr->codecs[DS_TS]) goto block_err; + r = cram_decode_tlen(fd, c, s, blk, &cr->tlen); + if (r) goto block_err; + } else { + cr->tlen = INT64_MIN; + } + } else if ((ds & CRAM_CF) && (cf & CRAM_FLAG_MATE_DOWNSTREAM)) { + // else not detached + if (ds & CRAM_NF) { + if (!c->comp_hdr->codecs[DS_NF]) goto block_err; + r |= c->comp_hdr->codecs[DS_NF] + ->decode(s, c->comp_hdr->codecs[DS_NF], blk, + (char *)&cr->mate_line, &out_sz); + if (r) goto block_err; + cr->mate_line += rec + 
1; + + //cr->name_len = sprintf(name, "%d", name_id++); + //cr->name = DSTRING_LEN(name_ds); + //dstring_nappend(name_ds, name, cr->name_len); + + cr->mate_ref_id = -1; + cr->tlen = INT64_MIN; + cr->mate_pos = 0; + } else { + cr->mate_flags = 0; + cr->tlen = INT64_MIN; + } + if ((ds & CRAM_CF) && (cf & CRAM_FLAG_EXPLICIT_TLEN)) { + if (ds & CRAM_TS) { + r = cram_decode_tlen(fd, c, s, blk, &cr->explicit_tlen); + if (r) return r; + } else { + cr->mate_flags = 0; + cr->tlen = INT64_MIN; + } + } + } else if ((ds & CRAM_CF) && (cf & CRAM_FLAG_EXPLICIT_TLEN)) { + if (ds & CRAM_TS) { + r = cram_decode_tlen(fd, c, s, blk, &cr->explicit_tlen); + if (r) return r; + } else { + cr->mate_flags = 0; + cr->tlen = INT64_MIN; + } + } else { + cr->mate_flags = 0; + cr->tlen = INT64_MIN; + } + /* + else if (!name[0]) { + //name[0] = '?'; name[1] = 0; + //cr->name_len = 1; + //cr->name= DSTRING_LEN(s->name_ds); + //dstring_nappend(s->name_ds, "?", 1); + + cr->mate_ref_id = -1; + cr->tlen = 0; + cr->mate_pos = 0; + } + */ + + /* Auxiliary tags */ + has_MD = has_NM = 0; + if (CRAM_MAJOR_VERS(fd->version) == 1) + r |= cram_decode_aux_1_0(c, s, blk, cr); + else + r |= cram_decode_aux(fd, c, s, blk, cr, &has_MD, &has_NM); + if (r) goto block_err; + + /* Fake up dynamic string growth and appending */ + if (ds & CRAM_RL) { + cr->seq = BLOCK_SIZE(s->seqs_blk); + BLOCK_GROW(s->seqs_blk, cr->len); + seq = (char *)BLOCK_END(s->seqs_blk); + BLOCK_SIZE(s->seqs_blk) += cr->len; + + if (!seq) + goto block_err; + + cr->qual = BLOCK_SIZE(s->qual_blk); + BLOCK_GROW(s->qual_blk, cr->len); + qual = (char *)BLOCK_END(s->qual_blk); + BLOCK_SIZE(s->qual_blk) += cr->len; + + if (!s->ref) + memset(seq, '=', cr->len); + } + + if (!(bf & BAM_FUNMAP)) { + if ((ds & CRAM_AP) && cr->apos <= 0) { + hts_log_error("Read has alignment position %"PRId64 + " but no unmapped flag", + cr->apos); + goto block_err; + } + /* Decode sequence and generate CIGAR */ + if (ds & (CRAM_SEQ | CRAM_MQ)) { + r |= cram_decode_seq(fd, 
c, s, blk, cr, sh, cf, seq, qual, + has_MD, has_NM); + if (r) goto block_err; + } else { + cr->cigar = 0; + cr->ncigar = 0; + cr->aend = cr->apos; + cr->mqual = 0; + } + } else { + int out_sz2 = cr->len; + + //puts("Unmapped"); + cr->cigar = 0; + cr->ncigar = 0; + cr->aend = cr->apos; + cr->mqual = 0; + + if (ds & CRAM_BA && cr->len) { + if (!c->comp_hdr->codecs[DS_BA]) goto block_err; + r |= c->comp_hdr->codecs[DS_BA] + ->decode(s, c->comp_hdr->codecs[DS_BA], blk, + (char *)seq, &out_sz2); + if (r) goto block_err; + } + + if ((ds & CRAM_CF) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { + out_sz2 = cr->len; + if (ds & CRAM_QS && cr->len >= 0) { + if (!c->comp_hdr->codecs[DS_QS]) goto block_err; + r |= c->comp_hdr->codecs[DS_QS] + ->decode(s, c->comp_hdr->codecs[DS_QS], + blk, qual, &out_sz2); + if (r) goto block_err; + } + } else { + if (ds & CRAM_RL) + memset(qual, 255, cr->len); + } + } + + if (!c->comp_hdr->qs_seq_orient && (ds & CRAM_QS) && (cr->flags & BAM_FREVERSE)) { + int i, j; + for (i = 0, j = cr->len-1; i < j; i++, j--) { + unsigned char c; + c = qual[i]; + qual[i] = qual[j]; + qual[j] = c; + } + } + } + + pthread_mutex_lock(&fd->ref_lock); + if (refs) { + int i; + for (i = 0; i < fd->refs->nref; i++) { + if (refs[i]) + cram_ref_decr(fd->refs, i); + } + free(refs); + refs = NULL; + } else if (ref_id >= 0 && s->ref != fd->ref_free && !embed_ref) { + cram_ref_decr(fd->refs, ref_id); + } + pthread_mutex_unlock(&fd->ref_lock); + + /* Resolve mate pair cross-references between recs within this slice */ + r |= cram_decode_slice_xref(s, fd->required_fields); + + // Free the original blocks as we no longer need these. + { + int i; + for (i = 0; i < s->hdr->num_blocks; i++) { + cram_block *b = s->block[i]; + cram_free_block(b); + s->block[i] = NULL; + } + } + + // Also see initial BLOCK_RESIZE_EXACT at top of function. + // As we grow blocks we overallocate by up to 50%. So shrink + // back to their final sizes here. 
+ // + //fprintf(stderr, "%d %d // %d %d // %d %d // %d %d\n", + // (int)s->seqs_blk->byte, (int)s->seqs_blk->alloc, + // (int)s->qual_blk->byte, (int)s->qual_blk->alloc, + // (int)s->name_blk->byte, (int)s->name_blk->alloc, + // (int)s->aux_blk->byte, (int)s->aux_blk->alloc); + BLOCK_RESIZE_EXACT(s->seqs_blk, BLOCK_SIZE(s->seqs_blk)+1); + BLOCK_RESIZE_EXACT(s->qual_blk, BLOCK_SIZE(s->qual_blk)+1); + BLOCK_RESIZE_EXACT(s->name_blk, BLOCK_SIZE(s->name_blk)+1); + BLOCK_RESIZE_EXACT(s->aux_blk, BLOCK_SIZE(s->aux_blk)+1); + + return r; + + block_err: + if (refs) { + int i; + pthread_mutex_lock(&fd->ref_lock); + for (i = 0; i < fd->refs->nref; i++) { + if (refs[i]) + cram_ref_decr(fd->refs, i); + } + free(refs); + pthread_mutex_unlock(&fd->ref_lock); + } + + return -1; +} + +typedef struct { + cram_fd *fd; + cram_container *c; + cram_slice *s; + sam_hdr_t *h; + int exit_code; +} cram_decode_job; + +void *cram_decode_slice_thread(void *arg) { + cram_decode_job *j = (cram_decode_job *)arg; + + j->exit_code = cram_decode_slice(j->fd, j->c, j->s, j->h); + + return j; +} + +/* + * Spawn a multi-threaded version of cram_decode_slice(). + */ +int cram_decode_slice_mt(cram_fd *fd, cram_container *c, cram_slice *s, + sam_hdr_t *bfd) { + cram_decode_job *j; + int nonblock; + + if (!fd->pool) + return cram_decode_slice(fd, c, s, bfd); + + if (!(j = malloc(sizeof(*j)))) + return -1; + + j->fd = fd; + j->c = c; + j->s = s; + j->h = bfd; + + nonblock = hts_tpool_process_sz(fd->rqueue) ? 1 : 0; + + int saved_errno = errno; + errno = 0; + if (-1 == hts_tpool_dispatch2(fd->pool, fd->rqueue, cram_decode_slice_thread, + j, nonblock)) { + /* Would block */ + if (errno != EAGAIN) + return -1; + fd->job_pending = j; + } else { + fd->job_pending = NULL; + } + errno = saved_errno; + + // flush too + return 0; +} + + +/* ---------------------------------------------------------------------- + * CRAM sequence iterators. + */ + +/* + * Converts a cram in-memory record into a bam in-memory record. 
We + * pass a pointer to a bam_seq_t pointer along with the a pointer to + * the allocated size. These can initially be pointers to NULL and zero. + * + * This function will reallocate the bam buffer as required and update + * (*bam)->alloc accordingly, allowing it to be used within a loop + * efficiently without needing to allocate new bam objects over and + * over again. + * + * Returns the used size of the bam record on success + * -1 on failure. + */ +int cram_to_bam(sam_hdr_t *sh, cram_fd *fd, cram_slice *s, + cram_record *cr, int rec, bam_seq_t **bam) { + int ret, rg_len; + char name_a[1024], *name; + int name_len; + char *aux; + char *seq, *qual; + sam_hrecs_t *bfd = sh->hrecs; + + /* Assign names if not explicitly set */ + if (fd->required_fields & SAM_QNAME) { + if (cr->name_len) { + name = (char *)BLOCK_DATA(s->name_blk) + cr->name; + name_len = cr->name_len; + } else { + name = name_a; + if (cr->mate_line >= 0 && cr->mate_line < s->max_rec && + s->crecs[cr->mate_line].name_len > 0) { + // Copy our mate if non-zero. + memcpy(name_a, BLOCK_DATA(s->name_blk)+s->crecs[cr->mate_line].name, + s->crecs[cr->mate_line].name_len); + name = name_a + s->crecs[cr->mate_line].name_len; + } else { + // Otherwise generate a name based on prefix + name_len = strlen(fd->prefix); + memcpy(name, fd->prefix, name_len); + name += name_len; + *name++ = ':'; + if (cr->mate_line >= 0 && cr->mate_line < rec) { + name = (char *)append_uint64((unsigned char *)name, + s->hdr->record_counter + + cr->mate_line + 1); + } else { + name = (char *)append_uint64((unsigned char *)name, + s->hdr->record_counter + + rec + 1); + } + } + name_len = name - name_a; + name = name_a; + } + } else { + name = "?"; + name_len = 1; + } + + /* Generate BAM record */ + if (cr->rg < -1 || cr->rg >= bfd->nrg) + return -1; + rg_len = (cr->rg != -1) ? 
bfd->rg[cr->rg].name_len + 4 : 0; + + if (fd->required_fields & (SAM_SEQ | SAM_QUAL)) { + if (!BLOCK_DATA(s->seqs_blk)) + return -1; + seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq; + } else { + seq = "*"; + cr->len = 0; + } + + if (fd->required_fields & SAM_QUAL) { + if (!BLOCK_DATA(s->qual_blk)) + return -1; + qual = (char *)BLOCK_DATA(s->qual_blk) + cr->qual; + } else { + qual = NULL; + } + + ret = bam_set1(*bam, + name_len, name, + cr->flags, cr->ref_id, cr->apos - 1, cr->mqual, + cr->ncigar, &s->cigar[cr->cigar], + cr->mate_ref_id, cr->mate_pos - 1, cr->tlen, + cr->len, seq, qual, + cr->aux_size + rg_len); + if (ret < 0) { + return ret; + } + + aux = (char *)bam_aux(*bam); + + /* Auxiliary strings */ + if (cr->aux_size != 0) { + memcpy(aux, BLOCK_DATA(s->aux_blk) + cr->aux, cr->aux_size); + aux += cr->aux_size; + (*bam)->l_data += cr->aux_size; + } + + /* RG:Z: */ + if (rg_len > 0) { + *aux++ = 'R'; *aux++ = 'G'; *aux++ = 'Z'; + int len = bfd->rg[cr->rg].name_len; + memcpy(aux, bfd->rg[cr->rg].name, len); + aux += len; + *aux++ = 0; + (*bam)->l_data += rg_len; + } + + return (*bam)->l_data; +} + +/* + * Here be dragons! The multi-threading code in this is crufty beyond belief. + */ + +/* + * Load first container. + * Called when fd->ctr is NULL> + * + * Returns container on success + * NULL on failure. + */ +static cram_container *cram_first_slice(cram_fd *fd) { + cram_container *c; + + do { + if (fd->ctr) + cram_free_container(fd->ctr); + + if (!(c = fd->ctr = cram_read_container(fd))) + return NULL; + c->curr_slice_mt = c->curr_slice; + } while (c->length == 0); + + /* + * The first container may be a result of a sub-range query. + * In which case it may still not be the optimal starting point + * due to skipped containers/slices in the index. + */ + // No need for locks here as we're in the main thread. 
+ if (fd->range.refid != -2) { + while (c->ref_seq_id != -2 && + (c->ref_seq_id < fd->range.refid || + (fd->range.refid >= 0 && c->ref_seq_id == fd->range.refid + && c->ref_seq_start + c->ref_seq_span-1 < fd->range.start))) { + if (0 != cram_seek(fd, c->length, SEEK_CUR)) + return NULL; + cram_free_container(fd->ctr); + do { + if (!(c = fd->ctr = cram_read_container(fd))) + return NULL; + } while (c->length == 0); + } + + if (c->ref_seq_id != -2 && c->ref_seq_id != fd->range.refid) { + fd->eof = 1; + return NULL; + } + } + + if (!(c->comp_hdr_block = cram_read_block(fd))) + return NULL; + if (c->comp_hdr_block->content_type != COMPRESSION_HEADER) + return NULL; + + c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block); + if (!c->comp_hdr) + return NULL; + if (!c->comp_hdr->AP_delta && + sam_hrecs_sort_order(fd->header->hrecs) != ORDER_COORD) { + pthread_mutex_lock(&fd->ref_lock); + fd->unsorted = 1; + pthread_mutex_unlock(&fd->ref_lock); + } + + return c; +} + +cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) { + cram_container *c_curr; // container being consumed via cram_get_seq() + cram_slice *s_curr = NULL; + + // Populate the first container if unknown. + if (!(c_curr = fd->ctr)) { + if (!(c_curr = cram_first_slice(fd))) + return NULL; + } + + // Discard previous slice + if ((s_curr = c_curr->slice)) { + c_curr->slice = NULL; + cram_free_slice(s_curr); + s_curr = NULL; + } + + // If we've consumed all slices in this container, also discard + // the container too. + if (c_curr->curr_slice == c_curr->max_slice) { + if (fd->ctr == c_curr) + fd->ctr = NULL; + if (fd->ctr_mt == c_curr) + fd->ctr_mt = NULL; + cram_free_container(c_curr); + c_curr = NULL; + } + + if (!fd->ctr_mt) + fd->ctr_mt = c_curr; + + // Fetch the next slice (and the container if necessary). + // + // If single threaded this loop bails out as soon as it finds + // a slice in range. In this case c_next and c_curr end up being + // the same thing. 
+ // + // If multi-threaded, we loop until we have filled out + // thread pool input queue. Here c_next and c_curr *may* differ, as + // can fd->ctr and fd->ctr_mt. + for (;;) { + cram_container *c_next = fd->ctr_mt; + cram_slice *s_next = NULL; + + // Next slice; either from the last job we failed to push + // to the input queue or via more I/O. + if (fd->job_pending) { + cram_decode_job *j = (cram_decode_job *)fd->job_pending; + c_next = j->c; + s_next = j->s; + free(fd->job_pending); + fd->job_pending = NULL; + } else if (!fd->ooc) { + empty_container: + if (!c_next || c_next->curr_slice_mt == c_next->max_slice) { + // new container + for(;;) { + if (!(c_next = cram_read_container(fd))) { + if (fd->pool) { + fd->ooc = 1; + break; + } + + return NULL; + } + c_next->curr_slice_mt = c_next->curr_slice; + + if (c_next->length != 0) + break; + + cram_free_container(c_next); + } + if (fd->ooc) + break; + + /* Skip containers not yet spanning our range */ + if (fd->range.refid != -2 && c_next->ref_seq_id != -2) { + // ref_id beyond end of range; bail out + if (c_next->ref_seq_id != fd->range.refid) { + cram_free_container(c_next); + fd->ctr_mt = NULL; + fd->ooc = 1; + break; + } + + // position beyond end of range; bail out + if (fd->range.refid != -1 && + c_next->ref_seq_start > fd->range.end) { + cram_free_container(c_next); + fd->ctr_mt = NULL; + fd->ooc = 1; + break; + } + + // before start of range; skip to next container + if (fd->range.refid != -1 && + c_next->ref_seq_start + c_next->ref_seq_span-1 < + fd->range.start) { + c_next->curr_slice_mt = c_next->max_slice; + cram_seek(fd, c_next->length, SEEK_CUR); + cram_free_container(c_next); + c_next = NULL; + continue; + } + } + + // Container is valid range, so remember it for restarting + // this function. 
+ fd->ctr_mt = c_next; + + if (!(c_next->comp_hdr_block = cram_read_block(fd))) + return NULL; + if (c_next->comp_hdr_block->content_type != COMPRESSION_HEADER) + return NULL; + + c_next->comp_hdr = + cram_decode_compression_header(fd, c_next->comp_hdr_block); + if (!c_next->comp_hdr) + return NULL; + + if (!c_next->comp_hdr->AP_delta && + sam_hrecs_sort_order(fd->header->hrecs) != ORDER_COORD) { + pthread_mutex_lock(&fd->ref_lock); + fd->unsorted = 1; + pthread_mutex_unlock(&fd->ref_lock); + } + } + + if (c_next->num_records == 0) { + if (fd->ctr == c_next) + fd->ctr = NULL; + if (c_curr == c_next) + c_curr = NULL; + if (fd->ctr_mt == c_next) + fd->ctr_mt = NULL; + cram_free_container(c_next); + c_next = NULL; + goto empty_container; + } + + if (!(s_next = c_next->slice = cram_read_slice(fd))) + return NULL; + + s_next->slice_num = ++c_next->curr_slice_mt; + s_next->curr_rec = 0; + s_next->max_rec = s_next->hdr->num_records; + + s_next->last_apos = s_next->hdr->ref_seq_start; + + // We know the container overlaps our range, but with multi-slice + // containers we may have slices that do not. Skip these also. 
+ if (fd->range.refid != -2 && s_next->hdr->ref_seq_id != -2) { + // ref_id beyond end of range; bail out + if (s_next->hdr->ref_seq_id != fd->range.refid) { + fd->ooc = 1; + cram_free_slice(s_next); + c_next->slice = s_next = NULL; + break; + } + + // position beyond end of range; bail out + if (fd->range.refid != -1 && + s_next->hdr->ref_seq_start > fd->range.end) { + fd->ooc = 1; + cram_free_slice(s_next); + c_next->slice = s_next = NULL; + break; + } + + // before start of range; skip to next slice + if (fd->range.refid != -1 && + s_next->hdr->ref_seq_start + s_next->hdr->ref_seq_span-1 < + fd->range.start) { + cram_free_slice(s_next); + c_next->slice = s_next = NULL; + continue; + } + } + } // end: if (!fd->ooc) + + if (!c_next || !s_next) + break; + + // Decode the slice, either right now (non-threaded) or by pushing + // it to the a decode queue (threaded). + if (cram_decode_slice_mt(fd, c_next, s_next, fd->header) != 0) { + hts_log_error("Failure to decode slice"); + cram_free_slice(s_next); + c_next->slice = NULL; + return NULL; + } + + // No thread pool, so don't loop again + if (!fd->pool) { + c_curr = c_next; + s_curr = s_next; + break; + } + + // With thread pool, but we have a job pending so our decode queue + // is full. + if (fd->job_pending) + break; + + // Otherwise we're threaded with room in the decode input queue, so + // keep reading slices for decode. + // Push it a bit far, to qsize in queue rather than pending arrival, + // as cram tends to be a bit bursty in decode timings. + if (hts_tpool_process_len(fd->rqueue) > + hts_tpool_process_qsize(fd->rqueue)) + break; + } // end of for(;;) + + + // When not threaded we've already have c_curr and s_curr. + // Otherwise we need get them by pulling off the decode output queue. 
+ if (fd->pool) { + hts_tpool_result *res; + cram_decode_job *j; + + if (fd->ooc && hts_tpool_process_empty(fd->rqueue)) { + fd->eof = 1; + return NULL; + } + + res = hts_tpool_next_result_wait(fd->rqueue); + + if (!res || !hts_tpool_result_data(res)) { + hts_log_error("Call to hts_tpool_next_result failed"); + return NULL; + } + + j = (cram_decode_job *)hts_tpool_result_data(res); + c_curr = j->c; + s_curr = j->s; + + if (j->exit_code != 0) { + hts_log_error("Slice decode failure"); + fd->eof = 0; + hts_tpool_delete_result(res, 1); + return NULL; + } + + hts_tpool_delete_result(res, 1); + } + + *cp = c_curr; + + // Update current slice being processed (as opposed to current + // slice in the multi-threaded reahead. + fd->ctr = c_curr; + if (c_curr) { + c_curr->slice = s_curr; + if (s_curr) + c_curr->curr_slice = s_curr->slice_num; + } + if (s_curr) + s_curr->curr_rec = 0; + else + fd->eof = 1; + + return s_curr; +} + +/* + * Read the next cram record and return it. + * Note that to decode cram_record the caller will need to look up some data + * in the current slice, pointed to by fd->ctr->slice. This is valid until + * the next call to cram_get_seq (which may invalidate it). + * + * Returns record pointer on success (do not free) + * NULL on failure + */ +cram_record *cram_get_seq(cram_fd *fd) { + cram_container *c; + cram_slice *s; + + for (;;) { + c = fd->ctr; + if (c && c->slice && c->slice->curr_rec < c->slice->max_rec) { + s = c->slice; + } else { + if (!(s = cram_next_slice(fd, &c))) + return NULL; + continue; /* In case slice contains no records */ + } + + // No need to lock here as get_seq is running in the main thread, + // which is also the same one that does the range modifications. + if (fd->range.refid != -2) { + if (fd->range.refid == -1 && s->crecs[s->curr_rec].ref_id != -1) { + // Special case when looking for unmapped blocks at end. 
+ // If these are mixed in with mapped data (c->ref_id == -2) + // then we need skip until we find the unmapped data, if at all + s->curr_rec++; + continue; + } + if (s->crecs[s->curr_rec].ref_id < fd->range.refid && + s->crecs[s->curr_rec].ref_id != -1) { + // Looking for a mapped read, but not there yet. Special case + // as -1 (unmapped) shouldn't be considered < refid. + s->curr_rec++; + continue; + } + + if (s->crecs[s->curr_rec].ref_id != fd->range.refid) { + fd->eof = 1; + cram_free_slice(s); + c->slice = NULL; + return NULL; + } + + if (fd->range.refid != -1 && s->crecs[s->curr_rec].apos > fd->range.end) { + fd->eof = 1; + cram_free_slice(s); + c->slice = NULL; + return NULL; + } + + if (fd->range.refid != -1 && s->crecs[s->curr_rec].aend < fd->range.start) { + s->curr_rec++; + continue; + } + } + + break; + } + + fd->ctr = c; + c->slice = s; + return &s->crecs[s->curr_rec++]; +} + +/* + * Read the next cram record and convert it to a bam_seq_t struct. + * + * Returns >= 0 success (number of bytes written to *bam) + * -1 on EOF or failure (check fd->err) + */ +int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam) { + cram_record *cr; + cram_container *c; + cram_slice *s; + + if (!(cr = cram_get_seq(fd))) + return -1; + + c = fd->ctr; + s = c->slice; + + return cram_to_bam(fd->header, fd, s, cr, s->curr_rec-1, bam); +} + +/* + * Drains and frees the decode read-queue for a multi-threaded reader. 
+ */ +void cram_drain_rqueue(cram_fd *fd) { + cram_container *lc = NULL; + + if (!fd->pool || !fd->rqueue) + return; + + // drain queue of any in-flight decode jobs + while (!hts_tpool_process_empty(fd->rqueue)) { + hts_tpool_result *r = hts_tpool_next_result_wait(fd->rqueue); + if (!r) + break; + cram_decode_job *j = (cram_decode_job *)hts_tpool_result_data(r); + if (j->c->slice == j->s) + j->c->slice = NULL; + if (j->c != lc) { + if (lc) { + if (fd->ctr == lc) + fd->ctr = NULL; + if (fd->ctr_mt == lc) + fd->ctr_mt = NULL; + cram_free_container(lc); + } + lc = j->c; + } + cram_free_slice(j->s); + hts_tpool_delete_result(r, 1); + } + + // Also tidy up any pending decode job that we didn't submit to the workers + // due to the input queue being full. + if (fd->job_pending) { + cram_decode_job *j = (cram_decode_job *)fd->job_pending; + if (j->c->slice == j->s) + j->c->slice = NULL; + if (j->c != lc) { + if (lc) { + if (fd->ctr == lc) + fd->ctr = NULL; + if (fd->ctr_mt == lc) + fd->ctr_mt = NULL; + cram_free_container(lc); + } + lc = j->c; + } + cram_free_slice(j->s); + free(j); + fd->job_pending = NULL; + } + + if (lc) { + if (fd->ctr == lc) + fd->ctr = NULL; + if (fd->ctr_mt == lc) + fd->ctr_mt = NULL; + cram_free_container(lc); + } +} diff --git a/src/htslib-1.21/cram/cram_decode.h b/src/htslib-1.21/cram/cram_decode.h new file mode 100644 index 0000000..16d87a0 --- /dev/null +++ b/src/htslib-1.21/cram/cram_decode.h @@ -0,0 +1,142 @@ +/* +Copyright (c) 2012-2013, 2018, 2024 Genome Research Ltd. +Author: James Bonfield + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/*! \file + * Include cram.h instead. + * + * This is an internal part of the CRAM system and is automatically included + * when you #include cram.h. + * + * Implements the decoding portion of CRAM I/O. Also see + * cram_codecs.[ch] for the actual encoding functions themselves. + */ + +#ifndef CRAM_DECODE_H +#define CRAM_DECODE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* ---------------------------------------------------------------------- + * CRAM sequence iterators. + */ + +/*! Read the next cram record and return it as a cram_record. + * + * Note that to decode cram_record the caller will need to look up some data + * in the current slice, pointed to by fd->ctr->slice. This is valid until + * the next call to cram_get_seq (which may invalidate it). 
+ * + * @return + * Returns record pointer on success (do not free); + * NULL on failure + */ +cram_record *cram_get_seq(cram_fd *fd); + +/*! Read the next cram record and convert it to a bam_seq_t struct. + * + * @return + * Returns >= 0 on success (number of bytes written to *bam); + * -1 on EOF or failure (check fd->err) + */ +int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam); + + +/* ---------------------------------------------------------------------- + * Internal functions + */ + +/*! INTERNAL: + * Decodes a CRAM block compression header. + * + * @return + * Returns header ptr on success; + * NULL on failure + */ +cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, + cram_block *b); + +/*! INTERNAL: + * Decodes a CRAM (un)mapped slice header block. + * + * @return + * Returns slice header ptr on success; + * NULL on failure + */ +cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b); + + +/*! INTERNAL: + * Loads and decodes the next slice worth of data. + * + * @return + * Returns cram slice pointer on success; + * NULL on failure + */ +cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp); + +/*! INTERNAL: + * Decode an entire slice from container blocks. Fills out s->crecs[] array. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s, + sam_hdr_t *hdr); + + +/*! INTERNAL: + * Converts a cram in-memory record into a bam in-memory record. We + * pass a pointer to a bam_seq_t pointer along with a pointer to + * the allocated size. These can initially be pointers to NULL and zero. + * + * This function will reallocate the bam buffer as required and update + * (*bam)->alloc accordingly, allowing it to be used within a loop + * efficiently without needing to allocate new bam objects over and + * over again. + * + * Returns the used size of the bam record on success + * -1 on failure. 
+ */ +int cram_to_bam(sam_hdr_t *sh, cram_fd *fd, cram_slice *s, + cram_record *cr, int rec, bam_seq_t **bam); + +/* + * Drains and frees the decode read-queue for a multi-threaded reader. + */ +void cram_drain_rqueue(cram_fd *fd); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/htslib-1.21/cram/cram_encode.c b/src/htslib-1.21/cram/cram_encode.c new file mode 100644 index 0000000..5d22db5 --- /dev/null +++ b/src/htslib-1.21/cram/cram_encode.c @@ -0,0 +1,4180 @@ +/* +Copyright (c) 2012-2020, 2022-2024 Genome Research Ltd. +Author: James Bonfield + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cram.h" +#include "os.h" +#include "../sam_internal.h" // for nibble2base +#include "../htslib/hts.h" +#include "../htslib/hts_endian.h" +#include "../textutils_internal.h" + +KHASH_MAP_INIT_STR(m_s2u64, uint64_t) + +#define Z_CRAM_STRAT Z_FILTERED +//#define Z_CRAM_STRAT Z_RLE +//#define Z_CRAM_STRAT Z_HUFFMAN_ONLY +//#define Z_CRAM_STRAT Z_DEFAULT_STRATEGY + +static int process_one_read(cram_fd *fd, cram_container *c, + cram_slice *s, cram_record *cr, + bam_seq_t *b, int rnum, kstring_t *MD, + int embed_ref, int no_ref); + +/* + * Returns index of val into key. + * Basically strchr(key, val)-key; + */ +static int sub_idx(char *key, char val) { + int i; + + for (i = 0; i < 4 && *key++ != val; i++); + return i; +} + +/* + * Encodes a compression header block into a generic cram_block structure. 
+ * + * Returns cram_block ptr on success + * NULL on failure + */ +cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c, + cram_block_compression_hdr *h, + int embed_ref) { + cram_block *cb = cram_new_block(COMPRESSION_HEADER, 0); + cram_block *map = cram_new_block(COMPRESSION_HEADER, 0); + int i, mc, r = 0; + + int no_ref = c->no_ref; + + if (!cb || !map) + return NULL; + + /* + * This is a concatenation of several blocks of data: + * header + landmarks, preservation map, read encoding map, and the tag + * encoding map. + * All 4 are variable sized and we need to know how large these are + * before creating the compression header itself as this starts with + * the total size (stored as a variable length string). + */ + + // Duplicated from container itself, and removed in 1.1 + if (CRAM_MAJOR_VERS(fd->version) == 1) { + r |= itf8_put_blk(cb, h->ref_seq_id); + r |= itf8_put_blk(cb, h->ref_seq_start); + r |= itf8_put_blk(cb, h->ref_seq_span); + r |= itf8_put_blk(cb, h->num_records); + r |= itf8_put_blk(cb, h->num_landmarks); + for (i = 0; i < h->num_landmarks; i++) { + r |= itf8_put_blk(cb, h->landmark[i]); + } + } + + if (h->preservation_map) { + kh_destroy(map, h->preservation_map); + h->preservation_map = NULL; + } + + /* Create in-memory preservation map */ + /* FIXME: should create this when we create the container */ + if (c->num_records > 0) { + khint_t k; + int r; + + if (!(h->preservation_map = kh_init(map))) + return NULL; + + k = kh_put(map, h->preservation_map, "RN", &r); + if (-1 == r) return NULL; + kh_val(h->preservation_map, k).i = !fd->lossy_read_names; + + if (CRAM_MAJOR_VERS(fd->version) == 1) { + k = kh_put(map, h->preservation_map, "PI", &r); + if (-1 == r) return NULL; + kh_val(h->preservation_map, k).i = 0; + + k = kh_put(map, h->preservation_map, "UI", &r); + if (-1 == r) return NULL; + kh_val(h->preservation_map, k).i = 1; + + k = kh_put(map, h->preservation_map, "MI", &r); + if (-1 == r) return NULL; + 
kh_val(h->preservation_map, k).i = 1; + + } else { + // Technically SM was in 1.0, but wasn't in Java impl. + k = kh_put(map, h->preservation_map, "SM", &r); + if (-1 == r) return NULL; + kh_val(h->preservation_map, k).i = 0; + + k = kh_put(map, h->preservation_map, "TD", &r); + if (-1 == r) return NULL; + kh_val(h->preservation_map, k).i = 0; + + k = kh_put(map, h->preservation_map, "AP", &r); + if (-1 == r) return NULL; + kh_val(h->preservation_map, k).i = h->AP_delta; + + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + k = kh_put(map, h->preservation_map, "QO", &r); + if (-1 == r) return NULL; + kh_val(h->preservation_map, k).i = h->qs_seq_orient; + } + + if (no_ref || embed_ref>0) { + // Reference Required == No + k = kh_put(map, h->preservation_map, "RR", &r); + if (-1 == r) return NULL; + kh_val(h->preservation_map, k).i = 0; + } + } + } + + /* Encode preservation map; could collapse this and above into one */ + mc = 0; + BLOCK_SIZE(map) = 0; + if (h->preservation_map) { + khint_t k; + + for (k = kh_begin(h->preservation_map); + k != kh_end(h->preservation_map); + k++) { + const char *key; + khash_t(map) *pmap = h->preservation_map; + + + if (!kh_exist(pmap, k)) + continue; + + key = kh_key(pmap, k); + BLOCK_APPEND(map, key, 2); + + switch(CRAM_KEY(key[0], key[1])) { + case CRAM_KEY('M','I'): + case CRAM_KEY('U','I'): + case CRAM_KEY('P','I'): + case CRAM_KEY('A','P'): + case CRAM_KEY('R','N'): + case CRAM_KEY('R','R'): + case CRAM_KEY('Q','O'): + BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i); + break; + + case CRAM_KEY('S','M'): { + char smat[5], *mp = smat; + // Output format is for order ACGTN (minus ref base) + // to store the code value 0-3 for each symbol. + // + // Note this is different to storing the symbols in order + // that the codes occur from 0-3, which is what we used to + // do. (It didn't matter as we always had a fixed table in + // the order.) 
+ *mp++ = + (sub_idx(h->substitution_matrix[0], 'C') << 6) | + (sub_idx(h->substitution_matrix[0], 'G') << 4) | + (sub_idx(h->substitution_matrix[0], 'T') << 2) | + (sub_idx(h->substitution_matrix[0], 'N') << 0); + *mp++ = + (sub_idx(h->substitution_matrix[1], 'A') << 6) | + (sub_idx(h->substitution_matrix[1], 'G') << 4) | + (sub_idx(h->substitution_matrix[1], 'T') << 2) | + (sub_idx(h->substitution_matrix[1], 'N') << 0); + *mp++ = + (sub_idx(h->substitution_matrix[2], 'A') << 6) | + (sub_idx(h->substitution_matrix[2], 'C') << 4) | + (sub_idx(h->substitution_matrix[2], 'T') << 2) | + (sub_idx(h->substitution_matrix[2], 'N') << 0); + *mp++ = + (sub_idx(h->substitution_matrix[3], 'A') << 6) | + (sub_idx(h->substitution_matrix[3], 'C') << 4) | + (sub_idx(h->substitution_matrix[3], 'G') << 2) | + (sub_idx(h->substitution_matrix[3], 'N') << 0); + *mp++ = + (sub_idx(h->substitution_matrix[4], 'A') << 6) | + (sub_idx(h->substitution_matrix[4], 'C') << 4) | + (sub_idx(h->substitution_matrix[4], 'G') << 2) | + (sub_idx(h->substitution_matrix[4], 'T') << 0); + BLOCK_APPEND(map, smat, 5); + break; + } + + case CRAM_KEY('T','D'): { + r |= (fd->vv.varint_put32_blk(map, BLOCK_SIZE(h->TD_blk)) <= 0); + BLOCK_APPEND(map, + BLOCK_DATA(h->TD_blk), + BLOCK_SIZE(h->TD_blk)); + break; + } + + default: + hts_log_warning("Unknown preservation key '%.2s'", key); + break; + } + + mc++; + } + } + r |= (fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); + r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); + BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); + + /* rec encoding map */ + mc = 0; + BLOCK_SIZE(map) = 0; + if (h->codecs[DS_BF]) { + if (-1 == h->codecs[DS_BF]->store(h->codecs[DS_BF], map, "BF", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_CF]) { + if (-1 == h->codecs[DS_CF]->store(h->codecs[DS_CF], map, "CF", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_RL]) { + if (-1 == h->codecs[DS_RL]->store(h->codecs[DS_RL], map, "RL", + 
fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_AP]) { + if (-1 == h->codecs[DS_AP]->store(h->codecs[DS_AP], map, "AP", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_RG]) { + if (-1 == h->codecs[DS_RG]->store(h->codecs[DS_RG], map, "RG", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_MF]) { + if (-1 == h->codecs[DS_MF]->store(h->codecs[DS_MF], map, "MF", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_NS]) { + if (-1 == h->codecs[DS_NS]->store(h->codecs[DS_NS], map, "NS", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_NP]) { + if (-1 == h->codecs[DS_NP]->store(h->codecs[DS_NP], map, "NP", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_TS]) { + if (-1 == h->codecs[DS_TS]->store(h->codecs[DS_TS], map, "TS", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_NF]) { + if (-1 == h->codecs[DS_NF]->store(h->codecs[DS_NF], map, "NF", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_TC]) { + if (-1 == h->codecs[DS_TC]->store(h->codecs[DS_TC], map, "TC", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_TN]) { + if (-1 == h->codecs[DS_TN]->store(h->codecs[DS_TN], map, "TN", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_TL]) { + if (-1 == h->codecs[DS_TL]->store(h->codecs[DS_TL], map, "TL", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_FN]) { + if (-1 == h->codecs[DS_FN]->store(h->codecs[DS_FN], map, "FN", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_FC]) { + if (-1 == h->codecs[DS_FC]->store(h->codecs[DS_FC], map, "FC", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_FP]) { + if (-1 == h->codecs[DS_FP]->store(h->codecs[DS_FP], map, "FP", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_BS]) { + if (-1 == h->codecs[DS_BS]->store(h->codecs[DS_BS], map, "BS", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_IN]) { + if (-1 == h->codecs[DS_IN]->store(h->codecs[DS_IN], 
map, "IN", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_DL]) { + if (-1 == h->codecs[DS_DL]->store(h->codecs[DS_DL], map, "DL", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_BA]) { + if (-1 == h->codecs[DS_BA]->store(h->codecs[DS_BA], map, "BA", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_BB]) { + if (-1 == h->codecs[DS_BB]->store(h->codecs[DS_BB], map, "BB", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_MQ]) { + if (-1 == h->codecs[DS_MQ]->store(h->codecs[DS_MQ], map, "MQ", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_RN]) { + if (-1 == h->codecs[DS_RN]->store(h->codecs[DS_RN], map, "RN", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_QS]) { + if (-1 == h->codecs[DS_QS]->store(h->codecs[DS_QS], map, "QS", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_QQ]) { + if (-1 == h->codecs[DS_QQ]->store(h->codecs[DS_QQ], map, "QQ", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_RI]) { + if (-1 == h->codecs[DS_RI]->store(h->codecs[DS_RI], map, "RI", + fd->version)) + return NULL; + mc++; + } + if (CRAM_MAJOR_VERS(fd->version) != 1) { + if (h->codecs[DS_SC]) { + if (-1 == h->codecs[DS_SC]->store(h->codecs[DS_SC], map, "SC", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_RS]) { + if (-1 == h->codecs[DS_RS]->store(h->codecs[DS_RS], map, "RS", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_PD]) { + if (-1 == h->codecs[DS_PD]->store(h->codecs[DS_PD], map, "PD", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_HC]) { + if (-1 == h->codecs[DS_HC]->store(h->codecs[DS_HC], map, "HC", + fd->version)) + return NULL; + mc++; + } + } + if (h->codecs[DS_TM]) { + if (-1 == h->codecs[DS_TM]->store(h->codecs[DS_TM], map, "TM", + fd->version)) + return NULL; + mc++; + } + if (h->codecs[DS_TV]) { + if (-1 == h->codecs[DS_TV]->store(h->codecs[DS_TV], map, "TV", + fd->version)) + return NULL; + mc++; + } + r |= 
(fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); + r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); + BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); + + /* tag encoding map */ + mc = 0; + BLOCK_SIZE(map) = 0; + if (c->tags_used) { + khint_t k; + + for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { + int key; + if (!kh_exist(c->tags_used, k)) + continue; + + key = kh_key(c->tags_used, k); + cram_codec *cd = kh_val(c->tags_used, k)->codec; + + r |= (fd->vv.varint_put32_blk(map, key) <= 0); + if (-1 == cd->store(cd, map, NULL, fd->version)) + return NULL; + + mc++; + } + } + + r |= (fd->vv.varint_put32_blk(cb, BLOCK_SIZE(map) + fd->vv.varint_size(mc)) <= 0); + r |= (fd->vv.varint_put32_blk(cb, mc) <= 0); + BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map)); + + hts_log_info("Wrote compression block header in %d bytes", (int)BLOCK_SIZE(cb)); + + BLOCK_UPLEN(cb); + + cram_free_block(map); + + if (r >= 0) + return cb; + + block_err: + return NULL; +} + + +/* + * Encodes a slice compression header. 
+ * + * Returns cram_block on success + * NULL on failure + */ +cram_block *cram_encode_slice_header(cram_fd *fd, cram_slice *s) { + char *buf; + char *cp; + cram_block *b = cram_new_block(MAPPED_SLICE, 0); + int j; + + if (!b) + return NULL; + + cp = buf = malloc(22+16+5*(8+s->hdr->num_blocks)); + if (NULL == buf) { + cram_free_block(b); + return NULL; + } + + cp += fd->vv.varint_put32s(cp, NULL, s->hdr->ref_seq_id); + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + cp += fd->vv.varint_put64(cp, NULL, s->hdr->ref_seq_start); + cp += fd->vv.varint_put64(cp, NULL, s->hdr->ref_seq_span); + } else { + if (s->hdr->ref_seq_start < 0 || s->hdr->ref_seq_start > INT_MAX) { + hts_log_error("Reference position too large for CRAM 3"); + cram_free_block(b); + free(buf); + return NULL; + } + cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_seq_start); + cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_seq_span); + } + cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_records); + if (CRAM_MAJOR_VERS(fd->version) == 2) + cp += fd->vv.varint_put32(cp, NULL, s->hdr->record_counter); + else if (CRAM_MAJOR_VERS(fd->version) >= 3) + cp += fd->vv.varint_put64(cp, NULL, s->hdr->record_counter); + cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_blocks); + cp += fd->vv.varint_put32(cp, NULL, s->hdr->num_content_ids); + for (j = 0; j < s->hdr->num_content_ids; j++) { + cp += fd->vv.varint_put32(cp, NULL, s->hdr->block_content_ids[j]); + } + if (s->hdr->content_type == MAPPED_SLICE) + cp += fd->vv.varint_put32(cp, NULL, s->hdr->ref_base_id); + + if (CRAM_MAJOR_VERS(fd->version) != 1) { + memcpy(cp, s->hdr->md5, 16); cp += 16; + } + + assert(cp-buf <= 22+16+5*(8+s->hdr->num_blocks)); + + b->data = (unsigned char *)buf; + b->comp_size = b->uncomp_size = cp-buf; + + return b; +} + + +/* + * Encodes a single read. 
+ * + * Returns 0 on success + * -1 on failure + */ +static int cram_encode_slice_read(cram_fd *fd, + cram_container *c, + cram_block_compression_hdr *h, + cram_slice *s, + cram_record *cr, + int64_t *last_pos) { + int r = 0; + int32_t i32; + int64_t i64; + unsigned char uc; + + //fprintf(stderr, "Encode seq %d, %d/%d FN=%d, %s\n", rec, core->byte, core->bit, cr->nfeature, s->name_ds->str + cr->name); + + //printf("BF=0x%x\n", cr->flags); + // bf = cram_flag_swap[cr->flags]; + i32 = fd->cram_flag_swap[cr->flags & 0xfff]; + r |= h->codecs[DS_BF]->encode(s, h->codecs[DS_BF], (char *)&i32, 1); + + i32 = cr->cram_flags & CRAM_FLAG_MASK; + r |= h->codecs[DS_CF]->encode(s, h->codecs[DS_CF], (char *)&i32, 1); + + if (CRAM_MAJOR_VERS(fd->version) != 1 && s->hdr->ref_seq_id == -2) + r |= h->codecs[DS_RI]->encode(s, h->codecs[DS_RI], (char *)&cr->ref_id, 1); + + r |= h->codecs[DS_RL]->encode(s, h->codecs[DS_RL], (char *)&cr->len, 1); + + if (c->pos_sorted) { + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + i64 = cr->apos - *last_pos; + r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i64, 1); + } else { + i32 = cr->apos - *last_pos; + r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1); + } + *last_pos = cr->apos; + } else { + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + i64 = cr->apos; + r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i64, 1); + } else { + i32 = cr->apos; + r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1); + } + } + + r |= h->codecs[DS_RG]->encode(s, h->codecs[DS_RG], (char *)&cr->rg, 1); + + if (cr->cram_flags & CRAM_FLAG_DETACHED) { + i32 = cr->mate_flags; + r |= h->codecs[DS_MF]->encode(s, h->codecs[DS_MF], (char *)&i32, 1); + + r |= h->codecs[DS_NS]->encode(s, h->codecs[DS_NS], + (char *)&cr->mate_ref_id, 1); + + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + r |= h->codecs[DS_NP]->encode(s, h->codecs[DS_NP], + (char *)&cr->mate_pos, 1); + r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], + (char 
*)&cr->tlen, 1); + } else { + i32 = cr->mate_pos; + r |= h->codecs[DS_NP]->encode(s, h->codecs[DS_NP], + (char *)&i32, 1); + i32 = cr->tlen; + r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], + (char *)&i32, 1); + } + } else { + if (cr->cram_flags & CRAM_FLAG_MATE_DOWNSTREAM) { + r |= h->codecs[DS_NF]->encode(s, h->codecs[DS_NF], + (char *)&cr->mate_line, 1); + } + if (cr->cram_flags & CRAM_FLAG_EXPLICIT_TLEN) { + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS], + (char *)&cr->tlen, 1); + } + } + } + + /* Aux tags */ + if (CRAM_MAJOR_VERS(fd->version) == 1) { + int j; + uc = cr->ntags; + r |= h->codecs[DS_TC]->encode(s, h->codecs[DS_TC], (char *)&uc, 1); + + for (j = 0; j < cr->ntags; j++) { + uint32_t i32 = s->TN[cr->TN_idx + j]; // id + r |= h->codecs[DS_TN]->encode(s, h->codecs[DS_TN], (char *)&i32, 1); + } + } else { + r |= h->codecs[DS_TL]->encode(s, h->codecs[DS_TL], (char *)&cr->TL, 1); + } + + // qual + // QS codec : Already stored in block[2]. 
+ + // features (diffs) + if (!(cr->flags & BAM_FUNMAP)) { + int prev_pos = 0, j; + + r |= h->codecs[DS_FN]->encode(s, h->codecs[DS_FN], + (char *)&cr->nfeature, 1); + for (j = 0; j < cr->nfeature; j++) { + cram_feature *f = &s->features[cr->feature + j]; + + uc = f->X.code; + r |= h->codecs[DS_FC]->encode(s, h->codecs[DS_FC], (char *)&uc, 1); + i32 = f->X.pos - prev_pos; + r |= h->codecs[DS_FP]->encode(s, h->codecs[DS_FP], (char *)&i32, 1); + prev_pos = f->X.pos; + + switch(f->X.code) { + //char *seq; + + case 'X': + //fprintf(stderr, " FC=%c FP=%d base=%d\n", f->X.code, i32, f->X.base); + + uc = f->X.base; + r |= h->codecs[DS_BS]->encode(s, h->codecs[DS_BS], + (char *)&uc, 1); + break; + case 'S': + // Already done + //r |= h->codecs[DS_SC]->encode(s, h->codecs[DS_SC], + // BLOCK_DATA(s->soft_blk) + f->S.seq_idx, + // f->S.len); + + //if (CRAM_MAJOR_VERS(fd->version) >= 3) { + // r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], + // BLOCK_DATA(s->seqs_blk) + f->S.seq_idx, + // f->S.len); + //} + break; + case 'I': + //seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx; + //r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN], + // seq, f->S.len); + //if (CRAM_MAJOR_VERS(fd->version) >= 3) { + // r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], + // BLOCK_DATA(s->seqs_blk) + f->I.seq_idx, + // f->I.len); + //} + break; + case 'i': + uc = f->i.base; + r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], + (char *)&uc, 1); + //seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx; + //r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN], + // seq, 1); + break; + case 'D': + i32 = f->D.len; + r |= h->codecs[DS_DL]->encode(s, h->codecs[DS_DL], + (char *)&i32, 1); + break; + + case 'B': + // // Used when we try to store a non ACGTN base or an N + // // that aligns against a non ACGTN reference + + uc = f->B.base; + r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], + (char *)&uc, 1); + + // Already added + // uc = f->B.qual; + // r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS], + // 
(char *)&uc, 1); + break; + + case 'b': + // string of bases + r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], + (char *)BLOCK_DATA(s->seqs_blk) + + f->b.seq_idx, + f->b.len); + break; + + case 'Q': + // Already added + // uc = f->B.qual; + // r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS], + // (char *)&uc, 1); + break; + + case 'N': + i32 = f->N.len; + r |= h->codecs[DS_RS]->encode(s, h->codecs[DS_RS], + (char *)&i32, 1); + break; + + case 'P': + i32 = f->P.len; + r |= h->codecs[DS_PD]->encode(s, h->codecs[DS_PD], + (char *)&i32, 1); + break; + + case 'H': + i32 = f->H.len; + r |= h->codecs[DS_HC]->encode(s, h->codecs[DS_HC], + (char *)&i32, 1); + break; + + + default: + hts_log_error("Unhandled feature code %c", f->X.code); + return -1; + } + } + + r |= h->codecs[DS_MQ]->encode(s, h->codecs[DS_MQ], + (char *)&cr->mqual, 1); + } else { + char *seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq; + if (cr->len) + r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], seq, cr->len); + } + + return r ? -1 : 0; +} + + +/* + * Applies various compression methods to specific blocks, depending on + * known observations of how data series compress. + * + * Returns 0 on success + * -1 on failure + */ +static int cram_compress_slice(cram_fd *fd, cram_container *c, cram_slice *s) { + int level = fd->level, i; + int method = 1<version >= (3<<8)+1); + + /* Compress the CORE Block too, with minimal zlib level */ + if (level > 5 && s->block[0]->uncomp_size > 500) + cram_compress_block2(fd, s, s->block[0], NULL, 1<use_bz2) + method |= 1<use_rans) { + method_ranspr = (1< 1) + method_ranspr |= + (1< 5) + method_ranspr |= (1<use_rans) { + methodF |= v31_or_above ? method_ranspr : method_rans; + method |= v31_or_above ? 
method_ranspr : method_rans; + } + + int method_arith = 0; + if (fd->use_arith) { + method_arith = (1< 1) + method_arith |= + (1<use_arith && v31_or_above) { + methodF |= method_arith; + method |= method_arith; + } + + if (fd->use_lzma) + method |= (1<= 5) { + method |= 1<use_fqz) { + qmethod |= 1<level > 4) { + qmethod |= 1<level > 6) { + qmethod |= (1<metrics_lock); + for (i = 0; i < DS_END; i++) + if (c->stats[i] && c->stats[i]->nvals > 16) + fd->m[i]->unpackable = 1; + pthread_mutex_unlock(&fd->metrics_lock); + + /* Specific compression methods for certain block types */ + if (cram_compress_block2(fd, s, s->block[DS_IN], fd->m[DS_IN], //IN (seq) + method, level)) + return -1; + + if (fd->level == 0) { + /* Do nothing */ + } else if (fd->level == 1) { + if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], + qmethodF, 1)) + return -1; + for (i = DS_aux; i <= DS_aux_oz; i++) { + if (s->block[i]) + if (cram_compress_block2(fd, s, s->block[i], fd->m[i], + method, 1)) + return -1; + } + } else if (fd->level < 3) { + if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], + qmethod, 1)) + return -1; + if (cram_compress_block2(fd, s, s->block[DS_BA], fd->m[DS_BA], + method, 1)) + return -1; + if (s->block[DS_BB]) + if (cram_compress_block2(fd, s, s->block[DS_BB], fd->m[DS_BB], + method, 1)) + return -1; + for (i = DS_aux; i <= DS_aux_oz; i++) { + if (s->block[i]) + if (cram_compress_block2(fd, s, s->block[i], fd->m[i], + method, level)) + return -1; + } + } else { + if (cram_compress_block2(fd, s, s->block[DS_QS], fd->m[DS_QS], + qmethod, level)) + return -1; + if (cram_compress_block2(fd, s, s->block[DS_BA], fd->m[DS_BA], + method, level)) + return -1; + if (s->block[DS_BB]) + if (cram_compress_block2(fd, s, s->block[DS_BB], fd->m[DS_BB], + method, level)) + return -1; + for (i = DS_aux; i <= DS_aux_oz; i++) { + if (s->block[i]) + if (cram_compress_block2(fd, s, s->block[i], fd->m[i], + method, level)) + return -1; + } + } + + // NAME: best is 
generally xz, bzip2, zlib then rans1 + int method_rn = method & ~(method_rans | method_ranspr | 1<version >= (3<<8)+1 && fd->use_tok) + method_rn |= fd->use_arith ? (1<block[DS_RN], fd->m[DS_RN], + method_rn, level)) + return -1; + + // NS shows strong local correlation as rearrangements are localised + if (s->block[DS_NS] && s->block[DS_NS] != s->block[0]) + if (cram_compress_block2(fd, s, s->block[DS_NS], fd->m[DS_NS], + method, level)) + return -1; + + + /* + * Compress any auxiliary tags with their own per-tag metrics + */ + { + int i; + for (i = DS_END /*num_blk - naux_blk*/; i < s->hdr->num_blocks; i++) { + if (!s->block[i] || s->block[i] == s->block[0]) + continue; + + if (s->block[i]->method != RAW) + continue; + + if (cram_compress_block2(fd, s, s->block[i], s->block[i]->m, + method, level)) + return -1; + } + } + + /* + * Minimal compression of any block still uncompressed, bar CORE + */ + { + int i; + for (i = 1; i < s->hdr->num_blocks && i < DS_END; i++) { + if (!s->block[i] || s->block[i] == s->block[0]) + continue; + + if (s->block[i]->method != RAW) + continue; + + if (cram_compress_block2(fd, s, s->block[i], fd->m[i], + methodF, level)) + return -1; + } + } + + return 0; +} + +/* + * Allocates a block associated with the cram codec associated with + * data series ds_id or the internal codec_id (depending on codec + * type). + * + * The ds_ids are what end up written to disk as an external block. + * The c_ids are internal and used when daisy-chaining transforms + * such as MAP and RLE. These blocks are also allocated, but + * are ephemeral in nature. (The codecs themselves cannot allocate + * these as the same codec pointer may be operating on multiple slices + * if we're using a multi-slice container.) 
+ * + * Returns 0 on success + * -1 on failure + */ +static int cram_allocate_block(cram_codec *codec, cram_slice *s, int ds_id) { + if (!codec) + return 0; + + switch(codec->codec) { + // Codecs which are hard-coded to use the CORE block + case E_GOLOMB: + case E_HUFFMAN: + case E_BETA: + case E_SUBEXP: + case E_GOLOMB_RICE: + case E_GAMMA: + codec->out = s->block[0]; + break; + + // Codecs which don't use external blocks + case E_CONST_BYTE: + case E_CONST_INT: + codec->out = NULL; + break; + + // Codecs that emit directly to external blocks + case E_EXTERNAL: + case E_VARINT_UNSIGNED: + case E_VARINT_SIGNED: + if (!(s->block[ds_id] = cram_new_block(EXTERNAL, ds_id))) + return -1; + codec->u.external.content_id = ds_id; + codec->out = s->block[ds_id]; + break; + + case E_BYTE_ARRAY_STOP: // Why no sub-codec? + if (!(s->block[ds_id] = cram_new_block(EXTERNAL, ds_id))) + return -1; + codec->u.byte_array_stop.content_id = ds_id; + codec->out = s->block[ds_id]; + break; + + + // Codecs that contain sub-codecs which may in turn emit to external blocks + case E_BYTE_ARRAY_LEN: { + cram_codec *bal = codec->u.e_byte_array_len.len_codec; + if (cram_allocate_block(bal, s, bal->u.external.content_id)) + return -1; + bal = codec->u.e_byte_array_len.val_codec; + if (cram_allocate_block(bal, s, bal->u.external.content_id)) + return -1; + + break; + } + + case E_XRLE: + if (cram_allocate_block(codec->u.e_xrle.len_codec, s, ds_id)) + //ds_id == DS_QS ? 
DS_QS_len : ds_id)) + return -1; + if (cram_allocate_block(codec->u.e_xrle.lit_codec, s, ds_id)) + return -1; + + break; + + case E_XPACK: + if (cram_allocate_block(codec->u.e_xpack.sub_codec, s, ds_id)) + return -1; + codec->out = cram_new_block(0, 0); // ephemeral + if (!codec->out) + return -1; + + break; + + case E_XDELTA: + if (cram_allocate_block(codec->u.e_xdelta.sub_codec, s, ds_id)) + return -1; + codec->out = cram_new_block(0, 0); // ephemeral + if (!codec->out) + return -1; + + break; + + default: + break; + } + + return 0; +} + +/* + * Encodes a single slice from a container + * + * Returns 0 on success + * -1 on failure + */ +static int cram_encode_slice(cram_fd *fd, cram_container *c, + cram_block_compression_hdr *h, cram_slice *s, + int embed_ref) { + int rec, r = 0; + int64_t last_pos; + enum cram_DS_ID id; + + /* + * Slice external blocks: + * ID 0 => base calls (insertions, soft-clip) + * ID 1 => qualities + * ID 2 => names + * ID 3 => TS (insert size), NP (next frag) + * ID 4 => tag values + * ID 6 => tag IDs (TN), if CRAM_V1.0 + * ID 7 => TD tag dictionary, if !CRAM_V1.0 + */ + + /* Create cram slice header */ + s->hdr->ref_base_id = embed_ref>0 && s->hdr->ref_seq_span > 0 + ? DS_ref + : (CRAM_MAJOR_VERS(fd->version) >= 4 ? 0 : -1); + s->hdr->record_counter = c->num_records + c->record_counter; + c->num_records += s->hdr->num_records; + + int ntags = c->tags_used ? c->tags_used->n_occupied : 0; + s->block = calloc(DS_END + ntags*2, sizeof(s->block[0])); + s->hdr->block_content_ids = malloc(DS_END * sizeof(int32_t)); + if (!s->block || !s->hdr->block_content_ids) + return -1; + + // Create first fixed blocks, always external. 
+ // CORE + if (!(s->block[0] = cram_new_block(CORE, 0))) + return -1; + + // TN block for CRAM v1 + if (CRAM_MAJOR_VERS(fd->version) == 1) { + if (h->codecs[DS_TN]->codec == E_EXTERNAL) { + if (!(s->block[DS_TN] = cram_new_block(EXTERNAL,DS_TN))) return -1; + h->codecs[DS_TN]->u.external.content_id = DS_TN; + } else { + s->block[DS_TN] = s->block[0]; + } + } + + // Embedded reference + if (embed_ref>0) { + if (!(s->block[DS_ref] = cram_new_block(EXTERNAL, DS_ref))) + return -1; + s->ref_id = DS_ref; // needed? + BLOCK_APPEND(s->block[DS_ref], + c->ref + s->hdr->ref_seq_start - c->ref_start, + s->hdr->ref_seq_span); + } + + /* + * All the data-series blocks if appropriate. + */ + for (id = DS_QS; id < DS_TN; id++) { + if (cram_allocate_block(h->codecs[id], s, id) < 0) + return -1; + } + + /* + * Add in the external tag blocks too. + */ + if (c->tags_used) { + int n; + s->hdr->num_blocks = DS_END; + for (n = 0; n < s->naux_block; n++) { + s->block[s->hdr->num_blocks++] = s->aux_block[n]; + s->aux_block[n] = NULL; + } + } + + /* Encode reads */ + last_pos = s->hdr->ref_seq_start; + for (rec = 0; rec < s->hdr->num_records; rec++) { + cram_record *cr = &s->crecs[rec]; + if (cram_encode_slice_read(fd, c, h, s, cr, &last_pos) == -1) + return -1; + } + + s->block[0]->uncomp_size = s->block[0]->byte + (s->block[0]->bit < 7); + s->block[0]->comp_size = s->block[0]->uncomp_size; + + // Make sure the fixed blocks point to the correct sources + if (s->block[DS_IN]) cram_free_block(s->block[DS_IN]); + s->block[DS_IN] = s->base_blk; s->base_blk = NULL; + if (s->block[DS_QS]) cram_free_block(s->block[DS_QS]); + s->block[DS_QS] = s->qual_blk; s->qual_blk = NULL; + if (s->block[DS_RN]) cram_free_block(s->block[DS_RN]); + s->block[DS_RN] = s->name_blk; s->name_blk = NULL; + if (s->block[DS_SC]) cram_free_block(s->block[DS_SC]); + s->block[DS_SC] = s->soft_blk; s->soft_blk = NULL; + + // Finalise any data transforms. 
+ for (id = DS_QS; id < DS_TN; id++) { + if (h->codecs[id] && h->codecs[id]->flush) + h->codecs[id]->flush(h->codecs[id]); + } + + // Ensure block sizes are up to date. + for (id = 1; id < s->hdr->num_blocks; id++) { + if (!s->block[id] || s->block[id] == s->block[0]) + continue; + + if (s->block[id]->uncomp_size == 0) + BLOCK_UPLEN(s->block[id]); + } + + // Compress it all + if (cram_compress_slice(fd, c, s) == -1) + return -1; + + // Collapse empty blocks and create hdr_block + { + int i, j; + + s->hdr->block_content_ids = realloc(s->hdr->block_content_ids, + s->hdr->num_blocks * sizeof(int32_t)); + if (!s->hdr->block_content_ids) + return -1; + + for (i = j = 1; i < s->hdr->num_blocks; i++) { + if (!s->block[i] || s->block[i] == s->block[0]) + continue; + if (s->block[i]->uncomp_size == 0) { + cram_free_block(s->block[i]); + s->block[i] = NULL; + continue; + } + s->block[j] = s->block[i]; + s->hdr->block_content_ids[j-1] = s->block[i]->content_id; + j++; + } + s->hdr->num_content_ids = j-1; + s->hdr->num_blocks = j; + + if (!(s->hdr_block = cram_encode_slice_header(fd, s))) + return -1; + } + + return r ? -1 : 0; + + block_err: + return -1; +} + +static inline const char *bam_data_end(bam1_t *b) { + return (const char *)b->data + b->l_data; +} + +/* + * A bounds checking version of bam_aux2i. 
+ */ +static inline int bam_aux2i_end(const uint8_t *aux, const uint8_t *aux_end) { + int type = *aux++; + switch (type) { + case 'c': + if (aux_end - aux < 1) { + errno = EINVAL; + return 0; + } + return *(int8_t *)aux; + case 'C': + if (aux_end - aux < 1) { + errno = EINVAL; + return 0; + } + return *aux; + case 's': + if (aux_end - aux < 2) { + errno = EINVAL; + return 0; + } + return le_to_i16(aux); + case 'S': + if (aux_end - aux < 2) { + errno = EINVAL; + return 0; + } + return le_to_u16(aux); + case 'i': + if (aux_end - aux < 4) { + errno = EINVAL; + return 0; + } + return le_to_i32(aux); + case 'I': + if (aux_end - aux < 4) { + errno = EINVAL; + return 0; + } + return le_to_u32(aux); + default: + errno = EINVAL; + } + return 0; +} + +/* + * Returns the number of expected read names for this record. + */ +static int expected_template_count(bam_seq_t *b) { + int expected = bam_flag(b) & BAM_FPAIRED ? 2 : 1; + + uint8_t *TC = (uint8_t *)bam_aux_get(b, "TC"); + if (TC) { + int n = bam_aux2i_end(TC, (uint8_t *)bam_data_end(b)); + if (expected < n) + expected = n; + } + + if (!TC && bam_aux_get(b, "SA")) { + // We could count the semicolons, but we'd have to do this for + // read1, read2 and read(not-1-or-2) combining the results + // together. This is a cheap and safe alternative for now. + expected = INT_MAX; + } + + return expected; +} + +/* + * Lossily reject read names. + * + * The rule here is that if all reads for this template reside in the + * same slice then we can lose the name. Otherwise we keep them as we + * do not know when (or if) the other reads will turn up. + * + * Note there may be only 1 read (non-paired library) or more than 2 + * reads (paired library with supplementary reads), or other weird + * setups. We need to know how many are expected. Ways to guess: + * + * - Flags (0x1 - has > 1 read) + * - TC aux field (not mandatory) + * - SA tags (count semicolons, NB per fragment so sum - hard) + * - RNEXT/PNEXT uniqueness count. 
(not implemented, tricky) + * + * Returns 0 on success + * -1 on failure + */ +static int lossy_read_names(cram_fd *fd, cram_container *c, cram_slice *s, + int bam_start) { + int r1, r2, ret = -1; + + // Initialise cram_flags + for (r2 = 0; r2 < s->hdr->num_records; r2++) + s->crecs[r2].cram_flags = 0; + + if (!fd->lossy_read_names) + return 0; + + khash_t(m_s2u64) *names = kh_init(m_s2u64); + if (!names) + goto fail; + + // 1: Iterate through names to count frequency + for (r1 = bam_start, r2 = 0; r2 < s->hdr->num_records; r1++, r2++) { + //cram_record *cr = &s->crecs[r2]; + bam_seq_t *b = c->bams[r1]; + khint_t k; + int n; + uint64_t e; + union { + uint64_t i64; + struct { + int32_t e,c; // expected & observed counts. + } counts; + } u; + + e = expected_template_count(b); + u.counts.e = e; u.counts.c = 1; + + k = kh_put(m_s2u64, names, bam_name(b), &n); + if (n == -1) + goto fail; + + if (n == 0) { + // not a new name + u.i64 = kh_val(names, k); + if (u.counts.e != e) { + // different expectation or already hit the max + //fprintf(stderr, "Err computing no. %s recs\n", bam_name(b)); + kh_val(names, k) = 0; + } else { + u.counts.c++; + if (u.counts.e == u.counts.c) { + // Reached expected count. + kh_val(names, k) = -1; + } else { + kh_val(names, k) = u.i64; + } + } + } else { + // new name + kh_val(names, k) = u.i64; + } + } + + // 2: Remove names if all present (hd.i == -1) + for (r1 = bam_start, r2 = 0; r2 < s->hdr->num_records; r1++, r2++) { + cram_record *cr = &s->crecs[r2]; + bam_seq_t *b = c->bams[r1]; + khint_t k; + + k = kh_get(m_s2u64, names, bam_name(b)); + + if (k == kh_end(names)) + goto fail; + + if (kh_val(names, k) == -1) + cr->cram_flags = CRAM_FLAG_DISCARD_NAME; + } + + ret = 0; + fail: // ret==-1 + + if (names) + kh_destroy(m_s2u64, names); + + return ret; +} + +/* + * Adds the reading names. 
We do this here as a separate pass rather + * than per record in the process_one_read calls as that function can + * go back and change the CRAM_FLAG_DETACHED status of a previously + * processed read if it subsequently determines the TLEN field is + * incorrect. Given DETACHED reads always try to decode read names, + * we need to know their status before generating the read-name block. + * + * Output is an update s->name_blk, and cr->name / cr->name_len + * fields. + */ +static int add_read_names(cram_fd *fd, cram_container *c, cram_slice *s, + int bam_start) { + int r1, r2; + int keep_names = !fd->lossy_read_names; + + for (r1 = bam_start, r2 = 0; + r1 < c->curr_c_rec && r2 < s->hdr->num_records; + r1++, r2++) { + cram_record *cr = &s->crecs[r2]; + bam_seq_t *b = c->bams[r1]; + + cr->name = BLOCK_SIZE(s->name_blk); + if ((cr->cram_flags & CRAM_FLAG_DETACHED) || keep_names) { + if (CRAM_MAJOR_VERS(fd->version) >= 4 + && (cr->cram_flags & CRAM_FLAG_MATE_DOWNSTREAM) + && cr->mate_line) { + // Dedup read names in V4 + BLOCK_APPEND(s->name_blk, "\0", 1); + cr->name_len = 1; + } else { + BLOCK_APPEND(s->name_blk, bam_name(b), bam_name_len(b)); + cr->name_len = bam_name_len(b); + } + } else { + // Can only discard duplicate names if not detached + cr->name_len = 0; + } + + if (cram_stats_add(c->stats[DS_RN], cr->name_len) < 0) + goto block_err; + } + + return 0; + + block_err: + return -1; +} + +// CRAM version >= 3.1 +#define CRAM_ge31(v) ((v) >= 0x301) + +// Returns the next cigar op code: one of the BAM_C* codes, +// or -1 if no more are present. 
+static inline +int next_cigar_op(uint32_t *cigar, uint32_t ncigar, int *skip, int *spos, + uint32_t *cig_ind, uint32_t *cig_op, uint32_t *cig_len) { + for(;;) { + while (*cig_len == 0) { + if (*cig_ind < ncigar) { + *cig_op = cigar[*cig_ind] & BAM_CIGAR_MASK; + *cig_len = cigar[*cig_ind] >> BAM_CIGAR_SHIFT; + (*cig_ind)++; + } else { + return -1; + } + } + + if (skip[*cig_op]) { + *spos += (bam_cigar_type(*cig_op)&1) * *cig_len; + *cig_len = 0; + continue; + } + + (*cig_len)--; + break; + } + + return *cig_op; +} + +// Ensure ref and hist are large enough. +static inline int extend_ref(char **ref, uint32_t (**hist)[5], hts_pos_t pos, + hts_pos_t ref_start, hts_pos_t *ref_end) { + if (pos < ref_start) + return -1; + if (pos < *ref_end) + return 0; + + // realloc + if (pos - ref_start > UINT_MAX) + return -2; // protect overflow in new_end calculation + + hts_pos_t old_end = *ref_end ? *ref_end : ref_start; + hts_pos_t new_end = ref_start + 1000 + (pos-ref_start)*1.5; + + // Refuse to work on excessively large blocks. + // We'll just switch to referenceless encoding, which is probably better + // here as this must be very sparse data anyway. 
+ if (new_end - ref_start > UINT_MAX/sizeof(**hist)/2) + return -2; + + char *tmp = realloc(*ref, new_end-ref_start+1); + if (!tmp) + return -1; + *ref = tmp; + + uint32_t (*tmp5)[5] = realloc(**hist, + (new_end - ref_start)*sizeof(**hist)); + if (!tmp5) + return -1; + *hist = tmp5; + *ref_end = new_end; + + // initialise + old_end -= ref_start; + new_end -= ref_start; + memset(&(*ref)[old_end], 0, new_end-old_end); + memset(&(*hist)[old_end], 0, (new_end-old_end)*sizeof(**hist)); + + return 0; +} + +// Walk through MD + seq to generate ref +// Returns 1 on success, <0 on failure +static int cram_add_to_ref_MD(bam1_t *b, char **ref, uint32_t (**hist)[5], + hts_pos_t ref_start, hts_pos_t *ref_end, + const uint8_t *MD) { + uint8_t *seq = bam_get_seq(b); + uint32_t *cigar = bam_get_cigar(b); + uint32_t ncigar = b->core.n_cigar; + uint32_t cig_op = 0, cig_len = 0, cig_ind = 0; + + int iseq = 0, next_op; + hts_pos_t iref = b->core.pos - ref_start; + + // Skip INS, REF_SKIP, *CLIP, PAD. and BACK. + static int cig_skip[16] = {0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1}; + while (iseq < b->core.l_qseq && *MD) { + if (isdigit(*MD)) { + // match + int overflow = 0; + int len = hts_str2uint((char *)MD, (char **)&MD, 31, &overflow); + if (overflow || + extend_ref(ref, hist, iref+ref_start + len, + ref_start, ref_end) < 0) + return -1; + while (iseq < b->core.l_qseq && len) { + // rewrite to have internal loops? 
+ if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, + &iseq, &cig_ind, &cig_op, + &cig_len)) < 0) + return -1; + + if (next_op != BAM_CMATCH && + next_op != BAM_CEQUAL) { + hts_log_info("MD:Z and CIGAR are incompatible for " + "record %s", bam_get_qname(b)); + return -1; + } + + // Short-cut loop over same cigar op for efficiency + cig_len++; + do { + cig_len--; + (*ref)[iref++] = seq_nt16_str[bam_seqi(seq, iseq)]; + iseq++; + len--; + } while (cig_len && iseq < b->core.l_qseq && len); + } + if (len > 0) + return -1; // MD is longer than seq + } else if (*MD == '^') { + // deletion + MD++; + while (isalpha(*MD)) { + if (extend_ref(ref, hist, iref+ref_start, ref_start, + ref_end) < 0) + return -1; + if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, + &iseq, &cig_ind, &cig_op, + &cig_len)) < 0) + return -1; + + if (next_op != BAM_CDEL) { + hts_log_info("MD:Z and CIGAR are incompatible"); + return -1; + } + + (*ref)[iref++] = *MD++ & ~0x20; + } + } else { + // substitution + if (extend_ref(ref, hist, iref+ref_start, ref_start, ref_end) < 0) + return -1; + if ((next_op = next_cigar_op(cigar, ncigar, cig_skip, + &iseq, &cig_ind, &cig_op, + &cig_len)) < 0) + return -1; + + if (next_op != BAM_CMATCH && next_op != BAM_CDIFF) { + hts_log_info("MD:Z and CIGAR are incompatible"); + return -1; + } + + (*ref)[iref++] = *MD++ & ~0x20; + iseq++; + } + } + + return 1; +} + +// Append a sequence to a ref/consensus structure. +// We maintain both an absolute refefence (ACGTN where MD:Z is +// present) and a 5-way frequency array for when no MD:Z is known. +// We then subsequently convert the 5-way frequencies to a consensus +// ref in a second pass. 
+// +// Returns >=0 on success, +// -1 on failure (eg inconsistent data) +static int cram_add_to_ref(bam1_t *b, char **ref, uint32_t (**hist)[5], + hts_pos_t ref_start, hts_pos_t *ref_end) { + const uint8_t *MD = bam_aux_get(b, "MD"); + int ret = 0; + if (MD && *MD == 'Z') { + // We can use MD to directly compute the reference + int ret = cram_add_to_ref_MD(b, ref, hist, ref_start, ref_end, MD+1); + + if (ret > 0) + return ret; + } + + // Otherwise we just use SEQ+CIGAR and build a consensus which we later + // turn into a fake reference + uint32_t *cigar = bam_get_cigar(b); + uint32_t ncigar = b->core.n_cigar; + uint32_t i, j; + hts_pos_t iseq = 0, iref = b->core.pos - ref_start; + uint8_t *seq = bam_get_seq(b); + for (i = 0; i < ncigar; i++) { + switch (bam_cigar_op(cigar[i])) { + case BAM_CSOFT_CLIP: + case BAM_CINS: + iseq += bam_cigar_oplen(cigar[i]); + break; + + case BAM_CMATCH: + case BAM_CEQUAL: + case BAM_CDIFF: { + int len = bam_cigar_oplen(cigar[i]); + // Maps an nt16 (A=1 C=2 G=4 T=8 bits) to 0123 plus N=4 + static uint8_t L16[16] = {4,0,1,4, 2,4,4,4, 3,4,4,4, 4,4,4,4}; + + if (extend_ref(ref, hist, iref+ref_start + len, + ref_start, ref_end) < 0) + return -1; + if (iseq + len <= b->core.l_qseq) { + // Nullify failed MD:Z if appropriate + if (ret < 0) + memset(&(*ref)[iref], 0, len); + + for (j = 0; j < len; j++, iref++, iseq++) + (*hist)[iref][L16[bam_seqi(seq, iseq)]]++; + } else { + // Probably a 2ndary read with seq "*" + iseq += len; + iref += len; + } + break; + } + + case BAM_CDEL: + case BAM_CREF_SKIP: + iref += bam_cigar_oplen(cigar[i]); + } + } + + return 1; +} + +// Automatically generates the reference and stashed it in c->ref, also +// setting c->ref_start and c->ref_end. +// +// If we have MD:Z tags then we use them to directly infer the reference, +// along with SEQ + CIGAR. Otherwise we use SEQ/CIGAR only to build up +// a consensus and then assume the reference as the majority rule. 
+// +// In this latter scenario we need to be wary of auto-generating MD and NM +// during decode, but that's handled elsewhere via an additional aux tag. +// +// Returns 0 on success, +// -1 on failure +static int cram_generate_reference(cram_container *c, cram_slice *s, int r1) { + // TODO: if we can find an external reference then use it, even if the + // user told us to do embed_ref=2. + char *ref = NULL; + uint32_t (*hist)[5] = NULL; + hts_pos_t ref_start = c->bams[r1]->core.pos, ref_end = 0; + if (ref_start < 0) + return -1; // cannot build consensus from unmapped data + + // initial allocation + if (extend_ref(&ref, &hist, + c->bams[r1 + s->hdr->num_records-1]->core.pos + + c->bams[r1 + s->hdr->num_records-1]->core.l_qseq, + ref_start, &ref_end) < 0) + return -1; + + // Add each bam file to the reference/consensus arrays + int r2; + hts_pos_t last_pos = -1; + for (r2 = 0; r1 < c->curr_c_rec && r2 < s->hdr->num_records; r1++, r2++) { + if (c->bams[r1]->core.pos < last_pos) { + hts_log_error("Cannot build reference with unsorted data"); + goto err; + } + last_pos = c->bams[r1]->core.pos; + if (cram_add_to_ref(c->bams[r1], &ref, &hist, ref_start, &ref_end) < 0) + goto err; + } + + // Compute the consensus + hts_pos_t i; + for (i = 0; i < ref_end-ref_start; i++) { + if (!ref[i]) { + int max_v = 0, max_j = 4, j; + for (j = 0; j < 4; j++) + // don't call N (j==4) unless no coverage + if (max_v < hist[i][j]) + max_v = hist[i][j], max_j = j; + ref[i] = "ACGTN"[max_j]; + } + } + free(hist); + + // Put the reference in place so it appears to be an external + // ref file. + c->ref = ref; + c->ref_start = ref_start+1; + c->ref_end = ref_end+1; + c->ref_free = 1; + + return 0; + + err: + free(ref); + free(hist); + return -1; +} + +// Check if the SQ M5 tag matches the reference we've loaded. +static int validate_md5(cram_fd *fd, int ref_id) { + if (fd->ignore_md5 || ref_id < 0 || ref_id >= fd->refs->nref) + return 0; + + // Have we already checked this ref? 
+ if (fd->refs->ref_id[ref_id]->validated_md5) + return 0; + + // Check if we have the MD5 known. + // We should, but maybe we're using embedded references? + sam_hrecs_t *hrecs = fd->header->hrecs; + sam_hrec_type_t *ty = sam_hrecs_find_type_id(hrecs, "SQ", "SN", + hrecs->ref[ref_id].name); + if (!ty) + return 0; + + sam_hrec_tag_t *m5tag = sam_hrecs_find_key(ty, "M5", NULL); + if (!m5tag) + return 0; + + // It's known, so compute md5 on the loaded reference sequence. + char *ref = fd->refs->ref_id[ref_id]->seq; + int64_t len = fd->refs->ref_id[ref_id]->length; + hts_md5_context *md5; + char unsigned buf[16]; + char buf2[33]; + + if (!(md5 = hts_md5_init())) + return -1; + hts_md5_update(md5, ref, len); + hts_md5_final(buf, md5); + hts_md5_destroy(md5); + hts_md5_hex(buf2, buf); + + // Compare it to header @SQ M5 tag + if (strcmp(m5tag->str+3, buf2)) { + hts_log_error("SQ header M5 tag discrepancy for reference '%s'", + hrecs->ref[ref_id].name); + hts_log_error("Please use the correct reference, or " + "consider using embed_ref=2"); + return -1; + } + fd->refs->ref_id[ref_id]->validated_md5 = 1; + + return 0; +} + +/* + * Encodes all slices in a container into blocks. 
+ * Returns 0 on success + * -1 on failure + */ +int cram_encode_container(cram_fd *fd, cram_container *c) { + int i, j, slice_offset; + cram_block_compression_hdr *h = c->comp_hdr; + cram_block *c_hdr; + int multi_ref = 0; + int r1, r2, sn, nref, embed_ref, no_ref; + spare_bams *spares; + + if (!c->bams) + goto err; + + if (CRAM_MAJOR_VERS(fd->version) == 1) + goto err; + +//#define goto_err {fprintf(stderr, "ERR at %s:%d\n", __FILE__, __LINE__);goto err;} +#define goto_err goto err + + // Don't try embed ref if we repeatedly fail + pthread_mutex_lock(&fd->ref_lock); + int failed_embed = (fd->no_ref_counter >= 5); // maximum 5 tries + if (!failed_embed && c->embed_ref == -2) { + hts_log_warning("Retrying embed_ref=2 mode for #%d/5", fd->no_ref_counter); + fd->no_ref = c->no_ref = 0; + fd->embed_ref = c->embed_ref = 2; + } else if (failed_embed && c->embed_ref == -2) { + // We've tried several times, so this time give up for good + hts_log_warning("Keeping non-ref mode from now on"); + fd->embed_ref = c->embed_ref = 0; + } + pthread_mutex_unlock(&fd->ref_lock); + + restart: + /* Cache references up-front if we have unsorted access patterns */ + pthread_mutex_lock(&fd->ref_lock); + nref = fd->refs->nref; + pthread_mutex_unlock(&fd->ref_lock); + embed_ref = c->embed_ref; + no_ref = c->no_ref; + + /* To create M5 strings */ + /* Fetch reference sequence */ + if (!no_ref) { + if (!c->bams || !c->curr_c_rec || !c->bams[0]) + goto_err; + bam_seq_t *b = c->bams[0]; + + if (embed_ref <= 1) { + char *ref = cram_get_ref(fd, bam_ref(b), 1, 0); + if (!ref && bam_ref(b) >= 0) { + if (!c->pos_sorted) { + // TODO: maybe also check fd->no_ref? 
+ hts_log_warning("Failed to load reference #%d", + bam_ref(b)); + hts_log_warning("Switching to non-ref mode"); + + pthread_mutex_lock(&fd->ref_lock); + c->embed_ref = fd->embed_ref = 0; + c->no_ref = fd->no_ref = 1; + pthread_mutex_unlock(&fd->ref_lock); + goto restart; + } + + if (c->multi_seq || embed_ref == 0) { + hts_log_error("Failed to load reference #%d", bam_ref(b)); + return -1; + } + hts_log_warning("Failed to load reference #%d", bam_ref(b)); + hts_log_warning("Enabling embed_ref=2 mode to auto-generate" + " reference"); + if (embed_ref <= 0) + hts_log_warning("NOTE: the CRAM file will be bigger than" + " using an external reference"); + pthread_mutex_lock(&fd->ref_lock); + embed_ref = c->embed_ref = fd->embed_ref = 2; + pthread_mutex_unlock(&fd->ref_lock); + goto auto_ref; + } else if (ref) { + if (validate_md5(fd, c->ref_seq_id) < 0) + goto_err; + } + if ((c->ref_id = bam_ref(b)) >= 0) { + c->ref_seq_id = c->ref_id; + c->ref = fd->refs->ref_id[c->ref_seq_id]->seq; + c->ref_start = 1; + c->ref_end = fd->refs->ref_id[c->ref_seq_id]->length; + } + } else { + auto_ref: + // Auto-embed ref. + // This starts as 'N' and is amended on-the-fly as we go + // based on MD:Z tags. + if ((c->ref_id = bam_ref(b)) >= 0) { + c->ref = NULL; + // c->ref_free is boolean; whether to free c->ref. In this + // case c->ref will be our auto-embedded sequence instead of + // a "global" portion of reference from fd->refs. + // Do not confuse with fd->ref_free which is a pointer to a + // reference string to free. 
+ c->ref_free = 1; + } + } + c->ref_seq_id = c->ref_id; + } else { + c->ref_id = bam_ref(c->bams[0]); + cram_ref_incr(fd->refs, c->ref_id); + c->ref_seq_id = c->ref_id; + } + + if (!no_ref && c->refs_used) { + for (i = 0; i < nref; i++) { + if (c->refs_used[i]) { + if (cram_get_ref(fd, i, 1, 0)) { + if (validate_md5(fd, i) < 0) + goto_err; + } else { + hts_log_warning("Failed to find reference, " + "switching to non-ref mode"); + no_ref = c->no_ref = 1; + } + } + } + } + + /* Turn bams into cram_records and gather basic stats */ + for (r1 = sn = 0; r1 < c->curr_c_rec; sn++) { + cram_slice *s = c->slices[sn]; + int64_t first_base = INT64_MAX, last_base = INT64_MIN; + + int r1_start = r1; + + assert(sn < c->curr_slice); + + // Discover which read names *may* be safely removed. + // Ie which ones have all their records in this slice. + if (lossy_read_names(fd, c, s, r1_start) != 0) + return -1; + + // Tracking of MD tags so we can spot when the auto-generated values + // will differ from the current stored ones. The kstring here is + // simply to avoid excessive malloc and free calls. All initialisation + // is done within process_one_read(). + kstring_t MD = {0}; + + // Embed consensus / MD-generated ref + if (embed_ref == 2) { + if (cram_generate_reference(c, s, r1) < 0) { + // Should this be a permanent thing via fd->no_ref? + // Doing so means we cannot easily switch back again should + // things fix themselves later on. This is likely not a + // concern though as failure to generate a reference implies + // unsorted data which is rarely recovered from. + + // Only if sn == 0. We're hosed if we're on the 2nd slice and + // the first worked, as no-ref is a container global param. 
+ if (sn > 0) { + hts_log_error("Failed to build reference, " + "switching to non-ref mode"); + return -1; + } else { + hts_log_warning("Failed to build reference, " + "switching to non-ref mode"); + } + pthread_mutex_lock(&fd->ref_lock); + c->embed_ref = fd->embed_ref = -2; // was previously embed_ref + c->no_ref = fd->no_ref = 1; + fd->no_ref_counter++; // more likely to keep permanent action + pthread_mutex_unlock(&fd->ref_lock); + failed_embed = 1; + goto restart; + } else { + pthread_mutex_lock(&fd->ref_lock); + fd->no_ref_counter -= (fd->no_ref_counter > 0); + pthread_mutex_unlock(&fd->ref_lock); + } + } + + // Iterate through records creating the cram blocks for some + // fields and just gathering stats for others. + for (r2 = 0; r1 < c->curr_c_rec && r2 < s->hdr->num_records; r1++, r2++) { + cram_record *cr = &s->crecs[r2]; + bam_seq_t *b = c->bams[r1]; + + /* If multi-ref we need to cope with changing reference per seq */ + if (c->multi_seq && !no_ref) { + if (bam_ref(b) != c->ref_seq_id && bam_ref(b) >= 0) { + if (c->ref_seq_id >= 0) + cram_ref_decr(fd->refs, c->ref_seq_id); + + if (!cram_get_ref(fd, bam_ref(b), 1, 0)) { + hts_log_error("Failed to load reference #%d", bam_ref(b)); + free(MD.s); + return -1; + } + if (validate_md5(fd, bam_ref(b)) < 0) + return -1; + + c->ref_seq_id = bam_ref(b); // overwritten later by -2 + if (!fd->refs->ref_id[c->ref_seq_id]->seq) + return -1; + c->ref = fd->refs->ref_id[c->ref_seq_id]->seq; + c->ref_start = 1; + c->ref_end = fd->refs->ref_id[c->ref_seq_id]->length; + } + } + + if (process_one_read(fd, c, s, cr, b, r2, &MD, embed_ref, + no_ref) != 0) { + free(MD.s); + return -1; + } + + if (first_base > cr->apos) + first_base = cr->apos; + + if (last_base < cr->aend) + last_base = cr->aend; + } + + free(MD.s); + + // Process_one_read doesn't add read names as it can change + // its mind during the loop on the CRAM_FLAG_DETACHED setting + // of earlier records (if it detects the auto-generation of + // TLEN is incorrect). 
This affects which read-names can be + // lossily compressed, so we do these in another pass. + if (add_read_names(fd, c, s, r1_start) < 0) + return -1; + + if (c->multi_seq) { + s->hdr->ref_seq_id = -2; + s->hdr->ref_seq_start = 0; + s->hdr->ref_seq_span = 0; + } else if (c->ref_id == -1 && CRAM_ge31(fd->version)) { + // Spec states span=0, but it broke our range queries. + // See commit message for this and prior. + s->hdr->ref_seq_id = -1; + s->hdr->ref_seq_start = 0; + s->hdr->ref_seq_span = 0; + } else { + s->hdr->ref_seq_id = c->ref_id; + s->hdr->ref_seq_start = first_base; + s->hdr->ref_seq_span = MAX(0, last_base - first_base + 1); + } + s->hdr->num_records = r2; + + // Processed a slice, now stash the aux blocks so the next + // slice can start aggregating them from the start again. + if (c->tags_used->n_occupied) { + int ntags = c->tags_used->n_occupied; + s->aux_block = calloc(ntags*2, sizeof(*s->aux_block)); + if (!s->aux_block) + return -1; + + khint_t k; + + s->naux_block = 0; + for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { + if (!kh_exist(c->tags_used, k)) + continue; + + cram_tag_map *tm = kh_val(c->tags_used, k); + if (!tm) goto_err; + if (!tm->blk) continue; + s->aux_block[s->naux_block++] = tm->blk; + tm->blk = NULL; + if (!tm->blk2) continue; + s->aux_block[s->naux_block++] = tm->blk2; + tm->blk2 = NULL; + } + assert(s->naux_block <= 2*c->tags_used->n_occupied); + } + } + + if (c->multi_seq && !no_ref) { + if (c->ref_seq_id >= 0) + cram_ref_decr(fd->refs, c->ref_seq_id); + } + + /* Link our bams[] array onto the spare bam list for reuse */ + spares = malloc(sizeof(*spares)); + if (!spares) goto_err; + pthread_mutex_lock(&fd->bam_list_lock); + spares->bams = c->bams; + spares->next = fd->bl; + fd->bl = spares; + pthread_mutex_unlock(&fd->bam_list_lock); + c->bams = NULL; + + /* Detect if a multi-seq container */ + cram_stats_encoding(fd, c->stats[DS_RI]); + multi_ref = c->stats[DS_RI]->nvals > 1; + 
pthread_mutex_lock(&fd->metrics_lock); + fd->last_RI_count = c->stats[DS_RI]->nvals; + pthread_mutex_unlock(&fd->metrics_lock); + + + if (multi_ref) { + hts_log_info("Multi-ref container"); + c->ref_seq_id = -2; + c->ref_seq_start = 0; + c->ref_seq_span = 0; + } + + + /* Compute MD5s */ + no_ref = c->no_ref; + int is_v4 = CRAM_MAJOR_VERS(fd->version) >= 4 ? 1 : 0; + + for (i = 0; i < c->curr_slice; i++) { + cram_slice *s = c->slices[i]; + + if (CRAM_MAJOR_VERS(fd->version) != 1) { + if (s->hdr->ref_seq_id >= 0 && c->multi_seq == 0 && !no_ref) { + hts_md5_context *md5 = hts_md5_init(); + if (!md5) + return -1; + hts_md5_update(md5, + c->ref + s->hdr->ref_seq_start - c->ref_start, + s->hdr->ref_seq_span); + hts_md5_final(s->hdr->md5, md5); + hts_md5_destroy(md5); + } else { + memset(s->hdr->md5, 0, 16); + } + } + } + + c->num_records = 0; + c->num_blocks = 1; // cram_block_compression_hdr + c->length = 0; + + //fprintf(stderr, "=== BF ===\n"); + h->codecs[DS_BF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BF]), + c->stats[DS_BF], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_BF]->nvals && !h->codecs[DS_BF]) goto_err; + + //fprintf(stderr, "=== CF ===\n"); + h->codecs[DS_CF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_CF]), + c->stats[DS_CF], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_CF]->nvals && !h->codecs[DS_CF]) goto_err; + + //fprintf(stderr, "=== RN ===\n"); + //h->codecs[DS_RN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RN]), + // c->stats[DS_RN], E_BYTE_ARRAY, NULL, + // fd->version); + + //fprintf(stderr, "=== AP ===\n"); + if (c->pos_sorted || CRAM_MAJOR_VERS(fd->version) >= 4) { + if (c->pos_sorted) + h->codecs[DS_AP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_AP]), + c->stats[DS_AP], + is_v4 ? E_LONG : E_INT, + NULL, fd->version, &fd->vv); + else + // Unsorted data has no stats, but hard-code VARINT_SIGNED / EXT. + h->codecs[DS_AP] = cram_encoder_init(is_v4 ? 
E_VARINT_SIGNED + : E_EXTERNAL, + NULL, + is_v4 ? E_LONG : E_INT, + NULL, fd->version, &fd->vv); + } else { + // Removed BETA in v4.0. + // Should we consider dropping use of it for 3.0 too? + hts_pos_t p[2] = {0, c->max_apos}; + h->codecs[DS_AP] = cram_encoder_init(E_BETA, NULL, + is_v4 ? E_LONG : E_INT, + p, fd->version, &fd->vv); +// cram_xdelta_encoder e; +// e.word_size = is_v4 ? 8 : 4; +// e.sub_encoding = E_EXTERNAL; +// e.sub_codec_dat = (void *)DS_AP; +// +// h->codecs[DS_AP] = cram_encoder_init(E_XDELTA, NULL, +// is_v4 ? E_LONG : E_INT, +// &e, fd->version, &fd->vv); + } + if (!h->codecs[DS_AP]) goto_err; + + //fprintf(stderr, "=== RG ===\n"); + h->codecs[DS_RG] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RG]), + c->stats[DS_RG], + E_INT, + NULL, + fd->version, &fd->vv); + if (c->stats[DS_RG]->nvals && !h->codecs[DS_RG]) goto_err; + + //fprintf(stderr, "=== MQ ===\n"); + h->codecs[DS_MQ] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MQ]), + c->stats[DS_MQ], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_MQ]->nvals && !h->codecs[DS_MQ]) goto_err; + + //fprintf(stderr, "=== NS ===\n"); + h->codecs[DS_NS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NS]), + c->stats[DS_NS], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_NS]->nvals && !h->codecs[DS_NS]) goto_err; + + //fprintf(stderr, "=== MF ===\n"); + h->codecs[DS_MF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MF]), + c->stats[DS_MF], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_MF]->nvals && !h->codecs[DS_MF]) goto_err; + + //fprintf(stderr, "=== TS ===\n"); + h->codecs[DS_TS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TS]), + c->stats[DS_TS], + is_v4 ? E_LONG : E_INT, + NULL, fd->version, &fd->vv); + if (c->stats[DS_TS]->nvals && !h->codecs[DS_TS]) goto_err; + + //fprintf(stderr, "=== NP ===\n"); + h->codecs[DS_NP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NP]), + c->stats[DS_NP], + is_v4 ? 
E_LONG : E_INT, + NULL, fd->version, &fd->vv); + if (c->stats[DS_NP]->nvals && !h->codecs[DS_NP]) goto_err; + + //fprintf(stderr, "=== NF ===\n"); + h->codecs[DS_NF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NF]), + c->stats[DS_NF], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_NF]->nvals && !h->codecs[DS_NF]) goto_err; + + //fprintf(stderr, "=== RL ===\n"); + h->codecs[DS_RL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RL]), + c->stats[DS_RL], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_RL]->nvals && !h->codecs[DS_RL]) goto_err; + + //fprintf(stderr, "=== FN ===\n"); + h->codecs[DS_FN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FN]), + c->stats[DS_FN], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_FN]->nvals && !h->codecs[DS_FN]) goto_err; + + //fprintf(stderr, "=== FC ===\n"); + h->codecs[DS_FC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FC]), + c->stats[DS_FC], E_BYTE, NULL, + fd->version, &fd->vv); + if (c->stats[DS_FC]->nvals && !h->codecs[DS_FC]) goto_err; + + //fprintf(stderr, "=== FP ===\n"); + h->codecs[DS_FP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FP]), + c->stats[DS_FP], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_FP]->nvals && !h->codecs[DS_FP]) goto_err; + + //fprintf(stderr, "=== DL ===\n"); + h->codecs[DS_DL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_DL]), + c->stats[DS_DL], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_DL]->nvals && !h->codecs[DS_DL]) goto_err; + + //fprintf(stderr, "=== BA ===\n"); + h->codecs[DS_BA] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BA]), + c->stats[DS_BA], E_BYTE, NULL, + fd->version, &fd->vv); + if (c->stats[DS_BA]->nvals && !h->codecs[DS_BA]) goto_err; + + if (CRAM_MAJOR_VERS(fd->version) >= 3) { + cram_byte_array_len_encoder e; + + e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 + ? 
E_VARINT_UNSIGNED + : E_EXTERNAL; + e.len_dat = (void *)DS_BB_len; + //e.len_dat = (void *)DS_BB; + + e.val_encoding = E_EXTERNAL; + e.val_dat = (void *)DS_BB; + + h->codecs[DS_BB] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, + E_BYTE_ARRAY, (void *)&e, + fd->version, &fd->vv); + if (!h->codecs[DS_BB]) goto_err; + } else { + h->codecs[DS_BB] = NULL; + } + + //fprintf(stderr, "=== BS ===\n"); + h->codecs[DS_BS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BS]), + c->stats[DS_BS], E_BYTE, NULL, + fd->version, &fd->vv); + if (c->stats[DS_BS]->nvals && !h->codecs[DS_BS]) goto_err; + + if (CRAM_MAJOR_VERS(fd->version) == 1) { + h->codecs[DS_TL] = NULL; + h->codecs[DS_RI] = NULL; + h->codecs[DS_RS] = NULL; + h->codecs[DS_PD] = NULL; + h->codecs[DS_HC] = NULL; + h->codecs[DS_SC] = NULL; + + //fprintf(stderr, "=== TC ===\n"); + h->codecs[DS_TC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TC]), + c->stats[DS_TC], E_BYTE, NULL, + fd->version, &fd->vv); + if (c->stats[DS_TC]->nvals && !h->codecs[DS_TC]) goto_err; + + //fprintf(stderr, "=== TN ===\n"); + h->codecs[DS_TN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TN]), + c->stats[DS_TN], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_TN]->nvals && !h->codecs[DS_TN]) goto_err; + } else { + h->codecs[DS_TC] = NULL; + h->codecs[DS_TN] = NULL; + + //fprintf(stderr, "=== TL ===\n"); + h->codecs[DS_TL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TL]), + c->stats[DS_TL], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_TL]->nvals && !h->codecs[DS_TL]) goto_err; + + + //fprintf(stderr, "=== RI ===\n"); + h->codecs[DS_RI] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RI]), + c->stats[DS_RI], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_RI]->nvals && !h->codecs[DS_RI]) goto_err; + + //fprintf(stderr, "=== RS ===\n"); + h->codecs[DS_RS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RS]), + c->stats[DS_RS], E_INT, NULL, + fd->version, &fd->vv); + 
if (c->stats[DS_RS]->nvals && !h->codecs[DS_RS]) goto_err; + + //fprintf(stderr, "=== PD ===\n"); + h->codecs[DS_PD] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_PD]), + c->stats[DS_PD], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_PD]->nvals && !h->codecs[DS_PD]) goto_err; + + //fprintf(stderr, "=== HC ===\n"); + h->codecs[DS_HC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_HC]), + c->stats[DS_HC], E_INT, NULL, + fd->version, &fd->vv); + if (c->stats[DS_HC]->nvals && !h->codecs[DS_HC]) goto_err; + + //fprintf(stderr, "=== SC ===\n"); + if (1) { + int i2[2] = {0, DS_SC}; + + h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, + E_BYTE_ARRAY, (void *)i2, + fd->version, &fd->vv); + } else { + // Appears to be no practical benefit to using this method, + // but it may work better if we start mixing SC, IN and BB + // elements into the same external block. + cram_byte_array_len_encoder e; + + e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 + ? E_VARINT_UNSIGNED + : E_EXTERNAL; + e.len_dat = (void *)DS_SC_len; + + e.val_encoding = E_EXTERNAL; + e.val_dat = (void *)DS_SC; + + h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, + E_BYTE_ARRAY, (void *)&e, + fd->version, &fd->vv); + } + if (!h->codecs[DS_SC]) goto_err; + } + + //fprintf(stderr, "=== IN ===\n"); + { + int i2[2] = {0, DS_IN}; + h->codecs[DS_IN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, + E_BYTE_ARRAY, (void *)i2, + fd->version, &fd->vv); + if (!h->codecs[DS_IN]) goto_err; + } + + h->codecs[DS_QS] = cram_encoder_init(E_EXTERNAL, NULL, E_BYTE, + (void *)DS_QS, + fd->version, &fd->vv); + if (!h->codecs[DS_QS]) goto_err; + { + int i2[2] = {0, DS_RN}; + h->codecs[DS_RN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, + E_BYTE_ARRAY, (void *)i2, + fd->version, &fd->vv); + if (!h->codecs[DS_RN]) goto_err; + } + + + /* Encode slices */ + for (i = 0; i < c->curr_slice; i++) { + hts_log_info("Encode slice %d", i); + + int local_embed_ref = + embed_ref>0 && 
c->slices[i]->hdr->ref_seq_id != -1 ? 1 : 0; + if (cram_encode_slice(fd, c, h, c->slices[i], local_embed_ref) != 0) + return -1; + } + + /* Create compression header */ + { + h->ref_seq_id = c->ref_seq_id; + h->ref_seq_start = c->ref_seq_start; + h->ref_seq_span = c->ref_seq_span; + h->num_records = c->num_records; + h->qs_seq_orient = c->qs_seq_orient; + // slight misnomer - sorted or treat as-if sorted (ap_delta force to 1) + h->AP_delta = c->pos_sorted; + memcpy(h->substitution_matrix, CRAM_SUBST_MATRIX, 20); + + if (!(c_hdr = cram_encode_compression_header(fd, c, h, embed_ref))) + return -1; + } + + /* Compute landmarks */ + /* Fill out slice landmarks */ + c->num_landmarks = c->curr_slice; + c->landmark = malloc(c->num_landmarks * sizeof(*c->landmark)); + if (!c->landmark) + return -1; + + /* + * Slice offset starts after the first block, so we need to simulate + * writing it to work out the correct offset + */ + { + slice_offset = c_hdr->method == RAW + ? c_hdr->uncomp_size + : c_hdr->comp_size; + slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + + fd->vv.varint_size(c_hdr->content_id) + + fd->vv.varint_size(c_hdr->comp_size) + + fd->vv.varint_size(c_hdr->uncomp_size); + } + + c->ref_seq_id = c->slices[0]->hdr->ref_seq_id; + if (c->ref_seq_id == -1 && CRAM_ge31(fd->version)) { + // Spec states span=0, but it broke our range queries. + // See commit message for this and prior. + c->ref_seq_start = 0; + c->ref_seq_span = 0; + } else { + c->ref_seq_start = c->slices[0]->hdr->ref_seq_start; + c->ref_seq_span = c->slices[0]->hdr->ref_seq_span; + } + for (i = 0; i < c->curr_slice; i++) { + cram_slice *s = c->slices[i]; + + c->num_blocks += s->hdr->num_blocks + 1; // slice header + c->landmark[i] = slice_offset; + + if (s->hdr->ref_seq_start + s->hdr->ref_seq_span > + c->ref_seq_start + c->ref_seq_span) { + c->ref_seq_span = s->hdr->ref_seq_start + s->hdr->ref_seq_span + - c->ref_seq_start; + } + + slice_offset += s->hdr_block->method == RAW + ? 
s->hdr_block->uncomp_size + : s->hdr_block->comp_size; + + slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + + fd->vv.varint_size(s->hdr_block->content_id) + + fd->vv.varint_size(s->hdr_block->comp_size) + + fd->vv.varint_size(s->hdr_block->uncomp_size); + + for (j = 0; j < s->hdr->num_blocks; j++) { + slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + + fd->vv.varint_size(s->block[j]->content_id) + + fd->vv.varint_size(s->block[j]->comp_size) + + fd->vv.varint_size(s->block[j]->uncomp_size); + + slice_offset += s->block[j]->method == RAW + ? s->block[j]->uncomp_size + : s->block[j]->comp_size; + } + } + c->length += slice_offset; // just past the final slice + + c->comp_hdr_block = c_hdr; + + if (c->ref_seq_id >= 0) { + if (c->ref_free) { + free(c->ref); + c->ref = NULL; + } else { + cram_ref_decr(fd->refs, c->ref_seq_id); + } + } + + /* Cache references up-front if we have unsorted access patterns */ + if (!no_ref && c->refs_used) { + for (i = 0; i < fd->refs->nref; i++) { + if (c->refs_used[i]) + cram_ref_decr(fd->refs, i); + } + } + + return 0; + + err: + return -1; +} + + +/* + * Adds a feature code to a read within a slice. For purposes of minimising + * memory allocations and fragmentation we have one array of features for all + * reads within the slice. We return the index into this array for this new + * feature. + * + * Returns feature index on success + * -1 on failure. + */ +static int cram_add_feature(cram_container *c, cram_slice *s, + cram_record *r, cram_feature *f) { + if (s->nfeatures >= s->afeatures) { + s->afeatures = s->afeatures ? 
s->afeatures*2 : 1024; + s->features = realloc(s->features, s->afeatures*sizeof(*s->features)); + if (!s->features) + return -1; + } + + if (!r->nfeature++) { + r->feature = s->nfeatures; + if (cram_stats_add(c->stats[DS_FP], f->X.pos) < 0) + return -1; + } else { + if (cram_stats_add(c->stats[DS_FP], + f->X.pos - s->features[r->feature + r->nfeature-2].X.pos) < 0) + return -1; + + } + if (cram_stats_add(c->stats[DS_FC], f->X.code) < 0) + return -1; + + s->features[s->nfeatures++] = *f; + + return 0; +} + +static int cram_add_substitution(cram_fd *fd, cram_container *c, + cram_slice *s, cram_record *r, + int pos, char base, char qual, char ref) { + cram_feature f; + + // seq=ACGTN vs ref=ACGT or seq=ACGT vs ref=ACGTN + if (fd->L2[(uc)base]<4 || (fd->L2[(uc)base]<5 && fd->L2[(uc)ref]<4)) { + f.X.pos = pos+1; + f.X.code = 'X'; + f.X.base = fd->cram_sub_matrix[ref&0x1f][base&0x1f]; + if (cram_stats_add(c->stats[DS_BS], f.X.base) < 0) + return -1; + } else { + f.B.pos = pos+1; + f.B.code = 'B'; + f.B.base = base; + f.B.qual = qual; + if (cram_stats_add(c->stats[DS_BA], f.B.base) < 0) return -1; + if (cram_stats_add(c->stats[DS_QS], f.B.qual) < 0) return -1; + BLOCK_APPEND_CHAR(s->qual_blk, qual); + } + return cram_add_feature(c, s, r, &f); + + block_err: + return -1; +} + +static int cram_add_bases(cram_fd *fd, cram_container *c, + cram_slice *s, cram_record *r, + int pos, int len, char *base) { + cram_feature f; + + f.b.pos = pos+1; + f.b.code = 'b'; + f.b.seq_idx = base - (char *)BLOCK_DATA(s->seqs_blk); + f.b.len = len; + + return cram_add_feature(c, s, r, &f); +} + +static int cram_add_base(cram_fd *fd, cram_container *c, + cram_slice *s, cram_record *r, + int pos, char base, char qual) { + cram_feature f; + f.B.pos = pos+1; + f.B.code = 'B'; + f.B.base = base; + f.B.qual = qual; + if (cram_stats_add(c->stats[DS_BA], base) < 0) return -1; + if (cram_stats_add(c->stats[DS_QS], qual) < 0) return -1; + BLOCK_APPEND_CHAR(s->qual_blk, qual); + return cram_add_feature(c, 
s, r, &f); + + block_err: + return -1; +} + +static int cram_add_quality(cram_fd *fd, cram_container *c, + cram_slice *s, cram_record *r, + int pos, char qual) { + cram_feature f; + f.Q.pos = pos+1; + f.Q.code = 'Q'; + f.Q.qual = qual; + if (cram_stats_add(c->stats[DS_QS], qual) < 0) return -1; + BLOCK_APPEND_CHAR(s->qual_blk, qual); + return cram_add_feature(c, s, r, &f); + + block_err: + return -1; +} + +static int cram_add_deletion(cram_container *c, cram_slice *s, cram_record *r, + int pos, int len, char *base) { + cram_feature f; + f.D.pos = pos+1; + f.D.code = 'D'; + f.D.len = len; + if (cram_stats_add(c->stats[DS_DL], len) < 0) return -1; + return cram_add_feature(c, s, r, &f); +} + +static int cram_add_softclip(cram_container *c, cram_slice *s, cram_record *r, + int pos, int len, char *base, int version) { + cram_feature f; + f.S.pos = pos+1; + f.S.code = 'S'; + f.S.len = len; + switch (CRAM_MAJOR_VERS(version)) { + case 1: + f.S.seq_idx = BLOCK_SIZE(s->base_blk); + BLOCK_APPEND(s->base_blk, base, len); + BLOCK_APPEND_CHAR(s->base_blk, '\0'); + break; + + case 2: + default: + f.S.seq_idx = BLOCK_SIZE(s->soft_blk); + if (base) { + BLOCK_APPEND(s->soft_blk, base, len); + } else { + int i; + for (i = 0; i < len; i++) + BLOCK_APPEND_CHAR(s->soft_blk, 'N'); + } + BLOCK_APPEND_CHAR(s->soft_blk, '\0'); + break; + + //default: + // // v3.0 onwards uses BB data-series + // f.S.seq_idx = BLOCK_SIZE(s->soft_blk); + } + return cram_add_feature(c, s, r, &f); + + block_err: + return -1; +} + +static int cram_add_hardclip(cram_container *c, cram_slice *s, cram_record *r, + int pos, int len, char *base) { + cram_feature f; + f.S.pos = pos+1; + f.S.code = 'H'; + f.S.len = len; + if (cram_stats_add(c->stats[DS_HC], len) < 0) return -1; + return cram_add_feature(c, s, r, &f); +} + +static int cram_add_skip(cram_container *c, cram_slice *s, cram_record *r, + int pos, int len, char *base) { + cram_feature f; + f.S.pos = pos+1; + f.S.code = 'N'; + f.S.len = len; + if 
(cram_stats_add(c->stats[DS_RS], len) < 0) return -1; + return cram_add_feature(c, s, r, &f); +} + +static int cram_add_pad(cram_container *c, cram_slice *s, cram_record *r, + int pos, int len, char *base) { + cram_feature f; + f.S.pos = pos+1; + f.S.code = 'P'; + f.S.len = len; + if (cram_stats_add(c->stats[DS_PD], len) < 0) return -1; + return cram_add_feature(c, s, r, &f); +} + +static int cram_add_insertion(cram_container *c, cram_slice *s, cram_record *r, + int pos, int len, char *base) { + cram_feature f; + f.I.pos = pos+1; + if (len == 1) { + char b = base ? *base : 'N'; + f.i.code = 'i'; + f.i.base = b; + if (cram_stats_add(c->stats[DS_BA], b) < 0) return -1; + } else { + f.I.code = 'I'; + f.I.len = len; + f.S.seq_idx = BLOCK_SIZE(s->base_blk); + if (base) { + BLOCK_APPEND(s->base_blk, base, len); + } else { + int i; + for (i = 0; i < len; i++) + BLOCK_APPEND_CHAR(s->base_blk, 'N'); + } + BLOCK_APPEND_CHAR(s->base_blk, '\0'); + } + return cram_add_feature(c, s, r, &f); + + block_err: + return -1; +} + +/* + * Encodes auxiliary data. Largely duplicated from above, but done so to + * keep it simple and avoid a myriad of version ifs. + * + * Returns the RG header line pointed to by the BAM aux fields on success, + * NULL on failure or no rg present, also sets "*err" to non-zero + */ +static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, + cram_container *c, + cram_slice *s, cram_record *cr, + int verbatim_NM, int verbatim_MD, + int NM, kstring_t *MD, int cf_tag, + int no_ref, int *err) { + char *aux, *orig; + sam_hrec_rg_t *brg = NULL; + int aux_size = bam_get_l_aux(b); + const char *aux_end = bam_data_end(b); + cram_block *td_b = c->comp_hdr->TD_blk; + int TD_blk_size = BLOCK_SIZE(td_b), new; + char *key; + khint_t k; + + if (err) *err = 1; + + orig = aux = (char *)bam_aux(b); + + + // cF:i => Extra CRAM bit flags. 
+ // 1: Don't auto-decode MD (may be invalid) + // 2: Don't auto-decode NM (may be invalid) + if (cf_tag && CRAM_MAJOR_VERS(fd->version) < 4) { + // Temporary copy of aux so we can ammend it. + aux = malloc(aux_size+4); + if (!aux) + return NULL; + + memcpy(aux, orig, aux_size); + aux[aux_size++] = 'c'; + aux[aux_size++] = 'F'; + aux[aux_size++] = 'C'; + aux[aux_size++] = cf_tag; + orig = aux; + aux_end = aux + aux_size; + } + + // Copy aux keys to td_b and aux values to slice aux blocks + while (aux_end - aux >= 1 && aux[0] != 0) { + int r; + + // Room for code + type + at least 1 byte of data + if (aux - orig >= aux_size - 3) + goto err; + + // RG:Z + if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') { + char *rg = &aux[3]; + aux = rg; + while (aux < aux_end && *aux++); + if (aux == aux_end && aux[-1] != '\0') { + hts_log_error("Unterminated RG:Z tag for read \"%s\"", + bam_get_qname(b)); + goto err; + } + brg = sam_hrecs_find_rg(fd->header->hrecs, rg); + if (brg) { + if (CRAM_MAJOR_VERS(fd->version) >= 4) + BLOCK_APPEND(td_b, "RG*", 3); + continue; + } else { + // RG:Z tag will be stored verbatim + hts_log_warning("Missing @RG header for RG \"%s\"", rg); + aux = rg - 3; + } + } + + // MD:Z + if (aux[0] == 'M' && aux[1] == 'D' && aux[2] == 'Z') { + if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_MD) { + if (MD && MD->s && strncasecmp(MD->s, aux+3, orig + aux_size - (aux+3)) == 0) { + while (aux < aux_end && *aux++); + if (aux == aux_end && aux[-1] != '\0') { + hts_log_error("Unterminated MD:Z tag for read \"%s\"", + bam_get_qname(b)); + goto err; + } + if (CRAM_MAJOR_VERS(fd->version) >= 4) + BLOCK_APPEND(td_b, "MD*", 3); + continue; + } + } + } + + // NM:i + if (aux[0] == 'N' && aux[1] == 'M') { + if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_NM) { + int NM_ = bam_aux2i_end((uint8_t *)aux+2, (uint8_t *)aux_end); + if (NM_ == NM) { + switch(aux[2]) { + case 'A': case 'C': case 'c': aux+=4; break; + case 'S': case 's': 
aux+=5; break; + case 'I': case 'i': case 'f': aux+=7; break; + default: + hts_log_error("Unhandled type code for NM tag"); + goto err; + } + if (CRAM_MAJOR_VERS(fd->version) >= 4) + BLOCK_APPEND(td_b, "NM*", 3); + continue; + } + } + } + + BLOCK_APPEND(td_b, aux, 3); + + // Container level tags_used, for TD series + // Maps integer key ('X0i') to cram_tag_map struct. + int key = (((unsigned char *) aux)[0]<<16 | + ((unsigned char *) aux)[1]<<8 | + ((unsigned char *) aux)[2]); + k = kh_put(m_tagmap, c->tags_used, key, &r); + if (-1 == r) + goto err; + else if (r != 0) + kh_val(c->tags_used, k) = NULL; + + if (r == 1) { + khint_t k_global; + + // Global tags_used for cram_metrics support + pthread_mutex_lock(&fd->metrics_lock); + k_global = kh_put(m_metrics, fd->tags_used, key, &r); + if (-1 == r) { + pthread_mutex_unlock(&fd->metrics_lock); + goto err; + } + if (r >= 1) { + kh_val(fd->tags_used, k_global) = cram_new_metrics(); + if (!kh_val(fd->tags_used, k_global)) { + kh_del(m_metrics, fd->tags_used, k_global); + pthread_mutex_unlock(&fd->metrics_lock); + goto err; + } + } + + pthread_mutex_unlock(&fd->metrics_lock); + + int i2[2] = {'\t',key}; + size_t sk = key; + cram_tag_map *m = calloc(1, sizeof(*m)); + if (!m) + goto_err; + kh_val(c->tags_used, k) = m; + + cram_codec *c; + + // Use a block content id based on the tag id. + // Codec type depends on tag data type. 
+ switch(aux[2]) { + case 'Z': case 'H': + // string as byte_array_stop + c = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL, + E_BYTE_ARRAY, (void *)i2, + fd->version, &fd->vv); + break; + + case 'A': case 'c': case 'C': { + // byte array len, 1 byte + cram_byte_array_len_encoder e; + cram_stats st; + + if (CRAM_MAJOR_VERS(fd->version) <= 3) { + e.len_encoding = E_HUFFMAN; + e.len_dat = NULL; // will get codes from st + } else { + e.len_encoding = E_CONST_INT; + e.len_dat = NULL; // will get codes from st + } + memset(&st, 0, sizeof(st)); + if (cram_stats_add(&st, 1) < 0) goto block_err; + cram_stats_encoding(fd, &st); + + e.val_encoding = E_EXTERNAL; + e.val_dat = (void *)sk; + + c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, + E_BYTE_ARRAY, (void *)&e, + fd->version, &fd->vv); + break; + } + + case 's': case 'S': { + // byte array len, 2 byte + cram_byte_array_len_encoder e; + cram_stats st; + + if (CRAM_MAJOR_VERS(fd->version) <= 3) { + e.len_encoding = E_HUFFMAN; + e.len_dat = NULL; // will get codes from st + } else { + e.len_encoding = E_CONST_INT; + e.len_dat = NULL; // will get codes from st + } + memset(&st, 0, sizeof(st)); + if (cram_stats_add(&st, 2) < 0) goto block_err; + cram_stats_encoding(fd, &st); + + e.val_encoding = E_EXTERNAL; + e.val_dat = (void *)sk; + + c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, + E_BYTE_ARRAY, (void *)&e, + fd->version, &fd->vv); + break; + } + case 'i': case 'I': case 'f': { + // byte array len, 4 byte + cram_byte_array_len_encoder e; + cram_stats st; + + if (CRAM_MAJOR_VERS(fd->version) <= 3) { + e.len_encoding = E_HUFFMAN; + e.len_dat = NULL; // will get codes from st + } else { + e.len_encoding = E_CONST_INT; + e.len_dat = NULL; // will get codes from st + } + memset(&st, 0, sizeof(st)); + if (cram_stats_add(&st, 4) < 0) goto block_err; + cram_stats_encoding(fd, &st); + + e.val_encoding = E_EXTERNAL; + e.val_dat = (void *)sk; + + c = cram_encoder_init(E_BYTE_ARRAY_LEN, &st, + E_BYTE_ARRAY, (void *)&e, + fd->version, &fd->vv); 
+ break; + } + + case 'B': { + // Byte array of variable size, but we generate our tag + // byte stream at the wrong stage (during reading and not + // after slice header construction). So we use + // BYTE_ARRAY_LEN with the length codec being external + // too. + cram_byte_array_len_encoder e; + + e.len_encoding = CRAM_MAJOR_VERS(fd->version) >= 4 + ? E_VARINT_UNSIGNED + : E_EXTERNAL; + e.len_dat = (void *)sk; // or key+128 for len? + + e.val_encoding = E_EXTERNAL; + e.val_dat = (void *)sk; + + c = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL, + E_BYTE_ARRAY, (void *)&e, + fd->version, &fd->vv); + break; + } + + default: + hts_log_error("Unsupported SAM aux type '%c'", aux[2]); + c = NULL; + } + + if (!c) + goto_err; + + m->codec = c; + + // Link to fd-global tag metrics + pthread_mutex_lock(&fd->metrics_lock); + m->m = k_global ? (cram_metrics *)kh_val(fd->tags_used, k_global) : NULL; + pthread_mutex_unlock(&fd->metrics_lock); + } + + cram_tag_map *tm = (cram_tag_map *)kh_val(c->tags_used, k); + if (!tm) goto_err; + cram_codec *codec = tm->codec; + if (!tm->codec) goto_err; + + switch(aux[2]) { + case 'A': case 'C': case 'c': + if (aux_end - aux < 3+1) + goto err; + + if (!tm->blk) { + if (!(tm->blk = cram_new_block(EXTERNAL, key))) + goto err; + codec->u.e_byte_array_len.val_codec->out = tm->blk; + } + + aux+=3; + //codec->encode(s, codec, aux, 1); + // Functionally equivalent, but less code. 
+ BLOCK_APPEND_CHAR(tm->blk, *aux); + aux++; + break; + + case 'S': case 's': + if (aux_end - aux < 3+2) + goto err; + + if (!tm->blk) { + if (!(tm->blk = cram_new_block(EXTERNAL, key))) + goto err; + codec->u.e_byte_array_len.val_codec->out = tm->blk; + } + + aux+=3; + //codec->encode(s, codec, aux, 2); + BLOCK_APPEND(tm->blk, aux, 2); + aux+=2; + break; + + case 'I': case 'i': case 'f': + if (aux_end - aux < 3+4) + goto err; + + if (!tm->blk) { + if (!(tm->blk = cram_new_block(EXTERNAL, key))) + goto err; + codec->u.e_byte_array_len.val_codec->out = tm->blk; + } + + aux+=3; + //codec->encode(s, codec, aux, 4); + BLOCK_APPEND(tm->blk, aux, 4); + aux+=4; + break; + + case 'd': + if (aux_end - aux < 3+8) + goto err; + + if (!tm->blk) { + if (!(tm->blk = cram_new_block(EXTERNAL, key))) + goto err; + codec->u.e_byte_array_len.val_codec->out = tm->blk; + } + + aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; + //codec->encode(s, codec, aux, 8); + BLOCK_APPEND(tm->blk, aux, 8); + aux+=8; + break; + + case 'Z': case 'H': { + if (aux_end - aux < 3) + goto err; + + if (!tm->blk) { + if (!(tm->blk = cram_new_block(EXTERNAL, key))) + goto err; + codec->out = tm->blk; + } + + char *aux_s; + aux += 3; + aux_s = aux; + while (aux < aux_end && *aux++); + if (aux == aux_end && aux[-1] != '\0') { + hts_log_error("Unterminated %c%c:%c tag for read \"%s\"", + aux_s[-3], aux_s[-2], aux_s[-1], + bam_get_qname(b)); + goto err; + } + if (codec->encode(s, codec, aux_s, aux - aux_s) < 0) + goto err; + break; + } + + case 'B': { + if (aux_end - aux < 4+4) + goto err; + + int type = aux[3]; + uint64_t count = (((uint64_t)((unsigned char *)aux)[4]) << 0 | + ((uint64_t)((unsigned char *)aux)[5]) << 8 | + ((uint64_t)((unsigned char *)aux)[6]) <<16 | + ((uint64_t)((unsigned char *)aux)[7]) <<24); + uint64_t blen; + if (!tm->blk) { + if (!(tm->blk = cram_new_block(EXTERNAL, key))) + goto err; + if (codec->u.e_byte_array_len.val_codec->codec == E_XDELTA) { + if (!(tm->blk2 = 
cram_new_block(EXTERNAL, key+128))) + goto err; + codec->u.e_byte_array_len.len_codec->out = tm->blk2; + codec->u.e_byte_array_len.val_codec->u.e_xdelta.sub_codec->out = tm->blk; + } else { + codec->u.e_byte_array_len.len_codec->out = tm->blk; + codec->u.e_byte_array_len.val_codec->out = tm->blk; + } + } + + // skip TN field + aux+=3; + + // We use BYTE_ARRAY_LEN with external length, so store that first + switch (type) { + case 'c': case 'C': + blen = count; + break; + case 's': case 'S': + blen = 2*count; + break; + case 'i': case 'I': case 'f': + blen = 4*count; + break; + default: + hts_log_error("Unknown sub-type '%c' for aux type 'B'", type); + goto err; + } + + blen += 5; // sub-type & length + if (aux_end - aux < blen || blen > INT_MAX) + goto err; + + if (codec->encode(s, codec, aux, (int) blen) < 0) + goto err; + aux += blen; + break; + } + default: + hts_log_error("Unknown aux type '%c'", aux_end - aux < 2 ? '?' : aux[2]); + goto err; + } + tm->blk->m = tm->m; + } + + // FIXME: sort BLOCK_DATA(td_b) by char[3] triples + + // And and increment TD hash entry + BLOCK_APPEND_CHAR(td_b, 0); + + // Duplicate key as BLOCK_DATA() can be realloced to a new pointer. 
+ key = string_ndup(c->comp_hdr->TD_keys, + (char *)BLOCK_DATA(td_b) + TD_blk_size, + BLOCK_SIZE(td_b) - TD_blk_size); + if (!key) + goto block_err; + k = kh_put(m_s2i, c->comp_hdr->TD_hash, key, &new); + if (new < 0) { + goto err; + } else if (new == 0) { + BLOCK_SIZE(td_b) = TD_blk_size; + } else { + kh_val(c->comp_hdr->TD_hash, k) = c->comp_hdr->nTL; + c->comp_hdr->nTL++; + } + + cr->TL = kh_val(c->comp_hdr->TD_hash, k); + if (cram_stats_add(c->stats[DS_TL], cr->TL) < 0) + goto block_err; + + if (orig != (char *)bam_aux(b)) + free(orig); + + if (err) *err = 0; + + return brg; + + err: + block_err: + if (orig != (char *)bam_aux(b)) + free(orig); + return NULL; +} + +/* + * During cram_next_container or before the final flush at end of + * file, we update the current slice headers and increment the slice + * number to the next slice. + * + * See cram_next_container() and cram_close(). + */ +void cram_update_curr_slice(cram_container *c, int version) { + cram_slice *s = c->slice; + if (c->multi_seq) { + s->hdr->ref_seq_id = -2; + s->hdr->ref_seq_start = 0; + s->hdr->ref_seq_span = 0; + } else if (c->curr_ref == -1 && CRAM_ge31(version)) { + // Spec states span=0, but it broke our range queries. + // See commit message for this and prior. + s->hdr->ref_seq_id = -1; + s->hdr->ref_seq_start = 0; + s->hdr->ref_seq_span = 0; + } else { + s->hdr->ref_seq_id = c->curr_ref; + s->hdr->ref_seq_start = c->first_base; + s->hdr->ref_seq_span = MAX(0, c->last_base - c->first_base + 1); + } + s->hdr->num_records = c->curr_rec; + + if (c->curr_slice == 0) { + if (c->ref_seq_id != s->hdr->ref_seq_id) + c->ref_seq_id = s->hdr->ref_seq_id; + c->ref_seq_start = c->first_base; + } + + c->curr_slice++; +} + +/* + * Handles creation of a new container or new slice, flushing any + * existing containers when appropriate. + * + * Really this is next slice, which may or may not lead to a new container. + * + * Returns cram_container pointer on success + * NULL on failure. 
+ */ +static cram_container *cram_next_container(cram_fd *fd, bam_seq_t *b) { + cram_container *c = fd->ctr; + int i; + + /* First occurrence */ + if (c->curr_ref == -2) + c->curr_ref = bam_ref(b); + + if (c->slice) + cram_update_curr_slice(c, fd->version); + + /* Flush container */ + if (c->curr_slice == c->max_slice || + (bam_ref(b) != c->curr_ref && !c->multi_seq)) { + c->ref_seq_span = fd->last_base - c->ref_seq_start + 1; + hts_log_info("Flush container %d/%"PRId64"..%"PRId64, + c->ref_seq_id, c->ref_seq_start, + c->ref_seq_start + c->ref_seq_span -1); + + /* Encode slices */ + if (-1 == cram_flush_container_mt(fd, c)) + return NULL; + if (!fd->pool) { + // Move to sep func, as we need cram_flush_container for + // the closing phase to flush the partial container. + for (i = 0; i < c->max_slice; i++) { + cram_free_slice(c->slices[i]); + c->slices[i] = NULL; + } + + c->slice = NULL; + c->curr_slice = 0; + + /* Easy approach for purposes of freeing stats */ + cram_free_container(c); + } + + c = fd->ctr = cram_new_container(fd->seqs_per_slice, + fd->slices_per_container); + if (!c) + return NULL; + + pthread_mutex_lock(&fd->ref_lock); + c->no_ref = fd->no_ref; + c->embed_ref = fd->embed_ref; + c->record_counter = fd->record_counter; + pthread_mutex_unlock(&fd->ref_lock); + c->curr_ref = bam_ref(b); + } + + c->last_pos = c->first_base = c->last_base = bam_pos(b)+1; + + /* New slice */ + c->slice = c->slices[c->curr_slice] = + cram_new_slice(MAPPED_SLICE, c->max_rec); + if (!c->slice) + return NULL; + + if (c->multi_seq) { + c->slice->hdr->ref_seq_id = -2; + c->slice->hdr->ref_seq_start = 0; + c->slice->last_apos = 1; + } else { + c->slice->hdr->ref_seq_id = bam_ref(b); + // wrong for unsorted data, will fix during encoding. 
+ c->slice->hdr->ref_seq_start = bam_pos(b)+1; + c->slice->last_apos = bam_pos(b)+1; + } + + c->curr_rec = 0; + c->s_num_bases = 0; + c->n_mapped = 0; + + // QO field: 0 implies original orientation, 1 implies sequence orientation + // 1 is often preferable for NovaSeq, but impact is slight. ~0.5% diff. + // Conversely other data sets it's often better than 1% saving for 0. + // Short of trying both and learning, for now we use use 0 for V4, 1 for V3. + c->qs_seq_orient = CRAM_MAJOR_VERS(fd->version) >= 4 ? 0 : 1; + + return c; +} + + +/* + * Converts a single bam record into a cram record. + * Possibly used within a thread. + * + * Returns 0 on success; + * -1 on failure + */ +static int process_one_read(cram_fd *fd, cram_container *c, + cram_slice *s, cram_record *cr, + bam_seq_t *b, int rnum, kstring_t *MD, + int embed_ref, int no_ref) { + int i, fake_qual = -1, NM = 0; + char *cp; + char *ref, *seq, *qual; + + // Any places with N in seq and/or reference can lead to ambiguous + // interpretation of the SAM NM:i tag. So we store these verbatim + // to ensure valid data round-trips the same regardless of who + // defines it as valid. + // Similarly when alignments go beyond end of the reference. + int verbatim_NM = fd->store_nm; + int verbatim_MD = fd->store_md; + + // FIXME: multi-ref containers + + cr->flags = bam_flag(b); + cr->len = bam_seq_len(b); + uint8_t *md; + if (!(md = bam_aux_get(b, "MD"))) + MD = NULL; + else + MD->l = 0; + + int cf_tag = 0; + + if (embed_ref == 2) { + cf_tag = MD ? 0 : 1; // No MD + cf_tag |= bam_aux_get(b, "NM") ? 0 : 2; // No NM + } + + //fprintf(stderr, "%s => %d\n", rg ? rg : "\"\"", cr->rg); + + ref = c->ref ? 
c->ref - (c->ref_start-1) : NULL; + cr->ref_id = bam_ref(b); + if (cram_stats_add(c->stats[DS_RI], cr->ref_id) < 0) + goto block_err; + if (cram_stats_add(c->stats[DS_BF], fd->cram_flag_swap[cr->flags & 0xfff]) < 0) + goto block_err; + + // Non reference based encoding means storing the bases verbatim as features, which in + // turn means every base also has a quality already stored. + if (!no_ref || CRAM_MAJOR_VERS(fd->version) >= 3) + cr->cram_flags |= CRAM_FLAG_PRESERVE_QUAL_SCORES; + + if (cr->len <= 0 && CRAM_MAJOR_VERS(fd->version) >= 3) + cr->cram_flags |= CRAM_FLAG_NO_SEQ; + //cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK); + + c->num_bases += cr->len; + cr->apos = bam_pos(b)+1; + if (cr->apos < 0 || cr->apos > INT64_MAX/2) + goto err; + if (c->pos_sorted) { + if (cr->apos < s->last_apos && !fd->ap_delta) { + c->pos_sorted = 0; + } else { + if (cram_stats_add(c->stats[DS_AP], cr->apos - s->last_apos) < 0) + goto block_err; + s->last_apos = cr->apos; + } + } else { + //cram_stats_add(c->stats[DS_AP], cr->apos); + } + c->max_apos += (cr->apos > c->max_apos) * (cr->apos - c->max_apos); + + /* + * This seqs_ds is largely pointless and it could reuse the same memory + * over and over. + * s->base_blk is what we need for encoding. 
+ */ + cr->seq = BLOCK_SIZE(s->seqs_blk); + cr->qual = BLOCK_SIZE(s->qual_blk); + BLOCK_GROW(s->seqs_blk, cr->len+1); + BLOCK_GROW(s->qual_blk, cr->len); + + // Convert BAM nibble encoded sequence to string of base pairs + seq = cp = (char *)BLOCK_END(s->seqs_blk); + *seq = 0; + nibble2base(bam_seq(b), cp, cr->len); + BLOCK_SIZE(s->seqs_blk) += cr->len; + + qual = cp = (char *)bam_qual(b); + + + /* Copy and parse */ + if (!(cr->flags & BAM_FUNMAP)) { + uint32_t *cig_to, *cig_from; + int64_t apos = cr->apos-1, spos = 0; + int64_t MD_last = apos; // last position of edit in MD tag + + if (apos < 0) { + hts_log_error("Mapped read with position <= 0 is disallowed"); + return -1; + } + + cr->cigar = s->ncigar; + cr->ncigar = bam_cigar_len(b); + while (cr->cigar + cr->ncigar >= s->cigar_alloc) { + s->cigar_alloc = s->cigar_alloc ? s->cigar_alloc*2 : 1024; + s->cigar = realloc(s->cigar, s->cigar_alloc * sizeof(*s->cigar)); + if (!s->cigar) + return -1; + } + + cig_to = (uint32_t *)s->cigar; + cig_from = (uint32_t *)bam_cigar(b); + + cr->feature = 0; + cr->nfeature = 0; + for (i = 0; i < cr->ncigar; i++) { + enum cigar_op cig_op = cig_from[i] & BAM_CIGAR_MASK; + uint32_t cig_len = cig_from[i] >> BAM_CIGAR_SHIFT; + cig_to[i] = cig_from[i]; + + /* Can also generate events from here for CRAM diffs */ + + switch (cig_op) { + int l; + + // Don't trust = and X ops to be correct. + case BAM_CMATCH: + case BAM_CBASE_MATCH: + case BAM_CBASE_MISMATCH: + //fprintf(stderr, "\nBAM_CMATCH\nR: %.*s\nS: %.*s\n", + // cig_len, &ref[apos], cig_len, &seq[spos]); + l = 0; + if (!no_ref && cr->len) { + int end = cig_len+apos < c->ref_end + ? cig_len : c->ref_end - apos; + char *sp = &seq[spos]; + char *rp = &ref[apos]; + char *qp = &qual[spos]; + if (end > cr->len) { + hts_log_error("CIGAR and query sequence are of different length"); + return -1; + } + for (l = 0; l < end; l++) { + // This case is just too disputed and different tools + // interpret these in different ways. 
We give up and + // store verbatim. + if (rp[l] == 'N' && sp[l] == 'N') + verbatim_NM = verbatim_MD = 1; + if (rp[l] != sp[l]) { + // Build our own MD tag if one is on the sequence, so + // we can ensure it matches and thus can be discarded. + if (MD && ref) { + if (kputuw(apos+l - MD_last, MD) < 0) goto err; + if (kputc(rp[l], MD) < 0) goto err; + MD_last = apos+l+1; + } + NM++; + if (!sp[l]) + break; + if (0 && CRAM_MAJOR_VERS(fd->version) >= 3) { +#if 0 + // Disabled for the time being as it doesn't + // seem to gain us much. + int ol=l; + while (l 1) { + if (cram_add_bases(fd, c, s, cr, spos+ol, + l-ol, &seq[spos+ol])) + return -1; + l--; + } else { + l = ol; + if (cram_add_substitution(fd, c, s, cr, + spos+l, sp[l], + qp[l], rp[l])) + return -1; + } +#else + // With urmap pushed to the limit and lots + // of unaligned data (should be soft-clipped) + // this saves ~2-7%. Worth it? + int nl = l; + int max_end = nl, max_score = 0, score = 0; + while (nl < end) { + if (rp[nl] != sp[nl]) { + score += 3; + if (max_score < score) { + max_score = score; + max_end = nl; + } + } else { + score--; + if (score < -2 || + max_score - score > 7) + break; + } + nl++; + } + if (max_score > 20) { + cram_add_bases(fd, c, s, cr, spos+l, + max_end-l, &seq[spos+l]); + l = max_end-1; + } else { + while (l < nl) { + if (rp[l] != sp[l]) + cram_add_substitution(fd, c, s, + cr, spos+l, + sp[l], qp[l], + rp[l]); + l++; + } + l--; + } +#endif + } else { + if (cram_add_substitution(fd, c, s, cr, spos+l, + sp[l], qp[l], rp[l])) + return -1; + } + } + } + spos += l; + apos += l; + } + + if (l < cig_len && cr->len) { + if (no_ref) { + if (CRAM_MAJOR_VERS(fd->version) == 3) { + if (cram_add_bases(fd, c, s, cr, spos, + cig_len-l, &seq[spos])) + return -1; + spos += cig_len-l; + } else { + for (; l < cig_len && seq[spos]; l++, spos++) { + if (cram_add_base(fd, c, s, cr, spos, + seq[spos], qual[spos])) + return -1; + } + } + } else { + /* off end of sequence or non-ref based output */ + 
verbatim_NM = verbatim_MD = 1; + for (; l < cig_len && seq[spos]; l++, spos++) { + if (cram_add_base(fd, c, s, cr, spos, + seq[spos], qual[spos])) + return -1; + } + } + apos += cig_len; + } else if (!cr->len) { + /* Seq "*" */ + verbatim_NM = verbatim_MD = 1; + apos += cig_len; + spos += cig_len; + } + break; + + case BAM_CDEL: + if (MD && ref) { + if (kputuw(apos - MD_last, MD) < 0) goto err; + if (apos < c->ref_end) { + if (kputc_('^', MD) < 0) goto err; + if (kputsn(&ref[apos], MIN(c->ref_end - apos, cig_len), MD) < 0) + goto err; + } + } + NM += cig_len; + + if (cram_add_deletion(c, s, cr, spos, cig_len, &seq[spos])) + return -1; + apos += cig_len; + MD_last = apos; + break; + + case BAM_CREF_SKIP: + if (cram_add_skip(c, s, cr, spos, cig_len, &seq[spos])) + return -1; + apos += cig_len; + MD_last += cig_len; + break; + + case BAM_CINS: + if (cram_add_insertion(c, s, cr, spos, cig_len, + cr->len ? &seq[spos] : NULL)) + return -1; + if (no_ref && cr->len) { + for (l = 0; l < cig_len; l++, spos++) { + cram_add_quality(fd, c, s, cr, spos, qual[spos]); + } + } else { + spos += cig_len; + } + NM += cig_len; + break; + + case BAM_CSOFT_CLIP: + if (cram_add_softclip(c, s, cr, spos, cig_len, + cr->len ? 
&seq[spos] : NULL, + fd->version)) + return -1; + + if (no_ref && + !(cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES)) { + if (cr->len) { + for (l = 0; l < cig_len; l++, spos++) { + cram_add_quality(fd, c, s, cr, spos, qual[spos]); + } + } else { + for (l = 0; l < cig_len; l++, spos++) { + cram_add_quality(fd, c, s, cr, spos, -1); + } + } + } else { + spos += cig_len; + } + break; + + case BAM_CHARD_CLIP: + if (cram_add_hardclip(c, s, cr, spos, cig_len, &seq[spos])) + return -1; + break; + + case BAM_CPAD: + if (cram_add_pad(c, s, cr, spos, cig_len, &seq[spos])) + return -1; + break; + + default: + hts_log_error("Unknown CIGAR op code %d", cig_op); + return -1; + } + } + if (cr->len && spos != cr->len) { + hts_log_error("CIGAR and query sequence are of different length"); + return -1; + } + fake_qual = spos; + cr->aend = no_ref ? apos : MIN(apos, c->ref_end); + if (cram_stats_add(c->stats[DS_FN], cr->nfeature) < 0) + goto block_err; + + if (MD && ref) + if (kputuw(apos - MD_last, MD) < 0) goto err; + } else { + // Unmapped + cr->cram_flags |= CRAM_FLAG_PRESERVE_QUAL_SCORES; + cr->cigar = 0; + cr->ncigar = 0; + cr->nfeature = 0; + cr->aend = MIN(cr->apos, c->ref_end); + for (i = 0; i < cr->len; i++) + if (cram_stats_add(c->stats[DS_BA], seq[i]) < 0) + goto block_err; + fake_qual = 0; + } + + cr->ntags = 0; //cram_stats_add(c->stats[DS_TC], cr->ntags); + int err = 0; + sam_hrec_rg_t *brg = + cram_encode_aux(fd, b, c, s, cr, verbatim_NM, verbatim_MD, NM, MD, + cf_tag, no_ref, &err); + if (err) + goto block_err; + + /* Read group, identified earlier */ + if (brg) { + cr->rg = brg->id; + } else if (CRAM_MAJOR_VERS(fd->version) == 1) { + sam_hrec_rg_t *brg = sam_hrecs_find_rg(fd->header->hrecs, "UNKNOWN"); + if (!brg) goto block_err; + cr->rg = brg->id; + } else { + cr->rg = -1; + } + if (cram_stats_add(c->stats[DS_RG], cr->rg) < 0) + goto block_err; + + /* + * Append to the qual block now. 
We do this here as + * cram_add_substitution() can generate BA/QS events which need to + * be in the qual block before we append the rest of the data. + */ + if (cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES) { + /* Special case of seq "*" */ + if (cr->len == 0) { + cr->len = fake_qual; + BLOCK_GROW(s->qual_blk, cr->len); + cp = (char *)BLOCK_END(s->qual_blk); + memset(cp, 255, cr->len); + } else { + BLOCK_GROW(s->qual_blk, cr->len); + cp = (char *)BLOCK_END(s->qual_blk); + char *from = (char *)&bam_qual(b)[0]; + char *to = &cp[0]; + memcpy(to, from, cr->len); + + // Store quality in original orientation for better compression. + if (!c->qs_seq_orient) { + if (cr->flags & BAM_FREVERSE) { + int i, j; + for (i = 0, j = cr->len-1; i < j; i++, j--) { + unsigned char c; + c = to[i]; + to[i] = to[j]; + to[j] = c; + } + } + } + } + BLOCK_SIZE(s->qual_blk) += cr->len; + } else { + if (cr->len == 0) + cr->len = fake_qual >= 0 ? fake_qual : cr->aend - cr->apos + 1; + } + + if (cram_stats_add(c->stats[DS_RL], cr->len) < 0) + goto block_err; + + /* Now we know apos and aend both, update mate-pair information */ + { + int new; + khint_t k; + int sec = (cr->flags & BAM_FSECONDARY) ? 
1 : 0; + + //fprintf(stderr, "Checking %"PRId64"/%.*s\t", rnum, + // cr->name_len, DSTRING_STR(s->name_ds)+cr->name); + if (cr->flags & BAM_FPAIRED) { + char *key = string_ndup(s->pair_keys, bam_name(b), bam_name_len(b)); + if (!key) + return -1; + + k = kh_put(m_s2i, s->pair[sec], key, &new); + if (-1 == new) + return -1; + else if (new > 0) + kh_val(s->pair[sec], k) = rnum; + } else { + new = 1; + k = 0; // Prevents false-positive warning from gcc -Og + } + + if (new == 0) { + cram_record *p = &s->crecs[kh_val(s->pair[sec], k)]; + int64_t aleft, aright; + int sign; + + aleft = MIN(cr->apos, p->apos); + aright = MAX(cr->aend, p->aend); + if (cr->apos < p->apos) { + sign = 1; + } else if (cr->apos > p->apos) { + sign = -1; + } else if (cr->flags & BAM_FREAD1) { + sign = 1; + } else { + sign = -1; + } + + // This vs p: tlen, matepos, flags. Permit TLEN 0 and/or TLEN +/- + // a small amount, if appropriate options set. + if ((!fd->tlen_zero && MAX(bam_mate_pos(b)+1, 0) != p->apos) && + !(fd->tlen_zero && bam_mate_pos(b) == 0)) + goto detached; + + if (((bam_flag(b) & BAM_FMUNMAP) != 0) != + ((p->flags & BAM_FUNMAP) != 0)) + goto detached; + + if (((bam_flag(b) & BAM_FMREVERSE) != 0) != + ((p->flags & BAM_FREVERSE) != 0)) + goto detached; + + + // p vs this: tlen, matepos, flags + if (p->ref_id != cr->ref_id && + !(fd->tlen_zero && p->ref_id == -1)) + goto detached; + + if (p->mate_pos != cr->apos && + !(fd->tlen_zero && p->mate_pos == 0)) + goto detached; + + if (((p->flags & BAM_FMUNMAP) != 0) != + ((p->mate_flags & CRAM_M_UNMAP) != 0)) + goto detached; + + if (((p->flags & BAM_FMREVERSE) != 0) != + ((p->mate_flags & CRAM_M_REVERSE) != 0)) + goto detached; + + // Supplementary reads are just too ill defined + if ((cr->flags & BAM_FSUPPLEMENTARY) || + (p->flags & BAM_FSUPPLEMENTARY)) + goto detached; + + // When in lossy name mode, if a read isn't detached we + // cannot store the name. 
The corollary is that when we + // must store the name, it must be detached (inefficient). + if (fd->lossy_read_names && + (!(cr->cram_flags & CRAM_FLAG_DISCARD_NAME) || + !((p->cram_flags & CRAM_FLAG_DISCARD_NAME)))) + goto detached; + + // Now check TLEN. We do this last as sometimes it's the + // only thing that differs. In CRAM4 we have a better way + // of handling this that doesn't break detached status + int explicit_tlen = 0; + int tflag1 = ((bam_ins_size(b) && + llabs(bam_ins_size(b) - sign*(aright-aleft+1)) + > fd->tlen_approx) + || (!bam_ins_size(b) && !fd->tlen_zero)); + + int tflag2 = ((p->tlen && llabs(p->tlen - -sign*(aright-aleft+1)) + > fd->tlen_approx) + || (!p->tlen && !fd->tlen_zero)); + + if (tflag1 || tflag2) { + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + explicit_tlen = CRAM_FLAG_EXPLICIT_TLEN; + } else { + // Stil do detached for unmapped data in CRAM4 as this + // also impacts RNEXT calculation. + goto detached; + } + } + + /* + * The fields below are unused when encoding this read as it is + * no longer detached. In theory they may get referred to when + * processing a 3rd or 4th read in this template?, so we set them + * here just to be sure. + * + * They do not need cram_stats_add() calls those as they are + * not emitted. + */ + cr->mate_pos = p->apos; + cram_stats_add(c->stats[DS_NP], cr->mate_pos); + cr->tlen = explicit_tlen ? 
bam_ins_size(b) : sign*(aright-aleft+1); + cram_stats_add(c->stats[DS_TS], cr->tlen); + cr->mate_flags = + ((p->flags & BAM_FMUNMAP) == BAM_FMUNMAP) * CRAM_M_UNMAP + + ((p->flags & BAM_FMREVERSE) == BAM_FMREVERSE) * CRAM_M_REVERSE; + + // Decrement statistics aggregated earlier + if (p->cram_flags & CRAM_FLAG_STATS_ADDED) { + cram_stats_del(c->stats[DS_NP], p->mate_pos); + cram_stats_del(c->stats[DS_MF], p->mate_flags); + if (!(p->cram_flags & CRAM_FLAG_EXPLICIT_TLEN)) + cram_stats_del(c->stats[DS_TS], p->tlen); + cram_stats_del(c->stats[DS_NS], p->mate_ref_id); + } + + /* Similarly we could correct the p-> values too, but these will no + * longer have any code that refers back to them as the new 'p' + * for this template is our current 'cr'. + */ + //p->mate_pos = cr->apos; + //p->mate_flags = + // ((cr->flags & BAM_FMUNMAP) == BAM_FMUNMAP) * CRAM_M_UNMAP + + // ((cr->flags & BAM_FMREVERSE) == BAM_FMREVERSE)* CRAM_M_REVERSE; + //p->tlen = p->apos - cr->aend; + + // Clear detached from cr flags + cr->cram_flags &= ~CRAM_FLAG_DETACHED; + cr->cram_flags |= explicit_tlen; + if (cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK) < 0) + goto block_err; + + // Clear detached from p flags and set downstream + if (p->cram_flags & CRAM_FLAG_STATS_ADDED) { + cram_stats_del(c->stats[DS_CF], p->cram_flags & CRAM_FLAG_MASK); + p->cram_flags &= ~CRAM_FLAG_STATS_ADDED; + } + + p->cram_flags &= ~CRAM_FLAG_DETACHED; + p->cram_flags |= CRAM_FLAG_MATE_DOWNSTREAM | explicit_tlen;; + if (cram_stats_add(c->stats[DS_CF], p->cram_flags & CRAM_FLAG_MASK) < 0) + goto block_err; + + p->mate_line = rnum - (kh_val(s->pair[sec], k) + 1); + if (cram_stats_add(c->stats[DS_NF], p->mate_line) < 0) + goto block_err; + + kh_val(s->pair[sec], k) = rnum; + } else { + detached: + //fprintf(stderr, "unpaired\n"); + + /* Derive mate flags from this flag */ + cr->mate_flags = 0; + if (bam_flag(b) & BAM_FMUNMAP) + cr->mate_flags |= CRAM_M_UNMAP; + if (bam_flag(b) & BAM_FMREVERSE) + 
cr->mate_flags |= CRAM_M_REVERSE; + + if (cram_stats_add(c->stats[DS_MF], cr->mate_flags) < 0) + goto block_err; + + cr->mate_pos = MAX(bam_mate_pos(b)+1, 0); + if (cram_stats_add(c->stats[DS_NP], cr->mate_pos) < 0) + goto block_err; + + cr->tlen = bam_ins_size(b); + if (cram_stats_add(c->stats[DS_TS], cr->tlen) < 0) + goto block_err; + + cr->cram_flags |= CRAM_FLAG_DETACHED; + if (cram_stats_add(c->stats[DS_CF], cr->cram_flags & CRAM_FLAG_MASK) < 0) + goto block_err; + if (cram_stats_add(c->stats[DS_NS], bam_mate_ref(b)) < 0) + goto block_err; + + cr->cram_flags |= CRAM_FLAG_STATS_ADDED; + } + } + + cr->mqual = bam_map_qual(b); + if (cram_stats_add(c->stats[DS_MQ], cr->mqual) < 0) + goto block_err; + + cr->mate_ref_id = bam_mate_ref(b); + + if (!(bam_flag(b) & BAM_FUNMAP)) { + if (c->first_base > cr->apos) + c->first_base = cr->apos; + + if (c->last_base < cr->aend) + c->last_base = cr->aend; + } + + return 0; + + block_err: + err: + return -1; +} + +/* + * Write iterator: put BAM format sequences into a CRAM file. + * We buffer up a containers worth of data at a time. + * + * Returns 0 on success + * -1 on failure + */ +int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) { + cram_container *c; + + if (!fd->ctr) { + fd->ctr = cram_new_container(fd->seqs_per_slice, + fd->slices_per_container); + if (!fd->ctr) + return -1; + fd->ctr->record_counter = fd->record_counter; + + pthread_mutex_lock(&fd->ref_lock); + fd->ctr->no_ref = fd->no_ref; + fd->ctr->embed_ref = fd->embed_ref; + pthread_mutex_unlock(&fd->ref_lock); + } + c = fd->ctr; + + int embed_ref = c->embed_ref; + + if (!c->slice || c->curr_rec == c->max_rec || + (bam_ref(b) != c->curr_ref && c->curr_ref >= -1) || + (c->s_num_bases + c->s_aux_bytes >= fd->bases_per_slice)) { + int slice_rec, curr_rec, multi_seq = fd->multi_seq == 1; + int curr_ref = c->slice ? c->curr_ref : bam_ref(b); + + /* + * Start packing slices when we routinely have under 1/4tr full. 
+ * + * This option isn't available if we choose to embed references + * since we can only have one per slice. + * + * The multi_seq var here refers to our intention for the next slice. + * This slice has already been encoded so we output as-is. + */ + if (fd->multi_seq == -1 && c->curr_rec < c->max_rec/4+10 && + fd->last_slice && fd->last_slice < c->max_rec/4+10 && + embed_ref<=0) { + if (!c->multi_seq) + hts_log_info("Multi-ref enabled for next container"); + multi_seq = 1; + } else if (fd->multi_seq == 1) { + pthread_mutex_lock(&fd->metrics_lock); + if (fd->last_RI_count <= c->max_slice && fd->multi_seq_user != 1) { + multi_seq = 0; + hts_log_info("Multi-ref disabled for next container"); + } + pthread_mutex_unlock(&fd->metrics_lock); + } + + slice_rec = c->slice_rec; + curr_rec = c->curr_rec; + + if (CRAM_MAJOR_VERS(fd->version) == 1 || + c->curr_rec == c->max_rec || fd->multi_seq != 1 || !c->slice || + c->s_num_bases + c->s_aux_bytes >= fd->bases_per_slice) { + if (NULL == (c = cram_next_container(fd, b))) { + if (fd->ctr) { + // prevent cram_close attempting to flush + fd->ctr_mt = fd->ctr; // delay free when threading + fd->ctr = NULL; + } + return -1; + } + } + + /* + * Due to our processing order, some things we've already done we + * cannot easily undo. So when we first notice we should be packing + * multiple sequences per container we emit the small partial + * container as-is and then start a fresh one in a different mode. + */ + if (multi_seq == 0 && fd->multi_seq == 1 && fd->multi_seq_user == -1) { + // User selected auto-mode, we're currently using multi-seq, but + // have detected we don't need to. Switch back to auto. + fd->multi_seq = -1; + } else if (multi_seq) { + // We detected we need multi-seq + fd->multi_seq = 1; + c->multi_seq = 1; + c->pos_sorted = 0; + + // Cram_next_container may end up flushing an existing one and + // triggering fd->embed_ref=2 if no reference is found. 
+ // Embedded refs are incompatible with multi-seq, so we bail + // out and switch to no_ref in this scenario. We do this + // within the container only, as multi_seq may be temporary + // and we switch back away from it again. + pthread_mutex_lock(&fd->ref_lock); + if (fd->embed_ref > 0 && c->curr_rec == 0 && c->curr_slice == 0) { + hts_log_warning("Changing from embed_ref to no_ref mode"); + // Should we update fd->embed_ref and no_ref here too? + // Doing so means if we go into multi-seq and back out + // again, eg due a cluster of tiny refs in the middle of + // much larger ones, then we bake in no-ref mode. + // + // However for unsorted data we're realistically not + // going to switch back. + c->embed_ref = fd->embed_ref = 0; // or -1 for auto? + c->no_ref = fd->no_ref = 1; + } + pthread_mutex_unlock(&fd->ref_lock); + + if (!c->refs_used) { + pthread_mutex_lock(&fd->ref_lock); + c->refs_used = calloc(fd->refs->nref, sizeof(int)); + pthread_mutex_unlock(&fd->ref_lock); + if (!c->refs_used) + return -1; + } + } + + fd->last_slice = curr_rec - slice_rec; + c->slice_rec = c->curr_rec; + + // Have we seen this reference before? 
+ if (bam_ref(b) >= 0 && curr_ref >= 0 && bam_ref(b) != curr_ref && + embed_ref<=0 && !fd->unsorted && multi_seq) { + + if (!c->refs_used) { + pthread_mutex_lock(&fd->ref_lock); + c->refs_used = calloc(fd->refs->nref, sizeof(int)); + pthread_mutex_unlock(&fd->ref_lock); + if (!c->refs_used) + return -1; + } else if (c->refs_used && c->refs_used[bam_ref(b)]) { + pthread_mutex_lock(&fd->ref_lock); + fd->unsorted = 1; + fd->multi_seq = 1; + pthread_mutex_unlock(&fd->ref_lock); + } + } + + c->curr_ref = bam_ref(b); + if (c->refs_used && c->curr_ref >= 0) c->refs_used[c->curr_ref]++; + } + + if (!c->bams) { + /* First time through, allocate a set of bam pointers */ + pthread_mutex_lock(&fd->bam_list_lock); + if (fd->bl) { + spare_bams *spare = fd->bl; + c->bams = spare->bams; + fd->bl = spare->next; + free(spare); + } else { + c->bams = calloc(c->max_c_rec, sizeof(bam_seq_t *)); + if (!c->bams) { + pthread_mutex_unlock(&fd->bam_list_lock); + return -1; + } + } + pthread_mutex_unlock(&fd->bam_list_lock); + } + + /* Copy or alloc+copy the bam record, for later encoding */ + if (c->bams[c->curr_c_rec]) { + if (bam_copy1(c->bams[c->curr_c_rec], b) == NULL) + return -1; + } else { + c->bams[c->curr_c_rec] = bam_dup1(b); + if (c->bams[c->curr_c_rec] == NULL) + return -1; + } + if (bam_seq_len(b)) { + c->s_num_bases += bam_seq_len(b); + } else { + // No sequence in BAM record. CRAM doesn't directly support this + // case, it ends up being stored as a string of N's for each query + // consuming CIGAR operation. As this can become very inefficient + // in time and memory, data where the query length is excessively + // long are rejected. 
+ hts_pos_t qlen = bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)); + if (qlen > 100000000) { + hts_log_error("CIGAR query length %"PRIhts_pos + " for read \"%s\" is too long", + qlen, bam_get_qname(b)); + return -1; + } + c->s_num_bases += qlen; + } + c->curr_rec++; + c->curr_c_rec++; + c->s_aux_bytes += bam_get_l_aux(b); + c->n_mapped += (bam_flag(b) & BAM_FUNMAP) ? 0 : 1; + fd->record_counter++; + + return 0; +} diff --git a/src/htslib-1.18/cram/cram_encode.h b/src/htslib-1.21/cram/cram_encode.h similarity index 100% rename from src/htslib-1.18/cram/cram_encode.h rename to src/htslib-1.21/cram/cram_encode.h diff --git a/src/htslib-1.21/cram/cram_external.c b/src/htslib-1.21/cram/cram_external.c new file mode 100644 index 0000000..4943750 --- /dev/null +++ b/src/htslib-1.21/cram/cram_external.c @@ -0,0 +1,1033 @@ +/* +Copyright (c) 2015, 2018-2020, 2022-2024 Genome Research Ltd. +Author: James Bonfield + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/*! \file + * External CRAM interface. + * + * Internally we're happy to use macros and to grub around in the cram + * structures. This isn't very sustainable for an externally usable + * ABI though, so we have anonymous structs and accessor functions too + * to permit software such as samtools reheader to manipulate cram + * containers and blocks in a robust manner. + */ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include +#include + +#if defined(HAVE_EXTERNAL_LIBHTSCODECS) +#include +#else +#include "../htscodecs/htscodecs/rANS_static4x16.h" +#endif + +#include "../htslib/hfile.h" +#include "cram.h" + +/* + *----------------------------------------------------------------------------- + * cram_fd + */ +sam_hdr_t *cram_fd_get_header(cram_fd *fd) { return fd->header; } +void cram_fd_set_header(cram_fd *fd, sam_hdr_t *hdr) { fd->header = hdr; } + +int cram_fd_get_version(cram_fd *fd) { return fd->version; } +void cram_fd_set_version(cram_fd *fd, int vers) { fd->version = vers; } + +int cram_major_vers(cram_fd *fd) { return CRAM_MAJOR_VERS(fd->version); } +int cram_minor_vers(cram_fd *fd) { return CRAM_MINOR_VERS(fd->version); } + +hFILE *cram_fd_get_fp(cram_fd *fd) { return fd->fp; } +void cram_fd_set_fp(cram_fd *fd, hFILE *fp) { fd->fp = fp; } + + +/* + *----------------------------------------------------------------------------- + * cram_container + */ +int32_t cram_container_get_length(cram_container *c) { + 
return c->length;
}

/* Stores 'length' in the container's length field. */
void cram_container_set_length(cram_container *c, int32_t length) {
    c->length = length;
}


/* Returns the container's num_blocks field. */
int32_t cram_container_get_num_blocks(cram_container *c) {
    return c->num_blocks;
}

/* Stores 'num_blocks' in the container's num_blocks field. */
void cram_container_set_num_blocks(cram_container *c, int32_t num_blocks) {
    c->num_blocks = num_blocks;
}

/* Returns the container's num_records field. */
int32_t cram_container_get_num_records(cram_container *c) {
    return c->num_records;
}

/* Returns the container's num_bases field. */
int64_t cram_container_get_num_bases(cram_container *c) {
    return c->num_bases;
}


/* Returns the landmarks[] array and the number of elements
 * in num_landmarks.
 *
 * The returned pointer is the container's own array, not a copy.
 * num_landmarks is written unconditionally, so it must not be NULL.
 */
int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks) {
    *num_landmarks = c->num_landmarks;
    return c->landmark;
}

/* Sets the landmarks[] array (pointer copy, not a memory dup) and
 * num_landmarks value.
 *
 * Any array previously referenced by the container is not freed here.
 */
void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks,
                                  int32_t *landmarks) {
    c->num_landmarks = num_landmarks;
    c->landmark = landmarks;
}


/* Returns true if the container is empty (EOF marker) */
int cram_container_is_empty(cram_fd *fd) {
    return fd->empty_container;
}

/*
 * Copies the container's reference id, start and span into whichever of
 * the refid, start and span output pointers are non-NULL.  NULL pointers
 * are skipped.
 */
void cram_container_get_coords(cram_container *c,
                               int *refid, hts_pos_t *start, hts_pos_t *span) {
    if (refid)
        *refid = c->ref_seq_id;
    if (start)
        *start = c->ref_seq_start;
    if (span)
        *span = c->ref_seq_span;
}


/*
 *-----------------------------------------------------------------------------
 * cram_block_compression_hdr
 */

/*
 * Utility function to edit an RG id.
 * This is only possible if there is one single RG value used and it
 * is in the container compression header using HUFFMAN or BETA
 * codec.  In this case it is essentially hard coded and needs no
 * editing of external (or worse, CORE) blocks.
 *
 * Returns 0 on success
 *        -1 on failure
 */
// Or arbitrary set compression header constant?
+ +static int cram_block_compression_hdr_set_DS(cram_block_compression_hdr *ch, + int ds, int new_rg) { + if (!ch || !ch->codecs[ds]) + return -1; + + switch (ch->codecs[ds]->codec) { + case E_HUFFMAN: + if (ch->codecs[ds]->u.huffman.ncodes != 1) + return -1; + ch->codecs[ds]->u.huffman.codes[0].symbol = new_rg; + return 0; + + case E_BETA: + if (ch->codecs[ds]->u.beta.nbits != 0) + return -1; + ch->codecs[ds]->u.beta.offset = -new_rg; + return 0; + + default: + break; + } + + return -1; +} + +int cram_block_compression_hdr_set_rg(cram_block_compression_hdr *ch, int new_rg) { + return cram_block_compression_hdr_set_DS(ch, DS_RG, new_rg); +} + +/* + * Converts a cram_block_compression_hdr struct used for decoding to + * one used for encoding. Maybe this should be a transparent + * operation applied on-demand. + * + * Returns 0 on success + * -1 on failure + */ +int cram_block_compression_hdr_decoder2encoder(cram_fd *fd, + cram_block_compression_hdr *ch) { + int i; + + if (!ch) + return -1; + + for (i = 0; i < DS_END; i++) { + cram_codec *co = ch->codecs[i]; + if (!co) + continue; + + if (-1 == cram_codec_decoder2encoder(fd, co)) + return -1; + } + + return 0; +} + +typedef struct { + cram_block_compression_hdr *hdr; + cram_map *curr_map; + int idx; + int is_tag; // phase 2 using tag_encoding_map +} cram_codec_iter; + +static void cram_codec_iter_init(cram_block_compression_hdr *hdr, + cram_codec_iter *iter) { + iter->hdr = hdr; + iter->curr_map = NULL; + iter->idx = 0; + iter->is_tag = 0; +} + +// See enum cram_DS_ID in cram/cram_structs +static int cram_ds_to_key(enum cram_DS_ID ds) { + switch(ds) { + case DS_RN: return 256*'R'+'N'; + case DS_QS: return 256*'Q'+'S'; + case DS_IN: return 256*'I'+'N'; + case DS_SC: return 256*'S'+'C'; + case DS_BF: return 256*'B'+'F'; + case DS_CF: return 256*'C'+'F'; + case DS_AP: return 256*'A'+'P'; + case DS_RG: return 256*'R'+'G'; + case DS_MQ: return 256*'M'+'Q'; + case DS_NS: return 256*'N'+'S'; + case DS_MF: return 
256*'M'+'F'; + case DS_TS: return 256*'T'+'S'; + case DS_NP: return 256*'N'+'P'; + case DS_NF: return 256*'N'+'F'; + case DS_RL: return 256*'R'+'L'; + case DS_FN: return 256*'F'+'N'; + case DS_FC: return 256*'F'+'C'; + case DS_FP: return 256*'F'+'P'; + case DS_DL: return 256*'D'+'L'; + case DS_BA: return 256*'B'+'A'; + case DS_BS: return 256*'B'+'S'; + case DS_TL: return 256*'T'+'L'; + case DS_RI: return 256*'R'+'I'; + case DS_RS: return 256*'R'+'S'; + case DS_PD: return 256*'P'+'D'; + case DS_HC: return 256*'H'+'C'; + case DS_BB: return 256*'B'+'B'; + case DS_QQ: return 256*'Q'+'Q'; + case DS_TN: return 256*'T'+'N'; + case DS_TC: return 256*'T'+'C'; + case DS_TM: return 256*'T'+'M'; + case DS_TV: return 256*'T'+'V'; + default: break; + } + + return -1; // unknown +} + +static cram_codec *cram_codec_iter_next(cram_codec_iter *iter, + int *key) { + cram_codec *cc = NULL; + cram_block_compression_hdr *hdr = iter->hdr; + + if (!iter->is_tag) { + // 1: Iterating through main data-series + do { + cc = hdr->codecs[iter->idx++]; + } while(!cc && iter->idx < DS_END); + if (cc) { + *key = cram_ds_to_key(iter->idx-1); + return cc; + } + + // Reset index for phase 2 + iter->idx = 0; + iter->is_tag = 1; + } + + do { + if (!iter->curr_map) + iter->curr_map = hdr->tag_encoding_map[iter->idx++]; + + cc = iter->curr_map ? iter->curr_map->codec : NULL; + if (cc) { + *key = iter->curr_map->key; + iter->curr_map = iter->curr_map->next; + return cc; + } + } while (iter->idx < CRAM_MAP_HASH); + + // End of codecs + return NULL; +} + +/* + * A list of data-series, used to create a linked list threaded through + * a single array. + */ +typedef struct ds_list { + int data_series; + int next; +} ds_list; + +KHASH_MAP_INIT_INT(cid, int64_t) + +// Opaque struct for the CRAM block content-id -> data-series map. 
+struct cram_cid2ds_t { + ds_list *ds; // array of data-series with linked lists threading through it + int ds_size; + int ds_idx; + khash_t(cid) *hash; // key=content_id, value=index to ds array + int *ds_a; // serialised array of data-series returned by queries. +}; + +void cram_cid2ds_free(cram_cid2ds_t *cid2ds) { + if (cid2ds) { + if (cid2ds->hash) + kh_destroy(cid, cid2ds->hash); + free(cid2ds->ds); + free(cid2ds->ds_a); + free(cid2ds); + } +} + +/* + * Map cram block numbers to data-series. It's normally a 1:1 mapping, + * but in rare cases it can be 1:many (or even many:many). + * The key is the block number and the value is an index into the data-series + * array, which we iterate over until reaching a negative value. + * + * Provide cid2ds as NULL to allocate a new map or pass in an existing one + * to append to this map. The new (or existing) map is returned. + * + * Returns the cid2ds (newly allocated or as provided) on success, + * NULL on failure. + */ +cram_cid2ds_t *cram_update_cid2ds_map(cram_block_compression_hdr *hdr, + cram_cid2ds_t *cid2ds) { + cram_cid2ds_t *c2d = cid2ds; + if (!c2d) { + c2d = calloc(1, sizeof(*c2d)); + if (!c2d) + return NULL; + + c2d->hash = kh_init(cid); + if (!c2d->hash) + goto err; + } + + // Iterate through codecs. Initially primary two-left ones in + // rec_encoding_map, and then the three letter in tag_encoding_map. + cram_codec_iter citer; + cram_codec_iter_init(hdr, &citer); + cram_codec *codec; + int key; + + while ((codec = cram_codec_iter_next(&citer, &key))) { + // Having got a codec, we can then use cram_codec_to_id to get + // the block IDs utilised by that codec. This is then our + // map for allocating data blocks to data series, but for shared + // blocks we can't separate out how much is used by each DS. 
+ int bnum[2]; + cram_codec_get_content_ids(codec, bnum); + + khiter_t k; + int ret, i; + for (i = 0; i < 2; i++) { + if (bnum[i] > -2) { + k = kh_put(cid, c2d->hash, bnum[i], &ret); + if (ret < 0) + goto err; + + if (c2d->ds_idx >= c2d->ds_size) { + c2d->ds_size += 100; + c2d->ds_size *= 2; + ds_list *ds_new = realloc(c2d->ds, + c2d->ds_size * sizeof(*ds_new)); + if (!ds_new) + goto err; + c2d->ds = ds_new; + } + + if (ret == 0) { + // Shared content_id, so add to list of DS + + // Maybe data-series should be part of the hash key? + // + // So top-32 bit is content-id, bot-32 bit is key. + // Sort hash by key and then can group all the data-series + // known together. ?? + // + // Brute force for now, scan to see if recorded. + // Typically this is minimal effort as we almost always + // have 1 data-series per block content-id, so the list to + // search is of size 1. + int dsi = kh_value(c2d->hash, k); + while (dsi >= 0) { + if (c2d->ds[dsi].data_series == key) + break; + dsi = c2d->ds[dsi].next; + } + + if (dsi == -1) { + // Block content_id seen before, but not with this DS + c2d->ds[c2d->ds_idx].data_series = key; + c2d->ds[c2d->ds_idx].next = kh_value(c2d->hash, k); + kh_value(c2d->hash, k) = c2d->ds_idx; + c2d->ds_idx++; + } + } else { + // First time this content id has been used + c2d->ds[c2d->ds_idx].data_series = key; + c2d->ds[c2d->ds_idx].next = -1; + kh_value(c2d->hash, k) = c2d->ds_idx; + c2d->ds_idx++; + } + } + } + } + + return c2d; + + err: + if (c2d != cid2ds) + cram_cid2ds_free(c2d); + return NULL; +} + +/* + * Return a list of data series observed as belonging to a block with + * the specified content_id. *n is the number of data series + * returned, or 0 if block is unused. + * Block content_id of -1 is used to indicate the CORE block. + * + * The pointer returned is owned by the cram_cid2ds state and should + * not be freed by the caller. 
+ */ +int *cram_cid2ds_query(cram_cid2ds_t *c2d, int content_id, int *n) { + *n = 0; + if (!c2d || !c2d->hash) + return NULL; + + khiter_t k = kh_get(cid, c2d->hash, content_id); + if (k == kh_end(c2d->hash)) + return NULL; + + if (!c2d->ds_a) { + c2d->ds_a = malloc(c2d->ds_idx * sizeof(int)); + if (!c2d->ds_a) + return NULL; + } + + int dsi = kh_value(c2d->hash, k); // initial ds array index from hash + int idx = 0; + while (dsi >= 0) { + c2d->ds_a[idx++] = c2d->ds[dsi].data_series; + dsi = c2d->ds[dsi].next; // iterate over list within ds array + } + + *n = idx; + return c2d->ds_a; +} + +/* + * Produces a description of the record and tag encodings held within + * a compression header and appends to 'ks'. + * + * Returns 0 on success, + * <0 on failure. + */ +int cram_describe_encodings(cram_block_compression_hdr *hdr, kstring_t *ks) { + cram_codec_iter citer; + cram_codec_iter_init(hdr, &citer); + cram_codec *codec; + int key, r = 0; + + while ((codec = cram_codec_iter_next(&citer, &key))) { + char key_s[4] = {0}; + int key_i = 0; + if (key>>16) key_s[key_i++] = key>>16; + key_s[key_i++] = (key>>8)&0xff; + key_s[key_i++] = key&0xff; + r |= ksprintf(ks, "\t%s\t", key_s) < 0; + r |= cram_codec_describe(codec, ks) < 0; + r |= kputc('\n', ks) < 0; + } + + return r ? 
-1 : 0; +} + +/* + *----------------------------------------------------------------------------- + * cram_slice + */ +int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr) { + return hdr->num_blocks; +} + +int cram_slice_hdr_get_embed_ref_id(cram_block_slice_hdr *h) { + return h->ref_base_id; +} + +void cram_slice_hdr_get_coords(cram_block_slice_hdr *h, + int *refid, hts_pos_t *start, hts_pos_t *span) { + if (refid) + *refid = h->ref_seq_id; + if (start) + *start = h->ref_seq_start; + if (span) + *span = h->ref_seq_span; +} + +/* + *----------------------------------------------------------------------------- + * cram_block + */ +int32_t cram_block_get_content_id(cram_block *b) { + return b->content_type == CORE ? -1 : b->content_id; +} +int32_t cram_block_get_comp_size(cram_block *b) { return b->comp_size; } +int32_t cram_block_get_uncomp_size(cram_block *b) { return b->uncomp_size; } +int32_t cram_block_get_crc32(cram_block *b) { return b->crc32; } +void * cram_block_get_data(cram_block *b) { return BLOCK_DATA(b); } +int32_t cram_block_get_size(cram_block *b) { return BLOCK_SIZE(b); } +enum cram_block_method cram_block_get_method(cram_block *b) { + return (enum cram_block_method)b->orig_method; +} +enum cram_content_type cram_block_get_content_type(cram_block *b) { + return b->content_type; +} + +void cram_block_set_content_id(cram_block *b, int32_t id) { b->content_id = id; } +void cram_block_set_comp_size(cram_block *b, int32_t size) { b->comp_size = size; } +void cram_block_set_uncomp_size(cram_block *b, int32_t size) { b->uncomp_size = size; } +void cram_block_set_crc32(cram_block *b, int32_t crc) { b->crc32 = crc; } +void cram_block_set_data(cram_block *b, void *data) { BLOCK_DATA(b) = data; } +void cram_block_set_size(cram_block *b, int32_t size) { BLOCK_SIZE(b) = size; } + +int cram_block_append(cram_block *b, const void *data, int size) { + BLOCK_APPEND(b, data, size); + return 0; + + block_err: + return -1; +} +void 
cram_block_update_size(cram_block *b) { BLOCK_UPLEN(b); } + +// Offset is known as "size" internally, but it can be confusing. +size_t cram_block_get_offset(cram_block *b) { return BLOCK_SIZE(b); } +void cram_block_set_offset(cram_block *b, size_t offset) { BLOCK_SIZE(b) = offset; } + +/* + * Given a compressed block of data in a specified compression method, + * fill out the 'cm' field with meta-data gleaned from the compressed + * block. + * + * If comp is CRAM_COMP_UNKNOWN, we attempt to auto-detect the compression + * format, but this doesn't work for all methods. + * + * Retuns the detected or specified comp method, and fills out *cm + * if non-NULL. + */ +cram_method_details *cram_expand_method(uint8_t *data, int32_t size, + enum cram_block_method comp) { + cram_method_details *cm = calloc(1, sizeof(*cm)); + if (!cm) + return NULL; + + const char *xz_header = "\xFD""7zXZ"; // including nul + + if (comp == CRAM_COMP_UNKNOWN) { + // Auto-detect + if (size > 1 && data[0] == 0x1f && data[1] == 0x8b) + comp = CRAM_COMP_GZIP; + else if (size > 3 && data[1] == 'B' && data[2] == 'Z' + && data[3] == 'h') + comp = CRAM_COMP_BZIP2; + else if (size > 6 && memcmp(xz_header, data, 6) == 0) + comp = CRAM_COMP_LZMA; + else + comp = CRAM_COMP_UNKNOWN; + } + cm->method = comp; + + // Interrogate the compressed data stream to fill out additional fields. + switch (comp) { + case CRAM_COMP_GZIP: + if (size > 8) { + if (data[8] == 4) + cm->level = 1; + else if (data[8] == 2) + cm->level = 9; + else + cm->level = 5; + } + break; + + case CRAM_COMP_BZIP2: + if (size > 3 && data[3] >= '1' && data[3] <= '9') + cm->level = data[3]-'0'; + break; + + case CRAM_COMP_RANS4x8: + cm->Nway = 4; + if (size > 0 && data[0] == 1) + cm->order = 1; + else + cm->order = 0; + break; + + case CRAM_COMP_RANSNx16: + if (size > 0) { + cm->order = data[0] & 1; + cm->Nway = data[0] & RANS_ORDER_X32 ? 32 : 4; + cm->rle = data[0] & RANS_ORDER_RLE ? 1 : 0; + cm->pack = data[0] & RANS_ORDER_PACK ? 
1 : 0; + cm->cat = data[0] & RANS_ORDER_CAT ? 1 : 0; + cm->stripe = data[0] & RANS_ORDER_STRIPE ? 1 : 0; + cm->nosz = data[0] & RANS_ORDER_NOSZ ? 1 : 0; + } + break; + + case CRAM_COMP_ARITH: + if (size > 0) { + // Not in a public header, but the same transforms as rANSNx16 + cm->order = data[0] & 3; + cm->rle = data[0] & RANS_ORDER_RLE ? 1 : 0; + cm->pack = data[0] & RANS_ORDER_PACK ? 1 : 0; + cm->cat = data[0] & RANS_ORDER_CAT ? 1 : 0; + cm->stripe = data[0] & RANS_ORDER_STRIPE ? 1 : 0; + cm->nosz = data[0] & RANS_ORDER_NOSZ ? 1 : 0; + cm->ext = data[0] & 4 /*external*/ ? 1 : 0; + } + break; + + case CRAM_COMP_TOK3: + if (size > 8) { + if (data[8] == 1) + cm->level = 11; + else if (data[8] == 0) + cm->level = 1; + } + break; + + default: + break; + } + + return cm; +} + +/* + *----------------------------------------------------------------------------- + * cram_codecs + */ + +// -2 is unused. +// -1 is CORE +// >= 0 is the block with that Content ID +void cram_codec_get_content_ids(cram_codec *c, int ids[2]) { + ids[0] = cram_codec_to_id(c, &ids[1]); +} + +/* + *----------------------------------------------------------------------------- + * Utility functions + */ + +/* + * Copies the blocks representing the next num_slice slices from a + * container from 'in' to 'out'. It is expected that the file pointer + * is just after the read of the cram_container and cram compression + * header. 
+ * + * Returns 0 on success + * -1 on failure + */ +int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice) { + int32_t i, j; + + for (i = 0; i < num_slice; i++) { + cram_block *blk; + cram_block_slice_hdr *hdr; + + if (!(blk = cram_read_block(in))) + return -1; + if (!(hdr = cram_decode_slice_header(in, blk))) { + cram_free_block(blk); + return -1; + } + + if (cram_write_block(out, blk) != 0) { + cram_free_block(blk); + return -1; + } + cram_free_block(blk); + + int num_blocks = cram_slice_hdr_get_num_blocks(hdr); + for (j = 0; j < num_blocks; j++) { + blk = cram_read_block(in); + if (!blk || cram_write_block(out, blk) != 0) { + if (blk) cram_free_block(blk); + return -1; + } + cram_free_block(blk); + } + cram_free_slice_header(hdr); + } + + return 0; +} + +/* + * Discards the next containers worth of data. + * Only the cram structure has been read so far. + * + * Returns 0 on success, + * -1 on failure + */ +static int cram_skip_container(cram_fd *in, cram_container *c) { + // Compression header + cram_block *blk; + if (!(blk = cram_read_block(in))) + return -1; + cram_free_block(blk); + + int i; + for (i = 0; i < c->num_landmarks; i++) { + cram_block_slice_hdr *hdr; + + if (!(blk = cram_read_block(in))) + return -1; + if (!(hdr = cram_decode_slice_header(in, blk))) { + cram_free_block(blk); + return -1; + } + cram_free_block(blk); + + int num_blocks = cram_slice_hdr_get_num_blocks(hdr), j; + for (j = 0; j < num_blocks; j++) { + blk = cram_read_block(in); + if (!blk) { + cram_free_slice_header(hdr); + return -1; + } + cram_free_block(blk); + } + cram_free_slice_header(hdr); + } + + return 0; +} + + +/* + * Copies a container, but filtering it down to a specific region, + * which has already been set on the 'in' fd. + * + * This is used in e.g. samtools cat where we specified a region and discover + * that a region doesn't entirely span the container, so we have to select + * which reads we need to copy out of it. 
+ * + * If ref_id is non-NULL we also return the last ref_id we filtered. + * This can be -2 if it's multi-ref and we observe more than one reference, + * and actual ref_id >= -1 if it's multi-ref and we observe just one ref or + * it's fixed reference. + * + * Returns 0 on success + * -1 on error + */ +int cram_filter_container(cram_fd *in, cram_fd *out, cram_container *c, + int *ref_id) { + int err = 0, fixed_ref = -3; + + if (ref_id) + *ref_id = c->ref_seq_id; + + int rid = in->range.refid == -2 ? -1 : in->range.refid; + if (rid != c->ref_seq_id || + in->range.start > c->ref_seq_start + c->ref_seq_span-1) + // Except for multi-ref cases + if (c->ref_seq_id != -2) + return cram_skip_container(in, c); + + // Container compression header + cram_block *blk = cram_read_block(in); + if (!blk) + return -1; + c->comp_hdr = cram_decode_compression_header(in, blk); + in->ctr = c; + + // If it's multi-ref but a constant ref-id, then we can still do + // basic level chromosome filtering. Similarly multi-ref where we're + // _already_ in ref "*" (unmapped) means we can just copy the container + // as there are no positions to filter on and "*" sorts to the end. + // TODO: how to tell "already in" though? + if (c->ref_seq_id == -2) { + cram_codec *cd = c->comp_hdr->codecs[DS_RI]; + if (cd && cd->codec == E_HUFFMAN && cd->u.huffman.ncodes == 1 && + // this check should be always true anyway + rid == cd->u.huffman.codes[0].symbol) + // We're in multi-ref mode, but actually the entire container + // matches. So if we're in whole-chromosome mode we can just + // copy. + if (in->range.start <= 1 && + in->range.end >= (INT64_MAX&(0xffffffffULL<<32))) { + if (ref_id) + *ref_id = rid; + err |= cram_write_container(out, c) < 0; + err |= cram_write_block(out, blk); + return cram_copy_slice(in, out, c->num_landmarks) | -err; + } + } + + // A simple read-write loop with region filtering automatically due to + // an earlier CRAM_OPT_RANGE request. 
+ // + // We can hit EOF when reaching the end of the range, but we still need + // to manually check we don't attempt to read beyond this single container. + + cram_range rng_copy = in->range; + in->range.start = INT64_MIN; + in->range.end = INT64_MAX; + + bam1_t *b = bam_init1(); + while ((c->curr_slice < c->max_slice || + c->slice->curr_rec < c->slice->max_rec)) { + cram_slice *s; + if (c->slice && c->slice->curr_rec < c->slice->max_rec) + s = c->slice; + else if (c->curr_slice < c->max_slice) + s = cram_next_slice(in, &c); + else + break; // end of container + c->slice = s; + + // This is more efficient if we check as a cram record instead of a + // bam record as we don't have to parse CIGAR end. + cram_record *cr = &c->slice->crecs[c->slice->curr_rec]; + if (fixed_ref == -3) + fixed_ref = cr->ref_id; + else if (fixed_ref != cr->ref_id) + fixed_ref = -2; + + if (rng_copy.refid != cr->ref_id) { + if (rng_copy.refid == -2) { + if (cr->ref_id > -1) { + // Want unmapped, but have mapped + c->slice->curr_rec++; + continue; + } + } else { + if (rng_copy.refid > cr->ref_id || rng_copy.refid == -1) { + // multi-ref and not at the correct ref yet + c->slice->curr_rec++; + continue; + } else { + // multi-ref and beyond the desired ref + break; + } + } + } + + // Correct ref, but check the desired region + if (cr->aend < rng_copy.start) { + c->slice->curr_rec++; + continue; + } + if (cr->apos > rng_copy.end) + break; + + // Broadly rquivalent to cram_get_bam_seq, but starting from 'cr' + err |= cram_to_bam(in->header, in, s, cr, s->curr_rec++, &b) < 0; + + if (cram_put_bam_seq(out, b) < 0) { + err |= 1; + break; + } + } + bam_destroy1(b); + + if (ref_id) + *ref_id = fixed_ref; + + in->range = rng_copy; + + // Avoids double frees as we stole the container from our other + // file descriptor. + in->ctr = NULL; + in->ctr_mt = NULL; + + err |= cram_flush(out); + cram_free_block(blk); + + return -err; +} + + +/* + * Renumbers RG numbers in a cram compression header. 
+ * + * CRAM stores RG as the Nth number in the header, rather than a + * string holding the ID: tag. This is smaller in space, but means + * "samtools cat" to join files together that contain single but + * different RG lines needs a way of renumbering them. + * + * The file descriptor is expected to be immediately after the + * cram_container structure (ie before the cram compression header). + * Due to the nature of the CRAM format, this needs to read and write + * the blocks itself. Note that there may be multiple slices within + * the container, meaning multiple compression headers to manipulate. + * Changing RG may change the size of the compression header and + * therefore the length field in the container. Hence we rewrite all + * blocks just in case and also emit the adjusted container. + * + * The current implementation can only cope with renumbering a single + * RG (and only then if it is using HUFFMAN or BETA codecs). In + * theory it *may* be possible to renumber multiple RGs if they use + * HUFFMAN to the CORE block or use an external block unshared by any + * other data series. So we have an API that can be upgraded to + * support this, but do not implement it for now. An example + * implementation of RG as an EXTERNAL block would be to find that + * block and rewrite it, returning the number of blocks consumed. + * + * Returns 0 on success; + * -1 if unable to edit; + * -2 on other errors (eg I/O). + */ +int cram_transcode_rg(cram_fd *in, cram_fd *out, + cram_container *c, + int nrg, int *in_rg, int *out_rg) { + int new_rg = *out_rg, old_size, new_size; + cram_block *o_blk, *n_blk; + cram_block_compression_hdr *ch; + + if (nrg != 1) { + hts_log_error("CRAM transcode supports only a single RG"); + return -2; + } + + // Produce a new block holding the updated compression header, + // with RG transcoded to a new value. (Single only supported.) 
+ o_blk = cram_read_block(in); + old_size = cram_block_size(o_blk); + ch = cram_decode_compression_header(in, o_blk); + if (cram_block_compression_hdr_set_rg(ch, new_rg) != 0) + return -1; + if (cram_block_compression_hdr_decoder2encoder(in, ch) != 0) + return -1; + n_blk = cram_encode_compression_header(in, c, ch, in->embed_ref); + cram_free_compression_header(ch); + + /* + * Warning: this has internal knowledge of the cram compression + * header format. + * + * The decoder doesn't set c->tags_used, so the encoder puts a two + * byte blank segment. This means n_blk is too short. We skip + * through the decoded old block (o_blk) and copy from there. + */ + char *cp = cram_block_get_data(o_blk); + char *op = cp; + char *endp = cp + cram_block_get_uncomp_size(o_blk); + //fprintf(stderr, "sz = %d\n", (int)(endp-cp)); + int32_t i32, err = 0; + + i32 = in->vv.varint_get32(&cp, endp, &err); + cp += i32; + i32 = in->vv.varint_get32(&cp, endp, &err); + cp += i32; + op = cp; + i32 = in->vv.varint_get32(&cp, endp, &err); + i32 += (cp-op); + if (err) + return -2; + + //fprintf(stderr, "remaining %d bytes\n", i32); + cram_block_set_size(n_blk, cram_block_get_size(n_blk)-2); + cram_block_append(n_blk, op, i32); + cram_block_update_size(n_blk); + + new_size = cram_block_size(n_blk); + + //fprintf(stderr, "size %d -> %d\n", old_size, new_size); + + // Now we've constructedthe updated compression header, + // amend the container too (it may have changed size). + int32_t *landmarks, num_landmarks; + landmarks = cram_container_get_landmarks(c, &num_landmarks); + + if (old_size != new_size) { + int diff = new_size - old_size, j; + + for (j = 0; j < num_landmarks; j++) + landmarks[j] += diff; + //cram_container_set_landmarks(c, num_landmarks, landmarks); + cram_container_set_length(c, cram_container_get_length(c) + diff); + } + + // Finally write it all out; container, compression header, + // and then all the remaining slice blocks. 
+ if (cram_write_container(out, c) != 0) + return -2; + + cram_write_block(out, n_blk); + cram_free_block(o_blk); + cram_free_block(n_blk); + + // Container num_blocks can be invalid, due to a bug. + // Instead we iterate in slice context instead. + return cram_copy_slice(in, out, num_landmarks); +} + + +/*! + * Returns the refs_t structure used by a cram file handle. + * + * This may be used in conjunction with option CRAM_OPT_SHARED_REF to + * share reference memory between multiple file handles. + * + * @return + * Returns NULL if none exists or the file handle is not a CRAM file. + */ +refs_t *cram_get_refs(htsFile *fd) { + return fd->format.format == cram + ? fd->fp.cram->refs + : NULL; +} diff --git a/src/htslib-1.21/cram/cram_index.c b/src/htslib-1.21/cram/cram_index.c new file mode 100644 index 0000000..77c953d --- /dev/null +++ b/src/htslib-1.21/cram/cram_index.c @@ -0,0 +1,1040 @@ +/* +Copyright (c) 2013-2020, 2023-2024 Genome Research Ltd. +Author: James Bonfield + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * The index is a gzipped tab-delimited text file with one line per slice. + * The columns are: + * 1: reference number (0 to N-1, as per BAM ref_id) + * 2: reference position of 1st read in slice (1..?) + * 3: number of reads in slice + * 4: offset of container start (relative to end of SAM header, so 1st + * container is offset 0). + * 5: slice number within container (ie which landmark). + * + * In memory, we hold this in a nested containment list. Each list element is + * a cram_index struct. Each element in turn can contain its own list of + * cram_index structs. + * + * Any start..end range which is entirely contained within another (and + * earlier as it is sorted) range will be held within it. This ensures that + * the outer list will never have containments and we can safely do a + * binary search to find the first range which overlaps any given coordinate. + */ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../htslib/bgzf.h" +#include "../htslib/hfile.h" +#include "../hts_internal.h" +#include "cram.h" +#include "os.h" + +#if 0 +static void dump_index_(cram_index *e, int level) { + int i, n; + n = printf("%*s%d / %d .. 
%d, ", level*4, "", e->refid, e->start, e->end); + printf("%*soffset %"PRId64" %p %p\n", MAX(0,50-n), "", e->offset, e, e->e_next); + for (i = 0; i < e->nslice; i++) { + dump_index_(&e->e[i], level+1); + } +} + +static void dump_index(cram_fd *fd) { + int i; + for (i = 0; i < fd->index_sz; i++) { + dump_index_(&fd->index[i], 0); + } +} +#endif + +// Thread a linked list through the nested containment list. +// This makes navigating it and finding the "next" index entry +// trivial. +static cram_index *link_index_(cram_index *e, cram_index *e_last) { + int i; + if (e_last) + e_last->e_next = e; + + // We don't want to link in the top-level cram_index with + // offset=0 and start/end = INT_MIN/INT_MAX. + if (e->offset) + e_last = e; + + for (i = 0; i < e->nslice; i++) + e_last = link_index_(&e->e[i], e_last); + + return e_last; +} + +static void link_index(cram_fd *fd) { + int i; + cram_index *e_last = NULL; + + for (i = 0; i < fd->index_sz; i++) { + e_last = link_index_(&fd->index[i], e_last); + } + + if (e_last) + e_last->e_next = NULL; +} + +static int kget_int32(kstring_t *k, size_t *pos, int32_t *val_p) { + int sign = 1; + int32_t val = 0; + size_t p = *pos; + + while (p < k->l && (k->s[p] == ' ' || k->s[p] == '\t')) + p++; + + if (p < k->l && k->s[p] == '-') + sign = -1, p++; + + if (p >= k->l || !(k->s[p] >= '0' && k->s[p] <= '9')) + return -1; + + while (p < k->l && k->s[p] >= '0' && k->s[p] <= '9') { + int digit = k->s[p++]-'0'; + val = val*10 + digit; + } + + *pos = p; + *val_p = sign*val; + + return 0; +} + +static int kget_int64(kstring_t *k, size_t *pos, int64_t *val_p) { + int sign = 1; + int64_t val = 0; + size_t p = *pos; + + while (p < k->l && (k->s[p] == ' ' || k->s[p] == '\t')) + p++; + + if (p < k->l && k->s[p] == '-') + sign = -1, p++; + + if (p >= k->l || !(k->s[p] >= '0' && k->s[p] <= '9')) + return -1; + + while (p < k->l && k->s[p] >= '0' && k->s[p] <= '9') { + int digit = k->s[p++]-'0'; + val = val*10 + digit; + } + + *pos = p; + *val_p = 
sign*val; + + return 0; +} + +/* + * Loads a CRAM .crai index into memory. + * + * Returns 0 for success + * -1 for failure + */ +int cram_index_load(cram_fd *fd, const char *fn, const char *fn_idx) { + + char *tfn_idx = NULL; + char buf[65536]; + ssize_t len; + kstring_t kstr = {0}; + hFILE *fp; + cram_index *idx; + cram_index **idx_stack = NULL, *ep, e; + int idx_stack_alloc = 0, idx_stack_ptr = 0; + size_t pos = 0; + + /* Check if already loaded */ + if (fd->index) + return 0; + + fd->index = calloc((fd->index_sz = 1), sizeof(*fd->index)); + if (!fd->index) + return -1; + + idx = &fd->index[0]; + idx->refid = -1; + idx->start = INT_MIN; + idx->end = INT_MAX; + + idx_stack = calloc(++idx_stack_alloc, sizeof(*idx_stack)); + if (!idx_stack) + goto fail; + + idx_stack[idx_stack_ptr] = idx; + + // Support pathX.cram##idx##pathY.crai + const char *fn_delim = strstr(fn, HTS_IDX_DELIM); + if (fn_delim && !fn_idx) + fn_idx = fn_delim + strlen(HTS_IDX_DELIM); + + if (!fn_idx) { + if (hts_idx_check_local(fn, HTS_FMT_CRAI, &tfn_idx) == 0 && hisremote(fn)) + tfn_idx = hts_idx_getfn(fn, ".crai"); + + if (!tfn_idx) { + hts_log_error("Could not retrieve index file for '%s'", fn); + goto fail; + } + fn_idx = tfn_idx; + } + + if (!(fp = hopen(fn_idx, "r"))) { + hts_log_error("Could not open index file '%s'", fn_idx); + goto fail; + } + + // Load the file into memory + while ((len = hread(fp, buf, sizeof(buf))) > 0) { + if (kputsn(buf, len, &kstr) < 0) + goto fail; + } + + if (len < 0 || kstr.l < 2) + goto fail; + + if (hclose(fp) < 0) + goto fail; + + // Uncompress if required + if (kstr.s[0] == 31 && (uc)kstr.s[1] == 139) { + size_t l = 0; + char *s = zlib_mem_inflate(kstr.s, kstr.l, &l); + if (!s) + goto fail; + + free(kstr.s); + kstr.s = s; + kstr.l = l; + kstr.m = l; // conservative estimate of the size allocated + if (kputsn("", 0, &kstr) < 0) // ensure kstr.s is NUL-terminated + goto fail; + } + + + // Parse it line at a time + while (pos < kstr.l) { + /* 1.1 layout */ + if 
(kget_int32(&kstr, &pos, &e.refid) == -1) + goto fail; + + if (kget_int32(&kstr, &pos, &e.start) == -1) + goto fail; + + if (kget_int32(&kstr, &pos, &e.end) == -1) + goto fail; + + if (kget_int64(&kstr, &pos, &e.offset) == -1) + goto fail; + + if (kget_int32(&kstr, &pos, &e.slice) == -1) + goto fail; + + if (kget_int32(&kstr, &pos, &e.len) == -1) + goto fail; + + e.end += e.start-1; + //printf("%d/%d..%d-offset=%" PRIu64 ",len=%d,slice=%d\n", e.refid, e.start, e.end, e.offset, e.len, e.slice); + + if (e.refid < -1) { + hts_log_error("Malformed index file, refid %d", e.refid); + goto fail; + } + + if (e.refid != idx->refid) { + if (fd->index_sz < e.refid+2) { + cram_index *new_idx; + int new_sz = e.refid+2; + size_t index_end = fd->index_sz * sizeof(*fd->index); + new_idx = realloc(fd->index, + new_sz * sizeof(*fd->index)); + if (!new_idx) + goto fail; + + fd->index = new_idx; + fd->index_sz = new_sz; + memset(((char *)fd->index) + index_end, 0, + fd->index_sz * sizeof(*fd->index) - index_end); + } + idx = &fd->index[e.refid+1]; + idx->refid = e.refid; + idx->start = INT_MIN; + idx->end = INT_MAX; + idx->nslice = idx->nalloc = 0; + idx->e = NULL; + idx_stack[(idx_stack_ptr = 0)] = idx; + } + + while (!(e.start >= idx->start && e.end <= idx->end) || + (idx->start == 0 && idx->refid == -1)) { + idx = idx_stack[--idx_stack_ptr]; + } + + // Now contains, so append + if (idx->nslice+1 >= idx->nalloc) { + cram_index *new_e; + idx->nalloc = idx->nalloc ? 
idx->nalloc*2 : 16; + new_e = realloc(idx->e, idx->nalloc * sizeof(*idx->e)); + if (!new_e) + goto fail; + + idx->e = new_e; + } + + e.nalloc = e.nslice = 0; e.e = NULL; + *(ep = &idx->e[idx->nslice++]) = e; + idx = ep; + + if (++idx_stack_ptr >= idx_stack_alloc) { + cram_index **new_stack; + idx_stack_alloc *= 2; + new_stack = realloc(idx_stack, idx_stack_alloc*sizeof(*idx_stack)); + if (!new_stack) + goto fail; + idx_stack = new_stack; + } + idx_stack[idx_stack_ptr] = idx; + + while (pos < kstr.l && kstr.s[pos] != '\n') + pos++; + pos++; + } + + free(idx_stack); + free(kstr.s); + free(tfn_idx); + + // Convert NCList to linear linked list + link_index(fd); + + //dump_index(fd); + + return 0; + + fail: + free(kstr.s); + free(idx_stack); + free(tfn_idx); + cram_index_free(fd); // Also sets fd->index = NULL + return -1; +} + +static void cram_index_free_recurse(cram_index *e) { + if (e->e) { + int i; + for (i = 0; i < e->nslice; i++) { + cram_index_free_recurse(&e->e[i]); + } + free(e->e); + } +} + +void cram_index_free(cram_fd *fd) { + int i; + + if (!fd->index) + return; + + for (i = 0; i < fd->index_sz; i++) { + cram_index_free_recurse(&fd->index[i]); + } + free(fd->index); + + fd->index = NULL; +} + +/* + * Searches the index for the first slice overlapping a reference ID + * and position, or one immediately preceding it if none is found in + * the index to overlap this position. (Our index may have missing + * entries, but we require at least one per reference.) + * + * If the index finds multiple slices overlapping this position we + * return the first one only. Subsequent calls should specify + * "from" as the last slice we checked to find the next one. Otherwise + * set "from" to be NULL to find the first one. + * + * Refid can also be any of the special HTS_IDX_ values. + * For backwards compatibility, refid -1 is equivalent to HTS_IDX_NOCOOR. 
+ * + * Returns the cram_index pointer on success + * NULL on failure + */ +cram_index *cram_index_query(cram_fd *fd, int refid, hts_pos_t pos, + cram_index *from) { + int i, j, k; + cram_index *e; + + if (from) { + // Continue from a previous search. + // We switch to just scanning the linked list, as the nested + // lists are typically short. + if (refid == HTS_IDX_NOCOOR) + refid = -1; + + e = from->e_next; + if (e && e->refid == refid && e->start <= pos) + return e; + else + return NULL; + } + + switch(refid) { + case HTS_IDX_NONE: + case HTS_IDX_REST: + // fail, or already there, dealt with elsewhere. + return NULL; + + case -1: + case HTS_IDX_NOCOOR: + refid = -1; + pos = 0; + break; + + case HTS_IDX_START: { + int64_t min_idx = INT64_MAX; + for (i = 0, j = -1; i < fd->index_sz; i++) { + if (fd->index[i].e && fd->index[i].e[0].offset < min_idx) { + min_idx = fd->index[i].e[0].offset; + j = i; + } + } + if (j < 0) + return NULL; + return fd->index[j].e; + } + + default: + if (refid < HTS_IDX_NONE || refid+1 >= fd->index_sz) + return NULL; + } + + from = &fd->index[refid+1]; + + // Ref with nothing aligned against it. + if (!from->e) + return NULL; + + // This sequence is covered by the index, so binary search to find + // the optimal starting block. + i = 0, j = fd->index[refid+1].nslice-1; + for (k = j/2; k != i; k = (j-i)/2 + i) { + if (from->e[k].refid > refid) { + j = k; + continue; + } + + if (from->e[k].refid < refid) { + i = k; + continue; + } + + if (from->e[k].start >= pos) { + j = k; + continue; + } + + if (from->e[k].start < pos) { + i = k; + continue; + } + } + // i==j or i==j-1. Check if j is better. 
+ if (j >= 0 && from->e[j].start < pos && from->e[j].refid == refid) + i = j; + + /* The above found *a* bin overlapping, but not necessarily the first */ + while (i > 0 && from->e[i-1].end >= pos) + i--; + + /* We may be one bin before the optimum, so check */ + while (i+1 < from->nslice && + (from->e[i].refid < refid || + from->e[i].end < pos)) + i++; + + e = &from->e[i]; + + return e; +} + +// Return the index entry for last slice on a specific reference. +cram_index *cram_index_last(cram_fd *fd, int refid, cram_index *from) { + int slice; + + if (refid+1 < 0 || refid+1 >= fd->index_sz) + return NULL; + + if (!from) + from = &fd->index[refid+1]; + + // Ref with nothing aligned against it. + if (!from->e) + return NULL; + + slice = fd->index[refid+1].nslice - 1; + + // e is the last entry in the nested containment list, but it may + // contain further slices within it. + cram_index *e = &from->e[slice]; + while (e->e_next) + e = e->e_next; + + return e; +} + +/* + * Find the last container overlapping pos 'end', and the file offset of + * its end (equivalent to the start offset of the container following it). + */ +cram_index *cram_index_query_last(cram_fd *fd, int refid, hts_pos_t end) { + cram_index *e = NULL, *prev_e; + do { + prev_e = e; + e = cram_index_query(fd, refid, end, prev_e); + } while (e); + + if (!prev_e) + return NULL; + e = prev_e; + + // Note: offset of e and e->e_next may be the same if we're using a + // multi-ref container where a single container generates multiple + // index entries. + // + // We need to keep iterating until offset differs in order to find + // the genuine file offset for the end of container. + do { + prev_e = e; + e = e->e_next; + } while (e && e->offset == prev_e->offset); + + return prev_e; +} + +/* + * Skips to a container overlapping the start coordinate listed in + * cram_range. + * + * In theory we call cram_index_query multiple times, once per slice + * overlapping the range. 
However slices may be absent from the index + * which makes this problematic. Instead we find the left-most slice + * and then read from then on, skipping decoding of slices and/or + * whole containers when they don't overlap the specified cram_range. + * + * This function also updates the cram_fd range field. + * + * Returns 0 on success + * -1 on general failure + * -2 on no-data (empty chromosome) + */ +int cram_seek_to_refpos(cram_fd *fd, cram_range *r) { + int ret = 0; + cram_index *e; + + if (r->refid == HTS_IDX_NONE) { + ret = -2; goto err; + } + + // Ideally use an index, so see if we have one. + if ((e = cram_index_query(fd, r->refid, r->start, NULL))) { + if (0 != cram_seek(fd, e->offset, SEEK_SET)) { + if (0 != cram_seek(fd, e->offset - fd->first_container, SEEK_CUR)) { + ret = -1; goto err; + } + } + } else { + // Absent from index, but this most likely means it simply has no data. + ret = -2; goto err; + } + + pthread_mutex_lock(&fd->range_lock); + fd->range = *r; + if (r->refid == HTS_IDX_NOCOOR) { + fd->range.refid = -1; + fd->range.start = 0; + } else if (r->refid == HTS_IDX_START || r->refid == HTS_IDX_REST) { + fd->range.refid = -2; // special case in cram_next_slice + } + pthread_mutex_unlock(&fd->range_lock); + + if (fd->ctr) { + cram_free_container(fd->ctr); + if (fd->ctr_mt && fd->ctr_mt != fd->ctr) + cram_free_container(fd->ctr_mt); + fd->ctr = NULL; + fd->ctr_mt = NULL; + fd->ooc = 0; + fd->eof = 0; + } + + return 0; + + err: + // It's unlikely fd->range will be accessed after EOF or error, + // but this maintains identical behaviour to the previous code. + pthread_mutex_lock(&fd->range_lock); + fd->range = *r; + pthread_mutex_unlock(&fd->range_lock); + return ret; +} + + +/* + * A specialised form of cram_index_build (below) that deals with slices + * having multiple references in this (ref_id -2). In this scenario we + * decode the slice to look at the RI data series instead. 
+ * + * Returns 0 on success + * -1 on read failure + * -2 on wrong sort order + * -4 on write failure + */ +static int cram_index_build_multiref(cram_fd *fd, + cram_container *c, + cram_slice *s, + BGZF *fp, + off_t cpos, + int32_t landmark, + int sz) { + int i, ref = -2; + int64_t ref_start = 0, ref_end; + char buf[1024]; + + if (fd->mode != 'w') { + if (0 != cram_decode_slice(fd, c, s, fd->header)) + return -1; + } + + ref_end = INT_MIN; + + int32_t last_ref = -9; + int32_t last_pos = -9; + for (i = 0; i < s->hdr->num_records; i++) { + if (s->crecs[i].ref_id == last_ref && s->crecs[i].apos < last_pos) { + hts_log_error("CRAM file is not sorted by chromosome / position"); + return -2; + } + last_ref = s->crecs[i].ref_id; + last_pos = s->crecs[i].apos; + + if (s->crecs[i].ref_id == ref) { + if (ref_end < s->crecs[i].aend) + ref_end = s->crecs[i].aend; + continue; + } + + if (ref != -2) { + snprintf(buf, sizeof(buf), + "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", + ref, ref_start, ref_end - ref_start + 1, + (int64_t)cpos, landmark, sz); + if (bgzf_write(fp, buf, strlen(buf)) < 0) + return -4; + } + + ref = s->crecs[i].ref_id; + ref_start = s->crecs[i].apos; + ref_end = s->crecs[i].aend; + } + + if (ref != -2) { + snprintf(buf, sizeof(buf), + "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", + ref, ref_start, ref_end - ref_start + 1, + (int64_t)cpos, landmark, sz); + if (bgzf_write(fp, buf, strlen(buf)) < 0) + return -4; + } + + return 0; +} + +/* + * Adds a single slice to the index. 
+ */ +int cram_index_slice(cram_fd *fd, + cram_container *c, + cram_slice *s, + BGZF *fp, + off_t cpos, + off_t spos, // relative to cpos + off_t sz) { + int ret; + char buf[1024]; + + if (sz > INT_MAX) { + hts_log_error("CRAM slice is too big (%"PRId64" bytes)", + (int64_t) sz); + return -1; + } + + if (s->hdr->ref_seq_id == -2) { + ret = cram_index_build_multiref(fd, c, s, fp, cpos, spos, sz); + } else { + snprintf(buf, sizeof(buf), + "%d\t%"PRId64"\t%"PRId64"\t%"PRId64"\t%d\t%d\n", + s->hdr->ref_seq_id, s->hdr->ref_seq_start, + s->hdr->ref_seq_span, (int64_t)cpos, (int)spos, (int)sz); + ret = (bgzf_write(fp, buf, strlen(buf)) >= 0)? 0 : -4; + } + + return ret; +} + +/* + * Adds a single container to the index. + */ +static +int cram_index_container(cram_fd *fd, + cram_container *c, + BGZF *fp, + off_t cpos) { + int j; + off_t spos; + + // 2.0 format + for (j = 0; j < c->num_landmarks; j++) { + cram_slice *s; + off_t sz; + int ret; + + spos = htell(fd->fp); + if (spos - cpos - (off_t) c->offset != c->landmark[j]) { + hts_log_error("CRAM slice offset %"PRId64" does not match" + " landmark %d in container header (%"PRId32")", + (int64_t) (spos - cpos - (off_t) c->offset), + j, c->landmark[j]); + return -1; + } + + if (!(s = cram_read_slice(fd))) { + return -1; + } + + sz = htell(fd->fp) - spos; + ret = cram_index_slice(fd, c, s, fp, cpos, c->landmark[j], sz); + + cram_free_slice(s); + + if (ret < 0) { + return ret; + } + } + + return 0; +} + + +/* + * Builds an index file. + * + * fd is a newly opened cram file that we wish to index. + * fn_base is the filename of the associated CRAM file. + * fn_idx is the filename of the index file to be written; + * if NULL, we add ".crai" to fn_base to get the index filename. 
+ * + * Returns 0 on success, + * negative on failure (-1 for read failure, -4 for write failure) + */ +int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx) { + cram_container *c; + off_t cpos, hpos; + BGZF *fp; + kstring_t fn_idx_str = {0}; + int64_t last_ref = -9, last_start = -9; + + // Useful for cram_index_build_multiref + cram_set_option(fd, CRAM_OPT_REQUIRED_FIELDS, SAM_RNAME | SAM_POS | SAM_CIGAR); + + if (! fn_idx) { + kputs(fn_base, &fn_idx_str); + kputs(".crai", &fn_idx_str); + fn_idx = fn_idx_str.s; + } + + if (!(fp = bgzf_open(fn_idx, "wg"))) { + perror(fn_idx); + free(fn_idx_str.s); + return -4; + } + + free(fn_idx_str.s); + + cpos = htell(fd->fp); + while ((c = cram_read_container(fd))) { + if (fd->err) { + perror("Cram container read"); + return -1; + } + + hpos = htell(fd->fp); + + if (!(c->comp_hdr_block = cram_read_block(fd))) + return -1; + assert(c->comp_hdr_block->content_type == COMPRESSION_HEADER); + + c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block); + if (!c->comp_hdr) + return -1; + + if (c->ref_seq_id == last_ref && c->ref_seq_start < last_start) { + hts_log_error("CRAM file is not sorted by chromosome / position"); + return -2; + } + last_ref = c->ref_seq_id; + last_start = c->ref_seq_start; + + if (cram_index_container(fd, c, fp, cpos) < 0) { + bgzf_close(fp); + return -1; + } + + off_t next_cpos = htell(fd->fp); + if (next_cpos != hpos + c->length) { + hts_log_error("Length %"PRId32" in container header at offset %lld does not match block lengths (%lld)", + c->length, (long long) cpos, (long long) next_cpos - hpos); + return -1; + } + cpos = next_cpos; + + cram_free_container(c); + } + if (fd->err) { + bgzf_close(fp); + return -1; + } + + return (bgzf_close(fp) >= 0)? 
0 : -4; +} + +// internal recursive step +static int64_t cram_num_containers_between_(cram_index *e, int64_t *last_pos, + int64_t nct, + off_t cstart, off_t cend, + int64_t *first, int64_t *last) { + int64_t nc = 0, i; + + if (e->offset) { + if (e->offset != *last_pos) { + if (e->offset >= cstart && (!cend || e->offset <= cend)) { + if (first && *first < 0) + *first = nct; + if (last) + *last = nct; + } + nc++; + } + // else a new multi-ref in same container + *last_pos = e->offset; + } + + for (i = 0; i < e->nslice; i++) + nc += cram_num_containers_between_(&e->e[i], last_pos, nc + nct, + cstart, cend, first, last); + + return nc; +} + +/*! Returns the number of containers in the CRAM file within given offsets. + * + * The cstart and cend offsets are the locations of the start of containers + * as returned by index_container_offset. + * + * If non-NULL, first and last will hold the inclusive range of container + * numbers, counting from zero. + * + * @return + * Returns the number of containers, equivalent to *last-*first+1. + */ +int64_t cram_num_containers_between(cram_fd *fd, + off_t cstart, off_t cend, + int64_t *first, int64_t *last) { + int64_t nc = 0, i; + int64_t last_pos = -99; + int64_t l_first = -1, l_last = -1; + + for (i = 0; i < fd->index_sz; i++) { + int j = i+1 == fd->index_sz ? 0 : i+1; // maps "*" to end + nc += cram_num_containers_between_(&fd->index[j], &last_pos, nc, + cstart, cend, &l_first, &l_last); + } + + if (first) + *first = l_first; + if (last) + *last = l_last; + + return l_last - l_first + 1; +} + +/* + * Queries the total number of distinct containers in the index. + * Note there may be more containers in the file than in the index, as we + * are not required to have an index entry for every one. + */ +int64_t cram_num_containers(cram_fd *fd) { + return cram_num_containers_between(fd, 0, 0, NULL, NULL); +} + + +/*! Returns the byte offset for the start of the n^th container. 
+ * + * The index must have previously been loaded, otherwise <0 is returned. + */ +static cram_index *cram_container_num2offset_(cram_index *e, int num, + int64_t *last_pos, int *nc) { + if (e->offset) { + if (e->offset != *last_pos) { + if (*nc == num) + return e; + (*nc)++; + } + // else a new multi-ref in same container + *last_pos = e->offset; + } + + int i; + for (i = 0; i < e->nslice; i++) { + cram_index *tmp = cram_container_num2offset_(&e->e[i], num, + last_pos, nc); + if (tmp) + return tmp; + } + + + return NULL; +} + +off_t cram_container_num2offset(cram_fd *fd, int64_t num) { + int nc = 0, i; + int64_t last_pos = -9; + cram_index *e = NULL; + + for (i = 0; i < fd->index_sz; i++) { + int j = i+1 == fd->index_sz ? 0 : i+1; // maps "*" to end + if (!fd->index[j].nslice) + continue; + if ((e = cram_container_num2offset_(&fd->index[j], num, + &last_pos, &nc))) + break; + } + + return e ? e->offset : -1; +} + + +/*! Returns the container number for the first container at offset >= pos. + * + * The index must have previously been loaded, otherwise <0 is returned. + */ +static cram_index *cram_container_offset2num_(cram_index *e, off_t pos, + int64_t *last_pos, int *nc) { + if (e->offset) { + if (e->offset != *last_pos) { + if (e->offset >= pos) + return e; + (*nc)++; + } + // else a new multi-ref in same container + *last_pos = e->offset; + } + + int i; + for (i = 0; i < e->nslice; i++) { + cram_index *tmp = cram_container_offset2num_(&e->e[i], pos, + last_pos, nc); + if (tmp) + return tmp; + } + + + return NULL; +} + +int64_t cram_container_offset2num(cram_fd *fd, off_t pos) { + int nc = 0, i; + int64_t last_pos = -9; + cram_index *e = NULL; + + for (i = 0; i < fd->index_sz; i++) { + int j = i+1 == fd->index_sz ? 0 : i+1; // maps "*" to end + if (!fd->index[j].nslice) + continue; + if ((e = cram_container_offset2num_(&fd->index[j], pos, + &last_pos, &nc))) + break; + } + + return e ? nc : -1; +} + +/*! 
+ * Returns the file offsets of CRAM containers covering a specific region + * query. Note both offsets are the START of the container. + * + * first will point to the start of the first overlapping container + * last will point to the start of the last overlapping container + * + * Returns 0 on success + * <0 on failure + */ +int cram_index_extents(cram_fd *fd, int refid, hts_pos_t start, hts_pos_t end, + off_t *first, off_t *last) { + cram_index *ci; + + if (first) { + if (!(ci = cram_index_query(fd, refid, start, NULL))) + return -1; + *first = ci->offset; + } + + if (last) { + if (!(ci = cram_index_query_last(fd, refid, end))) + return -1; + *last = ci->offset; + } + + return 0; +} diff --git a/src/htslib-1.18/cram/cram_index.h b/src/htslib-1.21/cram/cram_index.h similarity index 100% rename from src/htslib-1.18/cram/cram_index.h rename to src/htslib-1.21/cram/cram_index.h diff --git a/src/htslib-1.21/cram/cram_io.c b/src/htslib-1.21/cram/cram_io.c new file mode 100644 index 0000000..7f7ffca --- /dev/null +++ b/src/htslib-1.21/cram/cram_io.c @@ -0,0 +1,6025 @@ +/* +Copyright (c) 2012-2024 Genome Research Ltd. +Author: James Bonfield + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * CRAM I/O primitives. + * + * - ITF8 encoding and decoding. + * - Block based I/O + * - Zlib inflating and deflating (memory) + * - CRAM basic data structure reading and writing + * - File opening / closing + * - Reference sequence handling + */ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_LIBBZ2 +#include +#endif +#ifdef HAVE_LIBLZMA +#ifdef HAVE_LZMA_H +#include +#else +#include "../os/lzma_stub.h" +#endif +#endif +#include +#include +#include +#include + +#ifdef HAVE_LIBDEFLATE +#include +#define crc32(a,b,c) libdeflate_crc32((a),(b),(c)) +#endif + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +#include "../fuzz_settings.h" +#endif + +#include "cram.h" +#include "os.h" +#include "../htslib/hts.h" +#include "open_trace_file.h" + +#if defined(HAVE_EXTERNAL_LIBHTSCODECS) +#include +#include +#include +#include +#include +#include // CRAM v4.0 variable-size integers +#else +#include "../htscodecs/htscodecs/rANS_static.h" +#include "../htscodecs/htscodecs/rANS_static4x16.h" +#include "../htscodecs/htscodecs/arith_dynamic.h" +#include 
"../htscodecs/htscodecs/tokenise_name3.h" +#include "../htscodecs/htscodecs/fqzcomp_qual.h" +#include "../htscodecs/htscodecs/varint.h" +#endif + +//#define REF_DEBUG + +#ifdef REF_DEBUG +#include +#define gettid() (int)syscall(SYS_gettid) + +#define RP(...) fprintf (stderr, __VA_ARGS__) +#else +#define RP(...) +#endif + +#include "../htslib/hfile.h" +#include "../htslib/bgzf.h" +#include "../htslib/faidx.h" +#include "../hts_internal.h" + +#ifndef PATH_MAX +#define PATH_MAX FILENAME_MAX +#endif + +#define TRIAL_SPAN 70 +#define NTRIALS 3 + +#define CRAM_DEFAULT_LEVEL 5 + +/* ---------------------------------------------------------------------- + * ITF8 encoding and decoding. + * + * Also see the itf8_get and itf8_put macros in cram_io.h + */ + +/* + * LEGACY: consider using itf8_decode_crc. + * + * Reads an integer in ITF-8 encoding from 'cp' and stores it in + * *val. + * + * Returns the number of bytes read on success + * -1 on failure + */ +int itf8_decode(cram_fd *fd, int32_t *val_p) { + static int nbytes[16] = { + 0,0,0,0, 0,0,0,0, // 0000xxxx - 0111xxxx + 1,1,1,1, // 1000xxxx - 1011xxxx + 2,2, // 1100xxxx - 1101xxxx + 3, // 1110xxxx + 4, // 1111xxxx + }; + + static int nbits[16] = { + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx + 0x3f, 0x3f, 0x3f, 0x3f, // 1000xxxx - 1011xxxx + 0x1f, 0x1f, // 1100xxxx - 1101xxxx + 0x0f, // 1110xxxx + 0x0f, // 1111xxxx + }; + + int32_t val = hgetc(fd->fp); + if (val == -1) + return -1; + + int i = nbytes[val>>4]; + val &= nbits[val>>4]; + + switch(i) { + case 0: + *val_p = val; + return 1; + + case 1: + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val; + return 2; + + case 2: + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val; + return 3; + + case 3: + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val; + return 4; + 
+ case 4: // really 3.5 more, why make it different? + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<4) | (((unsigned char)hgetc(fd->fp)) & 0x0f); + *val_p = val; + } + + return 5; +} + +int itf8_decode_crc(cram_fd *fd, int32_t *val_p, uint32_t *crc) { + static int nbytes[16] = { + 0,0,0,0, 0,0,0,0, // 0000xxxx - 0111xxxx + 1,1,1,1, // 1000xxxx - 1011xxxx + 2,2, // 1100xxxx - 1101xxxx + 3, // 1110xxxx + 4, // 1111xxxx + }; + + static int nbits[16] = { + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx + 0x3f, 0x3f, 0x3f, 0x3f, // 1000xxxx - 1011xxxx + 0x1f, 0x1f, // 1100xxxx - 1101xxxx + 0x0f, // 1110xxxx + 0x0f, // 1111xxxx + }; + unsigned char c[5]; + + int32_t val = hgetc(fd->fp); + if (val == -1) + return -1; + + c[0]=val; + + int i = nbytes[val>>4]; + val &= nbits[val>>4]; + + if (i > 0) { + if (hread(fd->fp, &c[1], i) < i) + return -1; + } + + switch(i) { + case 0: + *val_p = val; + *crc = crc32(*crc, c, 1); + return 1; + + case 1: + val = (val<<8) | c[1]; + *val_p = val; + *crc = crc32(*crc, c, 2); + return 2; + + case 2: + val = (val<<8) | c[1]; + val = (val<<8) | c[2]; + *val_p = val; + *crc = crc32(*crc, c, 3); + return 3; + + case 3: + val = (val<<8) | c[1]; + val = (val<<8) | c[2]; + val = (val<<8) | c[3]; + *val_p = val; + *crc = crc32(*crc, c, 4); + return 4; + + case 4: // really 3.5 more, why make it different? + { + uint32_t uv = val; + uv = (uv<<8) | c[1]; + uv = (uv<<8) | c[2]; + uv = (uv<<8) | c[3]; + uv = (uv<<4) | (c[4] & 0x0f); + // Avoid implementation-defined behaviour on negative values + *val_p = uv < 0x80000000UL ? (int32_t) uv : -((int32_t) (0xffffffffUL - uv)) - 1; + *crc = crc32(*crc, c, 5); + } + } + + return 5; +} + +/* + * Stores a value to memory in ITF-8 format. + * + * Returns the number of bytes required to store the number. + * This is a maximum of 5 bytes. 
+ */ +static inline int itf8_put(char *cp, int32_t val) { + unsigned char *up = (unsigned char *)cp; + if (!(val & ~0x00000007f)) { // 1 byte + *up = val; + return 1; + } else if (!(val & ~0x00003fff)) { // 2 byte + *up++ = (val >> 8 ) | 0x80; + *up = val & 0xff; + return 2; + } else if (!(val & ~0x01fffff)) { // 3 byte + *up++ = (val >> 16) | 0xc0; + *up++ = (val >> 8 ) & 0xff; + *up = val & 0xff; + return 3; + } else if (!(val & ~0x0fffffff)) { // 4 byte + *up++ = (val >> 24) | 0xe0; + *up++ = (val >> 16) & 0xff; + *up++ = (val >> 8 ) & 0xff; + *up = val & 0xff; + return 4; + } else { // 5 byte + *up++ = 0xf0 | ((val>>28) & 0xff); + *up++ = (val >> 20) & 0xff; + *up++ = (val >> 12) & 0xff; + *up++ = (val >> 4 ) & 0xff; + *up = val & 0x0f; + return 5; + } +} + + +/* 64-bit itf8 variant */ +static inline int ltf8_put(char *cp, int64_t val) { + unsigned char *up = (unsigned char *)cp; + if (!(val & ~((1LL<<7)-1))) { + *up = val; + return 1; + } else if (!(val & ~((1LL<<(6+8))-1))) { + *up++ = (val >> 8 ) | 0x80; + *up = val & 0xff; + return 2; + } else if (!(val & ~((1LL<<(5+2*8))-1))) { + *up++ = (val >> 16) | 0xc0; + *up++ = (val >> 8 ) & 0xff; + *up = val & 0xff; + return 3; + } else if (!(val & ~((1LL<<(4+3*8))-1))) { + *up++ = (val >> 24) | 0xe0; + *up++ = (val >> 16) & 0xff; + *up++ = (val >> 8 ) & 0xff; + *up = val & 0xff; + return 4; + } else if (!(val & ~((1LL<<(3+4*8))-1))) { + *up++ = (val >> 32) | 0xf0; + *up++ = (val >> 24) & 0xff; + *up++ = (val >> 16) & 0xff; + *up++ = (val >> 8 ) & 0xff; + *up = val & 0xff; + return 5; + } else if (!(val & ~((1LL<<(2+5*8))-1))) { + *up++ = (val >> 40) | 0xf8; + *up++ = (val >> 32) & 0xff; + *up++ = (val >> 24) & 0xff; + *up++ = (val >> 16) & 0xff; + *up++ = (val >> 8 ) & 0xff; + *up = val & 0xff; + return 6; + } else if (!(val & ~((1LL<<(1+6*8))-1))) { + *up++ = (val >> 48) | 0xfc; + *up++ = (val >> 40) & 0xff; + *up++ = (val >> 32) & 0xff; + *up++ = (val >> 24) & 0xff; + *up++ = (val >> 16) & 0xff; + *up++ = (val >> 
8 ) & 0xff; + *up = val & 0xff; + return 7; + } else if (!(val & ~((1LL<<(7*8))-1))) { + *up++ = (val >> 56) | 0xfe; + *up++ = (val >> 48) & 0xff; + *up++ = (val >> 40) & 0xff; + *up++ = (val >> 32) & 0xff; + *up++ = (val >> 24) & 0xff; + *up++ = (val >> 16) & 0xff; + *up++ = (val >> 8 ) & 0xff; + *up = val & 0xff; + return 8; + } else { + *up++ = 0xff; + *up++ = (val >> 56) & 0xff; + *up++ = (val >> 48) & 0xff; + *up++ = (val >> 40) & 0xff; + *up++ = (val >> 32) & 0xff; + *up++ = (val >> 24) & 0xff; + *up++ = (val >> 16) & 0xff; + *up++ = (val >> 8 ) & 0xff; + *up = val & 0xff; + return 9; + } +} + +/* + * Encodes and writes a single integer in ITF-8 format. + * Returns 0 on success + * -1 on failure + */ +int itf8_encode(cram_fd *fd, int32_t val) { + char buf[5]; + int len = itf8_put(buf, val); + return hwrite(fd->fp, buf, len) == len ? 0 : -1; +} + +const int itf8_bytes[16] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 4, 5 +}; + +const int ltf8_bytes[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + + 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9 +}; + +/* + * LEGACY: consider using ltf8_decode_crc. 
+ */ +int ltf8_decode(cram_fd *fd, int64_t *val_p) { + int c = hgetc(fd->fp); + int64_t val = (unsigned char)c; + if (c == -1) + return -1; + + if (val < 0x80) { + *val_p = val; + return 1; + + } else if (val < 0xc0) { + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val & (((1LL<<(6+8)))-1); + return 2; + + } else if (val < 0xe0) { + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val & ((1LL<<(5+2*8))-1); + return 3; + + } else if (val < 0xf0) { + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val & ((1LL<<(4+3*8))-1); + return 4; + + } else if (val < 0xf8) { + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val & ((1LL<<(3+4*8))-1); + return 5; + + } else if (val < 0xfc) { + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val & ((1LL<<(2+5*8))-1); + return 6; + + } else if (val < 0xfe) { + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val & ((1LL<<(1+6*8))-1); + return 7; + + } else if (val < 0xff) { + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned 
char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val & ((1LL<<(7*8))-1); + return 8; + + } else { + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + val = (val<<8) | (unsigned char)hgetc(fd->fp); + *val_p = val; + } + + return 9; +} + +int ltf8_decode_crc(cram_fd *fd, int64_t *val_p, uint32_t *crc) { + unsigned char c[9]; + int64_t val = hgetc(fd->fp); + if (val < 0) + return -1; + + c[0] = val; + + if (val < 0x80) { + *val_p = val; + *crc = crc32(*crc, c, 1); + return 1; + + } else if (val < 0xc0) { + int v = hgetc(fd->fp); + if (v < 0) + return -1; + val = (val<<8) | (c[1]=v); + *val_p = val & (((1LL<<(6+8)))-1); + *crc = crc32(*crc, c, 2); + return 2; + + } else if (val < 0xe0) { + if (hread(fd->fp, &c[1], 2) < 2) + return -1; + val = (val<<8) | c[1]; + val = (val<<8) | c[2]; + *val_p = val & ((1LL<<(5+2*8))-1); + *crc = crc32(*crc, c, 3); + return 3; + + } else if (val < 0xf0) { + if (hread(fd->fp, &c[1], 3) < 3) + return -1; + val = (val<<8) | c[1]; + val = (val<<8) | c[2]; + val = (val<<8) | c[3]; + *val_p = val & ((1LL<<(4+3*8))-1); + *crc = crc32(*crc, c, 4); + return 4; + + } else if (val < 0xf8) { + if (hread(fd->fp, &c[1], 4) < 4) + return -1; + val = (val<<8) | c[1]; + val = (val<<8) | c[2]; + val = (val<<8) | c[3]; + val = (val<<8) | c[4]; + *val_p = val & ((1LL<<(3+4*8))-1); + *crc = crc32(*crc, c, 5); + return 5; + + } else if (val < 0xfc) { + if (hread(fd->fp, &c[1], 5) < 5) + return -1; + val = (val<<8) | c[1]; + val = (val<<8) | c[2]; + val = (val<<8) | c[3]; + val = (val<<8) | c[4]; + val = (val<<8) | c[5]; + *val_p = val & ((1LL<<(2+5*8))-1); + *crc = crc32(*crc, c, 6); + return 6; + + } else if (val < 0xfe) { + if 
(hread(fd->fp, &c[1], 6) < 6) + return -1; + val = (val<<8) | c[1]; + val = (val<<8) | c[2]; + val = (val<<8) | c[3]; + val = (val<<8) | c[4]; + val = (val<<8) | c[5]; + val = (val<<8) | c[6]; + *val_p = val & ((1LL<<(1+6*8))-1); + *crc = crc32(*crc, c, 7); + return 7; + + } else if (val < 0xff) { + uint64_t uval = val; + if (hread(fd->fp, &c[1], 7) < 7) + return -1; + uval = (uval<<8) | c[1]; + uval = (uval<<8) | c[2]; + uval = (uval<<8) | c[3]; + uval = (uval<<8) | c[4]; + uval = (uval<<8) | c[5]; + uval = (uval<<8) | c[6]; + uval = (uval<<8) | c[7]; + *val_p = uval & ((1ULL<<(7*8))-1); + *crc = crc32(*crc, c, 8); + return 8; + + } else { + uint64_t uval; + if (hread(fd->fp, &c[1], 8) < 8) + return -1; + uval = c[1]; + uval = (uval<<8) | c[2]; + uval = (uval<<8) | c[3]; + uval = (uval<<8) | c[4]; + uval = (uval<<8) | c[5]; + uval = (uval<<8) | c[6]; + uval = (uval<<8) | c[7]; + uval = (uval<<8) | c[8]; + *crc = crc32(*crc, c, 9); + // Avoid implementation-defined behaviour on negative values + *val_p = c[1] < 0x80 ? (int64_t) uval : -((int64_t) (0xffffffffffffffffULL - uval)) - 1; + } + + return 9; +} + +/* + * Pushes a value in ITF8 format onto the end of a block. + * This shouldn't be used for high-volume data as it is not the fastest + * method. 
+ * + * Returns the number of bytes written + */ +int itf8_put_blk(cram_block *blk, int32_t val) { + char buf[5]; + int sz; + + sz = itf8_put(buf, val); + BLOCK_APPEND(blk, buf, sz); + return sz; + + block_err: + return -1; +} + +int ltf8_put_blk(cram_block *blk, int64_t val) { + char buf[9]; + int sz; + + sz = ltf8_put(buf, val); + BLOCK_APPEND(blk, buf, sz); + return sz; + + block_err: + return -1; +} + +static int64_t safe_itf8_get(char **cp, const char *endp, int *err) { + const unsigned char *up = (unsigned char *)*cp; + + if (endp && endp - *cp < 5 && + (*cp >= endp || endp - *cp < itf8_bytes[up[0]>>4])) { + if (err) *err = 1; + return 0; + } + + if (up[0] < 0x80) { + (*cp)++; + return up[0]; + } else if (up[0] < 0xc0) { + (*cp)+=2; + return ((up[0] <<8) | up[1]) & 0x3fff; + } else if (up[0] < 0xe0) { + (*cp)+=3; + return ((up[0]<<16) | (up[1]<< 8) | up[2]) & 0x1fffff; + } else if (up[0] < 0xf0) { + (*cp)+=4; + uint32_t uv = (((uint32_t)up[0]<<24) | (up[1]<<16) | (up[2]<<8) | up[3]) & 0x0fffffff; + return (int32_t)uv; + } else { + (*cp)+=5; + uint32_t uv = (((uint32_t)up[0] & 0x0f)<<28) | (up[1]<<20) | (up[2]<<12) | (up[3]<<4) | (up[4] & 0x0f); + return (int32_t)uv; + } +} + +static int64_t safe_ltf8_get(char **cp, const char *endp, int *err) { + unsigned char *up = (unsigned char *)*cp; + + if (endp && endp - *cp < 9 && + (*cp >= endp || endp - *cp < ltf8_bytes[up[0]])) { + if (err) *err = 1; + return 0; + } + + if (up[0] < 0x80) { + (*cp)++; + return up[0]; + } else if (up[0] < 0xc0) { + (*cp)+=2; + return (((uint64_t)up[0]<< 8) | + (uint64_t)up[1]) & (((1LL<<(6+8)))-1); + } else if (up[0] < 0xe0) { + (*cp)+=3; + return (((uint64_t)up[0]<<16) | + ((uint64_t)up[1]<< 8) | + (uint64_t)up[2]) & ((1LL<<(5+2*8))-1); + } else if (up[0] < 0xf0) { + (*cp)+=4; + return (((uint64_t)up[0]<<24) | + ((uint64_t)up[1]<<16) | + ((uint64_t)up[2]<< 8) | + (uint64_t)up[3]) & ((1LL<<(4+3*8))-1); + } else if (up[0] < 0xf8) { + (*cp)+=5; + return (((uint64_t)up[0]<<32) | + 
((uint64_t)up[1]<<24) | + ((uint64_t)up[2]<<16) | + ((uint64_t)up[3]<< 8) | + (uint64_t)up[4]) & ((1LL<<(3+4*8))-1); + } else if (up[0] < 0xfc) { + (*cp)+=6; + return (((uint64_t)up[0]<<40) | + ((uint64_t)up[1]<<32) | + ((uint64_t)up[2]<<24) | + ((uint64_t)up[3]<<16) | + ((uint64_t)up[4]<< 8) | + (uint64_t)up[5]) & ((1LL<<(2+5*8))-1); + } else if (up[0] < 0xfe) { + (*cp)+=7; + return (((uint64_t)up[0]<<48) | + ((uint64_t)up[1]<<40) | + ((uint64_t)up[2]<<32) | + ((uint64_t)up[3]<<24) | + ((uint64_t)up[4]<<16) | + ((uint64_t)up[5]<< 8) | + (uint64_t)up[6]) & ((1LL<<(1+6*8))-1); + } else if (up[0] < 0xff) { + (*cp)+=8; + return (((uint64_t)up[1]<<48) | + ((uint64_t)up[2]<<40) | + ((uint64_t)up[3]<<32) | + ((uint64_t)up[4]<<24) | + ((uint64_t)up[5]<<16) | + ((uint64_t)up[6]<< 8) | + (uint64_t)up[7]) & ((1LL<<(7*8))-1); + } else { + (*cp)+=9; + return (((uint64_t)up[1]<<56) | + ((uint64_t)up[2]<<48) | + ((uint64_t)up[3]<<40) | + ((uint64_t)up[4]<<32) | + ((uint64_t)up[5]<<24) | + ((uint64_t)up[6]<<16) | + ((uint64_t)up[7]<< 8) | + (uint64_t)up[8]); + } +} + +// Wrapper for now +static int safe_itf8_put(char *cp, char *cp_end, int32_t val) { + return itf8_put(cp, val); +} + +static int safe_ltf8_put(char *cp, char *cp_end, int64_t val) { + return ltf8_put(cp, val); +} + +static int itf8_size(int64_t v) { + return ((!((v)&~0x7f))?1:(!((v)&~0x3fff))?2:(!((v)&~0x1fffff))?3:(!((v)&~0xfffffff))?4:5); +} + +//----------------------------------------------------------------------------- + +// CRAM v4.0 onwards uses a different variable sized integer encoding +// that is size agnostic. + +// Local interface to varint.h inline version, so we can use in func ptr. +// Note a lot of these use the unsigned interface but take signed int64_t. +// This is because the old CRAM ITF8 inteface had signed -1 as unsigned +// 0xffffffff. 
+static int uint7_size(int64_t v) { + return var_size_u64(v); +} + +static int64_t uint7_get_32(char **cp, const char *endp, int *err) { + uint32_t val; + int nb = var_get_u32((uint8_t *)(*cp), (const uint8_t *)endp, &val); + (*cp) += nb; + if (!nb && err) *err = 1; + return val; +} + +static int64_t sint7_get_32(char **cp, const char *endp, int *err) { + int32_t val; + int nb = var_get_s32((uint8_t *)(*cp), (const uint8_t *)endp, &val); + (*cp) += nb; + if (!nb && err) *err = 1; + return val; +} + +static int64_t uint7_get_64(char **cp, const char *endp, int *err) { + uint64_t val; + int nb = var_get_u64((uint8_t *)(*cp), (const uint8_t *)endp, &val); + (*cp) += nb; + if (!nb && err) *err = 1; + return val; +} + +static int64_t sint7_get_64(char **cp, const char *endp, int *err) { + int64_t val; + int nb = var_get_s64((uint8_t *)(*cp), (const uint8_t *)endp, &val); + (*cp) += nb; + if (!nb && err) *err = 1; + return val; +} + +static int uint7_put_32(char *cp, char *endp, int32_t val) { + return var_put_u32((uint8_t *)cp, (uint8_t *)endp, val); +} + +static int sint7_put_32(char *cp, char *endp, int32_t val) { + return var_put_s32((uint8_t *)cp, (uint8_t *)endp, val); +} + +static int uint7_put_64(char *cp, char *endp, int64_t val) { + return var_put_u64((uint8_t *)cp, (uint8_t *)endp, val); +} + +static int sint7_put_64(char *cp, char *endp, int64_t val) { + return var_put_s64((uint8_t *)cp, (uint8_t *)endp, val); +} + +// Put direct to to cram_block +static int uint7_put_blk_32(cram_block *blk, int32_t v) { + uint8_t buf[10]; + int sz = var_put_u32(buf, buf+10, v); + BLOCK_APPEND(blk, buf, sz); + return sz; + + block_err: + return -1; +} + +static int sint7_put_blk_32(cram_block *blk, int32_t v) { + uint8_t buf[10]; + int sz = var_put_s32(buf, buf+10, v); + BLOCK_APPEND(blk, buf, sz); + return sz; + + block_err: + return -1; +} + +static int uint7_put_blk_64(cram_block *blk, int64_t v) { + uint8_t buf[10]; + int sz = var_put_u64(buf, buf+10, v); + 
BLOCK_APPEND(blk, buf, sz); + return sz; + + block_err: + return -1; +} + +static int sint7_put_blk_64(cram_block *blk, int64_t v) { + uint8_t buf[10]; + int sz = var_put_s64(buf, buf+10, v); + BLOCK_APPEND(blk, buf, sz); + return sz; + + block_err: + return -1; +} + +// Decode 32-bits with CRC update from cram_fd +static int uint7_decode_crc32(cram_fd *fd, int32_t *val_p, uint32_t *crc) { + uint8_t b[5], i = 0; + int c; + uint32_t v = 0; + +#ifdef VARINT2 + b[0] = hgetc(fd->fp); + if (b[0] < 177) { + } else if (b[0] < 241) { + b[1] = hgetc(fd->fp); + } else if (b[0] < 249) { + b[1] = hgetc(fd->fp); + b[2] = hgetc(fd->fp); + } else { + int n = b[0]+2, z = 1; + while (n-- >= 249) + b[z++] = hgetc(fd->fp); + } + i = var_get_u32(b, NULL, &v); +#else +// // Little endian +// int s = 0; +// do { +// b[i++] = c = hgetc(fd->fp); +// if (c < 0) +// return -1; +// v |= (c & 0x7f) << s; +// s += 7; +// } while (i < 5 && (c & 0x80)); + + // Big endian, see also htscodecs/varint.h + do { + b[i++] = c = hgetc(fd->fp); + if (c < 0) + return -1; + v = (v<<7) | (c & 0x7f); + } while (i < 5 && (c & 0x80)); +#endif + *crc = crc32(*crc, b, i); + + *val_p = v; + return i; +} + +// Decode 32-bits with CRC update from cram_fd +static int sint7_decode_crc32(cram_fd *fd, int32_t *val_p, uint32_t *crc) { + uint8_t b[5], i = 0; + int c; + uint32_t v = 0; + +#ifdef VARINT2 + b[0] = hgetc(fd->fp); + if (b[0] < 177) { + } else if (b[0] < 241) { + b[1] = hgetc(fd->fp); + } else if (b[0] < 249) { + b[1] = hgetc(fd->fp); + b[2] = hgetc(fd->fp); + } else { + int n = b[0]+2, z = 1; + while (n-- >= 249) + b[z++] = hgetc(fd->fp); + } + i = var_get_u32(b, NULL, &v); +#else +// // Little endian +// int s = 0; +// do { +// b[i++] = c = hgetc(fd->fp); +// if (c < 0) +// return -1; +// v |= (c & 0x7f) << s; +// s += 7; +// } while (i < 5 && (c & 0x80)); + + // Big endian, see also htscodecs/varint.h + do { + b[i++] = c = hgetc(fd->fp); + if (c < 0) + return -1; + v = (v<<7) | (c & 0x7f); + } while (i < 5 
&& (c & 0x80)); +#endif + *crc = crc32(*crc, b, i); + + *val_p = (v>>1) ^ -(v&1); + return i; +} + + +// Decode 64-bits with CRC update from cram_fd +static int uint7_decode_crc64(cram_fd *fd, int64_t *val_p, uint32_t *crc) { + uint8_t b[10], i = 0; + int c; + uint64_t v = 0; + +#ifdef VARINT2 + b[0] = hgetc(fd->fp); + if (b[0] < 177) { + } else if (b[0] < 241) { + b[1] = hgetc(fd->fp); + } else if (b[0] < 249) { + b[1] = hgetc(fd->fp); + b[2] = hgetc(fd->fp); + } else { + int n = b[0]+2, z = 1; + while (n-- >= 249) + b[z++] = hgetc(fd->fp); + } + i = var_get_u64(b, NULL, &v); +#else +// // Little endian +// int s = 0; +// do { +// b[i++] = c = hgetc(fd->fp); +// if (c < 0) +// return -1; +// v |= (c & 0x7f) << s; +// s += 7; +// } while (i < 10 && (c & 0x80)); + + // Big endian, see also htscodecs/varint.h + do { + b[i++] = c = hgetc(fd->fp); + if (c < 0) + return -1; + v = (v<<7) | (c & 0x7f); + } while (i < 5 && (c & 0x80)); +#endif + *crc = crc32(*crc, b, i); + + *val_p = v; + return i; +} + +//----------------------------------------------------------------------------- + +/* + * Decodes a 32-bit little endian value from fd and stores in val. + * + * Returns the number of bytes read on success + * -1 on failure + */ +static int int32_decode(cram_fd *fd, int32_t *val) { + int32_t i; + if (4 != hread(fd->fp, &i, 4)) + return -1; + + *val = le_int4(i); + return 4; +} + +/* + * Encodes a 32-bit little endian value 'val' and writes to fd. 
+ * + * Returns the number of bytes written on success + * -1 on failure + */ +static int int32_encode(cram_fd *fd, int32_t val) { + uint32_t v = le_int4(val); + if (4 != hwrite(fd->fp, &v, 4)) + return -1; + + return 4; +} + +/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ +int int32_get_blk(cram_block *b, int32_t *val) { + if (b->uncomp_size - BLOCK_SIZE(b) < 4) + return -1; + + uint32_t v = + ((uint32_t) b->data[b->byte ]) | + (((uint32_t) b->data[b->byte+1]) << 8) | + (((uint32_t) b->data[b->byte+2]) << 16) | + (((uint32_t) b->data[b->byte+3]) << 24); + // Avoid implementation-defined behaviour on negative values + *val = v < 0x80000000U ? (int32_t) v : -((int32_t) (0xffffffffU - v)) - 1; + BLOCK_SIZE(b) += 4; + return 4; +} + +/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ +int int32_put_blk(cram_block *b, int32_t val) { + unsigned char cp[4]; + uint32_t v = val; + cp[0] = ( v & 0xff); + cp[1] = ((v>>8) & 0xff); + cp[2] = ((v>>16) & 0xff); + cp[3] = ((v>>24) & 0xff); + + BLOCK_APPEND(b, cp, 4); + return 0; + + block_err: + return -1; +} + +#ifdef HAVE_LIBDEFLATE +/* ---------------------------------------------------------------------- + * libdeflate compression code, with interface to match + * zlib_mem_{in,de}flate for simplicity elsewhere. + */ + +// Named the same as the version that uses zlib as we always use libdeflate for +// decompression when available. 
+char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) { + struct libdeflate_decompressor *z = libdeflate_alloc_decompressor(); + if (!z) { + hts_log_error("Call to libdeflate_alloc_decompressor failed"); + return NULL; + } + + uint8_t *data = NULL, *new_data; + if (!*size) + *size = csize*2; + for(;;) { + new_data = realloc(data, *size); + if (!new_data) { + hts_log_error("Memory allocation failure"); + goto fail; + } + data = new_data; + + int ret = libdeflate_gzip_decompress(z, cdata, csize, data, *size, size); + + // Auto grow output buffer size if needed and try again. + // Fortunately for all bar one call of this we know the size already. + if (ret == LIBDEFLATE_INSUFFICIENT_SPACE) { + (*size) *= 1.5; + continue; + } + + if (ret != LIBDEFLATE_SUCCESS) { + hts_log_error("Inflate operation failed: %d", ret); + goto fail; + } else { + break; + } + } + + libdeflate_free_decompressor(z); + return (char *)data; + + fail: + libdeflate_free_decompressor(z); + free(data); + return NULL; +} + +// Named differently as we use both zlib/libdeflate for compression. +static char *libdeflate_deflate(char *data, size_t size, size_t *cdata_size, + int level, int strat) { + level = level > 0 ? 
level : 6; // libdeflate doesn't honour -1 as default + level *= 1.23; // NB levels go up to 12 here; 5 onwards is +1 + level += level>=8; // 5,6,7->6,7,8 8->10 9->12 + if (level > 12) level = 12; + + if (strat == Z_RLE) // not supported by libdeflate + level = 1; + + struct libdeflate_compressor *z = libdeflate_alloc_compressor(level); + if (!z) { + hts_log_error("Call to libdeflate_alloc_compressor failed"); + return NULL; + } + + unsigned char *cdata = NULL; /* Compressed output */ + size_t cdata_alloc; + cdata = malloc(cdata_alloc = size*1.05+100); + if (!cdata) { + hts_log_error("Memory allocation failure"); + libdeflate_free_compressor(z); + return NULL; + } + + *cdata_size = libdeflate_gzip_compress(z, data, size, cdata, cdata_alloc); + libdeflate_free_compressor(z); + + if (*cdata_size == 0) { + hts_log_error("Call to libdeflate_gzip_compress failed"); + free(cdata); + return NULL; + } + + return (char *)cdata; +} + +#else + +/* ---------------------------------------------------------------------- + * zlib compression code - from Gap5's tg_iface_g.c + * They're static here as they're only used within the cram_compress_block + * and cram_uncompress_block functions, which are the external interface. 
+ */ +char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) { + z_stream s; + unsigned char *data = NULL; /* Uncompressed output */ + int data_alloc = 0; + int err; + + /* Starting point at uncompressed size, and scale after that */ + data = malloc(data_alloc = csize*1.2+100); + if (!data) + return NULL; + + /* Initialise zlib stream */ + s.zalloc = Z_NULL; /* use default allocation functions */ + s.zfree = Z_NULL; + s.opaque = Z_NULL; + s.next_in = (unsigned char *)cdata; + s.avail_in = csize; + s.total_in = 0; + s.next_out = data; + s.avail_out = data_alloc; + s.total_out = 0; + + //err = inflateInit(&s); + err = inflateInit2(&s, 15 + 32); + if (err != Z_OK) { + hts_log_error("Call to zlib inflateInit failed: %s", s.msg); + free(data); + return NULL; + } + + /* Decode to 'data' array */ + for (;s.avail_in;) { + unsigned char *data_tmp; + int alloc_inc; + + s.next_out = &data[s.total_out]; + err = inflate(&s, Z_NO_FLUSH); + if (err == Z_STREAM_END) + break; + + if (err != Z_OK) { + hts_log_error("Call to zlib inflate failed: %s", s.msg); + free(data); + inflateEnd(&s); + return NULL; + } + + /* More to come, so realloc based on growth so far */ + alloc_inc = (double)s.avail_in/s.total_in * s.total_out + 100; + data = realloc((data_tmp = data), data_alloc += alloc_inc); + if (!data) { + free(data_tmp); + inflateEnd(&s); + return NULL; + } + s.avail_out += alloc_inc; + } + inflateEnd(&s); + + *size = s.total_out; + return (char *)data; +} +#endif + +#if !defined(HAVE_LIBDEFLATE) || LIBDEFLATE_VERSION_MAJOR < 1 || (LIBDEFLATE_VERSION_MAJOR == 1 && LIBDEFLATE_VERSION_MINOR <= 8) +static char *zlib_mem_deflate(char *data, size_t size, size_t *cdata_size, + int level, int strat) { + z_stream s; + unsigned char *cdata = NULL; /* Compressed output */ + int cdata_alloc = 0; + int cdata_pos = 0; + int err; + + cdata = malloc(cdata_alloc = size*1.05+100); + if (!cdata) + return NULL; + cdata_pos = 0; + + /* Initialise zlib stream */ + s.zalloc = Z_NULL; /* use 
default allocation functions */ + s.zfree = Z_NULL; + s.opaque = Z_NULL; + s.next_in = (unsigned char *)data; + s.avail_in = size; + s.total_in = 0; + s.next_out = cdata; + s.avail_out = cdata_alloc; + s.total_out = 0; + s.data_type = Z_BINARY; + + err = deflateInit2(&s, level, Z_DEFLATED, 15|16, 9, strat); + if (err != Z_OK) { + hts_log_error("Call to zlib deflateInit2 failed: %s", s.msg); + return NULL; + } + + /* Encode to 'cdata' array */ + for (;s.avail_in;) { + s.next_out = &cdata[cdata_pos]; + s.avail_out = cdata_alloc - cdata_pos; + if (cdata_alloc - cdata_pos <= 0) { + hts_log_error("Deflate produced larger output than expected"); + return NULL; + } + err = deflate(&s, Z_NO_FLUSH); + cdata_pos = cdata_alloc - s.avail_out; + if (err != Z_OK) { + hts_log_error("Call to zlib deflate failed: %s", s.msg); + break; + } + } + if (deflate(&s, Z_FINISH) != Z_STREAM_END) { + hts_log_error("Call to zlib deflate failed: %s", s.msg); + } + *cdata_size = s.total_out; + + if (deflateEnd(&s) != Z_OK) { + hts_log_error("Call to zlib deflate failed: %s", s.msg); + } + return (char *)cdata; +} +#endif + +#ifdef HAVE_LIBLZMA +/* ------------------------------------------------------------------------ */ +/* + * Data compression routines using liblzma (xz) + * + * On a test set this shrunk the main db from 136157104 bytes to 114796168, but + * caused tg_index to grow from 2m43.707s to 15m3.961s. Exporting as bfastq + * went from 18.3s to 36.3s. So decompression suffers too, but not as bad + * as compression times. + * + * For now we disable this functionality. If it's to be reenabled make sure you + * improve the mem_inflate implementation as it's just a test hack at the + * moment. 
+ */ + +static char *lzma_mem_deflate(char *data, size_t size, size_t *cdata_size, + int level) { + char *out; + size_t out_size = lzma_stream_buffer_bound(size); + *cdata_size = 0; + + out = malloc(out_size); + + /* Single call compression */ + if (LZMA_OK != lzma_easy_buffer_encode(level, LZMA_CHECK_CRC32, NULL, + (uint8_t *)data, size, + (uint8_t *)out, cdata_size, + out_size)) + return NULL; + + return out; +} + +static char *lzma_mem_inflate(char *cdata, size_t csize, size_t *size) { + lzma_stream strm = LZMA_STREAM_INIT; + size_t out_size = 0, out_pos = 0; + char *out = NULL, *new_out; + int r; + + /* Initiate the decoder */ + if (LZMA_OK != lzma_stream_decoder(&strm, lzma_easy_decoder_memusage(9), 0)) + return NULL; + + /* Decode loop */ + strm.avail_in = csize; + strm.next_in = (uint8_t *)cdata; + + for (;strm.avail_in;) { + if (strm.avail_in > out_size - out_pos) { + out_size += strm.avail_in * 4 + 32768; + new_out = realloc(out, out_size); + if (!new_out) + goto fail; + out = new_out; + } + strm.avail_out = out_size - out_pos; + strm.next_out = (uint8_t *)&out[out_pos]; + + r = lzma_code(&strm, LZMA_RUN); + if (LZMA_OK != r && LZMA_STREAM_END != r) { + hts_log_error("LZMA decode failure (error %d)", r); + goto fail; + } + + out_pos = strm.total_out; + + if (r == LZMA_STREAM_END) + break; + } + + /* finish up any unflushed data; necessary? */ + r = lzma_code(&strm, LZMA_FINISH); + if (r != LZMA_OK && r != LZMA_STREAM_END) { + hts_log_error("Call to lzma_code failed with error %d", r); + goto fail; + } + + new_out = realloc(out, strm.total_out > 0 ? strm.total_out : 1); + if (new_out) + out = new_out; + *size = strm.total_out; + + lzma_end(&strm); + + return out; + + fail: + lzma_end(&strm); + free(out); + return NULL; +} +#endif + +/* ---------------------------------------------------------------------- + * CRAM blocks - the dynamically growable data block. We have code to + * create, update, (un)compress and read/write. 
+ * + * These are derived from the deflate_interlaced.c blocks, but with the + * CRAM extension of content types and IDs. + */ + +/* + * Allocates a new cram_block structure with a specified content_type and + * id. + * + * Returns block pointer on success + * NULL on failure + */ +cram_block *cram_new_block(enum cram_content_type content_type, + int content_id) { + cram_block *b = malloc(sizeof(*b)); + if (!b) + return NULL; + b->method = b->orig_method = RAW; + b->content_type = content_type; + b->content_id = content_id; + b->comp_size = 0; + b->uncomp_size = 0; + b->data = NULL; + b->alloc = 0; + b->byte = 0; + b->bit = 7; // MSB + b->crc32 = 0; + b->idx = 0; + b->m = NULL; + + return b; +} + +/* + * Reads a block from a cram file. + * Returns cram_block pointer on success. + * NULL on failure + */ +cram_block *cram_read_block(cram_fd *fd) { + cram_block *b = malloc(sizeof(*b)); + unsigned char c; + uint32_t crc = 0; + if (!b) + return NULL; + + //fprintf(stderr, "Block at %d\n", (int)ftell(fd->fp)); + + if (-1 == (b->method = hgetc(fd->fp))) { free(b); return NULL; } + c = b->method; crc = crc32(crc, &c, 1); + if (-1 == (b->content_type= hgetc(fd->fp))) { free(b); return NULL; } + c = b->content_type; crc = crc32(crc, &c, 1); + if (-1 == fd->vv.varint_decode32_crc(fd, &b->content_id, &crc)) { free(b); return NULL; } + if (-1 == fd->vv.varint_decode32_crc(fd, &b->comp_size, &crc)) { free(b); return NULL; } + if (-1 == fd->vv.varint_decode32_crc(fd, &b->uncomp_size, &crc)) { free(b); return NULL; } + + //fprintf(stderr, " method %d, ctype %d, cid %d, csize %d, ucsize %d\n", + // b->method, b->content_type, b->content_id, b->comp_size, b->uncomp_size); + + if (b->method == RAW) { + if (b->uncomp_size < 0 || b->comp_size != b->uncomp_size) { + free(b); + return NULL; + } + b->alloc = b->uncomp_size; + if (!(b->data = malloc(b->uncomp_size))){ free(b); return NULL; } + if (b->uncomp_size != hread(fd->fp, b->data, b->uncomp_size)) { + free(b->data); + free(b); + 
return NULL; + } + } else { + if (b->comp_size < 0 || b->uncomp_size < 0) { + free(b); + return NULL; + } + b->alloc = b->comp_size; + if (!(b->data = malloc(b->comp_size))) { free(b); return NULL; } + if (b->comp_size != hread(fd->fp, b->data, b->comp_size)) { + free(b->data); + free(b); + return NULL; + } + } + + if (CRAM_MAJOR_VERS(fd->version) >= 3) { + if (-1 == int32_decode(fd, (int32_t *)&b->crc32)) { + free(b->data); + free(b); + return NULL; + } + + b->crc32_checked = fd->ignore_md5; + b->crc_part = crc; + } else { + b->crc32_checked = 1; // CRC not present + } + + b->orig_method = b->method; + b->idx = 0; + b->byte = 0; + b->bit = 7; // MSB + + return b; +} + + +/* + * Computes the size of a cram block, including the block + * header itself. + */ +uint32_t cram_block_size(cram_block *b) { + unsigned char dat[100], *cp = dat;; + uint32_t sz; + + *cp++ = b->method; + *cp++ = b->content_type; + cp += itf8_put((char*)cp, b->content_id); + cp += itf8_put((char*)cp, b->comp_size); + cp += itf8_put((char*)cp, b->uncomp_size); + + sz = cp-dat + 4; + sz += b->method == RAW ? b->uncomp_size : b->comp_size; + + return sz; +} + +/* + * Writes a CRAM block. 
+ * Returns 0 on success + * -1 on failure + */ +int cram_write_block(cram_fd *fd, cram_block *b) { + char vardata[100]; + int vardata_o = 0; + + assert(b->method != RAW || (b->comp_size == b->uncomp_size)); + + if (hputc(b->method, fd->fp) == EOF) return -1; + if (hputc(b->content_type, fd->fp) == EOF) return -1; + vardata_o += fd->vv.varint_put32(vardata , vardata+100, b->content_id); + vardata_o += fd->vv.varint_put32(vardata+vardata_o, vardata+100, b->comp_size); + vardata_o += fd->vv.varint_put32(vardata+vardata_o, vardata+100, b->uncomp_size); + if (vardata_o != hwrite(fd->fp, vardata, vardata_o)) + return -1; + + if (b->data) { + if (b->method == RAW) { + if (b->uncomp_size != hwrite(fd->fp, b->data, b->uncomp_size)) + return -1; + } else { + if (b->comp_size != hwrite(fd->fp, b->data, b->comp_size)) + return -1; + } + } else { + // Absent blocks should be size 0 + assert(b->method == RAW && b->uncomp_size == 0); + } + + if (CRAM_MAJOR_VERS(fd->version) >= 3) { + char dat[100], *cp = (char *)dat; + uint32_t crc; + + *cp++ = b->method; + *cp++ = b->content_type; + cp += fd->vv.varint_put32(cp, dat+100, b->content_id); + cp += fd->vv.varint_put32(cp, dat+100, b->comp_size); + cp += fd->vv.varint_put32(cp, dat+100, b->uncomp_size); + crc = crc32(0L, (uc *)dat, cp-dat); + + if (b->method == RAW) { + b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->uncomp_size); + } else { + b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->comp_size); + } + + if (-1 == int32_encode(fd, b->crc32)) + return -1; + } + + return 0; +} + +/* + * Frees a CRAM block, deallocating internal data too. + */ +void cram_free_block(cram_block *b) { + if (!b) + return; + if (b->data) + free(b->data); + free(b); +} + +/* + * Uncompresses a CRAM block, if compressed. 
+ */ +int cram_uncompress_block(cram_block *b) { + char *uncomp; + size_t uncomp_size = 0; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // Pretend the CRC was OK so the fuzzer doesn't have to get it right + b->crc32_checked = 1; +#endif + + if (b->crc32_checked == 0) { + uint32_t crc = crc32(b->crc_part, b->data ? b->data : (uc *)"", b->alloc); + b->crc32_checked = 1; + if (crc != b->crc32) { + hts_log_error("Block CRC32 failure"); + return -1; + } + } + + if (b->uncomp_size == 0) { + // blank block + b->method = RAW; + return 0; + } + assert(b->uncomp_size >= 0); // cram_read_block should ensure this + + switch (b->method) { + case RAW: + return 0; + + case GZIP: + uncomp_size = b->uncomp_size; + uncomp = zlib_mem_inflate((char *)b->data, b->comp_size, &uncomp_size); + + if (!uncomp) + return -1; + if (uncomp_size != b->uncomp_size) { + free(uncomp); + return -1; + } + free(b->data); + b->data = (unsigned char *)uncomp; + b->alloc = uncomp_size; + b->method = RAW; + break; + +#ifdef HAVE_LIBBZ2 + case BZIP2: { + unsigned int usize = b->uncomp_size; + if (!(uncomp = malloc(usize))) + return -1; + if (BZ_OK != BZ2_bzBuffToBuffDecompress(uncomp, &usize, + (char *)b->data, b->comp_size, + 0, 0)) { + free(uncomp); + return -1; + } + free(b->data); + b->data = (unsigned char *)uncomp; + b->alloc = usize; + b->method = RAW; + b->uncomp_size = usize; // Just in case it differs + break; + } +#else + case BZIP2: + hts_log_error("Bzip2 compression is not compiled into this version. Please rebuild and try again"); + return -1; +#endif + +#ifdef HAVE_LIBLZMA + case LZMA: + uncomp = lzma_mem_inflate((char *)b->data, b->comp_size, &uncomp_size); + if (!uncomp) + return -1; + if (uncomp_size != b->uncomp_size) { + free(uncomp); + return -1; + } + free(b->data); + b->data = (unsigned char *)uncomp; + b->alloc = uncomp_size; + b->method = RAW; + break; +#else + case LZMA: + hts_log_error("Lzma compression is not compiled into this version. 
Please rebuild and try again"); + return -1; + break; +#endif + + case RANS: { + unsigned int usize = b->uncomp_size, usize2; + uncomp = (char *)rans_uncompress(b->data, b->comp_size, &usize2); + if (!uncomp) + return -1; + if (usize != usize2) { + free(uncomp); + return -1; + } + free(b->data); + b->data = (unsigned char *)uncomp; + b->alloc = usize2; + b->method = RAW; + b->uncomp_size = usize2; // Just in case it differs + //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); + break; + } + + case FQZ: { + uncomp_size = b->uncomp_size; + uncomp = fqz_decompress((char *)b->data, b->comp_size, &uncomp_size, NULL, 0); + if (!uncomp) + return -1; + free(b->data); + b->data = (unsigned char *)uncomp; + b->alloc = uncomp_size; + b->method = RAW; + b->uncomp_size = uncomp_size; + break; + } + + case RANS_PR0: { + unsigned int usize = b->uncomp_size, usize2; + uncomp = (char *)rans_uncompress_4x16(b->data, b->comp_size, &usize2); + if (!uncomp) + return -1; + if (usize != usize2) { + free(uncomp); + return -1; + } + b->orig_method = RANS_PR0 + (b->data[0]&1) + + 2*((b->data[0]&0x40)>0) + 4*((b->data[0]&0x80)>0); + free(b->data); + b->data = (unsigned char *)uncomp; + b->alloc = usize2; + b->method = RAW; + b->uncomp_size = usize2; // Just incase it differs + //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); + break; + } + + case ARITH_PR0: { + unsigned int usize = b->uncomp_size, usize2; + uncomp = (char *)arith_uncompress_to(b->data, b->comp_size, NULL, &usize2); + if (!uncomp) + return -1; + if (usize != usize2) { + free(uncomp); + return -1; + } + b->orig_method = ARITH_PR0 + (b->data[0]&1) + + 2*((b->data[0]&0x40)>0) + 4*((b->data[0]&0x80)>0); + free(b->data); + b->data = (unsigned char *)uncomp; + b->alloc = usize2; + b->method = RAW; + b->uncomp_size = usize2; // Just incase it differs + //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size); + break; + } + + case TOK3: { + uint32_t out_len; + uint8_t *cp = 
tok3_decode_names(b->data, b->comp_size, &out_len); + if (!cp) + return -1; + b->orig_method = TOK3; + b->method = RAW; + free(b->data); + b->data = cp; + b->alloc = out_len; + b->uncomp_size = out_len; + break; + } + + default: + return -1; + } + + return 0; +} + +static char *cram_compress_by_method(cram_slice *s, char *in, size_t in_size, + int content_id, size_t *out_size, + enum cram_block_method_int method, + int level, int strat) { + switch (method) { + case GZIP: + case GZIP_RLE: + case GZIP_1: + // Read names bizarrely benefit from zlib over libdeflate for + // mid-range compression levels. Focusing purely of ratio or + // speed, libdeflate still wins. It also seems to win for + // other data series too. + // + // Eg RN at level 5; libdeflate=55.9MB zlib=51.6MB +#ifdef HAVE_LIBDEFLATE +# if (LIBDEFLATE_VERSION_MAJOR < 1 || (LIBDEFLATE_VERSION_MAJOR == 1 && LIBDEFLATE_VERSION_MINOR <= 8)) + if (content_id == DS_RN && level >= 4 && level <= 7) + return zlib_mem_deflate(in, in_size, out_size, level, strat); + else +# endif + return libdeflate_deflate(in, in_size, out_size, level, strat); +#else + return zlib_mem_deflate(in, in_size, out_size, level, strat); +#endif + + case BZIP2: { +#ifdef HAVE_LIBBZ2 + unsigned int comp_size = in_size*1.01 + 600; + char *comp = malloc(comp_size); + if (!comp) + return NULL; + + if (BZ_OK != BZ2_bzBuffToBuffCompress(comp, &comp_size, + in, in_size, + level, 0, 30)) { + free(comp); + return NULL; + } + *out_size = comp_size; + return comp; +#else + return NULL; +#endif + } + + case FQZ: + case FQZ_b: + case FQZ_c: + case FQZ_d: { + // Extract the necessary portion of the slice into an fqz_slice struct. + // These previously were the same thing, but this permits us to detach + // the codec from the rest of this CRAM implementation. 
+ fqz_slice *f = malloc(2*s->hdr->num_records * sizeof(uint32_t) + sizeof(fqz_slice)); + if (!f) + return NULL; + f->num_records = s->hdr->num_records; + f->len = (uint32_t *)(((char *)f) + sizeof(fqz_slice)); + f->flags = f->len + s->hdr->num_records; + int i; + for (i = 0; i < s->hdr->num_records; i++) { + f->flags[i] = s->crecs[i].flags; + f->len[i] = (i+1 < s->hdr->num_records + ? s->crecs[i+1].qual - s->crecs[i].qual + : s->block[DS_QS]->uncomp_size - s->crecs[i].qual); + } + char *comp = fqz_compress(strat & 0xff /* cram vers */, f, + in, in_size, out_size, strat >> 8, NULL); + free(f); + return comp; + } + + case LZMA: +#ifdef HAVE_LIBLZMA + return lzma_mem_deflate(in, in_size, out_size, level); +#else + return NULL; +#endif + + case RANS0: + case RANS1: { + unsigned int out_size_i; + unsigned char *cp; + cp = rans_compress((unsigned char *)in, in_size, &out_size_i, + method == RANS0 ? 0 : 1); + *out_size = out_size_i; + return (char *)cp; + } + + case RANS_PR0: + case RANS_PR1: + case RANS_PR64: + case RANS_PR9: + case RANS_PR128: + case RANS_PR129: + case RANS_PR192: + case RANS_PR193: { + unsigned int out_size_i; + unsigned char *cp; + + // see enum cram_block. We map RANS_* methods to order bit-fields + static int methmap[] = { 1, 64,9, 128,129, 192,193 }; + + int m = method == RANS_PR0 ? 0 : methmap[method - RANS_PR1]; + cp = rans_compress_4x16((unsigned char *)in, in_size, &out_size_i, + m | RANS_ORDER_SIMD_AUTO); + *out_size = out_size_i; + return (char *)cp; + } + + case ARITH_PR0: + case ARITH_PR1: + case ARITH_PR64: + case ARITH_PR9: + case ARITH_PR128: + case ARITH_PR129: + case ARITH_PR192: + case ARITH_PR193: { + unsigned int out_size_i; + unsigned char *cp; + + // see enum cram_block. We map ARITH_* methods to order bit-fields + static int methmap[] = { 1, 64,9, 128,129, 192,193 }; + + cp = arith_compress_to((unsigned char *)in, in_size, NULL, &out_size_i, + method == ARITH_PR0 ? 
0 : methmap[method - ARITH_PR1]); + *out_size = out_size_i; + return (char *)cp; + } + + case TOK3: + case TOKA: { + int out_len; + int lev = level; + if (method == TOK3 && lev > 3) + lev = 3; + uint8_t *cp = tok3_encode_names(in, in_size, lev, strat, &out_len, NULL); + *out_size = out_len; + return (char *)cp; + } + + case RAW: + break; + + default: + return NULL; + } + + return NULL; +} + + +/* + * Compresses a block using one of two different zlib strategies. If we only + * want one choice set strat2 to be -1. + * + * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED + * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is + * significantly faster. + * + * Method and level -1 implies defaults, as specified in cram_fd. + */ +int cram_compress_block2(cram_fd *fd, cram_slice *s, + cram_block *b, cram_metrics *metrics, + int method, int level) { + + if (!b) + return 0; + + char *comp = NULL; + size_t comp_size = 0; + int strat; + + // Internally we have parameterised methods that externally map + // to the same CRAM method value. + // See enum_cram_block_method_int in cram_structs.h. + int methmap[] = { + // Externally defined values + RAW, GZIP, BZIP2, LZMA, RANS, RANSPR, ARITH, FQZ, TOK3, + + // Reserved for possible expansion + 0, 0, + + // Internally parameterised versions matching back to above + // external values + GZIP, GZIP, + FQZ, FQZ, FQZ, + RANS, + RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, RANSPR, + TOK3, + ARITH, ARITH, ARITH, ARITH, ARITH, ARITH, ARITH, + }; + + if (b->method != RAW) { + // Maybe already compressed if s->block[0] was compressed and + // we have e.g. s->block[DS_BA] set to s->block[0] due to only + // one base type present and hence using E_HUFFMAN on block 0. + // A second explicit attempt to compress the same block then + // occurs. 
+ return 0; + } + + if (method == -1) { + method = 1<use_bz2) + method |= 1<use_lzma) + method |= 1<level; + + //fprintf(stderr, "IN: block %d, sz %d\n", b->content_id, b->uncomp_size); + + if (method == RAW || level == 0 || b->uncomp_size == 0) { + b->method = RAW; + b->comp_size = b->uncomp_size; + //fprintf(stderr, "Skip block id %d\n", b->content_id); + return 0; + } + +#ifndef ABS +# define ABS(a) ((a)>=0?(a):-(a)) +#endif + + if (metrics) { + pthread_mutex_lock(&fd->metrics_lock); + // Sudden changes in size trigger a retrial. These are mainly + // triggered when switching to sorted / unsorted, where the number + // of elements in a slice radically changes. + // + // We also get large fluctuations based on genome coordinate for + // e.g. SA:Z and SC series, but we consider the typical scale of + // delta between blocks and use this to look for abnormality. + + // Equivalent to (but minus possible integer overflow) + // (b->uncomp_size + 1000)/4 > metrics->input_avg_sz+1000 || + // b->uncomp_size + 1000 < (metrics->input_avg_sz+1000)/4) + if (metrics->input_avg_sz && + (b->uncomp_size/4 - 750 > metrics->input_avg_sz || + b->uncomp_size < metrics->input_avg_sz/4 - 750) && + ABS(b->uncomp_size-metrics->input_avg_sz)/10 + > metrics->input_avg_delta) { + metrics->next_trial = 0; + } + + if (metrics->trial > 0 || --metrics->next_trial <= 0) { + int m, unpackable = metrics->unpackable; + size_t sz_best = b->uncomp_size; + size_t sz[CRAM_MAX_METHOD] = {0}; + int method_best = 0; // RAW + char *c_best = NULL, *c = NULL; + + metrics->input_avg_delta = + 0.9 * (metrics->input_avg_delta + + ABS(b->uncomp_size - metrics->input_avg_sz)); + + metrics->input_avg_sz += b->uncomp_size*.2; + metrics->input_avg_sz *= 0.8; + + if (metrics->revised_method) + method = metrics->revised_method; + else + metrics->revised_method = method; + + if (metrics->next_trial <= 0) { + metrics->next_trial = TRIAL_SPAN; + metrics->trial = NTRIALS; + for (m = 0; m < CRAM_MAX_METHOD; m++) + 
metrics->sz[m] /= 2; + metrics->unpackable = 0; + } + + // Compress this block using the best method + if (unpackable && CRAM_MAJOR_VERS(fd->version) > 3) { + // No point trying bit-pack if 17+ symbols. + if (method & (1<metrics_lock); + + for (m = 0; m < CRAM_MAX_METHOD; m++) { + if (method & (1u<version); break; + case FQZ_b: strat = CRAM_MAJOR_VERS(fd->version)+256; break; + case FQZ_c: strat = CRAM_MAJOR_VERS(fd->version)+2*256; break; + case FQZ_d: strat = CRAM_MAJOR_VERS(fd->version)+3*256; break; + case TOK3: strat = 0; break; + case TOKA: strat = 1; break; + default: strat = 0; + } + + c = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, + b->content_id, &sz[m], m, lvl, strat); + + if (c && sz_best > sz[m]) { + sz_best = sz[m]; + method_best = m; + if (c_best) + free(c_best); + c_best = c; + } else if (c) { + free(c); + } else { + sz[m] = UINT_MAX; // arbitrarily worse than raw + } + } else { + sz[m] = UINT_MAX; // arbitrarily worse than raw + } + } + + if (c_best) { + free(b->data); + b->data = (unsigned char *)c_best; + b->method = method_best; // adjusted to methmap[method_best] later + b->comp_size = sz_best; + } + + // Accumulate stats for all methods tried + pthread_mutex_lock(&fd->metrics_lock); + for (m = 0; m < CRAM_MAX_METHOD; m++) + // don't be overly sure on small blocks. + // +2000 means eg bzip2 vs gzip (1.07 to 1.04) or gz vs rans1 + // needs to be at least 60 bytes smaller to overcome the + // fixed size addition. + metrics->sz[m] += sz[m]+2000; + + // When enough trials performed, find the best on average + if (--metrics->trial == 0) { + int best_method = RAW; + int best_sz = INT_MAX; + + // Relative costs of methods. 
See enum_cram_block_method_int + // and methmap + double meth_cost[32] = { + // Externally defined methods + 1, // 0 raw + 1.04, // 1 gzip (Z_FILTERED) + 1.07, // 2 bzip2 + 1.08, // 3 lzma + 1.00, // 4 rans (O0) + 1.00, // 5 ranspr (O0) + 1.04, // 6 arithpr (O0) + 1.05, // 7 fqz + 1.05, // 8 tok3 (rans) + 1.00, 1.00, // 9,10 reserved + + // Paramterised versions of above + 1.01, // gzip rle + 1.01, // gzip -1 + + 1.05, 1.05, 1.05, // FQZ_b,c,d + + 1.01, // rans O1 + + 1.01, // rans_pr1 + 1.00, // rans_pr64; if smaller, usually fast + 1.03, // rans_pr65/9 + 1.00, // rans_pr128 + 1.01, // rans_pr129 + 1.00, // rans_pr192 + 1.01, // rans_pr193 + + 1.07, // tok3 arith + + 1.04, // arith_pr1 + 1.04, // arith_pr64 + 1.04, // arith_pr9 + 1.03, // arith_pr128 + 1.04, // arith_pr129 + 1.04, // arith_pr192 + 1.04, // arith_pr193 + }; + + // Scale methods by cost based on compression level + if (fd->level <= 1) { + for (m = 0; m < CRAM_MAX_METHOD; m++) + metrics->sz[m] *= 1+(meth_cost[m]-1)*4; + } else if (fd->level <= 3) { + for (m = 0; m < CRAM_MAX_METHOD; m++) + metrics->sz[m] *= 1+(meth_cost[m]-1); + } else if (fd->level <= 6) { + for (m = 0; m < CRAM_MAX_METHOD; m++) + metrics->sz[m] *= 1+(meth_cost[m]-1)/2; + } else if (fd->level <= 7) { + for (m = 0; m < CRAM_MAX_METHOD; m++) + metrics->sz[m] *= 1+(meth_cost[m]-1)/3; + } // else cost is ignored + + // Ensure these are never used; BSC and ZSTD + metrics->sz[9] = metrics->sz[10] = INT_MAX; + + for (m = 0; m < CRAM_MAX_METHOD; m++) { + if ((!metrics->sz[m]) || (!(method & (1u< metrics->sz[m]) + best_sz = metrics->sz[m], best_method = m; + } + + if (best_method != metrics->method) { + //metrics->trial = (NTRIALS+1)/2; // be sure + //metrics->next_trial /= 1.5; + metrics->consistency = 0; + } else { + metrics->next_trial *= MIN(2, 1+metrics->consistency/4.0); + metrics->consistency++; + } + + metrics->method = best_method; + switch (best_method) { + case GZIP: strat = Z_FILTERED; break; + case GZIP_1: strat = 
Z_DEFAULT_STRATEGY; break; + case GZIP_RLE: strat = Z_RLE; break; + case FQZ: strat = CRAM_MAJOR_VERS(fd->version); break; + case FQZ_b: strat = CRAM_MAJOR_VERS(fd->version)+256; break; + case FQZ_c: strat = CRAM_MAJOR_VERS(fd->version)+2*256; break; + case FQZ_d: strat = CRAM_MAJOR_VERS(fd->version)+3*256; break; + case TOK3: strat = 0; break; + case TOKA: strat = 1; break; + default: strat = 0; + } + metrics->strat = strat; + + // If we see at least MAXFAIL trials in a row for a specific + // compression method with more than MAXDELTA aggregate + // size then we drop this from the list of methods used + // for this block type. +#define MAXDELTA 0.20 +#define MAXFAILS 4 + for (m = 0; m < CRAM_MAX_METHOD; m++) { + if (best_method == m) { + metrics->cnt[m] = 0; + metrics->extra[m] = 0; + } else if (best_sz < metrics->sz[m]) { + double r = (double)metrics->sz[m] / best_sz - 1; + int mul = 1+(fd->level>=7); + if (++metrics->cnt[m] >= MAXFAILS*mul && + (metrics->extra[m] += r) >= MAXDELTA*mul) + method &= ~(1u<sz[m] > best_sz) + method &= ~(1u<verbose > 1 && method != metrics->revised_method) + // fprintf(stderr, "%d: revising method from %x to %x\n", + // b->content_id, metrics->revised_method, method); + metrics->revised_method = method; + } + pthread_mutex_unlock(&fd->metrics_lock); + } else { + metrics->input_avg_delta = + 0.9 * (metrics->input_avg_delta + + ABS(b->uncomp_size - metrics->input_avg_sz)); + + metrics->input_avg_sz += b->uncomp_size*.2; + metrics->input_avg_sz *= 0.8; + + strat = metrics->strat; + method = metrics->method; + + pthread_mutex_unlock(&fd->metrics_lock); + comp = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, + b->content_id, &comp_size, method, + method == GZIP_1 ? 
1 : level, + strat); + if (!comp) + return -1; + + if (comp_size < b->uncomp_size) { + free(b->data); + b->data = (unsigned char *)comp; + b->comp_size = comp_size; + b->method = method; + } else { + free(comp); + } + } + + } else { + // no cached metrics, so just do zlib? + comp = cram_compress_by_method(s, (char *)b->data, b->uncomp_size, + b->content_id, &comp_size, GZIP, level, Z_FILTERED); + if (!comp) { + hts_log_error("Compression failed!"); + return -1; + } + + if (comp_size < b->uncomp_size) { + free(b->data); + b->data = (unsigned char *)comp; + b->comp_size = comp_size; + b->method = GZIP; + } else { + free(comp); + } + strat = Z_FILTERED; + } + + hts_log_info("Compressed block ID %d from %d to %d by method %s", + b->content_id, b->uncomp_size, b->comp_size, + cram_block_method2str(b->method)); + + b->method = methmap[b->method]; + + return 0; +} +int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, + int method, int level) { + return cram_compress_block2(fd, NULL, b, metrics, method, level); +} + +cram_metrics *cram_new_metrics(void) { + cram_metrics *m = calloc(1, sizeof(*m)); + if (!m) + return NULL; + m->trial = NTRIALS-1; + m->next_trial = TRIAL_SPAN/2; // learn quicker at start + m->method = RAW; + m->strat = 0; + m->revised_method = 0; + m->unpackable = 0; + + return m; +} + +char *cram_block_method2str(enum cram_block_method_int m) { + switch(m) { + case RAW: return "RAW"; + case GZIP: return "GZIP"; + case BZIP2: return "BZIP2"; + case LZMA: return "LZMA"; + case RANS0: return "RANS0"; + case RANS1: return "RANS1"; + case GZIP_RLE: return "GZIP_RLE"; + case GZIP_1: return "GZIP_1"; + case FQZ: return "FQZ"; + case FQZ_b: return "FQZ_b"; + case FQZ_c: return "FQZ_c"; + case FQZ_d: return "FQZ_d"; + case RANS_PR0: return "RANS_PR0"; + case RANS_PR1: return "RANS_PR1"; + case RANS_PR64: return "RANS_PR64"; + case RANS_PR9: return "RANS_PR9"; + case RANS_PR128: return "RANS_PR128"; + case RANS_PR129: return "RANS_PR129"; + case 
RANS_PR192: return "RANS_PR192"; + case RANS_PR193: return "RANS_PR193"; + case TOK3: return "TOK3_R"; + case TOKA: return "TOK3_A"; + case ARITH_PR0: return "ARITH_PR0"; + case ARITH_PR1: return "ARITH_PR1"; + case ARITH_PR64: return "ARITH_PR64"; + case ARITH_PR9: return "ARITH_PR9"; + case ARITH_PR128: return "ARITH_PR128"; + case ARITH_PR129: return "ARITH_PR129"; + case ARITH_PR192: return "ARITH_PR192"; + case ARITH_PR193: return "ARITH_PR193"; + case BM_ERROR: break; + } + return "?"; +} + +char *cram_content_type2str(enum cram_content_type t) { + switch (t) { + case FILE_HEADER: return "FILE_HEADER"; + case COMPRESSION_HEADER: return "COMPRESSION_HEADER"; + case MAPPED_SLICE: return "MAPPED_SLICE"; + case UNMAPPED_SLICE: return "UNMAPPED_SLICE"; + case EXTERNAL: return "EXTERNAL"; + case CORE: return "CORE"; + case CT_ERROR: break; + } + return "?"; +} + +/* ---------------------------------------------------------------------- + * Reference sequence handling + * + * These revolve around the refs_t structure, which may potentially be + * shared between multiple cram_fd. + * + * We start with refs_create() to allocate an empty refs_t and then + * populate it with @SQ line data using refs_from_header(). This is done on + * cram_open(). Also at start up we can call cram_load_reference() which + * is used with "scramble -r foo.fa". This replaces the fd->refs with the + * new one specified. In either case refs2id() is then called which + * maps ref_entry names to @SQ ids (refs_t->ref_id[]). + * + * Later, possibly within a thread, we will want to know the actual ref + * seq itself, obtained by calling cram_get_ref(). This may use the + * UR: or M5: fields or the filename specified in the original + * cram_load_reference() call. + * + * Given the potential for multi-threaded reference usage, we have + * reference counting (sorry for the confusing double use of "ref") to + * track the number of callers interested in any specific reference. 
+ */ + +/* + * Frees/unmaps a reference sequence and associated file handles. + */ +static void ref_entry_free_seq(ref_entry *e) { + if (e->mf) + mfclose(e->mf); + if (e->seq && !e->mf) + free(e->seq); + + e->seq = NULL; + e->mf = NULL; +} + +void refs_free(refs_t *r) { + RP("refs_free()\n"); + + if (--r->count > 0) + return; + + if (!r) + return; + + if (r->pool) + string_pool_destroy(r->pool); + + if (r->h_meta) { + khint_t k; + + for (k = kh_begin(r->h_meta); k != kh_end(r->h_meta); k++) { + ref_entry *e; + + if (!kh_exist(r->h_meta, k)) + continue; + if (!(e = kh_val(r->h_meta, k))) + continue; + ref_entry_free_seq(e); + free(e); + } + + kh_destroy(refs, r->h_meta); + } + + if (r->ref_id) + free(r->ref_id); + + if (r->fp) + bgzf_close(r->fp); + + pthread_mutex_destroy(&r->lock); + + free(r); +} + +static refs_t *refs_create(void) { + refs_t *r = calloc(1, sizeof(*r)); + + RP("refs_create()\n"); + + if (!r) + return NULL; + + if (!(r->pool = string_pool_create(8192))) + goto err; + + r->ref_id = NULL; // see refs2id() to populate. + r->count = 1; + r->last = NULL; + r->last_id = -1; + + if (!(r->h_meta = kh_init(refs))) + goto err; + + pthread_mutex_init(&r->lock, NULL); + + return r; + + err: + refs_free(r); + return NULL; +} + +/* + * Opens a reference fasta file as a BGZF stream, allowing for + * compressed files. It automatically builds a .fai file if + * required and if compressed a .gzi bgzf index too. + * + * Returns a BGZF handle on success; + * NULL on failure. 
+ */ +static BGZF *bgzf_open_ref(char *fn, char *mode, int is_md5) { + BGZF *fp; + + if (!is_md5 && !hisremote(fn)) { + char fai_file[PATH_MAX]; + + snprintf(fai_file, PATH_MAX, "%s.fai", fn); + if (access(fai_file, R_OK) != 0) + if (fai_build(fn) != 0) + return NULL; + } + + if (!(fp = bgzf_open(fn, mode))) { + perror(fn); + return NULL; + } + + if (fp->is_compressed == 1 && bgzf_index_load(fp, fn, ".gzi") < 0) { + hts_log_error("Unable to load .gzi index '%s.gzi'", fn); + bgzf_close(fp); + return NULL; + } + + return fp; +} + +/* + * Loads a FAI file for a reference.fasta. + * "is_err" indicates whether failure to load is worthy of emitting an + * error message. In some cases (eg with embedded references) we + * speculatively load, just in case, and silently ignore errors. + * + * Returns the refs_t struct on success (maybe newly allocated); + * NULL on failure + */ +static refs_t *refs_load_fai(refs_t *r_orig, const char *fn, int is_err) { + hFILE *fp = NULL; + char fai_fn[PATH_MAX]; + char line[8192]; + refs_t *r = r_orig; + size_t fn_l = strlen(fn); + int id = 0, id_alloc = 0; + + RP("refs_load_fai %s\n", fn); + + if (!r) + if (!(r = refs_create())) + goto err; + + if (r->fp) + if (bgzf_close(r->fp) != 0) + goto err; + r->fp = NULL; + + /* Look for a FASTA##idx##FAI format */ + char *fn_delim = strstr(fn, HTS_IDX_DELIM); + if (fn_delim) { + if (!(r->fn = string_ndup(r->pool, fn, fn_delim - fn))) + goto err; + fn_delim += strlen(HTS_IDX_DELIM); + snprintf(fai_fn, PATH_MAX, "%s", fn_delim); + } else { + /* An index file was provided, instead of the actual reference file */ + if (fn_l > 4 && strcmp(&fn[fn_l-4], ".fai") == 0) { + if (!r->fn) { + if (!(r->fn = string_ndup(r->pool, fn, fn_l-4))) + goto err; + } + snprintf(fai_fn, PATH_MAX, "%s", fn); + } else { + /* Only the reference file provided. 
Get the index file name from it */ + if (!(r->fn = string_dup(r->pool, fn))) + goto err; + snprintf(fai_fn, PATH_MAX, "%.*s.fai", PATH_MAX-5, fn); + } + } + + if (!(r->fp = bgzf_open_ref(r->fn, "r", 0))) { + hts_log_error("Failed to open reference file '%s'", r->fn); + goto err; + } + + if (!(fp = hopen(fai_fn, "r"))) { + hts_log_error("Failed to open index file '%s'", fai_fn); + if (is_err) + perror(fai_fn); + goto err; + } + while (hgets(line, 8192, fp) != NULL) { + ref_entry *e = malloc(sizeof(*e)); + char *cp; + int n; + khint_t k; + + if (!e) + return NULL; + + // id + for (cp = line; *cp && !isspace_c(*cp); cp++) + ; + *cp++ = 0; + e->name = string_dup(r->pool, line); + + // length + while (*cp && isspace_c(*cp)) + cp++; + e->length = strtoll(cp, &cp, 10); + + // offset + while (*cp && isspace_c(*cp)) + cp++; + e->offset = strtoll(cp, &cp, 10); + + // bases per line + while (*cp && isspace_c(*cp)) + cp++; + e->bases_per_line = strtol(cp, &cp, 10); + + // line length + while (*cp && isspace_c(*cp)) + cp++; + e->line_length = strtol(cp, &cp, 10); + + // filename + e->fn = r->fn; + + e->count = 0; + e->seq = NULL; + e->mf = NULL; + e->is_md5 = 0; + e->validated_md5 = 0; + + k = kh_put(refs, r->h_meta, e->name, &n); + if (-1 == n) { + free(e); + return NULL; + } + + if (n) { + kh_val(r->h_meta, k) = e; + } else { + ref_entry *re = kh_val(r->h_meta, k); + if (re && (re->count != 0 || re->length != 0)) { + /* Keep old */ + free(e); + } else { + /* Replace old */ + if (re) + free(re); + kh_val(r->h_meta, k) = e; + } + } + + if (id >= id_alloc) { + ref_entry **new_refs; + int x; + + id_alloc = id_alloc ?id_alloc*2 : 16; + new_refs = realloc(r->ref_id, id_alloc * sizeof(*r->ref_id)); + if (!new_refs) + goto err; + r->ref_id = new_refs; + + for (x = id; x < id_alloc; x++) + r->ref_id[x] = NULL; + } + r->ref_id[id] = e; + r->nref = ++id; + } + + if(hclose(fp) < 0) + goto err; + return r; + + err: + if (fp) + hclose_abruptly(fp); + + if (!r_orig) + refs_free(r); + + 
return NULL; +} + +/* + * Verifies that the CRAM @SQ lines and .fai files match. + */ +static void sanitise_SQ_lines(cram_fd *fd) { + int i; + + if (!fd->header || !fd->header->hrecs) + return; + + if (!fd->refs || !fd->refs->h_meta) + return; + + for (i = 0; i < fd->header->hrecs->nref; i++) { + const char *name = fd->header->hrecs->ref[i].name; + khint_t k = kh_get(refs, fd->refs->h_meta, name); + ref_entry *r; + + // We may have @SQ lines which have no known .fai, but do not + // in themselves pose a problem because they are unused in the file. + if (k == kh_end(fd->refs->h_meta)) + continue; + + if (!(r = (ref_entry *)kh_val(fd->refs->h_meta, k))) + continue; + + if (r->length && r->length != fd->header->hrecs->ref[i].len) { + assert(strcmp(r->name, fd->header->hrecs->ref[i].name) == 0); + + // Should we also check MD5sums here to ensure the correct + // reference was given? + hts_log_warning("Header @SQ length mismatch for ref %s, %"PRIhts_pos" vs %d", + r->name, fd->header->hrecs->ref[i].len, (int)r->length); + + // Fixing the parsed @SQ header will make MD:Z: strings work + // and also stop it producing N for the sequence. + fd->header->hrecs->ref[i].len = r->length; + } + } +} + +/* + * Indexes references by the order they appear in a BAM file. This may not + * necessarily be the same order they appear in the fasta reference file. 
+ * + * Returns 0 on success + * -1 on failure + */ +int refs2id(refs_t *r, sam_hdr_t *hdr) { + int i; + sam_hrecs_t *h = hdr->hrecs; + + if (r->ref_id) + free(r->ref_id); + if (r->last) + r->last = NULL; + + r->ref_id = calloc(h->nref, sizeof(*r->ref_id)); + if (!r->ref_id) + return -1; + + r->nref = h->nref; + for (i = 0; i < h->nref; i++) { + khint_t k = kh_get(refs, r->h_meta, h->ref[i].name); + if (k != kh_end(r->h_meta)) { + r->ref_id[i] = kh_val(r->h_meta, k); + } else { + hts_log_warning("Unable to find ref name '%s'", h->ref[i].name); + } + } + + return 0; +} + +/* + * Generates refs_t entries based on @SQ lines in the header. + * Returns 0 on success + * -1 on failure + */ +static int refs_from_header(cram_fd *fd) { + if (!fd) + return -1; + + refs_t *r = fd->refs; + if (!r) + return -1; + + sam_hdr_t *h = fd->header; + if (!h) + return 0; + + if (!h->hrecs) { + if (-1 == sam_hdr_fill_hrecs(h)) + return -1; + } + + if (h->hrecs->nref == 0) + return 0; + + //fprintf(stderr, "refs_from_header for %p mode %c\n", fd, fd->mode); + + /* Existing refs are fine, as long as they're compatible with the hdr. 
*/ + ref_entry **new_ref_id = realloc(r->ref_id, (r->nref + h->hrecs->nref) * sizeof(*r->ref_id)); + if (!new_ref_id) + return -1; + r->ref_id = new_ref_id; + + int i, j; + /* Copy info from h->ref[i] over to r */ + for (i = 0, j = r->nref; i < h->hrecs->nref; i++) { + sam_hrec_type_t *ty; + sam_hrec_tag_t *tag; + khint_t k; + int n; + + k = kh_get(refs, r->h_meta, h->hrecs->ref[i].name); + if (k != kh_end(r->h_meta)) + // Ref already known about + continue; + + if (!(r->ref_id[j] = calloc(1, sizeof(ref_entry)))) + return -1; + + if (!h->hrecs->ref[i].name) + return -1; + + r->ref_id[j]->name = string_dup(r->pool, h->hrecs->ref[i].name); + if (!r->ref_id[j]->name) return -1; + r->ref_id[j]->length = 0; // marker for not yet loaded + + /* Initialise likely filename if known */ + if ((ty = sam_hrecs_find_type_id(h->hrecs, "SQ", "SN", h->hrecs->ref[i].name))) { + if ((tag = sam_hrecs_find_key(ty, "M5", NULL))) { + r->ref_id[j]->fn = string_dup(r->pool, tag->str+3); + //fprintf(stderr, "Tagging @SQ %s / %s\n", r->ref_id[h]->name, r->ref_id[h]->fn); + } + } + + k = kh_put(refs, r->h_meta, r->ref_id[j]->name, &n); + if (n <= 0) // already exists or error + return -1; + kh_val(r->h_meta, k) = r->ref_id[j]; + + j++; + } + r->nref = j; + + return 0; +} + +/* + * Attaches a header to a cram_fd. + * + * This should be used when creating a new cram_fd for writing where + * we have a header already constructed (eg from a file we've read + * in). + */ +int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr) { + if (!fd || !hdr ) + return -1; + + if (fd->header != hdr) { + if (fd->header) + sam_hdr_destroy(fd->header); + fd->header = sam_hdr_dup(hdr); + if (!fd->header) + return -1; + } + return refs_from_header(fd); +} + +int cram_set_header(cram_fd *fd, sam_hdr_t *hdr) { + return cram_set_header2(fd, hdr); +} + +/* + * Returns whether the path refers to a directory. 
+ */ +static int is_directory(char *fn) { + struct stat buf; + if ( stat(fn,&buf) ) return 0; + return S_ISDIR(buf.st_mode); +} + +/* + * Converts a directory and a filename into an expanded path, replacing %s + * in directory with the filename and %[0-9]+s with portions of the filename + * Any remaining parts of filename are added to the end with /%s. + */ +static int expand_cache_path(char *path, char *dir, const char *fn) { + char *cp, *start = path; + size_t len; + size_t sz = PATH_MAX; + + while ((cp = strchr(dir, '%'))) { + if (cp-dir >= sz) return -1; + strncpy(path, dir, cp-dir); + path += cp-dir; + sz -= cp-dir; + + if (*++cp == 's') { + len = strlen(fn); + if (len >= sz) return -1; + strcpy(path, fn); + path += len; + sz -= len; + fn += len; + cp++; + } else if (*cp >= '0' && *cp <= '9') { + char *endp; + long l; + + l = strtol(cp, &endp, 10); + l = MIN(l, strlen(fn)); + if (*endp == 's') { + if (l >= sz) return -1; + strncpy(path, fn, l); + path += l; + fn += l; + sz -= l; + *path = 0; + cp = endp+1; + } else { + if (sz < 3) return -1; + *path++ = '%'; + *path++ = *cp++; + } + } else { + if (sz < 3) return -1; + *path++ = '%'; + *path++ = *cp++; + } + dir = cp; + } + + len = strlen(dir); + if (len >= sz) return -1; + strcpy(path, dir); + path += len; + sz -= len; + + len = strlen(fn) + ((*fn && path > start && path[-1] != '/') ? 1 : 0); + if (len >= sz) return -1; + if (*fn && path > start && path[-1] != '/') + *path++ = '/'; + strcpy(path, fn); + return 0; +} + +/* + * Make the directory containing path and any prefix directories. 
+ */ +static void mkdir_prefix(char *path, int mode) { + char *cp = strrchr(path, '/'); + if (!cp) + return; + + *cp = 0; + if (is_directory(path)) { + *cp = '/'; + return; + } + + if (mkdir(path, mode) == 0) { + chmod(path, mode); + *cp = '/'; + return; + } + + mkdir_prefix(path, mode); + mkdir(path, mode); + chmod(path, mode); + *cp = '/'; +} + +/* + * Return the cache directory to use, based on the first of these + * environment variables to be set to a non-empty value. + */ +static const char *get_cache_basedir(const char **extra) { + char *base; + + *extra = ""; + + base = getenv("XDG_CACHE_HOME"); + if (base && *base) return base; + + base = getenv("HOME"); + if (base && *base) { *extra = "/.cache"; return base; } + + base = getenv("TMPDIR"); + if (base && *base) return base; + + base = getenv("TEMP"); + if (base && *base) return base; + + return "/tmp"; +} + +/* + * Queries the M5 string from the header and attempts to populate the + * reference from this using the REF_PATH environment. + * + * Returns 0 on success + * -1 on failure + */ +static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) { + char *ref_path = getenv("REF_PATH"); + sam_hrec_type_t *ty; + sam_hrec_tag_t *tag; + char path[PATH_MAX]; + kstring_t path_tmp = KS_INITIALIZE; + char cache[PATH_MAX], cache_root[PATH_MAX]; + char *local_cache = getenv("REF_CACHE"); + mFILE *mf; + int local_path = 0; + + hts_log_info("Running cram_populate_ref on fd %p, id %d", (void *)fd, id); + + cache_root[0] = '\0'; + + if (!ref_path || *ref_path == '\0') { + /* + * If we have no ref path, we use the EBI server. + * However to avoid spamming it we require a local ref cache too. 
+ */ + ref_path = "https://www.ebi.ac.uk/ena/cram/md5/%s"; + if (!local_cache || *local_cache == '\0') { + const char *extra; + const char *base = get_cache_basedir(&extra); + snprintf(cache_root, PATH_MAX, "%s%s/hts-ref", base, extra); + snprintf(cache,PATH_MAX, "%s%s/hts-ref/%%2s/%%2s/%%s", base, extra); + local_cache = cache; + hts_log_info("Populating local cache: %s", local_cache); + } + } + + if (!r->name) + return -1; + + if (!(ty = sam_hrecs_find_type_id(fd->header->hrecs, "SQ", "SN", r->name))) + return -1; + + if (!(tag = sam_hrecs_find_key(ty, "M5", NULL))) + goto no_M5; + + hts_log_info("Querying ref %s", tag->str+3); + + /* Use cache if available */ + if (local_cache && *local_cache) { + if (expand_cache_path(path, local_cache, tag->str+3) == 0) + local_path = 1; + } + +#ifndef HAVE_MMAP + char *path2; + /* Search local files in REF_PATH; we can open them and return as above */ + if (!local_path && (path2 = find_path(tag->str+3, ref_path))) { + int len = snprintf(path, PATH_MAX, "%s", path2); + free(path2); + if (len > 0 && len < PATH_MAX) // in case it's too long + local_path = 1; + } +#endif + + /* Found via REF_CACHE or local REF_PATH file */ + if (local_path) { + struct stat sb; + BGZF *fp; + + if (0 == stat(path, &sb) + && S_ISREG(sb.st_mode) + && (fp = bgzf_open(path, "r"))) { + r->length = sb.st_size; + r->offset = r->line_length = r->bases_per_line = 0; + + r->fn = string_dup(fd->refs->pool, path); + + if (fd->refs->fp) + if (bgzf_close(fd->refs->fp) != 0) + return -1; + fd->refs->fp = fp; + fd->refs->fn = r->fn; + r->is_md5 = 1; + r->validated_md5 = 1; + + // Fall back to cram_get_ref() where it'll do the actual + // reading of the file. 
+ return 0; + } + } + + + /* Otherwise search full REF_PATH; slower as loads entire file */ + if ((mf = open_path_mfile(tag->str+3, ref_path, NULL))) { + size_t sz; + r->seq = mfsteal(mf, &sz); + if (r->seq) { + r->mf = NULL; + } else { + // keep mf around as we couldn't detach + r->seq = mf->data; + r->mf = mf; + } + r->length = sz; + r->is_md5 = 1; + r->validated_md5 = 1; + } else { + refs_t *refs; + const char *fn; + + no_M5: + /* Failed to find in search path or M5 cache, see if @SQ UR: tag? */ + if (!(tag = sam_hrecs_find_key(ty, "UR", NULL))) + return -1; + + fn = (strncmp(tag->str+3, "file:", 5) == 0) + ? tag->str+8 + : tag->str+3; + + if (fd->refs->fp) { + if (bgzf_close(fd->refs->fp) != 0) + return -1; + fd->refs->fp = NULL; + } + if (!(refs = refs_load_fai(fd->refs, fn, 0))) + return -1; + sanitise_SQ_lines(fd); + + fd->refs = refs; + if (fd->refs->fp) { + if (bgzf_close(fd->refs->fp) != 0) + return -1; + fd->refs->fp = NULL; + } + + if (!fd->refs->fn) + return -1; + + if (-1 == refs2id(fd->refs, fd->header)) + return -1; + if (!fd->refs->ref_id || !fd->refs->ref_id[id]) + return -1; + + // Local copy already, so fall back to cram_get_ref(). + return 0; + } + + /* Populate the local disk cache if required */ + if (local_cache && *local_cache) { + hFILE *fp; + + if (*cache_root && !is_directory(cache_root)) { + hts_log_warning("Creating reference cache directory %s\n" + "This may become large; see the samtools(1) manual page REF_CACHE discussion", + cache_root); + } + + if (expand_cache_path(path, local_cache, tag->str+3) < 0) { + return 0; // Not fatal - we have the data already so keep going. + } + hts_log_info("Writing cache file '%s'", path); + mkdir_prefix(path, 01777); + + fp = hts_open_tmpfile(path, "wx", &path_tmp); + if (!fp) { + perror(path_tmp.s); + free(path_tmp.s); + + // Not fatal - we have the data already so keep going. 
+ return 0; + } + + // Check md5sum + hts_md5_context *md5; + char unsigned md5_buf1[16]; + char md5_buf2[33]; + + if (!(md5 = hts_md5_init())) { + hclose_abruptly(fp); + unlink(path_tmp.s); + free(path_tmp.s); + return -1; + } + hts_md5_update(md5, r->seq, r->length); + hts_md5_final(md5_buf1, md5); + hts_md5_destroy(md5); + hts_md5_hex(md5_buf2, md5_buf1); + + if (strncmp(tag->str+3, md5_buf2, 32) != 0) { + hts_log_error("Mismatching md5sum for downloaded reference"); + hclose_abruptly(fp); + unlink(path_tmp.s); + free(path_tmp.s); + return -1; + } + + ssize_t length_written = hwrite(fp, r->seq, r->length); + if (hclose(fp) < 0 || length_written != r->length || + chmod(path_tmp.s, 0444) < 0 || + rename(path_tmp.s, path) < 0) { + hts_log_error("Creating reference at %s failed: %s", + path, strerror(errno)); + unlink(path_tmp.s); + } + } + + free(path_tmp.s); + return 0; +} + +static void cram_ref_incr_locked(refs_t *r, int id) { + RP("%d INC REF %d, %d %p\n", gettid(), id, + (int)(id>=0 && r->ref_id[id]?r->ref_id[id]->count+1:-999), + id>=0 && r->ref_id[id]?r->ref_id[id]->seq:(char *)1); + + if (id < 0 || !r->ref_id[id] || !r->ref_id[id]->seq) + return; + + if (r->last_id == id) + r->last_id = -1; + + ++r->ref_id[id]->count; +} + +void cram_ref_incr(refs_t *r, int id) { + pthread_mutex_lock(&r->lock); + cram_ref_incr_locked(r, id); + pthread_mutex_unlock(&r->lock); +} + +static void cram_ref_decr_locked(refs_t *r, int id) { + RP("%d DEC REF %d, %d %p\n", gettid(), id, + (int)(id>=0 && r->ref_id[id]?r->ref_id[id]->count-1:-999), + id>=0 && r->ref_id[id]?r->ref_id[id]->seq:(char *)1); + + if (id < 0 || !r->ref_id[id] || !r->ref_id[id]->seq) { + return; + } + + if (--r->ref_id[id]->count <= 0) { + assert(r->ref_id[id]->count == 0); + if (r->last_id >= 0) { + if (r->ref_id[r->last_id]->count <= 0 && + r->ref_id[r->last_id]->seq) { + RP("%d FREE REF %d (%p)\n", gettid(), + r->last_id, r->ref_id[r->last_id]->seq); + ref_entry_free_seq(r->ref_id[r->last_id]); + if 
(r->ref_id[r->last_id]->is_md5) r->ref_id[r->last_id]->length = 0; + } + } + r->last_id = id; + } +} + +void cram_ref_decr(refs_t *r, int id) { + pthread_mutex_lock(&r->lock); + cram_ref_decr_locked(r, id); + pthread_mutex_unlock(&r->lock); +} + +/* + * Used by cram_ref_load and cram_get_ref. The file handle will have + * already been opened, so we can catch it. The ref_entry *e informs us + * of whether this is a multi-line fasta file or a raw MD5 style file. + * Either way we create a single contiguous sequence. + * + * Returns all or part of a reference sequence on success (malloced); + * NULL on failure. + */ +static char *load_ref_portion(BGZF *fp, ref_entry *e, + hts_pos_t start, hts_pos_t end) { + off_t offset, len; + char *seq; + + if (end < start) + end = start; + + /* + * Compute locations in file. This is trivial for the MD5 files, but + * is still necessary for the fasta variants. + * + * Note the offset here, as with faidx, has the assumption that white- + * space (the diff between line_length and bases_per_line) only occurs + * at the end of a line of text. + */ + offset = e->line_length + ? e->offset + (start-1)/e->bases_per_line * e->line_length + + (start-1) % e->bases_per_line + : start-1; + + len = (e->line_length + ? e->offset + (end-1)/e->bases_per_line * e->line_length + + (end-1) % e->bases_per_line + : end-1) - offset + 1; + + if (bgzf_useek(fp, offset, SEEK_SET) < 0) { + perror("bgzf_useek() on reference file"); + return NULL; + } + + if (len == 0 || !(seq = malloc(len))) { + return NULL; + } + + if (len != bgzf_read(fp, seq, len)) { + perror("bgzf_read() on reference file"); + free(seq); + return NULL; + } + + /* Strip white-space if required. */ + if (len != end-start+1) { + hts_pos_t i, j; + char *cp = seq; + char *cp_to; + + // Copy up to the first white-space, and then repeatedly just copy + // bases_per_line verbatim, and use the slow method to end again. 
+ // + // This may seem excessive, but this code can be a significant + // portion of total CRAM decode CPU time for shallow data sets. + for (i = j = 0; i < len; i++) { + if (!isspace_c(cp[i])) + cp[j++] = cp[i] & ~0x20; + else + break; + } + while (i < len && isspace_c(cp[i])) + i++; + while (i < len - e->line_length) { + hts_pos_t j_end = j + e->bases_per_line; + while (j < j_end) + cp[j++] = cp[i++] & ~0x20; // toupper equiv + i += e->line_length - e->bases_per_line; + } + for (; i < len; i++) { + if (!isspace_c(cp[i])) + cp[j++] = cp[i] & ~0x20; + } + + cp_to = cp+j; + + if (cp_to - seq != end-start+1) { + hts_log_error("Malformed reference file"); + free(seq); + return NULL; + } + } else { + int i; + for (i = 0; i < len; i++) { + seq[i] = toupper_c(seq[i]); + } + } + + return seq; +} + +/* + * Load the entire reference 'id'. + * This also increments the reference count by 1. + * + * Returns ref_entry on success; + * NULL on failure + */ +ref_entry *cram_ref_load(refs_t *r, int id, int is_md5) { + ref_entry *e = r->ref_id[id]; + hts_pos_t start = 1, end = e->length; + char *seq; + + if (e->seq) { + return e; + } + + assert(e->count == 0); + + if (r->last) { +#ifdef REF_DEBUG + int idx = 0; + for (idx = 0; idx < r->nref; idx++) + if (r->last == r->ref_id[idx]) + break; + RP("%d cram_ref_load DECR %d\n", gettid(), idx); +#endif + assert(r->last->count > 0); + if (--r->last->count <= 0) { + RP("%d FREE REF %d (%p)\n", gettid(), id, r->ref_id[id]->seq); + if (r->last->seq) + ref_entry_free_seq(r->last); + } + } + + if (!r->fn) + return NULL; + + /* Open file if it's not already the current open reference */ + if (strcmp(r->fn, e->fn) || r->fp == NULL) { + if (r->fp) + if (bgzf_close(r->fp) != 0) + return NULL; + r->fn = e->fn; + if (!(r->fp = bgzf_open_ref(r->fn, "r", is_md5))) + return NULL; + } + + RP("%d Loading ref %d (%d..%d)\n", gettid(), id, start, end); + + if (!(seq = load_ref_portion(r->fp, e, start, end))) { + return NULL; + } + + RP("%d Loaded ref %d 
(%d..%d) = %p\n", gettid(), id, start, end, seq); + + RP("%d INC REF %d, %"PRId64"\n", gettid(), id, (e->count+1)); + e->seq = seq; + e->mf = NULL; + e->count++; + + /* + * Also keep track of last used ref so incr/decr loops on the same + * sequence don't cause load/free loops. + */ + RP("%d cram_ref_load INCR %d => %"PRId64"\n", gettid(), id, e->count+1); + r->last = e; + e->count++; + + return e; +} + +/* + * Returns a portion of a reference sequence from start to end inclusive. + * The returned pointer is owned by either the cram_file fd or by the + * internal refs_t structure and should not be freed by the caller. + * + * The difference is whether or not this refs_t is in use by just the one + * cram_fd or by multiples, or whether we have multiple threads accessing + * references. In either case fd->shared will be true and we start using + * reference counting to track the number of users of a specific reference + * sequence. + * + * Otherwise the ref seq returned is allocated as part of cram_fd itself + * and will be freed up on the next call to cram_get_ref or cram_close. + * + * To return the entire reference sequence, specify start as 1 and end + * as 0. + * + * To cease using a reference, call cram_ref_decr(). + * + * Returns reference on success, + * NULL on failure + */ +char *cram_get_ref(cram_fd *fd, int id, hts_pos_t start, hts_pos_t end) { + ref_entry *r; + char *seq; + int ostart = start; + + if (id == -1 || start < 1) + return NULL; + + /* FIXME: axiomatic query of r->seq being true? + * Or shortcut for unsorted data where we load once and never free? + */ + + //fd->shared_ref = 1; // hard code for now to simplify things + + pthread_mutex_lock(&fd->ref_lock); + + RP("%d cram_get_ref on fd %p, id %d, range %d..%d\n", gettid(), fd, id, start, end); + + /* + * Unsorted data implies we want to fetch an entire reference at a time. + * We just deal with this at the moment by claiming we're sharing + * references instead, which has the same requirement. 
+ */ + if (fd->unsorted) + fd->shared_ref = 1; + + + /* Sanity checking: does this ID exist? */ + if (id >= fd->refs->nref) { + hts_log_error("No reference found for id %d", id); + pthread_mutex_unlock(&fd->ref_lock); + return NULL; + } + + if (!fd->refs || !fd->refs->ref_id[id]) { + hts_log_error("No reference found for id %d", id); + pthread_mutex_unlock(&fd->ref_lock); + return NULL; + } + + if (!(r = fd->refs->ref_id[id])) { + hts_log_error("No reference found for id %d", id); + pthread_mutex_unlock(&fd->ref_lock); + return NULL; + } + + + /* + * It has an entry, but may not have been populated yet. + * Any manually loaded .fai files have their lengths known. + * A ref entry computed from @SQ lines (M5 or UR field) will have + * r->length == 0 unless it's been loaded once and verified that we have + * an on-disk filename for it. + * + * 19 Sep 2013: Moved the lock here as the cram_populate_ref code calls + * open_path_mfile and libcurl, which isn't multi-thread safe unless I + * rewrite my code to have one curl handle per thread. + */ + pthread_mutex_lock(&fd->refs->lock); + if (r->length == 0) { + if (fd->ref_fn) + hts_log_warning("Reference file given, but ref '%s' not present", + r->name); + if (cram_populate_ref(fd, id, r) == -1) { + hts_log_warning("Failed to populate reference for id %d", id); + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + return NULL; + } + r = fd->refs->ref_id[id]; + if (fd->unsorted) + cram_ref_incr_locked(fd->refs, id); + } + + + /* + * We now know that we the filename containing the reference, so check + * for limits. If it's over half the reference we'll load all of it in + * memory as this will speed up subsequent calls. + */ + if (end < 1) + end = r->length; + if (end >= r->length) + end = r->length; + + if (end - start >= 0.5*r->length || fd->shared_ref) { + start = 1; + end = r->length; + } + + /* + * Maybe we have it cached already? If so use it. 
+ * + * Alternatively if we don't have the sequence but we're sharing + * references and/or are asking for the entire length of it, then + * load the full reference into the refs structure and return + * a pointer to that one instead. + */ + if (fd->shared_ref || r->seq || (start == 1 && end == r->length)) { + char *cp; + + if (id >= 0) { + if (r->seq) { + cram_ref_incr_locked(fd->refs, id); + } else { + ref_entry *e; + if (!(e = cram_ref_load(fd->refs, id, r->is_md5))) { + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + return NULL; + } + + /* unsorted data implies cache ref indefinitely, to avoid + * continually loading and unloading. + */ + if (fd->unsorted) + cram_ref_incr_locked(fd->refs, id); + } + + fd->ref = NULL; /* We never access it directly */ + fd->ref_start = 1; + fd->ref_end = r->length; + fd->ref_id = id; + + cp = fd->refs->ref_id[id]->seq + ostart-1; + } else { + fd->ref = NULL; + cp = NULL; + } + + RP("%d cram_get_ref returning for id %d, count %d\n", gettid(), id, (int)r->count); + + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + return cp; + } + + /* + * Otherwise we're not sharing, we don't have a copy of it already and + * we're only asking for a small portion of it. + * + * In this case load up just that segment ourselves, freeing any old + * small segments in the process. 
+ */ + + /* Unmapped ref ID */ + if (id < 0 || !fd->refs->fn) { + if (fd->ref_free) { + free(fd->ref_free); + fd->ref_free = NULL; + } + fd->ref = NULL; + fd->ref_id = id; + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + return NULL; + } + + /* Open file if it's not already the current open reference */ + if (strcmp(fd->refs->fn, r->fn) || fd->refs->fp == NULL) { + if (fd->refs->fp) + if (bgzf_close(fd->refs->fp) != 0) + return NULL; + fd->refs->fn = r->fn; + if (!(fd->refs->fp = bgzf_open_ref(fd->refs->fn, "r", r->is_md5))) { + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + return NULL; + } + } + + if (!(fd->ref = load_ref_portion(fd->refs->fp, r, start, end))) { + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + return NULL; + } + + if (fd->ref_free) + free(fd->ref_free); + + fd->ref_id = id; + fd->ref_start = start; + fd->ref_end = end; + fd->ref_free = fd->ref; + seq = fd->ref; + + pthread_mutex_unlock(&fd->refs->lock); + pthread_mutex_unlock(&fd->ref_lock); + + return seq ? seq + ostart - start : NULL; +} + +/* + * If fd has been opened for reading, it may be permitted to specify 'fn' + * as NULL and let the code auto-detect the reference by parsing the + * SAM header @SQ lines. + */ +int cram_load_reference(cram_fd *fd, char *fn) { + int ret = 0; + + if (fn) { + fd->refs = refs_load_fai(fd->refs, fn, + !(fd->embed_ref>0 && fd->mode == 'r')); + fn = fd->refs ? 
fd->refs->fn : NULL; + if (!fn) + ret = -1; + sanitise_SQ_lines(fd); + } + fd->ref_fn = fn; + + if ((!fd->refs || (fd->refs->nref == 0 && !fn)) && fd->header) { + if (fd->refs) + refs_free(fd->refs); + if (!(fd->refs = refs_create())) + return -1; + if (-1 == refs_from_header(fd)) + return -1; + } + + if (fd->header) + if (-1 == refs2id(fd->refs, fd->header)) + return -1; + + return ret; +} + +/* ---------------------------------------------------------------------- + * Containers + */ + +/* + * Creates a new container, specifying the maximum number of slices + * and records permitted. + * + * Returns cram_container ptr on success + * NULL on failure + */ +cram_container *cram_new_container(int nrec, int nslice) { + cram_container *c = calloc(1, sizeof(*c)); + enum cram_DS_ID id; + + if (!c) + return NULL; + + c->curr_ref = -2; + + c->max_c_rec = nrec * nslice; + c->curr_c_rec = 0; + + c->max_rec = nrec; + c->record_counter = 0; + c->num_bases = 0; + c->s_num_bases = 0; + + c->max_slice = nslice; + c->curr_slice = 0; + + c->pos_sorted = 1; + c->max_apos = 0; + c->multi_seq = 0; + c->qs_seq_orient = 1; + c->no_ref = 0; + c->embed_ref = -1; // automatic selection + + c->bams = NULL; + + if (!(c->slices = calloc(nslice != 0 ? 
nslice : 1, sizeof(cram_slice *)))) + goto err; + c->slice = NULL; + + if (!(c->comp_hdr = cram_new_compression_header())) + goto err; + c->comp_hdr_block = NULL; + + for (id = DS_RN; id < DS_TN; id++) + if (!(c->stats[id] = cram_stats_create())) goto err; + + //c->aux_B_stats = cram_stats_create(); + + if (!(c->tags_used = kh_init(m_tagmap))) + goto err; + c->refs_used = 0; + c->ref_free = 0; + + return c; + + err: + if (c) { + if (c->slices) + free(c->slices); + free(c); + } + return NULL; +} + +static void free_bam_list(bam_seq_t **bams, int max_rec) { + int i; + for (i = 0; i < max_rec; i++) + bam_free(bams[i]); + + free(bams); +} + +void cram_free_container(cram_container *c) { + enum cram_DS_ID id; + int i; + + if (!c) + return; + + if (c->refs_used) + free(c->refs_used); + + if (c->landmark) + free(c->landmark); + + if (c->comp_hdr) + cram_free_compression_header(c->comp_hdr); + + if (c->comp_hdr_block) + cram_free_block(c->comp_hdr_block); + + // Free the slices; filled out by encoder only + if (c->slices) { + for (i = 0; i < c->max_slice; i++) { + if (c->slices[i]) + cram_free_slice(c->slices[i]); + if (c->slices[i] == c->slice) + c->slice = NULL; + } + free(c->slices); + } + + // Free the current slice; set by both encoder & decoder + if (c->slice) { + cram_free_slice(c->slice); + c->slice = NULL; + } + + for (id = DS_RN; id < DS_TN; id++) + if (c->stats[id]) cram_stats_free(c->stats[id]); + + //if (c->aux_B_stats) cram_stats_free(c->aux_B_stats); + + if (c->tags_used) { + khint_t k; + + for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) { + if (!kh_exist(c->tags_used, k)) + continue; + + cram_tag_map *tm = (cram_tag_map *)kh_val(c->tags_used, k); + if (tm) { + cram_codec *c = tm->codec; + + if (c) c->free(c); + + // If tm->blk or tm->blk2 is set, then we haven't yet got to + // cram_encode_container which copies the blocks to s->aux_block + // and NULLifies tm->blk*. 
In this case we failed to complete + // the container construction, so we have to free up our partially + // converted CRAM. + cram_free_block(tm->blk); + cram_free_block(tm->blk2); + free(tm); + } + } + + kh_destroy(m_tagmap, c->tags_used); + } + + if (c->ref_free) + free(c->ref); + + if (c->bams) + free_bam_list(c->bams, c->max_c_rec); + + free(c); +} + +/* + * Reads a container header. + * + * Returns cram_container on success + * NULL on failure or no container left (fd->err == 0). + */ +cram_container *cram_read_container(cram_fd *fd) { + cram_container c2, *c; + int i, s; + size_t rd = 0; + uint32_t crc = 0; + + fd->err = 0; + fd->eof = 0; + + memset(&c2, 0, sizeof(c2)); + if (CRAM_MAJOR_VERS(fd->version) == 1) { + if ((s = fd->vv.varint_decode32_crc(fd, &c2.length, &crc)) == -1) { + fd->eof = fd->empty_container ? 1 : 2; + return NULL; + } else { + rd+=s; + } + } else if (CRAM_MAJOR_VERS(fd->version) < 4) { + uint32_t len; + if ((s = int32_decode(fd, &c2.length)) == -1) { + if (CRAM_MAJOR_VERS(fd->version) == 2 && + CRAM_MINOR_VERS(fd->version) == 0) + fd->eof = 1; // EOF blocks arrived in v2.1 + else + fd->eof = fd->empty_container ? 1 : 2; + return NULL; + } else { + rd+=s; + } + len = le_int4(c2.length); + crc = crc32(0L, (unsigned char *)&len, 4); + } else { + if ((s = fd->vv.varint_decode32_crc(fd, &c2.length, &crc)) == -1) { + fd->eof = fd->empty_container ? 
1 : 2; + return NULL; + } else { + rd+=s; + } + } + if ((s = fd->vv.varint_decode32s_crc(fd, &c2.ref_seq_id, &crc)) == -1) return NULL; else rd+=s; + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + int64_t i64; + if ((s = fd->vv.varint_decode64_crc(fd, &i64, &crc))== -1) return NULL; else rd+=s; + c2.ref_seq_start = i64; + if ((s = fd->vv.varint_decode64_crc(fd, &i64, &crc)) == -1) return NULL; else rd+=s; + c2.ref_seq_span = i64; + } else { + int32_t i32; + if ((s = fd->vv.varint_decode32_crc(fd, &i32, &crc))== -1) return NULL; else rd+=s; + c2.ref_seq_start = i32; + if ((s = fd->vv.varint_decode32_crc(fd, &i32, &crc)) == -1) return NULL; else rd+=s; + c2.ref_seq_span = i32; + } + if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_records, &crc)) == -1) return NULL; else rd+=s; + + if (CRAM_MAJOR_VERS(fd->version) == 1) { + c2.record_counter = 0; + c2.num_bases = 0; + } else { + if (CRAM_MAJOR_VERS(fd->version) >= 3) { + if ((s = fd->vv.varint_decode64_crc(fd, &c2.record_counter, &crc)) == -1) + return NULL; + else + rd += s; + } else { + int32_t i32; + if ((s = fd->vv.varint_decode32_crc(fd, &i32, &crc)) == -1) + return NULL; + else + rd += s; + c2.record_counter = i32; + } + + if ((s = fd->vv.varint_decode64_crc(fd, &c2.num_bases, &crc))== -1) + return NULL; + else + rd += s; + } + if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_blocks, &crc)) == -1) + return NULL; + else + rd+=s; + if ((s = fd->vv.varint_decode32_crc(fd, &c2.num_landmarks, &crc))== -1) + return NULL; + else + rd+=s; + + if (c2.num_landmarks < 0 || c2.num_landmarks >= SIZE_MAX / sizeof(int32_t)) + return NULL; + + if (!(c = calloc(1, sizeof(*c)))) + return NULL; + + *c = c2; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (c->num_landmarks > FUZZ_ALLOC_LIMIT/sizeof(int32_t)) { + fd->err = errno = ENOMEM; + cram_free_container(c); + return NULL; + } +#endif + if (c->num_landmarks && !(c->landmark = malloc(c->num_landmarks * sizeof(int32_t)))) { + fd->err = errno; + cram_free_container(c); + return 
NULL; + } + for (i = 0; i < c->num_landmarks; i++) { + if ((s = fd->vv.varint_decode32_crc(fd, &c->landmark[i], &crc)) == -1) { + cram_free_container(c); + return NULL; + } else { + rd += s; + } + } + + if (CRAM_MAJOR_VERS(fd->version) >= 3) { + if (-1 == int32_decode(fd, (int32_t *)&c->crc32)) { + cram_free_container(c); + return NULL; + } else { + rd+=4; + } + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // Pretend the CRC was OK so the fuzzer doesn't have to get it right + crc = c->crc32; +#endif + + if (crc != c->crc32) { + hts_log_error("Container header CRC32 failure"); + cram_free_container(c); + return NULL; + } + } + + c->offset = rd; + c->slices = NULL; + c->slice = NULL; + c->curr_slice = 0; + c->max_slice = c->num_landmarks; + c->slice_rec = 0; + c->curr_rec = 0; + c->max_rec = 0; + + if (c->ref_seq_id == -2) { + c->multi_seq = 1; + fd->multi_seq = 1; + } + + fd->empty_container = + (c->num_records == 0 && + c->ref_seq_id == -1 && + c->ref_seq_start == 0x454f46 /* EOF */) ? 1 : 0; + + return c; +} + + +/* MAXIMUM storage size needed for the container. */ +int cram_container_size(cram_container *c) { + return 55 + 5*c->num_landmarks; +} + + +/* + * Stores the container structure in dat and returns *size as the + * number of bytes written to dat[]. The input size of dat is also + * held in *size and should be initialised to cram_container_size(c). + * + * Returns 0 on success; + * -1 on failure + */ +int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size) +{ + char *cp = (char *)dat; + int i; + + // Check the input buffer is large enough according to our stated + // requirements. (NOTE: it may actually take less.) 
+ if (cram_container_size(c) > *size) + return -1; + + if (CRAM_MAJOR_VERS(fd->version) == 1) { + cp += itf8_put(cp, c->length); + } else { + *(int32_t *)cp = le_int4(c->length); + cp += 4; + } + if (c->multi_seq) { + cp += fd->vv.varint_put32(cp, NULL, -2); + cp += fd->vv.varint_put32(cp, NULL, 0); + cp += fd->vv.varint_put32(cp, NULL, 0); + } else { + cp += fd->vv.varint_put32s(cp, NULL, c->ref_seq_id); + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_start); + cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_span); + } else { + cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_start); + cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_span); + } + } + cp += fd->vv.varint_put32(cp, NULL, c->num_records); + if (CRAM_MAJOR_VERS(fd->version) == 2) { + cp += fd->vv.varint_put64(cp, NULL, c->record_counter); + } else if (CRAM_MAJOR_VERS(fd->version) >= 3) { + cp += fd->vv.varint_put32(cp, NULL, c->record_counter); + } + cp += fd->vv.varint_put64(cp, NULL, c->num_bases); + cp += fd->vv.varint_put32(cp, NULL, c->num_blocks); + cp += fd->vv.varint_put32(cp, NULL, c->num_landmarks); + for (i = 0; i < c->num_landmarks; i++) + cp += fd->vv.varint_put32(cp, NULL, c->landmark[i]); + + if (CRAM_MAJOR_VERS(fd->version) >= 3) { + c->crc32 = crc32(0L, (uc *)dat, cp-dat); + cp[0] = c->crc32 & 0xff; + cp[1] = (c->crc32 >> 8) & 0xff; + cp[2] = (c->crc32 >> 16) & 0xff; + cp[3] = (c->crc32 >> 24) & 0xff; + cp += 4; + } + + *size = cp-dat; // actual used size + + return 0; +} + + +/* + * Writes a container structure. 
+ * + * Returns 0 on success + * -1 on failure + */ +int cram_write_container(cram_fd *fd, cram_container *c) { + char buf_a[1024], *buf = buf_a, *cp; + int i; + + if (61 + c->num_landmarks * 10 >= 1024) { + buf = malloc(61 + c->num_landmarks * 10); + if (!buf) + return -1; + } + cp = buf; + + if (CRAM_MAJOR_VERS(fd->version) == 1) { + cp += itf8_put(cp, c->length); + } else if (CRAM_MAJOR_VERS(fd->version) <= 3) { + *(int32_t *)cp = le_int4(c->length); + cp += 4; + } else { + cp += fd->vv.varint_put32(cp, NULL, c->length); + } + if (c->multi_seq) { + cp += fd->vv.varint_put32(cp, NULL, (uint32_t)-2); + cp += fd->vv.varint_put32(cp, NULL, 0); + cp += fd->vv.varint_put32(cp, NULL, 0); + } else { + cp += fd->vv.varint_put32s(cp, NULL, c->ref_seq_id); + if (CRAM_MAJOR_VERS(fd->version) >= 4) { + cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_start); + cp += fd->vv.varint_put64(cp, NULL, c->ref_seq_span); + } else { + cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_start); + cp += fd->vv.varint_put32(cp, NULL, c->ref_seq_span); + } + } + cp += fd->vv.varint_put32(cp, NULL, c->num_records); + if (CRAM_MAJOR_VERS(fd->version) >= 3) + cp += fd->vv.varint_put64(cp, NULL, c->record_counter); + else + cp += fd->vv.varint_put32(cp, NULL, c->record_counter); + cp += fd->vv.varint_put64(cp, NULL, c->num_bases); + cp += fd->vv.varint_put32(cp, NULL, c->num_blocks); + cp += fd->vv.varint_put32(cp, NULL, c->num_landmarks); + for (i = 0; i < c->num_landmarks; i++) + cp += fd->vv.varint_put32(cp, NULL, c->landmark[i]); + + if (CRAM_MAJOR_VERS(fd->version) >= 3) { + c->crc32 = crc32(0L, (uc *)buf, cp-buf); + cp[0] = c->crc32 & 0xff; + cp[1] = (c->crc32 >> 8) & 0xff; + cp[2] = (c->crc32 >> 16) & 0xff; + cp[3] = (c->crc32 >> 24) & 0xff; + cp += 4; + } + + if (cp-buf != hwrite(fd->fp, buf, cp-buf)) { + if (buf != buf_a) + free(buf); + return -1; + } + + if (buf != buf_a) + free(buf); + + return 0; +} + +// common component shared by cram_flush_container{,_mt} +static int 
cram_flush_container2(cram_fd *fd, cram_container *c) { + int i, j; + + if (c->curr_slice > 0 && !c->slices) + return -1; + + //fprintf(stderr, "Writing container %d, sum %u\n", c->record_counter, sum); + + off_t c_offset = htell(fd->fp); // File offset of container + + /* Write the container struct itself */ + if (0 != cram_write_container(fd, c)) + return -1; + + off_t hdr_size = htell(fd->fp) - c_offset; + + /* And the compression header */ + if (0 != cram_write_block(fd, c->comp_hdr_block)) + return -1; + + /* Followed by the slice blocks */ + off_t file_offset = htell(fd->fp); + for (i = 0; i < c->curr_slice; i++) { + cram_slice *s = c->slices[i]; + off_t spos = file_offset - c_offset - hdr_size; + + if (0 != cram_write_block(fd, s->hdr_block)) + return -1; + + for (j = 0; j < s->hdr->num_blocks; j++) { + if (0 != cram_write_block(fd, s->block[j])) + return -1; + } + + file_offset = htell(fd->fp); + off_t sz = file_offset - c_offset - hdr_size - spos; + + if (fd->idxfp) { + if (cram_index_slice(fd, c, s, fd->idxfp, c_offset, spos, sz) < 0) + return -1; + } + } + + return 0; +} + +/* + * Flushes a completely or partially full container to disk, writing + * container structure, header and blocks. This also calls the encoder + * functions. 
+ * + * Returns 0 on success + * -1 on failure + */ +int cram_flush_container(cram_fd *fd, cram_container *c) { + /* Encode the container blocks and generate compression header */ + if (0 != cram_encode_container(fd, c)) + return -1; + + return cram_flush_container2(fd, c); +} + +typedef struct { + cram_fd *fd; + cram_container *c; +} cram_job; + +void *cram_flush_thread(void *arg) { + cram_job *j = (cram_job *)arg; + + /* Encode the container blocks and generate compression header */ + if (0 != cram_encode_container(j->fd, j->c)) { + hts_log_error("Call to cram_encode_container failed"); + return NULL; + } + + return arg; +} + +static int cram_flush_result(cram_fd *fd) { + int i, ret = 0; + hts_tpool_result *r; + cram_container *lc = NULL; + + // NB: we can have one result per slice, not per container, + // so we need to free the container only after all slices + // within it have been freed. (Automatic via reference counting.) + while ((r = hts_tpool_next_result(fd->rqueue))) { + cram_job *j = (cram_job *)hts_tpool_result_data(r); + cram_container *c; + + if (!j) { + hts_tpool_delete_result(r, 0); + return -1; + } + + fd = j->fd; + c = j->c; + + if (fd->mode == 'w') + if (0 != cram_flush_container2(fd, c)) + return -1; + + // Free the slices; filled out by encoder only + if (c->slices) { + for (i = 0; i < c->max_slice; i++) { + if (c->slices[i]) + cram_free_slice(c->slices[i]); + if (c->slices[i] == c->slice) + c->slice = NULL; + c->slices[i] = NULL; + } + } + + // Free the current slice; set by both encoder & decoder + if (c->slice) { + cram_free_slice(c->slice); + c->slice = NULL; + } + c->curr_slice = 0; + + // Our jobs will be in order, so we free the last + // container when our job has switched to a new one. 
+ if (c != lc) { + if (lc) { + if (fd->ctr == lc) + fd->ctr = NULL; + if (fd->ctr_mt == lc) + fd->ctr_mt = NULL; + cram_free_container(lc); + } + lc = c; + } + + hts_tpool_delete_result(r, 1); + } + if (lc) { + if (fd->ctr == lc) + fd->ctr = NULL; + if (fd->ctr_mt == lc) + fd->ctr_mt = NULL; + cram_free_container(lc); + } + + return ret; +} + +// Note: called while metrics_lock is held. +// Will be left in this state too, but may temporarily unlock. +void reset_metrics(cram_fd *fd) { + int i; + + if (fd->pool) { + // If multi-threaded we have multiple blocks being + // compressed already and several on the to-do list + // (fd->rqueue->pending). It's tricky to reset the + // metrics exactly the correct point, so instead we + // just flush the pool, reset, and then continue again. + + // Don't bother starting a new trial before then though. + for (i = 0; i < DS_END; i++) { + cram_metrics *m = fd->m[i]; + if (!m) + continue; + m->next_trial = 999; + } + + pthread_mutex_unlock(&fd->metrics_lock); + hts_tpool_process_flush(fd->rqueue); + pthread_mutex_lock(&fd->metrics_lock); + } + + for (i = 0; i < DS_END; i++) { + cram_metrics *m = fd->m[i]; + if (!m) + continue; + + m->trial = NTRIALS; + m->next_trial = TRIAL_SPAN; + m->revised_method = 0; + m->unpackable = 0; + + memset(m->sz, 0, sizeof(m->sz)); + } +} + +int cram_flush_container_mt(cram_fd *fd, cram_container *c) { + cram_job *j; + + // At the junction of mapped to unmapped data the compression + // methods may need to change due to very different statistical + // properties; particularly BA if minhash sorted. + // + // However with threading we'll have several in-flight blocks + // arriving out of order. + // + // So we do one trial reset of NThreads to last for NThreads + // duration to get us over this transition period, followed + // by another retrial of the usual ntrials & trial span. 
+ pthread_mutex_lock(&fd->metrics_lock); + if (c->n_mapped < 0.3*c->curr_rec && + fd->last_mapped > 0.7*c->max_rec) { + reset_metrics(fd); + } + fd->last_mapped = c->n_mapped * (c->max_rec+1)/(c->curr_rec+1) ; + pthread_mutex_unlock(&fd->metrics_lock); + + if (!fd->pool) + return cram_flush_container(fd, c); + + if (!(j = malloc(sizeof(*j)))) + return -1; + j->fd = fd; + j->c = c; + + // Flush the job. Note our encoder queue may be full, so we + // either have to keep trying in non-blocking mode (what we do) or + // use a dedicated separate thread for draining the queue. + for (;;) { + errno = 0; + hts_tpool_dispatch2(fd->pool, fd->rqueue, cram_flush_thread, j, 1); + int pending = (errno == EAGAIN); + if (cram_flush_result(fd) != 0) + return -1; + if (!pending) + break; + + usleep(1000); + } + + return 0; +} + +/* ---------------------------------------------------------------------- + * Compression headers; the first part of the container + */ + +/* + * Creates a new blank container compression header + * + * Returns header ptr on success + * NULL on failure + */ +cram_block_compression_hdr *cram_new_compression_header(void) { + cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr)); + if (!hdr) + return NULL; + + if (!(hdr->TD_blk = cram_new_block(CORE, 0))) { + free(hdr); + return NULL; + } + + if (!(hdr->TD_hash = kh_init(m_s2i))) { + cram_free_block(hdr->TD_blk); + free(hdr); + return NULL; + } + + if (!(hdr->TD_keys = string_pool_create(8192))) { + kh_destroy(m_s2i, hdr->TD_hash); + cram_free_block(hdr->TD_blk); + free(hdr); + return NULL; + } + + return hdr; +} + +void cram_free_compression_header(cram_block_compression_hdr *hdr) { + int i; + + if (hdr->landmark) + free(hdr->landmark); + + if (hdr->preservation_map) + kh_destroy(map, hdr->preservation_map); + + for (i = 0; i < CRAM_MAP_HASH; i++) { + cram_map *m, *m2; + for (m = hdr->rec_encoding_map[i]; m; m = m2) { + m2 = m->next; + if (m->codec) + m->codec->free(m->codec); + free(m); + } + } + + for (i 
= 0; i < CRAM_MAP_HASH; i++) { + cram_map *m, *m2; + for (m = hdr->tag_encoding_map[i]; m; m = m2) { + m2 = m->next; + if (m->codec) + m->codec->free(m->codec); + free(m); + } + } + + for (i = 0; i < DS_END; i++) { + if (hdr->codecs[i]) + hdr->codecs[i]->free(hdr->codecs[i]); + } + + if (hdr->TL) + free(hdr->TL); + if (hdr->TD_blk) + cram_free_block(hdr->TD_blk); + if (hdr->TD_hash) + kh_destroy(m_s2i, hdr->TD_hash); + if (hdr->TD_keys) + string_pool_destroy(hdr->TD_keys); + + free(hdr); +} + + +/* ---------------------------------------------------------------------- + * Slices and slice headers + */ + +void cram_free_slice_header(cram_block_slice_hdr *hdr) { + if (!hdr) + return; + + if (hdr->block_content_ids) + free(hdr->block_content_ids); + + free(hdr); + + return; +} + +void cram_free_slice(cram_slice *s) { + if (!s) + return; + + if (s->hdr_block) + cram_free_block(s->hdr_block); + + if (s->block) { + int i; + + if (s->hdr) { + for (i = 0; i < s->hdr->num_blocks; i++) { + if (i > 0 && s->block[i] == s->block[0]) + continue; + cram_free_block(s->block[i]); + } + } + free(s->block); + } + + { + // Normally already copied into s->block[], but potentially still + // here if we error part way through cram_encode_slice. 
+ int i; + for (i = 0; i < s->naux_block; i++) + cram_free_block(s->aux_block[i]); + } + + if (s->block_by_id) + free(s->block_by_id); + + if (s->hdr) + cram_free_slice_header(s->hdr); + + if (s->seqs_blk) + cram_free_block(s->seqs_blk); + + if (s->qual_blk) + cram_free_block(s->qual_blk); + + if (s->name_blk) + cram_free_block(s->name_blk); + + if (s->aux_blk) + cram_free_block(s->aux_blk); + + if (s->base_blk) + cram_free_block(s->base_blk); + + if (s->soft_blk) + cram_free_block(s->soft_blk); + + if (s->cigar) + free(s->cigar); + + if (s->crecs) + free(s->crecs); + + if (s->features) + free(s->features); + + if (s->TN) + free(s->TN); + + if (s->pair_keys) + string_pool_destroy(s->pair_keys); + + if (s->pair[0]) + kh_destroy(m_s2i, s->pair[0]); + if (s->pair[1]) + kh_destroy(m_s2i, s->pair[1]); + + if (s->aux_block) + free(s->aux_block); + + free(s); +} + +/* + * Creates a new empty slice in memory, for subsequent writing to + * disk. + * + * Returns cram_slice ptr on success + * NULL on failure + */ +cram_slice *cram_new_slice(enum cram_content_type type, int nrecs) { + cram_slice *s = calloc(1, sizeof(*s)); + if (!s) + return NULL; + + if (!(s->hdr = (cram_block_slice_hdr *)calloc(1, sizeof(*s->hdr)))) + goto err; + s->hdr->content_type = type; + + s->hdr_block = NULL; + s->block = NULL; + s->block_by_id = NULL; + s->last_apos = 0; + if (!(s->crecs = malloc(nrecs * sizeof(cram_record)))) goto err; + s->cigar_alloc = 1024; + if (!(s->cigar = malloc(s->cigar_alloc * sizeof(*s->cigar)))) goto err; + s->ncigar = 0; + + if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0))) goto err; + if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS))) goto err; + if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN))) goto err; + if (!(s->aux_blk = cram_new_block(EXTERNAL, DS_aux))) goto err; + if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN))) goto err; + if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC))) goto err; + + s->features = NULL; + s->nfeatures = s->afeatures = 0; + 
+#ifndef TN_external + s->TN = NULL; + s->nTN = s->aTN = 0; +#endif + + // Volatile keys as we do realloc in dstring + if (!(s->pair_keys = string_pool_create(8192))) goto err; + if (!(s->pair[0] = kh_init(m_s2i))) goto err; + if (!(s->pair[1] = kh_init(m_s2i))) goto err; + +#ifdef BA_external + s->BA_len = 0; +#endif + + return s; + + err: + if (s) + cram_free_slice(s); + + return NULL; +} + +/* + * Loads an entire slice. + * FIXME: In 1.0 the native unit of slices within CRAM is broken + * as slices contain references to objects in other slices. + * To work around this while keeping the slice oriented outer loop + * we read all slices and stitch them together into a fake large + * slice instead. + * + * Returns cram_slice ptr on success + * NULL on failure + */ +cram_slice *cram_read_slice(cram_fd *fd) { + cram_block *b = cram_read_block(fd); + cram_slice *s = calloc(1, sizeof(*s)); + int i, n, max_id, min_id; + + if (!b || !s) + goto err; + + s->hdr_block = b; + switch (b->content_type) { + case MAPPED_SLICE: + case UNMAPPED_SLICE: + if (!(s->hdr = cram_decode_slice_header(fd, b))) + goto err; + break; + + default: + hts_log_error("Unexpected block of type %s", + cram_content_type2str(b->content_type)); + goto err; + } + + if (s->hdr->num_blocks < 1) { + hts_log_error("Slice does not include any data blocks"); + goto err; + } + + s->block = calloc(n = s->hdr->num_blocks, sizeof(*s->block)); + if (!s->block) + goto err; + + for (max_id = i = 0, min_id = INT_MAX; i < n; i++) { + if (!(s->block[i] = cram_read_block(fd))) + goto err; + + if (s->block[i]->content_type == EXTERNAL) { + if (max_id < s->block[i]->content_id) + max_id = s->block[i]->content_id; + if (min_id > s->block[i]->content_id) + min_id = s->block[i]->content_id; + } + } + + if (!(s->block_by_id = calloc(512, sizeof(s->block[0])))) + goto err; + + for (i = 0; i < n; i++) { + if (s->block[i]->content_type != EXTERNAL) + continue; + uint32_t v = s->block[i]->content_id; + if (v >= 256) + v = 256 + v 
% 251; + s->block_by_id[v] = s->block[i]; + } + + /* Initialise encoding/decoding tables */ + s->cigar_alloc = 1024; + if (!(s->cigar = malloc(s->cigar_alloc * sizeof(*s->cigar)))) goto err; + s->ncigar = 0; + + if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0))) goto err; + if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS))) goto err; + if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN))) goto err; + if (!(s->aux_blk = cram_new_block(EXTERNAL, DS_aux))) goto err; + if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN))) goto err; + if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC))) goto err; + + s->crecs = NULL; + + s->last_apos = s->hdr->ref_seq_start; + s->decode_md = fd->decode_md; + + return s; + + err: + if (b) + cram_free_block(b); + if (s) { + s->hdr_block = NULL; + cram_free_slice(s); + } + return NULL; +} + + +/* ---------------------------------------------------------------------- + * CRAM file definition (header) + */ + +/* + * Reads a CRAM file definition structure. + * Returns file_def ptr on success + * NULL on failure + */ +cram_file_def *cram_read_file_def(cram_fd *fd) { + cram_file_def *def = malloc(sizeof(*def)); + if (!def) + return NULL; + + if (26 != hread(fd->fp, &def->magic[0], 26)) { + free(def); + return NULL; + } + + if (memcmp(def->magic, "CRAM", 4) != 0) { + free(def); + return NULL; + } + + if (def->major_version > 4) { + hts_log_error("CRAM version number mismatch. Expected 1.x, 2.x, 3.x or 4.x, got %d.%d", + def->major_version, def->minor_version); + free(def); + return NULL; + } + + fd->first_container += 26; + fd->curr_position = fd->first_container; + fd->last_slice = 0; + + return def; +} + +/* + * Writes a cram_file_def structure to cram_fd. + * Returns 0 on success + * -1 on failure + */ +int cram_write_file_def(cram_fd *fd, cram_file_def *def) { + return (hwrite(fd->fp, &def->magic[0], 26) == 26) ? 
0 : -1; +} + +void cram_free_file_def(cram_file_def *def) { + if (def) free(def); +} + +/* ---------------------------------------------------------------------- + * SAM header I/O + */ + + +/* + * Reads the SAM header from the first CRAM data block. + * Also performs minimal parsing to extract read-group + * and sample information. + + * Returns SAM hdr ptr on success + * NULL on failure + */ +sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd) { + int32_t header_len; + char *header; + sam_hdr_t *hdr; + + /* 1.1 onwards stores the header in the first block of a container */ + if (CRAM_MAJOR_VERS(fd->version) == 1) { + /* Length */ + if (-1 == int32_decode(fd, &header_len)) + return NULL; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (header_len > FUZZ_ALLOC_LIMIT) + return NULL; +#endif + + /* Alloc and read */ + if (header_len < 0 || NULL == (header = malloc((size_t) header_len+1))) + return NULL; + + if (header_len != hread(fd->fp, header, header_len)) { + free(header); + return NULL; + } + header[header_len] = '\0'; + + fd->first_container += 4 + header_len; + } else { + cram_container *c = cram_read_container(fd); + cram_block *b; + int i; + int64_t len; + + if (!c) + return NULL; + + fd->first_container += c->length + c->offset; + fd->curr_position = fd->first_container; + + if (c->num_blocks < 1) { + cram_free_container(c); + return NULL; + } + + if (!(b = cram_read_block(fd))) { + cram_free_container(c); + return NULL; + } + if (cram_uncompress_block(b) != 0) { + cram_free_container(c); + cram_free_block(b); + return NULL; + } + + len = b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + + fd->vv.varint_size(b->content_id) + + fd->vv.varint_size(b->uncomp_size) + + fd->vv.varint_size(b->comp_size); + + /* Extract header from 1st block */ + if (-1 == int32_get_blk(b, &header_len) || + header_len < 0 || /* Spec. says signed... why? 
*/ + b->uncomp_size - 4 < header_len) { + cram_free_container(c); + cram_free_block(b); + return NULL; + } + if (NULL == (header = malloc((size_t) header_len+1))) { + cram_free_container(c); + cram_free_block(b); + return NULL; + } + memcpy(header, BLOCK_END(b), header_len); + header[header_len] = '\0'; + cram_free_block(b); + + /* Consume any remaining blocks */ + for (i = 1; i < c->num_blocks; i++) { + if (!(b = cram_read_block(fd))) { + cram_free_container(c); + free(header); + return NULL; + } + len += b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) + + fd->vv.varint_size(b->content_id) + + fd->vv.varint_size(b->uncomp_size) + + fd->vv.varint_size(b->comp_size); + cram_free_block(b); + } + + if (c->length > 0 && len > 0 && c->length > len) { + // Consume padding + char *pads = malloc(c->length - len); + if (!pads) { + cram_free_container(c); + free(header); + return NULL; + } + + if (c->length - len != hread(fd->fp, pads, c->length - len)) { + cram_free_container(c); + free(header); + free(pads); + return NULL; + } + free(pads); + } + + cram_free_container(c); + } + + /* Parse */ + hdr = sam_hdr_init(); + if (!hdr) { + free(header); + return NULL; + } + + if (-1 == sam_hdr_add_lines(hdr, header, header_len)) { + free(header); + sam_hdr_destroy(hdr); + return NULL; + } + + hdr->l_text = header_len; + hdr->text = header; + + return hdr; + +} + +/* + * Converts 'in' to a full pathname to store in out. + * Out must be at least PATH_MAX bytes long. 
+ */ +static void full_path(char *out, char *in) { + size_t in_l = strlen(in); + if (hisremote(in)) { + if (in_l > PATH_MAX) { + hts_log_error("Reference path is longer than %d", PATH_MAX); + return; + } + strncpy(out, in, PATH_MAX-1); + out[PATH_MAX-1] = 0; + return; + } + if (*in == '/' || + // Windows paths + (in_l > 3 && toupper_c(*in) >= 'A' && toupper_c(*in) <= 'Z' && + in[1] == ':' && (in[2] == '/' || in[2] == '\\'))) { + strncpy(out, in, PATH_MAX-1); + out[PATH_MAX-1] = 0; + } else { + size_t len; + + // unable to get dir or out+in is too long + if (!getcwd(out, PATH_MAX) || + (len = strlen(out))+1+strlen(in) >= PATH_MAX) { + strncpy(out, in, PATH_MAX-1); + out[PATH_MAX-1] = 0; + return; + } + + snprintf(out+len, PATH_MAX - len, "/%s", in); + + // FIXME: cope with `pwd`/../../../foo.fa ? + } +} + +/* + * Writes a CRAM SAM header. + * Returns 0 on success + * -1 on failure + */ +int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr) { + size_t header_len; + int blank_block = (CRAM_MAJOR_VERS(fd->version) >= 3); + + /* Write CRAM MAGIC if not yet written. 
*/ + if (fd->file_def->major_version == 0) { + fd->file_def->major_version = CRAM_MAJOR_VERS(fd->version); + fd->file_def->minor_version = CRAM_MINOR_VERS(fd->version); + if (0 != cram_write_file_def(fd, fd->file_def)) + return -1; + } + + /* 1.0 requires an UNKNOWN read-group */ + if (CRAM_MAJOR_VERS(fd->version) == 1) { + if (!sam_hrecs_find_rg(hdr->hrecs, "UNKNOWN")) + if (sam_hdr_add_line(hdr, "RG", + "ID", "UNKNOWN", "SM", "UNKNOWN", NULL)) + return -1; + } + + if (-1 == refs_from_header(fd)) + return -1; + if (-1 == refs2id(fd->refs, fd->header)) + return -1; + + /* Fix M5 strings */ + if (fd->refs && !fd->no_ref && fd->embed_ref <= 1) { + int i; + for (i = 0; i < hdr->hrecs->nref; i++) { + sam_hrec_type_t *ty; + char *ref; + + if (!(ty = sam_hrecs_find_type_id(hdr->hrecs, "SQ", "SN", hdr->hrecs->ref[i].name))) + return -1; + + if (!sam_hrecs_find_key(ty, "M5", NULL)) { + char unsigned buf[16]; + char buf2[33]; + hts_pos_t rlen; + hts_md5_context *md5; + + if (!fd->refs || + !fd->refs->ref_id || + !fd->refs->ref_id[i]) { + return -1; + } + rlen = fd->refs->ref_id[i]->length; + ref = cram_get_ref(fd, i, 1, rlen); + if (NULL == ref) { + if (fd->embed_ref == -1) { + // auto embed-ref + hts_log_warning("No M5 tags present and could not " + "find reference"); + hts_log_warning("Enabling embed_ref=2 option"); + hts_log_warning("NOTE: the CRAM file will be bigger " + "than using an external reference"); + pthread_mutex_lock(&fd->ref_lock); + fd->embed_ref = 2; + pthread_mutex_unlock(&fd->ref_lock); + break; + } + return -1; + } + rlen = fd->refs->ref_id[i]->length; /* In case it just loaded */ + if (!(md5 = hts_md5_init())) + return -1; + if (HTS_POS_MAX <= ULONG_MAX) { + // Platforms with 64-bit unsigned long update in one go + hts_md5_update(md5, ref, rlen); + } else { + // Those with 32-bit ulong (Windows) may have to loop + // over epic references + hts_pos_t pos = 0; + while (rlen - pos > ULONG_MAX) { + hts_md5_update(md5, ref + pos, ULONG_MAX); + pos += 
ULONG_MAX; + } + hts_md5_update(md5, ref + pos, (unsigned long)(rlen - pos)); + } + hts_md5_final(buf, md5); + hts_md5_destroy(md5); + cram_ref_decr(fd->refs, i); + + hts_md5_hex(buf2, buf); + fd->refs->ref_id[i]->validated_md5 = 1; + if (sam_hdr_update_line(hdr, "SQ", "SN", hdr->hrecs->ref[i].name, "M5", buf2, NULL)) + return -1; + } + + if (fd->ref_fn) { + char ref_fn[PATH_MAX]; + full_path(ref_fn, fd->ref_fn); + if (sam_hdr_update_line(hdr, "SQ", "SN", hdr->hrecs->ref[i].name, "UR", ref_fn, NULL)) + return -1; + } + } + } + + /* Length */ + header_len = sam_hdr_length(hdr); + if (header_len > INT32_MAX) { + hts_log_error("Header is too long for CRAM format"); + return -1; + } + if (CRAM_MAJOR_VERS(fd->version) == 1) { + if (-1 == int32_encode(fd, header_len)) + return -1; + + /* Text data */ + if (header_len != hwrite(fd->fp, sam_hdr_str(hdr), header_len)) + return -1; + } else { + /* Create block(s) inside a container */ + cram_block *b = cram_new_block(FILE_HEADER, 0); + cram_container *c = cram_new_container(0, 0); + int padded_length; + char *pads; + int is_cram_3 = (CRAM_MAJOR_VERS(fd->version) >= 3); + + if (!b || !c) { + if (b) cram_free_block(b); + if (c) cram_free_container(c); + return -1; + } + + if (int32_put_blk(b, header_len) < 0) + return -1; + if (header_len) + BLOCK_APPEND(b, sam_hdr_str(hdr), header_len); + BLOCK_UPLEN(b); + + // Compress header block if V3.0 and above + if (CRAM_MAJOR_VERS(fd->version) >= 3) + if (cram_compress_block(fd, b, NULL, -1, -1) < 0) + return -1; + + if (blank_block) { + c->length = b->comp_size + 2 + 4*is_cram_3 + + fd->vv.varint_size(b->content_id) + + fd->vv.varint_size(b->uncomp_size) + + fd->vv.varint_size(b->comp_size); + + c->num_blocks = 2; + c->num_landmarks = 2; + if (!(c->landmark = malloc(2*sizeof(*c->landmark)))) { + cram_free_block(b); + cram_free_container(c); + return -1; + } + c->landmark[0] = 0; + c->landmark[1] = c->length; + + // Plus extra storage for uncompressed secondary blank block + 
padded_length = MIN(c->length*.5, 10000); + c->length += padded_length + 2 + 4*is_cram_3 + + fd->vv.varint_size(b->content_id) + + fd->vv.varint_size(padded_length)*2; + } else { + // Pad the block instead. + c->num_blocks = 1; + c->num_landmarks = 1; + if (!(c->landmark = malloc(sizeof(*c->landmark)))) + return -1; + c->landmark[0] = 0; + + padded_length = MAX(c->length*1.5, 10000) - c->length; + + c->length = b->comp_size + padded_length + + 2 + 4*is_cram_3 + + fd->vv.varint_size(b->content_id) + + fd->vv.varint_size(b->uncomp_size) + + fd->vv.varint_size(b->comp_size); + + if (NULL == (pads = calloc(1, padded_length))) { + cram_free_block(b); + cram_free_container(c); + return -1; + } + BLOCK_APPEND(b, pads, padded_length); + BLOCK_UPLEN(b); + free(pads); + } + + if (-1 == cram_write_container(fd, c)) { + cram_free_block(b); + cram_free_container(c); + return -1; + } + + if (-1 == cram_write_block(fd, b)) { + cram_free_block(b); + cram_free_container(c); + return -1; + } + + if (blank_block) { + BLOCK_RESIZE(b, padded_length); + memset(BLOCK_DATA(b), 0, padded_length); + BLOCK_SIZE(b) = padded_length; + BLOCK_UPLEN(b); + b->method = RAW; + if (-1 == cram_write_block(fd, b)) { + cram_free_block(b); + cram_free_container(c); + return -1; + } + } + + cram_free_block(b); + cram_free_container(c); + } + + if (0 != hflush(fd->fp)) + return -1; + + RP("=== Finishing saving header ===\n"); + + return 0; + + block_err: + return -1; +} + +/* ---------------------------------------------------------------------- + * The top-level cram opening, closing and option handling + */ + +/* + * Sets CRAM variable sized integer decode function tables. + * CRAM 1, 2, and 3.x all used ITF8 for uint32 and UTF8 for uint64. + * CRAM 4.x uses the same encoding mechanism for 32-bit and 64-bit + * (or anything inbetween), but also now supports signed values. + * + * Version is the CRAM major version number. 
+ * vv is the vector table (probably &cram_fd->vv) + */ +static void cram_init_varint(varint_vec *vv, int version) { + if (version >= 4) { + vv->varint_get32 = uint7_get_32; // FIXME: varint.h API should be size agnostic + vv->varint_get32s = sint7_get_32; + vv->varint_get64 = uint7_get_64; + vv->varint_get64s = sint7_get_64; + vv->varint_put32 = uint7_put_32; + vv->varint_put32s = sint7_put_32; + vv->varint_put64 = uint7_put_64; + vv->varint_put64s = sint7_put_64; + vv->varint_put32_blk = uint7_put_blk_32; + vv->varint_put32s_blk = sint7_put_blk_32; + vv->varint_put64_blk = uint7_put_blk_64; + vv->varint_put64s_blk = sint7_put_blk_64; + vv->varint_size = uint7_size; + vv->varint_decode32_crc = uint7_decode_crc32; + vv->varint_decode32s_crc = sint7_decode_crc32; + vv->varint_decode64_crc = uint7_decode_crc64; + } else { + vv->varint_get32 = safe_itf8_get; + vv->varint_get32s = safe_itf8_get; + vv->varint_get64 = safe_ltf8_get; + vv->varint_get64s = safe_ltf8_get; + vv->varint_put32 = safe_itf8_put; + vv->varint_put32s = safe_itf8_put; + vv->varint_put64 = safe_ltf8_put; + vv->varint_put64s = safe_ltf8_put; + vv->varint_put32_blk = itf8_put_blk; + vv->varint_put32s_blk = itf8_put_blk; + vv->varint_put64_blk = ltf8_put_blk; + vv->varint_put64s_blk = ltf8_put_blk; + vv->varint_size = itf8_size; + vv->varint_decode32_crc = itf8_decode_crc; + vv->varint_decode32s_crc = itf8_decode_crc; + vv->varint_decode64_crc = ltf8_decode_crc; + } +} + +/* + * Initialises the lookup tables. These could be global statics, but they're + * clumsy to setup in a multi-threaded environment unless we generate + * verbatim code and include that. 
+ */ +static void cram_init_tables(cram_fd *fd) { + int i; + + memset(fd->L1, 4, 256); + fd->L1['A'] = 0; fd->L1['a'] = 0; + fd->L1['C'] = 1; fd->L1['c'] = 1; + fd->L1['G'] = 2; fd->L1['g'] = 2; + fd->L1['T'] = 3; fd->L1['t'] = 3; + + memset(fd->L2, 5, 256); + fd->L2['A'] = 0; fd->L2['a'] = 0; + fd->L2['C'] = 1; fd->L2['c'] = 1; + fd->L2['G'] = 2; fd->L2['g'] = 2; + fd->L2['T'] = 3; fd->L2['t'] = 3; + fd->L2['N'] = 4; fd->L2['n'] = 4; + + if (CRAM_MAJOR_VERS(fd->version) == 1) { + for (i = 0; i < 0x200; i++) { + int f = 0; + + if (i & CRAM_FPAIRED) f |= BAM_FPAIRED; + if (i & CRAM_FPROPER_PAIR) f |= BAM_FPROPER_PAIR; + if (i & CRAM_FUNMAP) f |= BAM_FUNMAP; + if (i & CRAM_FREVERSE) f |= BAM_FREVERSE; + if (i & CRAM_FREAD1) f |= BAM_FREAD1; + if (i & CRAM_FREAD2) f |= BAM_FREAD2; + if (i & CRAM_FSECONDARY) f |= BAM_FSECONDARY; + if (i & CRAM_FQCFAIL) f |= BAM_FQCFAIL; + if (i & CRAM_FDUP) f |= BAM_FDUP; + + fd->bam_flag_swap[i] = f; + } + + for (i = 0; i < 0x1000; i++) { + int g = 0; + + if (i & BAM_FPAIRED) g |= CRAM_FPAIRED; + if (i & BAM_FPROPER_PAIR) g |= CRAM_FPROPER_PAIR; + if (i & BAM_FUNMAP) g |= CRAM_FUNMAP; + if (i & BAM_FREVERSE) g |= CRAM_FREVERSE; + if (i & BAM_FREAD1) g |= CRAM_FREAD1; + if (i & BAM_FREAD2) g |= CRAM_FREAD2; + if (i & BAM_FSECONDARY) g |= CRAM_FSECONDARY; + if (i & BAM_FQCFAIL) g |= CRAM_FQCFAIL; + if (i & BAM_FDUP) g |= CRAM_FDUP; + + fd->cram_flag_swap[i] = g; + } + } else { + /* NOP */ + for (i = 0; i < 0x1000; i++) + fd->bam_flag_swap[i] = i; + for (i = 0; i < 0x1000; i++) + fd->cram_flag_swap[i] = i; + } + + memset(fd->cram_sub_matrix, 4, 32*32); + for (i = 0; i < 32; i++) { + fd->cram_sub_matrix[i]['A'&0x1f]=0; + fd->cram_sub_matrix[i]['C'&0x1f]=1; + fd->cram_sub_matrix[i]['G'&0x1f]=2; + fd->cram_sub_matrix[i]['T'&0x1f]=3; + fd->cram_sub_matrix[i]['N'&0x1f]=4; + } + for (i = 0; i < 20; i+=4) { + int j; + for (j = 0; j < 20; j++) { + fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; + fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; + 
fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; + fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3; + } + fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+0]&0x1f]=0; + fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+1]&0x1f]=1; + fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+2]&0x1f]=2; + fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+3]&0x1f]=3; + } + + cram_init_varint(&fd->vv, CRAM_MAJOR_VERS(fd->version)); +} + +// Default version numbers for CRAM +static int major_version = 3; +static int minor_version = 0; + +/* + * Opens a CRAM file for read (mode "rb") or write ("wb"). + * The filename may be "-" to indicate stdin or stdout. + * + * Returns file handle on success + * NULL on failure. + */ +cram_fd *cram_open(const char *filename, const char *mode) { + hFILE *fp; + cram_fd *fd; + char fmode[3]= { mode[0], '\0', '\0' }; + + if (strlen(mode) > 1 && (mode[1] == 'b' || mode[1] == 'c')) { + fmode[1] = 'b'; + } + + fp = hopen(filename, fmode); + if (!fp) + return NULL; + + fd = cram_dopen(fp, filename, mode); + if (!fd) + hclose_abruptly(fp); + + return fd; +} + +/* Opens an existing stream for reading or writing. + * + * Returns file handle on success; + * NULL on failure. 
+ */ +cram_fd *cram_dopen(hFILE *fp, const char *filename, const char *mode) { + int i; + char *cp; + cram_fd *fd = calloc(1, sizeof(*fd)); + if (!fd) + return NULL; + + fd->level = CRAM_DEFAULT_LEVEL; + for (i = 0; mode[i]; i++) { + if (mode[i] >= '0' && mode[i] <= '9') { + fd->level = mode[i] - '0'; + break; + } + } + + fd->fp = fp; + fd->mode = *mode; + fd->first_container = 0; + fd->curr_position = 0; + + if (fd->mode == 'r') { + /* Reader */ + + if (!(fd->file_def = cram_read_file_def(fd))) + goto err; + + fd->version = fd->file_def->major_version * 256 + + fd->file_def->minor_version; + + cram_init_tables(fd); + + if (!(fd->header = cram_read_SAM_hdr(fd))) { + cram_free_file_def(fd->file_def); + goto err; + } + + } else { + /* Writer */ + cram_file_def *def = calloc(1, sizeof(*def)); + if (!def) + return NULL; + + fd->file_def = def; + + def->magic[0] = 'C'; + def->magic[1] = 'R'; + def->magic[2] = 'A'; + def->magic[3] = 'M'; + def->major_version = 0; // Indicator to write file def later. + def->minor_version = 0; + memset(def->file_id, 0, 20); + strncpy(def->file_id, filename, 20); + + fd->version = major_version * 256 + minor_version; + cram_init_tables(fd); + + /* SAM header written later along with this file_def */ + } + + fd->prefix = strdup((cp = strrchr(filename, '/')) ? 
cp+1 : filename); + if (!fd->prefix) + goto err; + fd->first_base = fd->last_base = -1; + fd->record_counter = 0; + + fd->ctr = NULL; + fd->ctr_mt = NULL; + fd->refs = refs_create(); + if (!fd->refs) + goto err; + fd->ref_id = -2; + fd->ref = NULL; + + fd->decode_md = 0; + fd->seqs_per_slice = SEQS_PER_SLICE; + fd->bases_per_slice = BASES_PER_SLICE; + fd->slices_per_container = SLICE_PER_CNT; + fd->embed_ref = -1; // automatic selection + fd->no_ref = 0; + fd->no_ref_counter = 0; + fd->ap_delta = 0; + fd->ignore_md5 = 0; + fd->lossy_read_names = 0; + fd->use_bz2 = 0; + fd->use_rans = (CRAM_MAJOR_VERS(fd->version) >= 3); + fd->use_tok = (CRAM_MAJOR_VERS(fd->version) >= 3) && (CRAM_MINOR_VERS(fd->version) >= 1); + fd->use_lzma = 0; + fd->multi_seq = -1; + fd->multi_seq_user = -1; + fd->unsorted = 0; + fd->shared_ref = 0; + fd->store_md = 0; + fd->store_nm = 0; + fd->last_RI_count = 0; + + fd->index = NULL; + fd->own_pool = 0; + fd->pool = NULL; + fd->rqueue = NULL; + fd->job_pending = NULL; + fd->ooc = 0; + fd->required_fields = INT_MAX; + + pthread_mutex_init(&fd->metrics_lock, NULL); + pthread_mutex_init(&fd->ref_lock, NULL); + pthread_mutex_init(&fd->range_lock, NULL); + pthread_mutex_init(&fd->bam_list_lock, NULL); + + for (i = 0; i < DS_END; i++) { + fd->m[i] = cram_new_metrics(); + if (!fd->m[i]) + goto err; + } + + if (!(fd->tags_used = kh_init(m_metrics))) + goto err; + + fd->range.refid = -2; // no ref. + fd->eof = 1; // See samtools issue #150 + fd->ref_fn = NULL; + + fd->bl = NULL; + + /* Initialise dummy refs from the @SQ headers */ + if (-1 == refs_from_header(fd)) + goto err; + + return fd; + + err: + if (fd) + free(fd); + + return NULL; +} + +/* + * Seek within a CRAM file. 
+ *
+ * If the underlying hseek() fails, a forward SEEK_CUR request is emulated
+ * by reading and discarding the requested number of bytes.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_seek(cram_fd *fd, off_t offset, int whence) {
+    fd->ooc = 0;
+
+    cram_drain_rqueue(fd);
+
+    if (hseek(fd->fp, offset, whence) >= 0)
+        return 0;
+
+    // Only a non-negative relative seek can be faked by reading.
+    if (whence != SEEK_CUR || offset < 0)
+        return -1;
+
+    /* Couldn't fseek, but we're in SEEK_CUR mode so read instead */
+    char skipped[65536];
+    off_t remaining = offset;
+
+    while (remaining > 0) {
+        int chunk = MIN(65536, remaining);
+        if (hread(fd->fp, skipped, chunk) != chunk)
+            return -1;
+        remaining -= chunk;
+    }
+
+    return 0;
+}
+
+/*
+ * Flushes a CRAM file.
+ * Useful for when writing to stdout without wishing to close the stream.
+ *
+ * Only does work for a writer that currently has a container; that
+ * container is flushed and then freed.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_flush(cram_fd *fd) {
+    if (!fd)
+        return -1;
+
+    // Nothing pending unless we are a writer with a live container.
+    if (fd->mode != 'w' || !fd->ctr)
+        return 0;
+
+    if (fd->ctr->slice)
+        cram_update_curr_slice(fd->ctr, fd->version);
+
+    const int status = (cram_flush_container_mt(fd, fd->ctr) == -1) ? -1 : 0;
+
+    // fd->ctr is deliberately re-read after the flush call.
+    cram_free_container(fd->ctr);
+    if (fd->ctr_mt == fd->ctr)
+        fd->ctr_mt = NULL;
+    fd->ctr = NULL;
+
+    return status;
+}
+
+/*
+ * Writes an EOF block to a CRAM file.
+ * + * Returns 0 on success + * -1 on failure + */ +int cram_write_eof_block(cram_fd *fd) { + // EOF block is a container with special values to aid detection + if (CRAM_MAJOR_VERS(fd->version) >= 2) { + // Empty container with + // ref_seq_id -1 + // start pos 0x454f46 ("EOF") + // span 0 + // nrec 0 + // counter 0 + // nbases 0 + // 1 block (landmark 0) + // (CRC32) + cram_container c; + memset(&c, 0, sizeof(c)); + c.ref_seq_id = -1; + c.ref_seq_start = 0x454f46; // "EOF" + c.ref_seq_span = 0; + c.record_counter = 0; + c.num_bases = 0; + c.num_blocks = 1; + int32_t land[1] = {0}; + c.landmark = land; + + // An empty compression header block with + // method raw (0) + // type comp header (1) + // content id 0 + // block contents size 6 + // raw size 6 + // empty preservation map (01 00) + // empty data series map (01 00) + // empty tag map (01 00) + // block CRC + cram_block_compression_hdr ch; + memset(&ch, 0, sizeof(ch)); + c.comp_hdr_block = cram_encode_compression_header(fd, &c, &ch, 0); + + c.length = c.comp_hdr_block->byte // Landmark[0] + + 5 // block struct + + 4*(CRAM_MAJOR_VERS(fd->version) >= 3); // CRC + if (cram_write_container(fd, &c) < 0 || + cram_write_block(fd, c.comp_hdr_block) < 0) { + cram_close(fd); + cram_free_block(c.comp_hdr_block); + return -1; + } + if (ch.preservation_map) + kh_destroy(map, ch.preservation_map); + cram_free_block(c.comp_hdr_block); + + // V2.1 bytes + // 0b 00 00 00 ff ff ff ff 0f // Cont HDR: size, ref seq id + // e0 45 4f 46 00 00 00 // Cont HDR: pos, span, nrec, counter + // 00 01 00 // Cont HDR: nbase, nblk, landmark + // 00 01 00 06 06 // Comp.HDR blk + // 01 00 01 00 01 00 // Comp.HDR blk + + // V3.0 bytes: + // 0f 00 00 00 ff ff ff ff 0f // Cont HDR: size, ref seq id + // e0 45 4f 46 00 00 00 // Cont HDR: pos, span, nrec, counter + // 00 01 00 // Cont HDR: nbase, nblk, landmark + // 05 bd d9 4f // CRC32 + // 00 01 00 06 06 // Comp.HDR blk + // 01 00 01 00 01 00 // Comp.HDR blk + // ee 63 01 4b // CRC32 + + // 
V4.0 bytes: + // 0f 00 00 00 8f ff ff ff // Cont HDR: size, ref seq id + // 82 95 9e 46 00 00 00 // Cont HDR: pos, span, nrec, counter + // 00 01 00 // Cont HDR: nbase, nblk, landmark + // ac d6 05 bc // CRC32 + // 00 01 00 06 06 // Comp.HDR blk + // 01 00 01 00 01 00 // Comp.HDR blk + // ee 63 01 4b // CRC32 + } + + return 0; +} + +/* + * Closes a CRAM file. + * Returns 0 on success + * -1 on failure + */ +int cram_close(cram_fd *fd) { + spare_bams *bl, *next; + int i, ret = 0; + + if (!fd) + return -1; + + if (fd->mode == 'w' && fd->ctr) { + if(fd->ctr->slice) + cram_update_curr_slice(fd->ctr, fd->version); + + if (-1 == cram_flush_container_mt(fd, fd->ctr)) + ret = -1; + } + + if (fd->mode != 'w') + cram_drain_rqueue(fd); + + if (fd->pool && fd->eof >= 0 && fd->rqueue) { + hts_tpool_process_flush(fd->rqueue); + + if (0 != cram_flush_result(fd)) + ret = -1; + + if (fd->mode == 'w') + fd->ctr = NULL; // prevent double freeing + + //fprintf(stderr, "CRAM: destroy queue %p\n", fd->rqueue); + + hts_tpool_process_destroy(fd->rqueue); + } + + pthread_mutex_destroy(&fd->metrics_lock); + pthread_mutex_destroy(&fd->ref_lock); + pthread_mutex_destroy(&fd->range_lock); + pthread_mutex_destroy(&fd->bam_list_lock); + + if (ret == 0 && fd->mode == 'w') { + /* Write EOF block */ + if (0 != cram_write_eof_block(fd)) + ret = -1; + } + + for (bl = fd->bl; bl; bl = next) { + int max_rec = fd->seqs_per_slice * fd->slices_per_container; + + next = bl->next; + free_bam_list(bl->bams, max_rec); + free(bl); + } + + if (hclose(fd->fp) != 0) + ret = -1; + + if (fd->file_def) + cram_free_file_def(fd->file_def); + + if (fd->header) + sam_hdr_destroy(fd->header); + + free(fd->prefix); + + if (fd->ctr) + cram_free_container(fd->ctr); + + if (fd->ctr_mt && fd->ctr_mt != fd->ctr) + cram_free_container(fd->ctr_mt); + + if (fd->refs) + refs_free(fd->refs); + if (fd->ref_free) + free(fd->ref_free); + + for (i = 0; i < DS_END; i++) + if (fd->m[i]) + free(fd->m[i]); + + if (fd->tags_used) { + 
khint_t k; + + for (k = kh_begin(fd->tags_used); k != kh_end(fd->tags_used); k++) { + if (kh_exist(fd->tags_used, k)) + free(kh_val(fd->tags_used, k)); + } + + kh_destroy(m_metrics, fd->tags_used); + } + + if (fd->index) + cram_index_free(fd); + + if (fd->own_pool && fd->pool) + hts_tpool_destroy(fd->pool); + + if (fd->idxfp) + if (bgzf_close(fd->idxfp) < 0) + ret = -1; + + free(fd); + + return ret; +} + +/* + * Returns 1 if we hit an EOF while reading. + */ +int cram_eof(cram_fd *fd) { + return fd->eof; +} + + +/* + * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h. + * Use this immediately after opening. + * + * Returns 0 on success + * -1 on failure + */ +int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...) { + int r; + va_list args; + + va_start(args, opt); + r = cram_set_voption(fd, opt, args); + va_end(args); + + return r; +} + +/* + * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h. + * Use this immediately after opening. 
+ * + * Returns 0 on success + * -1 on failure + */ +int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args) { + refs_t *refs; + + if (!fd) { + errno = EBADF; + return -1; + } + + switch (opt) { + case CRAM_OPT_DECODE_MD: + fd->decode_md = va_arg(args, int); + break; + + case CRAM_OPT_PREFIX: + if (fd->prefix) + free(fd->prefix); + if (!(fd->prefix = strdup(va_arg(args, char *)))) + return -1; + break; + + case CRAM_OPT_VERBOSITY: + break; + + case CRAM_OPT_SEQS_PER_SLICE: + fd->seqs_per_slice = va_arg(args, int); + if (fd->bases_per_slice == BASES_PER_SLICE) + fd->bases_per_slice = fd->seqs_per_slice * 500; + break; + + case CRAM_OPT_BASES_PER_SLICE: + fd->bases_per_slice = va_arg(args, int); + break; + + case CRAM_OPT_SLICES_PER_CONTAINER: + fd->slices_per_container = va_arg(args, int); + break; + + case CRAM_OPT_EMBED_REF: + fd->embed_ref = va_arg(args, int); + break; + + case CRAM_OPT_NO_REF: + fd->no_ref = va_arg(args, int); + break; + + case CRAM_OPT_POS_DELTA: + fd->ap_delta = va_arg(args, int); + break; + + case CRAM_OPT_IGNORE_MD5: + fd->ignore_md5 = va_arg(args, int); + break; + + case CRAM_OPT_LOSSY_NAMES: + fd->lossy_read_names = va_arg(args, int); + // Currently lossy read names required paired (attached) reads. + // TLEN 0 or being 1 out causes read pairs to be detached, breaking + // the lossy read name compression, so we have extra options to + // slacken the exact TLEN round-trip checks. 
+ fd->tlen_approx = fd->lossy_read_names; + fd->tlen_zero = fd->lossy_read_names; + break; + + case CRAM_OPT_USE_BZIP2: + fd->use_bz2 = va_arg(args, int); + break; + + case CRAM_OPT_USE_RANS: + fd->use_rans = va_arg(args, int); + break; + + case CRAM_OPT_USE_TOK: + fd->use_tok = va_arg(args, int); + break; + + case CRAM_OPT_USE_FQZ: + fd->use_fqz = va_arg(args, int); + break; + + case CRAM_OPT_USE_ARITH: + fd->use_arith = va_arg(args, int); + break; + + case CRAM_OPT_USE_LZMA: + fd->use_lzma = va_arg(args, int); + break; + + case CRAM_OPT_SHARED_REF: + fd->shared_ref = 1; + refs = va_arg(args, refs_t *); + if (refs != fd->refs) { + if (fd->refs) + refs_free(fd->refs); + fd->refs = refs; + fd->refs->count++; + } + break; + + case CRAM_OPT_RANGE: { + int r = cram_seek_to_refpos(fd, va_arg(args, cram_range *)); + pthread_mutex_lock(&fd->range_lock); + if (fd->range.refid != -2) + fd->required_fields |= SAM_POS; + pthread_mutex_unlock(&fd->range_lock); + return r; + } + + case CRAM_OPT_RANGE_NOSEEK: { + // As per CRAM_OPT_RANGE, but no seeking + pthread_mutex_lock(&fd->range_lock); + cram_range *r = va_arg(args, cram_range *); + fd->range = *r; + if (r->refid == HTS_IDX_NOCOOR) { + fd->range.refid = -1; + fd->range.start = 0; + } else if (r->refid == HTS_IDX_START || r->refid == HTS_IDX_REST) { + fd->range.refid = -2; // special case in cram_next_slice + } + if (fd->range.refid != -2) + fd->required_fields |= SAM_POS; + fd->ooc = 0; + fd->eof = 0; + pthread_mutex_unlock(&fd->range_lock); + return 0; + } + + case CRAM_OPT_REFERENCE: + return cram_load_reference(fd, va_arg(args, char *)); + + case CRAM_OPT_VERSION: { + int major, minor; + char *s = va_arg(args, char *); + if (2 != sscanf(s, "%d.%d", &major, &minor)) { + hts_log_error("Malformed version string %s", s); + return -1; + } + if (!((major == 1 && minor == 0) || + (major == 2 && (minor == 0 || minor == 1)) || + (major == 3 && (minor == 0 || minor == 1)) || + (major == 4 && minor == 0))) { + 
hts_log_error("Unknown version string; use 1.0, 2.0, 2.1, 3.0, 3.1 or 4.0"); + errno = EINVAL; + return -1; + } + + if (major > 3 || (major == 3 && minor > 1)) { + hts_log_warning( + "CRAM version %s is still a draft and subject to change.\n" + "This is a technology demonstration that should not be " + "used for archival data.", s); + } + + fd->version = major*256 + minor; + + fd->use_rans = (CRAM_MAJOR_VERS(fd->version) >= 3) ? 1 : 0; + + fd->use_tok = ((CRAM_MAJOR_VERS(fd->version) == 3 && + CRAM_MINOR_VERS(fd->version) >= 1) || + CRAM_MAJOR_VERS(fd->version) >= 4) ? 1 : 0; + cram_init_tables(fd); + + break; + } + + case CRAM_OPT_MULTI_SEQ_PER_SLICE: + fd->multi_seq_user = fd->multi_seq = va_arg(args, int); + break; + + case CRAM_OPT_NTHREADS: { + int nthreads = va_arg(args, int); + if (nthreads >= 1) { + if (!(fd->pool = hts_tpool_init(nthreads))) + return -1; + + fd->rqueue = hts_tpool_process_init(fd->pool, nthreads*2, 0); + fd->shared_ref = 1; + fd->own_pool = 1; + } + break; + } + + case CRAM_OPT_THREAD_POOL: { + htsThreadPool *p = va_arg(args, htsThreadPool *); + fd->pool = p ? p->pool : NULL; + if (fd->pool) { + fd->rqueue = hts_tpool_process_init(fd->pool, + p->qsize ? 
p->qsize : hts_tpool_size(fd->pool)*2, + 0); + } + fd->shared_ref = 1; // Needed to avoid clobbering ref between threads + fd->own_pool = 0; + + //fd->qsize = 1; + //fd->decoded = calloc(fd->qsize, sizeof(cram_container *)); + //hts_tpool_dispatch(fd->pool, cram_decoder_thread, fd); + break; + } + + case CRAM_OPT_REQUIRED_FIELDS: + fd->required_fields = va_arg(args, int); + if (fd->range.refid != -2) + fd->required_fields |= SAM_POS; + break; + + case CRAM_OPT_STORE_MD: + fd->store_md = va_arg(args, int); + break; + + case CRAM_OPT_STORE_NM: + fd->store_nm = va_arg(args, int); + break; + + case HTS_OPT_COMPRESSION_LEVEL: + fd->level = va_arg(args, int); + break; + + case HTS_OPT_PROFILE: { + enum hts_profile_option prof = va_arg(args, int); + switch (prof) { + case HTS_PROFILE_FAST: + if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 1; + fd->use_tok = 0; + fd->seqs_per_slice = 10000; + break; + + case HTS_PROFILE_NORMAL: + break; + + case HTS_PROFILE_SMALL: + if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 6; + fd->use_bz2 = 1; + fd->use_fqz = 1; + fd->seqs_per_slice = 25000; + break; + + case HTS_PROFILE_ARCHIVE: + if (fd->level == CRAM_DEFAULT_LEVEL) fd->level = 7; + fd->use_bz2 = 1; + fd->use_fqz = 1; + fd->use_arith = 1; + if (fd->level > 7) + fd->use_lzma = 1; + fd->seqs_per_slice = 100000; + break; + } + + if (fd->bases_per_slice == BASES_PER_SLICE) + fd->bases_per_slice = fd->seqs_per_slice * 500; + break; + } + + default: + hts_log_error("Unknown CRAM option code %d", opt); + errno = EINVAL; + return -1; + } + + return 0; +} + +int cram_check_EOF(cram_fd *fd) +{ + // Byte 9 in these templates is & with 0x0f to resolve differences + // between ITF-8 interpretations between early Java and C + // implementations of CRAM + static const unsigned char TEMPLATE_2_1[30] = { + 0x0b, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xe0, + 0x45, 0x4f, 0x46, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x01, 0x00, 0x06, 0x06, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00 + }; + 
static const unsigned char TEMPLATE_3[38] = { + 0x0f, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xe0, + 0x45, 0x4f, 0x46, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, + 0xbd, 0xd9, 0x4f, 0x00, 0x01, 0x00, 0x06, 0x06, 0x01, 0x00, + 0x01, 0x00, 0x01, 0x00, 0xee, 0x63, 0x01, 0x4b + }; + + unsigned char buf[38]; // max(sizeof TEMPLATE_*) + + uint8_t major = CRAM_MAJOR_VERS(fd->version); + uint8_t minor = CRAM_MINOR_VERS(fd->version); + + const unsigned char *template; + ssize_t template_len; + if ((major < 2) || + (major == 2 && minor == 0)) { + return 3; // No EOF support in cram versions less than 2.1 + } else if (major == 2 && minor == 1) { + template = TEMPLATE_2_1; + template_len = sizeof TEMPLATE_2_1; + } else { + template = TEMPLATE_3; + template_len = sizeof TEMPLATE_3; + } + + off_t offset = htell(fd->fp); + if (hseek(fd->fp, -template_len, SEEK_END) < 0) { + if (errno == ESPIPE) { + hclearerr(fd->fp); + return 2; + } + else { + return -1; + } + } + if (hread(fd->fp, buf, template_len) != template_len) return -1; + if (hseek(fd->fp, offset, SEEK_SET) < 0) return -1; + buf[8] &= 0x0f; + return (memcmp(template, buf, template_len) == 0)? 1 : 0; +} diff --git a/src/htslib-1.21/cram/cram_io.h b/src/htslib-1.21/cram/cram_io.h new file mode 100644 index 0000000..d2d583d --- /dev/null +++ b/src/htslib-1.21/cram/cram_io.h @@ -0,0 +1,648 @@ +/* +Copyright (c) 2012-2020 Genome Research Ltd. +Author: James Bonfield + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/*! \file + * Include cram.h instead. + * + * This is an internal part of the CRAM system and is automatically included + * when you #include cram.h. + * + * Implements the low level CRAM I/O primitives. + * This includes basic data types such as byte, int, ITF-8, + * maps, bitwise I/O, etc. + */ + +#ifndef CRAM_IO_H +#define CRAM_IO_H + +#include + +#include "misc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/**@{ ---------------------------------------------------------------------- + * ITF8 encoding and decoding. + * + * Also see the itf8_get and itf8_put macros. + */ + +/*! INTERNAL: Converts two characters into an integer for use in switch{} */ +#define CRAM_KEY(a,b) ((((unsigned char) a)<<8)|(((unsigned char) b))) + +/*! Reads an integer in ITF-8 encoding from 'fd' and stores it in + * *val. 
+ * + * @return + * Returns the number of bytes read on success; + * -1 on failure + */ +int itf8_decode(cram_fd *fd, int32_t *val); + +extern const int itf8_bytes[16]; +extern const int ltf8_bytes[256]; + +/*! Pushes a value in ITF8 format onto the end of a block. + * + * This shouldn't be used for high-volume data as it is not the fastest + * method. + * + * @return + * Returns the number of bytes written + */ +int itf8_put_blk(cram_block *blk, int32_t val); +int ltf8_put_blk(cram_block *blk, int64_t val); + +/*! Pulls a literal 32-bit value from a block. + * + * @returns the number of bytes decoded; + * -1 on failure. + */ +int int32_get_blk(cram_block *b, int32_t *val); + +/*! Pushes a literal 32-bit value onto the end of a block. + * + * @return + * Returns 0 on success; + * -1 on failure. + */ +int int32_put_blk(cram_block *blk, int32_t val); + + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * CRAM blocks - the dynamically growable data block. We have code to + * create, update, (un)compress and read/write. + * + * These are derived from the deflate_interlaced.c blocks, but with the + * CRAM extension of content types and IDs. + */ + +/*! Allocates a new cram_block structure with a specified content_type and + * id. + * + * @return + * Returns block pointer on success; + * NULL on failure + */ +cram_block *cram_new_block(enum cram_content_type content_type, + int content_id); + +/*! Reads a block from a cram file. + * + * @return + * Returns cram_block pointer on success; + * NULL on failure + */ +cram_block *cram_read_block(cram_fd *fd); + +/*! Writes a CRAM block. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_write_block(cram_fd *fd, cram_block *b); + +/*! Frees a CRAM block, deallocating internal data too. + */ +void cram_free_block(cram_block *b); + +/*! Uncompress a memory block using Zlib. 
+ * + * @return + * Returns 0 on success; + * -1 on failure + */ +char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size); + +/*! Uncompresses a CRAM block, if compressed. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_uncompress_block(cram_block *b); + +/*! Compresses a block. + * + * Compresses a block using one of two different zlib strategies. If we only + * want one choice set strat2 to be -1. + * + * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED + * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is + * significantly faster. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, + int method, int level); +int cram_compress_block2(cram_fd *fd, cram_slice *s, + cram_block *b, cram_metrics *metrics, + int method, int level); + +cram_metrics *cram_new_metrics(void); +char *cram_block_method2str(enum cram_block_method_int m); +char *cram_content_type2str(enum cram_content_type t); + +/* + * Find an external block by its content_id + */ + +static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) { + //fprintf(stderr, "%d\t%p\n", id, slice->block_by_id); + uint32_t v = id; + if (slice->block_by_id && v < 256) { + return slice->block_by_id[v]; + } else { + v = 256 + v % 251; + if (slice->block_by_id && + slice->block_by_id[v] && + slice->block_by_id[v]->content_id == id) + return slice->block_by_id[v]; + + // Otherwise a linear search in case of collision + int i; + for (i = 0; i < slice->hdr->num_blocks; i++) { + cram_block *b = slice->block[i]; + if (b && b->content_type == EXTERNAL && b->content_id == id) + return b; + } + } + return NULL; +} + +/* --- Accessor macros for manipulating blocks on a byte by byte basis --- */ + +/* Block size and data pointer. 
*/ +#define BLOCK_SIZE(b) ((b)->byte) +#define BLOCK_DATA(b) ((b)->data) + +/* Returns the address one past the end of the block */ +#define BLOCK_END(b) (&(b)->data[(b)->byte]) + +/* Make block exactly 'l' bytes long */ +static inline int block_resize_exact(cram_block *b, size_t len) { + unsigned char *tmp = realloc(b->data, len); + if (!tmp) + return -1; + b->alloc = len; + b->data = tmp; + return 0; +} + +/* Request block to be at least 'l' bytes long */ +static inline int block_resize(cram_block *b, size_t len) { + if (b->alloc > len) + return 0; + + size_t alloc = b->alloc+800; + alloc = MAX(alloc + (alloc>>2), len); + return block_resize_exact(b, alloc); +} + + +/* Ensure the block can hold at least another 'l' bytes */ +static inline int block_grow(cram_block *b, size_t len) { + return block_resize(b, BLOCK_SIZE(b) + len); +} + +/* Append string 's' of length 'l'. */ +static inline int block_append(cram_block *b, const void *s, size_t len) { + if (block_grow(b, len) < 0) + return -1; + + if (len) { + memcpy(BLOCK_END(b), s, len); + BLOCK_SIZE(b) += len; + } + + return 0; +} + +/* Append as single character 'c' */ +static inline int block_append_char(cram_block *b, char c) { + if (block_grow(b, 1) < 0) + return -1; + + b->data[b->byte++] = c; + return 0; +} + +/* Append a single unsigned integer */ +static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i); +static inline int block_append_uint(cram_block *b, unsigned int i) { + if (block_grow(b, 11) < 0) + return -1; + + unsigned char *cp = &b->data[b->byte]; + b->byte += append_uint32(cp, i) - cp; + return 0; +} + +// Versions of above with built in goto block_err calls. 
+#define BLOCK_RESIZE_EXACT(b,l) if (block_resize_exact((b),(l))<0) goto block_err +#define BLOCK_RESIZE(b,l) if (block_resize((b),(l)) <0) goto block_err +#define BLOCK_GROW(b,l) if (block_grow((b),(l)) <0) goto block_err +#define BLOCK_APPEND(b,s,l) if (block_append((b),(s),(l)) <0) goto block_err +#define BLOCK_APPEND_CHAR(b,c) if (block_append_char((b),(c)) <0) goto block_err +#define BLOCK_APPEND_UINT(b,i) if (block_append_uint((b),(i)) <0) goto block_err + +static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i) { + uint32_t j; + + if (i == 0) { + *cp++ = '0'; + return cp; + } + + if (i < 100) goto b1; + if (i < 10000) goto b3; + if (i < 1000000) goto b5; + if (i < 100000000) goto b7; + + if ((j = i / 1000000000)) {*cp++ = j + '0'; i -= j*1000000000; goto x8;} + if ((j = i / 100000000)) {*cp++ = j + '0'; i -= j*100000000; goto x7;} + b7:if ((j = i / 10000000)) {*cp++ = j + '0'; i -= j*10000000; goto x6;} + if ((j = i / 1000000)) {*cp++ = j + '0', i -= j*1000000; goto x5;} + b5:if ((j = i / 100000)) {*cp++ = j + '0', i -= j*100000; goto x4;} + if ((j = i / 10000)) {*cp++ = j + '0', i -= j*10000; goto x3;} + b3:if ((j = i / 1000)) {*cp++ = j + '0', i -= j*1000; goto x2;} + if ((j = i / 100)) {*cp++ = j + '0', i -= j*100; goto x1;} + b1:if ((j = i / 10)) {*cp++ = j + '0', i -= j*10; goto x0;} + if (i) *cp++ = i + '0'; + return cp; + + x8: *cp++ = i / 100000000 + '0', i %= 100000000; + x7: *cp++ = i / 10000000 + '0', i %= 10000000; + x6: *cp++ = i / 1000000 + '0', i %= 1000000; + x5: *cp++ = i / 100000 + '0', i %= 100000; + x4: *cp++ = i / 10000 + '0', i %= 10000; + x3: *cp++ = i / 1000 + '0', i %= 1000; + x2: *cp++ = i / 100 + '0', i %= 100; + x1: *cp++ = i / 10 + '0', i %= 10; + x0: *cp++ = i + '0'; + + return cp; +} + +static inline unsigned char *append_sub32(unsigned char *cp, uint32_t i) { + *cp++ = i / 100000000 + '0', i %= 100000000; + *cp++ = i / 10000000 + '0', i %= 10000000; + *cp++ = i / 1000000 + '0', i %= 1000000; + *cp++ = i / 
100000 + '0', i %= 100000; + *cp++ = i / 10000 + '0', i %= 10000; + *cp++ = i / 1000 + '0', i %= 1000; + *cp++ = i / 100 + '0', i %= 100; + *cp++ = i / 10 + '0', i %= 10; + *cp++ = i + '0'; + + return cp; +} + +static inline unsigned char *append_uint64(unsigned char *cp, uint64_t i) { + uint64_t j; + + if (i <= 0xffffffff) + return append_uint32(cp, i); + + if ((j = i/1000000000) > 1000000000) { + cp = append_uint32(cp, j/1000000000); + j %= 1000000000; + cp = append_sub32(cp, j); + } else { + cp = append_uint32(cp, i / 1000000000); + } + cp = append_sub32(cp, i % 1000000000); + + return cp; +} + +#define BLOCK_UPLEN(b) \ + (b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b)) + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * Reference sequence handling + */ + +/*! Loads a reference set from fn and stores in the cram_fd. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_load_reference(cram_fd *fd, char *fn); + +/*! Generates a lookup table in refs based on the SQ headers in sam_hdr_t. + * + * Indexes references by the order they appear in a BAM file. This may not + * necessarily be the same order they appear in the fasta reference file. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int refs2id(refs_t *r, sam_hdr_t *hdr); + +void refs_free(refs_t *r); + +/*! Returns a portion of a reference sequence from start to end inclusive. + * + * The returned pointer is owned by the cram_file fd and should not be freed + * by the caller. It is valid only until the next cram_get_ref is called + * with the same fd parameter (so is thread-safe if given multiple files). + * + * To return the entire reference sequence, specify start as 1 and end + * as 0. 
+ * + * @return + * Returns reference on success; + * NULL on failure + */ +char *cram_get_ref(cram_fd *fd, int id, hts_pos_t start, hts_pos_t end); +void cram_ref_incr(refs_t *r, int id); +void cram_ref_decr(refs_t *r, int id); +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * Containers + */ + +/*! Creates a new container, specifying the maximum number of slices + * and records permitted. + * + * @return + * Returns cram_container ptr on success; + * NULL on failure + */ +cram_container *cram_new_container(int nrec, int nslice); +void cram_free_container(cram_container *c); + +/*! Reads a container header. + * + * @return + * Returns cram_container on success; + * NULL on failure or no container left (fd->err == 0). + */ +cram_container *cram_read_container(cram_fd *fd); + +/*! Writes a container structure. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_write_container(cram_fd *fd, cram_container *h); + +/*! Flushes a container to disk. + * + * Flushes a completely or partially full container to disk, writing + * container structure, header and blocks. This also calls the encoder + * functions. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_flush_container(cram_fd *fd, cram_container *c); +int cram_flush_container_mt(cram_fd *fd, cram_container *c); + + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * Compression headers; the first part of the container + */ + +/*! Creates a new blank container compression header + * + * @return + * Returns header ptr on success; + * NULL on failure + */ +cram_block_compression_hdr *cram_new_compression_header(void); + +/*! Frees a cram_block_compression_hdr */ +void cram_free_compression_header(cram_block_compression_hdr *hdr); + + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * Slices and slice headers + */ + +/*! 
Frees a slice header */ +void cram_free_slice_header(cram_block_slice_hdr *hdr); + +/*! Frees a slice */ +void cram_free_slice(cram_slice *s); + +/*! Creates a new empty slice in memory, for subsequent writing to + * disk. + * + * @return + * Returns cram_slice ptr on success; + * NULL on failure + */ +cram_slice *cram_new_slice(enum cram_content_type type, int nrecs); + +/*! Loads an entire slice. + * + * FIXME: In 1.0 the native unit of slices within CRAM is broken + * as slices contain references to objects in other slices. + * To work around this while keeping the slice oriented outer loop + * we read all slices and stitch them together into a fake large + * slice instead. + * + * @return + * Returns cram_slice ptr on success; + * NULL on failure + */ +cram_slice *cram_read_slice(cram_fd *fd); + + + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * CRAM file definition (header) + */ + +/*! Reads a CRAM file definition structure. + * + * @return + * Returns file_def ptr on success; + * NULL on failure + */ +cram_file_def *cram_read_file_def(cram_fd *fd); + +/*! Writes a cram_file_def structure to cram_fd. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_write_file_def(cram_fd *fd, cram_file_def *def); + +/*! Frees a cram_file_def structure. */ +void cram_free_file_def(cram_file_def *def); + + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * SAM header I/O + */ + +/*! Reads the SAM header from the first CRAM data block. + * + * Also performs minimal parsing to extract read-group + * and sample information. + * + * @return + * Returns SAM hdr ptr on success; + * NULL on failure + */ +sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd); + +/*! Writes a CRAM SAM header. 
+ * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr); + + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * The top-level cram opening, closing and option handling + */ + +/*! Opens a CRAM file for read (mode "rb") or write ("wb"). + * + * The filename may be "-" to indicate stdin or stdout. + * + * @return + * Returns file handle on success; + * NULL on failure. + */ +cram_fd *cram_open(const char *filename, const char *mode); + +/*! Opens an existing stream for reading or writing. + * + * @return + * Returns file handle on success; + * NULL on failure. + */ +cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode); + +/*! Closes a CRAM file. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_close(cram_fd *fd); + +/* + * Seek within a CRAM file. + * + * Returns 0 on success + * -1 on failure + */ +int cram_seek(cram_fd *fd, off_t offset, int whence); + +/* + * Flushes a CRAM file. + * Useful for when writing to stdout without wishing to close the stream. + * + * Returns 0 on success + * -1 on failure + */ +int cram_flush(cram_fd *fd); + +/*! Checks for end of file on a cram_fd stream. + * + * @return + * Returns 0 if not at end of file + * 1 if we hit an expected EOF (end of range or EOF block) + * 2 for other EOF (end of stream without EOF block) + */ +int cram_eof(cram_fd *fd); + +/*! Sets options on the cram_fd. + * + * See CRAM_OPT_* definitions in cram_structs.h. + * Use this immediately after opening. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...); + +/*! Sets options on the cram_fd. + * + * See CRAM_OPT_* definitions in cram_structs.h. + * Use this immediately after opening. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args); + +/*! 
+ * Attaches a header to a cram_fd. + * + * This should be used when creating a new cram_fd for writing where + * we have an sam_hdr_t already constructed (eg from a file we've read + * in). + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr); + +/*! + * Returns the hFILE connected to a cram_fd. + */ +static inline struct hFILE *cram_hfile(cram_fd *fd) { + return fd->fp; +} + +#ifdef __cplusplus +} +#endif + +#endif /* CRAM_IO_H */ diff --git a/src/htslib-1.18/cram/cram_samtools.h b/src/htslib-1.21/cram/cram_samtools.h similarity index 100% rename from src/htslib-1.18/cram/cram_samtools.h rename to src/htslib-1.21/cram/cram_samtools.h diff --git a/src/htslib-1.19.1/cram/cram_stats.c b/src/htslib-1.21/cram/cram_stats.c similarity index 100% rename from src/htslib-1.19.1/cram/cram_stats.c rename to src/htslib-1.21/cram/cram_stats.c diff --git a/src/htslib-1.18/cram/cram_stats.h b/src/htslib-1.21/cram/cram_stats.h similarity index 100% rename from src/htslib-1.18/cram/cram_stats.h rename to src/htslib-1.21/cram/cram_stats.h diff --git a/src/htslib-1.19.1/cram/cram_structs.h b/src/htslib-1.21/cram/cram_structs.h similarity index 100% rename from src/htslib-1.19.1/cram/cram_structs.h rename to src/htslib-1.21/cram/cram_structs.h diff --git a/src/htslib-1.18/cram/mFILE.c b/src/htslib-1.21/cram/mFILE.c similarity index 100% rename from src/htslib-1.18/cram/mFILE.c rename to src/htslib-1.21/cram/mFILE.c diff --git a/src/htslib-1.18/cram/mFILE.h b/src/htslib-1.21/cram/mFILE.h similarity index 100% rename from src/htslib-1.18/cram/mFILE.h rename to src/htslib-1.21/cram/mFILE.h diff --git a/src/htslib-1.18/cram/misc.h b/src/htslib-1.21/cram/misc.h similarity index 100% rename from src/htslib-1.18/cram/misc.h rename to src/htslib-1.21/cram/misc.h diff --git a/src/htslib-1.18/cram/open_trace_file.c b/src/htslib-1.21/cram/open_trace_file.c similarity index 100% rename from 
src/htslib-1.18/cram/open_trace_file.c rename to src/htslib-1.21/cram/open_trace_file.c diff --git a/src/htslib-1.18/cram/open_trace_file.h b/src/htslib-1.21/cram/open_trace_file.h similarity index 100% rename from src/htslib-1.18/cram/open_trace_file.h rename to src/htslib-1.21/cram/open_trace_file.h diff --git a/src/htslib-1.18/cram/os.h b/src/htslib-1.21/cram/os.h similarity index 100% rename from src/htslib-1.18/cram/os.h rename to src/htslib-1.21/cram/os.h diff --git a/src/htslib-1.18/cram/pooled_alloc.c b/src/htslib-1.21/cram/pooled_alloc.c similarity index 100% rename from src/htslib-1.18/cram/pooled_alloc.c rename to src/htslib-1.21/cram/pooled_alloc.c diff --git a/src/htslib-1.18/cram/pooled_alloc.h b/src/htslib-1.21/cram/pooled_alloc.h similarity index 100% rename from src/htslib-1.18/cram/pooled_alloc.h rename to src/htslib-1.21/cram/pooled_alloc.h diff --git a/src/htslib-1.18/cram/string_alloc.c b/src/htslib-1.21/cram/string_alloc.c similarity index 100% rename from src/htslib-1.18/cram/string_alloc.c rename to src/htslib-1.21/cram/string_alloc.c diff --git a/src/htslib-1.18/cram/string_alloc.h b/src/htslib-1.21/cram/string_alloc.h similarity index 100% rename from src/htslib-1.18/cram/string_alloc.h rename to src/htslib-1.21/cram/string_alloc.h diff --git a/src/htslib-1.18/errmod.c b/src/htslib-1.21/errmod.c similarity index 100% rename from src/htslib-1.18/errmod.c rename to src/htslib-1.21/errmod.c diff --git a/src/htslib-1.18/faidx.5 b/src/htslib-1.21/faidx.5 similarity index 100% rename from src/htslib-1.18/faidx.5 rename to src/htslib-1.21/faidx.5 diff --git a/src/htslib-1.21/faidx.c b/src/htslib-1.21/faidx.c new file mode 100644 index 0000000..ed39c0c --- /dev/null +++ b/src/htslib-1.21/faidx.c @@ -0,0 +1,1066 @@ +/* faidx.c -- FASTA and FASTQ random access. + + Copyright (C) 2008, 2009, 2013-2020, 2022, 2024 Genome Research Ltd. + Portions copyright (C) 2011 Broad Institute. 
+ + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "htslib/bgzf.h" +#include "htslib/faidx.h" +#include "htslib/hfile.h" +#include "htslib/khash.h" +#include "htslib/kstring.h" +#include "hts_internal.h" + +// Faster isgraph; assumes ASCII +static inline int isgraph_(unsigned char c) { + return c > ' ' && c <= '~'; +} + +#ifdef isgraph +# undef isgraph +#endif +#define isgraph isgraph_ + +// An optimised bgzf_getc. +// We could consider moving this to bgzf.h, but our own code uses it here only. 
+static inline int bgzf_getc_(BGZF *fp) { + if (fp->block_offset+1 < fp->block_length) { + int c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; + fp->uncompressed_address++; + return c; + } + + return bgzf_getc(fp); +} +#define bgzf_getc bgzf_getc_ + +typedef struct { + int id; // faidx_t->name[id] is for this struct. + uint32_t line_len, line_blen; + uint64_t len; + uint64_t seq_offset; + uint64_t qual_offset; +} faidx1_t; +KHASH_MAP_INIT_STR(s, faidx1_t) + +struct faidx_t { + BGZF *bgzf; + int n, m; + char **name; + khash_t(s) *hash; + enum fai_format_options format; +}; + +static int fai_name2id(void *v, const char *ref) +{ + faidx_t *fai = (faidx_t *)v; + khint_t k = kh_get(s, fai->hash, ref); + return k == kh_end(fai->hash) ? -1 : kh_val(fai->hash, k).id; +} + +static inline int fai_insert_index(faidx_t *idx, const char *name, uint64_t len, uint32_t line_len, uint32_t line_blen, uint64_t seq_offset, uint64_t qual_offset) +{ + if (!name) { + hts_log_error("Malformed line"); + return -1; + } + + char *name_key = strdup(name); + int absent; + khint_t k = kh_put(s, idx->hash, name_key, &absent); + faidx1_t *v = &kh_value(idx->hash, k); + + if (! absent) { + hts_log_warning("Ignoring duplicate sequence \"%s\" at byte offset %" PRIu64, name, seq_offset); + free(name_key); + return 0; + } + + if (idx->n == idx->m) { + char **tmp; + idx->m = idx->m? 
idx->m<<1 : 16; + if (!(tmp = (char**)realloc(idx->name, sizeof(char*) * idx->m))) { + hts_log_error("Out of memory"); + return -1; + } + idx->name = tmp; + } + v->id = idx->n; + idx->name[idx->n++] = name_key; + v->len = len; + v->line_len = line_len; + v->line_blen = line_blen; + v->seq_offset = seq_offset; + v->qual_offset = qual_offset; + + return 0; +} + + +static faidx_t *fai_build_core(BGZF *bgzf) { + kstring_t name = { 0, 0, NULL }; + int c, read_done, line_num; + faidx_t *idx; + uint64_t seq_offset, qual_offset; + uint64_t seq_len, qual_len; + uint64_t char_len, cl, line_len, ll; + enum read_state {OUT_READ, IN_NAME, IN_SEQ, SEQ_END, IN_QUAL} state; + + idx = (faidx_t*)calloc(1, sizeof(faidx_t)); + idx->hash = kh_init(s); + idx->format = FAI_NONE; + + state = OUT_READ, read_done = 0, line_num = 1; + seq_offset = qual_offset = seq_len = qual_len = char_len = cl = line_len = ll = 0; + + while ((c = bgzf_getc(bgzf)) >= 0) { + switch (state) { + case OUT_READ: + switch (c) { + case '>': + if (idx->format == FAI_FASTQ) { + hts_log_error("Found '>' in a FASTQ file, error at line %d", line_num); + goto fail; + } + + idx->format = FAI_FASTA; + state = IN_NAME; + break; + + case '@': + if (idx->format == FAI_FASTA) { + hts_log_error("Found '@' in a FASTA file, error at line %d", line_num); + goto fail; + } + + idx->format = FAI_FASTQ; + state = IN_NAME; + break; + + case '\r': + // Blank line with cr-lf ending? + if ((c = bgzf_getc(bgzf)) == '\n') { + line_num++; + } else { + hts_log_error("Format error, carriage return not followed by new line at line %d", line_num); + goto fail; + } + break; + + case '\n': + // just move onto the next line + line_num++; + break; + + default: { + char s[4] = { '"', c, '"', '\0' }; + hts_log_error("Format error, unexpected %s at line %d", isprint(c) ? 
s : "character", line_num); + goto fail; + } + } + break; + + case IN_NAME: + if (read_done) { + if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) + goto fail; + + read_done = 0; + } + + name.l = 0; + + do { + if (!isspace(c)) { + kputc(c, &name); + } else if (name.l > 0 || c == '\n') { + break; + } + } while ((c = bgzf_getc(bgzf)) >= 0); + + kputsn("", 0, &name); + + if (c < 0) { + hts_log_error("The last entry '%s' has no sequence at line %d", name.s, line_num); + goto fail; + } + + // read the rest of the line if necessary + if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); + + state = IN_SEQ; seq_len = qual_len = char_len = line_len = 0; + seq_offset = bgzf_utell(bgzf); + line_num++; + break; + + case IN_SEQ: + if (idx->format == FAI_FASTA) { + if (c == '\n') { + state = OUT_READ; + line_num++; + continue; + } else if (c == '>') { + state = IN_NAME; + continue; + } + } else if (idx->format == FAI_FASTQ) { + if (c == '+') { + state = IN_QUAL; + if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); + qual_offset = bgzf_utell(bgzf); + line_num++; + continue; + } else if (c == '\n') { + hts_log_error("Inlined empty line is not allowed in sequence '%s' at line %d", name.s, line_num); + goto fail; + } + } + + ll = cl = 0; + + if (idx->format == FAI_FASTA) read_done = 1; + + do { + ll++; + if (isgraph(c)) cl++; + } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); + + ll++; seq_len += cl; + + if (line_len == 0) { + line_len = ll; + char_len = cl; + } else if (line_len > ll) { + + if (idx->format == FAI_FASTA) + state = OUT_READ; + else + state = SEQ_END; + + } else if (line_len < ll) { + hts_log_error("Different line length in sequence '%s' at line %d", name.s, line_num); + goto fail; + } + + line_num++; + break; + + case SEQ_END: + if (c == '+') { + state = IN_QUAL; + while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); + qual_offset = bgzf_utell(bgzf); + line_num++; + } else { + hts_log_error("Format 
error, expecting '+', got '%c' at line %d", c, line_num); + goto fail; + } + break; + + case IN_QUAL: + if (c == '\n') { + if (!read_done) { + hts_log_error("Inlined empty line is not allowed in quality of sequence '%s' at line %d", name.s, line_num); + goto fail; + } + + state = OUT_READ; + line_num++; + continue; + } else if (c == '@' && read_done) { + state = IN_NAME; + continue; + } + + ll = cl = 0; + + do { + ll++; + if (isgraph(c)) cl++; + } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); + + ll++; qual_len += cl; + + if (line_len < ll) { + hts_log_error("Quality line length too long in '%s' at line %d", name.s, line_num); + goto fail; + } else if (qual_len == seq_len) { + read_done = 1; + } else if (qual_len > seq_len) { + hts_log_error("Quality length longer than sequence in '%s' at line %d", name.s, line_num); + goto fail; + } else if (line_len > ll) { + hts_log_error("Quality line length too short in '%s' at line %d", name.s, line_num); + goto fail; + } + + line_num++; + break; + } + } + + if (read_done) { + if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) + goto fail; + } else { + hts_log_error("File truncated at line %d", line_num); + goto fail; + } + + free(name.s); + return idx; + +fail: + free(name.s); + fai_destroy(idx); + return NULL; +} + + +static int fai_save(const faidx_t *fai, hFILE *fp) { + khint_t k; + int i; + char buf[96]; // Must be big enough for format below. 
+ + for (i = 0; i < fai->n; ++i) { + faidx1_t x; + k = kh_get(s, fai->hash, fai->name[i]); + assert(k < kh_end(fai->hash)); + x = kh_value(fai->hash, k); + + if (fai->format == FAI_FASTA) { + snprintf(buf, sizeof(buf), + "\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\n", + x.len, x.seq_offset, x.line_blen, x.line_len); + } else { + snprintf(buf, sizeof(buf), + "\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\t%"PRIu64"\n", + x.len, x.seq_offset, x.line_blen, x.line_len, x.qual_offset); + } + + if (hputs(fai->name[i], fp) != 0) return -1; + if (hputs(buf, fp) != 0) return -1; + } + return 0; +} + + +static faidx_t *fai_read(hFILE *fp, const char *fname, int format) +{ + faidx_t *fai; + char *buf = NULL, *p; + ssize_t l, lnum = 1; + + fai = (faidx_t*)calloc(1, sizeof(faidx_t)); + if (!fai) return NULL; + + fai->hash = kh_init(s); + if (!fai->hash) goto fail; + + buf = (char*)calloc(0x10000, 1); + if (!buf) goto fail; + + while ((l = hgetln(buf, 0x10000, fp)) > 0) { + uint32_t line_len, line_blen, n; + uint64_t len; + uint64_t seq_offset; + uint64_t qual_offset = 0; + + for (p = buf; *p && !isspace_c(*p); ++p); + + if (p - buf < l) { + *p = 0; ++p; + } + + if (format == FAI_FASTA) { + n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32, &len, &seq_offset, &line_blen, &line_len); + + if (n != 4) { + hts_log_error("Could not understand FASTA index %s line %zd", fname, lnum); + goto fail; + } + } else { + n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32"%"SCNu64, &len, &seq_offset, &line_blen, &line_len, &qual_offset); + + if (n != 5) { + if (n == 4) { + hts_log_error("Possibly this is a FASTA index, try using faidx. 
Problem in %s line %zd", fname, lnum); + } else { + hts_log_error("Could not understand FASTQ index %s line %zd", fname, lnum); + } + + goto fail; + } + } + + if (fai_insert_index(fai, buf, len, line_len, line_blen, seq_offset, qual_offset) != 0) { + goto fail; + } + + if (buf[l - 1] == '\n') ++lnum; + } + + if (l < 0) { + hts_log_error("Error while reading %s: %s", fname, strerror(errno)); + goto fail; + } + free(buf); + return fai; + + fail: + free(buf); + fai_destroy(fai); + return NULL; +} + +void fai_destroy(faidx_t *fai) +{ + int i; + if (!fai) return; + for (i = 0; i < fai->n; ++i) free(fai->name[i]); + free(fai->name); + kh_destroy(s, fai->hash); + if (fai->bgzf) bgzf_close(fai->bgzf); + free(fai); +} + + +static int fai_build3_core(const char *fn, const char *fnfai, const char *fngzi) +{ + kstring_t fai_kstr = { 0, 0, NULL }; + kstring_t gzi_kstr = { 0, 0, NULL }; + BGZF *bgzf = NULL; + hFILE *fp = NULL; + faidx_t *fai = NULL; + int save_errno, res; + char *file_type; + + bgzf = bgzf_open(fn, "r"); + + if ( !bgzf ) { + hts_log_error("Failed to open the file %s : %s", fn, strerror(errno)); + goto fail; + } + + if ( bgzf->is_compressed ) { + if (bgzf_index_build_init(bgzf) != 0) { + hts_log_error("Failed to allocate bgzf index"); + goto fail; + } + } + + fai = fai_build_core(bgzf); + + if ( !fai ) { + if (bgzf->is_compressed && bgzf->is_gzip) { + hts_log_error("Cannot index files compressed with gzip, please use bgzip"); + } + goto fail; + } + + if (fai->format == FAI_FASTA) { + file_type = "FASTA"; + } else { + file_type = "FASTQ"; + } + + if (!fnfai) { + if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; + fnfai = fai_kstr.s; + } + + if (!fngzi) { + if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; + fngzi = gzi_kstr.s; + } + + if ( bgzf->is_compressed ) { + if (bgzf_index_dump(bgzf, fngzi, NULL) < 0) { + hts_log_error("Failed to make bgzf index %s", fngzi); + goto fail; + } + } + + res = bgzf_close(bgzf); + bgzf = NULL; + + if (res < 0) { + 
hts_log_error("Error on closing %s : %s", fn, strerror(errno)); + goto fail; + } + + fp = hopen(fnfai, "wb"); + + if ( !fp ) { + hts_log_error("Failed to open %s index %s : %s", file_type, fnfai, strerror(errno)); + goto fail; + } + + if (fai_save(fai, fp) != 0) { + hts_log_error("Failed to write %s index %s : %s", file_type, fnfai, strerror(errno)); + goto fail; + } + + if (hclose(fp) != 0) { + hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); + goto fail; + } + + free(fai_kstr.s); + free(gzi_kstr.s); + fai_destroy(fai); + return 0; + + fail: + save_errno = errno; + free(fai_kstr.s); + free(gzi_kstr.s); + bgzf_close(bgzf); + fai_destroy(fai); + errno = save_errno; + return -1; +} + + +int fai_build3(const char *fn, const char *fnfai, const char *fngzi) { + return fai_build3_core(fn, fnfai, fngzi); +} + + +int fai_build(const char *fn) { + return fai_build3(fn, NULL, NULL); +} + + +static faidx_t *fai_load3_core(const char *fn, const char *fnfai, const char *fngzi, + int flags, int format) +{ + kstring_t fai_kstr = { 0, 0, NULL }; + kstring_t gzi_kstr = { 0, 0, NULL }; + hFILE *fp = NULL; + faidx_t *fai = NULL; + int res, gzi_index_needed = 0; + char *file_type; + + if (format == FAI_FASTA) { + file_type = "FASTA"; + } else { + file_type = "FASTQ"; + } + + if (fn == NULL) + return NULL; + + if (fnfai == NULL) { + if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; + fnfai = fai_kstr.s; + } + if (fngzi == NULL) { + if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; + fngzi = gzi_kstr.s; + } + + fp = hopen(fnfai, "rb"); + + if (fp) { + // index file present, check if a compressed index is needed + hFILE *gz = NULL; + BGZF *bgzf = bgzf_open(fn, "rb"); + + if (bgzf == 0) { + hts_log_error("Failed to open %s file %s", file_type, fn); + goto fail; + } + + if (bgzf_compression(bgzf) == 2) { // BGZF compression + if ((gz = hopen(fngzi, "rb")) == 0) { + + if (!(flags & FAI_CREATE) || errno != ENOENT) { + hts_log_error("Failed to 
open %s index %s: %s", file_type, fngzi, strerror(errno)); + bgzf_close(bgzf); + goto fail; + } + + gzi_index_needed = 1; + res = hclose(fp); // closed as going to be re-indexed + + if (res < 0) { + hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); + goto fail; + } + } else { + res = hclose(gz); + + if (res < 0) { + hts_log_error("Failed on closing %s index %s : %s", file_type, fngzi, strerror(errno)); + goto fail; + } + } + } + + bgzf_close(bgzf); + } + + if (fp == 0 || gzi_index_needed) { + if (!(flags & FAI_CREATE) || errno != ENOENT) { + hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); + goto fail; + } + + hts_log_info("Build %s index", file_type); + + if (fai_build3_core(fn, fnfai, fngzi) < 0) { + goto fail; + } + + fp = hopen(fnfai, "rb"); + if (fp == 0) { + hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); + goto fail; + } + } + + fai = fai_read(fp, fnfai, format); + if (fai == NULL) { + hts_log_error("Failed to read %s index %s", file_type, fnfai); + goto fail; + } + + res = hclose(fp); + fp = NULL; + if (res < 0) { + hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); + goto fail; + } + + fai->bgzf = bgzf_open(fn, "rb"); + if (fai->bgzf == 0) { + hts_log_error("Failed to open %s file %s", file_type, fn); + goto fail; + } + + if ( fai->bgzf->is_compressed==1 ) { + if ( bgzf_index_load(fai->bgzf, fngzi, NULL) < 0 ) { + hts_log_error("Failed to load .gzi index: %s", fngzi); + goto fail; + } + } + free(fai_kstr.s); + free(gzi_kstr.s); + return fai; + + fail: + if (fai) fai_destroy(fai); + if (fp) hclose_abruptly(fp); + free(fai_kstr.s); + free(gzi_kstr.s); + return NULL; +} + + +faidx_t *fai_load3(const char *fn, const char *fnfai, const char *fngzi, + int flags) { + return fai_load3_core(fn, fnfai, fngzi, flags, FAI_FASTA); +} + + +faidx_t *fai_load(const char *fn) +{ + return fai_load3(fn, NULL, NULL, FAI_CREATE); +} 
+ + +faidx_t *fai_load3_format(const char *fn, const char *fnfai, const char *fngzi, + int flags, enum fai_format_options format) { + return fai_load3_core(fn, fnfai, fngzi, flags, format); +} + + +faidx_t *fai_load_format(const char *fn, enum fai_format_options format) { + return fai_load3_format(fn, NULL, NULL, FAI_CREATE, format); +} + + +static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val, + uint64_t offset, hts_pos_t beg, hts_pos_t end, hts_pos_t *len) { + char *buffer, *s; + ssize_t nread, remaining, firstline_len, firstline_blen; + int ret; + + if ((uint64_t) end - (uint64_t) beg >= SIZE_MAX - 2) { + hts_log_error("Range %"PRId64"..%"PRId64" too big", beg, end); + *len = -1; + return NULL; + } + + if (val->line_blen <= 0) { + hts_log_error("Invalid line length in index: %d", val->line_blen); + *len = -1; + return NULL; + } + + ret = bgzf_useek(fai->bgzf, + offset + + beg / val->line_blen * val->line_len + + beg % val->line_blen, SEEK_SET); + + if (ret < 0) { + *len = -1; + hts_log_error("Failed to retrieve block. (Seeking in a compressed, .gzi unindexed, file?)"); + return NULL; + } + + // Over-allocate so there is extra space for one end-of-line sequence + buffer = (char*)malloc((size_t) end - beg + val->line_len - val->line_blen + 1); + if (!buffer) { + *len = -1; + return NULL; + } + + remaining = *len = end - beg; + firstline_blen = val->line_blen - beg % val->line_blen; + + // Special case when the entire interval requested is within a single FASTA/Q line + if (remaining <= firstline_blen) { + nread = bgzf_read_small(fai->bgzf, buffer, remaining); + if (nread < remaining) goto error; + buffer[nread] = '\0'; + return buffer; + } + + s = buffer; + firstline_len = val->line_len - beg % val->line_blen; + + // Read the (partial) first line and its line terminator, but increment s past the + // line contents only, so the terminator characters will be overwritten by the next line. 
+ nread = bgzf_read_small(fai->bgzf, s, firstline_len); + if (nread < firstline_len) goto error; + s += firstline_blen; + remaining -= firstline_blen; + + // Similarly read complete lines and their line terminator characters, but overwrite the latter. + while (remaining > val->line_blen) { + nread = bgzf_read_small(fai->bgzf, s, val->line_len); + if (nread < (ssize_t) val->line_len) goto error; + s += val->line_blen; + remaining -= val->line_blen; + } + + if (remaining > 0) { + nread = bgzf_read_small(fai->bgzf, s, remaining); + if (nread < remaining) goto error; + s += remaining; + } + + *s = '\0'; + return buffer; + +error: + hts_log_error("Failed to retrieve block: %s", + (nread == 0)? "unexpected end of file" : "error reading file"); + free(buffer); + *len = -1; + return NULL; +} + +static int fai_get_val(const faidx_t *fai, const char *str, + hts_pos_t *len, faidx1_t *val, hts_pos_t *fbeg, hts_pos_t *fend) { + khiter_t iter; + khash_t(s) *h; + int id; + hts_pos_t beg, end; + + if (!fai_parse_region(fai, str, &id, &beg, &end, 0)) { + hts_log_warning("Reference %s not found in FASTA file, returning empty sequence", str); + *len = -2; + return 1; + } + + h = fai->hash; + iter = kh_get(s, h, faidx_iseq(fai, id)); + if (iter >= kh_end(h)) { + // should have already been caught above + abort(); + } + *val = kh_value(h, iter); + + if (beg >= val->len) beg = val->len; + if (end >= val->len) end = val->len; + if (beg > end) beg = end; + + *fbeg = beg; + *fend = end; + + return 0; +} + +/* + * The internal still has line_blen as uint32_t, but our references + * can be longer, so for future proofing we use hts_pos_t. We also needed + * a signed value so we can return negatives as an error. 
+ */ +hts_pos_t fai_line_length(const faidx_t *fai, const char *str) +{ + faidx1_t val; + int64_t beg, end; + hts_pos_t len; + + if (fai_get_val(fai, str, &len, &val, &beg, &end)) + return -1; + else + return val.line_blen; +} + +char *fai_fetch64(const faidx_t *fai, const char *str, hts_pos_t *len) +{ + faidx1_t val; + int64_t beg, end; + + if (fai_get_val(fai, str, len, &val, &beg, &end)) { + return NULL; + } + + // now retrieve the sequence + return fai_retrieve(fai, &val, val.seq_offset, beg, end, len); +} + +char *fai_fetch(const faidx_t *fai, const char *str, int *len) +{ + hts_pos_t len64; + char *ret = fai_fetch64(fai, str, &len64); + *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc + return ret; +} + +char *fai_fetchqual64(const faidx_t *fai, const char *str, hts_pos_t *len) { + faidx1_t val; + int64_t beg, end; + + if (fai_get_val(fai, str, len, &val, &beg, &end)) { + return NULL; + } + + // now retrieve the sequence + return fai_retrieve(fai, &val, val.qual_offset, beg, end, len); +} + +char *fai_fetchqual(const faidx_t *fai, const char *str, int *len) { + hts_pos_t len64; + char *ret = fai_fetchqual64(fai, str, &len64); + *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc + return ret; +} + +int faidx_fetch_nseq(const faidx_t *fai) +{ + return fai->n; +} + +int faidx_nseq(const faidx_t *fai) +{ + return fai->n; +} + +const char *faidx_iseq(const faidx_t *fai, int i) +{ + return fai->name[i]; +} + +hts_pos_t faidx_seq_len64(const faidx_t *fai, const char *seq) +{ + khint_t k = kh_get(s, fai->hash, seq); + if ( k == kh_end(fai->hash) ) return -1; + return kh_val(fai->hash, k).len; +} + +int faidx_seq_len(const faidx_t *fai, const char *seq) +{ + hts_pos_t len = faidx_seq_len64(fai, seq); + return len < INT_MAX ? 
len : INT_MAX; +} + +static int faidx_adjust_position(const faidx_t *fai, int end_adjust, + faidx1_t *val_out, const char *c_name, + hts_pos_t *p_beg_i, hts_pos_t *p_end_i, + hts_pos_t *len) { + khiter_t iter; + faidx1_t *val; + + // Adjust position + iter = kh_get(s, fai->hash, c_name); + + if (iter == kh_end(fai->hash)) { + if (len) + *len = -2; + hts_log_error("The sequence \"%s\" was not found", c_name); + return 1; + } + + val = &kh_value(fai->hash, iter); + + if (val_out) + *val_out = *val; + + if(*p_end_i < *p_beg_i) + *p_beg_i = *p_end_i; + + if(*p_beg_i < 0) + *p_beg_i = 0; + else if(val->len <= *p_beg_i) + *p_beg_i = val->len; + + if(*p_end_i < 0) + *p_end_i = 0; + else if(val->len <= *p_end_i) + *p_end_i = val->len - end_adjust; + + return 0; +} + +int fai_adjust_region(const faidx_t *fai, int tid, + hts_pos_t *beg, hts_pos_t *end) +{ + hts_pos_t orig_beg, orig_end; + + if (!fai || !beg || !end || tid < 0 || tid >= fai->n) + return -1; + + orig_beg = *beg; + orig_end = *end; + if (faidx_adjust_position(fai, 0, NULL, fai->name[tid], beg, end, NULL) != 0) { + hts_log_error("Inconsistent faidx internal state - couldn't find \"%s\"", + fai->name[tid]); + return -1; + } + + return ((orig_beg != *beg ? 1 : 0) | + (orig_end != *end && orig_end < HTS_POS_MAX ? 2 : 0)); +} + +char *faidx_fetch_seq64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) +{ + faidx1_t val; + + // Adjust position + if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { + return NULL; + } + + // Now retrieve the sequence + return fai_retrieve(fai, &val, val.seq_offset, p_beg_i, p_end_i + 1, len); +} + +char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) +{ + hts_pos_t len64; + char *ret = faidx_fetch_seq64(fai, c_name, p_beg_i, p_end_i, &len64); + *len = len64 < INT_MAX ? 
len64 : INT_MAX; // trunc + return ret; +} + +char *faidx_fetch_qual64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) +{ + faidx1_t val; + + // Adjust position + if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { + return NULL; + } + + // Now retrieve the sequence + return fai_retrieve(fai, &val, val.qual_offset, p_beg_i, p_end_i + 1, len); +} + +char *faidx_fetch_qual(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) +{ + hts_pos_t len64; + char *ret = faidx_fetch_qual64(fai, c_name, p_beg_i, p_end_i, &len64); + *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc + return ret; +} + +int faidx_has_seq(const faidx_t *fai, const char *seq) +{ + khiter_t iter = kh_get(s, fai->hash, seq); + if (iter == kh_end(fai->hash)) return 0; + return 1; +} + +const char *fai_parse_region(const faidx_t *fai, const char *s, + int *tid, hts_pos_t *beg, hts_pos_t *end, + int flags) +{ + return hts_parse_region(s, tid, beg, end, (hts_name2id_f)fai_name2id, (void *)fai, flags); +} + +void fai_set_cache_size(faidx_t *fai, int cache_size) { + bgzf_set_cache_size(fai->bgzf, cache_size); +} + +// Adds a thread pool to the underlying BGZF layer. 
+int fai_thread_pool(faidx_t *fai, struct hts_tpool *pool, int qsize) { + return bgzf_thread_pool(fai->bgzf, pool, qsize); +} + +char *fai_path(const char *fa) { + char *fai = NULL; + if (!fa) { + hts_log_error("No reference file specified"); + } else { + char *fai_tmp = strstr(fa, HTS_IDX_DELIM); + if (fai_tmp) { + fai_tmp += strlen(HTS_IDX_DELIM); + fai = strdup(fai_tmp); + if (!fai) + hts_log_error("Failed to allocate memory"); + } else { + if (hisremote(fa)) { + fai = hts_idx_locatefn(fa, ".fai"); // get the remote fai file name, if any, but do not download the file + if (!fai) + hts_log_error("Failed to locate index file for remote reference file '%s'", fa); + } else{ + if (hts_idx_check_local(fa, HTS_FMT_FAI, &fai) == 0 && fai) { + if (fai_build3(fa, fai, NULL) == -1) { // create local fai file by indexing local fasta + hts_log_error("Failed to build index file for reference file '%s'", fa); + free(fai); + fai = NULL; + } + } + } + } + } + + return fai; +} diff --git a/src/htslib-1.19.1/fuzz_settings.h b/src/htslib-1.21/fuzz_settings.h similarity index 100% rename from src/htslib-1.19.1/fuzz_settings.h rename to src/htslib-1.21/fuzz_settings.h diff --git a/src/htslib-1.19.1/header.c b/src/htslib-1.21/header.c similarity index 100% rename from src/htslib-1.19.1/header.c rename to src/htslib-1.21/header.c diff --git a/src/htslib-1.18/header.h b/src/htslib-1.21/header.h similarity index 100% rename from src/htslib-1.18/header.h rename to src/htslib-1.21/header.h diff --git a/src/htslib-1.21/hfile.c b/src/htslib-1.21/hfile.c new file mode 100644 index 0000000..552b717 --- /dev/null +++ b/src/htslib-1.21/hfile.c @@ -0,0 +1,1440 @@ +/* hfile.c -- buffered low-level input/output streams. + + Copyright (C) 2013-2021, 2023-2024 Genome Research Ltd. 
+ + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef ENABLE_PLUGINS +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MSYS__) +#define USING_WINDOWS_PLUGIN_DLLS +#include +#endif +#endif + +#include "htslib/hfile.h" +#include "hfile_internal.h" +#include "htslib/kstring.h" + +#ifndef ENOTSUP +#define ENOTSUP EINVAL +#endif +#ifndef EOVERFLOW +#define EOVERFLOW ERANGE +#endif +#ifndef EPROTONOSUPPORT +#define EPROTONOSUPPORT ENOSYS +#endif + +#ifndef SSIZE_MAX /* SSIZE_MAX is POSIX 1 */ +#define SSIZE_MAX LONG_MAX +#endif + +/* hFILE fields are used as follows: + + char *buffer; // Pointer to the start of the I/O buffer + char *begin; // First not-yet-read character / unused position + char *end; // First unfilled/unfillable position + char *limit; // Pointer to the first position past the buffer + + const hFILE_backend *backend; // Methods to refill/flush I/O buffer + + off_t offset; // Offset within the stream of buffer position 0 + unsigned at_eof:1;// For reading, whether EOF has been seen + unsigned mobile:1;// Buffer is a mobile window or fixed full contents + unsigned readonly:1;// Whether opened as "r" rather than "r+"/"w"/"a" + int has_errno; // Error number from the last failure on this stream + +For reading, begin is the first unread character in the buffer and end is the +first unfilled position: + + -----------ABCDEFGHIJKLMNO--------------- + ^buffer ^begin ^end ^limit + +For writing, begin is the first unused position and end is unused so remains +equal to buffer: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------- + ^buffer ^begin ^limit + ^end + +Thus if begin > end then there is a non-empty write buffer, if begin < end +then there is a non-empty read buffer, and if begin == end then both buffers +are empty. In all cases, the stream's file position indicator corresponds +to the position pointed to by begin. 
+ +The above is the normal scenario of a mobile window. For in-memory +streams (eg via hfile_init_fixed) the buffer can be used as the full +contents without any separate backend behind it. These always have at_eof +set, offset set to 0, need no read() method, and should just return EINVAL +for seek(): + + abcdefghijkLMNOPQRSTUVWXYZ------ + ^buffer ^begin ^end ^limit +*/ +HTSLIB_EXPORT +hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity) +{ + hFILE *fp = (hFILE *) malloc(struct_size); + if (fp == NULL) goto error; + + if (capacity == 0) capacity = 32768; + // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory + if (strchr(mode, 'r') && capacity > 32768) capacity = 32768; + + fp->buffer = (char *) malloc(capacity); + if (fp->buffer == NULL) goto error; + + fp->begin = fp->end = fp->buffer; + fp->limit = &fp->buffer[capacity]; + + fp->offset = 0; + fp->at_eof = 0; + fp->mobile = 1; + fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+')); + fp->preserve = 0; + fp->has_errno = 0; + return fp; + +error: + hfile_destroy(fp); + return NULL; +} + +hFILE *hfile_init_fixed(size_t struct_size, const char *mode, + char *buffer, size_t buf_filled, size_t buf_size) +{ + hFILE *fp = (hFILE *) malloc(struct_size); + if (fp == NULL) return NULL; + + fp->buffer = fp->begin = buffer; + fp->end = &fp->buffer[buf_filled]; + fp->limit = &fp->buffer[buf_size]; + + fp->offset = 0; + fp->at_eof = 1; + fp->mobile = 0; + fp->readonly = (strchr(mode, 'r') && ! 
strchr(mode, '+')); + fp->preserve = 0; + fp->has_errno = 0; + return fp; +} + +static const struct hFILE_backend mem_backend; + +HTSLIB_EXPORT +void hfile_destroy(hFILE *fp) +{ + int save = errno; + if (fp) free(fp->buffer); + free(fp); + errno = save; +} + +static inline int writebuffer_is_nonempty(hFILE *fp) +{ + return fp->begin > fp->end; +} + +/* Refills the read buffer from the backend (once, so may only partially + fill the buffer), returning the number of additional characters read + (which might be 0), or negative when an error occurred. */ +static ssize_t refill_buffer(hFILE *fp) +{ + ssize_t n; + + // Move any unread characters to the start of the buffer + if (fp->mobile && fp->begin > fp->buffer) { + fp->offset += fp->begin - fp->buffer; + memmove(fp->buffer, fp->begin, fp->end - fp->begin); + fp->end = &fp->buffer[fp->end - fp->begin]; + fp->begin = fp->buffer; + } + + // Read into the available buffer space at fp->[end,limit) + if (fp->at_eof || fp->end == fp->limit) n = 0; + else { + n = fp->backend->read(fp, fp->end, fp->limit - fp->end); + if (n < 0) { fp->has_errno = errno; return n; } + else if (n == 0) fp->at_eof = 1; + } + + fp->end += n; + return n; +} + +/* + * Changes the buffer size for an hFILE. Ideally this is done + * immediately after opening. If performed later, this function may + * fail if we are reducing the buffer size and the current offset into + * the buffer is beyond the new capacity. + * + * Returns 0 on success; + * -1 on failure. + */ +HTSLIB_EXPORT +int hfile_set_blksize(hFILE *fp, size_t bufsiz) { + char *buffer; + ptrdiff_t curr_used; + if (!fp) return -1; + curr_used = (fp->begin > fp->end ? 
fp->begin : fp->end) - fp->buffer; + if (bufsiz == 0) bufsiz = 32768; + + // Ensure buffer resize will not erase live data + if (bufsiz < curr_used) + return -1; + + if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1; + + fp->begin = buffer + (fp->begin - fp->buffer); + fp->end = buffer + (fp->end - fp->buffer); + fp->buffer = buffer; + fp->limit = &fp->buffer[bufsiz]; + + return 0; +} + +/* Called only from hgetc(), when our buffer is empty. */ +HTSLIB_EXPORT +int hgetc2(hFILE *fp) +{ + return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF; +} + +ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp) +{ + char *found; + size_t n, copied = 0; + ssize_t got; + + if (size < 1 || size > SSIZE_MAX) { + fp->has_errno = errno = EINVAL; + return -1; + } + if (writebuffer_is_nonempty(fp)) { + fp->has_errno = errno = EBADF; + return -1; + } + + --size; /* to allow space for the NUL terminator */ + + do { + n = fp->end - fp->begin; + if (n > size - copied) n = size - copied; + + /* Look in the hFILE buffer for the delimiter */ + found = memchr(fp->begin, delim, n); + if (found != NULL) { + n = found - fp->begin + 1; + memcpy(buffer + copied, fp->begin, n); + buffer[n + copied] = '\0'; + fp->begin += n; + return n + copied; + } + + /* No delimiter yet, copy as much as we can and refill if necessary */ + memcpy(buffer + copied, fp->begin, n); + fp->begin += n; + copied += n; + + if (copied == size) { /* Output buffer full */ + buffer[copied] = '\0'; + return copied; + } + + got = refill_buffer(fp); + } while (got > 0); + + if (got < 0) return -1; /* Error on refill. */ + + buffer[copied] = '\0'; /* EOF, return anything that was copied. */ + return copied; +} + +char *hgets(char *buffer, int size, hFILE *fp) +{ + if (size < 1) { + fp->has_errno = errno = EINVAL; + return NULL; + } + return hgetln(buffer, size, fp) > 0 ? 
buffer : NULL; +} + +ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) +{ + size_t n = fp->end - fp->begin; + while (n < nbytes) { + ssize_t ret = refill_buffer(fp); + if (ret < 0) return ret; + else if (ret == 0) break; + else n += ret; + } + + if (n > nbytes) n = nbytes; + memcpy(buffer, fp->begin, n); + return n; +} + +/* Called only from hread(); when called, our buffer is empty and nread bytes + have already been placed in the destination buffer. */ +HTSLIB_EXPORT +ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread) +{ + const size_t capacity = fp->limit - fp->buffer; + int buffer_invalidated = 0; + char *dest = (char *) destv; + dest += nread, nbytes -= nread; + + // Read large requests directly into the destination buffer + while (nbytes * 2 >= capacity && !fp->at_eof) { + ssize_t n = fp->backend->read(fp, dest, nbytes); + if (n < 0) { fp->has_errno = errno; return n; } + else if (n == 0) fp->at_eof = 1; + else buffer_invalidated = 1; + fp->offset += n; + dest += n, nbytes -= n; + nread += n; + } + + if (buffer_invalidated) { + // Our unread buffer is empty, so begin == end, but our already-read + // buffer [buffer,begin) is likely non-empty and is no longer valid as + // its contents are no longer adjacent to the file position indicator. + // Discard it so that hseek() can't try to take advantage of it. + fp->offset += fp->begin - fp->buffer; + fp->begin = fp->end = fp->buffer; + } + + while (nbytes > 0 && !fp->at_eof) { + size_t n; + ssize_t ret = refill_buffer(fp); + if (ret < 0) return ret; + + n = fp->end - fp->begin; + if (n > nbytes) n = nbytes; + memcpy(dest, fp->begin, n); + fp->begin += n; + dest += n, nbytes -= n; + nread += n; + } + + return nread; +} + +/* Flushes the write buffer, fp->[buffer,begin), out through the backend + returning 0 on success or negative if an error occurred. 
*/ +static ssize_t flush_buffer(hFILE *fp) +{ + const char *buffer = fp->buffer; + while (buffer < fp->begin) { + ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer); + if (n < 0) { fp->has_errno = errno; return n; } + buffer += n; + fp->offset += n; + } + + fp->begin = fp->buffer; // Leave the buffer empty + return 0; +} + +int hflush(hFILE *fp) +{ + if (flush_buffer(fp) < 0) return EOF; + if (fp->backend->flush) { + if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; } + } + return 0; +} + +/* Called only from hputc(), when our buffer is already full. */ +HTSLIB_EXPORT +int hputc2(int c, hFILE *fp) +{ + if (flush_buffer(fp) < 0) return EOF; + *(fp->begin++) = c; + return c; +} + +/* Called only from hwrite() and hputs2(); when called, our buffer is either + full and ncopied bytes from the source have already been copied to our + buffer; or completely empty, ncopied is zero and totalbytes is greater than + the buffer size. */ +HTSLIB_EXPORT +ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied) +{ + const char *src = (const char *) srcv; + ssize_t ret; + const size_t capacity = fp->limit - fp->buffer; + size_t remaining = totalbytes - ncopied; + src += ncopied; + + ret = flush_buffer(fp); + if (ret < 0) return ret; + + // Write large blocks out directly from the source buffer + while (remaining * 2 >= capacity) { + ssize_t n = fp->backend->write(fp, src, remaining); + if (n < 0) { fp->has_errno = errno; return n; } + fp->offset += n; + src += n, remaining -= n; + } + + // Just buffer any remaining characters + memcpy(fp->begin, src, remaining); + fp->begin += remaining; + + return totalbytes; +} + +/* Called only from hputs(), when our buffer is already full. */ +HTSLIB_EXPORT +int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp) +{ + return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 
0 : EOF; +} + +off_t hseek(hFILE *fp, off_t offset, int whence) +{ + off_t curpos, pos; + + if (writebuffer_is_nonempty(fp) && fp->mobile) { + int ret = flush_buffer(fp); + if (ret < 0) return ret; + } + + curpos = htell(fp); + + // Relative offsets are given relative to the hFILE's stream position, + // which may differ from the backend's physical position due to buffering + // read-ahead. Correct for this by converting to an absolute position. + if (whence == SEEK_CUR) { + if (curpos + offset < 0) { + // Either a negative offset resulted in a position before the + // start of the file, or we overflowed when given a positive offset + fp->has_errno = errno = (offset < 0)? EINVAL : EOVERFLOW; + return -1; + } + + whence = SEEK_SET; + offset = curpos + offset; + } + // For fixed immobile buffers, convert everything else to SEEK_SET too + // so that seeking can be avoided for all (within range) requests. + else if (! fp->mobile && whence == SEEK_END) { + size_t length = fp->end - fp->buffer; + if (offset > 0 || -offset > length) { + fp->has_errno = errno = EINVAL; + return -1; + } + + whence = SEEK_SET; + offset = length + offset; + } + + // Avoid seeking if the desired position is within our read buffer. + // (But not when the next operation may be a write on a mobile buffer.) + if (whence == SEEK_SET && (! 
fp->mobile || fp->readonly) && + offset >= fp->offset && offset - fp->offset <= fp->end - fp->buffer) { + fp->begin = &fp->buffer[offset - fp->offset]; + return offset; + } + + pos = fp->backend->seek(fp, offset, whence); + if (pos < 0) { fp->has_errno = errno; return pos; } + + // Seeking succeeded, so discard any non-empty read buffer + fp->begin = fp->end = fp->buffer; + fp->at_eof = 0; + + fp->offset = pos; + return pos; +} + +int hclose(hFILE *fp) +{ + int err = fp->has_errno; + + if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno; + if (!fp->preserve) { + if (fp->backend->close(fp) < 0) err = errno; + hfile_destroy(fp); + } + + if (err) { + errno = err; + return EOF; + } + else return 0; +} + +void hclose_abruptly(hFILE *fp) +{ + int save = errno; + if (fp->preserve) + return; + if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ } + hfile_destroy(fp); + errno = save; +} + + +/*************************** + * File descriptor backend * + ***************************/ + +#ifndef _WIN32 +#include +#include +#define HAVE_STRUCT_STAT_ST_BLKSIZE +#else +#include +#define HAVE_CLOSESOCKET +#define HAVE_SETMODE +#endif +#include +#include + +/* For Unix, it doesn't matter whether a file descriptor is a socket. + However Windows insists on send()/recv() and its own closesocket() + being used when fd happens to be a socket. */ + +typedef struct { + hFILE base; + int fd; + unsigned is_socket:1, is_shared:1; +} hFILE_fd; + +static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes) +{ + hFILE_fd *fp = (hFILE_fd *) fpv; + ssize_t n; + do { + n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0) + : read(fp->fd, buffer, nbytes); + } while (n < 0 && errno == EINTR); + return n; +} + +static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes) +{ + hFILE_fd *fp = (hFILE_fd *) fpv; + ssize_t n; + do { + n = fp->is_socket? 
send(fp->fd, buffer, nbytes, 0) + : write(fp->fd, buffer, nbytes); + } while (n < 0 && errno == EINTR); +#ifdef _WIN32 + // On windows we have no SIGPIPE. Instead write returns + // EINVAL. We check for this and our fd being a pipe. + // If so, we raise SIGTERM instead of SIGPIPE. It's not + // ideal, but I think the only alternative is extra checking + // in every single piece of code. + if (n < 0 && errno == EINVAL && + GetLastError() == ERROR_NO_DATA && + GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) { + raise(SIGTERM); + } +#endif + return n; +} + +static off_t fd_seek(hFILE *fpv, off_t offset, int whence) +{ + hFILE_fd *fp = (hFILE_fd *) fpv; +#ifdef _WIN32 + // On windows lseek can return non-zero values even on a pipe. Instead + // it's likely to seek somewhere within the pipe memory buffer. + // This breaks bgzf_check_EOF among other things. + if (GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) { + errno = ESPIPE; + return -1; + } +#endif + + return lseek(fp->fd, offset, whence); +} + +static int fd_flush(hFILE *fpv) +{ + int ret = 0; + do { +#ifdef HAVE_FDATASYNC + hFILE_fd *fp = (hFILE_fd *) fpv; + ret = fdatasync(fp->fd); +#elif defined(HAVE_FSYNC) + hFILE_fd *fp = (hFILE_fd *) fpv; + ret = fsync(fp->fd); +#endif + // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe, + // and operation-not-supported errors (Mac OS X) + if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0; + } while (ret < 0 && errno == EINTR); + return ret; +} + +static int fd_close(hFILE *fpv) +{ + hFILE_fd *fp = (hFILE_fd *) fpv; + int ret; + + // If we don't own the fd, return successfully without actually closing it + if (fp->is_shared) return 0; + + do { +#ifdef HAVE_CLOSESOCKET + ret = fp->is_socket? 
closesocket(fp->fd) : close(fp->fd); +#else + ret = close(fp->fd); +#endif + } while (ret < 0 && errno == EINTR); + return ret; +} + +static const struct hFILE_backend fd_backend = +{ + fd_read, fd_write, fd_seek, fd_flush, fd_close +}; + +static size_t blksize(int fd) +{ +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + struct stat sbuf; + if (fstat(fd, &sbuf) != 0) return 0; + return sbuf.st_blksize; +#else + return 0; +#endif +} + +static hFILE *hopen_fd(const char *filename, const char *mode) +{ + hFILE_fd *fp = NULL; + int fd = open(filename, hfile_oflags(mode), 0666); + if (fd < 0) goto error; + + fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); + if (fp == NULL) goto error; + + fp->fd = fd; + fp->is_socket = 0; + fp->is_shared = 0; + fp->base.backend = &fd_backend; + return &fp->base; + +error: + if (fd >= 0) { int save = errno; (void) close(fd); errno = save; } + hfile_destroy((hFILE *) fp); + return NULL; +} + +// Loads the contents of filename to produced a read-only, in memory, +// immobile hfile. fp is the already opened file. We always close this +// input fp, irrespective of whether we error or whether we return a new +// immobile hfile. 
+static hFILE *hpreload(hFILE *fp) { + hFILE *mem_fp; + char *buf = NULL; + off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len; + + for (;;) { + if (buf_a - buf_sz < 5000) { + buf_a += buf_inc; + char *t = realloc(buf, buf_a); + if (!t) goto err; + buf = t; + if (buf_inc < 1000000) buf_inc *= 1.3; + } + len = hread(fp, buf+buf_sz, buf_a-buf_sz); + if (len > 0) + buf_sz += len; + else + break; + } + + if (len < 0) goto err; + mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a); + if (!mem_fp) goto err; + mem_fp->backend = &mem_backend; + + if (hclose(fp) < 0) { + hclose_abruptly(mem_fp); // also frees buf, which mem_fp now owns + return NULL; // hclose() already destroyed fp; the err path would double-free + } + return mem_fp; + + err: + free(buf); + hclose_abruptly(fp); + return NULL; +} + +static int is_preload_url_remote(const char *url){ + return hisremote(url + 8); // len("preload:") = 8 +} + +static hFILE *hopen_preload(const char *url, const char *mode){ + hFILE* fp = hopen(url + 8, mode); + return fp ? hpreload(fp) : NULL; +} + +hFILE *hdopen(int fd, const char *mode) +{ + hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); + if (fp == NULL) return NULL; + + fp->fd = fd; + fp->is_socket = (strchr(mode, 's') != NULL); + fp->is_shared = (strchr(mode, 'S') != NULL); + fp->base.backend = &fd_backend; + return &fp->base; +} + +static hFILE *hopen_fd_fileuri(const char *url, const char *mode) +{ + if (strncmp(url, "file://localhost/", 17) == 0) url += 16; + else if (strncmp(url, "file:///", 8) == 0) url += 7; + else { errno = EPROTONOSUPPORT; return NULL; } + +#if defined(_WIN32) || defined(__MSYS__) + // For cases like C:/foo + if (url[0] == '/' && url[1] && url[2] == ':' && url[3] == '/') url++; +#endif + + return hopen_fd(url, mode); +} + +static hFILE *hopen_fd_stdinout(const char *mode) +{ + int fd = (strchr(mode, 'r') != NULL)? 
STDIN_FILENO : STDOUT_FILENO; + char mode_shared[101]; + snprintf(mode_shared, sizeof mode_shared, "S%s", mode); +#if defined HAVE_SETMODE && defined O_BINARY + if (setmode(fd, O_BINARY) < 0) return NULL; +#endif + return hdopen(fd, mode_shared); +} + +HTSLIB_EXPORT +int hfile_oflags(const char *mode) +{ + int rdwr = 0, flags = 0; + const char *s; + for (s = mode; *s; s++) + switch (*s) { + case 'r': rdwr = O_RDONLY; break; + case 'w': rdwr = O_WRONLY; flags |= O_CREAT | O_TRUNC; break; + case 'a': rdwr = O_WRONLY; flags |= O_CREAT | O_APPEND; break; + case '+': rdwr = O_RDWR; break; +#ifdef O_CLOEXEC + case 'e': flags |= O_CLOEXEC; break; +#endif +#ifdef O_EXCL + case 'x': flags |= O_EXCL; break; +#endif + default: break; + } + +#ifdef O_BINARY + flags |= O_BINARY; +#endif + + return rdwr | flags; +} + + +/********************* + * In-memory backend * + *********************/ + +#include "hts_internal.h" + +typedef struct { + hFILE base; +} hFILE_mem; + +static off_t mem_seek(hFILE *fpv, off_t offset, int whence) +{ + errno = EINVAL; + return -1; +} + +static int mem_close(hFILE *fpv) +{ + return 0; +} + +static const struct hFILE_backend mem_backend = +{ + NULL, NULL, mem_seek, NULL, mem_close +}; + +static int cmp_prefix(const char *key, const char *s) +{ + while (*key) + if (tolower_c(*s) != *key) return +1; + else s++, key++; + + return 0; +} + +static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size) +{ + hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size); + if (fp == NULL) + return NULL; + + fp->base.backend = &mem_backend; + return &fp->base; +} + +static hFILE *hopen_mem(const char *url, const char *mode) +{ + size_t length, size; + char *buffer; + const char *data, *comma = strchr(url, ','); + if (comma == NULL) { errno = EINVAL; return NULL; } + data = comma+1; + + // TODO Implement write modes + if (strchr(mode, 'r') == NULL) { errno = EROFS; return NULL; } + + 
if (comma - url >= 7 && cmp_prefix(";base64", &comma[-7]) == 0) { + size = hts_base64_decoded_length(strlen(data)); + buffer = malloc(size); + if (buffer == NULL) return NULL; + hts_decode_base64(buffer, &length, data); + } + else { + size = strlen(data) + 1; + buffer = malloc(size); + if (buffer == NULL) return NULL; + hts_decode_percent(buffer, &length, data); + } + hFILE* hf; + + if(!(hf = create_hfile_mem(buffer, mode, length, size))){ + free(buffer); + return NULL; + } + + return hf; +} + +static hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) +{ + char* buffer = va_arg(args, char*); + size_t sz = va_arg(args, size_t); + /* No va_end() here: hopen() called va_start() and issues the matching va_end(). */ + + hFILE* hf; + + if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){ + free(buffer); + return NULL; + } + + return hf; +} + +char *hfile_mem_get_buffer(hFILE *file, size_t *length) { + if (file->backend != &mem_backend) { + errno = EINVAL; + return NULL; + } + + if (length) + *length = file->limit - file->buffer; // size of [buffer, limit); operands were reversed + + return file->buffer; +} + +char *hfile_mem_steal_buffer(hFILE *file, size_t *length) { + char *buf = hfile_mem_get_buffer(file, length); + if (buf) + file->buffer = NULL; + return buf; +} + +// open() stub for mem: which only works with the vopen() interface +// Use 'data:,' for data encoded in the URL +static hFILE *hopen_not_supported(const char *fname, const char *mode) { + errno = EINVAL; + return NULL; +} + +int hfile_plugin_init_mem(struct hFILE_plugin *self) +{ + // mem files are declared remote so they work with a tabix index + static const struct hFILE_scheme_handler handler = + {hopen_not_supported, hfile_always_remote, "mem", 2000 + 50, hopenv_mem}; + self->name = "mem"; + hfile_add_scheme_handler("mem", &handler); + return 0; +} + +/********************************************************************** + * Dummy crypt4gh plug-in. Does nothing apart from advise how to get * + * the real one. It will be overridden by the actual plug-in. 
* + **********************************************************************/ + +static hFILE *crypt4gh_needed(const char *url, const char *mode) +{ + const char *u = strncmp(url, "crypt4gh:", 9) == 0 ? url + 9 : url; +#if defined(ENABLE_PLUGINS) + const char *enable_plugins = ""; +#else + const char *enable_plugins = "You also need to rebuild HTSlib with plug-ins enabled.\n"; +#endif + + hts_log_error("Accessing \"%s\" needs the crypt4gh plug-in.\n" + "It can be found at " + "https://github.com/samtools/htslib-crypt4gh\n" + "%s" + "If you have the plug-in, please ensure it can be " + "found on your HTS_PATH.", + u, enable_plugins); + + errno = EPROTONOSUPPORT; + return NULL; +} + +int hfile_plugin_init_crypt4gh_needed(struct hFILE_plugin *self) +{ + static const struct hFILE_scheme_handler handler = + { crypt4gh_needed, hfile_always_local, "crypt4gh-needed", 0, NULL }; + self->name = "crypt4gh-needed"; + hfile_add_scheme_handler("crypt4gh", &handler); + return 0; +} + + +/***************************************** + * Plugin and hopen() backend dispatcher * + *****************************************/ + +#include "htslib/khash.h" + +KHASH_MAP_INIT_STR(scheme_string, const struct hFILE_scheme_handler *) +static khash_t(scheme_string) *schemes = NULL; + +struct hFILE_plugin_list { + struct hFILE_plugin plugin; + struct hFILE_plugin_list *next; +}; + +static struct hFILE_plugin_list *plugins = NULL; +static pthread_mutex_t plugins_lock = PTHREAD_MUTEX_INITIALIZER; + +void hfile_shutdown(int do_close_plugin) +{ + pthread_mutex_lock(&plugins_lock); + + if (schemes) { + kh_destroy(scheme_string, schemes); + schemes = NULL; + } + + while (plugins != NULL) { + struct hFILE_plugin_list *p = plugins; + if (p->plugin.destroy) p->plugin.destroy(); +#ifdef ENABLE_PLUGINS + if (p->plugin.obj && do_close_plugin) close_plugin(p->plugin.obj); +#endif + plugins = p->next; + free(p); + } + + pthread_mutex_unlock(&plugins_lock); +} + +static void hfile_exit(void) +{ + hfile_shutdown(0); 
+ pthread_mutex_destroy(&plugins_lock); +} + +static inline int priority(const struct hFILE_scheme_handler *handler) +{ + return handler->priority % 1000; +} + +#ifdef USING_WINDOWS_PLUGIN_DLLS +/* + * Work-around for Windows plug-in dlls where the plug-in could be + * using a different HTSlib library to the executable (for example + * because the latter was build against a static libhts.a). When this + * happens, the plug-in can call the wrong copy of hfile_add_scheme_handler(). + * If this is detected, it calls this function which attempts to fix the + * problem by redirecting to the hfile_add_scheme_handler() in the main + * executable. + */ +static int try_exe_add_scheme_handler(const char *scheme, + const struct hFILE_scheme_handler *handler) +{ + static void (*add_scheme_handler)(const char *scheme, + const struct hFILE_scheme_handler *handler); + if (!add_scheme_handler) { + // dlopen the main executable and resolve hfile_add_scheme_handler + void *exe_handle = dlopen(NULL, RTLD_LAZY); + if (!exe_handle) return -1; + *(void **) (&add_scheme_handler) = dlsym(exe_handle, "hfile_add_scheme_handler"); + dlclose(exe_handle); + } + // Check that the symbol was obtained and isn't the one in this copy + // of the library (to avoid infinite recursion) + if (!add_scheme_handler || add_scheme_handler == hfile_add_scheme_handler) + return -1; + add_scheme_handler(scheme, handler); + return 0; +} +#else +static int try_exe_add_scheme_handler(const char *scheme, + const struct hFILE_scheme_handler *handler) +{ + return -1; +} +#endif + +HTSLIB_EXPORT +void hfile_add_scheme_handler(const char *scheme, + const struct hFILE_scheme_handler *handler) +{ + int absent; + if (handler->open == NULL || handler->isremote == NULL) { + hts_log_warning("Couldn't register scheme handler for %s: missing method", scheme); + return; + } + if (!schemes) { + if (try_exe_add_scheme_handler(scheme, handler) != 0) { + hts_log_warning("Couldn't register scheme handler for %s", scheme); + } + 
return; + } + khint_t k = kh_put(scheme_string, schemes, scheme, &absent); + if (absent < 0) { + hts_log_warning("Couldn't register scheme handler for %s : %s", + scheme, strerror(errno)); + return; + } + if (absent || priority(handler) > priority(kh_value(schemes, k))) { + kh_value(schemes, k) = handler; + } +} + +static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *), + const char *pluginname) +{ + struct hFILE_plugin_list *p = malloc (sizeof (struct hFILE_plugin_list)); + if (p == NULL) { + hts_log_debug("Failed to allocate memory for plugin \"%s\"", pluginname); + return -1; + } + + p->plugin.api_version = 1; + p->plugin.obj = obj; + p->plugin.name = NULL; + p->plugin.destroy = NULL; + + int ret = (*init)(&p->plugin); + + if (ret != 0) { + hts_log_debug("Initialisation failed for plugin \"%s\": %d", pluginname, ret); + free(p); + return ret; + } + + hts_log_debug("Loaded \"%s\"", pluginname); + + p->next = plugins, plugins = p; + return 0; +} + +/* + * Returns 0 on success, + * <0 on failure + */ +static int load_hfile_plugins(void) +{ + static const struct hFILE_scheme_handler + data = { hopen_mem, hfile_always_local, "built-in", 80 }, + file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 }, + preload = { hopen_preload, is_preload_url_remote, "built-in", 80 }; + + schemes = kh_init(scheme_string); + if (schemes == NULL) + return -1; + + hfile_add_scheme_handler("data", &data); + hfile_add_scheme_handler("file", &file); + hfile_add_scheme_handler("preload", &preload); + init_add_plugin(NULL, hfile_plugin_init_mem, "mem"); + init_add_plugin(NULL, hfile_plugin_init_crypt4gh_needed, "crypt4gh-needed"); + +#ifdef ENABLE_PLUGINS + struct hts_path_itr path; + const char *pluginname; + hts_path_itr_setup(&path, NULL, NULL, "hfile_", 6, NULL, 0); + while ((pluginname = hts_path_itr_next(&path)) != NULL) { + void *obj; + int (*init)(struct hFILE_plugin *) = (int (*)(struct hFILE_plugin *)) + load_plugin(&obj, pluginname, "hfile_plugin_init"); 
+ + if (init) { + if (init_add_plugin(obj, init, pluginname) != 0) + close_plugin(obj); + } + } +#else + +#ifdef HAVE_LIBCURL + init_add_plugin(NULL, hfile_plugin_init_libcurl, "libcurl"); +#endif +#ifdef ENABLE_GCS + init_add_plugin(NULL, hfile_plugin_init_gcs, "gcs"); +#endif +#ifdef ENABLE_S3 + init_add_plugin(NULL, hfile_plugin_init_s3, "s3"); + init_add_plugin(NULL, hfile_plugin_init_s3_write, "s3w"); +#endif + +#endif + + // In the unlikely event atexit() fails, it's better to succeed here and + // carry on; then eventually when the program exits, we'll merely close + // down the plugins uncleanly, as if we had aborted. + (void) atexit(hfile_exit); + + return 0; +} + +/* A filename like "foo:bar" in which we don't recognise the scheme is + either an ordinary file or an indication of a missing or broken plugin. + Try to open it as an ordinary file; but if there's no such file, set + errno distinctively to make the plugin issue apparent. */ +static hFILE *hopen_unknown_scheme(const char *fname, const char *mode) +{ + hFILE *fp = hopen_fd(fname, mode); + if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT; + return fp; +} + +/* Returns the appropriate handler, or NULL if the string isn't an URL. 
*/ +static const struct hFILE_scheme_handler *find_scheme_handler(const char *s) +{ + static const struct hFILE_scheme_handler unknown_scheme = + { hopen_unknown_scheme, hfile_always_local, "built-in", 0 }; + + char scheme[12]; + int i; + + for (i = 0; i < sizeof scheme; i++) + if (isalnum_c(s[i]) || s[i] == '+' || s[i] == '-' || s[i] == '.') + scheme[i] = tolower_c(s[i]); + else if (s[i] == ':') break; + else return NULL; + + // 1 byte schemes are likely windows C:/foo pathnames + if (i <= 1 || i >= sizeof scheme) return NULL; + scheme[i] = '\0'; + + pthread_mutex_lock(&plugins_lock); + if (!schemes && load_hfile_plugins() < 0) { + pthread_mutex_unlock(&plugins_lock); + return NULL; + } + pthread_mutex_unlock(&plugins_lock); + + khint_t k = kh_get(scheme_string, schemes, scheme); + return (k != kh_end(schemes))? kh_value(schemes, k) : &unknown_scheme; +} + + +/*************************** + * Library introspection functions + ***************************/ + +/* + * Fills out sc_list[] with the list of known URL schemes. + * This can be restricted to just ones from a specific plugin, + * or all (plugin == NULL). + * + * Returns number of schemes found on success; + * -1 on failure. + */ +HTSLIB_EXPORT +int hfile_list_schemes(const char *plugin, const char *sc_list[], int *nschemes) +{ + pthread_mutex_lock(&plugins_lock); + if (!schemes && load_hfile_plugins() < 0) { + pthread_mutex_unlock(&plugins_lock); + return -1; + } + pthread_mutex_unlock(&plugins_lock); + + khiter_t k; + int ns = 0; + + for (k = kh_begin(schemes); k != kh_end(schemes); k++) { + if (!kh_exist(schemes, k)) + continue; + + const struct hFILE_scheme_handler *s = kh_value(schemes, k); + if (plugin && strcmp(s->provider, plugin) != 0) + continue; + + if (ns < *nschemes) + sc_list[ns] = kh_key(schemes, k); + ns++; + } + + if (*nschemes > ns) + *nschemes = ns; + + return ns; +} + + +/* + * Fills out plist[] with the list of known hFILE plugins. 
+ * + * Returns number of schemes found on success; + * -1 on failure + */ +HTSLIB_EXPORT +int hfile_list_plugins(const char *plist[], int *nplugins) +{ + pthread_mutex_lock(&plugins_lock); + if (!schemes && load_hfile_plugins() < 0) { + pthread_mutex_unlock(&plugins_lock); + return -1; + } + pthread_mutex_unlock(&plugins_lock); + + int np = 0; + if (*nplugins) + plist[np++] = "built-in"; + + struct hFILE_plugin_list *p = plugins; + while (p) { + if (np < *nplugins) + plist[np] = p->plugin.name; + + p = p->next; + np++; + } + + if (*nplugins > np) + *nplugins = np; + + return np; +} + + +/* + * Tests for the presence of a specific hFILE plugin. + * + * Returns 1 if true + * 0 otherwise + */ +HTSLIB_EXPORT +int hfile_has_plugin(const char *name) +{ + pthread_mutex_lock(&plugins_lock); + if (!schemes && load_hfile_plugins() < 0) { + pthread_mutex_unlock(&plugins_lock); + return -1; + } + pthread_mutex_unlock(&plugins_lock); + + struct hFILE_plugin_list *p = plugins; + while (p) { + if (strcmp(p->plugin.name, name) == 0) + return 1; + p = p->next; + } + + return 0; +} + +/*************************** + * hFILE interface proper + ***************************/ + +hFILE *hopen(const char *fname, const char *mode, ...) +{ + const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); + if (handler) { + if (strchr(mode, ':') == NULL + || handler->priority < 2000 + || handler->vopen == NULL) { + return handler->open(fname, mode); + } + else { + hFILE *fp; + va_list arg; + va_start(arg, mode); + fp = handler->vopen(fname, mode, arg); + va_end(arg); + return fp; + } + } + else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode); + else return hopen_fd(fname, mode); +} + +HTSLIB_EXPORT +int hfile_always_local (const char *fname) { return 0; } + +HTSLIB_EXPORT +int hfile_always_remote(const char *fname) { return 1; } + +int hisremote(const char *fname) +{ + const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); + return handler? 
handler->isremote(fname) : 0; +} + +// Remove an extension, if any, from the basename part of [start,limit). +// Note: Doesn't notice percent-encoded '.' and '/' characters. Don't do that. +static const char *strip_extension(const char *start, const char *limit) +{ + const char *s = limit; + while (s > start) { + --s; + if (*s == '.') return s; + else if (*s == '/') break; + } + return limit; +} + +char *haddextension(struct kstring_t *buffer, const char *filename, + int replace, const char *new_extension) +{ + const char *trailing, *end; + + if (find_scheme_handler(filename)) { + // URL, so alter extensions before any trailing query or fragment parts + // Allow # symbols in s3 URLs + trailing = filename + ((strncmp(filename, "s3://", 5) && strncmp(filename, "s3+http://", 10) && strncmp(filename, "s3+https://", 11)) ? strcspn(filename, "?#") : strcspn(filename, "?")); + } + else { + // Local path, so alter extensions at the end of the filename + trailing = strchr(filename, '\0'); + } + + end = replace? strip_extension(filename, trailing) : trailing; + + buffer->l = 0; + if (kputsn(filename, end - filename, buffer) >= 0 && + kputs(new_extension, buffer) >= 0 && + kputs(trailing, buffer) >= 0) return buffer->s; + else return NULL; +} + + +/* + * ---------------------------------------------------------------------- + * Minimal stub functions for knet, added after the removal of + * hfile_net.c and knetfile.c. + * + * They exist purely for ABI compatibility, but are simply wrappers to + * hFILE. API should be compatible except knet_fileno (unused?). + * + * CULL THESE and knetfile.h at the next .so version bump. + */ +typedef struct knetFile_s { + // As per htslib/knetfile.h. Duplicated here as we don't wish to + // have any dependence on the deprecated knetfile.h interface, plus + // it's hopefully only temporary. 
+ int type, fd; + int64_t offset; + char *host, *port; + int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; + char *response, *retr, *size_cmd; + int64_t seek_offset; + int64_t file_size; + char *path, *http_host; + + // Our local addition + hFILE *hf; +} knetFile; + +HTSLIB_EXPORT +knetFile *knet_open(const char *fn, const char *mode) { + knetFile *fp = calloc(1, sizeof(*fp)); + if (!fp) return NULL; + if (!(fp->hf = hopen(fn, mode))) { + free(fp); + return NULL; + } + + // FD backend is the only one implementing knet_fileno + fp->fd = fp->hf->backend == &fd_backend + ? ((hFILE_fd *)fp->hf)->fd + : -1; + + return fp; +} + +HTSLIB_EXPORT +knetFile *knet_dopen(int fd, const char *mode) { + knetFile *fp = calloc(1, sizeof(*fp)); + if (!fp) return NULL; + if (!(fp->hf = hdopen(fd, mode))) { + free(fp); + return NULL; + } + fp->fd = fd; + return fp; +} + +HTSLIB_EXPORT +ssize_t knet_read(knetFile *fp, void *buf, size_t len) { + ssize_t r = hread(fp->hf, buf, len); + fp->offset += r>0?r:0; + return r; +} + +HTSLIB_EXPORT +off_t knet_seek(knetFile *fp, off_t off, int whence) { + off_t r = hseek(fp->hf, off, whence); + if (r >= 0) + fp->offset = r; + return r; +} + +HTSLIB_EXPORT +int knet_close(knetFile *fp) { + int r = hclose(fp->hf); + free(fp); + return r; +} diff --git a/src/htslib-1.18/hfile_gcs.c b/src/htslib-1.21/hfile_gcs.c similarity index 100% rename from src/htslib-1.18/hfile_gcs.c rename to src/htslib-1.21/hfile_gcs.c diff --git a/src/htslib-1.18/hfile_internal.h b/src/htslib-1.21/hfile_internal.h similarity index 100% rename from src/htslib-1.18/hfile_internal.h rename to src/htslib-1.21/hfile_internal.h diff --git a/src/htslib-1.19.1/hfile_libcurl.c b/src/htslib-1.21/hfile_libcurl.c similarity index 100% rename from src/htslib-1.19.1/hfile_libcurl.c rename to src/htslib-1.21/hfile_libcurl.c diff --git a/src/htslib-1.21/hfile_s3.c b/src/htslib-1.21/hfile_s3.c new file mode 100644 index 0000000..c7c52e6 --- /dev/null +++ 
b/src/htslib-1.21/hfile_s3.c @@ -0,0 +1,1442 @@ +/* hfile_s3.c -- Amazon S3 backend for low-level file streams. + + Copyright (C) 2015-2017, 2019-2024 Genome Research Ltd. + + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "hfile_internal.h" +#ifdef ENABLE_PLUGINS +#include "version.h" +#endif +#include "htslib/hts.h" // for hts_version() and hts_verbose +#include "htslib/kstring.h" +#include "hts_time_funcs.h" + +typedef struct s3_auth_data { + kstring_t id; + kstring_t token; + kstring_t secret; + kstring_t region; + kstring_t canonical_query_string; + kstring_t user_query_string; + kstring_t host; + kstring_t profile; + enum {s3_auto, s3_virtual, s3_path} url_style; + time_t creds_expiry_time; + char *bucket; + kstring_t auth_hdr; + time_t auth_time; + char date[40]; + char date_long[17]; + char date_short[9]; + kstring_t date_html; + char mode; + char *headers[5]; + int refcount; +} s3_auth_data; + +#define AUTH_LIFETIME 60 // Regenerate auth headers if older than this +#define CREDENTIAL_LIFETIME 60 // Seconds before expiry to reread credentials + +#if defined HAVE_COMMONCRYPTO + +#include + +#define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH +#define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH +#define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 + +static size_t +s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message) +{ + CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest); + return CC_SHA1_DIGEST_LENGTH; +} + + +static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { + CC_SHA256(in, length, out); +} + + +static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { + CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md); + *md_len = CC_SHA256_DIGEST_LENGTH; +} + + +#elif defined HAVE_HMAC + +#include +#include + +#define DIGEST_BUFSIZ EVP_MAX_MD_SIZE +#define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH +#define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 + +static size_t 
+s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message) +{ + unsigned int len; + HMAC(EVP_sha1(), key->s, key->l, + (unsigned char *) message->s, message->l, digest, &len); + return len; +} + + +static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { + SHA256(in, length, out); +} + + +static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { + HMAC(EVP_sha256(), key, key_len, d, n, md, md_len); +} + +#else +#error No HMAC() routine found by configure +#endif + +static void +urldecode_kput(const char *s, int len, kstring_t *str) +{ + char buf[3]; + int i = 0; + + while (i < len) + if (s[i] == '%' && i+2 < len) { + buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0'; + kputc(strtol(buf, NULL, 16), str); + i += 3; + } + else kputc(s[i++], str); +} + +static void base64_kput(const unsigned char *data, size_t len, kstring_t *str) +{ + static const char base64[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + size_t i = 0; + unsigned x = 0; + int bits = 0, pad = 0; + + while (bits || i < len) { + if (bits < 6) { + x <<= 8, bits += 8; + if (i < len) x |= data[i++]; + else pad++; + } + + bits -= 6; + kputc(base64[(x >> bits) & 63], str); + } + + str->l -= pad; + kputsn("==", pad, str); +} + +static int is_dns_compliant(const char *s0, const char *slim, int is_https) +{ + int has_nondigit = 0, len = 0; + const char *s; + + for (s = s0; s < slim; len++, s++) + if (islower_c(*s)) + has_nondigit = 1; + else if (*s == '-') { + has_nondigit = 1; + if (s == s0 || s+1 == slim) return 0; + } + else if (isdigit_c(*s)) + ; + else if (*s == '.') { + if (is_https) return 0; + if (s == s0 || ! isalnum_c(s[-1])) return 0; + if (s+1 == slim || ! 
isalnum_c(s[1])) return 0; + } + else return 0; + + return has_nondigit && len >= 3 && len <= 63; +} + +static FILE *expand_tilde_open(const char *fname, const char *mode) +{ + FILE *fp; + + if (strncmp(fname, "~/", 2) == 0) { + kstring_t full_fname = { 0, 0, NULL }; + const char *home = getenv("HOME"); + if (! home) return NULL; + + kputs(home, &full_fname); + kputs(&fname[1], &full_fname); + + fp = fopen(full_fname.s, mode); + free(full_fname.s); + } + else + fp = fopen(fname, mode); + + return fp; +} + +static void parse_ini(const char *fname, const char *section, ...) +{ + kstring_t line = { 0, 0, NULL }; + int active = 1; // Start active, so global properties are accepted + char *s; + + FILE *fp = expand_tilde_open(fname, "r"); + if (fp == NULL) return; + + while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0) + if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) { + *s = '\0'; + active = (strcmp(&line.s[1], section) == 0); + } + else if (active && (s = strpbrk(line.s, ":=")) != NULL) { + const char *key = line.s, *value = &s[1], *akey; + va_list args; + + while (isspace_c(*key)) key++; + while (s > key && isspace_c(s[-1])) s--; + *s = '\0'; + + while (isspace_c(*value)) value++; + while (line.l > 0 && isspace_c(line.s[line.l-1])) + line.s[--line.l] = '\0'; + + va_start(args, section); + while ((akey = va_arg(args, const char *)) != NULL) { + kstring_t *avar = va_arg(args, kstring_t *); + if (strcmp(key, akey) == 0) { + avar->l = 0; + kputs(value, avar); + break; } + } + va_end(args); + } + + fclose(fp); + free(line.s); +} + +static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret) +{ + kstring_t text = { 0, 0, NULL }; + char *s; + size_t len; + + FILE *fp = expand_tilde_open(fname, "r"); + if (fp == NULL) return; + + while (kgetline(&text, (kgets_func *) fgets, fp) >= 0) + kputc(' ', &text); + fclose(fp); + + s = text.s; + while (isspace_c(*s)) s++; + kputsn(s, len = strcspn(s, " \t"), id); + + s += len; + while 
(isspace_c(*s)) s++; + kputsn(s, strcspn(s, " \t"), secret); + + free(text.s); +} + +static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) { + char **hdr = &ad->headers[0]; + int idx = 0; + *hdrs = hdr; + + hdr[idx] = strdup(ad->date); + if (!hdr[idx]) return -1; + idx++; + + if (ad->token.l) { + kstring_t token_hdr = KS_INITIALIZE; + kputs("X-Amz-Security-Token: ", &token_hdr); + kputs(ad->token.s, &token_hdr); + if (token_hdr.s) { + hdr[idx++] = token_hdr.s; + } else { + goto fail; + } + } + + if (ad->auth_hdr.l) { + hdr[idx] = strdup(ad->auth_hdr.s); + if (!hdr[idx]) goto fail; + idx++; + } + + hdr[idx] = NULL; + return 0; + + fail: + for (--idx; idx >= 0; --idx) + free(hdr[idx]); + return -1; +} + +static void free_auth_data(s3_auth_data *ad) { + if (ad->refcount > 0) { + --ad->refcount; + return; + } + free(ad->profile.s); + free(ad->id.s); + free(ad->token.s); + free(ad->secret.s); + free(ad->region.s); + free(ad->canonical_query_string.s); + free(ad->user_query_string.s); + free(ad->host.s); + free(ad->bucket); + free(ad->auth_hdr.s); + free(ad->date_html.s); + free(ad); +} + +static time_t parse_rfc3339_date(kstring_t *datetime) +{ + int offset = 0; + time_t when; + int num; + char should_be_t = '\0', timezone[10] = { '\0' }; + unsigned int year, mon, day, hour, min, sec; + + if (!datetime->s) + return 0; + + // It should be possible to do this with strptime(), but it seems + // to not get on with our feature definitions. 
+ num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s", + &year, &mon, &day, &should_be_t, &hour, &min, &sec, timezone); + if (num < 8) + return 0; + if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ') + return 0; + struct tm parsed = { sec, min, hour, day, mon - 1, year - 1900, 0, 0, 0 }; + + switch (timezone[0]) { + case 'Z': + case 'z': + case '\0': + break; + case '+': + case '-': { + unsigned hr_off, min_off; + if (sscanf(timezone + 1, "%2u:%2u", &hr_off, &min_off)) { + if (hr_off < 24 && min_off <= 60) { + offset = ((hr_off * 60 + min_off) + * (timezone[0] == '+' ? -60 : 60)); + } + } + break; + } + default: + return 0; + } + + when = hts_time_gm(&parsed); + return when >= 0 ? when + offset : 0; +} + +static void refresh_auth_data(s3_auth_data *ad) { + // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of + // setup_auth_data(), but this only reads the authorisation parts. + const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); + kstring_t expiry_time = KS_INITIALIZE; + parse_ini(v? 
v : "~/.aws/credentials", ad->profile.s, + "aws_access_key_id", &ad->id, + "aws_secret_access_key", &ad->secret, + "aws_session_token", &ad->token, + "expiry_time", &expiry_time); + if (expiry_time.l) { + ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); + } + ks_free(&expiry_time); +} + +static int auth_header_callback(void *ctx, char ***hdrs) { + s3_auth_data *ad = (s3_auth_data *) ctx; + + time_t now = time(NULL); +#ifdef HAVE_GMTIME_R + struct tm tm_buffer; + struct tm *tm = gmtime_r(&now, &tm_buffer); +#else + struct tm *tm = gmtime(&now); +#endif + kstring_t message = { 0, 0, NULL }; + unsigned char digest[DIGEST_BUFSIZ]; + size_t digest_len; + + if (!hdrs) { // Closing connection + free_auth_data(ad); + return 0; + } + + if (ad->creds_expiry_time > 0 + && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { + refresh_auth_data(ad); + } else if (now - ad->auth_time < AUTH_LIFETIME) { + // Last auth string should still be valid + *hdrs = NULL; + return 0; + } + + strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm); + if (!ad->id.l || !ad->secret.l) { + ad->auth_time = now; + return copy_auth_headers(ad, hdrs); + } + + if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s", + ad->mode == 'r' ? "GET" : "PUT", ad->date + 6, + ad->token.l ? "x-amz-security-token:" : "", + ad->token.l ? ad->token.s : "", + ad->token.l ? 
"\n" : "", + ad->bucket) < 0) { + return -1; + } + + digest_len = s3_sign(digest, &ad->secret, &message); + ad->auth_hdr.l = 0; + if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0) + goto fail; + base64_kput(digest, digest_len, &ad->auth_hdr); + + free(message.s); + ad->auth_time = now; + return copy_auth_headers(ad, hdrs); + + fail: + free(message.s); + return -1; +} + + +/* like a escape path but for query strings '=' and '&' are untouched */ +static char *escape_query(const char *qs) { + size_t i, j = 0, length, alloced; + char *escaped; + + length = strlen(qs); + alloced = length * 3 + 1; + if ((escaped = malloc(alloced)) == NULL) { + return NULL; + } + + for (i = 0; i < length; i++) { + int c = qs[i]; + + if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || + c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') { + escaped[j++] = c; + } else { + snprintf(escaped + j, alloced - j, "%%%02X", c); + j += 3; + } + } + + escaped[j] = '\0'; + + return escaped; +} + + +static char *escape_path(const char *path) { + size_t i, j = 0, length, alloced; + char *escaped; + + length = strlen(path); + alloced = length * 3 + 1; + + if ((escaped = malloc(alloced)) == NULL) { + return NULL; + } + + for (i = 0; i < length; i++) { + int c = path[i]; + + if (c == '?') break; // don't escape ? or beyond + + if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || + c == '_' || c == '-' || c == '~' || c == '.' || c == '/') { + escaped[j++] = c; + } else { + snprintf(escaped + j, alloced - j, "%%%02X", c); + j += 3; + } + } + + if (i != length) { + // in the case of a '?' 
copy the rest of the path across unchanged + strcpy(escaped + j, path + i); + } else { + escaped[j] = '\0'; + } + + return escaped; +} + + +static int is_escaped(const char *str) { + const char *c = str; + int escaped = 0; + int needs_escape = 0; + + while (*c != '\0') { + if (*c == '%' && c[1] != '\0' && c[2] != '\0') { + if (isxdigit_c(c[1]) && isxdigit_c(c[2])) { + escaped = 1; + c += 3; + continue; + } else { + // only escaped if all % signs are escaped + escaped = 0; + } + } + if (!((*c >= '0' && *c <= '9') || (*c >= 'A' && *c <= 'Z') + || (*c >= 'a' && *c <= 'z') || + *c == '_' || *c == '-' || *c == '~' || *c == '.' || *c == '/')) { + needs_escape = 1; + } + c++; + } + + return escaped || !needs_escape; +} + +static int redirect_endpoint_callback(void *auth, long response, + kstring_t *header, kstring_t *url) { + s3_auth_data *ad = (s3_auth_data *)auth; + char *new_region; + char *end; + int ret = -1; + + // get the new region from the reply header + if ((new_region = strstr(header->s, "x-amz-bucket-region: "))) { + + new_region += strlen("x-amz-bucket-region: "); + end = new_region; + + while (isalnum_c(*end) || ispunct_c(*end)) end++; + + *end = 0; + + if (strstr(ad->host.s, "amazonaws.com")) { + ad->region.l = 0; + kputs(new_region, &ad->region); + + ad->host.l = 0; + + if (ad->url_style == s3_path) { + // Path style https://s3.{region-code}.amazonaws.com/{bucket-name}/{key-name} + ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region); + } else { + // Virtual https://{bucket-name}.s3.{region-code}.amazonaws.com/{key-name} + // Extract the {bucket-name} from {ad->host} to include in subdomain + kstring_t url_prefix = KS_INITIALIZE; + kputsn(ad->host.s, strcspn(ad->host.s, "."), &url_prefix); + + ksprintf(&ad->host, "%s.s3.%s.amazonaws.com", url_prefix.s, new_region); + free(url_prefix.s); + } + if (ad->region.l && ad->host.l) { + int e = 0; + url->l = 0; + e |= kputs("https://", url) < 0; + e |= kputs(ad->host.s, url) < 0; + e |= kputsn(ad->bucket, 
strlen(ad->bucket), url) < 0; + + if (!e) + ret = 0; + } + if (ad->user_query_string.l) { + kputc('?', url); + kputsn(ad->user_query_string.s, ad->user_query_string.l, url); + } + } + } + + return ret; +} + +static s3_auth_data * setup_auth_data(const char *s3url, const char *mode, + int sigver, kstring_t *url) +{ + s3_auth_data *ad = calloc(1, sizeof(*ad)); + const char *bucket, *path; + char *escaped = NULL; + size_t url_path_pos; + ptrdiff_t bucket_len; + int is_https = 1, dns_compliant; + char *query_start; + + if (!ad) + return NULL; + ad->mode = strchr(mode, 'r') ? 'r' : 'w'; + ad->url_style = s3_auto; + + // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH + + if (s3url[2] == '+') { + bucket = strchr(s3url, ':') + 1; + if (bucket == NULL) { + free(ad); + return NULL; + } + kputsn(&s3url[3], bucket - &s3url[3], url); + is_https = strncmp(url->s, "https:", 6) == 0; + } + else { + kputs("https:", url); + bucket = &s3url[3]; + } + while (*bucket == '/') kputc(*bucket++, url); + + path = bucket + strcspn(bucket, "/?#@"); + + if (*path == '@') { + const char *colon = strpbrk(bucket, ":@"); + if (*colon != ':') { + urldecode_kput(bucket, colon - bucket, &ad->profile); + } + else { + const char *colon2 = strpbrk(&colon[1], ":@"); + urldecode_kput(bucket, colon - bucket, &ad->id); + urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret); + if (*colon2 == ':') + urldecode_kput(&colon2[1], path - &colon2[1], &ad->token); + } + + bucket = &path[1]; + path = bucket + strcspn(bucket, "/?#"); + } + else { + // If the URL has no ID[:SECRET]@, consider environment variables. 
+ const char *v; + if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id); + if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret); + if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token); + if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region); + if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host); + + if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile); + else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile); + else kputs("default", &ad->profile); + + if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) { + if (strcasecmp(v, "virtual") == 0) { + ad->url_style = s3_virtual; + } else if (strcasecmp(v, "path") == 0) { + ad->url_style = s3_path; + } + } + } + + if (ad->id.l == 0) { + kstring_t url_style = KS_INITIALIZE; + kstring_t expiry_time = KS_INITIALIZE; + const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); + parse_ini(v? v : "~/.aws/credentials", ad->profile.s, + "aws_access_key_id", &ad->id, + "aws_secret_access_key", &ad->secret, + "aws_session_token", &ad->token, + "region", &ad->region, + "addressing_style", &url_style, + "expiry_time", &expiry_time, + NULL); + + if (url_style.l) { + if (strcmp(url_style.s, "virtual") == 0) { + ad->url_style = s3_virtual; + } else if (strcmp(url_style.s, "path") == 0) { + ad->url_style = s3_path; + } else { + ad->url_style = s3_auto; + } + } + if (expiry_time.l) { + // Not a real part of the AWS configuration file, but it allows + // support for short-term credentials like those for the IAM + // service. The botocore library uses the key "expiry_time" + // internally for this purpose. + // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py + ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); + } + + ks_free(&url_style); + ks_free(&expiry_time); + } + + if (ad->id.l == 0) { + kstring_t url_style = KS_INITIALIZE; + const char *v = getenv("HTS_S3_S3CFG"); + parse_ini(v? 
v : "~/.s3cfg", ad->profile.s, "access_key", &ad->id, + "secret_key", &ad->secret, "access_token", &ad->token, + "host_base", &ad->host, + "bucket_location", &ad->region, + "host_bucket", &url_style, + NULL); + + if (url_style.l) { + // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string + // indicates use of path style adressing. + if (strstr(url_style.s, "%(bucket)s") == NULL) { + ad->url_style = s3_path; + } else { + ad->url_style = s3_auto; + } + } + + ks_free(&url_style); + } + + if (ad->id.l == 0) + parse_simple("~/.awssecret", &ad->id, &ad->secret); + + + // if address_style is set, force the dns_compliant setting + if (ad->url_style == s3_virtual) { + dns_compliant = 1; + } else if (ad->url_style == s3_path) { + dns_compliant = 0; + } else { + dns_compliant = is_dns_compliant(bucket, path, is_https); + } + + if (ad->host.l == 0) + kputs("s3.amazonaws.com", &ad->host); + + if (!dns_compliant && ad->region.l > 0 + && strcmp(ad->host.s, "s3.amazonaws.com") == 0) { + // Can avoid a redirection by including the region in the host name + // (assuming the right one has been specified) + ad->host.l = 0; + ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s); + } + + if (ad->region.l == 0) + kputs("us-east-1", &ad->region); + + if (!is_escaped(path)) { + escaped = escape_path(path); + if (escaped == NULL) { + goto error; + } + } + + bucket_len = path - bucket; + + // Use virtual hosted-style access if possible, otherwise path-style. 
+ if (dns_compliant) { + size_t url_host_pos = url->l; + // Append "bucket.host" to url + kputsn_(bucket, bucket_len, url); + kputc('.', url); + kputsn(ad->host.s, ad->host.l, url); + url_path_pos = url->l; + + if (sigver == 4) { + // Copy back to ad->host to use when making the signature + ad->host.l = 0; + kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host); + } + } + else { + // Append "host/bucket" to url + kputsn(ad->host.s, ad->host.l, url); + url_path_pos = url->l; + kputc('/', url); + kputsn(bucket, bucket_len, url); + } + + kputs(escaped == NULL ? path : escaped, url); + + if (sigver == 4 || !dns_compliant) { + ad->bucket = malloc(url->l - url_path_pos + 1); + if (ad->bucket == NULL) { + goto error; + } + memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1); + } + else { + ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2); + if (ad->bucket == NULL) { + goto error; + } + ad->bucket[0] = '/'; + memcpy(ad->bucket + 1, bucket, bucket_len); + memcpy(ad->bucket + bucket_len + 1, + url->s + url_path_pos, url->l - url_path_pos + 1); + } + + // write any query strings to its own place to use later + if ((query_start = strchr(ad->bucket, '?'))) { + kputs(query_start + 1, &ad->user_query_string); + *query_start = 0; + } + + free(escaped); + + return ad; + + error: + free(escaped); + free_auth_data(ad); + return NULL; +} + +static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp) +{ + kstring_t url = { 0, 0, NULL }; + s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url); + + if (!ad) + return NULL; + + hFILE *fp = hopen(url.s, mode, "va_list", argsp, + "httphdr_callback", auth_header_callback, + "httphdr_callback_data", ad, + "redirect_callback", redirect_endpoint_callback, + "redirect_callback_data", ad, + NULL); + if (!fp) goto fail; + + free(url.s); + return fp; + + fail: + free(url.s); + free_auth_data(ad); + return NULL; +} + +/*************************************************************** + +AWS S3 
sig version 4 writing code + +****************************************************************/ + +static void hash_string(char *in, size_t length, char *out, size_t out_len) { + unsigned char hashed[SHA256_DIGEST_BUFSIZE]; + int i, j; + + s3_sha256((const unsigned char *)in, length, hashed); + + for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) { + snprintf(out + j, out_len - j, "%02x", hashed[i]); + } +} + +static void ksinit(kstring_t *s) { + s->l = 0; + s->m = 0; + s->s = NULL; +} + + +static void ksfree(kstring_t *s) { + free(s->s); + ksinit(s); +} + + +static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) { + unsigned char date_key[SHA256_DIGEST_BUFSIZE]; + unsigned char date_region_key[SHA256_DIGEST_BUFSIZE]; + unsigned char date_region_service_key[SHA256_DIGEST_BUFSIZE]; + unsigned char signing_key[SHA256_DIGEST_BUFSIZE]; + unsigned char signature[SHA256_DIGEST_BUFSIZE]; + + const unsigned char service[] = "s3"; + const unsigned char request[] = "aws4_request"; + + kstring_t secret_access_key = KS_INITIALIZE; + unsigned int len; + unsigned int i, j; + + ksprintf(&secret_access_key, "AWS4%s", ad->secret.s); + + if (secret_access_key.l == 0) { + return -1; + } + + s3_sign_sha256(secret_access_key.s, secret_access_key.l, (const unsigned char *)ad->date_short, strlen(ad->date_short), date_key, &len); + s3_sign_sha256(date_key, len, (const unsigned char *)ad->region.s, ad->region.l, date_region_key, &len); + s3_sign_sha256(date_region_key, len, service, 2, date_region_service_key, &len); + s3_sign_sha256(date_region_service_key, len, request, 12, signing_key, &len); + s3_sign_sha256(signing_key, len, (const unsigned char *)string_to_sign->s, string_to_sign->l, signature, &len); + + for (i = 0, j = 0; i < len; i++, j+= 2) { + snprintf(signature_string + j, sig_string_len - j, "%02x", signature[i]); + } + + ksfree(&secret_access_key); + + return 0; +} + + +static int 
make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) { + kstring_t signed_headers = KS_INITIALIZE; + kstring_t canonical_headers = KS_INITIALIZE; + kstring_t canonical_request = KS_INITIALIZE; + kstring_t scope = KS_INITIALIZE; + kstring_t string_to_sign = KS_INITIALIZE; + char cr_hash[HASH_LENGTH_SHA256]; + char signature_string[HASH_LENGTH_SHA256]; + int ret = -1; + + + if (!ad->token.l) { + kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers); + } else { + kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers); + } + + if (signed_headers.l == 0) { + return -1; + } + + + if (!ad->token.l) { + ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n", + ad->host.s, content, ad->date_long); + } else { + ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n", + ad->host.s, content, ad->date_long, ad->token.s); + } + + if (canonical_headers.l == 0) { + goto cleanup; + } + + // bucket == canonical_uri + ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s", + http_request, ad->bucket, ad->canonical_query_string.s, + canonical_headers.s, signed_headers.s, content); + + if (canonical_request.l == 0) { + goto cleanup; + } + + hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash)); + + ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s); + + if (scope.l == 0) { + goto cleanup; + } + + ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash); + + if (string_to_sign.l == 0) { + goto cleanup; + } + + if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string))) { + goto cleanup; + } + + ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s", + ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string); + + if (auth->l == 0) { + goto cleanup; + } + + ret 
= 0; + + cleanup: + ksfree(&signed_headers); + ksfree(&canonical_headers); + ksfree(&canonical_request); + ksfree(&scope); + ksfree(&string_to_sign); + + return ret; +} + + +static int update_time(s3_auth_data *ad, time_t now) { + int ret = -1; +#ifdef HAVE_GMTIME_R + struct tm tm_buffer; + struct tm *tm = gmtime_r(&now, &tm_buffer); +#else + struct tm *tm = gmtime(&now); +#endif + + if (now - ad->auth_time > AUTH_LIFETIME) { + // update timestamp + ad->auth_time = now; + + if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) { + return -1; + } + + if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) { + return -1;; + } + + ad->date_html.l = 0; + ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long); + } + + if (ad->date_html.l) ret = 0; + + return ret; +} + + +static int query_cmp(const void *p1, const void *p2) { + char **q1 = (char **)p1; + char **q2 = (char **)p2; + + return strcmp(*q1, *q2); +} + + +/* Query strings must be in alphabetical order for authorisation */ + +static int order_query_string(kstring_t *qs) { + int *query_offset = NULL; + int num_queries, i; + char **queries = NULL; + kstring_t ordered = KS_INITIALIZE; + char *escaped = NULL; + int ret = -1; + + if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) { + return -1; + } + + if ((queries = malloc(num_queries * sizeof(char*))) == NULL) + goto err; + + for (i = 0; i < num_queries; i++) { + queries[i] = qs->s + query_offset[i]; + } + + qsort(queries, num_queries, sizeof(char *), query_cmp); + + for (i = 0; i < num_queries; i++) { + if (i) { + kputs("&", &ordered); + } + + kputs(queries[i], &ordered); + } + + if ((escaped = escape_query(ordered.s)) == NULL) + goto err; + + qs->l = 0; + kputs(escaped, qs); + + ret = 0; + err: + free(ordered.s); + free(queries); + free(query_offset); + free(escaped); + + return ret; +} + + +static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs, + kstring_t *hash, kstring_t *auth_str, kstring_t *date, + 
kstring_t *token, int uqs) { + s3_auth_data *ad = (s3_auth_data *)auth; + char content_hash[HASH_LENGTH_SHA256]; + time_t now; + + if (request == NULL) { + // signal to free auth data + free_auth_data(ad); + return 0; + } + + now = time(NULL); + + if (update_time(ad, now)) { + return -1; + } + if (ad->creds_expiry_time > 0 + && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { + refresh_auth_data(ad); + } + + if (content) { + hash_string(content->s, content->l, content_hash, sizeof(content_hash)); + } else { + // empty hash + hash_string("", 0, content_hash, sizeof(content_hash)); + } + + ad->canonical_query_string.l = 0; + kputs(cqs, &ad->canonical_query_string); + + if (ad->canonical_query_string.l == 0) { + return -1; + } + + /* add a user provided query string, normally only useful on upload initiation */ + if (uqs) { + kputs("&", &ad->canonical_query_string); + kputs(ad->user_query_string.s, &ad->canonical_query_string); + + if (order_query_string(&ad->canonical_query_string)) { + return -1; + } + } + + if (make_authorisation(ad, request, content_hash, auth_str)) { + return -1; + } + + kputs(ad->date_html.s, date); + kputsn(content_hash, HASH_LENGTH_SHA256, hash); + + if (date->l == 0 || hash->l == 0) { + return -1; + } + + if (ad->token.l) { + ksprintf(token, "x-amz-security-token: %s", ad->token.s); + } + + return 0; +} + + +static int v4_auth_header_callback(void *ctx, char ***hdrs) { + s3_auth_data *ad = (s3_auth_data *) ctx; + char content_hash[HASH_LENGTH_SHA256]; + kstring_t content = KS_INITIALIZE; + kstring_t authorisation = KS_INITIALIZE; + kstring_t token_hdr = KS_INITIALIZE; + char *date_html = NULL; + time_t now; + int idx; + + if (!hdrs) { // Closing connection + free_auth_data(ad); + return 0; + } + + now = time(NULL); + + if (update_time(ad, now)) { + return -1; + } + + if (ad->creds_expiry_time > 0 + && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { + refresh_auth_data(ad); + } + + if (!ad->id.l || !ad->secret.l) { + return 
copy_auth_headers(ad, hdrs); + } + + hash_string("", 0, content_hash, sizeof(content_hash)); // empty hash + + ad->canonical_query_string.l = 0; + + if (ad->user_query_string.l > 0) { + kputs(ad->user_query_string.s, &ad->canonical_query_string); + + if (order_query_string(&ad->canonical_query_string)) { + return -1; + } + } else { + kputs("", &ad->canonical_query_string); + } + + if (make_authorisation(ad, "GET", content_hash, &authorisation)) { + return -1; + } + + ksprintf(&content, "x-amz-content-sha256: %s", content_hash); + date_html = strdup(ad->date_html.s); + + if (ad->token.l > 0) { + kputs("X-Amz-Security-Token: ", &token_hdr); + kputs(ad->token.s, &token_hdr); + } + + if (content.l == 0 || date_html == NULL) { + ksfree(&authorisation); + ksfree(&content); + ksfree(&token_hdr); + free(date_html); + return -1; + } + + *hdrs = &ad->headers[0]; + idx = 0; + ad->headers[idx++] = ks_release(&authorisation); + ad->headers[idx++] = date_html; + ad->headers[idx++] = ks_release(&content); + if (token_hdr.s) + ad->headers[idx++] = ks_release(&token_hdr); + ad->headers[idx++] = NULL; + + return 0; +} + +static int handle_400_response(hFILE *fp, s3_auth_data *ad) { + // v4 signatures in virtual hosted mode return 400 Bad Request if the + // wrong region is used to make the signature. The response is an xml + // document which includes the name of the correct region. This can + // be extracted and used to generate a corrected signature. + // As the xml is fairly simple, go with something "good enough" instead + // of trying to parse it properly. 
+ + char buffer[1024], *region, *reg_end; + ssize_t bytes; + + bytes = hread(fp, buffer, sizeof(buffer) - 1); + if (bytes < 0) { + return -1; + } + buffer[bytes] = '\0'; + region = strstr(buffer, ""); + if (region == NULL) { + return -1; + } + region += 8; + while (isspace((unsigned char) *region)) ++region; + reg_end = strchr(region, '<'); + if (reg_end == NULL || strncmp(reg_end + 1, "/Region>", 8) != 0) { + return -1; + } + while (reg_end > region && isspace((unsigned char) reg_end[-1])) --reg_end; + ad->region.l = 0; + kputsn(region, reg_end - region, &ad->region); + if (ad->region.l == 0) { + return -1; + } + + return 0; +} + +static int set_region(void *adv, kstring_t *region) { + s3_auth_data *ad = (s3_auth_data *) adv; + + ad->region.l = 0; + return kputsn(region->s, region->l, &ad->region) < 0; +} + +static int http_status_errno(int status) +{ + if (status >= 500) + switch (status) { + case 501: return ENOSYS; + case 503: return EBUSY; + case 504: return ETIMEDOUT; + default: return EIO; + } + else if (status >= 400) + switch (status) { + case 401: return EPERM; + case 403: return EACCES; + case 404: return ENOENT; + case 405: return EROFS; + case 407: return EPERM; + case 408: return ETIMEDOUT; + case 410: return ENOENT; + default: return EINVAL; + } + else return 0; +} + +static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) { + kstring_t url = { 0, 0, NULL }; + + s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url); + hFILE *fp = NULL; + + if (ad == NULL) { + return NULL; + } + + if (ad->mode == 'r') { + long http_response = 0; + + fp = hopen(url.s, mode, "va_list", argsp, + "httphdr_callback", v4_auth_header_callback, + "httphdr_callback_data", ad, + "redirect_callback", redirect_endpoint_callback, + "redirect_callback_data", ad, + "http_response_ptr", &http_response, + "fail_on_error", 0, + NULL); + + if (fp == NULL) goto error; + + if (http_response == 307) { + // Follow additional redirect. 
+ ad->refcount = 1; + hclose_abruptly(fp); + + url.l = 0; + ksprintf(&url, "https://%s%s", ad->host.s, ad->bucket); + + fp = hopen(url.s, mode, "va_list", argsp, + "httphdr_callback", v4_auth_header_callback, + "httphdr_callback_data", ad, + "redirect_callback", redirect_endpoint_callback, + "redirect_callback_data", ad, + "http_response_ptr", &http_response, + "fail_on_error", 0, + NULL); + } + + if (http_response == 400) { + ad->refcount = 1; + if (handle_400_response(fp, ad) != 0) { + goto error; + } + hclose_abruptly(fp); + fp = hopen(url.s, mode, "va_list", argsp, + "httphdr_callback", v4_auth_header_callback, + "httphdr_callback_data", ad, + "redirect_callback", redirect_endpoint_callback, + "redirect_callback_data", ad, + NULL); + } else if (http_response > 400) { + ad->refcount = 1; + errno = http_status_errno(http_response); + goto error; + } + + if (fp == NULL) goto error; + } else { + kstring_t final_url = KS_INITIALIZE; + + // add the scheme marker + ksprintf(&final_url, "s3w+%s", url.s); + + if(final_url.l == 0) goto error; + + fp = hopen(final_url.s, mode, "va_list", argsp, + "s3_auth_callback", write_authorisation_callback, + "s3_auth_callback_data", ad, + "redirect_callback", redirect_endpoint_callback, + "set_region_callback", set_region, + NULL); + free(final_url.s); + + if (fp == NULL) goto error; + } + + free(url.s); + + return fp; + + error: + + if (fp) hclose_abruptly(fp); + free(url.s); + free_auth_data(ad); + + return NULL; +} + + +static hFILE *s3_open(const char *url, const char *mode) +{ + hFILE *fp; + + kstring_t mode_colon = { 0, 0, NULL }; + kputs(mode, &mode_colon); + kputc(':', &mode_colon); + + if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code + fp = s3_open_v4(url, mode_colon.s, NULL); + } else { + fp = s3_rewrite(url, mode_colon.s, NULL); + } + + free(mode_colon.s); + + return fp; +} + +static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0) +{ + hFILE *fp; + // Need to use va_copy() as we 
can only take the address of an actual + // va_list object, not that of a parameter whose type may have decayed. + va_list args; + va_copy(args, args0); + + if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code + fp = s3_open_v4(url, mode_colon, &args); + } else { + fp = s3_rewrite(url, mode_colon, &args); + } + + va_end(args); + return fp; +} + +int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self) +{ + static const struct hFILE_scheme_handler handler = + { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen + }; + +#ifdef ENABLE_PLUGINS + // Embed version string for examination via strings(1) or what(1) + static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT; + if (hts_verbose >= 9) + fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1); +#endif + + self->name = "Amazon S3"; + hfile_add_scheme_handler("s3", &handler); + hfile_add_scheme_handler("s3+http", &handler); + hfile_add_scheme_handler("s3+https", &handler); + return 0; +} diff --git a/src/htslib-1.21/hfile_s3_write.c b/src/htslib-1.21/hfile_s3_write.c new file mode 100644 index 0000000..a501645 --- /dev/null +++ b/src/htslib-1.21/hfile_s3_write.c @@ -0,0 +1,896 @@ +/* + hfile_s3_write.c - Code to handle multipart uploading to S3. + + Copyright (C) 2019 Genome Research Ltd. + + Author: Andrew Whitwham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + + +S3 Multipart Upload +------------------- + +There are several steps in the Mulitipart upload. + + +1) Initiate Upload +------------------ + +Initiate the upload and get an upload ID. This ID is used in all other steps. + + +2) Upload Part +-------------- + +Upload a part of the data. 5Mb minimum part size (except for the last part). +Each part is numbered and a successful upload returns an Etag header value that +needs to used for the completion step. + +Step repeated till all data is uploaded. + + +3) Completion +------------- + +Complete the upload by sending all the part numbers along with their associated +Etag values. + + +Optional - Abort +---------------- + +If something goes wrong this instructs the server to delete all the partial +uploads and abandon the upload process. + + +Andrew Whitwham, January 2019 +*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#ifdef __MSYS__ +#include +#endif +#include +#include + +#include "hfile_internal.h" +#ifdef ENABLE_PLUGINS +#include "version.h" +#endif +#include "htslib/hts.h" +#include "htslib/kstring.h" +#include "htslib/khash.h" + +#include + +#define MINIMUM_S3_WRITE_SIZE 5242880 +#define S3_MOVED_PERMANENTLY 301 +#define S3_BAD_REQUEST 400 + +// Lets the part memory size grow to about 1Gb giving a 2.5Tb max file size. +// Max. 
parts allowed by AWS is 10000, so use ceil(10000.0/9.0) +#define EXPAND_ON 1112 + +static struct { + kstring_t useragent; + CURLSH *share; + pthread_mutex_t share_lock; +} curl = { { 0, 0, NULL }, NULL, PTHREAD_MUTEX_INITIALIZER }; + +static void share_lock(CURL *handle, curl_lock_data data, + curl_lock_access access, void *userptr) { + pthread_mutex_lock(&curl.share_lock); +} + +static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) { + pthread_mutex_unlock(&curl.share_lock); +} + +typedef int (*s3_auth_callback) (void *auth_data, char *, kstring_t*, char*, kstring_t*, kstring_t*, kstring_t*, kstring_t*, int); + +typedef int (*set_region_callback) (void *auth_data, kstring_t *region); + +typedef struct { + s3_auth_callback callback; + redirect_callback redirect_callback; + set_region_callback set_region_callback; + void *callback_data; +} s3_authorisation; + +typedef struct { + hFILE base; + CURL *curl; + CURLcode ret; + s3_authorisation *au; + kstring_t buffer; + kstring_t url; + kstring_t upload_id; + kstring_t completion_message; + int part_no; + int aborted; + size_t index; + long verbose; + int part_size; + int expand; +} hFILE_s3_write; + + +static void ksinit(kstring_t *s) { + s->l = 0; + s->m = 0; + s->s = NULL; +} + + +static void ksfree(kstring_t *s) { + free(s->s); + ksinit(s); +} + + +static size_t response_callback(void *contents, size_t size, size_t nmemb, void *userp) { + size_t realsize = size * nmemb; + kstring_t *resp = (kstring_t *)userp; + + if (kputsn((const char *)contents, realsize, resp) == EOF) { + return 0; + } + + return realsize; +} + + +static int get_entry(char *in, char *start_tag, char *end_tag, kstring_t *out) { + char *start; + char *end; + + if (!in) { + return EOF; + } + + start = strstr(in, start_tag); + if (!start) return EOF; + + start += strlen(start_tag); + end = strstr(start, end_tag); + + if (!end) return EOF; + + return kputsn(start, end - start, out); +} + + +static void cleanup_local(hFILE_s3_write 
*fp) { + ksfree(&fp->buffer); + ksfree(&fp->url); + ksfree(&fp->upload_id); + ksfree(&fp->completion_message); + curl_easy_cleanup(fp->curl); + free(fp->au); + +} + + +static void cleanup(hFILE_s3_write *fp) { + // free up authorisation data + fp->au->callback(fp->au->callback_data, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0); + cleanup_local(fp); +} + + +static struct curl_slist *set_html_headers(hFILE_s3_write *fp, kstring_t *auth, kstring_t *date, kstring_t *content, kstring_t *token) { + struct curl_slist *headers = NULL; + + headers = curl_slist_append(headers, "Content-Type:"); // get rid of this + headers = curl_slist_append(headers, "Expect:"); // and this + headers = curl_slist_append(headers, auth->s); + headers = curl_slist_append(headers, date->s); + headers = curl_slist_append(headers, content->s); + + if (token->l) { + headers = curl_slist_append(headers, token->s); + } + + curl_easy_setopt(fp->curl, CURLOPT_HTTPHEADER, headers); + + return headers; +} + + +/* + The partially uploaded file will hang around unless the delete command is sent. 
+*/ +static int abort_upload(hFILE_s3_write *fp) { + kstring_t content_hash = {0, 0, NULL}; + kstring_t authorisation = {0, 0, NULL}; + kstring_t url = {0, 0, NULL}; + kstring_t content = {0, 0, NULL}; + kstring_t canonical_query_string = {0, 0, NULL}; + kstring_t date = {0, 0, NULL}; + kstring_t token = {0, 0, NULL}; + int ret = -1; + struct curl_slist *headers = NULL; + char http_request[] = "DELETE"; + + if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) { + goto out; + } + + if (fp->au->callback(fp->au->callback_data, http_request, NULL, + canonical_query_string.s, &content_hash, + &authorisation, &date, &token, 0) != 0) { + goto out; + } + + if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { + goto out; + } + + if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { + goto out; + } + + curl_easy_reset(fp->curl); + curl_easy_setopt(fp->curl, CURLOPT_CUSTOMREQUEST, http_request); + curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); + curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); + + curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); + + headers = set_html_headers(fp, &authorisation, &date, &content, &token); + fp->ret = curl_easy_perform(fp->curl); + + if (fp->ret == CURLE_OK) { + ret = 0; + } + + out: + ksfree(&authorisation); + ksfree(&content); + ksfree(&content_hash); + ksfree(&url); + ksfree(&date); + ksfree(&canonical_query_string); + ksfree(&token); + curl_slist_free_all(headers); + + fp->aborted = 1; + cleanup(fp); + + return ret; +} + + +static int complete_upload(hFILE_s3_write *fp, kstring_t *resp) { + kstring_t content_hash = {0, 0, NULL}; + kstring_t authorisation = {0, 0, NULL}; + kstring_t url = {0, 0, NULL}; + kstring_t content = {0, 0, NULL}; + kstring_t canonical_query_string = {0, 0, NULL}; + kstring_t date = {0, 0, NULL}; + kstring_t token = {0, 0, NULL}; + int ret = -1; + struct curl_slist *headers = NULL; + char http_request[] = "POST"; + + if 
(ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) { + return -1; + } + + // finish off the completion reply + if (kputs("\n", &fp->completion_message) < 0) { + goto out; + } + + if (fp->au->callback(fp->au->callback_data, http_request, + &fp->completion_message, canonical_query_string.s, + &content_hash, &authorisation, &date, &token, 0) != 0) { + goto out; + } + + if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { + goto out; + } + + if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { + goto out; + } + + curl_easy_reset(fp->curl); + curl_easy_setopt(fp->curl, CURLOPT_POST, 1L); + curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, fp->completion_message.s); + curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDSIZE, (long) fp->completion_message.l); + curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback); + curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp); + curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); + curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); + + curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); + + headers = set_html_headers(fp, &authorisation, &date, &content, &token); + fp->ret = curl_easy_perform(fp->curl); + + if (fp->ret == CURLE_OK) { + ret = 0; + } + + out: + ksfree(&authorisation); + ksfree(&content); + ksfree(&content_hash); + ksfree(&url); + ksfree(&date); + ksfree(&token); + ksfree(&canonical_query_string); + curl_slist_free_all(headers); + + return ret; +} + + +static size_t upload_callback(void *ptr, size_t size, size_t nmemb, void *stream) { + size_t realsize = size * nmemb; + hFILE_s3_write *fp = (hFILE_s3_write *)stream; + size_t read_length; + + if (realsize > (fp->buffer.l - fp->index)) { + read_length = fp->buffer.l - fp->index; + } else { + read_length = realsize; + } + + memcpy(ptr, fp->buffer.s + fp->index, read_length); + fp->index += read_length; + + return read_length; +} + + +static int upload_part(hFILE_s3_write *fp, 
kstring_t *resp) { + kstring_t content_hash = {0, 0, NULL}; + kstring_t authorisation = {0, 0, NULL}; + kstring_t url = {0, 0, NULL}; + kstring_t content = {0, 0, NULL}; + kstring_t canonical_query_string = {0, 0, NULL}; + kstring_t date = {0, 0, NULL}; + kstring_t token = {0, 0, NULL}; + int ret = -1; + struct curl_slist *headers = NULL; + char http_request[] = "PUT"; + + if (ksprintf(&canonical_query_string, "partNumber=%d&uploadId=%s", fp->part_no, fp->upload_id.s) < 0) { + return -1; + } + + if (fp->au->callback(fp->au->callback_data, http_request, &fp->buffer, + canonical_query_string.s, &content_hash, + &authorisation, &date, &token, 0) != 0) { + goto out; + } + + if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) { + goto out; + } + + fp->index = 0; + if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { + goto out; + } + + curl_easy_reset(fp->curl); + + curl_easy_setopt(fp->curl, CURLOPT_UPLOAD, 1L); + curl_easy_setopt(fp->curl, CURLOPT_READFUNCTION, upload_callback); + curl_easy_setopt(fp->curl, CURLOPT_READDATA, fp); + curl_easy_setopt(fp->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fp->buffer.l); + curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback); + curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp); + curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); + curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); + + curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); + + headers = set_html_headers(fp, &authorisation, &date, &content, &token); + fp->ret = curl_easy_perform(fp->curl); + + if (fp->ret == CURLE_OK) { + ret = 0; + } + + out: + ksfree(&authorisation); + ksfree(&content); + ksfree(&content_hash); + ksfree(&url); + ksfree(&date); + ksfree(&token); + ksfree(&canonical_query_string); + curl_slist_free_all(headers); + + return ret; +} + + +static ssize_t s3_write(hFILE *fpv, const void *bufferv, size_t nbytes) { + hFILE_s3_write *fp = (hFILE_s3_write *)fpv; + const char 
*buffer = (const char *)bufferv; + + if (kputsn(buffer, nbytes, &fp->buffer) == EOF) { + return -1; + } + + if (fp->buffer.l > fp->part_size) { + // time to write out our data + kstring_t response = {0, 0, NULL}; + int ret; + + ret = upload_part(fp, &response); + + if (!ret) { + long response_code; + kstring_t etag = {0, 0, NULL}; + + curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); + + if (response_code > 200) { + ret = -1; + } else { + if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) { + ret = -1; + } else { + ksprintf(&fp->completion_message, "\t\n\t\t%d\n\t\t%s\n\t\n", + fp->part_no, etag.s); + + ksfree(&etag); + } + } + } + + ksfree(&response); + + if (ret) { + abort_upload(fp); + return -1; + } + + fp->part_no++; + fp->buffer.l = 0; + + if (fp->expand && (fp->part_no % EXPAND_ON == 0)) { + fp->part_size *= 2; + } + } + + return nbytes; +} + + +static int s3_close(hFILE *fpv) { + hFILE_s3_write *fp = (hFILE_s3_write *)fpv; + kstring_t response = {0, 0, NULL}; + int ret = 0; + + if (!fp->aborted) { + + if (fp->buffer.l) { + // write the last part + + ret = upload_part(fp, &response); + + if (!ret) { + long response_code; + kstring_t etag = {0, 0, NULL}; + + curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); + + if (response_code > 200) { + ret = -1; + } else { + if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) { + ret = -1; + } else { + ksprintf(&fp->completion_message, "\t\n\t\t%d\n\t\t%s\n\t\n", + fp->part_no, etag.s); + + ksfree(&etag); + } + } + } + + ksfree(&response); + + if (ret) { + abort_upload(fp); + return -1; + } + + fp->part_no++; + } + + if (fp->part_no > 1) { + ret = complete_upload(fp, &response); + + if (!ret) { + if (strstr(response.s, "CompleteMultipartUploadResult") == NULL) { + ret = -1; + } + } + } else { + ret = -1; + } + + if (ret) { + abort_upload(fp); + } else { + cleanup(fp); + } + } + + ksfree(&response); + + return ret; +} + + +static int redirect_endpoint(hFILE_s3_write *fp, 
kstring_t *head) { + int ret = -1; + + if (fp->au->redirect_callback) { + ret = fp->au->redirect_callback(fp->au->callback_data, 301, head, &fp->url); + } + + return ret; +} + +static int handle_bad_request(hFILE_s3_write *fp, kstring_t *resp) { + kstring_t region = {0, 0, NULL}; + int ret = -1; + + if (fp->au->set_region_callback) { + if (get_entry(resp->s, "", "", ®ion) == EOF) { + return -1; + } + + ret = fp->au->set_region_callback(fp->au->callback_data, ®ion); + + ksfree(®ion); + } + + return ret; +} + +static int initialise_upload(hFILE_s3_write *fp, kstring_t *head, kstring_t *resp, int user_query) { + kstring_t content_hash = {0, 0, NULL}; + kstring_t authorisation = {0, 0, NULL}; + kstring_t url = {0, 0, NULL}; + kstring_t content = {0, 0, NULL}; + kstring_t date = {0, 0, NULL}; + kstring_t token = {0, 0, NULL}; + int ret = -1; + struct curl_slist *headers = NULL; + char http_request[] = "POST"; + char delimiter = '?'; + + if (user_query) { + delimiter = '&'; + } + + if (fp->au->callback(fp->au->callback_data, http_request, NULL, "uploads=", + &content_hash, &authorisation, &date, &token, user_query) != 0) { + goto out; + } + + if (ksprintf(&url, "%s%cuploads", fp->url.s, delimiter) < 0) { + goto out; + } + + if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) { + goto out; + } + + curl_easy_setopt(fp->curl, CURLOPT_URL, url.s); + curl_easy_setopt(fp->curl, CURLOPT_POST, 1L); + curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, ""); // send no data + curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback); + curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp); + curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback); + curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)head); + curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s); + + curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose); + + headers = set_html_headers(fp, &authorisation, &date, &content, &token); + fp->ret = 
curl_easy_perform(fp->curl); + + if (fp->ret == CURLE_OK) { + ret = 0; + } + + out: + ksfree(&authorisation); + ksfree(&content); + ksfree(&content_hash); + ksfree(&url); + ksfree(&date); + ksfree(&token); + curl_slist_free_all(headers); + + return ret; +} + + +static int get_upload_id(hFILE_s3_write *fp, kstring_t *resp) { + int ret = 0; + + ksinit(&fp->upload_id); + + if (get_entry(resp->s, "", "", &fp->upload_id) == EOF) { + ret = -1; + } + + return ret; +} + + +static const struct hFILE_backend s3_write_backend = { + NULL, s3_write, NULL, NULL, s3_close +}; + + +static hFILE *s3_write_open(const char *url, s3_authorisation *auth) { + hFILE_s3_write *fp; + kstring_t response = {0, 0, NULL}; + kstring_t header = {0, 0, NULL}; + int ret, has_user_query = 0; + char *query_start; + const char *env; + + + if (!auth || !auth->callback || !auth->callback_data) { + return NULL; + } + + fp = (hFILE_s3_write *)hfile_init(sizeof(hFILE_s3_write), "w", 0); + + if (fp == NULL) { + return NULL; + } + + if ((fp->curl = curl_easy_init()) == NULL) { + errno = ENOMEM; + goto error; + } + + if ((fp->au = calloc(1, sizeof(s3_authorisation))) == NULL) { + goto error; + } + + memcpy(fp->au, auth, sizeof(s3_authorisation)); + + ksinit(&fp->buffer); + ksinit(&fp->url); + ksinit(&fp->completion_message); + fp->aborted = 0; + + fp->part_size = MINIMUM_S3_WRITE_SIZE; + fp->expand = 1; + + if ((env = getenv("HTS_S3_PART_SIZE")) != NULL) { + int part_size = atoi(env) * 1024 * 1024; + + if (part_size > fp->part_size) + fp->part_size = part_size; + + fp->expand = 0; + } + + if (hts_verbose >= 8) { + fp->verbose = 1L; + } else { + fp->verbose = 0L; + } + + kputs(url + 4, &fp->url); + + if ((query_start = strchr(fp->url.s, '?'))) { + has_user_query = 1;; + } + + ret = initialise_upload(fp, &header, &response, has_user_query); + + if (ret == 0) { + long response_code; + + curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code); + + if (response_code == S3_MOVED_PERMANENTLY) { + if 
(redirect_endpoint(fp, &header) == 0) { + ksfree(&response); + ksfree(&header); + + ret = initialise_upload(fp, &header, &response, has_user_query); + } + } else if (response_code == S3_BAD_REQUEST) { + if (handle_bad_request(fp, &response) == 0) { + ksfree(&response); + ksfree(&header); + + ret = initialise_upload(fp, &header, &response, has_user_query); + } + } + + ksfree(&header); // no longer needed + } + + if (ret) goto error; + + if (get_upload_id(fp, &response)) goto error; + + // start the completion message (a formatted list of parts) + ksinit(&fp->completion_message); + + if (kputs("\n", &fp->completion_message) == EOF) { + goto error; + } + + fp->part_no = 1; + + // user query string no longer a useful part of the URL + if (query_start) + *query_start = '\0'; + + fp->base.backend = &s3_write_backend; + ksfree(&response); + + return &fp->base; + +error: + ksfree(&response); + cleanup_local(fp); + hfile_destroy((hFILE *)fp); + return NULL; +} + + +static hFILE *hopen_s3_write(const char *url, const char *mode) { + if (hts_verbose >= 1) { + fprintf(stderr, "[E::%s] s3w:// URLs should not be used directly; use s3:// instead.\n", __func__); + } + return NULL; +} + + +static int parse_va_list(s3_authorisation *auth, va_list args) { + const char *argtype; + + while ((argtype = va_arg(args, const char *)) != NULL) { + if (strcmp(argtype, "s3_auth_callback") == 0) { + auth->callback = va_arg(args, s3_auth_callback); + } else if (strcmp(argtype, "s3_auth_callback_data") == 0) { + auth->callback_data = va_arg(args, void *); + } else if (strcmp(argtype, "redirect_callback") == 0) { + auth->redirect_callback = va_arg(args, redirect_callback); + } else if (strcmp(argtype, "set_region_callback") == 0) { + auth->set_region_callback = va_arg(args, set_region_callback); + } else if (strcmp(argtype, "va_list") == 0) { + va_list *args2 = va_arg(args, va_list *); + + if (args2) { + if (parse_va_list(auth, *args2) < 0) return -1; + } + } else { + errno = EINVAL; + return -1; 
+ } + } + + return 0; +} + + +static hFILE *vhopen_s3_write(const char *url, const char *mode, va_list args) { + hFILE *fp = NULL; + s3_authorisation auth = {NULL, NULL, NULL}; + + if (parse_va_list(&auth, args) == 0) { + fp = s3_write_open(url, &auth); + } + + return fp; +} + + +static void s3_write_exit(void) { + if (curl_share_cleanup(curl.share) == CURLSHE_OK) + curl.share = NULL; + + free(curl.useragent.s); + curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL; + curl_global_cleanup(); +} + + +int PLUGIN_GLOBAL(hfile_plugin_init,_s3_write)(struct hFILE_plugin *self) { + + static const struct hFILE_scheme_handler handler = + { hopen_s3_write, hfile_always_remote, "S3 Multipart Upload", + 2000 + 50, vhopen_s3_write + }; + +#ifdef ENABLE_PLUGINS + // Embed version string for examination via strings(1) or what(1) + static const char id[] = + "@(#)hfile_s3_write plugin (htslib)\t" HTS_VERSION_TEXT; + const char *version = strchr(id, '\t') + 1; + + if (hts_verbose >= 9) + fprintf(stderr, "[M::hfile_s3_write.init] version %s\n", + version); +#else + const char *version = hts_version(); +#endif + + const curl_version_info_data *info; + CURLcode err; + CURLSHcode errsh; + + err = curl_global_init(CURL_GLOBAL_ALL); + + if (err != CURLE_OK) { + // look at putting in an errno here + return -1; + } + + curl.share = curl_share_init(); + + if (curl.share == NULL) { + curl_global_cleanup(); + errno = EIO; + return -1; + } + + errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock); + errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock); + errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS); + + if (errsh != 0) { + curl_share_cleanup(curl.share); + curl_global_cleanup(); + errno = EIO; + return -1; + } + + info = curl_version_info(CURLVERSION_NOW); + ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version); + + self->name = "S3 Multipart Upload"; + self->destroy = s3_write_exit; + + 
hfile_add_scheme_handler("s3w", &handler); + hfile_add_scheme_handler("s3w+http", &handler); + hfile_add_scheme_handler("s3w+https", &handler); + + return 0; +} diff --git a/src/htslib-1.21/hts.c b/src/htslib-1.21/hts.c new file mode 100644 index 0000000..a8a8bea --- /dev/null +++ b/src/htslib-1.21/hts.c @@ -0,0 +1,5152 @@ +/* hts.c -- format-neutral I/O, indexing, and iterator API functions. + + Copyright (C) 2008, 2009, 2012-2024 Genome Research Ltd. + Copyright (C) 2012, 2013 Broad Institute. + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_LIBLZMA +#ifdef HAVE_LZMA_H +#include +#else +#include "os/lzma_stub.h" +#endif +#endif + +#include "htslib/hts.h" +#include "htslib/bgzf.h" +#include "cram/cram.h" +#include "htslib/hfile.h" +#include "htslib/hts_endian.h" +#include "version.h" +#include "config_vars.h" +#include "hts_internal.h" +#include "hfile_internal.h" +#include "sam_internal.h" +#include "htslib/hts_expr.h" +#include "htslib/hts_os.h" // drand48 + +#include "htslib/khash.h" +#include "htslib/kseq.h" +#include "htslib/ksort.h" +#include "htslib/tbx.h" +#if defined(HAVE_EXTERNAL_LIBHTSCODECS) +#include +#else +#include "htscodecs/htscodecs/htscodecs.h" +#endif + +#ifndef EFTYPE +#define EFTYPE ENOEXEC +#endif + +KHASH_INIT2(s2i,, kh_cstr_t, int64_t, 1, kh_str_hash_func, kh_str_hash_equal) + +HTSLIB_EXPORT +int hts_verbose = HTS_LOG_WARNING; + +const char *hts_version(void) +{ + return HTS_VERSION_TEXT; +} + +unsigned int hts_features(void) { + unsigned int feat = HTS_FEATURE_HTSCODECS; // Always present + +#ifdef PACKAGE_URL + feat |= HTS_FEATURE_CONFIGURE; +#endif + +#ifdef ENABLE_PLUGINS + feat |= HTS_FEATURE_PLUGINS; +#endif + +#ifdef HAVE_LIBCURL + feat |= HTS_FEATURE_LIBCURL; +#endif + +#ifdef ENABLE_S3 + feat |= HTS_FEATURE_S3; +#endif + +#ifdef ENABLE_GCS + feat |= HTS_FEATURE_GCS; +#endif + +#ifdef HAVE_LIBDEFLATE + feat |= HTS_FEATURE_LIBDEFLATE; +#endif + +#ifdef HAVE_LIBLZMA + feat |= HTS_FEATURE_LZMA; +#endif + +#ifdef HAVE_LIBBZ2 + feat |= HTS_FEATURE_BZIP2; +#endif + + return feat; +} + +const char *hts_test_feature(unsigned int id) { + unsigned int feat = hts_features(); + + switch (id) { + case HTS_FEATURE_CONFIGURE: + return feat & HTS_FEATURE_CONFIGURE ? 
"yes" : NULL; + case HTS_FEATURE_PLUGINS: + return feat & HTS_FEATURE_PLUGINS ? "yes" : NULL; + case HTS_FEATURE_LIBCURL: + return feat & HTS_FEATURE_LIBCURL ? "yes" : NULL; + case HTS_FEATURE_S3: + return feat & HTS_FEATURE_S3 ? "yes" : NULL; + case HTS_FEATURE_GCS: + return feat & HTS_FEATURE_GCS ? "yes" : NULL; + case HTS_FEATURE_LIBDEFLATE: + return feat & HTS_FEATURE_LIBDEFLATE ? "yes" : NULL; + case HTS_FEATURE_BZIP2: + return feat & HTS_FEATURE_BZIP2 ? "yes" : NULL; + case HTS_FEATURE_LZMA: + return feat & HTS_FEATURE_LZMA ? "yes" : NULL; + + case HTS_FEATURE_HTSCODECS: + return htscodecs_version(); + + case HTS_FEATURE_CC: + return HTS_CC; + case HTS_FEATURE_CFLAGS: + return HTS_CFLAGS; + case HTS_FEATURE_LDFLAGS: + return HTS_LDFLAGS; + case HTS_FEATURE_CPPFLAGS: + return HTS_CPPFLAGS; + + default: + fprintf(stderr, "Unknown feature code: %u\n", id); + } + + return NULL; +} + +// Note this implementation also means we can just "strings" the library +// to find the configuration parameters. 
+const char *hts_feature_string(void) { + static char config[1200]; + const char *flags= + +#ifdef PACKAGE_URL + "build=configure " +#else + "build=Makefile " +#endif + +#ifdef HAVE_LIBCURL + "libcurl=yes " +#else + "libcurl=no " +#endif + +#ifdef ENABLE_S3 + "S3=yes " +#else + "S3=no " +#endif + +#ifdef ENABLE_GCS + "GCS=yes " +#else + "GCS=no " +#endif + +#ifdef HAVE_LIBDEFLATE + "libdeflate=yes " +#else + "libdeflate=no " +#endif + +#ifdef HAVE_LIBLZMA + "lzma=yes " +#else + "lzma=no " +#endif + +#ifdef HAVE_LIBBZ2 + "bzip2=yes " +#else + "bzip2=no " +#endif + +// "plugins=" must stay at the end as it is followed by "plugin-path=" +#ifdef ENABLE_PLUGINS + "plugins=yes"; +#else + "plugins=no"; +#endif + +#ifdef ENABLE_PLUGINS + snprintf(config, sizeof(config), + "%s plugin-path=%.1000s htscodecs=%.40s", + flags, hts_plugin_path(), htscodecs_version()); +#else + snprintf(config, sizeof(config), + "%s htscodecs=%.40s", + flags, htscodecs_version()); +#endif + return config; +} + + +HTSLIB_EXPORT +const unsigned char seq_nt16_table[256] = { + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15, + 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, + 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, + 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, + 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, + + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, + 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15 +}; + +HTSLIB_EXPORT +const char seq_nt16_str[] = "=ACMGRSVTWYHKDBN"; + 
+HTSLIB_EXPORT +const int seq_nt16_int[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; + +/********************** + *** Basic file I/O *** + **********************/ + +static enum htsFormatCategory format_category(enum htsExactFormat fmt) +{ + switch (fmt) { + case bam: + case sam: + case cram: + case fastq_format: + case fasta_format: + return sequence_data; + + case vcf: + case bcf: + return variant_data; + + case bai: + case crai: + case csi: + case fai_format: + case fqi_format: + case gzi: + case tbi: + return index_file; + + case bed: + case d4_format: + return region_list; + + case htsget: + case hts_crypt4gh_format: + return unknown_category; + + case unknown_format: + case binary_format: + case text_format: + case empty_format: + case format_maximum: + break; + } + + return unknown_category; +} + +// Decompress several hundred bytes by peeking at the file, which must be +// positioned at the start of a GZIP block. +static ssize_t +decompress_peek_gz(hFILE *fp, unsigned char *dest, size_t destsize) +{ + unsigned char buffer[2048]; + z_stream zs; + ssize_t npeek = hpeek(fp, buffer, sizeof buffer); + + if (npeek < 0) return -1; + + zs.zalloc = NULL; + zs.zfree = NULL; + zs.next_in = buffer; + zs.avail_in = npeek; + zs.next_out = dest; + zs.avail_out = destsize; + if (inflateInit2(&zs, 31) != Z_OK) return -1; + + int ret; + const unsigned char *last_in = buffer; + while (zs.avail_out > 0) { + ret = inflate(&zs, Z_SYNC_FLUSH); + if (ret == Z_STREAM_END) { + if (last_in == zs.next_in) + break; // Paranoia to avoid potential looping. 
Shouldn't happen + else + last_in = zs.next_in; + inflateReset(&zs); + } else if (ret != Z_OK) { + // eg Z_BUF_ERROR due to avail_in/out becoming zero + break; + } + } + + // NB: zs.total_out is changed by inflateReset, so use pointer diff instead + destsize = zs.next_out - dest; + inflateEnd(&zs); + + return destsize; +} + +#ifdef HAVE_LIBLZMA +// Similarly decompress a portion by peeking at the file, which must be +// positioned at the start of the file. +static ssize_t +decompress_peek_xz(hFILE *fp, unsigned char *dest, size_t destsize) +{ + unsigned char buffer[2048]; + ssize_t npeek = hpeek(fp, buffer, sizeof buffer); + if (npeek < 0) return -1; + + lzma_stream ls = LZMA_STREAM_INIT; + if (lzma_stream_decoder(&ls, lzma_easy_decoder_memusage(9), 0) != LZMA_OK) + return -1; + + ls.next_in = buffer; + ls.avail_in = npeek; + ls.next_out = dest; + ls.avail_out = destsize; + + int r = lzma_code(&ls, LZMA_RUN); + if (! (r == LZMA_OK || r == LZMA_STREAM_END)) { + lzma_end(&ls); + return -1; + } + + destsize = ls.total_out; + lzma_end(&ls); + + return destsize; +} +#endif + +// Parse "x.y" text, taking care because the string is not NUL-terminated +// and filling in major/minor only when the digits are followed by a delimiter, +// so we don't misread "1.10" as "1.1" due to reaching the end of the buffer. 
+static void +parse_version(htsFormat *fmt, const unsigned char *u, const unsigned char *ulim) +{ + const char *s = (const char *) u; + const char *slim = (const char *) ulim; + short v; + + fmt->version.major = fmt->version.minor = -1; + + for (v = 0; s < slim && isdigit_c(*s); s++) + v = 10 * v + *s - '0'; + + if (s < slim) { + fmt->version.major = v; + if (*s == '.') { + s++; + for (v = 0; s < slim && isdigit_c(*s); s++) + v = 10 * v + *s - '0'; + if (s < slim) + fmt->version.minor = v; + } + else + fmt->version.minor = 0; + } +} + +static int +cmp_nonblank(const char *key, const unsigned char *u, const unsigned char *ulim) +{ + const unsigned char *ukey = (const unsigned char *) key; + + while (*ukey) + if (u >= ulim) return +1; + else if (isspace_c(*u)) u++; + else if (*u != *ukey) return (*ukey < *u)? -1 : +1; + else u++, ukey++; + + return 0; +} + +static int is_text_only(const unsigned char *u, const unsigned char *ulim) +{ + for (; u < ulim; u++) + if (! (*u >= ' ' || *u == '\t' || *u == '\r' || *u == '\n')) + return 0; + + return 1; +} + +static inline int +alternate_zeros(const unsigned char *u, const unsigned char *ulim) +{ + for (; u < ulim; u += 2) + if (*u != '\0') return 0; + return 1; +} + +static int is_utf16_text(const unsigned char *u, const unsigned char *ulim) +{ + if (ulim - u >= 6 && + ((u[0] == 0xfe && u[1] == 0xff && alternate_zeros(u+2, ulim)) || + (u[0] == 0xff && u[1] == 0xfe && alternate_zeros(u+3, ulim)))) + return 2; + else if (ulim - u >= 8 && + (alternate_zeros(u, ulim) || alternate_zeros(u+1, ulim))) + return 1; + else + return 0; +} + +static int is_fastaq(const unsigned char *u, const unsigned char *ulim) +{ + const unsigned char *eol = memchr(u, '\n', ulim - u); + + // Check that the first line is entirely textual + if (! is_text_only(u, eol? 
eol : ulim)) return 0; + + // If the first line is very long, consider the file to indeed be FASTA/Q + if (eol == NULL) return 1; + + u = eol+1; // Now points to the first character of the second line + + // Scan over all base-encoding letters (including 'N' but not SEQ's '=') + while (u < ulim && (seq_nt16_table[*u] != 15 || toupper(*u) == 'N')) { + if (*u == '=') return 0; + u++; + } + + return (u == ulim || *u == '\r' || *u == '\n')? 1 : 0; +} + +// Parse tab-delimited text, filling in a string of column types and returning +// the number of columns spotted (within [u,ulim), and up to column_len) or -1 +// if non-printable characters were seen. Column types: +// i: integer, s: strand sign, C: CIGAR, O: SAM optional field, Z: anything +static int +parse_tabbed_text(char *columns, int column_len, + const unsigned char *u, const unsigned char *ulim, + int *complete) +{ + const char *str = (const char *) u; + const char *slim = (const char *) ulim; + const char *s; + int ncolumns = 0; + + enum { digit = 1, leading_sign = 2, cigar_operator = 4, other = 8 }; + unsigned seen = 0; + *complete = 0; + + for (s = str; s < slim; s++) + if (*s >= ' ') { + if (isdigit_c(*s)) + seen |= digit; + else if ((*s == '+' || *s == '-') && s == str) + seen |= leading_sign; + else if (strchr(BAM_CIGAR_STR, *s) && s > str && isdigit_c(s[-1])) + seen |= cigar_operator; + else + seen |= other; + } + else if (*s == '\t' || *s == '\r' || *s == '\n') { + size_t len = s - str; + char type; + + if (seen == digit || seen == (leading_sign|digit)) type = 'i'; + else if (seen == (digit|cigar_operator)) type = 'C'; + else if (len == 1) + switch (str[0]) { + case '*': type = 'C'; break; + case '+': case '-': case '.': type = 's'; break; + default: type = 'Z'; break; + } + else if (len >= 5 && str[2] == ':' && str[4] == ':') type = 'O'; + else type = 'Z'; + + columns[ncolumns++] = type; + if (*s != '\t' || ncolumns >= column_len - 1) { + *complete = 1; // finished the line or more columns than needed 
+ break; + } + + str = s + 1; + seen = 0; + } + else return -1; + + columns[ncolumns] = '\0'; + return ncolumns; +} + +// Match COLUMNS as a prefix against PATTERN (so COLUMNS may run out first). +// Returns len(COLUMNS) (modulo '+'), or 0 if there is a mismatched entry. +static int colmatch(const char *columns, const char *pattern) +{ + int i; + for (i = 0; columns[i] != '\0'; i++) { + if (pattern[i] == '+') return i; + if (! (columns[i] == pattern[i] || pattern[i] == 'Z')) return 0; + } + + return i; +} + +int hts_detect_format(hFILE *hfile, htsFormat *fmt) +{ + return hts_detect_format2(hfile, NULL, fmt); +} + +int hts_detect_format2(hFILE *hfile, const char *fname, htsFormat *fmt) +{ + char extension[HTS_MAX_EXT_LEN], columns[24]; + unsigned char s[1024]; + int complete = 0; + ssize_t len = hpeek(hfile, s, 18); + if (len < 0) return -1; + + fmt->category = unknown_category; + fmt->format = unknown_format; + fmt->version.major = fmt->version.minor = -1; + fmt->compression = no_compression; + fmt->compression_level = -1; + fmt->specific = NULL; + + if (len >= 2 && s[0] == 0x1f && s[1] == 0x8b) { + // The stream is either gzip-compressed or BGZF-compressed. + // Determine which, and decompress the first few records or lines. + fmt->compression = gzip; + if (len >= 18 && (s[3] & 4)) { + if (memcmp(&s[12], "BC\2\0", 4) == 0) + fmt->compression = bgzf; + else if (memcmp(&s[12], "RAZF", 4) == 0) + fmt->compression = razf_compression; + } + if (len >= 9 && s[2] == 8) + fmt->compression_level = (s[8] == 2)? 9 : (s[8] == 4)? 1 : -1; + + len = decompress_peek_gz(hfile, s, sizeof s); + } + else if (len >= 10 && memcmp(s, "BZh", 3) == 0 && + (memcmp(&s[4], "\x31\x41\x59\x26\x53\x59", 6) == 0 || + memcmp(&s[4], "\x17\x72\x45\x38\x50\x90", 6) == 0)) { + fmt->compression = bzip2_compression; + fmt->compression_level = s[3] - '0'; + // Decompressing via libbz2 produces no output until it has a whole + // block (of size 100Kb x level), which is too large for peeking. 
+ // So unfortunately we can recognise bzip2 but not the contents, + // except that \x1772... magic indicates the stream is empty. + if (s[4] == '\x31') return 0; + else len = 0; + } + else if (len >= 6 && memcmp(s, "\xfd""7zXZ\0", 6) == 0) { + fmt->compression = xz_compression; +#ifdef HAVE_LIBLZMA + len = decompress_peek_xz(hfile, s, sizeof s); +#else + // Without liblzma, we can't recognise the decompressed contents. + return 0; +#endif + } + else if (len >= 4 && memcmp(s, "\x28\xb5\x2f\xfd", 4) == 0) { + fmt->compression = zstd_compression; + return 0; + } + else { + len = hpeek(hfile, s, sizeof s); + } + if (len < 0) return -1; + + if (len == 0) { + fmt->format = empty_format; + return 0; + } + + // We avoid using filename extensions wherever possible (as filenames are + // not always available), but in a few cases they must be considered: + // - FASTA/Q indexes are simply tab-separated text; files that match these + // patterns but not the fai/fqi extension are usually generic BED files + // - GZI indexes have no magic numbers so can only be detected by filename + if (fname && strcmp(fname, "-") != 0) { + char *s; + if (find_file_extension(fname, extension) < 0) extension[0] = '\0'; + for (s = extension; *s; s++) *s = tolower_c(*s); + } + else extension[0] = '\0'; + + if (len >= 6 && memcmp(s,"CRAM",4) == 0 && s[4]>=1 && s[4]<=7 && s[5]<=7) { + fmt->category = sequence_data; + fmt->format = cram; + fmt->version.major = s[4], fmt->version.minor = s[5]; + fmt->compression = custom; + return 0; + } + else if (len >= 4 && s[3] <= '\4') { + if (memcmp(s, "BAM\1", 4) == 0) { + fmt->category = sequence_data; + fmt->format = bam; + // TODO Decompress enough to pick version from @HD-VN header + fmt->version.major = 1, fmt->version.minor = -1; + return 0; + } + else if (memcmp(s, "BAI\1", 4) == 0) { + fmt->category = index_file; + fmt->format = bai; + fmt->version.major = -1, fmt->version.minor = -1; + return 0; + } + else if (memcmp(s, "BCF\4", 4) == 0) { + 
fmt->category = variant_data; + fmt->format = bcf; + fmt->version.major = 1, fmt->version.minor = -1; + return 0; + } + else if (memcmp(s, "BCF\2", 4) == 0) { + fmt->category = variant_data; + fmt->format = bcf; + fmt->version.major = s[3]; + fmt->version.minor = (len >= 5 && s[4] <= 2)? s[4] : 0; + return 0; + } + else if (memcmp(s, "CSI\1", 4) == 0) { + fmt->category = index_file; + fmt->format = csi; + fmt->version.major = 1, fmt->version.minor = -1; + return 0; + } + else if (memcmp(s, "TBI\1", 4) == 0) { + fmt->category = index_file; + fmt->format = tbi; + return 0; + } + // GZI indexes have no magic numbers, so must be recognised solely by + // filename extension. + else if (strcmp(extension, "gzi") == 0) { + fmt->category = index_file; + fmt->format = gzi; + return 0; + } + } + else if (len >= 16 && memcmp(s, "##fileformat=VCF", 16) == 0) { + fmt->category = variant_data; + fmt->format = vcf; + if (len >= 21 && s[16] == 'v') + parse_version(fmt, &s[17], &s[len]); + return 0; + } + else if (len >= 4 && s[0] == '@' && + (memcmp(s, "@HD\t", 4) == 0 || memcmp(s, "@SQ\t", 4) == 0 || + memcmp(s, "@RG\t", 4) == 0 || memcmp(s, "@PG\t", 4) == 0 || + memcmp(s, "@CO\t", 4) == 0)) { + fmt->category = sequence_data; + fmt->format = sam; + // @HD-VN is not guaranteed to be the first tag, but then @HD is + // not guaranteed to be present at all... + if (len >= 9 && memcmp(s, "@HD\tVN:", 7) == 0) + parse_version(fmt, &s[7], &s[len]); + else + fmt->version.major = 1, fmt->version.minor = -1; + return 0; + } + else if (len >= 8 && memcmp(s, "d4\xdd\xdd", 4) == 0) { + fmt->category = region_list; + fmt->format = d4_format; + // How to decode the D4 Format Version bytes is not yet specified + // so we don't try to set fmt->version.{major,minor}. 
+ return 0; + } + else if (cmp_nonblank("{\"htsget\":", s, &s[len]) == 0) { + fmt->category = unknown_category; + fmt->format = htsget; + return 0; + } + else if (len > 8 && memcmp(s, "crypt4gh", 8) == 0) { + fmt->category = unknown_category; + fmt->format = hts_crypt4gh_format; + return 0; + } + else if (len >= 1 && s[0] == '>' && is_fastaq(s, &s[len])) { + fmt->category = sequence_data; + fmt->format = fasta_format; + return 0; + } + else if (len >= 1 && s[0] == '@' && is_fastaq(s, &s[len])) { + fmt->category = sequence_data; + fmt->format = fastq_format; + return 0; + } + else if (parse_tabbed_text(columns, sizeof columns, s, + &s[len], &complete) > 0) { + // A complete SAM line is at least 11 columns. On unmapped long reads may + // be missing two. (On mapped long reads we must have an @ header so long + // CIGAR is irrelevant.) + if (colmatch(columns, "ZiZiiCZiiZZOOOOOOOOOOOOOOOOOOOO+") + >= 9 + 2*complete) { + fmt->category = sequence_data; + fmt->format = sam; + fmt->version.major = 1, fmt->version.minor = -1; + return 0; + } + else if (fmt->compression == gzip && colmatch(columns, "iiiiii") == 6) { + fmt->category = index_file; + fmt->format = crai; + return 0; + } + else if (strstr(extension, "fqi") && colmatch(columns, "Ziiiii") == 6) { + fmt->category = index_file; + fmt->format = fqi_format; + return 0; + } + else if (strstr(extension, "fai") && colmatch(columns, "Ziiii") == 5) { + fmt->category = index_file; + fmt->format = fai_format; + return 0; + } + else if (colmatch(columns, "Zii+") >= 3) { + fmt->category = region_list; + fmt->format = bed; + return 0; + } + } + + // Arbitrary text files can be read using hts_getline(). + if (is_text_only(s, &s[len])) fmt->format = text_format; + + // Nothing recognised: leave unset fmt-> fields as unknown. 
+ return 0; +} + +char *hts_format_description(const htsFormat *format) +{ + kstring_t str = { 0, 0, NULL }; + + switch (format->format) { + case sam: kputs("SAM", &str); break; + case bam: kputs("BAM", &str); break; + case cram: kputs("CRAM", &str); break; + case fasta_format: kputs("FASTA", &str); break; + case fastq_format: kputs("FASTQ", &str); break; + case vcf: kputs("VCF", &str); break; + case bcf: + if (format->version.major == 1) kputs("Legacy BCF", &str); + else kputs("BCF", &str); + break; + case bai: kputs("BAI", &str); break; + case crai: kputs("CRAI", &str); break; + case csi: kputs("CSI", &str); break; + case fai_format: kputs("FASTA-IDX", &str); break; + case fqi_format: kputs("FASTQ-IDX", &str); break; + case gzi: kputs("GZI", &str); break; + case tbi: kputs("Tabix", &str); break; + case bed: kputs("BED", &str); break; + case d4_format: kputs("D4", &str); break; + case htsget: kputs("htsget", &str); break; + case hts_crypt4gh_format: kputs("crypt4gh", &str); break; + case empty_format: kputs("empty", &str); break; + default: kputs("unknown", &str); break; + } + + if (format->version.major >= 0) { + kputs(" version ", &str); + kputw(format->version.major, &str); + if (format->version.minor >= 0) { + kputc('.', &str); + kputw(format->version.minor, &str); + } + } + + switch (format->compression) { + case bzip2_compression: kputs(" bzip2-compressed", &str); break; + case razf_compression: kputs(" legacy-RAZF-compressed", &str); break; + case xz_compression: kputs(" XZ-compressed", &str); break; + case zstd_compression: kputs(" Zstandard-compressed", &str); break; + case custom: kputs(" compressed", &str); break; + case gzip: kputs(" gzip-compressed", &str); break; + + case bgzf: + switch (format->format) { + case bam: + case bcf: + case csi: + case tbi: + // These are by definition BGZF, so just use the generic term + kputs(" compressed", &str); + break; + default: + kputs(" BGZF-compressed", &str); + break; + } + break; + + case no_compression: + 
switch (format->format) { + case bam: + case bcf: + case cram: + case csi: + case tbi: + // These are normally compressed, so emphasise that this one isn't + kputs(" uncompressed", &str); + break; + default: + break; + } + break; + + default: break; + } + + switch (format->category) { + case sequence_data: kputs(" sequence", &str); break; + case variant_data: kputs(" variant calling", &str); break; + case index_file: kputs(" index", &str); break; + case region_list: kputs(" genomic region", &str); break; + default: break; + } + + if (format->compression == no_compression) + switch (format->format) { + case text_format: + case sam: + case crai: + case vcf: + case bed: + case fai_format: + case fqi_format: + case fasta_format: + case fastq_format: + case htsget: + kputs(" text", &str); + break; + + case empty_format: + break; + + default: + kputs(" data", &str); + break; + } + else + kputs(" data", &str); + + return ks_release(&str); +} + +htsFile *hts_open_format(const char *fn, const char *mode, const htsFormat *fmt) +{ + char smode[101], *cp, *cp2, *mode_c, *uncomp = NULL; + htsFile *fp = NULL; + hFILE *hfile = NULL; + char fmt_code = '\0'; + // see enum htsExactFormat in htslib/hts.h + const char format_to_mode[] = "\0g\0\0b\0c\0\0b\0g\0\0\0\0\0Ff\0\0"; + + strncpy(smode, mode, 99); + smode[99]=0; + if ((cp = strchr(smode, ','))) + *cp = '\0'; + + // Migrate format code (b or c) to the end of the smode buffer. 
+ for (cp2 = cp = smode; *cp; cp++) { + if (*cp == 'b') + fmt_code = 'b'; + else if (*cp == 'c') + fmt_code = 'c'; + else { + *cp2++ = *cp; + // Cache the uncompress flag 'u' pos if present + if (!uncomp && (*cp == 'u')) { + uncomp = cp2 - 1; + } + } + } + mode_c = cp2; + *cp2++ = fmt_code; + *cp2++ = 0; + + // Set or reset the format code if opts->format is used + if (fmt && fmt->format > unknown_format + && fmt->format < sizeof(format_to_mode)) { + *mode_c = format_to_mode[fmt->format]; + } + + // Uncompressed bam/bcf is not supported, change 'u' to '0' on write + if (uncomp && *mode_c == 'b' && (strchr(smode, 'w') || strchr(smode, 'a'))) { + *uncomp = '0'; + } + + // If we really asked for a compressed text format then mode_c above will + // point to nul. We set to 'z' to enable bgzf. + if (strchr(mode, 'w') && fmt && fmt->compression == bgzf) { + if (fmt->format == sam || fmt->format == vcf || fmt->format == text_format) + *mode_c = 'z'; + } + + char *rmme = NULL, *fnidx = strstr(fn, HTS_IDX_DELIM); + if ( fnidx ) { + rmme = strdup(fn); + if ( !rmme ) goto error; + rmme[fnidx-fn] = 0; + fn = rmme; + } + + hfile = hopen(fn, smode); + if (hfile == NULL) goto error; + + fp = hts_hopen(hfile, fn, smode); + if (fp == NULL) goto error; + + // Compensate for the loss of exactness in htsExactFormat. + // hts_hopen returns generics such as binary or text, but we + // have been given something explicit here so use that instead. 
+ if (fp->is_write && fmt && + (fmt->format == bam || fmt->format == sam || + fmt->format == vcf || fmt->format == bcf || + fmt->format == bed || fmt->format == fasta_format || + fmt->format == fastq_format)) + fp->format.format = fmt->format; + + if (fmt && fmt->specific) { + if (hts_opt_apply(fp, fmt->specific) != 0) { + if (((hts_opt*)fmt->specific)->opt == CRAM_OPT_REFERENCE && + (errno == ENOENT || errno == EIO || errno == EBADF || + errno == EACCES || errno == EISDIR)) { + /* error during reference file operation + for these specific errors, set the error as EINVAL */ + errno = EINVAL; + } + goto error; + } + } + if ( rmme ) free(rmme); + return fp; + +error: + hts_log_error("Failed to open file \"%s\"%s%s", fn, + errno ? " : " : "", errno ? strerror(errno) : ""); + if ( rmme ) free(rmme); + + if (hfile) + hclose_abruptly(hfile); + + return NULL; +} + +htsFile *hts_open(const char *fn, const char *mode) { + return hts_open_format(fn, mode, NULL); +} + +/* + * Splits str into a prefix, delimiter ('\0' or delim), and suffix, writing + * the prefix in lowercase into buf and returning a pointer to the suffix. + * On return, buf is always NUL-terminated; thus assumes that the "keyword" + * prefix should be one of several known values of maximum length buflen-2. + * (If delim is not found, returns a pointer to the '\0'.) + */ +static const char * +scan_keyword(const char *str, char delim, char *buf, size_t buflen) +{ + size_t i = 0; + while (*str && *str != delim) { + if (i < buflen-1) buf[i++] = tolower_c(*str); + str++; + } + + buf[i] = '\0'; + return *str? str+1 : str; +} + +/* + * Parses arg and appends it to the option list. + * + * Returns 0 on success; + * -1 on failure. + */ +int hts_opt_add(hts_opt **opts, const char *c_arg) { + hts_opt *o, *t; + char *val; + + /* + * IMPORTANT!!! + * If you add another string option here, don't forget to also add + * it to the case statement in hts_opt_apply. 
+ */ + + if (!c_arg) + return -1; + + if (!(o = malloc(sizeof(*o)))) + return -1; + + if (!(o->arg = strdup(c_arg))) { + free(o); + return -1; + } + + if (!(val = strchr(o->arg, '='))) + val = "1"; // assume boolean + else + *val++ = '\0'; + + if (strcmp(o->arg, "decode_md") == 0 || + strcmp(o->arg, "DECODE_MD") == 0) + o->opt = CRAM_OPT_DECODE_MD, o->val.i = atoi(val); + + else if (strcmp(o->arg, "verbosity") == 0 || + strcmp(o->arg, "VERBOSITY") == 0) + o->opt = CRAM_OPT_VERBOSITY, o->val.i = atoi(val); + + else if (strcmp(o->arg, "seqs_per_slice") == 0 || + strcmp(o->arg, "SEQS_PER_SLICE") == 0) + o->opt = CRAM_OPT_SEQS_PER_SLICE, o->val.i = atoi(val); + + else if (strcmp(o->arg, "bases_per_slice") == 0 || + strcmp(o->arg, "BASES_PER_SLICE") == 0) + o->opt = CRAM_OPT_BASES_PER_SLICE, o->val.i = atoi(val); + + else if (strcmp(o->arg, "slices_per_container") == 0 || + strcmp(o->arg, "SLICES_PER_CONTAINER") == 0) + o->opt = CRAM_OPT_SLICES_PER_CONTAINER, o->val.i = atoi(val); + + else if (strcmp(o->arg, "embed_ref") == 0 || + strcmp(o->arg, "EMBED_REF") == 0) + o->opt = CRAM_OPT_EMBED_REF, o->val.i = atoi(val); + + else if (strcmp(o->arg, "no_ref") == 0 || + strcmp(o->arg, "NO_REF") == 0) + o->opt = CRAM_OPT_NO_REF, o->val.i = atoi(val); + + else if (strcmp(o->arg, "pos_delta") == 0 || + strcmp(o->arg, "POS_DELTA") == 0) + o->opt = CRAM_OPT_POS_DELTA, o->val.i = atoi(val); + + else if (strcmp(o->arg, "ignore_md5") == 0 || + strcmp(o->arg, "IGNORE_MD5") == 0) + o->opt = CRAM_OPT_IGNORE_MD5, o->val.i = atoi(val); + + else if (strcmp(o->arg, "use_bzip2") == 0 || + strcmp(o->arg, "USE_BZIP2") == 0) + o->opt = CRAM_OPT_USE_BZIP2, o->val.i = atoi(val); + + else if (strcmp(o->arg, "use_rans") == 0 || + strcmp(o->arg, "USE_RANS") == 0) + o->opt = CRAM_OPT_USE_RANS, o->val.i = atoi(val); + + else if (strcmp(o->arg, "use_lzma") == 0 || + strcmp(o->arg, "USE_LZMA") == 0) + o->opt = CRAM_OPT_USE_LZMA, o->val.i = atoi(val); + + else if (strcmp(o->arg, "use_tok") == 0 || + 
strcmp(o->arg, "USE_TOK") == 0) + o->opt = CRAM_OPT_USE_TOK, o->val.i = atoi(val); + + else if (strcmp(o->arg, "use_fqz") == 0 || + strcmp(o->arg, "USE_FQZ") == 0) + o->opt = CRAM_OPT_USE_FQZ, o->val.i = atoi(val); + + else if (strcmp(o->arg, "use_arith") == 0 || + strcmp(o->arg, "USE_ARITH") == 0) + o->opt = CRAM_OPT_USE_ARITH, o->val.i = atoi(val); + + else if (strcmp(o->arg, "fast") == 0 || + strcmp(o->arg, "FAST") == 0) + o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_FAST; + + else if (strcmp(o->arg, "normal") == 0 || + strcmp(o->arg, "NORMAL") == 0) + o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_NORMAL; + + else if (strcmp(o->arg, "small") == 0 || + strcmp(o->arg, "SMALL") == 0) + o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_SMALL; + + else if (strcmp(o->arg, "archive") == 0 || + strcmp(o->arg, "ARCHIVE") == 0) + o->opt = HTS_OPT_PROFILE, o->val.i = HTS_PROFILE_ARCHIVE; + + else if (strcmp(o->arg, "reference") == 0 || + strcmp(o->arg, "REFERENCE") == 0) + o->opt = CRAM_OPT_REFERENCE, o->val.s = val; + + else if (strcmp(o->arg, "version") == 0 || + strcmp(o->arg, "VERSION") == 0) + o->opt = CRAM_OPT_VERSION, o->val.s =val; + + else if (strcmp(o->arg, "multi_seq_per_slice") == 0 || + strcmp(o->arg, "MULTI_SEQ_PER_SLICE") == 0) + o->opt = CRAM_OPT_MULTI_SEQ_PER_SLICE, o->val.i = atoi(val); + + else if (strcmp(o->arg, "nthreads") == 0 || + strcmp(o->arg, "NTHREADS") == 0) + o->opt = HTS_OPT_NTHREADS, o->val.i = atoi(val); + + else if (strcmp(o->arg, "cache_size") == 0 || + strcmp(o->arg, "CACHE_SIZE") == 0) { + char *endp; + o->opt = HTS_OPT_CACHE_SIZE; + o->val.i = strtol(val, &endp, 0); + // NB: Doesn't support floats, eg 1.5g + // TODO: extend hts_parse_decimal? See also samtools sort. 
+ switch (*endp) { + case 'g': case 'G': o->val.i *= 1024; // fall through + case 'm': case 'M': o->val.i *= 1024; // fall through + case 'k': case 'K': o->val.i *= 1024; break; + case '\0': break; + default: + hts_log_error("Unrecognised cache size suffix '%c'", *endp); + free(o->arg); + free(o); + return -1; + } + } + + else if (strcmp(o->arg, "required_fields") == 0 || + strcmp(o->arg, "REQUIRED_FIELDS") == 0) + o->opt = CRAM_OPT_REQUIRED_FIELDS, o->val.i = strtol(val, NULL, 0); + + else if (strcmp(o->arg, "lossy_names") == 0 || + strcmp(o->arg, "LOSSY_NAMES") == 0) + o->opt = CRAM_OPT_LOSSY_NAMES, o->val.i = strtol(val, NULL, 0); + + else if (strcmp(o->arg, "name_prefix") == 0 || + strcmp(o->arg, "NAME_PREFIX") == 0) + o->opt = CRAM_OPT_PREFIX, o->val.s = val; + + else if (strcmp(o->arg, "store_md") == 0 || + strcmp(o->arg, "store_md") == 0) + o->opt = CRAM_OPT_STORE_MD, o->val.i = atoi(val); + + else if (strcmp(o->arg, "store_nm") == 0 || + strcmp(o->arg, "store_nm") == 0) + o->opt = CRAM_OPT_STORE_NM, o->val.i = atoi(val); + + else if (strcmp(o->arg, "block_size") == 0 || + strcmp(o->arg, "BLOCK_SIZE") == 0) + o->opt = HTS_OPT_BLOCK_SIZE, o->val.i = strtol(val, NULL, 0); + + else if (strcmp(o->arg, "level") == 0 || + strcmp(o->arg, "LEVEL") == 0) + o->opt = HTS_OPT_COMPRESSION_LEVEL, o->val.i = strtol(val, NULL, 0); + + else if (strcmp(o->arg, "filter") == 0 || + strcmp(o->arg, "FILTER") == 0) + o->opt = HTS_OPT_FILTER, o->val.s = val; + + else if (strcmp(o->arg, "fastq_aux") == 0 || + strcmp(o->arg, "FASTQ_AUX") == 0) + o->opt = FASTQ_OPT_AUX, o->val.s = val; + + else if (strcmp(o->arg, "fastq_barcode") == 0 || + strcmp(o->arg, "FASTQ_BARCODE") == 0) + o->opt = FASTQ_OPT_BARCODE, o->val.s = val; + + else if (strcmp(o->arg, "fastq_rnum") == 0 || + strcmp(o->arg, "FASTQ_RNUM") == 0) + o->opt = FASTQ_OPT_RNUM, o->val.i = 1; + + else if (strcmp(o->arg, "fastq_casava") == 0 || + strcmp(o->arg, "FASTQ_CASAVA") == 0) + o->opt = FASTQ_OPT_CASAVA, o->val.i = 1; + + 
else if (strcmp(o->arg, "fastq_name2") == 0 || + strcmp(o->arg, "FASTQ_NAME2") == 0) + o->opt = FASTQ_OPT_NAME2, o->val.i = 1; + + else { + hts_log_error("Unknown option '%s'", o->arg); + free(o->arg); + free(o); + return -1; + } + + o->next = NULL; + + // Append; assumes small list. + if (*opts) { + t = *opts; + while (t->next) + t = t->next; + t->next = o; + } else { + *opts = o; + } + + return 0; +} + +/* + * Applies an hts_opt option list to a given htsFile. + * + * Returns 0 on success + * -1 on failure + */ +int hts_opt_apply(htsFile *fp, hts_opt *opts) { + hts_opt *last = NULL; + + for (; opts; opts = (last=opts)->next) { + switch (opts->opt) { + case CRAM_OPT_REFERENCE: + if (!(fp->fn_aux = strdup(opts->val.s))) + return -1; + // fall through + case CRAM_OPT_VERSION: + case CRAM_OPT_PREFIX: + case HTS_OPT_FILTER: + case FASTQ_OPT_AUX: + case FASTQ_OPT_BARCODE: + if (hts_set_opt(fp, opts->opt, opts->val.s) != 0) + return -1; + break; + default: + if (hts_set_opt(fp, opts->opt, opts->val.i) != 0) + return -1; + break; + } + } + + return 0; +} + +/* + * Frees an hts_opt list. + */ +void hts_opt_free(hts_opt *opts) { + hts_opt *last = NULL; + while (opts) { + opts = (last=opts)->next; + free(last->arg); + free(last); + } +} + + +/* + * Tokenise options as (key(=value)?,)*(key(=value)?)? + * NB: No provision for ',' appearing in the value! + * Add backslashing rules? + * + * This could be used as part of a general command line option parser or + * as a string concatenated onto the file open mode. + * + * Returns 0 on success + * -1 on failure. + */ +int hts_parse_opt_list(htsFormat *fmt, const char *str) { + while (str && *str) { + const char *str_start; + int len; + char arg[8001]; + + while (*str && *str == ',') + str++; + + for (str_start = str; *str && *str != ','; str++); + len = str - str_start; + + // Produce a nul terminated copy of the option + strncpy(arg, str_start, len < 8000 ? len : 8000); + arg[len < 8000 ? 
len : 8000] = '\0'; + + if (hts_opt_add((hts_opt **)&fmt->specific, arg) != 0) + return -1; + + if (*str) + str++; + } + + return 0; +} + +/* + * Accepts a string file format (sam, bam, cram, vcf, bam) optionally + * followed by a comma separated list of key=value options and splits + * these up into the fields of htsFormat struct. + * + * format is assumed to be already initialised, either to blank + * "unknown" values or via previous hts_opt_add calls. + * + * Returns 0 on success + * -1 on failure. + */ +int hts_parse_format(htsFormat *format, const char *str) { + char fmt[9]; + const char *cp = scan_keyword(str, ',', fmt, sizeof fmt); + + format->version.minor = 0; // unknown + format->version.major = 0; // unknown + + if (strcmp(fmt, "sam") == 0) { + format->category = sequence_data; + format->format = sam; + format->compression = no_compression; + format->compression_level = 0; + } else if (strcmp(fmt, "sam.gz") == 0) { + format->category = sequence_data; + format->format = sam; + format->compression = bgzf; + format->compression_level = -1; + } else if (strcmp(fmt, "bam") == 0) { + format->category = sequence_data; + format->format = bam; + format->compression = bgzf; + format->compression_level = -1; + } else if (strcmp(fmt, "cram") == 0) { + format->category = sequence_data; + format->format = cram; + format->compression = custom; + format->compression_level = -1; + } else if (strcmp(fmt, "vcf") == 0) { + format->category = variant_data; + format->format = vcf; + format->compression = no_compression; + format->compression_level = 0; + } else if (strcmp(fmt, "bcf") == 0) { + format->category = variant_data; + format->format = bcf; + format->compression = bgzf; + format->compression_level = -1; + } else if (strcmp(fmt, "fastq") == 0 || strcmp(fmt, "fq") == 0) { + format->category = sequence_data; + format->format = fastq_format; + format->compression = no_compression; + format->compression_level = 0; + } else if (strcmp(fmt, "fastq.gz") == 0 || strcmp(fmt, 
"fq.gz") == 0) { + format->category = sequence_data; + format->format = fastq_format; + format->compression = bgzf; + format->compression_level = 0; + } else if (strcmp(fmt, "fasta") == 0 || strcmp(fmt, "fa") == 0) { + format->category = sequence_data; + format->format = fasta_format; + format->compression = no_compression; + format->compression_level = 0; + } else if (strcmp(fmt, "fasta.gz") == 0 || strcmp(fmt, "fa.gz") == 0) { + format->category = sequence_data; + format->format = fasta_format; + format->compression = bgzf; + format->compression_level = 0; + } else { + return -1; + } + + return hts_parse_opt_list(format, cp); +} + + +/* + * Tokenise options as (key(=value)?,)*(key(=value)?)? + * NB: No provision for ',' appearing in the value! + * Add backslashing rules? + * + * This could be used as part of a general command line option parser or + * as a string concatenated onto the file open mode. + * + * Returns 0 on success + * -1 on failure. + */ +static int hts_process_opts(htsFile *fp, const char *opts) { + htsFormat fmt; + + fmt.specific = NULL; + if (hts_parse_opt_list(&fmt, opts) != 0) + return -1; + + if (hts_opt_apply(fp, fmt.specific) != 0) { + hts_opt_free(fmt.specific); + return -1; + } + + hts_opt_free(fmt.specific); + + return 0; +} + +static int hts_crypt4gh_redirect(const char *fn, const char *mode, + hFILE **hfile_ptr, htsFile *fp) { + hFILE *hfile1 = *hfile_ptr; + hFILE *hfile2 = NULL; + char fn_buf[512], *fn2 = fn_buf; + char mode2[102]; // Size set by sizeof(simple_mode) in hts_hopen() + const char *prefix = "crypt4gh:"; + size_t fn2_len = strlen(prefix) + strlen(fn) + 1; + int ret = -1; + + if (fn2_len > sizeof(fn_buf)) { + if (fn2_len >= INT_MAX) // Silence gcc format-truncation warning + return -1; + fn2 = malloc(fn2_len); + if (!fn2) return -1; + } + + // Reopen fn using the crypt4gh plug-in (if available) + snprintf(fn2, fn2_len, "%s%s", prefix, fn); + snprintf(mode2, sizeof(mode2), "%s%s", mode, strchr(mode, ':') ? 
"" : ":"); + hfile2 = hopen(fn2, mode2, "parent", hfile1, NULL); + if (hfile2) { + // Replace original hfile with the new one. The original is now + // enclosed within hfile2 + *hfile_ptr = hfile2; + ret = 0; + } + + if (fn2 != fn_buf) + free(fn2); + return ret; +} + +htsFile *hts_hopen(hFILE *hfile, const char *fn, const char *mode) +{ + hFILE *hfile_orig = hfile; + hFILE *hfile_cleanup = hfile; + htsFile *fp = (htsFile*)calloc(1, sizeof(htsFile)); + char simple_mode[101], *cp, *opts; + simple_mode[100] = '\0'; + + if (fp == NULL) goto error; + + fp->fn = strdup(fn); + fp->is_be = ed_is_big(); + + // Split mode into simple_mode,opts strings + if ((cp = strchr(mode, ','))) { + strncpy(simple_mode, mode, cp-mode <= 100 ? cp-mode : 100); + simple_mode[cp-mode] = '\0'; + opts = cp+1; + } else { + strncpy(simple_mode, mode, 100); + opts = NULL; + } + + if (strchr(simple_mode, 'r')) { + const int max_loops = 5; // Should be plenty + int loops = 0; + if (hts_detect_format2(hfile, fn, &fp->format) < 0) goto error; + + // Deal with formats that re-direct an underlying file via a plug-in. + // Loops as we may have crypt4gh served via htsget, or + // crypt4gh-in-crypt4gh. + + while (fp->format.format == htsget || + fp->format.format == hts_crypt4gh_format) { + // Ensure we don't get stuck in an endless redirect loop + if (++loops > max_loops) { + errno = ELOOP; + goto error; + } + + if (fp->format.format == htsget) { + hFILE *hfile2 = hopen_htsget_redirect(hfile, simple_mode); + if (hfile2 == NULL) goto error; + + if (hfile != hfile_cleanup) { + // Close the result of an earlier redirection + hclose_abruptly(hfile); + } + + hfile = hfile2; + } + else if (fp->format.format == hts_crypt4gh_format) { + int should_preserve = (hfile == hfile_orig); + int update_cleanup = (hfile == hfile_cleanup); + if (hts_crypt4gh_redirect(fn, simple_mode, &hfile, fp) < 0) + goto error; + if (should_preserve) { + // The original hFILE is now contained in a crypt4gh + // wrapper. 
Should we need to close the wrapper due + // to a later error, we need to prevent the wrapped + // handle from being closed as the caller will see + // this function return NULL and try to clean up itself. + hfile_orig->preserve = 1; + } + if (update_cleanup) { + // Update handle to close at the end if redirected by htsget + hfile_cleanup = hfile; + } + } + + // Re-detect format against the result of the redirection + if (hts_detect_format2(hfile, fn, &fp->format) < 0) goto error; + } + } + else if (strchr(simple_mode, 'w') || strchr(simple_mode, 'a')) { + htsFormat *fmt = &fp->format; + fp->is_write = 1; + + if (strchr(simple_mode, 'b')) fmt->format = binary_format; + else if (strchr(simple_mode, 'c')) fmt->format = cram; + else if (strchr(simple_mode, 'f')) fmt->format = fastq_format; + else if (strchr(simple_mode, 'F')) fmt->format = fasta_format; + else fmt->format = text_format; + + if (strchr(simple_mode, 'z')) fmt->compression = bgzf; + else if (strchr(simple_mode, 'g')) fmt->compression = gzip; + else if (strchr(simple_mode, 'u')) fmt->compression = no_compression; + else { + // No compression mode specified, set to the default for the format + switch (fmt->format) { + case binary_format: fmt->compression = bgzf; break; + case cram: fmt->compression = custom; break; + case fastq_format: fmt->compression = no_compression; break; + case fasta_format: fmt->compression = no_compression; break; + case text_format: fmt->compression = no_compression; break; + default: abort(); + } + } + + // Fill in category (if determinable; e.g. 
'b' could be BAM or BCF) + fmt->category = format_category(fmt->format); + + fmt->version.major = fmt->version.minor = -1; + fmt->compression_level = -1; + fmt->specific = NULL; + } + else { errno = EINVAL; goto error; } + + switch (fp->format.format) { + case binary_format: + case bam: + case bcf: + fp->fp.bgzf = bgzf_hopen(hfile, simple_mode); + if (fp->fp.bgzf == NULL) goto error; + fp->is_bin = fp->is_bgzf = 1; + break; + + case cram: + fp->fp.cram = cram_dopen(hfile, fn, simple_mode); + if (fp->fp.cram == NULL) goto error; + if (!fp->is_write) + cram_set_option(fp->fp.cram, CRAM_OPT_DECODE_MD, -1); // auto + fp->is_cram = 1; + break; + + case empty_format: + case text_format: + case bed: + case fasta_format: + case fastq_format: + case sam: + case vcf: + if (fp->format.compression != no_compression) { + fp->fp.bgzf = bgzf_hopen(hfile, simple_mode); + if (fp->fp.bgzf == NULL) goto error; + fp->is_bgzf = 1; + } + else + fp->fp.hfile = hfile; + break; + + default: + errno = EFTYPE; + goto error; + } + + if (opts) + hts_process_opts(fp, opts); + + // Allow original file to close if it was preserved earlier by crypt4gh + hfile_orig->preserve = 0; + + // If redirecting via htsget, close the original hFILE now (pedantically + // we would instead close it in hts_close(), but this a simplifying + // optimisation) + if (hfile != hfile_cleanup) hclose_abruptly(hfile_cleanup); + + return fp; + +error: + hts_log_error("Failed to open file %s", fn); + + // If redirecting, close the failed redirection hFILE that we have opened + if (hfile != hfile_orig) hclose_abruptly(hfile); + hfile_orig->preserve = 0; // Allow caller to close the original hfile + + if (fp) { + free(fp->fn); + free(fp->fn_aux); + free(fp); + } + return NULL; +} + +static int hts_idx_close_otf_fp(hts_idx_t *idx); + +int hts_close(htsFile *fp) +{ + int ret = 0, save; + if (!fp) { + errno = EINVAL; + return -1; + } + + switch (fp->format.format) { + case binary_format: + case bam: + case bcf: + ret = 
bgzf_close(fp->fp.bgzf); + break; + + case cram: + if (!fp->is_write) { + switch (cram_eof(fp->fp.cram)) { + case 2: + hts_log_warning("EOF marker is absent. The input is probably truncated"); + break; + case 0: /* not at EOF, but may not have wanted all seqs */ + default: /* case 1, expected EOF */ + break; + } + } + ret = cram_close(fp->fp.cram); + break; + + case empty_format: + case text_format: + case bed: + case fasta_format: + case fastq_format: + case sam: + case vcf: + if (fp->format.format == sam) + ret = sam_state_destroy(fp); + else if (fp->format.format == fastq_format || + fp->format.format == fasta_format) + fastq_state_destroy(fp); + + if (fp->format.compression != no_compression) + ret |= bgzf_close(fp->fp.bgzf); + else + ret |= hclose(fp->fp.hfile); + break; + + default: + ret = -1; + break; + } + + if (fp->idx) { + // Close deferred index file handle, if present. + // Unfortunately this means errors on the index will get mixed with + // those on the main file, but as we only have the EOF block left to + // write it hopefully won't happen that often. + ret |= hts_idx_close_otf_fp(fp->idx); + } + + save = errno; + sam_hdr_destroy(fp->bam_header); + hts_idx_destroy(fp->idx); + hts_filter_free(fp->filter); + free(fp->fn); + free(fp->fn_aux); + free(fp->line.s); + free(fp); + errno = save; + return ret; +} + +int hts_flush(htsFile *fp) +{ + if (fp == NULL) return 0; + + switch (fp->format.format) { + case binary_format: + case bam: + case bcf: + return bgzf_flush(fp->fp.bgzf); + + case cram: + return cram_flush(fp->fp.cram); + + case empty_format: + case text_format: + case bed: + case fasta_format: + case fastq_format: + case sam: + case vcf: + if (fp->format.compression != no_compression) + return bgzf_flush(fp->fp.bgzf); + else + return hflush(fp->fp.hfile); + + default: + break; + } + + return 0; +} + +const htsFormat *hts_get_format(htsFile *fp) +{ + return fp? 
&fp->format : NULL; +} + +const char *hts_format_file_extension(const htsFormat *format) { + if (!format) + return "?"; + + switch (format->format) { + case sam: return "sam"; + case bam: return "bam"; + case bai: return "bai"; + case cram: return "cram"; + case crai: return "crai"; + case vcf: return "vcf"; + case bcf: return "bcf"; + case csi: return "csi"; + case fai_format: return "fai"; + case fqi_format: return "fqi"; + case gzi: return "gzi"; + case tbi: return "tbi"; + case bed: return "bed"; + case d4_format: return "d4"; + case fasta_format: return "fa"; + case fastq_format: return "fq"; + default: return "?"; + } +} + +static hFILE *hts_hfile(htsFile *fp) { + switch (fp->format.format) { + case binary_format:// fall through + case bcf: // fall through + case bam: return bgzf_hfile(fp->fp.bgzf); + case cram: return cram_hfile(fp->fp.cram); + case text_format: // fall through + case vcf: // fall through + case fastq_format: // fall through + case fasta_format: // fall through + case sam: return fp->format.compression != no_compression + ? bgzf_hfile(fp->fp.bgzf) + : fp->fp.hfile; + default: return NULL; + } +} + +int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...) { + int r; + va_list args; + + switch (opt) { + case HTS_OPT_NTHREADS: { + va_start(args, opt); + int nthreads = va_arg(args, int); + va_end(args); + return hts_set_threads(fp, nthreads); + } + + case HTS_OPT_BLOCK_SIZE: { + hFILE *hf = hts_hfile(fp); + + if (hf) { + va_start(args, opt); + if (hfile_set_blksize(hf, va_arg(args, int)) != 0) + hts_log_warning("Failed to change block size"); + va_end(args); + } + else { + // To do - implement for vcf/bcf. 
+ hts_log_warning("Cannot change block size for this format"); + } + + return 0; + } + + case HTS_OPT_THREAD_POOL: { + va_start(args, opt); + htsThreadPool *p = va_arg(args, htsThreadPool *); + va_end(args); + return hts_set_thread_pool(fp, p); + } + + case HTS_OPT_CACHE_SIZE: { + va_start(args, opt); + int cache_size = va_arg(args, int); + va_end(args); + hts_set_cache_size(fp, cache_size); + return 0; + } + + case FASTQ_OPT_CASAVA: + case FASTQ_OPT_RNUM: + case FASTQ_OPT_NAME2: + if (fp->format.format == fastq_format || + fp->format.format == fasta_format) + return fastq_state_set(fp, opt); + return 0; + + case FASTQ_OPT_AUX: + if (fp->format.format == fastq_format || + fp->format.format == fasta_format) { + va_start(args, opt); + char *list = va_arg(args, char *); + va_end(args); + return fastq_state_set(fp, opt, list); + } + return 0; + + case FASTQ_OPT_BARCODE: + if (fp->format.format == fastq_format || + fp->format.format == fasta_format) { + va_start(args, opt); + char *bc = va_arg(args, char *); + va_end(args); + return fastq_state_set(fp, opt, bc); + } + return 0; + + // Options below here flow through to cram_set_voption + case HTS_OPT_COMPRESSION_LEVEL: { + va_start(args, opt); + int level = va_arg(args, int); + va_end(args); + if (fp->is_bgzf) + fp->fp.bgzf->compress_level = level; + else if (fp->format.format == cram) + return cram_set_option(fp->fp.cram, opt, level); + return 0; + } + + case HTS_OPT_FILTER: { + va_start(args, opt); + char *expr = va_arg(args, char *); + va_end(args); + return hts_set_filter_expression(fp, expr); + } + + case HTS_OPT_PROFILE: { + va_start(args, opt); + enum hts_profile_option prof = va_arg(args, int); + va_end(args); + if (fp->is_bgzf) { + switch (prof) { +#ifdef HAVE_LIBDEFLATE + case HTS_PROFILE_FAST: fp->fp.bgzf->compress_level = 2; break; + case HTS_PROFILE_NORMAL: fp->fp.bgzf->compress_level = -1; break; + case HTS_PROFILE_SMALL: fp->fp.bgzf->compress_level = 10; break; + case HTS_PROFILE_ARCHIVE: 
fp->fp.bgzf->compress_level = 12; break; +#else + case HTS_PROFILE_FAST: fp->fp.bgzf->compress_level = 1; break; + case HTS_PROFILE_NORMAL: fp->fp.bgzf->compress_level = -1; break; + case HTS_PROFILE_SMALL: fp->fp.bgzf->compress_level = 8; break; + case HTS_PROFILE_ARCHIVE: fp->fp.bgzf->compress_level = 9; break; +#endif + } + } // else CRAM manages this in its own way + break; + } + + default: + break; + } + + if (fp->format.format != cram) + return 0; + + va_start(args, opt); + r = cram_set_voption(fp->fp.cram, opt, args); + va_end(args); + + return r; +} + +BGZF *hts_get_bgzfp(htsFile *fp); + +int hts_set_threads(htsFile *fp, int n) +{ + if (fp->format.format == sam) { + return sam_set_threads(fp, n); + } else if (fp->format.compression == bgzf) { + return bgzf_mt(hts_get_bgzfp(fp), n, 256/*unused*/); + } else if (fp->format.format == cram) { + return hts_set_opt(fp, CRAM_OPT_NTHREADS, n); + } + else return 0; +} + +int hts_set_thread_pool(htsFile *fp, htsThreadPool *p) { + if (fp->format.format == sam || fp->format.format == text_format) { + return sam_set_thread_pool(fp, p); + } else if (fp->format.compression == bgzf) { + return bgzf_thread_pool(hts_get_bgzfp(fp), p->pool, p->qsize); + } else if (fp->format.format == cram) { + return hts_set_opt(fp, CRAM_OPT_THREAD_POOL, p); + } + else return 0; +} + +void hts_set_cache_size(htsFile *fp, int n) +{ + if (fp->format.compression == bgzf) + bgzf_set_cache_size(hts_get_bgzfp(fp), n); +} + +int hts_set_fai_filename(htsFile *fp, const char *fn_aux) +{ + free(fp->fn_aux); + if (fn_aux) { + fp->fn_aux = strdup(fn_aux); + if (fp->fn_aux == NULL) return -1; + } + else fp->fn_aux = NULL; + + if (fp->format.format == cram) + if (cram_set_option(fp->fp.cram, CRAM_OPT_REFERENCE, fp->fn_aux)) + return -1; + + return 0; +} + +int hts_set_filter_expression(htsFile *fp, const char *expr) +{ + if (fp->filter) + hts_filter_free(fp->filter); + + if (!expr) + return 0; + + return (fp->filter = hts_filter_init(expr)) + ? 
0 : -1; +} + +hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname) +{ + int pid = (int) getpid(); + unsigned ptr = (uintptr_t) tmpname; + int n = 0; + hFILE *fp = NULL; + + do { + // Attempt to further uniquify the temporary filename + unsigned t = ((unsigned) time(NULL)) ^ ((unsigned) clock()) ^ ptr; + n++; + + ks_clear(tmpname); + if (ksprintf(tmpname, "%s.tmp_%d_%d_%u", fname, pid, n, t) < 0) break; + + fp = hopen(tmpname->s, mode); + } while (fp == NULL && errno == EEXIST && n < 100); + + return fp; +} + +int hts_is_utf16_text(const kstring_t *str) +{ + const unsigned char *u = (const unsigned char *) (str->s); + return (str->l > 0 && str->s)? is_utf16_text(u, u + str->l) : 0; +} + +// For VCF/BCF backward sweeper. Not exposing these functions because their +// future is uncertain. Things will probably have to change with hFILE... +BGZF *hts_get_bgzfp(htsFile *fp) +{ + if (fp->is_bgzf) + return fp->fp.bgzf; + else + return NULL; +} +int hts_useek(htsFile *fp, off_t uoffset, int where) +{ + if (fp->is_bgzf) + return bgzf_useek(fp->fp.bgzf, uoffset, where); + else + return (hseek(fp->fp.hfile, uoffset, SEEK_SET) >= 0)? 0 : -1; +} +off_t hts_utell(htsFile *fp) +{ + if (fp->is_bgzf) + return bgzf_utell(fp->fp.bgzf); + else + return htell(fp->fp.hfile); +} + +int hts_getline(htsFile *fp, int delimiter, kstring_t *str) +{ + int ret; + if (! (delimiter == KS_SEP_LINE || delimiter == '\n')) { + hts_log_error("Unexpected delimiter %d", delimiter); + abort(); + } + + switch (fp->format.compression) { + case no_compression: + str->l = 0; + ret = kgetline2(str, (kgets_func2 *) hgetln, fp->fp.hfile); + if (ret >= 0) ret = (str->l <= INT_MAX)? 
(int) str->l : INT_MAX; + else if (herrno(fp->fp.hfile)) ret = -2, errno = herrno(fp->fp.hfile); + else ret = -1; + break; + + case gzip: + case bgzf: + ret = bgzf_getline(fp->fp.bgzf, '\n', str); + break; + + default: + abort(); + } + + ++fp->lineno; + return ret; +} + +char **hts_readlist(const char *string, int is_file, int *_n) +{ + unsigned int m = 0, n = 0; + char **s = 0, **s_new; + if ( is_file ) + { + BGZF *fp = bgzf_open(string, "r"); + if ( !fp ) return NULL; + + kstring_t str; + int ret; + str.s = 0; str.l = str.m = 0; + while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) + { + if (str.l == 0) continue; + if (n == 0 && hts_is_utf16_text(&str)) + hts_log_warning("'%s' appears to be encoded as UTF-16", string); + if (hts_resize(char*, n + 1, &m, &s, 0) < 0) + goto err; + s[n] = strdup(str.s); + if (!s[n]) + goto err; + n++; + } + if (ret < -1) // Read error + goto err; + bgzf_close(fp); + free(str.s); + } + else + { + const char *q = string, *p = string; + while ( 1 ) + { + if (*p == ',' || *p == 0) + { + if (hts_resize(char*, n + 1, &m, &s, 0) < 0) + goto err; + s[n] = (char*)calloc(p - q + 1, 1); + if (!s[n]) + goto err; + strncpy(s[n++], q, p - q); + q = p + 1; + } + if ( !*p ) break; + p++; + } + } + // Try to shrink s to the minimum size needed + s_new = (char**)realloc(s, n * sizeof(char*)); + if (!s_new) + goto err; + + s = s_new; + assert(n < INT_MAX); // hts_resize() should ensure this + *_n = n; + return s; + + err: + for (m = 0; m < n; m++) + free(s[m]); + free(s); + return NULL; +} + +char **hts_readlines(const char *fn, int *_n) +{ + unsigned int m = 0, n = 0; + char **s = 0, **s_new; + BGZF *fp = bgzf_open(fn, "r"); + if ( fp ) { // read from file + kstring_t str; + int ret; + str.s = 0; str.l = str.m = 0; + while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) { + if (str.l == 0) continue; + if (n == 0 && hts_is_utf16_text(&str)) + hts_log_warning("'%s' appears to be encoded as UTF-16", fn); + if (hts_resize(char *, n + 1, &m, &s, 0) < 0) + goto 
err; + s[n] = strdup(str.s); + if (!s[n]) + goto err; + n++; + } + if (ret < -1) // Read error + goto err; + bgzf_close(fp); + free(str.s); + } else if (*fn == ':') { // read from string + const char *q, *p; + for (q = p = fn + 1;; ++p) + if (*p == ',' || *p == 0) { + if (hts_resize(char *, n + 1, &m, &s, 0) < 0) + goto err; + s[n] = (char*)calloc(p - q + 1, 1); + if (!s[n]) + goto err; + strncpy(s[n++], q, p - q); + q = p + 1; + if (*p == 0) break; + } + } else return 0; + // Try to shrink s to the minimum size needed + s_new = (char**)realloc(s, n * sizeof(char*)); + if (!s_new) + goto err; + + s = s_new; + assert(n < INT_MAX); // hts_resize() should ensure this + *_n = n; + return s; + + err: + for (m = 0; m < n; m++) + free(s[m]); + free(s); + return NULL; +} + +// DEPRECATED: To be removed in a future HTSlib release +int hts_file_type(const char *fname) +{ + int len = strlen(fname); + if ( !strcasecmp(".vcf.gz",fname+len-7) ) return FT_VCF_GZ; + if ( !strcasecmp(".vcf",fname+len-4) ) return FT_VCF; + if ( !strcasecmp(".bcf",fname+len-4) ) return FT_BCF_GZ; + if ( !strcmp("-",fname) ) return FT_STDIN; + + hFILE *f = hopen(fname, "r"); + if (f == NULL) return 0; + + htsFormat fmt; + if (hts_detect_format2(f, fname, &fmt) < 0) { hclose_abruptly(f); return 0; } + if (hclose(f) < 0) return 0; + + switch (fmt.format) { + case vcf: return (fmt.compression == no_compression)? FT_VCF : FT_VCF_GZ; + case bcf: return (fmt.compression == no_compression)? 
FT_BCF : FT_BCF_GZ; + default: return 0; + } +} + +int hts_check_EOF(htsFile *fp) +{ + if (fp->format.compression == bgzf) + return bgzf_check_EOF(hts_get_bgzfp(fp)); + else if (fp->format.format == cram) + return cram_check_EOF(fp->fp.cram); + else + return 3; +} + + +/**************** + *** Indexing *** + ****************/ + +#define HTS_MIN_MARKER_DIST 0x10000 + +// Finds the special meta bin +// ((1<<(3 * n_lvls + 3)) - 1) / 7 + 1 +#define META_BIN(idx) ((idx)->n_bins + 1) + +#define pair64_lt(a,b) ((a).u < (b).u) +#define pair64max_lt(a,b) ((a).u < (b).u || \ + ((a).u == (b).u && (a).max < (b).max)) + +KSORT_INIT_STATIC(_off, hts_pair64_t, pair64_lt) +KSORT_INIT_STATIC(_off_max, hts_pair64_max_t, pair64max_lt) + +typedef struct { + int32_t m, n; + uint64_t loff; + hts_pair64_t *list; +} bins_t; + +KHASH_MAP_INIT_INT(bin, bins_t) +typedef khash_t(bin) bidx_t; + +typedef struct { + hts_pos_t n, m; + uint64_t *offset; +} lidx_t; + +struct hts_idx_t { + int fmt, min_shift, n_lvls, n_bins; + uint32_t l_meta; + int32_t n, m; + uint64_t n_no_coor; + bidx_t **bidx; + lidx_t *lidx; + uint8_t *meta; // MUST have a terminating NUL on the end + int tbi_n, last_tbi_tid; + struct { + uint32_t last_bin, save_bin; + hts_pos_t last_coor; + int last_tid, save_tid, finished; + uint64_t last_off, save_off; + uint64_t off_beg, off_end; + uint64_t n_mapped, n_unmapped; + } z; // keep internal states + BGZF *otf_fp; // Index on-the-fly output file +}; + +static char * idx_format_name(int fmt) { + switch (fmt) { + case HTS_FMT_CSI: return "csi"; + case HTS_FMT_BAI: return "bai"; + case HTS_FMT_TBI: return "tbi"; + case HTS_FMT_CRAI: return "crai"; + default: return "unknown"; + } +} + +#ifdef DEBUG_INDEX +static void idx_dump(const hts_idx_t *idx) { + int i; + int64_t j; + + if (!idx) fprintf(stderr, "Null index\n"); + + fprintf(stderr, "format='%s', min_shift=%d, n_lvls=%d, n_bins=%d, l_meta=%u ", + idx_format_name(idx->fmt), idx->min_shift, idx->n_lvls, idx->n_bins, idx->l_meta); + 
fprintf(stderr, "n=%d, m=%d, n_no_coor=%"PRIu64"\n", idx->n, idx->m, idx->n_no_coor); + for (i = 0; i < idx->n; i++) { + bidx_t *bidx = idx->bidx[i]; + lidx_t *lidx = &idx->lidx[i]; + if (bidx) { + fprintf(stderr, "======== BIN Index - tid=%d, n_buckets=%d, size=%d\n", i, bidx->n_buckets, bidx->size); + int b; + for (b = 0; b < META_BIN(idx); b++) { + khint_t k; + if ((k = kh_get(bin, bidx, b)) != kh_end(bidx)) { + bins_t *entries = &kh_value(bidx, k); + int l = hts_bin_level(b); + int64_t bin_width = 1LL << ((idx->n_lvls - l) * 3 + idx->min_shift); + fprintf(stderr, "\tbin=%d, level=%d, parent=%d, n_chunks=%d, loff=%"PRIu64", interval=[%"PRId64" - %"PRId64"]\n", + b, l, hts_bin_parent(b), entries->n, entries->loff, (b-hts_bin_first(l))*bin_width+1, (b+1-hts_bin_first(l))*bin_width); + for (j = 0; j < entries->n; j++) + fprintf(stderr, "\t\tchunk=%"PRId64", u=%"PRIu64", v=%"PRIu64"\n", j, entries->list[j].u, entries->list[j].v); + } + } + } + if (lidx) { + fprintf(stderr, "======== LINEAR Index - tid=%d, n_values=%"PRId64"\n", i, lidx->n); + for (j = 0; j < lidx->n; j++) { + fprintf(stderr, "\t\tentry=%"PRId64", offset=%"PRIu64", interval=[%"PRId64" - %"PRId64"]\n", + j, lidx->offset[j], j*(1<min_shift)+1, (j+1)*(1<min_shift)); + } + } + } +} +#endif + +static inline int insert_to_b(bidx_t *b, int bin, uint64_t beg, uint64_t end) +{ + khint_t k; + bins_t *l; + int absent; + k = kh_put(bin, b, bin, &absent); + if (absent < 0) return -1; // Out of memory + l = &kh_value(b, k); + if (absent) { + l->m = 1; l->n = 0; + l->list = (hts_pair64_t*)calloc(l->m, sizeof(hts_pair64_t)); + if (!l->list) { + kh_del(bin, b, k); + return -1; + } + } else if (l->n == l->m) { + uint32_t new_m = l->m ? 
l->m << 1 : 1; + hts_pair64_t *new_list = realloc(l->list, new_m * sizeof(hts_pair64_t)); + if (!new_list) return -1; + l->list = new_list; + l->m = new_m; + } + l->list[l->n].u = beg; + l->list[l->n++].v = end; + return 0; +} + +static inline int insert_to_l(lidx_t *l, int64_t _beg, int64_t _end, uint64_t offset, int min_shift) +{ + int i; + hts_pos_t beg, end; + beg = _beg >> min_shift; + end = (_end - 1) >> min_shift; + if (l->m < end + 1) { + size_t new_m = l->m * 2 > end + 1 ? l->m * 2 : end + 1; + uint64_t *new_offset; + + new_offset = (uint64_t*)realloc(l->offset, new_m * sizeof(uint64_t)); + if (!new_offset) return -1; + + // fill unused memory with (uint64_t)-1 + memset(new_offset + l->m, 0xff, sizeof(uint64_t) * (new_m - l->m)); + l->m = new_m; + l->offset = new_offset; + } + for (i = beg; i <= end; ++i) { + if (l->offset[i] == (uint64_t)-1) l->offset[i] = offset; + } + if (l->n < end + 1) l->n = end + 1; + return 0; +} + +hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls) +{ + hts_idx_t *idx; + idx = (hts_idx_t*)calloc(1, sizeof(hts_idx_t)); + if (idx == NULL) return NULL; + idx->fmt = fmt; + idx->min_shift = min_shift; + idx->n_lvls = n_lvls; + idx->n_bins = ((1<<(3 * n_lvls + 3)) - 1) / 7; + idx->z.save_tid = idx->z.last_tid = -1; + idx->z.save_bin = idx->z.last_bin = 0xffffffffu; + idx->z.save_off = idx->z.last_off = idx->z.off_beg = idx->z.off_end = offset0; + idx->z.last_coor = 0xffffffffu; + if (n) { + idx->n = idx->m = n; + idx->bidx = (bidx_t**)calloc(n, sizeof(bidx_t*)); + if (idx->bidx == NULL) { free(idx); return NULL; } + idx->lidx = (lidx_t*) calloc(n, sizeof(lidx_t)); + if (idx->lidx == NULL) { free(idx->bidx); free(idx); return NULL; } + } + idx->tbi_n = -1; + idx->last_tbi_tid = -1; + idx->otf_fp = NULL; + return idx; +} + +static void update_loff(hts_idx_t *idx, int i, int free_lidx) +{ + bidx_t *bidx = idx->bidx[i]; + lidx_t *lidx = &idx->lidx[i]; + khint_t k; + int l; + // the last entry is always 
valid + for (l=lidx->n-2; l >= 0; l--) { + if (lidx->offset[l] == (uint64_t)-1) + lidx->offset[l] = lidx->offset[l+1]; + } + if (bidx == 0) return; + for (k = kh_begin(bidx); k != kh_end(bidx); ++k) // set loff + if (kh_exist(bidx, k)) + { + if ( kh_key(bidx, k) < idx->n_bins ) + { + int bot_bin = hts_bin_bot(kh_key(bidx, k), idx->n_lvls); + // disable linear index if bot_bin out of bounds + kh_val(bidx, k).loff = bot_bin < lidx->n ? lidx->offset[bot_bin] : 0; + } + else + kh_val(bidx, k).loff = 0; + } + if (free_lidx) { + free(lidx->offset); + lidx->m = lidx->n = 0; + lidx->offset = 0; + } +} + +static int compress_binning(hts_idx_t *idx, int i) +{ + bidx_t *bidx = idx->bidx[i]; + khint_t k; + int l, m; + if (bidx == 0) return 0; + // merge a bin to its parent if the bin is too small + for (l = idx->n_lvls; l > 0; --l) { + unsigned start = hts_bin_first(l); + for (k = kh_begin(bidx); k != kh_end(bidx); ++k) { + bins_t *p, *q; + if (!kh_exist(bidx, k) || kh_key(bidx, k) >= idx->n_bins || kh_key(bidx, k) < start) continue; + p = &kh_value(bidx, k); + if (l < idx->n_lvls && p->n > 1) ks_introsort(_off, p->n, p->list); + if ((p->list[p->n - 1].v>>16) - (p->list[0].u>>16) < HTS_MIN_MARKER_DIST) { + khint_t kp; + kp = kh_get(bin, bidx, hts_bin_parent(kh_key(bidx, k))); + if (kp == kh_end(bidx)) continue; + q = &kh_val(bidx, kp); + if (q->n + p->n > q->m) { + uint32_t new_m = q->n + p->n; + hts_pair64_t *new_list; + kroundup32(new_m); + if (new_m > INT32_MAX) return -1; // Limited by index format + new_list = realloc(q->list, new_m * sizeof(*new_list)); + if (!new_list) return -1; + q->m = new_m; + q->list = new_list; + } + memcpy(q->list + q->n, p->list, p->n * sizeof(hts_pair64_t)); + q->n += p->n; + free(p->list); + kh_del(bin, bidx, k); + } + } + } + k = kh_get(bin, bidx, 0); + if (k != kh_end(bidx)) ks_introsort(_off, kh_val(bidx, k).n, kh_val(bidx, k).list); + // merge adjacent chunks that start from the same BGZF block + for (k = kh_begin(bidx); k != kh_end(bidx); 
++k) { + bins_t *p; + if (!kh_exist(bidx, k) || kh_key(bidx, k) >= idx->n_bins) continue; + p = &kh_value(bidx, k); + for (l = 1, m = 0; l < p->n; ++l) { + if (p->list[m].v>>16 >= p->list[l].u>>16) { + if (p->list[m].v < p->list[l].v) p->list[m].v = p->list[l].v; + } else p->list[++m] = p->list[l]; + } + p->n = m + 1; + } + return 0; +} + +int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset) +{ + int i, ret = 0; + if (idx == NULL || idx->z.finished) return 0; // do not run this function on an empty index or multiple times + if (idx->z.save_tid >= 0) { + ret |= insert_to_b(idx->bidx[idx->z.save_tid], idx->z.save_bin, idx->z.save_off, final_offset); + ret |= insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), idx->z.off_beg, final_offset); + ret |= insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), idx->z.n_mapped, idx->z.n_unmapped); + } + for (i = 0; i < idx->n; ++i) { + update_loff(idx, i, (idx->fmt == HTS_FMT_CSI)); + ret |= compress_binning(idx, i); + } + idx->z.finished = 1; + + return ret; +} + +static inline hts_pos_t hts_idx_maxpos(const hts_idx_t *idx) +{ + return hts_bin_maxpos(idx->min_shift, idx->n_lvls); +} + +int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end) +{ + hts_pos_t maxpos = hts_idx_maxpos(idx); + if (tid < 0 || (beg <= maxpos && end <= maxpos)) + return 0; + + if (idx->fmt == HTS_FMT_CSI) { + hts_log_error("Region %"PRIhts_pos"..%"PRIhts_pos" " + "cannot be stored in a csi index with these parameters. " + "Please use a larger min_shift or depth", + beg, end); + } else { + hts_log_error("Region %"PRIhts_pos"..%"PRIhts_pos + " cannot be stored in a %s index. 
Try using a csi index", + beg, end, idx_format_name(idx->fmt)); + } + errno = ERANGE; + return -1; +} + +int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped) +{ + int bin; + if (tid<0) beg = -1, end = 0; + if (hts_idx_check_range(idx, tid, beg, end) < 0) + return -1; + if (tid >= idx->m) { // enlarge the index + uint32_t new_m = idx->m * 2 > tid + 1 ? idx->m * 2 : tid + 1; + bidx_t **new_bidx; + lidx_t *new_lidx; + + new_bidx = (bidx_t**)realloc(idx->bidx, new_m * sizeof(bidx_t*)); + if (!new_bidx) return -1; + idx->bidx = new_bidx; + + new_lidx = (lidx_t*) realloc(idx->lidx, new_m * sizeof(lidx_t)); + if (!new_lidx) return -1; + idx->lidx = new_lidx; + + memset(&idx->bidx[idx->m], 0, (new_m - idx->m) * sizeof(bidx_t*)); + memset(&idx->lidx[idx->m], 0, (new_m - idx->m) * sizeof(lidx_t)); + idx->m = new_m; + } + if (idx->n < tid + 1) idx->n = tid + 1; + if (idx->z.finished) return 0; + if (idx->z.last_tid != tid || (idx->z.last_tid >= 0 && tid < 0)) { // change of chromosome + if ( tid>=0 && idx->n_no_coor ) + { + hts_log_error("NO_COOR reads not in a single block at the end %d %d", tid, idx->z.last_tid); + return -1; + } + if (tid>=0 && idx->bidx[tid] != 0) + { + hts_log_error("Chromosome blocks not continuous"); + return -1; + } + idx->z.last_tid = tid; + idx->z.last_bin = 0xffffffffu; + } else if (tid >= 0 && idx->z.last_coor > beg) { // test if positions are out of order + hts_log_error("Unsorted positions on sequence #%d: %"PRIhts_pos" followed by %"PRIhts_pos, tid+1, idx->z.last_coor+1, beg+1); + return -1; + } + if (end < beg) { + // Malformed ranges are errors. (Empty ranges (beg==end) are unusual but acceptable.) 
+ hts_log_error("Invalid record on sequence #%d: end %"PRId64" < begin %"PRId64, tid+1, end, beg+1); + return -1; + } + if ( tid>=0 ) + { + if (idx->bidx[tid] == 0) idx->bidx[tid] = kh_init(bin); + // shoehorn [-1,0) (VCF POS=0) into the leftmost bottom-level bin + if (beg < 0) beg = 0; + if (end <= 0) end = 1; + // idx->z.last_off points to the start of the current record + if (insert_to_l(&idx->lidx[tid], beg, end, + idx->z.last_off, idx->min_shift) < 0) return -1; + } + else idx->n_no_coor++; + bin = hts_reg2bin(beg, end, idx->min_shift, idx->n_lvls); + if ((int)idx->z.last_bin != bin) { // then possibly write the binning index + if (idx->z.save_bin != 0xffffffffu) { // save_bin==0xffffffffu only happens to the first record + if (insert_to_b(idx->bidx[idx->z.save_tid], idx->z.save_bin, + idx->z.save_off, idx->z.last_off) < 0) return -1; + } + if (idx->z.last_bin == 0xffffffffu && idx->z.save_bin != 0xffffffffu) { // change of chr; keep meta information + idx->z.off_end = idx->z.last_off; + if (insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), + idx->z.off_beg, idx->z.off_end) < 0) return -1; + if (insert_to_b(idx->bidx[idx->z.save_tid], META_BIN(idx), + idx->z.n_mapped, idx->z.n_unmapped) < 0) return -1; + idx->z.n_mapped = idx->z.n_unmapped = 0; + idx->z.off_beg = idx->z.off_end; + } + idx->z.save_off = idx->z.last_off; + idx->z.save_bin = idx->z.last_bin = bin; + idx->z.save_tid = tid; + } + if (is_mapped) ++idx->z.n_mapped; + else ++idx->z.n_unmapped; + idx->z.last_off = offset; + idx->z.last_coor = beg; + return 0; +} + +// Needed for TBI only. Ensure 'tid' with 'name' is in the index meta data. +// idx->meta needs to have been initialised first with an appropriate Tabix +// configuration via hts_idx_set_meta. +// +// NB number of references (first 4 bytes of tabix header) aren't in +// idx->meta, but held in idx->n instead. 
+int hts_idx_tbi_name(hts_idx_t *idx, int tid, const char *name) { + // Horrid - we have to map incoming tid to a tbi alternative tid. + // This is because TBI counts tids by "covered" refs while everything + // else counts by Nth SQ/contig record in header. + if (tid == idx->last_tbi_tid || tid < 0 || !name) + return idx->tbi_n; + + uint32_t len = strlen(name)+1; + uint8_t *tmp = (uint8_t *)realloc(idx->meta, idx->l_meta + len); + if (!tmp) + return -1; + + // Append name + idx->meta = tmp; + strcpy((char *)idx->meta + idx->l_meta, name); + idx->l_meta += len; + + // Update seq length + u32_to_le(le_to_u32(idx->meta+24)+len, idx->meta+24); + + idx->last_tbi_tid = tid; + return ++idx->tbi_n; +} + +// When doing samtools index we have a read_bam / hts_idx_push(bgzf_tell()) +// loop. idx->z.last_off is the previous bzgf_tell location, so we know +// the location the current bam record started at as well as where it ends. +// +// When building an index on the fly via a write_bam / hts_idx_push loop, +// this isn't quite identical as we may amend the virtual coord returned +// by bgzf_tell to the start of a new block if the next bam struct doesn't +// fit. It's essentially the same thing, but for bit-identical indices +// we need to amend the idx->z.last_off when we know we're starting a new +// block. 
+void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset) +{ + idx->z.last_off = offset; +} + +void hts_idx_destroy(hts_idx_t *idx) +{ + khint_t k; + int i; + if (idx == 0) return; + + // For HTS_FMT_CRAI, idx actually points to a different type -- see sam.c + if (idx->fmt == HTS_FMT_CRAI) { + hts_cram_idx_t *cidx = (hts_cram_idx_t *) idx; + cram_index_free(cidx->cram); + free(cidx); + return; + } + + for (i = 0; i < idx->m; ++i) { + bidx_t *bidx = idx->bidx[i]; + free(idx->lidx[i].offset); + if (bidx == 0) continue; + for (k = kh_begin(bidx); k != kh_end(bidx); ++k) + if (kh_exist(bidx, k)) + free(kh_value(bidx, k).list); + kh_destroy(bin, bidx); + } + free(idx->bidx); free(idx->lidx); free(idx->meta); + free(idx); +} + +int hts_idx_fmt(hts_idx_t *idx) { + return idx->fmt; +} + +// The optimizer eliminates these ed_is_big() calls; still it would be good to +// TODO Determine endianness at configure- or compile-time + +static inline ssize_t HTS_RESULT_USED idx_write_int32(BGZF *fp, int32_t x) +{ + if (ed_is_big()) x = ed_swap_4(x); + return bgzf_write(fp, &x, sizeof x); +} + +static inline ssize_t HTS_RESULT_USED idx_write_uint32(BGZF *fp, uint32_t x) +{ + if (ed_is_big()) x = ed_swap_4(x); + return bgzf_write(fp, &x, sizeof x); +} + +static inline ssize_t HTS_RESULT_USED idx_write_uint64(BGZF *fp, uint64_t x) +{ + if (ed_is_big()) x = ed_swap_8(x); + return bgzf_write(fp, &x, sizeof x); +} + +static inline void swap_bins(bins_t *p) +{ + int i; + for (i = 0; i < p->n; ++i) { + ed_swap_8p(&p->list[i].u); + ed_swap_8p(&p->list[i].v); + } +} + +static int need_idx_ugly_delay_hack(const hts_idx_t *idx) +{ + // Ugly hack for on-the-fly BAI indexes. As these are uncompressed, + // we need to delay writing a few bytes of data until file close + // so that we have something to force a modification time update. + // + // (For compressed indexes like CSI, the BGZF EOF block serves the same + // purpose). 
+ return idx->otf_fp && !idx->otf_fp->is_compressed; +} + +static int idx_save_core(const hts_idx_t *idx, BGZF *fp, int fmt) +{ + int32_t i, j; + + #define check(ret) if ((ret) < 0) return -1 + + // VCF TBI/CSI only writes IDs for non-empty bins (ie covered references) + // + // NOTE: CSI meta is undefined in spec, so this code has an assumption + // that we're only using it for Tabix data. + int nids = idx->n; + if (idx->meta && idx->l_meta >= 4 && le_to_u32(idx->meta) == TBX_VCF) { + for (i = nids = 0; i < idx->n; ++i) { + if (idx->bidx[i]) + nids++; + } + } + check(idx_write_int32(fp, nids)); + if (fmt == HTS_FMT_TBI && idx->l_meta) + check(bgzf_write(fp, idx->meta, idx->l_meta)); + + for (i = 0; i < idx->n; ++i) { + khint_t k; + bidx_t *bidx = idx->bidx[i]; + lidx_t *lidx = &idx->lidx[i]; + + // write binning index + if (nids == idx->n || bidx) + check(idx_write_int32(fp, bidx? kh_size(bidx) : 0)); + if (bidx) + for (k = kh_begin(bidx); k != kh_end(bidx); ++k) + if (kh_exist(bidx, k)) { + bins_t *p = &kh_value(bidx, k); + check(idx_write_uint32(fp, kh_key(bidx, k))); + if (fmt == HTS_FMT_CSI) check(idx_write_uint64(fp, p->loff)); + //int j;for(j=0;jn;++j)fprintf(stderr,"%d,%llx,%d,%llx:%llx\n",kh_key(bidx,k),kh_val(bidx, k).loff,j,p->list[j].u,p->list[j].v); + check(idx_write_int32(fp, p->n)); + for (j = 0; j < p->n; ++j) { + //fprintf(stderr, "\t%ld\t%ld\n", p->list[j].u, p->list[j].v); + check(idx_write_uint64(fp, p->list[j].u)); + check(idx_write_uint64(fp, p->list[j].v)); + } + } + + // write linear index + if (fmt != HTS_FMT_CSI) { + check(idx_write_int32(fp, lidx->n)); + for (j = 0; j < lidx->n; ++j) + check(idx_write_uint64(fp, lidx->offset[j])); + } + } + + if (!need_idx_ugly_delay_hack(idx)) { + // Write this for compressed (CSI) indexes, but for BAI we + // need to save a bit for later. 
See hts_idx_close_otf_fp() + check(idx_write_uint64(fp, idx->n_no_coor)); + } + +#ifdef DEBUG_INDEX + idx_dump(idx); +#endif + + return 0; + #undef check +} + +int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt) +{ + int ret, save; + if (idx == NULL || fn == NULL) { errno = EINVAL; return -1; } + char *fnidx = (char*)calloc(1, strlen(fn) + 5); + if (fnidx == NULL) return -1; + + strcpy(fnidx, fn); + switch (fmt) { + case HTS_FMT_BAI: strcat(fnidx, ".bai"); break; + case HTS_FMT_CSI: strcat(fnidx, ".csi"); break; + case HTS_FMT_TBI: strcat(fnidx, ".tbi"); break; + default: abort(); + } + + ret = hts_idx_save_as(idx, fn, fnidx, fmt); + save = errno; + free(fnidx); + errno = save; + return ret; +} + +static int hts_idx_write_out(const hts_idx_t *idx, BGZF *fp, int fmt) +{ + #define check(ret) if ((ret) < 0) return -1 + + if (fmt == HTS_FMT_CSI) { + check(bgzf_write(fp, "CSI\1", 4)); + check(idx_write_int32(fp, idx->min_shift)); + check(idx_write_int32(fp, idx->n_lvls)); + check(idx_write_uint32(fp, idx->l_meta)); + if (idx->l_meta) check(bgzf_write(fp, idx->meta, idx->l_meta)); + } else if (fmt == HTS_FMT_TBI) { + check(bgzf_write(fp, "TBI\1", 4)); + } else if (fmt == HTS_FMT_BAI) { + check(bgzf_write(fp, "BAI\1", 4)); + } else abort(); + + check(idx_save_core(idx, fp, fmt)); + + #undef check + return 0; +} + +int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt) +{ + BGZF *fp; + + if (fnidx == NULL) + return hts_idx_save(idx, fn, fmt); + + fp = bgzf_open(fnidx, (fmt == HTS_FMT_BAI)? "wu" : "w"); + if (fp == NULL) return -1; + + if (hts_idx_write_out(idx, fp, fmt) < 0) { + int save_errno = errno; + bgzf_close(fp); + errno = save_errno; + return -1; + } + + return bgzf_close(fp); +} + +// idx_save for on-the-fly indexes. Mostly duplicated from above, except +// idx is not const because we want to store the file handle in it, and +// the index file handle is not closed. 
This allows the index file to be +// closed after the EOF block on the indexed file has been written out, +// so the modification times on the two files will be in the correct order. +int hts_idx_save_but_not_close(hts_idx_t *idx, const char *fnidx, int fmt) +{ + idx->otf_fp = bgzf_open(fnidx, (fmt == HTS_FMT_BAI)? "wu" : "w"); + if (idx->otf_fp == NULL) return -1; + + if (hts_idx_write_out(idx, idx->otf_fp, fmt) < 0) { + int save_errno = errno; + bgzf_close(idx->otf_fp); + idx->otf_fp = NULL; + errno = save_errno; + return -1; + } + + return bgzf_flush(idx->otf_fp); +} + +static int hts_idx_close_otf_fp(hts_idx_t *idx) +{ + if (idx && idx->otf_fp) { + int ret = 0; + if (need_idx_ugly_delay_hack(idx)) { + // BAI index - write out the bytes we deferred earlier + ret = idx_write_uint64(idx->otf_fp, idx->n_no_coor) < 0; + } + ret |= bgzf_close(idx->otf_fp) < 0; + idx->otf_fp = NULL; + return ret == 0 ? 0 : -1; + } + return 0; +} + +static int idx_read_core(hts_idx_t *idx, BGZF *fp, int fmt) +{ + int32_t i, n, is_be; + is_be = ed_is_big(); + if (idx == NULL) return -4; + for (i = 0; i < idx->n; ++i) { + bidx_t *h; + lidx_t *l = &idx->lidx[i]; + uint32_t key; + int j, absent; + bins_t *p; + h = idx->bidx[i] = kh_init(bin); + if (bgzf_read(fp, &n, 4) != 4) return -1; + if (is_be) ed_swap_4p(&n); + if (n < 0) return -3; + for (j = 0; j < n; ++j) { + khint_t k; + if (bgzf_read(fp, &key, 4) != 4) return -1; + if (is_be) ed_swap_4p(&key); + k = kh_put(bin, h, key, &absent); + if (absent < 0) return -2; // No memory + if (absent == 0) return -3; // Duplicate bin number + p = &kh_val(h, k); + if (fmt == HTS_FMT_CSI) { + if (bgzf_read(fp, &p->loff, 8) != 8) return -1; + if (is_be) ed_swap_8p(&p->loff); + } else p->loff = 0; + if (bgzf_read(fp, &p->n, 4) != 4) return -1; + if (is_be) ed_swap_4p(&p->n); + if (p->n < 0) return -3; + if ((size_t) p->n > SIZE_MAX / sizeof(hts_pair64_t)) return -2; + p->m = p->n; + p->list = (hts_pair64_t*)malloc(p->m * sizeof(hts_pair64_t)); + if 
(p->list == NULL) return -2; + if (bgzf_read(fp, p->list, ((size_t) p->n)<<4) != ((size_t) p->n)<<4) return -1; + if (is_be) swap_bins(p); + } + if (fmt != HTS_FMT_CSI) { // load linear index + int j, k; + uint32_t x; + if (bgzf_read(fp, &x, 4) != 4) return -1; + if (is_be) ed_swap_4p(&x); + l->n = x; + if (l->n < 0) return -3; + if ((size_t) l->n > SIZE_MAX / sizeof(uint64_t)) return -2; + l->m = l->n; + l->offset = (uint64_t*)malloc(l->n * sizeof(uint64_t)); + if (l->offset == NULL) return -2; + if (bgzf_read(fp, l->offset, l->n << 3) != l->n << 3) return -1; + if (is_be) for (j = 0; j < l->n; ++j) ed_swap_8p(&l->offset[j]); + for (k = j = 0; j < l->n && l->offset[j] == 0; k = ++j); // stop at the first non-zero entry + for (j = l->n-1; j > k; j--) // fill missing values; may happen given older samtools and tabix + if (l->offset[j-1] == 0) l->offset[j-1] = l->offset[j]; + update_loff(idx, i, 0); + } + } + if (bgzf_read(fp, &idx->n_no_coor, 8) != 8) idx->n_no_coor = 0; + if (is_be) ed_swap_8p(&idx->n_no_coor); +#ifdef DEBUG_INDEX + idx_dump(idx); +#endif + + return 0; +} + +static hts_idx_t *idx_read(const char *fn) +{ + uint8_t magic[4]; + int i, is_be; + hts_idx_t *idx = NULL; + uint8_t *meta = NULL; + BGZF *fp = bgzf_open(fn, "r"); + if (fp == NULL) return NULL; + is_be = ed_is_big(); + if (bgzf_read(fp, magic, 4) != 4) goto fail; + + if (memcmp(magic, "CSI\1", 4) == 0) { + uint32_t x[3], n; + if (bgzf_read(fp, x, 12) != 12) goto fail; + if (is_be) for (i = 0; i < 3; ++i) ed_swap_4p(&x[i]); + if (x[2]) { + if (SIZE_MAX - x[2] < 1) goto fail; // Prevent possible overflow + if ((meta = (uint8_t*)malloc((size_t) x[2] + 1)) == NULL) goto fail; + if (bgzf_read(fp, meta, x[2]) != x[2]) goto fail; + // Prevent possible strlen past the end in tbx_index_load2 + meta[x[2]] = '\0'; + } + if (bgzf_read(fp, &n, 4) != 4) goto fail; + if (is_be) ed_swap_4p(&n); + if (n > INT32_MAX) goto fail; + if ((idx = hts_idx_init(n, HTS_FMT_CSI, 0, x[0], x[1])) == NULL) goto fail; + 
idx->l_meta = x[2]; + idx->meta = meta; + meta = NULL; + if (idx_read_core(idx, fp, HTS_FMT_CSI) < 0) goto fail; + } + else if (memcmp(magic, "TBI\1", 4) == 0) { + uint8_t x[8 * 4]; + uint32_t n; + // Read file header + if (bgzf_read(fp, x, sizeof(x)) != sizeof(x)) goto fail; + n = le_to_u32(&x[0]); // location of n_ref + if (n > INT32_MAX) goto fail; + if ((idx = hts_idx_init(n, HTS_FMT_TBI, 0, 14, 5)) == NULL) goto fail; + n = le_to_u32(&x[7*4]); // location of l_nm + if (n > UINT32_MAX - 29) goto fail; // Prevent possible overflow + idx->l_meta = 28 + n; + if ((idx->meta = (uint8_t*)malloc(idx->l_meta + 1)) == NULL) goto fail; + // copy format, col_seq, col_beg, col_end, meta, skip, l_nm + // N.B. left in little-endian byte order. + memcpy(idx->meta, &x[1*4], 28); + // Read in sequence names. + if (bgzf_read(fp, idx->meta + 28, n) != n) goto fail; + // Prevent possible strlen past the end in tbx_index_load2 + idx->meta[idx->l_meta] = '\0'; + if (idx_read_core(idx, fp, HTS_FMT_TBI) < 0) goto fail; + } + else if (memcmp(magic, "BAI\1", 4) == 0) { + uint32_t n; + if (bgzf_read(fp, &n, 4) != 4) goto fail; + if (is_be) ed_swap_4p(&n); + if (n > INT32_MAX) goto fail; + if ((idx = hts_idx_init(n, HTS_FMT_BAI, 0, 14, 5)) == NULL) goto fail; + if (idx_read_core(idx, fp, HTS_FMT_BAI) < 0) goto fail; + } + else { errno = EINVAL; goto fail; } + + bgzf_close(fp); + return idx; + +fail: + bgzf_close(fp); + hts_idx_destroy(idx); + free(meta); + return NULL; +} + +int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, + int is_copy) +{ + uint8_t *new_meta = meta; + if (is_copy) { + size_t l = l_meta; + if (l > SIZE_MAX - 1) { + errno = ENOMEM; + return -1; + } + new_meta = malloc(l + 1); + if (!new_meta) return -1; + memcpy(new_meta, meta, l); + // Prevent possible strlen past the end in tbx_index_load2 + new_meta[l] = '\0'; + } + if (idx->meta) free(idx->meta); + idx->l_meta = l_meta; + idx->meta = new_meta; + return 0; +} + +uint8_t *hts_idx_get_meta(hts_idx_t 
*idx, uint32_t *l_meta) +{ + *l_meta = idx->l_meta; + return idx->meta; +} + +const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr) +{ + if ( !idx || !idx->n ) + { + *n = 0; + return NULL; + } + + int tid = 0, i; + const char **names = (const char**) calloc(idx->n,sizeof(const char*)); + for (i=0; in; i++) + { + bidx_t *bidx = idx->bidx[i]; + if ( !bidx ) continue; + names[tid++] = getid(hdr,i); + } + *n = tid; + return names; +} + +int hts_idx_nseq(const hts_idx_t *idx) { + if (!idx) return -1; + return idx->n; +} + +int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped) +{ + if (!idx) return -1; + if ( idx->fmt == HTS_FMT_CRAI ) { + *mapped = 0; *unmapped = 0; + return -1; + } + + bidx_t *h = idx->bidx[tid]; + if (!h) return -1; + khint_t k = kh_get(bin, h, META_BIN(idx)); + if (k != kh_end(h)) { + *mapped = kh_val(h, k).list[1].u; + *unmapped = kh_val(h, k).list[1].v; + return 0; + } else { + *mapped = 0; *unmapped = 0; + return -1; + } +} + +uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx) +{ + if (idx->fmt == HTS_FMT_CRAI) return 0; + return idx->n_no_coor; +} + +/**************** + *** Iterator *** + ****************/ + +// Note: even with 32-bit hts_pos_t, end needs to be 64-bit here due to 1LL<>s); e = t + (end>>s); + for (i = b; i <= e; ++i) { + if (kh_get(bin, bidx, i) != kh_end(bidx)) { + assert(itr->bins.n < itr->bins.m); + itr->bins.a[itr->bins.n++] = i; + } + } + } + return itr->bins.n; +} + +static inline int reg2bins_wide(int64_t beg, int64_t end, hts_itr_t *itr, int min_shift, int n_lvls, bidx_t *bidx) +{ + khint_t i; + hts_pos_t max_shift = 3 * n_lvls + min_shift; + --end; + if (beg < 0) beg = 0; + for (i = kh_begin(bidx); i != kh_end(bidx); i++) { + if (!kh_exist(bidx, i)) continue; + hts_pos_t bin = (hts_pos_t) kh_key(bidx, i); + int level = hts_bin_level(bin); + if (level > n_lvls) continue; // Dodgy index? 
+ hts_pos_t first = hts_bin_first(level); + hts_pos_t beg_at_level = first + (beg >> (max_shift - 3 * level)); + hts_pos_t end_at_level = first + (end >> (max_shift - 3 * level)); + if (beg_at_level <= bin && bin <= end_at_level) { + assert(itr->bins.n < itr->bins.m); + itr->bins.a[itr->bins.n++] = bin; + } + } + return itr->bins.n; +} + +static inline int reg2bins(int64_t beg, int64_t end, hts_itr_t *itr, int min_shift, int n_lvls, bidx_t *bidx) +{ + int l, t, s = min_shift + (n_lvls<<1) + n_lvls; + size_t reg_bin_count = 0, hash_bin_count = kh_n_buckets(bidx), max_bins; + hts_pos_t end1; + if (end >= 1LL<= end) return 0; + end1 = end - 1; + + // Count bins to see if it's faster to iterate through the hash table + // or the set of bins covering the region + for (l = 0, t = 0; l <= n_lvls; s -= 3, t += 1<<((l<<1)+l), ++l) { + reg_bin_count += (end1 >> s) - (beg >> s) + 1; + } + max_bins = reg_bin_count < kh_size(bidx) ? reg_bin_count : kh_size(bidx); + if (itr->bins.m - itr->bins.n < max_bins) { + // Worst-case memory usage. May be wasteful on very sparse + // data, but the bin list usually won't be too big anyway. 
+ size_t new_m = max_bins + itr->bins.n; + if (new_m > INT_MAX || new_m > SIZE_MAX / sizeof(int)) { + errno = ENOMEM; + return -1; + } + int *new_a = realloc(itr->bins.a, new_m * sizeof(*new_a)); + if (!new_a) return -1; + itr->bins.a = new_a; + itr->bins.m = new_m; + } + if (reg_bin_count < hash_bin_count) { + return reg2bins_narrow(beg, end, itr, min_shift, n_lvls, bidx); + } else { + return reg2bins_wide(beg, end, itr, min_shift, n_lvls, bidx); + } +} + +static inline int add_to_interval(hts_itr_t *iter, bins_t *bin, + int tid, uint32_t interval, + uint64_t min_off, uint64_t max_off) +{ + hts_pair64_max_t *off; + int j; + + if (!bin->n) + return 0; + off = realloc(iter->off, (iter->n_off + bin->n) * sizeof(*off)); + if (!off) + return -2; + + iter->off = off; + for (j = 0; j < bin->n; ++j) { + if (bin->list[j].v > min_off && bin->list[j].u < max_off) { + iter->off[iter->n_off].u = min_off > bin->list[j].u + ? min_off : bin->list[j].u; + iter->off[iter->n_off].v = max_off < bin->list[j].v + ? max_off : bin->list[j].v; + // hts_pair64_max_t::max is now used to link + // file offsets to region list entries. + // The iterator can use this to decide if it + // can skip some file regions. 
+ iter->off[iter->n_off].max = ((uint64_t) tid << 32) | interval; + iter->n_off++; + } + } + return 0; +} + +static inline int reg2intervals_narrow(hts_itr_t *iter, const bidx_t *bidx, + int tid, int64_t beg, int64_t end, + uint32_t interval, + uint64_t min_off, uint64_t max_off, + int min_shift, int n_lvls) +{ + int l, t, s = min_shift + n_lvls * 3; + hts_pos_t b, e, i; + + for (--end, l = 0, t = 0; l <= n_lvls; s -= 3, t += 1<<((l<<1)+l), ++l) { + b = t + (beg>>s); e = t + (end>>s); + for (i = b; i <= e; ++i) { + khint_t k = kh_get(bin, bidx, i); + if (k != kh_end(bidx)) { + bins_t *bin = &kh_value(bidx, k); + int res = add_to_interval(iter, bin, tid, interval, min_off, max_off); + if (res < 0) + return res; + } + } + } + return 0; +} + +static inline int reg2intervals_wide(hts_itr_t *iter, const bidx_t *bidx, + int tid, int64_t beg, int64_t end, + uint32_t interval, + uint64_t min_off, uint64_t max_off, + int min_shift, int n_lvls) +{ + khint_t i; + hts_pos_t max_shift = 3 * n_lvls + min_shift; + --end; + if (beg < 0) beg = 0; + for (i = kh_begin(bidx); i != kh_end(bidx); i++) { + if (!kh_exist(bidx, i)) continue; + hts_pos_t bin = (hts_pos_t) kh_key(bidx, i); + int level = hts_bin_level(bin); + if (level > n_lvls) continue; // Dodgy index? 
+ hts_pos_t first = hts_bin_first(level); + hts_pos_t beg_at_level = first + (beg >> (max_shift - 3 * level)); + hts_pos_t end_at_level = first + (end >> (max_shift - 3 * level)); + if (beg_at_level <= bin && bin <= end_at_level) { + bins_t *bin = &kh_value(bidx, i); + int res = add_to_interval(iter, bin, tid, interval, min_off, max_off); + if (res < 0) + return res; + } + } + return 0; +} + +static inline int reg2intervals(hts_itr_t *iter, const hts_idx_t *idx, int tid, int64_t beg, int64_t end, uint32_t interval, uint64_t min_off, uint64_t max_off, int min_shift, int n_lvls) +{ + int l, t, s; + int i, j; + hts_pos_t end1; + bidx_t *bidx; + int start_n_off; + size_t reg_bin_count = 0, hash_bin_count; + int res; + + if (!iter || !idx || (bidx = idx->bidx[tid]) == NULL || beg > end) + return -1; + + hash_bin_count = kh_n_buckets(bidx); + + s = min_shift + (n_lvls<<1) + n_lvls; + if (end >= 1LL<> s) - (beg >> s) + 1; + } + + start_n_off = iter->n_off; + + // Populate iter->off with the intervals for this region + if (reg_bin_count < hash_bin_count) { + res = reg2intervals_narrow(iter, bidx, tid, beg, end, interval, + min_off, max_off, min_shift, n_lvls); + } else { + res = reg2intervals_wide(iter, bidx, tid, beg, end, interval, + min_off, max_off, min_shift, n_lvls); + } + if (res < 0) + return res; + + if (iter->n_off - start_n_off > 1) { + ks_introsort(_off_max, iter->n_off - start_n_off, iter->off + start_n_off); + for (i = start_n_off, j = start_n_off + 1; j < iter->n_off; j++) { + if (iter->off[i].v >= iter->off[j].u) { + if (iter->off[i].v < iter->off[j].v) + iter->off[i].v = iter->off[j].v; + } else { + i++; + if (i < j) + iter->off[i] = iter->off[j]; + } + } + iter->n_off = i + 1; + } + + return iter->n_off; +} + +static int compare_regions(const void *r1, const void *r2) { + hts_reglist_t *reg1 = (hts_reglist_t *)r1; + hts_reglist_t *reg2 = (hts_reglist_t *)r2; + + if (reg1->tid < 0 && reg2->tid >= 0) + return 1; + else if (reg1->tid >= 0 && reg2->tid < 0) + 
return -1; + else + return reg1->tid - reg2->tid; +} + +uint64_t hts_itr_off(const hts_idx_t* idx, int tid) { + + int i; + bidx_t* bidx; + uint64_t off0 = (uint64_t) -1; + khint_t k; + switch (tid) { + case HTS_IDX_START: + // Find the smallest offset, note that sequence ids may not be ordered sequentially + for (i = 0; i < idx->n; i++) { + bidx = idx->bidx[i]; + k = kh_get(bin, bidx, META_BIN(idx)); + if (k == kh_end(bidx)) + continue; + + if (off0 > kh_val(bidx, k).list[0].u) + off0 = kh_val(bidx, k).list[0].u; + } + if (off0 == (uint64_t) -1 && idx->n_no_coor) + off0 = 0; + // only no-coor reads in this bam + break; + case HTS_IDX_NOCOOR: + /* No-coor reads sort after all of the mapped reads. The position + is not stored in the index itself, so need to find the end + offset for the last mapped read. A loop is needed here in + case references at the end of the file have no mapped reads, + or sequence ids are not ordered sequentially. + See issue samtools#568 and commits b2aab8, 60c22d and cc207d. 
*/ + for (i = 0; i < idx->n; i++) { + bidx = idx->bidx[i]; + k = kh_get(bin, bidx, META_BIN(idx)); + if (k != kh_end(bidx)) { + if (off0 == (uint64_t) -1 || off0 < kh_val(bidx, k).list[0].v) { + off0 = kh_val(bidx, k).list[0].v; + } + } + } + if (off0 == (uint64_t) -1 && idx->n_no_coor) + off0 = 0; + // only no-coor reads in this bam + break; + case HTS_IDX_REST: + off0 = 0; + break; + case HTS_IDX_NONE: + off0 = 0; + break; + } + + return off0; +} + +hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec) +{ + int i, n_off, l, bin; + hts_pair64_max_t *off; + khint_t k; + bidx_t *bidx; + uint64_t min_off, max_off; + hts_pos_t idx_maxpos; + hts_itr_t *iter; + uint32_t unmapped = 0, rel_off; + + // It's possible to call this function with NULL idx iff + // tid is one of the special values HTS_IDX_REST or HTS_IDX_NONE + if (!idx && !(tid == HTS_IDX_REST || tid == HTS_IDX_NONE)) { + errno = EINVAL; + return NULL; + } + + iter = (hts_itr_t*)calloc(1, sizeof(hts_itr_t)); + if (iter) { + if (tid < 0) { + uint64_t off = hts_itr_off(idx, tid); + if (off != (uint64_t) -1) { + iter->read_rest = 1; + iter->curr_off = off; + iter->readrec = readrec; + if (tid == HTS_IDX_NONE) + iter->finished = 1; + } else { + free(iter); + iter = NULL; + } + } else if (tid >= idx->n || (bidx = idx->bidx[tid]) == NULL) { + iter->finished = 1; + } else { + if (beg < 0) beg = 0; + if (end < beg) { + free(iter); + return NULL; + } + + k = kh_get(bin, bidx, META_BIN(idx)); + if (k != kh_end(bidx)) + unmapped = kh_val(bidx, k).list[1].v; + else + unmapped = 1; + + iter->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1; + iter->readrec = readrec; + + if ( !kh_size(bidx) ) { iter->finished = 1; return iter; } + + idx_maxpos = hts_idx_maxpos(idx); + if (beg >= idx_maxpos) { iter->finished = 1; return iter; } + + rel_off = beg>>idx->min_shift; + // compute min_off + bin = hts_bin_first(idx->n_lvls) + rel_off; + do { + int first; + k = 
kh_get(bin, bidx, bin); + if (k != kh_end(bidx)) break; + first = (hts_bin_parent(bin)<<3) + 1; + if (bin > first) --bin; + else bin = hts_bin_parent(bin); + } while (bin); + if (bin == 0) k = kh_get(bin, bidx, bin); + min_off = k != kh_end(bidx)? kh_val(bidx, k).loff : 0; + // min_off can be calculated more accurately if the + // linear index is available + if (idx->lidx[tid].offset + && rel_off < idx->lidx[tid].n) { + if (min_off < idx->lidx[tid].offset[rel_off]) + min_off = idx->lidx[tid].offset[rel_off]; + if (unmapped) { + // unmapped reads are not covered by the linear index, + // so search backwards for a smaller offset + int tmp_off; + for (tmp_off = rel_off-1; tmp_off >= 0; tmp_off--) { + if (idx->lidx[tid].offset[tmp_off] < min_off) { + min_off = idx->lidx[tid].offset[tmp_off]; + break; + } + } + // if the search went too far back or no satisfactory entry + // was found, revert to the bin index loff value + if (k != kh_end(bidx) && (min_off < kh_val(bidx, k).loff || tmp_off < 0)) + min_off = kh_val(bidx, k).loff; + } + } else if (unmapped) { //CSI index + if (k != kh_end(bidx)) + min_off = kh_val(bidx, k).loff; + } + + // compute max_off: a virtual offset from a bin to the right of end + // First check if end lies within the range of the index (it won't + // if it's HTS_POS_MAX) + if (end <= idx_maxpos) { + bin = hts_bin_first(idx->n_lvls) + ((end-1) >> idx->min_shift) + 1; + if (bin >= idx->n_bins) bin = 0; + while (1) { + // search for an extant bin by moving right, but moving up to the + // parent whenever we get to a first child (which also covers falling + // off the RHS, which wraps around and immediately goes up to bin 0) + while (bin % 8 == 1) bin = hts_bin_parent(bin); + if (bin == 0) { max_off = UINT64_MAX; break; } + k = kh_get(bin, bidx, bin); + if (k != kh_end(bidx) && kh_val(bidx, k).n > 0) { max_off = kh_val(bidx, k).list[0].u; break; } + bin++; + } + } else { + // Searching to end of reference + max_off = UINT64_MAX; + } + + // retrieve 
bins + if (reg2bins(beg, end, iter, idx->min_shift, idx->n_lvls, bidx) < 0) { + hts_itr_destroy(iter); + return NULL; + } + + for (i = n_off = 0; i < iter->bins.n; ++i) + if ((k = kh_get(bin, bidx, iter->bins.a[i])) != kh_end(bidx)) + n_off += kh_value(bidx, k).n; + if (n_off == 0) { + // No overlapping bins means the iterator has already finished. + iter->finished = 1; + return iter; + } + off = calloc(n_off, sizeof(*off)); + for (i = n_off = 0; i < iter->bins.n; ++i) { + if ((k = kh_get(bin, bidx, iter->bins.a[i])) != kh_end(bidx)) { + int j; + bins_t *p = &kh_value(bidx, k); + for (j = 0; j < p->n; ++j) + if (p->list[j].v > min_off && p->list[j].u < max_off) { + off[n_off].u = min_off > p->list[j].u + ? min_off : p->list[j].u; + off[n_off].v = max_off < p->list[j].v + ? max_off : p->list[j].v; + // hts_pair64_max_t::max is now used to link + // file offsets to region list entries. + // The iterator can use this to decide if it + // can skip some file regions. + off[n_off].max = ((uint64_t) tid << 32) | j; + n_off++; + } + } + } + + if (n_off == 0) { + free(off); + iter->finished = 1; + return iter; + } + ks_introsort(_off_max, n_off, off); + // resolve completely contained adjacent blocks + for (i = 1, l = 0; i < n_off; ++i) + if (off[l].v < off[i].v) off[++l] = off[i]; + n_off = l + 1; + // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing + for (i = 1; i < n_off; ++i) + if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u; + // merge adjacent blocks + for (i = 1, l = 0; i < n_off; ++i) { + if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v; + else off[++l] = off[i]; + } + n_off = l + 1; + iter->n_off = n_off; iter->off = off; + } + } + + return iter; +} + +int hts_itr_multi_bam(const hts_idx_t *idx, hts_itr_t *iter) +{ + int i, j, bin; + khint_t k; + bidx_t *bidx; + uint64_t min_off, max_off, t_off = (uint64_t)-1; + int tid; + hts_pos_t beg, end, idx_maxpos; + hts_reglist_t *curr_reg; + uint32_t unmapped = 0, rel_off; + + 
if (!idx || !iter || !iter->multi) + return -1; + + iter->i = -1; + for (i=0; in_reg; i++) { + + curr_reg = &iter->reg_list[i]; + tid = curr_reg->tid; + + if (tid < 0) { + t_off = hts_itr_off(idx, tid); + if (t_off != (uint64_t)-1) { + switch (tid) { + case HTS_IDX_NONE: + iter->finished = 1; + // fall through + case HTS_IDX_START: + case HTS_IDX_REST: + iter->curr_off = t_off; + iter->n_reg = 0; + iter->reg_list = NULL; + iter->read_rest = 1; + return 0; + case HTS_IDX_NOCOOR: + iter->nocoor = 1; + iter->nocoor_off = t_off; + } + } + } else { + if (tid >= idx->n || (bidx = idx->bidx[tid]) == NULL || !kh_size(bidx)) + continue; + + k = kh_get(bin, bidx, META_BIN(idx)); + if (k != kh_end(bidx)) + unmapped = kh_val(bidx, k).list[1].v; + else + unmapped = 1; + + idx_maxpos = hts_idx_maxpos(idx); + + for(j=0; jcount; j++) { + hts_pair32_t *curr_intv = &curr_reg->intervals[j]; + if (curr_intv->end < curr_intv->beg) + continue; + + beg = curr_intv->beg; + end = curr_intv->end; + if (beg >= idx_maxpos) + continue; + rel_off = beg>>idx->min_shift; + + /* Compute 'min_off' by searching the lowest level bin containing 'beg'. + If the computed bin is not in the index, try the next bin to the + left, belonging to the same parent. If it is the first sibling bin, + try the parent bin. */ + bin = hts_bin_first(idx->n_lvls) + rel_off; + do { + int first; + k = kh_get(bin, bidx, bin); + if (k != kh_end(bidx)) break; + first = (hts_bin_parent(bin)<<3) + 1; + if (bin > first) --bin; + else bin = hts_bin_parent(bin); + } while (bin); + if (bin == 0) + k = kh_get(bin, bidx, bin); + min_off = k != kh_end(bidx)? 
kh_val(bidx, k).loff : 0; + // min_off can be calculated more accurately if the + // linear index is available + if (idx->lidx[tid].offset + && rel_off < idx->lidx[tid].n) { + if (min_off < idx->lidx[tid].offset[rel_off]) + min_off = idx->lidx[tid].offset[rel_off]; + if (unmapped) { + int tmp_off; + for (tmp_off = rel_off-1; tmp_off >= 0; tmp_off--) { + if (idx->lidx[tid].offset[tmp_off] < min_off) { + min_off = idx->lidx[tid].offset[tmp_off]; + break; + } + } + + if (k != kh_end(bidx) && (min_off < kh_val(bidx, k).loff || tmp_off < 0)) + min_off = kh_val(bidx, k).loff; + } + } else if (unmapped) { //CSI index + if (k != kh_end(bidx)) + min_off = kh_val(bidx, k).loff; + } + + // compute max_off: a virtual offset from a bin to the right of end + // First check if end lies within the range of the index (it + // won't if it's HTS_POS_MAX) + if (end <= idx_maxpos) { + bin = hts_bin_first(idx->n_lvls) + ((end-1) >> idx->min_shift) + 1; + if (bin >= idx->n_bins) bin = 0; + while (1) { + // search for an extant bin by moving right, but moving up to the + // parent whenever we get to a first child (which also covers falling + // off the RHS, which wraps around and immediately goes up to bin 0) + while (bin % 8 == 1) bin = hts_bin_parent(bin); + if (bin == 0) { max_off = UINT64_MAX; break; } + k = kh_get(bin, bidx, bin); + if (k != kh_end(bidx) && kh_val(bidx, k).n > 0) { + max_off = kh_val(bidx, k).list[0].u; + break; + } + bin++; + } + } else { + // Searching to end of reference + max_off = UINT64_MAX; + } + + //convert coordinates to file offsets + if (reg2intervals(iter, idx, tid, beg, end, j, + min_off, max_off, + idx->min_shift, idx->n_lvls) < 0) { + return -1; + } + } + } + } + + if (iter->n_off > 1) + ks_introsort(_off_max, iter->n_off, iter->off); + + if(!iter->n_off && !iter->nocoor) + iter->finished = 1; + + return 0; +} + +int hts_itr_multi_cram(const hts_idx_t *idx, hts_itr_t *iter) +{ + const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; + int tid, i, 
n_off = 0; + uint32_t j; + hts_pos_t beg, end; + hts_reglist_t *curr_reg; + hts_pair32_t *curr_intv; + hts_pair64_max_t *off = NULL, *tmp; + cram_index *e = NULL; + + if (!cidx || !iter || !iter->multi) + return -1; + + iter->is_cram = 1; + iter->read_rest = 0; + iter->off = NULL; + iter->n_off = 0; + iter->curr_off = 0; + iter->i = -1; + + for (i=0; in_reg; i++) { + + curr_reg = &iter->reg_list[i]; + tid = curr_reg->tid; + + if (tid >= 0) { + tmp = realloc(off, (n_off + curr_reg->count) * sizeof(*off)); + if (!tmp) + goto err; + off = tmp; + + for (j=0; j < curr_reg->count; j++) { + curr_intv = &curr_reg->intervals[j]; + if (curr_intv->end < curr_intv->beg) + continue; + + beg = curr_intv->beg; + end = curr_intv->end; + +/* First, fetch the container overlapping 'beg' and assign its file offset to u, then + * find the container overlapping 'end' and assign the relative end of the slice to v. + * The cram_ptell function will adjust with the container offset, which is not stored + * in the index. + */ + e = cram_index_query(cidx->cram, tid, beg+1, NULL); + if (e) { + off[n_off].u = e->offset; + // hts_pair64_max_t::max is now used to link + // file offsets to region list entries. + // The iterator can use this to decide if it + // can skip some file regions. + off[n_off].max = ((uint64_t) tid << 32) | j; + + if (end >= HTS_POS_MAX) { + e = cram_index_last(cidx->cram, tid, NULL); + } else { + e = cram_index_query_last(cidx->cram, tid, end+1); + } + + if (e) { + off[n_off++].v = e->e_next + ? e->e_next->offset + : e->offset + e->slice + e->len; + } else { + hts_log_warning("Could not set offset end for region %d:%"PRIhts_pos"-%"PRIhts_pos". 
Skipping", tid, beg, end); + } + } + } + } else { + switch (tid) { + case HTS_IDX_NOCOOR: + e = cram_index_query(cidx->cram, tid, 1, NULL); + if (e) { + iter->nocoor = 1; + iter->nocoor_off = e->offset; + } else { + hts_log_warning("No index entry for NOCOOR region"); + } + break; + case HTS_IDX_START: + e = cram_index_query(cidx->cram, tid, 1, NULL); + if (e) { + iter->read_rest = 1; + tmp = realloc(off, sizeof(*off)); + if (!tmp) + goto err; + off = tmp; + off[0].u = e->offset; + off[0].v = 0; + n_off=1; + } else { + hts_log_warning("No index entries"); + } + break; + case HTS_IDX_REST: + break; + case HTS_IDX_NONE: + iter->finished = 1; + break; + default: + hts_log_error("Query with tid=%d not implemented for CRAM files", tid); + } + } + } + + if (n_off) { + ks_introsort(_off_max, n_off, off); + iter->n_off = n_off; iter->off = off; + } + + if(!n_off && !iter->nocoor) + iter->finished = 1; + + return 0; + + err: + free(off); + return -1; +} + +void hts_itr_destroy(hts_itr_t *iter) +{ + if (iter) { + if (iter->multi) { + hts_reglist_free(iter->reg_list, iter->n_reg); + } else { + free(iter->bins.a); + } + + if (iter->off) + free(iter->off); + free(iter); + } +} + +static inline unsigned long long push_digit(unsigned long long i, char c) +{ + // ensure subtraction occurs first, avoiding overflow for >= MAX-48 or so + int digit = c - '0'; + return 10 * i + digit; +} + +long long hts_parse_decimal(const char *str, char **strend, int flags) +{ + unsigned long long n = 0; + int digits = 0, decimals = 0, e = 0, lost = 0; + char sign = '+', esign = '+'; + const char *s, *str_orig = str; + + while (isspace_c(*str)) str++; + s = str; + + if (*s == '+' || *s == '-') sign = *s++; + while (*s) + if (isdigit_c(*s)) digits++, n = push_digit(n, *s++); + else if (*s == ',' && (flags & HTS_PARSE_THOUSANDS_SEP)) s++; + else break; + + if (*s == '.') { + s++; + while (isdigit_c(*s)) decimals++, digits++, n = push_digit(n, *s++); + } + + switch (*s) { + case 'e': case 'E': + s++; + 
if (*s == '+' || *s == '-') esign = *s++; + while (isdigit_c(*s)) e = push_digit(e, *s++); + if (esign == '-') e = -e; + break; + + case 'k': case 'K': e += 3; s++; break; + case 'm': case 'M': e += 6; s++; break; + case 'g': case 'G': e += 9; s++; break; + } + + e -= decimals; + while (e > 0) n *= 10, e--; + while (e < 0) lost += n % 10, n /= 10, e++; + + if (lost > 0) { + hts_log_warning("Discarding fractional part of %.*s", (int)(s - str), str); + } + + if (strend) { + // Set to the original input str pointer if not valid number syntax + *strend = (digits > 0)? (char *)s : (char *)str_orig; + } else if (digits == 0) { + hts_log_warning("Invalid numeric value %.8s[truncated]", str); + } else if (*s) { + if ((flags & HTS_PARSE_THOUSANDS_SEP) || (!(flags & HTS_PARSE_THOUSANDS_SEP) && *s != ',')) + hts_log_warning("Ignoring unknown characters after %.*s[%s]", (int)(s - str), str, s); + } + + return (sign == '+')? n : -n; +} + +static void *hts_memrchr(const void *s, int c, size_t n) { + size_t i; + unsigned char *u = (unsigned char *)s; + for (i = n; i > 0; i--) { + if (u[i-1] == c) + return u+i-1; + } + + return NULL; +} + +/* + * A variant of hts_parse_reg which is reference-id aware. It uses + * the iterator name2id callbacks to validate the region tokenisation works. + * + * This is necessary due to GRCh38 HLA additions which have reference names + * like "HLA-DRB1*12:17". + * + * All parameters are mandatory. + * + * To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" + * are reference names, we may quote using curly braces. + * Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. + * + * Flags are used to control how parsing works, and can be one of the below. + * + * HTS_PARSE_LIST: + * If present, the region is assmed to be a comma separated list and + * position parsing will not contain commas (this implicitly + * clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal). 
+ * On success the return pointer will be the start of the next region, ie + * the character after the comma. (If *ret != '\0' then the caller can + * assume another region is present in the list.) + * + * If not set then positions may contain commas. In this case the return + * value should point to the end of the string, or NULL on failure. + * + * HTS_PARSE_ONE_COORD: + * If present, X:100 is treated as the single base pair region X:100-100. + * In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-. + * (This is the standard bcftools region convention.) + * + * When not set X:100 is considered to be X:100- where is + * the end of chromosome X (set to HTS_POS_MAX here). X:100- and X:-100 + * are invalid. + * (This is the standard samtools region convention.) + * + * Note the supplied string expects 1 based inclusive coordinates, but the + * returned coordinates start from 0 and are half open, so pos0 is valid + * for use in e.g. "for (pos0 = beg; pos0 < end; pos0++) {...}" + * + * On success a pointer to the byte after the end of the entire region + * specifier is returned (plus any trailing comma), and tid, + * beg & end will be set. + * On failure NULL is returned. + */ +const char *hts_parse_region(const char *s, int *tid, hts_pos_t *beg, + hts_pos_t *end, hts_name2id_f getid, void *hdr, + int flags) +{ + if (!s || !tid || !beg || !end || !getid) + return NULL; + + size_t s_len = strlen(s); + kstring_t ks = { 0, 0, NULL }; + + const char *colon = NULL, *comma = NULL; + int quoted = 0; + + if (flags & HTS_PARSE_LIST) + flags &= ~HTS_PARSE_THOUSANDS_SEP; + else + flags |= HTS_PARSE_THOUSANDS_SEP; + + const char *s_end = s + s_len; + + // Braced quoting of references is permitted to resolve ambiguities. 
+ if (*s == '{') { + const char *close = memchr(s, '}', s_len); + if (!close) { + hts_log_error("Mismatching braces in \"%s\"", s); + *tid = -1; + return NULL; + } + s++; + s_len--; + if (close[1] == ':') + colon = close+1; + quoted = 1; // number of trailing characters to trim + + // Truncate to this item only, if appropriate. + if (flags & HTS_PARSE_LIST) { + comma = strchr(close, ','); + if (comma) { + s_len = comma-s; + s_end = comma+1; + } + } + } else { + // Truncate to this item only, if appropriate. + if (flags & HTS_PARSE_LIST) { + comma = strchr(s, ','); + if (comma) { + s_len = comma-s; + s_end = comma+1; + } + } + + colon = hts_memrchr(s, ':', s_len); + } + + // No colon is simplest case; just check and return. + if (colon == NULL) { + *beg = 0; *end = HTS_POS_MAX; + kputsn(s, s_len-quoted, &ks); // convert to nul terminated string + if (!ks.s) { + *tid = -2; + return NULL; + } + + *tid = getid(hdr, ks.s); + free(ks.s); + + return *tid >= 0 ? s_end : NULL; + } + + // Has a colon, but check whole name first. + if (!quoted) { + *beg = 0; *end = HTS_POS_MAX; + kputsn(s, s_len, &ks); // convert to nul terminated string + if (!ks.s) { + *tid = -2; + return NULL; + } + if ((*tid = getid(hdr, ks.s)) >= 0) { + // Entire name matches, but also check this isn't + // ambiguous. eg we have ref chr1 and ref chr1:100-200 + // both present. + ks.l = 0; + kputsn(s, colon-s, &ks); // convert to nul terminated string + if (!ks.s) { + *tid = -2; + return NULL; + } + if (getid(hdr, ks.s) >= 0) { + free(ks.s); + *tid = -1; + hts_log_error("Range is ambiguous. " + "Use {%s} or {%.*s}%s instead", + s, (int)(colon-s), s, colon); + return NULL; + } + free(ks.s); + + return s_end; + } + if (*tid < -1) // Failed to parse header + return NULL; + } + + // Quoted, or unquoted and whole string isn't a name. + // Check the pre-colon part is valid. 
+ ks.l = 0; + kputsn(s, colon-s-quoted, &ks); // convert to nul terminated string + if (!ks.s) { + *tid = -2; + return NULL; + } + *tid = getid(hdr, ks.s); + free(ks.s); + if (*tid < 0) + return NULL; + + // Finally parse the post-colon coordinates + char *hyphen; + *beg = hts_parse_decimal(colon+1, &hyphen, flags) - 1; + if (*beg < 0) { + if (*beg != -1 && *hyphen == '-' && colon[1] != '\0') { + // User specified zero, but we're 1-based. + hts_log_error("Coordinates must be > 0"); + return NULL; + } + if (isdigit_c(*hyphen) || *hyphen == '\0' || *hyphen == ',') { + // interpret chr:-100 as chr:1-100 + *end = *beg==-1 ? HTS_POS_MAX : -(*beg+1); + *beg = 0; + return s_end; + } else if (*beg < -1) { + hts_log_error("Unexpected string \"%s\" after region", hyphen); + return NULL; + } + } + + if (*hyphen == '\0' || ((flags & HTS_PARSE_LIST) && *hyphen == ',')) { + *end = flags & HTS_PARSE_ONE_COORD ? *beg+1 : HTS_POS_MAX; + } else if (*hyphen == '-') { + *end = hts_parse_decimal(hyphen+1, &hyphen, flags); + if (*hyphen != '\0' && *hyphen != ',') { + hts_log_error("Unexpected string \"%s\" after region", hyphen); + return NULL; + } + } else { + hts_log_error("Unexpected string \"%s\" after region", hyphen); + return NULL; + } + + if (*end == 0) + *end = HTS_POS_MAX; // interpret chr:100- as chr:100- + + if (*beg >= *end) return NULL; + + return s_end; +} + +// Next release we should mark this as deprecated? +// Use hts_parse_region above instead. 
+const char *hts_parse_reg64(const char *s, hts_pos_t *beg, hts_pos_t *end) +{ + char *hyphen; + const char *colon = strrchr(s, ':'); + if (colon == NULL) { + *beg = 0; *end = HTS_POS_MAX; + return s + strlen(s); + } + + *beg = hts_parse_decimal(colon+1, &hyphen, HTS_PARSE_THOUSANDS_SEP) - 1; + if (*beg < 0) *beg = 0; + + if (*hyphen == '\0') *end = HTS_POS_MAX; + else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, NULL, HTS_PARSE_THOUSANDS_SEP); + else return NULL; + + if (*beg >= *end) return NULL; + return colon; +} + +const char *hts_parse_reg(const char *s, int *beg, int *end) +{ + hts_pos_t beg64 = 0, end64 = 0; + const char *colon = hts_parse_reg64(s, &beg64, &end64); + if (beg64 > INT_MAX) { + hts_log_error("Position %"PRId64" too large", beg64); + return NULL; + } + if (end64 > INT_MAX) { + if (end64 == HTS_POS_MAX) { + end64 = INT_MAX; + } else { + hts_log_error("Position %"PRId64" too large", end64); + return NULL; + } + } + *beg = beg64; + *end = end64; + return colon; +} + +hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec) +{ + int tid; + hts_pos_t beg, end; + + if (strcmp(reg, ".") == 0) + return itr_query(idx, HTS_IDX_START, 0, 0, readrec); + else if (strcmp(reg, "*") == 0) + return itr_query(idx, HTS_IDX_NOCOOR, 0, 0, readrec); + + if (!hts_parse_region(reg, &tid, &beg, &end, getid, hdr, HTS_PARSE_THOUSANDS_SEP)) + return NULL; + + return itr_query(idx, tid, beg, end, readrec); +} + +hts_itr_t *hts_itr_regions(const hts_idx_t *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, hts_itr_multi_query_func *itr_specific, hts_readrec_func *readrec, hts_seek_func *seek, hts_tell_func *tell) { + + int i; + + if (!reglist) + return NULL; + + hts_itr_t *itr = (hts_itr_t*)calloc(1, sizeof(hts_itr_t)); + if (itr) { + itr->n_reg = count; + itr->readrec = readrec; + itr->seek = seek; + itr->tell = tell; + itr->reg_list = reglist; + 
itr->finished = 0; + itr->nocoor = 0; + itr->multi = 1; + + for (i = 0; i < itr->n_reg; i++) { + if (itr->reg_list[i].reg) { + if (!strcmp(itr->reg_list[i].reg, ".")) { + itr->reg_list[i].tid = HTS_IDX_START; + continue; + } + + if (!strcmp(itr->reg_list[i].reg, "*")) { + itr->reg_list[i].tid = HTS_IDX_NOCOOR; + continue; + } + + itr->reg_list[i].tid = getid(hdr, reglist[i].reg); + if (itr->reg_list[i].tid < 0) { + if (itr->reg_list[i].tid < -1) { + hts_log_error("Failed to parse header"); + hts_itr_destroy(itr); + return NULL; + } else { + hts_log_warning("Region '%s' specifies an unknown reference name. Continue anyway", reglist[i].reg); + } + } + } + } + + qsort(itr->reg_list, itr->n_reg, sizeof(hts_reglist_t), compare_regions); + if (itr_specific(idx, itr) != 0) { + hts_log_error("Failed to create the multi-region iterator!"); + hts_itr_destroy(itr); + itr = NULL; + } + } + + return itr; +} + +int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) +{ + int ret, tid; + hts_pos_t beg, end; + if (iter == NULL || iter->finished) return -1; + if (iter->read_rest) { + if (iter->curr_off) { // seek to the start + if (bgzf_seek(fp, iter->curr_off, SEEK_SET) < 0) { + hts_log_error("Failed to seek to offset %"PRIu64"%s%s", + iter->curr_off, + errno ? ": " : "", strerror(errno)); + return -2; + } + iter->curr_off = 0; // only seek once + } + ret = iter->readrec(fp, data, r, &tid, &beg, &end); + if (ret < 0) iter->finished = 1; + iter->curr_tid = tid; + iter->curr_beg = beg; + iter->curr_end = end; + return ret; + } + // A NULL iter->off should always be accompanied by iter->finished. 
+ assert(iter->off != NULL); + for (;;) { + if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk + if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks + if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek + if (bgzf_seek(fp, iter->off[iter->i+1].u, SEEK_SET) < 0) { + hts_log_error("Failed to seek to offset %"PRIu64"%s%s", + iter->off[iter->i+1].u, + errno ? ": " : "", strerror(errno)); + return -2; + } + iter->curr_off = bgzf_tell(fp); + } + ++iter->i; + } + if ((ret = iter->readrec(fp, data, r, &tid, &beg, &end)) >= 0) { + iter->curr_off = bgzf_tell(fp); + if (tid != iter->tid || beg >= iter->end) { // no need to proceed + ret = -1; break; + } else if (end > iter->beg && iter->end > beg) { + iter->curr_tid = tid; + iter->curr_beg = beg; + iter->curr_end = end; + return ret; + } + } else break; // end of file or error + } + iter->finished = 1; + return ret; +} + +int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r) +{ + void *fp; + int ret, tid, i, cr, ci; + hts_pos_t beg, end; + hts_reglist_t *found_reg; + + if (iter == NULL || iter->finished) return -1; + + if (iter->is_cram) { + fp = fd->fp.cram; + } else { + fp = fd->fp.bgzf; + } + + if (iter->read_rest) { + if (iter->curr_off) { // seek to the start + if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { + hts_log_error("Seek at offset %" PRIu64 " failed.", iter->curr_off); + return -1; + } + iter->curr_off = 0; // only seek once + } + + ret = iter->readrec(fp, fd, r, &tid, &beg, &end); + if (ret < 0) + iter->finished = 1; + + iter->curr_tid = tid; + iter->curr_beg = beg; + iter->curr_end = end; + + return ret; + } + // A NULL iter->off should always be accompanied by iter->finished. 
+ assert(iter->off != NULL || iter->nocoor != 0); + + int next_range = 0; + for (;;) { + // Note that due to the way bam indexing works, iter->off may contain + // file chunks that are not actually needed as they contain data + // beyond the end of the requested region. These are filtered out + // by comparing the tid and index into hts_reglist_t::intervals + // (packed for reasons of convenience into iter->off[iter->i].max) + // associated with the file region with iter->curr_tid and + // iter->curr_intv. + + if (next_range + || iter->curr_off == 0 + || iter->i >= iter->n_off + || iter->curr_off >= iter->off[iter->i].v + || (iter->off[iter->i].max >> 32 == iter->curr_tid + && (iter->off[iter->i].max & 0xffffffff) < iter->curr_intv)) { + + // Jump to the next chunk. It may be necessary to skip more + // than one as the iter->off list can include overlapping entries. + do { + iter->i++; + } while (iter->i < iter->n_off + && (iter->curr_off >= iter->off[iter->i].v + || (iter->off[iter->i].max >> 32 == iter->curr_tid + && (iter->off[iter->i].max & 0xffffffff) < iter->curr_intv))); + + if (iter->is_cram && iter->i < iter->n_off) { + // Ensure iter->curr_reg is correct. + // + // We need this for CRAM as we shortcut some of the later + // logic by getting an end-of-range and continuing to the + // next offset. + // + // We cannot do this for BAM (and fortunately do not need to + // either) because in BAM world a query to genomic positions + // GX and GY leading to a seek offsets PX and PY may have + // GX > GY and PX < PY. (This is due to the R-tree and falling + // between intervals, bumping up to a higher bin.) + // CRAM strictly follows PX >= PY if GX >= GY, so this logic + // works. 
+ int want_tid = iter->off[iter->i].max >> 32; + if (!(iter->curr_reg < iter->n_reg && + iter->reg_list[iter->curr_reg].tid == want_tid)) { + int j; + for (j = 0; j < iter->n_reg; j++) + if (iter->reg_list[j].tid == want_tid) + break; + if (j == iter->n_reg) + return -1; + iter->curr_reg = j; + iter->curr_tid = iter->reg_list[iter->curr_reg].tid; + }; + iter->curr_intv = iter->off[iter->i].max & 0xffffffff; + } + + if (iter->i >= iter->n_off) { // no more chunks, except NOCOORs + if (iter->nocoor) { + next_range = 0; + if (iter->seek(fp, iter->nocoor_off, SEEK_SET) < 0) { + hts_log_error("Seek at offset %" PRIu64 " failed.", iter->nocoor_off); + return -1; + } + if (iter->is_cram) { + cram_range r = { HTS_IDX_NOCOOR }; + cram_set_option(fp, CRAM_OPT_RANGE_NOSEEK, &r); + } + + // The first slice covering the unmapped reads might + // contain a few mapped reads, so scroll + // forward until finding the first unmapped read. + do { + ret = iter->readrec(fp, fd, r, &tid, &beg, &end); + } while (tid >= 0 && ret >=0); + + if (ret < 0) + iter->finished = 1; + else + iter->read_rest = 1; + + iter->curr_off = 0; // don't seek any more + iter->curr_tid = tid; + iter->curr_beg = beg; + iter->curr_end = end; + + return ret; + } else { + ret = -1; break; + } + } else if (iter->i < iter->n_off) { + // New chunk may overlap the last one, so ensure we + // only seek forwards. + if (iter->curr_off < iter->off[iter->i].u || next_range) { + iter->curr_off = iter->off[iter->i].u; + + // CRAM has the capability of setting an end location. + // This means multi-threaded decodes can stop once they + // reach that point, rather than pointlessly decoding + // more slices than we'll be using. + // + // We have to be careful here. Whenever we set the cram + // range we need a corresponding seek in order to ensure + // we can safely decode at that offset. We use next_range + // var to ensure this is always true; this is set on + // end-of-range condition. It's never modified for BAM. 
+ if (iter->is_cram) { + // Next offset.[uv] tuple, but it's already been + // included in our cram range, so don't seek and don't + // reset range so we can efficiently multi-thread. + if (next_range || iter->curr_off >= iter->end) { + if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { + hts_log_error("Seek at offset %" PRIu64 + " failed.", iter->curr_off); + return -1; + } + + // Find the genomic range matching this interval. + int j; + hts_reglist_t *rl = &iter->reg_list[iter->curr_reg]; + cram_range r = { + rl->tid, + rl->intervals[iter->curr_intv].beg, + rl->intervals[iter->curr_intv].end + }; + + // Expand it up to cover neighbouring intervals. + // Note we can only have a single chromosome in a + // range, so if we detect our blocks span chromosomes + // or we have a multi-ref mode slice, we just use + // HTS_IDX_START refid instead. This doesn't actually + // seek (due to CRAM_OPT_RANGE_NOSEEK) and is simply + // and indicator of decoding with no end limit. + // + // That isn't as efficient as it could be, but it's + // no poorer than before and it works. + int tid = r.refid; + int64_t end = r.end; + int64_t v = iter->off[iter->i].v; + j = iter->i+1; + while (j < iter->n_off) { + if (iter->off[j].u > v) + break; + + uint64_t max = iter->off[j].max; + if ((max>>32) != tid) { + tid = HTS_IDX_START; // => no range limit + } else { + if (end < rl->intervals[max & 0xffffffff].end) + end = rl->intervals[max & 0xffffffff].end; + } + if (v < iter->off[j].v) + v = iter->off[j].v; + j++; + } + r.refid = tid; + r.end = end; + + // Remember maximum 'v' here so we don't do + // unnecessary subsequent seeks for the next + // regions. We can't change curr_off, but + // beg/end are used only by single region iterator so + // we cache it there to avoid changing the struct. 
+ iter->end = v; + + cram_set_option(fp, CRAM_OPT_RANGE_NOSEEK, &r); + next_range = 0; + } + } else { // Not CRAM + if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { + hts_log_error("Seek at offset %" PRIu64 " failed.", + iter->curr_off); + return -1; + } + } + } + } + } + + ret = iter->readrec(fp, fd, r, &tid, &beg, &end); + if (ret < 0) { + if (iter->is_cram && cram_eof(fp)) { + // Skip to end of range + // + // We should never be adjusting curr_off manually unless + // we also can guarantee we'll be doing a seek after to + // a new location. Otherwise we'll be reading wrong offset + // for the next container. + // + // We ensure this by adjusting our CRAM_OPT_RANGE + // accordingly above, but to double check we also + // set the skipped_block flag to enforce a seek also. + iter->curr_off = iter->off[iter->i].v; + next_range = 1; + + // Next region + if (++iter->curr_intv >= iter->reg_list[iter->curr_reg].count){ + if (++iter->curr_reg >= iter->n_reg) + break; + iter->curr_intv = 0; + iter->curr_tid = iter->reg_list[iter->curr_reg].tid; + } + continue; + } else { + break; + } + } + + iter->curr_off = iter->tell(fp); + + if (tid != iter->curr_tid) { + hts_reglist_t key; + key.tid = tid; + + found_reg = (hts_reglist_t *)bsearch(&key, iter->reg_list, + iter->n_reg, + sizeof(hts_reglist_t), + compare_regions); + if (!found_reg) + continue; + + iter->curr_reg = (found_reg - iter->reg_list); + iter->curr_tid = tid; + iter->curr_intv = 0; + } + + cr = iter->curr_reg; + ci = iter->curr_intv; + + for (i = ci; i < iter->reg_list[cr].count; i++) { + if (end > iter->reg_list[cr].intervals[i].beg && + iter->reg_list[cr].intervals[i].end > beg) { + iter->curr_beg = beg; + iter->curr_end = end; + iter->curr_intv = i; + + return ret; + } + + // Check if the read starts beyond intervals[i].end + // If so, the interval is finished so move on to the next. 
+ if (beg > iter->reg_list[cr].intervals[i].end) + iter->curr_intv = i + 1; + + // No need to keep searching if the read ends before intervals[i].beg + if (end < iter->reg_list[cr].intervals[i].beg) + break; + } + } + iter->finished = 1; + + return ret; +} + +/********************** + *** Retrieve index *** + **********************/ +// Local_fn and local_len will return a sub-region of 'fn'. +// Eg http://elsewhere/dir/foo.bam.bai?a=b may return +// foo.bam.bai via local_fn and local_len. +// +// Returns -1 if index couldn't be opened. +// -2 on other errors +static int idx_test_and_fetch(const char *fn, const char **local_fn, int *local_len, int download) +{ + hFILE *remote_hfp = NULL; + hFILE *local_fp = NULL; + int save_errno; + htsFormat fmt; + kstring_t s = KS_INITIALIZE; + kstring_t tmps = KS_INITIALIZE; + + if (hisremote(fn)) { + const int buf_size = 1 * 1024 * 1024; + int l; + const char *p, *e; + // Ignore ?# params: eg any file.fmt?param=val, except for S3 URLs + e = fn + ((strncmp(fn, "s3://", 5) && strncmp(fn, "s3+http://", 10) && strncmp(fn, "s3+https://", 11)) ? strcspn(fn, "?#") : strcspn(fn, "?")); + // Find the previous slash from there. + p = e; + while (p > fn && *p != '/') p--; + if (*p == '/') p++; + + // Attempt to open local file first + kputsn(p, e-p, &s); + if (access(s.s, R_OK) == 0) + { + free(s.s); + *local_fn = p; + *local_len = e-p; + return 0; + } + + // Attempt to open remote file. Stay quiet on failure, it is OK to fail when trying first .csi then .bai or .tbi index. 
+ if ((remote_hfp = hopen(fn, "r")) == 0) { + hts_log_info("Failed to open index file '%s'", fn); + free(s.s); + return -1; + } + if (hts_detect_format2(remote_hfp, fn, &fmt)) { + hts_log_error("Failed to detect format of index file '%s'", fn); + goto fail; + } + if (fmt.category != index_file || (fmt.format != bai && fmt.format != csi && fmt.format != tbi + && fmt.format != crai && fmt.format != fai_format)) { + hts_log_error("Format of index file '%s' is not supported", fn); + goto fail; + } + + if (download) { + if ((local_fp = hts_open_tmpfile(s.s, "wx", &tmps)) == NULL) { + hts_log_error("Failed to create file %s in the working directory", p); + goto fail; + } + hts_log_info("Downloading file %s to local directory", fn); + uint8_t *buf = (uint8_t*)calloc(buf_size, 1); + if (!buf) { + hts_log_error("%s", strerror(errno)); + goto fail; + } + while ((l = hread(remote_hfp, buf, buf_size)) > 0) { + if (hwrite(local_fp, buf, l) != l) { + hts_log_error("Failed to write data to %s : %s", + fn, strerror(errno)); + free(buf); + goto fail; + } + } + free(buf); + if (l < 0) { + hts_log_error("Error reading \"%s\"", fn); + goto fail; + } + if (hclose(local_fp) < 0) { + hts_log_error("Error closing %s : %s", fn, strerror(errno)); + local_fp = NULL; + goto fail; + } + local_fp = NULL; + if (rename(tmps.s, s.s) < 0) { + hts_log_error("Error renaming %s : %s", tmps.s, strerror(errno)); + goto fail; + } + ks_clear(&tmps); + + *local_fn = p; + *local_len = e-p; + } else { + *local_fn = fn; + *local_len = e-fn; + } + + if (hclose(remote_hfp) != 0) { + hts_log_error("Failed to close remote file %s", fn); + } + + free(tmps.s); + free(s.s); + return 0; + } else { + hFILE *local_hfp; + if ((local_hfp = hopen(fn, "r")) == 0) return -1; + hclose_abruptly(local_hfp); + *local_fn = fn; + *local_len = strlen(fn); + return 0; + } + + fail: + save_errno = errno; + if (remote_hfp) hclose_abruptly(remote_hfp); + if (local_fp) hclose_abruptly(local_fp); + if (tmps.l > 0) unlink(tmps.s); + 
free(tmps.s); + free(s.s); + errno = save_errno; + return -2; +} + +/* + * Check the existence of a local index file using part of the alignment + * file name. + * + * For a filename fn of fn.fmt (eg fn.bam or fn.cram) the order of checks is + * fn.fmt.csi, fn.csi, + * fn.fmt.bai, fn.bai - if fmt is HTS_FMT_BAI + * fn.fmt.tbi, fn.tbi - if fmt is HTS_FMT_TBI + * fn.fmt.crai, fn.crai - if fmt is HTS_FMT_CRAI + * fn.fmt.fai - if fmt is HTS_FMT_FAI + * also .gzi if fmt is ".gz" + * + * @param fn - pointer to the file name + * @param fmt - one of the HTS_FMT index formats + * @param fnidx - pointer to the index file name placeholder + * @return 1 for success, 0 for failure + */ +int hts_idx_check_local(const char *fn, int fmt, char **fnidx) { + int i, l_fn, l_ext; + const char *fn_tmp = NULL; + char *fnidx_tmp; + const char *csi_ext = ".csi"; + const char *bai_ext = ".bai"; + const char *tbi_ext = ".tbi"; + const char *crai_ext = ".crai"; + const char *fai_ext = ".fai"; + const char *gzi_ext = ".gzi"; + + if (!fn) + return 0; + + if (hisremote(fn)) { + for (i = strlen(fn) - 1; i >= 0; --i) + if (fn[i] == '/') { + fn_tmp = (char *)&fn[i+1]; + break; + } + } else { + // Borrowed from hopen_fd_fileuri() + if (strncmp(fn, "file://localhost/", 17) == 0) fn_tmp = fn + 16; + else if (strncmp(fn, "file:///", 8) == 0) fn_tmp = fn + 7; + else fn_tmp = fn; +#if defined(_WIN32) || defined(__MSYS__) + // For cases like C:/foo + if (fn_tmp[0] == '/' && fn_tmp[1] && fn_tmp[2] == ':' && fn_tmp[3] == '/') + fn_tmp++; +#endif + } + + if (!fn_tmp) return 0; + hts_log_info("Using alignment file '%s'", fn_tmp); + l_fn = strlen(fn_tmp); l_ext = 5; + fnidx_tmp = (char*)calloc(l_fn + l_ext + 1, 1); + if (!fnidx_tmp) return 0; + + struct stat sbuf; + + // Try alignment.bam.csi first + strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, csi_ext); + if(stat(fnidx_tmp, &sbuf) == 0) { + *fnidx = fnidx_tmp; + return 1; + } else { // Then try alignment.csi + for (i = l_fn - 1; i > 0; --i) + if 
(fnidx_tmp[i] == '.') { + strcpy(fnidx_tmp + i, csi_ext); + if(stat(fnidx_tmp, &sbuf) == 0) { + *fnidx = fnidx_tmp; + return 1; + } + break; + } + } + if (fmt == HTS_FMT_BAI) { + // Next, try alignment.bam.bai + strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, bai_ext); + if(stat(fnidx_tmp, &sbuf) == 0) { + *fnidx = fnidx_tmp; + return 1; + } else { // And finally, try alignment.bai + for (i = l_fn - 1; i > 0; --i) + if (fnidx_tmp[i] == '.') { + strcpy(fnidx_tmp + i, bai_ext); + if(stat(fnidx_tmp, &sbuf) == 0) { + *fnidx = fnidx_tmp; + return 1; + } + break; + } + } + } else if (fmt == HTS_FMT_TBI) { // Or .tbi + strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, tbi_ext); + if(stat(fnidx_tmp, &sbuf) == 0) { + *fnidx = fnidx_tmp; + return 1; + } else { + for (i = l_fn - 1; i > 0; --i) + if (fnidx_tmp[i] == '.') { + strcpy(fnidx_tmp + i, tbi_ext); + if(stat(fnidx_tmp, &sbuf) == 0) { + *fnidx = fnidx_tmp; + return 1; + } + break; + } + } + } else if (fmt == HTS_FMT_CRAI) { // Or .crai + strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, crai_ext); + if(stat(fnidx_tmp, &sbuf) == 0) { + *fnidx = fnidx_tmp; + return 1; + } else { + for (i = l_fn - 1; i > 0; --i) + if (fnidx_tmp[i] == '.') { + strcpy(fnidx_tmp + i, crai_ext); + if(stat(fnidx_tmp, &sbuf) == 0) { + *fnidx = fnidx_tmp; + return 1; + } + break; + } + } + } else if (fmt == HTS_FMT_FAI) { // Or .fai + // Check .gzi if we have a .gz file + strcpy(fnidx_tmp, fn_tmp); + int gzi_ok = 1; + if ((l_fn > 3 && strcmp(fn_tmp+l_fn-3, ".gz") == 0) || + (l_fn > 5 && strcmp(fn_tmp+l_fn-5, ".bgzf") == 0)) { + strcpy(fnidx_tmp + l_fn, gzi_ext); + gzi_ok = stat(fnidx_tmp, &sbuf)==0; + } + + // Now check for .fai. Occurs second as we're returning this + // in *fnidx irrespective of whether we did gzi check. 
+ strcpy(fnidx_tmp + l_fn, fai_ext); + *fnidx = fnidx_tmp; + if (stat(fnidx_tmp, &sbuf) == 0) + return gzi_ok; + else + return 0; + } + + free(fnidx_tmp); + return 0; +} + +static char *idx_filename(const char *fn, const char *ext, int download) { + int ret, local_len; + char *fnidx; + const char *local_fn = NULL; + kstring_t buffer = KS_INITIALIZE; + + // First try : append `ext` to `fn` + if (!(fnidx = haddextension(&buffer, fn, 0, ext))) { + free(buffer.s); + return NULL; + } + if ((ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, download)) == -1) { + // Second try : replace suffix of `fn` with `ext` + if (!(fnidx = haddextension(&buffer, fn, 1, ext))) { + free(buffer.s); + return NULL; + } + ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, download); + } + + if (ret < 0) { + free(buffer.s); + return NULL; + } + + memmove(fnidx, local_fn, local_len); + fnidx[local_len] = 0; + return fnidx; +} + +char *hts_idx_getfn(const char *fn, const char *ext) +{ + return idx_filename(fn, ext, HTS_IDX_SAVE_REMOTE); +} + +char *hts_idx_locatefn(const char *fn, const char *ext) +{ + return idx_filename(fn, ext, 0); +} + +static hts_idx_t *idx_find_and_load(const char *fn, int fmt, int flags) +{ + char *fnidx = strstr(fn, HTS_IDX_DELIM); + hts_idx_t *idx; + + if ( fnidx ) { + char *fn2 = strdup(fn); + if (!fn2) { + hts_log_error("%s", strerror(errno)); + return NULL; + } + fn2[fnidx - fn] = '\0'; + fnidx += strlen(HTS_IDX_DELIM); + idx = hts_idx_load3(fn2, fnidx, fmt, flags); + free(fn2); + return idx; + } + + if (hts_idx_check_local(fn, fmt, &fnidx) == 0 && hisremote(fn)) { + if (flags & HTS_IDX_SAVE_REMOTE) { + fnidx = idx_filename(fn, ".csi", HTS_IDX_SAVE_REMOTE); + if (!fnidx) { + switch (fmt) { + case HTS_FMT_BAI: fnidx = idx_filename(fn, ".bai", HTS_IDX_SAVE_REMOTE); break; + case HTS_FMT_TBI: fnidx = idx_filename(fn, ".tbi", HTS_IDX_SAVE_REMOTE); break; + default: break; + } + } + } else { + fnidx = idx_filename(fn, ".csi", 0); + if (!fnidx) { + switch (fmt) 
{ + case HTS_FMT_BAI: fnidx = idx_filename(fn, ".bai", 0); break; + case HTS_FMT_TBI: fnidx = idx_filename(fn, ".tbi", 0); break; + default: break; + } + } + } + } + if (!fnidx) { + if (!(flags & HTS_IDX_SILENT_FAIL)) + hts_log_error("Could not retrieve index file for '%s'", fn); + return 0; + } + + if (flags & HTS_IDX_SAVE_REMOTE) + idx = hts_idx_load3(fn, fnidx, fmt, flags); + else + idx = idx_read(fnidx); + free(fnidx); + return idx; +} + +hts_idx_t *hts_idx_load(const char *fn, int fmt) { + return idx_find_and_load(fn, fmt, 1); +} + +hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx) +{ + return hts_idx_load3(fn, fnidx, 0, 0); +} + +hts_idx_t *hts_idx_load3(const char *fn, const char *fnidx, int fmt, int flags) +{ + const char *local_fn = NULL; + char *local_fnidx = NULL; + int local_len; + if (!fnidx) + return idx_find_and_load(fn, fmt, flags); + + // Check that the index file is up to date, the main file might have changed + struct stat stat_idx,stat_main; + int remote_fn = hisremote(fn), remote_fnidx = hisremote(fnidx); + if ( !remote_fn && !remote_fnidx + && !stat(fn, &stat_main) && !stat(fnidx, &stat_idx) ) + { + if ( stat_idx.st_mtime < stat_main.st_mtime ) + hts_log_warning("The index file is older than the data file: %s", fnidx); + } + + if (remote_fnidx && (flags & HTS_IDX_SAVE_REMOTE)) + { + int ret = idx_test_and_fetch(fnidx, &local_fn, &local_len, 1); + if (ret == 0) { + local_fnidx = strdup(local_fn); + if (local_fnidx) { + local_fnidx[local_len] = '\0'; + fnidx = local_fnidx; + } + } + } + + hts_idx_t *idx = idx_read(fnidx); + if (!idx && !(flags & HTS_IDX_SILENT_FAIL)) + hts_log_error("Could not load local index file '%s'%s%s", fnidx, + errno ? " : " : "", errno ? 
strerror(errno) : ""); + + + free(local_fnidx); + + return idx; +} + + + +/********************** + *** Memory *** + **********************/ + +/* For use with hts_expand macros *only* */ +HTSLIB_EXPORT +size_t hts_realloc_or_die(size_t n, size_t m, size_t m_sz, size_t size, + int clear, void **ptr, const char *func) { + /* If new_m and size are both below this limit, multiplying them + together can't overflow */ + const size_t safe = (size_t) 1 << (sizeof(size_t) * 4); + void *new_ptr; + size_t bytes, new_m; + + new_m = n; + kroundup_size_t(new_m); + + bytes = size * new_m; + + /* Check for overflow. Both ensure that new_m will fit in m (we make the + pessimistic assumption that m is signed), and that bytes has not + wrapped around. */ + if (new_m > (((size_t) 1 << (m_sz * 8 - 1)) - 1) + || ((size > safe || new_m > safe) + && bytes / new_m != size)) { + errno = ENOMEM; + goto die; + } + + new_ptr = realloc(*ptr, bytes); + if (new_ptr == NULL) goto die; + + if (clear) { + if (new_m > m) { + memset((char *) new_ptr + m * size, 0, (new_m - m) * size); + } + } + + *ptr = new_ptr; + + return new_m; + + die: + hts_log_error("%s", strerror(errno)); + exit(1); +} + +/* + * Companion to hts_resize() macro that does the actual allocation. + * + * Somewhat complicated as hts_resize() needs to write the new allocated + * size back into *size_in_out, and the value pointed to may either be + * int32_t, uint32_t or size_t depending on which array is being resized. + * This is solved by making `size_in_out` a void pointer, getting the macro + * to pass in the size of the item pointed to (in `size_sz`) and then using + * an appropriate cast (based on the value of size_sz). The function + * ensures that the maximum size will be storable in a signed type of + * the given size so storing to an int32_t should work correctly. + * + * Assumes that sizeof(uint32_t) and sizeof(int32_t) is 4, + * sizeof(uint64_t) and sizeof(int64_t) is 8 and sizeof(size_t) is + * either 4 or 8. 
It also assumes casting from unsigned to signed will + * work as long as the top bit isn't set. + */ + +int hts_resize_array_(size_t item_size, size_t num, size_t size_sz, + void *size_in_out, void **ptr_in_out, int flags, + const char *func) { + /* If new_size and item_size are both below this limit, multiplying them + together can't overflow */ + const size_t safe = (size_t) 1 << (sizeof(size_t) * 4); + void *new_ptr; + size_t bytes, new_size; + + new_size = num; + kroundup_size_t(new_size); + bytes = item_size * new_size; + + /* Check for overflow. Both ensure that alloc will fit in alloc_in_out (we + make the pessimistic assumption that *alloc_in_out is signed), and that + bytes has not wrapped around. */ + + if ((new_size > (((size_t) 1 << (size_sz * 8 - 1)) - 1)) + || (((item_size > safe) || (new_size > safe)) + && bytes / new_size != item_size)) { + hts_log(HTS_LOG_ERROR, func, "Memory allocation too large"); + errno = ENOMEM; + return -1; + } + + new_ptr = realloc(*ptr_in_out, bytes); + if (new_ptr == NULL) { + int save_errno = errno; + hts_log(HTS_LOG_ERROR, func, "%s", strerror(errno)); + errno = save_errno; + return -1; + } + + if (flags & HTS_RESIZE_CLEAR) { + size_t old_size; + switch (size_sz) { + case 4: old_size = *((uint32_t *) size_in_out); break; + case 8: old_size = *((uint64_t *) size_in_out); break; + default: abort(); + } + if (new_size > old_size) { + memset((char *) new_ptr + old_size * item_size, 0, + (new_size - old_size) * item_size); + } + } + + switch (size_sz) { + case 4: *((uint32_t *) size_in_out) = new_size; break; + case 8: *((uint64_t *) size_in_out) = new_size; break; + default: abort(); + } + + *ptr_in_out = new_ptr; + return 0; +} + +void hts_lib_shutdown(void) +{ + hfile_shutdown(1); +} + +void hts_free(void *ptr) { + free(ptr); +} + +void hts_set_log_level(enum htsLogLevel level) +{ + hts_verbose = level; +} + +enum htsLogLevel hts_get_log_level(void) +{ + return hts_verbose; +} + +static char get_severity_tag(enum 
htsLogLevel severity) +{ + switch (severity) { + case HTS_LOG_ERROR: + return 'E'; + case HTS_LOG_WARNING: + return 'W'; + case HTS_LOG_INFO: + return 'I'; + case HTS_LOG_DEBUG: + return 'D'; + case HTS_LOG_TRACE: + return 'T'; + default: + break; + } + + return '*'; +} + +void hts_log(enum htsLogLevel severity, const char *context, const char *format, ...) +{ + int save_errno = errno; + if (severity <= hts_verbose) { + va_list argptr; + + fprintf(stderr, "[%c::%s] ", get_severity_tag(severity), context); + + va_start(argptr, format); + vfprintf(stderr, format, argptr); + va_end(argptr); + + fprintf(stderr, "\n"); + } + errno = save_errno; +} diff --git a/src/htslib-1.21/hts_expr.c b/src/htslib-1.21/hts_expr.c new file mode 100644 index 0000000..dfd15b1 --- /dev/null +++ b/src/htslib-1.21/hts_expr.c @@ -0,0 +1,927 @@ +/* hts_expr.c -- filter expression parsing and processing. + + Copyright (C) 2020-2022, 2024 Genome Research Ltd. + + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notices and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+// TODO:
+// - ?: operator for conditionals?
+
+#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "htslib/hts_expr.h"
+#include "htslib/hts_log.h"
+#include "textutils_internal.h"
+
+// Could also cache hts_expr_val_t stack here for kstring reuse?
+#define MAX_REGEX 10 // Capacity of the per-filter compiled regex cache (preg[] below)
+struct hts_filter_t {
+    char *str;                 // Expression text; hts_filter_init() copies the caller's string here
+    int parsed;                // Not referenced by any code visible in this part of the file
+    int curr_regex, max_regex; // Slot used by the next =~ / !~ in this evaluation; slots compiled so far
+    regex_t preg[MAX_REGEX];   // Compiled regexes; the first max_regex are freed by hts_filter_free()
+};
+
+/*
+ * This is designed to be mostly C like with mostly the same precedence rules,
+ * with the exception of bit operators (widely considered as a mistake in C).
+ * It's not full C (eg no bit-shifting), but good enough for our purposes.
+ *
+ * Supported syntax, in order of precedence:
+ *
+ * Grouping:      (, ),   eg "(1+2)*3"
+ * Values:        integers, floats, strings or variables
+ * Unary ops:     +, -, !, ~  eg -10 +10, !10 (0), ~5 (bitwise not)
+ * Math ops:      *, /, %  [TODO: add // for floor division?]
+ * Math ops:      +, -
+ * Bit-wise:      &, ^, |  [NB as 3 precedence levels, in that order]
+ * Conditionals:  >, >=, <, <=,
+ * Equality:      ==, !=, =~, !~
+ * Boolean:       &&, ||
+ *
+ * Each precedence level is one static function below; a level parses its
+ * operands by calling the next tighter-binding level, and expression() is
+ * the entry point that starts at the loosest one.
+ */
+
+// Skip to start of term: steps over spaces and tabs only, returns the first other char (or the NUL)
+static char *ws(char *str) {
+    while (*str && (*str == ' ' || *str == '\t'))
+        str++;
+    return str;
+}
+
+static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                      char *str, char **end, hts_expr_val_t *res);
+
+/*
+ * Simple functions operating on strings only.
+ * length, min, max, avg.
+ *
+ * All return 0 on success,
+ *           -1 on failure
+ */
+static int expr_func_length(hts_expr_val_t *res) {
+    // length(): replace a string value by its byte count.
+    if (res->is_str) {
+        res->d = res->s.l;
+        res->is_str = 0;
+        return 0;
+    }
+
+    return -1;
+}
+
+static int expr_func_min(hts_expr_val_t *res) {
+    // min(): smallest byte of the string, or NAN when the string is empty.
+    if (!res->is_str)
+        return -1;
+
+    const uint8_t *bytes = (uint8_t *)res->s.s;
+    const size_t n = res->s.l;
+    int lowest = INT_MAX;   // sentinel: no byte seen yet
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        if (bytes[i] < lowest)
+            lowest = bytes[i];
+    }
+
+    res->is_str = 0;
+    if (lowest == INT_MAX)
+        res->d = NAN;
+    else
+        res->d = lowest;
+
+    return 0;
+}
+
+static int expr_func_max(hts_expr_val_t *res) {
+    // max(): largest byte of the string, or NAN when the string is empty.
+    if (!res->is_str)
+        return -1;
+
+    const uint8_t *bytes = (uint8_t *)res->s.s;
+    const size_t n = res->s.l;
+    int highest = INT_MIN;  // sentinel: no byte seen yet
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        if (bytes[i] > highest)
+            highest = bytes[i];
+    }
+
+    res->is_str = 0;
+    if (highest == INT_MIN)
+        res->d = NAN;
+    else
+        res->d = highest;
+
+    return 0;
+}
+
+static int expr_func_avg(hts_expr_val_t *res) {
+    // avg(): arithmetic mean of the string's bytes; an empty string gives 0.
+    if (!res->is_str)
+        return -1;
+
+    const uint8_t *bytes = (uint8_t *)res->s.s;
+    const size_t n = res->s.l;
+    double total = 0;
+    size_t i;
+
+    for (i = 0; i < n; i++)
+        total += bytes[i];
+
+    // Leave the sum at zero rather than dividing by a zero length.
+    if (n != 0)
+        total /= n;
+
+    res->d = total;
+    res->is_str = 0;
+
+    return 0;
+}
+
+/*
+ * functions:  FUNC(expr).
+ * Note for simplicity of parsing, the "(" must immediately follow FUNC,
+ * so "FUNC (x)" is invalid.
+ */ +static int func_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + int func_ok = -1; + switch (*str) { + case 'a': + if (strncmp(str, "avg(", 4) == 0) { + if (expression(filt, data, fn, str+4, end, res)) return -1; + func_ok = expr_func_avg(res); + } + break; + + case 'd': + if (strncmp(str, "default(", 8) == 0) { + if (expression(filt, data, fn, str+8, end, res)) return -1; + if (**end != ',') + return -1; + (*end)++; + hts_expr_val_t val = HTS_EXPR_VAL_INIT; + if (expression(filt, data, fn, ws(*end), end, &val)) return -1; + func_ok = 1; + if (!hts_expr_val_existsT(res)) { + kstring_t swap = res->s; + *res = val; + val.s = swap; + hts_expr_val_free(&val); + } + } + break; + + case 'e': + if (strncmp(str, "exists(", 7) == 0) { + if (expression(filt, data, fn, str+7, end, res)) return -1; + func_ok = 1; + res->is_true = res->d = hts_expr_val_existsT(res); + res->is_str = 0; + } else if (strncmp(str, "exp(", 4) == 0) { + if (expression(filt, data, fn, str+4, end, res)) return -1; + func_ok = 1; + res->d = exp(res->d); + res->is_str = 0; + if (isnan(res->d)) + hts_expr_val_undef(res); + } + + break; + + case 'l': + if (strncmp(str, "length(", 7) == 0) { + if (expression(filt, data, fn, str+7, end, res)) return -1; + func_ok = expr_func_length(res); + } else if (strncmp(str, "log(", 4) == 0) { + if (expression(filt, data, fn, str+4, end, res)) return -1; + func_ok = 1; + res->d = log(res->d); + res->is_str = 0; + if (isnan(res->d)) + hts_expr_val_undef(res); + } + break; + + case 'm': + if (strncmp(str, "min(", 4) == 0) { + if (expression(filt, data, fn, str+4, end, res)) return -1; + func_ok = expr_func_min(res); + } else if (strncmp(str, "max(", 4) == 0) { + if (expression(filt, data, fn, str+4, end, res)) return -1; + func_ok = expr_func_max(res); + } + break; + + case 'p': + if (strncmp(str, "pow(", 4) == 0) { + if (expression(filt, data, fn, str+4, end, res)) return -1; + func_ok = 1; + + if (**end 
!= ',') + return -1; + (*end)++; + hts_expr_val_t val = HTS_EXPR_VAL_INIT; + if (expression(filt, data, fn, ws(*end), end, &val)) return -1; + if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { + hts_expr_val_undef(res); + } else if (res->is_str || val.is_str) { + hts_expr_val_free(&val); // arith on strings + return -1; + } else { + func_ok = 1; + res->d = pow(res->d, val.d); + hts_expr_val_free(&val); + res->is_str = 0; + } + + if (isnan(res->d)) + hts_expr_val_undef(res); + } + break; + + case 's': + if (strncmp(str, "sqrt(", 5) == 0) { + if (expression(filt, data, fn, str+5, end, res)) return -1; + func_ok = 1; + res->d = sqrt(res->d); + res->is_str = 0; + if (isnan(res->d)) + hts_expr_val_undef(res); + } + break; + } + + if (func_ok < 0) + return -1; + + str = ws(*end); + if (*str != ')') { + fprintf(stderr, "Missing ')'\n"); + return -1; + } + *end = str+1; + + return 0; +} + +/* + * simple_expr + * : identifier + * | constant + * | string + * | func_expr + * | '(' expression ')' +*/ +static int simple_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + // Main recursion step + str = ws(str); + if (*str == '(') { + if (expression(filt, data, fn, str+1, end, res)) return -1; + str = ws(*end); + if (*str != ')') { + fprintf(stderr, "Missing ')'\n"); + return -1; + } + *end = str+1; + + return 0; + } + + // Otherwise a basic element. + int fail = 0; + double d = hts_str2dbl(str, end, &fail); + if (str != *end) { + res->is_str = 0; + res->d = d; + } else { + // Not valid floating point syntax. 
+ // TODO: add function call names in here; len(), sqrt(), pow(), etc + if (*str == '"') { + res->is_str = 1; + char *e = str+1; + int backslash = 0; + while (*e && *e != '"') { + if (*e == '\\') + backslash=1, e+=1+(e[1]!='\0'); + else + e++; + } + + kputsn(str+1, e-(str+1), ks_clear(&res->s)); + if (backslash) { + size_t i, j; + for (i = j = 0; i < res->s.l; i++) { + res->s.s[j++] = res->s.s[i]; + if (res->s.s[i] == '\\') { + switch (res->s.s[++i]) { + case '"': res->s.s[j-1] = '"'; break; + case '\\':res->s.s[j-1] = '\\'; break; + case 't': res->s.s[j-1] = '\t'; break; + case 'n': res->s.s[j-1] = '\n'; break; + case 'r': res->s.s[j-1] = '\r'; break; + default: res->s.s[j++] = res->s.s[i]; + } + } + } + res->s.s[j] = 0; + res->s.l = j; + } + if (*e != '"') + return -1; + *end = e+1; + } else if (fn) { + // Try lookup as variable, if not as function + if (fn(data, str, end, res) == 0) + return 0; + else + return func_expr(filt, data, fn, str, end, res); + } else { + return -1; + } + } + + return 0; +} + +/* + * unary_expr + * : simple_expr + * | '+' simple_expr + * | '-' simple_expr + * | '!' unary_expr // higher precedence + * | '~' unary_expr // higher precedence + */ +static int unary_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + int err; + str = ws(str); + if (*str == '+' || *str == '-') { + err = simple_expr(filt, data, fn, str+1, end, res); + if (!hts_expr_val_exists(res)) { + hts_expr_val_undef(res); + } else { + err |= res->is_str; + if (*str == '-') + res->d = -res->d; + res->is_true = res->d != 0; + } + } else if (*str == '!') { + err = unary_expr(filt, data, fn, str+1, end, res); + if (res->is_true) { + // Any explicitly true value becomes false + res->d = res->is_true = 0; + } else if (!hts_expr_val_exists(res)) { + // We can also still negate undef values by toggling the + // is_true override value. 
+ res->d = res->is_true = !res->is_true; + } else if (res->is_str) { + // !null = true, !"foo" = false, NOTE: !"" = false also + res->d = res->is_true = (res->s.s == NULL); + } else { + res->d = !(int64_t)res->d; + res->is_true = res->d != 0; + } + res->is_str = 0; + } else if (*str == '~') { + err = unary_expr(filt, data, fn, str+1, end, res); + if (!hts_expr_val_exists(res)) { + hts_expr_val_undef(res); + } else { + err |= res->is_str; + if (!hts_expr_val_exists(res)) { + hts_expr_val_undef(res); + } else { + res->d = ~(int64_t)res->d; + res->is_true = res->d != 0; + } + } + } else { + err = simple_expr(filt, data, fn, str, end, res); + } + return err ? -1 : 0; +} + + +/* + * mul_expr + * : unary_expr ( + * '*' unary_expr + * | '/' unary_expr + * | '%' unary_expr + * )* + */ +static int mul_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + if (unary_expr(filt, data, fn, str, end, res)) + return -1; + + str = *end; + hts_expr_val_t val = HTS_EXPR_VAL_INIT; + while (*str) { + str = ws(str); + if (*str == '*' || *str == '/' || *str == '%') { + if (unary_expr(filt, data, fn, str+1, end, &val)) return -1; + if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { + hts_expr_val_undef(res); + } else if (val.is_str || res->is_str) { + hts_expr_val_free(&val); + return -1; // arith on strings + } + } + + if (*str == '*') + res->d *= val.d; + else if (*str == '/') + res->d /= val.d; + else if (*str == '%') { + if (val.d) + res->d = (int64_t)res->d % (int64_t)val.d; + else + hts_expr_val_undef(res); + } else + break; + + res->is_true = hts_expr_val_exists(res) && (res->d != 0); + str = *end; + } + + hts_expr_val_free(&val); + + return 0; +} + +/* + * add_expr + * : mul_expr ( + * '+' mul_expr + * | '-' mul_expr + * )* + */ +static int add_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + if (mul_expr(filt, data, fn, str, end, res)) + return -1; + + 
str = *end; + hts_expr_val_t val = HTS_EXPR_VAL_INIT; + while (*str) { + str = ws(str); + int undef = 0; + if (*str == '+' || *str == '-') { + if (mul_expr(filt, data, fn, str+1, end, &val)) return -1; + if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { + undef = 1; + } else if (val.is_str || res->is_str) { + hts_expr_val_free(&val); + return -1; // arith on strings + } + } + + if (*str == '+') + res->d += val.d; + else if (*str == '-') + res->d -= val.d; + else + break; + + if (undef) + hts_expr_val_undef(res); + else + res->is_true = res->d != 0; + + str = *end; + } + + hts_expr_val_free(&val); + + return 0; +} + +/* + * bitand_expr + * : add_expr + * | bitand_expr '&' add_expr + */ +static int bitand_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + if (add_expr(filt, data, fn, str, end, res)) return -1; + + hts_expr_val_t val = HTS_EXPR_VAL_INIT; + int undef = 0; + for (;;) { + str = ws(*end); + if (*str == '&' && str[1] != '&') { + if (add_expr(filt, data, fn, str+1, end, &val)) return -1; + if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { + undef = 1; + } else if (res->is_str || val.is_str) { + hts_expr_val_free(&val); + return -1; + } else { + res->is_true = + (res->d = ((int64_t)res->d & (int64_t)val.d)) != 0; + } + } else { + break; + } + } + hts_expr_val_free(&val); + if (undef) + hts_expr_val_undef(res); + + return 0; +} + +/* + * bitxor_expr + * : bitand_expr + * | bitxor_expr '^' bitand_expr + */ +static int bitxor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + if (bitand_expr(filt, data, fn, str, end, res)) return -1; + + hts_expr_val_t val = HTS_EXPR_VAL_INIT; + int undef = 0; + for (;;) { + str = ws(*end); + if (*str == '^') { + if (bitand_expr(filt, data, fn, str+1, end, &val)) return -1; + if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { + undef = 1; + } else if (res->is_str || 
val.is_str) { + hts_expr_val_free(&val); + return -1; + } else { + res->is_true = + (res->d = ((int64_t)res->d ^ (int64_t)val.d)) != 0; + } + } else { + break; + } + } + hts_expr_val_free(&val); + if (undef) + hts_expr_val_undef(res); + + return 0; +} + +/* + * bitor_expr + * : bitxor_expr + * | bitor_expr '|' bitxor_expr + */ +static int bitor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + if (bitxor_expr(filt, data, fn, str, end, res)) return -1; + + hts_expr_val_t val = HTS_EXPR_VAL_INIT; + int undef = 0; + for (;;) { + str = ws(*end); + if (*str == '|' && str[1] != '|') { + if (bitxor_expr(filt, data, fn, str+1, end, &val)) return -1; + if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) { + undef = 1; + } else if (res->is_str || val.is_str) { + hts_expr_val_free(&val); + return -1; + } else { + res->is_true = + (res->d = ((int64_t)res->d | (int64_t)val.d)) != 0; + } + } else { + break; + } + } + hts_expr_val_free(&val); + if (undef) + hts_expr_val_undef(res); + + return 0; +} + +/* + * cmp_expr + * : bitor_expr + * | cmp_expr '<=' bitor_expr + * | cmp_expr '<' bitor_expr + * | cmp_expr '>=' bitor_expr + * | cmp_expr '>' bitor_expr + */ +static int cmp_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, + char *str, char **end, hts_expr_val_t *res) { + if (bitor_expr(filt, data, fn, str, end, res)) return -1; + + str = ws(*end); + hts_expr_val_t val = HTS_EXPR_VAL_INIT; + int err = 0, cmp_done = 0; + + if (*str == '>' && str[1] == '=') { + cmp_done = 1; + err = cmp_expr(filt, data, fn, str+2, end, &val); + if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { + hts_expr_val_undef(res); + } else { + res->is_true=res->d + = res->is_str && res->s.s && val.is_str && val.s.s + ? 
strcmp(res->s.s, val.s.s) >= 0 + : !res->is_str && !val.is_str && res->d >= val.d; + res->is_str = 0; + } + } else if (*str == '>') { + cmp_done = 1; + err = cmp_expr(filt, data, fn, str+1, end, &val); + if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { + hts_expr_val_undef(res); + } else { + res->is_true=res->d + = res->is_str && res->s.s && val.is_str && val.s.s + ? strcmp(res->s.s, val.s.s) > 0 + : !res->is_str && !val.is_str && res->d > val.d; + res->is_str = 0; + } + } else if (*str == '<' && str[1] == '=') { + cmp_done = 1; + err = cmp_expr(filt, data, fn, str+2, end, &val); + if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { + hts_expr_val_undef(res); + } else { + res->is_true=res->d + = res->is_str && res->s.s && val.is_str && val.s.s + ? strcmp(res->s.s, val.s.s) <= 0 + : !res->is_str && !val.is_str && res->d <= val.d; + res->is_str = 0; + } + } else if (*str == '<') { + cmp_done = 1; + err = cmp_expr(filt, data, fn, str+1, end, &val); + if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) { + hts_expr_val_undef(res); + } else { + res->is_true=res->d + = res->is_str && res->s.s && val.is_str && val.s.s + ? strcmp(res->s.s, val.s.s) < 0 + : !res->is_str && !val.is_str && res->d < val.d; + res->is_str = 0; + } + } + + if (cmp_done && (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res))) + hts_expr_val_undef(res); + hts_expr_val_free(&val); + + return err ? 
-1 : 0;
+}
+
+/*
+ * eq_expr
+ *     : cmp_expr
+ *     | eq_expr '==' cmp_expr
+ *     | eq_expr '!=' cmp_expr
+ *     | eq_expr '=~' cmp_expr
+ *     | eq_expr '!~' cmp_expr
+ *
+ * Parses one cmp_expr into *res and, if an equality or regex operator
+ * follows, evaluates the right hand side recursively and folds the outcome
+ * back into *res as a number (is_str is cleared).
+ *
+ * '=~' and '!~' need string operands on both sides; anything else is an
+ * error.  Compiled regexes are kept in filt->preg[] and looked up by
+ * filt->curr_regex, so a regex is compiled the first time it is reached and
+ * reused afterwards.  Only MAX_REGEX of them are kept; any further ones are
+ * compiled into a temporary and freed straight after use.
+ *
+ * Returns 0 on success,
+ *        -1 on failure
+ */
+static int eq_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                   char *str, char **end, hts_expr_val_t *res) {
+    if (cmp_expr(filt, data, fn, str, end, res)) return -1;
+
+    str = ws(*end);
+
+    int err = 0, eq_done = 0;
+    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+
+    // numeric vs numeric comparison is as expected
+    // string vs string comparison is as expected
+    // numeric vs string is false
+    if (str[0] == '=' && str[1] == '=') {
+        eq_done = 1;
+        if ((err = eq_expr(filt, data, fn, str+2, end, &val))) {
+            res->is_true = res->d = 0;
+        } else {
+            if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
+                hts_expr_val_undef(res);
+            } else {
+                res->is_true = res->d = res->is_str
+                    ? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s)==0 :0)
+                    : !res->is_str && !val.is_str && res->d == val.d;
+            }
+        }
+        res->is_str = 0;
+
+    } else if (str[0] == '!' && str[1] == '=') {
+        eq_done = 1;
+        if ((err = eq_expr(filt, data, fn, str+2, end, &val))) {
+            res->is_true = res->d = 0;
+        } else {
+            if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
+                hts_expr_val_undef(res);
+            } else {
+                res->is_true = res->d = res->is_str
+                    ? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s) != 0 :1)
+                    : res->is_str != val.is_str || res->d != val.d;
+            }
+        }
+        res->is_str = 0;
+
+    } else if ((str[0] == '=' && str[1] == '~') ||
+               (str[0] == '!' && str[1] == '~')) {
+        eq_done = 1;
+        err = eq_expr(filt, data, fn, str+2, end, &val);
+        if (!val.is_str || !res->is_str) {
+            hts_expr_val_free(&val);
+            return -1;
+        }
+        if (val.s.s && res->s.s && val.is_true >= 0 && res->is_true >= 0) {
+            regex_t preg_, *preg;
+            if (filt->curr_regex >= filt->max_regex) {
+                // Compile regex if not seen before.  Slots past MAX_REGEX
+                // do not exist, so those regexes live in the local preg_.
+                int cached = filt->curr_regex < MAX_REGEX;
+                preg = cached ? &filt->preg[filt->curr_regex] : &preg_;
+
+                int ec = regcomp(preg, val.s.s, REG_EXTENDED | REG_NOSUB);
+                if (ec != 0) {
+                    char errbuf[1024];
+                    regerror(ec, preg, errbuf, 1024);
+                    fprintf(stderr, "Failed regex: %.1024s\n", errbuf);
+                    hts_expr_val_free(&val);
+                    return -1;
+                }
+
+                // Only count the slot once regcomp() has succeeded.  The
+                // contents of a regex_t are undefined after a failed
+                // compile, so counting it earlier would let a later
+                // evaluation regexec() it and hts_filter_free() regfree() it.
+                if (cached)
+                    filt->max_regex++;
+            } else {
+                preg = &filt->preg[filt->curr_regex];
+            }
+            res->is_true = res->d = regexec(preg, res->s.s, 0, NULL, 0) == 0
+                ? *str == '='  // match
+                : *str == '!'; // no-match
+            if (preg == &preg_)
+                regfree(preg);
+
+            // NOTE(review): the slot index only advances when a match is
+            // actually attempted, so slot order appears to assume the same
+            // regex operators run on every evaluation - confirm.
+            filt->curr_regex++;
+        } else {
+            // nul regexp or input is considered false
+            res->is_true = 0;
+        }
+        res->is_str = 0;
+    }
+
+    if (eq_done && ((!hts_expr_val_exists(&val)) || !hts_expr_val_exists(res)))
+        hts_expr_val_undef(res);
+    hts_expr_val_free(&val);
+
+    return err ? -1 : 0;
+}
+
+/*
+ * and_expr
+ *     : eq_expr
+ *     | and_expr '&&' eq_expr
+ *     | and_expr '||' eq_expr
+ *
+ * Both operators are handled at this one level and applied left to right.
+ * An undefined operand makes '&&' undefined; '||' stays defined when the
+ * other operand is true.
+ *
+ * Returns 0 on success,
+ *        -1 on failure
+ */
+static int and_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                    char *str, char **end, hts_expr_val_t *res) {
+    if (eq_expr(filt, data, fn, str, end, res)) return -1;
+
+    for (;;) {
+        hts_expr_val_t val = HTS_EXPR_VAL_INIT;
+        str = ws(*end);
+        if (str[0] == '&' && str[1] == '&') {
+            if (eq_expr(filt, data, fn, str+2, end, &val)) {
+                // val may own string memory from the partial parse
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_existsT(res) || !hts_expr_val_existsT(&val)) {
+                hts_expr_val_undef(res);
+                res->d = 0;
+            } else {
+                res->is_true = res->d =
+                    (res->is_true || (res->is_str && res->s.s) || res->d) &&
+                    (val.is_true  || (val.is_str && val.s.s) || val.d);
+                res->is_str = 0;
+            }
+        } else if (str[0] == '|' && str[1] == '|') {
+            if (eq_expr(filt, data, fn, str+2, end, &val)) {
+                // val may own string memory from the partial parse
+                hts_expr_val_free(&val);
+                return -1;
+            }
+            if (!hts_expr_val_existsT(res) && !hts_expr_val_existsT(&val)) {
+                // neither defined
+                hts_expr_val_undef(res);
+                res->d = 0;
+            } else if (!hts_expr_val_existsT(res) &&
+                       !(val.is_true || (val.is_str && val.s.s ) || val.d)) {
+                // LHS undef and RHS false
+                hts_expr_val_undef(res);
+                res->d = 0;
+            } else if (!hts_expr_val_existsT(&val) &&
+                       !(res->is_true || (res->is_str && res->s.s) || res->d)){
+                // RHS undef and LHS false
+                hts_expr_val_undef(res);
+                res->d = 0;
+            } else {
+                res->is_true = res->d =
+                    res->is_true || (res->is_str && res->s.s) || res->d ||
+                    val.is_true  || (val.is_str && val.s.s ) || val.d;
+                res->is_str = 0;
+            }
+        } else {
+            break;
+        }
+        hts_expr_val_free(&val);
+    }
+
+    return 0;
+}
+
+// Top of the grammar: an expression is an and_expr.
+static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
+                      char *str, char **end, hts_expr_val_t *res) {
+    return and_expr(filt, data, fn, str, end, res);
+}
+
+hts_filter_t *hts_filter_init(const char *str) {
+    hts_filter_t *f = calloc(1, sizeof(*f));
+    if (!f) return NULL;
+
+    // Oversize to permit faster comparisons with memcmp over strcmp
+    size_t len = strlen(str)+100;
+    if (!(f->str = malloc(len))) {
free(f); + return NULL; + } + strcpy(f->str, str); + return f; +} + +void hts_filter_free(hts_filter_t *filt) { + if (!filt) + return; + + int i; + for (i = 0; i < filt->max_regex; i++) + regfree(&filt->preg[i]); + + free(filt->str); + free(filt); +} + +static int hts_filter_eval_(hts_filter_t *filt, + void *data, hts_expr_sym_func *fn, + hts_expr_val_t *res) { + char *end = NULL; + + filt->curr_regex = 0; + if (expression(filt, data, fn, filt->str, &end, res)) + return -1; + + if (end && *ws(end)) { + fprintf(stderr, "Unable to parse expression at %s\n", filt->str); + return -1; + } + + // Strings evaluate to true. An empty string is also true, but an + // absent (null) string is false, unless overriden by is_true. An + // empty string has kstring length of zero, but a pointer as it's + // nul-terminated. + if (res->is_str) { + res->is_true |= res->s.s != NULL; + res->d = res->is_true; + } else if (hts_expr_val_exists(res)) { + res->is_true |= res->d != 0; + } + + return 0; +} + +int hts_filter_eval(hts_filter_t *filt, + void *data, hts_expr_sym_func *fn, + hts_expr_val_t *res) { + if (res->s.l != 0 || res->s.m != 0 || res->s.s != NULL) { + // As *res is cleared below, it's not safe to call this function + // with res->s.s set, as memory would be leaked. It's also not + // possible to know is res was initialised correctly, so in + // either case we fail. 
+ hts_log_error("Results structure must be cleared before calling this function"); + return -1; + } + + memset(res, 0, sizeof(*res)); + + return hts_filter_eval_(filt, data, fn, res); +} + +int hts_filter_eval2(hts_filter_t *filt, + void *data, hts_expr_sym_func *fn, + hts_expr_val_t *res) { + ks_free(&res->s); + memset(res, 0, sizeof(*res)); + + return hts_filter_eval_(filt, data, fn, res); +} diff --git a/src/htslib-1.21/hts_internal.h b/src/htslib-1.21/hts_internal.h new file mode 100644 index 0000000..52f29e6 --- /dev/null +++ b/src/htslib-1.21/hts_internal.h @@ -0,0 +1,149 @@ +/* hts_internal.h -- internal functions; not part of the public API. + + Copyright (C) 2015-2016, 2018-2020 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef HTSLIB_HTS_INTERNAL_H +#define HTSLIB_HTS_INTERNAL_H + +#include +#include + +#include "htslib/hts.h" +#include "textutils_internal.h" + +#define HTS_MAX_EXT_LEN 9 + +#ifdef __cplusplus +extern "C" { +#endif + +struct hFILE; + +struct hts_json_token { + char type; ///< Token type + char *str; ///< Value as a C string (filled in for all token types) + // TODO Add other fields to fill in for particular data types, e.g. + // int inum; + // float fnum; +}; + +struct cram_fd; + +/* + * Check the existence of a local index file using part of the alignment file name. + * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai + * @param fn - pointer to the file name + * @param fnidx - pointer to the index file name placeholder + * @return 1 for success, 0 for failure + */ +int hts_idx_check_local(const char *fn, int fmt, char **fnidx); + +// Retrieve the name of the index file and also download it, if it is remote +char *hts_idx_getfn(const char *fn, const char *ext); + +// Retrieve the name of the index file, but do not download it, if it is remote +char *hts_idx_locatefn(const char *fn, const char *ext); + +// Used for on-the-fly indexing. See the comments in hts.c. +void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset); + +int hts_idx_fmt(hts_idx_t *idx); + +// Internal interface to save on-the-fly indexes. The index file handle +// is kept open so hts_close() can close it after writing out the EOF +// block for its own file. +int hts_idx_save_but_not_close(hts_idx_t *idx, const char *fnidx, int fmt); + +// Construct a unique filename based on fname and open it. +struct hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname); + +// Check that index is capable of storing items in range beg..end +int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end); + +// The CRAM implementation stores the loaded index within the cram_fd rather +// than separately as is done elsewhere in htslib.
So if p is a pointer to +// an hts_idx_t with p->fmt == HTS_FMT_CRAI, then it actually points to an +// hts_cram_idx_t and should be cast accordingly. +typedef struct hts_cram_idx_t { + int fmt; + struct cram_fd *cram; +} hts_cram_idx_t; + +// Determine whether the string's contents appear to be UTF-16-encoded text. +// Returns 1 if they are, 2 if there is also a BOM, or 0 otherwise. +int hts_is_utf16_text(const kstring_t *str); + +// Entry point to hFILE_multipart backend. +struct hFILE *hopen_htsget_redirect(struct hFILE *hfile, const char *mode); + +struct hts_path_itr { + kstring_t path, entry; + void *dirv; // DIR * privately + const char *pathdir, *prefix, *suffix; + size_t prefix_len, suffix_len, entry_dir_l; +}; + +void hts_path_itr_setup(struct hts_path_itr *itr, const char *path, + const char *builtin_path, const char *prefix, size_t prefix_len, + const char *suffix, size_t suffix_len); + +const char *hts_path_itr_next(struct hts_path_itr *itr); + +typedef void plugin_void_func(void); +plugin_void_func *load_plugin(void **pluginp, const char *filename, const char *symbol); +void *plugin_sym(void *plugin, const char *name, const char **errmsg); +plugin_void_func *plugin_func(void *plugin, const char *name, const char **errmsg); +void close_plugin(void *plugin); +const char *hts_plugin_path(void); + +/* + * Buffers up arguments to hts_idx_push for later use, once we've written all bar + * this block. This is necessary when multiple blocks are in flight (threading). + * + * Returns 0 on success, + * -1 on failure + */ +int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); + +static inline int find_file_extension(const char *fn, char ext_out[static HTS_MAX_EXT_LEN]) +{ + const char *delim = fn ? strstr(fn, HTS_IDX_DELIM) : NULL, *ext; + if (!fn) return -1; + if (!delim) delim = fn + strlen(fn); + for (ext = delim; ext > fn && *ext != '.' && *ext != '/'; --ext) {} + if (*ext == '.' 
&& + ((delim - ext == 3 && ext[1] == 'g' && ext[2] == 'z') || // permit .sam.gz as a valid file extension + (delim - ext == 4 && ext[1] == 'b' && ext[2] == 'g' && ext[3] == 'z'))) // permit .vcf.bgz as a valid file extension + { + for (ext--; ext > fn && *ext != '.' && *ext != '/'; --ext) {} + } + if (*ext != '.' || delim - ext > HTS_MAX_EXT_LEN || delim - ext < 3) + return -1; + memcpy(ext_out, ext + 1, delim - ext - 1); + ext_out[delim - ext - 1] = '\0'; + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/htslib-1.19.1/hts_os.c b/src/htslib-1.21/hts_os.c similarity index 100% rename from src/htslib-1.19.1/hts_os.c rename to src/htslib-1.21/hts_os.c diff --git a/src/htslib-1.21/hts_probe_cc.sh b/src/htslib-1.21/hts_probe_cc.sh new file mode 100755 index 0000000..c9fc0a8 --- /dev/null +++ b/src/htslib-1.21/hts_probe_cc.sh @@ -0,0 +1,143 @@ +#!/bin/sh + +# Check compiler options for non-configure builds and create Makefile fragment +# +# Copyright (C) 2022-2024 Genome Research Ltd. +# +# Author: Rob Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Arguments are: +# 1. C compiler command +# 2. Initial CFLAGS +# 3. LDFLAGS + +CC=$1 +CFLAGS=$2 +LDFLAGS=$3 + +# Try running the compiler. Uses the same conftest.* names as +# configure for temporary files. +run_compiler () +{ + $CC $CFLAGS $1 $LDFLAGS -o conftest conftest.c 2> conftest.err + retval=$? + rm -f conftest.err conftest + return $retval +} + +# Run a test. $1 is the flag to try, $2 is the Makefile variable to set +# with the flag probe result, $3 is a Makefile variable which will be +# set to 1 if the code was built successfully. The code to test should +# be passed in via fd 0. +# First try compiling conftest.c without the flag. If that fails, try +# again with it to see if the flag is needed. +run_test () +{ + if [ $have_cpuid -ne 1 ] ; then + # Only test for and build SSE / AVX code if cpuid works as + # otherwise it won't be executed, even if present + echo "$3 =" + return + fi + rm -f conftest conftest.err conftest.c + cat - > conftest.c + if run_compiler ; then + echo "$2 =" + echo "$3 = 1" + elif run_compiler "$1" ; then + echo "$2 = $1" + echo "$3 = 1" + else + echo "$3 =" + fi +} + +echo "# Compiler probe results, generated by $0" + +# Check for cpuid +rm -f conftest conftest.err conftest.c +cat > conftest.c <<'EOF' +#include +#include +int main(int argc, char **argv) { + unsigned int a, b, c, d; + int level = __get_cpuid_max(0, NULL); + if (level > 0) + __cpuid_count(1, 0, a, b, c, d); + return 0; +} +EOF +if run_compiler ; then + echo "HTS_HAVE_CPUID = 1" + have_cpuid=1 +else + echo "HTS_HAVE_CPUID =" + have_cpuid=0 +fi + +# Check for sse4.1 etc.
support +run_test "-msse4.1 -mpopcnt -mssse3" HTS_CFLAGS_SSE4 HTS_BUILD_SSE4 <<'EOF' +#ifdef __x86_64__ +#include "x86intrin.h" +int main(int argc, char **argv) { + __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); + __m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b); + return _mm_popcnt_u32(*((char *) &c)); +} +#else +int main(int argc, char **argv) { return 0; } +#endif +EOF + +# Check for avx2 + +run_test "-mavx2 -mpopcnt" HTS_CFLAGS_AVX2 HTS_BUILD_AVX2 <<'EOF' +#ifdef __x86_64__ +#include "x86intrin.h" +int main(int argc, char **argv) { + __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + __m256i b = _mm256_add_epi32(a, a); + long long c = _mm256_extract_epi64(b, 0); + return _mm_popcnt_u32((int) c); +} +#else +int main(int argc, char **argv) { return 0; } +#endif +EOF + +# Check for avx512 + +run_test "-mavx512f -mpopcnt" HTS_CFLAGS_AVX512 HTS_BUILD_AVX512 <<'EOF' +#ifdef __x86_64__ +#include "x86intrin.h" +int main(int argc, char **argv) { + __m512i a = _mm512_set1_epi32(1); + __m512i b = _mm512_add_epi32(a, a); + __m256i c = _mm512_castsi512_si256(b); + __m256i d = _mm512_extracti64x4_epi64(a, 1); + return _mm_popcnt_u32(*((char *) &c)) + (*(char *) &d); +} +#else +int main(int argc, char **argv) { return 0; } +#endif +EOF + +rm -f conftest.c diff --git a/src/htslib-1.18/hts_time_funcs.h b/src/htslib-1.21/hts_time_funcs.h similarity index 100% rename from src/htslib-1.18/hts_time_funcs.h rename to src/htslib-1.21/hts_time_funcs.h diff --git a/src/htslib-1.18/htscodecs/BENCHMARKS.md b/src/htslib-1.21/htscodecs/BENCHMARKS.md similarity index 100% rename from src/htslib-1.18/htscodecs/BENCHMARKS.md rename to src/htslib-1.21/htscodecs/BENCHMARKS.md diff --git a/src/htslib-1.18/htscodecs/LICENSE.md b/src/htslib-1.21/htscodecs/LICENSE.md similarity index 100% rename from src/htslib-1.18/htscodecs/LICENSE.md rename to src/htslib-1.21/htscodecs/LICENSE.md diff --git a/src/htslib-1.18/htscodecs/MAINTAINERS.md 
b/src/htslib-1.21/htscodecs/MAINTAINERS.md similarity index 100% rename from src/htslib-1.18/htscodecs/MAINTAINERS.md rename to src/htslib-1.21/htscodecs/MAINTAINERS.md diff --git a/src/htslib-1.21/htscodecs/NEWS.md b/src/htslib-1.21/htscodecs/NEWS.md new file mode 100644 index 0000000..ff0177c --- /dev/null +++ b/src/htslib-1.21/htscodecs/NEWS.md @@ -0,0 +1,438 @@ +Release 1.6.1: 22nd August 2024 +------------------------------- + +This release is primarily portability and minor bug fixes. + +Changes + +- Improve warning levels by the compiler in CI. (#125) + +- Switch to GitHub actions for some CI builds. (#121, #123) + +- Add configure check for cpuid systems. (#115, #116. Reported by + Ryan Carsten Schmidt) + +Bug fixes + +- Use unsigned chars for ctype macros in the name tokeniser. + On many systems this was already mitigated against, but on some OSes + a char > 128 could trigger a buffer underrun. (#124) + +- Fix interaction between _XOPEN_SOURCE and FreeBSD. + (#119, John Marshall) + +- Improve AVX512 compiler support, notably MacOS El Capitan's XCode. + (#118, Rob Davies) + +- Fix -std=c99 -pedantic pedantry (#117) + + +Release 1.6.0: 7th December 2023 +-------------------------------- + +This release is primarily bug fixes, mostly spotted through improved fuzz +testing. + +One big change however is the SIMD rANS codecs are now performant on Intel +CPUs with the DownFall mitigation microcode applied. + + +Changes + +- Replaced the rANS codec SIMD gathers with simulated gathers via scalar + memory fetches. This helps AMD Zen4, but importantly it also fixes a + disastrous performance regression caused by Intel's DownFall microcode fix. + + There is an impact on pre-DownFall speeds, but we should focus on patched + CPUs as a priority. + +- A small speed up to the rans_F_to_s3 function used by order-0 rans decode. + +- Small speed up to SIMD rans32x16 order-1 encoder by reducing cache misses.
+ Also sped up the rans4x8 order-1 encoder, particularly on AMD Zen4. + +- Now supports building with "zig cc" + (Issue #109, reported by David Jackson) + + +Bug fixes + +- Improve robustness of name tokeniser when given non 7-bit ASCII and on + machines where "char" defaults to unsigned. + (Issue #105, reported by Shubham Chandak) + +- Also fixed a 1 byte buffer read-overrun in name tokeniser. + +- Fix name tokeniser encoder failure with some duplicated streams. + +- Fixed rans_set_cpu to work multiple times, as well as reinstating the + ability to change decode and encode side independently (accidentally lost in + commit 958032c). No effect on usage, but it improves the test coverage. + +- Added a round-trip fuzz tester to test the ability to encode. The old fuzz + testing was decode streams only. + +- Fixed bounds checking in rans_uncompress_O0_32x16_avx2, fixing buffer read + overruns. + +- Removed undefined behaviour in transpose_and_copy(), fixing zig cc builds. + + +Release 1.5.2: 6th October 2023 +------------------------------- + +*** SECURITY FIXES *** + +This release contains multiple bug fixes, including a couple +buffer overruns that could corrupt memory when used in specific +scenarios. These have not been observed with real data, but could +represent an attack vector for a malicious user. (We know of no +exploit.) + + +Changes + +- The range coder has been extended to do bounds checking if the + new RC_SetOutputEnd() is called. This has a small performance hit + for the encoder, depending on compiler, but tests showed within 10% + at worst. + +Bug fixes + +- Fix write-buffer overruns in fqzcomp and name tokeniser. + + SECURITY ISSUE: FQZComp could overflow the computed maximum growth + size, causing writes beyond the ends of the allocated memory. This + is triggered by many very small 1bp reads. Fixed the maximum + bounds for compressed data. 
+ + SECURITY ISSUE: The name tokeniser using the maximum number of + tokens (128) would erroneously write a 129th token. This is a + restricted overflow of a few bytes. + + (PR#97, reported by Shubham Chandak) + +- Fix a maximum 8-byte read overflow in the AVX2 rans decoder. + SECURITY ISSUE: This was only present when using gcc. + (PR#100, reported by Rob Davies) + +- The rANS Order-1 SSE4 decoder could decode incorrectly. + When a single symbol only occurs and we're using 12-bit freqs, the + frequency of 4096 was interpreted as freq 0. This only happens in + the non-SIMD tidy-up stage at the end of the decode, so at worst the + final 31 bytes may be incorrect. (PR#102) + +- Fixed a 1-byte heap read-buffer overflow. Existed since 6a87ead2 + (Oct 2021). Low severity security due to size and high likelihood + it's just malloc meta-data. (PR#95; OSS-Fuzz 62270) + +- rans_compress_4x16 now works on zero length input. + Previously this was giving divide-by-zero errors. + (PR#101, reported by Shubham Chandak) + +- Remove asserts which caused warnings about unused variables when + building with -DNDEBUG. + +- Fix ARM builds when HWCAP_ASIMD is missing (on Conda) (PR#91) + +- Improve FreeBSD CI testing + +- Fix undefined behaviour from signed bit-shifting (PR#90). + + +Release 1.5.1: 19th July 2023 +----------------------------- + +This release is mainly small updates and bug fixes focusing on +specific platforms, with no new features added. + +Changes + +- Be more selective in use of AVX512 on AMD Zen4 processors. This can + be faster (e.g. with 64-way unrolling), but in the current rANS codec + implementations AVX2 is faster for certain operations (PR#85). + +- Add config.h to test programs to help them pick up definitions such + as XOPEN_SOURCE (PR#84) + +- Add FreeBSD to CI testing (PR#83) + +Bug fixes + +- Trivial bug fix to the rans4x16pr test harness when given + incompressible data (PR#86).
+ +- Make ARM NEON checks specific to AArch64 and exclude AArch32 systems. + (PR#82 to fix issue#81, reported by Robert Clausecker) + + +Release 1.5.0: 14th April 2023 +------------------------------ + +Changes + +- Significant speed ups to the fqzcomp codec via code restructuring + and use of memory prefetch instructions. Encode is 30-40% faster + and decode 5-8% faster. (PR#75 James Bonfield) + +- Improve multiarch builds on MacOS, fixing issues with getting the + various SIMD implementations integrated. (Issue#76 John Marshall, + PR#77/#78 Rob Davies) + +- Remove unused ax_with_libdeflate.m4 file from build system. + + +Release 1.4.0: February 2023 +----------------------------- + +This is almost entirely minor bug fixing with a few small updates. + +Changes + +- Optimise compression / speed of the name tokeniser. + - In arithmetic coding mode, it can now utilise bzip2 at higher levels. + - For both rans / arith entropy encoders, the choice of method / order + is now optimised per token type, giving faster compression. + - Culled a pointless zlib check in the configure script. + - Made lack of bzip2 a hard failure in configure, unless an explicit + --disable-bz2 option is given. + (#72, #73) + +- Switch CI to use ARM for MacOS builds + (#69, thanks to Rob Davies) + + +Bug fixes + +- Remove some newer compiler warnings (#61) + +- Improvements for Intel -m32 builds, including better AVX2 validation + (m32 misses _mm256_extract_epi64) and improved data alignment. + (#62. See also samtools/htslib#1500) + +- Detect Neon capability at runtime via operating system APIs. + (#63, thanks to John Marshall) + +- Improve FreeBSD diagnostics when neglecting to use -lpthread / -lthr. + Plus additional extra error checking too. + (#68, #64, thanks to John Marshall) + +- Update hts_pack to operate in line with CRAMcodecs spec, where the + number of symbols > 16. + (#65/#66, reported by Michael Macias) + +- Fixed too-stringent buffer overflow checking in O1 rans decoder.
+ (#71, reported by Divon Lan) + + +Release 1.3.0: 9th August 2022 +------------------------------ + +The primary change in this release is a new SIMD enabled rANS codec. + +Changes + +- There is a 32-way unrolled rANS implementation. This is accessed + using the existing rans 4x16 API with the RANS_ORDER_X32 bit set. + Implementations exist for SSE4.1, AVX2, AVX512 and ARM Neon, as + well as traditional non-SIMD scalar code in C and JavaScript. See + the commit logs for benchmarks. + +- Improved memory allocation via a new htscodecs_tls_alloc function. + This uses Thread Local Storage (TLS) to avoid multiple malloc/free + calls, reducing system CPU time. + +- Some external functions have been renamed, with the old ones still + existing in a deprecated fashion. Every symbol should now start + hts_, rans_, arith_, fqz_ or tok3_*. + +- Improved test framework with an "entropy" tool that iterates over + all entropy encoders. + +- Updated the Appveyor CI image to use a newer gcc. Also added ARM + to the list of processors to test on. + +- Tab vs space code changes. Use "git diff -w" to see through these. + +- Reworked fuzzing infrastructure. + +- Small speed improvements to various rANS encoders and decoders. + These were tested on a broad range of compilers, versions and + systems. The new code may be slightly slower with some combinations, + but is faster overall and removes a few outliers with considerably + degraded performance. + +- Substantial memory reduction to the name tokeniser (tok3). + +Bug fixes + +- Fixed undefined behaviour in our use of __builtin_clz(). + +- Fixed a few redundant #includes. + +- Work around strict aliasing bugs, uncovered with gcc -O2. + +- Fixed an issue with encoding data blocks close to 2GB in size. + (Additionally blocks above 2GB now error, rather than crashing or + returning incorrect results.) + +- Fix encode error with large blocks using RANS_ORDER_STRIPE.
+ + +Release 1.2.2: 1st April 2022 +----------------------------- + +This release contains some fixes found during fuzzing with Clang's +memory-sanitizer. None of these involve writing memory so there +is no possibility for code execution vulnerabilities. However some +could access uninitialised elements in locally allocated memory, which +could leak private data if the library was used in conjunction with +other tools which don't zero sensitive data before freeing. + +Bug fixes: + +- The name tokeniser now validates the stored length in the data + stream matches the actual decoded length. Discovered by Taotao Gu. + +- Fixed an endless loop in arith_dynamic and rans4x16pr involving + X_STRIPE with 0 stripes. + +- Avoid a harmless (and wrong?) undefined behaviour sanitizer error + when calling memcpy(ptr, NULL, 0) in the name tokeniser. + +- Fixed possible uninitialised memory access in + rans_uncompress_O1_4x16. If the frequency table didn't add up to + the correct amount, parts of the "fb" table were left unpopulated. + It was then possible to use these array elements in some of the rANS + calculations. + +- Similarly rans_uncompress_O0 could access an uninitialised element + 4095 of the decoder tables if the frequencies summed to 4095 instead + of the expected 4096. + +- Improved error detection from fqzcomp's read_array function. + +- Reject fqzcomp parameters with inconsistent "sel" parameters, which + could lead to uninitialised access to the model.sel range coder. + + +Release 1.2.1: 15th February 2022 +--------------------------------- + +The only change in this release is a minor adjustment to the histogram +code so it works on systems with small stacks. This was detected on +Windows Mingw builds. + + +Release 1.2: 10th February 2022 +------------------------------- + +This release contains the following minor changes. +Please see the "git log" for the full details. + +Improvements / changes: + +- Speed up of rANS4x16 order-0.
We now use a branchless encoder + renormalisation step. For complex data it's between 13 and 50% + speed up depending on compiler. + +- Improve rANS4x16 compute_shift estimates. The entropy calculation + is now more accurate. This leads to more frequent use of the 10-bit + frequency mode, at an expense of up to 1% size growth. + +- Speed improvements to the striped rANS mode, both encoding and + decoding. Encoder gains ~8% and decoder ~5%, but varies + considerably by compiler and data. + +- Added new var_put_u64_safe and var_put_u32_safe interfaces. + These are automatically used by var_put_u64 and var_put_u32 when + near the end of the buffer, but may also be called directly. + +- Small speed ups to the hist8 and hist1_4 functions. + +- Minor speed up to RLE decoding. + +Bug fixes: + +- Work around an icc-2021 compiler bug, but also speed up the varint + encoding too (#29). + +- Fix an off-by-one error in the initial size check in arith_dynamic. + This meant the very smallest of blocks could fail to decode. + Reported by Divon Lan. + +- Fixed hist1_4 to also count the last byte when computing T0[]. + +- Fixed overly harsh bounds checking in the fqzcomp read_array + function, which meant it failed to decode some configurations. + + +Release 1.1.1: 6th July 2021 +---------------------------- + +This release contains the following minor changes. +Please see the "git log" for the full details. + +Improvements / changes: + +- Modernised autoconf usage to avoid warnings with newer versions. + (John Marshall) + +- Avoid using awk with large records, due to some systems + (e.g. Solaris / OpenIndiana) with line length limits. + (John Marshall) + +- Applied Debian patch to make the library link against -lm. + +Bug fixes: + +- Fixed an issue with the name tokeniser when a slice (name_context) + has exactly 1 more name than the previous call. (James Bonfield) + +- Removed access to an uninitialised variable in the name tokeniser + decode when given malformed data.
This occurs when we use delta + encoding for the very first name. (James Bonfield, OSS-Fuzz) + +- Minor fixes to distcheck and distclean targets + + +Release 1.0: 23rd Feb 2021 +-------------------------- + +This marks the first non-beta release of htscodecs, following a +period of integration with Htslib and automated fuzzing by Google's +OSS-Fuzz program. + +[Note this testing only applies to the C implementation. The +JavaScript code should still be considered as examples of the codecs, +more for purposes of understanding and clarity than as a fully +optimised and tested release.] + +Since the last release (0.5) the key changes are: + +- Improved support for big endian platforms + +- Speed improvements to CRAM 3.0 4x8 rANS order-1 encoding. + It's between 10 and 50% faster at encoding, based on input data. + +- Improved autoconf bzip2 checks and tidy up "make test" output. + +- Added some more files into "make install", so that "make distcheck" + now passes. + +- Replaced Travis with Cirrus-CI testing. + +- Removed various C undefined behaviour, such as left shifting of + negative values and integer overflows. As far as we know these were + currently harmless on the supported platforms, but may break future + compiler optimisations. + +- Fixed numerous OSS-Fuzz identified flaws. Some of these were + potential security issues such as small buffer overruns. + +- Tidied up some code to prevent warnings. + +- The name tokeniser now has a limit on the size of data it can encode + (10 million records). This may still be too high given the memory + it will require, so it may be reduced again.
+ diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/arith_dynamic.c b/src/htslib-1.21/htscodecs/htscodecs/arith_dynamic.c similarity index 100% rename from src/htslib-1.19.1/htscodecs/htscodecs/arith_dynamic.c rename to src/htslib-1.21/htscodecs/htscodecs/arith_dynamic.c diff --git a/src/htslib-1.18/htscodecs/htscodecs/arith_dynamic.h b/src/htslib-1.21/htscodecs/htscodecs/arith_dynamic.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/arith_dynamic.h rename to src/htslib-1.21/htscodecs/htscodecs/arith_dynamic.h diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/c_range_coder.h b/src/htslib-1.21/htscodecs/htscodecs/c_range_coder.h similarity index 100% rename from src/htslib-1.19.1/htscodecs/htscodecs/c_range_coder.h rename to src/htslib-1.21/htscodecs/htscodecs/c_range_coder.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/c_simple_model.h b/src/htslib-1.21/htscodecs/htscodecs/c_simple_model.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/c_simple_model.h rename to src/htslib-1.21/htscodecs/htscodecs/c_simple_model.h diff --git a/src/htslib-1.21/htscodecs/htscodecs/fqzcomp_qual.c b/src/htslib-1.21/htscodecs/htscodecs/fqzcomp_qual.c new file mode 100644 index 0000000..a5b6687 --- /dev/null +++ b/src/htslib-1.21/htscodecs/htscodecs/fqzcomp_qual.c @@ -0,0 +1,1630 @@ +/* + * Copyright (c) 2011-2013, 2018-2022 Genome Research Ltd. + * Author(s): James Bonfield + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger + * Institute nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH + * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// We use generic maps to turn 0-M into 0-N where N <= M +// before adding these into the context. These are used +// for positions, running-diffs and quality values. +// +// This can be used as a simple divisor, eg pos/24 to get +// 2 bits of positional data for each quarter along a 100bp +// read, or it can be tailored for specific such as noting +// the first 5 cycles are poor, then we have stability and +// a gradual drop off in the last 20 or so. Perhaps we then +// map pos 0-4=0, 5-79=1, 80-89=2, 90-99=3. +// +// We don't need to specify how many bits of data we are +// using (2 in the above example), as that is just implicit +// in the values in the map. Specify not to use a map simply +// disables that context type (our map is essentially 0-M -> 0). 
+ +// Example of command line usage: +// +// f=~/scratch/data/q4 +// cc -Wall -DTEST_MAIN -O3 -g fqzcomp_qual2.c -lm +// ./a.out $f > /tmp/_ && ./a.out -d < /tmp/_ > /tmp/__ && cmp /tmp/__ $f + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fqzcomp_qual.h" +#include "varint.h" +#include "utils.h" + +#define CTX_BITS 16 +#define CTX_SIZE (1<(b)?(a):(b)) +#endif + +#define QMAX 256 +#define QBITS 12 +#define QSIZE (1< 255 therefore means we need to repeatedly read to find +// the actual run length. +// Alternatively we could bit-encode instead of byte encode, eg BETA. +static int store_array(unsigned char *out, unsigned int *array, int size) { + unsigned char tmp[2048]; + + int i, j, k; + for (i = j = k = 0; i < size; j++) { + int run_len = i; + while (i < size && array[i] == j) + i++; + run_len = i-run_len; + + int r; + do { + r = MIN(255, run_len); + tmp[k++] = r; + run_len -= r; + } while (r == 255); + } + while (i < size) + tmp[k++] = 0, j++; + + // RLE on out. + // 1 2 3 3 3 3 3 4 4 5 + // => 1 2 3 3 +3... 
4 4 +0 5 + int last = -1; + for (i = j = 0; j < k; i++) { + out[i] = tmp[j++]; + if (out[i] == last) { + int n = j; + while (j < k && tmp[j] == last) + j++; + out[++i] = j-n; + } else { + last = out[i]; + } + } + k = i; + +// fprintf(stderr, "Store_array %d => %d {", size, k); +// for (i = 0; i < k; i++) +// fprintf(stderr, "%d,", out[i]); +// fprintf(stderr, "}\n"); + return k; +} + +static int read_array(unsigned char *in, size_t in_size, unsigned int *array, int size) { + unsigned char R[1024]; + int i, j, z, last = -1, nb = 0; + + size = MIN(1024, size); + + // Remove level one of run-len encoding + for (i = j = z = 0; z < size && i < in_size; i++) { + int run = in[i]; + R[j++] = run; + z += run; + if (run == last) { + if (i+1 >= in_size) + return -1; + int copy = in[++i]; + z += run * copy; + while (copy-- && z <= size && j < 1024) + R[j++] = run; + } + if (j >= 1024) + return -1; + last = run; + } + nb = i; + + // Now expand inner level of run-length encoding + int R_max = j; + for (i = j = z = 0; j < size; i++) { + int run_len = 0; + int run_part; + if (z >= R_max) + return -1; + do { + run_part = R[z++]; + run_len += run_part; + } while (run_part == 255 && z < R_max); + if (run_part == 255) + return -1; + + while (run_len && j < size) + run_len--, array[j++] = i; + } + + return nb; +} + +// FIXME: how to auto-tune these rather than trial and error? +// r2 = READ2 +// qa = qual avg (0, 2, 4) +static int strat_opts[][12] = { +// qb qs pb ps db ds ql sl pl dl r2 qa + {10, 5, 4,-1, 2, 1, 0, 14, 10, 14, 0,-1}, // basic options (level < 7) + {8, 5, 7, 0, 0, 0, 0, 14, 8, 14, 1,-1}, // e.g. HiSeq 2000 + {12, 6, 2, 0, 2, 3, 0, 9, 12, 14, 0, 0}, // e.g. MiSeq + {12, 6, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0}, // e.g. 
IonTorrent; adaptive O1 + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // custom +}; +static int nstrats = sizeof(strat_opts) / sizeof(*strat_opts); + +#ifdef HAVE_BUILTIN_PREFETCH +static inline void mm_prefetch(void *x) { + __builtin_prefetch(x); +} +#else +static inline void mm_prefetch(void *x) { + // Fetch and discard is quite close to a genuine prefetch + *(volatile int *)x; +} +#endif + +typedef struct { + unsigned int qctx; // quality sub-context + unsigned int p; // pos (bytes remaining) + unsigned int delta; // delta running total + unsigned int prevq; // previous quality + unsigned int s; // selector + unsigned int qtot, qlen; + unsigned int first_len; + unsigned int last_len; + ssize_t rec; + unsigned int ctx; +} fqz_state; + +static void dump_table(unsigned int *tab, int size, char *name) { + int i, last = -99, run = 0; + fprintf(stderr, "\t%s\t{", name); + for (i = 0; i < size; i++) { + if (tab[i] == last) { + run++; + } else if (run == 1 && tab[i] == last+1) { + int first = last; + do { + last = tab[i]; + i++; + } while (i < size && tab[i] == last+1); + i--; + + // Want 0,1,2,3,3,3 as 0..2 3x3, not 0..3 3x2 + if (tab[i] == tab[i+1]) + i--; + if (tab[i] != first) + fprintf(stderr, "..%d", tab[i]); + run = 1; + last = -99; + } else { + if (run > 1) + fprintf(stderr, " x %d%s%d", run, i?", ":"", tab[i]); + else + fprintf(stderr, "%s%d", i?", ":"", tab[i]); + run = 1; + last = tab[i]; + } + } + if (run > 1) + fprintf(stderr, " x %d", run); + fprintf(stderr, "}\n"); +} + +static void dump_map(unsigned int *map, int size, char *name) { + int i, c = 0; + fprintf(stderr, "\t%s\t{", name); + for (i = 0; i < size; i++) + if (map[i] != INT_MAX) + fprintf(stderr, "%s%d=%d", c++?", ":"", i, map[i]); + fprintf(stderr, "}\n"); +} + +#pragma GCC diagnostic ignored "-Wunused-function" +static void dump_params(fqz_gparams *gp) { + fprintf(stderr, "Global params = {\n"); + fprintf(stderr, "\tvers\t%d\n", gp->vers); + fprintf(stderr, "\tgflags\t0x%02x\n", gp->gflags); + 
fprintf(stderr, "\tnparam\t%d\n", gp->nparam); + fprintf(stderr, "\tmax_sel\t%d\n", gp->max_sel); + fprintf(stderr, "\tmax_sym\t%d\n", gp->max_sym); + if (gp->gflags & GFLAG_HAVE_STAB) + dump_table(gp->stab, 256, "stab"); + fprintf(stderr, "}\n"); + + int i; + for (i = 0; i < gp->nparam; i++) { + fqz_param *pm = &gp->p[i]; + fprintf(stderr, "\nParam[%d] = {\n", i); + fprintf(stderr, "\tcontext\t0x%04x\n", pm->context); + fprintf(stderr, "\tpflags\t0x%02x\n", pm->pflags); + fprintf(stderr, "\tmax_sym\t%d\n", pm->max_sym); + fprintf(stderr, "\tqbits\t%d\n", pm->qbits); + fprintf(stderr, "\tqshift\t%d\n", pm->qshift); + fprintf(stderr, "\tqloc\t%d\n", pm->qloc); + fprintf(stderr, "\tsloc\t%d\n", pm->sloc); + fprintf(stderr, "\tploc\t%d\n", pm->ploc); + fprintf(stderr, "\tdloc\t%d\n", pm->dloc); + + if (pm->pflags & PFLAG_HAVE_QMAP) + dump_map(pm->qmap, 256, "qmap"); + + if (pm->pflags & PFLAG_HAVE_QTAB) + dump_table(pm->qtab, 256, "qtab"); + if (pm->pflags & PFLAG_HAVE_PTAB) + dump_table(pm->ptab, 1024, "ptab"); + if (pm->pflags & PFLAG_HAVE_DTAB) + dump_table(pm->dtab, 256, "dtab"); + fprintf(stderr, "}\n"); + } +} + +typedef struct { + SIMPLE_MODEL(QMAX,_) *qual; + SIMPLE_MODEL(256,_) len[4]; + SIMPLE_MODEL(2,_) revcomp; + SIMPLE_MODEL(256,_) sel; + SIMPLE_MODEL(2,_) dup; +} fqz_model; + +static int fqz_create_models(fqz_model *m, fqz_gparams *gp) { + int i; + + if (!(m->qual = htscodecs_tls_alloc(sizeof(*m->qual) * CTX_SIZE))) + return -1; + + for (i = 0; i < CTX_SIZE; i++) + SIMPLE_MODEL(QMAX,_init)(&m->qual[i], gp->max_sym+1); + + for (i = 0; i < 4; i++) + SIMPLE_MODEL(256,_init)(&m->len[i],256); + + SIMPLE_MODEL(2,_init)(&m->revcomp,2); + SIMPLE_MODEL(2,_init)(&m->dup,2); + if (gp->max_sel > 0) + SIMPLE_MODEL(256,_init)(&m->sel, gp->max_sel+1); + + return 0; +} + +static void fqz_destroy_models(fqz_model *m) { + htscodecs_tls_free(m->qual); +} + +static inline unsigned int fqz_update_ctx(fqz_param *pm, fqz_state *state, int q) { + unsigned int last = 0; // 
pm->context + state->qctx = (state->qctx << pm->qshift) + pm->qtab[q]; + last += (state->qctx & pm->qmask) << pm->qloc; + + // The final shifts have been factored into the tables already. + last += pm->ptab[MIN(1023, state->p)]; // << pm->ploc + last += pm->dtab[MIN(255, state->delta)]; // << pm->dloc + last += state->s << pm->sloc; + + // On the fly average is slow work. + // However it can be slightly better than using a selector bit + // as it's something we can compute on the fly and thus doesn't + // consume output bits for storing the selector itself. + // + // Q4 (novaseq.bam) + // qtot+=q*q -DQ1=8.84 -DQ2=8.51 -DQ3=7.70; 7203598 (-0.7%) + // qtot+=q -DQ1=2.96 -DQ2=2.85 -DQ3=2.69; 7207315 + // vs old delta; 7255614 (default params) + // vs 2 bit selector (no delta) 7203006 (-x 0x8261000e80) + // vs 2 bit selector (no delta) 7199153 (-x 0x7270000e70) -0.8% + // vs 2 bit selector (no delta) 7219668 (-x 0xa243000ea0) + //{ + // double qa = state->qtot / (state->qlen+.01); + // //fprintf(stderr, "%f\n", qa); + // int x = 0; + // if (qa>=Q1) x=3; + // else if (qa>=Q2) x=2; + // else if (qa>=Q3) x=1; + // else x=0; + // last += x << pm->dloc; // tmp reuse of delta pos + // state->qtot += q*q; + // state->qlen++; + //} + + // Only update delta after 1st base. + state->delta += (state->prevq != q); + state->prevq = q; + + state->p--; + + return last & (CTX_SIZE-1); +} + +// Build quality stats for qhist and set nsym, do_dedup and do_sel params. +// One_param is -1 to gather stats on all data, or >= 0 to gather data +// on one specific selector parameter. Used only in TEST_MAIN via +// fqz_manual_parameters at the moment. 
+void fqz_qual_stats(fqz_slice *s, + unsigned char *in, size_t in_size, + fqz_param *pm, + uint32_t qhist[256], + int one_param) { +#define NP 32 + uint32_t qhistb[NP][256] = {{0}}; // both + uint32_t qhist1[NP][256] = {{0}}; // READ1 only + uint32_t qhist2[NP][256] = {{0}}; // READ2 only + uint64_t t1[NP] = {0}; // Count for READ1 + uint64_t t2[NP] = {0}; // COUNT for READ2 + uint32_t avg[2560] = {0}; // Avg qual *and later* avg-to-selector map. + + int dir = 0; + int last_len = 0; + int do_dedup = 0; + size_t rec; + size_t i, j; + int num_rec = 0; + + // See what info we've been given. + // Do we have READ1 / READ2? + // Do we have selector hidden in the top bits of flag? + int max_sel = 0; + int has_r2 = 0; + for (rec = 0; rec < s->num_records; rec++) { + if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) + continue; + num_rec++; + if (max_sel < (s->flags[rec] >> 16)) + max_sel = (s->flags[rec] >> 16); + if (s->flags[rec] & FQZ_FREAD2) + has_r2 = 1; + } + + // Dedup detection and histogram stats gathering + int *avg_qual = calloc((s->num_records+1), sizeof(int)); + if (!avg_qual) + return; + + rec = i = j = 0; + while (i < in_size) { + if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) { + avg_qual[rec] = 0; + i += s->len[rec++]; + continue; + } + if (rec < s->num_records) { + j = s->len[rec]; + dir = s->flags[rec] & FQZ_FREAD2 ? 1 : 0; + if (i > 0 && j == last_len + && !memcmp(in+i-last_len, in+i, j)) + do_dedup++; // cache which records are dup? + } else { + j = in_size - i; + dir = 0; + } + last_len = j; + + uint32_t (*qh)[256] = dir ? qhist2 : qhist1; + uint64_t *th = dir ? t2 : t1; + + uint32_t tot = 0; + for (; i < in_size && j > 0; i++, j--) { + tot += in[i]; + qhist[in[i]]++; + qhistb[j & (NP-1)][in[i]]++; + qh[j & (NP-1)][in[i]]++; + th[j & (NP-1)]++; + } + tot = last_len ? 
(tot*10.0)/last_len+.5 : 0; + + avg_qual[rec] = tot; + avg[MIN(2559, tot)]++; + + rec++; + } + pm->do_dedup = ((rec+1)/(do_dedup+1) < 500); + + last_len = 0; + + // Unique symbol count + for (i = pm->max_sym = pm->nsym = 0; i < 256; i++) { + if (qhist[i]) + pm->max_sym = i, pm->nsym++; + } + + + // Auto tune: does average quality helps us? + if (pm->do_qa != 0) { + // Histogram of average qual in avg[] + // NB: we convert avg[] from count to selector index + + // Few symbols means high compression which means + // selector bits become more significant fraction. + // Reduce selector bits by skewing the distribution + // to not be even binning. + double qf0 = pm->nsym > 8 ? 0.2 : 0.05; + double qf1 = pm->nsym > 8 ? 0.5 : 0.22; + double qf2 = pm->nsym > 8 ? 0.8 : 0.60; + + int total = 0; + i = 0; + while (i < 2560) { + total += avg[i]; + if (total > qf0 * num_rec) { + //fprintf(stderr, "Q1=%d\n", (int)i); + break; + } + avg[i++] = 0; + } + while (i < 2560) { + total += avg[i]; + if (total > qf1 * num_rec) { + //fprintf(stderr, "Q2=%d\n", (int)i); + break; + } + avg[i++] = 1; + } + while (i < 2560) { + total += avg[i]; + if (total > qf2 * num_rec) { + //fprintf(stderr, "Q3=%d\n", (int)i); + break; + } + avg[i++] = 2; + } + while (i < 2560) + avg[i++] = 3; + + // Compute simple entropy of merged signal vs split signal. 
+ i = 0; + rec = 0; + + int qbin4[4][NP][256] = {{{0}}}; + int qbin2[2][NP][256] = {{{0}}}; + int qbin1 [NP][256] = {{0}}; + int qcnt4[4][NP] = {{0}}; + int qcnt2[4][NP] = {{0}}; + int qcnt1 [NP] = {0}; + while (i < in_size) { + if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) { + i += s->len[rec++]; + continue; + } + if ((rec & 7) && rec < s->num_records) { + // subsample for speed + i += s->len[rec++]; + continue; + } + if (rec < s->num_records) + j = s->len[rec]; + else + j = in_size - i; + last_len = j; + + uint32_t tot = avg_qual[rec]; + int qb4 = avg[MIN(2559, tot)]; + int qb2 = qb4/2; + + for (; i < in_size && j > 0; i++, j--) { + int x = j & (NP-1); + qbin4[qb4][x][in[i]]++; qcnt4[qb4][x]++; + qbin2[qb2][x][in[i]]++; qcnt2[qb2][x]++; + qbin1 [x][in[i]]++; qcnt1 [x]++; + } + rec++; + } + + double e1 = 0, e2 = 0, e4 = 0; + for (j = 0; j < NP; j++) { + for (i = 0; i < 256; i++) { + if (qbin1 [j][i]) e1 += qbin1 [j][i] * fast_log(qbin1 [j][i] / (double)qcnt1 [j]); + if (qbin2[0][j][i]) e2 += qbin2[0][j][i] * fast_log(qbin2[0][j][i] / (double)qcnt2[0][j]); + if (qbin2[1][j][i]) e2 += qbin2[1][j][i] * fast_log(qbin2[1][j][i] / (double)qcnt2[1][j]); + if (qbin4[0][j][i]) e4 += qbin4[0][j][i] * fast_log(qbin4[0][j][i] / (double)qcnt4[0][j]); + if (qbin4[1][j][i]) e4 += qbin4[1][j][i] * fast_log(qbin4[1][j][i] / (double)qcnt4[1][j]); + if (qbin4[2][j][i]) e4 += qbin4[2][j][i] * fast_log(qbin4[2][j][i] / (double)qcnt4[2][j]); + if (qbin4[3][j][i]) e4 += qbin4[3][j][i] * fast_log(qbin4[3][j][i] / (double)qcnt4[3][j]); + } + } + e1 /= -log(2)/8; + e2 /= -log(2)/8; + e4 /= -log(2)/8; + //fprintf(stderr, "E1=%f E2=%f E4=%f %f\n", e1, e2+s->num_records/8, e4+s->num_records/4, (e4+s->num_records/4)/(e2+s->num_records/8)); + + // Note by using the selector we're robbing bits from elsewhere in + // the context, which may reduce compression better. + // We don't know how much by, so this is basically a guess! + // For now we just say need 5% saving here. 
+ double qm = pm->do_qa > 0 ? 1 : 0.98; + if ((pm->do_qa == -1 || pm->do_qa >= 4) && + e4 + s->num_records/4 < e2*qm + s->num_records/8 && + e4 + s->num_records/4 < e1*qm) { + //fprintf(stderr, "do q4\n"); + for (i = 0; i < s->num_records; i++) { + //fprintf(stderr, "%d -> %d -> %d, %d\n", (int)i, avg_qual[i], avg[MIN(2559, avg_qual[i])], s->flags[i]>>16); + s->flags[i] |= avg[MIN(2559, avg_qual[i])] <<16; + } + pm->do_sel = 1; + max_sel = 3; + } else if ((pm->do_qa == -1 || pm->do_qa >= 2) && e2 + s->num_records/8 < e1*qm) { + //fprintf(stderr, "do q2\n"); + for (i = 0; i < s->num_records; i++) + s->flags[i] |= (avg[MIN(2559, avg_qual[i])]>>1) <<16; + pm->do_sel = 1; + max_sel = 1; + } + + if (pm->do_qa == -1) { + // assume qual, pos, delta in that order. + if (pm->pbits > 0 && pm->dbits > 0) { + // 1 from pos/delta + pm->sloc = pm->dloc-1; + pm->pbits--; + pm->dbits--; + pm->dloc++; + } else if (pm->dbits >= 2) { + // 2 from delta + pm->sloc = pm->dloc; + pm->dbits -= 2; + pm->dloc += 2; + } else if (pm->qbits >= 2) { + pm->qbits -= 2; + pm->ploc -= 2; + pm->sloc = 16-2 - pm->do_r2; + if (pm->qbits == 6 && pm->qshift == 5) + pm->qbits--; + } + pm->do_qa = 4; + } + } + + // Auto tune: does splitting up READ1 and READ2 help us? + if (has_r2 || pm->do_r2) { // FIXME: && but debug for now + double e1 = 0, e2 = 0; // entropy sum + + for (j = 0; j < NP; j++) { + if (!t1[j] || !t2[j]) continue; + for (i = 0; i < 256; i++) { + if (!qhistb[j][i]) continue; + e1 -= (qhistb[j][i])*log(qhistb[j][i] / (double)(t1[j]+t2[j])); + if (qhist1[j][i]) + e2 -= qhist1[j][i] * log(qhist1[j][i] / (double)t1[j]); + if (qhist2[j][i]) + e2 -= qhist2[j][i] * log(qhist2[j][i] / (double)t2[j]); + } + } + e1 /= log(2)*8; // bytes + e2 /= log(2)*8; + + //fprintf(stderr, "read1/2 entropy merge %f split %f\n", e1, e2); + + // Note by using the selector we're robbing bits from elsewhere in + // the context, which may reduce compression better. 
+ // We don't know how much by, so this is basically a guess! + // For now we just say need 5% saving here. + double qm = pm->do_r2 > 0 ? 1 : 0.95; + if (e2 + (8+s->num_records/8) < e1*qm) { + for (rec = 0; rec < s->num_records; rec++) { + if (one_param >= 0 && (s->flags[rec] >> 16) != one_param) + continue; + int sel = s->flags[rec] >> 16; + s->flags[rec] = (s->flags[rec] & 0xffff) + | ((s->flags[rec] & FQZ_FREAD2) + ? ((sel*2)+1) << 16 + : ((sel*2)+0) << 16); + if (max_sel < (s->flags[rec]>>16)) + max_sel = (s->flags[rec]>>16); + } + } + } + + // We provided explicit selector data or auto-tuned it + if (max_sel > 0) { + pm->do_sel = 1; + pm->max_sel = max_sel; + } + + free(avg_qual); +} + +static inline +int fqz_store_parameters1(fqz_param *pm, unsigned char *comp) { + int comp_idx = 0, i, j; + + // Starting context + comp[comp_idx++] = pm->context; + comp[comp_idx++] = pm->context >> 8; + + comp[comp_idx++] = pm->pflags; + comp[comp_idx++] = pm->max_sym; + + comp[comp_idx++] = (pm->qbits<<4)|pm->qshift; + comp[comp_idx++] = (pm->qloc<<4)|pm->sloc; + comp[comp_idx++] = (pm->ploc<<4)|pm->dloc; + + if (pm->store_qmap) { + for (i = j = 0; i < 256; i++) + if (pm->qmap[i] != INT_MAX) + comp[comp_idx++] = i; + } + + if (pm->qbits && pm->use_qtab) + // custom qtab + comp_idx += store_array(comp+comp_idx, pm->qtab, 256); + + if (pm->pbits && pm->use_ptab) + // custom ptab + comp_idx += store_array(comp+comp_idx, pm->ptab, 1024); + + if (pm->dbits && pm->use_dtab) + // custom dtab + comp_idx += store_array(comp+comp_idx, pm->dtab, 256); + + return comp_idx; +} + +static +int fqz_store_parameters(fqz_gparams *gp, unsigned char *comp) { + int comp_idx = 0; + comp[comp_idx++] = gp->vers; // Format number + + comp[comp_idx++] = gp->gflags; + + if (gp->gflags & GFLAG_MULTI_PARAM) + comp[comp_idx++] = gp->nparam; + + if (gp->gflags & GFLAG_HAVE_STAB) { + comp[comp_idx++] = gp->max_sel; + comp_idx += store_array(comp+comp_idx, gp->stab, 256); + } + + int i; + for (i = 0; i < 
gp->nparam; i++) + comp_idx += fqz_store_parameters1(&gp->p[i], comp+comp_idx); + + //fprintf(stderr, "Encoded %d bytes of param\n", comp_idx); + return comp_idx; +} + +// Choose a set of parameters based on quality statistics and +// some predefined options (selected via "strat"). +static inline +int fqz_pick_parameters(fqz_gparams *gp, + int vers, + int strat, + fqz_slice *s, + unsigned char *in, + size_t in_size) { + //approx sqrt(delta), must be sequential + int dsqr[] = { + 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; + uint32_t qhist[256] = {0}; + + if (strat >= nstrats) strat = nstrats-1; + + // Start with 1 set of parameters. + // FIXME: add support for multiple params later. + memset(gp, 0, sizeof(*gp)); + gp->vers = FQZ_VERS; + + if (!(gp->p = calloc(1, sizeof(fqz_param)))) + return -1; + gp->nparam = 1; + gp->max_sel = 0; + + if (vers == 3) // V3.0 doesn't store qual in original orientation + gp->gflags |= GFLAG_DO_REV; + + fqz_param *pm = gp->p; + + // Programmed strategies, which we then amend based on our + // statistical analysis of the quality stream. + pm->qbits = strat_opts[strat][0]; + pm->qshift = strat_opts[strat][1]; + pm->pbits = strat_opts[strat][2]; + pm->pshift = strat_opts[strat][3]; + pm->dbits = strat_opts[strat][4]; + pm->dshift = strat_opts[strat][5]; + pm->qloc = strat_opts[strat][6]; + pm->sloc = strat_opts[strat][7]; + pm->ploc = strat_opts[strat][8]; + pm->dloc = strat_opts[strat][9]; + + // Params for controlling behaviour here. 
+ pm->do_r2 = strat_opts[strat][10]; + pm->do_qa = strat_opts[strat][11]; + + // Validity check input lengths and buffer size + size_t tlen = 0, i; + for (i = 0; i < s->num_records; i++) { + if (tlen + s->len[i] > in_size) + // Oversized buffer + s->len[i] = in_size - tlen; + tlen += s->len[i]; + } + if (s->num_records > 0 && tlen < in_size) + // Undersized buffer + s->len[s->num_records-1] += in_size - tlen; + + // Quality metrics, for all recs + fqz_qual_stats(s, in, in_size, pm, qhist, -1); + + pm->store_qmap = (pm->nsym <= 8 && pm->nsym*2 < pm->max_sym); + + // Check for fixed length. + uint32_t first_len = s->len[0]; + for (i = 1; i < s->num_records; i++) { + if (s->len[i] != first_len) + break; + } + pm->fixed_len = (i == s->num_records); + pm->use_qtab = 0; // unused by current encoder + + if (strat >= nstrats-1) + goto manually_set; // used in TEST_MAIN for debugging + + if (pm->pshift < 0) + pm->pshift = MAX(0, log((double)s->len[0]/(1<pbits))/log(2)+.5); + + if (pm->nsym <= 4) { + // NovaSeq + pm->qshift = 2; // qmax 64, although we can store up to 256 if needed + if (in_size < 5000000) { + pm->pbits =2; + pm->pshift=5; + } + } else if (pm->nsym <= 8) { + // HiSeqX + pm->qbits =MIN(pm->qbits,9); + pm->qshift=3; + if (in_size < 5000000) + pm->qbits =6; + } + + if (in_size < 300000) { + pm->qbits=pm->qshift; + pm->dbits=2; + } + + manually_set: +// fprintf(stderr, "-x 0x%x%x%x%x%x%x%x%x%x%x%x%x\n", +// pm->qbits, pm->qshift, +// pm->pbits, pm->pshift, +// pm->dbits, pm->dshift, +// pm->qloc, pm->sloc, pm->ploc, pm->dloc, +// pm->do_r2, pm->do_qa); + + for (i = 0; i < sizeof(dsqr)/sizeof(*dsqr); i++) + if (dsqr[i] > (1<dbits)-1) + dsqr[i] = (1<dbits)-1; + + if (pm->store_qmap) { + int j; + for (i = j = 0; i < 256; i++) + if (qhist[i]) + pm->qmap[i] = j++; + else + pm->qmap[i] = INT_MAX; + pm->max_sym = pm->nsym; + } else { + pm->nsym = 255; + for (i = 0; i < 256; i++) + pm->qmap[i] = i; + } + if (gp->max_sym < pm->max_sym) + gp->max_sym = pm->max_sym; + + // 
Produce ptab from pshift. + if (pm->qbits) { + for (i = 0; i < 256; i++) { + pm->qtab[i] = i; // 1:1 + + // Alternative mappings: + //qtab[i] = i > 30 ? MIN(max_sym,i)-15 : i/2; // eg for 9827 BAM + } + + } + pm->qmask = (1<qbits)-1; + + if (pm->pbits) { + for (i = 0; i < 1024; i++) + pm->ptab[i] = MIN((1<pbits)-1, i>>pm->pshift); + + // Alternatively via analysis of quality distributions we + // may select a bunch of positions that are special and + // have a non-uniform ptab[]. + // Manual experimentation on a NovaSeq run saved 2.8% here. + } + + if (pm->dbits) { + for (i = 0; i < 256; i++) + pm->dtab[i] = dsqr[MIN(sizeof(dsqr)/sizeof(*dsqr)-1, i>>pm->dshift)]; + } + + pm->use_ptab = (pm->pbits > 0); + pm->use_dtab = (pm->dbits > 0); + + pm->pflags = + (pm->use_qtab ?PFLAG_HAVE_QTAB :0)| + (pm->use_dtab ?PFLAG_HAVE_DTAB :0)| + (pm->use_ptab ?PFLAG_HAVE_PTAB :0)| + (pm->do_sel ?PFLAG_DO_SEL :0)| + (pm->fixed_len ?PFLAG_DO_LEN :0)| + (pm->do_dedup ?PFLAG_DO_DEDUP :0)| + (pm->store_qmap ?PFLAG_HAVE_QMAP :0); + + gp->max_sel = 0; + if (pm->do_sel) { + // 2 selectors values, but 1 parameter block. + // We'll use the sloc instead to encode the selector bits into + // the context. + gp->max_sel = 1; // indicator to check recs + gp->gflags |= GFLAG_HAVE_STAB; + // NB: stab is already all zero + } + + if (gp->max_sel && s->num_records) { + int max = 0; + for (i = 0; i < s->num_records; i++) { + if (max < (s->flags[i] >> 16)) + max = (s->flags[i] >> 16); + } + gp->max_sel = max; + } + + return 0; +} + +static void fqz_free_parameters(fqz_gparams *gp) { + if (gp && gp->p) free(gp->p); +} + +static int compress_new_read(fqz_slice *s, + fqz_state *state, + fqz_gparams *gp, + fqz_param *pm, + fqz_model *model, + RangeCoder *rc, + unsigned char *in, + size_t *in_i, // in[in_i], + unsigned int *last) { + ssize_t rec = state->rec; + size_t i = *in_i; + if (pm->do_sel || (gp->gflags & GFLAG_MULTI_PARAM)) { + state->s = rec < s->num_records + ? 
s->flags[rec] >> 16 // reuse spare bits + : 0; + SIMPLE_MODEL(256,_encodeSymbol)(&model->sel, rc, state->s); + } else { + state->s = 0; + } + int x = (gp->gflags & GFLAG_HAVE_STAB) ? gp->stab[state->s] : state->s; + pm = &gp->p[x]; + + int len = s->len[rec]; + if (!pm->fixed_len || state->first_len) { + SIMPLE_MODEL(256,_encodeSymbol)(&model->len[0], rc, (len>> 0) & 0xff); + SIMPLE_MODEL(256,_encodeSymbol)(&model->len[1], rc, (len>> 8) & 0xff); + SIMPLE_MODEL(256,_encodeSymbol)(&model->len[2], rc, (len>>16) & 0xff); + SIMPLE_MODEL(256,_encodeSymbol)(&model->len[3], rc, (len>>24) & 0xff); + state->first_len = 0; + } + + if (gp->gflags & GFLAG_DO_REV) { + // no need to reverse complement for V4.0 as the core format + // already has this feature. + if (s->flags[rec] & FQZ_FREVERSE) + SIMPLE_MODEL(2,_encodeSymbol)(&model->revcomp, rc, 1); + else + SIMPLE_MODEL(2,_encodeSymbol)(&model->revcomp, rc, 0); + } + + state->rec++; + + state->qtot = 0; + state->qlen = 0; + + state->p = len; + state->delta = 0; + state->qctx = 0; + state->prevq = 0; + + *last = pm->context; + + if (pm->do_dedup) { + // Possible dup of previous read? 
+ if (i && len == state->last_len && + !memcmp(in+i-state->last_len, in+i, len)) { + SIMPLE_MODEL(2,_encodeSymbol)(&model->dup, rc, 1); + i += len-1; + state->p = 0; + *in_i = i; + return 1; // is a dup + } else { + SIMPLE_MODEL(2,_encodeSymbol)(&model->dup, rc, 0); + } + + state->last_len = len; + } + + *in_i = i; + + return 0; // not dup +} + +static +unsigned char *compress_block_fqz2f(int vers, + int strat, + fqz_slice *s, + unsigned char *in, + size_t in_size, + size_t *out_size, + fqz_gparams *gp) { + fqz_gparams local_gp; + int free_params = 0; + + unsigned int last = 0; + size_t i, j; + ssize_t rec = 0; + + int comp_idx = 0; + RangeCoder rc; + + // Pick and store params + if (!gp) { + gp = &local_gp; + if (fqz_pick_parameters(gp, vers, strat, s, in, in_size) < 0) + return NULL; + free_params = 1; + } + + // Worst case scenario assuming random input data and no way to compress + // is NBytes*growth for some small growth factor (arith_dynamic uses 1.05), + // plus fixed overheads for the header / params. Growth can be high + // here as we're modelling things and pathological cases may trigger a + // bad probability model. + // + // Per read is 4-byte len if not fixed length (but less if avg smaller) + // up to 1 byte for selection state (log2(max_sel) bits) + // 1-bit for reverse flag + // 1-bit for dup-last flag (but then no quals) + // Per qual is 1-byte (assuming QMAX==256) + // + // Header size is total guess, as depends on params, but it's almost + // always tiny, so a few K extra should be sufficient. + // + // => Total of (s->num_records*4.25 + in_size)*growth + hdr + int sel_bits = 0, sel = gp->max_sel; + while (sel) { + sel_bits++; + sel >>= 1; + } + double len_sz = gp->p[0].fixed_len ? 
0.25 : 4.25; + len_sz += sel_bits / 8.0; + size_t comp_sz = (s->num_records*len_sz + in_size)*1.1 + 10000; + + unsigned char *comp = (unsigned char *)malloc(comp_sz); + unsigned char *compe = comp + (size_t)comp_sz; + if (!comp) + return NULL; + + //dump_params(gp); + comp_idx = var_put_u32(comp, compe, in_size); + comp_idx += fqz_store_parameters(gp, comp+comp_idx); + + fqz_param *pm; + + // Optimise tables to remove shifts in loop (NB: cannot do this in next vers) + for (j = 0; j < gp->nparam; j++) { + pm = &gp->p[j]; + + for (i = 0; i < 1024; i++) + pm->ptab[i] <<= pm->ploc; + + for (i = 0; i < 256; i++) + pm->dtab[i] <<= pm->dloc; + } + + // Create models and initialise range coder + fqz_model model; + if (fqz_create_models(&model, gp) < 0) + return NULL; + + RC_SetOutput(&rc, (char *)comp+comp_idx); + RC_SetOutputEnd(&rc, (char *)comp+comp_sz); + RC_StartEncode(&rc); + + // For CRAM3.1, reverse upfront if needed + pm = &gp->p[0]; + if (gp->gflags & GFLAG_DO_REV) { + i = rec = j = 0; + while (i < in_size) { + int len = rec < s->num_records-1 + ? s->len[rec] : in_size - i; + + if (s->flags[rec] & FQZ_FREVERSE) { + // Reverse complement sequence - note: modifies buffer + int I,J; + unsigned char *cp = in+i; + for (I = 0, J = len-1; I < J; I++, J--) { + unsigned char c; + c = cp[I]; + cp[I] = cp[J]; + cp[J] = c; + } + } + + i += len; + rec++; + } + rec = 0; + } + + fqz_state state = {0}; + pm = &gp->p[0]; + state.p = 0; + state.first_len = 1; + state.last_len = 0; + state.rec = rec; + + for (i = 0; i < in_size; i++) { + if (state.p == 0) { + if (state.rec >= s->num_records || s->len[state.rec] <= 0) { + free(comp); + comp = NULL; + goto err; + } + + if (compress_new_read(s, &state, gp, pm, &model, &rc, + in, &i, /*&rec,*/ &last)) + continue; + } + +#if 0 + // fqz_qual_stats imp. 
+ // q40 6.876 6.852 5.96 + // q4 6.566 5.07 + // _Q 1.383 1.11 + unsigned char q = in[i]; + unsigned char qm = pm->qmap[q]; + + SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[last], &rc, qm); + last = fqz_update_ctx(pm, &state, qm); +#else + // gcc clang gcc+fqz_qual_stats imp. + // q40 5.033 5.026 -27% 4.137 -38% + // q4 5.595 -15% 4.011 -36% + // _Q 1.225 -11% 0.956 + int j = -1; + + while (state.p >= 4 && i+j+4 < in_size) { + int l1 = last, l2, l3, l4; + // Model has symbols sorted by frequency, so most common are at + // start. So while model is approx 1Kb, the first cache line is + // a big win. + mm_prefetch(&model.qual[l1]); + unsigned char qm1 = pm->qmap[in[i + ++j]]; + last = fqz_update_ctx(pm, &state, qm1); l2 = last; + + mm_prefetch(&model.qual[l2]); + unsigned char qm2 = pm->qmap[in[i + ++j]]; + last = fqz_update_ctx(pm, &state, qm2); l3 = last; + + mm_prefetch(&model.qual[l3]); + unsigned char qm3 = pm->qmap[in[i + ++j]]; + last = fqz_update_ctx(pm, &state, qm3); l4 = last; + + mm_prefetch(&model.qual[l4]); + unsigned char qm4 = pm->qmap[in[i + ++j]]; + last = fqz_update_ctx(pm, &state, qm4); + + SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l1], &rc, qm1); + SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l2], &rc, qm2); + SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l3], &rc, qm3); + SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l4], &rc, qm4); + } + + while (state.p > 0) { + int l2 = last; + mm_prefetch(&model.qual[last]); + unsigned char qm = pm->qmap[in[i + ++j]]; + last = fqz_update_ctx(pm, &state, qm); + SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l2], &rc, qm); + } + i += j; +#endif + } + + if (RC_FinishEncode(&rc) < 0) { + free(comp); + comp = NULL; + *out_size = 0; + goto err; + } + + // For CRAM3.1, undo our earlier reversal step + rec = state.rec; + if (gp->gflags & GFLAG_DO_REV) { + i = rec = j = 0; + while (i < in_size) { + int len = rec < s->num_records-1 + ? 
s->len[rec] + : in_size - i; + + if (s->flags[rec] & FQZ_FREVERSE) { + // Reverse complement sequence - note: modifies buffer + int I,J; + unsigned char *cp = in+i; + for (I = 0, J = len-1; I < J; I++, J--) { + unsigned char c; + c = cp[I]; + cp[I] = cp[J]; + cp[J] = c; + } + } + + i += len; + rec++; + } + } + + // Clear selector abuse of flags + for (rec = 0; rec < s->num_records; rec++) + s->flags[rec] &= 0xffff; + + *out_size = comp_idx + RC_OutSize(&rc); + //fprintf(stderr, "%d -> %d\n", (int)in_size, (int)*out_size); + + err: + fqz_destroy_models(&model); + if (free_params) + fqz_free_parameters(gp); + + return comp; +} + +// Read fqz paramaters. +// +// FIXME: pass in and check in_size. +// +// Returns number of bytes read on success, +// -1 on failure. +static inline +int fqz_read_parameters1(fqz_param *pm, unsigned char *in, size_t in_size) { + int in_idx = 0; + size_t i; + + if (in_size < 7) + return -1; + + // Starting context + pm->context = in[in_idx] + (in[in_idx+1]<<8); + in_idx += 2; + + // Bit flags + pm->pflags = in[in_idx++]; + pm->use_qtab = pm->pflags & PFLAG_HAVE_QTAB; + pm->use_dtab = pm->pflags & PFLAG_HAVE_DTAB; + pm->use_ptab = pm->pflags & PFLAG_HAVE_PTAB; + pm->do_sel = pm->pflags & PFLAG_DO_SEL; + pm->fixed_len = pm->pflags & PFLAG_DO_LEN; + pm->do_dedup = pm->pflags & PFLAG_DO_DEDUP; + pm->store_qmap = pm->pflags & PFLAG_HAVE_QMAP; + pm->max_sym = in[in_idx++]; + + // Sub-context sizes and locations + pm->qbits = in[in_idx]>>4; + pm->qmask = (1<qbits)-1; + pm->qshift = in[in_idx++]&15; + pm->qloc = in[in_idx]>>4; + pm->sloc = in[in_idx++]&15; + pm->ploc = in[in_idx]>>4; + pm->dloc = in[in_idx++]&15; + + // Maps and tables + if (pm->store_qmap) { + for (i = 0; i < 256; i++) pm->qmap[i] = INT_MAX; // so dump_map works + if (in_idx + pm->max_sym > in_size) + return -1; + for (i = 0; i < pm->max_sym; i++) + pm->qmap[i] = in[in_idx++]; + } else { + for (i = 0; i < 256; i++) + pm->qmap[i] = i; + } + + if (pm->qbits) { + if (pm->use_qtab) { + 
int used = read_array(in+in_idx, in_size-in_idx, pm->qtab, 256); + if (used < 0) + return -1; + in_idx += used; + } else { + for (i = 0; i < 256; i++) + pm->qtab[i] = i; + } + } + + if (pm->use_ptab) { + int used = read_array(in+in_idx, in_size-in_idx, pm->ptab, 1024); + if (used < 0) + return -1; + in_idx += used; + } else { + for (i = 0; i < 1024; i++) + pm->ptab[i] = 0; + } + + if (pm->use_dtab) { + int used = read_array(in+in_idx, in_size-in_idx, pm->dtab, 256); + if (used < 0) + return -1; + in_idx += used; + } else { + for (i = 0; i < 256; i++) + pm->dtab[i] = 0; + } + + return in_idx; +} + +static +int fqz_read_parameters(fqz_gparams *gp, unsigned char *in, size_t in_size) { + int in_idx = 0; + int i; + + if (in_size < 10) + return -1; + + // Format version + gp->vers = in[in_idx++]; + if (gp->vers != FQZ_VERS) + return -1; + + // Global glags + gp->gflags = in[in_idx++]; + + // Number of param blocks and param selector details + gp->nparam = (gp->gflags & GFLAG_MULTI_PARAM) ? in[in_idx++] : 1; + if (gp->nparam <= 0) + return -1; + gp->max_sel = gp->nparam > 1 ? 
gp->nparam : 0; + + if (gp->gflags & GFLAG_HAVE_STAB) { + gp->max_sel = in[in_idx++]; + int used = read_array(in+in_idx, in_size-in_idx, gp->stab, 256); + if (used < 0) + goto err; + in_idx += used; + } else { + for (i = 0; i < gp->nparam; i++) + gp->stab[i] = i; + for (; i < 256; i++) + gp->stab[i] = gp->nparam-1; + } + + // Load the individual parameter locks + if (!(gp->p = malloc(gp->nparam * sizeof(*gp->p)))) + return -1; + + gp->max_sym = 0; + for (i = 0; i < gp->nparam; i++) { + int e = fqz_read_parameters1(&gp->p[i], in + in_idx, in_size-in_idx); + if (e < 0) + goto err; + if (gp->p[i].do_sel && gp->max_sel == 0) + goto err; // Inconsistent + in_idx += e; + + if (gp->max_sym < gp->p[i].max_sym) + gp->max_sym = gp->p[i].max_sym; + } + + //fprintf(stderr, "Decoded %d bytes of param\n", in_idx); + return in_idx; + + err: + fqz_free_parameters(gp); + gp->nparam = 0; + return -1; +} + +// Handles the state.p==0 section of uncompress_block_fqz2f +static int decompress_new_read(fqz_slice *s, + fqz_state *state, + fqz_gparams *gp, + fqz_param *pm, + fqz_model *model, + RangeCoder *rc, + unsigned char *in, ssize_t *in_i, // in[in_i], + unsigned char *uncomp, size_t *out_size, + int *rev, char *rev_a, int *len_a, + int *lengths, int nlengths) { + size_t i = *in_i; + ssize_t rec = state->rec; + + if (pm->do_sel) { + state->s = SIMPLE_MODEL(256,_decodeSymbol)(&model->sel, rc); + } else { + state->s = 0; + } + + int x = (gp->gflags & GFLAG_HAVE_STAB) + ? 
gp->stab[MIN(255, state->s)] + : state->s; + if (x >= gp->nparam) + return -1; + pm = &gp->p[x]; + + unsigned int len = state->last_len; + if (!pm->fixed_len || state->first_len) { + len = SIMPLE_MODEL(256,_decodeSymbol)(&model->len[0], rc); + len |= SIMPLE_MODEL(256,_decodeSymbol)(&model->len[1], rc)<<8; + len |= SIMPLE_MODEL(256,_decodeSymbol)(&model->len[2], rc)<<16; + len |= ((unsigned)SIMPLE_MODEL(256,_decodeSymbol)(&model->len[3], rc))<<24; + state->first_len = 0; + state->last_len = len; + } + if (len > *out_size-i || len <= 0) + return -1; + + if (lengths && rec < nlengths) + lengths[rec] = len; + + if (gp->gflags & GFLAG_DO_REV) { + *rev = SIMPLE_MODEL(2,_decodeSymbol)(&model->revcomp, rc); + rev_a[rec] = *rev; + len_a[rec] = len; + } + + if (pm->do_dedup) { + if (SIMPLE_MODEL(2,_decodeSymbol)(&model->dup, rc)) { + // Dup of last line + if (len > i) + return -1; + memcpy(uncomp+i, uncomp+i-len, len); + i += len; + state->p = 0; + state->rec++; + *in_i = i; + return 1; // dup => continue + } + } + + state->rec++; + state->p = len; + state->delta = 0; + state->prevq = 0; + state->qctx = 0; + state->ctx = pm->context; + + *in_i = i; + + return 0; +} + + +static +unsigned char *uncompress_block_fqz2f(fqz_slice *s, + unsigned char *in, + size_t in_size, + size_t *out_size, + int *lengths, + int nlengths) { + fqz_gparams gp; + fqz_param *pm; + char *rev_a = NULL; + int *len_a = NULL; + memset(&gp, 0, sizeof(gp)); + + uint32_t len; + ssize_t i, rec = 0, in_idx; + in_idx = var_get_u32(in, in+in_size, &len); + *out_size = len; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (len > 100000) + return NULL; +#endif + + unsigned char *uncomp = NULL; + RangeCoder rc; + unsigned int last = 0; + + // Decode parameter blocks + if ((i = fqz_read_parameters(&gp, in+in_idx, in_size-in_idx)) < 0) + return NULL; + //dump_params(&gp); + in_idx += i; + + // Optimisations to remove shifts from main loop + for (i = 0; i < gp.nparam; i++) { + int j; + pm = &gp.p[i]; + for (j 
= 0; j < 1024; j++) + pm->ptab[j] <<= pm->ploc; + for (j = 0; j < 256; j++) + pm->dtab[j] <<= pm->dloc; + } + + // Initialise models and entropy coder + fqz_model model; + if (fqz_create_models(&model, &gp) < 0) + return NULL; + + RC_SetInput(&rc, (char *)in+in_idx, (char *)in+in_size); + RC_StartDecode(&rc); + + + // Allocate buffers + uncomp = (unsigned char *)malloc(*out_size); + if (!uncomp) + goto err; + + int nrec = 1000; + rev_a = malloc(nrec); + len_a = malloc(nrec * sizeof(int)); + if (!rev_a || !len_a) + goto err; + + // Main decode loop + fqz_state state; + state.delta = 0; + state.prevq = 0; + state.qctx = 0; + state.p = 0; + state.s = 0; + state.first_len = 1; + state.last_len = 0; + state.rec = 0; + state.ctx = last; + + int rev = 0; + int x = 0; + pm = &gp.p[x]; + for (i = 0; i < len; ) { + if (state.rec >= nrec) { + nrec *= 2; + rev_a = realloc(rev_a, nrec); + len_a = realloc(len_a, nrec*sizeof(int)); + if (!rev_a || !len_a) + goto err; + } + + if (state.p == 0) { + int r = decompress_new_read(s, &state, &gp, pm, &model, &rc, + in, &i, uncomp, out_size, + &rev, rev_a, len_a, + lengths, nlengths); + if (r < 0) + goto err; + if (r > 0) + continue; + last = state.ctx; + } + + // Decode and update context + do { + unsigned char Q = SIMPLE_MODEL(QMAX,_decodeSymbol) + (&model.qual[last], &rc); + + last = fqz_update_ctx(pm, &state, Q); + uncomp[i++] = pm->qmap[Q]; + } while (state.p != 0 && i < len); + } + + rec = state.rec; + if (rec >= nrec) { + nrec *= 2; + rev_a = realloc(rev_a, nrec); + len_a = realloc(len_a, nrec*sizeof(int)); + if (!rev_a || !len_a) + goto err; + } + rev_a[rec] = rev; + len_a[rec] = len; + + if (gp.gflags & GFLAG_DO_REV) { + for (i = rec = 0; i < len && rec < nrec; i += len_a[rec++]) { + if (!rev_a[rec]) + continue; + + int I, J; + unsigned char *cp = uncomp+i; + for (I = 0, J = len_a[rec]-1; I < J; I++, J--) { + unsigned char c; + c = cp[I]; + cp[I] = cp[J]; + cp[J] = c; + } + } + } + + if (RC_FinishDecode(&rc) < 0) + goto err; + + 
fqz_destroy_models(&model); + free(rev_a); + free(len_a); + fqz_free_parameters(&gp); + +#ifdef TEST_MAIN + s->num_records = rec; +#endif + + return uncomp; + + err: + fqz_destroy_models(&model); + free(rev_a); + free(len_a); + fqz_free_parameters(&gp); + free(uncomp); + + return NULL; +} + +char *fqz_compress(int vers, fqz_slice *s, char *in, size_t uncomp_size, + size_t *comp_size, int strat, fqz_gparams *gp) { + if (uncomp_size > INT_MAX) { + *comp_size = 0; + return NULL; + } + + return (char *)compress_block_fqz2f(vers, strat, s, (unsigned char *)in, + uncomp_size, comp_size, gp); +} + +char *fqz_decompress(char *in, size_t comp_size, size_t *uncomp_size, + int *lengths, int nlengths) { + return (char *)uncompress_block_fqz2f(NULL, (unsigned char *)in, + comp_size, uncomp_size, lengths, nlengths); +} diff --git a/src/htslib-1.18/htscodecs/htscodecs/fqzcomp_qual.h b/src/htslib-1.21/htscodecs/htscodecs/fqzcomp_qual.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/fqzcomp_qual.h rename to src/htslib-1.21/htscodecs/htscodecs/fqzcomp_qual.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/htscodecs.c b/src/htslib-1.21/htscodecs/htscodecs/htscodecs.c similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/htscodecs.c rename to src/htslib-1.21/htscodecs/htscodecs/htscodecs.c diff --git a/src/htslib-1.21/htscodecs/htscodecs/htscodecs.h b/src/htslib-1.21/htscodecs/htscodecs/htscodecs.h new file mode 100644 index 0000000..8d67e67 --- /dev/null +++ b/src/htslib-1.21/htscodecs/htscodecs/htscodecs.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021-2024 Genome Research Ltd. + * Author(s): James Bonfield + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger + * Institute nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH + * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HTSCODECS_H +#define HTSCODECS_H + +/* + * Version X.Y.Z encoded as XYYYZZ. + * We mainly increment X and Y. Z *may* get bumped in between official + * releases in order to distinguish untagged github checkouts from + * official release tarballs. + * + * Note currently this needs manually editing as it isn't automatically + * updated by autoconf. + */ +#define HTSCODECS_VERSION 100601 + +/* + * A const string form of the HTSCODECS_VERSION define. + * NB: This is obtained from the auto-generated version.h, so + * we can include release number and git hash. 
+ */ +const char *htscodecs_version(void); + +#endif /* HTSCODECS_H */ diff --git a/src/htslib-1.18/htscodecs/htscodecs/htscodecs_endian.h b/src/htslib-1.21/htscodecs/htscodecs/htscodecs_endian.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/htscodecs_endian.h rename to src/htslib-1.21/htscodecs/htscodecs/htscodecs_endian.h diff --git a/src/htslib-1.21/htscodecs/htscodecs/pack.c b/src/htslib-1.21/htscodecs/htscodecs/pack.c new file mode 100644 index 0000000..eb8dac4 --- /dev/null +++ b/src/htslib-1.21/htscodecs/htscodecs/pack.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2019-2020, 2022 Genome Research Ltd. + * Author(s): James Bonfield + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger + * Institute nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH + * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include +#include +#include +#include + +#include "pack.h" + +//----------------------------------------------------------------------------- + +/* + * Packs multiple symbols into a single byte if the total alphabet of symbols + * used is <= 16. Each new symbol takes up 1, 2, 4 or 8 bits, or 0 if the + * alphabet used is 1 (constant). + * + * If successful, out_meta/out_meta_len are set to hold the mapping table + * to be used during decompression. + * + * Returns the packed buffer on success with new length in out_len, + * NULL of failure + */ +uint8_t *hts_pack(uint8_t *data, int64_t len, + uint8_t *out_meta, int *out_meta_len, uint64_t *out_len) { + int p[256] = {0}, n; + uint64_t i, j; + + // count syms + for (i = 0; i < len; i++) + p[data[i]]=1; + + for (i = n = 0; i < 256; i++) { + if (p[i]) { + p[i] = n++; // p[i] is now the code number + out_meta[n] = i; + } + } + out_meta[0] = n; // 256 wraps to 0 + j = n+1; + + // 1 value per byte + if (n > 16) + return NULL; + + uint8_t *out = malloc(len+1); + if (!out) + return NULL; + + // Work out how many values per byte to encode. 
+ int val_per_byte; + if (n > 4) + val_per_byte = 2; + else if (n > 2) + val_per_byte = 4; + else if (n > 1) + val_per_byte = 8; + else + val_per_byte = 0; // infinite + + *out_meta_len = j; + j = 0; + + switch (val_per_byte) { + case 2: + for (i = 0; i < (len & ~1); i+=2) + out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<4); + switch (len-i) { + case 1: out[j++] = p[data[i]]; + } + *out_len = j; + return out; + + case 4: { + for (i = 0; i < (len & ~3); i+=4) + out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<2) | (p[data[i+2]]<<4) | (p[data[i+3]]<<6); + out[j] = 0; + int s = len-i, x = 0; + switch (s) { + case 3: out[j] |= p[data[i++]] << x; x+=2; // fall-through + case 2: out[j] |= p[data[i++]] << x; x+=2; // fall-through + case 1: out[j] |= p[data[i++]] << x; x+=2; + j++; + } + *out_len = j; + return out; + } + + case 8: { + for (i = 0; i < (len & ~7); i+=8) + out[j++] = (p[data[i+0]]<<0) | (p[data[i+1]]<<1) | (p[data[i+2]]<<2) | (p[data[i+3]]<<3) + | (p[data[i+4]]<<4) | (p[data[i+5]]<<5) | (p[data[i+6]]<<6) | (p[data[i+7]]<<7); + out[j] = 0; + int s = len-i, x = 0; + switch (s) { + case 7: out[j] |= p[data[i++]] << x++; // fall-through + case 6: out[j] |= p[data[i++]] << x++; // fall-through + case 5: out[j] |= p[data[i++]] << x++; // fall-through + case 4: out[j] |= p[data[i++]] << x++; // fall-through + case 3: out[j] |= p[data[i++]] << x++; // fall-through + case 2: out[j] |= p[data[i++]] << x++; // fall-through + case 1: out[j] |= p[data[i++]] << x++; + j++; + } + *out_len = j; + return out; + } + + case 0: + *out_len = j; + return out; + } + + return NULL; +} + + +/* + * Unpacks the meta-data portions of the hts_pack algorithm. + * This consists of the count of symbols and their values. + * + * The "map" array is filled out with the used symbols. + * "nsym" is set to contain the number of symbols per byte; + * 0, 1, 2, 4 or 8. + * + * Returns number of bytes of data[] consumed on success, + * zero on failure. 
+ */ +uint8_t hts_unpack_meta(uint8_t *data, uint32_t data_len, + uint64_t udata_len, uint8_t *map, int *nsym) { + if (data_len == 0) + return 0; + + // Number of symbols used + unsigned int n = data[0]; + if (n == 0) + n = 256; + + // Symbols per byte + if (n <= 1) + *nsym = 0; + else if (n <= 2) + *nsym = 8; + else if (n <= 4) + *nsym = 4; + else if (n <= 16) + *nsym = 2; + else { + *nsym = 1; // no packing + return 1; + } + + if (data_len <= 1) + return 0; + + int j = 1, c = 0; + do { + map[c++] = data[j++]; + } while (c < n && j < data_len); + + return c < n ? 0 : j; +} + +/* + * Unpacks a packed data steam (given the unpacked meta-data). + * + * "map" is the pack map, mapping 0->n to the expanded symbols. + * The "out" buffer must be preallocated by the caller to be the correct + * size. For error checking purposes, out_len is set to the size of + * this buffer. + * + * Returns uncompressed data (out) on success, + * NULL on failure. + */ +uint8_t *hts_unpack(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) { + //uint8_t *out; + uint8_t c = 0; + int64_t i, j = 0, olen; + + if (nsym == 1) { + // raw data; FIXME: shortcut the need for malloc & memcpy here + memcpy(out, data, len); + return out; + } + + switch(nsym) { + case 8: { + union { + uint64_t w; + uint8_t c[8]; + } map[256]; + int x; + for (x = 0; x < 256; x++) { + map[x].c[0] = p[x>>0&1]; + map[x].c[1] = p[x>>1&1]; + map[x].c[2] = p[x>>2&1]; + map[x].c[3] = p[x>>3&1]; + map[x].c[4] = p[x>>4&1]; + map[x].c[5] = p[x>>5&1]; + map[x].c[6] = p[x>>6&1]; + map[x].c[7] = p[x>>7&1]; + } + if ((out_len+7)/8 > len) + return NULL; + olen = out_len & ~7; + + for (i = 0; i < olen; i+=8) + memcpy(&out[i], &map[data[j++]].w, 8); + + if (out_len != olen) { + c = data[j++]; + while (i < out_len) { + out[i++] = p[c & 1]; + c >>= 1; + } + } + break; + } + + case 4: { + union { + uint32_t w; + uint8_t c[4]; + } map[256]; + + int x, y, z, _, P=0; + for (x = 0; x < 4; x++) + for (y = 0; y < 4; 
y++) + for (z = 0; z < 4; z++) + for (_ = 0; _ < 4; _++, P++) { + map[P].c[0] = p[_]; + map[P].c[1] = p[z]; + map[P].c[2] = p[y]; + map[P].c[3] = p[x]; + } + + if ((out_len+3)/4 > len) + return NULL; + olen = out_len & ~3; + + for (i = 0; i < olen-12; i+=16) { + uint32_t w[] = { + map[data[j+0]].w, + map[data[j+1]].w, + map[data[j+2]].w, + map[data[j+3]].w + }; + j += 4; + memcpy(&out[i], &w, 16); + } + + for (; i < olen; i+=4) + memcpy(&out[i], &map[data[j++]].w, 4); + + if (out_len != olen) { + c = data[j++]; + while (i < out_len) { + out[i++] = p[c & 3]; + c >>= 2; + } + } + break; + } + + case 2: { + union { + uint16_t w; + uint8_t c[2]; + } map[256]; + + int x, y; + for (x = 0; x < 16; x++) { + for (y = 0; y < 16; y++) { + map[x*16+y].c[0] = p[y]; + map[x*16+y].c[1] = p[x]; + } + } + + if ((out_len+1)/2 > len) + return NULL; + olen = out_len & ~1; + + for (i = j = 0; i+2 < olen; i+=4) { + uint16_t w[] = { + map[data[j+0]].w, + map[data[j+1]].w + }; + memcpy(&out[i], &w, 4); + + j += 2; + } + + for (; i < olen; i+=2) + memcpy(&out[i], &map[data[j++]].w, 2); + + if (out_len != olen) { + c = data[j++]; + out[i+0] = p[c&15]; + } + break; + } + + case 0: + memset(out, p[0], out_len); + break; + + default: + return NULL; + } + + return out; +} + + +uint8_t *hts_unpack_(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) { + //uint8_t *out; + uint8_t c = 0; + int64_t i, j = 0, olen; + + if (nsym == 1) { + // raw data; FIXME: shortcut the need for malloc & memcpy here + memcpy(out, data, len); + return out; + } + + switch(nsym) { + case 2: { + uint16_t map[256], x, y; + for (x = 0; x < 16; x++) + for (y = 0; y < 16; y++) + map[x*16+y] = p[x]*256+p[y]; + + if ((out_len+1)/2 > len) + return NULL; + olen = out_len & ~1; + + uint16_t *o16 = (uint16_t *)out; + for (i = 0; i+4 < olen/2; i+=4) { + int k; + for (k = 0; k < 4; k++) + o16[i+k] = map[data[i+k]]; + } + j = i; i *= 2; + + for (; i < olen; i+=2) { + uint16_t w1 = map[data[j++]]; + 
*(uint16_t *)&out[i] = w1; + } + + if (out_len != olen) { + c = data[j++]; + out[i+0] = p[c&15]; + } + break; + } + + default: + return NULL; + } + + return out; +} diff --git a/src/htslib-1.18/htscodecs/htscodecs/pack.h b/src/htslib-1.21/htscodecs/htscodecs/pack.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/pack.h rename to src/htslib-1.21/htscodecs/htscodecs/pack.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/permute.h b/src/htslib-1.21/htscodecs/htscodecs/permute.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/permute.h rename to src/htslib-1.21/htscodecs/htscodecs/permute.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/pooled_alloc.h b/src/htslib-1.21/htscodecs/htscodecs/pooled_alloc.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/pooled_alloc.h rename to src/htslib-1.21/htscodecs/htscodecs/pooled_alloc.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_byte.h b/src/htslib-1.21/htscodecs/htscodecs/rANS_byte.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/rANS_byte.h rename to src/htslib-1.21/htscodecs/htscodecs/rANS_byte.h diff --git a/src/htslib-1.21/htscodecs/htscodecs/rANS_static.c b/src/htslib-1.21/htscodecs/htscodecs/rANS_static.c new file mode 100644 index 0000000..1399ee7 --- /dev/null +++ b/src/htslib-1.21/htscodecs/htscodecs/rANS_static.c @@ -0,0 +1,850 @@ +/* + * Copyright (c) 2014-2022 Genome Research Ltd. + * Author(s): James Bonfield + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger + * Institute nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH + * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +// Use 11 for order-1? +#define TF_SHIFT 12 +#define TOTFREQ (1< +#include +#include +#include +#include +#include +#include +#include +#ifndef NO_THREADS +#include +#endif + +#include "rANS_static.h" + +#define ABS(a) ((a)>0?(a):-(a)) + +/*----------------------------------------------------------------------------- + * Memory to memory compression functions. + * + * These are original versions without any manual loop unrolling. They + * are easier to understand, but can be up to 2x slower. 
+ */ + +static +unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size, + unsigned int *out_size) { + unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9); + unsigned char *cp, *out_end; + RansEncSymbol syms[256]; + RansState rans0; + RansState rans2; + RansState rans1; + RansState rans3; + uint8_t* ptr; + int F[256+MAGIC] = {0}, i, j, tab_size, rle, x, fsum = 0; + int m = 0, M = 0; + uint64_t tr; + + if (!out_buf) + return NULL; + + ptr = out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9; + + // Compute statistics + if (hist8(in, in_size, (uint32_t *)F) < 0) { + free(out_buf); + return NULL; + } + tr = in_size ? ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size : 0; + + normalise_harder: + // Normalise so T[i] == TOTFREQ + for (fsum = m = M = j = 0; j < 256; j++) { + if (!F[j]) + continue; + + if (m < F[j]) + m = F[j], M = j; + + if ((F[j] = (F[j]*tr)>>31) == 0) + F[j] = 1; + fsum += F[j]; + } + + fsum++; + if (fsum < TOTFREQ) { + F[M] += TOTFREQ-fsum; + } else if (fsum-TOTFREQ > F[M]/2) { + // Corner case to avoid excessive frequency reduction + tr = 2104533975; goto normalise_harder; // equiv to *0.98. + } else { + F[M] -= fsum-TOTFREQ; + } + + //printf("F[%d]=%d\n", M, F[M]); + assert(F[M]>0); + + // Encode statistics. 
+ cp = out_buf+9; + + for (x = rle = j = 0; j < 256; j++) { + if (F[j]) { + // j + if (rle) { + rle--; + } else { + *cp++ = j; + if (!rle && j && F[j-1]) { + for(rle=j+1; rle<256 && F[rle]; rle++) + ; + rle -= j+1; + *cp++ = rle; + } + //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]); + } + + // F[j] + if (F[j]<128) { + *cp++ = F[j]; + } else { + *cp++ = 128 | (F[j]>>8); + *cp++ = F[j]&0xff; + } + RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); + x += F[j]; + } + } + *cp++ = 0; + + //write(2, out_buf+4, cp-(out_buf+4)); + tab_size = cp-out_buf; + + RansEncInit(&rans0); + RansEncInit(&rans1); + RansEncInit(&rans2); + RansEncInit(&rans3); + + switch (i=(in_size&3)) { + case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); + // fall-through + case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); + // fall-through + case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); + // fall-through + case 0: + break; + } + for (i=(in_size &~3); likely(i>0); i-=4) { + RansEncSymbol *s3 = &syms[in[i-1]]; + RansEncSymbol *s2 = &syms[in[i-2]]; + RansEncSymbol *s1 = &syms[in[i-3]]; + RansEncSymbol *s0 = &syms[in[i-4]]; + + RansEncPutSymbol(&rans3, &ptr, s3); + RansEncPutSymbol(&rans2, &ptr, s2); + RansEncPutSymbol(&rans1, &ptr, s1); + RansEncPutSymbol(&rans0, &ptr, s0); + } + + RansEncFlush(&rans3, &ptr); + RansEncFlush(&rans2, &ptr); + RansEncFlush(&rans1, &ptr); + RansEncFlush(&rans0, &ptr); + + // Finalise block size and return it + *out_size = (out_end - ptr) + tab_size; + + cp = out_buf; + + *cp++ = 0; // order + *cp++ = ((*out_size-9)>> 0) & 0xff; + *cp++ = ((*out_size-9)>> 8) & 0xff; + *cp++ = ((*out_size-9)>>16) & 0xff; + *cp++ = ((*out_size-9)>>24) & 0xff; + + *cp++ = (in_size>> 0) & 0xff; + *cp++ = (in_size>> 8) & 0xff; + *cp++ = (in_size>>16) & 0xff; + *cp++ = (in_size>>24) & 0xff; + + memmove(out_buf + tab_size, ptr, out_end-ptr); + + return out_buf; +} + +typedef struct { + unsigned char R[TOTFREQ]; +} ari_decoder; + +static +unsigned char 
*rans_uncompress_O0(unsigned char *in, unsigned int in_size, + unsigned int *out_size) { + /* Load in the static tables */ + unsigned char *cp = in + 9; + unsigned char *cp_end = in + in_size; + const uint32_t mask = (1u << TF_SHIFT)-1; + int i, j, rle; + unsigned int x, y; + unsigned int out_sz, in_sz; + char *out_buf; + RansState R[4]; + RansState m[4]; + uint16_t sfreq[TOTFREQ+32]; + uint16_t ssym [TOTFREQ+32]; // faster, but only needs uint8_t + uint32_t sbase[TOTFREQ+16]; // faster, but only needs uint16_t + + if (in_size < 26) // Need at least this many bytes just to start + return NULL; + + if (*in++ != 0) // Order-0 check + return NULL; + + in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24); + out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24); + if (in_sz != in_size-9) + return NULL; + + if (out_sz >= INT_MAX) + return NULL; // protect against some overflow cases + + // For speeding up the fuzzer only. + // Small input can lead to large uncompressed data. + // We reject this as it just slows things up instead of testing more code + // paths (once we've verified a few times for large data). +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (out_sz > 100000) + return NULL; +#endif + + out_buf = malloc(out_sz); + if (!out_buf) + return NULL; + + //fprintf(stderr, "out_sz=%d\n", out_sz); + + // Precompute reverse lookup of frequency. 
+ rle = x = y = 0; + j = *cp++; + do { + int F, C; + if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left + if ((F = *cp++) >= 128) { + F &= ~128; + F = ((F & 127) << 8) | *cp++; + } + C = x; + + if (x + F > TOTFREQ) + goto cleanup; + + for (y = 0; y < F; y++) { + ssym [y + C] = j; + sfreq[y + C] = F; + sbase[y + C] = y; + } + x += F; + + if (!rle && j+1 == *cp) { + j = *cp++; + rle = *cp++; + } else if (rle) { + rle--; + j++; + if (j > 255) + goto cleanup; + } else { + j = *cp++; + } + } while(j); + + if (x < TOTFREQ-1 || x > TOTFREQ) + goto cleanup; + if (x != TOTFREQ) { + // Protection against accessing uninitialised memory in the case + // where SUM(freqs) == 4095 and not 4096. + ssym [x] = ssym [x-1]; + sfreq[x] = sfreq[x-1]; + sbase[x] = sbase[x-1]+1; + } + + // 16 bytes of cp here. Also why cp - 16 in above loop. + if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left + + RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto cleanup; + RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto cleanup; + RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto cleanup; + RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto cleanup; + + int out_end = (out_sz&~3); + cp_end -= 8; // within 8 for simplicity of loop below + // 2 x likely() here harms gcc 7.5 by about 8% rate drop, but only in O2 + for (i=0; likely(i < out_end); i+=4) { + // /curr code + // gcc7 O2 513/497 562/556++ 556/547 ok + // gcc7 O3 566/552 569/553 581/563+ + // gcc10 O2 544/538 563/547 541/537-? 
+ // gcc10 O3 531/519 546/530 575/546+ + // gcc11 O2 512/490 588/540 540/535 mid + // gcc11 O3 482/471 553/541 549/535 + // gcc12 O2 533/526 544/534 539/535 + // gcc12 O3 548/533 502/497-- 553/527 ok + // clang10 555/542 564/549 560/541 + // clang13 560/553 572/559 556/559 + m[0] = R[0] & mask; + R[0] = sfreq[m[0]] * (R[0] >> TF_SHIFT) + sbase[m[0]]; + + m[1] = R[1] & mask; + R[1] = sfreq[m[1]] * (R[1] >> TF_SHIFT) + sbase[m[1]]; + + m[2] = R[2] & mask; + R[2] = sfreq[m[2]] * (R[2] >> TF_SHIFT) + sbase[m[2]]; + + m[3] = R[3] & mask; + R[3] = sfreq[m[3]] * (R[3] >> TF_SHIFT) + sbase[m[3]]; + + // likely() here harms gcc12 -O3 + if (cp>2)]]++; + F[0][in[2*(in_size>>2)]]++; + F[0][in[3*(in_size>>2)]]++; + T[0]+=3; + + + // Normalise so T[i] == TOTFREQ + for (rle_i = i = 0; i < 256; i++) { + int t2, m, M; + unsigned int x; + + if (T[i] == 0) + continue; + + //uint64_t p = (TOTFREQ * TOTFREQ) / t; + double p = ((double)TOTFREQ)/T[i]; + normalise_harder: + for (t2 = m = M = j = 0; j < 256; j++) { + if (!F[i][j]) + continue; + + if (m < F[i][j]) + m = F[i][j], M = j; + + //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0) + if ((F[i][j] *= p) == 0) + F[i][j] = 1; + t2 += F[i][j]; + } + + t2++; + if (t2 < TOTFREQ) { + F[i][M] += TOTFREQ-t2; + } else if (t2-TOTFREQ >= F[i][M]/2) { + // Corner case to avoid excessive frequency reduction + p = .98; goto normalise_harder; + } else { + F[i][M] -= t2-TOTFREQ; + } + + // Store frequency table + // i + if (rle_i) { + rle_i--; + } else { + *cp++ = i; + // FIXME: could use order-0 statistics to observe which alphabet + // symbols are present and base RLE on that ordering instead. 
+ if (i && T[i-1]) { + for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++) + ; + rle_i -= i+1; + *cp++ = rle_i; + } + } + + int *F_i_ = F[i]; + x = 0; + rle_j = 0; + for (j = 0; j < 256; j++) { + if (F_i_[j]) { + //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x); + + // j + if (rle_j) { + rle_j--; + } else { + *cp++ = j; + if (!rle_j && j && F_i_[j-1]) { + for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++) + ; + rle_j -= j+1; + *cp++ = rle_j; + } + } + + // F_i_[j] + if (F_i_[j]<128) { + *cp++ = F_i_[j]; + } else { + *cp++ = 128 | (F_i_[j]>>8); + *cp++ = F_i_[j]&0xff; + } + + RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT); + x += F_i_[j]; + } + } + *cp++ = 0; + } + *cp++ = 0; + + //write(2, out_buf+4, cp-(out_buf+4)); + tab_size = cp - out_buf; + assert(tab_size < 257*257*3); + + RansState rans0, rans1, rans2, rans3; + RansEncInit(&rans0); + RansEncInit(&rans1); + RansEncInit(&rans2); + RansEncInit(&rans3); + + uint8_t* ptr = out_end; + + int isz4 = in_size>>2; + int i0 = 1*isz4-2; + int i1 = 2*isz4-2; + int i2 = 3*isz4-2; + int i3 = 4*isz4-2; + + unsigned char l0 = in[i0+1]; + unsigned char l1 = in[i1+1]; + unsigned char l2 = in[i2+1]; + unsigned char l3 = in[i3+1]; + + // Deal with the remainder + l3 = in[in_size-1]; + for (i3 = in_size-2; i3 > 4*isz4-2; i3--) { + unsigned char c3 = in[i3]; + RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]); + l3 = c3; + } + + for (; likely(i0 >= 0); i0--, i1--, i2--, i3--) { + unsigned char c3 = in[i3]; + unsigned char c2 = in[i2]; + unsigned char c1 = in[i1]; + unsigned char c0 = in[i0]; + + RansEncSymbol *s3 = &syms[c3][l3]; + RansEncSymbol *s2 = &syms[c2][l2]; + RansEncSymbol *s1 = &syms[c1][l1]; + RansEncSymbol *s0 = &syms[c0][l0]; + + RansEncPutSymbol4(&rans3, &rans2, &rans1, &rans0, &ptr, + s3, s2, s1, s0); + + l3 = c3; + l2 = c2; + l1 = c1; + l0 = c0; + } + + RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]); + RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]); + RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]); + 
RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]); + + RansEncFlush(&rans3, &ptr); + RansEncFlush(&rans2, &ptr); + RansEncFlush(&rans1, &ptr); + RansEncFlush(&rans0, &ptr); + + *out_size = (out_end - ptr) + tab_size; + + cp = out_buf; + *cp++ = 1; // order + + *cp++ = ((*out_size-9)>> 0) & 0xff; + *cp++ = ((*out_size-9)>> 8) & 0xff; + *cp++ = ((*out_size-9)>>16) & 0xff; + *cp++ = ((*out_size-9)>>24) & 0xff; + + *cp++ = (in_size>> 0) & 0xff; + *cp++ = (in_size>> 8) & 0xff; + *cp++ = (in_size>>16) & 0xff; + *cp++ = (in_size>>24) & 0xff; + + memmove(out_buf + tab_size, ptr, out_end-ptr); + + cleanup: + htscodecs_tls_free(syms); + + return out_buf; +} + +static +unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size, + unsigned int *out_size) { + /* Load in the static tables */ + unsigned char *cp = in + 9; + unsigned char *ptr_end = in + in_size; + int i, j = -999, rle_i, rle_j; + unsigned int x; + unsigned int out_sz, in_sz; + char *out_buf = NULL; + + // Sanity checking + if (in_size < 27) // Need at least this many bytes to start + return NULL; + + if (*in++ != 1) // Order-1 check + return NULL; + + in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24); + out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24); + if (in_sz != in_size-9) + return NULL; + + if (out_sz >= INT_MAX) + return NULL; // protect against some overflow cases + + // For speeding up the fuzzer only. + // Small input can lead to large uncompressed data. + // We reject this as it just slows things up instead of testing more code + // paths (once we've verified a few times for large data). 
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (out_sz > 100000) + return NULL; +#endif + + // Allocate decoding lookup tables + RansDecSymbol32 (*syms)[256]; + uint8_t *mem = htscodecs_tls_calloc(256, sizeof(ari_decoder) + + sizeof(*syms)); + if (!mem) + return NULL; + ari_decoder *const D = (ari_decoder *)mem; + syms = (RansDecSymbol32 (*)[256])(mem + 256*sizeof(ari_decoder)); + int16_t map[256], map_i = 0; + + memset(map, -1, 256*sizeof(*map)); + + if (!D) goto cleanup; + /* These memsets prevent illegal memory access in syms due to + broken compressed data. As D is calloc'd, all illegal transitions + will end up in either row or column 0 of syms. */ + memset(&syms[0], 0, sizeof(syms[0])); + for (i = 0; i < 256; i++) + memset(&syms[i][0], 0, sizeof(syms[0][0])); + + //fprintf(stderr, "out_sz=%d\n", out_sz); + + //i = *cp++; + rle_i = 0; + i = *cp++; + do { + // Map arbitrary a,b,c to 0,1,2 to improve cache locality. + if (map[i] == -1) + map[i] = map_i++; + int m_i = map[i]; + + rle_j = x = 0; + j = *cp++; + do { + if (map[j] == -1) + map[j] = map_i++; + + int F, C; + if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left + if ((F = *cp++) >= 128) { + F &= ~128; + F = ((F & 127) << 8) | *cp++; + } + C = x; + + //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C); + + if (unlikely(!F)) + F = TOTFREQ; + + RansDecSymbolInit32(&syms[m_i][j], C, F); + + /* Build reverse lookup table */ + //if (!D[i].R) D[i].R = (unsigned char *)malloc(TOTFREQ); + if (x + F > TOTFREQ) + goto cleanup; + + memset(&D[m_i].R[x], j, F); + x += F; + + if (!rle_j && j+1 == *cp) { + j = *cp++; + rle_j = *cp++; + } else if (rle_j) { + rle_j--; + j++; + if (j > 255) + goto cleanup; + } else { + j = *cp++; + } + } while(j); + + if (x < TOTFREQ-1 || x > TOTFREQ) + goto cleanup; + if (x < TOTFREQ) // historically we fill 4095, not 4096 + D[i].R[x] = D[i].R[x-1]; + + if (!rle_i && i+1 == *cp) { + i = *cp++; + rle_i = *cp++; + } else if (rle_i) { + rle_i--; + i++; + if (i > 
255) + goto cleanup; + } else { + i = *cp++; + } + } while (i); + for (i = 0; i < 256; i++) + if (map[i] == -1) + map[i] = 0; + + RansState rans0, rans1, rans2, rans3; + uint8_t *ptr = cp; + if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left + RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup; + RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup; + RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup; + RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup; + + RansState R[4]; + R[0] = rans0; + R[1] = rans1; + R[2] = rans2; + R[3] = rans3; + + unsigned int isz4 = out_sz>>2; + uint32_t l0 = 0; + uint32_t l1 = 0; + uint32_t l2 = 0; + uint32_t l3 = 0; + + unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4}; + + /* Allocate output buffer */ + out_buf = malloc(out_sz); + if (!out_buf) goto cleanup; + + uint8_t cc0 = D[map[l0]].R[R[0] & ((1u << TF_SHIFT)-1)]; + uint8_t cc1 = D[map[l1]].R[R[1] & ((1u << TF_SHIFT)-1)]; + uint8_t cc2 = D[map[l2]].R[R[2] & ((1u << TF_SHIFT)-1)]; + uint8_t cc3 = D[map[l3]].R[R[3] & ((1u << TF_SHIFT)-1)]; + + ptr_end -= 8; + for (; likely(i4[0] < isz4); i4[0]++, i4[1]++, i4[2]++, i4[3]++) { + // seq4-head2: file q40b + // O3 O2 + // gcc7 296/291 290/260 + // gcc10 292/292 290/261 + // gcc11 293/293 290/265 + // gcc12 293/290 291/266 + // clang10 293/290 296/272 + // clang13 300/290 290/266 + out_buf[i4[0]] = cc0; + out_buf[i4[1]] = cc1; + out_buf[i4[2]] = cc2; + out_buf[i4[3]] = cc3; + + RansDecSymbol32 s[4] = { + syms[l0][cc0], + syms[l1][cc1], + syms[l2][cc2], + syms[l3][cc3], + }; + RansDecAdvanceStep(&R[0], s[0].start, s[0].freq, TF_SHIFT); + RansDecAdvanceStep(&R[1], s[1].start, s[1].freq, TF_SHIFT); + RansDecAdvanceStep(&R[2], s[2].start, s[2].freq, TF_SHIFT); + RansDecAdvanceStep(&R[3], s[3].start, s[3].freq, TF_SHIFT); + + // Likely here helps speed of high-entropy data by 10-11%, + // but harms low entropy-data speed by 3-4%. 
+ if ((ptr < ptr_end)) { + RansDecRenorm2(&R[0], &R[1], &ptr); + RansDecRenorm2(&R[2], &R[3], &ptr); + } else { + RansDecRenormSafe(&R[0], &ptr, ptr_end+8); + RansDecRenormSafe(&R[1], &ptr, ptr_end+8); + RansDecRenormSafe(&R[2], &ptr, ptr_end+8); + RansDecRenormSafe(&R[3], &ptr, ptr_end+8); + } + + l0 = map[cc0]; + l1 = map[cc1]; + l2 = map[cc2]; + l3 = map[cc3]; + + cc0 = D[l0].R[R[0] & ((1u << TF_SHIFT)-1)]; + cc1 = D[l1].R[R[1] & ((1u << TF_SHIFT)-1)]; + cc2 = D[l2].R[R[2] & ((1u << TF_SHIFT)-1)]; + cc3 = D[l3].R[R[3] & ((1u << TF_SHIFT)-1)]; + } + + // Remainder + for (; i4[3] < out_sz; i4[3]++) { + unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)]; + out_buf[i4[3]] = c3; + + uint32_t m = R[3] & ((1u << TF_SHIFT)-1); + R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start; + RansDecRenormSafe(&R[3], &ptr, ptr_end+8); + l3 = map[c3]; + } + + *out_size = out_sz; + + cleanup: + htscodecs_tls_free(D); + + return (unsigned char *)out_buf; +} + +/*----------------------------------------------------------------------------- + * Simple interface to the order-0 vs order-1 encoders and decoders. + */ +unsigned char *rans_compress(unsigned char *in, unsigned int in_size, + unsigned int *out_size, int order) { + if (in_size > INT_MAX) { + *out_size = 0; + return NULL; + } + + return order + ? rans_compress_O1(in, in_size, out_size) + : rans_compress_O0(in, in_size, out_size); +} + +unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size, + unsigned int *out_size) { + /* Both rans_uncompress functions need to be able to read at least 9 + bytes. */ + if (in_size < 9) + return NULL; + return in[0] + ? 
rans_uncompress_O1(in, in_size, out_size) + : rans_uncompress_O0(in, in_size, out_size); +} diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_static.h b/src/htslib-1.21/htscodecs/htscodecs/rANS_static.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/rANS_static.h rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static.h diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static16_int.h b/src/htslib-1.21/htscodecs/htscodecs/rANS_static16_int.h similarity index 100% rename from src/htslib-1.19.1/htscodecs/htscodecs/rANS_static16_int.h rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static16_int.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr.c b/src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr.c similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr.c rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr.c diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr.h b/src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr.h rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr.h diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr_avx2.c b/src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr_avx2.c similarity index 100% rename from src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr_avx2.c rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr_avx2.c diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr_avx512.c b/src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr_avx512.c similarity index 100% rename from src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr_avx512.c rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr_avx512.c diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr_neon.c b/src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr_neon.c similarity index 
100% rename from src/htslib-1.18/htscodecs/htscodecs/rANS_static32x16pr_neon.c rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr_neon.c diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr_sse4.c b/src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr_sse4.c similarity index 100% rename from src/htslib-1.19.1/htscodecs/htscodecs/rANS_static32x16pr_sse4.c rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static32x16pr_sse4.c diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_static4x16.h b/src/htslib-1.21/htscodecs/htscodecs/rANS_static4x16.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/rANS_static4x16.h rename to src/htslib-1.21/htscodecs/htscodecs/rANS_static4x16.h diff --git a/src/htslib-1.21/htscodecs/htscodecs/rANS_static4x16pr.c b/src/htslib-1.21/htscodecs/htscodecs/rANS_static4x16pr.c new file mode 100644 index 0000000..8c9a64a --- /dev/null +++ b/src/htslib-1.21/htscodecs/htscodecs/rANS_static4x16pr.c @@ -0,0 +1,1728 @@ +/* + * Copyright (c) 2017-2023 Genome Research Ltd. + * Author(s): James Bonfield + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger + * Institute nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific + * prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH + * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// FIXME Can we get decoder to return the compressed sized read, avoiding +// us needing to store it? Yes we can. See c-size comments. If we added all these +// together we could get rans_uncompress_to_4x16 to return the number of bytes +// consumed, avoiding the calling code from needed to explicitly stored the size. +// However the effect on name tokeniser is to save 0.1 to 0.2% so not worth it. + +/*-------------------------------------------------------------------------- */ +/* + * Example wrapper to use the rans_byte.h functions included above. + * + * This demonstrates how to use, and unroll, an order-0 and order-1 frequency + * model. + */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef NO_THREADS +#include +#endif + +#include "rANS_word.h" +#include "rANS_static4x16.h" +#include "rANS_static16_int.h" +#include "pack.h" +#include "rle.h" +#include "utils.h" + +#define TF_SHIFT 12 +#define TOTFREQ (1<>8) & 0xff; + if (!N) N=4; + + order &= 0xff; + unsigned int sz = (order == 0 + ? 1.05*size + 257*3 + 4 + : 1.05*size + 257*257*3 + 4 + 257*3+4) + + ((order & RANS_ORDER_PACK) ? 
1 : 0) + + ((order & RANS_ORDER_RLE) ? 1 + 257*3+4: 0) + 20 + + ((order & RANS_ORDER_X32) ? (32-4)*4 : 0) + + ((order & RANS_ORDER_STRIPE) ? 7 + 5*N: 0); + return sz + (sz&1) + 2; // make this even so buffers are word aligned +} + +// Compresses in_size bytes from 'in' to *out_size bytes in 'out'. +// +// NB: The output buffer does not hold the original size, so it is up to +// the caller to store this. +unsigned char *rans_compress_O0_4x16(unsigned char *in, unsigned int in_size, + unsigned char *out, unsigned int *out_size) { + unsigned char *cp, *out_end; + RansEncSymbol syms[256]; + RansState rans0; + RansState rans2; + RansState rans1; + RansState rans3; + uint8_t* ptr; + uint32_t F[256+MAGIC] = {0}; + int i, j, tab_size = 0, rle, x; + // -20 for order/size/meta + uint32_t bound = rans_compress_bound_4x16(in_size,0)-20; + + if (!out) { + *out_size = bound; + out = malloc(*out_size); + } + if (!out || bound > *out_size) + return NULL; + + // If "out" isn't word aligned, tweak out_end/ptr to ensure it is. + // We already added more round in bound to allow for this. + if (((size_t)out)&1) + bound--; + ptr = out_end = out + bound; + + if (in_size == 0) + goto empty; + + // Compute statistics + if (hist8(in, in_size, F) < 0) + return NULL; + + // Normalise so frequences sum to power of 2 + uint32_t fsum = in_size; + uint32_t max_val = round2(fsum); + if (max_val > TOTFREQ) + max_val = TOTFREQ; + + if (normalise_freq(F, fsum, max_val) < 0) + return NULL; + fsum=max_val; + + cp = out; + cp += encode_freq(cp, F); + tab_size = cp-out; + //write(2, out+4, cp-(out+4)); + + if (normalise_freq(F, fsum, TOTFREQ) < 0) + return NULL; + + // Encode statistics. 
+ for (x = rle = j = 0; j < 256; j++) { + if (F[j]) { + RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); + x += F[j]; + } + } + + RansEncInit(&rans0); + RansEncInit(&rans1); + RansEncInit(&rans2); + RansEncInit(&rans3); + + switch (i=(in_size&3)) { + case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); + // fall-through + case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); + // fall-through + case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); + // fall-through + case 0: + break; + } + for (i=(in_size &~3); i>0; i-=4) { + RansEncSymbol *s3 = &syms[in[i-1]]; + RansEncSymbol *s2 = &syms[in[i-2]]; + RansEncSymbol *s1 = &syms[in[i-3]]; + RansEncSymbol *s0 = &syms[in[i-4]]; + +#if 1 + RansEncPutSymbol(&rans3, &ptr, s3); + RansEncPutSymbol(&rans2, &ptr, s2); + RansEncPutSymbol(&rans1, &ptr, s1); + RansEncPutSymbol(&rans0, &ptr, s0); +#else + // Slightly beter on gcc, much better on clang + uint16_t *ptr16 = (uint16_t *)ptr; + + if (rans3 >= s3->x_max) *--ptr16 = (uint16_t)rans3, rans3 >>= 16; + if (rans2 >= s2->x_max) *--ptr16 = (uint16_t)rans2, rans2 >>= 16; + uint32_t q3 = (uint32_t) (((uint64_t)rans3 * s3->rcp_freq) >> s3->rcp_shift); + uint32_t q2 = (uint32_t) (((uint64_t)rans2 * s2->rcp_freq) >> s2->rcp_shift); + rans3 += s3->bias + q3 * s3->cmpl_freq; + rans2 += s2->bias + q2 * s2->cmpl_freq; + + if (rans1 >= s1->x_max) *--ptr16 = (uint16_t)rans1, rans1 >>= 16; + if (rans0 >= s0->x_max) *--ptr16 = (uint16_t)rans0, rans0 >>= 16; + uint32_t q1 = (uint32_t) (((uint64_t)rans1 * s1->rcp_freq) >> s1->rcp_shift); + uint32_t q0 = (uint32_t) (((uint64_t)rans0 * s0->rcp_freq) >> s0->rcp_shift); + rans1 += s1->bias + q1 * s1->cmpl_freq; + rans0 += s0->bias + q0 * s0->cmpl_freq; + + ptr = (uint8_t *)ptr16; +#endif + } + + RansEncFlush(&rans3, &ptr); + RansEncFlush(&rans2, &ptr); + RansEncFlush(&rans1, &ptr); + RansEncFlush(&rans0, &ptr); + + empty: + // Finalise block size and return it + *out_size = (out_end - ptr) + tab_size; + + 
memmove(out + tab_size, ptr, out_end-ptr); + + return out; +} + +unsigned char *rans_uncompress_O0_4x16(unsigned char *in, unsigned int in_size, + unsigned char *out, unsigned int out_sz) { + if (in_size < 16) // 4-states at least + return NULL; + + if (out_sz >= INT_MAX) + return NULL; // protect against some overflow cases + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (out_sz > 100000) + return NULL; +#endif + + /* Load in the static tables */ + unsigned char *cp = in, *out_free = NULL; + unsigned char *cp_end = in + in_size - 8; // within 8 => be extra safe + int i, j; + unsigned int x, y; + uint16_t sfreq[TOTFREQ+32]; + uint16_t sbase[TOTFREQ+32]; // faster to use 32-bit on clang + uint8_t ssym [TOTFREQ+64]; // faster to use 16-bit on clang + + if (!out) + out_free = out = malloc(out_sz); + if (!out) + return NULL; + + // Precompute reverse lookup of frequency. + uint32_t F[256] = {0}, fsum; + int fsz = decode_freq(cp, cp_end, F, &fsum); + if (!fsz) + goto err; + cp += fsz; + + normalise_freq_shift(F, fsum, TOTFREQ); + + // Build symbols; fixme, do as part of decode, see the _d variant + for (j = x = 0; j < 256; j++) { + if (F[j]) { + if (F[j] > TOTFREQ - x) + goto err; + for (y = 0; y < F[j]; y++) { + ssym [y + x] = j; + sfreq[y + x] = F[j]; + sbase[y + x] = y; + } + x += F[j]; + } + } + + if (x != TOTFREQ) + goto err; + + if (cp+16 > cp_end+8) + goto err; + + RansState R[4]; + RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto err; + RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto err; + RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto err; + RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto err; + +// Simple version is comparable to below, but only with -O3 +// +// for (i = 0; cp < cp_end-8 && i < (out_sz&~7); i+=8) { +// for(j=0; j<8;j++) { +// RansState m = RansDecGet(&R[j%4], TF_SHIFT); +// R[j%4] = sfreq[m] * (R[j%4] >> TF_SHIFT) + sbase[m]; +// out[i+j] = ssym[m]; +// RansDecRenorm(&R[j%4], &cp); +// } +// } + + for (i = 0; 
cp < cp_end-8 && i < (out_sz&~7); i+=8) { + for (j = 0; j < 8; j+=4) { + RansState m0 = RansDecGet(&R[0], TF_SHIFT); + RansState m1 = RansDecGet(&R[1], TF_SHIFT); + out[i+j+0] = ssym[m0]; + out[i+j+1] = ssym[m1]; + + R[0] = sfreq[m0] * (R[0] >> TF_SHIFT) + sbase[m0]; + R[1] = sfreq[m1] * (R[1] >> TF_SHIFT) + sbase[m1]; + + RansState m2 = RansDecGet(&R[2], TF_SHIFT); + RansState m3 = RansDecGet(&R[3], TF_SHIFT); + + RansDecRenorm(&R[0], &cp); + RansDecRenorm(&R[1], &cp); + + R[2] = sfreq[m2] * (R[2] >> TF_SHIFT) + sbase[m2]; + R[3] = sfreq[m3] * (R[3] >> TF_SHIFT) + sbase[m3]; + + RansDecRenorm(&R[2], &cp); + RansDecRenorm(&R[3], &cp); + + out[i+j+2] = ssym[m2]; + out[i+j+3] = ssym[m3]; + } + } + + // remainder + for (; i < out_sz; i++) { + RansState m = RansDecGet(&R[i%4], TF_SHIFT); + R[i%4] = sfreq[m] * (R[i%4] >> TF_SHIFT) + sbase[m]; + out[i] = ssym[m]; + RansDecRenormSafe(&R[i%4], &cp, cp_end+8); + } + + //fprintf(stderr, " 0 Decoded %d bytes\n", (int)(cp-in)); //c-size + + return out; + + err: + free(out_free); + return NULL; +} + +//----------------------------------------------------------------------------- + +// Compute the entropy of 12-bit vs 10-bit frequency tables. +// 10 bit means smaller memory footprint when decoding and +// more speed due to cache hits, but it *may* be a poor +// compression fit. 
+int rans_compute_shift(uint32_t *F0, uint32_t (*F)[256], uint32_t *T, + uint32_t *S) { + int i, j; + + double e10 = 0, e12 = 0; + int max_tot = 0; + for (i = 0; i < 256; i++) { + if (F0[i] == 0) + continue; + unsigned int max_val = round2(T[i]); + int ns = 0; +#define MAX(a,b) ((a)>(b)?(a):(b)) + + // Number of samples that get their freq bumped to 1 + int sm10 = 0, sm12 = 0; + for (j = 0; j < 256; j++) { + if (F[i][j] && max_val / F[i][j] > TOTFREQ_O1_FAST) + sm10++; + if (F[i][j] && max_val / F[i][j] > TOTFREQ_O1) + sm12++; + } + + double l10 = log(TOTFREQ_O1_FAST + sm10); + double l12 = log(TOTFREQ_O1 + sm12); + double T_slow = (double)TOTFREQ_O1/T[i]; + double T_fast = (double)TOTFREQ_O1_FAST/T[i]; + + for (j = 0; j < 256; j++) { + if (F[i][j]) { + ns++; + + e10 -= F[i][j] * (fast_log(MAX(F[i][j]*T_fast,1)) - l10); + e12 -= F[i][j] * (fast_log(MAX(F[i][j]*T_slow,1)) - l12); + + // Estimation of compressed symbol freq table too. + e10 += 1.3; + e12 += 4.7; + } + } + + // Order-1 frequencies often end up totalling under TOTFREQ. + // In this case it's smaller to output the real frequencies + // prior to normalisation and normalise after (with an extra + // normalisation step needed in the decoder too). + // + // Thus we normalise to a power of 2 only, store those, + // and renormalise later here (and in decoder) by bit-shift + // to get to the fixed size. + if (ns < 64 && max_val > 128) max_val /= 2; + if (max_val > 1024) max_val /= 2; + if (max_val > TOTFREQ_O1) max_val = TOTFREQ_O1; + S[i] = max_val; // scale to max this + if (max_tot < max_val) + max_tot = max_val; + } + int shift = e10/e12 < 1.01 || max_tot <= TOTFREQ_O1_FAST + ? 
TF_SHIFT_O1_FAST + : TF_SHIFT_O1; + +// fprintf(stderr, "e10/12 = %f %f %f, shift %d\n", +// e10/log(256), e12/log(256), e10/e12, shift); + + return shift; +} + +static +unsigned char *rans_compress_O1_4x16(unsigned char *in, unsigned int in_size, + unsigned char *out, unsigned int *out_size) { + unsigned char *cp, *out_end, *out_free = NULL; + unsigned int tab_size; + + // -20 for order/size/meta + uint32_t bound = rans_compress_bound_4x16(in_size,1)-20; + + if (!out) { + *out_size = bound; + out_free = out = malloc(*out_size); + } + if (!out || bound > *out_size) + return NULL; + + if (((size_t)out)&1) + bound--; + out_end = out + bound; + + RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms))); + if (!syms) { + free(out_free); + return NULL; + } + + cp = out; + int shift = encode_freq1(in, in_size, 4, syms, &cp); + if (shift < 0) { + htscodecs_tls_free(syms); + return NULL; + } + tab_size = cp - out; + + RansState rans0, rans1, rans2, rans3; + RansEncInit(&rans0); + RansEncInit(&rans1); + RansEncInit(&rans2); + RansEncInit(&rans3); + + uint8_t* ptr = out_end; + + int isz4 = in_size>>2; + int i0 = 1*isz4-2; + int i1 = 2*isz4-2; + int i2 = 3*isz4-2; + int i3 = 4*isz4-2; + + unsigned char l0 = in[i0+1]; + unsigned char l1 = in[i1+1]; + unsigned char l2 = in[i2+1]; + unsigned char l3 = in[i3+1]; + + // Deal with the remainder + l3 = in[in_size-1]; + for (i3 = in_size-2; i3 > 4*isz4-2; i3--) { + unsigned char c3 = in[i3]; + RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]); + l3 = c3; + } + + for (; i0 >= 0; i0--, i1--, i2--, i3--) { + unsigned char c0, c1, c2, c3; + RansEncSymbol *s3 = &syms[c3 = in[i3]][l3]; + RansEncSymbol *s2 = &syms[c2 = in[i2]][l2]; + RansEncSymbol *s1 = &syms[c1 = in[i1]][l1]; + RansEncSymbol *s0 = &syms[c0 = in[i0]][l0]; + + RansEncPutSymbol(&rans3, &ptr, s3); + RansEncPutSymbol(&rans2, &ptr, s2); + RansEncPutSymbol(&rans1, &ptr, s1); + RansEncPutSymbol(&rans0, &ptr, s0); + + l0 = c0; + l1 = c1; + l2 = c2; + l3 = c3; + } + + 
RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]); + RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]); + RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]); + RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]); + + RansEncFlush(&rans3, &ptr); + RansEncFlush(&rans2, &ptr); + RansEncFlush(&rans1, &ptr); + RansEncFlush(&rans0, &ptr); + + *out_size = (out_end - ptr) + tab_size; + + cp = out; + memmove(out + tab_size, ptr, out_end-ptr); + + htscodecs_tls_free(syms); + return out; +} + +//#define MAGIC2 111 +#define MAGIC2 179 +//#define MAGIC2 0 + +static +unsigned char *rans_uncompress_O1_4x16(unsigned char *in, unsigned int in_size, + unsigned char *out, unsigned int out_sz) { + if (in_size < 16) // 4-states at least + return NULL; + + if (out_sz >= INT_MAX) + return NULL; // protect against some overflow cases + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (out_sz > 100000) + return NULL; +#endif + + /* Load in the static tables */ + unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL; + unsigned char *c_freq = NULL; + int i, j = -999; + unsigned int x; + + uint8_t *sfb_ = htscodecs_tls_alloc(256*(TOTFREQ_O1+MAGIC2)*sizeof(*sfb_)); + uint32_t (*s3)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])sfb_; + // reuse the same memory for the fast mode lookup, but this only works + // if we're on e.g. 12-bit freqs vs 10-bit freqs as needs 4x larger array. + //uint32_t s3[256][TOTFREQ_O1_FAST]; + + if (!sfb_) + return NULL; + fb_t (*fb)[256] = htscodecs_tls_alloc(256 * sizeof(*fb)); + if (!fb) + goto err; + uint8_t *sfb[256]; + if ((*cp >> 4) == TF_SHIFT_O1) { + for (i = 0; i < 256; i++) + sfb[i]= sfb_ + i*(TOTFREQ_O1+MAGIC2); + } else { + for (i = 0; i < 256; i++) + sfb[i]= sfb_ + i*(TOTFREQ_O1_FAST+MAGIC2); + } + + if (!out) + out_free = out = malloc(out_sz); + + if (!out) + goto err; + + //fprintf(stderr, "out_sz=%d\n", out_sz); + + // compressed header? 
If so uncompress it + unsigned char *tab_end = NULL; + unsigned char *c_freq_end = cp_end; + unsigned int shift = *cp >> 4; + if (*cp++ & 1) { + uint32_t u_freq_sz, c_freq_sz; + cp += var_get_u32(cp, cp_end, &u_freq_sz); + cp += var_get_u32(cp, cp_end, &c_freq_sz); + if (c_freq_sz > cp_end - cp) + goto err; + tab_end = cp + c_freq_sz; + if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL, u_freq_sz))) + goto err; + cp = c_freq; + c_freq_end = c_freq + u_freq_sz; + } + + // Decode order-0 symbol list; avoids needing in order-1 tables + uint32_t F0[256] = {0}; + int fsz = decode_alphabet(cp, c_freq_end, F0); + if (!fsz) + goto err; + cp += fsz; + + if (cp >= c_freq_end) + goto err; + + const int s3_fast_on = in_size >= 100000; + + for (i = 0; i < 256; i++) { + if (F0[i] == 0) + continue; + + uint32_t F[256] = {0}, T = 0; + fsz = decode_freq_d(cp, c_freq_end, F0, F, &T); + if (!fsz) + goto err; + cp += fsz; + + if (!T) { + //fprintf(stderr, "No freq for F_%d\n", i); + continue; + } + + normalise_freq_shift(F, T, 1< (1< cp_end) + goto err; + + RansState rans0, rans1, rans2, rans3; + uint8_t *ptr = cp, *ptr_end = in + in_size - 8; + RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto err; + RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto err; + RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto err; + RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto err; + + unsigned int isz4 = out_sz>>2; + int l0 = 0, l1 = 0, l2 = 0, l3 = 0; + unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4}; + + RansState R[4]; + R[0] = rans0; + R[1] = rans1; + R[2] = rans2; + R[3] = rans3; + + // Around 15% faster to specialise for 10/12 than to have one + // loop with shift as a variable. 
+ if (shift == TF_SHIFT_O1) { + // TF_SHIFT_O1 = 12 + + const uint32_t mask = ((1u << TF_SHIFT_O1)-1); + for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { + uint16_t m, c; + c = sfb[l0][m = R[0] & mask]; + R[0] = fb[l0][c].f * (R[0]>>TF_SHIFT_O1) + m - fb[l0][c].b; + out[i4[0]] = l0 = c; + + c = sfb[l1][m = R[1] & mask]; + R[1] = fb[l1][c].f * (R[1]>>TF_SHIFT_O1) + m - fb[l1][c].b; + out[i4[1]] = l1 = c; + + c = sfb[l2][m = R[2] & mask]; + R[2] = fb[l2][c].f * (R[2]>>TF_SHIFT_O1) + m - fb[l2][c].b; + out[i4[2]] = l2 = c; + + c = sfb[l3][m = R[3] & mask]; + R[3] = fb[l3][c].f * (R[3]>>TF_SHIFT_O1) + m - fb[l3][c].b; + out[i4[3]] = l3 = c; + + if (ptr < ptr_end) { + RansDecRenorm(&R[0], &ptr); + RansDecRenorm(&R[1], &ptr); + RansDecRenorm(&R[2], &ptr); + RansDecRenorm(&R[3], &ptr); + } else { + RansDecRenormSafe(&R[0], &ptr, ptr_end+8); + RansDecRenormSafe(&R[1], &ptr, ptr_end+8); + RansDecRenormSafe(&R[2], &ptr, ptr_end+8); + RansDecRenormSafe(&R[3], &ptr, ptr_end+8); + } + } + + // Remainder + for (; i4[3] < out_sz; i4[3]++) { + uint32_t m3 = R[3] & ((1u<>TF_SHIFT_O1) + m3 - fb[l3][c3].b; + RansDecRenormSafe(&R[3], &ptr, ptr_end + 8); + l3 = c3; + } + } else if (!s3_fast_on) { + // TF_SHIFT_O1 = 10 with sfb[256][1024] & fb[256]256] array lookup + // Slightly faster for -o193 on q4 (high comp), but also less + // initialisation cost for smaller data + const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); + for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { + uint16_t m, c; + c = sfb[l0][m = R[0] & mask]; + R[0] = fb[l0][c].f * (R[0]>>TF_SHIFT_O1_FAST) + m - fb[l0][c].b; + out[i4[0]] = l0 = c; + + c = sfb[l1][m = R[1] & mask]; + R[1] = fb[l1][c].f * (R[1]>>TF_SHIFT_O1_FAST) + m - fb[l1][c].b; + out[i4[1]] = l1 = c; + + c = sfb[l2][m = R[2] & mask]; + R[2] = fb[l2][c].f * (R[2]>>TF_SHIFT_O1_FAST) + m - fb[l2][c].b; + out[i4[2]] = l2 = c; + + c = sfb[l3][m = R[3] & mask]; + R[3] = fb[l3][c].f * (R[3]>>TF_SHIFT_O1_FAST) + m - fb[l3][c].b; + out[i4[3]] = 
l3 = c; + + if (ptr < ptr_end) { + RansDecRenorm(&R[0], &ptr); + RansDecRenorm(&R[1], &ptr); + RansDecRenorm(&R[2], &ptr); + RansDecRenorm(&R[3], &ptr); + } else { + RansDecRenormSafe(&R[0], &ptr, ptr_end+8); + RansDecRenormSafe(&R[1], &ptr, ptr_end+8); + RansDecRenormSafe(&R[2], &ptr, ptr_end+8); + RansDecRenormSafe(&R[3], &ptr, ptr_end+8); + } + } + + // Remainder + for (; i4[3] < out_sz; i4[3]++) { + uint32_t m3 = R[3] & ((1u<>TF_SHIFT_O1_FAST) + m3 - fb[l3][c3].b; + RansDecRenormSafe(&R[3], &ptr, ptr_end + 8); + l3 = c3; + } + } else { + // TF_SHIFT_O1_FAST. + // Significantly faster for -o1 on q40 (low comp). + // Higher initialisation cost, so only use if big blocks. + const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1); + for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) { + uint32_t S0 = s3[l0][R[0] & mask]; + uint32_t S1 = s3[l1][R[1] & mask]; + l0 = out[i4[0]] = S0; + l1 = out[i4[1]] = S1; + uint16_t F0 = S0>>(TF_SHIFT_O1_FAST+8); + uint16_t F1 = S1>>(TF_SHIFT_O1_FAST+8); + uint16_t B0 = (S0>>8) & mask; + uint16_t B1 = (S1>>8) & mask; + + R[0] = F0 * (R[0]>>TF_SHIFT_O1_FAST) + B0; + R[1] = F1 * (R[1]>>TF_SHIFT_O1_FAST) + B1; + + uint32_t S2 = s3[l2][R[2] & mask]; + uint32_t S3 = s3[l3][R[3] & mask]; + l2 = out[i4[2]] = S2; + l3 = out[i4[3]] = S3; + uint16_t F2 = S2>>(TF_SHIFT_O1_FAST+8); + uint16_t F3 = S3>>(TF_SHIFT_O1_FAST+8); + uint16_t B2 = (S2>>8) & mask; + uint16_t B3 = (S3>>8) & mask; + + R[2] = F2 * (R[2]>>TF_SHIFT_O1_FAST) + B2; + R[3] = F3 * (R[3]>>TF_SHIFT_O1_FAST) + B3; + + if (ptr < ptr_end) { + RansDecRenorm(&R[0], &ptr); + RansDecRenorm(&R[1], &ptr); + RansDecRenorm(&R[2], &ptr); + RansDecRenorm(&R[3], &ptr); + } else { + RansDecRenormSafe(&R[0], &ptr, ptr_end+8); + RansDecRenormSafe(&R[1], &ptr, ptr_end+8); + RansDecRenormSafe(&R[2], &ptr, ptr_end+8); + RansDecRenormSafe(&R[3], &ptr, ptr_end+8); + } + } + + // Remainder + for (; i4[3] < out_sz; i4[3]++) { + uint32_t S = s3[l3][R[3] & ((1u<>(TF_SHIFT_O1_FAST+8)) * 
(R[3]>>TF_SHIFT_O1_FAST) + + ((S>>8) & ((1u< + +#if defined(__clang__) && defined(__has_attribute) +# if __has_attribute(unused) +# define UNUSED __attribute__((unused)) +# else +# define UNUSED +# endif +#elif defined(__GNUC__) && __GNUC__ >= 3 +# define UNUSED __attribute__((unused)) +#else +# define UNUSED +#endif + +// CPU detection is performed once. NB this has an assumption that we're +// not migrating between processes with different instruction stes, but +// to date the only systems I know of that support this don't have different +// capabilities (that we use) per core. +#ifndef NO_THREADS +static pthread_once_t rans_cpu_once = PTHREAD_ONCE_INIT; +#endif + +static int have_ssse3 UNUSED = 0; +static int have_sse4_1 UNUSED = 0; +static int have_popcnt UNUSED = 0; +static int have_avx2 UNUSED = 0; +static int have_avx512f UNUSED = 0; +static int is_amd UNUSED = 0; + +#define HAVE_HTSCODECS_TLS_CPU_INIT +static void htscodecs_tls_cpu_init(void) { + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + // These may be unused, depending on HAVE_* config.h macros + + int level = __get_cpuid_max(0, NULL); + __cpuid_count(0, 0, eax, ebx, ecx, edx); + is_amd = (ecx == 0x444d4163); + if (level >= 1) { + __cpuid_count(1, 0, eax, ebx, ecx, edx); +#if defined(bit_SSSE3) + have_ssse3 = ecx & bit_SSSE3; +#endif +#if defined(bit_POPCNT) + have_popcnt = ecx & bit_POPCNT; +#endif +#if defined(bit_SSE4_1) + have_sse4_1 = ecx & bit_SSE4_1; +#endif + } + if (level >= 7) { + __cpuid_count(7, 0, eax, ebx, ecx, edx); +#if defined(bit_AVX2) + have_avx2 = ebx & bit_AVX2; +#endif +#if defined(bit_AVX512F) + have_avx512f = ebx & bit_AVX512F; +#endif + } + + if (!have_popcnt) have_avx512f = have_avx2 = have_sse4_1 = 0; + if (!have_ssse3) have_sse4_1 = 0; +} + +static inline +unsigned char *(*rans_enc_func(int do_simd, int order)) + (unsigned char *in, + unsigned int in_size, + unsigned char *out, + unsigned int *out_size) { + + int have_e_sse4_1 = have_sse4_1; + int have_e_avx2 = 
have_avx2; + int have_e_avx512f = have_avx512f; + + if (!(rans_cpu & RANS_CPU_ENC_AVX512)) have_e_avx512f = 0; + if (!(rans_cpu & RANS_CPU_ENC_AVX2)) have_e_avx2 = 0; + if (!(rans_cpu & RANS_CPU_ENC_SSE4)) have_e_sse4_1 = 0; + + if (!do_simd) { // SIMD disabled + return order & 1 + ? rans_compress_O1_4x16 + : rans_compress_O0_4x16; + } + +#ifdef NO_THREADS + htscodecs_tls_cpu_init(); +#else + int err = pthread_once(&rans_cpu_once, htscodecs_tls_cpu_init); + if (err != 0) { + fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n", + strerror(err)); + fprintf(stderr, "Using scalar code only\n"); + } +#endif + + if (order & 1) { + // With simulated gathers, the AVX512 is now slower than AVX2, so + // we avoid using it unless asking for the real avx512 gather. + // Note for testing we do -c 0x0404 to enable AVX512 and disable AVX2. + // We then need to call the avx512 func regardless. + int use_gather; +#ifdef USE_GATHER + use_gather = 1; +#else + use_gather = !have_e_avx2; +#endif + +#if defined(HAVE_AVX512) + if (have_e_avx512f && (!is_amd || !have_e_avx2) && use_gather) + return rans_compress_O1_32x16_avx512; +#endif +#if defined(HAVE_AVX2) + if (have_e_avx2) + return rans_compress_O1_32x16_avx2; +#endif +#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) + if (have_e_sse4_1) + return rans_compress_O1_32x16; +#endif + return rans_compress_O1_32x16; + } else { +#if defined(HAVE_AVX512) + if (have_e_avx512f && (!is_amd || !have_e_avx2)) + return rans_compress_O0_32x16_avx512; +#endif +#if defined(HAVE_AVX2) + if (have_e_avx2) + return rans_compress_O0_32x16_avx2; +#endif +#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) + if (have_e_sse4_1) + return rans_compress_O0_32x16; +#endif + return rans_compress_O0_32x16; + } +} + +static inline +unsigned char *(*rans_dec_func(int do_simd, int order)) + (unsigned char *in, + unsigned int in_size, + unsigned char *out, + unsigned int out_size) { + + int have_d_sse4_1 = 
have_sse4_1; + int have_d_avx2 = have_avx2; + int have_d_avx512f = have_avx512f; + + if (!(rans_cpu & RANS_CPU_DEC_AVX512)) have_d_avx512f = 0; + if (!(rans_cpu & RANS_CPU_DEC_AVX2)) have_d_avx2 = 0; + if (!(rans_cpu & RANS_CPU_DEC_SSE4)) have_d_sse4_1 = 0; + + if (!do_simd) { // SIMD disabled + return order & 1 + ? rans_uncompress_O1_4x16 + : rans_uncompress_O0_4x16; + } + +#ifdef NO_THREADS + htscodecs_tls_cpu_init(); +#else + int err = pthread_once(&rans_cpu_once, htscodecs_tls_cpu_init); + if (err != 0) { + fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n", + strerror(err)); + fprintf(stderr, "Using scalar code only\n"); + } +#endif + + if (order & 1) { +#if defined(HAVE_AVX512) + if (have_d_avx512f) + return rans_uncompress_O1_32x16_avx512; +#endif +#if defined(HAVE_AVX2) + if (have_d_avx2) + return rans_uncompress_O1_32x16_avx2; +#endif +#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) + if (have_d_sse4_1) + return rans_uncompress_O1_32x16_sse4; +#endif + return rans_uncompress_O1_32x16; + } else { +#if defined(HAVE_AVX512) + if (have_d_avx512f) + return rans_uncompress_O0_32x16_avx512; +#endif +#if defined(HAVE_AVX2) + if (have_d_avx2) + return rans_uncompress_O0_32x16_avx2; +#endif +#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT) + if (have_d_sse4_1) + return rans_uncompress_O0_32x16_sse4; +#endif + return rans_uncompress_O0_32x16; + } +} + +#elif defined(__ARM_NEON) && defined(__aarch64__) + +#if defined(__linux__) || defined(__FreeBSD__) +#include +#elif defined(_WIN32) +#include +#endif + +static inline int have_neon(void) { +#if defined(__linux__) && defined(__arm__) + return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0; +#elif defined(__linux__) && defined(__aarch64__) && defined(HWCAP_ASIMD) + return (getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0; +#elif defined(__APPLE__) + return 1; +#elif defined(__FreeBSD__) && defined(__arm__) + unsigned long cap; + if (elf_aux_info(AT_HWCAP, &cap, sizeof cap) 
!= 0) return 0; + return (cap & HWCAP_NEON) != 0; +#elif defined(__FreeBSD__) && defined(__aarch64__) && defined(HWCAP_ASIMD) + unsigned long cap; + if (elf_aux_info(AT_HWCAP, &cap, sizeof cap) != 0) return 0; + return (cap & HWCAP_ASIMD) != 0; +#elif defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE) != 0; +#else + return 0; +#endif +} + +static inline +unsigned char *(*rans_enc_func(int do_simd, int order)) + (unsigned char *in, + unsigned int in_size, + unsigned char *out, + unsigned int *out_size) { + + if (do_simd) { + if ((rans_cpu & RANS_CPU_ENC_NEON) && have_neon()) + return order & 1 + ? rans_compress_O1_32x16_neon + : rans_compress_O0_32x16_neon; + else + return order & 1 + ? rans_compress_O1_32x16 + : rans_compress_O0_32x16; + } else { + return order & 1 + ? rans_compress_O1_4x16 + : rans_compress_O0_4x16; + } +} + +static inline +unsigned char *(*rans_dec_func(int do_simd, int order)) + (unsigned char *in, + unsigned int in_size, + unsigned char *out, + unsigned int out_size) { + + if (do_simd) { + if ((rans_cpu & RANS_CPU_DEC_NEON) && have_neon()) + return order & 1 + ? rans_uncompress_O1_32x16_neon + : rans_uncompress_O0_32x16_neon; + else + return order & 1 + ? rans_uncompress_O1_32x16 + : rans_uncompress_O0_32x16; + } else { + return order & 1 + ? rans_uncompress_O1_4x16 + : rans_uncompress_O0_4x16; + } +} + +#else // !(defined(__GNUC__) && defined(__x86_64__)) && !defined(__ARM_NEON) + +static inline +unsigned char *(*rans_enc_func(int do_simd, int order)) + (unsigned char *in, + unsigned int in_size, + unsigned char *out, + unsigned int *out_size) { + + if (do_simd) { + return order & 1 + ? rans_compress_O1_32x16 + : rans_compress_O0_32x16; + } else { + return order & 1 + ? 
rans_compress_O1_4x16 + : rans_compress_O0_4x16; + } +} + +static inline +unsigned char *(*rans_dec_func(int do_simd, int order)) + (unsigned char *in, + unsigned int in_size, + unsigned char *out, + unsigned int out_size) { + + if (do_simd) { + return order & 1 + ? rans_uncompress_O1_32x16 + : rans_uncompress_O0_32x16; + } else { + return order & 1 + ? rans_uncompress_O1_4x16 + : rans_uncompress_O0_4x16; + } +} + +#endif + +// Test interface for restricting the auto-detection methods so we +// can forcibly compare different implementations on the same machine. +// See RANS_CPU_ defines in rANS_static4x16.h +void rans_set_cpu(int opts) { + rans_cpu = opts; +#ifdef HAVE_HTSCODECS_TLS_CPU_INIT + htscodecs_tls_cpu_init(); +#endif +} + +/*----------------------------------------------------------------------------- + * Simple interface to the order-0 vs order-1 encoders and decoders. + * + * Smallest is method, , so worst case 2 bytes longer. + */ +unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, + unsigned char *out,unsigned int *out_size, + int order) { + if (in_size > INT_MAX) { + *out_size = 0; + return NULL; + } + + unsigned int c_meta_len; + uint8_t *meta = NULL, *rle = NULL, *packed = NULL; + uint8_t *out_free = NULL; + + if (!out) { + *out_size = rans_compress_bound_4x16(in_size, order); + if (*out_size == 0) + return NULL; + if (!(out_free = out = malloc(*out_size))) + return NULL; + } + + unsigned char *out_end = out + *out_size; + + // Permit 32-way unrolling for large blocks, paving the way for + // AVX2 and AVX512 SIMD variants. 
+ if ((order & RANS_ORDER_SIMD_AUTO) && in_size >= 50000 + && !(order & RANS_ORDER_STRIPE)) + order |= X_32; + + if (in_size <= 20) + order &= ~RANS_ORDER_STRIPE; +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (in_size <= 1000) + order &= ~RANS_ORDER_X32; +#endif + if (order & RANS_ORDER_STRIPE) { + int N = (order>>8) & 0xff; + if (N == 0) N = 4; // default for compatibility with old tests + + unsigned char *transposed = malloc(in_size); + unsigned int part_len[256]; + unsigned int idx[256]; + if (!transposed) { + free(out_free); + return NULL; + } + int i, j, x; + + for (i = 0; i < N; i++) { + part_len[i] = in_size / N + ((in_size % N) > i); + idx[i] = i ? idx[i-1] + part_len[i-1] : 0; // cumulative index + } + +#define KN 8 + i = x = 0; + if (in_size >= N*KN) { + for (; i < in_size-N*KN;) { + int k; + unsigned char *ink = in+i; + for (j = 0; j < N; j++) + for (k = 0; k < KN; k++) + transposed[idx[j]+x+k] = ink[j+N*k]; + x += KN; i+=N*KN; + } + } +#undef KN + for (; i < in_size-N; i += N, x++) { + for (j = 0; j < N; j++) + transposed[idx[j]+x] = in[i+j]; + } + + for (; i < in_size; i += N, x++) { + for (j = 0; i+j < in_size; j++) + transposed[idx[j]+x] = in[i+j]; + } + + unsigned int olen2; + unsigned char *out2, *out2_start; + c_meta_len = 1; + *out = order & ~RANS_ORDER_NOSZ; + c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size); + out[c_meta_len++] = N; + + unsigned char *out_best = NULL; + unsigned int out_best_len = 0; + + out2_start = out2 = out+7+5*N; // shares a buffer with c_meta + for (i = 0; i < N; i++) { + // Brute force try all methods. 
+ int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10; + for (j = 0; j < sizeof(m)/sizeof(*m); j++) { + if ((order & m[j]) != m[j]) + continue; + + // order-1 *only*; bit check above cannot elide order-0 + if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0) + continue; + olen2 = *out_size - (out2 - out); + rans_compress_to_4x16(transposed+idx[i], part_len[i], + out2, &olen2, + m[j] | RANS_ORDER_NOSZ + | (order&RANS_ORDER_X32)); + if (best_sz > olen2) { + best_sz = olen2; + best_j = j; + if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) { + unsigned char *tmp = realloc(out_best, olen2); + if (!tmp) { + free(out_free); + return NULL; + } + out_best = tmp; + out_best_len = olen2; + } + + // Cache a copy of the best so far + memcpy(out_best, out2, olen2); + } + } + if (best_j < sizeof(m)/sizeof(*m)) { + // Copy the best compression to output buffer if not current + memcpy(out2, out_best, best_sz); + olen2 = best_sz; + } + + out2 += olen2; + c_meta_len += var_put_u32(out+c_meta_len, out_end, olen2); + } + if (out_best) + free(out_best); + + memmove(out+c_meta_len, out2_start, out2-out2_start); + free(transposed); + *out_size = c_meta_len + out2-out2_start; + return out; + } + + if (order & RANS_ORDER_CAT) { + out[0] = RANS_ORDER_CAT; + c_meta_len = 1; + c_meta_len += var_put_u32(&out[1], out_end, in_size); + if (in_size) + memcpy(out+c_meta_len, in, in_size); + *out_size = c_meta_len + in_size; + return out; + } + + int do_pack = order & RANS_ORDER_PACK; + int do_rle = order & RANS_ORDER_RLE; + int no_size = order & RANS_ORDER_NOSZ; + int do_simd = order & RANS_ORDER_X32; + + out[0] = order; + c_meta_len = 1; + + if (!no_size) + c_meta_len += var_put_u32(&out[1], out_end, in_size); + + order &= 3; + + // Format is compressed meta-data, compressed data. + // Meta-data can be empty, pack, rle lengths, or pack + rle lengths. + // Data is either the original data, bit-packed packed, rle literals or + // packed + rle literals. 
+ + if (do_pack && in_size) { + // PACK 2, 4 or 8 symbols into one byte. + int pmeta_len; + uint64_t packed_len; + packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len); + if (!packed) { + out[0] &= ~RANS_ORDER_PACK; + do_pack = 0; + free(packed); + packed = NULL; + } else { + in = packed; + in_size = packed_len; + c_meta_len += pmeta_len; + + // Could derive this rather than storing verbatim. + // Orig size * 8/nbits (+1 if not multiple of 8/n) + int sz = var_put_u32(out+c_meta_len, out_end, in_size); + c_meta_len += sz; + *out_size -= sz; + } + } else if (do_pack) { + out[0] &= ~RANS_ORDER_PACK; + } + + if (do_rle && in_size) { + // RLE 'in' -> rle_length + rle_literals arrays + unsigned int rmeta_len, c_rmeta_len; + uint64_t rle_len; + c_rmeta_len = in_size+257; + if (!(meta = malloc(c_rmeta_len))) { + free(out_free); + return NULL; + } + + uint8_t rle_syms[256]; + int rle_nsyms = 0; + uint64_t rmeta_len64; + rle = hts_rle_encode(in, in_size, meta, &rmeta_len64, + rle_syms, &rle_nsyms, NULL, &rle_len); + memmove(meta+1+rle_nsyms, meta, rmeta_len64); + meta[0] = rle_nsyms; + memcpy(meta+1, rle_syms, rle_nsyms); + rmeta_len = rmeta_len64 + rle_nsyms+1; + + if (!rle || rle_len + rmeta_len >= .99*in_size) { + // Not worth the speed hit. 
+ out[0] &= ~RANS_ORDER_RLE; + do_rle = 0; + free(rle); + rle = NULL; + } else { + // Compress lengths with O0 and literals with O0/O1 ("order" param) + int sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2), sz2; + sz += var_put_u32(out+c_meta_len+sz, out_end, rle_len); + c_rmeta_len = *out_size - (c_meta_len+sz+5); + rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len); + if (c_rmeta_len < rmeta_len) { + sz2 = var_put_u32(out+c_meta_len+sz, out_end, c_rmeta_len); + memmove(out+c_meta_len+sz+sz2, out+c_meta_len+sz+5, c_rmeta_len); + } else { + // Uncompressed RLE meta-data as too small + sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2+1); + sz2 = var_put_u32(out+c_meta_len+sz, out_end, rle_len); + memcpy(out+c_meta_len+sz+sz2, meta, rmeta_len); + c_rmeta_len = rmeta_len; + } + + c_meta_len += sz + sz2 + c_rmeta_len; + + in = rle; + in_size = rle_len; + } + + free(meta); + } else if (do_rle) { + out[0] &= ~RANS_ORDER_RLE; + } + + *out_size -= c_meta_len; + if (order && in_size < 8) { + out[0] &= ~1; + order &= ~1; + } + + rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size); + + if (*out_size >= in_size) { + out[0] &= ~3; + out[0] |= RANS_ORDER_CAT | no_size; + if (in_size) + memcpy(out+c_meta_len, in, in_size); + *out_size = in_size; + } + + free(rle); + free(packed); + + *out_size += c_meta_len; + + return out; +} + +unsigned char *rans_compress_4x16(unsigned char *in, unsigned int in_size, + unsigned int *out_size, int order) { + return rans_compress_to_4x16(in, in_size, NULL, out_size, order); +} + +unsigned char *rans_uncompress_to_4x16(unsigned char *in, unsigned int in_size, + unsigned char *out, unsigned int *out_size) { + unsigned char *in_end = in + in_size; + unsigned char *out_free = NULL, *tmp_free = NULL, *meta_free = NULL; + + if (in_size == 0) + return NULL; + + if (*in & RANS_ORDER_STRIPE) { + unsigned int ulen, olen, c_meta_len = 1; + int i; + uint64_t clen_tot = 0; + + // Decode lengths + 
c_meta_len += var_get_u32(in+c_meta_len, in_end, &ulen); + if (c_meta_len >= in_size) + return NULL; + unsigned int N = in[c_meta_len++]; + if (N < 1) // Must be at least one stripe + return NULL; + unsigned int clenN[256], ulenN[256], idxN[256]; + if (!out) { + if (ulen >= INT_MAX) + return NULL; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (ulen > 100000) + return NULL; +#endif + if (!(out_free = out = malloc(ulen))) { + return NULL; + } + *out_size = ulen; + } + if (ulen != *out_size) { + free(out_free); + return NULL; + } + + for (i = 0; i < N; i++) { + ulenN[i] = ulen / N + ((ulen % N) > i); + idxN[i] = i ? idxN[i-1] + ulenN[i-1] : 0; + c_meta_len += var_get_u32(in+c_meta_len, in_end, &clenN[i]); + clen_tot += clenN[i]; + if (c_meta_len > in_size || clenN[i] > in_size || clenN[i] < 1) { + free(out_free); + return NULL; + } + } + + // We can call this with a larger buffer, but once we've determined + // how much we really use we limit it so the recursion becomes easier + // to limit. 
+ if (c_meta_len + clen_tot > in_size) { + free(out_free); + return NULL; + } + in_size = c_meta_len + clen_tot; + + //fprintf(stderr, " stripe meta %d\n", c_meta_len); //c-size + + // Uncompress the N streams + unsigned char *outN = malloc(ulen); + if (!outN) { + free(out_free); + return NULL; + } + for (i = 0; i < N; i++) { + olen = ulenN[i]; + if (in_size < c_meta_len) { + free(out_free); + free(outN); + return NULL; + } + if (!rans_uncompress_to_4x16(in+c_meta_len, in_size-c_meta_len, outN + idxN[i], &olen) + || olen != ulenN[i]) { + free(out_free); + free(outN); + return NULL; + } + c_meta_len += clenN[i]; + } + + unstripe(out, outN, ulen, N, idxN); + + free(outN); + *out_size = ulen; + return out; + } + + int order = *in++; in_size--; + int do_pack = order & RANS_ORDER_PACK; + int do_rle = order & RANS_ORDER_RLE; + int do_cat = order & RANS_ORDER_CAT; + int no_size = order & RANS_ORDER_NOSZ; + int do_simd = order & RANS_ORDER_X32; + order &= 1; + + int sz = 0; + unsigned int osz; + if (!no_size) { + sz = var_get_u32(in, in_end, &osz); + } else + sz = 0, osz = *out_size; + in += sz; + in_size -= sz; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (osz > 100000) + return NULL; +#endif + + if (no_size && !out) + goto err; // Need one or the other + + if (!out) { + *out_size = osz; + if (!(out = out_free = malloc(*out_size))) + return NULL; + } else { + if (*out_size < osz) + goto err; + *out_size = osz; + } + +// if (do_pack || do_rle) { +// in += sz; // size field not needed when pure rANS +// in_size -= sz; +// } + + uint32_t c_meta_size = 0; + unsigned int tmp1_size = *out_size; + unsigned int tmp2_size = *out_size; + unsigned int tmp3_size = *out_size; + unsigned char *tmp1 = NULL, *tmp2 = NULL, *tmp3 = NULL, *tmp = NULL; + + // Need In, Out and Tmp buffers with temporary buffer of the same size + // as output. All use rANS, but with optional transforms (none, RLE, + // Pack, or both). 
+ // + // rans unrle unpack + // If none: in -> out + // If RLE: in -> tmp -> out + // If Pack: in -> tmp -> out + // If RLE+Pack: in -> out -> tmp -> out + // tmp1 tmp2 tmp3 + // + // So rans is in -> tmp1 + // RLE is tmp1 -> tmp2 + // Unpack is tmp2 -> tmp3 + + // Format is meta data (Pack and RLE in that order if present), + // followed by rANS compressed data. + + if (do_pack || do_rle) { + if (!(tmp = tmp_free = malloc(*out_size))) + goto err; + if (do_pack && do_rle) { + tmp1 = out; + tmp2 = tmp; + tmp3 = out; + } else if (do_pack) { + tmp1 = tmp; + tmp2 = tmp1; + tmp3 = out; + } else if (do_rle) { + tmp1 = tmp; + tmp2 = out; + tmp3 = out; + } + } else { + // neither + tmp = NULL; + tmp1 = out; + tmp2 = out; + tmp3 = out; + } + + // Decode the bit-packing map. + uint8_t map[16] = {0}; + int npacked_sym = 0; + uint64_t unpacked_sz = 0; // FIXME: rename to packed_per_byte + if (do_pack) { + c_meta_size = hts_unpack_meta(in, in_size, *out_size, map, &npacked_sym); + if (c_meta_size == 0) + goto err; + + unpacked_sz = osz; + in += c_meta_size; + in_size -= c_meta_size; + + // New unpacked size. We could derive this bit from *out_size + // and npacked_sym. + unsigned int osz; + sz = var_get_u32(in, in_end, &osz); + in += sz; + in_size -= sz; + if (osz > tmp1_size) + goto err; + tmp1_size = osz; + } + + uint8_t *meta = NULL; + uint32_t u_meta_size = 0; + if (do_rle) { + // Uncompress meta data + uint32_t c_meta_size, rle_len, sz; + sz = var_get_u32(in, in_end, &u_meta_size); + sz += var_get_u32(in+sz, in_end, &rle_len); + if (rle_len > tmp1_size) // should never grow + goto err; + if (u_meta_size & 1) { + meta = in + sz; + u_meta_size = u_meta_size/2 > (in_end-meta) ? 
(in_end-meta) : u_meta_size/2; + c_meta_size = u_meta_size; + } else { + sz += var_get_u32(in+sz, in_end, &c_meta_size); + u_meta_size /= 2; + + meta_free = meta = rans_dec_func(do_simd, 0)(in+sz, in_size-sz, NULL, u_meta_size); + if (!meta) + goto err; + } + if (c_meta_size+sz > in_size) + goto err; + in += c_meta_size+sz; + in_size -= c_meta_size+sz; + tmp1_size = rle_len; + } + //fprintf(stderr, " meta_size %d bytes\n", (int)(in - orig_in)); //c-size + + // uncompress RLE data. in -> tmp1 + if (in_size) { + if (do_cat) { + //fprintf(stderr, " CAT %d\n", tmp1_size); //c-size + if (tmp1_size > in_size) + goto err; + if (tmp1_size > *out_size) + goto err; + memcpy(tmp1, in, tmp1_size); + } else { + tmp1 = rans_dec_func(do_simd, order)(in, in_size, tmp1, tmp1_size); + if (!tmp1) + goto err; + } + } else { + tmp1_size = 0; + } + tmp2_size = tmp3_size = tmp1_size; + + if (do_rle) { + // Unpack RLE. tmp1 -> tmp2. + if (u_meta_size == 0) + goto err; + uint64_t unrle_size = *out_size; + int rle_nsyms = *meta ? *meta : 256; + if (u_meta_size < 1+rle_nsyms) + goto err; + if (!hts_rle_decode(tmp1, tmp1_size, + meta+1+rle_nsyms, u_meta_size-(1+rle_nsyms), + meta+1, rle_nsyms, tmp2, &unrle_size)) + goto err; + tmp3_size = tmp2_size = unrle_size; + free(meta_free); + meta_free = NULL; + } + if (do_pack) { + // Unpack bits via pack-map. 
tmp2 -> tmp3 + if (npacked_sym == 1) + unpacked_sz = tmp2_size; + //uint8_t *porig = unpack(tmp2, tmp2_size, unpacked_sz, npacked_sym, map); + //memcpy(tmp3, porig, unpacked_sz); + if (!hts_unpack(tmp2, tmp2_size, tmp3, unpacked_sz, npacked_sym, map)) + goto err; + tmp3_size = unpacked_sz; + } + + if (tmp) + free(tmp); + + *out_size = tmp3_size; + return tmp3; + + err: + free(meta_free); + free(out_free); + free(tmp_free); + return NULL; +} + +unsigned char *rans_uncompress_4x16(unsigned char *in, unsigned int in_size, + unsigned int *out_size) { + return rans_uncompress_to_4x16(in, in_size, NULL, out_size); +} diff --git a/src/htslib-1.18/htscodecs/htscodecs/rANS_word.h b/src/htslib-1.21/htscodecs/htscodecs/rANS_word.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/rANS_word.h rename to src/htslib-1.21/htscodecs/htscodecs/rANS_word.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/rle.c b/src/htslib-1.21/htscodecs/htscodecs/rle.c similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/rle.c rename to src/htslib-1.21/htscodecs/htscodecs/rle.c diff --git a/src/htslib-1.18/htscodecs/htscodecs/rle.h b/src/htslib-1.21/htscodecs/htscodecs/rle.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/rle.h rename to src/htslib-1.21/htscodecs/htscodecs/rle.h diff --git a/src/htslib-1.21/htscodecs/htscodecs/tokenise_name3.c b/src/htslib-1.21/htscodecs/htscodecs/tokenise_name3.c new file mode 100644 index 0000000..7493579 --- /dev/null +++ b/src/htslib-1.21/htscodecs/htscodecs/tokenise_name3.c @@ -0,0 +1,1819 @@ +/* + * Copyright (c) 2016-2022 Genome Research Ltd. + * Author(s): James Bonfield + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger + * Institute nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH + * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// cc -O3 -g -DTEST_TOKENISER tokenise_name3.c arith_dynamic.c rANS_static4x16pr.c pooled_alloc.c -I.. -I. -lbz2 -pthread + +// Name tokeniser. +// It generates a series of byte streams (per token) and compresses these +// either using static rANS or dynamic arithmetic coding. Arith coding is +// typically 1-5% smaller, but around 50-100% slower. We only envisage it +// being used at the higher compression levels. + +// TODO +// +// - Is it better when encoding 1, 2, 3, 3, 4, 5, 5, 6, 7, 9, 9, 10 to encode +// this as a mixture of MATCH and DELTA ops, or as entirely as DELTA ops +// with some delta values being zero? I suspect the latter, but it is +// not implemented here. See "last_token_delta" comments in code. 
+// +// - Consider variable size string implementations. +// Pascal style strings (length + str), +// C style strings (nul terminated), +// Or split blocks: length block and string contents block. +// +// - Is this one token-block or many serialised token-blocks? +// A) Lots of different models but feeding one bit-buffer emitted to +// by the entropy encoder => one block (fqzcomp). +// B) Lots of different models each feeding their own bit-buffers +// => many blocks. +// +// - multiple integer types depending on size; 1, 2, 4 byte long. +// +// - Consider token choice for isalnum instead of isalpha. Sometimes better. +// +// - Consider token synchronisation (eg on matching chr symbols?) in case of +// variable number. Eg consider foo:0999, foo:1000, foo:1001 (the leading +// zero adds an extra token). +// +// - Optimisation of tokens. Eg: +// HS25_09827:2:2102:11274:80442#49 +// HS25_09827:2:2109:12941:31311#49 +// +// We'll have tokens for HS 25 _ 09827 : 2 : that are entirely MATCH +// after the initial token. These 7 tokens could be one ALPHA instead +// of 7 distinct tokens, with 1 MATCH instead of 7. This is both a speed +// improvement for decoding as well as a space saving (fewer token-blocks +// and associated overhead). +// +// - XOR. Like ALPHA, but used when previous symbol is ALPHA or XOR +// and string lengths match.
Useful when names are similar, eg: +// the sequence in 07.names: +// +// @VP2-06:112:H7LNDMCVY:1:1105:26919:1172 1:N:0:ATTCAGAA+AGGAGAAG +// @VP2-06:112:H7LNDMCVY:1:1105:27100:1172 1:N:0:ATTCAGAA+AGGCGAAG +// @VP2-06:112:H7LNDMCVY:1:1105:27172:1172 1:N:0:ATTCAGAA+AGGCTAAG + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pooled_alloc.h" +#include "arith_dynamic.h" +#include "rANS_static4x16.h" +#include "tokenise_name3.h" +#include "varint.h" +#include "utils.h" + +// 128 is insufficient for SAM names (max 256 bytes) as +// we may alternate a0a0a0a0a0 etc. However if we fail, +// we just give up and switch to another codec, so this +// isn't a serious limit. Maybe up to 256 to permit all +// SAM names? +#define MAX_TOKENS 128 +#define MAX_TBLOCKS (MAX_TOKENS<<4) + +// Number of names per block +#define MAX_NAMES 1000000 + +enum name_type {N_ERR = -1, N_TYPE = 0, N_ALPHA, N_CHAR, N_DIGITS0, N_DZLEN, N_DUP, N_DIFF, + N_DIGITS, N_DDELTA, N_DDELTA0, N_MATCH, N_NOP, N_END, N_ALL}; + +typedef struct trie { + struct trie *next, *sibling; + int count; + uint32_t c:8; + uint32_t n:24; // Nth line +} trie_t; + +typedef struct { + enum name_type token_type; + int token_int; + int token_str; +} last_context_tok; + +typedef struct { + char *last_name; + int last_ntok; + last_context_tok *last; // [last_ntok] +} last_context; + +typedef struct { + uint8_t *buf; + size_t buf_a, buf_l; // alloc and used length. 
+ int tnum, ttype; + int dup_from; +} descriptor; + +typedef struct { + last_context *lc; + + // For finding entire line dups + int counter; + + // Trie used in encoder only + trie_t *t_head; + pool_alloc_t *pool; + + // token blocks + descriptor desc[MAX_TBLOCKS]; + + // summary stats per token + int token_dcount[MAX_TOKENS]; + int token_icount[MAX_TOKENS]; + //int token_zcount[MAX_TOKENS]; + + int max_tok; // tracks which desc/[id]count elements have been initialised + int max_names; +} name_context; + +static name_context *create_context(int max_names) { + if (max_names <= 0) + return NULL; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (max_names > 100000) + return NULL; +#endif + + // An arbitrary limit to prevent malformed data from consuming excessive + // amounts of memory. Consider upping this if we have genuine use cases + // for larger blocks. + if (max_names > 1e7) { + fprintf(stderr, "Name codec currently has a max of 10 million rec.\n"); + return NULL; + } + + name_context *ctx = htscodecs_tls_alloc(sizeof(*ctx) + + ++max_names*sizeof(*ctx->lc)); + if (!ctx) return NULL; + ctx->max_names = max_names; + + ctx->counter = 0; + ctx->t_head = NULL; + + ctx->lc = (last_context *)(((char *)ctx) + sizeof(*ctx)); + ctx->pool = NULL; + + memset(&ctx->desc[0], 0, 2*16 * sizeof(ctx->desc[0])); + memset(&ctx->token_dcount[0], 0, sizeof(int)); + memset(&ctx->token_icount[0], 0, sizeof(int)); + memset(&ctx->lc[0], 0, max_names*sizeof(ctx->lc[0])); + ctx->max_tok = 1; + + ctx->lc[0].last_ntok = 0; + + return ctx; +} + +static void free_context(name_context *ctx) { + if (!ctx) + return; + + if (ctx->t_head) + free(ctx->t_head); + if (ctx->pool) + pool_destroy(ctx->pool); + + int i; + for (i = 0; i < ctx->max_tok*16; i++) + free(ctx->desc[i].buf); + + for (i = 0; i < ctx->max_names; i++) + free(ctx->lc[i].last); + + htscodecs_tls_free(ctx); +} + +//----------------------------------------------------------------------------- +// Fast unsigned integer printing 
code. +// Returns number of bytes written. +static int append_uint32_fixed(char *cp, uint32_t i, uint8_t l) { + switch (l) { + case 9:*cp++ = i / 100000000 + '0', i %= 100000000; // fall-through + case 8:*cp++ = i / 10000000 + '0', i %= 10000000; // fall-through + case 7:*cp++ = i / 1000000 + '0', i %= 1000000; // fall-through + case 6:*cp++ = i / 100000 + '0', i %= 100000; // fall-through + case 5:*cp++ = i / 10000 + '0', i %= 10000; // fall-through + case 4:*cp++ = i / 1000 + '0', i %= 1000; // fall-through + case 3:*cp++ = i / 100 + '0', i %= 100; // fall-through + case 2:*cp++ = i / 10 + '0', i %= 10; // fall-through + case 1:*cp++ = i + '0'; // fall-throuhg + case 0:break; + } + return l; +} + +static int append_uint32_var(char *cp, uint32_t i) { + char *op = cp; + uint32_t j; + + //if (i < 10) goto b0; + if (i < 100) goto b1; + //if (i < 1000) goto b2; + if (i < 10000) goto b3; + //if (i < 100000) goto b4; + if (i < 1000000) goto b5; + //if (i < 10000000) goto b6; + if (i < 100000000) goto b7; + + if ((j = i / 1000000000)) {*cp++ = j + '0'; i -= j*1000000000; goto x8;} + if ((j = i / 100000000)) {*cp++ = j + '0'; i -= j*100000000; goto x7;} + b7:if ((j = i / 10000000)) {*cp++ = j + '0'; i -= j*10000000; goto x6;} + if ((j = i / 1000000)) {*cp++ = j + '0', i -= j*1000000; goto x5;} + b5:if ((j = i / 100000)) {*cp++ = j + '0', i -= j*100000; goto x4;} + if ((j = i / 10000)) {*cp++ = j + '0', i -= j*10000; goto x3;} + b3:if ((j = i / 1000)) {*cp++ = j + '0', i -= j*1000; goto x2;} + if ((j = i / 100)) {*cp++ = j + '0', i -= j*100; goto x1;} + b1:if ((j = i / 10)) {*cp++ = j + '0', i -= j*10; goto x0;} + if (i) *cp++ = i + '0'; + return cp-op; + + x8:*cp++ = i / 100000000 + '0', i %= 100000000; + x7:*cp++ = i / 10000000 + '0', i %= 10000000; + x6:*cp++ = i / 1000000 + '0', i %= 1000000; + x5:*cp++ = i / 100000 + '0', i %= 100000; + x4:*cp++ = i / 10000 + '0', i %= 10000; + x3:*cp++ = i / 1000 + '0', i %= 1000; + x2:*cp++ = i / 100 + '0', i %= 100; + x1:*cp++ = i 
/ 10 + '0', i %= 10; + x0:*cp++ = i + '0'; + + return cp-op; +} + +//----------------------------------------------------------------------------- +// Example descriptor encoding and IO. +// +// Here we just append to a buffer so we can dump out the results. +// These could then be passed through a static entropy encoder that +// encodes the entire buffer. +// +// Alternatively an adaptive entropy encoder could be place inline +// here to encode as it goes using additional knowledge from the +// supplied context. + +// Ensure room for sz more bytes. +static int descriptor_grow(descriptor *fd, uint32_t sz) { + while (fd->buf_l + sz > fd->buf_a) { + size_t buf_a = fd->buf_a ? fd->buf_a*2 : 65536; + unsigned char *buf = realloc(fd->buf, buf_a); + if (!buf) + return -1; + fd->buf = buf; + fd->buf_a = buf_a; + } + + return 0; +} + +static int encode_token_type(name_context *ctx, int ntok, + enum name_type type) { + int id = ntok<<4; + + if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; + + ctx->desc[id].buf[ctx->desc[id].buf_l++] = type; + + return 0; +} + +static int encode_token_match(name_context *ctx, int ntok) { + return encode_token_type(ctx, ntok, N_MATCH); +} + +static int encode_token_end(name_context *ctx, int ntok) { + return encode_token_type(ctx, ntok, N_END); +} + +static enum name_type decode_token_type(name_context *ctx, int ntok) { + int id = ntok<<4; + if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) return -1; + return ctx->desc[id].buf[ctx->desc[id].buf_l++]; +} + +// int stored as 32-bit quantities +static int encode_token_int(name_context *ctx, int ntok, + enum name_type type, uint32_t val) { + int id = (ntok<<4) | type; + + if (encode_token_type(ctx, ntok, type) < 0) return -1; + if (descriptor_grow(&ctx->desc[id], 4) < 0) return -1; + + uint8_t *cp = &ctx->desc[id].buf[ctx->desc[id].buf_l]; + cp[0] = (val >> 0) & 0xff; + cp[1] = (val >> 8) & 0xff; + cp[2] = (val >> 16) & 0xff; + cp[3] = (val >> 24) & 0xff; + ctx->desc[id].buf_l += 4; + + 
return 0; +} + +// Return 0 on success, -1 on failure; +static int decode_token_int(name_context *ctx, int ntok, + enum name_type type, uint32_t *val) { + int id = (ntok<<4) | type; + + if (ctx->desc[id].buf_l + 4 > ctx->desc[id].buf_a) + return -1; + + uint8_t *cp = ctx->desc[id].buf + ctx->desc[id].buf_l; + *val = (cp[0]) + (cp[1]<<8) + (cp[2]<<16) + ((uint32_t)cp[3]<<24); + ctx->desc[id].buf_l += 4; + + return 0; +} + +// 8 bit integer quantity +static int encode_token_int1(name_context *ctx, int ntok, + enum name_type type, uint32_t val) { + int id = (ntok<<4) | type; + + if (encode_token_type(ctx, ntok, type) < 0) return -1; + if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; + + ctx->desc[id].buf[ctx->desc[id].buf_l++] = val; + + return 0; +} + +static int encode_token_int1_(name_context *ctx, int ntok, + enum name_type type, uint32_t val) { + int id = (ntok<<4) | type; + + if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1; + + ctx->desc[id].buf[ctx->desc[id].buf_l++] = val; + + return 0; +} + +// Return 0 on success, -1 on failure; +static int decode_token_int1(name_context *ctx, int ntok, + enum name_type type, uint32_t *val) { + int id = (ntok<<4) | type; + + if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a) + return -1; + *val = ctx->desc[id].buf[ctx->desc[id].buf_l++]; + + return 0; +} + + +// Basic C-string style for now. +// +// Maybe XOR with previous string as context? +// This permits partial match to be encoded efficiently. +static int encode_token_alpha(name_context *ctx, int ntok, + char *str, int len) { + int id = (ntok<<4) | N_ALPHA; + + if (encode_token_type(ctx, ntok, N_ALPHA) < 0) return -1; + if (descriptor_grow(&ctx->desc[id], len+1) < 0) return -1; + memcpy(&ctx->desc[id].buf[ctx->desc[id].buf_l], str, len); + ctx->desc[id].buf[ctx->desc[id].buf_l+len] = 0; + ctx->desc[id].buf_l += len+1; + + return 0; +} + +// FIXME: need limit on string length for security. 
+// Reads the next string for token ntok from its N_ALPHA descriptor into
+// str, advancing the descriptor's read position (buf_l).  Bytes are copied
+// up to and including the terminating NUL, but never more than max_len
+// bytes and never past the descriptor's buf_a bound.
+//
+// Return length on success (bytes written minus one, i.e. the string
+// length when the NUL was reached), -1 on failure (descriptor already
+// exhausted, or no room in str).
+//
+// NOTE(review): if the copy stops on max_len or on buf_a instead of on a
+// NUL, the final byte written is not counted in the return value and str
+// is left unterminated; presumably callers treat that as truncation -
+// confirm against the decoder.
+static int decode_token_alpha(name_context *ctx, int ntok, char *str, int max_len) {
+    int id = (ntok<<4) | N_ALPHA;
+    char c;
+    int len = 0;
+
+    // The do/while below stores one byte before it tests max_len, so a
+    // zero or negative capacity must be rejected here to avoid writing
+    // outside the caller's buffer.
+    if (max_len <= 0)
+        return -1;
+    if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a)
+        return -1;
+    do {
+        c = ctx->desc[id].buf[ctx->desc[id].buf_l++];
+        str[len++] = c;
+    } while(c && len < max_len && ctx->desc[id].buf_l < ctx->desc[id].buf_a);
+
+    return len-1;
+}
+
+// Emits a single-character token: records N_CHAR in the token-type stream
+// for ntok, then appends c to that token's N_CHAR descriptor.
+// Returns 0 on success, -1 if either buffer could not be grown.
+static int encode_token_char(name_context *ctx, int ntok, char c) {
+    int id = (ntok<<4) | N_CHAR;
+
+    if (encode_token_type(ctx, ntok, N_CHAR) < 0) return -1;
+    if (descriptor_grow(&ctx->desc[id], 1) < 0) return -1;
+    ctx->desc[id].buf[ctx->desc[id].buf_l++] = c;
+
+    return 0;
+}
+
+// FIXME: need limit on string length for security
+// Reads one byte from the N_CHAR descriptor of token ntok into *str and
+// advances the read position.  No terminator is written.
+// Return length on success (always 1), -1 on failure (descriptor exhausted);
+static int decode_token_char(name_context *ctx, int ntok, char *str) {
+    int id = (ntok<<4) | N_CHAR;
+
+    if (ctx->desc[id].buf_l >= ctx->desc[id].buf_a)
+        return -1;
+    *str = ctx->desc[id].buf[ctx->desc[id].buf_l++];
+
+    return 1;
+}
+
+
+// A duplicated name: stores val as an N_DUP integer on token 0.
+// Returns the result of encode_token_int (0 on success, -1 on failure).
+static int encode_token_dup(name_context *ctx, uint32_t val) {
+    return encode_token_int(ctx, 0, N_DUP, val);
+}
+
+// Which read to delta against: stores val as an N_DIFF integer on token 0.
+// Returns the result of encode_token_int (0 on success, -1 on failure).
+static int encode_token_diff(name_context *ctx, uint32_t val) {
+    return encode_token_int(ctx, 0, N_DIFF, val);
+}
+
+
+//-----------------------------------------------------------------------------
+// Trie implementation for tracking common name prefixes.
+static +int build_trie(name_context *ctx, char *data, size_t len, int n) { + int nlines = 0; + size_t i; + trie_t *t; + + if (!ctx->t_head) { + ctx->t_head = calloc(1, sizeof(*ctx->t_head)); + if (!ctx->t_head) + return -1; + } + + // Build our trie, also counting input lines + for (nlines = i = 0; i < len; i++, nlines++) { + t = ctx->t_head; + t->count++; + while (i < len && (unsigned char)data[i] > '\n') { + unsigned char c = data[i++]; + if (c & 0x80) + //fprintf(stderr, "8-bit ASCII is unsupported\n"); + return -1; + c &= 127; + + + trie_t *x = t->next, *l = NULL; + while (x && x->c != c) { + l = x; x = x->sibling; + } + if (!x) { + if (!ctx->pool) + ctx->pool = pool_create(sizeof(trie_t)); + if (!(x = (trie_t *)pool_alloc(ctx->pool))) + return -1; + memset(x, 0, sizeof(*x)); + if (!l) + x = t->next = x; + else + x = l->sibling = x; + x->n = n; + x->c = c; + } + t = x; + t->c = c; + t->count++; + } + } + + return 0; +} + +#if 0 +void dump_trie(trie_t *t, int depth) { + if (depth == 0) { + printf("graph x_%p {\n splines = ortho\n ranksep=2\n", t); + printf(" p_%p [label=\"\"];\n", t); + dump_trie(t, 1); + printf("}\n"); + } else { + int j, k, count;//, cj; + char label[100], *cp; + trie_t *tp = t; + +// patricia: +// for (count = j = 0; j < 128; j++) +// if (t->next[j]) +// count++, cj=j; +// +// if (count == 1) { +// t = t->next[cj]; +// *cp++ = cj; +// goto patricia; +// } + + trie_t *x; + for (x = t->next; x; x = x->sibling) { + printf(" p_%p [label=\"%c\"];\n", x, x->c); + printf(" p_%p -- p_%p [label=\"%d\", penwidth=\"%f\"];\n", tp, x, x->count, MAX((log(x->count)-3)*2,1)); + //if (depth <= 11) + dump_trie(x, depth+1); + } + +#if 0 + for (j = 0; j < 128; j++) { + trie_t *tn; + + if (!t->next[j]) + continue; + + cp = label; + tn = t->next[j]; + *cp++ = j; +// patricia: + + for (count = k = 0; k < 128; k++) + if (tn->next[k]) + count++;//, cj=k; + +// if (count == 1) { +// tn = tn->next[cj]; +// *cp++ = cj; +// goto patricia; +// } + *cp++ = 0; + + printf(" 
p_%p [label=\"%s\"];\n", tn, label); + printf(" p_%p -- p_%p [label=\"%d\", penwidth=\"%f\"];\n", tp, tn, tn->count, MAX((log(tn->count)-3)*2,1)); + if (depth <= 11) + dump_trie(tn, depth+1); + } +#endif + } +} +#endif + +static +int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, int *is_fixed, int *fixed_len) { + int nlines = 0; + size_t i; + trie_t *t; + int from = -1, p3 = -1; + *exact = 0; + *fixed_len = 0; + *is_fixed = 0; + + // Horrid hack for the encoder only. + // We optimise per known name format here. + int prefix_len; + char *d = *data == '@' ? data+1 : data; + int l = *data == '@' ? len-1 : len; + int f = (*data == '>') ? 1 : 0; + if (l > 70 && d[f+0] == 'm' && d[7] == '_' && d[f+14] == '_' && d[f+61] == '/') { + prefix_len = 60; // PacBio + *is_fixed = 0; + } else if (l == 17 && d[f+5] == ':' && d[f+11] == ':') { + prefix_len = 6; // IonTorrent + *fixed_len = 6; + *is_fixed = 1; + } else if (l > 37 && d[f+8] == '-' && d[f+13] == '-' && d[f+18] == '-' && d[f+23] == '-' && + ((d[f+0] >= '0' && d[f+0] <='9') || (d[f+0] >= 'a' && d[f+0] <= 'f')) && + ((d[f+35] >= '0' && d[f+35] <='9') || (d[f+35] >= 'a' && d[f+35] <= 'f'))) { + // ONT: f33d30d5-6eb8-4115-8f46-154c2620a5da_Basecall_1D_template... + prefix_len = 37; + *fixed_len = 37; + *is_fixed = 1; + } else { + // Check Illumina and trim back to lane:tile:x:y. + int colons = 0; + for (i = 0; i < len && data[i] > ' '; i++) + ; + while (i > 0 && colons < 4) + if (data[--i] == ':') + colons++; + + if (colons == 4) { + // Constant illumina prefix + *fixed_len = i+1; + prefix_len = i+1; + *is_fixed = 1; + } else { + // Unknown, don't use a fixed len, but still search + // for any exact matches. 
+ prefix_len = INT_MAX; + *is_fixed = 0; + } + } + //prefix_len = INT_MAX; + + if (!ctx->t_head) { + ctx->t_head = calloc(1, sizeof(*ctx->t_head)); + if (!ctx->t_head) + return -1; + } + + // Find an item in the trie + for (nlines = i = 0; i < len; i++, nlines++) { + t = ctx->t_head; + while (i < len && data[i] > '\n') { + unsigned char c = data[i++]; + if (c & 0x80) + //fprintf(stderr, "8-bit ASCII is unsupported\n"); + return -1; + c &= 127; + + trie_t *x = t->next; + while (x && x->c != c) + x = x->sibling; + t = x; + +// t = t->next[c]; + +// if (!t) +// return -1; + + from = t->n; + if (i == prefix_len) p3 = t->n; + //if (t->count >= .0035*ctx->t_head->count && t->n != n) p3 = t->n; // pacbio + //if (i == 60) p3 = t->n; // pacbio + //if (i == 7) p3 = t->n; // iontorrent + t->n = n; + } + } + + //printf("Looked for %d, found %d, prefix %d\n", n, from, p3); + + *exact = (n != from) && len; + return *exact ? from : p3; +} + + +//----------------------------------------------------------------------------- +// Name encoder + +/* + * Tokenises a read name using ctx as context as the previous + * tokenisation. + * + * Parsed elements are then emitted for encoding by calling the + * encode_token() function with the context, token number (Nth token + * in line), token type and token value. + * + * Returns 0 on success; + * -1 on failure. + */ +static int encode_name(name_context *ctx, char *name, int len, int mode) { + int i, is_fixed, fixed_len; + + int exact; + int cnum = ctx->counter++; + int pnum = search_trie(ctx, name, len, cnum, &exact, &is_fixed, &fixed_len); + if (pnum < 0) pnum = cnum ? cnum-1 : 0; + //pnum = pnum & (MAX_NAMES-1); + //cnum = cnum & (MAX_NAMES-1); + //if (pnum == cnum) {pnum = cnum ? cnum-1 : 0;} +#ifdef ENC_DEBUG + fprintf(stderr, "%d: pnum=%d (%d), exact=%d\n%s\n%s\n", + ctx->counter, pnum, cnum-pnum, exact, ctx->lc[pnum].last_name, name); +#endif + + // Return DUP or DIFF switch, plus the distance. 
+ if (exact && len == strlen(ctx->lc[pnum].last_name)) { + encode_token_dup(ctx, cnum-pnum); + ctx->lc[cnum].last_name = name; + ctx->lc[cnum].last_ntok = ctx->lc[pnum].last_ntok; + int nc = ctx->lc[cnum].last_ntok ? ctx->lc[cnum].last_ntok : MAX_TOKENS; + ctx->lc[cnum].last = malloc(nc * sizeof(*ctx->lc[cnum].last)); + if (!ctx->lc[cnum].last) + return -1; + memcpy(ctx->lc[cnum].last, ctx->lc[pnum].last, + ctx->lc[cnum].last_ntok * sizeof(*ctx->lc[cnum].last)); + return 0; + } + + ctx->lc[cnum].last = malloc(MAX_TOKENS * sizeof(*ctx->lc[cnum].last)); + if (!ctx->lc[cnum].last) + return -1; + encode_token_diff(ctx, cnum-pnum); + + int ntok = 1; + i = 0; + if (is_fixed) { + if (ntok >= ctx->max_tok) { + memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); + memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); + memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); + ctx->max_tok = ntok+1; + } + if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_ALPHA) { + if (ctx->lc[pnum].last[ntok].token_int == fixed_len && memcmp(name, ctx->lc[pnum].last_name, fixed_len) == 0) { + encode_token_match(ctx, ntok); + } else { + encode_token_alpha(ctx, ntok, name, fixed_len); + } + } else { + encode_token_alpha(ctx, ntok, name, fixed_len); + } + ctx->lc[cnum].last[ntok].token_int = fixed_len; + ctx->lc[cnum].last[ntok].token_str = 0; + ctx->lc[cnum].last[ntok++].token_type = N_ALPHA; + i = fixed_len; + } + + for (; i < len; i++) { + if (ntok >= ctx->max_tok) { + if (ctx->max_tok >= MAX_TOKENS) + return -1; + memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); + memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); + memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); + ctx->max_tok = ntok+1; + } + + /* Determine data type of this segment */ + if (isalpha((uint8_t)name[i])) { + int s = i+1; +// int S = i+1; + +// // FIXME: try which of these is best. alnum is good sometimes. 
+// while (s < len && isalpha((uint8_t)name[s])) + while (s < len && (isalpha((uint8_t)name[s]) || + ispunct((uint8_t)name[s]))) +// while (s < len && name[s] != ':') +// while (s < len && !isdigit((uint8_t)name[s]) && name[s] != ':') + s++; + +// if (!is_fixed) { +// while (S < len && isalnum((uint8_t)name[S])) +// S++; +// if (s < S) +// s = S; +// } + + // Single byte strings are better encoded as chars. + if (s-i == 1) goto n_char; + + if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_ALPHA) { + if (s-i == ctx->lc[pnum].last[ntok].token_int && + memcmp(&name[i], + &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], + s-i) == 0) { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (alpha-mat, %.*s)\n", N_MATCH, s-i, &name[i]); +#endif + if (encode_token_match(ctx, ntok) < 0) return -1; + } else { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (alpha, %.*s / %.*s)\n", N_ALPHA, + s-i, &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], s-i, &name[i]); +#endif + // same token/length, but mismatches + if (encode_token_alpha(ctx, ntok, &name[i], s-i) < 0) return -1; + } + } else { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (new alpha, %.*s)\n", N_ALPHA, s-i, &name[i]); +#endif + if (encode_token_alpha(ctx, ntok, &name[i], s-i) < 0) return -1; + } + + ctx->lc[cnum].last[ntok].token_int = s-i; + ctx->lc[cnum].last[ntok].token_str = i; + ctx->lc[cnum].last[ntok].token_type = N_ALPHA; + + i = s-1; + } else if (name[i] == '0') digits0: { + // Digits starting with zero; encode length + value + uint32_t s = i; + uint32_t v = 0; + int d = 0; + + while (s < len && isdigit((uint8_t)name[s]) && s-i < 9) { + v = v*10 + name[s] - '0'; + //putchar(name[s]); + s++; + } + + // TODO: optimise choice over whether to switch from DIGITS to DELTA + // regularly vs all DIGITS, also MATCH vs DELTA 0. 
+ if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_DIGITS0) { + d = v - ctx->lc[pnum].last[ntok].token_int; + if (d == 0 && ctx->lc[pnum].last[ntok].token_str == s-i) { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (dig-mat, %d)\n", N_MATCH, v); +#endif + if (encode_token_match(ctx, ntok) < 0) return -1; + //ctx->lc[pnum].last[ntok].token_delta=0; + } else if (mode == 1 && d < 256 && d >= 0 && ctx->lc[pnum].last[ntok].token_str == s-i) { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (dig0-delta, %d / %d)\n", N_DDELTA0, ctx->lc[pnum].last[ntok].token_int, v); +#endif + //if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; + if (encode_token_int1(ctx, ntok, N_DDELTA0, d) < 0) return -1; + //ctx->lc[pnum].last[ntok].token_delta=1; + } else { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (dig0, %d / %d len %d)\n", N_DIGITS0, ctx->lc[pnum].last[ntok].token_int, v, s-i); +#endif + if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; + if (encode_token_int(ctx, ntok, N_DIGITS0, v) < 0) return -1; + //ctx->lc[pnum].last[ntok].token_delta=0; + } + } else { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (new dig0, %d len %d)\n", N_DIGITS0, v, s-i); +#endif + if (encode_token_int1_(ctx, ntok, N_DZLEN, s-i) < 0) return -1; + if (encode_token_int(ctx, ntok, N_DIGITS0, v) < 0) return -1; + //ctx->lc[pnum].last[ntok].token_delta=0; + } + + ctx->lc[cnum].last[ntok].token_str = s-i; // length + ctx->lc[cnum].last[ntok].token_int = v; + ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; + + i = s-1; + } else if (isdigit((uint8_t)name[i])) { + // digits starting 1-9; encode value + uint32_t s = i; + uint32_t v = 0; + int d = 0; + + while (s < len && isdigit((uint8_t)name[s]) && s-i < 9) { + v = v*10 + name[s] - '0'; + //putchar(name[s]); + s++; + } + + // dataset/10/K562_cytosol_LID8465_TopHat_v2.names + // col 4 is Illumina lane - we don't want match & delta in there + // as it has multiple lanes (so not ALL match) and delta 
is just + // random chance, increasing entropy instead. +// if (ntok == 4 || ntok == 8 || ntok == 10) { +// encode_token_int(ctx, ntok, N_DIGITS, v); +// } else { + + // If the last token was DIGITS0 and we are the same length, then encode + // using that method instead as it seems likely the entire column is fixed + // width, sometimes with leading zeros. + if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && + ctx->lc[pnum].last[ntok].token_type == N_DIGITS0 && + ctx->lc[pnum].last[ntok].token_str == s-i) + goto digits0; + + // TODO: optimise choice over whether to switch from DIGITS to DELTA + // regularly vs all DIGITS, also MATCH vs DELTA 0. + if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_DIGITS) { + d = v - ctx->lc[pnum].last[ntok].token_int; + if (d == 0) { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (dig-mat, %d)\n", N_MATCH, v); +#endif + if (encode_token_match(ctx, ntok) < 0) return -1; + //ctx->lc[pnum].last[ntok].token_delta=0; + //ctx->token_zcount[ntok]++; + } else if (mode == 1 && d < 256 && d >= 0 + //&& (10+ctx->token_dcount[ntok]) > (ctx->token_icount[ntok]+ctx->token_zcount[ntok]) + && (5+ctx->token_dcount[ntok]) > ctx->token_icount[ntok] + ) { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (dig-delta, %d / %d)\n", N_DDELTA, ctx->lc[pnum].last[ntok].token_int, v); +#endif + if (encode_token_int1(ctx, ntok, N_DDELTA, d) < 0) return -1; + //ctx->lc[pnum].last[ntok].token_delta=1; + ctx->token_dcount[ntok]++; + } else { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (dig, %d / %d)\n", N_DIGITS, ctx->lc[pnum].last[ntok].token_int, v); +#endif + if (encode_token_int(ctx, ntok, N_DIGITS, v) < 0) return -1; + //ctx->lc[pnum].last[ntok].token_delta=0; + ctx->token_icount[ntok]++; + } + } else { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (new dig, %d)\n", N_DIGITS, v); +#endif + if (encode_token_int(ctx, ntok, N_DIGITS, v) < 0) return -1; + //ctx->lc[pnum].last[ntok].token_delta=0; + } +// } + + 
ctx->lc[cnum].last[ntok].token_int = v; + ctx->lc[cnum].last[ntok].token_type = N_DIGITS; + + i = s-1; + } else { + n_char: + //if (!isalpha((uint8_t)name[i])) putchar(name[i]); + if (pnum < cnum && ntok < ctx->lc[pnum].last_ntok && ctx->lc[pnum].last[ntok].token_type == N_CHAR) { + if (name[i] == ctx->lc[pnum].last[ntok].token_int) { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (chr-mat, %c)\n", N_MATCH, name[i]); +#endif + if (encode_token_match(ctx, ntok) < 0) return -1; + } else { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (chr, %c / %c)\n", N_CHAR, ctx->lc[pnum].last[ntok].token_int, name[i]); +#endif + if (encode_token_char(ctx, ntok, name[i]) < 0) return -1; + } + } else { +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (new chr, %c)\n", N_CHAR, name[i]); +#endif + if (encode_token_char(ctx, ntok, name[i]) < 0) return -1; + } + + ctx->lc[cnum].last[ntok].token_int = name[i]; + ctx->lc[cnum].last[ntok].token_type = N_CHAR; + } + + ntok++; + //putchar(' '); + } + +#ifdef ENC_DEBUG + fprintf(stderr, "Tok %d (end)\n", N_END); +#endif + if (ntok >= ctx->max_tok) { + if (ctx->max_tok >= MAX_TOKENS) + return -1; + memset(&ctx->desc[ctx->max_tok << 4], 0, 16*sizeof(ctx->desc[0])); + memset(&ctx->token_dcount[ctx->max_tok], 0, sizeof(int)); + memset(&ctx->token_icount[ctx->max_tok], 0, sizeof(int)); + ctx->max_tok = ntok+1; + } + if (encode_token_end(ctx, ntok) < 0) return -1; +#ifdef ENC_DEBUG + fprintf(stderr, "ntok=%d max_tok=%d\n", ntok, ctx->max_tok); +#endif + + //printf("Encoded %.*s with %d tokens\n", len, name, ntok); + + ctx->lc[cnum].last_name = name; + ctx->lc[cnum].last_ntok = ntok; + last_context_tok *shrunk = realloc(ctx->lc[cnum].last, + (ntok+1) * sizeof(*ctx->lc[cnum].last)); + if (shrunk) + ctx->lc[cnum].last = shrunk; + + if (!ctx->lc[cnum].last) + return -1; + + return 0; +} + +//----------------------------------------------------------------------------- +// Name decoder + +static int decode_name(name_context *ctx, char *name, int name_len) { + int 
t0 = decode_token_type(ctx, 0); + uint32_t dist; + int pnum, cnum = ctx->counter++; + + if (cnum >= ctx->max_names) + return -1; + + if (t0 < 0 || t0 >= ctx->max_tok*16) + return 0; + + if (decode_token_int(ctx, 0, t0, &dist) < 0 || dist > cnum) + return -1; + if ((pnum = cnum - dist) < 0) pnum = 0; + + //fprintf(stderr, "t0=%d, dist=%d, pnum=%d, cnum=%d\n", t0, dist, pnum, cnum); + + if (t0 == N_DUP) { + if (pnum == cnum) + return -1; + + if (strlen(ctx->lc[pnum].last_name) +1 >= name_len) return -1; + strcpy(name, ctx->lc[pnum].last_name); + // FIXME: optimise this + ctx->lc[cnum].last_name = name; + ctx->lc[cnum].last_ntok = ctx->lc[pnum].last_ntok; + + int nc = ctx->lc[cnum].last_ntok ? ctx->lc[cnum].last_ntok : MAX_TOKENS; + ctx->lc[cnum].last = malloc(nc * sizeof(*ctx->lc[cnum].last)); + if (!ctx->lc[cnum].last) + return -1; + memcpy(ctx->lc[cnum].last, ctx->lc[pnum].last, + ctx->lc[cnum].last_ntok * sizeof(*ctx->lc[cnum].last)); + + return strlen(name)+1; + } + + *name = 0; + int ntok, len = 0, len2; + ctx->lc[cnum].last = malloc(MAX_TOKENS * sizeof(*ctx->lc[cnum].last)); + if (!ctx->lc[cnum].last) + return -1; + + for (ntok = 1; ntok < MAX_TOKENS && ntok < ctx->max_tok; ntok++) { + uint32_t v, vl; + enum name_type tok; + tok = decode_token_type(ctx, ntok); + //fprintf(stderr, "Tok %d = %d\n", ntok, tok); + + ctx->lc[cnum].last_ntok = 0; + + switch (tok) { + case N_CHAR: + if (len+1 >= name_len) return -1; + if (decode_token_char(ctx, ntok, &name[len]) < 0) return -1; + //fprintf(stderr, "Tok %d CHAR %c\n", ntok, name[len]); + ctx->lc[cnum].last[ntok].token_type = N_CHAR; + ctx->lc[cnum].last[ntok].token_int = name[len++]; + break; + + case N_ALPHA: + if ((len2 = decode_token_alpha(ctx, ntok, &name[len], name_len - len)) < 0) + return -1; + //fprintf(stderr, "Tok %d ALPHA %.*s\n", ntok, len2, &name[len]); + ctx->lc[cnum].last[ntok].token_type = N_ALPHA; + ctx->lc[cnum].last[ntok].token_str = len; + ctx->lc[cnum].last[ntok].token_int = len2; + len += len2; + 
break; + + case N_DIGITS0: // [0-9]* + if (decode_token_int1(ctx, ntok, N_DZLEN, &vl) < 0) return -1; + if (decode_token_int(ctx, ntok, N_DIGITS0, &v) < 0) return -1; + if (len+20+vl >= name_len) return -1; + len += append_uint32_fixed(&name[len], v, vl); + //fprintf(stderr, "Tok %d DIGITS0 %0*d\n", ntok, vl, v); + ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; + ctx->lc[cnum].last[ntok].token_int = v; + ctx->lc[cnum].last[ntok].token_str = vl; + break; + + case N_DDELTA0: + if (ntok >= ctx->lc[pnum].last_ntok) return -1; + if (decode_token_int1(ctx, ntok, N_DDELTA0, &v) < 0) return -1; + v += ctx->lc[pnum].last[ntok].token_int; + if (len+ctx->lc[pnum].last[ntok].token_str+1 >= name_len) return -1; + len += append_uint32_fixed(&name[len], v, ctx->lc[pnum].last[ntok].token_str); + //fprintf(stderr, "Tok %d DELTA0 %0*d\n", ntok, ctx->lc[pnum].last[ntok].token_str, v); + ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; + ctx->lc[cnum].last[ntok].token_int = v; + ctx->lc[cnum].last[ntok].token_str = ctx->lc[pnum].last[ntok].token_str; + break; + + case N_DIGITS: // [1-9][0-9]* + if (decode_token_int(ctx, ntok, N_DIGITS, &v) < 0) return -1; + if (len+20 >= name_len) return -1; + len += append_uint32_var(&name[len], v); + //fprintf(stderr, "Tok %d DIGITS %d\n", ntok, v); + ctx->lc[cnum].last[ntok].token_type = N_DIGITS; + ctx->lc[cnum].last[ntok].token_int = v; + break; + + case N_DDELTA: + if (ntok >= ctx->lc[pnum].last_ntok) return -1; + if (decode_token_int1(ctx, ntok, N_DDELTA, &v) < 0) return -1; + v += ctx->lc[pnum].last[ntok].token_int; + if (len+20 >= name_len) return -1; + len += append_uint32_var(&name[len], v); + //fprintf(stderr, "Tok %d DELTA %d\n", ntok, v); + ctx->lc[cnum].last[ntok].token_type = N_DIGITS; + ctx->lc[cnum].last[ntok].token_int = v; + break; + + case N_NOP: + ctx->lc[cnum].last[ntok].token_type = N_NOP; + break; + + case N_MATCH: + if (ntok >= ctx->lc[pnum].last_ntok) return -1; + switch (ctx->lc[pnum].last[ntok].token_type) { + case 
N_CHAR: + if (len+1 >= name_len) return -1; + name[len++] = ctx->lc[pnum].last[ntok].token_int; + //fprintf(stderr, "Tok %d MATCH CHAR %c\n", ntok, ctx->lc[pnum].last[ntok].token_int); + ctx->lc[cnum].last[ntok].token_type = N_CHAR; + ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; + break; + + case N_ALPHA: + if (ctx->lc[pnum].last[ntok].token_int < 0 || + len+ctx->lc[pnum].last[ntok].token_int >= name_len) return -1; + memcpy(&name[len], + &ctx->lc[pnum].last_name[ctx->lc[pnum].last[ntok].token_str], + ctx->lc[pnum].last[ntok].token_int); + //fprintf(stderr, "Tok %d MATCH ALPHA %.*s\n", ntok, ctx->lc[pnum].last[ntok].token_int, &name[len]); + ctx->lc[cnum].last[ntok].token_type = N_ALPHA; + ctx->lc[cnum].last[ntok].token_str = len; + ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; + len += ctx->lc[pnum].last[ntok].token_int; + break; + + case N_DIGITS: + if (len+20 >= name_len) return -1; + len += append_uint32_var(&name[len], ctx->lc[pnum].last[ntok].token_int); + //fprintf(stderr, "Tok %d MATCH DIGITS %d\n", ntok, ctx->lc[pnum].last[ntok].token_int); + ctx->lc[cnum].last[ntok].token_type = N_DIGITS; + ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; + break; + + case N_DIGITS0: + if (len+ctx->lc[pnum].last[ntok].token_str >= name_len) return -1; + len += append_uint32_fixed(&name[len], ctx->lc[pnum].last[ntok].token_int, ctx->lc[pnum].last[ntok].token_str); + //fprintf(stderr, "Tok %d MATCH DIGITS %0*d\n", ntok, ctx->lc[pnum].last[ntok].token_str, ctx->lc[pnum].last[ntok].token_int); + ctx->lc[cnum].last[ntok].token_type = N_DIGITS0; + ctx->lc[cnum].last[ntok].token_int = ctx->lc[pnum].last[ntok].token_int; + ctx->lc[cnum].last[ntok].token_str = ctx->lc[pnum].last[ntok].token_str; + break; + + default: + return -1; + } + break; + + default: // an elided N_END + case N_END: + if (len+1 >= name_len) return -1; + name[len++] = 0; + ctx->lc[cnum].last[ntok].token_type = N_END; + + 
ctx->lc[cnum].last_name = name; + ctx->lc[cnum].last_ntok = ntok; + + last_context_tok *shrunk + = realloc(ctx->lc[cnum].last, + (ntok+1) * sizeof(*ctx->lc[cnum].last)); + if (shrunk) + ctx->lc[cnum].last = shrunk; + + if (!ctx->lc[cnum].last) + return -1; + + return len; + } + } + + + return -1; +} + +//----------------------------------------------------------------------------- +// arith adaptive codec or static rANS 4x16pr codec +static int arith_encode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len, int method) { + unsigned int olen = *out_len-6, nb; + if (arith_compress_to(in, in_len, out+6, &olen, method) == NULL) + return -1; + + nb = var_put_u32(out, out + *out_len, olen); + memmove(out+nb, out+6, olen); + *out_len = olen+nb; + + return 0; +} + +// Returns number of bytes read from 'in' on success, +// -1 on failure. +static int64_t arith_decode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len) { + unsigned int olen = *out_len; + + uint32_t clen; + int nb = var_get_u32(in, in+in_len, &clen); + //fprintf(stderr, "Arith decode %x\n", in[nb]); + if (arith_uncompress_to(in+nb, in_len-nb, out, &olen) == NULL) + return -1; + //fprintf(stderr, " Stored clen=%d\n", (int)clen); + *out_len = olen; + return clen+nb; +} + +static int rans_encode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len, int method) { + unsigned int olen = *out_len-6, nb; + if (rans_compress_to_4x16(in, in_len, out+6, &olen, method) == NULL) + return -1; + + nb = var_put_u32(out, out + *out_len, olen); + memmove(out+nb, out+6, olen); + *out_len = olen+nb; + + return 0; +} + +// Returns number of bytes read from 'in' on success, +// -1 on failure. 
+static int64_t rans_decode(uint8_t *in, uint64_t in_len, uint8_t *out, uint64_t *out_len) { + unsigned int olen = *out_len; + + uint32_t clen; + int nb = var_get_u32(in, in+in_len, &clen); + //fprintf(stderr, "Arith decode %x\n", in[nb]); + if (rans_uncompress_to_4x16(in+nb, in_len-nb, out, &olen) == NULL) + return -1; + //fprintf(stderr, " Stored clen=%d\n", (int)clen); + *out_len = olen; + return clen+nb; +} + +static int compress(uint8_t *in, uint64_t in_len, enum name_type type, + int level, int use_arith, + uint8_t *out, uint64_t *out_len) { + uint64_t best_sz = UINT64_MAX; + uint64_t olen = *out_len; + int ret = -1; + + // Map levels 1-9 to 0-4, for parameter lookup in R[] below + level = (level-1)/2; + if (level<0) level=0; + if (level>4) level=4; + + // rANS4x16pr and arith_dynamic parameters to explore. + // We brute force these, so fast levels test 1 setting and slow test more + int R[5][N_ALL][7] = { + { // -1 + /* TYPE */ {1, 128}, + /* ALPHA */ {1, 129}, + /* CHAR */ {1, 0}, + /* DIGITS0 */ {1, 8}, + /* DZLEN */ {1, 0}, + /* DUP */ {1, 8}, + /* DIFF */ {1, 8}, + /* DIGITS */ {1, 8}, + /* DDELTA */ {1, 0}, + /* DDELTA0 */ {1, 128}, + /* MATCH */ {1, 0}, + /* NOP */ {1, 0}, + /* END */ {1, 0} + }, + + { // -3 + /* TYPE */ {2, 192,0}, + /* ALPHA */ {2, 129,1}, + /* CHAR */ {1, 0}, + /* DIGITS0 */ {2, 128+8,0}, // size%4==0 + /* DZLEN */ {1, 0}, + /* DUP */ {1, 192+8}, // size%4==0 + /* DIFF */ {1, 128+8}, // size%4==0 + /* DIGITS */ {1, 192+8}, // size%4==0 + /* DDELTA */ {1, 0}, + /* DDELTA0 */ {1, 128}, + /* MATCH */ {1, 0}, + /* NOP */ {1, 0}, + /* END */ {1, 0} + }, + + { // -5 + /* TYPE */ {2, 192,0}, + /* ALPHA */ {4, 1,128,0,129}, + /* CHAR */ {1, 0}, + /* DIGITS0 */ {2, 200,0}, + /* DZLEN */ {1, 0}, + /* DUP */ {1, 200}, + /* DIFF */ {2, 192,200}, + /* DIGITS */ {2, 132,201}, + /* DDELTA */ {1, 0}, + /* DDELTA0 */ {1, 128}, + /* MATCH */ {1, 0}, + /* NOP */ {1, 0}, + /* END */ {1, 0} + }, + + { // -7 + /* TYPE */ {3, 193,0,1}, + /* ALPHA */ {5, 
128, 1,128,0,129}, + /* CHAR */ {2, 1,0}, + /* DIGITS0 */ {2, 200,0}, // or 201,0 + /* DZLEN */ {1, 0}, + /* DUP */ {1, 201}, + /* DIFF */ {2, 192,200}, // or 192,201 + /* DIGITS */ {2, 132, 201}, // +bz2 here and -9 + /* DDELTA */ {1, 0}, + /* DDELTA0 */ {1, 128}, + /* MATCH */ {1, 0}, + /* NOP */ {1, 0}, + /* END */ {1, 0} + }, + + { // -9 + /* TYPE */ {6, 192,0,1, 65, 193,132}, + /* ALPHA */ {4, 132, 1, 0,129}, + /* CHAR */ {3, 1,0,192}, + /* DIGITS0 */ {4, 201,0, 192,64}, + /* DZLEN */ {3, 0,128,1}, + /* DUP */ {1, 201}, + /* DIFF */ {3, 192, 201,65}, + /* DIGITS */ {6, 132, 201,1, 192,129, 193}, + /* DDELTA */ {3, 1,0, 192}, + /* DDELTA0 */ {3, 192,1, 0}, + /* MATCH */ {1, 0}, + /* NOP */ {1, 0}, + /* END */ {1, 0} + }, + }; + // Minor tweak to level 3 DIGITS if arithmetic, to use O(201) instead. + if (use_arith) R[1][N_DIGITS][1]=201; + + int *meth = R[level][type]; + + int last = 0, m; + uint8_t best_static[8192]; + uint8_t *best_dat = best_static; + for (m = 1; m <= meth[0]; m++) { + *out_len = olen; + + if (!use_arith && (meth[m] & 4)) + meth[m] &= ~4; + + if (in_len % 4 != 0 && (meth[m] & 8)) + continue; + + last = 0; + if (use_arith) { + if (arith_encode(in, in_len, out, out_len, meth[m]) <0) + goto err; + } else { + if (rans_encode(in, in_len, out, out_len, meth[m]) < 0) + goto err; + } + + if (best_sz > *out_len) { + best_sz = *out_len; + last = 1; + + if (m+1 > meth[0]) + // no need to memcpy if we're not going to overwrite out + break; + + if (best_sz > 8192 && best_dat == best_static) { + // No need to realloc as best_sz only ever decreases + best_dat = malloc(best_sz); + if (!best_dat) + return -1; + } + memcpy(best_dat, out, best_sz); + } + } + + if (!last) + memcpy(out, best_dat, best_sz); + *out_len = best_sz; + ret = 0; + + err: + if (best_dat != best_static) + free(best_dat); + + return ret; +} + +static uint64_t uncompressed_size(uint8_t *in, uint64_t in_len) { + uint32_t clen, ulen; + + // in[0] in part of buffer written by us + int nb = 
var_get_u32(in, in+in_len, &clen); + + // in[nb] is part of buffer written to by arith_dynamic. + var_get_u32(in+nb+1, in+in_len, &ulen); + + return ulen; +} + +static int uncompress(int use_arith, uint8_t *in, uint64_t in_len, + uint8_t *out, uint64_t *out_len) { + uint32_t clen; + var_get_u32(in, in+in_len, &clen); + return use_arith + ? arith_decode(in, in_len, out, out_len) + : rans_decode(in, in_len, out, out_len); +} + +//----------------------------------------------------------------------------- + +/* + * Converts a line or \0 separated block of reading names to a compressed buffer. + * The code can only encode whole lines and will not attempt a partial line. + * Use the "last_start_p" return value to identify the partial line start + * offset, for continuation purposes. + * + * Returns a malloced buffer holding compressed data of size *out_len, + * or NULL on failure + */ +uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith, + int *out_len, int *last_start_p) { + int last_start = 0, i, j, nreads; + + if (len < 0) { + *out_len = 0; + return NULL; + } + + // Count lines + for (nreads = i = 0; i < len; i++) + if (blk[i] <= '\n') // \n or \0 separated entries + nreads++; + + name_context *ctx = create_context(nreads); + if (!ctx) + return NULL; + + // Construct trie + int ctr = 0; + for (i = j = 0; i < len; j=++i) { + while (i < len && blk[i] > '\n') + i++; + if (i >= len) + break; + + //blk[i] = '\0'; + last_start = i+1; + if (build_trie(ctx, &blk[j], i-j, ctr++) < 0) { + free_context(ctx); + return NULL; + } + } + if (last_start_p) + *last_start_p = last_start; + + //fprintf(stderr, "Processed %d of %d in block, line %d\n", last_start, len, ctr); + + // Encode name + for (i = j = 0; i < len; j=++i) { + while (i < len && (signed char)blk[i] >= ' ') // non-ASCII check + i++; + if (i >= len) + break; + + if (blk[i] != '\0' && blk[i] != '\n') { + // Names must be 7-bit ASCII printable + free_context(ctx); + return NULL; + } + + blk[i] = 
'\0'; + // try both 0 and 1 and pick best? + if (encode_name(ctx, &blk[j], i-j, 1) < 0) { + free_context(ctx); + return NULL; + } + } + +#if 0 + for (i = 0; i < ctx->max_tok*16; i++) { + char fn[1024]; + if (!ctx->desc[i].buf_l) continue; + sprintf(fn, "_tok.%02d_%02d.%d", i>>4,i&15,i); + FILE *fp = fopen(fn, "w"); + fwrite(ctx->desc[i].buf, 1, ctx->desc[i].buf_l, fp); + fclose(fp); + } +#endif + + //dump_trie(t_head, 0); + + // FIXME: merge descriptors + // + // If we see foo7:1 foo7:12 foo7:7 etc then foo: is constant, + // but it's encoded as alpha+dig<7>+char<:> instead of alpha. + // Any time token type 0 is all match beyond the first location we have + // a candidate for merging in string form. + // + // This saves around .1 to 1.3 percent on varying data sets. + // Cruder hack is dedicated prefix/suffix matching to short-cut this. + + + // Drop N_TYPE blocks if they all contain matches bar the first item, + // as we can regenerate these from the subsequent blocks types during + // decode. 
+ for (i = 0; i < ctx->max_tok*16; i+=16) { + if (!ctx->desc[i].buf_l) continue; + + int z; + for (z=1; zdesc[i].buf_l; z++) { + if (ctx->desc[i].buf[z] != N_MATCH) + break; + } + if (z == ctx->desc[i].buf_l) { + int k; + for (k=1; k<16; k++) + if (ctx->desc[i+k].buf_l) + break; + + if (k < 16) { + ctx->desc[i].buf_l = 0; + free(ctx->desc[i].buf); + ctx->desc[i].buf = NULL; + } + } + } + + // Serialise descriptors + uint32_t tot_size = 9; + for (i = 0; i < ctx->max_tok*16; i++) { + if (!ctx->desc[i].buf_l) continue; + + int tnum = i>>4; + int ttype = i&15; + + uint64_t out_len = 1.5 * arith_compress_bound(ctx->desc[i].buf_l, 1); // guesswork + uint8_t *out = malloc(out_len); + if (!out) { + free_context(ctx); + return NULL; + } + + if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level, + use_arith, out, &out_len) < 0) { + free_context(ctx); + return NULL; + } + + free(ctx->desc[i].buf); + ctx->desc[i].buf = out; + ctx->desc[i].buf_l = out_len; + ctx->desc[i].tnum = tnum; + ctx->desc[i].ttype = ttype; + + // Find dups + int j; + for (j = 0; j < i; j++) { + if (!ctx->desc[j].buf) + continue; + if (ctx->desc[i].buf_l != ctx->desc[j].buf_l || ctx->desc[i].buf_l <= 4) + continue; + if (memcmp(ctx->desc[i].buf, ctx->desc[j].buf, ctx->desc[i].buf_l) == 0) + break; + } + if (j < i) { + ctx->desc[i].dup_from = j; + tot_size += 3; // flag, dup_from, ttype + } else { + ctx->desc[i].dup_from = -1; + tot_size += out_len + 1; // ttype + } + } + +#if 0 + for (i = 0; i < ctx->max_tok*16; i++) { + char fn[1024]; + if (!ctx->desc[i].buf_l && ctx->desc[i].dup_from == -1) continue; + sprintf(fn, "_tok.%02d_%02d.%d.comp", i>>4,i&15,i); + FILE *fp = fopen(fn, "w"); + fwrite(ctx->desc[i].buf, 1, ctx->desc[i].buf_l, fp); + fclose(fp); + } +#endif + + // Write + uint8_t *out = malloc(tot_size+13); + if (!out) { + free_context(ctx); + return NULL; + } + + uint8_t *cp = out; + + *out_len = tot_size; +// *(uint32_t *)cp = last_start; cp += 4; +// *(uint32_t *)cp = nreads; cp += 4; + 
*cp++ = (last_start >> 0) & 0xff; + *cp++ = (last_start >> 8) & 0xff; + *cp++ = (last_start >> 16) & 0xff; + *cp++ = (last_start >> 24) & 0xff; + *cp++ = (nreads >> 0) & 0xff; + *cp++ = (nreads >> 8) & 0xff; + *cp++ = (nreads >> 16) & 0xff; + *cp++ = (nreads >> 24) & 0xff; + *cp++ = use_arith; + //write(1, &nreads, 4); + int last_tnum = -1; + for (i = 0; i < ctx->max_tok*16; i++) { + if (!ctx->desc[i].buf_l) continue; + uint8_t ttype8 = ctx->desc[i].ttype; + if (ctx->desc[i].tnum != last_tnum) { + ttype8 |= 128; + last_tnum = ctx->desc[i].tnum; + } + if (ctx->desc[i].dup_from >= 0) { + //fprintf(stderr, "Dup %d from %d, sz %d\n", i, ctx->desc[i].dup_from, ctx->desc[i].buf_l); + *cp++ = ttype8 | 64; + *cp++ = ctx->desc[i].dup_from >> 4; + *cp++ = ctx->desc[i].dup_from & 15; + } else { + *cp++ = ttype8; + memcpy(cp, ctx->desc[i].buf, ctx->desc[i].buf_l); + cp += ctx->desc[i].buf_l; + } + } + + //assert(cp-out == tot_size); + + free_context(ctx); + + return out; +} + +// Deprecated interface; to remove when we next to an ABI breakage +uint8_t *encode_names(char *blk, int len, int level, int use_arith, + int *out_len, int *last_start_p) { + return tok3_encode_names(blk, len, level, use_arith, out_len, + last_start_p); +} + +/* + * Decodes a compressed block of read names into \0 separated names. + * The size of the data returned (malloced) is in *out_len. + * + * Returns NULL on failure. 
+ */ +uint8_t *tok3_decode_names(uint8_t *in, uint32_t sz, uint32_t *out_len) { + if (sz < 9) + return NULL; + + int i, o = 9; + //int ulen = *(uint32_t *)in; + int ulen = (in[0]<<0) | (in[1]<<8) | (in[2]<<16) | + (((uint32_t)in[3])<<24); + + if (ulen < 0 || ulen >= INT_MAX-1024) + return NULL; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // Speed up fuzzing by blocking excessive sizes + if (ulen > 100000) + return NULL; +#endif + + //int nreads = *(uint32_t *)(in+4); + int nreads = (in[4]<<0) | (in[5]<<8) | (in[6]<<16) | (((uint32_t)in[7])<<24); + int use_arith = in[8]; + name_context *ctx = create_context(nreads); + if (!ctx) + return NULL; + + // Unpack descriptors + int tnum = -1; + while (o < sz) { + uint8_t ttype = in[o++]; + if (ttype & 64) { + if (o+2 > sz) goto err; + int j = in[o++]<<4; + j += in[o++]; + if (ttype & 128) { + tnum++; + if (tnum >= MAX_TOKENS) + goto err; + ctx->max_tok = tnum+1; + memset(&ctx->desc[tnum<<4], 0, 16*sizeof(ctx->desc[tnum])); + } + + if ((ttype & 15) != 0 && (ttype & 128)) { + if (tnum < 0) goto err; + ctx->desc[tnum<<4].buf = malloc(nreads); + if (!ctx->desc[tnum<<4].buf) + goto err; + + ctx->desc[tnum<<4].buf_l = 0; + ctx->desc[tnum<<4].buf_a = nreads; + ctx->desc[tnum<<4].buf[0] = ttype&15; + memset(&ctx->desc[tnum<<4].buf[1], N_MATCH, nreads-1); + } + + if (tnum < 0) goto err; + i = (tnum<<4) | (ttype&15); + if (j >= i) + goto err; + if (!ctx->desc[j].buf) + goto err; // Attempt to copy a non-existent stream + + ctx->desc[i].buf_l = 0; + ctx->desc[i].buf_a = ctx->desc[j].buf_a; + if (ctx->desc[i].buf) free(ctx->desc[i].buf); + ctx->desc[i].buf = malloc(ctx->desc[i].buf_a); + if (!ctx->desc[i].buf) + goto err; + + memcpy(ctx->desc[i].buf, ctx->desc[j].buf, ctx->desc[i].buf_a); + //fprintf(stderr, "Copy ttype %d, i=%d,j=%d, size %d\n", ttype, i, j, (int)ctx->desc[i].buf_a); + continue; + } + + //if (ttype == 0) + if (ttype & 128) { + tnum++; + if (tnum >= MAX_TOKENS) + goto err; + ctx->max_tok = tnum+1; + 
memset(&ctx->desc[tnum<<4], 0, 16*sizeof(ctx->desc[tnum])); + } + + if ((ttype & 15) != 0 && (ttype & 128)) { + if (tnum < 0) goto err; + if (ctx->desc[tnum<<4].buf) free(ctx->desc[tnum<<4].buf); + ctx->desc[tnum<<4].buf = malloc(nreads); + if (!ctx->desc[tnum<<4].buf) + goto err; + ctx->desc[tnum<<4].buf_l = 0; + ctx->desc[tnum<<4].buf_a = nreads; + ctx->desc[tnum<<4].buf[0] = ttype&15; + memset(&ctx->desc[tnum<<4].buf[1], N_MATCH, nreads-1); + } + + //fprintf(stderr, "Read %02x\n", c); + + // Load compressed block + int64_t clen, ulen = uncompressed_size(&in[o], sz-o); + if (ulen < 0 || ulen >= INT_MAX) + goto err; + if (tnum < 0) goto err; + i = (tnum<<4) | (ttype&15); + + if (i >= MAX_TBLOCKS || i < 0) + goto err; + + ctx->desc[i].buf_l = 0; + if (ctx->desc[i].buf) free(ctx->desc[i].buf); + ctx->desc[i].buf = malloc(ulen); + if (!ctx->desc[i].buf) + goto err; + + ctx->desc[i].buf_a = ulen; + uint64_t usz = ctx->desc[i].buf_a; // convert from size_t for 32-bit sys + clen = uncompress(use_arith, &in[o], sz-o, ctx->desc[i].buf, &usz); + ctx->desc[i].buf_a = usz; + if (clen < 0 || ctx->desc[i].buf_a != ulen) + goto err; + + // fprintf(stderr, "%d: Decode tnum %d type %d clen %d ulen %d via %d\n", + // o, tnum, ttype, (int)clen, (int)ctx->desc[i].buf_a, ctx->desc[i].buf[0]); + + o += clen; + + // Encode tnum 0 type 0 ulen 100000 clen 12530 via 2 + // Encode tnum 0 type 6 ulen 196800 clen 43928 via 3 + // Encode tnum 0 type 7 ulen 203200 clen 17531 via 3 + // Encode tnum 1 type 0 ulen 50800 clen 10 via 1 + // Encode tnum 1 type 1 ulen 3 clen 5 via 0 + // Encode tnum 2 type 0 ulen 50800 clen 10 via 1 + // + } + + int ret; + ulen += 1024; // for easy coding in decode_name. + uint8_t *out = malloc(ulen); + if (!out) + goto err; + + size_t out_sz = 0; + while ((ret = decode_name(ctx, (char *)out+out_sz, ulen)) > 0) { + out_sz += ret; + ulen -= ret; + } + + if (ret < 0) + free(out); + + free_context(ctx); + + *out_len = out_sz; + return ret == 0 ? 
out : NULL; + + err: + free_context(ctx); + return NULL; +} + +// Deprecated interface; to be removed when we next make an ABI-breaking change +uint8_t *decode_names(uint8_t *in, uint32_t sz, uint32_t *out_len) { + return tok3_decode_names(in, sz, out_len); +} diff --git a/src/htslib-1.18/htscodecs/htscodecs/tokenise_name3.h b/src/htslib-1.21/htscodecs/htscodecs/tokenise_name3.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/tokenise_name3.h rename to src/htslib-1.21/htscodecs/htscodecs/tokenise_name3.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/utils.c b/src/htslib-1.21/htscodecs/htscodecs/utils.c similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/utils.c rename to src/htslib-1.21/htscodecs/htscodecs/utils.c diff --git a/src/htslib-1.18/htscodecs/htscodecs/utils.h b/src/htslib-1.21/htscodecs/htscodecs/utils.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/utils.h rename to src/htslib-1.21/htscodecs/htscodecs/utils.h diff --git a/src/htslib-1.19.1/htscodecs/htscodecs/varint.h b/src/htslib-1.21/htscodecs/htscodecs/varint.h similarity index 100% rename from src/htslib-1.19.1/htscodecs/htscodecs/varint.h rename to src/htslib-1.21/htscodecs/htscodecs/varint.h diff --git a/src/htslib-1.18/htscodecs/htscodecs/varint2.h b/src/htslib-1.21/htscodecs/htscodecs/varint2.h similarity index 100% rename from src/htslib-1.18/htscodecs/htscodecs/varint2.h rename to src/htslib-1.21/htscodecs/htscodecs/varint2.h diff --git a/src/htslib-1.21/htscodecs/htscodecs/version.h b/src/htslib-1.21/htscodecs/htscodecs/version.h new file mode 100644 index 0000000..048dcab --- /dev/null +++ b/src/htslib-1.21/htscodecs/htscodecs/version.h @@ -0,0 +1 @@ +#define HTSCODECS_VERSION_TEXT "1.6.1" diff --git a/src/htslib-1.18/htscodecs_bundled.mk b/src/htslib-1.21/htscodecs_bundled.mk similarity index 100% rename from src/htslib-1.18/htscodecs_bundled.mk rename to src/htslib-1.21/htscodecs_bundled.mk diff --git 
a/src/htslib-1.18/htscodecs_external.mk b/src/htslib-1.21/htscodecs_external.mk similarity index 100% rename from src/htslib-1.18/htscodecs_external.mk rename to src/htslib-1.21/htscodecs_external.mk diff --git a/src/htslib-1.21/htsfile.1 b/src/htslib-1.21/htsfile.1 new file mode 100644 index 0000000..e22fdbc --- /dev/null +++ b/src/htslib-1.21/htsfile.1 @@ -0,0 +1,94 @@ +.TH htsfile 1 "12 September 2024" "htslib-1.21" "Bioinformatics tools" +.SH NAME +htsfile \- identify high-throughput sequencing data files +.\" +.\" Copyright (C) 2015, 2017-2018 Genome Research Ltd. +.\" +.\" Author: John Marshall +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +.SH SYNOPSIS +.B htsfile +.RB [ -chHv ] +.IR FILE ... 
+.br +.B htsfile --copy +.RB [ -v ] +.I FILE DESTFILE +.SH DESCRIPTION +The \fBhtsfile\fR utility attempts to identify what kind of high-throughput +sequencing data files the specified files are, and provides minimal viewing +capabilities for some kinds of data file. +.P +It can identify sequencing data files such as SAM, BAM, and CRAM; +variant calling data files such as VCF and BCF; +index files used to index these data files; +and compressed versions of many of them. +.P +For each \fIFILE\fR given, \fBhtsfile\fP prints a description of the file +format determined, using similar keyword conventions to \fBfile\fP(1): +"text" indicates a textual file that can probably be viewed on a terminal; +"data" indicates binary data; +"sequence", "variant calling", and "index" indicate different categories of +data file. +When it can be identified, the name of the particular file format (such as +"BAM" or "VCF") is printed at the start of the description. +.P +When used to view file contents as text, \fBhtsfile\fP can optionally show +only headers or only data records, but has no other filtering capabilities. +Use \fBsamtools\fR or \fBbcftools\fR if you need more extensive viewing or +filtering capabilities. +.P +Alternatively, when \fB--copy\fR is used, \fBhtsfile\fR takes exactly two +arguments and performs a byte-for-byte copy from \fIFILE\fR to \fIDESTFILE\fR. +This is similar to \fBcp\fR(1), but HTSlib's remote file access facilities +are available for both source and destination. +.P +The following options are accepted: +.TP 4n +.BR -c ", " --view +Instead of identifying the specified files, display a textual representation +of their contents on standard output. +.IP +By default, \fB--view\fR refuses to display files in unknown formats. +When \fB--verbose\fR is also given, the raw contents of such files are +displayed, with non-printable characters shown via C-style "\\x" hexadecimal +escape sequences. 
+.TP +.BR -C ", " --copy +Instead of identifying or displaying the specified files, copy the source +\fIFILE\fR to the destination \fIDESTFILE\fR. +Only \fB--verbose\fR may be used in conjunction with \fB--copy\fR. +.TP +.BR -h ", " --header-only +Display data file headers only. +Implies \fB--view\fR. +.TP +.BR -H ", " --no-header +When viewing files, display data records only. +.TP +.BR -v ", " --verbose +Display additional warnings and diagnostic messages. +Using \fB--verbose\fR repeatedly further raises the verbosity. +.PP +.SH SEE ALSO +.IR bcftools (1), +.IR file (1), +.IR samtools (1) diff --git a/src/htslib-1.21/htsfile.c b/src/htslib-1.21/htsfile.c new file mode 100644 index 0000000..25af3f5 --- /dev/null +++ b/src/htslib-1.21/htsfile.c @@ -0,0 +1,324 @@ +/* htsfile.c -- file identifier and minimal viewer. + + Copyright (C) 2014-2019 Genome Research Ltd. + + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "htslib/hfile.h" +#include "htslib/hts.h" +#include "htslib/sam.h" +#include "htslib/vcf.h" + +#ifndef EFTYPE +#define EFTYPE ENOEXEC +#endif + +enum { identify, view_headers, view_all, copy } mode = identify; +int show_headers = 1; +int verbose = 0; +int status = EXIT_SUCCESS; /* Exit status from main */ + +void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) error(const char *format, ...) +{ + int err = errno; + va_list args; + va_start(args, format); + fflush(stdout); + fprintf(stderr, "htsfile: "); + vfprintf(stderr, format, args); + if (err) fprintf(stderr, ": %s\n", strerror(err)); + else fprintf(stderr, "\n"); + fflush(stderr); + va_end(args); + status = EXIT_FAILURE; +} + +static void view_sam(samFile *in, const char *filename) +{ + bam1_t *b = NULL; + sam_hdr_t *hdr = NULL; + samFile *out = NULL; + + hdr = sam_hdr_read(in); + if (hdr == NULL) { + errno = 0; error("reading headers from \"%s\" failed", filename); + goto clean; + } + + out = hts_open("-", "w"); + if (out == NULL) { error("reopening standard output failed"); goto clean; } + + if (show_headers) { + if (sam_hdr_write(out, hdr) != 0) { + error("writing headers to standard output failed"); + goto clean; + } + } + + if (mode == view_all) { + int ret; + + b = bam_init1(); + if (b == NULL) { error("can't create record"); goto clean; } + + while ((ret = sam_read1(in, hdr, b)) >= 0) { + if (sam_write1(out, hdr, b) < 0) { + error("writing to standard output failed"); + goto clean; + } + } + + if (ret < -1) { error("reading \"%s\" failed", filename); goto clean; } + } + + clean: + sam_hdr_destroy(hdr); + bam_destroy1(b); + if (out) hts_close(out); +} + +static void view_vcf(vcfFile *in, const char *filename) +{ + bcf1_t *rec = NULL; + bcf_hdr_t *hdr = NULL; + vcfFile *out = NULL; + + hdr = bcf_hdr_read(in); + if (hdr == NULL) { + errno = 0; error("reading headers from \"%s\" failed", filename); + goto clean; + } + + out = 
hts_open("-", "w"); + if (out == NULL) { error("reopening standard output failed"); goto clean; } + + if (show_headers) { + if (bcf_hdr_write(out, hdr) != 0) { + error("writing headers to standard output failed"); + goto clean; + } + } + + if (mode == view_all) { + int ret; + + rec = bcf_init(); + if (rec == NULL) { error("can't create record"); goto clean; } + + while ((ret = bcf_read(in, hdr, rec)) >= 0) { + if (bcf_write(out, hdr, rec) < 0) { + error("writing to standard output failed"); + goto clean; + } + } + + if (ret < -1) { error("reading \"%s\" failed", filename); goto clean; } + } + + clean: + if (hdr) bcf_hdr_destroy(hdr); + if (rec) bcf_destroy(rec); + if (out) hts_close(out); +} + +static void view_raw(hFILE *fp, const char *filename) +{ + int c, prev; + for (prev = '\n'; (c = hgetc(fp)) != EOF; prev = c) + if (isprint(c) || c == '\n' || c == '\t') putchar(c); + else if (c == '\r') fputs("\\r", stdout); + else if (c == '\0') fputs("\\0", stdout); + else printf("\\x%02x", c); + + if (prev != '\n') putchar('\n'); + + if (herrno(fp)) { + errno = herrno(fp); + error("reading \"%s\" failed", filename); + } +} + +static void copy_raw(const char *srcfilename, const char *destfilename) +{ + hFILE *src = hopen(srcfilename, "r"); + if (src == NULL) { + error("can't open \"%s\"", srcfilename); + return; + } + + size_t bufsize = 1048576; + char *buffer = malloc(bufsize); + if (buffer == NULL) { + error("can't allocate copy buffer"); + hclose_abruptly(src); + return; + } + + hFILE *dest = hopen(destfilename, "w"); + if (dest == NULL) { + error("can't create \"%s\"", destfilename); + hclose_abruptly(src); + free(buffer); + return; + } + + ssize_t n; + while ((n = hread(src, buffer, bufsize)) > 0) + if (hwrite(dest, buffer, n) != n) { + error("writing to \"%s\" failed", destfilename); + hclose_abruptly(dest); + dest = NULL; + break; + } + + if (n < 0) { + error("reading from \"%s\" failed", srcfilename); + hclose_abruptly(src); + src = NULL; + } + + if (dest && 
hclose(dest) < 0) error("closing \"%s\" failed", destfilename); + if (src && hclose(src) < 0) error("closing \"%s\" failed", srcfilename); + free(buffer); +} + +static void usage(FILE *fp, int status) +{ + fprintf(fp, +"Usage: htsfile [-chHv] FILE...\n" +" htsfile --copy [-v] FILE DESTFILE\n" +"Options:\n" +" -c, --view Write textual form of FILEs to standard output\n" +" -C, --copy Copy the exact contents of FILE to DESTFILE\n" +" -h, --header-only Display only headers in view mode, not records\n" +" -H, --no-header Suppress header display in view mode\n" +" -v, --verbose Increase verbosity of warnings and diagnostics\n"); + exit(status); +} + +int main(int argc, char **argv) +{ + static const struct option options[] = { + { "copy", no_argument, NULL, 'C' }, + { "header-only", no_argument, NULL, 'h' }, + { "no-header", no_argument, NULL, 'H' }, + { "view", no_argument, NULL, 'c' }, + { "verbose", no_argument, NULL, 'v' }, + { "help", no_argument, NULL, 2 }, + { "version", no_argument, NULL, 1 }, + { NULL, 0, NULL, 0 } + }; + + int c, i; + + status = EXIT_SUCCESS; + while ((c = getopt_long(argc, argv, "cChHv", options, NULL)) >= 0) + switch (c) { + case 'c': mode = view_all; break; + case 'C': mode = copy; break; + case 'h': mode = view_headers; show_headers = 1; break; + case 'H': show_headers = 0; break; + case 'v': hts_verbose++; verbose++; break; + case 1: + printf( +"htsfile (htslib) %s\n" +"Copyright (C) 2024 Genome Research Ltd.\n", + hts_version()); + exit(EXIT_SUCCESS); + break; + case 2: usage(stdout, EXIT_SUCCESS); break; + default: usage(stderr, EXIT_FAILURE); break; + } + + if (optind == argc) usage(stderr, EXIT_FAILURE); + + if (mode == copy) { + if (optind + 2 != argc) usage(stderr, EXIT_FAILURE); + copy_raw(argv[optind], argv[optind + 1]); + return status; + } + + for (i = optind; i < argc; i++) { + hFILE *fp = hopen(argv[i], "r"); + if (fp == NULL) { + error("can't open \"%s\"", argv[i]); + continue; + } + + if (mode == identify) { + htsFormat fmt; 
+ if (hts_detect_format2(fp, argv[i], &fmt) < 0) { + error("detecting \"%s\" format failed", argv[i]); + hclose_abruptly(fp); + continue; + } + + char *description = hts_format_description(&fmt); + printf("%s:\t%s\n", argv[i], description); + free(description); + } + else { + htsFile *hts = hts_hopen(fp, argv[i], "r"); + if (hts) { + switch (hts_get_format(hts)->category) { + case sequence_data: + view_sam(hts, argv[i]); + break; + case variant_data: + view_vcf(hts, argv[i]); + break; + default: + if (verbose) + view_raw(fp, argv[i]); + else { + errno = 0; + error("can't view \"%s\": unknown format", argv[i]); + } + break; + } + + if (hts_close(hts) < 0) error("closing \"%s\" failed", argv[i]); + fp = NULL; + } + else if ((errno == EFTYPE || errno == ENOEXEC) && verbose) + view_raw(fp, argv[i]); + else + error("can't view \"%s\"", argv[i]); + } + + if (fp && hclose(fp) < 0) error("closing \"%s\" failed", argv[i]); + } + + if (fclose(stdout) != 0 && errno != EBADF) + error("closing standard output failed"); + + return status; +} diff --git a/src/htslib-1.21/htslib-s3-plugin.7 b/src/htslib-1.21/htslib-s3-plugin.7 new file mode 100644 index 0000000..44de657 --- /dev/null +++ b/src/htslib-1.21/htslib-s3-plugin.7 @@ -0,0 +1,215 @@ +.TH htslib-s3-plugin 7 "12 September 2024" "htslib-1.21" "Bioinformatics tools" +.SH NAME +htslib-s3-plugin \- htslib AWS S3 plugin +.\" +.\" Copyright (C) 2021-2022 Genome Research Ltd. 
+.\" +.\" Author: Andrew Whitwham +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +. +.\" For code blocks and examples (cf groff's Ultrix-specific man macros) +.de EX + +. in +\\$1 +. nf +. ft CR +.. +.de EE +. ft +. fi +. in + +.. + +.SH DESCRIPTION +The S3 plugin allows htslib file functions to communicate with servers that use +the AWS S3 protocol. Files are identified by their bucket and object key in a +URL format e.g. + +.B s3://mybucket/path/to/file + +With \fIpath/to/file\fR being the object key. + +Necessary security information can be provided as part of the URL, in +environment variables or from configuration files. + +The full URL format is: + +.B s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH + +The elements are: +.TP +.I SCHEME +The protocol used. Defaults to \fIhttps\fR. +.TP +.I ID +The user AWS access key. +.TP +.I SECRET +The secret key for use with the access key. 
+.TP +.I TOKEN +Token used for temporary security credentials. +.TP +.I BUCKET +AWS S3 bucket. +.TP +.I PATH +Path to the object under the bucket. +.LP + +The environment variables below will be used if the user ID is not set. +.TP +.B AWS_ACCESS_KEY_ID +The user AWS access key. +.TP +.B AWS_SECRET_ACCESS_KEY +The secret key for use with the access key. +.TP +.B AWS_DEFAULT_REGION +The region to use. Defaults to +.IR us-east-1 . +.TP +.B AWS_SESSION_TOKEN +Token used for temporary security credentials. +.TP +.B AWS_DEFAULT_PROFILE +The profile to use in \fIcredentials\fR, \fIconfig\fR or \fIs3cfg\fR files. +Defaults to +.IR default . +.TP +.B AWS_PROFILE +Same as above. +.TP +.B AWS_SHARED_CREDENTIALS_FILE +Location of the credentials file. Defaults to +.IR ~/.aws/credentials . +.TP +.B HTS_S3_S3CFG +Location of the s3cfg file. Defaults to +.IR ~/.s3cfg . +.TP +.B HTS_S3_HOST +Sets the host. Defaults to +.IR s3.amazonaws.com . +.TP +.B HTS_S3_V2 +If set, use signature v2 rather than the default v4. This will limit the plugin to +reading only. +.TP +.B HTS_S3_PART_SIZE +Sets the upload part size in Mb, the minimum being 5Mb. +By default the part size starts at 5Mb and expands at regular intervals to +accommodate bigger files (up to 2.5 Tbytes with the current rate). +Using this setting disables the automatic part size expansion. +.TP +.B HTS_S3_ADDRESS_STYLE +Sets the URL style. Options are auto (default), virtual or path. +.LP +In the absence of an ID from the previous two methods the credential/config +files will be used. The default file locations are either +\fI~/.aws/credentials\fR or \fI~/.s3cfg\fR (in that order). + +Entries used in aws style credentials file are aws_access_key_id, +aws_secret_access_key, aws_session_token, region, addressing_style and +expiry_time (unofficial, see SHORT-LIVED CREDENTIALS below). +Only the first two are usually needed. 
+ +Entries used in s3cmd style config files are access_key, secret_key, +access_token, host_base, bucket_location and host_bucket. Again only the first +two are usually needed. The host_bucket option is only used to set a path-style +URL, see below. + +.SH SHORT-LIVED CREDENTIALS + +Some cloud identity and access management (IAM) systems can make short-lived +credentials that allow access to resources. +These credentials will expire after a time and need to be renewed to +give continued access. +To enable this, the S3 plugin allows an \fIexpiry_time\fR entry to be set in the +\fI.aws/credentials\fR file. +The value for this entry should be the time when the token expires, +following the format in RFC3339 section 5.6, which takes the form: + + 2012-04-29T05:20:48Z + +That is, year - month - day, the letter "T", hour : minute : second. +The time can be followed by the letter "Z", indicating the UTC timezone, +or an offset from UTC which is a "+" or "-" sign followed by two digits for +the hours offset, ":", and two digits for the minutes. + +The S3 plugin will attempt to re-read the credentials file up to 1 minute +before the given expiry time, which means the file needs to be updated with +new credentials before then. +As the exact way of doing this can vary between services and IAM providers, +the S3 plugin expects this to be done by an external user-supplied process. +This may be achieved by running a program that replaces the file as new +credentials become available. 
+The following script shows how it might be done for AWS instance credentials: +.EX 2 +#!/bin/sh +instance='http://169.254.169.254' +tok_url="$instance/latest/api/token" +ttl_hdr='X-aws-ec2-metadata-token-ttl-seconds: 10' +creds_url="$instance/latest/meta-data/iam/security-credentials" +key1='aws_access_key_id = \(rs(.AccessKeyId)\(rsn' +key2='aws_secret_access_key = \(rs(.SecretAccessKey)\(rsn' +key3='aws_session_token = \(rs(.Token)\(rsn' +key4='expiry_time = \(rs(.Expiration)\(rsn' +while true; do + token=`curl -X PUT -H "$ttl_hdr" "$tok_url"` + tok_hdr="X-aws-ec2-metadata-token: $token" + role=`curl -H "$tok_hdr" "$creds_url/"` + expires='now' + ( curl -H "$tok_hdr" "$creds_url/$role" \(rs + | jq -r "\(rs"${key1}${key2}${key3}${key4}\(rs"" > credentials.new ) \(rs + && mv -f credentials.new credentials \(rs + && expires=`grep expiry_time credentials | cut -d ' ' -f 3-` + if test $? -ne 0 ; then break ; fi + expiry=`date -d "$expires - 3 minutes" '+%s'` + now=`date '+%s'` + test "$expiry" -gt "$now" && sleep $((($expiry - $now) / 2)) + sleep 30 +done +.EE + +Note that the \fIexpiry_time\fR key is currently only supported for the +\fI.aws/credentials\fR file (or the file referred to in the +.B AWS_SHARED_CREDENTIALS_FILE +environment variable). + +.SH NOTES +In most cases this plugin transforms the given URL into a virtual host-style +format e.g. \fIhttps://bucket.host/path/to/file\fR. A path-style format is used +where the URL is not DNS compliant or the bucket name contains a dot e.g. +\fIhttps://host/bu.cket/path/to/file\fR. + +Path-style can be forced by setting one of HTS_S3_ADDRESS_STYLE, +addressing_style or host_bucket. The first two can be set to \fBpath\fR while +host_bucket must \fBnot\fR include the \fB%(bucket).s\fR string. 
+ +.SH "SEE ALSO" +.IR htsfile (1) +.IR samtools (1) +.PP +RFC 3339: +.PP +htslib website: diff --git a/src/htslib-1.21/htslib.map b/src/htslib-1.21/htslib.map new file mode 100644 index 0000000..52ad738 --- /dev/null +++ b/src/htslib-1.21/htslib.map @@ -0,0 +1,652 @@ +HTSLIB_1.0 { + bam_aux2A; + bam_aux2Z; + bam_aux2f; + bam_aux2i; + bam_aux_append; + bam_aux_del; + bam_aux_get; + bam_cigar2qlen; + bam_cigar2rlen; + bam_copy1; + bam_destroy1; + bam_dup1; + bam_endpos; + bam_flag2str; + bam_hdr_read; + bam_hdr_write; + bam_init1; + bam_mplp_auto; + bam_mplp_destroy; + bam_mplp_init; + bam_mplp_init_overlaps; + bam_mplp_set_maxcnt; + bam_plp_auto; + bam_plp_destroy; + bam_plp_init; + bam_plp_next; + bam_plp_push; + bam_plp_reset; + bam_plp_set_maxcnt; + bam_read1; + bam_str2flag; + bam_write1; + bcf_add_filter; + bcf_calc_ac; + bcf_clear; + bcf_destroy; + bcf_dup; + bcf_enc_vchar; + bcf_enc_vfloat; + bcf_enc_vint; + bcf_float_missing; + bcf_float_vector_end; + bcf_fmt_array; + bcf_fmt_sized_array; + bcf_get_fmt; + bcf_get_format_string; + bcf_get_format_values; + bcf_get_info; + bcf_get_info_values; + bcf_get_variant_type; + bcf_get_variant_types; + bcf_gt_type; + bcf_has_filter; + bcf_hdr_add_hrec; + bcf_hdr_add_sample; + bcf_hdr_append; + bcf_hdr_combine; + bcf_hdr_destroy; + bcf_hdr_dup; + bcf_hdr_fmt_text; + bcf_hdr_get_hrec; + bcf_hdr_get_version; + bcf_hdr_id2int; + bcf_hdr_init; + bcf_hdr_parse; + bcf_hdr_parse_line; + bcf_hdr_printf; + bcf_hdr_read; + bcf_hdr_remove; + bcf_hdr_seqnames; + bcf_hdr_set; + bcf_hdr_set_samples; + bcf_hdr_set_version; + bcf_hdr_subset; + bcf_hdr_sync; + bcf_hdr_write; + bcf_hrec_add_key; + bcf_hrec_destroy; + bcf_hrec_dup; + bcf_hrec_find_key; + bcf_hrec_format; + bcf_hrec_set_val; + bcf_index_build; + bcf_init; + bcf_is_snp; + bcf_read; + bcf_readrec; + bcf_remove_alleles; + bcf_remove_filter; + bcf_sr_add_reader; + bcf_sr_destroy; + bcf_sr_init; + bcf_sr_next_line; + bcf_sr_regions_destroy; + bcf_sr_regions_flush; + 
bcf_sr_regions_init; + bcf_sr_regions_next; + bcf_sr_regions_overlap; + bcf_sr_regions_seek; + bcf_sr_remove_reader; + bcf_sr_seek; + bcf_sr_set_regions; + bcf_sr_set_samples; + bcf_sr_set_targets; + bcf_subset; + bcf_subset_format; + bcf_sweep_bwd; + bcf_sweep_destroy; + bcf_sweep_fwd; + bcf_sweep_hdr; + bcf_sweep_init; + bcf_translate; + bcf_trim_alleles; + bcf_type_shift; + bcf_unpack; + bcf_update_alleles; + bcf_update_alleles_str; + bcf_update_filter; + bcf_update_format; + bcf_update_format_string; + bcf_update_id; + bcf_update_info; + bcf_write; + bgzf_check_EOF; + bgzf_close; + bgzf_dopen; + bgzf_flush; + bgzf_flush_try; + bgzf_getc; + bgzf_getline; + bgzf_hopen; + bgzf_index_build_init; + bgzf_index_dump; + bgzf_index_load; + bgzf_is_bgzf; + bgzf_mt; + bgzf_open; + bgzf_raw_read; + bgzf_raw_write; + bgzf_read; + bgzf_read_block; + bgzf_seek; + bgzf_set_cache_size; + bgzf_useek; + bgzf_utell; + bgzf_write; + cram_close; + cram_compress_block; + cram_dopen; + cram_eof; + cram_flush; + cram_free_block; + cram_free_container; + cram_new_block; + cram_new_container; + cram_open; + cram_read_block; + cram_read_container; + cram_seek; + cram_set_header; + cram_set_option; + cram_set_voption; + cram_uncompress_block; + cram_write_block; + cram_write_container; + fai_build; + fai_destroy; + fai_fetch; + fai_load; + faidx_fetch_nseq; + faidx_fetch_seq; + faidx_has_seq; + hclose; + hclose_abruptly; + hdopen; + hfile_destroy; + hfile_init; + hfile_oflags; + hflush; + hgetc2; + hopen; + hpeek; + hputc2; + hputs2; + hread2; + hrec_add_idx; + hseek; + hts_close; + hts_file_type; + hts_get_bgzfp; + hts_getline; + hts_idx_destroy; + hts_idx_finish; + hts_idx_get_meta; + hts_idx_get_n_no_coor; + hts_idx_get_stat; + hts_idx_init; + hts_idx_load; + hts_idx_push; + hts_idx_save; + hts_idx_seqnames; + hts_idx_set_meta; + hts_itr_destroy; + hts_itr_next; + hts_itr_query; + hts_itr_querys; + hts_open; + hts_parse_reg; + hts_readlines; + hts_readlist; + hts_set_fai_filename; + 
hts_set_threads; + hts_verbose; + hts_version; + hwrite2; + kf_betai; + kf_erfc; + kf_gammap; + kf_gammaq; + kf_lgamma; + kmemmem; + knet_close; + knet_dopen; + knet_open; + knet_read; + knet_seek; + ksplit_core; + ksprintf; + kstrnstr; + kstrstr; + kstrtok; + kt_fisher_exact; + kvsprintf; + sam_format1; + sam_hdr_add_lines; + sam_hdr_dup; + sam_hdr_incr_ref; + sam_hdr_length; + sam_hdr_parse; + sam_hdr_read; + sam_hdr_str; + sam_hdr_write; + sam_index_load; + sam_itr_queryi; + sam_itr_querys; + sam_open_mode; + sam_parse1; + sam_read1; + sam_write1; + seq_nt16_str; + seq_nt16_table; + stringify_argv; + tbx_conf_bed; + tbx_conf_gff; + tbx_conf_psltbl; + tbx_conf_sam; + tbx_conf_vcf; + tbx_destroy; + tbx_index; + tbx_index_build; + tbx_index_load; + tbx_name2id; + tbx_readrec; + tbx_seqnames; + vcf_format; + vcf_hdr_read; + vcf_hdr_write; + vcf_parse; + vcf_read; + vcf_write; + vcf_write_line; +}; + +HTSLIB_1.1 { + bcf_get_fmt_id; + bcf_get_info_id; + faidx_iseq; + faidx_nseq; + faidx_seq_len; +} HTSLIB_1.0; + + +HTSLIB_1.2.1 { + bcf_copy; + bcf_sr_strerror; + hisremote; + hts_detect_format; + hts_format_description; + hts_get_format; + hts_hopen; + hts_set_opt; + regidx_destroy; + regidx_init; + regidx_insert; + regidx_nregs; + regidx_overlap; + regidx_parse_bed; + regidx_parse_tab; + regidx_seq_names; + regidx_seq_nregs; + seq_nt16_int; +} HTSLIB_1.1; + +HTSLIB_1.3 { + bcf_add_id; + bcf_empty; + bcf_hdr_merge; + bcf_index_build2; + bcf_index_load2; + bcf_remove_allele_set; + bgzf_compress; + cram_block_append; + cram_block_get_comp_size; + cram_block_get_content_id; + cram_block_get_content_type; + cram_block_get_crc32; + cram_block_get_data; + cram_block_get_offset; + cram_block_get_uncomp_size; + cram_block_set_comp_size; + cram_block_set_content_id; + cram_block_set_crc32; + cram_block_set_data; + cram_block_set_offset; + cram_block_set_uncomp_size; + cram_block_size; + cram_block_update_size; + cram_container_get_landmarks; + cram_container_get_length; + 
cram_container_get_num_blocks; + cram_container_is_empty; + cram_container_set_landmarks; + cram_container_set_length; + cram_container_set_num_blocks; + cram_container_size; + cram_copy_slice; + cram_fd_get_fp; + cram_fd_get_header; + cram_fd_get_version; + cram_fd_set_fp; + cram_fd_set_header; + cram_fd_set_version; + cram_major_vers; + cram_minor_vers; + cram_store_container; + cram_transcode_rg; + hfile_add_scheme_handler; + hfile_always_local; + hfile_always_remote; + hts_format_file_extension; + hts_idx_load2; + hts_idx_save_as; + hts_md5_destroy; + hts_md5_final; + hts_md5_hex; + hts_md5_init; + hts_md5_reset; + hts_md5_update; + hts_open_format; + hts_opt_add; + hts_opt_apply; + hts_opt_free; + hts_parse_decimal; + hts_parse_format; + hts_parse_opt_list; + int32_put_blk; + kgetline; + sam_index_build; + sam_index_build2; + sam_index_load2; + sam_open_mode_opts; + tbx_index_build2; + tbx_index_load2; +} HTSLIB_1.2.1; + +HTSLIB_1.4 { + bam_auxB2f; + bam_auxB2i; + bam_auxB_len; + bam_aux_update_str; + bam_mplp_constructor; + bam_mplp_destructor; + bam_mplp_reset; + bam_plp_constructor; + bam_plp_destructor; + bcf_hdr_format; + bcf_index_build3; + bcf_sr_destroy_threads; + bcf_sr_set_opt; + bcf_sr_set_threads; + bgzf_block_write; + bgzf_compression; + bgzf_index_dump_hfile; + bgzf_index_load_hfile; + bgzf_thread_pool; + cram_check_EOF; + cram_get_refs; + errmod_cal; + errmod_destroy; + errmod_init; + fai_build3; + fai_load3; + hgetdelim; + hgets; + hts_check_EOF; + hts_json_fnext; + hts_json_fskip_value; + hts_json_snext; + hts_json_sskip_value; + hts_realloc_or_die; + hts_set_cache_size; + hts_set_thread_pool; + hts_tpool_delete_result; + hts_tpool_destroy; + hts_tpool_dispatch; + hts_tpool_dispatch2; + hts_tpool_init; + hts_tpool_kill; + hts_tpool_next_result; + hts_tpool_next_result_wait; + hts_tpool_process_attach; + hts_tpool_process_destroy; + hts_tpool_process_detach; + hts_tpool_process_empty; + hts_tpool_process_flush; + hts_tpool_process_init; + 
hts_tpool_process_len; + hts_tpool_process_qsize; + hts_tpool_process_ref_decr; + hts_tpool_process_ref_incr; + hts_tpool_process_reset; + hts_tpool_process_shutdown; + hts_tpool_process_sz; + hts_tpool_result_data; + hts_tpool_size; + hts_tpool_wake_dispatch; + kputd; + probaln_glocal; + sam_cap_mapq; + sam_index_build3; + sam_prob_realn; + tbx_index_build3; +} HTSLIB_1.3; + +HTSLIB_1.5 { + hfile_set_blksize; + hts_get_log_level; + hts_log; + hts_set_log_level; +} HTSLIB_1.4; + +HTSLIB_1.6 { + hts_drand48; + hts_erand48; + hts_lrand48; + hts_srand48; +} HTSLIB_1.5; + +HTSLIB_1.7 { + hfile_mem_get_buffer; + hfile_mem_steal_buffer; + hts_itr_multi_bam; + hts_itr_multi_cram; + hts_itr_multi_next; + hts_itr_regions; + hts_json_alloc_token; + hts_json_free_token; + hts_json_token_str; + hts_json_token_type; + hts_reglist_free; + sam_hdr_change_HD; + sam_itr_regions; +} HTSLIB_1.6; + +HTSLIB_1.9 { + bam_aux_update_array; + bam_aux_update_float; + bam_aux_update_int; + fai_fetchqual; + fai_load3_format; + fai_load_format; + faidx_fetch_qual; +} HTSLIB_1.7; + +HTSLIB_1.10 { + bam_cigar_table; + bam_mplp64_auto; + bam_plp64_auto; + bam_plp64_next; + bam_plp_insertion; + bam_set_qname; + bcf_idx_init; + bcf_idx_save; + bcf_index_load3; + bgzf_peek; + fai_fetch64; + fai_fetchqual64; + fai_parse_region; + fai_set_cache_size; + faidx_fetch_qual64; + faidx_fetch_seq64; + haddextension; + hts_free; + hts_idx_fmt; + hts_idx_load3; + hts_idx_tbi_name; + hts_parse_reg64; + hts_parse_region; + hts_reglist_create; + hts_resize_array_; + hts_tpool_dispatch3; + kgetline2; + regidx_init_string; + regidx_insert_list; + regidx_parse_reg; + regidx_parse_vcf; + regidx_push; + regitr_copy; + regitr_destroy; + regitr_init; + regitr_loop; + regitr_overlap; + regitr_reset; + sam_hdr_add_line; + sam_hdr_add_pg; + sam_hdr_count_lines; + sam_hdr_destroy; + sam_hdr_find_line_id; + sam_hdr_find_line_pos; + sam_hdr_find_tag_id; + sam_hdr_find_tag_pos; + sam_hdr_init; + sam_hdr_line_index; + 
sam_hdr_line_name; + sam_hdr_name2tid; + sam_hdr_nref; + sam_hdr_pg_id; + sam_hdr_remove_except; + sam_hdr_remove_line_id; + sam_hdr_remove_line_pos; + sam_hdr_remove_lines; + sam_hdr_remove_tag_id; + sam_hdr_tid2len; + sam_hdr_tid2name; + sam_hdr_update_line; + sam_idx_init; + sam_idx_save; + sam_index_load3; + sam_itr_regarray; + sam_parse_region; + tbx_index_load3; +} HTSLIB_1.9; + +HTSLIB_1.11 { + fai_path; + hts_lib_shutdown; + hts_tpool_process_is_shutdown; + vcf_open_mode; +} HTSLIB_1.10; + +HTSLIB_1.12 { + bam_parse_cigar; + bam_set1; + hfile_has_plugin; + hfile_list_plugins; + hfile_list_schemes; + hts_feature_string; + hts_features; + hts_filter_eval; + hts_filter_free; + hts_filter_init; + hts_set_filter_expression; + hts_test_feature; + sam_parse_cigar; + sam_passes_filter; +} HTSLIB_1.11; + +HTSLIB_1.13 { + hts_idx_nseq; +} HTSLIB_1.12; + +HTSLIB_1.14 { + bam_mods_at_next_pos; + bam_mods_at_qpos; + bam_next_basemod; + bam_parse_basemod; + bam_plp_insertion_mod; + hts_base_mod_state_alloc; + hts_base_mod_state_free; + hts_flush; +} HTSLIB_1.13; + +HTSLIB_1.15 { + hts_detect_format2; +} HTSLIB_1.14; + +HTSLIB_1.16 { + bam_mods_query_type; + bam_mods_recorded; + bcf_has_variant_type; + bcf_has_variant_types; + bcf_variant_length; + cram_decode_slice_header; + cram_free_slice_header; + cram_slice_hdr_get_coords; + cram_slice_hdr_get_embed_ref_id; + cram_slice_hdr_get_num_blocks; + hts_filter_eval2; +} HTSLIB_1.15; + +HTSLIB_1.17 { + bam_aux_first; + bam_aux_next; + bam_aux_remove; + bcf_strerror; + cram_block_get_method; + cram_cid2ds_free; + cram_cid2ds_query; + cram_codec_describe; + cram_codec_get_content_ids; + cram_container_get_num_bases; + cram_container_get_num_records; + cram_decode_compression_header; + cram_describe_encodings; + cram_expand_method; + cram_free_compression_header; + cram_update_cid2ds_map; + fai_adjust_region; + fai_line_length; + faidx_seq_len64; +} HTSLIB_1.16; + +HTSLIB_1.18 { + bam_mods_queryi; + bam_parse_basemod2; + 
fai_thread_pool; +} HTSLIB_1.17; + +HTSLIB_1.20 { + tbx_conf_gaf; +} HTSLIB_1.18; + +HTSLIB_1.21 { + cram_container_get_coords; + cram_container_num2offset; + cram_container_offset2num; + cram_filter_container; + cram_index_extents; + cram_num_containers; + cram_num_containers_between; +} HTSLIB_1.20; diff --git a/src/htslib-1.19.1/htslib.mk b/src/htslib-1.21/htslib.mk similarity index 100% rename from src/htslib-1.19.1/htslib.mk rename to src/htslib-1.21/htslib.mk diff --git a/src/htslib-1.18/htslib.pc.in b/src/htslib-1.21/htslib.pc.in similarity index 100% rename from src/htslib-1.18/htslib.pc.in rename to src/htslib-1.21/htslib.pc.in diff --git a/src/htslib-1.21/htslib/bgzf.h b/src/htslib-1.21/htslib/bgzf.h new file mode 100644 index 0000000..87d4c6a --- /dev/null +++ b/src/htslib-1.21/htslib/bgzf.h @@ -0,0 +1,506 @@ +/// @file htslib/bgzf.h +/// Low-level routines for direct BGZF operations. +/* + Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + 2011, 2012 Attractive Chaos + Copyright (C) 2009, 2013, 2014, 2017, 2018-2019, 2022-2024 Genome Research Ltd + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +/* The BGZF library was originally written by Bob Handsaker from the Broad + * Institute. It was later improved by the SAMtools developers. */ + +#ifndef HTSLIB_BGZF_H +#define HTSLIB_BGZF_H + +#include +#include +#include + +#include "hts_defs.h" + +// Ensure ssize_t exists within this header. All #includes must precede this, +// and ssize_t must be undefined again at the end of this header. +#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t +#define HTSLIB_SSIZE_T +#define ssize_t intptr_t +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define BGZF_BLOCK_SIZE 0xff00 // make sure compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE +#define BGZF_MAX_BLOCK_SIZE 0x10000 + +#define BGZF_ERR_ZLIB 1 +#define BGZF_ERR_HEADER 2 +#define BGZF_ERR_IO 4 +#define BGZF_ERR_MISUSE 8 +#define BGZF_ERR_MT 16 // stream cannot be multi-threaded +#define BGZF_ERR_CRC 32 + +struct hFILE; +struct hts_tpool; +struct kstring_t; +struct bgzf_mtaux_t; +typedef struct bgzidx_t bgzidx_t; +typedef struct bgzf_cache_t bgzf_cache_t; +struct z_stream_s; + +struct BGZF { + // Reserved bits should be written as 0; read as "don't care" + unsigned errcode:16, reserved:1, is_write:1, no_eof_block:1, is_be:1; + signed compress_level:9; + unsigned last_block_eof:1, is_compressed:1, is_gzip:1; + int cache_size; + int block_length, block_clength, block_offset; + int64_t block_address, uncompressed_address; + void *uncompressed_block, *compressed_block; + bgzf_cache_t *cache; + struct hFILE *fp; // actual file handle + struct bgzf_mtaux_t *mt; // only used for multi-threading + bgzidx_t *idx; // BGZF index + int idx_build_otf; // build index on the fly, set by 
bgzf_index_build_init() + struct z_stream_s *gz_stream; // for gzip-compressed files + int64_t seeked; // virtual offset of last seek +}; +#ifndef HTS_BGZF_TYPEDEF +typedef struct BGZF BGZF; +#define HTS_BGZF_TYPEDEF +#endif + + /****************** + * Basic routines * + ******************/ + + /** + * Open an existing file descriptor for reading or writing. + * + * @param fd file descriptor + * Note that the file must be opened in binary mode, or else + * there will be problems on platforms that make a difference + * between text and binary mode. + * @param mode mode matching /[rwag][u0-9]+/: 'r' for reading, 'w' for + * writing, 'a' for appending, 'g' for gzip rather than BGZF + * compression (with 'w' only), and digit specifies the zlib + * compression level. + * Note that there is a distinction between 'u' and '0': the + * first yields plain uncompressed output whereas the latter + * outputs uncompressed data wrapped in the zlib format. + * @return BGZF file handler; 0 on error + */ + HTSLIB_EXPORT + BGZF* bgzf_dopen(int fd, const char *mode); + + #define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility + + /** + * Open the specified file for reading or writing. + */ + HTSLIB_EXPORT + BGZF* bgzf_open(const char* path, const char *mode); + + /** + * Open an existing hFILE stream for reading or writing. + */ + HTSLIB_EXPORT + BGZF* bgzf_hopen(struct hFILE *fp, const char *mode); + + /** + * Close the BGZF and free all associated resources. + * + * @param fp BGZF file handler + * @return 0 on success and -1 on error + */ + HTSLIB_EXPORT + int bgzf_close(BGZF *fp); + + /** + * Read up to _length_ bytes from the file storing into _data_. 
+ * + * @param fp BGZF file handler + * @param data data array to read into + * @param length size of data to read + * @return number of bytes actually read; 0 on end-of-file and -1 on error + */ + HTSLIB_EXPORT + ssize_t bgzf_read(BGZF *fp, void *data, size_t length) HTS_RESULT_USED; + +/** + * bgzf_read optimised for small quantities, as a static inline + * See bgzf_read() normal function for return values. + */ +static inline ssize_t bgzf_read_small(BGZF *fp, void *data, size_t length) { + // A block length of 0 implies current block isn't loaded (see + // bgzf_seek_common). That gives negative available so careful on types + if ((ssize_t)length < fp->block_length - fp->block_offset) { + // Short cut the common and easy mode + memcpy((uint8_t *)data, + (uint8_t *)fp->uncompressed_block + fp->block_offset, + length); + fp->block_offset += length; + fp->uncompressed_address += length; + return length; + } else { + return bgzf_read(fp, data, length); + } +} + + /** + * Write _length_ bytes from _data_ to the file. If no I/O errors occur, + * the complete _length_ bytes will be written (or queued for writing). + * + * @param fp BGZF file handler + * @param data data array to write + * @param length size of data to write + * @return number of bytes written (i.e., _length_); negative on error + */ + HTSLIB_EXPORT + ssize_t bgzf_write(BGZF *fp, const void *data, size_t length) HTS_RESULT_USED; + +/** + * bgzf_write optimised for small quantities, as a static inline + * See bgzf_write() normal function for return values. 
+ */ +static inline +ssize_t bgzf_write_small(BGZF *fp, const void *data, size_t length) { + if (fp->is_compressed + && (size_t) (BGZF_BLOCK_SIZE - fp->block_offset) > length) { + // Short cut the common and easy mode + memcpy((uint8_t *)fp->uncompressed_block + fp->block_offset, + data, length); + fp->block_offset += length; + return length; + } else { + return bgzf_write(fp, data, length); + } +} + + /** + * Write _length_ bytes from _data_ to the file, the index will be used to + * decide the amount of uncompressed data to be written to each bgzip block. + * If no I/O errors occur, the complete _length_ bytes will be written (or + * queued for writing). + * @param fp BGZF file handler + * @param data data array to write + * @param length size of data to write + * @return number of bytes written (i.e., _length_); negative on error + */ + HTSLIB_EXPORT + ssize_t bgzf_block_write(BGZF *fp, const void *data, size_t length); + + /** + * Returns the next byte in the file without consuming it. + * @param fp BGZF file handler + * @return -1 on EOF, + * -2 on error, + * otherwise the unsigned byte value. + */ + HTSLIB_EXPORT + int bgzf_peek(BGZF *fp); + + /** + * Read up to _length_ bytes directly from the underlying stream without + * decompressing. Bypasses BGZF blocking, so must be used with care in + * specialised circumstances only. + * + * @param fp BGZF file handler + * @param data data array to read into + * @param length number of raw bytes to read + * @return number of bytes actually read; 0 on end-of-file and -1 on error + */ + HTSLIB_EXPORT + ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length) HTS_RESULT_USED; + + /** + * Write _length_ bytes directly to the underlying stream without + * compressing. Bypasses BGZF blocking, so must be used with care + * in specialised circumstances only. 
+ * + * @param fp BGZF file handler + * @param data data array to write + * @param length number of raw bytes to write + * @return number of bytes actually written; -1 on error + */ + HTSLIB_EXPORT + ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length) HTS_RESULT_USED; + + /** + * Write the data in the buffer to the file. + * + * @param fp BGZF file handle + * @return 0 on success and -1 on error + */ + HTSLIB_EXPORT + int bgzf_flush(BGZF *fp) HTS_RESULT_USED; + + /** + * Return a virtual file pointer to the current location in the file. + * No interpretation of the value should be made, other than a subsequent + * call to bgzf_seek can be used to position the file at the same point. + * Return value is non-negative on success. + */ + #define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF)) + + /** + * Set the file to read from the location specified by _pos_. + * + * @param fp BGZF file handler + * @param pos virtual file offset returned by bgzf_tell() + * @param whence must be SEEK_SET + * @return 0 on success and -1 on error + * + * @note It is not permitted to seek on files open for writing, + * or files compressed with gzip (as opposed to bgzip). 
+ */ + HTSLIB_EXPORT + int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence) HTS_RESULT_USED; + + /** + * Check if the BGZF end-of-file (EOF) marker is present + * + * @param fp BGZF file handler opened for reading + * @return 1 if the EOF marker is present and correct; + * 2 if it can't be checked, e.g., because fp isn't seekable; + * 0 if the EOF marker is absent; + * -1 (with errno set) on error + */ + HTSLIB_EXPORT + int bgzf_check_EOF(BGZF *fp); + + /** Return the file's compression format + * + * @param fp BGZF file handle + * @return A small integer matching the corresponding + * `enum htsCompression` value: + * - 0 / `no_compression` if the file is uncompressed + * - 1 / `gzip` if the file is plain GZIP-compressed + * - 2 / `bgzf` if the file is BGZF-compressed + * @since 1.4 + */ + HTSLIB_EXPORT + int bgzf_compression(BGZF *fp); + + /** + * Check if a file is in the BGZF format + * + * @param fn file name + * @return 1 if _fn_ is BGZF; 0 if not or on I/O error + */ + HTSLIB_EXPORT + int bgzf_is_bgzf(const char *fn) HTS_DEPRECATED("Use bgzf_compression() or hts_detect_format() instead"); + + /********************* + * Advanced routines * + *********************/ + + /** + * Set the cache size. Only effective when compiled with -DBGZF_CACHE. + * + * @param fp BGZF file handler + * @param size size of cache in bytes; 0 to disable caching (default) + */ + HTSLIB_EXPORT + void bgzf_set_cache_size(BGZF *fp, int size); + + /** + * Flush the file if the remaining buffer size is smaller than _size_ + * @return 0 if flushing succeeded or was not needed; negative on error + */ + HTSLIB_EXPORT + int bgzf_flush_try(BGZF *fp, ssize_t size) HTS_RESULT_USED; + + /** + * Read one byte from a BGZF file. It is faster than bgzf_read() + * @param fp BGZF file handler + * @return byte read; -1 on end-of-file or error + */ + HTSLIB_EXPORT + int bgzf_getc(BGZF *fp); + + /** + * Read one line from a BGZF file. 
It is faster than bgzf_getc() + * + * @param fp BGZF file handler + * @param delim delimiter + * @param str string to write to; must be initialized + * @return length of the string (capped at INT_MAX); + * -1 on end-of-file; <= -2 on error + */ + HTSLIB_EXPORT + int bgzf_getline(BGZF *fp, int delim, struct kstring_t *str); + + /** + * Read the next BGZF block. + */ + HTSLIB_EXPORT + int bgzf_read_block(BGZF *fp) HTS_RESULT_USED; + + /** + * Enable multi-threading via a shared thread pool. This means + * both encoder and decoder can balance usage across a single pool + * of worker jobs. + * + * @param fp BGZF file handler + * @param pool The thread pool (see hts_create_threads) + * @param qsize The size of the job queue. If 0 this is twice the + * number of threads in the pool. + */ + HTSLIB_EXPORT + int bgzf_thread_pool(BGZF *fp, struct hts_tpool *pool, int qsize); + + /** + * Enable multi-threading + * + * @param fp BGZF file handler + * @param n_threads #threads used for reading / writing + * @param n_sub_blks Unused (was #blocks processed by each thread) + */ + HTSLIB_EXPORT + int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks); + + /** + * Compress a single BGZF block. + * + * @param dst output buffer (must have size >= BGZF_MAX_BLOCK_SIZE) + * @param dlen size of output buffer; updated on return to the number + * of bytes actually written to dst + * @param src buffer to be compressed + * @param slen size of data to compress (must be <= BGZF_BLOCK_SIZE) + * @param level compression level + * @return 0 on success and negative on error + */ + HTSLIB_EXPORT + int bgzf_compress(void *dst, size_t *dlen, const void *src, size_t slen, int level); + + /******************* + * bgzidx routines * + *******************/ + + /** + * Position BGZF at the uncompressed offset + * + * @param fp BGZF file handler; must be opened for reading + * @param uoffset file offset in the uncompressed data + * @param where must be SEEK_SET + * + * Returns 0 on success and -1 on error. 
+ * + * @note It is not permitted to seek on files open for writing, + * or files compressed with gzip (as opposed to bgzip). + */ + HTSLIB_EXPORT + int bgzf_useek(BGZF *fp, off_t uoffset, int where) HTS_RESULT_USED; + + /** + * Position in uncompressed BGZF + * + * @param fp BGZF file handler; must be opened for reading + * + * Returns the current offset on success and -1 on error. + */ + HTSLIB_EXPORT + off_t bgzf_utell(BGZF *fp); + + /** + * Tell BGZF to build index while compressing. + * + * @param fp BGZF file handler; can be opened for reading or writing. + * + * Returns 0 on success and -1 on error. + * + * @note This function must be called before any data has been read or + * written, and in particular before calling bgzf_mt() on the same + * file handle (as threads may start reading data before the index + * has been set up). + */ + HTSLIB_EXPORT + int bgzf_index_build_init(BGZF *fp); + + /// Load BGZF index + /** + * @param fp BGZF file handler + * @param bname base name + * @param suffix suffix to add to bname (can be NULL) + * @return 0 on success and -1 on error. + */ + HTSLIB_EXPORT + int bgzf_index_load(BGZF *fp, + const char *bname, const char *suffix) HTS_RESULT_USED; + + /// Load BGZF index from an hFILE + /** + * @param fp BGZF file handle + * @param idx hFILE to read from + * @param name file name (for error reporting only; can be NULL) + * @return 0 on success and -1 on error. + * + * Populates @p fp with index data read from the hFILE handle @p idx. + * The file pointer to @idx should point to the start of the index + * data when this function is called. + * + * The file name can optionally be passed in the @p name parameter. This + * is only used for printing error messages; if NULL the word "index" is + * used instead. 
+ */ + HTSLIB_EXPORT + int bgzf_index_load_hfile(BGZF *fp, struct hFILE *idx, + const char *name) HTS_RESULT_USED; + + /// Save BGZF index + /** + * @param fp BGZF file handler + * @param bname base name + * @param suffix suffix to add to bname (can be NULL) + * @return 0 on success and -1 on error. + */ + HTSLIB_EXPORT + int bgzf_index_dump(BGZF *fp, + const char *bname, const char *suffix) HTS_RESULT_USED; + + /// Write a BGZF index to an hFILE + /** + * @param fp BGZF file handle + * @param idx hFILE to write to + * @param name file name (for error reporting only, can be NULL) + * @return 0 on success and -1 on error. + * + * Write index data from @p fp to the file @p idx. + * + * The file name can optionally be passed in the @p name parameter. This + * is only used for printing error messages; if NULL the word "index" is + * used instead. + */ + + HTSLIB_EXPORT + int bgzf_index_dump_hfile(BGZF *fp, struct hFILE *idx, + const char *name) HTS_RESULT_USED; + +#ifdef __cplusplus +} +#endif + +#ifdef HTSLIB_SSIZE_T +#undef HTSLIB_SSIZE_T +#undef ssize_t +#endif + +#endif diff --git a/src/htslib-1.21/htslib/cram.h b/src/htslib-1.21/htslib/cram.h new file mode 100644 index 0000000..ddc44bb --- /dev/null +++ b/src/htslib-1.21/htslib/cram.h @@ -0,0 +1,826 @@ +/// @file htslib/cram.h +/// CRAM format-specific API functions. +/* + Copyright (C) 2015, 2016, 2018-2020, 2022-2024 Genome Research Ltd. 
+ + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +/** @file + * Consider using the higher level hts_*() API for programs that wish to + * be file format agnostic (see htslib/hts.h). + * + * This API should be used for CRAM specific code. The specifics of the + * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h + * although these should not be included directly (use this file instead). + */ + +#ifndef HTSLIB_CRAM_H +#define HTSLIB_CRAM_H + +#include +#include +#include + +#include "hts_defs.h" +#include "hts.h" +#include "sam.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// see cram/cram_structs.h for an internal more complete copy of this enum + +// Htslib 1.11 had these listed without any hts prefix, and included +// some internal values such as RANS1 and GZIP_RLE (which shouldn't have ever +// been public). 
+// +// We can't find evidence of these being used and the data type occurs +// nowhere in functions or structures meaning using it would be pointless. +// However for safety, if you absolute need the API to not change then +// define HTS_COMPAT to 101100 (XYYYZZ for X.Y[.Z], meaning 1.11). +#if defined(HTS_COMPAT) && HTS_COMPAT <= 101100 +enum cram_block_method { + // Public methods as defined in the CRAM spec. + BM_ERROR = -1, + + // CRAM 2.x and 3.0 + RAW = 0, + GZIP = 1, + BZIP2 = 2, + LZMA = 3, + RANS = 4, + + // NB: the subsequent numbers may change. They're simply here for + // compatibility with the old API, but may have no bearing on the + // internal way htslib works. DO NOT USE + RANS0 = 4, + RANS1 = 10, + GZIP_RLE = 11, +}; +#else + +// Values as defined in the CRAM specifications. +// See cram/cram_structs.h cram_block_method_int for an expanded version of +// this with local specialisations assigned to codes. +enum cram_block_method { + CRAM_COMP_UNKNOWN = -1, + + // CRAM 2.x and 3.0 + CRAM_COMP_RAW = 0, + CRAM_COMP_GZIP = 1, + CRAM_COMP_BZIP2 = 2, + + // CRAM 3.0 + CRAM_COMP_LZMA = 3, + CRAM_COMP_RANS4x8 = 4, // 4-way interleaving, 8-bit renormalisation + + // CRAM 3.1 + CRAM_COMP_RANSNx16 = 5, // both 4x16 and 32x16 variants, plus transforms + CRAM_COMP_ARITH = 6, // aka Range coding + CRAM_COMP_FQZ = 7, // FQZComp + CRAM_COMP_TOK3 = 8, // Name tokeniser +}; +#endif + +/* NOTE this structure may be expanded in future releases by appending + * additional fields. + * + * Do not assume the size is fixed and avoid using arrays of this struct. + */ +typedef struct { + enum cram_block_method method; + + // Generic compression level if known (0 if not). + // 1 or 9 for gzip min/max flag (else 5). 1-9 for bzip2 + // 1 or 11 for for tok3 (rans/arith encoder). 
+ int level; + + // For rans* and arith codecs + int order; + + // ransNx16/arith specific + int rle; + int pack; + int stripe; + int cat; + int nosz; + int Nway; + + // Arithmetic coder only + int ext; // external: use gz, xz or bzip2 +} cram_method_details; + +enum cram_content_type { + CT_ERROR = -1, + FILE_HEADER = 0, + COMPRESSION_HEADER = 1, + MAPPED_SLICE = 2, + UNMAPPED_SLICE = 3, // CRAM V1.0 only + EXTERNAL = 4, + CORE = 5, +}; + +// Opaque data types, see cram_structs for the fully fledged versions. +typedef struct cram_file_def cram_file_def; +typedef struct cram_fd cram_fd; +typedef struct cram_container cram_container; +typedef struct cram_block cram_block; +typedef struct cram_slice cram_slice; +typedef struct cram_metrics cram_metrics; +typedef struct cram_block_slice_hdr cram_block_slice_hdr; +typedef struct cram_block_compression_hdr cram_block_compression_hdr; +typedef struct cram_codec cram_codec; +typedef struct refs_t refs_t; + +struct hFILE; + +// Accessor functions + +/* + *----------------------------------------------------------------------------- + * cram_fd + */ +HTSLIB_EXPORT +sam_hdr_t *cram_fd_get_header(cram_fd *fd); + +HTSLIB_EXPORT +void cram_fd_set_header(cram_fd *fd, sam_hdr_t *hdr); + +HTSLIB_EXPORT +int cram_fd_get_version(cram_fd *fd); + +HTSLIB_EXPORT +void cram_fd_set_version(cram_fd *fd, int vers); + +HTSLIB_EXPORT +int cram_major_vers(cram_fd *fd); +HTSLIB_EXPORT +int cram_minor_vers(cram_fd *fd); + +HTSLIB_EXPORT +struct hFILE *cram_fd_get_fp(cram_fd *fd); +HTSLIB_EXPORT +void cram_fd_set_fp(cram_fd *fd, struct hFILE *fp); + + +/* + *----------------------------------------------------------------------------- + * cram_container + */ +HTSLIB_EXPORT +int32_t cram_container_get_length(cram_container *c); +HTSLIB_EXPORT +void cram_container_set_length(cram_container *c, int32_t length); +HTSLIB_EXPORT +int32_t cram_container_get_num_blocks(cram_container *c); +HTSLIB_EXPORT +void cram_container_set_num_blocks(cram_container 
*c, int32_t num_blocks); +HTSLIB_EXPORT +int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks); +HTSLIB_EXPORT +void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks, + int32_t *landmarks); +HTSLIB_EXPORT +int32_t cram_container_get_num_records(cram_container *c); +HTSLIB_EXPORT +int64_t cram_container_get_num_bases(cram_container *c); + +/* Returns true if the container is empty (EOF marker) */ +HTSLIB_EXPORT +int cram_container_is_empty(cram_fd *fd); + + +/* Returns chromosome and start/span from container struct */ +HTSLIB_EXPORT +void cram_container_get_coords(cram_container *c, + int *refid, hts_pos_t *start, hts_pos_t *span); + +/* + *----------------------------------------------------------------------------- + * cram_block + */ +HTSLIB_EXPORT +int32_t cram_block_get_content_id(cram_block *b); +HTSLIB_EXPORT +int32_t cram_block_get_comp_size(cram_block *b); +HTSLIB_EXPORT +int32_t cram_block_get_uncomp_size(cram_block *b); +HTSLIB_EXPORT +int32_t cram_block_get_crc32(cram_block *b); +HTSLIB_EXPORT +void * cram_block_get_data(cram_block *b); +HTSLIB_EXPORT +enum cram_content_type cram_block_get_content_type(cram_block *b); +HTSLIB_EXPORT +enum cram_block_method cram_block_get_method(cram_block *b); + +HTSLIB_EXPORT +cram_method_details *cram_expand_method(uint8_t *data, int32_t size, + enum cram_block_method comp); + +HTSLIB_EXPORT +void cram_block_set_content_id(cram_block *b, int32_t id); +HTSLIB_EXPORT +void cram_block_set_comp_size(cram_block *b, int32_t size); +HTSLIB_EXPORT +void cram_block_set_uncomp_size(cram_block *b, int32_t size); +HTSLIB_EXPORT +void cram_block_set_crc32(cram_block *b, int32_t crc); +HTSLIB_EXPORT +void cram_block_set_data(cram_block *b, void *data); + +HTSLIB_EXPORT +int cram_block_append(cram_block *b, const void *data, int size); +HTSLIB_EXPORT +void cram_block_update_size(cram_block *b); + +// Offset is known as "size" internally, but it can be confusing. 
+HTSLIB_EXPORT +size_t cram_block_get_offset(cram_block *b); +HTSLIB_EXPORT +void cram_block_set_offset(cram_block *b, size_t offset); + +/* + * Computes the size of a cram block, including the block + * header itself. + */ +HTSLIB_EXPORT +uint32_t cram_block_size(cram_block *b); + +/* + * Returns the Block Content ID values referred to by a cram_codec in + * ids[2]. + * + * -2 is unused. + * -1 is CORE + * >= 0 is the block with that Content ID + */ +HTSLIB_EXPORT +void cram_codec_get_content_ids(cram_codec *c, int ids[2]); + +/* + * Produces a human readable description of the codec parameters. + * This is appended to an existing kstring 'ks'. + * + * Returns 0 on succes, + * <0 on failure + */ +HTSLIB_EXPORT +int cram_codec_describe(cram_codec *c, kstring_t *ks); + +/* + * Renumbers RG numbers in a cram compression header. + * + * CRAM stores RG as the Nth number in the header, rather than a + * string holding the ID: tag. This is smaller in space, but means + * "samtools cat" to join files together that contain single but + * different RG lines needs a way of renumbering them. + * + * The file descriptor is expected to be immediately after the + * cram_container structure (ie before the cram compression header). + * Due to the nature of the CRAM format, this needs to read and write + * the blocks itself. Note that there may be multiple slices within + * the container, meaning multiple compression headers to manipulate. + * Changing RG may change the size of the compression header and + * therefore the length field in the container. Hence we rewrite all + * blocks just in case and also emit the adjusted container. + * + * The current implementation can only cope with renumbering a single + * RG (and only then if it is using HUFFMAN or BETA codecs). In + * theory it *may* be possible to renumber multiple RGs if they use + * HUFFMAN to the CORE block or use an external block unshared by any + * other data series. 
So we have an API that can be upgraded to + * support this, but do not implement it for now. An example + * implementation of RG as an EXTERNAL block would be to find that + * block and rewrite it, returning the number of blocks consumed. + * + * Returns 0 on success; + * -1 if unable to edit; + * -2 on other errors (eg I/O). + */ +HTSLIB_EXPORT +int cram_transcode_rg(cram_fd *in, cram_fd *out, + cram_container *c, + int nrg, int *in_rg, int *out_rg); + +/* + * Copies the blocks representing the next num_slice slices from a + * container from 'in' to 'out'. It is expected that the file pointer + * is just after the read of the cram_container and cram compression + * header. + * + * Returns 0 on success + * -1 on failure + */ +HTSLIB_EXPORT +int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice); + +/* + * Copies a container, but filtering it down to a specific region (as + * already specified in 'in' + * + * Returns 0 on success + * -1 on EOF + * -2 on error + */ +HTSLIB_EXPORT +int cram_filter_container(cram_fd *in, cram_fd *out, cram_container *c, + int *ref_id); + +/* + * Decodes a CRAM block compression header. + * Returns header ptr on success + * NULL on failure + */ +HTSLIB_EXPORT +cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, + cram_block *b); +/* + * Frees a cram_block_compression_hdr structure. + */ +HTSLIB_EXPORT +void cram_free_compression_header(cram_block_compression_hdr *hdr); + +typedef struct cram_cid2ds_t cram_cid2ds_t; + +/* + * Map cram block numbers to data-series. It's normally a 1:1 mapping, + * but in rare cases it can be 1:many (or even many:many). + * The key is the block number and the value is an index into the data-series + * array, which we iterate over until reaching a negative value. + * + * Provide cid2ds as NULL to allocate a new map or pass in an existing one + * to append to this map. The new (or existing) map is returned. 
+ * + * Returns the cid2ds (newly allocated or as provided) on success, + * NULL on failure. + */ +HTSLIB_EXPORT +cram_cid2ds_t *cram_update_cid2ds_map(cram_block_compression_hdr *hdr, + cram_cid2ds_t *cid2ds); + +/* + * Return a list of data series observed as belonging to a block with + * the specified content_id. *n is the number of data series + * returned, or 0 if block is unused. + * Block content_id of -1 is used to indicate the CORE block. + * + * The pointer returned is owned by the cram_cid2ds state and should + * not be freed by the caller. + */ +HTSLIB_EXPORT +int *cram_cid2ds_query(cram_cid2ds_t *c2d, int content_id, int *n); + +/* + * Frees a cram_cid2ds_t allocated by cram_update_cid2ds_map + */ +HTSLIB_EXPORT +void cram_cid2ds_free(cram_cid2ds_t *cid2ds); + +/* + * Produces a description of the record and tag encodings held within + * a compression header and appends to 'ks'. + * + * Returns 0 on success, + * <0 on failure. + */ +HTSLIB_EXPORT +int cram_describe_encodings(cram_block_compression_hdr *hdr, kstring_t *ks); + +/* + *----------------------------------------------------------------------------- + * cram slice interrogation + */ + +/* + * Returns the number of cram blocks within this slice. + */ +HTSLIB_EXPORT +int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr); + +/* + * Returns the block content_id for the block containing an embedded reference + * sequence. If none is present, -1 is returned. + */ +HTSLIB_EXPORT +int cram_slice_hdr_get_embed_ref_id(cram_block_slice_hdr *h); + +/* + * Returns slice reference ID, start and span (length) coordinates. + * Return parameters may be NULL in which case they are ignored. + */ +HTSLIB_EXPORT +void cram_slice_hdr_get_coords(cram_block_slice_hdr *h, + int *refid, hts_pos_t *start, hts_pos_t *span); + +/* + * Decodes a slice header from a cram block. + * Returns the opaque cram_block_slice_hdr pointer on success, + * NULL on failure. 
+ */ +HTSLIB_EXPORT +cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b); + +/* + * Frees a cram_block_slice_hdr structure. + */ +HTSLIB_EXPORT +void cram_free_slice_header(cram_block_slice_hdr *hdr); + +/* + *----------------------------------------------------------------------------- + * cram_io basics + */ + +/**@{ ---------------------------------------------------------------------- + * CRAM blocks - the dynamically growable data block. We have code to + * create, update, (un)compress and read/write. + * + * These are derived from the deflate_interlaced.c blocks, but with the + * CRAM extension of content types and IDs. + */ + +/*! Allocates a new cram_block structure with a specified content_type and + * id. + * + * @return + * Returns block pointer on success; + * NULL on failure + * + * The cram_block struct returned by a successful call should be freed + * via cram_free_block() when it is no longer needed. + */ +HTSLIB_EXPORT +cram_block *cram_new_block(enum cram_content_type content_type, + int content_id); + +/*! Reads a block from a cram file. + * + * @return + * Returns cram_block pointer on success; + * NULL on failure + * + * The cram_block struct returned by a successful call should be freed + * via cram_free_block() when it is no longer needed. + */ +HTSLIB_EXPORT +cram_block *cram_read_block(cram_fd *fd); + +/*! Writes a CRAM block. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_write_block(cram_fd *fd, cram_block *b); + +/*! Frees a CRAM block, deallocating internal data too. + */ +HTSLIB_EXPORT +void cram_free_block(cram_block *b); + +/*! Uncompresses a CRAM block, if compressed. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_uncompress_block(cram_block *b); + +/*! Compresses a block. + * + * Compresses a block using one of two different zlib strategies. If we only + * want one choice set strat2 to be -1. 
+ * + * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED + * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is + * significantly faster. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, + int method, int level); +int cram_compress_block2(cram_fd *fd, cram_slice *s, + cram_block *b, cram_metrics *metrics, + int method, int level); + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * Containers + */ + +/*! Creates a new container, specifying the maximum number of slices + * and records permitted. + * + * @return + * Returns cram_container ptr on success; + * NULL on failure + * + * The cram_container struct returned by a successful call should be freed + * via cram_free_container() when it is no longer needed. + */ +HTSLIB_EXPORT +cram_container *cram_new_container(int nrec, int nslice); +HTSLIB_EXPORT +void cram_free_container(cram_container *c); + +/*! Reads a container header. + * + * @return + * Returns cram_container on success; + * NULL on failure or no container left (fd->err == 0). + * + * The cram_container struct returned by a successful call should be freed + * via cram_free_container() when it is no longer needed. + */ +HTSLIB_EXPORT +cram_container *cram_read_container(cram_fd *fd); + +/*! Writes a container structure. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_write_container(cram_fd *fd, cram_container *h); + +/* + * Stores the container structure in dat and returns *size as the + * number of bytes written to dat[]. The input size of dat is also + * held in *size and should be initialised to cram_container_size(c). 
+ * + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size); + +HTSLIB_EXPORT +int cram_container_size(cram_container *c); + +/**@}*/ +/**@{ ---------------------------------------------------------------------- + * The top-level cram opening, closing and option handling + */ + +/*! Opens a CRAM file for read (mode "rb") or write ("wb"). + * + * The filename may be "-" to indicate stdin or stdout. + * + * @return + * Returns file handle on success; + * NULL on failure. + */ +HTSLIB_EXPORT +cram_fd *cram_open(const char *filename, const char *mode); + +/*! Opens an existing stream for reading or writing. + * + * @return + * Returns file handle on success; + * NULL on failure. + */ +HTSLIB_EXPORT +cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode); + +/*! Closes a CRAM file. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_close(cram_fd *fd); + +/* + * Seek within a CRAM file. + * + * Returns 0 on success + * -1 on failure + */ +HTSLIB_EXPORT +int cram_seek(cram_fd *fd, off_t offset, int whence); + +/* + * Flushes a CRAM file. + * Useful for when writing to stdout without wishing to close the stream. + * + * Returns 0 on success + * -1 on failure + */ +HTSLIB_EXPORT +int cram_flush(cram_fd *fd); + +/*! Checks for end of file on a cram_fd stream. + * + * @return + * Returns 0 if not at end of file + * 1 if we hit an expected EOF (end of range or EOF block) + * 2 for other EOF (end of stream without EOF block) + */ +HTSLIB_EXPORT +int cram_eof(cram_fd *fd); + +/*! Sets options on the cram_fd. + * + * See CRAM_OPT_* definitions in hts.h. + * Use this immediately after opening. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...); + +/*! Sets options on the cram_fd. + * + * See CRAM_OPT_* definitions in hts.h. 
+ * Use this immediately after opening. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args); + +/*! + * Attaches a header to a cram_fd. + * + * This should be used when creating a new cram_fd for writing where + * we have an SAM_hdr already constructed (eg from a file we've read + * in). + * + * @return + * Returns 0 on success; + * -1 on failure + */ +HTSLIB_EXPORT +int cram_set_header(cram_fd *fd, sam_hdr_t *hdr); + +/*! Check if this file has a proper EOF block + * + * @return + * Returns 3 if the file is a version of CRAM that does not contain EOF blocks + * 2 if the file is a stream and thus unseekable + * 1 if the file contains an EOF block + * 0 if the file does not contain an EOF block + * -1 if an error occurred whilst reading the file or we could not seek back to where we were + * + */ +HTSLIB_EXPORT +int cram_check_EOF(cram_fd *fd); + +/* As int32_decoded/encode, but from/to blocks instead of cram_fd */ +HTSLIB_EXPORT +int int32_put_blk(cram_block *b, int32_t val); + +/**@}*/ +/**@{ ------------------------------------------------------------------- + * Old typedef and function names for compatibility with existing code. + * Header functionality is now provided by sam.h's sam_hdr_t functions. + */ + +typedef sam_hdr_t SAM_hdr; + +/*! Tokenises a SAM header into a hash table. + * + * Also extracts a few bits on specific data types, such as @RG lines. + * + * @return + * Returns a SAM_hdr struct on success (free with sam_hdr_free()); + * NULL on failure + */ +static inline SAM_hdr *sam_hdr_parse_(const char *hdr, size_t len) { return sam_hdr_parse(len, hdr); } + +/*! Deallocates all storage used by a SAM_hdr struct. + * + * This also decrements the header reference count. If after decrementing + * it is still non-zero then the header is assumed to be in use by another + * caller and the free is not done. 
+ */ +static inline void sam_hdr_free(SAM_hdr *hdr) { sam_hdr_destroy(hdr); } + +/* sam_hdr_length() and sam_hdr_str() are now provided by sam.h. */ + +/*! Add an @PG line. + * + * If we wish complete control over this use sam_hdr_add_line() directly. This + * function uses that, but attempts to do a lot of tedious house work for + * you too. + * + * - It will generate a suitable ID if the supplied one clashes. + * - It will generate multiple @PG records if we have multiple PG chains. + * + * Call it as per sam_hdr_add_line() with a series of key,value pairs ending + * in NULL. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +#define sam_hdr_add_PG sam_hdr_add_pg + +/**@{ -------------------------------------------------------------------*/ + +/*! + * Returns the refs_t structure used by a cram file handle. + * + * This may be used in conjunction with option CRAM_OPT_SHARED_REF to + * share reference memory between multiple file handles. + * + * @return + * Returns NULL if none exists or the file handle is not a CRAM file. + */ +HTSLIB_EXPORT +refs_t *cram_get_refs(htsFile *fd); + +/*! + * Returns the file offsets of CRAM slices covering a specific region + * query. Note both offsets are the START of the slice. + * + * first will point to the start of the first overlapping slice + * last will point to the start of the last overlapping slice + * + * @return + * Returns 0 on success + * <0 on failure + */ +HTSLIB_EXPORT +int cram_index_extents(cram_fd *fd, int refid, hts_pos_t start, hts_pos_t end, + off_t *first, off_t *last); + +/*! Returns the total number of containers in the CRAM index. + * + * Note the index is not required to have an entry for every container, but it + * will always have an index entry for the start of each chromosome. + * (Although in practice our indices do contain one entry per container.)
+ * + * This is equivalent to cram_num_containers_between(fd, 0, 0, NULL, NULL) + */ +HTSLIB_EXPORT +int64_t cram_num_containers(cram_fd *fd); + +/*! Returns the number of containers in the CRAM index within given offsets. + * + * The cstart and cend offsets are the locations of the start of containers + * as returned by index_container_offset. + * + * If non-NULL, first and last will hold the inclusive range of container + * numbers, counting from zero. + * + * @return + * Returns the number of containers, equivalent to *last-*first+1. + */ +HTSLIB_EXPORT +int64_t cram_num_containers_between(cram_fd *fd, + off_t cstart, off_t cend, + int64_t *first, int64_t *last); + +/*! Returns the byte offset for the start of the n^th container. + * + * The index must have previously been loaded, otherwise <0 is returned. + */ +HTSLIB_EXPORT +off_t cram_container_num2offset(cram_fd *fd, int64_t n); + +/*! Returns the container number for the first container at offset >= pos. + * + * The index must have previously been loaded, otherwise <0 is returned. + */ +HTSLIB_EXPORT +int64_t cram_container_offset2num(cram_fd *fd, off_t pos); + +/**@}*/ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/htslib-1.18/htslib/faidx.h b/src/htslib-1.21/htslib/faidx.h similarity index 100% rename from src/htslib-1.18/htslib/faidx.h rename to src/htslib-1.21/htslib/faidx.h diff --git a/src/htslib-1.19.1/htslib/hfile.h b/src/htslib-1.21/htslib/hfile.h similarity index 100% rename from src/htslib-1.19.1/htslib/hfile.h rename to src/htslib-1.21/htslib/hfile.h diff --git a/src/htslib-1.21/htslib/hts.h b/src/htslib-1.21/htslib/hts.h new file mode 100644 index 0000000..4f85424 --- /dev/null +++ b/src/htslib-1.21/htslib/hts.h @@ -0,0 +1,1588 @@ +/// @file htslib/hts.h +/// Format-neutral I/O, indexing, and iterator API functions. +/* + Copyright (C) 2012-2022 Genome Research Ltd. + Copyright (C) 2010, 2012 Broad Institute. 
+ Portions copyright (C) 2003-2006, 2008-2010 by Heng Li + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef HTSLIB_HTS_H +#define HTSLIB_HTS_H + +#include +#include +#include + +#include "hts_defs.h" +#include "hts_log.h" +#include "kstring.h" +#include "kroundup.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Separator used to split HTS_PATH (for plugins); REF_PATH (cram references) +#if defined(_WIN32) || defined(__MSYS__) +#define HTS_PATH_SEPARATOR_CHAR ';' +#define HTS_PATH_SEPARATOR_STR ";" +#else +#define HTS_PATH_SEPARATOR_CHAR ':' +#define HTS_PATH_SEPARATOR_STR ":" +#endif + +#ifndef HTS_BGZF_TYPEDEF +typedef struct BGZF BGZF; +#define HTS_BGZF_TYPEDEF +#endif +struct cram_fd; +struct hFILE; +struct hts_tpool; +struct sam_hdr_t; + +/** + * @hideinitializer + * Deprecated macro to expand a dynamic array of a given type + * + * @param type_t The type of the array elements + * @param[in] n Requested number of elements of type type_t + * @param[in,out] m Size of memory allocated + * @param[in,out] ptr Pointer to the array + * + * @discussion + * Do not use this macro. Use hts_resize() instead as allows allocation + * failures to be handled more gracefully. + * + * The array *ptr will be expanded if necessary so that it can hold @p n + * or more elements. If the array is expanded then the new size will be + * written to @p m and the value in @p ptr may change. + * + * It must be possible to take the address of @p ptr and @p m must be usable + * as an lvalue. + * + * @bug + * If the memory allocation fails, this will call exit(1). This is + * not ideal behaviour in a library. + */ +#define hts_expand(type_t, n, m, ptr) do { \ + if ((n) > (m)) { \ + size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ + int, void **, const char *); \ + (m) = hts_realloc_or_die((n) >= 1 ? 
(n) : 1, (m), sizeof(m), \ + sizeof(type_t), 0, \ + (void **)&(ptr), __func__); \ + } \ + } while (0) + +/** + * @hideinitializer + * Macro to expand a dynamic array, zeroing any newly-allocated memory + * + * @param type_t The type of the array elements + * @param[in] n Requested number of elements of type type_t + * @param[in,out] m Size of memory allocated + * @param[in,out] ptr Pointer to the array + * + * @discussion + * Do not use this macro. Use hts_resize() instead as allows allocation + * failures to be handled more gracefully. + * + * As for hts_expand(), except the bytes that make up the array elements + * between the old and new values of @p m are set to zero using memset(). + * + * @bug + * If the memory allocation fails, this will call exit(1). This is + * not ideal behaviour in a library. + */ + + +#define hts_expand0(type_t, n, m, ptr) do { \ + if ((n) > (m)) { \ + size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ + int, void **, const char *); \ + (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \ + sizeof(type_t), 1, \ + (void **)&(ptr), __func__); \ + } \ + } while (0) + +// For internal use (by hts_resize()) only +HTSLIB_EXPORT +int hts_resize_array_(size_t, size_t, size_t, void *, void **, int, + const char *); + +#define HTS_RESIZE_CLEAR 1 + +/** + * @hideinitializer + * Macro to expand a dynamic array of a given type + * + * @param type_t The type of the array elements + * @param[in] num Requested number of elements of type type_t + * @param[in,out] size_ptr Pointer to where the size (in elements) of the + array is stored. + * @param[in,out] ptr Location of the pointer to the array + * @param[in] flags Option flags + * + * @return 0 for success, or negative if an error occurred. + * + * @discussion + * The array *ptr will be expanded if necessary so that it can hold @p num + * or more elements. If the array is expanded then the new size will be + * written to @p *size_ptr and the value in @p *ptr may change. 
+ * + * If ( @p flags & HTS_RESIZE_CLEAR ) is set, any newly allocated memory will + * be cleared. + */ + +#define hts_resize(type_t, num, size_ptr, ptr, flags) \ + ((num) > (*(size_ptr)) \ + ? hts_resize_array_(sizeof(type_t), (num), \ + sizeof(*(size_ptr)), (size_ptr), \ + (void **)(ptr), (flags), __func__) \ + : 0) + +/// Release resources when dlclosing a dynamically loaded HTSlib +/** @discussion + * Normally HTSlib cleans up automatically when your program exits, + * whether that is via exit(3) or returning from main(). However if you + * have dlopen(3)ed HTSlib and wish to close it before your main program + * exits, you must call hts_lib_shutdown() before dlclose(3). +*/ +HTSLIB_EXPORT +void hts_lib_shutdown(void); + +/** + * Wrapper function for free(). Enables memory deallocation across DLL + * boundary. Should be used by all applications, which are compiled + * with a different standard library than htslib and call htslib + * methods that return dynamically allocated data. + */ +HTSLIB_EXPORT +void hts_free(void *ptr); + +/************ + * File I/O * + ************/ + +// Add new entries only at the end (but before the *_maximum entry) +// of these enums, as their numbering is part of the htslib ABI. 
+ +enum htsFormatCategory { + unknown_category, + sequence_data, // Sequence data -- SAM, BAM, CRAM, etc + variant_data, // Variant calling data -- VCF, BCF, etc + index_file, // Index file associated with some data file + region_list, // Coordinate intervals or regions -- BED, etc + category_maximum = 32767 +}; + +enum htsExactFormat { + unknown_format, + binary_format, text_format, + sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed, + htsget, + json HTS_DEPRECATED_ENUM("Use htsExactFormat 'htsget' instead") = htsget, + empty_format, // File is empty (or empty after decompression) + fasta_format, fastq_format, fai_format, fqi_format, + hts_crypt4gh_format, + d4_format, + format_maximum = 32767 +}; + +enum htsCompression { + no_compression, gzip, bgzf, custom, bzip2_compression, razf_compression, + xz_compression, zstd_compression, + compression_maximum = 32767 +}; + +typedef struct htsFormat { + enum htsFormatCategory category; + enum htsExactFormat format; + struct { short major, minor; } version; + enum htsCompression compression; + short compression_level; // currently unused + void *specific; // format specific options; see struct hts_opt. +} htsFormat; + +struct hts_idx_t; +typedef struct hts_idx_t hts_idx_t; +struct hts_filter_t; + +/** + * @brief File handle returned by hts_open() etc. + * This structure should be considered opaque by end users. There should be + * no need to access most fields directly in user code, and in cases where + * it is desirable accessor functions such as hts_get_format() are provided. 
+ */ +// Maintainers note htsFile cannot be an incomplete struct because some of its +// fields are part of libhts.so's ABI (hence these fields must not be moved): +// - fp is used in the public sam_itr_next()/etc macros +// - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1 +// - is_write and is_cram are used directly in samtools <= 1.1 +// - fp is used directly in samtools (up to and including current develop) +// - line is used directly in bcftools (up to and including current develop) +// - is_bgzf and is_cram flags indicate which fp union member to use. +// Note is_bgzf being set does not indicate the file is BGZF compressed, +// nor even whether it is compressed at all (eg on naked BAMs). +typedef struct htsFile { + uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27; + int64_t lineno; + kstring_t line; + char *fn, *fn_aux; + union { + BGZF *bgzf; + struct cram_fd *cram; + struct hFILE *hfile; + } fp; + void *state; // format specific state information + htsFormat format; + hts_idx_t *idx; + const char *fnidx; + struct sam_hdr_t *bam_header; + struct hts_filter_t *filter; +} htsFile; + +// A combined thread pool and queue allocation size. +// The pool should already be defined, but qsize may be zero to +// indicate an appropriate queue size is taken from the pool. +// +// Reasons for explicitly setting it could be where many more file +// descriptors are in use than threads, so keeping memory low is +// important.
+typedef struct htsThreadPool { + struct hts_tpool *pool; // The shared thread pool itself + int qsize; // Size of I/O queue to use for this fp +} htsThreadPool; + +// REQUIRED_FIELDS +enum sam_fields { + SAM_QNAME = 0x00000001, + SAM_FLAG = 0x00000002, + SAM_RNAME = 0x00000004, + SAM_POS = 0x00000008, + SAM_MAPQ = 0x00000010, + SAM_CIGAR = 0x00000020, + SAM_RNEXT = 0x00000040, + SAM_PNEXT = 0x00000080, + SAM_TLEN = 0x00000100, + SAM_SEQ = 0x00000200, + SAM_QUAL = 0x00000400, + SAM_AUX = 0x00000800, + SAM_RGAUX = 0x00001000, +}; + +// Mostly CRAM only, but this could also include other format options +enum hts_fmt_option { + // CRAM specific + CRAM_OPT_DECODE_MD, + CRAM_OPT_PREFIX, + CRAM_OPT_VERBOSITY, // obsolete, use hts_set_log_level() instead + CRAM_OPT_SEQS_PER_SLICE, + CRAM_OPT_SLICES_PER_CONTAINER, + CRAM_OPT_RANGE, + CRAM_OPT_VERSION, // rename to cram_version? + CRAM_OPT_EMBED_REF, + CRAM_OPT_IGNORE_MD5, + CRAM_OPT_REFERENCE, // make general + CRAM_OPT_MULTI_SEQ_PER_SLICE, + CRAM_OPT_NO_REF, + CRAM_OPT_USE_BZIP2, + CRAM_OPT_SHARED_REF, + CRAM_OPT_NTHREADS, // deprecated, use HTS_OPT_NTHREADS + CRAM_OPT_THREAD_POOL,// make general + CRAM_OPT_USE_LZMA, + CRAM_OPT_USE_RANS, + CRAM_OPT_REQUIRED_FIELDS, + CRAM_OPT_LOSSY_NAMES, + CRAM_OPT_BASES_PER_SLICE, + CRAM_OPT_STORE_MD, + CRAM_OPT_STORE_NM, + CRAM_OPT_RANGE_NOSEEK, // CRAM_OPT_RANGE minus the seek + CRAM_OPT_USE_TOK, + CRAM_OPT_USE_FQZ, + CRAM_OPT_USE_ARITH, + CRAM_OPT_POS_DELTA, // force delta for AP, even on non-pos sorted data + + // General purpose + HTS_OPT_COMPRESSION_LEVEL = 100, + HTS_OPT_NTHREADS, + HTS_OPT_THREAD_POOL, + HTS_OPT_CACHE_SIZE, + HTS_OPT_BLOCK_SIZE, + HTS_OPT_FILTER, + HTS_OPT_PROFILE, + + // Fastq + + // Boolean. + // Read / Write CASAVA 1.8 format. 
+ // See https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl2fastq/bcl2fastq_letterbooklet_15038058brpmi.pdf + // + // The CASAVA tag matches \d:[YN]:\d+:[ACGTN]+ + // The first \d is read 1/2 (1 or 2), [YN] is QC-PASS/FAIL flag, + // \d+ is a control number, and the sequence at the end is + // for barcode sequence. Barcodes are read into the aux tag defined + // by FASTQ_OPT_BARCODE ("BC" by default). + FASTQ_OPT_CASAVA = 1000, + + // String. + // Whether to read / write extra SAM format aux tags from the fastq + // identifier line. For reading this can simply be "1" to request + // decoding aux tags. For writing it is a comma separated list of aux + // tag types to be written out. + FASTQ_OPT_AUX, + + // Boolean. + // Whether to add /1 and /2 to read identifiers when writing FASTQ. + // These come from the BAM_FREAD1 or BAM_FREAD2 flags. + // (Detecting the /1 and /2 is automatic when reading fastq.) + FASTQ_OPT_RNUM, + + // Two character string. + // Barcode aux tag for CASAVA; defaults to "BC". + FASTQ_OPT_BARCODE, + + // Process SRA and ENA read names which pointlessly move the original + // name to the second field and insert a constructed . + // name in its place. + FASTQ_OPT_NAME2, +}; + +// Profile options for encoding; primarily used at present in CRAM +// but also usable in BAM as a synonym for deflate compression levels. +enum hts_profile_option { + HTS_PROFILE_FAST, + HTS_PROFILE_NORMAL, + HTS_PROFILE_SMALL, + HTS_PROFILE_ARCHIVE, +}; + +// For backwards compatibility +#define cram_option hts_fmt_option + +typedef struct hts_opt { + char *arg; // string form, strdup()ed + enum hts_fmt_option opt; // tokenised key + union { // ... 
and value + int i; + char *s; + } val; + struct hts_opt *next; +} hts_opt; + +#define HTS_FILE_OPTS_INIT {{0},0} + +/* + * Explicit index file name delimiter, see below + */ +#define HTS_IDX_DELIM "##idx##" + + +/********************** + * Exported functions * + **********************/ + +/* + * Parses arg and appends it to the option list. + * + * Returns 0 on success; + * -1 on failure. + */ +HTSLIB_EXPORT +int hts_opt_add(hts_opt **opts, const char *c_arg); + +/* + * Applies an hts_opt option list to a given htsFile. + * + * Returns 0 on success + * -1 on failure + */ +HTSLIB_EXPORT +int hts_opt_apply(htsFile *fp, hts_opt *opts); + +/* + * Frees an hts_opt list. + */ +HTSLIB_EXPORT +void hts_opt_free(hts_opt *opts); + +/* + * Accepts a string file format (sam, bam, cram, vcf, bcf) optionally + * followed by a comma separated list of key=value options and splits + * these up into the fields of htsFormat struct. + * + * Returns 0 on success + * -1 on failure. + */ +HTSLIB_EXPORT +int hts_parse_format(htsFormat *opt, const char *str); + +/* + * Tokenise options as (key(=value)?,)*(key(=value)?)? + * NB: No provision for ',' appearing in the value! + * Add backslashing rules? + * + * This could be used as part of a general command line option parser or + * as a string concatenated onto the file open mode. + * + * Returns 0 on success + * -1 on failure. + */ +HTSLIB_EXPORT +int hts_parse_opt_list(htsFormat *opt, const char *str); + +/*! @abstract Table for converting a nucleotide character to 4-bit encoding. +The input character may be either an IUPAC ambiguity code, '=' for 0, or +'0'/'1'/'2'/'3' for a result of 1/2/4/8. The result is encoded as 1/2/4/8 +for A/C/G/T or combinations of these bits for ambiguous bases. +*/ +HTSLIB_EXPORT +extern const unsigned char seq_nt16_table[256]; + +/*! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC +ambiguity code letter (or '=' when given 0). +*/ +HTSLIB_EXPORT +extern const char seq_nt16_str[]; + +/*!
@abstract Table for converting a 4-bit encoded nucleotide to about 2 bits. +Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous). +*/ +HTSLIB_EXPORT +extern const int seq_nt16_int[]; + +/*! + @abstract Get the htslib version number + @return For released versions, a string like "N.N[.N]"; or git describe + output if using a library built within a Git repository. +*/ +HTSLIB_EXPORT +const char *hts_version(void); + +/*! + @abstract Compile-time HTSlib version number, for use in #if checks + @return For released versions X.Y[.Z], an integer of the form XYYYZZ; + useful for preprocessor conditionals such as + #if HTS_VERSION >= 101000 // Check for v1.10 or later +*/ +// Maintainers: Bump this in the final stage of preparing a new release. +// Immediately after release, bump ZZ to 90 to distinguish in-development +// Git repository builds from the release; you may wish to increment this +// further when significant features are merged. +#define HTS_VERSION 102100 + +/*! @abstract Introspection on the features enabled in htslib + * + * @return a bitfield of HTS_FEATURE_* macros. + */ +HTSLIB_EXPORT +unsigned int hts_features(void); + +HTSLIB_EXPORT +const char *hts_test_feature(unsigned int id); + +/*! 
@abstract Introspection on the features enabled in htslib, string form + * + * @return a string describing htslib build features + */ +HTSLIB_EXPORT +const char *hts_feature_string(void); + +// Whether ./configure was used or vanilla Makefile +#define HTS_FEATURE_CONFIGURE 1 + +// Whether --enable-plugins was used +#define HTS_FEATURE_PLUGINS 2 + +// Transport specific +#define HTS_FEATURE_LIBCURL (1u<<10) +#define HTS_FEATURE_S3 (1u<<11) +#define HTS_FEATURE_GCS (1u<<12) + +// Compression options +#define HTS_FEATURE_LIBDEFLATE (1u<<20) +#define HTS_FEATURE_LZMA (1u<<21) +#define HTS_FEATURE_BZIP2 (1u<<22) +#define HTS_FEATURE_HTSCODECS (1u<<23) // htscodecs library version + +// Build params +#define HTS_FEATURE_CC (1u<<27) +#define HTS_FEATURE_CFLAGS (1u<<28) +#define HTS_FEATURE_CPPFLAGS (1u<<29) +#define HTS_FEATURE_LDFLAGS (1u<<30) + + +/*! + @abstract Determine format by peeking at the start of a file + @param fp File opened for reading, positioned at the beginning + @param fmt Format structure that will be filled out on return + @return 0 for success, or negative if an error occurred. + + Equivalent to hts_detect_format2(fp, NULL, fmt). +*/ +HTSLIB_EXPORT +int hts_detect_format(struct hFILE *fp, htsFormat *fmt); + +/*! + @abstract Determine format primarily by peeking at the start of a file + @param fp File opened for reading, positioned at the beginning + @param fname Name of the file, or NULL if not available + @param fmt Format structure that will be filled out on return + @return 0 for success, or negative if an error occurred. + @since 1.15 + +Some formats are only recognised if the filename is available and has the +expected extension, as otherwise more generic files may be misrecognised. +In particular: + - FASTA/Q indexes must have .fai/.fqi extensions; without this requirement, + some similar BED files would be misrecognised as indexes. +*/ +HTSLIB_EXPORT +int hts_detect_format2(struct hFILE *fp, const char *fname, htsFormat *fmt); + +/*! 
+ @abstract Get a human-readable description of the file format + @param fmt Format structure holding type, version, compression, etc. + @return Description string, to be freed by the caller after use. +*/ +HTSLIB_EXPORT +char *hts_format_description(const htsFormat *format); + +/*! + @abstract Open a sequence data (SAM/BAM/CRAM) or variant data (VCF/BCF) + or possibly-compressed textual line-orientated file + @param fn The file name or "-" for stdin/stdout. For indexed files + with a non-standard naming, the file name can include the + name of the index file delimited with HTS_IDX_DELIM + @param mode Mode matching / [rwa][bcefFguxz0-9]* / + @discussion + With 'r' opens for reading; any further format mode letters are ignored + as the format is detected by checking the first few bytes or BGZF blocks + of the file. With 'w' or 'a' opens for writing or appending, with format + specifier letters: + b binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc) + c CRAM format + f FASTQ format + F FASTA format + g gzip compressed + u uncompressed + z bgzf compressed + [0-9] zlib compression level + and with non-format option letters (for any of 'r'/'w'/'a'): + e close the file on exec(2) (opens with O_CLOEXEC, where supported) + x create the file exclusively (opens with O_EXCL, where supported) + Note that there is a distinction between 'u' and '0': the first yields + plain uncompressed output whereas the latter outputs uncompressed data + wrapped in the zlib format. + @example + [rw]b .. compressed BCF, BAM, FAI + [rw]bu .. uncompressed BCF + [rw]z .. compressed VCF + [rw] .. uncompressed VCF +*/ +HTSLIB_EXPORT +htsFile *hts_open(const char *fn, const char *mode); + +/*! + @abstract Open a SAM/BAM/CRAM/VCF/BCF/etc file + @param fn The file name or "-" for stdin/stdout + @param mode Open mode, as per hts_open() + @param fmt Optional format specific parameters + @discussion + See hts_open() for description of fn and mode. 
+ // TODO Update documentation for s/opts/fmt/ + Opts contains a format string (sam, bam, cram, vcf, bcf) which will, + if defined, override mode. Opts also contains a linked list of hts_opt + structures to apply to the open file handle. These can contain things + like pointers to the reference or information on compression levels, + block sizes, etc. +*/ +HTSLIB_EXPORT +htsFile *hts_open_format(const char *fn, const char *mode, const htsFormat *fmt); + +/*! + @abstract Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file + @param fp The already-open file handle + @param mode Open mode, as per hts_open() +*/ +HTSLIB_EXPORT +htsFile *hts_hopen(struct hFILE *fp, const char *fn, const char *mode); + +/*! + @abstract For output streams, flush any buffered data + @param fp The file handle to be flushed + @return 0 for success, or negative if an error occurred. + @since 1.14 +*/ +HTSLIB_EXPORT +int hts_flush(htsFile *fp); + +/*! + @abstract Close a file handle, flushing buffered data for output streams + @param fp The file handle to be closed + @return 0 for success, or negative if an error occurred. +*/ +HTSLIB_EXPORT +int hts_close(htsFile *fp); + +/*! + @abstract Returns the file's format information + @param fp The file handle + @return Read-only pointer to the file's htsFormat. +*/ +HTSLIB_EXPORT +const htsFormat *hts_get_format(htsFile *fp); + +/*! + @abstract Returns a string containing the file format extension. + @param format Format structure containing the file type. + @return A string ("sam", "bam", etc) or "?" for unknown formats. + */ +HTSLIB_EXPORT +const char *hts_format_file_extension(const htsFormat *format); + +/*! + @abstract Sets a specified CRAM option on the open file handle. + @param fp The file handle of the open file. + @param opt The CRAM_OPT_* option. + @param ... Optional arguments, dependent on the option used. + @return 0 for success, or negative if an error occurred.
+*/ +HTSLIB_EXPORT +int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...); + +/*! + @abstract Read a line (and its \n or \r\n terminator) from a file + @param fp The file handle + @param delimiter Unused, but must be '\n' (or KS_SEP_LINE) + @param str The line (not including the terminator) is written here + @return Length of the string read (capped at INT_MAX); + -1 on end-of-file; <= -2 on error +*/ +HTSLIB_EXPORT +int hts_getline(htsFile *fp, int delimiter, kstring_t *str); + +HTSLIB_EXPORT +char **hts_readlines(const char *fn, int *_n); +/*! + @abstract Parse comma-separated list or read list from a file + @param list File name or comma-separated list + @param is_file + @param _n Size of the output array (number of items read) + @return NULL on failure or pointer to newly allocated array of + strings +*/ +HTSLIB_EXPORT +char **hts_readlist(const char *fn, int is_file, int *_n); + +/*! + @abstract Create extra threads to aid compress/decompression for this file + @param fp The file handle + @param n The number of worker threads to create + @return 0 for success, or negative if an error occurred. + @notes This function creates non-shared threads for use solely by fp. + The hts_set_thread_pool function is the recommended alternative. +*/ +HTSLIB_EXPORT +int hts_set_threads(htsFile *fp, int n); + +/*! + @abstract Create extra threads to aid compress/decompression for this file + @param fp The file handle + @param p A pool of worker threads, previously allocated by hts_create_threads(). + @return 0 for success, or negative if an error occurred. +*/ +HTSLIB_EXPORT +int hts_set_thread_pool(htsFile *fp, htsThreadPool *p); + +/*! + @abstract Adds a cache of decompressed blocks, potentially speeding up seeks. + This may not work for all file types (currently it is bgzf only). + @param fp The file handle + @param n The size of cache, in bytes +*/ +HTSLIB_EXPORT +void hts_set_cache_size(htsFile *fp, int n); + +/*! 
+ @abstract Set .fai filename for a file opened for reading + @return 0 for success, negative on failure + @discussion + Called before *_hdr_read(), this provides the name of a .fai file + used to provide a reference list if the htsFile contains no @SQ headers. +*/ +HTSLIB_EXPORT +int hts_set_fai_filename(htsFile *fp, const char *fn_aux); + + +/*! + @abstract Sets a filter expression + @return 0 for success, negative on failure + @discussion + To clear an existing filter, specify expr as NULL. +*/ +HTSLIB_EXPORT +int hts_set_filter_expression(htsFile *fp, const char *expr); + +/*! + @abstract Determine whether a given htsFile contains a valid EOF block + @return 3 for a non-EOF checkable filetype; + 2 for an unseekable file type where EOF cannot be checked; + 1 for a valid EOF block; + 0 if the EOF marker is absent when it should be present; + -1 (with errno set) on failure + @discussion + Check if the BGZF end-of-file (EOF) marker is present +*/ +HTSLIB_EXPORT +int hts_check_EOF(htsFile *fp); + +/************ + * Indexing * + ************/ + +/*! +These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc, +producing iterators operating as follows: + - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file + - HTS_IDX_START iterates over the entire file + - HTS_IDX_REST iterates from the current position to the end of the file + - HTS_IDX_NONE always returns "no more alignment records" +When one of these special tid values is used, beg and end are ignored. +When REST or NONE is used, idx is also ignored and may be NULL. +*/ +#define HTS_IDX_NOCOOR (-2) +#define HTS_IDX_START (-3) +#define HTS_IDX_REST (-4) +#define HTS_IDX_NONE (-5) + +#define HTS_FMT_CSI 0 +#define HTS_FMT_BAI 1 +#define HTS_FMT_TBI 2 +#define HTS_FMT_CRAI 3 +#define HTS_FMT_FAI 4 + +// Almost INT64_MAX, but when cast into a 32-bit int it's +// also INT_MAX instead of -1. This avoids bugs with old code +// using the new hts_pos_t data type.
+#define HTS_POS_MAX ((((int64_t)INT_MAX)<<32)|INT_MAX) +#define HTS_POS_MIN INT64_MIN +#define PRIhts_pos PRId64 +typedef int64_t hts_pos_t; + +// For comparison with previous release: +// +// #define HTS_POS_MAX INT_MAX +// #define HTS_POS_MIN INT_MIN +// #define PRIhts_pos PRId32 +// typedef int32_t hts_pos_t; + +typedef struct hts_pair_pos_t { + hts_pos_t beg, end; +} hts_pair_pos_t; + +typedef hts_pair_pos_t hts_pair32_t; // For backwards compatibility + +typedef struct hts_pair64_t { + uint64_t u, v; +} hts_pair64_t; + +typedef struct hts_pair64_max_t { + uint64_t u, v; + uint64_t max; +} hts_pair64_max_t; + +typedef struct hts_reglist_t { + const char *reg; + hts_pair_pos_t *intervals; + int tid; + uint32_t count; + hts_pos_t min_beg, max_end; +} hts_reglist_t; + +typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, hts_pos_t *beg, hts_pos_t *end); +typedef int hts_seek_func(void *fp, int64_t offset, int where); +typedef int64_t hts_tell_func(void *fp); + +/** + * @brief File iterator that can handle multiple target regions. + * This structure should be considered opaque by end users. + * It does both the stepping inside the file and the filtering of alignments. + * It can operate in single or multi-region mode, and depending on this, + * it uses different fields. 
+ * + * read_rest (1) - read everything from the current offset, without filtering + * finished (1) - no more iterations + * is_cram (1) - current file has CRAM format + * nocoor (1) - read all unmapped reads + * + * multi (1) - multi-region mode + * reg_list - List of target regions + * n_reg - Size of the above list + * curr_reg - List index of the current region of search + * curr_intv - Interval index inside the current region; points to a (beg, end) + * end - Used for CRAM files, to preserve the max end coordinate + * + * multi (0) - single-region mode + * tid - Reference id of the target region + * beg - Start position of the target region + * end - End position of the target region + * + * Common fields: + * off - List of file offsets computed from the index + * n_off - Size of the above list + * i - List index of the current file offset + * curr_off - File offset for the next file read + * curr_tid - Reference id of the current alignment + * curr_beg - Start position of the current alignment + * curr_end - End position of the current alignment + * nocoor_off - File offset where the unmapped reads start + * + * readrec - File specific function that reads an alignment + * seek - File specific function for changing the file offset + * tell - File specific function for indicating the file offset + */ + +typedef struct hts_itr_t { + uint32_t read_rest:1, finished:1, is_cram:1, nocoor:1, multi:1, dummy:27; + int tid, n_off, i, n_reg; + hts_pos_t beg, end; + hts_reglist_t *reg_list; + int curr_tid, curr_reg, curr_intv; + hts_pos_t curr_beg, curr_end; + uint64_t curr_off, nocoor_off; + hts_pair64_max_t *off; + hts_readrec_func *readrec; + hts_seek_func *seek; + hts_tell_func *tell; + struct { + int n, m; + int *a; + } bins; +} hts_itr_t; + +typedef hts_itr_t hts_itr_multi_t; + +/// Compute the first bin on a given level +#define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7) +/// Compute the parent bin of a given bin +#define hts_bin_parent(b) (((b) - 1) >> 3)
+ +/////////////////////////////////////////////////////////// +// Low-level API for building indexes. + +/// Create a BAI/CSI/TBI type index structure +/** @param n Initial number of targets + @param fmt Format, one of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI + @param offset0 Initial file offset + @param min_shift Number of bits for the minimal interval + @param n_lvls Number of levels in the binning index + @return An initialised hts_idx_t struct on success; NULL on failure + +The struct returned by a successful call should be freed via hts_idx_destroy() +when it is no longer needed. +*/ +HTSLIB_EXPORT +hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls); + +/// Free a BAI/CSI/TBI type index +/** @param idx Index structure to free + */ +HTSLIB_EXPORT +void hts_idx_destroy(hts_idx_t *idx); + +/// Push an index entry +/** @param idx Index + @param tid Target id + @param beg Range start (zero-based) + @param end Range end (zero-based, half-open) + @param offset File offset + @param is_mapped Range corresponds to a mapped read + @return 0 on success; -1 on failure + +The @p is_mapped parameter is used to update the n_mapped / n_unmapped counts +stored in the meta-data bin. + */ +HTSLIB_EXPORT +int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); + +/// Finish building an index +/** @param idx Index + @param final_offset Last file offset + @return 0 on success; non-zero on failure. +*/ +HTSLIB_EXPORT +int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset); + +/// Returns index format +/** @param idx Index + @return One of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI +*/ +HTSLIB_EXPORT +int hts_idx_fmt(hts_idx_t *idx); + +/// Add name to TBI index meta-data +/** @param idx Index + @param tid Target identifier + @param name Target name + @return Index number of name in names list on success; -1 on failure. 
+*/ +HTSLIB_EXPORT +int hts_idx_tbi_name(hts_idx_t *idx, int tid, const char *name); + +// Index loading and saving + +/// Save an index to a file +/** @param idx Index to be written + @param fn Input BAM/BCF/etc filename, to which .bai/.csi/etc will be added + @param fmt One of the HTS_FMT_* index formats + @return 0 if successful, or negative if an error occurred. +*/ +HTSLIB_EXPORT +int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt) HTS_RESULT_USED; + +/// Save an index to a specific file +/** @param idx Index to be written + @param fn Input BAM/BCF/etc filename + @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn + @param fmt One of the HTS_FMT_* index formats + @return 0 if successful, or negative if an error occurred. +*/ +HTSLIB_EXPORT +int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt) HTS_RESULT_USED; + +/// Load an index file +/** @param fn BAM/BCF/etc filename, to which .bai/.csi/etc will be added or + the extension substituted, to search for an existing index file. + In case of a non-standard naming, the file name can include the + name of the index file delimited with HTS_IDX_DELIM. + @param fmt One of the HTS_FMT_* index formats + @return The index, or NULL if an error occurred. + +If @p fn contains the string "##idx##" (HTS_IDX_DELIM), the part before +the delimiter will be used as the name of the data file and the part after +it will be used as the name of the index. + +Otherwise, this function tries to work out the index name as follows: + + It will try appending ".csi" to @p fn + It will try substituting an existing suffix (e.g. .bam, .vcf) with ".csi" + Then, if @p fmt is HTS_FMT_BAI: + It will try appending ".bai" to @p fn + It will try substituting the existing suffix (e.g. .bam) with ".bai" + else if @p fmt is HTS_FMT_TBI: + It will try appending ".tbi" to @p fn + It will try substituting the existing suffix (e.g.
.vcf) with ".tbi" + +If the index file is remote (served over a protocol like https), first a check +is made to see if a locally cached copy is available. This is done for all +of the possible names listed above. If a cached copy is not available then +the index will be downloaded and stored in the current working directory, +with the same name as the remote index. + + Equivalent to hts_idx_load3(fn, NULL, fmt, HTS_IDX_SAVE_REMOTE); +*/ +HTSLIB_EXPORT +hts_idx_t *hts_idx_load(const char *fn, int fmt); + +/// Load a specific index file +/** @param fn Input BAM/BCF/etc filename + @param fnidx The input index filename + @return The index, or NULL if an error occurred. + + Equivalent to hts_idx_load3(fn, fnidx, 0, 0); + + This function will not attempt to save index files locally. +*/ +HTSLIB_EXPORT +hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx); + +/// Load a specific index file +/** @param fn Input BAM/BCF/etc filename + @param fnidx The input index filename + @param fmt One of the HTS_FMT_* index formats + @param flags Flags to alter behaviour (see description) + @return The index, or NULL if an error occurred. + + If @p fnidx is NULL, the index name will be derived from @p fn in the + same way as hts_idx_load(). + + If @p fnidx is not NULL, @p fmt is ignored. + + The @p flags parameter can be set to a combination of the following + values: + + HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes + HTS_IDX_SILENT_FAIL Fail silently if the index is not present + + The index struct returned by a successful call should be freed + via hts_idx_destroy() when it is no longer needed.
+*/ +HTSLIB_EXPORT +hts_idx_t *hts_idx_load3(const char *fn, const char *fnidx, int fmt, int flags); + +/// Flags for hts_idx_load3() ( and also sam_idx_load3(), tbx_idx_load3() ) +#define HTS_IDX_SAVE_REMOTE 1 +#define HTS_IDX_SILENT_FAIL 2 + +/////////////////////////////////////////////////////////// +// Functions for accessing meta-data stored in indexes + +typedef const char *(*hts_id2name_f)(void*, int); + +/// Get extra index meta-data +/** @param idx The index + @param l_meta Pointer to where the length of the extra data is stored + @return Pointer to the extra data if present; NULL otherwise + + Indexes (both .tbi and .csi) made by tabix include extra data about + the indexed file. This function returns a pointer to this data. Note that the + data is stored exactly as it is in the index. Callers need to interpret + the results themselves, including knowing what sort of data to expect; + byte swapping etc. +*/ +HTSLIB_EXPORT +uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta); + +/// Set extra index meta-data +/** @param idx The index + @param l_meta Length of data + @param meta Pointer to the extra data + @param is_copy If not zero, a copy of the data is taken + @return 0 on success; -1 on failure (out of memory). + + Sets the data that is returned by hts_idx_get_meta(). + + If is_copy != 0, a copy of the input data is taken. If not, ownership of + the data pointed to by *meta passes to the index.
+*/ +HTSLIB_EXPORT +int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, int is_copy); + +/// Get number of mapped and unmapped reads from an index +/** @param idx Index + @param tid Target ID + @param[out] mapped Location to store number of mapped reads + @param[out] unmapped Location to store number of unmapped reads + @return 0 on success; -1 on failure (data not available) + + BAI and CSI indexes store information on the number of reads for each + target that were mapped or unmapped (unmapped reads will generally have + a paired read that is mapped to the target). This function returns this + information if it is available. + + @note Cram CRAI indexes do not include this information. +*/ +HTSLIB_EXPORT +int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped); + +/// Return the number of unplaced reads from an index +/** @param idx Index + @return Unplaced reads count + + Unplaced reads are not linked to any reference (e.g. RNAME is '*' in SAM + files). +*/ +HTSLIB_EXPORT +uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx); + +/// Return a list of target names from an index +/** @param idx Index + @param[out] n Location to store the number of targets + @param getid Callback function to get the name for a target ID + @param hdr Header from indexed file + @return An array of pointers to the names on success; NULL on failure + + @note The names are pointers into the header data structure. When cleaning + up, only the array should be freed, not the names. 
+ */ +HTSLIB_EXPORT +const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values + +/// Return the number of targets from an index +/** @param idx Index + @return The number of targets + */ +HTSLIB_EXPORT +int hts_idx_nseq(const hts_idx_t *idx); + +/////////////////////////////////////////////////////////// +// Region parsing + +#define HTS_PARSE_THOUSANDS_SEP 1 ///< Ignore ',' separators within numbers +#define HTS_PARSE_ONE_COORD 2 ///< chr:pos means chr:pos-pos and not chr:pos-end +#define HTS_PARSE_LIST 4 ///< Expect a comma separated list of regions. (Disables HTS_PARSE_THOUSANDS_SEP) + +/// Parse a numeric string +/** The number may be expressed in scientific notation, and optionally may + contain commas in the integer part (before any decimal point or E notation). + @param str String to be parsed + @param strend If non-NULL, set on return to point to the first character + in @a str after those forming the parsed number + @param flags Or'ed-together combination of HTS_PARSE_* flags + @return Integer value of the parsed number, or 0 if no valid number + + The input string is parsed as: optional whitespace; an optional '+' or + '-' sign; decimal digits possibly including ',' characters (if @a flags + includes HTS_PARSE_THOUSANDS_SEP) and a '.' decimal point; and an optional + case-insensitive suffix, which may be either 'k', 'M', 'G', or scientific + notation consisting of 'e'/'E' followed by an optional '+' or '-' sign and + decimal digits. To be considered a valid numeric value, the main part (not + including any suffix or scientific notation) must contain at least one + digit (either before or after the decimal point). + + When @a strend is NULL, @a str is expected to contain only (optional + whitespace followed by) the numeric value. 
A warning will be printed + (if hts_verbose is HTS_LOG_WARNING or more) if no valid parsable number + is found or if there are any unused characters after the number. + + When @a strend is non-NULL, @a str starts with (optional whitespace + followed by) the numeric value. On return, @a strend is set to point + to the first unused character after the numeric value, or to @a str + if no valid parsable number is found. +*/ +HTSLIB_EXPORT +long long hts_parse_decimal(const char *str, char **strend, int flags); + +typedef int (*hts_name2id_f)(void*, const char*); + +/// Parse a "CHR:START-END"-style region string +/** @param str String to be parsed + @param beg Set on return to the 0-based start of the region + @param end Set on return to the 1-based end of the region + @return Pointer to the colon or '\0' after the reference sequence name, + or NULL if @a str could not be parsed. + + NOTE: For compatibility with hts_parse_reg only. + Please use hts_parse_region instead. +*/ +HTSLIB_EXPORT +const char *hts_parse_reg64(const char *str, hts_pos_t *beg, hts_pos_t *end); + +/// Parse a "CHR:START-END"-style region string +/** @param str String to be parsed + @param beg Set on return to the 0-based start of the region + @param end Set on return to the 1-based end of the region + @return Pointer to the colon or '\0' after the reference sequence name, + or NULL if @a str could not be parsed. +*/ +HTSLIB_EXPORT +const char *hts_parse_reg(const char *str, int *beg, int *end); + +/// Parse a "CHR:START-END"-style region string +/** @param str String to be parsed + @param tid Set on return (if not NULL) to be reference index (-1 if invalid) + @param beg Set on return to the 0-based start of the region + @param end Set on return to the 1-based end of the region + @param getid Function pointer. Called if not NULL to set tid. + @param hdr Caller data passed to getid. + @param flags Bitwise HTS_PARSE_* flags listed above. 
+ @return Pointer to the byte after the end of the entire region + specifier (including any trailing comma) on success, + or NULL if @a str could not be parsed. + + A variant of hts_parse_reg which is reference-id aware. It uses + the iterator name2id callbacks to validate the region tokenisation works. + + This is necessary due to GRCh38 HLA additions which have reference names + like "HLA-DRB1*12:17". + + To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" + are reference names, quote using curly braces. + Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. + + Flags are used to control how parsing works, and can be one of the below. + + HTS_PARSE_THOUSANDS_SEP: + Ignore commas in numbers. For example with this flag 1,234,567 + is interpreted as 1234567. + + HTS_PARSE_LIST: + If present, the region is assumed to be a comma separated list and + position parsing will not contain commas (this implicitly + clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal). + On success the return pointer will be the start of the next region, ie + the character after the comma. (If *ret != '\0' then the caller can + assume another region is present in the list.) + + If not set then positions may contain commas. In this case the return + value should point to the end of the string, or NULL on failure. + + HTS_PARSE_ONE_COORD: + If present, X:100 is treated as the single base pair region X:100-100. + In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-<end>. + (This is the standard bcftools region convention.) + + When not set X:100 is considered to be X:100-<end> where <end> is + the end of chromosome X (set to INT_MAX here). X:100- and X:-100 are + invalid. + (This is the standard samtools region convention.) + + Note the supplied string expects 1 based inclusive coordinates, but the + returned coordinates start from 0 and are half open, so pos0 is valid + for use in e.g.
"for (pos0 = beg; pos0 < end; pos0++) {...}" + + If NULL is returned, the value in tid may give additional information + about the error: + + -2 Failed to parse @p hdr; or out of memory + -1 The reference in @p str has mismatched braces, or does not + exist in @p hdr + >= 0 The specified range in @p str could not be parsed +*/ +HTSLIB_EXPORT +const char *hts_parse_region(const char *s, int *tid, hts_pos_t *beg, + hts_pos_t *end, hts_name2id_f getid, void *hdr, + int flags); + + +/////////////////////////////////////////////////////////// +// Generic iterators +// +// These functions provide the low-level infrastructure for iterators. +// Wrappers around these are used to make iterators for specific file types. +// See: +// htslib/sam.h for SAM/BAM/CRAM iterators +// htslib/vcf.h for VCF/BCF iterators +// htslib/tbx.h for files indexed by tabix + +/// Create a single-region iterator +/** @param idx Index + @param tid Target ID + @param beg Start of region + @param end End of region + @param readrec Callback to read a record from the input file + @return An iterator on success; NULL on failure + + The iterator struct returned by a successful call should be freed + via hts_itr_destroy() when it is no longer needed.
+ */ +HTSLIB_EXPORT +hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec); + +/// Free an iterator +/** @param iter Iterator to free + */ +HTSLIB_EXPORT +void hts_itr_destroy(hts_itr_t *iter); + +typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec); + +/// Create a single-region iterator from a text region specification +/** @param idx Index + @param reg Region specifier + @param getid Callback function to return the target ID for a name + @param hdr Input file header + @param itr_query Callback function returning an iterator for a numeric tid, + start and end position + @param readrec Callback to read a record from the input file + @return An iterator on success; NULL on error + + The iterator struct returned by a successful call should be freed + via hts_itr_destroy() when it is no longer needed. + */ +HTSLIB_EXPORT +hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec); + +/// Return the next record from an iterator +/** @param fp Input file handle + @param iter Iterator + @param r Pointer to record placeholder + @param data Data passed to the readrec callback + @return >= 0 on success, -1 when there is no more data, < -1 on error + */ +HTSLIB_EXPORT +int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) HTS_RESULT_USED; + +/********************************** + * Iterator with multiple regions * + **********************************/ + +typedef int hts_itr_multi_query_func(const hts_idx_t *idx, hts_itr_t *itr); +HTSLIB_EXPORT +int hts_itr_multi_bam(const hts_idx_t *idx, hts_itr_t *iter); +HTSLIB_EXPORT +int hts_itr_multi_cram(const hts_idx_t *idx, hts_itr_t *iter); + +/// Create a multi-region iterator from a region list +/** @param idx Index + @param reglist Region list + @param count Number of items in region list + 
@param getid Callback to convert names to target IDs + @param hdr Indexed file header (passed to getid) + @param itr_specific Filetype-specific callback function + @param readrec Callback to read an input file record + @param seek Callback to seek in the input file + @param tell Callback to return current input file location + @return An iterator on success; NULL on failure + + The iterator struct returned by a successful call should be freed + via hts_itr_destroy() when it is no longer needed. + */ +HTSLIB_EXPORT +hts_itr_t *hts_itr_regions(const hts_idx_t *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, hts_itr_multi_query_func *itr_specific, hts_readrec_func *readrec, hts_seek_func *seek, hts_tell_func *tell); + +/// Return the next record from an iterator +/** @param fd Input file handle + @param iter Iterator + @param r Pointer to record placeholder + @return >= 0 on success, -1 when there is no more data, < -1 on error + */ +HTSLIB_EXPORT +int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r); + +/// Create a region list from a char array +/** @param argv Char array of target:interval elements, e.g. chr1:2500-3600, chr1:5100, chr2 + @param argc Number of items in the array + @param r_count Pointer to the number of items in the resulting region list + @param hdr Header for the sam/bam/cram file + @param getid Callback to convert target names to target ids. + @return A region list on success, NULL on failure + + The hts_reglist_t struct returned by a successful call should be freed + via hts_reglist_free() when it is no longer needed. 
+ */ +HTSLIB_EXPORT +hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr, hts_name2id_f getid); + +/// Free a region list +/** @param reglist Region list + @param count Number of items in the list + */ +HTSLIB_EXPORT +void hts_reglist_free(hts_reglist_t *reglist, int count); + +/// Free a multi-region iterator +/** @param iter Iterator to free + */ +#define hts_itr_multi_destroy(iter) hts_itr_destroy(iter) + + + /** + * hts_file_type() - Convenience function to determine file type + * DEPRECATED: This function has been replaced by hts_detect_format(). + * It and these FT_* macros will be removed in a future HTSlib release. + */ + #define FT_UNKN 0 + #define FT_GZ 1 + #define FT_VCF 2 + #define FT_VCF_GZ (FT_GZ|FT_VCF) + #define FT_BCF (1<<2) + #define FT_BCF_GZ (FT_GZ|FT_BCF) + #define FT_STDIN (1<<3) + HTSLIB_EXPORT + int hts_file_type(const char *fname); + + +/*************************** + * Revised MAQ error model * + ***************************/ + +struct errmod_t; +typedef struct errmod_t errmod_t; + +HTSLIB_EXPORT +errmod_t *errmod_init(double depcorr); +HTSLIB_EXPORT +void errmod_destroy(errmod_t *em); + +/* + n: number of bases + m: maximum base + bases[i]: qual:6, strand:1, base:4 + q[i*m+j]: phred-scaled likelihood of (i,j) + */ +HTSLIB_EXPORT +int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q); + + +/***************************************************** + * Probabilistic banded glocal alignment * + * See https://doi.org/10.1093/bioinformatics/btr076 * + *****************************************************/ + +typedef struct probaln_par_t { + float d, e; + int bw; +} probaln_par_t; + +/// Perform probabilistic banded glocal alignment +/** @param ref Reference sequence + @param l_ref Length of reference + @param query Query sequence + @param l_query Length of query sequence + @param iqual Query base qualities + @param c Alignment parameters + @param[out] state Output alignment + @param[out] q Phred 
scaled posterior probability of state[i] being wrong + @return Phred-scaled likelihood score, or INT_MIN on failure. + +The reference and query sequences are coded using integers 0,1,2,3,4 for +bases A,C,G,T,N respectively (N here is for any ambiguity code). + +On output, state and q are arrays of length l_query. The higher 30 +bits give the reference position the query base is matched to and the +lower two bits can be 0 (an alignment match) or 1 (an +insertion). q[i] gives the phred scaled posterior probability of +state[i] being wrong. + +On failure, errno will be set to EINVAL if the values of l_ref or l_query +were invalid; or ENOMEM if a memory allocation failed. +*/ + +HTSLIB_EXPORT +int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, int l_query, const uint8_t *iqual, const probaln_par_t *c, int *state, uint8_t *q); + + + /********************** + * MD5 implementation * + **********************/ + + struct hts_md5_context; + typedef struct hts_md5_context hts_md5_context; + + /*! @abstract Initialises an MD5 context. + * @discussion + * The expected use is to allocate an hts_md5_context using + * hts_md5_init(). This pointer is then passed into one or more calls + * of hts_md5_update() to compute successive internal portions of the + * MD5 sum, which can then be externalised as a full 16-byte MD5sum + * calculation by calling hts_md5_final(). This can then be turned + * into ASCII via hts_md5_hex(). + * + * To deallocate any resources created by hts_md5_init() call the + * hts_md5_destroy() function. + * + * @return hts_md5_context pointer on success, NULL otherwise. + */ + HTSLIB_EXPORT + hts_md5_context *hts_md5_init(void); + + /*! @abstract Updates the context with the MD5 of the data. */ + HTSLIB_EXPORT + void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size); + + /*! 
@abstract Computes the final 128-bit MD5 hash from the given context */ + HTSLIB_EXPORT + void hts_md5_final(unsigned char *digest, hts_md5_context *ctx); + + /*! @abstract Resets an md5_context to the initial state, as returned + * by hts_md5_init(). + */ + HTSLIB_EXPORT + void hts_md5_reset(hts_md5_context *ctx); + + /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-terminated + * hex string. + */ + HTSLIB_EXPORT + void hts_md5_hex(char *hex, const unsigned char *digest); + + /*! @abstract Deallocates any memory allocated by hts_md5_init. */ + HTSLIB_EXPORT + void hts_md5_destroy(hts_md5_context *ctx); + +static inline int hts_reg2bin(hts_pos_t beg, hts_pos_t end, int min_shift, int n_lvls) +{ + int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7; + for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l)) + if (beg>>s == end>>s) return t + (beg>>s); + return 0; +} + +/// Compute the level of a bin in a binning index +static inline int hts_bin_level(int bin) { + int l, b; + for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); + return l; +} + +//! Compute the corresponding entry into the linear index of a given bin from +//! a binning index +/*! + * @param bin The bin number + * @param n_lvls The index depth (number of levels - 0 based) + * @return The integer offset into the linear index + * + * Explanation of the return value formula: + * Each bin on level l covers exp(2, (n_lvls - l)*3 + min_shift) base pairs. + * A linear index entry covers exp(2, min_shift) base pairs. 
+ */ +static inline int hts_bin_bot(int bin, int n_lvls) +{ + int l = hts_bin_level(bin); + return (bin - hts_bin_first(l)) << (n_lvls - l) * 3; +} + +/// Compute the (0-based exclusive) maximum position covered by a binning index +static inline hts_pos_t hts_bin_maxpos(int min_shift, int n_lvls) +{ + hts_pos_t one = 1; + return one << (min_shift + n_lvls * 3); +} + +/************** + * Endianness * + **************/ + +static inline int ed_is_big(void) +{ + long one= 1; + return !(*((char *)(&one))); +} +static inline uint16_t ed_swap_2(uint16_t v) +{ + return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); +} +static inline void *ed_swap_2p(void *x) +{ + *(uint16_t*)x = ed_swap_2(*(uint16_t*)x); + return x; +} +static inline uint32_t ed_swap_4(uint32_t v) +{ + v = ((v & 0x0000FFFFU) << 16) | (v >> 16); + return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); +} +static inline void *ed_swap_4p(void *x) +{ + *(uint32_t*)x = ed_swap_4(*(uint32_t*)x); + return x; +} +static inline uint64_t ed_swap_8(uint64_t v) +{ + v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); + v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); + return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); +} +static inline void *ed_swap_8p(void *x) +{ + *(uint64_t*)x = ed_swap_8(*(uint64_t*)x); + return x; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/htslib-1.21/htslib/hts_defs.h b/src/htslib-1.21/htslib/hts_defs.h new file mode 100644 index 0000000..b5cded3 --- /dev/null +++ b/src/htslib-1.21/htslib/hts_defs.h @@ -0,0 +1,135 @@ +/* hts_defs.h -- Miscellaneous definitions. + + Copyright (C) 2013-2015,2017, 2019-2020, 2024 Genome Research Ltd. 
+ + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_HTS_DEFS_H +#define HTSLIB_HTS_DEFS_H + +#if defined __MINGW32__ +#include <stdio.h> // For __MINGW_PRINTF_FORMAT macro +#endif + +#ifdef __clang__ +#ifdef __has_attribute +#define HTS_COMPILER_HAS(attribute) __has_attribute(attribute) +#endif + +#elif defined __GNUC__ +#define HTS_GCC_AT_LEAST(major, minor) \ + (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) +#endif + +#ifndef HTS_COMPILER_HAS +#define HTS_COMPILER_HAS(attribute) 0 +#endif +#ifndef HTS_GCC_AT_LEAST +#define HTS_GCC_AT_LEAST(major, minor) 0 +#endif + +#if HTS_COMPILER_HAS(__nonstring__) || HTS_GCC_AT_LEAST(8,1) +#define HTS_NONSTRING __attribute__ ((__nonstring__)) +#else +#define HTS_NONSTRING +#endif + +#if HTS_COMPILER_HAS(__noreturn__) || HTS_GCC_AT_LEAST(3,0) +#define HTS_NORETURN __attribute__ ((__noreturn__)) +#else +#define HTS_NORETURN +#endif + +// Enable optimisation level 3, especially for gcc. 
To be used +// where we want to force vectorisation in hot loops and the default -O2 +// just doesn't cut it. +#if HTS_COMPILER_HAS(optimize) || HTS_GCC_AT_LEAST(4,4) +#define HTS_OPT3 __attribute__((optimize("O3"))) +#else +#define HTS_OPT3 +#endif + +#if HTS_COMPILER_HAS(aligned) || HTS_GCC_AT_LEAST(4,3) +#define HTS_ALIGN32 __attribute__((aligned(32))) +#else +#define HTS_ALIGN32 +#endif + +// GCC introduced warn_unused_result in 3.4 but added -Wno-unused-result later +#if HTS_COMPILER_HAS(__warn_unused_result__) || HTS_GCC_AT_LEAST(4,5) +#define HTS_RESULT_USED __attribute__ ((__warn_unused_result__)) +#else +#define HTS_RESULT_USED +#endif + +#if HTS_COMPILER_HAS(__unused__) || HTS_GCC_AT_LEAST(3,0) +#define HTS_UNUSED __attribute__ ((__unused__)) +#else +#define HTS_UNUSED +#endif + +#if HTS_COMPILER_HAS(__deprecated__) || HTS_GCC_AT_LEAST(4,5) +#define HTS_DEPRECATED(message) __attribute__ ((__deprecated__ (message))) +#elif HTS_GCC_AT_LEAST(3,1) +#define HTS_DEPRECATED(message) __attribute__ ((__deprecated__)) +#else +#define HTS_DEPRECATED(message) +#endif + +#if (HTS_COMPILER_HAS(__deprecated__) || HTS_GCC_AT_LEAST(6,4)) && !defined(__ICC) +#define HTS_DEPRECATED_ENUM(message) __attribute__ ((__deprecated__ (message))) +#else +#define HTS_DEPRECATED_ENUM(message) +#endif + +// On mingw the "printf" format type doesn't work. It needs "gnu_printf" +// in order to check %lld and %z, otherwise it defaults to checking against +// the Microsoft library printf format options despite linking against the +// GNU posix implementation of printf. 
The __MINGW_PRINTF_FORMAT macro +// expands to printf or gnu_printf as required, but obviously may not +// exist +#ifdef __MINGW_PRINTF_FORMAT +#define HTS_PRINTF_FMT __MINGW_PRINTF_FORMAT +#else +#define HTS_PRINTF_FMT printf +#endif + +#if HTS_COMPILER_HAS(__format__) || HTS_GCC_AT_LEAST(3,0) +#define HTS_FORMAT(type, idx, first) __attribute__((__format__ (type, idx, first))) +#else +#define HTS_FORMAT(type, idx, first) +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) +#if defined(HTS_BUILDING_LIBRARY) +#define HTSLIB_EXPORT __declspec(dllexport) +#else +#define HTSLIB_EXPORT +#endif +#elif HTS_COMPILER_HAS(__visibility__) || HTS_GCC_AT_LEAST(4,0) +#define HTSLIB_EXPORT __attribute__((__visibility__("default"))) +#elif defined(__SUNPRO_C) && __SUNPRO_C >= 0x550 +#define HTSLIB_EXPORT __global +#else +#define HTSLIB_EXPORT +#endif + +#endif diff --git a/src/htslib-1.19.1/htslib/hts_endian.h b/src/htslib-1.21/htslib/hts_endian.h similarity index 100% rename from src/htslib-1.19.1/htslib/hts_endian.h rename to src/htslib-1.21/htslib/hts_endian.h diff --git a/src/htslib-1.18/htslib/hts_expr.h b/src/htslib-1.21/htslib/hts_expr.h similarity index 100% rename from src/htslib-1.18/htslib/hts_expr.h rename to src/htslib-1.21/htslib/hts_expr.h diff --git a/src/htslib-1.18/htslib/hts_log.h b/src/htslib-1.21/htslib/hts_log.h similarity index 100% rename from src/htslib-1.18/htslib/hts_log.h rename to src/htslib-1.21/htslib/hts_log.h diff --git a/src/htslib-1.18/htslib/hts_os.h b/src/htslib-1.21/htslib/hts_os.h similarity index 100% rename from src/htslib-1.18/htslib/hts_os.h rename to src/htslib-1.21/htslib/hts_os.h diff --git a/src/htslib-1.18/htslib/kbitset.h b/src/htslib-1.21/htslib/kbitset.h similarity index 100% rename from src/htslib-1.18/htslib/kbitset.h rename to src/htslib-1.21/htslib/kbitset.h diff --git a/src/htslib-1.18/htslib/kfunc.h b/src/htslib-1.21/htslib/kfunc.h similarity index 100% rename from src/htslib-1.18/htslib/kfunc.h rename to 
src/htslib-1.21/htslib/kfunc.h diff --git a/src/htslib-1.21/htslib/khash.h b/src/htslib-1.21/htslib/khash.h new file mode 100644 index 0000000..02e4917 --- /dev/null +++ b/src/htslib-1.21/htslib/khash.h @@ -0,0 +1,749 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos + Copyright (C) 2014-2015, 2018, 2024 Genome Research Ltd. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2013-05-02 (0.2.8): + + * Use quadratic probing. 
When the capacity is a power of 2, stepping function + i*(i+1)/2 guarantees to traverse each bucket. It is better than double + hashing on cache performance and is more robust than linear probing. + + In theory, double hashing should be more robust than quadratic probing. + However, my implementation is probably not for large hash tables, because + the second hash function is closely tied to the first hash function, + which reduces the effectiveness of double hashing. + + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + + 2011-12-29 (0.2.7): + + * Minor code clean up; no actual effect. + + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. + + 2009-09-26 (0.2.4): + + * Improve portability + + 2008-09-19 (0.2.3): + + * Corrected the example + * Improved interfaces + + 2008-09-11 (0.2.2): + + * Improved speed a little in kh_put() + + 2008-09-10 (0.2.1): + + * Added kh_clear() + * Fixed a compiling error + + 2008-09-02 (0.2.0): + + * Changed to token concatenation which increases flexibility. + + 2008-08-31 (0.1.2): + + * Fixed a bug in kh_get(), which has not been tested previously. + + 2008-08-31 (0.1.1): + + * Added destructor +*/ + + +#ifndef __AC_KHASH_H +#define __AC_KHASH_H + +/*! + @header + + Generic hash table library. 
+ */ + +#define AC_VERSION_KHASH_H "0.2.8" + +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +#include "kstring.h" +#include "kroundup.h" + +/* compiler specific configuration */ + +#if UINT_MAX == 0xffffffffu +typedef unsigned int khint32_t; +#elif ULONG_MAX == 0xffffffffu +typedef unsigned long khint32_t; +#endif + +#if ULONG_MAX == ULLONG_MAX +typedef unsigned long khint64_t; +#else +typedef unsigned long long khint64_t; +#endif + +#ifndef kh_inline +#ifdef _MSC_VER +#define kh_inline __inline +#else +#define kh_inline inline +#endif +#endif /* kh_inline */ + +#ifndef klib_unused +#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__ ((__unused__)) +#else +#define klib_unused +#endif +#endif /* klib_unused */ + +typedef khint32_t khint_t; +typedef khint_t khiter_t; + +#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) +#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) +#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) +#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) +#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) +#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) +#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) + +#define __ac_fsize(m) ((m) < 16? 
1 : (m)>>4) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#ifndef kcalloc +#define kcalloc(N,Z) calloc(N,Z) +#endif +#ifndef kmalloc +#define kmalloc(Z) malloc(Z) +#endif +#ifndef krealloc +#define krealloc(P,Z) realloc(P,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + +static const double __ac_HASH_UPPER = 0.77; + +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct kh_##name##_s { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; + +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t *kh_init_##name(void); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khint_t x); + +#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + kfree((void *)h->keys); kfree(h->flags); \ + kfree((void *)h->vals); \ + kfree(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khint_t k, i, last, mask, step = 0; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || 
!__hash_equal(h->keys[i], key))) { \ + i = (i + (++step)) & mask; \ + if (i == last) return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i)? h->n_buckets : i; \ + } else return 0; \ + } \ + SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ + khint32_t *new_flags = 0; \ + khint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) return -1; \ + memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { kfree(new_flags); return -1; } \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) { kfree(new_flags); return -1; } \ + h->vals = new_vals; \ + } \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isdel_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t k, i, step = 0; \ + k = __hash_func(key); \ + i = k & new_mask; \ + while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the 
existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ + __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + kfree(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + return 0; \ + } \ + SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ + *ret = -1; return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ + *ret = -1; return h->n_buckets; \ + } \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + { \ + khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + (++step)) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x 
= i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ + { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } \ + SCOPE int kh_stats_##name(kh_##name##_t *h, khint_t *empty, \ + khint_t *deleted, khint_t *hist_size, \ + khint_t **hist_out) \ + { \ + khint_t i, *hist = NULL, dist_max = 0, k, dist, step; \ + khint_t mask = h->n_buckets - 1; \ + *empty = *deleted = *hist_size = 0; \ + hist = (khint_t *) calloc(1, sizeof(*hist)); \ + if (!hist) { return -1; } \ + for (i = kh_begin(h); i < kh_end(h); ++i) { \ + if (__ac_isempty(h->flags, i)) { (*empty)++; continue; } \ + if (__ac_isdel(h->flags, i)) { (*deleted)++; continue; } \ + k = __hash_func(h->keys[i]) & (h->n_buckets - 1); \ + dist = 0; \ + step = 0; \ + while (k != i) { \ + dist++; \ + k = (k + (++step)) & mask; \ + } \ + if (dist_max <= dist) { \ + khint_t *new_hist = (khint_t *) realloc(hist, sizeof(*new_hist) * (dist + 1)); \ + if (!new_hist) { free(hist); return -1; } \ + for (k = dist_max + 1; k <= dist; k++) new_hist[k] = 0; \ + hist = new_hist; \ + dist_max = dist; \ + } \ + hist[dist]++; \ + } \ + *hist_out = hist; \ + *hist_size = dist_max + 1; \ + return 0; \ + } + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, 
__hash_func, __hash_equal) + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +/* --- BEGIN OF HASH FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [khint32_t] + @return The hash value [khint_t] + */ +#define kh_int_hash_func(key) (khint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [khint64_t] + @return The hash value [khint_t] + */ +#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) + +/*! @function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +static kh_inline khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; + return h; +} + +/*! @function + @abstract const char* FNV1a hash function + @param s Pointer to a null terminated string + @return The hash value + */ +static kh_inline khint_t __ac_FNV1a_hash_string(const char *s) +{ + const khint_t offset_basis = 2166136261; + const khint_t FNV_prime = 16777619; + khint_t h = offset_basis; + for (; *s; ++s) h = (h ^ (uint8_t) *s) * FNV_prime; + return h; +} + +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a nul terminated string [const char*] + @return The hash value [khint_t] + */ +#define kh_str_hash_func(key) __ac_FNV1a_hash_string(key) + +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +/*! 
@function + @abstract Kstring hash function + @param ks The kstring to hash (passed by value) + @return The hash value + */ +static kh_inline khint_t __ac_X31_hash_kstring(const kstring_t ks) +{ + khint_t h = 0; + size_t i; + for (i = 0; i < ks.l; i++) + h = (h << 5) - h + (khint_t)ks.s[i]; + return h; +} + +/*! @function + @abstract Kstring FNV1a hash function + @param ks The kstring to hash (passed by value) + @return The hash value + */ +static kh_inline khint_t __ac_FNV1a_hash_kstring(const kstring_t ks) +{ + const khint_t offset_basis = 2166136261; + const khint_t FNV_prime = 16777619; + khint_t h = offset_basis; + size_t i; + for (i = 0; i < ks.l; i++) + h = (h ^ (uint8_t) ks.s[i]) * FNV_prime; + return h; +} + +/*! @function + @abstract Interface to kstring hash function. + @param key A kstring [kstring_t]; permits hashing on non-nul terminated strings. + @return The hash value [khint_t] + */ +#define kh_kstr_hash_func(key) __ac_FNV1a_hash_kstring(key) +/*! @function + @abstract kstring comparison function + */ +#define kh_kstr_hash_equal(a, b) ((a).l == (b).l && strncmp((a).s, (b).s, (a).l) == 0) + +static kh_inline khint_t __ac_Wang_hash(khint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)(key)) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*! + @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name() + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! 
@function + @abstract Reset a hash table without deallocating memory. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! @function + @abstract Insert a key to the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @param r Extra return code: -1 if the operation failed; + 0 if the key is present in the hash table; + 1 if the bucket is empty (never used); 2 if the element in + the bucket has been deleted [int*] + @return Iterator to the inserted element [khint_t] + */ +#define kh_put(name, h, k, r) kh_put_##name(h, k, r) + +/*! @function + @abstract Retrieve a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] + */ +#define kh_get(name, h, k) kh_get_##name(h, k) + +/*! @function + @abstract Remove a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Iterator to the element to be deleted [khint_t] + */ +#define kh_del(name, h, k) kh_del_##name(h, k) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) + +/*! 
@function + @abstract Get key given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get value given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->vals[x]) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) ((h)->vals[x]) + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The end iterator [khint_t] + */ +#define kh_end(h) ((h)->n_buckets) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->size) + +/*! @function + @abstract Get the number of buckets in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of buckets in the hash table [khint_t] + */ +#define kh_n_buckets(h) ((h)->n_buckets) + +/*! @function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! 
@function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! @function + @abstract Gather hash table statistics + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param empty[out] Number of empty hash bins + @param deleted[out] Number of hash bins with the deleted flag + @param hist_size[out] Size of @p hist array + @param hist[out] Probe count histogram + @return 0 on success; -1 on failure + */ +#define kh_stats(name, h, empty, deleted, hist_size, hist) \ + kh_stats_##name(h, empty, deleted, hist_size, hist) + +/* More convenient interfaces */ + +/*! @function + @abstract Instantiate a hash set containing integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash set containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! 
@function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash set containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + +/*! @function + @abstract Instantiate a hash set containing kstring_t keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_KSTR(name) \ + KHASH_INIT(name, kstring_t, char, 0, kh_kstr_hash_func, kh_kstr_hash_equal) + +/*! 
@function + @abstract Instantiate a hash map containing kstring_t keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_KSTR(name, khval_t) \ + KHASH_INIT(name, kstring_t, khval_t, 1, kh_kstr_hash_func, kh_kstr_hash_equal) + +#endif /* __AC_KHASH_H */ diff --git a/src/htslib-1.18/htslib/khash_str2int.h b/src/htslib-1.21/htslib/khash_str2int.h similarity index 100% rename from src/htslib-1.18/htslib/khash_str2int.h rename to src/htslib-1.21/htslib/khash_str2int.h diff --git a/src/htslib-1.19.1/htslib/klist.h b/src/htslib-1.21/htslib/klist.h similarity index 100% rename from src/htslib-1.19.1/htslib/klist.h rename to src/htslib-1.21/htslib/klist.h diff --git a/src/htslib-1.18/htslib/knetfile.h b/src/htslib-1.21/htslib/knetfile.h similarity index 100% rename from src/htslib-1.18/htslib/knetfile.h rename to src/htslib-1.21/htslib/knetfile.h diff --git a/src/htslib-1.18/htslib/kroundup.h b/src/htslib-1.21/htslib/kroundup.h similarity index 100% rename from src/htslib-1.18/htslib/kroundup.h rename to src/htslib-1.21/htslib/kroundup.h diff --git a/src/htslib-1.19.1/htslib/kseq.h b/src/htslib-1.21/htslib/kseq.h similarity index 100% rename from src/htslib-1.19.1/htslib/kseq.h rename to src/htslib-1.21/htslib/kseq.h diff --git a/src/htslib-1.18/htslib/ksort.h b/src/htslib-1.21/htslib/ksort.h similarity index 100% rename from src/htslib-1.18/htslib/ksort.h rename to src/htslib-1.21/htslib/ksort.h diff --git a/src/htslib-1.21/htslib/kstring.h b/src/htslib-1.21/htslib/kstring.h new file mode 100644 index 0000000..ebb2f93 --- /dev/null +++ b/src/htslib-1.21/htslib/kstring.h @@ -0,0 +1,457 @@ +/* The MIT License + + Copyright (C) 2011 by Attractive Chaos + Copyright (C) 2013-2014, 2016, 2018-2020, 2022, 2024 Genome Research Ltd. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef KSTRING_H +#define KSTRING_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hts_defs.h" +#include "kroundup.h" + +#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)) +#ifdef __MINGW_PRINTF_FORMAT +#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__MINGW_PRINTF_FORMAT, fmt, arg))) +#else +#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg))) +#endif // __MINGW_PRINTF_FORMAT +#else +#define KS_ATTR_PRINTF(fmt, arg) +#endif + +#ifndef HAVE___BUILTIN_CLZ +#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +#define HAVE___BUILTIN_CLZ 1 +#endif +#endif + +// Ensure ssize_t exists within this header. All #includes must precede this, +// and ssize_t must be undefined again at the end of this header. 
+#if defined _MSC_VER && defined _INTPTR_T_DEFINED && !defined _SSIZE_T_DEFINED && !defined ssize_t +#define HTSLIB_SSIZE_T +#define ssize_t intptr_t +#endif + +/* kstring_t is a simple non-opaque type whose fields are likely to be + * used directly by user code (but see also ks_str() and ks_len() below). + * A kstring_t object is initialised by either of + * kstring_t str = KS_INITIALIZE; + * kstring_t str; ...; ks_initialize(&str); + * and either ownership of the underlying buffer should be given away before + * the object disappears (see ks_release() below) or the kstring_t should be + * destroyed with ks_free(&str) or free(str.s) */ +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct kstring_t { + size_t l, m; + char *s; +} kstring_t; +#endif + +typedef struct ks_tokaux_t { + uint64_t tab[4]; + int sep, finished; + const char *p; // end of the current token +} ks_tokaux_t; + +#ifdef __cplusplus +extern "C" { +#endif + + HTSLIB_EXPORT + int kvsprintf(kstring_t *s, const char *fmt, va_list ap) KS_ATTR_PRINTF(2,0); + + HTSLIB_EXPORT + int ksprintf(kstring_t *s, const char *fmt, ...) KS_ATTR_PRINTF(2,3); + + HTSLIB_EXPORT + int kputd(double d, kstring_t *s); // custom %g only handler + + HTSLIB_EXPORT + int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); + + HTSLIB_EXPORT + char *kstrstr(const char *str, const char *pat, int **_prep); + + HTSLIB_EXPORT + char *kstrnstr(const char *str, const char *pat, int n, int **_prep); + + HTSLIB_EXPORT + void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep); + + /* kstrtok() is similar to strtok_r() except that str is not + * modified and both str and sep can be NULL. For efficiency, it is + * actually recommended to set both to NULL in the subsequent calls + * if sep is not changed. 
*/ + HTSLIB_EXPORT + char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux); + + /* kgetline() uses the supplied fgets()-like function to read a "\n"- + * or "\r\n"-terminated line from fp. The line read is appended to the + * kstring without its terminator and 0 is returned; EOF is returned at + * EOF or on error (determined by querying fp, as per fgets()). */ + typedef char *kgets_func(char *, int, void *); + HTSLIB_EXPORT + int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp); + + /* kgetline2() uses the supplied hgetln()-like function to read a "\n"- + * or "\r\n"-terminated line from fp. The line read is appended to the + * kstring without its terminator and 0 is returned; EOF is returned at + * EOF or on error (determined by querying fp, as per fgets()). */ + typedef ssize_t kgets_func2(char *, size_t, void *); + HTSLIB_EXPORT + int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp); + +#ifdef __cplusplus +} +#endif + +/// kstring initializer for structure assignment +#define KS_INITIALIZE { 0, 0, NULL } + +/// kstring initializer for pointers +/** + @note Not to be used if the buffer has been allocated. Use ks_release() + or ks_clear() instead. +*/ + +static inline void ks_initialize(kstring_t *s) +{ + s->l = s->m = 0; + s->s = NULL; +} + +/// Resize a kstring to a given capacity +static inline int ks_resize(kstring_t *s, size_t size) +{ + if (s->m < size) { + char *tmp; + size = (size > (SIZE_MAX>>2)) ?
size : size + (size >> 1); + tmp = (char*)realloc(s->s, size); + if (!tmp) + return -1; + s->s = tmp; + s->m = size; + } + return 0; +} + +/// Increase kstring capacity by a given number of bytes +static inline int ks_expand(kstring_t *s, size_t expansion) +{ + size_t new_size = s->l + expansion; + + if (new_size < s->l) // Overflow check + return -1; + return ks_resize(s, new_size); +} + +/// Returns the kstring buffer +static inline char *ks_str(kstring_t *s) +{ + return s->s; +} + +/// Returns the kstring buffer, or an empty string if l == 0 +/** + * Unlike ks_str(), this function will never return NULL. If the kstring is + * empty it will return a read-only empty string. As the returned value + * may be read-only, the caller should not attempt to modify it. + */ +static inline const char *ks_c_str(kstring_t *s) +{ + return s->l && s->s ? s->s : ""; +} + +static inline size_t ks_len(kstring_t *s) +{ + return s->l; +} + +/// Reset kstring length to zero +/** + @return The kstring itself + + Example use: kputsn(string, len, ks_clear(s)) +*/ +static inline kstring_t *ks_clear(kstring_t *s) +{ + s->l = 0; + return s; +} + +// Give ownership of the underlying buffer away to something else (making +// that something else responsible for freeing it), leaving the kstring_t +// empty and ready to be used again, or ready to go out of scope without +// needing free(str.s) to prevent a memory leak. +static inline char *ks_release(kstring_t *s) +{ + char *ss = s->s; + s->l = s->m = 0; + s->s = NULL; + return ss; +} + +/// Safely free the underlying buffer in a kstring. 
+static inline void ks_free(kstring_t *s) +{ + if (s) { + free(s->s); + ks_initialize(s); + } +} + +static inline int kputsn(const char *p, size_t l, kstring_t *s) +{ + size_t new_sz = s->l + l + 2; + if (new_sz <= s->l || ks_resize(s, new_sz) < 0) + return EOF; + memcpy(s->s + s->l, p, l); + s->l += l; + s->s[s->l] = 0; + return l; +} + +static inline int kputs(const char *p, kstring_t *s) +{ + if (!p) { errno = EFAULT; return -1; } + return kputsn(p, strlen(p), s); +} + +static inline int kputc(int c, kstring_t *s) +{ + if (ks_resize(s, s->l + 2) < 0) + return EOF; + s->s[s->l++] = c; + s->s[s->l] = 0; + return (unsigned char)c; +} + +static inline int kputc_(int c, kstring_t *s) +{ + if (ks_resize(s, s->l + 1) < 0) + return EOF; + s->s[s->l++] = c; + return 1; +} + +static inline int kputsn_(const void *p, size_t l, kstring_t *s) +{ + size_t new_sz = s->l + l; + if (new_sz < s->l || ks_resize(s, new_sz ? new_sz : 1) < 0) + return EOF; + memcpy(s->s + s->l, p, l); + s->l += l; + return l; +} + +static inline int kputuw(unsigned x, kstring_t *s) +{ +#if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U + static const unsigned int kputuw_num_digits[32] = { + 10, 10, 10, 9, 9, 9, 8, 8, + 8, 7, 7, 7, 7, 6, 6, 6, + 5, 5, 5, 4, 4, 4, 4, 3, + 3, 3, 2, 2, 2, 1, 1, 1 + }; + static const unsigned int kputuw_thresholds[32] = { + 0, 0, 1000000000U, 0, 0, 100000000U, 0, 0, + 10000000, 0, 0, 0, 1000000, 0, 0, 100000, + 0, 0, 10000, 0, 0, 0, 1000, 0, + 0, 100, 0, 0, 10, 0, 0, 0 + }; +#else + uint64_t m; +#endif + static const char kputuw_dig2r[] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + unsigned int l, j; + char *cp; + + // Trivial case - also prevents __builtin_clz(0), which is undefined + if (x < 10) { + if (ks_resize(s, s->l + 2) < 0) + return EOF; + s->s[s->l++] = 
'0'+x; + s->s[s->l] = 0; + return 0; + } + + // Find out how many digits are to be printed. +#if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U + /* + * Table method - should be quick if clz can be done in hardware. + * Find the most significant bit of the value to print and look + * up in a table to find out how many decimal digits are needed. + * This number needs to be adjusted by 1 for cases where the decimal + * length could vary for a given number of bits (for example, + * a four bit number could be between 8 and 15). + */ + + l = __builtin_clz(x); + l = kputuw_num_digits[l] - (x < kputuw_thresholds[l]); +#else + // Fallback for when clz is not available + m = 1; + l = 0; + do { + l++; + m *= 10; + } while (x >= m); +#endif + + if (ks_resize(s, s->l + l + 2) < 0) + return EOF; + + // Add digits two at a time + j = l; + cp = s->s + s->l; + while (x >= 10) { + const char *d = &kputuw_dig2r[2*(x%100)]; + x /= 100; + memcpy(&cp[j-=2], d, 2); + } + + // Last one (if necessary). We know that x < 10 by now. + if (j == 1) + cp[0] = x + '0'; + + s->l += l; + s->s[s->l] = 0; + return 0; +} + +static inline int kputw(int c, kstring_t *s) +{ + unsigned int x = c; + if (c < 0) { + x = -x; + if (ks_resize(s, s->l + 3) < 0) + return EOF; + s->s[s->l++] = '-'; + } + + return kputuw(x, s); +} + +static inline int kputll(long long c, kstring_t *s) +{ + // Worst case expansion. One check reduces function size + // and aids inlining chance. Memory overhead is minimal. 
+ if (ks_resize(s, s->l + 23) < 0) + return EOF; + + unsigned long long x = c; + if (c < 0) { + x = -x; + s->s[s->l++] = '-'; + } + + if (x <= UINT32_MAX) + return kputuw(x, s); + + static const char kputull_dig2r[] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + unsigned int l, j; + char *cp; + + // Find out how long the number is (could consider clzll) + uint64_t m = 1; + l = 0; + if (sizeof(long long)==sizeof(uint64_t) && x >= 10000000000000000000ULL) { + // avoids overflow below + l = 20; + } else { + do { + l++; + m *= 10; + } while (x >= m); + } + + // Add digits two at a time + j = l; + cp = s->s + s->l; + while (x >= 10) { + const char *d = &kputull_dig2r[2*(x%100)]; + x /= 100; + memcpy(&cp[j-=2], d, 2); + } + + // Last one (if necessary). We know that x < 10 by now. + if (j == 1) + cp[0] = x + '0'; + + s->l += l; + s->s[s->l] = 0; + return 0; +} + +static inline int kputl(long c, kstring_t *s) { + return kputll(c, s); +} + +/* + * Returns 's' split by delimiter, with *n being the number of components; + * NULL on failure. 
+ */ +static inline int *ksplit(kstring_t *s, int delimiter, int *n) +{ + int max = 0, *offsets = 0; + *n = ksplit_core(s->s, delimiter, &max, &offsets); + return offsets; +} + +#ifdef HTSLIB_SSIZE_T +#undef HTSLIB_SSIZE_T +#undef ssize_t +#endif + +#endif diff --git a/src/htslib-1.18/htslib/regidx.h b/src/htslib-1.21/htslib/regidx.h similarity index 100% rename from src/htslib-1.18/htslib/regidx.h rename to src/htslib-1.21/htslib/regidx.h diff --git a/src/htslib-1.19.1/htslib/sam.h b/src/htslib-1.21/htslib/sam.h similarity index 100% rename from src/htslib-1.19.1/htslib/sam.h rename to src/htslib-1.21/htslib/sam.h diff --git a/src/htslib-1.18/htslib/synced_bcf_reader.h b/src/htslib-1.21/htslib/synced_bcf_reader.h similarity index 100% rename from src/htslib-1.18/htslib/synced_bcf_reader.h rename to src/htslib-1.21/htslib/synced_bcf_reader.h diff --git a/src/htslib-1.21/htslib/tbx.h b/src/htslib-1.21/htslib/tbx.h new file mode 100644 index 0000000..f4b5bd8 --- /dev/null +++ b/src/htslib-1.21/htslib/tbx.h @@ -0,0 +1,144 @@ +/// @file htslib/tbx.h +/// Tabix API functions. +/* + Copyright (C) 2009, 2012-2015, 2019 Genome Research Ltd. + Copyright (C) 2010, 2012 Broad Institute. + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_TBX_H +#define HTSLIB_TBX_H + +#include "hts.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define TBX_MAX_SHIFT 31 + +#define TBX_GENERIC 0 +#define TBX_SAM 1 +#define TBX_VCF 2 +#define TBX_GAF 3 +#define TBX_UCSC 0x10000 + +typedef struct tbx_conf_t { + int32_t preset; + int32_t sc, bc, ec; // seq col., beg col. and end col. + int32_t meta_char, line_skip; +} tbx_conf_t; + +typedef struct tbx_t { + tbx_conf_t conf; + hts_idx_t *idx; + void *dict; +} tbx_t; + +HTSLIB_EXPORT +extern const tbx_conf_t tbx_conf_gff, tbx_conf_bed, tbx_conf_psltbl, tbx_conf_sam, tbx_conf_vcf, tbx_conf_gaf; + + #define tbx_itr_destroy(iter) hts_itr_destroy(iter) + #define tbx_itr_queryi(tbx, tid, beg, end) hts_itr_query((tbx)->idx, (tid), (beg), (end), tbx_readrec) + #define tbx_itr_querys(tbx, s) hts_itr_querys((tbx)->idx, (s), (hts_name2id_f)(tbx_name2id), (tbx), hts_itr_query, tbx_readrec) + #define tbx_itr_next(htsfp, tbx, itr, r) hts_itr_next(hts_get_bgzfp(htsfp), (itr), (r), (tbx)) + #define tbx_bgzf_itr_next(bgzfp, tbx, itr, r) hts_itr_next((bgzfp), (itr), (r), (tbx)) + + HTSLIB_EXPORT + int tbx_name2id(tbx_t *tbx, const char *ss); + + /* Internal helper function used by tbx_itr_next() */ + HTSLIB_EXPORT + BGZF *hts_get_bgzfp(htsFile *fp); + + HTSLIB_EXPORT + int tbx_readrec(BGZF *fp, void *tbxv, void *sv, int *tid, hts_pos_t *beg, hts_pos_t *end); + +/// Build an index of the lines in a BGZF-compressed file +/** The index struct returned by a successful call should be freed + via tbx_destroy() when it is no longer needed. 
+*/ + HTSLIB_EXPORT + tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf); +/* + * All tbx_index_build* methods return: 0 (success), -1 (general failure) or -2 (compression not BGZF) + */ + HTSLIB_EXPORT + int tbx_index_build(const char *fn, int min_shift, const tbx_conf_t *conf); + + HTSLIB_EXPORT + int tbx_index_build2(const char *fn, const char *fnidx, int min_shift, const tbx_conf_t *conf); + + HTSLIB_EXPORT + int tbx_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads, const tbx_conf_t *conf); + + +/// Load or stream a .tbi or .csi index +/** @param fn Name of the data file corresponding to the index + + Equivalent to tbx_index_load3(fn, NULL, HTS_IDX_SAVE_REMOTE); +*/ + HTSLIB_EXPORT + tbx_t *tbx_index_load(const char *fn); + +/// Load or stream a .tbi or .csi index +/** @param fn Name of the data file corresponding to the index + @param fnidx Name of the index file + @return The index, or NULL if an error occurred + + If @p fnidx is NULL, the index name will be derived from @p fn. + + Equivalent to tbx_index_load3(fn, fnidx, HTS_IDX_SAVE_REMOTE); +*/ + HTSLIB_EXPORT + tbx_t *tbx_index_load2(const char *fn, const char *fnidx); + +/// Load or stream a .tbi or .csi index +/** @param fn Name of the data file corresponding to the index + @param fnidx Name of the index file + @param flags Flags to alter behaviour (see description) + @return The index, or NULL if an error occurred + + If @p fnidx is NULL, the index name will be derived from @p fn. + + The @p flags parameter can be set to a combination of the following + values: + + HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes + HTS_IDX_SILENT_FAIL Fail silently if the index is not present + + The index struct returned by a successful call should be freed + via tbx_destroy() when it is no longer needed.
+*/ + HTSLIB_EXPORT + tbx_t *tbx_index_load3(const char *fn, const char *fnidx, int flags); + + HTSLIB_EXPORT + const char **tbx_seqnames(tbx_t *tbx, int *n); // free the array but not the values + + HTSLIB_EXPORT + void tbx_destroy(tbx_t *tbx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/htslib-1.18/htslib/thread_pool.h b/src/htslib-1.21/htslib/thread_pool.h similarity index 100% rename from src/htslib-1.18/htslib/thread_pool.h rename to src/htslib-1.21/htslib/thread_pool.h diff --git a/src/htslib-1.21/htslib/vcf.h b/src/htslib-1.21/htslib/vcf.h new file mode 100644 index 0000000..9a36cab --- /dev/null +++ b/src/htslib-1.21/htslib/vcf.h @@ -0,0 +1,1674 @@ +/// @file htslib/vcf.h +/// High-level VCF/BCF variant calling file operations. +/* + Copyright (C) 2012, 2013 Broad Institute. + Copyright (C) 2012-2020, 2022-2023 Genome Research Ltd. + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +/* + todo: + - make the function names consistent + - provide calls to abstract away structs as much as possible + */ + +#ifndef HTSLIB_VCF_H +#define HTSLIB_VCF_H + +#include +#include +#include +#include "hts.h" +#include "kstring.h" +#include "hts_defs.h" +#include "hts_endian.h" + +/* Included only for backwards compatibility with e.g. bcftools 1.10 */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/***************** + * Header struct * + *****************/ + +#define BCF_HL_FLT 0 // header line +#define BCF_HL_INFO 1 +#define BCF_HL_FMT 2 +#define BCF_HL_CTG 3 +#define BCF_HL_STR 4 // structured header line TAG= +#define BCF_HL_GEN 5 // generic header line + +#define BCF_HT_FLAG 0 // header type +#define BCF_HT_INT 1 +#define BCF_HT_REAL 2 +#define BCF_HT_STR 3 +#define BCF_HT_LONG (BCF_HT_INT | 0x100) // BCF_HT_INT, but for int64_t values; VCF only! + +#define BCF_VL_FIXED 0 // variable length +#define BCF_VL_VAR 1 +#define BCF_VL_A 2 +#define BCF_VL_G 3 +#define BCF_VL_R 4 + +/* === Dictionary === + + The header keeps three dictionaries. The first keeps IDs in the + "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths + in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[] + is the actual hash table, which is opaque to the end users. In the hash + table, the key is the ID or sample name as a C string and the value is a + bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash + table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the + size of the hash table or, equivalently, the length of the id[] arrays. +*/ + +#define BCF_DT_ID 0 // dictionary type +#define BCF_DT_CTG 1 +#define BCF_DT_SAMPLE 2 + +// Complete textual representation of a header line +typedef struct bcf_hrec_t { + int type; // One of the BCF_HL_* type + char *key; // The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc. 
+ char *value; // Set only for generic lines, NULL for FILTER/INFO, etc. + int nkeys; // Number of structured fields + char **keys, **vals; // The key=value pairs +} bcf_hrec_t; + +typedef struct bcf_idinfo_t { + uint64_t info[3]; // stores Number:20, var:4, Type:4, ColType:4 in info[0..2] + // for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG + bcf_hrec_t *hrec[3]; + int id; +} bcf_idinfo_t; + +typedef struct bcf_idpair_t { + const char *key; + const bcf_idinfo_t *val; +} bcf_idpair_t; + +// Note that bcf_hdr_t structs must always be created via bcf_hdr_init() +typedef struct bcf_hdr_t { + int32_t n[3]; // n:the size of the dictionary block in use, (allocated size, m, is below to preserve ABI) + bcf_idpair_t *id[3]; + void *dict[3]; // ID dictionary, contig dict and sample dict + char **samples; + bcf_hrec_t **hrec; + int nhrec, dirty; + int ntransl, *transl[2]; // for bcf_translate() + int nsamples_ori; // for bcf_hdr_set_samples() + uint8_t *keep_samples; + kstring_t mem; + int32_t m[3]; // m: allocated size of the dictionary block in use (see n above) +} bcf_hdr_t; + +HTSLIB_EXPORT +extern uint8_t bcf_type_shift[]; + +/************** + * VCF record * + **************/ + +#define BCF_BT_NULL 0 +#define BCF_BT_INT8 1 +#define BCF_BT_INT16 2 +#define BCF_BT_INT32 3 +#define BCF_BT_INT64 4 // Unofficial, for internal use only. 
+#define BCF_BT_FLOAT 5 +#define BCF_BT_CHAR 7 + +#define VCF_REF 0 +#define VCF_SNP (1<<0) +#define VCF_MNP (1<<1) +#define VCF_INDEL (1<<2) +#define VCF_OTHER (1<<3) +#define VCF_BND (1<<4) // breakend +#define VCF_OVERLAP (1<<5) // overlapping deletion, ALT=* +#define VCF_INS (1<<6) // implies VCF_INDEL +#define VCF_DEL (1<<7) // implies VCF_INDEL +#define VCF_ANY (VCF_SNP|VCF_MNP|VCF_INDEL|VCF_OTHER|VCF_BND|VCF_OVERLAP|VCF_INS|VCF_DEL) // any variant type (but not VCF_REF) + +typedef struct bcf_variant_t { + int type, n; // variant type and the number of bases affected, negative for deletions +} bcf_variant_t; + +typedef struct bcf_fmt_t { + int id; // id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key + int n, size, type; // n: number of values per-sample; size: number of bytes per-sample; type: one of BCF_BT_* types + uint8_t *p; // same as vptr and vptr_* in bcf_info_t below + uint32_t p_len; + uint32_t p_off:31, p_free:1; +} bcf_fmt_t; + +typedef struct bcf_info_t { + int key; // key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key + int type; // type: one of BCF_BT_* types + union { + int64_t i; // integer value + float f; // float value + } v1; // only set if $len==1; for easier access + uint8_t *vptr; // pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes + uint32_t vptr_len; // length of the vptr block or, when set, of the vptr_mod block, excluding offset + uint32_t vptr_off:31, // vptr offset, i.e., the size of the INFO key plus size+type bytes + vptr_free:1; // indicates that vptr-vptr_off must be freed; set only when modified and the new + // data block is bigger than the original + int len; // vector length, 1 for scalars +} bcf_info_t; + + +#define BCF1_DIRTY_ID 1 +#define BCF1_DIRTY_ALS 2 +#define BCF1_DIRTY_FLT 4 +#define BCF1_DIRTY_INF 8 + +typedef struct bcf_dec_t { + int m_fmt, m_info, m_id, m_als, m_allele, m_flt; // allocated size (high-water mark); 
do not change + int n_flt; // Number of FILTER fields + int *flt; // FILTER keys in the dictionary + char *id, *als; // ID and REF+ALT block (\0-separated) + char **allele; // allele[0] is the REF (allele[] pointers to the als block); all null terminated + bcf_info_t *info; // INFO + bcf_fmt_t *fmt; // FORMAT and individual sample + bcf_variant_t *var; // $var and $var_type set only when set_variant_types called + int n_var, var_type; + int shared_dirty; // if set, shared.s must be recreated on BCF output + int indiv_dirty; // if set, indiv.s must be recreated on BCF output +} bcf_dec_t; + + +#define BCF_ERR_CTG_UNDEF 1 +#define BCF_ERR_TAG_UNDEF 2 +#define BCF_ERR_NCOLS 4 +#define BCF_ERR_LIMITS 8 +#define BCF_ERR_CHAR 16 +#define BCF_ERR_CTG_INVALID 32 +#define BCF_ERR_TAG_INVALID 64 + +/// Get error description for bcf error code +/** @param errorcode The error code which is to be described + @param buffer The buffer in which description to be added + @param maxbuffer The size of buffer passed + @return NULL on invalid buffer; buffer on other cases + +The buffer will be an empty string when @p errorcode is 0. +Description of errors present in code will be appended to @p buffer with ',' separation. +The buffer has to be at least 4 characters long. NULL will be returned if it is smaller or when buffer is NULL. + +'...' will be appended if the description doesn't fit in the given buffer. + */ + +HTSLIB_EXPORT +const char *bcf_strerror(int errorcode, char *buffer, size_t maxbuffer); + +/* + The bcf1_t structure corresponds to one VCF/BCF line. Reading from VCF file + is slower because the string is first to be parsed, packed into BCF line + (done in vcf_parse), then unpacked into internal bcf1_t structure. If it + is known in advance that some of the fields will not be required (notably + the sample columns), parsing of these can be skipped by setting max_unpack + appropriately. 
+ Similarly, it is fast to output a BCF line because the columns (kept in + shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF + line must be formatted in vcf_format. + */ +typedef struct bcf1_t { + hts_pos_t pos; // POS + hts_pos_t rlen; // length of REF + int32_t rid; // CHROM + float qual; // QUAL + uint32_t n_info:16, n_allele:16; + uint32_t n_fmt:8, n_sample:24; + kstring_t shared, indiv; + bcf_dec_t d; // lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack() + int max_unpack; // Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed + int unpacked; // remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work + int unpack_size[3]; // the original block size of ID, REF+ALT and FILTER + int errcode; // one of BCF_ERR_* codes +} bcf1_t; + +/******* + * API * + *******/ + + /*********************************************************************** + * BCF and VCF I/O + * + * A note about naming conventions: htslib internally represents VCF + * records as bcf1_t data structures, therefore most functions are + * prefixed with bcf_. There are a few exceptions where the functions must + * be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In + * these cases, functions prefixed with bcf_ are more general and work + * with both BCF and VCF. 
+ * + ***********************************************************************/ + + /** These macros are defined only for consistency with other parts of htslib */ + #define bcf_init1() bcf_init() + #define bcf_read1(fp,h,v) bcf_read((fp),(h),(v)) + #define vcf_read1(fp,h,v) vcf_read((fp),(h),(v)) + #define bcf_write1(fp,h,v) bcf_write((fp),(h),(v)) + #define vcf_write1(fp,h,v) vcf_write((fp),(h),(v)) + #define bcf_destroy1(v) bcf_destroy(v) + #define bcf_empty1(v) bcf_empty(v) + #define vcf_parse1(s,h,v) vcf_parse((s),(h),(v)) + #define bcf_clear1(v) bcf_clear(v) + #define vcf_format1(h,v,s) vcf_format((h),(v),(s)) + + /** + * bcf_hdr_init() - create an empty BCF header. + * @param mode "r" or "w" + * + * When opened for writing, the mandatory fileFormat and + * FILTER=PASS lines are added automatically. + * + * The bcf_hdr_t struct returned by a successful call should be freed + * via bcf_hdr_destroy() when it is no longer needed. + */ + HTSLIB_EXPORT + bcf_hdr_t *bcf_hdr_init(const char *mode); + + /** Destroy a BCF header struct */ + HTSLIB_EXPORT + void bcf_hdr_destroy(bcf_hdr_t *h); + + /** Allocate and initialize a bcf1_t object. + * + * The bcf1_t struct returned by a successful call should be freed + * via bcf_destroy() when it is no longer needed. + */ + HTSLIB_EXPORT + bcf1_t *bcf_init(void); + + /** Deallocate a bcf1_t object */ + HTSLIB_EXPORT + void bcf_destroy(bcf1_t *v); + + /** + * Same as bcf_destroy() but frees only the memory allocated by bcf1_t, + * not the bcf1_t object itself. + */ + HTSLIB_EXPORT + void bcf_empty(bcf1_t *v); + + /** + * Make the bcf1_t object ready for next read. Intended mostly for + * internal use, the user should rarely need to call this function + * directly. 
+ */ + HTSLIB_EXPORT + void bcf_clear(bcf1_t *v); + + + /** bcf_open and vcf_open mode: please see hts_open() in hts.h */ + typedef htsFile vcfFile; + #define bcf_open(fn, mode) hts_open((fn), (mode)) + #define vcf_open(fn, mode) hts_open((fn), (mode)) + #define bcf_flush(fp) hts_flush((fp)) + #define bcf_close(fp) hts_close(fp) + #define vcf_close(fp) hts_close(fp) + + /// Read a VCF or BCF header + /** @param fp The file to read the header from + @return Pointer to a populated header structure on success; + NULL on failure + + The bcf_hdr_t struct returned by a successful call should be freed + via bcf_hdr_destroy() when it is no longer needed. + */ + HTSLIB_EXPORT + bcf_hdr_t *bcf_hdr_read(htsFile *fp) HTS_RESULT_USED; + + /** + * bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed + * @param samples samples to include or exclude from file or as a comma-separated string. + * LIST|FILE .. select samples in list/file + * ^LIST|FILE .. exclude samples from list/file + * - .. include all samples + * NULL .. exclude all samples + * @param is_file @p samples is a file (1) or a comma-separated list (0) + * + * The bottleneck of VCF reading is parsing of genotype fields. If the + * reader knows in advance that only subset of samples is needed (possibly + * no samples at all), the performance of bcf_read() can be significantly + * improved by calling bcf_hdr_set_samples after bcf_hdr_read(). + * The function bcf_read() will subset the VCF/BCF records automatically + * with the notable exception when reading records via bcf_itr_next(). + * In this case, bcf_subset_format() must be called explicitly, because + * bcf_readrec() does not see the header. + * + * Returns 0 on success, -1 on error or a positive integer if the list + * contains samples not present in the VCF header. In such a case, the + * return value is the index of the offending sample. 
+ */ + HTSLIB_EXPORT + int bcf_hdr_set_samples(bcf_hdr_t *hdr, const char *samples, int is_file) HTS_RESULT_USED; + + HTSLIB_EXPORT + int bcf_subset_format(const bcf_hdr_t *hdr, bcf1_t *rec); + + /// Write a VCF or BCF header + /** @param fp Output file + @param h The header to write + @return 0 on success; -1 on failure + */ + HTSLIB_EXPORT + int bcf_hdr_write(htsFile *fp, bcf_hdr_t *h) HTS_RESULT_USED; + + /** + * Parse VCF line contained in kstring and populate the bcf1_t struct + * The line must not end with \n or \r characters. + */ + HTSLIB_EXPORT + int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v); + + /** + * Complete the file opening mode, according to its extension. + * @param mode Preallocated mode string to be completed. + * @param fn File name to be opened. + * @param format Format string (vcf|bcf|vcf.gz) + * @return 0 on success; -1 on failure + */ + HTSLIB_EXPORT + int vcf_open_mode(char *mode, const char *fn, const char *format); + + /** The opposite of vcf_parse. It should rarely be called directly, see vcf_write */ + HTSLIB_EXPORT + int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s); + + /// Read next VCF or BCF record + /** @param fp The file to read the record from + @param h The header for the vcf/bcf file + @param v The bcf1_t structure to populate + @return 0 on success; -1 on end of file; < -1 on critical error + +On errors which are not critical for reading, such as missing header +definitions in vcf files, zero will be returned but v->errcode will have been +set to one of BCF_ERR* codes and must be checked before calling bcf_write(). + */ + HTSLIB_EXPORT + int bcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; + + /** + * bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field) + * + * Note that bcf_unpack() must be called even when reading VCF. It is safe + * to call the function repeatedly, it will not unpack the same field + * twice. 
+ */ + #define BCF_UN_STR 1 // up to ALT inclusive + #define BCF_UN_FLT 2 // up to FILTER + #define BCF_UN_INFO 4 // up to INFO + #define BCF_UN_SHR (BCF_UN_STR|BCF_UN_FLT|BCF_UN_INFO) // all shared information + #define BCF_UN_FMT 8 // unpack format and each sample + #define BCF_UN_IND BCF_UN_FMT // a synonym of BCF_UN_FMT + #define BCF_UN_ALL (BCF_UN_SHR|BCF_UN_FMT) // everything + HTSLIB_EXPORT + int bcf_unpack(bcf1_t *b, int which); + + /* + * bcf_dup() - create a copy of BCF record. + * + * Note that bcf_unpack() must be called on the returned copy as if it was + * obtained from bcf_read(). Also note that bcf_dup() calls bcf_sync1(src) + * internally to reflect any changes made by bcf_update_* functions. + * + * The bcf1_t struct returned by a successful call should be freed + * via bcf_destroy() when it is no longer needed. + */ + HTSLIB_EXPORT + bcf1_t *bcf_dup(bcf1_t *src); + + HTSLIB_EXPORT + bcf1_t *bcf_copy(bcf1_t *dst, bcf1_t *src); + + /// Write one VCF or BCF record. The type is determined at the open() call. + /** @param fp The file to write to + @param h The header for the vcf/bcf file + @param v The bcf1_t structure to write + @return 0 on success; -1 on error + */ + HTSLIB_EXPORT + int bcf_write(htsFile *fp, bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; + + /** + * The following functions work only with VCFs and should rarely be called + * directly. Usually one wants to use their bcf_* alternatives, which work + * transparently with both VCFs and BCFs. + */ + /// Read a VCF format header + /** @param fp The file to read the header from + @return Pointer to a populated header structure on success; + NULL on failure + + Use bcf_hdr_read() instead. + + The bcf_hdr_t struct returned by a successful call should be freed + via bcf_hdr_destroy() when it is no longer needed. 
+ */ + HTSLIB_EXPORT + bcf_hdr_t *vcf_hdr_read(htsFile *fp) HTS_RESULT_USED; + + /// Write a VCF format header + /** @param fp Output file + @param h The header to write + @return 0 on success; -1 on failure + + Use bcf_hdr_write() instead + */ + HTSLIB_EXPORT + int vcf_hdr_write(htsFile *fp, const bcf_hdr_t *h) HTS_RESULT_USED; + + /// Read a record from a VCF file + /** @param fp The file to read the record from + @param h The header for the vcf file + @param v The bcf1_t structure to populate + @return 0 on success; -1 on end of file; < -1 on error + + Use bcf_read() instead + */ + HTSLIB_EXPORT + int vcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; + + /// Write a record to a VCF file + /** @param fp The file to write to + @param h The header for the vcf file + @param v The bcf1_t structure to write + @return 0 on success; -1 on error + + Use bcf_write() instead + */ + HTSLIB_EXPORT + int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) HTS_RESULT_USED; + + /** Helper function for the bcf_itr_next() macro; internal use, ignore it */ + HTSLIB_EXPORT + int bcf_readrec(BGZF *fp, void *null, void *v, int *tid, hts_pos_t *beg, hts_pos_t *end); + + /// Write a line to a VCF file + /** @param line Line to write + @param fp File to write it to + @return 0 on success; -1 on failure + + @note No checks are done on the line being added, apart from + ensuring that it ends with a newline. This function + should therefore be used with care. + */ + HTSLIB_EXPORT + int vcf_write_line(htsFile *fp, kstring_t *line); + + /************************************************************************** + * Header querying and manipulation routines + **************************************************************************/ + + /** Create a new header using the supplied template + * + * The bcf_hdr_t struct returned by a successful call should be freed + * via bcf_hdr_destroy() when it is no longer needed. 
+ * @return NULL on failure, header otherwise + */ + HTSLIB_EXPORT + bcf_hdr_t *bcf_hdr_dup(const bcf_hdr_t *hdr); + + /** + * Copy header lines from src to dst if not already present in dst. See also bcf_translate(). + * Returns 0 on success or sets a bit on error: + * 1 .. conflicting definitions of tag length + * // todo + */ + HTSLIB_EXPORT + int bcf_hdr_combine(bcf_hdr_t *dst, const bcf_hdr_t *src) HTS_DEPRECATED("Please use bcf_hdr_merge instead"); + + /** + * bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate() + * @param dst: the destination header to be merged into, NULL on the first pass + * @param src: the source header + * @return NULL on failure, header otherwise + * + * Notes: + * - use as: + * bcf_hdr_t *dst = NULL; + * for (i=0; in[BCF_DT_SAMPLE] + + + /** The following functions are for internal use and should rarely be called directly */ + HTSLIB_EXPORT + int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt); + + /// Synchronize internal header structures + /** @param h Header + @return 0 on success, -1 on failure + + This function updates the id, sample and contig arrays in the + bcf_hdr_t structure so that they point to the same locations as + the id, sample and contig dictionaries. + */ + HTSLIB_EXPORT + int bcf_hdr_sync(bcf_hdr_t *h) HTS_RESULT_USED; + + /** + * bcf_hdr_parse_line() - parse a single line of VCF textual header + * @param h BCF header struct + * @param line One or more lines of header text + * @param len Filled out with length data parsed from 'line'. + * @return bcf_hrec_t* on success; + * NULL on error or on end of header text. 
+ * NB: to distinguish error from end-of-header, check *len: + * *len == 0 indicates @p line did not start with "##" + * *len == -1 indicates failure, likely due to out of memory + * *len > 0 indicates a malformed header line + * + * If *len > 0 on exit, it will contain the full length of the line + * including any trailing newline (this includes cases where NULL was + * returned due to a malformed line). Callers can use this to skip to + * the next header line. + */ + HTSLIB_EXPORT + bcf_hrec_t *bcf_hdr_parse_line(const bcf_hdr_t *h, const char *line, int *len); + /// Convert a bcf header record to string form + /** + * @param hrec Header record + * @param str Destination kstring + * @return 0 on success; < 0 on error + */ + HTSLIB_EXPORT + int bcf_hrec_format(const bcf_hrec_t *hrec, kstring_t *str); + + /// Add a header record into a header + /** + * @param hdr Destination header + * @param hrec Header record + * @return 0 on success, -1 on failure + * + * If this function returns success, ownership of @p hrec will have + * been transferred to the header structure. It may also have been + * freed if it was a duplicate of a record already in the header. + * Therefore the @p hrec pointer should not be used after a successful + * return from this function. + * + * If this function returns failure, ownership will not have been taken + * and the caller is responsible for cleaning up @p hrec. + */ + + HTSLIB_EXPORT + int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec); + + /** + * bcf_hdr_get_hrec() - get header line info + * @param type: one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN + * @param key: the header key for generic lines (e.g. "fileformat"), any field + * for structured lines, typically "ID". + * @param value: the value which pairs with key. Can be NULL for BCF_HL_GEN + * @param str_class: the class of BCF_HL_STR line (e.g.
"ALT" or "SAMPLE"), otherwise NULL + */ + HTSLIB_EXPORT + bcf_hrec_t *bcf_hdr_get_hrec(const bcf_hdr_t *hdr, int type, const char *key, const char *value, const char *str_class); + + /// Duplicate a header record + /** @param hrec Header record to copy + @return A new header record on success; NULL on failure + + The bcf_hrec_t struct returned by a successful call should be freed + via bcf_hrec_destroy() when it is no longer needed. + */ + HTSLIB_EXPORT + bcf_hrec_t *bcf_hrec_dup(bcf_hrec_t *hrec); + + /// Add a new header record key + /** @param hrec Header record + @param str Key name + @param len Length of @p str + @return 0 on success; -1 on failure + */ + HTSLIB_EXPORT + int bcf_hrec_add_key(bcf_hrec_t *hrec, const char *str, size_t len) HTS_RESULT_USED; + + /// Set a header record value + /** @param hrec Header record + @param i Index of value + @param str Value to set + @param len Length of @p str + @param is_quoted Value should be quoted + @return 0 on success; -1 on failure + */ + HTSLIB_EXPORT + int bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const char *str, size_t len, int is_quoted) HTS_RESULT_USED; + + HTSLIB_EXPORT + int bcf_hrec_find_key(bcf_hrec_t *hrec, const char *key); + + + /// Add an IDX header record + /** @param hrec Header record + @param idx IDX value to add + @return 0 on success; -1 on failure + */ + HTSLIB_EXPORT + int hrec_add_idx(bcf_hrec_t *hrec, int idx) HTS_RESULT_USED; + + /// Free up a header record and associated structures + /** @param hrec Header record + */ + HTSLIB_EXPORT + void bcf_hrec_destroy(bcf_hrec_t *hrec); + + + + /************************************************************************** + * Individual record querying and manipulation routines + **************************************************************************/ + + /** See the description of bcf_hdr_subset() */ + HTSLIB_EXPORT + int bcf_subset(const bcf_hdr_t *h, bcf1_t *v, int n, int *imap); + + /** + * bcf_translate() - translate tags ids to be consistent 
with different header. This function + * is useful when lines from multiple VCF need to be combined. + * @dst_hdr: the destination header, to be used in bcf_write(), see also bcf_hdr_combine() + * @src_hdr: the source header, used in bcf_read() + * @src_line: line obtained by bcf_read() + */ + HTSLIB_EXPORT + int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *src_line); + + /// Get variant types in a BCF record + /** + * @param rec BCF/VCF record + * @return Types of variant present + * + * The return value will be a bitwise-or of VCF_SNP, VCF_MNP, + * VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP. It will return + * VCF_REF (i.e. 0) if none of the other types is present. + * @deprecated Please use bcf_has_variant_types() instead + */ + HTSLIB_EXPORT + int bcf_get_variant_types(bcf1_t *rec); + + /// Get variant type in a BCF record, for a given allele + /** + * @param rec BCF/VCF record + * @param ith_allele Allele to check + * @return Type of variant present + * + * The return value will be one of VCF_REF, VCF_SNP, VCF_MNP, + * VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP. + * @deprecated Please use bcf_has_variant_type() instead + */ + HTSLIB_EXPORT + int bcf_get_variant_type(bcf1_t *rec, int ith_allele); + + /// Match mode for bcf_has_variant_types() + enum bcf_variant_match { + bcf_match_exact, ///< Types present exactly match tested for + bcf_match_overlap, ///< At least one variant type in common + bcf_match_subset, ///< Test set is a subset of types present + }; + + /// Check for presence of variant types in a BCF record + /** + * @param rec BCF/VCF record + * @param bitmask Set of variant types to test for + * @param mode Match mode + * @return >0 if the variant types are present, + * 0 if not present, + * -1 on error + * + * @p bitmask should be the bitwise-or of the variant types (VCF_SNP, + * VCF_MNP, etc.) to test for. + * + * The return value is the bitwise-and of the set of types present + * and @p bitmask.
Callers that want to check for the presence of more + * than one type can avoid function call overhead by passing all the + * types to be checked for in a single call to this function, in + * bcf_match_overlap mode, and then check for them individually in the + * returned value. + * + * As VCF_REF is represented by 0 (i.e. the absence of other variants) + * it should be tested for using + * bcf_has_variant_types(rec, VCF_REF, bcf_match_exact) + * which will return 1 if no other variant type is present, otherwise 0. + */ + HTSLIB_EXPORT + int bcf_has_variant_types(bcf1_t *rec, uint32_t bitmask, enum bcf_variant_match mode); + + /// Check for presence of variant types in a BCF record, for a given allele + /** + * @param rec BCF/VCF record + * @param ith_allele Allele to check + * @param bitmask Set of variant types to test for + * @return >0 if one of the variant types is present, + * 0 if not present, + * -1 on error + * + * @p bitmask should be the bitwise-or of the variant types (VCF_SNP, + * VCF_MNP, etc.) to test for, or VCF_REF on its own. + * + * The return value is the bitwise-and of the set of types present + * and @p bitmask. Callers that want to check for the presence of more + * than one type can avoid function call overhead by passing all the + * types to be checked for in a single call to this function, and then + * check for them individually in the returned value. + * + * As a special case, if @p bitmask is VCF_REF (i.e. 0), the function + * tests for an exact match. The return value will be 1 if the + * variant type calculated for the allele is VCF_REF, otherwise if + * any other type is present it will be 0. + */ + HTSLIB_EXPORT + int bcf_has_variant_type(bcf1_t *rec, int ith_allele, uint32_t bitmask); + + /// Return the number of bases affected by a variant, for a given allele + /** + * @param rec BCF/VCF record + * @param ith_allele Allele index + * @return The number of bases affected (negative for deletions), + * or bcf_int32_missing on error. 
+ */ + HTSLIB_EXPORT + int bcf_variant_length(bcf1_t *rec, int ith_allele); + + HTSLIB_EXPORT + int bcf_is_snp(bcf1_t *v); + + /** + * bcf_update_filter() - sets the FILTER column + * @flt_ids: The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") + * @n: Number of filters. If n==0, all filters are removed + */ + HTSLIB_EXPORT + int bcf_update_filter(const bcf_hdr_t *hdr, bcf1_t *line, int *flt_ids, int n); + /** + * bcf_add_filter() - adds to the FILTER column + * @flt_id: filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") + * + * If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed. + */ + HTSLIB_EXPORT + int bcf_add_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id); + /** + * bcf_remove_filter() - removes from the FILTER column + * @flt_id: filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") + * @pass: when set to 1 and no filters are present, set to PASS + */ + HTSLIB_EXPORT + int bcf_remove_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id, int pass); + /** + * Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably. 
+ */ + HTSLIB_EXPORT + int bcf_has_filter(const bcf_hdr_t *hdr, bcf1_t *line, char *filter); + /** + * bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALT column + * @alleles: Array of alleles + * @nals: Number of alleles + * @alleles_string: Comma-separated alleles, starting with the REF allele + */ + HTSLIB_EXPORT + int bcf_update_alleles(const bcf_hdr_t *hdr, bcf1_t *line, const char **alleles, int nals); + + HTSLIB_EXPORT + int bcf_update_alleles_str(const bcf_hdr_t *hdr, bcf1_t *line, const char *alleles_string); + + /** + * bcf_update_id() - sets new ID string + * bcf_add_id() - adds to the ID string checking for duplicates + */ + HTSLIB_EXPORT + int bcf_update_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id); + + HTSLIB_EXPORT + int bcf_add_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id); + + /** + * bcf_update_info_*() - functions for updating INFO fields + * @param hdr: the BCF header + * @param line: VCF line to be edited + * @param key: the INFO tag to be updated + * @param values: pointer to the array of values. Pass NULL to remove the tag. + * @param n: number of values in the array. When set to 0, the INFO tag is removed + * @return 0 on success or negative value on error. + * + * The @p string in bcf_update_info_flag() is optional, + * @p n indicates whether the flag is set or removed. + * + * Note that updating an END info tag will cause line->rlen to be + * updated as a side-effect (removing the tag will set it to the + * string length of the REF allele). If line->pos is being changed as + * well, it is important that this is done before calling + * bcf_update_info_int32() to update the END tag, otherwise rlen will be + * set incorrectly. If the new END value is less than or equal to + * line->pos, a warning will be printed and line->rlen will be set to + * the length of the REF allele. 
+ */ + #define bcf_update_info_int32(hdr,line,key,values,n) bcf_update_info((hdr),(line),(key),(values),(n),BCF_HT_INT) + #define bcf_update_info_float(hdr,line,key,values,n) bcf_update_info((hdr),(line),(key),(values),(n),BCF_HT_REAL) + #define bcf_update_info_flag(hdr,line,key,string,n) bcf_update_info((hdr),(line),(key),(string),(n),BCF_HT_FLAG) + #define bcf_update_info_string(hdr,line,key,string) bcf_update_info((hdr),(line),(key),(string),1,BCF_HT_STR) + HTSLIB_EXPORT + int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type); + + /// Set or update 64-bit integer INFO values + /** + * @param hdr: the BCF header + * @param line: VCF line to be edited + * @param key: the INFO tag to be updated + * @param values: pointer to the array of values. Pass NULL to remove the tag. + * @param n: number of values in the array. When set to 0, the INFO tag is removed + * @return 0 on success or negative value on error. + * + * This function takes an int64_t values array as input. The data + * actually stored will be shrunk to the minimum size that can + * accept all of the values. + * + * INFO values outside of the range BCF_MIN_BT_INT32 to BCF_MAX_BT_INT32 + * can only be written to VCF files. + */ + static inline int bcf_update_info_int64(const bcf_hdr_t *hdr, bcf1_t *line, + const char *key, + const int64_t *values, int n) + { + return bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG); + } + + /* + * bcf_update_format_*() - functions for updating FORMAT fields + * @values: pointer to the array of values, the same number of elements + * is expected for each sample. Missing values must be padded + * with bcf_*_missing or bcf_*_vector_end values. + * @n: number of values in the array. If n==0, existing tag is removed. + * + * The function bcf_update_format_string() is a higher-level (slower) variant of + * bcf_update_format_char(). 
The former accepts array of \0-terminated strings + * whereas the latter requires that the strings are collapsed into a single array + * of fixed-length strings. In case of strings with variable length, shorter strings + * can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char() + * are not \0-terminated. + * + * Returns 0 on success or negative value on error. + */ + #define bcf_update_format_int32(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_INT) + #define bcf_update_format_float(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_REAL) + #define bcf_update_format_char(hdr,line,key,values,n) bcf_update_format((hdr),(line),(key),(values),(n),BCF_HT_STR) + #define bcf_update_genotypes(hdr,line,gts,n) bcf_update_format((hdr),(line),"GT",(gts),(n),BCF_HT_INT) // See bcf_gt_ macros below + + HTSLIB_EXPORT + int bcf_update_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const char **values, int n); + + HTSLIB_EXPORT + int bcf_update_format(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type); + + // Macros for setting genotypes correctly, for use with bcf_update_genotypes only; idx corresponds + // to VCF's GT (1-based index to ALT or 0 for the reference allele) and val is the opposite, obtained + // from bcf_get_genotypes() below. + #define bcf_gt_phased(idx) (((idx)+1)<<1|1) + #define bcf_gt_unphased(idx) (((idx)+1)<<1) + #define bcf_gt_missing 0 + #define bcf_gt_is_missing(val) ((val)>>1 ? 0 : 1) + #define bcf_gt_is_phased(idx) ((idx)&1) + #define bcf_gt_allele(val) (((val)>>1)-1) + + /** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */ + #define bcf_alleles2gt(a,b) ((a)>(b)?((a)*((a)+1)/2+(b)):((b)*((b)+1)/2+(a))) + static inline void bcf_gt2alleles(int igt, int *a, int *b) + { + int k = 0, dk = 1; + while ( k=0 on success + * -1 .. 
no such INFO tag defined in the header + * -2 .. clash between types defined in the header and encountered in the VCF record + * -3 .. tag is not present in the VCF record + * -4 .. the operation could not be completed (e.g. out of memory) + * + * Returns negative value on error or the number of values (including + * missing values) put in *dst on success. bcf_get_info_string() returns + * on success the number of characters stored excluding the nul- + * terminating byte. bcf_get_info_flag() does not store anything in *dst + * but returns 1 if the flag is set or 0 if not. + * + * *dst will be reallocated if it is not big enough (i.e. *ndst is too + * small) or NULL on entry. The new size will be stored in *ndst. + */ + #define bcf_get_info_int32(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_INT) + #define bcf_get_info_float(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_REAL) + #define bcf_get_info_string(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_STR) + #define bcf_get_info_flag(hdr,line,tag,dst,ndst) bcf_get_info_values(hdr,line,tag,(void**)(dst),ndst,BCF_HT_FLAG) + + HTSLIB_EXPORT + int bcf_get_info_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type); + + /// Put integer INFO values into an int64_t array + /** + * @param hdr: BCF header + * @param line: BCF record + * @param tag: INFO tag to retrieve + * @param dst: *dst is pointer to a memory location, can point to NULL + * @param ndst: pointer to the size of allocated memory + * @return >=0 on success + * -1 .. no such INFO tag defined in the header + * -2 .. clash between types defined in the header and encountered in the VCF record + * -3 .. tag is not present in the VCF record + * -4 .. the operation could not be completed (e.g. 
out of memory) + * + * Returns negative value on error or the number of values (including + * missing values) put in *dst on success. + * + * *dst will be reallocated if it is not big enough (i.e. *ndst is too + * small) or NULL on entry. The new size will be stored in *ndst. + */ + static inline int bcf_get_info_int64(const bcf_hdr_t *hdr, bcf1_t *line, + const char *tag, int64_t **dst, + int *ndst) + { + return bcf_get_info_values(hdr, line, tag, + (void **) dst, ndst, BCF_HT_LONG); + } + + /** + * bcf_get_format_*() - same as bcf_get_info*() above + * + * The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char(). + * See the description of bcf_update_format_string() and bcf_update_format_char() above. + * Unlike other bcf_get_format_*() functions, bcf_get_format_string() allocates two arrays: + * a single block of \0-terminated strings collapsed into a single array and an array of pointers + * to these strings. Both arrays must be cleaned by the user. + * + * Returns negative value on error or the number of written values on success. + * + * Use the returned number of written values for accessing valid entries of dst, as ndst is only a + * watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over + * values from previous calls to bcf_get_format_*() on lines with more values per sample.
+ * + * Example: + * int ndst = 0; char **dst = NULL; + * if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 ) + * for (i=0; iid[type][int_id].key) + + /** + * bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID + * bcf_hdr_id2name() - Translates numeric ID to sequence name + */ + static inline int bcf_hdr_name2id(const bcf_hdr_t *hdr, const char *id) { return bcf_hdr_id2int(hdr, BCF_DT_CTG, id); } + static inline const char *bcf_hdr_id2name(const bcf_hdr_t *hdr, int rid) + { + if ( !hdr || rid<0 || rid>=hdr->n[BCF_DT_CTG] ) return NULL; + return hdr->id[BCF_DT_CTG][rid].key; + } + static inline const char *bcf_seqname(const bcf_hdr_t *hdr, const bcf1_t *rec) { + return bcf_hdr_id2name(hdr, rec ? rec->rid : -1); + } + + /** Return CONTIG name, or "(unknown)" + + Like bcf_seqname(), but this function will never return NULL. If + the contig name cannot be found (either because @p hdr was not + supplied or rec->rid was out of range) it returns the string + "(unknown)". + */ + static inline const char *bcf_seqname_safe(const bcf_hdr_t *hdr, const bcf1_t *rec) { + const char *name = bcf_seqname(hdr, rec); + return name ? name : "(unknown)"; + } + + /** + * bcf_hdr_id2*() - Macros for accessing bcf_idinfo_t + * @type: one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT + * @int_id: return value of bcf_hdr_id2int, must be >=0 + * + * The returned values are: + * bcf_hdr_id2length .. whether the number of values is fixed or variable, one of BCF_VL_* + * bcf_hdr_id2number .. the number of values, 0xfffff for variable length fields + * bcf_hdr_id2type .. the field type, one of BCF_HT_* + * bcf_hdr_id2coltype .. the column type, one of BCF_HL_* + * + * Notes: Prior to using the macros, the presence of the info should be + * tested with bcf_hdr_idinfo_exists(). 
+ */ + #define bcf_hdr_id2length(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>8 & 0xf) + #define bcf_hdr_id2number(hdr,type,int_id) ((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>12) + #define bcf_hdr_id2type(hdr,type,int_id) (uint32_t)((hdr)->id[BCF_DT_ID][int_id].val->info[type]>>4 & 0xf) + #define bcf_hdr_id2coltype(hdr,type,int_id) (uint32_t)((hdr)->id[BCF_DT_ID][int_id].val->info[type] & 0xf) + #define bcf_hdr_idinfo_exists(hdr,type,int_id) ((int_id)>=0 && (int_id)<(hdr)->n[BCF_DT_ID] && (hdr)->id[BCF_DT_ID][int_id].val && bcf_hdr_id2coltype((hdr),(type),(int_id))!=0xf) + #define bcf_hdr_id2hrec(hdr,dict_type,col_type,int_id) ((hdr)->id[(dict_type)==BCF_DT_CTG?BCF_DT_CTG:BCF_DT_ID][int_id].val->hrec[(dict_type)==BCF_DT_CTG?0:(col_type)]) + /// Convert BCF FORMAT data to string form + /** + * @param s kstring to write into + * @param n number of items in @p data + * @param type type of items in @p data + * @param data BCF format data + * @return 0 on success + * -1 if out of memory + */ + HTSLIB_EXPORT + int bcf_fmt_array(kstring_t *s, int n, int type, void *data); + + HTSLIB_EXPORT + uint8_t *bcf_fmt_sized_array(kstring_t *s, uint8_t *ptr); + + /// Encode a variable-length char array in BCF format + /** + * @param s kstring to write into + * @param l length of input + * @param a input data to encode + * @return 0 on success; < 0 on error + */ + HTSLIB_EXPORT + int bcf_enc_vchar(kstring_t *s, int l, const char *a); + + /// Encode a variable-length integer array in BCF format + /** + * @param s kstring to write into + * @param n total number of items in @p a (<= 0 to encode BCF_BT_NULL) + * @param a input data to encode + * @param wsize vector length (<= 0 is equivalent to @p n) + * @return 0 on success; < 0 on error + * @note @p n should be an exact multiple of @p wsize + */ + HTSLIB_EXPORT + int bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize); + + /// Encode a variable-length float array in BCF format + /** + * @param s kstring to 
write into + * @param n total number of items in @p a (<= 0 to encode BCF_BT_NULL) + * @param a input data to encode + * @return 0 on success; < 0 on error + */ + HTSLIB_EXPORT + int bcf_enc_vfloat(kstring_t *s, int n, float *a); + + + /************************************************************************** + * BCF index + * + * Note that these functions work with BCFs only. See synced_bcf_reader.h + * which provides (amongst other things) an API to work transparently with + * both indexed BCFs and VCFs. + **************************************************************************/ + + #define bcf_itr_destroy(iter) hts_itr_destroy(iter) + #define bcf_itr_queryi(idx, tid, beg, end) hts_itr_query((idx), (tid), (beg), (end), bcf_readrec) + #define bcf_itr_querys(idx, hdr, s) hts_itr_querys((idx), (s), (hts_name2id_f)(bcf_hdr_name2id), (hdr), hts_itr_query, bcf_readrec) + + static inline int bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) { + if (htsfp->is_bgzf) + return hts_itr_next(htsfp->fp.bgzf, itr, r, 0); + + hts_log_error("Only bgzf compressed files can be used with iterators"); + errno = EINVAL; + return -2; + } +/// Load a BCF index +/** @param fn BCF file name + @return The index, or NULL if an error occurred. + @note This only works for BCF files. Consider synced_bcf_reader instead +which works for both BCF and VCF. +*/ + #define bcf_index_load(fn) hts_idx_load(fn, HTS_FMT_CSI) + #define bcf_index_seqnames(idx, hdr, nptr) hts_idx_seqnames((idx),(nptr),(hts_id2name_f)(bcf_hdr_id2name),(hdr)) + +/// Load a BCF index from a given index file name +/** @param fn Input BAM/BCF/etc filename + @param fnidx The input index filename + @return The index, or NULL if an error occurred. + @note This only works for BCF files. Consider synced_bcf_reader instead +which works for both BCF and VCF. 
+*/ + HTSLIB_EXPORT + hts_idx_t *bcf_index_load2(const char *fn, const char *fnidx); + +/// Load a BCF index from a given index file name +/** @param fn Input BAM/BCF/etc filename + @param fnidx The input index filename + @param flags Flags to alter behaviour (see description) + @return The index, or NULL if an error occurred. + @note This only works for BCF files. Consider synced_bcf_reader instead +which works for both BCF and VCF. + + The @p flags parameter can be set to a combination of the following + values: + + HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes + HTS_IDX_SILENT_FAIL Fail silently if the index is not present + + Equivalent to hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags); +*/ + HTSLIB_EXPORT + hts_idx_t *bcf_index_load3(const char *fn, const char *fnidx, int flags); + + /** + * bcf_index_build() - Generate and save an index file + * @fn: Input VCF(compressed)/BCF filename + * @min_shift: log2(width of the smallest bin), e.g. a value of 14 + * imposes a 16k base lower limit on the width of index bins. + * Positive to generate CSI, or 0 to generate TBI. However, a small + * value of min_shift would create a large index, which would lead to + * reduced performance when using the index. A recommended value is 14. + * For BCF files, only the CSI index can be generated. + * + * Returns 0 if successful, or negative if an error occurred. + * + * List of error codes: + * -1 .. indexing failed + * -2 .. opening @fn failed + * -3 .. format not indexable + * -4 .. failed to create and/or save the index + */ + HTSLIB_EXPORT + int bcf_index_build(const char *fn, int min_shift); + + /** + * bcf_index_build2() - Generate and save an index to a specific file + * @fn: Input VCF/BCF filename + * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn + * @min_shift: Positive to generate CSI, or 0 to generate TBI + * + * Returns 0 if successful, or negative if an error occurred. + * + * List of error codes: + * -1 .. indexing failed + * -2 .. 
opening @fn failed + * -3 .. format not indexable + * -4 .. failed to create and/or save the index + */ + HTSLIB_EXPORT + int bcf_index_build2(const char *fn, const char *fnidx, int min_shift); + + /** + * bcf_index_build3() - Generate and save an index to a specific file + * @fn: Input VCF/BCF filename + * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn + * @min_shift: Positive to generate CSI, or 0 to generate TBI + * @n_threads: Number of VCF/BCF decoder threads + * + * Returns 0 if successful, or negative if an error occurred. + * + * List of error codes: + * -1 .. indexing failed + * -2 .. opening @fn failed + * -3 .. format not indexable + * -4 .. failed to create and/or save the index + */ + HTSLIB_EXPORT + int bcf_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads); + + /// Initialise fp->idx for the current format type, for VCF and BCF files. + /** @param fp File handle for the data file being written. + @param h BCF header structured (needed for BAI and CSI). + @param min_shift CSI bin size (CSI default is 14). + @param fnidx Filename to write index to. This pointer must remain valid + until after bcf_idx_save is called. + @return 0 on success, <0 on failure. + @note This must be called after the header has been written, but before + any other data. + */ + HTSLIB_EXPORT + int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx); + + /// Writes the index initialised with bcf_idx_init to disk. + /** @param fp File handle for the data file being written. + @return 0 on success, <0 on failure. + */ + HTSLIB_EXPORT + int bcf_idx_save(htsFile *fp); + +/******************* + * Typed value I/O * + *******************/ + +/* + Note that in contrast with BCFv2.1 specification, HTSlib implementation + allows missing values in vectors. For integer types, the values 0x80, + 0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001, + 0x80000001 as end-of-vector indicators. 
Similarly for floats, the value of + 0x7F800001 is interpreted as a missing value and 0x7F800002 as an + end-of-vector indicator. + Note that the end-of-vector byte is not part of the vector. + + This trial BCF version (v2.2) is compatible with the VCF specification and + enables to handle correctly vectors with different ploidy in presence of + missing values. + */ +#define bcf_int8_vector_end (-127) /* INT8_MIN + 1 */ +#define bcf_int16_vector_end (-32767) /* INT16_MIN + 1 */ +#define bcf_int32_vector_end (-2147483647) /* INT32_MIN + 1 */ +#define bcf_int64_vector_end (-9223372036854775807LL) /* INT64_MIN + 1 */ +#define bcf_str_vector_end 0 +#define bcf_int8_missing (-128) /* INT8_MIN */ +#define bcf_int16_missing (-32767-1) /* INT16_MIN */ +#define bcf_int32_missing (-2147483647-1) /* INT32_MIN */ +#define bcf_int64_missing (-9223372036854775807LL - 1LL) /* INT64_MIN */ + +// All of the above are values, which may occur multiple times in lists of +// integers or lists of floating point. Strings in VCF don't have +// lists - a list of strings is just another (comma-separated) string. +// +// Hence bcf_str_missing is the whole string being missing rather than +// an element of a list. Ie a string of length zero: (0<<4)|BCF_BT_CHAR. +#define bcf_str_missing BCF_BT_CHAR + +// Limits on BCF values stored in given types. Max values are the same +// as for the underlying type. Min values are slightly different as +// the last 8 values for each type were reserved by BCFv2.2. 
+#define BCF_MAX_BT_INT8 (0x7f) /* INT8_MAX */ +#define BCF_MAX_BT_INT16 (0x7fff) /* INT16_MAX */ +#define BCF_MAX_BT_INT32 (0x7fffffff) /* INT32_MAX */ +#define BCF_MIN_BT_INT8 (-120) /* INT8_MIN + 8 */ +#define BCF_MIN_BT_INT16 (-32760) /* INT16_MIN + 8 */ +#define BCF_MIN_BT_INT32 (-2147483640) /* INT32_MIN + 8 */ + +HTSLIB_EXPORT +extern uint32_t bcf_float_vector_end; +HTSLIB_EXPORT +extern uint32_t bcf_float_missing; +static inline void bcf_float_set(float *ptr, uint32_t value) +{ + union { uint32_t i; float f; } u; + u.i = value; + *ptr = u.f; +} +#define bcf_float_set_vector_end(x) bcf_float_set(&(x),bcf_float_vector_end) +#define bcf_float_set_missing(x) bcf_float_set(&(x),bcf_float_missing) +static inline int bcf_float_is_missing(float f) +{ + union { uint32_t i; float f; } u; + u.f = f; + return u.i==bcf_float_missing ? 1 : 0; +} +static inline int bcf_float_is_vector_end(float f) +{ + union { uint32_t i; float f; } u; + u.f = f; + return u.i==bcf_float_vector_end ? 1 : 0; +} + +static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str) +{ + uint32_t e = 0; + #define BRANCH(type_t, convert, missing, vector_end) { \ + uint8_t *ptr = fmt->p + isample*fmt->size; \ + int i; \ + for (i=0; in; i++, ptr += sizeof(type_t)) \ + { \ + type_t val = convert(ptr); \ + if ( val == vector_end ) break; \ + if ( i ) e |= kputc("/|"[val&1], str) < 0; \ + if ( !(val>>1) ) e |= kputc('.', str) < 0; \ + else e |= kputw((val>>1) - 1, str) < 0; \ + } \ + if (i == 0) e |= kputc('.', str) < 0; \ + } + switch (fmt->type) { + case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break; + case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break; + case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break; + case BCF_BT_NULL: e |= kputc('.', str) < 0; break; + default: hts_log_error("Unexpected type %d", fmt->type); return -2; + } + #undef BRANCH + return e == 0 ? 
0 : -1; +} + +static inline int bcf_enc_size(kstring_t *s, int size, int type) +{ + // Most common case is first + if (size < 15) { + if (ks_resize(s, s->l + 1) < 0) + return -1; + uint8_t *p = (uint8_t *)s->s + s->l; + *p++ = (size<<4) | type; + s->l++; + return 0; + } + + if (ks_resize(s, s->l + 6) < 0) + return -1; + uint8_t *p = (uint8_t *)s->s + s->l; + *p++ = 15<<4|type; + + if (size < 128) { + *p++ = 1<<4|BCF_BT_INT8; + *p++ = size; + s->l += 3; + } else { + if (size < 32768) { + *p++ = 1<<4|BCF_BT_INT16; + i16_to_le(size, p); + s->l += 4; + } else { + *p++ = 1<<4|BCF_BT_INT32; + i32_to_le(size, p); + s->l += 6; + } + } + return 0; +} + +static inline int bcf_enc_inttype(long x) +{ + if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) return BCF_BT_INT8; + if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) return BCF_BT_INT16; + return BCF_BT_INT32; +} + +static inline int bcf_enc_int1(kstring_t *s, int32_t x) +{ + if (ks_resize(s, s->l + 5) < 0) + return -1; + uint8_t *p = (uint8_t *)s->s + s->l; + + if (x == bcf_int32_vector_end) { + // An inline implementation of bcf_enc_size with size==1 and + // memory allocation already accounted for. + *p = (1<<4) | BCF_BT_INT8; + p[1] = bcf_int8_vector_end; + s->l+=2; + } else if (x == bcf_int32_missing) { + *p = (1<<4) | BCF_BT_INT8; + p[1] = bcf_int8_missing; + s->l+=2; + } else if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) { + *p = (1<<4) | BCF_BT_INT8; + p[1] = x; + s->l+=2; + } else if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) { + *p = (1<<4) | BCF_BT_INT16; + i16_to_le(x, p+1); + s->l+=3; + } else { + *p = (1<<4) | BCF_BT_INT32; + i32_to_le(x, p+1); + s->l+=5; + } + + return 0; +} + +/// Return the value of a single typed integer. +/** @param p Pointer to input data block. + @param type One of the BCF_BT_INT* type codes + @param[out] q Location to store an updated value for p + @return The integer value, or zero if @p type is not valid. 
+ +If @p type is not one of BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or +BCF_BT_INT64, zero will be returned and @p *q will not be updated. +Otherwise, the integer value will be returned and @p *q will be set +to the memory location immediately following the integer value. + +Cautious callers can detect invalid type codes by checking that *q has +actually been updated. +*/ + +static inline int64_t bcf_dec_int1(const uint8_t *p, int type, uint8_t **q) +{ + if (type == BCF_BT_INT8) { + *q = (uint8_t*)p + 1; + return le_to_i8(p); + } else if (type == BCF_BT_INT16) { + *q = (uint8_t*)p + 2; + return le_to_i16(p); + } else if (type == BCF_BT_INT32) { + *q = (uint8_t*)p + 4; + return le_to_i32(p); + } else if (type == BCF_BT_INT64) { + *q = (uint8_t*)p + 8; + return le_to_i64(p); + } else { // Invalid type. + return 0; + } +} + +/// Return the value of a single typed integer from a byte stream. +/** @param p Pointer to input data block. + @param[out] q Location to store an updated value for p + @return The integer value, or zero if the type code was not valid. + +Reads a one-byte type code from @p p, and uses it to decode an integer +value from the following bytes in @p p. + +If the type is not one of BCF_BT_INT8, BCF_BT_INT16 or BCF_BT_INT32, zero +will be returned and @p *q will unchanged. Otherwise, the integer value will +be returned and @p *q will be set to the memory location immediately following +the integer value. + +Cautious callers can detect invalid type codes by checking that *q has +actually been updated. 
/// Decode a BCF size/type descriptor
/** @param      p     Pointer to the descriptor byte
    @param[out] q     Set to the first byte after the decoded size
    @param[out] type  Receives the low nibble of the descriptor
    @return The decoded size

The high nibble of the descriptor holds the size directly unless it is 15,
in which case the size follows as a typed integer and is read with
bcf_dec_typed_int1().  In that case @p *q is updated by that call, and is
therefore left unchanged if the typed integer has an invalid type code.
*/
static inline int32_t bcf_dec_size(const uint8_t *p, uint8_t **q, int *type)
{
    const uint8_t descriptor = *p;
    const int inline_size = descriptor >> 4;

    *type = descriptor & 0xf;

    if (inline_size == 15)
        return bcf_dec_typed_int1(p + 1, q);  /* size stored out of line */

    *q = (uint8_t *)p + 1;
    return inline_size;
}
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include "htslib/kstring.h" + +int kputd(double d, kstring_t *s) { + int len = 0; + char buf[21], *cp = buf+20, *ep; + if (d == 0) { + if (signbit(d)) { + kputsn("-0",2,s); + return 2; + } else { + kputsn("0",1,s); + return 1; + } + } + + if (d < 0) { + kputc('-',s); + len = 1; + d=-d; + } + if (!(d >= 0.0001 && d <= 999999)) { + if (ks_resize(s, s->l + 50) < 0) + return EOF; + // We let stdio handle the exponent cases + int s2 = snprintf(s->s + s->l, s->m - s->l, "%g", d); + len += s2; + s->l += s2; + return len; + } + + // Correction for rounding - rather ugly + // Optimised for small numbers. + + uint32_t i; + if (d<0.001) i = rint(d*1000000000), cp -= 1; + else if (d < 0.01) i = rint(d*100000000), cp -= 2; + else if (d < 0.1) i = rint(d*10000000), cp -= 3; + else if (d < 1) i = rint(d*1000000), cp -= 4; + else if (d < 10) i = rint(d*100000), cp -= 5; + else if (d < 100) i = rint(d*10000), cp -= 6; + else if (d < 1000) i = rint(d*1000), cp -= 7; + else if (d < 10000) i = rint(d*100), cp -= 8; + else if (d < 100000) i = rint(d*10), cp -= 9; + else i = rint(d), cp -= 10; + + // integer i is always 6 digits, so print it 2 at a time. 
+ static const char kputuw_dig2r[] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + + memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100; + memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100; + memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); + + // Except when it rounds up (d=0.009999999 is i=1000000) + if (i >= 100) + *--cp = '0' + (i/100); + + + int p = buf+20-cp; + if (p <= 10) { /* d < 1 */ + // 0.00123 is 123, so add leading zeros and 0. + ep = cp+5; // 6 precision + while (p < 10) { // aka d < 1 + *--cp = '0'; + p++; + } + *--cp = '.'; + *--cp = '0'; + } else { + // 123.001 is 123001 with p==13, so move 123 down and add "." + // Equiv to memmove(cp-1, cp, p-10); cp--; + char *xp = --cp; + ep = cp+6; + while (p > 10) { + xp[0] = xp[1]; + xp++; + p--; + } + xp[0] = '.'; + } + + // Cull trailing zeros + while (*ep == '0' && ep > cp) + ep--; + + // End can be 1 out due to the mostly-6 but occasionally 7 (i==1) case. + // Also code with "123." which should be "123" + if (*ep && *ep != '.') + ep++; + *ep = 0; + + int sl = ep-cp; + len += sl; + kputsn(cp, sl, s); + return len; +} + +int kvsprintf(kstring_t *s, const char *fmt, va_list ap) +{ + va_list args; + int l; + va_copy(args, ap); + + if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) { + double d = va_arg(args, double); + l = kputd(d, s); + va_end(args); + return l; + } + + if (!s->s) { + const size_t sz = 64; + s->s = malloc(sz); + if (!s->s) + return -1; + s->m = sz; + s->l = 0; + } + + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'. 
+ va_end(args); + if (l + 1 > s->m - s->l) { + if (ks_resize(s, s->l + l + 2) < 0) + return -1; + va_copy(args, ap); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); + va_end(args); + } + s->l += l; + return l; +} + +int ksprintf(kstring_t *s, const char *fmt, ...) +{ + va_list ap; + int l; + va_start(ap, fmt); + l = kvsprintf(s, fmt, ap); + va_end(ap); + return l; +} + +char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux) +{ + const unsigned char *p, *start, *sep = (unsigned char *) sep_in; + if (sep) { // set up the table + if (str == 0 && aux->finished) return 0; // no need to set up if we have finished + aux->finished = 0; + if (sep[0] && sep[1]) { + aux->sep = -1; + aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0; + for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f); + } else aux->sep = sep[0]; + } + if (aux->finished) return 0; + else if (str) start = (unsigned char *) str, aux->finished = 0; + else start = (unsigned char *) aux->p + 1; + if (aux->sep < 0) { + for (p = start; *p; ++p) + if (aux->tab[*p>>6]>>(*p&0x3f)&1) break; + } else { + // Using strchr is fast for next token, but slower for + // last token due to extra pass from strlen. Overall + // on a VCF parse this func was 146% faster with // strchr. + // Equiv to: + // for (p = start; *p; ++p) if (*p == aux->sep) break; + + // NB: We could use strchrnul() here from glibc if detected, + // which is ~40% faster again, but it's not so portable. + // i.e. p = (uint8_t *)strchrnul((char *)start, aux->sep); + uint8_t *p2 = (uint8_t *)strchr((char *)start, aux->sep); + p = p2 ? 
/*
 * ksplit_core() - split a NUL-terminated string into fields
 *
 * s          string to split; MUST be NUL terminated.  When offsets are
 *            being collected, the byte ending each field is overwritten
 *            with '\0'.
 * delimiter  field separator, or 0 to split on runs of white space
 * _max       in/out: capacity of *_offsets, in elements (may be NULL)
 * _offsets   in/out: array receiving the start offset of each field.
 *            Pass NULL to only count fields; s is then left unmodified.
 *
 * Empty fields (consecutive delimiters) are skipped.
 *
 * Returns the number of fields.  If the offsets array cannot be grown,
 * it is freed, *_offsets is set to NULL, *_max is set to 0 and 0 is
 * returned.
 */
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
{
    int i, n = 0, l, last_char = 0, last_start = 0;
    /* Only read through the in/out pointers when they were supplied. */
    int max = _max ? *_max : 0;
    int *offsets = _offsets ? *_offsets : NULL;

    l = strlen(s);

    for (i = 0; i <= l; ++i) {
        int field_ended = 0;

        if (delimiter == 0) {
            if (isspace((int)((unsigned char) s[i])) || s[i] == 0) {
                field_ended = isgraph(last_char);
            } else if (isspace(last_char) || last_char == 0) {
                last_start = i;
            }
        } else {
            if (s[i] == delimiter || s[i] == 0) {
                field_ended = (last_char != 0 && last_char != delimiter);
            } else if (last_char == delimiter || last_char == 0) {
                last_start = i;
            }
        }

        if (field_ended) {
            if (_offsets) {
                s[i] = 0;   /* terminate the field in place */
                if (n == max) {
                    int *tmp;
                    max = max ? max<<1 : 2;
                    tmp = (int*)realloc(offsets, sizeof(int) * max);
                    if (!tmp) {
                        free(offsets);
                        *_offsets = NULL;
                        /* Keep the capacity in step with the freed array. */
                        if (_max) *_max = 0;
                        return 0;
                    }
                    offsets = tmp;
                }
                offsets[n++] = last_start;
            } else {
                ++n;        /* count-only mode */
            }
        }

        /* Read s[i] again: it may just have been overwritten with '\0'. */
        last_char = (int)((unsigned char)s[i]);
    }

    if (_max) *_max = max;
    if (_offsets) *_offsets = offsets;
    return n;
}
/**********************
 * Boyer-Moore search *
 **********************/

typedef unsigned char ubyte_t;

// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
/*
 * Build the Boyer-Moore shift tables for pat[0..m-1].
 *
 * Returns one malloc'ed block of m + 256 ints: the first m entries are the
 * bmGs table and the following 256 are the bmBc table, indexed by byte
 * value.  Returns NULL on allocation failure.  Requires m >= 1 (suff[m-1]
 * is written unconditionally); kmemmem() never calls it with a smaller m.
 */
static int *ksBM_prep(const ubyte_t *pat, int m)
{
    int i, *suff, *prep, *bmGs, *bmBc;
    prep = (int*)calloc(m + 256, sizeof(int));
    if (!prep) return NULL;
    bmGs = prep; bmBc = prep + m;
    { // preBmBc()
        for (i = 0; i < 256; ++i) bmBc[i] = m;
        for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
    }
    suff = (int*)calloc(m, sizeof(int));
    if (!suff) { free(prep); return NULL; }
    { // suffixes()
        int f = 0, g;
        suff[m - 1] = m;
        g = m - 1;
        for (i = m - 2; i >= 0; --i) {
            if (i > g && suff[i + m - 1 - f] < i - g)
                suff[i] = suff[i + m - 1 - f];
            else {
                if (i < g) g = i;
                f = i;
                while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
                suff[i] = f - g;
            }
        }
    }
    { // preBmGs()
        int j = 0;
        for (i = 0; i < m; ++i) bmGs[i] = m;
        for (i = m - 1; i >= 0; --i)
            if (suff[i] == i + 1)
                for (; j < m - 1 - i; ++j)
                    if (bmGs[j] == m)
                        bmGs[j] = m - 1 - i;
        for (i = 0; i <= m - 2; ++i)
            bmGs[m - 1 - suff[i]] = m - 1 - i;
    }
    free(suff);
    return prep;
}

/*
 * kmemmem() - find the first occurrence of a byte pattern in a buffer
 *
 * _str   buffer to search, n bytes long
 * _pat   pattern to look for, m bytes long
 * _prep  optional cache for the shift tables:
 *          NULL            tables are built and freed inside this call;
 *          *_prep == NULL  tables are built and stored in *_prep; the
 *                          caller must free() them;
 *          *_prep != NULL  the stored tables are reused; they must have
 *                          been built for the same pattern.
 *
 * Returns a pointer to the first match, or NULL when there is no match,
 * m is negative, or the tables could not be allocated.  An empty pattern
 * (m == 0) matches at the start of the buffer; no tables are built then.
 */
void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep)
{
    int i, j, *prep = 0, *bmGs, *bmBc;
    const ubyte_t *str, *pat;
    void *hit = NULL;

    str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat;

    // ksBM_prep() needs at least one pattern byte, so settle the
    // degenerate lengths here instead of indexing suff[-1].
    if (m <= 0) return m == 0 ? (void*)str : NULL;

    prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep;
    if (!prep) return NULL;
    if (_prep && *_prep == 0) *_prep = prep;
    bmGs = prep; bmBc = prep + m;

    j = 0;
    while (j <= n - m) {
        for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
        if (i < 0) {
            hit = (void*)(str + j);
            break;
        } else {
            int max = bmBc[str[i+j]] - m + 1 + i;
            if (max < bmGs[i]) max = bmGs[i];
            j += max;
        }
    }

    // Tables built privately for this call are released on every path,
    // including the one where a match was found.
    if (_prep == 0) free(prep);
    return hit;
}
to src/htslib-1.21/m4/hts_check_compile_flags_needed.m4 diff --git a/src/htslib-1.18/m4/hts_hide_dynamic_syms.m4 b/src/htslib-1.21/m4/hts_hide_dynamic_syms.m4 similarity index 100% rename from src/htslib-1.18/m4/hts_hide_dynamic_syms.m4 rename to src/htslib-1.21/m4/hts_hide_dynamic_syms.m4 diff --git a/src/htslib-1.18/m4/hts_prog_cc_warnings.m4 b/src/htslib-1.21/m4/hts_prog_cc_warnings.m4 similarity index 100% rename from src/htslib-1.18/m4/hts_prog_cc_warnings.m4 rename to src/htslib-1.21/m4/hts_prog_cc_warnings.m4 diff --git a/src/htslib-1.18/m4/pkg.m4 b/src/htslib-1.21/m4/pkg.m4 similarity index 100% rename from src/htslib-1.18/m4/pkg.m4 rename to src/htslib-1.21/m4/pkg.m4 diff --git a/src/htslib-1.18/md5.c b/src/htslib-1.21/md5.c similarity index 100% rename from src/htslib-1.18/md5.c rename to src/htslib-1.21/md5.c diff --git a/src/htslib-1.18/multipart.c b/src/htslib-1.21/multipart.c similarity index 100% rename from src/htslib-1.18/multipart.c rename to src/htslib-1.21/multipart.c diff --git a/src/htslib-1.18/os/lzma_stub.h b/src/htslib-1.21/os/lzma_stub.h similarity index 100% rename from src/htslib-1.18/os/lzma_stub.h rename to src/htslib-1.21/os/lzma_stub.h diff --git a/src/htslib-1.18/os/rand.c b/src/htslib-1.21/os/rand.c similarity index 100% rename from src/htslib-1.18/os/rand.c rename to src/htslib-1.21/os/rand.c diff --git a/src/htslib-1.18/plugin.c b/src/htslib-1.21/plugin.c similarity index 100% rename from src/htslib-1.18/plugin.c rename to src/htslib-1.21/plugin.c diff --git a/src/htslib-1.18/probaln.c b/src/htslib-1.21/probaln.c similarity index 100% rename from src/htslib-1.18/probaln.c rename to src/htslib-1.21/probaln.c diff --git a/src/htslib-1.18/realn.c b/src/htslib-1.21/realn.c similarity index 100% rename from src/htslib-1.18/realn.c rename to src/htslib-1.21/realn.c diff --git a/src/htslib-1.19.1/regidx.c b/src/htslib-1.21/regidx.c similarity index 100% rename from src/htslib-1.19.1/regidx.c rename to src/htslib-1.21/regidx.c diff --git 
a/src/htslib-1.18/region.c b/src/htslib-1.21/region.c similarity index 100% rename from src/htslib-1.18/region.c rename to src/htslib-1.21/region.c diff --git a/src/htslib-1.18/sam.5 b/src/htslib-1.21/sam.5 similarity index 100% rename from src/htslib-1.18/sam.5 rename to src/htslib-1.21/sam.5 diff --git a/src/htslib-1.21/sam.c b/src/htslib-1.21/sam.c new file mode 100644 index 0000000..7e58da6 --- /dev/null +++ b/src/htslib-1.21/sam.c @@ -0,0 +1,6391 @@ +/* sam.c -- SAM and BAM file I/O and manipulation. + + Copyright (C) 2008-2010, 2012-2024 Genome Research Ltd. + Copyright (C) 2010, 2012, 2013 Broad Institute. + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +#include "fuzz_settings.h" +#endif + +// Suppress deprecation message for cigar_tab, which we initialise +#include "htslib/hts_defs.h" +#undef HTS_DEPRECATED +#define HTS_DEPRECATED(message) + +#include "htslib/sam.h" +#include "htslib/bgzf.h" +#include "cram/cram.h" +#include "hts_internal.h" +#include "sam_internal.h" +#include "htslib/hfile.h" +#include "htslib/hts_endian.h" +#include "htslib/hts_expr.h" +#include "header.h" + +#include "htslib/khash.h" +KHASH_DECLARE(s2i, kh_cstr_t, int64_t) +KHASH_SET_INIT_INT(tag) + +#ifndef EFTYPE +#define EFTYPE ENOEXEC +#endif +#ifndef EOVERFLOW +#define EOVERFLOW ERANGE +#endif + +/********************** + *** BAM header I/O *** + **********************/ + +HTSLIB_EXPORT +const int8_t bam_cigar_table[256] = { + // 0 .. 47 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + // 48 .. 63 (including =) + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, BAM_CEQUAL, -1, -1, + + // 64 .. 79 (including MIDNHB) + -1, -1, BAM_CBACK, -1, BAM_CDEL, -1, -1, -1, + BAM_CHARD_CLIP, BAM_CINS, -1, -1, -1, BAM_CMATCH, BAM_CREF_SKIP, -1, + + // 80 .. 95 (including SPX) + BAM_CPAD, -1, -1, BAM_CSOFT_CLIP, -1, -1, -1, -1, + BAM_CDIFF, -1, -1, -1, -1, -1, -1, -1, + + // 96 .. 127 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + // 128 .. 
255 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; + +sam_hdr_t *sam_hdr_init(void) +{ + sam_hdr_t *bh = (sam_hdr_t*)calloc(1, sizeof(sam_hdr_t)); + if (bh == NULL) return NULL; + + bh->cigar_tab = bam_cigar_table; + return bh; +} + +void sam_hdr_destroy(sam_hdr_t *bh) +{ + int32_t i; + + if (bh == NULL) return; + + if (bh->ref_count > 0) { + --bh->ref_count; + return; + } + + if (bh->target_name) { + for (i = 0; i < bh->n_targets; ++i) + free(bh->target_name[i]); + free(bh->target_name); + free(bh->target_len); + } + free(bh->text); + if (bh->hrecs) + sam_hrecs_free(bh->hrecs); + if (bh->sdict) + kh_destroy(s2i, (khash_t(s2i) *) bh->sdict); + free(bh); +} + +// Copy the sam_hdr_t::sdict hash, used to store the real lengths of long +// references before sam_hdr_t::hrecs is populated +int sam_hdr_dup_sdict(const sam_hdr_t *h0, sam_hdr_t *h) +{ + const khash_t(s2i) *src_long_refs = (khash_t(s2i) *) h0->sdict; + khash_t(s2i) *dest_long_refs = kh_init(s2i); + int i; + if (!dest_long_refs) return -1; + + for (i = 0; i < h->n_targets; i++) { + int ret; + khiter_t ksrc, kdest; + if (h->target_len[i] < UINT32_MAX) continue; + ksrc = kh_get(s2i, src_long_refs, h->target_name[i]); + if (ksrc == kh_end(src_long_refs)) continue; + kdest = kh_put(s2i, dest_long_refs, h->target_name[i], &ret); + if (ret < 0) { + kh_destroy(s2i, dest_long_refs); + return -1; + } + kh_val(dest_long_refs, kdest) = kh_val(src_long_refs, ksrc); + } + + h->sdict = dest_long_refs; + return 0; +} + +sam_hdr_t *sam_hdr_dup(const 
sam_hdr_t *h0) +{ + if (h0 == NULL) return NULL; + sam_hdr_t *h; + if ((h = sam_hdr_init()) == NULL) return NULL; + // copy the simple data + h->n_targets = 0; + h->ignore_sam_err = h0->ignore_sam_err; + h->l_text = 0; + + // Then the pointery stuff + + if (!h0->hrecs) { + h->target_len = (uint32_t*)calloc(h0->n_targets, sizeof(uint32_t)); + if (!h->target_len) goto fail; + h->target_name = (char**)calloc(h0->n_targets, sizeof(char*)); + if (!h->target_name) goto fail; + + int i; + for (i = 0; i < h0->n_targets; ++i) { + h->target_len[i] = h0->target_len[i]; + h->target_name[i] = strdup(h0->target_name[i]); + if (!h->target_name[i]) break; + } + h->n_targets = i; + if (i < h0->n_targets) goto fail; + + if (h0->sdict) { + if (sam_hdr_dup_sdict(h0, h) < 0) goto fail; + } + } + + if (h0->hrecs) { + kstring_t tmp = { 0, 0, NULL }; + if (sam_hrecs_rebuild_text(h0->hrecs, &tmp) != 0) { + free(ks_release(&tmp)); + goto fail; + } + + h->l_text = tmp.l; + h->text = ks_release(&tmp); + + if (sam_hdr_update_target_arrays(h, h0->hrecs, 0) != 0) + goto fail; + } else { + h->l_text = h0->l_text; + h->text = malloc(h->l_text + 1); + if (!h->text) goto fail; + memcpy(h->text, h0->text, h->l_text); + h->text[h->l_text] = '\0'; + } + + return h; + + fail: + sam_hdr_destroy(h); + return NULL; +} + +sam_hdr_t *bam_hdr_read(BGZF *fp) +{ + sam_hdr_t *h; + uint8_t buf[4]; + int magic_len, has_EOF; + int32_t i, name_len, num_names = 0; + size_t bufsize; + ssize_t bytes; + // check EOF + has_EOF = bgzf_check_EOF(fp); + if (has_EOF < 0) { + perror("[W::bam_hdr_read] bgzf_check_EOF"); + } else if (has_EOF == 0) { + hts_log_warning("EOF marker is absent. 
The input is probably truncated"); + } + // read "BAM1" + magic_len = bgzf_read(fp, buf, 4); + if (magic_len != 4 || memcmp(buf, "BAM\1", 4)) { + hts_log_error("Invalid BAM binary header"); + return 0; + } + h = sam_hdr_init(); + if (!h) goto nomem; + + // read plain text and the number of reference sequences + bytes = bgzf_read(fp, buf, 4); + if (bytes != 4) goto read_err; + h->l_text = le_to_u32(buf); + + bufsize = h->l_text + 1; + if (bufsize < h->l_text) goto nomem; // so large that adding 1 overflowed +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (bufsize > FUZZ_ALLOC_LIMIT) goto nomem; +#endif + h->text = (char*)malloc(bufsize); + if (!h->text) goto nomem; + h->text[h->l_text] = 0; // make sure it is NULL terminated + bytes = bgzf_read(fp, h->text, h->l_text); + if (bytes != h->l_text) goto read_err; + + bytes = bgzf_read(fp, &h->n_targets, 4); + if (bytes != 4) goto read_err; + if (fp->is_be) ed_swap_4p(&h->n_targets); + + if (h->n_targets < 0) goto invalid; + + // read reference sequence names and lengths +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (h->n_targets > (FUZZ_ALLOC_LIMIT - bufsize)/(sizeof(char*)+sizeof(uint32_t))) + goto nomem; +#endif + if (h->n_targets > 0) { + h->target_name = (char**)calloc(h->n_targets, sizeof(char*)); + if (!h->target_name) goto nomem; + h->target_len = (uint32_t*)calloc(h->n_targets, sizeof(uint32_t)); + if (!h->target_len) goto nomem; + } + else { + h->target_name = NULL; + h->target_len = NULL; + } + + for (i = 0; i != h->n_targets; ++i) { + bytes = bgzf_read(fp, &name_len, 4); + if (bytes != 4) goto read_err; + if (fp->is_be) ed_swap_4p(&name_len); + if (name_len <= 0) goto invalid; + + h->target_name[i] = (char*)malloc(name_len); + if (!h->target_name[i]) goto nomem; + num_names++; + + bytes = bgzf_read(fp, h->target_name[i], name_len); + if (bytes != name_len) goto read_err; + + if (h->target_name[i][name_len - 1] != '\0') { + /* Fix missing NUL-termination. Is this being too nice? 
+ We could alternatively bail out with an error. */ + char *new_name; + if (name_len == INT32_MAX) goto invalid; + new_name = realloc(h->target_name[i], name_len + 1); + if (new_name == NULL) goto nomem; + h->target_name[i] = new_name; + h->target_name[i][name_len] = '\0'; + } + + bytes = bgzf_read(fp, &h->target_len[i], 4); + if (bytes != 4) goto read_err; + if (fp->is_be) ed_swap_4p(&h->target_len[i]); + } + return h; + + nomem: + hts_log_error("Out of memory"); + goto clean; + + read_err: + if (bytes < 0) { + hts_log_error("Error reading BGZF stream"); + } else { + hts_log_error("Truncated BAM header"); + } + goto clean; + + invalid: + hts_log_error("Invalid BAM binary header"); + + clean: + if (h != NULL) { + h->n_targets = num_names; // ensure we free only allocated target_names + sam_hdr_destroy(h); + } + return NULL; +} + +int bam_hdr_write(BGZF *fp, const sam_hdr_t *h) +{ + int32_t i, name_len, x; + kstring_t hdr_ks = { 0, 0, NULL }; + char *text; + uint32_t l_text; + + if (!h) return -1; + + if (h->hrecs) { + if (sam_hrecs_rebuild_text(h->hrecs, &hdr_ks) != 0) return -1; + if (hdr_ks.l > UINT32_MAX) { + hts_log_error("Header too long for BAM format"); + free(hdr_ks.s); + return -1; + } else if (hdr_ks.l > INT32_MAX) { + hts_log_warning("Header too long for BAM specification (>2GB)"); + hts_log_warning("Output file may not be portable"); + } + text = hdr_ks.s; + l_text = hdr_ks.l; + } else { + if (h->l_text > UINT32_MAX) { + hts_log_error("Header too long for BAM format"); + return -1; + } else if (h->l_text > INT32_MAX) { + hts_log_warning("Header too long for BAM specification (>2GB)"); + hts_log_warning("Output file may not be portable"); + } + text = h->text; + l_text = h->l_text; + } + // write "BAM1" + if (bgzf_write(fp, "BAM\1", 4) < 0) { free(hdr_ks.s); return -1; } + // write plain text and the number of reference sequences + if (fp->is_be) { + x = ed_swap_4(l_text); + if (bgzf_write(fp, &x, 4) < 0) { free(hdr_ks.s); return -1; } + if (l_text) { + 
if (bgzf_write(fp, text, l_text) < 0) { free(hdr_ks.s); return -1; } + } + x = ed_swap_4(h->n_targets); + if (bgzf_write(fp, &x, 4) < 0) { free(hdr_ks.s); return -1; } + } else { + if (bgzf_write(fp, &l_text, 4) < 0) { free(hdr_ks.s); return -1; } + if (l_text) { + if (bgzf_write(fp, text, l_text) < 0) { free(hdr_ks.s); return -1; } + } + if (bgzf_write(fp, &h->n_targets, 4) < 0) { free(hdr_ks.s); return -1; } + } + free(hdr_ks.s); + // write sequence names and lengths + for (i = 0; i != h->n_targets; ++i) { + char *p = h->target_name[i]; + name_len = strlen(p) + 1; + if (fp->is_be) { + x = ed_swap_4(name_len); + if (bgzf_write(fp, &x, 4) < 0) return -1; + } else { + if (bgzf_write(fp, &name_len, 4) < 0) return -1; + } + if (bgzf_write(fp, p, name_len) < 0) return -1; + if (fp->is_be) { + x = ed_swap_4(h->target_len[i]); + if (bgzf_write(fp, &x, 4) < 0) return -1; + } else { + if (bgzf_write(fp, &h->target_len[i], 4) < 0) return -1; + } + } + if (bgzf_flush(fp) < 0) return -1; + return 0; +} + +const char *sam_parse_region(sam_hdr_t *h, const char *s, int *tid, + hts_pos_t *beg, hts_pos_t *end, int flags) { + return hts_parse_region(s, tid, beg, end, (hts_name2id_f)bam_name2id, h, flags); +} + +/************************* + *** BAM alignment I/O *** + *************************/ + +bam1_t *bam_init1(void) +{ + return (bam1_t*)calloc(1, sizeof(bam1_t)); +} + +int sam_realloc_bam_data(bam1_t *b, size_t desired) +{ + uint32_t new_m_data; + uint8_t *new_data; + new_m_data = desired; + kroundup32(new_m_data); // next power of 2 + new_m_data += 32; // reduces malloc arena migrations? 
+ if (new_m_data < desired) { + errno = ENOMEM; // Not strictly true but we can't store the size + return -1; + } +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (new_m_data > FUZZ_ALLOC_LIMIT) { + errno = ENOMEM; + return -1; + } +#endif + if ((bam_get_mempolicy(b) & BAM_USER_OWNS_DATA) == 0) { + new_data = realloc(b->data, new_m_data); + } else { + if ((new_data = malloc(new_m_data)) != NULL) { + if (b->l_data > 0) + memcpy(new_data, b->data, + b->l_data < b->m_data ? b->l_data : b->m_data); + bam_set_mempolicy(b, bam_get_mempolicy(b) & (~BAM_USER_OWNS_DATA)); + } + } + if (!new_data) return -1; + b->data = new_data; + b->m_data = new_m_data; + return 0; +} + +void bam_destroy1(bam1_t *b) +{ + if (b == 0) return; + if ((bam_get_mempolicy(b) & BAM_USER_OWNS_DATA) == 0) { + free(b->data); + if ((bam_get_mempolicy(b) & BAM_USER_OWNS_STRUCT) != 0) { + // In case of reuse + b->data = NULL; + b->m_data = 0; + b->l_data = 0; + } + } + + if ((bam_get_mempolicy(b) & BAM_USER_OWNS_STRUCT) == 0) + free(b); +} + +bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) +{ + if (realloc_bam_data(bdst, bsrc->l_data) < 0) return NULL; + memcpy(bdst->data, bsrc->data, bsrc->l_data); // copy var-len data + memcpy(&bdst->core, &bsrc->core, sizeof(bsrc->core)); // copy the rest + bdst->l_data = bsrc->l_data; + bdst->id = bsrc->id; + return bdst; +} + +bam1_t *bam_dup1(const bam1_t *bsrc) +{ + if (bsrc == NULL) return NULL; + bam1_t *bdst = bam_init1(); + if (bdst == NULL) return NULL; + if (bam_copy1(bdst, bsrc) == NULL) { + bam_destroy1(bdst); + return NULL; + } + return bdst; +} + +static void bam_cigar2rqlens(int n_cigar, const uint32_t *cigar, + hts_pos_t *rlen, hts_pos_t *qlen) +{ + int k; + *rlen = *qlen = 0; + for (k = 0; k < n_cigar; ++k) { + int type = bam_cigar_type(bam_cigar_op(cigar[k])); + int len = bam_cigar_oplen(cigar[k]); + if (type & 1) *qlen += len; + if (type & 2) *rlen += len; + } +} + +static int subtract_check_underflow(size_t length, size_t *limit) +{ + if 
(length <= *limit) { + *limit -= length; + return 0; + } + + return -1; +} + +int bam_set1(bam1_t *bam, + size_t l_qname, const char *qname, + uint16_t flag, int32_t tid, hts_pos_t pos, uint8_t mapq, + size_t n_cigar, const uint32_t *cigar, + int32_t mtid, hts_pos_t mpos, hts_pos_t isize, + size_t l_seq, const char *seq, const char *qual, + size_t l_aux) +{ + // use a default qname "*" if none is provided + if (l_qname == 0) { + l_qname = 1; + qname = "*"; + } + + // note: the qname is stored nul terminated and padded as described in the + // documentation for the bam1_t struct. + size_t qname_nuls = 4 - l_qname % 4; + + // the aligment length, needed for bam_reg2bin(), is calculated as in bam_endpos(). + // can't use bam_endpos() directly as some fields not yet set up. + hts_pos_t rlen = 0, qlen = 0; + if (!(flag & BAM_FUNMAP)) { + bam_cigar2rqlens((int)n_cigar, cigar, &rlen, &qlen); + } + if (rlen == 0) { + rlen = 1; + } + + // validate parameters + if (l_qname > 254) { + hts_log_error("Query name too long"); + errno = EINVAL; + return -1; + } + if (HTS_POS_MAX - rlen <= pos) { + hts_log_error("Read ends beyond highest supported position"); + errno = EINVAL; + return -1; + } + if (!(flag & BAM_FUNMAP) && l_seq > 0 && n_cigar == 0) { + hts_log_error("Mapped query must have a CIGAR"); + errno = EINVAL; + return -1; + } + if (!(flag & BAM_FUNMAP) && l_seq > 0 && l_seq != qlen) { + hts_log_error("CIGAR and query sequence are of different length"); + errno = EINVAL; + return -1; + } + + size_t limit = INT32_MAX; + int u = subtract_check_underflow(l_qname + qname_nuls, &limit); + u += subtract_check_underflow(n_cigar * 4, &limit); + u += subtract_check_underflow((l_seq + 1) / 2, &limit); + u += subtract_check_underflow(l_seq, &limit); + u += subtract_check_underflow(l_aux, &limit); + if (u != 0) { + hts_log_error("Size overflow"); + errno = EINVAL; + return -1; + } + + // re-allocate the data buffer as needed. 
+ size_t data_len = l_qname + qname_nuls + n_cigar * 4 + (l_seq + 1) / 2 + l_seq; + if (realloc_bam_data(bam, data_len + l_aux) < 0) { + return -1; + } + + bam->l_data = (int)data_len; + bam->core.pos = pos; + bam->core.tid = tid; + bam->core.bin = bam_reg2bin(pos, pos + rlen); + bam->core.qual = mapq; + bam->core.l_extranul = (uint8_t)(qname_nuls - 1); + bam->core.flag = flag; + bam->core.l_qname = (uint16_t)(l_qname + qname_nuls); + bam->core.n_cigar = (uint32_t)n_cigar; + bam->core.l_qseq = (int32_t)l_seq; + bam->core.mtid = mtid; + bam->core.mpos = mpos; + bam->core.isize = isize; + + uint8_t *cp = bam->data; + strncpy((char *)cp, qname, l_qname); + int i; + for (i = 0; i < qname_nuls; i++) { + cp[l_qname + i] = '\0'; + } + cp += l_qname + qname_nuls; + + if (n_cigar > 0) { + memcpy(cp, cigar, n_cigar * 4); + } + cp += n_cigar * 4; + +#define NN 16 + const uint8_t *useq = (uint8_t *)seq; + for (i = 0; i + NN < l_seq; i += NN) { + int j; + const uint8_t *u2 = useq+i; + for (j = 0; j < NN/2; j++) + cp[j] = (seq_nt16_table[u2[j*2]]<<4) | seq_nt16_table[u2[j*2+1]]; + cp += NN/2; + } + for (; i + 1 < l_seq; i += 2) { + *cp++ = (seq_nt16_table[useq[i]] << 4) | seq_nt16_table[useq[i + 1]]; + } + + for (; i < l_seq; i++) { + *cp++ = seq_nt16_table[(unsigned char)seq[i]] << 4; + } + + if (qual) { + memcpy(cp, qual, l_seq); + } + else { + memset(cp, '\xff', l_seq); + } + + return (int)data_len; +} + +hts_pos_t bam_cigar2qlen(int n_cigar, const uint32_t *cigar) +{ + int k; + hts_pos_t l; + for (k = l = 0; k < n_cigar; ++k) + if (bam_cigar_type(bam_cigar_op(cigar[k]))&1) + l += bam_cigar_oplen(cigar[k]); + return l; +} + +hts_pos_t bam_cigar2rlen(int n_cigar, const uint32_t *cigar) +{ + int k; + hts_pos_t l; + for (k = l = 0; k < n_cigar; ++k) + if (bam_cigar_type(bam_cigar_op(cigar[k]))&2) + l += bam_cigar_oplen(cigar[k]); + return l; +} + +hts_pos_t bam_endpos(const bam1_t *b) +{ + hts_pos_t rlen = (b->core.flag & BAM_FUNMAP)? 
0 : bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); + if (rlen == 0) rlen = 1; + return b->core.pos + rlen; +} + +static int bam_tag2cigar(bam1_t *b, int recal_bin, int give_warning) // return 0 if CIGAR is untouched; 1 if CIGAR is updated with CG +{ + bam1_core_t *c = &b->core; + + // Bail out as fast as possible for the easy case + uint32_t test_CG = BAM_CSOFT_CLIP | (c->l_qseq << BAM_CIGAR_SHIFT); + if (c->n_cigar == 0 || test_CG != *bam_get_cigar(b)) + return 0; + + // The above isn't fool proof - we may have old CIGAR tags that aren't used, + // but this is much less likely so do as a secondary check. + if (c->tid < 0 || c->pos < 0) + return 0; + + // Do we have a CG tag? + uint8_t *CG = bam_aux_get(b, "CG"); + int saved_errno = errno; + if (!CG) { + if (errno != ENOENT) return -1; // Bad aux data + errno = saved_errno; // restore errno on expected no-CG-tag case + return 0; + } + + // Now we start with the serious work migrating CG to CIGAR + uint32_t cigar_st, n_cigar4, CG_st, CG_en, ori_len = b->l_data, + *cigar0, CG_len, fake_bytes; + cigar0 = bam_get_cigar(b); + fake_bytes = c->n_cigar * 4; + if (CG[0] != 'B' || !(CG[1] == 'I' || CG[1] == 'i')) + return 0; // not of type B,I + CG_len = le_to_u32(CG + 2); + // don't move if the real CIGAR length is shorter than the fake cigar length + if (CG_len < c->n_cigar || CG_len >= 1U<<29) return 0; + + // move from the CG tag to the right position + cigar_st = (uint8_t*)cigar0 - b->data; + c->n_cigar = CG_len; + n_cigar4 = c->n_cigar * 4; + CG_st = CG - b->data - 2; + CG_en = CG_st + 8 + n_cigar4; + if (possibly_expand_bam_data(b, n_cigar4 - fake_bytes) < 0) return -1; + // we need c->n_cigar-fake_bytes bytes to swap CIGAR to the right place + b->l_data = b->l_data - fake_bytes + n_cigar4; + // insert c->n_cigar-fake_bytes empty space to make room + memmove(b->data + cigar_st + n_cigar4, b->data + cigar_st + fake_bytes, ori_len - (cigar_st + fake_bytes)); + // copy the real CIGAR to the right place; -fake_bytes 
for the fake CIGAR + memcpy(b->data + cigar_st, b->data + (n_cigar4 - fake_bytes) + CG_st + 8, n_cigar4); + if (ori_len > CG_en) // move data after the CG tag + memmove(b->data + CG_st + n_cigar4 - fake_bytes, b->data + CG_en + n_cigar4 - fake_bytes, ori_len - CG_en); + b->l_data -= n_cigar4 + 8; // 8: CGBI (4 bytes) and CGBI length (4) + if (recal_bin) + b->core.bin = hts_reg2bin(b->core.pos, bam_endpos(b), 14, 5); + if (give_warning) + hts_log_warning("%s encodes a CIGAR with %d operators at the CG tag", bam_get_qname(b), c->n_cigar); + return 1; +} + +static inline int aux_type2size(uint8_t type) +{ + switch (type) { + case 'A': case 'c': case 'C': + return 1; + case 's': case 'S': + return 2; + case 'i': case 'I': case 'f': + return 4; + case 'd': + return 8; + case 'Z': case 'H': case 'B': + return type; + default: + return 0; + } +} + +static void swap_data(const bam1_core_t *c, int l_data, uint8_t *data, int is_host) +{ + uint32_t *cigar = (uint32_t*)(data + c->l_qname); + uint32_t i; + for (i = 0; i < c->n_cigar; ++i) ed_swap_4p(&cigar[i]); +} + +// Fix bad records where qname is not terminated correctly. +static int fixup_missing_qname_nul(bam1_t *b) { + bam1_core_t *c = &b->core; + + // Note this is called before c->l_extranul is added to c->l_qname + if (c->l_extranul > 0) { + b->data[c->l_qname++] = '\0'; + c->l_extranul--; + } else { + if (b->l_data > INT_MAX - 4) return -1; + if (realloc_bam_data(b, b->l_data + 4) < 0) return -1; + b->l_data += 4; + b->data[c->l_qname++] = '\0'; + c->l_extranul = 3; + } + return 0; +} + +/* + * Note a second interface that returns a bam pointer instead would avoid bam_copy1 + * in multi-threaded handling. This may be worth considering for htslib2. 
+ */ +int bam_read1(BGZF *fp, bam1_t *b) +{ + bam1_core_t *c = &b->core; + int32_t block_len, ret, i; + uint32_t new_l_data; + uint8_t tmp[32], *x; + + b->l_data = 0; + + if ((ret = bgzf_read_small(fp, &block_len, 4)) != 4) { + if (ret == 0) return -1; // normal end-of-file + else return -2; // truncated + } + if (fp->is_be) + ed_swap_4p(&block_len); + if (block_len < 32) return -4; // block_len includes core data + if (fp->block_length - fp->block_offset > 32) { + // Avoid bgzf_read and a temporary copy to a local buffer + x = (uint8_t *)fp->uncompressed_block + fp->block_offset; + fp->block_offset += 32; + } else { + x = tmp; + if (bgzf_read(fp, x, 32) != 32) return -3; + } + + c->tid = le_to_u32(x); + c->pos = le_to_i32(x+4); + uint32_t x2 = le_to_u32(x+8); + c->bin = x2>>16; + c->qual = x2>>8&0xff; + c->l_qname = x2&0xff; + c->l_extranul = (c->l_qname%4 != 0)? (4 - c->l_qname%4) : 0; + uint32_t x3 = le_to_u32(x+12); + c->flag = x3>>16; + c->n_cigar = x3&0xffff; + c->l_qseq = le_to_u32(x+16); + c->mtid = le_to_u32(x+20); + c->mpos = le_to_i32(x+24); + c->isize = le_to_i32(x+28); + + new_l_data = block_len - 32 + c->l_extranul; + if (new_l_data > INT_MAX || c->l_qseq < 0 || c->l_qname < 1) return -4; + if (((uint64_t) c->n_cigar << 2) + c->l_qname + c->l_extranul + + (((uint64_t) c->l_qseq + 1) >> 1) + c->l_qseq > (uint64_t) new_l_data) + return -4; + if (realloc_bam_data(b, new_l_data) < 0) return -4; + b->l_data = new_l_data; + + if (bgzf_read_small(fp, b->data, c->l_qname) != c->l_qname) return -4; + if (b->data[c->l_qname - 1] != '\0') { // try to fix missing nul termination + if (fixup_missing_qname_nul(b) < 0) return -4; + } + for (i = 0; i < c->l_extranul; ++i) b->data[c->l_qname+i] = '\0'; + c->l_qname += c->l_extranul; + if (b->l_data < c->l_qname || + bgzf_read_small(fp, b->data + c->l_qname, b->l_data - c->l_qname) != b->l_data - c->l_qname) + return -4; + if (fp->is_be) swap_data(c, b->l_data, b->data, 0); + if (bam_tag2cigar(b, 0, 0) < 0) + return 
-4; + + // TODO: consider making this conditional + if (c->n_cigar > 0) { // recompute "bin" and check CIGAR-qlen consistency + hts_pos_t rlen, qlen; + bam_cigar2rqlens(c->n_cigar, bam_get_cigar(b), &rlen, &qlen); + if ((b->core.flag & BAM_FUNMAP) || rlen == 0) rlen = 1; + b->core.bin = hts_reg2bin(b->core.pos, b->core.pos + rlen, 14, 5); + // Sanity check for broken CIGAR alignments + if (c->l_qseq > 0 && !(c->flag & BAM_FUNMAP) && qlen != c->l_qseq) { + hts_log_error("CIGAR and query sequence lengths differ for %s", + bam_get_qname(b)); + return -4; + } + } + + return 4 + block_len; +} + +int bam_write1(BGZF *fp, const bam1_t *b) +{ + const bam1_core_t *c = &b->core; + uint32_t x[8], block_len = b->l_data - c->l_extranul + 32, y; + int i, ok; + if (c->l_qname - c->l_extranul > 255) { + hts_log_error("QNAME \"%s\" is longer than 254 characters", bam_get_qname(b)); + errno = EOVERFLOW; + return -1; + } + if (c->n_cigar > 0xffff) block_len += 16; // "16" for "CGBI", 4-byte tag length and 8-byte fake CIGAR + if (c->pos > INT_MAX || + c->mpos > INT_MAX || + c->isize < INT_MIN || c->isize > INT_MAX) { + hts_log_error("Positional data is too large for BAM format"); + return -1; + } + x[0] = c->tid; + x[1] = c->pos; + x[2] = (uint32_t)c->bin<<16 | c->qual<<8 | (c->l_qname - c->l_extranul); + if (c->n_cigar > 0xffff) x[3] = (uint32_t)c->flag << 16 | 2; + else x[3] = (uint32_t)c->flag << 16 | (c->n_cigar & 0xffff); + x[4] = c->l_qseq; + x[5] = c->mtid; + x[6] = c->mpos; + x[7] = c->isize; + ok = (bgzf_flush_try(fp, 4 + block_len) >= 0); + if (fp->is_be) { + for (i = 0; i < 8; ++i) ed_swap_4p(x + i); + y = block_len; + if (ok) ok = (bgzf_write_small(fp, ed_swap_4p(&y), 4) >= 0); + swap_data(c, b->l_data, b->data, 1); + } else { + if (ok) ok = (bgzf_write_small(fp, &block_len, 4) >= 0); + } + if (ok) ok = (bgzf_write_small(fp, x, 32) >= 0); + if (ok) ok = (bgzf_write_small(fp, b->data, c->l_qname - c->l_extranul) >= 0); + if (c->n_cigar <= 0xffff) { // no long CIGAR; write 
normally + if (ok) ok = (bgzf_write_small(fp, b->data + c->l_qname, b->l_data - c->l_qname) >= 0); + } else { // with long CIGAR, insert a fake CIGAR record and move the real CIGAR to the CG:B,I tag + uint8_t buf[8]; + uint32_t cigar_st, cigar_en, cigar[2]; + hts_pos_t cigreflen = bam_cigar2rlen(c->n_cigar, bam_get_cigar(b)); + if (cigreflen >= (1<<28)) { + // Length of reference covered is greater than the biggest + // CIGAR operation currently allowed. + hts_log_error("Record %s with %d CIGAR ops and ref length %"PRIhts_pos + " cannot be written in BAM. Try writing SAM or CRAM instead.\n", + bam_get_qname(b), c->n_cigar, cigreflen); + return -1; + } + cigar_st = (uint8_t*)bam_get_cigar(b) - b->data; + cigar_en = cigar_st + c->n_cigar * 4; + cigar[0] = (uint32_t)c->l_qseq << 4 | BAM_CSOFT_CLIP; + cigar[1] = (uint32_t)cigreflen << 4 | BAM_CREF_SKIP; + u32_to_le(cigar[0], buf); + u32_to_le(cigar[1], buf + 4); + if (ok) ok = (bgzf_write_small(fp, buf, 8) >= 0); // write cigar: SN + if (ok) ok = (bgzf_write_small(fp, &b->data[cigar_en], b->l_data - cigar_en) >= 0); // write data after CIGAR + if (ok) ok = (bgzf_write_small(fp, "CGBI", 4) >= 0); // write CG:B,I + u32_to_le(c->n_cigar, buf); + if (ok) ok = (bgzf_write_small(fp, buf, 4) >= 0); // write the true CIGAR length + if (ok) ok = (bgzf_write_small(fp, &b->data[cigar_st], c->n_cigar * 4) >= 0); // write the real CIGAR + } + if (fp->is_be) swap_data(c, b->l_data, b->data, 0); + return ok? 4 + block_len : -1; +} + +/* + * Write a BAM file and append to the in-memory index simultaneously. 
+ */ +static int bam_write_idx1(htsFile *fp, const sam_hdr_t *h, const bam1_t *b) { + BGZF *bfp = fp->fp.bgzf; + + if (!fp->idx) + return bam_write1(bfp, b); + + uint32_t block_len = b->l_data - b->core.l_extranul + 32; + if (bgzf_flush_try(bfp, 4 + block_len) < 0) + return -1; + if (!bfp->mt) + hts_idx_amend_last(fp->idx, bgzf_tell(bfp)); + + int ret = bam_write1(bfp, b); + if (ret < 0) + return -1; + + if (bgzf_idx_push(bfp, fp->idx, b->core.tid, b->core.pos, bam_endpos(b), bgzf_tell(bfp), !(b->core.flag&BAM_FUNMAP)) < 0) { + hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", + bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); + ret = -1; + } + + return ret; +} + +/* + * Set the qname in a BAM record + */ +int bam_set_qname(bam1_t *rec, const char *qname) +{ + if (!rec) return -1; + if (!qname || !*qname) return -1; + + size_t old_len = rec->core.l_qname; + size_t new_len = strlen(qname) + 1; + if (new_len < 1 || new_len > 255) return -1; + + int extranul = (new_len%4 != 0) ? 
(4 - new_len%4) : 0; + + size_t new_data_len = rec->l_data - old_len + new_len + extranul; + if (realloc_bam_data(rec, new_data_len) < 0) return -1; + + // Make room + if (new_len + extranul != rec->core.l_qname) + memmove(rec->data + new_len + extranul, rec->data + rec->core.l_qname, rec->l_data - rec->core.l_qname); + // Copy in new name and pad if needed + memcpy(rec->data, qname, new_len); + int n; + for (n = 0; n < extranul; n++) rec->data[new_len + n] = '\0'; + + rec->l_data = new_data_len; + rec->core.l_qname = new_len + extranul; + rec->core.l_extranul = extranul; + + return 0; +} + +/******************** + *** BAM indexing *** + ********************/ + +static hts_idx_t *sam_index(htsFile *fp, int min_shift) +{ + int n_lvls, i, fmt, ret; + bam1_t *b; + hts_idx_t *idx; + sam_hdr_t *h; + h = sam_hdr_read(fp); + if (h == NULL) return NULL; + if (min_shift > 0) { + hts_pos_t max_len = 0, s; + for (i = 0; i < h->n_targets; ++i) { + hts_pos_t len = sam_hdr_tid2len(h, i); + if (max_len < len) max_len = len; + } + max_len += 256; + for (n_lvls = 0, s = 1< s; ++n_lvls, s <<= 3); + fmt = HTS_FMT_CSI; + } else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_BAI; + idx = hts_idx_init(h->n_targets, fmt, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); + b = bam_init1(); + while ((ret = sam_read1(fp, h, b)) >= 0) { + ret = hts_idx_push(idx, b->core.tid, b->core.pos, bam_endpos(b), bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)); + if (ret < 0) { // unsorted or doesn't fit + hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); + goto err; + } + } + if (ret < -1) goto err; // corrupted BAM file + + hts_idx_finish(idx, bgzf_tell(fp->fp.bgzf)); + sam_hdr_destroy(h); + bam_destroy1(b); + return idx; + +err: + bam_destroy1(b); + hts_idx_destroy(idx); + return NULL; +} + +int sam_index_build3(const char 
*fn, const char *fnidx, int min_shift, int nthreads) +{ + hts_idx_t *idx; + htsFile *fp; + int ret = 0; + + if ((fp = hts_open(fn, "r")) == 0) return -2; + if (nthreads) + hts_set_threads(fp, nthreads); + + switch (fp->format.format) { + case cram: + + ret = cram_index_build(fp->fp.cram, fn, fnidx); + break; + + case bam: + case sam: + if (fp->format.compression != bgzf) { + hts_log_error("%s file \"%s\" not BGZF compressed", + fp->format.format == bam ? "BAM" : "SAM", fn); + ret = -1; + break; + } + idx = sam_index(fp, min_shift); + if (idx) { + ret = hts_idx_save_as(idx, fn, fnidx, (min_shift > 0)? HTS_FMT_CSI : HTS_FMT_BAI); + if (ret < 0) ret = -4; + hts_idx_destroy(idx); + } + else ret = -1; + break; + + default: + ret = -3; + break; + } + hts_close(fp); + + return ret; +} + +int sam_index_build2(const char *fn, const char *fnidx, int min_shift) +{ + return sam_index_build3(fn, fnidx, min_shift, 0); +} + +int sam_index_build(const char *fn, int min_shift) +{ + return sam_index_build3(fn, NULL, min_shift, 0); +} + +// Provide bam_index_build() symbol for binary compatibility with earlier HTSlib +#undef bam_index_build +int bam_index_build(const char *fn, int min_shift) +{ + return sam_index_build2(fn, NULL, min_shift); +} + +// Initialise fp->idx for the current format type. +// This must be called after the header has been written but no other data. 
+int sam_idx_init(htsFile *fp, sam_hdr_t *h, int min_shift, const char *fnidx) { + fp->fnidx = fnidx; + if (fp->format.format == bam || fp->format.format == bcf || + (fp->format.format == sam && fp->format.compression == bgzf)) { + int n_lvls, fmt = HTS_FMT_CSI; + if (min_shift > 0) { + int64_t max_len = 0, s; + int i; + for (i = 0; i < h->n_targets; ++i) + if (max_len < h->target_len[i]) max_len = h->target_len[i]; + max_len += 256; + for (n_lvls = 0, s = 1< s; ++n_lvls, s <<= 3); + + } else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_BAI; + + fp->idx = hts_idx_init(h->n_targets, fmt, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); + return fp->idx ? 0 : -1; + } + + if (fp->format.format == cram) { + fp->fp.cram->idxfp = bgzf_open(fnidx, "wg"); + return fp->fp.cram->idxfp ? 0 : -1; + } + + return -1; +} + +// Finishes an index. Call after the last record has been written. +// Returns 0 on success, <0 on failure. +int sam_idx_save(htsFile *fp) { + if (fp->format.format == bam || fp->format.format == bcf || + fp->format.format == vcf || fp->format.format == sam) { + int ret; + if ((ret = sam_state_destroy(fp)) < 0) { + errno = -ret; + return -1; + } + if (!fp->is_bgzf || bgzf_flush(fp->fp.bgzf) < 0) + return -1; + hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf)); + + if (hts_idx_finish(fp->idx, bgzf_tell(fp->fp.bgzf)) < 0) + return -1; + + return hts_idx_save_but_not_close(fp->idx, fp->fnidx, hts_idx_fmt(fp->idx)); + + } else if (fp->format.format == cram) { + // flushed and closed by cram_close + } + + return 0; +} + +static int sam_readrec(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) +{ + htsFile *fp = (htsFile *)fpv; + bam1_t *b = bv; + fp->line.l = 0; + int ret = sam_read1(fp, fp->bam_header, b); + if (ret >= 0) { + *tid = b->core.tid; + *beg = b->core.pos; + *end = bam_endpos(b); + } + return ret; +} + +// This is used only with read_rest=1 iterators, so need not set tid/beg/end. 
+static int sam_readrec_rest(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) +{ + htsFile *fp = (htsFile *)fpv; + bam1_t *b = bv; + fp->line.l = 0; + int ret = sam_read1(fp, fp->bam_header, b); + return ret; +} + +// Internal (for now) func used by bam_sym_lookup. This is copied from +// samtools/bam.c. +static const char *bam_get_library(const bam_hdr_t *h, const bam1_t *b) +{ + const char *rg; + kstring_t lib = { 0, 0, NULL }; + rg = (char *)bam_aux_get(b, "RG"); + + if (!rg) + return NULL; + else + rg++; + + if (sam_hdr_find_tag_id((bam_hdr_t *)h, "RG", "ID", rg, "LB", &lib) < 0) + return NULL; + + static char LB_text[1024]; + int len = lib.l < sizeof(LB_text) - 1 ? lib.l : sizeof(LB_text) - 1; + + memcpy(LB_text, lib.s, len); + LB_text[len] = 0; + + free(lib.s); + + return LB_text; +} + + +// Bam record pointer and SAM header combined +typedef struct { + const sam_hdr_t *h; + const bam1_t *b; +} hb_pair; + +// Looks up variable names in str and replaces them with their value. +// Also supports aux tags. +// +// Note the expression parser deliberately overallocates str size so it +// is safe to use memcmp over strcmp. +static int bam_sym_lookup(void *data, char *str, char **end, + hts_expr_val_t *res) { + hb_pair *hb = (hb_pair *)data; + const bam1_t *b = hb->b; + + res->is_str = 0; + switch(*str) { + case 'c': + if (memcmp(str, "cigar", 5) == 0) { + *end = str+5; + res->is_str = 1; + ks_clear(&res->s); + uint32_t *cigar = bam_get_cigar(b); + int i, n = b->core.n_cigar, r = 0; + if (n) { + for (i = 0; i < n; i++) { + r |= kputw (bam_cigar_oplen(cigar[i]), &res->s) < 0; + r |= kputc_(bam_cigar_opchr(cigar[i]), &res->s) < 0; + } + r |= kputs("", &res->s) < 0; + } else { + r |= kputs("*", &res->s) < 0; + } + return r ? 
-1 : 0; + } + break; + + case 'e': + if (memcmp(str, "endpos", 6) == 0) { + *end = str+6; + res->d = bam_endpos(b); + return 0; + } + break; + + case 'f': + if (memcmp(str, "flag", 4) == 0) { + str = *end = str+4; + if (*str != '.') { + res->d = b->core.flag; + return 0; + } else { + str++; + if (!memcmp(str, "paired", 6)) { + *end = str+6; + res->d = b->core.flag & BAM_FPAIRED; + return 0; + } else if (!memcmp(str, "proper_pair", 11)) { + *end = str+11; + res->d = b->core.flag & BAM_FPROPER_PAIR; + return 0; + } else if (!memcmp(str, "unmap", 5)) { + *end = str+5; + res->d = b->core.flag & BAM_FUNMAP; + return 0; + } else if (!memcmp(str, "munmap", 6)) { + *end = str+6; + res->d = b->core.flag & BAM_FMUNMAP; + return 0; + } else if (!memcmp(str, "reverse", 7)) { + *end = str+7; + res->d = b->core.flag & BAM_FREVERSE; + return 0; + } else if (!memcmp(str, "mreverse", 8)) { + *end = str+8; + res->d = b->core.flag & BAM_FMREVERSE; + return 0; + } else if (!memcmp(str, "read1", 5)) { + *end = str+5; + res->d = b->core.flag & BAM_FREAD1; + return 0; + } else if (!memcmp(str, "read2", 5)) { + *end = str+5; + res->d = b->core.flag & BAM_FREAD2; + return 0; + } else if (!memcmp(str, "secondary", 9)) { + *end = str+9; + res->d = b->core.flag & BAM_FSECONDARY; + return 0; + } else if (!memcmp(str, "qcfail", 6)) { + *end = str+6; + res->d = b->core.flag & BAM_FQCFAIL; + return 0; + } else if (!memcmp(str, "dup", 3)) { + *end = str+3; + res->d = b->core.flag & BAM_FDUP; + return 0; + } else if (!memcmp(str, "supplementary", 13)) { + *end = str+13; + res->d = b->core.flag & BAM_FSUPPLEMENTARY; + return 0; + } else { + hts_log_error("Unrecognised flag string"); + return -1; + } + } + } + break; + + case 'h': + if (memcmp(str, "hclen", 5) == 0) { + int hclen = 0; + uint32_t *cigar = bam_get_cigar(b); + uint32_t ncigar = b->core.n_cigar; + + // left + if (ncigar > 0 && bam_cigar_op(cigar[0]) == BAM_CHARD_CLIP) + hclen = bam_cigar_oplen(cigar[0]); + + // right + if (ncigar > 1 && 
bam_cigar_op(cigar[ncigar-1]) == BAM_CHARD_CLIP) + hclen += bam_cigar_oplen(cigar[ncigar-1]); + + *end = str+5; + res->d = hclen; + return 0; + } + break; + + case 'l': + if (memcmp(str, "library", 7) == 0) { + *end = str+7; + res->is_str = 1; + const char *lib = bam_get_library(hb->h, b); + kputs(lib ? lib : "", ks_clear(&res->s)); + return 0; + } + break; + + case 'm': + if (memcmp(str, "mapq", 4) == 0) { + *end = str+4; + res->d = b->core.qual; + return 0; + } else if (memcmp(str, "mpos", 4) == 0) { + *end = str+4; + res->d = b->core.mpos+1; + return 0; + } else if (memcmp(str, "mrname", 6) == 0) { + *end = str+6; + res->is_str = 1; + const char *rn = sam_hdr_tid2name(hb->h, b->core.mtid); + kputs(rn ? rn : "*", ks_clear(&res->s)); + return 0; + } else if (memcmp(str, "mrefid", 6) == 0) { + *end = str+6; + res->d = b->core.mtid; + return 0; + } + break; + + case 'n': + if (memcmp(str, "ncigar", 6) == 0) { + *end = str+6; + res->d = b->core.n_cigar; + return 0; + } + break; + + case 'p': + if (memcmp(str, "pos", 3) == 0) { + *end = str+3; + res->d = b->core.pos+1; + return 0; + } else if (memcmp(str, "pnext", 5) == 0) { + *end = str+5; + res->d = b->core.mpos+1; + return 0; + } + break; + + case 'q': + if (memcmp(str, "qlen", 4) == 0) { + *end = str+4; + res->d = bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)); + return 0; + } else if (memcmp(str, "qname", 5) == 0) { + *end = str+5; + res->is_str = 1; + kputs(bam_get_qname(b), ks_clear(&res->s)); + return 0; + } else if (memcmp(str, "qual", 4) == 0) { + *end = str+4; + ks_clear(&res->s); + if (ks_resize(&res->s, b->core.l_qseq+1) < 0) + return -1; + memcpy(res->s.s, bam_get_qual(b), b->core.l_qseq); + res->s.l = b->core.l_qseq; + res->is_str = 1; + return 0; + } + break; + + case 'r': + if (memcmp(str, "rlen", 4) == 0) { + *end = str+4; + res->d = bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); + return 0; + } else if (memcmp(str, "rname", 5) == 0) { + *end = str+5; + res->is_str = 1; + const char *rn = 
sam_hdr_tid2name(hb->h, b->core.tid); + kputs(rn ? rn : "*", ks_clear(&res->s)); + return 0; + } else if (memcmp(str, "rnext", 5) == 0) { + *end = str+5; + res->is_str = 1; + const char *rn = sam_hdr_tid2name(hb->h, b->core.mtid); + kputs(rn ? rn : "*", ks_clear(&res->s)); + return 0; + } else if (memcmp(str, "refid", 5) == 0) { + *end = str+5; + res->d = b->core.tid; + return 0; + } + break; + + case 's': + if (memcmp(str, "seq", 3) == 0) { + *end = str+3; + ks_clear(&res->s); + if (ks_resize(&res->s, b->core.l_qseq+1) < 0) + return -1; + nibble2base(bam_get_seq(b), res->s.s, b->core.l_qseq); + res->s.s[b->core.l_qseq] = 0; + res->s.l = b->core.l_qseq; + res->is_str = 1; + return 0; + } else if (memcmp(str, "sclen", 5) == 0) { + int sclen = 0; + uint32_t *cigar = bam_get_cigar(b); + int ncigar = b->core.n_cigar; + int left = 0; + + // left + if (ncigar > 0 + && bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) + left = 0, sclen += bam_cigar_oplen(cigar[0]); + else if (ncigar > 1 + && bam_cigar_op(cigar[0]) == BAM_CHARD_CLIP + && bam_cigar_op(cigar[1]) == BAM_CSOFT_CLIP) + left = 1, sclen += bam_cigar_oplen(cigar[1]); + + // right + if (ncigar-1 > left + && bam_cigar_op(cigar[ncigar-1]) == BAM_CSOFT_CLIP) + sclen += bam_cigar_oplen(cigar[ncigar-1]); + else if (ncigar-2 > left + && bam_cigar_op(cigar[ncigar-1]) == BAM_CHARD_CLIP + && bam_cigar_op(cigar[ncigar-2]) == BAM_CSOFT_CLIP) + sclen += bam_cigar_oplen(cigar[ncigar-2]); + + *end = str+5; + res->d = sclen; + return 0; + } + break; + + case 't': + if (memcmp(str, "tlen", 4) == 0) { + *end = str+4; + res->d = b->core.isize; + return 0; + } + break; + + case '[': + if (*str == '[' && str[1] && str[2] && str[3] == ']') { + /* aux tags */ + *end = str+4; + + uint8_t *aux = bam_aux_get(b, str+1); + if (aux) { + // we define the truth of a tag to be its presence, even if 0. 
+ res->is_true = 1; + switch (*aux) { + case 'Z': + case 'H': + res->is_str = 1; + kputs((char *)aux+1, ks_clear(&res->s)); + break; + + case 'A': + res->is_str = 1; + kputsn((char *)aux+1, 1, ks_clear(&res->s)); + break; + + case 'i': case 'I': + case 's': case 'S': + case 'c': case 'C': + res->is_str = 0; + res->d = bam_aux2i(aux); + break; + + case 'f': + case 'd': + res->is_str = 0; + res->d = bam_aux2f(aux); + break; + + default: + hts_log_error("Aux type '%c not yet supported by filters", + *aux); + return -1; + } + return 0; + + } else { + // hence absent tags are always false (and strings) + res->is_str = 1; + res->s.l = 0; + res->d = 0; + res->is_true = 0; + return 0; + } + } + break; + } + + // All successful matches in switch should return 0. + // So if we didn't match, it's a parse error. + return -1; +} + +// Returns 1 when accepted by the filter, 0 if not, -1 on error. +int sam_passes_filter(const sam_hdr_t *h, const bam1_t *b, hts_filter_t *filt) +{ + hb_pair hb = {h, b}; + hts_expr_val_t res = HTS_EXPR_VAL_INIT; + if (hts_filter_eval2(filt, &hb, bam_sym_lookup, &res)) { + hts_log_error("Couldn't process filter expression"); + hts_expr_val_free(&res); + return -1; + } + + int t = res.is_true; + hts_expr_val_free(&res); + + return t; +} + +static int cram_readrec(BGZF *ignored, void *fpv, void *bv, int *tid, hts_pos_t *beg, hts_pos_t *end) +{ + htsFile *fp = fpv; + bam1_t *b = bv; + int pass_filter, ret; + + do { + ret = cram_get_bam_seq(fp->fp.cram, &b); + if (ret < 0) + return cram_eof(fp->fp.cram) ? 
-1 : -2; + + if (bam_tag2cigar(b, 1, 1) < 0) + return -2; + + *tid = b->core.tid; + *beg = b->core.pos; + *end = bam_endpos(b); + + if (fp->filter) { + pass_filter = sam_passes_filter(fp->bam_header, b, fp->filter); + if (pass_filter < 0) + return -2; + } else { + pass_filter = 1; + } + } while (pass_filter == 0); + + return ret; +} + +static int cram_pseek(void *fp, int64_t offset, int whence) +{ + cram_fd *fd = (cram_fd *)fp; + + if ((0 != cram_seek(fd, offset, SEEK_SET)) + && (0 != cram_seek(fd, offset - fd->first_container, SEEK_CUR))) + return -1; + + fd->curr_position = offset; + + if (fd->ctr) { + cram_free_container(fd->ctr); + if (fd->ctr_mt && fd->ctr_mt != fd->ctr) + cram_free_container(fd->ctr_mt); + + fd->ctr = NULL; + fd->ctr_mt = NULL; + fd->ooc = 0; + } + + return 0; +} + +/* + * cram_ptell is a pseudo-tell function, because it matches the position of the disk cursor only + * after a fresh seek call. Otherwise it indicates that the read takes place inside the buffered + * container previously fetched. It was designed like this to integrate with the functionality + * of the iterator stepping logic. 
+ */ + +static int64_t cram_ptell(void *fp) +{ + cram_fd *fd = (cram_fd *)fp; + cram_container *c; + cram_slice *s; + int64_t ret = -1L; + + if (fd) { + if ((c = fd->ctr) != NULL) { + if ((s = c->slice) != NULL && s->max_rec) { + if ((c->curr_slice + s->curr_rec/s->max_rec) >= (c->max_slice + 1)) + fd->curr_position += c->offset + c->length; + } + } + ret = fd->curr_position; + } + + return ret; +} + +static int bam_pseek(void *fp, int64_t offset, int whence) +{ + BGZF *fd = (BGZF *)fp; + + return bgzf_seek(fd, offset, whence); +} + +static int64_t bam_ptell(void *fp) +{ + BGZF *fd = (BGZF *)fp; + if (!fd) + return -1L; + + return bgzf_tell(fd); +} + + + +static hts_idx_t *index_load(htsFile *fp, const char *fn, const char *fnidx, int flags) +{ + switch (fp->format.format) { + case bam: + case sam: + return hts_idx_load3(fn, fnidx, HTS_FMT_BAI, flags); + + case cram: { + if (cram_index_load(fp->fp.cram, fn, fnidx) < 0) return NULL; + + // Cons up a fake "index" just pointing at the associated cram_fd: + hts_cram_idx_t *idx = malloc(sizeof (hts_cram_idx_t)); + if (idx == NULL) return NULL; + idx->fmt = HTS_FMT_CRAI; + idx->cram = fp->fp.cram; + return (hts_idx_t *) idx; + } + + default: + return NULL; // TODO Would use tbx_index_load if it returned hts_idx_t + } +} + +hts_idx_t *sam_index_load3(htsFile *fp, const char *fn, const char *fnidx, int flags) +{ + return index_load(fp, fn, fnidx, flags); +} + +hts_idx_t *sam_index_load2(htsFile *fp, const char *fn, const char *fnidx) { + return index_load(fp, fn, fnidx, HTS_IDX_SAVE_REMOTE); +} + +hts_idx_t *sam_index_load(htsFile *fp, const char *fn) +{ + return index_load(fp, fn, NULL, HTS_IDX_SAVE_REMOTE); +} + +static hts_itr_t *cram_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec) +{ + const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; + hts_itr_t *iter = (hts_itr_t *) calloc(1, sizeof(hts_itr_t)); + if (iter == NULL) return NULL; + + // Cons up a dummy iterator 
for which hts_itr_next() will simply invoke + // the readrec function: + iter->is_cram = 1; + iter->read_rest = 1; + iter->off = NULL; + iter->bins.a = NULL; + iter->readrec = readrec; + + if (tid >= 0 || tid == HTS_IDX_NOCOOR || tid == HTS_IDX_START) { + cram_range r = { tid, beg+1, end }; + int ret = cram_set_option(cidx->cram, CRAM_OPT_RANGE, &r); + + iter->curr_off = 0; + // The following fields are not required by hts_itr_next(), but are + // filled in in case user code wants to look at them. + iter->tid = tid; + iter->beg = beg; + iter->end = end; + + switch (ret) { + case 0: + break; + + case -2: + // No data vs this ref, so mark iterator as completed. + // Same as HTS_IDX_NONE. + iter->finished = 1; + break; + + default: + free(iter); + return NULL; + } + } + else switch (tid) { + case HTS_IDX_REST: + iter->curr_off = 0; + break; + case HTS_IDX_NONE: + iter->curr_off = 0; + iter->finished = 1; + break; + default: + hts_log_error("Query with tid=%d not implemented for CRAM files", tid); + abort(); + break; + } + + return iter; +} + +hts_itr_t *sam_itr_queryi(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end) +{ + const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; + if (idx == NULL) + return hts_itr_query(NULL, tid, beg, end, sam_readrec_rest); + else if (cidx->fmt == HTS_FMT_CRAI) + return cram_itr_query(idx, tid, beg, end, sam_readrec); + else + return hts_itr_query(idx, tid, beg, end, sam_readrec); +} + +static int cram_name2id(void *fdv, const char *ref) +{ + cram_fd *fd = (cram_fd *) fdv; + return sam_hdr_name2tid(fd->header, ref); +} + +hts_itr_t *sam_itr_querys(const hts_idx_t *idx, sam_hdr_t *hdr, const char *region) +{ + const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; + return hts_itr_querys(idx, region, (hts_name2id_f)(bam_name2id), hdr, + cidx->fmt == HTS_FMT_CRAI ? 
cram_itr_query : hts_itr_query, + sam_readrec); +} + +hts_itr_t *sam_itr_regarray(const hts_idx_t *idx, sam_hdr_t *hdr, char **regarray, unsigned int regcount) +{ + const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; + hts_reglist_t *r_list = NULL; + int r_count = 0; + + if (!cidx || !hdr) + return NULL; + + hts_itr_t *itr = NULL; + if (cidx->fmt == HTS_FMT_CRAI) { + r_list = hts_reglist_create(regarray, regcount, &r_count, cidx->cram, cram_name2id); + if (!r_list) + return NULL; + itr = hts_itr_regions(idx, r_list, r_count, cram_name2id, cidx->cram, + hts_itr_multi_cram, cram_readrec, cram_pseek, cram_ptell); + } else { + r_list = hts_reglist_create(regarray, regcount, &r_count, hdr, (hts_name2id_f)(bam_name2id)); + if (!r_list) + return NULL; + itr = hts_itr_regions(idx, r_list, r_count, (hts_name2id_f)(bam_name2id), hdr, + hts_itr_multi_bam, sam_readrec, bam_pseek, bam_ptell); + } + + if (!itr) + hts_reglist_free(r_list, r_count); + + return itr; +} + +hts_itr_t *sam_itr_regions(const hts_idx_t *idx, sam_hdr_t *hdr, hts_reglist_t *reglist, unsigned int regcount) +{ + const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; + + if(!cidx || !hdr || !reglist) + return NULL; + + if (cidx->fmt == HTS_FMT_CRAI) + return hts_itr_regions(idx, reglist, regcount, cram_name2id, cidx->cram, + hts_itr_multi_cram, cram_readrec, cram_pseek, cram_ptell); + else + return hts_itr_regions(idx, reglist, regcount, (hts_name2id_f)(bam_name2id), hdr, + hts_itr_multi_bam, sam_readrec, bam_pseek, bam_ptell); +} + +/********************** + *** SAM header I/O *** + **********************/ + +#include "htslib/kseq.h" +#include "htslib/kstring.h" + +sam_hdr_t *sam_hdr_parse(size_t l_text, const char *text) +{ + sam_hdr_t *bh = sam_hdr_init(); + if (!bh) return NULL; + + if (sam_hdr_add_lines(bh, text, l_text) != 0) { + sam_hdr_destroy(bh); + return NULL; + } + + return bh; +} + +static int valid_sam_header_type(const char *s) { + if (s[0] != '@') return 0; + switch (s[1]) { + case 
'H': + return s[2] == 'D' && s[3] == '\t'; + case 'S': + return s[2] == 'Q' && s[3] == '\t'; + case 'R': + case 'P': + return s[2] == 'G' && s[3] == '\t'; + case 'C': + return s[2] == 'O'; + } + return 0; +} + +// Minimal sanitisation of a header to ensure. +// - null terminated string. +// - all lines start with @ (also implies no blank lines). +// +// Much more could be done, but currently is not, including: +// - checking header types are known (HD, SQ, etc). +// - syntax (eg checking tab separated fields). +// - validating n_targets matches @SQ records. +// - validating target lengths against @SQ records. +static sam_hdr_t *sam_hdr_sanitise(sam_hdr_t *h) { + if (!h) + return NULL; + + // Special case for empty headers. + if (h->l_text == 0) + return h; + + size_t i; + unsigned int lnum = 0; + char *cp = h->text, last = '\n'; + for (i = 0; i < h->l_text; i++) { + // NB: l_text excludes terminating nul. This finds early ones. + if (cp[i] == 0) + break; + + // Error on \n[^@], including duplicate newlines + if (last == '\n') { + lnum++; + if (cp[i] != '@') { + hts_log_error("Malformed SAM header at line %u", lnum); + sam_hdr_destroy(h); + return NULL; + } + } + + last = cp[i]; + } + + if (i < h->l_text) { // Early nul found. Complain if not just padding. + size_t j = i; + while (j < h->l_text && cp[j] == '\0') j++; + if (j < h->l_text) + hts_log_warning("Unexpected NUL character in header. Possibly truncated"); + } + + // Add trailing newline and/or trailing nul if required. + if (last != '\n') { + hts_log_warning("Missing trailing newline on SAM header. 
Possibly truncated"); + + if (h->l_text < 2 || i >= h->l_text - 2) { + if (h->l_text >= SIZE_MAX - 2) { + hts_log_error("No room for extra newline"); + sam_hdr_destroy(h); + return NULL; + } + + cp = realloc(h->text, (size_t) h->l_text+2); + if (!cp) { + sam_hdr_destroy(h); + return NULL; + } + h->text = cp; + } + cp[i++] = '\n'; + + // l_text may be larger already due to multiple nul padding + if (h->l_text < i) + h->l_text = i; + cp[h->l_text] = '\0'; + } + + return h; +} + +static void known_stderr(const char *tool, const char *advice) { + hts_log_warning("SAM file corrupted by embedded %s error/log message", tool); + hts_log_warning("%s", advice); +} + +static void warn_if_known_stderr(const char *line) { + if (strstr(line, "M::bwa_idx_load_from_disk") != NULL) + known_stderr("bwa", "Use `bwa mem -o file.sam ...` or `bwa sampe -f file.sam ...` instead of `bwa ... > file.sam`"); + else if (strstr(line, "M::mem_pestat") != NULL) + known_stderr("bwa", "Use `bwa mem -o file.sam ...` instead of `bwa mem ... > file.sam`"); + else if (strstr(line, "loaded/built the index") != NULL) + known_stderr("minimap2", "Use `minimap2 -o file.sam ...` instead of `minimap2 ... 
> file.sam`"); +} + +static sam_hdr_t *sam_hdr_create(htsFile* fp) { + kstring_t str = { 0, 0, NULL }; + khint_t k; + sam_hdr_t* h = sam_hdr_init(); + const char *q, *r; + char* sn = NULL; + khash_t(s2i) *d = kh_init(s2i); + khash_t(s2i) *long_refs = NULL; + if (!h || !d) + goto error; + + int ret, has_SQ = 0; + int next_c = '@'; + while (next_c == '@' && (ret = hts_getline(fp, KS_SEP_LINE, &fp->line)) >= 0) { + if (fp->line.s[0] != '@') + break; + + if (fp->line.l > 3 && strncmp(fp->line.s, "@SQ", 3) == 0) { + has_SQ = 1; + hts_pos_t ln = -1; + for (q = fp->line.s + 4;; ++q) { + if (strncmp(q, "SN:", 3) == 0) { + q += 3; + for (r = q;*r != '\t' && *r != '\n' && *r != '\0';++r); + + if (sn) { + hts_log_warning("SQ header line has more than one SN: tag"); + free(sn); + } + sn = (char*)calloc(r - q + 1, 1); + if (!sn) + goto error; + + strncpy(sn, q, r - q); + q = r; + } else { + if (strncmp(q, "LN:", 3) == 0) + ln = strtoll(q + 3, (char**)&q, 10); + } + + while (*q != '\t' && *q != '\n' && *q != '\0') + ++q; + if (*q == '\0' || *q == '\n') + break; + } + if (sn) { + if (ln >= 0) { + int absent; + k = kh_put(s2i, d, sn, &absent); + if (absent < 0) + goto error; + + if (!absent) { + hts_log_warning("Duplicated sequence \"%s\" in file \"%s\"", sn, fp->fn); + free(sn); + } else { + sn = NULL; + if (ln >= UINT32_MAX) { + // Stash away ref length that + // doesn't fit in target_len array + int k2; + if (!long_refs) { + long_refs = kh_init(s2i); + if (!long_refs) + goto error; + } + k2 = kh_put(s2i, long_refs, kh_key(d, k), &absent); + if (absent < 0) + goto error; + kh_val(long_refs, k2) = ln; + kh_val(d, k) = ((int64_t) (kh_size(d) - 1) << 32 + | UINT32_MAX); + } else { + kh_val(d, k) = (int64_t) (kh_size(d) - 1) << 32 | ln; + } + } + } else { + hts_log_warning("Ignored @SQ SN:%s : bad or missing LN tag", sn); + warn_if_known_stderr(fp->line.s); + free(sn); + } + } else { + hts_log_warning("Ignored @SQ line with missing SN: tag"); + warn_if_known_stderr(fp->line.s); + } 
+ sn = NULL; + } + else if (!valid_sam_header_type(fp->line.s)) { + hts_log_error("Invalid header line: must start with @HD/@SQ/@RG/@PG/@CO"); + warn_if_known_stderr(fp->line.s); + goto error; + } + + if (kputsn(fp->line.s, fp->line.l, &str) < 0) + goto error; + + if (kputc('\n', &str) < 0) + goto error; + + if (fp->is_bgzf) { + next_c = bgzf_peek(fp->fp.bgzf); + } else { + unsigned char nc; + ssize_t pret = hpeek(fp->fp.hfile, &nc, 1); + next_c = pret > 0 ? nc : pret - 1; + } + if (next_c < -1) + goto error; + } + if (next_c != '@') + fp->line.l = 0; + + if (ret < -1) + goto error; + + if (!has_SQ && fp->fn_aux) { + kstring_t line = { 0, 0, NULL }; + + /* The reference index (.fai) is actually needed here */ + char *fai_fn = fp->fn_aux; + char *fn_delim = strstr(fp->fn_aux, HTS_IDX_DELIM); + if (fn_delim) + fai_fn = fn_delim + strlen(HTS_IDX_DELIM); + + hFILE* f = hopen(fai_fn, "r"); + int e = 0, absent; + if (f == NULL) + goto error; + + while (line.l = 0, kgetline(&line, (kgets_func*) hgets, f) >= 0) { + char* tab = strchr(line.s, '\t'); + hts_pos_t ln; + + if (tab == NULL) + continue; + + sn = (char*)calloc(tab-line.s+1, 1); + if (!sn) { + e = 1; + break; + } + memcpy(sn, line.s, tab-line.s); + k = kh_put(s2i, d, sn, &absent); + if (absent < 0) { + e = 1; + break; + } + + ln = strtoll(tab, NULL, 10); + + if (!absent) { + hts_log_warning("Duplicated sequence \"%s\" in the file \"%s\"", sn, fai_fn); + free(sn); + sn = NULL; + } else { + sn = NULL; + if (ln >= UINT32_MAX) { + // Stash away ref length that + // doesn't fit in target_len array + khint_t k2; + int absent = -1; + if (!long_refs) { + long_refs = kh_init(s2i); + if (!long_refs) { + e = 1; + break; + } + } + k2 = kh_put(s2i, long_refs, kh_key(d, k), &absent); + if (absent < 0) { + e = 1; + break; + } + kh_val(long_refs, k2) = ln; + kh_val(d, k) = ((int64_t) (kh_size(d) - 1) << 32 + | UINT32_MAX); + } else { + kh_val(d, k) = (int64_t) (kh_size(d) - 1) << 32 | ln; + } + has_SQ = 1; + } + + e |= 
kputs("@SQ\tSN:", &str) < 0; + e |= kputsn(line.s, tab - line.s, &str) < 0; + e |= kputs("\tLN:", &str) < 0; + e |= kputll(ln, &str) < 0; + e |= kputc('\n', &str) < 0; + if (e) + break; + } + + ks_free(&line); + if (hclose(f) != 0) { + hts_log_error("Error on closing %s", fai_fn); + e = 1; + } + if (e) + goto error; + } + + if (has_SQ) { + // Populate the targets array + h->n_targets = kh_size(d); + + h->target_name = (char**) malloc(sizeof(char*) * h->n_targets); + if (!h->target_name) { + h->n_targets = 0; + goto error; + } + + h->target_len = (uint32_t*) malloc(sizeof(uint32_t) * h->n_targets); + if (!h->target_len) { + h->n_targets = 0; + goto error; + } + + for (k = kh_begin(d); k != kh_end(d); ++k) { + if (!kh_exist(d, k)) + continue; + + h->target_name[kh_val(d, k) >> 32] = (char*) kh_key(d, k); + h->target_len[kh_val(d, k) >> 32] = kh_val(d, k) & 0xffffffffUL; + kh_val(d, k) >>= 32; + } + } + + // Repurpose sdict to hold any references longer than UINT32_MAX + h->sdict = long_refs; + + kh_destroy(s2i, d); + + if (str.l == 0) + kputsn("", 0, &str); + h->l_text = str.l; + h->text = ks_release(&str); + fp->bam_header = sam_hdr_sanitise(h); + fp->bam_header->ref_count = 1; + + return fp->bam_header; + + error: + if (h && d && (!h->target_name || !h->target_len)) { + for (k = kh_begin(d); k != kh_end(d); ++k) + if (kh_exist(d, k)) free((void *)kh_key(d, k)); + } + sam_hdr_destroy(h); + ks_free(&str); + kh_destroy(s2i, d); + kh_destroy(s2i, long_refs); + if (sn) free(sn); + return NULL; +} + +sam_hdr_t *sam_hdr_read(htsFile *fp) +{ + if (!fp) { + errno = EINVAL; + return NULL; + } + + switch (fp->format.format) { + case bam: + return sam_hdr_sanitise(bam_hdr_read(fp->fp.bgzf)); + + case cram: + return sam_hdr_sanitise(sam_hdr_dup(fp->fp.cram->header)); + + case sam: + return sam_hdr_create(fp); + + case fastq_format: + case fasta_format: + return sam_hdr_init(); + + case empty_format: + errno = EPIPE; + return NULL; + + default: + errno = EFTYPE; + return NULL; + 
} +} + +int sam_hdr_write(htsFile *fp, const sam_hdr_t *h) +{ + if (!fp || !h) { + errno = EINVAL; + return -1; + } + + switch (fp->format.format) { + case binary_format: + fp->format.category = sequence_data; + fp->format.format = bam; + /* fall-through */ + case bam: + if (bam_hdr_write(fp->fp.bgzf, h) < 0) return -1; + break; + + case cram: { + cram_fd *fd = fp->fp.cram; + if (cram_set_header2(fd, h) < 0) return -1; + if (fp->fn_aux) + cram_load_reference(fd, fp->fn_aux); + if (cram_write_SAM_hdr(fd, fd->header) < 0) return -1; + } + break; + + case text_format: + fp->format.category = sequence_data; + fp->format.format = sam; + /* fall-through */ + case sam: { + if (!h->hrecs && !h->text) + return 0; + char *text; + kstring_t hdr_ks = { 0, 0, NULL }; + size_t l_text; + ssize_t bytes; + int r = 0, no_sq = 0; + + if (h->hrecs) { + if (sam_hrecs_rebuild_text(h->hrecs, &hdr_ks) != 0) + return -1; + text = hdr_ks.s; + l_text = hdr_ks.l; + } else { + const char *p = NULL; + do { + const char *q = p == NULL ? 
h->text : p + 4; + p = strstr(q, "@SQ\t"); + } while (!(p == NULL || p == h->text || *(p - 1) == '\n')); + no_sq = p == NULL; + text = h->text; + l_text = h->l_text; + } + + if (fp->is_bgzf) { + bytes = bgzf_write(fp->fp.bgzf, text, l_text); + } else { + bytes = hwrite(fp->fp.hfile, text, l_text); + } + free(hdr_ks.s); + if (bytes != l_text) + return -1; + + if (no_sq) { + int i; + for (i = 0; i < h->n_targets; ++i) { + fp->line.l = 0; + r |= kputsn("@SQ\tSN:", 7, &fp->line) < 0; + r |= kputs(h->target_name[i], &fp->line) < 0; + r |= kputsn("\tLN:", 4, &fp->line) < 0; + r |= kputw(h->target_len[i], &fp->line) < 0; + r |= kputc('\n', &fp->line) < 0; + if (r != 0) + return -1; + + if (fp->is_bgzf) { + bytes = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l); + } else { + bytes = hwrite(fp->fp.hfile, fp->line.s, fp->line.l); + } + if (bytes != fp->line.l) + return -1; + } + } + if (fp->is_bgzf) { + if (bgzf_flush(fp->fp.bgzf) != 0) return -1; + } else { + if (hflush(fp->fp.hfile) != 0) return -1; + } + } + break; + + case fastq_format: + case fasta_format: + // Nothing to output; FASTQ has no file headers. + break; + + default: + errno = EBADF; + return -1; + } + return 0; +} + +static int old_sam_hdr_change_HD(sam_hdr_t *h, const char *key, const char *val) +{ + char *p, *q, *beg = NULL, *end = NULL, *newtext; + size_t new_l_text; + if (!h || !key) + return -1; + + if (h->l_text > 3) { + if (strncmp(h->text, "@HD", 3) == 0) { //@HD line exists + if ((p = strchr(h->text, '\n')) == 0) return -1; + *p = '\0'; // for strstr call + + char tmp[5] = { '\t', key[0], key[0] ? 
key[1] : '\0', ':', '\0' }; + + if ((q = strstr(h->text, tmp)) != 0) { // key exists + *p = '\n'; // change back + + // mark the key:val + beg = q; + for (q += 4; *q != '\n' && *q != '\t'; ++q); + end = q; + + if (val && (strncmp(beg + 4, val, end - beg - 4) == 0) + && strlen(val) == end - beg - 4) + return 0; // val is the same, no need to change + + } else { + beg = end = p; + *p = '\n'; + } + } + } + if (beg == NULL) { // no @HD + new_l_text = h->l_text; + if (new_l_text > SIZE_MAX - strlen(SAM_FORMAT_VERSION) - 9) + return -1; + new_l_text += strlen(SAM_FORMAT_VERSION) + 8; + if (val) { + if (new_l_text > SIZE_MAX - strlen(val) - 5) + return -1; + new_l_text += strlen(val) + 4; + } + newtext = (char*)malloc(new_l_text + 1); + if (!newtext) return -1; + + if (val) + snprintf(newtext, new_l_text + 1, + "@HD\tVN:%s\t%s:%s\n%s", SAM_FORMAT_VERSION, key, val, h->text); + else + snprintf(newtext, new_l_text + 1, + "@HD\tVN:%s\n%s", SAM_FORMAT_VERSION, h->text); + } else { // has @HD but different or no key + new_l_text = (beg - h->text) + (h->text + h->l_text - end); + if (val) { + if (new_l_text > SIZE_MAX - strlen(val) - 5) + return -1; + new_l_text += strlen(val) + 4; + } + newtext = (char*)malloc(new_l_text + 1); + if (!newtext) return -1; + + if (val) { + snprintf(newtext, new_l_text + 1, "%.*s\t%s:%s%s", + (int) (beg - h->text), h->text, key, val, end); + } else { //delete key + snprintf(newtext, new_l_text + 1, "%.*s%s", + (int) (beg - h->text), h->text, end); + } + } + free(h->text); + h->text = newtext; + h->l_text = new_l_text; + return 0; +} + + +int sam_hdr_change_HD(sam_hdr_t *h, const char *key, const char *val) +{ + if (!h || !key) + return -1; + + if (!h->hrecs) + return old_sam_hdr_change_HD(h, key, val); + + if (val) { + if (sam_hdr_update_line(h, "HD", NULL, NULL, key, val, NULL) != 0) + return -1; + } else { + if (sam_hdr_remove_tag_id(h, "HD", NULL, NULL, key) != 0) + return -1; + } + return sam_hdr_rebuild(h); +} +/********************** + *** 
SAM record I/O ***
 **********************/

// The speed of this code can vary considerably depending on minor code
// changes elsewhere as some of the tight loops are particularly prone to
// speed changes when the instruction blocks are split over a 32-byte
// boundary.  To protect against this, we explicitly specify an alignment
// for this function.  If this is insufficient, we may also wish to
// consider alignment of blocks within this function via
// __attribute__((optimize("align-loops=5"))) (gcc) or clang equivalents.
// However it's not very portable.
// Instead we break into separate functions so we can explicitly specify
// use __attribute__((aligned(32))) instead and force consistent loop
// alignment.
//
// grow_B_array enlarges the B-array allocation estimate *n by half and makes
// sure the bam data buffer has room for the extra elements of `size` bytes.
// Returns 0 on success, -1 on failure (errno is ENOMEM when *n is too big).
static inline int64_t grow_B_array(bam1_t *b, uint32_t *n, size_t size) {
    // Avoid overflow on 32-bit platforms, but it breaks BAM anyway
    if (*n > INT32_MAX*0.666) {
        errno = ENOMEM;
        return -1;
    }

    // Room needed for the additional half of the current element count.
    size_t extra_bytes = (size_t)size * (size_t)(*n>>1);
    if (possibly_expand_bam_data(b, extra_bytes) < 0) {
        hts_log_error("Out of memory");
        return -1;
    }

    *n += *n>>1;
    return 0;
}


// This ensures that q always ends up at the next comma after
// reading a number even if it's followed by junk.  It
// prevents the possibility of trying to read more than n items.
#define skip_to_comma_(q) do { while (*(q) > '\t' && *(q) != ',') (q)++; } while (0)

// Parses ",value" items of a B:c array starting at q, appending each as one
// byte to b->data.  Returns the position after the last item, or NULL if
// the array could not be grown.
HTS_ALIGN32
static char *sam_parse_Bc_vals(bam1_t *b, char *q, uint32_t *nused,
                               uint32_t *nalloc, int *overflow) {
    while (*q == ',') {
        if ((*nused)++ >= (*nalloc)) {
            if (grow_B_array(b, nalloc, 1) < 0)
                return NULL;
        }
        *(b->data + b->l_data) = hts_str2int(q + 1, &q, 8, overflow);
        b->l_data++;
    }
    return q;
}

// As sam_parse_Bc_vals, for B:C.  An item starting with '-' sets *overflow
// and is skipped without storing anything.
HTS_ALIGN32
static char *sam_parse_BC_vals(bam1_t *b, char *q, uint32_t *nused,
                               uint32_t *nalloc, int *overflow) {
    while (*q == ',') {
        if ((*nused)++ >= (*nalloc)) {
            if (grow_B_array(b, nalloc, 1) < 0)
                return NULL;
        }
        if (q[1] != '-') {
            *(b->data + b->l_data) = hts_str2uint(q + 1, &q, 8, overflow);
            b->l_data++;
        } else {
            *overflow = 1;
            q++;
            skip_to_comma_(q);
        }
    }
    return q;
}

// As sam_parse_Bc_vals, for B:s (two bytes per item, little-endian).
HTS_ALIGN32
static char *sam_parse_Bs_vals(bam1_t *b, char *q, uint32_t *nused,
                               uint32_t *nalloc, int *overflow) {
    while (*q == ',') {
        if ((*nused)++ >= (*nalloc)) {
            if (grow_B_array(b, nalloc, 2) < 0)
                return NULL;
        }
        i16_to_le(hts_str2int(q + 1, &q, 16, overflow),
                  b->data + b->l_data);
        b->l_data += 2;
    }
    return q;
}

// As sam_parse_BC_vals, for B:S (two bytes per item, little-endian).
HTS_ALIGN32
static char *sam_parse_BS_vals(bam1_t *b, char *q, uint32_t *nused,
                               uint32_t *nalloc, int *overflow) {
    while (*q == ',') {
        if ((*nused)++ >= (*nalloc)) {
            if (grow_B_array(b, nalloc, 2) < 0)
                return NULL;
        }
        if (q[1] != '-') {
            u16_to_le(hts_str2uint(q + 1, &q, 16, overflow),
                      b->data + b->l_data);
            b->l_data += 2;
        } else {
            *overflow = 1;
            q++;
            skip_to_comma_(q);
        }
    }
    return q;
}

// As sam_parse_Bc_vals, for B:i (four bytes per item, little-endian).
HTS_ALIGN32
static char *sam_parse_Bi_vals(bam1_t *b, char *q, uint32_t *nused,
                               uint32_t *nalloc, int *overflow) {
    while (*q == ',') {
        if ((*nused)++ >= (*nalloc)) {
            if (grow_B_array(b, nalloc, 4) < 0)
                return NULL;
        }
        i32_to_le(hts_str2int(q + 1, &q, 32, overflow),
                  b->data + b->l_data);
        b->l_data += 4;
    }
    return q;
}

// As sam_parse_BC_vals, for B:I (four bytes per item, little-endian).
HTS_ALIGN32
static char *sam_parse_BI_vals(bam1_t *b, char *q, uint32_t *nused,
                               uint32_t *nalloc, int *overflow) {
    while (*q == ',') {
        if ((*nused)++ >= (*nalloc)) {
            if (grow_B_array(b, nalloc, 4) < 0)
                return NULL;
        }
        if (q[1] != '-') {
            u32_to_le(hts_str2uint(q + 1, &q, 32, overflow),
                      b->data + b->l_data);
            b->l_data += 4;
        } else {
            *overflow = 1;
            q++;
            skip_to_comma_(q);
        }
    }
    return q;
}

// As sam_parse_Bc_vals, for B:f.  Items are parsed with strtod() and stored
// as four-byte floats; the overflow argument is not touched.
HTS_ALIGN32
static char *sam_parse_Bf_vals(bam1_t *b, char *q, uint32_t *nused,
                               uint32_t *nalloc, int *overflow) {
    while (*q == ',') {
        if ((*nused)++ >= (*nalloc)) {
            if (grow_B_array(b, nalloc, 4) < 0)
                return NULL;
        }
        float_to_le(strtod(q + 1, &q), b->data + b->l_data);
        b->l_data += 4;
    }
    return q;
}

// Parses the comma-separated values of a B-type aux array from `in` and
// appends 'B', the element type, a 32-bit count and the values to b->data.
// If a value does not fit the declared integer type, the values are
// rescanned to find their range and the function calls itself once more
// with a type that fits.
//
// type    Element type letter (c, C, s, S, i, I or f).
// nalloc  Element count to allocate for; 0 means unknown.
// in      Input positioned at the first ',' of the value list.
// end     Set to the position after the array on success.
// ctr     Recursion counter, incremented on every entry.
//
// Returns 0 on success, -1 on failure.
HTS_ALIGN32
static int sam_parse_B_vals_r(char type, uint32_t nalloc, char *in,
                              char **end, bam1_t *b,
                              int *ctr) {
    // Protect against infinite recursion when dealing with invalid input.
    // An example string is "XX:B:C,-".  The lack of a number means min=0,
    // but it overflowed due to "-" and so we repeat ad-infinitum.
    //
    // Loop detection is the safest solution incase there are other
    // strange corner cases with malformed inputs.
    if (++(*ctr) > 2) {
        hts_log_error("Malformed data in B:%c array", type);
        return -1;
    }

    int orig_l = b->l_data;
    char *q = in;
    int32_t size;
    size_t bytes;
    int overflow = 0;

    size = aux_type2size(type);
    if (size <= 0 || size > 4) {
        hts_log_error("Unrecognized type B:%c", type);
        return -1;
    }

    // Ensure space for type + values.
    // The first pass through here we don't know the number of entries and
    // nalloc == 0.  We start with a small working set and then parse the
    // data, growing as needed.
    //
    // If we have a second pass through we do know the number of entries
    // and nalloc is already known.  We have no need to expand the bam data.
    if (!nalloc)
        nalloc=7;

    // Ensure allocated memory is big enough (for current nalloc estimate)
    bytes = (size_t) nalloc * (size_t) size;
    // The division check catches overflow in the multiplication above.
    if (bytes / size != nalloc
        || possibly_expand_bam_data(b, bytes + 2 + sizeof(uint32_t))) {
        hts_log_error("Out of memory");
        return -1;
    }

    uint32_t nused = 0;

    b->data[b->l_data++] = 'B';
    b->data[b->l_data++] = type;
    // 32-bit B-array length is inserted later once we know it.
    int b_len_idx = b->l_data;
    b->l_data += sizeof(uint32_t);

    if (type == 'c') {
        if (!(q = sam_parse_Bc_vals(b, q, &nused, &nalloc, &overflow)))
            return -1;
    } else if (type == 'C') {
        if (!(q = sam_parse_BC_vals(b, q, &nused, &nalloc, &overflow)))
            return -1;
    } else if (type == 's') {
        if (!(q = sam_parse_Bs_vals(b, q, &nused, &nalloc, &overflow)))
            return -1;
    } else if (type == 'S') {
        if (!(q = sam_parse_BS_vals(b, q, &nused, &nalloc, &overflow)))
            return -1;
    } else if (type == 'i') {
        if (!(q = sam_parse_Bi_vals(b, q, &nused, &nalloc, &overflow)))
            return -1;
    } else if (type == 'I') {
        if (!(q = sam_parse_BI_vals(b, q, &nused, &nalloc, &overflow)))
            return -1;
    } else if (type == 'f') {
        if (!(q = sam_parse_Bf_vals(b, q, &nused, &nalloc, &overflow)))
            return -1;
    }
    if (*q != '\t' && *q != '\0') {
        // Unknown B array type or junk in the numbers
        hts_log_error("Malformed B:%c", type);
        return -1;
    }
    i32_to_le(nused, b->data + b_len_idx);

    if (!overflow) {
        *end = q;
        return 0;
    } else {
        int64_t max = 0, min = 0, val;
        // Given type was incorrect.  Try to rescue the situation.
        char *r = q;
        q = in;
        overflow = 0;
        // Discard what was written so the retry starts from a clean record.
        b->l_data = orig_l;
        // Find out what range of values is present
        while (q < r) {
            val = hts_str2int(q + 1, &q, 64, &overflow);
            if (max < val) max = val;
            if (min > val) min = val;
            skip_to_comma_(q);
        }
        // Retry with appropriate type
        if (!overflow) {
            if (min < 0) {
                if (min >= INT8_MIN && max <= INT8_MAX) {
                    return sam_parse_B_vals_r('c', nalloc, in, end, b, ctr);
                } else if (min >= INT16_MIN && max <= INT16_MAX) {
                    return sam_parse_B_vals_r('s', nalloc, in, end, b, ctr);
                } else if (min >= INT32_MIN && max <= INT32_MAX) {
                    return sam_parse_B_vals_r('i', nalloc, in, end, b, ctr);
                }
            } else {
                if (max < UINT8_MAX) {
                    return sam_parse_B_vals_r('C', nalloc, in, end, b, ctr);
                } else if (max <= UINT16_MAX) {
                    return sam_parse_B_vals_r('S', nalloc, in, end, b, ctr);
                } else if (max <= UINT32_MAX) {
                    return sam_parse_B_vals_r('I', nalloc, in, end, b, ctr);
                }
            }
        }
        // If here then at least one of the values is too big to store
        hts_log_error("Numeric value in B array out of allowed range");
        return -1;
    }
#undef skip_to_comma_
}

// Entry point for B-array parsing: starts sam_parse_B_vals_r() with an
// unknown element count and a fresh recursion counter.
HTS_ALIGN32
static int sam_parse_B_vals(char type, char *in, char **end, bam1_t *b)
{
    int ctr = 0;
    uint32_t nalloc = 0;
    return sam_parse_B_vals_r(type, nalloc, in, end, b, &ctr);
}

// Parses the FLAG field at v and sets *rv to the first unparsed character.
// Values starting with 1-9 are read as decimal; a leading 0 is handed to
// strtoul() with base 0 and capped at 65535 with *overflow set.  Anything
// else yields 0 with *rv left at v.
static inline unsigned int parse_sam_flag(char *v, char **rv, int *overflow) {
    if (*v >= '1' && *v <= '9') {
        return hts_str2uint(v, rv, 16, overflow);
    }
    else if (*v == '0') {
        // handle single-digit "0" directly; otherwise it's hex or octal
        if (v[1] == '\t') { *rv = v+1; return 0; }
        else {
            unsigned long val = strtoul(v, rv, 0);
            if (val > 65535) { *overflow = 1; return 65535; }
            return val;
        }
    }
    else {
        // TODO implement symbolic flag letters
        *rv = v;
        return 0;
    }
}

// Parse tag line and append to bam object b.
// Shared by both SAM and FASTQ parsers.
+// +// The difference between the two is how lenient we are to recognising +// non-compliant strings. The FASTQ parser glosses over arbitrary +// non-SAM looking strings. +static inline int aux_parse(char *start, char *end, bam1_t *b, int lenient, + khash_t(tag) *tag_whitelist) { + int overflow = 0; + int checkpoint; + char logbuf[40]; + char *q = start, *p = end; + +#define _parse_err(cond, ...) \ + do { \ + if (cond) { \ + if (lenient) { \ + while (q < p && !isspace_c(*q)) \ + q++; \ + while (q < p && isspace_c(*q)) \ + q++; \ + b->l_data = checkpoint; \ + goto loop; \ + } else { \ + hts_log_error(__VA_ARGS__); \ + goto err_ret; \ + } \ + } \ + } while (0) + + while (q < p) loop: { + char type; + checkpoint = b->l_data; + if (p - q < 5) { + if (lenient) { + break; + } else { + hts_log_error("Incomplete aux field"); + goto err_ret; + } + } + _parse_err(q[0] < '!' || q[1] < '!', "invalid aux tag id"); + + if (lenient && (q[2] | q[4]) != ':') { + while (q < p && !isspace_c(*q)) + q++; + while (q < p && isspace_c(*q)) + q++; + continue; + } + + if (tag_whitelist) { + int tt = q[0]*256 + q[1]; + if (kh_get(tag, tag_whitelist, tt) == kh_end(tag_whitelist)) { + while (q < p && *q != '\t') + q++; + continue; + } + } + + // Copy over id + if (possibly_expand_bam_data(b, 2) < 0) goto err_ret; + memcpy(b->data + b->l_data, q, 2); b->l_data += 2; + q += 3; type = *q++; ++q; // q points to value + if (type != 'Z' && type != 'H') // the only zero length acceptable fields + _parse_err(*q <= '\t', "incomplete aux field"); + + // Ensure enough space for a double + type allocated. 
+ if (possibly_expand_bam_data(b, 16) < 0) goto err_ret; + + if (type == 'A' || type == 'a' || type == 'c' || type == 'C') { + b->data[b->l_data++] = 'A'; + b->data[b->l_data++] = *q++; + } else if (type == 'i' || type == 'I') { + if (*q == '-') { + int32_t x = hts_str2int(q, &q, 32, &overflow); + if (x >= INT8_MIN) { + b->data[b->l_data++] = 'c'; + b->data[b->l_data++] = x; + } else if (x >= INT16_MIN) { + b->data[b->l_data++] = 's'; + i16_to_le(x, b->data + b->l_data); + b->l_data += 2; + } else { + b->data[b->l_data++] = 'i'; + i32_to_le(x, b->data + b->l_data); + b->l_data += 4; + } + } else { + uint32_t x = hts_str2uint(q, &q, 32, &overflow); + if (x <= UINT8_MAX) { + b->data[b->l_data++] = 'C'; + b->data[b->l_data++] = x; + } else if (x <= UINT16_MAX) { + b->data[b->l_data++] = 'S'; + u16_to_le(x, b->data + b->l_data); + b->l_data += 2; + } else { + b->data[b->l_data++] = 'I'; + u32_to_le(x, b->data + b->l_data); + b->l_data += 4; + } + } + } else if (type == 'f') { + b->data[b->l_data++] = 'f'; + float_to_le(strtod(q, &q), b->data + b->l_data); + b->l_data += sizeof(float); + } else if (type == 'd') { + b->data[b->l_data++] = 'd'; + double_to_le(strtod(q, &q), b->data + b->l_data); + b->l_data += sizeof(double); + } else if (type == 'Z' || type == 'H') { + char *end = strchr(q, '\t'); + if (!end) end = q + strlen(q); + _parse_err(type == 'H' && ((end-q)&1) != 0, + "hex field does not have an even number of digits"); + b->data[b->l_data++] = type; + if (possibly_expand_bam_data(b, end - q + 1) < 0) goto err_ret; + memcpy(b->data + b->l_data, q, end - q); + b->l_data += end - q; + b->data[b->l_data++] = '\0'; + q = end; + } else if (type == 'B') { + type = *q++; // q points to the first ',' following the typing byte + _parse_err(*q && *q != ',' && *q != '\t', + "B aux field type not followed by ','"); + + if (sam_parse_B_vals(type, q, &q, b) < 0) + goto err_ret; + } else _parse_err(1, "unrecognized type %s", hts_strprint(logbuf, sizeof logbuf, '\'', &type, 
1)); + + while (*q > '\t') { q++; } // Skip any junk to next tab + q++; + } + + _parse_err(!lenient && overflow != 0, "numeric value out of allowed range"); +#undef _parse_err + + return 0; + +err_ret: + return -2; +} + +int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) +{ +#define _read_token(_p) (_p); do { char *tab = strchr((_p), '\t'); if (!tab) goto err_ret; *tab = '\0'; (_p) = tab + 1; } while (0) + +#if HTS_ALLOW_UNALIGNED != 0 && ULONG_MAX == 0xffffffffffffffff + +// Macro that operates on 64-bits at a time. +#define COPY_MINUS_N(to,from,n,l,failed) \ + do { \ + uint64_u *from8 = (uint64_u *)(from); \ + uint64_u *to8 = (uint64_u *)(to); \ + uint64_t uflow = 0; \ + size_t l8 = (l)>>3, i; \ + for (i = 0; i < l8; i++) { \ + to8[i] = from8[i] - (n)*0x0101010101010101UL; \ + uflow |= to8[i]; \ + } \ + for (i<<=3; i < (l); ++i) { \ + to[i] = from[i] - (n); \ + uflow |= to[i]; \ + } \ + failed = (uflow & 0x8080808080808080UL) > 0; \ + } while (0) + +#else + +// Basic version which operates a byte at a time +#define COPY_MINUS_N(to,from,n,l,failed) do { \ + uint8_t uflow = 0; \ + for (i = 0; i < (l); ++i) { \ + (to)[i] = (from)[i] - (n); \ + uflow |= (uint8_t) (to)[i]; \ + } \ + failed = (uflow & 0x80) > 0; \ + } while (0) + +#endif + +#define _get_mem(type_t, x, b, l) if (possibly_expand_bam_data((b), (l)) < 0) goto err_ret; *(x) = (type_t*)((b)->data + (b)->l_data); (b)->l_data += (l) +#define _parse_err(cond, ...) do { if (cond) { hts_log_error(__VA_ARGS__); goto err_ret; } } while (0) +#define _parse_warn(cond, ...) 
do { if (cond) { hts_log_warning(__VA_ARGS__); } } while (0) + + uint8_t *t; + + char *p = s->s, *q; + int i, overflow = 0; + char logbuf[40]; + hts_pos_t cigreflen; + bam1_core_t *c = &b->core; + + b->l_data = 0; + memset(c, 0, 32); + + // qname + q = _read_token(p); + + _parse_warn(p - q <= 1, "empty query name"); + _parse_err(p - q > 255, "query name too long"); + // resize large enough for name + extranul + if (possibly_expand_bam_data(b, (p - q) + 4) < 0) goto err_ret; + memcpy(b->data + b->l_data, q, p-q); b->l_data += p-q; + + c->l_extranul = (4 - (b->l_data & 3)) & 3; + memcpy(b->data + b->l_data, "\0\0\0\0", c->l_extranul); + b->l_data += c->l_extranul; + + c->l_qname = p - q + c->l_extranul; + + // flag + c->flag = parse_sam_flag(p, &p, &overflow); + if (*p++ != '\t') goto err_ret; // malformated flag + + // chr + q = _read_token(p); + if (strcmp(q, "*")) { + _parse_err(h->n_targets == 0, "no SQ lines present in the header"); + c->tid = bam_name2id(h, q); + _parse_err(c->tid < -1, "failed to parse header"); + _parse_warn(c->tid < 0, "unrecognized reference name %s; treated as unmapped", hts_strprint(logbuf, sizeof logbuf, '"', q, SIZE_MAX)); + } else c->tid = -1; + + // pos + c->pos = hts_str2uint(p, &p, 62, &overflow) - 1; + if (*p++ != '\t') goto err_ret; + if (c->pos < 0 && c->tid >= 0) { + _parse_warn(1, "mapped query cannot have zero coordinate; treated as unmapped"); + c->tid = -1; + } + if (c->tid < 0) c->flag |= BAM_FUNMAP; + + // mapq + c->qual = hts_str2uint(p, &p, 8, &overflow); + if (*p++ != '\t') goto err_ret; + // cigar + if (*p != '*') { + uint32_t *cigar = NULL; + int old_l_data = b->l_data; + int n_cigar = bam_parse_cigar(p, &p, b); + if (n_cigar < 1 || *p++ != '\t') goto err_ret; + cigar = (uint32_t *)(b->data + old_l_data); + + // can't use bam_endpos() directly as some fields not yet set up + cigreflen = (!(c->flag&BAM_FUNMAP))? 
bam_cigar2rlen(c->n_cigar, cigar) : 1; + if (cigreflen == 0) cigreflen = 1; + } else { + _parse_warn(!(c->flag&BAM_FUNMAP), "mapped query must have a CIGAR; treated as unmapped"); + c->flag |= BAM_FUNMAP; + q = _read_token(p); + cigreflen = 1; + } + _parse_err(HTS_POS_MAX - cigreflen <= c->pos, + "read ends beyond highest supported position"); + c->bin = hts_reg2bin(c->pos, c->pos + cigreflen, 14, 5); + // mate chr + q = _read_token(p); + if (strcmp(q, "=") == 0) { + c->mtid = c->tid; + } else if (strcmp(q, "*") == 0) { + c->mtid = -1; + } else { + c->mtid = bam_name2id(h, q); + _parse_err(c->mtid < -1, "failed to parse header"); + _parse_warn(c->mtid < 0, "unrecognized mate reference name %s; treated as unmapped", hts_strprint(logbuf, sizeof logbuf, '"', q, SIZE_MAX)); + } + // mpos + c->mpos = hts_str2uint(p, &p, 62, &overflow) - 1; + if (*p++ != '\t') goto err_ret; + if (c->mpos < 0 && c->mtid >= 0) { + _parse_warn(1, "mapped mate cannot have zero coordinate; treated as unmapped"); + c->mtid = -1; + } + // tlen + c->isize = hts_str2int(p, &p, 63, &overflow); + if (*p++ != '\t') goto err_ret; + _parse_err(overflow, "number outside allowed range"); + // seq + q = _read_token(p); + if (strcmp(q, "*")) { + _parse_err(p - q - 1 > INT32_MAX, "read sequence is too long"); + c->l_qseq = p - q - 1; + hts_pos_t ql = bam_cigar2qlen(c->n_cigar, (uint32_t*)(b->data + c->l_qname)); + _parse_err(c->n_cigar && ql != c->l_qseq, "CIGAR and query sequence are of different length"); + i = (c->l_qseq + 1) >> 1; + _get_mem(uint8_t, &t, b, i); + + unsigned int lqs2 = c->l_qseq&~1, i; + for (i = 0; i < lqs2; i+=2) + t[i>>1] = (seq_nt16_table[(unsigned char)q[i]] << 4) | seq_nt16_table[(unsigned char)q[i+1]]; + for (; i < c->l_qseq; ++i) + t[i>>1] = seq_nt16_table[(unsigned char)q[i]] << ((~i&1)<<2); + } else c->l_qseq = 0; + // qual + _get_mem(uint8_t, &t, b, c->l_qseq); + if (p[0] == '*' && (p[1] == '\t' || p[1] == '\0')) { + memset(t, 0xff, c->l_qseq); + p += 2; + } else { + int 
failed = 0; + _parse_err(s->l - (p - s->s) < c->l_qseq + || (p[c->l_qseq] != '\t' && p[c->l_qseq] != '\0'), + "SEQ and QUAL are of different length"); + COPY_MINUS_N(t, p, 33, c->l_qseq, failed); + _parse_err(failed, "invalid QUAL character"); + p += c->l_qseq + 1; + } + + // aux + if (aux_parse(p, s->s + s->l, b, 0, NULL) < 0) + goto err_ret; + + if (bam_tag2cigar(b, 1, 1) < 0) + return -2; + return 0; + +#undef _parse_warn +#undef _parse_err +#undef _get_mem +#undef _read_token +err_ret: + return -2; +} + +static uint32_t read_ncigar(const char *q) { + uint32_t n_cigar = 0; + for (; *q && *q != '\t'; ++q) + if (!isdigit_c(*q)) ++n_cigar; + if (!n_cigar) { + hts_log_error("No CIGAR operations"); + return 0; + } + if (n_cigar >= 2147483647) { + hts_log_error("Too many CIGAR operations"); + return 0; + } + + return n_cigar; +} + +/*! @function + @abstract Parse a CIGAR string into preallocated a uint32_t array + @param in [in] pointer to the source string + @param a_cigar [out] address of the destination uint32_t buffer + @return number of processed input characters; 0 on error + */ +static int parse_cigar(const char *in, uint32_t *a_cigar, uint32_t n_cigar) { + int i, overflow = 0; + const char *p = in; + for (i = 0; i < n_cigar; i++) { + uint32_t len; + int op; + char *q; + len = hts_str2uint(p, &q, 28, &overflow)< *a_mem) { + uint32_t *a_tmp = realloc(*a_cigar, n_cigar*sizeof(**a_cigar)); + if (a_tmp) { + *a_cigar = a_tmp; + *a_mem = n_cigar; + } else { + hts_log_error("Memory allocation error"); + return -1; + } + } + + if (!(diff = parse_cigar(in, *a_cigar, n_cigar))) return -1; + if (end) *end = (char *)in+diff; + + return n_cigar; +} + +ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b) { + size_t n_cigar = 0; + int diff; + + if (!in || !b) { + hts_log_error("NULL pointer arguments"); + return -1; + } + if (end) *end = (char *)in; + + n_cigar = (*in == '*') ? 
0 : read_ncigar(in); + if (!n_cigar && b->core.n_cigar == 0) { + if (end) *end = (char *)in+1; + return 0; + } + + ssize_t cig_diff = n_cigar - b->core.n_cigar; + if (cig_diff > 0 && + possibly_expand_bam_data(b, cig_diff * sizeof(uint32_t)) < 0) { + hts_log_error("Memory allocation error"); + return -1; + } + + uint32_t *cig = bam_get_cigar(b); + if ((uint8_t *)cig != b->data + b->l_data) { + // Modifying an BAM existing BAM record + uint8_t *seq = bam_get_seq(b); + memmove(cig + n_cigar, seq, (b->data + b->l_data) - seq); + } + + if (n_cigar) { + if (!(diff = parse_cigar(in, cig, n_cigar))) + return -1; + } else { + diff = 1; // handle "*" + } + + b->l_data += cig_diff * sizeof(uint32_t); + b->core.n_cigar = n_cigar; + if (end) *end = (char *)in + diff; + + return n_cigar; +} + +/* + * ----------------------------------------------------------------------------- + * SAM threading + */ +// Size of SAM text block (reading) +#define SAM_NBYTES 240000 + +// Number of BAM records (writing, up to NB_mem in size) +#define SAM_NBAM 1000 + +struct SAM_state; + +// Output job - a block of BAM records +typedef struct sp_bams { + struct sp_bams *next; + int serial; + + bam1_t *bams; + int nbams, abams; // used and alloc for bams[] array + size_t bam_mem; // very approximate total size + + struct SAM_state *fd; +} sp_bams; + +// Input job - a block of SAM text +typedef struct sp_lines { + struct sp_lines *next; + int serial; + + char *data; + int data_size; + int alloc; + + struct SAM_state *fd; + sp_bams *bams; +} sp_lines; + +enum sam_cmd { + SAM_NONE = 0, + SAM_CLOSE, + SAM_CLOSE_DONE, +}; + +typedef struct SAM_state { + sam_hdr_t *h; + + hts_tpool *p; + int own_pool; + pthread_mutex_t lines_m; + hts_tpool_process *q; + pthread_t dispatcher; + int dispatcher_set; + + sp_lines *lines; + sp_bams *bams; + + sp_bams *curr_bam; + int curr_idx; + int serial; + + // Be warned: moving these mutexes around in this struct can reduce + // threading performance by up to 70%! 
+ pthread_mutex_t command_m; + pthread_cond_t command_c; + enum sam_cmd command; + + // One of the E* errno codes + int errcode; + + htsFile *fp; +} SAM_state; + +// Returns a SAM_state struct from a generic hFILE. +// +// Returns NULL on failure. +static SAM_state *sam_state_create(htsFile *fp) { + // Ideally sam_open wouldn't be a #define to hts_open but instead would + // be a redirect call with an additional 'S' mode. This in turn would + // correctly set the designed format to sam instead of a generic + // text_format. + if (fp->format.format != sam && fp->format.format != text_format) + return NULL; + + SAM_state *fd = calloc(1, sizeof(*fd)); + if (!fd) + return NULL; + + fp->state = fd; + fd->fp = fp; + + return fd; +} + +static int sam_format1_append(const bam_hdr_t *h, const bam1_t *b, kstring_t *str); +static void *sam_format_worker(void *arg); + +static void sam_state_err(SAM_state *fd, int errcode) { + pthread_mutex_lock(&fd->command_m); + if (!fd->errcode) + fd->errcode = errcode; + pthread_mutex_unlock(&fd->command_m); +} + +static void sam_free_sp_bams(sp_bams *b) { + if (!b) + return; + + if (b->bams) { + int i; + for (i = 0; i < b->abams; i++) { + if (b->bams[i].data) + free(b->bams[i].data); + } + free(b->bams); + } + free(b); +} + +// Destroys the state produce by sam_state_create. 
+int sam_state_destroy(htsFile *fp) { + int ret = 0; + + if (!fp->state) + return 0; + + SAM_state *fd = fp->state; + if (fd->p) { + if (fd->h) { + // Notify sam_dispatcher we're closing + pthread_mutex_lock(&fd->command_m); + if (fd->command != SAM_CLOSE_DONE) + fd->command = SAM_CLOSE; + pthread_cond_signal(&fd->command_c); + ret = -fd->errcode; + if (fd->q) + hts_tpool_wake_dispatch(fd->q); // unstick the reader + + if (!fp->is_write && fd->q && fd->dispatcher_set) { + for (;;) { + // Avoid deadlocks with dispatcher + if (fd->command == SAM_CLOSE_DONE) + break; + hts_tpool_wake_dispatch(fd->q); + pthread_mutex_unlock(&fd->command_m); + usleep(10000); + pthread_mutex_lock(&fd->command_m); + } + } + pthread_mutex_unlock(&fd->command_m); + + if (fp->is_write) { + // Dispatch the last partial block. + sp_bams *gb = fd->curr_bam; + if (!ret && gb && gb->nbams > 0 && fd->q) + ret = hts_tpool_dispatch(fd->p, fd->q, sam_format_worker, gb); + + // Flush and drain output + if (fd->q) + hts_tpool_process_flush(fd->q); + pthread_mutex_lock(&fd->command_m); + if (!ret) ret = -fd->errcode; + pthread_mutex_unlock(&fd->command_m); + + while (!ret && fd->q && !hts_tpool_process_empty(fd->q)) { + usleep(10000); + pthread_mutex_lock(&fd->command_m); + ret = -fd->errcode; + // not empty but shutdown implies error + if (hts_tpool_process_is_shutdown(fd->q) && !ret) + ret = EIO; + pthread_mutex_unlock(&fd->command_m); + } + if (fd->q) + hts_tpool_process_shutdown(fd->q); + } + + // Wait for it to acknowledge + if (fd->dispatcher_set) + pthread_join(fd->dispatcher, NULL); + if (!ret) ret = -fd->errcode; + } + + // Tidy up memory + if (fd->q) + hts_tpool_process_destroy(fd->q); + + if (fd->own_pool && fp->format.compression == no_compression) { + hts_tpool_destroy(fd->p); + fd->p = NULL; + } + pthread_mutex_destroy(&fd->lines_m); + pthread_mutex_destroy(&fd->command_m); + pthread_cond_destroy(&fd->command_c); + + sp_lines *l = fd->lines; + while (l) { + sp_lines *n = l->next; + 
free(l->data); + free(l); + l = n; + } + + sp_bams *b = fd->bams; + while (b) { + if (fd->curr_bam == b) + fd->curr_bam = NULL; + sp_bams *n = b->next; + sam_free_sp_bams(b); + b = n; + } + + if (fd->curr_bam) + sam_free_sp_bams(fd->curr_bam); + + // Decrement counter by one, maybe destroying too. + // This is to permit the caller using bam_hdr_destroy + // before sam_close without triggering decode errors + // in the background threads. + bam_hdr_destroy(fd->h); + } + + free(fp->state); + fp->state = NULL; + return ret; +} + +// Cleanup function - job for sam_parse_worker; result for sam_format_worker +static void cleanup_sp_lines(void *arg) { + sp_lines *gl = (sp_lines *)arg; + if (!gl) return; + + // Should always be true for lines passed to / from thread workers. + assert(gl->next == NULL); + + free(gl->data); + sam_free_sp_bams(gl->bams); + free(gl); +} + +// Run from one of the worker threads. +// Convert a passed in array of lines to array of BAMs, returning +// the result back to the thread queue. +static void *sam_parse_worker(void *arg) { + sp_lines *gl = (sp_lines *)arg; + sp_bams *gb = NULL; + char *lines = gl->data; + int i; + bam1_t *b; + SAM_state *fd = gl->fd; + + // Use a block of BAM structs we had earlier if available. 
+ pthread_mutex_lock(&fd->lines_m); + if (fd->bams) { + gb = fd->bams; + fd->bams = gb->next; + } + pthread_mutex_unlock(&fd->lines_m); + + if (gb == NULL) { + gb = calloc(1, sizeof(*gb)); + if (!gb) { + return NULL; + } + gb->abams = 100; + gb->bams = b = calloc(gb->abams, sizeof(*b)); + if (!gb->bams) { + sam_state_err(fd, ENOMEM); + goto err; + } + gb->nbams = 0; + gb->bam_mem = 0; + } + gb->serial = gl->serial; + gb->next = NULL; + + b = (bam1_t *)gb->bams; + if (!b) { + sam_state_err(fd, ENOMEM); + goto err; + } + + i = 0; + char *cp = lines, *cp_end = lines + gl->data_size; + while (cp < cp_end) { + if (i >= gb->abams) { + int old_abams = gb->abams; + gb->abams *= 2; + b = (bam1_t *)realloc(gb->bams, gb->abams*sizeof(bam1_t)); + if (!b) { + gb->abams /= 2; + sam_state_err(fd, ENOMEM); + goto err; + } + memset(&b[old_abams], 0, (gb->abams - old_abams)*sizeof(*b)); + gb->bams = b; + } + + // Ideally we'd get sam_parse1 to return the number of + // bytes decoded and to be able to stop on newline as + // well as \0. + // + // We can then avoid the additional strchr loop. + // It's around 6% of our CPU cost, albeit threadable. + // + // However this is an API change so for now we copy. + + char *nl = strchr(cp, '\n'); + char *line_end; + if (nl) { + line_end = nl; + if (line_end > cp && *(line_end - 1) == '\r') + line_end--; + nl++; + } else { + nl = line_end = cp_end; + } + *line_end = '\0'; + kstring_t ks = { line_end - cp, gl->alloc, cp }; + if (sam_parse1(&ks, fd->h, &b[i]) < 0) { + sam_state_err(fd, errno ? 
errno : EIO); + cleanup_sp_lines(gl); + goto err; + } + + cp = nl; + i++; + } + gb->nbams = i; + + pthread_mutex_lock(&fd->lines_m); + gl->next = fd->lines; + fd->lines = gl; + pthread_mutex_unlock(&fd->lines_m); + return gb; + + err: + sam_free_sp_bams(gb); + return NULL; +} + +static void *sam_parse_eof(void *arg) { + return NULL; +} + +// Cleanup function - result for sam_parse_worker; job for sam_format_worker +static void cleanup_sp_bams(void *arg) { + sam_free_sp_bams((sp_bams *) arg); +} + +// Runs in its own thread. +// Reads a block of text (SAM) and sends a new job to the thread queue to +// translate this to BAM. +static void *sam_dispatcher_read(void *vp) { + htsFile *fp = vp; + kstring_t line = {0}; + int line_frag = 0; + SAM_state *fd = fp->state; + sp_lines *l = NULL; + + // Pre-allocate buffer for left-over bits of line (exact size doesn't + // matter as it will grow if necessary). + if (ks_resize(&line, 1000) < 0) + goto err; + + for (;;) { + // Check for command + pthread_mutex_lock(&fd->command_m); + switch (fd->command) { + + case SAM_CLOSE: + pthread_cond_signal(&fd->command_c); + pthread_mutex_unlock(&fd->command_m); + hts_tpool_process_shutdown(fd->q); + goto tidyup; + + default: + break; + } + pthread_mutex_unlock(&fd->command_m); + + pthread_mutex_lock(&fd->lines_m); + if (fd->lines) { + // reuse existing line buffer + l = fd->lines; + fd->lines = l->next; + } + pthread_mutex_unlock(&fd->lines_m); + + if (l == NULL) { + // none to reuse, to create a new one + l = calloc(1, sizeof(*l)); + if (!l) + goto err; + l->alloc = SAM_NBYTES; + l->data = malloc(l->alloc+8); // +8 for optimisation in sam_parse1 + if (!l->data) { + free(l); + l = NULL; + goto err; + } + l->fd = fd; + } + l->next = NULL; + + if (l->alloc < line_frag+SAM_NBYTES/2) { + char *rp = realloc(l->data, line_frag+SAM_NBYTES/2 +8); + if (!rp) + goto err; + l->alloc = line_frag+SAM_NBYTES/2; + l->data = rp; + } + memcpy(l->data, line.s, line_frag); + + l->data_size = line_frag; + 
ssize_t nbytes; + longer_line: + if (fp->is_bgzf) + nbytes = bgzf_read(fp->fp.bgzf, l->data + line_frag, l->alloc - line_frag); + else + nbytes = hread(fp->fp.hfile, l->data + line_frag, l->alloc - line_frag); + if (nbytes < 0) { + sam_state_err(fd, errno ? errno : EIO); + goto err; + } else if (nbytes == 0) + break; // EOF + l->data_size += nbytes; + + // trim to last \n. Maybe \r\n, but that's still fine + if (nbytes == l->alloc - line_frag) { + char *cp_end = l->data + l->data_size; + char *cp = cp_end-1; + + while (cp > (char *)l->data && *cp != '\n') + cp--; + + // entire buffer is part of a single line + if (cp == l->data) { + line_frag = l->data_size; + char *rp = realloc(l->data, l->alloc * 2 + 8); + if (!rp) + goto err; + l->alloc *= 2; + l->data = rp; + assert(l->alloc >= l->data_size); + assert(l->alloc >= line_frag); + assert(l->alloc >= l->alloc - line_frag); + goto longer_line; + } + cp++; + + // line holds the remainder of our line. + if (ks_resize(&line, cp_end - cp) < 0) + goto err; + memcpy(line.s, cp, cp_end - cp); + line_frag = cp_end - cp; + l->data_size = l->alloc - line_frag; + } else { + // out of buffer + line_frag = 0; + } + + l->serial = fd->serial++; + //fprintf(stderr, "Dispatching %p, %d bytes, serial %d\n", l, l->data_size, l->serial); + if (hts_tpool_dispatch3(fd->p, fd->q, sam_parse_worker, l, + cleanup_sp_lines, cleanup_sp_bams, 0) < 0) + goto err; + pthread_mutex_lock(&fd->command_m); + if (fd->command == SAM_CLOSE) { + pthread_mutex_unlock(&fd->command_m); + l = NULL; + goto tidyup; + } + l = NULL; // Now "owned" by sam_parse_worker() + pthread_mutex_unlock(&fd->command_m); + } + + if (hts_tpool_dispatch(fd->p, fd->q, sam_parse_eof, NULL) < 0) + goto err; + + // At EOF, wait for close request. + // (In future if we add support for seek, this is where we need to catch it.) 
+ for (;;) { + pthread_mutex_lock(&fd->command_m); + if (fd->command == SAM_NONE) + pthread_cond_wait(&fd->command_c, &fd->command_m); + switch (fd->command) { + case SAM_CLOSE: + pthread_cond_signal(&fd->command_c); + pthread_mutex_unlock(&fd->command_m); + hts_tpool_process_shutdown(fd->q); + goto tidyup; + + default: + pthread_mutex_unlock(&fd->command_m); + break; + } + } + + tidyup: + pthread_mutex_lock(&fd->command_m); + fd->command = SAM_CLOSE_DONE; + pthread_cond_signal(&fd->command_c); + pthread_mutex_unlock(&fd->command_m); + + if (l) { + pthread_mutex_lock(&fd->lines_m); + l->next = fd->lines; + fd->lines = l; + pthread_mutex_unlock(&fd->lines_m); + } + free(line.s); + + return NULL; + + err: + sam_state_err(fd, errno ? errno : ENOMEM); + hts_tpool_process_shutdown(fd->q); + goto tidyup; +} + +// Runs in its own thread. +// Takes encoded blocks of SAM off the thread results queue and writes them +// to our output stream. +static void *sam_dispatcher_write(void *vp) { + htsFile *fp = vp; + SAM_state *fd = fp->state; + hts_tpool_result *r; + + // Iterates until result queue is shutdown, where it returns NULL. + while ((r = hts_tpool_next_result_wait(fd->q))) { + sp_lines *gl = (sp_lines *)hts_tpool_result_data(r); + if (!gl) { + sam_state_err(fd, ENOMEM); + goto err; + } + + if (fp->idx) { + sp_bams *gb = gl->bams; + int i = 0, count = 0; + while (i < gl->data_size) { + int j = i; + while (i < gl->data_size && gl->data[i] != '\n') + i++; + if (i < gl->data_size) + i++; + + if (fp->is_bgzf) { + if (bgzf_flush_try(fp->fp.bgzf, i-j) < 0) + goto err; + if (bgzf_write(fp->fp.bgzf, &gl->data[j], i-j) != i-j) + goto err; + } else { + if (hwrite(fp->fp.hfile, &gl->data[j], i-j) != i-j) + goto err; + } + + bam1_t *b = &gb->bams[count++]; + if (fp->format.compression == bgzf) { + if (bgzf_idx_push(fp->fp.bgzf, fp->idx, + b->core.tid, b->core.pos, bam_endpos(b), + bgzf_tell(fp->fp.bgzf), + !(b->core.flag&BAM_FUNMAP)) < 0) { + sam_state_err(fd, errno ? 
errno : ENOMEM); + hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", + bam_get_qname(b), sam_hdr_tid2name(fd->h, b->core.tid), sam_hdr_tid2len(fd->h, b->core.tid), b->core.flag, b->core.pos+1); + goto err; + } + } else { + if (hts_idx_push(fp->idx, b->core.tid, b->core.pos, bam_endpos(b), + bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { + sam_state_err(fd, errno ? errno : ENOMEM); + hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", + bam_get_qname(b), sam_hdr_tid2name(fd->h, b->core.tid), sam_hdr_tid2len(fd->h, b->core.tid), b->core.flag, b->core.pos+1); + goto err; + } + } + } + + assert(count == gb->nbams); + + // Add bam array to free-list + pthread_mutex_lock(&fd->lines_m); + gb->next = fd->bams; + fd->bams = gl->bams; + gl->bams = NULL; + pthread_mutex_unlock(&fd->lines_m); + } else { + if (fp->is_bgzf) { + // We keep track of how much in the current block we have + // remaining => R. We look for the last newline in input + // [i] to [i+R], backwards => position N. + // + // If we find a newline, we write out bytes i to N. + // We know we cannot fit the next record in this bgzf block, + // so we flush what we have and copy input N to i+R into + // the start of a new block, and recompute a new R for that. + // + // If we don't find a newline (i==N) then we cannot extend + // the current block at all, so flush whatever is in it now + // if it ends on a newline. + // We still copy i(==N) to i+R to the next block and + // continue as before with a new R. + // + // The only exception on the flush is when we run out of + // data in the input. In that case we skip it as we don't + // yet know if the next record will fit. 
+ // + // Both conditions share the same code here: + // - Look for newline (pos N) + // - Write i to N (which maybe 0) + // - Flush if block ends on newline and not end of input + // - write N to i+R + + int i = 0; + BGZF *fb = fp->fp.bgzf; + while (i < gl->data_size) { + // remaining space in block + int R = BGZF_BLOCK_SIZE - fb->block_offset; + int eod = 0; + if (R > gl->data_size-i) + R = gl->data_size-i, eod = 1; + + // Find last newline in input data + int N = i + R; + while (--N > i) { + if (gl->data[N] == '\n') + break; + } + + if (N != i) { + // Found a newline + N++; + if (bgzf_write(fb, &gl->data[i], N-i) != N-i) + goto err; + } + + // Flush bgzf block + int b_off = fb->block_offset; + if (!eod && b_off && + ((char *)fb->uncompressed_block)[b_off-1] == '\n') + if (bgzf_flush_try(fb, BGZF_BLOCK_SIZE) < 0) + goto err; + + // Copy from N onwards into next block + if (i+R > N) + if (bgzf_write(fb, &gl->data[N], i+R - N) + != i+R - N) + goto err; + + i = i+R; + } + } else { + if (hwrite(fp->fp.hfile, gl->data, gl->data_size) != gl->data_size) + goto err; + } + } + + hts_tpool_delete_result(r, 0); + + // Also updated by main thread + pthread_mutex_lock(&fd->lines_m); + gl->next = fd->lines; + fd->lines = gl; + pthread_mutex_unlock(&fd->lines_m); + } + + sam_state_err(fd, 0); // success + hts_tpool_process_shutdown(fd->q); + return NULL; + + err: + sam_state_err(fd, errno ? errno : EIO); + return (void *)-1; +} + +// Run from one of the worker threads. +// Convert a passed in array of BAMs (sp_bams) and converts to a block +// of text SAM records (sp_lines). +static void *sam_format_worker(void *arg) { + sp_bams *gb = (sp_bams *)arg; + sp_lines *gl = NULL; + int i; + SAM_state *fd = gb->fd; + htsFile *fp = fd->fp; + + // Use a block of SAM strings we had earlier if available. 
+ pthread_mutex_lock(&fd->lines_m); + if (fd->lines) { + gl = fd->lines; + fd->lines = gl->next; + } + pthread_mutex_unlock(&fd->lines_m); + + if (gl == NULL) { + gl = calloc(1, sizeof(*gl)); + if (!gl) { + sam_state_err(fd, ENOMEM); + return NULL; + } + gl->alloc = gl->data_size = 0; + gl->data = NULL; + } + gl->serial = gb->serial; + gl->next = NULL; + + kstring_t ks = {0, gl->alloc, gl->data}; + + for (i = 0; i < gb->nbams; i++) { + if (sam_format1_append(fd->h, &gb->bams[i], &ks) < 0) { + sam_state_err(fd, errno ? errno : EIO); + goto err; + } + kputc('\n', &ks); + } + + pthread_mutex_lock(&fd->lines_m); + gl->data_size = ks.l; + gl->alloc = ks.m; + gl->data = ks.s; + + if (fp->idx) { + // Keep hold of the bam array a little longer as + // sam_dispatcher_write needs to use them for building the index. + gl->bams = gb; + } else { + // Add bam array to free-list + gb->next = fd->bams; + fd->bams = gb; + } + pthread_mutex_unlock(&fd->lines_m); + + return gl; + + err: + // Possible race between this and fd->curr_bam. + // Easier to not free and leave it on the input list so it + // gets freed there instead? 
+ // sam_free_sp_bams(gb); + if (gl) { + free(gl->data); + free(gl); + } + return NULL; +} + +int sam_set_thread_pool(htsFile *fp, htsThreadPool *p) { + if (fp->state) + return 0; + + if (!(fp->state = sam_state_create(fp))) + return -1; + SAM_state *fd = (SAM_state *)fp->state; + + pthread_mutex_init(&fd->lines_m, NULL); + pthread_mutex_init(&fd->command_m, NULL); + pthread_cond_init(&fd->command_c, NULL); + fd->p = p->pool; + int qsize = p->qsize; + if (!qsize) + qsize = 2*hts_tpool_size(fd->p); + fd->q = hts_tpool_process_init(fd->p, qsize, 0); + if (!fd->q) { + sam_state_destroy(fp); + return -1; + } + + if (fp->format.compression == bgzf) + return bgzf_thread_pool(fp->fp.bgzf, p->pool, p->qsize); + + return 0; +} + +int sam_set_threads(htsFile *fp, int nthreads) { + if (nthreads <= 0) + return 0; + + htsThreadPool p; + p.pool = hts_tpool_init(nthreads); + p.qsize = nthreads*2; + + int ret = sam_set_thread_pool(fp, &p); + if (ret < 0) + return ret; + + SAM_state *fd = (SAM_state *)fp->state; + fd->own_pool = 1; + + return 0; +} + +typedef struct { + kstring_t name; + kstring_t comment; // NB: pointer into name, do not free + kstring_t seq; + kstring_t qual; + int casava; + int aux; + int rnum; + char BC[3]; // aux tag ID for barcode + khash_t(tag) *tags; // which aux tags to use (if empty, use all). + char nprefix; + int sra_names; +} fastq_state; + +// Initialise fastq state. +// Name char of '@' or '>' distinguishes fastq vs fasta variant +static fastq_state *fastq_state_init(int name_char) { + fastq_state *x = (fastq_state *)calloc(1, sizeof(*x)); + if (!x) + return NULL; + strcpy(x->BC, "BC"); + x->nprefix = name_char; + + return x; +} + +void fastq_state_destroy(htsFile *fp) { + if (fp->state) { + fastq_state *x = (fastq_state *)fp->state; + if (x->tags) + kh_destroy(tag, x->tags); + ks_free(&x->name); + ks_free(&x->seq); + ks_free(&x->qual); + free(fp->state); + } +} + +int fastq_state_set(samFile *fp, enum hts_fmt_option opt, ...) 
{ + va_list args; + + if (!fp) + return -1; + if (!fp->state) + if (!(fp->state = fastq_state_init(fp->format.format == fastq_format + ? '@' : '>'))) + return -1; + + fastq_state *x = (fastq_state *)fp->state; + + switch (opt) { + case FASTQ_OPT_CASAVA: + x->casava = 1; + break; + + case FASTQ_OPT_NAME2: + x->sra_names = 1; + break; + + case FASTQ_OPT_AUX: { + va_start(args, opt); + x->aux = 1; + char *tag = va_arg(args, char *); + va_end(args); + if (tag && strcmp(tag, "1") != 0) { + if (!x->tags) + if (!(x->tags = kh_init(tag))) + return -1; + + size_t i, tlen = strlen(tag); + for (i = 0; i+3 <= tlen+1; i += 3) { + if (tag[i+0] == ',' || tag[i+1] == ',' || + !(tag[i+2] == ',' || tag[i+2] == '\0')) { + hts_log_warning("Bad tag format '%.3s'; skipping option", tag+i); + break; + } + int ret, tcode = tag[i+0]*256 + tag[i+1]; + kh_put(tag, x->tags, tcode, &ret); + if (ret < 0) + return -1; + } + } + break; + } + + case FASTQ_OPT_BARCODE: { + va_start(args, opt); + char *bc = va_arg(args, char *); + va_end(args); + strncpy(x->BC, bc, 2); + x->BC[2] = 0; + break; + } + + case FASTQ_OPT_RNUM: + x->rnum = 1; + break; + + default: + break; + } + return 0; +} + +static int fastq_parse1(htsFile *fp, bam1_t *b) { + fastq_state *x = (fastq_state *)fp->state; + size_t i, l; + int ret = 0; + + if (fp->format.format == fasta_format && fp->line.s) { + // For FASTA we've already read the >name line; steal it + // Not the most efficient, but we don't optimise for fasta reading. + if (fp->line.l == 0) + return -1; // EOF + + free(x->name.s); + x->name = fp->line; + fp->line.l = fp->line.m = 0; + fp->line.s = NULL; + } else { + // Read a FASTQ format entry. + ret = hts_getline(fp, KS_SEP_LINE, &x->name); + if (ret == -1) + return -1; // EOF + else if (ret < -1) + return ret; // ERR + } + + // Name + if (*x->name.s != x->nprefix) + return -2; + + // Reverse the SRA strangeness of putting the run_name.number before + // the read name. 
+ i = 0; + char *name = x->name.s+1; + if (x->sra_names) { + char *cp = strpbrk(x->name.s, " \t"); + if (cp) { + while (*cp == ' ' || *cp == '\t') + cp++; + *--cp = '@'; + i = cp - x->name.s; + name = cp+1; + } + } + + l = x->name.l; + char *s = x->name.s; + while (i < l && !isspace_c(s[i])) + i++; + if (i < l) { + s[i] = 0; + x->name.l = i++; + } + + // Comment; a kstring struct, but pointer into name line. (Do not free) + while (i < l && isspace_c(s[i])) + i++; + x->comment.s = s+i; + x->comment.l = l - i; + + // Seq + x->seq.l = 0; + for (;;) { + if ((ret = hts_getline(fp, KS_SEP_LINE, &fp->line)) < 0) + if (fp->format.format == fastq_format || ret < -1) + return -2; + if (ret == -1 || + *fp->line.s == (fp->format.format == fastq_format ? '+' : '>')) + break; + if (kputsn(fp->line.s, fp->line.l, &x->seq) < 0) + return -2; + } + + // Qual + if (fp->format.format == fastq_format) { + size_t remainder = x->seq.l; + x->qual.l = 0; + do { + if (hts_getline(fp, KS_SEP_LINE, &fp->line) < 0) + return -2; + if (fp->line.l > remainder) + return -2; + if (kputsn(fp->line.s, fp->line.l, &x->qual) < 0) + return -2; + remainder -= fp->line.l; + } while (remainder > 0); + + // Decr qual + for (i = 0; i < x->qual.l; i++) + x->qual.s[i] -= '!'; + } + + int flag = BAM_FUNMAP; int pflag = BAM_FMUNMAP | BAM_FPAIRED; + if (x->name.l > 2 && + x->name.s[x->name.l-2] == '/' && + isdigit_c(x->name.s[x->name.l-1])) { + switch(x->name.s[x->name.l-1]) { + case '1': flag |= BAM_FREAD1 | pflag; break; + case '2': flag |= BAM_FREAD2 | pflag; break; + default : flag |= BAM_FREAD1 | BAM_FREAD2 | pflag; break; + } + x->name.s[x->name.l-=2] = 0; + } + + // Convert to BAM + ret = bam_set1(b, + x->name.s + x->name.l - name, name, + flag, + -1, -1, 0, // ref '*', pos, mapq, + 0, NULL, // no cigar, + -1, -1, 0, // mate + x->seq.l, x->seq.s, x->qual.s, + 0); + + // Identify Illumina CASAVA strings. 
+ // ::: + char *barcode = NULL; + int barcode_len = 0; + kstring_t *kc = &x->comment; + char *endptr; + if (x->casava && + // \d:[YN]:\d+:[ACGTN]+ + kc->l > 6 && (kc->s[1] | kc->s[3]) == ':' && isdigit_c(kc->s[0]) && + strtol(kc->s+4, &endptr, 10) >= 0 && endptr != kc->s+4 + && *endptr == ':') { + + // read num + switch(kc->s[0]) { + case '1': b->core.flag |= BAM_FREAD1 | pflag; break; + case '2': b->core.flag |= BAM_FREAD2 | pflag; break; + default : b->core.flag |= BAM_FREAD1 | BAM_FREAD2 | pflag; break; + } + + if (kc->s[2] == 'Y') + b->core.flag |= BAM_FQCFAIL; + + // Barcode, maybe numeric in which case we skip it + if (!isdigit_c(endptr[1])) { + barcode = endptr+1; + for (i = barcode - kc->s; i < kc->l; i++) + if (isspace_c(kc->s[i])) + break; + + kc->s[i] = 0; + barcode_len = i+1-(barcode - kc->s); + } + } + + if (ret >= 0 && barcode_len) + if (bam_aux_append(b, x->BC, 'Z', barcode_len, (uint8_t *)barcode) < 0) + ret = -2; + + if (!x->aux) + return ret; + + // Identify any SAM style aux tags in comments too. + if (aux_parse(&kc->s[barcode_len], kc->s + kc->l, b, 1, x->tags) < 0) + ret = -2; + + return ret; +} + +// Internal component of sam_read1 below +static inline int sam_read1_bam(htsFile *fp, sam_hdr_t *h, bam1_t *b) { + int ret = bam_read1(fp->fp.bgzf, b); + if (h && ret >= 0) { + if (b->core.tid >= h->n_targets || b->core.tid < -1 || + b->core.mtid >= h->n_targets || b->core.mtid < -1) { + errno = ERANGE; + return -3; + } + } + return ret; +} + +// Internal component of sam_read1 below +static inline int sam_read1_cram(htsFile *fp, sam_hdr_t *h, bam1_t **b) { + int ret = cram_get_bam_seq(fp->fp.cram, b); + if (ret < 0) + return cram_eof(fp->fp.cram) ? 
-1 : -2; + + if (bam_tag2cigar(*b, 1, 1) < 0) + return -2; + + return ret; +} + +// Internal component of sam_read1 below +static inline int sam_read1_sam(htsFile *fp, sam_hdr_t *h, bam1_t *b) { + int ret; + + // Consume 1st line after header parsing as it wasn't using peek + if (fp->line.l != 0) { + ret = sam_parse1(&fp->line, h, b); + fp->line.l = 0; + return ret; + } + + if (fp->state) { + SAM_state *fd = (SAM_state *)fp->state; + + if (fp->format.compression == bgzf && fp->fp.bgzf->seeked) { + // We don't support multi-threaded SAM parsing with seeks yet. + int ret; + if ((ret = sam_state_destroy(fp)) < 0) { + errno = -ret; + return -2; + } + if (bgzf_seek(fp->fp.bgzf, fp->fp.bgzf->seeked, SEEK_SET) < 0) + return -1; + fp->fp.bgzf->seeked = 0; + goto err_recover; + } + + if (!fd->h) { + fd->h = h; + fd->h->ref_count++; + // Ensure hrecs is initialised now as we don't want multiple + // threads trying to do this simultaneously. + if (!fd->h->hrecs && sam_hdr_fill_hrecs(fd->h) < 0) + return -2; + + // We can only do this once we've got a header + if (pthread_create(&fd->dispatcher, NULL, sam_dispatcher_read, + fp) != 0) + return -2; + fd->dispatcher_set = 1; + } + + if (fd->h != h) { + hts_log_error("SAM multi-threaded decoding does not support changing header"); + return -1; + } + + sp_bams *gb = fd->curr_bam; + if (!gb) { + if (fd->errcode) { + // In case reader failed + errno = fd->errcode; + return -2; + } + hts_tpool_result *r = hts_tpool_next_result_wait(fd->q); + if (!r) + return -2; + fd->curr_bam = gb = (sp_bams *)hts_tpool_result_data(r); + hts_tpool_delete_result(r, 0); + } + if (!gb) + return fd->errcode ? 
-2 : -1; + bam1_t *b_array = (bam1_t *)gb->bams; + if (fd->curr_idx < gb->nbams) + if (!bam_copy1(b, &b_array[fd->curr_idx++])) + return -2; + if (fd->curr_idx == gb->nbams) { + pthread_mutex_lock(&fd->lines_m); + gb->next = fd->bams; + fd->bams = gb; + pthread_mutex_unlock(&fd->lines_m); + + fd->curr_bam = NULL; + fd->curr_idx = 0; + // Consider prefetching next record? I.e. + // } else { + // __builtin_prefetch(&b_array[fd->curr_idx], 0, 3); + } + + ret = 0; + + } else { + err_recover: + ret = hts_getline(fp, KS_SEP_LINE, &fp->line); + if (ret < 0) return ret; + + ret = sam_parse1(&fp->line, h, b); + fp->line.l = 0; + if (ret < 0) { + hts_log_warning("Parse error at line %lld", (long long)fp->lineno); + if (h && h->ignore_sam_err) goto err_recover; + } + } + + return ret; +} + +// Returns 0 on success, +// -1 on EOF, +// <-1 on error +int sam_read1(htsFile *fp, sam_hdr_t *h, bam1_t *b) +{ + int ret, pass_filter; + + do { + switch (fp->format.format) { + case bam: + ret = sam_read1_bam(fp, h, b); + break; + + case cram: + ret = sam_read1_cram(fp, h, &b); + break; + + case sam: + ret = sam_read1_sam(fp, h, b); + break; + + case fasta_format: + case fastq_format: { + fastq_state *x = (fastq_state *)fp->state; + if (!x) { + if (!(fp->state = fastq_state_init(fp->format.format + == fastq_format ? '@' : '>'))) + return -2; + } + + return fastq_parse1(fp, b); + } + + case empty_format: + errno = EPIPE; + return -3; + + default: + errno = EFTYPE; + return -3; + } + + pass_filter = (ret >= 0 && fp->filter) + ? sam_passes_filter(h, b, fp->filter) + : 1; + } while (pass_filter == 0); + + return pass_filter < 0 ? -2 : ret; +} + +// With gcc, -O3 or -ftree-loop-vectorize is really key here as otherwise +// this code isn't vectorised and runs far slower than is necessary (even +// with the restrict keyword being used). 
+static inline void HTS_OPT3 +add33(uint8_t *a, const uint8_t * b, int32_t len) { + uint32_t i; + for (i = 0; i < len; i++) + a[i] = b[i]+33; +} + +static int sam_format1_append(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) +{ + int i, r = 0; + uint8_t *s, *end; + const bam1_core_t *c = &b->core; + + if (c->l_qname == 0) + return -1; + r |= kputsn_(bam_get_qname(b), c->l_qname-1-c->l_extranul, str); + r |= kputc_('\t', str); // query name + r |= kputw(c->flag, str); r |= kputc_('\t', str); // flag + if (c->tid >= 0) { // chr + r |= kputs(h->target_name[c->tid] , str); + r |= kputc_('\t', str); + } else r |= kputsn_("*\t", 2, str); + r |= kputll(c->pos + 1, str); r |= kputc_('\t', str); // pos + r |= kputw(c->qual, str); r |= kputc_('\t', str); // qual + if (c->n_cigar) { // cigar + uint32_t *cigar = bam_get_cigar(b); + for (i = 0; i < c->n_cigar; ++i) { + r |= kputw(bam_cigar_oplen(cigar[i]), str); + r |= kputc_(bam_cigar_opchr(cigar[i]), str); + } + } else r |= kputc_('*', str); + r |= kputc_('\t', str); + if (c->mtid < 0) r |= kputsn_("*\t", 2, str); // mate chr + else if (c->mtid == c->tid) r |= kputsn_("=\t", 2, str); + else { + r |= kputs(h->target_name[c->mtid], str); + r |= kputc_('\t', str); + } + r |= kputll(c->mpos + 1, str); r |= kputc_('\t', str); // mate pos + r |= kputll(c->isize, str); r |= kputc_('\t', str); // template len + if (c->l_qseq) { // seq and qual + uint8_t *s = bam_get_seq(b); + if (ks_resize(str, str->l+2+2*c->l_qseq) < 0) goto mem_err; + char *cp = str->s + str->l; + + // Sequence, 2 bases at a time + nibble2base(s, cp, c->l_qseq); + cp[c->l_qseq] = '\t'; + cp += c->l_qseq+1; + + // Quality + s = bam_get_qual(b); + i = 0; + if (s[0] == 0xff) { + cp[i++] = '*'; + } else { + add33((uint8_t *)cp, s, c->l_qseq); // cp[i] = s[i]+33; + i = c->l_qseq; + } + cp[i] = 0; + cp += i; + str->l = cp - str->s; + } else r |= kputsn_("*\t*", 3, str); + + s = bam_get_aux(b); // aux + end = b->data + b->l_data; + + while (end - s >= 4) { + r |= 
kputc_('\t', str); + if ((s = (uint8_t *)sam_format_aux1(s, s[2], s+3, end, str)) == NULL) + goto bad_aux; + } + r |= kputsn("", 0, str); // nul terminate + if (r < 0) goto mem_err; + + return str->l; + + bad_aux: + hts_log_error("Corrupted aux data for read %.*s flag %d", + b->core.l_qname, bam_get_qname(b), b->core.flag); + errno = EINVAL; + return -1; + + mem_err: + hts_log_error("Out of memory"); + errno = ENOMEM; + return -1; +} + +int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) +{ + str->l = 0; + return sam_format1_append(h, b, str); +} + +static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end); +int fastq_format1(fastq_state *x, const bam1_t *b, kstring_t *str) +{ + unsigned flag = b->core.flag; + int i, e = 0, len = b->core.l_qseq; + uint8_t *seq, *qual; + + str->l = 0; + + // Name + if (kputc(x->nprefix, str) == EOF || kputs(bam_get_qname(b), str) == EOF) + return -1; + + // /1 or /2 suffix + if (x && x->rnum && (flag & BAM_FPAIRED)) { + int r12 = flag & (BAM_FREAD1 | BAM_FREAD2); + if (r12 == BAM_FREAD1) { + if (kputs("/1", str) == EOF) + return -1; + } else if (r12 == BAM_FREAD2) { + if (kputs("/2", str) == EOF) + return -1; + } + } + + // Illumina CASAVA tag. + // This is ::: + if (x && x->casava) { + int rnum = (flag & BAM_FREAD1)? 1 : (flag & BAM_FREAD2)? 2 : 0; + char filtered = (flag & BAM_FQCFAIL)? 'Y' : 'N'; + uint8_t *bc = bam_aux_get(b, x->BC); + if (ksprintf(str, " %d:%c:0:%s", rnum, filtered, + bc ? (char *)bc+1 : "0") < 0) + return -1; + + if (bc && (*bc != 'Z' || (!isupper_c(bc[1]) && !islower_c(bc[1])))) { + hts_log_warning("BC tag starts with non-sequence base; using '0'"); + str->l -= strlen((char *)bc)-2; // limit to 1 char + str->s[str->l-1] = '0'; + str->s[str->l] = 0; + bc = NULL; + } + + // Replace any non-alpha with '+'. 
Ie seq-seq to seq+seq + if (bc) { + int l = strlen((char *)bc+1); + char *c = (char *)str->s + str->l - l; + for (i = 0; i < l; i++) { + if (!isalpha_c(c[i])) + c[i] = '+'; + else if (islower_c(c[i])) + c[i] = toupper_c(c[i]); + } + } + } + + // Aux tags + if (x && x->aux) { + uint8_t *s = bam_get_aux(b), *end = b->data + b->l_data; + while (s && end - s >= 4) { + int tt = s[0]*256 + s[1]; + if (x->tags == NULL || + kh_get(tag, x->tags, tt) != kh_end(x->tags)) { + e |= kputc_('\t', str) < 0; + if (!(s = (uint8_t *)sam_format_aux1(s, s[2], s+3, end, str))) + return -1; + } else { + s = skip_aux(s+2, end); + } + } + e |= kputsn("", 0, str) < 0; // nul terminate + } + + if (ks_resize(str, str->l + 1 + len+1 + 2 + len+1 + 1) < 0) return -1; + e |= kputc_('\n', str) < 0; + + // Seq line + seq = bam_get_seq(b); + if (flag & BAM_FREVERSE) + for (i = len-1; i >= 0; i--) + e |= kputc_("!TGKCYSBAWRDMHVN"[bam_seqi(seq, i)], str) < 0; + else + for (i = 0; i < len; i++) + e |= kputc_(seq_nt16_str[bam_seqi(seq, i)], str) < 0; + + + // Qual line + if (x->nprefix == '@') { + kputsn("\n+\n", 3, str); + qual = bam_get_qual(b); + if (qual[0] == 0xff) + for (i = 0; i < len; i++) + e |= kputc_('B', str) < 0; + else if (flag & BAM_FREVERSE) + for (i = len-1; i >= 0; i--) + e |= kputc_(33 + qual[i], str) < 0; + else + for (i = 0; i < len; i++) + e |= kputc_(33 + qual[i], str) < 0; + + } + e |= kputc('\n', str) < 0; + + return e ? -1 : str->l; +} + +// Sadly we need to be able to modify the bam_hdr here so we can +// reference count the structure. 
+int sam_write1(htsFile *fp, const sam_hdr_t *h, const bam1_t *b) +{ + switch (fp->format.format) { + case binary_format: + fp->format.category = sequence_data; + fp->format.format = bam; + /* fall-through */ + case bam: + return bam_write_idx1(fp, h, b); + + case cram: + return cram_put_bam_seq(fp->fp.cram, (bam1_t *)b); + + case text_format: + fp->format.category = sequence_data; + fp->format.format = sam; + /* fall-through */ + case sam: + if (fp->state) { + SAM_state *fd = (SAM_state *)fp->state; + + // Threaded output + if (!fd->h) { + // NB: discard const. We don't actually modify sam_hdr_t here, + // just data pointed to by it (which is a bit weasely still), + // but out cached pointer must be non-const as we want to + // destroy it later on and sam_hdr_destroy takes non-const. + // + // We do this because some tools do sam_hdr_destroy; sam_close + // while others do sam_close; sam_hdr_destroy. The former is + // an issue as we need the header still when flushing. + fd->h = (sam_hdr_t *)h; + fd->h->ref_count++; + + if (pthread_create(&fd->dispatcher, NULL, sam_dispatcher_write, + fp) != 0) + return -2; + fd->dispatcher_set = 1; + } + + if (fd->h != h) { + hts_log_error("SAM multi-threaded decoding does not support changing header"); + return -2; + } + + // Find a suitable BAM array to copy to + sp_bams *gb = fd->curr_bam; + if (!gb) { + pthread_mutex_lock(&fd->lines_m); + if (fd->bams) { + fd->curr_bam = gb = fd->bams; + fd->bams = gb->next; + gb->next = NULL; + gb->nbams = 0; + gb->bam_mem = 0; + pthread_mutex_unlock(&fd->lines_m); + } else { + pthread_mutex_unlock(&fd->lines_m); + if (!(gb = calloc(1, sizeof(*gb)))) return -1; + if (!(gb->bams = calloc(SAM_NBAM, sizeof(*gb->bams)))) { + free(gb); + return -1; + } + gb->nbams = 0; + gb->abams = SAM_NBAM; + gb->bam_mem = 0; + gb->fd = fd; + fd->curr_idx = 0; + fd->curr_bam = gb; + } + } + + if (!bam_copy1(&gb->bams[gb->nbams++], b)) + return -2; + gb->bam_mem += b->l_data + sizeof(*b); + + // Dispatch if 
full + if (gb->nbams == SAM_NBAM || gb->bam_mem > SAM_NBYTES*0.8) { + gb->serial = fd->serial++; + pthread_mutex_lock(&fd->command_m); + if (fd->errcode != 0) { + pthread_mutex_unlock(&fd->command_m); + return -fd->errcode; + } + if (hts_tpool_dispatch3(fd->p, fd->q, sam_format_worker, gb, + cleanup_sp_bams, + cleanup_sp_lines, 0) < 0) { + pthread_mutex_unlock(&fd->command_m); + return -1; + } + pthread_mutex_unlock(&fd->command_m); + fd->curr_bam = NULL; + } + + // Dummy value as we don't know how long it really is. + // We could track file sizes via a SAM_state field, but I don't think + // it is necessary. + return 1; + } else { + if (sam_format1(h, b, &fp->line) < 0) return -1; + kputc('\n', &fp->line); + if (fp->is_bgzf) { + if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) + return -1; + if ( bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l) != fp->line.l ) return -1; + } else { + if ( hwrite(fp->fp.hfile, fp->line.s, fp->line.l) != fp->line.l ) return -1; + } + + if (fp->idx) { + if (fp->format.compression == bgzf) { + if (bgzf_idx_push(fp->fp.bgzf, fp->idx, b->core.tid, b->core.pos, bam_endpos(b), + bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { + hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", + bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); + return -1; + } + } else { + if (hts_idx_push(fp->idx, b->core.tid, b->core.pos, bam_endpos(b), + bgzf_tell(fp->fp.bgzf), !(b->core.flag&BAM_FUNMAP)) < 0) { + hts_log_error("Read '%s' with ref_name='%s', ref_length=%"PRIhts_pos", flags=%d, pos=%"PRIhts_pos" cannot be indexed", + bam_get_qname(b), sam_hdr_tid2name(h, b->core.tid), sam_hdr_tid2len(h, b->core.tid), b->core.flag, b->core.pos+1); + return -1; + } + } + } + + return fp->line.l; + } + + + case fasta_format: + case fastq_format: { + fastq_state *x = (fastq_state *)fp->state; + if (!x) { + if (!(fp->state = 
fastq_state_init(fp->format.format + == fastq_format ? '@' : '>'))) + return -2; + } + + if (fastq_format1(fp->state, b, &fp->line) < 0) + return -1; + if (fp->is_bgzf) { + if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) + return -1; + if (bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l) != fp->line.l) + return -1; + } else { + if (hwrite(fp->fp.hfile, fp->line.s, fp->line.l) != fp->line.l) + return -1; + } + return fp->line.l; + } + + default: + errno = EBADF; + return -1; + } +} + +/************************ + *** Auxiliary fields *** + ************************/ +#ifndef HTS_LITTLE_ENDIAN +static int aux_to_le(char type, uint8_t *out, const uint8_t *in, size_t len) { + int tsz = aux_type2size(type); + + if (tsz >= 2 && tsz <= 8 && (len & (tsz - 1)) != 0) return -1; + + switch (tsz) { + case 'H': case 'Z': case 1: // Trivial + memcpy(out, in, len); + break; + +#define aux_val_to_le(type_t, store_le) do { \ + type_t v; \ + size_t i; \ + for (i = 0; i < len; i += sizeof(type_t), out += sizeof(type_t)) { \ + memcpy(&v, in + i, sizeof(type_t)); \ + store_le(v, out); \ + } \ + } while (0) + + case 2: aux_val_to_le(uint16_t, u16_to_le); break; + case 4: aux_val_to_le(uint32_t, u32_to_le); break; + case 8: aux_val_to_le(uint64_t, u64_to_le); break; + +#undef aux_val_to_le + + case 'B': { // Recurse! 
+ uint32_t n; + if (len < 5) return -1; + memcpy(&n, in + 1, 4); + out[0] = in[0]; + u32_to_le(n, out + 1); + return aux_to_le(in[0], out + 5, in + 5, len - 5); + } + + default: // Unknown type code + return -1; + } + + + + return 0; +} +#endif + +int bam_aux_append(bam1_t *b, const char tag[2], char type, int len, const uint8_t *data) +{ + uint32_t new_len; + + assert(b->l_data >= 0); + new_len = b->l_data + 3 + len; + if (new_len > INT32_MAX || new_len < b->l_data) goto nomem; + + if (realloc_bam_data(b, new_len) < 0) return -1; + + b->data[b->l_data] = tag[0]; + b->data[b->l_data + 1] = tag[1]; + b->data[b->l_data + 2] = type; + +#ifdef HTS_LITTLE_ENDIAN + memcpy(b->data + b->l_data + 3, data, len); +#else + if (aux_to_le(type, b->data + b->l_data + 3, data, len) != 0) { + errno = EINVAL; + return -1; + } +#endif + + b->l_data = new_len; + + return 0; + + nomem: + errno = ENOMEM; + return -1; +} + +static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end) +{ + int size; + uint32_t n; + if (s >= end) return end; + size = aux_type2size(*s); ++s; // skip type + switch (size) { + case 'Z': + case 'H': + while (s < end && *s) ++s; + return s < end ? s + 1 : end; + case 'B': + if (end - s < 5) return NULL; + size = aux_type2size(*s); ++s; + n = le_to_u32(s); + s += 4; + if (size == 0 || end - s < size * n) return NULL; + return s + size * n; + case 0: + return NULL; + default: + if (end - s < size) return NULL; + return s + size; + } +} + +uint8_t *bam_aux_first(const bam1_t *b) +{ + uint8_t *s = bam_get_aux(b); + uint8_t *end = b->data + b->l_data; + if (end - s <= 2) { errno = ENOENT; return NULL; } + return s+2; +} + +uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s) +{ + uint8_t *end = b->data + b->l_data; + uint8_t *next = s? 
skip_aux((uint8_t *) s, end) : end; + if (next == NULL) goto bad_aux; + if (end - next <= 2) { errno = ENOENT; return NULL; } + return next+2; + + bad_aux: + hts_log_error("Corrupted aux data for read %s flag %d", + bam_get_qname(b), b->core.flag); + errno = EINVAL; + return NULL; +} + +uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) +{ + uint8_t *s; + for (s = bam_aux_first(b); s; s = bam_aux_next(b, s)) + if (s[-2] == tag[0] && s[-1] == tag[1]) { + // Check the tag value is valid and complete + uint8_t *e = skip_aux(s, b->data + b->l_data); + if (e == NULL) goto bad_aux; + if ((*s == 'Z' || *s == 'H') && *(e - 1) != '\0') goto bad_aux; + + return s; + } + + // errno now as set by bam_aux_first()/bam_aux_next() + return NULL; + + bad_aux: + hts_log_error("Corrupted aux data for read %s flag %d", + bam_get_qname(b), b->core.flag); + errno = EINVAL; + return NULL; +} + +int bam_aux_del(bam1_t *b, uint8_t *s) +{ + s = bam_aux_remove(b, s); + return (s || errno == ENOENT)? 0 : -1; +} + +uint8_t *bam_aux_remove(bam1_t *b, uint8_t *s) +{ + uint8_t *end = b->data + b->l_data; + uint8_t *next = skip_aux(s, end); + if (next == NULL) goto bad_aux; + + b->l_data -= next - (s-2); + if (next >= end) { errno = ENOENT; return NULL; } + + memmove(s-2, next, end - next); + return s; + + bad_aux: + hts_log_error("Corrupted aux data for read %s flag %d", + bam_get_qname(b), b->core.flag); + errno = EINVAL; + return NULL; +} + +int bam_aux_update_str(bam1_t *b, const char tag[2], int len, const char *data) +{ + // FIXME: This is not at all efficient! + size_t ln = len >= 0 ? 
len : strlen(data) + 1; + size_t old_ln = 0; + int need_nul = ln == 0 || data[ln - 1] != '\0'; + int save_errno = errno; + int new_tag = 0; + uint8_t *s = bam_aux_get(b,tag), *e; + + if (s) { // Replacing existing tag + char type = *s; + if (type != 'Z') { + hts_log_error("Called bam_aux_update_str for type '%c' instead of 'Z'", type); + errno = EINVAL; + return -1; + } + s++; + e = memchr(s, '\0', b->data + b->l_data - s); + old_ln = (e ? e - s : b->data + b->l_data - s) + 1; + s -= 3; + } else { + if (errno != ENOENT) { // Invalid aux data, give up + return -1; + } else { // Tag doesn't exist - put it on the end + errno = save_errno; + s = b->data + b->l_data; + new_tag = 3; + } + } + + if (old_ln < ln + need_nul + new_tag) { + ptrdiff_t s_offset = s - b->data; + if (possibly_expand_bam_data(b, ln + need_nul + new_tag - old_ln) < 0) + return -1; + s = b->data + s_offset; + } + if (!new_tag) { + memmove(s + 3 + ln + need_nul, + s + 3 + old_ln, + b->l_data - (s + 3 - b->data) - old_ln); + } + b->l_data += new_tag + ln + need_nul - old_ln; + + s[0] = tag[0]; + s[1] = tag[1]; + s[2] = 'Z'; + memmove(s+3,data,ln); + if (need_nul) s[3 + ln] = '\0'; + return 0; +} + +int bam_aux_update_int(bam1_t *b, const char tag[2], int64_t val) +{ + uint32_t sz, old_sz = 0, new = 0; + uint8_t *s, type; + + if (val < INT32_MIN || val > UINT32_MAX) { + errno = EOVERFLOW; + return -1; + } + if (val < INT16_MIN) { type = 'i'; sz = 4; } + else if (val < INT8_MIN) { type = 's'; sz = 2; } + else if (val < 0) { type = 'c'; sz = 1; } + else if (val < UINT8_MAX) { type = 'C'; sz = 1; } + else if (val < UINT16_MAX) { type = 'S'; sz = 2; } + else { type = 'I'; sz = 4; } + + s = bam_aux_get(b, tag); + if (s) { // Tag present - how big was the old one? 
+ switch (*s) { + case 'c': case 'C': old_sz = 1; break; + case 's': case 'S': old_sz = 2; break; + case 'i': case 'I': old_sz = 4; break; + default: errno = EINVAL; return -1; // Not an integer + } + } else { + if (errno == ENOENT) { // Tag doesn't exist - add a new one + s = b->data + b->l_data; + new = 1; + } else { // Invalid aux data, give up. + return -1; + } + } + + if (new || old_sz < sz) { + // Make room for new tag + ptrdiff_t s_offset = s - b->data; + if (possibly_expand_bam_data(b, (new ? 3 : 0) + sz - old_sz) < 0) + return -1; + s = b->data + s_offset; + if (new) { // Add tag id + *s++ = tag[0]; + *s++ = tag[1]; + } else { // Shift following data so we have space + memmove(s + sz, s + old_sz, b->l_data - s_offset - old_sz); + } + } else { + // Reuse old space. Data value may be bigger than necessary but + // we avoid having to move everything else + sz = old_sz; + type = (val < 0 ? "\0cs\0i" : "\0CS\0I")[old_sz]; + assert(type > 0); + } + *s++ = type; +#ifdef HTS_LITTLE_ENDIAN + memcpy(s, &val, sz); +#else + switch (sz) { + case 4: u32_to_le(val, s); break; + case 2: u16_to_le(val, s); break; + default: *s = val; break; + } +#endif + b->l_data += (new ? 3 : 0) + sz - old_sz; + return 0; +} + +int bam_aux_update_float(bam1_t *b, const char tag[2], float val) +{ + uint8_t *s = bam_aux_get(b, tag); + int shrink = 0, new = 0; + + if (s) { // Tag present - what was it? + switch (*s) { + case 'f': break; + case 'd': shrink = 1; break; + default: errno = EINVAL; return -1; // Not a float + } + } else { + if (errno == ENOENT) { // Tag doesn't exist - add a new one + new = 1; + } else { // Invalid aux data, give up. 
+ return -1; + } + } + + if (new) { // Ensure there's room + if (possibly_expand_bam_data(b, 3 + 4) < 0) + return -1; + s = b->data + b->l_data; + *s++ = tag[0]; + *s++ = tag[1]; + } else if (shrink) { // Convert non-standard double tag to float + memmove(s + 5, s + 9, b->l_data - ((s + 9) - b->data)); + b->l_data -= 4; + } + *s++ = 'f'; + float_to_le(val, s); + if (new) b->l_data += 7; + + return 0; +} + +int bam_aux_update_array(bam1_t *b, const char tag[2], + uint8_t type, uint32_t items, void *data) +{ + uint8_t *s = bam_aux_get(b, tag); + size_t old_sz = 0, new_sz; + int new = 0; + + if (s) { // Tag present + if (*s != 'B') { errno = EINVAL; return -1; } + old_sz = aux_type2size(s[1]); + if (old_sz < 1 || old_sz > 4) { errno = EINVAL; return -1; } + old_sz *= le_to_u32(s + 2); + } else { + if (errno == ENOENT) { // Tag doesn't exist - add a new one + s = b->data + b->l_data; + new = 1; + } else { // Invalid aux data, give up. + return -1; + } + } + + new_sz = aux_type2size(type); + if (new_sz < 1 || new_sz > 4) { errno = EINVAL; return -1; } + if (items > INT32_MAX / new_sz) { errno = ENOMEM; return -1; } + new_sz *= items; + + if (new || old_sz < new_sz) { + // Make room for new tag + ptrdiff_t s_offset = s - b->data; + if (possibly_expand_bam_data(b, (new ? 
8 : 0) + new_sz - old_sz) < 0) + return -1; + s = b->data + s_offset; + } + if (new) { // Add tag id and type + *s++ = tag[0]; + *s++ = tag[1]; + *s = 'B'; + b->l_data += 8 + new_sz; + } else if (old_sz != new_sz) { // shift following data if necessary + memmove(s + 6 + new_sz, s + 6 + old_sz, + b->l_data - ((s + 6 + old_sz) - b->data)); + b->l_data -= old_sz; + b->l_data += new_sz; + } + + s[1] = type; + u32_to_le(items, s + 2); +#ifdef HTS_LITTLE_ENDIAN + memcpy(s + 6, data, new_sz); + return 0; +#else + return aux_to_le(type, s + 6, data, new_sz); +#endif +} + +static inline int64_t get_int_aux_val(uint8_t type, const uint8_t *s, + uint32_t idx) +{ + switch (type) { + case 'c': return le_to_i8(s + idx); + case 'C': return s[idx]; + case 's': return le_to_i16(s + 2 * idx); + case 'S': return le_to_u16(s + 2 * idx); + case 'i': return le_to_i32(s + 4 * idx); + case 'I': return le_to_u32(s + 4 * idx); + default: + errno = EINVAL; + return 0; + } +} + +int64_t bam_aux2i(const uint8_t *s) +{ + int type; + type = *s++; + return get_int_aux_val(type, s, 0); +} + +double bam_aux2f(const uint8_t *s) +{ + int type; + type = *s++; + if (type == 'd') return le_to_double(s); + else if (type == 'f') return le_to_float(s); + else return get_int_aux_val(type, s, 0); +} + +char bam_aux2A(const uint8_t *s) +{ + int type; + type = *s++; + if (type == 'A') return *(char*)s; + errno = EINVAL; + return 0; +} + +char *bam_aux2Z(const uint8_t *s) +{ + int type; + type = *s++; + if (type == 'Z' || type == 'H') return (char*)s; + errno = EINVAL; + return 0; +} + +uint32_t bam_auxB_len(const uint8_t *s) +{ + if (s[0] != 'B') { + errno = EINVAL; + return 0; + } + return le_to_u32(s + 2); +} + +int64_t bam_auxB2i(const uint8_t *s, uint32_t idx) +{ + uint32_t len = bam_auxB_len(s); + if (idx >= len) { + errno = ERANGE; + return 0; + } + return get_int_aux_val(s[1], s + 6, idx); +} + +double bam_auxB2f(const uint8_t *s, uint32_t idx) +{ + uint32_t len = bam_auxB_len(s); + if (idx >= len) { + 
errno = ERANGE; + return 0.0; + } + if (s[1] == 'f') return le_to_float(s + 6 + 4 * idx); + else return get_int_aux_val(s[1], s + 6, idx); +} + +int sam_open_mode(char *mode, const char *fn, const char *format) +{ + // TODO Parse "bam5" etc for compression level + if (format == NULL) { + // Try to pick a format based on the filename extension + char extension[HTS_MAX_EXT_LEN]; + if (find_file_extension(fn, extension) < 0) return -1; + return sam_open_mode(mode, fn, extension); + } + else if (strcasecmp(format, "bam") == 0) strcpy(mode, "b"); + else if (strcasecmp(format, "cram") == 0) strcpy(mode, "c"); + else if (strcasecmp(format, "sam") == 0) strcpy(mode, ""); + else if (strcasecmp(format, "sam.gz") == 0) strcpy(mode, "z"); + else if (strcasecmp(format, "fastq") == 0 || + strcasecmp(format, "fq") == 0) strcpy(mode, "f"); + else if (strcasecmp(format, "fastq.gz") == 0 || + strcasecmp(format, "fq.gz") == 0) strcpy(mode, "fz"); + else if (strcasecmp(format, "fasta") == 0 || + strcasecmp(format, "fa") == 0) strcpy(mode, "F"); + else if (strcasecmp(format, "fasta.gz") == 0 || + strcasecmp(format, "fa.gz") == 0) strcpy(mode, "Fz"); + else return -1; + + return 0; +} + +// A version of sam_open_mode that can handle ,key=value options. +// The format string is allocated and returned, to be freed by the caller. +// Prefix should be "r" or "w", +char *sam_open_mode_opts(const char *fn, + const char *mode, + const char *format) +{ + char *mode_opts = malloc((format ? strlen(format) : 1) + + (mode ? strlen(mode) : 1) + 12); + char *opts, *cp; + int format_len; + + if (!mode_opts) + return NULL; + + strcpy(mode_opts, mode ? 
mode : "r"); + cp = mode_opts + strlen(mode_opts); + + if (format == NULL) { + // Try to pick a format based on the filename extension + char extension[HTS_MAX_EXT_LEN]; + if (find_file_extension(fn, extension) < 0) { + free(mode_opts); + return NULL; + } + if (sam_open_mode(cp, fn, extension) == 0) { + return mode_opts; + } else { + free(mode_opts); + return NULL; + } + } + + if ((opts = strchr(format, ','))) { + format_len = opts-format; + } else { + opts=""; + format_len = strlen(format); + } + + if (strncmp(format, "bam", format_len) == 0) { + *cp++ = 'b'; + } else if (strncmp(format, "cram", format_len) == 0) { + *cp++ = 'c'; + } else if (strncmp(format, "cram2", format_len) == 0) { + *cp++ = 'c'; + strcpy(cp, ",VERSION=2.1"); + cp += 12; + } else if (strncmp(format, "cram3", format_len) == 0) { + *cp++ = 'c'; + strcpy(cp, ",VERSION=3.0"); + cp += 12; + } else if (strncmp(format, "sam", format_len) == 0) { + ; // format mode="" + } else if (strncmp(format, "sam.gz", format_len) == 0) { + *cp++ = 'z'; + } else if (strncmp(format, "fastq", format_len) == 0 || + strncmp(format, "fq", format_len) == 0) { + *cp++ = 'f'; + } else if (strncmp(format, "fastq.gz", format_len) == 0 || + strncmp(format, "fq.gz", format_len) == 0) { + *cp++ = 'f'; + *cp++ = 'z'; + } else if (strncmp(format, "fasta", format_len) == 0 || + strncmp(format, "fa", format_len) == 0) { + *cp++ = 'F'; + } else if (strncmp(format, "fasta.gz", format_len) == 0 || + strncmp(format, "fa", format_len) == 0) { + *cp++ = 'F'; + *cp++ = 'z'; + } else { + free(mode_opts); + return NULL; + } + + strcpy(cp, opts); + + return mode_opts; +} + +#define STRNCMP(a,b,n) (strncasecmp((a),(b),(n)) || strlen(a)!=(n)) +int bam_str2flag(const char *str) +{ + char *end, *beg = (char*) str; + long int flag = strtol(str, &end, 0); + if ( end!=str ) return flag; // the conversion was successful + flag = 0; + while ( *str ) + { + end = beg; + while ( *end && *end!=',' ) end++; + if ( !STRNCMP("PAIRED",beg,end-beg) ) flag 
|= BAM_FPAIRED; + else if ( !STRNCMP("PROPER_PAIR",beg,end-beg) ) flag |= BAM_FPROPER_PAIR; + else if ( !STRNCMP("UNMAP",beg,end-beg) ) flag |= BAM_FUNMAP; + else if ( !STRNCMP("MUNMAP",beg,end-beg) ) flag |= BAM_FMUNMAP; + else if ( !STRNCMP("REVERSE",beg,end-beg) ) flag |= BAM_FREVERSE; + else if ( !STRNCMP("MREVERSE",beg,end-beg) ) flag |= BAM_FMREVERSE; + else if ( !STRNCMP("READ1",beg,end-beg) ) flag |= BAM_FREAD1; + else if ( !STRNCMP("READ2",beg,end-beg) ) flag |= BAM_FREAD2; + else if ( !STRNCMP("SECONDARY",beg,end-beg) ) flag |= BAM_FSECONDARY; + else if ( !STRNCMP("QCFAIL",beg,end-beg) ) flag |= BAM_FQCFAIL; + else if ( !STRNCMP("DUP",beg,end-beg) ) flag |= BAM_FDUP; + else if ( !STRNCMP("SUPPLEMENTARY",beg,end-beg) ) flag |= BAM_FSUPPLEMENTARY; + else return -1; + if ( !*end ) break; + beg = end + 1; + } + return flag; +} + +char *bam_flag2str(int flag) +{ + kstring_t str = {0,0,0}; + if ( flag&BAM_FPAIRED ) ksprintf(&str,"%s%s", str.l?",":"","PAIRED"); + if ( flag&BAM_FPROPER_PAIR ) ksprintf(&str,"%s%s", str.l?",":"","PROPER_PAIR"); + if ( flag&BAM_FUNMAP ) ksprintf(&str,"%s%s", str.l?",":"","UNMAP"); + if ( flag&BAM_FMUNMAP ) ksprintf(&str,"%s%s", str.l?",":"","MUNMAP"); + if ( flag&BAM_FREVERSE ) ksprintf(&str,"%s%s", str.l?",":"","REVERSE"); + if ( flag&BAM_FMREVERSE ) ksprintf(&str,"%s%s", str.l?",":"","MREVERSE"); + if ( flag&BAM_FREAD1 ) ksprintf(&str,"%s%s", str.l?",":"","READ1"); + if ( flag&BAM_FREAD2 ) ksprintf(&str,"%s%s", str.l?",":"","READ2"); + if ( flag&BAM_FSECONDARY ) ksprintf(&str,"%s%s", str.l?",":"","SECONDARY"); + if ( flag&BAM_FQCFAIL ) ksprintf(&str,"%s%s", str.l?",":"","QCFAIL"); + if ( flag&BAM_FDUP ) ksprintf(&str,"%s%s", str.l?",":"","DUP"); + if ( flag&BAM_FSUPPLEMENTARY ) ksprintf(&str,"%s%s", str.l?",":"","SUPPLEMENTARY"); + if ( str.l == 0 ) kputsn("", 0, &str); + return str.s; +} + + +/************************** + *** Pileup and Mpileup *** + **************************/ + +#if !defined(BAM_NO_PILEUP) + +#include + 
+/******************* + *** Memory pool *** + *******************/ + +typedef struct { + int k, y; + hts_pos_t x, end; +} cstate_t; + +static cstate_t g_cstate_null = { -1, 0, 0, 0 }; + +typedef struct __linkbuf_t { + bam1_t b; + hts_pos_t beg, end; + cstate_t s; + struct __linkbuf_t *next; + bam_pileup_cd cd; +} lbnode_t; + +typedef struct { + int cnt, n, max; + lbnode_t **buf; +} mempool_t; + +static mempool_t *mp_init(void) +{ + mempool_t *mp; + mp = (mempool_t*)calloc(1, sizeof(mempool_t)); + return mp; +} +static void mp_destroy(mempool_t *mp) +{ + int k; + for (k = 0; k < mp->n; ++k) { + free(mp->buf[k]->b.data); + free(mp->buf[k]); + } + free(mp->buf); + free(mp); +} +static inline lbnode_t *mp_alloc(mempool_t *mp) +{ + ++mp->cnt; + if (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t)); + else return mp->buf[--mp->n]; +} +static inline void mp_free(mempool_t *mp, lbnode_t *p) +{ + --mp->cnt; p->next = 0; // clear lbnode_t::next here + if (mp->n == mp->max) { + mp->max = mp->max? mp->max<<1 : 256; + mp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max); + } + mp->buf[mp->n++] = p; +} + +/********************** + *** CIGAR resolver *** + **********************/ + +/* s->k: the index of the CIGAR operator that has just been processed. 
/*
 * Works out how the read in p->b relates to reference position 'pos' and
 * fills in the per-position fields of 'p' (qpos, is_del, is_refskip, indel,
 * is_head, is_tail, cigar_ind).
 *
 * 's' carries the walking state between calls for the same read:
 *   s->k   index of the CIGAR operation currently being walked (-1 before
 *          the first call)
 *   s->x   reference coordinate at which operation s->k starts
 *   s->y   query coordinate at which operation s->k starts
 *   s->end compared with 'pos' to set p->is_tail
 *
 * The second branch only ever advances the state, so this appears to rely
 * on being called with non-decreasing 'pos' for a given read -- TODO confirm
 * against the caller.
 *
 * Always returns 1.
 */
static inline int resolve_cigar2(bam_pileup1_t *p, hts_pos_t pos, cstate_t *s)
{
#define _cop(c) ((c)&BAM_CIGAR_MASK)
#define _cln(c) ((c)>>BAM_CIGAR_SHIFT)

    bam1_t *b = p->b;
    bam1_core_t *c = &b->core;
    uint32_t *cigar = bam_get_cigar(b);
    int k;
    // determine the current CIGAR operation
    //fprintf(stderr, "%s\tpos=%ld\tend=%ld\t(%d,%ld,%d)\n", bam_get_qname(b), pos, s->end, s->k, s->x, s->y);
    if (s->k == -1) { // never processed
        p->qpos = 0;
        if (c->n_cigar == 1) { // just one operation, save a loop
            // NOTE(review): if the single operation is not M/=/X, s->k
            // stays -1 and cigar[s->k] is read further down -- confirm such
            // records cannot reach this function.
            if (_cop(cigar[0]) == BAM_CMATCH || _cop(cigar[0]) == BAM_CEQUAL || _cop(cigar[0]) == BAM_CDIFF) s->k = 0, s->x = c->pos, s->y = 0;
        } else { // find the first match or deletion
            // Insertions and soft clips before the first reference
            // consuming operation only advance the query coordinate.
            for (k = 0, s->x = c->pos, s->y = 0; k < c->n_cigar; ++k) {
                int op = _cop(cigar[k]);
                int l = _cln(cigar[k]);
                if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP ||
                    op == BAM_CEQUAL || op == BAM_CDIFF) break;
                else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l;
            }
            assert(k < c->n_cigar);
            s->k = k;
        }
    } else { // the read has been processed before
        int op, l = _cln(cigar[s->k]);
        if (pos - s->x >= l) { // jump to the next operation
            assert(s->k < c->n_cigar); // otherwise a bug: this function should not be called in this case
            // NOTE(review): the assert above still admits
            // s->k == n_cigar-1, in which case this reads cigar[n_cigar].
            op = _cop(cigar[s->k+1]);
            if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) { // jump to the next without a loop
                // Only M/=/X advance the query coordinate when left behind.
                if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l;
                s->x += l;
                ++s->k;
            } else { // find the next M/D/N/=/X
                if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l;
                s->x += l;
                for (k = s->k + 1; k < c->n_cigar; ++k) {
                    op = _cop(cigar[k]), l = _cln(cigar[k]);
                    if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) break;
                    else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l;
                }
                s->k = k;
            }
            assert(s->k < c->n_cigar); // otherwise a bug
        } // else, do nothing
    }
    { // collect pileup information
        int op, l;
        op = _cop(cigar[s->k]); l = _cln(cigar[s->k]);
        p->is_del = p->indel = p->is_refskip = 0;
        // On the last reference base of the current operation, look ahead
        // so that an indel starting right after it is reported here.
        if (s->x + l - 1 == pos && s->k + 1 < c->n_cigar) { // peek the next operation
            int op2 = _cop(cigar[s->k+1]);
            int l2 = _cln(cigar[s->k+1]);
            if (op2 == BAM_CDEL && op != BAM_CDEL) {
                // At start of a new deletion, merge e.g. 1D2D to 3D.
                // Within a deletion (the 2D in 1D2D) we keep p->indel=0
                // and rely on is_del=1 as we would for 3D.
                p->indel = -(int)l2;
                for (k = s->k+2; k < c->n_cigar; ++k) {
                    op2 = _cop(cigar[k]); l2 = _cln(cigar[k]);
                    if (op2 == BAM_CDEL) p->indel -= l2;
                    else break;
                }
            } else if (op2 == BAM_CINS) {
                // Sum consecutive insertions, stepping over pads.
                p->indel = l2;
                for (k = s->k+2; k < c->n_cigar; ++k) {
                    op2 = _cop(cigar[k]); l2 = _cln(cigar[k]);
                    if (op2 == BAM_CINS) p->indel += l2;
                    else if (op2 != BAM_CPAD) break;
                }
            } else if (op2 == BAM_CPAD && s->k + 2 < c->n_cigar) {
                // Pad first: collect any insertions that follow before the
                // next reference consuming operation.
                int l3 = 0;
                for (k = s->k + 2; k < c->n_cigar; ++k) {
                    op2 = _cop(cigar[k]); l2 = _cln(cigar[k]);
                    if (op2 == BAM_CINS) l3 += l2;
                    else if (op2 == BAM_CDEL || op2 == BAM_CMATCH || op2 == BAM_CREF_SKIP || op2 == BAM_CEQUAL || op2 == BAM_CDIFF) break;
                }
                if (l3 > 0) p->indel = l3;
            }
        }
        if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
            p->qpos = s->y + (pos - s->x);
        } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) {
            p->is_del = 1; p->qpos = s->y; // FIXME: distinguish D and N!!!!!
            p->is_refskip = (op == BAM_CREF_SKIP);
        } // cannot be other operations; otherwise a bug
        p->is_head = (pos == c->pos); p->is_tail = (pos == s->end);
    }
    p->cigar_ind = s->k;
    return 1;
}
seq_nt16_str[bam_seqi(bam_get_seq(p->b), + p->qpos + j - p->is_del)] + : 'N'; + ins->s[indel++] = c; + int nm; + hts_base_mod mod[256]; + if (m && (nm = bam_mods_at_qpos(p->b, p->qpos + j - p->is_del, + m, mod, 256)) > 0) { + int o_indel = indel; + if (ks_resize(ins, ins->l + nm*16+3) < 0) + return -1; + ins->s[indel++] = '['; + int j; + for (j = 0; j < nm; j++) { + char qual[20]; + if (mod[j].qual >= 0) + snprintf(qual, sizeof(qual), "%d", mod[j].qual); + else + *qual=0; + if (mod[j].modified_base < 0) + // ChEBI + indel += snprintf(&ins->s[indel], ins->m - indel, + "%c(%d)%s", + "+-"[mod[j].strand], + -mod[j].modified_base, + qual); + else + indel += snprintf(&ins->s[indel], ins->m - indel, + "%c%c%s", + "+-"[mod[j].strand], + mod[j].modified_base, + qual); + } + ins->s[indel++] = ']'; + ins->l += indel - o_indel; // grow by amount we used + } + } + break; + case BAM_CDEL: + // eg cigar 1M2I1D gives mpileup output in T+2AA-1C style + if (del_len) + *del_len = cigar[k]>>BAM_CIGAR_SHIFT; + // fall through + default: + k = p->b->core.n_cigar; + break; + } + k++; + } + ins->s[indel] = '\0'; + ins->l = indel; // string length + + return nb; // base length +} + +/* + * Fills out the kstring with the padded insertion sequence for the current + * location in 'p'. If this is not an insertion site, the string is blank. + * + * This is the original interface with no capability for reporting base + * modifications. + * + * Returns the length of insertion string on success; + * -1 on failure. 
+ */ +int bam_plp_insertion(const bam_pileup1_t *p, kstring_t *ins, int *del_len) { + return bam_plp_insertion_mod(p, NULL, ins, del_len); +} + +/*********************** + *** Pileup iterator *** + ***********************/ + +// Dictionary of overlapping reads +KHASH_MAP_INIT_STR(olap_hash, lbnode_t *) +typedef khash_t(olap_hash) olap_hash_t; + +struct bam_plp_s { + mempool_t *mp; + lbnode_t *head, *tail; + int32_t tid, max_tid; + hts_pos_t pos, max_pos; + int is_eof, max_plp, error, maxcnt; + uint64_t id; + bam_pileup1_t *plp; + // for the "auto" interface only + bam1_t *b; + bam_plp_auto_f func; + void *data; + olap_hash_t *overlaps; + + // For notification of creation and destruction events + // and associated client-owned pointer. + int (*plp_construct)(void *data, const bam1_t *b, bam_pileup_cd *cd); + int (*plp_destruct )(void *data, const bam1_t *b, bam_pileup_cd *cd); +}; + +bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data) +{ + bam_plp_t iter; + iter = (bam_plp_t)calloc(1, sizeof(struct bam_plp_s)); + iter->mp = mp_init(); + iter->head = iter->tail = mp_alloc(iter->mp); + iter->max_tid = iter->max_pos = -1; + iter->maxcnt = 8000; + if (func) { + iter->func = func; + iter->data = data; + iter->b = bam_init1(); + } + return iter; +} + +int bam_plp_init_overlaps(bam_plp_t iter) +{ + iter->overlaps = kh_init(olap_hash); // hash for tweaking quality of bases in overlapping reads + return iter->overlaps ? 
0 : -1; +} + +void bam_plp_destroy(bam_plp_t iter) +{ + lbnode_t *p, *pnext; + if ( iter->overlaps ) kh_destroy(olap_hash, iter->overlaps); + for (p = iter->head; p != NULL; p = pnext) { + if (iter->plp_destruct && p != iter->tail) + iter->plp_destruct(iter->data, &p->b, &p->cd); + pnext = p->next; + mp_free(iter->mp, p); + } + mp_destroy(iter->mp); + if (iter->b) bam_destroy1(iter->b); + free(iter->plp); + free(iter); +} + +void bam_plp_constructor(bam_plp_t plp, + int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)) { + plp->plp_construct = func; +} + +void bam_plp_destructor(bam_plp_t plp, + int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd)) { + plp->plp_destruct = func; +} + +//--------------------------------- +//--- Tweak overlapping reads +//--------------------------------- + +/** + * cigar_iref2iseq_set() - find the first CMATCH setting the ref and the read index + * cigar_iref2iseq_next() - get the next CMATCH base + * @cigar: pointer to current cigar block (rw) + * @cigar_max: pointer just beyond the last cigar block + * @icig: position within the current cigar block (rw) + * @iseq: position in the sequence (rw) + * @iref: position with respect to the beginning of the read (iref_pos - b->core.pos) (rw) + * + * Returns BAM_CMATCH, -1 when there is no more cigar to process or the requested position is not covered, + * or -2 on error. 
+ */ +static inline int cigar_iref2iseq_set(const uint32_t **cigar, + const uint32_t *cigar_max, + hts_pos_t *icig, + hts_pos_t *iseq, + hts_pos_t *iref) +{ + hts_pos_t pos = *iref; + if ( pos < 0 ) return -1; + *icig = 0; + *iseq = 0; + *iref = 0; + while ( *cigar> BAM_CIGAR_SHIFT; + + if ( cig==BAM_CSOFT_CLIP ) { (*cigar)++; *iseq += ncig; *icig = 0; continue; } + if ( cig==BAM_CHARD_CLIP || cig==BAM_CPAD ) { (*cigar)++; *icig = 0; continue; } + if ( cig==BAM_CMATCH || cig==BAM_CEQUAL || cig==BAM_CDIFF ) + { + pos -= ncig; + if ( pos < 0 ) { *icig = ncig + pos; *iseq += *icig; *iref += *icig; return BAM_CMATCH; } + (*cigar)++; *iseq += ncig; *icig = 0; *iref += ncig; + continue; + } + if ( cig==BAM_CINS ) { (*cigar)++; *iseq += ncig; *icig = 0; continue; } + if ( cig==BAM_CDEL || cig==BAM_CREF_SKIP ) + { + pos -= ncig; + if ( pos<0 ) pos = 0; + (*cigar)++; *icig = 0; *iref += ncig; + continue; + } + hts_log_error("Unexpected cigar %d", cig); + return -2; + } + *iseq = -1; + return -1; +} +static inline int cigar_iref2iseq_next(const uint32_t **cigar, + const uint32_t *cigar_max, + hts_pos_t *icig, + hts_pos_t *iseq, + hts_pos_t *iref) +{ + while ( *cigar < cigar_max ) + { + int cig = (**cigar) & BAM_CIGAR_MASK; + int ncig = (**cigar) >> BAM_CIGAR_SHIFT; + + if ( cig==BAM_CMATCH || cig==BAM_CEQUAL || cig==BAM_CDIFF ) + { + if ( *icig >= ncig - 1 ) { *icig = -1; (*cigar)++; continue; } + (*iseq)++; (*icig)++; (*iref)++; + return BAM_CMATCH; + } + if ( cig==BAM_CDEL || cig==BAM_CREF_SKIP ) { (*cigar)++; (*iref) += ncig; *icig = -1; continue; } + if ( cig==BAM_CINS ) { (*cigar)++; *iseq += ncig; *icig = -1; continue; } + if ( cig==BAM_CSOFT_CLIP ) { (*cigar)++; *iseq += ncig; *icig = -1; continue; } + if ( cig==BAM_CHARD_CLIP || cig==BAM_CPAD ) { (*cigar)++; *icig = -1; continue; } + hts_log_error("Unexpected cigar %d", cig); + return -2; + } + *iseq = -1; + *iref = -1; + return -1; +} + +// Given overlapping read 'a' (left) and 'b' (right) on the same +// template, 
adjust quality values to zero for either a or b. +// Note versions 1.12 and earlier always removed quality from 'b' for +// matching bases. Now we select a or b semi-randomly based on name hash. +// Returns 0 on success, +// -1 on failure +static int tweak_overlap_quality(bam1_t *a, bam1_t *b) +{ + const uint32_t *a_cigar = bam_get_cigar(a), + *a_cigar_max = a_cigar + a->core.n_cigar; + const uint32_t *b_cigar = bam_get_cigar(b), + *b_cigar_max = b_cigar + b->core.n_cigar; + hts_pos_t a_icig = 0, a_iseq = 0; + hts_pos_t b_icig = 0, b_iseq = 0; + uint8_t *a_qual = bam_get_qual(a), *b_qual = bam_get_qual(b); + uint8_t *a_seq = bam_get_seq(a), *b_seq = bam_get_seq(b); + + hts_pos_t iref = b->core.pos; + hts_pos_t a_iref = iref - a->core.pos; + hts_pos_t b_iref = iref - b->core.pos; + + int a_ret = cigar_iref2iseq_set(&a_cigar, a_cigar_max, + &a_icig, &a_iseq, &a_iref); + if ( a_ret<0 ) + // no overlap or error + return a_ret<-1 ? -1:0; + + int b_ret = cigar_iref2iseq_set(&b_cigar, b_cigar_max, + &b_icig, &b_iseq, &b_iref); + if ( b_ret<0 ) + // no overlap or error + return b_ret<-1 ? -1:0; + + // Determine which seq is the one getting modified qualities. + uint8_t amul, bmul; + if (__ac_Wang_hash(__ac_X31_hash_string(bam_get_qname(a))) & 1) { + amul = 1; + bmul = 0; + } else { + amul = 0; + bmul = 1; + } + + // Loop over the overlapping region nulling qualities in either + // seq a or b. 
+ int err = 0; + while ( 1 ) { + // Step to next matching reference position in a and b + while ( a_ret >= 0 && a_iref>=0 && a_iref < iref - a->core.pos ) + a_ret = cigar_iref2iseq_next(&a_cigar, a_cigar_max, + &a_icig, &a_iseq, &a_iref); + if ( a_ret<0 ) { // done + err = a_ret<-1?-1:0; + break; + } + + while ( b_ret >= 0 && b_iref>=0 && b_iref < iref - b->core.pos ) + b_ret = cigar_iref2iseq_next(&b_cigar, b_cigar_max, &b_icig, + &b_iseq, &b_iref); + if ( b_ret<0 ) { // done + err = b_ret<-1?-1:0; + break; + } + + if ( iref < a_iref + a->core.pos ) + iref = a_iref + a->core.pos; + + if ( iref < b_iref + b->core.pos ) + iref = b_iref + b->core.pos; + + iref++; + + // If A or B has a deletion then we catch up the other to this point. + // We also amend quality values using the same rules for mismatch. + if (a_iref+a->core.pos != b_iref+b->core.pos) { + if (a_iref+a->core.pos < b_iref+b->core.pos + && b_cigar > bam_get_cigar(b) + && bam_cigar_op(b_cigar[-1]) == BAM_CDEL) { + // Del in B means it's moved on further than A + do { + a_qual[a_iseq] = amul + ? a_qual[a_iseq]*0.8 + : 0; + a_ret = cigar_iref2iseq_next(&a_cigar, a_cigar_max, + &a_icig, &a_iseq, &a_iref); + if (a_ret < 0) + return -(a_ret<-1); // 0 or -1 + } while (a_iref + a->core.pos < b_iref+b->core.pos); + } else if (a_cigar > bam_get_cigar(a) + && bam_cigar_op(a_cigar[-1]) == BAM_CDEL) { + // Del in A means it's moved on further than B + do { + b_qual[b_iseq] = bmul + ? 
b_qual[b_iseq]*0.8 + : 0; + b_ret = cigar_iref2iseq_next(&b_cigar, b_cigar_max, + &b_icig, &b_iseq, &b_iref); + if (b_ret < 0) + return -(b_ret<-1); // 0 or -1 + } while (b_iref + b->core.pos < a_iref+a->core.pos); + } else { + // Anything else, eg ref-skip, we don't support here + continue; + } + } + + // fprintf(stderr, "a_cig=%ld,%ld b_cig=%ld,%ld iref=%ld " + // "a_iref=%ld b_iref=%ld a_iseq=%ld b_iseq=%ld\n", + // a_cigar-bam_get_cigar(a), a_icig, + // b_cigar-bam_get_cigar(b), b_icig, + // iref, a_iref+a->core.pos+1, b_iref+b->core.pos+1, + // a_iseq, b_iseq); + + if (a_iseq > a->core.l_qseq || b_iseq > b->core.l_qseq) + // Fell off end of sequence, bad CIGAR? + return -1; + + // We're finally at the same ref base in both a and b. + // Check if the bases match (confident) or mismatch + // (not so confident). + if ( bam_seqi(a_seq,a_iseq) == bam_seqi(b_seq,b_iseq) ) { + // We are very confident about this base. Use sum of quals + int qual = a_qual[a_iseq] + b_qual[b_iseq]; + a_qual[a_iseq] = amul * (qual>200 ? 200 : qual); + b_qual[b_iseq] = bmul * (qual>200 ? 200 : qual);; + } else { + // Not so confident about anymore given the mismatch. + // Reduce qual for lowest quality base. + if ( a_qual[a_iseq] > b_qual[b_iseq] ) { + // A highest qual base; keep + a_qual[a_iseq] = 0.8 * a_qual[a_iseq]; + b_qual[b_iseq] = 0; + } else if (a_qual[a_iseq] < b_qual[b_iseq] ) { + // B highest qual base; keep + b_qual[b_iseq] = 0.8 * b_qual[b_iseq]; + a_qual[a_iseq] = 0; + } else { + // Both equal, so pick randomly + a_qual[a_iseq] = amul * 0.8 * a_qual[a_iseq]; + b_qual[b_iseq] = bmul * 0.8 * b_qual[b_iseq]; + } + } + } + + return err; +} + +// Fix overlapping reads. Simple soft-clipping did not give good results. +// Lowering qualities of unwanted bases is more selective and works better. 
+// +// Returns 0 on success, -1 on failure +static int overlap_push(bam_plp_t iter, lbnode_t *node) +{ + if ( !iter->overlaps ) return 0; + + // mapped mates and paired reads only + if ( node->b.core.flag&BAM_FMUNMAP || !(node->b.core.flag&BAM_FPROPER_PAIR) ) return 0; + + // no overlap possible, unless some wild cigar + if ( (node->b.core.mtid >= 0 && node->b.core.tid != node->b.core.mtid) + || (llabs(node->b.core.isize) >= 2*node->b.core.l_qseq + && node->b.core.mpos >= node->end) // for those wild cigars + ) return 0; + + khiter_t kitr = kh_get(olap_hash, iter->overlaps, bam_get_qname(&node->b)); + if ( kitr==kh_end(iter->overlaps) ) + { + // Only add reads where the mate is still to arrive + if (node->b.core.mpos >= node->b.core.pos || + ((node->b.core.flag & BAM_FPAIRED) && node->b.core.mpos == -1)) { + int ret; + kitr = kh_put(olap_hash, iter->overlaps, bam_get_qname(&node->b), &ret); + if (ret < 0) return -1; + kh_value(iter->overlaps, kitr) = node; + } + } + else + { + lbnode_t *a = kh_value(iter->overlaps, kitr); + int err = tweak_overlap_quality(&a->b, &node->b); + kh_del(olap_hash, iter->overlaps, kitr); + assert(a->end-1 == a->s.end); + return err; + } + return 0; +} + +static void overlap_remove(bam_plp_t iter, const bam1_t *b) +{ + if ( !iter->overlaps ) return; + + khiter_t kitr; + if ( b ) + { + kitr = kh_get(olap_hash, iter->overlaps, bam_get_qname(b)); + if ( kitr!=kh_end(iter->overlaps) ) + kh_del(olap_hash, iter->overlaps, kitr); + } + else + { + // remove all + for (kitr = kh_begin(iter->overlaps); kitroverlaps); kitr++) + if ( kh_exist(iter->overlaps, kitr) ) kh_del(olap_hash, iter->overlaps, kitr); + } +} + + + +// Prepares next pileup position in bam records collected by bam_plp_auto -> user func -> bam_plp_push. Returns +// pointer to the piled records if next position is ready or NULL if there is not enough records in the +// buffer yet (the current position is still the maximum position across all buffered reads). 
+const bam_pileup1_t *bam_plp64_next(bam_plp_t iter, int *_tid, hts_pos_t *_pos, int *_n_plp) +{ + if (iter->error) { *_n_plp = -1; return NULL; } + *_n_plp = 0; + if (iter->is_eof && iter->head == iter->tail) return NULL; + while (iter->is_eof || iter->max_tid > iter->tid || (iter->max_tid == iter->tid && iter->max_pos > iter->pos)) { + int n_plp = 0; + // write iter->plp at iter->pos + lbnode_t **pptr = &iter->head; + while (*pptr != iter->tail) { + lbnode_t *p = *pptr; + if (p->b.core.tid < iter->tid || (p->b.core.tid == iter->tid && p->end <= iter->pos)) { // then remove + overlap_remove(iter, &p->b); + if (iter->plp_destruct) + iter->plp_destruct(iter->data, &p->b, &p->cd); + *pptr = p->next; mp_free(iter->mp, p); + } + else { + if (p->b.core.tid == iter->tid && p->beg <= iter->pos) { // here: p->end > pos; then add to pileup + if (n_plp == iter->max_plp) { // then double the capacity + iter->max_plp = iter->max_plp? iter->max_plp<<1 : 256; + iter->plp = (bam_pileup1_t*)realloc(iter->plp, sizeof(bam_pileup1_t) * iter->max_plp); + } + iter->plp[n_plp].b = &p->b; + iter->plp[n_plp].cd = p->cd; + if (resolve_cigar2(iter->plp + n_plp, iter->pos, &p->s)) ++n_plp; // actually always true... + } + pptr = &(*pptr)->next; + } + } + *_n_plp = n_plp; *_tid = iter->tid; *_pos = iter->pos; + // update iter->tid and iter->pos + if (iter->head != iter->tail) { + if (iter->tid > iter->head->b.core.tid) { + hts_log_error("Unsorted input. 
Pileup aborts"); + iter->error = 1; + *_n_plp = -1; + return NULL; + } + } + if (iter->tid < iter->head->b.core.tid) { // come to a new reference sequence + iter->tid = iter->head->b.core.tid; iter->pos = iter->head->beg; // jump to the next reference + } else if (iter->pos < iter->head->beg) { // here: tid == head->b.core.tid + iter->pos = iter->head->beg; // jump to the next position + } else ++iter->pos; // scan contiguously + // return + if (n_plp) return iter->plp; + if (iter->is_eof && iter->head == iter->tail) break; + } + return NULL; +} + +const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) +{ + hts_pos_t pos64 = 0; + const bam_pileup1_t *p = bam_plp64_next(iter, _tid, &pos64, _n_plp); + if (pos64 < INT_MAX) { + *_pos = pos64; + } else { + hts_log_error("Position %"PRId64" too large", pos64); + *_pos = INT_MAX; + iter->error = 1; + *_n_plp = -1; + return NULL; + } + return p; +} + +int bam_plp_push(bam_plp_t iter, const bam1_t *b) +{ + if (iter->error) return -1; + if (b) { + if (b->core.tid < 0) { overlap_remove(iter, b); return 0; } + // Skip only unmapped reads here, any additional filtering must be done in iter->func + if (b->core.flag & BAM_FUNMAP) { overlap_remove(iter, b); return 0; } + if (iter->tid == b->core.tid && iter->pos == b->core.pos && iter->mp->cnt > iter->maxcnt) + { + overlap_remove(iter, b); + return 0; + } + if (bam_copy1(&iter->tail->b, b) == NULL) + return -1; + iter->tail->b.id = iter->id++; + iter->tail->beg = b->core.pos; + // Use raw rlen rather than bam_endpos() which adjusts rlen=0 to rlen=1 + iter->tail->end = b->core.pos + bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b)); + iter->tail->s = g_cstate_null; iter->tail->s.end = iter->tail->end - 1; // initialize cstate_t + if (b->core.tid < iter->max_tid) { + hts_log_error("The input is not sorted (chromosomes out of order)"); + iter->error = 1; + return -1; + } + if ((b->core.tid == iter->max_tid) && (iter->tail->beg < iter->max_pos)) { + 
hts_log_error("The input is not sorted (reads out of order)"); + iter->error = 1; + return -1; + } + iter->max_tid = b->core.tid; iter->max_pos = iter->tail->beg; + if (iter->tail->end > iter->pos || iter->tail->b.core.tid > iter->tid) { + lbnode_t *next = mp_alloc(iter->mp); + if (!next) { + iter->error = 1; + return -1; + } + if (iter->plp_construct) { + if (iter->plp_construct(iter->data, &iter->tail->b, + &iter->tail->cd) < 0) { + mp_free(iter->mp, next); + iter->error = 1; + return -1; + } + } + if (overlap_push(iter, iter->tail) < 0) { + mp_free(iter->mp, next); + iter->error = 1; + return -1; + } + iter->tail->next = next; + iter->tail = iter->tail->next; + } + } else iter->is_eof = 1; + return 0; +} + +const bam_pileup1_t *bam_plp64_auto(bam_plp_t iter, int *_tid, hts_pos_t *_pos, int *_n_plp) +{ + const bam_pileup1_t *plp; + if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; } + if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; + else { // no pileup line can be obtained; read alignments + *_n_plp = 0; + if (iter->is_eof) return 0; + int ret; + while ( (ret=iter->func(iter->data, iter->b)) >= 0) { + if (bam_plp_push(iter, iter->b) < 0) { + *_n_plp = -1; + return 0; + } + if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; + // otherwise no pileup line can be returned; read the next alignment. 
+ } + if ( ret < -1 ) { iter->error = ret; *_n_plp = -1; return 0; } + if (bam_plp_push(iter, 0) < 0) { + *_n_plp = -1; + return 0; + } + if ((plp = bam_plp64_next(iter, _tid, _pos, _n_plp)) != 0) return plp; + return 0; + } +} + +const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) +{ + hts_pos_t pos64 = 0; + const bam_pileup1_t *p = bam_plp64_auto(iter, _tid, &pos64, _n_plp); + if (pos64 < INT_MAX) { + *_pos = pos64; + } else { + hts_log_error("Position %"PRId64" too large", pos64); + *_pos = INT_MAX; + iter->error = 1; + *_n_plp = -1; + return NULL; + } + return p; +} + +void bam_plp_reset(bam_plp_t iter) +{ + overlap_remove(iter, NULL); + iter->max_tid = iter->max_pos = -1; + iter->tid = iter->pos = 0; + iter->is_eof = 0; + while (iter->head != iter->tail) { + lbnode_t *p = iter->head; + iter->head = p->next; + mp_free(iter->mp, p); + } +} + +void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt) +{ + iter->maxcnt = maxcnt; +} + +/************************ + *** Mpileup iterator *** + ************************/ + +struct bam_mplp_s { + int n; + int32_t min_tid, *tid; + hts_pos_t min_pos, *pos; + bam_plp_t *iter; + int *n_plp; + const bam_pileup1_t **plp; +}; + +bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data) +{ + int i; + bam_mplp_t iter; + iter = (bam_mplp_t)calloc(1, sizeof(struct bam_mplp_s)); + iter->pos = (hts_pos_t*)calloc(n, sizeof(hts_pos_t)); + iter->tid = (int32_t*)calloc(n, sizeof(int32_t)); + iter->n_plp = (int*)calloc(n, sizeof(int)); + iter->plp = (const bam_pileup1_t**)calloc(n, sizeof(bam_pileup1_t*)); + iter->iter = (bam_plp_t*)calloc(n, sizeof(bam_plp_t)); + iter->n = n; + iter->min_pos = HTS_POS_MAX; + iter->min_tid = (uint32_t)-1; + for (i = 0; i < n; ++i) { + iter->iter[i] = bam_plp_init(func, data[i]); + iter->pos[i] = iter->min_pos; + iter->tid[i] = iter->min_tid; + } + return iter; +} + +int bam_mplp_init_overlaps(bam_mplp_t iter) +{ + int i, r = 0; + for (i = 0; i < iter->n; ++i) + r |= 
bam_plp_init_overlaps(iter->iter[i]); + return r == 0 ? 0 : -1; +} + +void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt) +{ + int i; + for (i = 0; i < iter->n; ++i) + iter->iter[i]->maxcnt = maxcnt; +} + +void bam_mplp_destroy(bam_mplp_t iter) +{ + int i; + for (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]); + free(iter->iter); free(iter->pos); free(iter->tid); + free(iter->n_plp); free(iter->plp); + free(iter); +} + +int bam_mplp64_auto(bam_mplp_t iter, int *_tid, hts_pos_t *_pos, int *n_plp, const bam_pileup1_t **plp) +{ + int i, ret = 0; + hts_pos_t new_min_pos = HTS_POS_MAX; + uint32_t new_min_tid = (uint32_t)-1; + for (i = 0; i < iter->n; ++i) { + if (iter->pos[i] == iter->min_pos && iter->tid[i] == iter->min_tid) { + int tid; + hts_pos_t pos; + iter->plp[i] = bam_plp64_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]); + if ( iter->iter[i]->error ) return -1; + if (iter->plp[i]) { + iter->tid[i] = tid; + iter->pos[i] = pos; + } else { + iter->tid[i] = 0; + iter->pos[i] = 0; + } + } + if (iter->plp[i]) { + if (iter->tid[i] < new_min_tid) { + new_min_tid = iter->tid[i]; + new_min_pos = iter->pos[i]; + } else if (iter->tid[i] == new_min_tid && iter->pos[i] < new_min_pos) { + new_min_pos = iter->pos[i]; + } + } + } + iter->min_pos = new_min_pos; + iter->min_tid = new_min_tid; + if (new_min_pos == HTS_POS_MAX) return 0; + *_tid = new_min_tid; *_pos = new_min_pos; + for (i = 0; i < iter->n; ++i) { + if (iter->pos[i] == iter->min_pos && iter->tid[i] == iter->min_tid) { + n_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i]; + ++ret; + } else n_plp[i] = 0, plp[i] = 0; + } + return ret; +} + +int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp) +{ + hts_pos_t pos64 = 0; + int ret = bam_mplp64_auto(iter, _tid, &pos64, n_plp, plp); + if (ret >= 0) { + if (pos64 < INT_MAX) { + *_pos = pos64; + } else { + hts_log_error("Position %"PRId64" too large", pos64); + *_pos = INT_MAX; + return -1; + } + } + return ret; +} + +void 
bam_mplp_reset(bam_mplp_t iter) +{ + int i; + iter->min_pos = HTS_POS_MAX; + iter->min_tid = (uint32_t)-1; + for (i = 0; i < iter->n; ++i) { + bam_plp_reset(iter->iter[i]); + iter->pos[i] = HTS_POS_MAX; + iter->tid[i] = (uint32_t)-1; + iter->n_plp[i] = 0; + iter->plp[i] = NULL; + } +} + +void bam_mplp_constructor(bam_mplp_t iter, + int (*func)(void *arg, const bam1_t *b, bam_pileup_cd *cd)) { + int i; + for (i = 0; i < iter->n; ++i) + bam_plp_constructor(iter->iter[i], func); +} + +void bam_mplp_destructor(bam_mplp_t iter, + int (*func)(void *arg, const bam1_t *b, bam_pileup_cd *cd)) { + int i; + for (i = 0; i < iter->n; ++i) + bam_plp_destructor(iter->iter[i], func); +} + +#endif // ~!defined(BAM_NO_PILEUP) diff --git a/src/htslib-1.21/sam_internal.h b/src/htslib-1.21/sam_internal.h new file mode 100644 index 0000000..750c597 --- /dev/null +++ b/src/htslib-1.21/sam_internal.h @@ -0,0 +1,121 @@ +/* sam_internal.h -- internal functions; not part of the public API. + + Copyright (C) 2019-2020, 2023-2024 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_SAM_INTERNAL_H +#define HTSLIB_SAM_INTERNAL_H + +#include +#include + +#include "htslib/sam.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Used internally in the SAM format multi-threading. +int sam_state_destroy(samFile *fp); +int sam_set_thread_pool(htsFile *fp, htsThreadPool *p); +int sam_set_threads(htsFile *fp, int nthreads); + +// Fastq state +int fastq_state_set(samFile *fp, enum hts_fmt_option opt, ...); +void fastq_state_destroy(samFile *fp); + +// bam1_t data (re)allocation +int sam_realloc_bam_data(bam1_t *b, size_t desired); + +static inline int realloc_bam_data(bam1_t *b, size_t desired) +{ + if (desired <= b->m_data) return 0; + return sam_realloc_bam_data(b, desired); +} + +static inline int possibly_expand_bam_data(bam1_t *b, size_t bytes) { + size_t new_len = (size_t) b->l_data + bytes; + + if (new_len > INT32_MAX || new_len < bytes) { // Too big or overflow + errno = ENOMEM; + return -1; + } + if (new_len <= b->m_data) return 0; + return sam_realloc_bam_data(b, new_len); +} + +/* + * Convert a nibble encoded BAM sequence to a string of bases. + * + * We do this 2 bp at a time for speed. 
Equiv to: + * + * for (i = 0; i < len; i++) + * seq[i] = seq_nt16_str[bam_seqi(nib, i)]; + */ +static inline void nibble2base_default(uint8_t *nib, char *seq, int len) { + static const char code2base[512] = + "===A=C=M=G=R=S=V=T=W=Y=H=K=D=B=N" + "A=AAACAMAGARASAVATAWAYAHAKADABAN" + "C=CACCCMCGCRCSCVCTCWCYCHCKCDCBCN" + "M=MAMCMMMGMRMSMVMTMWMYMHMKMDMBMN" + "G=GAGCGMGGGRGSGVGTGWGYGHGKGDGBGN" + "R=RARCRMRGRRRSRVRTRWRYRHRKRDRBRN" + "S=SASCSMSGSRSSSVSTSWSYSHSKSDSBSN" + "V=VAVCVMVGVRVSVVVTVWVYVHVKVDVBVN" + "T=TATCTMTGTRTSTVTTTWTYTHTKTDTBTN" + "W=WAWCWMWGWRWSWVWTWWWYWHWKWDWBWN" + "Y=YAYCYMYGYRYSYVYTYWYYYHYKYDYBYN" + "H=HAHCHMHGHRHSHVHTHWHYHHHKHDHBHN" + "K=KAKCKMKGKRKSKVKTKWKYKHKKKDKBKN" + "D=DADCDMDGDRDSDVDTDWDYDHDKDDDBDN" + "B=BABCBMBGBRBSBVBTBWBYBHBKBDBBBN" + "N=NANCNMNGNRNSNVNTNWNYNHNKNDNBNN"; + + int i, len2 = len/2; + seq[0] = 0; + + for (i = 0; i < len2; i++) + // Note size_t cast helps gcc optimiser. + memcpy(&seq[i*2], &code2base[(size_t)nib[i]*2], 2); + + if ((i *= 2) < len) + seq[i] = seq_nt16_str[bam_seqi(nib, i)]; +} + +#if defined HAVE_ATTRIBUTE_CONSTRUCTOR && \ + ((defined __x86_64__ && defined HAVE_ATTRIBUTE_TARGET && defined HAVE_BUILTIN_CPU_SUPPORT_SSSE3) || \ + (defined __ARM_NEON)) +#define BUILDING_SIMD_NIBBLE2BASE +#endif + +static inline void nibble2base(uint8_t *nib, char *seq, int len) { +#ifdef BUILDING_SIMD_NIBBLE2BASE + extern void (*htslib_nibble2base)(uint8_t *nib, char *seq, int len); + htslib_nibble2base(nib, seq, len); +#else + nibble2base_default(nib, seq, len); +#endif +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/htslib-1.21/sam_mods.c b/src/htslib-1.21/sam_mods.c new file mode 100644 index 0000000..e45f26d --- /dev/null +++ b/src/htslib-1.21/sam_mods.c @@ -0,0 +1,695 @@ +/* sam_mods.c -- Base modification handling in SAM and BAM. + + Copyright (C) 2020-2024 Genome Research Ltd. 
+ + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include +#include + +#include "htslib/sam.h" +#include "textutils_internal.h" + +// --------------------------- +// Base Modification retrieval +// +// These operate by recording state in an opaque type, allocated and freed +// via the functions below. +// +// Initially we call bam_parse_basemod to process the tags and record the +// modifications in the state structure, and then functions such as +// bam_next_basemod can iterate over this cached state. + +/* Overview of API. + +We start by allocating an hts_base_mod_state and parsing the MM, ML and MN +tags into it. This has optional flags controlling how we report base +modifications in "explicit" coordinates. 
See below + + hts_base_mod_state *m = hts_base_mod_state_alloc(); + bam_parse_basemod2(b, m, HTS_MOD_REPORT_UNCHECKED); + // Or: bam_parse_basemod(b, m), which is equiv to flags==0 + //... do something ... + hts_base_mod_state_free(m); + +In the default implicit MM coordinate system, any location not +reported is implicitly assumed to contain no modification. We only +report the places we think are likely modified. + +Some tools however only look for base modifications in particular +contexts, eg CpG islands. Here we need to distinguish between +not-looked-for and looked-for-but-didn't-find. These calls have an +explicit coordinate system, where we only know information about the +coordinates explicitly listed and everything else is considered to be +unverified. + +By default we don't get reports on the other coordinates in an +explicit MM tag, but the HTS_MOD_REPORT_UNCHECKED flag will report +them (with quality HTS_MOD_UNCHECKED) meaning we can do consensus +modification analysis with accurate counting when dealing with a +mixture of explicit and implicit records. + + +We have different ways of processing the base modifications. We can +iterate either mod-by-mod or position-by-position, or we can simply +query a specific coordinate as may be done when processing a pileup. + +To check for base modifications at a specific location within a +sequence we can use bam_mods_at_qpos. This provides complete random +access within the MM string. However currently this is inefficiently +implemented so should only be used for occasional analysis or as a way +to start iterating at a specific location. It modifies the state +position, so after the first use we can then switch to +bam_mods_at_next_pos to iterate position by position from then on. + + hts_base_mod mods[10]; + int n = bam_mods_at_qpos(b, pos, m, mods, 10); + +For base by base, we have bam_mods_at_next_pos. This strictly starts +at the first base and reports entries one at a time. 
It's more +efficient than a loop repeatedly calling ...at-pos. + + hts_base_mod mods[10]; + int n = bam_mods_at_next_pos(b, m, mods, 10); + for (int i = 0; i < n; i++) { + // report mod i of n + } + +Iterating over modifications instead of coordinates is simpler and +more efficient as it skips reporting of unmodified bases. This is +done with bam_next_basemod. + + hts_base_mod mods[10]; + while ((n=bam_next_basemod(b, m, mods, 10, &pos)) > 0) { + for (j = 0; j < n; j++) { + // Report 'j'th mod at sequence position 'pos' + } + } + +There are also functions that query meta-data about the MM line rather +than per-site information. + +bam_mods_recorded returns an array of ints holding the +ve code ('m') +or -ve CHEBI numeric values. + + int ntype, *types = bam_mods_recorded(m, &ntype); + +We can then query a specific modification type to get further +information on the strand it is operating on, whether it has implicit +or explicit coordinates, and what its corresponding canonical base +is (The "C" in "C+m"). bam_mods_query_type does this by code name, +while bam_mods_queryi does this by numeric i^{th} type (from 0 to ntype-1). + + bam_mods_query_type(m, 'c', &strand, &implicit, &canonical); + bam_mods_queryi(m, 2, &strand, &implicit, &canonical); + +*/ + +/* + * Base modifications are stored in MM/Mm tags, defined as + * + * <mod_list> ::= <mod_chain><mod_list> | "" + * <mod_chain> ::= <canonical_base><strand><mod-list><delta-list> + * + * <canonical_base> ::= "A" | "C" | "G" | "T" | "N". + * + * <strand> ::= "+" | "-". + * + * <mod-list> ::= <simple-mod-list> | <ChEBI-code> + * <simple-mod-list> ::= <simple-mod><simple-mod-list> | <simple-mod> + * <ChEBI-code> ::= <integer> + * <simple-mod> ::= <letter> + * + * <delta-list> ::= "," <integer> <delta-list> | ";" + * + * We do not allocate additional memory other than the fixed size + * state, thus we track up to 256 pointers to different locations + * within the MM and ML tags. Each pointer is for a distinct + * modification code (simple or ChEBI), meaning some may point to the + * same delta-list when multiple codes are combined together + * (e.g. "C+mh,1,5,18,3;"). This is the MM[] array. 
+ * + * Each numeric in the delta-list is tracked in MMcount[], counted + * down until it hits zero in which case the next delta is fetched. + * + * ML array similarly holds the locations in the quality (ML) tag per + * type, but these are interleaved so C+mhfc,10,15 will have 4 types + * all pointing to the same delta position, but in ML we store + * Q(m0)Q(h0)Q(f0)Q(c0) followed by Q(m1)Q(h1)Q(f1)Q(c1). This ML + * also has MLstride indicating how many positions along ML to jump + * each time we consume a base. (4 in our above example, but usually 1 + * for the simple case). + * + * One complexity of the base modification system is that mods are + * always stored in the original DNA orientation. This is so that + * tools that may reverse-complement a sequence (eg "samtools fastq -T + * MM,ML") can pass through these modification tags irrespective of + * whether they have any knowledge of their internal workings. + * + * Because we don't wish to allocate extra memory, we cannot simply + * reverse the MM and ML tags. Sadly this means we have to manage the + * reverse complementing ourselves on-the-fly. + * For reversed reads we start at the right end of MM and no longer + * stop at the semicolon. Instead we use MMend[] array to mark the + * termination point. + */ +#define MAX_BASE_MOD 256 +struct hts_base_mod_state { + int type[MAX_BASE_MOD]; // char or minus-CHEBI + int canonical[MAX_BASE_MOD];// canonical base, as seqi (1,2,4,8,15) + char strand[MAX_BASE_MOD]; // strand of modification; + or - + int MMcount[MAX_BASE_MOD]; // no. canonical bases left until next mod + char *MM[MAX_BASE_MOD]; // next pos delta (string) + char *MMend[MAX_BASE_MOD]; // end of pos-delta string + uint8_t *ML[MAX_BASE_MOD]; // next qual + int MLstride[MAX_BASE_MOD]; // bytes between quals for this type + int implicit[MAX_BASE_MOD]; // treat unlisted positions as non-modified? + int seq_pos; // current position along sequence + int nmods; // used array size (0 to MAX_BASE_MOD-1). 
+ uint32_t flags; // Bit-field: see HTS_MOD_REPORT_UNCHECKED +}; + +hts_base_mod_state *hts_base_mod_state_alloc(void) { + return calloc(1, sizeof(hts_base_mod_state)); +} + +void hts_base_mod_state_free(hts_base_mod_state *state) { + free(state); +} + +/* + * Count frequency of A, C, G, T and N canonical bases in the sequence + */ +static void seq_freq(const bam1_t *b, int freq[16]) { + int i; + + memset(freq, 0, 16*sizeof(*freq)); + uint8_t *seq = bam_get_seq(b); + for (i = 0; i < b->core.l_qseq; i++) + freq[bam_seqi(seq, i)]++; + freq[15] = b->core.l_qseq; // all bases count as N for base mods +} + +//0123456789ABCDEF +//=ACMGRSVTWYHKDBN aka seq_nt16_str[] +//=TGKCYSBAWRDMHVN comp1ement of seq_nt16_str +//084C2A6E195D3B7F +static int seqi_rc[] = { 0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15 }; + +/* + * Parse the MM and ML tags to populate the base mod state. + * This structure will have been previously allocated via + * hts_base_mod_state_alloc, but it does not need to be repeatedly + * freed and allocated for each new bam record. (Although obviously + * it requires a new call to this function.) + * + * Flags are copied into the state and used to control reporting functions. + * Currently the only flag is HTS_MOD_REPORT_UNCHECKED, to control whether + * explicit "C+m?" mods report quality HTS_MOD_UNCHECKED for the bases + * outside the explicitly reported region. 
+ */ +int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state, + uint32_t flags) { + // Reset position, else upcoming calls may fail on + // seq pos - length comparison + state->seq_pos = 0; + state->nmods = 0; + state->flags = flags; + + // Read MM and ML tags + uint8_t *mm = bam_aux_get(b, "MM"); + if (!mm) mm = bam_aux_get(b, "Mm"); + if (!mm) + return 0; + if (mm[0] != 'Z') { + hts_log_error("%s: MM tag is not of type Z", bam_get_qname(b)); + return -1; + } + + uint8_t *mi = bam_aux_get(b, "MN"); + if (mi && bam_aux2i(mi) != b->core.l_qseq && b->core.l_qseq) { + // bam_aux2i with set errno = EINVAL and return 0 if the tag + // isn't integer, but 0 will be a seq-length mismatch anyway so + // triggers an error here too. + hts_log_error("%s: MM/MN data length is incompatible with" + " SEQ length", bam_get_qname(b)); + return -1; + } + + uint8_t *ml = bam_aux_get(b, "ML"); + if (!ml) ml = bam_aux_get(b, "Ml"); + if (ml && (ml[0] != 'B' || ml[1] != 'C')) { + hts_log_error("%s: ML tag is not of type B,C", bam_get_qname(b)); + return -1; + } + uint8_t *ml_end = ml ? 
ml+6 + le_to_u32(ml+2) : NULL; + if (ml) ml += 6; + + // Aggregate freqs of ACGTN if reversed, to get final-delta (later) + int freq[16]; + if (b->core.flag & BAM_FREVERSE) + seq_freq(b, freq); + + char *cp = (char *)mm+1; + int mod_num = 0; + int implicit = 1; + while (*cp) { + for (; *cp; cp++) { + // cp should be [ACGTNU][+-]([a-zA-Z]+|[0-9]+)[.?]?(,\d+)*; + unsigned char btype = *cp++; + + if (btype != 'A' && btype != 'C' && + btype != 'G' && btype != 'T' && + btype != 'U' && btype != 'N') + return -1; + if (btype == 'U') btype = 'T'; + + btype = seq_nt16_table[btype]; + + // Strand + if (*cp != '+' && *cp != '-') + return -1; // malformed + char strand = *cp++; + + // List of modification types + char *ms = cp, *me; // mod code start and end + char *cp_end = NULL; + int chebi = 0; + if (isdigit_c(*cp)) { + chebi = strtol(cp, &cp_end, 10); + cp = cp_end; + ms = cp-1; + } else { + while (*cp && isalpha_c(*cp)) + cp++; + if (*cp == '\0') + return -1; + } + + me = cp; + + // Optional explicit vs implicit marker + implicit = 1; + if (*cp == '.') { + // default is implicit = 1; + cp++; + } else if (*cp == '?') { + implicit = 0; + cp++; + } else if (*cp != ',' && *cp != ';') { + // parse error + return -1; + } + + long delta; + int n = 0; // nth symbol in a multi-mod string + int stride = me-ms; + int ndelta = 0; + + if (b->core.flag & BAM_FREVERSE) { + // We process the sequence in left to right order, + // but delta is successive count of bases to skip + // counting right to left. This also means the number + // of bases to skip at left edge is unrecorded (as it's + // the remainder). + // + // To output mods in left to right, we step through the + // MM list in reverse and need to identify the left-end + // "remainder" delta. + int total_seq = 0; + for (;;) { + cp += (*cp == ','); + if (*cp == 0 || *cp == ';') + break; + + delta = strtol(cp, &cp_end, 10); + if (cp_end == cp) { + hts_log_error("%s: Hit end of MM tag. 
Missing " + "semicolon?", bam_get_qname(b)); + return -1; + } + + cp = cp_end; + total_seq += delta+1; + ndelta++; + } + delta = freq[seqi_rc[btype]] - total_seq; // remainder + } else { + delta = *cp == ',' + ? strtol(cp+1, &cp_end, 10) + : 0; + if (!cp_end) { + // empty list + delta = INT_MAX; + cp_end = cp; + } + } + // Now delta is first in list or computed remainder, + // and cp_end is either start or end of the MM list. + while (ms < me) { + state->type [mod_num] = chebi ? -chebi : *ms; + state->strand [mod_num] = (strand == '-'); + state->canonical[mod_num] = btype; + state->MLstride [mod_num] = stride; + state->implicit [mod_num] = implicit; + + if (delta < 0) { + hts_log_error("%s: MM tag refers to bases beyond sequence " + "length", bam_get_qname(b)); + return -1; + } + state->MMcount [mod_num] = delta; + if (b->core.flag & BAM_FREVERSE) { + state->MM [mod_num] = me+1; + state->MMend[mod_num] = cp_end; + state->ML [mod_num] = ml ? ml+n +(ndelta-1)*stride: NULL; + } else { + state->MM [mod_num] = cp_end; + state->MMend[mod_num] = NULL; + state->ML [mod_num] = ml ? ml+n : NULL; + } + + if (++mod_num >= MAX_BASE_MOD) { + hts_log_error("%s: Too many base modification types", + bam_get_qname(b)); + return -1; + } + ms++; n++; + } + + // Skip modification deltas + if (ml) { + if (b->core.flag & BAM_FREVERSE) { + ml += ndelta*stride; + } else { + while (*cp && *cp != ';') { + if (*cp == ',') + ml+=stride; + cp++; + } + } + if (ml > ml_end) { + hts_log_error("%s: Insufficient number of entries in ML " + "tag", bam_get_qname(b)); + return -1; + } + } else { + // cp_end already known if FREVERSE + if (cp_end && (b->core.flag & BAM_FREVERSE)) + cp = cp_end; + else + while (*cp && *cp != ';') + cp++; + } + if (!*cp) { + hts_log_error("%s: Hit end of MM tag. 
Missing semicolon?", + bam_get_qname(b)); + return -1; + } + } + } + if (ml && ml != ml_end) { + hts_log_error("%s: Too many entries in ML tag", bam_get_qname(b)); + return -1; + } + + state->nmods = mod_num; + + return 0; +} + +int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { + return bam_parse_basemod2(b, state, 0); +} + +/* + * Fills out mods[] with the base modifications found. + * Returns the number found (0 if none), which may be more than + * the size of n_mods if more were found than reported. + * Returns <= -1 on error. + * + * This always marches left to right along sequence, irrespective of + * reverse flag or modification strand. + */ +int bam_mods_at_next_pos(const bam1_t *b, hts_base_mod_state *state, + hts_base_mod *mods, int n_mods) { + if (b->core.flag & BAM_FREVERSE) { + if (state->seq_pos < 0) + return -1; + } else { + if (state->seq_pos >= b->core.l_qseq) + return -1; + } + + int i, j, n = 0; + unsigned char base = bam_seqi(bam_get_seq(b), state->seq_pos); + state->seq_pos++; + if (b->core.flag & BAM_FREVERSE) + base = seqi_rc[base]; + + for (i = 0; i < state->nmods; i++) { + int unchecked = 0; + if (state->canonical[i] != base && state->canonical[i] != 15/*N*/) + continue; + + if (state->MMcount[i]-- > 0) { + if (!state->implicit[i] && + (state->flags & HTS_MOD_REPORT_UNCHECKED)) + unchecked = 1; + else + continue; + } + + char *MMptr = state->MM[i]; + if (n < n_mods) { + mods[n].modified_base = state->type[i]; + mods[n].canonical_base = seq_nt16_str[state->canonical[i]]; + mods[n].strand = state->strand[i]; + mods[n].qual = unchecked + ? HTS_MOD_UNCHECKED + : (state->ML[i] ? *state->ML[i] : HTS_MOD_UNKNOWN); + } + n++; + + if (unchecked) + continue; + + if (state->ML[i]) + state->ML[i] += (b->core.flag & BAM_FREVERSE) + ? 
-state->MLstride[i] + : +state->MLstride[i]; + + if (b->core.flag & BAM_FREVERSE) { + // process MM list backwards + char *cp; + if (state->MMend[i]-1 < state->MM[i]) { + // Should be impossible to hit if coding is correct + hts_log_error("Assert failed while processing base modification states"); + return -1; + } + for (cp = state->MMend[i]-1; cp != state->MM[i]; cp--) + if (*cp == ',') + break; + state->MMend[i] = cp; + if (cp != state->MM[i]) + state->MMcount[i] = strtol(cp+1, NULL, 10); + else + state->MMcount[i] = INT_MAX; + } else { + if (*state->MM[i] == ',') + state->MMcount[i] = strtol(state->MM[i]+1, &state->MM[i], 10); + else + state->MMcount[i] = INT_MAX; + } + + // Multiple mods at the same coords. + for (j=i+1; j < state->nmods && state->MM[j] == MMptr; j++) { + if (n < n_mods) { + mods[n].modified_base = state->type[j]; + mods[n].canonical_base = seq_nt16_str[state->canonical[j]]; + mods[n].strand = state->strand[j]; + mods[n].qual = state->ML[j] ? *state->ML[j] : -1; + } + n++; + state->MMcount[j] = state->MMcount[i]; + state->MM[j] = state->MM[i]; + if (state->ML[j]) + state->ML[j] += (b->core.flag & BAM_FREVERSE) + ? -state->MLstride[j] + : +state->MLstride[j]; + } + i = j-1; + } + + return n; +} + +/* + * Return data at the next modified location. + * + * bam_mods_at_next_pos does quite a bit of work, so we don't want to + * repeatedly call it for every location until we find a mod. Instead + * we check how many base types we can consume before the next mod, + * and scan through the sequence looking for them. Once we're at that + * site, we defer back to bam_mods_at_next_pos for the return values. 
+ */ +int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state, + hts_base_mod *mods, int n_mods, int *pos) { + // Look through state->MMcount arrays to see when the next lowest is + // per base type; + int next[16], freq[16] = {0}, i; + memset(next, 0x7f, 16*sizeof(*next)); + const int unchecked = state->flags & HTS_MOD_REPORT_UNCHECKED; + if (b->core.flag & BAM_FREVERSE) { + for (i = 0; i < state->nmods; i++) { + if (unchecked && !state->implicit[i]) + next[seqi_rc[state->canonical[i]]] = 1; + else if (next[seqi_rc[state->canonical[i]]] > state->MMcount[i]) + next[seqi_rc[state->canonical[i]]] = state->MMcount[i]; + } + } else { + for (i = 0; i < state->nmods; i++) { + if (unchecked && !state->implicit[i]) + next[state->canonical[i]] = 0; + else if (next[state->canonical[i]] > state->MMcount[i]) + next[state->canonical[i]] = state->MMcount[i]; + } + } + + // Now step through the sequence counting off base types. + for (i = state->seq_pos; i < b->core.l_qseq; i++) { + unsigned char bc = bam_seqi(bam_get_seq(b), i); + if (next[bc] <= freq[bc] || next[15] <= freq[15]) + break; + freq[bc]++; + if (bc != 15) // N + freq[15]++; + } + *pos = state->seq_pos = i; + + if (b->core.flag & BAM_FREVERSE) { + for (i = 0; i < state->nmods; i++) + state->MMcount[i] -= freq[seqi_rc[state->canonical[i]]]; + } else { + for (i = 0; i < state->nmods; i++) + state->MMcount[i] -= freq[state->canonical[i]]; + } + + if (b->core.l_qseq && state->seq_pos >= b->core.l_qseq && + !(b->core.flag & BAM_FREVERSE)) { + // Spots +ve orientation run-overs. + // The -ve orientation is spotted in bam_parse_basemod2 + int i; + for (i = 0; i < state->nmods; i++) { + // Check if any remaining items in MM after hitting the end + // of the sequence. 
+ if (state->MMcount[i] < 0x7f000000 || + (*state->MM[i]!=0 && *state->MM[i]!=';')) { + hts_log_warning("MM tag refers to bases beyond sequence length"); + return -1; + } + } + return 0; + } + + int r = bam_mods_at_next_pos(b, state, mods, n_mods); + return r > 0 ? r : 0; +} + +/* + * As per bam_mods_at_next_pos, but at a specific qpos >= the previous qpos. + * This can only march forwards along the read, but can do so by more than + * one base-pair. + * + * This makes it useful for calling from pileup iterators where qpos may + * start part way through a read for the first occurrence of that record. + */ +int bam_mods_at_qpos(const bam1_t *b, int qpos, hts_base_mod_state *state, + hts_base_mod *mods, int n_mods) { + // FIXME: for now this is inefficient in implementation. + int r = 0; + while (state->seq_pos <= qpos) + if ((r = bam_mods_at_next_pos(b, state, mods, n_mods)) < 0) + break; + + return r; +} + +/* + * Returns the list of base modification codes provided for this + * alignment record as an array of character codes (+ve) or ChEBI numbers + * (negative). + * + * Returns the array, with *ntype filled out with the size. + * The array returned should not be freed. + * It is a valid pointer until the state is freed using + * hts_base_mod_free(). + */ +int *bam_mods_recorded(hts_base_mod_state *state, int *ntype) { + *ntype = state->nmods; + return state->type; +} + +/* + * Returns data about a specific modification type for the alignment record. + * Code is either positive (eg 'm') or negative for ChEBI numbers. + * + * Return 0 on success or -1 if not found. The strand, implicit and canonical + * fields are filled out if passed in as non-NULL pointers. 
+ */ +int bam_mods_query_type(hts_base_mod_state *state, int code, + int *strand, int *implicit, char *canonical) { + // Find code entry + int i; + for (i = 0; i < state->nmods; i++) { + if (state->type[i] == code) + break; + } + if (i == state->nmods) + return -1; + + // Return data + if (strand) *strand = state->strand[i]; + if (implicit) *implicit = state->implicit[i]; + if (canonical) *canonical = "?AC?G???T??????N"[state->canonical[i]]; + + return 0; +} + +/* + * Returns data about the ith modification type for the alignment record. + * + * Return 0 on success or -1 if not found. The strand, implicit and canonical + * fields are filled out if passed in as non-NULL pointers. + */ +int bam_mods_queryi(hts_base_mod_state *state, int i, + int *strand, int *implicit, char *canonical) { + if (i < 0 || i >= state->nmods) + return -1; + + // Return data + if (strand) *strand = state->strand[i]; + if (implicit) *implicit = state->implicit[i]; + if (canonical) *canonical = "?AC?G???T??????N"[state->canonical[i]]; + + return 0; +} diff --git a/src/htslib-1.21/samples/DEMO.md b/src/htslib-1.21/samples/DEMO.md new file mode 100644 index 0000000..98c9981 --- /dev/null +++ b/src/htslib-1.21/samples/DEMO.md @@ -0,0 +1,1740 @@ +# HTS API + +## HTSLib APIs and samtools + +HTSLib is a C library implementation used to access and process the genome +sequence data. HTSLib implements multiple API interfaces, HTS API, VCF API and +SAM API. HTS API provides a framework for use by other APIs and applications, +implements bgzf compression, htscodecs and provides CRAM format support. VCF +APIs work with variant data in VCF and BCF format. + +SAM API works with sequence data of different formats, SAM / BAM / CRAM / +FASTA / FASTQ, and provides methods to do operations on the data. It uses +methods from HTS API. + +'samtools' is the utility used to read and modify sequence data. It uses SAM +APIs from HTSLib to work on the sequence data. 
+ + +## About this document + +There are a number of demonstration utilities and their source code in the +'samples' directory of HTSLib; this document describes them +and the usage of the HTSLib API. The samples are for demonstration +purposes only and proper error handling is required for actual usage. This +document is based on HTSLib version 1.17. + +Updates to this document may be made along with later releases when required. + + +## The sample apps + +Flags - This application showcases the basic read of alignment files and flag +access. It reads and shows the count of read1 and read2 alignments. + +Split - This application showcases the basic read and write of alignment data. +It saves the read1 and read2 as separate files in given directory, one as sam +and other as bam. + +Split2 - This application showcases the output file format selection. It saves +the read1 and read2 as separate files in given directory, both as compressed +sam though the extensions are different. + +Cram - This application showcases the different ways in which cram reference +data is used for cram output creation. + +Read_fast - This application showcases the fasta/fastq data read. + +Read_header - This application showcases the read and access of header data. +It can show all header lines of a given type, data of a given tag on a specific +header line or for all lines of a given type. + +Read_ref - This application showcases the read and access of header data. +It shows all reference names which have a length greater than or equal to the given input. + +Read_bam - This application showcases read of different alignment data fields. +It shows contents of each alignment. + +Read_aux - This application showcases read of specific auxiliary tag data in +alignment. It shows the data retrieved using 2 APIs, one as a string with tag +data and other as raw data alternatively. + +Dump_aux - This application showcases read of all auxiliary tag data one by one +in an alignment. 
It shows the data retrieved. + +Add_header - This application showcases the write of header lines to a file. +It adds header line of types, SQ, RG, PG and CO and writes to standard output. + +Remove_header - This application showcases removal of header line from a file. +It removes either all header lines of given type or one specific line of given +type with given unique identifier. Modified header is written on standard +output. + +Update_header - This application shows the update of header line fields, where +update is allowed. It takes the header line type, unique identifier for the +line, tag to be modified and the new value. Updated data is written on standard +output. + +Mod_bam - This application showcases the update of alignment data. It takes +alignment name, position of field to be modified and new value of it. +Modified data is written on standard output. + +Mod_aux - This application showcases the update of auxiliary data in alignment. +It takes alignment name, tag to be modified, its type and new value. Modified +data is written on standard output. + +Mod_aux_ba - This application showcases the update of auxiliary array data in +alignment. It adds count of ATCGN base as an array in auxiliary data, BA:I. +Modified data is written on standard output. + +Write_fast - This application showcases the fasta/fastq data write. It appends +data to given file. + +Index_write - This application showcases the creation of index along with +output creation. Based on file type and shift, it creates bai, csi or crai +files. + +Index_fast - This application showcases the index creation on fasta/fastq +reference files. + +Read_reg - This application showcases the usage of region specification in +alignment read. + +Read_multireg - This application showcases the usage of multiple region +specification in alignment read. + +Read_fast_index - This application showcases the fasta/fastq data read using +index. 
+ +Pileup - This application showcases the pileup api, where all alignments +covering a reference position are accessed together. It displays the bases +covering each position on standard output. + +Mpileup - This application showcases the mpileup api, which supports multiple +input files for pileup and gives a side by side view of them in pileup format. +It displays the bases covering each position on standard output. + +Modstate - This application showcases the access of base modifications in +alignment. It shows the modifications present in an alignment and accesses them +using available APIs. There are 2 APIs and which one to be used can be selected +through input. + +Pileup_mod - This application showcases the base modification access in pileup +mode. It shows the pileup display with base modifications. + +Flags_field - This application showcases the read of selected fields alone, +reducing the overhead / increasing the performance. It reads the flag field +alone and shows the count of read1 and read2. This has impact only on CRAM +files. + +Split_thread1 - This application showcases the use of threads in file handling. +It saves the read1 and read2 as separate files in given directory, one as sam +and other as bam. 2 threads are used for read and 1 each dedicated for each +output file. + +Split_thread2 - This application showcases the use of thread pool in file +handling. It saves the read1 and read2 as separate files in given directory, +one as sam and other as bam. A pool of 4 threads is created and shared for both +read and write. + +Qtask_ordered - This application showcases the use of queues and threads for +custom processing. Alignments in input file are updated with their GC ratio +on a custom aux tag. The processing may occur in any order but the result is +retrieved in same order as it was queued and saved to disk. + +Qtask_unordered - This application showcases the use of queues and threads +for custom processing. 
The count of bases and GC ratio are calculated and +displayed. The order of counting is irrelevant and hence ordered retrieval is +not used. + +## Building the sample apps + +The samples expect that HTSLib is installed and that its library and header file paths are +part of the PATH environment variable. If not, these paths need to be explicitly +passed during the build time. + +Gcc and compatible compilers can be used to build the samples. + +These applications can be linked statically or dynamically to HTSLib. +For static linking, along with htslib the other libraries and/or headers required +to build are the math, pthread, curl, lzma, z and bz2 libraries. + +A makefile is available along with source files which links statically to +htslib. To use dynamic linking, update the makefile's 'LDFLAGS' and 'rpath' +path. The 'rpath' path is to be set to the path of the lib directory of the htslib +installation. + + +## Usage of HTS APIs +### Sequence data file access for read + +The sequence data file for read may be opened using the sam_open method. It +opens the file and returns samFile (htsFile) pointer on success or NULL on +failure. The input can be a path to a file on disk, network, cloud or '-' +designating the standard input. + +SAM, BAM and CRAM file formats are supported and the input file format is +detected from the file content. + +Once done with the file, it needs to be closed with sam_close. + +Many times, header details would be required and can be read using +sam_hdr_read api. It returns sam_hdr_t pointer or NULL. The returned header +needs to be destroyed using sam_hdr_destroy when no longer required. + +The sequence data may be compressed or uncompressed on disk and in memory it +is read and kept as uncompressed BAM format. It can be read from a file using +sam_read1 api. samFile pointer, header and bam storage are to be passed as +arguments and it returns 0 on success, -1 on end of file and < -1 in case of +errors. 
+ +The bam storage has to be initialized using bam_init1 api before the call and +can be reused for successive reads. Once done, it needs to be destroyed using +bam_destroy1. The member field named core - bam1_core_t - in bam storage, +bam1_t, has the sequence data in an easily accessible way. Using the fields +and macros, data can easily be read from it. + + #include <htslib/sam.h> + + int main(int argc, char *argv[]) + { + ... + //initialize + if (!(bamdata = bam_init1())) + ... // error + //open input files - r reading + if (!(infile = sam_open(inname, "r"))) + ... // error + //read header + if (!(in_samhdr = sam_hdr_read(infile))) + ... // error + + //read data, check flags and update count + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) + cntread1++; + ... + + //clean up + if (in_samhdr) + sam_hdr_destroy(in_samhdr); + + if (infile) + sam_close(infile); + + if (bamdata) + bam_destroy1(bamdata); + + return ret; + } +Refer: flags_demo.c + +This shows the count of read1 and read2 alignments. + + ./flags /tmp/sample.sam.gz + +To read CRAM files, reference data is required and if it is not available, based +on configuration, library may try to download it from external repositories. + + +### Sequence data file access for write + +File access for write is similar to read with a few additional optional steps. + +The output file can be opened using sam_open api as in read, with "w" instead +of "r" as mode. This opens the file for writing and uses mode to select the +output file type. "w" alone denotes SAM, "wb" denotes BAM and "wc" denotes CRAM. + +Another way is to use sam_open_mode method, which sets the output file type and +compression based on the file name and explicit textual format specification. +This method expects a buffer to append type and compression flags. 
Usually a +buffer with standard file open flag is used, the buffer past the flag is passed +to the method to ensure existing flags and updates from this method are present +in the same buffer without being overwritten. This method will add more flags +indicating file type and compression based on name. If explicit format detail +given, then extension is ignored and the explicit specification is used. This +updated buffer can be used with sam_open to select the file format. + +sam_open_format method may also be used to open the file for output as more +information on the output file can be specified using this. Can use +mode buffer from sam_open_mode api or explicit format structure for this. + +The header data can be written using the sam_hdr_write api. When the header +data is copied to another variable and has different lifetime, it is good to +increase the reference count of the header using sam_hdr_incr_ref and +sam_hdr_destroy called as many times as required. + +The alignment data can be written using the sam_write1 api. It takes a samFile +pointer, header pointer and the alignment data. The header data is required to +set the reference name in the alignment. It returns -ve value on error. + + int main(int argc, char *argv[]) + { + ... + if (!(infile = sam_open(inname, "r"))) + ... // error + outfile1 = sam_open(file1, "w"); //as SAM + outfile2 = sam_open(file2, "wb"); //as BAM + ... + if (!(in_samhdr = sam_hdr_read(infile))) + ... // error + + //write header + if ((sam_hdr_write(outfile1, in_samhdr) == -1) || + (sam_hdr_write(outfile2, in_samhdr) == -1)) + ... // error + + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) { + if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { + ... // error + } +Refer: split.c + +This creates 1.sam and 2.bam in /tmp/ containing read1 and read2 respectively. + + ./split /tmp/sample.sam.gz /tmp/ + +Below code excerpt shows sam_open_mode api usage. 
+ + int main(int argc, char *argv[]) + { + ... + //set file open mode based on file name for 1st and as explicit for 2nd + if ((sam_open_mode(mode1+1, file1, NULL) == -1) || + (sam_open_mode(mode2+1, file2, "sam.gz") == -1)) + ... // error + if (!(infile = sam_open(inname, "r"))) + ... // error + + //open output files + outfile1 = sam_open(file1, mode1); //as compressed SAM through sam_open + outfile2 = sam_open_format(file2, mode2, NULL); //as compressed SAM through sam_open_format + ... + } +Refer: split2.c + +This creates 1.sam.gz and 2.sam in /tmp/ both having compressed data. + + ./split2 /tmp/sample.sam.gz /tmp/ + +An htsFormat structure filled appropriately can also be used to specify output +file format while using sam_open_format api. + + +### CRAM writing + +CRAM files uses reference data and compresses alignment data. A CRAM file may +be created with external reference data file - most appropriate, with embedded +reference in it or with no reference data at all. It can also be created using +an autogenerated reference, based on consensus with-in the alignment data. +The reference detail can be set to an htsFormat structure using hts_parse_format +api and used with sam_open_format api to create appropriate CRAM file. + + ... + snprintf(reffmt1, size1, "cram,reference=%s", reffile); + snprintf(reffmt2, size2, "cram,embed_ref=1,reference=%s", reffile); + ... + if (hts_parse_format(&fmt1, reffmt1) == -1 || //using external reference - uses the M5/UR tags to get + reference data during read + hts_parse_format(&fmt2, reffmt2) == -1 || //embed the reference internally + hts_parse_format(&fmt3, "cram,embed_ref=2") == -1 || //embed autogenerated reference + hts_parse_format(&fmt4, "cram,no_ref=1") == -1) { //no reference data encoding at all + ... // error + outfile1 = sam_open_format(file1, "wc", &fmt1); outfile2 = sam_open_format(file2, "wc", &fmt2); + ... 
+Refer: cram.c + + +### FASTA/FASTQ data access + +FASTA/FASTQ files have the raw sequence data and the data can be read one by +one using sam_read1 or a selected range using a region. The data can be written +similar to alignment data using sam_write1 api. To write the file, format +can be set by updating mode buffer using sam_open_mode with file name +or explicit format text. This mode buffer can be used with sam_open or can be +used with sam_open_format with explicit format information in htsFormat +structure. + +It is the FASTA format which is mainly in use to store the reference data. + + ... + if (!(bamdata = bam_init1())) + ... // error + if (!(infile = sam_open(inname, "r"))) + ... // error + if (infile->format.format != fasta_format && infile->format.format != fastq_format) + ... // error + if (!(in_samhdr = sam_hdr_read(infile))) + ... // error + + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) + ... // error + printf("\nsequence: "); + for (c = 0; c < bamdata->core.l_qseq; ++c) { + printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]); + } + if (infile->format.format == fastq_format) { + printf("\nquality: "); + for (c = 0; c < bamdata->core.l_qseq; ++c) { + printf("%c", bam_get_qual(bamdata)[c] + 33); + ... +Refer: read_fast.c + + ... + char mode[4] = "a"; + ... + if (sam_open_mode(mode + 1, outname, NULL) < 0) + ... // error + if (!(outfile = sam_open(outname, mode))) + ... // error + if (bam_set1(bamdata, strlen(name), name, BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, strlen(data), data, qual, 0) < 0) + ... // error + if (sam_write1(outfile, out_samhdr, bamdata) < 0) { + printf("Failed to write data\n"); + ... +Refer: write_fast.c + + +### Header data read + +The header gives the version, reference details, read group, change history +and comments. These data are stored inside the sam_hdr_t. Each of these +entries, except comments, have their unique identifier and it is required to +access different fields of them. 
The api sam_hdr_count_lines gives the count +of the specified type of header line. The value of a unique identifier to a +specific type of header line can be retrieved with sam_hdr_line_name api. The +api sam_hdr_find_tag_id and sam_hdr_find_tag_pos can get the field data from a +header line using unique identifier values or using position. The full header +line can be retrieved using sam_hdr_find_line_pos or sam_hdr_line_id with +position and unique identifier values respectively. + + ... + if (!(in_samhdr = sam_hdr_read(infile))) + ... // error + ... + if (tag) + ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data); + else + ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data); + ... + linecnt = sam_hdr_count_lines(in_samhdr, header); + ... + if (tag) + ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data); + else + ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data); + ... +Refer: read_header.c + +This will show the VN tag's value from HD header. + + ./read_header /tmp/sample.sam.gz HD VN + +Shows the 2nd SQ line's LN field value. + + ./read_header /tmp/sample.sam.gz SQ SN T2 LN + +Below code excerpt shows the reference names which has length above given value. + + ... + linecnt = sam_hdr_count_lines(in_samhdr, "SQ"); //get reference count + ... + //iterate and check each reference's length + for (pos = 1, c = 0; c < linecnt; ++c) { + if ((ret = sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "LN", &data) == -2)) + ... // error + + size = atoll(data.s); + if (size < minsize) { + //not required + continue; + } + + //sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "SN", &data) can also do the same! + if (!(id = sam_hdr_line_name(in_samhdr, "SQ", c))) + ... // error + + printf("%d,%s,%s\n", pos, id, data.s); + ... +Refer: read_refname.c + + +### Alignment data read + +The alignment / sequence data contains many fields. 
Mainly the read/query +name, flags indicating the properties of the read, reference sequence name, +position in reference to which it matches, quality of the read, CIGAR string +indicating the match status, position of mate / reverse strand, name of +reference sequence to which mate matches, the insert length, base sequence, +quality value of each base and auxiliary fields. + +Header data would be required to retrieve the reference names as alignment +contains the position of the reference in the header. + +A few of the data are directly visible in bam1_t and the rest are hidden +inside data member of bam1_t and can easily be retrieved using macros. +bam_get_qname gives the name of the read, sam_hdr_tid2name gives the reference +name. bam_get_cigar retrieves the cigar operation array, which can be decoded +using bam_cigar_oplen to get count of bases to which that operation applicable +and bam_cigar_opchr to get the cigar operation. bam_seqi retrieves the base +data at a given position in alignment and it can be converted to character by +indexing the seq_nt16_str array. + + ... + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) + { + //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE] + printf("NAME: %s\n", bam_get_qname(bamdata)); //get the query name using the macro + flags = bam_flag2str(bamdata->core.flag); //flags as string + ... + tidname = sam_hdr_tid2name(in_samhdr, bamdata->core.tid); + ... + printf("MQUAL: %d\n", bamdata->core.qual); //map quality value + cigar = bam_get_cigar(bamdata); //retrieves the cigar data + for (i = 0; i < bamdata->core.n_cigar; ++i) { //no. 
of cigar data entries + printf("%d%c", bam_cigar_oplen(cigar[i]), bam_cigar_opchr(cigar[i])); + //the macros gives the count of operation and the symbol of operation for given cigar entry + } + printf("\nTLEN/ISIZE: %"PRIhts_pos"\n", bamdata->core.isize); + data = bam_get_seq(bamdata); + //get the sequence data + if (bamdata->core.l_qseq != bam_cigar2qlen(bamdata->core.n_cigar, cigar)) { //checks the length with CIGAR and query + ... + for (i = 0; i < bamdata->core.l_qseq ; ++i) { //sequence length + printf("%c", seq_nt16_str[bam_seqi(data, i)]); //retrieves the base from (internal compressed) sequence data + ... + printf("%c", bam_get_qual(bamdata)[i]+33); //retrieves the quality value + ... +Refer: read_bam.c + +Shows the data from alignments. + + ./read_bam /tmp/sample.sam.gz + + +### Aux data read + +Auxiliary data gives extra information about the alignment. There can be a +number of such data and can be accessed by specifying required tag or by +iterating one by one through them once the alignment is read as bam1_t. The +auxiliary data are stored along with the variable length data in the data +field of bam1_t. There are macros defined to retrieve information about +auxiliary data from the data field of bam1_t. + +Data for a specific tag can be retrieved as a string or can be retrieved as raw +data. bam_aux_get_str retrieves as a string, with tag name, tag type and data. +bam_aux_get can get raw data and with bam_aux_type and bam_aux2A, bam_aux2f etc. +the raw data can be extracted. + +To iterate through all data, the start of aux data is retrieved using macro +bam_aux_first and successive ones using bam_aux_next. Macro bam_aux_tag gives +the tag of the aux field and bam_aux_type gives the information about type of +the aux field. + +Bam_aux2i, bam_aux2f, bam_aux2Z macros retrieve the aux data's value as +integer, float and string respectively. 
The integer value may be of different +precision / size and the bam_aux_type character indicates how to use the +value. The string/hex data are NULL terminated. + +For array data, bam_aux_type will return 'B' and bam_auxB_len gives the length +of the array. bam_aux_type with the next byte will give the type of data in +the array. bam_auxB2i, bam_auxB2f will give integer and float data from a +given position of the array. + + ... + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + //option 1 - get data as string with tag and type + if ((c = bam_aux_get_str(bamdata, tag, &sdata)) == 1) { + printf("%s\n",sdata.s); + ... + //option 2 - get raw data + if ((data = bam_aux_get(bamdata, tag)) != NULL) { + printauxdata(stdout, bam_aux_type(data), -1, data); + ... +Refer: read_aux.c + +Shows the MD aux tag from alignments. + + ./read_aux ../../samtools/test/mpileup/mpileup.1.bam MD + + ... + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + data = bam_aux_first(bamdata); //get the first aux data + while (data) { + printf("%.2s:%c:", bam_aux_tag(data), NULL != strchr("cCsSiI", bam_aux_type(data)) ? 'i' : bam_aux_type(data)); + //macros gets the tag and type of aux data + //dump the data + printauxdata(stdout, bam_aux_type(data), -1, data); + ... + data = bam_aux_next(bamdata, data); //get the next aux data + ... +Refer: dump_aux.c + +Shows all the tags from all alignments. + + ./dump_aux ../../samtools/test/mpileup/mpileup.1.bam + + +### Add/Remove/Update header + +There are specific types of data that can be part of header data. They have +a tag from HD, SQ, RG, PG and CO. Fully formatted header lines, separated by new +line, can be added with sam_hdr_add_lines api. A single header line can be added +using sam_hdr_add_line api where the header type, tag and value pair are passed +as arguments, terminated with a NULL argument. The PG header lines are special +that they have a kind of linkage to previous PG lines. 
This linkage can be auto +generated by using sam_hdr_add_pg api which sets the 'PP' field used in linkage. +sam_hdr_write api does the write of the header data to file. + + ... + //add SQ line with SN as TR1 and TR2 + if (sam_hdr_add_lines(in_samhdr, &sq[0], 0)) //length as 0 for NULL terminated data + ... // error + + //add RG line with ID as RG1 + if (sam_hdr_add_line(in_samhdr, "RG", "ID", "RG1", "LB", "Test", "SM", "S1", NULL)) + ... // error + + //add PG/CO lines + if (sam_hdr_add_pg(in_samhdr, "add_header", "VN", "Test", "CL", data.s, NULL)) //NULL is to indicate end of args + ... // error + if (sam_hdr_add_line(in_samhdr, "CO", "Test data", NULL)) //NULL is to indicate end of args + ... // error + + //write output + if (sam_hdr_write(outfile, in_samhdr) < 0) + ... // error +Refer: add_header.c + +Not all type of header data can be removed but where it is possible, either a +specific header line can be removed or all of a header type can be removed. To +remove a specific line, header type, unique identifier field tag and its value +to be used. To remove all lines of a type, header type and unique identifier +field tag are to be used. + + ... + + //remove specific line + if (sam_hdr_remove_line_id(in_samhdr, header, id, idval) < 0) + ... // error + + //remove multiple lines of a header type + if (sam_hdr_remove_lines(in_samhdr, header, id, NULL) < 0) + ... // error +Refer: rem_header.c + +Shows the file content after removing SQ line with SN 2. + ./rem_header ../../samtools/test/mpileup/mpileup.1.bam SQ 2 + +The unique identifier for the line needs to be found to update a field, though +not all types in the header may be modifiable. The api sam_hdr_update_line +takes the unique identifier for the header line type, its value, the field +which needs to be modified and the new value with which to modify it, followed +by a NULL. +e.g. 
To change LN field from 2000 to 2250 in SQ line with unique identifier SN +as 'chr1', sam_hdr_update_line( header, "SQ", "SN", "chr1", "LN", "2250", +NULL). To change PP field from ABC to DEF in PG line with ID APP.10, +sam_hdr_update_line( header, "PG", "ID", "APP.10", "PP", "DEF", NULL). + + ... + //update with new data + if (sam_hdr_update_line(in_samhdr, header, id, idval, tag, val, NULL) < 0) { + printf("Failed to update data\n"); + goto end; + } + ... +Refer: update_header.c + +Shows new sam file with 2nd SQ line having length as 38. + + ./update_header /tmp/sample.sam.gz SQ T1 LN 38 + + +### Update alignment data + +Many of the bam data fields may be updated by setting new value to appropriate +field in bam1_core_t structure and for a few, creating a new bam1_t record would +be easier than update of existing record. + + ... + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) + { + ... + case 1:// QNAME + ret = bam_set_qname(bamdata, val); + break; + case 2:// FLAG + bamdata->core.flag = atol(val) & 0xFFFF; + break; + case 3:// RNAME + case 7:// RNEXT + if ((ret = sam_hdr_name2tid(in_samhdr, val)) < 0) + ... // error + if (field == 3) { + //reference + bamdata->core.tid = ret; + } else { + //mate reference + bamdata->core.mtid = ret; + } + break; + case 4:// POS + bamdata->core.pos = atoll(val); + break; + case 5:// MAPQ + bamdata->core.qual = atoi(val) & 0x0FF; + break; + case 6:// CIGAR + { + ... + //get cigar array and set all data in new bam record + if ((ncigar = sam_parse_cigar(val, NULL, &cigar, &size)) < 0) + ... // error + if (bam_set1(newbam, bamdata->core.l_qname, bam_get_qname(bamdata), bamdata->core.flag, bamdata->core.tid, + bamdata->core.pos, bamdata->core.qual, ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos, + bamdata->core.isize, bamdata->core.l_qseq, (const char*)bam_get_seq(bamdata), + (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) + ... 
// error + + //correct sequence data as input is expected in ascii format and not as compressed inside bam! + memcpy(bam_get_seq(newbam), bam_get_seq(bamdata), (bamdata->core.l_qseq + 1) / 2); + //copy the aux data + memcpy(bam_get_aux(newbam), bam_get_aux(bamdata), bam_get_l_aux(bamdata)); + ... + break; + case 8:// PNEXT + bamdata->core.mpos = atoll(val); + break; + case 9:// TLEN + bamdata->core.isize = atoll(val); + break; + case 10:// SEQ + ... + for( c = 0; c < i; ++c) { + bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[(unsigned char)val[c]]); + } + break; + case 11:// QUAL + ... + for (c = 0; c < i; ++c) + val[c] -= 33; //phred score from ascii value + memcpy(bam_get_qual(bamdata), val, i); +Refer: mod_bam.c + +Shows data with RNAME modified to T2. + + ./mod_bam /tmp/sample.sam ITR1 3 T2 + +The auxiliary data in bam1_t structure can be modified using +bam_aux_update_float, bam_aux_update_int etc. apis. If the aux field is not +present at all, it can be appended using bam_aux_append. + + ... + //matched to qname, update aux + if (!(data = bam_aux_get(bamdata, tag))) { + //tag not present append + ... // cut: computed length and val based on tag type + if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val)) + ... // error + } else { + //update the tag with newer value + char auxtype = bam_aux_type(data); + switch (type) { + case 'f': + case 'd': + ... + if (bam_aux_update_float(bamdata, tag, atof(val))) + ... // error + case 'C': + case 'S': + case 'I': + ... + if (bam_aux_update_int(bamdata, tag, atoll(val))) + ... // error + case 'Z': + ... + if (bam_aux_update_str(bamdata, tag, length, val)) + ... // error + case 'A': + ... + //update the char data directly on buffer + *(data+1) = val[0]; +Refer: mod_aux.c + +Shows the given record's MD tag set to Test. + + ./mod_aux samtools/test/mpileup/mpileup.1.bam ERR013140.6157908 MD Z Test + +The array aux fields can be updated using bam_aux_update_array api. + + ... 
+ if (bam_aux_update_array(bamdata, "BA", 'I', sizeof(cnt)/sizeof(cnt[0]), cnt)) + ... // error +Refer: mod_aux_ba.c + +Shows the records updated with an array of integers, containing count of ACGT +and N in that order. The bases are decoded before count for the sake of +simplicity. Refer qtask_ordered.c for a better counting where decoding is made +outside the loop. + + ./mod_aux_ba samtools/test/mpileup/mpileup.1.bam + + +### Create an index + +Indexes help to read data faster without iterating sequentially through the +file. Indexes contain the position information about alignments and that they +can be read easily. There are different type of indices, BAI, CSI, CRAI, TBI, +FAI etc. and are usually used with iterators. + +Indexing of plain/textual files are not supported, compressed SAM&FASTA/Q, BAM, +and CRAM files can be indexed. CRAM files are indexed as .crai and the others +as .bai, .csi, .fai etc. Each of these types have different internal +representations of the index information. Bai uses a fixed configuration values +where as csi has them dynamically updated based on the alignment data. + +Indexes can be created either with save of alignment data or explicitly by +read of existing alignment file for alignment data (SAM/BAM/CRAM). For reference +data it has to be explicitly created (FASTA). + +To create index along with alignment write, the sam_idx_init api need to be +invoked before the start of alignment data write. This api takes the output +samFile pointer, header pointer, minimum shift and index file path. For BAI +index, the min shift has to be 0. + +At the end of write, sam_idx_save api need to be invoked to save the index. + + ... + //write header + if (sam_hdr_write(outfile, in_samhdr)) + ... // error + // initialize indexing, before start of write + if (sam_idx_init(outfile, in_samhdr, size, fileidx)) + ... // error + if (sam_write1(outfile, in_samhdr, bamdata) < 0) + ... // error + if (sam_idx_save(outfile)) + ... 
// error +Refer:index_write.c + +Creates mpileup.1.bam and mpileup.1.bam.bai in /tmp/. + + ./idx_on_write ../../samtools/test/mpileup/mpileup.1.bam 0 /tmp/ + +To create index explicitly on an existing alignment data file, the +sam_index_build api or its alike can be used. sam_index_build takes the +alignment file path, min shift for the index and creates the index file in +same path. The output name will be based on the alignment file format and min +shift passed. + +The sam_index_build2 api takes the index file path as well and gives more +control than the previous one. The sam_index_build3 api provides an option to +configure the number of threads in index creation. + +Index for reference data can be created using fai_build3 api. This creates +index file with .fai extension. If the file is bgzip-ped, a .gzi file is +created as well. It takes the path to input file and that of fai and gzi files. +When fai/gzi path are NULL, they are created along with input file. +These index files will be useful for reference data access. + + ... + if (fai_build3(filename, NULL, NULL) == -1) + ... // error +Refer: index_fast.c + +A tabix index can be created for compressed vcf/sam/bed and other data using +tbx_index_build. It is mainly used with vcf and non-sam type files. + + +### Read with iterators + +Index file helps to read required data without sequentially accessing the file +and are required to use iterators. The interested reference, start and end +position etc. are required to read data with iterators. With index and these +information, an iterator is created and relevant alignments can be accessed by +iterating it. + +The api sam_index_load and the like does the index loading. It takes input +samFile pointer and file path. It loads the index file based on the input file +name, from the same path and with implicit index file extension - cram file +with .crai and others with .bai. 
The sam_index_load2 api accepts explicit path +to index file, which allows loading it from a different location and explicit +extensions. The sam_index_load3 api supports download/save of the index +locally from a remote location. These apis return NULL on failure and index +pointer on success. + +The index file path can be appended to alignment file path and used as well. +In this case the paths are expected to be separated by '##idx##'. + +The sam_itr_queryi or sam_itr_querys apis may be used to create an iterator +and sam_itr_next api does the alignment data retrieval. Along with retrieval +of current data, it advances the iterator to next relevant data. The +sam_itr_queryi takes the interested positions as numeric values and +sam_itr_querys takes the interested position as a string. + +With sam_itr_queryi, the reference id can be the 0 based index of reference +data, -2 for unmapped alignments, -3 to start read from beginning of file, -4 +to continue from current position, -5 to return nothing. Based on the +reference id given, alignment covering the given start and end positions will +be read with sam_itr_next api. + +With sam_itr_querys, the reference sequence is identified with the name and +interested positions can be described with start and end separated by '-' as +string. When sequence is identified as '.', it begins from the start of file +and when it is '*', unmapped alignments are read. Reference with <ID>[:], +<ID>:S, <ID>:S-E, <ID>:-E retrieves all data, all data covering position +S onwards, all data covering position S to E, all data covering up to position +E of reference with ID respectively on read using sam_itr_next. + +The index and iterator created are to be destroyed once the need is over. +sam_itr_destroy and hts_idx_destroy apis do this. + + ... + //load index file + if (!(idx = sam_index_load2(infile, inname, idxfile))) + ... // error + //create iterator + if (!(iter = sam_itr_querys(idx, in_samhdr, region))) + ... 
// error + + //read using iterator + while ((c = sam_itr_next(infile, iter, bamdata)) >= 0) + ... // error + + if (iter) + sam_itr_destroy(iter); + if (idx) + hts_idx_destroy(idx); + ... +Refer:index_reg_read.c + +With sample.sam, region as \* will show alignments with name UNMAP2 and UNMAP3 + + ./read_reg /tmp/sample.sam.gz \* + +With region as \., it shows all alignments + + ./read_reg /tmp/sample.sam.gz \. + +With region as T1:1-4, start 1 and end 4 it shows nothing and with T1:1-5 it +shows alignment with name ITR1. + + ./read_reg /tmp/sample.sam.gz T1:1-5 + +With region as T2:30-100, it shows alignment with name ITR2M which refers the +reference data T2. + + ./read_reg /tmp/sample.sam.gz T2:30-100 + + +Multiple interested regions can be specified for read using sam_itr_regarray. +It takes index path, header, count of regions and region descriptions as array +of char array / string. This array passed need to be released by the user +itself. + + ... + //load index file, assume it to be present in same location + if (!(idx = sam_index_load(infile, inname))) + ... // error + //create iterator + if (!(iter = sam_itr_regarray(idx, in_samhdr, regions, regcnt))) + ... // error + if (regions) { + //can be freed as it is no longer required + free(regions); + regions = NULL; + } + + //get required area + while ((c = sam_itr_multi_next(infile, iter, bamdata) >= 0)) + ... // process bamdata +Refer:index_multireg_read.c + +With compressed sample.sam and 2 regions from reference T1 (30 to 32) and 1 +region from T2 (34 onwards), alignments with name A1, B1, A2 and ITR2M would +be shown. + + ./read_multireg /tmp/sample.sam.gz 2 T1:30-32,T2:34 + +To use numeric indices instead of textual regions, sam_itr_regions can be used. +It takes index file path, header, count of regions and an array of region +description (hts_reglist_t*), which has the start end positions as numerals. + +The index and iterators are to be destroyed using the sam_itr_destroy and +hts_idx_destroy. 
The hts_reglist_t* array passed is destroyed by the library +on iterator destroy. The regions array (array of char array/string) needs to be +destroyed by the user itself. + +For fasta/fastq files, the index has to be loaded using fai_load3_format which +takes the file, index file names and format. With single region specification +fai_fetch64 can be used to get bases, and fai_fetchqual64 for quality in case +of fastq data. With multiple region specification, with comma separation, +faidx_fetch_seq64 and faidx_fetch_qual64 does the job. Regions has to be parsed +using fai_parse_region in case of multiregion specifications. fai_adjust_region +is used to adjust the start-end points based on available data. + +Below excerpt shows fasta/q access with single and multiregions, + + ... + //load index + if (!(idx = fai_load3_format(inname, NULL, NULL, FAI_CREATE, fmt))) + ... // error + + ... + if (!usemulti) { + //get data from single given region + if (!(data = fai_fetch64(idx, region, &len))) + ... // region not found + + printf("Data: %"PRId64" %s\n", len, data); + free((void*)data); + //get quality for fastq type + if (fmt == FAI_FASTQ) { + if (!(data = fai_fetchqual64(idx, region, &len))) + ... // region not found + ... + + } else { // usemulti + //parse, get each region and get data for each + while ((remaining = fai_parse_region(idx, region, &tid, &beg, &end, HTS_PARSE_LIST))) { //here expects regions as csv + //parsed the region, correct end points based on actual data + if (fai_adjust_region(idx, tid, &beg, &end) == -1) + ... // error + //get data for given region + if (!(data = faidx_fetch_seq64(idx, faidx_iseq(idx, tid), beg, end, &len))) + ... // region not found + + printf("Data: %"PRIhts_pos" %s\n", len, data); + free((void*)data); + data = NULL; + //get quality data for fastq + if (fmt == FAI_FASTQ) { + if (!(data = faidx_fetch_qual64(idx, faidx_iseq(idx, tid), beg, end, &len))) + ... 
// error + printf("Qual: %"PRIhts_pos" %s\n", len, data); + free((void*)data); + ... + region = remaining; //parse remaining region defs + + ... + if (idx) { + fai_destroy(idx); + ... +Refer: read_fast_index.c + + +### Pileup and MPileup + +Pileup shows the transposed view of the SAM alignment data, i.e. it shows the +reference positions and bases which cover that position through different reads +side by side. MPileup facilitates the piling up of multiple sam files against +each other and same reference at the same time. + +Mpileup has replaced the pileup. The input expects the data to be sorted by +position. + +Pileup needs to be initialized with bam_plp_init method which takes pointer +to a method, which will be called by pileup to read data from required files, +and pointer to data which might be required for this read method to do the +read operation. It returns a pointer to the pileup iterator. + +User can specify methods which need to be invoked during the load and unload +of an alignment, like constructor and destructor of objects. +bam_plp_constructor and bam_plp_destructor methods do the setup of +these methods in the pileup iterator. During invocation of these methods, the +pointer to data passed in the initialization is passed as well. If the user wants +to do any custom status handling or actions during load or unload, it can be +done in these methods. Alignment specific data can be created and stored in +an argument passed to the constructor and the same will be accessible during +pileup status return. The same will be accessible during destructor as well +where any deallocation can be made. + +User is expected to invoke bam_plp_auto api to get the pileup status. It +returns the pileup status or NULL on end. During this all alignments are read +one by one, using the method given in initialization for data read, until one +for a new reference is found or all alignments covering a position are read. 
On +such condition, the pileup status is returned and the same continues on the next +bam_plp_auto call. The pileup status returned is an array for all positions +for which the processing is completed. Along with the result, the reference +index, position in reference data and number of alignments which cover this +position are passed. User can iterate the result array and get bases from each +alignment which covers the given reference position. The alignment specific +custom data which were created in constructor function will also be available +in the result. + +The bam_plp_auto api invokes the data read method to load an alignment and the +constructor method is invoked during the load. Once the end of alignment is +passed, it is removed from the processing and destructor method is invoked, +so that user can do deallocations and custom actions as in load during this +time. The custom data passed during the initialization is passed to the +constructor and destructor methods during invocation. + +Once the forward and reverse strands are identified, the better of the quality +is identified and used. Both reads are required for this and hence reads are +cached until its mate is read. The maximum number of reads that can be cached +is controlled by bam_plp_set_maxcnt. Reads covering a position are cached and +as soon as mate is found, quality is adjusted and is removed from cache. Reads +above the cache limit are discarded. + +Once done, the pileup iterator is to be discarded with the bam_plp_destroy api. + + ... + if (!(plpiter = bam_plp_init(readdata, &conf))) + ... 
// error + //set constructor destructor callbacks + bam_plp_constructor(plpiter, plpconstructor); + bam_plp_destructor(plpiter, plpdestructor); + + while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &n))) { + printf("%d\t%d\t", tid+1, refpos+1); + for (j = 0; j < n; ++j) { + //doesnt detect succeeding insertion and deletion together here, only insertion is identified + //deletion is detected in plp->is_del as and when pos reaches the position + //if detection ahead is required, use bam_plp_insertion here which gives deletion length along with insertion + if (plp[j].is_del || plp[j].is_refskip) { + printf("*"); + continue; + } + //start and end are displayed in UPPER and rest on LOWER + printf("%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : + (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : + tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]))); + if (plp[j].indel > 0) { + //insertions, anyway not start or end + printf("+%d", plp[j].indel); + for (k = 0; k < plp[j].indel; ++k) { + printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos + k + 1)])); + } + } + else if (plp[j].indel < 0) { + printf("%d", plp[j].indel); + for (k = 0; k < -plp[j].indel; ++k) { + printf("?"); + } + ... + if (plpiter) + bam_plp_destroy(plpiter); + ... +Refer:pileup.c + +The read method may use a simple read or it could be an advanced read using +indices, iterators and region specifications based on the need. The constructor +method may create any custom data and store it in the pointer passed to it. The +same need to be released by use on destructor method. + +MPileup works same as the pileup and supports multiple inputs against the same +reference, giving side by side view of reference and alignments from different +inputs. 
+ +MPileup needs to be initialized with bam_mplp_init method which takes +pointer to a method, which will be called by pileup to read data from required +files, and an array of pointers to data which might be required for this read +method to do the read operation. It returns a pointer to the mpileup iterator. + +User can specify methods which need to be invoked during the load and unload +of an alignment, like constructor and destructor of objects. +bam_mplp_constructor and bam_mplp_destructor methods do the setup +of these methods in the pileup iterator. During invocation of these methods, +the pointer to data passed in the initialization is passed as well. If the user +wants to do any custom status handling or actions during load or unload, it can +be done in these methods. Alignment specific data can be created and +stored in the custom data pointer and the same will be accessible during +return of pileup status. The same will be accessible during destructor as well +where any deallocation can be made. + +User is expected to invoke bam_mplp_auto api to get the pileup status. It +returns the pileup status. During this all alignments are read one by one, +using the method given in initialization for data read, until one for a new +reference is found or all alignments covering a position are read. On such +condition, the pileup status is returned and the same continues on the next +bam_mplp_auto call. + +The pileup status is returned through a parameter in the method itself, and is an +array for all inputs, each containing array for positions on which the +processing is completed. Along with the result, the reference index, position +in reference data and number of alignments which cover this position are +passed. User can iterate the result array and get bases from each alignment +which covers the given reference position. The alignment specific custom data +which were created in constructor function will also be available in the +result. 
+ +Once the forward and reverse strands are identified, the better of the quality +is identified and used. Both reads are required for this and hence reads are +cached until its mate is read. The maximum number of reads that can be cached +is controlled by bam_mplp_set_maxcnt. Reads covering a position are cached and +as soon as mate is found, quality is adjusted and is removed from cache. Reads +above the cache limit are discarded. + +Once done, the pileup iterator is to be discarded with the bam_mplp_destroy api. + + ... + if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf))) + ... // error + //set constructor destructor callbacks + bam_mplp_constructor(mplpiter, plpconstructor); + bam_mplp_destructor(mplpiter, plpdestructor); + + while (bam_mplp64_auto(mplpiter, &tid, &refpos, depth, plp) > 0) { + printf("%d\t%"PRIhts_pos"\t", tid+1, refpos+1); + + for (input = 0; input < argc - 1; ++input) { + for (dpt = 0; dpt < depth[input]; ++dpt) { + if (plp[input][dpt].is_del || plp[input][dpt].is_refskip) { + printf("*"); + continue; + } + //start and end are displayed in UPPER and rest on LOWER + printf("%c", plp[input][dpt].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), + plp[input][dpt].qpos)]) : (plp[input]->is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), + plp[input][dpt].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), + plp[input][dpt].qpos)]))); + if (plp[input][dpt].indel > 0) { + //insertions, anyway not start or end + printf("+%d", plp[input][dpt].indel); + for (k = 0; k < plp[input][dpt].indel; ++k) { + printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), + plp[input][dpt].qpos + k + 1)])); + } + } + else if (plp[input][dpt].indel < 0) { + printf("%d", plp[input][dpt].indel); + for (k = 0; k < -plp[input][dpt].indel; ++k) { + printf("?"); + ... + if (mplpiter) { + bam_mplp_destroy(mplpiter); + } + ... + if (plp) { + free(plp); + ... 
+Refer:mpileup.c + +This sample takes multiple sam files and shows the pileup of data side by side. + + ./mpileup /tmp/mp.bam /tmp/mp.sam + + +### Base modifications + +The alignment data may contain base modification information as well. This +gives the base, modifications found, orientation in which it was found and the +quality for the modification. The base modification can be identified using +bam_parse_basemod api. It stores the modification details on hts_base_mod_state +and this has to be initialized using hts_base_mod_state_alloc api. + +Once the modifications are identified, they can be accessed through different +ways. bam_mods_recorded api gives the modifications identified for an alignment. +Modifications can be queried for each base position iteratively using +bam_mods_at_next_pos api. Check the returned value with buffer size to see +whether the buffer is big enough to retrieve all modifications. +Instead of querying for each position, the next modified position can be +retrieved directly using bam_next_basemod api. An alignment can be +queried to have a specific modification using bam_mods_query_type api. At the +end of processing, the state needs to be released using hts_base_mod_state_free +api. + + ... + if (!(ms = hts_base_mod_state_alloc())) + ... // error + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) + { + ... + if (bam_parse_basemod(bamdata, ms)) + ... 
// error + bm = bam_mods_recorded(ms, &cnt); + for (k = 0; k < cnt; ++k) { + printf("%c", bm[k]); + } + printf("\n"); + hts_base_mod mod[5] = {0}; //for ATCGN + if (opt) { + //option 1 + for (; i < bamdata->core.l_qseq; ++i) { + if ((r = bam_mods_at_next_pos(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]))) <= -1) { + printf("Failed to get modifications\n"); + goto end; + } + else if (r > (sizeof(mod) / sizeof(mod[0]))) { + printf("More modifications than this app can handle, update the app\n"); + goto end; + } + else if (!r) { + //no modification at this pos + printf("%c", seq_nt16_str[bam_seqi(data, i)]); + } + //modifications + for (j = 0; j < r; ++j) { + printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); + ... + else { + //option 2 + while ((r = bam_next_basemod(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]), &pos)) >= 0) { + for (; i < bamdata->core.l_qseq && i < pos; ++i) { + printf("%c", seq_nt16_str[bam_seqi(data, i)]); + } + //modifications + for (j = 0; j < r; ++j) { + printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); + } + ... + //check last alignment's base modification + int strand = 0, impl = 0; + char canonical = 0, modification[] = "mhfcgebaon"; //possible modifications + printf("\n\nLast alignment has \n"); + for (k = 0; k < sizeof(modification) - 1; ++k) { //avoiding NUL termination + if (bam_mods_query_type(ms, modification[k], &strand, &impl, &canonical)) { + printf ("No modification of %c type\n", modification[k]); + } + else { + printf("%s strand has %c modified with %c, can %sassume unlisted as unmodified\n", strand ? "-/bottom/reverse" : + "+/top/forward", canonical, modification[k], impl?"" : "not " ); + } + } + ... + if (ms) + hts_base_mod_state_free(ms); + ... +Refer:modstate.c + +The modification can be accessed in pileup mode as well. bam_mods_at_qpos gives +the modification at given pileup position. 
Insertion and deletion to the given +position with possible modification can be retrieved using bam_plp_insertion_mod +api. + + ... + int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { + //when using cd, initialize and use as it will be reused after destructor + cd->p = hts_base_mod_state_alloc(); + //parse the bam data and gather modification data from MM tags + return (-1 == bam_parse_basemod(b, (hts_base_mod_state*)cd->p)) ? 1 : 0; + } + + int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { + if (cd->p) { + hts_base_mod_state_free((hts_base_mod_state *)cd->p); + cd->p = NULL; + } + return 0; + } + + int main(int argc, char *argv[]) + { + ... + if (!(plpiter = bam_plp_init(readdata, &conf))) { + ... // error + //set constructor destructor callbacks + bam_plp_constructor(plpiter, plpconstructor); + bam_plp_destructor(plpiter, plpdestructor); + + while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &depth))) { + memset(&mods, 0, sizeof(mods)); + printf("%d\t%d\t", tid+1, refpos+1); + + for (j = 0; j < depth; ++j) { + dellen = 0; + if (plp[j].is_del || plp[j].is_refskip) { + printf("*"); + continue; + } + /*invoke bam mods_mods_at_qpos before bam_plp_insertion_mod that the base modification + is retrieved before change in pileup pos thr' plp_insertion_mod call*/ + if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1) + ... // error + //use plp_insertion/_mod to get insertion and del at the same position + if ((inslen = bam_plp_insertion_mod(&plp[j], (hts_base_mod_state*)plp[j].cd.p, &insdata, &dellen)) == -1) + ... // error + //start and end are displayed in UPPER and rest on LOWER, only 1st modification considered + //base and modification + printf("%c%c%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : + (plp[j].is_tail ? 
toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : + tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)])), + modlen > 0 ? mods[0].strand ? '-' : '+' : '\0', modlen > 0 ? mods[0].modified_base : '\0'); + //insertion and deletions + if (plp[j].indel > 0) { + //insertion + /*insertion data from plp_insertion_mod, note this shows the quality value as well + which is different from base and modification above;the lower case display is not attempted either*/ + printf("+%d%s", plp[j].indel, insdata.s); + //handle deletion if any + if (dellen) { + printf("-%d", dellen); + for (k = 0; k < dellen; ++k) { + printf("?"); + ... + else if (plp[j].indel < 0) { + //deletion + printf("%d", plp[j].indel); + for (k = 0; k < -plp[j].indel; ++k) { + printf("?"); + } + } + ... +Refer:pileup_mod.c + + +### Read selected fields + +At times the whole alignment data may not be of interest and it would be +better to read required fields alone from the alignment data. CRAM file format +supports such specific data read and HTSLib provides an option to use this. +This can improve the performance on read operation. + +The hts_set_opt method does the selection of specified fields. There are flags +indicating specific fields, like SAM_FLAG, SAM_SEQ, SAM_QNAME, in alignment +data and a combination of flags for the required fields can be passed with +CRAM_OPT_REQUIRED_FIELDS to this api. + + ... + //select required field alone, this is useful for CRAM alone + if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0) + ... // error + + //read header + in_samhdr = sam_hdr_read(infile); + ... + //read data, check flags and update count + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) + cntread1++; + ... +Refer: flags_htsopt_field.c + + +### Thread-pool to read / write + +The HTSLib api supports thread pooling for better performance. There are a few +ways in which this can be used. 
The pool can be made specific for a file or a +generic pool can be created and shared across multiple files. Thread pool can +also be used to execute user defined tasks. The tasks are to be added to queue, +threads in pool executes them and results can be queued back if required. + +To have a thread pool specific for a file, hts_set_opt api can be used with the +file pointer, HTS_OPT_NTHREADS and the number of threads to be in the pool. +Thread pool is released on closure of file. To have a thread pool which can be +shared across different files, it needs to be initialized using hts_tpool_init +api, passing number of threads as an argument. This thread pool can be +associated with a file using hts_set_opt api. The file pointer, +HTS_OPT_THREAD_POOL and the thread pool address are to be passed as arguments to +the api. The thread pool has to be released with hts_tpool_destroy. + +The samples are trivial ones to showcase the usage of api. The number of threads +to use for different tasks has to be identified based on complexity and +parallelism of the task. + +Below excerpt shows file specific thread pool, + + ... + //create file specific threads + if (hts_set_opt(infile, HTS_OPT_NTHREADS, 1) < 0 || //1 thread specific for reading + hts_set_opt(outfile1, HTS_OPT_NTHREADS, 1) < 0 || //1 thread specific for sam write + hts_set_opt(outfile2, HTS_OPT_NTHREADS, 2) < 0) { //2 thread specific for bam write + printf("Failed to set thread options\n"); + goto end; + } +Refer: split_thread1.c + +Below excerpt shows a thread pool shared across files, + + ... + //create a pool of 4 threads + if (!(tpool.pool = hts_tpool_init(4))) + ... // error + //share the pool with all the 3 files + if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 || + hts_set_opt(outfile1, HTS_OPT_THREAD_POOL, &tpool) < 0 || + hts_set_opt(outfile2, HTS_OPT_THREAD_POOL, &tpool) < 0) { + ... // error + + ... // do something + + //tidy up at end + if (tpool.pool) + hts_tpool_destroy(tpool.pool); + ... 
+Refer: split_thread2.c + +Note that it is important to analyze the task in hand to decide the number of +threads to be used. As an example, if the number of threads for reading is set +to 2 and bam write to 1, keeping total number of threads the same, the +performance may decrease as bam decoding is easier than encoding. + +Custom task / user defined functions can be performed on data using thread pool +and for that, the task has to be scheduled to a queue. Thread pool associated +with the queue will perform the task. There can be multiple pools and queues. +The order of execution of threads is decided based on many factors and load on +each task may vary, so the completion of the tasks may not be in the order of +their queueing. The queues can be used in two different ways, one where the +result is enqueued to queue again to be read in same order as initial queueing, +second where the results are not enqueued and completed possibly in a different +order than initial queueing. Explicitly created threads can also be used along +with hts thread pool usage. + +hts_tpool_process_init initializes the queue / process, associates a queue with +thread pool and reserves space for given number of tasks on queue. It takes a +parameter indicating whether the result needs to be enqueued for retrieval or +not. If the result is enqueued, it is retrieved in the order of scheduling of +task. Another parameter sets the maximum number of slots for tasks in queue, +usually 2 times the number of threads are used. The input and output have their +own queues and they grow as required up to the max set. hts_tpool_dispatch api +enqueues the task to the queue. The api blocks when there is no space in queue. +This behavior can be controlled with hts_tpool_dispatch2 api. The queue can be +reset using hts_tpool_process_reset api where all tasks are discarded. The api +hts_tpool_dispatch3 supports configuring cleanup routines which are to be run +when reset occurs on the queue. 
hts_tpool_process_flush api can ensure that +all the piled up tasks are processed, a possible case when the queueing and +processing happen at different speeds. hts_tpool_process_shutdown api stops the +processing of queue. + +There are a few apis which let the user to check the status of processing. The +api hts_tpool_process_empty shows whether all the tasks are completed or not. +The api hts_tpool_process_sz gives the number of tasks, at different states of +processing. The api hts_tpool_process_len gives the number of results in output +queue waiting to be collected. + +The order of execution of tasks depends on the number of threads involved and +how the threads are scheduled by operating system. When the results are enqueued +back to queue, they are read in same order of enqueueing of task and in that +case the order of execution will not be noticed. When the results are not +enqueued the results are available right away and the order of execution may be +noticeable. Based on the nature of task and the need of order maintenance, users +can select either of the queueing. + +Below excerpts shows the usage of queues and threads in both cases. In the 1st, +alignments are updated with an aux tag indicating GC ratio. The order of data +has to be maintained even after update, hence the result queueing is used to +ensure same order as initial. A number of alignments are bunched together and +reuse of allocated memory is made to make it perform better. A sentinel job is +used to identify the completion of all tasks at the result collection side. + ... + void *thread_ordered_proc(void *args) + { + ... + for ( i = 0; i < bamdata->count; ++i) { + ... + for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) + count[bam_seqi(data,pos)]++; + ... 
+ gcratio = (count[2] /*C*/ + count[4] /*G*/) / (float) (count[1] /*A*/ + count[8] /*T*/ + count[2] + count[4]); + + if (bam_aux_append(bamdata->bamarray[i], "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) { + + ... + void *threadfn_orderedwrite(void *args) + { + ... + //get result and write; wait if no result is in queue - until shutdown of queue + while (tdata->result == 0 && + (r = hts_tpool_next_result_wait(tdata->queue)) != NULL) { + bamdata = (data*) hts_tpool_result_data(r); + ... + for (i = 0; i < bamdata->count; ++i) { + if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) { + ... // error + ... + hts_tpool_delete_result(r, 0); //release the result memory + ... + + // Shut down the process queue. If we stopped early due to a write failure, + // this will signal to the other end that something has gone wrong. + hts_tpool_process_shutdown(tdata->queue); + + ... + int main(int argc, char *argv[]) + { + ... + if (!(pool = hts_tpool_init(cnt))) //thread pool + ... // error + tpool.pool = pool; //to share the pool for file read and write as well + //queue to use with thread pool, for task and results + if (!(queue = hts_tpool_process_init(pool, cnt * 2, 0))) { + ... + //share the thread pool with i/o files + if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 || + hts_set_opt(outfile, HTS_OPT_THREAD_POOL, &tpool) < 0) + ... // error + if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) + ... // error + while (c >= 0) { + if (!(bamdata = getbamstorage(chunk, &bamcache))) + ... // error + for (cnt = 0; cnt < bamdata->maxsize; ++cnt) { + c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]); + ... + if (hts_tpool_dispatch3(pool, queue, thread_ordered_proc, bamdata, + cleanup_bamstorage, cleanup_bamstorage, + 0) == -1) + ... // error + ... + if (queue) { + if (-1 == c) { + // EOF read, send a marker to tell the threadfn_orderedwrite() + // function to shut down. 
+ if (hts_tpool_dispatch(pool, queue, thread_ordered_proc, + NULL) == -1) { + ... // error + hts_tpool_process_shutdown(queue); + + ... + // Wait for threadfn_orderedwrite to finish. + if (started_thread) { + pthread_join(thread, NULL); + + ... + if (queue) { + // Once threadfn_orderedwrite has stopped, the queue can be + // cleaned up. + hts_tpool_process_destroy(queue); + } + ... +Refer: qtask_ordered.c + +In this 2nd, the bases are counted and GC ratio of whole file is calculated. +Order in which bases are counted is not relevant and no result queue required. +The queue is created as input only. + ... + void *thread_unordered_proc(void *args) + { + ... + for ( i = 0; i < bamdata->count; ++i) { + data = bam_get_seq(bamdata->bamarray[i]); + for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) + counts[bam_seqi(data, pos)]++; + + ... + //update result and add the memory block for reuse + pthread_mutex_lock(&bamdata->cache->lock); + for (i = 0; i < 16; i++) { + bamdata->bases->counts[i] += counts[i]; + } + + bamdata->next = bamdata->cache->list; + bamdata->cache->list = bamdata; + pthread_mutex_unlock(&bamdata->cache->lock); + + ... + int main(int argc, char *argv[]) + { + ... + if (!(queue = hts_tpool_process_init(pool, cnt * 2, 1))) + ... // error + c = 0; + while (c >= 0) { + ... + for (cnt = 0; cnt < bamdata->maxsize; ++cnt) { + c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]); + + ... + if (c >= -1 ) { + ... + if (hts_tpool_dispatch3(pool, queue, thread_unordered_proc, bamdata, + cleanup_bamstorage, cleanup_bamstorage, + 0) == -1) + ... // error + ... 
+ if (-1 == c) { + // EOF read, ensure all are processed, waits for all to finish + if (hts_tpool_process_flush(queue) == -1) { + fprintf(stderr, "Failed to flush queue\n"); + } else { //all done + //refer seq_nt16_str to find position of required bases + fprintf(stdout, "GCratio: %f\nBase counts:\n", + (gccount.counts[2] /*C*/ + gccount.counts[4] /*G*/) / (float) + (gccount.counts[1] /*A*/ + gccount.counts[8] /*T*/ + + gccount.counts[2] + gccount.counts[4])); + ... + if (queue) { + hts_tpool_process_destroy(queue); + } +Refer: qtask_unordered.c + +## More Information + +### CRAM reference files + +The cram reference data is required for the read of sequence data in CRAM +format. The sequence data file may have it as embedded or as a reference to +the actual file. When it is a reference, it is downloaded locally, in the +cache directory for later usage. It will be stored in a directory structure +based on the MD5 checksum in the cache directory. + +Each chromosome in a reference file gets saved as a separate file with md5sum +as its path and name. The initial 4 numerals make the directory name and rest +as the file name (/<1st 2 of md5sum>/<2nd 2 of md5sum>/). + +The download would be attempted from standard location, EBI ENA +(https://www.ebi.ac.uk/ena). + + +### Bam1_t + +This structure holds the sequence data in BAM format. There are fixed and +variable size fields, basic and extended information on sequence +data. Variable size data and extended information are kept together in a +buffer, named data in bam1_t. Fields in the member named core, bam1_core_t, +and a few macros together support the storage and handling of the whole +sequence data. + +- core has a link to reference as a 0 based index in field tid. The mate / + reverse strand's link to reference is given by mtid. + +- Field pos and mpos gives the position in reference to which the sequence and + its mate / reverse strand match. + +- Field flag gives the properties of the given alignment. 
It shows the + alignment's orientation, mate status, read order etc. + +- Field qual gives the quality of the alignment read. + +- l_qname gives the length of the name of the alignment / read, l_extranul gives + the extra space used internally in the data field. + +- l_qseq gives the length of the alignment / read in the data field. + +- n_cigar gives the number of CIGAR operations for the given alignment. + +- isize gives the insert size of the read / alignment. + +The bases in sequence data are stored by compressing 2 bases together in a +byte. When the reverse flag is set, the base data is reversed and +complemented from the actual read (i.e. if the forward read is ACTG, the +reverse read would be CAGT; it will be stored in SAM format with reversed and +complemented format as ACTG with reverse flag set). + +Macros bam_get_qname, bam_get_seq, bam_get_qual, bam_get_aux, bam_get_l_aux, +bam_seqi etc access the data field and retrieve the required data. The aux +macros support the retrieval of auxiliary data from the data field. + + +### Sam_hdr_t + +This structure holds the header information. This holds the number of targets +/ SQ lines in the file, each one's length, name and reference count to this +structure. It also has this information in an internal data structure for +easier access of each field of this data. + +When this data is shared or assigned to another variable of a different scope +or purpose, the reference count needs to be incremented to ensure that it is +valid till the end of the variable's scope. This is done with sam_hdr_incr_ref, +and the header needs to be destroyed as many times with sam_hdr_destroy api. + + +### Index + +Indices need the data to be sorted by position. They can be of different +types with extension .bai, .csi or .tbi for compressed SAM/BAM/VCF files and +.crai for CRAM files. The index name can be passed along with the alignment +file itself by appending a specific character sequence. The apis can detect this +sequence and extract the index path. 
##idx## is the sequence which separates +the file path and index path. + + +### Data files + +The data files can be a local file, a network file, a file accessible through +the web or in cloud storage like google and amazon. The data files can be +represented with URIs like file://, file://localhost/.., ,ftp://.., +gs+http[s].., s3+http[s]:// + diff --git a/src/htslib-1.21/samples/Makefile b/src/htslib-1.21/samples/Makefile new file mode 100644 index 0000000..ee632e3 --- /dev/null +++ b/src/htslib-1.21/samples/Makefile @@ -0,0 +1,117 @@ +HTS_DIR = ../ +include $(HTS_DIR)/htslib_static.mk + +CC = gcc +CFLAGS = -Wall -O2 + +#to statically link to libhts +LDFLAGS = $(HTS_DIR)/libhts.a -L$(HTS_DIR) $(HTSLIB_static_LDFLAGS) $(HTSLIB_static_LIBS) + +#to dynamically link to libhts +#LDFLAGS = -L $(HTS_DIR) -lhts -Wl,-rpath, + +PRGS = flags split split2 cram read_fast read_header read_ref read_bam \ + read_aux dump_aux add_header rem_header update_header mod_bam mod_aux \ + mod_aux_ba write_fast idx_on_write read_reg read_multireg pileup \ + mpileup modstate pileup_mod flags_field split_t1 split_t2 \ + read_fast_i qtask_ordered qtask_unordered index_fasta + +all: $(PRGS) + +flags: flags_demo.c + $(CC) $(CFLAGS) -I $(HTS_DIR) flags_demo.c -o $@ $(LDFLAGS) + +split: split.c + $(CC) $(CFLAGS) -I $(HTS_DIR) split.c -o $@ $(LDFLAGS) + +split2: split2.c + $(CC) $(CFLAGS) -I $(HTS_DIR) split2.c -o $@ $(LDFLAGS) + +cram: cram.c + $(CC) $(CFLAGS) -I $(HTS_DIR) cram.c -o $@ $(LDFLAGS) + +read_fast: read_fast.c + $(CC) $(CFLAGS) -I $(HTS_DIR) read_fast.c -o $@ $(LDFLAGS) + +read_header: read_header.c + $(CC) $(CFLAGS) -I $(HTS_DIR) read_header.c -o $@ $(LDFLAGS) + +read_ref: read_refname.c + $(CC) $(CFLAGS) -I $(HTS_DIR) read_refname.c -o $@ $(LDFLAGS) + +read_bam: read_bam.c + $(CC) $(CFLAGS) -I $(HTS_DIR) read_bam.c -o $@ $(LDFLAGS) + +read_aux: read_aux.c + $(CC) $(CFLAGS) -I $(HTS_DIR) read_aux.c -o $@ $(LDFLAGS) + +dump_aux: dump_aux.c + $(CC) $(CFLAGS) -I $(HTS_DIR) dump_aux.c 
-o $@ $(LDFLAGS) + +add_header: add_header.c + $(CC) $(CFLAGS) -I $(HTS_DIR) add_header.c -o $@ $(LDFLAGS) + +rem_header: rem_header.c + $(CC) $(CFLAGS) -I $(HTS_DIR) rem_header.c -o $@ $(LDFLAGS) + +update_header: update_header.c + $(CC) $(CFLAGS) -I $(HTS_DIR) update_header.c -o $@ $(LDFLAGS) + +mod_bam: mod_bam.c + $(CC) $(CFLAGS) -I $(HTS_DIR) mod_bam.c -o $@ $(LDFLAGS) + +mod_aux: mod_aux.c + $(CC) $(CFLAGS) -I $(HTS_DIR) mod_aux.c -o $@ $(LDFLAGS) + +mod_aux_ba: mod_aux_ba.c + $(CC) $(CFLAGS) -I $(HTS_DIR) mod_aux_ba.c -o $@ $(LDFLAGS) + +write_fast: write_fast.c + $(CC) $(CFLAGS) -I $(HTS_DIR) write_fast.c -o $@ $(LDFLAGS) + +idx_on_write: index_write.c + $(CC) $(CFLAGS) -I $(HTS_DIR) index_write.c -o $@ $(LDFLAGS) + +read_reg: index_reg_read.c + $(CC) $(CFLAGS) -I $(HTS_DIR) index_reg_read.c -o $@ $(LDFLAGS) + +read_multireg: index_multireg_read.c + $(CC) $(CFLAGS) -I $(HTS_DIR) index_multireg_read.c -o $@ $(LDFLAGS) + +read_fast_i: read_fast_index.c + $(CC) $(CFLAGS) -I $(HTS_DIR) read_fast_index.c -o $@ $(LDFLAGS) + +pileup: pileup.c + $(CC) $(CFLAGS) -I $(HTS_DIR) pileup.c -o $@ $(LDFLAGS) + +mpileup: mpileup.c + $(CC) $(CFLAGS) -I $(HTS_DIR) mpileup.c -o $@ $(LDFLAGS) + +modstate: modstate.c + $(CC) $(CFLAGS) -I $(HTS_DIR) modstate.c -o $@ $(LDFLAGS) + +pileup_mod: pileup_mod.c + $(CC) $(CFLAGS) -I $(HTS_DIR) pileup_mod.c -o $@ $(LDFLAGS) + +flags_field: flags_htsopt_field.c + $(CC) $(CFLAGS) -I $(HTS_DIR) flags_htsopt_field.c -o $@ $(LDFLAGS) + +split_t1: split_thread1.c + $(CC) $(CFLAGS) -I $(HTS_DIR) split_thread1.c -o $@ $(LDFLAGS) + +split_t2: split_thread2.c + $(CC) $(CFLAGS) -I $(HTS_DIR) split_thread2.c -o $@ $(LDFLAGS) + +index_fasta: index_fasta.c + $(CC) $(CFLAGS) -I $(HTS_DIR) index_fasta.c -o $@ $(LDFLAGS) + +qtask_ordered: qtask_ordered.c + $(CC) $(CFLAGS) -I $(HTS_DIR) qtask_ordered.c -o $@ $(LDFLAGS) + +qtask_unordered: qtask_unordered.c + $(CC) $(CFLAGS) -I $(HTS_DIR) qtask_unordered.c -o $@ $(LDFLAGS) + +clean: + find . 
-name "*.o" | xargs rm -rf + find . -name "*.dSYM" | xargs rm -rf + -rm -f $(PRGS) diff --git a/src/htslib-1.21/samples/add_header.c b/src/htslib-1.21/samples/add_header.c new file mode 100644 index 0000000..066b1d4 --- /dev/null +++ b/src/htslib-1.21/samples/add_header.c @@ -0,0 +1,128 @@ +/* add_header.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: add_header infile\n\ +Adds new header lines of SQ, RG, PG and CO types\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, sq[] = "@SQ\tSN:TR1\tLN:100\n@SQ\tSN:TR2\tLN:50"; + int c = 0, ret = EXIT_FAILURE; + samFile *infile = NULL, *outfile = NULL; + sam_hdr_t *in_samhdr = NULL; + kstring_t data = KS_INITIALIZE; + + //update_header infile header idval tag value + if (argc != 2) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update + printf("Could not open stdout\n"); + goto end; + } + + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + //dump command line arguments for PG line + for (c = 0; c < argc; ++c) { + kputs(argv[c], &data); + kputc(' ', &data); + } + + //add SQ line with SN as TR1 and TR2 + if (sam_hdr_add_lines(in_samhdr, &sq[0], 0)) { //length as 0 for NULL terminated data + printf("Failed to add SQ lines\n"); + goto end; + } + + //add RG line with ID as RG1 + if 
(sam_hdr_add_line(in_samhdr, "RG", "ID", "RG1", "LB", "Test", "SM", "S1", NULL)) { + printf("Failed to add RG line\n"); + goto end; + } + + //add pg line + if (sam_hdr_add_pg(in_samhdr, "add_header", "VN", "Test", "CL", data.s, NULL)) { //NULL is to indicate end of args + printf("Failed to add PG line\n"); + goto end; + } + + if (sam_hdr_add_line(in_samhdr, "CO", "Test data", NULL)) { //NULL is to indicate end of args + printf("Failed to add CO line\n"); + goto end; + } + + //write output + if (sam_hdr_write(outfile, in_samhdr) < 0) { + printf("Failed to write output\n"); + goto end; + } + ret = EXIT_SUCCESS; + //bam data write to follow.... +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (outfile) { + sam_close(outfile); + } + ks_free(&data); + return ret; +} diff --git a/src/htslib-1.21/samples/cram.c b/src/htslib-1.21/samples/cram.c new file mode 100644 index 0000000..7b13423 --- /dev/null +++ b/src/htslib-1.21/samples/cram.c @@ -0,0 +1,168 @@ +/* cram.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: cram infile reffile outdir\n\ +Dumps the input file alignments in cram format in given directory\n\ +1.cram has external reference\n\ +2.cram has reference embedded\n\ +3.cram has autogenerated reference\n\ +4.cram has no reference data in it\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *outdir = NULL, *reffile = NULL; + char *file1 = NULL, *file2 = NULL, *file3 = NULL, *file4 = NULL, *reffmt1 = NULL, *reffmt2 = NULL; + int c = 0, ret = EXIT_FAILURE, size1 = 0, size2 = 0, size3 = 0; + samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL, *outfile3 = NULL, *outfile4 = NULL; + sam_hdr_t *in_samhdr = NULL; + bam1_t *bamdata = NULL; + htsFormat fmt1 = {0}, fmt2 = {0}, fmt3 = {0}, fmt4 = {0}; + + //cram infile reffile outdir + if (argc != 4) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + reffile = argv[2]; + outdir = argv[3]; + + //allocate space for option string and output file names + size1 = sizeof(char) * (strlen(reffile) + sizeof("cram,reference=") + 1); + size2 = sizeof(char) * (strlen(reffile) + sizeof("cram,embed_ref=1,reference=") + 1); + size3 = sizeof(char) * (strlen(outdir) + sizeof("/1.cram") + 1); + + reffmt1 = malloc(size1); reffmt2 = 
malloc(size2); + file1 = malloc(size3); file2 = malloc(size3); + file3 = malloc(size3); file4 = malloc(size3); + + if (!file1 || !file2 || !file3 || !file4 || !reffmt1 || !reffmt2) { + printf("Failed to create buffers\n"); + goto end; + } + + snprintf(reffmt1, size1, "cram,reference=%s", reffile); + snprintf(reffmt2, size2, "cram,embed_ref=1,reference=%s", reffile); + snprintf(file1, size3, "%s/1.cram", outdir); snprintf(file2, size3, "%s/2.cram", outdir); + snprintf(file3, size3, "%s/3.cram", outdir); snprintf(file4, size3, "%s/4.cram", outdir); + + if (hts_parse_format(&fmt1, reffmt1) == -1 || //using external reference - uses the M5/UR tags to get reference data during read + hts_parse_format(&fmt2, reffmt2) == -1 || //embed the reference internally + hts_parse_format(&fmt3, "cram,embed_ref=2") == -1 || //embed autogenerated reference + hts_parse_format(&fmt4, "cram,no_ref=1") == -1) { //no reference data encoding at all + printf("Failed to set output option\n"); + goto end; + } + + //bam data storage + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input file - r reading + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + //open output files - w write as SAM, wb write as BAM, wc as CRAM (equivalent to fmt3) + outfile1 = sam_open_format(file1, "wc", &fmt1); outfile2 = sam_open_format(file2, "wc", &fmt2); + outfile3 = sam_open_format(file3, "wc", &fmt3); outfile4 = sam_open_format(file4, "wc", &fmt4); + if (!outfile1 || !outfile2 || !outfile3 || !outfile4) { + printf("Could not open output file\n"); + goto end; + } + + //read header, required to resolve the target names to proper ids + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + //write header + if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1) || + (sam_hdr_write(outfile3, in_samhdr) == -1) || 
(sam_hdr_write(outfile4, in_samhdr) == -1)) { + printf("Failed to write header\n"); + goto end; + } + + //check flags and write + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (sam_write1(outfile1, in_samhdr, bamdata) < 0 || + sam_write1(outfile2, in_samhdr, bamdata) < 0 || + sam_write1(outfile3, in_samhdr, bamdata) < 0 || + sam_write1(outfile4, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + if (-1 == c) { + //EOF + ret = EXIT_SUCCESS; + } + else { + printf("Error in reading data\n"); + } +end: +#define IF_OL(X,Y) if((X)) {(Y);} //if one liner + //cleanup + IF_OL(in_samhdr, sam_hdr_destroy(in_samhdr)); + IF_OL(infile, sam_close(infile)); + IF_OL(outfile1, sam_close(outfile1)); + IF_OL(outfile2, sam_close(outfile2)); + IF_OL(outfile3, sam_close(outfile3)); + IF_OL(outfile4, sam_close(outfile4)); + IF_OL(file1, free(file1)); + IF_OL(file2, free(file2)); + IF_OL(file3, free(file3)); + IF_OL(file4, free(file4)); + IF_OL(reffmt1, free(reffmt1)); + IF_OL(reffmt2, free(reffmt2)); + IF_OL(fmt1.specific, hts_opt_free(fmt1.specific)); + IF_OL(fmt2.specific, hts_opt_free(fmt2.specific)); + IF_OL(fmt3.specific, hts_opt_free(fmt3.specific)); + IF_OL(fmt4.specific, hts_opt_free(fmt4.specific)); + IF_OL(bamdata, bam_destroy1(bamdata)); + + return ret; +} diff --git a/src/htslib-1.21/samples/dump_aux.c b/src/htslib-1.21/samples/dump_aux.c new file mode 100644 index 0000000..3caa160 --- /dev/null +++ b/src/htslib-1.21/samples/dump_aux.c @@ -0,0 +1,188 @@ +/* dump_aux.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: dump_aux infile\n\ +Dump the aux tags from alignments\n"); +} + +/// printauxdata - prints aux data +/** @param fp - file to which it to be printed - stdout or null + * @param type - aux type + * @param idx - index in array, -1 when not an array type + * @param data - data + * recurses when the data is array type +returns 1 on failure 0 on success +*/ +int printauxdata(FILE *fp, char type, int32_t idx, const uint8_t *data) +{ + uint32_t auxBcnt = 0; + int i = 0; + char auxBType = 'Z'; + + //the tag is already queried and ensured to exist and the type is retrieved from the tag 
data, also iterated within index for arrays, so no error is expected here. + //when these apis are used explicitly, these error conditions needs to be handled based on return value and errno + switch(type) { + case 'A': + fprintf(fp, "%c", bam_aux2A(data)); //byte data + break; + case 'c': + fprintf(fp, "%d", (int8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 1 byte data; bam_auxB2i - from array or bam_aux2i - non array data + break; + case 'C': + fprintf(fp, "%u", (uint8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 1 byte data + break; + case 's': + fprintf(fp, "%d", (int16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 2 byte data + break; + case 'S': + fprintf(fp, "%u", (uint16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 2 byte data + break; + case 'i': + fprintf(fp, "%d", (int32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 4 byte data + break; + case 'I': + fprintf(fp, "%u", (uint32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 4 byte data + break; + case 'f': + case 'd': + fprintf(fp, "%g", (float)(idx > -1 ? 
bam_auxB2f(data, idx) : bam_aux2f(data))); //floating point data, 4 bytes + break; + case 'H': + case 'Z': + fprintf(fp, "%s", bam_aux2Z(data)); //array of char or hex data + break; + case 'B': //array of char/int/float + auxBcnt = bam_auxB_len(data); //length of array + auxBType = bam_aux_type(data + 1); //type of element in array + fprintf(fp, "%c", auxBType); + for (i = 0; i < auxBcnt; ++i) { //iterate the array + fprintf(fp, ","); + //calling recursively with index to reuse a few lines + if (printauxdata(fp, auxBType, i, data) == EXIT_FAILURE) { + return EXIT_FAILURE; + } + } + break; + default: + printf("Invalid aux tag?\n"); + return EXIT_FAILURE; + break; + } + return EXIT_SUCCESS; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL; + int ret = EXIT_FAILURE; + sam_hdr_t *in_samhdr = NULL; + samFile *infile = NULL; + int ret_r = 0; + bam1_t *bamdata = NULL; + uint8_t *data = NULL; + + //dump_aux infile + if (argc != 2) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + + if (!(bamdata = bam_init1())) { + printf("Failed to allocate data memory!\n"); + goto end; + } + + //open input file + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + errno = 0; + data = NULL; + data = bam_aux_first(bamdata); //get the first aux data + while (data) { + printf("%.2s:%c:", bam_aux_tag(data), NULL != strchr("cCsSiI", bam_aux_type(data)) ? 
'i' : bam_aux_type(data)); //macros gets the tag and type of aux data + //dump the data + if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) { + printf("Failed to dump aux data\n"); + goto end; + } + else { + printf(" "); + } + data = bam_aux_next(bamdata, data); //get the next aux data + } + if (ENOENT != errno) { + printf("\nFailed to get aux data\n"); + goto end; + } + printf("\n"); + } + if (ret_r < -1) { + //read error + printf("Failed to read data\n"); + goto end; + } + + ret = EXIT_SUCCESS; +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/samples/flags_demo.c b/src/htslib-1.21/samples/flags_demo.c new file mode 100644 index 0000000..ac26be8 --- /dev/null +++ b/src/htslib-1.21/samples/flags_demo.c @@ -0,0 +1,110 @@ +/* flags_demo.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: flags \n\ +Shows the count of read1 and read2 alignments\n\ +This shows basic reading and alignment flag access\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL; //input file name + int c = 0, ret = EXIT_FAILURE; + int64_t cntread1 = 0, cntread2 = 0; //count + samFile *infile = NULL; //sam file + sam_hdr_t *in_samhdr = NULL; //header of file + bam1_t *bamdata = NULL; //to hold the read data + + if (argc != 2) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + + //initialize + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input files - r reading + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf( "Failed to read header from file\n"); + goto end; + } + + //read data, check flags and update count + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) { + cntread1++; + } + if (bamdata->core.flag & BAM_FREAD2) { + cntread2++; + } + } + if (c != -1) { + //error + printf("Failed to get data\n"); + goto end; + } + //else -1 / EOF + printf("File %s has 
%"PRIhts_pos" read1 and %"PRIhts_pos" read2 alignments\n", inname, cntread1, cntread2); + ret = EXIT_SUCCESS; +end: + //clean up + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/samples/flags_htsopt_field.c b/src/htslib-1.21/samples/flags_htsopt_field.c new file mode 100644 index 0000000..40a0aff --- /dev/null +++ b/src/htslib-1.21/samples/flags_htsopt_field.c @@ -0,0 +1,115 @@ +/* flags_htsopt_field.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: flags_field \n\ +Shows the count of read1 and read2 alignments\n\ +This shows reading selected fields from CRAM file\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL; //input file name + int c = 0, ret = EXIT_FAILURE; + int64_t cntread1 = 0, cntread2 = 0; //count + samFile *infile = NULL; //sam file + sam_hdr_t *in_samhdr = NULL; //header of file + bam1_t *bamdata = NULL; //to hold the read data + + if (argc != 2) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + + //initialize + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input files - r reading + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + //select required field alone, this is useful for CRAM alone + if (hts_set_opt(infile, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG) < 0) { + printf("Failed to set htsoption\n"); + goto end; + } + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file\n"); + goto end; + } + + //read data, check flags and update count + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) { + cntread1++; + } 
+ if (bamdata->core.flag & BAM_FREAD2) { + cntread2++; + } + } + if (c != -1) { + //error + printf("Failed to get data\n"); + goto end; + } + //else -1 / EOF + printf("File %s has %"PRIhts_pos" read1 and %"PRIhts_pos" read2 alignments\n", inname, cntread1, cntread2); + ret = EXIT_SUCCESS; +end: + //clean up + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/samples/index_fasta.c b/src/htslib-1.21/samples/index_fasta.c new file mode 100644 index 0000000..ba04890 --- /dev/null +++ b/src/htslib-1.21/samples/index_fasta.c @@ -0,0 +1,72 @@ +/* index_fasta.c -- showcases the htslib api usage + + Copyright (C) 2024 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: index_fasta \n\ +Indexes a fasta/fastq file and saves along with source.\n"); + return; +} + +/// main - indexes fasta/fastq file +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *filename = NULL; //file name + int ret = EXIT_FAILURE; + + if (argc != 2) { + print_usage(stdout); + goto end; + } + filename = argv[1]; + + // index the file + if (fai_build3(filename, NULL, NULL) == -1) { + printf("Indexing failed with %d\n", errno); + goto end; + } + //this creates an .fai file. If the file is bgzipped, a .gzi file will be created along with .fai + ret = EXIT_SUCCESS; +end: + //clean up + return ret; +} diff --git a/src/htslib-1.21/samples/index_multireg_read.c b/src/htslib-1.21/samples/index_multireg_read.c new file mode 100644 index 0000000..7bb8649 --- /dev/null +++ b/src/htslib-1.21/samples/index_multireg_read.c @@ -0,0 +1,150 @@ +/* index_multireg_read.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: read_multireg infile count regspec_csv\n\ + Reads alignment of a target matching to given region specifications\n\ + read_multireg infile.sam 2 R1:10-100,R2:200"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL; + char *ptr = NULL; + int c = 0, ret = EXIT_FAILURE; + samFile *infile = NULL, *outfile = NULL; + sam_hdr_t *in_samhdr = NULL; + bam1_t *bamdata = NULL; + hts_idx_t *idx = NULL; + 
hts_itr_t *iter = NULL; + unsigned int regcnt = 0; + char **regions = NULL; + + //read_multireg infile count regspec_csv + if (argc != 4) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + regcnt = atoi(argv[2]); + regions = calloc(regcnt, sizeof(char*)); + //set each regspec as separate entry in region array + ptr = argv[3]; + for (c = 0; ptr && (c < regcnt); ++c) { + regions[c] = ptr; + ptr = strchr(ptr, ','); + if (ptr) { *ptr = '\0'; ++ptr; } + } + + if (regcnt == 0) { + printf("Region count can not be 0\n"); + goto end; + } + //initialize bam data storage + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open files, use stdout as output SAM file for ease of display + infile = sam_open(inname, "r"); + outfile = sam_open("-", "w"); + if (!outfile || !infile) { + printf("Could not open in/out files\n"); + goto end; + } + //load index file, assume it to be present in same location + if (!(idx = sam_index_load(infile, inname))) { + printf("Failed to load the index\n"); + goto end; + } + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + //create iterator + if (!(iter = sam_itr_regarray(idx, in_samhdr, regions, regcnt))) { + printf("Failed to get iterator\n"); + goto end; + } + if (regions) { + //can be freed as it is no longer required + free(regions); + regions = NULL; + } + + //get required area + while ((c = sam_itr_multi_next(infile, iter, bamdata) >= 0)) { + //write to output + if (sam_write1(outfile, in_samhdr, bamdata) < 0) { + printf("Failed to write output\n"); + goto end; + } + } + if (c != -1) { + printf("Error during read\n"); + goto end; + } + ret = EXIT_SUCCESS; + +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (outfile) { + sam_close(outfile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (iter) { + sam_itr_destroy(iter); + } + if (idx) + 
hts_idx_destroy(idx); + return ret; +} diff --git a/src/htslib-1.21/samples/index_reg_read.c b/src/htslib-1.21/samples/index_reg_read.c new file mode 100644 index 0000000..dec6849 --- /dev/null +++ b/src/htslib-1.21/samples/index_reg_read.c @@ -0,0 +1,143 @@ +/* index_reg_read.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: read_reg infile idxfile region\n\ +Reads alignments matching to a specific region\n\ +\\. 
from start of file\n\ +\\* only unmapped reads\n\ +REFNAME all reads referring REFNAME\n\ +REFNAME:S all reads referring REFNAME and overlapping from S onwards\n\ +REFNAME:S-E all reads referring REFNAME overlapping from S to E\n\ +REFNAME:-E all reads referring REFNAME overlapping upto E\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *region = NULL; + char *idxfile = NULL; + int c = 0, ret = EXIT_FAILURE; + samFile *infile = NULL, *outfile = NULL; + sam_hdr_t *in_samhdr = NULL; + bam1_t *bamdata = NULL; + hts_idx_t *idx = NULL; + hts_itr_t *iter = NULL; + + //readreg infile indexfile region + if (argc != 4) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + idxfile = argv[2]; + region = argv[3]; + + //initialize bam data storage + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + + //open files + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open input file\n"); + goto end; + } + //using stdout as output file for ease of dumping data + if (!(outfile = sam_open("-", "w"))) { + printf("Could not open out file\n"); + goto end; + } + //load index file + if (!(idx = sam_index_load2(infile, inname, idxfile))) { + printf("Failed to load the index\n"); + goto end; + } + //can use sam_index_load if the index file is present in same location and follows standard naming conventions (i.e. .) 
+ + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + //create iterator + if (!(iter = sam_itr_querys(idx, in_samhdr, region))) { + printf("Failed to get iterator\n"); + goto end; + } + //read using iterator + while ((c = sam_itr_next(infile, iter, bamdata)) >= 0) { + //write to output + if (sam_write1(outfile, in_samhdr, bamdata) < 0) { + printf("Failed to write output\n"); + goto end; + } + } + if (c != -1) { + printf("Error during read\n"); + goto end; + } + ret = EXIT_SUCCESS; + +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (outfile) { + sam_close(outfile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (iter) { + sam_itr_destroy(iter); + } + if (idx) { + hts_idx_destroy(idx); + } + return ret; +} diff --git a/src/htslib-1.21/samples/index_write.c b/src/htslib-1.21/samples/index_write.c new file mode 100644 index 0000000..9ec63d4 --- /dev/null +++ b/src/htslib-1.21/samples/index_write.c @@ -0,0 +1,166 @@ +/* index_write.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: idx_on_write infile shiftsize outdir\n\ +Creates compressed sam file and index file for it in given directory\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *outdir = NULL; + char *inname = NULL, *fileidx = NULL, *outname = NULL, outmode[4] = "w"; + int c = 0, ret = EXIT_FAILURE, size = 0; + samFile *infile = NULL, *outfile = NULL; + sam_hdr_t *in_samhdr = NULL; + bam1_t *bamdata = NULL; + + //idx_on_write infile sizeshift outputdirectory + if (argc != 4) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + size = atoi(argv[2]); + outdir = argv[3]; + + //allocate space for output name - outdir/filename.ext.idxextNUL + c = strlen(basename(inname)) + strlen(outdir) + 10; + fileidx = malloc(sizeof(char) * c); + outname = malloc(sizeof(char) * c); + if (!fileidx || !outname) { + printf("Couldnt allocate memory\n"); + goto end; + } + //initialize bam storage + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + + //open files + if ((infile = sam_open(inname, "r"))) { + //get file type and create output names + if (infile->format.format == cram) { + //set as crai + snprintf(fileidx, c, "%s/%s.crai", outdir, basename(inname)); + 
snprintf(outname, c, "%s/%s", outdir, basename(inname)); + } + else { + //set as either bai or csi based on interval + if (infile->format.format == sam && infile->format.compression == no_compression) { + //create as gzip compressed + snprintf(outname, c, "%s/%s.gz", outdir, basename(inname)); + snprintf(fileidx, c, "%s/%s.gz.%s", outdir, basename(inname), !size ? "bai" : "csi"); + } + else { + //with same name as input + snprintf(outname, c, "%s/%s", outdir, basename(inname)); + snprintf(fileidx, c, "%s/%s.%s", outdir, basename(inname), !size ? "bai" : "csi"); + } + } + } + c = 0; + sam_open_mode(outmode + 1, outname, NULL); //set extra write options based on name + outfile = sam_open(outname, outmode); + if (!outfile || !infile) { + printf("Could not open files\n"); + goto end; + } + + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + //write header + if (sam_hdr_write(outfile, in_samhdr)) { + printf("Failed to write header\n"); + goto end; + } + + // initialize indexing, before start of write + if (sam_idx_init(outfile, in_samhdr, size, fileidx)) { + printf("idx initialization failed\n"); + goto end; + } + //read and write alignments + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (sam_write1(outfile, in_samhdr, bamdata) < 0) { + printf("Failed to write data\n"); + goto end; + } + } + if (c != -1) { + printf("Error in reading data\n"); + goto end; + } + //else EOF, save index + if (sam_idx_save(outfile)) { + printf("Could not save index\n"); + goto end; + } + ret = EXIT_SUCCESS; +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (fileidx) { + free(fileidx); + } + if (outname) { + free(outname); + } + if (outfile) { + sam_close(outfile); + } + return ret; +} diff --git a/src/htslib-1.21/samples/mod_aux.c b/src/htslib-1.21/samples/mod_aux.c new file mode 100644 
index 0000000..ae531b9 --- /dev/null +++ b/src/htslib-1.21/samples/mod_aux.c @@ -0,0 +1,222 @@ +/* mod_aux.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: mod_aux infile QNAME tag type val\n\ +Add/update the given aux tag to all alignments\n\ +type A-char C-int F-float Z-string\n"); +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *tag = NULL, *qname = NULL, 
*val = NULL; + char type = '\0'; + int ret = EXIT_FAILURE, ret_r = 0, length = 0; + sam_hdr_t *in_samhdr = NULL; + samFile *infile = NULL, *outfile = NULL; + bam1_t *bamdata = NULL; + uint8_t *data = NULL; + + //mod_aux infile QNAME tag type val + if (argc != 6) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + qname = argv[2]; + tag = argv[3]; + type = argv[4][0]; + val = argv[5]; + + if (!(bamdata = bam_init1())) { + printf("Failed to allocate data memory!\n"); + goto end; + } + + //open input file + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + //open output file + if (!(outfile = sam_open("-", "w"))) { + printf("Could not open std output\n"); + goto end; + } + + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + if (sam_hdr_write(outfile, in_samhdr) == -1) { + printf("Failed to write header\n"); + goto end; + } + + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (strcasecmp(bam_get_qname(bamdata), qname)) { + if (sam_write1(outfile, in_samhdr, bamdata) < 0) { + printf("Failed to write output\n"); + goto end; + } + continue; //not matching + } + + errno = 0; + //matched to qname, update aux + if (!(data = bam_aux_get(bamdata, tag))) { + int i = 0; float f = 0; + //tag not present append + switch (type) { + case 'f': + case 'd': + length = sizeof(float); + f = atof(val); + val = (const char*) &f; + type = 'f'; + break; + case 'C': + case 'S': + case 'I': + length = sizeof(int); + i = atoi(val); + val = (const char*) &i; + break; + case 'Z': + length = strlen(val) + 1; //1 for NUL termination + break; + case 'A': + length = 1; + break; + default: + printf("Invalid type mentioned\n"); + goto end; + break; + } + if (bam_aux_append(bamdata, tag, type, length, (const uint8_t*)val)) { + printf("Failed to append aux data, errno: %d\n", errno); + goto end; + } + } + else { + char auxtype = bam_aux_type(data); + //update the 
tag with newer value + switch (type) { + case 'f': + case 'd': + if (auxtype != 'f' && auxtype != 'd') { + printf("Invalid aux type passed\n"); + goto end; + } + if (bam_aux_update_float(bamdata, tag, atof(val))) { + printf("Failed to update float data, errno: %d\n", errno); + goto end; + } + break; + case 'C': + case 'S': + case 'I': + if (auxtype != 'c' && auxtype != 'C' && auxtype != 's' && auxtype != 'S' && auxtype != 'i' && auxtype != 'I') { + printf("Invalid aux type passed\n"); + goto end; + } + if (bam_aux_update_int(bamdata, tag, atoll(val))) { + printf("Failed to update int data, errno: %d\n", errno); + goto end; + } + break; + case 'Z': + if (auxtype != 'Z') { + printf("Invalid aux type passed\n"); + goto end; + } + length = strlen(val) + 1; //1 for NUL termination + if (bam_aux_update_str(bamdata, tag, length, val)) { + //with length as -1, length will be detected based on null terminated val data + printf("Failed to update string data, errno: %d\n", errno); + goto end; + } + break; + case 'A': + if (auxtype != 'A') { + printf("Invalid aux type passed\n"); + goto end; + } + //update the char data directly on buffer + *(data+1) = val[0]; + break; + default: + printf("Invalid data type\n"); + goto end; + break; + } + } + if (sam_write1(outfile, in_samhdr, bamdata) < 0) { + printf("Failed to write output\n"); + goto end; + } + } + if (ret_r < -1) { + //read error + printf("Failed to read data\n"); + goto end; + } + + ret = EXIT_SUCCESS; +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (outfile) { + sam_close(outfile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/samples/mod_aux_ba.c b/src/htslib-1.21/samples/mod_aux_ba.c new file mode 100644 index 0000000..836a3d3 --- /dev/null +++ b/src/htslib-1.21/samples/mod_aux_ba.c @@ -0,0 +1,147 @@ +/* mod_aux_ba.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: mod_aux_ba infile\n\ +Updates the count of bases as an aux array on all alignments\n\ +BA:B:I,count of ACTGN\n"); +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL; + int i = 0, ret = EXIT_FAILURE, ret_r = 0; + uint32_t cnt[5] = {0}; //A C G T N + sam_hdr_t *in_samhdr = NULL; + samFile *infile = NULL, *outfile = NULL; + bam1_t *bamdata = NULL; + + //mod_aux infile + if (argc != 2) { + 
print_usage(stderr); + goto end; + } + inname = argv[1]; + + if (!(bamdata = bam_init1())) { + printf("Failed to allocate data memory!\n"); + goto end; + } + + //open input file + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + + //open output file + if (!(outfile = sam_open("-", "w"))) { + printf("Could not open std output\n"); + goto end; + } + + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + if (sam_hdr_write(outfile, in_samhdr) == -1) { + printf("Failed to write header\n"); + goto end; + } + + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + errno = 0; + memset(cnt, 0, sizeof(cnt)); + for (i = 0; i < bamdata->core.l_qseq; ++i) { + switch (seq_nt16_str[bam_seqi(bam_get_seq(bamdata),i)]) { + case 'A': + ++cnt[0]; + break; + case 'C': + ++cnt[1]; + break; + case 'G': + ++cnt[2]; + break; + case 'T': + ++cnt[3]; + break; + default: //N + ++cnt[4]; + break; + } + } + + if (bam_aux_update_array(bamdata, "BA", 'I', sizeof(cnt)/sizeof(cnt[0]), cnt)) { + printf("Failed to update base array, errno %d", errno); + goto end; + } + + if (sam_write1(outfile, in_samhdr, bamdata) < 0) { + printf("Failed to write output\n"); + goto end; + } + } + if (ret_r < -1) { + //read error + printf("Failed to read data\n"); + goto end; + } + + ret = EXIT_SUCCESS; +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (outfile) { + sam_close(outfile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/samples/mod_bam.c b/src/htslib-1.21/samples/mod_bam.c new file mode 100644 index 0000000..6166396 --- /dev/null +++ b/src/htslib-1.21/samples/mod_bam.c @@ -0,0 +1,230 @@ +/* mod_bam.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: mod_bam infile QNAME fieldpos newval\n\ +Modifies the alignment data field\n\ +fieldpos - 1 QNAME 2 FLAG 3 RNAME 4 POS 5 MAPQ 6 CIGAR 7 RNEXT 8 PNEXT 9 TLEN 10 SEQ 11 QUAL\n"); +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *qname = NULL; + char *val = NULL; + int c = 0, ret = EXIT_FAILURE, field = 0; + sam_hdr_t *in_samhdr = NULL; + samFile *infile = NULL, *outfile = NULL; + int ret_r = 
0, i = 0; + bam1_t *bamdata = NULL; + + //mod_bam infile QNAME fieldpos newval + if (argc != 5) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + qname = argv[2]; + //1 QNAME 2 FLAG 3 RNAME 4 POS 5 MAPQ 6 CIGAR 7 RNEXT 8 PNEXT 9 TLEN 10 SEQ 11 QUAL + field = atoi(argv[3]); + val = argv[4]; + + if (!(bamdata = bam_init1())) { + printf("Failed to allocate data memory!\n"); + goto end; + } + + //open input file + if (!(infile = sam_open(inname, "r")) || !(outfile = sam_open("-", "w"))) { + printf("Could not open input/output\n"); + goto end; + } + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + if (sam_hdr_write(outfile, in_samhdr) == -1) { + printf("Failed to write header\n"); + goto end; + } + + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) + { + //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… + ret = 0; + if (!strcasecmp(qname, bam_get_qname(bamdata))) { + //the required one + switch(field) { + case 1:// QNAME + ret = bam_set_qname(bamdata, val); + break; + case 2:// FLAG + bamdata->core.flag = atol(val) & 0xFFFF; + break; + case 3:// RNAME + case 7:// RNEXT + if ((ret = sam_hdr_name2tid(in_samhdr, val)) < 0) { + printf("Invalid reference name\n"); + ret = -1; + break; + } + if (field == 3) { + //reference + bamdata->core.tid = ret; + } + else { + //mate reference + bamdata->core.mtid = ret; + } + break; + case 4:// POS + bamdata->core.pos = atoll(val); + break; + case 5:// MAPQ + bamdata->core.qual = atoi(val) & 0x0FF; + break; + case 6:// CIGAR + { + uint32_t *cigar = NULL; + size_t size = 0; + ssize_t ncigar = 0; + bam1_t *newbam = bam_init1(); + if (!newbam) { + printf("Failed to create new bam data\n"); + ret = -1; + break; + } + //get cigar array and set all data in new bam record + if ((ncigar = sam_parse_cigar(val, NULL, &cigar, &size)) < 0) { + printf("Failed to parse cigar\n"); + ret = -1; + break; + } + if 
(bam_set1(newbam, bamdata->core.l_qname, bam_get_qname(bamdata), bamdata->core.flag, bamdata->core.tid, bamdata->core.pos, bamdata->core.qual, + ncigar, cigar, bamdata->core.mtid, bamdata->core.mpos, bamdata->core.isize, bamdata->core.l_qseq, (const char*)bam_get_seq(bamdata), (const char*)bam_get_qual(bamdata), bam_get_l_aux(bamdata)) < 0) { + printf("Failed to set bamdata\n"); + ret = -1; + break; + } + //correct sequence data as input is expected in ascii format and not as compressed inside bam! + memcpy(bam_get_seq(newbam), bam_get_seq(bamdata), (bamdata->core.l_qseq + 1) / 2); + //copy the aux data + memcpy(bam_get_aux(newbam), bam_get_aux(bamdata), bam_get_l_aux(bamdata)); + + bam_destroy1(bamdata); + bamdata = newbam; + } + break; + case 8:// PNEXT + bamdata->core.mpos = atoll(val); + break; + case 9:// TLEN + bamdata->core.isize = atoll(val); + break; + case 10:// SEQ + i = strlen(val); + if (bamdata->core.l_qseq != i) { + printf("SEQ length different\n"); + ret = -1; + //as it is different, have to update quality data and cigar data as well and more info is required for it, which is not handled in this sample + //accessing raw memory and moving is one option; creating and using new bam1_t object is another option. 
+ break; + } + for( c = 0; c < i; ++c) { + bam_set_seqi(bam_get_seq(bamdata), c, seq_nt16_table[(unsigned char)val[c]]); + } + break; + case 11:// QUAL + i = strlen(val); + if (i != bamdata->core.l_qseq) { + printf("Qual length different than sequence\n"); + ret = -1; + break; + } + for (c = 0; c < i; ++c) { + val[c] -= 33; //phred score from ascii value + } + memcpy(bam_get_qual(bamdata), val, i); + break; + default: + printf("Invalid input\n"); + goto end; + break; + } + if (ret < 0) { + printf("Failed to set new data\n"); + ret = EXIT_FAILURE; + goto end; + } + } + if (sam_write1(outfile, in_samhdr, bamdata) < 0) { + printf("Failed to write bam data\n"); + ret = EXIT_FAILURE; + goto end; + } + } + + if (ret_r == -1 || ret != EXIT_FAILURE) { + // no error! + ret = EXIT_SUCCESS; + } + else { + printf("Failed to read data\n"); + } +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (outfile) { + sam_close(outfile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/samples/modstate.c b/src/htslib-1.21/samples/modstate.c new file mode 100644 index 0000000..4d5f676 --- /dev/null +++ b/src/htslib-1.21/samples/modstate.c @@ -0,0 +1,190 @@ +/* modstate.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: modstate infile option\n\ +Shows the base modifications on the alignment\n\ +Option can be 1 or 2 to select the api to use\n"); +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL; + int ret = EXIT_FAILURE; + sam_hdr_t *in_samhdr = NULL; + samFile *infile = NULL; + + int ret_r = 0, i = 0 , r = 0, j = 0, pos = 0, opt = 0, k = 0, cnt = 0, *bm = NULL; + bam1_t *bamdata = NULL; + uint8_t *data = NULL; + hts_base_mod_state *ms = NULL; + + + //modstate infile 1/2 + if (argc != 3) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + opt = atoi(argv[2]) - 1; //option 1 or 2? 
+ + if (!(bamdata = bam_init1())) { + printf("Failed to allocate data memory!\n"); + goto end; + } + + if (!(ms = hts_base_mod_state_alloc())) { + printf("Failed to allocate state memory\n"); + goto end; + } + + //open input file + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) + { + i = 0; + data = bam_get_seq(bamdata); + if (bam_parse_basemod(bamdata, ms)) { + printf("Failed to parse the base mods\n"); + goto end; + } + //dump the modifications + printf("Modifications:"); + bm = bam_mods_recorded(ms, &cnt); + for (k = 0; k < cnt; ++k) { + printf("%c", bm[k]); + } + printf("\n"); + hts_base_mod mod[5] = {0}; //for ATCGN + if (opt) { + //option 1 + for (; i < bamdata->core.l_qseq; ++i) { + if ((r = bam_mods_at_next_pos(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]))) <= -1) { + printf("Failed to get modifications\n"); + goto end; + } + else if (r > (sizeof(mod) / sizeof(mod[0]))) { + printf("More modifications than this app can handle, update the app\n"); + goto end; + } + else if (!r) { + //no modification at this pos + printf("%c", seq_nt16_str[bam_seqi(data, i)]); + } + //modifications + for (j = 0; j < r; ++j) { + printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? '-' : '+', mod[j].modified_base); + } + } + } + else { + //option 2 + while ((r = bam_next_basemod(bamdata, ms, mod, sizeof(mod)/sizeof(mod[0]), &pos)) >= 0) { + for (; i < bamdata->core.l_qseq && i < pos; ++i) { + printf("%c", seq_nt16_str[bam_seqi(data, i)]); + } + //modifications + for (j = 0; j < r; ++j) { + printf("%c%c%c", mod[j].canonical_base, mod[j].strand ? 
'-' : '+', mod[j].modified_base); + } + if (i == pos) + i++; //skip the modification already displayed + if (!r) { + for (; i < bamdata->core.l_qseq; ++i) { + printf("%c", seq_nt16_str[bam_seqi(data, i)]); + } + break; + } + } + if (r <= -1) { + printf("Failed to get modifications\n"); + goto end; + } + } + printf("\n"); + } + + if (ret_r == -1) { + //check last alignment's base modification + int strand = 0, impl = 0; + char canonical = 0, modification[] = "mhfcgebaon"; //possible modifications + printf("\n\nLast alignment has \n"); + for (k = 0; k < sizeof(modification) - 1; ++k) { //avoiding NUL termination + if (bam_mods_query_type(ms, modification[k], &strand, &impl, &canonical)) { + printf ("No modification of %c type\n", modification[k]); + } + else { + printf("%s strand has %c modified with %c, can %sassume unlisted as unmodified\n", strand?"-/bottom/reverse":"+/top/forward", canonical, modification[k], impl?"" : "not " ); + } + } + // no error! + ret = EXIT_SUCCESS; + } + else { + printf("Failed to read data\n"); + } +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + + if (ms) { + hts_base_mod_state_free(ms); + } + return ret; +} diff --git a/src/htslib-1.21/samples/mpileup.c b/src/htslib-1.21/samples/mpileup.c new file mode 100644 index 0000000..ecab705 --- /dev/null +++ b/src/htslib-1.21/samples/mpileup.c @@ -0,0 +1,204 @@ +/* mpileup.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: mpileup infile ...\n\ +Shows the mpileup api usage.\n"); + return; +} + +typedef struct plpconf { + char *inname; + samFile *infile; + sam_hdr_t *in_samhdr; +} plpconf; + +/// @brief plpconstructor +/// @param data client data? 
+/// @param b bam being loaded +/// @param cd client data +/// @return +int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { + return 0; +} + +int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { + return 0; +} + +/// @brief bam_plp_auto_f reads alignment data for pileup operation +/// @param data client callback data holding alignment file handle +/// @param b bamdata read +/// @return same as sam_read1 +int readdata(void *data, bam1_t *b) +{ + plpconf *conf = (plpconf*)data; + if (!conf || !conf->infile) { + return -2; //cant read data + } + + //read alignment and send + return sam_read1(conf->infile, conf->infile->bam_header, b); +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + int ret = EXIT_FAILURE; + bam1_t *bamdata = NULL; + plpconf** conf = NULL; + bam_mplp_t mplpiter = NULL; + int tid = -1, input = 0, k = 0, dpt = 0, *depth = NULL; + hts_pos_t refpos = -1; + const bam_pileup1_t **plp = NULL; + + //infile ... 
+ if (argc < 2) { + print_usage(stderr); + goto end; + } + if ((conf = calloc(argc - 1, sizeof(plpconf*)))) { + for (input = 0; input < argc - 1; ++input) { + conf[input] = calloc(1, sizeof(plpconf)); + } + } + depth = calloc(argc - 1, sizeof(int)); + plp = calloc(argc - 1, sizeof(bam_pileup1_t*)); + if (!conf || !depth || !plp) { + printf("Failed to allocate memory\n"); + goto end; + } + for (input = 0; input < argc - 1; ++input) { + conf[input]->inname = argv[input+1]; + } + + //initialize + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input files + for(input = 0; input < argc - 1; ++input) { + if (!(conf[input]->infile = sam_open(conf[input]->inname, "r"))) { + printf("Could not open %s\n", conf[input]->inname); + goto end; + } + //read header + if (!(conf[input]->in_samhdr = sam_hdr_read(conf[input]->infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + } + + if (!(mplpiter = bam_mplp_init(argc - 1, readdata, (void**) conf))) { + printf("Failed to initialize mpileup data\n"); + goto end; + } + + //set constructor destructor callbacks + bam_mplp_constructor(mplpiter, plpconstructor); + bam_mplp_destructor(mplpiter, plpdestructor); + + while (bam_mplp64_auto(mplpiter, &tid, &refpos, depth, plp) > 0) { + printf("%d\t%"PRIhts_pos"\t", tid+1, refpos+1); + + for (input = 0; input < argc - 1; ++input) { + for (dpt = 0; dpt < depth[input]; ++dpt) { + if (plp[input][dpt].is_del || plp[input][dpt].is_refskip) { + printf("*"); + continue; + } + //start and end are displayed in UPPER and rest on LOWER + printf("%c", plp[input][dpt].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]) : + (plp[input]->is_tail ? 
toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos)]))); + if (plp[input][dpt].indel > 0) { + //insertions, anyway not start or end + printf("+%d", plp[input][dpt].indel); + for (k = 0; k < plp[input][dpt].indel; ++k) { + printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[input][dpt].b), plp[input][dpt].qpos + k + 1)])); + } + } + else if (plp[input][dpt].indel < 0) { + printf("%d", plp[input][dpt].indel); + for (k = 0; k < -plp[input][dpt].indel; ++k) { + printf("?"); + } + } + } + printf(" "); + } + printf("\n"); + fflush(stdout); + } + + ret = EXIT_SUCCESS; +end: + //clean up + if (conf) { + for (input = 0; input < argc - 1; ++input) { + if (conf[input] && conf[input]->in_samhdr) { + sam_hdr_destroy(conf[input]->in_samhdr); + } + if (conf[input] && conf[input]->infile) { + sam_close(conf[input]->infile); + } + if (conf[input]) { + free(conf[input]); + } + } + free(conf); + } + + if (bamdata) { + bam_destroy1(bamdata); + } + if (mplpiter) { + bam_mplp_destroy(mplpiter); + } + if (depth) { + free(depth); + } + if (plp) { + free(plp); + } + return ret; +} diff --git a/src/htslib-1.21/samples/pileup.c b/src/htslib-1.21/samples/pileup.c new file mode 100644 index 0000000..be7aad8 --- /dev/null +++ b/src/htslib-1.21/samples/pileup.c @@ -0,0 +1,183 @@ +/* pileup.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: pileup infile\n\ +Shows the pileup api usage.\n"); + return; +} + +typedef struct plpconf { + char *inname; + samFile *infile; + sam_hdr_t *in_samhdr; +} plpconf; + +/// @brief plpconstructor +/// @param data client data? 
+/// @param b bam being loaded +/// @param cd client data +/// @return +int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { + /*plpconf *conf= (plpconf*)data; + can access the data passed to pileup init from data + can do any alignment specific allocation / data storage here in param cd + it can hold either a float, 64 bit int or a pointer + when using cd, initialize and use as it will be reused after destructor*/ + return 0; +} + +int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { + /*plpconf *conf= (plpconf*)data; + can access the data passed to pileup init from data + deallocate any alignment specific allocation made in constructor and stored in param cd*/ + return 0; +} + +/// @brief bam_plp_auto_f reads alignment data for pileup operation +/// @param data client callback data holding alignment file handle +/// @param b bamdata read +/// @return same as sam_read1 +int readdata(void *data, bam1_t *b) +{ + plpconf *conf = (plpconf*)data; + if (!conf || !conf->infile) { + return -2; //cant read data + } + + //read alignment and send + return sam_read1(conf->infile, conf->infile->bam_header, b); +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + int ret = EXIT_FAILURE; + bam1_t *bamdata = NULL; + plpconf conf = {0}; + bam_plp_t plpiter = NULL; + int tid = -1, n = -1, j = 0, k = 0; + int refpos = -1; + const bam_pileup1_t *plp = NULL; + + //infile + if (argc != 2) { + print_usage(stderr); + goto end; + } + conf.inname = argv[1]; + + //initialize + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input files + if (!(conf.infile = sam_open(conf.inname, "r"))) { + printf("Could not open %s\n", conf.inname); + goto end; + } + //read header + if (!(conf.in_samhdr = sam_hdr_read(conf.infile))) { + printf("Failed to read header from 
file!\n"); + goto end; + } + + if (!(plpiter = bam_plp_init(readdata, &conf))) { + printf("Failed to initialize pileup data\n"); + goto end; + } + + //set constructor destructor callbacks + bam_plp_constructor(plpiter, plpconstructor); + bam_plp_destructor(plpiter, plpdestructor); + + while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &n))) { + printf("%d\t%d\t", tid+1, refpos+1); + + for (j = 0; j < n; ++j) { + //doesnt detect succeeding insertion and deletion together here, only insertion is identified + //deletion is detected in plp->is_del as and when pos reaches the position + //if detection ahead is required, use bam_plp_insertion here which gives deletion length along with insertion + if (plp[j].is_del || plp[j].is_refskip) { + printf("*"); + continue; + } + //start and end are displayed in UPPER and rest on LOWER + printf("%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : + (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]))); + if (plp[j].indel > 0) { + //insertions, anyway not start or end + printf("+%d", plp[j].indel); + for (k = 0; k < plp[j].indel; ++k) { + printf("%c", tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos + k + 1)])); + } + } + else if (plp[j].indel < 0) { + printf("%d", plp[j].indel); + for (k = 0; k < -plp[j].indel; ++k) { + printf("?"); + } + } + printf(" "); + } + printf("\n"); + fflush(stdout); + } + + ret = EXIT_SUCCESS; +end: + //clean up + if (conf.in_samhdr) { + sam_hdr_destroy(conf.in_samhdr); + } + if (conf.infile) { + sam_close(conf.infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (plpiter) { + bam_plp_destroy(plpiter); + } + return ret; +} diff --git a/src/htslib-1.21/samples/pileup_mod.c b/src/htslib-1.21/samples/pileup_mod.c new file mode 100644 index 0000000..81ac5a5 --- /dev/null +++ b/src/htslib-1.21/samples/pileup_mod.c @@ -0,0 +1,218 @@ +/* 
pileup_mod.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: pileup_mod infile\n\ +Shows the pileup api usage with base modification.\n"); + return; +} + +typedef struct plpconf { + char *inname; + samFile *infile; + sam_hdr_t *in_samhdr; +} plpconf; + +/// @brief plpconstructor +/// @param data client data? 
+/// @param b bam being loaded +/// @param cd client data +/// @return +int plpconstructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { + //plpconf *conf= (plpconf*)data; can use this to access anything required from the data in pileup init + + //when using cd, initialize and use as it will be reused after destructor + cd->p = hts_base_mod_state_alloc(); + if (!cd->p) { + printf("Failed to allocate base modification state\n"); + return 1; + } + + //parse the bam data and gather modification data from MM tags + return (-1 == bam_parse_basemod(b, (hts_base_mod_state*)cd->p)) ? 1 : 0; +} + +int plpdestructor(void *data, const bam1_t *b, bam_pileup_cd *cd) { + if (cd->p) { + hts_base_mod_state_free((hts_base_mod_state *)cd->p); + cd->p = NULL; + } + return 0; +} + +/// @brief bam_plp_auto_f reads alignment data for pileup operation +/// @param data client callback data holding alignment file handle +/// @param b bamdata read +/// @return same as sam_read1 +int readdata(void *data, bam1_t *b) +{ + plpconf *conf = (plpconf*)data; + if (!conf || !conf->infile) { + return -2; //cant read data + } + + //read alignment and send + return sam_read1(conf->infile, conf->infile->bam_header, b); +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + int ret = EXIT_FAILURE; + bam1_t *bamdata = NULL; + plpconf conf = {0}; + bam_plp_t plpiter = NULL; + int tid = -1, depth = -1, j = 0, k = 0, inslen = 0, dellen = 0, modlen = 0; + #define NMODS 5 + hts_base_mod mods[NMODS] = {0}; //ACGT N + int refpos = -1; + const bam_pileup1_t *plp = NULL; + kstring_t insdata = KS_INITIALIZE; + + //infile + if (argc != 2) { + print_usage(stderr); + goto end; + } + conf.inname = argv[1]; + + //initialize + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input files + if (!(conf.infile = 
sam_open(conf.inname, "r"))) { + printf("Could not open %s\n", conf.inname); + goto end; + } + //read header + if (!(conf.in_samhdr = sam_hdr_read(conf.infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + if (!(plpiter = bam_plp_init(readdata, &conf))) { + printf("Failed to initialize pileup data\n"); + goto end; + } + + //set constructor destructor callbacks + bam_plp_constructor(plpiter, plpconstructor); + bam_plp_destructor(plpiter, plpdestructor); + + while ((plp = bam_plp_auto(plpiter, &tid, &refpos, &depth))) { + memset(&mods, 0, sizeof(mods)); + printf("%d\t%d\t", tid+1, refpos+1); + + for (j = 0; j < depth; ++j) { + dellen = 0; + + if (plp[j].is_del || plp[j].is_refskip) { + printf("*"); + continue; + } + /*invoke bam_mods_at_qpos before bam_plp_insertion_mod that the base modification + is retrieved before change in pileup pos thr' plp_insertion_mod call*/ + if ((modlen = bam_mods_at_qpos(plp[j].b, plp[j].qpos, plp[j].cd.p, mods, NMODS)) == -1) { + printf("Failed to get modifications\n"); + goto end; + } + + //use plp_insertion/_mod to get insertion and del at the same position + if ((inslen = bam_plp_insertion_mod(&plp[j], (hts_base_mod_state*)plp[j].cd.p, &insdata, &dellen)) == -1) { + printf("Failed to get insertion status\n"); + goto end; + } + + //start and end are displayed in UPPER and rest on LOWER, only 1st modification considered + //base and modification + printf("%c%c%c", plp[j].is_head ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : + (plp[j].is_tail ? toupper(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)]) : + tolower(seq_nt16_str[bam_seqi(bam_get_seq(plp[j].b), plp[j].qpos)])), + modlen > 0 ? mods[0].strand ? '-' : '+' : '\0', + modlen > 0 ? 
mods[0].modified_base : '\0'); + //insertion and deletions + if (plp[j].indel > 0) { + //insertion + /*insertion data from plp_insertion_mod, note this shows the quality value as well + which is different from base and modification above;the lower case display is not attempted either*/ + printf("+%d%s", plp[j].indel, insdata.s); + //handle deletion if any + if (dellen) { + printf("-%d", dellen); + for (k = 0; k < dellen; ++k) { + printf("?"); + } + } + } + else if (plp[j].indel < 0) { + //deletion + printf("%d", plp[j].indel); + for (k = 0; k < -plp[j].indel; ++k) { + printf("?"); + } + } + printf(" "); + } + printf("\n"); + fflush(stdout); + } + + ret = EXIT_SUCCESS; +end: + //clean up + if (conf.in_samhdr) { + sam_hdr_destroy(conf.in_samhdr); + } + if (conf.infile) { + sam_close(conf.infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (plpiter) { + bam_plp_destroy(plpiter); + } + ks_free(&insdata); + return ret; +} diff --git a/src/htslib-1.21/samples/qtask_ordered.c b/src/htslib-1.21/samples/qtask_ordered.c new file mode 100644 index 0000000..a76d598 --- /dev/null +++ b/src/htslib-1.21/samples/qtask_ordered.c @@ -0,0 +1,425 @@ +/* qtask_ordered.c -- showcases the htslib api usage + + Copyright (C) 2024 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include +#include +#include + +typedef struct data { + int count; //used up size + int maxsize; //max size per data chunk + bam1_t **bamarray; //bam1_t array for optimal queueing + struct data *next; //pointer to next one - to reuse earlier allocations +} data; + +typedef struct datacache +{ + pthread_mutex_t lock; //synchronizes the access to cache + data *list; //data storage +} datacache; + +typedef struct orderedwrite { + samFile *outfile; //output file handle + sam_hdr_t *samhdr; //header used to write data + hts_tpool_process *queue; //queue from which results to be retrieved + datacache *cache; //to re-use allocated storage + int result; //result code returned by writer thread +} orderedwrite; + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: qtask_ordered infile threadcount outdir [chunksize]\n\ +Calculates GC ratio - sum(G,C) / sum(A,T,C,G) - and adds to each alignment\n\ +as xr:f aux tag. 
Output is saved in outdir.\n\ +chunksize [4096] sets the number of alignments clubbed together to process.\n"); + return; +} + +/// getbamstorage - allocates storage for alignments to queue +/** @param chunk number of bam data to allocate + * @param bamcache cached storage +returns already allocated data storage if one is available, otherwise allocates new +*/ +data* getbamstorage(int chunk, datacache *bamcache) +{ + int i = 0; + data *bamdata = NULL; + + if (!bamcache) { + return NULL; + } + //get from cache if there is an already allocated storage + if (pthread_mutex_lock(&bamcache->lock)) { + return NULL; + } + if (bamcache->list) { //available + bamdata = bamcache->list; + bamcache->list = bamdata->next; //remove and set next one as available + bamdata->next = NULL; //remove link + bamdata->count = 0; + goto end; + } + //allocate and use + if (!(bamdata = malloc(sizeof(data)))) { + goto end; + } + bamdata->bamarray = malloc(chunk * sizeof(bam1_t*)); + if (!bamdata->bamarray) { + free(bamdata); + bamdata = NULL; + goto end; + } + for (i = 0; i < chunk; ++i) { + bamdata->bamarray[i] = bam_init1(); + } + bamdata->maxsize = chunk; + bamdata->count = 0; + bamdata->next = NULL; + +end: + pthread_mutex_unlock(&bamcache->lock); + return bamdata; +} + +/// cleanup_bamstorage - frees a bamdata struct plus contents +/** @param arg Pointer to data to free + @p arg has type void * so it can be used as a callback passed + to hts_tpool_dispatch3(). 
+ */ +void cleanup_bamstorage(void *arg) +{ + data *bamdata = (data *) arg; + if (!bamdata) + return; + if (bamdata->bamarray) { + int i; + for (i = 0; i < bamdata->maxsize; i++) { + bam_destroy1(bamdata->bamarray[i]); + } + free(bamdata->bamarray); + } + free(bamdata); +} + +/// thread_ordered_proc - does the processing of task in queue and queues the output back +/** @param args pointer to set of data to be processed +returns the processed data +the processing could be in any order based on the number of threads in use but read of output +from queue will be in order +a null data indicates the end of input and a null is returned to be added back to result queue +*/ +void *thread_ordered_proc(void *args) +{ + int i = 0, pos = 0; + data *bamdata = (data*)args; + float gcratio = 0; + uint8_t *data = NULL; + + if (bamdata == NULL) + return NULL; // Indicates no more input + + for ( i = 0; i < bamdata->count; ++i) { + //add count + uint64_t count[16] = {0}; + data = bam_get_seq(bamdata->bamarray[i]); + for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) { + count[bam_seqi(data,pos)]++; + } + /*it is faster to count all and use offset to get required counts rather than select + require ones inside the loop*/ + gcratio = (count[2] /*C*/ + count[4] /*G*/) / (float) (count[1] /*A*/ + count[8] /*T*/ + count[2] + count[4]); + + if (bam_aux_append(bamdata->bamarray[i], "xr", 'f', sizeof(gcratio), (const uint8_t*)&gcratio) < 0) { + fprintf(stderr, "Failed to add aux tag xr, errno: %d\n", errno); + break; + } + } + return bamdata; +} + +/// threadfn_orderedwrite - thread that read the output from queue and writes +/** @param args pointer to data specific for the thread +returns NULL +*/ +void *threadfn_orderedwrite(void *args) +{ + orderedwrite *tdata = (orderedwrite*)args; + hts_tpool_result *r = NULL; + data *bamdata = NULL; + int i = 0; + + tdata->result = 0; + + //get result and write; wait if no result is in queue - until shutdown of queue + while (tdata->result 
== 0 && + (r = hts_tpool_next_result_wait(tdata->queue)) != NULL) { + bamdata = (data*) hts_tpool_result_data(r); + + if (bamdata == NULL) { + // Indicator for no more input. Time to stop. + hts_tpool_delete_result(r, 0); + break; + } + + for (i = 0; i < bamdata->count; ++i) { + if (sam_write1(tdata->outfile, tdata->samhdr, bamdata->bamarray[i]) < 0) { + fprintf(stderr, "Failed to write output data\n"); + tdata->result = -1; + break; + } + } + hts_tpool_delete_result(r, 0); //release the result memory + + pthread_mutex_lock(&tdata->cache->lock); + bamdata->next = tdata->cache->list; //make current list as next + tdata->cache->list = bamdata; //set as current to reuse + pthread_mutex_unlock(&tdata->cache->lock); + } + + // Shut down the process queue. If we stopped early due to a write failure, + // this will signal to the other end that something has gone wrong. + hts_tpool_process_shutdown(tdata->queue); + + return NULL; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *outdir = NULL; + char *file = NULL; + int c = 0, ret = EXIT_FAILURE, cnt = 0, started_thread = 0, chunk = 0; + size_t size = 0; + samFile *infile = NULL, *outfile = NULL; + sam_hdr_t *in_samhdr = NULL; + pthread_t thread; + orderedwrite twritedata = {0}; + hts_tpool *pool = NULL; + hts_tpool_process *queue = NULL; + htsThreadPool tpool = {NULL, 0}; + data *bamdata = NULL; + datacache bamcache = {PTHREAD_MUTEX_INITIALIZER, NULL}; + + //qtask infile threadcount outdir [chunksize] + if (argc != 4 && argc != 5) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + cnt = atoi(argv[2]); + outdir = argv[3]; + if (argc == 5) { //chunk size present + chunk = atoi(argv[4]); + } + if (cnt < 1) { //set proper thread count + cnt = 1; + } + if (chunk < 1) { //set valid chunk size + chunk = 4096; + } + + //allocate space for 
output + size = (strlen(outdir) + sizeof("/out.bam") + 1); //space for output file name and null termination + if (!(file = malloc(size))) { + fprintf(stderr, "Failed to set output path\n"); + goto end; + } + snprintf(file, size, "%s/out.bam", outdir); //output file name + if (!(pool = hts_tpool_init(cnt))) { //thread pool + fprintf(stderr, "Failed to create thread pool\n"); + goto end; + } + tpool.pool = pool; //to share the pool for file read and write as well + //queue to use with thread pool, for task and results + if (!(queue = hts_tpool_process_init(pool, cnt * 2, 0))) { + fprintf(stderr, "Failed to create queue\n"); + goto end; + } + //open input file - r reading + if (!(infile = sam_open(inname, "r"))) { + fprintf(stderr, "Could not open %s\n", inname); + goto end; + } + //open output files - w write as SAM, wb write as BAM + if (!(outfile = sam_open(file, "wb"))) { + fprintf(stderr, "Could not open output file\n"); + goto end; + } + //share the thread pool with i/o files + if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 || + hts_set_opt(outfile, HTS_OPT_THREAD_POOL, &tpool) < 0) { + fprintf(stderr, "Failed to set threads to i/o files\n"); + goto end; + } + //read header, required to resolve the target names to proper ids + if (!(in_samhdr = sam_hdr_read(infile))) { + fprintf(stderr, "Failed to read header from file!\n"); + goto end; + } + //write header + if ((sam_hdr_write(outfile, in_samhdr) == -1)) { + fprintf(stderr, "Failed to write header\n"); + goto end; + } + + /* tasks are queued, worker threads get them and process in parallel; + the results are queued and they are to be removed in parallel as well */ + + // start output writer thread for ordered processing + twritedata.outfile = outfile; + twritedata.samhdr = in_samhdr; + twritedata.result = 0; + twritedata.queue = queue; + twritedata.cache = &bamcache; + if (pthread_create(&thread, NULL, threadfn_orderedwrite, &twritedata)) { + fprintf(stderr, "Failed to create writer thread\n"); + 
goto end; + } + started_thread = 1; + + c = 0; + while (c >= 0) { + if (!(bamdata = getbamstorage(chunk, &bamcache))) { + fprintf(stderr, "Failed to allocate memory\n"); + break; + } + //read alignments, upto max size for this lot + for (cnt = 0; cnt < bamdata->maxsize; ++cnt) { + c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]); + if (c < 0) { + break; // EOF or failure + } + } + if (c >= -1 ) { + //max size data or reached EOF + bamdata->count = cnt; + // Queue the data for processing. hts_tpool_dispatch3() is + // used here as it allows in-flight data to be cleaned up + // properly when stopping early due to errors. + if (hts_tpool_dispatch3(pool, queue, thread_ordered_proc, bamdata, + cleanup_bamstorage, cleanup_bamstorage, + 0) == -1) { + fprintf(stderr, "Failed to schedule processing\n"); + goto end; + } + bamdata = NULL; + } else { + fprintf(stderr, "Error in reading data\n"); + break; + } + } + + ret = EXIT_SUCCESS; + + end: + // Tidy up after having dispatched all of the data. + + // Note that the order here is important. In particular, we need + // to join the thread that was started earlier before freeing anything + // to avoid any use-after-free errors. + + // It's also possible to get here early due to various error conditions, + // so we need to carefully check which parts of the program state have + // been created before trying to clean them up. + + if (queue) { + if (-1 == c) { + // EOF read, send a marker to tell the threadfn_orderedwrite() + // function to shut down. + if (hts_tpool_dispatch(pool, queue, thread_ordered_proc, + NULL) == -1) { + fprintf(stderr, "Failed to schedule sentinel job\n"); + ret = EXIT_FAILURE; + } + } else { + // Error or we never wrote anything. Shut down the queue to + // ensure threadfn_orderedwrite() wakes up and terminates. + hts_tpool_process_shutdown(queue); + } + } + + // Wait for threadfn_orderedwrite to finish. 
+ if (started_thread) { + pthread_join(thread, NULL); + + // Once the writer thread has finished, check the result it sent back + if (twritedata.result != 0) { + ret = EXIT_FAILURE; + } + } + + if (queue) { + // Once threadfn_orderedwrite has stopped, the queue can be + // cleaned up. + hts_tpool_process_destroy(queue); + } + + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + if (sam_close(infile) != 0) { + ret = EXIT_FAILURE; + } + } + if (outfile) { + if (sam_close(outfile) != 0) { + ret = EXIT_FAILURE; + } + } + + pthread_mutex_lock(&bamcache.lock); + if (bamcache.list) { + struct data *tmp = NULL; + while (bamcache.list) { + tmp = bamcache.list; + bamcache.list = bamcache.list->next; + cleanup_bamstorage(tmp); + } + } + pthread_mutex_unlock(&bamcache.lock); + + if (file) { + free(file); + } + if (pool) { + hts_tpool_destroy(pool); + } + return ret; +} diff --git a/src/htslib-1.21/samples/qtask_unordered.c b/src/htslib-1.21/samples/qtask_unordered.c new file mode 100644 index 0000000..05fe503 --- /dev/null +++ b/src/htslib-1.21/samples/qtask_unordered.c @@ -0,0 +1,320 @@ +/* qtask_ordered.c -- showcases the htslib api usage + + Copyright (C) 2024 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include +#include +#include + +struct datacache; + +typedef struct basecount { + uint64_t counts[16]; //count of all bases +} basecount; + +typedef struct data { + int count; //used up size + int maxsize; //max size per data chunk + bam1_t **bamarray; //bam1_t array for optimal queueing + + struct datacache *cache; + basecount *bases; //count of all possible bases + struct data *next; //pointer to next one - to reuse earlier allocations +} data; + +typedef struct datacache +{ + pthread_mutex_t lock; //synchronizes the access to cache + data *list; //data storage +} datacache; + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: qtask_unordered infile threadcount [chunksize]\n\ +Shows the base counts and calculates GC ratio - sum(G,C) / sum(A,T,C,G)\n\ +chunksize [4096] sets the number of alignments clubbed together to process.\n"); + return; +} + +/// getbamstorage - allocates storage for alignments to queue +/** @param chunk number of bam data to allocate + * @param bases storage of result + * @param bamcache cached storage +returns already allocated data storage if one is available, otherwise allocates new +*/ +data* getbamstorage(int chunk, basecount *bases, datacache *bamcache) +{ + int i = 0; + data *bamdata = 
NULL; + + if (!bamcache || !bases) { + return NULL; + } + //get from cache if there is an already allocated storage + if (pthread_mutex_lock(&bamcache->lock)) { + return NULL; + } + if (bamcache->list) { //available + bamdata = bamcache->list; + bamcache->list = bamdata->next; //remove and set next one as available + bamdata->next = NULL; //remove link + bamdata->count = 0; + + bamdata->bases = bases; + bamdata->cache = bamcache; + goto end; + } + //allocate and use + if (!(bamdata = malloc(sizeof(data)))) { + goto end; + } + bamdata->bamarray = malloc(chunk * sizeof(bam1_t*)); + if (!bamdata->bamarray) { + free(bamdata); + bamdata = NULL; + goto end; + } + for (i = 0; i < chunk; ++i) { + bamdata->bamarray[i] = bam_init1(); + } + bamdata->maxsize = chunk; + bamdata->count = 0; + bamdata->next = NULL; + + bamdata->bases = bases; + bamdata->cache = bamcache; + +end: + pthread_mutex_unlock(&bamcache->lock); + return bamdata; +} + +/// cleanup_bamstorage - frees a bamdata struct plus contents +/** @param arg Pointer to data to free + @p arg has type void * so it can be used as a callback passed + to hts_tpool_dispatch3(). 
+ */ +void cleanup_bamstorage(void *arg) +{ + data *bamdata = (data *) arg; + if (!bamdata) + return; + if (bamdata->bamarray) { + int i; + for (i = 0; i < bamdata->maxsize; i++) { + bam_destroy1(bamdata->bamarray[i]); + } + free(bamdata->bamarray); + } + free(bamdata); +} + +/// thread_unordered_proc - does the processing of task in queue and updates result +/** @param args pointer to set of data to be processed +returns NULL +the processing could be in any order based on the number of threads in use +*/ +void *thread_unordered_proc(void *args) +{ + int i = 0; + data *bamdata = (data*)args; + uint64_t pos = 0; + uint8_t *data = NULL; + uint64_t counts[16] = {0}; + for ( i = 0; i < bamdata->count; ++i) { + data = bam_get_seq(bamdata->bamarray[i]); + for (pos = 0; pos < bamdata->bamarray[i]->core.l_qseq; ++pos) { + /* it is faster to count all bases and select required ones later + compared to select and count here */ + counts[bam_seqi(data, pos)]++; + } + } + //update result and add the memory block for reuse + pthread_mutex_lock(&bamdata->cache->lock); + for (i = 0; i < 16; i++) { + bamdata->bases->counts[i] += counts[i]; + } + + bamdata->next = bamdata->cache->list; + bamdata->cache->list = bamdata; + pthread_mutex_unlock(&bamdata->cache->lock); + + return NULL; +} + +/// main - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL; + int c = 0, ret = EXIT_FAILURE, cnt = 0, chunk = 0; + samFile *infile = NULL; + sam_hdr_t *in_samhdr = NULL; + hts_tpool *pool = NULL; + hts_tpool_process *queue = NULL; + htsThreadPool tpool = {NULL, 0}; + data *bamdata = NULL; + basecount gccount = {{0}}; + datacache bamcache = {PTHREAD_MUTEX_INITIALIZER, NULL}; + + //qtask infile threadcount [chunksize] + if (argc != 3 && argc != 4) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + cnt = atoi(argv[2]); + if (argc == 
4) { + chunk = atoi(argv[3]); + } + if (cnt < 1) { + cnt = 1; + } + if (chunk < 1) { + chunk = 4096; + } + + if (!(pool = hts_tpool_init(cnt))) { + fprintf(stderr, "Failed to create thread pool\n"); + goto end; + } + tpool.pool = pool; //to share the pool for file read and write as well + //queue to use with thread pool, for tasks + if (!(queue = hts_tpool_process_init(pool, cnt * 2, 1))) { + fprintf(stderr, "Failed to create queue\n"); + goto end; + } + //open input file - r reading + if (!(infile = sam_open(inname, "r"))) { + fprintf(stderr, "Could not open %s\n", inname); + goto end; + } + //share the thread pool with i/o files + if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0) { + fprintf(stderr, "Failed to set threads to i/o files\n"); + goto end; + } + //read header, required to resolve the target names to proper ids + if (!(in_samhdr = sam_hdr_read(infile))) { + fprintf(stderr, "Failed to read header from file!\n"); + goto end; + } + + /*tasks are queued, worker threads get them and process in parallel; + all bases are counted instead of counting atcg alone as it is faster*/ + + c = 0; + while (c >= 0) { + //use cached storage to avoid allocate/deallocate overheads + if (!(bamdata = getbamstorage(chunk, &gccount, &bamcache))) { + fprintf(stderr, "Failed to allocate memory\n"); + break; + } + //read alignments, upto max size for this lot + for (cnt = 0; cnt < bamdata->maxsize; ++cnt) { + c = sam_read1(infile, in_samhdr, bamdata->bamarray[cnt]); + if (c < 0) { + break; // EOF or failure + } + } + if (c >= -1 ) { + //max size data or reached EOF + bamdata->count = cnt; + // Queue the data for processing. hts_tpool_dispatch3() is + // used here as it allows in-flight data to be cleaned up + // properly when stopping early due to errors. 
+ if (hts_tpool_dispatch3(pool, queue, thread_unordered_proc, bamdata, + cleanup_bamstorage, cleanup_bamstorage, + 0) == -1) { + fprintf(stderr, "Failed to schedule processing\n"); + goto end; + } + bamdata = NULL; + } else { + fprintf(stderr, "Error in reading data\n"); + break; + } + } + + if (-1 == c) { + // EOF read, ensure all are processed, waits for all to finish + if (hts_tpool_process_flush(queue) == -1) { + fprintf(stderr, "Failed to flush queue\n"); + } else { //all done + //refer seq_nt16_str to find position of required bases + fprintf(stdout, "GCratio: %f\nBase counts:\n", + (gccount.counts[2] /*C*/ + gccount.counts[4] /*G*/) / (float) + (gccount.counts[1] /*A*/ + gccount.counts[8] /*T*/ + + gccount.counts[2] + gccount.counts[4])); + + for (cnt = 0; cnt < 16; ++cnt) { + fprintf(stdout, "%c: %"PRIu64"\n", seq_nt16_str[cnt], gccount.counts[cnt]); + } + + ret = EXIT_SUCCESS; + } + } + end: + if (queue) { + hts_tpool_process_destroy(queue); + } + + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + if (sam_close(infile) != 0) { + ret = EXIT_FAILURE; + } + } + + pthread_mutex_lock(&bamcache.lock); + if (bamcache.list) { + struct data *tmp = NULL; + while (bamcache.list) { + tmp = bamcache.list; + bamcache.list = bamcache.list->next; + cleanup_bamstorage(tmp); + } + } + pthread_mutex_unlock(&bamcache.lock); + + if (pool) { + hts_tpool_destroy(pool); + } + return ret; +} diff --git a/src/htslib-1.21/samples/read_aux.c b/src/htslib-1.21/samples/read_aux.c new file mode 100644 index 0000000..efd6f36 --- /dev/null +++ b/src/htslib-1.21/samples/read_aux.c @@ -0,0 +1,207 @@ +/* read_aux.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: read_aux infile tag\n\ +Read the given aux tag from alignments either as SAM string or as raw data\n"); +} + +/// printauxdata - prints aux data +/** @param fp - file to which it to be printed - stdout or null + * @param type - aux type + * @param idx - index in array, -1 when not an array type + * @param data - data + * recurses when the data is array type +returns 1 on failure 0 on success +*/ +int printauxdata(FILE *fp, char type, int32_t idx, const uint8_t *data) +{ + uint32_t auxBcnt = 0; + int i = 0; + char auxBType = 'Z'; + + //the tag is already queried and ensured to 
exist and the type is retrieved from the tag data, also iterated within index for arrays, so no error is expected here. + //when these apis are used explicitly, these error conditions needs to be handled based on return value and errno + switch(type) { + case 'A': + fprintf(fp, "%c", bam_aux2A(data)); //byte data + break; + case 'c': + fprintf(fp, "%d", (int8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 1 byte data; bam_auxB2i - from array or bam_aux2i - non array data + break; + case 'C': + fprintf(fp, "%u", (uint8_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 1 byte data + break; + case 's': + fprintf(fp, "%d", (int16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 2 byte data + break; + case 'S': + fprintf(fp, "%u", (uint16_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 2 byte data + break; + case 'i': + fprintf(fp, "%d", (int32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //signed 4 byte data + break; + case 'I': + fprintf(fp, "%u", (uint32_t)(idx > -1 ? bam_auxB2i(data, idx) : bam_aux2i(data))); //unsigned 4 byte data + break; + case 'f': + case 'd': + fprintf(fp, "%g", (float)(idx > -1 ? 
bam_auxB2f(data, idx) : bam_aux2f(data))); //floating point data, 4 bytes + break; + case 'H': + case 'Z': + fprintf(fp, "%s", bam_aux2Z(data)); //array of char or hex data + break; + case 'B': //array of char/int/float + auxBcnt = bam_auxB_len(data); //length of array + auxBType = bam_aux_type(data + 1); //type of element in array + fprintf(fp, "%c", auxBType); + for (i = 0; i < auxBcnt; ++i) { //iterate the array + fprintf(fp, ","); + //calling recursively with index to reuse a few lines + if (printauxdata(fp, auxBType, i, data) == EXIT_FAILURE) { + return EXIT_FAILURE; + } + } + break; + default: + printf("Invalid aux tag?\n"); + return EXIT_FAILURE; + break; + } + return EXIT_SUCCESS; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *tag = NULL; + int c = 0, ret = EXIT_FAILURE, ret_r = 0, i = 0; + sam_hdr_t *in_samhdr = NULL; + samFile *infile = NULL; + bam1_t *bamdata = NULL; + uint8_t *data = NULL; + kstring_t sdata = KS_INITIALIZE; + + //read_aux infile tag + if (argc != 3) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + tag = argv[2]; + + if (!(bamdata = bam_init1())) { + printf("Failed to allocate data memory!\n"); + goto end; + } + + //open input file + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + errno = 0; i++; + ks_clear(&sdata); + if (i % 2) { //use options alternatively to demonstrate both + //option 1 - get data as string with tag and type + if ((c = bam_aux_get_str(bamdata, tag, &sdata)) == 1) { + printf("%s\n",sdata.s); + } + else if (c == 0 && errno == ENOENT) { + //tag not present + printf("Tag not present\n"); + } + else { 
+ //error + printf("Failed to get tag\n"); + goto end; + } + } + else { + //option 2 - get raw data + if (!(data = bam_aux_get(bamdata, tag))) { + //tag data not returned, errno gives the reason + if (errno == ENOENT) { + printf("Tag not present\n"); + } + else { + printf("Invalid aux data\n"); + } + } + else { + //got the tag, read and print + if (printauxdata(stdout, bam_aux_type(data), -1, data) == EXIT_FAILURE) { + printf("Failed to read aux data\n"); + goto end; + } + printf("\n"); + } + } + } + if (ret_r < -1) { + //read error + printf("Failed to read data\n"); + goto end; + } + + ret = EXIT_SUCCESS; +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + ks_free(&sdata); + return ret; +} diff --git a/src/htslib-1.21/samples/read_bam.c b/src/htslib-1.21/samples/read_bam.c new file mode 100644 index 0000000..30bedf8 --- /dev/null +++ b/src/htslib-1.21/samples/read_bam.c @@ -0,0 +1,139 @@ +/* read_bam.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: read_bam infile\n\ +Shows the alignment data from file\n"); +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *tidname = NULL, *flags = NULL; + int ret = EXIT_FAILURE; + sam_hdr_t *in_samhdr = NULL; + samFile *infile = NULL; + + int ret_r = 0, i = 0; + bam1_t *bamdata = NULL; + uint8_t *data = NULL; + uint32_t *cigar = NULL; + + + //read_bam infile + if (argc != 2) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + + if (!(bamdata = bam_init1())) { + printf("Failed to allocate data memory!\n"); + goto end; + } + + //open input file + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + while ((ret_r = sam_read1(infile, in_samhdr, bamdata)) >= 0) + { + //QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… + printf("NAME: %s\n", bam_get_qname(bamdata)); //get the query name using the macro + flags = bam_flag2str(bamdata->core.flag); //flags as string + printf("FLG: %d - %s\n", bamdata->core.flag, flags); //flag is available in core structure + free((void*)flags); + tidname = 
sam_hdr_tid2name(in_samhdr, bamdata->core.tid); + printf("RNAME/TID: %d - %s\n", bamdata->core.tid, tidname? tidname: "" ); //retrieves the target name using the value in bam and by referring the header + printf("POS: %"PRIhts_pos"\n", bamdata->core.pos + 1); //internally position is 0 based and on text output / SAM it is 1 based + printf("MQUAL: %d\n", bamdata->core.qual); //map quality value + + cigar = bam_get_cigar(bamdata); //retrieves the cigar data + printf("CGR: "); + for (i = 0; i < bamdata->core.n_cigar; ++i) { //no. of cigar data entries + printf("%d%c", bam_cigar_oplen(cigar[i]), bam_cigar_opchr(cigar[i])); //the macros gives the count of operation and the symbol of operation for given cigar entry + } + printf("\nTLEN/ISIZE: %"PRIhts_pos"\n", bamdata->core.isize); + + data = bam_get_seq(bamdata); //get the sequence data + if (bamdata->core.l_qseq != bam_cigar2qlen(bamdata->core.n_cigar, cigar)) { //checks the length with CIGAR and query + printf("\nLength doesnt matches to cigar data\n"); + goto end; + } + + printf("SEQ: "); + for (i = 0; i < bamdata->core.l_qseq ; ++i) { //sequence length + printf("%c", seq_nt16_str[bam_seqi(data, i)]); //retrieves the base from (internal compressed) sequence data + } + printf("\nQUAL: "); + for (int i = 0; i < bamdata->core.l_qseq ; ++i) { + printf("%c", bam_get_qual(bamdata)[i]+33); //retrives the quality value + } + printf("\n\n"); + } + + if (ret_r == -1) { + // no error! + ret = EXIT_SUCCESS; + } + else { + printf("Failed to read data\n"); + } +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/samples/read_fast.c b/src/htslib-1.21/samples/read_fast.c new file mode 100644 index 0000000..10f807b --- /dev/null +++ b/src/htslib-1.21/samples/read_fast.c @@ -0,0 +1,119 @@ +/* read_fast.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: read_fast \n\ +Reads the fasta/fastq file and shows the content.\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL; //input file name + int c = 0, ret = EXIT_FAILURE; + samFile *infile = NULL; //sam file + sam_hdr_t *in_samhdr = NULL; //header of file + bam1_t *bamdata = NULL; //to hold the read data + + if (argc != 2) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + + 
//initialize + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input files - r reading + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + if (infile->format.format != fasta_format && infile->format.format != fastq_format) { + printf("Invalid file specified\n"); + goto end; + } + + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf( "Failed to read header from file\n"); + goto end; + } + + //read data + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + printf("\nname: "); + printf("%s", bam_get_qname(bamdata)); + printf("\nsequence: "); + for (c = 0; c < bamdata->core.l_qseq; ++c) { + printf("%c", seq_nt16_str[bam_seqi(bam_get_seq(bamdata), c)]); + } + if (infile->format.format == fastq_format) { + printf("\nquality: "); + for (c = 0; c < bamdata->core.l_qseq; ++c) { + printf("%c", bam_get_qual(bamdata)[c] + 33); + } + } + } + printf("\n"); + if (c != -1) { + //error + printf("Failed to get data\n"); + goto end; + } + //else -1 / EOF + ret = EXIT_SUCCESS; +end: + //clean up + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/samples/read_fast_index.c b/src/htslib-1.21/samples/read_fast_index.c new file mode 100644 index 0000000..9707663 --- /dev/null +++ b/src/htslib-1.21/samples/read_fast_index.c @@ -0,0 +1,163 @@ +/* read_fast_index.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: read_fast_i A/Q 0/1 regiondef\n\ +Reads the fasta/fastq file using index and shows the content.\n\ +For fasta files use A and Q for fastq files.\n\ +Region can be 1 or more of [:start-end] entries separated by comma.\n\ +For single region, give regcount as 0 and non 0 for multi-regions.\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *region = NULL, *data = NULL, *remaining = 
NULL; + int ret = EXIT_FAILURE, tid = -1, usemulti = 0; + faidx_t *idx = NULL; + enum fai_format_options fmt = FAI_FASTA; + hts_pos_t len = 0, beg = 0, end = 0; + + //read_fast_i infile A/Q regcount region + if (argc != 5) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + if (argv[2][0] == 'Q') { + fmt = FAI_FASTQ; + } + usemulti = atoi(argv[3]); + region = argv[4]; + + //load index + if (!(idx = fai_load3_format(inname, NULL, NULL, FAI_CREATE, fmt))) { + printf("Failed to load index\n"); + goto end; + } + + if (!usemulti) { + //get data from given region + if (!(data = fai_fetch64(idx, region, &len))) { + if (-1 == len) { + printf("Failed to get data\n"); //failure + goto end; + } + else { + printf("Data not found for given region\n"); //no data + } + } + else { + printf("Data: %"PRId64" %s\n", len, data); + free((void*)data); + //get quality for fastq type + if (fmt == FAI_FASTQ) { + if (!(data = fai_fetchqual64(idx, region, &len))) { + if (len == -1) { + printf("Failed to get data\n"); + goto end; + } + else { + printf("Data not found for given region\n"); + } + } + else { + printf("Qual: %"PRId64" %s\n", len, data); + free((void*)data); + } + } + } + } + else { + //parse, get each region and get data for each + while ((remaining = fai_parse_region(idx, region, &tid, &beg, &end, HTS_PARSE_LIST))) { //here expects regions as csv + //parsed the region, correct end points based on actual data + if (fai_adjust_region(idx, tid, &beg, &end) == -1) { + printf("Error in adjusting region for tid %d\n", tid); + goto end; + } + //get data for given region + if (!(data = faidx_fetch_seq64(idx, faidx_iseq(idx, tid), beg, end, &len))) { + if (len == -1) { + printf("Failed to get data\n"); //failure + goto end; + } + else { + printf("No data found for given region\n"); //no data + } + } + else { + printf("Data: %"PRIhts_pos" %s\n", len, data); + free((void*)data); + data = NULL; + + //get quality data for fastq + if (fmt == FAI_FASTQ) { + if (!(data = 
faidx_fetch_qual64(idx, faidx_iseq(idx, tid), beg, end, &len))) { + if (len == -1) { + printf("Failed to get qual data\n"); + goto end; + } + else { + printf("No data found for given region\n"); + } + } + else { + printf("Qual: %"PRIhts_pos" %s\n", len, data); + free((void*)data); + data = NULL; + } + } + } + region = remaining; //parse remaining region defs + } + } + + ret = EXIT_SUCCESS; +end: + //clean up + if (idx) { + fai_destroy(idx); + } + return ret; +} diff --git a/src/htslib-1.21/samples/read_header.c b/src/htslib-1.21/samples/read_header.c new file mode 100644 index 0000000..54b07e7 --- /dev/null +++ b/src/htslib-1.21/samples/read_header.c @@ -0,0 +1,173 @@ +/* read_header.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: read_header infile header [id val] [tag]\n\ +This shows given tag from given header or the whole line\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *header = NULL, *tag = NULL, *idval = NULL; + char *id = NULL; + int c = 0, ret = EXIT_FAILURE, linecnt = 0; + samFile *infile = NULL; + sam_hdr_t *in_samhdr = NULL; + kstring_t data = KS_INITIALIZE; + + //read_header infile header tag + if (argc < 3 || argc > 6) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + header = argv[2]; + if (argc == 4) { //header and tag + tag = argv[3]; + //find unique identifier field name for requested header type + if (header[0] == 'H' && header[1] == 'D') { + id = NULL; + } + else if (header[0] == 'S' && header[1] == 'Q') { + id = "SN"; + } + else if (header[0] == 'R' && header[1] == 'G') { + id = "ID"; + } + else if (header[0] == 'P' && header[1] == 'G') { + id = "ID"; + } + else if (header[0] == 'C' && header[1] == 'O') { + id = ""; + } + else { + printf("Invalid header type\n"); + goto end; + } + } + else if (argc == 5) { //header id val + id = argv[3]; + idval = argv[4]; + } + else if (argc == 6) { //header id val tag + id = argv[3]; + idval = argv[4]; + tag = argv[5]; + } + + 
//open input files + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + if (id && idval) { + if (tag) { + ret = sam_hdr_find_tag_id(in_samhdr, header, id, idval, tag, &data); + } + else { + ret = sam_hdr_find_line_id(in_samhdr, header, id, idval, &data); + } + + if (ret == 0) { + printf("%s\n", data.s); + } + else if (ret == -1) { + printf("No matching tag found\n"); + goto end; + } + else { + printf("Failed to find header line\n"); + goto end; + } + } + else { + //get count of given header type + linecnt = sam_hdr_count_lines(in_samhdr, header); + if (linecnt <= 0) { //0: no line of this type, negative: the count itself failed - either way exit with failure + printf("%s\n", linecnt ? "Failed to get line count" : "No matching line found"); + goto end; + } + for (c = 0; c < linecnt; ++c ) { + if (tag) { + //non CO, get the tag requested + ret = sam_hdr_find_tag_pos(in_samhdr, header, c, tag, &data); + } + else { + //CO header, there are no tags but the whole line + ret = sam_hdr_find_line_pos(in_samhdr, header, c, &data); + } + + if (ret == 0) { + printf("%s\n", data.s); + continue; + } + else if (ret == -1) { + printf("Tag not present\n"); + continue; + } + else { + printf("Failed to get tag\n"); + goto end; + } + } + } + ret = EXIT_SUCCESS; + +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + ks_free(&data); + return ret; +} diff --git a/src/htslib-1.21/samples/read_refname.c b/src/htslib-1.21/samples/read_refname.c new file mode 100644 index 0000000..9b4918d --- /dev/null +++ b/src/htslib-1.21/samples/read_refname.c @@ -0,0 +1,125 @@ +/* read_refname.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: read_refname infile minsize\n\ +This shows name of references which has length above the given size\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *id = NULL; + int c = 0, ret = EXIT_FAILURE, res = 0, linecnt = 0, pos = 0; + samFile *infile = NULL; + sam_hdr_t *in_samhdr = NULL; + kstring_t data = KS_INITIALIZE; + int64_t minsize = 0, size = 0; + + if (argc != 3 && argc != 2) { + 
print_usage(stdout); + goto end; + } + inname = argv[1]; + if (argc == 3) { + minsize = atoll(argv[2]); + } + + //open input files + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + linecnt = sam_hdr_count_lines(in_samhdr, "SQ"); //get reference count + if (linecnt <= 0) { + if (!linecnt) { + printf("No reference line present\n"); + } + else { + printf("Failed to get reference line count\n"); + } + goto end; + } + //iterate and check each reference's length + for (pos = 1, c = 0; c < linecnt; ++c) { + if ((res = sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "LN", &data)) == -2) { //res holds the API status itself (not the comparison result); ret stays EXIT_FAILURE until the end + printf("Failed to get length\n"); + goto end; + } + else if (res == -1) { + //length not present, ignore + continue; + } + //else have length + size = atoll(data.s); + if (size < minsize) { + //not required + continue; + } + if (!(id = sam_hdr_line_name(in_samhdr, "SQ", c))) { //sam_hdr_find_tag_pos(in_samhdr, "SQ", c, "SN", &data) can also do the same! + printf("Failed to get id for reference data\n"); + goto end; + } + printf("%d,%s,%s\n", pos, id, data.s); + pos++; + } + + ret = EXIT_SUCCESS; + +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + ks_free(&data); + return ret; +} diff --git a/src/htslib-1.21/samples/rem_header.c b/src/htslib-1.21/samples/rem_header.c new file mode 100644 index 0000000..852d5f0 --- /dev/null +++ b/src/htslib-1.21/samples/rem_header.c @@ -0,0 +1,138 @@ +/* rem_header.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: rem_header infile header [id]\n\ +Removes header line of given type and id\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *header = NULL, *idval = NULL; + char *id = NULL; + int ret = EXIT_FAILURE; + samFile *infile = NULL, *outfile = NULL; + sam_hdr_t *in_samhdr = NULL; + + //update_header infile header idval tag value + if (argc <3 || argc > 4) { + //3 & 4 are ok, 3-> all of given 
header type, 4->given id of given header type to be removed + print_usage(stderr); + goto end; + } + inname = argv[1]; + header = argv[2]; + if (argc == 4) { + idval = argv[3]; + } + + //unique identifier for each of the header types + if (header[0] == 'H' && header[1] == 'D') { + id = NULL; + } + else if (header[0] == 'S' && header[1] == 'Q') { + id = "SN"; + } + else if (header[0] == 'R' && header[1] == 'G') { + id = "ID"; + } + else if (header[0] == 'P' && header[1] == 'G') { + id = "ID"; + } + else if (header[0] == 'C' && header[1] == 'O') { + //CO field can be removed using the position of it using sam_hdr_remove_line_pos + id = ""; + } + else { + printf("Invalid header type\n"); + goto end; + } + + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update + printf("Could not open stdout\n"); + goto end; + } + + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + if (idval) { + //remove specific line + if (sam_hdr_remove_line_id(in_samhdr, header, id, idval)) { + printf("Failed to remove header line\n"); + goto end; + } + } + else { + //remove multiple lines of a header type + if (sam_hdr_remove_lines(in_samhdr, header, id, NULL)) { + printf("Failed to remove header line\n"); + goto end; + } + } + //write output + if (sam_hdr_write(outfile, in_samhdr) < 0) { + printf("Failed to write output\n"); + goto end; + } + ret = EXIT_SUCCESS; + //bam data write to follow.... 
+end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (outfile) { + sam_close(outfile); + } + return ret; +} diff --git a/src/htslib-1.21/samples/sample.bed b/src/htslib-1.21/samples/sample.bed new file mode 100644 index 0000000..2ae458f --- /dev/null +++ b/src/htslib-1.21/samples/sample.bed @@ -0,0 +1,4 @@ +T1 1 2 +T1 30 35 +T2 10 15 +T2 30 40 diff --git a/src/htslib-1.18/samples/sample.ref.fa b/src/htslib-1.21/samples/sample.ref.fa similarity index 100% rename from src/htslib-1.18/samples/sample.ref.fa rename to src/htslib-1.21/samples/sample.ref.fa diff --git a/src/htslib-1.21/samples/sample.ref.fq b/src/htslib-1.21/samples/sample.ref.fq new file mode 100644 index 0000000..18b2b96 --- /dev/null +++ b/src/htslib-1.21/samples/sample.ref.fq @@ -0,0 +1,16 @@ +@T1 +AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT ++ +AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT +@T2 +TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT ++ +TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT +@T3 +TTTTGGGGACTGTTAACAGT ++ +TTTTGGGGACTGTTAACAGT +@T4 +TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTTGGGGACTGTTAACAGT ++ +TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTTGGGGACTGTTAACAGT diff --git a/src/htslib-1.21/samples/sample.sam b/src/htslib-1.21/samples/sample.sam new file mode 100644 index 0000000..58515c9 --- /dev/null +++ b/src/htslib-1.21/samples/sample.sam @@ -0,0 +1,29 @@ +@HD VN:1.17 SO:unknown +@SQ SN:T1 LN:40 +@SQ SN:T2 LN:40 +@CO @SQ SN* LN* AH AN AS DS M5 SP TP UR +@CO @RG ID* BC CN DS DT FO KS LB PG PI PL PM PU SM +@CO @PG ID* PN CL PP DS VN +@CO this is a dummy alignment file to demonstrate different abilities of hts apis +@CO QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN SEQ QUAL [TAG:TYPE:VALUE]… +@CO 1234567890123456789012345678901234567890 +@CO AAAAACTGAAAACCCCTTTTGGGGACTGTTAACAGTTTTT T1 +@CO TTTTCCCCACTGAAAACCCCTTTTGGGGACTGTTAACAGT T2 +@CO ITR1-ITR2M, ITR2-ITR2M are 
proper pairs in T1 and T2, UNMP1 is partly mapped and pair is unmapped, UNMP2 & 3 are unmapped +@CO A1-A2, A4-A3 are proper pairs with A4-A3 in different read order. A5 is secondary alignment +ITR1 99 T1 5 40 4M = 33 10 ACTG ()() +ITR2 147 T2 23 49 2M = 35 -10 TT ** +ITR2M 99 T2 35 51 2M = 23 10 AA && +ITR1M 147 T1 33 37 4M = 5 -10 ACTG $$$$ +UNMP1 73 T1 21 40 3M * 0 5 GGG &&1 +UNMP2 141 * 0 0 * * 0 7 AA && +UNMP3 77 * 0 0 * * 0 5 GGG &&2 +A1 99 T1 25 35 6M = 31 8 ACTGTT ****** +A2 147 T1 31 33 6M = 25 -8 ACTGTT ()()() +A3 147 T2 23 47 2M1X = 12 -5 TTG ((( +A4 99 T2 12 50 3M = 23 5 GAA ()( +A5 355 T1 25 35 4M = 33 5 ACTG PPPP +B1 99 T1 25 35 6M = 31 8 GCTATT ****** +B3 147 T2 23 47 2M1X = 12 -5 TAG ((( +B4 99 T2 12 50 3M = 23 5 GAT ()( +B5 355 T1 25 35 4M = 33 5 AGTG PPPP diff --git a/src/htslib-1.21/samples/split.c b/src/htslib-1.21/samples/split.c new file mode 100644 index 0000000..c51dbd3 --- /dev/null +++ b/src/htslib-1.21/samples/split.c @@ -0,0 +1,153 @@ +/* split.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: split infile outdir\n\ +Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ +Shows the basic writing of output\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *outdir = NULL; + char *file1 = NULL, *file2 = NULL; + int c = 0, ret = EXIT_FAILURE, size = 0; + samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; + sam_hdr_t *in_samhdr = NULL; + bam1_t *bamdata = NULL; + + if (argc != 3) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + outdir = argv[2]; + + //allocate space for output + size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination + file1 = malloc(size); + file2 = malloc(size); + if (!file1 || !file2) { + printf("Failed to set output path\n"); + goto end; + } + + //output file names + snprintf(file1, size, "%s/1.sam", outdir); //for SAM output + snprintf(file2, size, "%s/2.bam", outdir); //for BAM output + //bam data storage + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input file - r reading + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto 
end; + } + //open output files - w write as SAM, wb write as BAM + outfile1 = sam_open(file1, "w"); //as SAM + outfile2 = sam_open(file2, "wb"); //as BAM + if (!outfile1 || !outfile2) { + printf("Could not open output file\n"); + goto end; + } + + //read header, required to resolve the target names to proper ids + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + //write header + if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { + printf("Failed to write header\n"); + goto end; + } + + //check flags and write + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) { + if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + else if (bamdata->core.flag & BAM_FREAD2) { + if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + } + if (-1 == c) { + //EOF + ret = EXIT_SUCCESS; + } + else { + printf("Error in reading data\n"); + } +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (file1) { + free(file1); + } + if (file2) { + free(file2); + } + if (outfile1) { + sam_close(outfile1); + } + if (outfile2) { + sam_close(outfile2); + } + return ret; +} diff --git a/src/htslib-1.21/samples/split2.c b/src/htslib-1.21/samples/split2.c new file mode 100644 index 0000000..33fabbd --- /dev/null +++ b/src/htslib-1.21/samples/split2.c @@ -0,0 +1,158 @@ +/* split2.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. 
+ + Author: Vasudeva Sarma +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: split2 infile outdir\n\ +Splits the input file alignments to read1 and read2 and saves as 1.sam.gz and 2.sam in given directory\n\ +Shows file type selection through name and format api\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *outdir = NULL; + char *file1 = NULL, *file2 = NULL, mode1[5] = "w", mode2[5] = "w"; + int c = 0, ret = EXIT_FAILURE, size = 0; + samFile *infile = NULL, 
*outfile1 = NULL, *outfile2 = NULL; + sam_hdr_t *in_samhdr = NULL; + bam1_t *bamdata = NULL; + + if (argc != 3) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + outdir = argv[2]; + + //allocate space for output + size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam.gz") + 1); //space for output file name and null termination + file1 = malloc(size); + file2 = malloc(size); + if (!file1 || !file2) { + printf("Failed to set output path\n"); + goto end; + } + + //output file names + snprintf(file1, size, "%s/1.sam.gz", outdir); //name of Read1 file + snprintf(file2, size, "%s/2.sam", outdir); //name of Read2 file + //bam data storage + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //set file open mode based on file name for 1st and as explicit for 2nd + if ((sam_open_mode(mode1+1, file1, NULL) == -1) || (sam_open_mode(mode2+1, file2, "sam.gz") == -1)) { + printf("Failed to set open mode\n"); + goto end; + } + //open input file + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + //open output files + outfile1 = sam_open(file1, mode1); //as compressed SAM through sam_open + outfile2 = sam_open_format(file2, mode2, NULL); //as compressed SAM through sam_open_format + if (!outfile1 || !outfile2) { + printf("Could not open output file\n"); + goto end; + } + + //read header, required to resolve the target names to proper ids + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + //write header + if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { + printf("Failed to write header\n"); + goto end; + } + + //check flags and write + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) { + if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + else if 
(bamdata->core.flag & BAM_FREAD2) { + if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + } + if (-1 == c) { + //EOF + ret = EXIT_SUCCESS; + } + else { + printf("Error in reading data\n"); + } +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (file1) { + free(file1); + } + if (file2) { + free(file2); + } + if (outfile1) { + sam_close(outfile1); + } + if (outfile2) { + sam_close(outfile2); + } + return ret; +} diff --git a/src/htslib-1.21/samples/split_thread1.c b/src/htslib-1.21/samples/split_thread1.c new file mode 100644 index 0000000..551c7f0 --- /dev/null +++ b/src/htslib-1.21/samples/split_thread1.c @@ -0,0 +1,161 @@ +/* split_thread1.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: split_t1 infile outdir\n\ +Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ +Shows the usage of basic thread in htslib\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *outdir = NULL; + char *file1 = NULL, *file2 = NULL; + int c = 0, ret = EXIT_FAILURE, size = 0; + samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; + sam_hdr_t *in_samhdr = NULL; + bam1_t *bamdata = NULL; + + if (argc != 3) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + outdir = argv[2]; + + //allocate space for output + size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination + file1 = malloc(size); + file2 = malloc(size); + if (!file1 || !file2) { + printf("Failed to set output path\n"); + goto end; + } + + //output file names + snprintf(file1, size, "%s/1.sam", outdir); //for SAM output + snprintf(file2, size, "%s/2.bam", outdir); //for BAM output + //bam data storage + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input file - r reading + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); 
+ goto end; + } + //open output files - w write as SAM, wb write as BAM + outfile1 = sam_open(file1, "w"); //as SAM + outfile2 = sam_open(file2, "wb"); //as BAM + if (!outfile1 || !outfile2) { + printf("Could not open output file\n"); + goto end; + } + + //create file specific threads + if (hts_set_opt(infile, HTS_OPT_NTHREADS, 1) < 0 || //1 thread specific for reading + hts_set_opt(outfile1, HTS_OPT_NTHREADS, 1) < 0 || //1 thread specific for sam write + hts_set_opt(outfile2, HTS_OPT_NTHREADS, 2) < 0) { //2 thread specific for bam write + printf("Failed to set thread options\n"); + goto end; + } + + //read header, required to resolve the target names to proper ids + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + //write header + if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { + printf("Failed to write header\n"); + goto end; + } + + //check flags and write + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) { + if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + else if (bamdata->core.flag & BAM_FREAD2) { + if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + } + if (-1 == c) { + //EOF + ret = EXIT_SUCCESS; + } + else { + printf("Error in reading data\n"); + } +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (file1) { + free(file1); + } + if (file2) { + free(file2); + } + if (outfile1) { + sam_close(outfile1); + } + if (outfile2) { + sam_close(outfile2); + } + return ret; +} diff --git a/src/htslib-1.21/samples/split_thread2.c b/src/htslib-1.21/samples/split_thread2.c new file mode 100644 index 0000000..dc8bc9f --- /dev/null +++ b/src/htslib-1.21/samples/split_thread2.c 
@@ -0,0 +1,171 @@ +/* split_thread2.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: split_t2 infile outdir\n\ +Splits the input file alignments to read1 and read2 and saves as 1.sam and 2.bam in given directory\n\ +Shows the usage of thread pool\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *outdir = NULL; + char *file1 = NULL, *file2 = 
NULL; + int c = 0, ret = EXIT_FAILURE, size = 0; + samFile *infile = NULL, *outfile1 = NULL, *outfile2 = NULL; + sam_hdr_t *in_samhdr = NULL; + bam1_t *bamdata = NULL; + htsThreadPool tpool = {NULL, 0}; + + if (argc != 3) { + print_usage(stdout); + goto end; + } + inname = argv[1]; + outdir = argv[2]; + + //allocate space for output + size = sizeof(char) * (strlen(outdir) + sizeof("/1.sam") + 1); //space for output file name and null termination + file1 = malloc(size); + file2 = malloc(size); + if (!file1 || !file2) { + printf("Failed to set output path\n"); + goto end; + } + + //output file names + snprintf(file1, size, "%s/1.sam", outdir); //for SAM output + snprintf(file2, size, "%s/2.bam", outdir); //for BAM output + //bam data storage + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + //open input file - r reading + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + //open output files - w write as SAM, wb write as BAM + outfile1 = sam_open(file1, "w"); //as SAM + outfile2 = sam_open(file2, "wb"); //as BAM + if (!outfile1 || !outfile2) { + printf("Could not open output file\n"); + goto end; + } + + //create a pool of 4 threads + if (!(tpool.pool = hts_tpool_init(4))) { + printf("Failed to initialize the thread pool\n"); + goto end; + } + //share the pool with all the 3 files + if (hts_set_opt(infile, HTS_OPT_THREAD_POOL, &tpool) < 0 || + hts_set_opt(outfile1, HTS_OPT_THREAD_POOL, &tpool) < 0 || + hts_set_opt(outfile2, HTS_OPT_THREAD_POOL, &tpool) < 0) { + printf("Failed to set thread options\n"); + goto end; + } + + //read header, required to resolve the target names to proper ids + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + //write header + if ((sam_hdr_write(outfile1, in_samhdr) == -1) || (sam_hdr_write(outfile2, in_samhdr) == -1)) { + printf("Failed to write header\n"); + goto end; + } + + //check 
flags and write + while ((c = sam_read1(infile, in_samhdr, bamdata)) >= 0) { + if (bamdata->core.flag & BAM_FREAD1) { + if (sam_write1(outfile1, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + else if (bamdata->core.flag & BAM_FREAD2) { + if (sam_write1(outfile2, in_samhdr, bamdata) < 0) { + printf("Failed to write output data\n"); + goto end; + } + } + } + if (-1 == c) { + //EOF + ret = EXIT_SUCCESS; + } + else { + printf("Error in reading data\n"); + } +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + if (file1) { + free(file1); + } + if (file2) { + free(file2); + } + if (outfile1) { + sam_close(outfile1); + } + if (outfile2) { + sam_close(outfile2); + } + if (tpool.pool) { + hts_tpool_destroy(tpool.pool); + } + return ret; +} diff --git a/src/htslib-1.21/samples/update_header.c b/src/htslib-1.21/samples/update_header.c new file mode 100644 index 0000000..237d5c4 --- /dev/null +++ b/src/htslib-1.21/samples/update_header.c @@ -0,0 +1,131 @@ +/* update_header.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include + +/// print_usage - print the usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: update_header infile header idval tag value\n\ +Updates the tag's value on line given in id on header of given type\n"); + return; +} + +/// main_demo - start of the demo +/** @param argc - count of arguments + * @param argv - pointer to array of arguments +returns 1 on failure 0 on success +*/ +int main(int argc, char *argv[]) +{ + const char *inname = NULL, *tag = NULL, *idval = NULL, *val = NULL, *header = NULL; + char *id = NULL; + int ret = EXIT_FAILURE; + samFile *infile = NULL, *outfile = NULL; + sam_hdr_t *in_samhdr = NULL; + + //update_header infile header idval tag value + if (argc != 6) { + print_usage(stderr); + goto end; + } + inname = argv[1]; + header = argv[2]; + idval = argv[3]; + tag = argv[4]; + val = argv[5]; + + //unique identifier for each of the header types + if (header[0] == 'H' && header[1] == 'D') { + id = NULL; + printf("This sample doesnt not support modifying HD fields\n"); + } + else if (header[0] == 'S' && header[1] == 'Q') { + id = "SN"; + } + else if (header[0] == 'R' && header[1] == 'G') { + id = "ID"; + } + else if (header[0] == 'P' && header[1] == 'G') { + id = "ID"; + } + else if (header[0] == 'C' && header[1] == 'O') { + tag = 
NULL; + id = ""; + printf("This sample doesnt not support modifying CO fields\n"); + } + else { + printf("Invalid header type\n"); + goto end; + } + + if (!(infile = sam_open(inname, "r"))) { + printf("Could not open %s\n", inname); + goto end; + } + if (!(outfile = sam_open("-", "w"))) { //use stdout as the output file for ease of display of update + printf("Could not open stdout\n"); + goto end; + } + + //read header + if (!(in_samhdr = sam_hdr_read(infile))) { + printf("Failed to read header from file!\n"); + goto end; + } + + //update with new data + if (sam_hdr_update_line(in_samhdr, header, id, idval, tag, val, NULL) < 0) { + printf("Failed to update data\n"); + goto end; + } + //write output + if (sam_hdr_write(outfile, in_samhdr) < 0) { + printf("Failed to write output\n"); + goto end; + } + ret = EXIT_SUCCESS; + //bam data write to follow.... +end: + //cleanup + if (in_samhdr) { + sam_hdr_destroy(in_samhdr); + } + if (infile) { + sam_close(infile); + } + if (outfile) { + sam_close(outfile); + } + return ret; +} diff --git a/src/htslib-1.21/samples/write_fast.c b/src/htslib-1.21/samples/write_fast.c new file mode 100644 index 0000000..95d919f --- /dev/null +++ b/src/htslib-1.21/samples/write_fast.c @@ -0,0 +1,116 @@ +/* write_fast.c -- showcases the htslib api usage + + Copyright (C) 2023 Genome Research Ltd. + + Author: Vasudeva Sarma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE + +*/ + +/* The purpose of this code is to demonstrate the library apis and need proper error handling and optimisation */ + +#include +#include +#include +#include +#include + +/// print_usage - show usage +/** @param fp pointer to the file / terminal to which usage to be dumped +returns nothing +*/ +static void print_usage(FILE *fp) +{ + fprintf(fp, "Usage: write_fast [ 4 || argc < 3) { + print_usage(stdout); + goto end; + } + outname = argv[1]; + data = argv[2]; + if (argc == 4) { //fastq data + qual = argv[3]; + if (strlen(data) != strlen(qual)) { //check for proper length of data and quality values + printf("Incorrect reference and quality data\n"); + goto end; + } + } + + //initialize + if (!(bamdata = bam_init1())) { + printf("Failed to initialize bamdata\n"); + goto end; + } + if (sam_open_mode(mode + 1, outname, NULL) < 0) { + printf("Invalid file name\n"); + goto end; + } + //open output file + if (!(outfile = sam_open(outname, mode))) { //expects the name to have correct extension! + printf("Could not open %s\n", outname); + goto end; + } + /* if the file name extension is not appropriate to the content, inconsistent data will be present in output. + if required, htsFormat and sam_open_format can be explicitly used to ensure appropriateness of content. 
+ htsFormat fmt = {sequence_data, fastq_format / fasta_format}; + sam_open_format(outname, mode, fmt); + */ + + snprintf(name, sizeof(name), "Test_%ld", (long) time(NULL)); + //data + if (bam_set1(bamdata, strlen(name), name, BAM_FUNMAP, -1, -1, 0, 0, NULL, -1, -1, 0, strlen(data), data, qual, 0) < 0) { + printf("Failed to set data\n"); + goto end; + } + //as we write only FASTA/FASTQ, we can get away without providing headers + if (sam_write1(outfile, NULL, bamdata) < 0) { + printf("Failed to write data\n"); + goto end; + } + ret = EXIT_SUCCESS; +end: + //clean up + if (outfile) { + sam_close(outfile); + } + if (bamdata) { + bam_destroy1(bamdata); + } + return ret; +} diff --git a/src/htslib-1.21/simd.c b/src/htslib-1.21/simd.c new file mode 100644 index 0000000..865dd88 --- /dev/null +++ b/src/htslib-1.21/simd.c @@ -0,0 +1,222 @@ +/* simd.c -- SIMD optimised versions of various internal functions. + + Copyright (C) 2024 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +// These must be defined before the first system include to ensure that legacy +// BSD types needed by remain defined when _XOPEN_SOURCE is set. +#if defined __APPLE__ +#define _DARWIN_C_SOURCE +#elif defined __NetBSD__ +#define _NETBSD_SOURCE +#endif + +#include "htslib/sam.h" +#include "sam_internal.h" + +#if defined __x86_64__ +#include +#elif defined __ARM_NEON +#include +#endif + +#if defined __arm__ || defined __aarch64__ + +#if defined __linux__ || defined __FreeBSD__ +#include +#elif defined __APPLE__ +#include +#include +#elif defined __NetBSD__ +#include +#include +#include +#ifdef __aarch64__ +#include +#else +#include +#endif +#elif defined _WIN32 +#include +#endif + +static inline int cpu_supports_neon(void) { +#if defined __linux__ && defined __arm__ && defined HWCAP_NEON + return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0; +#elif defined __linux__ && defined __arm__ && defined HWCAP_ARM_NEON + return (getauxval(AT_HWCAP) & HWCAP_ARM_NEON) != 0; +#elif defined __linux__ && defined __aarch64__ && defined HWCAP_ASIMD + return (getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0; +#elif defined __APPLE__ && defined __aarch64__ + int32_t ctl; + size_t ctlsize = sizeof ctl; + if (sysctlbyname("hw.optional.AdvSIMD", &ctl, &ctlsize, NULL, 0) != 0) return 0; + if (ctlsize != sizeof ctl) return 0; + return ctl; +#elif defined __FreeBSD__ && defined __arm__ && defined HWCAP_NEON + unsigned long cap; + if (elf_aux_info(AT_HWCAP, &cap, sizeof cap) != 0) return 0; + return (cap & HWCAP_NEON) != 0; +#elif defined __FreeBSD__ && defined __aarch64__ && defined HWCAP_ASIMD + unsigned long cap; + if (elf_aux_info(AT_HWCAP, &cap, sizeof cap) != 0) return 0; + return (cap & HWCAP_ASIMD) != 0; +#elif defined __NetBSD__ && defined __arm__ && defined ARM_MVFR0_ASIMD_MASK + uint32_t buf[16]; + size_t buflen = sizeof buf; + if (sysctlbyname("machdep.id_mvfr", buf, &buflen, NULL, 0) != 0) return 
0; + if (buflen < sizeof(uint32_t)) return 0; + return (buf[0] & ARM_MVFR0_ASIMD_MASK) == 0x00000002; +#elif defined __NetBSD__ && defined __aarch64__ && defined ID_AA64PFR0_EL1_ADVSIMD + struct aarch64_sysctl_cpu_id buf; + size_t buflen = sizeof buf; + if (sysctlbyname("machdep.cpu0.cpu_id", &buf, &buflen, NULL, 0) != 0) return 0; + if (buflen < offsetof(struct aarch64_sysctl_cpu_id, ac_aa64pfr0) + sizeof(uint64_t)) return 0; + return (buf.ac_aa64pfr0 & ID_AA64PFR0_EL1_ADVSIMD & 0x00e00000) == 0; +#elif defined _WIN32 + return IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE) != 0; +#else + return 0; +#endif +} + +#endif + +#ifdef BUILDING_SIMD_NIBBLE2BASE + +void (*htslib_nibble2base)(uint8_t *nib, char *seq, int len) = nibble2base_default; + +#if defined __x86_64__ + +/* + * Convert a nibble encoded BAM sequence to a string of bases. + * + * Using SSSE3 instructions, 16 codepoints that hold 2 bases each can be + * unpacked into 32 indexes from 0-15. Using the pshufb instruction these can + * be converted to the IUPAC characters. + * It falls back on the nibble2base_default function for the remainder. + */ + +__attribute__((target("ssse3"))) +static void nibble2base_ssse3(uint8_t *nib, char *seq, int len) { + const char *seq_end_ptr = seq + len; + char *seq_cursor = seq; + uint8_t *nibble_cursor = nib; + const char *seq_vec_end_ptr = seq_end_ptr - (2 * sizeof(__m128i) - 1); + __m128i nuc_lookup_vec = _mm_lddqu_si128((__m128i *)seq_nt16_str); + /* Nucleotides are encoded 4-bits per nucleotide and stored in 8-bit bytes + as follows: |AB|CD|EF|GH|. The 4-bit codes (going from 0-15) can be used + together with the pshufb instruction as a lookup table. The most efficient + way is to use bitwise AND and shift to create two vectors. One with all + the upper codes (|A|C|E|G|) and one with the lower codes (|B|D|F|H|). + The lookup can then be performed and the resulting vectors can be + interleaved again using the unpack instructions. 
*/ + while (seq_cursor < seq_vec_end_ptr) { + __m128i encoded = _mm_lddqu_si128((__m128i *)nibble_cursor); + __m128i encoded_upper = _mm_srli_epi64(encoded, 4); + encoded_upper = _mm_and_si128(encoded_upper, _mm_set1_epi8(15)); + __m128i encoded_lower = _mm_and_si128(encoded, _mm_set1_epi8(15)); + __m128i nucs_upper = _mm_shuffle_epi8(nuc_lookup_vec, encoded_upper); + __m128i nucs_lower = _mm_shuffle_epi8(nuc_lookup_vec, encoded_lower); + __m128i first_nucleotides = _mm_unpacklo_epi8(nucs_upper, nucs_lower); + __m128i second_nucleotides = _mm_unpackhi_epi8(nucs_upper, nucs_lower); + _mm_storeu_si128((__m128i *)seq_cursor, first_nucleotides); + _mm_storeu_si128((__m128i *)(seq_cursor + sizeof(__m128i)), + second_nucleotides); + nibble_cursor += sizeof(__m128i); + seq_cursor += 2 * sizeof(__m128i); + } + nibble2base_default(nibble_cursor, seq_cursor, seq_end_ptr - seq_cursor); +} + +__attribute__((constructor)) +static void nibble2base_resolve(void) { + if (__builtin_cpu_supports("ssse3")) { + htslib_nibble2base = nibble2base_ssse3; + } +} + +#elif defined __ARM_NEON + +static void nibble2base_neon(uint8_t *nib, char *seq0, int len) { + uint8x16_t low_nibbles_mask = vdupq_n_u8(0x0f); + uint8x16_t nuc_lookup_vec = vld1q_u8((const uint8_t *) seq_nt16_str); +#ifndef __aarch64__ + uint8x8x2_t nuc_lookup_vec2 = {{ vget_low_u8(nuc_lookup_vec), vget_high_u8(nuc_lookup_vec) }}; +#endif + + uint8_t *seq = (uint8_t *) seq0; + int blocks; + + for (blocks = len / 32; blocks > 0; --blocks) { + uint8x16_t encoded = vld1q_u8(nib); + nib += 16; + + /* Translate the high and low nibbles to nucleotide letters separately, + then interleave them back together via vzipq for writing. 
*/ + + uint8x16_t high_nibbles = vshrq_n_u8(encoded, 4); + uint8x16_t low_nibbles = vandq_u8(encoded, low_nibbles_mask); + +#ifdef __aarch64__ + uint8x16_t high_nucleotides = vqtbl1q_u8(nuc_lookup_vec, high_nibbles); + uint8x16_t low_nucleotides = vqtbl1q_u8(nuc_lookup_vec, low_nibbles); +#else + uint8x8_t high_low = vtbl2_u8(nuc_lookup_vec2, vget_low_u8(high_nibbles)); + uint8x8_t high_high = vtbl2_u8(nuc_lookup_vec2, vget_high_u8(high_nibbles)); + uint8x16_t high_nucleotides = vcombine_u8(high_low, high_high); + + uint8x8_t low_low = vtbl2_u8(nuc_lookup_vec2, vget_low_u8(low_nibbles)); + uint8x8_t low_high = vtbl2_u8(nuc_lookup_vec2, vget_high_u8(low_nibbles)); + uint8x16_t low_nucleotides = vcombine_u8(low_low, low_high); +#endif + +#ifdef __aarch64__ + vst1q_u8_x2(seq, vzipq_u8(high_nucleotides, low_nucleotides)); +#else + // Avoid vst1q_u8_x2 as GCC erroneously omits it on 32-bit ARM + uint8x16x2_t nucleotides = {{ high_nucleotides, low_nucleotides }}; + vst2q_u8(seq, nucleotides); +#endif + seq += 32; + } + + if (len % 32 != 0) + nibble2base_default(nib, (char *) seq, len % 32); +} + +static __attribute__((constructor)) void nibble2base_resolve(void) { + if (cpu_supports_neon()) htslib_nibble2base = nibble2base_neon; +} + +#endif + +#endif // BUILDING_SIMD_NIBBLE2BASE + +// Potentially useful diagnostic, and prevents "empty translation unit" errors +const char htslib_simd[] = + "SIMD functions present:" +#ifdef BUILDING_SIMD_NIBBLE2BASE + " nibble2base" +#endif + "."; diff --git a/src/htslib-1.21/synced_bcf_reader.c b/src/htslib-1.21/synced_bcf_reader.c new file mode 100644 index 0000000..1835ea2 --- /dev/null +++ b/src/htslib-1.21/synced_bcf_reader.c @@ -0,0 +1,1520 @@ +/* synced_bcf_reader.c -- stream through multiple VCF files. + + Copyright (C) 2012-2023 Genome Research Ltd. 
+ + Author: Petr Danecek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "htslib/synced_bcf_reader.h" +#include "htslib/kseq.h" +#include "htslib/khash_str2int.h" +#include "htslib/bgzf.h" +#include "htslib/thread_pool.h" +#include "bcf_sr_sort.h" + +#define REQUIRE_IDX_ 1 +#define ALLOW_NO_IDX_ 2 + +// Maximum indexable coordinate of .csi, for default min_shift of 14. +// This comes out to about 17 Tbp. Limiting factor is the bin number, +// which is a uint32_t in CSI. The highest number of levels compatible +// with this is 10 (needs 31 bits). 
+#define MAX_CSI_COOR ((1LL << (14 + 30)) - 1) + +typedef struct +{ + hts_pos_t start, end; // records are marked for skipping have start>end +} +region1_t; + +typedef struct bcf_sr_region_t +{ + region1_t *regs; // regions will sorted and merged, redundant records marked for skipping have start>end + int nregs, mregs, creg; // creg: the current active region +} +region_t; + +#define BCF_SR_AUX(x) ((aux_t*)((x)->aux)) +typedef struct +{ + sr_sort_t sort; + int regions_overlap, targets_overlap; +} +aux_t; + +static bcf_sr_regions_t *bcf_sr_regions_alloc(void); +static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start, hts_pos_t end); +static bcf_sr_regions_t *_regions_init_string(const char *str); +static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec); +static void _regions_sort_and_merge(bcf_sr_regions_t *reg); +static int _bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end, int missed_reg_handler); +static void bcf_sr_seek_start(bcf_srs_t *readers); + +char *bcf_sr_strerror(int errnum) +{ + switch (errnum) + { + case open_failed: + return strerror(errno); + case not_bgzf: + return "not compressed with bgzip"; + case idx_load_failed: + return "could not load index"; + case file_type_error: + return "unknown file type"; + case api_usage_error: + return "API usage error"; + case header_error: + return "could not parse header"; + case no_eof: + return "no BGZF EOF marker; file may be truncated"; + case no_memory: + return "Out of memory"; + case vcf_parse_error: + return "VCF parse error"; + case bcf_read_error: + return "BCF read error"; + case noidx_error: + return "merge of unindexed files failed"; + default: return ""; + } +} + +int bcf_sr_set_opt(bcf_srs_t *readers, bcf_sr_opt_t opt, ...) 
+{ + va_list args; + switch (opt) + { + case BCF_SR_REQUIRE_IDX: + readers->require_index = REQUIRE_IDX_; + return 0; + + case BCF_SR_ALLOW_NO_IDX: + readers->require_index = ALLOW_NO_IDX_; + return 0; + + case BCF_SR_PAIR_LOGIC: + va_start(args, opt); + BCF_SR_AUX(readers)->sort.pair = va_arg(args, int); + return 0; + + case BCF_SR_REGIONS_OVERLAP: + va_start(args, opt); + BCF_SR_AUX(readers)->regions_overlap = va_arg(args, int); + if ( readers->regions ) readers->regions->overlap = BCF_SR_AUX(readers)->regions_overlap; + return 0; + + case BCF_SR_TARGETS_OVERLAP: + va_start(args, opt); + BCF_SR_AUX(readers)->targets_overlap = va_arg(args, int); + if ( readers->targets ) readers->targets->overlap = BCF_SR_AUX(readers)->targets_overlap; + return 0; + + default: + break; + } + return 1; +} + +static int *init_filters(bcf_hdr_t *hdr, const char *filters, int *nfilters) +{ + kstring_t str = {0,0,0}; + const char *tmp = filters, *prev = filters; + int nout = 0, *out = NULL; + while ( 1 ) + { + if ( *tmp==',' || !*tmp ) + { + int *otmp = (int*) realloc(out, (nout+1)*sizeof(int)); + if (!otmp) + goto err; + out = otmp; + if ( tmp-prev==1 && *prev=='.' 
) + { + out[nout] = -1; + nout++; + } + else + { + str.l = 0; + kputsn(prev, tmp-prev, &str); + out[nout] = bcf_hdr_id2int(hdr, BCF_DT_ID, str.s); + if ( out[nout]>=0 ) nout++; + } + if ( !*tmp ) break; + prev = tmp+1; + } + tmp++; + } + if ( str.m ) free(str.s); + *nfilters = nout; + return out; + + err: + if (str.m) free(str.s); + free(out); + return NULL; +} + +int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file) +{ + if ( readers->nreaders || readers->regions ) + { + if ( readers->regions ) bcf_sr_regions_destroy(readers->regions); + readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2); + bcf_sr_seek_start(readers); + return 0; + } + + readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2); + if ( !readers->regions ) return -1; + readers->explicit_regs = 1; + readers->require_index = REQUIRE_IDX_; + readers->regions->overlap = BCF_SR_AUX(readers)->regions_overlap; + return 0; +} + +int bcf_sr_set_targets(bcf_srs_t *readers, const char *targets, int is_file, int alleles) +{ + if ( readers->nreaders || readers->targets ) + { + hts_log_error("Must call bcf_sr_set_targets() before bcf_sr_add_reader()"); + return -1; + } + if ( targets[0]=='^' ) + { + readers->targets_exclude = 1; + targets++; + } + readers->targets = bcf_sr_regions_init(targets,is_file,0,1,-2); + if ( !readers->targets ) return -1; + readers->targets_als = alleles; + readers->targets->overlap = BCF_SR_AUX(readers)->targets_overlap; + return 0; +} + +int bcf_sr_set_threads(bcf_srs_t *files, int n_threads) +{ + if (!(files->n_threads = n_threads)) + return 0; + + files->p = calloc(1, sizeof(*files->p)); + if (!files->p) { + files->errnum = no_memory; + return -1; + } + if (!(files->p->pool = hts_tpool_init(n_threads))) + return -1; + + return 0; +} + +void bcf_sr_destroy_threads(bcf_srs_t *files) { + if (!files->p) + return; + + if (files->p->pool) + hts_tpool_destroy(files->p->pool); + free(files->p); +} + +int bcf_sr_add_reader(bcf_srs_t *files, const char 
*fname) +{ + char fmode[5]; + strcpy(fmode, "r"); + vcf_open_mode(fmode+1, fname, NULL); + htsFile* file_ptr = hts_open(fname, fmode); + if ( ! file_ptr ) { + files->errnum = open_failed; + return 0; + } + + files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1)); + files->has_line[files->nreaders] = 0; + files->readers = (bcf_sr_t*) realloc(files->readers, sizeof(bcf_sr_t)*(files->nreaders+1)); + bcf_sr_t *reader = &files->readers[files->nreaders++]; + memset(reader,0,sizeof(bcf_sr_t)); + + reader->file = file_ptr; + + files->errnum = 0; + + if ( reader->file->format.compression==bgzf ) + { + BGZF *bgzf = hts_get_bgzfp(reader->file); + if ( bgzf && bgzf_check_EOF(bgzf) == 0 ) { + files->errnum = no_eof; + hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", fname); + } + if (files->p) + bgzf_thread_pool(bgzf, files->p->pool, files->p->qsize); + } + + if ( files->require_index==REQUIRE_IDX_ ) + { + if ( reader->file->format.format==vcf ) + { + if ( reader->file->format.compression!=bgzf ) + { + files->errnum = not_bgzf; + return 0; + } + + reader->tbx_idx = tbx_index_load(fname); + if ( !reader->tbx_idx ) + { + files->errnum = idx_load_failed; + return 0; + } + + reader->header = bcf_hdr_read(reader->file); + } + else if ( reader->file->format.format==bcf ) + { + if ( reader->file->format.compression!=bgzf ) + { + files->errnum = not_bgzf; + return 0; + } + + reader->header = bcf_hdr_read(reader->file); + + reader->bcf_idx = bcf_index_load(fname); + if ( !reader->bcf_idx ) + { + files->errnum = idx_load_failed; + return 0; + } + } + else + { + files->errnum = file_type_error; + return 0; + } + } + else + { + if ( reader->file->format.format==bcf || reader->file->format.format==vcf ) + { + reader->header = bcf_hdr_read(reader->file); + } + else + { + files->errnum = file_type_error; + return 0; + } + files->streaming = 1; + } + if ( files->streaming && files->nreaders>1 ) + { + static int no_index_warned = 0; + if ( 
files->require_index==ALLOW_NO_IDX_ && !no_index_warned ) + { + hts_log_warning("Using multiple unindexed files may produce errors, make sure chromosomes are in the same order!"); + no_index_warned = 1; + } + if ( files->require_index!=ALLOW_NO_IDX_ ) + { + files->errnum = api_usage_error; + hts_log_error("Must set require_index when the number of readers is greater than one"); + return 0; + } + } + if ( files->streaming && files->regions ) + { + files->errnum = api_usage_error; + hts_log_error("Cannot tabix-jump in streaming mode"); + return 0; + } + if ( !reader->header ) + { + files->errnum = header_error; + return 0; + } + + reader->fname = strdup(fname); + if ( files->apply_filters ) + reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids); + + // Update list of chromosomes + if ( !files->explicit_regs && !files->streaming ) + { + int n = 0, i; + const char **names; + + if ( !files->regions ) + { + files->regions = bcf_sr_regions_alloc(); + if ( !files->regions ) + { + hts_log_error("Cannot allocate regions data structure"); + return 0; + } + } + + names = reader->tbx_idx ? 
tbx_seqnames(reader->tbx_idx, &n) : bcf_hdr_seqnames(reader->header, &n); + for (i=0; iregions, names[i], -1, -1); + } + free(names); + _regions_sort_and_merge(files->regions); + } + + if ( files->require_index==ALLOW_NO_IDX_ && files->nreaders > 1 ) + { + bcf_hdr_t *hdr0 = files->readers[0].header; + bcf_hdr_t *hdr1 = reader->header; + if ( hdr0->n[BCF_DT_CTG]!=hdr1->n[BCF_DT_CTG] ) + { + files->errnum = noidx_error; + hts_log_error("Different number of sequences in the header, refusing to stream multiple unindexed files"); + return 0; + } + int i; + for (i=0; in[BCF_DT_CTG]; i++) + { + if ( strcmp(bcf_hdr_id2name(hdr0,i),bcf_hdr_id2name(hdr1,i)) ) + { + files->errnum = noidx_error; + hts_log_error("Sequences in the header appear in different order, refusing to stream multiple unindexed files"); + return 0; + } + } + } + + return 1; +} + +bcf_srs_t *bcf_sr_init(void) +{ + bcf_srs_t *files = (bcf_srs_t*) calloc(1,sizeof(bcf_srs_t)); + files->aux = (aux_t*) calloc(1,sizeof(aux_t)); + bcf_sr_sort_init(&BCF_SR_AUX(files)->sort); + bcf_sr_set_opt(files,BCF_SR_REGIONS_OVERLAP,1); + bcf_sr_set_opt(files,BCF_SR_TARGETS_OVERLAP,0); + return files; +} + +static void bcf_sr_destroy1(bcf_sr_t *reader) +{ + free(reader->fname); + if ( reader->tbx_idx ) tbx_destroy(reader->tbx_idx); + if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx); + bcf_hdr_destroy(reader->header); + hts_close(reader->file); + if ( reader->itr ) tbx_itr_destroy(reader->itr); + int j; + for (j=0; jmbuffer; j++) + bcf_destroy1(reader->buffer[j]); + free(reader->buffer); + free(reader->samples); + free(reader->filter_ids); +} + +void bcf_sr_destroy(bcf_srs_t *files) +{ + int i; + for (i=0; inreaders; i++) + bcf_sr_destroy1(&files->readers[i]); + free(files->has_line); + free(files->readers); + for (i=0; in_smpl; i++) free(files->samples[i]); + free(files->samples); + if (files->targets) bcf_sr_regions_destroy(files->targets); + if (files->regions) bcf_sr_regions_destroy(files->regions); + if 
(files->tmps.m) free(files->tmps.s); + if (files->n_threads) bcf_sr_destroy_threads(files); + bcf_sr_sort_destroy(&BCF_SR_AUX(files)->sort); + free(files->aux); + free(files); +} + +void bcf_sr_remove_reader(bcf_srs_t *files, int i) +{ + assert( !files->samples ); // not ready for this yet + bcf_sr_sort_remove_reader(files, &BCF_SR_AUX(files)->sort, i); + bcf_sr_destroy1(&files->readers[i]); + if ( i+1 < files->nreaders ) + { + memmove(&files->readers[i], &files->readers[i+1], (files->nreaders-i-1)*sizeof(bcf_sr_t)); + memmove(&files->has_line[i], &files->has_line[i+1], (files->nreaders-i-1)*sizeof(int)); + } + files->nreaders--; +} + +#if DEBUG_SYNCED_READER +void debug_buffer(FILE *fp, bcf_sr_t *reader) +{ + int j; + for (j=0; j<=reader->nbuffer; j++) + { + bcf1_t *line = reader->buffer[j]; + fprintf(fp,"\t%p\t%s%s\t%s:%"PRIhts_pos"\t%s ", (void*)line,reader->fname,j==0?"*":" ",reader->header->id[BCF_DT_CTG][line->rid].key,line->pos+1,line->n_allele?line->d.allele[0]:""); + int k; + for (k=1; kn_allele; k++) fprintf(fp," %s", line->d.allele[k]); + fprintf(fp,"\n"); + } +} + +void debug_buffers(FILE *fp, bcf_srs_t *files) +{ + int i; + for (i=0; inreaders; i++) + { + fprintf(fp, "has_line: %d\t%s\n", bcf_sr_has_line(files,i),files->readers[i].fname); + debug_buffer(fp, &files->readers[i]); + } + fprintf(fp,"\n"); +} +#endif + +static inline int has_filter(bcf_sr_t *reader, bcf1_t *line) +{ + int i, j; + if ( !line->d.n_flt ) + { + for (j=0; jnfilter_ids; j++) + if ( reader->filter_ids[j]<0 ) return 1; + return 0; + } + for (i=0; id.n_flt; i++) + { + for (j=0; jnfilter_ids; j++) + if ( line->d.flt[i]==reader->filter_ids[j] ) return 1; + } + return 0; +} + +static int _reader_seek(bcf_sr_t *reader, const char *seq, hts_pos_t start, hts_pos_t end) +{ + if ( end>=MAX_CSI_COOR ) + { + hts_log_error("The coordinate is out of csi index limit: %"PRIhts_pos, end+1); + exit(1); + } + if ( reader->itr ) + { + hts_itr_destroy(reader->itr); + reader->itr = NULL; + } + 
reader->nbuffer = 0; + if ( reader->tbx_idx ) + { + int tid = tbx_name2id(reader->tbx_idx, seq); + if ( tid==-1 ) return -1; // the sequence not present in this file + reader->itr = tbx_itr_queryi(reader->tbx_idx,tid,start,end+1); + } + else + { + int tid = bcf_hdr_name2id(reader->header, seq); + if ( tid==-1 ) return -1; // the sequence not present in this file + reader->itr = bcf_itr_queryi(reader->bcf_idx,tid,start,end+1); + } + if (!reader->itr) { + hts_log_error("Could not seek: %s:%"PRIhts_pos"-%"PRIhts_pos, seq, start + 1, end + 1); + abort(); + } + return 0; +} + +/* + * _readers_next_region() - jumps to next region if necessary + * Returns 0 on success or -1 when there are no more regions left + */ +static int _readers_next_region(bcf_srs_t *files) +{ + // Need to open new chromosome? Check number of lines in all readers' buffers + int i, eos = 0; + for (i=0; inreaders; i++) + if ( !files->readers[i].itr && !files->readers[i].nbuffer ) eos++; + + if ( eos!=files->nreaders ) + { + // Some of the readers still has buffered lines + return 0; + } + + // No lines in the buffer, need to open new region or quit. + int prev_iseq = files->regions->iseq; + hts_pos_t prev_end = files->regions->end; + if ( bcf_sr_regions_next(files->regions)<0 ) return -1; + files->regions->prev_end = prev_iseq==files->regions->iseq ? prev_end : -1; + + for (i=0; inreaders; i++) + _reader_seek(&files->readers[i],files->regions->seq_names[files->regions->iseq],files->regions->start,files->regions->end); + + return 0; +} + +static void _set_variant_boundaries(bcf1_t *rec, hts_pos_t *beg, hts_pos_t *end) +{ + hts_pos_t off; + if ( rec->n_allele ) + { + off = rec->rlen; + bcf_unpack(rec, BCF_UN_STR); + int i; + for (i=1; in_allele; i++) + { + // Make symbolic alleles start at POS, although this is not strictly true for + // , where POS should be the position BEFORE the deletion/insertion. 
+ // However, since arbitrary symbolic alleles can be defined by the user, we + // will simplify the interpretation of --targets-overlap and --region-overlap. + int j = 0; + char *ref = rec->d.allele[0]; + char *alt = rec->d.allele[i]; + while ( ref[j] && alt[j] && ref[j]==alt[j] ) j++; + if ( off > j ) off = j; + if ( !off ) break; + } + } + else + off = 0; + + *beg = rec->pos + off; + *end = rec->pos + rec->rlen - 1; +} + +/* + * _reader_fill_buffer() - buffers all records with the same coordinate + */ +static int _reader_fill_buffer(bcf_srs_t *files, bcf_sr_t *reader) +{ + // Return if the buffer is full: the coordinate of the last buffered record differs + if ( reader->nbuffer && reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) return 0; + + // No iterator (sequence not present in this file) and not streaming + if ( !reader->itr && !files->streaming ) return 0; + + // Fill the buffer with records starting at the same position + int i, ret = 0; + while (1) + { + if ( reader->nbuffer+1 >= reader->mbuffer ) + { + // Increase buffer size + reader->mbuffer += 8; + reader->buffer = (bcf1_t**) realloc(reader->buffer, sizeof(bcf1_t*)*reader->mbuffer); + for (i=8; i>0; i--) // initialize + { + reader->buffer[reader->mbuffer-i] = bcf_init1(); + reader->buffer[reader->mbuffer-i]->max_unpack = files->max_unpack; + reader->buffer[reader->mbuffer-i]->pos = -1; // for rare cases when VCF starts from 1 + } + } + if ( files->streaming ) + { + if ( reader->file->format.format==vcf ) + { + ret = hts_getline(reader->file, KS_SEP_LINE, &files->tmps); + if ( ret < -1 ) files->errnum = bcf_read_error; + if ( ret < 0 ) break; // no more lines or an error + ret = vcf_parse1(&files->tmps, reader->header, reader->buffer[reader->nbuffer+1]); + if ( ret<0 ) { files->errnum = vcf_parse_error; break; } + } + else if ( reader->file->format.format==bcf ) + { + ret = bcf_read1(reader->file, reader->header, reader->buffer[reader->nbuffer+1]); + if ( ret < -1 ) files->errnum = 
bcf_read_error; + if ( ret < 0 ) break; // no more lines or an error + } + else + { + hts_log_error("Fixme: not ready for this"); + exit(1); + } + } + else if ( reader->tbx_idx ) + { + ret = tbx_itr_next(reader->file, reader->tbx_idx, reader->itr, &files->tmps); + if ( ret < -1 ) files->errnum = bcf_read_error; + if ( ret < 0 ) break; // no more lines or an error + ret = vcf_parse1(&files->tmps, reader->header, reader->buffer[reader->nbuffer+1]); + if ( ret<0 ) { files->errnum = vcf_parse_error; break; } + } + else + { + ret = bcf_itr_next(reader->file, reader->itr, reader->buffer[reader->nbuffer+1]); + if ( ret < -1 ) files->errnum = bcf_read_error; + if ( ret < 0 ) break; // no more lines or an error + bcf_subset_format(reader->header,reader->buffer[reader->nbuffer+1]); + } + + // Prevent creation of duplicates from records overlapping multiple regions + // and recognize true variant overlaps vs record overlaps (e.g. TA>T vs A>-) + if ( files->regions ) + { + hts_pos_t beg, end; + if ( BCF_SR_AUX(files)->regions_overlap==0 ) + beg = end = reader->buffer[reader->nbuffer+1]->pos; + else if ( BCF_SR_AUX(files)->regions_overlap==1 ) + { + beg = reader->buffer[reader->nbuffer+1]->pos; + end = reader->buffer[reader->nbuffer+1]->pos + reader->buffer[reader->nbuffer+1]->rlen - 1; + } + else if ( BCF_SR_AUX(files)->regions_overlap==2 ) + _set_variant_boundaries(reader->buffer[reader->nbuffer+1], &beg,&end); + else + { + hts_log_error("This should never happen, just to keep clang compiler happy: %d",BCF_SR_AUX(files)->targets_overlap); + exit(1); + } + if ( beg <= files->regions->prev_end || end < files->regions->start || beg > files->regions->end ) continue; + } + + // apply filter + if ( !reader->nfilter_ids ) + bcf_unpack(reader->buffer[reader->nbuffer+1], BCF_UN_STR); + else + { + bcf_unpack(reader->buffer[reader->nbuffer+1], BCF_UN_STR|BCF_UN_FLT); + if ( !has_filter(reader, reader->buffer[reader->nbuffer+1]) ) continue; + } + reader->nbuffer++; + + if ( 
reader->buffer[reader->nbuffer]->rid != reader->buffer[1]->rid ) break; + if ( reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) break; // the buffer is full + } + if ( ret<0 ) + { + // done for this region + tbx_itr_destroy(reader->itr); + reader->itr = NULL; + } + if ( files->require_index==ALLOW_NO_IDX_ && reader->buffer[reader->nbuffer]->rid < reader->buffer[1]->rid ) + { + hts_log_error("Sequences out of order, cannot stream multiple unindexed files: %s", reader->fname); + exit(1); + } + return 0; // FIXME: Check for more errs in this function +} + +/* + * _readers_shift_buffer() - removes the first line + */ +static void _reader_shift_buffer(bcf_sr_t *reader) +{ + if ( !reader->nbuffer ) return; + int i; + bcf1_t *tmp = reader->buffer[1]; + for (i=2; i<=reader->nbuffer; i++) + reader->buffer[i-1] = reader->buffer[i]; + if ( reader->nbuffer > 1 ) + reader->buffer[reader->nbuffer] = tmp; + reader->nbuffer--; +} + +static int next_line(bcf_srs_t *files) +{ + const char *chr = NULL; + hts_pos_t min_pos = HTS_POS_MAX; + + // Loop until next suitable line is found or all readers have finished + while ( 1 ) + { + // Get all readers ready for the next region. 
+ if ( files->regions && _readers_next_region(files)<0 ) break; + + // Fill buffers and find the minimum chromosome + int i, min_rid = INT32_MAX; + for (i=0; inreaders; i++) + { + _reader_fill_buffer(files, &files->readers[i]); + if ( files->require_index==ALLOW_NO_IDX_ ) + { + if ( !files->readers[i].nbuffer ) continue; + if ( min_rid > files->readers[i].buffer[1]->rid ) min_rid = files->readers[i].buffer[1]->rid; + } + } + + for (i=0; inreaders; i++) + { + if ( !files->readers[i].nbuffer ) continue; + if ( files->require_index==ALLOW_NO_IDX_ && min_rid != files->readers[i].buffer[1]->rid ) continue; + + // Update the minimum coordinate + if ( min_pos > files->readers[i].buffer[1]->pos ) + { + min_pos = files->readers[i].buffer[1]->pos; + chr = bcf_seqname(files->readers[i].header, files->readers[i].buffer[1]); + assert(chr); + bcf_sr_sort_set_active(&BCF_SR_AUX(files)->sort, i); + } + else if ( min_pos==files->readers[i].buffer[1]->pos ) + bcf_sr_sort_add_active(&BCF_SR_AUX(files)->sort, i); + } + if ( min_pos==HTS_POS_MAX ) + { + if ( !files->regions ) break; + continue; + } + + // Skip this position if not present in targets + if ( files->targets ) + { + int match = 0; + for (i=0; inreaders; i++) + { + if ( !files->readers[i].nbuffer || files->readers[i].buffer[1]->pos!=min_pos ) continue; + hts_pos_t beg, end; + if ( BCF_SR_AUX(files)->targets_overlap==0 ) + beg = end = min_pos; + else if ( BCF_SR_AUX(files)->targets_overlap==1 ) + { + beg = min_pos; + end = min_pos + files->readers[i].buffer[1]->rlen - 1; + } + else if ( BCF_SR_AUX(files)->targets_overlap==2 ) + _set_variant_boundaries(files->readers[i].buffer[1], &beg,&end); + else + { + hts_log_error("This should never happen, just to keep clang compiler happy: %d",BCF_SR_AUX(files)->targets_overlap); + exit(1); + } + int overlap = bcf_sr_regions_overlap(files->targets, chr, beg, end)==0 ? 
1 : 0; + if ( (!files->targets_exclude && !overlap) || (files->targets_exclude && overlap) ) + _reader_shift_buffer(&files->readers[i]); + else + match = 1; + } + if ( !match ) + { + min_pos = HTS_POS_MAX; + chr = NULL; + continue; + } + } + break; // done: chr and min_pos are set + } + if ( !chr ) return 0; + + return bcf_sr_sort_next(files, &BCF_SR_AUX(files)->sort, chr, min_pos); +} + +int bcf_sr_next_line(bcf_srs_t *files) +{ + if ( !files->targets_als ) + return next_line(files); + + while (1) + { + int i, ret = next_line(files); + if ( !ret ) return ret; + + for (i=0; inreaders; i++) + if ( files->has_line[i] ) break; + + if ( _regions_match_alleles(files->targets, files->targets_als-1, files->readers[i].buffer[0]) ) return ret; + + // Check if there are more duplicate lines in the buffers. If not, return this line as if it + // matched the targets, even if there is a type mismatch + for (i=0; inreaders; i++) + { + if ( !files->has_line[i] ) continue; + if ( files->readers[i].nbuffer==0 || files->readers[i].buffer[1]->pos!=files->readers[i].buffer[0]->pos ) continue; + break; + } + if ( i==files->nreaders ) return ret; // no more lines left, output even if target alleles are not of the same type + } +} + +static void bcf_sr_seek_start(bcf_srs_t *readers) +{ + bcf_sr_regions_t *reg = readers->regions; + int i; + for (i=0; inseqs; i++) + reg->regs[i].creg = -1; + reg->iseq = 0; + reg->start = -1; + reg->end = -1; + reg->prev_seq = -1; + reg->prev_start = -1; + reg->prev_end = -1; +} + + +int bcf_sr_seek(bcf_srs_t *readers, const char *seq, hts_pos_t pos) +{ + if ( !readers->regions ) return 0; + bcf_sr_sort_reset(&BCF_SR_AUX(readers)->sort); + if ( !seq && !pos ) + { + // seek to start + bcf_sr_seek_start(readers); + return 0; + } + + int i, nret = 0; + + // Need to position both the readers and the regions. The latter is a bit of a mess + // because we can have in memory or external regions. 
The safe way is: + // - reset all regions as if they were not read from at all (bcf_sr_seek_start) + // - find the requested iseq (stored in the seq_hash) + // - position regions to the requested position (bcf_sr_regions_overlap) + bcf_sr_seek_start(readers); + if ( khash_str2int_get(readers->regions->seq_hash, seq, &i)>=0 ) readers->regions->iseq = i; + _bcf_sr_regions_overlap(readers->regions, seq, pos, pos, 0); + + for (i=0; inreaders; i++) + { + nret += _reader_seek(&readers->readers[i],seq,pos,MAX_CSI_COOR-1); + } + return nret; +} + +int bcf_sr_set_samples(bcf_srs_t *files, const char *fname, int is_file) +{ + int i, j, nsmpl, free_smpl = 0; + char **smpl = NULL; + + void *exclude = (fname[0]=='^') ? khash_str2int_init() : NULL; + if ( exclude || strcmp("-",fname) ) // "-" stands for all samples + { + smpl = hts_readlist(fname, is_file, &nsmpl); + if ( !smpl ) + { + hts_log_error("Could not read the file: \"%s\"", fname); + return 0; + } + if ( exclude ) + { + for (i=0; ireaders[0].header->samples; // intersection of all samples + nsmpl = bcf_hdr_nsamples(files->readers[0].header); + } + + files->samples = NULL; + files->n_smpl = 0; + for (i=0; inreaders; j++) + { + if ( bcf_hdr_id2int(files->readers[j].header, BCF_DT_SAMPLE, smpl[i])<0 ) break; + n_isec++; + } + if ( n_isec!=files->nreaders ) + { + hts_log_warning("The sample \"%s\" was not found in %s, skipping", + smpl[i], files->readers[n_isec].fname); + continue; + } + + files->samples = (char**) realloc(files->samples, (files->n_smpl+1)*sizeof(const char*)); + files->samples[files->n_smpl++] = strdup(smpl[i]); + } + + if ( exclude ) khash_str2int_destroy(exclude); + if ( free_smpl ) + { + for (i=0; in_smpl ) + { + if ( files->nreaders>1 ) + hts_log_warning("No samples in common"); + return 0; + } + for (i=0; inreaders; i++) + { + bcf_sr_t *reader = &files->readers[i]; + reader->samples = (int*) malloc(sizeof(int)*files->n_smpl); + reader->n_smpl = files->n_smpl; + for (j=0; jn_smpl; j++) + 
reader->samples[j] = bcf_hdr_id2int(reader->header, BCF_DT_SAMPLE, files->samples[j]); + } + return 1; +} + +// Allocate a new region list structure. +static bcf_sr_regions_t *bcf_sr_regions_alloc(void) +{ + bcf_sr_regions_t *reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t)); + if ( !reg ) return NULL; + + reg->start = reg->end = -1; + reg->prev_start = reg->prev_end = reg->prev_seq = -1; + return reg; +} + +// Add a new region into a list. On input the coordinates are 1-based, inclusive, then stored 0-based, +// inclusive. Sorting and merging step needed afterwards: qsort(..,cmp_regions) and merge_regions(). +static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start, hts_pos_t end) +{ + if ( start==-1 && end==-1 ) + { + start = 0; end = MAX_CSI_COOR-1; + } + else + { + start--; end--; // store 0-based coordinates + } + + if ( !reg->seq_hash ) + reg->seq_hash = khash_str2int_init(); + + int iseq; + if ( khash_str2int_get(reg->seq_hash, chr, &iseq)<0 ) + { + // the chromosome block does not exist + iseq = reg->nseqs++; + reg->seq_names = (char**) realloc(reg->seq_names,sizeof(char*)*reg->nseqs); + reg->regs = (region_t*) realloc(reg->regs,sizeof(region_t)*reg->nseqs); + memset(®->regs[reg->nseqs-1],0,sizeof(region_t)); + reg->seq_names[iseq] = strdup(chr); + reg->regs[iseq].creg = -1; + khash_str2int_set(reg->seq_hash,reg->seq_names[iseq],iseq); + } + + region_t *creg = ®->regs[iseq]; + hts_expand(region1_t,creg->nregs+1,creg->mregs,creg->regs); + creg->regs[creg->nregs].start = start; + creg->regs[creg->nregs].end = end; + creg->nregs++; + + return 0; // FIXME: check for errs in this function +} + +static int regions_cmp(const void *aptr, const void *bptr) +{ + region1_t *a = (region1_t*)aptr; + region1_t *b = (region1_t*)bptr; + if ( a->start < b->start ) return -1; + if ( a->start > b->start ) return 1; + if ( a->end < b->end ) return -1; + if ( a->end > b->end ) return 1; + return 0; +} +static void regions_merge(region_t *reg) 
+{ + int i = 0, j; + while ( inregs ) + { + j = i + 1; + while ( jnregs && reg->regs[i].end >= reg->regs[j].start ) + { + if ( reg->regs[i].end < reg->regs[j].end ) reg->regs[i].end = reg->regs[j].end; + reg->regs[j].start = 1; reg->regs[j].end = 0; // if beg>end, this region marked for skipping + j++; + } + i = j; + } +} +void _regions_sort_and_merge(bcf_sr_regions_t *reg) +{ + if ( !reg ) return; + + int i; + for (i=0; inseqs; i++) + { + qsort(reg->regs[i].regs, reg->regs[i].nregs, sizeof(*reg->regs[i].regs), regions_cmp); + regions_merge(®->regs[i]); + } +} + +// File name or a list of genomic locations. If file name, NULL is returned. +// Recognises regions in the form chr, chr:pos, chr:beg-end, chr:beg-, {weird-chr-name}:pos. +// Cannot use hts_parse_region() as that requires the header and if header is not present, +// wouldn't learn the chromosome name. +static bcf_sr_regions_t *_regions_init_string(const char *str) +{ + bcf_sr_regions_t *reg = bcf_sr_regions_alloc(); + if ( !reg ) return NULL; + + kstring_t tmp = {0,0,0}; + const char *sp = str, *ep = str; + hts_pos_t from, to; + while ( 1 ) + { + tmp.l = 0; + if ( *ep=='{' ) + { + while ( *ep && *ep!='}' ) ep++; + if ( !*ep ) + { + hts_log_error("Could not parse the region, mismatching braces in: \"%s\"", str); + goto exit_nicely; + } + ep++; + kputsn(sp+1,ep-sp-2,&tmp); + } + else + { + while ( *ep && *ep!=',' && *ep!=':' ) ep++; + kputsn(sp,ep-sp,&tmp); + } + if ( *ep==':' ) + { + sp = ep+1; + from = hts_parse_decimal(sp,(char**)&ep,0); + if ( sp==ep ) + { + hts_log_error("Could not parse the region(s): %s", str); + goto exit_nicely; + } + if ( !*ep || *ep==',' ) + { + _regions_add(reg, tmp.s, from, from); + sp = ep; + continue; + } + if ( *ep!='-' ) + { + hts_log_error("Could not parse the region(s): %s", str); + goto exit_nicely; + } + ep++; + sp = ep; + to = hts_parse_decimal(sp,(char**)&ep,0); + if ( *ep && *ep!=',' ) + { + hts_log_error("Could not parse the region(s): %s", str); + goto exit_nicely; 
+ } + if ( sp==ep ) to = MAX_CSI_COOR-1; + _regions_add(reg, tmp.s, from, to); + if ( !*ep ) break; + sp = ep; + } + else if ( !*ep || *ep==',' ) + { + if ( tmp.l ) _regions_add(reg, tmp.s, -1, -1); + if ( !*ep ) break; + sp = ++ep; + } + else + { + hts_log_error("Could not parse the region(s): %s", str); + goto exit_nicely; + } + } + free(tmp.s); + return reg; + +exit_nicely: + bcf_sr_regions_destroy(reg); + free(tmp.s); + return NULL; +} + +// ichr,ifrom,ito are 0-based; +// returns -1 on error, 0 if the line is a comment line, 1 on success +static int _regions_parse_line(char *line, int ichr, int ifrom, int ito, char **chr, char **chr_end, hts_pos_t *from, hts_pos_t *to) +{ + if (ifrom < 0 || ito < 0) return -1; + *chr_end = NULL; + + if ( line[0]=='#' ) return 0; + + int k,l; // index of the start and end column of the tab-delimited file + if ( ifrom <= ito ) + k = ifrom, l = ito; + else + l = ifrom, k = ito; + + int i; + char *se = line, *ss = NULL; // start and end + char *tmp; + for (i=0; i<=k && *se; i++) + { + ss = i==0 ? 
se++ : ++se; + while (*se && *se!='\t') se++; + } + if ( i<=k ) return -1; + if ( k==l ) + { + *from = *to = hts_parse_decimal(ss, &tmp, 0); + if ( tmp==ss || (*tmp && *tmp!='\t') ) return -1; + } + else + { + if ( k==ifrom ) + *from = hts_parse_decimal(ss, &tmp, 0); + else + *to = hts_parse_decimal(ss, &tmp, 0); + if ( ss==tmp || (*tmp && *tmp!='\t') ) return -1; + + for (i=k; i0 ) ss = ++se; + while (*se && *se!='\t') se++; + } + if ( i<=ichr ) return -1; + *chr_end = se; + *chr = ss; + return 1; +} + +bcf_sr_regions_t *bcf_sr_regions_init(const char *regions, int is_file, int ichr, int ifrom, int ito) +{ + bcf_sr_regions_t *reg; + if ( !is_file ) + { + reg = _regions_init_string(regions); + _regions_sort_and_merge(reg); + return reg; + } + + reg = bcf_sr_regions_alloc(); + if ( !reg ) return NULL; + + reg->file = hts_open(regions, "rb"); + if ( !reg->file ) + { + hts_log_error("Could not open file: %s", regions); + free(reg); + return NULL; + } + + reg->tbx = tbx_index_load3(regions, NULL, HTS_IDX_SAVE_REMOTE|HTS_IDX_SILENT_FAIL); + if ( !reg->tbx ) + { + size_t iline = 0; + int len = strlen(regions); + int is_bed = strcasecmp(".bed",regions+len-4) ? 
0 : 1; + if ( !is_bed && !strcasecmp(".bed.gz",regions+len-7) ) is_bed = 1; + + if ( reg->file->format.format==vcf ) ito = 1; + + // read the whole file, tabix index is not present + while ( hts_getline(reg->file, KS_SEP_LINE, ®->line) > 0 ) + { + iline++; + char *chr, *chr_end; + hts_pos_t from, to; + int ret; + ret = _regions_parse_line(reg->line.s, ichr,ifrom,abs(ito), &chr,&chr_end,&from,&to); + if ( ret < 0 ) + { + if ( ito<0 ) + ret = _regions_parse_line(reg->line.s, ichr,ifrom,ifrom, &chr,&chr_end,&from,&to); + if ( ret<0 ) + { + hts_log_error("Could not parse %zu-th line of file %s, using the columns %d,%d[,%d]", + iline, regions,ichr+1,ifrom+1,ito+1); + hts_close(reg->file); reg->file = NULL; free(reg); + return NULL; + } + ito = ifrom; + } + else if ( ito<0 ) + ito = abs(ito); + if ( !ret ) continue; + if ( is_bed ) from++; + *chr_end = 0; + _regions_add(reg, chr, from, to); + *chr_end = '\t'; + } + hts_close(reg->file); reg->file = NULL; + if ( !reg->nseqs ) { free(reg); return NULL; } + _regions_sort_and_merge(reg); + return reg; + } + + reg->seq_names = (char**) tbx_seqnames(reg->tbx, ®->nseqs); + if ( !reg->seq_hash ) + reg->seq_hash = khash_str2int_init(); + int i; + for (i=0; inseqs; i++) + { + khash_str2int_set(reg->seq_hash,reg->seq_names[i],i); + } + reg->fname = strdup(regions); + reg->is_bin = 1; + return reg; +} + +void bcf_sr_regions_destroy(bcf_sr_regions_t *reg) +{ + int i; + free(reg->fname); + if ( reg->itr ) tbx_itr_destroy(reg->itr); + if ( reg->tbx ) tbx_destroy(reg->tbx); + if ( reg->file ) hts_close(reg->file); + if ( reg->als ) free(reg->als); + if ( reg->als_str.s ) free(reg->als_str.s); + free(reg->line.s); + if ( reg->regs ) + { + // free only in-memory names, tbx names are const + for (i=0; inseqs; i++) + { + free(reg->seq_names[i]); + free(reg->regs[i].regs); + } + } + free(reg->regs); + free(reg->seq_names); + khash_str2int_destroy(reg->seq_hash); + free(reg); +} + +int bcf_sr_regions_seek(bcf_sr_regions_t *reg, const char 
*seq) +{ + reg->iseq = reg->start = reg->end = -1; + if ( khash_str2int_get(reg->seq_hash, seq, ®->iseq) < 0 ) return -1; // sequence seq not in regions + + // using in-memory regions + if ( reg->regs ) + { + reg->regs[reg->iseq].creg = -1; + return 0; + } + + // reading regions from tabix + if ( reg->itr ) tbx_itr_destroy(reg->itr); + reg->itr = tbx_itr_querys(reg->tbx, seq); + if ( reg->itr ) return 0; + + return -1; +} + +// Returns 0 on success, -1 when done +static int advance_creg(region_t *reg) +{ + int i = reg->creg + 1; + while ( inregs && reg->regs[i].start > reg->regs[i].end ) i++; // regions with start>end are marked to skip by merge_regions() + reg->creg = i; + if ( i>=reg->nregs ) return -1; + return 0; +} + +int bcf_sr_regions_next(bcf_sr_regions_t *reg) +{ + if ( reg->iseq<0 ) return -1; + reg->start = reg->end = -1; + reg->nals = 0; + + // using in-memory regions + if ( reg->regs ) + { + while ( reg->iseq < reg->nseqs ) + { + if ( advance_creg(®->regs[reg->iseq])==0 ) break; // a valid record was found + reg->iseq++; + } + if ( reg->iseq >= reg->nseqs ) { reg->iseq = -1; return -1; } // no more regions left + region1_t *creg = ®->regs[reg->iseq].regs[reg->regs[reg->iseq].creg]; + reg->start = creg->start; + reg->end = creg->end; + return 0; + } + + // reading from tabix + char *chr, *chr_end; + int ichr = 0, ifrom = 1, ito = 2, is_bed = 0; + hts_pos_t from, to; + if ( reg->tbx ) + { + ichr = reg->tbx->conf.sc-1; + ifrom = reg->tbx->conf.bc-1; + ito = reg->tbx->conf.ec-1; + if ( ito<0 ) ito = ifrom; + is_bed = reg->tbx->conf.preset==TBX_UCSC ? 1 : 0; + } + + int ret = 0; + while ( !ret ) + { + if ( reg->itr ) + { + // tabix index present, reading a chromosome block + ret = tbx_itr_next(reg->file, reg->tbx, reg->itr, ®->line); + if ( ret<0 ) { reg->iseq = -1; return -1; } + } + else + { + if ( reg->is_bin ) + { + // Waited for seek which never came. 
Reopen in text mode and stream + // through the regions, otherwise hts_getline would fail + hts_close(reg->file); + reg->file = hts_open(reg->fname, "r"); + if ( !reg->file ) + { + hts_log_error("Could not open file: %s", reg->fname); + reg->file = NULL; + bcf_sr_regions_destroy(reg); + return -1; + } + reg->is_bin = 0; + } + + // tabix index absent, reading the whole file + ret = hts_getline(reg->file, KS_SEP_LINE, ®->line); + if ( ret<0 ) { reg->iseq = -1; return -1; } + } + ret = _regions_parse_line(reg->line.s, ichr,ifrom,ito, &chr,&chr_end,&from,&to); + if ( ret<0 ) + { + hts_log_error("Could not parse the file %s, using the columns %d,%d,%d", + reg->fname,ichr+1,ifrom+1,ito+1); + return -1; + } + } + if ( is_bed ) from++; + + *chr_end = 0; + if ( khash_str2int_get(reg->seq_hash, chr, ®->iseq)<0 ) + { + hts_log_error("Broken tabix index? The sequence \"%s\" not in dictionary [%s]", + chr, reg->line.s); + exit(1); + } + *chr_end = '\t'; + + reg->start = from - 1; + reg->end = to - 1; + return 0; +} + +static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec) +{ + if ( reg->regs ) + { + // payload is not supported for in-memory regions, switch to regidx instead in future + hts_log_error("Compressed and indexed targets file is required"); + exit(1); + } + + int i = 0, max_len = 0; + if ( !reg->nals ) + { + char *ss = reg->line.s; + while ( inals = 1; + while ( *se && *se!='\t' ) + { + if ( *se==',' ) reg->nals++; + se++; + } + ks_resize(®->als_str, se-ss+1+reg->nals); + reg->als_str.l = 0; + hts_expand(char*,reg->nals,reg->mals,reg->als); + reg->nals = 0; + + se = ss; + while ( *(++se) ) + { + if ( *se=='\t' ) break; + if ( *se!=',' ) continue; + reg->als[reg->nals] = ®->als_str.s[reg->als_str.l]; + kputsn(ss,se-ss,®->als_str); + if ( ®->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = ®->als_str.s[reg->als_str.l] - reg->als[reg->nals]; + reg->als_str.l++; + reg->nals++; + ss = ++se; + } + reg->als[reg->nals] = 
®->als_str.s[reg->als_str.l]; + kputsn(ss,se-ss,®->als_str); + if ( ®->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = ®->als_str.s[reg->als_str.l] - reg->als[reg->nals]; + reg->nals++; + reg->als_type = max_len > 1 ? VCF_INDEL : VCF_SNP; // this is a simplified check, see vcf.c:bcf_set_variant_types + } + int type = bcf_get_variant_types(rec); + if ( reg->als_type & VCF_INDEL ) + return type & VCF_INDEL ? 1 : 0; + return !(type & VCF_INDEL) ? 1 : 0; +} + +int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end) +{ + return _bcf_sr_regions_overlap(reg,seq,start,end,1); +} + +static int _bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end, int missed_reg_handler) +{ + int iseq; + if ( khash_str2int_get(reg->seq_hash, seq, &iseq)<0 ) return -1; // no such sequence + if ( missed_reg_handler && !reg->missed_reg_handler ) missed_reg_handler = 0; + + if ( reg->prev_seq==-1 || iseq!=reg->prev_seq || reg->prev_start > start ) // new chromosome or after a seek + { + // flush regions left on previous chromosome + if ( missed_reg_handler && reg->prev_seq!=-1 && reg->iseq!=-1 ) + bcf_sr_regions_flush(reg); + + bcf_sr_regions_seek(reg, seq); + reg->start = reg->end = -1; + } + if ( reg->prev_seq==iseq && reg->iseq!=iseq ) return -2; // no more regions on this chromosome + reg->prev_seq = reg->iseq; + reg->prev_start = start; + + while ( iseq==reg->iseq && reg->end < start ) + { + if ( bcf_sr_regions_next(reg) < 0 ) return -2; // no more regions left + if ( reg->iseq != iseq ) return -1; // does not overlap any regions + if ( missed_reg_handler && reg->end < start ) reg->missed_reg_handler(reg, reg->missed_reg_data); + } + if ( reg->start <= end ) return 0; // region overlap + return -1; // no overlap +} + +int bcf_sr_regions_flush(bcf_sr_regions_t *reg) +{ + if ( !reg->missed_reg_handler || reg->prev_seq==-1 ) return 0; + while ( !bcf_sr_regions_next(reg) ) 
reg->missed_reg_handler(reg, reg->missed_reg_data); + return 0; // FIXME: check for errs in this function +} + diff --git a/src/htslib-1.21/tabix.1 b/src/htslib-1.21/tabix.1 new file mode 100644 index 0000000..f0dc7b5 --- /dev/null +++ b/src/htslib-1.21/tabix.1 @@ -0,0 +1,209 @@ +.TH tabix 1 "12 September 2024" "htslib-1.21" "Bioinformatics tools" +.SH NAME +.PP +tabix \- Generic indexer for TAB-delimited genome position files +.\" +.\" Copyright (C) 2009-2011 Broad Institute. +.\" Copyright (C) 2014, 2016, 2018, 2020, 2022, 2024 Genome Research Ltd. +.\" +.\" Author: Heng Li +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +.SH SYNOPSIS +.PP +.B tabix +.RB [ -0lf ] +.RB [ -p +gff|bed|sam|vcf] +.RB [ -s +.IR seqCol ] +.RB [ -b +.IR begCol ] +.RB [ -e +.IR endCol ] +.RB [ -S +.IR lineSkip ] +.RB [ -c +.IR metaChar ] +.I in.tab.bgz +.RI [ "region1 " [ "region2 " [ ... 
"]]]" + +.SH DESCRIPTION +.PP +Tabix indexes a TAB-delimited genome position file +.I in.tab.bgz +and creates an index file +.RI ( in.tab.bgz.tbi +or +.IR in.tab.bgz.csi ) +when +.I region +is absent from the command-line. The input data file must be position +sorted and compressed by +.B bgzip +which has a +.BR gzip (1) +like interface. + +After indexing, tabix is able to quickly retrieve data lines overlapping +.I regions +specified in the format "chr:beginPos-endPos". +(Coordinates specified in this region format are 1-based and inclusive.) + +Fast data retrieval also +works over network if URI is given as a file name and in this case the +index file will be downloaded if it is not present locally. + +The tabix +.RI ( .tbi ) +and BAI index formats can handle individual chromosomes up to 512 Mbp +(2^29 bases) in length. +If your input file might contain data lines with begin or end positions +greater than that, you will need to use a CSI index. + +Multiple threads can be used for operations except listing of sequence names. + +.SH INDEXING OPTIONS +.TP 10 +.B -0, --zero-based +Specify that the position in the data file is 0-based half-open +(e.g. UCSC files) rather than 1-based. +.TP +.BI "-b, --begin " INT +Column of start chromosomal position. [4] +.TP +.BI "-c, --comment " CHAR +Skip lines started with character CHAR. [#] +.TP +.BI "-C, --csi" +Produce CSI format index instead of classical tabix or BAI style indices. +.TP +.BI "-e, --end " INT +Column of end chromosomal position. The end column can be the same as the +start column. [5] +.TP +.B "-f, --force " +Force to overwrite the index file if it is present. +.TP +.BI "-m, --min-shift " INT +Set minimal interval size for CSI indices to 2^INT [14] +.TP +.BI "-p, --preset " STR +Input format for indexing. Valid values are: gff, bed, sam, vcf. 
+This option should not be applied together with any of +.BR -s ", " -b ", " -e ", " -c " and " -0 ; +it is not used for data retrieval because this setting is stored in +the index file. [gff] +.TP +.BI "-s, --sequence " INT +Column of sequence name. Option +.BR -s ", " -b ", " -e ", " -S ", " -c " and " -0 +are all stored in the index file and thus not used in data retrieval. [1] +.TP +.BI "-S, --skip-lines " INT +Skip first INT lines in the data file. [0] + +.SH QUERYING AND OTHER OPTIONS +.TP +.B "-h, --print-header " +Print also the header/meta lines. +.TP +.B "-H, --only-header " +Print only the header/meta lines. +.TP +.B "-l, --list-chroms " +List the sequence names stored in the index file. +.TP +.BI "-r, --reheader " FILE +Replace the header with the content of FILE +.TP +.BI "-R, --regions " FILE +Restrict to regions listed in the FILE. The FILE can be BED file (requires .bed, .bed.gz, .bed.bgz +file name extension) or a TAB-delimited file with CHROM, POS, and, optionally, +POS_TO columns, where positions are 1-based and inclusive. When this option is in use, the input +file may not be sorted. +.TP +.BI "-T, --targets " FILE +Similar to +.B -R +but the entire input will be read sequentially and regions not listed in FILE will be skipped. +.TP +.BI "-D " +Do not download the index file before opening it. Valid for remote files only. +.TP +.BI "--cache " INT +Set the BGZF block cache size to INT megabytes. [10] + +This is of most benefit when the +.B -R +option is used, which can cause blocks to be read more than once. +Setting the size to 0 will disable the cache. +.TP +.B --separate-regions +This option can be used when multiple regions are supplied in the command line +and the user needs to quickly see which file records belong to which region. +For this, a line with the name of the region, preceded by the file specific +comment symbol, is inserted in the output before its corresponding group of +records. 
+.TP +.BI "--verbosity " INT +Set verbosity of logging messages printed to stderr. +The default is 3, which turns on error and warning messages; +2 reduces warning messages; +1 prints only error messages and 0 is mostly silent. +Values higher than 3 produce additional informational and debugging messages. +.TP +.BI "-@, --threads " INT +Set number of threads to use for the operation. +The default is 0, where no extra threads are in use. +.PP +.SH EXAMPLE +(grep "^#" in.gff; grep -v "^#" in.gff | sort -t"`printf '\(rst'`" -k1,1 -k4,4n) | bgzip > sorted.gff.gz; + +tabix -p gff sorted.gff.gz; + +tabix sorted.gff.gz chr1:10,000,000-20,000,000; + +.SH NOTES +It is straightforward to achieve overlap queries using the standard +B-tree index (with or without binning) implemented in all SQL databases, +or the R-tree index in PostgreSQL and Oracle. But there are still many +reasons to use tabix. Firstly, tabix directly works with a lot of widely +used TAB-delimited formats such as GFF/GTF and BED. We do not need to +design database schema or specialized binary formats. Data do not need +to be duplicated in different formats, either. Secondly, tabix works on +compressed data files while most SQL databases do not. The GenCode +annotation GTF can be compressed down to 4%. Thirdly, tabix is +fast. The same indexing algorithm is known to work efficiently for an +alignment with a few billion short reads. SQL databases probably cannot +easily handle data at this scale. Last but not the least, tabix supports +remote data retrieval. One can put the data file and the index at an FTP +or HTTP server, and other users or even web services will be able to get +a slice without downloading the entire file. + +.SH AUTHOR +.PP +Tabix was written by Heng Li. The BGZF library was originally +implemented by Bob Handsaker and modified by Heng Li for remote file +access and in-memory caching. 
+ +.SH SEE ALSO +.IR bgzip (1), +.IR samtools (1) diff --git a/src/htslib-1.21/tabix.c b/src/htslib-1.21/tabix.c new file mode 100644 index 0000000..2fb5d4b --- /dev/null +++ b/src/htslib-1.21/tabix.c @@ -0,0 +1,848 @@ +/* tabix.c -- Generic indexer for TAB-delimited genome position files. + + Copyright (C) 2009-2011 Broad Institute. + Copyright (C) 2010-2012, 2014-2020, 2024 Genome Research Ltd. + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "htslib/tbx.h" +#include "htslib/sam.h" +#include "htslib/vcf.h" +#include "htslib/kseq.h" +#include "htslib/bgzf.h" +#include "htslib/hts.h" +#include "htslib/regidx.h" +#include "htslib/hts_defs.h" +#include "htslib/hts_log.h" +#include "htslib/thread_pool.h" + +//for easy coding +#define RELEASE_TPOOL(X) { hts_tpool *ptr = (hts_tpool*)(X); if (ptr) { hts_tpool_destroy(ptr); } } +#define bam_index_build3(fn, min_shift, nthreads) (sam_index_build3((fn), NULL, (min_shift), (nthreads))) + +typedef struct +{ + char *regions_fname, *targets_fname; + int print_header, header_only, cache_megs, download_index, separate_regs, threads; +} +args_t; + +static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) HTS_NORETURN +error(const char *format, ...) +{ + va_list ap; + fflush(stdout); + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + fflush(stderr); + exit(EXIT_FAILURE); +} + +static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) HTS_NORETURN +error_errno(const char *format, ...) +{ + va_list ap; + int eno = errno; + fflush(stdout); + if (format) { + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + } + if (eno) { + fprintf(stderr, "%s%s\n", format ? 
": " : "", strerror(eno)); + } else { + fprintf(stderr, "\n"); + } + fflush(stderr); + exit(EXIT_FAILURE); +} + + +#define IS_GFF (1<<0) +#define IS_BED (1<<1) +#define IS_SAM (1<<2) +#define IS_VCF (1<<3) +#define IS_BCF (1<<4) +#define IS_BAM (1<<5) +#define IS_CRAM (1<<6) +#define IS_GAF (1<<7) +#define IS_TXT (IS_GFF|IS_BED|IS_SAM|IS_VCF) + +int file_type(const char *fname) +{ + int l = strlen(fname); + if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF; + else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED; + else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM; + else if (l>=7 && strcasecmp(fname+l-7, ".vcf.gz") == 0) return IS_VCF; + else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF; + else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM; + else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM; + else if (l>=7 && strcasecmp(fname+l-7, ".gaf.gz") == 0) return IS_GAF; + + htsFile *fp = hts_open(fname,"r"); + if (!fp) { + if (errno == ENOEXEC) { + // hts_open() uses this to report that it didn't understand the + // file format. + error("Couldn't understand format of \"%s\"\n", fname); + } else { + error_errno("Couldn't open \"%s\"", fname); + } + } + enum htsExactFormat format = hts_get_format(fp)->format; + hts_close(fp); + if ( format == bcf ) return IS_BCF; + if ( format == bam ) return IS_BAM; + if ( format == cram ) return IS_CRAM; + if ( format == vcf ) return IS_VCF; + + return 0; +} + +static char **parse_regions(char *regions_fname, char **argv, int argc, int *nregs) +{ + kstring_t str = {0,0,0}; + int iseq = 0, ireg = 0; + char **regs = NULL; + *nregs = argc; + + if ( regions_fname ) + { + // improve me: this is a too heavy machinery for parsing regions... 
+ + regidx_t *idx = regidx_init(regions_fname, NULL, NULL, 0, NULL); + if ( !idx ) { + error_errno("Could not build region list for \"%s\"", regions_fname); + } + regitr_t *itr = regitr_init(idx); + if ( !itr ) { + error_errno("Could not initialize an iterator over \"%s\"", + regions_fname); + } + + (*nregs) += regidx_nregs(idx); + regs = (char**) malloc(sizeof(char*)*(*nregs)); + if (!regs) error_errno(NULL); + + int nseq; + char **seqs = regidx_seq_names(idx, &nseq); + for (iseq=0; iseqbeg+1, itr->end+1) < 0) { + error_errno(NULL); + } + regs[ireg] = strdup(str.s); + if (!regs[ireg]) error_errno(NULL); + ireg++; + } + } + regidx_destroy(idx); + regitr_destroy(itr); + } + free(str.s); + + if ( !ireg ) + { + if ( argc ) + { + regs = (char**) malloc(sizeof(char*)*argc); + if (!regs) error_errno(NULL); + } + else + { + regs = (char**) malloc(sizeof(char*)); + if (!regs) error_errno(NULL); + regs[0] = strdup("."); + if (!regs[0]) error_errno(NULL); + *nregs = 1; + } + } + + for (iseq=0; iseqformat; + if (args->cache_megs) + hts_set_cache_size(fp, args->cache_megs * 1048576); + + //set threads if needed, errors are logged and ignored + if (args->threads >= 1) { + if (!(tpool.pool = hts_tpool_init(args->threads))) { + hts_log_info("Could not initialize thread pool!"); + } + if (hts_set_thread_pool(fp, &tpool) < 0) { + hts_log_info("Could not set thread pool!"); + } + } + + regidx_t *reg_idx = NULL; + if ( args->targets_fname ) + { + reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL); + if (!reg_idx) { + RELEASE_TPOOL(tpool.pool); + error_errno("Could not build region list for \"%s\"", + args->targets_fname); + } + } + + if ( format == bcf ) + { + htsFile *out = hts_open("-","w"); + if ( !out ) { + RELEASE_TPOOL(tpool.pool); + error_errno("Could not open stdout"); + } + if (hts_set_thread_pool(out, &tpool) < 0) { + hts_log_info("Could not set thread pool to output file!"); + } + hts_idx_t *idx = bcf_index_load3(fname, NULL, args->download_index ? 
HTS_IDX_SAVE_REMOTE : 0); + if ( !idx ) { + RELEASE_TPOOL(tpool.pool); + error_errno("Could not load .csi index of \"%s\"", fname); + } + + bcf_hdr_t *hdr = bcf_hdr_read(fp); + if ( !hdr ) { + RELEASE_TPOOL(tpool.pool); + error_errno("Could not read the header from \"%s\"", fname); + } + + if ( args->print_header ) { + if ( bcf_hdr_write(out,hdr)!=0 ) { + RELEASE_TPOOL(tpool.pool); + error_errno("Failed to write to stdout"); + } + } + if ( !args->header_only ) + { + assert(regs != NULL); + bcf1_t *rec = bcf_init(); + if (!rec) { + RELEASE_TPOOL(tpool.pool); + error_errno(NULL); + } + for (i=0; i=0 ) + { + if ( reg_idx ) + { + const char *chr = bcf_seqname(hdr,rec); + if (!chr) { + RELEASE_TPOOL(tpool.pool); + error("Bad BCF record in \"%s\" : " + "Invalid CONTIG id %d\n", + fname, rec->rid); + } + if ( !regidx_overlap(reg_idx,chr,rec->pos,rec->pos+rec->rlen-1, NULL) ) continue; + } + if (!found) { + if (args->separate_regs) printf("%c%s\n", conf->meta_char, regs[i]); + found = 1; + } + if ( bcf_write(out,hdr,rec)!=0 ) { + RELEASE_TPOOL(tpool.pool); + error_errno("Failed to write to stdout"); + } + } + + if (ret < -1) { + RELEASE_TPOOL(tpool.pool); + error_errno("Reading \"%s\" failed", fname); + } + bcf_itr_destroy(itr); + } + bcf_destroy(rec); + } + if ( hts_close(out) ) { + RELEASE_TPOOL(tpool.pool); + error_errno("hts_close returned non-zero status for stdout"); + } + + bcf_hdr_destroy(hdr); + hts_idx_destroy(idx); + } + else if ( format==vcf || format==sam || format==bed || format==text_format || format==unknown_format ) + { + tbx_t *tbx = tbx_index_load3(fname, NULL, args->download_index ? 
HTS_IDX_SAVE_REMOTE : 0); + if ( !tbx ) { + RELEASE_TPOOL(tpool.pool); + error_errno("Could not load .tbi/.csi index of %s", fname); + } + kstring_t str = {0,0,0}; + if ( args->print_header ) + { + int ret; + while ((ret = hts_getline(fp, KS_SEP_LINE, &str)) >= 0) + { + if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break; + if (puts(str.s) < 0) { + RELEASE_TPOOL(tpool.pool); + error_errno("Error writing to stdout"); + } + } + if (ret < -1) { + RELEASE_TPOOL(tpool.pool); + error_errno("Reading \"%s\" failed", fname); + } + } + if ( !args->header_only ) + { + int nseq; + const char **seq = NULL; + if ( reg_idx ) { + seq = tbx_seqnames(tbx, &nseq); + if (!seq) { + RELEASE_TPOOL(tpool.pool); + error_errno("Failed to get sequence names list"); + } + } + for (i=0; i= 0) + { + if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end-1, NULL) ) continue; + if (!found) { + if (args->separate_regs) printf("%c%s\n", conf->meta_char, regs[i]); + found = 1; + } + if (puts(str.s) < 0) { + RELEASE_TPOOL(tpool.pool); + error_errno("Failed to write to stdout"); + } + } + if (ret < -1) { + RELEASE_TPOOL(tpool.pool); + error_errno("Reading \"%s\" failed", fname); + } + tbx_itr_destroy(itr); + } + free(seq); + } + free(str.s); + tbx_destroy(tbx); + } + else if ( format==bam ) { + RELEASE_TPOOL(tpool.pool); + error("Please use \"samtools view\" for querying BAM files.\n"); + } + + if ( reg_idx ) regidx_destroy(reg_idx); + if ( hts_close(fp) ) { + RELEASE_TPOOL(tpool.pool); + error_errno("hts_close returned non-zero status: %s", fname); + } + + for (i=0; i= 1) { + if (!(tpool = hts_tpool_init(threads))) { + hts_log_info("Could not initialize thread pool!"); + } + } + if ( ftype & IS_TXT || !ftype ) + { + BGZF *fp = bgzf_open(fname,"r"); + if (!fp) { + RELEASE_TPOOL(tpool); + return -1; + } + if (bgzf_thread_pool(fp, tpool, 0) < 0) { + hts_log_info("Could not set thread pool!"); + } + if (bgzf_read_block(fp) != 0 || !fp->block_length ) { + 
RELEASE_TPOOL(tpool); + return -1; + } + + char *buffer = fp->uncompressed_block; + int skip_until = 0; + + // Skip the header: find out the position of the data block + if ( buffer[0]==conf->meta_char ) + { + skip_until = 1; + while (1) + { + if ( buffer[skip_until]=='\n' ) + { + skip_until++; + if ( skip_until>=fp->block_length ) + { + if ( bgzf_read_block(fp) != 0 || !fp->block_length ) { + RELEASE_TPOOL(tpool); + error("FIXME: No body in the file: %s\n", fname); + } + skip_until = 0; + } + // The header has finished + if ( buffer[skip_until]!=conf->meta_char ) break; + } + skip_until++; + if ( skip_until>=fp->block_length ) + { + if (bgzf_read_block(fp) != 0 || !fp->block_length) { + RELEASE_TPOOL(tpool); + error("FIXME: No body in the file: %s\n", fname); + } + skip_until = 0; + } + } + } + + // Output the new header + FILE *hdr = fopen(header,"r"); + if ( !hdr ) { + RELEASE_TPOOL(tpool); + error("%s: %s", header,strerror(errno)); + } + const size_t page_size = 32768; + char *buf = malloc(page_size); + BGZF *bgzf_out = bgzf_open("-", "w"); + ssize_t nread; + + if (!buf) { + RELEASE_TPOOL(tpool); + error("%s\n", strerror(errno)); + } + if (!bgzf_out) { + RELEASE_TPOOL(tpool); + error_errno("Couldn't open output stream"); + } + if (bgzf_thread_pool(bgzf_out, tpool, 0) < 0) { + hts_log_info("Could not set thread pool to output file!"); + } + while ( (nread=fread(buf,1,page_size-1,hdr))>0 ) + { + if ( nreaderrcode); + } + } + if ( ferror(hdr) ) { + RELEASE_TPOOL(tpool); + error_errno("Failed to read \"%s\"", header); + } + if ( fclose(hdr) ) { + RELEASE_TPOOL(tpool); + error_errno("Closing \"%s\" failed", header); + } + + // Output all remaining data read with the header block + if ( fp->block_length - skip_until > 0 ) + { + if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) { + RELEASE_TPOOL(tpool); + error_errno("Write error %d",fp->errcode); + } + } + if (bgzf_flush(bgzf_out) < 0) { + RELEASE_TPOOL(tpool); + error_errno("Write error 
%d", bgzf_out->errcode); + } + + while (1) + { + nread = bgzf_raw_read(fp, buf, page_size); + if ( nread<=0 ) break; + + int count = bgzf_raw_write(bgzf_out, buf, nread); + if (count != nread) { + RELEASE_TPOOL(tpool); + error_errno("Write failed, wrote %d instead of %d bytes", count,(int)nread); + } + } + if (nread < 0) { + RELEASE_TPOOL(tpool); + error_errno("Error reading \"%s\"", fname); + } + if (bgzf_close(bgzf_out) < 0) { + RELEASE_TPOOL(tpool); + error_errno("Error %d closing output", bgzf_out->errcode); + } + if (bgzf_close(fp) < 0) { + RELEASE_TPOOL(tpool); + error_errno("Error %d closing \"%s\"", bgzf_out->errcode, fname); + } + free(buf); + } + else { + RELEASE_TPOOL(tpool); + error("todo: reheader BCF, BAM\n"); // BCF is difficult, records contain pointers to the header. + } + RELEASE_TPOOL(tpool); + return 0; +} + +static int usage(FILE *fp, int status) +{ + fprintf(fp, "\n"); + fprintf(fp, "Version: %s\n", hts_version()); + fprintf(fp, "Usage: tabix [OPTIONS] [FILE] [REGION [...]]\n"); + fprintf(fp, "\n"); + fprintf(fp, "Indexing Options:\n"); + fprintf(fp, " -0, --zero-based coordinates are zero-based\n"); + fprintf(fp, " -b, --begin INT column number for region start [4]\n"); + fprintf(fp, " -c, --comment CHAR skip comment lines starting with CHAR [null]\n"); + fprintf(fp, " -C, --csi generate CSI index for VCF (default is TBI)\n"); + fprintf(fp, " -e, --end INT column number for region end (if no end, set INT to -b) [5]\n"); + fprintf(fp, " -f, --force overwrite existing index without asking\n"); + fprintf(fp, " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n"); + fprintf(fp, " -p, --preset STR gff, bed, sam, vcf, gaf\n"); + fprintf(fp, " -s, --sequence INT column number for sequence names (suppressed by -p) [1]\n"); + fprintf(fp, " -S, --skip-lines INT skip first INT lines [0]\n"); + fprintf(fp, "\n"); + fprintf(fp, "Querying and other options:\n"); + fprintf(fp, " -h, --print-header print also the header lines\n"); 
+ fprintf(fp, " -H, --only-header print only the header lines\n"); + fprintf(fp, " -l, --list-chroms list chromosome names\n"); + fprintf(fp, " -r, --reheader FILE replace the header with the content of FILE\n"); + fprintf(fp, " -R, --regions FILE restrict to regions listed in the file\n"); + fprintf(fp, " -T, --targets FILE similar to -R but streams rather than index-jumps\n"); + fprintf(fp, " -D do not download the index file\n"); + fprintf(fp, " --cache INT set cache size to INT megabytes (0 disables) [10]\n"); + fprintf(fp, " --separate-regions separate the output by corresponding regions\n"); + fprintf(fp, " --verbosity INT set verbosity [3]\n"); + fprintf(fp, " -@, --threads INT number of additional threads to use [0]\n"); + fprintf(fp, "\n"); + return status; +} + +int main(int argc, char *argv[]) +{ + int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0; + tbx_conf_t conf = tbx_conf_gff; + char *reheader = NULL; + args_t args; + memset(&args,0,sizeof(args_t)); + args.cache_megs = 10; + args.download_index = 1; + int32_t new_line_skip = -1; + + static const struct option loptions[] = + { + {"help", no_argument, NULL, 2}, + {"regions", required_argument, NULL, 'R'}, + {"targets", required_argument, NULL, 'T'}, + {"csi", no_argument, NULL, 'C'}, + {"zero-based", no_argument, NULL, '0'}, + {"print-header", no_argument, NULL, 'h'}, + {"only-header", no_argument, NULL, 'H'}, + {"begin", required_argument, NULL, 'b'}, + {"comment", required_argument, NULL, 'c'}, + {"end", required_argument, NULL, 'e'}, + {"force", no_argument, NULL, 'f'}, + {"min-shift", required_argument, NULL, 'm'}, + {"preset", required_argument, NULL, 'p'}, + {"sequence", required_argument, NULL, 's'}, + {"skip-lines", required_argument, NULL, 'S'}, + {"list-chroms", no_argument, NULL, 'l'}, + {"reheader", required_argument, NULL, 'r'}, + {"version", no_argument, NULL, 1}, + {"verbosity", required_argument, NULL, 3}, + {"cache", required_argument, NULL, 4}, + 
{"separate-regions", no_argument, NULL, 5}, + {"threads", required_argument, NULL, '@'}, + {NULL, 0, NULL, 0} + }; + + char *tmp; + while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:D@:", loptions,NULL)) >= 0) + { + switch (c) + { + case 'R': args.regions_fname = optarg; break; + case 'T': args.targets_fname = optarg; break; + case 'C': do_csi = 1; break; + case 'r': reheader = optarg; break; + case 'h': args.print_header = 1; break; + case 'H': args.print_header = 1; args.header_only = 1; break; + case 'l': list_chroms = 1; break; + case '0': conf.preset |= TBX_UCSC; detect = 0; break; + case 'b': + conf.bc = strtol(optarg,&tmp,10); + if ( *tmp ) error("Could not parse argument: -b %s\n", optarg); + detect = 0; + break; + case 'e': + conf.ec = strtol(optarg,&tmp,10); + if ( *tmp ) error("Could not parse argument: -e %s\n", optarg); + detect = 0; + break; + case 'c': conf.meta_char = *optarg; detect = 0; break; + case 'f': is_force = 1; break; + case 'm': + min_shift = strtol(optarg,&tmp,10); + if ( *tmp ) error("Could not parse argument: -m %s\n", optarg); + break; + case 'p': + detect = 0; + if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff; + else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed; + else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam; + else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf; + else if (strcmp(optarg, "gaf") == 0) conf = tbx_conf_gaf; + else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed + else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf + else error("The preset string not recognised: '%s'\n", optarg); + break; + case 's': + conf.sc = strtol(optarg,&tmp,10); + if ( *tmp ) error("Could not parse argument: -s %s\n", optarg); + detect = 0; + break; + case 'S': + new_line_skip = strtol(optarg,&tmp,10); + if ( *tmp ) error("Could not parse argument: -S %s\n", optarg); + detect = 0; + break; + case 'D': + args.download_index = 0; + break; + case 1: + 
printf( +"tabix (htslib) %s\n" +"Copyright (C) 2024 Genome Research Ltd.\n", hts_version()); + return EXIT_SUCCESS; + case 2: + return usage(stdout, EXIT_SUCCESS); + case 3: { + int v = atoi(optarg); + if (v < 0) v = 0; + hts_set_log_level(v); + break; + } + case 4: + args.cache_megs = atoi(optarg); + if (args.cache_megs < 0) { + args.cache_megs = 0; + } else if (args.cache_megs >= INT_MAX / 1048576) { + args.cache_megs = INT_MAX / 1048576; + } + break; + case 5: + args.separate_regs = 1; + break; + case '@': //thread count + args.threads = atoi(optarg); + break; + default: return usage(stderr, EXIT_FAILURE); + } + } + + if (new_line_skip >= 0) + conf.line_skip = new_line_skip; + + if ( optind==argc ) return usage(stderr, EXIT_FAILURE); + + if ( list_chroms ) + return query_chroms(argv[optind], args.download_index); + + char *fname = argv[optind]; + int ftype = file_type(fname); + if ( detect ) // no preset given + { + if ( ftype==IS_GFF ) conf = tbx_conf_gff; + else if ( ftype==IS_BED ) conf = tbx_conf_bed; + else if ( ftype==IS_GAF ) conf = tbx_conf_gaf; + else if ( ftype==IS_SAM ) conf = tbx_conf_sam; + else if ( ftype==IS_VCF ) + { + conf = tbx_conf_vcf; + if ( !min_shift && do_csi ) min_shift = 14; + } + else if ( ftype==IS_BCF ) + { + if ( !min_shift ) min_shift = 14; + } + else if ( ftype==IS_BAM ) + { + if ( !min_shift ) min_shift = 14; + } + } + if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname ) + { + int nregs = 0; + char **regs = NULL; + if ( !args.header_only ) + regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs); + return query_regions(&args, &conf, fname, regs, nregs); + } + if ( do_csi ) + { + if ( !min_shift ) min_shift = 14; + min_shift *= do_csi; // positive for CSIv2, negative for CSIv1 + } + if ( min_shift!=0 && !do_csi ) do_csi = 1; + + if ( reheader ) + return reheader_file(fname, reheader, ftype, &conf, args.threads); + + char *suffix = ".tbi"; + if ( do_csi ) suffix = ".csi"; + 
else if ( ftype==IS_BAM ) suffix = ".bai"; + else if ( ftype==IS_CRAM ) suffix = ".crai"; + + char *idx_fname = calloc(strlen(fname) + 6, 1); + if (!idx_fname) error("%s\n", strerror(errno)); + strcat(strcpy(idx_fname, fname), suffix); + + struct stat stat_tbi, stat_file; + if ( !is_force && stat(idx_fname, &stat_tbi)==0 ) + { + // Before complaining about existing index, check if the VCF file isn't + // newer. This is a common source of errors, people tend not to notice + // that tabix failed + stat(fname, &stat_file); + if ( stat_file.st_mtime <= stat_tbi.st_mtime ) + error("[tabix] the index file exists. Please use '-f' to overwrite.\n"); + } + free(idx_fname); + + int ret; + if ( ftype==IS_CRAM ) + { + if ( bam_index_build3(fname, min_shift, args.threads)!=0 ) error("bam_index_build failed: %s\n", fname); + return 0; + } + else if ( do_csi ) + { + if ( ftype==IS_BCF ) + { + if ( bcf_index_build3(fname, NULL, min_shift, args.threads)!=0 ) error("bcf_index_build failed: %s\n", fname); + return 0; + } + if ( ftype==IS_BAM ) + { + if ( bam_index_build3(fname, min_shift, args.threads)!=0 ) error("bam_index_build failed: %s\n", fname); + return 0; + } + + switch (ret = tbx_index_build3(fname, NULL, min_shift, args.threads, &conf)) + { + case 0: + return 0; + case -2: + error("[tabix] the compression of '%s' is not BGZF\n", fname); + default: + error("tbx_index_build3 failed: %s\n", fname); + } + } + else // TBI index + { + switch (ret = tbx_index_build3(fname, NULL, min_shift, args.threads, &conf)) + { + case 0: + return 0; + case -2: + error("[tabix] the compression of '%s' is not BGZF\n", fname); + default: + error("tbx_index_build3 failed: %s\n", fname); + } + } + + return 0; +} diff --git a/src/htslib-1.21/tbx.c b/src/htslib-1.21/tbx.c new file mode 100644 index 0000000..6625005 --- /dev/null +++ b/src/htslib-1.21/tbx.c @@ -0,0 +1,529 @@ +/* tbx.c -- tabix API functions. + + Copyright (C) 2009, 2010, 2012-2015, 2017-2020, 2022-2023 Genome Research Ltd. 
+ Copyright (C) 2010-2012 Broad Institute. + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include "htslib/tbx.h" +#include "htslib/bgzf.h" +#include "htslib/hts_endian.h" +#include "hts_internal.h" + +#include "htslib/khash.h" +KHASH_DECLARE(s2i, kh_cstr_t, int64_t) + +HTSLIB_EXPORT +const tbx_conf_t tbx_conf_gff = { 0, 1, 4, 5, '#', 0 }; + +HTSLIB_EXPORT +const tbx_conf_t tbx_conf_bed = { TBX_UCSC, 1, 2, 3, '#', 0 }; + +HTSLIB_EXPORT +const tbx_conf_t tbx_conf_psltbl = { TBX_UCSC, 15, 17, 18, '#', 0 }; + +HTSLIB_EXPORT +const tbx_conf_t tbx_conf_sam = { TBX_SAM, 3, 4, 0, '@', 0 }; + +HTSLIB_EXPORT +const tbx_conf_t tbx_conf_vcf = { TBX_VCF, 1, 2, 0, '#', 0 }; +const tbx_conf_t tbx_conf_gaf = { TBX_GAF, 1, 6, 0, '#', 0 }; + +typedef struct { + int64_t beg, end; + char *ss, *se; + int tid; +} tbx_intv_t; + +static inline int get_tid(tbx_t *tbx, const char *ss, int is_add) +{ + khint_t k; + khash_t(s2i) *d; + if ((tbx->conf.preset&0xffff) == TBX_GAF) return(0); + if (tbx->dict == 0) tbx->dict = kh_init(s2i); + if (!tbx->dict) return -1; // Out of memory + d = (khash_t(s2i)*)tbx->dict; + if (is_add) { + int absent; + k = kh_put(s2i, d, ss, &absent); + if (absent < 0) { + return -1; // Out of memory + } else if (absent) { + char *ss_dup = strdup(ss); + if (ss_dup) { + kh_key(d, k) = ss_dup; + kh_val(d, k) = kh_size(d) - 1; + } else { + kh_del(s2i, d, k); + return -1; // Out of memory + } + } + } else k = kh_get(s2i, d, ss); + return k == kh_end(d)? 
-1 : kh_val(d, k); +} + +int tbx_name2id(tbx_t *tbx, const char *ss) +{ + return get_tid(tbx, ss, 0); +} + +int tbx_parse1(const tbx_conf_t *conf, size_t len, char *line, tbx_intv_t *intv) +{ + size_t i, b = 0; + int id = 1; + char *s; + intv->ss = intv->se = 0; intv->beg = intv->end = -1; + for (i = 0; i <= len; ++i) { + if (line[i] == '\t' || line[i] == 0) { + if (id == conf->sc) { + intv->ss = line + b; intv->se = line + i; + } else if (id == conf->bc) { + // here ->beg is 0-based. + if ((conf->preset&0xffff) == TBX_GAF){ + // if gaf find the smallest and largest node id + char *t; + int64_t nodeid = -1; + for (s = line + b + 1; s < line + i;) { + nodeid = strtoll(s, &t, 0); + if(intv->beg == -1){ + intv->beg = intv->end = nodeid; + } else { + if(nodeid < intv->beg){ + intv->beg = nodeid; + } + + if(nodeid > intv->end){ + intv->end = nodeid; + } + } + s = t + 1; + } + } else { + intv->beg = strtoll(line + b, &s, 0); + + if (conf->bc <= conf->ec) // don't overwrite an already set end point + intv->end = intv->beg; + + if ( s==line+b ) return -1; // expected int + + if (!(conf->preset&TBX_UCSC)) + --intv->beg; + else if (conf->bc <= conf->ec) + ++intv->end; + + if (intv->beg < 0) { + hts_log_warning("Coordinate <= 0 detected. 
" + "Did you forget to use the -0 option?"); + intv->beg = 0; + } + if (intv->end < 1) intv->end = 1; + } + } else { + if ((conf->preset&0xffff) == TBX_GENERIC) { + if (id == conf->ec) + { + intv->end = strtoll(line + b, &s, 0); + if ( s==line+b ) return -1; // expected int + } + } else if ((conf->preset&0xffff) == TBX_SAM) { + if (id == 6) { // CIGAR + int l = 0; + char *t; + for (s = line + b; s < line + i;) { + long x = strtol(s, &t, 10); + char op = toupper_c(*t); + if (op == 'M' || op == 'D' || op == 'N') l += x; + s = t + 1; + } + if (l == 0) l = 1; + intv->end = intv->beg + l; + } + } else if ((conf->preset&0xffff) == TBX_VCF) { + if (id == 4) { + if (b < i) intv->end = intv->beg + (i - b); + } else if (id == 8) { // look for "END=" + int c = line[i]; + line[i] = 0; + s = strstr(line + b, "END="); + if (s == line + b) s += 4; + else if (s) { + s = strstr(line + b, ";END="); + if (s) s += 5; + } + if (s && *s != '.') { + long long end = strtoll(s, &s, 0); + if (end <= intv->beg) { + static int reported = 0; + if (!reported) { + int l = intv->ss ? (int) (intv->se - intv->ss) : 0; + hts_log_warning("VCF INFO/END=%lld is smaller than POS at %.*s:%"PRIhts_pos"\n" + "This tag will be ignored. " + "Note: only one invalid END tag will be reported.", + end, l >= 0 ? l : 0, + intv->ss ? intv->ss : "", + intv->beg); + reported = 1; + } + } else { + intv->end = end; + } + } + line[i] = c; + } + } + } + b = i + 1; + ++id; + } + } + if (intv->ss == 0 || intv->se == 0 || intv->beg < 0 || intv->end < 0) return -1; + return 0; +} + +static inline int get_intv(tbx_t *tbx, kstring_t *str, tbx_intv_t *intv, int is_add) +{ + if (tbx_parse1(&tbx->conf, str->l, str->s, intv) == 0) { + int c = *intv->se; + *intv->se = '\0'; + if ((tbx->conf.preset&0xffff) == TBX_GAF){ + intv->tid = 0; + } else { + intv->tid = get_tid(tbx, intv->ss, is_add); + } + *intv->se = c; + if (intv->tid < 0) return -2; // get_tid out of memory + return (intv->beg >= 0 && intv->end >= 0)? 
0 : -1; + } else { + char *type = NULL; + switch (tbx->conf.preset&0xffff) + { + case TBX_SAM: type = "TBX_SAM"; break; + case TBX_VCF: type = "TBX_VCF"; break; + case TBX_GAF: type = "TBX_GAF"; break; + case TBX_UCSC: type = "TBX_UCSC"; break; + default: type = "TBX_GENERIC"; break; + } + if (hts_is_utf16_text(str)) + hts_log_error("Failed to parse %s: offending line appears to be encoded as UTF-16", type); + else + hts_log_error("Failed to parse %s: was wrong -p [type] used?\nThe offending line was: \"%s\"", + type, str->s); + return -1; + } +} + +/* + * Called by tabix iterator to read the next record. + * Returns >= 0 on success + * -1 on EOF + * <= -2 on error + */ +int tbx_readrec(BGZF *fp, void *tbxv, void *sv, int *tid, hts_pos_t *beg, hts_pos_t *end) +{ + tbx_t *tbx = (tbx_t *) tbxv; + kstring_t *s = (kstring_t *) sv; + int ret; + if ((ret = bgzf_getline(fp, '\n', s)) >= 0) { + tbx_intv_t intv; + if (get_intv(tbx, s, &intv, 0) < 0) + return -2; + *tid = intv.tid; *beg = intv.beg; *end = intv.end; + } + return ret; +} + +static int tbx_set_meta(tbx_t *tbx) +{ + int i, l = 0, l_nm; + uint32_t x[7]; + char **name; + uint8_t *meta; + khint_t k; + khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; + + memcpy(x, &tbx->conf, 24); + name = (char**)malloc(sizeof(char*) * kh_size(d)); + if (!name) return -1; + for (k = kh_begin(d), l = 0; k != kh_end(d); ++k) { + if (!kh_exist(d, k)) continue; + name[kh_val(d, k)] = (char*)kh_key(d, k); + l += strlen(kh_key(d, k)) + 1; // +1 to include '\0' + } + l_nm = x[6] = l; + meta = (uint8_t*)malloc(l_nm + 28); + if (!meta) { free(name); return -1; } + if (ed_is_big()) + for (i = 0; i < 7; ++i) + x[i] = ed_swap_4(x[i]); + memcpy(meta, x, 28); + for (l = 28, i = 0; i < (int)kh_size(d); ++i) { + int x = strlen(name[i]) + 1; + memcpy(meta + l, name[i], x); + l += x; + } + free(name); + hts_idx_set_meta(tbx->idx, l, meta, 0); + return 0; +} + +// Minimal effort parser to extract reference length out of VCF header line +// This is used 
only used to adjust the number of levels if necessary, +// so not a major problem if it doesn't always work. +static void adjust_max_ref_len_vcf(const char *str, int64_t *max_ref_len) +{ + const char *ptr; + int64_t len; + if (strncmp(str, "##contig", 8) != 0) return; + ptr = strstr(str + 8, "length"); + if (!ptr) return; + for (ptr += 6; *ptr == ' ' || *ptr == '='; ptr++) {} + len = strtoll(ptr, NULL, 10); + if (*max_ref_len < len) *max_ref_len = len; +} + +// Same for sam files +static void adjust_max_ref_len_sam(const char *str, int64_t *max_ref_len) +{ + const char *ptr; + int64_t len; + if (strncmp(str, "@SQ", 3) != 0) return; + ptr = strstr(str + 3, "\tLN:"); + if (!ptr) return; + ptr += 4; + len = strtoll(ptr, NULL, 10); + if (*max_ref_len < len) *max_ref_len = len; +} + +// Adjusts number of levels if not big enough. This can happen for +// files with very large contigs. +static int adjust_n_lvls(int min_shift, int n_lvls, int64_t max_len) +{ + int64_t s = hts_bin_maxpos(min_shift, n_lvls); + max_len += 256; + for (; max_len > s; ++n_lvls, s <<= 3) {} + return n_lvls; +} + +tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf) +{ + tbx_t *tbx; + kstring_t str; + int ret, first = 0, n_lvls, fmt; + int64_t lineno = 0; + uint64_t last_off = 0; + tbx_intv_t intv; + int64_t max_ref_len = 0; + + str.s = 0; str.l = str.m = 0; + tbx = (tbx_t*)calloc(1, sizeof(tbx_t)); + if (!tbx) return NULL; + tbx->conf = *conf; + if (min_shift > 0) n_lvls = (TBX_MAX_SHIFT - min_shift + 2) / 3, fmt = HTS_FMT_CSI; + else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_TBI; + while ((ret = bgzf_getline(fp, '\n', &str)) >= 0) { + ++lineno; + if (str.s[0] == tbx->conf.meta_char && fmt == HTS_FMT_CSI) { + switch (tbx->conf.preset) { + case TBX_SAM: + adjust_max_ref_len_sam(str.s, &max_ref_len); break; + case TBX_VCF: + adjust_max_ref_len_vcf(str.s, &max_ref_len); break; + default: + break; + } + } + if (lineno <= tbx->conf.line_skip || str.s[0] == tbx->conf.meta_char) { + 
last_off = bgzf_tell(fp); + continue; + } + if (first == 0) { + if (fmt == HTS_FMT_CSI) { + if (!max_ref_len) + max_ref_len = (int64_t)100*1024*1024*1024; // 100G default + n_lvls = adjust_n_lvls(min_shift, n_lvls, max_ref_len); + } + tbx->idx = hts_idx_init(0, fmt, last_off, min_shift, n_lvls); + if (!tbx->idx) goto fail; + first = 1; + } + ret = get_intv(tbx, &str, &intv, 1); + if (ret < -1) goto fail; // Out of memory + if (ret < 0) continue; // Skip unparsable lines + if (hts_idx_push(tbx->idx, intv.tid, intv.beg, intv.end, + bgzf_tell(fp), 1) < 0) { + goto fail; + } + } + if (ret < -1) goto fail; + if ( !tbx->idx ) tbx->idx = hts_idx_init(0, fmt, last_off, min_shift, n_lvls); // empty file + if (!tbx->idx) goto fail; + if ( !tbx->dict ) tbx->dict = kh_init(s2i); + if (!tbx->dict) goto fail; + if (hts_idx_finish(tbx->idx, bgzf_tell(fp)) != 0) goto fail; + if (tbx_set_meta(tbx) != 0) goto fail; + free(str.s); + return tbx; + + fail: + free(str.s); + tbx_destroy(tbx); + return NULL; +} + +void tbx_destroy(tbx_t *tbx) +{ + khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; + if (d != NULL) + { + khint_t k; + for (k = kh_begin(d); k != kh_end(d); ++k) + if (kh_exist(d, k)) free((char*)kh_key(d, k)); + } + hts_idx_destroy(tbx->idx); + kh_destroy(s2i, d); + free(tbx); +} + +int tbx_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads, const tbx_conf_t *conf) +{ + tbx_t *tbx; + BGZF *fp; + int ret; + if ((fp = bgzf_open(fn, "r")) == 0) return -1; + if ( n_threads ) bgzf_mt(fp, n_threads, 256); + if ( bgzf_compression(fp) != bgzf ) { bgzf_close(fp); return -2; } + tbx = tbx_index(fp, min_shift, conf); + bgzf_close(fp); + if ( !tbx ) return -1; + ret = hts_idx_save_as(tbx->idx, fn, fnidx, min_shift > 0? 
HTS_FMT_CSI : HTS_FMT_TBI); + tbx_destroy(tbx); + return ret; +} + +int tbx_index_build2(const char *fn, const char *fnidx, int min_shift, const tbx_conf_t *conf) +{ + return tbx_index_build3(fn, fnidx, min_shift, 0, conf); +} + +int tbx_index_build(const char *fn, int min_shift, const tbx_conf_t *conf) +{ + return tbx_index_build3(fn, NULL, min_shift, 0, conf); +} + +static tbx_t *index_load(const char *fn, const char *fnidx, int flags) +{ + tbx_t *tbx; + uint8_t *meta; + char *nm, *p; + uint32_t l_meta, l_nm; + tbx = (tbx_t*)calloc(1, sizeof(tbx_t)); + if (!tbx) + return NULL; + tbx->idx = hts_idx_load3(fn, fnidx, HTS_FMT_TBI, flags); + if ( !tbx->idx ) + { + free(tbx); + return NULL; + } + meta = hts_idx_get_meta(tbx->idx, &l_meta); + if ( !meta || l_meta < 28) goto invalid; + + tbx->conf.preset = le_to_i32(&meta[0]); + tbx->conf.sc = le_to_i32(&meta[4]); + tbx->conf.bc = le_to_i32(&meta[8]); + tbx->conf.ec = le_to_i32(&meta[12]); + tbx->conf.meta_char = le_to_i32(&meta[16]); + tbx->conf.line_skip = le_to_i32(&meta[20]); + l_nm = le_to_u32(&meta[24]); + if (l_nm > l_meta - 28) goto invalid; + + p = nm = (char*)meta + 28; + // This assumes meta is NUL-terminated, so we can merrily strlen away. + // hts_idx_load_local() assures this for us by adding a NUL on the end + // of whatever it reads. + for (; p - nm < l_nm; p += strlen(p) + 1) { + if (get_tid(tbx, p, 1) < 0) { + hts_log_error("%s", strerror(errno)); + goto fail; + } + } + return tbx; + + invalid: + hts_log_error("Invalid index header for %s", fnidx ? 
fnidx : fn); + + fail: + tbx_destroy(tbx); + return NULL; +} + +tbx_t *tbx_index_load3(const char *fn, const char *fnidx, int flags) +{ + return index_load(fn, fnidx, flags); +} + +tbx_t *tbx_index_load2(const char *fn, const char *fnidx) +{ + return index_load(fn, fnidx, 1); +} + +tbx_t *tbx_index_load(const char *fn) +{ + return index_load(fn, NULL, 1); +} + +const char **tbx_seqnames(tbx_t *tbx, int *n) +{ + khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict; + if (d == NULL) + { + *n = 0; + return calloc(1, sizeof(char *)); + } + int tid, m = kh_size(d); + const char **names = (const char**) calloc(m,sizeof(const char*)); + khint_t k; + if (!names) { + *n = 0; + return NULL; + } + for (k=kh_begin(d); kSheila +GCTAGCTCAGAAAAAAAAAA diff --git a/src/htslib-1.21/test/auxf.fa.fai b/src/htslib-1.21/test/auxf.fa.fai new file mode 100644 index 0000000..f3cdedb --- /dev/null +++ b/src/htslib-1.21/test/auxf.fa.fai @@ -0,0 +1 @@ +Sheila 20 8 20 21 diff --git a/src/htslib-1.21/test/base_mods/MM-MNf1.sam b/src/htslib-1.21/test/base_mods/MM-MNf1.sam new file mode 100644 index 0000000..f973d27 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-MNf1.sam @@ -0,0 +1,5 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 MN:i:37 +r1- 16 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:G-m,0,1,4,1,2;G-h,0,7;N-n,17,2; Ml:B:C,230,204,179,153,128,6,159,240,215 +r2 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r3 0 I 11 0 10S20M6S * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+mh,2,2,0,0,4,1;N+n,15,2; Ml:B:C,128,0,153,0,0,159,179,0,204,0,230,6,215,240 MN:i:36 diff --git a/src/htslib-1.21/test/base_mods/MM-MNf2.sam 
b/src/htslib-1.21/test/base_mods/MM-MNf2.sam new file mode 100644 index 0000000..a889241 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-MNf2.sam @@ -0,0 +1,5 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 MN:i:36 +r1- 16 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:G-m,0,1,4,1,2;G-h,0,7;N-n,17,2; Ml:B:C,230,204,179,153,128,6,159,240,215 +r2 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r3 0 I 11 0 10S20M6S * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+mh,2,2,0,0,4,1;N+n,15,2; Ml:B:C,128,0,153,0,0,159,179,0,204,0,230,6,215,240 MN:f:36 diff --git a/src/htslib-1.21/test/base_mods/MM-MNp.sam b/src/htslib-1.21/test/base_mods/MM-MNp.sam new file mode 100644 index 0000000..7bdca0f --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-MNp.sam @@ -0,0 +1,5 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 MN:i:36 +r1- 16 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:G-m,0,1,4,1,2;G-h,0,7;N-n,17,2; Ml:B:C,230,204,179,153,128,6,159,240,215 +r2 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r3 0 I 11 0 10S20M6S * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+mh,2,2,0,0,4,1;N+n,15,2; Ml:B:C,128,0,153,0,0,159,179,0,204,0,230,6,215,240 MN:i:36 diff --git a/src/htslib-1.21/test/base_mods/MM-bounds+.sam b/src/htslib-1.21/test/base_mods/MM-bounds+.sam new file mode 
100644 index 0000000..03a112d --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-bounds+.sam @@ -0,0 +1,2 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1,0;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,0,159,6,215,240 diff --git a/src/htslib-1.21/test/base_mods/MM-bounds-.sam b/src/htslib-1.21/test/base_mods/MM-bounds-.sam new file mode 100644 index 0000000..3f54798 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-bounds-.sam @@ -0,0 +1,2 @@ +@SQ SN:I LN:999 +r1- 16 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:G-m,0,1,4,1,2,2;G-h,0,7;N-n,17,2; Ml:B:C,230,204,179,153,128,0,6,159,240,215 diff --git a/src/htslib-1.21/test/base_mods/MM-chebi.out b/src/htslib-1.21/test/base_mods/MM-chebi.out new file mode 100644 index 0000000..89970dd --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-chebi.out @@ -0,0 +1,48 @@ +0 A +1 G +2 C +3 T +4 C +5 T +6 C C+m102 +7 C +8 A +9 G +10 A +11 G +12 T +13 C +14 G +15 N N+n212 +16 A +17 C C+m128 +18 G +19 C C+(76792)161 +20 C C+m153 +21 A +22 T +23 Y +24 C +25 G +26 C +27 G +28 C +29 G +30 C +31 C C+m179 +32 A +33 C +34 C C+m204 C+(76792)33 +35 A +--- +Present: m. #-76792. n. 
+6 C C+m102 +15 N N+n212 +17 C C+m128 +19 C C+(76792)161 +20 C C+m153 +31 C C+m179 +34 C C+m204 C+(76792)33 + +=== + diff --git a/src/htslib-1.21/test/base_mods/MM-chebi.sam b/src/htslib-1.21/test/base_mods/MM-chebi.sam new file mode 100644 index 0000000..475a7d5 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-chebi.sam @@ -0,0 +1,2 @@ +@CO Separate m, h and N modifications +* 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+m,2,2,1,4,1;C+76792,6,7;N+n,15; Ml:B:C,102,128,153,179,204,161,33,212 diff --git a/src/htslib-1.21/test/base_mods/MM-double.out b/src/htslib-1.21/test/base_mods/MM-double.out new file mode 100644 index 0000000..431dfff --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-double.out @@ -0,0 +1,48 @@ +0 A +1 G G-m115 +2 G +3 A +4 T +5 C +6 T +7 C C+m128 +8 T +9 A +10 G +11 C +12 G G-m141 +13 G G-m166 G+o102 +14 A +15 T +16 C +17 G +18 G +19 C +20 G +21 G +22 G G-m192 +23 G +24 G +25 A +26 T +27 A +28 T +29 G +30 C C+m153 +31 C C+m179 +32 A +33 T +34 A +35 T +--- +Present: m. m. o. +1 G G-m115 +7 C C+m128 +12 G G-m141 +13 G G-m166 G+o102 +22 G G-m192 +30 C C+m153 +31 C C+m179 + +=== + diff --git a/src/htslib-1.21/test/base_mods/MM-double.sam b/src/htslib-1.21/test/base_mods/MM-double.sam new file mode 100644 index 0000000..608516f --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-double.sam @@ -0,0 +1,3 @@ +@CO Modifications called on both strands of the same record, +@CO including potentially at the same location simultaneously. 
+* 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:C+m,1,3,0;G-m,0,2,0,4;G+o,4; Ml:B:C,128,153,179,115,141,166,192,102 diff --git a/src/htslib-1.21/test/base_mods/MM-explicit-f.out b/src/htslib-1.21/test/base_mods/MM-explicit-f.out new file mode 100644 index 0000000..0f7326c --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-explicit-f.out @@ -0,0 +1,111 @@ +0 A +1 T +2 C +3 A +4 T +5 C +6 A +7 T +8 T +9 C C+m200 C+h10 +10 C C+m50 C+h170 +11 T +12 A +13 C +14 C C+m160 C+h20 +15 G +16 C +17 T +18 A +19 T +20 A +21 G +22 C +23 C +24 T +--- +Present: m. h. +9 C C+m200 C+h10 +10 C C+m50 C+h170 +14 C C+m160 C+h20 + +=== + +0 A +1 T +2 C C+m# C+h# +3 A +4 T +5 C C+m# C+h# +6 A +7 T +8 T +9 C C+m200 C+h10 +10 C C+m50 C+h170 +11 T +12 A +13 C C+m10 C+h5 +14 C C+m160 C+h20 +15 G +16 C C+m10 C+h5 +17 T +18 A +19 T +20 A +21 G +22 C C+m# C+h# +23 C C+m# C+h# +24 T +--- +Present: m? h? +2 C C+m# C+h# +5 C C+m# C+h# +9 C C+m200 C+h10 +10 C C+m50 C+h170 +13 C C+m10 C+h5 +14 C C+m160 C+h20 +16 C C+m10 C+h5 +22 C C+m# C+h# +23 C C+m# C+h# + +=== + +0 A +1 T +2 C C+h# +3 A +4 T +5 C C+h# +6 A +7 T +8 T +9 C C+m200 C+h10 +10 C C+h170 +11 T +12 A +13 C C+h5 +14 C C+m160 C+h20 +15 G +16 C C+h5 +17 T +18 A +19 T +20 A +21 G +22 C C+h# +23 C C+h# +24 T +--- +Present: m. h? +2 C C+h# +5 C C+h# +9 C C+m200 C+h10 +10 C C+h170 +13 C C+h5 +14 C C+m160 C+h20 +16 C C+h5 +22 C C+h# +23 C C+h# + +=== + diff --git a/src/htslib-1.21/test/base_mods/MM-explicit-x.out b/src/htslib-1.21/test/base_mods/MM-explicit-x.out new file mode 100644 index 0000000..8acfbf2 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-explicit-x.out @@ -0,0 +1,103 @@ +0 A +1 T +2 C +3 A +4 T +5 C +6 A +7 T +8 T +9 C C+m.200 C+h.10 +10 C C+m.50 C+h.170 +11 T +12 A +13 C +14 C C+m.160 C+h.20 +15 G +16 C +17 T +18 A +19 T +20 A +21 G +22 C +23 C +24 T +--- +Present: m. h. 
+9 C C+m200 C+h10 +10 C C+m50 C+h170 +14 C C+m160 C+h20 + +=== + +0 A +1 T +2 C +3 A +4 T +5 C +6 A +7 T +8 T +9 C C+m?200 C+h?10 +10 C C+m?50 C+h?170 +11 T +12 A +13 C C+m?10 C+h?5 +14 C C+m?160 C+h?20 +15 G +16 C C+m?10 C+h?5 +17 T +18 A +19 T +20 A +21 G +22 C +23 C +24 T +--- +Present: m? h? +9 C C+m200 C+h10 +10 C C+m50 C+h170 +13 C C+m10 C+h5 +14 C C+m160 C+h20 +16 C C+m10 C+h5 + +=== + +0 A +1 T +2 C +3 A +4 T +5 C +6 A +7 T +8 T +9 C C+m.200 C+h?10 +10 C C+h?170 +11 T +12 A +13 C C+h?5 +14 C C+m.160 C+h?20 +15 G +16 C C+h?5 +17 T +18 A +19 T +20 A +21 G +22 C +23 C +24 T +--- +Present: m. h? +9 C C+m200 C+h10 +10 C C+h170 +13 C C+h5 +14 C C+m160 C+h20 +16 C C+h5 + +=== + diff --git a/src/htslib-1.21/test/base_mods/MM-explicit.out b/src/htslib-1.21/test/base_mods/MM-explicit.out new file mode 100644 index 0000000..0f3701f --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-explicit.out @@ -0,0 +1,103 @@ +0 A +1 T +2 C +3 A +4 T +5 C +6 A +7 T +8 T +9 C C+m200 C+h10 +10 C C+m50 C+h170 +11 T +12 A +13 C +14 C C+m160 C+h20 +15 G +16 C +17 T +18 A +19 T +20 A +21 G +22 C +23 C +24 T +--- +Present: m. h. +9 C C+m200 C+h10 +10 C C+m50 C+h170 +14 C C+m160 C+h20 + +=== + +0 A +1 T +2 C +3 A +4 T +5 C +6 A +7 T +8 T +9 C C+m200 C+h10 +10 C C+m50 C+h170 +11 T +12 A +13 C C+m10 C+h5 +14 C C+m160 C+h20 +15 G +16 C C+m10 C+h5 +17 T +18 A +19 T +20 A +21 G +22 C +23 C +24 T +--- +Present: m? h? +9 C C+m200 C+h10 +10 C C+m50 C+h170 +13 C C+m10 C+h5 +14 C C+m160 C+h20 +16 C C+m10 C+h5 + +=== + +0 A +1 T +2 C +3 A +4 T +5 C +6 A +7 T +8 T +9 C C+m200 C+h10 +10 C C+h170 +11 T +12 A +13 C C+h5 +14 C C+m160 C+h20 +15 G +16 C C+h5 +17 T +18 A +19 T +20 A +21 G +22 C +23 C +24 T +--- +Present: m. h? 
+9 C C+m200 C+h10 +10 C C+h170 +13 C C+h5 +14 C C+m160 C+h20 +16 C C+h5 + +=== + diff --git a/src/htslib-1.21/test/base_mods/MM-explicit.sam b/src/htslib-1.21/test/base_mods/MM-explicit.sam new file mode 100644 index 0000000..c230a9d --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-explicit.sam @@ -0,0 +1,27 @@ +@CO Testing explicit vs implicit base modifications. +@CO This covers the case where a lack of a signal could be either +@CO implicitly assumed to be no-mod (default) or assumed to be +@CO unchecked and require an explicit statement to indicate it was +@CO looked at and no base modification was observed. +@CO +@CO ATCATCATTCCTACCGCTATAGCCT r1; implicit +@CO - - .. -. - -- +@CO Mm M +@CO - - .. -. - -- +@CO hH h +@CO +@CO ATCATCATTCCTACCGCTATAGCCT r2; explicit to a small region +@CO - - ?? ?? ? -- +@CO Mm mM m +@CO - - ?? ?? ? -- +@CO hH hh h +@CO +@CO ATCATCATTCCTACCGCTATAGCCT r3; mixture +@CO - - . -. - -- +@CO M M +@CO - - ?? ?? ? -- +@CO hH hh h -- +@CO +r1 0 * 0 0 * * 0 0 ATCATCATTCCTACCGCTATAGCCT * Mm:Z:C+mh,2,0,1; Ml:B:C,200,10,50,170,160,20 +r2 0 * 0 0 * * 0 0 ATCATCATTCCTACCGCTATAGCCT * Mm:Z:C+mh?,2,0,0,0,0; Ml:B:C,200,10,50,170,10,5,160,20,10,5 +r3 0 * 0 0 * * 0 0 ATCATCATTCCTACCGCTATAGCCT * Mm:Z:C+m.,2,2;C+h?,2,0,0,0,0; Ml:B:C,200,160,10,170,5,20,5 diff --git a/src/htslib-1.21/test/base_mods/MM-multi.out b/src/htslib-1.21/test/base_mods/MM-multi.out new file mode 100644 index 0000000..41054a7 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-multi.out @@ -0,0 +1,97 @@ +0 A +1 G +2 C +3 T +4 C +5 T +6 C C+m128 +7 C +8 A +9 G +10 A +11 G +12 T +13 C +14 G +15 N N+n215 +16 A +17 C C+m153 +18 G N+n240 +19 C C+h159 +20 C C+m179 +21 A +22 T +23 Y +24 C +25 G +26 C +27 G +28 C +29 G +30 C +31 C C+m204 +32 A +33 C +34 C C+m230 C+h6 +35 A +--- +Present: m. h. n. 
+6 C C+m128 +15 N N+n215 +17 C C+m153 +18 G N+n240 +19 C C+h159 +20 C C+m179 +31 C C+m204 +34 C C+m230 C+h6 + +=== + +0 A +1 G +2 C +3 T +4 C +5 T +6 C C+m77 C+h159 +7 C +8 A +9 G +10 A +11 G +12 T +13 C +14 G +15 N N+n240 +16 A +17 C C+m103 C+h133 +18 G +19 C C+m128 C+h108 +20 C C+m154 C+h82 +21 A +22 T +23 Y +24 C +25 G +26 C +27 G +28 C +29 G +30 C +31 C C+m179 C+h57 +32 A +33 C +34 C C+m204 C+h31 +35 A +--- +Present: m. h. n. +6 C C+m77 C+h159 +15 N N+n240 +17 C C+m103 C+h133 +19 C C+m128 C+h108 +20 C C+m154 C+h82 +31 C C+m179 C+h57 +34 C C+m204 C+h31 + +=== + diff --git a/src/htslib-1.21/test/base_mods/MM-multi.sam b/src/htslib-1.21/test/base_mods/MM-multi.sam new file mode 100644 index 0000000..ac2831b --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-multi.sam @@ -0,0 +1,7 @@ +@CO Testing multiple m, h and N modifications on the same read. +@CO r1 has them separated out. +@CO r2 has them combined together, for example as produced by +@CO a joint basecaller which assigns probabilities to all +@CO trained events simultaneously. +r1 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 MN:i:36 +r2 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+mh,2,2,0,0,4,1;N+n,15; Ml:B:C,77,159,103,133,128,108,154,82,179,57,204,31,240 diff --git a/src/htslib-1.21/test/base_mods/MM-not-all-modded.out b/src/htslib-1.21/test/base_mods/MM-not-all-modded.out new file mode 100644 index 0000000..64fc847 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-not-all-modded.out @@ -0,0 +1,180 @@ +0 A +1 G +2 C +3 T +4 C +5 T +6 C C+m128 +7 C +8 A +9 G +10 A +11 G +12 T +13 C +14 G +15 N N+n215 +16 A +17 C C+m153 +18 G N+n240 +19 C C+h159 +20 C C+m179 +21 A +22 T +23 Y +24 C +25 G +26 C +27 G +28 C +29 G +30 C +31 C C+m204 +32 A +33 C +34 C C+m230 C+h6 +35 A +--- +Present: m. h. n. 
+6 C C+m128 +15 N N+n215 +17 C C+m153 +18 G N+n240 +19 C C+h159 +20 C C+m179 +31 C C+m204 +34 C C+m230 C+h6 + +=== + +0 A +1 G +2 C +3 T +4 C +5 T +6 C +7 C +8 A +9 G +10 A +11 G +12 T +13 C +14 G +15 N +16 A +17 C +18 G +19 C +20 C +21 A +22 T +23 Y +24 C +25 G +26 C +27 G +28 C +29 G +30 C +31 C +32 A +33 C +34 C +35 A +--- +Present: + +=== + +0 A +1 G +2 C +3 T +4 C +5 T +6 C C+m128 +7 C +8 A +9 G +10 A +11 G +12 T +13 C +14 G +15 N N+n215 +16 A +17 C C+m153 +18 G N+n240 +19 C C+h159 +20 C C+m179 +21 A +22 T +23 Y +24 C +25 G +26 C +27 G +28 C +29 G +30 C +31 C C+m204 +32 A +33 C +34 C C+m230 C+h6 +35 A +--- +Present: m. h. n. +6 C C+m128 +15 N N+n215 +17 C C+m153 +18 G N+n240 +19 C C+h159 +20 C C+m179 +31 C C+m204 +34 C C+m230 C+h6 + +=== + +0 A +1 G +2 C +3 T +4 C +5 T +6 C +7 C +8 A +9 G +10 A +11 G +12 T +13 C +14 G +15 N +16 A +17 C +18 G +19 C +20 C +21 A +22 T +23 Y +24 C +25 G +26 C +27 G +28 C +29 G +30 C +31 C +32 A +33 C +34 C +35 A +--- +Present: + +=== + diff --git a/src/htslib-1.21/test/base_mods/MM-not-all-modded.sam b/src/htslib-1.21/test/base_mods/MM-not-all-modded.sam new file mode 100644 index 0000000..0858c76 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-not-all-modded.sam @@ -0,0 +1,5 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r1b 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A +r2 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r2b 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A diff --git a/src/htslib-1.21/test/base_mods/MM-orient.sam b/src/htslib-1.21/test/base_mods/MM-orient.sam new file mode 100644 index 0000000..363e7c2 --- /dev/null +++ 
b/src/htslib-1.21/test/base_mods/MM-orient.sam @@ -0,0 +1,6 @@ +@CO Testing mods on top and bottom strand, but also in +@CO original vs reverse-complemented orientation +top-fwd 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:C+m,1,3,0; Ml:B:C,128,153,179 +top-rev 16 * 0 0 * * 0 0 ATATGGCATATCCCCCGCCGATCCGCTAGAGATCCT * Mm:Z:C+m,1,3,0; Ml:B:C,128,153,179 +bot-fwd 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:G-m,0,0,4,3; Ml:B:C,115,141,166,192 +bot-rev 16 * 0 0 * * 0 0 ATATGGCATATCCCCCGCCGATCCGCTAGAGATCCT * Mm:Z:G-m,0,0,4,3; Ml:B:C,115,141,166,192 diff --git a/src/htslib-1.21/test/base_mods/MM-pileup.out b/src/htslib-1.21/test/base_mods/MM-pileup.out new file mode 100644 index 0000000..8c603d6 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-pileup.out @@ -0,0 +1,36 @@ +I 0 AA DD +I 1 GG FF +I 2 CC ?? +I 3 TTT GGG +I 4 CCC CCC +I 5 TTT HHH +I 6 C[+m128]C[-m128]C[+m128] 888 +I 7 CCC 888 +I 8 AAA ... +I 9 GGG EEE +I 10 AAAA GGGG +I 11 GGGG 8888 +I 12 TTTT .... +I 13 CCCC 7777 +I 14 GGGG @@@@ +I 15 N[+n215]N[-n215]N[+n215]N[+n215] EEEE +I 16 AAAA 9999 +I 17 C[+m153]C[-m153]C[+m153]C[+m153+h0] GGGG +I 18 G[+n240]G[-n240]G[+n240]G[+n240] 8888 +I 19 C[+h159]C[-h159]C[+h159]C[+m0+h159] AAAA +I 20 C[+m179]C[-m179]C[+m179]C[+m179+h0] ???? +I 21 AAAA HHHH +I 22 TTTT 9999 +I 23 YYYY .... +I 24 CCCC :::: +I 25 GGGG CCCC +I 26 CCCC ???? 
+I 27 GGGG 8888 +I 28 CCCC ,,,, +I 29 GGGG @@@@ +I 30 CCC ,,, +I 31 C[+m204]C[-m204]C[+m204] ,,, +I 32 AAA 999 +I 33 CCC FFF +I 34 C[+m230+h6]C[-m230-h6]C[+m230+h6] @@@ +I 35 AAA AAA diff --git a/src/htslib-1.21/test/base_mods/MM-pileup.sam b/src/htslib-1.21/test/base_mods/MM-pileup.sam new file mode 100644 index 0000000..0021c72 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-pileup.sam @@ -0,0 +1,5 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r1- 16 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:G-m,0,1,4,1,2;G-h,0,7;N-n,17,2; Ml:B:C,230,204,179,153,128,6,159,240,215 +r2 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r3 0 I 11 0 10S20M6S * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+mh,2,2,0,0,4,1;N+n,15,2; Ml:B:C,128,0,153,0,0,159,179,0,204,0,230,6,215,240 diff --git a/src/htslib-1.21/test/base_mods/MM-pileup2.out b/src/htslib-1.21/test/base_mods/MM-pileup2.out new file mode 100644 index 0000000..51e6973 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-pileup2.out @@ -0,0 +1,36 @@ +I 0 AA D~ +I 1 GG F~ +I 2 CC ?~ +I 3 TT G~ +I 4 CC C~ +I 5 TT H~ +I 6 C[+m128]C[+m77+h159] 8~ +I 7 CC 8~ +I 8 AA .~ +I 9 GG E~ +I 10 AA G~ +I 11 GG 8~ +I 12 TT .~ +I 13 CC 7~ +I 14 GG @~ +I 15 NN[+n240] E~ +I 16 AA 9~ +I 17 C[+m153]C[+m103+h133] G~ +I 18 GG 8~ +I 19 C[+(76792)159]C[+m128+h108] A~ +I 20 C[+m179]C[+m154+h82] ?~ +I 21 AA H~ +I 22 TT 9~ +I 23 YY .~ +I 24 CC :~ +I 25 GG C~ +I 26 CC ?~ +I 27 GG 8~ +I 28 CC ,~ +I 29 GG @~ +I 30 CC ,~ +I 31 C[+m204]C[+m179+h57] ,~ +I 32 AA 9~ +I 33 CC F~ +I 34 C[+m230+(76792)6]C[+m204+h31] @~ +I 35 AA A~ diff --git a/src/htslib-1.21/test/base_mods/MM-pileup2.sam 
b/src/htslib-1.21/test/base_mods/MM-pileup2.sam new file mode 100644 index 0000000..7aa9ab3 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/MM-pileup2.sam @@ -0,0 +1,4 @@ +@CO With added CHeBI codes and mod-code combining. +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+76792,6,7; Ml:B:C,128,153,179,204,230,159,6 +r2 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+mh,2,2,0,0,4,1;N+n,15; Ml:B:C,77,159,103,133,128,108,154,82,179,57,204,31,240 diff --git a/src/htslib-1.21/test/base_mods/base-mods.sh b/src/htslib-1.21/test/base_mods/base-mods.sh new file mode 100755 index 0000000..f3f3ca4 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/base-mods.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# +# Copyright (C) 2020 Genome Research Ltd. +# +# Author: James Bonfield +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Load in the test driver +. 
../simple_test_driver.sh + +echo "Testing base-modifications..." + +test_mod="../test_mod" +pileup_mod="../pileup_mod" + +test_driver $@ + +exit $? diff --git a/src/htslib-1.21/test/base_mods/base-mods.tst b/src/htslib-1.21/test/base_mods/base-mods.tst new file mode 100644 index 0000000..5899c80 --- /dev/null +++ b/src/htslib-1.21/test/base_mods/base-mods.tst @@ -0,0 +1,63 @@ +# Copyright (C) 2020, 2023 Genome Research Ltd. +# +# Author: James Bonfield +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# First field: +# INIT = initialisation, not counted in testing +# P = expected to pass +# N = expected to return non-zero +# F = expected to fail + +# Second field: +# Filename of expected output + +# Third onwards; command to execute. $fmt is replaced by the current file +# format, ie sam, bam or cram. $samtools is a pointer to the desired +# samtools binary. This can be useful for testing older versions. 
+ +# Test files from SAM spec +P MM-chebi.out $test_mod MM-chebi.sam +P MM-double.out $test_mod MM-double.sam +P MM-multi.out $test_mod MM-multi.sam +P MM-explicit.out $test_mod MM-explicit.sam +P MM-explicit-x.out $test_mod -x MM-explicit.sam + +# Report bases outside the explicitly called ranges, so we could exclude +# these in any depth based consensus analysis and only gather statistics +# for sites known to be have been scanned. +P MM-explicit-f.out $test_mod -f 1 MM-explicit.sam + +# Ensure state gets reset correctly between reads +P MM-not-all-modded.out $test_mod MM-not-all-modded.sam + +# Pileup testing +P MM-pileup.out $pileup_mod < MM-pileup.sam +P MM-pileup2.out $pileup_mod < MM-pileup2.sam + +# Validation testing. We just care about exit status here, but the +# test data is a copy of MM-pileup.sam so that suffices too. +P MM-pileup.out $pileup_mod < MM-MNp.sam +N MM-pileup.out $pileup_mod < MM-MNf1.sam +N MM-pileup.out $pileup_mod < MM-MNf2.sam +N MM-pileup.out $test_mod < MM-MNf1.sam +N MM-pileup.out $test_mod < MM-MNf2.sam +N MM-pileup.out $test_mod < MM-bounds+.sam +N MM-pileup.out $test_mod < MM-bounds-.sam diff --git a/src/htslib-1.21/test/bcf-sr/merge.noidx.a.vcf b/src/htslib-1.21/test/bcf-sr/merge.noidx.a.vcf new file mode 100644 index 0000000..fa8f04a --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/merge.noidx.a.vcf @@ -0,0 +1,12 @@ +##fileformat=VCFv4.3 +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A +1 3000150 . C T . . . GT 0/0 +2 3000150 . C T . . . GT 0/0 +3 3000150 . C T . . . GT 0/0 +4 3000150 . C T . . . 
GT 0/0 diff --git a/src/htslib-1.21/test/bcf-sr/merge.noidx.abc.expected.out b/src/htslib-1.21/test/bcf-sr/merge.noidx.abc.expected.out new file mode 100644 index 0000000..ef12e62 --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/merge.noidx.abc.expected.out @@ -0,0 +1,8 @@ +1:3000150 T T T +1:3000151 - T T +2:3000150 T T T +2:3000151 - T T +3:3000150 T T T +3:3000151 - T T +4:3000150 T T T +4:3000151 - T T diff --git a/src/htslib-1.21/test/bcf-sr/merge.noidx.b.vcf b/src/htslib-1.21/test/bcf-sr/merge.noidx.b.vcf new file mode 100644 index 0000000..0741bab --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/merge.noidx.b.vcf @@ -0,0 +1,16 @@ +##fileformat=VCFv4.3 +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B +1 3000150 . C T . . . GT 0/1 +1 3000151 . C T . . . GT 0/1 +2 3000150 . C T . . . GT 0/1 +2 3000151 . C T . . . GT 0/1 +3 3000150 . C T . . . GT 0/1 +3 3000151 . C T . . . GT 0/1 +4 3000150 . C T . . . GT 0/1 +4 3000151 . C T . . . GT 0/1 diff --git a/src/htslib-1.21/test/bcf-sr/merge.noidx.c.vcf b/src/htslib-1.21/test/bcf-sr/merge.noidx.c.vcf new file mode 100644 index 0000000..fcfbbcb --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/merge.noidx.c.vcf @@ -0,0 +1,16 @@ +##fileformat=VCFv4.3 +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT C +1 3000150 . C T . . . GT 1/1 +1 3000151 . C T . . . GT 1/1 +2 3000150 . C T . . . GT 1/1 +2 3000151 . C T . . . GT 1/1 +3 3000150 . C T . . . GT 1/1 +3 3000151 . C T . . . GT 1/1 +4 3000150 . C T . . . GT 1/1 +4 3000151 . C T . . . 
GT 1/1 diff --git a/src/htslib-1.21/test/bcf-sr/merge.noidx.hdr_order.vcf b/src/htslib-1.21/test/bcf-sr/merge.noidx.hdr_order.vcf new file mode 100644 index 0000000..d4c0a9e --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/merge.noidx.hdr_order.vcf @@ -0,0 +1,12 @@ +##fileformat=VCFv4.3 +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A +1 3000150 . C T . . . GT 0/0 +2 3000150 . C T . . . GT 0/0 +3 3000150 . C T . . . GT 0/0 +4 3000150 . C T . . . GT 0/0 diff --git a/src/htslib-1.21/test/bcf-sr/merge.noidx.rec_order.vcf b/src/htslib-1.21/test/bcf-sr/merge.noidx.rec_order.vcf new file mode 100644 index 0000000..679f971 --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/merge.noidx.rec_order.vcf @@ -0,0 +1,12 @@ +##fileformat=VCFv4.3 +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A +1 3000150 . C T . . . GT 0/0 +3 3000150 . C T . . . GT 0/0 +2 3000150 . C T . . . GT 0/0 +4 3000150 . C T . . . GT 0/0 diff --git a/src/htslib-1.21/test/bcf-sr/weird-chr-names.1.out b/src/htslib-1.21/test/bcf-sr/weird-chr-names.1.out new file mode 100644 index 0000000..5705c75 --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/weird-chr-names.1.out @@ -0,0 +1,9 @@ +##fileformat=VCFv4.3 +##FILTER= +##reference=ref.fa +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 1 . C T . . . +1 2 . C T . . . diff --git a/src/htslib-1.21/test/bcf-sr/weird-chr-names.2.out b/src/htslib-1.21/test/bcf-sr/weird-chr-names.2.out new file mode 100644 index 0000000..980818a --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/weird-chr-names.2.out @@ -0,0 +1,8 @@ +##fileformat=VCFv4.3 +##FILTER= +##reference=ref.fa +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 1 . C T . . . 
diff --git a/src/htslib-1.21/test/bcf-sr/weird-chr-names.3.out b/src/htslib-1.21/test/bcf-sr/weird-chr-names.3.out new file mode 100644 index 0000000..5b3ac8e --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/weird-chr-names.3.out @@ -0,0 +1,9 @@ +##fileformat=VCFv4.3 +##FILTER= +##reference=ref.fa +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1:1 1 . C T . . . +1:1 2 . C T . . . diff --git a/src/htslib-1.21/test/bcf-sr/weird-chr-names.4.out b/src/htslib-1.21/test/bcf-sr/weird-chr-names.4.out new file mode 100644 index 0000000..0d9e274 --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/weird-chr-names.4.out @@ -0,0 +1,8 @@ +##fileformat=VCFv4.3 +##FILTER= +##reference=ref.fa +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1:1 1 . C T . . . diff --git a/src/htslib-1.21/test/bcf-sr/weird-chr-names.5.out b/src/htslib-1.21/test/bcf-sr/weird-chr-names.5.out new file mode 100644 index 0000000..6cb41e1 --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/weird-chr-names.5.out @@ -0,0 +1,9 @@ +##fileformat=VCFv4.3 +##FILTER= +##reference=ref.fa +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1:1-1 1 . C T . . . +1:1-1 2 . C T . . . diff --git a/src/htslib-1.21/test/bcf-sr/weird-chr-names.6.out b/src/htslib-1.21/test/bcf-sr/weird-chr-names.6.out new file mode 100644 index 0000000..a707ed8 --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/weird-chr-names.6.out @@ -0,0 +1,8 @@ +##fileformat=VCFv4.3 +##FILTER= +##reference=ref.fa +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1:1-1 1 . C T . . . diff --git a/src/htslib-1.21/test/bcf-sr/weird-chr-names.vcf b/src/htslib-1.21/test/bcf-sr/weird-chr-names.vcf new file mode 100644 index 0000000..c367be4 --- /dev/null +++ b/src/htslib-1.21/test/bcf-sr/weird-chr-names.vcf @@ -0,0 +1,12 @@ +##fileformat=VCFv4.3 +##reference=ref.fa +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 1 . C T . . . +1 2 . C T . . . 
+1:1 1 . C T . . . +1:1 2 . C T . . . +1:1-1 1 . C T . . . +1:1-1 2 . C T . . . diff --git a/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries1.bam b/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries1.bam new file mode 100644 index 0000000..264e22f Binary files /dev/null and b/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries1.bam differ diff --git a/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries2.bam b/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries2.bam new file mode 100644 index 0000000..704804e Binary files /dev/null and b/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries2.bam differ diff --git a/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries3.bam b/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries3.bam new file mode 100644 index 0000000..328a274 Binary files /dev/null and b/src/htslib-1.21/test/bgzf_boundaries/bgzf_boundaries3.bam differ diff --git a/src/htslib-1.21/test/bgziptest.txt b/src/htslib-1.21/test/bgziptest.txt new file mode 100644 index 0000000..a086a51 --- /dev/null +++ b/src/htslib-1.21/test/bgziptest.txt @@ -0,0 +1 @@ +122333444455555 \ No newline at end of file diff --git a/src/htslib-1.21/test/bgziptest.txt.gz b/src/htslib-1.21/test/bgziptest.txt.gz new file mode 100644 index 0000000..dc433df Binary files /dev/null and b/src/htslib-1.21/test/bgziptest.txt.gz differ diff --git a/src/htslib-1.21/test/bgziptest.txt.gz.gzi b/src/htslib-1.21/test/bgziptest.txt.gz.gzi new file mode 100644 index 0000000..3bc450e Binary files /dev/null and b/src/htslib-1.21/test/bgziptest.txt.gz.gzi differ diff --git a/src/htslib-1.21/test/c1#bounds.sam b/src/htslib-1.21/test/c1#bounds.sam new file mode 100644 index 0000000..181dbe0 --- /dev/null +++ b/src/htslib-1.21/test/c1#bounds.sam @@ -0,0 +1,4 @@ +@SQ SN:c1 LN:10 +s0 0 c1 1 0 10M * 0 0 AACCGCGGTT ********** +s1 0 c1 2 0 10M * 0 0 ACCGCGGTTC ********** +s2 0 c1 3 0 10M * 0 0 CCGCGGTTCG ********** diff --git a/src/htslib-1.21/test/c1#clip.sam b/src/htslib-1.21/test/c1#clip.sam new 
file mode 100644 index 0000000..fd073f0 --- /dev/null +++ b/src/htslib-1.21/test/c1#clip.sam @@ -0,0 +1,8 @@ +@SQ SN:c1 LN:10 +s0a 0 c1 1 0 10M * 0 0 AACCGCGGTT ********** +s0A 0 c1 1 0 3M4N3M * 0 0 AACGTT ****** +s0b 0 c1 2 0 1S8M1S * 0 0 AACCGCGGTT ********** +s0B 0 c1 2 0 1H8M1H * 0 0 ACCGCGGT ******** +s0c 0 c1 3 0 2S6M2S * 0 0 AACCGCGGTT ********** +s0c 0 c1 3 0 2S3M2I3M2S * 0 0 AACCGNNCGGTT ************ +s0C 0 c1 3 0 2H6M2H * 0 0 CCGCGG ****** diff --git a/src/htslib-1.21/test/c1#noseq.sam b/src/htslib-1.21/test/c1#noseq.sam new file mode 100644 index 0000000..fda58c2 --- /dev/null +++ b/src/htslib-1.21/test/c1#noseq.sam @@ -0,0 +1,10 @@ +@SQ SN:c1 LN:10 +sq1 0 c1 1 0 10M * 0 0 AACCGCGGTT ********** MD:Z:10 NM:i:0 +sQ1 0 c1 1 0 10M * 0 0 AACCGCGGTT * MD:Z:10 NM:i:0 +SQ1 0 c1 1 0 10M * 0 0 * * MD:Z:10 NM:i:0 +sq2 0 c1 1 0 4M1D5M * 0 0 AACCCGGTT ********* MD:Z:4^G5 NM:i:1 +sQ2 0 c1 1 0 4M1D5M * 0 0 AACCCGGTT * MD:Z:4^G5 NM:i:1 +SQ2 0 c1 1 0 4M1D5M * 0 0 * * MD:Z:4^G5 NM:i:1 +sq3 4 c1 1 0 * * 0 0 AACCCGGTT ********* +sQ3 4 c1 1 0 * * 0 0 AACCCGGTT * +SQ3 4 c1 1 0 * * 0 0 * * diff --git a/src/htslib-1.21/test/c1#pad1.sam b/src/htslib-1.21/test/c1#pad1.sam new file mode 100644 index 0000000..54f7a11 --- /dev/null +++ b/src/htslib-1.21/test/c1#pad1.sam @@ -0,0 +1,10 @@ +@SQ SN:c1 LN:10 +s0a 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0b 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0c 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s1 0 c1 1 0 5M6I5M * 0 0 AACCGGTTAACCGGTT * +s2 0 c1 1 0 5M1P4I1P5M * 0 0 AACCGTTAACGGTT * +s3 0 c1 1 0 5M3I3P5M * 0 0 AACCGGTTCGGTT * +s4 0 c1 1 0 5M3P3I5M * 0 0 AACCGAACCGGTT * +s5 0 c1 1 0 4M1D2P2I2P1D4M * 0 0 AACCTAGGTT * +s6 0 c1 1 0 2M3D6I3D2M * 0 0 AAGTTAACTT * diff --git a/src/htslib-1.21/test/c1#pad2.sam b/src/htslib-1.21/test/c1#pad2.sam new file mode 100644 index 0000000..66da374 --- /dev/null +++ b/src/htslib-1.21/test/c1#pad2.sam @@ -0,0 +1,14 @@ +@SQ SN:c1 LN:10 +s0a 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0b 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0c 0 c1 1 0 10M * 0 0 
AACCGCGGTT * +s0d 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s1 0 c1 1 0 5M6I5M * 0 0 AACCGGTTAACCGGTT * +s2 0 c1 1 0 5M1P4I1P5M * 0 0 AACCGTTAACGGTT * +s3 0 c1 1 0 5M3I3P5M * 0 0 AACCGGTTCGGTT * +s4 0 c1 1 0 5M3P3I5M * 0 0 AACCGAACCGGTT * +s5 0 c1 1 0 4M1D2P2I2P1D4M * 0 0 AACCTAGGTT * +s6 0 c1 1 0 2M3D6I3D2M * 0 0 AAGTTAACTT * +s7 0 c1 1 0 4M2D4M * 0 0 AACCGGTT * +s8 0 c1 1 0 5D2P2I2P5D * 0 0 TA * +s9 0 c1 5 0 1M2P2I2P * 0 0 GTA * diff --git a/src/htslib-1.21/test/c1#pad3.sam b/src/htslib-1.21/test/c1#pad3.sam new file mode 100644 index 0000000..c77da85 --- /dev/null +++ b/src/htslib-1.21/test/c1#pad3.sam @@ -0,0 +1,14 @@ +@SQ SN:c1 LN:10 +@RG ID:p.sam SM:unknown LB:p.sam +s0a 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0b 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0c 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0d 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s1 0 c1 6 0 11I5M * 0 0 AACCGGTTAACCGGTT * RG:Z:p.sam +s2 0 c1 6 0 5I1P4I1P5M * 0 0 AACCGTTAACGGTT * RG:Z:p.sam +s3 0 c1 6 0 8I3P5M * 0 0 AACCGGTTCGGTT * RG:Z:p.sam +s4 0 c1 6 0 5I3P3I5M * 0 0 AACCGAACCGGTT * RG:Z:p.sam +s5 0 c1 6 0 4I3P2I2P1D4M * 0 0 AACCTAGGTT * RG:Z:p.sam +s6 0 c1 6 0 2I3P6I3D2M * 0 0 AAGTTAACTT * RG:Z:p.sam +s7 0 c1 6 0 4I7P1D4M * 0 0 AACCGGTT * RG:Z:p.sam +s8 0 c1 6 0 7P2I2P * 0 0 TA !! RG:Z:p.sam diff --git a/src/htslib-1.21/test/c1#unknown.sam b/src/htslib-1.21/test/c1#unknown.sam new file mode 100644 index 0000000..1f1616a --- /dev/null +++ b/src/htslib-1.21/test/c1#unknown.sam @@ -0,0 +1,9 @@ +@SQ SN:c1 LN:10 +@CO Tests permuations of seq / qual being present or "*" in mapped +@CO and unmapped forms. Also tests MD/NM tag generation. 
+_sqm 0 c1 1 0 2M1I4M1D3M * 0 0 AACCCTCGTT IIIIIIIIII MD:Z:4G1^G3 NM:i:3 +_sm 0 c1 1 0 2M1I4M1D3M * 0 0 AACCCTCGTT * MD:Z:4G1^G3 NM:i:3 +_m 0 c1 1 0 2M1I4M1D3M * 0 0 * * MD:Z:4G1^G3 NM:i:3 +_squ 4 c1 1 0 * * 0 0 AACCCTCGTT IIIIIIIIII +_su 4 c1 1 0 * * 0 0 AACCCTCGTT * +_u 4 c1 1 0 * * 0 0 * * diff --git a/src/htslib-1.21/test/c1.fa b/src/htslib-1.21/test/c1.fa new file mode 100644 index 0000000..12c54c9 --- /dev/null +++ b/src/htslib-1.21/test/c1.fa @@ -0,0 +1,2 @@ +>c1 +AACCGCGGTT diff --git a/src/htslib-1.21/test/c1.fa.fai b/src/htslib-1.21/test/c1.fa.fai new file mode 100644 index 0000000..fc35bec --- /dev/null +++ b/src/htslib-1.21/test/c1.fa.fai @@ -0,0 +1 @@ +c1 10 4 10 11 diff --git a/src/htslib-1.21/test/c2#pad.sam b/src/htslib-1.21/test/c2#pad.sam new file mode 100644 index 0000000..49e991b --- /dev/null +++ b/src/htslib-1.21/test/c2#pad.sam @@ -0,0 +1,26 @@ +@SQ SN:c2 LN:9 +@CO +@CO mpileup example from https://github.com/samtools/htslib/issues/59 +@CO with additional Pad cigar operations +@CO +@CO c2 CC***AA**T**AA***CC +@CO +@CO +s1 CT***AA**T**AA***TC +@CO +s1b CT*******T*******TC +@CO +s2 CT*****G***G*****TC +@CO +s2p CT*****G***G*****TC +@CO +s3 CT*****GG*GG*****TC +@CO +s3b CT****CGGCGGC****TC +@CO +s4 CT***AAG***GAA***TC +@CO +s4p CT***AAG***GAA***TC +@CO +s5 CTGGG*********GGGTC +@CO +s1 0 c2 1 0 9M * 0 0 CTAATAATC XXXXXXXXX +s1b 0 c2 1 0 2M2D1M2D2M * 0 0 CTTTC * +s2 0 c2 1 0 2M2D1I1D1I2D2M * 0 0 CTGGTC * +s2p 0 c2 1 0 2M2D1I1P1D1P1I2D2M * 0 0 CTGGTC * +s3 0 c2 1 0 2M2D2I1D2I2D2M * 0 0 CTGGGGTC * +s3b 0 c2 1 0 2M1D1M2I1M2I1M1D2M * 0 0 CTCGGCGGCTC * +s4 0 c2 1 0 4M1I1D1I4M * 0 0 CTAAGGAATC * +s4p 0 c2 1 0 4M1I1P1D1P1I4M * 0 0 CTAAGGAATC * +s5 0 c2 1 0 2M3I5D3I2M * 0 0 CTGGGGGGTC * diff --git a/src/htslib-1.21/test/c2.fa b/src/htslib-1.21/test/c2.fa new file mode 100644 index 0000000..1e1d309 --- /dev/null +++ b/src/htslib-1.21/test/c2.fa @@ -0,0 +1,2 @@ +>c2 +CCAATAACC diff --git a/src/htslib-1.21/test/c2.fa.fai b/src/htslib-1.21/test/c2.fa.fai new 
file mode 100644 index 0000000..f95d35d --- /dev/null +++ b/src/htslib-1.21/test/c2.fa.fai @@ -0,0 +1 @@ +c2 9 4 9 10 diff --git a/src/htslib-1.21/test/ce#1.sam b/src/htslib-1.21/test/ce#1.sam new file mode 100644 index 0000000..4376deb --- /dev/null +++ b/src/htslib-1.21/test/ce#1.sam @@ -0,0 +1,2 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 AS:i:-18 XS:i:-18 YT:Z:UU diff --git a/src/htslib-1.21/test/ce#1000.sam b/src/htslib-1.21/test/ce#1000.sam new file mode 100644 index 0000000..857a9cd --- /dev/null +++ b/src/htslib-1.21/test/ce#1000.sam @@ -0,0 +1,1005 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-18 XS:i:-18 XN:i:0 XM:i:5 XO:i:1 XG:i:1 YT:Z:UU MD:Z:4A0G5G5G5G3^A73 NM:i:6 +SRR065390.921023 16 CHROMOSOME_I 3 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 AS:i:-6 XS:i:-13 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10G0C10G77 NM:i:3 +SRR065390.1871511 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 0:BB@>B<=B@???@=8@B>BB@CA@DACDCBBCCCA@CCCCACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3743423 16 
CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################?6@:7<=@3=@ABAAB>BDBBABADABDDDBDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4251890 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########@BB=BCBBC?B>B;>B@@ADBBB@DBBBBDCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5238868 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA @,=@@D8D;?BBB>;?BBB==BB@D;>D>BBB>BBDDBA@@BCCB@=BACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8289592 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A?@C9@@BC=AABDD@A@DC@CB=@BA?6@CCAAC@+CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14497557 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######@:@@.>=><;;B>AB>>BB?B=>B=BD>BDADDD>CCDDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15617929 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA D?;;D>?C>CBAAACD@BB?B>BBDB>@BBDDBDC@CBDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16049575 16 CHROMOSOME_I 3 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #######################@??BB8BBB@@:AB@BDBCCDCBDCCCCACCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17838261 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################@>=?B@DCBDB>@D>DBADCDDD>CC@DCCCCBCCACCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22711273 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################B<@=<:6/0307==72@@=?788==;AAA:@CCAACCC?CCAACCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22922978 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########################B=B>A@BBBC??=@=A@AC<><<8>C6CCCCC8CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23087186 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############@:73???@6;D?B>:>BBA?B<>B@B>@B>@>BCDCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23506653 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############A/=A5::87@:=>6@AA>@CDBA@ABCB?BC>CD>DDBDC@CCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23791575 16 CHROMOSOME_I 3 1 100M * 0 0 
CTAAGCCCCACCCCAACCCTTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############################B4;:=B@>A@BCB@@ABCCBB@BCC@CCDCCDCCDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:7T0A1G2T2G3A79 NM:i:6 +SRR065390.25911768 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############@8B@B?9=:A?=@DDB>;B6?DDBCABABB@DDCCBDBDCCDACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26055380 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################DAA><0=>=B;?BACDBDABCBBC@CACACACACCACCCCCCCCCCCCCCCCCCCCCCBCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26121674 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################?:AA::@DAAA>B??@A4@=BBBBDDBDBDCCBDDBCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30352568 16 CHROMOSOME_I 3 7 100M * 0 0 CTAGGGCTAACCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################################################A>>5A?CCC@CCCCCCCCCC?CC:C@A@==@A@A@ AS:i:-10 XS:i:-19 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A1C4G3A37G47 NM:i:5 +SRR065390.31341126 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################?AD?D@BCAABBBD@=DBCDBAACCDCAABCDCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33653624 16 CHROMOSOME_I 3 17 100M * 0 0 
CTAATCCTAGGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################??8?000-+0000,@ABBBB@B:B@B>BB????>>>@@?::?6?>>;>>@ACCCCBCCBACCCC AS:i:-6 XS:i:-19 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4G4A9T80 NM:i:3 +SRR065390.28043279 16 CHROMOSOME_I 4 0 9M1I90M * 0 0 TCTTCCGATCTCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####A>=7A6DD=@AA?>AAABC@CAABDBCBBABDADBADCABBBDCDCDCACDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCC AS:i:-26 XS:i:-26 XN:i:0 XM:i:6 XO:i:1 XG:i:1 YT:Z:UU MD:Z:1A0A0G2T1A0G89 NM:i:7 +SRR065390.29270443 16 CHROMOSOME_I 6 1 100M * 0 0 AGCCTAAGCCGAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ###################################@:88@@>B>C>CCCCA@CCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10T2G86 NM:i:2 +SRR065390.1364843 16 CHROMOSOME_I 11 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ####################@=A=8@:>@;@@=>>B8?C6CCCCCCCCCCACCCCBBCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10190875 16 CHROMOSOME_I 11 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##################@@@@@@;>BBB?>A6BAB?BB=BAB@?:A.<===@7:4::>8D@BABBACCCCAB@CCCDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13556211 0 CHROMOSOME_I 11 0 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGATTGGAAGAGCGGCTAAGCAGGAACGCCGGGCTCGATCTCAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCBCDCCB>BBBBB########################################### AS:i:-50 XS:i:-50 XN:i:0 XM:i:25 XO:i:0 XG:i:0 YT:Z:UU MD:Z:57C0C1A0A0G0C0C0T0A0A1C6C0T0A1G1C0T0A0A1C2A0A0G0C2A3 NM:i:25 +SRR065390.20524775 16 
CHROMOSOME_I 11 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ############################?9<8B=?@C8A<@?@C8CBDCCC=CCCCC??@CCDCCCCCCCCCCCCCCCCCCCCDCCCCCCCDCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20580336 16 CHROMOSOME_I 11 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ############################?:>@?@=>@=0<:CB>@B=DCADB@CCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22573273 16 CHROMOSOME_I 11 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##################################A9;?@CBBDBA>BB;ABDB>AAA;=>=0943@########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20870114 0 CHROMOSOME_I 12 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCABCCCC=BBBCA@B>B?D;B=>9?############################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3863623 16 CHROMOSOME_I 12 1 100M * 0 0 CGCCTACGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##############################?@BB>B@BCABBB?DC@DADC@DCDCACDCBCCCCCCCCCCC@CCCCCCCCCCCCCCC1CCCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A5A5A87 NM:i:3 +SRR065390.1659845 0 CHROMOSOME_I 13 0 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAACCTAAGCCTAAGCCCAACCCTAAGACCGAGACCGAGACC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCAB@CCC######################################### AS:i:-22 XS:i:-22 XN:i:0 XM:i:11 XO:i:0 XG:i:0 YT:Z:UU MD:Z:60G14T2G6C1T0A2C1T0A2C1T0 NM:i:11 +SRR065390.1567418 16 CHROMOSOME_I 15 1 100M * 0 0 
CACAGCCTACGTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################################?:8A@<@>>BBB8>BBB@BBBB>@:??::87688:?:::?@<@@97866@?>@@;;>:< AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T0A6A1C88 NM:i:4 +SRR065390.4996386 16 CHROMOSOME_I 15 17 100M * 0 0 CCAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###################################@@@@A=BB@C>>DCCACCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-22 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T5T92 NM:i:2 +SRR065390.14822977 16 CHROMOSOME_I 15 1 100M * 0 0 CGAAGCCAGAGCCTAGGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################B:B?:==2>6@B@@C>?>A@CB5@??@28C@CCCBC@CC?CC?A@CC:CBCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T5T0A6A84 NM:i:4 +SRR065390.15148736 16 CHROMOSOME_I 15 17 100M * 0 0 CTGAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################CCBC<=C;9??<;==C@BCCCCC=CCCCACACACCBBCCCCCCCCCCCCCCCCCBCCCCCCCCCCCBCA?CCC AS:i:-4 XS:i:-21 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2A4T92 NM:i:2 +SRR065390.18089757 16 CHROMOSOME_I 15 1 100M * 0 0 CTGAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################A212.0:?.>8?BB?B<@@C?CCBCB;DCCCACDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2A5A91 NM:i:2 +SRR065390.25601994 16 CHROMOSOME_I 15 17 100M * 0 0 ATAAGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####################???DD?BD?BDBB>ACBDBDDBDDDBDBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-21 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C9G89 NM:i:2 +SRR065390.29400981 16 CHROMOSOME_I 15 17 100M * 0 0 
CGAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A<:?C>>BCABABC?AD>BDADDDBDBBDBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-18 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T2G95 NM:i:2 +SRR065390.1056430 0 CHROMOSOME_I 17 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGC CCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCDCCDCCC:@<:<@=:>>DD6?################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:96A3 NM:i:1 +SRR065390.32986719 0 CHROMOSOME_I 17 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGC CCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCC>C@@C@@A?8?@==0><:??@C?7:@B############################# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92G7 NM:i:1 +SRR065390.25620225 16 CHROMOSOME_I 17 1 100M * 0 0 AAGCCGAAGCCGAAGCCTAAGCCGAAGCGTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #########################################?1?<8@=CC@>CC?CCCCCACCCCC@@ACCCCCCBCCCCCAACCCCACCCCCCBACACC AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:5T5T11T4C1A69 NM:i:5 +SRR065390.1843941 0 CHROMOSOME_I 18 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCBDCDDBADDDDD@DADDAABABBB>BBA?>>B;>@8=@@:@83@>AAA:A6?<4= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1866654 0 CHROMOSOME_I 18 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCDCDDCDCCDDB6:?@B=;@@=@@:A=B@??A=@<41@##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2940592 0 CHROMOSOME_I 18 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC 
CCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCBC@@=CB?B=?A@@DD>DB=?>B7@8@@?BABABB@3>D9=?(@==A=A##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3916542 0 CHROMOSOME_I 18 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCBDDCDD@DDDDBABDDDAA?BC@<:?@B?A????@?<@@<@;D@?@9 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9532073 0 CHROMOSOME_I 18 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CACBCCCCCCCCCCCCBCCBCCCCCCCCCCC@C=CCCCCCC@CCCCCCC@C=?CCA=CCBACCAB?@BABCB=@B8BA=A::;8;BB>8>=??####### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22616746 0 CHROMOSOME_I 18 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCAC@CCCCCA@CCAA<::>BB?@88@@########################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26956496 0 CHROMOSOME_I 18 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CACCCCCCCCCCCCCCCCCACCCCC@CCCCCCACCC@DADBC@BACAAC@B<@@BAA@A>@>?B@################################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10808349 16 CHROMOSOME_I 18 0 18M1I81M * 0 0 TGCGCCAGCCCCAGCCCCGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCGGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #################################################################?A??ACCCCCCBBACCCCCCCCCCBC@CCCCCCCC AS:i:-33 XS:i:-33 XN:i:0 XM:i:11 XO:i:1 XG:i:1 YT:Z:UU MD:Z:0A2C0T0A4T0A4T0A41A0A6G31 NM:i:12 +SRR065390.10873351 16 CHROMOSOME_I 18 0 100M * 0 0 AGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCCAGTCCCGAGCCTCCTCCTAAACCTCACCCCAAATCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC 
######################################################################CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-30 XS:i:-30 XN:i:0 XM:i:15 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10T23T1A0G2T0A5A0A0G5G3A1G2T2G0C31 NM:i:15 +SRR065390.15490530 16 CHROMOSOME_I 18 1 100M * 0 0 CGCCCCTCCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ###############A@A5ACAB?<0ACBB??CCBBA@?CCC?C@?CCAACCC@AACCACCCC;CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A3T0A0A0G4A87 NM:i:6 +SRR065390.1909715 16 CHROMOSOME_I 19 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ############@B@:;0=:1@B5ACD@DDCC@@@DACCCDDC@CCCCCCCCCCCCBCCCCCCCCC@C@CCCCCCCCCCCBCCCACCCCCDCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7382123 16 CHROMOSOME_I 19 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ###############??6D?B?D>?B>?B>>?@BB;@BBB@??>AB@B@DCADABBCDBD?CCCACC@CCCCCBACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20593989 16 CHROMOSOME_I 19 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ####################@=698@?8:4>=BDBDDCCDBAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25363430 16 CHROMOSOME_I 19 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ########A663>BB>CBABBAAAA?A??;BCDBAADCDDBCDACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32419676 16 CHROMOSOME_I 19 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT 
###############A1=@?A@6A?>BD6?AABBBBB@AD>BBDBD>CDCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22906057 0 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCAAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCA@CCC@ACB@C=CC;=BBBB>;AABA################################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:86T13 NM:i:1 +SRR065390.27521332 0 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTACACCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAAGCCTCAGCCTA BBBBBBBBBB**9+7BBBBBBBBBBBBBBBBBBBBBBB@BB@>BB;B@BAB;BB@BA>ABA>A4A################################### AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10A0G73C7A6 NM:i:4 +SRR065390.30947354 0 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGACTC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDCC@ADABCBBBBBAABB>BA>B?@@B################################### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:90C5C2A0 NM:i:3 +SRR065390.31587235 0 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC'AAA@CCCAC@DCDC@;BABB@>@B?A@4;;8@??>:DBD?>:A AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:45A54 NM:i:1 +SRR065390.1370038 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #####@B>;DAB?B@=@@B=@BBB>B@@BDDB>DDDACBBCDDCCBC?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4643729 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA 
#########################BD6;@BB@BB?BDDBDDDCDBCDCCBBCBCCCCCDCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6133344 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ####################@?BB@999.4A;;;:C8C@@@A@>B;=;==CCCCCC@CCCBCCBBC?CCCCBACCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6927609 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ###########D?D@6@@@@3:>CCAABA>ABB>ABBB?>BB@@BDDBDCDA;BCCDCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7858967 0 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCC>CCCCCCCCCCCCDCCCCBC;CC:AB>A?<@'>ADBABB>CAABBB>>B@B;BBB;BA?7>989?D?@?########## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:48C51 NM:i:1 +SRR065390.8362231 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA 3AA3@AA==.>>>>=4@?@?;BBBAAA>@@AAB>DDAB@BDDBDBBBDCC@CCCDCB@CDACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9016128 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ################?B@?=<9=:@>=7?AA??=A=??:7B=;=A=C?CB8B>C?ACBCAAAAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11434451 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA 
########A9<=?B@B@?:CC=:?DAA>A@BBA>B@B8BD=DBBBC@?DD<ABBBDBBBDDB@B<>DDBBCDCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20362361 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #######A1:6DBB?C;BBD=B@BDBABADDBCB@CBCBBACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28067938 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ###############@6=?@75595=7:=:;:A1@BA@@C@CACC6CAACC9CCCBC?@CCCCCCCCBBCCCCCCCCCCCBCCCCCCCCBCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28676889 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ######################D@D<>BBB86.5/B=7/7B<@@?ABA;BCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31914072 16 CHROMOSOME_I 20 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #######################BCC>AADDADBABDCDDBCACCDCCCCCCAADBDCCCCACCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28534879 0 CHROMOSOME_I 21 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCATAAGCCTAAGTCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDCCCDDDBB89BAAA@@?DB:@@9>====?########################### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:81A2C10C4 NM:i:3 +SRR065390.2215027 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDCCCCBC@BB@BCDDBDDBA@DDBBB8@BB>B?=B?>B?>CBAAA?9>>A>:6@= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2347049 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCBCCCDCCCDBDBADDD@AAD@BBDBBBCAA>@C??################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3130808 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCA@ACCCCC>AC@=CBCC@@BBBB=>BA.A=:?A@@@:>@####### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4139123 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCDDCBCCCCBACBBB;DADBD;?BDB>;BBAB=;DDBBB>??BB>ACACC?>.A@=>,@########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5087657 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCACCCC@CCCDCCCCC>B@CC@CB;@CBAB1B@=BBB@BA;A8??B:==B=@@?>><=6=6D@@>::9>9><=@@,@@@14;<@####### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10185722 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD@BCDBBBDBDB@<@BBBB>DB>B@=BDBC>A;@@DD<4=@=<@6@###################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14268858 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCBCADCDBCABDDBB>BBBCBBAD?CA?>>BCBB?AD?BA?=99>8;8@;@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14318056 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=ADACBBABCBBD>AD=25867BB@BBBBA@;<>A=AAAA?A8?<>B78*=8=<>@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16702871 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDACCCCD@BCCCD?DDDCD?ABDDB;BBBB>?@?A@88B:@=2/99>A3.A##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18871990 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDCBCCBDBDCDDDDBC?;;B@BB>?D:>BBB@94@########################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23185275 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCC?DD@A@B@?BB>BB?>BD?########################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26311247 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC8CCDCDADB@;BB?@>:D>BB=>:DD>@=@>@?6?AA?AA############ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31584477 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCDCBCCCDDDCDCDDDDCDDDDCDDB?DABBBCBBABBCCBA?AAA=?D?DAA:AAACA>AAAA?################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33157605 0 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBC8?>??DBBBBA@?CBA=?=>BB@?@=<=?B@BAA@@:=>::8AAA>BBBB3:===8D?B<:DD<:?#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5496550 16 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##########@727;=>/7=<=B:=@@ACA?A8CB@B;A@CCB?BCCC@CCCCBCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17740346 16 CHROMOSOME_I 23 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ###############A;:>A7>=9=:.>==@A=BCCCBACCCCDBBB>B@BBB;A??BB;DAABD@DACCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16643153 0 CHROMOSOME_I 24 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTAAGCCTAAGCCTAAGGCTAAGGCTAAGGCTAAGCCTATGCTTAAGACTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC################################################################### AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:45C16C5C5C9A2C4C7 NM:i:7 +SRR065390.1203526 16 CHROMOSOME_I 24 1 100M * 0 0 ACCCTAAAACAAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ###############################A==B7277*@A?5AB9A@CCCDCDABA?:?A@ACCCCC?CC?CCCCCDCCCCCCCCCCCCCCCCCCCCA AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1G5G0C1T89 NM:i:4 +SRR065390.1571837 0 CHROMOSOME_I 26 0 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGGAAGCCTAGGCCAGAGGCAAAGCCGAGGCCTAAGCCGA 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCC>40?######################################### AS:i:-20 XS:i:-20 XN:i:0 XM:i:10 XO:i:0 XG:i:0 YT:Z:UU MD:Z:61C0T7A3T0A2C1T5T1A9T1 NM:i:10 +SRR065390.26106665 0 CHROMOSOME_I 26 0 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCAAAGCCTAAGCATGAACCTAAGGCTCAACCTAAGCCGCAGCCTAATGCTAGGCATA CCCCCCBCCCCCCCCCCBCCCCCCCCCCCACCCCCA0AA############################################################# AS:i:-29 XS:i:-29 XN:i:0 XM:i:14 XO:i:0 XG:i:0 YT:Z:UU MD:Z:36C7T10C1A1G6C2A1G8T0A7G0C3A2C2 NM:i:14 +SRR065390.28275609 0 CHROMOSOME_I 26 0 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAACCTTCGACTAATCCTACACGCACGCATAAGCCTCCCCATA CCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCDDCDCBBDCBDBABB6@C################################################## AS:i:-30 XS:i:-30 XN:i:0 XM:i:15 XO:i:0 XG:i:0 YT:Z:UU MD:Z:59G3A0A1C4G4A0G1C0T1A2C7A0A0G1C2 NM:i:15 +SRR065390.7812 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############################@::?>=CB@CACCC???CCCBAB@BB;?B@B==@ACAA@?C?BBCBBCBCCCC@@CCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2770334 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################A<8.@@@=A?@?B?@>CBB=B8BAC;@BBCAAADCCCACD@CCC@ACC?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5038515 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####################@>BB>B;@@BBBBBDD@D@BCCADA?CCBCCCCCDC@CCCCCCCCCACCCCCCCCCCCCCCCCCCBCCBCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5869665 16 CHROMOSOME_I 27 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################@6A@?@A;B=BCC8BC@CC@BB@CC?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8368880 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA <:3:B??@4??DB><7;;84A>>DD?BADDD@DABACDBBCACCDDCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11712874 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA >8=?=:;;1;1??:2279772,5552-278A8?A1@CCB@:ACCC=ACACA9?=BCCCACCCCCCCB@CCCCC@CCCC?CCCCCBCCCC=CBCCBBCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12176045 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####################B4BB@;B?B@@==0BC5B?ABDB@BBDCBCB=BCDCC=CCCCCCCCCCCCBCCCCCCCBCCBBCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12322670 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####?@@=6<??:>BBBBAAABBBB=DBB@4ADBBDABCCBB>CCCBC=@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14671547 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############?A@A8@@?@BBD9BB@@?CCCDDDA>CCAD?CCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15954948 16 CHROMOSOME_I 27 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###A97==00;A????;=BCCBD>DCCCCCCCCCCC@CBCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17314912 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################AA54=?A?<8?@4:8)??:==.A>7;=3CCBCC@CCACC;CCCCC;CCACCCCCCCC@CCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17920847 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################@(=@@9632;283*38762-8AAA=8@CCCBACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18484794 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################@A=B@4BC?AA>CCBC@ACCBCAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19979752 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ??@?AA>8=<;>=:2>;>BBA6ABB?>BABDB>BBBBB@>BDDBDDAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20318652 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############?A<8BBB8?A=B<8AB=CACCCCC@CC=CCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20730275 16 CHROMOSOME_I 27 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############################?4?::BAAAAACCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24048983 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 98(934>>39/<==9>3>>A.BDB?C;CCBD>BBBBDDABBDBBBBDBCCDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26914314 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############@;<:AAA?>?>>BBB>:ABBBBDADBBBBDDDD@DDCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29129737 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############A?7;B?B>A6AA?:>:@@B;AABBA?@5DDDBADCDDADCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30130221 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################ABBABA@@;@@@@@BB;B>B@CCC?CCACCCCCCCCCCCCCCCCC@CCCCCCBCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30619064 16 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######################@@?>@3>DA6?>AA@?@>BDDBD@DCCCCACCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31338027 16 CHROMOSOME_I 27 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###################################@>=>@BBBAABBBDB>BB@B=DBABCCCDCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31592423 0 CHROMOSOME_I 27 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTGAGCCTCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCACCBCAACCDDD@BBCDBD############################### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:74A17A5A1 NM:i:3 +SRR065390.7084193 0 CHROMOSOME_I 28 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCBCCD@ABDCB@=>BBB>>>BBB############################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:96T2G0 NM:i:2 +SRR065390.19170893 0 CHROMOSOME_I 28 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTACGCCTATGCCTCACCCCAAG CCCCCCCCCCCCCCCCCCCCCCCCCBB######################################################################### AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:68A11A5A4A1G2T3 NM:i:6 +SRR065390.22308979 0 CHROMOSOME_I 28 0 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAAGCTACGCTGACGACAACGCCACAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCDDDCCCBACCCA########################################### AS:i:-24 XS:i:-24 XN:i:0 XM:i:12 XO:i:0 XG:i:0 YT:Z:UU MD:Z:64C10G0C3A2C0T1A1C1T1A3T0A2 NM:i:12 +SRR065390.5010743 0 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGC CCB@@CCCC@CC@CC@CCC?C5CCC<<8:?=@@:;CCC@;=CC8;@CCCC;1?C@:858?5618/?=695;9@@;>?B###################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:97A2 NM:i:1 +SRR065390.5559541 0 CHROMOSOME_I 29 1 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACACCBBCCCDDBACB@DD@B>@BB=BD?;B@=??85797;@::@########### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:97A2 NM:i:1 +SRR065390.31135792 0 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCBD@CACDDBDDBBCBBDBC>?BAABAC?BBAAABCAAAAAD:D<=<3<9<@####################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93C6 NM:i:1 +SRR065390.1936876 16 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #@>?B5555(=<:=?@@A?@;>>B?D?BDBBDDDDBBBC@BC@CCDCCCCCDCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16850763 16 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #######@8?73=/=@=:=7@A==A;;@;9C@@@BDA@CCADCCDCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17680911 0 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACC CCCBCCCCCCCCCCCCCCCCC@CCACCCCCCCCCBBCBCCDCC=?CCABBAAAB@4BC?@:?@@B*B1B>B####################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98G1 NM:i:1 +SRR065390.24171347 16 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #####################BB@DAD@B?A>BADBBABADABBB;DBBCDBCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24428216 0 CHROMOSOME_I 29 1 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACC CCCCBCCCCBCCCCCCCCCCCCCCCCCCCCCBCCCCCC@CCACC8??>A@?@@(=8==@=@8A@A>A##################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98G1 NM:i:1 +SRR065390.27070077 16 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##########################?A=?@?@6@AAAAA>@CACB;BBABBB@BB>@DDCCDCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.27769553 0 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBCCCCBBABDABABDDDD?DBBABACBBBBC>>B;D1?B??D?:B?2@@=:=A?=?A=@######## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93C6 NM:i:1 +SRR065390.32563299 16 CHROMOSOME_I 29 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ###########@;;@.<@@?A?B@B?AAAABDCBBBDAADABCDDBCDCCDDCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2792992 0 CHROMOSOME_I 30 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGTC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCBCC@CCBCDBCCBCBBBDD@DBADA@BB>BB=>:AABB>@B@>B?:D?D################# AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:95A2C1 NM:i:2 +SRR065390.639284 0 CHROMOSOME_I 31 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCDCCCAADA>C=@A@@@><@B@B=B4>@############## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1105550 0 CHROMOSOME_I 31 1 100M * 0 0 
GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCBCC;A?CCACACCDB>? AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8156389 0 CHROMOSOME_I 31 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCC>CCCCACCCBCC@@CCCC@CCCC@@CCCC=@@ADD?@@BBBACBA=DAA< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11912243 0 CHROMOSOME_I 31 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCCAAGCCTAAGCCTAAGCCC CCC@BCC@CB>CACCCCC@CC??C@CCCCBCC?BBCBC@+=A5=1==:=76+@#################### AS:i:-7 XS:i:-7 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:78G2T17T0 NM:i:3 +SRR065390.17298969 0 CHROMOSOME_I 31 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCAA>BBACA>AA>>C:A?A>?<<< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20623938 0 CHROMOSOME_I 31 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCCAAGCTCAAACGC CCCCCCCBCCCCBCCCCCCBCCCCCCCC?A@CCCCCCACCBC@C:::?:@8?######################################## AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:76A10T4C0T2G1C0T0 NM:i:7 +SRR065390.21172388 0 CHROMOSOME_I 31 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTACGGCAGAGCGG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCC>>CCCB48@BBAA.=@@@0.=>:=*7??####################### AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:85C3A1C1T0A3C0T0 NM:i:7 +SRR065390.32636364 0 CHROMOSOME_I 31 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCACCA@?CCC@BACBABA@B@@?B@DB8:=4=9;2B########################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14726933 0 CHROMOSOME_I 33 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCCCADCCAABD@CBBBB=BB8BBBB>;DB;B@@B>B########## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92A7 NM:i:1 +SRR065390.19804249 0 CHROMOSOME_I 33 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTAA 00.00?????CCCCCCCCCCCCCCCCCCCCCCCCBCCCBCBBBBBBBB@@B>@>@@BBB=<>>>>=>?0?############################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:87A12 NM:i:1 +SRR065390.641561 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCC?BCCCCC@AA=C@C?BBC8@=C@@A@A@;=?@>A95<=>=*=9:<@################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4398044 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDCACDDB@DB@@BA>B@@A><@@?@)?B72=77B############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9917822 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBACCDD@BDBDBDBB@B?@@<@@?:D???<@??@==@3@@1<@=<<9< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13766985 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCACCCCCCCCCCC@CBDDCBCDABBB=<@BBABB@BBB=B4BB>BB?4???>B>8DB<.?? 
AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13985245 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCADCDACCBDDA<@BBBBB@BBBC=>B>BCB>AA>AA>D==; AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15076148 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCBADDCBCABC=CCB@@;B@@?B8?@@;?8<:A:==?BAD?;@B@=@.@D@??2=? AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16049660 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=CCCCCCCCCBBCCCCBD@ADBB@@?@BB?;?B>20+7/<===@?######### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22701424 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCDCCBD?@@@ABBBABA?ABAB=3A;B?;D?DD>B5@69<6:=5=> AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32125404 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACDCB;@@BB<;@@@A@:?A?@?6AAAA############################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7546701 16 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #####@?.:@?BAA>DBBD<=BAB>BB>BB>DB>BC@DCDCCBCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10989112 
16 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG 5050388648?AA6A===)77;==5:>BBCABBDDCBBADCCC?C=CCC@CCCCCCC@CCC@CCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11531996 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCBCCDCBBCCDABB@@BABBBB?B?ADABDB:@7.?=4A=:9A=???D>=(;4 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16826146 16 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG 9=59>@??;B==<::1@@@>8BB>BD=@CCCA@ACDCB@CCCAC?ACCCCAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17948445 16 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #####@4@4@8A>::<>5>=68AABC>CBABBCBC?===:B?DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29829737 16 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ############################@B?@>BB@4=AA@@?A@?BC<@DBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29888392 0 CHROMOSOME_I 34 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCDCCCCBCCCDCBDDBDCBDABBBADBBABAACA@>?@>?B?>BD=>@==A3<<.773783A################# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30225547 0 CHROMOSOME_I 34 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCADBBDAADBABB>ABBABA>BDBB<>>?8<>55>'0626526@88A0;3'5 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8226077 0 CHROMOSOME_I 38 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCC@CBCC@CCCDCCBCCCACBCCDB;@=C?BC########################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80T19 NM:i:1 +SRR065390.17492028 0 CHROMOSOME_I 38 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACACCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCDCCDCA@@BACB==BBBA;4BBBBB@B??#################################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:94A0G4 NM:i:2 +SRR065390.17605413 0 CHROMOSOME_I 38 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCA?CCCCCCDCDACA@<=8@==*51553BB?BB?;@*?##### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:96C3 NM:i:1 +SRR065390.20811266 0 CHROMOSOME_I 38 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCB@BCC@ACC8;;:;CCCCC=A@5CBCB5=?7<<>>A:.@######################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:88A11 NM:i:1 +SRR065390.25876953 0 CHROMOSOME_I 38 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAATCCTA #################################################################################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:95G4 NM:i:1 +SRR065390.27381447 0 CHROMOSOME_I 38 1 100M * 0 0 
CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDDCCCCACDCCC@-A>A>>DAB>@A?AA:A>AACAC6<@@###### AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:54C45 NM:i:1 +SRR065390.32350443 0 CHROMOSOME_I 38 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTACGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCDCDDCCDBADCDDACBDB@=DBBDAB6C@######################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:84C3A11 NM:i:2 +SRR065390.26113957 16 CHROMOSOME_I 38 1 100M * 0 0 CCTCCCCCTAAGCCTGAGCCTAAGCCTAAGCCTAAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #########################################A=B7===;:?@??:B?CC?CAACCCBACCBC?CCCCCCCCCCCCCCDCCCCCCCCCCCC AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A0A0G9A23A60 NM:i:5 +SRR065390.22114448 16 CHROMOSOME_I 39 1 67M1I32M * 0 0 CTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ############################A:B;@@>?B@?ABBDABB@BC?BAACC>BCCAC>@CCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-10 XS:i:-10 XN:i:0 XM:i:1 XO:i:1 XG:i:1 YT:Z:UU MD:Z:7T91 NM:i:2 +SRR065390.4122396 0 CHROMOSOME_I 40 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACG CCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCBCCCDCCCCC@DDCCCA>DDDBBBABB<@C>@BB?8@@@;87?9<700727;7/7<@@@###### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.12501634 0 CHROMOSOME_I 40 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCCAAG CCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCDBCDCCDCB@DB>@BBDDABD@BB><@=@?############################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:84T11T3 NM:i:2 +SRR065390.15005277 0 CHROMOSOME_I 40 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDACC?CAACAA@A:ABB=;1@################################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:86A13 NM:i:1 +SRR065390.22489915 0 CHROMOSOME_I 40 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAG CCCCCCCCCCCCCCCCCCCCC?CCCCCCCCCCCDCCCCCCCBDCD@C@BACB@@B@BBBB@:@==B:B<@BB3>?######################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:94C5 NM:i:1 +SRR065390.25381446 0 CHROMOSOME_I 40 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCDCC@@BBCCADDDB################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:66T33 NM:i:1 +SRR065390.26177760 0 CHROMOSOME_I 40 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCAAAGCCTAAGCCTAAG CCA@CCCCB@CCCCCC@ACCC?CCA9<:<=9;??6?########################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:79A4T15 NM:i:2 +SRR065390.26407476 0 CHROMOSOME_I 40 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGNCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGTCTCAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCC/C00!+03;999CCCCBBCCCBCCBCCB?BBB@BCBBBABBB######################################## AS:i:-5 XS:i:-5 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:22C59C2A14 NM:i:3 +SRR065390.13931899 16 CHROMOSOME_I 40 0 100M * 0 0 TAAGCCTAACCCTATCGGTATCCCTACTCCTGCGCCTCCGCCTTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###################################################CCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-28 XS:i:-28 XN:i:0 XM:i:14 XO:i:0 XG:i:0 YT:Z:UU MD:Z:9G4A0G0C0C2A0G4A0G3A0A4A0A4A56 NM:i:14 +SRR065390.1371033 0 CHROMOSOME_I 43 0 100M * 0 0 
GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAGGCCAACGCATAAACCCCAAGCA :@@B??4?????3??77938<=<7?>39??B8A?A@@C???A>>BBBACCBCCCCCACBCCBBCCCC@?@??BB>B> AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0G5G1C8A5A76 NM:i:5 +SRR065390.3034972 16 CHROMOSOME_I 43 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT >B>B;@B=B?><@B@B>>@>BBDBBADDBBA>CC?@CCCBCACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5390013 16 CHROMOSOME_I 43 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #####################@88C8:?=AA1:A;8C5=CCAC=@ACAC=ABBDBBB@DDDBB@CD@DD@CCCCDCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10370271 16 CHROMOSOME_I 43 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #########################################?>BB37:7/:8:/75;7;77;=?4><;CB@A;@CCCCC@C@CCCCC@CC?CCCCCC@CC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11422655 16 CHROMOSOME_I 43 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ######ABAC>?DBBBDBBBDADD@>DCBBDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15298468 16 CHROMOSOME_I 43 1 100M * 0 0 GCCCACGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #####################################AAACC=ACC1ACCACC2BCC?C@ABCC?AA?A=C>CCACCC@BCCCCCCCCCCCCCCCCBCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3T1A6G87 NM:i:3 +SRR065390.20235987 16 CHROMOSOME_I 43 1 100M * 0 0 
GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #############@@9:7:8BAA>ABB?@BB:B?@AA=DDA@ABBADDCCCBCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21015579 16 CHROMOSOME_I 43 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##########################A;@=B@;BBA@CA=CAC@CCCCDCCCCCACCCCC@CCCCBCCCCCCCBCCCCCCCCCCCCCCCBCCCCCBCBCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21101533 16 CHROMOSOME_I 43 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##########################??A;=ABBAA>BB>ABADDB8DADCBCCCCCCCCDCCCDCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23753867 16 CHROMOSOME_I 43 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ######################??6?B@:DD6D>AA?>C;BBBC>BBDD@BBBBCBDCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26254463 16 CHROMOSOME_I 43 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ######################@@@>=:>8?BB>B@=@A@=====@B;B@BBA;7B=AA?8BBB##### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:96C3 NM:i:1 +SRR065390.31555089 0 CHROMOSOME_I 44 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCBCCCCCCBCCCCACCCCBBBBB(?A??AA=B=CCCCDC>AC=BB@?B8>==:BB@6?#################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:45A54 NM:i:1 +SRR065390.4469707 16 CHROMOSOME_I 44 1 100M * 0 0 GCGAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA 
###########################@>==@A>=.@@@?CCCCCC@=CCBC@CCCBCC>C@C==?=>?ACC;;>5>@=CA?9A############################################# AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:68A12A5A6G1C0T0A1 NM:i:7 +SRR065390.28536327 16 CHROMOSOME_I 45 1 100M * 0 0 CGTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######################A:A@(6@@>:9:>553+(>@7<>=B:A@5BCDCADCBAC@CCCACCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCC AS:i:-7 XS:i:-7 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T0A35A61 NM:i:3 +SRR065390.30452237 16 CHROMOSOME_I 45 1 100M * 0 0 CCAAGCCTAAGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################################B@=@A=@DBCDCDCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T13A84 NM:i:2 +SRR065390.473101 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAGCCTAAGCACCAG CCC@@C@CCCCCC@CBC@C@C:?@@==;@=CBABC@CAC9?2?;2A6AA@################################################## AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:76C18C0T0A2 NM:i:4 +SRR065390.2292661 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCDCCCCCACCCBCDCABCB@CCDBABC@######################################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4943586 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCABCCCC=BCC@@@BC@B<@BAB?<;5@?877;7;>D>?D:>?=>@@@B=6@@@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5569754 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACBCDCCBDCBB@CBBBBBDB;DB>@ABBA?CAAAAAAA@AAAA########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8043944 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCB@DCCCBCCBCDDADDDD>DBBD@B@@ADB?>B>B@B@>CBBABAA?BDB4@>??DAAA7A AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12047482 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCA@C@@BAB@DDB>B@;@A:@@>DC=;@@DBBBBBBAC>B?6B;?<8?:? AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16134128 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACGCTAAGCCTAAGCCTAAGCATAAGCGTAAG AAA=ABBABBCCCCC>ACACBCCCCCCCB?CCC@C@BBB>3?8A>>@CC@@D@?###################################### AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:69G0C18C5C4 NM:i:4 +SRR065390.17957775 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCDDCBADBBCADDB<=DBABBBBB?C?DD>B::8AAA@@################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22147900 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCCAAGCCGAAGGCTAGG CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCBAD=@=A=@5==:74@################################ AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:81G2T5T3C3A1 NM:i:5 +SRR065390.22676708 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACCCCGCCCAAG 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCADDBDB@@DABABBDAD@?>6@@@@@################################### AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:88C1T0A0A3T3 NM:i:5 +SRR065390.22905722 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCDCDC@BBBBDBBBADB@BCBBB>A?>===92>9=985@###################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23066210 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDC?DCAB?>BCAB@DBDAA>>?BB??>DBB######################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24301313 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCDBDDADBBDDBBBCDBDBBBB>BAC>A>AAA53=@9>999AAA################# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25506513 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTCAGACTAAGAAAAAGACTAAG CCCCCCCCCCCCCCCCCCCCCACCCCCDBDCCCDDDDDCBBDBBDAA>BA################################################## AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:76C2A2C5C0C0T3C5 NM:i:7 +SRR065390.32874751 0 CHROMOSOME_I 46 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD6CB?>?;>ABAA6A??>(9<9:18868@68@:692:85 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12211289 16 CHROMOSOME_I 47 1 100M * 0 0 ACGCCTAAGCCTAAGCGTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
###################################@;6;BBB?B>A?DD@:999<<><<;87;89<<>>?<9??;::9;7883800000 AS:i:-44 XS:i:-44 XN:i:0 XM:i:18 XO:i:1 XG:i:1 YT:Z:UU MD:Z:0A2C1T2G3A1G0C4G2T2G2T2G2T0A1G2T2G5G48 NM:i:19 +SRR065390.26846416 16 CHROMOSOME_I 48 1 100M * 0 0 GCCCGACGCCTACGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ############################################################?BABC@CCB@AACCBCCCCCCCCCCC@CCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A0G2T1A5A3T83 NM:i:6 +SRR065390.4887361 16 CHROMOSOME_I 49 0 100M * 0 0 GCCTTCGCCCTGGCCCCCGCCTCCGCCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##############################?AB>>BBDBBBBBBB>DBDCCDCBAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-24 XS:i:-24 XN:i:0 XM:i:12 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4A0A3T0A0A3T0A0A4A0A3T0A71 NM:i:12 +SRR065390.27800038 16 CHROMOSOME_I 49 1 100M * 0 0 GCCTAAGCCTAATGCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ############################################################@A@@@B>>@>>AA6:@A>>CB@BBACBBB4?>??@@BA;B>B0@############### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93A6 NM:i:1 +SRR065390.4627990 0 CHROMOSOME_I 51 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDC?BDCABB>BA=B?@?B;?=BA@47??:?############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16715324 0 CHROMOSOME_I 51 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCBAADB=AB;@D0;BACB;BBBA;0>==5=3?@>@7477:@######### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18309630 0 CHROMOSOME_I 51 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCA=CCCBBBCC?BA@B;BAB<:703=:=:::??@@@######################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93A6 NM:i:1 +SRR065390.19555357 0 CHROMOSOME_I 51 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAC CCCCCCCCCCCCCCCCCCCCCCBCCCCCBCCDCCACC@CC@CCDCA@@BBDDBA@BADC<@DBB;;@@??A>A@C######################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:99A0 NM:i:1 +SRR065390.20581090 0 CHROMOSOME_I 51 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCCTAA CCCCCCCCCCCCCCCCCCBCCCCCC=A?:A<;?;;A?;A=9=;9A####################################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:91T8 NM:i:1 +SRR065390.23080061 0 CHROMOSOME_I 51 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACAAC@CCCDBBDACDDADB@ABDB>ABBABA@CC>:A8B<<:?@>A@AAA???B### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26337922 0 CHROMOSOME_I 51 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAA CCCCCBCCCCCCBCCCC?BCCCCCCCCCCCCCDCADCCCAA2=:(@############################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92A7 NM:i:1 +SRR065390.10947804 0 CHROMOSOME_I 52 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGGCTAAGACTAAGGCTAAGGCTCAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CACC@@CBBC>=CB?A?5A######################################## AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:76C5C5C5C2A2 NM:i:5 +SRR065390.13041859 0 CHROMOSOME_I 52 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTACGCCACACCCCAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCDCCDCCDBAABC@B@?BB?BBDADABAB8B?>>:9A########################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:90T9 NM:i:1 +SRR065390.20639196 0 CHROMOSOME_I 52 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAGGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@BCA=AA?CDCCCDACDCBBDDBBBBBBBBABB>?;>BBB?AADDB@D@BB@=BBA>=@BAB?5ACDDC5BACCAADCCC@ACCACCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1360842 16 CHROMOSOME_I 56 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #############AA;9457A5:7@C>B:C@CCB@@CCCC5@=CC=7;===@?==CC0>9A=;;BB;BBB?>AA@9?CAC>@BBCCCCC@CCCCABCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8944828 16 CHROMOSOME_I 56 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ######D@D>>BDABAA@@ABBBBBDBD@@DDBBDADCDCBBCCDBDACCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9154510 16 CHROMOSOME_I 56 0 100M * 0 0 TTCATATGGGCAGGGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ##############################@B;@?>>B1?BCBBC@>CDB>B@CA@CCAC=AA>>AC;CCACCCCCCC=CCCCCCCCCCCCBCCCCCCCC AS:i:-28 XS:i:-28 XN:i:0 XM:i:14 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C0C0T1A0G0C0C0T0A0A0G0C0C0T85 NM:i:14 +SRR065390.10327745 16 CHROMOSOME_I 56 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #####?BB?>793=2=@7>75;BBDABDBBCB@CC@CCC89876@CCCC@B8@@ AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A4T2G5G2T79 NM:i:5 +SRR065390.11441245 16 
CHROMOSOME_I 56 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #######################?=:>==8;;;231@==;B8BBA;?8AA9B@=CDABCDCD@CCCCCCCCCCCCCCCCCCCCCCCCCCCABCCCCCCBC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12629337 16 CHROMOSOME_I 56 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ;=5?:8A?<:?8A@@A?==@6B=BDBBBA@CABC@DCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13640483 16 CHROMOSOME_I 56 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA =>8=>3A@=6B=B@8B?>BDB@ABDA>BDBCCDDCCACCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18076115 16 CHROMOSOME_I 56 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ###########@<88>>8?<=;8A@>:=;*AAB=@C>@84=>5:535BCBB@DCC?A?DCC?CA9CABC@;CCBAABCCDCCACCDCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22432674 16 CHROMOSOME_I 56 1 100M * 0 0 CCCACGCCCAACCCCAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #####################################A?5@@AA:9B;BCCCACBCCCCCCCBCBCCCCCCCCCCCCDBCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T1A3T2G2T8G76 NM:i:6 +SRR065390.24633504 16 CHROMOSOME_I 56 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ###############?>B==B:BC4BDAACDC@CCACCCCCACCDCCCCCCCCCCCCCCACCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30385823 16 CHROMOSOME_I 56 1 100M * 0 0 
CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ########################################?@?@AB@BBCD@B>CCBCD@BCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.159947 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA <>4=96:98:?46?A6?B@BA=?>BBB>>BADBBBDA@BCBCCCCADCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1796988 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############################################AAA:00000?=@BCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2760975 16 CHROMOSOME_I 57 1 100M * 0 0 AGAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############AABB@ABBACBCBC?CDCBCACCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCDCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C0T2G95 NM:i:3 +SRR065390.12418490 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############A8@AACBBB;;B@BBBB@CB@B@DADADA>CCDCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14521060 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############################B?>BB@A=:<07;;7B@@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14712716 16 CHROMOSOME_I 57 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################@@98@B<=@@B=?ABCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17723586 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################@:B><;75<@-==;CC@BCBC?CACBCCCCCCC>>==AA AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17845832 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################?@9A?88ABB0?@ABBBABA=B*BC=BC?CDCCC=DCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18910535 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #######################BABCAAB>BBBBDB=DBBD@BD@DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19689790 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################@33B@>B>ADA>B@@CADDCACCBACCCCCCCCCCCACCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19927220 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######################CBBA>BBDDB>DBBD@ABBDCBDBDCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19989398 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 
@@6@?@@:<@BBB@D>@D??>A?>CCBCBBBBAB@>BBAADBBBABDDCDDDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28429399 16 CHROMOSOME_I 57 1 100M * 0 0 CAGAGCCTAAGCCTAAGCCTAATGCTAAGCCAAAGCATAGTCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################A:=@@.AC@CBCDCCCCCCCC;CBCCCCCCCCCCCCCCCCCCCCCCCCCC@CC AS:i:-16 XS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T0A19G0C7T4C2A0G59 NM:i:8 +SRR065390.29699086 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################?:@?;:>=4B@BA?AB@BCB:BAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30602381 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################ABBB@>>B@B?;B?BB?@B;CBBB=BDCBCCCCCCCCDCCCCCCCC?CCCCC@CCCCCCCCCCCCCCCCCBCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30661211 16 CHROMOSOME_I 57 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######??8?5<08<::=48?B6@B8;8=?=ABB=B;AABA@>BB;BBABB6BD>BDDDDDDCBACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1865058 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCDCCCBCCCCCCCBCCCCCCCCCACCCCCCCBCA?BBCCAA########################################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3486883 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG 
000000000,00000AAAAAA>A=ABBA<:;8;592;77230/79111-9?+79?############################################# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5071220 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCATAAGCCTAAGCCTAAGCATACG BCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCC?6>=>=@9B=?==9==BA;;@######################################## AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:72T4C17C2A1 NM:i:4 +SRR065390.7153981 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCACCCCCCBCCCDCCCCACCCCCCCD@BDDDB:BA8>6>=)=>:==>>>5B?@@@96<:;@>0>> AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9722947 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDCDDDCA@DBDDAC?BBBBB?ABBB@;A>:=:?9?6A??########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13953914 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTCAG @@A?=CC@CC===A=CC:@@ACCCCCC@C<:8<839>=>?5=@@######################################################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:95C1A2 NM:i:2 +SRR065390.15007214 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCB;DDDCDBD@DACB>?=BBBBA=0>>DBBDD=ABDB@B@@@BABABCB>>AB?BBB4?6(? 
AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16824019 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTCAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@DCCD@@ABBAA>>DADBBBBAB?BAA=A############################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92A4A2 NM:i:2 +SRR065390.16851488 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCCAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCC=8=A<=>=8:252;/@@A=?############################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:78T11T9 NM:i:2 +SRR065390.17897057 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCAACCCCACDC@@>>BCBB8AAB>@############ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19083188 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCC@CCCCCABADCBAD@ABB0B@B@(@################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19427372 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCDCDDCABACD>BDDB?>BBBDA;D>A<>>DBB?A>DBBBA>ACB??><@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19483941 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCBCCCCCCCCCCC@CCCC@CC@BC=>4B?CBB AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23214436 0 
CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCABCCCC@CCCCCDCBDCAACBBCCB@CB@BBBB?;@@=BA>AABAA>;>AA.A@@?B?############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24888065 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTACG CCCCCCCCCCCCCCCCCCCCCCCCCCCDCBCDD@DBDAAADBBBBA###################################################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92A5A1 NM:i:2 +SRR065390.29491496 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCDCCBCCCCCCCCCC@ACCCCACCCCCCCCCCCBCCCCCDDABCDDBBBDBAABDB@A>;@B@AAA??0:+=544925??######## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31477096 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACACCTAAG CCCACCCBCACCBCCCCCCCCAC@@CCCC@@@>?@>B@@;>A>>>*865><9?;;@>=>@BB@B>B;>;;===B=>;AA?*-373BB(?B########## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92A0G6 NM:i:2 +SRR065390.23852108 16 CHROMOSOME_I 58 0 15M1I84M * 0 0 TTAGCATAAGAATAAAGTTTAAAATTTAGCTTAAGACAAAGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################################################?A@<=CCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-36 XS:i:-36 XN:i:0 XM:i:14 XO:i:1 XG:i:1 YT:Z:UU MD:Z:1A3C4C0C4C0C3G0C0C1A3C4C1T8G53 NM:i:15 +SRR065390.23932205 0 CHROMOSOME_I 58 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCDDCCDCDADDDBDBABABDBBBBAABBAAB?CC=AAACA?8=@@68: AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24149813 0 CHROMOSOME_I 58 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCDCC@DDACA@=D@A=@BABBB6BBAAB=@B@AB71<>9>0:>1@6@## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1087596 0 CHROMOSOME_I 59 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGC CCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCDCCBBCDCAA@CB@=9:::A################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92G7 NM:i:1 +SRR065390.26571979 0 CHROMOSOME_I 59 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAGCCTAAGCCTAAGC =???=>AA>ACBCBCCCCCCCC>@@############################################# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:75C24 NM:i:1 +SRR065390.31153791 0 CHROMOSOME_I 59 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCCCCCBABDADBADDDBBBBBBBB>@B?CA:AA>AC6=<:A:2:@###################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:85A14 NM:i:1 +SRR065390.33128805 0 CHROMOSOME_I 60 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTAAGCCTAAGACTAAGCCTAAGCCAAAGCA CABCCCCCCCCCCCCCCCCCCCCCCACCCCCDCDCCABBBAAA@?BA@<=0@@@<>B>BBBA>AB@################################## AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:69C10C13T4C0 NM:i:4 +SRR065390.8926768 0 CHROMOSOME_I 61 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCD@CDA?=>C88A==@.8:@=<4><@@86;@::@# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9390775 0 CHROMOSOME_I 61 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTAAGCCT 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCB@CD>CBAADCC=AABB@@???@A######################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:81T18 NM:i:1 +SRR065390.12883674 0 CHROMOSOME_I 61 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCACACCCC=ACCCCABD@B@BADABBB:@@=@= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19474303 0 CHROMOSOME_I 61 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@DCCCC=CDBC=CACDC:>BBDB>?DB###################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32664926 0 CHROMOSOME_I 61 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCAAACC?BCBC;CCCBB>=@C>4@############################# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:82A17 NM:i:1 +SRR065390.33180966 0 CHROMOSOME_I 61 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACT CCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCACCCBCBCCACCACCCC@8?>AA############################################# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:97C2 NM:i:1 +SRR065390.14178092 16 CHROMOSOME_I 62 1 100M * 0 0 CCGAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ##############################DBBBD@AC@@A??C@9><;:CABDCCBCCCCCACCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T5T91 NM:i:2 +SRR065390.33093699 0 CHROMOSOME_I 63 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTAAGCCTAAGCCTAAGCCTAAGCCTAA 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCC1'0000>=39A############################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:72C27 NM:i:1 +SRR065390.3811166 16 CHROMOSOME_I 63 1 100M * 0 0 CGAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################??>AB@?B?>@A==:BB@=??CBB>D@BDABBCDBDCDCDCCCACACCACCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T12A85 NM:i:2 +SRR065390.9360891 16 CHROMOSOME_I 63 0 100M * 0 0 GTGCGCCTACCGCTACCCGTCACGGCAAGCCTGCGCCTCCGCCTAACCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####################################################@C@BBCCCCCCCCC1BCCCCCCCCBCCCBCCCCCCCCCCCCCACCCC AS:i:-40 XS:i:-40 XN:i:0 XM:i:20 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C1A0A5A0G0C3A0G1C1A1G0C0C0T6A0A4A0A6G4A48 NM:i:20 +SRR065390.11907902 16 CHROMOSOME_I 63 1 100M * 0 0 CTACCCCGACGCCAAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################B==8>7BB>@B?@CDC@@CCBCC?@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A0G2T1A3T86 NM:i:5 +SRR065390.18166736 16 CHROMOSOME_I 63 1 100M * 0 0 CTAATCCGAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################@?;AA>BB>B>===3BCBC@@CCDCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4G2T2G89 NM:i:3 +SRR065390.24035073 16 CHROMOSOME_I 63 1 100M * 0 0 CTAAGCCTAAGCCGCAGCCTAAGCCCACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################################################A=CA?BACACBCCABCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCDC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:13T0A10T1A72 NM:i:4 +SRR065390.29735708 16 CHROMOSOME_I 63 1 100M * 0 0 ACAAGCCTACGCCTAAGCCTGGGCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 
####################################@B4B000+,1.+108;@BBBBBBB<@?B@>AB?BBB>>@?@>BBBB?:;>><:@@@? AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T6A7G2T5T7A12G53 NM:i:7 +SRR065390.1636136 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCC@CCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CACC@CACCAB:B?66@:@@@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3392236 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCBCBDCAABBDADDDBBABB@?A>BBADA?ACBBB@;?B;@?@@@<@6@@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6690334 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCBCBBCCCCCCCCBCCCCCCCBCCCCCC@CCAC>:AAA;9;==?=:/A=AA@?C?@A8757=?######################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13292986 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAGGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCC@CCCCCB@CD?C>C>@=A@5@A?AA?0=?####################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14309215 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCAACBCA:ACCADBABDDA;@BB<=::A>?B4;D69===D######### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.15577802 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDCCCCDACCCCBADCD@B>BB@BB3?B?BB>>B@AC?BB?>B?>?@@A@?B6AD 
AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16480523 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGGCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CBCCC??>CCC=ABBBAC;?AA6?@>@########################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:94C5 NM:i:1 +SRR065390.17141184 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACG @BBBBBBBB=>>>>>:>>>:<>>:>BBBBBBBBBAB:BB>>BB5BB9>>?BBB############################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.17760882 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACG CCCBCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCA@CCCCBA@CACC@CCC@BA>BCC@CBBCA><=CC@B@BB##################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.18662066 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCBCCBCCCCCDC@BC@AA=A5A=A@C=@@=?BB8A############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21809038 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAACCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCBCCDCCDCCBDCBDC?ABCBB?>B5=AA########################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93G6 NM:i:1 +SRR065390.22192818 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDCCCCBBCCDDDBBBBBBBBABB>B>BDB?AA>>@>@>9<48*@######### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 
+SRR065390.23362845 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCC?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCDDDCBBCBDDB>BBBB?AABBBA6A>BB:>>5,=99?;6>=?####################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26775560 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCADCDDCBDCDB@B>BBBBB>>AA=<=@########################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:96T3 NM:i:1 +SRR065390.28745603 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCDCCDBABBB>BDB6BC>AA@BBDB>BA@B####################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29133277 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAATCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCDCCCDCCDCDBCBBBBCDBBBBCDDCCAABBDBAAA>@A@>D>A############### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93G6 NM:i:1 +SRR065390.4912040 16 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###@@A5:8A7;84:=>A:>;BB@>;BBBB?=@BDDBBCBCB>DCACCCBCCCBCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9629649 16 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ######################@A=>>;@@B;4BDB?BB5CCBB@>CCBBCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15607771 16 CHROMOSOME_I 64 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ######@@=?<==:6<<>AA)B@BBA@BBBB>BA@BBCDDDADABCDDC@CCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16191081 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCBCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCC@A?CCCCCCCCCCCBC@CCCBB>A=@4=>@CC@@9@AA################# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16241777 16 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###############DBDD?:@1:@4=0=;>8B>@CC<@=CACCCCCCCCCBCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22359834 16 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG @=)????<6>A>AAADBDD>;>>BBC;@BBAB;ADBBBBADBCB@@CCCD@CCCC;@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25076320 16 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###################################@:=C>@D=CCDCCCCCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26013356 16 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###############?C>CBBA=BBDDBBABDD@DCDACDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29257811 16 CHROMOSOME_I 64 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###############@9@@=:@?BB>;B?BB>>BBDBD;CBCDAACCCA>?CACC?CCCCCACCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29430375 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCBCCCCDDCBBCABBBBBBAACCAAAABCBAAB?AAA6A@A@>?AAAA?64?########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30148213 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCC=@@B@A<@@;BB=BB;BBB>>?B@?@4;@############################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30863885 16 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ####################@@@@@>>DBB>6BBBC@BDBBA=BBBBCDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31415192 0 CHROMOSOME_I 64 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDDCCBCBCBCDD@C;@@C@BBB@BAB>BB@82>>=AB;B?@B>6>77=7*;,7378/895A?AB< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6076202 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCDCACDC@DCABB>A>?@>??,>?=A########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6192585 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBDBCBABBDDB@BDDD@B>BAB?@ABABB?C?:AA@=)?:=@@####### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9007991 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCACCCCCCCCCCCCCCC=CDBC@AB>B5<@==BBB>B>?BBB20879=(5:.=:@94 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12321086 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDCDDBDD@DDDDBDBBBBC=>B>BBBACCBC>CCAC?ACC= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20771747 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCA@ACC@DCDAC@>@BBBB??B<<7739373:93937;3=@A>?A?A96<*8:5<8@############ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21858348 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDDDABDBBBBBBADABBBA4>BB???######################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22848365 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCBAC@CDADB;DDBB@@@?BBB>BCAACAADD??A>A>>40771 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25705771 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCDDDBBCDAD>DABB>BCA=@:@?@A?*=70;70=@@A@######################### 
AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12439940 16 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##DDB6@<<=D6B>@BB@@ABDB@@B@BB=D@DBDCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14000986 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCBACCCDD@CBDDA@DDABBADBDDDDABBBB;DBBBBB>CABAABAACA:BA:?B?6?ADD? AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16839733 16 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #############################################AA@CC;CCCCCCCAABA@CCCCA@C@CC@BCCCCCCAABAA@AACCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17727634 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCAACCBCBDCDDACBCBBBBAB@>BBB@BA@B?;BB@A6?A8?################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19208569 16 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #########?979-5A:.6@B?>6BABCA;=DDDB=ACCBDCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22372328 16 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ####A5*904@6=@>1BBBBBBCBBBABCDCCDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23208723 16 CHROMOSOME_I 65 1 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ######@==B:77.<<@=<::>=:7;6BBBDB4BBBB9=BCA@B;CCBCAABCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24356433 0 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCDCBDDBADDBB;BBAABBABB>C6AB??>*AA@:A8@@@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33542765 16 CHROMOSOME_I 65 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #######A6@>:AAC=AA4A>;@DB>=@8<@BBBDABBDDA@B:?0?############################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12905806 0 CHROMOSOME_I 67 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCC@CCCCC?ACCC<@CBBBA5CBDA@=A@=@A><@>BB8BB>BBD3D###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17964782 0 CHROMOSOME_I 67 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCAABCB=8BB?BBB>AB8<=???0;7?######################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24880161 0 CHROMOSOME_I 67 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCADCCCDDDDDCBAABDBB>D>B@BD?D8@==A==@@>;;: AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29470600 0 CHROMOSOME_I 67 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCT 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCADCA@ABBBAB=BB<=A=BDD:@BCACAC@@############# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:95A4 NM:i:1 +SRR065390.9712286 16 CHROMOSOME_I 67 1 100M * 0 0 CCCTAAGCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##############################@AA@A::=>/==:A>B@DCCBBBBCCC>CDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0G17G81 NM:i:2 +SRR065390.9802725 0 CHROMOSOME_I 67 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCDCCCCCCCCCCCBCCCCBCCCCCCAA@CCCCCCCCCCCCBCCC?>AAA=>AA3>AA8=AC>A=CAC?>?B?B@B#################### AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:58A41 NM:i:1 +SRR065390.4151860 0 CHROMOSOME_I 68 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTA CCCCCCCCCCCCDCCCCCCCCCCCCCCCC@7?<A:=;A27>22@37???A6@A:9;-390=754:=@##################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93A6 NM:i:1 +SRR065390.6451314 0 CHROMOSOME_I 68 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCBCDAACDABBBDD@B=BDBB;A7A???#################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93A6 NM:i:1 +SRR065390.7753919 0 CHROMOSOME_I 68 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCBCCBCCBCA?>*>??@?:B>CCC>CCCDBB>DB@B=6BBBB=1=A?9:D8D@@?@8 AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:48C51 NM:i:1 +SRR065390.12766808 0 CHROMOSOME_I 68 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTA 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCC@CCCCBBCB5>@?@B@B@97;;9@################################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98T1 NM:i:1 +SRR065390.27353147 16 CHROMOSOME_I 68 1 100M * 0 0 CCGCAGCCTAAGTCTGAGCCTAAGCCTAAGCCTAATCCTAAGCCTGAGCCTACGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #############################################################################################B>==@A? AS:i:-16 XS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T0A8C2A19G9A6A6G40 NM:i:8 +SRR065390.2141529 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCCADAD@B>B>@@@D?BABB>?BA?########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2320480 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC<>CCCC8@CCCC=ABCCBACBC9=CB@:C56@8A.:5?=3A?B@B@?>?B?;;434=>90>: AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4714421 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCBADBCDDCDBDCBBABDAC@BCBAAABBB?AB>=BDAAA?BD;?B?@#### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6640654 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCC@CCCCC@CCCCCCACCCCCCCCDC>CCCC>CCDCCABADCBD;B@BB=B?=B>19>==@>4BB8:=74B;?8D??8@########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7630109 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCBACCBDAB@DAABBB@B?@########################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9514930 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCB@CCDB@BAB@A@>@B=>>4A???8BBD??9D@>?;?85@####### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10004235 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCACACCDCC@CD@AB@@C>BC;:@?:@=??########################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10865454 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDC@CD@BB?CB@BDDD@BAA@AB3>?>>==(9:;=4>@:@;:27.@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12673297 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCDCCACCCC=CACBA>DCDDCABBDB@>>B>B@>B@AAA>C>3@=:@@A>?########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12772245 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CACBBC@CCABC;CACBCCBA==B>?=A#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13667706 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 
CCCCCCCCCCCBCCCCCCCCCCCC@CCCCCACCC?CCCCC@8AAA:@:>:BBB;B@=?B?BDB0BBD@B>B>=BD,//85A4AA@CAAAAAAAA??8?@@@@?########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17659493 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCA@CCCDBCCACB@BBDBDA=BBABB=AACA=@?B####### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18370296 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDC>CCBA>BC>BBDBB>B=8BBDCAAAA>8?:?A?:?DAA>A###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18372281 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCDBDDDBBDBDBBBB=BB;BB@BABBB>?BA?A>CAA@CCCACC>CBA>DCDB@A@BB@C@BBCA=>=:@##################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31254323 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCDBCCACCCCBABDBDBBAAB>BBBBBA?BBB>AABAAA??BB@9(AABDA>B@BB>BA8BB>:>8@=>6@3:<@=@:@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11515609 16 CHROMOSOME_I 69 1 100M * 0 0 CCAAGCCAAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################?>55;C<CCCC@@82@A=A;=@8?C@C@@CCCB@CCCCBCCCBCCCCCCCCCACCCACCAC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T5T92 NM:i:2 +SRR065390.11752838 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBBCCBABBBD@BAB=BB?B=3?@@=?@:B>:A########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11823969 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCBCCCCCCCC?CCACC>@C@CA?BC?AB;@@@@DB=ABBBBAD@BBB;AABBCAAABAA=A>A AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12248467 0 CHROMOSOME_I 69 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCBCCCCCCCCCCCBDD@A=A>BDD>>>BBB@BB>B3BB?@;:>=+<55,75=::AA@6@## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7434339 0 CHROMOSOME_I 70 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCAAAGCCTAAG DBCCCCCCCCCCCCCCCCCCCCBC@CCACCAAB?ACCCCBA>BBBBA>>AD@B@?>@@A########################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:84A8C6 NM:i:2 +SRR065390.23718480 16 CHROMOSOME_I 71 0 100M * 0 0 ACGCAGACGCCATAGCAGAGGGGTACGCCTCAGCCTGAGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ############################################################################A>4@AA:?A@;A6>00000763:6 AS:i:-30 XS:i:-30 XN:i:0 XM:i:15 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1A2C0T1A3T0A3C0T1A1C0C2A4A5A6A56 NM:i:15 +SRR065390.21251465 0 CHROMOSOME_I 73 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCACCC@CCCCB>CBCDB=@7?5@@>:>@=??/=?############################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:94A5 NM:i:1 +SRR065390.21597469 16 CHROMOSOME_I 73 1 100M * 0 0 GCCTAAGCCTCAGCCTAAGCCTAAGGCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT 
####################################BBABC?CDCCC@CCCCACBCCCCCCCCCCCACCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10A14C74 NM:i:2 +SRR065390.29344619 16 CHROMOSOME_I 73 1 100M * 0 0 TGATAGGCCAAAGCCTAAGCCTAGGCCAACACCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #########################################AA?A897+99>;7:0A>>8BABABB?ABBAABA8><>9>808<8 AS:i:-18 XS:i:-18 XN:i:0 XM:i:9 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0G0C0C2A3T13A3T1A0G69 NM:i:9 +SRR065390.32874267 0 CHROMOSOME_I 75 1 13M1D87M * 0 0 CTAAGCCTAAGCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG DCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCC@CCCCCCCCCCCCCCA>/=;=9>:/5AA############# AS:i:-8 XS:i:-8 XN:i:0 XM:i:0 XO:i:1 XG:i:1 YT:Z:UU MD:Z:13^T87 NM:i:1 +SRR065390.13205982 16 CHROMOSOME_I 75 1 100M * 0 0 CTAAGCCTAGGCCCAAGCCCAAGCCTAAGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################################################################################### AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:9A3T5T14G65 NM:i:4 +SRR065390.1968887 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAAGCTAAGCCCAAACCTAAAGCTAAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCC@C==@=:A####################################################### AS:i:-16 XS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 YT:Z:UU MD:Z:72T2G0C7T2G5G0C4G0 NM:i:8 +SRR065390.3020042 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTACGACTAAG CCCCCCCCCCCCCCCCCCCCCC@CCCCACCCCCCCCCC@CCBCCCCC=CCCCD8CCA########################################### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:82C9A1C5 NM:i:3 +SRR065390.5329842 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAGGCCTAAG 
CCCCC@CCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCC@CCCCCCCACDCCCCABCCB0@>A@?BBD@B>BA>B>B??BBAAC>?=:?A########## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92A7 NM:i:1 +SRR065390.7718336 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCAAAA+AAAAAACCCCCACDDCCDCCBD@B@BBBD<;@BBB?BACBBB8D>B>B)>? AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:48T51 NM:i:1 +SRR065390.9099421 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCAAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCB@C<@@BDCA@D@;B@A?=@B########################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:84T15 NM:i:1 +SRR065390.13504888 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAAG CCCCCCCCCCCCCCCBCCCCCCCCCCCACCCCCCCCCCCCC@CCC=;3=ABB=@B@?@B<@################################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:87G12 NM:i:1 +SRR065390.26299881 0 CHROMOSOME_I 76 0 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCACAAAGCTAAACGCATC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCBDBBDDDABBDD?###################################################### AS:i:-20 XS:i:-20 XN:i:0 XM:i:10 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80A2C0T2G0C4G1C0T1A0G0 NM:i:10 +SRR065390.26546478 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAACCTAAGCCTCAGCCTAAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@DCAACBBC@CC@BCBDD=BB>;BDDB>?################################### AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:78T2G9A7G0 NM:i:4 +SRR065390.27871278 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAG 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDDDBCC@@B@AD@BDDC=BBBB@>;@BAA################################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:90T9 NM:i:1 +SRR065390.28581901 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAAGCAAAAG CCCCCCCCC@CCCCCCCC@C>>B>>C@8CC@CC@@@?A######################################### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:89C5C0T3 NM:i:3 +SRR065390.30916163 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACG CCCCCCCCC9CCCCCCCCCCCCCCCCC@AC>?<;CCACCCCCB;C@@===;=BBB0A=A################################# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.32781383 0 CHROMOSOME_I 76 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCAAAG DCCCDCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCC@CCC?CC?CCCBCCDA@BB?BB3B>BBDADBDCACCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0T2G0C95 NM:i:3 +SRR065390.7469639 0 CHROMOSOME_I 77 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGC CCBC?CABC>CCBCCACACCCB?CCC@ABCCCBDCA@C@BCB=@<@@##################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:85A14 NM:i:1 +SRR065390.13605737 0 CHROMOSOME_I 77 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGC CCCCCCCCCCCCCCCCCCCBCCCCCCACCCCCACCCDCACCACBABB@B<>BB>B:7;=7=><<<<>A=:?A.@@######################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:95T4 NM:i:1 +SRR065390.14866698 16 CHROMOSOME_I 77 1 100M * 0 0 AAGCGTAAGCCTAAGCCTAAGCGTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
########################################C>3CCAABAA3AA?ACCCCBCCBCCCCCCCCCCACCCCCCCCACCCCCCCCCCCDCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4C17C77 NM:i:2 +SRR065390.19434606 16 CHROMOSOME_I 77 1 100M * 0 0 AATGCTATGACTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##############################################@:A80000/=09;?5>>>;AA5A>?CCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2G0C3A1C90 NM:i:4 +SRR065390.30084692 16 CHROMOSOME_I 77 0 56M3I41M * 0 0 GCGCGTAGTCCTCATCCTCAGCCTCAGCCTAAGCCTAAGCCGAAGCCTAAGCCTAATAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ########################################################BA76CCB>B@ACBCA.>?ACC5CCCCCA;CC@CCCCCCC>==?=?@BC@@BCCCCACBACCACCBB@>BA=5BB############################################## AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:79C13T3C2 NM:i:3 +SRR065390.29958565 0 CHROMOSOME_I 79 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAACCTAAGCCTAGGCAGAAGGGCAAGCTC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBCCACCC=BDDBA>@@@>B################################ AS:i:-18 XS:i:-18 XN:i:0 XM:i:9 XO:i:0 XG:i:0 YT:Z:UU MD:Z:72G10A2C0T3C0C0T4C0T0 NM:i:9 +SRR065390.26894079 0 CHROMOSOME_I 80 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCDCC=CCACB;BBBB>8A8AA?=<>:?=8=A?@=D?D=B@B########################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:91A8 NM:i:1 +SRR065390.15628724 0 CHROMOSOME_I 83 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCC00000+0//000000CCCCCCCCCDCCCCA@@BCCCADCCCB;8@BBB>;BBBB@;AA?A749=>:=*9=>@::A?8=3@##### AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:20G79 NM:i:1 +SRR065390.4058482 16 CHROMOSOME_I 83 1 100M * 0 0 
AAGCCTATGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ############################BD:?2;@?24?6?3??86?3?:B>@BB87787>BBBBBBBBBBBBBBA????>:BBB?CACC?C@C@CCC@@ AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:7A3T88 NM:i:2 +SRR065390.31993990 16 CHROMOSOME_I 83 1 100M * 0 0 AAGCCTACGCCTAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ###########################B9;13>54888@@<@9>>7>7:@A@><8<68@@##################################################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80T15C3 NM:i:2 +SRR065390.268023 16 CHROMOSOME_I 86 1 100M * 0 0 CCCAAGCCTAAGCCCAAGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #########################################@@B?>?;BBB>>CCC8CCDCCC?CCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T11T7A77 NM:i:3 +SRR065390.6919046 16 CHROMOSOME_I 86 1 100M * 0 0 CCCAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ##########################################BCBAB=BCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T8G88 NM:i:2 +SRR065390.2121410 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######################?BA;;=:8CDBABDCDA@ADCABDACCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6166783 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########BB?A?B>BB@>@CABABBA@@@BBDADB@BBCBBADCCBDCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7495357 16 CHROMOSOME_I 87 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA B3>6BB=?>BBB4?@>@BBBB;@BBBAB=BBA@=AADDCCCCCCC@@CCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11353312 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################@7?;<@BB=6>BB?@A@@B;B@BACBDD@CCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15848524 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 77471AAAAAAACCBAABB>BBDBBDBDDDADDCC@>?@6CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15965869 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #######@7.@B>>?@??@@:,9;3=<:8@>ABB=BBB@>ABBAC:CCCCD@ACCCAACCCCCC8CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17210890 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA D>:>DDD*>?BDB?;BDBB@>BBBB@>DC@CCBAB?CAB@DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17578845 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA B:=@@1BAAAD>B???@???==;@A=@AADC@ACCCCB=CBCCC@?DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21562505 16 CHROMOSOME_I 87 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################DBBBBB@>BCB=D@ADCBABCCCBDCDCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30807379 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################@@;852A>:?A;AB>C;ABBAB>CBBBA@BBDBBACDDDDB>CCDCCDCCBCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32276242 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################?9>=AB@?@BBDA6DDBCDDBCDCCDCCCCCACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33628207 16 CHROMOSOME_I 87 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####@B@9@77=74>6BBD;?B?B<@>DCBDABBAB?B@ACC=CCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25375430 0 CHROMOSOME_I 88 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCCTAAGCCTAAGCATAAGCCTAAGCGTAAG CCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCBCACCDBDBBDA@@BC>B@>.?A??<1@########################## AS:i:-7 XS:i:-7 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:66T16C11C4 NM:i:3 +SRR065390.7789978 16 CHROMOSOME_I 88 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGGAGCCTCAGCCTGAGCCTAGTCTTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###########################################################@C?A@C<;;;5;;;65AACCCCC@CCCCCCCCCCCCCCCCC AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:30T0A5A5A6A0G1C46 NM:i:7 +SRR065390.22922524 16 CHROMOSOME_I 88 1 100M * 0 0 
TAAGCCTAAGCCTAGCCCTAGGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ##########################AA?:CBABABBDBB<;ABBA@@>8BC=>BCBCBACCACCCCADCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:14A0G4A79 NM:i:3 +SRR065390.30422609 16 CHROMOSOME_I 88 1 100M * 0 0 TAACCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###################################################?CCCCCC?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3G8T26G60 NM:i:3 +SRR065390.3148788 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCC796B@5B=5=@=:*=>><<8@############################# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3974764 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCBCCA@CAC@A@DDADBCBAA@BCCBBBC=BCBB>?==>>=??@A@@?@########################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8437156 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCACCCCADCCCCCCCCCDDDCCCCA@B@B@BACAB?@BB>BBA>B=>?::D>??B8@?:@######################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12974063 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCDDACBCB@;@BD@?@@@>@@@@??(@######################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17247283 0 CHROMOSOME_I 89 1 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCABBCBC8?AAAAA>B@BCABB>BB@.=@BA=>==BB@B@########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17845387 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCACCCCC8CCCBC;BBBBAAADDDC>BDD>AA@BC?CA>CA>A;0500=A:?AB AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19019558 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCBDCCCDADC=B>BBBB;?BB>B>BB8A=A<=@############# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20644304 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCC;ACCCA8ACCC<8@4@@@5?>@@3?###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29723571 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCDADDBDA@BDBDBBCB>BB>BC>>A;@@3D>??D;?#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30710022 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCDCACCCDDACDDBDBDDDBBABBBCB6BBBAC;>AAAAA?AAAA>??@?########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33685608 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDCDBBBBCBABBBDBA:;=:<;=AAAB?@=CBB@=C<@>B;@B:A>;BD>AB=@BBD@DAABABCCDDCCACCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15212051 0 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCDCCCCCDD@CDC@DDBB@BDACB?BB=@B=B?D?B5=::A#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18095819 16 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ################@B;DDBBB;BBDDB>BBDCB=DBBCBCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20762057 16 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ####?7:88;:7@.=>>>BBCB>@D>DB=BDBDDDBCDBBBDCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21120770 16 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ###########################@@?A?A@C@B@BAC@@?@CCACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23419512 16 CHROMOSOME_I 89 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ###############?=;9:5A?@@2==9=270;7AB;:BDDCBDDDCBCCCCCCACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25957119 0 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCGTCAGCCTAGGCCT 
=BB?CA@@@C7?;<<;>CBCCCCCCCCC8?C@CC?@@B>>2B7888*.0000@====;80==.;?;2?######################## AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:81T4C1A6A4 NM:i:4 +SRR065390.256433 16 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##########################A>9=;BCA?BBBC@0?@>DB?=B@BBCCC>@ACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6797268 0 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAAGACTAAGCCTAAGACT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCC@AAAAC@CB?@@?@C;15<@9=7/7=<<>@B@B@##################### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80C4C11C2 NM:i:3 +SRR065390.7161425 16 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ####################?8:@2@B;?@B@B>@DBBCB@DABCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10403193 16 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT 7:;72=9<=:4@@?A?5?>BDC>BB?@>8:??@B9BBDCD>@BBCCCC@ACCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14326039 16 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##########?<@??>B1ABABB>BDABBB=ADBC@ACCCBC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16063981 16 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT 
7@7:7+>:=9@@B@=D>4B>@B6@B>B=<>DA@DABC=AC@BC=CCBCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20404151 16 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ############?<>A?<<=73:55235124813808A?>8237:=;9?A000/000000?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24289923 16 CHROMOSOME_I 91 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ###############################AA>A=@A@@@B;CCB@D>CCBCC@DCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28545477 0 CHROMOSOME_I 93 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTAA CCCCCCCCCCCCCCCCCCCC@@B@ACCBCCCCCCCCCCCCCCCBCBCC@C;?:7:::@>A>@>B>;@=A: AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3687468 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG BCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCBCCCCCCCCACCCACCCCCCCCCABACCCDACA>@@:@############################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6194634 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCC@CCAACCCC@CACCC?>ABBC=@@1@CB=CB6><@88:377;ABBBB##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6316515 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDDCDACBDDBDDBBDB>A>?B@?@5@???##### AS:i:0 XS:i:0 
XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7638809 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CACCDCADACBACB@CDABB@DBB@@>B>B>B=BAAC>>@:@@@5;79@########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12817194 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCBCBDCCDCCBCCDDAABBB@DBCBDDD@DBB@CB>C= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16588016 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCD@C=CCCBCDC@BBCB@?A@A?@>BDBBB>BAA):=:7D@@D? AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21115573 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCDCABDCDD@ABBBB@=BBBA=DBBBB;>B?D?6>B?D?############ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22487510 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCBCCADBBDDABACBB=AA?@DDBA:BD?A??B::==::9:*>@############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9644932 16 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG 77=9==5845?A=??AB@@3BB@CAB;CCCAC>CBCDB@CCCCB?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12890063 16 
CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ######@(<9?B4;B:A@;8?BBB9B@>BDB;DBDCCADBCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16020961 16 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ##################?:B*B@@?>?:?57=36C>@@AA=63ACC@CCCCCCCC@@CCBBCACCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28179307 16 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###############?.?=@:B@??@@@@6?B>B?CBA;DBBBB;BBBBBADDBDDBCDDDDDCCACDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28514209 0 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCDCCACCCCCACCCCCCCCCACDCCB@>AABB>=B?A??=BBB>B8=>>8><@?62;B=@############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29025503 16 CHROMOSOME_I 94 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ##########A@63@;CCAC===@=>B?BBBB?BBBABBDBCDDDCDBDBCBCCCBCBCACCCBCCCCCCACCCCCCCCCCCCCDCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25192538 0 CHROMOSOME_I 95 0 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGGCGAGGTTTAGCAGGAATGCCGAGACTGATACGACAACCCAGCCTCTGCCTCAGC AAAA>?A>A?@@B@>>?=>?58:>:AB<>B>>:B################################################################## AS:i:-56 XS:i:-56 XN:i:0 XM:i:28 XO:i:0 XG:i:0 YT:Z:UU MD:Z:42A2C1T1A1C0C2A0G0C0C0T2G0C1T0A2C2A1G0C1T1A0G0C1T0A5A0A4A3 NM:i:28 +SRR065390.28309751 0 CHROMOSOME_I 95 0 
100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAATCCTACGCCTAAACCTACACAAAAGCCTAAGCCTAACCCTAAAG AA>AACCCCCCCCCCCCCCCACCCCCACCCC5BBCCCCABDDDDBDC>@B################################################## AS:i:-20 XS:i:-20 XN:i:0 XM:i:10 XO:i:0 XG:i:0 YT:Z:UU MD:Z:56G4A6G4A0G1C0T14G5G0C0 NM:i:10 +SRR065390.28547696 0 CHROMOSOME_I 95 0 77M1I22M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTAAGCCTAAGCCTAACCCTAAACCATAGGACAACGCTAAAGCCTAAC B?CCCCCBCCBBBB=@BB:>:>B>@B>BBB@B<@@B@=ABAA;>>B?D>DAD################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10373067 0 CHROMOSOME_I 96 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCDCCCCCCCCCCCCCCCCCCACCCCCCCCCBD=CCCCCC?CA??<=AB<=A@A@@6A?AAB>@?B@B;B>?8DD>AAA>?>????3>8989A=;4A?;8;17888@############################################# AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:86C5C3A3 NM:i:3 +SRR065390.14825713 0 CHROMOSOME_I 96 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCBBAAB@AD@BA=BBB=ABABBC@4>B=5=>AB>DA5<@@@86@=@< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15608785 0 CHROMOSOME_I 96 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCBCCCCCCCCCBCDDBDABBB>BBA@B>ABBABABBB>A4B=;9>5AA######## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29977405 0 CHROMOSOME_I 96 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCDDCBBBDDDDBDDDDDAADBBDABAABB>ACCBC>:<@@@##################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7301839 0 CHROMOSOME_I 98 1 100M * 0 0 
CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTACGAGTAAGCCAAAGACTAAGACTA CCCCCCCCCCCCCCCCC4>@################################################################################ AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:69A6A1C0C6T3C5C3 NM:i:7 +SRR065390.253303 0 CHROMOSOME_I 99 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCB?BC@=CCBC@BBC?;@############################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:89C9A0 NM:i:2 +SRR065390.1116804 0 CHROMOSOME_I 101 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTATGACTAAAC 0000089938?:?>8:8:<>785;;@3@@@######################################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93C2A3 NM:i:2 +SRR065390.15284472 16 CHROMOSOME_I 102 0 79M3I18M * 0 0 AGCCTAAGCGTATCCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAATAAGCCTAAGCCGAAGCCTAA ###################################################A9<>>>BBBB>BBB>>88>/???89B>BBCC?CC00000:9:<;@B?CC AS:i:-24 XS:i:-24 XN:i:0 XM:i:4 XO:i:1 XG:i:3 YT:Z:UU MD:Z:9C2A0G74T8 NM:i:7 +SRR065390.32407240 16 CHROMOSOME_I 102 1 100M * 0 0 ACGCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ######################################A=::@@A853;200*.054B5?=0=AA3338900.0/CCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1G0C97 NM:i:2 +SRR065390.2751803 0 CHROMOSOME_I 105 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCGAAGCCTAAGCCTAA BCCCCBCCCCCCCACCCCC>CCCCCCCBCBCCCCCCCCCC==:AA@CCCCB=BC:+0100C?CC?######################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80A4T14 NM:i:2 +SRR065390.9101383 16 CHROMOSOME_I 105 12 100M * 0 0 
CGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################################################A<7?<1?CCCC@ACBACAC?CCCCCCCCCCCCCCCBC@CCC AS:i:-6 XS:i:-18 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T35T0A61 NM:i:3 +SRR065390.19805019 16 CHROMOSOME_I 109 1 100M * 0 0 GCCTGAGCCGAGGCCTAAGCCGAAGCCGAAGCCGGAGCCGAAGCCTAAGCCTAAGCGTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #################################################################C>C:CCC@CC@CCCCBBCCCCCCCCBBCACCCBCC AS:i:-18 XS:i:-18 XN:i:0 XM:i:9 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4A4T1A9T5T5T0A4T16C43 NM:i:9 +SRR065390.634578 0 CHROMOSOME_I 110 0 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAAGAATAAGCCAAAGCCTAAGCCTACGCTTACGCTTACGCCTATGCCTA >@C@@CCCBCCBCCC@B@5BAAAAA@BBBBB@BBAB(BBB?;;;?####################################################### AS:i:-20 XS:i:-20 XN:i:0 XM:i:10 XO:i:0 XG:i:0 YT:Z:UU MD:Z:49C4C0C6T13A2C2A2C2A5A5 NM:i:10 +SRR065390.14445465 16 CHROMOSOME_I 110 1 100M * 0 0 CCTGCGCCTAAGCGTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ##############################C?BBB=AAA:'000(0=AAA>?CCCC@8@BCCCCCCCCCCCCCCCC?@C@?CC@BBCCCC@C@C@CCCC AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C8A0A3T12A72 NM:i:5 +SRR065390.19877275 16 CHROMOSOME_I 110 1 100M * 0 0 CCTAACCCTACGCCAAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ##############################@=9==(B?<B>B>>B>>>A>B>BBB<>BB888:;00./0::72;CCCCCCCACC AS:i:-16 XS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A0A4A4T0A5A0A4A72 NM:i:8 +SRR065390.4419 0 CHROMOSOME_I 111 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAA CCCCCCCCCCCCCCBCBCCCBCC8CCCC?C@CCCCC@CC@C@CC8C?@5BCC@;@CA>@@=:>>532:;8A@A@########################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92A7 NM:i:1 +SRR065390.6629332 0 CHROMOSOME_I 111 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCADCACCC@B=ACBBB=CBD?@BB>B@DB>>.>=>>*4;77A@?A>DB<:?=@@,8 AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80A19 NM:i:1 +SRR065390.13561281 0 CHROMOSOME_I 111 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCADDCCCC?CCC@CB8?*?:7==8==@<@:@######################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:97T2 NM:i:1 +SRR065390.13848363 0 CHROMOSOME_I 111 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCBDCC@CCAA?DCBBADBD>>>BB>>B1DAB;B6B9DD###### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:99A0 NM:i:1 +SRR065390.15692749 0 CHROMOSOME_I 111 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCC@CCCCB@BAC??>B@B@B?<:??C8@?A8A####################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.27886908 0 CHROMOSOME_I 111 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCBBBDBD@BCBBAB@############################################# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:56A43 NM:i:1 +SRR065390.32433607 0 CHROMOSOME_I 111 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCDCCCD@BBCB*A:A?:@BBB?BA@BA;BBBBB=B>>B@;=>;<::82>8<59 AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:58G41 NM:i:1 +SRR065390.32577253 0 CHROMOSOME_I 111 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTAAGCCTAAGCCCAAGCCTCAGATTAAGCCGAG CCBCCCCCCCCBCCCCCCCBBCCCCCCCC=CCCC?CCCCCCCCCCCCCCBADCCDACABCD@DAB@BBBBC@AB@@################################# AS:i:-5 XS:i:-5 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:6C93 NM:i:1 +SRR065390.27208864 0 CHROMOSOME_I 112 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTAAGCCTAAGCCAACGCCTAAGCATAAGCCTAAGCCTAAGCCTAAGCGTAAGCGCAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCC?####################################################################### AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:41C12T1A8C23C5C0T3 NM:i:7 +SRR065390.14809419 0 CHROMOSOME_I 114 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCC=CCCCCCCCDCCCCCDCCDCCBCBDCCBDBBCDBDB>BDABAABBC@BCBBC>>A?B?B>?AAA>A4A=9@####### AS:i:-4 XS:i:-4 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:22T77 NM:i:1 +SRR065390.20842573 0 CHROMOSOME_I 114 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCC@CCCBDDBCDDB>D@A==:9@256&(3036-@@@3@#################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:68C31 NM:i:1 +SRR065390.349756 0 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCAC@CCCB=B<>BB@= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8856958 0 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCC@CCC?CCCCCDCCBDCDDBCA?>AA@@?BB?;B::2=4=B################################# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19124748 0 CHROMOSOME_I 115 1 100M * 0 0 
GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCCCCCCCCC=CCCCDBCCDDACCB<<=@?B@6@?BB>B>?>9>(:=979A#### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23327180 0 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDCDCCBBABA@@B@BB6BB@BAAAC?CAA>=@@<(@########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24595212 0 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCACDCCBB@DDDAA>DBDBA;AABB?6DBD>?=??B>@9<:=<;46@@@:@############## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.802879 16 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ###############?05@@@@;BBAA=CCCBA6@@CCCC?CCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9215590 16 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ######?=?:@6B@@C<=A?A?8@@B@B@@B?8CBCDBB;>==BDADDBDBADBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13357175 16 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT B>::@@D<@??:?DB?B;D>BAB=B@DB>@DDBDBABBDC@ACACCDACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13460510 16 CHROMOSOME_I 115 1 100M * 0 0 
GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #########################C@<B>B@@@@8BCCDD>ACDBCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24138882 16 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##############################@?BA?<;BCBB@ABC;DBA@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29591166 16 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #########################@@<81=@@<BBBAADDBDBDDDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29698542 0 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCCCDCCACCBCCACABAABCBABBBB@A@?@AAA?####################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30439372 0 CHROMOSOME_I 115 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD@BBBCDDABCCBCDBC>BA@DABB>BBB;>BABABB>B?>ADB>B AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6134052 0 CHROMOSOME_I 116 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTACGCCTAAGCCTAAGGCTA CCCCCC@BCCC@BCBBB>@B9<;88@@:@:8778;88;;>=:3@@76384@<@@############################################## AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:74T7A13C3 NM:i:3 +SRR065390.10167659 0 CHROMOSOME_I 116 1 100M * 0 0 
CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAAGCGTACGCCCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCB=;ACCC@=AACC=?@@=@;(?@?################################### AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:85C5C2A3T1 NM:i:4 +SRR065390.14472485 0 CHROMOSOME_I 116 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCGTAAGCCTA CCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCDCCCCDBDCDDDDCBDDDCDDBBBBDBBBAABD>DD3:=>A><==A################ AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:86T4C8 NM:i:2 +SRR065390.16076532 0 CHROMOSOME_I 116 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGCACGCCTA CCCCCCCCCCCCBCCCCCCCCCCCCCCCC@BCBCCCCCCCCCACCCCCCC?CCCC4;???@=B=C?A?A;??<B;==?############### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:91C0T1A5 NM:i:3 +SRR065390.28531075 0 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAC BCCCCCCCCCCCCCCC@CCCCCCACCCCCCCCCCCCCC=CACCCCCC@BDCC################################################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:99A0 NM:i:1 +SRR065390.32393251 0 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCC@BCCA>B?CB@CBBBB=@B?BBB75@;<>BABBABBB@@CDAA@CBBCCDCCD@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2710794 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################A,A?@9A59>;<1@AB<@5=BC9;9:3CCCCCCCBBCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3026627 0 CHROMOSOME_I 117 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAA CCCCCCCCCCBBCCCCBCCCCC@CCCCCCBCCCCCCCCCCCAC?CC?CACCBBCCAC=?@@@?@BBAB@C############################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:97T2 NM:i:1 +SRR065390.4690010 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ;=<@@9355*;B@@B>B>BB:BBDBBB4BBAA;8AACB@@CBBC;DCCCDACCCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4869028 0 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTACGACTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDCCBCBDCBBBBBB?########################################## AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:87A5A1C4 NM:i:3 +SRR065390.5729393 0 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGTCTAATCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCDBBDADDD?DDA?BD>B>CBBBBD:>2B#################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:89C4G5 NM:i:2 +SRR065390.7269481 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############B??@@@@8>@BBB>BBDBBB>ADABBBCDDCBCCDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10010220 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########AA?==??>BBA6BB>B>;AB?DA@@DDBC@DCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10563492 16 CHROMOSOME_I 117 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########################################@:A@6@??AAC@C@?3B@?BCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11835411 16 CHROMOSOME_I 117 0 100M * 0 0 CCAACTCAACGCCTCTTCCTATCCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################################CCA=ABCCCCB@6CC@CCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-20 XS:i:-20 XN:i:0 XM:i:10 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T2G0C1T1A4A0A0G4A0G77 NM:i:10 +SRR065390.11890498 0 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCB?CBCCCC@6>BA>>>ABC>>@DABBD>D@BABDDCDBCCBCCC@DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13274061 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################A91=:?B?D;BBB>AA=BBBB<=BCA@8AAB@CCCCCCC@@BA6>BBB@D@B?BB>BBABDAD@A@CCCBCCAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15182324 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA AA?>5BB?D;BABB=>BBBBAABB;DABDD@CDBCAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16508065 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA =297=@>>:@=?@:8B@BBBA=>BCB@C;CA8AC@AAACBCC@CC=4CACA;=@@###################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:87A12 NM:i:1 +SRR065390.16618625 0 
CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTAA CCBCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCC@CCCCCC@CA@A>?ABACCC?8A@AA=CB?AA@BB(??B=A*553;;8>?66:9:>+?;79 AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:74A25 NM:i:1 +SRR065390.16909890 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA A.=:@>>B8@B@B;BBACBAB@BCADBDCCCDDACCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17985359 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################A<48@B58=9=>=8=?B?>B3B=CDDBBB>@CDBCBCDDCCDCCCCCCCCACCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20428024 16 CHROMOSOME_I 117 1 100M * 0 0 CCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCGTAAGCCTAAGGCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######################################################################################AAAA1/555/0000 AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T29T4C10C52 NM:i:4 +SRR065390.25333072 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####################?B>DD?>BBBB;?>@BABBBACABBDDC@DBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26734455 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ################?A?@=;CBBA?>BAB>AABDDAA@DCDBBDCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26915421 16 CHROMOSOME_I 117 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################@?=:6@;;7/4;5550>DD>>BBBB@B1DBBBB:;CABD?DC=AD@BACCACBCCDCCCCBCCCCCACCCCCCCCCCCCCCCCCBCCCCCCCCCBCBCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32532096 16 CHROMOSOME_I 117 1 100M * 0 0 CTAATCATAAGCCTAAGCGTAAGCCTAATCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################################################################A5?A000003699;:;<8:9;<99BC@CCCCCBB AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4G1C11C9G5G65 NM:i:5 +SRR065390.33077092 16 CHROMOSOME_I 117 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############?7=;?=A?>@BBA=@?DCCA@CACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4104352 0 CHROMOSOME_I 119 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCDBDCCCDBACCCDACCCCBDBDDBD@BBDCBBBDBBBABBABBABA>BAC;>@>?####### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98G0C0 NM:i:2 +SRR065390.11335401 0 CHROMOSOME_I 119 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCCAAGCCTCAGCCCAAGCCCCAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCC@CCABCDBCACBBBCBBB:A>??C################################### AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:71T5T6A4T5T0A3 NM:i:6 +SRR065390.19440436 0 CHROMOSOME_I 119 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCGTAAGCCTAAGCCTATGC CCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=@CCC@C@BAAA################################################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:82C14A2 NM:i:2 +SRR065390.31062597 0 CHROMOSOME_I 119 1 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCCAAGCCTAGGCCCAAGCGCAAGC CBBCCCCCCCCCCCCCCCCCCCCACCCCCCC=?CCCCC?CC;;@@BBB@B@BC<@;;9>7@:=@@################################### AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:71T5T7A3T4C0T4 NM:i:6 +SRR065390.21240567 16 CHROMOSOME_I 119 1 100M * 0 0 AAGCCTAACCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ########################################A::A48>,57/C6CC@CCBCC@CC?CCCCCDCCCCCCCCCCCCCCCCCBCCCCCDC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:8G11G79 NM:i:2 +SRR065390.473388 0 CHROMOSOME_I 123 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CACCC@CCCCCCCC6CCCCCCDAB?@A=C;CA@=BA######################################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5212583 0 CHROMOSOME_I 123 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCCBCBCDDA>BDBD=>?B>BB3B@B@@@DAC?C;>D:?<(79.:@<@@= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6106911 0 CHROMOSOME_I 123 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CBCCCACCA?79878B@@?B78678B??>?>=AAABBBBA8A;>>B>:>BB<>?>;???######################################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7281918 0 CHROMOSOME_I 123 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CDBCCBCDCCC@CDDDDBDBB@BBABB>AB@BD>B?85=797/@<:8 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14025609 0 CHROMOSOME_I 123 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCC>CCCCD@BCDDBBCDAC?@A@DDABDA=D53885@?:A@@BDBB?3?>;=A######## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19641253 0 CHROMOSOME_I 123 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCAACCCCCCDCD@D@B@BD@B>BDBAB8>??A??;@B@D;?3==?B6?A##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20334733 0 CHROMOSOME_I 123 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCACCCCCCCCCCCCCCCCC@BCAD?@@@BB@<1?################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22745112 0 CHROMOSOME_I 123 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCB@BDCCBCCACAAD=DDBA>B>@BB6B@AD?###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25556189 0 CHROMOSOME_I 123 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCDCCCCCBCCCCCCCCCCCDBCDDDBBBBBBBCBCBAAB@BBA@################################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.27641394 0 CHROMOSOME_I 124 0 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAACCTAACCCTAAGCCTAAGGCGTAACCCAAACCAAAGCGCAAGCGCAAC 88;68;;:;9@?>AA@CCCCCCCCCCC@CCC@C<>>>CCAC@A-@####################################################### AS:i:-28 XS:i:-28 XN:i:0 XM:i:14 XO:i:0 XG:i:0 YT:Z:UU MD:Z:51G5G12C1T0A1G2T2G2T4C0T4C0T2G0 NM:i:14 +SRR065390.9280611 16 CHROMOSOME_I 124 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG @@D=6@@6@@A?=ABABAA@B>=BCDBCDBBDBBDBDDADC=DCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12478066 16 CHROMOSOME_I 124 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #####B@@A?DB??@A:?AD=CBB>BCBBBADBBBDBABBDDDDCDCCDC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25264436 16 CHROMOSOME_I 124 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #######################@<766>><:4==:5==;C??@?=ACCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32908522 16 CHROMOSOME_I 124 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #############################@B=>><:CAB@=DCCAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24389795 16 CHROMOSOME_I 125 1 100M * 0 0 ACGCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #######################################BBB=>DABB>BDCDDBBBCD@DCDCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1A10A87 NM:i:2 +SRR065390.7886550 16 CHROMOSOME_I 127 12 100M * 0 0 CCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #####?6@??AA;AAD:6@?A=CAC;;<;7<1B@@>BDDBB@B@BDBABBAB>>;BB8=;@BD::;>D#### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3825439 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCCCDCCCCCBCCCCDDCCD@CBCCDDCBDBABBA@DBDBBBBCBBABBC>BAAA>D@######## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6176897 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCC@CCCCCCCC@ABCCAACCDCBBBA@ABABA@:@:??BBBA@DBAAAB>?B?:>B@@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6179290 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCDACBDCBABDBD@BBDDDBAAABBA>BCABB>A=4>>/=0298696=@##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9176327 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCC?CCCDDACDBDC>@BBAB=;?BBB4@@BD:3???5=:8>>9@:6@##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10096114 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCDCCDBCBADABDABBAA=B>B@?BD;4@7>9=7==>>> AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12408086 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBAA>DBDBB@BBAD=@?B?B?4A??B:A?AA<(@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15769247 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCACDDADDBA@B@BBBB>B6B?B@?########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16538618 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCC?ACCCCBCCCC@ABB@B:6BB8A?7@<><@:9?################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18630353 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCC@@CCDA?BBDBB@B@BB;@?BB>B;?<>=>69@:86A3=@@##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21703645 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCDDCCCA?D?C@CB@@B@@CDA@:==@@@<@@=?=>BCABA##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24795092 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCBCCCCCCCCCCCCBCCCCBCCABCACA<;;??C@;BBBD?6@@@@>2?@;8::>9>A11@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31111380 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCDCCBDCBDCBDBBBDBDDBBBA@BBDBA@BBDDAACAB?:DD:??@><@<@@@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1774013 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ###################@=379:@=??D>@<7D>>@B@DDB@;: AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 
NM:i:0 +SRR065390.3893380 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ####################A?0@A@=>8CCC@B=@CC?B@@CCAB@ACCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4975116 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #############B??8B?B8>>DB?=;8?BB@B@;BBB=>DCCBCABCCCCABCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5530338 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCC?CCAAACDDBADBADDBBBAB@BB@;@B??################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6465879 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #####################?;@B;;>5A38;98BBDBBDDDC@BCCBCCCCCCCCCCCCCCDCCABCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6537766 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ################@8?=?D=6B>:==>?C;BBA5DBCB@BBAB;BCCBCB;CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7401229 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCDCDCCCCDCCCCCCCCCCCCCCCCCCBBCB@CACABCBAA@BBB?=B;>B=5AA??@AB@=6=8=CC>AC;:??8;5/-51B>2A###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11449581 16 CHROMOSOME_I 128 1 100M * 0 0 
CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA 7=741.8A;4B>>AAAB>AB?>B@BA@?A?/000(@AA>:C=;CCCCBCCCCCCCCBCCCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11762556 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACDACBADDBADDBB@DB@BBDBBACBCCC>A??>?<8B### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11780901 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ######@@@@=<@@@68=>=>BADABBB6BB;BB>@CDBAA@CBBDADCCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12246013 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCC@CBDBB@DBB@DBCBB;D@D?@B>?B=0=====;:@@@@@:@CCCCB?ACCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13762870 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ###############A<>=@92757;;?8ABBB>B@A;BBB<>;B6B;@AAA;:BD:?@*5@@17@### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21762505 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #############?:1,@>B<<===>8@6@DB?@?ABBB@ABADBBDDBCDBDCBCCACCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21990338 16 CHROMOSOME_I 128 1 100M * 0 0 
CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #############A=452==:=?=?@:?::@5?>=8:<=C?C@CCCCC@CB?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23838269 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCABCCDABDDCBABBBB>@=93=7=BBBB?0?:A=47@7@8/8<8@######### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24259901 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCBC@BCC@;CB@@BB><@@B@;;A??@:.?>9B:9>@@@(6=@####################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26950543 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #################################################A@B?@@CCACBCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28049981 0 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDCCDDCCCDBBA>B@>B>@?BDB@???<:?.75377?DAAA############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28919001 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #########################?:6>6@B6@@@B>D>AABBABDBACDADBBDDDDDBCDCDDCCCCAC@CDCCCCCCCCCCCCCCCBCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29600181 0 CHROMOSOME_I 128 1 100M * 0 0 
CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBBDBDB?<@@6@?>9;9@############################# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29907490 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #########################@:<=2A8=B>6;A?@B@?>BC@B;CACBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30476269 16 CHROMOSOME_I 128 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ########################@9/<22A?5:9@D>=D>A@BCAABDDBBDBCDBDCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9576969 16 CHROMOSOME_I 129 1 100M * 0 0 CTAAGCCTGAGCCTAAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###################################################AA=AA4A?AA.=AA;>7<>>58>>>71;<>;;7300000<>9>>@A??? 
AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:8A11A79 NM:i:2 +SRR065390.9988260 0 CHROMOSOME_I 129 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCCCCCDA>A;C@=@;C@@@A@@############################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:79T20 NM:i:1 +SRR065390.10035310 16 CHROMOSOME_I 129 1 100M * 0 0 CAACGCCTAGGCCAACGCCAAAGCCGAAGCCTAGGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################################################A4?==:C?C>C@C@??><>@:@C@B@A@@CCCCBCACACC@CCC AS:i:-16 XS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T1A5A3T1A3T5T7A66 NM:i:8 +SRR065390.29366441 16 CHROMOSOME_I 129 1 100M * 0 0 CAAACCCTAAACCTAAGCCCAAGCCCAAACCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###################################################A=@A>@BBBDA=CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T2G5G8T5T2G71 NM:i:6 +SRR065390.3937302 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCC?CCC?CCCCCCBCD?CCBCCBDC@CCB@==@A@@@B?8B?B?===?@?@A>?>A AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4086234 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCAADCCACADDAB8@BBBB>?BBBD>B?BBACAB>BBA########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4778767 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAGCGGAAGCGTAAGCATAAGGCTACGCATAAC CCCCCCCCCCCCCCCC@ACCCCBCCCCCCACCCC@ACCAC=9CBCA<=?CA>=;BA@?########################################## AS:i:-18 XS:i:-18 XN:i:0 XM:i:9 
XO:i:0 XG:i:0 YT:Z:UU MD:Z:58C12C0T4C5C4C3A2C3G0 NM:i:9 +SRR065390.8295829 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDCBDDBBCBBABBBBB==BB@@8@?@:>@@@A.:;8>A*<=8< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10871940 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDDBDDCCDBBBC>A############################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12095552 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTCAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDACCCBADBBC@4A)0.//A=>=6?>>:A>=@D99.@################# AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:84T12A1G0 NM:i:3 +SRR065390.13775982 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCC@CCDCBDADBC@DDBAB>BBB>=??@B@@?@>BD AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15558825 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCAACCCCDCDBDBCDC@DB5BB@>@@;=A;?4@?############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16058509 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCACCACCBBCACC@DCBDC>CCABCBBC3===;DACBA;A:?########################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 
+SRR065390.19900294 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCABACCDBDDABDABBA####################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19995334 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCBCCCCCCCBCCCB@BDCCBCCCDBADBBB@BBABBDAB@ADACD=>88A@@:=<>=A####### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21017432 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDCCCC@DDCCCDBA=AA=>>:?<@BB8>B############################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21103370 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBCCCCBBCDDCB;ABABB>?BB?@############################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23568099 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCDCCCC:ACCCAC>B>@@;C9=<8==BCAACCBB8;:69;37:5:3;9?########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24833775 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCDDDCBCDBDDBBDBCDA=DBBDBBB>BA6BA>BB??:??A?AAA@= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24981819 0 CHROMOSOME_I 130 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCC?:CCBCCCCCCCBCCCCCBCCCCCC@CCCCCCCCCC@@CCC?CCC:C?CBCBC@BCB>@>@@.=9>9B@BB5@#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26074983 0 CHROMOSOME_I 130 0 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCACAAGCCCAACACTTACACCACG CCCCCCCBCCCCCCABCCCCCCC@CCCC>CCCCA@?CCBC3;A==<=;AD3BD;D@BB?=AAAA?>??###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28071689 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACBCCCCCACCBDCBDC>BADA?BB=A@>>8=48(;88/? AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28757581 0 CHROMOSOME_I 130 0 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACACAAAGCCTAAACAAACGCCTATGCTTATTCAAAATCCTAAT CCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCC=CCC?C@8?>>>B>B5B################################################## AS:i:-30 XS:i:-30 XN:i:0 XM:i:15 XO:i:0 XG:i:0 YT:Z:UU MD:Z:57G0C1T8G1C0T1A5A2C2A0G1C0T2G5G0 NM:i:15 +SRR065390.29809470 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDD@CACDDADBDAAADBDB>;DBBB>BBABAC>ACB@3@@6:95*@########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32683144 0 CHROMOSOME_I 130 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCBCCCCDDB@BDBD>ABBADAABBBBA?>ADB>=AAABC>>C AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.24353773 16 CHROMOSOME_I 130 0 7M7I86M * 0 0 
TAATCCTGCTTAGTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA @@@9@<>>=>1BBB>?@B>A>?B;AB@>>B<;<>>?7?C=CCC907=9728*82*3-3:=<9>@>BA>=+>@############################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:72A27 NM:i:1 +SRR065390.9158702 0 CHROMOSOME_I 131 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCDBAAD@C@@=>BBCBBDB>ABBBDABDAADBB?D############################################# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:55A44 NM:i:1 +SRR065390.27452830 0 CHROMOSOME_I 131 1 100M * 0 0 AAGCCTAAGCATAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCC('/(/BBBBBCCCCCCCCCCCCCCCCCCDCCCCCDCDDCCDCBD?DDDBDDBBBDBDABBABB>A:B@;@>3@?@?:;6:@/?688<3:: AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10C89 NM:i:1 +SRR065390.31152354 0 CHROMOSOME_I 131 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCDDCDCCDDDDDDBABDBBBBBBDBBBBAB>ACA@>.@D1/;>>@<6@##################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:79A20 NM:i:1 +SRR065390.33386269 0 CHROMOSOME_I 131 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCACACCBDCCBCDDCABCABCABBBCB;>BB??AA;9=8=?>BB?BB4;B;B>B=B@D=CDC@CCCACAD@DAC@CCCCACCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8335531 16 CHROMOSOME_I 133 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ?DBD?=A?A?66A>BB<>B@B@??@>DAABCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 
+SRR065390.8716276 16 CHROMOSOME_I 133 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ###########@??8B;D@@BAB>BB@BB>BB@>@;@CBCC@C?@ACCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12206123 16 CHROMOSOME_I 133 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #####################################@:BAA?AC@@A:5<@53;97>DACCDBDCCCBC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13431089 16 CHROMOSOME_I 133 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ###############@3??A?:B@=>ABDBBDCBBDABADBBBCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14308521 16 CHROMOSOME_I 133 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT 35880;A6AA?DAA@A=AACBC;CAADBBBBDDA@BBBDBACBBCDCBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17911869 16 CHROMOSOME_I 133 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ####################################ACCAA38C>@C>@ABCCCCCCCCDCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26803411 16 CHROMOSOME_I 133 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ############AA@@31BDB?>@B@A?>8B@BBD;BBBBB>B@@BBACBCCA@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29004521 16 CHROMOSOME_I 133 1 100M * 
0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ###################A>??=?BBD>BBADCABBBD?DABBBDBADDCACDDACBCCCCCCCCACCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32891184 16 CHROMOSOME_I 133 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #########@@:@@?A;?@??@@@BBBBC?@@@BB>>BCCACC=A==;00000.0000/0+00@=A@################################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:73C26 NM:i:1 +SRR065390.19147644 16 CHROMOSOME_I 134 1 100M * 0 0 CCTAAGCCCACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ####################?>?@BB0B?<@B=@BDDBABCCBCDCCDBACDCDACCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:8T1A89 NM:i:2 +SRR065390.24348183 16 CHROMOSOME_I 134 1 100M * 0 0 CCAACGCCTAGGCCTGAGCCTAACCCTAGGCCTAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #################################################################AA@AAAAA?AAA:?A@7@2@:@A@:>>B9>A0?A0 AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T1A5A4A7G4A9T61 NM:i:7 +SRR065390.26237014 16 CHROMOSOME_I 134 1 100M * 0 0 CCGAAGCCGAAGCCCAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ###################################?CA8B@CCC=BCCCC?DCCCCCCCBCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBDCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T5T5T5T79 NM:i:4 +SRR065390.26715609 16 CHROMOSOME_I 134 1 100M * 0 0 CCCACGCCTAAGCCCAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #########################@4/>BB@A?BBC@B@B@CD@ACCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T1A9T8G76 NM:i:4 +SRR065390.29763377 16 CHROMOSOME_I 134 1 100M * 0 0 
CCTGAGCGTAAGCCTACGCCTATGCCTAAGCCTAAGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ###################################################################?A9?A?BB>><>;AAAAA>>;><>8>8>??B?: AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A3C8A5A17A59 NM:i:5 +SRR065390.5592165 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCBCCADCCCCBABDD@DAB=DABB@@@B5>@==B>=B:=>9>A@?><(@/8<8@6:96 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5867686 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCACCCCCCCDCCC@;CCCCCABCACC>DDBBBBB?BADAB>BBB>C;?>>?>B8;??5;@.;>8@## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10729660 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBD>B@BCCCCCBCADCB?BABBBB:;B@BB>BADBBC;BBB:B0?@??B>CB>>A>BAA6=>? 
AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12631494 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCBCC@CC?@BB@CDAABBABAA1?>A?=AA=A;9@?A@*?/B# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14201144 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCC@C;C>ACC?C=CCCB6BCCB5BA@@C##################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16211946 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCADCCCCCDDBBCCABA@CDBDB5DBBD@279;9B>@=@B@@?<@@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16765976 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCBACBCABCD@ABB>AD<=<:87?<=?98=8?########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16773580 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAATCCTAACACTAAGCCTAAACATAAGCCTCAACGTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC6BABB<@3>?=1==;D?6D########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28919583 0 CHROMOSOME_I 135 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCACCCCCCCAACBAB@=BBBBB=B@B8:<@=D??;?7@8<19=992@6@################# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33072912 0 CHROMOSOME_I 135 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCACCCCC@BBABAADBD@<6B?3?A@A::<:9 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4862525 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCDBADCCD@BBDBBDDDDD>A>==?;?BB??B::995>.@@@############ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5774599 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCBDDBBD@@ADBBD@BAABB?D>;?B?>D?D:A2?6=79;9 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13990332 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCACCCCADCDBCCBABACCBBBBABBB??C?;B97;=3>=>>=5-;37=<8=< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17419760 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCACDCCAABCDDD@?B@C@BDDBB>>ACBB6DB########################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.21031614 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBCCDCADCDBBB>DBAB?:BB?@?>BB>@3DABB?ACAC>????D@########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29523788 0 CHROMOSOME_I 136 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCADBDBBBCD@CA@BBBABBDABBDBABABBA############### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29752320 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCDCCCCCCCCDCCDCDDCDCBBBDBB>ABDBB@ABABBBD;AACBAAAAACCAABAAACC@@:;9@############## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.32757960 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCDCCABABBCA@CCB@@BDD@@B@BBB@BBB>B@;@B8@87B;;;>5@<@?=62A##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1606697 16 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###############BBBB>?4@?BABCB>CC8B@BAACCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8349937 16 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG =43?@B6>:???:D?B>B>D?BBB?DABDDBBADCD;CBBCDACABCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9584086 16 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #######################??>A?AA>=9=5>A>AA>AA1@>@D@>6B??:?>:?B>??>?B:@?>BBBBBBDD>8?AA??>8@BB>=ABCDDACC@CCABCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15429543 16 CHROMOSOME_I 136 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ######################A=:B6ACAAB?CACABBCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16332923 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCDCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCC?CCCC?CCCCCCCCCA@?BCCBA@@CB=?:5B>BCCACCAC@CCCCCCCCCCCBCBCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19757045 0 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCC@?CCCAAACCCCA;>?>?>CA@BA>AA<<BBD>@D@CADCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23337581 16 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #############################################?7<1:8?CCCCCBCCCCCCBCCCAAA??::<<<<<< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25202138 16 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###############A@>::>BBB>@@?BB@>@=BDAA@DCCCB@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28056173 16 CHROMOSOME_I 136 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #########@@8'55<@3=@A:?=BADDBCDDDADCDCACCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28190925 16 CHROMOSOME_I 136 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #########################@8???30235=*??@CBBB>;CC@CA@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13943605 0 CHROMOSOME_I 139 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAGACT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCACCCCC9@BB>A87=>@?################################# AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:85C11C2 NM:i:2 +SRR065390.323629 16 CHROMOSOME_I 139 1 100M * 0 0 GCCTAAGCCTAAGCAGAAGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##########################ABBA@@@==0===A8@?>>@@;CC=@@7@C8C8CCCC@CCAC>@C=C>CCABBCCCACCCCCBACCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:14C0T7A76 NM:i:3 +SRR065390.14693556 16 CHROMOSOME_I 139 1 100M * 0 0 GCCAGCCCCGACGCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##############################A=@:;==AA===7=>?BBCCC@@CACCDC@DCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3T0A0A0G2T1A4A83 NM:i:7 +SRR065390.30350798 16 CHROMOSOME_I 139 0 100M * 0 0 CCCTACTGCTCCGCCCATCCCTACTCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #########################A@=;:=@?:=:>CC@<;@CCBA5DCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-22 XS:i:-22 XN:i:0 XM:i:11 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0G4A0G0C2A0A3T1A0G4A0G75 NM:i:11 +SRR065390.4780763 0 CHROMOSOME_I 140 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTA CCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCDCCDCCBCCBDBADC@BB;CC@=A;CA>=@BACB>@ABB@A>AAB#################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:95G4 NM:i:1 +SRR065390.10506396 0 CHROMOSOME_I 140 1 100M * 0 0 
CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCA CCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCCCDCAA9ABB@1C@@@A@@A.B@A==(77;73:?6==/@6>@##### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:35G64 NM:i:1 +SRR065390.17927737 0 CHROMOSOME_I 140 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTA CCCCCCCCCCCACCCCCCCBCCCCCCCCCACC?CCCCCACCBB@BBDAABA?B=?########## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92T7 NM:i:1 +SRR065390.27209146 0 CHROMOSOME_I 140 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA CCACBCBCBC@@@AA,CCCCCCC=CCC?CCCCCCCCCCCCCB@CBC>9=8.;=:=789.177042357??################## AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:54C45 NM:i:1 +SRR065390.8223160 0 CHROMOSOME_I 142 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCAAAGCCTAAGCCCAAGCCAAAGCCAAAGCCTAAACCTAAGCCGAAG CCCCCCCCBCCCCCCCC?CCB>BB::??3?ABA>B>ABAAB@B@B@BBB################################################### AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:54T11T5T5T8G8T3 NM:i:6 +SRR065390.28374598 0 CHROMOSOME_I 142 0 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGACAAAGCATACGCGACAGCCCAAGACAAACCACATG <9;;;=A?=>CCACACCCACCCCCCCBCCC=ACAC=CCCBAC@A@?@.>@################################################## AS:i:-30 XS:i:-30 XN:i:0 XM:i:15 XO:i:0 XG:i:0 YT:Z:UU MD:Z:56A7C1T4C2A2C0T0A4T3C1T2G1C0T1A1 NM:i:15 +SRR065390.2772417 16 CHROMOSOME_I 142 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ;7/77:0=1>=7==7@@BBB?4@BBB>DBB@CBDCDDC@DCCACABCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5292931 16 CHROMOSOME_I 142 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###############@>9471*=886:?DB>:DB@AAC>BBDB>@DBDDBBDBDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5356550 16 CHROMOSOME_I 142 1 100M * 0 0 TAAGCCTAAGCCTGAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG #####################################?=AC@AA@CC=D@BB?B@@DADD@BDDD@?DC@CDBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12004249 16 CHROMOSOME_I 142 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ##########@*?:@??>B?B;ABBA;BADB=A@BDDAABBDD?DABADCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17100183 16 CHROMOSOME_I 142 1 100M * 0 0 TACGCCTAAGCCCACGCGTAGGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###########################################BA00*0+=C=7@>8C@ACCC5@@CACC@AAC@C>ACC@?;>@;ACA?CC?CA=ABAB AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2A9T1A2C2A4A74 NM:i:6 +SRR065390.19067238 16 CHROMOSOME_I 142 1 100M * 0 0 CAACCCCACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ##########################BD;CB>BBCCB@B@AC0@BABCBCCCCCCCCCCC?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0T2G2T1A91 NM:i:4 +SRR065390.30164492 16 CHROMOSOME_I 142 1 100M * 0 0 GAACCCCAAGCGTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ##################################B>;<>:=B@?>B>CBCDC@CCDCDDCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0T2G2T4C88 NM:i:4 +SRR065390.30379033 16 CHROMOSOME_I 142 1 100M * 0 0 
TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###################################A>BABBBDB@?DCADBACCCBBCC?DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31408372 16 CHROMOSOME_I 142 1 100M * 0 0 TAAGCCTAAGCCTGAGCCTAAGGCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ##########################################BBB=?BC@@A=@C=C@BC@>C:C@ACCCCCC?@CCCC?CCCCCCCCA@CCCABBCCBC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:13A8C77 NM:i:2 +SRR065390.24902099 0 CHROMOSOME_I 144 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCDC>BDBBA@;CAB>ABB>A?B=:2:=:7=(77@@:<>@=@?# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26130664 0 CHROMOSOME_I 144 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBDDBDBDBB=ABABB>BB@AA0=A=???@<<>@3@@=+>A7(==7@>8:=(;7749>3@3@######### AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:67C32 NM:i:1 +SRR065390.10040702 16 CHROMOSOME_I 145 1 100M * 0 0 CCCTAAGCCTGAGCCTACGCCTAAGCCTATGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #################################################################B;BAAABBBBA??AA?@CACCCCC@CCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4A7G87 NM:i:2 +SRR065390.76906 0 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACC?CCABCCCC0BBAB@.@?BB?:=?1?########### AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:87A12 NM:i:1 +SRR065390.22155458 0 CHROMOSOME_I 147 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCA@BCBDDDD@DBBD>@A?AAB0AB>??########################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.407652 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA =944;@<9>7:?@=?9@@B4B@B@ABADBDCDAABCCACCCCC>ACCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.688364 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ?>>?>@D>6@?:=:=7@7>==@A??;22;0B@=;B?BA@@DBDA@@C@C@DBCCCCBACCDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2431093 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA A>@>A==94=:7A8@?=B4?@=@B?@B@=BDBCBCCBCDCCBA=CACCCCDC;ACCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5178538 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############################@?>2=8>>@?@B6BDBADBBB@ADAACDCDCCCCCDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5776200 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################?@>>ACBAACB>BB>BB>BD?A@@DDDDDCBDCCCCCBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6963092 16 CHROMOSOME_I 147 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############?=??B@?BDD?B:>@BBB>@BBBBD@DBAD@BDDBBBCCDCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7246095 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ################@?A@?;A;?>;BBBB;?@?@@B@@BACBBBBBCBBCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11988452 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######A>=BC1BBBDD>BABB=BBC@B?AACCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12087571 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################BAA4ABBBBBB>6B?@?@=>>)69:===?<=B?=8@## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:83C16 NM:i:1 +SRR065390.12944530 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########@B@?>:?@????@A@@B@@AA@B@@BCB@@CCACCAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16589531 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############?A0@ABBDCBB@BBCBDDDDCDCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18782999 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 
##############################B9B??AB6BBBBBCBBDDBBBBBDDDDBDCCDCDDBCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20251224 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########################@<3?>?>BADA>BDDBDABADDBBDADCABCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22643473 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################@@9??DD>>?>ABB?;BBB?6;BDDAAB@D?BADBDCCBCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCACCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25014137 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ??@???BABB=A>BAB>@BBBBCBBBDDADBBDDDDBDDCBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31537660 16 CHROMOSOME_I 147 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############A=9;C>BBD>A;BADDAA@CCCD=CCCCCCACACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4975670 0 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDCCBCCCCABBCDBBBADBDBBABBAA=B1<@=A=@>B>DB?########## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.10980220 0 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACG 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCDCDDBD@B;B?=@############################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.16205013 0 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCBCCCCCCCA?CCCCC>C9?CC@?CCC?6CB7>9/6ACA@@0@;@= AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:88C11 NM:i:1 +SRR065390.19247267 0 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCAAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDCCDCBABCDDA################################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:90T9 NM:i:1 +SRR065390.27744200 0 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACG CCCACCCCCCCCCCC?CCCCCACCCCA@CCBCC?BBBCB>BD>BA@C@?@44A=9=76:?>><><=>=?########################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.48058 16 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ###########@=<<>9:6:;AB>>>===:.9>>:>ABDD;BA@DDA2:<<::06=:A:=>B6B@BBB>B@>>@@@?CCB@DCDCCDCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3911401 0 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCC<@CCB=CA@CBCCB;CB@BB@CAABA>?0?><=@9:?/57.3B:B6;:;4B########### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93G6 NM:i:1 +SRR065390.11019398 16 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG 
###########################A@=?BBC?CCCA@ACCAAB@CC9CC?CC?CCCCCACCCCCCCCCCCCCCCCCCBCCCCDCCCCCDCCDCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17181327 16 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ##########A@@@@A@BC?CBDBBBBADDBBBBAB?>B>?CB@DACDC>D?@>;B@=BBABC>ACDD?6D######################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.30860653 16 CHROMOSOME_I 148 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG ############################@@B?>BA;?A=B8C<=@ACCAB58=8<> AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.428236 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CC@CCCCCCACCCC@C@CCCC8ACCCB>@@CC4CBC>A66??@?8>=78@>,@< AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4115644 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCBCCCCDC@ACCBCBBCCBDAACCACBBBDB=BBABC>>?BBB;@=BBB>>8A=>0A:A=AA>A################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8373173 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCBCCCCCC@CCCCCCCC@B?CDAACCCACCDBBBB;B@C;8BB@A@0D45>737=>=+>@9A?########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9273601 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCBDCCDDADBDDB@AB>BBBB9@@@@8=::877519=8=99?:>A# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9667751 0 
CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCACCCDCC@@C@BA@@C??8=<;@@:94==759=>=A############################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10356164 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCBC=>C@DCABBBBBAD?BBB1B><:7.7>A>:5=@=?@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10582969 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=>=>>>@CCCCACCCBBBADDCACBBDBA>DABBABBABB<>B>BDB:>BCA=AA??AA?##### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11554773 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCDC?CDCCCCCBCCBB@BBBBDBD3774;33005=A############# AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.11799834 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCD@BCDDD@BDDDB@ABBBC>D?B@C>8CCA>:A?AD?####### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.12271991 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCBDBABDCDACDC@BBA@A@BBB?>AA>>BD???6@::= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13100135 0 CHROMOSOME_I 149 1 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCA@CCACC@CAC@@=;8><8B??A?=@=<>:==?;AA=@@B?AAC>5A#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13252530 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@ADCCD@BDCBB;ADD=B@DBBB@;@@?@=2=?################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14011507 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCC;CCACA8CC@CC@AC@C@?AC8A8=B?5?:DB?;B=@AA?@@.B8=<@=>@@:;?900;10>17> AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14302244 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCDCCDCCCCCCCCBCCCCC@BCCC>@CCACCCCCCCCC=CCC@AACBC=B@@A8:>=B@@?8B=@B;=:>:?BB<@B;>B>BA>A?;:==.>.5555966@= AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15822909 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCC@DAAB;C@=BBAB==BB@;@@BB@@4887=:0>9:@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16263688 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBCCDDD@CCDDBADDBB;>BAABB9B###################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18384187 0 CHROMOSOME_I 149 1 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@DCDBC@BBBBC;BBB>A>DB@=B3@?@>B6>>:9=0;=2::<> AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18577103 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCBCCDCCBBDDADBBB>>DBCCA>AC?AC?AAAA################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19188617 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCBCDBCDBB=B@>AB?6>=2>A>>?;6D?;B@??AA>:AAAAA6<@6@ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.19522689 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCACCCDCDDBDDD@@ABBDB>;BBBBBBCAB>:>>?@9:@@?@96:<@@########################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22784325 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?BCCD@CDCDD@BBABBC8@@AAC@?BBBBBB;>C?@3=@@?=<6=9>83@=@D###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23133897 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCBB:?BA9AA################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23202943 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBCACDCDDDDDD>BADB@ABBBCABB>AB>=>=@@A??AAAA>A################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.25064051 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCBCCCCCCCCCCCCCCCCCC@CCCCCCCCCCDACDCDDABBCB?A@<@ABB@BBD?@AAB>0BB:<=8>A.>:>>/==;AB@::=@########## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29443750 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDABCCBDDB=B@@BB@B>BBB>B?DDA@?ADB:>D?DD<9;-@############ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.29935780 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCAACCC@CCCCCCCDCDADDDDDBDCBDDBAABBBABB>B;AABAAAAD################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1999503 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ####################B>@>=4BB@BBB;AADB@BDBBA8CCD@C=DCCCDABCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.2979717 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #####################@3@A:?@;=D;???A@;BADBB@BB=?B=CDC@D@CC?@CCCCCCCCCCCCCCCCCCCCCBCCCCCBCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3631589 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCBCCCDBAC@A.=BBBA@?@@BB>?@=@@7?@?A############ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3971739 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ########B:=/5=7:>;4>@=?:7;===2B@BBBBB=BBB5BBBC?@DCCCCCDCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4002088 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@DDCBCABCDCCBACBDB@;BBBBB>?=B@@>@>@<;@############################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4609375 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #########A:3>59@?:=@<=>==6BB@?ACAABA@A@BB;BABBCDDBBDC@DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5221796 16 CHROMOSOME_I 149 1 100M * 0 0 AATACTAGACCTAGGCGTAAGCCTAAGCCTACGCCTAAGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #############################################################C1CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-16 XS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2G0C3A0G4A2C14A12G55 NM:i:8 +SRR065390.5504153 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCDDBDBD>@ADB;=B7A>>=A>BA>>A>@?CAA;ACA6C?@>B<<@<=>?>?;:B?D<9.7@/B74@########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6311425 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
#######@9@@::A::;<.?@BB>8A>BDBBBBDDB=DBDBBADABDDACDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6572328 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCC6CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@ACCACCADCBCBDDAAD>BBBAD@@DB4=3===BBB;CC>AA=CAAAA AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6809356 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCACDBCACBDBABBBACCBB;BBBB@;??D@B>AAAAB>@>>?############## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7213562 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ###############BB:>DAABC=ADBBB?A?BBB8?B@>B@DCADDDDDDCCBDBBCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17029406 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC @9'8@:6:,B>9==;6?@:BABBABBBBDAABBDBDAADDDBBDDBCCBCDDCACCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17428924 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #A9>AA68=>981==A@>??AA==::@AA0@;B@@B;ABAB=B=CBBAB;CC@AB@CDCCCCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18015619 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCC@CCCCAACCDB=C@DBB@DBDD6A?=>?>ABBBCBBBABBABDDBDBBBBDDCDCCADCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20608417 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCABCBBCADBBA>BABBBB>B>>BBBDB@?BA6><=><=D<>B?############################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.31976147 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCATAAGACTAAGCCTAAGACTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ####################################A==A=A8:>@@@BB?C??CA@@CCC@BBC>AA=?BACCCCCCCCCC@BCCCAACCCACCCCCCC AS:i:-11 XS:i:-11 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4C4C11C34G43 NM:i:4 +SRR065390.33177643 16 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ########@@@A66A@@@=:B;B@@;BBBB@?DBDA@BBDCAAACCCCDACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33572049 0 CHROMOSOME_I 149 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ?>?B@8;88?>>B9>B@97>@>>980000.*0000838?8:;56722/-/;734@BA4?############################################# AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:93T0A3C0T0 NM:i:4 +SRR065390.824436 16 CHROMOSOME_I 151 0 9M3I88M * 0 0 AATTAAAACAGATAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG B;?8B?BBBD6:5:>3=BBBCBABBBC8@CC@ABACCAC;?CCCCCCCCCCCCCCCCCACCCCCBCCCCCCCCCCCCCCCBCCCCCCDCCCCC AS:i:-38 XS:i:-38 XN:i:0 XM:i:5 XO:i:1 XG:i:3 YT:Z:UU MD:Z:0G0C0C3G0C89 NM:i:8 +SRR065390.8852076 16 CHROMOSOME_I 151 1 100M * 0 0 
GCCTAACCCGAAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #####################################C@B8C;?CCCCCCC?CCCCB?CCCCBC?CCCCCCA=>CCBACCCACCCCCBA AS:i:-11 XS:i:-11 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0G3A0A5A38C49 NM:i:5 +SRR065390.24657688 16 CHROMOSOME_I 151 1 100M * 0 0 GCGTAAGGCTACCCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ########################################@4;A?@@+CAAB@=CCC?CDACCCC@?CCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2C4C3A0G87 NM:i:4 +SRR065390.32343734 16 CHROMOSOME_I 151 1 100M * 0 0 GCCTACGCCCACGCCTAAGCCAACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ####################################?BCB=BDBA@DACCCCC@DCCCC==@?(@############################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80T19 NM:i:1 +SRR065390.11017273 0 CHROMOSOME_I 152 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTA CCCCDCCCCCCBCCCCCABCCCCCCCCCCCCCCBCCBCBCCCCCCCCACCBC?ACC=:=?########################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:90C8A0 NM:i:2 +SRR065390.23405592 16 CHROMOSOME_I 153 0 4M5I91M * 0 0 CGAAAATTTTCGAAAACCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG @??B?B@BAB<:@BBABABB@@CCCCCDCCCD@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-44 XS:i:-44 XN:i:0 XM:i:5 XO:i:1 XG:i:5 YT:Z:UU MD:Z:1T2G1C0T2G84 NM:i:10 +SRR065390.30873804 16 CHROMOSOME_I 153 1 100M * 0 0 CCAACCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####################?:A@6==97B?DDBABBDB?AB@@<>;B>B?DB?=@@?@: AS:i:-8 XS:i:-8 XN:i:0 XM:i:0 XO:i:1 XG:i:1 YT:Z:UU MD:Z:5^T95 NM:i:1 +SRR065390.19270079 0 CHROMOSOME_I 155 6 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTGAGCCTCAGCCTAAGA CCCCCCCCCCCCCCCCCCCCCCCCC@CCBCCCABDCCDCBADBAAB=BBBBB6B@B>ADDDB>.?################################### AS:i:-8 XS:i:-10 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:79A4A5A8C0 NM:i:4 +SRR065390.30567090 0 CHROMOSOME_I 155 6 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTACGCCTCAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCDDCCDCDDDCADBDDDDBBBBBB;CCACAABA>D?##################################### AS:i:-6 XS:i:-12 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:79A11A4A3 NM:i:3 +SRR065390.7022394 16 CHROMOSOME_I 155 0 100M * 0 0 ACGGCTTCGCCTACGCCTATGCCTCAGCCCAACCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #############################################DCBBBCCCCC=BCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCC AS:i:-20 XS:i:-20 XN:i:0 XM:i:10 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1A1C2A0A5A5A4A4T2G5G61 NM:i:10 +SRR065390.17996680 16 CHROMOSOME_I 155 1 100M * 0 0 AACCCCAACCCGAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ######################################?A0.00/0000.;8<5:C?C>CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2G2T2G2T2G85 NM:i:5 +SRR065390.21693823 16 CHROMOSOME_I 155 1 100M * 0 0 ACGCCCAACCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##################################@=B0>@<@7?A@CCCCCC@@C@@CC@CCCCCC@C@BCCCBCCCCCCBACCCCCCCB@@??==BB@= AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1A3T2G9A81 NM:i:4 +SRR065390.27653002 16 CHROMOSOME_I 155 0 100M * 0 0 GAACCAAAACCCAAGCCGAAACCCAAACCTGAGCCCAAGCCCAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ################################################?<8BA??C?ACACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCBC AS:i:-26 XS:i:-26 XN:i:0 XM:i:13 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A1G2T2G2T5T2G2T2G3A4T5T5T52 NM:i:13 +SRR065390.31159665 16 
CHROMOSOME_I 155 0 100M * 0 0 GAACCGCTCTTCCGATCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #####################################BBCA9>5=BCC@@ACBCCC@@CCC@CCCCCCBCB?@B?@CBCCBCCBCBCCBC?;<><>??@? AS:i:-26 XS:i:-26 XN:i:0 XM:i:13 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A1G2T0A0A0G0C0C0T0A0A0G0C84 NM:i:13 +SRR065390.11329514 0 CHROMOSOME_I 156 1 100M * 0 0 AGCATAAGCATAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCA CCCCCCCCCCCCCCCCCCCCCBA0==?:?BB>=D############### AS:i:-12 XS:i:-12 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3C5C89C0 NM:i:3 +SRR065390.29072565 16 CHROMOSOME_I 156 1 100M * 0 0 AGCCCAGGCGTAAGCCTACGCCTAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC #######################################################C@.B>@CCCD=CCCCCCCCCA@?<>AACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0G10A88 NM:i:2 +SRR065390.31653691 16 CHROMOSOME_I 157 1 100M * 0 0 CCCCAAGCCCAAGCCGAAGCCGAAGCCGAAGCCTAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #######################################################??5AACCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-14 XS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0G2T5T5T5T5T11T60 NM:i:7 +SRR065390.24940664 0 CHROMOSOME_I 158 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCACCCBA@DBDBBBA=??><; AS:i:-16 XS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T5T0A4T2G2T5T5T67 NM:i:8 +SRR065390.10545162 0 CHROMOSOME_I 159 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCA CBCCBCCCCCB=??BBBBBBBBBBBBBB:BBBAA@34*2/;?>>>BBBA?882)';8748>B9>B+131@############################## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.12800438 0 CHROMOSOME_I 159 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD@CBBAABDDDBDA*AAA?>>BD@AB+40395=21>*2(22/@@+@<55@## AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:74A25 NM:i:1 +SRR065390.21037219 0 CHROMOSOME_I 159 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCDCA@@>B@@@A>@=@?@?A@05/85;0;8@############### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:99A0 NM:i:1 +SRR065390.27810825 0 CHROMOSOME_I 159 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBCD<>CBDDBDADBCDDDBCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCACCC AS:i:-20 XS:i:-20 XN:i:0 XM:i:10 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A16A0A0G0C4G2T1A6G2T56 NM:i:10 +SRR065390.8173376 0 CHROMOSOME_I 160 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAG CCCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCBDCCD@BDDDBB=A@@??8BB;D9?DD;BAAAA=AA### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98A1 NM:i:1 +SRR065390.24971098 0 CHROMOSOME_I 160 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAG CCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCBBCCCCCCACDCA@@AAC6BABB=D@>@B>?>@1==377.5055(52049467 AS:i:-3 XS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:97A2 NM:i:1 +SRR065390.29229469 0 CHROMOSOME_I 160 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCCTAAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCACCCCCCCBB?CCCABCA@C:=B@BC?=B==@==?@@A################# AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:90T9 NM:i:1 +SRR065390.15931715 0 CHROMOSOME_I 162 1 100M * 0 0 
NGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC !++++22222AAA8AAAAAAAAAAAAAAAAAAAAA8:0:8AAAAAAAAAAAAAAA78AA7:89996664:AAA########################### AS:i:-1 XS:i:-1 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A99 NM:i:1 +SRR065390.16420564 0 CHROMOSOME_I 163 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCCCCCC@CBC>@AA:A>?CA?@AB=A=@BBA@BADB=BBABCCAAA)A AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23326095 0 CHROMOSOME_I 163 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCACCBBABAB>DBBB=>B@@?B?B>BB6BBD<>4?==@@A@9@=7AB;B;BAD;@DACD=DCDDCC?CCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4460513 16 CHROMOSOME_I 163 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ####################BD@:BBBABABBABDABDDDC=CCCDCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4608863 16 CHROMOSOME_I 163 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT A;A2::::=.@.<@@=A>BDDADCCBCB@CCCCBCCCDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10096102 16 CHROMOSOME_I 163 1 100M * 0 0 GCCGAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT #########################@3;B@79;99>=BBA?@A;@=>>D::99=4=@*:BB>AB@@@BDDBDDDDCDACACCCCCACCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 
+SRR065390.27063479 16 CHROMOSOME_I 163 1 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT ##########################################@BB@B;ACA@B=CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4604929 0 CHROMOSOME_I 164 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBDCDDDDBDDDBD@DDA?B>?*:B8@@8=@??A?>B?A## AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:81A18 NM:i:1 +SRR065390.21767995 0 CHROMOSOME_I 164 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC>D@AA@=?@####################################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:75A24 NM:i:1 +SRR065390.24370235 0 CHROMOSOME_I 164 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCCA CCCC3CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCD=A>DBB@;@BBB;=?=6A:=A################################ AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92T5T1 NM:i:2 +SRR065390.33289921 0 CHROMOSOME_I 164 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCAACBBCCCC@BCCB>B@@B@B9B?B?:0>==D?:>?############### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:88A11 NM:i:1 +SRR065390.33450079 0 CHROMOSOME_I 164 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CDBCCDDDB@DBDDBBA?B@>@BABB6>=>>94@<==@#################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92T5T1 NM:i:2 +SRR065390.9104074 16 CHROMOSOME_I 164 1 100M * 0 0 
CCTAAGCCTAGGCCTAAGCCTAAGGCTAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #################################?777?:5@B=B?B?;BDB@ABBA18BBACCCC####################################################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:96T1A1 NM:i:2 +SRR065390.6217548 0 CHROMOSOME_I 166 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAGG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCADDCACB?@@?@B@BBD=:=;7ABAAB@:?BD############################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:96T1A1 NM:i:2 +SRR065390.22416659 0 CHROMOSOME_I 166 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGACTAAGCCTAAGCACAAGCGTAAGCCTAAGCCTAAG ?A=8?@AA@?CCCCCBBBBB:8:?:78307>@#################################################################### AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:64C12C0T4C16 NM:i:4 +SRR065390.29369023 0 CHROMOSOME_I 166 1 100M * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTCAG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBBDDCBBDBDDBDBCABA>BCACC>CAAAAA?A?>:=CA@9;787;8A48;8@######################## AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:86G1C11 NM:i:2 +SRR065390.19225398 0 CHROMOSOME_I 167 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGCCTAAGCCCAAACCCAAGC ?<:<<>AA@>CCCCCB@B@=58;;?<>@BA>B@BB<@B9BBBBBBA>@############################################## AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:79A9T2G2T4 NM:i:4 +SRR065390.21123303 0 CHROMOSOME_I 167 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGACTCAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC######################################################## AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:72A2C2A21 NM:i:3 +SRR065390.29022479 0 CHROMOSOME_I 167 0 100M * 0 0 
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAATCCTATGCATAAACCTAAACAGAATCAAAAGAAAAATCCAATCT CCCCCCCCCACCCCCBCCCC?CCCCCCCD;?D?D################################ AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:94C0T0A3 NM:i:3 +SRR065390.23298396 16 CHROMOSOME_I 167 1 100M * 0 0 AAGCCTCGGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #####################A@><>B==BC@CCBB?BA'@>>;>>DADDDBDBADB?B6@7=;;7DBD?B<8=AA:4-9<@@1:@A################################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98C1 NM:i:1 +SRR065390.23263331 0 CHROMOSOME_I 168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCBCCCDCCDCDDDBBDA=B@BB@B>B>AB?@?BB>;;ACC>CAA@;9<5@############## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1428659 16 CHROMOSOME_I 168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC #######?DB@;>BBB::>:D=>D?BDDBBBBCCAC@DCCBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9270489 16 CHROMOSOME_I 168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##########?4=>@BAA>BB>AA@====3BBBBB;B?@C==CCC?@CCC?CCC?ACCCCCBCCCBCCCCBCCCCCCCCCCCCCC=BCCCCCACCCDCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9538669 16 CHROMOSOME_I 168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##########@=?6??@B;BA@@@?.@?@@;D>A;DB@DBBBD>@DDDBADCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15525407 16 CHROMOSOME_I 168 1 100M * 0 0 
AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ####################@37:0BC@@C@ACCAB?@CCACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18387934 16 CHROMOSOME_I 168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##########################@@A@4BDDBB@ACABB@8BCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.27778447 16 CHROMOSOME_I 168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ###############@@B=;>89<>/8?<8@>=ABDCCDCC@CCACB@@C@9ACCCC;CCCC@CCAAB@@CCCCCBCCCCCCCBCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4767844 0 CHROMOSOME_I 170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTAACCCCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCDCC=CCBA=BCCACCBCC<@@@A@>A?D<5/772AA####################### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80T14G2T1 NM:i:3 +SRR065390.6036148 0 CHROMOSOME_I 170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTCAGACCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCBCCC=C########################################## AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:86T6A2C1T1 NM:i:4 +SRR065390.7523697 0 CHROMOSOME_I 170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTATACCTATGCATA 8773399<;8BBB>BAA<A################################### AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:84C3A4A5A0 NM:i:4 +SRR065390.21777229 0 CHROMOSOME_I 170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCCAAGACCAAGCCAAGACCCC 
CCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCCCBDABAA@48@############################################# AS:i:-18 XS:i:-18 XN:i:0 XM:i:9 XO:i:0 XG:i:0 YT:Z:UU MD:Z:74T5T3C1T5T1A0G2T0A0 NM:i:9 +SRR065390.22082412 0 CHROMOSOME_I 170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTTGGCCGCAGCCTCAGCCTGAACAGA CCCCACACCCCCCCC??:??@CCCC@9A>9?AA@AC>@CA@B-73>8=53@=:=A?><=>49778?################ AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:88A5A5 NM:i:2 +SRR065390.32243033 0 CHROMOSOME_I 170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGACTA CCCCCCCCCCCACC@CCACCCCCCCCCCCCCCCCC@CADCCBBD@BB>=?A@9C@?C>A88?>8A?:@CCCCCCCCC:?>;:CCC?BCCCCACCCCCCCCCC AS:i:-39 XS:i:-39 XN:i:0 XM:i:18 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C0T0A0A2C0T2G1C0T0A0A0G0C1T0A1G1C64C10 NM:i:18 +SRR065390.28296401 16 CHROMOSOME_I 171 1 100M * 0 0 CTAAGCCTAAGCCTAAGGCTAAGCCTAAACCCACGCCTAGGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################################################BAADDDBBDDCCDCCCCCACDCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:17C10G2T1A5A3T56 NM:i:6 +SRR065390.1242089 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC A=@@?=?=8A3BB>>B@B>BAB@B@B77//8<;>5:@@@B6ABA@BA<@BB5):5;*83736?;;;@@=;6B>??##################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3872193 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCBCCCC@DCACD=ABCB@BCDDA@BA=BBB@C??@;:0A>?>B>?)?#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14566073 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCBCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCAB=?CCCA6?AACABCCAC=1B@A@;B<@A@@;>?@>8BB?B#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18391952 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCDCCCAADCCB?CBABD=A>?BB5:??:B;>?@AA?>3?;@(8>=>>/(5500;+@@6 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18719419 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCAC@@C@@B@DBBDBB################################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23668023 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCC@@ABDB@@BBB>DBABB@D@BDBAABAB>B>AA@??9:8>>A:255@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23826980 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDCDBCDDBDDDABBBBDDBBBBBBB>D?#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28024258 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCBACDDBC>DDBDB>BBBBB;?@BBB3@???=0<=>@@:@################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30039772 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCDACDBBDDDDDBBBDBBD>BBAADAABAAC??B??######################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5345749 16 CHROMOSOME_I 173 1 100M * 0 0 GACCCAGACCCCGCGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##########################################@BA=>AAA@;AAAA@AA9AAAA@BAA@:=@@@4A=?A@AAAAA:B@@BBBBB@>>>>> AS:i:-18 XS:i:-18 XN:i:0 XM:i:9 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A1G2T0A1G2T0A0A9T76 NM:i:9 +SRR065390.16932911 16 CHROMOSOME_I 173 1 100M * 0 0 AACCCTAAACCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ###############B?BAA;;9>0A1BAAA@=CA*@CCCCACCCC@@?CAAB>AC=C?CCCCBCCBBCBCCCABCCBCA@CCCCCCBCCCCC?BCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2G5G5G85 NM:i:3 +SRR065390.17106354 0 CHROMOSOME_I 173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCACC?CCADCCAC@BB@CBB@C?@A@@A>=B?BAABBABB6A>BBB:BBA=?DD??;D/<71; AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22716808 0 CHROMOSOME_I 174 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCABBBBB?################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:94T5 NM:i:1 +SRR065390.12986460 0 CHROMOSOME_I 176 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAATCGTAAGACTAAGAGCAAGCCTCAGCATA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCA?CCA############################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92T2G4 NM:i:2 +SRR065390.14729559 16 CHROMOSOME_I 176 1 100M * 0 0 CCTACGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA 
#########################?(4<=B@;BBBBCB?>BCCA?DCCACCCCCC@C;BBB??B<;9=C@BCAACBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T0A0A6G88 NM:i:4 +SRR065390.26023345 0 CHROMOSOME_I 177 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCGAA CCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCA?CDADABDBDDBDDBAB>>BBBB@;>@BBB?A>CBBB<>>B@@4@?>>?0ABD@@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6149508 0 CHROMOSOME_I 179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCDDCCBD=CCDB@@DABAB=ABB??>>@BB=BCBAB>>D;A?><>AA>?A==+@A AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6618950 0 CHROMOSOME_I 179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDCCCCCDCCBCAACBBCBB@DADABBDAB?CBB@B;?BB=B>>>?:? 
AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7246333 0 CHROMOSOME_I 179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCACCCDCCCCCCCCCCCDCCBCD@CBBDCADADADBDABBDBDABDBCBBA>BAB>>AC9A################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8266146 0 CHROMOSOME_I 179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTCAGCCGAGGCCTACGC CDCCCCCCCCCCCCCCCCCBCCCCCCDCCCCCCACDCCCCCDACBDCABCB@A=ABBB@BBD@DB?B################################# AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80G3A4T1A5A2 NM:i:5 +SRR065390.8986893 0 CHROMOSOME_I 179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCCCCCA@CCCCD=CCCDAABBDB>BDDBDB;BB@@B=@BDB:.A>>BB:@################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 diff --git a/src/htslib-1.21/test/ce#2.sam b/src/htslib-1.21/test/ce#2.sam new file mode 100644 index 0000000..8a4eead --- /dev/null +++ b/src/htslib-1.21/test/ce#2.sam @@ -0,0 +1,3 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +SRR065390.921023 16 CHROMOSOME_I 3 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-13 YT:Z:UU diff --git a/src/htslib-1.21/test/ce#5.sam b/src/htslib-1.21/test/ce#5.sam new file mode 100644 index 0000000..0535065 --- 
/dev/null +++ b/src/htslib-1.21/test/ce#5.sam @@ -0,0 +1,11 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +II.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +III 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +IV 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +V 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +VI 2048 CHROMOSOME_I 2 1 27M100000D73M * 0 0 ACTAAGCCTAAGCCTAAGCCTAAGCCAATTATCGATTTCTGAAAAAATTATCGAATTTTCTAGAAATTTTGCAAATTTTTTCATAAAATTATCGATTTTA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC diff --git a/src/htslib-1.21/test/ce#5b.sam 
b/src/htslib-1.21/test/ce#5b.sam new file mode 100644 index 0000000..f778394 --- /dev/null +++ b/src/htslib-1.21/test/ce#5b.sam @@ -0,0 +1,12 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +II.14978392 16 CHROMOSOME_II 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +III 16 CHROMOSOME_III 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +IV 16 CHROMOSOME_IV 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +V 16 CHROMOSOME_V 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +VI 0 CHROMOSOME_V 10 1 7S20M1D23M10I30M10S * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCTAAGCCTAAGCCTAAGCCTAAGCTTTTTTTTTTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA * +VI 256 CHROMOSOME_V 10 1 7S20M1D23M10I30M10S * 0 0 * * diff 
--git a/src/htslib-1.21/test/ce#5b_java.cram b/src/htslib-1.21/test/ce#5b_java.cram new file mode 100644 index 0000000..97e58eb Binary files /dev/null and b/src/htslib-1.21/test/ce#5b_java.cram differ diff --git a/src/htslib-1.21/test/ce#large_seq.sam b/src/htslib-1.21/test/ce#large_seq.sam new file mode 100644 index 0000000..7df381d --- /dev/null +++ b/src/htslib-1.21/test/ce#large_seq.samdiff --git a/src/htslib-1.21/test/ce#supp.sam b/src/htslib-1.21/test/ce#supp.sam new file mode 100644 index 0000000..666e774 --- /dev/null +++ b/src/htslib-1.21/test/ce#supp.sam @@ -0,0 +1,6 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@CO Test supplementary reads, for CRAM +supp 99 CHROMOSOME_I 100 1 50M50S * 0 0 TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC * +supp 2147 CHROMOSOME_I 200 1 50H50M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC * +supp 2195 CHROMOSOME_I 500 1 50M50H * 0 0 TTTTAGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCC * +supp 147 CHROMOSOME_I 600 1 50S50M * 0 0 TTTTAGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCAGCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAATACTAAGCCTAAG * diff --git a/src/htslib-1.21/test/ce#tag_depadded.sam b/src/htslib-1.21/test/ce#tag_depadded.sam new file mode 100644 index 0000000..2026dd7 --- /dev/null +++ b/src/htslib-1.21/test/ce#tag_depadded.sam @@ -0,0 +1,11 @@ +@HD VN:1.4 SO:coordinate +@RG ID:UNKNOWN SM:unknown LB:UNKNOWN +@SQ SN:CHROMOSOME_I LN:1009800 +I 16 CHROMOSOME_I 2 1 100M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:27;28;+;STOP;Note=tag1 +II.14978392 16 CHROMOSOME_I 2 1 100M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 
PT:Z:27;27;+;PRIM;Note=tag2|28;28;+;OLIG;Note=tag3 +III 0 CHROMOSOME_I 2 1 27M1I73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################"##@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:1;1;+;COMM|ote=consensus%0Amulti%09line%0Atag%0A!"#$%25^&*()_+<>?:@~{}%7C%0A1234567890-=[]'%3B#,./\ +* 768 CHROMOSOME_I 101 255 1M * 0 0 * * CT:Z:+;STOP diff --git a/src/htslib-1.21/test/ce#tag_padded.sam b/src/htslib-1.21/test/ce#tag_padded.sam new file mode 100644 index 0000000..47a57c2 --- /dev/null +++ b/src/htslib-1.21/test/ce#tag_padded.sam @@ -0,0 +1,11 @@ +@HD VN:1.4 SO:coordinate +@RG ID:UNKNOWN SM:unknown LB:UNKNOWN +@SQ SN:CHROMOSOME_I LN:1009800 +I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:27;29;+;STOP;Note=tag1 +II.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:27;27;+;PRIM;Note=tag2|29;29;+;OLIG;Note=tag3 +III 0 CHROMOSOME_I 2 1 101M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################"##@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:1;1;+;COMM|ote=consensus%0Amulti%09line%0Atag%0A!"#$%25^&*()_+<>?:@~{}%7C%0A1234567890-=[]'%3B#,./\ +* 768 CHROMOSOME_I 102 255 1M * 0 0 * * CT:Z:+;STOP diff --git a/src/htslib-1.21/test/ce#unmap.sam b/src/htslib-1.21/test/ce#unmap.sam new file mode 100644 index 0000000..fec218c --- /dev/null +++ b/src/htslib-1.21/test/ce#unmap.sam @@ -0,0 +1,6 @@ +I 4 * 0 1 * * 0 0 
CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +II 4 * 0 1 * * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +III 4 * 0 1 * * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +IV 4 * 0 1 * * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +V 4 * 0 1 * * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +VI 4 * 0 1 * * 0 0 ACTAAGCCTAAGCCTAAGCCTAAGCCAATTATCGATTTCTGAAAAAATTATCGAATTTTCTAGAAATTTTGCAAATTTTTTCATAAAATTATCGATTTTA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC diff --git a/src/htslib-1.21/test/ce#unmap1.sam b/src/htslib-1.21/test/ce#unmap1.sam new file mode 100644 index 0000000..e1e464e --- /dev/null +++ b/src/htslib-1.21/test/ce#unmap1.sam @@ -0,0 +1,20 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +@SQ SN:CHROMOSOME_X LN:5000 +@SQ SN:CHROMOSOME_MtDNA LN:5000 +@RG ID:UNKNOWN SM:UNKNOWN +@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-betadiff --git a/src/htslib-1.21/test/ce#unmap2.sam b/src/htslib-1.21/test/ce#unmap2.sam new file mode 100644 index 0000000..d002a34 --- /dev/null +++ 
b/src/htslib-1.21/test/ce#unmap2.sam @@ -0,0 +1,29 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +@SQ SN:CHROMOSOME_X LN:5000 +@SQ SN:CHROMOSOME_MtDNA LN:5000 +@RG ID:UNKNOWN SM:UNKNOWN +@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-beta5 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 AS:i:-18 XS:i:-18 YT:Z:UU +SRR065390.921023 16 CHROMOSOME_I 3 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-13 YT:Z:UU +SRR065390.1871511 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 0:BB@>B<=B@???@=8@B>BB@CA@DACDCBBCCCA@CCCCACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.3743423 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################?6@:7<=@3=@ABAAB>BDBBABADABDDDBDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.4251890 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########@BB=BCBBC?B>B;>B@@ADBBB@DBBBBDCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.5238868 16 CHROMOSOME_I 3 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA @,=@@D8D;?BBB>;?BBB==BB@D;>D>BBB>BBDDBA@@BCCB@=BACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.8289592 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A?@C9@@BC=AABDD@A@DC@CB=@BA?6@CCAAC@+CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:idiff --git a/src/htslib-1.21/test/ce.fa b/src/htslib-1.21/test/ce.fa new file mode 100644 index 0000000..ac19897 --- /dev/null +++ b/src/htslib-1.21/test/ce.fa @@ -0,0 +1,20803 @@ +>CHROMOSOME_I +GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC +CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT +AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA +GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC +CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT +AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA +GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC +CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT +AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAAAAATTGAGATAAGAAAA +CATTTTACTTTTTCAAAATTGTTTTCATGCTAAATTCAAAACGTTTTTTT +TTTAGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCT +GCCAACCTATATGCTCCTGTGTTTAGGCCTAATACTAAGCCTAAGCCTAA +GCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAATACTAAGCCTAAGC +CTAAGACTAAGCCTAAGACTAAGCCTAAGACTAAGCCTAATACTAAGCCT +AAGCCTAAGACTAAGCCTAAGCCTAATACTAAGCCTAAGCCTAAGACTAA +GCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAAGACTAAGCCTAAGA +CTAAGCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAAGCCTAAAAGA +ATATGGTAGCTACAGAAACGGTAGTACACTCTTCTGAAAATACAAAAAAT +TTGCAATTTTTATAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAAC +CAAAAATAATTGCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAAACA +TTTTTCGGGTTTTTTGAAATGAATATCGTAGCTACAGAAACGGTTGTGCA +CTCATCTGAAAGTTTGTTTTTCTTGTTTTCTTGCACTTTGTGCAGAATTC +TTGATTCTTGATTCTTGCAGAAATTTGCAAGAAAATTCGCAAGAAATTTG +TATTAAAAACTGTTCAAAATTTTTGGAAATTAGTTTAAAAATCTCACATT 
+TTTTTTAGAAAAATTATTTTTAAGAATTTTTCATTTTAGGAATATTGTTA +TTTCAGAAAATAGCTAAATGTGATTTCTGTAATTTTGCCTGCCAAATTCG +TGAAATGCAATAAAAATCTAATATCCCTCATCAGTGCGATTTCCGAATCA +GTATATTTTTACGTAATAGCTTCTTTGACATCAATAAGTATTTGCCTATA +TGACTTTAGACTTGAAATTGGCTATTAATGCCAATTTCATGATATCTAGC +CACTTTAGTATAATTGTTTTTAGTTTTTGGCAAAACTATTGTCTAAACAG +ATATTCGTGTTTTCAAGAAATTTTTCATGGTTTTTCTTGGTCTTTTCTTG +GTATTTTTTTGACAAAAATTTTTGTTTCTTGATTCTTGCAAAAATTTTTC +CGTTTGACGGCCTTGATGTGCACTACCTTCGCTTAAATACTACATTTTCT +GAAAATGTTATAATAGTGTTCATTGTTTCATACAAATACTTATTTAATAG +TATTTCTGGTTATATAATTTGTATAAAAAGTGGTTGACATAACAAGGCTG +ACGAAACTTTGTGATGGCTGAAAATATTTTCCTAGCTTTATTGATTTTTA +TTTATACGTGTTTGAATAACTTGGCCAAATCGCCGAGAAGGAATAGAATA +CTGGACGACATTGTACATATTTTCCAAAAAATCAGAAAGTAGATGACGGG +ACCAATTCTTTCTGTCAGGTTTTACAACCGCCCAGTGCGTCTACGTCACA +TGTTGTATAAATGGTTGTAAACAATATGCGGAAACAATCAAATGCATTCC +CATAAGGCATAATATAGAGGCTACAGGCAATGAGTATCGCTCTTTGCTTT +GTTTAAAGGGGGAGTAGAGTTTGTGGGGAAATATATGTTTCTGACTCTAA +TTTTGCCCCTGATACCGAATATCGATGTGAAAAAATTTAAAAAAATTTCC +CTGATTTTATATTAATTTTTAAAATCCGAAAATCCATTGGATGCCTATAT +GTGAGTTTTTAAACGCAAAATTTTCCCGGCAGAGACGCCCCGCCCACGAA +ACCGTGCCGCACGTGTGGGTTTACGAGCTGAATATTTTCCTTCTATTTTT +ATTTGATTTTATACCGATTTTCGTCGATTTTTCTCATTTTTTCTCTTTTT +TTTGGTGTTTTTTATTGAAAATTTTGTGATTTTCGTAAATTTATTCCTAT +TTATTAATAAAAACAAAAACAATTCCATTAAATATCCCATTTTCAGCGCA +AAATCGACTGGAGACTAGGAAAATCGTCTGGAGATAGAACGGATCAACAA +GATTATTATTATATCATTAATAATATTTATCAATTTTCTTCTGAGAGTCT +CATTGAGACTCTTATTTACGCCAAGAAATAAATTTAACATTAAAATTGTT +CATTTTTGAAAAAAAAATAATTAAAAAAACACATTTTTTGGAAAAAAAAA +TAAATAAAAAAAATTGTCCTCGAGGATCCTCCGGAGCGCGTCGAATCAAT +GTTTCCGGAACTCTGAAAATTAAATGTTTGTATGATTGTAGAACCCTTTC +GCTATTGAGATTTGATAACTTTTAAGTAATAAAATTTTCGCAGTAAGACA +TTAAAACATTTCACAATTAAGCTGGTTCTGAACTGTGTGAAGTATATTGA +AAAAAACTAACTGATACAAAAATATAATTTTATGATAGTTTTCTGGATGT +CCCAATATAAACGATGTCAATTCTGCGACATGCTACAGTCATCCACGAAA +GTAACCCGAATACCGACAAAAGAAGAGGAACGCCAACTTTGGATAGACGC +TCTAGGGGCTGATTTTGGTCGGAAAATAGTCGGGAAAAAATAGAGGACAT +TACAGATGAGGATGAGGATGAAGATAGAAATTTGCCGACAACTTCGTCAT 
+GCCGCTGATTTTTTTGATGTTCTACGCTTAAATTTTCAGCGAACGAACTA +TTTTTTATATTTTGATTGTTTTTAAATAATATTTGCCATAAGAAATTCTC +ACTTTTCCAGGAAACGTCGTTTCGCCGCGATTTTCCTCGTCTCCAGTCGA +TTTTGCGCTGAAAATGGGATATTTAATGGAATTGTTTTTGTTTTTATTAA +TAAATAGGAATAAATTTACGAAAATCACAAAATTTTCAATAAAAAACACC +AAAAAAAAAGAGAAAAAATGAGAAAAATCGACGAAAATCGGTATAAAATC +AAATAAAAATAGAAGGAAAATATTCAGCTCGTAAACCCGCAAGTGCGGCA +CGGTTTCGTGGGCGGGGCGTCTCTGGCGGGAAAATTTTGCGTTTGAAAAC +TCACATATAGGCATCCAATGGATTTTCGGATTTTCAAAATTAATATAAAA +TCAGGGAAATTTTTTTAAATTTTGTCACATCGATATTCGGTATCAGGGGC +AAAATTAGAGTCAGAAACATATATTTCCCCACAAACTCTACTCCCCCTTT +AACAACCACCCGAGGATATATTCGACAAACGATCTATCTACTAGGAATAA +CTCGATTATTGACATATTATAGACTTCTTTTAGTATTTGTAAAATAGAGG +ATCAGACCCAAAATTCAGCCCGCGAAGGCATGACGTCAGCGCGAGGCAGT +AGTTTCCAGAAGAACTCTGTCGTCTACCTTAATGCCTCAAATGCGAACCC +GCTTCGGCCATCCTTCTCGCTCAGAGAATGGATTAGAGTTCTCATCAACT +CCTCTGTCTAATTTTCAACTGCGGCGGTTGGCGACCGGTATTACCGCGGC +GACCGACACCTCCCGGGTTCCGTCGATCGCTGTCTGTTGTGTGCGCCGCG +ACTCCGCCCACCGGTGGTAACTTTTTGTGGGGGAATCTTTGTTTTTGGTC +ATTTTTCAGCGCTTTTCAGCGATTATTGACCAATTTTGAATAAAATTTTC +AACAGAATATCATCTAAAATATTGCTTAACATTTATTTAACAGAAATAAC +GTGAGCACGCATGTAAAACATGAAATTTTCGGGAAAATTGCAATTAAACG +AATAAAAATCGATATTTAAATCAATTATTGGTGAATCCGGTGTGTTGAGG +CTTCAATGCATACATTTTTACTGGATAAATCTCCTTTGGGAATCCGGTTT +GCAGTGCTTTCGAGACCATGTCCAGTTGAGAATCGGCGAACGCTTTAAGA +AGCTCGGGCTGAATAATGAATTGTTTTAAAAAATGTTTAGTAAAAAATTG +TTTTCGTGCAAATTGTCTTCGATATTATCCAAACGTGACGTTTTGCGATT +TTCGCGCTAAAATTACAGTAAGTGGGGTCTCGACACGACAATTTTTGTGA +AATACAAACGGGCGTGTGTCTTTAAGAAGTACTGTAGTTTAAAAACTTCA +TTTCTGTGGAATTTTCATATATTTTTCATAGTTTTTCTCTTTAAATAAAT +CACTTTTCAACAAAAAACTATGAGACAATAGTTTGAAATTACAGTATTCT +TTAAAGGTGCACGCCTGCTCGAATTTCGCAAAAACGTGTCGTGTCGAGAC +CCCAATTACAGTATTTTTGACCCGAATATCGCGAAATTTCGAGTCTGGGT +GAAAACATTGAAATTTTTGGCAAAATAAAAGAAATATGTCCTTTTTCAGA +ATATATTTTCTAAATTTCGAGACGAAACAACAATTTTAAATGAATTTTAA +TTTTAAATATTAAATATTTCGGAATTTGGCGTTTTTTATGCATGTCGATT +CACTAACGATTTTGTACTACACGTGGGCAAGTTTATACAGTTTTTGGCTA +AAATTTGTGAATTTGAACCGTTTTTCGGCGAATATTTGAAAAATTGGCAA 
+AACTGGTTCAAAAACAAAAATTTTTTAAACTGTACAAACTGTCCAAAAAT +TCGTCGTAAATCGACACACCCTTCTCATTTTTTCAAAATTTTAATTGTTT +TCGAATGTTTTTTTTGCAGAATAATTTGTAAAATGAGCCTTTTGTGAATT +TTTTTTAATTTCAAAGTTTTTATTATTTTTTCTCAAACCAGCACCTCTGT +TCTCGTCCAACTATGATCATCATCGTCGAATAACCGTTTCTCGTGATTTG +TCACATTATCCTTGAGCACAATACATCCACCAGGTTTCAGTCCTTTCTGA +AAATGAAAATTAATTTTAAAAAAATTGAATTATTTTAAATGAAACAGTTT +TCAGAGATTTCTCAACTTTTGAGTCCACCACCAGGCCTGCACGTTTTTCG +GGTTTTATCTTTTAAAAAACTGAAAAATCGAAAAATTTCAATTTCTGTTT +TGTGGTCAAAATTGTAATTACAGGTAAGCAAATAGTTTAATTTTAAAATT +GAAAATTAGGGAAATGACCGGACATAAGTTTAAAAACCCGATTTTTTCAA +TAAAAAGGAAAATTGAAAATTTAATAAAACAGGTTGTAAATCAAGGAGAT +CGTATTGATTGAAAAAAAATCCGAATGTTCCGGATTTTTCAGTGGTTTTT +TTTGAAAGAAAATCGAAAAAGTAAATGTTTTTAATTTTTAAATTTAAATT +TTTAATCGGAAAAAATGTACGAAATTGACTTTTTAATGTGAAAAATTGTT +GTTTTAAAAAAAAATTTTAACCGATACAGATTTTCTAGACTCAGTTTTTT +CGGTTGAATATTGTTTTTTACTATTTTTTCATTACAGAAAGAATCCAATT +TTATTTCGCTTAAAAAATAACCGGAGCATCGAAAATATTTTTTTTTCTGT +TTTACTCAAAGCATTTCAATTACCTAGAATTTTGTTTAAAAACTACATGC +TTTATTTATGAACGTAATAAATAAGACCCCCTCTTATTTATAAACTTTCA +ACATATTTTCAGTTTTCAGTGCTATCTAGTGCTTACCGCACATCTTTTAA +AGAAATCAACCAAATCCTCATCAACCAAATGCCCTGAAACCCATTGAATC +CATATCAAATCATAACGTCGTTCGGGCGGTGCAAACGTCTGCAGTCCTTC +GACGAATTTATCTCCAATTCGTGGATGTTTTCCAATATATTGATCACTTT +TCGTGATCAACTCCTCGACGACGTCTTCCATATCAACTTTCGAGAAGAAT +GGCATTAAGAGATGCTTTGTAACACGTCCGATACCCGCTCCGCAGTCCAG +TGCATAGTCAAAGTAGCCGAATAGATTCTGGAAAATATTTATAAAATTCA +AAGTTGGCCCAGGGGTGACCGGCAATTTCAAGCAAATCGGCAAATTGTCA +ATTTTCTGAATTTGCCGAAAATTTGACAAAAACGACAATTTGCCGGTTCG +CCGAATTTACCTTTTTTAAATTTAATTTTCAATTCAGGCAAACTGACGAT +TTTCCGTTTGCCGGATATCAATTTGCAGGAATTTCTCAAAGGAATTTTTA +TTAAGACGGAAACACAGTGCTTTTTTGAATTTTTTTTCCCGTTTTCTTCA +GATATTTTTATAGAATTTACTGACTTTTCAGAATAGATGTAGGACAATTT +TGTTGTTTTAAAAATTGAAATTCTGAAATTTCCAACAAAAAAACATGTGC +AAACCCACAAGTTGGCAAAAATATTTTGCATTTGCCGTTTTTCCCGTTTG +CCGAAAAGTCTAATTTCGGTAATTGGGCCATTTTTCGAAATTTTGAGCCA +CATAAAAAACTTTGAACCATTTTTGAGAAGTATTATTACGACATTCGTTT +ATTTGAGCACAATTTGGGCCTATACTTTCAAAATCGGGGTTTGAAAACCC 
+CTATATGTTCGACCGAATGTTAATCTCATAAAAATTTGATGAAAATAAAA +TTTTCTACGGCTCATAAACGTATAGCCCCCGTCAGTCTCAAAATTTATAC +GATAGACACTTTTTGGCGTTTATCGCCTATATTCCGTCAAAAACCATTAT +TCATCATTCTTTCAATGTTGTTTTTTTTAAGGCTAAAAAACTTTCATGCA +AATTTGTTAGCCGTGTCGTGGTTTATACGAAAATTTCAGAATTTATAAAA +TAAAGGAAAACGAAAATGTTTCTATATACCCTATTTATGTTCTCTGATTC +CGAATACCAATGTGAAAAATTCAAAAAAAATTCCCTGATTTTATATAAAT +TTTTGTAAGCGACAAAAATTGTCGTTTGAATTTCACACTTGGTTACAAAA +ATTTATGAAAATGAGGAAAATTTGTTTTAATTTTTTCACATTGATATTCG +GAATCAGGAAAATAAATAGGGTCTATAGAAAAATTCCGAACCTTCACTCC +TTCTCTGAGTATAATAAATTTAAAATAAATACAGAAAATTTCAGTTCAGA +CCTCATTAAATTTGGGTATATTTCTAGGATCCGAGTTTTTACACCAGATT +TACAAACTTTTAGCCTTTCACCGCCTTTTTATGCGCATTTCCCATCAGTC +AACTCCAAAAAAATCGCAACTTTTGCCTCATATTTCAAGAATATTCCCCT +TTCTCTCCCCATTGAAAGTCATTTTCGAAACAAGCGGAAGATTCGTCATA +TGTGGTAATGTGTGGCGTGCGTTGGCAAACAACAAGAAAGAATCATTCTC +TGAAAACAAAAAGCGTTTTGGGTGCCAAAGTAATATTGAAAATCTGCCGT +GTTTTCTCATTTTCCATCAAAAGAAAATGAGAAAAAAGTTTCGGCGTTTT +ATTTGATTTCCGGGAAAGAAGACTCGGAAAAAGATTTAATTGAATTTTTC +ATAGCAAACCTATATTGCAACAACTTTCTAAAAGGTCAGAAATTGCCGCG +TAGCCTAGAAAATTGGAAAACTCTTCCAGCTGGTATTATTTCAGACATGG +TGCATCGAAAATTCGAAAATTACAGAAATTAACATTTTGGAGCATCTGCC +AGAAAATTGAGATTACAGTACCCCACTTCTGCCGAGAAATTCGAGGTGGA +AGAGGTCTTACAAAATTTTCGGTCACGTGAAAATGGGAAGCGTTCAGGCT +CCACACGACGGAATTCACCTAGTTTTCAGGTGAGAAGATATCGTACGAGG +AGAATTGACCTCCAAATCCTGATCGTGACTACAGGTCGTCGTTCGGAGCT +GTGGAAGAGTTTTGAAAATCTTCGACCATGAGAGAAATAGACAGGACGAC +CAAACATTTTCAGTGGAAGAGCTTTTCCTAGGCCATCAGGATGCTATTTC +GACAGAGCTGAGTTATCCTCAAGTCGTTACGAGGTGTGGAAAAGTTTTCC +AAAATCCCCGACCAGGTAGAAAATGAGCACACCGATTAAGTTTCTCCAGT +GGTAGAGTTTTTTCTAGGCCATCATTATGCTATCTAGAAAAAAGCTTCGG +CCATGGGGTTTTTAGGCCGTCTATTTATTTCTCATAACTTTCTCAGAAAT +TCGTCTATTTCTCAGAACCCCCCAATGATAAGTTTTTTGCAAAAAAAGTT +CTGCTTTGCTCATCAGCCGTAATCAGGTGACCTCATTAGGCCTACCCAAA +CACAGATTTGTCATTATTTTTCAGACAAAAAACACGAAAAAAATCTTCAC +GCATGGGGTGATAACCTGATTTTAAATCTTACTGTGCCGGCTGGCGCGGC +GAGCTTCGATCACTGAGCCGAAAGATTATGAAAACTATGGGAATGACGGC +GTAGCCTAGAAATCGTCCAGGCAGAGATTCTGTCTAATTTTCGAGCATAT 
+ATCTCCCAGTTTTGTTATTAATTTAAGTAAACTCAAACCTAGAAACAAGT +AAAACGGGAGGGGGGGGGGGAATATCAGAAAATTAAATCTTGCGACACTT +TTCCATTGATACTTTCAAGGTAATGCCCAGAGGTGTGCGGCAAATTTTGA +AACTTGCGCATGCCGCCTTTTTTTTTTTCTAGAAAACAGTCAGAATTTTT +TGTCGAATTTGTTGAAAATTCGCTAATATACTGTGAGTTTAGAAAAAATA +ACGAAAAAACTCGGAAAAGGAGGAAGAGATCTGAAATATGTAGATTTTTT +TAGAAAAGACCAGAAATTACTGAAAAATTGGCATTTTTCGTCGAAACCCC +AATATACTAAATTATTCGGATTTTTAGAAAATTTTCAAATTCACCATACA +GTGCATTTTTTCCTACTTCTACGACTTTAAAGGGGGGAGCATTTATGCGG +AAGGGTCTTGCCGCGCATTTAGTCATCATTTTTAGCAGTTTCTGTGTAAA +ATTCGCGTAGATCACATGAAGATCACAAAATATTTATCCCATATTTCGTA +TTTCTGTTGCTTTTTCACAAATTAATTGTGATCTACGCGTGATCTCCGCG +AATTTTGAGCAGACTTTGTTAAAAATGATGACTATGTGCACGGCAAGACC +CTTCCGCATAAATGCGCCCCCCTTTAAAGTCGTAGAAGTGGAAAAAAAAT +GCACTGTAGCAAAAAATCGAACATTTCTGTTCGATTTTTGAATTTCTCGA +AATTTTTTAAAATAATTTTTAAAATAACATTTTTATTTTATTTCGAAAAC +TACCGATTTTAGAAAAATTCTAAAATTTCGATTTTTTTTGTTGATTTTTC +GATTTTTAAAATAAAATTTCATAATTTTTTAAACCGATCTTTCTTGCTTT +TCCTGAAAAATCGATGATTTCTATACCTTTTTCTTCAGTCCTTCAATAAA +TCGTTTCGACGCCGATATGTCGGGCGCGTGAAGCGCTTCGAATCCGCCGA +GCATTCCGTTGACGTCCTGGCTCGCGCGGCTCCAGTATTCCTCCGCCTGA +AAGAGAATAGTTGAAAACATTGTTTTGAGACTTAAAAATTTTTTTTTTAG +TTTTTTTCAAAAATTCTTACATGTTATAGAGTTTTTTTTTCAAATTTTCA +GCTTTTTTCAGAAAAACTTAGTATTTTCGATAATTTTAAATAAAAAAGTT +TTTTTTCAAAAAATGTTTCGGTTTTTTTTTTAATTTTTGGTCTAAAATTC +TCCGCAAAAGATTTGCGTGCTGGCCGAACTTTTTGATTTTGTACCTTTTC +ATAAACATCTTCACCATTGTGAATTCTAGAAGATGATGAAGAGCTCATTT +TTGATGTTGTGACAGCTGCTCCGAGCAATCTGGAGACTTTTGTGACGAAA +AGACGAGAGGTCACGGATATGATGATGATACTGGAAATGAGATATTTATA +TTTACTAGTTCATCGGGAAAATTATTACGAGAAAGATAAACAGACATGTG +CGTTTTTTTAATGGAAGAGAAACACAAGAAAAATCTGGAAAACTAGGCCA +CGGCTATCAGTGTCGATTTACGGCATACGGTCTCGACACGACTATTTTTG +TTAAATGTGAAGGTATGCACCTTTAAAGAGTACTGTAGTTTGTAACTCTC +ATTGCTGCAACATATTTGACGCTCAGCGAAAACTACAGCAATTCTTCAAA +AGACTACTGTAGCCTTTGTGTTGACTTACGGGCTCGATTCTCGAAACGAA +TTTCTGCTCGAATTGTGACAGCCATATTCAATTTGGTATAGTCTTTTCGT +ATTTTTTGCCATTTTTCTGTTTTCTTCTAATATTTAATCTATTATTAAAT +TATGTCCGTAACTCCCTCCAAAATTAGAACTGCGACCGAACAGAGATTCG 
+TTCCGCCCCATATTCCGGCCAATCAGATCGAGTAGGCGGAGTTCGAAGTC +GCTGATTGGTTTGAAAAGTCGCGGAAATTTGCAAGTTTTAAGGTAGCGAA +AACTGATGACTATTGTAGCGCGCTTGTGTCGATTTACGGAATCTCGATTT +TCAGGAATGAATTTTTAATTACATTTTTTCGCTCAATTAATATTCTAAAT +AAATAAATAAATGATTTGAATTAATTTAATTTCATTCGAGCCCGTAGATC +GACACATGTGCTACAGTAATCATTAGTTTTCGCTACGAGATATTTTGCGC +GTAAAATATTTTCCCGTAATAACTCTACTCCGACAAACATTACGACCTCC +ATGGAGGCCTCCAGGTATAGGTGAGACTCTTGTATTTCCAATTCAGAGAC +AATGCGTCACTGGAAGAGAAAACGAAGCGGAAAAAAAAACACGGAAACCC +AAAAATAGTGTTTGCCCCGCTCTATTCTTCTCCAATAATTTCTGTGTCTA +ATTTTGAAAGACTCCACCTGTGTATGCCTTCTCGACATAAACCCCCCCCC +CCCCCCCTATCTTACATGGTACTGATAACACTTTCAGTCTTTCACACTTT +TGGCGCGCAACGCCGCTCTTTTTTCGCGGCGAGCTGATGACGTCATCAAT +TTTTCATCGCTTTTGATTATCTTCAATGTTCTAGAAGGGCACATAGGTCA +TCCTTATTTTTTCCTTCTCTTTCTCGTGACGGCCCTTGTTGCGCATGCCC +GCCCCCTAGAGCAGGGCGTGGCCTGAACGGCGGCTCCGAGAGCTACTCAT +TCTTGCCGCGTCACCCTCCAGCGCCACCCAAACTTCTTCGGTTCTAGAGA +TCGAGAAGAACGTATGATTTTTTAAAATTATAATTGTTTCTTTCGAAAAA +AAAAATTTCATTTACAGTAAGCCAAACATACACAATCAACATGAAACTCG +TAATTCTGCTATCTTTTGTTGCGACAGTTGCGGTTTTTGGTGAGTTTATG +CTTTAGATAATACTTTTCCGCCAAAAATACAGTTGCCGGTCTCGGTATGG +CAATATTTTTGTTAAATTCGAAAAGCAGTGAGTAATGTAGTTTCGAATTT +TCGTTTCTGCTTAATTTTCATCAATTCATCGTTTTTCTCACGACTTCTTC +TTTATGAAAAATCAATGAAAATTCTGACTAGGTCAGCTTAGGGGTGAGGT +ACCTAGAGACGCCACATATGCCAAACGGAAGCTGAGATCATTGGCTACAA +GAATATGCTTTCAAATTCTGCAACGGACCTCTGGGAGTCTGGAAATTCTT +GTCTGAAATTATGCTTTTGAATGCTCGAAAGTGGTAAGAATTTAGAATTT +ATTACAGAAAAACGTTTAATTAATAAAATTAGTTTTATACTTGAAACAAG +TACTGTATGCACTGTATCAAAACACATTTTCATCTTTTCTAGGTATTCAA +CTTCACGTTTTTCTGTAATAAATTCTAAATTCTTACCACTTTCGAGCATT +CAAAAGCATAATTTCAGACAAGAATTTCCAGACTCCCAGAGGTCCGTTGC +AGAATTTGAAAGCATATTCTTGTAGCCAATGATCTCAGCTTCCGTTTGGC +ATATGTGGCGTCTCTAGGTACCTCACCCCTAAGCTGACCATTCCCTAGTG +AGCAAACAAAATTTTGAAATTACAGTACTATTTAAAGGCACATTGATTTT +TTGGGTCAAGCAAAAATTTGTCGTGTCGAGACCGGCTACGGTATTTTCGC +GAAAAATCGCAAAATCTTGCGGCTGGGATATACTTGTGCGAAATACTTTT +TGCATTAATTTTGAGCAAAATTATTTTTTTTAGACTTTTTGAAATCCAAA +TTTTTTGGATTGCGAAAAAAACCTGTGTCCGGTTGTTTCATTAGGCCAAC 
+AAAGTTCCTGGAACACTGATGAAAACCATGATAGAGGCGGAGCATAATAT +CGATTTTTCGTACTTTCCTGTATTTCTTCTTCTATATGGCCGAGTAGAAC +AGGATTAGGGGTAAAGTCAAAATTTTTCTCATATGGATATCATATGGATA +TCAAAATTTTTCTCATATGGATATGGAGAAAATTTTTCTCATATGGACTT +TGAAAGTTGAATCACTTGACATCTGGGAAATTAGTATTCCAGGCGTAAGT +CGGATCTGTTAGAAACGGAATACTTATAGGCTTCGTGAATTAGGTAGACT +TTCAATTAATCTGATCCATGGGAGTCAGACGCGGTTTCCAGGCCTGACGC +CTGCCTCCAACTTGCCCGCCTCACGCCGGTCTCTCGCCTCATTTCTGCAC +TGTGACGAGACAGACGAAGGTCGCCTTCTGGCGCCCGCATGGAAATCCTA +CGAATATGTCAGCTTCTGATGGGACTCCGTAAATCGACACACAGGGGTAC +CTCAGACATTTCCCTCCCCCTTACAAATTGTTAGGACAAGGAGGGGGAAT +TCATCTCCACTCGAGACACACATATGTTGTCGTCAGTGAAGTGTAAAGAT +CTAAACGATTGCGTGTATGAAAAAGCACTCTATGATCACCTTTTTCATCT +TCCTACACCCTTTTTAGGTGTGGTGCCCATCGAGCACTCACGCCAGGCAG +GGAGAGCACCGGTCCCTGACTAATGGGATTCGAATGTTTTAGACCGGAAA +TAGGAGCGATGAAAGAGCATAGAAATGATCATTTGGAAATCACGTTTAAT +TAGGTTACGGCGAAAATTTGCAAAAAAGAGCAGGAAACTTGGCTCAAATC +CTTCGAAATATAACAACTAGGACTTCCATGTAGGCGTTAAAGCGCCCTGT +CTCTCACCCCAATCCGTACCTTAAGCTGAAACAAACGTGAACTTTTTTCA +TTTCTTAAAGGAGTATCGTCAATGGGAAAATTGTTTTAAAATGTAGTATT +TGTACTTCAACTTCCAATTATTGCAAAAGAAAAACGGAAAAAATCCGTTA +ACATTCAGCATTTTAAGTCGAAGAAATCTTTAAAATTTAACTAGAGAAAT +CCTAGGCCACGACGCTCATTCGAATTTTAATTTGTTTTGATATTGTATTT +TGAAAAAAAAACTTAATACAATTCCTTCTTCCCAGTTTTCTATAACTTTT +TGAGAAAAAAACGAATTAAATTCCGAAAAAACTACATTTAAATCAATATT +TTGTTTACGAATATGGCCTAGAAATCGCGTGGTGGCCTAGGATTCATTTG +CGCGCGAAATTCAAATTCCGTCACTTTCGTCGATTTCAACGGCTAAATGC +TGAATGTCAACGGATTTTTCCCGTTTTTCTTTTGCAATAATTAGAAGTTT +GAGTACAAATACTACATTTTAAAACAATTTTATTTTTGGTATTTTGACGA +AAAATTGATTTATTGGTTTTTTTGGTTGTTTGGGACCAAAAAATCCAAAA +AAAATGTTTGGCGTGTCTAGTTTCGACTCGAGACTATTCTGTATTAAAAA +TACATTAAAACATGTATTTTAACACAGTTGTGACGTCATAAATGTATTTT +GATACATTTTGCAACATTACTTAAATAACCCCATTAAAAATTAACCTAAG +CATCAAAAATTTTTTGGTTTTTTTGGTTTTTCGAAAATTTCAATTTTTTT +TGTTTTTTGGTTTTTTTTGGTTTTTCAAAAACTTCAATTTTTTGTTTTTT +GGTCCAACATTTTTTTTTGGTCTCAGCTCTGCTGCCTACCCTAGAAGAAC +TAATAGCGCTTCAAAAACTGATGAAAACGTTCAAATTTGTCGAAATATTA +CGAAAATTTGAAAAGTTGGCTCAAATCTAGATTGAATCGGCCGATTTTCC 
+ACAAGTTTCCAAGTTTCCACAAGTCGCCACATATCCCGAGAAAAATCGAT +TCAAATTGTTTGAAAATTGGAATACTGCGAATTTTGAACCAAATTTCCCT +GGCTTCTCTGTTGAAATACTTGAAAATACCGCGAAGCAAACAAAAAATCT +AATTATTACGTGAACACAAAATTCTGAAAATGCGTATATATTGCGCAACA +TATTTGACGCGCAAAATATCTCGTAGCGAAAACTACATTAATTCTTTAAA +TGACACGCTGTATGTGGTGATTTACGGGCTCAAAAAATTATTTTCGAAAA +TCAAGCCCGTAAATCCACACGTAGTAATTATATAAAGAATTACTGTAGTT +TTCGCTACGAGATATTTTGCGCGCCAAGTATGTTGCGCAATACGCAACCC +CATATGTTGATATATACTGATGTGAGGATAAAAAACAACACAACTTTCAG +CGGCTCCATCGGCTCCGGCAGGTCTCGAGGAGAAGCTGCGTGCTCTTCAG +GAGCAACTGTACAGTCTGGAGAAAGAGAACGGAGTTGATGTGAAGCAAAA +GGAGCAACCAGCAGCAGCCGACACATTCCTTGGATTTGTTCCACAGAAGA +GAATGGTCGCGTGGCAGCCGATGAAGCGGTCGATGATCAATGAGGATTCT +AGAGCTCCATGTAAGTTAGTGGTGGTGGCCGGAAAAGAGAAAACTCGGCC +AAGCTGCTCGGAGTTTTTGAATTTTTGATAATCCGAAATAAAAATTGATT +GCTCGAAAAGGAACAATCTTTTGGAAAAAAACGAATTTTGTCATTTTTTT +CAGCAAAAATTGATTTTCGAATTTTTCCAATAAAAAATCGATAATTTCTC +CCCGTGCAGTGGAAAACAAACAATATTTTTTTGTTGATCGTTCTCTTCCA +AACCCGGAATAGGTACACACATTCCTGCGTCATCCCATTCTCTTATCACA +CTTTTTTTTCGAAAATAAAAGTGTAGAGACGGAAAAGTGAGAAAGGAGTC +AATTTTATGCGAAATTTTGCATGATAATACACTCAAATTAAAAAAACTGC +GTGGCGTGCACTGCAGAAAACCTCATATTTAGGCCCCGCCTTTTTCTCGT +CCACTCACGGAGAAAAGGCAAAAATTTGGGGACCAACCAATATCAGGCCG +CCGACATCCTACGGGTTCCGCGCGCCGCTATGTTTAACTCGCTGTGGGTG +TGGCGAGCTGTCTCCGCCCGCTGCGAGTTAAACATAGCGGCGCGCGGAAC +CCGTAGGAAGTCGGCGGCCTGATATTGTTGGTCCCCAAATTTTTTCCTTT +TCTCCGTGAGTGGACGAGAAAAAGGCGGGGCCTAATTATGAGGTTTTCTG +CAGTACACGCCACGCAGTTTTTTTATTTTGAGTGTATAGGTCTCGATTCT +CGAAAGTATGACAGTTATTTAAATGATGAACTCGTGATGACTGTTAAATT +TTTGGAAATTTCGGGGGAATTATATCGATTTTTCGATAAATTTACAGGAA +AAAAGTCCAAAATCTAGGTATTCCATGGTAGGCAGGCGCGATTTCTTGAC +GCCTGCCTGGAATCTGTCCGCCTCACACCAAAAAATGTCAATCATTTTGC +TGAAAACCAAATTAAGAAATGAAAAAGTGCACTTAGAGATGATGACGGAG +GTCGCCTTAAGGTCAGACAGGTTAAAAAACCGATTTTAGTTGAGTTTTCC +CGAAATTTTCTGAACAACCGAATTAGAAATATGCTGCTTGTCATTTTTGA +GTAAAAATTAACGAAAACTTCGACCAAAACCACGAAAAAAATGAAGAAAA +TAAAGATTTTTCGAGAAAATAACAACAAAATCCAGCAAATAGTGAAAAAT +AGTTTTATCCGAGAAAAAGTAGTTTAGACGCTATGAACTCTCGAAAATCA 
+GATTTTTTCAATCTAAAAGCCATAAAATTATCGATTTTTTAAAAATTCTC +ACTGAAAACCGGCGAATTTCAGTGCTCCACGCAATCGAAGCCCGCTTGGC +CGAAGTGTTGAGAGCCGGAGAACGCCTCGGAGTCAACCCGGAGGAAGTTT +TGGCGGATCTTCGTGCTCGTAATCAATTCCAATAAATATTCTTTGCCCTA +AATACTTTAAATTATCCATCTGACAACTAAAATTTCGGTTCTTCTTGGCT +TCTTCTATTTGTGAAATGGTTTATTTTCCCCCGAACTCTCAAAAGGTTTA +AATATTGTTCGATTACCCCTTTTTATCAATTATTTTCTTCAATTTCTTAT +TTATCATTATTTTTCTAAACGAAGACGGATGTGATTTTAAATTATGTTAA +TGGACTATTTTACAAACTGAATAAATTCAGCATGTTGGCAGGTTTTTTCA +GTAGTTTTTGAGTGAAAATAGAGGTAAAAAGACAGAAAATCAATAAAAAA +TGAAAACAAAACTATGAAAAATGGTTGAAAATCGAGCAAAAATCGTTCAA +AAAAAAATAAATTCAAAAAATAATTGCGTCGAGAAACGCGTCAGTAGCCG +CTCTCTGCGTCTCTCACCCTTCAGCACGCGGAGAGAGCCACGAGAAATGC +GCAAAGGCTAAATTCGGCGCGGAAAATCATTTTTCAAAATAAATTCGACG +AGAAAATCAATACTTAAGTAATTATCGATTTTCAGCTCGTTCAAAAAATT +TTCAGAAACGTTTTAGTCGTTTAAAGGTTTTTTTAAAATTAAAATCGTCG +GAAGTAAAAAAATAGCGCGGATGGAAATCTACGGAGTGCGGAGCGAACAA +ACGCGCGGTAATTCAAATGGGTAGAATAGTCAAAATTGAAAATTAGCCAG +CATCGACCGATTTTTTTAAAACTTAATGGATTTTTTCGTTTTTCTTTTGT +GGTATTTCGGCATTTAGGATTAGATAGCACATTTTAAAGTAAAATTCCCA +TCCAAGCTACTCCACCTTCTCCAGACTGTACAGTTAAACCAATTTGAAAA +GTGTATTGTATCCCGTTTTTTTTTCTGAACAATTTTGAAAATTTTTCGTT +TATCCAGGATACGATAATCATGATTCAAATTCGTTAACAAAAAATGAATA +TATGAGAGCGATTAAAGCATTTGTGTCGGAAAATATGGGTTAAATGGGGA +GAAGGGGGCGGACATTTGGATGGGGTACAAAAAAATATGCAAAAAATGGG +CTAAAAACAATATTTTCAAATTATGCCCGACAAAGGTTCAAAAGTCAATA +TATAGAAATGAGAACATGAGTATTATGCCACGTGGCGGGAAAAATATGTG +GAATGTAATACGATGAGATCCTTGTGAATACAAAGCTTGTGACGACGTGG +CCGAGAAGAACTTTTTAAGCCAACGAGAAAAAAGGGGTTCAAGGCCGAAA +TTTTTTTTGGGCCACCTATTAAGTTAAATTGAAAATTTAAAAAAAACACA +GCGGATCCAATTATTTGCCGAGTTTTGACTTGAGCTCGGCGCGATACGTG +TCGATTGACTGAAAATATTGTTTTTTTTTATTTCCGAATAAAAAATGGTG +AGTACCTCCAAAATTAGCTTTTCATTGTCCATATAGAACTTTTTGATTTG +TTCCACAGTTTTTGTGGCCATCAACTCGGCGATCAACTCGAAATTGTCCT +TGTACCAGTGGAAACCTGAAGGAATTTCGGATGTTTTTGCTTAATCATAA +TCATAATAATCTTAATCATAAGACTTGGAAAATGCGAAATTTTTCGAGAA +TATTCAATTTATCTTCAGATTTTATTGCAACAAATCGATTTTCAACATAA +AATTAATTTTTCCAACTTTTTTTCCCAATTTATGAGAGTTTAAAGATTGT 
+TTTAAAGCAAACCGCCAACTTTACATAAAAAATTAAAATATTGTGAAAAA +AATGATGAAATTTAGCAGATTTTCTGATAAAAAATTGAATTTTTTTGGAT +TCGCGCTTCAATTTCACATTGTTCTTTTAGAAAAGTCGAAATTTTATATT +TCCAATTTTCAGATTTAAAAAAATTTAAAAAGGAATGAACTTTTCCAAAG +AAAAACTGAATATAACCAGAAATTGTGATTTTTCAGCATTTTTTTTTAGG +TTTGAATTTTTTTTTCATGATTAATCACGTGAAAAGTCAATTTTACCGCA +AAACATTTAAAAAATCAAGATTTTTCAATTTTCTCTGAATTCCTGCAGAT +TTTTCGATGAAAAATTGAATTTTCCTTGGAATTTATATTTTTCGGGTATT +TAAAGTTTCGGATATTAAAAAAAATTTTCAATTTTCTCTGAAGTTATCGA +TAAAAATTATTTTCTGCAAAAAATCTACTTTTTTTCGTTGAATATTCCGG +AAAAAAAATCAGAATTTCAAGGCACATTTCCTTTTCTAATCTAATTCGAA +TAATTCAATATTCTTTTAAAAATTCGGGGTAGAAAAGGAATTGTACCAAT +TTTTATTTTTAAAAGTTAATTTTTCTAATTTTCAAAATTTTCTTGAATTT +TCGAATTACAGATTTTCAAAAAAATTTTTTTTGTTTTTTTTTCTCGAAAA +TTTGAAATCCATACATCTAATAGCATTCTTCTTTTCCTCAGGACTCCAAC +CATAATTTATCCTGACTTTTCCAGATCGATTGCCATTTGTTGCAGTAGTA +TCTAGTTCAGGAGTAAATCTCTCGAATCTTCCCTTCAACGCCATCATATC +TTTCTTCCAATTTGCAATTTCTCCTTTTGGTACACGGCTGTATGTCATTG +TTGCACGGAACATTTGTTGACGGGCTTCTTCATTCAGAATTCTGGAAAAA +TTGATGTTGTGCGATTTTTTTTGGTTAAAAAAAACAATTTTCGTAAGTTT +AATTAACTAATATTTTAAAAAATCTCTCATTTTCTGAGGCACCACGGATT +CAAGATCTGGTGGGATTCCGGATCTGGCACCGTGCCAACGCATTAAATGC +AATTTTTCTGAAAAAAGGGCAACGAAGATCCGATTTAAAAAAATTTTTCA +ATTATTTTTCAAAATTTTCACTAACTATAAGAAATTAGAGATTTTTCACA +AAAATTCCAGTTTTCTGTTAGAATTTGAAAAAAAAATTGAATTTTTCCTA +AAAAATTTGTAATTTTCCGATATTTCAAGCTGTCAAAACCTAAAATCTGA +AAACTGAATTTTTAAAGGAAAAATTTTGAGCATTCTTATCAAAAAATTGT +TTCAACTTTTTCTCAAAATGTTTCAACCTTTTTCTTTCTAAATTCTGAAA +AGCATATCTCAGCTTTTGCTAAACTATTTTTTTCCTCAATTTTTGAGAAA +ATTAAAATATAATATATAATATAGTAAATATTGCTTATTTTCTAATAATT +TTTGGTATTTCTATTCTTTCGTTTTTTTTTTCAAAAATTCCAAATAGTTT +TAAATGTTCATATTATTTTTTTTGACGAAAATAAATTTTAATTTTAAACC +GGAAAATTGTTTCGTAACTTTTTTTTTCAAAAAATTTGAATTTTCGACAT +GAAAGATGTAAAGTGTAATTTAAAAATAATAGTGCAGGTATTTTCAGTTT +ACAGCAAAAGTCAGTTTAAAAAATTTCGACTGGTTTTCAAAATGAGTTTC +CTTATTTTTTACACGTAGAACTTTTTTTATTTTCCGATTTTTTTTGTTGC +GCAGAAATTTTTTTTCCGCAAAATCAGGAAAAATTCAGAAAAAGACAGTC +AAAAAATTGTAGATACAATTTTTTGACTGTCTTTTTCTGAATTTTTCCTG 
+ATTTTGCGGAAAAAAAAATTTATTTTTTCATGAATAAAAATCGAATACCC +ATCCAATTCCACAAACTTACTCGTTCTCCTCCATACATTTCGTTTGTTTA +ACTCTCCAAACAAGTGGAACACACATATGATGTTTTCTCTTGATATTATC +AATTAATGCCAGTGCAGCCGGTGTATCGAAGCACCGTGTCATTCTGCACG +TATTCTCATCGATTGGATCAGCTTCAATCGATTGCTCCACAATGTAGGGG +CCTGATGGTTTACGGAGAAGGCAGTCGTCTGGAGAAAAATAGAATAGAAT +AATGATTTTTAGGTTATTTTACGTTTAAAAATCTAATTTTTAAGACGCGT +AAACGTTGAGCTCATTTATAAAAATTCGGCAAACCGGCAATTTGCCGAAA +AATTTCGGAAAATTGTCGGTTTGCACATTTTTTCTTGAAATTTCAGAACT +TCGATTTCAAACGGCAAAATTGTATACATCCTATCAAAACATCAATCTTG +AAAAGCCAGTAAACTCTATGAAAATGTCTAAAGAAAAGAAAACGGTAAAA +AAATACAGTTTTAAATGTTTCCGTCTTATTAATAACAAAATTCGACAATT +TGCCGGAATTGAAATTTTTTTTTCTCCAATTTCCGAAAAAAACCCACCGA +CCACCATAATATCATCGTCTTCTTCTTTTTCTTTTCCAATTCCAAGCCGT +TTGATCGCTTTTCCGTTGGCTGGCTCCATGAGCTCAAGATATCCGTATAC +ATAAATTTTCATGTCTGAAAGAAAATTCAAATTTCTTCTGGAATCAGTTA +TTCGAAACTAACATTCTGGACATAAAACTCGTTGCCGTCGTTTTGTCAGT +GCACGGAGGCTTGCCGGACGTGGAACACGCATCAAACGGAAATAAAGGAT +ACACGGTTTACATTCGTGACGCGACATTACACGATTTAGCTTAAAATTGT +GAAATTAATTTTTTTTAATAGCTCTTTATTTTTTTGAAAATTTCTCCCAT +GCTTTTTCCATTTTTTCAACGAGTTTCCTTATTTTTTGTCCATTTACTGT +AAGTTTTTTTTGAGAATTTTTTTTTGTTAATTTAACATTTTATTAGCTCA +AAACATTTATTAGCAAAAATTTTATTAGCAAAAAAATTTTTTAATTTTTT +TAAATTAGCTCAAAATTCTCGAAATTTTAAATTTTTAGGGTAAACAATAT +AAAACTTAGGGAGTTTTGAGCTATAAAATGATAAATTGATTTTAAAAAGG +ATGAAAAACTTATTTTAAAAAACCGACAAAAATCGACAAAAATGAAGGGA +ACAGGCAGCAGCTTAGCCCCATGCTTAGCCAGCAGCCCCGTAGCAACCCA +GTATCAATAATATCCCGTGCCAATTTTCATAAAACTGAATATAAATTGGG +TTGATGTTGCTAAAGGGCTGCGAAAAACTGACCTGGGATGAAGCTGGGCT +GCAAGGGGCTGCGAAGTGCTGCGAGGGCAAAGCGCTACAGTGCTAAAAGG +GGGCTGAGCCCAGACCCTCAGGAAAAAACTCATACTCGCAGCCCTTCGCA +GCCCACATTTGCGCTCTGATCGCGTGCTATCCGCGCGCACAGAATTTCGA +AAGTATTTTCCAAATTCGGAATGCGCGCGGAGCAGACGCAATTAGAGCGC +GGATCTGGCACGTAAGGAAGAAGTGTGACTGGAGCACGAACCAGTAATCT +AGTCGCGCCCCGTCCGCGCTCCAGGAGGAGCGATTTGCCGAGCAGTTCAG +CCCTTCGCAGCCCTTTAGCAACAACCAAATTTATACAGTTTTATGAAAAT +TGGAACGGGATATTATTGATACGCCTAAGCAGCCCTATTAAATAGTGATG +AGGGCGTAAATGAAATTCGCCATTTCCAGCTAAAATATAAATTTTTTGAA 
+TTTTTTAACATTGATATTCGGAATGGATTCAGCAGAAAATTTGAAGTCAT +TTGAAAATATTTTCCAGATTTCGGTACTCCACTTTTAAAATTGAATAAAA +CTGTAGTCTTTATTCAATGTTTCTTCAAAATTTAAAAAGTAGAATATAAC +TGTGAGAAAATTTCCAAAATTGTCAAAATTTCAAATAGCTGAAATATTTC +ACGGCCCGGCGGGGGGTACATGGATGAGAATTCTCTACCGTATTCCAATT +TGGCTGACTGCGTGCTCAACGTTGAATACTCAGTGTAAACTTTCGTACAC +CGTTGCGTACTGCACAGCGCGCATTTTAATTGACGACATTTAGCAAAAAT +TGAACATAAGATTTTTCGGAATTATGAAGCTCAATTTTCACAAAAATAAT +GAGTTTTTTGTAGAATTTATGAAAAAACGTGAATATATAGATTTTTTGTT +CATGATATTCAAGAAAAAGCGATTTTTAGTTCTTCACAGAGGAATCCTCT +CGCATTTCACTTGCTCATGATGTTTTTTGCTCCACTTTAGGACGATAAAA +ATGCGAATTGTTGATAAAATGAATGAATAATATAAAAAGTGCAAATATGA +CTTCAGCAAGTGTTAAATCCCAAATTTTTCCTGCGATTTTCTGCTAGATT +CCTGGTTTTGAGTAAACAGTCTGATATATTCATGATTATAATGATAACAA +TAACGAACATAATAATAAAAATGGAGAGCACAGAGAAACAACAAATTGCA +AAAACAGCAACTGATATCAGAATTAACGACGACCACGGAAACCGCCTCGG +TCTCCACCTCGCCCACCACGGAAGCCACCACCTCTGTCGCGTCCTCTGAA +TCCTCCTCGATCTCCACCGAATCCACCTCTAAATCCTCCATCGCGGTCTT +CTGATCTACCACGGAAGCCTCCACCTCCACCAGGATCTGTTGAAAGTCCT +CTGAAGCCTCCTCGATCGCCACCTCCACGGAAGCCACCACGATCCGCGGA +TTTTCCTCTATAGCCTTCGAGGCTTCAGTTGTACCCCATTCTTCGTTGGC +ACGCTTCAGATCTCTACAAAAAAAACAAATTAGAAGCATTCAATTATCGA +AATGTGTACCTATCCCGATTTATCGCAATCTGTCTATTCTTCTCCTTCTG +ATTCTCAACTTCTTTAACTTGTCCAGTAGCGGCAGCTTGCTTACGAGCAG +CATTTTCCCGAATCGCCTTCACCTCTGCCTCCTCAGCATCCTGTTGCTCC +TTGACAATCGTAAGTCTTCGAATGACACGTTGCTCACTCTCCTGCTCACG +ACGCTTTTTCATCTGCTTCTTCTTGTTTATAGTCACCGCATTATGCTTGT +GATAGAGAACCTCTCCCTCATCGATTTCTTCTTCAATTTTGACGAGTTCC +AGGGTCAGTCGGGTCCGATCTCACGAAGACGGACGTTGCTATTCTGGCCA +ATTCCGCAGTCACGTCCTTCATAAATGTCTTGTGGAAGTTCTTCTTGCTG +AGGGGGCTGCTGAAACCAATGTCGGCATGATGAGAGTTCCGGTCTTCTGA +ATCCATTTCCTGCGTGGGCTGTGGCGACGAGCTGCACGTCTGAAAATCAA +GTTTTTGTAATTTTTGGGCGCATGATATGGAGCTGAATCATTCGATTTTA +GAATCAGCATGCTTTTATTCATATTTTAGGATCTTTTTAAAAAATCTGGA +CCAACAGTTTTCGAAAAAATTTAATTTTTGTTCAGAAATGTGAATATTCA +CTAAATCGAAAAAAATAATTGCAAAATCCGTCAGCTGAACATTCAAAACT +TATCAATTTGAAATCAGCATATTTCAGTGTATAATTAAAAAAGTTTCAAA +AATTCTGAGACCAATTTTTATTGAGAAAAATAATTTTTCGCTCGAATTAT 
+TGAATTTTCACTAAATGCAAAAAACAGTAAACTTGGGCCCATGCTACAAG +CCTGAATCTTTCAAATTAAGAACCAGCATGATTTTTTCAATATTCTAGGA +CGTTTAAAAAAAATCTGGACCAACAGTTTTTGAGGAACGTAATTTTTTAT +ACAAAAATGTTCTGATTTTTCACTAAACTCAAAAAAATAGTCAAGTTGGG +CCCATGCTGTACACCTAAATCATTAAAATTCAGAACCGCCATGTATTTTT +TCTTACCAAAGGCTCTTTAAAAAAAATCTGGACCAACAGTTTTTGAGATA +TTTAGAAAAACAACTCACTTTTCGACGTTTTTCGCCTTTTCGTGGCTCAC +CCGGTTGATTTTTGCGGCGATTTGTGGTCTTTCGCTGAAAATATTATTTT +TATTTCAATTATTAACGAAGAAAACAAGAAAAAACGACGAGAAAACATCA +AAAAAACGCGAAAAAACATCGAAAAACCACCGCAACCTCATGAACAAAAA +AAAAGCATTGCAGCCGCGGGACTAGTTTTCGCAACTTTCTAGGCCATGTC +CCGTTCGCCGTGCCGTGTATTTGTTTAATTCCCTTTTTGGAAAAAGTCAA +CATATTTTTCTAACAAATCGTTTTTCTATTAATTTTTTTCTAAAACTCAC +AATCAACAGATCACTTTTTGCATTGCAATTCTCACAATATCCCGACGGAA +CCCTCTCCAAATGATTGACCTCTTTGAATAGTTCATCATAAGTGTCGGTT +TCATTCAAATGCACATTAATCATTGTTTTATAGTTTTGCACTTTTTTCGT +GTTGTAATAGTATTGGATAATGGAAGAAAGCGAGCGTTGGGGCATCTGCA +AAAAATAATGAAATTTATTTTCTTTTTATGATTAAATTAAATTTTCAAAA +ATTCCCTTTTTTTGACATATGCACTTACAGCCGCATGAATCTTCTTGAAC +CGTTTTCCGAAATGAAAGAAGCAAGTGGAGAAAAGACTAATTTCTTCTGC +CGTCCAATCATCATGAATTTCTTTTCTTCTCATCGCTTGAACCATCGCAG +CGTCGAAATCATTTGACTGTTTGTTCAGAATGAACAGAGCCTGTAAAAGC +AGTTAGTTTTTTTTTCAAATTCAAAGTACATTTCCGAAAAATAAAAAAAA +GGCTTGATTTTTTAAAATCTCGAATTTTTATTATGGTCAATTGTTATTTT +TTCCAGAGAAAAACTCATTTTCTCCCAATTTTCAGACGTTTCTCTCTAAA +TTTGGTGTTTTTCCAATCGTACCCTATCTATAGGTAATTGATATCGTCCA +GTAGCTTCTGAAATGTATTCTGTAAGCCGATTCTCGTTCATTTCGTCTGG +AAACGCCCAAATTTGTTGATCTCTGCACGGTTCTTTTTCCAATTGCTCTG +CAGTTGGCTGTATAATCGCCTGATATTCGGTTCCCACGTGGATTAGATTG +TCGACGTTGGAAAGTGGATTTGCTGGAAGAAATTGGGAATTTTTCAAGGT +TTTAAGTGGATTTTCAAGCTATTTATAAAAGCATGAAAAAGCTCAGAAAT +GACTATAAAACCTTTTTTTACGTCGTATTTTTTTCAATGAAATTACCTAC +TTTTAATTAATTGTTCGGCTTAAAACCAGAAAATTGTTTCATATCGATTT +TCCCGGTGAAAATCGAAGGAATCGTCGCATTCTCAAAGTTTTTTCACCGA +TTTGTTTCAATTTTAGCACAACTAAATGGAAAAATCACAAAAATTCCATT +ACAGCCGATTTTCGTGAATTTTCCTACATTTCGAACTAAAAATTGTCCTT +TCTTCTGTTTAAACCGGAAATTCTCTTTTGAAAAACCAATGAAAATTTGA +ATTTTCTGGGCTTTTCTTCGGAAAATTATTCTCGAAATTTATCAATCGAT 
+CCTTGGGCTTTTTTTGTTCCGCAGAGGCTGGCGGAGTTTACAAGCGTACG +AAGTGGTTCAACTTTTATATAAAGCTTTATAAATGGGACATAGATGAATA +TTTCGAATGCTAAATGCAAAAAGAATCAGTAAAAAAGCGCGCAGCCCCGT +CCTTCTCTGACGAAAAACGCCGTTTAAGGATCGATTGCTAAATTTTGGCA +GTAGTTAGAAGTGTCAAAATTTCTGCCGGAGAGTCGTCAAATTTCACTGA +AACGTAACCCGGTAATTTCCACAATTAATGGTCGATTTTTCGCAAAAAGT +GGTATGTTTGTCAGGATTTATTAGAAATTGTGGCTGTCCAGATTTTAAAG +AGTATTTTTGGGCAAAAATGTCGAATTTTCTCTGAAAAAGTTCGATTTTT +ATCGAAAATTCAGATTTTTTAGATAGTTTTCATCGATTTTCCCAGTTTTC +AGCCTGAGAACTTTACTAACAGAAAGATGTGTCATGAGCACCACTTTCAT +GATGCTCACGAGCTTCAGCTTCTTCATCTTCGTCCTCTTCATCCTCCAAA +TCTTCATCCTCATCGCCCATTGATTCCCCAGACGTTGTTTCGCGTTTTCT +CATGGATCTTATAGGACGAGCCATCTGAAGTTTCAATTTTAGCTTTTAAA +TTCAATTTTACCGCTTAAAAATCGATAATTCTCCCGTACTCTGCTGGTTT +CTTCTTCTTGTTCCGCCTGCTCCTCTGGATCATCTTCCTCCATTGGCTCC +GGCGATGCATTCAACATATTCAAGCCTTCGTCTGAAATATCTGGCCAATT +TATAGAAAAACCGACAAAATAATAAGCCTCACTTTCTTTTCGAGAGGCGT +CTTCGTCAGATGACGTGTACGAATCCATTTTCTGGAATTTGAGGATTTTT +GAATGTTTTTAAACAAACTTTATAGAGAAAACATTCGAAACACTAGAAGT +TATGTTGAAACACGAGAAAATTTTTTAAAAATCCATGAGAAAAACAGTTT +TGAAAAATCTGTTTTTGGAGGCTCTCCGGATTTTGAGGAATCGTCACCCC +GGAGACGCAGATTCTCCGGTAATTTTTCATTCATATTTGAGTTTAAGAAC +AAAACAGTTTAAAAAAATGTTTTTAGTATTTGAATGAAACTTATAATGTA +TTTTTTCTTCCATTAAAACTTAAAAAAAACTACAAAATTATTATGAATCA +AATTTGAAACCGTGAATCAATCTCCGCGGAAGGGCGAGTCTATACTGCTG +CAAGCGCACTCTATCGCAAATGTACAATTGGCGGTTTTTCAAACAGGAAT +TAATCGGATTCTCGTAGTTTATTTTGGATTTCTTTTTTCGGGAACATATT +GGTGTTTTTGCGTTCAATATTCAAATTTAGAGGAAAACTGCTTCAAATAT +TTAGGTAAACTCTTGAAACCGCTGAAAATAGGCAAAAATAATTATTTTTG +TATTTTTTAGGCTACTTTCTATACTTTTGCGTAAATACTATAGTTTTTCT +ATAAAACACCCATTAAAATTATTTTTATAAAATGATTTTTCCAATAAAAA +TAAAATGCGCAAAATGATTCTTTTCCAGAATCCTATATGCGCCTTTAAAA +TCTCTCGGATTACTGTAGTTTCAAAGAAATTATCCTTTATATTTTTAATT +TTAAATTTTTTCCTGAATGTCAAATATTAGGGGAAAAATTATAATAATAT +GTGCTTTATTCATATGAGTGTAGAATTAGTGAAAAAGAAAAAAAACATGT +ATGGACTGTAAAATTGGAATTTTAGCGAGAAAATAAAAATAATATGCAGA +AAAAATTAAAATTTTCAGGAAAAAAGTCAGTAAAGCCATCAAAAACTACT +CGATTTTGAAGGAAATCAGCAAGAAAAATTAGAAAAAAGTATTTTTAAGT 
+TGGAAAACCCCTGCTTGAATTTGTACACTAAATTGGGCATAAAAGCGTAC +AAATTCGCAAAAACCGGTAAAAATCTGGGGATCGTGATGGATGGAGTGTT +TTGTGAAAAAATGCAGCGAAAAATTGAGTAGACAATTTCAAAAATGTCGA +TTTTTGAAATTTGTGACGAAAAAATTGAACAAAAACTGTTTTTTTTTGGA +ATTTTCAACAAGAAGTTTTATAAATTTTTTTGTTTAAAATTTTGAATATT +ATATGAGTTTGGTTTCACTTAACAGAACAATTCGAACAAAAGTATTCTAG +AAAGGAAATGTGCGCTCCAGCACACTATTTGCCCGTGGAGCGCACTTGTG +TGCACGAACGCTAGCGAGAATGTGTGGTAGAAAGGGAGGGAATAGGAAAT +ATTAACAAAATTGGGCAAAATATGTAAGATTCGGAGAAAGAATTGGAGAA +AAATATGTATTTCGAGCTCCGCGAGCTGATCAATCCAAAGGCTTTCTCCA +TCCTTTTTTCGAGAGGCACATTGCATTATAGTTACACACAGCACGTGTAT +AATGGAACATTGAAGCCTGGAAACGAGCCATCGCTACCATCATTACCACG +TGGATCTGAAAAAATTAAAGTTTGATGATTCGAAAATTTTCTGGAAAAGT +TATGATTGTGAGATAAATTGAATTCTTTGAAAAATCAAAATTCAAAAGCT +TGTAGAAAATTTTATATATTTTTTTAAGCGTATTTTTTCCGTATACATTT +CCAAATTTTTTTGTTACCCAATTTTAAAGATTTTCTTGAATTTTAAAATT +TCTTTCAGTAAAAACTTTTTTTCAACTTTTTGATTTTTTTTCCGCATTTT +TTAAAATTTTATTCAGAATTATTAGATTCTTTTGAATTTAACGAATTTTT +TTCGCTAAAAAATTGTTCGATTTTTCCCGAATTAAGAAAAATATTATTTG +GTTTTTGAATTATTTTCCTGATTTTTTTCGATTAATAAATTTGTAAAAAC +AATTTTTTTTCTAATTTTTGGTTTTGATGATTGTGTTTTTTTTCTGAACT +TTACAGTTTTCAAAGTTTACACCGAACTTCCACATTAAAAAATTCTGATA +CAAAAAAGTATTCACATGATTTTTAAAATTTAAATATTTTTCAAAAAAAA +TAATATTTAAACTGTGTTTTTTTCGGAATTTTTTTTCGATTTTTTCCGAG +TTTTTTTTGGAATTTTTTCCTTTCTGCTCCAAAAATATTCAAATTCAATG +TTGTGTAGAAATTTTATTCAAAAAAAGTGTTCAACTTCTGAGTCTAAACC +TTTTCCGAATCCTTAAATCCTGGCAGAGCTCTCGTGAATTCAGTTGTCAA +TTTATGTGGATAGCAAGCTGCCAGTTTAATGAAAGTTTTAGTTCCTTTGT +CAAGTACTCGATTAATTTTCGAATAATCATAATCATCGACTCGAACACCA +TATAATCCTTGAGTATAGTTCCAAATTGCTTCACGGAATGCAGCAGTGTC +AATTTCATTCTGATTCACGGCGGCTGGTGGTTCTCCGTCTCCAGATGCAT +GGGATGAGCCGGATGGCCTGAAAAATTAATTTTTTGGAATTATTATATTT +TTCTGTTTTTGAAATTTCATGCATCTCGAATATTTTAACAAAATTACCAA +ATTCAACTAGATTTCTTACAACTTTCACTGTGTCGATTTACGGGTTCGTT +ATACGAATTGAATTTGTTTATCGATAGAATATTAAAATTTAGCTAAAATT +GAGAAGAATATAAGAAGAAATTAATTTTTTTAATTTCAAAAATCGAGCCA +GTAAATCGACACGAGCGATCGACACAGTAGTCATTTAAAGACCAGTTTCC +GCCACGAAATATTTCGCGCTTCAAACATGTTGCGTAGTACGTATTCTCAA 
+AATTGTGCGTTCACGTATAATATTTATGCGAATTTTTGGTCTACTTTGTT +AGAGAAATCATCACTAACATATTGCCAGTAAGAGTCCGAATATGATCGAA +CATTCGATCAAGCCGTGACGTCAGTGTATCCGTATACTCATTCATCGTAT +TATAAACATGATCCCATCCAAATTCTTCAACTCGGAATGGCGGAATATCC +TTTTCAGGTCGCTTTCTAAAATCAATATATCCAAATGTTCGATGATGCGA +GTAAATTGGATAATTACACGGCGGCTCTTTTTCCATAATATCTTCTCCAT +TTTCATCGATATTTGCAAGAAGAAGTACAGGCGAGTAGTTTTTCCGATTG +GAACTATATGTTGCTGCAGGAGCACTAATTAATGATTCAATAGTTTCAGT +AGTCATTGCACACATCTTCGCTGGTGGCCTAGTTTGTCCTTTTTCCGTCT +TTTTCAGCTCACTGATCAAATATTCGACTTCAGTTGGCCGACGATCTGGG +ACTTTTCGGAAATAGGCCGACATTCTCGCCTCCCAATAGTCGAGATCATC +GATATTAAGGAAATCAATCTCATCTTGTGTCAAATCAACACGACGTTCCA +ATCCAATACAGCATATAACTGTGCACATTGCGTGAGTCATTGACATTATT +CCGACGGCGTGGTGGAGAGAGCAAACCGAGAAAAACGCAGGACCACCGTC +TGGCGTGCGGCGAGCGAAGAGCACCTGGAAATTTTCAAATTCTTGAGAAA +AACCTAACATCGTTGTTATACGTTCGTTCTCTTGGCATTGGAGTTGGCAG +AATTTGTTTTGAAAAAACGTTGTTTTTTTTTTGAAAGAACATTTTTTTAT +TACGGGACCATGAGATCATGAGAATTCCTATTTACTGGCGCGAAAATATT +GGCAGGCCACGGCAACGAGAGAGCATATGGCAAAGAGAGACGCATCTTAT +TTTGTCTTGTAATTTTTTTTTAAAATAATTTACAATCCCTTTTCAACTAT +CGTGATTGTAAAATATTACAAATTTCAGAATTTCGCTACCAAATTATTAC +TGGAAAACTAAACTCTGAGAATGCGCATTGAGCAACATATTTGACGCGCA +AAGCATCTCGTAGCGAAAACTACAGTTATTCTTTAAATGACTACTGTAGC +GCTTGTGTCGATTTACGGGTTCGGTTTTTGAAATAATTTTCTTTTCGAGA +AGTGACAGTGATATTCCATTTTCCTTCTTTTCTTCCTATTATTTTATCAT +TATTTGCTTAATTTTAATATTCAATTCATAACTAAATTACTTTAATTCAT +TTCGAGTAGACATTCAAAGAATTCCGGTAGTTTTCGCTTCGAGATATTTT +GCGCGTGAAATATGTTGTGAAATACGCATTCTTAGAATATGGTGTTCCCG +TAATATTCAGAAAAGAAAAGATTTCCAAGAACTTTCTGAAGATTTCAATA +TTTGCAAAATCAGAAACCAGTTCTGAATATTCTTTATTTTTAGAAATTTT +TCAAGGTTTTCTAAATAACTTTTCTAAATAACCTACCGTATTTCTTCTAT +TAATATGGCTGCAATACTATTTTTCGATGGTCTTCCCGCTTGCAATACTA +TTAGGGAGTGCAAGTCTAATAGGGAGTGCCATACTATTCTTCAGAAAATT +TTTCTGTGTTGGGGCTTACTAGATTCTACTTGAAAAAACTCCAATTTTAT +TTGGAAGTATAGAAAATTTGATTGAAATTGCAACAAAAAGGTACAATAAC +TTCAATCTCTAAAAATTTTGTTATAAACTGTTGCAAAATAGGCAAAAAAT +GTTATTAAAATTTTAAAATTAGTAAGGAGTGTTTGCAACAAAAAAAAGTA +GGTGCAAGACTATTAGGGAGTGCAACACTAATAGGGAGTGCAATACTAAT 
+TTTCGGAAGGTCTCCGAGGGGCAATACTAATAGGGAGTGCAAATCTAATA +GGGAGGCCATATTAATAGAAGATATACGGTATATATAGCTTTGAAAAATC +GGAAAATGCCTAATTTTTACTTTTTGAGGTTTGAAAATCTCTAAAAATTC +AATAAAATTTCAAATTACCGCTAGATTTTTCCAATGAATCATCCATGGTC +TATGACAGAGCATTCGATTCAAATAATCCAATTTTCGAAATTTCATGTAT +GACCAATCAATGCCCAACAACCACATTTGTTGTCCACCCTTTTCCAGAAA +TTTGCGACGATGATGATCCATAAGTGATAGGCATCTGTGACGTGATGCAG +CCATTAGTGCAAGATAATGACGAGCCGAAGCTGGTAGATCACTTATATCA +ACGAACATATGGCCATAACTTCCTGTCATATGAACATGTAGAGTTGGGTG +TTTACATGTGAAACGGAATAATCTGGAAACGTGAGGGAAATTAGTTCGAG +ACGGGGAGGGGCAGGTTGGCGGTGCCAACCGACAGCCGAACATTGGGGTT +TCTCAGCTGGTAGCGCCAGCCGACAGTCTACTGCAGTACTGCAGATAAAT +TTTCGTCGGCTGTCGGCTGGTGAAAATTTTCATGAAAATCAATAATTTTA +AAGAAATTGTTGCAAATTTTTCCCAAACTTGACCAAATTTGTTGGCTGGC +TGTACCAGCCGACACCCGAAATTTAGAACATTGATTAGAGGCTGCTTGGC +AGAAATAATTTTAAATTCAGAAATTCAATTCGTTTTCAAAAAATATTTTT +TAAAACTTTACCGATCAACTTCTGGAATCGGATCAAAATTGAGCCAATCC +ATGGCTTTTCGTCTTTTAGTTGTAGTGTGCATTGTGTAGATCTTTTTATA +TTGCTGCGAGGTGAGTAAATGAAGAATTTTCGCGACCCGTTTCTGAAAAA +ACTCAGTTTTCTAAGGAAATTTTGAAAATAAATTCGAGAAAAAGAAACTG +AGTCAGCAAAAGAAAATTGGAAATGTCTGTCTGGAAATATTCGAATATTA +TATTCAAAAGTTTTCAAAAAAACAACGAAATTACAAGCAATTGTGATCAG +AAACCGCGGAAGGAACTGGACGAAAAAAATTATCTTTGAGACGAATCTCT +TTGCATCTTTGTGATCTAAAAGATTAATAAAGGTTGTCATCACATTTTTC +GAGATTTGGGAATGTGATAAGGGTGAAAAATGGAGATTAATTGTGGTAAA +ATGAGGAAAAACCTAATTTTTGGTGAGAAAATTGTGGAAAAACTATAAAA +GAATCTTTATGGAGTTTAAAACTCAAGTTTTTCACGCTTTTCCGCACTGT +GCGGAACGTTTTTTGAGAGAATTTGGCCGAATTCGGTGATTAAAAAAATA +ATTTCAAAACTTTGCGCCTCAATTGTGATGTATTACCGTACTCTGTTGCC +ATTCCACCAAAATTTCCTTCATTGTTTTGCCATTTTTCTGCATAATAACT +GTTCTGGGTTTTTTTGCTTCATGTGCCCAAATGTACGAATTTCCCTAAAA +ATTATACCTATTTTTTCAAAATTTTTAATCGCTAGAATTTTTTTTTCTGC +ATTTTCTTTAAAAAAAGAGATTTCTCGCAAGTAGAAGGAGAAAAAATGTG +TGGCTATACTTCTTCTTAAAGAATGCACGACTAGCCATAGCTCAAGCCCC +CTCTGGAACGTTCCATCTTCCTCCCATTTTCCCACGTTCAAGAATCATCA +GCTTCTTCTCCCTCAGCTTCTCTTCTTCTAAAACCACAACTAGACAAATG +TTCTTGTTTTCCACCCTATTTTTCACATAAAACCGCCGAGAAACCCGCTA +TCACAGACTCAATGCGCACCGGAGGGGCTCTTTGTGTGTGTGTACTGATC 
+TCTGCGTTATATTCGAACACCGGCGCACACTCGGATTGAACCAGAGGGGG +GGGGGGAGGGGGGGGGGGGGGTGAAAAAAGAGAAATACTCTGAAATTCCA +TAAAATCTAGAAGAAGAAAGAAAACAAAGGAAAAATTGGACATTCCGAAG +TCAGGCTAAAAAATCTCATAAAACAAAATCTATTCGATTTGTGACCATTT +TCATCTATCTCTCTCAAAACCCGAATAAACAAAGCCTCCCGTCCCCAAAG +TGTGCTCTCATGCTCTTCTGGAGCCTTCTAGACTGTCTGTAGAGCCTAGA +GACAGCGGAATTGCACTGAAGTGATGGAGAGACGTAGAGAAAACGCCTGA +AGAAAAAAACGAACACTTTGGTGGAGGAGGAGATGGCTTCCCTCCAAATA +AACAACAATTTCTATCGTTTCTCTGTGATTGTGTTCTCTTCTATGTATAC +TGTTACGATATTGAACAGGAAATTAAATTGAGCACTCTGAATACATAATA +CACAATAAATAAATACAAAAACTATAGTTTCAGCACAAAAAATTCGAAAA +AAAAACGATTTTTTTTGTCCGAGAGGAGTATATGGCCTAGAAAAAGAAAA +CTCGGCCACTCTGATGCAATAAATTTAAAAAATTATGGCCGAATTTTAGA +TTTCTCAGGCCAATTTGATACGTTTCTCGAAAAGCCATAAATTAGTCGGT +TTTTCACGGGCTTCTTGCCTTCCTCATTGCATTTTTCGCGCTCCATTGGC +AATCTCCTGCTGGACAACGCGTGGGAAATCGTGTGCCCCACACGGGCAAA +TACATTTTGTTTTACAAAGAAAACCGTGCCGCGACGCGACACGCAACGAG +CCGTAAATCTACCCCAGATATGGCCGAGCTCAAATGGCCTAACCTGTCAA +AATCTTCCACTTCAAAATATGAGGGAAGCCAGAAGCGCGTGTTGTTTCTG +AAAAAAAAACCCGCCTAAAGTTGATTTAAATTATCGTTTTTTTGGAAATA +ATAAAATCGATGAATTTGTAGATTTTGATAAATTTCCGATAAAAAAAAAA +TTTTAAAAGAGGAAAAAAAATGTTTCTTCGCCCTTTAGTACCAAAAATAC +GCCCAACTAACCAAATCGTTCTTTCAATCTTTTTTAAATGTTTGTGCGTC +TATAATTGTCGCTTCAGAAAACTACACAAAACACACACACACACAAGGAG +AAGAAAAGAAAAAACGTGTTCCATGACCTGCCACTGGGATCGATCTGTAA +AAGAATTGGGGAAAATTGAGGTAAACTGGTTTTTTATCGGGAAGATTTTT +TCGGAAGGATTGAGATGAAAGTTCGAAAGGTAATTGGCAAAGTTGAAAAT +TGAAAAATTCGAAAAAAATCTCAATTCTCTGCTGTAACCCCCAATTTTGC +GTCATGGCCTAGAGTATGCAGCGTGGCCTAGAAATTCCTAACGTGGCCTA +AAAGATCACGGCGGTACCTATGATTTTCTAGCGTGACCTAGAATATACCA +GACCTAGAATTTGATAGCGTAGAATTTCCCAGTATATCCTAGCAGTCTTA +AGTGACAGTTTCTCAGTACGTCCAAGAATTCGTCAGCATGACCTAGGATG +TTAAAGCGTGGCCTACAAATTTTCAGAGTCTTCTAGGATATTCCAGTCTA +AAAATTTTCAGTGAGGCCTGAAATCATCGCGTGTCCTAGAATGTCTAATA +ATTGCAAAAAAAAGATTTGAAAACTAGTATTTACCCTAAAATTGCATTTT +GAGCATTATTTTTAATCTAGTTTTAAGGAAAAAATCAGAAAAAATAAACA +TTTTTTGATTAAATCTTCCGATCTACAGATAGAAAGTGTGCAAGAAAGAA +TGCAACATTGTGCTCGGTGGAGCAAGAAGATAAAAGAAAGAGAAAGAAGG 
+TCCCCCACCCCTCCAGTGGTCGAAACAATGATAAATTGGACAAACGGAGG +ACCAAGGGGCCGGGCAGACACAAGAGAGAGAGTACGTGAACTGAGGAGGG +TGTGCAGGGAAAAATGGGATGGGGGCAAATCTAGTTCAAAGATGAGACAC +TTTTCAGGATCTTTGATTCTGAGAAAAATTTTGAACAAAAAGAATACTTC +AATAATTTAATGGCACATAGAAATATTTTCAGATTGTTCTTCAAAAGAAA +AATATTTTTATGCCCGGAAAATTTATTTATTGCATTTCTTCCAAAACAGT +GGCCGGTCTCGACACGACAAATTTTTGTTAAATGCGAAGAGGTGTGCGCC +TTTAAAGAGTACTGTAATTTCAAACTTTCGTTTTAATATTTACTTGTGGG +AAAACATTAATGCTTAACGAAAAATTACAGTACTCTTTAAAAGCGCACAT +CTTTTCGCATGTGACAAACATTTTCGCGTCTCGGTGACAACTTTTAAGTT +AAAGGCACATAGAACTTTTCTGAAGAATTTTATTTATTTTTCTGAAAGTT +AATTGCTACAGTATCCTTTTTCAAGTCGCACCGAGAGCCAAACTGTAGCA +AATCATCAAAAAAAAGTCGACAAAACGTGCCGAAATCAGTAAACTTGAGA +GCTTTAAAACTCTATTATCAGTTCTTCGCCAACAAAAAAAAAGAGTACCG +TATCAAAAACGAACTTCGACTTTTTTGGCTCTCCTGCATACGGACATGAT +TCTGATTGACAGTTTTCATGTTTTTTTTTGGGAGTTTTATTTATTGTGCA +TTTAAAAAATCGTATAGTTTGATGCGTGGCCTAGAATTTGCCAGTGTGAG +CATTAACTCTCCACGGTAGCCAAGAAATTTTCTACGGTGGCCTAAAAACT +GCCAGTGTAGCCTAAAATATTTTATTGTGGCCTAAATTTTCCAATGGTCT +GTTTTTTTTATAGTTGCCTAGAATTTCTTTTCGTGACCTAGAAGCGTACA +GAGTGGTGGCCTAGAAAACGATTCATGGCAGAGTTTTGAAAAAAAAACGA +AATTTCGAGAAACAAGCGAACAAAAATCGTCTGTCGAAAGAGTATTTCGA +ATGCTGGGGATGCAAATCAGCAAATCATTCAAAAAAAACTTTTGTGATAA +GAAATCAAACTGATAAGCCAGTGTCAAAGTCTCGAGGATTAAAAATAGCA +TTTCAGGTCGGGGTACGGTAGGGTTTTTGTAGAAATTAATGCAAAATTTC +AGTGGGAAACGAGTTCGTGGCCTAGAAAAATCATGTCTGAAAAATTGCAA +ATGCGCTCCCCCGAAATGGTTAAAAATTTTCAATTGATAGCCTATTTGAA +GTGGCGGCCTAGAATATCAAATAATGGCCTAGAACTCAAATTGGCGGCCT +AGAAATCAAACTAATGACCTAGATTAGGGCATCTTGTAGGCAGCTTAGAT +CACCTATTATAGGCAGGTGTAGGTAAAATTGTAGACAAATGTAAGTTTCT +TTGAAGATAGGCGTAGGTTCCTTTGCAGGCATACATAGATCATTTATTAG +GCAGATGTAGGCCTGATTGTAGGTACAGTGCCGGCCAAAAATATATCCTA +TTTTTGACTTTTGATAAATTTACAAATTTTCCAAACGAGCACAACTTTAA +AACTAGAAATGTTATCGAAAAAAGTTCAACTCATGTATGTATTGCCCATA +ATTACGTCTACTCGTATTCAATTGTTTGTTGTTTACTAGTGTCACGACAA +CAAATACAGCGGCCGACATCTCGTAAGCCCGTTTTTGACAACGTTTACTG +ATTCGGCCGTATCTCGAAAACTAATTTTTTTCTGAAAATGTTGTTAAAGT +GAAATAGTTTTCATGTTATTTGTTATCATTTGTGTTTATTCACTTTGTTC 
+TGAAAAATCCAGTAAAAAAGTTATGGGAGTGCAAACTTGTCGCTCACTGC +CACTCACCCGCTACAATCAAAAATCAGGTTACTTATAGTTAGTTCTAATT +TTTTTTTTGTAGAGCATTTTTTAGAAATAACACATGTAAAATCACAATGA +AGCTATATTCAAACACGATATCAAGATTCAGGAAAAAATTCATTGTTTGC +GAGAAATGTTCAAGGCGTGGCCAAACACTATTCAAGTTTAATCTCTCATA +ACTCTTTTTCTGGATTTTTCAGAACAAAGTGAATAGACATAAATGATGAC +AAGTAGTATGAAAACTATTTCACTTTAACAACACCTCGAAAAAAAAATCG +CTCTCGAGATACGGCCGAATCAGTAAACGTTGTCAAAAACGGGCTCACGA +GATGTCGGCCGCTGTATTTTTTGTCGTGGCACTGGTAAAAAACTTAAAAA +ATTGAATACAAGTAGACGCAATTATGGGCAATACTTCATCAGTTAAACTT +TTTTCGATAACATTTCTAGTTTTAAAGTTTTGCTCGTTTGGAAAAGTTGT +AAAGTTATCAAAAATCGAAAATGGCATATGTTTTTGGCCGGCCCTGTAGG +TTAAATATTTTCGTTATCAGGTGTAGGCATGAATCGCCATGTAGGCGGGC +GTAGGTATCTTCTAGGTAGGCGTAGGTTACCTGAGTCAAATTATAGGCAG +ACGTAAGTAATCATAAAAATTGACACTTTGTGGGCAGGCGTAGGTCACCT +TTTAGACAATCATAGTTAGCTTTCTAAGTAGGGTTAGGACACCTATTATA +GGCAGGTTTAATTCCTCTTATATGTGTGCGTAGAACACGTTATAGTCAGG +TGTAGGTCACTTTGTAGGCAAGTCTAGGTTCTTCTTTCGGCAGGCGAAGG +TCATCTTCTGAGCAAGTTAAGGTTCGCCTTGTAGGTCGGTGTAGGTTGCC +TCGTAGGCAGACTAAGTTACCTTCTGGGCATGCCTAGATTGACTAGTAGG +CAGGTGTTGGCACCAAGGGTGTCAGTGTCCCGTAAAAATTACAAAAACGG +GACAACGGGATGTCCCGTTCCCGTGAAAATTTTAAAAACGGGACAACGGG +ACGTCCCGTTCCCACGAAAACACCCAAAAAACGGGACAACGGGACATCCC +GTTCCCGTGAAAACGCTCAAAAACGGGACAAAAGACGTCCCGTTCCCGTA +AAAATGACAAAAACGGGACACCGGGACGTCCCGTTCCCGTGATAATTTTG +AAAACGGGACAACGGGACGTCCCGTTCCCGTGAAAACACCCAAAAAACAC +CCAAAAAAACGGGACAACGGGACAAACGGGACACGGGACTTGACACCCTT +GGTTGGCACCACTGAAGAATGCTGAAACCGACTTTTTTTTCAAAAATGTC +CTGCTCAACGAGCCGAATGCATTTTGGTCGGATTATATTGGCACACCTTT +TTGCCCCCGAGAGAATCATTGAACAAAATCTCATGCACTTTCAATTTCAT +TTTTCATTTCATCAAATAAAAAGATTCGGGAGGATTTGATATATATTGGA +AAAATAAATGAATGGGGGATTTGTATGGTGGGGGAAAAGTGCACACCCGG +AAATGAGCAGTAGGATTTTGAGCAGGAAATTGAAGGAGCTGGAGCAGGAG +AGCAGCTACAGTAACCACCCCTTCACAGCAAAACACATGGCTCATAAAAT +TGAATATTTGAAGTGAACTACTAGTGATAAGCGGTGAAACGGGGTATGTG +GCACTAATTTTTTTTTCTTCAAAAAACCCATTTCTTATCACTTGTGTGCA +GAAGCATTATGGAGACAGGCAGGAGGCAAGTGGTGGTCTAAAAATTAGAA +AAGTTCGGCCACAGCTTGCCCGATAGGAGCACACGGGCATACTGTTTCAA 
+CAAAAATTCGAAAAAATTGGAAAATCCCGGAATTTTGATTCCGCGGAATC +CGACGATTAAAGAAATTCTCGCGTTTCTGATTTCACGGAAATCGGTATTC +TCGAAATTTGGTTTCTGCGGTGTCTATTGTTTTGGAAATGTTGTGTTCAT +GTTTTATGAAGAACAATATAATATTATGTATAATAGATATTATATATTAT +ATATAATAGAATAATTTTAGTAAAAACCCCACAAAACTTCGAAAATAGAA +GAAATTCTCGCGTGTCTCCAAAATTACAAAAAAATCAGTTTTTTTCCTTT +ATTTTATATTACACAGGATATTTATATCAATTCAGCAAAAAAACGGGCGG +GACAGAAAATTAAGAAATTTGCGAATATTCGTTCCCACGGAAGTAAATTT +CCCCGAATTAGAAAAAATTCGAATTTTTACTGGATTTGTCTCAAGCTTTT +GAATCTAGAAACATTTTCTAGTAAAATCTCTTTAAAAAATTTTTTACACC +AAAAAACTTTTTAGGCCCTGAATTCTGCGAATTAAAAAATTCCGCAAGAG +ACCACACAAAATCGAGAGACTTAGTTAGACAAGTAGAGGGAAAAATAAGA +GGCAACAAATCATCGAAAGTTTTTTTCTGCAGAGAGAATGGGGGAGAAGT +TGCGGCGCTGAAAGAGAGAGAAAGAGAGAGAGAGACAGAGAGGGTGAGAG +ATATAACAGAAAACCAGGATAGTGCGGAGGAAGAGAAGAAAAAAGTGTTA +GAAATATTTGTCCGTCGACGCCTTCTTCGTCTTCTTCGTCGCCTTCATAA +ATGAAACTATGCATTTTCCTCAGATATTGCCTTACTATTAAATGGGGCCC +GGGCTAAAGGGTGTGGCCGGCCGATCCAAAACGGTGGCCTAGAAAAATAT +CGGCGGTGGCCGATATCAAATAAAAAAAACATCGATAGTTATGAGACCAT +AATTTTTAGTTTTTCAGATTTTGAGTCACTTTTCTGATTTTTATCCCAAA +AATAGAGCTCTTTGCTGTTTTTCTGCAGAATCAGCAGGTGGCAACTTGGT +TTTCGGAATTTGTGCTATTCTTAGAAATTCCCGACACAGCATCGAGTTTT +ACTGGAGAACACGTAGCGAGCACAGAAAAAGGAAGGAAAGACAAGAATTG +GCTCGGCGAGCAACCAGAAAAGCAGCAAGAAAAAACTGACCGAGCAAAGA +ACAGCGCGATGGGGCTCATATATATCTAAAAATGGATGGATGGAGAGATG +AGAGACAGCAGCAGTATTCGGTCAAATGAAGACAAGCCAATTTCGAAGCT +TATTTGGTATTCATGCGTGTCTTTTTTTTTCTTGCAAGAAAAACTGATCA +TTAGCAGAAAATTCAAAAAATATTGGTTGTCCGAGAGGAGTACAGAAATT +AAAAAAAATCGTCCGAAAGGAGTACAAAATTCAAAATGTCTATTATCCAA +GAGAAGTACAAAATTCAAAAGTGTATATTGTCCGAGAGGAGTACAAGATC +CAAAATATGAATTGTCCGAGAGGAGTAAAAATTCAAACCACAAAAATCCG +AATTTGCCAAGAAAGGGGCGGAGCCTGATTCGAGTGGAGTGTCGTTGCAA +AACGCAAAACTTCAAAAACATACGGTTTTCAAAATCTACCTCGCCGACCT +ACCGTAACCCTCTAAAATTTCTAGGATCAAATATCATCGCAAGAAAAGTG +TTCGTTCGAAACGAAAAATATTTCTGACGCCTCATCAATCATCATATCAG +TAACAAAAACCTGAGAGAGACGGAAACAAAGAAATATATTTTGAACCGAA +CAGGGATCCTGCAACAAATCACGTAATGGACAAAATGCGCCCTATTGCTA +AATATGCAGCAAGACGCAATTGCATTCTTCTGCCAGAATATCGATTTTTA 
+TACGTACTTATAGTGATAAAAAAAATGCTGAATTATCGATTTTTAATTCC +CCGGAAAAATGTGCGACAGAGCGTGTTTGCATTTTTTTTTTCGAATTTTC +GTGCAGAAAAACCCGTAAGCATCGATTTTTCTCAGTAAAAATTTCTCGCG +CCAAAATTGTTTCTATTTTTTTCGGGAAAAATCGAAAATCTCCAAAACTT +CGGTGGAGCGCACTTTCTGGACCACAATTTTTGAAGGGAAACATCACTTT +CTGCGGTTTTCAATAAACGAACAAACATCAACAAGAAACGAGAGAGAGTT +TTTGTTGACTTTCAAGAAAAAAATGGATATCGAATTTCTGTGCTCCATTT +CCCTCTCCGGGAAATGGCGCGGGGAACGTGCAAACAATGCGAGTATGTCT +AATGGCTCTCACGTAGATCGAGAAAGTGGAAAATTTGCTAGTATAGACAG +TTTAAGGAGGCTCTTTTATTCTATTGGGGAACCGATTTGTTTATGTCTTG +CCGTGGAGCGTAGTTACTGTAATTTTTTTGCAAACGTGCTCCATCGAACA +GTAAAAAAATCAGTATGCATACTAGTATGTACCTTTAAATAGCTGTAACA +CGAATTGCCTGTGAAAATAAAAATCTCGGCAAGTTCTCTCCATCGGATCA +ATGTAGGGAATTTTGCTGTTTATAATGCAAACTCGCTCTAATAAACTACA +ATTTTCGATCGGTATTTCATGCATTTCGACTGATTTTCGTTATTTTTTAT +GTACGCTCTCTATTGAAACACGGGGCCCGAGAAAATATCCAAAATCTAGA +GAGGTTGTATTGCTTAGGCTTAGCCGAAAACCTGTAAAGATTTTTTAAAA +ATTGTTCAGCCTGCGATGGACGACTTTTTGTAAAACTTGGCCACCAACTT +TTTTACGGTGCGGCTACATCGCATTTGGGTGACCTTTAATGTGTTCAGGG +TGTCTAAACATGCACCCAAAAGAGAGTGTCATTTTTAACGCGAAACAGCA +TCAATCCAAACGGGGCGACAAAAAATGAGAGAGAGAGAGAGAGCGGGCGC +AATCTAATGTTTTATCAAGTGTCTCTTCTTCTCCTCCGACCTCGTTTGCA +TCCCCCTAATTCATCATTCGTCGGTTTGTAATACAGAGAGAGGGACATAG +TGAGTGAGAGGGAAAATTGAGGAGATTCAGAGAGTTAGAAAGAGAGAGTG +AGAGGGAACTTCAACTTTTTTTTTCAAAAGTTTTGGTCGATGATTTTGAC +ATTTGATAAAACTGAATGAGAGATGATGCTTAGATAAATGAAAATTGATG +AGATGATATTTATAAATGCACTTGCGATGTTGTTTTACGTGCAAATTGCT +GATTAGTCGAGAAGTTGTGATTTTCGTGTCGGGAATATTTCACTAGGAAC +GCCCTTGCGCCTTTAAAATGTAAAGTAGCATAATTTATTTTCAGAAAATT +TAAGCGTTGGAAAAATAAGTAGTGCGCAACATATTTGACTCCCAAAATGT +TTTGTAGCGAAAACTACAGTAACTCTTTAAACGAATACTGTGGTTCTGGT +GTCGAGTTACGGGCTGCCAAAATTCGAAAGTAAATTCATTTATAATCGAA +CCCGTAAATCGACACAAGCGCTGCAGAAGTCATTCGAAGAATTACTGTTG +TTTTCGCTACTAGATATTTTGCGCGTCAAACATGTTGCGCAGTACACATT +CTCAGAATTTTGTGTTCCCGTGATATAACAATTCATTTTTCTGTCTATAA +CCTCTTTCAATTATAAGAAACCATTTGCAGCGAAACACAAAAAATTTAGC +CGATTTCTATTTCACCTATAAAATTCGCGTCAAATGGCCCGCGCTGTCAA +GACAGTCATAAAACCAAATGTTATGCAAATGGCGTAAAATTCAACAGTGC 
+CCGCCCGCATGGTCATTGATCGTTTAAGAAGAATGGAGGAGGGTGCGGGG +AGCATCATTTCCTGAAATGAGCCCAGAAGCGAGAGATTAGAGAATTAGAG +AAATGAGAGATCAATGAGGAGAGGGTGAAAGGTATGAACAATGAATAAGA +AAGGGATGGAAATGATCACAGATGGAAATAGATGGAATAATGAAGTGCAT +CGATGCACCATTTCAGATTTTTCGGGCTTTTCGCAGATAATTGAGGAATT +ACATTTTTCGGGCTCCATCGATAATACCCTGCCGGACAACGCGTTGGAAA +GTGTGTGTACTCCACACGGGCAAATACCTTTAGTTTTACAATGAAACCCG +AGCCGCGACCCGACACGCAACGCGCCGTAAATCGACCCCAGCCGTGGCCG +AACCAAAATGGCCTAATTCGTCAAACTTTTACATTCCAAAATATCAGGGA +AACCAGAAGTGCGCGTTTCCTGTTGTCCGAAAAAAGATTACAACAAAAAA +GAAGAAATGGAGCATTTGCGCTCCATCACACTCTCAGACAATTTCATTTT +CCACATCCTATATATATTTTGGTTTTTCTGTCGTATTTTGTTTTAATTTA +TTGGTATTTCGTTCAAAAATAATTATTTTGACTGTATTTTTGGTTGCATA +CATGTAGAACTGCTGTTTTTTAAGATATTCTGCCCATTCAAGTTTTTCAG +TGTAAAATTGATATATTTCATTCCAACTGAAAATGAGATCGAAACGATGG +AAAACCTCGGATATTACTGATTATGGAAAGAAGAGAAAAGAATCGGAAAG +TTGTGGATCAAGTTCACCGATTCTCGAAACACAGTCATCTGGCGGTGCGG +AACTTGACGAAGTTACTGAGGATGAATATTCTAGTAATTCGAGCAGTAAT +GAAACTAGCGACGAAGAGGAAAACTCAGAAGTACCAAATGTCTTATCTAT +AACAGAAAGAGGTAAGAATTGCGTCTTCTAGTGATCATACTTTTCGCCAG +ATTCCCTAATGTAATATATTTTGTTGTAGAGAAAAGTTGGCAAAAGTTAA +CGGAAAACGATTTGGGACGAATTCGTTTCATCTTGAAGTACACTAGCAAT +ACTAAAAAATGCGTGAACGAGTATTTTCAATATAATCATGGGCAAAACAA +TGAAATTATGAAAAGTCTATTATTGGATACCGATGGAACTATGACTGCAA +AGGCTTGTTCGGAATGTGCCTACGATTTGAATCAGTAAGTTACTCTCTCG +ATTTATTCCCAAAATTAATATGTGCTTCAGGTGCCACTGCAAAAAACCGC +TTCGCTTCATCAATGCTCCGTGTGGTTGGTTTGCTATTCAAAACTATAAA +TAGTTCACTGTTTCCGTTCAGAGGTCATCAACCAAGTTCTTCATGTTGAA +AATGCGGAGCCCACCAGGATCAACCATGTAATCGCAACACTCTTCCGGAA +TCACATTGGCGAGATTTTGTTGGTCCACTCTATTTCTGTGCGAGAACTGT +GATAAAACTAGTATTTTCAGCACAAAGGCTCGAACTGCGGAAGCTCGCGC +ATCTGAAGAAGCTCAAATCAGGATTCAAATCCAAGACAACTCGAACGCAT +TCCAAAGATCGTATCATAACGATCCACAACCTTCATCAGCCGAAGAACAT +GAGGAAGATATCGTGGTGGATGGCTGAGTACGGAGCTCAAATGCCTTAAG +GCGAAACAATTGGTTTTTTAATTTGCTGGTTATCATGTTAGATTTTGAAC +GTGTTAGGTCTTTCAATTGTTTTTTTTTTTCGAAATGTTGTTGTTCTAAT +AAATTTGTTTTATTTAATCAAACGTTTTTTAGTCTACTACGGGCGTGAAG +CCAGATATCAGTGGTATCTTCTTATCAGAAGCTGAATCATTTCCGGTTGA 
+CAATGTTTGAAGGACATAAGAAAGGCTGTGTTACTGATTTCGACCATTGA +TTTGTTTATATATGGATATGTTCCACTGCCTTTTGGAAAGGCAGTATTCC +CGGTATATATGGGCCTAATACGGAATCTAAAATAACCTGACACAAACCTG +ACGTTGACCTGTTGCCGGCCCGCGGCGGCTTAGTGTCAACTTGACAGCGG +GTCGCGATTTCACCTGCCAGTTGTTCTCCATTCAGCAGCCAGCGACCTGC +TGGCAGGTTGCCACTAACCTGACGCGGTTTACCTGTGTTATCGGCGCGTG +CATAGCTTAGTGGTTTCAGGAAATGATGCTAGTAATCAGAAGATCGGGGT +TCGGGAAACGGCAGGGGCTTGAAGGTTAGGTTCTATGAAGCAGGGCGAAG +GGTTGACAAGGAGAGGCAATAAGCAAGTAGTAGGGGTTCTCTAGAAAACA +TTTTTGTCTTTAATATGCGTTTCCTACTGATTTATTATTGATATTTGGAT +CCCCTTTTCTAGAAAAAAAAATCAGAATCAGCAGAAAAATTTGAGAAAAA +GTCATAGCAAATCAGAGTTGGTCAGAGTAAATCAGAGCTAGTCATAGTAA +ATCATAGCTAGTCAGAGAATATCAGAGTTAATCAGGGTAATAAGTAGACC +TAGTCATAGTAAATCAGAGCTAGGCATAGTAAAGCGTGGTTACTCCGAGT +AAAACCACACTTGCACCGAACTGCGGTTAGTGTGCTTTACCATTATGTAA +CTCCGCTTTTTACTCTGAGTTAGTATGATATGGTTTGTCTGAGCTGTGGT +TGGGCTTCGCGGGAAACTTGAATAATTCGAGACAAAATCTAATTTTAGCG +AATTTTCTTTAATTTCTTTGAGGTTTCTACGACAGAACTCGAAAAATTTC +GGGTTTTAATGTTTACACATTTTATTTAAAATTGAATAATCAACTGCGGG +ACTCCTCGAAAATCACATGCTCATTTAAATTTTGAAGTTCAAACCTCAAA +AAACGCGCAAAAACCAAATTCAGCTAGGATATCAAATTTATGATTGAAAT +CTATATTTTGATGCGGTGTTTCTGAAGTTTTCGCGATAAAATCCGAATAA +TAATTCCACGTACCGTATATTCTCTATCTAATTTCCAGGTCATTTTTTAA +TGCAGCACTATTAGAGACTGTCGTACTACTGGAGACTGCAGCATTAATTT +TCGAACGGCTACTGTCAATTATAGATCACTAGTATTTAGTCACAAAAGCT +AATTTTTTAAGCAGAAATTCATAAAAATGTTTTCAATATTGCGAACTTTT +GTAACAAAAAGACCCAGTAATTCAATTACTTTCGTAAATTATCAAAAAAT +CATCAAAAATATACAAAAAAATACCAAAAAATATTGAAACTTTCAAGTGA +CTCTTTCAATAGAAAATGGGGTGCAGCACTAATAGAGACTGCTGCACTAT +TTTTCGGACCCTTTTTGAATGCAGCACTATTAGAGACTGCAGTATTTACT +ACTGGAGATGCAGCACTAATAGAGAATATACGGTATATACGTAATATATT +CTTGCAGAAAAAAGTACGATTATCAATGAAAAATAGCTGATAAGAGGCTT +TTGTTTGAACTAACAGACGGAACGACTCCGGTTTAGTTCAAAAAATTCTA +AAAACACGTTGTGTCAGGCTGTCTCATTGCGGTTTGATCTACGAAAAATG +CGGGAATATTTTTCCAGAAAAATTGTGACGTCAGCACGCTCTTAACCATG +CGAAACGAGATGAGATGTCTGCGTCTCTTTTCCCGCATTTTTCGAAGATC +AAAACGAATGGGACTTTCTGACTCCACGTGTAAAAAGGGGTTACGACGGA +CCCTGGCCTAGAAATTAGGCGTGAAAATTCTCGGGCACTGGATGTAGTGA 
+ACGCCCGCGATGAAAAATTGGGGGAAAATTAGGCTTTCTTTGCGAGAAAG +ATTAATTAAAAATGTTTTCCTTTGTCGAAAATAATTTTTAAAAAACACAC +CACGTGTATTCAGCTCGACCAACGCCTCGAAAATTTTCAAAAAAGGCGGG +AAAAATTAGTTGAATTCGCCAAGAGGAATTTCACCGCAGCGCGTGCAAAA +ATTTCAGCATTTGCGCGTGACGGTGTTTGCACAAATTACACCGAATGGTC +GAGCTGAAAACACGTGCACACTTTTAAATAAAACTAGAAAATAAATCCCA +GGCCTGCAAATATTGCACACAAAACCGTAATCCCCTTCGCGCTAAACAAC +ACGCGCAACGATGCTCCGCTTGGGGACAAGGAAAAATTAATTTAACTCGG +GATTTTCATTAAAAAATTAGGTTTTTAGTTAATTTTTCGATGTTTTCACT +GCGAAAAAGTGTTAAAATAACGATTTTTCAACCTATTTTCAATTAATCCG +TGCAAAAAATCGTGTATTTCTCGAGTTTTGAAAGAAATTTATGAAAATCG +GCATTTTTAATAATGGTTTTTCAAATAAAAATATAATTTTTCGGTGCAGA +AAAGTCGTTGCTCGTACAGTTTTTTTAAAGCATTTTCACATCAAAATCCT +CCATTTTTCCAGTAAATCGATATGGAGTGCGACGAGACAAAGCTGAGCGA +CGGCGCAAGCGGCTGGGTGCCGAGTATCCCGACAGATATCGATTCAAAAG +ACACACCGTTGCTCGATATATCTTCTCAGGCGATTTGGGCGCTTTCCAGT +TGTAAAAGCGGTAAATTTTCCGACTTTCAAGGGAGAAAAGTGTAGAAAAA +TCGAAATTACTTCTTAAAAATCTCGTAAAAATCGAATTCTTTCAGGATTC +GGCATCGACGAGCTCCTATCCGACAGTGTTGAGAAATATTGGCAAAGCGA +TGGCCCGCAGCCGCACACGATTCTTCTAGAATTCCAGAAAAAGACCGACG +TGGCTATGATGATGTTCTATTTGGATTTTAAAAACGACGAGTCTTATACA +CCGTCAAAGTTAGCATTTTTGGCTTTTTCAAACGAAAAAATACAATGAAA +CACTGAATATCTAGTTTTTTTCTCAATTTTTGCCTAAAAAACGGCGATTT +TTCACTAGCTTTTCAATTAAAATTTGAACAAAAAGTTTTTTAAAGGAAAA +ACATGAATTTCTAGCTTTTTCAGAGGTTTTCTATTAAAAAATAGAGATTT +TTGTGATATCTGACTGAAAAATTACCAAACTGTCGATTTTTTTAAACTAT +TTTTCACTTAAAATCTGCAATTTTTTTTTTCGAGGAAACATGTGAATTTC +AAGCTTTTTCAGAGATTTTCTATGAAAAAGGTTCGTGCCGAGACCCATGT +GCTTTTAAACTTCAGAATTTTCCCAATTTTGAAATTAAAAAGAGAATGAA +AATTGATTTTCATGGAAAAATGCGTTTTTGGCCCAAAACCTCCAAAAAGT +ACAAATATAGGTCGACTTTCAACTGTTTTAGATCAATTTTTTTGCAGAAT +TCAAGTAAAAATGGGTTCATCTCACCAGGATATATTTTTCCGTCAAACAC +AAACATTCAACGAGCCCCAGGGATGGACATTTATCGATTTACGCGACAAA +AATGGGAAACCGAATCGCGTTTTTTGGCTTCAAGTACAAGTTATTCAGAA +TCATCAAAATGGGAGAGATACTCATATAAGGTAGAGGAATTGAGAATTTC +AGAACGAAAATTGCCGAAAAAATGAAATTTTAGCGAATTTGAGTCGGAAA +TTTCGAAATTTGATTGATTTTAAGCAAATTTCCAACTAAAATCTTGAAAA +TTTGATCTTTTTAGATAAATTTTTTTTTAATTTTGTGCTTTTCAAAAAAC 
+CTCAAAAAACAATTAAAAATTGAAGTAAAATTAATTTTTCAACAATTTTT +GAAAGGCCGAATTTTTGATTGAAAATTTTCACAATTTGTCCATTTTGTGG +TGGGGCTTATTCCGAAAAATCGTTGTTTTTTTTTTCAAAAAAGTTATAAA +AACTTTAAAATTGCCATGTAAAATATGTTTATTCTCAGACCTCGTAGGCA +CGAAGCAGGCGTAGGTCGCCTCGCAATAAATTTGAAAATCTCAAGAAAAA +TCAATAAATTTGTGATTAATCAAAAAAATTTAATTTCCTGGTCCCAGCAC +GAATGCTATTTTTCGAAAAAAAAAAAGAGGCGAGCCTAATATAGACCACG +CCCACAAAATGGGCAAAAGTTTGATTTTTCAAAAAATCGAAACAAAAATT +TTTCCAATTTTGTGAGATTTTAAAATTTCCGGTTTTTGGAAAATCGAAAA +AAAATTTCTCGTTTTTTAATTTTCAAAAAAAATTGTGCCTAAAATTCAAA +AAAAAAATCAATACTTTCTCAAAATTTCCAGAAAACAGTCCATTTTCCAG +GCACGTTCGAGTCCTTGGACCCCAGCGATCTCGTGTCTCCACAACGAATC +GAATATTCACCGGAGAACCACACGGACCGATTCCCGATAAAAATATCACT +AATTTCGACGACGAGGATTTTGCCAATTTTATCGATCACTCACTTGTTCA +CTTATCACTTCGTTAAATTTACCTCCAGTGATTCCAGATAATGAGCCAGT +TTTGCATTGAAATTTAGTGCCAAAATATAGAAAATCGCATGATTTAACAT +AAAATAGCGTTTCGAATTGAAACAATGGAAAAAAAGTGCTATGATGATTT +TTTAACACTTTTAATTGTTCCAATTTGAAGTAAAATCTATTTTCAGATAA +ATCAACTGATTTTCTATATTCTGCCACTAAAGCTTAAAAACTTGCCCTGC +TGTCCTAACCTTCAAATTGTTCCCTGCAAATTTTATTATTCTTGTTTCAT +ATTTTTGCGATTGCTTCGCGAGACCCAAACTCACACATTTACCTGTAAAA +TATAATCGAATAATTATTTATATATTTTCTGTAAATTTCCTTAGTATACT +ATAAATTTTCTGATCTCTCTTCAAAAATCGCTAGAAAAAATAAACAAATG +TCGGTTTAAAAATTCCTGGTAATTTACCTTCTATAGAAAATTTTTCGAAA +AAAAAACCGAAGAAATTCAGATGGAAATTCCCGATCCCGAACTGCCGGGA +ATACCGATTGATCCGCAAGATTTGGAGATTCTAGACACGCCCACACGGTT +TTACGAGAAGCTTTTAGTGCGTTTTTCGTGTCGGGACCCGGAAATTTGAC +ATTTTTGGCGCGCGGCTTGTTAGACTCCAAACCTTTTCAAAGATTTTTTT +TTCGAATTAAATAACATTCGTGCTTGGGCCCGGAAATTGAATTTTTGATT +TGAAAACAATTTTTTTTGAGTCCAAAATTTTCAAAGTTTGTCCATTTTTG +GCGCGTGGCCTAGTAGGATCCGCCCCTTCTAAATTTTTTTTGAGCAAGTT +TTCTGAAGCATTGATTTCAAAAATTTTTTTTGGAAATTTCTGGTTTATTT +TTCCGGTTTTTTTCCGAGTTGCTGTTTAAGTTTGGAGAAATTCCAGAATT +TGTCAATTTTTGGGGCGTGGCTTTTTCAGTAAGCACAGTTTTTTTTTTTT +GAAAAATTGAAATTTTCGCGGTGCGGTTCAAGAAAAACCACAAAAACTCA +ATGATTTTTTAACGAAAATTTCAAATTTCTTGCAAGACCTACTGCAATTT +CGATTTTTAGAAACTTTTTGAAAAAAATCCGAATTTTCTGATTTAGCCCC +GCCCCAAAAATGGAAAGATTTCCGAAAATTCGAACCAAAAGTTCGCAAAA 
+ACTTGAATTTCTCTCACACAGATTGACGCGCTAATTTGAATTTTTCCAAA +AATAAGCCCCGCCCCAAAAATGGACAAATTTTAAAAATTTTGAACCAAAT +AAATTCAATTTTTTTTCGCTTTTTTCCGTTTTCGAACAAAAAATTCTAAA +AATATATGGTTCTAGGCGGGGCTCAGGCACCCATCTACCTACTTAAAAAT +GCGTTAAATTTCAGGAATTAACTGCATCAACCGAACGGCGTCTCGCATTG +TGTAGTCTGTATTTGGGCGAAGGAGATCTCGAAAAAAATCTGATCGCTGC +GATCCGAGAAAGATCCGAAAAATCCGAGATTGAAGTGACGATTCTGTTGG +ATTTTTTGCGCGGAACACGGACCAATTCAAGCGGCGAAAGTAGTGTAACA +GTGCTGAAACCTATTTCGGAAAAGTCAAAAGTTGGTTTTTTTTGCAAAAA +AAAATCGATAAATCGATAAAAACCGACAATTTTGAGAATTTTCATTTCAA +ATTTGAGTCCCACATGCGCCTTTAAATATGGTGTACTGTAGTTTTAGCTC +GAATGTTGAATTTCAAAAATTGAGAATAAAGAAATGTCGTGACGAGACCC +ACAAATGTTTTGAAAAAAATTTTCAATTTCAAAAAAATGTAAAAAATTGG +GAATTTCCCTCCAAAAGTTAAATTGGTTTAGTCACAAACTTTGAAATTTT +GAAATAAAATTTTTTTCGGCTAAAAATAAGTATTTTTTAAAAACTATTTT +GAAGAAAAAAAGTTAGGTCTCGCCACGATGTATCTTGTATATGTGTATCT +AAATTGCCATGTCGTGACGAGACCCTCTCATATTTTACACTGCAACTTTT +TCCTCACGAGGGACGAGGAAAAGTGGTTTCTAGGCCATGGCCGAGGGGCC +GACAAGTTTCATCGGCCATTTATCTTGCTTTGTTTTCCGCCTGTTTTCTT +TCGTTTTTCACAGCTTTTTCCCATTTTTTCTTATTAAAACTGATAAATAA +ATATTTTTGCAGATGCCAAAACGATTTTCAAGTAAAAAAATCATGTATTC +AGTGGGCAAGCAGCGGTGAAAGTGGGCATTGTAATATGATGGATTACGGG +AATACAAAACCTAAACTTTTTCTGAAACATGATACATATGATGCTTAAAT +GCTGAGACTACCTGATTTTCATAACGAGACCGCTGAAAAAGTTTTGAGGT +TTTCAAAATTCAACTTTTTGTGCGAAAATCTCGACTTTTTCACCGAAAAA +GTTGAATTTTGGAAACCTCAAAACTTTTTCAGCGGTCTTGATATGAAAAT +CAGGTAGCTTCAGCATCTAAGCAGCATATGTATCATGTTAAAGAAAAAGT +TTAGGTTTTGTATTCCTGTAATCCATCATATTACATTGCCCACTTTCACC +GCTGCTTGCCCACTGAATACATAATTTTTTCACTTGGAAATTGTTTTAGC +ATCTGCAAAAAATATTTATTTATCAGTTTTATTAAGAAAAAACGAAAAAA +ATCAGTGAAAAACGAAAGAAGACAGGCGGAAAACAATACAAGATAAATGG +CCGCTGAAACTTATCGGCCCCTCGGCCATGGCCTAGAAATCACTTTTCCT +CGTCCCTCGTGAGGAAAAAGTTGCAGTGATTTTGCTCAAATAAAAAAATC +CCCCAAAAACCGATAATTTCACCATTTATCAGATATATCTCTTCCACACA +CCGGAGCTTAGTGGCTTAGTAAAACGAGTTCTTCCACAGCGAGCCGACGA +GATTATCGGTCTCCAGCACATGAAATTATACATTTTTGATGACAATGTAT +TGATTAGTGGGTTAGTTTTTTGTTTGCATGGGTCAGACTACAAACTACAA +AAAGCCTAATTTCAGAGCAAATTTGTCGGATTCTTATTTTACTAATCGAA 
+CGGATCGATATTTTCTATTCAGAAATTGCAAACCATTGGCAGATTTCTTC +CACGAAATTATCAATGTTGTTGGTGAGTTGGCGATTGCGCTCCACCGCAC +ACACTGTTCAATGGGGCTCGCTTGAACTGATAATTTTTTCTAAAAAATTA +CAAAAATTGTTTTAATTTGCCTTCAAAAAACCATCAGTTAATTAAAATTT +TCTAAGCAAAAAATTATATACTTTCTAAAAAAATTGAATTTCCCGCGAAA +ATAATTTTTTCTGAGAAAATTTGAATTTTTCACCAAAATATTTTTTTCAA +CATTTTTTACCACAAAATGAGGCGGGGCCTCGGTATACACGCTCAAAAAA +TCAAAAAATACTTGTAAATTTGACTACCAGCTATTTTTTTTTCGAAAAAA +AAATCGAAAAGTAGGCCAGTTTTCGAAAAATTCTCGACAAAAATTCCGAA +AAATGGGGCGGAGACCTGTGCGGTATTCGGCATTCGGCATATGCCGATGC +CGGTTTTTGGAGCCCGTCATATGCCGTTATGCCGATTTGAAAATTCACGG +CATATGCCGAAAATGCCGTTATGCCGAAAAATCCCGAATGCCGCACAAGT +CTGGGCGGGACTTCAAAAACCACGCCCACAACAGAGAACAAAAATTTGGA +GTGATGGTTTGCAATTCCCTCGCGCACTTTTTCTTGTTTTTTTTCAAAAA +GATGAAAAAGCGCTCTACTGTACACGTTTTTTTTCGAATATTTTCATTAA +AAATCCAATTTTCAGCCGACTCGAGCTTCATTGTCGAAAATGAGCAACTG +GTGCCGAGCCCAAAATGTGATGTGCACCCATATTTAGGTAATCTGAAAGG +ATGGGTAACCTGAAATTTTTCCAAAAAAATTTTTTTTAAGGCTCCGCTCA +TCTCTACCGAGAAATGCTCAAAACACGTGTGAATCGAGTTATCGAAAAAT +ACAAAGAATCGCGAAAAACGTCGTCGAATTGCATGTCTGCTGACACGTGG +ATTTACCCCGTTTTACAAATGGGGCTTTTGGGAATTCATCAGGAGTTTGA +ATTTTTGCAAAAACTTTTCTCACTGAAAAATCCGGAGCTCAAAATGACGA +TGGCTTCGGGATATTTCAATTTTATTCGAGATTATGAGGAATCGATTCTG +AAAGAAGGAGATTATCATTTGGATATTCTTACAGCTTCTCCTTTTGTAAG +ATTTTTTTTGAGGGAAAAATATCTGAAATAAGTTCAAAAATTTCAAAATT +GAATTTTTTCGAAATTTTTGTAGGAAATTTTGGTTAAAAAAAGTTTTCTC +GAGAAATTTGAATTTCCCGCCAAATTTTTTCTGTGAAAATTTGATTTTCC +CCTCAAACATGTTTTCTCATAAAATTTGAATTTCCCGTCAAAATGTTTCT +GAGGAAGTTTGATTTTCTCATCAAAATTTTTCAAAATTTCCAGGCGAACG +GATTCTTCGAATCAAATGGCTTCTCGAAATATATTCCACCACTATATTCC +AACATTTCTGATCAATTTCTTCGAAAACGAGAAATCAACGGCCGATTGAA +TGTAAAAATGTTCGAATATCGAAGAGAAGAATGGACATTTCATGCAAAAG +GTCTTTGGGCAGAACATAATAATCAATTAATGACATTAATTGGCTCATCA +AATTACGGTTATCGATCGGTTCATCGAGATCTTGAAGCTCAAGTGATGGT +TGTTACAAGAAATCCGACACTTATCGATCGATTGAAAGATGAGAAAAATC +TATTATTCGAATATTCATCGATACTTGACATGGCTGCACTTCAACAACCG +GAACATCATATTCCACCATTAGTTCGAGTTATTTCACGTCTTATTCGGAG +TTTTTTGTAGACGTTTTCCAATTTTTTATAGCGGAATAATAAGGTTTTTG 
+ACTTTAATAAATCTTTGTAGATTTCAGTTTTTTTGAAATGCAACATTTTG +CCGAATTAGGCCATTTGACAATTTTTGGTCGTGTCACGGCGCGGTTTGCA +GTAGAAAACTAAATGTATTTTGATTTGTTTTTTCGAAATATCCGAAAAAC +AACAAAAAAATCTGTTTTTTGTTGAAAAAAATGTTGCGAATTTCAGAAAA +CGGCACTTTTCCGATTTCTGCCCCCTAGGGCTGTTATGAGGGACGTCCCC +TATTGGGGGGCGGGGACGATCATTTGTCCCCCCATGGGGGACGGGGGCTC +GTCCCCGTCCCCACGTCGGGGGACGGGGAGCGCCCCTTGTCCCCGATGGA +ACGAAAAGTCGTCCCCTTTTAATTTTTGATTTATTTTAGATTTTCACTTC +ATTTATTACCGGTACAGAGAGTGTAGATAGTTAGAGAGTGCCAGACATCC +GGGACCCAATGGAGCGGGGCGCGCGGAAGAGACGATTAGTGTCGATTTAC +GAAATTTTCCTCGTTGTCATCATTTCGTAAATCGACACAAATCGTCTCTT +CCGCTTGGGTCCCGGATGTCTGGCACTCTAACTATCTACACTCTCTGTAC +CGATAATAAAATATTTTTACGTGAATCTCGTTTCCACTAAGATGTGCGGG +TGGATACCGTGGATATTCCACACAAAAAACGTGTCGCTTTAATAATTATG +AAGCAAAATACAAACGCACATTCTTCACATTGGGTAAAGGGGGCGCGCTC +CCCCTTTACCCGGGCGCCCCTTTTCGGGGGGGGGGGGGGGGCGTATTACG +GGAGGTTTTTTGAAAAATAATTTTTGAAATTTGTCAACTGTCGACAGCTG +TCGTTCAATTAATGTTTTTTTTTCAAATCTCCATACAAAAATGTTCATAG +TAAATTCTTTATCAATATTGGGGTCCCTCACAAAGTTCAAGTTTTCTTCT +CACTCTTCACGGCCGCCTTCGATTTTTTCTTAAGACCTTCATTTGCATCA +AAATACATTCGACAAATTTGAGTCACATAAGCATCATCATGTGCGTTTGC +CTGCAAATGTTAACAAAGTACCAGGCGGATGCCAAGCTTTCACGTCTTCA +TGTCTACGTAGAAAATGCAGTCATGAAGTAGGCACGCAGACAGACAGGCA +AGTACATAATCAGGCAATAAATGGGCAAGTAGGCAAAGCAGGCATGAAAT +AGGCGCTTAGGCACGTGGGTAGGCAGGTATGAGACATAGGCAAACATGCA +GGCAGGTATGAAAAGCGAGAAAAGGCAGGTGAGCATGATGAAGGCACATA +GGCAGGTAGGCAAGGAAGCTCCTAGACAGGTACAAAATAGATGTGCGCAA +AGTAGGCATGTGCAAAGAAACTAAGCTCGTCGCCAGGCATGAGGTAAGCG +CATAGGCTTGCAAGTAGGCGAGTAAACACGCGGGCATCGAATAGGCAATT +AGGCAGGTATTTTTGTGCTCCATTTGGAAACACACTTACTGGATAGAAAA +GAGTGAGAGTCGTGTAAAATGCAAATGCCAAATAAATTGGATCCGGTGCA +GCACATTTTCTCTTTGATCTCGTTTGTTCAATCAGACTCCGATAATTGAT +TTCTCCAGTTTCACTTGTATTGATCGCTTGGAAGAGGCTCTCGATGACTG +AAAAATGCTCAGAAATTTCGGATATGTTCTCGATTTTCAGTGAATTTTTC +CTGAGAAAAAGCTAAAAACTCACGAACAAACGCTGCCGATCGTCCACCAC +CAGTATTGCATACGACAAAAACCGGTCTCTCGCTGTCTGTCACTAGTTTC +AACAAATACCTAATCTCCTCGGCATCTTTCGGTCCCATATGCTCCGGCCA +TCCGGTATACTGGTAATGAGTCACCTCTTGAGTACGCTTCTTTCTGGAAA 
+AAAGTTTGTTGCAGGCTACCTTTTATGCCTGCATGGAAAAAAATAGTAGG +CGGACAGGCGTGAGGAAGGTTAGAATATTGATGAGAAAAATTGAAAACAA +TCATTTCAAAAAGCAAAGAATTGACCGGTTATTATTAAAAAATAAAAATT +TTTATCCGGACAATCCGATGCAGATGTGAGTGCCTGCCTACCGGCGTGCC +ACCGCGCCTATTTCATGCCTGCGTGCCTATACTTACCCAAATGTTAGCAA +CAATTGTCTAGTCTTCAACAAAGGCTTCGAGATACTCTTCGTGCAAGTGA +TTGTTAAGTCCGCCAAAATTAGTTTTTCGTCGAGTTCAGTGGGAAAGTAC +CTATCACAGTTCACCTTATTAGCTCCAATAAAGTGATCTGAAAAATAAAT +GGATTAAAAACGGAGGGTGGCTTGATAGACAGGCAGGCAGGACTACTGCC +CTGTGTAGGCCGTCTTGTAGACAAGCAGACAGGAGTGGGTCACCATGTAG +GCAGACGGGAGGGTGTAGGTTGTCTAGTGGGCAGGCAGGCGTAGGCTGCT +GCCAAGCAGATATAGGCTCACCTATTTCCGCGAAATCGCACAACATCACA +ATTGTGCTTGGTTTATTCAGCTTCACTGACTCCCAAAATGCGGCAATCGC +CAAAATATCGGAGCTTTTCTTATCAACAGGCAACTGCGGAGCAGGCATCA +ACACAAACTTCAAGCCGTTATCCAATTCAACAACATGTGATGATTCTCCC +TGGCTGAAAATGAAGATTTTTTGGTAGGTACCCATAATTTTGCCTACCTG +TCTACCTATGTGCGCTTGAGTTAAAGGTTAAACCTAAGCCTATGCCTAAA +CCTGAGCCTAAGCTGAAGCCTAAGCTTAAGGTTAAGTGTAAGCGTAAGCG +TAAACCTAAGCAATATGTAGAGGCGAAAGGTAGGCAGGCAGGCGTAACAG +TCTTACCCATTCTTTTTCGAAGACTCATATGACACCGGATCACATTGACA +GTGATTACCTCCCTGTAAGCCGCAAAACTGGGCAAAATCGTCTCCCGTAA +TTTCTAGGTTATTTGGAACATTCTCAAATACTGAAGTACAATTTCTTTTT +CTAGTAACGAAATCCGAAATTTGCGTGTTGAATGTCTTGAGGAAGTTTGA +AAAACGTGTCTCATTCACATCCACACGACTGATCATCTGATTTGGCGGGT +ACAGTTTCTCTTTTTCTTCCGAGTCGAGTATTTTGTAGCCGTTCGGGGTT +AAGCAAAAGTAAGCAATTATTGCACAAATTATGACAATGACGATCAGAGC +CAAAATTACATATTGTCCGTGTGTTCTGGGATTTTGGAAATTTTTAGTGA +AGCTTCTAGGCTTTAGCTTATGCTTAGGCATAGGCTTAGGCTTCGGCTTA +GGATTAGGCCTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAATTA +GAGGTACCCGCCAAATATCTAGAAGCTTCACTAAAAAAAAAACGTTTTGA +ATTTAGCATGAAAACAATTTTGAAAAAGTAAAATGTTTTCTTATCTCAAT +TTTTTCCTTGATTTTATATTGTAATTAAAAATAATAATTTGAATTTTCAG +CTCGATTTAGATAGAGTTGTGCTAGATTTCAAGAAGAAATTGAGAATTTT +CGTAAAAAATATTTTTAACGCGAAAAAATTTTTAAAGCTTTAATCCAAAT +TGTTCAAAAAAGTTTGGAGTAAAAAATTTGGAATTTTTTTGATTTTCACC +CGAAAACCTTTCTTTAGAATGTCTGCCTCTTGCCTCCATCCGCGCCTTAT +GCTCAATCCGCGCCTTATGATTTTTCTCCGGGAACTTGAGACGATTTGGT +TGTTGGGAGAAGAAGTGCGGAATGTGGAGCGTTTGTTGAGAAAAGATAAA 
+ATTTGGAATTTTTTTGATTTTCACCCGAAAACCTTTCTTTAGAATGTCTG +CCTCTTGCCTCAATCCGCCTTATGATTTTTCTCCGGGAACTTGAGACGAT +TTGGTTGTTGGGAGAAGAAGTGCGGAATGTGGAGCGTTTGTTGAGAAAAG +ATAGATTTTCTACTATTTCTCAATTATTCAATACTATCGAACGACTTGAT +GTCCAGTCTTTGAATGTTCTTGTCGAGGAGGCGAAGGTTATTGGTAAGTG +TATGCGTGGCTTTCCGATGAATTACCAACGGCACGGAAAGCCTGAGTGTA +CCCTTTCGATTTCATCTTCGGAAAGTGTGCTAAGCGTAGGCGCGTTAAGC +TAATTTTTCCTGGTAAATCCGCAATTCTTGAAGATCGAACCAATAGGGAA +CACTGTGGCACCACGTGCAACTTCAGGGGTAGGCGGCAAACACAACGGAA +ATTTATCGATTTGCCCAATTTGCCAGAAATTTCGGTTTCCGAAATTTTGT +CGATGCATTAAGGGTTTGATAATAAAATCGAGTTTGGGAATTAGCGTTTT +TCAAAAATTGTCGAATACCTTCAGAATTTTGGTTTCCGAAAGTTTGTCTA +AAAATGCAGGGGTAGGCGGCAACTGCCGTTTGGCATATTTATTTTTGGCG +AATTCACCAAATTCCCGTGAAACATGCGTGAACTTTCTTCTCGTTTCTTA +ATTAGTTTTTCATCAAATTGATAAAAATGAGAAAATTATAAATTTGTAAA +TTCACTTCAAAATGCAGGCGGGCAGGTTTCAGTCAAGCCCTGAAACCGCG +CCTGTCTACCATGGAAGCCCTACTTTCAAATAAGATCAACTCACTGTGGT +GGTGGACCATTCTTTTCAGAATTTCCAAGATCTGGCGGTGGGAGAATTGC +AGTCGGCAGAGGCTTGAATAAATTTGAAAAAAACAAGTCAAGAGCAGCAA +GAGCCTTTTTCGGTTCATCAAGTCTGTTTTGAAAACTTGAAAACTCCAGT +CCACCATCATCCAAGCTCTTCAGAGCATCCTGAACTTCACCGAGCGACTT +CTTATTGCCAACATCCTTCACAAGTTCTACCAGTTTTTCCAACGATTTTT +CCAAGCCCATAAGACCACCAAGATTAGCCCAATCAACCTTCTTAGCTTCC +GTGAAAACATAACCATAGTCTACAAATTTTGTGGAATCAGACTTTTTAAT +TCCACTCTTCCATGCATCTAGCTGAACAAACATCACATCCAAAGATGCTC +CAAGCGCCACCAGCTTCTCCAGATTATCCTTATCGTCCTGTGCCAAAGTG +GCTCCACTCGCTGCCAACTCTCGTTGCACAATTTGTGCATGGTTCTTGAT +AATATCCACATTTGCTCTTTCTCGGAGCACGTCCTCCAGTCGAGCAATTT +CTTGTGCCGCTGGGCCAATGGAATTAGTGTGTTCAGCGGCGTTTGGAAAC +TGGGATGTTTTATCCGAAGCAAATTTGCGGCGCTTGCTAAAGTACAGCGC +CAGAGATTTCAGACCACTATTGAGGCTTGAAACTGAGATCAGAGCATCTT +CGACGGCAGTGACTATCGGCGGGAATTTTTGATCTGCTCGGAACGAATTC +AAATCTGACAGGTTCTTTTGAATTTTGTCCAATTCTTTGTTGGATTGAAG +ACAGCTCAGTACGTCGAGAAATTGAGCAATATCAGATCGAGAGTCATTGA +TTTTTTCAATAGCACGTTTTTCTAGTGTTTTCAAAAGATTCGGATTCAAG +AAAGTAGCGTTTGCCGTCATTCTACTCATGGTCAGCGACAAGCCGGTCAT +TTTTTTCACTCCATTAAAGCATGGCTCAGCGGTTGTCCAATTGCTATTGT +TCATATCCTTGCAAACAGTGAGCACTTCTGCGTGTCCTGATTTGCTTTCA 
+ACAAGTTGATTCATCACACTTTTCAATATCGATTTCGATTGTTCCAATTC +GGTGTCAATGTTCGATGCAATTTTAGATAGTTCCTCCAGATTCGTGAAGT +CGGGGGCGGTAGTTAGAGTAGATTTGGCGCATTGGGCTGAAAGTTTATTT +TTATTTTCGTTTTTTTGAGTTTGAACTTTTGTGGACAACCGACAGATTGG +CTCAAACTAAAAGGACTCTAAAATATACAAATTTTGGAACTCAAATCAAT +TTCGCAAATATTACTTTTTTCTACGGAGCTTTGATCAGGGTGGGGTGGCA +AACGATTTTTTTCCAGCAAACGGTAAATCGGCAAACCGGCATTTTGCCGA +TTTTCCAAAATGGCCGAAAATTTTTGAATCCCTCCCTCTAAACACTTCCG +GCAAATTGTTATCCAGCAAACTACAATTCGGCAAATTGCCTGAATTAAAA +ATTTCCGCCCAATCGGCAAATTGCAGTAACTGCAAATTTCCGATAAATTT +GGAGAAACCGGCAAATCGCCGAAAATTTTTGGTAACTGGAAAAAATTTCA +AAAGGCACTGTACCCGTGCTTCCGTTTTTTAAAAAATGCTTCTAAACAGT +TCCGGCATATTGATATCCGGCAAACGGCAAATCAGCAGTTTGGCTAAAAT +AAAAATATCCGGCGAATCAGCAAATTGCTGGAATTAAAAAGTTCCTCCAA +ATCGGCAAAACCGGGAATTTGGTGAATTCGCCAAAAATAAATATGCCAAA +CGGCAGTTGCCGCCTACCCCTGCATTTTTAGACAAACTTTCGGAAACCAA +AATTCTGAAGGTATTCGACAATTTTTGAAAAACGCTAATTCCCAAACTCG +ATTTTATTATCAAACACTTAATGCATCGACAAAATTTCGGAAACCGAAAT +TTCTGGCAAATTGGGCAAATCGATAAATTTCCGTTGTGTTTGCCGCCTAC +CCCTGAAGTTGCACGTGGTGCCACAGTGTTCCCTATTGGTTCGATCTTCA +AGAATTGCGGATTTACCAGGAAAAATTAGCTTAACGCGCCTACGCTTAGC +ACACTTTCCGAAGATGAAATCGAAAGGGTACACTCAGGCTTTCCGTGCCG +TTGGTAATTCATCGGAAAGCCACGCATACACTTACCAATAACCTTCGCCT +CATCGACAAGAACATTCAAAGACTGGATATCAAGTCGTTCGATAGTATTG +AATAATTGAGAAATAGTAGAAAATCTATCTTTTCTCAACAAACGCTCCAC +ATTCCGCACTTCTTCTCCCAACAACCAAATCGTCTCAAGTTCCCGGAGAA +AAATCATGTGATCAGTAGTTTCTCGGGCCCAAGCGTCGGTTGCCAATTGA +GAGTTTAGTGCTCCAATATCAATTGCTCCATTCGGTAGACCAACTGTGTA +CTTCTTGGTTCGCAGGGACTGCCCGATGGATTCGATGGTTTGAAGATGGA +TCTTCGATTCTTGATTTGGAAATGCATTCCGGATAGATGCCATGTGTTCT +TCTAGCTTTCTTAGTCCCCGATTGATGTCTACAATCTTGAAGGATGAAAT +AAAGTTCATCACATCACGAACTTTTGGCCATAATACAAAGAAAGAAGCAG +CAGAATTATTGTTTTTGAAGCCATGCAACGTAATCTTCGGATTCTCCACT +ATGTTCAAAAAATCTTGGGATTTTTTCAACAAATCGTTTAACGCAGAATG +GGATTTGGGAATATCGGTGAAACTAGTGAACGCTTCAATCTCTGTGTACG +AGTTGCTCCATACTGATAGCTTTTCGGAAAGGGTTGTAACTGATGAAAAA +CTGTCTGTAATGTCTTTCATGTCTGCCTTGAAATTAGTCTCGTCTGGCCA +CTTCTTGAGATCTCCAATATCATGAGCCACGTGGACAACACTTTTGACAG 
+TTGTGAAGAATTGATCTGCCTCAACATCGTCGCCGATATTCGATTGTAGT +TCTATAAAGGTATAGTTTTTGGGGGCAAAAAACGAAAGTGGTTCAAAATT +TTGGAACAAAATGCCTATTTTTAGATAGCTAAAATCTAAAATTTTGCAAT +GCTGACAAGTTGTGAAAATTTGAAATTTTAACAGAAAATAGGCTTTTTCC +TAAACTTTGAATCACCTTACTTTTTTTTTGAAATTTTCGAAACTTTTCGG +TTATTTCCAAAAAAAAGCCGCAAATACTCTATCAGGGGTAGGCGGGAATT +GCAGTTCGGCAAATGTATTTTCTGCAAATTCGGCAAATCGGGATGTGTAC +AATTTTGTCGTCTCTGGCAGTTTACCGACTTGCCCATTTACCGAAAATTT +TAATTCCGGCAAATTGTCCATACTCTATTACTACCATAGGCCCTTATATA +ATCTTTTTCCGCCCACCCCTGTACTCTATTACCACCTAATCTGTTACTAC +AGTGGATTTTTTGAAACCAAAATTTCAGCTTTTGAATTTTATATTCGTAG +TGAAAAGATATTCGTAGTGAAAAGTTGAAACCTGGAAAACATAGTTGCAT +AATTGCATCGTAATATGTAAAAATAGGTCTTCCATGGTAGATAGGCGCGG +TTTCAGGGCCTGCCTGGAACCTGCCTGCCCTATGTGAACGAGTAGACTGT +AGTTTTAAAAAACAAAAAAAGGTATTTGCCTAACAAAAAACATACTCAAA +TATTTGAATGTTTGCAACCAAACCTTTAAGCGCTATCAGTTTAGATTCCA +GCGAACTGTAGTCAATTGCATTCAGTTGATCCATTGAGAAATCGTATTTG +AACAAATCCAAAGTTAAATTCATTGATGGAATGGTTCCTTTTGTCAATTC +TTGTTGTAGGTACATTGCATTGATCAATCGTGAGGCGAATTGTAATCGCT +CAACGGCAGTTATCAAGTTGTCATCTGCAACTTTTATGAATGATTTTTTT +AAAAAGTTATTTGTTGTCATTTTTCTTTTATTAATTATTACGTATATTAT +ACGTATCATGCGAGTTTAGCTTTCCAACCCCCCGCGAACTCTTTTTACTT +TTTTTTTTTTTAACTTTTGTCTTTTTTGTCCTTTCTTGGCTATTTTTTTT +ATGAGAATTGACCCGAATTTGACTTTTTTCGGATTTTTAGGAATTTCTAG +GAATTTTTCTTTTTTGTCAAAGCGAGCTAACGCTCGCCACTGACGCCAAG +CCTAACATAAGACTTAACCTAAGTCTTAGCCTAAGCCTTACAAGGAAAGT +CTTTGAAGGGGCCTGTAGATTTGGGTTTCTCATGTTGATATTCCGATAGA +AGAGTGTTAGTTTTGATCTCTCCAAAAAATTTAGCTGCCCAAGCCAAGTT +TCAGCAAAGTAATGACATTTTGAAATTTCAGTTAAAAACACCATTGAAAT +ACACTGTCTTACAGTGCTATCCACACAAATATCAGCTTGCGTGAACACCG +AAAATGTGACGCCCACCACAATAGGCTGAAGACAGTGGATTTCAATGGTG +TTTTTAACTGAAATTTCAAAATGTCATAACTTTGCTGAAACTTGACTTGG +GCAGCTAAATTGTTTGGAGAGATCAAAACTAACACTCTTCTATCGGAATA +TCAACATGAGAACCCCAAATCTACGGGCCGCTTTAAAGACTTCCCTTGTT +AGCCTAAGCCTATGCAAGACACGCAAAATATACATTAACATATATATACA +ACAAATACAACGTAAAAATTGTTTTTCTGTAATTCGACTGACAAACCTTT +AGCTTCACGTTTGGAACGTGGATCTAATGACATCACATTAAAGAAACTGG +ATTCTTCTGGAACAAAATATATTGAAATTAAAGCTCTGAAACAGTCAAGA 
+AACCTTCGAAAGTATCCGAATTGACAACACGATTAACCGTTATAATTATC +AAAACGATTATTATTTTGATTCCTGGAAGACAGAAGAAATTCGGAACGAA +AATAAAAATCAACCTACTCATTGCAAAGTATTATGAAACTTCCCTTTGAT +CGACGGTTTTTGAATCAATTAATGATTGGAAGTGTGAATTCTGAGAGAAG +GAGTGTTGGAATTTTTATTTATTTTATAACGAAAAGGTCACGGTTTCTGT +AATTTTAATTTCTATGAAAGTTTTAGGGACTCACAGAATGTCAAACACTA +GATGGCCGAAGAAAATTTGACATAAGCAGCGAAATTTGACCTTTCAAAAA +AAAATTAATATTACGTGAAATCTTTTTTTTTCCAATGGAATTGAATTTTT +GGCTGATACATGAGGGAATGTGTTTCAAGCTGTAATTTCAGTAAAACTGT +ACAAGAAAAAACGTTCAATATTTTTACCTTTGCTTATAACTCCTCTACAA +AATTTTTTCGGACAAAAAAATAAAACAAGCATGGAGATTACAGGTGAATT +TCGAATCCTCATCTTCATTTTTCAGACTGAAAATCCAAGAAAAGTAGCCC +GAACTTTTTTCTGAAAAAGTTCCTATATACTCATCAAACATTCATAAACA +GTATTTAAACAAAAAATCCCGAGATAACACTTTTGTCTTCAAGATATTCA +GGTTGAAAACTAATGTCCTGAAACTTGGATTTTGGGTCCTTAAGAAGATA +AAACAGATAATTAGCTTTTACGTTAATACCCGCAATCATTTGCAAATTAG +AGCAAAATTAAATTGAAACTTTTTTTTGGATTTAAACATTTTTTGGAGCC +GAAATACAAGTTTCAGGACACTAGTTTTCAACCTGAATATCTTGAAGACA +AAAGTGTTATCTTGGGATTTTTTGTTTAAATACTGTTTATGAATGTTTGA +TGAGTATTTAGGAACTTTTTCAGAAAAAAGTTCGGGCTACTTTTCTCGGA +ATTTCAGTCTGAAAATGTATGATTTTTTAATTCAAAATTTGAGGGAATGT +CGTTCATTGTGGCTCGAGGTCAATGGATAAATGGCTAAGACTAAAACATC +ATTGATCGAAGAATGATTTTTTATTCATAAAAACTATATATTTGACCTGA +AATCATTGTTTTTTGTCACTATAACCCATTGGAATGAGCTGTAGAAATTA +ATTAAATTTAGAAATGATTTTACCGATACGTGGCAATGTTTAATACATTA +GTTTTCTGGCCGAAGATTTGTATGTTTTCCCGAGAAAGTGAAGCGACAGC +TTAAAAGCGCATCGCCTTTAGTGAGAGGCAGGTCTCGACGCGACGACATC +TTTCCTCGCGCGCAGCGGCGAGATTTCGTCATTCTACGTGTTGGTAAAGA +AAAAGGACGTGATTTTATCGATTTGCATCGATTTTTTTTGGATTTTTCTT +TGTTTTTATTGTATTTTTATCGAATTTTTAGTAGTTTTAAGGTTTTTTAT +CGATTTTCCTTGATTCCGTTGCGTTTTTAAGCGTTTTTCTCGTTCGTAAT +GATAAAAAGGGGGTTATTTCTCCGTTTTCCAAGAGTTATCTGCGCGTATG +CCAGATTTTGCAGGAGGAATCATCAAAATTCTCAGTTTTTGGCATGTAAT +CCTTACGAAGAGTACTTTCTGAGGCTCAATGGCTTTTTCCCCCGCTGTTA +GGTGCTTCAGTAAGTGTTCTACTCCTTTCAGGGGTTCCTTCTCTCGTCTA +GGAGATTAAATATGTTCGCATTTGTTTCTTCCCTCTCTCTTACGCGCTCT +TTCCAGCATTGTTAGTTTATCCACGCTCTTTTCCTCTGTAATTTTTCGTT +TATCACTTTCTCTAATTTTTCGGTTTCCGGTTCGTGCACTCGCGCACCCT 
+CTCCCTATTTTACTCTATCTCTCACTATCTCTCACATCTATCTCTATCTC +TCACAAGCTATTTTGGTATTTTGCGCGTTAAGAAGTCATGTTTTGATGGA +CCGTGTCCAAAGGAAGAATATTTTCGTCGAGTGAAAGGTATGTAACTTCG +TGTGAAGTCGACTCCATGAGTCTTGTGAATATTCCGATAACGTTTTATCG +AGATGTATTATCGGGCGGCCCGAATTTTATGTATGTTTTTCATTGTGGTT +ACCATGTAACGGGAGTTCACATCGCAGAAAGAGCAATTCTTTATCGCGAA +AGTACGATTTCAGGCCCCGGAAGGCTTTCTTATCAGTTTAATGCGACATC +TTTCGTCGGGTGAGATATATTGTCTTGCGTTTCTATCGCATTTCATTGGA +CCGGTCGAGCCAGGTCTGAGTGTTTATGAGTACGAGTCGACTCCGTGAGT +CTCAATGATTTTCCGTTTTCAGGCGGTTTTCAAGGTTTCCCGTTCTTGGT +TTCTGCTTCATCTTCTACTACCGTTTTAAGTAGGATTGGCTCAAGTCTAC +GGAAGAGTAGAGCAAGCCTACGAAGAAGGTCTTCGAAAGAGTAGCGCAAG +TTTACGGAGGATTCACGCAGGTCTACGGAAGAGTGGAGCAGGTATACGTT +AGAGTGGCACTAGACCATACTGGGAGTCCGATTTCCAATTGGATTCTCGA +AAAACCTTGTACCTGCACAAATAACCAGACGAGCCGATATATCTACCTTA +CCGAGCTCCGGGTTAATCGTCGTCAAGTATTTCAGGATGAAGCGTATAAG +GCGATGATTTTCTTTAGCTATGCCGCGCGTTTTGCAATTGCATAGCTGTG +TATTCCGTGTAGACTTTATCGCTCACTTGTTCGATTTTCAGGACATCCGG +TTCGACCTTTTTTCTTTCTCCCGCTATCGACGACGCAGATCACGTATGAG +TTGCTTTTATATACGGCGATCGATTGGTCCGGTATACGTAAAATTAGCTC +TATATTTTATCGCTTTCGTTGCTCACGCGATTCCCAGGTATTTTAATGTC +GAATCATATTCCGTCACCTACCACTACGTAGGAGATATGCAGGGCTATGT +GAGAATAACTCAGGTTTCGTAAGAGTTTCTCAAGACTAGGGAGGATATTC +GCAGGTTGTTGAGTAATTTGTTTTTCCAATTAGCTATTCTCCTTGTCAGG +ACTTATTCAGGTGTGAGTCGAAGTAGCCCGAGACGTTTGGCCCTAGTATG +GATACTTCACGACAGTTTCAGACAGATTGTGGTCCGCCTTTAATGGGAAT +TATTTCTACTGCACAGGTTCATAGCTGACTTAGCCGAATGAGCCCCTTCG +ACGTCGCAATTTGGCGGTTAGCACACTTCCAGGAAGACATAGACGAATCA +CAGTTTCAGAATTCCTATACACTTCAGTAACTACGGAGTAAGGCGATCAG +TTCCTTGCCGCATTTATATGTTCATCTTAACTTTATTTTATATTAGCTCA +CGGCTTTCATTGGAAACTACAGATAACTGAGCCAAATGCTTCGCCTTTTT +GTCACTTTTTCCCAGTTCACTCAAGCCCAGAGTAGATGTAGTTTGAGGTT +ATCTACTTCTCGTTTCCATGTGCGGATACTAGCAGAGTTTCTCCTGTTAT +AACTGAGAGTCATTTCACACACAATACGGTTTATCTGTCGGCAGTTTCTT +CTCACAAGGTTTTCGCTATAGATGAATATCTGGTATAGGGGTGAAGAATC +TGTAATTATTAAACGCCTCAAGCTTATAGCCGTGACTGACAGTTATAGCC +CCCGGCTTTGCTGGTTCTACCCTATACCACACAAAACCCAAACCAATAAA +CCCATAATACTATATAATTCGTGTTGTCTTATTAGAGCGTAACTTGATCA 
+GGGAAAACGAATTCTCGGGGAGCGAAGCGATCCGAGAATTGTATGTTTTC +CCGAGAAAGTGAAGCGACAGCTTAAAGGCGCATCGCCTTTAGTGAGAGGC +AGGTCTCGACGCGACGACATCTTTCCTCGCGCGCAGCGGCGAGATTTCGT +CATTCTACGTGTTGGTTTCCCCCCTCCCGCCCCACACACATATTATTCCA +AAGTGCGAGACACATATGCTATGATTTCCCTTCCGGAGAAAAATCGAATT +ATTATGAAAATATTAAGTTTAAACCGTAAGTCGTTTATTTTCAGGAGGTG +AGTTTTATTTTTCCTTCATTGTCGCCTGAGTGGGCAGTCGGTGTCTATCT +GCAACATTCGCAGCTCGACCCTTGTGCTTGTTTTGGCGGTAGCAGGAAAC +TTATGGGTTTTTTCACAGTTTCGGGAGAGATATTTACAGGCTCTGGTTCA +GGGAACCAGATTCAGAGTTTAATAATGATATATCTTTACAGGAAAATTGT +GGTTAGAGTTTCATGGCAGCTTCAGATCCCCTGATGATCAACAGTTCAAT +CCACGGAAACACGGACGAAATGAGTGGTCCACCGAGCAACAATGTGGAAC +GAGACGATTCCAGGTTTTCCATGCCCCCTAACATCGATGATGGTCTTATG +GTAAAGTTCTTCCAATCAAGCGGTGTTCGTGTTTTGGATAAACTTCTTCG +ATAAGTAATACTTGACTAAAAAATATCGAGAATTAAAGTCACAATTAAAA +TTTACAAAATATTTTGTTCCAGATTATGAATTTTCGAAGATTAGACTATT +CACGGCCACGTAGCGTTTGACATGTTGTGGGACTGAATTTCTACAAAAAC +GTGAACTCACGTAGACTGTCTCTTGAATAATTTGAATGAAACAGATGTGA +GCTCCGGAATAGGCTTAGGCTTAGGCTTAGGCGGGGCTGGGAGGAGAAAA +AAATTGCGGAATTTGCCGTTTGAAGAGCTCGGCGAGTTTGCCGAGTTTGG +CAAATTTTAAATTTTGCCGTACACACTACAAATTGGACAAAAAATCGAAA +TATCAAAAATAACAAATTGTTCTGAGTTTGATCAGTTCGGTAAGAAAATT +TTGAAAAAAGTGATTTGCAGAATTTGCCAAGCTCGCCTTTGTCTTTGGTC +AATTTAGCACCATATGAGTGATTATTATCAATTTTCCCACTGTCGCTACT +TCACATTTAAAAAGTAGTCATCAATTGCCGAAACAACTTCCTTCCAGCTT +AAACTTTTTTCTTAGCCTCATCGGGACCCTTTCCTAATCCCTTCTTAACC +TCATTCTTCCCATCTTCCAAAATTTCCATCCTTCTTCTTCTTCTTCTTCC +CACTACGACCATCTTTGTCGTATAGCACCTTTCTCGCGAGAGTGTGTGGA +AACAATTTGCGAAAAACGGCTCCCGATCCCCCCATTCGTTTTGGCGAAAT +CAATAGACACAAAAATGTTCAAACAAATCTCCCCTTCTCGTTTATCCCCC +TGTTGCACAATATCCTTTTCTCACAAAAAAACCTTTGCTCTTCAATTTTC +GGAATCATCTTTTTGCGCTTTGTCGTGTTACTGTGAAAGGCGGAGTACTG +AAATATGGAAAATATTTTTAAATGACTCCCTGATTCCGAATATCTATGTG +AGAAAGTTCAAAAAATTTTCCCTCATTTTATATTTCAGCTTGAAATCGTG +ACTTTCATTGTGCACTAATGAGATTTCGATTTCGAAGTTTTTTTGAAATG +TGTTTTCAAGGTCAATTTTCAAGGTCAATTTATTATTTTACTATCGAAAA +ACTAAAAAAATTTCAATGAGTAAAGTAAGTAAGTAAGCATTTCGTAAATA +TACAAAAATCTCGTAAATCGACACATAGCGTCTCTGGCGCGAAAATTCGA 
+AATCTCATGGGTGCACAAATGAAATGCGAGATTTCAAACTCAAATGTAAA +ATAAGGGAAATTTTTTTGAATTTTGTCACATAGATATTCGGAATCAGGGG +CAAATTTGAAGTCAATTAAAAATATTTTTCAGATTTCGTGGTACTCTAGT +CTAAAACTAAAACTAAAACTAAACTAAAGTTAAATTAATATTAAATTACC +ATGAATCTAATTTTTGTTTTTTAAAGTTTCCTGCAAAAATTCCAAGATCT +CAGTTTGCCGAAGTCTAAATAACGACAACTCTGAACTTTTGTCCCGAAAG +AAATCGAACACCGGTGTATACTAATAAGATCCCTCGAAGCTCGGTATACA +AAAGGATCATGAAAAAGGGGTGTCTCACCTTGCGCATAATACCTTCTGTC +TTTCCCCCTTCACCAGAAAGATTTCCTTCTTATATTTGTTCGTTTCGTTC +CTGCACACTCTCCATCTCTTCTAACCCCCTCCTCATTCAGAATACTCTCT +CATCTCACAACGCTTCTGTCTACCTGCCTGCGTCTCCTCGGTACCATATA +CTATCTTGTAGCTGCCACTTACCAACAGACTTGCCTCTTGGAGGTCTCTC +TCTCTTTTCCACCAAATCACCTTGTTCTTCCGACTTGTTCTCTCTTATCG +AACTGACTTTTCGTGTCTGCGGGCCTTTCACATTATTTTCCAATTTTATT +CGAATTTTATGTGCCCACTGCTTGCTAGGTTTATTGAGTGCCGCATTAAT +TTTATTGATTAAAAAAAAAGCCACTTGTAACAATTTTTATGAAATTTGTT +TGCATATTTATTTAACAGTAGCGAAATTGTTTTAAAATTCGTACTGTGTG +AGAAATTTGCACTTTCGAAGTGTTTAAAACATTCTATTACGGGATCACAA +GATTATGAGAATGCTTATTGCGCAACATTTTTGACGCGCAAAATATCTAG +TAGCGAAAACTACAGTAATTCTTCAAATGACTACTGTAGCGCTTGTGTCG +ATACTGGCTCGATTTTTTAAATGATTTTTTTTCGAATAGTGACGTCGATA +TTCCATTTTGCTTTCTTTTTCGTATTATAATTTTATTCATTTCGAGAATC +GAGCCCATAAATCGACACAAACACTACAGTAGTCATTTAAAGAATTACTG +TTATTTTCGCTACGAGATATTTAGCGCGTCAAGATTTTTCCTTACGGAAA +TACAAACTTCTTGGCGAAAATTTCACGAAAATTCAAATTTCCCGCTTAGA +ATTTCCCGAATTTTTCGAATTTCCTGCCAAATAAAAACTAGTTTAAATAT +TTAAAAAAAAGCTTAGAATTCTTAGAATGCGTATTGCGCAGCATATTTGA +CGCCCAAAGTATCTCATAGCGAAAACACAGTAATTCTTGAAATGACTACT +GTAGCGCTTTTGTCGATTTACGGGCTCGATTTTTGAAATAAATTTTTTAA +AATCTTTTCGAATCAAGCCCGTGAATCGACACAAGCTCTACAGTAGTTAT +TCAAAAAGTAGTTTTGGCTTCGATCGAGATATTTTGTGTGTCAAATATAT +TCCCGTAAAACAAAATACCGTCTAAAGATATAAAATTTAACGCCTATTAC +ACCCCAAAACCCGCCGGAAAATCCAGAAAAATTATATAAATTCGCAGTCA +CTTCTCAAAAATAACCTGAATAGTTGACCGCGTAATTCGACCCGTTTCCC +TACGACTGACCACACCCCGATTCTCTCGAATTACCCGCGACAAGCTTAAG +CATCTTTCTTCATTCCCACACTTTTCGTAAACTTCTGAGAGACAAAACAA +TAATATTTCTTTCTCAAATCAATAATGCCTATATTCTCCGCATCCCGTAG +CCGATCCCATCACCGTAATTCTAGAAATCCCACGAAATTTTCCAATTTCT 
+CTATTGAAAAGGTCTCTCTTTTATATTACCCAGGCGCAAAATTTCGTGCA +AAAATACAATCGACGGTCTCGACGCGATAATTTTTGCTAAATTCCAAAAA +TGTGTGCGCCTTTAAAGATTATTGTAAATAGCTTCAAACTTTCACCGATA +TTTTTAAAAATTGATTTTTCTATGTTTTCCGTTTAATATATATTTATTTA +TTTAAAAATTAAGTTGAAATTGAGAAATCGATGAAAATCGAAAATTACAT +TACTCTTTAAAGGCGTACATGTCGTTTCGAGACCGGGTACAGTATTTTCG +ACTGCGATTTTGGTTTTTAAAAAAACCTAACCAATAGAATTTCCAGCGCT +CACACTCATTGGGTGGAAGCAGCAGCAGTCCGACGAGCTCTTTTTCGTCA +AACGACGAATTCGACGATCGTCAGAACAAAAAGTGAGTCATCAGTTTTGT +TTGAACAGCAGGTGTGAAATTTATTGTGAAATTTAAAATGGGAAATCCGA +AAAAACACACCTTGTGTGATCTAAGATCCAAAAAAAATAGAACAATCGAT +AATTTTCGGGTAATTTTAAAGTTATTTTTAGCGAAATAAACAATGCATTT +TTGAGGGATTGGTTTTCAGTCATTTTCGATTAAAAAATTCAATGTTGAGC +TGAAAATATCGATTTTTACCAGAAAATTGATAATTTTCGAGCTCAATACC +ACAATTTTAATGTTTAAATCGCTTTGAAACCTCATAAAATCTCCCGCACC +TCCAGTTTTTGTAGTTTTCCGCCTAAAATGTCGGAAATTCAACAGAATAC +TCATTTTCCTAGCGAAATCTATTAGTTTATAATAAATTTGATCGTTTTCT +GCTTAAAATTCCCTGAAACTTCTGAAAAAAAACCGATTAATCAGCTGAAA +TTCGCTTAAAACTAATTTCGTGCCGGGACCACGGCCTTTTAAATCGGTAT +GCGCTTTTAGTTTTGTACCGTATCTATTTTAGTTTTTAAAGTAAAAAATG +AAATTTTCAGCAAGTTTTGGCGTCAAAGAAATGTATCGAACGCAGAAATT +GAACGAATAATCGAGGATTTCATAGCGAATGGTATTTTAAGTGAGTTTTA +TTCGATAAAATCGACGAAAAACCAGAAATAAACAGTTAATTTCCAGAAAT +GAGCAACGGGAACAGCTACAATCACCATCACCAATTCCCGATGAGCATTC +CAATATCGTGCTCGTCACACTCAATTCAATCACAATCACGAATGAACACT +CTGAACGCGAATAGGGATCTCCTGTCTCCCGGCAACGACGTAATTGTCAC +CAGAACCGTGTCACCGTCGTTTTATTCGCATGGAATGCCGGCCCGGGATA +ATGTTTTTCGAAAAGACGATCATGTCAGGATATTGGGGAATACAACGGTA +TGAAAATCTGCTTTTCATCGAAAAAAATTGGAATTTTCAGGACCCAGCGT +GGTATCGAGCCCGTAATGCGAATCAGGAGGAGGGTCTGGTTCATGCAGAT +TGTGTAGTGAGAATAAAGTTGGTTTTTCTGATCAATTTCTGTGAATAACC +CCGATTCCCATGAAAATAGGATTTTCAGCAGAAACTCCAGGCAAATTGAG +TTTTTGATGCGAATAATGGATTAAAAAAGATAAAAATCCATGAAAATTTA +ATTTTTTACATCAAAAATCCATAAAAATCGGATTTTTGAGCGGATTAGAT +TACTTTGCCGAAAATTCATTTAGAAAAAAACAATTCTTGCGCGCAATTGC +GCAAAAAATTCAGAAAAATGAAACATAAAAGTTAAAAAAATTATTTTTAA +ATATTTTTGAGCTGAAAATAAAAGAAAATTCGGATTTTGTGGCCAAAAAT +TCAAGAATATTCCATGAAAATTAGTTTTTGTTTGCAAAAATCCGAACAAA 
+AATTAATTCTTTTTTTCGCTAAAAATTGAGAAAATGGTTTTAACACAAAA +ATCCACAAAAAATTGATTTTTAGGCCACAAAATCTATGAAAATTCTGATT +TTCTGGAAAAAAATTAAGAAAATTCAAATTTTTTGTTCAAAAACTTTTTT +CAAAAGCATTTCAAGAAAATTTGATTTTTCATCAAAAAAATTAATAAATT +TTAGCGGGCAAGCCTACGACAATGGAATTGTAAGAATGAGAGCTAGCGGA +TGTGACGTGGCTCCGGGAGCAGCTTCAACAACCTCCAGCACATCATCACA +TCATTCAACTGCAGCAAACCATCAGCCATGGTTTCATTCAATGATTAGCA +GAGAAAATACTGAAAAGTAATATTGAAAATGTTGGAAAAATCTGGAAAAA +TGCGTCATTATCTCGGTTTTCAGTCGGTTTCCAGTTTTATGAAAAAAACG +TCACTAAAAACGACATATTTCTTTAAAAACGTAAACATCGGAAATTGTTT +GCATGAAAAATGTCGCGATTTTCTAGAAAATTTTAAAATTTAAAGAAAAA +TGTTGAAAATAGATTTTTTCTGAATTTTTTGAATTTTATTAGAAAATAGA +GAAAACATCATGTTTTTCAACGTTTTTCGCCTGAAACGCTGGAAAATGCA +GAAAATGGTTTTTTTAGTGGAAAATAAAAATATTTTTTAAAAAAACGAGA +ATTTTTTTTTCGTCGAAAGTTCCAACAAAAATTACATGTTTTACAAAACA +TGTAATTTTTGTTGGAACGTCTTTTTTTTCTGAATTTTTCTTTGATTTTG +TTAACTTTCTGAATTTCAATTCAAATCGGCAAAATTGTACGCGTCTTTTG +AATGTTCCCACATCTATTTTGTAACGAATACGGGAAAAAATATTTCAAAA +AGGCACATTTTTCAAGTGTTTCCGCCTTATAAAAAATTGCCACTTCCGGC +AAATTGATATCCGACATACGGCAAATCGGCGAACTGACAAATTGCCGGAA +TCGAAAGATTCCGGCAAACCGACAACCCTCCGATTTGCCGGTGTCAGAAA +ATTGAAAAGTGTAGAAAAATGACGTCACAACTAAATTATAATACATGTCA +TTTCTTAATGTATTTTAATACAGTTTTGACGTCATTTTTCTCCACTTTTC +ATTTTTCCGGCACTACTTGAATAACCCTGTATACCACTTTTAAAAATCAT +GTTTTACAAATTTGTAAACTCCAGATTACTCCGCGGCAAACCGGACGGAA +CCTTCCTAGTACGTGAATCCACAAATTTCCCCGGCGACTTCACACTATGT +ATGTCGTTTCACGGAAAAGTCGAACACTATCGCATCGAACAGACGTCCGG +CGGACAGCTGACCTGTGACAAGGAAGAATACTTTTCCAATTTGACACAGT +TGGTCTCGGTGAGTCATTTCGGGGCTGTAGAGACAGGAAAAGGGGGAGGA +ATGGGTAATGAACCGGTATTCGTTAGAATGAGACAGGTGAAGATAACAGT +TATGATTTTATATTGTGAAGAGTTGTATTATAAAAAAGGTTGGTAGAGAT +TTGAATATTTGGGAGAAAGAGAAAATGAGGGGAAAATTGTTAAAATTCGG +ATTTAAGTTTGAAAAGTCACAAAAATGTTATATAAATATTTATATTTATT +GGATATAACTAATTAAGATTTTATATAGCAATACTTTAAATAATATGAAT +ATTGTTAAATATTGATATAATATCATATCAATCTTCATTTAATGGCTAAT +TGTAATTTCCAATTTTCCAGCACTACAAACGTGACGCGGACGGTCTCTGC +CATCGCCTGGTAACTCCGATTATTTGTGAAACAGCGACATTTTCATCAAA +CGGATCATCGTCATTTGGTTCATCGTCGACAGTTGATTTGGAAGATCGAA 
+CATCAGTATTTCGACACGCGGGTCTAGTTATTTCATCGAACGATATTGAT +GTTGGTGATACAATTGGACACGGAGAATTTGGAGATGTTCGATTGGGAAC +TTATAAGAATCGAAAAGTCGCGTTAAAAGTGTCGAAAAGGCATGGAAATG +GAATGTTGGACTCATTGCTGGATGAAGCCAAGTTTATGGTGTAAGTTATC +AAATTATTGAGAAGAAAATAATTTTGGCAAAGCCCAAAACAATGCGTGTT +GAACAAAGTCATATTAAAAATATATTTATGTTTCAAAAATGTTATAATTC +AATTTATTTTTAATTTCGAAAATCTATAATAACAACTTTTCCAAGTTTTT +AGTTATGAGTACCCCGAAATCTGAAAAATATTTTCAATTGACTCCAAATT +TGCCCCTGATTCCGAAAATCTAAGTGAAAAATTCGAAACCTGATGGGCGC +ACAAATTATTTGTACAATTTAAACACTTTTTAATTAAGATTAAAAATTTC +CCCAATTTTTTTGAAAATATTCATTAATTTCTATTTTATGTTAATAATTG +TTTCAGTTTATAACGCTTAGTTTCAAAATTTTCAATTTTCCTGATTGATG +ACTGATAACTAATGGCAGCAAATAAATCTGAAAAATTCCGAATTTTGCAG +AGGTCTCTCGCATCCAAATCTAGTGACACTTGTCGGTGTTGTACTGGATG +ATGTGAATGTCTACATGATAACTGAATATATGGCAAATGGTAATTTAATC +GATTTACTCCGATCTCGAGGAAGGCATGCGTTGGAGAGGAGGCAGTTGAT +GATGTTTGCGATGTAAGAAAAAGTTGACAAATAAAAAAATTGGAATTTAA +CATTCAAATTAGATTGTTCAAAAATTGTTGACATTTCTGCATTTTTATAA +CCGAAAAATCGAAATTTCATTTTTTCCGCCTGAGTAATAGTAACTCACCA +CGAAAAGGGTTACTGTAGCAGTGCTTTTAGCTGTCTCTGGTTTAGAAATT +TACGATTTGCCGGTCTGTCTGAAACCCGATTTTTTCTAGATTTTAAACAA +TTTTCTAGTATCCTGTTGAATTAATTCCAAAAACATTTTTTGCTAAATTT +TTGTTCTCCGTTTGTCAAAAAGCCGACTTTTTAAAAACACTGCAAAATTT +CTCTCTAAGTTTTTTAAAAAATGTTTGCCCTTGAAATCAATGCAGTTTTT +TTTTTAAATTTTATTTTGGAGTTTTTTCGAACAAAAATTTATTTCTTCGT +CTATTCGTACCGAAACACCAAAAAATCCACTTTTTTTCGAAATTTTTTTT +TTCAAACAACAACCCTGATTTCAAGGGTAAACATTTTTTAAAAAATTTGA +AAAAAATATTGTAAAATCGGTTTTAGTTTTTAAAAAAAGTCGTTTAAATT +TGTTGCACAATTAAAAAAAAATTTAGAGAAAAATCTTGCAATGCTTCAGG +CACACAGCTGAAAATGCTAATACAGTACTGCTTTTCGTGGTTGGACCCAG +CCGTGGTTACTGTACCTCCGGCAAAGTGGAAAATTCGATTTTTTTTTGTT +AGAAAATTTGAAAATGTCAAAGATCTGATTTGAATGTTTCAATTTTTTGT +GACAGCTGTTCTTGAAATCCAAATTTTCAACTTTTTTTCCATTTTCAGGG +ATATTTGCCAAGGAATGTGTTATCTGGAATCAAAACAGATTGTTCATCGA +GACTTGGCCGCTCGAAATGTCCTACTCGACGATGATTTAGTAGCGAAAGT +TTCGGATTTTGGATTAGCGAAAAAAGCAAACAGCCAGTCACATGATTCGG +CTTCTGGCAAATTTCCGATTAAATGGACGGCACCTGAAGCGCTTAGACAT +AGTGTAAGTTTGGTTGAAAAATTGAAAAAAATTGAGAAATTTTTACAGAA 
+TTTATTCTTTCAAAATTAATTTCATAAAAAGAAACAAAATCATTTCAATT +TAAATTGGAATTTCGTATTCTTAAATTTTTAATTTAAAAAAATCGATATT +TTTTAAATTTAACAAAATTTGAAAAAAGAAATCCGAAAATCCGCCGGAAA +ATGCCTCAAAAATCTGCACTTTTTTTGAATTTTTGCCCAAATTTCTAGAA +TTTTCCCTTACTTCATTTTCTTTCATAGCTTGAATATCAAAAATGGGAAA +ATCTTGAAAAAAAATCCCAAAAAAATAAAAAACGAAAAAATCGATTTTCA +AAAGTTTATTTTTTAAAAATTTTTGTGATTTTTTTCTGATAAAAATAGTT +TGAAACTAGAATTTTTAGAAAATATCCAAATTTTCTAATAGTTCGCTGAA +AAATGACTCAAAAATCACCAAAAAATATTGAATTTTCGCATAAATTTTTC +AAAAATGGAATTTTTTTCATTAAAAATTAAGAATTTTTGCAATTTTCCCA +AATTTTTTTTAGCAATTCACCACAAAATCCGATGTTTGGTCATTCGGCAT +TCTTCTTTGGGAAATTTTTTCATTCGGACGAGTACCGTACCCAAGAATTG +TGAGTTTTCAGACAACATTTTTTTAATTAAAAAAATTTCTCGTGTTTTTT +CATATTATTCCTCGCTAGCTGTTGTTTTTATTTCTTGGTTGGGATCATTT +CTTTTAAGGCTCCACGGAAAGGGAATCTGCGCGAGAATGAGATCTACGAA +AATGAGCGACGATGCTAGGATGAAACGCGCGTTGTTTAGCATGTTTTTGA +GACTTTTCAAAAATAGAGTTTTTAACCTTTTTCGTAGATCCTCTTCTCGT +AGATTCCTTTTTCGTGGGTCCTCTTCCTATATTTTCGGCTTGGCCGAGTT +TTCTCCCGGCCACGATCATCACTATTTTATTGATTTTTGTCATGATTTTC +AGGGAGCCGATGGTCGTTCGGTATGTGTATATTGCATTTTTTCGTTAGGT +TCTATTTTTTTCAAATTCTATCCACTTCTATTATACAATACATCTCACTA +TCTTTTCCCCTGCCTGAAATTTTTGAAAAAAAATGAAGAAATTTTAATTT +TAAACTTTTTCCAAAATTTTCCAAAAAAAAAAGTTTTTTATCAATTTTTT +CAATAGCTCGAATTCTGAAAATGAAAAATGATGATAGAAATTGTTTTAAA +AAAATGATTTTTTTTGTTGCAAAATTTAATTTTTAAATCGAAATACAACG +TTTTTTTCAAAAAAAAAACACCTTATTCGAGACAAAAATAAGTTTCTTTT +TTTTTCAAAATTGAACATTTGGATTTTCTGGAAAATTGAATAATTAAAAA +AGATTAAATGTTCGAAAAAAAGAGAAATATATATTTATTGGCTTATTTGG +AAAAATCAAAAAATAGTAAGAAAAAAATCAAAATTGTTCGATTTTTTATT +TTTGTTTTGTTTCCGAAGATGTCCTTGTGGTATATAGTATTTTGGGTCCC +ACCACGAAAATCGCCGTGCGCCTTTAAATTCGGCCGACCACGTCTCGCCT +AAGTCCAGTCCCCCACTAATGTCAAAAAAAATTTTTTTTCGCAAAACCTG +AATTTTGAAATCTTTTTTCCAAAAAAAAAAACGTTTTTTATTTAATTTTA +AAAGAAAAAATTGGAAAAAAGTATTATTTTTTCTAATTTTGAACTTTTTA +AAAAAATTATTACACATTATCTGGATTTTCAAAATTTTCCATATTCGATG +TTAAAAATTATAAACATTTTTCAGAAAGGAACACCGAAATTTTTTTTAGA +AAAAAGTCAAATTTGGAGCGAAAATTATGTTTTTCAACAATTTTTCAACA +ATAATTTTGTTCAATTTTTTTTTTGGATCCAAAAAAATCTGTGTGTTTTT 
+GTGTGATTTCCGGCACCCAACTCCCCAGAATTTCCCTCAAAAACTCTATT +TTTTTCTCTGCTACCTCTTCTCAATCTTCTAATTAATTCTATGTTTTTTC +ACTATTTTTTATTATTTTTTCACCCCTTTCAGAGCTCATTTAGAGCCCAA +AAATAACCATCAGAAACACAAAAAAAACAACTTTTTTTTTCTTTTTGAAT +TAATTTTTTCAGCCTATCCAAGACGTTGTGAGATATATTGAGAAGGGATA +TCGAATGGAAGCACCTGAAGGATGTCCACCGGAAATTTTCAAGGTTATGA +ATGAAACATGGGCTCTATCGGCACAAGATCGACCGTCTTTTGGACAGGTA +GATTTTTTAAGGGAAATTTTTTAGAATTTTAAGACGAAAATCTGAAAAAA +AAACCTACAATTATGGCAATTTTTGTGATGAAAATCTACAAAATTGAATT +TTTTTTCGAAAACCTGATAGAAAAAAGAATAAAAATTAATTTGAAAAATG +CCGTAAATTGACCAAAAATATGGAATTTTTACCAAAATTTCAGGGGTTTT +TCAGATTTTACCAGAGTTTCTGCCTTATAAATTAAAAATTTCAAAGAGAA +ATATCTTTTTAAAATCAGCATTTTTAGCAATTTTTGTGATGAAAATTGTT +TAAAAATTGCCAAAAATGGGAATGTTTGAAGAAAATTCAGAAAAATCTAA +ATTTTGGGCAATTTTTGCAATAAAATTGAAAGATTCGGAAAATAATAAAT +TTTTCGAAATCCTGAAAAAATTTCACAATTTTTGCGAAAAGAATTGTAGA +AAATTCGAAATTTTATTATTAAAAATATTGAAAAAAACAAAAAATTTCAA +AAATCCGCCGAAAATGGCTTAAAAATCTAAAAGAAAAGATTTTTTGCATT +AAATTTTATTGCATTAGTATTTGCGAAAAACAAACCCGAAAATTCACTCT +AAAATTACAAAAATTTCAATTTTCTGAAGCTGCGTCCACAAAATGGCTTT +AAAAATCATTTTCATGTACAATGTCAAAATTTTAAAAAAAGTGAAATTTT +GTTCAGAAGTTACTATAATAAAGGTTTTTTGCTTTCCGGTCATTTTTTTG +AATTTTGGAACATTAACCGAAAAAAAAATTCCGCATTTTTACCCTGAAAA +ATGCGAAAATGTTTGAATTTTTTGAAAATTCCACTTAAATTTTTTATATT +TTCAGGTCTTGCAACGATTGACAACCATCCGAAATACAGTATGACATCGG +CGCACATCTCGAAACTTCCAGAAATCCATCGAATTTTCTAGTCCATATGA +TGTTTCTGTTTTTTTTTCGCTGTTTTTTACACAAATTTTGTCATATTTTA +TTGTAATTTCCCCCACACATTTTTAGGCATCGTTATTATATATTTTCTAA +TCCGCTGCCGCGCATTTCGGGAGATCCTGGAATTTAAAGGCACAGCGGTG +TTTTTGGATGGGTCCCACCGCGAAAATTATTATGCGCCTTTGAAATTTTT +TTTTCGATATTCCTAAAAATCACAAAAAACATTTTTTTTTCTAAATTAGA +GAAGTTTTAGCCTTTCTTTCGAAAAGTCGATTTTTTTGGTTTAAATTTTT +CTCGATTTGAAAAAAATTTCAATTTTTAGTGAAATCAAAGAAAAAACATC +AAGAAACTTCTATTCGAAAGAATTTTTTAAAATTGTATTTTCCGAAATAA +TTTTCTCTGTTGGGTCTCACCACGAAAATCGTCATGCACCTTTAAATTTC +CTTGCCCACACCTCTCCGAACGGCCGATACCCAATCTACGGTTTCTTCAA +TTTTCCTCTGCATTTTTCATCGATTTTTAAAAATTTTTATCGAATATTTT +CACGTTTAAAATTATTAATTTGCAACGAATTTCCCTCAACAAAAAATGCT 
+CAAAACTCCGCCCCTTTATTGGCATAATTTTTGCCGCCACGGAAAATGCA +ATAAATTTTATATTACTTTTCACCGATTTTTTAAGATTATTTTTAGAACT +TTTAGTTTTTGTATGTGTGTGTGCTCGTAGTTTATCGGCAAGAAATTATT +GTTTTTTTTTCTTAATTTTTAATAAGGGAACATTTTTTAAAGAGATTTTC +AGAAAATCGGTAAAGATTTAAGATGCTCTGAATATGTCCAACTTCACAAG +CTTCGTAGATTACGCGGAAAAGGTGATTTTGAATGTTGAATGTGCAAGTG +CGCCCTATGGATAAATGGAGCAGCGTGGCAATTCGAAAAAATTTAAATAA +AAACGTATTACTCGAAAGTTCACAGAGTTTTTCACCAAAAAATTGAAAAT +TCGCTGAAAAAGTCGCGGGACATTCCATTTTTATTTTTCGGGAAGAAATT +CGAGAATTGACTGAAAAAAGCGCGTCTTTATTCCAGAAATTTTAGCGGAA +AATTCCAGATTTTTTTATTTAAAAAGACTCAAAACATGAAAAAAATATTC +TGGAGAAATCAAAAATTGATTGAAAAGTTTCAGTTTTAGGCTAAAATTTT +TTTGCGAAAAATTCATTTAGTTTTTGTTGGTAAAGTTCCGATTTTTCTGC +AAAATTTGAATTTCTTTCTGAAAAAGTGCGTTTTTTTTTTCAAAAAATTC +TTAAAATCGGAAATTTTTCGCCCCGAAACTGAACCGAAAAATTCGAAAAA +TGAAGCGCGGTCAATTTTTGAATTTTGGCATTGAAAATATTCTGAAAAAT +TCAAAAATTGACGGAAAAGCTCAAAGTTTTTCACAATATAATCGATATTT +TCTCATTTTTCAATGTAAAAATCGATAGTTTCAGCCGCACGTCGCAGCAA +CAAGCCAGACACCAATACCGCCGTCACCGTTGAACGAAAAACGGCCAATT +CCACTGTTTCCAAGTGAATTTATCGATTTTTCTGTAGAAAAATCTGAAAA +TTTCTGAATTTTCAGACGAAATCAGCAGCTCTTCGTGCGATGACGTCTCG +TTTTCCATGCAATTCGGAGAATCTGAGCAGGGACATGTTATATGTACAAA +AGTTCGACTTCGATTCTATCCTGCCGTGGTGAAAGAGGTTCGAAATTCAA +AAAAAAAAGAAAACAATATTACTATACATTCGTAAAAATTAAATTTCAGT +TTAAAGGCGCATCTCACATTTAAATATGCATTATTATTCATTAGTGTCGA +CCCAGATTGAAAGCGCATGCAAATTTTGAACTTTAAAACTGGAAAAAAAA +TTTTATTACACATTCGTGTAACATGAATATTCAGTTTAAAGGCGCGTGCT +AGTTTTAAACTTCAAAAATTAAAAAAATATATATATATTCGTGAAAAAAT +GTTCTTTTTAAAAGCTCTTTAAAAGTTTAAAAATGATTATTATACGTTTC +TCAAAATTTTCAAAAATCAGGTCAAATGCACTTAGAATATTGGAAAATAT +GAAGAAAAATCTTTATACATTCCTGAAAAAATACGCATGGTCCAAAAGTC +GACAAATCGTGGCCCAATATTTTCTATTTTTTCAGAGAAATCTTCCGCAA +AGGTCGAAATTTTTCGACGATTTCTACGATGTTCCGCTCACTGCGATCGC +CAGAATTGAGGTGGCCATTGTGAAAGGTGAAAAATATTTTTACTTCAAAA +AATTATTATATTAAATTTACACATACACATTCGTGAAAAAATTTTGCCGG +TAAAAATAATGAAAAATATATACAGAAAAATACGGAAAATTTGGATGAAA +AATATTTTTTGGTGACAATTAGAATATTACCCGCAAACAAAATTCAAATG +AATTTTTTCAGTTTTTGAAAATCTAAACAAATTTCAAAATGTTATTATAG 
+ATTCGTGAAAAATTAAAACTCAGTTTAAAGGCGCATTTCATATTCAAATG +AGAATTATTAAACATCTTAAAGACATAAACAAAAAAAATGAAATCTGAAG +AAGAAAAATTATACATTCGTGAAAGTTGGGAACAAAATTTTGTAATTTGA +GTGCTTGACATCAGTTTAATAAAAAATTTTGAAACCAAGAATTTTCGAAA +AAAAATCACGTATACCTAAAAACTTGTAGTTTTTTAAAGTTAGTAAACTA +TTTTCTCTCAAAAATTCAAAAATATTCCAGGTAACAGTAAAGGAAAAGCG +GACAAATTTCAACGCCTCGAGACAAGCCTATCAACAATGGAAACAGTATC +AATAATTCGTCTAATATTAAAAGATGTCAGAGTTGTTACAATTGACTTGA +GAAGATCTCAAAATGCAAATATATTGGCTAATCAAATACTCTACTTCTCG +AAAAGTGGACCAATCGAAAAAATGACTCAAGTTGGAGCTGCTATGGAGGA +GCGCGGTGTGAAGGCGAAAAGTAGGCATTGGATAATTAGTATTGGGGGTT +GGGAGGGGAGGGAGGAAAGTTCGAGAAAATCGGAATATCTTATAAATATT +AGGCCGTTTGATTAATAAATCGACACATCAAGTAAATAAATTTTGAAACA +AATTAAAAATTTTAAACGTTTCGATTGATATATGTGTATCAATTTTTTAA +AAGATGTATATTAGTAACAAACAAGAAATTTTTTATTCAATAAAAATATA +CTACGGGAGCACTAAATTTTTAGAATGCGTATTGCACAACATATTGGACC +AGATATCTCGTAGCGAAAACTACAGTAATTCTTTAAATGACTGGTGTCGA +TTTACGGGCTCTTTTTTCATTTCTCCGTATTATTTTCTCTTTTTTGCTTA +ATTTTAATATTCTATCTATAACTAAAAAATTAATTCATTTCGAAAATCTA +GCCCGTAAATCGACACAAGCGCTACAGTAGTCATTTAAAGGATTACTGTA +ATTTCGCTACGCGATACTTTGTGCGTCAAATATATTGTGCATTACGCATT +TTCAGAATTTTGTATTCTCGTAAAATTTTCGAAAAAGTAAATCTTTTAAA +AAACTTTTTATTAAATGAATAAAAACGCATTTTTTGGAGAATCGGATAAA +AATAATTTATTTAAGTTTTGAAATTTTAATATTTTGAAAAAAAAACATTT +TAATTGGAAAAAAATATGAAACTTTCTTTTGTAAATATGGTTTTTACAAA +ATTCCCGAATGTCCAATATACAAACTTATCATTTTCCAGCAGCATTCAAT +AGCTACGAAGCATGGCGTTCGGAACTCCAAAGGTGCCAGCAGAAGACGGA +TTCAACGTCTGTCTGGAAAATTGTAGCACTTAACAAAGAAGGCTTCAACT +ATGCGGCTCAGGGGTGCGATTTTTGGAAAAAAAAACTGGATTTCCGATTA +AAAATATGTATAGTGTATGTATTTTTCAAATTTCCAGCTACCCTGTGTAC +GTAGTAGTGAGCAACTTCTTGGATCGTGTGGACATTGAACGACAACTACA +ACACTACAAACAAGGACGATTTCCGATTTGGGTGTGGTCTCGGGCGAACG +GACACTCGGCGTTATTCGTTTCGGCGGATCACGAGAACAATATCGCGATG +CCGGCGATTTTGGCGAAGTTAGTACATTATATTTTAAATTTGTAAAATGT +TATTTTCTTCCAGAATGCAAGAAAGTATCACCCGTTGCCATCCGAATAAC +GAGAAGCCGCATGTTATAAAATTGGACGTCGATTTTGTGTCGAATGTTGG +AAAAGCTTTTGATAATCTTCTATCGCTTTGTGCAATTGGTGAGGGAGAGC +ATTTCATATTTAAAAAAAAAATTATTCGCCGTATTTTCAGCTGTTTTATT 
+TTCCATTATTTTCCATATAATATGCATTTTCCCAGACTCCTACGAGCAAT +ACGTTACACTGCAAAACGGTTGGAATACGAAACTCTCGAGAACCGGATGG +CTTCATCTCGTCAAGCTGTGCCTTCAGACTACCTATCAGACGATTCAATG +GATTGTTGATAGAGATCGATCGGTTATTTTACAGGGTACTATCAATTTAG +GTTTTATTCAGAGAGAGAGAGATAGCAATTTTCAAAATATTTACAAACGA +GTTAGTCATTTACATCCACAATCGACTGATCAGAATCTGATTCGTGGCCG +TTTTCCTCCGCCAGAACTTTAGGAACTTGCACCGAACTACTTGCTGCTTG +ATCCTCCCCCTCCAGAGGATCTTCTCCAGATGGGATACAACTTTTGATGA +AGCTGATCATCGGATCCTCCTCTCCATCATCCGAACTAGTTTGTCTGAGA +AGAATATCAACCAGTGTCTGCATTAAATCACCGGGTTCCGTTCTTGCTCG +TCGAGTACTCAAATTGTGTCGGCACATGAATTTGTCCAACCATCCGCGGG +ATGCCTGAATGATAAAGGTGGAGTAGCGTCGAATAGTTATTTTTTATGTT +TTTCATATACTCGTTTGCATTAAAGTAACGGCTCTCCTACCTTGAAATCC +GAGTTCTGACTGATTTTCTTTGCCGTTTCACCGATGATTCTTCTGGTGAC +TCGTTGTTTCTCCTTACGCTGATCGTGAACCCATTTAATCAATTCCTTAT +CAACGTCGTCATACTTTAAAGGTCTTCCACCTCCCGAAAGTCGCTTTGTT +GCAGTTGAGCCGTTGTCTTCTCTGAAAGTGTTATTTGCCAAAAGGCAGCA +AGTCGAAAATTTTTCAAAAAGAAAATTTGAAATCGAACAGAGAATTTATG +TTATGCCCGTATTTCAAAATATAGGAATAGTTGAAATGACTTCTTTTGAT +AGGATTAATTATAAGCCTAGAAAAAATCATAGTATTTGGAATAAAAGGTC +CATAAAAGGTTCTCAATTGATAGGGCTTGCATGTAGGCGTTAAACCCTGC +CTTTAAGGCGACCTCCGCTTGCCTCTCGGTTACGCCTTACGTTGTGCTAA +AACATAATCAATCTTTTTTTTTTTAATTTCTATTTTGATTTTCATCAAAT +TGAGAGAAATGAGAAAATTAGAAATTTGTAAATTCACATAGGCGGGTTTC +AAGCAGGACCCTGACACCGCGCATGCCTAACATGAAAGTCCTAGCCCTGG +ACTATTTTTTTGAACACAAAAATATGTCCGAAGTGGCAAAACTTACAACT +GCTTCTCGATTTCAGCTTTCTGGGCGATCCACTGTTGAATGTTTTGCCTA +TTCACATTCAAATCCTTTGCTGCTTTCGATATATTCTTATGTTCAAGGGC +ATAATTTATGGCATGCAGTTTGAACTTCAAGTCATACGTTCTTTTTCGAG +TTTTTACAGGCACTTCTGTAGGGATTTCGCTCATTGGTTTTCTCTGAAAA +ATAAGTATATAACAAGCAAAGTCTTAACTAGTCAGCAAGTTATTGGAAAT +AAAACATGTGAGAAAAGCGAACGAAGGGGCAAGACTAATGGAGACAGCAA +ATTGGCACACACGCAGAGAGACATGGCTAATAGAGTGTGCAAGACTATTA +GAGGGTAGAGGCAGTGAGACGAATTTCCGACTGGTCAGAATGTAAAGGGG +GTGCAAGGCTAATAGGAGGAGACAAGACAAATAGAAAAACTACATTATTA +TATGAAAAGAGAGTAAGACTGAATACGTGTATGGGCACCCCACATATTTG +TATGTAAGTAGTTATGTAAATGACTTTTTGGAATTGAAAAGAAAATACCA +AGAAATGGAAAAAACCTCCGGTCAATCGTAAAAAAATTATGAAAGAAGTT 
+CGATCTACTCGGCTACGATTTACAGTTTCCCTTACTGATTTCTACTAAAA +TGCATTAATCTTGAGTTTATCTCTAACAAATTGATGGTACTTTTGAATGG +TTATAACACCGGATATTTTGCAGAAGACGAAGGATCAGATATGTCGATTA +TTGTGGCCAGCCTCACGCAAATGTGCTGTGATCCATTTTACAGAACAACT +ACGGGTGAGTTTATTAAATGTTTTTTCTGTACTAAAAATGGGGTTTAAAA +AACTAATTTTAATTTAGTTCTCTCCTTGTATTAATTTCCAAAATTTTCCG +AAATTTTTTTTTCAATTTAAAAATCAATTTCAGGTCTTCAACAACTCATC +GAAAAAATGTGGATTGCCCTTGGCCACCCATTTGGTGAACGGCTACTCGG +AAGAACCGACGATGATCCATCGAGACGTGGAAAACCACAGAATAAAATGA +GAACTGACGTCATGCCGACGTGGTTGCTCTTCTTGGATTGTGTTTCACAG +CTACATCGAATATATACATTTGAGTAGGTTTGGTAAAAAAAATGTTTTCC +CCCGCAAAAAAATGTTTTCCTCAAAAAATTTGAATTTCCCACCCAAAATT +TTTCTCAAAAATTTTTTTAAAAATTAATTTTAAAATTGTTTTCTCAAAAA +ATTTGAATCTCCCGGATTTAAAAATTCCAAAAATTCGTCTATCAATAAAT +TTTATATTGTTTTCGAATTTCGAATCATAAAAATTAGTTTCAAGTTAACA +CACCTTTTTTTTCAAATAATTTTTTTAAAAAATAATTTTAAAAACAATTA +ATTTTTTTCCTATTTCCAGATTCACATTCTCTCCACACGTTCTCATTGCA +CTATGGGATCTTTCATTAACTGGAATGGTTCCATCAATGGCTTGTAATAA +TCTGGAAGAGCAATTACTCGCAAAAGTCGGCGGTGGACCATTCCCATTGG +ATCGATATTTTGAGAAAAGCTACTCGAAATTATTTGGAAATATTTGGCAC +GACGCAGTGCTTTTTATGGAATCTATTAAGAAAAGTTGGTGCTTTTTAAG +AAAAAAATTTTATATCGATTGTTCTTTTCTTTTTGATTTCAGCTACTTTT +TCACCTCGGTCAGCAACATTTTTTTGCAACATTTTGGACGTGGCCGCGAA +AAAAAAATTGGTGGCCGAGTTTTCTTTTTTTTCACTGCCATTAATTTCCG +AGAGGAAAATCTAGGAATTAAAATTTTTTCGAAAGTATACAATGTTGACC +GAGAAAAAATCGGTGGCCGAATTTTCTCTTTCTTCTCTGCCACGTTATTT +TAGTAAAAAATTAGAAAAAAAAACACAAAATGTTGTTCAAATTTTGGAAA +AAGTCTGTAAAAACAACAAAAAATTGATGGCCGAGTTTTTTTTCCATTTT +CGCGGCCACGTGACAAGCAGTGATTTCTCAAATGTTTTCAGGAAAAAAAA +ACAGGCGGAATTCAGATAATTCTACGAAAACTATTTTTGTTTCTGGAAAA +AAGTCAAAACTTGAAAATCGGTGGTCGAGTGTTCTCTTTTTTTGCGGCCT +TATCTCGATTTTTCCAGATCAACCAGTGTGTGCTCACTCGAAATTCCTTC +AATGTGAATTTATTCGGCCACCGACGTCGTTTTGCGATATTCAACTGTGG +TCCGAGTGCTATCTTCGATGGATTCCTCCGGCAAACGGGAAAAATTCGGG +AAAACTCGGCGAAGAACTCTCGCTGGACGAGAAAATGATCGAAATGGCCA +AAAAATGGAACAGTTCCGAGTGGAAAATGCATTTGGACCTACCGGAAGAG +GTGACCAAACTTTTCGGATTTATAGGCTCTAAACATATTTTTACTTTCAA +TTCTTAAATTTTTAGTATTCATCTGCATATCCTTACTCGATTCCGGAGAT 
+TTCCCGCCCAATAATGGATTTAAATGATTTTGATATGATCCGGCCGGATG +ATTTCGACGCAATGTCCATGAATTCATCCATCAACTCATATTCGTTCGTT +AATACGGTAGGGATACTTATTAGCTCGGAAAATAAATTACGACATATTTT +GTCTCGAATTTTTTATAGGTGACTTATAATAGACATTTAGTGTCTAGGCA +CACATAAAAGTTAGATCTTTTTCAGCTATCGTTTCGATACTTTCACACAA +AAGTAGGTTTTTTTTCGCAACTTTGTAAATTGAAAAATCAATAGGTCGGC +AAATAATTGTTGGCGTATTTTGTCAATTCACGATATTTATCAAAAATTTT +ATATGTGACTTCTGGTAGAAATTTATTATGGTTGGCAATAAGTACTGAGT +CAAAATTCATATAACTTGTTTTGTTGCTTATCGATTTTTTTTAATTGCCG +GAGTTCATGTTATTAACTATGGGCTTTCATCTGAAAGTAGTCACAAAACA +TTTTAAAAACTCGAAGTGATCCAAGTCGAAACAACTTTTTCAAGGTTTAC +GCCCTCTTATTTTTTCTCTTATTTTCAGCATTGATATGAGTTTATGTACG +GATCAAGAGTGTCGGTTTTTCGATTTTTGTATAAAATCGAAATTTTTTTT +TTGATTTTTCGATTTTCCCTGAAAAACACACAATTGAAAAATGTCACTCT +TGGTACGGATATTGCTAAATCTATTATACAATGTGAGAATCCAATAAAAG +TTTTATATTTTATTTTTGCAGCCAGTTCGTAACAACACGACAACAGAATC +AATATTCGATCATTCGACACTTGAGAAACGAAATCGTCGAGCAGCTGCCA +CAATTGGCTCCGAATCGCCAGTTATTCAAATGAGACCAAAAGAGCAAATT +GTCGGATATTCGAAATATGCATTTGATGATTCATTATCACCAAGACCACA +TTCCCGAACTGTTGAGACGCCTATTTTAAAGAATGTACGGGATTTTTGGA +AAATTTAAAATTTGGAAAATGAAAAATTCAAACAAAACAATTTTTTCGCT +TTCAAATTTCATAAAAATTTATGTTAAAAATAGTTTTTAATTGAATTTTT +AAAAAATTCTTACTGAAAACTTTCGAAATAATTAAAAAAAAAAACCAAAC +TATATTTGTTTTTTTATTATTGAAAAAGTTAAAAAAAATGTTTTTTTTTG +ATAATTTTACACTGCTCGCCTTCAAAATTTGATGAAAATCTTAATACTAG +ATTTTTTTAATTTTTCATTTAAGACTTTTCGAAAAATTCGAAAGTTCAAA +AAAAAAAATCCAAAAAAAATTTCACTCAAATCAATCGCCTTCAAAATTTG +ATAAAAACTCGATGAAAATTTTGTGGAATTAAAATGTTTAATTTATCATT +CTGAGGCACCACGTTTTCAAGATCTGGTGGGATTCCGGATCTACATTCTT +CATTTCAGCGCAAAATCCCTAAATCCGGAATTCCACCGAATCTTGAAACC +GTGGTGCCTCATCATTCTACCTTTAAAAACTCGAAATTATAAAAAATTCT +GCAAAACTCTAAAAAAATTTGATTTTCGATTTTCGAAAATTTATCTTTTT +TTGTATTTAACATTCAATAAAATATTGATGAAAAAATTTCTAGAATCATA +TTTTTAAATTTTGCTTTACAAAATTTTTTAAAAATCTACAATCGTTCACT +TTCTTGCCTTCAAAATTTGAATGAAGAAAAAATTTCAAAAACTTTAAGAA +TTAAATTTAAAATGTGCTCAAAAAATTCTTTAAAAATATGCGTTTCCCTA +ATAATTTCCATGTTTCAGTCCCCATCGCCCTCCTACACCCCATTTCACCC +GACGCCCGTTCACCGGAGCAACACCGTCCGTGCCGACAATCGCCCAATTC 
+CCCCGCCCCGGCCGCAGGGCCTAGGAACACTGACCCCATCACGCCCCGAG +ATAAGAGCTCATCGAGATGATATAGAGAGCCCTACTATTCGGGTCACAAG +ATTTTGAAGAAAAAGTTATACTTTTATTTTAATTTCCGCTCAAAATCGGA +ATTTTCTATAAATATGCCCCTTTAAATTGAGCTACAGTAGTCACGTCGAA +AAATTTTCAATTTTTTTGAATTTTTTGAATTTTTTCATCCAGTTATAAAA +AAAAGCTTGCCCCTCAGCTCTACCCCATTTTTCTTTCATTTGTCACCTAC +TTCCTTCATATATGCCCGGTTAATTATGTATTTTTTGATGCTTTTTTAGT +CCCGCCACGGTTTTTTTCATTTCGAAAAGTCGATCGATTATCGATTTTTT +CGAAAATTTGGAAAATTTGGAGAAAATCTAAATATTATCGATTTTAAGCT +AAAATTTTAAGATTTTATTTTTTCAAATAAAATTTTGGAAATGTTTTTTT +GAAAAAAAAAGTGCAAAAAACACTCAAGGGAAAATTTTTAAAAAATTGGA +AAAAATCTTGAAAGTTTGTCGATTTTTCAGTGTGCATTTTGGAGTTATCG +GTTTTCTTTCCGAATTTTCATCAAATTTATTTTTGATCTACCAAAAAATC +TCTAAAACCCCGTGGCGGGAACCTACTTCACTTTTATTCTGCATAATTAG +ACAAAAATGTGATTATTTTCTCATTTCAAAACTTGTTACATATTTTGTGT +TGTGATACATATGTCTAAATTTTTACGGACTGAAAAAATACTTTTTTCTT +AATGTTTTTGAAATTTTCATTATTGGTGGCGTCATCCATGTCTATTTGAT +TTTCCAGACCCCCCCCCCCCCCTCTAAAATGAAAAGAACAAGACGATTGA +CAGCTAGCATGTGCGACGATCAAAGTACCGATTTGATTCAGTTGAAGGGA +CACGAGAACGCGAAAAAGTCGGAAGCCCATCTGAAAGTGCTGGAATACAT +TGGCAAACGCTATGAAATTCGAATGACCGACGGGCGGTACATACGTGGCA +CGATGATCGCCACTGACAAGGACGCTAATATGGTTTTCAACAAGGTAAAA +ATTTGATGAAAATTTGGTCCAAAAGCTCATAGTTTCAAATATTTTTCCTT +AAAATTATGCGCCGCAGGCAAAACAACATTCCGCCTTTTTCTATTGCTGG +CTTAGTTTTCGCAATTTTCTCGGCTACCAGGCTACTGCAAAAATGATCGA +TCCGTGAATTGTGCAAACATCTGAGTTACACAGAGAGTGGGGCGGAGCGG +AACATCGCAACACTGCCGCATGGTTTTTCCACAACTTTTTTTTTCAAGAA +AGTTTCGATTGTACTTGTTTTTTCTCTTTTTTGTATAAAAATGGTATTCA +TTCTAAATAATTCACATATGCTAAATTGCATATAAAAACATGGAATATCC +TCTCAAATTAAGAGTCTTAATCTTATTAGAATTAAATTGGATATCATTTT +CATTCGAAAAGAAAGAGAAAGAAACAAGTAAAATCGAAACTTTTTTGAAA +AAAATGCTTGATTTTATTTCATTTTCGTTGTTTTTTTTTCAGTATTTAAC +TTTTGAACGATCGATTATTTATAAGGTTTGCGGTTAAAAACGATGAATTT +CGTGTTTTTCTTTTCTGGATTCAAATGACAAGTTTTAACCAATTCGAATG +TCAAACACGGAAAAATAAGCGAAAATAACAAAAAATCGTGTATTGTAACC +AAAAAGTTCTCCACGAGTAGTACACTAGAGCCGACGCGCCGCAGGTTTGC +AAAACAACGCCCACCCCGTTTTCTGCGCGGCACTACACAATTTTCTATCT +CACGAATAAATATTTCAGAAAAGGCAGGATTTTGTCTGAAAATCATGAAA 
+AATCAAGTTTTTTTCAGGTTTCCAACGCTGAAATGGTTGGAATTAGGGTT +TGTTACCAAAAATCGTATAATATTAAAATCCCAGTGCTTCTCTGTCTTTC +AGCAATTTCTTTTAATTTCCAGGCAGACGAGCGATGGGACAAAGATCCAC +AATTGAAAGGAGTCCGATTTCTAGGGCAAGCGATGATCTCGAAAAAGCAC +GTGGAATCGATGCACGCGTTGCCGGACCCTAAAGAAACTGAAATTTGATC +CAGTTTTCTTATTTTCTCTAATTTATCCCATCCTTTTCTTGTCAAGATTA +ATATAAATTAATTTTTATCGAAGCGATTTTTGTTCATCTCATCCAAATAT +GAAAAAACATTCGAAAAAAAATCGCAGGCTTCACTTGGAACCGCGCGCCA +TCAGTTGAAAATGCTGAATATGTACAAAGACTGGAAAATATTAAATTTTT +ATGTTTAAAATTTCACGTTTTTCGTGTTTTCCACAAAAAAGCAATGCATT +GTCGTTTTTCAAAAAACACTCATGGTAAACGAAGTCCCCGTGGAGTACAC +GACAATGTCGCATCGAGACCCAAACTGGGAAACGCCTGCGCCTTTAATTT +TAGGAGTTTAATTTTCGAATTTCGAGTTATTTTTTTCTCAAGTCTTTTTG +CGACTTTTTTTTGAATATTTTATTACTTGTAGCGCTGAAATATGACAGTT +ATTATAGCGTCACACTGGGGACCTCAATCAAAACAGGTATTTTTCAAAAT +TTCGCCGTAAAATCTAATAACCAATTTTGAGATGTTGCCACCCGAGCCCC +CACGGATAATTCTGAGAGAAGTTCCGGTCCAGAATAACCAGAAAGAGCAT +CCACCGGTTCAGGAGATCAAAACCGTGTCCAGCAAGTCGAAGGAACATCG +CGTCAGTTCGAGCCGGAAAATTCCGGATCACTTCGACGTGGGCCCACGGT +TCTACATGAATGTCCCCGCAGATGGCTCAGAAGTGTTCGAGGACGACGAA +AAAGACGTCGAGAACGAGTGTTGGGCGGTGATCGAAAGAATAGGAAGCGA +GGATGACAAGTTCGAGGCCTCTGAGCTTGTCGAGTACCGCGATCACGATT +GGTACATCGCTTTGGCGATTAATAAGGAGAAAACGCCGGATAAGGCGAAT +TATCAAGTAGGAATTACGAAAAAAATTTTGATTTTCAATCTCATTTTTTA +AATTTCAGCACCTACTCTACAGCTATCGAGGCGGAATCCAACGCATAATT +CTCACGCCTCAACAGACGGATTCGATCGACAAAACGCCGCTAGTGAAGTA +CAAAATCATCGGAGACGGTCTGTACGAAGTGCTTCCGATTCACTCGTCCC +TTCCTCAAACTGGTCTAATCAGTCCCAAATATCGTTACAACAAAGGCGTC +GAGCTTCGGATTTTCGGGATAGTGAATTGGATTGATTTTGTGTTGGACGA +CGATCATCAGACGCACAGGACGATGGTTTGGACAGATGCGGTCGGACCGA +TCTATCTGAGTGCCGCGGATCGAGCCAATATTCGACGGAAGCTGCTGCTG +ACCGAGATGCAGATTTTCGCGCCGTTGCGAATGTGCCATATTACTGTCAA +GGCGGAATTTAACTTGTTAGTTAAAAAAAACGCGTAAAAAGAATAATAGT +TCCAAATTTTCAGCTCGATTCCCGATGGTTCCCCGATTCAATGGACCATA +TCATCGTTCCAGCCACTCATCGAAGAATCAGAGAAGGATCCGAATATTGG +GCGCAATTTATGGCCAGCGAGAGTGCTCAGATTCGATGATTTAGTCGTTA +CAAAGAAGGTGCTTAATCATTTTAATATTAAATTTTAACAGCATTTATAA +TTATTTATCTATTTCCAGACACCGAACGGCTACTGGCTGAAGTCTCAAAG 
+ATTGGAGGGTCACGTTAATGTGTTCGCCGGTGCAAATCAAATCGGAATCA +TTGAGTCGGCGGGTGAAAAATACGCGACGAAGGGAAGCATGATGGCGTTC +GTGGTTCCGTGCTATCAGAACAGCACGTTTGCATATTTCGAAGCACTTAT +CGCCGGACCCCCGCGAGTCGTGATGATTATTACCGAAGGACGCTTTCTGA +ATTATTGCCCGAAGACGTGGCCACCGTCTGTTCGGAAAATGCGTGATCAG +TATCAGAAAGAGCACGTTGTAAGCACAATTTTCCAGAGAAAAAGGCTTTC +GATAAATTTTCTTATTTTCAGCTGAAATCCGAAGTCCGAAGTTCACCAAT +CTGCATGAAACAGCCGGATTATTGCCTCAAATCACTACGAGGATTCAGCG +AGTGCCCATTTTGATCACTCAAAGTTTTATATATTTCTGTATTTCTTCTG +CCTTATTTTAAAAAGTATTTTCCCGTTTTTTTTCTTTTGTTCAATTTTCC +CTTTTTCTTTACTTGTTATGTCCTATGTTTCATGCTAAATTAACGTATAT +TTTTAATTAGCAATTAAAAAACTTTCTGATTAACTGCCAGTTTTTGAGAA +GAACCAGAAGATGGAACGCAGGCGAGTTTCTGAAACGGAAAGGGTTTGAA +ATTTTTATCAATCAAACAAACTCGCAATTTTATCAAAAACCATGCCGTCG +TTGCAAGTTTGAAAAAACCACATATATTTGCAACATTGATAGAATATCAT +TGGATCGTATGGATCCGCCATGGCCAGACCATCATATTGACATGTACCGT +TTAATGGGTTCGGTACCGCTGAAAAGTTGTTGGTCTCGCAGCGAAGTACT +ACAGTAATCGGGAAAAAGCGCTGTTTATTATCGATTTTTCAATTAAAAAT +CAATAAACTCGCCGCGAGATCCAAACCTCAACTTACGATAGCTTTTTGTA +GGAGTCGATTGTCCATCGACAAGGAAGCCAGCAACTAGGAAAATTTTGAT +AGGGAGAAGCATTGAAATTTAATTGTTTTTGGGGGTTTCGTTTGGTTTTT +ATAGGAATTTTTGAGCTGTAAGAAAACAAAGCAACATTTATTTCAATTAA +AACCGGAAAATATGAGTCATTATGGGGGGTTTTCAACCGTACGGAATTTG +ACAAATTTGAAACTACCATAAAATGGTTAGTTTTGTAGTTAGAGAACTCA +AAATTGATTTTCTAAGACAAAAAAATTGCCCTTTTTCAGAATATAGTAGG +TTTGTATAATTTCCAAAAAATTATTTTTCTTGCCCAAAAAAAAAAGGACA +GTCATTAGATGGCTGAAAAAAAAGTGGGCAAAAATAAAAAGTGTCTAATT +TTGTTGAAAACGGGTAATTAATATATGCAGAATTCAGAAAATCTAGGTTT +AACCCATCAAAAACCATTAAAAAGTGGCAAAATTTGGCAATTTTTGCAAA +AACTCACAATTTTGAAACTCCTCTAAATTGGTTCAATTTGTAGCTAGAGA +ACTTAAAATTGGTCTCCAAGCCCTAAAAAATTGTCCTTTTTCAAAATATA +ATAGTTTTAAACGTTTTTAGAAAATTTATTTTTCTTGCCCAAAAAAAAGG +ACAGTCATTAGATGGCTGAAAAAAATTGGGCAAAAATAAAAACTTGTCTA +ATTTTGTTGAAAACGGGTAATTCATATATGCAGAATTCAGAAAATCTAGG +TTTAACCCATCAAAAACCATTAAAAATCTAAAATGATTAATTTTCTAGTT +AAAGGATTCAAAATTGATCTTCGAACACTAAAAAGTTGTCAAGTATCCAA +AAAAAGTGGTTCAATTTAAAAAAAATTGGTCAGGGAAAATTATCGATTTT +TTCGCCGCTTCGCAACTTGGACGAATTTTTCTTTCGCTGCACAAAAATCA 
+ATAATTATTGATTTTTTTTCTAAAACAGAAAAATAGATATTTGCACTTTT +CTGAAAAATCGATATTAAACGGAAATTCGCAAAAATCGATGGTCACTAAT +TTTTTTGCAAAACCCCAGTAAAAAATCGCTTTTTTCGCCGCATTTCCTAA +CTTTTTCTATGCCACCACTATTGATTTTTTTTCCAGAAATCTTGGAGCTT +CCCAAACCCGCTCATCAGCCCGCGTGCTTCATCAATCATTCCGTTGAGAA +CTATGAATTTTTCGAATGAAAATTAAAAAGAGGGGGATTTATTAGACTGA +AAAAGTGTTTTTGTGCCACGTGGCGGCGGCCGAATTCTCCTGGACGGGAA +TTTCTAGGCCATGCTTGGGGGAGATGAGGAGATGGGGGTCAACTTCAAAT +GTGTGTAACAAAATAGAAAAAAAACTATAAAAGGTAAGAGGAGATCAAAG +GTCACGCCACTAGGGCTTGAAAAATGAATGCCTGCCTACAAATAGACATG +TAGGAATGTAGGCAGGCAGACAAAAAGTGGGTGAACAACGAGAAAAGGCA +CGTAAAAAGGGGTAAAGGTTAGAGAAAAAGAAAAAAAAAGCTCACAGAGC +ACATTAAAAATGGAAAAAAACGAGATCATTGGATATTCTGCTGCCGTTGA +TCATCGTCATCATGAATGATCAGCTGATTGGGGTCGACTATCACATCATC +TGGGTTCACGAGAGCATCTTCTTCGAGCAGCTCATGCGGAGGATGATCGT +CGTGATGATTTGGCTCTTCCACAACCACTTGTTCGAGGACATCTGGAATT +TATAGAATTAAAAAATACATTTTTTAGACAAGAAATCGACAAATTTCTGT +GTTTATTTCTATTTTTCAGATAATTCCACACAATTAGATAGTTATTGAGT +TTTCTTTCTATAAATTTCAAAAAAAAAAACGATAATTATTGATTTTTTTT +TAAATCAAAAACTTCACAAAACTCAATAATTATATATTTTTCGTCCTCAT +TCTCTTTCGGTTTTGTGTGTGTGTGAGTTTTTTTGAAAAACGCAAAATTT +TCTGTGATATTTCTATTTTTTAACCTGACATGGGCAGTTTTTGAAAAAAT +TTTGCAATATTTTCGGTGTTCTTTCTCAGAAAAACTCGAAAAATTGCAAA +ATAAATGTAAAATTTATCAAAAATTACACATTTTCGGTCAACCTTTTGCT +CAGAGAAACTAGAAAAAAAATTTCAATTTTTTTGGAAACCTGCCAAAAAA +TGAAGTTTCAGTGAAACATTTTTTTTATCTTTTTTCGAAAATTTAATACA +TTTTTTTGCAATTTTTTAATATCAATCCGTTCTCACATTTTTTGGTAAAA +ATATGAGGAAAATGAATACATTTTGGATTTTTTCAGAAAATCTGTTTACC +ATTAATCTGCTTCTCCCCAGTATAAACTTTAGGCTCATTCTCGTGCACTT +CTTCTTCCAAATTAACATCATCACCAACTGTCCCAATAACTTCTTCCTCG +ACAATTTCCTCAACAATTGCTCCATCAGGAGCCATATGCAAATGCATTTC +TCCATTATCCATTTCTTCTCCAGCCATTATTTCGACTCCAAACGTGTGAT +TCACTATTTGCAAGAGACGATGTTCGCTTTTTTCGAAGAGCATTTTGAAG +AGCAAATCGAATGTCTCGTTGTCGGTTGCTGAGCCAAATGAGTTCTTGGC +AATACGGAAGAGAGCCTCGTTTCGACTTGTTCCACGCGCCATTCCAGATT +GTGATAGAGGCTGGAAATATTACTATTTCAGATTTTTAGATTAAATGTGG +AGCTGGGAAATTCGAGTTTTCGACGAAAAAATAGCCTTTCAAAAGATTAT +TTTCAGATATGTTTAAAAAAATTCCGTTGAAAACGACGAAATTAGATAAA 
+AATTGGATTTTCAGCACATACACGGGCGTTTGATTACGTGGCCGTGAAAA +CGGAGAAACTCGGCCACGTAGAAACAAAACCGAAAATTGCAGAAAATCCA +GGAATTTTTTTTAATGACTCCGAAATTTCCCCGATTCCGAATAATATATA +TGAATGAATATATACTTCTGAAAAATTTCAAAACCTTTGCTCACCAAGCC +ACTACTCTGCTGCCCAAAATTTTTAGGCTCCTGATATTTCGCCTTTCTAT +ACGGCAGCTGAATCTGTTGCTGGGATGATGATGGTTGCTGAATCTCTTCG +GTATGTGAAACCTGAAGCTCTCGCTCGTCGAGCATCTCCTCAAGATGCAC +ATTTTTCTTTCTGGATCCTCCTTCTGAAGCTTCTGCTGAAGCTCCAGCCG +TGTCAGTCGTCCAATCATCCACAAAATCACCCGCCGCCGCCGAAACAATC +ATCTCATTCTTCAACTCGTACGGATCATGATCATCTTCAGCATCCCGAAT +ACCATCAGTCGCCGCCATAATCTCACGACGTCGTACATAATAGAGCATCT +TCGTGTTCACCTGAATCCCATACTTCTCCAAAATCACTTTAGCCACCTCG +GCAGGTGTCATTCTCGCGAGCAGCCGGAGCACATCCTCCCGAATGTGTGG +TGGAAGACGTTGAAGTGAGTATTCTTGAGCGGAGCTCATCGTGCTCACGT +TAACCGGAGCCGGCATCTCGCCGGGCTCAGTGTACCAGTAAATGATCATT +GCGTTGCCGATTTGATGGCCATTCTTCAACACGTACGTCACCTTCTTGCG +AAGACGTGGACAACGTGGAAGGGTGGCTGTGTAGACGTGAAGCTGGAATT +CGATGAGATCCGAACGATGGGATGATGGTGTCTTCCAGATGCGTAGTTGA +CCTTCTCGATTCAGTGCGAGCGCTTGTTTCGCAGATTTTGGACGTTTTGT +GATGCCGTCTGAAAAACAGAAAAAAAATTAAGTTTGGAGCATCTTGAATT +TGCCTTTTTTACTTTCGGAACGTCCCACTTTTGCTGATTTTTCGTGTTTT +TTTTTCTGGAATTTTGGAATTACAGGGAATATCTCTCCAAACCAATTACA +GGGAATATCTCTCCAAACTGGCCGTCGCAGGAAGACTTCACTTTTAAATT +GTTTTACCTGTAGGCACAAAAGCAAAAGAGGAGAGCCTACATCTTTGTAT +CTTCGATAAATTCGTTAAAAGTTCTTTTGTCTTTATTCATTTTTCAAATT +ATTTTCCAATGTTTTAAAAACATTTTTTCTAAACTTTACAGTAATTTGGG +CTATTTTCACTATTTTTCAACTTTTTGACGCGTTTTTAATTCTTCATTTC +AACCTCATTTTGTCCAAACTTTCTCAAATTTCCACATAATATGGCCGATT +TTCATGAAATTTTCATAATTTACACCAATATATTCAATTTTTAACCTCAT +TTTTTTCACAGAAATTCCCAACATTTTCACATATTTTTCATGACAAATTT +GAATTATTTCAATGTTTTCCAAACTTACGAGGATTGGTGCTAGTATTCTC +TGACCACGGAACCAGTCCATCACACGTCAAATTTCTTGCATCAACCTCGA +GCCCATCAATAAAGAAAACGAATGCGGCTTTGCCAGTTGTGGCTGGCGGA +AGGGCCGGTACTCGATTGTTGAGGCATCCGAGGGACTCACTGCAAAAAAA +TCATCGTTTTTTCTAGAAATGGATCGATGTTTGTGTAACTTTATCGATTT +GCTAGAATATTATCGATTATTCCGAAAAAAAATTTTAAAACAACAACTGT +ATTTCTATTTTTATTTTTTTAAAGATAGACTTCCAGATCATATTTTTTTT +GAAAATTTATTTAAAAAATTATCGATTTCTCTAGGAAAAATATCGGCTAT 
+TCTCGAAACTTGTGGATTTCCCTAGAGATTTATCGATTTCTGAAAAAATT +ATCGAATTTTCTAGAAATTTTGCAAATTTTTTCATAAAATTATCGATTTT +TCTGAAATATATCGACTTTTCGCCCAAAAAGTACGTACATCAATGAAGGC +TTATTGACAAGAAAGTGTAATTGTCGATTCCTATCATTCCAGAACTCCAT +ATCCAACAGCTGAAGATACAAACTACCATCATCGGCGGCTTGTGCTGGCA +CAGCAAACGGGCATTCTTTCTCGCATTCCGACATATCCGACACATCATTA +TCGATATCATGTATCTGCGTTGGCTGGAAAAATGGATCCTACACGCGTTG +AGCACACACTTTGTGAGCTCTGCGAGAGCTCAATGAGAACCTACCGGTCG +TTTTGTGGCTCGTTTCGCGAATCGTTTCAAATATCCGCCATGCATTCGTG +GTAGCTCGCCTTCCTGATTATAGTCGTACATGATGAGCGCATGGCTCACC +ACGAGCCGTGATTCGGTTTCCATCATGTAATAGATTTTTTTGCGTAGTCG +TGGCTCGCGTGGATTGATGGAATACAGGAATACCAAGTGGAATGGGCACG +TTTTGTAGTCCATGTTGTCTTTGGTCTGTGAGGGGTTTTGGATATTATTA +CGGGAGGCATAAAATTCTGAGAATGCGTATTGCACAGCATATAGACCAGA +TCCGTAAATCAACACAAGCCCTACAGTAGTTATTTAATAAATTACTGTAG +TTTTTGCTGCGAGATATTTTGCGCGTCAAATGTGTTGCGAAATACGAATT +CTCAGAATTTTTTGTTCCTAAGAACAGCTCGTCAATTTTTTTTTCAGTTT +TTCAGGGTTATTCAAAAAAAACCTGTTTTTAAAGCTTCTAGGTCTCATAT +TTAAAAAGTTTTGAGAATTTAATACATTTTTTTTAATAGAAATTTGGCAA +TTTTTTCATCACCTCATCGTCGATTATTCTTTTTGAAAAATTTTCGATTT +TTATGGGAATATATTTATTTTCCAAGAAGATTAACTTTTTTTCAAGAAAT +CTGAAAATTTATCAATTTTTCAAAAGAAATTAATGAATTTTATCAAAAAA +AGAATAACTTTTCCAGAAAATATTTTTTTTTCTTAAAAATTAAAAAAAAA +TTGTTAAATCGCTTTTTGTGTCAATTTTTCAAAAAAAAAAAAGAGGCATC +GAGAATATCCCATTTTTTTCGAAAAAAATTTTTTGGTTATTTTTCTTTTT +TTAGATAAAATGTATATACTTTTTTAAAAATGTTTTTTCACCAACAAATT +TTGATATTAAAAAGTTGGAATTTTATGACTAAATTATGGAAAACTTTTTC +GATTTTTAGAAAATCGGAAAATTTTGAAAGTGGGCGTCGTCAAAAAATTT +AATTTTTTAAGAAATTATAGAAAATATGCTGATTTTGGTATTTTTGGAGA +GCGTCGATTTCCATCGGATTTTTTTGGTTGTTTTCCCTGGGATCCATGTA +ACACCTGAAACTGTGATCCCAGCTCATCACAAATCACTCCGACTTTGCGA +ACTTTCGTCCGATAGCACGTCGGATTCTGTGCATTTCCATTATGAGACCA +TGGCGAGAAGTCGTCACGATTGATGTCGTGTGGAAAGATGGCTGTCCCGG +CGACGAAGAATGCGTACGCACGAGAATTACGAACAATTGGTGGGTGTCCC +GGCTTCGCGTAGTTCAGCAGACCACGTTCTTCGCTGGAAAAATGATGATT +ATATCGAAACATTCGAGGAAAATGGATAGAAAAAACGAAAAATTCAATTT +TTATCAAAGTTCGTTTATGTTGATGAAAACATTATTACGGGAACACAAAA +TTCTGAGAATGCGTATTGTGCAAAGTATCTCGTAGCGAAAACTACAGTTA 
+TTCTTCAAATGACTACTGTACCTATTGTGTCGATTTACGGAAATAATTTA +TAAATCGATAAAACAGTAGTTATTTTTAGAATTACTGTAGTCCTCGCCAC +GAGATATTTTGCGCGTCAAATATGATGCACAATACGCATTCTCCAAATTT +TGTGTTCCGTAATAAAAAATAAAGTTGTTAGATGAATATTCAGGGTCTCA +ACAAAATTTAATCTGTGTTTTCAGTTTTCATGTTTTTAGAGTGGAAAACG +TTTTTTTTTCAGAAAACAGTCAGAAATGTAGATTTTGTTCCATATTTCAA +AAAATCATTTAAAAAATATAGAACATTTTTTTGAAAAATCAAATTTCTCC +AGAGTGTAGTACGCCATTAAAGGTCTCGTCACGATTTTGTTTGTTTTCAA +TTTTTAAGTTTTTGAAGTAGAAAATTATTTTTTTTAGGAAAATTTTTCGT +CGATTTCGTTGAAAATTCGCTAATAAACGGGGATTAAAAAAAACGAAATT +TTCCAATTTTTCGAAAAAAAAAATCGAAAAATTCCTCCCAAACTTACACA +ACATTGGTATTAGCAATCCACTCCAACACGGCCTCAAACGTGCTGGCGAC +CCTCTTATGTCGAACAGTCAAGTAGATATGTCCCGTTTCCAATTGCCGGG +GATTCTCAAAATAGGGCGCCGGGTTCTCCTGCTGAGTAAGCGGATCATCG +TCAAACTCCAAATCGTAGCCATTGTTGAGCACATCCTCTTGAGACATATC +CAACAAATCATCGTCTAGCAGATGTCCCTGAGCGTCTTGAGCAATCGAAA +CATCTTGCATTAGCTGATGTTGCTGCTGCTGGTGCTGATCGTGTTGAAGC +TGGATCTGATCTAGGGAATCATTGTGATGTCCGATGACTTCTTCTTCCAC +AACTTCCAAATCTTCTACATCTATAAATTGTTCTTCATCATTCGGTGCAG +CAGATGATGTAGATGGATCTTGTGATGTATCAATCTTCTTCTGATCGTCG +ATCCATTTCGATCCTCCAACACCTCGTCCAGGTGTTCCAGGTGGTTTAAC +ATACGGACACGGAACCGGACCATCTCTCACATATTCATAGATATAACACA +CATTTCCGATCGTAATCGTATTGTTGCGTTGAATATAAATGAGACGCTTC +CGAAGACGGGGACATCTCGCCAGGACGGACATTGTTTCGACGAGACACAA +CTGTGACGAGTGCCAGCCGCCAGCCGTGTCTTTTGCCATTTGGAACTGGC +CGTTGTCGTCTTGGACGATGGCTCGGCGAGTTGAGCGGACGCGGGGTGAC +GGACCTTCCGAGGCAGCTGAAATTAATGAAACTACGGCACTCTGTACATC +TCCGGAGGCACCACGTTTTCAAGATCTACCAAAACAAGATCTACTATTTG +GAGATCTACGAAAACAAAATCTACGTTTTCAAGAAATGGTACCATGCTAA +TTGCAGTTTTCGCAGTTTTCTCGAAGAACAACAAATCAACAATGTTGACC +GCCCAGTGTGCTCCGATGTCGCGTGTTGTTTAGCGTTTAGAAAAATCTTT +TTTGAAAGAAAAACTTCAGGAGAACGATAAACAACGCACGATGAGTAATA +TGGAAGAATTTTTGTTTCGTTTTTCTTTGAGAAAACTGCGAAAAAGTGCT +AGATCCTGAAAACGTATATCTTGTTTTGGTAGATCTTATTTTCGTAGATC +TTGTTTTTGAACAGATCTCGAAAACGTGATGCCTCAATCTTCTGCAGTTG +CCGAATTTTCACATTTTTAAACCTTAAAACATATTTTTTCTAAAACTTTT +CCAAAAATCGATTTCTTCGTTTTCACGATTTTTTTCAGAGAAAATATAGA +AAATCGAAAATTTGTAAATTTTCGATTGAAAAAATGGAAAAATTACAGGA 
+AATTATTGAAGATTTAAACTTGTTTCTAAACAATCGAAAATTATCGTTTT +TTTTCTTTTGGAAATTTTTTTTTGGATTTTTCCCAAAATGGTGGTTTTTG +AAATCAAAAAGCGTTAAAAACAAATTTCTTTTAAAATTACCGGAAATTAT +TTTAGAAAATTTCCGAAAAGTCAATAAATTAAAAAATCGAACAAGAGAAA +GAGTTATTAAATTAAACAAAAACAATTTCACAAAAGTCGATAACTATTGA +TTTTTCATCAGAATTTCCTAAAAGTCGATATTTTTAAACCTTCTGATAAA +TTCTTTTTTTGTTAAAAAAAACTTTCGATTGATTAGAAACACGTTTAAAT +CTTTAATAACTTCCTGTAATTTTTCCTTTTTTTAATCGAAAATTTACAAT +TTTAATTTATTCTTTTAAAAAAATCAAAAAACGAAGAAATTGTTCGTTTT +TTCTGGAAAAATTAATTCTTCGGCTCGAAATTTCGATGATTTCTTTTTAG +AAAAAAATATATGTAAAAAATTATACAATTTCAAATAAAACACTGTTTTT +TTTCTTTTGATATTGAAAATAGTAGAAATTTTTCAATAAAAATTCCAATT +GTTCACTCAAAATTTCAAATTTTCGACAAAAAAACTTACGATTAACAGTC +CACGGTGACAGTCCATCGCCAGTATGATTAATCTGATTTGCAACAAAATT +CGCCGGCACAAAGTACACAAAAACGCCGGTTGTCGGAATAAGTGGCGGTA +GGTACGGTGTTCGGCGATTCAAACAATTCGATTGGCTCAGCAGCTCGGGC +TCATTCACCAAATATCGGAGGAGAAGCTTCTTGTTGTGTGCCCAACCGAG +TTTTGCAAGTTTTACGCGAAGAAACATTCCACCACGTGGTCCTTCCACTA +TCTCGTCTTCGAACTGGGAATTTTTTTTTGATGAATTTGAGAAAAATAGT +TTAAATCTTTATATTTCAGTTTGAAATCGCCGAATTTCAGTTGTGCACCT +TCGTGTATATTTACGATATGTTTAATCTTTTTCGGATGCTGATTTCCGAT +CTTTAACGGTTTTCCTTCATTTTCGTCGGTTTTTTTTGGATATGTGTTTT +TGAGGTCAATTAATGAACCATTTTATTATCAGAAAACTAGCAAAATGTTC +AATACTCGACAGAAAATCCATATGCTATATGCTTTTCTCGACTGAAAACC +GGAAATGTTGCTAGTTTTTGATAATAAAATGTTAAATTAACACATTTTCA +AAAAAAAAACACTTCTACAAAAACCGACGAAAATTAAAGAAAATCGTTGA +AAAAACGAAAATCAACACCCGATAAAGATTAAGCATTTCGTAAATATACA +CGAATTTGGTAAATCGACACATGGCATCTCTAGCGCGAAAATTCGAAATT +CGTAATTTCCAGCTGAAATATTAAATCAGGGAATTTTTCTGAATATTTCA +CTTAGATATTCGGAATCAGGGAAAAATTTGGAGTTCTTATAAAAAAATTT +ACTAAGCAAAGTTTTAGCAATTTAGGTTCTAATAGGCTTTACAAATATCA +AAAAGTCCCGATTTTTCAAGGATCTATATATATTTTTAGAACCACAGATT +TTTTGCTGTTTTTCAGCATTCTCGGAAGACCATACCGGTGTGGATCCGGG +ATCATCCAGATCTAACGAGACGTGCGGATCAGTTGGATCGACCATCGCAT +GTCTTAAATAGTCGTTTCCGTGCGGAAGATTTACGATTGGTGGAATTTCA +CCGGGACACGTGTAGTCGTAGAGAATGAGCACGTTTCCATAGATTTGGCC +ATCGCGGGAGAGGTAGAAAATCTGGAAAAAATTTTTTGAATAGATTTTTT +CGCCTTTTCTGAAGGATTTTTTTTGAAATTTGAAGGAAGACGGGTTTTTG 
+CTTGCATAGACTCAAAATAAACCCTTTAGAAAATTTTTACGTTGGCTCAA +AATATCGAGAATAAAAACCAATTTTTTTCTAAAATTTCGAAAAAATGGGC +ATAACGGCGGAAAATGTATTTTTATTTGATTATTGTACCTTTTTCAGTTC +ATTTTGGAGCATTTTTGGCCCATAGGCTTAAAATTAACCTTAAACATTTA +AAAAACGTGAAATAAGATATCAAAAAGCCATTATGGCCACTATGACCTAA +AAAAAATATTTAAAAAAAAACAACTTTTTTTTGAATATTCAAATTTTCTG +AAACTGATAATAAACCTTTTTTCTCAATCTTAACAGTCTCGGCAGCCACG +CGCTGTACTCGGTGAGATGAAGTTCGGCGATACGTGGATCTCCTTTGACC +GATTGGAGCCGTCCGTTGAGACGAGCAACCGGATGACGTCGGACATTCGG +TTTGATGACTGGATCGTTGAAGTGTTGCTCGCCGCCAGTAGAGCTCCATG +GCTTCAGATCGTCATTCGAGACGTCTTTTATCGACATGATTTTGGAGCCG +TCCACGTAGAATGCGAAACAGGCTTTGTGTGGAACAAGTGGTGGGAGACG +GGGTGGTTTGTCGAGAAGCCAACCTCCTTCGCGACTAAAAATGGGGACCG +TTGATTTTTGTTTTTTTTTTCTGACCGATTTTTCGGACAATAGCTTGAAA +TTTTGACAATTTTCTTTATTTTTAGAAGATATTTTTTTCTTATATTTTTT +TTTCTCTAAAAACTATAGTAAAAAATAGTTCAATTTTTTGTGGTTCTGGC +GAAAAATGTATTTTTTCGAAAAAAATTTTTTTTTTTCAAAACAAGATATT +TAGATTTATCACTGAAAATTGCTAATTTCCAGTGCGAAAAATGACTTTTT +CTGGCTATTTCTGCCTGATTTTTGATATAAATTCCAATTTTCACAATAAA +ATTGCTGAAATTTTCTCCTTTTTTCACTGTCTCCGTACATCATTGCAATT +TCGGTCTCGTTGGTGAGCAGCTCAATTATTCGTTCCTTTCTGAGAAGGTA +GGCCAGGCAGCCTGATATCACTGTGGCGTAGCAATCACCGTTTTCCGTCA +TTTCAACTTCATCTTCGAACTGCAAAAATTATAGAATTGAAAAAAAAAAA +CGATAAAATCCGACTTTCTCGATGATTTTTATGATTTTTAACTGAAAAAT +TGATTTTTCGGCATTTTCTGAAGGATTTTGATTAAAATTTGAAGGTTTTT +TTTTCTCCCGTAGGCTCAAAAAACGCCCTTTTTATGGCGGCTCAAAGGCA +AATTTTAAGTAATAATCGAATAAAAAATCATAAATTTACGACCCCTACGG +CACGGGAAATTGGAGACTTCAGCTAAAACTAACCGTTGATTTTGAATATT +TTGAGCCAAATTTGGCCAAATAGGCATTAAATGCACTCCAGATGACTAAA +AACACGTGAAAAAAGCTATTAAAATTAGGTAAATAAGACAAAAACAGAAA +AAAAAGAACTTGAAGCGAAACTGGAGCCCATTTTTGGAGACAATTCTCAT +TACAAAATTGGTTTTTTGGGCACATTTTTTAGTCTTTCTAGTTATTTCGA +ACATTTTCCGAAGAATTCCCATAAAACTTTGAAGCTTTTCCATGTAAGCC +CCTACCAATTTCACGTGGTACTGCCTTTCAGCAAGAGCTTTACTGATAAC +TCGTTCTCCTTGAACTCGGAACATTTTCGACGGTGTCGCCGCCGCCGGAT +TTATAATTTTCATCGCCTTCTTCTCCTTATCCCCATCGTCGATAGACCCA +TCGGTATCCATTGGCTTGCGTGTTCGGACACGTCGCTGTTTGCTGGGCCT +GGAAAATTTCGATTTTAAAGATTTTTTAAATAAAAAATCCCCCACAAATT 
+CATTTCCAAATCCGGTGGTTGCCCATCCATAAACCCGTCGTCCTCCTCCT +CATCATCACCGAACGGGTCCATTTGTATGCGATCCTCGTCTCGATATCGG +GAAAGGTTCAGAATTGGCGGCTCATCCTCTACGATGACCTCCGTCTCTGC +GTGGAGCATCGAGTCGTCGCCGGCGAAGATGGGCTCTCCGAGCTCTCCGG +CGGTGAGCTCCTCCAAATGTGGCGCTGGCTTTCTCTTTTTCGCGTTTCGA +ACCGTACTTTCCTCCGTCCTGGAGCGTTTTGGAGCTGCGATCGAGCTTGT +TGATGCAGAGGCGAGCGCTTCTTCCTTTGGGATGCCCAACTCGTAGTCTT +CATTGTTGTAGAGCACGGATTTCCGAGCATTTCGACGTGGACGAGATTTT +GTGGCTGGATCCTGGGGCTCTGCGGCAGGCTCTGGATTTTGCTGGGGCTC +TTCGGCCGGCTCTGGATTTTGCTGGGGCTCTGTTGGATTTTGCACAAGCT +CTTGATCAACTTCTTCCTGATTTTCTGCCAATTCTCCCGAATTTTCAGGA +TTCTCGGACATTTTTCTGCAATTTAATAATTGAAAATAAGAATAATTAAC +GGGGTGGAAAATTAAAAATTAAACAAGCATTAAAATAAATTTGAAAAAGT +CCTGATTTGTAGTCCGCAACGCACGCACGCAAGCATACACCAGAACAACT +AAGAGAGACGGGAGGAGAGATGGAGAATTTGAAAGAGACGGCGAGGCGAC +GAGAGAGCCGGAGGATTGAGGAAGGACAAACACACAGATGTGGGGTCTTG +CAGCGCTTTCTCTGCGTCTCTTTTTTCGACATTTTTCAATGGGAAATTTT +TTTTTGGAGTTTTTCTTTTTTTTTATCGGAAAACTTCAAAAAATTCAAAT +GGAAACGCTGTACGGACAATGCATAGAATAACACGCAAAAATGCACATTT +TTAGGCCTTAGGTTACTGTAGTGGTCTCGGCGCGAAAATGCGGATTTTCA +TTTTGAATTCAAAATAAAAAAACTGGACGTGATTTGGTACAAGCGTTTAG +TGAAAATGTTTCACAAAATAGGAGATCAATTAGATATATTTATTTATTCA +TGGAAAATGACTATTTGGAAAAAAATTGCGAAATAAATAAAACTCGATTA +AAAACAATGTGCATATGTCTTCGTTGCTTTCTTGGCGAGGGTTTCTCGGT +ATCGAGTTCTACAAGAGACACACATTACGAATAACCAAAATAGTTTAGGT +AATATACACATTGGGTGGGGGTGGGCCAGAATAATATTGAATAAGGGTGA +AGAAAAAATATTCAAAATACGTCGCTGAAGTTGGCATACAATCTTGAGTC +GGATAAATGGTAAGAAAGCAGCACCACTCAACTCAGATCACAATCTTCTG +AAGAAAAAGGGACGAATGTATAGGTTCGGAGGGGGGGGAGGGGGAAGGGG +TGGGGATGGAGGCAATTACAAATAAATCTGAAAGAAAGTTACAAATACTA +CACACCAAAACGGTGAAGACTTGCAAACGCAGTGCTTGCAAGAATTCGAT +AAAACTCGCGGATGTTAAGAGCGAATCATTTGGATTTCGGGGGCAGTAAA +TTAAATGGACAAGGCATTTCGAAAAATACACACGTTAGACTGAAAAAAAA +AAACACGATGTGATTTATGGGATGCAACACTGCTTCTTTGGTGGGTCGGA +CGCTGGCGAAGGAATGATTGTACCACTGCCACCGCCATATCCTTGTCTGT +CAGTTCCTACATGCTTGTTGGATACTGATTTGTAGATTTCCGTCAGGATA +TTAGTGAAAGCTGCTTCAACGTTGGTGCTGTCGAGGGCAGATGTTTCTGA +AAAAAAAAAATATATAAACGCAATAGATTAGTTTAACTTACCAATAAACG 
+ACAATTGATTTCTTTCGGCGTAGATCTTGGCCTCGTCTGTTGGAACTGCA +CGCAAATGGCGCAAGTCGCTCTTGTTTCCGACCAACATAATCACAATGTT +CTGATCGGCGTGATCACGAAGCTCCTTCAACCATCGCTCAACATTCTCGT +ACGTCACATGCTTAGCGATGTCGTAGACTAGGAGAGCTCCGACAGCCCCA +CGATAGTAAGCGGATGTGATGGCACGGTAACGTTCCTGTCCAGCAGTATC +CCAAATTTGAGCCTTCACTGTCTTGCCTTCTACCTACAAAAATAATAATT +TGAATCATAAGTGTATTCTAATGAAGTTCCTCACCGAGATGCTTCTCGTG +GCAAACTCGACTCCGATTGTTGATTTTGATTCCAAGTTGAACTCATTTCT +TGTGAAACGAGACAGGAGATTCGACTTTCCGACGCCTGAGTCTCCAATCA +GAACAACTGAAACAATTTTTCGTTTGGGAATTGAAAAATGAAAAACAAAA +TTACCCTTGAACAAGTAGTCGTATTCATCGTCACGAGAGCCCATCTTTAC +TGCTGAGCTGTAAAATGATAAAGCTTGTTTGAAAGTTGTGTATATATTGG +GACGATTGTAAAAAAGAAAAAGGGCATTGGTCACATGATTCATCAGCCGG +GCGCAATGTTTTGTTGTAAAATCGATATTAAAAGAACGAGACACTGTTTT +ATCGGCGGAGAATATTTTTATGTTCTGTTCCCGACACAAACGCGGAGTCA +GGAACCGGTTGTCTATCTCGACGGGGATTGAAAAAATATAAGTGAACTTG +ATTTAAGAATGATTCAAAGTATATGGAGTTGGGAGTTTTATATTGAATTT +TATCAATATATGAGGGTGGTTAGTCAGAAGAAAAATGTGAAAATGTATAT +AGAAGTGTGTTATGAAAGAGTATTTGAGGCTTATCATACCCGAAAAAGGG +AAGGTGAGCGGCCGAACTTTGTTTGTCGACACGATTTCGAAATAGTTGAA +CAGGATATTTTTAACAAGGGAAAGCTTTCTCTGGGTGTTCTTTTTTGATA +ATTTCAGCTAAAAATTGAAAAAAATAATTAGTTTTATGGCTGTGAACATC +CAGAATAAGCTTTTCCGAACGCGAAATGCGTGTGAAACCCATAAAATCGA +ACGTAAAACAGGTCAGGCGCACCGGCGCAGCCACGTCGGTCTGAGGGTAC +GCGACGCTAGCCGCAGCGCAAGCCTCCTCGGGTCTCGGCACGACAACTAC +CGTACCCCTTTCGCAGCGGCGCAACGGAATCTCAATTTTACATTTTTTTC +TTTTTGGCCTAACGAATTTTTCCGTTTTTTCAGAGCATTTTTCATTGATT +TTCTTAAATTTTCATCAGAATAAATTTTAGAACTAATTTCCCGTAATTCT +GATACGCGTTTTTTATAGTTTTCGTGAAGAAAATGCTGAACATTCGAAAA +ATACTGAAATAATTTAGTTTTGAAAACTTGCTCCTCTGTTTCAATATCTT +ATAATTTTCTTTTCTGATTAATATTTTGATTTTCCAGGTAATCAATCGCC +TATAATGGCTCCAACGTAAGTTTTCTTTATAAATTTATATTAGATTGAAA +TTTACTTTTAGGCCTGACTTTCAAATACTTCAATTTTGATATTAATTCTT +GGTTTTTCAGAAAGAAAGTGCCACAGGTTCCAGAAACTGTGCTCAAGCGC +AGAAAGCAGAGAGCCGATGCCCGCACCAAGGCTGCTCAACACAAGGTGAC +CGTCGCCGCTGTAAGTTTATTTAACTGTCAGAAAACAGAAAATGCTCTTC +AAAAGCACTGGTTTTAGGATCCACTATTATCCAAGCCAGCCGTCAAAACT +GAGCTATAAGAATTATCTTGTTTTTGGGTGAGGTGTATTCAATTCAGAAT 
+GCGTCTCAATAACACGATGACAATTCGAAATTTTAAAGTTTAAATTTTCT +TTTAGGTGTTTGAAAATTAATTTGAACTAATCCTGCATTAAAAATAAAAT +TTTTTACAGAAGAACAAGGAGAAGAAGACCCAATACTTCAAGCGTGCTGA +GAAATACGTCCAGGAGTACAGAAACGCCCAAAAGGAAGGACTCAGACTCA +AGCGCGAGGCTGAGGCCAAGGGAGACTTCTACGTTCCAGCTGAGCACAAG +GTCGCCTTCGTCGTCAGAATTCGCGGTATCAACCAGCTTCATCCAAAGCC +AAGAAAGGTGAAATTGTGTTAATGAGTTTTTTAATAACCATTCTTTGTTT +TCAGGCTCTCCAGATCCTCCGTCTTCGTCAGATCAACAACGGAGTGTTCG +TCAAGCTGAACAAGGCTACTCTTCCACTTCTCCGTATCATCGAGCCATAC +GTAGCTTGGGGTTATCCAAACAACAAGACCATCCACGATCTCCTCTACAA +GCGCGGATACGCCAAGGTTGACGGAAACCGTGTCCCAATCACCGACAACA +CCATCGTCGAGCAGAGCCTCGGTAATTTGAAAACTTATGCTTTTTAATTC +TTATTTAATTGTTTTCAGGAAAGTTCAACATCATCTGTCTTGAGGATTTG +GCCCACGAGATCGCCACCGTCGGACCACACTTCAAGGAGGCCACCAACTT +CTTGTGGCCCTTCAAGCTCAACAACCCAACTGGAGGATGGACCAAGAAGA +CCAACCACTTCGTTGAAGGAGGAGACTTCGGAAACAGAGAAGATCAAATC +AACAACCTTCTCCGAAAAATGGTCTAACTTGTTTGTTGTTCATACCTTCC +TTCCGGATAAATCGTTATTGTTTGACGTTGTTTAATTATGTGCATAAATA +ATCATGAAACGTGATCGTCTTGGCTCGACTCATCTCGCATTTCATGAAGC +GTCTCATCATTCAGATGAATCGTAAAACCGCTTTGCTGACTACTCTCCCG +AGAACAGTCACTGTTCCCTACTATGCTCCATCCTGTTTATTTCTAGAAAC +TTTTCTTCGGAGCAAGCTAATTCCCTATGAAGTGAGAGCATTTAGGTTAA +TAAACATAGTCCAGATTTAAATCCAGACAACGCAATGTTCTCTCTACAAT +GTACTCCCTCGTGAGCACCTATACCCATTGATTGATGTCGATGGGTATTT +CTTCAAAAATTTGATGGAAGGACTTGATTATCTGCTCGCCAAATATGGAA +AAGTTGGCTTAAAACTAGAATTTTAGAAAAAGCTTAAATATTCTTCCAGT +CTCTGGACTCGGGCCTGACTCCAAAAGAGCGAGCACAGGCTCTCGCATTG +AGCGCTTTGCTCGACGAGCTTACTTGGATGCTCGCCTACTCGCGAGGCCA +AGATTTCACGTGGCTCCGCGATGATCGTAAGATTATCGAAGACTTCGGGC +TTGTTCAGCTCTATTTTTGGAGGAATTGGATTGTACCGCAGATGCAGAAA +CGGGTAGATTATTGATATTTTATGGAAAAATCGATAAAATATTGATTTTT +TACTGAAAAAATTGATCAAAAATCGATTTTTTTTTCCTCGGAAAATCTGT +AAATTAGTAGTTTTTTGTTGAAACAATCGATAAAATTCTGGTTTTTCAGA +AGCAAAATCGATATATTAGGCCAAAATCACCAAAAATTATCAATTTTCCT +ATAAAATTTACCATTTTTGGCTAGATTATCGATTTTTAAGGCAAGAAAGT +CGATATTTCACAATTTTTTATTGATTTTTTTGGCTTTCCAGCGCGCAAAC +TGTGCCAAGATCTAAATTTTCCGAAATTTTCATTTCAGACACGCCGCCGA +GTGCGAGGCTACGGAATATCCGGAAAATCGGCGAGAAAGGAGGTCGCCTG 
+CCGAACTGAAGCAATGTTGGAGGCATTGGCCTCACTTTTGGCCTCGAATA +AATACTTTTTCGATGTCAATGAACCGTCTTGGGTAGGTGGCAGCCAAAAA +ACTCGGCCAAAACTCGGCCTTTCAGCTCGACTGCAAGGCGTTCGCCGTGC +TGGCTCAATTCAAATATACACCACTGCAGAACGAGGCCCGCGTAAAACAA +TTCATGAAGGACCGAACTCCAAATTTGGTGGGTGGCCTTGAGTTTTCTAG +GCCACGAGATTTGAAAAATTCTATAATTAGGGCATGTGGTTTCAGATGAC +ATTTGTGACGCGCATGAAAGAGGAATTCTGGTCGGATTGGTGCACCACGT +CGGAGGATTGAATTTCGTGGAAAATGCTAATTTGTTTAATGTAATATGAT +TTTGCTTCGTAGATTTTAACCTATGCTCGAATAAAATTTATATTCTAATT +TTAAAAAAATTTCAAAACAAAAAATTTCTTACGTGACAATTAAAGGGTAT +TACAATATGGGCATAAAAAATGTATTCCCTAATTTTCGAACTTGTTCAGT +CTTGGTTGCCCAGGTAATTTTTTTGCAGTTCTTGTTTGGGACTCGGAGCG +ATTTTGCGCAGATGCATCATATTTGACGCGCAGAGTTTTAATTGAGGTTT +GAGCTTGAATTGAGTTTGAATTGAATTTTAATTAAATAAAGCGAATTCTA +GAATAAAATGATGTGTTTATTGCGAAATAGATCGTGAACCTACAAAACAA +CAGACTTTACGCGCAAATTTCCGAAATTTTCCAGAAAAACCAGTCAGAAT +TTCAAATGATACTTGTTCCTCTTCTATTTCTTCTCCCAATTTTGTTGTGC +GGTGGGAAAAAGAAAAAAGCGAACAAAAATGTGAAAGAGGCGTCGAAAGT +GAACTCACTGGTTCCACCGTCGACCTCTACGGACCCGCTGAAGACTAAAG +AGAAGGAGATGAAAAAAGAAGAAGCTCCGGGAGCACCAGACGCCCCGACT +AAGCCGAAAAGTCCTCCGAAACCAGTGGTCCAGGACACGAAACTTGCTGA +AGTGCTGCCTGAAAGTGAGAAGGAAGACGAAATGAAGAATGGTATTCAGT +TGCCGAATCCGCCGAAAAATTTGGTGGGTCTCACGGCGATTTGAGTGTTT +TATCGATTTTCTATGAAATAATTGATTTTTTCTAAAATTATTGAGATCTA +GATTTGCGAGTTTTCTGAATTTTCTAATATATCGATTTTTCAGAAATGCG +AAAGTGAAATTGCTCCAAAGGAAAAGCCGGCGGCTGACGAAAAGAAGGAC +GAGAAAAAAGATGAAAGCAAGAAGGATAAGAAGGAAGAGAAAAAGGAGGA +GAAAAAAAAGGAAAATGACGAAATTATGTGAAAAATTCGATAAATTGGTT +CTTGAAGATTATTGATTTTTCATTGTCGATTTTTCAGAATTCCTCGAAAT +TTATTTGCGCGTCATATATGGTGCATTGAGTCCATAGAGACTTCAAAGTT +CACGCGCAAATGCAGACATTTTTGCACGCGCTGCGGTGAAATTCCTCTTG +GCGAATTCAAATATTTTTTCCCGCCTTTTTTGAAAATTTTCGAGGCGTTG +GTCGAGCTGAATACACGTGAAGTTTAGACTGAATGTCAAATCACAAATAC +ATCACATTTGACGCGCAAAAAATTCGTTTTGAAAATTTGCGCGTCAAATA +GGGTGCATTGAGTACGCGGAGCCAATACATCACACTTGCTTTTCGTGGAA +AATGGAGCGAAATTTGATTTTTTGATTTAAATATTACGGGAACAGAAAAT +TCTGAGAATGCGTATTGTGCAACATAATTGACGCGCAAAATATCTCGTAG +CGAAAACTACAGTAATTCTTTAAATGACTACAGTAGCGCTTGTGTCGATT 
+TACGGGCTTGATTTTTTTTCCATTTTCTTCAGTTTTTCTGACATTATTGT +GTTTTATTTTAATATTTTTACCGATTAATAAATGATTTCCGTAAATCGAC +ACAAACGCTACAGTAGTCATTTAAAGAATTACTGTAGTTTTCGCTGCGAG +ATATTTTGCGCGTCAAATATGTTGTGCAATACGCATTCTCAGAATTTTGT +GGCCACGTAATATTTGCGTTAAATAAAAATCAAAAAATTAAAAAAAAAAA +AACATTTCAGAAATTTTCAAAAATTTTTTGAAAAATTGACGCACAAAGTC +TCCTGTTTCCCTTTTCCCCAAAGTTTTAAACTAATAAAACTTTTTTAATT +GTCGAATCCGACCAGTTATTTAATACGAGACAAAACGGAAACAAAGATTC +CAATTTCCGATTACGGTCAGCTCAACACTTTCGCTCAAGGAATTCTCTAC +GCTTAGATTTTGATCTTAATGGAAAACTGGTAGAACTTGGTATAAAACTA +TCCAAAAAACTGGCAGAAAGAGTAGTCTTTCCGAAGAAATGAAGTCCCTA +ATCGAGGAAGATGACTTGGAAGTCGGCAGATTCCGCCGCAACGCGTTTTC +CGCCGTCGCCCTCTCCACCTCCTGTATTCTGGCGATTGTGGTGTTAACAC +CAATTGCCTATCAGGCAATTCAGAGAATTCACTCAAATTTGCTTAACGAT +GCCCATTTTTGTCAGGTAATTTTTTTAAAAAATTTAGAAATTTTGATTTG +GAAATATTCAAATAAAAAAAAAGTTTTTCGAAATTTATTTTCAATCGTTT +GAGATTTTGACATACATTCCAAAAATTCCTACTCTTCGCAATTTTTTAGT +CAACTCAAACTCAAAACCTTAAAAAAAAACCGCAAACCTGTCATGGCGCA +TCGATGTTATCCCTACGCTAAATGTTCTGTTTTCGAAGGTTACTTTTTTC +CAATTTTTATCACCAAGGAAAATGTATACACAAAAATATATAGAATTTGA +CGTGCAAATTGGCAAAAAACTAAACTTGCGCGTAAAATCACTGCATTGCC +TCTGCGGACTCAACACACCAGATTTGAAATTTCAGGAACTTATCGATTTT +TGTAAAATTTTGCAGCATGGCGCATCGATTAAATATTAAAAAAAAATTGA +AAATTATCGATTTTTTTGAACACGAACAATTTGTGGAAAATATGAAATAA +AAAAAAACATGGTGCATCGAAAAACTTACAGGCTCGAAACCGTGACCTCT +GGACAGAAACAATGAAGATGGCTCGGAGCCGCGGCCGGGACGAGGAGCTC +GTGGCCCGAACCAAGCGAGCGGTTAAAGGCACATGGCTCTTCGGACAGTA +TATTCCAGACCGATCAAGCAGAAATCGACGACAGCAATACGCCGAGGCCG +CTGGCGGAGGCGGTGGAGCACCACATGCCGAGTGTAGACGGGGTCCACCG +GGACCACCGGGTGATGGTAGGTTTTTAGATTTGCGAGGCAATTGTGGTGT +ATTGGTTTAATTTTACAGACGGGGTCGACGGAACGGACGGAAAAGACGGT +GCACCTGGAACCGATGGAAAAGACGGTCAGGTACCACAAGGACCAGCCGA +CGGAGGTTATCAAGAGGGACCATCTGATGCAGTTGAAGCATGCACACGTG +AATGCCCACCAGGACCGCCTGGACCAACTGGAGCGCCCGGAGACAAAGGA +TCTCGAGGCTACCCTGGAGAATCTGGAGAGCCAGGCACCCCGGGAAGCGC +CGGACCGAAAGGAAATGCTGGACCGTAAGTGGAACTAAAGAATCGATAGT +TTTCGGTAAAAAATCAATCATTTTAGGAAAAAAATCGATAATTTTCATTT +TCATTTTTCTCCATTAAATTATGTTAACTTTCTAGAGCTGGACCACCAGG 
+ACCACCCGGATATCCAGGACGTCCTGGAGAAACCGGAGATCACGGAAAGA +CGATCGCCGGAGAGGCGCCACCTGGACCACCAGGACGACAAGGAGAAATG +GGTCCACAAGGACCACCGGGACCACCAGGGCCACGTGGAAAGGACGGAGC +CGGTGGAGAGAAGGGTGCTCCGGGAGATCAAGGTAATCCGGGACCATATG +GAAAACCTGGGCAACCGGGAGCTCCAGGACCCGATGGCTCTGCTGGTGAG +AAGGGAGGATGCGATCATTGCCCACCGCCAAGAACTCCACCTGGTTATTA +AAGATTCTTCGATTGATATTTTATTTTATTTTCATGACGTTATTCATTTT +GTCATTCGGTTTTCTTTTGTTGCAATATGATAATAAATCAGATTCTCTTG +CCAGTAGTTCCAAGTGCCCCAACATATTTTTGAGTCCTTTATGTAGATCC +CTCTCCTCGAAAGCTGGATGATTTAATAAATCTTTCAGAAAAATCGAAAA +TTTTAGAAAAAAAAACATCAAAATTTTTGACAAACTGCATAATCAGTCAA +TATTCTAAAAAAAAATACGATCGCACAAAAATAAACAACGACGCAACGAC +ACTCCGCGGTTACTAAGCTCCGCCCACGATTTCATTTGCACTGCGTTGAG +TCGGCCGTATTGTTATTCAAACATGAATGAATATACTGGTGAAGTTTAAA +AAATTGGGTGTTTAGTTTTGAAGAAAATAATTTTTTTGGAATTTTGAATA +AAGCACAATTGGCAGCTGAAACTGGACTTTCAGACAAATTTTGAACGGTG +GTTGTTGATGCTTTGAGTTTATATTGTTATTCAAACATGTATGAACTTGT +AAAGTGCTTGTAAAACTTGTAAGTTGCTTGTTCGCTCATTCTGAAATAAA +ATTTAAATTAAATAAAAGTTCACGTGATACAAAAAAGCCAACTTATTGAT +TGAAATCGATAAAATCGATAAGAAATTCATTCATTTAATAGTTTTTAACA +AACAACAAATGCGATCAAAAAGTTCGGAAGGTGATTTACAACCAGAGGAC +ACCCAATCCAGAGAAGATAAAGAGACGACTGCTACGTATAGTGAGGATAC +AAAGCCAGGTAAATACAGTAGTCCTACCTTAAAATTACAGTAACCCAGCT +GACTAAATATCATAATAAAACTTGCAAAAAAATGATGTAAAATCTTAGTT +TTTAGTCTTTTGAACTGGTAGAAACTGTGTTAGCGCCACTTTTTGACTCT +AAATAGAAAAATTTCCCATTTTAAAAGAGTTGTATTATGATACACTGCGT +GGCGTGTAAAATACCAAAAAATAAGCGCGGTTTTTTTTTTCGTAAAATCA +CAGGGAAAAATCCGAAATCGGAACTCCAATCAGCCGCCAACATCCCACGG +GCCCAATAACTTCTGGGTTTTGTTTCAACAATCTCATTTACTCCAATTCC +AGAAACCCAAAAGGAGAGAAACGCGGCACTGGACAATCTTGCCAAGACCC +CAATCCAGTTAGTCGTCCAACCAACTCCTCTCACACCAGCCATCACGCCA +TGCGAAGCACCACCACCACCACCTCCACCAAAACCCTCTAGCGATAATAA +CAACTCGAAACGATTGAAAGTGAAAGATCAACTGATTGAAGTGCCCAGCG +ATGAAGTAGGCCGAGTTGAGAATAATATAGACAATTTCCCGTTCTATCAT +GGATTCATGGGAAGGAACGAGTGTGAGGCGATGTTGAGCAATCATGGAGA +CTTTTTGATTCGAATGACGGAAATTGGGAAGAGGGTCGCCTATGTGATTA +GTATCAAGTGGAAGTATCAAAATATTCATGTTCTAGTGAAACGGACCAAG +ACGGTGAGAGTTGTTTGGTATTATCATTTAGTTTCGTTTCAAATCAGACC 
+GAGATACATACAAATATTGAAGTTCAAATTTAGAACATGTTGCATCGCAA +TGTGTTCATTGTGCTCCCTGAAGCATTATGCGGACATCACGAATTACATA +AATTATTTTAACCAATGGTGGGCGGCAAATTGACAAATTGCCGATTTGCC +GAATTTGCCAAAAATTTTCGGAAAATTGTATTTTTGCACATTTTTTTTAA +ATTTCAAAATTTCAAATTTTATCGGCAAAATTGTATGCATCCTATGAATA +TTTCTACATCTGTTTCGAAAAGTAAGCAAATTATATGAAAATATCTAAAG +AAAAGGGGAAAAAATTCGAAAAGGCACAGTTTTCAGTGTTTCCGTCTTAC +AAAAAAAACCTCGAAAAACTTTCGGCAAATAAGCAAACCGGCAAATTGCC +GGAAATTATAGTTTCAGGCAAATCGGCAAACCGGCTATTTGCCGAATTTG +CCGAACGGCAGTAGCCTCTCACCCCTGATTTGAACATGGTGCATTGCATC +ACCGCTCCTCGACTTGGAAAGTTTATATCGGTAGATATTTGCAGCAGATG +TAATTTACTGATATTTTCAGAAAAAGCTCTACTGGACCAAAAAGTACGCC +TTCAAATCCATCTGTGAGCTTATTGCCTATCACAAGCGAAATCACAAGCC +GATTTATGAGGGCATGACGTTGATCTGTGGCCTGGCACGACATGGATGGC +AGCTGAACAACGAGCAGGTCACATTGAATAAGAAGTTGGGAGAGGGACAG +TTTGGAGAAGTGCACAAAGGATCGCTGAAGACATCTGTATTTGCAGCCCC +GGTCACTGTTGCCGTGAAGACCTTGCACCAGAACCATCTATCCGCCAATG +AGAAGATCTTGTTCCTAAGAGAGGCCAATGTGATGCTCACCCTGTCTCAT +GTAAGCTACCTGTTTCAAGTTGAACTAAACTAAAATTAATTTCAGCCAAA +TGTGATAAAATTCTACGGAGTATGCACCATGAAGGAGCCCATCATGATTG +TCATGGAGTTTTGCGACGGGAAATCCCTGGAGGACGCTCTACTCTCCAAG +GAAGAAAAAGTGTCAGCTGAGGACAAGATTCTCTACCTTTTCCACGCCGC +CTGCGGTATTGATTATTTGCACGGAAAGCAGGTTATTCACAGGGATATTG +CGGCGAGAAATTGTTTGTTGAATTCCAAAAAGATCGTGAGTTTGTTTTTC +AAAAATTTCTGAATCTATGTGAATGAAAAAATAAATTTCAGCTGAAAATC +TCCGACTTTGGATTGTCGGTCAAAGGAGTTGCTATAAAGGAGCGAAAGGG +AGGATGTCTGCCGGTCAAGTACATGGCTCCGGAGACATTGAAGAAGGGGT +TGTACAGTACTGCCTCTGATATTTATAGGTGAGCCTACGAGCATACATAG +ACATTGGTAGTGGCGTCAGCGGTGGCCTAGAAACCTTATCCGCGTCTGAA +GGTTAATCAACCTCACACAAGTCTCTATGATTTCCAGCTACGGCGCTCTA +ATGTACGAAGTCTACACGGACGGAAAGACGCCATTCGAAACGTGCGGGCT +ACGTGGAAATGAGCTCCGAAAAGCGATCATCGGGAAGAGAATCAGCTTGG +CCGTTGAGGTAGAGCTACCAGTGTTCATTGCCAACATTTTCGAGCAAAGC +AGGCAGTACGAGACCGAAGATCGCATAAGCTCCAAGCAAATTATTCAGAT +TTTTAAGGAGGTTTGTGAAGCATTTTATCGGTTTTTTGTTCAAAAAATTA +GTAATTTTCCAGGAAGTCGGATTCCATGAAATTGAGACAAGTGGGATACT +GCATAAGCTTGTCAATTCTCTTCCGAGAATTCACAATAAGGAAAGAAAAC +CCGCGGCGGTGGCAGTGTAATAAATATCTGAAAATCCCCCCACCCCTGAA 
+AATATTTGACTGTATTTTTTAAATCAAACACACAACGCACACAAACAGAC +AGACAGAAAGCCTAAGTACACAGAGAAAATGTTTGAGGTGAGGTGCGAAA +GGGAGGAGGCTAGAACACCTACTTTACGCCAACAAGTTATGACTTTTTGG +AATTCAAAATGTCGTTTTTTGCTAGTTCACTGCTTACCTACAAACCTTCC +TACCTTGTCTGCCTACCTTCCTTCCTATTCTCTTATTGCAAGATCTTTTG +AATTTTTCTGAAACGTCTGTCTACTTCTGCGTCATTTGCAAACAGGCTGT +TCTCAGTTTTAGGGGGTGGCAGCACCCACAAAAAAACATTGGAACAGAAA +TCAGTCGGAGTGATCTACTAGATTAGATAAACATTAGATATCAGTTTGGA +AAAGTTAAACATTATTAACTCACAATGGATTTTCTTATTGTGTTTCTTGA +TATAAAGCGTTGAGCGGTATTGATACCATTACAATGCACTAAGCTAAAGG +CTGTCAGAGCCGAATGAAAAACATGATACATCGAGCATGCGGCAATTGGC +GTTCGGAAAATCGGCAAATTAAGATTTGCCGGAAATGTTTTGAGGGATTT +TTTATAAGATTTAAATGTGCCTTTTTGAAATTGTGTCCCGTTTTCTTTAG +ATATTTTTATAGGATTCGCTCACTTTTCAAAATAGACGTAGGAGAATTCA +TAGGATGCATACACTTTTGCCGATTGAAATTTAAATTCTGAAATTTCCAA +TAAAAGTGCATCACCACAATTTGCCGACTTTTGGCAATTGCCGTTTCTCC +GGCAAATTCGACAAACCGGCAATTTGCCGATTTGTCGATTTGCCAGAAAT +TTTCAATTCCGGCAATTTTTCGATTTGCCAATTTGCCGAGAATTTCAATT +CCGTCAATTTGCCATATTGCCGGAAAAAATCGTTTGCCGCCCATCCCTGG +TTCGAACTCCTAGAATGGTAGAATATGAGTATACTAAAAATAATTTGAAC +AGAACTGGAACTGGAAGAAAAATTTTTTACCGACGTCTTCTGAGAAAATT +TCGTGAAAATTGAGTTTTTATCACTTTCTGACTCACAATAAAAAAAATTT +CCAAAATTTTTTGAATAGTTCTCGATCGTTTCCAAACTGGTCTTCCGACA +TTCAGGTGCGTGATTGGTAAATATGAGGGAACTGCCATAATCAAATGAAA +AACACGGTGTATCGTCTGAGAAAACCTTAAACTGTTCCGTTTTAGTAATT +GGACAAGGATCTGCTGAATCTCGCTACTATAGCAACCGACATCTTGCGGG +TCCAAAAAAGATTGATCTTGAAAATTTGATCCAAGTTTATTTAATCGCCG +CCACACACACACTCACAAAAAAATCAATCAAATCACCCATATCAGTTGCG +CGATTTCTAGTTTCTAGGCGCGAAAATCAAAAGGAGGAGAAGAAGCATTT +CCGCATTACTCGTGTGTGTGCCGTTTCCGTTTATTTGTCCCTCAGGCTCC +CCTCTCTGTTTTTTTTGTTTGCGAAAACTGTCTCTCTCTCTCTCTCTCCA +TCACTTTTCTGTTGCGGAATAGAGGGAGAGAGAGAGAATTTCGGAGGGAA +AAATACCTGAAATGTGATCCTCTCTCTGTATGTCATTGACTGTTGCGCGA +GAGCCTTTTCCCGCGCGCGCGAGCCTCCCACCTACCCGTCGACGAGCCTA +TTTTCTTGTCGGCGGTGCCATTCGGCAATACACGTCCCTCTCTCTTCTTG +CTCTAGAAACATTCGGATTTTGATTTTTTGATTTTTCATGGTGGATCTGT +AGGCTTTTTTAGGCCATCTTGGACTTTGTGGTGGCCTAATATATGGTGTA +TTTTCCAATTTATCATATTTGCTTTAGTATTGTCCGACTTGTAGGCAAAT 
+TGCACCATTTTAAGCCAAGTAATAAAGCTTTGAAGTTTGGCTGCCTAGGT +TTTTCTAGCCACCAAACAATTTTTTTGTTTTCACAGCACACTTTTTTAAA +GACTGTCATAAGTTTTCAAAATTCTTTGAAAAATTTTACACCGAATTTTG +AGATCTTTAAGTTTGGTGGCCTAAATTTTTCAAAATTTTCTATGCCACCA +GACTGAAACGTCCTAACTCTATTTGAATTATCTTTGAAACTATGAAAACT +ACAAGTTCGACTAAATATGGCTTATCCAATTAAACTCGATTGGTGGCCTA +GAAAACCAATTTCGATTTATTTCATTTTCTAGGCCACCAATAATCTTGTT +CGAAAATTACTTTTATTTAATTACAGTGGAGTTTGGAATACTTACTGCCA +TGGGCTGTAAGATGTTGCTCGCCTAACTTTTTCTATGCCAGCAACCAAAA +AGTGGTGTTGGTGGCCGACAAACCCAAACTTGGACAATTGCATTTTCTAG +GCCATCATTTTTTTTAAACTAATCGTTGTTGGTTTAGTCTACTGTAAAAA +ATCAAATCGATAAGAACATTGGGAGCCAAGCTTTGGAGTTGGTGGCCTAG +ATTTTTCTAGGTCACTAACTTGAAATGTCTGTAACTCTAAAAAATAATTT +AGAGATATATAACAGAATTAGTAGGCTAATTTTAGGATGAGAAACTATTT +AAACAATTGGTGGACTAGAAACTGAATTTTGAAATTTCTAGGCCACCAAT +TCCAAATAAGCCTATTTCACTCCTGAATTGAGATAAAACGCTGAAATTTT +GGACATATTAGAATACGCTATATTTGAAAAAATGGAATTGGTGACCTAGA +AAATATTTTTTTGAAATTTCTGCTCCACCAATTTCAATTATAGCATAACT +CATTTCATAGTTGTAGGACGGTGATTGAAATTTAGGATAGACAGATTTTA +AACCAAAACAGATTGATGGCCCTTGAAATTTCTAGGCCACCTCTCCAGAA +AAAAATCTGAAAATTGTTTTTATTTCTAATTCATTTTCTTCGTCCGTTTT +TCATCTCTCGAGGATCTTATCCCCTTGTCTTACGTCTCCCACCCGCTGCT +ATTTGGGCGACCTTCATAACTGTGAAAATTCTCACCATTCTCCCTCTCTC +GTTTTTGCCCCCCCCCCCCGGTCTCCTATCATTGTTCAAGTACACCTGTT +GTCCCATAACATCTGCCAACCAATTTTTTCCAATTTTGCCACAATTTTTA +TTCAAAAAAGTGCATTGCAGCGGCAGGCATGCCTCACTCGTACACACACT +TGAGCATACAGGAAAAGCGAAAAAATCAGTGATTGTTTGGATTCCTGGCT +CTTTCTAGGAAAAAAACTCTTCCATTTTTGTAGTGCACATCAAAATGATC +AAGCTGGTTAATTTTCAAGTGACTCCCAAAAAAATAACTGAGAAAGTTGC +GTTTTAAAGTTTCTAGGCCATTGTGGGATTTCTAGGTAGGTTTTTGAAAA +TAGAAGACAGTCTTGCACTAAAGTTTTGATTAGGCTGTTAAATGATTTTT +GTCAGTACGTTTTTTGGAACTTCGATTTTTGCCAAAAATTCCTTTTTTAG +TTTTTGTTTCAGTTTTTTGCAAATTTTTATTTCTTCCATATCTTACAACA +TTTAGAAGGTCTCAAAGTCTCTGATAGCCAAGAATTGACTCTAATGACGT +TATGAGCTCCGAGACTATTTTAACCTTTTTCCCTTTTAAGCATAAAATAT +ATAAATATCAAAAATTTAATACAAAAACTTCTCGTTTCGAAACGTTTCGA +AATCCTCCAGTTAAAAAAAAATTAGATTACGGCATCGGAAAAATTGCGAA +ACTTTTGAATCTGAGGGATTTTCAATAGAGGGTCCCGCCAGGAATTTGAG 
+TTTTTAACAGTACCCTTTCCATCGTCCTTCCCGAAGCCCATTACATCATC +ATTAATTTTGCGACCTTTCTAAAAGCGGTCATGGGGGGGGGGACATGTGT +TTGTTGCATATACAGGGACCCCCCGCCACAACCACCGCAACTAGAGATAA +AGATCTGCTGCACACAGACAGCTGGCACATGGATGGACAGAATGTCTCCT +TAATTTTACCGCCTATTTCATAATAATAAAAGTAGAACGGGGGGCAGAAA +AGAGAATGATTGCGAAACCACCTGCTTGGCTGTCAGCGCATAAACTTTTC +GGAAATTTTGGGTCCCACCACGCTTTTTTCGGTAACAAATTTTGAGATTT +CTCGATACACCATGATGTCTTTTGGTGGAATTTTGGGTTATCATGGCTAA +TACAAAGCTTTAACGTGCCTTGAACTTTCGCAATTTGAACTTTTGGCCTT +TACTTAAAAATTTCGATGGGGCTCTATAAGCCATGTGGTTTCGAACTTTT +CCTGAAAATTTAGATTTTTGTTCATGCACCATGTTGTTTTTTTTTTGGAA +TAATTGAGGAATTCCTTACTTTTAAAGTAAAAAGTTTGGTGAGATCCATG +TGTTTTCATATTATTCAGTTGGTAAATTATACAGATTATTGACTGAAGGT +TAATTTTTTGTTGGGAAAATAAAAATCGCCAAAATCAATAAAATCGTATT +TAGAAGTAGGGTTTCCATATAGGCGGTAAAAGGCTTGTCTGCCTGACAAT +AAGACGACCGCCGCTCGCCTCAATCCGCGGCGGAGGCAGGCAGGGTTCAG +GTAGGCACTGAAACCGCGCCTGCTTACCATGATGTTCCGTTCTTGTCGAT +GCACCATGATAATTTTGGGTTACATTTTGAGCCTTTTCGAAAAATTAATG +GTGGAAAATTTCAAGCTGAATTTTAATATTTATTTTTTTAACCGTATTGT +GGGACATGCAAATGTCTTTCATTTTGTGTCCCTTCACTCTAACGTCTTCA +GAATTTTCGAACTTCTCGATGCACCATAATGCTGTTTGGCGATAACTTTA +GAGTAAGAATGGTAGCGATTTTTGCGATGCACCATGACGTTTTTATCGAT +TTTCAGAAAAAAAATCACTCCGAGACCCAAATAGTTTCATTCTCGACGCA +CTATAACGTTTTCGAAGTAAATTTCAAAATTGCTTCTCGGAAAAGTTTTT +TGAGACCTACATGCACCTTTAATCTTCTTTGGAAACTTTGGAATTTCCGC +CGAATACTCTCATTTCAGTTGTTCTCGATGCACCATGATGGTATTTTTTT +CAAATTACTTTTTTTTGATTTTTCCATGAGAGGACCAATATCTGTCTTTA +TAACGGCGCATTCTGTAACCAAATTTTGAAATTCCACGACATTCTGCTCT +CATCGATTCAACATGATAATTTTAAAATGAAATTTAGCGATTTTTCGGGA +AAAAAACAGTTTCGAGATTTTCTAGTTATTGGACACTCTGCACCTTGCTT +ATTTGGAAATTACATTTTAAACCGACAACCCTTCAAAAACTGCTTTTCGT +CGATGCACTATGAGTGCATAATTTTGTGTAAATCGTGGTGGGCCTCTAAA +AAAACCGAAAAAAAAAACGTATAAAACGCAAACAAATTGTTTTTGTTGTA +ATGTTGACACCAACAGATTCGGCGTGGAATATACGGAAGATCGGATTTCC +AAAAAAACAACCGCTCTCCTGTTTTCGACGTTCCGCCGCTTTTTATGGGA +GAAGTAGTATGTACCTTTAAGGGTACTGTAGCCCCTTGAATATTGGCCTA +AAAGTGAAACTAGTGTTGCCAAAAAAAAACGAGATGACCAGTTTCAATAT +GTCATTTACATCTACACATTTCTAAAATCTCATTTTAAATTCCATTGTTT 
+TCTCGTTTCTTCTGTCCCAAAGTACCGTATCCTCATCCTTCTACCACCTT +AATTTGCATTTCTTCTTCCTTATTCACTCTTCTTCCCCTGGACGAAAAAT +GTTGCACGACAACAACAAAAGAGACCCGCCCCCCACCATCGGTTTCTCTG +AACCTCTAACTGTCCCCGCCCCCTTTTGGGAGCCCACTGTCTTTTCTTTT +TTTCTTTGGTCGCTGATTATGACGGATCGTTTTTGTCGTTTTTGTATTAC +AGGAATACAAATTTCCCAAAACGCGTATTGCACAACATATTTGACGTGCA +AAATATCTAGTAGCGAAAACTATAGTAACTCTTTAAATGACTTTTGTAGC +ACTGATGCCGATTTAATTAAATTATTTATTAACAGATAAAATATTAAAAG +AAAACACGACAATTCAAAAAAATTCATTTCGTAAATCGAGCCCGTAAATC +GACACAAGCGCTACAGTAGTCATTTAAAGAATTACTGTTTTTTCGCTACG +AGATATTTTGTGCGTCAAATATGTTGCGCAATACGCATTCTTATAATTTT +GTGTACCCGTAGTATATTTTTGTCTCTCTGATTTCCATTGCAATTTTTTA +ATTCCATATTTTTTTGGTGACAAATTTCTCCGCTTATCAATTTTTTTTTC +AACCACATAATTATTAACCCCAAAACATCGCAGATTTTCAGGTAAAATGC +CAGTCGTGAGCGTTAGACCTTTTTCTATGAGAAATGGTGAGCAATTTTTT +TGAAGTTTTTTTTTGAAATTTTTAATTTTTCCAGAAGGCTTCTCAAACGG +GCATAACACGTGGAACGACGCCGAATTTTGGAAACCGTGAGTTTTCGGCA +AATAAGCTAACAGTCTAAGAAACACGAAAAAAAATCCAATTTTCTTTTCA +AAATTCGTGATTTTCGGATGATTTTTGAACTTTTTGATTAATGTTCCCAC +TGAAAATCCTCCAAATCTCTAAAAATCCGCTTCTAAAAAAGCCTCAAATG +GCCTTGCGAGAATAGACGGAATGTTCAAGGATGACTGGTATAAAATCGGC +AAACGTGTTGCCATTCAAACAGAATATCTCGAAATTTCGCCTTCCTTTTT +TTTAAGTGGGTCGGATATTTAACATACAGAAGAGAATAGGGAGAGAAAAG +AGGGAAAAGAGGGGAAAAGTCTTCTTGGAAACAACCTCGAACCGCAATTA +AGTTTTTGCCTCACCCTTCAATTTTCGTGAGAAAGTTAAAAATCCAAATT +TTTTTCTCCTTTTTTAAAAAAAGATTTCCTCCTCAAAAATTCAAAGGATC +GAATTTTTTGTTACACGCCTGCCTCCTTTTTATTACCCTTGTTCTTGATC +GCAACCTCTTTTCTAGATTACTTGAAAAAATGCGCTATTTTTTTTCCTTT +TTCAACAGAGTCAATTTCAAAGGTGCTGTGGAAAAAGTTTCGGTTTTTTT +TTGCAAAAATAATCGAAATTTGTTCGTTTATACTGCTTAAAAAATGACCA +AAATTTCAAGTAATTCATTCGAAAAATCGATTTTTTTGAAATATACAAAA +ATTTTCAATTTTTCATATCTGAAAACACAATTTTTTCGATTTTTCATTTT +TCTTGGAAAAAACACAAAAAAAAACTGTATTTAATTTCCCACTTAATACT +ACGATATCCAAGCCCCGCCTCCGCGTATTGTACGTGAACACGGCGGCCCA +AAATTACACACTATCTCACACACGTTAAATGAGTGATACCGTACTCTCTC +TGTTTTTTCTTTCATTTTTTCGGCGCCGAAAATTTTCAATTTTTGGTGGT +TCTTTTGGCGGGATTTTTCGGATTTTTGTCGCGTTCACACACACACACCA +GTTGGTGTTGCCTGTCGTCACCCACCCGCCCCACTCTTTGTCCGTGTACT 
+GCTTTGCCATTTGTGCGCGCGTACCGCTGCCCGTCTGTCTTTCTCATCAT +ATGCGCCTCGTTTTGTCGCTTTTTCTCCATTTGCCCTTCCTACATGGGTA +CCTGCGGTTTTCATCGAAAATATCCTTAAAAATTCCGAAAAACCTCTATT +TTTCGATTTTTGAATCGATTTTCGCCCAAACATAACCATTTTTCAGAATG +AAGGAAGAACAAATCGCCTATAAATTGCCAGGGGCTTGGTATTACGAGGA +GGACACTGCCTCGTGCTCTCCAGTCAGCGATCCGGAGGATATTGCGCAGT +TTCTCAACTACAGAACGTCGATAGGCGTGCAAAATGTCACAGAGTCCGTG +GAAGTTCCGACGTCGGAGCATGTCGCCGAGATTGTTGGTCGTCAAGGTAA +AAATTGCCTTTTTAGTCAAATCTACAGTTTCCACAAATTTTCAGGCTGCA +AAATTAAGGCACTGCGCGCCAAGACCAACACCTACATCAAGACACCGGTT +CGCGGAGAAGATCCAATTTTCGTGGTCACCGGACGCCTGGAGGACGTGAA +CGAGGCGAAACGAGAGATCGACTGCGCCGCCGAGCACTTCACACAGATCC +GTGCGTCGAGAAGACATACACAAGGTAGGTGACCGTAGGCAGCCGGTTCC +TGAGCGCCTTGTTCAGTTGTTCCAGGAGCCCATGCACCTGGACAGATCAC +GAGCTATGTCAGAGTTCCGTTAAGAGTTGTCGGACTCGTCGTCGGACCGA +AAGGAGCGACTATCAAGCGAATTCAACAGGACACCCACACGTACATCATT +ACGCCGAGCCGAGAAAGGGAGCCCGTTTTTGAGGTAATTTTGTTGGAGAG +CATAGAGTTTTGGAGCAAAATTCTCAGCAATTTCATGCTTTTTAACAAAA +ACAGAGAAGTTTCTAGTTTCGGCTTAGAAAATACTGAAAATCGGCTTTTC +CATGGCATATATCGTGCTGAAAATCTACTTTTGACTGAAAAATTCAAGTT +TATGACTAAGAAAAGTTAGATAATTAGACCATGCTATTATTTCATAATAT +TGGTCCAATTTTTGTTGCTTTGCTGAAAATTAACAAAATTTAAAAATCGG +CGGGGTTTCTGAAATATTTCTGTATAAAAACCGATTTCAACTCCAATTTT +CCTCGATTTTTACATATAAGTAATCTAGAAAAATTGTAAAATGGGGAGAA +TTCATAAGAAAATCGGCTGTTTTGTAGCAAAGTTTGTAAATATATCATTC +TGAAAGTCTCAGCGATAACCGTGATTCTAGAAAACCTCATCAGGATTTGA +TTTCTGCGACACACTTCTCTCGAGTTTTACATAGAAGTTGTTCATTACAA +ATTTCTATTTTTAAGGTCCCCATTACCATTGTCAATCTTTCATTCCGATT +ATTTGTTTTTCCTCGTCACTGCAAGAAGGGGAGCCTAATGCCTAGACAGA +GACAGAAAACACATGTGTGTTTGTAAGTGTCTGGTGTGATATACCCAATT +CTTTTCGGCCGTCTCTTGTTTTCTTCCTGACACCGAGTTTCGAGCGATTT +TCGAGGTCGTACAGTTGTGAATTGGCTGCCACTGCTGTGGCAAACAAACA +AAAACAATCAAACTTGTGCGGAATTGATATTCTAAAGAGAATAAAGAGCA +ACCATGGGCGCCGAAACGAGCAGCGGAAGGGCTTGTGACCTCTTGGTGTG +AATTTTAAACGGTTTCCTTCCATCCGAAATGCTAAAAATTCCTCGGGAGC +CGTTTAGGGTCTTCACTGAGAGATGATGATTAGAATCTTTTAATTTGGTA +GAATTCCCCCCGCGGGACCGATTAATGTTGCATGTCGTCTTCTTCTTCTT +GGCAGCAAAATATCATTGTTGATGAGATATCTCAGTTTGATGCTCAATGA 
+CGCCTTGAGAAACATCCAGCACACAGTAGATTAGTCGAATTACCGATGGA +TGGGGCCGCCCGGGTTTTTGGAGCCAAGAGTCACGCGATGGGGAGTCCAC +ATACACACGCACCAACTCTTTCATTCCATCTAACAGGGTAGTAGGGTGAA +GAAAAGGGGGGGGATCGCGATGACAAAGAGACTCAATTAATTAAATGACC +GGGGCGGGCGCCAAATTGCACATATGCTTGGTTTTATTGCACCGTTGGCT +CAGGGAGAGCAACATTTTGTGGCAGCACCAGGGGTACTTCAAAATGACGA +ACAAAGGTCGTTGTTGTTTCTATAGGAAGGCTTCCGAACTTTTGAACAGT +AATCAGATCATATTAGGGTTATGTAATTGACGGTTTTGACCGTATGTGTC +TATAGATGCGTGTGTGTGTGTGTGTGTGGCGACATAAATTACCGTATCCG +CATGGCCTTTTTCCTCCTTCCTCACAATGGTAATTATCATCCCCCACTAC +TCTGGGTCGCCGGACGAACTGTGAATGTGTTGACAGTGCGGGCCCACTAG +TACATGACATCTTGCCCCGGTGCGGTGCGCTCATCGTTATTCCTCCCAAT +TTAGTTGTCCGTTTGACACAAATGCCTCCGGTACCCTGGACGAGACAAGG +AGGTGATGGATTTGCCAGGCGGGGAAGGAATTACATGATGTTTGGGAGAA +TATGAGGGAAAGGTACAGAAGCTTCACCTCGGATATTATATGCGTTCTAA +AAAGACCAATTGTCGTTTTTGTTCGTGGCTCAAATGAATAATTGGTATGG +AAATATAGAAAAATGTCCGACTAGTTTGGTAATAGAATTGTTTCGTTTTG +AACATTTCAAAAATACCTCAAATAGTGCACTTGAGATCTTTGTTGAATTT +TCAAAGGTTTTATTTTCAGATAAATTGAGCATTTTCCATACTTATAAAAT +CAATTTTTGGTTCAAAAACTTCTAATTTTCTCACCGAAAAAGTTCTCACA +AGACAGTGACAGAGGTCGCGTGTCGTTTTGATATCCATCTCGATAATCAG +AGATTGTGATCTATGCCTCAATGACACAGTTCCGCGAAGGAGTCCGAGAA +AAACGTACAAAATTGGGGAAAAATATTGGAAATAACATTTATTGAAAAAT +TAAAAAAAAAACATAAACTATTTAAATTCACAAAAATTCGGAAGCATATT +TCCTATAATGAAAACAAAAAATTCTGAAAATGTGTACTGCACAACATATT +TGACGCGCATAATCTCTTGTAGCAAAAACTACAGTAATTCATAAAATTGA +CTACTGTAGCGCTTGTTTTGAGTTACGTGCTCTACGAAATGAATTTAAAT +AATTTATCGATGGAAAAATAATTTTAAGCAGAAAATGAGAAAATAATATG +AAGAAACAAAAAGAAATTCGAGTTACGTGCTCTAAGAAATGAATTTAAAA +AATTTATCGATGGAAAAATATTTTTAAGAAGAAAATGAGAAAATAATATG +AAGAAACGAAAAGAAATTCATTCGAAAATCGAGCGCGTAAATCCAGAGCT +ACAGTAATCAGTGAAAGAATTACTGTAGTTTTCGCTACTAGATATTTAGT +GCGTCAAATATGTTGCGCAATACACATTTCTGGAATTTTGTGTTTCCGTA +ATAAAATAAGTTTTTTGCGAGAACTACAAAAATATAACTATTTCTATTCC +AGGTGACCGGCCTCCCACATAACGTCGAGGCTGCCCGCAAGGAGATCGAG +ACGCACATCTTCCAGCGAACCGGAAATCTTCCAGAAACCGACAATGACTT +TGCCGGACAGTTGGCCGGTGTCTCGTTGATGGTGCAGAAGCAGCAACAGG +CTCAACAACAAATGCAGGAGGCTCAGCAACAATCGATGTTCTATCGAAGA 
+GCATTCGGCAACAGTAATCCGTTCAATCAGAAGGAGATGTCGTCGTCGCC +ATTCGGAATGGAGAGCTCGTTGGGATTGGACGCGTTGCTCCGCAGTTTCC +CATCGATGCGTAGTTCGTTGACTCCGGAATCTCTTTCCGGTACTGGACTG +TCTTCTCGTCCATCGTTGGGAGGAGGACAATCGGCGAAACAAGATCTGCC +AACCTACGACTACTGGGGAACCAACAACTCGCTTAATGATATTATGGAGA +ACGGTAAGCTTTTTTTGCGAATTTTAATAAGAATTTTAAATGACAAATAA +GGATTTTTCAGAAATTCTCAGCCGCAAGTACGATGCCCTGTCCGCATGGT +CGTCGATGGGATTGGAGAAACGCGAGGAATCCCCAACCAATGGGTATATA +TTTTTGAAGGAATTTTATAAATTTTTGAAATAAAACTAAAAATTGCAATT +TTCCTTCAAAGTTCTAAATTTTTGAAAGAAAATTTGATTTTCCGCTAAAA +AATTTAAAAAAAAAACAAGTTTTCGTGTTTTCCACTCGGAACCTGACGGC +TTCTTATCAATTTTTTAAATAAAAAATAACGTTTCTAAAACTCAGAATGT +TTAAAAAATTTCACATTAAAAAATTATTTTTCCGAAATCTTGAACCTATC +CGTTTTTGAAAATAAAAAAGAGTAAAATCGTAGTTTTCTCTATAAAAATT +ATAATTTTCATTTGAAATTCAAAAATTTTTAAATAAAATTGGATTTTGTC +CGTCAGAAATCTATAAAAAGAAAATTTTTTTTGAACAATAAAAGAAAAAT +TGTAATTTTCTTTTGAACTTCAAAAAAATATGTTTTTTTTCACATGCAAA +AAATTGATTTTTTCAAAACCTCGTTTTTGAGAGAAAAAGGTGAAATTCCA +ATTTTAAGAGGTAAAGTTAAATTTATTTATTTTATGAGAAAAACTTGAAA +TTTAAAGGAAAATTACTGAAATTTACAATTTATCATACATATTTTTTAAT +TTCGACATATCTCGAAATATCAAGTCGTAAATTTTTTCCACAAAAGTGTG +ATATTCTCTTTAAAAACTAGAAAATTTCAATTGTAAATTTATGAAAAATC +AAATTTTTTGCAGGCTCATGTCCTCCCTCAAAGGAACATCCGCCGGCTTC +GGTCTTCTCAGCACAATCTGGTCTGGCGGAAACATGAATCTGAGCCCAGG +ATCGTTGGCTTCAGCATCTGCCTCGCCGACTTCATCGACCTGCGATCACA +ACGATCATACTCTCGTGCCGATCAATGGATAAGCTGTGGATCCATTCATC +ATTCCATTACTATCGTGTAGACTCTCATCTAGCAAAAACCCATCCTCAAC +GAGTTCCAAATTCATCTTTCCATTCTTTAAAAGACTCTTTTTCCCATTTT +CTACTTTGTTCATCGGTGTACGAGTCCCTCCATTCCCATTTTCTCACCTC +TTTACTACATTACTAGAACACACAAAAATCACCATCATTCTCACCCCAAA +ATCCTAGACAAAAAAAACGAAAAAAATCACATATATTTATATATAACTAT +TATTATTTGTTATTCATATTTTTGAAATATCTATATTATATATATTTATA +TATGTAACCCATGCAGTCCCCCAATATATATTCCTACAGTAGGTTTTTTT +GGAAAATCTGTAACTTTCTCTATTATTCTTTTTTGCATTATGATAGGACG +ACCAACTCATGTTTTAAACAAAAATTATATATATTATAAGGATTTCCTCT +TTTCTCTCTATCTCTCACCTAGTGTTTAACTCCCAATTTTCCCCCTCTCT +ACTACGCACAACTAACGGAGAATTGAGAGGCCTTTTATATTTATAGCACC +TCTTTATAATTAAAAACTCGAGTGATCCCCACCTCACCCATCAAATTTAT 
+CGTATTTTGTAAAAGTTCTGATATATTCCACAGATTTTGGTGTATGTGAA +TGAAGCGATTCGTGTCGTCTCTCCATATTCTGGTATCCATAGCAACTCCC +CGAGCTTTTGGCTCACTCGCGCACGCCCTATACTATACCTTTACTATTAT +TAAATGCACTCAACAAATCCTATCCTGCGTCAATTGGCCGATCAATTGTC +CGAATTGCCACAATATGCACCATTGCGGGTAATAATAGCACAATAGTTGA +ATATATTAATAATTCAAAAATATTTATTATATTTTGAGGTGTCCCATTGT +TTCAGACATATATCCGCGCGACGATAAGTGCCACGTTGGGGGAAACGCCA +AAGAAAACGAGTAGGGATTTGACGTATAGAGGTGAAATTTTTAGAAAATT +TGAAATTTTCTGAAAAAAAAAGGATTTTTCTTTTATGAAAAATGCTCTTT +TCGCCAAAAAAATCACAATTTTCGACTTAAAAACTTCAAAATTTTTTCAG +GAACACACAATTCAAAATACAAAATATCTCGTAGCAAAAACTACAGTAAT +TCTTTAAATGACTACTGTAGAGCTTGTGTCGATTTACGGGCTCAATTTTA +AAATTAGATAAAAATGAGAAAATAAAGACACGAAGAAACGAATGAAAGTG +TAATATCGAAAATATTCGAAAATAAATAAATTTCAAAAATCGAGTCCGTA +AATCGACACAAGAGCTACAGTAGTCATTTAAAGAATTACTGTAGTTTTCG +TTACAAGATATTTTGCGCGTCAAATATATTGTGCAATATGCATTTTCAGA +ATTTTGTGTTTCCGTAATAGGTAAAATGTCAGAAAATCAAGGATTCAGTG +CAACCCCGCGTTCTACGGGTATTTTGGCCACGGCCACTTTTTGCAAAACT +GCAAAATAGGCGTTGTTATTTCATTTTCTGAAAAAAACATTGAAAATTAT +TATCGAGAAAGTACGATCTGACAAATTTCTCATAATTTATTTTTGATCTA +CCTGTTGAACTTTACTCCGCCCCCAATCTTGTTGCCGTTGTTATTTTGTT +GTAGCTGTCTTGTCGAAGGACGGGGAGGAGCCTAGTCAACAAGGTAGATA +AAAAATATATTATGAGAAATTTGTGCGATCGTATTTTTTCGATTATATAT +ATATATATATCTTCATGTAATTCTCATGAAATCTTGAAAAAAAACGCGAC +ACATGGTATTACCACGCCCACTTTTTATAATTTGAGCGCGTGCAATTTTT +TGTCGATAACATAGTTTCTATTTAAACAAAAAACATCGTGCGCCCACATT +GAAGAAAATCCTATAATTGGGCGTGGCCTTAACTCTGAAAATTTACAGAA +CGTGTCACCCATAGCATGATTGCCGATTGGCTCGAGCAGAATGGATATCC +AATATCATCTCAAGTCTTGAAAACAGAAATGTCTGGAAACTACATGGAGA +GTGCAGAAAAACACGTGGAAAATGGGGAAAAAATCGATAATCTATTGCGA +AGTCAGCTGAAAATCGATATTCGCGATGGCACCGGTGACGATTTTGAACG +AAAATCGATAAAATCGATTGGCGCCGGCCGCCCGTTTGCAAAAATTAATA +TGGTCGATAAACAGCGAGCACCGTTGAAAATTACACCGCTTAGTGATGAG +GAATTTCGAAAAACGATGCGGAAACGGATGGAAATGGAGAGGGAACGAGT +GAGTTGCGTTAAATAATTTTTTTTTGAAATATTTAACAAAAGTCGACAAG +GCGATTTTTTAAAATCGATGTTTCGCGACTTTTATTCAAAAGAAAAATTT +CAATGCCCTAAATCACTCAAATGATGATTAAAAAAGTATTTGCGCGCGCG +CGTAAAATTTAGTGCCAGCGTATGACACTATGTGGTGCAAACTTCACTCC 
+GGGTCCGACACTTTTTGGATTTTGCGCATCGCTATAAAGATTTCAGATCG +CGGCGTGAGAGCGGCGTTTGCGGCGCCGACATGAGAGTCCTACTTTCCTA +CTTTTCTTGGCAAAAAAATCGATCGATTTTTCAGCTCGCACGTCTATCCA +ACCAGGCAGAAGACGATTCCGACGATTCATCGTCCTCCTCCGACTCTTCC +ACGTCATCCGCGTCGTCATCTTCCGAGGAGAAGTTGACTTTTTCGGATAT +CTTGGGGAAACCCACAGCAGCTGGCGCGGAGAAAATGATCGAACTAAGCC +AAATTCCCAACGCATGGGGTCCGTCAAAATCCGCGTTAGTCACTGATGAC +GTGGCAAGCACTTCTGACCCACTTCCGGCGATTTTCAACGCACATCTTCC +GCCGCTCTCCATGGGACGACCACCGCTGACAAAAAGCGCGAAAAGTCGGG +AAATCGACGCCATTTTCGGCTCGGATAACGTGGATTATGATTTAGAGGAT +TTCGAGCAGGATGGCGTGGCGAAGAAAAAGTCTATACTGCCTATGGAGAA +AAAGAAGGAGGAAGCGAAACCAAAGGAACAGGAGCTAGTGGAGCCGTTGG +TCTTGTCAGAAGGTAACTTTGAACTTGCACCATGCAAAAATTTCGAAAAA +TTATGCACGTGGTCACTGGCTGTCCTTTTGCATAAGCTTGAAGCAAACGC +GCCTCATTGAGAATTCACGTTTGCGCCAACTCTCGCTATCCATTGGGCGT +GAGAGACGCAGATACTACTTTTTTCTCTGAACGTGAAAAACGCAAAGAAT +AACCGTTTTGCCGTCTGCATCTCTTCTTTCACACGCTATTTTGGCTGTGG +ACGAGGAATTCTCCTCTTCCAGGATTTTCTAGGCCATTTTCTCATATTTC +TCAAGTTTTCTCGTCCGCGAGAAAACGTGAATTTTGAGACAGCCAGCGAG +CACGTGATTATGCAACTCTCCTAAAAATCGATCATTGTTCCTGTAGCTTA +TAATTTTAAAGGCGCATATTCCGCTTGCGGAGCAAAAGTAAAAAAAAAAC +ATGCCTCTACATACAAATCGATGAATTTCCTGAAAAATCAATAATAAAAA +ATTTTCAGGAGAATCAATCGACGAGCTTGAAGATTTCGACACCGGTCTAC +TATCTTCCGGAGGATCCGATTATTCTTTTTAAAATTTTCTTCTTTTAAAA +AATTTCTTTTGAAATAAATAAATTCTCACCTAGGAATTTCAACAATTCAA +CTTGAAAAAAGTTCGCGCAAACTACGAACAAATGTGTGTCGAGCGGGCGG +AGCCACTGAGAAAGAGGAGCAAAATGTACACAAAACCATATTTGAGTGTA +ATTTTTCAAAGTTTGGCGCCGATTTTCTGTGAGAGATGAGTTTTCTCAAT +TTATATTTGGTTATTTTTATTTTAGTTCTTACTGGTAAATTTCTGGGTAA +GTCCTGATGACTTTGAAAACGAAAAAAACTCTTTCATTGATGCTAGTGCG +ATTGCTAGGAAAGCAACTTTTCAGTTACCAAGAAAAAGTCCAAGGCCATA +GGGATTAGCTGCGTGGCATAACAACTCATCCATCCTCGCAGATGCAAATC +CGCTCTATTGGCAAATAACATGGAAGAGTATAAACATTTTCTCTTCCACA +CGGAAACCTAGTCCCCTTGGGGAGCGGTAGTGCCCACAACCCCGCATGTT +TACCAAACTACACAGACAGCGCTATTGTCTGCAAGTGGCAAAAAATGGCC +GCCGAAAATTTTTACAATGTAAATAATTTTCAAAGTGTTACTATTTGCAC +CTTTTTGGCAAAAATTCCATAAGTAAGAAATTTGATGGAAGAGATGCGAG +GTGCGGGGAGTCTGAAAATGGCTGCATGGCCTGTATGTGACCTGTGTATG 
+TGGCCTAAAGGTCTAGTTACCCCTAGAAATCAAAGTTCAGTTCAGCAAAG +AGACTTTATTTTCAGCACTCCAACAAGAGGCAGAAACTCAACGGTACGAC +GGATGGTACAACAACCTGGCGAACAGTGAATGGGGTTCTGCTGGTAGGTT +TTTTTGGAAGAGAAATGACGTCACACTGACCTACTCCTTCAGGAAGTCGG +CTGCATAGAGATGCACGTTCCTACTACTCAGACGGTGTATATTCAGTGAA +TAACTCACTTCCGTCCGCCCGTGAACTCTCCGATATACTATTTAAAGGAG +AGTCCGGTATTCCTAATACAAGAGGATGCACCACTTTATTGGCATTTTTC +AGTTTGTATTTTTTTAATACTTATAGTAGCCAATGTTTGTAGGTCAAGTA +GTTGCTTATGAAATAATGCAATCAAATGGAGTATCCTGTCCACTAGAGAC +ACTTAAAATTCAAGTACCCCTATGTGATAATGTATTTGATAACGAATGCG +AGGGGAAAACTACAATCCCATTTTACCGTGCAAAATACGACAAAGCAACT +GGAAATGGTCTTAACTCGCCTCGAGAACAAATCAATGAACGGACTTCATG +GATTGATGGATCATTCATCTATGGTACCACCCAGCCATGGGTGTCCGCAT +TAAGATCTTTTAAACAAGGACGGTTAGCTGAAGGTGTACCTGGATATCCA +CCACTTAACAACCCACATATTCCATTGAATAACCCCGCTCCGCCACAAGT +ACATCGATTGATGAGTCCAGATAGATTATTTAGTGAGTTCATTGTTCTAT +AGAAAAGTATAAATATTTAAAATTGAAGTGTTGGGAGACTCGCGTGTGAA +TGAGAATCCTGGTCTTCTTTCATTTGGTCTGATCCTCTTCCGTTGGCATA +ACTACAATGCAAATCAAATCTATCGAGAACATCCTGACTGGACAGACGAG +CAAATCTTCCAGGCAGCACGTCGTTTGGTGATTGCATCTATGCAGAAGAT +TATTGCATATGACTTTGTTCCAGGACTTCTAGGTTACTCAACTATCATTT +AATAGCTTAAACTCAGTGTTTCTTTTAAGGTGAAGACGTTCGTTTGTCAA +ACTACACCAAATACATGCCACATGTTCCACCTGGAATCTCGCATGCTTTT +GGAGCAGCCGCCTTCAGGTTCCCTCACTCAATTGTGCCACCAGCAATGCT +TCTGAGAAAACGAGGAAATAAATGTGAATTCCGGACGGAAGTTGGTGGAT +ATCCTGCATTGAGATTGTGCCAGAATTGGTGGAATGCACAGGATATTGTG +AAGGAGTACAGTGTGGATGAGATTATTCTTGGTTAGTTCATACTTGAGTG +GTTATATAATAAAGATTGTAATTTCAGGAATGGCAAGCCAGATAGCTGAA +CGAGATGATAACATAGTAGTCGAAGATCTTCGTGATTACATCTTCGGACC +AATGCATTTCTCTCGTTTGGATGTTGTTGCTTCATCAATAATGAGAGGAA +GGGACAATGGAGTACCACCGTATAATGAATTGAGAAGAACATTCGGACTT +GCACCAAAGACATGGGAGACAATGAATGAAGATTTCTACAAGAAGCATAC +TGCAAAAGTGGAAAAGTTGAAAGAGTTGTATGGAGGCAATATTTTATATC +TGGATGCTTATGTTGGAGGGTAAGCGTTTTTTATAAATTAAGTTAGAGAT +CCTCCAATATACCCTCATAACATATTGTATCAGTTTACTTCATCAATAAT +TCAGCTTGAACCATTTAGAATGCTGGAAGGAGGTGAAAATGGGCCTGGAG +AGATGTTCAAAGAAATCATAAAGGATCAGTTCACCCGTATTCGAGATGGA +GATCGATTCTGGTTTGAGAATAAACTAAATAGATTATTCACTGATGAAGA 
+AGTTCAAATGATTCATAGTATTACACTTCGTGATATTATCAAAGCAACAA +CCGATATCGATGAGACGATGCTTCAGAAGGATGTGAGTTTATTGTAAACA +CCCGTTGGTGATGATCGAGTAAGTTAATAGCATGTTTCATTCCAGTCATT +GATTGATATTTTGTCATGAATTGTTATATAACAATATATGTATGTATTAT +ATTACTCATGTCTCAATATTGCATTTAATTTCCAGGTATTTTTCTTCAAA +GAAGGTGATCCATGCCCCCAGCCGTTCCAAGTGAATACCATTGGACTTGA +GCCTTGTGCTCCACTTATTCAATCCACCTACTGGGATGATAACGATACTA +CTTATATCTACACTCTAATTGGTTTGGCGTGTATTCCTTTAAGTCAGTCT +TTATTTCAATCTAAATGACTACCGATAGAGACTCAGCGATTATAGAGTTG +ATCAAGCTAAGAACTTTCAGTTTGCTATAGTATCGGTCATTATATGGTTG +AACGACGTATCCGAATAGGTCATAACAGTGCTTGTGACAGCTTGACTACT +GACTTTTCAACAGAAAGTCCCAAAGTTAATGTCTACAAAGTGAATGGTTC +GTTATTTTTTCATACTTGATTTTTATTTAAATTATACGGTGAATCTGTTA +CAGCTTTGGAATGGCTTCAAGAAGAGTACATACGGCAAGTTCGGATAGAA +ATAGAGAATACCACGTTGACAGTGAAGAAGCCACGTGGTGGAATCCTTCG +AAAAATTCGTTTTGAAACTGGACAGAAGATTGAGGTATTCCACTCTATAC +CGAATCCATCAGCAATGCACGGACCATTTGTACTTTTGTCTCAAAAGAAT +AATCATCATTTGGTGATAAGATTGTCGTCTGATAGAGATTTATCTAAATT +TTTGGATCAAATTAGACAGGCGGCTAGTGGAATCAATGCAGAGGTTATCA +TAAAGGATGAGGAGAATTCTGTGAGTTTACTTCAAGAAATACGTCGAATC +TGGAAATTATTTCAGATTCTCTTGTCCCAAGCAATCACAAAAGAACGCCG +TCAAGACCGACTGGACCTGTTCTTCCGTGAAGCCTACGCAAAAGCATTCA +ATGATAGTGAACTTCAAGATTCGGAAACTTCATTTGACTCATCGAATGAT +GATATATTAAATGAGACAATATCTCGTGAGGAATTGGCAAGTGCAATGGG +AATGAAAGCTAATAATGAGTTTGTGAAGAGAATGTTCGCGATGACTGCAA +AACATAATGAGGATTCGCTCAGTTTCAATGAGTTTTTGACAGTATTGAGG +GAGTTTGTTAATGGTGAGTATGATTTACTAAAGTTCTGATCACAAAAACT +ACACGTGAATGAATGTTCCAGTCCGTTTTACACTAGGTTCGGTAAATATG +TATAATGTTATAGTGACTGTTACATGTTGAAGCAGTAGTCTTCTTTTCTT +TGAATGGCAACAAAAAGGCTCCCTCATTATTTAAGCTGTATTTCACTGCG +AATAACGGCTTAATGTGCTATTTAAACATTCGGGTCTATTAGTCTTTTAG +TATATTGTCGCGCATACTAAGGCTCGCTTTAATAATCAGTTTGAAACGCG +AATTTGATGGTGTTAAATACGATTTTAATTTGGGAATTGTTCACAATTAG +ATGCTATCTTGACTAATAATAATTAAATACAGAAACAAAAAACTTTTAAT +GGACAAAAATCACTCCGAAACTACCCAAATCGGCATTAGGTTAATGTCTT +TGATTACTCACCTTCTTTCCTAATTTCCAGCTCCTCAAAAGCAAAAACTG +CAAACTCTATTCAAAATGTGTGATTTGGAGGGAAAGAACAAGGTACTCCG +AAAGGATCTCGCAGAACTCGTCAAGTCCCTCAATCAAACCGCCGGAGTTC 
+ACATTACCGAAAGTGTGCAGCTTCGATTATTCAATGACGTGCTGCACAAG +TCTGGTATACATCCCTGTTTAGTATGCCAAGTATTATAAGAAGTTTCAGG +AGTAAGTGACGATGCGGAGTACTTGACTTGTAATAATTTCGACGCATTGT +TCTCGGAGATCTCTGACGTCCAGCCGATCGGTCTGCCATTTAATAGGAAG +AATTATAACTCACATATCAAGGAGTATGTTGTTGAATTCAAATCCGCAAG +TAAATACAAACTTCAGGCCATCATGCCACACCTCATTCCCAATAGTGGAC +CACTCTACTCCTGCTCCACTTTCTCTGATTCAGAGAATTTGTGCATTCCT +GGAAACCTATCGCCAACACGTCTTCATCATCTTCTGCTTTGTCGCCATTA +ACATTGTCCTTTTTTTCGAACTTTTCTGGCGTAAGTTTACTGTTTAGTTC +GGTTTTCAAAAGCAATATTCAATTAAGATTCCCGCTACCTAAACGAAGAT +CGAGACCTCCGTCGGGTGATGGGTGCCGGGATCGCTATCACTCTCTCTTC +TGCGGGAGCCTTGTCATTTTGCATGGCGTTGATATTGCTCACAGTTTGTA +GAAATATTATAACACTGCTTCGAGAGACAGTTATTGCGCAGTATATTCCA +TTTGACTCGGCTATCGCGTTCCATAAGGTAAGAGCCTCTCTCTTGGCCTA +GCGCTGTAAATAAAACTGCCAAATTTTTAAACTCCATCAATTTCAGATCG +TCGCATTATTCACCCTATTCTGGTCTACCCTTCACACCATCGGCCATTGT +GTTAACTTTTATCACGTTGGAACTCAAAGCGACCGTGGACTTGCTTGTCT +CTTCCAGGAAACATTTTTCGGGTAAGCCCCACCTACTCGGTAGATCTCTC +AGTAGATTGAACGTTGGCTTGCTTTCAGATCTGACGTCGTGCCTACCCTA +AGCTATTGGTTCTATGGAACAATTACTGGGTTGACGGGAATTGGATTAGT +TATTGTTATGAGTATCATTTATGTGTTCGCATTGCCAAAGTTCACTAGAA +GAGCATATCACGCGTTCCGGCTGACTCATCTTTTGAATATTGGGTTTTAT +GCACTCACTATTCTTCACGGACTTCCTTCACTTTTTGGGGTAAGGCTTAA +TTTAATTTGATTCATTTACGCTCTTTTTACGCTCTTGACGTCAATATTTG +TTACAGTCTCCCAAATTTGGCTACTACGTTGTTGGACCCATTGTCCTTTT +TGTAATCGATCGTATAATTGGGTTGATGCAATATTACAAGTCGTTGGATA +TTGCCCATGCAGAAATCCTTCCATCAGATATTATATACATCGAGTACCGT +CGTCCAAGAGAATTTGAATATAAATCAGGACAATGGATTACTGTATCATC +ACCATCTATATCATGTACCTTTAATGAATCTCACGCATTCTCGATTGCCT +CAAGTCCACAGGATGAGAATATGAAGTTGTATATAAAAGCAGTTGGACCA +TGGACATGGAAGTTGAGAAGTGAATTGATAAGATCATTGAATACAGGGTC +ACCATTCCCATTAATTCATATGAAAGGACCATATGGTGATGGTAATCAGG +AATGGATGAATTATGAAGTTGCAATAATGGTTGGAGCAGGAATCGGAGTG +ACTCCATATGCATCCACGCTTGTTGATCTTGTACAAAAAACATCAAGTGA +CTCGTTTCATAGAGTTCGATGTCGTAAAGTATATTTCCTATGGGTGTGTT +CGAGTCACAAGAACTTTGAATGGTTTGTGGATATGCTGAAAAATGTTGAA +AATCAAGCAAAGCCGGGAATCCTGGAGACACACATATTCGTCACTCAGAT +GTTCCATAAGTTTGATTTAAGAACTACTATGCTTGTGAGTTTTTTATGCG 
+TTTTTGGTTTTTTAGTGGTATAACTCTAAAACTAAATGTTTGGAGAAAAA +GGCTTAACTAATAAAATGTTGCTCATAACTTTTTCTATGCAATAAAATAT +GTTTCAAAATTTCATCCGAGATCTCACAAGTCCAAATTTGATTATTTTTC +TTAAAACGAGTATATTTCAAGACCGAAAAGTTTTCTTGAAAAAATGGTAT +ATTTTTTGCACATAATCATTTTTTCATAAAGTGCTCTGCAAATCAGAAAT +TAAAGTTCGAACAGTCTTATAGTTAGTACCGTGACATTTATTGTAACTTA +AAAAAAAAAATTACGCGACATAGAAACGCTCGATTATCTGAAAAACCAAA +TTGGATTTTGATTCTAGTTAATTTATTTTCAAAAATAAACATATTTTGTG +ATAATTGTAAACTAAAAACTATACCTGAATATTTTTATAAATTTTATGTG +GTTTTTGGTTTCAGGATTAAAACAATGGTAAAATACTACCCTAACGGCTT +ACCCAAGGCTCAGAAAATTGTAGTTAGTCTCAACAAATTAATACATATCA +TTTCTTATTTGCAGTACATTTGCGAGAAGCACTTCCGTGCCACCAACTCA +GGCATATCAATGTTCACTGGCCTACACGCTAAGAATCATTTCGGACGGCC +CAACTTCAAAGCTTTCTTCCAATTTATTCAGAGTGAACATAAGGAGGTTA +GTTTCATGGTTTTAACCTCTAAATAAAGCAAATTTGCAGCAATCCGAAAT +TGGAGTGTTCAGTTGTGGACCTGTAAACTTAAATGAAAGTATAGCTGAAG +GATGTGCAGATGCCAATCGACAACGAGATGCTCCGTCATTTGCACACCGC +TTTGAAACTTTCTAATCTTTCTACTGTTACTTACTATGAAAATCATAAAA +ATTAAGACGCTTCATGAATAAAACATTGAGAAAAAAAACCTGAAATAAAG +AATTTTCACATTTGTCCAGGTAGTATATATCCCCAATACAAACATTTTGA +GACGCGAACCTTCCCAAATAATCATCAGACATTGTTCTGTAATTAGGCAA +AAAACCAGCCCCCTAGCCTCCTCCCTCACCACAAATAAAACCAATTCCCC +CATTTCCCCCCAGTTCATTCACCAACAATGAGTTTGTTCAAAGTATACAC +CTCGTTTTCTTTGTTTTTCTCCGTTCTATTCCTTCTCGTCGCTTTTATTG +TGATCTATGACATTTTCGATGAAATTCGCGAATTTCAAGAAGTAATCGAA +AAAGATGCCAGGAAGTTTGAGGTAAGGCAATATGTGGTGCAGCAATAATT +TCCGCATTTTTCGTAGATCATACCGTTATGGGACAGTCTGACACCACGTG +ATAAAATAGTATACTAAGTATTACAAGCAAGGTTGAAACGAAAATGAAAT +TTCGTTTTCGAAAACGAAAACGAGAATTTTCGTTTTAAACGCGAAAAACG +AAAACGAAAACGAAAATATCAGTTTTGTCCAATTTAATTGGTAAAATATA +GATTTCATGAAATATTAGGTCCATTTTCGAATCAATAATAAACAAACAAT +AGTTTTATTACAAAAAAAACAATAAAAATTAAATAAAGCAAAAGAACAAT +AACAATAACACTTGCTGAACAACTATTCCATTAATCTAAACCAATTTTAA +AGCCTAAAACTTTTTGATTTTGCTGTAAGAACACAGACTTGCTCAACGAC +TTTGTAGACATCTTTCTTCTCAGCTTTGTAGAAACGCGGCCAGCTCCTGA +AAAAACTCGTTCCGATTCAGCAGATGAAGCTGGAGTTGTCAGATATCTGT +TCGCTATTTGAGAAAGCAATGGAAACTTAGATCGATTGAGCGGATTTTGC +CAAAAAACAGCGGGGTCTGATTTTCTATTGTTATCGGTATCGTAGAAAAC 
+TTCAACTTCTGCACTAGCACAGAGCATGGAATCTACAGGCGCTTTGTCTT +TTCTTTTTCTGATCTTGGAATGCTTTTTCTCGTAAGCTTCAAACAGGTCG +TCTATTCCCTCAAGAGTTTCATTTTCTGGCTCATCGACTGCTTCTTCCTC +TTCTTTTGATAATCCTTGAGCTAAGCTCAAAACAAGCTCTTTTCCGTCAC +AATATTTTCTTTTGTATCGTGGGTCAATGTTTGATGCAACAATCAGTTTG +AATGTCACCAGTCGGTGCCCGCGCCGTAGGTGCGGTCAACGGCTGGTATG +TATATAAAATATATTAGATTGAAAATATAACATTGAAAAAAAAAAACAAA +CATTGAAACACAATAAAAATCAAAATATAGACGCTCTTTATTCTTGAATA +AAAATAAGAATTACGAATTGTTCACTGGGAGTCAGCTTATTTCTTGTACA +AAATAATGATTTTGACCAGTGTCAGCTCAAAAAAAATACGTTGACAGAGA +GAGGTGGCATTTTCGTTTTTTCGAAAATTTTCGAAAACGAAAACGAAAAA +ATCATTTTCGCCAAAAACCCGAAAATAACGAAAAAACGAAAATTCGAAAA +CGAAAAAGTGACAAGCCTGATTACAAGGTCGGTATTTCATAATATGAATA +TTGTAGAGAAAGGTTACTAGCGATGATTCCAAAAAAATTTAGATTCATCA +GAGATGTACACCCAAATACCCAAGGCATAGGCCCTAATTATTGATTAGTC +ACAACTTCTTACTTCCAACAAGCTTTTCCTATTTTTCCAGTTCTACTCCA +CCGCCGCCTCACGTTCAATTGCCGGTCTCGACAAGCGGAGCCTATTCGGA +GGAGCTGTTTCTTTTTAGTTGTATATACATCTATTCAAAATTAAAAGTCA +TTGTTGTTGTTACCCATAAAACAGTGAATGAAAAAAAATGAAGTCTAGAC +AGAAAATGTAAAGCTGGCACAGAATACTCAAAGAAGCATAAAATATGTAT +AATAATGATGATATAGGGAAGGTTAGAACGTTTCAAAGCGATGTGCAAAT +GAAGGAGCATCTCGTTGTCGGTTGGCATCTGCACATCCTTCAGCTATACT +TTCATTCAAGTTTACAGGTCCACAACTGAACACTCCGATTTTGGATTGCT +GCAAATTTGCTTTATTTAGAGGTTAAAAAGCATTTAACTAACCTCCTTAT +GTTCACTCTGAATAAATTGGAAGAAAGCTTTGAAGTTGGGCCGTCCGAAA +TGGTTCTTAGCGTGGAGACCAGTAAACATTGAAATTCCTGAGTTGGTGGC +ACGGAAGTGCTTCTCGCAAATGTACTGCAAATAAGATGTTAAAGTTTAAA +AGCTTCAGACCTTAAGATGCCACTGCGTTTAAATGCGTCCGCTTTAATCA +GTTTATAACCAAACCTGACGTTTGATTTTGAAAATTATCTTCAATAAACT +CACAAGCATAGTAGTTCTCAAATCAAACTTGTGGAACGTCTGAGTGACAA +AGATATGTGTCTCCAAAATTCCCGACCTTGCTTGGTCTTCCACGTTCTTG +AGCACATCCACAAACCATTCATAGTTCTTGTGAGTTGAGCACACCCATAG +GAAATATACTTTACGGCAACGAACTCTGTGAAATGAGTCACTTGATGTTC +GTTGTACAAGATCAACAAGTGTCGATGCATATGGAGTCACTCCGATTCCT +GCTCCAACCATTATTGCAACTTCATAATCCATCCATTCTTGGTTACCATC +ACCATATGGTCCTTTCATATGGATTAATGGAAATGGCGATCCTGTATTCA +ATGATCTTATCAATTCGCTTCTCAACTTCCATGTCCATGGTCCAACTGCT +TTTATATACAACTTCATATTCTCATCCTGTGGACTTGAGGCAATCGAGAA 
+TGCGTGAGATTCATTAAAGGTACATGATATTGATGGTGATGATACAGTAA +CCCATTGTCCTGATTTATATTTAAACTCTCTTGGACGACGGTACTCGATG +TATATAATATCTGATGGAAGGATTTCTGCGTTTACAATTTCTAATTTTTT +GTAATATTGCATCAAACCAATTATGCGATCAATTACAAATAACACGATGG +GACCAACAACGTAGTAGCCAAATTTGGGAGACTGTAACAAATATTGAAGA +AACTTCTTGAACCGAACAGTGGCAAAAACTCACATCCAACAACTTTGGAA +GCCCATGAAGAAGAGTAAGTGCGTAAAAGGCAATATTGAGAAGATGTGTG +AGCCGGAATGCGTGATAAGCTCTCTTAATGAAACATGGTAACGCGAAAAC +ATAAATGATGCACATGACAGCGACCAATGCAATTCCTGTCAGACCTGTAA +TTGTGCTGAAGAACCAGTAACTGATTGAAGGAAGGAAGTTGGATCTGAAA +GTTATTTGATGAAAGTTGTTGACAGTCTTGAGGGAGTTTGTTAATGGTGC +GTATTCTAGTAAGATTGAATGCAAAAATGAATTCAAGTAAAGTATTACCC +AAAGAATGCTTCCTGAAAGAGACAAGCAAGACCTTCTTGACTTTGAGTTC +CAACGTGATAGAAATTGACACAATGTCCAACGGTGTGAAGAGTGGCCCAG +AAAGCCGCAAAGAGCGCAACGATCTAGAAATGTTCAGTTATTATAGAGTT +TTTGATACTGGCGGTCATAATAAACAATACTTTGTCCGAAATATTAGTTT +TCCCATTGTTTATCATAGTTTATTTTTTTATTCAAAAATCTGCCCTCTCT +ACAGAACACTTATATGTAAAGTAGACTTAAAAACATCTGAGTTTTTTGAG +CAACAAGAGACCCACACTCGTTCTAAAAAGAAATATATCTGGAGCACAGG +AAAATCTAAACTTAGGAGACACTTTTTTGTCAACAGAACACTACACACAA +TGAAGCAACACTGATCTAATGTTACCTTATCCTCACTTCTATTCATCATT +TATTCTTGCCTTTTATACATCGATATTTGTGTTTTTTGCTTCTTCTCCAA +TGTAGCCATTATTTTTTTTCTCATCTCTTTTTATTTTTGTTCCATTTTTA +TTTATGTAACCACTTGTGATGGTTTGTATCTATTAATGTGTGTTTTTTTT +CTACCTCACGATATCTTACTATCGTAATAAATTTAAAAAAATTAAACACT +TACAAGTTTAGGATAACATCGCTATATCATAAGTTCACATCTGGCTGAGA +AACACCATGTTCGATGCAAAGATCTTTCCTTTCATGCTTGTATTCTATTT +TAAAAAATCTTTTCAAAACATTGGGTATGCACAGTACAGTTATCCTACTT +GCATTAAACCTCCGCATTACTGTACAGATAATAAAATAACAAGTTTTCCC +TTATATCTGAACATGCTAAATATTTTTAAAAACAACTGATTTGACAAAAT +TTACTAATGCAATAATAATATTTAAAAAATGTGTTACACGCTGCCCTAAC +GTTTCTCACCTTGTGGAACGCAATAGCCGAGTCAAATGGAATATACTGCG +CAATGACTGTCTCTCGAAGAAGTGTGATTATGTTTCTACAAACTGTCAGC +AATATCAACGCCATGCAAAATGACAAGGCTCCCGCGGCACCACGAGTAAT +AGCGATTCCAGCTCCCATTACTCGTCGGAGATCCCTGTTTTCCGCCATGT +AACGATAATCTGAAAACTAAATTAACATGAGCTCCCAATTATATGAACTT +ACGCCAAAACCGTTCGAAGAAAAGAACAAGATTGATGGCAACAAAGCAGA +AGACAATGAAAACGTGTTGGCGATAGGTCTCCAAGAACGCTGAAACTTTG 
+TGGATCAAAGTTAGCGGTGCTGAACTGTTGATGGATCGATCCACGACGGC +AAATGAGTTCAGAGAAGATGTTCTAAAAGTTATAATTGTTATACAGTCAG +AGTCCTCCTGAATACTCACTCTCCAATACTTGGCTGATAGTTCTTTCGAT +TGAACGGCAGTCCAACTGGTTGCTTGTCAGGTATATCCGAGAACAGAGCA +TTGAAATCGTCGTAAGTCAGGTACTTGGCATCATTGCTCACTCCTGCATA +GTGCAACACTTCATTGAATAATCGAAGCTGCACACTTTCAGTAATGTGAA +CTCCAGCGGTTTGATTGAGGGACTTGACGAGTTCCGCGAGATCCTTTCGG +AGTACCTTGTTCTTTCCCTCCAAATCACACATTTTGAATAGAGTTTGCAG +TTTTTGCTTTTGAGGAGCTGGAAATTAAAGTAGGTGGTGCATAACATTTT +TTAAAGGCAGTGTTGGTTTTTCTACTGTTCAGTAAAAGAAAATCTACAAT +TGACTAAATTCTCAATACTTTGATGCCATATTAGTTGTGACCTCCCTGTC +ACACGAGAACTTTCTACAGTGCTCGCAAATTCTATAGGACTCCCCCGAAT +AAATTCAGATTCAGAATAAATAACCTTTAGAACTTTTACTATCTCGAAAT +ATTGCCGACCATAGGACACCGAGTTTCACCTCACGGCTAATGAACCTACA +AAATGTTAATCGGTCGCTAATCCCGGCCTAGTTTAAATTCAGATCAGAAG +AAATAAATTTCTTTTCGCAGGAGACTTCGGCTCTCTTGAACCAAAACCTT +TTATATGTAGTTTTGTACGTACAAAATCAAAGACAGTTCAAAACAACTTT +ACCGCTACTGCTGTTACACGTAATTAAGCTGCATTAAGAAATTATATTTA +TTCAATTAGAACTTATGTTAATGGGAAACACGCCGGAACATTCTAGAACG +TAGTCTTTGTATCACAACAAATTTATGCTGAAATACTCACCATTAACAAA +CTCTCTCAAGACTGTCAAAAACTCATTGAAACTGAGCGAATCCTCATTAT +GTTTTGCAGTCATCGCGAACATTCTCTTCACAAACTCATTATTCGCTTTC +ATTCCCATTGCACTTGCCAGTTCCTCACGAGATATTGTCTCATTTAATAT +ATCATCATTTGATGAGTCAAATGAAGTTTCCGAATCTTGAAGTTCACTAT +CATTGAATGCTTTTGCGTAGGCTTCACGGAAGAACAGGTCCAGTCGGTCT +TGACGGCGTTCTTTTGTGATTGCTTGGGATAAGAGAATCTGAAATAATTT +CCAGATTCAACGTATTTCTTGAAGTAAACTCACAGAATTCTCCTCATCCT +TTATGATAACCTCTGCATTGATTCCACTAGCCGCCTGTCTAATTTGATCC +AAAAATTTAGATAAATCTCTATCAGACGACAATCTTATCACCAAATGATG +ATTATTCTTTTGAGACAGAAGTACAAATGGTCCGTGCATTGCTGATGGAT +TCGGCATAGAGTGGAATAACTCAATCTTCTGTCCAGTTTCAAAACGAATT +TTTCGAAGGATTCCACCGCGTGGCTTCTTTACTGCCAACGTGGTGTTTTC +TATTTCTATCCTGACCTGTCGTATGTACTCTTCTTGAAGCCATTCCAAAG +CATTTACACCATAAATATCTCCCTTCGCGCCACAATCATCATTTGCAAAG +TCAGTAGTTAGGCTGTCACAAGCACTGTTGTGGCCAATAGCAATGCGACG +ATTAACCAAGTATCGGCCAATTCCATAGCAAACTGAAAGCATTTTGGTTT +GTATGGAACTAGATGCAATAGCTCACTTAATGGCACACATGCTAATCCAA +TTAGGGTGAAAACATAAGTGGTGTCATTATCAGTCCAATAAGTTGATTGC 
+ATAAATGGAACACATGGTTCAAGTCCAGTTGTGTTCACTTGGAATGGTTG +CGGGCACGGGTCACCTTCCTTGAAGAAGAATACCTGGAGTGATAAATACA +TTGCAATGCGGAATTCAAAAGAACATAATAATAAACCATTAAATTTGGAA +AGTAACTTACATCCTTCTGAAGCATTGTCTCATCGATATCGGTGGTTGCT +TTGATAATATCTCGAAGTGTAATACTATGAATCATTTGAACTTCTTCATC +AGTGAATAATCCATTCAATTTATTCTCAAACCAGAATCTATCTCCATCAC +GAATACGGGTGAATTGATCCTTTATGATTTCTTTGAACAACTCTCCAGGC +CCATTTTCACCTCCTTCCAGCATTCTAAAATAATAACGTTTGATTAACGT +AATCCAATATATGTGCACTGAGAAAGAAACAAGTATGCGTAATTTATTCT +GCCTAGATTTGCAAAAAAAACTACCTTGCATGGGTAACGTTTGAAATACC +GAAGGAATTGCACTATAAAACTGAGGTGACATACAACTTCCAGAGTTTAT +GCCTGCTACAAATTTTAGAGTTTGACCAAAAGAAGCCGTTAGAAGTTTAC +TCGTGAGAACTTTTTAACCCTTGAGAAGATTTAGTGAATATTCAAAATTC +CACACTTCTTAAACCAATCAGTTATGTTAAAACGAATTGATTTTCATTGT +TTTTGCTTGAAACTCTTCGAATTCACCGCATTTTAAGTTAAGCATTCCAC +TTTAAACCCAAAACTAACCCTCCTACATAAGCATCCAAATATAAAATATT +GCCTCCATACAACTCTTTCAACTTCTCCACCTTTGCAGTATGCTTCTTGT +AAAAGTCTTCATTCATTGTCTCCCATGTCTTTGGCGCAAGTCCGAATGTT +CTTCTCAATTCATTATACGGTGGTACTCCATTGTCCCTTCCTCTCATTAT +TGATGAAGCAACAACATCCAAACGAGAGAAATGCATTGGTCCGAAGATGT +AATCACGAAGATCTTCAACTACTATGTTATCATCTCGTTCAGCTATCTGG +CTTGCCATTCCTGAAATTACAACCTTTATTATATAACCACTCAAGCATGA +ACTAACCAAGAATAATCTCATCCACACTGTACTCCTTTACAATATCCTGC +GCATTCCACCAATTCTGGCACAATCTCAATGCAGGATATCCACCAACTTC +CGTCCGGAATTCACATTTATTTCCTCGTTTTCTCAGAAGCATTGCTGGTG +GCACAATTGAGTGAGGGAACCTGAAGGCGGCTGCTCCAAAAGCATGCGAG +ATTCCAGGTGGAACATGTGGCATGTATTTGGTGTAGTTTGACAAACGAAC +GTCTTCACCTTAAAAGAAACACTGAGTTTAAGGTATTATAATATAGTTGA +TTAACCTAACAGCCCTGGAACAAAGTCATATGCAATAATCTTCTGCATAG +ATGCAATCACCAAACGACGTGCTGCCTGGAAGATTTGTTCGTCTGTCCAG +TCAGGATGTTCTCGATGGATTTGATTTGCATTGTAGTTATGCCAACGGAA +GAGGATCAGACCAAATGAGAGAAGACCTGGATTCTCATTCACACGCGAGT +CTCCCAACACTTCAAGTTTAAATATTTATACTTTTCTATAAAACTATGAA +CTCACTAAATAATCTATCTGGACTCATCAATCGATGTACTTGTGGCGGAG +CGGGGTTATTCAATGGAATATGTGGGTTGTTAAGTGGTGGATATCCAGGT +ACACCTTCAGCCAACCGCCCTTGTTTGAAAGATCTTAATGAGGACACCCA +TGGCTGGGTGGTACCATAGATGAATGATCCATCAATCCATGAAGTCCGTT +CATTGATTTGTTCTCGAGGTGAGTTGAGCCCATTTCCAGTTGCTTTATCG 
+TATTTGGCACGTGTAAATGGGATTTCTGTCTTTCCCTCACATTCTTTATC +AAATACATTATCACATAGGGGTACTTGAATTTTAAGTGTCTCTAGTGGAC +AGGATACTCCATTTGATTGCATTATTTCATAAGCAACTACTTGACCTATA +ATTATTATTACTAATTTTTGAGGTAGAGGGCAATAAAAACGAACTGAAAA +ATGCCAATAAAGTCGTGCATCCTCTTGTATTAGGTATACCGGACTCTCCT +TTGAATAGTATATCGGAGAGTTCACGGGCGGACGGAAGTGAGTTATTCAC +TGAATATACACCGTCTGAGTAGTAGGAACGTGCATCTCTATGCAGCCGAC +TTCCTGAAGGAGTAGGTCAATGTAACGTCATTTCTCTTCCGAAAAAACCT +ACCAGCAGAACCCCATTCACTATTCGCCAGATTGTTGTACCATCCGTCGT +ATCTTTGAAATTCCTCATTTTGTTGGATTCCTGAAAAGATAAATGTTACG +AGTTTTTGATTTCCAGGCCACCAAAATCTATATGATGGCCTAGGTTCCTT +ATTGAAATTTCTAGGTCACACATGCTCCCTTCACTACATTTTCACACCGA +TTCGTGCATTTTCTCACTGAACGCGCAAATTTCCGGTCAGTAGCGGAGCA +GAGCAAACATTTGCCCCGCCACCCGAAACAATTATTTTCGTATCATTTTC +GCACCTATATTTATTCTGTTTGTTTGCCTATTTTTTGTACATTTTATCCT +TTTTCTTGTATTGGGCGACGATGGCAAGGTCAGTAAAACAGGAAATTGTT +CCTGATAACTCGGGAATTTCAGGAATTTTTCCAAAATTGGTTGGGAAACC +ACCACTTTGTTGAAATTCGAAAGATACACATTTTGTTATTTAGGTGTAAA +CATGAACACTTTCTTAGGTAAACTATGTGATTTTTATGAAAAAGAACACA +GACACCGGGTGGCACACAGAGCTTATCAGTAAACAGCTATCAGTGATTTT +GAAATTTTGGAGGGGCAGAAATGTTGCGGACAAGGAAAATTTTTGTTTGG +AATTTGAAGTTTTGAAATACGCAATTTGAGAATTTTTGAACATTTTTTTG +TAATTTTTATCCCATTCATTTTTAACCTAACATTTTGAAAATCTAGCACA +GAAAATAGTACAGACACGTTCAATTGAGCTATTCCAAAAATTATCAATAT +AGCGCACTTGGTGTCCATCTTCCTTAACTGTTTGCAATTAGTCTCTCTTT +ACAAGAAATCAGCTACTATTTGAACAGATCTTGAACACATTTTCAAAGAT +CGGCTCAAGATCAAAAGCCTCTTAAACATTTAAGATAGCAGGTCCGCTCT +GTTGACAAATTTTCCACCCGGTCCTCCGAAAGGAATTTTTTTGGTCATCC +AACAACCTAGACCATGACGGCACCGATCATGCAATTTGCCTGCTTATAAA +AAACGATATTTCTAGGCCACCCATTTTTAAATATTTTTTTGCCTAGTCTT +AGCTATGTGGAATAAATTCTAGGTCAGGAGGTTTTCTAGGCCACGTAATT +CCACTCACCTTTCCCTCCAAAAATTGAACTGAACAGTATAGCTATGTACA +GCACATGTTTTGAGCGCATTGCTCGTGCGCCTTAGAGTTTTAGTTGACCT +GAAAATATTTGAATATTTAGAAATTGAAATTTTGAAACGCAATGATTAAA +ACGGTCAAAAAAGATGTTATTTATACATACGCAACATTTCAAAATTGATA +ACAAATATTTACAGGAACACTTTTTTCCAATAATACGGTGACTAAGGGGG +AGTTTACTGATAGTAACAAATTGGAACGGTACAGGAAGAAAATTTAAAAC +CGGCTCTACCCATTTTAGTGCTACCAGCCGACAACCAAAAAAATCAATCA 
+GCCGCACACCCTGCTTGGAATGTGACAATCACAGAGTTTTGGAATTTTCC +TAATTTCTAAAATTTAATTTTCTAAAAACTGGGGATTTTTTCTGTAATTT +TTTTTTAATTTTAAATTATTTTTTAGGAACATTTTTTTGAAAATAACTTT +TGTTTGAAAAATTGTTCGCTAAAAGAGTATAAATAAGATCAGGCTTCCGA +AAATGTTTCAAAAAATATTTTTTCACGATTCTTGCAACAAAAAAAAACAA +ATACTGAAAAAAAAATTGTAAGAAATTTATTTTGTTGAACTTTTCAAATC +TACATTTACAACAAAAACAGTTCTTTATAATATTTTAAAATCCAAATAGA +TTCCCAGTAGATTTTGTTTAAATATTCGGAAAACGACCAAACTTTCATTT +TTTGAGTTCTTAAATTAAAAAAAAAATTTTAAATTAAAATTTTTGATTTT +CAGTCTAAAAATTTCAAAAAAGAGCTTTTAGTTCTGTAACTTTTGGAATA +AAAATTCAAAAAAAAATTGGGGGAGCCAAAAACTAACGCCTGCTTGAAAC +CTGCCCGCGAGAACTTGCAAATATTCAATTTTCTCATTTTTATTAATTTG +ATGAGAAAAATTTAGAAATAAAAAAAAATTTGCATAAGGCATCGATTGAG +GCGAAAGGCAGGCGGAGGTAATTTTAAGGCCAGGCTGGCGTTTTAACTTA +GGCTTCCATAGACCTAATATTTTCATACTTGTTGAAATTTCAGAGGTTTG +AAAATTGAACAATTTAGGCCCAAAACCTTTGTTCCTACAGTACTACAAAA +ATTCTTTGAAAAATTCCGGTAATAATAATTGGAGGAGGAGGAAAAATATA +AAATGATTCTTCGTCAGACAGAAATAAATTGGTGAAGAGAAAAAAATGAA +TTGAGAGAAAAAGAGAAAGAAAGACGGAGAGCGTCTTCGAAAGAAGGAAT +CCTCCTGCGCGGGATCGAAAAAATAAGCAGCAGCCGGGAGTGAGAGAGTA +CACTGCACTAGAAAAGATGCAGAGAGATTCACAGAAAATCGGGAGAGACC +CCCCGTATGTCGGTCGAGACTGAACACCTAAGACCTACTTCATATTTCGA +ACCGGTTCATTTTCATTGGCATTCGTATTATTATTAGTTTTGACAGGGGC +AGTCGTCGTAGATGCTATTGTTGTCAGTTTGACGTGATGGCCGTGTCATG +GGAAAAATTCGGCCATCAAAAATCGGGGGGTTCCTGCCACCCTTTGTCTT +CTATGGTCGGAGAGGCGTTTTCTAGGCTACTTATTTTGGTAGAGTAGCTG +TTAATTAAAAATTTCATCAATTTGGTAATAATGTGGCTGCAAATCATATT +TAAATTTTCTATACAGTCAGTGCCACCCAATTAAAAATTTTCTGGCAGTG +CCACCCAGAAAAAAAAAATATCCTAGGCCACCAATTTTTAAGATCATTAA +TTTTTTGGCTATCAAATTAAGTAGACCACTAAAATGTTTCGTCATTAAAA +TTTTCTAGGCCACCAATTTTGCTTGACTACTAACCTTCTTGGCCATCACA +ATTTCTAGGCCACTAATTTTTTTTCTAGGCCACTAATTTTCTAGGCCACT +AACAAACTGTTCAGGCCATAAAGTTTTCTAGGTCCCTATTTTTAGGTCTT +CAATTTTTTCAGGCCACCAAAATTCTATTCCAACATTTCTCCGTCACCAA +AGTTTCTAAGGCCTTTAATTTTATAGACGACTTCTTAATTGTAAGTCAAA +TTGTTTCAAAACGATTATTTTCAATACATATCAAAAACCCAAAATTTTTC +AAAAACTTAGTAAACTAAACTAAATTTCCAATACGTGCTTCATTTTTATA +TGTAGGCGTTTAGACACCCCAGTGGGCAGCAGTAATTGTTAATTCTTATG 
+ATTATCTCATTGGTACACTTTTCTTGTTCAACTTTAATTATTTATTTTTG +AAAAGGATTATCATTGGCATTGGCAAATAAATAAAATTGTTAGTGACGTG +GTGATATTGGACATTTTTTGATTTTAGTTTGCTTTTTGAAATTTCAATTT +TCGGCACTTTGAAATGTTCAAAATGTTCATTTGTATAGCAAATTATTTTA +TTAAAATTTTTTTCAAATTTATTACATTTTCAGTGAAAATTGCACAATTT +TTTAAAACTTTCTAAAAACTGCAAAAAAATAATTAGGTTTAAATAATCAA +ATCAATAATTTTTTAAAGGATTAAAATAAAATTATAGTTATTACAAATTA +TTTAAAAGATGTATACTTAAAAACTACTATTTATGTTTTGATTTTTACTC +GAAAATCCAGAAATTTTAAGTTTTCGAGAAAACTTTTTAAATTTGTTTTT +AAAAGAGGTTTATATAAATATAATGAACCCAAACTTGAAAGTGCGAAAAT +GTACTTAAAAGAGTCCAAAATAAGCAAATATCATCACGAAAAGCTCCGAA +CGTTTTTAAGTTTTTCGAAATGTTCAGTTATAGTTTTGGTAAACTGCCAA +CTTTACGAAAAATTCGGGTTTAATTTTCAATGTTTTTATACAAATATTTA +AAACAGAGTAATAGTATAAAATTTGTAAAAAAAAATTTTTTGGTCGGTAT +TTAAAAATGGTGATAGGTGATAGGGCCATTTTTGACAGTAAAAAAAACTT +TTATAATTGTTTTACATTTTACCAACATAGGAGCTGCCTTAAAAAATATC +TAAAGGCTTGGGGTTTTGCCCTACAGTCCCAAACTACCAAATATAAATGT +AAAATTTTTATAAAAATGTTCAAAATTTTTAATGATTAAAAAAATTTGCG +AAACTTAATCAAAATGCCGAATAATTGTTTCATTCCCGCGATTTTTTAAT +AATAATTTTTTTGTATAATTTTTGCATTTAGTGGCGTCATTTGTTTACAT +GTTGTGTTTTTCGGCAATATTAATAGAGGTTTTCAGAGATTTTCTAGTTT +TTGAAGCACATTTTGCCTTGTTCCCGTAAAGAAAACTTGATAATTGGAAA +GAAATTTGGCAAAACGGCGAAATTGACCAAATTGACCAAAAAAGTTAACA +AACTGTATTTAAAAATAAATTATTAAGCAAAGTAAAAACAAAGAAAACCC +ACATAAATGTCAAAAATGACGTCACTCATTTGAGCTGAAATTCAAAAAAG +AATTCGGTCCTTCTTTTTTTTTTTCAAAACAAATTTTTCTTAAATCATAA +AAAACATATTATAATTTTATGACTTTTCTGACAGTTATATTTGGAATAGT +GGGACATTTACAAGGGAAGTCGAAAAACTGAACTCCGGACTTTGACATGC +TATAGTTATTTTTCGATAAAAGAGTGAAAATAATGATCCCTCCAAAAAAT +TTTGCTGCCGCGGACCAGGTTCAGCAAAGTTATGACGTTTTGAAAGTGCC +GAAAAAAATTCCTTGACCAACCCAAGCAAAAAAAAACTTTCAAATTTTCA +AAAAAAAAATTCTGAAAGTTGATAAAAACTATTGTAACTTATTCAAAAAT +GTGAAAAACGTATATCATGCACGTTTTTTCTCCCCACGGACAAAAAACCA +CATTGCTTGATCAAAATATCTTGAGCAAAATTCTAAAAATTACTTTTTCT +TGTAGATTCATTTATTGGTTTTCTTCAGAGTTATGAGCTAAAACTTGCAT +GGCATATGTTTTTCACTGTTTTGAATAATTTACAAATATTTTTTCCTCTT +TTCAGAATTTTTTTTTTGAAATTTTTGAAATTTTTTAAAAGGGTGTTTCA +GCCACTTTCAAAACGTCATAACTTTGCTGAAGCTGGCCCGCGGCAGCTAA 
+ATTTTTTTGGAGAGATCGTTATTTTCACTCTTTTATTGAAAAATTACTAT +GACATGTCAAAGTCCGGAGTTCAGTTTTTCGACTTCCCTTGTTAGGGCAA +AAAATACCCACAGACGGTACTCCGCCTAAAATCAAAAAAGTTCCAGCAAC +TCGAGATCACTTTTTTACCATCTCAAATCGTTCGGTTTGTTGGGGGGGGG +GGGGGGGGGGCTTAAAGAAAAACGAAAAAATGAGACACTTGTTAATTTGA +TGGTAACAAGTGAAATGGAGAGAGAAAGAGAGTGTAAATACAAGAAGGGA +ACAAACACTTGAAAATCAAAATTGTCGGAAGGAACTAGGGGGAGGATAAA +ATATTGGAAATTAGGTTTAATAGGATATGTATCTAATCCCGAAGAATATT +ATTAAAATAATCGGTTCAAAGAATCTGAAAAAATCGATAAATGCGTTGTG +TTGTCCTACTTCCGTCCTCTACACAACGTCGTCTTCTTCTTCAGGGCGCA +TTCTTTTGTGTAACAGTGCCCCTTTTCTCTCTTGATGCCACAAAACACTT +TGGCAGTTACGCAATCGAAAGGCGAGGAAAGCAAAACGGGTATCAGATGA +TGATTAAGTGAAACTGGAACTGATAAGTGAGATGGATTGAAATACAGATA +GCCGTAAACTTTTAATAACCTAGAATTTTAGTTATTAAAGGTGTTATGTA +TGTTTCGAAATTTGAAAAGATTTTCTCAATTTTTGAATGAATTATGTTTG +AAGTAATTTAAAATGCCGAATGAAGGGTTTCAATTTTTGTTTTTTTAAAA +AGATTTTTCGTCCGGCCGATTTTTCGCAAAATGTTTTTTAAAATTTGGGT +TTATGTTCTTCTCTTTTTTCATGCCTAAGCCTAAGCTAGGCTTAGGTTTA +GGCTTACTAATCCTAATCCGAAGCATAAGCTTAATCCTAAGCCTAAACCT +CTCCTACTCTTTTAAGCTTAATGAATGCCCTAGCTTCATTTTTTTCATTT +TTCGCAGGTTTTTTTCTCAAAAACTCAAAAGCGATGCTACGAACACCAAA +AATTGGTGGTTCAAAATGTGTGTTTCTATTTTTTTCAAAATTTATTTGAC +TATACAAACCAGCTGACAATTTTCTTCAAAATTCCGTTTTTCTTATCAAA +AATAGTCAATTTTTCATCTAGAAACTTCAAAAAACCGTTACCGTTTCCCT +AAGTTTTGCTATCAGTTCCGTAAATCTTGTACCTTATGTCACATGGCATT +AGAAATATTTCAATTGAACCAATCTTGTTCGCGTGGAGTACAAGTTAAAC +ATTTATGATATGTGGATGGGTGCAATTGCGCTCTATTGAACAAACTATGT +ATCAACAGAACGCGTTAACATTATTTGTACAGGTGGGGAAAAACAGGAAA +AACGACTAGGCAAAACAAAAAGTATATAAGTTTTCTTCTACACGTCTCAT +AAAGGAAATTCAAATTTTTTAGAAAAAGAGCGGGGGGGCGCGATTGCAAA +GGTTTAGACGGCGTTAAGATCTTTGTCGACGAGTGGTTCAGCCGCTGAGA +ATGTGATTGCCGCCGGAATATTGTCCTTCTCGTTGATTGAGTCTTTCGAG +AAGAGCGCCAGAATGACGGGCAGAATGAAGAGACCGTGGAGGAGACCTGG +GAGAATTTTAAAATTTTTGAGTTTCTAGGCCACGAGTGACGTCACTATTA +TGTCCAAGAAGATATTCTAAGGACAAATTTGATGATTGTTCACTACTTTT +TCACTAAAAATCAGCTTGAATCGTAGAAAACAAAGAACTTTCAGAAAATT +TCAGCGCTCACCAATAGCAACGACCAAAAAAACGGTCTTAGCAAAGCACA +CGATTGCATAAGTTGGCACAAAAATCAACGGAAGCATGCATAGGAATGTC 
+GACAGTCCGGCTTCACACATTGGCATTGCCATTTCTGCCAGACTACTGTA +GACACGTTCTTGTGGAGTGCCACGTGCACGAAAAAAGTTGTAGGCCACGT +GAGCGGTGTAGTCGACCGAGAATCCCGTGGCGAGAAGGACGTCCACTTGG +ATTACCGGGTCCAGGTCGGCACCCCAAAGGGAGAGACCACCGACGAGAAC +TGTAAAAACGATTTTAAAAAAAAGTAACTTTTTCAGATTAAATCCAAGAA +AAAAACATAATTAAAATTTGTTTCAATGGTTTTTTAAAGTGACAATTACT +CATTTTCAGGTTTTAAAAACTTTAACTTGAAAAAAAAAAACATTTTGAGC +TACTTTTGAAAAAGGTTCTATTTTGAATTTCCCATCCAAACTTTTTGAAA +AAATACAAATGATAAAAATAATTGCTTTCTTAAAAAAAAATTGTTAGCAA +CCCTACCGTTCGCTAAAGATCGTTGTCGTTTTTTCTCAGAAAATTTAAAT +ATCCCGCCATATATATTTTCGGAGAATTTCAATTTCTCGAAAAATTGTTT +TCTCTGAAAATTTGAATTTTCCGCCAATAATTTTTCTCAGAAAATTTGAA +AAAAAGGATTAAAAACATGAATTTTCCGCAAAAAATGTTTTCTCAGAGAA +TATGAATGTCCCGCCAAATATATTTTCAGATAATTTTAAAGTCTCGCCAA +AACGTTTTCTTCGAACATTTTAGGATTATGTGTTGAATTTGTAGAATTTT +GATCTTGATTTAAATTTTTTCAGTTTTCAATTGAAAAAAAACTAACCATA +ACAGATACTTGCAATAACAGATGTAATCACAGCAACTGCATTAAAATTTG +CAATAAACACAAAACAAACGATTGCCATACAGACTACCGTAACCGCAATC +GATCCAATTAAGTCAGTGCCAACAGTCAGGATGATACTGAGAATTGCAGA +GTCACAGTCAAAGAGTGTAGCGTTGAATTGACTTTCTTCATGAAGAATTG +TTCGGATATGTTGCATTGCACGTGCTCTTTCTGCCCATTCAGACATTCCT +TTACCGAGAAGTGTCAGACGGAACGCTGTGACAATTGTTCTGAAAATTTG +GAAATTATTCGGATTGAAATTGAAAAGTGTATAGGAATTAAGTTTGAACT +ATTTGAGAAGTTTGTATTTTAAATTTTGCGCTAAGTTGCCTGAAAAAAAA +CTAATTTCGAACTATATACCTACAAATATTTTTCAAAGAGCTCAATATTT +GAATTTCGCGCCTAAAATATTTCGAAAACTTAAAATTTGCGACAAAACTA +TTTAAACTATTTGAGAAATTTGAACTTTTAATTAACATTTGAACTTTGCG +CCAATTTTTTTTTGGAAAATATTAAACTTTTGAAATTCTAGTTGAAAGGA +AATTCAGAAAAATTGAGGATGTAATTTTTTTATATGAAAAACTTTTAGAT +TCGATTTCGCGTCGAAAATTTTTGAATTGTCAAAATTTAAATTTTGACTT +TTGCAAAAAAATGGCGGGAAAGTGAAGTTTCATTAGGAAATTCAAATTTG +ATAGGCGTTTGACTAACTTATTATCATCTCCCATGTGATATTTTACAAGT +GGAGGGTTCCCGATCGCATCCATCCACGTCGGAAGATTATCATAAGATGG +GCGGTATTTTTCTCCAACCAGAGTGGATAAAATATTCATCGTCTTATCAA +AATTCACATATTGCGGAAGAAAAATCAGACTTCGATTGTCTCCACGAATA +CCTGGCACGTGCTCGAGCCGATCCATCATATTGTTGAAAGAGTCGTACTG +AAAATTGAATGGCCTAGTAAGTAAAAATTAGATCATGAACTTACCTCTGC +TTTGATCTCGATATTTGGAGGGTTGTTCACTATTACTGTAATAGGAAAGT 
+AACGGTTGAAGACTGGCCTGAAAAATGATAAATGCTCAAAAAATACAACT +TTTTTCTTTTAATAACGTAAAAAGTTTTTTAAATCGATAAAATCAATAAA +TCGGCATTTCAACTGTAAACCAAAAATAACAATTATTGATTTTTCTTACA +AAGACCTTAAAAATAAAAAAAAATCAATAAAAATAGCGATTATCTTTCCA +AAAATCTGAAAATCAATTCAAATAATTTTCTTGATCTTTTATGTTAACCT +ATAAATACCAATTATTGATTGTCTGGAAAATCAGTAAAGATTATTGATTT +TTTTGAGAACCTAAAAATCAATAAAACCCAATAAAAAAGTAATGTTTTTT +CAAACGCTTTTTCAAACAAATTCTTGATTTTACTTTCAAAAAACCAACTT +AATATTCGACAACGAATCCACCAGCTTCGAATTTGATGGGAATGCCTTTG +CAGGTTCAAAGGTCGTTTTCATGGTGACTACTCCATAAGTTGTCAACGCA +TACATCCCAAGGAGCACCATAATGCACGCTACACGCCCCTTCACGGAGCA +CACGAAGCTCGAGTACTTGGAGAGCCACGTGTCGGCTCGGGAAATTTTTG +GCTCCGTCTCATTGGCGATGGATTTGTAATCTGGGTCGTTGCAGAGGAAT +ACAATTGGAGCTAGGATCGTATAGGTGAAGATGTAGTCGAGGAGCAAGGC +GAGCGAGGCAGTTAGGCAGAACAAGGACATCTGGAAGAAGAGTTTTATTC +AAGCTAGCCCACGTTTTCAAAAACTGGGCTACGTGCCAAAAAAGGTTACC +TGAGGCGTTGGCGTCAAGAATCCGATTCCAAAAGCAATAATATTGGTAAG +TGAGGTGATTGTAATCGATGGTCCAACATCGACAATCACTTGTTCCAAAC +GGCGAGGTCTGAAAATTGCAATTTCACGATGCTTTTACAATACCCCTACA +GTACTTTTGCAGTTTCTCTACAGCACCCCTGTACAACTACGGTACTTTTA +TTTATTTATATATTTGTTCATCAGGCAACAATCATAATGATACAATTACA +ACAAGGCAGTGAACTTTTACAATACAATGCCCCACCCTCTTTTTCCGTGA +AAACTTACGTGTCAGTAATTGCAATATGATGTTTCCATCTGTGAAGTAGA +ATAAAAGCATCATCAACTCCAATTCCAAGTACCAAAAACGGCGTAACACA +TTGAATAGAGAACGACGGGAAGCCCATCCAACAAATTGCTCCAAACGAGG +CGACAGTTGCAGCCATCGGAGTGAGCAATGATGTGGCAACCAGATAGAAT +GCCATTTTGACAGATGACAGGCGGACAATGACAATGATGACCTGAGTCAG +AAGTAGGAAGAATCCGATTGTCATTAGAGTGGTAGCTTCAATGGCTCCTC +GAATCATTTCACGATTTGCTACTTGGTCACTGAAGATGGTGAACTGAACG +TGCTCGAAGGCAGAGGAGTTTTTGGAAAGTTGGAAGAGCTCGTCGATTGC +GTCCTGGGAAAAGTATAATTAGGAAATGCCGAGTGCTTGCCAGTTTTTTT +TATTATGTAAATTTGCTTCCCAGAATCGCGAAAATTTGATTTCCCGCAGA +AAAGACTGAAATTTAAATTTCTACTAAAAATTGTTCAGATGTTTGTTTTT +AGAAATTTTCTTTAAAGACAAAGATTTGAATTTCCTATTGAAAATAGGTG +AAAATTCAAATTTACTACAAAAAAATTGAGTTTTGTTAAAAACACACTGA +AAAATTGAACTTCCTGATTAAAATTGATTGAAAAATTAGAGGCTCCTAAA +AAATATTGAAAACTCGAAATGTAAATTCAAATTTCCCGTCCAAAAATTGA +CAAAAATATTTGAACTTTCTGCCAGAAACAAGTTAAAAAATCAAAGGTTC 
+CTCTAATAGTAAAACTGACTCAAAATTAATTGAAAATTGATCAAAAAGCC +GTATGAGTCTTCTAGGCTATTCTCACCTTAAACGCCAATTTTCCTTCTGG +AGTATCAGACCTCGAAAAATACCACAGCACCAAGGATTTGGAGCTGATCC +TCTCGGTCGGTGGTTCTGAAAGTTTCAATATTTTTTCACATTTTTCCAGA +ACTTTTTATAGTTAAAAAAATGAAATTTTAACTGGAAAGTGGCTAAATCA +TAAAAACTATTTTTCAACTTTTCATTTAACATTTCAAAAAAAATCTTACC +AAAATCCGCATTACTCAAGTGAAGTCCCAGGAAAATATCCAGTCCGCTAT +ATGGCATATCCGGATAGGTGAATACACTTGTGGTACCATTGTTTCTCCGC +GTCAAATTCTGAAAATTGCCATGTGGTCGTTGAAGAGTTTTCTAGTCCAC +CACCACCACCACCAAGGGTTTCGGTTTTTCGACTTTTTAGAAAATCGAAA +AATCGAACAATTTTTGAGAAAAACTTACAAAAAGCTGATAATGTTTTTAT +TTTAACTTTTTTCTCCATCATTCATTTTAAGAATATAGATAAATGGGATA +AGACCGTCCAGGTTTCTGTTCAGTTTTCCCTGTTGTTAAATTCATTAAGA +TGTTTGGTCCCCAAAAAACAATGTTTCTTTCATTTTTCGATTTTCAATTT +TTAGATTTTTCGATTTTCACTAAAAACATCGAATAATTGAAAAAATCGAA +AAATGACACCCTAGACCACCACCTACCGCTAACAAGTGAAAGGGGCTGTT +CAAGTCGCAGAGTGGATCGCAATTTTTTCGTCCATCCCCATCTTGGTAGC +CAACTGCCGCCGATAGTCGATTGTCCACAGTATAAATCTGAAATTTTTTA +AATTCCCGCCAGAAAAGTGGGCGTGACGGAGAATCAACTTACGTCTGCCA +GTGCGCCTTCCTGGAGAATACTGGAATTTTCGCGCTTGGCGGTAAAAATA +CAGAATGCTCGTTCCGGATAGATGTCGAGTCCGTAGTATTCCAGCCAGAC +ACGGGTTTCACTGAAAAATGTAAAAAAATTTATAGAAATTTCCGCAAAAA +GCATATATTTAAAATAGGAGTTTTAAAAAAATTAACAAAATAGTTACGAA +CGAAATTTTCATTTGCTTTAAAGTTTTCTTTTTTTTCTTATTTATTTTTG +ATCAAATATTTATTTTGGGTTTTTTAACTTCAAGTGGAATTCCAAAAAAA +TTCAATTTTCTTCCCAGTCAACCAAACATTGACCAAATCACAATAAGAAA +GGGAGAAAGGGAAATCGAATTAGAAAAAGAAAAACATTGCGAAAAAACGT +GTTATTTCTGACTGGTGGAGGAAGGAGGCTTCAAGGTCAGTAGTCACTTA +CTATCCAGCATCTGAATCATCCGGTGAAAAGGCTTTGCGAATATCTGGCT +CTAAACGAATGTGCACAAGTCCTGAACTGAGAACTAGGATGGATAAGATG +CTCACTGAAAATTGTCCAACTTGAAAAATTTCGATTTTTTATGTTTATTA +AATTTTTTCTGGTGTAAAATAGCAGAAAATGCCAAAAAAAAATGTATCAG +ACACAACTTTTTTTCAATTTTTCAAACTTTTCGTTGCAAGACCAAACATA +CAATAACAATTGTCGTTCGAGCGCGTTTTGCCCATGCACCCTTAACGGAG +ATTTTCATCGACTACTCTATAGAGCGCGTTGGTGATTTTGAAAATGAATT +TTTCTTGGAAATTTAAAACTTTTAGAAAGGGATTTTTCTATTTTTTCCAA +AAAATTATATTGAAAAATCAATAAATTAATTATTGATTTTTCAAAACTAT +AGTGATTCTGATTTGAAAAATGAAAAACATTGCTTTTAAAATTATCTGAA 
+CTCAGTTTTCACGAAAAATCAATAAATCGGTAATCGAAAAACATTCATTT +TTCTATTTTTCTCAATTTTTCGATGTATTTTCCAAGAAAATCGATTAATT +TATCGATTTTTTTTCCAGAATATCTGATCGATTTTTTTATTCATCGGAAA +AGTTCAATTATTTTATTTTTAGGAAATTGTTTTCCAAACTTATATTGGTG +GCCAAGTTTTCCCATTTCGACGACCATAAAGTTACAAAAAACTCACAAAG +TATCGAATAAACCGGATAGGCGGCTAACAGCCGTGCCACGACGGCACTAA +TCACATCCCATCCCATGTGCTGAAAAACTTCGAAATTTTTTTAATATTTC +AGAATTTAAAGCAGCATGCAAACGCGCTCCAACAAAGAAAAATGTTTTTA +AAAAATTGAAATTCAAAACACAAGCACGTGAAGCAGAGAATGCAGAAAAC +AAGACCACTGTCACAAAATTGTAGAAGTGAGGGAGGGAGGGCTATTGCAG +AGGTGACCAACGGGTTTCGGTTTCCAATTTTTCGGACACGGCGCAGAGGA +TTTGAGTCATTATTATTGTGTTTAGAGAGTGTAAGAGAATTAGAGAATTA +GGGATAGATGGGAAACTGTACGTACTTCTGCGAAATGGTGCACTTTTAGG +ATCGGCGGCCTAGAAATTTACATGGTGCTCTAGAAGTCTAATAGATAGCC +TAGATCTTTTTAAGTCCAGAAAATTACTTGGTGGTCTAGAAGTTTTTTTT +GTGACCTAGAAATGCAGTTGGTGGACTATAAATTCACTTGGTGGCCTAGA +AATTCATTTGGTAGCCTAGAAATGTACTTGGTGGCCTAGAAATTAACCTT +GAAGCTTAGAAGTTTGTTTGAAAGCCCAGAAATTCACTTGATTGCCTATG +AATTCACTTGGTGGTCGAGAAATGTTCGTGAAAGCCCAGATATTTACTTG +GTGGTCTAAAACATTTTTCAGTTCAGAAATACATTGGAAAAAATTTCGAT +TGAGAATTATGGCGTGGGATTTCAAGTGGTGACCTAGAAATTTGTCAAGG +GACTGAGGAATTCATTTGGCGTCTGGAAATTTTTTTGGTGACCTAGAAAT +TTATTTGGTGACCTGAAAATTCATTTCATGACCAGTGAATTTACTTGGTG +GTCTAAAAAGTCTCATGGTGCCGGTCTTAAAGTCTCATGTGTTGCAAAAA +TTATACTACAATATTTAACTTTGAAATAAAAATTCAGCGGGTCTAGATTT +GCAAGAAAAATCTGTATTTTCTCAATTTTTTTCAGGTTTTTGATTAGTTA +AAAAAAATCGAAATGATTGTTTAGAACTGCAGAGAAAAAACAATTTTGTA +TCTCCGGAATGCGCTATTCTGGGGAGTCAGATTTACTGGATTTTACTTTT +TTTCCTGCAATTCCAATGCAAAATAAGGTAAAAAAATGAGAAAACTGGCG +GAACACGGCGGTACACACAGGGGCAACTTGAATGTAAGGCAGGTGTTTAG +TAAGTGGACCGAGGGGACGGCAAAAGCTGGTATTTTTCCTGTGTCGATGG +GGGACAGTAGTAGCACACAAGAAACAAGTATTTTGATGATAATACGAATA +AGAAAAAGAAAAAGGCAGCAAAAAAGTGTGTGGCAGACCACCACCGTCCA +TCGGATTAGGGAGCGGAGGCAAACTCGCTCTACCGAACAGAGGGGTGTCC +GTTTCCCCCAAAATCCTCTGAATGTGACGTCATTGTTGGGGCGGCGGGGC +GGCGTCCAAAAATTAGTGATTTTTTTTTTGGTATTTTGGAAAAAAAGGAA +AAAATCGAGAAACATTTTTTTGTGTTTTTGGGGAGTTTGTCATGTGAAAT +TTGAAAATGTTGGGTACAATCAAAAAATTGTTTTGGTTTTTATAATTTTA 
+ATAGGAATTTTTAAAAATCTGAATCGTTTTTCTTTGAAAAATCAAAAAAC +AACAAAACATCCGCTTTTTTTAAAAATTGTTTGTTAAAAAAATAGAAATC +GGAAAACAAAATTTTCCGATCTTCTCGAAATTCACAAACTCATAAAAAAA +TCGAAATCCCCTTGTACCCCACCCTAGTTCACTGAAGCGCGTTTGCTTTC +TGGCCAAAGGCATGGAGGGAGGCGGGCACCTCCCTTTTGCAAAATTGACC +TAAATAAAGGTGATATGCCAGTGATAAGGAACGGTTTAATGATCCGAAAC +CGCCAAAAGTGCATAGTTTCTGCGTTAAAAAAAGTGACAACTGACATCAC +ACGGAGGCTACAAAAGGGGACTCACAAGTTTGCCGGAGAGGAATGAAAAT +TGGAAATTTGTACGGGTAAGGGGATCAATGTTCAGTGGTGGTCGCGAGGG +AGGACGGAGGGAAAAAGTGGAATTAGAATTGAATGGGAAATTGGTGGCAA +AACGGTAAACACTTCTTATTATCCAAATGCATGAGGGATGGAGAAAACTG +GAAACCTTTGCAGAATTTTGGAGGGGGTGATGAAAAGTATTGGCAACACC +TTGGTGGCCGAGAATTTTAAATTGATGATCTGGACTAAAAATTCAGATGA +TGGCCGAGTTTTTCATATTCGCACCAATACAAATGATGGCCTAGAAAGTT +TGGAGCGACCTAATTTTTCAAATTTACAAATAGATCTTTAATGTTTCATC +GACAAGGGATGTGAACTAGAAATTAAGATGGTGGCCTAGAATTATATTTA +TGGTTCGTCAAGAGGTGGCCTAACTCTTCAAATTCACGAAATATTCTAAA +GTTTAAATTTCCTTTAATTAGTATTGTGGACTAAAAGTCGAGATAATGAC +CGAACTTTTCATAAGCTCTCCACATCAATTGGTGGCCTTGAAATTCCGAC +ATGGCCGAATTTTCGTAAGTCGACCCCAGTGGTGGCCGAGCCAAAAAACA +TAATTCCGCAAACTCTTACATTTTAATATATGAGGGAAGCCAGAAGTGCG +TGCCACATTATTTTTACATTTTTGACTATCGTGCCAAAATTCCAAAATCC +TCAAAAATTTGAATTTCGCGGCACAATTTCGGGTTTTGAATATCTTTTTT +TTAGTGTGATGTAGGGCGTACTTTCCAATTTGCATAAAATAAAAAATCAA +CAAAAAGCCATACATAATGTCAAATGGTCAGTTGCAAAGAAAACAAACCT +GCGGTCAGGATGGCCGAACCAGAAAAATCAATTCAAGCTTTCGATTTCGC +CCGAAAAAAATGAGGATCAGAATAAGAAGGTGTCGCCTATTAGAGATTGG +AGGATGTTGAGAAAAAGAGAATAGTGGAGGAAAATGATGAGGTAGAACGG +AATTGAACATGCAAATAAAATGAATGTATTGGGTAGGAGAGGGAAGGTAA +CACTGTGTGTTCTGTGTTTGGAAAAGGATTGAAGTGGTGGACAAAAGGAC +TCTTGGTTTAATACTACTTGCCTGTTCTGATCAGGTTCAGAGGTATTTCA +GCAATAGCTTTTGTATTTTGCATTTTGTCTTTTTAAGTACTATTATTTAT +CAAATCCTTTTTTGTAACTTTGAATCTGATGTCAAACTTGTGGAAAGTTT +GAACTCGAAGAAATTGCTCAAAAAGTAAAATTTGCGCCCAATTTTTCGAA +TTTTTTTCGTTGTTTCTTTTTAATTTCAAAACACAAATCTTCGCCAAAAC +ATTGTTACAAAATTAACATTTTAATTTTTAAACAGCTGCTAAAGCACTAC +AAATGTGAAATTGATGCCTAAAATTGGAGAAAAATGGAGTTTTTCAAAAA +AAATTGAAATAAGAAAGTTTGGAAACTAAATGTCTATATATGGGGAAAAT 
+GCTATTTCGAATCTCGCGTTTTTTTAAGTTGGGCATCTCCAAAATATGAA +AAAAATACAAAAAAAAACTCCAAAAGATAATGTTGAATTTTTTCAAAACC +CTCACCAAGAAGTATGTTTTAATAAACAAAAATTTGAATTTCTTTTCGAA +TTTTTTTCAAAATTCGTAATATAAGATACCTGTCACACTTATTTTCAAGC +TAACAGGAAAAAATAAATGTTGAACTTTATTTAATGATAAAAATAGTTTG +TAGAAAATTCGTAAATTTAAAAAAATTCAAATTCTACCAAAATATATTAG +TTTTTCACCTGGAACTAAAAACATTTGAAGAATTTTTTTTAGCTTATATA +ACACTACAACCGCAACACATTTTTGTGAGAACATGTCTAGCCTCTGTCAA +ACAGTTTTCCGGTATCTTCGTGTGTGTGTTTGCTAGTGTTCAATAGATCT +TCCTGTAAAAAATCGAAAATTTCCGATTATATGCTTTTTATGTGTTTGAT +TCTATAAGCGTTTAGATTGTTATCAGATGATTTGACATTATTCGCAACAA +TTTTTTTAAATCCGAATGTTTTTCGAATTTCTATTTTTTATAATTTTCCA +GACAGTTTAAGAGCAAAGTTGAAGTTATTTTTTCCTATCAAAAAATGCAG +GAATAAATACTACCTCAATTTTTGCAAGAAAAAAAATTTACAGTAGTTTT +TTTGCTGGAAACTAAATTCCGCAGAATTTGAAAAATACTGAAAGGGTTTT +TTTCCTTTTTTAATTTGTACTCCCTACAGTACCCCTTATTTTTGAATTAC +AATTTGATGTTCGTACCTAAAATTGACACTTTTTCTCTAGTATATTTCCG +AATTCAATTCATAATTATCAGTTGAAACTAGACCCCACCAAAATCACAAA +TTGATAAGAAAACGTAGAAAGGGGAAAGCACGTGTAGCCTACAACAATAT +TAACTGTTTTCAATTTATTATTTTTATTTTCCAAGGGTTTTATTCTTAAT +TCTATTCACAATTTTTCGAAAAATATTCAGCAAAAAAGTATGCTTTCTGA +CTATATTATCAGTATAAAGTGAGTTTTTTGTGAGCATTAAAAAAATCAAT +TTTTTGTGGCCTAGCTTTGCATTTAGTGGCCTAAAAAATTAAATGCATTT +CTTCCACGTTTTCCGTAAAAAAGACAGTGGAAGAGTTTTTATCCGTACTT +GGGCAGGCTTAATATTTTGTGGCCAAATTTTTCGCGTGGTGCCTAGAAAC +TTTTTTTACGTTTTTTCTCGGAAACTTATTTTTTTCCATGGCCGTGAACG +AGTTTTGTTTAAATTTCAGGTCGCTTTCAATATTTTGATGACTAGTTTTT +TAAAAAATATTTTCCTGCTTTTAGTAATGCTTGGCCAAACGTCTTCACGT +GGTCGAGCTTTTCATTTGATGCCCTAGGATTTTTAACCTGTCGTTCTCTA +GACAATAACTACCGTGGACTCGAAAGTTCTACGCCACGTTTTAAATATTT +GTAGCCTAGAAATCAAAGTCACTTCCCGAGTGGCCTACGTTTCCCTGAGC +ATAACTAGTCCTCGAGAAGTCCAGTCGACCGTTACTGTCTACAAGATTAT +CGGCAAGATAATCCGTTTCGATTTCGATGTCCACTCATTCACTAGTCGTT +TTGTCGTCGTTGCAACAATGTGGAAATTACTAAATTTTGGCATTTTTAAA +GATTTTTAAACACATTTTTTCTGTCGGAAACTGAGAACCGCAGTTTGTAG +AGAAAAAAATTGCATTTACTTTCCTGATTGAAAAATCTTTGTTAGCGTTC +TTCCCCGCTGATTTATGTACCGTTTAAGTTTCAGCTGAGAGACAGCGTGG +GATTGGGGGAGACGCAGACAGCGAGTGAGTTTGCGTCTCCCTCTCCCTCC 
+CACTCTCTCATTTGCAAGTTTTGAGTTTTCATAACTCGGAGAGTGGAGCT +ATCGAAAAATTCAAAAAAAAAAACAAAAAATTAAAATAAAAAAATTCTAC +AAATTTTTAGTTGCTAATTTTTTGATAGCGCTGCTAGGTTTTGAGATAAT +GACGTTTTTAACTCTACCTCGACTTTTTGTGTGTTACCAAGTTAGTATTC +CTCTGACAAACCGCATGACCTCACCTCATTGTAGCCTCTTTCGAAATCTG +TAAACTTTACAACTCATGTTTCCTAGTGATATCCGCCTCCATCTCAGCTG +TTCTCTTTCCATCTTCTCCTAACTGTTTTCGTCTATTTTAAAAATGAGAT +TATTCGGATTTCTTACTAGTTCCGCTCAAAAACCAATAAATTTATTTTGT +AGAGCGAAAAATTCTACACATTTTTGTAGTCAACAAGTTTTTCAATGCAC +CTCTCCTCGCTGAGTTTTTAAGATCTATAATTTGATAGTAAAAATATTTT +TGACAGTAGCTTTTGTTTTAGAGATAATCAGGATTTCGCAAGCCACGGAC +TTCAGTCTAGCCTCATTTCGAAAGTTGTGAACTCTGCAAGTCATGTTGAC +TAATAGTATCCCTCTCCAGCTGTTCTTTCTCCTCCTTCACTCAAGTGGTG +ATCGTTTTCTCACTTCTCTCGCTTTTTCGCCCCTCTCTGTAGTGTCCAGA +GAGTATCAGTTATGTCTCCAGGACGCCGTGTGACGTCATCGCTGACAAGC +CGTAGCCTACAGCGCCCGGCAGAAGACCTGTTTTGACATGCCATTCGTGA +AAAATTTAGATTAGCCCTCTCACACTCTACATGTCTCCATGTTTTTTCAC +ACAAACTATTTTTTTGAAGAAATATCTTTAAAACTTTGCTTTGTCTATAA +TTGATAGCATAAAAGGTCCAATTTTTTCCGTAGCTCAGCAAATGGCGTAA +CTAAATGGTGTGAAATGTTTGATAGGGGACCGGTTTTTGGCTGAAATCTC +AGAAAAAATGGAGCCCTGCCTGCATAGAGACCCCTTTCGACTGATGACGT +CACAGAGTAAGGTGTCACATCACACCATTCTTATTTCTCTATTTGAGGTC +CAATTAGACTAGGGGGTCAGCGTGCTCTCGATTCCTAGCCGATGACGTCA +CATATTTGCCATTCACAACTTTCTAAACTTCAAACATTTTTTAATTTTTC +CCAAATTTCAGCCGATTCCTAATGGGAGGATTCCTCTCCAAACATCACCA +CCTCACTGCAGTCCACGATGCCACGTGTGGCCCAATAAAAGGTGTCGGAT +ATGAGCAAGTAGGTCATAAATTGAGCTCTCACTAAGTTAAGATGAAAGAA +AATTATTTAGGAAGACGGTTCAGTGGTGGAGGGGTTCCTTGGAATCCCGT +ACGCCGAGCCACCAATTGGAGCCTTGCGCTTTAAGAAGCCAGTTGCTCAT +CGGAAATGGACGGAGCCGCTGGATTGTGTTAGGTTCGGGCCAAGGAGTCC +GCAAAATGATGAGCTACTAGGGCAGGTGAGGCGCCTAAATCAGAGCAAAA +CTCTGTCATTTTTATAATGTTCAGTTTGTGAACACAGTCGGGAAAAGTGA +GGAGCACTGTCTCAGCCTGAACGTATTCACTCCAAAGTGGGAGTCAAATG +AATGGCCCGACGGCTTCCCAGTGATGGTCTTCATCCATGGCGGAGGCTTC +GCTGTTCACTCATCCAGCAACTACGGCTGCGCTTCCATCGCACGAAACCT +TTGCACCAAGGACGTCGTCGTCGTGACCATTAACTACCGCCTCGGCGTCC +TTGGATTCTTCACCACCGGCGACGAAGTGTGCCGTGGAAACCTTGGACTT +TGGGATCAGACTGCCGCACTCGAGTGGGTTCAGGAGAACATTCAAAGCTT 
+CCGAGGCGACCCTGATAATGTTACAATCTTTGGCCAAAGTGCAGGTGGAG +CATCTGTGGATCTGCTTTGCCTATCGCCGCACTCGAGGGGATTGTTCAAT +CGGGCGATTCCGATGGCAGGGAATGGAGAATGTGATTTTGCGATGCGGAC +TAGTGAGCAGCAGGCTCAGTTGTCGAGGGAGTTTGCGAGATACCTTGGAT +GGGAAGGAGATGGTAAGTGATTGAAATGTCGCTTAGTCGTAAGATGTTAG +AAATTGCATTGTCATCAGAGTTGGATTGAGTTTGTTTAAGCGTAATTTTT +ACTAAAGCCCATCGAAGCAGTTCCGTTGGCTACTTGGCCTACCACACTCA +GCCTGGGAGCCCGTCATAGCAAGGCTCAAGACATTTCCTATGTAGTTATT +TCTATTGCAAATTTAATACGGTGAATTTTCCAAAAATCTTGACGTATCCG +AATGAAAAACCAGTGCAAACTTGTTGAGTAATATCCCTAATTACTTCATT +CCAGACAACGACAGTGAAGACCTTCTCCAATTCATCGACCAGCAACCCCT +TTACAAGATCGAGATGGGCATAAACCCGAAAAGAGGATTCAAGCATTCCC +AGGCTGGAAGCTTGTATTTTGTGCCAAACTTTGACGGCGACTTCTTCCCG +AAACCACTTAACCAGCTTAGAAAAGAAGCTCCAAAGATGCAAATAATGAC +AGGAACCACCAAGTACGAGGGTCTATTTTTCAGTGAGTTTAAACTTCTGA +AAATTTTCTGAAAATTTTCTATTATAGTCGCCCTCGGTGCCTTATCGAAA +AATCCGGAAGGAATTAAAAAGTTCATGGGAAGAATCTTTAAAGAATGCGA +CTATGGAGAGCGTGCTGACGATGTGCTGCAGATGGTCTACGACTTTTATT +TCAAGGGGGTACATCCCAAGGATCATGAGAAGAATATGCATCAGATTGTG +AAGGTAGGTGGCATAGAGCCTACAGGGCCATCAGCCAGTGTTTCAGTTCA +TTGGAGACTATTCTATCAACTATGGTACATACCGTCTGGCTAACATTATG +ACGGATCTTCAACACGATGTATACTTCTATCAGTTTGACTATCATAACTC +TGCAGGATTCGGAGTGTTCCGGTGGCTTCTTCCGTTTTTGGGTGAGTTTA +GGCAGGATGGGTCAGATTTTCTAAGCTCTGGGCCACATGCTTTTTGGTAG +ATCAGTTTTAGAAATTCTAAGCTAGAAATGTATGAGTATGGTAAAACTTT +TAAATCAGATTGAAAAGAAATTTTCCAAATTATAGGACTTTGTAAAAACT +CCGATTGCTTAGGCTACTTCATCCGTACGACCGTAACCTGTCACTAACGA +TTCTAATAGTGTACCTTATCTGAACAGTATTGCCGAACTAATTGAAATTA +ATCCTGAGACTCCGAGATTACCTAGCGGTCCCTATCGCTGTGGCCCAGTG +CCCATTTGGCTCAATATCCAATTTTCAGGCTCCACACATTGCACGGAAAT +GAGATATGTGCTCGGCAAAGGAATAATCTCGAAATTCCGACCAAATGATA +ATGATAAGAAGATGCTTCACGTTATGACAACTTATTTTACAAATTTTGCA +AAATATGGGTACTTTTTTTGTGAACACTTTCGAAAAAAATTAAGCTGCTT +TCAGAAACCCTAATGGAGAAAACCAGGAGACTGGAGAATGGCAAAAGCAC +GACTCGGCACACCCGTTCCGCCATTTCAAGATTGATCTGGACGATTCTGA +AATGGTTGAGGACTATCAGGAACGGAGAGCCGAGCTATGGGATAAACTGA +GAGCATTAAATGTTAGCAGGGCTCAGATGTGAAATTGCTGTTATTTTTAC +TTGATTGATTTTACGGGTTTATAAATATTTTTGATTTGTTATATATTTAT 
+TTTTTCTGAGTTACATACATACATCAAGAAACATTTCAATATCTTGCAAT +CTAACTGAATTTTATTTTATAATCAGTTATTTTTATTAATCCTGTACTAT +GCCAAAAAATCTAACTTGTTTGAATCAATAACTCAACTACCAATCCTTAA +CACAAAAACCAACACGGTTCTACCCAAACGATGTATAAGCTGGCACCATT +ATAAATTATCCGCATACTTTTTCAAACAAAGGAAAAAGTCGGAAGCTTTC +CATACATTCAACTCTATTATACTTTCCCCTTTCCATGGATGTGCTTTTTG +TGTGAAACCAAAACTTAACCGCCAGAAAGCTTTTTACATAATCTATAACA +GAGGTGGGCGGCAATTGCCGTTCGGCGAACATTCTGATTTTTTGGAAATT +TTCATTTTTGGCAAATTGCCGATCTGCCGTTTGCCGGATATCAATTTGCC +GGAAGTGTTTAGAGGGTTCTTTTTACGACGGAAACACTTAAAACTGTGCC +TTTTTGAAAATATTTAATTTTTATTCTTTTTTTCGGCAAATTTGCCGGTT +TGCCCAATTTGGCAATTCGCCGGAAATTTCAATTTCGGCAGTCTGCCAAT +TTGCCAATTTTCAGAAAAAAAATTTCGACGCCCATCCCTGATGCATACCG +TTTTTGACTCAATTTAAATAGTACACAACCTTCCACATTAATGTAGTAAC +GGTAAGGTTGCCTAATTTGTTTGAACTTTGAAAGCCGCGCACAGCACCTA +CAGTAATCAATCTCCTTAAAGAGTGTTTTCGCAATACCAAACATAGGAGT +TTGTAGAAACATGAAACCGAAGGACAACACATTTTGAAACAGGTGTCACA +TTTATGTATCTGCACTTGATAGCTATGCAGTCAATGAAGCATAACGAGAG +ACATGTGTTCATTCCCGAATGGAGTCAATGAGTCATAAGGTTCTTCGTTC +TCATTTTATTATTACAGGCTTGGGTCCCACATTGATTCATATTATACTTA +CTTTTCAATTCATTGTTTTTTTGTGATTTTTTTTCATTAGAATAGTATTA +TAATAATATTCTTCTAATAATGACAATCGAAATAGTTGCAAGTAGAATAA +CGATTTGTACAGTGATGCCCATAATTGTGCATGTTTATAATGTTCTAATA +TTTCAACGCAATCATGACACAAGCTCTATAGCATCTCGTGAGTTCATTTT +TGGTGAGGTTTCAAAAAAAAAATCCTCTTGCTCAAGAGCCATACGTTTTC +AGTTAAAAATTTCTTTAATAAAATATAATGCTCATTTCTCGAACGTTACC +TGTTAATGTTAACTGCGCCTACCATGTCCGTTCAATCCCACTAGTCAGTT +ATTCTTTGCAATTAAATGATAAACTTTAATGTCTAAATCTCTTCATGGAT +ATCTCCATCTCAACCATCCATCGTTTTTTATCAACCATCATCATTCTCGT +GTTCTCGTTCTCGGGGTCGCCTCCGCTTCTTCATCATCATCACCACGACC +ATTCAAATCGTTCATCTTCCTAATCGACATATTCCCGCATTAAATTTCTC +TGTTGGGTATCAATGACTACCCAAAAACGGTCGCATTTTCTGAAAAAATG +GGAAAATTGATTCCTAATTTATTGGTTTTGACCGGTTGTAACTAACCCCC +ACACACTGTTTTTTGATACTACCTACGTCTATTTTCGGCTAATTTTAAGA +AGTCAATGATTCCTAATTGTCTTTTTTGTTATTTTCTGTGAAGTGAAGTA +TCAAGATAAAATCAGTTTGCGTCTCGCTTCGATTTTCTCATTTTTCGAAT +GCTATTCCCCCGTAGCAATATGGTTTTGTTATTACACATTTGCACTTTTT +TGCTACCCTCCCAATTCTGACTCACACTTGCATTCCGGAGACTGCCTGAG 
+AGTTAAAACTAAACATAATTGTTATATAATAAATGTCAAAGTTTTTCGCA +AGCTGAAAATTTTTGCCAGCTGAATTATTCCTCCTTCAAAGGAGCATAAG +TCTAAAAATGCTAGAGTTATTGGAAAGTTGCTAATTACAAGTTATAGAGT +TTAAAATTTTGAGTAGTTTCTCGATTTACAATTTTTGAAGCTTTATTCCT +TGCTCAAACTCGAGCAATGAGAGGCAGTGGGGACTCCCCCACTGGGGAGG +GGAGAACCTTGTCAACCCACTAGGGAGGGGAGAACCTTGTCAACTTTAAA +GCCACGTCCCTCTGCAGGGAAAAGATCTATCAAAAGGTTATCAACTAACA +AAATGTGCAAAATTATACGCTCTACATTTTTGTAGTTAACCGTTTTCTGT +TATCACAGTTAGTTTTTGTGAAACCATCAAATCTGTAGTTTCTCCATTAT +TATTATCACCTAACAGTATTATTATCAGCTGATAAGTATCCTTTCTTTTT +GGATAATCCCCTTGTAAAGGAATATACCATTGTCTTCATCTCCTTTTTCT +GTGTCTACCCTTATCTTTGGGTCGCCATCTGCCTCTCCTCCCCGCATATC +CCCTCTTATATCTTCAGAAATTCAGGTTCCAAGGAATATTCCGAAAAATT +TTGTTTGGAAGGTGACATGTCCGATCAATCGCTGGTCCAGAAGGTTCCAC +TACTTGGCTCGTGTTCTCAGGTGAGTATGCGCCTTTAAACATTGAGGGTA +CTGTATGCACCATAACGCAAAATTGCAGATTTTATAGTGGGGCAGGTTTG +TTCTACAGATAACCGTAAAATTTTTATGTCCCTTTAAAAGAAACTCGTCC +GAAAAATTGATAATTTACGAGGAAAACAACTTTTAAAAATGTTTTTTTAC +TTCAAACGATGCTCTATTGTGCCGAATCGCTTGTTGTTTAGCGGCGTTTC +TAAATATTTATTTACTTATCGAAAATCTTTTTTTTCATATTTATAACTGT +TTAAACAAAATCGATAATATTTATTTTTTCTGAAACTATTCTGAAAATTT +TGAATTTCCCGCCAAAAAAATTTCGGAAAATTTTGATTTTTTTTGGCATA +TTATTTTCTGATAGAATTTGAATTTTTGGCCAAAGAAATTCTGAAAGTTT +TGAATTTCCCGCAAAAATTGTATTTCTGAGAGAATTTGAATTTCTTGCAA +TTTTTCTGTGAAAAAACAATTGAAAATAAACGTGGTCGCCAATACGAGAA +AACTCGGCCACCGATTTTTTTTTCGCAAATTTTGCATTCGTTGCTGCATG +ACTGCCAAAAATTGGCGGCCGAGTTTTTTTATTCTACAGGGTTAAATTTC +ACAGATGGCCGAGTTTTCTCAGTTTCACGGTCACAGGCTTCTAGTTTATT +ATATTCATCTCCGCGGCGCGCGCACCGTACTTTCTCCTTCTCTACTCTCT +CGTTGTTCAATCCCTCTCTCGGATCCAACCAACGCCCTCTGCGATTGTGT +CATCATCGATTTGTTATCTCAAATTCCGTTTGGCTTTTCAATAACCGTGC +CTTATTGTCTCAGGAATTTCAAATTTCCGGTCAACAATTCGGATTATTAA +CCATTTCGAAAAAAATCCTTTAAATTATTTTTATTATAATGAAAAACGTG +CGATTTTTTCTCGCTGGTGGGTCCTGGCACGACTGAAACTTTTCAGCTAC +TGTACCCAGCATTGCTTTAGAATTTAAATTTTTGCCGGCAAATTAGAACT +GCAAGATTTTTTTGTTAGTTTTTAATTATAATTGTTTGGGAAAAATTACA +AACTTTAAAAAAACCTACCGTACTCCCAAGTACTGCAGCCAAGAAATAAT +TTGTTTCGTGCCAAGACCCATTTTTCCTTTAAGAACTAGAAACCTCCCTT 
+GTTTACAACTGTACTTAAAATCGAATTAGTTTGTTCGCTAAACATGTTTT +TCGCGCGAAATTTCGCTTTACTTTCGAAAAATGTTTTTCTTCATTTTCGT +CGGCATGCGCCTTTAAACTACTGTTTTTGAAACTTTCGAATGATAGAAAA +AATTAGAAAATATTTTTAATTTTCGCTTTCGCCGGAATATTCGAAAATTC +GAAAAAAAAACTAAATTTCATGGATTCTCAGTTAAAATTCACGGAAATTA +AAACTTGCGTAGCGAGACCTCAACGATCACATTACCTATGCGCCTTTAAA +GCTACCGTAACCCTATTTTTAATTAAAAAAAAAAGAATTTCCTTTTAAAA +TCGCCGGATTTTCTCCGTCTCTTTTCCCATCTTTTTTCATCTTCGGTACC +AATCATATGTAGGTGAGAATATCTCTGCACCCTCCAACGAACCCAACACT +ATATCGCTGCGCACCGTCCTTCAACGATGTCATCTTCCTCCTACTTCGAC +CCCCTCAATTTATTTCCGATTTCCTCTGATTTATTGATCTTTTTTCCTCT +TTTTCATCATTTCTACGTTCTATAAAGCGAGGTTTATAGCCAAATGGACT +CATTCAAACGATTACAATCCAAAGTTTTCACGTCAGTGAGCTCACTGCGA +AAATCTGAAGAGAATGTGAGTAAAAAGCGGCGGAAGACGTAAAACATCTG +AAATATTTTTAAAAAAATTTGTTAAAAGCCCCACATTTTCGAGATAAATC +TAAATTTTCAAGTTTGTAGCAGCAAATTAAGCAAAGATATATATTTCAGG +CCGACGAGCAGACATACAACACAAAACGATGCTTCATCCACGTCAATCAC +TTATCGATCGCGATCGCCGTCGTCGAGCTCTCGATTTTAATTTATCAGGT +AATTTGTTTGCCTGCAACTCAGACCAACGCTTAGTCGTCTAATTAGATTT +CAACCATATTTGATTAGAAACTGAGCAAATATAATGGGGACATAATTAAT +AGGCTTATGGATTTTATTTGATTTGACAATATGGATTTTCTAACCAAATA +TGGTTAAAATCTAATCCAATATGGAGCAAATCTAATCAAATATGGCTGAG +ATCGTGCCATATTGGATTAGATTTCGACCATATTGGATTAGATTTGTTTT +CTAATGAAATATGGAGAAAATCTAATCTAACATGGATTTCTAATCTAATA +TGGAGCAAATATAATGCAATATGGATTTTCTAAAAAATATGGTTGAAATC +TCGTTGAATATGGTTGAAATCTAATTAGACGACTAAGCGTTTATCTGAGA +AACTGGAAAAACCTAAAAAAAATCTGAAAATTTTCAGTTTTGTGTGGAAA +AAATCAATGAAAAACTCAATCCTACAGTAATTTAAAAATTCTTTTTCACT +AAAAAAATCAGTTCCCATTGAAAAAACAAACTGAAAATCAATTATTTCAG +CTCTCAAATGGCAGTTGGGGCAATTCGGAGCACTCGACCGTCTTCACAAT +CGCCGCCTTGCTCTTTCTGCTCGTCATTTTTCTGCTTTTCGTCGCGATTT +TCTACCAAATCGGAAATCTTCTCATCCCGCACATCGTCATGCAGATTCTG +CTCGTTTTGTGCTTCCTGGGACTCACATGTGCGACGCTTTATGCGCTTTT +CCACGGTGCCACGTTTCAGTTGCTCGTCGTGGTTACAAATCCGCAAATCG +CCGCTGATTCCATGGTAAGTGTGGTAAAAAATGAAATTTGTTTCCTTGGG +AAATTCCTTCTTTTTCCTCAATTTTTCCTCATTTTTTTGCTTCCAAAAAA +AAGACCCATTTTGCGAAAAAAATTTTTTTTTTTCATTAATATATGTATTT +TAAATTTTCCAGACAATTCTCCCGGCGCCAATGATCACAACAAACGTGGT 
+TTCCGGATTCCTAGTGGGCCTTCTCGTCATTTTCGCAGTCTCCTACTTGC +TCATCGCTGTGCTGAATGTGAGCTTTTCCCATGGCGAGACCCATGTAACT +TTAATTTTTGCAGACCTGGTGCATGTACGTCGTCATCGATAGTTACCAGC +TTTTGAAGAGCCAGAAACTTCAATCTCGTACTCCGTCGGTCGAGGAGTAT +TGTGCGCCGAAGACGATTCAACTCTCACTATACCCCAATCAAATCGTTCA +GGCCACCGATTTTTAGTCTATATTTTAGACCATTTGTTGAATTTTTCTCG +AAAAAATCCGGTAAATTTTTGTGTATATATGTCCCGAAATTCTTCACTTT +TATCACATGTGTCAGGCTTTAATTTTTTCATACTTTATTTTCGAATTCTG +TACAAAAATCTTTGATATATTCTATTTATTCTTCTCAATTAATAATCACA +CGATCCATCTTTCCACCCGTCTCGCCACCTTTGCATCACATCTCGACACT +CGAACTTTGGTTTTCCGGTTTTTTCGTTGACTTTATTGTGTAGCTGGCAC +ATCCATAGCGCAAACGCCTCGCGGCTTTCCACTTTTGGCGGCGATTCTTT +CAGATCTTTTCGGAGATCTTTGGCGCAGAAATCGCAGGGATATGTCTGAA +AAATTTAGGCGTTTTTGTGGGAATCTTGAGCTTATTAGCTAACTTTTCCT +AGAATAGACATGAAACTTCTGGCTCGATCCTTATCCTCGTCGGTAGGCTT +TTCCGGGTAGTAAACCGACATTGTGTGTAGCAAATTCCACGTGGATCTGC +CTGAAATTTGAAATTTTTGGTGGAAATTTGAATTAAAAGCGAACCGAGCT +CATCCTTGTCTACTGGACATCCGTGCAACTTCGCTCCTGTGCTCGTGCTC +GGCGTCGTCTCGGATTCTGCAAAGAACTAAAATAATTGAGATTTTTTGCT +CTGGAAATCTAGGGAAATCACCTTTTTTCTTTATTTTTTCAGCCATTTCT +CTGCCTTTTTTCATCATATCCTCTACACTGACGCATGCACGACACGGCTT +TCCATCTGGACCGATTTCCATTCTGAAATTAATTTTTAAACTTGGTTTTC +GTTTCGTCAGTTTTAGAAGCCGTTTTAGGGCGTAAACTTATGAAACTATA +CATAGTCTACGACAAATCTATAGAAATGCATTAAAAATAGGTCAAAAATG +TGAGAAATTGCGCAAAAAATGATGCAAAAACGAAAAAAAATTAACTAAAA +ACAAGAAATGCAAGTGCGCTCCACTGACACGCGTTAGAGAGCGTAGACGC +AGAGAAATCTCGAGATGAGGGAAATCGTGGCGCGCGGGAATCAAGCTTTT +TTGAATTGTCTTGATTTTTAAAGCCCAAAAAATAGATTTTTGGCGGATTT +TTGGTGGATTTTCTTGCTTTGCAATGTAAAAATTAATTATTTGTTGCATC +TAATCACTTTTAACAGAGATTTACAATTTTTTTAGTGGTCTGAACCTGGG +AAAATCCTTCAAAAATCAATTTTCCCTACAAAAATTCGTTATTTACCGCA +TTTTCCGCCCAGCTGATCAAAAATTCTTCTATTTTCTCAATTTATTGATA +ACACTCGCCCACCTCAAGCATTTTATCACCTGAAACTCGTCACACTTTAC +AATTTGTTTGCAGAAATATGCGAAAATGAGCGTTACTCAGCTAAAAAATC +TGAAACACGCGATAGCTCAGCTTCTGGAGTGGGATGGAACGAAAACGGCG +AGAAAGGCAAGTTTTTGGTGGAATTTATCGAAAAAACCTGATTTGTTTTT +TTGCAGAAAATCGTCGACGAGGTGGTTCTTCTCTATCACGCTCTGGGAGC +GGAGGCATTAAGTGAGGATAACCAGGAGATATATGATTTGTATGATTTAT 
+CTGCACGTATATTCAACTTAGCTAAGAAAGTTAGTTTTTTTTTTGTTAAA +AAGTCGTAAACAACAAGTAAATTTAAAAATAAGCATTAAAAAATATTGAG +GCTTATGAGAGCAAAAAATTCCAGGAAATCGAAGAAGCCAACCAACAATT +CGAAAAAGAACGAAAAAAGGGCACAAGACGAAGCGAAAAACCAGTCCCAA +CTCCACTTTTCGAGCTATCAATACAACATTTAAAACGTTGCTGTCAGCAG +GGAATCGATCACAATCAGGTGCCATGGATAGCATATTGTCTGAAACTACT +GGAATTTCCAATCACAATCACCGAAAAATCGATCGAAAACGAGATTTCCA +ACGTGCTCCTATTGAGCTCCAACGCCTCACAGCTCCATTGGGCCGAGCAT +GCTCATTTGAGCAGCTTATGGAAATGGATTTGGAGCCGTGTCGAGACCGC +CGATATTGGGGCACTCGCCATGAGAAATTATATGGAATTGGCGGCGAACT +TGCTGGAAAACGTGGATTACGTGGTTTTCGAGAAGTCGCCTATTGACCTG +ATGGCGAAAGTGATGGGAACACTGAAGAAAAGTGTGGAAATGGGAAATCC +CAAAGAATAGTGCGTGGAAAACGAGTTCTTGAAATAATGTTCAAAAAATT +CAAATTGGCTTGGACTTAGGCTTAAGCTTAAGCTTAGATTTAGGCTTAGG +CATAGGCTTACAAGGGAAGTATTTCAACTTATTCCCGGACTTCAGAATGA +AACCTATGTCATTTCGAAGAAGACTCTTTAAGTAGGTCCCTACAAAAAAT +TTAGCGGCAGAGGAGCACTTTGACCTGGAAAAAAGTCGCTCTGAAAACTT +TTCAGTGCATTTTTTCACGGCTTTCAAGGCCCGTAAAATGGCTCCAAATG +CGAATTTTTGTAGATTTTTGAAGATTTCTAGGATGCTCAGGGGCCGGAGA +GCACTCGGAATTTTTTTTGGAATTTTTTAAAATTTTCGAAAAAATCTGAA +ATTTTTTGAATAAGTCCATGGCAAAATTTTTCAGTGAATATATATATATA +TTTTTTTTAAATTTCAGAAAAGTTGTCAGTTATTTAAGATCAAAATTGGG +TGTTTTTACACAATGAAAATTTTTTTGAGATTTTTCGGAACTGCAAAAAC +CAATTTTGGAAAAAAATCGGCACCCGGAGTCGAACCCCAGTCTTAAAAAA +TATTAATCAAACTCGCTAACCACTCGGTCATTCAACGACAATTTTTCTTG +TAAATGAAATGGATGACAAAAAAACGTGACTCTTTGATATCTACTTAAAT +ATATAGGGACGTGAGAAATAGCTTTGGAAAAGGAGAGGACGCAGAGAAAA +ATTTTCAGAGCGACTTTTTTCCAGTTCAAAGTGCTCCTCTGCCGCTAAAT +TTTTTTGTAGGGGCCTACTTAAAGGGTCTTCTTCGAAATGACATAGGTTT +CATTCAGAAATCCGGGATTAAGTTGAAATACTTCCCTTGTTAGGCTCACT +AGGGAATGACCAGAATAAATGGAGCGATATTCAAAAAAAATATATTGTAT +CGGAAAGCTGGCATTCTCTACTATAAGAATATGACTGAAATTTTTGCCCG +TTCGGGCTGGAAATCTGAAATTTTTACGTCTGAAATTCTACACTGAAATC +AGTGCATTTCCTATGGTTAACAGTGGATTTTTGTCTCTGGCGCCAACAGA +AGTCTCACCACAATGGTGGAAGGGCGAAAACATCGGTTCGGTGGTCGAGT +GGTGAACGCGTTCGCCTCTTGAGCAGAAGTTTGTGGGTTCGGTTCCCATA +CATGGTTTAACTTTTGGCCTTTTTTATACAAAATTTTCAGAACGGGAAAC +AAGTATTTAGAACATTTTTTTGAGGGTTTTACATAATTTTTTTGCTTTTT 
+AATTGAACCATAATTACCCTGGAAACTTTTCAGAAATTTTAATTTTTTTC +GAAAATTGTCACTTTTTTCTCCACCAAACCCATGAGAAAATTTGATCGAA +AAATTTTTTTTTGAAATTTTTTTAAAAATGCATGAAATATTTTAGAGTGT +CACAAATAACCTATTTTTCATTATTTTCAATGACCGAATCATTGATTCTG +ATGCCTTATCAAGACGTTTTACCAAATCGATATTGGCAAAACATCTTGTT +TTTGAGGCTCCATATCTCTGCAGGAAAAAATCGCACTAAAAAGTGATCAA +CTAGAAACTTGTTAAACACAATGTGATCTAAAACATTTCAGTTGAACACT +TTTTTGTAAAAAATTTCGTTGCCGAGTTGGAACCAATTGATTTGAGCTTC +ATTATTTTTGAATATTCTAAATAGTTAAAGATCTATATCTTGGCAACGAA +ATTTTTTACAAAAAAGTGTTCAACTGAAAAGTTTCAGATTACATTGTGTT +TAACAAGTTTTCAGTTAATCACTTTTTAGTGCGATTTTTTCCTGCAGAGA +TATGGAGCCTCAAAAACAAGATGTTTTGCCAATATCGATTTGGTAAAACT +TTATTTTAATTTTTTTTTTTCATATTATTGTTTTTATATTGGGTCAAAAC +GTATTTGCTCTGCTAAATAGTTAGCTGGCGCTAATTTTTAATTTTATTAT +TTTCGTATTTTGATAAATTTCACTGAAATTGTCACTTTAATAGTTTACAT +TCTATTGGTTTTCTTTTATTCAATGTCTTTTGATATCGGAAACGTGATGT +TTGCCTTGCATCGTCATTTTCCCAATCCCCTCCCATCAAATCCCTCGATT +TCGAGACATGAGCAGAAGGTGGCAGATCATTTGGCAAGCATTATAAAGGA +ATCTTATTATGGAGATACCAGTGTGGAAGAATCCGAATATTTGGTTCAAG +AAGAATATGGCGATTGGGAGCCAGAAGTTTTTGAGGTATGTTGGTTTCGA +CACTATTAATTGTCATTTAAAAAACCTTTCAGCCGTATGACTGTGTGTTG +CCGGATCAAGATGATGTATTGTTCGGTGAGAAAGCTGTATCAAGGGAAGA +GTTGGATAAAGCGATCACCTTCTACCGTAGCGGAAAAATTGGAAGTAGGC +CGATAACGACTATGCACCATAGTTATCGCTGGATACGAACCGATGCTCAT +ATGAACAAACTGCGCAAGTACGAGAAAGATAAAAAAGCGTTCCAAGAAAG +CGTGCGGGTTCGTTTGGCACAGCTTACACAACGGCTTTATGAGGAGGTCA +AGGAGAAACTTGATAATGGTAACGAACATTAATATAACAAGTTTTAATAT +AAATATTTCAGGAGTCAATTTGCATGATTCTGATTTGATGGCTATGGCAC +TGGAAATCAACACCCGCGAAATGAAGTTGCAAAAACAAAACGAAATGAAG +TTGCAAAAAGCGTCACAATCCTGGATAACCCGATGGAAGCAAAGCCACAG +GTATGTAATAAATTGCGGAGTATGTTAGATATATATATATATATATATAT +AATTTCAGAATCGTGAGTCGGAGAGTCACCAAGTTTGTCACACGGAAGTG +CCTGATCAATAAAGACGCTATCAAAAAAAACGCGGATGATTTTGTCAAGA +ATGCCAGAACAGAGATCTCCAACTATCACCCGTCGATGGTCTTCAATTGT +GACCAAACCGGAATTCAAAAGGAGCTGTATCCAGCCCGGTCTTTAGCCTT +TATGGGCGAAAAAACAGTCGAGAGGTTGGCGCAATCGAAATCGTCGCTGA +CCCACTCGTTTACGTTTCTCCCGATGATTTTCCTCGATGGCTCAATGGGA +CCCAAGGCGTTTATGGTAATCGCTGAACCAAAAGGCCAGTTTCCTCCGTC 
+TCGTCCAATTCCAAACTGCCCAAATTTGGAAGTGCGGGCTGGATACAAGA +CACACATCATGACGAAGCAATTGATGTGCGATTTTTTCGAAAGTTGTGTC +TTCATTCCGTCTGTACCGAAAAAACTGTACATCATGCTGGACAGTTGGCC +AGCGTTCAAGGACCATACAACGATCAAGAACTTGGTTCCCAATGGTCATG +ATGTCGTCATTCGCAACATTCCAGAGCACACAACTGGAATGATCCAACCG +TTGGATGTCTATTGGAATGCGCCATGGAAGGTATGAATTTAAATCTTTAT +AACTTTTTGATAGTATTTTCCAGAGCTTGATCAAGAAGTTCACAGCATAT +GCCCTTCGAACCCAGACGGATTACGTCATCGCACAGAGGAACAACGCAAT +TTGCATGGTATCTGTGTTGTATCACCAGATCTCGGCAGAGCACTTCCGAC +CGTTTTTACAGCATTGTTGGAAGAAAGCTGGATACGTGGGTGCTGCGAAT +ACTTCATCAACACCATTTTTGACTCCAGCTCAATATTGCATTGATCATGG +TGACACAGTGATTTGCTATCACACTGGATGTAACCATCTCGGATTCATCC +GATGCGCAAGATGCAAGATGTTTGTTTGTTTTAATCATTTTGTTGTGTCA +AAACAACATCTTTGTTCATCTCCTTGAATAATTAATAAATTCATGATTGC +ATTACATTCAAGTTTCGCTATGAATAAAATTGTTTATATGAGGGGATCCA +TGGTTGTAGTGGTCCATGGTTAAGTTTAAATTTTAGGGGAAAAGATGTTG +AATGAATCATTATGGTGTTGGTCGACACATTGATCATGCTGATATCGATC +AGAAAAATTAAAATTTGGGCTTTTATATAGTTTCGAAAAAACACGTTTAA +ATTTTTGAAAAACTTTACGTTAGGGTGGAATTGAACCCCAGAGTCCCACC +ATGCTTGTCATCCACTCTATCCACTCGGCCATCTCGCTGTTGCAGGCAGT +ACTAATTGGGGATGGTGATAAGTAAAGCACGTCAAAACGAATAATTTCAG +ACGTAAAAATTTCAGATTTCCAGCCCGAACGGGCAAAAATTTCAGTCATA +TTCTTATAGTAGAGAATGTCAGCTTTCCGATACAATATTTTTTTTTTGAA +TATCGCTCCATTTATTCTGGTCATTCCCTAGTCAGGATCAGCCTCAGGCC +TAGGCTAAGCTTAGGATTAAGGGTAGGCTTTGGCTTAGGCTTATGCTCAC +GCTTAGGCTTCGGCTTGAGCTCAATCATACATCTTGCTTAAGCACGCAGC +TTAACAGCAAAGCTTAAGCACACAGATTGGGCTTAGGCTCAGGCTCAGGC +TTAGAAAAAGTCGAAAAGTTAACTAAGTCAAAAAATTTTATTGTTAAAAA +ATTGTTTAAAAAGTATTATGATTTTAATAGAAAATTGGAAAAAAACCGAA +AAAAAATGAACATAAATCAAAAAAAAAATTTTTTTTTAAAGAAAACTTCT +TTTTTTTGAAAAAAATGCAATTTTAAGGTTTTTCTGAAAAAAACTGGAAA +ATACTTTGATTTTACTGTAAAGAGAATTTTTTCTGTTTTTTTCCAAGATT +TCAACTTATTTCTGCGAGTTTTTTTTTTCAAATTTCCTCTTATTACAGTT +GGTGTCATTACCGAAAATTCTCGGTCGTCCACATTCTCTCATATATCGTA +CATCGTTGGGGTCTCGAAGCGAGAGATTTCATTCTTCGTGAGATTTTCGA +GCTCACTGGTAGTTTGTCCAACTTGATATCAGTGGCTCATAAGGATGGAG +AACAGTCCAAGGTTTGAGTTTTTTTTTCTGAGTTTGCAAACGCGCCCTAA +TTATAAAAATTTGGAAAATTCCACGTGGAAAATAGTTTGGCGAAAGTTTT 
+TATACAAATTTTTCGTGGAAATTAATTTTTTTTCGAAATTCTCTTTTAAA +AAATTTAAAAATGTTTTATTGGCGTATTATTTTTCTAAATTTCAGTTTTT +CGCAGATTTCACCAATAAAATATTTTTTTGAAAAATATCGCTTCGAGACC +CAAAAAAATAATTATGATTTCAATGAAAAATTGAAAAAAGCCAATGAAAT +CGAGAAATTATCGAAAATAATTCGAAAATTGGAACTTTTTAAAAATTTTA +AATTTTTTTCAAAACAAAAATAGTTTCTCAACTTTTTGCTAAACAAGAAA +TATATTTCGTGGCGAGACCCATTTCTACTAATTTTCTTTTTCAGAAATGC +ATAATGCGTTTGATTGATGATCTCGTGAAGCTCGCCATGATCGAAACCGT +ACACGGCCACCGTACCATGAACGAAGTGACACGTGGAAATATTCAAAAAC +TCGTGAAAACCGGAATCCAAGAGTCTCTGAAATCGGCGCACCGAAATTTC +TCAAGGAGTTCGACATTTTCGATTTCCGAAGAATGCGTGAGATATCTGAC +GAGATGGTTGTTGGCCGAACGAAGACTTGAACAGCCGTCTGCGGCTATGA +ATGAATCGTTTGAATTGACCGGTGATAGCAGTAGTAAGAAGAAGGACGAT +GCCACGTTTGATAGTGCGTAAATGGAATTCTACCAAAAGGAGATCTACAA +AAAGAAAATCTACGAAAATAGGATTTGGCACGGTGCCAGATCGATCTTTT +TTCGGCTCAAAATTGGAAAAACCTGCAATTTTTATACATTTTTGGCTTCG +AAAATCAGGGAACTCTGCAATTTTTGGCATTTTTCGCTAAAAAATTGGAA +AAATCTGCAATTTTTATACATTTTTGGCTCACAAACTGGAAAAACTTCCA +ATTTTCATCTCAAAAAATCGGGAATATCCGCAATTTTCCTCACATTTTTA +GCTTAAATAATTGAAAAAATCTGCAATATTTTACATTTTTGGCTCAAAAA +ATCGGGGAATTTTGCAATTTTTTACATTTTAAGTTTAAAAGTGGAAAATT +GCAAAAATCGGCATAATTTTGAACGTTTTCTTTTTTCCGGTTTTCCGTTT +TGTTACGATTTTCGTTTTGCATGGAAAATGCTTCGATTGATACCCTTCTT +TCGGAAACGAGTTTCTCCTGAAATAGGCGTAGTCTCAAACCATTTTATGG +GCGTGGTCTTTCTCTTATCTTGTTTTTATAAATGTTGTTCATTAAAACAT +TTTCAGGCCTTATCGATCTCTCCTTTGGCTCGACGATTTCCGGAAAACAT +AAATTGAACGCGTGGAATGGTGTAATGCAAATCCTGAATGAGCTCCTAAA +AAGTCGACGACTCGAACTTCAAGTCACTGAAAAAATCGTGACAATCCTCT +GGGAAAAGCGAAAATCCTACACAACGGAGCCACTCCGTACTGTGTTCTGC +TCCATTCTCTCCACAGTCGTCTGCCAGGCCGATGTTCGATTCGGTCATCG +GAAAGTGCCGACAATCGACTCGATTCTCAAATATTCGCTGTCTCTAATGC +CAAATGTCGCATCTCTTCCCAGTGCCGCTGCGTTGACCGAAACGATTGTC +AGATTCAGGACAGTATCACGAGAGGGTCTCCGTAACACGTGGGATACCGT +ATCCCGAACTAGCTCCGGCTCATTTGAAGTTGTTCGGCTGATTTCGGCGT +TGATCTCCGTTACGGAATTTGATGAGAATTCGAGATTCGCCAACGATGAG +AGAGTTCGGAGTTGGAGGTAAGTCACTATGCCACGTGGATGGATTTCCTT +TAAAAAGAGTTTTTGGAAAACAAATCAGATTTTGTATATCACTTCACCTT +TAAAGCGAAATTCTTGTTTTTTTGCAGTTTTCGAAAAGACATAATCGAAT 
+GGGTATTGCTGGATCCGAATGCACATAGTCACAAATTACTCTATCAGTTG +TGCCAGTATCATCCAACGTATTGTTATGAATCAGAAGCTTCTTCTAGTGG +TTAGTTTATTCCGTAGGGCTTCCATGGTAGGCAGGCGCGATTTCAGGGCC +TGACGCCTGCCTGAAACCTGCTGACCTTTCGCCTCTTTTCTCTCATTGCT +ATCAATTTGTTGAAAATCAAATTAAGAAATTAAAAAAAAAGTTCACGTAT +GTTTTGTCGTGGATTGAGGCGAGGGGCAGGTGGTGGTGGTCTTAAGGTCA +GGTAGGCAGGTGTTTTCACGCCTACATTGAAGCCATAGTACCCCATATGC +CGAAAACGCTGAAGTTTTCTCATTTTTCCAGACGACTCCCTGCTTCAAAC +TCTGAAACTATGTAAATTAGCTTGCTCTCCAGCTCCCCCATCGGCTCCAA +AAGCCCTCCGACCACTCGAAGCTTCAATTGAAGAGATTGTGAGATATGTG +CATGATAAGCTCAAGAGCATTCTCGGTGAGCATCGGCTGTCGTTTTTTTG +ATTTTTGAATTTTTAAAAACATTTTCAGCGACTGAAATCACTCTGCCTGC +ATTTGTGCTCTGCCACGAATTTGCTCTGAAGTATCCGGATAGATCTTATG +AGTTTAATGTTGGTTTTTTTTTCTAATTTTCGAAAAATTTAAGAAATTTT +AGGATATAAATAGAAATTGAAGAGAATTTGAATTTTTAAAAAATTTCGAA +TTTATGGAAATTCAGTAAAGTTTGTCGGAGTTTTTAAACGAATTTTCAGA +ATTTGTAATAAAAACTAAATTTTCTAATTTTTTTTCCACAATTTACAACA +TCTAAAAAACTTTTTAGAAATGAATTGTTAGAAGTTAGAATTCCTGGAAA +TTAAAAAAAAATTTGGATTTTTTTAGAATTCTTAAAAGTTTGATATTTTC +AAATGATATGAAGTTAACACATTTTTTAAAATATTCAAAATAAGAACATT +TTAATTTTTTTGAAGAAATGTTCAAAAAATTAACATTTTTTGGGACTTTC +AGAAGTTTCTTCTTTTTTTTTGCGTCTTTCAAGATTTTTTGCAATTTGCG +TAATTTTTATTAAGGGGACAGAAAATTCTGAGAATGCGGATTGCACAACA +TATTTGACGCGCAAAATATCTCGGAGCGAAAACTACAGTAATTCTTTAAA +TGACTACTGCTAAAATTGAGCATAAAAATAAAAATATATCGATGTATCTT +GGATCCCGTAAATCGACACTAGCGCGCTACAGTAGTCTTTTGAAGAATTA +CTGTAGTTTTCGCTACGAGATATTTTGCTCGTAAAGTGTGCACGAATTCT +TATAATATTTTGTTCTCACAAAAGTTGGTCAATTTATGGGATTCTCAAAT +TTTCGATGTTTTTTTTTTTGAAATTTAAGATTTTTAAAACATTTTTCAAA +AAACTGATTTTTCTCTTAAATTGAAAAGGTTTTTTTTTCAGAAAATGTAC +AAAAAGCTCTACCAAATCATGGAAGATCAAGAAGAAGACGAGTTTCTCCA +ATCAGCTCGCCATTTCTCAAAATGGCCTCAAAATCTGACACTACCAATAC +AAAAACAGACAATAAATTGCATGGCTGTCTTTTTCGAAGCGAATCTTGAC +AATCAGCTCGTCGATCTCTGTCAGTGGAGTGACCGACGAAAAGTGCTTGT +CGAGATGCTCGCCGAGCTGGCCGCCACAAGATCTGAAATTCGAGATAAAC +TTCAAAAATCGATGCCGTTCAACAAATTCGTCAAGGAGTGTATAATGGAG +AATCGCGGTGATTTGTATGAAATGACAAAGAGATTTGAGAAATATTCGTT +TTTGCTCTCGATTCGGAATTTAATTGTTACTAGGATGATTATAACGTGAG 
+TGTTTGTTTTGGGGATTGCCACGAAAAACTGGAAAAACGTTTCGAAAATT +CAAAAAATATTTCTTCAACATTAGAAAGAATTTCCATTTTTTATATAATT +TTGGAAAGTTTTTAGATTATTTTAGATAAAAAATCATTTCGAATTTACAA +AAAAAAATTATGAAATTTTAAAAACAAGTTTGAAAATGTATTTTTAAAAA +ATTTAAATATTAATAAAAAATCTTGTTTGAATTATTTAAATGTTTGCTAT +TGTGTTTTAAAAAAGCTTAAATTAAAAATTTTTTTCAACAAATGACTAAA +TTAACAATATATAACAAAAATTCAAAAAAAATTGTTTTCGAGAAAATTTT +TTGCTTTTTTTAAAAATAAAATTATAAAAAGAAACAGAATAAAAATCGAA +AAAAAAACAATTATTTTGAAATTTTCAAAAAAACTAGAAAAATGTAAAAA +TTTTGAGGAAAAAAATTCAAACATCCCAATTTTTGCAGAAACGAAGCCGC +CCGACTCCTAGGAGATGGTGAAACAATCAGTGAAACCGATATCTTCATAA +TCGAAAAGCGTACTCTTTCCACGTGTATTCGTAATGTGTCCGAAGGAAAA +GAGTTGAGCGGCTACACACTGGACCCCTATACGGTAGCTGCCAACGTGCA +CAATGTGCATTTCGATCACATAAACGTCGAAATCTATCTGGAATTGCTGA +AAAAGTCGCCATTTTTCGCTCAGAACATTGTACGCCACTTGTTACGGCAG +AATGGAAAAGAAGCAGAAGAAGAGACGTGGCACTTGCATGCCACTGTGCT +GAAAATTGTGATGAAAGATGAAAAGTTGCTGGCGGTAAGGAATTTTTTTG +GAAAAACTTGAAAAAAAAGAATTTCATCGTATTTTTAAATAATTTTTTGA +ATGAAAAATTCGGTACTTTTCCTTTTCTACCCCGCATTAAAAAAATATTC +AAACTATTGACTTATTCGAATTTAATTCTCACGTGAATGTTTGTATTAAT +ACTATTTTCATTTTTAGGCTTAGAAAACAACAGTACTAAGCCTTGAAATA +CAGAAAAAGAAAAAGAGTTCACGTTTTTATCAATGAAAATATTCCTGAAA +AAATGCGGGGTAAAAAAGGAAAAGTACCAAAAATCAAACTTTCAAAGTTA +ACAATACGCAAGTTTTCAAGTTAATTTTAATAATGGATTGTTTAAATCCG +GAATTAAAAAAGTGGTTCAAATTTTATTTTATTTGACTCTATTGTGGCGA +TTTTCATGACAATTGAAACGATTTTTGAAACTTGTATTTATTGAAAAAAA +AGTTGAAGCAGGTCGTAATTTTAATTTGAATATTTAAAAAAAACTTATAT +GGGTGCTTCAATTTTGCAAAACATTGCAATTAAAAAAAAAATTTTCAAAA +TGTGGTCGGCAAATCAGAGTGTTTGAAAAAAATCCTTCTAATTGTGTTTC +CGGAATTTTTGGGATTTTTTCTTCGAAAATAGGAATGGTTTAAATTGAGT +TAGAAATTTTTGCAAATAAAAATCGGTTTTTTAAAATCAATTTCTTGCAA +TCAAAAAATTGAAATTTTCAGCAAAATAATAAAGTTTTCTCTAATTTTTC +AATTTTCAGGTATGCGTGGCCACAATTCCAAATATGGTTCGATATCTCAA +AGTCTATCAGATTCATTTCAGTCCGAAATCCAACGCGGCGAAGTTCTTGT +AGGTTTTGTAACTGCGAATTTCAAAATTTTGCTCGTCTAATTTGACGCCT +GCTCTGTTTTTTTTTACGCGCAAATCTTAGAAATCGGTGACTTTTTCGTT +TAAAAACACCATTTTTCAGTAAAATCTGAGCAATTTTCGGTTTTTGCGCG +TCAAATATGACTAATTGAGTCAAACTTGACGCGCAAAATTTTCAAAAAAA 
+AAATTCAATATTTTTCCAGACACCTCGACATGGAATCGATTTCCCACTGC +CAATCATATTTGCGAAAACCAACAAAATCATCCAATCTGATCACGGCCGC +CAACTTTTTGACACTTTTCGGATGTGAAAAGCGCACGTGGAAGCGCCCGA +TTCTCAGATTTTGGAGCATTTTCAAGCAGCAACCGGCTATGTGTTGCGAG +AAATTGCTCATTTTTGTAAGCAATGATTTTTTGATAAGAAATTTCCCAAA +AATTTTGAATTTTTGAATTATTTTTGTTAACTTCTTATTTTTAACTCAGT +ATTTTTGCAAACTGAAAATTTCCAATTTTTGGGCTTAAAAAATCTCCATT +TTTCAGGCTGAAGAATGTGTCGAACTTGGCCTGAACCACCGAATCGCTTG +TCTTTTACGCGCACTGACAACCAGTGAATTCTGCCGAAAAGCTCTATGTG +ATGAATATCTGAAAATCGCGTTTCAGCTGACTTATCGATCGATTTTTCTG +ATTTTAAGCAAAAATGAGTGCAGGTTGGTGGAGAAGGATATGTGGAAAAA +CAATATTTGAAAAATAAAAAAAAAACTAATTGATTTGAATTTTCAAAAAT +TCCAAAAAAATAAAGAATTGTATTCCAATTTTCATGAAAATGGCAACAAA +AAACTCAAAGTTTCATTGGAAAATTTTTGAAATTACAAAAAAATTAATTT +TTGAGTTTCAATAAAGTTTCAAAAGTTTAGTTTGAGTAGTGTTGTCAACA +AAAATTGAATGCCTATTAGAAATAGTTGAAAAAATTGGAATTTACAAAAA +AAAACAATTTTTAAACTAATTTCTCGAATTTCAAAAAAAAAATCTTCTAA +TTTTTGAAATTGAACAATTTGGAAATTTTTTGAATTTTCAAGAAAAATTT +TCAATTTTTCGAATATTCAACAGTTCCAAAAAAAAATTTGTTGGATTTTT +TTCCAATTTTAAACAAAAAATTTTCAAATTTCCACAGATTTTTTTTTAAT +TTTTTAAATTTCAGAAAAAAATTAATTTTCATAAAAAATTGAAAATTTCC +ATTCTTCAAAAATAAGTTTTCACATTTTTCAGACCAGAAATTGTGGAGCT +CTGCGATGACATGAATCTTCGGTACGATCTTCTCCAGCACCAGATCAAAC +ATGTCGCGGCTCACCATTTGGAGCACTTTGAGCGTTTCGAAACGAAAATC +GCATTTTCTGTTGAGAAATTTCTGAAATCTGGAATTGATGGAATCGATTT +TGAAGATTTAGGATTAGTCGAGTTTTATAAGCAGTTGAACGAAAATTTGA +CAGAAGACGCGATCAGGAGCAATGAGGCGAGAAGTAAGGCCGTGGAATAT +AAATTGGTGGCCGAGTTTTTTGTAAACTATGCCACCAAACTTGGTTTCTC +CGTTCGTGGCCACAAAAATTTTGAATTTTTTGAGCTTCTCTTATAATGTT +TAAAAGTGATTTCCTGGCAAAAATTCATTTCTTTAATTAGAAAATTCCAA +AAAATTTGAAAAAAAAATGTAAACTCAATAAAAAGTCTAATTGTTTCAGA +TTTATTCTGGTTTTTATTGAATTTTCAGATTTTTTCTTCAATTTTCAGTT +TTTTTCGACTTTTCTAATTAAAAAAAACCAATTTTTGCCAGAACAAAAAC +CATTAAAAATCGACAGTAGTCATCCCATTGGAGTACAAAAATTTTAATGT +CCAAATTTTCATGACGAACGAAGAAACCTAGTTTGGTGGCCTAGTTTCCG +AAAAAACTCTGTCACCAATTTCTTTCAAAATTCAACAAAATTTTAGTAAA +TTCCAAAAAAACGAACATATTCGGCGAAAATTATTTCAAAAGCTTCCGAA +CAAAATCTAAAAACGTTCAGAAAGTTCTGGGTAATTTTTTAAATATTAAA 
+AAAATTCAACAAAAAAAGGGGGGCAGATAGCTCAGTCGGTAGTGGTGGCC +GCTAGCAGTCTGGAGGTCACGAGTTCAAGTCCGGCCTCACCCCCTAGGTT +CACCCAGCCTCTATTGGGAAGTGGAGCAATCCACGACTGGATTATCGGCC +ACAGTCCCCGGCTAGGACGTGGCTTAAATTACAGCCCAGAGGGATCACCA +CCAGGCAGTGTACCTGAATCCCAGATCCGCAGTGCATAGCACTTGAAGAA +CGGATCGTCCTTTAATTCTTTCAAAACTAATTTTTTACTAAAAAAAATTT +CCAGACATCTACATCGTCGACATTCTCTCAACAATTTGGCTCCAGCTCCC +CTCAATTCGTCCTCAAATTCTACCGATTCTCGCCCGCTTCAAGCACATTT +CCCCAGCGTGGACTAATTTCCCACAGCCGCCTCATATTTCGACAAACGAG +AAATCATTTCTTCAACATCTACGATTTCATCTTTATCTAAAAATGATGAA +TATCTCGAAATCCATGACGCAAGGCGAGTATGCCACGTGTATTATGATGC +TCCTGACGAGCTACGACTCGAGCCATTTCGTTGCGGATTTGATAGAGAAA +AAGCAGCTGGGAAAGCTGAAATTGCAGCAGAGAAGGAATGTTTTGTGCAT +TTTGAGCCGACTTTTGAAAGATCAAGCTGTGATGGGTGATGAAGATGAGA +CGATTATCGATCCGATTCTTTTTAAGGCAATCACCAAAGCTTCCGCCGTT +TTTGTACGTTTTTATACGACGGAAATCACTGAATTTCATGTTTCTATGCC +GAAAAATTATGAGCAATTAAATATACATCACAAGAAAGAAAATAGTTGCC +GGTTTTTGAATTTTTCTGTCATATCGGCAATTTTGGCAACTGCCACTTTT +TGAAGTTTTTAAAAATTTGGTAAATTTGGCAATTGCCAAAATTATCTCTG +TTTCGATTTTTTTCATCAGAATCATAATATTTTAAATTTCGCGCTAATTT +TTTTAGCAAATGATCTTCGAAAATTTAATGTCTCGCAGTTAAAAATACAT +CTTTGCAGGAAGACACCGCCGCGTGCATCGTACCATTTTTGTTCAAAATT +TGTGTGGATTTCAAGGGAAAATACGATAAATGTGTGATAAATTTGCTGGG +ATGTCTTAAGGGAGTTAATGCAGGTATGATTTTTCGTCAGAAATGACTGG +AACGAGTTTTTCGCATCAATTCAAATTTAATGTAATGGCATTTGGCATAT +GACATCGAATGTCGAATGCTGGGAATGCCGAATAATATATTTTGGTCGAG +TAGGAGAATAAACTAGACAAAGTTCAAATATATTCCCAACAGGGATGGCT +TCACCCGTGACCCGTTTCACCCGTTCACCCATTTTTACAAGGGAAGGTTC +TGAACTCGTTATCGGACTTCGTTACGCCACTATATACATTCGATAGAGAA +TGGTTACAGATGATCACTCCAAAAAATTTAGCTGCTTCAGAGCAGGTTCG +ACCAAGTTACGACACTTTGAAGATGCCGAAAAAAAAAATCATTGATGCAC +CCCCTTTGAAAAAAATTGAAAATTTTCACTGAAAATTTTTTTTTTCTGAA +AGTTGATAAAAATAGTTGTAATCGATTTAAATAGTAAAAAACATATATTA +TACAAGTTTTAGCCCATCACTCTCAAAAAAACCCTTAAATTAATGTACAT +ATCTTGAGAAAAATTCCAAAAAGTAGATGTTCATGTAGATCAATTTAAGG +GAGAATTATGAACTGAAACTTGTATGGTATGATTTTTCCATCATTTGCAA +CTATTTGAAAACATTTTATATCAACTTTTAGAAAAAAAATTTTTTTTTGA +ATTTTTTGGAATTTTTAAAAAGGGGTATTTTGGCACTTTCAAAATGTCGT 
+AACTTGGTCGAACCTGCTCTGAAGCAGCTAAATTTTTTGGAGTGATCATC +TGTAACTAATCTCTAACGAATGTATATAGTGGCGTAACGAAGTCCGATAA +CGAGTTCAGGGCCTTCCCTTGTTAGGCGTTTTTACGGGTGACGGGTGACC +CGCTCGCCCGTTTTTTTTTTCGTTTTAGTTGTTTTCACGGGTGACGGGTG +GCTCGCTCACCTGTTTTTCACCCGTTTTTGGAAGTTTTCACGGGTTATGG +GTGATCCTTGGTTCCCAAGAGGCAGCCAGTTTAGGTCAATAGGTCGTAAA +TATCGTTTTGGGCCACGTTTACAACTGGGCTGTGTGCAACTAGAGGTGCA +CTTGGGTCGAAAAGGGTCGACCCGCTCCAACTTTAGGTAGCATTTGTAGC +GACCCCGATAAATAAAATGTCGACCCCGAATGAAAATTTTAGCAAGGAGG +CCATGCTCGTTTAATTTCCATTTTCAAAATTATGATCGGACTATTGGTTG +AGCTACTGTGAAATAAGCGGGTCGACCCTGCTCGACCCATGAGCACCCCT +AGTGGCAACCATGATTTTCGGCAACTGGCAATTGCCAGAGTTGCCGAACC +CAATAAGTTTCGGCAACCGGCAATTGCCGAAGTTGCCGAACTCTAAAAAG +TGCAACCGGCAACTTTTGCGCAGCCCTGGTTTACAACTACCTAACGACTT +TTGGTAGACTAGAAATAACACATGTATTCGAATTCGACATATGCCGAATG +CCGAAAAAAGTATTGAACGACATTAGGCACAGGGATGGGCGGATACCTTT +TCGGATAACTTCACTAAATCAATTTATCAATAAGTATTGATTTAGTTATT +CTGGAGATACTATACTAGAATAAAGAGTACCTATTGAGCAACATTTTCAT +ATGACAAAAACTTTTAAAATTGTACGCATTTTCCTCGACAAGACCCTGAA +ACTCGTTATCCGATCAAAATCGGCTAACAGATAACGGACTTGCATACTTT +TTATACGGGAAACGTGTAGAATTTTAAAAAGTTTTGTGCCATGAAAATGT +TGCTCACCAGTTTCAATTTTGTTCTGATGCACTTTTTCCAGAATTACTAC +ATCAGCACGTATTGATAACTTGATTTAATGAAGTTACCCGAAAAAGTATC +CACACCCCATAATCAGCATATTTCATATGTCAAATATCAAATTTCCTGAA +TTCCGCACACCTCTGCTCTCCATACTCCAACTAATTTTAATTGCAGAAGA +CGAAATAGTAGTCCGCTGCCTTGCCGAATGCGTCGACTCCATTGGACTCA +ACGTGATTGCTCGTTACGAACGCCTGAACATTGAGACTCATTCGGAATTC +GGTGTGAAATGGTTCTTCAAATTATCCCGTCTCTTTCTGAAACATGGATT +TACTACGCATTCCTTCGCGATTGCGAATATATTATTTGACCGACTTTCGG +CGAGAAAACGAAATACAATGATGATAGATCGTACGAGTTTAGACCGAATT +GACAGATCACAGGAACTTATCAATCTTTTGGTGGAGATCTATGTGGCCGA +GGGTAACTCGGTAGCGTTGTCCTCTCTGCCACCTGCGGTACAAAATAGAC +CGGATGTTCGGCAGGTTATGAATAAGAGTTCAAAGGAATGGTTGAAATTG +CTGTCTTCGAATCAAATGGACTCGTGGGAGTTGACTATTGTGCAGTGGAT +GTGGTAATTGTTTATTTTGAAATTTTTCATTAGAATAGAAAGCAGTTTTC +TGGAAATTTTCAGGAAAACGCATTAATTTTTTACCTATTCTGCATTTCTA +GGTTCTTCAATTTTTTGAGAAAATATATATTCCTTCGAAAAAATCACTAA +CTTCTTTTCGGAAATTTAAAAAAGTTACAGTCGACGACTTTAAAAAATTC 
+AGTTAATAAAATTAGATTTCAAAAACAAATCGAATTTTTTAATTTGTACC +GAAATCTGGAAAATATTTTTAAATGACTCCAAATTTTTCCCTGATTCCAA +ATATCTATGAGATAAAATTCAAAAAATAATCCCTTTTTGATCTTGAAATC +GCCGAATTTCATTTGTGCACTCATGAGATTTCGAAGTTTCGCGCCAGAGA +CCCCATGTGTTGATTTACGAGATTTGTGTATATTTACGAACCCCTTAACC +TTTATCGGTTGCTGATTTCCGTTTTTCAACGAGTTTTCTTCAGTTTCGTC +GGTTTTTGTTAAAGTTTTTGTTTAAAACATTTTCAAAAAAACTTTGACAA +AAATGAAGGGAACTCGTTGAAAAACGGCAATCAGCATCCGATAAAGGTTA +AGGGGTTCGTAAATATACACAAATCTCGTAAATCGACACATGGAGTCTCT +GGCGCCAAAATTCGAAATCTCATGAGTGCACAAATGAAATTCGCGATTTT +AAGCTGAAATAAAGAACCAGGGAAAAACATTTTTTCACATAGATGTTCGG +AATCAGGGTCAAATTTAGAGTGAAAAACTTTTAAATTTTTAAATTTTTTC +AGTGGCATCCAATTCAATGCAATCACCGGCGATAAATACCTGAATTCAAT +TCTTCGTTGCAATTTCAACGAGTACACCAAAAAAATCGATTCCCCATTAA +AATTCGTCTATTTTCAACTATTCCATCTTTCGACGAGCACTCTGGAAATC +GAAGAAGCCATTTCTAGCATGCCGTTGGCTCCAACAATCGATCAAATGCG +GCTTATGATTATAGCCAACGCGACGGCAAGCTTCGAGCCACAATCCGTTG +AAGAGCACGTTGTTCGAGCTGTTCGAGAGCTCCGAGAGACGTCAAATCGG +CGAAAATCCGGTGGAAATGTGAAGGGAATTAATGAAAAAACAACGAGAAT +GGTGAAACTGGCCGAGATGCTCACCGAGAATAAAGCATACGATGTACGGA +GCAAAAAATAATGGAAAATTTGAATTTATTCAATTGTCAAAAAAAAGGAA +ATTTCCTTAATTTTTCGGTTTTTTTTTTCGGAAAAATCTGAATTTTTTCT +ATTTTGAGCAAAAAATTTAATTGAAAATCCGGAGAAAATCGTATACTTTT +TCTGCGAACCATTTTGTGGGCGGAGCCTAACTAGTAACACTTTTCAATTT +CCAAACAGAAAAGTGAGAAAAAATTCCTCTTTTTCTCAAAAATCTAAAAG +AAAAAAAAATCGGAAATTTTTGAATTTTTCGGTTTTTTCTGAATTTTTCA +ATTTTCATTCAAAGAAGGTCGAAAACTTTTCGATTTTCCGGTTTCCTTTT +TGAAAAAAAATATTTTAAAAATGTGTAGTTTGTAGTTTGTAGTTTTTGTA +GTTAATGTCTTCCACATTTTTAGGCGGCAATAAACCTGCTCGACACGTGG +GAGCACGAGTGTCTCCAATGGACATCTGTCGCTGCCGAATCAATCGATAT +CGATTTAATTCGAATCTGCAAACAACACGTGACGTGTCGGTCGGGAGATC +CAAGAATGGCGGACATAAATCTACGAACAATGCATCCACGTGTCCCGGTG +ATGAGTGACCTGGCGATTGCCGAATGGTCACTCGCGTTGAGCAAGATTAC +AATTGAATATCGGAATGATATGGAAGAGGGTATTCGGATTTTGGAATTTG +GATGCAAACATTTACAGAATAAGGATTCTGTAGAGACGAGGTTAAAGGTA +CATGCGGTCGGGTCTTGCAGCGAAATAATGCATTTTTAAAAATTCGAGAA +AAAAATTGTGAATTTTGCGCGTAAAACGTGATTCAATCTCGAATTGTTCT +ATAGATTTTTCCGACAATTCGGAAAAATGCCGGAATTGAAAATTTCCGGC 
+AAATCAGCAATTTGCCGTAATTCAAAATTTCCGGTAAATCGGCACATAGC +CAAAAATTAAAAATTTCGGCAAGTCGGCAAATTGCCGGAATTCAAAATTT +CCGGCAAATCGCCAAATTGTCGAAATTCAAAATTTCCCGAAAATCGGCTA +ATTGCCGGAAGTGAAAGTTTTATTGATAGAGATAAATTTAGAACTATCAC +TAGAATTTAAATGTAAAACAACTAGGAGTTATCATGTAAAACTTGTCATT +TGAATTTAACACTAGGAGTCCAATAGATTTTCAGATTTTAAAAAATTAAT +ATAAATTCAGGAAATTTTTTTTGAATTTATCCACATTAATATTCGGTATC +AGGAGCATAAATAGGATCTATTGAGATATTTTAACAAATTTCCGAACAAA +GTTTCAAATATTTTTTTCCAATTGATAGAATGTTCCATGAAACATTACAA +AAAAATTATTCATCAGGTTCTCCTAAAACTCCATTCCGTCTGTATTGGCC +AACTGTCGAAACTCGAAGAATATCGCGAAACGCGTACCTACCGTATGAAG +CAGCAGGCGGTCACCGCATTCGAACAACAAATTCAAAATTCGTGTCGAAC +CAGTCTGGCACGTGGCAATTCGGGTGACGAATGGACGAAAAAAACGGTGC +AACGGGTGAGAAAAGAGCATCAGTTTGAGAAGAATGATTTGGAAAAAGTG +GATAATTCGTTGAATTCGGCGGCCCGGAAAGCTGTATCGTCGGGTTTTGA +TGCACTTTGTGAGCAATTTTTGAAGTTTTTATTGAAAATTCAAAACTTGC +ACAAAAGGATTTTAAATTCGATTTTTCGCGATGTTTTAGCAATAAAATTT +TGAAAAATGACAAATTTTTGAAATAATTTCAGAAAGAGTGTAAAGTTTCA +GAAAAAAAAATTAAAATTAAAATTTATTTTAAAACTTAAAAAAACCGACT +TTTAGCGAATTTTTTTACGAAAAACAAAGTATTTAATTTCATAAATTAAT +TCAAAAAAAATATCTAAAATTTTTGTGAAAAACTGTTTTATACAGTTTCA +GAAAAATTTAAGAAAAAATTTTTTTTCAGAAAAAACCGGATTTTCTGTTA +ATTTGAAATTATGGAAAAAAAAACCCCAAAGGGTATTACGGAACTACAAA +ATTCTGAGAATGCGTACCGCGCAAAATTTCTCGTGGCGAAAACTACAGTA +ATTCTTTAAATGACTACTGTAGTGTTGGTGTCGATTTACGGGCTCGATTT +TAAAATTGAGCAAAAAATTATCACATAATACGAAGAAACGAAGGAAAATG +AAACTAAGTTATAAAAAAATTCGAAAATCGAGATCCCGTAAATCGACACG +AGCGCTACAGTAGTCAAAAATTAAATTAAAATTAAAGGATTACTGTAGTT +TTCGCTACGAAATATTTTGCGCGTCAAATGTGTTGCGCAATACGCAGAGA +TCTCAAAATTCTATGCTCCCGTAATAGATTTAGAAAAATTTTCCGAAAAT +TTACGTCTCGAAAAAGAAAAAAAAACTGGTGTAAAAAATTACACCAGTTT +TAATATTTACTGAAAAAAATTGGTAAAATCTAAAAATTAATCAGCTGAAA +TTGATATGTTTTGGGATAAAAAATTTTGGCGGGAAACTCAAGTTAAAAAA +AAAAATTTCAGTATGCATCAGCCAACTGGAAGACGACGACGAAGCGATCC +GCGCTTCATCTCTCATAATATTTCCATTAATCGATGTGATCTACAAATAC +GAAACGGACGTCGGAGTGATCGCCTTGCTCAAAGAGCACACCAAATCGAA +GCTCCCGTCGAAGCTGTGGATAAGTGCCACCTCACACATTGCCTCTAAGT +GCTTCTCCATCGAAAAATCGCAAATCACGAGACACTTATCACAGATTCTG 
+TGTCATCTCATCTATGACTACCCGTATCACGTTTTGCACACAATTTTGAT +GTATGATGACGAGAAGAACGCTTCCAAGGTGAAAGGCTTCTTGAAAACGA +TATTTGACGCGCGAGCTGACCAACGGGATTCGTCGAAGCTTAAGGAGATT +GTGATTACTATTCGTGAGGCTCACCAGGCTTATAGGTACATACGGCGGCT +AAATTTTTTTTGAAATTTCGAAAAAGACGAGAAATTCGATTTCTTATCAA +GTTTACAAAAATCTTCGCTTTTTTCTGACAATTTTAGTGTCGTGGTATCC +AATCAACGAAAAAAATTAGGTTCTTCTGGTTATTTTTGAGCATATTCAAT +AATTTAAGCAAATACTCTTTTACTTCTAAACTTTCATCGGCGATACGTGA +ACAATTTCCAAAATTATATATTATCTAACGCTACAGCATATAATTTTTAC +CAATTTTCATTCATTTTGATGGAAAAAAGCATCTAGAACATGTAATATGT +TCAATTGTTCGATTTCAATAAAAACCGACGGCATCCTTGGTTTGAATCGA +AAGTTTAGAACAGTTTTTTTCAGAGAAATTGCAATGCTTGACGTGAGAGG +AAACGTTCGGATACAACGTGTTGAAATAAACGGAAAAACGATGTACCGAT +GGCCACATGATTTGAAGATTTTCAAGTGCAAATTGCGTCAATTGCCGATT +CCCACCATTTCGCAAAAGGTTTGTTTTTATTTTATTAAAATAATTTAATG +AATATGTATTTGATCATACAAAAAAAAAATGTTTTCTTCTAAATTCCAGA +TTGGTTGCCCGGGTGACTACTCGACGACTGACCTAATCACGTGGAAACGA +TGGAAAGATGTATTCACAATTGCCGACGGAATTTCCACTCCAAAAATCTG +GGAAATTGAGGGTAGCGATGGAAAATGGTATAAAACTGTGTGGAAGAAGG +ACGACGTACGACAGGATGTGCTTGTTGAGCAAATGTTTGATGTTACGAAT +AATATGTTGGAGAAGGCGATGCTTCGGACGTATAATGTTGTCCCGTTAGA +CACTGAATGTGGAGTTATAGAGTTTTGTGGTGGAACTGTTAGCTTAAGTT +CGTTTTTTTTTCGTTTTTGAAAAAAACCAAAATAGTATTACGATAACACA +ATATGCTGAGAATGCGTATTGTACAACATATTTGACGAGCAAAATATCTC +ATAGCGAAAACTACAGTAATTTTTTAAATTACTACCGTAGCGCTTGTGTC +GATTTACGGGCTCGATTGTCGAATATTACGGGGCCATGAGATCATGAGTA +TGCCTATTTACTGGCGCGAAAATATTGGCAGGCCGCGGCAGCGAGAGAGC +GTGTGGCGAAGAGAGACGCAGGTCCCTTCGCTACGAGATATTTCCCGCCA +GAAAAGTAGCATTCTCATGATCTCATGGTCCCGTAATAGTGACAGCGATA +TTCCATTTATTTCACTTCAAAAATCGAGATGCCGTAAATCAACACAAGCG +CTACTGTCATTTAAAGAATTACTTTAGTTTTCGCTACGAGATATTTTTCC +CGTCAAATATGTTGTGCAATACGCATTCTCAGAAGAAAAAAGAGGGGGGC +AGATAGCTCAGTCGGTAGTGGTGGCCGCTAGCAGTCTGGAGGTCACGAGT +TCAAGTCCGGCCTCACCCCCTAGGTTCACCCAGCCTCTATTGGGAAGTGG +AGCAATCCACGACTGGATTATCGGCCACAGTCCCCGGCTAGGACGTGGCT +TAAATTACAGCCCAGAGGGATCACCACCAGGCAGTGTACCTGAATCCCAG +ATCCGCAGTGCATAGCACTTGAAGAACGGATCGTCCTTTAATCCTTTAAT +CCTTTAAAAATGTAAAAAAATTTCATTGCAAAAATATCAAACTATGAGAT 
+TTTTTCTATTATTTTTCAGAAGAAGTTATGTGTGGTGTGACACGAGAAGG +CGGTCTCCACCGGGAGTTCAATTCAGAAGAAGTTTCGGCGAGTAAAGTGT +CGTCAATGATGAGACAAGTACAGACAGAGTCCACAGAGACACGACGACAA +GTTTTTGTGGAGATTTGTCAGCAGTATTCTCCAGTTTTTAGGTAATTTTT +CTGAAAGTTGTCGAAGTTTTTTTTTTTGAAATTCAAAAAAATTCCCAAAC +AATTGGCCTTTTTTCAAATTTTTCCAAAAAATAAAATTACAGACACTTCT +TCTACACCAACTTTTCGACGGCGCAGATTTGGCGGCAAAAAATCATAAAT +TACAGGCAGAGCTTGGCCACGTGGAGTATTGGTAGGTTTCGAACTAAAAA +TGCTAAAAAATTTTCGAAGAATTCGAACATCTCAGATTTTTTCCGAAAAA +TAATAAAAACGTTATTTTCCAATTTTCTGAAAATAAGAATTTATTAGTTT +CTTGCGAAATGAAGAGTCTCAAAAAATCATATTTTTTTTAAAAATCAGAT +TTTTAAATTCTTTTTTTAAAAGTTTTATCGTGAAATTGAACGTTTATGAA +AAATGGCGAGAAAAACTTTTTTTTCGCTTTTTCAAATAATTCTCAAAATA +AAAATTCCAGTGTGCTACATCGTTGGCCTCGGCGATCGACACGCGTCGAA +TATTCTATTCGATCAGAAATTGTGCACATTTGTGCATATCGATTTGGGAA +TGATTTTGGAGTATAGTAAACGAACGTTGCCAGTTCCCGAACAAGTTCCA +TTCCGTATAACTCGAGATGTGCTCGACCCGATTCTGATTGAAGGAATCGA +GAATGGACAGTTGGCTGAGGAATGTACGCAGATTATGGAGAAATTGAAGG +AAAATGGAAAGGTACGGCGGTCTGGGGAAATTTATACGAAAAGAAAATTT +TGGACTGACAAAATTTTGGACTTGTAAAGCTATTTAAAAAATTCAAATTT +CCCGCCAAAAATATTTTTTCAAAGAATTTGAATTTTAGCTAAATTTTTAA +ACAAAATCTGAAGTTACCGCCAAAAGTTTTCCAAAAAAAATCGAATTTCC +CGCCTAAAATGTTTTTCAATAAATTCAAATTCCCGCCAATTTTTTTTACA +CAAAATTTGAATTTCGCGTCAAAAATTTTCCAAAAAATATTCAAAATTCC +CTGAATAGTTCAATTTCCCACCAAATATCAATTTTTCTGAATAAAATTTT +AGAATTTCTCGCCAAATTTCTTTTTCAATTTTTTCAATTTCCAAAATTTC +CAGGTAATCCTCGGTGTCGCCTCTGCTCTACTCCGCGAAACGATGACAAA +TTTCCGAGAAGCCGAACAGGCTGCCGGCCGCCCGTCCTACATTTCTGAAA +TGGCCATCGGACGGCTTCGAGAAAAGCTACGGGGCACCGATGACGGTGTG +ACGGCCCAATCGTCGAATCTTCAAATTCGGCGACTTTTACGAGAAGCTAC +GAGTGCTGACAATTTATCGCGAATGTTCTGCGGATGGATGCCGTTTTTGT +AGAGAAATCAGAGAAAATTTTTCTCGAAAATTAAAATTTTTACTGTATTA +TTGTTTTCTCGTATCAATTTTTCAGAATTTGCTTTGTTATTTTATTTTAC +GCGCAAAATAATCTTTCTAATTTAGAAATTTTAAAATTATGGAAACTTTC +TGAAATTTCTGCAAAATTCTAGAAAATCAAAAAATTTAGTTTTCCATCCT +AATTTTTTTTTTCGTTGATTTTCCCCAGAAAATTCAAACTTTTACTGTAT +TATTATTTTTTCATATCATGTAATGTTTTTTGTTCAACATTTACATGTTT +TTCCTACCTCTTCTATGATTTCCCCTTCCCCCAGGTCTCATTTTTCTTCA 
+CAATTTTTAAAATTGATTCACTGTTGAATGTGTTGTCTTTTTTCTATGCG +AATTTTATTAATTTTATGAAAAAAATTTGTAATATTTTTGTGTAAATTTT +GAGTCTTCTTACACTACAAAGTTAACCATTTCAGAGGAGTTTCAAAATAA +ATAGTGAATTTTTACAAAAATTAGATTTTCTGATTTCAGCGTACATGAAT +TGCCCGTTTTCAACAAAATAAGACAACTTTTTATTTTTGCCCAATTTTTT +TTCAGCCATCTAATGACTGTCCTTTTTTTTTGGGCAAAAATAATAAATTT +TCTAAAAGCGTTTAAAACTATTATATTTTGAAAAAAGACCATTTTTTAGG +GCTTGGCAATTTTAAGTTCTCTAGCTACAAATTGAACCAATTTAGAGGTT +TCAAAATTGTGAGTTTTTGCAAAAATTGCCAAATTTTGCCACTTTTTAAT +GGTTTTTGATGGGTTAAACCTAGATTTTCTGAATTCTGCATATATGAATT +ACCCGTTTTTAACAAAATTAGACAACTTTTTATTTTTGCCCAATTTTTTT +TCAATGACTGTCCTTTTTTTTTGGGCAAGAAAAATAATTTTTTGGAATTT +TTTTCTGAAAATGTTCGAAATTTTTGAAGTTTCTCTTGAAGCACTCCGAA +AAACCAAACGACCAGCACTACTTGCAAAGTCCGAAAATTTAAGTTTAGAA +AAATTTTGAAGTTTTAAAAGCTCCCAAACCAGAAACTCACGATATAGAAA +TTATTTCCAAACATTTCCGAAAATTCCAAAAAATGAAAAATCAGAACAAT +TTCGAAATTTTTTTCAAAACACTTCTGAAAAAAACCGAAACTGCAAAAAT +TTGAAAATGTGCTAAACTCCAAACTGTACGATTTTCCACATTTCAGAAAT +TCCAAAATTCGTAAATCTGCCAAATCCCTTCGGCGCAATAATCCCCCGGG +GTACCCCCCCCCCCCGCCCTAAAACCTTTTGCCTCTATTGTCTCTGGCGC +AGAAAATTGAAATTAAAAAATGAATCGATATTTTAAACTGTTATTCGTAG +CCATCTTCTCCGGATGTATAATCGCCTACACGAATCTATTCAATCGATCG +TTTTTCAACGGGTACTGTAGTAATAATGCTACGTGGCAACAGTCACAAGG +CGAAAGCGACAGTCACGTGGCAAATTCGTTGGATTTGTGTCGTGGCGATG +GTGTGAAGCAGTGTATTCCGCCGTTGATGAGGTTCAAACAGTTGTATCGG +GTGAGAAAGATTATCGATTTTTAACACCCGTCTCGCCAATTTTTTTCTCA +GAAATTGGAATTCCCGTCACAAAACTTTTACAGAATGTGAATTTTCGAAG +AAATTGGAATGACACCCAAAAATTTTCTCAGAAATCTAAATTCTCGGCAA +AAGTTTTCTCAGAAATTTGAATTCTCGTCAAAAATTTTCTCAGAAATTTG +AATTCCCGCCAAAATTTAAATCAGAAATTTGAGTTACCACCCTTCTCTTC +AAGTAACAGTATTCAACAATATACAATTAAATAGCTTTTCAAATTCCTAA +AAAGTTTGCTGGAGTTATCAATTTTTGATTCAAAACTCGCTAAACCACTT +GCCCATGTAGAATACATAATCAATTCTGAATAGAACGCTATTTTATCCTA +TTTTCAGACATCAAAACCGAATAGCCTAATCGCATGTGTCATTGAAAAAT +CATTTTCGACATTTCTTACTGCTATTATGTGCTTTTTACACGATCCACAA +ACATTTAAAGACAGCAATCGAACACTGGATTCTGATATTTTTGGAGACAG +GTTTGCGGCAAACGAACTCCGCTGCCGACTCTCGAATTATCAATTTTGCA +GGCTTTGTAAGGATAAAAACGAATTTACCGAGTTGAAGAAGATCGGAAGC 
+TGGCAGAAAATGTCGATTTTCACTGTGGTTCGGAATCCGATCGATCGGTT +TGTATCAGGCTTCACGGATAAATGTTTAAGAGAAAATGTCTGGAAAAAGT +TCAAAAATCGATGCGCCGGATGTAAGACTGTGCGTTTTATTTGCTAAAAT +CTTTTACTTAAACAATTTTTTTGGAATCTTTTATTTTCGGGAGTTTCGAA +AGTATATTGGAGTTTTTTAGAGAAAAAAATATCAATTGAAAAAGGTTTTT +TGAAAGGGGGTAACATTTTTCTAGGCCACTAGGCTTTGTAAGGTGGCCTA +GAAAATAATGTAAAAATCTAGGCCATAAGGGTTTATTGCGTGATGGCCTA +TGAAAAAGCTAGGTCGCCATAACTTTTATCAGGGTTTAGGAAAGTTAAAA +CTTCGGGCAGCAAGTTTCGCATAATGACCTAAAACTTTCTGGAGAACTAG +GCTATAAAAATGTTCATGATGGTTTCAAAAAGCGGAACATTCGGTGACCA +TAATCTTCTAGAATTTTTAGGAAAACTAGGCCATTAAGGTTTCTTCCGTG +GTCTAGAAATTCTGGAATACTAGGCTGCCAGGGTTTCTAGGCCACATAAG +AAAGGCCTAGATACAAAAATTTCAGAACGGTCTAGAAAAGTGGATAACTA +GGCCACCAATTTTCTCTTCGGAGTTTCAAAAATATATGATTCATGATTTA +CTTTTTTAAGAGTTTTTTTTTTCAAAAATTCTAAACTTTTGACACTAAAA +TGCTTCTGAATTTTTGAAAACTTTGAAAATTAAATGTTTAAAAATAATTT +TTTGAAAATCTCTCAATTTTTTTTCTTCGAAAAGCTCAAAAAAACGATAA +ATTTCCACATTTTGGCCAGAATTTAACATGTTTCGTCGATAAAATGTACG +ATCGGATGCATAGATTCGCGCGGAATCCGTATAAAGGAATCGATTTCGAC +GACAGTCACTTTTTCCCACAATCTTGGTATCGGTAAACGCGCTCTATAGA +TAAAGTCTCAATGGAGCGCATTTGCATTTTGAAATTTGATATATTTGTCG +TTTCAGGAGATGCGAGTTCTCGTCTCATCTTGTCAAATACCAAATTTTTC +AATTAGACGGTGCAAATTTCACAAATCAACTTCTTGGACTGCTGTCCGAG +CGGGGAGTTGATGAGAATGGAATTAACTTTATTAGTGAGTCTTAATCAAG +TTTGGGTCTCACCGCGAAAACCGTAATTGATTGGGGACAATGGGACCCAA +GAGAGCCCAATGGAGCGGAATTGCAACTCCTATGAATTTTCCAGACGGAA +GTCTCCATCACCGGACCCCACACAGCACAATGGATTCCGTGGAACGGGCG +GCCGTCGAGGAAACTGTTCTGAGCAGTCCGTATTTGTTGCGAAAAATAAT +TCAAATGTATTATTTTGACTTTTTGCTATTCGGGTATAAGCTTCCTGATA +TTCCAGTTGGAAATTAGGAAAGTTCTTTATATGTATTTAATTGTTGTTAA +ATAGTAAAAATCGATAAATTGCTTGAATTTTGATAAATTACGGCCACGTC +AATTTTTAGATAATTAACGGGCAAAAAAAATCTAAAATTCTTACAAATTG +AAAAAAAAAATTTCACAAAAAATAATAACATTTAGTCTAAATGTGTTATA +GCTTAGGCTGAGGTTTAGGCTTGGGCAGGTTATTTTTTAATTTTCAAGGT +TTCTCATTACATACTTTATTTCCGAAATACAAAAAAGAAGTCTCACAACG +AGTCATCAGTGCGTGTTGGTGGGTGGGGTGGGAGATTCTGGTGGCGACCT +ACAACTCACAAACAAAGATGACAACAGGGAAAATGTACACGTGGCAATAT +GGTGAAGCTATATGTATATATAATGAATAATGATATGTAAATTTGCATAA 
+TACAGGAACAGTACGTGATTGCTGGGGCGAGTAGAAATGTGGGTTATTAT +CTATCGATTACCATTACACATGAATTACGCTGGATTACGGTTAGAATTTT +GTGAAAAAAAATGAGTAAAACGCTGGGGCTATAAATCTATAATTGTCGAA +GATGCTCAACGAGCCAGGAACGAGGGGAACATTGCGTCAAACTGGGCGGT +CAATTCCATTAATGATGGTGCAGCTTGTGCATTGATTGCTGCAGCAGTAG +GGACTCCATGCCTCATCACCGAATCGATAATCTTCTTTCCAGATGAGAAA +CGCTCGAGACGGGAGGCGTGTTGGAGCACACGCTGCTTCATTTTCTCGTA +CCATCCAGAGTAGAGAAGCAGAATAGCTGGTGGAAGTTGACGTTCTCTGG +AAAAAAACAAGATTTTCTCTAGTTGCCTTAACACTTGGATTTACAGAATT +AGTGAATATTTAAATCAGGGTGGGCGGCAATTTTTTTTCCGGCAAATTCA +GCAAATTGCCGATTTGCCGGAAGTTTTTAAAAGGATTTTTAAAAGACGGA +AACACTTAAAACTGTGCCTTTTTGAAATTTTCTCCCGATTTCTTTACATA +TTTTCATAGATTTTACTGACTTTTCAAAATAAATGTAGGAACATTTATAG +GATGCGTACAATTTTGCCGATTAAAATTGAAACTGTGAAATTTTCAGAAA +AAATGTGCCGAATCACAATTTGCCGAAAATTTTAGTTCCGACAATTTGCC +GTTTGCCGGAAAAAACCGTTTTCCAAGCCACGTGGATCATTTTTTGAAGA +AAAAAATACGGCAAATTTACAGAAACTTTAGTACCATGTGGATTTTTTAA +GAACTATTGCCAAAGTACGTGGAATGTTCAGAAATTTTTTTGCCTTTGTG +AAATTTTGGATTTTTCTGGATTTTCGGAAAAAAATTCAAATTTTTCTATG +AAGACGAATTTCTGATTTTTTTAAAAATTTTTCACGAACTCAAATAATAC +TCAGACGCGAAATTTTACGTTTTCCGATCAAAACACGGTACCCGGGGTCT +CGAATCGACATGCAGTATTTTTCTCGCTTTGTCATAGTTTCTCGATAAAA +ATGGATTTTTTTGAATTCAAAATCCTTAAGTTTCTACCCAGGGGAGAAAA +AGAAGACTATAAAAATTTGAAGTTAAAGTACTCTTTAAAGGCGCCCTTTT +TTCGAACTTAACCAAAAATTGTCGTGTCGAGGCCTGGTACCGTATTGTGG +TGCGAAAATCATTTAAAAAATTGCGTCGATTTTTGGTTTTTTGTTATTTT +AAAAAATGTTCAATTTTTCAAAAATAGCTTTTAGATAAAAATAATAGGTT +TTTCCTTCTTTTTTTCCGGCTAAATAATTTTAAGAATCCATTTGCCACGT +TTCATCACAGAAAAAAAACTAACTCTGTGCCAATCAAAGCGGCGGTGCAA +ATCGAAATCATTTGCTGAACGACATAGTTGCCATACTGGTGGAACAGAAG +AATATCCAACGCATCACGATTCGACTCGACATCCTTCACATATCCACTGA +AAATCTCCTCCATCATCTCGTGAAGCAACGCGGGCGGTGCAAACAAGAAC +GCTCCCTCGATGACGTGCGACGCATACTTGTCCTGTGACATTGAGAGCAA +GTTACGGAGCAGGCATTTATCGATGATAGTGTCTCTGTACATTTCCATGA +TACCCGACGATTTGATGACGTATTGGATGACGTAGTTGGCGAACTCGTTG +GAAGACAGCCGGTAGCAATTACGGACGATACACGTCATCAGAGAATGCAA +CAATTGAATACGGAATTTGAAACACGGGAGCTTGGGATTCTCGGCGAGTC +TATCGATGACCTGTTGCACGAGACGACATCCATACTTGTCCTGGCACACG 
+GCCATCAGTGAATCTCCTGACGAGAGGAAATGCACGAAAAAGGTCCACAT +GTCGACTGGAAGTTGCTTGACGACACGTTGAATCACGTGGATCGAGATTT +GATCATCCAAGAGCTCAGCACATTGCCGCCAGGTCGAATGTGCTGAGCTC +TTGGATGAGCTGGAAGACGTTGGAATGGTCGAATTTCTGAAATTAACAAA +TTATTGATTTATTACACCTGGAAAGGCCTAAAAAGACCAAAAATAGCCCT +AAAAATTTCGAAGAAATGGATTAATTTTTAGCTAAAACGTAATTTTTTGC +CAACTTTTCTGTGTCGCGATTTTTTTAAACCAAAATCGAAAAATTTCGTT +TTTCGATATTTTGAACAAATTTCAATTTTTTCGGGAGAATATCTTAAAAC +TTAATTATTTTCCTCTAGGAGCCATTTTGTATGTTTTTTTCATCGACAAA +AAATTTTCGTTAATGTGTGCACCTTTAAGTAGTACTGTAACTTTAAACTT +TCATTGCTGCGGAATTTTTTAAAATTGATTTTCAATGTTTTTCTACAGTT +GTCGTCCAATTTCATGCAATTTAGAAAAAAACAGAAGGAAAAAGTGAAAC +ATCGATTTTAAAAAAATTCCGGAGCAATGAAAGTTCGGAGTTACAGTACT +CTTTGAAGGCGCACACCTTTTTTGTTTTAACAAAAATTTGTCGTGATGAG +ACTGGGGACAGTTTTTCGGCGGAAAAATCGCAAAATTTCGGCTAATATCG +AAGAAAAATCAATTTCCGACCGCTGCGACACTTTAGCAAAAAATTGTGAT +TTTAGCCAAAATTCAGTTATTTTTCGGAATTCCCGTGGCGAGACGTATTT +CTCCCACCTGCAGAGCCAACTGCACAACTCGACATGCGAATTTATCCTTG +CACATGGCCAAAAGCCCTCCGTCGATCATTTGACGAAGCAATTCCCGCTG +CTCGGCCGGTGTTGCGAGCTCGACGAGCTTCTGAACGATGAAATTGCCAC +TTCGACTGTGGCAGAGCCCCACGAAGAGCGTCGTGGATTCGGTGAGCTTC +TCGAAGACGGCTTTACGAATCTGATCTTCGGAATCCAGTGGGTAGTTGGC +CTCCAAGAACTTTACGCCCGATGGATCGATTGCGAAATCGATGAGCTGCC +CGTTGAACAGCACATCTTCGAGCGAAATGAAAATCGGAGCAACTGGTGGT +GGGGTCTCATCGTCGGAAGATCCCGATCCACTGTTGAAGATGTAGCTGTC +CGAGTCGGAGGAGGTGGTGTTGCTTCCGTGGCGGTAGGGAAGAAGCTTGA +CCGGCGGCTTTGGATTCTGGAAATTCGAATTTTAAATTTTTATTATATTT +TCTATTTAAATTAGAAGTTTTTATGCAATATTCTACCTTGTTCTTGTGAG +TTTTGTTCGACAAAAGCGAGAAATCCGGGTCGAAATCGAACGACGCGCCG +AGCATGTTGTACGAGTCTGTGCTTCTACGATTCGGGGTCATTTAGACAGA +AATGAATGAATACAGGTTAGTAGACGCTGAAAAATTTTGGGAATTAGGAT +TTTCAACGGAAAAAAGAGTGAAAATGAGAGGAAAACATGTAAATTTCAAC +GAAAATCGCGAAATTACCGCGCATCAAAATTCAAAATTGAATTTTTCGCG +GTGGCCCGGGTTACGGTGATTTTTAAAGGCGCATGGTTGTTTTGAGTAAG +GTCTTGACGCGAAATTTAAAATTATTGAACATTTTTATTTTATTTTTACA +TTTTTAAAGAAAAAAATGATTAAAATTCGATTTTTTAGGAGAAATTAAAC +ATTTTCACAAAAATTGCAAGAAATCGCTCGCCAAAATTCGAAAATGAAGA +GTTTGGCGGTGCTCATGGATTACGGTGGATTTAAAGGCACATGGCTTTTT 
+TGTATTAGGTTTCACCACGAAACTTGGAGTTTTGATAGTTTGCTTCCCAT +TTTGGGATTTATTTTTTTAATTTTCGCTCGATTTTCACACATTTTCACTC +ATTTGCACTCAATTTTTTTGCAGAAAAATGTCAATGGATATTGTTATGAA +GTCAGATGAGCAGCCGTTCATCTCTGCGCCTGTATGCTTTAAGTAAGCTC +CAATTTTTGTTATTCTGGCCTAAAATTATTTTTTTGATTTCCAGAACGTT +CGCATTTTTCGCCAAAGCATCCGCAGAGGAATGCAAACTTCTCTTCGGGC +TCCAACGCTCGAGAAAGTTTCCAGCAGGCCACGGGGCTCTCGGAGAAGAT +CACGGAAGACATTTACCGCCAAATTGAGTAGGTTTTTCGATTTTTTTACC +CTTAAAAACTACTATTTTTCTATTTTCCAGCGTCTCCCACATTTCGTCGA +TGGTCTTCAACAGAGAAATGGACGGTGGCATCGAGAGAAATCGGCTCGGC +TCGCAAGCGATTCTGCTCGACGAGAACTCCAGGCGCCGCAATCCACAGTT +CAAATCGATCGCGCTCCGCTCTCCGCTACGATCACATCGATCATAGAGCG +TTGGCAAGGAAATCGCCTCATCTGATGCCTCCGACTTTTATCAAAGAGGA +AATTATGGATGATGAGCTCGACGAGGTGAAGGAAGAAGTGGTTTCCGTTG +GAGAAGCCGCCTTGCCCACGCCAAAAGTCGAACTGAACATGGACCATCCG +GAAAAGGACCTGATCATCAGCATTTCCGTGTATCTCGGCTACACCCGTAC +CCTTCAATACCACGAGATTCGCCTCGGACGTTTGATGAAGATCACGGATC +GGCTCGAGCTCACCGGAGATCACACGCTTCGCGATCTGAAAAACGCGTTC +TCGTGCCCCATCGATTTCTCGTTTTCCGACGATTTTAGCGAGAAAAAGCC +GTCGTTCAAGGATATGGCGAAAAATAAGTGGCCGTCGACGATGTTCTTCA +TTCATGACACGTTTTATATATAACTGGAAATAATTTAATATTTTTCGAAC +AAATTTGAAAACTATAAAACCATCTTATTTTGATAAAACTCCGCCGCTGC +TCTCTGAAGCAATGAACTGCAATCAGACATCGAAGGGATCAGCCGATCAC +GGAACCAACCTGCCATTGAGCTTCTGAAAATTAATATATTTGATTCAATT +TATTGTACGAAATCGTTTTTACCCATATCTGTGAGCTAGCAAGCTAGCTA +ATCGTGATTAATTGTTGGGTATTCCTTCGCGTTCTTCAGCTTCTCCACAA +GCAAAATTCTTGTCTGCTCATCACAAATGATGTCATCAGCATCGCTCTGA +TCTTGAAAATCCTGAAATTATAACATTCTAGAGGGTTTCTGATGAGAATA +GTTACCTTACAACCAACACCAAATACGTTCTTCATTATGGCATCCGGCAG +CTTTCCATAATCGCAGAAGAAGTGCTCGGCATCACGATCGGATGCTCTAT +TTGGGAAACGACCGATGTAGACACGAACCATGTTGAGTGGCCGAGAGAGG +GATCATCTGAAACATTTAAATGTTAACTAAACAGGAGTTTAGAAACTTTA +AAACAACCTAACCTCCGTGATAACGTAGTTAGCTTCTCGAGTGAGCTTCT +TCTTCAAAATAAAACAGAATGAAACAAGAATCAAGTGGGTAAAATTGTAA +AATGAAGTATTTCAGACATTTTCGCTAATTTTGGCAGTAAAAACCTATAA +TTTTGGTAAAAATCGCTTGTATTCTGAAAAAAGAACTCAATATTTAATTT +TTTATAACTGATTTTAAAGAGAAACAAACGAAATATATGAAAAAATCGAA +AAAATCCCTGAAAAAGGCCTCCGCAATATTGATTGTTAAAGGGACACCGA 
+CTGTTCATCGAATGGGTCCCGCCGCGATTGTCCATTTCGAGCGCTTTTCG +CGATTTTTTACTACTGTAAAAAAGTGTAGGAAAATGTCGAAAAATGTCGA +AAAATGACGTCACAAATGTATTTAAATACATGTTTTTATTGTTACTTGAA +TAAGGCAAAATATGTATTTAAATACACTTTAATTTTCCGACATTACTTGA +ATAACCCCATAAAAAACCCCGAAAAAATTTTTTTTTTGTGAAAATAACCA +GAAAAATACATGTTAAAACAATTTTGTGAACAAAAAACTAAAAAATATTT +TTTTTGCAGAGATGAATCCCACGTGAACGTCGAATTTCCACGCCGCCTCG +TCGAACGGAATTTCAGAAGAATCGCATGTGACACGTGCAAGGAGGCGTCG +GCTCAGTGAGTACAGAAAAATCCATGGGGATTTTTTAGAAATTCGGAATT +GATTCGATTCAATAAAATCGATTTTTATATTAAAAAAAAGGATTTTATTG +GTTTTTAAAAAATTTAAGAAAAAATTACAAAAAATCTATAAAATCGATTT +TTTATTTAAAAAATTGAAACTCAATAAAAGTTCTTTCTAATTTTAGAAAA +ATCGATTAAAAAATAATGTTTTAAAAATCAGTAAGAATCGATTAAAAATT +CAATTTCATCGATTTTATTTATTTTAAAAAAACTTAATTTTTATGAAAAT +CGATCATATCGGAGTCCAAAAAATAATTGGGAAAAAACTAAATTTTCAAG +AAAATAATCGAAAAATTTTAATTTTGTTCAAATAATTGATAAAAACTCAT +TTTTTAGGTTTTTTTTCAAAAAAAATTTGAACAAAAAACACATATTTCTC +GAGCTTTTTTTCACAAATTGACCCGATTTAAATTAAAAATTCATAAGCTA +AAAACCTTCTTTTTTTAACCAAAAAAAAAACGAAATTCCCAAAAATCATC +AATTAAATCCAATTTTTTCAGCTGGATGATCGTCGACCACGACAATCTGC +TTCCCAACTCGCCAGGCTATCTGTGCTCTTCGTGCTACAAGGAATTCTGT +TTCGACGTGAACGGCAACAAAGTGTGCCAGTTTAAGGCCGTACCGTATTG +CGATCGAAAGGACATTGGCGACGGACGCCAATTCTTCACCGAGCTCGATC +TTTAGAGAGCGAAATTTAAAATTCTGTATCATTTTTTGTTGTTTTTTTTT +ATGTTTTTCGATTTGATTGTTTAATTATCAATTTTTGGCCTGTAACTGTG +TATTTGTTGAAATTCCAAATTTCGTTTTTTTTTTAAATAAAATTTCTGTG +TGCAAAAAAATTTATCCAATAAATAAACGGAATATATCGATGATATGGCA +AGTAGGGGGAAACGGACGGGAAAATGATTATTTACAGAAAGGGCGCGCAC +CAGAGGTTTCTCGGGGGAGCCCCGAGGCGGGTGGCGGAGCTTGGCTCAAA +ATCTGTCGGAAAAATGGGTGAAATGAGAGAAGAGGTAACCTAGAAAATGA +GATGAGAAATTCGGAAGTTTTGGGAAAAAAAAATCAACTTTTGAACAATT +CAATTGAAAAATCGATAATATTGATTTTTTTAAAATTCTCTAAGATCGAT +AAATCTGGTTTGGAAAAATCAATAAAATCGAATCTTAAATATCTTTTTAA +AAAACTATTTTCAAAAATCAATAATATCGATTTTTGAACAATTTGAAAAA +TCGATAATGTTGATTTTTAAAATTAAATTTTATTGATTTTTTGAAAATCG +ATTATATTGTTGATTATTGATTTAAAAATCACCAGAATCGATAAATCTGA +TTTGGAAAAACCAATAAAATTGATTTTTACAAAAATCAGCCAATAAAATA +GATTTTTTATAATCAAAAAAATCTATTTTTAAAACCGAAAAAATCGATAT 
+TTCAAAAATTGAAAAAAATCGATAAATCAATAAATCAATACATAGGTGAG +CTCGTCTCGTCGATTTGTGTTTTTGTGATTGGTCCCGGCTGCTTTTGAGA +TGGCCTAGGCGTTGTTCCGTATGACATTCTGGAATTTTTTTTTGTTGGGA +ATTTGGCACGCCCACAAAATGGACGGTGGCGGAAATTTAATTATTTTAAA +TACCTTTCTGGATAAAAAAAATCGATTTTTCCACTTTTTCGAGTGAATCG +AAAATTAAAAAAAACATTCAAAATTGTTTAGGGTCTCGCCACGAAAAACT +CACAAATGCACGTTATGCGGAGTGTCGTCGGATTCCGGTGTGTTGCTACG +GATCGTGTCGTCCATGCTCAGCTTCATCGTCGATTGACGTCGCCGTTTTG +TGGTGATCTGAGCGAGATTTATTGATTTTTAAAGGCGCATGGCCTAGAAA +ACACAACACTTCGGCCACGGACTTTGGCGGCCGAGTTTTCTAGGCCATGC +TGCGGCATTTAAAGGCACATACCATGATAATGAAGATCGCCAATCCGAAA +ATGATGACCAATGCGAAAGTGATAATTACAATCATCGATGATAAGGAATA +TGATGAAGTTTGCGTGGAAATCGAATCGATGTACGCTTGTGACGTCATTC +CTGGAAATTATTGATTTTTTTTTGGAAAAAAATTAATAATTACTTACCGT +CAACAGTAACACTCAACGAGTCAATTTTGTAATCCGAAATCATAGAGTTA +TTGCGTGAAAATGCGGCTTCCAACCTGAATGTTAAAAAAAAATCTATAAT +ATCGATTTTTTGAAAGTTTTTCGAAAAATTGATAAAATTGACGATTGATT +GTTTGCCCATAGAAACAGATAAAAATCGATATAACTTAATATCGATTTTT +TTTTGTGCACCTTTAAAGAGTACTGTAACTTCAAACTTTCATTGCTGCGG +AATTTTTTTATCGGTTTTTACAATTTTTTGTACAGTTTCCGTCCAATTTT +ACAGAATTGAACAACAACAGAAGGAAAAGTGAAAAATCGTAAAAAATGTC +CGCAGCAATGAAAGTTTGAAGTTACAGTACTCTTTAAAGGCGCACACCTT +TTTTGTTTTAACAAAAATTTGTCGTGGTAAGACTGGGGACCGTTTTTTCG +GCGGAGAAGTCGCAAAATTTCGGCTAATATCGAAGAAAAATCAATTTCCA +ACAGCAGCGACACATAAAAATTAGCAAAAAATTGTGATTTTAGCCAAAAT +TCAGTTATTTTTCGGAATTCCCGTGGCGAGACCCATTTCTCCCACCTGTA +GAGCCAACTGCACAACTCGACATGCGAGTTTATCCTTGCAGATGGCCAAA +AGCCCTCCGTCGATCATTTGACGAAGCAATTCCCGCTGCTCGGCCGGTGT +TGCGAGCTCGACGAGCTTCTGAACGATGAAATTGCCATTTCGACTGTGGC +AGAGCCCACGAAGAGCGTCGTGGATTCGGTGAGCTTCTCGAAGACGGCTT +TACGAATCTGATCTTCGGAATCCAGTGGGTAGTTGGCCTCCAAGAACTTT +ACGCCCGATGGATCGATTGCGAAATCGATGAGCTGCCCGTTGAGCAGCAC +ATCTTGGAGCGAGATGGTGATCGGAGCCACTGGTGGTGCAGTCTCATCGT +CGGAAGATCCAGATCCGAAGGTGTAGCTGTCAGAGTCGGAGGAGGTGGTG +TTGCTCCCGTGGCGGTAGGGAAGAAGCTTGACCGGCGGCTTTGGGTTCTG +GAAATTCTATATTTGAATTTGAAAATCAACAAAATTCAATATTTTACCCT +GCTCTTGTGAGTTTTGTTCGACAAAAGCGAAAAATCCGGGTCGAAATCGA +ATGAAACGCCGAACATGGGGGCCATTTGGACAGAAATGAATGAATACAGG 
+TTAGTAGACGCTGAAAAATTGGGAATTTCAATTAATTTAGGATTTTTTAA +CGGAAAAATAAGTGAAAATGAGAGGCAAACATGCAAATTAAAATCGCGAA +ATTACCGCGCTTTGAAAGGGACTCCACCGATCGGAGAGACGAGGGAAGTG +GAGAGACGCAAATCGAAGAGACGCCTTTTCAGACATTTTTTCGTGATAAT +TTTGCATGAATTGGCAAGTAGAGCTTACACGTGATGCGAATCATTGATAA +AAACATCAATTGTTGTTTTTAGTGTACTGAAATTCCGGCACAATAGCTCT +CGTAAATCGACACAAAAGTCACTCCGTGGAAAAGTGGAATTTGGATAATT +TTGTTCTTTTTTTTAAATATGATTAACGTTTTTGAAGGCTTTTGTCTCAT +AATTCATCAAATTCAAAACCGGAATTATCCAAATTCCACTTTTCCACGGA +GTGACTTTTGTGTCGATTTACGAGAGCTATTGTGCTGGAATTTCAGTACA +GTAGAAACAGAAAATGTTGTTTTTATCAACGATTCGCATCATGTGTAAGC +TCTACTTGCCAATTCATGCAAAATTATCACGAAAAATGTCTGAAAAGGCG +TCTCTCCGATTCGCGTCTCTCCACTTCCCTCGTCTCTCCGATCGGTGGAG +TCTCTTTTGAAATTCGAAAATAAAAAGTTTCGCAGTGCGCCCGGGTTACG +GTGAATTTTAAAAGCGCATCACGGTTTTGAGTGAGGTCTCGACGCGAAAT +TAGCCGTTTTTGAAGATTTGTCGTTTATTTTGGCATTTTTGAAGGAAACA +TAAGTGAAAATAAACAATTTTAATCTTAATTGAGAGAAACTTGAACATTT +TTACAAAAATTGCAAGAAATTGCGCGTTAAAATTCAAAAATTAGAAGTAT +CGCGGCGCTCAAGCCGTTGATAACAAGCCGTTATAAACTTATATAACTAT +ATAATGTAACTCGATGATTTGTTGATGTGATCTTTAAAAAGGTTGAAACT +GCCCCAGTCGATGTCCGCGCCAGAGGCGCTGTCAGCGGCTGGTTACTTAT +ATATAAAAAATATCGGGAAAAACGCACTTCCGTTCCATTTGTTTCTTTGT +TTTTGTGACGTCACTGAGAATTATGGGCGGGCCCGTCTGCCTTCGTGGTA +AGACCCATCGTGGCGAGACATATCGTGGTGATATCCTTCGTGGCGAGACC +CATCGTGGTGAGACCCATCGTGGCGAGACCCATCGTGGTGAGACCCATCG +TGGTAAGACCCATCGTGGCGAGACCTTTCGAAAATTTGGTGGGAATTCAA +ATTTTCTTAGAAACATTTTTTGGTGGGAATTCAAAATTTTCAAAGAAAAA +CTTTGGCGGGAATTCAATTTTTCAATAAATAATTTTGGCGGGAATTCAAA +TTTTCTAAGAATAATTTTGACGGGAAATTCAAATTTGTTGAGAAACTCGA +GTAAATGCTGGAATGTTCTAGAACCTTCCTGGAGATTGGGGAAAATTTTA +GTATGTTCTAGAACCATCGGCGAACTTAAGAAAATTCTGGAAAGTTCTAA +AACCTTCTAGAAAATTCGTGAAAATTCTGGATTGTTCTAGAAATTTCTAG +AAAATTCGAGAAAACTCTGGAACGTTCTATAGAACCTTCTAGAAAATTCG +GGAAAATTCTGGAATGTTCTAGAAAATCTTGTTTGCCAAAAGCTCTCGGA +AGGAACCGGAAAAGTTTCTTATGATTTGAAGAACGTCTAGAAACGTTCAA +GAAATTTAGAAGTAGTTCCAGGAAACCTGGTATTTTTAAGTTGTAACTCT +TGAAAACTTAGTTATATAATATTCATGTTGCGTTAAAAACTTCTGGAAAA +CTGAGAAAAACGTTCCAAGGCTTAGAAATTTTAGAATTTCATCTTAATTT 
+CGAGTACGCCAGTCGGAGCACGCGCATTGTGCGAACGGCTGGTATACAAA +CAATAGTCAGTCAATCCCATGGATCAAATTTTGAATTTTTCTAAGACTGA +ACTGTTCCTTTGATATGCAAAATTTTATAAAATTGTCTGTTATTGAAAGA +CATGAAACAAACTTGAATTACAGTAATTCTGTAAATTCTGAACTTACTAT +TAGAATCCTTAGTGAGCACACTTGAAAACGAATGAAATAAGGTATGTGAG +AATACCGTATCAATCAGATACGTCTGACACTTCCCCAAAAAAAACTATCA +GAATATGAGTACGATATGTTATTACTAGTGTGCATCTTTCCGCGCGACCA +TCATGGCTCAGCAGGTAAGACTTCGGCGACGCCTCGAGACGACGAGGTAG +TTGACGACTTTTTTCATTTTTTAGGCAGCGCCCAACTCTTGTCCGAGGTC +TACAGGCTACATGGTTTGGACGGACAGAGTATTTGCTTTATTAGTAGTAA +CAGAAATACACATAAAATCAATTAATAAGATCCATGAAATGCAATAAAAA +TCCAGTTTTCACCCATCTACTATGTCTCTCATTGAAGAACTTTTTCAGAT +AGCTTTTGGTATAGAATATATTAAAATTCATTCTTCGATTTTCTGTGGAA +TTTCTTATTTGAAAGCTTTTTCCATACCAGACAGTCACTTCTCCAAATCC +ATCCTTGAAATTTTCTTTGGGGATATTTAAAATATCATCGAGATACCAAC +AATCTACTGGTTTTTCACCTACAAGACGTGCTCTTGAATTGAAAATGTCT +GGGATATCCTCTGGAATATGTTTTTCTCCTTTGTCATGCTGATAGTATAA +GTTGAATGTGACACTTCCATTTCTCTGGCTGTTTTGTTCAGTCAATCGGA +TTAGCTTTGCTGTGACTCTGTCCATATTGTCCGGAGTAATGAGGAGATAC +GTTTTAGGAAGTAGAACAGTCATACTCTTGGCTGGGAAGTTTCGTTGAAC +ATTTGCAATCACATTGTCCATTGGTGAATAATAGTCGTTTTCCAGTTGTT +CGTTCATTTCGAGTAACATATCCTCAGAGTATTGTAAATCGATCTCCACT +TTTTCCAGAAGAACCCTCGAAACGGGTTGGGTGTTGAATGGTACATTGAA +TACGGCATTTGAAAAGTCTTTAGACGTTTCCAAAATGAAATCCATATTTG +AACTAAACATAGTATTAAAATGACCACTGTAAGCTTCGAACCGGAACTCC +AGTCTGAAATCCTTAACATTCCACTTTGAAATAAGCTGGTATAAAAAGTC +TCTTGGGATTGAAAGGAACTCTTTTCCCAGTATTAAAACGGTGAATGTGT +GGCATGTTACCAATGAATTGAGGAAATAATTTGCTCGAGCACCAACTACG +TACCGCCCTAGTTGCTTTTCGTATATGGTCCGATAAAACAACCTGTAAAA +TGTAATCTTTATAATGTTCCTGAGCATATCATGTCAAGAGTGTCTGAGTT +TTTCATCAACAAAAACAAAATTTGATTTTTTTGTTAATTTTCTAAACAAT +CCGATTTTTGGTTTTCCGGAAAATTGAAACATCGAAATTTTCCGTTTTTC +AATATCCAATTATTCGATTTTCCGATTTCCTCTCAGAAGAACGAAAAAAA +CGAGAATAAAGTTTCCATGACATTGATCGCAGTGGGCTGTTTGTTTTTGG +GATAACTTTAAGTGAAGGAATGGAGAGACTATCTAAGTATGAAAAACACG +AGAATTCACTCTTTTGAGAACCTGTTACAATACGTGTACTGTTATAATAG +AAAAACTAAAGCAGTGATTCAAACATCTGTTCGACGGTGTTCATTACAAA +TGTATCAATTTACACTCACCAGTCTTTCATTTCCAGCGCTTTGTAATGAC 
+GTGGAGATATCCTGATTTGATTTATGATTTCTATTATTTTTAATTCTCCA +AATAGCTCTTCTCCCAATATTCGCAACTCATCAATTCCAAAATTGCTCAA +TCGAAGATATGCGAATTTCCCGAATAGTTCGTTTAATCTGTCCATAAATC +GATCATGCTCATCAACTCCAAAATTTTCCAACATCAAATTCAAAAACTGC +TGCGGTATCGAATTTCCACCAAAATATTGAAGTTGAATACTATGAACTAT +ACAGTTTTTATTTCTATGAATGTCTCGACATATTTGACAACCACAGCAAA +AATCATTTTTATATTCTTCAACGAGTCCATCTAAAAATCATGAATGTCAA +ATCAGTAGTTCAAAAGCCATATATTTTACTCAGAATTCTGAAAAACAGGC +GCTCATGTCAAAAATTGACTTACAATCTACTCCAATCAAGCAATCGTGTA +TGAGATCATTGAGTATAATTCGAGCATCCACTCCGCACGAAATTTCGTTT +TTCTTAATCGGTTCGACATAAAGCTTGTAGACTTTCACATTTACCGTATT +TTTTAAAAATCTGAAAATAATGTTACAGCTGAGTACAAAACATTAAAAAT +TGGTTTAATTGATTGAAATTTTCACCACTTCTCATATTGTTTCCACGATC +ACAAAAAAGCATAAAAAACTTCAACAGACTCAGAAAACACTTTTTAAAGG +CAAGTTTTGAACCACCTATTTTGATGAAAAATTGTCTGAAACACTTCTCA +GATCGACTTTTCCCTAGCTCAAAGTGATCCTCTGCCGCTAGCTCTTTTTT +AGAGTGGTCATTCGAAAGTCCGGGTTTAAGTTGAAATATTTCCCTTACCG +GTCAAAATTCTATCAACATCAAAATAAAATTGAATTTTTTGGTAGTTAAC +AGAGGAAGCGGCGAGCAAAAAAACCAGGTTTTGAACAATTGATTTCAACT +TACACGAATAAGTGTTTCACCTTGTTTTTGTTTATCTCCAGTGCATTTAT +AAGAAACGAGCAAGTGCATCGACGAAGCTGTTTTAGCGGAAGTTCAGGAT +CTACTTGATTGTTAATGAATATTGATCTTTTTCTTCAATTTTTACAAGTT +TTGAAACTGTGAATTTGTATTTTTCGATGCTCATTTCGAATTTTGTGCAA +AACTTCTTCGCTGAACAATTTATTTATCAGCTGAAAAAAGAGCGAATTAA +AGTAAAATTGAACACAATTTTGATACCTGTCTATAATTAGGGATATCAGT +CCGCTGTATTGATTTTTTTGTTTGAAAAACTGAGTTTTTGTTTATTTGCA +GCGGCAAATTTTTATTAATTTTTTTTGATAAAAGTTCATCAATGCCATCA +GTGTTGTGGCACCACGGAGTGGCGGAAGACCGAACCCTTTAAGCCTCAAG +AGGAACCGAGCAACGAAGACTATGTCAGAGGTGCAGCATGGCATCGTCTA +TTTGTATATGTATTGGAAAGAACAGAACACAGTTTCAATAAATATGTTTA +TTACTCCAAAGGGGAGATAATCAACAGAGGGGGAGTGGGTCGTACACAAG +AGAGAGGGAATTGTCTGGTGTGAGAATATCACGTCCTGCCACGCGGGTCT +TTAGTCTGAAAAGGTCGAGTGTATGAATAGTCGGGGTCTTTGCAGACTCG +TAGAATAATGTCTTTGGTCAACTACACTTTCCAAATAAAGGGAGGGAAAG +GTAGGAAACGTCGGCAATTAGTCTGACTGGGAGCTGTCAATAACGGGGGG +GGGGGGGAAGAGACCAGCAAAACCGAACCAAGAGTGGGGAGGAGAGTTCC +TTTCCTGGTGCCACCGACGCTGCAGCTCGCTCCGTATCGTCGCTCATTGA +GTCGCCGCGCAGCCAATCGTTGCTACGGGGCGCGAGTGTAACTCTCGTTA 
+GAGAGCGTGTGGGAGCAAGAGACGCAGAAATACAGTAAACTTCTGCGACT +CCTAACTGTATGGTAAGCTTAAGAGCTGGTCTTAAAAGGTCGAATTGAAT +GTACCCCTCGCTCGGAGAACTGCTGAAGCAGTCGAGAGCGAGACATTGTG +AAATGGGCGATACAGCAGAGCCCAGTTCACAACTCTCCGGGGGGCGGGAC +TTGTAGGCTGGTTCACGTAGTGAACTTAAGGTTTGAAGCCTTCCTTTGCC +TTTCTTGGAAGAGATTGGCGGAGTGGAAGGTCGTCTGAGCTCCTGGCTTC +TTTTCGTGAGCGTTCAGCTTCTTTTGAAGATCTAGTATTAGGTGAGCTCG +GAGCTTCTTTTCTAAATCTTGCTTCGTCTGAGCTTAAAGCTTCTTTCCTG +GATTTAATAAATTTTAAAGATGGAAGCATAGGTAGTGAGGGTTCAGGAAA +GTCGGAGTCCGTCTCTTCGTCTTTTTGTGGAATTTTGTCTTTATTGTCTA +TGTCTTCCGCTGTAACCTCAAGAGGATACAGTTGATTTAGTGATCGTTCC +AACGTGGAGTTATTGAAACGAACTCGTGCCGATTCAATGTTTCCTTCTTT +ACTCGGAATGAGCTCCACAATTTTGCCCAGAGGCCATGTGTGTCTTGGCA +ACATTTCTTGTCCGACGAGAACAATGTCTCCTTGTTTAGGATCTCGAGGA +GCATCCCTTGTATTAGTCTTTTGTCTTTCTCTCAGAAACAGGAGATATGA +TGTCGACCAGATTTGCCACAACTTTGCAACTGTTGTTTCAACTCTGGCTA +AGTGTCTTCTCGTGATTTGCTCTGTTGATCGAGCTGTCTTTGGAGAATAT +TCCATCGGTTCGTCTAGATCAACTTCATTCGGTGCATCTAGCTGAACTTT +TGGTAGTAGAAAGTCGATTGGGCGGAGAGCCGTCAGATCATTGGGATCTG +TGTTGTCTGGTGTAAGTGGACGGTTGTTAATCATCCCTTGCACCTGTCTT +AACGTGCTGGACAGCTCGAAGAAGGTTAACTTTTTTTTGCCAATAGTCTT +GCGAAGTTGGTGCTTCGCAATTCCAACAATCCTCTCGTACACTCCACCTT +GCCAAGGGGCGAATGGAGTGATGTTGTGTACCTGAATTTCGTATTTGGCT +AAAAAGCAAATCATTGAGTTGCTTGGTGCGTAAAGTCTGATGTCTTGGTT +GACCATTTGGTGGCCGAGTGTAAATGTTGGTGCATTGTCACAGTAGATAT +GGGGCGGAACACCACATGCACTGGAGATTGCTCTGAGTGCGAGCAAGTAG +TTGGCTGTAGTAGCGTCTGGAATGAGTTCTAGAATGGTAGCTCTAGTCTT +CAGACAAGTGTAGATAAGAGCATAGGCTTTACCTAGCTTGTCATCGTCTG +TCTTGTATTGTATTGGACCCAAATAGTCGAGTCCTACATGGTCGAATGGT +GCAGAAGGTACAGTTCTGCAGTTTGGTAGTCGTGTGTCGTAATTGTATTT +AAAGGGTCGTGCTTTCACCTTTTTACAGTTCACGCACTGAGCAATTGTAG +TTCTTGCAATTTTGCGATCATTTCTGATCCAAAAGTGCAGTCTTACCGTA +GTTGCCAAATAGTGTAATGGTAAGTGGGTATTTCGTCTGTGGACATCTTC +CACAATTAGACACAATCAGATGTGGAACCGGATTGGGTCCCACTATCCAA +TGATGTTTGTTTCCGTCCTCGTCTACTGGGTTACGCAATGTGTCTTGCAA +AGTGATTAAGTAACCGTCTGTAACAACTGGAGAAGTTGTGTCAAAAAATG +TCTGAATCAGTATGATTTTGTCTCTGTCTTTGAATTCGAGAATTCGTATT +GTCTTGCATTGACTGAGCATAAGAACAGTTTCCTGTTATATGACTCGGTG 
+ACGAGCAAATCGTGCATCTACCATTGTTGATGCAGAAGCGTTTGACTTCA +ATCGCACCCATTGTGCACGTTTGAAGTGGATGGTCTTTTTTGCAGGGAGG +GCACGGATCTCCATCGAGAAATTCTTGCCAATTCTCCACGAGTCTGGAGG +TAGTATTACCCGTCTTTTGTTGAATATGCAGGTTCAGGAGGTTTTCCTTT +AGTTTTCTGTCCCGTTGGACATCAACCAAAGTACGTCGCCAATCTTCCGC +GCAGTTGTCTTCAAGATGTTTTTTGAGGCTTACGATTGGATCGTCAGCCC +GTCTGGGTCGTATAAAGTTACGCGGAGTCAGCTCGGTATAGTCAGCTTGA +TAGTCTTGCATATAGTCTTTGAGATCTTTAATTATCATGTCTTTGGATTT +AATCGTTGAAGGTTTCTGTTTAACAGTTTCCAACAAGTCTTTAATCGTCG +ATAGTCTATGTCTCAAGTCACCAGAAATAACAATCATTTTCTGTCTGGCC +AGATTAGTCGTGGATCGTATGATATCTGTTAAATTCAAAATTGGCGTACC +TGTATCCTTTGCCTTCTCGGCGACGTCCTTGAACATCTCCTCGATGATAT +CCGGCAATGAATTGGCTTCGACCAAGTATTGAGTCAGGTCTTTGTACCAT +TTGTGAAGATCCTTGATGTAGTCCATATGCTGCTGCTTGTCACAATCGAT +ATTGGCGTTAAGCCAGTCGGATGAAGACTTATCTTTGTGCTTCAAGTCTT +CAGTCTTAAGTGCGATACGCGTCAGCAACAATGCTATGGATGATGAAGTG +GAGAATGGGGTCCTGGATTCCCTTCCCAAGCTGCCACCACCGGAGTCGGG +ATCAAGACTTCCACCTCCAAGATGAAAAGTCATTCTTCTTTGAAAACCGA +GAAACTTGTTATCTAAAATCAATAATTGGTTAAGAAGAAAGACAAAATGT +TTTGAGATTAAGTAAAAGACTTAATAAGAGATTAGGAAAGAAAGATGGGG +ATGAAGACTCAATTTGAGTGATAAGAAAAAAAGGTATTTTAATATAAGAT +TCTACAATAGGGATTGGGAAAGGTGTGCCGTGGCCTAATATAGGTTTGGT +GGTACAATAAATAGGGGAAGTAATGACCCAATATGGGTGGGGGGATTATT +AAATGTTACCTAATATGGGTGAATTATTAATTATTCCTTTGGCAGAGGAA +TGTCGGTGAGGGCTAAGAGTTCCCTGTATTGTAGGTTGATTAGTCATGTA +GGGGACTGGTGTAGAGTAATTTGGTGTAGAGTAATTTGGTGTAGAGTAGG +TATTTGGTGTAGAGTAGGTTTTGGTGTAGAGTAGGTTTTGGTGTAGAGTA +ATTTGGTGTAGAGTAATTGGTGTAGAGTAGGGATGGAGGGAATTCAACAC +CGACTGGTCGTGAATCAGATCGGCCCTGAATTGGGGTAGGGGACAATAGA +AAGAAACTACCGTGTGGCCAACTGCCCCCAGCAGGGACCTAGTTGGGATT +GTTTTCACTTGAACACGAAAACAATGGGGGTTGGGAAAGTTAATAGGGAC +AAAGGAGCAAAACGAAACGTCCATTCCCGCATCGAATTGCCTCATAGGCA +CCGCACCCAAACTCTCCTGCCGCTGCAGCAGTCAGGGAATCCGACGCTCC +ACGCTCTCCATCGCGACTGGAGACCAAGTTCTTCTTTCTTCTTATAGTCT +TTTTTTTCTAGTGGCGCCGGTTAATTACTCCATTGCCAACTAGCTGTCGG +GAAGTCAGAAGTCAAATGTCCTTGCGGACGTGGAATATAACATGTTTGAA +ATTAGGGTGGTTTAAGTTTTTTGTGAGATCCCTATGGATAAATTTTGGTT +AATTTTTAAATTTGAAAGGTTTTAAAAGATGTACAAATAATTTTTAATGA 
+TGTATAATTTTTGGAGAAGGTACTAGTTGAAAGATATAAAGAATTTTTAA +ATTGAAAGTTAAATTAAAATTTTGAGGGGAATTGGTGTAGAAAATTAGTT +AAAATATATTTTTGGAATTTTTGAAATTTTATAATTTTTAAGGATTTTTT +AAATTTTTTAGGAAGTTATAAAAGGGGGTAAATAAACTAACTGTTCGATC +GCCGCGTCCTCCAACGAGCAAATCCTCCATCCATCCAGAGTTCTTGTCTA +CCCGTCTTGTCGTCTATTCCTCCTTTTCTTCTTGTCTCTAGCACACAGGA +GACTGTGCACTATTGTCTTTCCGTTGCAGTCAGCGAGCTGCCAGCAACTA +GTCGCATCGTTAATGGCACCTTTGCCACTGTATTGAATGCTGTGAGCTGT +TTTAAGTGCTATTACACTATAGTCTGTAGAAAACAAGAAATTTTGTAATC +GTCTTTCGTCTTTCGTCTTGAACCATATTAACAGCCGAGATTTATTAAAT +CAAGGAACAAATAACAGCTTCAACAATGTGGTATCAGATACCGGTGAGGA +GTGGTGAGGGGGGAATTTCAAAAAATTTAAAAGATAAAAATTTAGTGATC +GAATATCGAGATATTCGATGGGGATTGTCCTCGTGCCAATTTCTTGGCGA +TCCTTGGTTGGTATCGGCGTCTGACCGGCTGGTGTTGTTGCTGCTGTTGC +TGGAGTTGTGGTGGCGGTGGTCCAAATAGTTGTGGAGCAGGAACGCGAAG +TGGTGGTGGGAACTCCTGTGCGGCTGGTACGAGTTGTGGTGGAGGTTGCT +CTTCGGATGGTGGTGGCGTGTGAGCATTGAATCCTCCAGAGACTTCCATC +GCGAGGGAGATCCTGTTGATCGCTGCGTGCACCACATCTATCTTGTCGTA +CAGAACGACGTGATCCGCGGATTGGATCCACACTTCCTGGGAGCCAGAGC +CTTGTGGAGCCGTCGCAGTCGTCGGGCCATTTGGGTCGCGGCCGGTCTGG +GGGGCTGGGCCCGTACTTCCTGGGAAGTTGAGAAACCAGTCTTCGAAGAA +CTCGGATGGAGATGTGTCGTCGGTGAGCGTCGGGTCGAACGGTCTGAGAA +GATTTTAGAAAATTAATAATAGTATATGGAAAAATTGGATAAATTTTTAG +AATTTTGAAAGAATTGATTGAAAATGTGTATAAATTGAATTTTTTAGAGA +AAAATGATAAAATTTTTTAGAGAAAATAATAATTTTTAGATAATTTTTAA +ATAAAATCGAAACTTCCTTTAGTCTGGCGCTTCGTGGATGGGTAGGCTCC +ACTCAGCTGTCAACTTACGTACTACCAATGAAGAATTGCAGGATAATGTG +CATATTAGATGCAAAACGACGAGAAATAGCGAATAATAAGTCGGGGAAAG +TCGAAATTGTGCTCTGGGAGCATGAGTTTGCCAAACTCACGTCCCTCGGC +GTCTGTGGGCTCGCGAGCCGCCCTTGTGTACGATTTTAGGGGTTTTTTAA +TTTTGAATTTTTAGAATTTTAATTATTTTTAAAGAAAGTTTGGTAGAATA +AAGGGACGGGGGATGTTTAAATAGATGTGTACCTGTCGTCTATCAAGTCT +TCGACTATCGAAAACGTCGAGTCGTCGAAGTTCATATTGAATCGTCTCTT +CACGGCCCGGCGGGGGGTACATGGACGAGAATTCTCTACCGTATTCCAAT +TTGGCTGACTGCGTGCTCAACGTTGAATACTCAGTTTAAAGTTTCGTACA +CCGTTGCGTACTGCACAGCGCGCATTTTAATTGACGAAATTTCGCGAAAA +TTAACAGAAGATTTTTTTCGGAATTATAGAGCTGAAATTGAAAAAAAAAC +TATCAAATTTTCATCGAATTTGTGAAAAATCGTAAGTATGAAGATCTTTT 
+CTTCACTATATTCAAGGAAAATCGATATTTCGCTTTTCACAGACGAATGA +TGTCTCATTTTACTCGATGAAAGTTTCTGATGAGCTGTTTTTATCGATTT +TTGAGCGATAAAAATGCGATTTGTTGATAAAATGGATCAATTATATAAAG +AAACAACATATATTGCTCTGAGATTACTTTTTGAGAATCAATTCTTTATT +TTTCGGTCATTTTAAATTAAGCATTAAAATAAAAATATTAGAAATCATAA +TAAAAAAAACAGAAAATCGATATATTACTTTTTCTTCGGAATTTCACGAC +TTTTTTGGACGAATTTTATTCTGTAAACTTTCTTCTTCGAATTTGTGTCC +ACGTGGCTTTCAGTCGAAGAAGATTCTGCAGCACTCCTTCTTGCTTGCCC +ACAACTTACTCGAATTTTCTAAAATTTTTAACTTATTGAAATTGTCATTT +CACCTTTACACTCACTTCAGCTAAACTATTACTGCATTTCGGAAGTTGAT +AGGATACTGGTGGAGCAACAAGTGGATGGCTTCTAGTGATTGGCTGGCTT +GTCGAGCAAGTTTGTGTGATTGCCTGAAATAATTTTTGATTTCAATTTTG +AGTTGATTTAAAGCAGTGAACCTACCACCGGGTTCGGACGAGAAAGAGCA +TTACTCGGTAGACCACGGAATCCAATTTTCGTTGAATTGCCTCCAAATGC +AATAGAAGTTTGTACGTTTTGTGAGAAGTCGGGCTGAAAATTTTCAAAAT +TTGAAACTTTTCGAGAAAAATAAAAATCTCACCACAGCATTTCGAGATTT +TGTCGATTGTGGAAGCCTTTTCCTGGAGCGAAAATTGATTTTTTTTTTCG +CTAAATTTTTTCTTTTTTGGGCAGCCGTGACGTCCCGAATAACTGCTTTT +GGGTCCCGAAGATCATTTTGCGAAGAAATTGGCAGAACTGTTGCATCTTT +TGGTACGATGGAAAGACCGGGAATGGACGTGTTCTGAAATAGTTGTGTTT +TTAAGAATGCAGAAATGTTTTTCTGTACCAAAATTACCATAGTCATGTCA +TTCATGATGTTACGACACATGAGCTCTCTCAGAACATGGATGTAACGCCT +TTTCTTGTCCCGGTAATTGCAAAATCTCCTCTCAAGTGCATTGAAAATCG +CGTGGACAGATTCAACTCCTTGTTCTGTGATCCTTCCAATGTTTCTCACA +TCTTTTGCCATTTGTGGTGCATGGTAGACCAACAAGTGCAGCTTTAAAAT +AATTGTTTCTTCGGGAACCGCTACTTTCAAATCCTCCACAAATCCGCGAA +TCGAATTTTGAAGTATTAAGACGTCGGAATCATTTAAAAACTTGTTTCCC +GAAAGTGACATAATAGTTGAAAGCTTTCCCATTGCTGATTTCAATCCGAG +CAACATTGGGCATAAATTTGGGCCAAAAATGTTGAAAGTCTCCTCTACAA +CAGCCGGCGTTAGCAGCAATTTCAAATGGTTTCCGCAAAATGATTGGAAC +CAAGCCTGCTTGTCCGCTCCAAACTTAGCCCAACACTGTCCCATTTTTTC +AAGTGTTCCTTCGGGAGTACCATTCACAATTGTATCGAGCAACAATTTTT +CCGATTGAAGTGCTTTCAGTTCAGCATGCGACTCCAATTTCATCTTTCCG +GTGGCTCCTTGATACTTTTCTTCCGCACTTTTAATTAGGTTAACAGCGTT +TTTTAGAGTTGCTTTTCGTGTTTTCAGGATAGGAAAAGAAGTAGTGTTAT +CCAAAGTATCAGAATATTTCCAGAGGGGATTGAAGATATATTTGTCAAAA +ATACCCATGATAATGTGCAGAAGAGGAATCAAATAGAACATGATCGCAAC +GTGTGGCAGAAGTGGAGTACATCCTTTGCGAACACCCAAGTCGCCATTTT 
+CACAACAAGCTTTGTAAAGATCGATTGTTCGTGGGTGGAATGTTTCATCA +ACATTCATATCCTTGATTTTCATCCTCTCTTCAGCTCCCCGTGGATTCTG +TGCAAAACATTTGAAGCAGAAATTGTGGGATGAATGTCCTTGGTGTCCAA +GAATATCAGATTGAAACTTGCAATCTCCAGTTGCAATTTGCACAATTTTT +GCGGTTTTTTGAACTCCTTTGTCCAAATATCAAATTTTCGTTAGCTTGCC +AAGCTGCTCAAGAACGTCCGGAATGAATTTTTTCAGAGACGAATAATTGT +CGGATCCGTCATATACTGCAATTACCATAACGTGTCTCGAAGAATTCGGT +CGAGATACGTTTCCGATTACCAATGCCAACTTTGTGCTTCCACCTCCAGC +GTCACCAACGACTCCAATCTTGATTACTCCTTTCGTGTATCCGTCGTCCA +CAAATTGATTTGAATTGCATAGAAGCTCTATTCGATAGGCTAAAACTTCT +GCAATTTTCATGCACTGCACAATGGTAATCACTTTTCCTTTATTGTCGAA +CGAAGTGGAAACTTTGAAACTGGAGATCATTGATAACTGGATTGACAAAT +CTCTTGTGTTCTTTACCGATGGAAGCAAATCATAGCCAATGGCATTAGTC +AAATAGTTTTTGATTTTTTCCATCTGACTTAGAGATAATCCGCATTTTGA +TAAAAAGTCAACGGCCTCAAAGTTTGAAAGCTTGTTTTTGTAGCTTTGAT +TCTCTTCTGAATTCAGGAATTTTGTGAATTTTCGAATAAATTGTCCGACG +TCATCCTCGAGGCAGATTTCGTGTTGAAGCAAGTGAAGAGCTTTGCGAAA +TCGATTTTTGATACAACTTTTGCTTCTTAGATTCGAAATATTAACTTTAA +AAGCTGATTTTTTAAGGTTTTCAACTTCTTCGGCGTGTCTTTGTAGACTC +AGAACCATAGCTTTGCCACTTTTCTTCACATCTGCACAGCTTCTCACCAA +TCGACCTTCTATACCACTGACGATCGTTCGTATATTGCATACTTCCATTT +GCAGCGAAGAATTAGATGCTCTTATAGTGATATTTTCATGGCGGACTATT +TGCATTTCTTCCGAAAACACCGCAAACTCATCAATCCGCTTTTGTATTTC +TTCTGATATTTCATTTTTTTCATTTTTCAGTCGTTCGATCGTTAGTCGGA +GCATTTTGATCTGCGGAATTTGCTCAACATTGGAGATTATTCGAACCCTC +GGTGTACTGAACGAGTTTCGTAAAGGTGTCGGTGGAAATACGGGATTGGA +GAATCTCAGCAAAATCATATAATATTAGTTTTGAAATATTGAAAAAAATT +ACATTGTGAGAAAAAGTCGGAATTTCGTCACTAAAATCCATTTCCACGTC +TCTCGTCAGAATTCCTTCATCCATATTGAAACAATTTGACGACCTGCATG +TAGTTGCGGAGCTACTGGAAGCAATGTCGGGATGGTGGGAGTTTCGATCT +TCTGAACTGATTTCCTGATTAGCCTGTGGCGACGAGCTGCACGTCTGAAA +ATCACGTTTTTGAAGTTAGAACAAACTACTCCAACTTAATTAAAGTTGAC +AAAATTGAGCTGAACGAACCTCCACTTTCGAATTGTTCAGTTCTTCCTCT +TCAGTTTGATCTTTTGAAACTCCATTAGCACTGTTCCTTGCTCTCTGGGC +ATTTGCTAAAAGAAGGCCTGCACAAGATTTTTCTTTTCTTTTTTGTTTGA +AGTATACTTTTGTCATCTGGAAATATTGCATGAATATTATAAGGGAAACA +ATTTTTAAATATCGATTTTCACGAAATTTGAAAAAATCAATAATTTGGGC +GCATGATATTGAGCTGAATGTTTCGAATTTAGAATCAGCATGCTTTTATT 
+CATATTTTAGGATCTTTTTAAAAAATCTGGACCAACAGTTTTTGAAAAAA +AAATACTTTTCGTTCAGAAATGTACTGATTTTCCACTGATTTTCACGAAA +TTTGAAAAAATCAATAATTTAGGCGCATGATATTGAGCTGAATGTTTTGA +ATTTAGAATCAGCATGCTTTTATTCATATTTTAGGATCTTTTTAAAAAAT +CTGGACCAACAGTTTTCGAAAAAATTCAATTTTTGTTCAGAAATGTGAAT +ATTCACTAAATCGAAAAAAATAATTGCAAAATCCGTCGGCTGAACATTCA +AAACTTATCAATTTGAAATCAGCATATTTCAGTGTATAATTAAAAAAGGT +TTCAAAAATTCTGAGACCAATTTTTGTTGAGAAAAATAATTTTTCGTTCG +AATTATCGATTTTTCACGAAATGCCAAAAACAGTAAACTTGGGCCCATGC +TAAAAGCCTGAATCTTTCAAATTAAAAACCAGCATGATTTTTTCTATATT +CTAAGACGTTTAAAAAAAATCTGGACCAACAGTTCTTGAGGAAAGTAATT +TTTTATACAAAAATGTGCTGATTTTTCACTAAATTCAAAAAAATAATCAA +GTTGGGCCCATGCTATACACCTAAATCATTAAAATTCAGAACCGCCATGT +ATGTATTTTTTCATACCATAGGCTCTTTAAAAAAAATCTGGACCAACAGT +TTTTGAGATATGTCAAAAAAAACAACTCACTTTTTGACGTTTTTCGCCTT +TTCGCGGATGATGCGGTCGATTTTTGCGGCGATTTGTGGTCTTTCGCTGA +AAATATTATTTTTATTTCAATTTTTAACGAAGAAAACAAGAAAAAACGAC +GAGAAAACATCAAAAAACACGAAAAAAACGTCGAAAAACTCCCGCAACCT +CATGAAAAAAAATAAAGCACTGCAGCCGCGGGACTAGTTTTCGCAACTTT +CTAGGCCATGTCCCGTTCGCCGTGCCGTGTCTTGTCGTGGTGGTGTGAAG +TGTGTTGGTGTGAAGCGTGTGAGTACGGTAGGCTGCTGCAGATGTGGTGT +GAGCTGTAAAAAATCGAAATAATTCAAGAATTACGACGATTTTCGTATTT +TGAGAAGAAGAAAATTTTAAATCACGAAAATTCGAGAGAAAAAATGATCA +AATTCAGTCTCAGAAGCGAAAAATGAACTTCTTTGTCTGGAAAACAGCGG +TTGCTCGTGAGAAAAAATGTTTAAAATTGAAGAAGATCAACAAAAACAAA +AAAAGACACGTAAATGCAACTTGTTTTTAAAGGTCGTGGCCGCGTCGCGG +TCGCGCCGCGTGCGCACTTTTGTAGAAGACTCCGCCCCCTTTTTTCTTGG +CGCCGTGCCAATTTTTAGAAGGAAGAGCGTTTTTGGTTGAAATAATCGAT +TTTCGACGAGAAAATAGACTAAATCAAGTATGGTGAATCTGATTAAAGGC +ATATTAACTTTGTAAACTGCACAATTAACTAGAAATCGTACTATAACCGA +ATATAGTCAAAGGGTCGCTCAGTTCCCCCGGTCTAGAGTCCCAGGTTCAT +CTTTCGAGCTGTTTCCTGAGAAATTGTACAGGATTTGTCTGAGTCGTGAA +CTGAACACGCTTTTCAGGTAAACGTCGTCTGGATGTCCTGGAAATCAAAT +GTTAGTCTTTGTATCGATGGAAAACATGGTTTTCTCAAGGAAATAGTCGG +AAAGTCGGTCGATGCACCATGTTGTGGCACCACGGAGTGGCGGAAGACCG +AACCCTTTAAGCCTCAAGAGGAACCGAGCAACGAAGACTATGTCAGAGGT +GCAGCATGGCATCGTCTATTTGTATATGTATTGGAAAGAACAGAACACAG +TTTTAATAAATATGTTTATTACTCCAAAGGGGAGATAATCAACAGAGGGG 
+AGTGGGTCGTACACAAGAGAGAGGGAATTGTCTGGTGTGAGAATATCACG +TCCTGCCACGCGGGTCTTTAGTCTGAAAAGGTCGAGTGTATGAATAGTCG +GGGTCTTTGCAGACTCGTAGAATAATGTCTTTGGTCAACTACACTTTCCA +AATAAAGGGAGGGAAAGGTAGGAAACGTCGGCAATTAGTCTGACTGGGAG +CTGTCAATAACGGGGGGGGGGGGGGGGGGGGAAGAGACCAGCAAAACCGA +ACCAAGAGTGGGGAGGAGAGTTCCTTTCCTGGTGCCACCGACGCTGCAGC +TCGCTCCGTATCGTCGCTCATTGAGTCGCCGCGCAGCCAATCGTTGCTAC +GGGGCGCGAGTGTAACTCTCGTTAGAGAGCGTGTGGGAGCAAGAGACGCA +GAAATACAGTAAACTTCTCCTAACTGTATGGTAAGCTTAAGAGCTGGTCT +TAAAAGGTCGAATTGAATGTACCCCTCGCTCGGAGAACTGCTGAAGCAGT +CGAGAGCGAGACATTGTGAAATGGGCGATACAGCAGAGCCCAGTTCACAA +CAATCAGACTAACTGGCACTTACGTATAAATAATATTATGGGTTTCGTCG +CTCCCCCTCCAAATGTTTTAAAAACGATTTAAAACATAATTTATACTGTT +TTTTTTTACTACTGACCAATGAAAGTTCATGGTTGATAACATAAATCCCC +ACATTTTCAGAAAAATGGATAAGCAGAAAAAAAGTTATTAATGAACTCAT +AATTTTCATCGAAATTAGCCAGCTTTAGTTTCGGTGTATTACCAGAAAAC +AAATTTAAGTGATCTTACTCTCAACTCCAACAGAGTATCTAATTGATTAG +ACACATTTTTTACAATCTTGCTGACAATTGTGCGATTTTTGAAGATTTTG +TCAATAAGCACGTGATCCTGAAAATAAAAAAGTGAGTAATTTTACGCTCG +CGAAATTGTGAGCCAGTGGCTGCCTAGATTTTCAATTTGTTACATTATTT +TGCAGCCATACGTGGACCATAATTCTGGCGACCACTTTACGGCATTATAA +TTTTTTGAACTGCAAACAAAACATGTAAATATACATATATGTATACTAGG +AATTCGCACTAAAGTAAATTATTGAAATTCGTCAACTTTTATTTTCATTT +CTTATATTTTGTCTAATTTTCAACGAATTATTGAAAATGAAAATTTTTAA +TTCACGTCAAAATTAGTGAACGCGGTGAACGGCGACGGCGAAGGCGGGCC +GGCGTGAGGCCGGCGTGAGGCCCGCGTTTCGCGCCTCACTTAGCTGGAAA +CCCTAAGTTTCTCCAATTAAATTACAGGTGATATACATTTTTCCTCTTTA +GGATAAGAAAAGATCATTTCCTAAGCCTGACAATACAAAAATGTGGTTCA +CGTTTTTATTTTTCATAACTTAAAAAAATACTATTAAAATGAGGGCATGT +AATACACAAATACCGGCAAACGGTGTTTCAAATCAAATATTGAAGAAAAA +ACAAACAAAATATGATGAATTCTCTTTGTTCTTACCAAAATCGAAACCAT +CATTTAAAATTTAGCAATTTTTTACAGTTATATTTGGTAATTACGGCCCA +TTTTCTATAATATAAAGAAACAATTGAAACTGATCCACCCTGTCCGCCGA +CTATTGGGCATCGACAGATGGTGTTCTCAAAATACCAAAAATGGGTGGAA +CTCTCTCACGAGCGCTCTGCAGGTGTCGTTTTCCTTTGCAAAGGACTGCC +CTTCCATTCATTTGTGTCTCTATACAACATCTCATCTGTCATGCAAACCG +ACATTCCCAACATACTCGAAATCCCAATTGATGTAAGTTCTTGTTTCTTA +AACTAAAATTAGCCAACTATTTTCTAGAAATATCAATCTAGACCGAAATA 
+CCTTTTACTCTTGAATGATATCAAAAGTTTAAAAATGTTTTATCTGGTAA +ATATATTATTCAGTCAATAATAACAGAGCAATAATTTCCTATTTGTAAGA +CGGTTTGCCAGCCCTACTGATGTAAGCTAAAGGTCCTACAAAAAATCCCT +GAATTTTGGGTCTCCTTTGCTAACTACAAATGGTAGGCAAAAGAACATAT +TTGTGTACAAAAAAGTATGCAAGCAAAAGAGACAGCGTAGGCAGGCAAAA +GATTCATAAAAGAGAGATAGCCCCCGCGGTCAAATCATTCTACTAGTGGC +TTGAAGCCTCTGTTGATGTGATAAAAGTAACATTTTATATAGCATTGTGT +TCGATTTTTCTAACAAATAAAATGCATATCATTACTTATACAGTGCGTGC +ATGTTCTTTATGCCACCCCCAAAGTTTTTAATCATGGCAGTTTCTGTGTA +ATTTGGTGAGCAAAAAGTATTGAAAACCTATTCACAATGAAAAACCTAAA +CTCAAGAAATGTTGTGATTATATCTCGAAAATTGTGAAAAATAAGACAAT +AAATCAAAAATTGGCCGTGCAACTTCTATATGCCACCTCGGATTTTTTTA +TGATTTCTTATGATTTTAGACTATTAACTTATTATTCAAATAAAATTCGA +TTCAATTCGTATACATTTCAGTGCTAAATAACTATTCCTGTAACTTCTCC +CAAAGACGATACGATGATCAAAATATGGGGATATGTTCTAGATACTAATT +AAACATAGTATGTGAATATTGGTTCACATTGGTGGATTTTTAGCGTCGCC +AGAGGGACATATTTGGTGTCCCCAGCCGTTTTTGGTGAAAACACTATTTC +TCTTTTTTTTCTCGGTGCCAATCAAGTTGAGAAATTTCAGGCATGCTCTC +GAACCTTCAGGAAAGATCGAATACTGCAGAAAACCTGTAGACTAGGAAAA +CAACTTGGAAGTTTCTTAAAAGTTTGGAATTTTACTAGAATTTTCTGAAA +AATTTCAGAAAAAAAGAGACTTTCAAGAAACTTCTGAAAACTTCGAGAAA +GTTCTGGAATGTTCCAGATTTTTCTAAAAAGTAACAAATTCCGTTCAGAA +CAATGTTTTGAATATATGCCAGAATCTTACTCAAGTAAAATAATTTTTTT +TAAATTCTGATACTCCAAAAATATTATGAAATTTCAAAAAAAATTAAATT +TAATAGACGTTTGCAATACTATTGAAGACCTGAAAAAATGTTAGAAATTT +AGAAATTGTTCATTTTGAAGTATTCGAAATACTGTTCGATTATTCATGAG +ACTGATAATGTTATCAGTTTTTTTTATTTGAAAGCTTTCTAAGAATTTCA +TATGATTAGAACATCGAAATAGTTTTACTTTAAAGAAAAAATTGCTCCTG +GAAAATTTTAACTGCTGAAAACTTGGTTACAGAAATTTTATGATGCTCAA +ACACTTCCTGGAAACTGAAAAAAAAAAACGTTTCATCTGCAAAGAAAAAC +TTAGAACAAATTTCAATAATTTACTTTAGTGCAAATTCCTAGTATACATA +TATGTATATTTACATGTTTTGTTTGCAGTTCAAAAAATTATAATGCCGTA +AAATGGTCGCCAGAATTATGGTCCACGTATGGCTGCAAAATAATGTAAAA +AATTGAAAATCTAGGCAGCCACTGGCTCACAATTTCGCGAGCGTAAAATT +ACTCACTTTTTTATTTTCAGGATCACGTGCTTATTGACAAAATCTTCAAA +AATCGCACAATTGTCAGCAAGATTGTAAAAAATGTGTCTAATCAATTAAA +TACTCTGTTGGAGTTGAGAGTAAGATCACTTAAATTTGTTTTCTGGTAAT +ACACCGAAACTAAAGCTGGCTAATTTCGATGAAAATTATGAGTTCATTAA 
+TAACTTTTTTTCTGCTTATCCATTTTTCTGAAAATGTGGGGATTTATGTT +ATCAACCATGAACTTTCATTGGTCAGTAGTAAAAAAAACAGTATAAATTA +TGTTTTAAATCGTTTTTAAAACATTTGGAGGGGGAGCGACGAAACCCATA +ATATTATTTATACGTAAGTGCCAGTTAGTCTGATGGCATTGATGAACTTT +TATCAAAAAAAATTAATAAAAATTTGCCGCTGCAAATAAACAAAAACTTA +GTTTTTCAAACAAAAAAAATCAATACAGCGGACTGATATCCCTAATTATA +GACAGGTATCAAAATTGTGTTCAATTTTACTTTAATTCGCTCTTTTTTCA +GCTGATAAATAAATTGTTCAGCGAAGAAGTTTTGCACAAAATTCGAAATG +AGCATCGAAAAATACAAATTCACAGTTTCAAGACTTGTAAAAATTGAAGA +AAAAGATCAATATTCATTAACAATCAAGTAGATCCTGAACTTCCGCTAAA +ACAGCCTCGTCGATGCACTTGCTCGTTTCTTATAAATGCACTGGAGATAA +ACAAGAACAAGGTGAAACACTTATTCGTGTAAGTTGAAATCAATTGTTCA +AAACCTGGTTTTTTTGCTCGCCGCTTCCTCTGTTAACTACCAAAAAATTC +AATTTTATTTTGATGTTGATAGAATTTTGACCGGTAAGGGAAATATTTCA +ACTTAAACCCGGACTTTCGAATGACCACTCTAAAAAAGAGCTAGCGGCAG +AGGATCAATTTGAGCTAGGGAAAAGTCGATCTGAGAAGTGTTTCAGACAA +TTTTTCATCAAAATAGGTGGTTCAGAACTTGCCTTTAAAAAGTGTTTTCT +GAGTCTGTTGAAGTTTTTTATGCTTTTTTGTGATCGTGGAAACAATATGA +GAAGTGGTGAAAATTTCAATCAATTAAACCAATTTTTAATGTTTTGTACT +CAGCTGTAACATTATTTTCAGATTTTTAAAAAATACGGTAAATGTGAAAG +TCTACAAGCTTTATGTCGAACCGATTAAGAAAAACGAAATTTCGTGCGGA +GTGGATGCTCGAATTATACTCAATGATCTCATACACGATTGCTTGATTGG +AGTAGATTGTAAGTCAATTTTTGACATGAGCGCCTGTTTTTCAGAATTCT +GAGTAAAATATATGGCTTTTGAACTACTGATTTGACATTCATGATTTTTA +GATGGACTCGTTGAAGAATATAAAAATGATTTTTGCTGTGGTTGTCAAAT +ATGTCGAGACATTCATAGAAATAAAAACTGTATAGTTCATAGTATTCAAC +TTCAATATTTTGGTGGAAATTCGATACCGCAGCAGTTTTTGAATTTGATG +TTGGAAAATTTTGGAGTTGATGAGCATGATCGATTTATGGACAGATTAAA +CGAACTATTCGGGAAATTCGCATATCTTCGATTGAGCAATTTTGGAATTG +ATGAGTTGCGAATATCGGGAGAAGAGCTATTTGGAGAATTAAAAATAATA +GAAATCATAAATCAAATCAGGATATCTCCACATCATTACAAAGCGCTGGA +AATGAAAGACTGGTGAGTGTAAATTGATACATTTGTAATGAACACCGTCG +AACAGATGTTTGAATCACTGCTTTAGTTTTTCTATTATAACAGTACACGT +ATTGTAACAGGTTCTCAAAAGAGTGAATTCTCGTGTTTTTCATACTTAGA +TAGTCTCTCCATTCCTTCACTTAAAGTTATCCCAAAAACAAACAGCCCAC +TGCGATCAATGTCATGGAAACTTTATTCTCGTTTTTTTCGTTCTTCTGAG +AGGAAATCGGAAAATCGAATAATTGGATATTGAAAAACGGAAAATTTCGG +TGTTTCAATTTTCCGGAAAACCAAAAATCGGATTGTTTAGAAAATTAACA 
+AAAAAATCAAATTTTGTTTTTGTTGATGAAAAACTCAGACACTCTTGACA +TGATATGCTCAGGAACATTATAAAGATTACATTTTACAGGTTGTTTTATC +GGACCATATACGAAAAGCAACTAGGGCGGTACGTAGTTGGTGCTCGAGCA +AATTATTTCCTCAATTCATTGGTAACATGCCACACATTCACCGTTTTAAT +ACTGGGAAAAGAGTTCCTTTCAATCCCAAGAGACTTTTTATACCAGCTTA +TTTCAAAGTGGAATGTTAAGGATTTCAGACTGGAGTTCCGGTTCGAAGCT +TACAGTGGTCATTTTAATACTATGTTTAGTTCAAATATGGATTTCATTTT +GGAAACGTCTAAAGACTTTTCAAATGCCGTATTCAATGTACCATTCAACA +CCCAACCCGTTTCGAGGGTTCTTCTGGAAAAAGTGGAGATCGATTTACAA +TACTCTGAGGATATGTTACTCGAAATGAACGAACAACTGGAAAACGACTA +TTATTCACCAATGGACAATGTGATTGCAAATGTTCAACGAAACTTCCCAG +CCAAGAGTATGACTGTTCTACTTCCTAAAACGTATCTCCTCATTACTCCG +GACAATATGGACAGAGTCACAGCAAAGCTAATCCGATTGACTGAACAAAA +CAGCCAGAGAAATGGAAGTGTCACATTCAACTTATACTATCAGCATGACA +AAGGAGAAAAACATATTCCAGAGGATATCCCAGACATTTTCAATTCAAGA +GCACGTCTTGTAGGTGAAAAACCAGTAGATTGTTGGTATCTCGATGATAT +TTTAAATATCCCCAAAGAAAATTTCAAGGATGGATTTGGAGAAGTGACTG +TCTGGTATGGAAAAAGCTTTCAAATAAGAAATTCCACAGAAAATCGAAGA +ATGAATTTTAATATATTCTATACCAAAAGCTATCTGAAAAAGTTCTTCAA +TGAGAGACATAGTAGATGGGTGAAAACTGGATTTTTATTGCATTTCATGG +ATCTTATTAATTGATTTTATGTGTATTTCTGTTACTACTAATAAAGCAAA +TACTCTGTCCGTCCAAACCATGTAGCCTGTAGACCTCGGACAAGAGTTGG +GCGCTGCCTAAAAAATGAAAAAAGTCGTCAACTACCTCGTCGTCTCGAGG +CGTCGCCGAAGTCTTACCTGCTGAGCCATGATGGTCGCGCGGAAAGATGC +ACACTAGTAATAACATATCGTACTCATATTCTGATAGTTTTTTTTGGGGA +AGTGTCAGACGTATCTGATTGATACGGTATTCTCACATACCTTATTTCAT +TCGTTTTCAAGTGTGCTCACTAAGGATTCTAATAGTAAGTTCAGAATTTA +CAGAATTACTGTAATTCAAGTTTGTTTCATGTCTTTCAATAACAGACAAT +TTTATAAAATTTTGCATTTCAAAGGAACAGTTCAGTCTTAGAAAAATTCA +AAATTTGATCCATGGGATTGACTGACTATTGTTTGTATACCAGCCGTTCG +CACAATGCGAGTGCTCCGACTGGCGTACTCGAAATTAAGATGAAATTCTA +AAATTTCTAAGCCTTGGAACGTTTTTCTCAGTCTTCCAGAAGTTTTTAAA +GCAACATGAATATTATATAACTAAGTTTTCAAAAGTTACAGATGTTTTCG +GCCGAAATTAGACTATTTTGCAATATTTTGCGACTTTTTGCTGAAAAATG +GTACCCATCAGAGATGTGCGGCATGTGCCGAACGGCATGTGCCGATGTGC +CGAAAATTATTCCACTCGGCACATCGGCATGTGCCGACCTTTTTTGTCGG +CACATTTCGGCACATTTCGGCATATTCGGCACTGTCTGGAATATGTACCA +AAATTTATTTTTTAATTTTAAAAATGCAAAGAAACTTCAAAAAATTGATT 
+AAAATTTTCGGAATTTATCATTTCAACTTATAGTTTACTAACTTCAATAT +TAAGACAAATGCACTGCATTTTTTATGGGGTTATCCAACTGAATGTTCCC +GCTTTTTCCTCCTGTTTCCCCCCTCTAGTCGCGATCCAACTGGGAGGAAA +AATGCATTTTCCCCTCGTTTTCGCATTTTTTAGCTGCGAAATTTCAGAAC +TGAGCTTAGGGTGGGCATTTATAGACTTTTTTTAATTTTTTTTGACCAGA +AAAAGTTAAATTTTTATAGTTATTTTACCAATTGGACCTTAAATTTGAGC +TATGATATCTTTGTGGTAAGCCAGAGTCATGGTGAGTGATCAACTACAGA +GTTGTAGCAAATTTTCTGTTTAAAATTTTGTTAGTTGATCAATTCTTGGT +ATCATAGATTTTCACAGACTAACATAGCTGTGAAACTGGACAATTTCTTA +ATGTAAATTGCGTACACGAGATTTCTCTATTTTCCGCGTGGAGTACTAAT +ACCAAAAATTGATCAACTAACAAAATTTTAAACAGAAACCTTGCTACAAC +TCTGTAGTTGATCACTTACCATGACTGTGGCTCACCACAAAGATATCATA +GCTCAAAGTTAAGGTCCAATTCATTTTAAAAAAAACTATAAAAATTTAAC +TATTTCTGGTCAAAGAAAGCTGGAACCGCATTAAAAATTATGTTATTTTA +GTATTTCAACAAATGTCTAACTGTGAAAATTAAAAGTAAGTTTGAAAAAA +TTTCTTTAAAACATTTTTTGATAATTTTTTCATGTCCTGTGCAGATTTCA +AATTTTGAAACAATACTTTTAATTCTCATATATCTCCGTTGAAAAATTTT +TTATGACAAAGTGATCAATTACAAAGTTGTACTTTGGATTAAGAAAAAAA +AACTTTGTAGTTGATCACTTTGTCATATAAATTTTTTCCACGGAGATATA +CGCATCCGAAGTGAATGAGTTTTCACTATCAATTCTACTAAACCCTATGT +TTTTGTCTGAAATCGTGTTCAGAACATCCAAACTGAAAAAAACAACAAAC +ATTGCAAGTAATTTTTTGTTATTTATTTAAATTGATGAACTTTTTGTTTT +TTTTAAGAAGCTATATAGTATCTTTGCAAATGTGCCGAATGTGCCGATTT +TTTTAGGTTCGGCATGTACCGAGTGCCGAGAAAAAATTAATTCGGCACAT +CGGCATGTGCCGATGTGCCGAAATTTCAACAAGTGCCGCACATCTCTGGT +ACCCATCGTCTCGTCACGACAAATTTTTGTTAAATTCGAGTATGTGTGCG +CCTTTAAAGAGTACTGTAATTTGAAACTTTCGTTCCTATGGAATTTTCTT +TGACTTTTCACTTTTTCCTCTGGTTTTTATCCAATATTCATAGAAAAAAA +GAGAAAATTTTTCTAAAAATCGAAAGTTTGAAGTTACAGTACTCCTTAAA +GGCGCATAACGTTTCGGAATAAACAAAAATTTGTCGTGGCGAGACCTGAG +TATTATTACAGGAACGCAAAATTCGGAGAATGCGTTTTACACAACATATT +TGACGCGCAAAATATCTCGTAGCGAAAACTACAGTCATTTTTTAAATGAC +TACTGTAGCACTGGTGTCGATTTACGGGCTAGATTTTTATTAATTTCTGA +AAATTGAGCACCCGTAAATCGACACTACTGTAGTTATTTGAACAGGATTA +CTGTAGTTTTTCGCTACGAGATAATTTGCGCGTCAGATATTTTGTGCAAT +ACGCATTATAAGGGACAAGTTCTCCAGTGAATTCTTCCAATTACATTGAA +ATCCTTCTATTTTGAATACAAATTACTCAAAAGACAAAGTGTCGATTTAG +CCTAAACATTGCGCATGCGTAATTTCTGAATCTTGTTCGTTCTTTACATC 
+TGCACACTTGGGTAAGAAGTTGGGCAGCCTATGTTTTAGAGGGAAGTCAA +CAATGTTATTTTCGCACTTGTGGCAACAGAGTTGAAACTACGGTACTGTA +TATGGGTACACCTTTTAAAATTTTAAACAAAAATTTGTCTTGTTGAGACC +GGAGCACATAGATATTCTCAGTGTGTATTTGTCTAAAAACCTGCCAATAT +TTGAAAATCTTTCAAAACTTCAATATTTTGCTCCAAAAATTAAATTTTTC +AAATAATCTACAAAAAGAATCCAATTTGACTGAAAACCTGCTAATATTTA +AAAATCTTTCAAATGTATAATTTTTTTGCTCCAAAAACTGATTTTTTTCA +AATAATCACAATTTCAATTTTAATTTCGATAAAGATACACAAAAAAAAAG +AATAGCATTTAAGGATTTCCTTGTTAAGCTTCAACAGCGTAAGGGAAGGC +CTCGTTGGCAATCATCATGCGTGTAAGTTCGGCGGCGCTGAAAATAAATT +TTTGATGTTTAGCTTGCATGGTATGTATTACTGATCCCTCAACTCCGCCA +TGGTCCGACCCTGACGAAGTGGTATCTCTCCAGCATGGTGAACTCTGACG +TGATTCCAGTGCACAAGCCTTGTTGCAGTATACGACAGCCCACAGATACC +GCAGTAATACGGATAGGGAAGGGTATGCAGATGGTAGACGTGGCGAGCCC +TGAATGTTGAAACTGACTTTTTGAAATGACTGGAAAAATAATTACAAGTC +AACTTGCTGTCCGAAATTCATCATTACAAATGGAGCATTGGAATTGTTGA +GCATCAGGAGATTCCGAATTCCGATGGGTATCTTGAGGCTCTTCCATTTC +CTGAAAATTGTAATTTTAGACTTTTTAGATAATATTCACAAAATCTGTTT +AGGAAAAATTAACTACTAAAGTTACAGTAAGAATTTTGCTCTGAATTTGC +TCATAAAGGAAATTTTTTTAAGCTTGCACCCTGATTTGTTTAAATTCTTC +AATCTTTGGATTCCTCGCAAAAAATTCAATTCAGTCTCCTGGGCGTCGAA +TTGCGATGGAGCGCGCTTGCCGTGCGTTGGCGCAGCCACGGTGGTTCAGT +TGTAAGATGGGTGCAAACGCGCTCCACAGAGTTTTCGATCCCCAGGAGAC +TGTGTCCAAGTAAACTGAGTGGTAGCTTTTTTGTTCAAAAAATGTGCCCC +GCCCATTGGAAAAGCATGACCTTGTAATCGAAAATCCTTCAAATACGATT +TCTATTCAAATTTACGAAACTGAAAAAAAATTAATTTTACGAACACTTTG +ACGTCCATTCGGTGAGATTCTTTCAGGTGTGAATCGTTTTCTCTTGTTAC +GAGCATCATTTGGCTCCGGCACCGCCATCTGACCGGCCTAAAATAAGAAT +GTTTAAAAAAATATGCATTTAGGAAGTTTAATATTCAAAACAGAAGAGAC +ACATTCCATATGAAAACAACTTACATTCTGAACGTTTCCAACGTCCGCCG +ATTCCTTTACACCGTATCGATCGATTAAAAAATCGATCAGCTGACCGTTG +AGCAGTACATCCTGGAGCGAGATGGTGATCGGAGCCACCGGTGGTGCAGT +CTCATCGTCTGAAGATCCAGATCCGAAGGCATAGCTGTCCGAGTGGATGG +TGTTGGAGTGGTCGAATTTCTGAAAATTGACCAATTTTTGATTTTTTAGA +CTTGAAAAGGCCTAAAAAGGACAAAACGAGCCCCAAAAATTTGAACAAAA +GGGTTGAAATTTCATATTTTGTTAATTTTTCTGTGTCACAATTTTGATTT +TCAATCTATCGATATTTTGAATAAATTTCAATTTTTTCGGGAAAATATTG +CAAAATTTAGTCATTTTTCCTCAAAATAAACCAAAATTTGATTTTAAAAG 
+TTTGGAGAATGAATTGTTGAACTTGGAAACACCAAAATTAGCTCTAAAAT +TTCGAAAAAATGGGTAATTTCAACTTTCCGTATCTTTTCAGTTTGTCGGA +ATTTTTAAAAAATTTTATTACAGAAAACCACCAATAATTAAAATCTTGAC +TTTTTTTTTAACCCTAAAAGATTTTTTTCGAGAAAATATCTTAAAATTTA +GTCATTTTTCTTCTTTTTAGGAGCTATTTTTATGTTTTTTACTTTTTGTT +CCGTCAAAAACAATTTTAATTGTAAAGGTTTTTTTACGATTTTTCCAGAA +AAACCGGTAAATGACACTATTACGGGAACAAAAAATTTGGAAAATGCGTA +CTGCACAACATATCTGACGCACAAAATATCTTGTAGCGAAAACTACAGTA +ATCCTTTTCAAATGGCTACTGTAGTGTCGAATTACGGGGCTCGATGTTCA +GAAATTAATTTTTTAATCGAGCCCCGTAAATCGACACTACAGTAGCCATT +TGAAAAGTATTACTGTAGTTTTCGCTACGAGACCCTATTTTGCGCGTCGA +ATATGTTGTACAGTACGCATTTTCAGAATTTTGTGTTCCTGTAATAATAC +TAAGATCTCGCCACGACAAAGCGAAAAATTTTTATCGATTTTTCAGCAAT +TTTTCTCTTGTTTTCATAAAAATTGGGCAAAAACCGGAGGAAAAAAGTGA +CGAATTAATAAAAATTCCATGGCAACGAAAGTTTGAAGCTACAGTACTCT +TTAAAGAAGTGCACCTTTTTGGATTAACAAAATTTTGTCGTGACGAGACC +CTGGATACCATTTTTCCGGCGGAAAAGTCGCAAAATTAGCGATTTTGGGA +ATTTTTCGCGGCGAGACAACTGCACAACTCGACATGCGAGTTTATCCTTG +CAGATGGCCAAAAGCCCTCCGTCGATCATTTGACGAAGCAATTCCCTGGT +GTTGCGAGCTCGACGAGCTTCTGAACGATGAAATTGCCATTTCGACTGTG +GCAGAGCCCCACGAAGAGCGTCGTGGATTTGGTGAGCTTCTCGAACACGG +CTTTACGAATCTGATCTTCGGAATTTAGTGGATAGTTGGCCTCCAAGAAC +CTTACCCCCGATGGATAGATTGCGAAATCGATGAGCTGCCCGTTGAGCAG +CACATCTTCGAGCGAAATGAAAATCGGAGCCACTGGTGGTGGGGTCTCAT +CGTCGGATGATCCCGATCCACTGTTGAGGATAAAGCTGTCCGAGTCGGAG +GAGGTGGTGTTGCTTCCGTGGCGGTAGGGGAGAAGCTTGACCGGCGGCTT +TGGATTCTGGAAATTCGAATTTTAAACTTTTATTATATTTTTATTTAAAT +TAGAAATTTTTATGCAATATTTTACCTTGTTCTTGTGAGTTTTTTTCGAC +AAAAGCGAGAAATCCGGGTCGAAATCGAACGACGCGCCGAGCATGTTGTA +CGAATCCGTGCTTTTACGATTCGGAGTCATTTAGACAGAAAAATGAATGA +ATATAGGTTAGTAGACGCTGAAAAATTGGGAATTTTGGATTTTTTAACGG +AAAAACGAGTGAAAATGAGAGAAAAACATGTAAATTTCAACGAAAATCGC +GAAATTACCGCGCATCGAAATTCAAAATTGAATTTTTCGCGGTGGCCCGG +GTTACGGTGATTTTTAAAGGCGCATGGTTGTTTTGAGTGAGGTATTGACG +CGAAATTTAAAATTATTGAATATTTTTTTCTTATTTTTACATTTTTGAAG +AAAAAAATGATTAAAATTCGATTTTTTAGGGAAAATTAAACATTTTCACA +AAAATTGCAGGAAATCACTCGCCAAAATTCAAAAATGAAGAGTTTCGTGG +TGCGCCGGGTTACGGTGGATTTTAAAGGCGCATGGCTGTTTTGTATTAGG 
+TCTCACCACGAAACTTGGAGTTTTTGATAGTTTACTTCCTATTAGGGATT +TATTTTTTTAATTTTCGCTCGATTTTCACACATTTTCACTCATATTCACT +CAATTTTTTGCAGAAAAAATGTCAATGGACATTGTTATGAAGTCAGATGA +GCAGCCGTTCATCTCTGCGCCTGTATGCTTTAAGTAAGCTCCAATTTTTG +TTATTCTGGCCTAAAATTATTTTTTTGATTTCCAGAACGTTCGCATTTTT +CGCCAAAGCATCCGCAGAGGAATGCAAACTTCTAGCTTCGGGCTCCAACG +CTCGAGAAAGTTTCCAGCAGGCCACGGGGCTATCGGAGAAGATCACGGAA +GACATTTACCGCCAAATTGAGTAGGTTTTTCGATTTTTTTACCCTTAAAA +ACTACTATTTTTCTATTTTCCAGCGTCTCCCACATTTCATCGATGGTCTT +CAACAGAGAAATGGACGGTGGCATCGAGAGAAATCGGCTCGGCTCGCAAG +CGATTCTGCTCGACGAGAACTCCAGGCGCCGCAATCCACAGTTCAAATCG +ATCGCGCTCCGCTCTCTCCGCTACGATCACATCGATCATAGAGCGTTGGC +AAGGAAATCGCCTCATCTGATGCCTCCGACTTATATCAAAGAGGAAATTA +TGGATGATGAGCTCGACGAGGTGAAGGAAGAAGTGGTTTCCGTTGGAGAA +GCCGCCTTGCCCACGCCAAAAGTTGAACTGAACATGGACCATCCGGAAAA +GGACCTGATCATCAGCATTTCCGTGTATCTCGGCTACACCCGTACCCTTC +AATACCACGAGATTCGTCTCGGACGTTTGATGAAGGTCACGGATCGGCTC +GAGCTCACCGGAGATCACACGCTTCGCGATCTGAAAAACGCGTTCTCGTG +CCCCATCGATTTCTCGTTTTCCGACGATTTTAGCGAGAAAAAGCCGTCGT +TCAAGGATATGGCGAAAAATAAGTGGCCGTCGTCGATGTTCTTCATTCAT +GACACGTTTTATATCGACTCGAACACTGGCGACAAGTTTGTGGATCCGTC +GATGTGAGCAAGGGTGCCGGCTAAAAGTTTTTCAGTGTTTTTTGAGCGAA +AAGGAACAATTTTTTTCTGAAAATTATTGATTTTTCGTTAGTCTTCAGAA +AAAATGATTTTTTACGCCGCAAAATCGGGAAAAAACAAAAGAAAACTGAA +AATTAAGACATTTTCGTTGATAAAAACGAATTTTTTGGTTTTTTTTTTCA +GAAATATATTATTTTTTGCTTTATTTCAACAAAAAAAACCCAAATTTTCA +GCACAATCCGAAGTTGGGCCAAAAAGTTCGACTACATCGGTCCGATGCAC +GTGAAACAGATGTCGGAGACGAGGATCGGGGATCTCATTTGTCGGCTCGG +TCAGCCATACGTCTACATTCATCAAGGCGTCTGCGAGCACCTCATCGTCT +TCAATGACTTGTGCCTGAGGTACGGAAATTCTGGAAAAATCGAAAATTTT +GATTTAAAAAAACTCAAATATTTATGTGTTCTTTTTTTTACTAAAAATTT +TTTTCAAATTGTTTATGAAAAAAACACTAAAAAATAAGTTTTTATCAATT +ATTTAAACAAAATTTATATTTTCCGATTATCAAAATTGTTTTTTTTTTTC +AAATAAAATACCCCGAAAAATATATATTTTTTAATTTTTTTTTTCTATTT +TTTGCAGAGATGAATCCCACACAAACGTCGAATTTCCACGCCGCCTCGTC +GAACGGAATTTCAGAAGAATTGCATGTGACACGTGCAAGGAGGCGTCGGC +GCAGTGAGTAGTAGCATTTAGAAATTCGGAATTTTTTGTGAAACCCCCGA +AAAATATAGTTTTTATCGGAGTTCGAATTTTCGAAAAAAAAATCCAAAGA 
+GAATCGATTAAGGCAAAAATCATCAATTGAATCCAATTTTTTCAGCTGGA +TGATCGTCGACCACGACAATCTGCTTCCCAACTCGCCAGGCTATCTGTGC +TCTTCGTGCTACAAGGAGTTCTGTTTCGACGTGAACGGCAAAAAAGTGTG +CCAGTTTAAGGCCGTACCGTATTGCGATCGAAAGGACATTGGCGACGGAC +GCCAGTTCTTCACCGAGCTCGATCTTTAGAGAACGAGATTTAAAAGTCTG +TATCATTTTTTGTTGTTTTTTTTTAAATGTTTTTCGATTTGATTGTTTAA +TTACCGATTTTTGGCCTGTAACTATATTTGTTGAAATTTCAAATTTCTTT +TTTTTTAAATAAAATTTTTGTTTGCAAAAAAATTTATCCAATAAATAAAC +GGAATATATCGATGATATGGCAAGTAGGGGGAAACGGACGGGAAAATGAT +TATTTACAGAAAGGGCGCGCACCAGAGGTTTCTCGGGGGAGCCCCGAGGC +GGGTGGCGGAGCTTGGCTCAAAATCTGTCGGAAAAATGGGTTAAATGAGA +GAAGAGGTAACCTAGAAAATGAGATGAGAAATTCGGAAGAATTTTGAACA +ATTTGAAAAATCGATAATGTTGAATTTTAAAATTAAATTTTATTGATTTT +TTGAAAATCGATTATATTGTTGATTATTGATTAAAAATCACTAGAATCAA +TAAATCTGATTTGGAAAAATCAAAAAAATCGATTTTTTATAATCATAAAA +ATCAATTTTTAAAATCCAAAAAAATCGATATTTAAGAAATTGAAAAAAAA +TCGATAAATCAATACATAGGTGAGCTCGTCTCGTCGATTTGTGTTTTTGT +GATTGGTCCCGGCTGCTTTTGAGATGGTCTAGGCGTTGTTCCGTATGACA +TTCTGGAAATTTTTTTGTGAGGCAATGACACGCCCACAAAATGGACGGTG +GCAGAAATTTAATAATTTTAAATAATTTTCTGGATAAAAAAATCGATTTT +TCCACTTTTTCGAGTGAATCGAAAATTTAAAAAAAACATTCAAAATTGTT +CAGGGTCTCGCCACGAAAAACTCACAAATGCACGTTATGCGGAGCGTCGT +CGGATTCCGGTGTGTTGCTACGGATCGTGTCGTCCATGCTCAGCTTCATC +GTCGATTGACGTCGCCGTTTTGTGGTGATCTGAGCGAGATTTATTGATTT +TTAAAGACGCATGGCCTAGAAAACACAACACTTCGGCCACGGACTTTGGC +GGCCGAGTTTTCTAGGCCATGCTGCGGCATTTAAAGGCACATACCATGAT +AATGAAGATCGCCAATCCGAAAATGATGACCAATGCGAAAGTGATGATTA +CAATCATCAGCGATGATAAGGAATATGATGAAGTTTGCGTGGAAATAGAA +TCGATGTACGCTTGTGACGTCATTCCTGGAAATTATTGATTTTTTTGGAA +AAAATCAATAATTACTTACCGTCAACAGTAACACTCAACGAATCAATCTT +GTAATCCGAAATCATAGAGTTATTGCGTGAAAATGCGGCTTCGAACCTGA +AATGTTAAAAAAATCAATTATCGATTTTTTTGAAAGTTTTTGAAAGTTTT +TGAAAAATTGATACAATTAACTTGATATTCCCCCGGAAAACCTATAAATC +AATGAAAAAGTTAATATCTAAATTTTTTCCAAAAATTGAACTAAAGTCGA +TTATCGATTATCGATTTTACTAATCATTTTTCAAAAATCTATCATATAGA +CGATTCCTACGGAAACCCCGAACAAATCAAAAATCGATAAAATTGACTAT +CGATATTTTTTTCTAAATTTTACTTTTTTCGATAAAAGTCGATAGCGTTG +ATAATCGATTTTTTAGAGTTTTCCTCAAAAAATGGTAATAAAATATTTCT 
+AATCGGTAAGAACTCACAAATTAACAGCCATTTGAGCATCACCAAGTGAT +TTTTTGGTGAGCACGTCGGCTCCGACAACCACTGATCCTTTGGTCAGTGA +CTTGATTTCAACATTCACAAAATTGTCGTCTCCCGAAATGGAATTGATGA +ATTCTCCGATCTGGAAAAAAATCAATAATTGTTTAAAATTGATTTTTTTC +AGATCATTTTTAAAATAACTAGTCTAAATCTATATTGAAGTCTAACTCAA +AATTTTCCACAAAGGCTTACTATTAGGCTTAGGCTTAGGCTTAGGCTTAA +GCTCAGGCTTAGGCTTAGGCCACGCGAGATGAAAACAAAAGAGAAAATTT +AGTGAAAACAAGAAAAATTTCTTTTAAAAAAAAGAAAAAAAAAAAAAAAA +AAGAGCAACAGCCATGCAATTCTTAAATATAACAGGTTCAACTACTATCT +TCAAAAATCAGTTTAATATCGATTTTTCCCAAATGTCAATAATCCATTAT +CCATTTTTGACTCACCGCTGGCACCAAATCTTTCGCTAATCTCTTCGCAG +GGCCACTCTGAATATCGTTGAATTCCTCTACAAATTCCAGTGAAATAATA +CGAAAAACTATTGGTGTTCGAATGGATTTAGCCGGAAGGAATACTTTCGA +TGAGTTTTGAAGTGCCACCGGCACAGTTGAAATATCTTCAAAAGTCGTGG +ATTCCTCTGATGAGGTGGAGGTGGTCTCTTCGGAGGAGGTAGTGACGTCA +GTGGAGAGGGTTGATGGCTCGGTTTCGGCCATGGTTGTGTCCATTGTGGT +TTCCATAGTTTTTGTAGTTTCCATTGTTGAAGAGGATTTCATTCTGAAAA +TTTTGAAGTTTTGAATTTTTTCCTGCAATATATGAACACTTCCGAACCAG +CAAAACTCACTAGGTAGTGTTTTAACTATACGGTGCGATCGAGTAAAAGT +GTCATGCGATAGCTGGCATCTTAGGCTTTCAGAATCTGTAATTTGTTCCG +GCGGAAGACCTCTGTGAGTCTGGAAATTTTCATCTGAAAATTTAGTACTG +AAATCTGCATTTCCTATGGTTAACAGTGGATTTTTGTCTCTGGCGCCAAC +AGAAGTCTCACCACAATGGTGGAAGGGCGAAAACATCGCTTCGAAGCTGT +TAACCATAGGAAATGCACTGATTTCAGTACTAAATTTTCAGATGAAAATT +TCCAGACTCACAGAGGTCTTCCGCCGGAACAAATTACAGATTCTGAAAGC +CTAAGATGCCAGCTATCGCATGACACGTTTACTTTTACTCGATCGCACCG +TATAGTTAAAACACTCCCTAGTCAGATATGTGCTCTACACTTTTAAATTT +TGCAATCAAAATACTTGAATATAAAATTAAGAGATTTTTGAATTTACTAG +GCCACCTCGAAAATTTCTAGGCCACGGCAGGTTGGTAGGGTGAAGCTGCA +GACCCCATAAAGCTGTGAGAACGGTTTTTTTGCAAATTTTTTAAAACAAA +CATTTGTGAAGAATCTGAATATTATCAGAACAAAAAATAATTTCGACAAA +AAATAATAAATTTTTTTATCAATATTCAAAATTTTTAACTTCCGAAAAAA +CTCAAAAAAAAACTCACCCAGTCGTAGTGCTCTCGACCATCTGCTTCGTA +GTTTTCGGGATATTCGACAAAATCTTTGGCTCTTCCTCAGACATTTTCAG +AATTGGGAATGCGGGGCTCTGATCTGTGCTCAATGAGCTATCAATAACAC +TTCCATCCTGCTCAAAAAGTCCTTCCAGTGCACTTTGATCAAATGGTGTT +TCTCCGTCATCTCCAGCTTCTGATGGGTCACTTACTGCGACGGTGGATTG +GGTTGTGGTCACTGTAAAATTGGAAATTTGGAAGAGAAAAACCTCGGCCA 
+TTGGTGACGTCATACTTTTCGCCACTTCTTCGACCAGCTATAATGCCACG +CTTTTAGCAAGTTTTATTTTTGGTAAACCAAAGAGCAACCAATTTTAAAA +CTGTGTCTCCAGGTTTACCGCCAACTTAAGGGCGTGTCCGTACAGGGTTT +GCCCGTTTTATGGGCGTCGCTTAATAAATCATTTTTCAAGTTTTCAGTTT +CGTGGAAAATTAGCATCCGGGAAATTGAGTCATAAGTATGACGTCAGCAG +CCGCGCGGCTTCCCATGACGTCACTCCATGCGCTCCCCCGACGTCCCTCT +AAGACGTCACCACCTCTCACCTGGTTTAGTGGGTCGTGTCATCTCCTTCT +CTTTCAAAACTCGTCTATCAGCGACCTCGTCAACCTCAATCGGAAGGGAG +CTGAACGTAATCACCTCCAAGGGAGTGGTTGGCATCATTTCAGTCGTATT +TTCAGCGGCGAGAGTTGTTTTCTCAGTGGATTCATCCTGATCCTCCATAC +TATCATCTTCGATTGTTGCCTGGGAGAAGTTGAACATACCGCCGTCAAGC +GGAGTTATTGGGGAGAAGACGGACGTTTTCTGGATTGGGTTCACTTCTAG +GGAGGTGTCGTTCGGCGAGAGTTCGATCATTTCTGTACTTGTCTCGAAGG +TATCTGATCTGAAAGTTATGACATTTTTTTGGTGGTGGCCTGGGAGAAAA +CTAGGCCTCCTGGCCTAGAAATCTCAAACCTCGGCCTCCAGCTCTCACTA +GCTGACCTAGGTTTACTAAACATCGGCAAGACCTCTTTTCGACTACTCCC +TTACAAATTTTTAACAGTTTTTCATCACACAATTTTCCGATTCTTACCCC +TCATTCATCATTTCTTCTTTCGGAACAGCCATCATCGGCATATCATCATT +ATCAGGCATTACGGGTGTCGTTGTGTCTTTTGCCAGCGGTTTGTCGAACA +CATCTTCAGAATTTTCGTCTTGCGCGAAGGAAACTGAAAAAGTTAAATTA +CATTTTTTCTAGGCCACCAGGTTTTGCATTTTGGCCTAGAAAATGAAATG +AGAAGCTAGACCATGTGGGTTTATTGCGAGGAAAAGCTATCCCACGAAAA +CTTTTATAAGCGTCTAGAAAAGTAGAAACCTCGGCCACCAAGTTTTGCAC +TGTGACCTAAACAATTTTGGTAAAATAGGCCAGCAAAATTTCCATGATGG +CCTCAGGAAAACTAGGCCATTAAGGTTTCTTCCGTTACCTAGAAATTTTG +TAAACCTAGGTCACCAGGGTTTCTAGGCCACACAAGAAAAGCTAAAAACC +AACATTTCAGATCGGTCTGGAAAAGTGCAAAATTAAGCCACCAGGATTTG +CATGGTGGTAAAAAATTGGTAAACGTGAACTAGAAGTTTATCGGTCATCA +ACTTACGTGTCGTTGAAAATGCAATGAATAGAATAAAACCAAATGGTATG +CGCATCGCTGAAAAATTGCAAAAATTAATTTATGTTTTGGAAAATCAGAA +ATCGGAGTGTTCATTTTTTATGGGACAAGATAAAAAGAGATGGAGAAAAT +TTATATGATATATCAGGTCAAAATGACCTTCGGAAATTGGGAATGAAAAA +TCAAAATTAATTCGCGTGGAACTGGTTAAGATGATCGAAGATTGAAAAAT +TTGACAGAAATTGCAACAATACTAATGCAAAAAGTTGGTAAAATATTTTT +TTCAGTTTTCAGAAAAATCGATAGCAAAAGTCTTTTTTTTTAAATAAAAA +TTTCTGAAATATTTTCGCTGCGAGACCTAAGGTGACAAATTGGATTAAAA +ATTTTATGTGCCCATTAAGAGTTTTTCTGGAAACTGAAGAGAAAGTCGAA +AAATTAATATACCCTACTTAAATTTAAAGGCGCACGGATGAATTTCCAGA 
+AAACTAAAAAAAAACATCACAAAATTTGATACTCCAAGTTTAAAGATGCA +TGGGTAATCTTTGAATGAGAATTTTGTTTTAAATTTTTGAACTGCCATTT +CAGATATTATGAGAACACAAAATTCTAATAATGCGTATTGCGCAACATAT +TTTGCGCTCAAAATATCTCGTAGCGAAAACTACAGTAATTCTTTAAATAA +CTACTGTAACGCTTGTGTCGATTTACGCGCATCAAAATGAATTAAGATCA +TACATTTACCGACAAAATATTAAAATTAGGCAAAAAATAATACGAATAAT +GCATTTCGTAAATCGAGCACGTAAATCGACACAAGCGCTACAGTAGTCAT +CAAGAAAAATTACTGTACTTCTCGTTACGAGATATTTTGCGCATAAAATA +TGCTGCGCATTACGCATTCTCAGAATTTTGTGCGCCCGTAATAACCATTT +GTTAAATGCTTAAAAAATTCAGATTTTATTGAAAAAAATTTTTGAGCAAA +ATTTTTCGAAAAATAAATTTCGGCAATAAAATATTATTTTTCCATCGAAA +AAGTATCGTATGGTGTGCTTGACATAATCTTGGTAAAAAGTGGCATATTT +TGAAAACAAAAGGGGTTACCCCCTTCCTTCAGCCCGCCCGCCCGGGAAGC +CTGGGCATGCGCGCAAGTGATTGTTTTGTCTGTTGTTGCTTATGTATATG +CTTATTATTTACACATTTGTGTGTGTGTGAGTGACTACACTGCACACCTC +TTGAAACGGATTATCCGGCCGTGGTCGAGGAGGGCCGCCGAATTATGCAC +ATTCCTATGCTATGTATTGTGAATACTTCATTTCAATAACTTTATATATA +TACAGTTAGCACATACGATTTTTGAAAATTGGGGTTGTGGAAGAGTTTTC +ACACAAGTGGCCTAAAGTTTTTGAGTTTACTTTAATACTTCTAAATCACG +TGGTGTCAGGCGGTCTCAATACAATTTGATCTACAAAAAATGCCGGCATT +TTTTCCCAGAAAAATTGTGACGTCCGCACGCTCTCAACCATAAAAAATCA +GATGAGATCTGCGTCTCAACTCCCGCATTTTTTGAAGATCAAAGCGAAAT +GGGACTTTCTGACTCCACGTGCTAAATGAAATGTACGTTAAAAAATTATC +TCTGGAAATTTGGAATTGCTCGGTGGAGCGCGTTTGCCGCTTAGTAGCTG +AACCGTGATAGCTGCGCCGTGAAGAGAAGGGAAAAGTAAGGAAGTTATCA +AAACCAACGACGCACTAATGAAAAGGAACACAAGAAAAAGTGAAAGTGTT +TTTTTTTGGGTTTTTGTGATTTAGTTCGTGTGTGATAAAAGTTCAAATGG +TTCGGCAGGACAATAGGAAGGAAACTGGAATTTTATATTTCATCTCTTGT +TAAATTTACTTTTGTAGGTGTATGATTAGGTAGTGTTAATAGAAATCGGG +ACTCGTGGCCTATCATTTTTTTGTGGCCGAGTAAAAAAATCTTCCACATT +CGTCAACCATACGTGTGAATTTAAAGGTAATCTATAGGATATGATATTGG +TAAACTTTCACAGAAATCCTAAAGCTAGGTTTTGAACTATGGAGCTTTGA +AGTTTCTAGGCCCCTTTGGGAAACATAGGCCACCAACCTGAGAAAGGTAA +GGGGAGGGCAGGGTACATAAAATACAACTTTTTCTCTGAAGTTTTTTTTA +ATTAAGCGAAAAAGCCGACATAAATGTTTCAGTCATGGCCTAACTTGACA +TTGGTGGCCGATTTTTTCGAAAAACTCTTCCACGATTTAGAACATGAACT +TGGCAAAACTTCAAACAAATCCTAGAGATAGTTTTTGAATTATGAAGCTT +TGAAACTTCTAGACACGTTGAAAAAGTTAGACCTCTTCCACTGTCATAAA 
+CTTATAAATTCACAATTCAAAAATTATAGATTTCTTTCATATGTAGAATT +AGCCCAATTTTTAAATCCATTGCACGGCTAGATTTGAAATCATAGCAATT +TGAAAATTCTAGGCCACGCAAAAGAAAATTCTAGTTCATGGGAGTTGAAA +GAGAGTCTCGGAAGCTAATTTTAAAAGCACAAGATAGAGGAAAACCAAAA +ATTTATAAATTCATAAAAATCCAGTTTTTTAGCAGCTATGTGAAAAGGAT +TAGAATTTCAACACGGCCACCACCGCCGGAGATACAATCATTTTGGGAAT +ATATGTCGTTTTCGCGTATTTCTCACCAAAAAAAGATTCAAATCTTTTAA +TACGAAAAAAAGTTGGAAAAGAATTAGAAAAATGCTGAGAAATTTTTTTT +ATGAAAGCTTAGGCTTACAATTAGGCTTAGGCTTAGGCTTAGTTATTTTT +TCAGTGTTTTCCAAGTGCAGAAAGGAAGAAACCGACGCTTCCTGACTTTC +TATCACACACTGAGGGGAATTTCACCCAACCCCCCCCCCCCCCTCACTTC +CCGCTACTACTTTTACTGTACACTTTCACCGAAAAGTTATTGTGTCATCA +CGTCATTCGCCAAAAACGAATCGAACAAGGAGAAAAAATTGATGAGAAAA +CGCAAGTGTTTCTTCCTACACTACATAGTGTTCAACACCCCTTCATGATA +TGGCCGCGTGAAAACTCTTCCACGACCATAAAAGAAATTTACGGAACAAT +CTAGCAAATTTTCATAAAAATCGGTTGGAAACTATAAAAGTTACAGGGGT +TTGAATTTTCTAGGTCATGCGGAAAAAAACTCGGCCACGCAGGTTTTTAG +GGTTAGGGAGGCGGTGTCTTCAAAAAAAAAGAAACCTCGCACCTAATGAT +CCATGAATCTTCAAACAGAAAATGAGGTATAATCTTAGGTGGCCGAACTT +TTGTTTCTGAAGAAAACTCGGCCAAGACCACGAAAAACGTATTTTTTTGT +ATCGTTAATTGAAAAACTGAAAACAAGAAGTTTCTAGGCCATGTTGATAA +ACCTTCAGTAAAAATTTTTCCTAAGCCATTAAATTACTACTGAACTGAAA +ACCTAGGCCACGGCTTCAAAACATAGGCCACCAAAAACTGAAAGGGGGTC +GTAGGACCGGAACCTGCTATTATGAAGCAAAGTTTTGTGCAATAAAAGTT +TCAGAATTTCCAAGTTTTTAAACTATTTTTTGTCTGGAAATTATAAACCT +GAAATTTCGGGTGCGAGACAAGATGGTCAAAGAAAAATAACGAGAATTCG +GAGGGCAAAAAAATGAGAAGAATTTCCAGGAAAACAAGTGAAAGTGAGTA +TGACTTTGACCTATACGCGTATTGTCTTTTTTGTTTTCTTCTTCTCGAGA +AAAGAGAAACTGTCACAGCAGTAGTAGTTCTTCGATCACCTAATTACCTT +ATTGCCCCCTTCCAGGAAGTCACAGTTTTTTTTAGGAAATGTGTGAAAAA +TACACATTCCGCTAGGAAAAATTGAAATTGCGGTTATTGAGTTCCCTTGC +CACGACAACGTGGCCTAGGATACCGCCATGCTCTAGAAATCCCGCTTGTA +GCCTAGAATTTCCCATTGTGGCCTGGAGAAGTTTAGCGGGGCCTAAGACT +TTTAATTGTGATCTGAGAGTTTTTTCACATGGCCTAAAATGCTATACGTG +GTCTAAAATTCCTCATTGTTGAAATTTTTAGCGCGGCTTAGGATTTTCTA +TTATGGCACTGGATTCTCGCCGTGACCTAGAATTTCGTAATGTGGCCTGA +AAAGTTTAAGCGTGAAAAGTTTAAGTATTTTCAATCATAGCCTGGAGATT +TTTTACAAGAATTCCTCATTGTGGCCTTGAATTTTCTATTATGGCCTAGA 
+ATTTCGCATCGTGGCCTAGAAGAAATTTGCAGAGTGGTTTTGAAATTCGG +TTTGTGACCTAGGGTGCTATATTGTGGCCTAAATTCTTCCATCATAACGT +ATGATTTCACATTTTGACCTGAAAATTCTTTGCGTGGTCTAAGATTTCTC +ATTATATTCTCGAATTTAACTTCGACACGTGAGAACAATTTTGATGGCCT +AGAAATTGCTCCCGTGACCTAGGAAGTTTCAAATTTTTAAAGTTCAAATA +AATTTCACCAGAAATTCATCAAAATTTACAATTACGGAAAAAGAATCATA +TTAACCACTGCAACTTTTTTCGAACGAAGGACGAGGCAAAAAGGTTTCTA +GGCCATGGCCGAGTCCCCGACAAATTTCAGCGGCCATTTATTTTGCTTTG +TTTTTTGCGTGTTTTCTTTCGTTTTTCACCGATTTTTTCCGTTTTTTCTT +ATCAAAACTAATAAATAAATATTTTTTGCAGATGCTAAAACAATTTCCAA +GTGAAAAAATTGTGAATTCAGTCGGCAAGTAGCGGTAAAAGTGGTCAATG +TAATATGATGGATTACGGGAATAAAAAACCCAAACTTTTTCCCAAACATG +ATACATATGCTGTTTAGAAACACGGGAAAAATCGGTGAAAAACACAAGAA +AACAAAGCAAGTACAAAATGTCGTAAAATGCGGTGTATTGTGTGCAAACA +CCGATTTTTCGCATTTTTTCTCGGCCCTCGTTCGAAAAAAGTTGCAGTGT +TAACGGCGGTGGAAGAGCTTTACCTTTACAAAAGGTGGCCACAAAAAAAA +TAGAAGAAAGGATCCAAAAAGAGCAGGGAAGAGTTTTCTCAATTTGACGG +CCACGTGTTCTATAGAAAAAGATAAATACAATAAATACGTTTTTTGAACA +ATTAAAAAAACAAAAAATTTCAAGAAAAATTTAGAATATTTTTTCAGTTT +TTTTCCTGAAATTCAAATAGAATAAATGGAAGTTCAACAAAAAAAGGTGA +CCTAAATACTTATTAATTCACCTTCTTGACCTCTCGATTCTTCAGAAAAA +AGAATAGGCTTCTCCTCCTCCCACCTCCCAGACACTTATCTACACTTTAC +ATCATTCCTCCCGGCCCCCGGTTTTTTCACATTTTTCAACCTTGAAAATG +ACGAAATAAACAAGTGTTTTAGATAAATGTTTCTGTGTATGTTATCTGAA +ATCTTGGAAGGGTGTTGCAACAGAGCCCCGCGTGTAGCGTGGCCTAGAAA +TCCAAAAAGTCTTCTTTTAGATCTTCAGCTTTCTCAGCAAGTTAGGTCTT +AAGTCAAAAGGACTCTCCATGACCAAGTTATAACGATTTGTAGTTTCTAG +GCCATGGAGTACTCTCCGAAGGGGAGACATTGACCCCCTGACGCCGGATT +TGTAGGAATTTTCACGTGTTTATGCTACTGTAAGCGAGCTATATACGTTC +TTCAAAAATATCCGTCAAATTGTGACCACCGTTTCAGCACTTTAAAATTT +CTAGGCCACGCTGTCGCCATAATGGAATATTTTTCTCACGAGGAATAGAA +ATTAAAAAAAAAACTCTTCAACTGGTCGATATTTGGAAAACCAAACTAAA +ATGTTTCAGAATTATTAAAAGCTTAATTAAAACAAACAGCCACAAAAGTT +ACCTAAACTGATGAAATAAAGACATAGGCATACAGATTTATAAATCCTAT +AAGCATGGGTGTGTCACAGGTAAAGATTATGGAAAGTGAGGGAAAGTAGG +AAGATATTACTGGAAAGTTCTGGGGGAGAACGGCTGAAATCAAAATGGAT +TCGGTTGAACGCGTCGAATCACGGAGACTCGTTTGTAGGGCTTCCAGGTA +GATAAAAAATGTTCTGCCTGAATTGGGCCAAAACACATGAATCTTCAAAA 
+ACAAATTGACACAAGAAAATACATACTACTCCAAATTTTCAAAAATCAAA +AAATAGGCGATAGGTTTTTAGATACTAAAGTATATTGAGCAATTAAACAA +TTCACCCCCTACAACTGGCGGGCCCGCAGGGCCTGCTAGTCAGGCAAAAA +ATATATGGCCCACACTCCCAGCGACAAGAAAAATTAGGGGATATTAGAGA +CATTTACTCAAAAGGAGCATTACACAAATTTTAAAAATCTCAAAGCGGGA +AATTTTTTTAATGCGTACTGAATAAGAAATCGACGTTTAATGAAACTCAA +TTGCTACAACCCCGTAAACCTCCTTCACGTCCAATCAATAGTGACATGAA +TCCTTCCGACCAACGTCATAAAGATTGGAGATGACGAAATTTAAGATGTA +CCAAGTATGCTGGTACATACAATTTCTACGTCACAACATCTTGACAGTTT +TTTCCCTCAAAGAGAGGTGAGCCTGTAGAAGAGATACAAACGCTGCGTCC +TATCGTGGGTTCAGTTGTGGGGAAAAATTGTAAAAACTGTATGTACCAAC +TATGCTGATGTGCCAACTACCAACCAGCATAGTTGGTGCATCTTCAATTT +TGCCATCTCCAATCTTTATGACGTTGGTCGGAAGGACTTTTGTCACTATT +GATTGGAGGCTTACGGGGTTGAAACAATCGTTCATTAAAAGTCGTATTCT +TATTCAGTACGCATTTCAAAATGTTTCCCACTTTTTTTCGGCTTGATCTA +CGTAGATCTTTAAAAAATGCGGGAGAAGATATGCAGTGTTCTCAACTAAT +TTCGCATAGTTAAGAACGTGCTGACGTCACTTTTTTTTTGAGCAAAACAT +TCCGGCATTTTTTGTAGATCAATCCGTAATTGGACAGCCTGGCACCACGT +GTAAGAACACGATGCCTCTATTTTTGCCCGGGAGTGTGGGCCTTGAAAAG +TTGTGAGCTGTAGAAAAATGGACAACAAGCATGTTGTCAGTTAGCAGCAT +TTTATAAGTCAAGGACAAGTACAACCGAGATAAGTGAAAAGTTTGAAAAA +TGGAAAACTTTCAAAAAACAGCGAAGGTTCTTTGATCGATGCAAGGGCAG +AAAGTGAATAATGCATGTTTTTGAACTTAAAACAAATTGGCTAGACTTTG +ACTGAAACTACCTTTTATAAAAATGAAGAAAATGAACCATTACGAAAATG +TTAAATACACCTGAAAATTGGTGTGTTTTTATTGTCGTTCTGTAATTAAA +TTTTGAATGATCGCTCTTCTGCAAAGAACAACCACAAAAAATAATTCGAT +ATACTCAAGTATCGACACTTATGATTTGAATGCAAACATATAACAAATTA +AAATTAAAATACGGTCATTATATTTTAGGAAAATTATGGAAATTTTTTAG +GCTTATGATGTTTCAAAAACTTTCAGAACTCTAGTAAAAAACATTTTATA +TGTAATAAAACACTCAGCGACTTTAAACTTTGCAAGACAATTTGGGACAT +AATTTTGGGACATTGTAAGAACAGTTAATTGGAAAATAAAATAGTTATTC +GAATGATCGTTCTTCAAACAACCTTACAAACAACAACACACCTACTCTGC +TATAATAAGAAAATTTAAAATACTACTCAAGTTGTCACTTCAAGACCGCA +TAAGTTTATACCACTCGAACAAAAAACAGTCTTCGTTCTTTCTTTCTTGA +AAAGAGACATAATTGAAAATGAACTAATCAGCCGCTGCCCGGGATGGAAA +AAAGAAAAGAAATTCCTATCATCTTTTGACCTTGTTATTTTTGCAAATCA +TGTCTGAATAATACACTTTTTGATACATATAAATCAGGCGTGGGCGACAA +ACAATTTTTCCGGCAAATCGGCAAATTGCTGGAATTGAAATTTCCGGCAA 
+ATCGGCAAATCAAAAATGTTTGAAAATAGGAGAGTAAAAAGGTGGAAGTC +AAGTGATGGTAAGTACCTATTTTGTTTCAATCAAATTAAATTTTACACCA +TCTCTCTTTTTTTTCCTTATTTAACAGTCTTTGGTCTGTGTTCTGATGTT +GAAAGATAAGGGAACATGGACGAGTTGTTTTTGGGAAAAAACATTCTTTT +TGGATTATTTTAGTAAAAGAAAAATTTAAATTTATAAGCATATTCACTAA +ATATTTTTGTGGGAATTTGAAAAAAAAAGAAATTAAAGAAGAAACTGATA +AAAACTAGAAAATGTATAACTGGGAGAAAAAAGTTTCCAATTTTCAGAAC +TTTTGAAAAATTGTAGAAAATAGTTCATGCTTTCAAAGTTGTAGGAAAAA +ATAATATGTGGCCAATTTCCGAAAGAAAAAACTCTGCCACGACCAAAACT +CAGAACTAGACTGAAATTTCAGTTGTAGCTGACAAAACCATCCAAAAGAT +ATTGTAGGTTACAATATCTAGGCCGTGAAAATTAGAAAACTCGAATCAGA +TATCCTTGGCCGGAATGCGAAAAAACAAAAATTAGAGTTTTCTAGGCCAC +AAAAACTCGTCCACGTGTGCATGAACTGTCAAGTGGTCAACGTAAAAGGT +GTGAAACCCTTCAGAGAAAAACACTAGAAATGAGATAAAGTAGGTGAGAC +CTCCTGTCTCTGAGAAGCAACGTTCGAAATTCTTTTCCAATTCCATCTAT +TATTATTATTATTATTATTATTAGGACGGTATGGCGCCAGGAACTGGTTG +TTTATACTGACCACTGCCTCTTCCCGATTAGGATTCTGAATTTCGGATTT +CGGGAAATTGAGAATGGCAGATGGATGTAATTTCATTAATCATGTTCCTC +AATAAGTTAAGAAACAAAAGAAGGAAGATATAGTGAATCAGTGAGGAACA +AGAACATAGCATATATGGAATTGGCGGTGTAAGACTGGTTTACATGGTTA +CACGGAGGGAAAAAGATTTGAAAGTATTCGTCATCTTATCTAGGTAGTTG +AGACACTTGATTAGTCATAGTTGAAAGAACAAATGGAAGAAGTTCATATG +CTGAAAGCATCGCTAATAACATAATATTGGTTTTAAAGGTGGAGTACGGT +CTGTGGGTTTTTAACCTAAATGACAGAATACAGTCCCAATATACCGAATA +TAACTGTTAAAGATTTATAAAATTTATCTAAATTTTTTATGATTTTTAAA +AAAGTTTTTTTTTTTTTAATTTAAAAATTTATTCAAAATTCCAAAAAAAT +ATTTTATTCAAAACTAAAAACCCAAATTTTCTGAAACTTTACCATTTTAC +TTATTTATGTCTTAACAATAATATACACATATTAACGAAATACCCACCGT +TCAAAATTTGACTTAATCTCCAGTTTTAACTGCCAATGAGAATTTGTTCA +AAATTCCAATCTCACACCCAATGTTTTTCTTTGTTTTTCATTTTAAAACA +GTTCTATCCAGTTTAAAAATTTACCAAATAAAAATGTGTCTATTCCTCTC +ACCTCCAATCCCGTGCATGTGTTCATGTTTACCTATTGAAACGTGTGTAT +TACTATATAATACAGAACAAATATAAAATGGCCACTCTTGCACCTCCAGC +GGAAAGAAAGCTTTTGAGGTGTTACGAACACTGCAACACTTTCCTATTTT +ACTAGTTTTGAGCAATTTTCTGGTCCTTTTATTTATTTTTGTTTTGTCTG +GAACAGGAAGTTGTTTGTGTTCTGTTTGGAAAATTAATCAGTTCTACCGT +GAGATTTTTAAGTTATTTTTTGAATGGAATTTCAATTATTATTTCATTTC +ATTTGTTTTTTTTTCTCACCGAAATCCACTTTTTTTAGAAACAATTTTTT 
+TTTCTACATTTTTTTCCGATTTTTGTCATTCAATTTTTCAACTTCTCCAT +TTTTTCTACAAAACACCGAAAACTACAAAATACCTAAAAATTGCAAATTA +AAAAAATTTTTAAACATTTTTGCCCAAAAAATTGTTTTTTTTTTCAGAAC +ATCTAAAACTCACTAAAAAATATGCTCAAAAATTTTTTTGAGAACACTTA +ATTTTTCGAAAATCTTATTTTTGCAATATTAAATCCAGAGAAATTAGAAA +AAAAAAACGGTTTTCTACAGTACTCCTACCCTCCCTCTCCAGTACTCTTA +AAATATTCCTACAGTGTTCTTACAGTACTCTTACACCACTCCTACACTGT +TCTTACAGTGATCCGATACTACTTTTATAAGACTCCCACAGACTCCTACT +ATACCTATACCTTATCACTACAGTATCCCTGCCGTACCCCCCCCCCCCCC +CCAGTACCTATACTTGCCACTACAGTATCCTACAGTATTTCTACAGTGCT +CCTACACTACCTCTACAGTACTCCTACAGTACCATTACCAAAAATTTGAA +TAGAAAAGTGAAATTATGAACATTTTTTCCTGTATACCAATTTTTAAAGT +TTTAGTTTTGTAGTTGTTTTTGACAACGAAATCAATTTCAAAGTATTTTT +CTTTAAACAATCGTACTTTTTGCGTACTGTTCTTGCACTTTTTGGGATCT +TTTGGACTAAAAGTGCCCCCCGAAAAAAGCGCGGAATTAAAAGAACAAAA +ACCGGATAGACTGAGGAGGTGTCGAACGATGGAATGAAAAAAGAAACAGA +AATTAGATTTGGTTCAGTGAATTCTGTTTCTTTTATTTGGATTTTGGTTT +TTTGGAGTCTTGGAAAAATGTTTTATTTTTGTTCTTTCCAAGAGTCCAAA +CGTTTTTTTCCACCAAAACTCCAAAATTTTTAAAATTTGGAAATTTGATT +CACTGCCAAAACTTCCAGAGAAATTAAATGTTCAGTTTCCCGTAAATAAC +AGGGTTTCGTATCAAAAACTGTGAATTTTCCCGCCACCTATGTTTTTTAA +AAATGATTATACTCTAATTTCCCGTCAACATTTTTTTATTTTGCGAAAAA +TGATTATTTGAACATCCCAGAAAATTTTAAAAACGTCATGAGTTTTTTGA +AAATTCAATTTTTCGTGTTCCTGTAATAAAAACCATAGAAACTGTTGACC +TATAACCTGCCAGACGTTCTTCCTGTCTGTTTATACCCCCATCTGCTCTG +CACACCAGACACCCCTCCTCCTAACTACACTTTAAACTGTTACCCCCCCC +CCCCCCGTTTACCCCTCATTTGTTTTTTTTTCTAGTGCATCCCTTCCGTG +TAACCTGACACTTTTTCCAAGAAGCTCGCATTTCTTAGGAGGAGCAGCAA +TTTTTCGTTACTTAATTTTCCACACAGTAGGAAAGACAAAAAGAAAACGT +CATAGAAATTAAAAATTCTGCGTTTAATTTTTTGTTCAGTTTCTGAAACT +CAATTTTGTGGAACCGTTTTTTTCGATCTTTGTAAAATTGAATTTTTTTG +ATTTTTGGAAAATTTGGATTTTTGCTGGAAAGTCAATTTTTGTGAGAAAA +GGTTTGGCGGGAAATTTAAATTTATTGAGAAAAATTTTAGCGGGAAATTA +AATTTTTTGATAGTGAGAAAAATTCAATTGAATACCTTTTTTGTGCGAAT +TTTTCATAATATCTGAAACTCAAAAAATCTGAAAATTAACTCGAAAAAAT +TTTCAAATTTGATATTTTGGCAATTCAAAAATTGCTTAAAGAAAATAACG +GAACCTCGTTTTTTTTCTTTAAAAAAAGGTTTTAGTAACTTTTCCGGTAA +CCAAGGTTACTCTTTCCCCATCAATCTCCCAAAAGCATGCCAGAAAAAAA 
+AGAGAGAGAGAGAGAGATGCGTGTATGATTGTGTCATTTGACAGTGCGCA +ACAGACACACATAAAACAGCTGACATCTTCTCTTTTTATTTCAACTGATT +TTTAATTTCCCTCCTTGTGTTCTCTGAGTCTCTCTCATTTATTCAGTTTC +TCGATTTTCTTCCAAATTCATTAATATTTTAATCTATGCGTAAAGTGGAG +GAAGATTTCTTCTACGTTTTGATTTGCAATTTCTCGGGTTTTTCAAATTA +TGGATTTTGAAAAAAAAAATTTCTGAAATTTATTTTTGATTCATGTCAGA +AAATGAGTCTCCTGAATTTTTTTTGGAAAATATAAAACAAATTCTGAATT +TTTTTTACTTTTTGATGTGTAAAACAAAAAAATTGATTAGAAAAATCTTT +ATCTATTTTTTGAACTTTTCAATTTTAAAATCAAAATTAACTTCTAAGAA +ATAAGATTTTTTTCGGAATTTTGAAAAAAAATGTTGTTTTTTTGAGATTT +TAGTACATTCAATTTTCGTAAAATTCGAATTCATTTTTTTATTGTGCAAA +TGTTTTCTGACTGACTATGAGAAAAATTTCAACATTTTGAAGAACCCCCC +CCCCCCCCCCCCCCATTATTTGGACGGAGCCTGAATACGAAATGTGAGTT +CTCAGAGAAGGAGGTGAACAATTTGATTGATGTGCATATGGAGATGCTCA +TTGTGCATATCAGTTTGGAGCATCTATCTCAGAGTGAGCATGACATTTAC +TTTTACCCATTTGCGTAGGCTCCGCCATTTCACGATTTTTTTTTTCCAAT +TTTTGAACTAGTATAAAAATTTCTGAAATTTCTGTAATTTCTGCCATCTC +CTCATAAATTCCATTGTATTCTTTGCACTATTGTTCCACAAAAGTGCAAA +ATTTTCGAATTCGTAATGTGTCTTTCATATATATATCCATGCACTTTTGT +CCCTTTTCCTGAATACTAATTTGGTGTGCTCCAAGAGGAAATTTTTTTGT +TTTGTTGAAATTACTGCTCCTTTTCGGAATTTTTTAATTTCTGAGAAAAA +TTCAATTCCTAAATGTATAATACTAAAGTCAAAATACTAAAGTGTAATGG +TTTGTTTTTTGTTACAGTCTTTAGTTTGTAGCTTTTGTTTTGTGTAGTTT +TCCAACTTTTTTTTTGCCAAAATGAAGGCTGAGAAACCGAAAAAAAAAAT +TCAAAACATAGATTGGGCTGAAGCGGTATTATTGGTTTACTGTAGCATTA +CTGGGTTACTCTAGTTTGGCCCATGTTTGGTGTACGTTAGTAGTAACAAC +ATTTCGTTCATTTGTTTTTTGATTTGGTAACTTCTGGTGATTAAACTTTT +AATGTACCTAAATCTTCTGTTGCATTATTGATTTGTTGTTTTCTTCTTTT +TAAGTTAAACATGCTCAAATTAAATTCAAATTGAGTTGAAAATGCAAAAT +TATTATCAACTTGAATACCCAAAATTTTGGACGGTATTTCAAAAAAATTG +ATTTCCAGCCGCTTCGACCTTCGACACAACAAACGAATGCTAACTTTCCC +AGGATTACACAAAATAGGTCAAAATCCCCTTTTCTCAATTTTTTGCACAT +ACATCCTCATCATCATCATTGTAATGATCGTGACCATCCGGTCTTCTATT +ATTAAATGAGAGAAGGAAAGAGTATACATATGTGTCTTTTATATTAAACT +AATTATATTTCGACACATTCTGCGCATATCACTACTGCACTAGAATTACA +TTTTAAGAATAAAATGTAACCAAAAGACCAAAAAGTTGTTTGCATTTATT +GTTATAAACAGAATAGAATGAATGTTAGGAAACGTTATTAAAACTTGCAA +AGTGACACAATTCGGTATATCGATTTCTCTGAAATTACTTAACTTAGTAG 
+GCAGAGTTTTGATACTGATACGGATTTTACTAAATTTAATTTTTGTAAAT +CAGTATTGCTTTTATAACCTATTAAGATTTTTTTGTTCCAGCGTGCAATT +ATTTTTAATTCTATTTAGGATTTTAATAAATAGAAAAATACGTTTTGAAT +TATTTTTTACGTTATAATATTCAATAGTAAAGAAGCACACATCAATTCCC +CTATTCAGTTTCCATACTTTGCCTGATAAACATGAATAGTAAAATAGACA +CTTTCATTTATACAGACACCCCCCCCCCCTTGCTTCCGGATTAAATCCCA +ATAAAGGGTGTGGCAATAAGGCATTGTTGCAGTTTCATTGAATTTATTAA +TCATTTGACAGCATGAAATTACAAAACTGGAGAATTTCTAGTTTAAACTT +TTATACAATGAAGAAATCTTCCATAGAATCAATATTAAATTATCAATAAA +TGAGTATTTACGTTTTTCAGCAACGGTATTATTTTTATACTTACTAGTTA +TTAAGTTAGTAGTTAGTTACATTTTTAAATGCGATGCAGTCGAAATTTTT +TTATAGACTAGAAATTGTCTTAGTTTCCAGCCACTGACAAGTCGGCCAAA +TTTCAAACTTTCAGTAATTTTGCGTCATTTGTTGAGCCGCCATAACTCTT +TTTTTGAGACGTTTTCAAGAAGTTGAAATTTGGTATTTTCAGACATTTGT +ATCTTATTTAAAATATGAGTATTCACTCCTTTTACCTGCCAGTCGGCTGC +CTCCTTATCTCCTATTTTTCTCTAATGAGCACACACCACTATCATCATTA +TTATTACCACCTGTTCTTAATAATAAACATTTCCCGCACTTTATTTCCAT +TTCGATGCATTTTATAGAAAATGTTCCCCCTTTTCGAGCTTTCCAATTAG +ATTCACCCTTTGATTGCTCTTTCTTGGACGGAAGTGTTCCCCAATTCCCC +TTCATCTTATTACATTTTTATGCAGATTTTATTGCATTTTAGTGCGCTAG +ATTTTTCATGGGATTTTGCTAAATAATGCAATCACGCTCTAAATAAAATT +CTCAGTTTATATTTTAATGTTAAAACTGTGTGTAAAATTTACAAACATAG +TGAATACATTTTTCGAATAAAAGTATGAACATTTTTCGAGGGATCTATAG +AAAATTGCCGGCTGTTCCTAAATGCCCTTTCACTGGTCACCAAGTTCCTG +TTCATGCAATTCAAAAGAAATGTTCAGCCCCCCGCTCAATAGTCGCCCGC +ACCACAAAATTGAAAAGCGAACATTTTACTAGTCGAACTTTTGAATTGAA +CATTTTTGGAGTATTTTAAAGAAATGTTTTCAAGTCCACCAAAACATATG +CCAGGCCACAAAAATTTCAATTTTCCGCCAATTTTTTTCTCACAAAAGTT +GAATTTTCTCGAATTTCCGATAATTTTCGATTTATTTGAAAGTTATTGAA +ATTTATATTTTCTAAAACACTTCCACAATAACAAATTAATAGATATAATA +AAATTTGAAATTCCCATGCCACACCTCAAAAAGTGGCTCCCTTAACTCCT +GAGTGCTCGCGGAGCCCTACCAAACTTGGAACATTCTTTTCCCAACGGAG +TCATGAACATATGAGCATTAAATGAGTAGACGACATGTGTATACATAGGA +CCACCACAGCTGTTCCTTCCTTTTTATGAGAAGACATGTCATACACACAA +TGTCTCTTCTCCTTTCCGATTGGAAAATTAAAATTATAGGAGGGGGGGGG +GGGGGGGGCGATTCTTGACATTTCAATTACCCCCCCCCCCCCCCCACATC +CGTTATGTTGCATTTTGGTAGGCTTGGGGCCGAATAAGTCTCCCATGCAG +GCGTCAAAACGTCTGCGGACCTACGCCTGCCTCACGCCGCGCCTGCTGTA 
+CAGGAACCCGAAAAGTGTCGAACGCGGCTAAAGAACCACTTTTCGCACTA +TGCGGTGTAAACACAAGGTTTTGATTTGTAGTCATAGTGCATATATGATG +CGGGAGGCTGACAGGTAGGAGGCAAGCGTTCAACCGCGCATGCCCACCAT +GGAAGCCCTAGAGCCTAGTTTTAGATGAGTTTCAGCATTTTTGATCTACA +AGATTTTGAAAGTTTTACAAAAAAACCAAGATTTTTTGTAATTTATGAAG +TGATTTTTCTACATTTCATCTAGAAACCAGCAGAGTTATTTTTTGGGTAA +TTTATCAATAGAGCGCGTTCGCGTCATGAAAACAAAATGGCGGCCGAACT +TTGAAAATCGATGGCTGAATTTTTCGGAACAAAAGTCCGTAGCTATAAAG +CACAGGCCTACGTTTTTTTTGCTTTTAAATGACATAGTTTTCTAAAAAAA +AGTCTGTGGCCGAATTTTTAAATTTTGAATAGGGGCTTATTTCAAAAAAA +ATTTGAGGCCAACCTACTCTTCTCAATTATAACTTGCCATGGCAGAACTT +TTGAACAACTATTACAACGTTTTCAAAAAACAAGTTAAAGCATTTTTTCG +AATAAAGGTTAAATTGGTAGCCTAGTTTTCTCAAAACAAAATACTAAAAA +AAATATCGATGACCGAAGTTTATATTTGAAAAAATATGTGACCTAGAACT +TTTAACTACTCTTCCAATGTCTTCTAGCTTCTGAGAAAAAGTTCGTGGCC +TAAGTGTACCACGATGGTCTAGTTATCTCGGAAATAGCCATAGCCTAGTT +TTCTCAAAAAAATCGGTGGTCGAGTTTCCTGTATGTAGGCATGTAGGTAT +GTAATTTTTATCTTCCAGGTATCTATTTGTACTTCCCGGTAGCCAGGAGC +AATACTTTTGAATTCCTTTGCTCTCCCTATTCAGCTTCTCCCCTCACACT +CGTTTTTGGTATTCTATTGTTCTCCTCCTCTTCCCATTCTACCCCCATTT +CATTTTCCTCTTGGAGACCCCTGTCTCTCTCTCTTTGTATTGGCTCTTTT +ACGTCTTCTAGTAGGCTTTTCCTTAATGAACTTGTGTGTGTGTGCTTGAA +TCATCATCGCGTTCTCTCGTTCCATTTCCCTTTTGCAATTTTCTTTTCTT +GGAGAAATTTGGAAAGTTTTGCTCAAAATTCCTACGTACACATGCCTCAA +AATCTGGTTGTATATGGCCTAACATTCAATTTCTAGTGCACACAATAAAA +TTTTCCGGGAAATTCAAATTTTCTGATAATGAGGAAGGACTTTGGCGGAA +AATTCAAATTTTAAAAAAGTAGGCGGGAAATTCGAATTTCGAATTTTCTA +AACTTTTTGGCCGGAATTTCAAATGTTCTGAAAAATAATTTTGGAGCGAA +ATTCATATTTTCTCAGAAAAATCTCAGAAAATTTGAATCTCCCACTAATA +TTCTTCTCAAAAAACTGTTACGTGACCTATTTATATCTTATTTATTGGTC +CATTTATTACGTGGCCTAAAACATTTTTTCTTTTCCCCCGTTTACAGCGA +TCATTTGAGATGGCCTAGAAAACGCACGTTAGGCCATCAGGTTTACGGGG +TATAGTACAACCTCTTGCAGGGGACTGAGGTTCAGAAAACTACAAATTTA +GCTAATTCCCTACCTACTCTTGCAAATCCAATAACTAACCCAATTTCCCG +GGGAAATTTTTATTCTTAGAAAACAACTTGTATATCTCCGCAACTACATA +TCCCTCCTTCAAAATAATAATAATGGGAAGCTTCTGCTTCTCTACACCTT +CTTTTGACCAAAAACTTGTTGCCAGTGCCCTATTATGTACGGTCCGTTGA +GTTTTGTTGTTGTTCACAATAATATGTGTAATTCGCGCATCTGAAGAGGG 
+GAGGTGTAAATGTTGTGTCTAACCGTTATGTTGCTGTCACCTCCAAGTGG +TACAGGTGGCCTAGAAATTTGCGAAAACTCTTCCACGATTATATTTCTCA +AAAAATTACTTTTCTCAGAAAATTTGATTTTCTCACCAAAATATTTCTCA +GATAATTTGAATTCCCGCCAAAATTGTTTAAAAAAATAGAATATCTCGTC +AAAATTTTTATCAGAAAATTTGAATTCCCCACTAAAATTTTGCCAAAACA +TTTTATTTTCCCACTTAAAAAAGTTTTTTTTTTAATTTGAAAGTGGAAAA +GTTTTCTAGGTCACTTATACATTACCGGTGACCGAGGTCGAATTATTCCT +AGGCCACAAATCTCCCAAAGGGGTATTCCCACACCTAATTGACACCTCCC +CGGGCACCTTTGAGCACTAAGTAAGGACACCCACCCCTCTCAGCTTAATA +GCTTTTAAAGGAGCACACCAGCCATATGTTTTCTCTAATTCTCTCACTCT +CTCTCTCGGCTTCTAAATTCAATTTCCCCCCGTCACGCCTCTCGTTACGT +GAGGAGTGTTCAACATGTTCCCATTCCCTTATCCGATAAGTTTTTGGTAG +GCAGGAGCTTGTACTAGTAGAGGACATTTGTCGCTTCTTGGTCTTTCGCC +TCCATTTTCCGTAATTTATACGTGACTTAGAAAATTAGAAAAAAACGCAT +TCCACGAGTTTGTTGGACAACTTTTTTTACGTGGCCTAGAAAAAAATGAA +ACTCGGCCACCGACTTTTCTTTCACTTTTGTGAAGATATGATCTCATTCT +ATAATTCTGTCGAACCGATTAGGAGGGTGGCTGAGTTTGCACTTTTGCGT +GTTCTATACCATTTACAGAAGCTAGTCCCCCCGTCATTCTTTTTCCTGCA +AAATGTTTTTTTTTGCCCCCGGTGTCTCATCATACAGTACAGCAGCCAAT +ATTATTTGTTTTTTGTTTCGTCTTTTCGTCTTTTCGGAGAAAAAACGCTC +TATTCTATGACAAGGTGACGTCATTGGACTCGTGGCCTAGAAAGTTTGGC +AAACTCTTCCACCATTCGATTTTACAGGAGCCTCAAAGCCCCACATGGGG +CCTAGAAATCTTGACATTTCTAAAAAACATCCATTTTGAAATTTTTCGAA +ATTGGTGCAACCTAGCAACCTCTAGAACTTCACGTGACCTAGAAAAAACA +AAACCTCAAAATTTCATGTGGACTAGAAACTTTTAAAAACTCTTCCATCG +ATTTTGTTTTGTGGCGGGGACTTTTTCCAGTTTCTTTTTCACATAAAAAC +CAAAAAAAAACAAGACGTCAAGCTGTGAAAATGATTAGTCACAAAAAGAA +GAAAAACAGTAGTGTCGTTCCTGCCAATTTCAGAGATTCCTTCTTCGCTT +CTCACGACACGAGGGGGGGAGGGTCTAATTGGCACCCCCCCCCCCCCCTA +CTGAGGTGAGAAGCTTGAGTTGACTCACACGTCGCTCTTGTTTTATTCAT +TTTTGGCACAAAGTGAAGTGTGCGAAAATTGTAGTAGTCTGTGTGTGTGT +GTGTGTATGAGCATTTCGAATTTTAAAATACTTTTTTAATGTACCGCAGG +CCACGTGGTAAGTTTTTTTTCATAAATATAGGATTTGTTTTCTTGTAGAA +AAGTTTTTCGAATTTTGAAATGAAAAGGATAACTGGGAACGGAGAAAAAT +TAAGTTTTAATTTCTAGGCCACGTACCAATGAGGATGAGAAGTCGGTTTT +CTAGATCACAAGGTCATCTAAACTTTTAGGAAATTTAAAAAGTTTGGTGA +TTACGAGAATTTCTAGGCCACGTCGCAATACACCGGCGCGTTGGCGTTTT +TACACTTTCTAGACTACTAAGATTTTCCTAGGCCACGTCCCAATCAGGGC 
+TGTTGCAAAAAAAAATTTTCGACCAAAAAACAAAAAACAAAAAATTTTTT +GGTCTAACTATGGACCAAAAAACAAAAAACAAAAAAACAAAAAACAAAAA +AAAATTCAATGTTTGATATTGAACGGGACAAAACGGAATTTTTTCATGAA +ATATTCTGGTGATTTTTTATAAAATATTGAAAATTGAACGAACAGAAGGT +TTTTTGTTAGCTTTTTTGCTGAATCAGTGCTGTAATTTATTTTTTGGTTG +TTTTTGGTTTTTTTTTTGGTCTCAAAAAACCAAAAAAACCAAAAAATCAA +TTTTTCCCAAAAATACCAAAAAAAACCAAAAAAAAACAGCCCTGGTCCCA +ATGTACAGGTGTTGTGGTGCTTTTTGTGAGCTCTTAGGCCAGCTGGAGTT +TTCTAGGCCACTTGTAAATCAATGGTTTTTTTTCGGTGTTCTAGGCCACC +TGTCACGCCTAGACCATTGACCTAGAAGTTTAATGTTTTTGAATAATAAG +TACACGTACTCTCACATACCCGCAAATGCACATATCTATCTTCATTTCGT +GATTCTCTTCATCTCATCTTATTTTCCATTTCGGTTTACCCTTTTTTTCT +CTATGAATATATTTTCATGTACTTGCTCTTCTTCCCTGAAAAAGATTCTT +TTACACTTTTTTCATTTCCTTAGGCAGTTGTAAAGAACTTGTCATTAGAA +CTTTCAGAATGACATCAGAGTAGATGGCCGAACTTTTTCTAGGCCACGGC +TTGGGAGCCAAACATGGCTAAACATTCTAACTTCCTGGGTCATCTAGAAG +TTTCTAGGTCTTATGGGTGAAAATTGTCAAATGACAGAATTTTCTTAAGT +TATAGTTGAATTTCCCGCCAAACCGATTTTTTTTTGAAAAATTTAAATTT +CCCGCCAAAAATTTTTTGAAACGTTTAAATTTTATGCCAAACTTTTTCTG +AGAGAATTTGGTTTCGCTCCCAATTTTATTCTCTGAAAATTTTAAATTCG +CACCAAATTTTTCGAGAGCTTTTTGAAGTTCCCGGCAAAATTTGTTTTCG +AAAAATTAGATCCCCGTCAACTATGATAGAGAAGTTACGTCATCTGGCTA +CCCTGGCCGAATTTTCAATATATTTCTAGGCCACATATTGACCGCTAAAT +CATGTTGTGGAAGAGTAGTGACGTCACTTGGCGACCGCGACCATCGAGCA +GCGTGAGGGGCGAAGTATCCCCCTTCCCAACCCTAAAGACGTCGTTCATC +GCTCCTTTTTTGTTGTTGTAGGAGTTCCGTTTTTTTGTGTCTCGATTCGT +TTCCTCCTGTTTGTTGTCATTTCTGAACTCATCTCTACACTTCTCTCGTC +TTCTCTGCGTCTCTCCATCCTGCACACTATTTTTGTTTACCTCGATGATC +TTTCTTTTTGCGTGTGTGTCACTGACGAACGAACGGCTGACACAGACTAT +TGTTTTAGTTTTGTTGTTTAGAAGTAGTTGTAGTATTCCAATAGTTTTTC +ACACTTTGCATTATTTCAGATGGCTCAGGACGGAGACCGCCATCACATTT +TTGTCAGTGCTCCGCCGCCGCCGCCACCTTCCTCGTCGATTCCAGCTCCA +TCGCAGGTAAATTTAAGGTTGCAAGCGCGCTCTAATGACCTTTTTTGTGC +CTCCGGCTCCTCTTTGGGTTATTGCCAACGCGCTCCAATTCGAATTATTC +TTACTTTCAGCCAAATAGACTGGAAAAGGAGCCGCTCATCGATGAGGAAA +CTGACATGATTGACGAATCCCGAGCCACGTATTGGAAGGGATGTGAATTC +TTGAAAGCCTCTGGGTTGTACTCGAGTAAATGTGAGTTGGTCTAGAAAAT +TTGTAAAGCTCTTCCACGAAGTAATTAATAAGGGATACTATTGCCGACAC 
+CCAGGAACACGTATACCACGGAATTCAAATTTTAATTTTTTGAAAATATT +TTGGCGGGAATTTAAAATTTTAATTTTTTGAAAATATTTTGGCGGGAATT +TAAAATTTTAATTTTTTGAAAATATTTTGGCGGGAATTTAAAATTTTAAT +TTTTTGAAAATATTTTGGCGGGAATTCAAATTTTAATTTTTTGAAAATAT +TTTGGCGGGAATTTAAAATTTTAATTTTTTGAAAATATTTTGACGGGAAT +TTAAAATTTTAATTTTCTGAAAATATTTTGGCGGGAATTCAAATTTTAAT +TTTTTGAAAATATTTTGGCGGGAATTCAAATTTTAATTTTTTGAAAATAT +TTTGGCGGGAATTTAAAATTTTAATTTTTTGAAAATATTTTGGCGGGAAT +TTAAAATTTTAATTTTTTGAAAATATTTTGGCGGGAATTTAAAATTTTAA +TTTTTTGAAAATATTTTGGCGGGAATTTAAAATTTTAATTTTTTGAAAAT +ATTTTGGCGGGAATTTAAAATTTTAATTTTTTGAAAATATTTTGGCGGGA +ATTTAAAATTTTAATTTTTTGAAAATATTTTGGCGGGAATTCAAATTTTA +ATTTTTTGAAAATATTTTGGCGGGAATTTAAAATTTTAATTTTTTGAAAA +TATTTTGGCGGGAATTTAAAATTTTAATTTTTTGAAAATATTTTGGCGGG +AATTTAAAATTTTAATTTTTTGAAAATATTTTGGCGGGAATTTAAAATTT +TAATTTTTTGAAAATATTTTGGCGGGAATTCAAATTTTAATTTTTTGAAA +ATATTTTGGCGGGAATTTAAAATTTTAATTTTCTGAAAATATTTTGGCGG +GAATTCAAATTTTAATTTTTTGAAAATATTTTGGCGGGAATTTAAAATTT +TAATTTTTTGAAAATATTTTGGCTGGAATTTAAAATTTCTGAGAAAAAGA +ACCTTCGTGTCGAGACCCATCGTGGTGAGACCCTTCGTGGTGAGACCCAT +CGTGGTGACACCCATCGTGGTGAGACCCTTCGTAGTGAGACCCTTCGTGG +TGAGACCTTTCGTGGTGAGACCCATCGTGGTGAGACCCATCGTGGTGAGA +CTCATCGTGGTGAGACCCTTCGTGGTGATACCCATCGTGGTGAGACCTTT +CGTGGTGAGACCCACCGTGGTGAGACCCATCGTGGTGAGACTCATCGTGG +TGAGACCCTTCGTGGTGAGACCCATCGTGGTGAGACCTTTCGTGGTGAGA +CCTTTCGTGGTGAGACCCATCGTGGTGAGACCTTTCGTGGTGAGACCTTT +CGTGGTGAGACCTTTCGTGGTGAGACCCATCGTGGTGAGACCCTTCGTGG +TGAGACCCATCGTGGTGAGACCTTTCGTGGTGAGACCCATCGTGGTGAGA +CCCATCGTGGTGAGACCCATCGTGGTGAGACCCATCGTGGTGAGACCCTT +CATGGTTAGACCCATCGTGGTGAGACCCTTCGTGCTGAGACCATTCATGG +AGAGACCCATCGTGGTGAGACCCTTCGTGGTTAGACCCATCGTGGTGAGA +TCTTTCGTGGTGAGACCCATCGTGGTGAGACTCATCGTGGTGAGACCCTT +CGTGGGGAGACTCTTCGTGGTGAGACCCTTCGTGGTTAGACCCATCGTGG +TTAGACCCATCGTGGTGAGATCTTTCGTGGTGAGCCCCATCGTGGTGAGA +CCCATCGTGGTGAGACTTTTCGTGGTGAGACCCATCGTGGTGAGACTTTT +CGTGGTGAGACCCATCGTGGTGAGACCTTTCGTGGTGAGACCCTTCGTGG +GGAGACTCTTCGTGTTTGATATTATATTGGCGGGAGCTAAGGAGTTGGTG +TGGGATAATGTCAAGGTACTCTAGGGGTATTGTGGATTTGCTGTAGGGTT 
+ACTGTAGTTCAGAAAAATTGACTTTGTGCTTTTGAAGAGGTATTGGCTCG +AGAGTTAGAGAGGCGGAAGCTTCTGTGATATTTTTTACATTCCAGAATGT +TTGAAAAAGTAAAAGTTTCCACCTGTTTTCATGTGACATTGTTAAAAGAT +TATATGGTGTTTTATGTGTCGAACAATTTAAATTTCGCCAGTCGTTGGCC +GCGCCGTAGGCGCGGTCAGCGGCTGGTTTTTCCAGAAAAGGAATAAGAAA +TAAATTATTTGGAAGTTACTAGGACCCTGAATAGAATTCTAGGCCATATC +GAAAAAGGGGATGGGTGCACGTGGGAACCTTTTTTTGGATATTTTGGAAA +GAAGTAAGAAGTAAGAAGTAAGAAGTTGACTTAAGAAGTTGACTTTTCTA +GAGTAAGAAGTTGACTTTTCTAGAGATCATCTCCCCTCTATTATTCTCAG +CGTGACCTAGTTATCCGGAAAACTCTTCCACCAAAAAATGAGTGTCTCCA +ACTGAAAAATGCGTCATAAAACACACGGAAAAGGTACTTGTGTATTACAT +GCCCTCATTTTAATAGTAGTTTTTTAAGCCATGAAAAATAAAAACGTGAA +CCACAATTTGTATTTTTAGGCTTAGGAAATAACATTTTCTAAGCCTAAAG +AGGAAAATGTGTAATACACAAGTACCACGGAAAATTTTTTGAAACCTTCC +GAATTTGAAATAATTTAACTAAAAGTTTCTAGGCCACGATGAAAATAGAA +ACAATGATGTCCCGATTTTGCTGAAAATTTCTGTCCATAGTTATTTAGAG +TTCTGAAAAAAAAGATTTTCGAGAAATGATTTTCTAGGCCACCAGAAAAG +TTTCTAGGCCATTATCCCTTCTTGATCCTTTATCATTATGATTATGATAT +TTTTTAAACGATTTTTTAGGCCACCAGAGGGAATTCTAGTCCGTGTCAAA +AAAAAAAGAAAGGGGGTATTCTTTCCACATAAAAATCCCGTTGGCCTTTT +CAATCAAATTTCTTCAATTTTTCGCATAAAACTTACCTCCCGCTGTGACA +AGCCTCCTCCCGTCTCCCAGTATTCTCAATTCATTAATCATCACCCCCAA +TGCCCTCATCAACTACAGTATCCCTGCAATCCGCGTCTGCAAATATGTGC +CGAATATGTCATACTTCTACGTCAACGCGGTCAAATCCGCTCATCTCACC +GTGTCGATGCTCTGGAACACTGCTCTTTGTGCATAAGGCGTGTGTTGTGG +TGAGCTCTTTTTTTGTAAAATTCAAAATAATAATTAAAAAATTTAAAAAA +AAACCTTTTGTGGAATTTGCAATTTTTATAAAATCGTTAAAAACCAAATT +CAATTTATACAAATTTCCACATTTTCCAAAATAAAAAGAAATAGCCATGC +CCACAAAATGGGCGGCGCTTTCATAGCCACGTCCATTTTGTGGGCGGAGC +CTCGGATCAAAGTTTTTTCAAAAACTTACTATAAGTTGAACACGGATTTC +TGGCTTCCTTCATAAATCGAAATGGCAGAGTTTGCCGAACTAGGCCATTT +TGGGTCGGAGAGATTTTGTGTAGATTTACGGCGTGTTGCGTGTCGCGTCG +CGGCTCGTTTTTAGTTGTAAAACTGCTGTATTTGTCCGTGTGGAGTACAC +GATTTTTTCCACGCGCTGTCCGGCAGGCGATTGTCAATGAGGAGCGAAAA +TTCAATGCGAAAGGCCAGAAGCCCGTGGTTGAAGTGTGGTTTCAATATTA +CAGAAATTTTAAAGAGGAATAAACATGCCTGCCTACAAAATGGGCGGAGC +GTTGGAGGCGTGGTCTAAGAAGTCACGCCCTTTTTTTTAAATACTTGTTC +TGATGTACAATAGAGCGCATTTACATCAAATTAATAATTTGTTTTTCAAT 
+TTTTCCAGAGATGGCTGGAAATGTCGACTCGAAAAATGGTCCCATCACCA +CGTTGCGAACTATGCGGCTACGATTATCGACGTGGAAATATATTTCAAAT +GAAATCTTTACATGTTCCACACGTGGATCGATCATCTTGTCTGCTCAATG +TGCTCTTTTTGATTACCGTATTAATTATGATATTTTGTGGATATTTTACG +ATTCAATTCATTCAGGAGAATGCACTCTTAAAACGAAGGTGGGCGAAGTT +TTCTGGTGATGACCTAGGAAATTAGAAAACTAGGCCACCCCGCAAATGTG +GTGGCCTAGAATACCGAAATTTGACTAAGCTATGGCCTGCGGCCTAGAAA +AATAAAAGTAGACTGTATAAAATTCTCATCAAGCCTTTTGGAGATGAACT +GGAAACCCAAATTTTTTGGAAGAAAATATTGACCTCCTTTAAAACGGTGG +CCTACTTTTTCTGATGGCCTAGGATTTTAGACAAAGAAGCCACACAGCTA +CTATCCACATAGTTTCAGGAAAAAAAATAAGAAAAATAGGCTCCGCCCAC +ATATTCTTTTGAATTAGAATATACACGAGCCATTTATTTTGCTAAGTTTT +GAATGTTAGCTCCGCCCACATAATGATTTGAATTTCCCGCCCATTTTTCT +CTCAAAAATTTGAATTCCCTGCCAAATTTTTTTTTGCGAAAAACTCAATA +CTTTCAGGCTCTTCGCGCACAGTTCGACAAACACAGCGTACACGTGGAAA +CGGCGTGGCTATTTCAGCGGAAACGGGAATGGAGACGGGGACGGTAGTTC +GGATGGATATTTCTCGCGTACACCTGTCTCATCACTTTTTGACATAAAAG +TGGTCTTGTGTGCCAGCATGTTCTTGGTCTCGTTCATTTTGGCCCTTTTT +ACACAGTATAAGTGAGTTTTTTGACACGTGGCCTGGAAAATGAGAAAACT +CGGCCACCGATTTTTTCAGCCGAATTTTTTTAGTTTAGCGGTCACACGGG +CTTCTGGCCATTCTCTCTGCATTTTTCGCGCTCCATTGGCATTGGCTTGC +CGGACAACGCGTGGGAAAGTCGTGTACTCCACACGGATAAATTTGTTCAG +TTTTACATTGAAAACCGAGCCGTGACACGCCGTAAATCTACCCCGGCCGT +GGCCGAGTCAAAATGGCATAGTTCGGCAAATTCTTGAATTTCAAAATATG +ATGGAAGCCTGAAGCACGTGCGGTCATAAAAATCAATAAATTATCTTTTT +TTTCCAGAGCCGAGTCCACGATATTCCGATGTATTTTCCGATTTTTTGTG +ATCAATAAGAATTGGATGATCAAAAACTACGATATCAAGTGAGTGGGCGG +AGTGTAATTTCGGCGGGAAATTCAAAATTTTTCAAATTTTTCAGAAAAAT +TTCGCGCAAAGTTCAAAAACTCAAAACTTTCGGTAAATTTCGGCGCGAAA +TTCAAAATGTTAAAAATTTTGGGGAAATTTTGGAGGGAAAATCAATTTTA +AAAAACGTTTGTTTGAAATTTTTTTAATTTTCAGAAATGATCCCGAAATG +GCTCACCGCCGCGGTCTCCAGAAGTCTTCACCTGTTCCGTTAACCTTATC +AGCTTCTGATATGTGTATCCACGTGTCATCCTAGTCAACAATTCAACGAA +AAAAACCATTCCAGTTACAATTTCTCATTTCTTTTCACCACTTTCGGGTT +TTTTGCAAATTAAAAAAATTTCGATTTTTTTTTTGAAATTTTAATTTTCC +AATTTTTATTTTTATTCCACAACTCAGGTTGCTCAACAGTGCCTAAAATA +AATTTTATTTCTTTGATTTTTTATAAATTTACAAAAAAAATACAACAAAA +AAATAATATTACACATTCGTGAAAAACGGAAAAAAAATTCAGCCTATTTA 
+CATTAAAACACGTGGAATTTGAAATTTTAGCGATATATCGATTACCCTTT +CTCGGAAAACTCGTAATAAATGTGAAAAAGGCGAGGAAATTGGAAGAAAT +AAATGCGGGGGTTTCTAGAAAAAAAAATTGGGAGAAAGTTTAAAGAAAAA +TTCAAAATAATGCTCCGGTGTCATCGTGTTCCACAGTGGTTAGATTCTGG +TATTTTGTGCATGGTGTGCTCTGAAATTTAAAAAAAAAGTAAAAAATTGA +AAAAAAAATTTCTTAAAAATTAAAAAAAATGAACTTTTTTTTAAAATTTA +TATTCAAAAATAAAAATGTTTGGTATACCTAATTATTCATACTTTTATCA +ATTTTCGAGAAACTTCAATTTTTCAACTGCTAACGTATACAACTCGCGTT +ACTCGAAAAAGACCTGGGGACTAGTTTCCCACTTGGTGGCCTAGAAATCT +CCAGTAAAAACATCGACGACCGAGATATGAGTTTCTAGGCCTCCATATGA +AAAAAAAACTCGGTCGGGGACTAGGTTTCCACATGATGGCCTTGATAACT +TCAAATTTTTTGGTGACCTAGATTTTTCAAAACTTGGATTACTAGGCCAC +TAAAAGCTTTTCGGTGGCCTAGAAACCAAAATTGCGTGAATTTTTCATAA +TTCTATATCAAAATTGACTAAACCGAAAAGAATGCGATGCTTTTGCAATA +TTTCAGCAGTTGGTGTATCCCACGTGGCGACGGGATTTCGGGCGACCGTG +CCATTGATTAGAGCCAACGGAAGATATTTTGAGCATTCGGCGGTGCTCTG +CAAAAATTTTTTTTTTGCAAAAGTTAAAGACGCATGGTGGGTCTTGATCT +ATGAAATTTCGTGATTATTCGAAAATTTTTTGTCCTTATTGACAATTTTT +TTATCCACAAAAAAGGCGGATGTCCTAATTGCCAAAAATGTGGGTTTCTA +GGCCACGAAAAATAATTTTTTGCTTCAAACATCCTGCTTTTTTTCCAAAA +TTTAGCTTTAAAAAGTCGGTGGCCGAACTTTATTAAAAGTTCAATTTCTC +GGCCACAGAAACTTTGTTTAAGTTTTCACATTTGGATTTCTAGCCCACCA +ATAACGTTTTTATTTCATATTTTTATAAAAATCGAATTAAAAGTAGATGA +TGACCTAGTTTTTTTTCAATAATTTGGGTTTCTAGGCCACCAACTTTTTT +TAAAAATTTTCTTAATTTTCCATAACCAAAAAAAAAAAACAAAATCGATG +GCCTAACTTTTTGGCTTCCTAGGCCACCAATAACTCTTTTATTACTAATT +TTCCTCACTCTCTGATGTCGATAAACACTTTCTTGTCTCGAATATCGCTT +CCCACACCCATCACACTCAAATGCTCCATTCGTCTCATGAGTGAGCAAGT +GTTGATTGAGCCCATGCTTTTGCCGGAAAGTTTTCTCGCAGTCTCGACAT +TTATAAGGTCCATTGTTGGGCTGGGCCTCGTGAGCCGACGACGGGGCCGC +CCGTTTTTTCGCCCTTTTCCCCGATTTTTCCGGTGAAAAAGTGTCGAAAT +ACGCGTTATTGTCATCTAATTTTGGTACATTCGCCAACATTGCTAGATTC +TCCTGAGCCACCTGTTGTGACGTGTGAGGAATGTGATGGTGCTGATAGAG +GTCTGGTGGTGGCTGAAAATGGGGAATATTCCCGGCGCCGAACATTTTCA +GAAGTTCCAGGTCGAATCGTTGTTGAGCATCCGCGTCGACTACTTGATTG +AGGGAGACGGTGAAGTCAGTTGAGGAGGTGGATGGCAGTTTTATAGAGAA +ATCCTGCAAAAACTCAGGTTTTTGCTAAATTTTTCAATTTATCATTGTAA +TTTCCGAAAAATTATAGCTTAATTTTTAGATTTTTCGCTTGATTCTCAAG 
+TTTTTATGCGAAAATTTAGTTTTTTGTCAATTTTTTTTTGTTTTAAATGT +AAAACTACGGTATTTAAAGGGAGGAGCACTAATTTATGCCGATGGGTCTC +GCAGCGAAAACATTAAATGCCTTAATTTAGCCATGCGCCTGGAAGGTTAC +TGTAGTTTTTATTGATTGATTTTTCATTCAATTTGTGGTTTTTGAAGGTT +TTCAACTCATTTTGAAAGGCTTTTATAGATTTTTTTTTCTGAAAATCAAA +TTTAAATGTTAAAATAATTTTTTTTTTTTTTCACACAAAAATAACAGAAA +TTCCGAAAAATTGCAATTTAAACAGCTCTAAAATATGAAAATTAACGCGA +TTTTTATCATTTTTTGCTTCAAATCGCTGACTTCTGGCAAAAAAAAGGAA +AATTCAGCAAAAATCAATTTTTCAGAAAAATTTGATTGATTTTTGGAATT +TTTCTTACCAATCCGTCGTCTTGCCGTAAATCCTGTAGCTTCATCTCCTT +AGCCAGCAGATTTCTTACATCTAATTGCACGTGGCAATTGCAGTTCTGAA +AAAAAAATGTTTTTGAGATTTTTAGAAAACTTATTTTGTAAATGTTCTAG +GTGGGTTTTCAAATTTTTTTTTAAATTTAATTAAATCAAATTCTTCTCAC +CGTCAGTTTCTCTTTGACAAAATCGCTCAAATTCTTCTGATTTTCCTGAA +CTTTCTCCACTTTCACGGCCAAATCCTGCTGCTCACGCAAAAATTGTTTG +ACAAGACACGTTAGCTGGTTCACATTTGATAGATTGTCCGGGATTTTTAT +AATCTGAAATTTGAAATTAAGGATTTTAGCGGTTTTTTCGGCAAAACGTA +AACTTTTAACGCTGAAAACTCACATTTTTCGAACTCCGCTCCAATAATCG +TCTAGTGAATTCTTGAATAAATCCGGTTGCTTTCATAAAATTCTGAAATT +CCTCATTTCCAATGAATTCGCCCATTTTCCGATGGAAGAAAAACCAATGG +AAATACGGCAATATTTGCGCACAAAATCGATGCTGAAGTGTCGTCGGACG +CAGTGTTTCGTGTGCAGTACACGCCTACGACAAATTGCGGACTTTTTTGT +TATGGAGAGAGTAGAATAGAGAAGAAAAAAGAAGATAAAATGGATAAAAA +TTGCGCGTTTTTCTCGTTTTTTCTCTGAATCTCTCAATTTTTAGGTTTTC +GATCCATATACGTCCCGAGTGGAGATGAAGCCATAAATTTTTCTCTAAAA +ATTGTTTCTTGCCCCTCCAAATCGCTTATTCGTTTTTAAAATTATGAAAA +AAAAATGCTAAAAATGCTTACCAAACCGCAAATTTTTAATTGGAAAATAC +CCAATATTTTGCAATTTATAGTTTAAATTCAAGAAGGTGCCAAAATTTCT +GCAAAATTGCATATCTTCATATAAATTGTTACATTTTTTGTAAAAACTTG +AAAACCCTGCTTTTCACATAATAGTACTGGAATTTTTCAATTTTCAACAA +TTTCATAATTTTTGACCAATTTTTTATTGAAATTTTCAATAAAATATTTA +AAAGATGTGGCTTTCCAAGCAGATTTTCCAATTTTTCCGAGTTTGAGTAA +CTAAGTTTCAACCAAAATGATGTCACAAGTCAGAAAATTTCCCTAAATTT +CAAAACACAGGTGAATAACTATAGGATTAGAATCTGCAAAGATTGAGTCT +GGAGATAATTTCGGACATTTTTTCGGATAGTTTACGTTTCCTGTAGTTTC +AAATAGGCAATTTTACTTTAAAGGTTGCTAATAAAATTAAGAAAAATCCT +TATATAGTCTCCACCCACAGATTGGCTTTTTAGATATTTTCAAAATATCT +TGTAACGAAAACTACAGTAATCTTTAAAATTAAAATTATTACTGTATTGC 
+TTTTGTCCATTTACGGGCTCGATTTACGAAATTAAATTTAATAATCTTTA +ATCTATAGAATATTAAAATTAAGCAAAAAATGAGAAAATAATACGAAGAA +ATGAAGGAAAATGGAATATCGGTGTAACTATCCGAAAATAAATTCATTTA +GAAAATCGAGCCCGTAAATCGACGTTACGAAGAATTACTGTAGTTTTCGC +TACGAGATATTTTGCGCGTCAAATATGTTGAGCAATACGCATTTTAAGAA +TTTACTGTTATCGTAATAGTTCAAATTTTTCGAGATAATAGTCTTCCAAA +AAACTATTGTTTAAGGTTTTATTTTTTCCGTAATCTAAATTGATTATCAC +TAGACACTGTTCATTAATCATTCAGAACTTGTATAATCCATCTCGGTTAA +ATTGTTGGTGAGACAAAGTGTAATCTGTTCTAGTTGGGTCACACGATCCC +TTTTCCACTCAATCGCTTACCCACTCAACACTATAGACTAGACACTCACA +CAGGAAATAGTATTGTTTACCTGTTTTCTTTTGGACACTGTTTATAACTT +TTCCCTCTCCGTTTCTCTACGTTTAAATTTGTCAAAAATTAATAATTTTC +TTACTTCCACATTCAATACTCATTTAATGACCGGTATTTGAAAATATTTT +AATTTTTTAATCTTTAGTTATTACAGGAACAGTAAACTCTGGGAATTCGT +ATTGCACAACATATTTGACGCGCAAAATATCACGTTAGCTGGCATAATAT +TAGGCATATATTGTTTCTACAATCTTGTAATGTATAACTGACTAACATCT +TTGAAATAAGTTTGTAAAACTTGTGAGTTGGAAAACATTGTGCATGGATG +TCGTGTTTCGCGCACGCGCTAGAGTAGTCATAAAAAGATTACTGTAGTTT +TCGCTACGAGATATTTTGCGCGTCAAATATGTTGTGCAGTATGCATTCTC +AGAATTTTGTGTTCACGTAATAAATTAAATTGTTAAAAGATATAATAAAA +GTTAAAAGATATAACAGATTTTTTCTCACTTCCCTAATATGTTTAATAAT +CAATCATTTTAAATTGTGCCGAAAATTTTCCGTTTTCCAATTGTTCGGAA +TTTTTTCGGTTTTGCTCGAAAAAACCAAAAAGCAAATTTTTATAAATTTC +CCTGGTGTTCAAAATTTTCCCGCAATTTTTACCTATTTTATTTTCAAAAT +ATTTATTTATTTCAGTTCCCCCAAGTCAAACCGTTCGATGTTGTAGAAAT +GGAGATAATTCCGCTGCATTACGGAGCACAAGTTGGGATTTATGACGAGG +ATCATGAGGTTGGTAGATTTTAATTTGTTTTTTAAATCGTGGCCTAGCAC +ATTCTAGGCCACAGGTAGTGACGTCACTCTGAACATTTTCAATATATGTA +AATTTTAAATTCAAACTACTTAAAACTCGGAAATCACTTTTTCCGTCAGA +TACACAGAAATAAACTAGTTTTACCTTTTAAAAAACTAAAACCATTGAAC +TTTTCACAAATGTTCTTTGAAACTTGAGATAATCTGCAAATCCCAAACTA +TTAAAAACTGAAAAAGGTAGCCTAGAAAATGAAAATTTTCAATAGTTTTT +TCAAGTTCAAAGAAAAAAAATTGAATTTTTAAATTGTTTTTTTTCGCTAC +AAGTCAAAATTATATTATATTTATTAGTTTTTTTCAATTTAAGAACATTA +CTAAAATTAGTGCATATTTAAAAAAAATGTTTGTTTCGGAGAAAACAACT +ATTAGTAATTGCACTGCAATCATCAGATTAGTGAGGAATTTAGTTCTATT +TTTTTCTTTTTTTTCTGTTTCAAAGTCGTTTTTTCATGAAAAATAGACCT +TCGAGCTACTGGTTTTTTGAGAAATATTATTTCAATTTTCAATTTCATGA 
+TGAACAACATTTTTGGTAACACATTTAATTCTAGTAGAATATTATCCACT +TTTTCTCTAATTTTTCAAAATTTATTCCTAAGTTTTTCTGATCAATGAAG +AACTACCTTTTTACGACAATATTGCCTTCAATTTTTTCTCTCTGAAAATT +CGAAATTTATCTGAATCAATGGTGCCGGTTTTCTTATTTCTGAATAACGA +AATAGATTTTTTATTTATTTTTCAACAATTCCCGGAAATTTCTCTATTGT +TAAAAACAAATGAAGTGAAAATCGATTTTTTTACTTCGAAAGTTTAACAT +GAAATTCGATAAATTTAATCTATGATCTTTCAAGCTCCGCGAGTTTGAGG +CGGGATGTTGATAATTAGAGATATGCCGGGGGAGAACACTACTCTCCAAT +TATATTTCGGTTGCTCGAGAAATATTTTTAATGATAAATACTGGATACTA +GATTCTATAGAAAACTAATTTATTTCAATAAATTTATTATTATTCAGGAG +AATTATATTTTCTTGCTCGAAAGAAAATAAATAATCAAAAACCGACGCAT +TTTCCTGAATTTATAGTTATTCAGAAGAATATAGATTTTTATTTTCAAAT +TCTGAGAATTCAGAAAACATGGCTATTTGCCGGCCTAGAAAATAGAACAA +CTAGGCCACCGATCATTTTTGTTTTGCCGTTGTCAGGCTTCTGGCCTAAC +TTTCTCTATCTTAAAAGATTCCCAAATTCAAACTACTGGCTTTCCAAAAC +CTTCAAATTGTGGAATGAACTTGTGATTCCAAAGATCTGATGTCAAGAAT +TCAGTCCATAGTTCATATACTCCAACAACACTTTTGGAATTTTAAATCTT +CATGGTTTTCAAAAAGAATCCACCAAATTATTTAGCAACGGGGTTTTCCC +TTCTAAACAATTGTGTGACACAAATCATAATCTCCGTCACTTTGTACAAT +TTTTCTAGTTTTGGTGATTTCCCCTCATGAGCTCAACGCGGCGGAGTAGA +TCTTCCATGCAGGCGTTAAAACGCCTGCCTGCCTGACTTTAAGGCGGCCT +CCGCCTGCCTAACGCTTCAGTCCTAGTCTTGTGCTAAACCATACATGAAC +TATTTTTCTAATTCTTAATTTGGTTCTCATAAAATTGATAAAAATTAGGA +AAATTTGAAATCTGAAATTTCAAGTAAAAATGGGCGGCAAACTTTGTATA +AAAAAAAAAAGAAAGCTCGGCCACCAATTTTTCAAAGTTTCTTACCGCTC +TCTGTCTTGCCTCGGGCAAGCCCTAAAACTTTAGAAAACATCCCCGAACA +AAAAAGTAATAAATCCTTCAAATGTGGGAGATGCCCCGTGTATATGTCAC +CTGTCGACTCGTCGAAAACGTTTGTGCCACCCGGCTCTTTACCTCGTCTC +CTCTTACTCATCATTTTACTGCCCCCGTTGCTTTCCGGCTAATGAGGTTC +TTAGGGCGTAGGACGCCATTGATTTGTGCTCTTTTCAGGCAAAATTTTAG +ATTTTATTTCTCTGATTTCACTCAATTATCAGACAATCTAAGTTTTTTGA +CATGTTCAATGCTGGAAGCAATTTTCGATTTTTTTATCGACCATGGCCTA +AAATTACGGGTCAATTTCCTGAAACGGACGAACATTTTTGAAAGCAAAAC +AGTTTGTAGAGTTTCGCTAAAATTTCTGATTTTTATCGAAAAAATCGGGC +ACACCAAACAAAAGTTCGTTTGTGTCAGGACCTTGACGGTACGCAATTTT +CGGACAAACCACGTGTGGCTGATTCAAAAATGTCAACTAAATTGACATCT +TGAAAACATTTTTTTCACCTGAAAACCACTAACAGTGTCAGGTTAATCCG +ATTCAAGCGCTATTTTTATGAACTTTCCTGAAAGTTTAATTCCACACGTT 
+GTGAGGGAATTTGACACATTTTTAGAAAACCTGATCTCACGGTTAATTTT +TAAACACTTCGAGGGCCCAGAAATCCTAGTTTTTTTTCTGGTTATTCATA +GTTTTTCTAGAATTTAATTTCACATCTACATTTCTAAACAAAACTTTTGA +AGGTCCTTGTAATTCTTCTCTAGTTTTTTATTGTCAAATTAAGTTTTCTC +CAAATTAGGAAGAAGCGATAATTGCGGGTTTTGTGAGGGAAGTTTGTCGT +CGGTCCATTTGTTTTGAAATGAGAATCGGAACGATTTCCTGTCTCCCGTA +GAAACCAATTAAACGAAAACTTTTTTTTTTCATTTCGTTCATTTTGCATG +CTTGCTAGCCTCGTCTATTGGAATTCTTTTGTACTATGATTCTATTCTAG +TTGTGTCTGCATTAAAAAAATCGATTTTTTTGTTGAGTATATTTATTTTT +CAAACAAGCCGAAAAAATAGATCAAAACTGAAAATTTTATAAGAAAAATT +TCAGTTTAAATCTAGTTTTTTTTTCAATTGGCCAACTTATAATTTATATA +TACTCTCAAAATCAAAAAAAAATGCACATTCTATATTACCCACAAACATC +GAAAATCCGCTATTCCCTCAATTATAAAAAATGTGAGAAAATCCTCGATT +CTTGGTTGCTATGAGACAGATTTCAGATTTTTCACCGGATAAACAAGACG +TTATCATTACTTTTCAGTAATGCGCGTTTATTTTTCTATTCCATTTTTCG +AATTTTGAGATGTTGTATTATTTAGAGAGCACGAAATTCTAAGAATGCGT +ATTGCGCAACATATTTGAGGCACAAAATATCTCACAGCGAAAACTACAGT +AATCCTTTAAATGGCTACTGTAGCGCTTGTGTCGATTTATGTAAATCATA +TATTCGTCGATATAATATTGAGCGAAAATTAAGAAAATAATACGAAAAGC +GAAAAATTAATATCGAAAATCGAGATTCCGTAAATCGACACAAGCGCTAC +AGTAGTCATGTAAAGAATTACTGTAGTTTTTGCTACGAGATATTTTGCGC +GTCTAATATGTTGTGCAATACGCATTCTTAGAGTTTTGTGCCTCCCGTAA +TATTATGTACAATGCGTATTTCCTCTGCATGCAAGACTAATTTCCGATTG +GTCTGTAGAGGTGCAGTTGGCTGGCTGACTTTTAATACTTATGTTCCTTT +AAATTTTCAAGTACGATTAGGAACCCTTTCTCTCAGAATTTCACTAGTGT +ACCTTAATGTTTGTCTTTTGCTACCTGAGACGATCATCAAGTCTTTTCTT +TTGTTTGTATTAGATTTCGCAGTAAAATTGTGAAAAAAATTGCAAAAAAT +TGAGAGCAAGAGCAGAAATGTGCACTTCTCGTTGTACCACGATTTTTCGT +TTTCCCACTAAATACCTATACGTATAGGATTATCCAAAGATGATCCCGGG +GATCACCAGTTCTTAGTTTCCTTATCAATTTTATATTACTCGTATGCCTT +CCTCGTTTTTTTTTCTAAAGCCATGAAAATCATAGGTATCGCCTCAGCGG +GTCGTAGAAGGAGCACAATGTACTCGAGCTCGTCACGAAGACCGTTTCGA +CGGCTGGTTCCGCTTAAAATGGTTAGAGAACTGGGTCTTGATGCGATCCT +AACAAATTTTTTGAATCTCTAAATAGTCCCTATAGATCATGTAATAATAT +AGGTTGAACGTTTTACAGAATAAAGTTTTACAATAAACTTTATAGCATGG +GTGTGTGGCAAATTTGCCGGACTTGCCGTTTTCCGAGATCGGCAAATTTG +CCGAGTTTGCTGCACACATGAAAAATTTTTAATTAAGATTTTGTGCAAAA +ACTAATGTAGCCCCTGGTTTACACCTTTTTACGATATTCTGAGTACATTC 
+GAAAAGGATGTTTTAAATAGATTGAATATCAAATCAAAAAAGAAAGTTGC +ATGAGTATTATATCTAAAAACTTTTAAAGAACACATTTTCAGGCACAAGG +CCGAACGGCAACCGACTACGCGGGTCAGGACTACGATGAATGTTATGATA +GGTTAGTTTCTTTTTCTATTATTTCATTCAGAATATTACGAATTAATGCT +TAAAAAATTTTCAAATTAATTTTCAAATCAACCGCCTTAGCTCCACCCAG +CTCTCTAGATCGCAACCCGCAATAAAGCCCATAAATCAGTGGGCAGCTGC +TGCTGCTCCGTAGTGGATCAGTCTGTCGTCTCTCCGATCAGTGCGCTCTC +GATGATACTCTCACATTTGACCTTTCTCGCTTTTTGTAAACTATGTGCTG +GACACGTTGAAAGTAGTATCAATTGCACTTTTCTGGGTAGTTACCGATAA +AATTGCCCAGATGGAAGAGTTTTCCATAACAATTTCTCTTCCATTTTCAT +CTATCAGCTCTGTGGCCCCGCTCATTAGTGTTTCTCTAGCCGATGCTGTT +CCAGGTTCCCATGGCAATGTGTACATGTGTGCCTAAAGTGAGCCAAAATG +CTCTTTGAATTATTCATTCATCCTATGTTTTTTATTCGTCCTCACGTGAC +CTGCACCACACTGCGCATTAAATATTAATTGGCACTGAAGAGAGCCGCCG +CTCGGGCAAATGTTTAAATTTTAAATTTAATTGATTTATACATATTTGAA +AAATGAGACACTATGTTATTTTCGCCGAATTCGTTCTCTGAGCATTGCGT +TCATATCTTTGCACCTATATGAACAATTAAAATAATTTGTGTGTCCGGTA +ACGAAAAACCCGAAATAAGTCTTGCCTATACAGGAGTTTGTGGAAAGTTT +TCAAAATTTTTAATGCAAACAACGAAGGAAATTGGAGACGTAATACGATC +ACTAGCAATCTGAATTTCTACTTTCAAAACGTTTTTTTCATAAGAAACTC +CATTATCCAAAATCCGGCCACCTTGCTAATCCTGAGTCCATAATAGGCAT +TGCTAAGGCAAATCTAGGGCTTTTGGCCGATAAAAGACTTTTTGATAATA +ATTTCCAAGCTTAAACCTTATTCATATTACTTTTAGTAATCCTATAAGTT +TTGTTTGCAGTGGTTCAGAATTGCCGAACATTCCAAGTTTTTTGATAAAG +GTTTTACATATATGTTGAGCTTAAAAAATAAACTCAAAAAAATTAACAAT +TAGTTCATACGTTATTAATTTCAACGAGTTTTAGTGTAAATATATATGTG +GTCCTCTATATATTTTCCGTATTATTATTATTATTGATTCGCTCCAAAGC +TTTAATTTCAGTCTCAAGACTATACTGCTCAACTGTGTTTATAGTAGTAT +TTTCAAACGAACAAAAAGTGACAATACAGTATACTTTAAACTAAAAAAAA +CTTTGTCGATGAAAGTCTATCGATCAAACTAGCGAAATCCGCCCGTTTAA +TCCGTTTTCCCGCCACCATCACTTATTGTGTTTTCATAGAGCTTTATTTG +ACGGAATTTCCTTTCTTCTGATTTTTCATATTTAACCAAAACTTACTTGA +AAGAATCTTAAAAGGGATTTTTACAACTCTCTGGATCAGATTTTTCGAAT +TGTTTCATTCGCAAAAGAAGAGCGACTCACTGCTATAAATGCAACATTCA +CCACACTATCTCCACAATTTCATCTATTAATTCATACAATGACACTACAA +TAATCCCCACTTGCTCAGAATTTGTCTCTCCCGCGGGGGACAAATTGGAA +TCAATCATTCTGTCGGTCGACCATTGAACCACCACCACACAATATGGTTC +AGCGGGGGGAGGTCCAGTTTTATTGTTATTACGTATACTGCTAGGGGAAC 
+CCAATTTTCGGACCCCGCGCGGAATATTCAAATTGGAAAGAGTGTGTGTG +CAAAAACTTTGTCCATCTTGTGTCCAGCAGACCAGCCAGCAGCTTCTCAA +TTTCCGCTTCAACTTTCTCCACTTTCCCATGGGACAAACCTTTTGTCTTT +CTTCTGGCTTCTCCTCTTACTTCTTATTCTTCTTCGTTGCCCAAATTGTG +CTGGAAGAACTAGAGATGCTGAAAAGAAATAGGAAAAGAGCACGAAGACA +ATATTTATGTTTTGTTTTCCATGGTTCGCCTCGAGTATTTGATTAGGCTT +CTCCTTGTTGTCTTCACTCGACGTCGTGTTCTCTATTTTATGTCCACTCT +TGAATTCCTGGAGTTTTCAGTGCACGCATTTGTTGAGCTGTAAGAAAGCT +GCAGGAGTTTCCCAGAATTTGATCGATTTACGGAGCGGAGTGTCGTACTA +CTTATTTCCGGATTATAAAATTCTGAGAATGCGTATTGCACACCATATTT +GACGCGCAAATTATCTCATAGCGAAAACTACAGTAATTTTTACGTTTCGA +GAATCGAGCCCCCCTAAATCTACACAAGAGCTACAGTAGTAATTTAAAGA +ATTACATTTCCGAACCTTTCTCATTTTTCAACAATTTTTTAACTTGCCTA +GCGTAGCCGAAAATTAGCGGTAATCGCTTCAAGACCCAAAACAAGCCAAT +TTCATCAAATTTCAAGTAGAATTTTGTGAAAACAACCGTCACTACCATTC +TGATGACCCTTTACTTCGACCGCCCAGTTCGTTAAACATTTTTGAATTTT +GTTCACCTGTCTGCCACCACATATCTTTCTTCATCAAAATTCAACGTGCT +GCGATCATCATCTTCATCTCTGTCTCTTCATCTCGCCCAAATTCATTTCC +GTCACTGCGTCTCGTACTGATTTTTTTTCATTTGTCTCCAATCATTTAGA +GGCCAAAAGACCTCCTCTCAATTTTTTTCTGTCTGTCATTGACCCCTCAG +TTGCGAACCGATTCAGAGTTTACATAAACAAGCTCGCTTTGTATATAATG +TTGAGTGAGCGAGAGAGTGAAAGAGAGAAGACGCAGGAAGCTAAGCGTTT +CCTTTGTTTCAAGCTCTGTCAAATTGTCAATTTTCTAGCTAGAAGATTCT +TATGAAGTTTTGAACATTCTAGATTTAGTTTGTTACCTACAACAGGTGAA +TTTGTCTTTCTTGTCCAGAGATAAAACTTATCGTGGCCTTTTCCGAAATA +GTATCAGATTTCCTTTGTACAAGGGCTTTCGTGAGGCGGTAAATCGCCTG +CCTGACTTTAAGGTGACCTCCGCCTGCCTCTCGCCTCAATACGTTATGCT +AAAACAAACGTGAACTTTTAAAAAAATTTAATTTTCAACAAATTGATAAA +AATTTAAAAAATTTGAAATTTACCTGCCTACCATAGAAGCCCTACCTTGT +TCCACTATTGATGCGATTCATTCTATTACAAATCAATGTAACGAACTCCG +AAGTTTGATAGCCCGAGGCGTTTTCCTTGTTAATTTGATATCGATTATCA +GTTTGACCAACCAAACACCATCGATTTGGCTTGATTTTCAAGGAATTTAT +AGAATTTCGTAGAGTTTTGGACTATGTTTTGCATTTACGTTGAATGGAGA +GAATTATATTTGAATAATTGAATAACTTGTTAATGGTTCAAGGATTACCC +GTTCCGAAGTCTAGAAATGCCCTGGCGATGCTCCCGTCGAGAACTGTAAC +CGATAATCAATCAATATTGCGGGTATTAATTGGCTAATATTCATAATCCT +GAAAAGTTATTTGAATAACGGCAAACTCCCAAGAAACATACCATTTCCGA +GGTGTATGAGAATTTTAACAAGACCTTAGGGTACATAAGCCTATCAGTTT 
+GAAAAACAATGTCGGAATTTGTGCTTCCATGGTAGACAGGCGCGGTTTCA +GGGTCTGAAGCTTGCCTGAAGGCCAGGCAGACAGGCGTTTTAATTCCTAC +ATGGAAGCCCTAGAATACAGCATATACGGTACCTAAGCCAATTATCAAAT +TTTACTTTTAAAAATTCATTCCACAGGTGTTCCTGTTCTGCTGCATCATC +GTCACACAGACACTTACACGCGCCTTTTCACTTCTCATTTCCCACTTTTT +CTTCGTTTTTCTCGCTTTTTCTTCTTCTCGACACGAAGAAGTAGGATAGA +TCTAGAGACGTCGAAAGCCATAATTGTTTCCGTCGGCGTCTCCACGCGCA +CTCGTACTGTCTACAAGTGCCACACACACACTTGAAGAAAATAGAGAGAG +GTGCAGGAGGAAAAGGAGGGGACGACTTCTGAAAGTGTCGATAACTACCT +CCTATTTGAGTAAAGAGTGCAAGAAATATATGTGCGTGTGTGTCTAATGA +GAGGGTCTTGGAGCGATTTTTAAGCATGTACAGGCATGAATCAGGCGGCA +GGTTCCAGCCAGGCTCTGAAACCGCGCCTGCATACCAGGGAAACCCAACG +ATGGTTTTTCTTTGCCCGGTTGATCCCTTTCTTGGATAGTTTTAGTCTTC +GTCCGTTTTTCATTCATACTCTTTTTGTAACTGTTCAAAAGTCTAGTTTC +ATATAATCAGAAATTCGAGAAAAAGGATTTTTCGAGGCTCTGAAAAAAGA +TCCCAGAAGATTGTGTCTAATTCTAATTCTCTACAGAAAATGCCCAAATC +GAAAATTTTTCGACAGATCCTCGTATTTCGAAATTCTGGAACATTCGGCA +AAGCTGAAAATGTCACAACTTGCCAAAATGAACATTTTCGGCAAATCGGC +CAATAGCTGTATTTTTCAAACTTTCGGTAAACCGGCAGTTCGCCAAAAAA +AATTCGGCCATGGTCTAGACTCTAGAGTTGGGCGGCAGACAATTATTTCG +GGCAAACGCAAAAATTAAAAATGGAAATTGGTGTTCGGCAAATCGACAAT +TCGCCGGAATTGAGAGTTTCCTGCAAATTCGGCAAACCGGCAATTTGCCG +AAGGGCAACTTCTCCGAACTGACACCCCTTCCAAAAATCCGAAATTTTTT +TAGCTTCACCCCCTTCCCTCCCTTCTCGTTTTCTAAAAACCGGACACACC +TCTTACAATTATTACAAACACCACTACGTACGTATTAAACGTCTCTCGTA +ATAAGTGGGGCGCCAAAATATAATGGTTTTTGTTGTGTTTAGTTAGCCTC +CTCCTCTCTACTTCTCCACCATCATCAAATCTACCACCACAATAACATTA +ATTCGATCATCAACTGACTACTTTTGTCTTTCAATGTGGATTTGTAGTTG +TTGGTTATTTGAATTCTTAACTATATATCCATCCCTATGTTGAAAATTTG +AAATTTTTGGTAAATTTGAAAGATACCAAGTTTATGTATATTATAATATA +TTATAAAAATTTCCGGTAAATTGATATCCTGCAAACGGCAAACCGACAAT +TTGCCGGAATTGAAAATTTCCGGCAAACCGCAATTTGCCGATAATAAATT +TGCCTTTATTGATTCTAGGTTTCGAAAAATTACGTGTTCCGTTTTTTTTT +TGAATATTAAAATAGGAATATCAGAATTTTTTATTTTTTATTCTGCAACT +TTTCAAATTATGTTCAAATTTATTATAAAACTATCGTAAGTGTGAATTTG +CAATTAATTTTCTTATTGCTAGGTATCAGTTTTACTAGGGCTGTCATGTA +TGTACCAAAACGCCTACCGCGCCTTCTGTCTAGTGCGGTGCGGAACCCGA +AAAGTGTCGGAATCATCTTTCGCAGTATGTGGTGCTGAAACCAAGCCTTT 
+GAGTTCTCGCCATAGTGCGGCGAGAGGCCGGCATGAGGCGGGCAGGTAGG +AGGCAGGCTTCAGGAACTGTGTCGCGTCTGCCTCCCAATCCCAGAATACT +TTTTTTTTCAAAAAGTTGCAATGGAAATCAGATTTTCAAACGCAACCTGT +CTTCCTCTTGTACTCAACTTTTACTTGATGATAGTCGATAAAGGTGTGAA +CGCGGTGAGGTGTAACCGTGTGACAATACATCATTGTATTATTCTTTTTC +GCAAAAAAAAAACATTCTTTTGGGTATTATATTCTACCAATGTATCTTTC +ATTTTCTCGATTGATTTTTTCTCCGGCGAAAAGAAAACAAATTGAAAAAA +AACGTCAAAACATAAACATTTGGAAAATATAAAGGTGGATATAAAAACGG +TTTCTTTCTTCTGGAACATTCATTTTTAACCATTTTCCTACATATTTTCA +CGCTAAGCCGGAGCAAAACAGAGACAGTCGAGACTGTCAAAACATGTAAA +ATATACGTTTTTTGGTATACCTATTTATTAACAGGTGCTCCCTTTTCTCT +AAAAGTTTAAATTTGTCGCGAAAAAATGCAAGAAATGTTTTTTTTCTAAA +AACTAAAAACTCGATCAAAAATAATAATTGGGTATTTAAATATTCCGGAA +TCTCCATTTTCTTCCCGTTCCTGCCCTTCCATTTTTTTCTTCCTGCACTT +ACTCGAGTATTCATCCTCTTCTACTGCTGCCATAAAAGGTCTTCCGCTTC +ACTGAAGCCTTCTGCACAGCAGCGTATCCTCCCGCTTCATATCTCGTTGC +TGAACTCCATATAGCTTGTTATAGCTCAGTTTTAGACGGATTTAGGTTCG +AATTTCTGTAGGGTCTCTAGGAGTACTGTAATAAGTGATGTGTCAAAAAT +TTGGCCTAAACTGTCATTGCTATAGATTACGGTGATAGAAATTTGAATAT +GTATGAAATAGTGAAAAATCAGTAGACTGAGCCTTACTCAAATTCACATC +CGTTCGAGATATTTCTGAGTTCTGAATTTCTGTTGATTGCAATTGCCGTA +ACTTATAGCAAGGAAAGAACAATTTGAAGCCGAATTTACGGCACAAAATT +AGTTTTGGTGCTTCTAGAGCCTATTTTATAAAATATATACTTGACACGTG +GCTTTTCAATATCACCATCTCAAGAAATTACCACAATCGTTTCAGGTGCG +AACAAGAGTGTGGCGAGGAGGCGTGGCGAATCGCGTGCTGCTCACGAGTC +GGGAAGATCGCCATCCTTGTCATGTCAATCATTGTATTATATCTTATCAT +ATCAGGTTGGTTTAAATACATATTATTCAAATTTATCTGAGTTTACACAA +TGAATTTTCAGCATTAACGCCCTCCGAAAGCACACACAAGACGGACCTCA +ACGAGCCGCGGCCTGGTATTAATTTAACAAGGTGAGAATGTTGGGAACAT +ATTACTATTGTAGGAAATTGCTCGATAATTCCACACTTATTGAGTTCACA +ATTAATTTCTACTATATTAACTGTGTTTTTGGTAAAGAAGTATCACTTTT +TCCCCTCCCAACTCCGATAATTCGGCATTCCTAGAGTTTTAACACAATGT +TTTACGACATACTTATGAAGAGTTGATGCAAATCGCAAAAAAAAAACAGA +AAAATAACTATTTTATGAATAGTATCATTATCGTATGGTGTCGTCAAAAT +GTTTTGTTGTTTTAGTTTTTTTTTACTCGAAATTTTCTGAAAACACCGAA +TTTCATATTGAATCAAATTTCTTGATAATTTATTAGACATTTCGATTTTA +CAGAGCCAATCAAGTACTCACCGGATTATGTGATGCCTATGAGCGAGGAG +ACGTTTCCGGAGACTCTTGTAATCGACTTTGCTATGATCGAAACTGGCTT 
+GTCACTGACTTTTACGAAGGGCACAAGACTGTGGTCATAGTGAAAGATGG +CGGACAGATTGCTGTCTATAAAAGCACAAAACCGTTTATGGATCAATTCG +ACGAGCCGAAAGACCATCTGACGGATGCAGAGTTCTCGGATCGAGTAGTC +GATATGGTGAATGATGAGCTGAGACTCGGATGGCCGAAACACTATCGACG +GCATTTAATGGAGACTGTGTGGCCGACACTGTTACGGACCAAGGGAGAAG +CCATGTCGAAAGCGGATCGACGATCCTTGTGGGCTTTGTTAAAGCAGCCT +GAATTCATCCTGTTCCGAGTGTTACCGTTGACACGTGTCACGCCGAAGCT +CATTGGGACGTGTGGACATATGTATCAGACTGAGTCACTGGTGGCGTTCA +AGATGAAAGGGTATTATACGAATTTGAAGGTGGGGATTTTTTTTTAACTT +GATTTTAAAAAAAAAATTTTCTCAATCCAAATTTACAGGCAAAAATCTTG +GTGCATGTCATGGGAACCCTGAAGCTCTTATACGAATTTCTCGACGAACC +CCTTCAATGGTGTGATGTTCGATTTGATAATCTCGGGTTATCAGCAGATT +ATCCGAAAAGGTAGGTCTGCCTGTTTTTTTATTTGATTTTTTTTTCCGAA +AAAATCTACATTTTCAATTATTCGAAACGTAATTCGAATAAAAGTTATTA +ATGATTTTTTAAACAAAAAAATTGATGATAATCCAACCGGAAACTCAAAT +TTTGAAAACCCCTAAAAATCCAAATTTCTCAATTTTTCTGCAGATTCGCT +CTAATGGACGGTGACATGGTGTACACGAAATCGAAATTGGATTCTCTCCT +AAAAGGACGTCCGTGTGAAAGTGATAATGATTGTAAAATTGGAGACTGTG +AGGCGAGATGCACTTCTAATATGGTGTGCTCGTCGAGGAGTAACGGGAAT +TTGGAGGTAAGCTTGAAAGTTGAAAAAGTTTGGAAAAATTGTGAAAAATC +AATAAAATCGGCAAAAAACGAGCTTTTTTCCAGATGGCCGAATTTTTCAT +AGTTTGCAAAAGTTCGTCCACCTGTGAAAAATTGCTAAAAAAGCAACAGC +ATTGATGAATATTCATAAAAAACCAATAAATTCGACAAAAACCTTTTTGA +TTTTTCCAAAAATCGAAAATCACATTCTGATGTTCAAATTGAATTTTCAA +AAAAAATATAGAAATTTCCTAAAAGTTTTAATTCTCAAAAAACGAACGAC +TGACAAAAAATTTTTAAAAAAGTGAATTTTCCGAATTTTTTCTAAAGAAA +AACTGAAAATTTGAGAATTTTCAGTCAGAAATGTGATTTTTTTTTTGATT +GAAAGGAGTTTATTTTGTAGAATTTAACTTCCACTATCAGAAAAAAATTT +ACCTATTTACTTTGTAATAGGAAAAAAGTTGTGAAATTGTTAGGAAAATT +TTTTTCAACTTTACAATTTCCCGGACAGGCCTGTAGTTCCTTTTTTTCAA +AACGTAAAAGAAAAAATTAATTAAAAAGTTCAAACTCCTAGAATTTATAC +ATTTATTTTGCATTCTAAAAAACAAAGTTTTTCTTGATTTTTTTGTGTAA +AAATTGAAAAAAAATCCAAATTTTGTAGGTCTTCTGCGATAAACTGGTCA +ACAAACTGTTCGCAAATCAATGGTCCAAAAATAATAAATATTTGGTCGCA +TGCCGGGATACAGGACGAAATATCACAACTCGACTCAACGAATTACGGTT +AACATGGTCATGGAATCTGCCGGATGTTTAAAGGAATCTAGCTCAATTCT +TTCTTTTTTTTTCTTCTACTCCATGTGTCAAAAAATCCCCGTGTCTTATA +ATTTTCTGGTGCTTTTTTCCATATTTTCCTTTTTTTGTGTTCTGGCTCAT 
+TTTTCCATGGCTCACAGAGCCCGAAAGCTTAGGATTTTGAAATTTTATTC +GTTTTTTTCTCATAATTCTGTCATCATTTTTTGCTCTGCATCATCATCAT +CATCGTCAACATCATCGCGTTTTATTATGGGTTCATGTATTATTACACTT +TCCTCGAAAAAAAATTTTGACGAACGTGTAATATGTTTCACCTTGTCTCA +TTTGTACCTCGTTCATTGTTACGGGTTTGAAGTTTTAAAACTCTGAATTT +ATATGAAGAATTTTAATTTAAAAAAAAACTTTGTTTCCTTTTTTGAATTT +TTACAGACAAATTTTTGGCTTGAAAACTGAAAAACCCTGACTAAACTTAC +TATTTATGTGTTGGGTATCAACGTGTATTGACCCACGACGTGGCTAAGAA +GATCGTACAACGTGAACTCTATGAACTAACTTATTTATTCCTTAGTTTAG +TAATGTAATAAACTACTCGTCTTCTCCTCCTCGTCTCGCCTTCGAATACA +CAACATTATGAAAAGAGTTTTTCTTAAGATAAAAAATGTTCTAAAAAATC +GATGGCCGAGTTTCCTATTTACGCGGCCACAAGACCAGTAAAAAAGTGTT +AACACAAGTGCGAAAACTAGAACATTTATTTTCAAATTTCACCCGATTTC +TACAAAAAATCAACAAAAAAGTCACAAAATTTAAAATATGAACATAATAA +TCAATATCCCGCTGGCGCTGACTCCTCCCCTTTTCGGAAATCACTGTTCG +CATAGATTTGGCCGTCGGAAAGTCGTTCGGCGGCGGTGACAACTGTCAAA +TTGTTCACTGCCTTCATTACGTGACCACGATCGATCAGGGATTTCACGTA +GGCTTTCGGGAAGTTTGGCTCGTACCACGTGTAATTTGGCTAAAAAATAA +GGAAATTGGGAAAGGTTTTGATTTTATTGATTGAAAAATATTTCATGTGA +AAGTCGAAAATTGATTGAAAATTAAATTCAGAAGTTTTTTTTCTGAAAAT +TATTGATTTTTATTTTTAAAAAATCAACTTTTTGACAATTATCGATTTTT +CCAACAAAAAAAAAATCAAATTTCGCGTGTAATTCTCTACCTGTAACTGA +TTGTGCATCCTTGGAGCATCAACAGCTTGTTTAACATCTGCTTTCAGCCA +TAACGCATGCAGTGCAACTCCAGCAACACCCGAAATAATGGTGGATCCAC +CGGCTCCGCCGACCGCCATAAGCTCCGCCCCTTTTGTGTTAAAGATCACT +AATGGACTTTGACTACTCATGGGGCGTTTTCCAGGGCGAATGAAATTGGC +GGGAGACGGTGGGAAACCGAAGAAATTCGGATGACCGGGAGTGGAAAAGT +CGTCCATTTCGTTATTCCAGAGAATTCCCGTCGATTCGGAGGATACTAGG +GCACCCAGGCTGGAAGAATTTGAGATGGTGGAAGAGTTTTTTTGGCGGTG +GCCTAGAAAATGTCTTCATATCCGAAAAAGTCAAAGTGATGGCCTAGAAA +TTTAAGTTTGTGGCCTATAAACTTGAACTGATGGCCTAAAAATTGAACGT +CGTCCTAAAAATGTTTTTAACTGATGGCCTAGAATTGAAAGGTGATGGCC +TCCAAATTTCAGGTGGGCGTATATAAACTTTGGTTGATGGTCTAGAAATT +ATACACGTGGCCCAGATTATGAAATCACTTCGGTATAAAATCCAGGTGGT +AAACAATTTACTATTAAGACTGCCTAAAATCATCCCCTGGCCTAATTAGT +TTAAAATTTTTGGTGACGACCTCGAACTTGTTCATTTTTATCAACTGCAG +TTGCTTGTCGACTGAGGACTAACTTCTCACTTGCTGGCCGAGTTTTCGAA +GTTCGGCCACCAAAAATTTCCTTTAAAAAAGCCAAACTCACTATAAGTTA 
+ATGGTAGACGTAACCGAAACTGCATTGCCGTCTGCATCGATAATCGATAC +ATGAGTTGTTCCATGATCCTGCGGTGGTGCTTCAAACGATCCTCCATAGT +AATCATCAGGATGCGTCGTGTCTGTGATCTTCGACCTAACCCAATCCGCC +CACTCTTTTGACGTAATATTTTTGGCGATTTCCGTGGCGTTTGCCACGAA +TTGCGGGTCGCCAAGCCAGCTTCTAGCCGCATAACTAAATTTGCTAGATT +CCAGAAAATGATGGTATAACGTTGCGATTTCGTTGAACGACTTCATGTTA +TATTCGAAGCCATCCATCACGTTAAGTATTGCCTGAGCCACCGCTGAGCC +CGAGGGAGGCGGCGGGCCGCAGACTCCGCGCCCGTTTTTCAGCTTTGTGT +AGATCACATTTTTTGAGTCGTGGACTATTGATTTGTAATTTTTAAAGTCC +TGAAGCCGGATGATCCCACCGTTGGCTTCGAACTCTTTGGCGAGTTGCTC +GGCGAGCCGACCTGTATAAAAGTCGGCGATGGGATCTGTGGAGTTTGCGA +GTAGCCGGAATGTTTCTAGAAGGTTTTGGCGGGTTTTTATCTGCTCTCCT +GGTTGGAAGACACGCTGAGTTTCGGGGTTGATGAAGTTTTTCATGGTGGG +CTCCGCGCGGATTTGGTGCTCGTTTTGGTTGAGAGCTTTTGCAAGGGCAT +GGGACGTTGGGTAGCCTGGAATTTTTTAGGATGGCCTAGAAACATGAACC +GATAACATAGTTACTTATTTTGTGACCTCGAAATTTTCATGATGGCTTAG +AAATTTTTATGATGGCCTAGAATTTTTATGGTGACCTAGAAATTTTTTGA +TGGTCTAGAAATTTTTAGTAATGTAATAAACTACTCGTCTTCTCCTCCTG +AAAAAATTTGGGCCCGTCTCCCTTCGTGGTGAGACCCATCGTGGCGAGAC +CCATCGTGGCGAGACCCATCGTGACGAGACCCATCGTGACGAGGCCTATC +GTGGTGAGACCCATCGTGGCGAGACCCTCCGTTGAGAGACCCATCGTGAT +GAGACCCATCGTGGTGAGACACGCCGTGGTGAGACTTATTGTGATACCCA +TCTTAACGTACACAGTAATTGGAGAAACTTTTAAACTGATTTTTACAAAA +AGTTCAACATTTAACTTACCAAGAGCTCAATATTCCGTTGCCTCGCTTTT +TGTTGCCCTCAGGGATTATTGTTCTGTTGCCTCCATTTCCGTTACCCCTG +TTGTGTTGCCTTCGAGCAGCGTTGAATCCGTTGCCCCCCCCCTTTTTCGT +TGTCCCCGGGGAATATTAAATCCGTTGCCCCCGTTATCCGTTACCCCTAG +GGATTGTTGGCCGGTCTAAAAACGGCCACCAGAAAAGTTTTTGAGCATCA +AAAATGTTCTACGCCGTTAGGAGTTTTTTAGGCCAACAGAAATATTCTAG +GCTCGCAGAAAAAGTGCCCACCTTCAGCCAACAAATCAATAGTTGGTTTC +AACAACTGTCTCCACTCGACTTTTCCACTTCCGAATCTCTCGAATTCAGT +CCGGAGGCCGTGAAGCTCTCCGGGCACCGCAATCGCCTTCCATCCGATTC +TCGACTGATTCCATTTGTCACGGTACATTTCCTCTGTTGCGGCGAGCGGA +GCAATCTCCCGAGCATCGATTACCGTACACTCTTTTGTCGTTCTGGAAGA +ATTTTAGAGGTTTCTAAGTCATTGAAAATATTTTTAGGCCACGGCAGAAG +TTTAAAGGCCACACGCGCTTCTGGCTTTCCTCATATTTTGAAATGGAAGA +GTTTGCCGAGTTAGCCCGTTTGGACTAGGCCACGACCGGGGTAAATTTAC +GGAGCGTCGTGTAGACGATTGTCAATGGAGCGCGAGAAAAACGCTTTGAC 
+GAAGGCCAGAAACCAGTGAGGCCACAATGTGATGTTTGCAAAAATGTTTT +TAATGGCCTAGGAAAATCATTCTTTCTTCCAAAATTTTTCTTCTTTGTTT +TCCTTTTCGATTATTGATTTTTAGTTGAAAAATCAATATTAATAATTATC +TTTTTTTTTAATTATTGATGTTCCGCTCCAATTCGTGGCGAGACCAACAA +CTCACGCATTATAAATAGTCATAAAATGTCCGCCGCCAATTCCGGCCGAA +TGTGTATCCATTACACCGATACAGAAAAGTGCGGCGATTGCCGAATCCAC +CGCATTTCCGCCTTTCAGCAAAATATTCCGACCGATTTCTGAGCAAATTT +CGTTGTCGGCGGCGACGGCGGCTTTCGAATATTTTCCGAGTGGAGAAAGT +GATGGTCCAGGCCAACGGGGTAGGCGGGCATTGTTCTGGAAACCTTGCCG +AGTTTTATTTTTATCGGCCACGTAGCAAATCGGCCAGCCATGAACCGACT +GCGGACTAGTTTTCACCCTCGTGGCCTAGCTTTCTAACAAACCTGCATTA +GTAAATGAAAATACAATGAAGCGAATGCGACCGTTGAGATTAACAACGTG +AATACGAGAACCGTTAAGCCGACCGCCGCGACTAAAGTACGGATTTTCAT +GGGTTGCTCGGCCTCGATAAGCGGCTCAACTTCATCAAATGGTCCATATT +CTGGCGGTAAATTATCATCGTCCGGAAACGTGTTATAGTATTGAACAGGA +GGTGGGTTACTGCTGAAAAGTCGAGTTTTGCGCGTCAAATTTGATGTATT +GAGTGGCATGATCACTATTATTACAAGACCGCGAAATTTTGAGAATGCGT +ATTGCACAACATTTTTGACGCGCAGAATATCTCGTAGCGAAAACTACAGT +AACTCTTTAAATCACTACGGTAAAGCTTGTGTCGATTTACGGGGTTTCGG +TTTTTTCATTTTTTTTGTGTTTTTTAAAAAACAATTTATCGACTAATGAA +TGATTTCCGTAAATCGACACAAACACTACAGTAGTCATTTAAAGAATTAT +TGTAGTTTTTGCTACGAGATATTTTGCGCGTCGAATATGTTGCGCAATAC +GCATTCTCAGAATTTTGTGTTGCCGTAATAATTCTGATTTAAAAATGGAA +TTTTCAAAAATCAGAGGTTTCCCAAAAATTTGAATATTCCACAAAAAAGA +TGAATCTGAAGAGTATTTTCAATTTTTTGAGAAAATAATCGATTTTCTGG +ATTAAAAAATTTGAATTTATAGATTTCTAGATTCTAAAATTTGAATTTGT +GCAATTTCAAACAAAAATGTATCGATTTTTATCGACTTTTCATAAAATTT +AATTTAATTTTCGAAATTCCGAAAATATTGCCCGTTTTTCATATACCAAA +AAAGTTGTATCAATTTTTTCAAAAAATTAAATTTTCATCGATAGTTGTTT +TCAAAGCATCATGATTTTTTTGAAAGAATTTGCAAATGTGAGAACTACAG +TACTCCCTTAAAGGCGCACGCTTGTTTTTATTTTTAAAAAATTGGTCGTT +TCGAGACCTGGTACCGTATTTTCGCATTTGGGTAATATTTCCAGCATTAT +AAATTTAAAAACAATGCACTTTTTAAATCTTGAATCCATATATATATTTT +TTTCAGAAACTCATGCCGCCTCGTTTGGGTCTCGATTCGAATTTTTTTCA +CAAACGTATAATATTTTCTCGAAAAACCCGCGAATTTTTAACATTATCGA +TTTTTCTCTCACCTCGTCGATTGCCCTTCAGGAGTCATTATTCAGAATTG +AGTATGAATTAAAAAATCATAGAAAAAGTCAAAGCGCTGTCAATTGGAAA +TTTAGGACGAGAATGTCAAAATTGCAACTTTTCACGATAACGGAATTCTA 
+TCGCAAAACTAGATAAGAGCTACAGTTTGCTGCGTGTAAGCGCGCTCCAA +TGACAAACGAGAAGAGAAGAGAAGAGGGGGTAACCGCCAATAGCTCTCTT +TATCGAGGATAGTCTTCAAGTGACAGAAGAGATATTCTGTGGTGGGAGAG +AGTTGGGTGGAAGGGGGAACAATGAGGACTACGGAAGAATGACTTTTAGA +TTTGGAGTATAGGAGAAAGTGGAGGTTTTGGCTGAAATTATTTTCAAAAA +CTTTAACTGTAACAACTAACGAGTAGATTTGAAAATGAGGATAATATTCA +ACACAACAGAAAAAAAGTCGATAGGTAACTAATAAAAATTAGAGAAAATG +TGGCATCGGGGAGAAGTGGTGGAACAAAAGAAGAAACTGATGATGATGAT +AGGAGGATAGCCGCGAAACAAGCGATTAACAGTGAACAAGATTCATTTCG +GTGAAGATGGAGAAGATAACAGCAAAAAAAAAAACACAACAAAAGTCCTT +ATTATTGCCCATTGAGAGTTTGTTGAAGGACACTGGTAAGGTGCTCGTCG +ATTCCATCTTGCTGCGCGAATTGTGGGAACGATGCGTAGATTTGCTTCAG +GATTGTGACAATTCTTTGCTTGGTTTGGTTGCCGCTATCGTCGTCGTTGA +AAGCATCGTTGGCGATTGAGAGCAAGCCTGAAAAATACAAATAAAAATAA +AATAAGTTACGGATTTTTTTAGATTAGAACTTACAAACAAGGAAGATTCT +CGGAAGATTCTGATTTTCTGGTCCAAAAAGTGCGGCGTCTTGTTTATCAA +ACAATTCGGCGAGACACGTGTAAATGTAGGGGCTTTCCTCGGTGTCTGAA +TATGTTGGCAACCAGCTCAGGAACATCTCGACGACCTGGAAATAATATGG +AACTTTTTTCTTGAAAAACTGAATTCTAAAACCAACCTTTCCGTAGGCTT +CAGCAGGAAGTGGCACATTGCCGATGATTTTGGCAAATGCAGAAATCGCG +TTTTCCGTGGCAACAGTGCTCTCCTCGGTGGCTCGGGCGTCCTCTCGTTG +AATCATGGCGGCCAGTGGCTCCAAACAGCTCAAAATCTCGTTTCGGTAGT +CGCTGATCTGGTGGTAGTTGATGGCCATAACTCCGAAACCGTAGCTCGCT +GCTTGTCTCACTTCAGGATACTCATCTCCCAACAACTTGTACATTATTGG +AATAAGCTTCGGGAAGCGAGTTGGCATATGTCCGACACCGAACTCGATGG +CGTCATCGAGAAGGCACATTCCCCATTGTCTCTCGAAGTATTGTTTGGAT +CCCTCGATGAGTTGAATGGCACAGTTGAACACGTTGATCATTCCCTCGAA +GATAGTTCCCTTGAACTCCTTCATAAGAGAGTGAGTCAAATCAGAGATGG +CTCCAAGGCATGAGGCCTCCAATTCCATGAAATAATCGAGCTCCTCCTTA +GCTTCAGCATCATCGTCATCTTCATCTTCAACTGGACGATCGCTCATTCT +CTTTCCGTAGTTTTCAAGTTGCTTGAGAAGCACCGAAATGATGAGTTGAA +CCTCTTCCTCGGCGATTCCCTCGGTTTTCATAACCTCAATGCACGATCCG +ATGGTCGTCATGAACGAGGCGAGAATCTCGACATCGTCCTCCTCCTCCAT +AGAGGTCGTCAATGCCTTCAAGAACTCGCACCACAAACGACGCTTGTCAG +CCAATCCTTGCTTCTCAACACAGGTCAACAAGCATGGCATGATTTCAGCG +GAAGCCGTACGAACTCCGTCGTGAAGTCCGAAATCGAGATTCTTGATGGC +GAGCTCGTAGACGTCGACGACGTATGGCATGAAAGCCTCCTTCATCTCCT +TGGCGAACGCGACAAGCATATCGCAAGCAGTTGCCTTCTCCTCAAGACCG 
+CTGGTGCGGATTCCGACAGTCTTTTCTCCTCCGATTCCGTGGTATTCAAC +TCCTTCCTCGTTTTCGTGAACATCTTCGTCTGAAATTTATATATTTTTTT +AACACGTAAAACTAAAAAACACTCACTGTTGAAGATATTGAAGTCTGGGC +GGTAACGAGCGGCACGAAGAACTGGATCCATGACAACTGGGAGGAATGGT +GCGAAATCGGCTCCAAGAATAGAGCAGAAACGAGTCCATGAGCTGATCAT +GTAGCTGTATTGTGGATCATCGATAGCCAAATCCTTCATTCCGTCTCCGA +GAAGGTTCAGGATCTCGATTGCAGTAGCGTGGAATTGTTCCTTTCCGACA +GCATATCCGATGAGAGAGATGCATTCGATGGTCTTTCCTCTCAACTCCTT +GAGCTCTCCGACGTTTTGAAGAATGTGAACGAGGTTCGGGATGAGGCGAG +CATGATGTTCCTTGAAGAGCTCCTCGGCGGCTTCAGCGACGGAGGCGATG +GCGGTGACGATGTTCTCAACGACAACCTGGTATCTCTTGTCTCCGAGACG +ATTGAACACAGCGGAAAGCACATTTTCGAGTTTCTGAAGAATGAATGGAA +GATATTGTCCAATGATGCTCTTTGGGCACTCCTCGGCAAAGTTGACAAGA +GCCGAAGCGGCATGAGCACAGACACGTGGAACATCGGTTCTGTCAAGAGA +CTCGAGAAGAGCTGGTATGACGGCGGCGTGGCACTTCTTCTGAAGAGTTG +GAGCGAAGTCAGAAGACATTTGTCCGATGGCGTTGCAAGCCGCGTATTGA +ACTCTTGGATGAGCATCATTGACGTAATTGGTGATATGAGCCATAATTTG +CTCTATGTGTGGTTCCATAGATCTTTGGCACCCTTCACCGACGGCGGAGA +ACGCGCGAAGAGCCGCGTGCTTCATCTTCCAGTCTTCGCTGGTCAAAAGC +TTCTCGACAAGTGGAAGGAACACTGGGAGCATCACTTTTCCGTTGATGCA +GCAAGCAACACGGTCGATGGCGCTTTCGGCGATGATTGGGATACTGAAAA +ATAATTTAATTGAAGAAAAACTAAAAATTTATCCTTACTCTTCATAGTCT +TCTTCCTCTTCGATCTCGTTGAGCCACTCATTGAGCACATCGTCATCCAT +TTCGGTCATGCATGAAAGAAGAGTCTCGAGAATTGGTCCAAGAGCTCCTG +GTGCGTATTTCTTGAGTCCTTTTGGTGCGGATTCCATGTACGAGCAAATA +ACTTCGATGGCGTTCTGACGGACCATCTCGTTCTTTTCCTTGTTTCCAGC +AATCTGAAATTTGAAAAAATGAAATTTTTAAATTGGGCCATTTAAATTAC +TTACAGCAAGAGTGACTTGAAGAACCTGAGACATATGAGTGTTAAGGCAT +TTTGGCAGAGAAGAGGCGAGCTCGGCGAACTCTCCGAGTGGTCCATCTGA +ATCATCCTCGTCGCTGGTTTCATTGCAAACTTGAAGAACATTTGGAACAA +GAGAGGTCATAAGCCTGACAACATCCTTCTCCTCATCATTATCAACAGCG +AAAGCGATAACAGCACGGACGGCGGTGGCCTTGATTTGCAAATCCGGTGT +AGCCATGCACTTTTCCAAGACAACTTTGAGGGTTGGAAGGAAGTGGGCCA +GCCTGTTTCCAAAAATTATTGGGCATCCGCGAAGAATAAGCAGGGCTATG +TAGTTTCCGGTAAGATCCTCGCTCTTCAAACAATGATCCATCAGCTCGAG +GACTCCTCCCCATGTCATATCTCCAGAATCGTCGATGAGGTTTGAAGCGA +TTTCAGAGATCAAATCAGCAATCTTCTTCTTAATCGAGAGTTCGGTCTCG +TGAACTATCATCTCGAGAACTTTGGCGAGAATGCGCTGTTTGTTCTCCTC 
+GTTCAGCGTTTCCCAAATCGCATCCCAGTCTCTGGCGAGAACTCGTCGCA +GAAACACCAAAACTGTGGATCTGGCCTGAAAATCGAAAATTCATGGAATA +TATAGGAATTTGAGTGAACTAACATCAGAATTATTCGTGTGGTGTGTGTA +GGCCTCGAAAAGTGCAGCTACTTTGGTTGGTCCTTCAATCTTTTCATAGA +CTTCTTCAGCTTGTTTTCGGATATCATTGTCTGCTGACTGCAGTTTTGTG +ATGAGCTCAGTGAATTGATTGACGTCCATTGTAGGGAACCCTGAAAATCA +GAATATGTGCTAAAAATTTCAAAATTGAGAAGGAAAAAGATGAAAGCGAG +TCGGTTTTGCAGATAAACGCCTGGGGAATACGCGGAATCGCTGAGGAAAA +TAGATTTTTAGTATAAAAATTGGCATTTTCAACAAAAAACCTATCGGAAA +ATGTGAAAAATAGGCCAACAAATATTGAATTTTGGGACAAAAACGCCTCG +AGTGCACGATTCTCAACGGAGCGCACTTGCTGCGTCACACGCACTTTAAA +CTGTACACGGCGGGAAGTTCAAATTTTAAAAGGATTCACGTGATTTTCAC +GACATTTACTGGTTTTTTCAGCGAGTTTTCGCAACTTTTCGGCGACAACC +GATGATGATTTATGTTTGGAATATGATGATGGTAGCGTGTTGCGAAATAA +AAAATCGATGGAATGAGTCAGCGAGAAACTAATATTTGCATAGAGAACTG +TTTGCAATTTTGGCATTACATTGGCCAGTGCACTGACAAAAAAAGAAAAA +AATAAACGGCAAAATCTCTAATACCTATTCCAGGCAACCAGTGTCTGGCG +ACGATGGAGAGTAATTCTGAAAGAGACGCAGTTGAAAAGAGTGTGTTTTT +CGCTTTTTTTTTCAGACAGAAGAGTAAAAAGAGTAAATACGTATCAGTAG +TAAAAACAATGTGCGAGCTGGGAAATCAGTAATAATTAACTACTTGTGAT +ACAAATTCCTCAAGGAATGAAGGAGAAAGTGTACTTTTTTTGAGACCCGA +AGAACTCGGGGGATGTCCAATTGGGGGGATTACCAACTCGGGGGATTGGC +CCCGCCCACAGAACCGTGGCTTGCAATACGCCCATTTCTGCAACTGCCGC +ACGGTTTTAAAACTGTATTTTTCTCAATAGAGCGAGAATTAACAAGAAAA +AATAATTTTAAAACCGTGCGGCAGTTGCAGAAATGGGCGTATTGCAAGCC +ACGGTTCTGTGGGCGGGGCCAAACTCCCGAGTTGGTAATCCCCCCAATTG +GACATCCCCCGAGTTCTTCGGGTCTCCTTTTTTTTAATTTTGAATTGAAA +AATTGTCCGAAAAGTGCAAACGTAGAACATCAAAACCAGTGGAGGGGCGA +AATTTGATAGATCGCATGTTGCAAGAATGAGCATTCTACGAGTTTCGCGC +CATTTCTGCGTAGCGCGCACAATATTGTGCAATAAATCTCGGTATTTGCG +TACATGCATCATATCTTACGCGCAAATCATATTGGAAATTTCCCCGAAAC +ACGGGGAGGCAAAGCTAACGTGGCTGAAGAAATTTCTACAGTAGTCCCAT +TTGGCTGACTGAATATTCAACGCGAATAAGTTTTGTACACTATTGCGTAC +TTTGCGTACGCGCATTTTATTTGACGACAATTCGTCAATATCAGCTCTGG +CTAAAAGCGCTTTTCTCATTATTTAAGCGAATAAAAGTCGAAAAAAAATG +TTTTGTGATAGAGAATAGTAATTTAAAAAATAAAAAAGTGCATATTTTAT +GTTTCTCATTATGTTTCCACTGATTTTCTGAGAAAAACCGAAAATTTCCT +CTTTTTTCGATGAATTTCAGCTGAATAGTTTGTTTTTATCTAGTTTTTCT 
+TCCGATTGACTGAATAACACATTTAATAACATCAATATAACGTTTAAAAC +ATTCATTGTCTCGAAAACCAATATAAAACTCGTCAGAGAGGGATACTTAG +TAGATATGCTGAACAAGGAAAAGAGAGAGAAGAACATTAACCGAATGGGG +AGGGGGGAATAAGAAATCTGAACAGGAACGAAATATAAAGAACATTATTA +GAAAGAACAAGCATGGCTTGATCTTCTTGGCAGTCGTCTCCTTATCCTAT +CACTTCTTCTCGGCGTCCTTCTGATCATGCTACCGAGCAACGTGGCACGA +TCGAAGAGTTTCACAACGGTTTGATACTTTCGCTCCACGAGGGGGCTCCC +AATTCAGCGAGGGTCCTCTCACGAAGTTTAATTTTTTAAAATTAAAAAAT +AAGTAACACCTCAAGAATTCCATCGAAGTTGTGCTCCTTTGCAATAAGAC +GAAATTTTTTTTTACGGAATTGAACAGGATCATTGAGGTACGGCATGAAA +ACGGCTTCCTCGTTGAACGATGTTGAAGCTCCTTCACCATTGCACCTGAA +AATCAACTTCTAAATTTTCCAATTTAAAGAAAATCCACATACTTATCCTC +GGAAACGAATCCAATATTCTGGTTGCTGAGTGGAATTGCTTCAAAGATTT +TGATCAGATTTTTTTTTGAAATTTTCGATCACACATCTTGTATCCACCAA +ATCGCCGAGTGCTTCATGAACTTCAGACTCGTGCTTTCCAACCTCTTCCA +TGAGCTTCACTTCTTCTGCTTGGAACTTTTGATTGCCTTCGGACCGATCT +GACTTGTAGTTTCTTCTTAAATAGTGGCTGATTGAGCAGTCGACCTTGCA +GTCCTACCTTCTTTGGCTTCTTCTGCGGCACTTGAGCCCTTGTAAGAGCA +CTTATTGCTGCTTAACCTTCCTTATTAATACTTCGAACAGCTTCTTCTTG +TTCGCATCTATTTCGAATTTTGTCTTGAAATCTTCCAATGCAGCGACAAC +GTAATATCTGGAAATTATAAAAACATTAAGAGAAAATATTTTGAAAAAAA +ATCGAAAATTGCACTGAATTCCTAAATTTTTTATTAAAATCGAAAAAAAA +AAATGAAATACGTGAGATTGAGTTTCGAGACTTTTTTATTCAGAATCAGC +ATATATTTCTCCATATTTGAGTAGGTTTTCAGAAATATTGTACCATAATT +TTTGGAAAAATGTAATTTTTAATTCGAAATTGCACTGAATTTCTCGAATT +TTTCACTAAAATCGAGAAAATAAATATGAAATACGCGAGATTGAGGTTCA +AGACTTTTTAATTCGGAATCAGCATATATTTTTCCATATTTGAGTAGATT +TTCAGAAATATTGTACCATAATTTTTCGAGATATTTTGAATAATAACTTA +CTTTTCGACGTTTTTTGCCTTTGTCCGGTTTAATCCATCGAATTTCGAAG +CGGTTTGCGTAGATTAGCTGAAAACATTATGCTTATTCCACGTAGTAACA +AGAAAAAACAAGAAAAAATAAGAAAAAACGAAGAAAAATAAGCATATAAG +TCAAATTAAAAATGTTTATTCGATCAAAATTCTTAACCATAGGAGGCGGT +GGCTAGCCGGCGCACTCTCGCGGCCACGTTAGCTTTGCCTCCCCGTGGAA +AGGAGCCATGATATTACGGAAACACTATTTGAATTCTGAGAACGCGAAAT +ATCTCGTAGCGAAAACTACAGTAACCCTTTAAATGACTACTGTAGCGCTG +GTGTCGATTTACGGGTTCGATTTTTAAAATTATTTTTTTTGTTTTTTGTG +TGCGTCATCGATTACTAAATTATAAATTATTTCCGTAAATCGACATACAA +TCGCTACAGTATTCATTTAAAGAATTACTGTAGTTTTCGCTACGAGATAT 
+TTTGCGCGTCACTCCTTCAGGCACCACATCTTACACTCAAAATCGAGGTG +AATTTCTGTGTATTTGACAGCAAAAATGTCGTGAAAAACAATTAAATAAG +GCAATTTGAAGAGGAAAAAACTGGGGCAGATGTGAGATAAGAAGAAGAAG +AAGCGGGGCTTCGACAACCGTAAACTAATCAGAGATGGAAAGTGAGAAAT +TTGAAATTTAAGACAAAAAAGTTAACAACGGGGGATTTTTTTTAGAGAGA +AAGAGCGAGTAATCGGGCAGCAGCAGAGAAAAAAAAAGATAATAAACAAT +TGAAAAAATATAAATATCGGGGAAATTATTGCTGAATAGACTGCAAATAG +GTGACAATCTCGGGCGGTACAGAACGAAGTGTCTCTGGACAATTTCCGGG +TCCACGGACAAGAACGGCCTGCGCAAAGTAGGCCTTGTGGTTGGCGATGT +TGGCGGCGACGGGCGGTTGCTTGGGAGCGTAACTCAGACGACAGAATGGG +TTCACAAATTCTCCCTCGGCGTTGTACATTGAAGCTTGCTCTTCTTCTGG +TGATAGGACGGCTCGATCGGAGGCTGAAGCAGCTTCCAGCAGCATTGCCA +CCTCGTATGACAGAATTCCGTATTGACCACTGCAAAAAAAATTTATATGA +GAAATTGAATTTAAAATGATTTTATTTGTAATTTTGTTTTTGTTAATTGT +CGTTAATTGTTTTTTTTTTGAGAAAAAACCGAAAAAAAAATTAAAATCTT +CATTTTTGATTTAATTTTTTTGTTGAAAAATCGGCAGTTTTCGATTTTGT +TTTTTTGAAAAAATAGGGAAAAGTCGGTAATTGTCGATTTTTCAAAAAAC +TGAAAATTCCAGATTTTGTCAGGAAAAAAAACGAAGAAATCGACAAAATT +TCAATTTTATTTTTTTAAATTTGTGTTTTTCAAATTTTTTTTTGATATGT +GATTTTTAACTTTTCTCTGGAAAAAAACCGAAAATTTGCAATTTTTCAAT +TTTTTCCAAAAAAATCTTTAAAAAATAGAAAATTTTCGTTTTTTTTTCTG +AAATATTTAAAAATTGTAACTTTCTATTTTTTTTAAACCCGTATTAAAGT +TTAAAAAAAAAATCGGAGAAAAAAAAACTGAATCGAAAATATGTGATTTT +CAATTTCCTTTGAAAAAAATATAAAAAATCGAAAATTTTCGTTTTTCGAT +ATTTTTTCTGGAAAAAAGCCTAAATTTTCATTACTAACACAAGCTGCTGC +GTGACATCGGCTAGCAAATTGCCCATTCCAATGGCAATAATCCGTTTCTC +AGGCCCCGTTGTCGTTTGTTTCAGACCTGGCAAATCAATGCATACGACCT +TCTCGATGAGCATTCCGAACATGCCAGTCTGAATATTCTCACACGATTGA +ACAAGATCCTGTGCACTCCGGGCGATCGTAAATCGACACAAGAACACGAT +GAATAGCTTCATAAACTTGGGTGTCTTCGACGATTGAACTCTTCGAAACA +TTGTATTGAGCACAAATGTCATCGCGGAACCCTCGAAGTGCTCAATCGAC +GGAAGAATCGTCGCCGCCAACTGGAAACCGTATTGATCCAAGGTTTTCGA +GCCCAAAAGCCGCGCCAAATGGCTCAAGATCAACCCAGAATTCTCCGAGA +CGACTCGCTGAGCATTCACGGAAAGGATCACTTCGAGAACCGAAAGTGCC +GCTGGAACATTCGCTGATCTCGCCCAAAGACGTTCGGATAGCAGAAACGG +AAGGAACGGTGAGAATTGATCGATCGATGAGTTTCTAGCGATACATGAGG +AGACGAGAACTCCCGTGATTTGGAGAGCATATGGGATGAGATCTTCCAAA +TCCTCGCGGAAGATCACTTCGATCAGTGGAAGGAGTTGAGCATCCAGTGA 
+GGCGCCGATTGTTCGGGTCTGGAACAGGAAATTCGTTAAAAAATATAAAA +TGTTCTTAAAATTGCTATCACTTTTTTTTTGAATAAGAAAAAAAATCACA +AAAAGGAAAACACGAAAAAGCAGACAAATGTTTCGATTTTGCAATTTTTA +ATTTTTTTTTTTCAGAAAAAAATTATCGATTTCTTCTTTAAAAAAAGAAG +ATTCTTTTCGATTTTTTCGGAAAAAAACATCGAAAAATTGAAAAACAAAA +AGTTTTGATTTTTTTTTCAGTGAAAATCGTTCAATTTTTCGGGGTTTGTT +TCGACAAAAATAATTAAAAACTAAAATTTTTCAGTTTTTAGGTTTCTTAT +CAGAAAAATTGAATTTTTTCGATTTTCCGAAAAATTAAAAAAAGGTATAT +TTTTTGAAAACACGAAAACGGAAAAGTGTTGGTTTTTTGTAAATTTTTGC +GTTTTCCAGCAAAATTGCCAATTTTTCCGAAAATTTTCAGATCGAAAGTT +AAATTATCGATGTCTATTCCAATTTCCGAGTTTCAGCGAATTTTTTTTTG +CAGAAAAATTTCAGAAAATTTGACAAGTTATGTTCCAATTTATGAAAAAA +AATTTGAGACCAAAGTGCGGCACGGAAAAATGGGCGGAGTTTAGAGTGGT +ATTTTTCTCTCAAACTACTCCTTACCTTGGTGATCAACACACAAATCGTC +TCAAACAAAAAGTGCGTGTGCACCGAATCCGCCGGATTTTTCGTCGCCGA +CTCGACCAACTGTGCGAGCTTGACCGCAATCGCGTCCGCATGCCGAATCG +TGTCGTCATCGAGAATCACAATGATCCTCAAAATCGCCTTGATCAGATAC +GGCGAGTTTTGCGCTTTGGCATCCTTGTCGAACGCGGTGACAAGGTTCTG +AAGGATCGAGGAGACTGGAAGATTTTGAGCCGAGAAGATTTTGTTGGAAT +CAGCGAGAAGGATCTTTTCAATAGCGTAAGCTGCGTACTTGTGCAGAATC +GGAGTATTCGAGCTGAGAAGAGCGTCCGCCGACTTGATCGCAGTCATAAG +GTGCTCTGGAGCCAGCTGTTTCCTGAAAGTGACCGCAAACTTCAGTGCAT +CAGCTTTAAGGATCGGAGTTTGATTGACATCGGCGTTGAGATGTGTGGCC +ACTTGTGTGATGAAGAAGTCGTTGATGTCCACCAGCGGGTTCGTCGCCGT +TACTCCACTCTTAGCAGTCTCCGTCTTCACCGCGATCGCCGTGATCAGCG +AGTAGACAATGTCGATCTTGATCCAATCACCACTTCCCAGCAGGTTCTGC +ACGATCTCCCCCAGACATGGAAGCATTTGCGCCTCGAATCTTCGGCAGAG +ACCTCGCGCAAGATCGATGGCTCCACGGCGTCGAGTTCCTACATCGGTAC +CCTCGATGTCTCGCTTCATATAATCCAGTGGCTCATCCTCGAACAGTTCC +ATATCCTGTTGACGAAGCAGAAGGTTCTGAACGCACACGTTCTCGGCGAG +AGTCTTCAGAACGCCTTCACCAGTGAAGTGTCCCTCGTAGTACTGTCGCT +GGCTGACCATCGACAGAAATTCCAGCGCGGCGCACACCATCGTATCGTAT +CGAGTGTCGGGGCCGGTAGACTTGAGCAGATTCCAGACGGCAAGGATGAT +GTCGGGCACGAACTCGGAGATCTCCTCCTCGTAGCGTTGCGAGTAGAGCG +TGAAGATCTCACAGATCTCGTGCTTAAGCTCGTCCAGAGTTGTAGGTTCT +CCCGAGTTGGAGGTCTGCGTCGGCGCGTCGATTTGCACGAGATGCAAGAA +GTGTGGCATCCAGTCCTTGAGATGATCCTCGAAGTATTCCGGAATCTCCT +GTGAGCACAGAGAATGGTAGACTTTTGCAATGAGAAGTAAGACTCGGAGC 
+CATTGGGCGATCTCGTCGGCTCCGAGTTGATCCTTCCGCTGTCCCACCTC +CATCATGTTCCGTAGCAACAGAGTTAGAGGCTCTTGGGTCTGAAAAATCG +GTTCTTTTTATTTGTAACTGAAAAATCTGGGGAGAAAACAGTTTTTCGGT +CCAAATTCGAGACTGACATTCAGATTTCTTCAATTTTTTGGCTCAAAAAT +TTTGAAAATTTGTCCATTTTTATAGGAAAATTATCAATTTTCGCCAAAAA +TTCCTCATTTTTGCTTTAGTTAAAATTTTTGACTAAAAAATCATAAAATG +TTGAATTTTTCACGATTTTTCGGAAAACAAGGCCAGCTTTAGCTAGCTTA +AATGTCTAATTTTGGTCATTTTTTAGGGTCAAAGCGGACAAAAATTCAAC +TTCCGACGGCTGCGACGTAAAAAAGTGGCCAAAATTGGGGATTTTAGCTG +AAATTAGCCAAATTTTGAGCCAAAACGCAACTCACGCTCAGTAGACACTT +CTTCAGCTCTTTCCAAAGCTCTGCTGACTTGGATTCGAATCGGAATTTGC +GGAAAATCTGCTCCATCGACGCCAGCGAGGCCACCAGGTGATTGAGATCC +GCTCCGTTCAGGAATTTCGACAAATATGGAACCAAATCTGGCCATTTTTC +GGGAAAATCTCTCTGAGCGATGAGATACAGAGCATTGGACAGGATTTCCT +GAACATTTGATTTTGTATTGAACATCGCTTCGAGAAGCATGCTTCTGAAC +TGCTCCTCGTCTTCCTGGCCCATTTCTACCTCTGGTGCCGGGCCCTGAAA +CAATTTAAATATAAAATTCACGCATTTATTCAAATTTTCCAACCCAATTT +CTCTTCACAAAATTTTTCAAAGCGACAGCCGCGGCGATTCGGATCTGCGG +AGCAATTTGTTGCTGTTCGTTGACCACCAGTTGAAGAATTTGAATGATAT +AGCCTGGATTTGATTGCAGCGAGCGGAGCGCTTCTTCTCCACGCTTGCGA +ATCGCAGCATCCGGCTCCAGAGTCTGCTGGAGAGCTGCTCCGATTTGCTC +CATCTGAAAATAGGGAAAAATAGCTGTTTTGGGGGTGAAATAGGGAAGAA +TTAGGTGAAAGGACAAAGATAGTTTAGCTTAAAACTTGAAATCTAAAATT +TCCGGTCAAAATATTAAAGTTTTACGCAATTTTTCGCAGGCCCGCGGGGA +ATAAACTCGCAAAAATTCGCAAAAATAAAAAAAAAATTTCAAAAGTGTGA +AGTGTTTGCGTACAGCGCACCCGACCCGACGCGCAAATTTTTTTCATTAT +GTTCTCTGTTTTCACTGATTTTTACTGATTTTTCCTAGTTTTCCCTTGAT +TTTCTTCAAATTTTCCACTTTTTCAGCAAAACGTGCATAAAAAAATAAAT +TAAAACATTTCAATTTTCAAAACTTTGCAAAATAAAGGCGTCGTTGCGTA +CAATGCGCACGACTTGACGCGCAAACTTCCCAATTTTTATTCTTAAATTT +TCCTGGATACCCCTGGTTTTTTCTCTGTTTCCCCCCTAATTTTTCGCATT +TTCAGCAAACATGGGTGTTGATAAAAAGCAAAAGCAACGAAAAACCAATC +CATTCGAGCTAAAATTCAACAAATCCAAGCATGACGTGAGTTTTTCAGAG +TTTTGAAGAAAAAATCGATTTTCTCGCAGATTCTCGGCCGGAAAAAGGGC +GCACAAGTTGGAGCACCGACTGCGTCACGAAAACGAGCTCACGAGCAACG +TGAACAGACGCTAGGCGTAGAATATGACCGGAAAAATAAGATTAGCAAAA +TTGTCGACAAACGGCTCGGCGAAAAGGACGGAAAAAGCGAGGAGGAGAAG +GGCGCGATGAGATTCACAGAGGAAAGAGTCAAAAATTATAAAAGAGCATC 
+GAAATTCAACTTGACAGATGATGGAGATGAGGAGGAAGAAGGTTTATTCC +GAAATAAATTGAGAAAAATTTTAATAAAGTAAATTTCAGTACTTACTCAC +AAGGGAAAAGCGCTCTCGGACATTGAAAAATACGACAAATCGATGATTTC +CGACTCGGATGACGATGAAGAGCCGGGAAATTTGGGCTCAAATATGGTAA +AAGTGGCTCATTTCGGTGGTGGAGAGAAGACCGCCGAGGAGCACGTCCGC +GAAAAAATCAGCCGAGAGGATATGATTTCGAATTTGATTGCGAAAACGAA +GCTGGCGCGTCACGAGAAGCAACAGCAGAAGGATGAGCTCGAGCTGATGA +CAGAGTCGTTGGATTCCAAGTATCAGGCTTTGATGGGCAAAATGAAGGCT +TCATTTAGGCCGACCGGGCGCCAGCCGCTGGAAAAAGATGATTACGACAA +ATTGGTACGGGAAACATCCCGGTTTTCTCCAAATTTTTAAGATAAAAGAT +AATTATAAATTTAGAGTGAAAATCGATTTTTTTCACAATTTTTTAGTGGA +AAACCTAAAACCCAAAAAACTTGATCTGAAAATGCTAATTTTTAACCATT +TTTGCTATGGAAATTGGAAATTTTCATTAATTTTGCTGTAAAAGTTGAAG +AAATCCAAGTTTTAGCTTTGTAATCAGAGAAAACACTGTAAAATTATTTC +GGAATTTGTGCAATTTGAAGTGAAAAATAAAACATTTAACTTCTTATCGC +GAAAGAAATTGAGCTGAACTGAGAATTTTTTTGACGAAAAATCATTAAAA +AGTCAATTTTTTTGACAAAAATTCGAGAAAAGTCGGTTTTTCTTTTGACA +AAAATGACGAAAAAGGCAATTTTTAATTTAAAAAAATAAATTTTTCGCCG +AAAAATAACTTTAAAAAAGTCGTTTTTTTACAAAATAAAACAATTTAAAA +TAACTATTTTTCGACAAAAAAATTAAGAAAAAGCCAATTTTTTTGTTTAA +AAAATGATAGAAAAAGGCTTTTTTTTTGTTTCTTCGCCTAAAAAATCAAG +AAAAATAGGTTTTATTCCATTTTTTATCCAAAAATTATTATTTTTCCAGA +CAATCACCTTGAAAACCGAAGCCGACGCTCGTGCCACCCCAGCAGATCGT +AAGCTATCCGAAGAAGAAGAAGCTCTGAAAGAAAAAGAACGTCTGGAAAC +TCTCGAGGCCGCCCGTATCTCGAAAAATAATGCATTTTTCAACGCAAAAT +CTCATTTATCAGCCGATGCCGACGTTGATATCGATGCTGGATCGAAGGCC +GACGCCAGAAAAGTTCAGGCGAAAAATTCGAGATTTGAGGTCAAATTTGA +CGATGAAGGTGGCCTGATCGATGAGGATACGGTGGAAAAATCCAGGATTT +TAAAGAAAAATCTGGATGGTTCTGATGAATCTGACGATGACGAGGATCTA +GAAGATGAGGAAGAGGATCTGGATGATCTACTGGAAGATGAGGATGAGCT +GGAAGAAGATTCCGATGATGAGGAAGCTCAGGAAGCCCAAAAAGTCGTCA +AAAAAGCGAAAAAATCTGCTCCAGAACCCGCTGAAACTCTGCCATTCGTA +TTCGAAATGCCGAAAAACTATAAAAAATTCTGTGCTCTTCTGGAAAAACA +CTCGGAATCGATGGATTTAGTGCTGGAACGACTCGTGAAATGTCATCATC +CGAGCCTTAAAGAAGGAAATAAGAAGCGTCTGAATAAGCTTTTCCTCTTG +TGTTTAAGATGGTTTGATGATATGTCGAAGGAGGAATTGACAGCGGAAAG +TGTGAAGGAAATGAATTTAGCGCAGGAAACTATGCATGCGTTGATGAAGG +TAGCGATTGGATGGAAAAGCTGAAAAATTACCGCACTTTTTAATCTGAAA 
+ATTGAAAAAATTCGAGAATTTTTGACCTAAAATTTTGAAAAATTCCCGAT +TTTTATACCCCAAAAATTGCAAAAAAGTCCCGATTTTTTACCAAAAAATG +TTTTAAAATCCCCGAATTTTTTACCTAAAAATTGCAAAAAAGTTTCGATA +TTTAGACCCAAAAATTGCAAAAAAGTCCCGATATTTTTACCAAAAAATGT +TTTATAATCCCCGAATTTTTGACCTAAAAATTGCAAAAAAGTCTCGATTT +TTAGACCCAAAAATTGCAAAAAAGTCCCGAATTTTTTACCAAAAAATGTT +TTAAAATCCCCGAATTTTTGACCTAAAAATTGCAAAAAAGCCGGTTTTTT +TACAAAAAAATGTTTTAAAATCCCCGAATTTTTGACCGAAAAATTGCAAA +AAAGTCTCGATTTTTAGACCCAAAAATTGCAAAAAAGTCCCGAATTTTTT +ACCAAAAAATGTTTTAAAATCCCCGAATTTTTTGACCTAAAAATTGCAAT +AAATTCCAAAAACTTTGGCCAAAAAATTAAAAAAAATACCCGAATGTTTG +ACCCAAAAATTGCAAAAAAGCCCCGATTTTTTGAGAAAAATCATGTGAAA +TTAAAACCTTTTTTTTTTGAAAATCAGTCTCGAAAAAAAAAAGAAAAACC +AGGAAAATCATGAAAATTTAAGAAAAAAATCATTAAAAATAGGAAACGAT +CATGGAAATATTTTTAAAGAAACATTATATAAAAAATCATTAAAATTTCA +GAAAAAAAATATGGAAAATCCTGAAAATAAAGAAAATAATATTAAAAATT +CAGATTAAAAAACGCAAAAAATTCAACAATTGAAAAAAATTTCAAATTGT +TTTTTGCAGTTCGACATTCAATACGGAGTCCGATGTGTGCGTGCTCTAAT +CCGTCAACACTGGAAAGGCCGCCAGGATAAACAGAAGAGTAGCCCAGTGT +CATTTGGATTAATCTCTGCAATTCGTCTTGTTTCCGGCCTCTTCCCAGTT +GCCGATTCCTGGCATCCTGTAGTGGTTCCGGCTCTTTTTTTGGCAACTGA +AGCACTTTGCTCGGCGAAATGTGCCAATTTGAATGCGTTGGCTAAACAAA +TTCAATTGGCTAATGCTATTGTTGAATATGTGTCTGAATCCAAGAGGTAA +TACTTGGAAACGAACATTTAAATAGGCAAATTTTGAAAAAAATACTGAAA +GCTTTGATATAAAATAGGAAAAGTAACGAAAAAAAAAGCCAAAACCACCA +AAAATATTATTTTCAAAAGCTAACTCAAAAAATTTAAAATTCAGAATTGT +CGCTTAAAAAAATCTATTTTTTTCGATATCGAAAAAAGATTACTATAAAA +ATTCAAAAAAGCAATTTTTTAAAATATAACTTGATTTTTTTTTCTGAAGT +TCATTTTTTAACAAATAATTGCATTAATTTTCCATTTTAATGTTAAAAAA +AAGCGAATTTTTCAAAAATTGATATAAATTTTGTTTTGGCAATTTTTAGA +TCAGAAAACATTGAAAAAATACAAAAATCCCGTTTTTTTTTAAATTAATT +TTTTAGAAAAAATTGCAATTTTTCAAAAATCTTCCGGTCATTTCTCTATT +TTTAAAAAATCTCTCGTCTCTTTTAAAAAAATTTAAAAAACCGAATTATC +GAAGCGTATCAAAATTTTGAAATTAAAAAAAAAACCGAACTCCATTTTTA +CGAAAAATCCGAAAGTTGAAAATTGTTTTCAATAAAATAAATTTCAGATA +CGTCCCCGAGCTGGTCGCATTTGCTCGAAGTGCTCTTCTGCTTGCAGTTA +CAGAGAAAAGTGAGAAATTCGCGACAAATGGATTCCCGATTTCCAAGCCA +CACACGGAAATGTTATGCTTTGAGGAGAAGGTAAATTTATAAAATTTGTT 
+TTTTTGAGCAAAAAATTGGTTTTTTTTTTTGGTTTTCTCTTTAAAAAAAT +TTGCTTAAAAACGCTCAATATTTAAAAAAAAAACTGGCAAAATCGAATTT +TATAATATAAATTAAAAATTTTGTTCAAATAAAAATGATTTTTTTTTGAA +AATTTAGTTTTGAAAAATTTAAAATTAAATTTTAAATTTTCACAAAATTG +CCAATGATAAAATGTTCACTTTTTCTTGTATAAAACTGCTTGAAATTTTT +TAAAAGCAATTTTCCGAAAAATAATTTTTTAATAGATTTCTGAAAAACCG +AAAAACCACCGATTGAAAAACGAAAATCAAGTTTTTTTCCGAATTTTCTA +TTTTTAACAAATTAAATATCAATTTTGCACATACAAAACGGTTAAAAAAA +AACAAAAAAAATTGCCGAGAGCCAAAAAAATTTAAGTTTCAGCGATTTTG +AAATTTTTTTTTTAGGAAACACGGTCTCCTCGGGTGGAAACGAAGGGGGA +GGGAGATTGCTTGCGCGTTTCACCGATGCGCCTTTCGCGTGCTGGCGCAC +TTCTGAATATTGAATTATTTTTGCGGAAAAAATTCATTTCTTCTATGAAA +TTTTCTTGAGAAAGCAAAGGAAAGATGATGAAATGAATTTTTAAAATAGT +TTTCTCAGTTAAAAAGAGAATACCTTGCTGCAAAAGGTTAAATTTATTGA +AAAATTGAGAAAATAAGAAGTTTATCTTTGTTTTCACTACATAAAGTGGG +GAAATCTCAAAAAAATTTGAAGACAATTAAGAAATATAATTAAATATATA +GCCTGAAAGTCAAATAAGAAATGAAGGTTTGGTCACAAACAATTTTATTT +TTGAATTGAAGTTTTGAGAAATGATCATGTGCCAGAATCACTAAATCTGA +GACATTCAGCCCAATTTCTTTAATTTTCTGTGATTTTATCATGGAAGTGT +GGCTTTTTCTGTGATCTTGCCAGTTTTAACAAGTTGAAATTGGAAAATCG +TGAAGTGGGAAACTAGCAGTGAAGCTTCCAAAAATTTCAAGCCTTACTGA +AAGGAAAGTATTGGAAACTAAAAACGAAAGCTTAAGAAGATACCGTTTTT +ATATATTTGAGTTTTGAAAAGCCTTAATAGGTTTTAAATACAGTTTTCTC +AAGAATTCAAACTTGTAACGATTAAGTTGAGGCACAGAATAAGATGGTAA +TACTAAGTTATGTTGAAAAAAGCCAAGAAAGACTGAAAACATTGCTTTCA +ATCTGATTTTTTGTAAGTAATGTAATGTTGAAACATTATTCTTCATAGTT +CAGCACTTTGTATGGCTTATTTGAGCCCAAACGTACAGTAGTATGTGGCA +AGAAATAAAATATTGTGCCAACAAAATTGGAGCCGAAAAACCCATGATAA +AATGTTTAATACGTGTATTGCTCTAATAAAATACCAAATATTAATTAGAT +TCACTGTCTCGACTCTTTTTAATTCAGATTAATAACTTTTAGCTCCTCTT +TTTAACGAAAAATTATAACTGGATTTCACAAAAAGAGTTCGATCTGAGTT +CTTTCAACAGGTACATAGACGAATTATATCTTAAAATGACGAGAAAAGTT +TGCTCTATAAACGTTATCATTGCAGGTTATGAGAAAAACAAATAAAAATT +TAATTTTAAAAAATCCGAAAATTCAGTTGTTATTTTCAAACAGTTCGTTA +TGCATTTCCACCGAACGGTTTCCGATAATTTTTGTTTCTCCACTTTCGGT +TCAGTCTACTTTATAATTGTTATCATTTTGAAATAGAAAAATCGACGAAA +AACTAATAAAAATCGAGGGAAATCACTCTTCAACGGTGAAATCTCGGTGG +ACGCAATGAAGCTGCAGCATTTCAAAAAAATTTTAGAAGCAATTTTGGTC 
+ATTTCAGCTTCAAAAAGCTTTAAAACTTAAAAAAATTAAAATTCATTTCA +AAAAATTTGAAATTAGTCATTTAAAATATAAAAATTAGAAAAATTCTAAT +TTTCAGCGAATTTTCAAATTTATTTTTATTTCAGAAAAATTCAAATTTTG +GCTTTCAAAAAAGCTTACAAAATATTAATTTTTAAAAAATCTAACTTTCC +CAATTTTTCAGTACACTGGCCCCGCACTCCAGCCAATCTCCCTGACCACA +ATCTTCAACAATTCCCCATCCGATCCATCTCTAAAGCTTCACGTTCTTCG +TGCTCTGCTCTCCCTCATCCAACATCTCCGTGTAATCTACTCCAACCAAA +ATGAGACCTATTCTATCGTTTTCAAGCCATTCCTCCGGATTTTGGAATCA +ATTCAAGCCAAAAATCTTCCAGCCGAGGTCCAGGAAGAGCTGGAGACTCT +GTGCGCCTCAATGAAGGCTGAAATTGGAGCCAAGTGCCGTCTGGTGCACC +TGTCGCTGGTGAAAACCGAAAAGAGCATGCTGAAAATGCTGGAACCCCGT +TTCGAGTGGGATTTCGATCCGGAACGCCCACATCACGGACCCAAAGACGA +GAAGAAGAAGCTGACGAAGAATCTGAGGAACGAAAGACGCGGAGCCATCA +AGGAGCTCAGAAAGGATACGGCATTCTTGGCCAGGAAGCAATTGTCGAGT +GTCAAGACGAAGGATCGTGCGAGAATTGCGGCCACGAAACGTGTCATGGG +CGGTCTTATGCAGCAGCAGGGTGAATGGAATAAGGAGAAGCGTACGGCGG +ATGTTGAGAAGAAGAAGGACAAGAAATAATTTAAATTTTCCTATTTTTTG +TTTGTTGAAAATGACTTTATTTGGCAAAAAAAACCCCTAAATTTCAGTTT +TTCTCGATTTGTTGAAAAATAGTCAAAAAACTGATTTCTATGTTATTTTT +GGTCAAAATTTGCCTTTTTCAAACATTTTTTTTGCCATTTTTCCTCCATT +TTTTGTTATTTTTTTTTTTCGTTTTGTTATGAGATATGTTTCTGTTTTGT +TGACATTTTTTATTATTGAAAATTAAACGAGTTTTCTGAATTTTTTTGTT +CAATTTTTTTGAAATTTTCCGTTTCACATACGTTTGTCAATATCTCGATT +AATTTTTGAAGTTTTCCAATTGACTCGCGCATTTTCTAAAAAGTTTTTAA +ACTTTACATTGAAAATACCCCTCTAACTCGAATATCTGATTTTATTGGAA +TTTTGAGTTTTTTTAGGTTAAAAAATTCTTGTGCTAAAATCATCCATTAT +AGTTCGTAAGTCAGCAAATTTTGGCTCAAACTTAGAGCGATTTCCAATTT +ATGGAGTTTTTTGTTCAGAATTCTCGAAATTTTCTCATTTCTGGAAATTT +TGAGTCTACGAGTCACACAGTTGTTCCGTAATTAGCTTCACTTACATCTC +CTCAACTCTGCAAACTCTCAAACTTTCGGGAAAGGGTCTCGCCACGAAAT +CACGGGTGGGCGGCAATTGCAGTTCGGCAAATTGCCGGTTTGCCGGAAAT +TTTCAATCCCGGCAAAATTCCGTTTGCCGGAAGTTTTTAAACGGGATCTT +TTATAAGACGGAAACACTTAAAACTGCCATTTTTAATTTTTTGCCCGTTT +TCTCTAAATATTTTCATAGAATTTACTGACTTTTTAGGATAGATGTTTTC +ATGGGATGTGCACATGTTGTTCCGGCAAATCGGTAATTGCCGAAAATTTG +AAAAACGACAATTTGCCAAAAAAATCGTTTGCCGTTCACCCCTGTATTGT +ACCATTTTTGGCGAAAATGCGCGTAAATTAATATGCTTGCGTGTGTAATA +TTTCGTTCATATATTCTAAATATACGCACCTTTTGAAATATTCATAATAT 
+ATGCATTTACGTACGTTCGAGAATATTTTGGGAATACACATTTATCATCA +TTCCCACCCGTTGCCATAGTATCCTCATCCCCGCCCCGCCCCGCCCCTTT +CTCTCATTTCCTCTTCCAAATCCTTAATGGCTCATCCGGTCATTGGAGAG +ATATGGCAGATGTGGCGGTTTTGACGAATGTTCTGGAGAACTCGAATTTT +ATATAACTATTAGACAATTTCGATATTAAAAACATTTATATGTAAAATTT +TCAATTTTTTGAATTTGCTCGCCGAATTTTGACTTTCTGACAATTGTGTG +TCGATTTACGAGGGTTATGTATATTTACGATATGTTTTTAATCATTATCG +AATGCTGATTTCCGTTTTTCTACGAGTTGTCTTCATTTTCGTTGGTTTTT +TTTTGTTTTTTTTTTTTGAAAGTGTATTTTTTAAGGTCAAAAAACTAGAA +AAATATTCAGTTTTCAGTCAGGAAAACCATTTATTTGGTTTTTTCAATAT +TAAAAAAATTATTGGGAAAAATGAATGAAACTCGTCGAAAAACGAAAATC +ATCATTCGATAAAGATTAAATATTTCGTAAATCGACACACATGTCTCCGC +CGCGAAAAATCGAAATTTCATAGAGGCATAAAAATTCACAATATTTTAGT +TTTTTTATTTTTTATTTTAATCCAAATCCCTATTCATGCTTAGATTTTTA +GGTGTTTTTCTGTAAAAAATCAAAAAATTGTATATTACTCATAAATTTTT +CCAATTTTCTCAAACCTTGGATCTCGCCACGACATTATTAAAAAATTCCC +GTTTTCTGCAGAAATGACGACAACGGAAGAAGCTCCCAAATCGCCGCTTT +TCGAGGCAATCGACAAAAATGACACTGAAGCAGCGCTGGCACTGCTGAAA +ACGAAGGAACAAGCCGCTCAACGGGATCCCAGTGGAATGAGTGTGCTGGC +AGCTGCCGCGTATAGGTATGCACCTTTAAAGCGGCGACGGTTACTGTATC +CTCGCAGATTGTTTAAAGGTACATACCGTAATCCGTTGAATTTCAGAGGA +AATCTTACGTTGGTCGAGAAAGCGATTGAGCTGAAATGTGATGTGAATGA +TAAAACCGATGGAACTCTGTACACTCCACTCATGTTTGCCGCCTTATCAG +GTAGGGCAAAAATTTTTTAAAAAAATTTTTTGCGTCAAATTTGATGCAAA +TTCTGGTTTTTTACCGTTGAAAAGTAAAAAAAAATTCCGAAAAAATCGAT +TTTTATGCTAACAACAAACTTTTAAGTGAAAAATCGTGGAAAATTAGACC +CAAATCACGTATTTTTGATCTGAAATTCAATTTAGCGAAAAGTCATCGAA +ATTTTATGGTTTTCGCGGTGAGACCCAATATTCGCAATTTTTTTTTGCAC +CAAATACAACACATTTGACGCGCAAATTCAAATTTTTGAAACTTTTTTCC +GTTTTTACAATATTTTTAGGCTGAACCCCAATATTTGAAAAAAAAAAAAC +CAATACACTATATTTTACGCGCAAATGTTAAATTTTTATTTAAAATCCTC +GTAAAGTTCTATTTTCTTTTTTTGATCGTTTTCAAGCTCAAAAATTCAAA +TTTCAATTCGAAATATTACCGGAACACAAAATTCTGAAAATGCGTACTGG +TCAACATATTTGACGCGCAAAATATCTCGTAGCGAAAACTACAGTAATTC +ATTAAATGACTACGGTAACGCTTGTGTCGATTTACGGGCTCGTTATTAAA +AATCATTAATTTCAAAAAAATCGAGCCCGTAAATCACCACAAGCGCTACC +GTAGTCATTTAATGAATTACTGTAGTTTTCGCTACGAGATATTTTGCGCG +TCAAATATGCTCAATACACATTCTCAGAATTTTGCGTTAACGTATTACTA 
+CTTTCTTAGTTTTTCCAAAAAAAAAATTCGAATACACCATATTTGACGCG +CAAACTTTTTTTTTCAAAGCAAAATTGTCAAACGTTTGCAGGAAAACAGG +ACGTATGCCGCCTGCTAATGGACTCCGGAGCCCGTATGTATTTGGTGAAT +GGAATCGGAAAAACCGCCTCTGAACTGGCGGCATTTGTGGGTCATCACGA +GTGTGTGGCAATTATCAATAATCATATAACAATTGATGTGATCGAAGATC +TTTTGCGGCCAAAAGTGAATGGAAAATATGAAGGAGCTGAGGAATATCCG +GATGAGCTGGCTGTATTTATTCATTCATTGTGTGGATCACATGAGATTCA +TCCTGTTAAGATTATTTTTCGATTCAGTAAATATCCCGATTCGTTGAAGT +ATAAGAAAAAGGTACATAGCTGTGTAGTTTGGAAAAAAAACGAAAAATCT +GAAAATTTGAGAAATTTCCTTAAGCTTTTTCAAGATGCGCATTTTTGTTC +ATTCTTATTTTCAAAAAAATCCAAAAAGTTTTTTAAAAAATTTAATAAAG +CATTCTTTTTAATTTCAAAACAAAAAAGTTATACCAAATAAAAACTTTTA +GTAATCAAAATTTTTCACTTTTTCTCGGTTTTTCTCATCGTTTTTCAAAT +TTGAGATCTCTTCCTGAATTTTGCCTAAAAATTGATTTTTTCAAAATATT +TTCAGAAATGATTCTTTCTGTGAAAAAATGTTTGAAAATGCGAAAATATT +CGAAACTAAAAAAACTTTTATTAAAATAAATTTTTGAAATTTTTCAGAAT +TTAAAAATTGAATCGAATTAGGTATCAGTAGTTTTCAGATCGATTTTTTC +GAATTTCTGAATTTGTTAAAAACTAGAAATTAAAAAAAAAACTTTTGATG +TTAAATGTTTTTCGAGATTAAAATAAACCGAAAACCCAAAAAAAATGTAA +AAATTGTGTTCTTTTTGTTATAATAAACCAGAATTTTCTCGAAATTTTCA +GAAGGTTCTAGAATATTTCAGAATTTTCTCGAAATTTCCAAAAGGTTCTA +GAACATTTCAGAATTTTCTCGAAATTTTCAGAAGGTTCTAGAACATTCCA +GAATTTTCTCGAAATTTTCAGAAGGTTCTAGAATAGTTCAGAATTTTCTC +GAAATTTCCAAAAGGTTCTAGAACATTACATAATTTTCTCGAAATTTCCA +GAAGGTTCTAGAACATTCCAGAATTTTCTCGAAATTTTCAGAAGGTTCTA +GAATATTTCAGAATTTTCTCGAAATTTCCAAAAGGTTCTAGAACATTTCA +GAATTTTCTCGAAATTTTCAGAAGGTTCTAGAATATTTCAGAATTTTCTC +GAAATTTCCAAAAGGTTCTAGAACATTACAGAATTTTCTCGAAATTTCCA +AAAGGTTCTAGAACATTACAGAATTTTCTCGAAATTTTCAGAAGGTTCTA +GAATATTTCAGAATTTTCTCGAAATTTCCAAAAGGTTCTAGAACAATCCA +GAATAATGTTTTCAAAAAATTCAAATTTGAATTCCCGCCAAAATGTTTTC +AAAAAATTAAAATTCGAATTTCCCGCCAAAATATGTACAGTACTCCTACA +GTACCTCTACAGTACTACTACAGTACCCCGACCATATCCCACTACTAACC +CCAAACCTATATCTCTTCAAAAGACTAAAACACAATTTTTCCTAAACTAC +AGTAATCCTACCGTACTCCTACAGTACTACTACAGTACCCCCACCATATC +CCACTACTAACCCCAAACCTATATCTCTTCAAAAGACTAAAACACAATTT +TTCCTAAACTACAGTAATCCTACCGTACTCCTACAGTACTCCTACAGTAC +TACTACAGTACCCCGACCATATCCCACTACTAAGCCCAAACTAATATCCC 
+TCCATCAGCCGAAAACGCCTTGCCTTTGTAAACTATGACGTCACTACTTA +ACAAACGGACACTATTTTTTTATATTTTTTTTTCAAAGCAAAAACCACCC +ATTTTCCAGATCCTCTACGTCATCGATCGTGTCTTCGAGAAACAGCTTCG +ATGTAAGGAAAGCAATGAAATAATGTCGCTCAAGCTTTGGCTAATTCTAT +TTTCAATGCGTGAAACCTCGAAATTCGTGGAGTCGAACAAGGAAAAGTCG +CCAGAAGAAGCGTCTCTACAGTACGCAAAACTGATTTCCACGTGGCAAGA +GGGCGATGAAACTAGGCGAGCACTTGACGTGATGCTGAGAAATGCGGTTG +CTTCGTTCCCGTATAAACATTCATTACTTCATGATACTCTACAAAAAGCA +CTGCAAAAAAGTCAAATTGGTGAACGACCAAGTGCCTATGAATACATTGT +TCAGGCACTTTTCGGACAACGAATCGCTGCGGTCTGCCAGTTTTGCTCGG +TTTGCGGACATCCTGGAGCCAAGAAACGGTGCACACAGTGCAAAGTATGG +AGTTTTTAGGGTTAAAAATAATTATTTAATAATTTAATAAAGCTCGAATT +TGGGAAATAATCAATTCCAAATTTTAAAAATATGGAAAAAATTTTATTCC +GTTACATTTTATGAATTTTCCCACAAACTCGGCATTTGGCTCTAGCTTCT +TGCCCAAGTTTAGCCCAAAAAATATTAACTTGAAGCTGTCTAAACTTGGG +CAAAAGTTAGACAAAACTTTGGCAAAACTTGGATTCAAGCTTTACCAAGG +TCTAACCCAAGTTTCACCCAACTCTTGCCAAACTTTGGCCCAAACTTTTC +TTATTTCGTTTCAAATTTGGGCCAAAGTTTGGCAAGAGTTGGGTGAAACT +TGGGTTAGACTTTGGTAAAGCTTGAATCCAAGTTTTGCCAAAGTCTTGCC +TAACTTTTGCCCAAGTTTAGACAGCTTCTGATCCAAGTTAACATTTTTTG +GGCTAAACTTGGGCAAGAAGCAAGAGCCAAATGCCGAGAAACTCGAATAA +AAATTGAAAGTTTTCAAAATTTCAGTTCGTTTTTAATTTACAAAATTTGG +CAGCTCTAACAATTCTTTAAAGATTCTTTAAATTAAAAAAAAGAATTATT +AAAACTTTTTAAAAAAATTCATTCTGTAGAAAATTCCCGTAAAATACTCT +TTGAAAATCCGGGAAAAAACTTCAAAAAACAAAAATAAATTCTAGACATT +CTGTAAATATCGAAAAAAGAACATTTTGTCTGTAAATGTATTAGTCAAAA +TTAATTTCTGATACTTTTTCCAATTTTTCAAAATTTTAAGTGTCGAGGTT +CAATTTTTTTGAATTTCCTGTTTTTCCTTTATTAAAAAAAGTTTTCTATA +ATATGCTGTATTTGAAAATTAAAAACTATATCTGAAAATATCGAGGCACA +ACGTTTTCAAGATCTGGTGAAATTCCGGATCTACGTTTTCCGGATCTACC +ATTTCCGGATCTACGTTTTCCGGATCTGGCACCGTGCCAACGCACAAAAC +GCTTTTTTGTTCACTCGACGCACGTTGTTTTTTGAAAATTTCTTCTAGAA +GAAACGCTTAACAACACGCGACGCGTAACAACGGAGCATCGTTATCACGT +TTTTCTCCGAGAAAAATAGCGTTTTAAGAGTTGGCACGGTGCCAGATCCG +GAAATGGTAGATCCGGAAAACGGAGATCCGGAATTCCGCCAGATCTTGAA +AATGTGGTGCCTCAAAATATCGATTAAAGAATTTTTTCTGAAAAATATCC +AATTTTTCAACAACAGAATAGCTAAAAAGTGAAAAAAAACTCAATTCTCA +TTATAAATTGCAAACAATTTCCAAATTTTGATAAAATGGAAAAGAGTTTA 
+AAAATTTCAGGCAACACATTTTTTAACTCTAGTAAACGTTTTTTAAATTC +CAACAATTTTTACAGCTCGCCTACTGTTCCCAAGAATGCCAAAAATTCGA +CTGGCCAATTCACAAAAAAGTGTGCTCATTTCTGAAAACGCGACAAGAAG +TGTCGCCCACCGACGAGACCGCCATGTCGCTGGACGATATTCAGGCTCAA +ATCGCCAAAATCGACGTGTAGAAGTGCCGATATTTCGATCTCAATATACT +TTTTTTCTGGAATTTATTTATTTTTAATGTATATTTCGGCTTCATCTCAT +TGCACGAACTTTTAATTTCATTTTCATAAATTCATTTTTGAAGTCATTCC +TTGAACAAAAATTCACTAAAACATGCATTAAGAAATATGGAATCCAAAAA +TTAATCTAAAAACCTTTTCAAAAAACCACTTCGTCAAAAACTGATGATGG +AAAACTCGTTGAAAAACGGAAAGAGTATCCAATAAAGATTAAAAATTTCG +ACATTTCGTAAATCGACACAAATCTCGTAAATCGACAAAAATGAAAAAAT +CAGGAACCCAAGAAATTCAATATTCTCATTTGTAAAGACAACTGGTAAAA +CATTTTCAAATCAAAAAATTATTTTTTTTGCCCTCAAAATTGATCTCCGA +ATACTATAAAAAAGAAAACTATAAAAAGTGGCGAAAATTCGAAATTTTTT +AACCCCTCTAAAATGGTTCATTTTAGTTGTCTAATGATACAACAAAGTAG +ACATAGTTCTACAATATCTGATAAATACTTGAAAAGTCTAAAAACAAAAG +TTTTTTCGTTTTTTTAACGGATTTTTAAAATCCAGAAGAACGAAAAAAAA +TTTTTTTAAGAGAATAGAGTAAACTAATCATGTTCGAGCAAAAAAATCCG +ACTTAGAATATGAACGGACCCAAGTGTATCATAATTATTTTAATTTCTGT +GTATCAGAATTATTTTAGTTTCCTTAGTGTGATTCCCAAACTGCTTAAAT +TCTAGGAAATATTTCTTTACTGGAACACTCTTAGCCACTGTACGCTGCCG +AACGAATAATAAGAGAATACAGAACACCAATTATGCCCGAGAAAAAGATC +CTACTCAGAATATAAACATAGTCAAATTTATCGGATGTATAAAGATTCCC +GAAGACACTTTCCAATTACCCAAATTGTTCATATTCTAAATCAAATTCTC +TTACTAGAACGCTCTTGGCCAATGTACGCAGCCGAACGTATCATAAGTGA +ATACAGAACACCAATTATGCCCGAGAAAAAGATCCTACTCAGAATATAAA +CATAGTCAAATTTATCGGATGTATAAAGATTCCCGAAGACACTTTCCAAT +TACCCAAATTGTTCATATTCTAAATGAAATTCTTTTACTAGAACACTCTT +GGCCAATGTACGCAGCCGAACGTATCATAAGTGAATACAGAACACCAATT +ATGCCCGAGAAAAAGATCCTACTCAGAATATAAACATAGTTAAATTTATT +GGATGTATAAAGATTCCCGAAGACACTTTCCAATTACCCAAATTGTTCAT +ATTCTAAATGAAATTCTCTTACTAGAACACTCTTGGCCAATGTACGCAGC +CGAACGTATCATAAGTGAATACAGAACACCAATTATGCCCGAGAAAAAGA +TCCTACTCAGAATATAAACATAGTCGAATTTATCGGATGTATAAAGATTC +CCGAAGACACTTTCCAATTACCCAAATTGTTCATATTCTAAATGAAATTC +TCTTACTAGAACACTCTTGGCCAATGTACGCAGCCGAACGTATCATAAGT +GAATACAGAACACCAATTATGCCCGAGAAAAAGATCCTACTCAGAATATA +AACATAGTTAAATTTATTGGATGTATAAAGATTCCCGAAGACACTTTCCA 
+ATTACCCAAATTGTTCATATTCTAAATGAAATTCTCTTACTAGAACACTC +TTGGCCAATGTACGCAGCCGAACGTATCATAAGTGAATACAGAACACCAA +TTATGCCCGAGAAAAAGATCCTACTCAGAATATAAACATAGTCGAATTTA +TCGGATGTATAAAGATTCCCGAAGACACTTTCCAATTACCCAAATTGTTC +ATATTCTAAATGAAATTCTCTTACTAGAACACTCTTGGCCAATGTACGCA +GCCGAACGTATCATAAGTGAATACAGAACACCAATTATGCCCGAGAAAAA +GATCCTACTCAGAATATAAACATATTCGAATTTAACGGATGTATAAAGAT +TCCCGAAGACACTTTCCAATTACCCAAATTGTTCATATTCTAAATGAAAT +TCTCTTACTAGAACACTCTTGGCCAATGTACGCAGCCGAACGTATCATAA +GTGAATACAGAACACCAATTATGCCCGGGAAAAAGATCCTACTCAGAATA +TAAACATATTCGAATTTATCGGATGTATAAAGATTCCCGAAGACACTTTC +CAATTACCCAAATTGTTCATATTCTAAATGAAATTCTCTTACTAGAACAC +TCTTGGCCAATGTACGCAGCCGAACGTATCATAAGTGAATACAGAACACC +AATTATGCCCGAGAAAAAGATCCTACTCAGAATATAAACATAGTCAAATT +TATCGGATGTATAAAGATTCCCGAAGACACTTTCCAATTACCCAAATTGT +TCATATTCTAAATGAAATTCTCTTACTAGAACACTCTTGGCCAATGTACG +CAGCCTAACGTATCATAAGTGAATACAGAACACCAATTATGCCCGAGAAA +AAGATCCAACTCAGAATAAAAACATATTCGAATTTACCGGATGTATAAAG +ATTCCCGAAGACACTTTCCAATTACCCAAATTGTTCATATTCTGAATGAA +ATTCTCTTACTAGAACACTCTTGGCCAATGTACGCAGCCGAACGTATCAT +AAGTGAATACAGAACACCAATTATGCCCGAGAAAAAGATCCTACTCAGAA +TATAAACATAGTCGAATTTATCGAATGTATAAAGATTCCCGAAGACACTT +TCCAATTACCCAAATTGTTCATATTCTAAATGAAATTCTCTTACTAGAAC +ACTCTTGGCCAATGTACGCAGCCGAACGTATCATAAGTGAATACAGAACA +CCAATTATGCCCGAGAAAAAGATCCTACTCAGAATATAAACATATTCGAA +TTTAACGGATGTATAAAGATTCCCGAAGACACTTTCCAATTACCCAAATT +GTTCATATTCTAAATGAAATTCTCTTACTAGAACACTCTTGGCCAATGTA +CGCAGCCGAACGTATCATAAGTGAATACAGAACACCAATTATGCCCGGGA +AAAAGATCCTACTCAGAATATAAACATATTCGAATTTATCGGATGTATAA +AGATTCCCGAAGACACTTTCCAATTACCCAAATTGTTCATATTCTAAATG +AAATTCTCTTACTAGAACACTCTTGGCCAATGTACGCAGCCGAACGTATC +ATAAGTGAATACAGAACACCAATTATGCCCGAGAAAAAGATCCAACTCAG +AATATAAACATATTCGAATTTATCGGATGTATAAAGATTCCCGAAGACAC +TTTCCAATTACCCAAATTGTTCATATTCTAAATGAAATTCTCTTACTAGA +ACACTCTTGGCCAATGTACGCAGCCGAACGTATCATAAGTGAATACAGAA +CACCAATCATGATCGAGCAAAAAGATCCGATTCAGAATATAAACATATTC +GAATTTATCGGATGTATAAAGATTCCCTATTGGGAAGTGGAGCAATCCAC +GACTGGTTTATCGGCCACAGTCCCCGGCTAGGACATGGCTTATATTATTG 
+GGCCAAGGGGAGCACCACCAGGCAGTGTACCTGACTCCCAGATCAGCAGT +ACATAGCACTTGAAGAATGGATCGTCCTTTAATCTTTTAATCTTTTAAAA +AGAATCGAAGGAACTCTCATCGGGTCATGTGGTTGTGGGGACAAAGAGGG +AGGCTTACATCAATACCAAATACCTGTGGTAGATCACAATACCTGTGGTA +GATCACACCCTATCCACAAAGAAAATCTGTGGACGTCCTCAAAGGAGGCC +GCCCGCGCCCTTGAGCTGGCCAACAAACCCTTCGAGCTGGGTGGAGGAAT +ACTCCAGCCGAGCGACTGAAAACGGCGGTAACGCCACGTTGTCGCACAAT +AACAACAAAAAAGCCCGCGGGCCCCAAGAAGCTCAAGAAGCCCACGGCTT +AATTTTCAAATCAATTACCTTACTATGAATCTCCTTTTTGCTCTACGAGT +CGTCGTTGATGTCATCCTTCCGTCCAACCTCCGTCAAACAGTCCATCTGA +CCATCCGTCCAACCAACATGTGGTGGAGTGTCCAACGCATCTGAAATTGA +AAAATATTTATATCTGATTTTTTAAATGGAACCATTTACAAAAAACATAA +ACGGAAAACGCTTAGCAAAAAAACAAACAATTAGTATTTAGAAAAACGGA +GACAAATGCTCTCGCGATCTTATTTATATTAATTTTCCAGTCGATTGCAA +GGCATCTGCTCCCACGGGTTTATATAAAATTGTGTAACTAATTTTAAACT +TCTCTTGAAACCTTTCAACCAGTACTTTTCAAGAGTTTTTGGTAATTTTT +CGATTTTTCAGAAATTTCAAAAAAACGGCGAAATTCCACTCAGAACCCTC +ATTAATATTTCACTAGTCCAGCACACTAACCCAAAAAATTATTTTTTTTT +TGAACTACAGTAATCCTACAAAATTGCTACAGTACTATTACGGGACCATA +ACAAAATTTTGATAATGCGTATTGCGCAACATATATGACGCGCAGAATAT +CTTGTAACGAAAACTACAGTAATAATTTGAATGACTACTGTAGCGTTTGT +GTCGATTTACGGGCTCAATTTTCGTAATGTTACACGACACATTTTTTTGA +CAAATGCAAAAAAGTGTGCGCCTTCAAATTAAAAAAAAATTTGATTTTTG +TTGCCGGCTTTTAACACATCGAAAAAATAAAAGAAACGAAAGTTTGTAAT +TACAGTACTCCTCTTAAACGCGAACACCTTTTCGCTTTTCAGAAAAACTT +GCGCCGTTTCGAAACCGGGTACTATACTTTTATATTAAAATCGATTAAAA +ATCGCGAAATTTTGCGGTGGAGCATATGTCAAATTGCAACTCGAAAAGAA +AAAATCTGAAAATGCATGCTGTGAAAAAAAAAGCAGCAGTACTCCAAGTC +ACCCATTGGAAAGAATGACTGAAAATTGAAAACAATTCTTCACTTTGAGC +GAAAAAATGCCGCGTTGAATGAGAGAGGGACGAATCGAAATTTAAAAGGA +GAAGATCAAAAAAAAATGTTTGTGGTAGGTCTGGAATCTTGCAAATTATC +GTTTTAAAATCAATTTTTAACACTTTTATATCATAAAAATAGTTTTTCAT +CAATCAATCGATTAAAATACCAACTCTCGAAAATTAACAGCTTTAACAGC +GCCGTCTACTGATTTGAAATTGCAGTCGCTGCCGCTCAATTAAATGTTTT +GCGCGTCAATTAAAATGCCTTGTACGCAGATGCGCGTCTCCTAAAAAATA +AAAAGTTGTCCAATTTTATTGAAAACGGGTATTTAATTCATGTAAATATG +CTGAATTTAGAAAATCTAGGTTTAACCTATCAAAAACTATAAAAAAGTGG +CAAAAATGGGCAATTTATGGCAAAAATTCACAATTTTGAAACTCCTCTAA 
+AATGGTTCATTTTATTGGTAGAAGAGGACTAAAAATTGATATCCGAACCC +TAAAAAAATTGTCCTTTTTCAATATTCAGAAGAAAATTATGAAGTTATTT +ACTTCCATTTTTCGATTGTACGGTAAATCAAAACTAAAGGTGGGCACGGT +ATCTGATAAAGTTACGATCGTTCCAGGATCACGAAAATCGAATAATTACA +CAGCCAGAGTTACATGAAACAGTGTTTGGGAAATTTAAAAATCAGTACAA +GAAAACCTCAAAAAAAAAACAAAATTACAGGAAAAAACGGAAATTTTCAG +TAAAAAATTATAGGGTATGTAAAATCGATAAAATATTTAAAATTCAATCG +TATTCTCCGTTTTCGGCGTTCGGATCGTTCACTGGAAGTACGGGATGTCG +AAGTTTGAAATTGAAGAATTCGCACCAACCGGCTGGAAGTAGATCGTGGG +AGCTGCAAAAAATGTTCGTGGCCGCGAAAAAAATCGGTGGCCGATTTTTG +TTTTTTCGCGGCCACGTTGTGACTAGACGGCGAAATAAAAATTTGTTTTT +TGGTTTTTAGTGTTCAAAACTGTTGTTCTTGTTGAAAAACAATTTTTTCG +AATTTTTTTTGTTTTTTCAACTAATTTTTTTTTCTGAAAATGCTTTTAGA +ACAGTTCATTTTGTTTTTTTTTCAATTTTTTTCAGACACCCCTTACTCCA +TATCATAAAGCTCGGAAAATTCGGAATCCCATCCCTGAAACTCGATGAGC +ACCGTTCTACCGTGTACTTCTTCGACGGAGGCCGGTGATATCCAGTAGGT +CTCGTTTTGGCCGACGGCTTCAAGGACACGACCGAACTGAAAAATATTGT +TTTTTTTTTCAAAACCATTTTTTTTAGGAAAAAATTTTTATTGAAACGTT +TTTTTTTTAATTTTAAAATTCCAAAAAAACGCAAAAGTGCAAAAAAACTT +GAAAAAAATGTTCTCAATATTTTTTCCGACTCACAATCAAAGAAAATTTG +AATCGAAAAAAATTAATTGTTTTCTTTTAAAATTTAAGGAAATTATTTTT +CTAATTATTTCTTCCAAAAAAAAGTTTAAAAATAATAATTTTTCAAGTGT +TTTTCAAGAGAAAATTAAAAAATTTCAAATTTCAAATTTTAATATACTTT +TTATTTTAAAAACTTGTATAAATCTCCGAATTATATGGATTTTTTTGAAA +AATAAAAAATTTTTTTATTGGAAAAAGAAATCTGATTTTTATAGTTTTTT +TTTTTAATTTGAAAAATACGAATTCTATCACGGCAACTCAAAATTCTGAG +AATGCGTACTGCCCAATATATTTGACGCGCAAAATATCTCGTAGCGAAAC +TACAGTAATTCTTTAGATGACTACTGTAGCGCTTGTGTCGATTTACGGGT +TCAGTTATTTAAATGACTTTATTTTTCGTATTATTTTCTTCATTTCAATT +AATTTTAAAAATTGAGCCCATAAATCGACGGTACCGTAGTCATTTCAGGA +ATTACTGTAGTTTTCGCTACGAGATACTTTGCGCGTCAAATATGTTGTGT +AGTACGCATTTTCAGAATTTTGAGCTGCCGTGATAGAATTCGTATTTTTC +AAATTAAAAAAAAACTATAAAAATCAGATTTATTTTTCCAATAAAATTCG +TCTAATTTATAAAAAAAAATAAAAATAAAAATTGAACAACGCTGAAAAAT +CCCAGATTTTTTTTTCTTATTTTTTGAAAAAAAAAACTTTTTTTGGAATT +TTAAATTACATAATTTTTTTGGTTCAATCAATTTTTTGGTTAAAAAGCGT +TTTTTTTTCCTTCAAAAAAGCACCTCAAACATGTGCCTTCTCTCCTTGCT +CGGCAACGGCCTAAGCATCTCATCAGGAATTTTTTCCGCCTGCTTCTCCT 
+TCAAATACCCTTCCCACTTGAATTTCCCGGGCTCAGTGCCTGCAAGCCGA +TCCAGCTTGATTCCGAACTTCTCAGCATAGCCGACAGGATGCATAAAATG +ATTATCAATATGAATTGGGAAGGATTCATCGTCCGATTCTGTCTCATCAG +GTGAAATAATTAAAAATCCAGGCGTTTTGCATATTTTTCGAATTGTTGCG +ACGCAAAATGACTGGCGTAGGTCGCTGAGTGGATCCAAAAGTTCAAATTT +TTGGCCGACTTTTAATAAATTTAATTTTTCAGCTGAAATGTCAGGTTTCC +CAGCAAAAAGTTGCTCGAAAGTGACGTCATCTTTATGATAACTTCCAGAA +CCTTCTGCGATCCGTCTGGAATGTTCCAAATAACCTTCCGTAGCCTTAGT +CCGTAATCCATTAATCATTGCAAATCCGACTGGAAACAGGAAGAAGCTCG +ACTCATCCACCCAAAACTCGACGTTTTCATGCTGAACCTGCCGATCTTTG +GCCTCTACGGAAGGAAGATCTTCGGGATAATCCTGAGCGGTGACCATGAC +CATCAGACGGCGGCCAAGGATCCTGAGGATCCGTGCGACACGGATTTCTG +TTGGCTCCAAGTAGTTGAGTAGCTCGACGCGTTGATTCAGCCGGAAACGA +GATGGGCGGTGGGCAAGGTGGCGGAGTTGGTCGAAGATTTTTGGCTCAGA +GATTCGATTTTCGTGCAGCTCGGCTTGAAATTCCTGCAGGAAAATTTTTT +TAGGAAATGGGAAAATCAGAAGGGTTTTTTGGAGTTTTTTTTCAGTATTA +TTTTTTTTGTTGTTTTTTTGAGGCTTTGTTAAAGTTTTTAGGTGAATTTT +TTTTGTTGATATTTTTGGTTTGTTTAAGAATAAATCATTTTTTGAAAAAA +ATTCGTTTTTATATTTTCGGTTTTTTGCTGGTTTAGTTAAAAGTTTTTTT +TTCGGATTTTCAGGTTTTTTGACAAAAATTGATTTTTTTTTTGGTGTATT +TCTTAGGTTTTCGGGTTTTTTTTTTGGTTTTTGACGAAAAATTTATTTTA +AAAAAAACGGACTTTTTTGCAAAAAATATGGGGTTTCTTAAAAAAAAAAA +TTTCAATTTTTTTTGTTGGATAAAATTAAAGTTTCTTTTATGGTTGATTT +TTCGCCTTTTTTCACATTCTCCACCAGAAAAAACATGAAAAAAACCAGAA +ATGAGTAGTTTTGAGCAATTTTCAGGCAGTTTTTCCGCTCAGAAAATAAT +TTTGGCCGGATTTTCATGGTTTCTCACATCCAGAATCTCCTCTGTTGGCC +GTGGTACGGGCGGTGCAGGCGTAGATCCACGTCGCCAAAATTTCGGGCAT +TGCATCGGTTTCAGCAAGCTCAGTGGCCTGTATTGAAGCCATTTCATCGC +TGGATCCTGCTTGAGACCGCTGAAATTTCGCGTTTTTTTTTGGAAATTTT +GTTGAAAAATAACCTTCCAATATCGAAAATATCCTCTGAAAGCATATGAA +ACCAGCAGGGTCTTCTATTAAGTTCCCCCACAAATTTAATTGCCACGTAG +AAGCCGCAGACAGCAGTCACCTCGCCGAACCAGCGGATTTCTGGAGATTT +AGTGGTTATAGAAGGATCCAGAGACGGTCGGACGACTACTTCTAGCTGAA +AACATTAAGATTATGGCTTTAAAGTTTTTTAAAAGAAAAACCAACTCGAA +ATCCAGGTTTTAGGTGCTGATTTAGCTTTTCCAGCGGCAAACAACCCTCC +AGCGCCTCGACAGGTAAAAATTGAGTATTTCCATCATAATTACATCGGAG +TTCATCTGACCATGTGAATGTTCCTTCAATTTGTGGCTTTGGAATCCGCT +GGGCGATTTTCTCAACTGGATTTCTTCGACGATAGAATAGCCGTTCTTCG 
+GTGAAATATCGGTATTCTGCTTCTTCAAGTTTGTAATTATATTGACGCTG +AAAGTAATAAATTATGTTTTTCGATAAAAATGCTTTTAAAATTACCTTTT +TATTCGAAAAATTCATTGATTTTAATTAAAAATCTATTTAAATTGTTTAA +ATTAAAAAGTTCCAAGCTTCTGTAGTTATCGCGTCGAGACCCAACACACT +AATTACCACATGCGCCTTTATATAAATTTTAAGAAAATCAACATTTTAAA +AGATTTTTACAGTTTTTTTGTCGTTAAATTTTTCGAAAAAATGATTTAAA +CTCACAGTAAACTTGTTTGAAACTTGAATATTAAAATTTCACAGTCAATG +GGTTAAATTTCAAGACTTTCCCACTGATGATACGGTAGGCGCGAAGTACG +GTAGTTTGTTTTGTTTTTTTTTATTTTGCCGCCGAGTGATTCAAATTTGA +ATTTATATCATCCGATTTTTTGATTTTTTCCTCAATATTTCTCGATTTTT +CAGCTTTCAGTGTGATTTTCCAAGAGCCGTAGCTCGACCAGACGACGGAG +AGCCTGAGGCTGGTCGCCAGAAGCACTATGAAGTGTGGGTTTTTTTTAAA +GCATAATTTGCATTAATCTTCTAATATTTTAGCCACCCCACACCGTCCAC +GAACAAACTCCGAATCGAAGCTCGGGGCTCGCGAGCAGCTTCTCCGGGAC +TCTATACGAGTGGGTTTAAGTTCTTAATTTCAAATTTCACTCATTTAAAA +ATTTTATTTTTTAGCTTCAAGTAGCTTCGGAGCCGTACGTCCACATGACA +AAGGAATCCACGTGGGACGAGGCGAAAAAACTGGCGATCAGTCTTGAAAA +GAAGCCGGACATTGTCCGTAAAGCGATATACAATCGTCGTCGCTTCGTCA +ATGAAAAGATAAAAAGTGCGCTGGTCAAGCGCGAAATCATCGACCCAAGA +AGCCCGGCAATCCATGAAATAGCAGTGGCGGCGGAGGTATTTTAAAAGAA +AATCAGACAAATTTAATATCTAATTATTCCCCATTTTCTTGCAGACAATC +GCCATAAACGTTGTGCACTTCTTGGAGACTCATCACGCAAAAATACTCGC +TGAAATCAAAGCGGCCGCCGCTGGAGCCGGCGCCCAGCTCCGAACTGCAT +GAATTAATTCAAATTAATATTTTTAAACTCATTTTTCACACAAATAATCA +TTCATGTGTCCATTTTTCACTCTCGAAACCCATATACCCTCACTCTAAAT +ATCAATATAATGCACATTTCTCATAGTTTTAATCTGCCCTTACCAATTAA +ATCTTACCAATTTTCCGCCATGATCCCTTTTTCCATTTTGAATAAAATTC +GACGACGATTTTTCCATTAAAACAAGAAATATATAAATAGATTCAAAAGT +GGCGCTTGTGTCGCTCAGCGGTCTCCTCCACTTGCACACTATCTCACCGC +GGCCTTCCAATTACTCGTCCATTTTCCAGCTGTAAAAAGTTTATAAAAAC +TGAAATAAATGCAATTTTCAGCAGAAAATCGCTGAAAATGCGGCAAATCG +TCGAGCTAAAGTCACTTTTGACTTCGGAGCCAATTAAAGCCATCGAGCTC +TTCGATAGGCTCGTTGGACAAGGTTTGAGCCGAAAATCTAAACTTTTAAG +CTGAGATTTCTTTTAAAAATCCCTTCCAGATGCCGACACAATCACCCAAG +AAGCCTGTGGAACCCTCGCCAACTATATCCGGCATTCCAGTGTTTTTAAG +CGACGGCTTCTGTTCACGGAGCTCCAAAAATGCTCGTTTTTTGCGAAATT +GTGCATTTCGTTTCAATTTCACAGCTTTGAAGACACAGTTTTTCCGCGAA +AATTGATTTTCGAACGATTCAGCGTGTTTTGCGGTGAACTGGAAAAGGAC 
+AAGCCACGTGGGTTTTCATTTATTGAAAATTGAAAAGATTTTTGCAAAAA +ATCATAAAATTTAATGTAAAACTGAACAAAACTCGATTTTTAACCGAACC +TTTTTGTTTTTTCCGTGAAAAAATCGGTTTTCCAAGTTTTTCGAACATTT +TCCGAAAAAAAATTACGCAAAAAAAACGATTTTCAAGTAACAAATCCGGA +AAAATCAAACTTAAATATTTTCGGAAAATTTCATAATTTTCTTCAAATCT +CTGTAAAAAGTAGATTCGATTTCTGGGAAATTTGAATTTATGTCATTTCT +TTAAAAGCGCATGCTCTTTTGTAGGGTCTCGCAACGAATTATTCAATTTA +AACTTTGAAATTCGCGCCGAAATTTGGGTCTCGGCGCGATTTTTCGAAAA +ATAAATGAAAAATATGTACTATTAATTTTTTCTTGAAAATTATTGATTTT +TCAGATCACCGGCACCACGTCACCGCTGTCGGATCGAATCGTTTCTTCAA +TTTGGGACTACTTTCCGATGGAAGCCCCGTCTCAGAACCCCGACTGGTCC +CCGTGCCACGTGTAATTCAAATCGAGATGACAAATACACACACAATATTT +TTGACTGCTGAAAATCAGATTTACGGCTGCGGAAAAGCTTCATCTTTTTT +GCCGGATAAAACGGAGGAGACGGACGGTGGTTATGTGGCACTGCCCACTT +TGGTAGAAATTCCAAAAGTTACTGGATACGTGGCAGCTGTGAAGGTGTTC +GATGGAGGATCACAGTTTTTGATTGGCGGCAAAGTACGGTTCTAGAAAAT +TGGTGGCCGAGTATTTTTTCGCGGCCACATGGCGATTTTCTACACGAAAA +GTTGTGTAAAAGACAAAAAGGTGTGCGCCTTCAGGGATTACTGTAGCTGA +AGTTTTTCATAGATTTTTTTTTTGGCTTTAAAAAAATCCTTAAAGGTGCA +TTTTCGTGTCGAGACCTTGTTGTATTATTTTGCTAATTTTGCTTAAAAAG +TACAGTACCAGGTCTCGACACGAATAGTTTTGATAAATGCAAAAATGTGT +GCGCCTTTAAGGATTACTGTAGTAAAAAATAAGAATCCCTTTTGGAAAAT +TTGATAATTTTGTGAAGAAAAATAGATAATTCTTTGAAAATGAATAATTT +TTTGGAAAAATCTTTTTTTCTTATTTTTCTAGACGTACACCTTTATTTCA +TTAAAAAATTGTCGCGCCGAGACCTGATACCGTATTTTTGAGCGCAAAAT +CGTGCCGAGACCCAAATTCGTGAATTCAAATTTTCAGTGGTATTGTGTTG +GAAAATGTTCGATTTCCGGAGAAACGCGACGAATTTCGTCGAATTGTTTT +GTTCTGGTAGAGGAGGAAAACGAGGAGAAATTGGAGAAAATGACGAAAAA +TATCGATTTTTACGTGGCAAATGTGCCGATTGAGGAAAGGATTGTGAAGG +TGGATTTTTTTTTTCAAAAATTTGAAAACAAAAAAACTAAAAAAAAAAAT +TTTAATCTTAGTTTCTAGAATTCAAGCCAGTTATAATAATTTTTAAACTA +AAAATTGCATCTTCCGAATTTTCAAGATTTTTTTTTCTGAAAATCGTTTA +AAAAAATATCTTGAAAAATCAAAATTTCAAAAAAAACAAAACTTGAAAAA +AAAAACGTCTTTAGCGGTATTTTTTCTATAATTTTTCAATTTTTTTCAGC +TTAAAAATCATAGAAAATCGTAATTTTTTGACATTTCTTCCAGGTATATC +TAAAAATGGACCAAAACGAGATTCTGTGGGATAGAACGAGCGATTTCTCA +GCGGAAAAGCCGATTTCCTTCATAATCAATGGATTTCCACAAATGGCAAT +TTTCGAATCATTTCAACTTTTAAATGATGGAACTATTTATGCTGCGAGAA 
+ATTCACTTTTCAAAGGAAAATTGGAGCTATGGAAGAACAAAGATGACGGG +TTTAAAGTGAAAAGCGGGACTGTTTTGGAGCATTTTGACACGAAATATAC +ACTTATTGCACTGATGGAAGAGGTTCCCGGTACTATTGGAACAGAGTTTT +TCAAAGTTTCACCAGATGGGCAGAATTTGATTATGAAGGTTCATTTTGTT +TGGAATTTGAAGGAATTCGACCTTAAAAATATAAAAAATTGCACTGATGA +TGTTATGGATTTTTTTTTCAGAAAAAAAAAAACGAAAAATTGAATGCTAA +ATGACAGAAAATATGCCCCTGTAACATTTTTTTTTTGAATTTTCTAAATT +TTAAATTATTTTTTTCAGTTTTGCGCAAATCAAAGAAACGGCCGAATTAA +ATTTGAATTCCCGCGCAAAAGAGTGACGTCATTTTTTTTTTCCCGTTTTC +CGGATGTATTATTAGGTTTTTATTTTAAACACAGTTTGTCAATTTTTCAG +ACATTTTTTTTTAAACTTGATAACCCGAAAAAAGTGGCCTAGAAATCGGC +TTTACAATTTTTTTTTTAAATCGACAAACTGTGTTTCAAATTATGAAACA +AGGAAAAAACGAAGAAAAACTTATAGCCGGAAAACGCGAAAATGTCGAAA +ATGACGTCACTAAATTGCGCGGGAAAAAATATAGAATTTTTTTTTAATTA +ACAGCTATATTGAATCATTAGGGCTTGTTCTCCGTCAAAAAATTTCTCGA +AAAAATTGATTTTTCGTTTTTTTTTGGAAAATCGAAAAATTTTGTTTCTC +AAAAAAAACAAAATTGGAATTTTTATTAAAAAATTATTTTAATCCAACAA +AAAAAACTAAAATTTTTTGCAAAATTTAAAAATTCATAAAACATTTAAAA +AAATTTTTTAAAAGTTATATTGGGACTGTATTCTAATACTTGGACTAAAA +AAACCCACATTTGACAAAAAATTCAATTTAAAATGAATATTTTCAGATGG +GCTACCAAAACGAACAGAAAACCGAGAAATTCGAATTCGAGTCTCCCAAA +ATTCACAAAATCATCAAAAATCGTGAAGTACAATGTGATCCAGTTGATAT +TCCGTTAGACTCGACTATTCATTTTCATCAAAATTATAATCAAGAAGTAT +TGAAATTTCAATCGAATCGATTATTATTTCAATGGATTTATCCGAATTAT +TTGTTTCAAAACGATGGAGATGTACATTTTTCAATTGAACAATTGGAGAC +TGTATTTGCTGTGGATTTGGAGAATTTGGAATTGTGAGGGTTTCTTTTAT +TAATTAAAATTTTTTTAAGTGGAAAAATTTTGGTTTTCTTTTCAGAAATT +ATTGTAATGTTGTAACTAAATATTACGGGAACACGAAATTCTAAGAATGC +GTATTGCACAACACATTTGACGCGCAAAATATCTCGTAGCGAAAACTACA +GTACTTCCTTTAATGACTACTGTAGCGCTCGTGTCGTTTTACAAGCTCAA +TTTTTTAATACTTTAAATTAAAACTTTTACTTTAAATTTTAAAAAAATTC +GTCTTATTTTTTAATTTTTGCTTTATTCCAATATTCTGTCGATAAATAAA +TCATTTTAATAAATTTAGAAAATTGAGCCCGTAAATCGACATGCGCTACA +GTATCCATTTAAAGAATTACTGTAGTTTTCGCTACGAGATACTTTGCGCG +TCAAATATGTTGCGCCGTACGCATTCTCAGAGTTTTGTGTTCCCGTAATA +TAGAAAATTAGAAGTATGTTAAAATTTTTAAAAAGTATTTTTTTTGGACT +TTTACCCAAAGATTTTTTTTTGCAAATTTTTAGTGAAAATTATTATTTTT +TTCAATTCACGCTAAAATTTATTAAAAATTTAAATAATTTTAGACTGTAT 
+TCTATACAATCAAAAAACAACAAACATCAATAAAATTTTCAATGAGTTTA +AAATTTTTTTGAATTTTTTAAAACTTTTCTTCGTTGACAAAACGTTCACA +AAACTTGAAAAAATATATTTCAAATTAATACTTAAAAATTCAAACAAAAA +ATTTTTAAAATTTTTCAGCCCGAAATACCAACCTATATCACCAGAAGAAT +TCGTTCCCACCGACACTTCTCCCTCCGATATCTGGTATTTAAAATCCAGT +GAAAAATTGAAAATCCCTTGTCACAAGTACCTTCTCCTACTACATTCACG +TCAAATTGGCGCGATGCAGAGATTTCATTCAAATTATGGGAATTTCGGCG +ATTTTAAGGATGGTAAATCTGAAAATGAAGTGGAAATTGAAGCGAATGCT +AGTGTTGAAACTGTGAAAAATGCGCTGAGAGGGATGATCAATATTCGAAC +TCTTTTCAAAATTAAGACTATTGAGGTAATTGGATTTTTGGTGAAATTTG +AAATGAAAAAATATTTATAAAAATTGAATTTTTTTTCAAATTTTTTTTTC +AGTCACCATAAGTCAATTTTCCATTAGATAAAAATCGATACTTTTTCTAA +ATGCGATAAAAATTGGAAATTCTATAAATTCCGGAAAAATCGATTGATTT +CAAAAAATCGATAATTTCCGGAAAATTGATAAATTCCAGAAAATCGGTAA +TTTCCGGAAAAATCGATAATTACCGGGAAAACCAATTAATTTCCAGAAAA +ATCGGTAATTTCCGGAAAAATCGATTGATTAAAAAAGTCAATAATTTCCG +GAAAATTGATAATATCTAGAAAAATCATTAAATTCAGGAAAAATCGATAA +ATTCCAAAAAAAAATCGATAATATCCTGAAAAGTGAATAAATTCCTAAAA +ATCGATGAATTTCATCAATTCCGGGAATATTTTTTAAATTTAAACTTTAA +AAAATATTTTTTTAAATCTTTAATCCTTATCTATTTTATTCAAAAAAAAA +CTGATCAAAAAAATTTTTTTTTTTCAAAAATTCAATATTTTTGCAGTTAA +TCGAATGCATCAACTTCTACGATTATCATTTAATGGATGAAATGTTCAAT +GATTCAATGCATATTCTAATGGAAACAATCACTGAATTCACACTTCCATT +TCTCTACGAATTATTCTATTCGTTTGAGGAAAAAGTGTTGGAGGGACTTT +TGCAGAGGAAATATTTGATTTCTAACTCAATTTCCAGTGTTTTACCGCCA +AAGGAGCTTCTTGTACGGTAAGTGAGTTAAGAAATGGAAAAAAACACATT +TTTGCTACTGTTGGAATTTTTTTTATTTCACAAATATATTTTTTTTCATT +TTAAAGTTAAGTAGGAGTTTATCAACTTTTCTGGATTTCATCAATTTTTC +CGAAATATTTTGATTTTCCCGGAATTTCTGTGTTTTTCAGGAAAAAAACC +CCAGAAAACTAACAAAAAGAATTAGCAAAAATGACCTAAAAACCGTAAAA +CTATGTATCCCAACTTGTCCACGTGTAGTACAAAAAACCGATGTGCACCA +ACAAGAATTTTTGCAAAAGCTATTTTTCCATTCAAAACTGCGCCCCAAAC +AGTTGAAATTTTATAGAAAATTTTCGAAATTCGCTTCTCCGAAAATAAAA +ATCTCAGATCCGCCCATTTCTTGGCTCTTCCAGATTCGCTGAAAAACTGG +ACACCGATTGCCGCCGACACCACACATTAAAAGTTCCGTCGAAATTCAAA +AATCTAGAAAATCTGAGCACCTATGAGCCCGAGTACATTGTCAAATATTT +TCTGAATCTCGACCAAGATTCGGAGGATGTGACGTGGCGATCGATACGTC +ACGAATTCCACGACACACTTGACACGTGGCACGCGGAAGCTGTGAAGAAG 
+AGAGATGAGAGACGTATGCACGGAGCAACACGGAGCAGAACGAGTAGTAT +TCGGAAGGAGAGCTTTACGAAAATTCAAAATGTTTCGATGACGTCATCGC +CGGTAGGAATATCGATTTTTATGTATCGAATTTACTGGAAAACTATCAAA +AGTTTAGGAAAAATATCGATTTTCCTGAAATTTATCGATTTTCCTGAAAT +TTATCGATTTTCCTGAAATTTATCGATTTTCCTGAAATTTATCGATTTTT +CTGGAATTTATCGATTTTTCTGGAATTTATCGATTTTCCTGAAATTTATC +AATTTTTCTGGAATTTATCGATTTTTCTGGAATTTATCGATTTTTCTGGA +ATTTATCGATTTTTCTGGAATTTATCGATTTTTCTGGAATTTATCGATTT +TTCTGGAATATATCGATTTTTCTGGAATTTATCGATTTTTCTGGAATTTA +TCGATTTTTCTGGAATTTATCGATTTTTCTGGAATATATCGATTTTCCTG +AAATTTATCGATTTTTCTGGAATTTATCGATTTTCCTGAAATTTATCAAT +TTTTCTGGAATTTATCGATTTTTCTGGAATTTATCGATTTTTCTGGAATT +TATCGATTTTTCTGGAATTTATCGATTTTTCTGGAATATATCGATTTTTC +TGGAATTCATCGATTTTTCTGGAATTCATCGAGTTTTCCGAGATTTATCG +ATTTTTCTGGAAATTTTACTCCCCAAAAATTTAACAATATATCGTGCCGA +GACACGAAAAACTCTAAAAATTACAGATTCCAATCGGAGGCGGCCGCTCG +AAACCCGAAAGTTTCTCAAAATCAATCAATAGTCCTGTCATTCAATCGCC +ACTCTCTCCCTCACCAATCAAAGGAATGCCAAAGGTAATCGTGTCGAGAC +CCACCACGCCACCTGCGCCTTTAAATTCCACGTTTTGTAGTCCCGCCACG +ATAGTATTTCCAATTCACTGGACGATTTTCCTGAAATGAGCATTTCACCG +TCCCCCTCGACACCGAAATCCTCTTCCGGTGGCGGTCGCTTCGCACCAAA +AGGAACTCGATTTAAAAAGGATTTCGAAATTCTCGTGAAACCTTCCCAGT +CACCACAGAATCCGTGGAAAATGGGCGGAGCTTCCGCGTCGATTCGCGAA +GAAATTGATCCGCAGGAGATCCGATTCGACGAAGTTGTGAAGAAGGAACA +GAAATTGCAGACTAATATACGTGCGTCACTTGAAATTTTGGAAAAATCGA +TAATTTTCAAAAAAATCGACCTTTTTTTGGAAATTATCGATTATTCTTGA +AAATTATCGATTATTTTAAAAAATTATCGAATTTTCTGAAAAGGATCGAT +TTTTTCGGAAAAAGTTTCGATTTTCTCGAAAAATAACGACTTTTTTTTTG +AAAATTATCGATTTTTAAAGAAATTATCTATTTTTTTGGAATTTATCGAT +TTTTTTTAGGAAATTATCGATTTTTTTAAGGAAATTATCGATTTTTTTAA +GGAAATTATCGATTTTTTTGGAATTTATCGATTTTTTTTAGGAAAAAGTT +TCGATTTTTCTCGAAAAATAACGACTTTTTAAAAAAAATTATCGATTTTT +AAAGAAATTATCTATTTTTTTGGAAATTATCGATTTTTTTTGAAAATCAT +CGATTTTTAAGGAAATTATCGATTTTTTTGGAATTTATCGATTTTTTTAG +GAAATTATCGATTTTTTTGAAATTTATCGATTTTTTTAGGAAATTATAGA +TTTTTTCTGAAAAATTATCGATTTCTCAAAGGAAAAGTATCGATTTTTGG +AAATTATCGATTTTTAAGGAAATTATCGATTTTTCAAAAGAAGAGTATGG +ATTTTTTGAAACTATCCATTTTTCTTTTAATTATTGATTTTTTTCCTGAA 
+AAATTATCGATTTCTCGATGGAAAAATATCGATTTTTCTGAAAATTATCG +ATATTCTTTTGGAAATTATCGATTATTCTAAAAAAAATTATCGATTTTGC +AGGAAATTCTCGATTTTTAAAATAAAAACAGCGATTTTTTTTTTGAAAAT +CATCGATTTTTAGGGAAATTATCGATTTTTCTGAAAATTATCGATTATTT +CTTCAAAATTATCGATTTTTCGAAGAAAAGTATGGATTTTTGGAAACAAT +CTATTTTTCTTCAAATTATTGATTTTTTTACCTGAAAAATTATCGATTTT +TCTGAAAAAATATATAATACCACACATTAATTTCAGGAACCGGATTCAAA +AAAGTGCAGCTTCTACCACACGTGGAGACGGAAGAGCTCGCCGGAGCTCA +AATTTTGGAGGTTTTTCGACGAGAACTTCACGATGAGGCGCTCATATGTG +TAGGAATTTGTGATGGTTTTTTTCTCAAAAATTTGAAATTTTTCAGGTGG +AACTGGTGATGAGCGACGATTTGGAGGTGGAAAACGAGCAGATCATCTGG +GGAAACATGCCGGGACTTGTTCGGCGTTAAATTTTAAATTTTTGATTTTT +TTTTTTCTTGTAACTTGCATTTTTTGTTTAATTTAGTGTTTTCGGTGTTT +TTTGTTGAAATTTTCGGGTTTTTAATATTCAAAAATTAATTAAATTATTA +TAAAATAAATGCATTTAGAAATGCGGTATAAATTGAAAATTTTCAAAAAC +CATCTCCCCGAATCCAGTGGTTTAGGATGAAAATCGTGGAAATCGAGCCG +AACGAGAGAATTCCGAGTTGCCACGTGGGTCTGAAAATTGAGTGTATTTA +ATTTTGGAAAATTTTGGAGATCTCTTACATTTTCTCAAAAATCCATAGCT +TTTTCCCATTTTTTTGTTGCTGGATCATTTTCAGCTCTTCGGTTATATCG +CAGACTTTGAAGCAAGGGTCTTCCGGGAGAATTTCCTGAACATTTGAAGT +TTTTGGGTGAAAAAAAAGCTTTTTTAAAAGATTTTTTGGTATTTTCAAAT +AATATCGGAAATTTGCGTTTAAATTTTTTTTTTCAAAAAAGTGAATTTTT +TCAAATTTTTCGAAAAATCGAAAAAAAAATTTTCGGGTTTCTGGAAATTT +TAATTAAATGTGTTTATGATGATGAGAAAATATAGTTGACAAAATAATTT +TTTCCGATTTGTCAATTTTTCAAAAAATTTGAAAACAATTTTTTCCATTT +TTATTCCTGAACATAAAAAATTCGAAAAATTTTCGTTTTTAATAATATTT +CGGAAAAATTTTGAATTTACCGTAGTTGTGTCATTTTTAAAGAAACTCAC +AATATTTCTAATCGAAGTATGCATATTTTTTAAATGAATTTTTGGAAAAA +TTTCAAACTTGTTTTTTGTTGAATTTTTCGATTTTCTTTCGTTTTTTTAA +ACAGAACAAATTTTTCATCATCTTTTCTACAAACAAAACAAAAAAAACAA +TTTTTGTTTTTAGAAAAGCGAGAAAAATTTCGAAAAATTATTTTAAAAGT +TAGAAAAATATCAATTTTGAACGGAATTTTTTCAAAATTCGATTTTTTTG +TTTATTTTTCTAATATGTTCAGTTTTTGGCCATTTTTTTTTTTAAAAAGT +TTGCTTTCAAAAAATTTTAAATGAGAATTTAGATTTTTTAACGAGAAATG +TTGAAATTGAGAAGAAAAAAAAAATCAATTTTAAATAGAAAAATTTTCTT +TTTCTAACATTTTCAGTTTTCTCAAAATTCCAAAAACTCGTACATCCTCC +TGACAAGTACAGTAATCCATTGGATCCAGATTCTCCTCTCCAACACTCGT +TTCCCGAATTTTTTCGAACCAATTGTTCATTTTCATAAGAACCGAGCTCA 
+TCGCAATCAATTTCCCGATATTTCCATCGTATTCCTCCGAAAATTGCTGA +AGCCGATTCAAATTTCCAATATCATGCTGGTGGAGAGAATACGGAGTGTT +CAATAGATCCGTAATTGATTTAACTTTAAAATTTGGTTTTTTCATTGTAA +TAATCATGCTAATCGTCTTGACATGATCTGTTAAATTCTCTCGTTTCGGC +GGAGAATATAATCGATATTCCAGTGGCACGAGGTTGTCGAGAAATCTATT +GAAATTCTGATAAATATCGTTCCAATCACTTCGAAGCTCGTCAGTTTCGG +CTCCAGCTCCGCTTTTGAAGCCAGTCGTTTTTAGATTGTGCTCCAGCGTG +TCTATTGAGCTCTGAAAATTTGATTTCAAAAATTTTGAGTAACTCAGACA +ACCCCATTTTTCATCTAATTCGAAATTAGCCACATCTTATAAGAAATTAA +CCACATCTAATTAGAAAAAACTTTTCTAATTAGAAAGGATGCATATCTAA +TTAGAAAAGCTGCACACCACATCTAATTAGAAGGGATGCTCGTCGAATTA +GAAATGAGGCACATCGAATTAGAAAGGGGGCGCGTTGAATTAGAAGTTAT +GCGCATCTAATTAGAAAGGAGGCACATATAATTAGAAACACATTGCGCAC +CACATCTAATTAGAAGGGATGCTCGTCGAATAGTACTACCTTTTAATGTA +TGTACTACCTTTTAATGTATGTACTACCTTTTAATGTATGTACTACCTTT +TAATGTAAGTACTACCTTTTAATGTATGTACTACCTTTTAATGTATGTAC +TACCTTTTAATGTATGTACTACCTTTTAATGTATGTACTACCTTTTAATG +TATTACCTTTTGGCTCTTAATGTTGAAAATAAATCGAATCAAATTAGAAA +CAATGCGCGTCGAATTAGAAAAAAAGCATGCCGAACTGGTGCGCGTTAGT +TGACCCTATTTTTCTAATTAGATGAGAGGAATAGGGTTGTCTGAGTAAGA +CTGCAAATTATCAAAATTTTTTGGTTCAATTTGTTTTTGGAGAAAAAAGT +GAAATTTCCTGGCAATTTTCAGTATTACGTAGACGTCGAAATATGAGATT +TTTCACCATTTCTTATGGGTCTCGCCACGCCGACAATCAATTATTGTAGT +TCATGTCGATTTACGGCGCCACTGTTGCATTAATATTCCAAATGACAAAT +TTGCCGCCGAGTTTAAAAAATTCAATAAAAGCCCGTAAATCGACATGAAC +TACAGTAGTTGGTTGTCGGCGTGACGAGACCCAATTCTCATCTTTCGACG +TCCACGTAATATAGAAATTTTTTTTTTGAAAAAATCAAACGATTTTGCCT +CAAAAAATTCAAAACTGTAAATTCAAAAAATACCACAAAAAAAAAGCGAA +AAAAATCGCGGAAGACAGTGGCGCCAGGCTGTCTCAATACAGTTTGATCT +ACAAAAAATGCGGGAATTTTTTCACAGAAAAATTGTGACGTCAGCACGTT +CTTAATCATACGAAATCAGATGAGATTTCTGCGTCTGCCATCCCGCATTT +TTCCAAGATCAAAGTGGAACTGGGACTTTCTGAATAGGACTTTCTGATTC +CACGTGGGAAGGATGACAAATCCAAAATTGCGATATTTCACTAATTTCAC +TGCTTGAATTTCCTTGGAACCAATCAGCGTCTTCAAACTCCGCCCACTCC +ATCTGATTGGTTGAAAAATGGGCGGAGCAAATCGCTGATTGGTCGCAGTT +CTCATTTTTAGCCAAAGTTAAAAATCTCGATTTTTCAAGGTTTTTTGACC +ACCTCCAATTCCAAGCTTCTTCCTTACCTTCAAATCGTAGAATATTGCCA +ATTCTCTGATTGCATCGAGTCCTTCCTTCAAAACCACAACATCTTGCAAC 
+TCGAACCCCGAAAGCTGATATTTCTCGTTTAACTTGAATTCTCCTAGAAT +ATCCTTCGCACGTAGCAACTCTCCAACGATTCCTTCAGTTGGTAGAAACT +CTTGTAGAGTGTACACTGCCTCCCAGATCTTCTCAAGATCCCCGTAATTC +CAGACATCTTCCGAGCTAGCCACAAGAAATTGCATATGTTGGTGTTTTTC +GTTTCGAGCAACACATTCCAATGAGGATACGAAGTTAGACTCGTTGATTT +TTGTTAAAACGTTTATCATGCTGGCGTAGATTTCCTCGAATTTGGTTTTT +CTTTCTTGAAATTTTTTGTATAGTTTTGAGGGTACCATACAAAAATTCTC +GGGAAACTTTTCGATATCATCTATCATCATCGTCAGGTTATCGAAATCTA +TATACTACAGTGTTTTTTGAAGAGTTTCCACGATATTTTGATCGGTTTGA +TGTTTGAATGCTTCGAGAATACGCTCGTCTCCGCAACGTCTTAACGTAAT +GCAAGGCTGTGCGGTGTGTACAAGTGTCTTGTTTAGCTCTTCAAAACCGG +AGCGATACATAGAAATGGTTTCATGCGCAGCGTCCAGCTGCTCAGGGCTC +TTGAAAGTTTTCAAACATTCCGAGAAGCTCTCAACAATGCTGTTGAGGTT +ATCCTTTGGCAGATTTTCCCGAATTTGATTAACTTCTGATAGATATTTAT +CGATATATTCGATATTTTTATGATATTTTTCCGTGTTGTTTAGGATTTTT +TGCATCGCAAATTCGAATTCCACATAGTATTCCAAAAACTTATTCAATGG +ATGCAGGAATTGAATTGATCCTTTGCCAGAAACGCTTTTTATCCAATAAT +TGCTCTGCTCTTTTATGGACTCTCGAATACTTTCTTCTCGTTTAGGGAGA +CCTAAAGCTCCACGATCAATTCGTTTTTTGAAAGAAATAATTCAAAAATA +CCCAAAAGAAAATTGCCTGATGGCCTGGAAGAATGCGTATCCGAGGATTG +AATACTACGAATAACGTCCCTGACCTTTCGCACCCAATTTGAAAGCCCCA +GTGTCGTATTGGTGTCGCGAATCATTTCTAAATCACTCAACGCCGCTTCG +AAATTGTTCAGTCTTCCCACTGACAAGAAGTCTCGAACACTTTTACTTTC +ATTTTTCATTATAGTCGTAGCGAGTTTGAAGAGATAAAATGGATCTTGAT +GGAGCAATGCGAATTCTTTCTTGGCTCGTTCCGGAAAGAAATTTCTGTAG +TAGGCGCCAACCTCTTCGACCGTTTCATAGTTCCGTTCTGTACGATCTTA +AAAGGTGGAGTACCGAAATCTGGGGAATATTTCTAAATAACTCCAAATTT +GCTCCTGATTTCAATTATCCATGTGAAAAAATTCAAAAAATCCCTGATTT +TGTATTTCGGCTTGAAATTGCCGAATTCCATTTGTGCACACATGCAAATT +TTTCAAACGCGCGCCCAAAGAAATTATCATTGGAGCGCGTTTGCCTCATT +TGATTCTCTCCGGAGCACGGTAGCACAGAAACTAGATGGATTGGTTCATG +ATACTCAGTCTGGAAACCTATATTGGCTACTATCTCGAAAACCATCATAA +AATCGATTTTGCGATGCAAATAAGAAATGACCGCAATGAAATTATCTATC +TCCATTCGTGATGAATTTTCGATTTTGTACTTCCTGGCCAAGTTATGCAC +GTTTGTTCGGTGGAGCGCGTTTTCACCCATCTAGCAACTGACACGGTGGT +TCAGTAGCCAATATAGGTTTCCAGACTGAGTATCATGAACCAATCCATCT +AGTTTCTGTGCTACCGTGCTGAACCACCGTGCTCCATCTACCGTATTTCT +TCCATTAATATGGCTGCAATACTATTTTTCGATGGTCTTCCCGCTTGCAA 
+TACTAATAGGGAGTGCAATACTAATTTTCAGAATATTTTTCTGCCTTTGA +GCTTACTAGTTTTTTTCTGAAAAAGCTCGAATTTTATGTAAAAATTCAGA +AAATTGGTTTTAATTGTAACCTATAAGTTTCAAAAATTCAATCTCGTAGA +AATTTTTTTGAAAAATTGTTGCAAAATAGGCAAAAAATATTGTAGAAGTC +CTGAAATTAGCGAGACGGGATTGCAATAAAAAAAAGTAAACGCAAGACTA +TTAGGGAGTGCAAGACTAATAGGGAGTGCCATACTAATTTTCGGAAGGTC +TCCGAGGGGCAATACTAATAGGGAGTGCAAATCTAATAGGGAGGCCATAC +TAATAGAAGAAATACGGTATTTTTAGTTCCTTTATTTTTTCAACGAGAAA +ATCAAATGAGGCAAACGCGCTGCAATGAAAATTTAAATTTATTTGTGCGC +GCATTTGAAAAAACTCATTCGGACGCAAATAATATTCACGAATTTAAGCT +GAAATATAATATCAGGGAAATTTTTTTGTTTTTTTTTCATACAGATATTC +TGAATCAGAGGAATTTTTTTTAGTTATTTGAAAATATTTCCTAGATTTCG +GAAATCAAAACAGCAGCTTTTGTACAACAGCTTTTTTTTAAATGTAAAAC +TTGTTTAAAAGTCAAACCACGTCCATTTTAAAAATTAATTTTTTAACTTA +TAAAATAAAAACATGTGGAATGCTTTTGCATGTACATTTATACAAGTTTT +TTGAGAAAAATACATTTAAATTTGAAAAATGTGTTTACTAAAATTCTGAT +CAATTTTTTTGTTATTATATATTAATAGCTGAACTTTACTTTAAGACAAA +AATTCCTCTAATTAAAATCTACCTTCAAACCATTTTTTCAGTTTTTGCAA +ATTCTCCTCGGAAATCGCAGTAGGACTGGGAGCTAGATCATTGAGTGGCG +AGAGAAAATCACGGAAATTTTTCAGGCGATTTTTCAGGCCCCATGTTGCC +TCCCATCCGTATTTTGGTTTCATGAGAGATTGGAGATCATGAAATAAATG +CAAAAAAGCATCCAGCGAGGTGGTGATCTTTTTCAGTGATTCTCGCTGTA +GTTGAAGATGTGGGAGCTTCTTAAGTAAATTTACAGCTTCTTCTCTATCC +TTATAAACTTTAGTGTTCTTCCTTTTCCCGACAATCTCCTCGAAAGCTCT +CTGCTCATCCCATCCGCCAATAGATTTTGTTTTGTAGAAACTTGAAATCC +TATCAAAATCGATATCCCGATCTAATAGCTGATTCGTCAGATCCGTAGAA +GCGATTTCGCCGCTGAGAAGCCCATCGACGATAAGATAGCAGTTAAGGAG +CGGTGGTAGTGAGCTCATCCAGTGAATCCACAGGTTTATGGAGTCCTTTC +CGTAGACTGAAATTATTTTTAATATTAAAGTAGAAAAAAATTATTATATA +AACTAACTATACGCAGGAAAACTCGCATTAAGTTCCAGATTTCGATCGAA +GAATTGTGGATGTCCAGCTGGAAATTTTTATTTAAATTTAGTAAATTTAG +TGAAGAAATTTAATTTAAAAATACCTGTCAAGCGCTGACCCTTAATCTGC +TCCAACATTTGAATACCTTGCCGCTCCCTTTCCGAAATTTCGAGATCTCC +GCAGTGTTTACCCGGTAGAAAAACAAGAAGAAAGGGAATTATCGTTATAA +TCATCGTTATCTGGAAAAAAATTAATTTTCATGGAATATATTTCAAATTT +TCGACAACCAAACGCTGAAAATTCACATTTTTTTCAGGAAATTTGGTTGA +AAAATCTGTTTTTCATAATATTTTGAGAAAAAAACAAGCGTAAAGGTATT +CAAGGCGCATGCAATTCACTTAAGCCAGGGTCTCGAAGCGAACAGAAAAC 
+ATACGGTATTTGAAAATATTCTTGTTTTCCGCGGAAAGTCGCAGAATTTA +ATTTGATTTTTTTAACTTAAAAAAACAGAAAATTAACCGTTATACCTTTT +GAGCAAAAAATTCTACAAGATAATAATTTAAAACAATTAAAAATTGAATA +AACATTGAAAATTAAATGCTTAAATTTCAAGGCGCGTGCATCAAACTTTT +GTTTGGGTCTCACACCGAATGTAACAAGTACGGTAAGAGTGCGCGCTTAT +TTTCATTTTTCGAATTTGTGCGACCAGTTCTCATGTTTTTTAAACGATTT +TTGCGGTAATTTTGGCCATTTTTTCTAGTTCCCACCGACAAATTGCATTA +TTTCCGTATTTTCAACGAAAATTTTGTTAAAAAACCAACAAAAATTTAGA +AAATTGCATCATTTTGCAGGGATGAATGGTCAAGCGGATCGCTGCGACCG +AACCGCCTACGTTTCGGGTCTTCAGCCGACAATCTCCGACATAGAACTGT +TCGAGGTGTTCAATCGTGTCGCTCACGTCGAGAAGGTCATCGTTCGCAAT +GGAGCCGCCCGTCACGCGCTCATTGTTTTCAAGTTGGCATTTTTTTTCCA +TTTTTCCTCATCTAAATCTCTAATTTTCCTATTTTCAGAACTGTTCAAGG +CCTGTATCAGGTGCTCGTAAACTTTCAAGGAACAACTCTTCACGGCCGTC +AGCTGCACATTCGTCCACTGCGAGAAAGTTCGCACGCGAACAGCGAGGCC +ATCTCAACGATGTTCGAGAAGGTCAAACATCAGGGAAATTCCGGGAATTC +TTCGTATCGTCAAGAGCACTCGTTCCCCGAGTACCGAAACCAGAACCCAC +AGGCTTCATCATACCTTCCACCGAATCCACGTGGTCATCGAAACTCGACG +GGCTGTTTCAATGGTGGCGGCGGGGGCTATGGACGCCGACGTTCCGCCGG +TGGATACAATCAGTACAATCAGAACAAGTATCCGAACGAAACGTACCCGG +GAATGACTCTGTTCGATAATCATCCAGTGCAGCAGTATTCGGGCTTCAAT +CCCATCGATTTTCGCTTTGATGACTATGTGGAAGGAGCCAAAAGGTGCAA +AATTCGCCAAATTTCGCTAAAAAATCTGAAATTTCGCCGACTTTTCCCGT +AGCGCAGCAGTTTTTTCTTGATTTAGGCACATTTCAGGCAATTATTTTCG +GCAAATCAATATTTTTCCACCATATATGTTCTAGAATGTTTTAGATGTAC +TAGATTTTATTTTCAGACGAATCAAGTGATTTTCTATATTTTTACACTAT +TTTTTGTTGAAATATGACCAGAATATGCAACAAAGTGTTCCAGCTGCTGC +GAAAAGGAAAAATCGGCGAAATTTCGAATTTCAGTCTATTTTAATCTGTT +TAAGCAAAAAGAAAACTAATTTTTAAAATTCAGATTCGACAACCTTGCCA +ACCTGATTCGCTCGAGCACCCCCACGGATCCGTTCGCTAATTATCAAAAG +TATTTTTGAATTATTTTGAAAATCAATAAATTGGTTTTTTTTTCAGACCT +TGTGAATCCACAAGCACATCTCGTTCTCGTACGAATTCAGCAAAAGATCA +AAAGCATGGCCCGCCAACGTGGAAAATGGAGCTGCAGATTAAGAAAGATG +ACCACCACCACGCCGGCGGCGGAGCAGCAACCACAGGACAGAAGCTGTCC +CCACAACAGTTTTTGGCTCAGATAGCTCAGAGACAACAATCGGAGCAAAA +CGCGGATGAGGTGGCCAAGAAGAAGCGTCCGAACCTTTCGGTTATCAATC +CATCGCTTTTCTATGAGCAATACCCGAGAACATCGTCGCCAGTTGTTTAT +GCTTCAAAATCTAGTAATATTTTTTGGAGATTTTGGGAAAAAAACAAATT 
+TTCTCGAAAATACGCCCGAAATAGTGTTTCACGCGAAAATTGGCGATATT +TCAATTTTTCTGAAAAATTGCTATAATTTTTTCGATTTTCACGTGAAATT +GCAAGTTTTTGGATATTTTTCTCGAAAAACGTCAATTCATCGATTTACGC +AGCTAGTATACTCAGCAATTGACCATTTTTTTGCAGACAATTTGGTATTT +TTTACTTAAAATTCTTGATTTTCGACTTCTCGTGTGCTAATCACTTTTTC +AAATGCGCGCCCGAAGAATTTCTCATTGGAGCGTGCTTGCTTCGTGTAGA +TTTACGAGAGCTTTCCATTTATTTAACTTCTTTCGTGCTTTTTCAGTTTT +CCAGCGAGTTTCTGGCTCGACCCCTCGGTGTATTTTGTGCAAACACCGTC +ACGCGCAAATGCATGCACTTTTTCAACGCGCTGCGTGAAAATTCCTCTTG +CGATTTCAAATATTTTTTCCCGCCATTTTCCAAAATTTTCGAGAGGGGGG +GGGTCGAGCCAGAACACCTTGGTTTTCCAGCGAGCTTCCTTCATTTACGT +CGATTTTTTTTTTAATCTTCGTTAAAGTTCTAAAAAAAAACGACTTAAAT +CATTTTAAATTTTCCAAAATTCGGTTTTCCTTCTGCAAAAAAAATCAATT +TCCAGAACCATCCGGCCACTACGACGACAAAATCGCATCTCCACACGATC +CGGCAGTTCTCGCTTACAGTCGCCTTCGTGTACCTCAATCGGCCTTCGAC +AGTCTCTCACCAATCGACACCGACAATTGCTCATTCATCACAAAACACTT +GGGACCAACAACCGGTGTGAAGCGTGATTTGACGAACGACGAGTTGTCGG +ATATGATTGTATCAACTGGAAATCTCCGAATCAATCCAACCAACATCGAT +CATCATGACGAACCGGCACCATGGTCTCCGCTGAAACGTCTTCGCGCCGA +AAGTGGATCCCTATCAACTGCACAAGTCGCGTCGCCCGAGTTCTCACCGA +TCAAACCAAAGACAATGGAGTTCCACGAGACTGAGGATGATGTTTTTGAA +ACTGGACCACCACCAACCTACTTATCCGAAGGAAACGAAAACGCGGAGAA +GAAATGTGTTGAGCAGCCGAAAATCAACTACGACGACATTAATAATAGTC +GTCTTCCGTCGAATTCTCATTCGGCAGCACCGAATTCCGAGAAGAAGCAT +TTCGTTTTTCCGGTACGGGGGCTTTTTTTTGAATTACCGAAAAAATTCCC +AACTTTTTAAAGTTCAAAAAAGTCAAAATTTTTGTCCATTTTCTGGGCGT +GACTGCTTATGCCCCGCCCCCCTTTTTCGAAGTTTCTGCTTCTCGGAAAA +TGTATTCCCAAAGTGCCAGTTTTGAGGCCCCACGAAAAGGGAGCAGAACG +AAAGAGGCACCACGGATTCAAGATCTGGTGCGATTCCGGATCTGGCACCG +TGCCAACAACTTGAAAACGTGGTGCCTCTGAAAATTTGCTGCCAAGGTTG +AAATATCGAATGCATTACCCGTAAATCGACACGAGCTACAGTAACCTGTT +ATCCGCGTGGCGAGACCCATGCGCGTCAGATGTGGCGAAAATTATACTAA +AACCTGTTTAACTTGCAGAAAAACTTAATTTTTTTTGTTGAAAAAATTGA +GCCAAAAGACTCAAAAATTTCATAAAGCGAAACTTTAAAAAAATATATAA +TTTGTTAAAAATTTCACGAAATATTTATGAATTAATTAATTTTTCCAGGA +ATACCCAATGTGCCGTCACTCGTCAGTTCCATCGATTGCTCACTTGGTCG +GTGATCTGTCGGACTTTTGCCCACACGCTACAGCCGACGAGAAGATGCTG +CTCGACGAGGCGTCTTCAATCATCGAAAATACAACTCCAGCAGTGTCTAC 
+TGCTCCGGCTGCTGCTCCAGGAGCTACAATGCTCCAAATATAGGAGAAGA +TCACATATACAATAATATAATCTTATTGCATTTTCGCAATTCTCGTTCTC +TCCACACACATACACACATCATCCCAAGTATTCCTGTGCTGAATCTCAGT +TTGAATGATGTTTCATACCGTTTTTATCCCACTATTGCCTTATCGTTTCC +TGTTTTATTATATTTTTCATTTTTTATATTGCCACCACCACCACCAACAC +CACCCTCTCTATCTTTTTTTGTTCTTTTGCCTCCAGCAACATCATCACCT +AGTTTTCTGTGAATTTTGAATTTTGTGTTCCCCCGAGTCCCCTCTGATAG +GAATGATAGGAAAAACAATGAAAATGCGAGTTTTTTCAAAAAAAAAAATA +CAAACTAGGAAACTATAGAAACAAAAAAAAAACATTTGAGGCGCAGTACC +GAAATCTCTCCAAATTTGGCCCGAAAAATTCAAAAGAAAATATTTATTTT +TCTAAATTATTATTTTTCCGCCACTCCAGCCTCATTCTCATTGGAGCGCG +CAAAGTCATGTCGATTTACGAGATTAAGATTTTCAACGAGTTTCATCATT +GTTATCGATTTTCGTGTAAAGTTAACTAATTTTTTTGAAAATTTTCGCAA +AAAATTTTGGATAATAAAATGTTCAATTGGCACGAAAATGCAGATCTTCA +GCAAAAAAACCGACAGAAATGTGTAAAACTGAATCTCGTAAATCCACATG +GCATAACGCGCTCCAATGAGAATGTTTTTAGCGAAAAGTTTGAACAAAGG +AATATCAAATCAGGGCCAAATTTGGAGCAATTTCTAAAATATTTTCTCAA +TTCTTTCGGTATTCCACATTCAAACAATTATTCGGCCGTTCCCATGAGCC +TCTGAATCTCGATGGGATCCCTTGGCACTGCAGCAGTCAACAGCTCAATC +CCTCCAGCTTCACTAGTCGCCACGTCATCCTCGATACGGTATCCAATGCC +ACGAAACTCTTTGACAGGCCAATCCATCGGAACATACACTCCCGGCTCAA +TGGTAAATGGCACATTCGGTGGTAGATCAATGTCTCGGGATACTGTCGGA +CAATCGTGAACATCCATTCCGAGATAATGACTGACGTGGTGGGGACACAG +CTTCTCCGCCTGATGAATCATCTCCTTGTGATCCGTAGATCGAATCAGGC +CGAGTTCGGTGAAACTTGCCGCGAGAAGTTCGTTCATTCGGCGGAACAGG +GCGCTTAGGCGGACTTTTTCCATTGAGTGGGCGTAGGTGAGCAGTTCTTC +GTGAACGTAGAGAAGCGCTTCGTAGAGGGATAGTTGGGCGTCTGACCAGC +TGCAAAAACGAATTTTTTTTTGGGAAGGTTTTTTGAAACGATTTTTCAAA +TTTCGTTCGAATTATATTAAACTTTATATAAAACGATTATCCACGGATTT +CTGGCTTCCCTCATAAATTGGAATGGAAGAGTTTACCGAACTAGGCCATT +TTGGCTCGGCCATATCTGGTGCCGCGTCGCGGCTCGATTTTAGTTGTAAA +ACTAAATGCATTTGTACGTGTGGAGTACACGACTTTCCCACGCGTTGTCC +GGCGGGCGATTGTCAATGGAGCGCAAAAAATTCACTGAGGAAGGGGAGAA +CTCCGTGATTAGCACCGCCTATTTCTCTCACGAATCTATTTCTTCGAAAT +CTTCTATTTTTTAAAAATCAATTCCTGAATTCTGAAATTCATTATAGCGT +AATTTTTTGGGAATTTCACCCTGAATTCCATTTCTACGGAACAACATTTT +TTTCTCGAATTTGTGGTGACTCAAATTAGAATTTTCAAAATCTCCAGAAA +AAAAATCATTTTTCCTGAATTTTCTGGAGATTTTTAAAGGAATAAAGTGC 
+AAAAAAGACTCTTTTTGAGGCACCACCGAAAGGAGAAAGGAGAACACAAA +CCACGCCCATTTTTCCGTGCCGCGCGCAAGTTTTTCTGCAAATTTTTATT +TTCAAACGAGACAGCGAAACTCCGAAATAACGCATATCGTGTTCTTTATC +ATCAACGTGTTATTTCGGAGCTTCGTTGTCTCGTTTGAAAATAAAAATTT +GCAGAAAAACTTGCGCGCGGCACGGAAAAAATGGGCGTGGTTTCTGTTCT +CCTTTTTCACCTGTTCTCCTTTCGGTGGTGCCTCTTCTTTTTTTTTTCTA +TGTTTAGCGTAATTTTTTAGCCATCTTGGAATAGCCCCGCCCATTTCTCC +CACAAATCCATATCTACGAAATCTCGCATTCCAGAATGATTTGTGGTGTC +TCAAACGGGAATATTCAAAATTTCTAGAAAAACCCCATTTTTCTTGAATT +TTGGCCGAACTTCTGTAATTTCTGAAATTCTGAGAAAAATTTTCATTTGA +GTCACCACAAATACTGGAATGCAAGATTTCGTAGAAATAGACTTGTGGGC +GAAATCAGCAAAAAATGGGAAAACATTCAATTTTTTTTAATTTTTTGGGG +TTTTATCACTAATTTTTAGCACTTACAATCCAGAAATCGGAAAGCAACGT +GTCACATCTGACACATAACCATTCAAATCACACCCAGCATCCACGAGAAC +ACATTCCCGGGGATTTAGGTCATTATTTGCGTCCAAATAGTGAATAGTGT +TGGCACGAACACCACCGGCAATCACTGGTGGATATGCTTGCATTTCTGAT +CCACGACGACGTCCTTCAAACTCCAAGAGCCCGCAAATCGCGTTCTCGTT +GTGTAGATCACGTGATCCCGATATCATTGAGCTCATTGTCTGGGCTCCCA +CGTTGCACACGTCACGCATTGAGCTCATTTCGGATGGAGATTTTATTACT +CGGCGACGTTCGATCTGCAAGGAAAACTTGAAAATTGGCAAAAAAATAAT +TTTTTTTTTGAAATTTTAAAACGATTTTTCGGGTGTTCAGAAAAATTTTT +TTTAGGAAAAAATTGGAATTTTCCAAAAGTTTCAAAAAAATAATTTCTAG +AACTTTTTCCTAAAAAAAAAATGAATATTGAAAGCTTCGAAAACAGTAAA +AATTGGGAAAATTTTTTTACCGTTAAAAAATTTTTTTTTTTCGAAAAAAA +AATTTTCAGTTTAAAATTTGGAGCGTTTTCAATTTTATTGATAATTGGTG +AAACAAAATAAAAAGTAAATAAAAAATTTTTTTTTTGAAAAATTTGAAAA +ATTAGATTAAAAATGTTTATTCTGGGATGAAAAAAACCTCCTAAAACCAT +TTTCTGGAAAATTTAAACTTCCTATTTTGTTCAGACAATTCTTTTTTTTC +AAGAAAAAATCAAATTTTCCACTAGAAAATCGAAAAAAATTGACCTCGTT +TAAAAATTGGACAAAATTTGAATTTTTACTTTGAAATCCTCGAAAATCGG +ATTTTTTTTTCAAAGCTTGAACAATTTTCTGCAAATTCTATATATCTCTA +AACTCACAAAATGATTAATTTCCCGAACAGAATTAGCTTTAGCCTGTACA +AACTTATAGAGTAAATCATCAGAAGTCGAATCAAAAAACACTGCAGTTCC +CTTATCGCAAACTTTTTCCAAAGTCTGTAGAATCCGGCTAGTCGGCACAC +ATTCCGTGAATTTTGCAGTCTTTTCCCATTCGGATTCAGTTGGTAGGGCG +CCTTCCCATAGCTCATCATAGGCACTACGACGGTCAGCAAAAAGAATATT +TGTTTCTTTTGAAGATTCGGATATTCCTGATTGCATTATGTAATAACAAT +CTGGTGTGGTGATTCCGTTTAGGTATCTGCAAAAAAAAAAAATTTTTCAA 
+ATTTTTTTCTGTTAAAAATTTAATTTATTTTTTTTATCAAACAATTTTGA +ATTTTTCCCAAAAAAAATCCGAAAATTGTGAACAAATCTATTATTTTCGT +TGAACAAAAAAAAAACAAATTCAATTTTTAAATAATTTAATTTTCGTTCA +GAAAAAGAAATTTGTCGATTTTCGGCGGCAATGATTTTTTTTAACGAAAT +TTTCCTGAAAAATTCAATATTTCAATAATTCCTGATTTTCTAGTTTTTTT +TTTAAAGAAGAACATGTTAAATTTCTACTAATTTTATAACAAAAAATTTC +GGGAAAATCTAGAATTAAAAAAAAATGTTTTCAGAAAAATTCTACTAATA +TATTAATTTTAGCTTAAATTTCGATAATTTTAGGTTATTTTTCAATTTTT +TTAGGGCGAAATTTTGATTTTCAAAACAAAAATATTTTCTGACAAAAAAA +TTAATTTTCATTCTTTTTTAGATTTTTTGAAAAATTTTCAATTTTTTTCT +GAAGAATTTGGTACTTGTGTATTACATGCCCTCATTTTTAAATTAAATTA +AATGTTCATTAATTTCTCCATTTAAATTAAACGTGATATACATTTTCTCT +TTTTAGGCTTAGAAATTGCTATTTTGCTACTTAAAAAATTACTATTAAAA +TGAGGGCATGTAATACACAAGTACCAAGAATTTCAATCAATTCTTGTTTT +TGAAATTTTTTTTGGGAAAAATTCCAATTTTCGATTGTTTCTTCGATTTT +TTCAGAAAAAAAATTTTCTAAAATTTATTTATTTGTTTGAAAATTGGATC +TAAAAAAATTTCTCTAGTTTTTATTTTTTTTCTGGAAAAAGCTTTTTTTA +ACTTAAAAAGTTAATCTGTGGCCGAGTTTTCTCTCATCATTTTCACGGCC +ACGGCCATCAAAACCGAACCTGAAATGGCTTTTCTGTCGGAACGCGTGTG +GAACATCCGGTGCAATATATGATTTCCGTGCTCCTTTCATCACAACGACC +ACTTGCTTTTCTGAAACTTTGAGAGGTCATGGCCTAACTTTTAAATGGTT +TTCTAGGCCACCATGGTGATTTTATTACGGTACCGGGTCTCGACACGATG +AATTTTGTAGAATAAATTTTTCTGAAAAATTTCGTTTTTTTTTTTAGAAA +AATCAAATTTTCAAGAAAAATGGGAAAACATTTTTCAGGAAAAATTTAAT +TTTATAGAGAAAAATAGAATTTTTCCTTTAAAAAAATCTGAAAATTCGGA +TTTTTGAGAATTCTCTTTATTTGGATTTTAAAATCCAAATTTTCAAAAAA +AAATTTAAAAATCGGAAAATTCCAAATTTTTGAGAGTAAACTCGCCTTAA +TTTTTTTTTTCAATTAAAAAAATTAATTTTTTAAAAGAAAATTGGAATTT +TTACCCAAAAATTGGCAATTATTCGAGTTTTCAACGGAAAAATCTGAAAA +ATCCGAATTTTTGAAAAAAAAATCTTTAAAAATCCCAATATTCAATTAAA +AACCGCGAAATTTCGGATTTTTGAGAAAATTTTAATTTAAAAAAGTCCTA +TTTGTACTGCAAATCCTCATTTCCAAATGATGGCCTAACTTTTTCAGAAT +TCTAGGCCACCAGGCCATTTCTCTGCCTCTTACCCCCAGTTTTGACCTCT +TTTTTGAGTAAATTCATCAAATTCGTCCTTCTCATCGCATATTCCTCATT +TGGAATTCGCGATGGCCACATTGGTAGGCTCCGCCCACTTCTGAGCATTT +TTTTTTTGGATCTGATCTGTGTGGGAGGGAAATTCGAATTTTTTTAGAAA +TGAATAAAACTGAAAAAAATAGATTTTTTGGGAAACAGGGAACCTGAATT +TTCGAGAGAAAAAAGGAGGTAAAACAAAAAATTCGATTTTATATTGAGCA 
+AAAATTAATAGAAAAAATAAACTTAACAATTATGAACAGAAAAACCTACT +AAAAAAGTCTAAAAAATGAATGAAAAATTGCAAAAAAAATCTACAATCGA +TACGAGACTCCTCCTCCAACAATGCAGTTCTCGCCGGTGATGTACCTGAA +AAAAAGTGATTTTTTTTTGAATTTTTAGAAAACAAAATTATGAAAAAAAC +CAAAAAATTTCCGAATAATCGAAATTTTTCGGAAGTTTTAGGTTAAAAAA +ATTTTTTTTTGATGAAAAGTTTTTTTTCAACAATTCTGAAAACTAAAAGT +ACCAAAAAATTTTAGTTTTTTCGATTGTTTCAATATGATAATTTTTTTTA +AATGTTTAAAATTGTATTTTTTCAAAAAATATTATCAATTTTCCAATTTT +ATGGTAATTTTGGATGTGAGATTTTTTTCTTCAAATTTTCGAAACGTTTC +TCGATAAAAAAATTTTTCAACTGAAAAACTACAAAAAATCCTCTTATTTT +GAAGAAAAATCGATATTTTCTCAAATTGTTCAGATTTTTTCTGTTTTTAC +CAAAAGTGACCAAATTTTGAGACTTTTCGATCCAATGTTTAAAATTTTTT +TCCCGTTTTTTCGATATTTGCGGTTTAAATTTTTTTTAAATTTTTTGATT +CAATTTTAATAAGTTTGAGTGTTTTTCGATTTTTTGTCCAAATTTTTTCA +AATTTTTTTCCAGAAAATTGGTAGAAGCCAAAAAGTGCGATTTTTTTCCT +TAAAAAAGATAATTATTCAGTTTTTCGACTGTTTTCTTCTTGCTTCCAGA +TTTTTTCTCGAATTTTTTTTATATTTGAATTGTATTTTTAAATATTATAC +TCCTAATCGATAAAAATTAGTTTTTTTTTCAATTTAAATTGGAAAAGAAC +GTTCTTTTTCGTAAATTTTAGTGATTTTTGATTCCAAATTTTTGGATTTT +TTCAAAAAATTTCCCAGAAAATTTTACCTGGCTTTGGTAGAAGCCAGAAA +TTCGATTAAATAAGCTTTTTTGGGTTTTTCGACAATTTGTCAAGTTAGGA +ATTAAACTTTTAAAATTTTTTTCAAAAATTTTAAAAAACTAAAAGTATTT +CTGAGAATTTTTCTGAAATTAATTATTCAGTTTTTCGACTGATTTCTGCT +TGCTTCCAGATTTTTTCTCGAAGTTTTTATATTTGATTTTTTTAAAAAAT +ACTCTTAATCGAAAAAAATTCATTTTTTGAATTTTTTCGGGACTTTTTTT +TTCGAAATTTTCGTAAGTGTTAGTGTTTTTCGATTTTTGATTCTAAATTA +TTGGATTTCTTCAAAAAATTTTTTCAGAAAATTCTACCTGGCTCTGGTAG +AAGCCAGAAATTCGATTGAAGAAGCTTTTTTTTCGGTTTTTTGGACAATT +TTCCAAGTTTTAGTTAGGAGTTAAACTTTTTCAAATTTTTTTTGAATTTT +TTTCAAAAACTTTAAAAACTAAAAGTATTTCTGAGAATTTTTGAGAAAAG +CATTTTTTTTCTCGAATATTTGAACTTTTTAAATTTTTAAAGCTTTTTAT +TTTTAAATATCAATTCGAATATTTTACTCTTAATCAATGAAAAATCGATT +TTTTTGATAATTTTTTTCAAAAAAAAATTCGAATTTGAAATTTTTTTCAT +TCAGAAAAGGGAATTTGACTATTTGAAAACAATTTTTTGTTGAATTTTTC +GTAAATTTGAGTGATTTTCGATTTTTGATTCCAAATTTTTGGATTTTTTT +TTGACAATTTTCCAAGTTTTGGTTAGGAGTTAGATTTCTTGGAATTTTTT +TTTGAATTTTTTTCAAAAATTTTAAAAACTAAAAGAATTTCTGAGATTTT +TTCCTTAAAAAAGAGGAATTTTTTAGTTTTTCGACAGATTTCTGCTTGCT 
+TCCAGATTTTTTCTCCAAGTTTTTATATTTAAATCTTTAAAATTTATATT +TTTAAATAATATTTCAGATATTATACTCCTAATCGATAAAAATTCGTTTT +TTTTTCAAATTTTAATTGAAAAAAATCGATTTTTTCGATAATTTTTTAAT +TTTTTTAAAATTCGAATTTGAAACTTTTTCATTCAGAAAAGAGAATTTGA +TTATTTGAAAACAATTTTTAATTTAATTTTTTAATCCTTTAAATTTTCGT +AAATTTGAGTGTTTTTTGATTTTTGATTCCAAATTTTTGGATTTTTTTCC +AAATTCTCCAAATTCTTATTTTTGCAGAAAATTCTACCTGGCTTTGGTAG +AAGCCAAAAATTCGATTTAAGAAGCTTTTTTTTGGTTTTTTTTGACAATT +TTCCAAGTTTTAGTTAGGAGTTAAACTTTTTTCAATTTTTTTTTTTGACT +TTAAAAAAAAATTTAAAAACTAAAAGTATTTCTTAGTTTTTTCCTTAAAA +AAGAGGAATTTTTTAGTTTTTCGACTGACTTCGTTTTTGAATTTTTAAAA +ATTCAAAAATTGTATTTTTTAAAATTCTAATCGATAAAAATTCGTATTTC +GATTTTTGATTTCAAATTTTTGGATTTTTTTCAAACATTTTTCCAGAAAT +TTTTACCTGGCTCTGCTAGAAGCCGAAAAGTGAGATTTTTTTCCAAAAAA +AGATAATTATTTAGTTTTTTTTCTCGAACGTTTTATTATTTGAACTTTAA +AATTGTATTTTTAAATAATATTAAAAATTTAAATAATAATATAGAATTTT +TTTGAAAATTCCTGAATTTTCTGATTAAAAAAACATTTTCTGATGCATTT +TAGAAACTGAAATTTTTTGAAATCTTTTTGGAATTTTGGAAAATTTCGAA +TTTGTTGATTATACTCTTAATCGATAAAAATTCGTTTTTTTCCAATTTTA +TTTGAAAAAAATGATTTTTTCGATAATTTTTTTGATTTATTTTCGGGAAT +TGACTATTTGAAAACAAAAACTTTAAAAAAAAATCAAACTTTTTTTTTGA +AATTTTCGTATTTAAATTTGAGTGTTTTTCGATTTTTGATTCTAAATTTT +TGGATTTTTTCCAAATTCTCCAAATTCTTATTTTCAGAAAATTCTACCTA +GCTCTGGTAGAAGCCAAACATTCGATTTAAGAATCTTTTTTTTTTTTTGG +ATTTTTTTTTGACAATTTTCCAAGTTTTGGTTAGGAGTTAGATTTCTTGG +AATTTTTTTTTGAATTTTTTTCAAAAATTTTAAAAACTAAAAGAATTTCT +GAGATTTTTTCCTTAAAAAAGAGGAATTTTTTAGTTTTTCGACAGATTTC +TGCTTGCTTCCAGATTTTTTCTCCAAGTTTTTATATTTAAATCTTTAAAA +TTTATATTTTTAAATAATATTTCAGATATTATACTCCTAATCGATAAAAA +TTCGTTTTTTTTTTCAAATTTTAATTGAAAAAAATCGATTTTTTCGATAA +TTTTTGAATTTTTTTTTTACAAATTCGAATTTGAAATTTTTTTCATTCAG +AAAAGAGAATTTGATTATTTGAAAACAATTTTTAATTTAATTTTTTAATC +CTTTAAATTTTCGTAAATTTGAGTGTTTTTTGATTTTTGATTCCAAATTT +TTGGATTTTTTTCCAAATTCTCCAAATTCTTATTTTGCAGAAAATTCTAC +CTGGCTTTGGTAGAAGCCAAAAATTCGACATAAGAAGCAACATCAGACGG +TCTTCCAAGTCTTCCCAACGGAATCATCGATTCCAAGTGTTGTTTAATCT +GACGAGCTTCTTCTCCAGAAGCATGATCCCATACAGCACCAGTTCCGTCT +CCTTCAATCATTCCAGACACCACACTATTCACGCGGACTCCTTGTTTCGC 
+GGCACTCTGAGCTACGGATTTTGTAAGTGATAGGACACTGCTAGATGCAA +CTGAGTAGAGACCCATATCGATTGATGGAGTGAAGCCGAAACATGACGTT +AGGTAGATGATACTGCCGTTTCTGGAAAAAATCGATTTTTTTTGGACGGG +AAATTTTGCCTGCCTACGTGCCTACCTGCCGGCCTATTTTAGCCTATTTT +TCATTTTTTTTTTGTTGTTCTATTTTTTTGCCGTTTTTTGGGAATTTCAT +GATTTCTAGGGTAGGCACGACTTCATGCCTACGTGCCTATCTACCGGCCT +AACATTTGATATTTTTTTAGAATTCCATGATTTCTAGGTAGGCAGGTAGG +CACGAATACATGCCTGCCTACCGCCTGTTTTTTTGAAATTTTTTTGTGTG +AAAAATTAAAATTCATGGGAATGCTTTTTTTTTCAAAATTCAATGATTTT +TAATGCAGACCGCGCCTATCTGCCTACCGCCTATTTTTGGCATTTTTTGT +GTGCAAAACAAAATCAAATTAGCTTTTTTCTTTTCGTTTTTCTACGATTT +TTTCCAAATTTCAAGATTTTTCGGATAGGCACGACTTCCCACCTACTGTG +CCTACCTACCTACCGCCTATTTTCGGCATTTTTTGTGAGCAAAAAGAGAT +CAAATTAGCTTTTTCTCCTGTTTTCTACGATTTTTTTGGGTAGGCACGAC +TCCATGCCTACCGTACCTACCTACCTACCTCCTATTTTCGGCATTTTTTC +TCGTGCAAAAAATCATATTTATGAAAACACTATTATTTTTGTTTTCTGAA +TTTTTTTTCCAAATTTTGTGATTTCTAGGGTAGGCACCGACTTCATGCCT +ACGTGCCTACCTACCTACCGCCTATTCTCGGCATTTTTGTGAGCAAAAAA +TTAACATTCTTGAAAATCCTTTTTTTGTTTTACTACAATTTTTTCAAGTT +GCATGATCTCTAGGGTAGGCACGATTTCATGCCTACTTGCCTATCTACCT +ATTTTTCCCTTTTTTCGTGGTTTAGGTAGGCAGGCATGAGTCAGGCACGA +AAATCTAGAAACATAATTTTGATTCTATCGGTAAAATAGGCTTTCAAGAC +AGTTTGAATTTTTCGAATTTCTACCAGCAAAAAATAATATCTGAAATTTT +CGCGGCGAGACCCAAAAACTGACTGAGATTTGGCGAGCGTCGACATAGCG +GCTTGCGAAAGACGGAACGGTGTTGTCAGATTGTTGGCGAAAAGCTGAAA +TTGGTTTTTCGAGGTCAATTTTCCACGTGGAATTCAAATTTCAGCACTCT +CGAACCTTATCAAAATCCTCGCCGGATGTCTCAATAATCTCGCCGAGCAC +CTCATTCTGTGGTGGAACTATAATTAAAGTGTCCAGACCGCCGAGCTTCT +CAGCCACTTTCGTGATCAGCTCCTTCCTGTGCTCGGCATTTGCCACGTCT +AATGAGAAAGCTGTGACGTCACCTCCAACCTGAATTTATTCCCGAATTTC +TCTTTGTCAAGTTCTGAAAATCTGCCTAAACCTTTATATTGTCCTCTGCA +ACCTTTCCCACACTATTCGGACAGTCTGCGGCGGCGGCGACCTTGTAGCC +GGTGAACGCGAGTCTTCTCACGACGGCTTTACCCAGTGTGGACGTTGCCC +CGATTACAAGTGCACATGACATTACGCTGAAAGCCCGATGAGGAGGCATA +TTTATAGATACACCTAGACATCAGGTACATTTATTTGGAAATCCGTAGAA +AATAAAGAACATGTGAAAATAAAACAGATCATTTTATTATTTAGAGGGGA +GAGGGAGGGGGAGCAAAGTCGCTGACTGAGAACTTTCGAAAACCGGTTAG +TTACTCCAGTTGTACGGCTTATTCATGCCATATTTTCCAAGATTTCCACC 
+ATTGTGTTGGTATTGAGAATGTTGATTCTGCTGAGCTGCCGGCTTCGATC +CAGACATCTTGTCGTAACTCTTGCGCTCGTCATCCTGTCCGTAGAGTTGA +CGACCTCCAGCGCTGCTTCCAACATTGGGCATCATGTTCATGAATGGAGT +AAACTGGTGTGGAGCAGGTGAGTACTGCTGCATGAACAACGACGACAAGT +TTGGCTGCTGCATGTAGTTCGTCGCCTGGAACCCTGGTGGTGGTCCAACA +TTCGACGCCTGCTGTGGCTGCTGATCCCGAGAGCTTTGAGATCCGAACTT +ATTGAAGTCCATAAGTCCATGAGTCTCTCGTTGTTGACCAGATTGCACTT +GTTGCGCAGAAGCAGCCTGTGACAGAGGAGCAGTTGGAAGCAAGCTTGTA +AGATCAACTCCCATTCCGTATTGCATAAGAGCAGCAGTATATTGCTCATC +TCGAACTCCTGGCATTGGATTGTACATGTTCATGTAGTTGTTCATGTATG +GTGCATATGGGAGTTGTTGAGTGAACATCATATGTGGATGCTGTTGTGGA +TGTCCAGTCGGTTGTGATTGAGCTGGTGGAGTTTGTTGCTGCTGCTGCAC +TGGCTGTTGTGGGGCTTGTTGCTGCTGCTGCTGTGGCTGGTGCTGCTGAG +CAGATTGCTGTGTTGGAAGTGGTCCAGAATTGTTGAACTTGTTCGGCGCC +ATCCTGTCCGTTGGTTGATATGAAGCGGACGAAGTGTCATAGGACAATCC +TCTGTTAGATTGACCATATGACAATGGAGGTGAAGTCGATTTTAAGTTGT +AATCGCCGTTAGGAATTGACGTCCTTTCTGGCTGAAAAATAATCGAGAAT +TAGAATAAATCGTCTTTGCAACAAATTTAACTCACCTCAACTTGTTTCGG +CATAATACGAGAAGAGTTAAATATATTTTCACTGGCATTTGAGATCGAGG +CAGCGGAGGATTCAGTAGAAGGAAGCTGTGGGCTTGGAGCAGCCTCAACA +AATCCAAATGAGTAGTCATGAATGTTCGTTGGTGCAGTTGTGCCAACAAA +TTCCACTCCAGGATCAGGAATAATCTGGACTGGTGCTGCACTTGGAATTG +GAGATAATCCTAGTCCTGGTGCTTCACTCAATCCTATTCCAAGATCCGTC +TTCAGTTGAGTGGTCCATGCTTGATTTGGTTCTGGGGTGCTTTGTACATT +TTCATCGTGATGAGTCGACACATTAGGCGTCTGCTCCTCAGCCAAAACTG +ATGATTCACTGAAAATTAAATTTTTAATAATCTAAATACAGGGACAGAAC +AACTTACTTTTGGAAGAAACCATCTTCTTTCTCCGGCTGATGCGAAGTTT +CTGGCTCGAAAAATACTGGCGCTGCTGCTGGCTCTTCTTTAACTGGTGCA +ACTGACGGAAGTGGAGGTTGTGGGGAGAGACTCCTCCTTGGCGGAGCTGC +TGGTTGAGGATTCTGTGCCTGGTGCTTTCGGAGTGCTTCTTTGCGATGGG +CAGCTGCTGCTACTGCAGCAAATGAGATCGGTGCCGGCGCAGAAGATGTG +GGAACAGTGGCGGTGACGGATTCCTCTACTGGTGGTTGAACTTCGGTGAC +TGTGGTATCCACTTTAGTCTGATTCTGAAAAAAAAACACGTTTCGTCATT +TTCCTTCTTCAAAAAATTCTTACCTCATCAAGATCAGCAGTGAACGCATC +TGGCTCAAGTGCTGATGATGGAGCAACAGCTCGAGAATATCCACCTCTGG +CGCCACCTCTTCCACCTCGTCCAACATACGGTTTTTCAAATCCTCCTCGA +GTCGAGCGTGGTGCTGCTCCTTCCTTATTATCACGGTTATCCCGATTATT +ATCTCGGCTTTCTCTTGGAGCTCCATTGGATCGCCCGCGGCCTCCTCGAT 
+CGACGAAACCGGTGCCTCCACCTGAAAAAAACATATCAATATTTGATCTA +TTTCAATCAAGCACGAACCTCTGCCTCTTGCTACAAATCCTCTGTTGTTA +TAACTGCCCTCTTCCGGCTTCTTCTTTTCCTTTTTGGCTCCTTTCTGTTC +AGTCCACGAGTCAAGTTTATCTCCAGCATCCAGAATGTGATCGATTGCGC +CGTAAAGATTGTTGTCAGTATCCAATAGCGCTATCTCAGCTTGTGCTTGT +GTACATCCGGTGGTTTCGATAATCTGAAAATGAAAAAGACATGAGCAGTG +AAAAAACCCCTCTTGGGAATGCGCGAACACTTTAATTAGCCCAAATGAGT +AGGACATCTGGGTACTCTTACTCTCGCTGCACCATCTCAAGCGCGGACAC +CTGCTTTTGTCCTTTTATTTGCCATCTGGCCCCGTCTTATCGATTCGAAA +TTCCTATTGACACTGAAAGCGGACACTAGAGAAGTTCAAAACAACAAAGT +CACGTGCGTCATCATGTCGGTGTGTGGCGAGCGTTCGCGTGATGCAAAGA +TCACTATTTTCAGGAAAGCGTCATTTCCAGCTTGGAGACTTACCGTCTTG +ATCATAAACTGTATGTCCTCCTCATTTCCGGAATTTCCTTCCAGTGTGAG +TCGTGCAAGTCGCGCTTGATCGCTGGTAGCTTTTTTGTCGCCTTTAATAC +CCATTCTGAAAACAAATTATAAATAAAAAGGGTCAGTCATGTGAACGCAA +GAATGGAATGAGAATCACGAGAATACAACTATCAAAGGAAGGTTGAATGA +AAGGAAGGTCAAATAGATGACAAAAGTAGGAAAAGAACGAAATATGGAAC +CTTCCAGATCAAAAAAACATTATCAAAAGAAAAGCCTCGCACAGAAGACT +CTAACGAACATGTTAGGATGAAGAAATATTTCAGCAATACATCACGTTGA +ACTTTGGAGGATATTCTAGAGAAGAAAACCCCTACATTTTACAATAATTT +TGAAATGGAATTCGAATGAACTGCTATACAATGGAGAATCCTATTTAGCG +ATGATAAAGTGTAGGCTTGTTACTAGATTGAATAAAAATTTCGAAATGTA +ACAATACATTGACTGAAAATTTGGGTATTTCAGAGTGTGATATCACAAAA +ATCAATATTTCCACTAAACTCAAAAGTTCTTACGCTGAAAAATCATTAAA +AAATTGAAAAATCACCGCAAGCTAGACTAGAATCGCGTTCAAATCTCGCA +AATTCCCGCAGTTGAAAATGCGGGCGACCGCGACGCGAGCCGCAACGCAC +CCCTCCAAACATGCGGCATGGGTCTCGCCACGACCGAAAGTACGGTCACT +CTTGGCAGTATAAAAGGCGACCATTTTCCGCTATTTTCCGGTAAATTTTC +AAATGAAACTGTGTTGCGGAGGGGGTTTTCCGCTAATTTTGCGGAAATTT +AGCTATTTAATGTGAAAATGTGAATAAAACAAAAGAAAGTGGGGAGGAAT +AGCAAGGAAACACACTGTTGAACCCTGGGAATTATCGATTTTGTATGAAC +TCTTCTTTTATGGCGTCGAAATTATCGACACGAAAACTCAAAACCTTGTC +ACATTTCTGAGAGAGAAATATCATTTTCAGCACACATGAGTCTTCCCAGA +TTTCGACTCGTTCAGGGAAAGGCGATCGGCGAGCGATCAACGCCAGGAGT +CAGCACACCAGAGCCGGTAATTGTTTTTTTTTATTTCAAAATTTCTACAA +CAACAAAAAGAACTAACAATAATTTATTCCTTTGATTCCAGGCCCCTCCG +CAAATAAAGCAGGAAGTCGACTACCAAGATGCTCATCAAATGGCTCCGGA +ACCCGTGGAAGCACCCCGTAAATATTTAAAAATTTAAAAAAGTTAGAAAA 
+AAAATTTGAATCCCAATTTTCAGAGGCTCAAAACCATCAAATGCAGCCGC +CTCGTCAACCTATACAACAGCAGATGCAGCATTTTCAGTCACCATCGCCA +ATGGCTCCACAAGGGCCGCCCGGGACTCCACAAAACTCTGCAGCGGCGGC +CGCCGCTGCTTCAGATGACAAAAACGTGACAAAATGCGTCCGCTTTCTGA +AAACTTTAATTAATCTGTCGAATAACGATGATCCAGAAATGCCGGACAAG +GCCGCCCGTGTCAAAGAGCTAATTCGAGTGAGGAATTGAGCGAAAAACGC +GATAAAAACCGGACAAATTCGGATATTTCAGGGCGTAATTTATCTGGAAA +CGACGGCTGAAGAGTTTACACGAAATCTGCAACAAGTGCTCAAATCTCAG +GCTCAACCGCATCTTTTACCATTCCTTCAGAATACTCTTCCGGCATTGAG +GAATGCTGTTCGAAATGGTTCGGTTTTATGTGCAAAAAAATTAAAATCGA +CAAAAAAAAAATCATCGAAAAACAGGAAAATTTGAGTTGAAAAGCAGCGA +AAAACTTGAATTTAACATAAAAAATTGCAAAAAATCCGTTGAATTACATT +TTTCAAGAAATTGTGTAGAAATTCCATGAAAAAAATTCAAAATTTCCAAA +TTTTTTGGCTATTTCTAGTCAATTTCTTTAAAATTCCATTTTTGATAGCG +AAAATTATCAAGTTTCTAACAATTTCAAGCTGTTTTTGGTGATTTTTTCA +ATTTTTCGGCTTTGAAATTCCATTTTCCGGAGTATATTGTCATAATATAT +CCGAGTTCCACAAAATTGAGCAAAAAAAATTAAAAATTTCCCTTTATTTA +AAAATATTTTCAGCTGTCAGTAATATATGGATTTTCCGCAATTTTTCTCA +CAATTTTGAGCAAATTCCTGATTTTCAGCCAATTTATAAGGATTTTCACC +CATTTTCATAATTTAGGGCTATTTTCAGCTGAAAAATTGTAATTTAAAGT +TTTTAAAATTTTTATAGATTTTCTCAAATTTCAGCTCAAAAATTCGATTT +TCAAGCGCATTTCCAGGCAATTTATTAAGATTTTCACCAATTTTTTTAGC +TTAAAATTTAAAATTTCCACATTTTTTGTCTATTTCCAGTAAATTTCTTT +AAAATTCTATTTTTGAAAGCAAAAATTATCAAGTTTATCGCACTTTTTCA +ATTTTTCCGGAGTATATTTTGGTAATTTATCCGAGTTCCACGAAATTGAG +CAAAACAATTATTTAAAAACTTCCCTTTATTTAGAGCTATTTCCGCAAAT +TTTCTCGCAATTTTCACGCAATTTTCATAATTCAGGGCTATTTTCAGCTG +AAAAATTGCAATTTAAAGTTTTTAAAATGTTTATAGATTTTCTCAACATT +CAGCTCAAAAATTCGATTTTGAGGCGCATTTCCAGGCAATTTATGAGGAT +TTTTACCTATTTTCTAGATTAAAAATCTTTTTTTTTCCTCAATTTTGGAC +TATTTTCCCTAATTTTCAAATTTCCAGGCACTGCATCAGTTGAAGGCGTA +AATCCACCGCCTGGCTACGTTTTCAACAATGGAAGAACCCCAGGACCCCC +TCAGCCACCTCCACCTCAACAACAATCCCAGCAGCAGCCACCACTAGAAA +TGCGTCAAATTCCGAATCCGAATCAAATACCCCCACAAATGGTTCAAGGG +GGTCCCCATATGGTATCTGTAGGCGCCCGGCCAATGATCAGGCCTATGGG +CCCCGGCGGCCCAAGCCCAATGGGCCTACAAGGCCCCGTACGAGGGCCGA +TGGGACATCAGATGGTCCAGATGCATCCTCCTCCCCCACCACAGCAGATT +CAACAGCAGCACCCGGCTCCCCCTGTAGAAATGGAGGTGGAAGAGAATTT 
+ACAGCCTACCGCGGCGGCCACGGCCACGAGGCAATATCCTGAAGGATCGC +TGAAATCGTCGATTCTGAAGCCGGATGAGGTGCTGAATAGGATCACGAAA +CGAATGATGTCATCGTGTTCGGTGGAAGAGGAGGCGCTTGTCGCGATTTC +AGATGCTGTTGAGTCGCATTTAAGGGAACTTATTACACTGATGGCCGGAG +TTGCAGAACATCGGGTGGAGAGTTTGAGGTATTGAGGAGAATTGATTTTG +CTTCAAAATACGGCAGCGAAAAAAAAATTAAGCAAAAATAAGGAAATTAT +TGAAGAAAAATCGTCTTAAAAACAATTTTACATTAAAAAAAAAGATTTTT +AAATTTCAAAGGTTCCGAACTATTTATTAAAAAAACATCTAGATTTTGTT +TTAAAATCCAAACAAAAAACATTGCTGAAACGCGGTAATTTTTTTTCAAA +AAAATATAAAAATCTGAGAAATATTTTCAAAAATATCTCCAATTTTCCCC +TGATTCCGAATATCTATTCGAAAAAATTCAAAAAAAAAATTTCCCTTTAT +ATTTCAGCTTGAAATCGCTTTGTGCATGCACACCATGAGATTTTTCAAAT +GCGCGCCCAGATAAATTCTCATTGGGGCGCACTTGCTTCGTGTCGATTTA +CGGGAGCTCTTCATTTTTAAATTTCTTTTAAGCTTTTTTTTTCAGTTTTT +CAACGAGTTTCCTTGATCTTCGTCGATTTTTTGTCATTTTTTTTCCTGAA +ATTTTGTTTTGTGTCAATTTGAAAATTTTTTAGGTCAAAACTCCTGTGTT +CGTCGAGATCTGACGTAAAAATATAAAATTCCGGGAGTTTTGAGATATAA +AAAAAATATTTTAAAAATTTCAGAAAAAAAACTGACAAAAAAGCGACAAA +ATAAAAGAAACGCGCAAAAAAAAAAAAAAAAAACTTAAAAAAGCACGAAT +AAATTTTTTTAAAAAATGGAGAGCTGCCGTAAATCGACACGAAGCAAGCA +TAAAAAAATGGCTTTTTTCCTAACAATTTTTTGTGAAAAATCCGAATTTT +CCATCCTAAACACTACAAAAGATTCCAGAATTCCGGAGAACTACGTGGCA +ATTGATGACGTCAAACGGCAACTTCGATTCCTTGAAGATTTGGATCGTCA +AGAGGAAGAATTAAGGGAAAGTCGAGAAAAAGAGTCGCTAATTCGAATGA +GCAAGAATAAGAATAGTGGAAAAGAGACGATTGAAAAAGCGAAAGAAATG +CAACGACAAGATGCTGAAGCGAAAAGGAATCGAGATGCGAATGCGGCTGC +AATTGCAGCACTTTCCAGTAATAAAACTGTCAAGAATAAGTTCGTTTTTA +GCGTGAAATTTGCAGAAAAATTATTTTTAAAAAATAGATTTAAAAAAAAC +AATTTCCTGACCAAGGGTGTCATTTTTCGATTTTTCGGTTTTCAAAAATT +CGAAAAATGAAAGTTTCGTTTTTCGATTAAAAAACTGAAAAACCGACACC +TTTGTTTCTGAGATTTGGATTTAGAAATAAGCAAAAAAAAATTATTCAGA +AAAAATTGTTTTACAATGCTGCAAAATCGATGAAAAAATAAAATAAATCA +ATAACTAATTTCTAAAAAATGGAAAAAAATTTCGTTTTTTTCGAATTTGT +ATTTCACAAAAAAAAATTTTTAGAATAAAAATTTTCAATTAAAAAATTAC +TATTTAAAATCACTGAAAAAAAATGAAAAAAATAGAAAATTCAGAAAAAT +AGCGAAAAAAAAGTTTTTTTCCAGAAATTTCGTTAAAACGATCAAGATTT +TAACCCTAAAATTTAGATAAAATCAATTTCTTGTGGTTCTTAATTTAAAA +AAAAAAACAGTACTTTCCAGTAATAAAACTTTCAAGAATAAATTCGTTTT 
+TAGTGCGAAATTCGCAAAAAAAATTACGAAAAAGTGAAAATAGAAGCTGA +AAGGAAAAATGTTTAAAAAATAAGAATATTTTCGACATTAATTGGTTTTT +ATATATATTTTTTTGTAGAAGTTTAAAAAAATTACTAAAATCACTGAAAA +AATTGAAAAAAATTTTAAAATCTGGAAAAAAAGCAACAAAAAATTAATTT +TTCCAGAAATTTCGTTGAACTATTCGGTATTTTAAGCCTAAAATCAAGAT +ACAATCAATTTGAACAAAATTTGCTTTAAAAATACTAAAAATTCGAAAGA +AATAGAAAAAAAAAGAAATTTTCGAAATTTGGATTTATTGTTAGAGAAAA +GCTGAACACTTACAAAAAATTAATTTTTTACGTTTTGAGAAAAATTTTTG +AAAAAAAAAATCGATTTTTAAATAATTTTTAGAATTTTTTAGAACTATAA +TTTTTGAATTTCAGTGACTTATAATCACAGAAAAAAATGAACGAAAAACA +AATGAACATCTTTTCGGCAAAAAAACGTATTTTAAAAAAAAATTTAAATT +TCTAGTGACTTTGAAAAAAAAACCACATTTTCCAATAATTTCCAGGAATA +AACTAAATTTCTTTGGAGAAATTTGAAATTTATTTCACATTTCGAAAAAA +TTTATATAGAAATAAAAATTTATATGAAAATAAAAAATTTGCAGCTCCTT +CCAGTAATGAAACTGTCATTTGTTGAAAATTAGTTTTTTTTTAATTTAGA +AATTCTCTTTTCGAAAAAAAAGGTTTTTTTAAAACTTGATTAAAAATTAA +TTCATTTTTTTCCAAAATTAGGGAAAAAATAATTTCTAAAAAATTAGACA +AATACACTTTTTTGTTGAAAAAAACAACATTTTCCAATAATTTTCTTTTT +TTTTAAATTGCTTCCACTAAATTCACTTTTTGAAGAAATTTTTTGAAATT +TTTACTGAAAATTGTTGGAAAATTATTAAATGTGCATTTTTTTGCAATTT +CCAGTTGATAAACTAATTTCGGTTCAATTTAAAATAAATTTCGAAAACCA +ATTTCAAAAAATTCATTTTTTACAATTTGTTTTTGTTGATAAAACGAGCT +AGTTTCTACCAAAATACCGCTTTTTTACATTGAAAATTTAAATTTCAGAT +GGGAAAACACGGGCGCCGCAACGACAGCACCTCGTCCACGAACAGTACGT +GTAACAACTCGTGATCTACATCTTTTAGTCAATCAGGATAGCCGATTCAC +AGGGTAGCGATTTTTTTTGTTTAAAAAATTTTTTTAAAAAACACCGTATT +AAACATTGATTTTTATTGGAAAAAACTGATTTAAAAATTTTTCAAAAAAC +TTTCAAACGAATGGTAGTTTTTAATATTTTAATTTTAAAAAATTCCAATA +AAAATCAATTTTTATACGATTTAAAAAAATTTTGAAAAGTTATCCAATTT +TCTCCTAAAAAAACTCATTTTTTTTCCAGAACATTCATCCGTGAAAAGAT +GTCATACGGTGGTCCGGCAGTCGATACAACTATCTGAACTAAAGAAATCT +CATGGAAAAAGGAGCGAAAATTCTGCTTTTCTCGTTTTCTTTTTTTAAAT +TTAATTTTATTTTTCCACAAATTAAAACAATCACTTTTTTCCAGTCAAAT +AATAATTATTTCTCATTTTTAAACGAAATGCTAGACATAAAAAAAAGCTT +CTTTTTCAGCTTTTCAGCCAAAAATTTCAGATTTTTCAGAAAAAATTCAA +CAACAAAAAATTGGCAAAGTGCAAAAAAACCAGAATTAGATATTAAAAAT +ACAGGAAAAATCGAGAAAAATGAGTTACAATTCAGAAAATTAGGCAATTT +TTTTTTCAAAATTTCTCAAAAACACTGAAATTTCGGTATTTTTTTCTCTA 
+CTTGAGCACAACCTCGTGTTGCACCAATCGAACATAGTCTTTGAGCGTCG +GTGGAAGTGGAAGCTCATCGATTATTCGCTGATAGACACCGTCGGCACCG +GCGATTTCCAGATTTCTACGTTGGCGAAGGTGGATGGCGCGCTGGAAAAA +TTTCTAATTTCGAGCTGAAAACTCTTTTATGAAAGCAAAATTTCATAGTT +TCAATTCAAAGTTCGAATTCCTCGCAAAATTTCAGTCGGAAAACCGAAAA +CTAGAAGTTTCTTGCTCGAATTTGAAATTTCGAGGGGTACGGTATCCTCG +AAAGTACGCAAACACCGAATTTTGTTTTTTTGAAGTTTTGGCGCCAAAAA +TACAATACCCGGGGTCTCGACACGACAAATTTTTAAAATTTTTTTGAGTA +CTGTAATTTCAAACAAAAAGTTTTTTTTCGAATAAACTCGAATAATACGA +TAAAAAACATATTTTAATAAAAACCGTGGCAACGAAAGTTTGAAAGTACA +GTACTCTCTTCAAAGGCGCACACCTTTTACGCATTTAACTTTCGTGGCGA +GACCCCGGGGATACCGTAATTTTTGACTCAAAATTTAAACAATAAATCCG +TTAAAATATTAAAAAATTTTTAGATATTTTTCAAAAAGACTGTCGAAAAA +TTGTTTTTTTATAATTTTTAGAATATTAACAAACAGTTTAAAAATTCCAA +CAAATTTTGTTTTTCATACCGAAAAGAAGAAAAATGGACAAAATAGTAAG +CTATTTTTGTGTGTCAAAGTGTCTTATTTCGGCTTGATCTACGTAGATCT +ACAAAAAATGCGGGAGAAGAGACTCAGAGTTCTCAACTGATTTCGCATGG +TTAAGAACGTGCTGACGTCACATTATTTTAAGGCGAAAAAATCCCGCCTT +TTTTTGTAGACCAAACCGTAATGGGACAGCTTGGCACCACGTGACACCCC +CCGATTCTCACCTGTTCGTGATTTTGGCGAACTGCCATTTCTTTCAAATA +ATCGTCTAGCGGGGGACTCGATGCTGAAAAAAAAGTTTGATTCGTTAGAG +GAGCACAAAATTCTGAAAATGCGTATTGCACAACATATTTGACGCGCAAA +ATATCTCGCAGCGAAAACTACAGTAATTCTTTTTAAATGACTACTGTAGC +GCTTGTGTCGATTTACGGGCATCGATAGAATATTTTTAAAAAAGAAGAAA +AAAGAGGGAATAATACGAAGAAAAAAAGGAAAAAAATAAATTCATTTCAG +AAATCGAGTTCGTAAATCGACACAAGCGCTACTGTAGTCATTTAAAGAGA +TACTGTAGTTTTCGCTACGAGATATTTTGCGCGTCAAATATGTTGTGCAA +TACGCATTTTAAGAATTTCCAGTTTTTTCAAACCTTTAAACTTACATTTT +CGCGGTTTTTTGGCGCTCTCGGATTTCGGGGAACGTATACGTCGGCGACG +TGACGTGGTTGAAGCAGGGTCCGTCGAACAACATGGCTCTTCGTCTTCCG +ATACTTGGGATTCTGTTGATGTTGACGGTGATTCCTGAGGTTCCTCTAGA +GCATCTGAAAGCTCTTGCTCATCTTCTGATAAATCTCCATTGGGACGTTC +AACCAGGCCGGTGGTGGAAGGATCATTGATGTTCGGAATCATTACAATCT +CCATTCTCGGAGTATTTGGAGTTTCTGGTCGAGCTATCGAGGAAAGTATG +CGATGATGGCGGGGTCTATCATGATTAATCGTCCATTCGAGCACATATGC +TGGTGCTGGAAGGATTGAAAATTGTAAAAACTAAGATTTTTTGGCCAAAA +AAATAATATTTTCTCAAAAATTTTGAAATTCTCGACAAATATTTTTTACT +ACTGCCGGTACAGAGAATGTAGATAGTTGAAGAGACACAGACATCCCGGG 
+ACCCAAGGGACGGGGCGCGGGATGTCTGTGTCTCTTCAACTACCTGCACT +CTGTGCTGGTAATATACTTTTTCCAAAAAAAAAACCTACATTTCGGAAGT +AAAAATGGAAAATTTCGGAGTCTACACAATTGGAAATTTTAATTTTTAAA +AAATTAAAAATTGTTTTAATTCGGAGTTTTAGACGGAAAAAATTATTAAA +CTCTAAAATTTTTAAAAATCGAAAAAAAACAAGTCTGAGAGTGAAAAAAA +TTAATAAATTTTGAACTGAACATTAGATATTTCGATTTTTTTTTGAAAAT +CATAAATGTTTTCTCGCAAAATTCGGATTTTTTTTACCTGCTCGATTTTC +AGTAGCCGGAGCTGTATTTCTGAGATTGGCACCATCCAAATGTGCTCGGG +GCGCTGCAAATTTGATATGAAATTTCACTAAAAAATTAGGAACTTATACA +AATTTTCTAAAAGAAATACTAATGTTAAGATAATCGAAAGTTTGGAGTGA +CAGAAAAAATTGTTTCAGCTTCTAGAAATATTTTAATTTAGAAAAAAACT +TCCAGAAAATTAAAATTTTTTTTTAGAACTTTTGAACATTTTTCCTGATT +TTTCTTTTTTTTCCATTTTTTGGAATTGCTGGGAATTTCTCTTTTTTCCT +TCATCCCACAGCTTCGCTTCAGCCTAGGCCTAAGCCTGAGCCTGAGCCTA +TGCATAAATCTAAGCCTAAGCCTGAGCCTAACCCTATGCCTAAAACAAAG +CCTGGACCTAGGCCCAAGTCTAAGTCTAAGCCCACGCTCAAATTTAAATC +GACTCAAGTAACTACTTTAAAGTTGGCAGAAATATTTGAATTCAAAAAAA +TTTTTAACCAAAAAAAAAATAATTTAAAAGAAATTGTTTTTTTTAAATAA +AATCAACAATTCACCTGTAAAACCTTGCATTTCTCTCCAATTCAACATTT +CTTGAAGAACGTCAGTTCTTGTTTCATTCCATCGACGAACATGATCTGCT +ATAGTATTTCGAATATTTTGTTCATTGTTTTCTTGTTCTCTGGGATTATT +ATTACGACCCAAATATTGATCCATGACGTCATCCAAGTGATTACGGGTTC +TTTGATCTGCTGGGAATATTTATAAGCACTTTTTTAATTTAAAACATATT +TTTTAGTGATTTTTTTTCGAACTTTTGAATTTTGAATTAAAAAAAAAAAA +ATTAAAAAGTTAAATCCTGAGGGGAGCCAAGAAGTGGGCGTGTTTTAGGA +CATACTCTTCACTTACGCTTTGGTCTACAAAAAATGCGGAAAACTTTTGC +CCAAAAAATGTGACGTCAGCACGTTTTTAATCATGCAAAATCATTTGAGA +GTTCTGCGTCTCTTCTCCCGCATTTCTTGTAGATCTACGTAGATCAAGCC +TAAATGAGACACTCTGACACCACGTGGAAAATAAAAATCGAACATTTTCA +AAAAAATTACTCAATTGATTGTCTGCTGGAGTTAGTTGGTCGATTCTGAA +AAATATTAAAGTACTAAAAATTTTCCTTAGCAACATAGCGCGTTTGCATC +AAGAAAGGCGGGATTTTTCTAAGCCCGCCCTTTAATGGATCATCAATTTC +TAGGAAAAATCGATATTTTACACTAAAAGTGATCCAAGAAAATCAATAAA +TCGGAAAATTTTCCGATTTTCCGTGCTTTTTTGCGAAGATTTATCGAAAA +TGATTTCCATTAGAGCACGCTTGCAATTTTTTTCACCAATAGCTTTCAAA +TTTCTTACGGTCTTGGATCAAAAAGTTGTCGACGCCGATTCAACTCTTCG +AATTGTTCAGCTTGATGCCTTTGAACATGATCCATATACGTATTTCCACC +TTTCATCACCATTATTGATCCGCAAATCTGACGATTGACACGTGGATTGT 
+AGGCGTGTCGGCGAATGTCGCGGCGGAGTGCCTGACGCGCCAAGTAGGTC +AACGTGCGGGGACCACTAAAAAATCCGGGTTATTGTGATTTCGTGGTGAG +ACCCGAAAATAGTAGAATTTCCTCGTACATCCTGTGCCTAAGCAGGTTTT +CAGTTTTTGATGGGTTGTTACCTTCATACACCTACCTGCCTTTGTGACTG +CCTCCCTGCCTGCCTAAATGTGTGTATGCCTACCTACAGTCTGTTTATCT +GCCTCAACGCCTTCATACCTACTTGCCTACCTTGCTTACATACCTGCCTA +CGCGACTACCTATCGGCCTACCGGTTGGCAAGTAGGAAGGCTAACAGGTA +GGTAAGCTGGCAGGCCTGAAATTTGCAATTTTTGCGAAAAACCGAATTAC +CTTTCGAAAGGCGTAGGCGTTGAGATCCGTTCCTCCTTCGGAAGCCTGAT +CATTGGAGAAAATGTCATTCTTCCAGGGACGTCATACACAGTACGATGCT +TGGAAACTCTTCTGTATCGGCAAACCTGAAAACCTTTAAATGTGGAGGAG +CCAAGAACTAGACGGAGCCTGTGTAAGGAATATTGTTTCCGGAAACACGG +ATTTTCAAATTTTAGGCTCCACCCACTCACCACGCAAAATTTCATTTTTG +GCCCAAAAAGTAAATGTGCAGAAATTGTATTTCAAATTGTAGAAAAACCA +AGAAATGGGTGGAGCCTGCCTGCCTGCCGGGCTCCGCCCACTTCTTGCCA +AACCATACCCCCTGTATACGTGGCATGACGAGTTGTCCTCCAACATTCAA +CATTCCAAGCATTCGTTTCAACAACTGAGAATCATCGGCAACGAATCCGA +CAAAAATTCGATCATATCGATTTTGATGAGCTTCAAGAAATTTGACATCG +GTGATGTCGCACACTTTGAGCTCCGGCCGAGCCCAGCCGACAGAGCTCGC +TTCGGGTGTAGTTATCCATTGATCAATGCATGTTTCCGAGTAGGTGACGA +GATTCTCGTAGAGTTCGATCCCATGATTTATACCAGTTTCACCTTAAAAA +TAGGGTTGTCAGGCCACGCCCATTTTTAAAAAATGAATTAAAATTTTCTG +AGAAAAAATTTCAAACTTTGGTGGGGCATTAAATTTTTTCACATAAATTT +TTTCGGCGGTAAATTCAAATTTTTTAAAAAATTTCTTTTTTTTTTGGCGG +GAAATGTGCCGCAAATCAGAAAAACTGAAATTTCTACAAAAAAAGTTGGT +GGCCGAGTTTTCTCATTTTGTGGTCAGAAAACTCGGCCACCGATTTCTTT +TGCGGCCCTGTGATTACTTAAAAAAATCGGTGGCCGAGGATTTTTATTTT +CGCGGCCACAAGTAATGAAGATTGCACAAAATTGATAATAGGGAAAACGC +GGCCACGAAGTCTAACATGTGCCACGTGGCCGTTAAAAAAAAGCCGGTGG +CCGAGTTTTATTATTTTTCTAGGCCACGTAACCAGAACTCACCCAATAAA +ATGCCAGCAATCGTACTTAAATACCCTGAACCAGTTCCAATATGAAGAAA +CGAGTGTCCTTTACGCAAATCCAAATAATCGAATAATTTCGCATAAATAT +CAATTGCTCCAACACGAAGTGCTCCCGGATAGAATGGACCGCCCGGTTCT +GTGGAGGTCAACGACGGAAGACGTGTAAATTTCCGTTCACTAATCGGAAG +AAAATCGGATCGATCGACAAGTCGAAAGGCTCGTTCGATGTTTCGACGAC +GGATTGTGTCGTTTTTTACGAGAAAATCGATAAGATCGTCGTTTTGGCTT +TCACTATTGCCCATTTCTGGAAAATCGATTTTTTAAAATTTTAGATTTAT +TTATTGATTTTAAATAAATAATTAGAGAGCAGTCATGTGTTTTTCCTTGT 
+ATATTTACGAGAGAAATCTCAAAAAATTATTCAAATAGGAAATTTTACGC +CAACAATAAAATGTAGAGTGGTGTGCGCCTTTAAAAAGTACTGTAATTTC +AAACTTTTTGGCGCAAAATTTTAGAGTAGTTTTCTTTAAATTAAAAATTC +AAAAAAAAAACAAATCAAGATAAATACAAAAGTTTGAAATTACAGTACTC +TTTAAAGGATCACACCATTTTGCATTTAAAAAAATTTGTCATGTCGAGAC +CGTAATTTTAACGCAAAAAACGTGAATTTTCGCGTCTGTGTAAAAGAAGT +TAGTTAAGGAGAAAAAACAATTGAAAAATCAACAAAAATTGAACTTTCAA +CGGAAAATCAAAAATCGTTGCGAGTCAATAATTTTTTAGCTTTGAGGAAA +ATAACCCTTAAACTCAAAAAACAATTAATCGACTCAACATCAAAATTTCG +TTACAAGACCCAAATTAAAATTTTTTATTTTGGGATAATTGCTCCTAAAA +TTAATAAATTCCAGAGAAACGAGTAAATCGTGGCGAGACCCACTAGCGTG +TACCTTTAATTTCGAAATTATTTTGGGCTGTTCATCACGAACACACGTGT +TGCCTAGCTACCAAATTCAAAAAACGAAAAACAAAAAAAACAGGTTTCTT +TGAAATGAAAAATCGATAATCAGCAACGTGGCAAGATGTCTCAAATTATT +GGAAAAAAAAAACGTTGAAATTCTCAGAAAACGGACCGTTCTGTGGCAGA +ATTGTGAACGGCGTGCAAGGCCACGACCCGTGGAGAGCGCGTGAGAGTGC +AGAAAAAAGGAGGCGAGAGAAAACGGCTGACCCGTGGATAGACTTTGTGG +ATTCAGGAGACGCAGAGCGACCGTGAGAGACCCAGAAATATATGCAGAAA +GTTAAGGCGTGCAATAGTGTTGGAAATTATAATAAATGGGTACGGTAGGT +ATGACCTTTCGGGATCTTTGTGGGTCTCGCCGCGACGAGTTTTCAAGGTG +CATAAGGTTTTTGCGATGGGTCTCGTCACATAATTGTAGATTAAAATTGT +TGATTCGAGATAAATGGAAATTCGAAGCGTCTAAAACAGTTAGAACTTTT +AGTTTACATTAGGTTGAGCTCAACTGTCCAACCCCGTCAAACTTTTTCCA +CTTTTTTTTTACTTTTGCCTTTATGTCCTTTTCGGCCGTTTTCCTGGGGG +TTTTTACCAGACTACGAAATATCCTAACTTGGTAAGCCTCAGCCTAAGCC +GAAGCCTAAACCTTGGCTTTCAACTATACATACACATTTCCTCCTTTTTA +AACGATATTAATTAATTTACAAAACACCAAAAAAATATACAAAAAATGAT +TTGAATTTATGAGTTTCCCGCTAAATACCTACCGAGACCCAACTTTTGGC +CGTGGAGCGCGCTTGCACTTAAACTTCAATTTTTCACATCCCCATAATAA +CTTCCTTGTTTTAAATTGTTTTTCCCTATATTATTATTATCCTTATCACG +AGATGCCCTTGCAAAACACATTGTAAAAATAGTATTCTCAACTGATAATA +ATATTTTGTGTACCCCACGATAGTCCTTCTGGACCATAAAAATATTGATA +ATTACTGATAAGCTTTCTCTGTGGTTTCTGTAGTTTTGGTGATATTTTGG +GCAACAAATTTATATTATGGAAACACGGAATTCTGAAAATGCGTATTGCG +CAACATATTTGACGCGCAAAATATCTCGTGGCGAAAACTACAGTAATTCT +TTAAATGACTACTGTATCGCTTGTGTCGATTTACGGGCTTGATGTTGGAA +ATTAATTTATTTTCGAATTGTGACAGCGATATTCAGTTTTCCTTTGTTTT +TTTTTTTCGTATTATTTTGTTATTTTTATGCTTTCTTTTAATATTTTATC 
+AATTAATGAATGATTTCCGTAAATCGACACAAGCACTACAGTAGTCATTT +AAACAATTACTGTAGTTTTCGCTTCGAGATATTTTGCGCGTCAAATATGT +TGTGTAATACGTAAAGAAAGTTTGAAATAATTGTTTTTAAATAATTTCTT +ATTCATTCACTGTCAATTCTCACCGAAAAAAAAAATCCAAGTTCCTCCCG +CCAACTTGGCACGATGCCAAAAGGTTCTCAGGCCCATTCATGTCATTCAA +TCCATTCAAATGAACGACTTTTCGTTCAGTTTAGGCTTCTTCTTTTTCCG +GTTTAGTACTTTTTCAAACCGAAAAAAAAAATCCCAAATTGAATGCAAAT +GTGCTCTATTTGGAATAGACTTCACCCTATTTCGGAATTTTTTCCGATTT +TCTGAAAATCTTTAACACTTTTCTAACTGTTGCATTAATGACATTTTTCT +AGAAATTCTACTGCAAACGCGCTCAGTTGCGAAAGTTTTTGGGTCTCACA +ACGATTTCGGAAGAAAAACGTTTTATTCGTTGCGAGACCCGGAAGAGCCC +GGTGGAGCGCGTTTGCAATTTTTTTGGTTTGAAAATTTTGCGCAACGATG +CTCCAAAATAACACCTCGCGAATTTTATCGTTTTTTTAAATTATGACTTT +TCCGGTCCCAGAACGAATTACAAAAGCAATTTTTATGAAAAATCGTTACA +AGACCACAAAAAAAAGCGCAATAGAGCGCGTTTGCACCTTTTTTTCTTGG +AAAAGTCTCGTTTTTTTCTCGGTAAATTCACTGTGACATTGACCTTTCTC +GAAAAACGAAAAGTGTAATGGAAAGTGAACAAAAGTGACAATAATCTGGT +GAATTGCGTTTTGTTGTAATATTTTTGTATCTTCTTTTCTTTTTTTAATC +TGCGAAAAATAGAAAAGTTTTGGAGAGAAAGTGATTTTCGACGAAATTTT +CCTGATTTTGCTAAAATTGAACAAAAATTGTCATCATTTTCTGTGGGGTT +TCTCAAATGTTTTCTTAAAATTTATGCGCCTTTAAATTTGGGGGATTACT +GTAATGTTTGTATTTTTCAATTTTGTTTCGGGAACACAAAATTCTGAGAA +TGCGTACTGCACAATATATTTGACGCGTAAAATATCTCGTAGCGAAAACT +ACAATGGATTAAAATTTAATGAAATATCGTTATCACTATTCGAAAAGAAT +TTCATTATGAAGCCCGTAAATCGACACAAACTCTACTGTAGTCATTTAAA +GAATTACTGTAGTTTTCGTTACGAGATATTTTGCGCGTTGTCCCCGTAAT +AGTTTAACTTTTACCACGTGGCCGCCAGAAGAAAAACTTCTGCTACCGGA +TTTCATTTCCTCGGCCACAAACCCTTTTGACCCCCGAAAAAGTACATAAA +AACAATTCCGGGGACTGGGTCTCCAATCTCTCGCCCCCACTCACTAACAA +GGGGGACACCCTCAGAAACGAATGCCGTCTAACCGTCTGCACCCTCTCTC +TCGTCGCTCCAAATTCTTGGATCAATGCCAACTAACACGCAGTGTCCCCC +CTCCTCCTTCATCCACCCAAATGTTTCATCACTTCCCGGCCACCGCAGGC +TTCTCTCTCTTTCTCTAAATCTCTCATAAACTACTGTCTCTCTCTCTCTG +CTTCTTTTGCTCAATTGTTCTTCTTCATCACTACTATTTGCTCAATTTTC +GTAAATATTTTATTTTAAAATATCCCTTTTTCCTCCCCCCGCCTCTCTGC +TTCTCGCTTGACGCGCCACATACACTAAAAAAACCGGTTTTTTCTATCTC +TCTCCGCCCGATCATTGAATAGATAGTGTGTGCTGTACATCAAATTTCCA +TGGAAAAATCGCGCCGAAATTCCAGAAAATTCCACTTTTTCTAGAATTTT 
+CAGCGATTTTTTTCGGTTTTTGAATTTAATACAAAGCGAAATTCCGTGAA +AAATTAATTTTTCCTCAATTTTTGACGAATAAAAAATATTAGTATTGTTT +TTTTTTCAAGAAAGTGTGATTTTTTCACAATTTTCTATGGTTTTTACGGA +GAAAATCTTGAAATTCCACAGAAAAACTTGACAAGTCTTTGAAAATTAAT +AAAATTTCACCGTATCTGCCGATCGTTGCAAGACACACTCCACAAAAGAA +GTCGTGCGCCTTTAAACTGTGTACGGTAACTATATTGATTTAATTTTTTT +TTTCGATTTTGCGTCTATTTTTCCAATTTTTCTCTGATTTCGAACGATTT +TTTGCATTTTACCTCAAAAAAATTAATTTATTTAGATGATAAAGTGGAAA +TTGCTGATTTTTTACAAAAAAACACTTAAAAAACGAGTTTTCTCATCAAT +TTCAGTTGTTTCTGGCATGGCTCATTGTTTATCGACACCTGCTTGTTTTT +CTCCTCCTCTCCCATCAATTTTATGAGTTTTTCTTCGAAATTTCTCCATT +TTTCCCGTTCAAAAACCGCCAAAATTCGCTTTTTTTTTTCCAGAATTCGC +CTACTATAGCACTACTAGATGTGTTAATTTGGAATTATTTTTCAAAAAGA +AATGTATCCGGACGGGCTAACTGAAAATGAGAAGCTCAAAGTGAAGGAGC +TGTGCACTATGGTCAATATATCCGATGAGGATGCGATTAAATTGTTAAAA +GGTGGATAATTAAGCTGAAAAGTGGAAAAAAAAATGCATTTTTTACCTAA +AATTTACCTAAAAATTGGACCAAAACGTCATAAAAATACTGAAAAGTCAG +AAAATTGTCTAAAAAAACTAATTTTCGCTAAATAATTCCAGAAAACGACT +GGGACATTGCAATTTCATCGCGAAGAGTGTCGAAACAAGAGGATAAAACA +TTGGTTCCGGGCACTTCGAGAGGTAAATTTTTGAAAATTTTCAGAATCAA +ACTGAAAAATAGATATATTGTGTGAAAAAATCCATTTTCGCCGTAAGAAA +AGTGAATTTTGAGCGGTTTTTCAAATTTTAAACTGAAAAAAAAAATCAAT +TTATCTGAAATTTTGCTCTTTTCGGCGGGTTTTTTCTCAAAATTTGATGA +AAAAGTTCTAGAAAAACGACAAAAAAGCCTTTAAATGTACTTTTTTCTGC +GATTTTTTATCTTAAATTTAAAATTTTGCGCGTCAAATCTGGTGTTTTGA +CTCCGCCCACTAAATTTAAACTTTTCGTGAAAAAAAAATGTTTTTCTCCG +ATTTTTAATTGTTAATTTAGGCAAAAAAATACAAAATTTGCCCAATTCAC +CTATTTTTTCCAAAAAATCACAAACTTTGCGCGTAAAATCGGGTGTTTAT +GCTCCGCCCACAAAAATTTCTTAGAATTCGATTTTCGGCGGATTTTTTCT +CAAAATTTGATGAAAAATCTGAGAAAAACTACAAAAAATCTTTGAGCTTT +ACTTTTTCTCTGCGATTTTTATCTAAAATTTGAAGTTTTGCGCGTCAAAT +CTGGTGCCTTGGCTCCGCCCACAAAATTTGAAGTTTTCGTTTAAAAAAAT +GTTTTCACCACTTTTTCGCCGCTTTTTCGCCTGAAAAATCCCAATTTTTT +TGCAGAAGAAATCGATTTGGATCGATTATCGAGTCGTCTACGTGTTCACG +GATTAGCCTTCTATCTTCCAGATTTTGGAGGTTTTCCTAATGAATTTCGA +ACGTTTTTGGAGAAGGATTTGATAGAGACGCAGACACAGAAGAGACTGGA +GGCGTCCAGTAAGTTGTTTTCTGGAAAAATTGAAAAAAAAATTGAATAAA +AAAAATGATTTTTTTAATCGAAAAAAAAAGTTTTCAAAAATAAAATTGGA 
+AAAGGTGAAAAAAATTGAACAATTAAAAAAGAGCGACAAATGAATAAAAA +AAAACTTTTTTTTTTTCGAAAAAAAAGAAAACAAATGTTTTTTAATCGAA +AAAATAAAAAACGAAAAAAAAAGGAAATTTTTTTTACAATTAGAAAAAAA +AAATTAAAAATGTATTAAAAATTTTTTTTTTTGACATTTTCTTTCGCTTT +TTAATAAGTTTTGACTAGCGGGCCCTGCGGGCCCGCCAGTTGCAGGGGGT +GTAGGGCGAGTCCCCCAGTCGGGCGTAGGTTCTCGGCTTCGCCTCGAACC +TGTCAGAGGATTCGCGACAATTGCAGTGAGTCAATGGGAGGAGGGGAGAC +CCACTCATATTTTATAAAATCGAACATTTCTATTTGAATCCCGAGCACTC +ACAAAAAAACGAAAAAGTTTGTCAGTTGGGGGAATCGAGCAAAAGAGCAA +ATTATTCACAGCCATACGCACTAACCACTCGGCCATGCGGGAGAGACCTG +TCACAGAAATGTAGGGAAGGAAAATTTCTGGGGGGGGGGGGGGGGGGGGG +AGTTGTCTTTCGATTCCGTTTTATTCAATAATGACAATTTGGGGAAAGAC +GTTTGAAAACCGTTTATCACTGATAAGTCAGCAGGAAAACAATTTTTTGA +AATTTTTTTATAGCATTGTACTCATTATTTAATTCCCGAAAGAGAGCCGA +AAAGTTGAAGGTGTTATCTTGTAAATTGGTTTATTTGAAGAAAAAACAAG +TTTTGGCCTGAAAATTTTGAAAAAATAATATCTCTTGGCAGAGCATTGCT +AATGCGACGAAACTCCAGCTTCCATTAGATAAAATCAAAAACTATGAATC +AGAAATACATTCCGCAAAACTTTAGTGGAAAAAATGTTCAGGAGACCCAG +GAAACCACTCCCCCCAGTACTAAATTTTTGAATTATTTTTTCTTGAAAAA +TTTTCCCACTGAACTTTTTACAAATTTTATATGTCTCGATGCGTCTTGAT +GAGACCTACACGTCAATTTTTGGAAAACTAAGAAAACTTGAAAACTGACC +GAGTTATGATTGAAAAAGTAGATTAGCAAAGATGGGAAAGTGTGCAAAAT +TTGGCACTTATTCGTCTTGCTCGGCCGACTCATAGTACTTTTTCCAATTC +TGAGTTAAAAATCGTGTTCAGCGTACTTTTGTACGTGGGATAAAGAAAAA +AAATATCAAAAAAGATGAAGTAGAACTTGAGATAAAGACGAAAAACTACT +TTTTCGGAAAAAATTTTTTTTTGGCAAAATGGCATTTTTTGGCCTTTTGT +TTTATCACAACTTTTTGCCTTTTGCACTTATGAACTCAAACTTTCTTTCA +AAAAATCCACCTCTCTGAGTAGTATCTTGCACATAAATTTGGAACAAAAC +CGAGCAAAACCCGAATTTTAATTCAATTAAAACATGGTTTTTTGGGGGTA +AAAAAAGCAACAAAAAATTTTTTCAAACTGGGGAAAGCCGCCCTGAGCTC +AGTTTTGCTCCAAACTTTGTGCAGTTTTTTGCTCCCCCGTGGGGTGAAAT +ATTTCTAGTAAGCTGTCAAATATTACAAAATTCAGTCAAACGGCTCTGGA +GTTATTAATGAAAACGCAGTGTGACATTTTTTCGCAAGCCAAAAAAAACG +CGAAAAAACGCGAAAAAGGGGCGGAGTCTGTACACTCGGCATTTATTAGA +GGCTGCTTGGCAGATTTTGAAATATCGGAAAAACTTTAAAAGTTCTTTTT +TTTCCAGGACACCTAAACTGGTGGCATCAATTCGGCCAAAAACTCTACCC +GCTATCAACTACTGGAGACGGAAATTGCCTTCTTCACGCCGCATCGTTAG +GTAAATGGTTTTTCAGAGCGAAAAATCGGAAAATCGCTAAAAACTTACTA 
+TTTCCACTGAAAAAATTGTAGAGATCGCTCAAAATTTCACTATTTTCTGA +ACATTTTGATAAAAAACGACAAAACTATTTCAAAACGAGCCCCGAAAATT +GTTTTTTTACTAAATTTTTTTTGGAGAAGTGCAAAAATTTAAATTTTTGA +ATTAAATCTTTAAATCTTCAACAAAATTTAATCGTAAATTTTCAGGTATG +TGGGGAGTACATGATCGTCAGCTATCACTTCGTGAAACTCTCTACGAACT +TCTTACAAATGGCGCCAGAAAAGAAGCAATCCGACGGCGATGGAAATGGG +TTGAGAATCATATGAATCAGGTAATTGGCGAGAGAAGAATCTGTGAAAAA +TTTCTTAAAATTCGGTGAAAAATGATCAAAAATGAGCTGAAAACCGGTCA +CGTAGTTGAAATCCGCGGAATATGAAATTCCCAGAGGAAACAATTTTCGT +GGATTTCAACTTCGCGAATTTTTCTCAAAAAGCTTGAGTAAAAACTCACG +GAGTTGAAATCCGCAGAATTCGAAATTCCCAGAGGAAACAATTTTCGTGG +ATTTCAACTTCGCGAATTTTTCTCAAAAAGCTTGAGTAAAAACTCACGGA +GTTGAAATCCGCGGAATTCGAAATTCCCAGAGAAAACAATTTTCGTAGAT +TTCAACTTCGCGAATTTTTCTCAAAAAGCTTGAGTAAAAACTCACGGAGT +TGAAATTCGCGGAATTCGAAATTCCCAGAGAAAACAATTTTCGTAGATTT +CAACTTTTCTAGATTTTTTGCCAATTTTTTATTACATGTTACAAAAAAAC +GGCTGTAAATACTCACGGAGTTGAAATCCGCGGAATTCGATTTTTGTAGA +TTTCATCTTTTCCAGAATTTTCCAACAAAAAAAACCGGAAAAAATTATCC +GACACTCCATGGAATCTAGTTTTCGGAGATTTCAACATCGAGAACTTTTT +TCGCACATTTTTTTGCATTTTTAAAAAATTTCACCAATTTTCCAATTAAA +ATTTTTTAAATTTCCAGAGTAACGGTCTCGCATTGACCCTCTCGGAAAAT +GAATGCTTGAGTAAAAACTCACGGAGTTGAAATCCGCGGAATTTGAAATT +CCCAGAGAAAACAATTTTCGTAGATTTCAACTTCGCGGAGTTTTCTGTCA +ATTTTTTGAGATTTTTTCTCAAAAAGCTTGAGTAAATACTCACGTAGTTG +AAATCCGCGGAATTCGAGATTCCAGGAAATTCGATTTTCGTAAATTTCAT +CTTTCCCAGAATTTTCCAATTAAAAAAAAACGGAAAAAATTATCCGACAC +TCCATGGAATTTAGTTTTCGGAGATTTCAACATCGAGAACTTTTCTCGTA +GATTTTTTTGCATTTTTTAAAAATTTCACCAATTTTTCAATTAAAATTTT +TTTAAAAAATTTTCAGAGTAACGGTCTCGCATTGACCCTGTCGGAAAATG +AATGGGAACTCGAATGGGATGTTGTACTCGGATTATCATCTCCATTACCC +CGTAAACAAGAGGATAATGGCTCAAATTCCACAGATCAAATCTACGAGAG +TCTTGAGGCAATCCACGTGTTCGCGTTGGCTCATGTGCTCAAAAGACCAG +TTGTGGTTGTATCGGATACGGTAGGGAATTTTAAAATTTTTTAATTCTTC +AAAAATTTATTGAAGAGAACAGTAACAAAAAATTTGGTTACTTGTGAATT +TTACAGAAATATAGAAAATTTTAATTTAAAAAAATTTCGAGATTGGTGAG +AATCAAGGGTGTCAAGTCCCGTGTCCCGTTTGTCCCGTTGTCCCGTTTTT +TGAGTGTTTTTACGGGAACGGGACGTCTTTTGTCCCGTTTTTGAGCGTTT +TCACGGGAACGGGATGTCCCGTTGTCCCGTTTTTTGGGTGTTTTCGCGGG 
+AACGGGACGTCCCGTTGTCCCGTTTTTAAAATTTTCACGGGAACGGGACA +TCCCGTTGTCCCGTTTTTGTCATTTTTACGGAACATTGACACCCTTGGTG +AGAATAAAAAATCAATAATACACATAATTTTTGGAAGAAAAATCGATTTT +AAAAATGAAGCTTAACATTTTTTTTCGAAAGAAAAAAAGTGTAAGAAGTT +TATCAAAAAAAAAAACATTGAAAAAAATATCTTCAAAAATGTTCAATTTT +GTTTTAAGGTGCAACTATTACAAGAGCACACAATTCTTAGAATACGTATT +GCGCAACCTATTTGACGCGCGAAAACTACAGTAATACTTTAATTGTCTAC +TGTAGTGCTTGTGTCTCGATTTACGAGCTCGATTGATAGAATATTCAAAT +AATTTATTTATCGATTTAATATTCAAATTAGGCAAAAAATGAGAAAAAAT +ATACATGTGAAAAAATTAATTTTAAAAATAGAGCCCGTAAATCGACACAG +TAGTCATTTAAAGGATTACTGTAGTTTTCGCTGCGAGATATTTTGCGCGT +CAAATTTGTTGCTTAATACGCATTCTTAGAATTTTGTGTTCACATAATAC +GGTTTTTCAAAAATTTTAAATCTTCATAAAATTCATCAATAATAACTAAA +ACTTAATCAAAAAAAAATTTAATTTTAGTGTAGTCATCAGAAAAAGTCCA +TCAAAAAAAGTCCATCAAAAAACCATCAAAAAAAGAAAATTGAAAAATTT +GAATTAAAACATTTTTTTCGAATTTTTGAAAATAAAAGTGTTTGAATATT +AAACGTAATTAAAAAAAAAGATTTTCCTTTTTTAGAAAAATCTATTCATA +TTAGAACAGTCGATTTTATAAAAGTATCGATTTTTTTATTAATTGGATTT +TTTGTAAAATAATAATCGATTTTGAAAATAAATTGCTTAAAATATTGTAT +TTTTTGACAGAAAAAATGTAAAAAATTTATCAAAAATAAATAAAATTGCA +AATAATTTTACCTACAAATTAATTGTTTATTAAATAAAGTTTAAATAAAT +TAGAACAGGAGTAAAACGAGTTTTCAAAATTATTCTTTAAAATCTCGGGT +TCTTTTAATTCAAAAATTCTTTTAAAAAAAACTCAATAATTACGGATTTT +TCAAAAATTCGAAAAATTCAGAAATTCAGAATTTGGATAACATAATTCTA +GTTGACTTCCAAAAAAATGATATGTGACTTACTTAAGGTACAACTAACAT +TAATTTTCCAAAATTCTTATGGCTGCTTTAAAACACGCCAATGGGGTCAT +AATGACCGAATATTATGTTTAAAAAAATCAAAAAAAATTTTCTGATTTTA +TATGATTTTTTGAAAATTGGAAAAATCACAGTTTTAACCTAATTATTTTT +GAATTTCTGCCAATTGGATTTGTTCGGTGCAGCGCGCTTGCATTATTTTT +ATTTATTTATTTTATTTATTCTCGTTATTTCACTGATTTTCTTCATTTTC +TATGTTTTTTTTTCTCGGAAAATGAAAGAAATAAACAAGACAAATGCGAA +ATGTTTGTTAAAAAGTAATTGAAAATGCGTAAAACTGTGATATTCTGAGT +TCCGACGACGAAGAGCCTGAAATTAGTATATTTTTCAGTTTCACTCATTT +TCAATTACTTTTAAACAAACATTTCGCATTTTTCTTGTTTATTTCTTCCA +TTTTCTGAAGAAAAAACATAGAAAATGAAGAAAATCAGTGAAATAACGAG +AAAAATATATATAATTCATTAAATAAAAATAATGCAAGCGCGCTCCACCG +AACAAATCCAATTGGCAGAAATTCAAAAATAATTAGGTGAAAACTGTGAT +TTTTCCAATTTTCAAAAAATCATATAAAATCTAGAAAATTTTTTTGAATT 
+TTTTTATCATAATATTCGGTTATTGTGGCCCCATGGGCGTGTTTTAAAAC +AATTTCCCCACTGAGCGTAGTCCACCTTTGAAATGTTCTCAAAAAATGAA +AAAAAAACGAGTTATAAAAATTATTTTTTAAAAATCCAATATTTTAAATT +AATTTTTTTTTGCAGATTCTGCGAAACGCGAAAGGCGAAGAACTGTCTCC +AGTGGCTTTTGGTGGAATATATCTTCCATTAGAATGCCCACCATCACAAT +GTCATCGTTCACCACTTGTTTTATGCTATGATTCTGCTCATTTCTCACCA +CTTGTACTCATGAGAACAGAGACTAACAATAAACGTAAGCAATTTTTTTT +TTGAGAAAAAATATTTTCAAAATCATTTTTTAACTGAATTTTCAAATTTC +CCCCCCCCGTCCCCCCCATTTTTTTCGAAAAGTGAATTGGAATTTTCCTG +AAATTTGCACAAATTTTTTTTTGAAAATTCAAATGTTCGTCAAATAATTA +TTTTATTCGAAAAATCGACATTTTTCCAAATTGTAATATTTTTATTCTAA +AAATCCAAAATTTGATTTGGTAAAATTTCTGTCAACAATCAATTAAAAAT +CCAAAATTTTCAAAAAAATTGTGATTTTATTCGGGAAATTCGAAATTTCC +TATTTAAAATTGGATAAATCTAAAAGTTTTCGATTTTATAATTTTTAGGA +AAACAAGTTTTCAAATTTTTTTAATTATTAAAGTTTTTTTTTGCAATTTC +ACTAAATGAGCCAAATTTTAAAAGTGGAGCACCGAAATTTGAGACTTTGC +TTTTTTAGACTCAAATTGGTCCAAAACTACCGAATTTTGTAATGATACAT +TCTGAAAATTTCTCAAAAAAAAGTTATGGCTGTTTAAAGTTCGGCAAAAT +AAGGCCCATTTTCAGCTAAAATCAAAATTTTTTCCAACTTCTAGGTGTCG +CAACGTCTGGACCCTAATTTTTATTTATTCATCACTTTTTAATAAATATT +GTGGCCTTTGATTGGGCGTTTAATCGTTGTTTTAAGTACATTTATGATCT +TTGGAGTACAAATAAACGTTACATTTTGTACCCCAAAGACCATAAATGTA +TTTAAATCAACGATTAAACGCCCAATCAAAGACCACAATATTTATTAAAA +AGTGTTGAATAAATAAAAATTAGGTTCCAGACGTTGCGACACCTAGAAGT +TGGAAAAAATTTTGATTTTAGCTGAAAATGGGCCTTATTTTGCCGAACTT +TGAACAGCCATAACTTTTTTTTGAAAAATTTTCAGAACGTTTCATTACGA +AATTCGGTAGTTTTGGGCCAGTTGGGTCTAAAACAGCAAAGTCTCAAATT +TCGGTACTCCACCTTTAAAGAATTCATATAATTTTCTAAAACAACATATT +CTGATTTACAGAAATACATTTAATATCAATATGAATTATTTGAAAAATCA +AAAAATTTATATTTATTTTCAGAAATAATTCCGATAATCGATGTGAACCG +TAACCTGCTGCCTGTCCATTTTGCAGTTGATCCAGGAGAGTCATTCGATT +GGTCCAAATTAGAAACAAATAGTAATACACAGACAACTACTGATATGTCA +CAAATTGATAAGCTTGCGTTGATTAGTCAATATATGGATATTATTAGGTA +AGAAAATGAGAAAAAGGATTTTTTCAATTTTTTTTTCTAAAAAAAATATT +CAAAATATCGTTAAAAAGCCAAAAATGTGTACGTTTTAATTTAAAAAATA +TTAAAATTTTTTTGTCCCAAAATTAAAATATCGAAATCAAAATGTTTCCG +ACATTTATATAGAAATTCTGGAAATTTTTTTAAAATCGAAATCATATTTT +GAATGAAACTACAGAAAACAGCGCCAAAAAAGAAAAAAAATCGTTTTTTT 
+TTTTCGAATCTTTCATTACAAATATTTGAAAAAACGATACAATTATGTTT +CGCGAAAATTTTTTAAAAAAACTTTAAAAAATTATTAAAATCAGCTATTT +AATCACAAATGAGAAATATATCGGAAATCTGGAAAAAATTAAAAAAATTT +TTTGCGTATTTGTCTGAATTTATTTTACAAATACCAAAAAAAAATCAATT +ATTGTTGGAAAAAATTGAAATACAACGGAAATCGAATTATTTTTATTTAA +AAAGAACGAAAAAAAATCCAGAAACCATCACAAAATTATTGGAAAACGAG +AAAGCTGATTTTTCGCTAAAGTTCAAAAAAATTTACAAAAAAAGAAAAGT +ATTAAAATCGGTAATTTTCCCAGCTTGAAAAACTGAAAAATTCGAGTTTT +CTAAAAAAAATTAAAAAAAAAAATCAAAATATTCCTCAATTTTGGAAAAT +TACGGAAAAAGTAGTTTTCTTTAGCTTTCGGCCAAAATTTTATCATAAAA +ATTTCCGGAATGCATAGTTTTAGAGAATAATTTTAAATTGAAACACTAAA +TTTTTTAAAAAACTTTTTTTTTTGAAAATCCCGGATTTAAAAAAAAATAA +AATTTGAATTTCATATAAATATTTTGTTAGGAAAAATAGTTTCTAAACAA +AAATTTGAAAAAAAAAATATTTTTTTTTTCATATTCTTTGAATAAAAAAA +ATCTGAATTTCGAAACATTTCAGATTAGACGTTCGTCGTGGCTCAATCCG +TAGCTCGCGGAGAGTTCGAAGTGCTCACGCGCAACTTCTCACCGAATCTC +GTGGTGAAAATGGGCAAAATTTGCCCCAAAAATCAATTAAACCAAGAGAT +TTAAGTGCACATTCTAGTGATGAACAACCATCAAATAATGCAAAAGGAAT +GACATTAGCATCAAGTGGAATTGGTTCTGGAAGACATGAGAAATGGAGAT +TAATCAATGAAATTCGGTAAGCATTTTTTTTTTGGATTTTTGGCCTGGAA +AAAATTTTTCCAACAAAAACTTGAACAGAATTTTTGAAAAAAATGTTGGA +AAATTAAAAAAAAATTTTTTAAAGTAATCAATTTTAATTTTAAAAATTAG +AAACTTTTCAAATAAAACATTTTTATTTTTTCAAAAATTTTGAATTTCAT +TTTCAGAATAATCATTAATGAAAATTCAAAAAATTCAAACTACAATACTA +CGATAACAAAAAAAAATCAGAATGCGTACTACGCAACATATTTGACGCGC +AAAATATCTCGTAGCGAAAACTACAGTAATCCTTTAAATGACTACTGTGT +CGATTTACGGGCCTTGTTCTATTTTTGAAATTAATTCATTTTCGAATAGT +GACAGCCATATTACATTTTTCTTCGCTTTTTTGTATTATTTTCTCTTTTT +TTTTGCCTAATTTGAACATTCTATCAATCGAGCCCGTAAATCGACACAAG +CGCTATAGTAGTCATTTAATGAATTACTGTAGTTTTCGCTACGAGATATT +TTGCGCGTCAAGTATGTTGCGTAGTACGCATTCTCAAAATTTTGTGTTCC +CGTAATATGGTAGTTTGAATTTTTTTAATTTTCATTAATGATTATTCTGA +AAATGAAATTCTAAAGTTTTGAATAAATAAAAATGTTAAAAAATTTTTAT +TTGAAAAGTTCCTCATTTTTTCAAATTAAAATGGATTACTTTTAATTTCC +AAAATTTGTTTACAAAAAATTCGTTTAAAAAAAAATTAAACTGTTAAATG +TTAAACTTTTCAAAAAAAAAGTTCAAAATTTTTCTGTAAATTTTTAAAAT +GATACATGTAATTTAGATTATAATTTAAAAAAAAGACAAACATTTTTTGA +CGAAAAATATTAAAATTTTGTTAAAAAATGAAAGTTTTTAAAGCCTAAGC 
+CTTAGCCTCAGCCTAAGCCCAAGCCAAAGCCCAAGCCTAAGCCTAAGTTT +GGTACTGCAGTATTTAAAAAAAAAGTTTTTCCCCAAATTTCTTCTGTTTT +CTTAAAATTCAAAACTGTTATTTGCAAAAAAAAATGTTTTTGCAAAATTT +GAAATTTATTGAACTGAAAAAAATGTGAATTTTTTAATTGCCTTTGTCGC +AGCGGCTGGAAACAATTTTTTTTTTGAAATCAATTTTAAGAATAAAAATT +GATTATCTTGCGTTTTAAACTTGATTAGGGTATTTAAAAATCGATGGACG +GCGAGTTTTGGTTCAAAAAAATTAAAAATCTCGCCGTCCATCGATTTTTA +AATACCCTAATCAAGTTTAAAACGCAAAGTAATTAACTTGTATACTCGAA +ATTTGACGATGATTTCAAAAAAAAATTGTTTCCAGCCGCTGCGACATTGA +TAAGTTGGTCAAATTTCCGATTTTAACTAATTTTAGGCCATTTTTTGAGC +CGTCATAACTTTTTTCTGAAAAGTTTTCAAGAAGTTTCATTATAAAATTC +GGTGTTTTCAGACAATTTTGAGTCTAAGAAGCCAATAAAAAATTCGACTA +CACCACCTTTAAAGCTCAAAATAACGTCCAAAAATTCAATTTCCAGAACC +CACGTGCTTCGAACTTTTCGTATTTCCTCATCAACACGTGGCAAAGAGAA +ACTAATCGATACGGACGATTGTATTGCAAGAATGAATTCGACGTGCGTTC +TCGCATCGGAATTATTGCCAACTCATCACCAATACATGGACAAAATAATC +AATGAATACATGAAAAGTGCGAAACAACGATTCCAACAGAATCAGCGAAC +ACAATCGGATAGTCGGAAACGGATTAGTCGAAGTTTCAGTGCAAGCTCAC +TTATGCTCACGTGTATCGGTTGTGATTCGGTACGAGACCCACTTTTTGAT +GGAATTTCGAGAAAAAAGTTGAAAAAACGGAGAATTTGGCACAAGTAGCC +TCAATATTTGAAAAAAAAAACCGAAAAAATGGATGAGGGGAAGCCAAAAT +ACCAGAATTTACACCCAATTTTCAAAGTTTTATTGATTTTTTACAAAAAT +TTTCTCAGTTTTTCAGTAAATTTGCCAGATCTGACCATTTTTTAGGAAAA +CTGAAAATTTTCGGTGTTTTGCCAGAATAGCAACTTCCTGAAAATTTAGA +ACTAATTAATTTTTAATTTTTTGGGGGAATCATTTATTTCCTAACAGAAT +TTGATTTGAATTTCGAAAAAAACCGATTCTAAGCTTAAAAATTGACAAAA +ATAGCAATGAATGTCAAAAAAATTGCTAATACATAAAAATTGTTTGAAAA +AATACAAAAATAACAATTTTTATTTAAACTTTAAAAATTTTTCAAATTTT +AAAAATTGAAAAAATTAAAAAATTTTTAAATTAAAATTAAAACATTTTTT +GTTAAAATTTGAAAAAAATTCAAAAAAAAAATTTACATTAAATTTTTTTT +TTCAGGAATTCAAGCCGGCATCTCAAGTGACAAATATCATGTGTGATCAA +TGTTTTGCTTGGCAAAAAATGAGTGTTCTCACGTCGAATTGCGACCAATT +TATGGGAAATTCTGGGCCACCGTGCAAAAGTTCAACACTTCCGTCATTTG +GTAGTAATGATAGTAATCAGAATGATAAGGAAAATATTGTGGAGGTACTT +AATGTGGTGCCTAAAGATGGAGCCAAGACACTTACCAGGTATGGTTGAAA +TTTTGAAAAAATTAGAATTTTCAACGAAAACAGAAATTATTTTAAAAATT +TATTTTTAATTGAAAAATAAAAATTTCTCCCAAAAAATTTTCCCAAAAAA +AGTGTTTATGTTATTTTTGAAAATTATAATTGAATTGATTTTAAAAATTA 
+CGAGAACACAAAATTTGGAGAATGCGTATTGCGCTACATATTTGACGCGC +AAAATATCTCGTAGCGAAAACTACAGAAATTCTTTAAATGGCTACTGTAG +CGCGCTAGTGTCGATTTACGGGCTCTATTAACGAAATGAATTAAAATCAT +TTAGTTATCGAATAATACAAAAATCATTTCATTTCGAAAATCAAGCTCGT +AAATCGACACAAAAGCTACAGTAGTCCTTTAAAGGATTACTGTAGTTTTC +GATCCGAGATATTTTGCGCGTAAAATATGTTGCGCAATACGCATTCTTTG +AATTTTGGCTTCTCGTAATATACGAAGGTTGAAAATTTCAGATTTTTAAC +ACAAAAACTTATGAAAATAAAATTTATGAAAAGTATAAAAAGTTGAGAAA +ACAGAAATTTTAATTTTTGCGAAATCTAGATGTCAATTTCTTCACAAAAT +TTTTCAAAAATCGATTTTTCTTTCGAAAATTTTTTTTTGTAATTATTTTA +AATCAAAATTTGGCGATTTTCAAAATGTTTATTTTATATTTAATTTCTAA +AATTAATTATTTTGATTAAAAAATATATAAATTCAATTTTCAGATTGTAA +AATGAATTTTTTTGAGTTTGCCGTTGAAAAATAGAAAACAAATTATTTTT +AATAACTGAAATAACTGAAAATGTTGTTTGAAAAATTACATTTTTCCAAA +AAATCGAACATTTTTGACGCGCAAAAAAATTTGTAAAAAAAAATTTTCAA +CAGAAAAAACTGTAAAATAGGAAAAAAATTTACCTAAAAAGCGTATGTGC +CTTTAAAATGTACAGTAACCCCAAAAAAATCGATTTTTTGCAGTATGCGA +GCTGTGGAGGACGAAAACGGTGTGGTTCACTATTATATGGACGATGAAGT +AGCGGATTCGTGATTCTATTATACTCATTTCTTTTTTTGAAAATATATTT +ACTAGAGACCAGTTTTCATTTGATTTTTATCGATTTTTTTTGGATTCCAA +ATTTTTGTTTTTTTTTTGTTGCTTTTTGTGTGTTTTTTGCCGTCTATCCT +TCTTTCCTGCCAACGGGATTTTCTCTTCTTTTTTTTTTTTGAAAAACTCA +ATTTTCCCGCCTAGTATTGCTTTTTTCGAGATTTTTTCTTCCATTTTTCA +TATCCGCGCCAGCTTTTTCTCTCCCCTCCTTTTTTCACCCATATTTTCGA +GCTTTTTTCTGTGATTTTTTTTTCACCCCCAAAATTTTTTGTACCAAAAA +ATTGATGCTTCTTTTTTTTCTCTTTATTTCCAACAAAATGCTTGGCCAAA +GAAATAAACTCTTTTATAATTTATCATTCAAAAAATTTGAGACCAAATTG +AAATCCACATCCAGCAACTGCAAAGTGTCATTTGACAATGCACAAATCGC +ATGGCACGCCCCGTTTTCGTCGATCACCCGGGAACGCGTATTCTGGGACC +AAATGAGGAGGCGGTGGCGATCGGAGAAAATGAAGTATGGACCACATCTG +GAAGACACACCTGATTTTACGCGCAAAAATTTAGAAATGTATCGATTTTT +CGGTCAAAAATCAATATTTATCGATTTTCGGAGGGCACATGGGCTTCTGG +CCTTCCTCATTGAATATTCGCGCTCCATTTACACTCGCCTGCCGGACAAC +GCGTGGAAAAGTGTGGTGTACTCCACACGGACAAATACATCAGTTTTACA +ACTAAAATCGAGCCGTGACGCGACACGCAACGCGCCGTAAATCTACACAA +AATCTCTCCGACCCAAAATGGCCTAGTTCGGCAAACTCTGCCATTTCGAT +TTATGAGGGAAGCCAGAAATCCGTGAGGGCAAAGATTGAACAATCTGAGT +TAAATCGTTTCCATACAAAATTTTTTGAAAATTTTATTCTGAAACTGTCT 
+AAAACTCGAAAAGTTATCAATTTTCTGGTTAATTTCTGAGGAAATTCAAA +AATTGATATTTTCGAAAAAAAAAATCGGAGCCAATACACATCATTTGACG +CGCAAATGTTGAATTTTCAAATTTACGAGAACCCACGGGATTCTGGCTTC +CCTCATAAATCGAAATGGCAGAGTTTGCCGAACTAGGCCATTTTGTGTCG +GAGAGATTTTGTGTAGATTTACGGCGCGTTGTGTGTCACGTCGCGGCTCG +TTTTCAGTTGTAAAATTGATGTATTTGCCCGTGTGGAGTACACGGCACTT +TTCCACGCGTTGTCCGGCATTTGTCAATGGAGCGCGAAAATTCAATGAGA +AAGGCCAGAACCCCGTGAGAATCCTAAAATTCTGAGAATGCGTATTGTGC +ATCATATCTGACGCGCAAAATATCTCAAAGCGAAAACTAGAGTAATTCAA +TAAATCACTACTGTAGTGCTTGTGTCGATTTACGGGCTTTCGAAGAAGTT +ACTTTTTTAGTTTCTTCGTATTATTTTTTTTCATTTTTTGTCTAATTTTA +ATATTCTATCGATAAATGAATGGGTTTTTATTTATTTTTATTATCGAGCC +CGTAAATCGACACCAGTGCTACAGTAGTTTTCACTACGAGATATTTTGCG +CAATATATTTATAAAAGTTGACTAGCGGGCCCTGCGGGCCCGCCAGTTGT +AGGGGTGTAAGGCGAGTCCCCTTGCCGGGCGTAGGTTCTCGACTTCGCCT +CGAACCTGTTAGAGGGTTTGTGAAAATTTCAGTAGGTCAATGGGAGTCTT +CTTGTTTTTTGAGTTCGGTTTGACCAAAAACAGATGCACCCGATGAATCA +GTTAAAGCTGAGTTTTGATTGATTGAAGTTTGAGGAGGCTTTATATTAGG +GGAGACGTACCCATATTTTGTATAAAATTGAGTATTTCTATTCGAATCCC +GATTACTCACAAAAAAACAAAAAAAATTGACACTTGGTAGAATTGAACCA +ACTACCAAAATTTCTGCAGTCATACGCACTAACCACACGGCCATGCGGGA +GACACCTCAAACTGGGATGTAGGGAAGAAAATTTTCTGGAGGAAGTCGTC +TTTCGATTCCGCTTTCTTCAATTATTACTATTTGGGGAAGACGTTCGAAA +ACCGTTTATCACTGATAAGTCAGCGGAAAACCTAATTTTTGAAAATTTTA +TCACAGGATTGTACTCATTATTGAATTCCCGAAAAGGAGACGTACAGTTG +AGGGTTATATCTTGTACACAGACAGATGTATAGAATAAAACAAGTTTTGG +CCTGAAAATTAAAAAAAATAATATCTCTTGGCAGAGCATTGCTAACGCGA +CGAAACTTCATCTTCCATTAAATAAAATCAAAAACTATGAATTAAAAATA +CATTCCGCGAAACTTTAGTGGAAAAAATGTTCAGGGGACCCAGGAAACCA +CTCCCCCCAGTAAAAAATTTTTGAATTATTTTTTTCTTGAAAAATTTTCC +CACTGAACTTTTTACAAATTTTATATGTCTCGATGCGTCTTGATGAGACC +TACACGTCATTTTTTAGAAAACTAAGAAAACTTGAAAACTGACCGAGTTA +TGATTGAAAAAGTAGATTAGCAAAGATGGGAAAGTGTGCAAAATTTGGCA +CTTATTCGTCTTGCTCGGCCGACTCATAGTACTTTTTCCAATTCTGAGTT +AAAAATCGTGTTCAGCGTACTTTTGTACGTGGGATAAAGAAAAAAAATAT +CAAAAAAGATGAAGTAGAACTTGAGATAAAGACGAAAAACTACTTTTTCG +GAAAAAAAAATTTTTTGGCAAAATGTCATTTTTTGGCCTTTTGTTTTATC +ACAACTTTTTTCCATTTGCACTTATGAACTCAAACTTTTTTTCAAAAAAT 
+CAGTCTCTCTGAGTAGTATCTTGCACATGAGTTTGAAACAAAACAGAGCA +AAACCCAAATTTTAATTCAATTAAAACATGCTTTTTTGGGGGTAAAAAGA +GCAACAAAAAATTTTTTCAAACTGGGGAAAGCCGTCCTGGGCTCAATTTT +GCTCCGAACTTAGTGCCGTTTTCTGCTCCACTGTGGGGCAAAATATTTCT +AGTAGGATTTCAAATATTAGAGCATGAAGTCACACGGTTCAGGAGTTGTG +AATGAAAACGAAGTGGGACATTTTTTCGCAAGCCAAAAAAAACGCGAAAA +AACGCGAAAAAGGGGCGGAGTCTGTACACTCGGCATTTATTAGAGGCTGC +TTGGCAGAAAATTATCAATTATGTATATGTATAAGAATTATATTCGAATT +TCACGCGTAAGTTCCAAATTTAGCAGCATAGCCGCCTGAAGTTCGAAATT +TAAAATAAAAAAACCGAAAAGTACCTGACAGCCAACGTGGCAACAAATGG +AATCTCATCGACAAGTCCTTCAGAATTGAATACTGCAGCATTTGACGTAT +GATAAATGACTGCCACGAATTGATTTTCAAATAATATTTTATTGATTGAC +GTGGCAGTTTGGTGGCTGAGCGGTTTGTAGTCCAATGTTGGTAGAGTTAA +TAGGTAAAGACCACCCGTCTCGGTTGCCACACACGCTTCTTCGCCGCTTC +TGGAAAATTTGGTTTTGAATTTCTAGGCCATTGGGTTAGGCTTCGTTTTT +GAAATGGGTTAGAAATGTAGAAAATGGAAATTCTAGGCCATCAATTTTCC +TTTTGCATGTGTGTAGGAAATTGGCTAACATTCTAGAAAATCGATAATTT +TATAGAAAGTGAAATTTTCGCAAAATCAATAATTCGATAAAAATCGATAA +ACTTTGATAGAATTTTGTTCATCAAAAACCGATAATGTCCGAAAAATTTC +AGAAAATTAGATTTTTTTAACAGTCCGTTCATTTCGTGGCCTAGAAACTT +ATTTCTAGGCCATGGAAGAGTTTTTCCAAGATTCACGGCCACACGTACAC +TTGCATGCTAACAATTGCGACAGGCTTATTAGAAGTCGAAGATCTTCGAA +TTTTTCGCGACAGATCCGACACTGTGAGCTTTGTCCCGCCGAGTTTTTCG +AGTCCTGATGTTCCGATTTGCCATAAAATTATATTACCATCGTCGGAGCC +TGATAAGAGCCCGTGACGACACATTTCTAGGCAAGTTACTGCTCCGTCGG +TTATCTGGAATAGAGAACATTTGGAACTTGTCGATTTTCAGAAACATTTT +ACTGTACTTTTTTATGGGCAAAACATTGTTTTTTTTAATTAAACAAAACT +ACTGATAATTGTAAAAAGATCAATTTTTTAGTGTTTGAAGATCATTTTTG +AGTTCTCTAGCTACAAAATAAACAATTTTAGAGGAGTTGCAAAATTGTGA +ATTTTAAAAATAAATTGCACAATTTTGCCACTTTTTAATGGTTTTTGATG +GATTAAACCTAGATTTTCTGAATTCAGCATACATGAATTACCGCTTTTTG +ACAAAATTAGACAACTTTTTATTTTTGTCCAATTTTTTTCAGCCATCTAA +TGACTGTCCTTTTTTTTGGGCAAAAAAATAAATTTCCTAAAAGCGTTCGA +AACTACTATATTTTGAAATATTTTGAAATAGGACATTTTTTTAGGGCTCG +GAGATCAATTTTAAGTTCTCTAGCTACAAAATGCGGAAAAAAATGTAAAA +AAGTTTGGTACTTGTGTATTATACCCCCGCCATTTTAAAAATTAATAAAA +TTTCCATCAATTTATTTATTTAAATTAAACGTGATACCATTTTTCCTTGT +TAGGCTTAGGAATGGTTTTTTCCTAAGCCTGAAATTCCACACGTTTTTAT 
+TTAATGAGTTAAAAAATTAACTTTTAAAATGGCGGGGGTGTAATACACAA +GTACCAAAAGTTTCTAGGCCCCATTTGCTGGGCCACAATTTATAGGCTAC +AGTTTCTAGGACACATTTTCCTGGGAACATTTTCTAGGCCACATTTTTTA +GGCCACATTTTCTTAGCCAAAAATGTACTCACAAACTTAGCAACGTGGCA +ACAATTTCAAAAAAAACCTCATACCTTCTTCTCGAAAATTATTTTATCTT +TCGAACACATTGAGATATTTCCAGAAACGTCGCCGAGAATAAATTGGGTC +GATGAGTAGAAAACTGCAGAAATTGCACAAACCGGTAGAGGAATCGAGAA +GCTCTGAAATTATTCTTCATTAATTTTTTCATGCAATTTTAATCTCGGAC +CAATCAGCGTTTTGCCACGTCTTTTTTTTTTCGAACCAATCAGAGGAGTG +GGCGTGGCTAGATGCTGATTGGTCAGTTTCAAATTTTTTACAAAATATGA +CCTACCGTAGAAGAAGAGCACAAAAGTGCTCTCGCCTGATGGGAACAGAA +AGTTTCATGAGAGATACCGCCGAACAGAATTAACGTTTTTCGTGAAGAGC +TTGAAATCATTAGAATGATATTTTCATTTCGGGTCTGAAATAATTCAATT +TTAAAGGTACACGCCACCACCGTATCCTACTTCCACATGGTACTTCTGCA +CAGTTTCCAGCGCAACTTTATCGAATTTGTGAAATTCCAAAAGCCTGTCG +TAGTTCACCTTAGAATCGTCACGCACGAAATCGTAAAGGAGGTTCAGAAC +CTAAAAGTTTGAAAAATATGTCCTAGGAAATCAAACTGTGAATTTCTAGG +CCACATGATGGAAATTCGTATGAAAATTGCGACGAGCCCTAACATGTGGC +CTATTAATTGTGGCCTAGAATCTGCTGCCTACAAGTTGTGGCCTAGAAAA +AATGTTTCCTAGAAATTGTGACCATGGCCTAGAAATTTTGGCCTAAAACA +TCCGACATAGAAATTGTATGGCCTAGAAAGTGTGGGGGCAAAAGTTTTTA +ATTTTCAATTCGGTTTTTTTTTTCACAAAATGGAGAATTTTTTGATTTTT +ATGTCTGAAATTGAATGGGAAACGTGGAAATATTGCTAATAACCAACCTC +TTTGCTAATTGTAATATCCTTTTTCACATTCTCGCCGACACCATTATCCA +CAGAATCCGTCTGAACTTCTTTATTTTCTTGAAAATTTGCCACGCATTCC +ACGTCTTTTTTATTTGTCTCTATCGTCTGCGTCTCCGACGTCTTATACAC +TATTTCTCCCTGAAGGCGGATAGGTTTTCTCCACGTGAACTGGAATAGTG +TGACGTCTTCCATTATTAGCTGAAAATTTGAGTTAAAACTGCTCTATCAC +ACTAAACACGACGACACTCGTGGCCGCGGAAATTAGAAAACTCGGCCACG +TAACTTGGTTAAATTAATGAAAATATTATAAAAAACTTTCGAAAATTTGA +AACATTTTAACTTTTTTTAAAAAAAAACACATGCAAATGTTCCTAAACTG +GCTAAATAAACAACGCACATCCCCGGCGGAGCTCTGGTTTTCCATGGCAA +CCGAGAATGCGTGCGGGCCAACGGGGAGAATGTGGAGTACGTAGAGACGC +AGACAGTTGTATAAAAGGGGCGGCAACTTGAAAAAATAAATTTCTAGAGA +TTAATCCGTGCAAAGTTGTTTTTAATTACCGATGACCGAGTATGTGATTT +TTCTAGGCCACGGAACCAAATTACGGCGGAGTTCCGTTGAAAATATCGCT +TAATTGTTTTGTTTTTTCTACCAAATTAACTGACTGAAAATCCAGTTTTT +TTTTTCTGAAATTAGGTTCTTTCAAAAATCACCTGAGTGTTCCTTGATTT 
+GTTTTCGTAACTTTTTTCTCTCCAACTATTACACTACGGGTGTTTTGACA +ACACAATGGAAAACATGTCTGTTCAGTTTAAAGACTGACAAGAGTCTCGT +AGTCTCTCGCGGACTAGCTTACGGCCCTTGTGGTCTAGAATATTGAAAAA +CTTAGTTTCAGTATTGTGGCCGCGAAATTTGAAAACTCGGCCATCGATTT +TTTTTCCCACACCATGGGGCAATATTCACTAGACCATTCATTGTAACAAA +TATTTCAAAACTAAGAATTAAAAAAACTCCTACGCAATTTCCCCAGTCAC +TTGGCGGCTCTCACGCTCCCTATTTCAGATGTTAACCTCATTTGAATAAA +TTGCTTCTCCCATGTGTTTTTGGATGGGATACTCCATCTTTTTCAACCAA +TTTTTTCTCCTTACTTTTGTTTTTAACGGCTTTTTTTCATATTTCTGATT +GAGAATAGGTCAAGTGATGACGTAATAGAGCGCGTGGAAGAGGAGTGACG +TCACACGTGGCCTAGGAATCTCTGCGACCACCACATGAAAAACGTTTTAT +TTTCTAGGCATCCCTACCTACCCGTGTTTTGTCTCTATTTTGACAATTTT +TTGCAAGTAAATTCCCGGCTGAAACAAGGTCACCGCAACACAAAATAGGT +GAAGGGATGATCTCTTTTTTTCCAGAGTCAGTCAACATACATAAATTATG +TGTGTCTCTTCCATTTTCCTGTCGTCAATTTGTTCACTATTCGCTTCACT +AATTCCTGCTTTGCATTTTAATGTCCGTGCCCTCTCACTGGAACTGACAT +TTCACACAATGTTTTTTTCGGTGGAAAAAAGTTTTCTAGATTTTGATATT +TGCTGGAAATTTGGAAAATTTAGCGACTTGACCGTGCAAAAATTTTGGTG +GCCGAGTTTTCTCTTTGCGCGGCCACGTTGTAATAGTTGCCAGGTGTCGC +AAGTTTTTAAAATGACCGAGTTTTCTCTATTCACGACCAGATCTCTCATT +ATAGCCGACTTCCGACTTGCGTAAAAGTGCGACGTGGCCGAGAAAAATTT +CAGTGGCCGAGTTTTCACGCGAAATCACATTTTTTTAGTTTTTTGGTACG +TGGCCTAGTTTTTTAAAACCATTGTTTATCTCCACCCAATCTAAAAAAAT +TTCTGCGGATTTTTAAACCATGGCCGTAAAAAAAAGCCACTCTCATATAT +TTTTTCGACACCTTTCGGTTTTAGAAATATATTTTTAAAAATAAAATAAA +TGGCTTTGAAATCCAAATAGAAATAATTTTCCAAAAAAAGTGGAAAATTT +TCTAGGCCACTTGAAATTTTTTCCGAGATCTTTTTTGGGAAAGGAATTTG +TGGCCTAAAAAACAAAAACTCGGCCACTATTTTTTGACCATTTTTTCCAT +GTGACGTCACTTGTGACTTACAATTCCTTTTCTCAATTTCTTATGAGAAA +TCGCTTTTTTTAAGACTTTTTGTGATTTTGTTGCATTTTCTCCCAGTCGG +AGATTACGCCACGTGGAAATAGTAAACTCGGCCACACTCTTATAATTTTT +TGGCGTCTTTTGGGAAAAGTAACAAGTTCAAACAATGTAAATTACATAGC +CTAGACTTAGCTGACCAAGGCTTGTTTGAAATATTTTTGTGGCCTAGAAA +AATAAATATTCGGCCATTACTTTTTCTTCTCATTTTTGTCAGAAGACGTC +ACTGAAAACCTAGAAATCTCTCTTCCAGGGCTGCAAATGAGTATATCCTC +CATACCTTCGTCACCGTCAATATCGGGATCTTCCACGTCATCGACCGCGC +CGAACATTTCATTTTGCTATACATGGACCACTAAGGTAAGCGGCCTAGAA +AAATAGAAAGTTCGGCCAGCTCTTAAAAAATCGATTAATGATAATTTTCA 
+GTTAAAAACGCGAAAAACTGCGGACGGCGAGGCGACGATTTTATCGATTT +CTCCGAAATTCGCGACAGTCCACCAACTTATTTCGTTTCAATGGAATATT +CGAATTCATGCCACTTCTGAAATGGTAAGGCTTGTTTTTTTTTGAGGTAG +GCAGGCACGAAAGCGTCTGCTTGCTTTTCGAAAACACGTGGCCGCGGACT +GAAAAACTCGGCCACCAAAAATCTGAAACTGTGAGTCGCCCTGACAGAAT +TTTTTATTTTCTCGGCCAAACAGCAAAAGCCGACAAGTGAAGTTAAAAAG +GGAGGAGCATTTTTTTAAATAAAAAACTAGACTCCTAATGACCTCAAAAA +TATCATATTTTTAAATGTAAGAGTCGGCCACGTCCGCGGTCGATTTACAC +AGTGTCGCGGCTCGGTTTTCATTAATTGAGCACTGAGAAGTTTCACTTCT +CCAATTTCAATTTTCAGAACGAAGAAGACGAAGAAGTGGACTACGTGGCA +GTTGACCTTTACCTGGTAGATGGTCCTGTAAATGAGGTGAACGTTATGGC +CGAGGTGGGGGCTCTAGAGAAAACATCCACAGCAACGTTGGGAAGCCTTC +CCGCAGGTCAGAAAGAAGCAAAGACTATAAAAATGCAAAAGGGCTCTGGT +TGTGAGATAACGGATGCAGATCGAGAATGTGTGTCGAAGTATTTGAAAGA +GAACGTGGAGAATATCATCAAAATCTCCATAATTATAAATATGGAGACAC +GACTTTTCGAACCATCAACCTATTTGGATGCAATATCTCCAACTCCTCGA +GCATCATTTCTAACGGCAAATTACAATGCGAGAGTTAACAGTAAGGTATG +GAAAAGGCGATCACGGAAAAGGAATGGAAGAATCGAGCGAAGAGTGTTAA +GTGATAAAGAGAAGACGAAGTATGAGAGTAAAGTACAAATGATATTGGAT +GAGGAACGGGAGAAGCTGTGTGATAAACGAGAGATGTTGAAGGAGGATGA +TGGTGGATCGAGAAGAGCTTCGTTGATTTCTCAGTTCAGGGAGAATCAAC +ATGAATCTGTGTTGGATGATCATATGTTCAAGAGGGTAAGGTTTTTATTT +TTAATGCCTAAAACTTTGAGATTGAGGCAGTTCAATAGGCTATATACATA +TAAGTGTGGACCCAACAAGTTTGTTACATGAAGTATTTTTTCCAGATCCT +CGTTAGCTGCTGTGAATCTTGCGAGCAACGTCGTCTATCGTTCATGTGTG +ACAGTCGAACCGAGGATGATTCTGAAGGGGAAGATTGTGAAGACAATGAA +GAAGATGATGGAGAAGGTGGCTCCGATGATGAGGAAGACGATGGAGAGGA +GCCTCATTTTGAATGTGATAAGAGTGATAAGGAGCATGTCCACGATGTAG +GTCTCCTTTAAAATATCGTCCATAAAAGCTTAATTTTTTAGATGCTAGCA +AATCTATACTTCAACAAAGTAGTCCTTCCCGAAATGGAATATGTCGAGGA +TTTTGTGGATTTCCTGATTGATGCGGAGCTCAATGACTTGCCAGTTCTCA +AAAGAGCTTGTGAGAGGTATTTGTGCAGTGAGCTGAATTCGGTAGGCTTC +TGTTCAAAAAAATATAAAAATCTAATCGAAATATTTTCTAGAAAAAAGAC +ATTGGAACTTGTCTCCTGTTGGATTTGCTTTTCAACTCGATTGTATTCAA +CTTGCCTGTTATGAAGTCGATGACCTTGACGGAGTTGGCGAATCGAACGC +ACGAGTTTGTGGATGCTGACACACTTTTGGATCAGGAAGAGTTTAAGTGA +GTAGGGTGTGCGGCAAATTTGCCGAATTTGCCGAGCACGGCAAATTTCAA +AAAAGTAGATTTGCCGAATTAGCCGAGCTCGGCAAATTTCGAAAAACTTA 
+TTCATAAACCAGCAGTGTGTAAAGAATTCAGTAGTTTTGGTGCTCCAAAA +ACATTAAAAAATATCAAATTTTTTCGAGTTTGTCAAGCACGGCAAATTTG +CCGAATTTGTCGTGTTCGGCAAATATTGGAAAAATAGATTTGCCGAATTT +GCCGAGTTCGGCAAATTTTGAGATTTGCCGCACACCCCTGGTGTGCACCG +TTAAAAATTCTTTTTTTCCAAAAATCTAAAACTTGAGGTTAATAATGTGA +CCGAAAAACTGAAAAAAAAAAGAATTTGCCGCTGCAAAACCATAATCCGC +CAAAAGTTTTCTTTGTTTTTTTTTCGAATTGCAGGGTTTCCGAATCTGTC +AAAATTGGAAACACGACAGTTTGCCGAAAAAATTTAATTTTTGAGTGAAC +TGTTGATGCGAAATTCATTAAATACGATTTCAAAAAGCTCGGTCACGTAT +TTGTAATTTTAGAGTGGGATTAGCTATTTAAAAATTACATTTGTAATTTC +CAGAAACCTGGATAAAAGAATGCGAACGCTTGCCGATCGGAATCTCGTGG +AGCTCATCGAGCAATGTGTTACGTTTCGTGACCAAAAAGCAAGAGTTCGC +GTTTTGCCGACCGCCGAATCATTTGATATTTCGTTCGACTTAACTAATCA +ATAATATTATCGTGTTACTTTTTTCCTCTGTTTTCTTTCTCATTTTTAAA +AATGAATGCTTTTTTCTTTTTTTTTACCCGATTTTCCACACGACTAACTC +CAAAATCAGCCGCGCCTTTTTGTCTTTTATCCCTGCTTTTCCACACATTT +ATCAAATAAAGAGTGCAACGAAGTGCAAAATGGGAAGTAGTAGTGCTGCA +ATCTCATTTAGAGTACACTTTTGGAGCTCTTTTCTCCAGATATATTATAT +GCTGTAGATGGAAACTTTTCCAGTTTATATATAATATTTTTCATTTTCCA +TATTGCCCTTCACTGCTCATCATCCAATGGGGTGCATCTAATTACGGCAA +AGCTTCGCGGATGGGGCGAGTAATGAGCAAAAGAGCACAAGGATGGGGCT +CTTTTGTTCCTCATTTTCATTGCTTTTTGATTTCATTTCCTTTTCAACAT +GTTTCCATTTTCGAAATTAATCGGCTTTTCTGACCCAGGACTCACGGGAG +ATGAAAATTAATCTTTAAGAATTGTGCTTTGAAAAACAATATCATACACT +TTTCAAACTTCATTTCTCCGTTGTTCCCAGTAAATTCCAAGCTGTCATCC +ATCAACTCTCTGATAAAAAATAATCATTTGGTCCGTCCGTCGCGAAAATG +AAGTGAAAAAAGTTAATCCCGGCATGTTGAAATACACATTTCGTCGCGTG +GAGTATAGTTATGCTTCGATTTCTCAAGAGGATTCTTCTTTCCCCCTTTT +CATATTTTTTCGGGCAAAGAAGAAATCTCTGTCACCCCCCGACTAAAAAA +CGCTAAAAATTCAGCGTTCATTGTTCGGTTTTCCGTTTCAAAAAGAAAAA +AGTGTGTCAAAAACTCAAAAAGTCACCGCCTAAAGTTAGTTAGCACCTTT +TTCCATTGTGAAGATTTTCTTTCCTTCAATACAACTACTACAGTTTGTTT +CCTGTGTGTCGTCCTATTGTTGTGGGTGTAATTGTTCGATAGAGCGCGCT +TTACAAAATCTCTCGAATTAATTGAAAGTAGTGTGTTGTAGAGACCTTGA +CTCCTTTGTCAACACTGTAATTAGGAAGGAGTACACCGCCGGGCGGGTGA +GAACAGTTATTATAACGATAGTGGTTTGTTATTGTTTGTTATTAATAGGT +TAGTTGTCTACTTTAAACGTTTTTTGTTTGAAAAATTTCTGTTAATTAAA +AAATACATAATTTATAAAAATGATGAGTTTTTTTTTCAAATTTCTCAAAA 
+ACCGAACCCATGGGGTTTTTTGAGAAAAAATAAGAAAATTTTTCTAGTTA +TTCAAAAAATCTATACATTTTAATATCTGTAAATTGTAAATTTGGAAGAC +AAAATTTTGACAATGCGTATTGAATTTGAGTTTATTTAACTTAAAGAAGA +GAAATAGACAAATAAATAATTATATATATTGCGCAACATATTTGAATAAT +ACATTTATTTTTCGTAATATTTATTTTGAAAAATGATTGAAAATAAAACT +CGAAGAAAACTATATTTAAAAATGTCGGTGCTTTTTTTACATTTTTTAAA +CGAAAAAAAATGCGAAAAAAACCAATTTTCCGAGTTTTTTCTTCAAAATA +TTCGTCCCTGTTCATGAAAAATGTTCTCACTTATGTAAGTAGTTGTCACG +CAATGCAGTTGTTGACCAAAAATTATTTTCCGAGTAAACCCTATAATTTC +CAAATTCTCTTCTTCTGAAATTTTGAACTTTCCATCGGGAAAATGTCATT +TCAACTCGAATTCACACTAATTATACATCTGTATAATTATTTAGATTCGT +ACTTTTTCCTCCTTTTGAAAAAGTGCACAGACACCGCCGTTATCTTTACA +ACCGTCTGGCAGGACCGGTGTTGTAGGCTTTAAATAAATATTTTTCCATA +AATTTCTTAATTTTTAGCTGAAACGATTTTCCAACAAAAAAAAACGCACA +TTTAAAATGGTTACCGTATTTCTTCTATCAATATGGCTGCAATACTATTT +TTCGATGGTCTTCCCGCTTGCAATACTAATAGGGAGTGCAAGACTATTAG +GGAGTGCAATACTAATTTTCAGAACATTTTTCTGACTGTGAGCTTACTAT +TTTTTTTTCTGAAAAAACTCGAACCTTGTGTGAAAATTCAGAAAATTTGA +TTGTAATTGCAACAAAAAGGTGTAATTACTTCAATTTCATAGAAATTTTT +CCAAAAATTGTTGCACAGTAGGCAAAAAATGTTGTTAAAATCTCAAAATT +AGTGAGGTGATTTTGTACCAAAAAAAAAGTAGACGCAAGACTATTAGGGA +GTGCAACACTAATTGGGAGTGCAATACTAATTTTCGGAGGGTGTTCAAGG +AGCAATACTAATAGGGAGTGCAAGTCTAATAGGGAGGTCATATTAATAGA +AGAAATACGGTAAATTTTATATTTTTCGGTGCATTTTCATTTTAAAAACA +ATTTGTTTGAAAATAATTATTTTTAAAAATCGTTTTCTTCAAATTTTTTG +TAATTATAAGAAACCAAAAAACCTTTATTTAATTTTCAACAAGTGTATTT +TCTAATTAAAACAATTATGAAAATTCACAATAAAAGTTCAGGACTGAATC +CAAAAAAACTTTTATTTCAAAAACGAAACACCTTGTTTGCCAAAAACCTG +GAAAAGCCTGGAAACCTGGAAAATCGAATAACATCAAATTTTGCAAATTT +TAAGCGTTTTCTCGAAAAAGTTGACCTGAAAAAATAGGATTTTTCAGTTT +ATTTTATAAAAAAGCGAAAAAAAGCAAATTTTTTGAGAAAAAAATTGCCT +AAATTTTCGAAAAATCATTTTCAAAACTATTCACGATTAAAATTCGTTTA +TCCATTTCTCCTTCAATTCTTCCAGATTCGAATGAAAACTTGCTCAGAAA +AAATGCTAAATTTGCTCTTTTTCGTATCCACTACTACTGCTACTACTAGT +AGAAGTTCTTCATTCGGTGGCCGGACTACTTTGAATCAAATCACATTTGT +CGGGAAGAAGGAGGTCGTGCCTTTGCACTTCTGGTCACTCCAGAAATTGA +AGAAATTGAGCAGCTCGAGTTTTGGAGTATTCCAGGCAACTCGGTCGTTT +TTGAAAACCTATTCTATGAAAACTGCAAACTATGTGGCTTCATCTGGTCT 
+GGCTGGAATTTTTTGTTTCAAATTTTGACCCAGTACCTTGAGAATCAACC +AGTTTTCCACGTATGCTAAAATCCGAAAAAGTGACTCACACGAGAAAGTT +TCAATTTCCGTGATGACGTTCGCCTGGCTCTTCAGCTCATTCTTTTGTCT +CCAGAGCTCTGGGGAGCTGCAAATGAGTTGAGAGCAAGCACATTGCGTGA +GCAAGATAGCTCCAAAAGATATGAAGAGAGCAGGCGGGAGATGAAATTGG +ACGATGAAAAGTCGCCAGAAGCATAATTTTGGTGCGGCGACACCCGAATA +ATGAGACATTTACGACTTTTTGGAACATCTTCTACGGCCAGAATGTTTCG +TAACATTTTGGTAATTTGAGTTGTAGTTTTGAGGGGCTTTTTTCGATTTT +TAAATTTGTATTGACTCTTATTCGCAAAAAAAGAGAACTGCTATAGCACG +TGTAGTCAAAATGTCTTTTTGATCTTCAAAAAATGCGGGAGTTAAGACGC +AGACATCTCATCTGATTCGCATGGCTAAGAGCGTGCTGACGTCACAATTT +TCTGGAAAAATATTCCCGCATTTTTTGTAGATCAGGCTATGATGTGTGTG +TTCAATGAGACAGCCTGACACGACGTGATGTAGTTCACATTTTTCGGCAG +TTTTTTTCCTGAAAAATGTGGATTTTCGTTTTTTCTTAAGTTGTCAAATT +ATCGAAAGCTACTAAGCTTGAGTAACTTAGGAAAAGTAGGATTTTAAATT +GTTTTCACGGGCCTCGCAACGAAAGATCTCACTACTATCACCAGAAATCC +AAAAAAACGAAAATATTTGTCGTGCCGAGCCCAGGCTCCGTATTTTTGAA +GCGAAATTCCTAAACTCCCTAACCGCTGAAAATTTCTGAGTTGATTTTCC +ATCACCTTCTATATACAGTCTTTCGACCCGAAAACGATTTGAATTCGGCT +TTCTCCTCAAATCCTCTGTATTGCATCCTCTTTCTTAGTTGATTTATTTG +TAGCGGATTCTTCCCCTTCCCATCCGTGGCGTTATTGTTCGGTTACGATA +CGCTTCTTGTGAAGGTTATATGCAAACGACTACTTGCCATTCGATTCTTT +CTCCAAAAACGGGTTATTCCATTTCCTTCAACTAAGGAGCTGTTTTTTTC +TTGGTTTTTTGTAGAAAACCTACATTCAAAGTCGAAGTAGTGAAAGTCAA +AAATTACCAAATTTCGCAATTTAATTTTTAATAAAATGTCGCCCGAATTA +ACGATAAGTTAATAAGAACAAAATTCAGTTAGTCCGAGAGTCGTGATTTG +CCGATTTAGAACAGCATTTAGCTGAAAAAAACGATTTTTTTCCAAAGTTT +GAACTGCCACTACATTTTTCTCAAGTATTGAGAGTTTTGAATTTCCATCA +TTTAGTTGTAACCACACTTTTCTTCCGATTCCCATAGCAAGCGAAAGTTG +TCTTGTTTCCGAATATCTGACCTAACTTGGTTTTTTTACGACTGATGCCG +AATTTTGAAAAGTTTATTAGAAACTTTCCACGCTCTCCCCTTTCCAACCT +CGCAAACCCTTTTGTCTGTAGTTGGGATCTGTTGCTTTTTTGGGCAATTT +TTTGCCAGCTTGATCTACATATCATTGCTTGATCTACATATCTTGAGCAA +AATTCCAAAAAGTACGTTTTCTTGTAGATTAATTTGTGGGTTTTTTTTGA +GAGTTTTGAGCTAAAACTTGCATGGCATATTTTTTTCACTGTTTTAAATC +ATTTACAAATTATTTTTTCCTCTTTTTAGAAAAAAAAAATGTTTGTTAAA +ATTTTGGAAAAATTTCAAAAAGGTGTTTTATCCACTTCCAAAACGTCATA +ACTTTGCTGAAACTGGACCGGGGCAGCTAAATTTTTCGGAGTGATCATTA 
+TTTTCACTCTTTTATCGAAAAATTACTATGGCATGCCAAGCCCGGGGACC +CATTTTTTCACTTCCCTTGTAAGCCGAAGCCTTAGTCTAAGTCTAAGCCT +ACGACTGAGCATAAGCCTCAGCCTAAGCCTAAGCGTAAGCCTCAGCCTAA +GCCGAAGCCTAAGAATAACGGGAAACTTTTTTTGTTGCTTTTTTTTGTTT +TTTTTCAAATGCCGAAAAATAATTGCGGCATTTGAATTATGACTTTGTTG +TTCCTAGAGATTCTGGTTTTCAAAATAGGCAAAAATGCCATGAACCTTAA +CGATGTTCTTTCTCCCCGCCCTTTGAACACTCTGAAGTTTCCCGCCATGT +CAACCTTTTTTGATCGTTCGGCACGATAACGTTCTGAGTGCCGCGGTGCC +CGTTAATTTCCATCGAATCGTTAGGTTTTCTCCTTCGATTCCTAACGATC +AAGCCATAATTTATCCGCAAAATGGGATTTTCTCCCTTTTGTGATGATGA +TGATGATGGCCTCCCTCTTCTCTGCGATGACAGTCGAGTGATGAATGTCT +TCCTTCACACTTGTCCGTTTATCATTTATCACTCTGTGCTGAAAACCGGT +CTCTCTCTTCCCCTGACCTCTTCGGCTTTGTGAAACAGTGAAAGAGAAAC +GGAAAACAGGCGGTGTCAGATGGTTGACACGTTGATGTCGTAATCCATTT +AGTATTAATGCTACTATGTACGCCATTTGCTATATATGCCCATATTTTCA +ATGGCTTTGAGCACAAGCTTTCAGAATTGGCAATCAATTTCTAAAATTTT +AAAATTTTGAAAACCAATATATGGGTGGAGCCTATTTCTTAACTGTTCCG +TAGTTACCGAAAAAGTCGGAAAGCGCCGATTATCTAATACTTTCGAATTC +GGCAAAACTGAAATTTCTTCAGATTTTTATGATTTTGAGAGAAAAAATTA +TATTTCTTTGCCAAAAAACATGTATTACTTAAAATTTGTTTGTTTTATTA +TTTGAGCTCAATAACTATCAAATTGACAAACAATTCATTGGCAAAAATGC +CGAATATCGTAAATTTGAAAAAATCGGCAATTCCCGAATTGCTGCACACA +TCTTGGTTTTTTGAGCTAATTAATTGAATTTTTAAGAACTTTTATTTTGG +TAATCTAAATTTTAGTTACTGAATTCATTGCGAAAAAAATTTCTTCAGAA +TAGGGCTTCCATGTAGGCGTCAAAAAGCTTGCCAAACCTTAAGACGACCT +CCGCCTGCCTACCTCAATCTATGTGCTGAAACATATGCGAACTTTTTTTG +CAATTCTTGATTTGGTTTTCATCAAATTGATAGAAATGAGAAAATTTAAA +ATTGAAAATTTACGCACAAGTACAAAAAAGGATTCAGGCAGGCGTCAGGC +CGTGAAACCCCGCCTGCTTACCATGGAAGCCCTACTGCAGAAAATGAGAA +ATCTGATGTTCAAAAAAAAAAAGAAACCAGCCGTGTTTTTCCTTGAAATT +CGAAAAGTTCCCCGATTTTCCCCTCATTTCCGGACCGTTTGTTCCCATGG +TGGACCGGTATTTTCACCACTCCGTCAAAGGCGATTATCGGGTCTGTCTG +TCGTCGACGACCCATCACACCAAATCGGTTGAATTGTCGCACCCCTTTTT +TAATCGGCAATATATTTACTTTTTATTTGTAACTAAGTAGTAAACAGTAA +TAGTTTTTGCAAGAATATGCATAACACCTTATTTCACTTGGTAAAGTAAT +TGAACTGTTGTAATTGAGCTGAACATGATTCACCTACGAAAAAATGCCTG +TGACCTAGGGCTTGCATGTATGCGTTAAAATGGCTGCCTGCCTGACCTGA +AGGCAGCCTCCGCCTGCCTTTTGCCGGTTTATGCTAAAAACACGGTTTAT 
+GCTACGTGAAATTATTTTTTCATTTTTAATTTGATTTTCAACTTTTGAAA +TTTGAAAATTCACGTCAAAATGCAGAAAAGAAGGTGAGGCAGGCAGGTTT +CAGGCAGGCGTCAGGCCCTGAATCGGCGCCTGCCTATCATGAAAACCCTA +CTCTGAACTCGATTTCGCCCTCCAAACATCAATTTCCGTCATTCTCCTTC +CGTCCTTTTTACCCACTTTTTTAATGAATTTGAAATGAGTAGTCGAGGGA +AATTTAGTGAACCTACTAGAAGTCGCCACGGTTTTCAGCAGCAAATTTTT +TAATTTCGTAGAAATATGTGCTTCTGTGTTAAAATTAATAATAGCTAAAC +TATTGGATAATGTACTTTAAGACGTGCAATGTCACTCCATATTTTATAAA +ATGTTGAAAATTTTGGAAGGGATCGAGGAAGTTTTGATACTTTTTAGAAA +TCGAAAATTTTCAATTTCCGATTCCCTTCAACATTATATACCTCAACAGT +TTAATAAGATACAAAAAATCTTCAGAGAGTCATCTGTTCGCACACAGTTT +TACCCCTATCAAATTACAGATGTGTTGCCTTGCACATCTGTGAGAACTTC +ACACTATTCTGCTACAACAGATTTTCGGAACAGAAGGTGTGATGTTTCTA +CCTGACTATGAAGATTCACGTGGGCGGCAATCCAAATTTTTTGAAATTTG +GAGATTTTCGAGTTATCCATAAAAGATTCCGTTAAAGAGAGATCACAATT +TTTTTTGTAGATTGACAAGAAAATTACCCCAACAATACTCAAAACAATTG +TAATTACAATTGAATTATTACTTTTCAATTCTTAATTTTTTAAGTGCTGA +AACTTTTCTATTGACGTTAACCCCTGCAAAAATGCAAAATTGATCAACAT +CTAGTAAATCTACTAACAAGGGAAGTCTTTGAGGGGGTCCGTAGATTTGG +GGTTTTCATGCTAAAATTCCTACAGAAGAGTGTTAGTTATGATCTCTCCA +AACATTTTTGCGCATTTTTAAAGTGATTTTATTCTTATTCGGGAACCTAG +AATCATTGTCCGCACTTTTTGGAAATTTTTATTTTTTTCATTTTTGCTCA +AAATTTCTTGATCAACTCCAAGCAAAAAAATCAAAGTTTTTCAACAAACA +TTTTGTTAGTTGATCATTTTTCAAAATAATTTATCTCAACGAAGTTATGC +AACTTCAAAGTTGGTTAAATATTTTGCACAAAGTTCGTGAGATGTAGATC +ATTTCGACGGTTTACTTGCGAATAGAGAGTTAAAACTTGTGTAATGTACG +TTTCATACATTTCTGAACAATAAAATAGCAATTTCATACATTTTTACAAA +AATAAAAAATTTCCTATTTTTGCTTGGAGTTGATCAAGAAATTTTGAGAA +AAAAATGAAAAAAATAAAAATTTCCAAAAAGTGCGGACAATGATTCTAGG +TTCCCGAATAAGAATAAAATCACTTTAAAAATGCGCAAAAATGTTTGAAT +GAGTTGTTTAAAACTTTATTCAACTTACAAAAATATTTCAAAAATAGAAG +GAATCGAACCAAACCTTAAATATTATCAGACGCGCGCACTCCCAACTCGG +CCACCGAGGACAATTTTCAACTCAATGTGGTGGGTGTCACATTTTCGGTG +GTCACGCAAGCTGAGATTTGCGTGGACTGCATGGTAAGACAGTGGATTTC +AATGGTGTTTTTAACTTTCAAAACGTCATAACTTTGCTGAAACTTGACCG +GGGCAGCTAAATTTTTTGGAGAGATCATAACTAACACTCTTCTGTAGGAA +TTTTAGCATGAGAACCCCAAATTTACGGACCCCCTCAAAGACTTCCCTTG +TAAGTCAAACAAAAATTCTGTATTTTTCAGAGTACTATACATAGTATATA 
+TTTCTGAAATTCTCTCAACAAAATCCTTTCCCCTTCCTCATTTTCCCACG +TAAAGTAAAGTCAAAATACATTTTAATTACCATTAATACCTAAATGAACC +AAACAGGTCGGTCTTGCCCTCAATTCTACCATTTAGTCTCACGGCTTTTG +TACTTGTTCACCCACTTCCCCCATTCTATAATCCAACCACACAACTTTTG +GCAACAAAGTGTTCAATCGAAATGGGACACTATTTGCTATTCCTATAATT +GACAGGCTGGTGGAAGGAGGAAAGCAGCACACGACGAAAGGCATAAAACA +ACTTGGCGTTGTGTTAATCCCACCCGTATTGCATTCTATATTAGGTCGAC +AAACAGTTCATTCATGTCGATTTTCGATCATTTCTCAGAGTTTTGCTTCG +AATTGTAGGTTCATGGAGTTAGCCATTCTGTCTATTTTTTCCGCTGAATT +TTAAGATGTTTAATACTTATTTGCCACCTTTTCGACGGAATGACATGTAC +CGGTCAAATCTTGGAATCGATCTAAGAGATGACAATTTTGAATGTATATA +CTCAAAATGGGCTCAAATGAACGAATTTCGTAATGAATTTTTTAAAAACT +TTTTTCAAAATTTTTTATGGTGGTTCAAAGTTTCGAAAAAATTGACCGGT +ACATGTCATTCCGTAAAAAAAAACGCGCTTCTATCTGAAACAACAATTTT +TTTGATAAAAACTTTCAACTACAAACTTGTTCTTTACAAAAAGATCAACA +ATTTATTAGTTGAGCACTTTTCTGTACAAAAAATTATCCGCGCAGATATG +ATCTACCAAAGTGAATTTGGAAAATTGGCCCGTCAATGCTTCGTTGCAGT +GCTATTTTAGGATCTTTGAGAGCTCGCCGTGAGCTTGGCTCTGGAGATTC +GCAGCTAAAAAGGAGTAACCGTTTCTTTAGGAGTAACCGTTTCAAGACAT +GGGCTATCGAATGGCATAGGTCTCATATGCAAGTCCGATGGGCACCTTCT +GACGGTTCCCTAGTCAGATAGAAGCCAAAATTTGGGCGAGGTATGCCCAT +TGTCATTCTAACTGTCGAAAAAGTGGCCTCCGCCAAGACGTGATAACTAT +TTCTCTCAATTACTCTGCACTCCCAATAAATCCTACACACCGTTTGAATT +GAGTCATTCAAGTGTCAGTTGTTCAGTTTCTAGCAGTTGATACGTGTGTG +TGTGTGGTGGTGGTCAACCAACTGTCGCCCTTTTGAACACACACACACAC +ACATACTTTTCCTGCCATATTCCTCCCCAAGAAAGAGAGAAAAATAAGTT +GCATCTTTTCTTTTCTATATCTCTATCCACTAACACACCCTGTTATTCGC +ATGTTCTCTCCATTCTCCAAACAACGCAATCAGTGTCCGCCTAAATCTAC +ATAATCGATTCAGGGGGAATTAACTGTGTAGTTTTGTAATACTTTTCCTT +AAGGTTCTTGAAATTTTATATTTATAACCGACCGATTATGCCTAATTATA +ATTATAATTTAGAGAATTTTTCAATTTAACAACTATTGCGATGTTTTGAA +TTTTTGATAAAACCGGAGTATTTTTTCAAGAAATTCTGTTTAAATTAGCT +GAAAAATTTTGATTTACTAGTTTTTGGTTTGCCTCAAAAAAAATCGGTAA +AAATTTTTTGGGGCTTTTTTCTCAAAAAAAAAAAAAGAACGATTCAGGAG +ATTTTTCATAACTTCGAAACTTAGTGCAAAATCTTTTACGGGGTGTCGAT +TTTTTGATATTCGAAAAATCGGGTTATTGCTCTATTAAGAAAAATTATTT +CCTGGACATATTTTCTGAAGAATTCCTCGTCTTTTCCCATTTTTCTACAA +TTTTTCCTGATTTTTCATAAAATTGAAACTTTCGCTGTTCAGTTTTCCCT 
+TCAAAATGAATTTTTTCGACGACGAATGTTCAGAAAGTTAACAAATAATT +TTAAGATTTTGTCAAATTTTCACACACGTACTTTTCCACCAAAATTGACA +AAAAACCCTAAAATTTAATTTCTAAAATTTTCAAAAAATCGTGATCCATT +CTTCCTTATCATGTTATGTCTTGCAAGCGTTGCCTCAAAGTCATCAATCA +GCTAGTTGTCATCAATTCAGCGATACTTAGAGATTGAAAGAATGAGTAAT +TCATCCTTGTTTCTTCTTCATCTCCTCACTTCACTTCCAATTTTCCATCT +GTTACCAATCAATTTGGCCTGGCGATTAGTCATCTATTTGTTCGTCAAAT +TTGGTGCAAATTGGCTCGTGTCACTGATAAGTGGTTATTTTTTCTTTTTG +GTTGATAAGGTTACCTGATAAGGTCATTTCCTTTTTGCGCCTTTTTTCCA +GTAGTAATTCCTCTAATGTTCGTTTTGAAGTTACTAATCGAAGTTGTTAA +AGTATTAGATTTTTTTAAAATCAATAGGAATGTTTTTTTAATACAGTGTT +AACAAAATTTTATGTTTTAAAAAGTTAAAATAAATTTTAAAAAATCTCTG +AGTTGAATTGTTAATTTTTAAAACTATAAACAGCATTGGCATTCATAAAA +AACAAATTTCGACAAAAAATTCGTAAAAATTGCTGAAATCGCACTAAAAA +TACTCAAAACACTGAATTTTTTGCTCATAAAATCTGGAAAATCAAGTCGT +TTTACTTGATATTGCCATAGGAAATTACAAAAAAAAAGTGATTTTATTAA +AGGCTGGTCAAAATTTCATATTTGAACAACGTTTTATAAAACCGCTGTCA +GAGTAGAACAATATAGGTGTAAAATGTAGTTAAAAACTATTCTTCGATTT +TCCTACATTTTTTTTTCTGTAAATTGAAGTTTTCTCGTAGAACAAGAAAA +TTTAAATTTAAACCAAGTTTTTAAATTTTCAATTAAAAAAAACTACAATT +TTCGATTTTTAATAAAACATTTTAAATCCCAATTTTTCTTTCAAACATAT +TTTCCAATGCTGATTCTGAATCCATGAAGTCTTAACACTTCAATTTCAAC +ATTCTGTTGCAACAAGTCGTCCCATCTGACCAATAACAAGGGCTCATCAA +TCACTGACGAGGGCCCCCGGGCGCCGCCATCATTTTCCTCTCTTTCTCTC +TCTCTCTCTCTCTCTCTCTCTCTCTTTGTTTCTAAAAAGAAGGAAAATGA +GCTGCAATTTATCAAATCCACGGTCTTTTTCCCTCAAATTCTCTGCCTCT +CTAGTCTTCTCCCTCATGGTCATTGTCGCCACATTTGTGACTCACAAAAT +GGGCCGTATTATACAAAAATAATTAAAGTTCAATGCGTATGCGTGTGTGT +ATTTGTTGCTGGGTGAGGGCAAATGCACATTAGCCACCCACAACAAACTT +ACGTTTTTTTTTTTGTATTTTGCCTTTTTCTTCTTGAAGAATGGTGTCAC +ATCACATTACGACCTTTTCTTTGTTACGGAGAAAAGAAGATTGGAGCGGA +ACTGACTGAGGTGTGAATACTTTGTTGGTTTTTTTGGCTATACCTATTAG +TTACCTTATTTTTGAGAAAAAAAAATTTGAAGCATGTTATTTTTCGAGTT +GTTGGTAGGATGTTGATAATTACTGTTATCGTGATTTTCTTATTAGAAAT +TAATTTTTATTTATTTAGTAGAATTGGGGATTTTTATTCAAATGATCCAA +AATAATCTTTCGACACTGAAATCACGACCATTTTTGAGCCAATCAACGTT +TTCAAACTCCGCCTACTCTGTCTGATTGGTAGGAAAAGTGGGCGGAGCAA +ATTGCTGATTGGTTTCGAAGTTCTCGACTTAGAAATACCAGAGTTCATCA 
+AAACTGAAATATTGCGTTTTTAAAGGATATATTATTATTATTATTATTTT +CTCTCGATTTTTTTATTCTGTGGTTGCTCTGAATTTCAGATTATTCGAGA +TAGTTTTCGATAAAGCGACAGATTGTTCAGTTTCTGAATTTTCCATAATT +TATCCTCTAAAAGTGAAAGTCAAGGTTTCGTTCTTTGCAAAAAACTCATT +TTTTTGAGAGTTTTCGGAATTTTGGCTCGCATCACGCTCCAATAAATTAA +TTCAAAATTTCGGAAATTATTAACTTCTTCCATTTTATCAATACTGTGTT +AATCTCACTTTTCTAATTTGAAACAAAAATTGAATTTTCATCGATTTCCA +AAATACTTTACATGCTTGGCAATTGAAAATTAGTTTTGTTTTTCAAATTC +TCAGTTTCCAGCCACTGAAAACATTGCGAGCCTCCATTGGGTCCGCTCCT +TCCTTCCGAAACACATTTCCCAGTACTCTTTCCAATCTGGTCAACATTTG +AAAGGCTGTTGGGGGGTCGAAAGGCAATAAACAACCGAAAAATAGTTTAT +TCCAATGTCATCGAATCCTCCCTGGCTTCTCATATTGTTGCCAAAAATGA +TGATGGAAATTGTTTTTCGTTTGTATACACACACATCATCCTGTGCCACA +CCTTTTCCCCACTCTTTTTCCCGACGAGCTTTTTCATTCGGAAGGCGACA +CAGATAGCAATTACCGACATATGGAACCAATCGTACTCTCTCCGCTGACT +GCGACTACCTAATTAACCTTTTATTGAAAAAATGAGAGAGAGAGAGAGAG +AGGATATGAACGGAAGAAGAAAAAGAGGCAAAGCAATTGTTTACTTGTTT +TTCGAAACAATTCAAAATTTTGTTTGGGCTGTGCGCCTTTAAAGATGGAA +AAAAAACGGAAATTTTCCAGCAATTCGTTAATTTTTTTGTCGGCAAATTT +GGCAAATCGGCGAATTACCGGTTCGCCGATTTGCCAGAAATTTTCATTTT +CGGCAACTTGCCGGTTCGCCGATTTGCCAGAATTTTTCATTTTCGGCAAT +TTGCCGGTTTGCCGTTGCCGGAAGTGTTTAGAGGAATTGTTTATGAGACG +GAAGCACTGTGCCTGTTTGCCAATTAAAATTGAAATTCTGAAATTTCCAA +AAAAATGTGCACAAGCACCATTTGCCAAAAACTTTTTCAGTTGCCGGTTT +GCCGATTCGCCGGAAAATTTAAAATCCGGCAATTTGCTAATTTGCCGGAA +AAAATCGTCTACCGCCCACCCCTCCACCGAATTCGGAAATTTTGTAAAAA +ATAACTTTATTTTTGGGCCAATACGATACTACTTTTCCTAAAAAGAAGCT +GTTACAGTAATCAATGTATCATCTTTTCCATCTCGAATGGTACCTGTGTT +TACCTTAACTGTCACATAACTGTGAAGAGTACTTGAGATGACAAGTAGTC +TTATCAGAAAACCTCCCCGCTTACTGCTTCTCGGGTGTCGGGTTACCGTG +TTTGCTGAACGGTTTTTCGTAGGGATTACTGTAGAGAATTGGATGAGTAG +TTGGGATTAGGCGATTAGGGGGAAATTGGAAAACATGTCTGTGCACTCCA +TATGTTTCTAATAAGAATCCTTGGAAATTTGCATATAACTCAAGAATGGT +ACTGGTCCAGTTTCGTTTTGTTATCTTGATTCTTCGAAATGGTGCTCACT +CAACTTGATTCCATCCCCAGGACGCTAGTTATTTTCGTAATCTCCGCGTT +CGATCTCGAGCTCCTCCTCTATGGAAATGTTAATTTGTGAGGGCTATCGC +GTCCACAGATTTATAGCGTCCGTCGTGCGGTTTCGCTGGGTTCTCGCAGC +CACCCCCCCCCCCCCCCACTCATCAATCGCGTAAACTTTTTGTGAGCACG 
+CAGCACACAATGTGCTCAGGAGGCTTCTGTCTCTTCGGAGATCCAACATG +TGGCACCTCCTCAAAGTGTTCACTGGCCTCCTCGGCTCGGCAGTCAAATT +TATTCATCTCTTTGGAGATTGGCTCACAATTTTTTTGATTTGAATTCAGT +GACTATTTATCGATTTCTAGGGCTTCTATGTAGGTTGCCTGTGTGCCTGT +CGTCTGACGTTAAGGCGGCCTCCGCCTGCCTCTCGCCTCAATCCGTGCCT +TGTGCCAAAACATACGAATTAATTCGATTTTTATCAAATTGATAAAAATG +AGAAAATTAAAAATTTGTAAATTCAGACAGGCCTGAGGCAGGCAGATTTC +AGGCAGGCATCAGGCTCAGAAACCGCGCCTGTCTACCATGGAAGCCCTAC +TGCTATGTAACGTTTGAGATATTCGGTTCAGATAAAATAAGTTGAAGTTA +CGCGAAGAAATTCTGAATTTTGAATATTTTCCAATAGTTTCTAATAGTAT +TTAGATGTTGTATTTAGATACTGTTGCTACAAAATTTTTAGAAAATTCCG +TGGTAAAGTTCAAAAGACTACTCTTTCTCTGAATATTGGTTAATAATATG +AATCCGTTCTGCTTTTTTTCCGTATTCCAATTCTTGACGACTTGATATGA +AGCCAGCTCAAATAATAGGCCTCCCACGTAGTGTCAGGCTGTCCCAATAC +GGTTTGATCGACAAAAAATGCTGGAGTGTTATGCCCTTCAAAATGTGACG +TAAGCGCGCTCTTAACCATGCAAAATCAGTTGAGAACTCTGCGTCTCAAC +TCCCGCATTTTTTGTAGATCTACGTAGATCAAGCCGAAAGGATACACTCT +GTCACCACGTGGCCTCCTAAGGACTATTTTGCAAGGACCTAATTTTCTGA +ACCTTCACAAATTGATCTGCACCGTTTGAGATCGATTGAAAAGTTCTAAA +ATTTTCTGTCCTGTATACTTTTTTCTGTACATAAGTCCTACCGAAAGGGT +CCCCCGCGGTAATAATTACCCGTCAAAACAATGTGTACTACTAGCCGAGA +TATGAGTTGTTATTGTTGTCATAATACAACAAGATTTGGATCACCGCCCA +ATTTTCTTGTCAAAAAGTTCCGCTTATCTCGTTTTCTGCTAATTAGAGAA +TACTCTATGGGAATTGGTTGATCCACTCCATTAGTGAATGTTCCCCCAAT +AACCATGGAGCTGTTAGAACAAAGACGAAAACCCGCCAGAAGACACACCT +TCTTGAGAGCAGGTATTAGAAAAATGAATGAAAGGAATTGGGAGGGGAAC +CCGCGGTGGCCGAGACTTCCCACTTGACCCGATTTGTGCACATACGCAAT +ATCGATGAGGAGTGGTGGTCCATTCGATACAGTCTTGCACCATGTTTGTC +GAAATGCTATTTGGTTGGCTGGCAGATGTTTTTGGTCCAATTTTCTTGTC +AAATTTTGTCTTGATCTGACAGATTTTAGGATTCTTGGAATTCGACTCTT +CACCGAAAAAATATAATTTTTCGGAGTTTGTTCAATTTTTCAGTATCCTG +ATTGGAACATTTTAATGTTTTACAAAATTCGATTCTAAATTTTAAAGCAC +ATTTTGACCTAGTAGGACTTTCGGAATTTTGTAGCCTTTATAAAAAATCG +GTGGCCAAGTTTTCTATAATTTTTTTACGTTTTGAAATTGCCTTTTTATG +AATTTTAAAATGTGGAACATTTTTTGAATGACTTAGTTTTTCAAATTCCA +CGTAGTGTCAGAGTGTCCCATACCAGTTTAATCGACGTTGATCTACAAAA +ATCGCGGGAACTCTGACGCAGAGCTCGCAACTGATTTTGCAGGGTTAAGA +CTTGCTGACGGCACATTTTTTGGGAAAATTTTCCGCATTTTTTTGTAGAT 
+CAAACAAATTTCATATTGCCTCGATTTGTTCGCTATAGTTGTCTAGTTTT +GAAATCGTTGGAATTTCTTGAAATTCGAGTTCTTGAAACATTTAGGTTTT +CACCTAGTTTTCTCTGCGTGTTTTTTGTGTTGAAAAAACGGTTCTCAGAA +ATTTGACTGCTTTGAAAATTTCGAATTTCATCTCCAAAAAAAAGTGAACA +ATTCGTATTTTTCTAGAATTTTTGGAAATCTCTTTAAAATGAGTTTTTCT +GAAACATTTTAATCTTGAAATAGTCAGAGTTACGTCAAATTCTATTTTTT +GAGGAATTTATTTTTTGTGAGAAGAAAACAACAAATCCGCAATTTTTTTC +CGAATAATTAACCAATCCAAGATCCCCCCTCAAACCGGATGGCATTTATT +CGGATCCCGGCAGGTGTTCGAATGAGATATGTATCCATTAACACATTGTG +CATACACATAGACACATTGTCAACGTATGCCGTACACAACAAAACAACTG +CGCTCGTTCGCACCTCAATCCTTTGATGTTCTCCGCCGGGGGCTCCTGTA +AGGTCAGGAGTTTTCTAAAAATGTTGCCTTCTTCTCCGTCTTCTTCCTCT +TCATCATCGAATATTCCAGAGGGGGGCCGACACGCTTCACTTGATTTTCG +ATGGCAATTTGTTTGAAGAATTCAAGAATTCGAAGAATTTATTTGGAAAC +TCACTACTCTCTTTTTTACGTTTACATCCAACTTTTGGCACTTAGGCGCC +TCCAACTGCAACCATATGGTGCTCAATGAGCCGAGAGGGATCATCTGTGA +ATTTGGTGTTTTTTTCTCAATTGGTTGCCTATTTGATTGGAGAGAGATTA +ATTTGCATACAATTACCTCTATTTGGCTCAGGGGTGGACGGATATTGCCG +TTCGGCATTTTTTGCCGACAAGTACGGCAAATCGGCAATTCGCCGATTTG +CCGGATTGCCGGAAATCTTGATTTTCGGCAAACCGGCAAACATCAGCGTA +CTATTTTACTATTCAAAATAAATGTAGGAACATTCATAGGATGCGTACAA +TTTTGCCGATAAAATTTAAATTCTGAAGCTTCAAAAAAAATGTGCAAAAC +CACAATTTGCCGAAAATTCTAGCCGATTTCAATTCCGGCAATTTTTTGCC +GAAAAAAATTGCCGCCCACCCCTGATTTATATTCAGTCTGTTACCGATTC +TAGTGAGGGGTTTCCAGCCTTTGCATGAATAGGTGTCTGCTACATTAATT +GCGCAATCCACATTGATACAGAGCAACCTCAATTGTTGTTTTTTCTGTGG +CCCGTTCTGCTCGTTACACCTATAAAAAGGTGGTCAAACAAGTCGTAAAA +TTTGGGTCATGAGATGGTCCCTGGGTTAATTGGAGAAGTGCCGTCATTGA +GAGAGAGAGAGCCAATTACGTATGAGGTCTGCTCTGCTCTCGGGAAGACT +CTATAACCCTTCGTTTTTGGCGGGAGATATGAGATATTTTGCTGTAATTA +TCGCACTTGTTTTGGGTATTCTCTTTTTGTATGATTTACCTAAAATTTTT +GAGCAGTTGTGATTATCCTATTTTGTTTCGAGAACCTAGAAATGTTTCCT +ATCATAGTAACCGTTTAACTCTGTGAGTATAGTTTTACTTTAAGTTGCTC +CGTTTTTTATTTGACTAGCACAGCTTTTTTACATTGCTCCGGTTTTGGCC +AAAGGTTTTCTGTTTTTTTTTTGCAGTTCTGACTTATTCTGGCCTGCTTT +ACCCAGCTAAGCCTAGGCCCGGTTACAGTCCATAAGACAACATTTTTGAA +CTCAATAATTGCCGAATTGCCAAAATGCCAAATTTTCGATTGCTGCCCGT +CTGAGTAAGTCTGAAACAGTTTGTGTGAAATTGAGCCGGAAACTACTCGG 
+AGTGATGTGGCCACTTCCCTATTTAGTTTCTCGGTTGATTTTCACACTTC +CCGCCGAGAGCCCGAAGCATCTGCCAGCTGAACGGTTTCCGTCGTTTTTC +ATGTCTATCGCTAAAATGTGCGCGTTACACACACATAAACATCACTTCTC +TCATAAAGTGATGGATAATTCGAGTGGATTGCATCTGACACCACACTTCC +GATAATCGCTTCGTCGTCTTGTCATCATCGTCGTTTTCTTCGTCTTGTTG +TCGTCCGGTCCCCACACACACATCAAAACGGCTACTTCCGACGTGTCGGT +GGTTTGTGTGTGTGTCAAACGGCACCACCACAAGTGGTTGCCCTCGCCCG +TGTGCTAATTCGCATTTTGTCTTGTTAATCACGCAATAGTGATAACAATC +GCTTCCGGACCATTGCTAATATAATATATTGGCTACCGCGGGCATGTCTT +GGCAATAACCTTGATGTCAAGTTTTATCGTTAAAATGTGAAGCTGTATGA +TTGCGAAAAATTTGAAGTTTTCCCATTCAAATTGTAGGCATATTTGAAAA +ATTTCTGAAGATTTATATCTGTCGTAAGTGGAACGGTTTATTTGGTTTCT +TGGGTCTCGTCGCGAAAAGCTTTCACTATGGTTTGAAGCACGACTGGGCG +ACAATCGAAGTCGAAGTTCGGCAATTCGATAATTCGGCAATTATCGAGTT +CAAGATTTTCGAAAGAAAAACTCAAAAAATAATTTTGAAATCTATTTATT +AGTTCTGGACACGCGGAGTCAGAAAGTCCCATTTCGCTTTGATCTACGAA +AAATGCGGGAGTTGAGACGCAGACATCTCAACTGATTTCGCATGGTTAAG +AGTGTGCTGACGTCACAATTGTTCTGGAAAAAAATTCCCGCATTTTTTGT +AGATCAAACCGTAATGGGATAGCCTGGACGTAGAACTCGCCAAATCGTAG +ATATATGCATAATGAAAACTCTAGAGTTAACTCTGCCACCCTCATAAAGT +GAGTTAGAAACATTTCTGCTCTGAGGAACTATAGAACCCCCTTTCAAGTC +GATCGTCAGCAAATTCTAGAAAGATCTGACAGTGGCCGATTTTTTAATGT +TTCTAGGCCACGTAAAAAAGCTGATGTCTTGTTACTTTTCAATTCGAAAA +TCCCTTTTTTTAATTTTTCGGCAGCACCCGGTCCGAGCCAACATCTTCCT +AAACACTTCAAAACCCCGCCCCCTTCTGCCCTCCCGGGTGTCTCCGTGTT +GTTCAGGGTGTTGTCCACCCCCTAGACACCCAACTGACCATTCACCCGTC +TCCTCCTCCCCCTCTACAACAACCACCACCACCTGATCCATTCCATTATA +ATGATATTCCGCATACTTGTCATACAGAGAATACCCGGTTTGCTGCTGCC +AAACGGTTTTCATTTATTCCATTTCTCACTCCTTGATTGTTGTCAATTCT +TCGATTCGATGCGGCAAAACTAGCGCCAGTGGGGAAATTGCTTTAATAGT +AAACAATTTTTTTTTAAATTTCCTATATTTTTTGTATAAGATTTTCTTTT +TTAATTGCAAATCCCATGGGAAAGTCTCAAATTGCATAAAATTCCAATTT +GAATTCCCTCCAAGATTTCAATGTTCGATGGAGCGCACTTGAATTATTAT +AATTTTTATCAATGAATTTTTTTATCATCTCTGACTGATTTTTCACGATT +TTTTGTGTAGTTTTAGAGGAAATTTACTGAAAAATCCAAGTTAAATGTAA +ATTTCCGATTTTTATACAATTTTTGGACTACTTTCCCCGCCGCCACCGCT +AAAAATCCCAATTTCCTGCAAGTTTTCACCTGCTCTGACTCATTTCCGCG +AGCGCGCGCGCTCGTACTTTTTATCTTTTACTATTTATTTATCTCCTCCA 
+AAAAAAAATCCCGTTAATTTTTTTTCCCTTACAATTTCTCATAATTTACT +CATTTAGTTTGGCTTAAAAAATGCTAAATTAAATTTTTTTTTCATTTCCA +GATCACATGACACAACGAAAATGAGGAGAAAAATGAAGTTATTCCTATTT +TTATTATTAGTAATTAATATATGTCGGTCGGCCGCTGCTAACGGTGACGA +ATGCCCGAAATTGTGAGTTTTTCGCTTAAAAATCAGATTTTTTTCGAATT +TACTGTGAAAAAGCGGGGGTTTTGCCTGAATTTTCACTTAAAACTGCTTT +TTTTTGCTAAAATCCCGAATTTTTCAGAGAAAAATAATTCAAACTCCATT +TTTCAGCTGTAAATGTGCTCCGGATCCGGTGCAGCCGACGTCTAAACTCC +TATTATGCGACTATTCTTCGAAAAACACGACAATTACACCTATTGCGTCG +TCGAATTATGATCAGGTTGCTAATATTCGGTGAGGTTTTTTGATTTTTGA +ATGAAAAATTTGAGAAATTTTTAAAAATGGGAAAAAATGTGATTTTTAAA +TCAAGATATCGGAAAATACGAAAAAATTTCCAAGAAAATGGTGGTTTTTA +TCGAAAAAATGTTAAAAATGCATAGAAAATTTGGATTTTCGAGTTCGAAA +TTTTCTGAAAACAAAAAAAAAAGTTTTTTTTTACACTAAAACATTAAAAT +TGGATTTTTTTTTCCGAAAAACATAATTTTTCAATTTCTAACAGTAAAAA +GCTCACAAAATTCCTTGAAAATTGACAAATTTAGTATTTTTTACACGAAA +AATGCATTAAAATTGAATTTTTAAAACAATTTAAACTTAAAATAAAATTT +TCATTTATTCTACACGGTAAATGTATTAAAATAATTAAAACTTCGAAATT +TTAAATTAGAAAAAATCAAATTTCCTCCTTATTCTCAATTTTTTTTTACA +GCAAAATTTCAGTTGAATTTTCCCATAATTTTGAATTAAAAATGTGTTTT +ATTCCAAAAAAAAACTATTTTACAACAAAAAACTAGATTTTTTCCCAATT +TTCTATGAAAAATTTAATTTACCAATAACTTATTTTCTTTTTCCAGATCA +CTATTCATATCTTGTGATAATAATAATTTCCAATTTCCGGATGCCTACTT +CAAGTCGTTAACCGCGTTGCATCATCTGCGGATAGTGGTGAGTTTATTAT +ATTATCAGCTGTTCTCTACAGAACATCTGCTTTTTGCGTGTAAATTTAGA +GGTCAATTTTCGGAAAAATTGGAAAAATTGGCCTAAATCTCAATTTGAAG +TAGATTTTCACGTGTTCAAAAATGTTCAAAAAATTCAATCAAAAATTCGA +TTTTTGGAATACTTCAAAAAAATTTAATTTTCTTCGAGAAAACCGGTAAA +TTACAAAAAAAATTCGTTGGCGATTTTTTTCTGTAGTTTTCCAGAAAAAC +AAACGAAAATTTTAATTTTTAATTTTTAAATTTTCCAAAAAAAATTTTTA +AAAATTCATAAATATGTCCATTTTAATTTTAAAAATATCGGAAAATATTC +AAAAAATGAACTTTTCTATCGAAAATTTAACAAGAAAAAATTACGAAAAG +TTCATTCAAAAAATTAAAATTCTTCTATATCTGAGGAAGGCTAACAGTAA +TTTTTTCCCATTTTTTGACTCTTTGAGCAAATAACCGTATCACTAATTAC +CTTAACCATCAAAAAAGAAAGGTGTGCCTGTCTTCTATTCATCCTCCTCT +CGACACCAAATTCTTAAGAAGAGCCCCCCACTCGGATGTCTCTAATTAGG +CACAAATGTTACGTCATTTTGTCATTTGTACGGCCACAGATGACCTCCGG +TGTGCTTGGAGGACTGCGAGAGAGGAGGATTAAGGGGATTTTTATGTCCT 
+ACAATTGATTTTTTTAGGTCAAAAGTAGGGATTTTAAGGCCAAAAATAGA +GATTTTTTAGGTCAAAAGTAGGGATTTTAAAGCAAAAAAAAAAAATTTTC +GGCCAAAACAGTGGTTTTTAAGGCCAAAAAATTTAATTTTTCCGTTTATG +ACACCTAAAATTGGGGTGAAATTTTTTTTTCGGATAGAAATCTAAAATTG +CAATTGTTAATTATTCCAACATTTTTTTTTGCATTAAACGTTATTGTAAA +AACATTGAAAATCACTTGATTTATCCGAAAATTTCATTTATTTCAGATAA +ATATTGTTTAATAAAAAATGTGTTAAAAAACATGGTGCATAGATATATAG +ATAATTTTGTAGAATAATTGAAAATTGCAATTTTTAACTTCCTACCCGAG +TAAACAGAATTTAAATCCAATTTTAGGTGTCATAAACGGAAAAATCCCAA +TTTTTGGCCTTAAAAAATCCCAATTTTTCGGCCTAAAACTCCCTAATTTT +GGCCTAAATCACCCTATTTTTAGCCTAAAAAAGTCCCCTGTTTTTCCATT +TTCCCCAGGAACTCGTAGAAGAACATGTGTTAGGCGTGAAGAGGTTAAGC +CGATTAGCCATGTAATATTCAATACTTGAGTATAGAAGGGCCAGAAGCAG +CAGCAGCAGGGGGTGCTCCAAGAGCACCACCTCAACAGATGTATAAAGTG +GTTTTCGAGTAGATTTGTGGTTTTGCACACGGTGGAAGAAATCTGAAATT +TGAATTTTTTAAAGCCATTTTTGTGCTGAAAAATGTACAGAATAACCGAA +ATTTCACCACCCTTTAAGGTACGGGAAATCTCCGGGAAAAAGGCTCAAAA +ATTGCATAAAAATGGAGATTTTAAAGCTAAAAATAGCTATTTTAACAAGT +TTTTTTGCAGGGATGCGAGACCACACATTTCTCTGTGAAATTGTTCGAAG +ATTTGGCCGCTTTGAGAAGATTGGAACTCGATCAGGTACATTTTTCTTGG +AAAATCTAGAAAATTTTGCTGAAATTGGCTCAGAAATGTCCTAAATAGTA +GAATTTTTCATTAAAAAGCCTCTCAAAACGGCTTAAAATTAAGTAAAAAT +CGACATTTTTTCACAGATCTCCACCGCCTCAACCTCTTTCGAAATGACCG +AAGACGTCCTAATGCCGTTGGCTCGTCTCGAAAAGTTTTCCCTCACGAGA +TCACGGAATATCGAGCTTCCACAGCGACTTTTGTGCTCTCTGCCGCATTT +ACAGGTATATTTTTGGGTAATTGATAATTCGCCAAAAAGAACAGCAGCCG +AAAACTCAAATTTTTCACGAAATTTGCCGAAAAACTAGATTTGTAAAATA +AAAAGAATGCAAGAATTTTTAGCTTAAAAATCTCAATTTTGAACGATTTT +TAAAGCAATTTCAGTATGAAAAATCCAGATATTTTTTGGGTAAAAATTTG +GTGAAAACGCGTTAAAATCTGCATTTTTGAACGAAATTCGCCAAAAATCA +AGACTTTTGTAGTGCATTTTACTGTAAAAATGTATACCTATTTTCTATTT +TTATGCGTACTGCGCAATATATTTGACGCGCAAAATACCTCGCAGCGAAA +ACTACTCTTCAAATGACTACTGTAGCGCTTGTGTCGATTTACGGGATCGA +TTGATAGAATATCAAAATTAGAAATAAATGGGAAACTACTGCGAAAACAA +AAATTTATTTCAAAAATTGAGTCCGTAAATCGACACTACAGTAGCCATCT +AAAGAATTACTGTAGTTTTCGCTACGAGATATTTTGCGCGTCAAATGTGT +TGCGCAGTATGCATTCCCATCTTGTTCTCTACATTCAATTACCACCACAT +CTCACAGAGAAGGAATTAGTTGTTTATTAGTACGTGGGGGGGGGGGGGCT 
+TTAAAGCTTACTACTTCTTCTTTCTTTCCACTTTCTGACGTTCAACCATC +TGGTATTCCTGGCGGCGGGGCAATTGAAAATGAGAACAAAAGGACATCGA +TGGAGGGAGGAGGATTGAGAGTTTGGAAATTGTGAAGAATGCGCGCGGAA +GGAGGAGGTCAAATATCACAAGCGCCGGAAGTTGTTGTCAGCCAGAAGCA +ATAAAGGCCTAATTATGATGATGATGAAGAACCTCCCTGAAAGAGAATAG +CGAAAATGTGAAGTTTCCATCTCAAGGGAGCGATTTTTTAGTGATCATGG +AGTCTTGAAGTGTGCACATAGTCTACGTGCCCCACAAGAGCCTATGCCTG +CCTTATGCCTACTCACATGCTCACAGCCAAACTCTTTCGAAATCAGAATT +CTACATTGTAGAATCTACAACACTGAAGTTTCTGCCATAACGTTGAAAAT +AGGCACCTACGCCTGAATACGTGCCTGATCAACATGGATGCCATATAGTC +CAGGCTGTATAGTCGTAAAACAGGGATTTTTTAGGCTCATGGGTTTTTGT +CGGAAAAAATCGAACATTGAGAAAACCAGAAATTTTTCAAATTTTCGTAT +ACTATTCCACGAATCGTTTCTCCCGTTTATCGAATCTCCACGTCGCACTG +TAATTTTTCAAGTATCGTTAGTCCATTCGCCCGAGAAACTCCACAGTTAC +AAATGCTTTGCAAGCATTTTCAAGAAACCATGGTTCCTGGCATGTTTTGG +CGCATTCAATAATGCCGCTAATAATAATAATAATGCCTCCTATTATGATG +TCGCGTTGTCATCGTTTGCTGCTGCTCCGTCAGATACTTTATGGGGTTGT +TGTTTTCTCCCTCCGCATCAAACGACGTTCCTCTCTTCAAAATGATGATG +GATATACATATATCTATGTTTATAGAAGAATTGAACCCCACACTTTACAT +ATGAATAGATGGGGACCTTGTTACCTTGACTATCGGGAAGAGATTGAAGG +TTCCAAAGACGGCTGGCTGTGGCTCATTAGGCTAAATGATGCGTGATATT +ATTCGCTCTACGGCACCTCTATGATCCAGGAATAGCAGTCACTGTCAACA +AGAGTCACCTCACGGCAACAATACTCCGCTAATTACGGTTTGCGGCAAAT +CCCAGAATTTATTTGAAATTCTAACTGGGTGTTGCAGATGGGTCGAGGAA +ATATGATAGCAATAATTATTGTTACGTGGCAATTGTAATCCGTCAGATAT +CGTTTCGAGACCTGCTGACTCAAGAATGTGAAAATGGACAAATTTGGAAA +ATAGGTAGCGGAAAATTTTCGCAAGTTTTGAAAATTTCGGTCATGATACG +ATACGAACTCCTTGATTTTCACAGCCCGACAAGCCGTACGCGTACGCAAT +TTGTCTACCGTATACCTGAACGTTCAGGCTCGTCTATCTCGAAAACAGTT +GGTCCAGCCTTTTTGTGGGGCATATAAAAAAGGTCAGAACATAAATTCTA +AAATTTTTTGGACCATAGCTTGTTTCGTTATCACGCGCCCAAACCTGATC +TACACTCAAATTATCAGTAGAGCGCATTTGCATGGATGTACCACTTGCCG +GGCCGTGATTTTGAATGGAATATTAAATTCCACGTCACTCTAGTGAATCT +CCGCTTCTCAATATGCTTCATAATTCATCAAATTCAATTCATTTTCGGAT +AAGCCAGTTGTAAACAGTTGTGTGTGTGTGAGTGAGCTCATCCTTCATAA +AATGAATAGAGATAGAACACTGACACTCTCTCAAAGACAAGAGGAATTAA +TAAATATGAGAGCTCATTAGCTGCTCTTCTCGTGACTACTAATTAGGAGG +GTGGTACATGAAACAGTGTATGTGTAAAGGGATCCCCGCCGCCGCCGCCG 
+CCAATGTCTTTCGGTGTATGGCAATATCCGAGTGGTGTTCTCATTTTGAA +GAGAGTATTAGGATGTAACGTATCAACTGGAGAGAGATAGTTCAAAGATT +GGGGATTTGAGATATTTTAGGTGTCGAAATGGATGAAATATAAGCATAGG +GAGGAAATAAATAGAAAACGATATACTTAAATAGCAATAGTTATTTGAGT +TCTTACTGTAGTTTTCGCTAAGAGATATCGCGCGTCAAATAAAATGCTCT +ACGTACGCATTCTCATGATTTATTGTTCCCGTAATATAAACATAGCAAAC +ATTTTTAATGTTTAAATATTCCTCAATGCTATTGAAATTCAAATTAGTAC +AAAGAAAGCAATTTTTCGATTTTTTTAACGAGAAAACTAAGTTTTCATCA +AAAAATATGTAGATTTTGATGAAAATAATATGACAATTTATTTAAACCTA +TATATTTTTTTGTCCAAAACAACTTTACGAAAAAAATTTTTTTTTAAATA +AAATTATTGGATTTTTCGTCAACTTTACGATAAATTCCGAAAAATTAACA +ATAAATATAGGAGAAAAAAATCCGAAATTAACGAAATCCCCTGTTCCAGG +TTCTGAATATCTCGTCAAATGAGCTTCCATCACTGCGAAGAGAGGAATCG +TGTGTCGCTCAGCAGCTTCTGATCGTCGATTTGTCTAGAAATCGGCTCAC +CAACATCGAGTAAGGGATTTTCCGGATATTTTATAATTTTGATCTTTAAA +AATTTCAAAGTTTGTGAAAATGTGAAAATCTAAAAATGTGGAACAAATAT +TGGTTTTATCTATTTAATACCCCAAAACTTTGAGATTTTTCCGTTGAAAA +ATCGAAAATTCGAAAATTATCTCAAAAATTTCAGGCAATTCCTACGTGGC +ATCCCGGCAATCCGGCAAATTTCGGTGGCTTACAACTCGATCGCCGAGCT +CGATTTATCGCTGGCAACTCCATTTCTACAACAACTCGATGCTGAAGCCA +ATCGAATCGTCGACTTGACGTCACTTCCAGGCACTGTTGTACACGTGAAT +TTGGCTGGAAACGCGCTGAAAAGGGTGCCGGATGCGGTAGCCGAGTTGGC +GAGTCTTGTGGCGTTAAATGTGTCGAGAAATGAGATTGAAGCCGGAAATT +CGTCGGTTTTTTGTGGGTTTTCGGATTTTTTTTTGGAAATTTGAAATTCG +GAAAAAATTCAATTTTTACATACAATTTTGCGTATTTTGATCATATTTGT +TTTAAAAATCGAGAAATTTAAAAAAAATCAAATTTCGCTTATTTTCATTG +GAGAATACATGAGAAAAATATTTTTGTTTCGAAAAAGAAAAATAGCCAAA +ATTTAATGCTATTTTTTCGAAAAATTTCGATTTTAATTTTTGAAAAAGTA +TTGAATAATTTCATCAAATTTGTTTTATTTTGATGAGAAAATACGTGAAA +AATGGAAAAAAAATATGAAAATCTTTAAAACAAAAAAATAATCAGATAAT +TATGCAATTTGGCCTGAATTTTCAAAAAAAAAACTTGGATTTTCGAAAAT +TTCATCAAATTTTGTGTATTTTCATGCAAAAAAAGATAGGAATTTCCAAT +TTTTGACCTTAATTGCAAAGAAAAATTCCTAAAAAATTGTAACGTTTCAA +AATCTCGATTTTCTTCTGAAATTCTCAAAATCTTCATATTTTTCCAGCTT +CCCCAGAACTCGAAATGCTCGACGCCTCTTACAACAAATTGGACAGTCTG +CCCGTCGAATGGCTTCAAAAATGCGAAAAACGCATTGCTCACCTCCATTT +GGAGCACAATTCGATTGAGCAGTTGACTGGTGGAGTGCTGGCGAATGCGA +CTAATTTACAGACGGTTCGTAGACTTGTTTTTTTTTTTTTTTGAAAAAAA 
+ACACGCTGAAAAAATCTAATAAAACCCAAAAATTTGCATTTTTGTTTAAA +AAATATGAAAAATTGTTCAAATTCCTAGTTTTCAATATTTTCAACAAAAA +AAAATTCTAGAGCTTCCATGGTAGGCAGGCGCGGTTTCAGGGCCTGACGC +CGACCTTTCGCCTCTTTTTTGCATTTTTACGGGAATTTTCAAATTTCTAA +TTTTCCCCATTTCTATCAAATTATTGAAAATCAAATTAAAAACGCGAATC +GCGTATTGAGGCGAGAGGCACGCAGAGGTTGCCTTAAGGTCAGACAGGCA +AGCTTTTTAACGCCTAACGTAATTTTAAAACCTGAAACATTAGAAACTTC +CACACGTTTGTAATTTCACTGAATTTTACTGCACCTCTTCATAAATTCAA +TGTATTTAGATAGTGTAATTTTTAAAGGTGGAGTGCCGAAATTAAAGACT +TTGCTTTTTTAGACCAAAATTGGTCCTAAATAACCGAATTTCGTAATGAG +ACTTTCTGAAAATTTCTCAAAAAAAAGTTATGGCGGTTCAAAGTTCGGGA +AAATAAGGTCAATTTTCAGCTAAAATCAAAATTTTTAAAATTTTTTATTT +ATCACTTTTTGATAAATATTGTGGTCTTTGATTAGGCGGGGCACCAATAA +AAGTTACATTTTGTGCCCCACTGACCATGAATGTATTTAAATCAACGAAT +AAACGCCTAATCAAAGTATTTATTAAAAAGTGATAAATATAAAATTTAAA +AATTTTGATTTTAGCAGAAAATGGGCTTTTTTTTCTCAAACTTTGAACCG +CCATAACTTTTTTTTTGAGAAATGTTCATTACGAAATTCGGTAGTTTTGG +ACCAATTTTGGTCTAAAAAAGCAAAGTCTCAAACTTCGGTACTCCACCTT +TAAAGAACAGTGTAATTTTTTTCTATTTTCCAATCAGTGCAACTCTAATA +GCAACTCTAAACTTATTTCAATAAACTCTAGGCACAGTATTTTGAATAGG +CGATCGCGTGACAGTTTTAACCAAACATAGTCATGATGACCTTTTGTCCT +TACAGAATCTTCTCAAGTTCATGCACACACAGGTGTCATTTACTCTGTCA +AGTATTGTATAAGTGTGATCCCGAGAGTATTATAAATGGTAGTTAGACCA +CCCTTATGGTTAAGTTGTTCGTATAATTGTAGGGGTGGGGATCTTACCAC +ATTTATACGGCCCCCCTCCTATTGTATTGTATTGTTGCCACCAGTCGTCT +TGAGAATCTCTTTCAATCCGGGGTGACAGAAGGTGTCATATTGTCGGAAT +GTGTAATAGGTGGGTCTCGAATGGATATCACTAACAGCTGTGAAGGGATC +CGAGAAGGGTTTTCGGTCTTTCTAGGGGTTTAACTTTCCGCTGTCAATAT +TATTAATCTTACAAGGAAAGGGTTTTAGTTTACCGTCAGACTTTAAAACG +AGACATGTGTCATTTGAAAGTGTTCCATAAGTGTATGTCACTCCAAAATT +TCAAGCGGCAAAGCTCCAGTCTCAAACCCTCTAGCATCGATCTGAAAACG +TTTCAGTGCATTTTTCACTACTTTGAGGCAAGTAGAATTTCTCCAAAATG +TTTTTTTTGCAAATCTTCAATGTTTTTCAGAACGTTTAAAAGAAGGAAAA +CAATCTGGAAAATTTTTGGAAAATTTTAAATTTTTTGAAAAAAATCTGAA +ATTTCTTGGACCTGGTGGCGTGAAGAAATGTTCAGATTTTTTTCAAAAAG +CTTAAAAATTTTCCAAAAATCTCCCAGATTGTTTTCCTACTTTTAAATGT +TCTGAAAAACATTGAAATTTTGCATGTTAGTACGATTTGATAATCGTGTT +ATAATTACATTACACATACATAATAATCAATTGAAATATACAATTCTTAA 
+TTATAACCTGAAATTGAGATTAGATACCTTAGACAATACAACAAATAAGA +ATGTGATCGATTCAGCAGACCCCTATTTGTGAAAAATGCCTTCTAAATTT +TATTTTACACTTCTCCTAGTAGATGAACAGCACCTGCCAATAATTTCATT +GACAGCCGGATGGTTTTTGGCTCCTCTAAGACAGTATAATCCCGTCTCCC +CCGGGGGGAAGTGGTGTCAAATTGCTTTGCGGAGCAGGTGCTTTTTTCTG +AATCGAATTGGCTTATTGGTGACGGAAAAACCCCTAATAGAATTGATACA +ATTTGGTTTTCAAGAGGATTTGGGGAAGGGTTTAGAGGGTAATTGAAACT +GAGTAGATCTTCGTCTGTCGTGGAGATCAGAAGATTGAGGGAAGTACTGG +GTTTAAGGGGGTCAAGGAGTACTGTAGCGCATTTCGAATACTACTTTAGA +TAGTTTAGGTATTATATCAATAGGATAATATATTATCAAGTTGCACCAAA +ATTGGAGATTCTAGTAGTGCATTTTCTTTATATGCTAGGTATGTATAAGT +TTCTAATAAGTGTATATACCTACCATGTACCTAGGGGGAGTAGAGTTTGT +GGGTATTTTGCTTAAATAGACTAAAACGTGTCCAAAACCACCGAATTTCA +AAATGAGACTTCACAAAAAATTTCCAAAATTTTTTTTATGGAAAAAAGAG +CAAAATTTAACTAAAATCCGAAATTTCGCACAGTTTTCTTTGTCACAGCC +GCTGGATTTGAATTTTTCTGAAATTATCACCCTTTAATCCTTATTTTAAT +AATTTATCTCGCGGAAATTCGTTGATTGAGACAACTTTTAGGCCGATAGG +CATCCCATCTTGATCATTTTTGGATGCCTATCGGCCTAAAAGTTTTTTAA +TTTCAGAAAAACTAAAATCCAGCTGCTGTGACAAAGAAAAGTGTGCGAAA +TTCCAGATTTTAGCAAAATTTGACTCTTTTTTTTCTCGAAATTTTGACTC +GCCATAAAAATTTTTGGAAATTTTTTGTGAAGTCTCATTATTAAATTCGG +TGGTTTTGTACCAGTTTTAGTCTATTTAAGCAAAATACCCACAAACTATT +ACACTTTACTTTAACAAGACACACAATAATTCACAAATGGTGTAGTATCA +TGCCGGAATTTATATAGGATTGATTCTTGAAGAACGAAATTTTAAATAGA +TTATTGCAGACTATGTAACACTATTTGTATAAACAATTAACATAGTGTAT +TGAATAATGACTTTATTGAAATTAATACAGTTTTTGATATACCCATCTTC +TATTAGTATATCATGCAATACTAATAGGTAATATGCAATATCACTTTAAT +TGTTATTTCCCATGCGTCGTGTTTATACAAATGCTTTATATACTCGAATA +GTCTATAATAATCCAATTAAACTCGAGAATCCCAATACAGAATATAGTGT +TTACCAAAAACACTTGTGTTATTATTCTAATATACATGCACACACACACC +CACACACAGGTTTGACACCAGTTCTACTAATATTTGCATAATACCATTGA +GATACCTGAAGTACCCTATAGATATGCACGGGATTCGTTTCGGGCACTGC +CACGGAAAATATTGAAAAAGTGTGGTAAATTTACGAAAAACAACAAAAAA +AATGCGATTTTCCAGAGAAATACGAAAAAAAAGAAAAAAATATTTAGAAA +ATTAACAACAAAAAAAAAAATTTTTTTTTATTAAATAATAAAATGTGTTC +ATTCTGTTTTTCCCCTTTCAGTTAAACTAGGGCTTCCATGGTAGGCAGGC +GCGATTTCAGGGCCTGCCTGAAACCTGCCTGCTTCACGCCGGCCTCAGGT +CGCCTTAAGACGGCTGCTAAAAATTTGAAAAAAAAATTTCAAAATTTGAA 
+TTTCCGCGCTTTTTTCGAATTCCTAGAACGCTTTTTTTTAATTAAAAAAA +AATTTCAGTTGGACCTTTCATCGAATCAACTTCGAGTTTTTCGAGACGAA +GTACTTCCGGAGAATTCGAAAATTGGAAATTTGAGACTTTCCAACAACTC +TTTGGAGCTCCTCGAGCCGTCGAGCCTGAGCGGCTTGAAATTGGGTAATC +TAAATTTATATGAATTTTGCAAATTCTAAAAATCCTAATTTTTTCTAGAA +TCCCTTGATCTGAGTCATAATAAGCTGACAGAAGTGCCCGCCGCAATTGG +AAAAGTCGAGCAGCTGAAAAAAGTGGATTTGAGCCATAATAGAATTGCGA +AGGTTTATCAATATGTGCTCAATAAGATTAAGCAATTGCATACTGTTGAT +TTGTCGAATAATCAGTTGCAAAGTGTGAGTTTTTTGTTGATAAAAATTTA +AAATTTGAATTTTTTCTTTTTTTTTAATTATTAATTCTGAAAAATTCAAC +GAAATTTCCGAAAATTACTTGAAATTTAAATTTTTATCCAGAATTTAGTC +TATAAAAACGAAAAAATACGAATTTTCGATTAAAAAAATTAATTTATTTA +AAAACACACTTTCGAAAAAAATTTTGAATTAAAAAAAATTTAAAGTTCCT +TAAAAAACTATTTCCTCAATTTTTTTTCTGAAATTTCACTCTTTCATTTT +TTAATTCATTTTTTAAGTCTATAAATACGAAAAAACACGAATTTTCGATA +AAAACACATTTTTCAAGTTTTTAAAAATAATTTTAAATTGGAATTTTTCT +TTAAAAAATTGTTTTTTATAAAAAAACACATTAATGCATTTTTTAAAAAA +TAATTTCTTGAAATCTTCCAAAATCGGTTGTAATTTTAGCGAAAATAACT +ATTTTTTCCAGTTTTTATTTAAAAAATCCCTCGAAATTTGAATTTATCCA +GAATTTAGTCAATAAATGCGAAAAAATACGAATTTTCGATTAAAAAAATT +AATTTATTTAAAAAACACTTTCGAAAAAAAAATTTTCAATAAAGAAACTA +GTTCTTAAAATTTTTTTTCTGAAATTTCACTTTTTCATTTTTTAATTCAA +AAAATTTGAATTTATCTACAATTTACTCTATAAATACGAAAAAAATACAA +ATTTTCGATGAAAAAACACAATTTTCAAATTAATAAAAATATAATAAAAA +ACATATTACTAAATTTTTAAAAAATAATTTCTTGCAATTTTCCAAAACCG +GTTTTAATTTTAGCGAAAATAACGATTTTTCCAGTTTTTATTTTAAAAAT +TCCCTTGAAATTTGAATTTCTCCAAATTTTTTCTTGAAGTTTTTAAAAGT +TCCATCATTTTCTACTGGAACTTTCAAAAAATGTTCCTTAAAAAATCTCT +AATTTCCGCTAATTTCTACGTAAAATTTCAGCAAAAAAACAACGACATTT +CATGTTTTTTTGTTAAAAAAATAATTTTCTTGAAAATTTCTTTGGAATTC +GATTTTTTTTGAGTTTTAACCCAATAAAATGTATAATTTAAAAATATAAA +AATTCCAGATCGGCCCCTACATCTTCTCCGACAGTTCTGAACTTCATTCC +CTGGACGTGTCGAATAATGAGATTTCACTGCTGTTCAAGGACGCTTTTGC +GAGATGCCCAAAGCTGAGGAAAATTTCGATGAAAATGAATAAAATTAGTG +AGTTGAAGCACGAAAAACTGCCCAAAAATTAAAATTTTGAAATTTTTGTG +TTTAAAAACCTAGAAAATCGAGATAAAGTAGGCTTAAAGTTGCTCAAAAT +CCGCAGATTTTTTGAAATCGATTTTGAAAAGTGCACATGGAGCTAAATCA +GACTTCAAAATTCTCAAATTAATTGCATTTTCACTCGAAAACCTCTGAAA 
+ACTGCCTTCTTCCTATATTAAATCAAATTTTCTTTCAGAATCCCTCGACG +AAGGTCTCACAGAAGCTTCCGGCCTCCGACGTCTCGACGTATCTCATAAC +GAGATCCTCGTGCTGAAATGGTCGGCTTTACCTGAAAACTTGGAGATTCT +CAACGCTGATAACAATGATATCAATCTCCTGACCGCCGCCTCAATGTCCC +CAAGCACCGCAAACTTGAAGTCCGTTTCGCTTTCCAACAACGGCATCACC +ATAATGAATGCGGACCAGATTCCGAATTCGCTCGAGTCGCTGGACGTGTC +GAATAATCGACTTGCAAAGCTCGGGAAGACAGCGTTGGCCGCGAAATCTC +AGTTGAGAAGGCTCAACTTGAAGGGCAATCTGCTTACCGTAGTGGCCACC +GAGTCGATGAAAGTCGTAGAGGCTGTGCATCCGTTGAAAGTGGAAATCTC +GGAGAATCCTCTGATCTGTGATTGTCAGATGGGATGGATGATTGGTGGAG +CGAAGCCAAAGGTTCTCATTCAGGACTCTGAAACCGCAAGCTGTTCCCAT +GCCGTTGATGGGCATCAGATCCAGATTCAAAGTCTCAGCAAGAAGGATCT +ACTGTGCCCATACAAAAGTGTATGTGAGCCGGAATGTATCTGCTGTCAAT +ACGGAAATTGCGATTGCAAATCCGTATGCCCCGCCAATTGCCGATGCTTC +AGAGATGATCAGTTTAATATCAACATTGTCAGATGCCACGGGAACTCATC +AATGGTGCCCAAAAGAGAATTCGTGGTCTCCGAGCTCCCGGTCTCTGCGA +CAGAGATCATTCTGAGCGGAGTCACCCTTCCACAGCTCCGAACTCACAGC +TTCATCGGAAGACTTCGTCTCCAGAGGCTTCATATCAATGGAACCGGGCT +CCGATCCATCCAACCGAAGGCTTTCCATACTCTTCCAGCACTGAAGACGC +TGGATTTGTCGGATAACTCGTTGATCTCGCTGAGCGGGGAGGAATTTCTA +AAGTGTGGAGAAGTCTCGCAGCTTTTCCTCAATGGAAATCGATTTTCCAC +GCTATCCCGTGGAATCTTCGAGAAGCTTCCGAACTTGAAATATCTGACAC +TTCATAACAACTCCCTCGAAGACATCCCTCAGGTTCTTCACTCGACGGCG +CTCTCCAAGATCTCCCTGTCATCGAACCCCTTGAGATGCGACTGCTCGGG +AGGATCCCAACAGCACCTTCACCATCGTCGTGACCCAAAAGCTCATCCAT +TCTGGGAGCATAATGCGGCCGAGTGGTTCTCGTTGCATCGGCATCTTGTC +GTTGATTTCCCCAAGGTTGAATGCTGGGAGAACGTGACGAAGGCCTTCCT +GACGAACGATACGACAGTGCTGAGCGCCTATCCACCTAATATGGGAAATG +ACGTCTTTGTGATGCCTATTGAAGGTTGGTTTAAAATTTTTAAAAAAACG +CGCGTCAAATATTTTAAAGTCTTTTGTTCGAAAAATAACCATTTTACAAT +ATGTAGGATATATCTATAACAGCTACTGTAAAAACTCCAGATGTTTGAAT +TTTTTTAAAAATGCGACGTGGTAAATATTATATTGATTACATATTATTAT +TACAGTGTATTTTTTTCCACTTCTACGACTTTAAAGGGGGGCGCATTTAC +GCGCGATGGTCCCAGCATTGGTCTCGCCACGCACCCCAAAAATCAATGGG +TGGCGCGTGTCGAGACCATCGCGCGTAAATGCGCCCCCCTTTAAAGTCGT +AGAAGTGGAAAAAAATTCACTGTATAATTATTAGTCAAATGATACTTATT +ATGATATTAGAAATTAAAAAATTAGGTTTGAACATTTTTTTTCCTCGATT +TTTTTACGGTATTTCACCATGTATGCGTTATAAATACAAATATGATCCCT 
+ACCTTTTTATAACTTTAAAATAACTTTTAAAAAGGAATATAATACATGCA +CAAGCACCATAAAATTTCAACGTTTGCTTTTTCTGGTCCGAAGAATTTTT +TCAATACGTTTACTTTATATATCACATATACAAACAATATACTGAGAATG +CGTATTGCGCATTATATTTGACGCGCAAAATATCTCGGAGCAAAAACTAC +AGTAGTCCTATAAATTCCTACTGTACTGCTTGTGTCGATAGAATATTAAA +ATTGCAAAAAAAATTTAAAAAAACGAAAAAAAAACAAAGTATAAGGGAAT +ATATAGCTATTCCAAAATAAATCAATTTCAAAAATCGAGCCCGTAAAAGA +GAAGGACTTACTGTAGTTTTCCCTACGAGATATTTTGCGCGTCAAATATG +TTGTGAAATACGCATTCTGAAGATGTAGTGTACTCGTAATATATATTTTT +TAACAATTCAAACATTTCCAGAATTCCTGCGCGACTACAACTCAACAATC +TGTGTTCCATTCTCATCTGGATTCTTTGGACAAGACCCTCAGAATAGTAT +ACTCTTTGTAATAATAACTATATCGATTGCTGTTCTCCTCTGTGTCCTCG +TTATTCTCGCAATTTCATTTATTCGAAAATCTCACGACGCAATCAATCAA +CGAAGATACAAAGCATCATCTCTAAATTGTTCAACATCAGCCGGCTCGTC +GCCTCTTCCGGTTCCGCTGTTGAGTTATCACGCATTTGTGAGCTATTCGA +AGAAGGACGAGAAAATGGTGATTGATCAATTGTGTCGACCGCTGGAAGAT +GAAGATTATCAGTTGTGCCTGTTGCATCGGGATGGGCCGACTTATTGCTC +GAATTTGCACGCGATATCTGATGAACTCATAGCTCAGATGGACTCGTCGC +AATGCTTGATTCTTGTGTTGACTAAACACTTTTTGGAGAACGAGTGGAAA +ACGCTGCAGATTAAGGTAACAAATTCAAACTTTCAGTTTAAAATTGATAT +ATTCGTAAAAATAAGATGTTCTTAAAAATTCTGAGAATGCGTGTTGCGCA +AAATATCTCGTAGCAAAAGCTACTGTAATTATTTTATATGACTATTGTAC +CGTTCAAAATTACTATCCAAATATTTGATTTTTTGTTTTAAAAAATTATT +TGGAATTTACTCAAATATTGAAAACAATATTATCGAAATTCCAGAAAATT +TGGTGGAAATTCCGAAAATTTCAATTTTTTCGCTAATTTTCGAGAAAAAA +AAAATTAAAATTTGGATTATATGAAATAAATTTCCAAACATTAAATTAAA +AATTTTTTTTTTTTGGATTTTCCAATCTCGAACCTTCAAAAATTGAATTT +TTTTGAAATTTTTTTTCGGAAAAAGTAAAAAATGCTCGAAATATCGACAA +AAAGTTGAAGTATTTTTTTTTGTAAAATTGGAGATTTTTGAGATAAAAAA +TCTGAATTTTCATTCAAATCTTGAACATCAAAATCAGCAAAAAAAATTGC +TATAAATTTATTTAAATAAGATTTAAAAAGAGTAATTCGCTATGATTTCA +AATTTTCAAAAAAAAAAATTTTTTTTTCAAGTTTTAATTTTTGAACACTA +TCAAAAAATTTATTTTTAAACCAAAAAAATTCCGAATTTTCTGTAAAATC +TCAAAAAAAAAACATTCCTAGAAATTTAATCGAATATGATTTAATAATAC +TATTTTTAAATCATATTCAATTACATTTCTAGGAATTATTTTTGCTAGAT +TTCAAAAAGTGTATGAAAAATTAAAACTTTTTGAAAAAAAAACTGTTCTA +AATATAATTTATAACACCAGAAATTCACTAATAACACTCCACATTTTTCC +AGACCTCCCACCAACTATTCGCCAAAAACCGTGCAAAACGAGTGATCGCC 
+GTGCTCGGCGACGGTGTGGACGCGAATCTGCTGGACGATGAGCTCGGACA +GATTCTACGGAAGCACACGAGAATCGAGATGCGGAGCCATTTATTCTGGA +CACTTTTGCACTCATCACTTCCATCACGACTTCCATTACCATCGAATAGT +GGCGATGATTCGTCTCAACTATATTCGGATATCTATGGAATTGTGCCTTC +CGATGTTGTTTAGCTTACAGTTTTCCCGTTTAGGTCACAGTTTTTAGTAT +TTTTTTTCCTCTTCCAAATGTACGAGTTTCCATTATTCACGAGGTTTCTT +TTTCACCCCCAATTCTTCTGTGCACAATTTGCCATTTTTTGAATTTTTAC +AACAATACCGGTACTATATTTTTCGATTTTCTTTCTTTTTCTATGAATCT +TGCCATCATTCTCTAATAATTGACCAGCGATTATATTTTTGATTTAATAA +TTGAATATTCTCGTTTTTCAGTTCAAATTATTGCTTTTTTCTGTCACCAC +AAATTCAAATTCTCGTCCTACCCTGTGTGATCTTCTGTAATATATATATA +TTAATAATCGTCGTTTCTTCTTGTTGTCATTTGCGGATGCTTATGAAGCT +TGTATTTTAATTTTTTGTTTTGTTTTTTTTCCTGTTTCAGCTGCAAAAAA +TTCGGTAGAAATTCTTGAAAACACGCAAAAAATGAATAAAAAAAATAGTA +TATTAGCTTGTTCGAGAGGAGTTCACAAGCGGGTGGCCTAACATCTCCGC +GGCCGCCCAGTGGTGTACTCCTCTCGGGTGAAAGAATTCCCATTTTATCA +TCAGTTTTTCGGCCTATTTTTTCAGTTTTTCTCAACTAAATAGTCAATTA +TCTCTTAAATAATGTCGAAATTAATTTAATTTACACATTTTTCCAATAAA +ATTCCAATTTTCGGTCATTAAATTACCCCTTTCGAATTTTGCTCTCAAAA +ATTAAATTTTCATCGATTTTTAATTCTCAAAATTCTTGAATTTTCAGCGA +AGATGGGCAAATTCAGCAATCAGAAGAAAAATCGTGTCAAGAACAAGGTG +GCGGTGACAGTAAAAAAAGCGCAACGCATGAAGGCAGACGCTAAAAATGC +CAAAAAAGACGGTGAAGTGGATGTTGAGATGAAGGAGGAGGTGGTGAGAG +TCAGAGGACTCGCCGTATCGTCGCTGGTGAGAAAAAAAAGGAAAAACTCG +GCGAAATTCTTGATTTTGATTTTTTTTTTCAGAAAAAATTGGCCTCCGGT +GAGCTGCAAAACGTGCCAAAAGTGAACGAGAAGAAGATTATCCGCAAGAC +AGAGCTTCCAGTTCGAGAGAAGTAGGATTTTCTCGATCTCTCCAAACAAA +AAAATTGTTTTTTTCAATTTCAGCAAAAAGATCCTGGACGCTCCGACTGG +AAAACGTGGCACCACTGCTCAATACATCACAAAAAAGAAGGCAAAGAAGA +TGTACAAGAAGATGACACACGACGCACGCGACAAATATCGAAAAATTCAG +GCGGAACTGGCCGGAGACGGTGAAGATGACGAGGAGGGTGAAGCCGAGGT +GATGGAGCAGTGATTTTGGATTTTTCCGCGGCATTTTTAAAATTGTTTTT +TTTTGTTGATTTTGTTACTGTTTTTCATTTTCCACATAAAATAATCGAAA +ATTTATTTATCACAAAAACTTTTGCACAATTTGCTCAGCTGAGTGAGCCA +ACTGTTTGCCACGTGTCACGATTCGATGCATCCATCCATTGGGCTCCTCG +AGCTCCGCTTTCGGAGCCTGTAGCTGCTTTAGAATTATTGTCCAGAGCTC +AGAAATGTTCATCGAGTACTGCGCGGAAACTTCGACGAAATTGCACTTGT +GTACTTTGGCCAGGCAGGCTCCTTCTGGAAAAATCAATTTAAAAATCGAT 
+AATTTGAAGTTCCACGTGGAGCCGCGACGCGGCACGCGTTGCATCGTTTT +TTTTGGTTGAAAAACATGGTGCATCGAAGAATTTTCCTTTGACGATTTTA +TACGCAATGCATCACATTTGACGAACAAAAATTCAAAATACTTTACTTTG +CAAACGCGCTCCATTGATAAACTTATGAAAAATCGATAATGCTCAAATTT +CACAAAATTTTCCTCCATTTGAAATTATATAATTGTTAAACAAAATTAAT +AAAACCAAACATTACGCGCAGTTTCCGATAAATTTCGATTTTTTCAAAAT +GCAAGCGCGCTCCATTGCTAAACCTTGAAAAACCCACCCATTTTCGACAC +CACCGTATTCCGTTTCAAATCGATTTTATTGCCAATCAGTATAATATTGG +CTCCGCGAGCAATTTTTCGATTCAAAAGCCGACTGAGCAGGTCTGTGGCA +CACACGAATGATTCTCGATTGTCAACGTTGTACACTATTGCGTACATTGT +CAGTGATGACGCGAATGGAGAATTCTGGAAAAAAACAAGAGTTTAAAGGC +GCAGGTGGGTCTCGCCACGATCCATGTTTTTTTGCGGGAATAGTGGAAAT +TTGGAGTTAGTAAAAAAATCAAATTTGTGTAATTTTTAATAAAATCGGTT +CGCAAATGCGCTAGTACATCAGATTTCACGAGCAAATTTCATTTTGTGAG +ATTTGCACCAAAGATATGGGCATTTGAACTGATTTTTTAATGGTAAACAC +GCGAAAAAAAATACATATCCAAAAATTGAAAAAATTTTAGGTTTCACAGT +CTCCTCGGCATCGAAAGCGCAGTAGTAGGGGTGGCGATAGGGCGCCCTGC +TAAATTTTCTATTTTTATGGTCTTTCTTTTCGTTTTGTGCTAATGAACAA +AAGTTAAGTTCGAAATCTGCGAAAAAAATCGTTTCAACGCTAAGAAACGA +AGAATTGTATTTCTCAACGGTAAAAAGATTCACTCCTGCGCCAAGGTGAC +AGCAAGTGCGCCCCAGCCCAATTCGACGCTGAAGAGACTGTGGGTTTAAA +AATTTAGAGAAGTCGCAAATTTTTTGTTCAAGATATCAGCCTAAGCTACG +GTTTAAAGGCGCACAAGCACGTGGTGTCAAAGTGTCCCGTTTCGGTGTGA +TCTACCAAAAATGCGGGAATTTAGACGCAGACTTCTCAGCAGATTTCGTA +TGGTTAAGAGTGTGCTGACGTCACATTTTTTTCTGCAACAAATTCCCGCA +TTTTTTGTAGATCACACCAAAATGGGACTGTTTGACAGCACGTGCACAAG +CAGATCACAAGTGGGTCTTCGAGTTATCCTCCCCAAATCCTGTCCAGTAG +AGCGCACTTACCTCCAACGTGGCTTCCAGCAACATTTCCAACTCGATTTG +CTCGTTATTCAACAGGAAATTCATCGTTTTCGATGAAGTGTCATCTCCCT +CATTTGACTCATTATTGTATTGGGTGACGAGCCGCGTGGCAAATTGGTTA +ATTGCTGACAGCAATGTTTTCTTCCCGCTATTCCTTGATCCATAGACACG +TAGAACCACATGTTGCATTGGAACATTCGACGTTTCTTGGAATAGCCAGA +CTTCTGGACACGTGGCTCGGCGATCTTCTGAAAATTTTTAAATTTTCAAA +AATTTGGTTAAAAATTTCAAAGGAAAATCAATAACAAAGCTACTCCAAAA +TTTTCAGCCTGATTGGTTGGAAAATGAGCAAGTTACAGCGCTTCAAGAAT +TCTAGGCCACGGTCACCAGGGAGTGGTGGCGGTGACCCTCTATTTCATTT +TTAGATTTTTTAAATGGGAAACTCGAATTTAAATGTGAAACATCTGGAAC +ATTCCAAGAAAAATTCAAAAAAACTCTCTAAGGAGTCGATAACCCTCCCC 
+CAATTCCTGAGATGTGACGTCATCAGGGGGAGCCCGTGACCTCATAGCTT +CAAATTTGAATTTTTCAACGAAAAGTGTGAAATTAGATGTAAAATTACTA +GGAAATTAGAAGAAAGATTAGAAAAGAAATTTCGGGGGTGTCGCTCACCA +CCCCCCCCCCCCATTTTTATAGTGTGACGTCATAGAAAGTGGGTCCCCGC +TACCTTCCATTTTCAAAGTTTTTTTATTTTTCCGATGATTGTATTACTGT +ACTCACTGTAATTATGTGCAACAACTGGTTGTCCAGTGGAGCACGTTTGC +TTATACCCATTGTCCGTCACAAATCCATTCTTAATTCCAAAACTTCGAAG +ACTTCCCTCCGGTCGTTTTTCGACGTCTTCGATGTTGTTTTGGTGCATTT +GTAGAGAGTATTGGTATCTGGAATATTAAGGGGAAAATGGTGAAAAAATT +CAGAAATGTACAGGTAATCGAGAAAAAAAGTTAAAACTTTTTTTGAACTA +AATTTTTTTTTCAAAATTTCAAAAAAAAAATTTTATTTGAAGAACTTTTT +TATAATGTTAATTTTTCTGAAATTACTTAACTTTTTCTCTCAAAACTTGC +TCACTGAGTTTTTAAATAGATTTCTGAAATTGTCCACGTGAAGTACACTC +CCAATATATCAAAGGAGCGCATTTGCGAACTAATTTTATTAAAAATTTCA +CAAATTTATTTTTTTTAATAACCTAAAATTTCCGCTATTTTATCGTATTT +TCTAAAATAAATAAAAAATTGTCCAGGAGGAGTACATGGCCAATTTATCA +ATAGAGCTCATATGCACACTTGTTAAAAAACAAAAAAAAAACTTATTCAC +CTTGGCGGTGGGTACCGCTTTTTCACACGACGAGGCGAGTTTTCCGAGCT +GAAAAATGATACTTTTGGCTGGGAAGTTTTAGAATTTTGGGGAAAATAAG +CATTTCCGGTGAAAAAAATTGTTTTTTATTGGAATAGTTTTTAATATTCT +AGGCCATGTCGTAAAGAATTGAGTCAAAATTTAAATTTTTGAGGTACGAG +CTTCCAGTTCCCCCCACAATTCCCCCTATGGCCTAGAATTTCAGAAAACT +CTACCATCGATTCAAACTCTGTTACTTTTGAGGATAAGGGGTTCCGGACG +CCCCAAACTTCCGCCCGTGGCCTAGAACTCCCAACTTCTTCCACCGACCT +GGACGTGGTGCTCGTACTGGCAAAATGCTCATCAACATCTGGCAGTAAAT +TGTTCAACCGTCTTGGGGACACGGTACGCGGGGTCGACGGTAGACTTCGG +CGACTGATCGGCGATGTAGGAAGACTTCTGAAAAAAAATCAATCAATAAA +ACAACGACACTCTGCTCGTGAGGATCCGCCCATTCCGCTTGTTGTTTACC +AATTTCTTTTAAAAAAATTCTACAATGCGCAAAATTAATAGAACTACCTT +CTTATAATTTTTTTTTAACTCAAAATTTTCAAAAAAACAACGACACTCCG +CTTGTAGACTCCGCCCACTTTCCAAAAAGGACAATTCGGAGTGTCGTTTG +AAAAATATCCTAAAATTTGAAAAATTCATTTTTAGTCTCAAATTTTAATG +AAAATTACGTGAAAATCTATAACAACTACGACACTTTTGGCTCCGCCCAA +AAACGTCTTTTGGCTCCGCCCGAAAACGGGGCGGAGCCTGGCACTAATAT +TACAGTTTTTATTTCCATAAGTGATTAAAAAATCAACAAAAAAATTATTT +AGAAAGTATCTCAATTATTGAATGAAATTTTATAAAATCTACGACACTCC +GCTTGGAACCCCCTCCCCCTGCCAAGAATGGGGCGGAGCCTAGAACAAAC +GGAGTTTTTGCAAGAAAAATATTGCTTGAAATTTTAAAGACATTAATAAA 
+AATCCGAGTTTTTAGAAAAAATTGGGCCAAATCGAATAATAAAAAATACG +TTACTCCGCTAATAAAACCAGCTCATTACCAAGAAAGGGGCGGAGCGTAT +ACAAACGGAGCGTCGTTTGAAAAAATATTTTTTGGAAAACTAAAATGTTC +CAGCGGGAGGAAGCTTCTGCAAAGGCAATCTAATTTTTTTCCGGAAATTC +AATTTCCTCAGGGTTCTCTCTGCAAATTTGTTCACGAAAAAACTCGTGGT +GCTGTGCAAATGCGCTCTATTGATAATTTCATCTAGAGAGCGCAATTACA +TCAAATTAGCAATGGAGCGTTGTTGCATACCTTCTCGTTGCAGCGGGAGA +CGCTGGTGACGGAGGTGATAGCCGTGATGAAAATTGTTGGGGTATCGCCA +GCGCAACTGGAAGGGAAACACGACGATTCGACATTTTTCTGTAAAAAAAT +CCATGATAAAACTAAAATTTAAAGAAAAAATAATGTAAATAAAAGTGGGG +AAGTGGCTAAGCGGGGACACAAATACGCGGGAAATCCGCTTATTTTTGCT +GGATTTGACGTAAAAATCCGAATCAACTGGCACTCGACGCGTTTTCCAAC +AAGAAAAAACTAAAATTTCATATATTGGTGGCCTAGAAAATTCGAAAACT +GTTCAAGCTTACGTAAAGTGTGTATTTTCCATATTAGAGAATAAGATTGG +AAGAAAAATAAAATATTGTAATTTCTAGGCCACCGCAAAACTTCTAAGTC +ACCTATGAAATTTCTAGGCTACCGTGAAAATTCTAGGCCATCATAAAAAT +AAAATTTATATATTTTTGTCCTGAAACTAAGTCCACTTCCGCCAGAAATA +ATAAAAGCAACAAAAAAGACAACACAAAAAATTGAGAACTCTTAAGAACA +AAAGAAATCCGACCCGAAATCGAATACCCCACGAAAATTTTCGGAATTTA +GAAAGAAAAACTATTTTTAGACACTTTTTTTCCTTCAAAAGGAGGAACTT +TGTTGCGCGGCCTAAAAAATAGAAAACTCGGCCACCGATTTGTTTGCGGC +CACGGGACACTCCGCAGTTGCAAAGGCTACAGATGCAAATACTGGAAAAC +TAGGTCACCAAGTGGAAACCTTGGTCATCGCGTGGAAAAAAACTTTGACA +AAAAATTTTTGAAAATTTTATAAATTTATTCTAGGCCACCACCTGTAAAC +CTAGGTCACCATGTGGAAGGCTAGATCACCAAGTGGAGCCTAGACCAGCA +CATAGGCCTCTAGGCCACCACGTAGGAAATTAGGTCACCAACTGGAAAGC +TAGGCCTCCACATGATAAATAATATTTTTTGGAATTTCGTTTTGACCAAA +GAAACTTGTTTTTACCAAAATATTTGAAAATCGTTTTCAAGGCCGTCCGG +TGAAAACTGTGTAAACCTAGGCCACCAAATAGAAAGTCAGGCTGCGACAT +CTAAACTTAGGCCACCAAGTAGAAAGCTAGGCCATCAAGTAAAAACCTAG +ACCACTATGCCAAAATAGATGCCTTCTAAAAGTTTCAGGGAAAAAATGTT +TTTCAACAAAAATTTTTAAAATCGTGGCCGAGTGGACTTTAATAAAATTT +ACTTTTTCAGCCACGTGGCAGAGGGATATCGGAGCATCGTTTGAATTTTC +TAATTTCTGAAAGAAAACCGGAACACATTGCAAAAATACGAAAAATATAA +CTGAAGCTGACATATGAAATGAGTTCTATGCTCCCCGCCGCCTATTCTTC +TCATTTTTCTGCTTATTTTTGTCGTTGGGGGGATGCTGGTATCATATTCT +TCGTCCCCTCTTCTTTCATTGCCAGTCTATTCAGTATTATAATTAGCGTG +TATTCCCTATTGGATTCTGCTCCCCGTCGTTTTTTTTTCTATGCGAAAAA 
+AATAGAAAATACGAGGTGAACGGGAATTTTTATGAAAACGAACGAAGAAT +TTGAGTTTTCTAGGCCACCAAATTGAAACATAGGCCAGCAAACGGATCTC +TAGTAAAAACCTAAACCACAATACGAAAAACTAGGCCACCAACGAAAAAT +GGGAGATTCCGCTTGTGTGTTCAAGCATTTTGTAAAATTCTAGATTGTTT +TAAAAACTTAATTTTTCCAGTACAAAAATCGATTTAAAAACAATAAATAT +GTCAAACTCGTGACGTTTGTCAATAGAGCGCGTTTGCATGTCAAACAAAA +CATATTCGGTGTAGATTTACGGGGACACTTTACAAAACAAAAAACCTTCA +CAAGGCTCCAAATAAGAGCTAGAAAATGGAACAAATTTCCGTTTTTTTTT +TGTTGGTGAGTGTCTTGTGAGGAGGGGACCGCCCGTGTCTCTTCCAAAAA +CATTATTAAATTAGTGTGACGTCATCACATTGAAAAACAACAAATATAGT +TCGAGTTTTGAAGAAAAATTTCGGAATTTTTCTGGTGAAAAAATTTCAAA +AATTTCAATTTTTTTTTAAATCTTAAAAAATTTTTTTATATATTTTTAAA +TTCAATGTTTGATCTGAAAACCAAATTTTCACACAAAAAATCTGGTTTTT +GACGTCTAAATTTGCTTTTTTTCACCCAATTACGAGGTTCTCCGTGGGAA +GGTGTGTCTAGACACTTGAACTTTTGCTTTTTTATTTTTTTTCTAATTGA +AAATGATTAAGCTAGATATATTTTCTCTCTTTTCTTAATCAGAATTGTTT +TTGAAGATTTTAAATTTTTACACTCCCGTGCAGCAAATGCGCTCCGATGA +GAAGTATGGAAAATTACAACTGGAGATTTTCAAAATGTTCAATTAAATTT +TGCGTTTTTCTTTTAAAGGTTCAAGAGTCTAGAAAACTACGAAAAGGTGC +TGTGACCTTATCGATGACGTCATCACAATTCTTGAAAAACTGGGAAAATG +AGACGATGTGGAAAGAGGGGATTATTTATGCTTAAATAAACTTTTTGCAC +TAATTAATTCATATAGTTGTAGTGCTTCTTTATATTTTAAGAGAAATTGC +AAATTTTGCACCCAAAAAAGTAGACGGCCGAGTTATGGGAAAACTCTTCC +ACGGACACAAGACGTGGAAAACTAAAAGTTTCCGTTGTTTTTTATACGCC +CGCATGGCACATCCCCGCCCCAACAAAATTTTCAGGTTTTCAGGGTGAAA +ACATTTTTTCAGCTGCTGGGCGGCTTCTCGTGCAGCTGGAAATGTATTTC +CAGACGGGTTGGAACCGGGATTTGTCATTCAATTGTTGCAATTACGCTCT +ACCGCTAAACCTCGTTTGGACTCCACGTGGACAACACGGTGCCTTATTGT +AAATTTCTCTTCCATCTCCATAATTTCAGAACAAACCGTCGCTTTTATTC +TCCATAATATTATAATTTTATTTAAAAAAATCTGGAAACATTTATCGATT +TTTGTGAAAATCTATGTGAGATTATTGACAGAAATACAAGAAAATCGATG +CAAACATGCTCTATTGCTAAATAGTTCGTGTACTCTACGTCGACGACACG +ATTTCAAGAGTTTTTCGTTTTTTGAACAAATCATCTGTTATTATTGCAAA +ATTCGGTACATTACTCAAAAAAAAACAATAATTGATAAACATGTAGAAAC +ATACATGCAAACACGCTCCGATGCAAATTATTGATTCTCCAACTTTTATT +GTTAAGTTATCTTGAAAAAAAAACTGATAAAACGAGAAGTCCTTGGTGGT +ATCGGTCGCCTAGAAACAGATGAAAGAATGGAAAAACAAGTATTACGGGC +GGCTCCGTTTCGTATTATGGGGGGTGGGCCGCAGATGATCCTTTAGAATA 
+AGAAAAATGTCTAGTATTTTGTGATACTTGTGAAATATAACCCTAAACTT +AAAAAATAAATTAGGGCTCAAAAATGGTATTTTTCTAATGTAAGCCTACA +AGCACAAAGGTATTCAATTTTTTTTCAATAGCTAAAAAATTAATTTTTAA +ATTGTGATATCATAATACTTACACAAGTACCAATTTTGTTTTAAACGATA +GAATATTCCATAACTTTTGGGTAGGATTTTCTGCACTCGAAAAAAACGGG +ATTTTCAACAAAAATAGTGGACTTTTTAAAAACAACGAAAAAGTTTTGCT +TGGGACCAAGTCTAGCAGATTACAAAAACTGATTCTTCCAGTCAAAGATT +TTATATATCACTTAACAATTGTTTTGCCTACCTAGCAAATTCCGAAGTCA +GAAAAGTTGAATTTCCCTCCAACAATTTTTTTTCATAGTCAGCCAATTTG +CATTTCCCGCGAAAAAATTTACGGATTCACCACAATGGGTCTCCCACCAA +TAAAATTAATTATTCATTTATCTTCTTAAAAGGTCACACGAGATGCCAGC +GACACAGGGCAAGTGCGACCACTATCCCGAAAAAGACACCGACAGAACCT +AACGAAGGCTTCCAAAAATAGCATAGAAATACAAGAGACACAGAATATCC +CATGATGTTCTGTCACTTTCAACATCATTCTGCCTTTTTTGTTCCCTTGT +GTATGGTGACTATAATTTGATTGTTCTGAATAATCTGATGTCTACTTGTG +TTGTGGACCACTATTGATTTTTGCTGCTTTGGTTAATTTGAGGTAATACT +TTAACAAATTTGTTTTAAATATCATAGTCATTTTACAAATTTCAACAGTT +TTCCTATTAATCCGCTAAACCGCAACTAAAAATGAAAAATTAAAACTTTT +CTCAAAACTGTATGGGAAAGTTCAGTGACGGTTATGATAATTACAACATC +GTTAATGCAAATTTAACAGGTTTCGCAAAATTATATTCAAAAAATTTTTT +AAATCGTTTATTACGGTAACAAGAAATGTTGAGAATGCGTATTACACAAC +ATATTTGACGGGCAAAATATCTCAGAGCGAAAACTACAGTAACTCTTTGA +ATGAATTCTGTAGCGTTTGTATCGATTTACGGGCTCGATTTTCGAAATTA +ATTTATATTTGTAAAGTGAAAGCTATATTCAATTTTTCTTCGATTATTTG +TTGTTTTGTTTTATTGTGTTTTTGTTTTATTTTAATATATTTGTCACTTT +TCAAATAGAAATTAATATCGAAAATCGAGCCCGTAAATGGACACAAACGC +TACAGTAGTCATTCAAAGAGTTACTGTAGTTTTCGCTCTGAGATATTTTG +CCCGTCAAATATGTTGTGCAATACGCATTCTCAACATTTCTTGTTACCGT +GATAAACGATTTAAAATTTTTTTTAAAACTATTTTTTTAAGTAGAACAAA +ATTCAGAAAACAACTCGATAAAAATGTCCTAGTCATCCGATTAAAATAAT +TTTTTGAAATAAATAATATGAAAATTAAATTTTAAATATTAAAAATGTTA +TTGTTGCTTTAAAAAACGTTATTGGACTTTTCAAGGCGGCAAAAAAATGA +AAATAAAAATCGATTATTTTTTGAATTGTTGACTTTTCGGTTTTTAAATG +TTGTTGAAAAGAAACGAATTGTATATACGAATTTCAATTAAAAAGTTTAA +AATGTTTTTTAAAATGTTTTATTTTTTTCAGATAGATCAACTGAACTTCA +TAACTTTAAAAAGCCAGTCTGGGATGTATCTATACCACTCTATCAGACTA +CTCTTAGCACTAATCACAATGTACGACCCCGTGAGAGATTGTTGTCTGTA +CTGGATACTACACGAGTGGTAGAATATAAAGAAGTGTGGTGACAAGTTTG 
+GGATCTGAGTCCAGTAGAAAAAAATCATTCTACCGAAACTCGAAACTATG +AGCGACTGGTCAAAACTATGCATTTGACGACGTCTACCTGTTTTAAGTTA +TTGGTGGTGAGTGTGCCTTTGAAGAAGTCGCTCGAGCCTCGACCTTCTCC +GACTGGCTTGAACTGAAAGATCCTCGGGAATCATTTTTAGAATTTAGAGG +TGGAGTACCGTCTGTGGATTTTTTTTTGCTCGAAACGATAGAATACAGCC +CCAATATTCCGAACAGGGGTGCGCGGCAATTGCCGTGTGGCAATAGAATT +TTCGGCAATTTCGGCAATTGCGGCAACTTAGGCAATTGCCAAAATTGCCG +AAAATTCACAAAACCGGCAATTGCCGAAATTGCCGATTGCCGGAAATCCG +AATTGGAAATATAAATTTGATTTTTTTTTGTAGTTTTAAGAGCTTGAACA +TGCATTTTACTAAACAATTTTCTCTTTTTAAGCTCAAAATGGTTTAATCC +TTTGAAGATTGACCGTTTTCTTTTAGAAAATTACTGACAGAATATTAAAA +AACAATGTGACATTTTTCAATTTTCGATATACATTTAGGTTTGAAAATTG +CCGAAAATGCCCTGCAATCGGAATTTCGGCGATTTCGGCAATTGCCGAAA +TTGCCGATTGCCGGAAATTTTGAAAACCGGCAATTGCCGAAATTGCCGCG +CACCCCTGATACCGAATATAACTGTAAAAAATGTATTAATTTTTTTGTTG +ATTTTTTGAAAATTTTCATAAAAGTAAAGAAAGGGCCAAATTATGTTTGA +ACTACTAGTAGTCTGTGACTTCATTTTTGGCATTTTTCCGTTTTTCAGCA +ATAATGATTGGTTTTCTTTGTTCTCTAATTTTAAACATATTTCTTCACAA +ATGTCTCAAAAATTAACAACTTCAGTTTAAAGTAATAAAACAAAAAAAAA +AGAAAAATAAAGAAAAACCAGTCAGTTTTTTCAAAAATAATTCAAAAATA +ATTCGGTCCTTATTTTTTTTTTTTTGCAAAAAAAAACAAAACAAACTCCC +CTATAAAAATTTTCCAAAAAAAAATTGGAAGGTTTTTTTTATTTCAGCCT +ATTTTTGGAAGTTGTCGAACTCGATCAAAACATTTTTTCATTGGTTTAAG +TTTTATTATGCTTGAAATATTCAAATTCCAACATACCAGGCATTGAAAAA +TCAGTTTTCGTCGCTTTTTGACTCGAAATAAAAAAAACCAAAAATTTTTG +AAAAGTTTTATTATGATAGAGTCATTCAATTATATTCCCAGTACTTTTAA +ATAATCAAACAATTTTTTAGAATGGCTAGTTTCAAAATCGGCGGCTTTGT +CGTTACATATGAAAACATGCACAACAATATGAAAAATAGGTATGTCTTCA +AAAATATTAAAAACAATATTTTTTTAATTTTACAGATGTTAGCAGTTCGA +CGAAGCAATGCAGGAACTACAGTTATACAATCAAATTCACGCTTTTTATA +TATTAATTTTTAAAATCATAAAAATTACAATTTTCATCAACGTTGATCAG +CTAGACGAATGCATTAAGAAAAGGGAAAACATAGGGCTTCCCAAAACGTC +TGCCTCGCCCGCCTTGTGGCGACCTGCGCCTGCCTCGTGCAGGCCGCGTC +TCCAGTCAGTGCAGTGCAGAAATTTTTATTTCAAAATTGTACAAAAACAT +GGAAAAATAGAGAAGGATAATTTTTTAGGCCTCGGAAATCAATTTTAAGT +CCTCTAGCTACAAAATGAACCATTTTAGAGGAGTTTCAAAATTGTGAATT +TTTACAAAAATTACCCAATTTTGCCACTTTTTAATGGTTTTTGATGGGTT +AAACCTAGATCTTCTGGATAATTCCGCATATATGAATTACCGTATTTCCT 
+ATATTAGTTTTGCATGCAAGACTAATTTTCAATTGGTCTGTAGGGGTGCA +AGACTAATAGAGACTGCAAGACTATTAGAGGCTGCAAGACTAATTTTCGA +ATGCTATAAAACTCCGAAACGTGACCAATTTTTGATTGTAAACTCAACTT +GATATCGTTTAAACAACAAAAAATACATCCTTTTCCAATATTTAATCAAT +TATTTGAACGCTTTTAATCAAAAACTCGAGTTCAATTTGCCCAGAAATGG +GCCAATTTATTAACGTTGCAGCATCTATGCAAGATATTGCTGGACTGGAA +AAAAGTCGGGTGCAAGACTTTTAGAGACTGCAATACTAATAGAGGCTGCA +AGACTAATTTTCGATTTGCCCGTAGGGGTGCAAGGCTAATAGGGATGCAA +GACTATTAGAGGAAATACGGTACCTGTTTTTGACAAAATTAGACAACTTT +TTATTTTTGCTCAATTTTTTTTCAGCCATCTAATTACTGTCCTTTTTTTT +TGGACAATAAAAATAAATTTTCTAAAAGCGTTTGAAACCACTATATTTTG +CAAAAGGACAATTTTTTAGGGATTGGAGATCAATTTTATGTCCTCTAGCT +ACAAAATAAACAATTTTAGAGGAGTGTCAAAATTGTGAATTTTTACAAAA +ATTGCCCAATTTCGCCACTTTTTATTTTGGTGGGTTATACCTAGATTTTT +TAAATTCAGCATATATGAATTACCCGTTTTCAACAAAATTAGACAACTTT +TGATTTTTGCCCAATTTTTTTTTCAGCCATCTAATGACTGTTCTTTTTTT +GGGCAAAAAAAATAAATTTTTCTGCAAACGTACAAAACCATTAGAAATTG +AAAAAAGGCAATTTTTTAGGGTTCCGATATCAATTTTGAGTCCTCTAACT +TCAAAAATGACTATTTTAGAAGAGTTTCAAAATTGTGATTTTTCTGCCAT +AAATTGCCCATTTTTGCCACTTTTAATGGTTTTTGGTGGGGTATACCTAG +ATTTTCTGAATTCTGCATATATGAATTACCGTTTTCGACAAATTTAGACA +ACTTTTTATTTAAAAAATTAAAAAGGATTAAAGGACGATCCGTTCTTCAA +GTGCTATGCACTGCGGATCTGGGATTCAGGTACACTGCCTGGTGGTGATC +CCTCTGGGCTGTAATTTAAGCCACGTCCTAGCCGAGGACTGTGGCCGATA +ATCCAGTCGTGGATTGCTCCACTTCCCAATAGAGGCAGGGTGAACCTAGG +GGGTGAGGCCGGACTTTTATCTCGTGACCTCCAGACTGCTAGCGGCCACC +ACTACCGACTGAGCTATCTGCCCCCCTTGGGGAAAAAAATAATTTTTTTT +TTTTTTGGGCAAAAAAATAATTTTTTGGAAATTGTACAAACCAAATATTA +TATATTAAAAAATATTATCCAAATATTCCAAATATTCCAAATATTCCAAA +TATTCCAAATTCCAAATATTCCAAATATTCCAAATTTATTCCAAATATTA +TGAAAAAGGCCATTTTGTTAGGGCTGGGAAGTTTGAATTTTTACAAATAT +TGCCCAATTTTGCCGCTTTTTAATGGTTTTTGATGGGTTAAACCTAGATT +TTCTGAATTCTGCATGTATAACTTACTTGTTTTCAACTAATTCAGACAAC +TTTTTATTGTTTTTTTTTCAAATTTTTTCAGCCAATTAATGACTGTCCTT +TTTTGGGCAATTTTTTTTTTGAAACAAAAAAAAATTAAATTAAATTGCCC +AATTTTGCCACTTTTTAATAGTTTTTGATGGGTCCCGGAAACCTAGGAAA +CCTAGTCCCGGAAAATCGAAAAATCGAAAAAAAATTTTTTGAAAATTTAC +CAATGAAAAAAAAAAGACTCAATTTTCTTCCAAAATAAAAAAGTGGGCAA 
+AATTTTAAATATTTTTCGAAAAAAAAACATAACTTTTCAAAAAATTTTTC +GATTTTCCAACAAAAAAGTGAAATAAGAAGACATGCAGGTTACTGTAGCT +GATAAAGCTTCAAGCGTACCAGGACCCGAAAACCGCCGCCCGCGTAAATC +GACATATTTAATGTACCATCAATTTGATCCTCCTCTTCCTCCTTCTTTCA +ATGACACCACACAACTTCCTGTTATGAAGGTCTCAATTCCCCTTCATAAC +ATTCTTTGTAATTAATGCACCCTAAACAAACTGTGCAAACACACTTAGAC +ACCCCCCTCCTACACGGTCTTTGGAGTTCCCCCGCTCAACTCTAATGGAC +CTGGTAATCCCTTGCGCGGTGGGATTTTTCGGGTCTCGCCGCGATTTTTA +TCCGAATTCCGGAATACCGGAAATGGCGTGGAAATCGATATTGCAACAAA +GAAAACTGTTTTATTTTCAAAAATCCGCTCACTTTTTTTAAAAGTGATCT +TTCAATCATTTTTAAAGTTTTTTTTTTTGAAAGCACGCGTTTCTGGCTTC +CCACATATTTTGGAATGTAAGAGATTGCCGAGTTAGGCCATTTGGACTCG +GCCACGGCCGGGGTCGATTCACGTAGCGGCTCGGTTTCTGTAGAAAACTA +AATGTATTTGTCCGTATGGAGTACACAAGCTTTTCCAGGCGTTATCCGGC +AGGCGATAGAAGCCGCGACGCAACACGCAACGCGCCGTAAGTCTACCCCG +GCCGTGGCTGAGTTATGATGGCCTAACTCGGCAAACTCTTGCATTTGAAA +ATATGAGCGAAGCCAGAAGCAGGTGAACATGGATTTCTGGCTTCCCTCAT +AAATTGAAAAGGGAGAGTTTACCGAACTAGGCCATTTTGGCTCGGCCATA +TCTGGGGCAGATTTACGGCGCGTTGCGTGTCGCGTCGCGGCTCGTTTTTA +GTTGTAAAACTGAATGTATTTGTCCGTGTGGAGCACACGACTTCCCCACG +CGTTGTCCGCTAGGCAATTGTTAATGGAGCGCGAAAAATTCAATGAGGAA +GGCCAGAACCCCGTGTGTGATTTTTGCAACTACAGCCCAAAATTTGAACG +GGATTAATACAAATTTAATTTGAAATTTGGATTTCGCAATTCAAAATTTG +CGAAATTAAAATTTCGAAAACCCGAATCTCATCGAAATTAAATTCCAAAG +TTTTCATTCAAATTTCAATGAGGTCTCTATTGAGCCGAATTCCCGCGCAC +GGGAGGTAGTGGACCCCTTTTGTATGTTTAGGGGTACAAAGACCCCCTAC +CACCCCTTGTATATGCACAATAGAGAACACTCAAGACCCGCCCACCCTAG +GCCCCTCATCCAAAACATATCCGAATATACCCTCTACTGTACTTTATTTA +CTTGCTCCGTTTACCCGGGTACCAGGGTACTCCCTAAACTAACTGAATTT +CCAAGGTGGTGGCCTAGAAAAAGCTCTTCCACTGGTTTTTTTGTGGAAAG +AAATGATGAAGGTTCTTGTTGTTTTATTGCTTATGGTAGGTTTTTTTTGC +CACGTGTCCGGAATTTTTAAAACTATTTTTCTATTTTTTAAAGGCCCAAA +ACTTTGTTTTTTTTAATGTTATCGTTTTTTCGTGAAAAAAATTATTTCAG +AAAACGGGGGTTTTAGGTTTAGGCTAAAGCGTAGGCTTAGGTTTAGGCCT +AGGCTTAGGCTTAGGCTAAGGCTTAGGCTTAGGCGTAGGCTTAGGCATAG +GCGTAGGCTTTAGGTTTAGGCTAAAGCGTAGGCTTAGGTTTAGGCCTAGG +CTTAGGCTTAGGCTAAGGCTTAGGCTTAGGCGTAGGCTTAGGCGTAGGCG +TAGGCTTAGGCCTAGGTGTAGACTTCGGCTCAAACATAAGCTCAGGGGCA 
+AAAAAAGGAGAAACTCCCACAAATTCCTAAAAATCCTGAAAATTTTTTTA +AATCTTAAAATAGTCAATTTTCAGTGGAAATCACAGTGAGCATTTTGAAA +ATCCAATAATTTTCAGGTAAATTCGACATTTTCGTTCGGATTGGACAATG +AAATAATCGGTGAACCAAAGTTACAATGCAATCCGGAAAGTATTACATTC +TCCTTCAACACACGGAATCCGTTTATGTTAGTCTTGGGGTCTCGACACGA +ATTTTTCCCGCAAAATTAGTTTTCCACGCGTGAGTACACCGCAAGTAGAG +CGCTTATCAATAAATTATAAATTATCAATAGAGCGCACTTGCCCCCGGTG +AATTTCAAAATTTTCAGAGGAAACGTATATATTCGTGGCTTCTACCGAAC +ACCCGGCTGTCGTCGGCAATTCCTCGCACCGAATCAGGCTGGCGGCTCAT +TTACGGTACGGCTCGGCGATTGCGGGATGCGGCGGTCCCGACAAATCAGT +GGACATTCGCCACGTGGCGTCAATCAGCACATCACTTTTGTTGCGAACTT +TCATCCGAATCTCGCGACGAAAGAGGAACGATCATTCAATATTCGATGCT +TTTATGCGCACAGTGAATCTGTTGTCAAAGCTGATCTGGATGTCAGGTAG +GGATGCAAATCGATAAATTCCCCGAAAATCGATACATTTTCCAAAAATCA +ATAAATACCCAAAAAATCAAACAATTCCCAGGAAACTGGAAGAAAATTCT +CAGAAAATCAATAAATGTATTTGTTTTGCAGCTCAATGCCGGAGGAATCG +TTCGAACAGGGTGTGACAATCGTGCCCCAATGCACATATTCCCTTCGTGA +AGGGACATTTGAGGGCCCAAAAGTGTCGAATACACGTGTCGGAATGACAA +TTGTTCATCGATGGGACTGTGATACATCGGGGAACTATGGAATACTGCTA +CGTGGATGCACCATACTCGATTCACGTGGTGTTGAATCTTTTCCATTGCT +TGATGAGAATGGGTAAGAGTAGGGAAGTGTCGGCCGCTAGAAAAGAAAAA +TATATCACTAGAGCACGTTTTCTAAGCATTTTTCTCAATAGAGCGCGCTT +GCTCAATTCCCTAAAATAAATTATGGTGCATCGAAACGTGTTTACGTACT +GATAGAGCTGTAGTACTCGAGGAAGCTGAAAAAAATAATGAAGAAAGTGA +AAAAAATCAAAAGTGTGAACACAGCAGCCTTGTCAGATAAATTTCAAAAA +TCTGTGCATTTGGTCGTAAGTTATGGCGCATCAAAAGTTTTTTGAAATTC +TCAATAGAGCACATTTCAAAAAAGAGCGATTTTCGCAAAGCCCCTTTCTT +CGTTGTTTGATTTAAAAACAATGCAATTTCGTTTATTTCGCAAATTTTTG +AAAAATGAGTTAAATTTGCAACAGAGCGCGATTGCACACACACACACACA +CACATCAAACGTCAGAGACCCCACTGATTTTGGAGATTAAATTACGGTAG +ATAGAGAATCACAAGGAAAACATCACGAACTTTGTTCCGTCAATTCCCGT +GGTGCTTGTTATCATCAGAATGCAAAATTTGAAAGTCTGAAATTAAGCCG +TTTCCGCTAAAACGTAGCGAATCTCATTCCACCGAGTCTATTTTCGTGAG +GTCTCCAATTTTTCAAGTCTCTGCTCGTTATGGTGCATCGAAGGGTAATT +CAAAATTTCAAATAATTTTTAGACACTTTTCGACCAGTTTCAAATTTTTT +TTATGGTGCATCGAAAGAATTTTGCGTAATTTTCAATAGAGCGCGATTAC +TACTCACCTCTAAATTTCTGCAAATTCTTTTTCCAGATGCTCAGTGTCCC +GTGACTTCCCACAAGTGGTATATCTCCCATCACTGACGTCAGCCTACATG 
+GCAATCGAAGCCATCTCATTTCCTGACCAACCATCGATTTCTTTCTCGTG +TCAAATTAAGTTGTGTGATAAGGGAAGTGATGAGTGTCGGGGTATGAGTC +CCCCCGCGTGTACCCCATTGACACAGGTCCCCATCACGGGACAAGTACCA +ATGCCATTTGATAATACAATTGGAAATACGTTTGGTGAGTTATTTACGGC +CGAGAAAACCTGCAAAAATAAAAGTTCAGCCACCGAACTTTTTGACATTT +TTGCGCAACTGTATGCCGCACAAAACTTTACGTCAGAAGTGGGCGTGTCC +GCCTGCAGGCCCCGATGTTTTAGATGTAAAAAATTAGATGTTTTAGATGT +AAAAAATTGAAATTTGAATACAAAAAAAAATTCGGCCACCGATTTTTTTT +GCTTTCACGTTTTGTCAATGCATTTCCAATTTTTTTCGTGAAACGTTTTT +GTCAGAATTGGGCGTGTCTTGTAAAGTCCCGCCCAATTTATGGGCGTGGC +TTGTTTCGACCAAAAATACTGTTTTTTTAACTCACAAAAACAGAGCTATA +AACTTTTTGAAAAAAACATCCCTGTCCTCTCCTTTCTCCATTCTAAAATT +TGAATTAGATGGCCGTGAAAAACCCCGGCCGCGGTCTTCAATTACAGTGA +AGAATTTGGCGGCAGAGCGCGTTTTCACAGTCTTTTTTTGTCGTCACTGA +GCAAACTTCAACTGCTTCCAGCCAGAAATTTACAAGCAAATTCAAATTTT +CAGACGGAATCCCACTGGAGCCGTGGATGAAAGAGCCCTCTCCTCCCACT +GATGACGTGGCAAACATCACCTCAGAAGGCGAGCCAATGCCTCGGCTCAT +CACCGAAGAGGAACAATATCAAATCGAGTCGAACCATGTTGAGGCGCGAG +AAAAACGATTTGCTCATCGATTATTCAATATCACATCGGAAGATCTCTAT +GTTGAGCCAACCGTCGAGCCAATGGAAGTAGAGATGCCTGGAGCACCGAG +AGAAACTGCTCGGAGGGTGTCTGAGCCATGTGTCTCGGTTGAAACTTTCT +ATATTTCGGCGCTTGCTGTACTTTTCGTTTTTGTCGTTTCGATTGGGATG +GTCTGTTTCTTCGGAAGTCATATGCTTAAAAAGTTGGTTTTTTATTGATT +TTCCTCCCACAGGGTTCTGACCTTCCTCATTGATTTTTTTGCGCTCCATT +GACAATCGCTTGTCGGAAAACGCATGGGAAAGTCGTGTACACGGACAAAT +ACATTTGGTTTTACAACTAAAATCGAGCCGCGCCGCAACACGCAACGCGC +CGTAAACCTACCCCAGCTATGGCCGAGTCAAAATGGCCTAGTTCGGCAAG +CTCTTTCATTTCAAAATATGAGGGAAGCCAGAAATCCATTTCCCCAAATT +GCAAGATATTTGTGGCGTGGCCGAAGTTTTCTAGTTTCTCGGCCACGTTG +TCAAAATTATTTTAAAAAATCGCTGGAAACACTAAATTTAGATATCTCCT +GTAGATTGTGATTCGCGGAAAAATTCCATGAATCTCGTTCTCCCAGTTTT +TTTTTTGTAAATTCAACAAATCAAATGAGCGCCAAAACCTCACAAATCTC +ATACTCCCAAAATTTGCGTTCCTCTTTTTCTTGAGGCCTCCTCTTTTCAC +TGTAAAATTTAAAAAACTCGTAACTTTTTCTTCAGAGCGCGATTGTACGC +ACAGAAAGTCTTTAAAGTCTGAAAATATTTTAAAGTTCCCCAAAATCCCA +CAAGTCTCATTCCACCGAGAGGTCATTTTCCGTGAGGTCTCTTTCTTGGG +TCTCGACGCGAAAATACTGTAATTTTATTAATTTTTTCAGAACTCAAGAC +TGTGAAATGCCGATTCCAGTCCCAGAAGGATATTATCTAAGCAAACACTG 
+AAACTCTTGTTTAAAAGTATTCGTTTCCCATCCACGTGCCATAATTCTTG +CATTTTCTTTTTTTTTCCCTATTTTTATTCAATTTATTTACAATTTGTTC +AATAAATAATCAATAAATAATAATTAAAAATAACATTAAAAGTCACATTT +TCCGAAGGAACCACGCGTGGAATGTAACGGTGCCGTTGTCGAATGGTTCC +GGCTGAAAATTGATTAGTTATCATTGGAGCGCAATTGCATTTTTCTCGAA +AAATCGATGTTTTCGTGTGATGCAGAATTATCGATTTTTTTTAACTTGGT +GGCCGAGAAAATGGAAAACTCAGTCATCGACTTTTATTTACCTGGAACGA +AAAAATTTAAAGTATGATCACGCGCTAAAGCCTCAATTATGCAAAATGCT +TTAAAAACGGGTTTTGGAATTTTCTAGACCACTGGTGACGTCACTCCTTT +TCCATTTATTATTCTAACTGTTTTCTAGGCCACTGATGTCGATATTTTTC +CACTGCTGGGTTTCTAGATCGCCTGCGACGTCATTTCTCTTCCATACCCT +GCTCTAGCGGGTTTCTAGGCCACTGGAGAAATTATTTTTCTTCCAAAAAA +AAATCTAGGCCATTAATGACGTCATCATTTTTTCTTTTCTTTAGGATTAC +CTCAGGTTCCTCATATTTCAGATCCATCCTCGCCAATCGTTCATTCTCAT +GAACTTTTTCCACTTTTTCACTGCTCGTACTCTTCTCATCTACTGTATCC +GCCACCACAACGTCCGGAGCAACAACTTGCTCATCAATATGCTGATATTC +GGGCTCCTGGCGGTCCCAACGTGGTGTCTCTGGTGCAGTTTCATATTGAT +TTTGCTGCTTCTCCAGGAGGTTCTGTATTGAACTGATCGCTGAGGAGGAT +CCAACGATCGGAGGCATCGGAGGCGGCAGAGGAGCCATAGATGGAGTCTG +AGAAGCCATCATTTGGAGTTGGGGGTAATTCCTTTGTTGGGACATCTGTG +GGGACATTTGTGGGGACATTTGAGGGGACGGGGACATTTGGAATACCATC +GGAGGCATTGGAGGTGGTGGGGGTGCCGGAGCCATTCGTATCATTGGAGC +CATCACGTAAGATGGTGGAGATGGTGGGAGACTGGAATTTTTTTTTTGAA +TTTTTTTTCATAACACGGGCTTCTGGCCTTCCTCATTGCACTTTTCGCGC +TCCGATGATAATCGCCTGCCGGACGACGCGTGGAAAAGTGGTGTACTCCC +CACGGACAAATACATTTAGTTTTCAACTGAAAACCGAGCCGCGACGCGAC +ACGCTACGCTCCGTAAATGGCCTAATTTCTTTTTTTTAATCTTGAGTTAA +AGTTTAATTTTAATTATTTCTACAGAATTGAATGGCTGGGAATGTGCCAC +GTGGCCGAGAATTCTCCTGCGTGGCCTAGAAAATTCAAAAAAATATTTTT +TGGTGGTTTTGATTACAAAATAAAGGATTTATACGGATAAAAACCTATTT +TAAAAGCCGTGAAAAAAATCATATCAACCACTGCCTAAAAGATTGAAATT +TTTATGATGGCCGAGAATTCTCAAGTGTGGCCTAGGAAAAATCAAGGCTT +TTGAAAATTTGCTCATTTCGAAAATTAAAAAAAACAACTCAATTGATAGT +AAATTTTAAAATTAAAAATTATTTTTCTGGTTTTTTTTTCTCAATTTTTG +CAAAATCAAAATTATAATCAATTTTTAATCCAGAATTCGTCAAGTGGCCT +AGAATTCTACTGCGTGGCCTAGAAAATTCCACCAAAATTTAAATTTTCCA +AATTGTAAAAATGCTCCAAAAAGTTGAATGGTGGCCGAGGTTTTTCGTTT +TTCACTTTTCACAGCTACGGAAAAAAACTCGGCCACCAATAGTTTTTCCT 
+AGGCCATGTTTCAAATGCCCTATTTCTATGTTAATTACAGTATTTTCTAG +ACAAGATTTTAAAAAAACTCACGAATAGCACATGGAGCTCTGACACTTGC +TCCCACCAATACAAAATTCCGAATTTTTTACAAAAAACTGCAAAACTATA +AGAATTTGTGGTAGAAATTGGGGCATTTACGTAATAATAATAATAGAAAA +ATGTCCGCAAATTTTTTTTCATAAATTTCATAGGGAGGAGGTAAGCTTCA +GCTAGAAGCCATTCAATTTTATTGGGGCGCCGAAAATCTTCGGAACACTT +TTACAATTTATACAGTAGTGTACAACTTTTACAACTTAATAATATCTTCA +ATACTTCCAGAATATTCTTCTTTTTCCTCCTCAATTGCCACGTCATCATC +TTGTAATCTGGTCCATTTTGCTGGCGAGGCGTCGGCAAACAGGCAGAATA +TAGTATTTACGAAGAGAAGTAAGCCCGCATGCGCCAGGAAACAATTACGC +CAGTCTGTGAATCTGAAAAAAATAATCTTGTCTTTTAGGGGACCTACGCC +TGCCTGCCTACGTCTGCCTTGCAGGGGACCTTTGCCTACCTACTTGACTA +CCTTCAAGTGAGACCTATGCTGGTTTGCCTGCATTTTTGTCAGTGCCTTC +TAGGCCTATGCCTACCTTACAAACTATGTAGGTCAACTGCCTGCTTTCAA +GGTGGCCTAGGTCAGCCTACATGCTATCTACGTCTACCCCAATGTACCCT +GTGCCTATCTACCTGCTTTTAATGCTATCTGCCTTTTCGATAGTCTTTTA +GGGAATCTACGCCGGTCCAGCTCACATAGGTTATCTATGCCTATGCCTTC +CTGCCTGCCTCCAAAGTGACCTACGCTTACCGTGTTTTTTTTTCTAGACC +GTATATACCTACATGCCTGCCTTTAAGTTTACCTATGCCTGCCTGCATTC +CTCATGCCTGCGTGCCTACATGAATACCCTAAAAACTCACGTATCATCAG +TAACAATCATATTGACGAAAAACGGTTCGATGAATATCGTGCAAGCCCAG +ATGTTCATGATCTGGAAGAGAAATCGTTGGCCTAAAAATTTTCTCGGCCA +CCCCCTTACCTGCTGCATTGCGAAATAGGCGTACTGCTGGGAGTGCAACG +TGGCACACTTGTTGAATCCCGCCGTATTGCACCCCATCACACTTTCGGCG +CAAATCTGGAATGATGGAAGAGAATAGGAAAACTAGGCCATGGATGTATG +TGCGCTATGTTGATAATCGGTTATTTTCAACGTGGATTATCAATTGAGCG +CACTTGTCCCACTGGAAGAGTTTTCCTATCTAGGCCACCACCACCACCCA +CTCACCATTATAATAATAGCCAGCAAGTGTTTTTCCTGTGGAATAAATGC +TAGAACGATGAAAAAAACGGCGGCCACGGCCAGAGCGATTGTGTTGAAAA +TTTTAGTTTTCAGAGTTTCTGAGATGAATCTGGAATATTGATTTTTGGTG +GCCGTAGAGAGGAAAAACTCGGCCACTGATTTTTTTCACAATTATAAAAT +TATTTTTCCAGGAAAAACCCCCGAAAAACTCACTTAATATAATCACTAAT +AATTCCCGACGATAATTTCGTAAAAAATTGAAAAATAATGGGTAAGGCGG +CCAAAAAACCGGACGAAAGTACGTCATAGTTCAAATAATTTTTCATATAT +TGCGGATTGAATTGGGATACCAATTGAACTGTTATCAAATCTCCAAACGA +TGCGATCCAACAAGCCCACAGGCTTAAACTTGTCAGAATATGACGGAATG +GAAGATTTTTTGGCCGGGATACCTGCAAAACGAAAAAGTAACGGGCTTCG +AATTTCCCCCCCCATGTTTTCAAATGGAAGAGGTGGCCGTGAATAAAGCT 
+AACCTCGACCGGGGGTCGCGACTCGCGACAAACGGGAAAACTCAAAGAAA +TTTCGTACTGTCGCTACTTTTTTTGAGTTTTCCTGTTTGTCGCAGGTCGC +GACCCCCGGTCGAGGTTCGCTTTTTTCACGGCCACCTCTTCCATTTGAAA +ACATGGGGGAAATTCGAAGCCCGTGAAAAGTTAGGCCATGCAATGCGATG +TGGTGGCCGAGGTTTTGAAAACTCGGTTACTCTCATGTAAAAAATCTTGA +AGTTGAAGCAAATTTGCGTACATGAAACCCATTTACCTTGCTAGCAGTTT +TAGAAAAATTTGGGATTTTCTCATACTAATGAGGGAACGTCTTAATTGTC +GACAAATTGCGTACCCCGAGGAACCCACTAATTCAGTCCTAGGTTTAGGC +ATAGGCTAAGGCATAGGCTTAGGCGTAGGCCTAGGCTTAGGCTTAGGCTT +ACACGTGGTGTCAGAGTGCCTCATTTCGGCTTGATCTACGTAGATCTACA +AAAAATGCGGGGGAAGAGCCGCAGAGTTTTCTACTGATTTCGCATGGTTA +AGAGTGTGCTGACGTCACATTTTTTGGGGCAAAAATTCCCGCATTTTTGG +TAGATCAAACCGTAATAGCACAGCCAGGCACCACGTGCGCTTAGGCTTGG +GCTTTCCGTAGTCCTTCGGGCGCGTTTTTCTCAAAAATGATACGTGGTAC +AATTTCAGGTTTTTTTCCGCTCCAAAATCTAGTTATAATTTGTGTCCGGT +AGAGCGCGTTTGCACTCACTTTCTTTCCCGATTTCCCTTCTCGAATCCTC +TCCAATTCCTTCTCGCCAACCCAACTGTGCTCACTCGGCGAGCACTTGAA +AAATAGGAAAAACAGCGAAAAAGTGACCAACGATACACCGGCGTGCACAT +AGTAAACCGACGTCCATCCACCAGACGAAGAGCATAACCATCCCGAAACT +GGCATTGTAAATATATTTGAAAGTTGTGTATTTCCGGTTAATAGGCCAAT +GAATACTCCGTGCTCGTATGGGGTCGCCCAATTTGCTGTAATTGAGCCAG +CGAGGGGAAATGTGGCTGAAAATGCCATTCCTTGACAAATACGGGCGGCC +ACAAACCATCCGTAGCCAATGGTACTTGCCAACGGGATAAGGGCTGTTGA +AACTGGAAAAATTGAGTGCAATACATCATATTTGACGCGCAATTTTAAAA +TTTAAAAAATGGAAACGCGCTCTAATGATGCTTGGTGTACTCCTCTCGGA +CAATAAATGTCAATAATTTTTGAAAAATCGATAATTTTTTTTTAATTTCA +GAAAAATTCGTGAAATCTGTTTTATTTTGCGCGTAAAATATGATTCTTAA +ATACTTAATTCAGTATTTTACGCACGTGGTATCAGAGTGTCTCATTTAGG +CTTGATCTACGTAGATCTACAAAAAATGCGGGAGAAGAGACGCAGAGTTC +TCAACTGATTTCGTATGGTTAAGAACGTGCTGACGTCACATATTTTCGGG +CAAATAATTCCCGCATTTTTTGTAGATCAAACCGTGATGGGACAGCCTGA +CACCACGTGCTTACGCGGAATTTTTCACAATTTTTTGTGCTTCATGCACC +ATGTTTTTGTTTCGAAAAATCAGATTATTGTTTTGCGCGTCAAATTCAAC +GCCCACTGTGCAATACACCATATTTGACGATCAATTTCAAAAATTTTGTA +TGCTTTCGATGCATGCTTTTTGGGGTACTGTAGCTTTGAAAATACGCATG +CACTGGATCTGACGACAAAATGCACAGTTTTTATAAAATTAGTCTATAGA +AATTAGTCAGTTTTCTCGAAACAAATAAAAACTACAGAAAACTCGGCCAC +CGATATTTTTTTTTTTCGCGGCCACGATTGACGTCATACCTCCCGAAATA 
+ATTCCGGCGACAATGAAGACAGGTTTATGCGGCAAATGTTGAAAAGAAAT +GGAAAATGGAATCGAGGCGGCAAGACTTCCAATAGCCACACACGATTGAA +GAAAATTCGACTCTGATTTGGTATAGTTCACTGAAAAAATCAACTATTTT +GGCGGCCGCCAATGGAAAAACTCGGCCACCGAATTAAAAAAAAAACCTTT +TGAAGTTGAAGTACTATTATCATCTTGCATACAAATTTTTGTAAAATTAT +AAGTATTCATATTTGAAAGAATCGAGGTCAAACACAGTGTGGCACATATT +AAAATCTGCAAAAAAATCGTGAAAATTCAATTTTCCGGGGGTTTTGAGTT +CAAAAAGTACCAGAAATCGGACAGGAACCGGCGAGTTGACCATGGTGCAT +CAGAAACCGGAATTTTAAATAAGTAAAAAAATTTATGATGCTCGTGGTGA +TAACGAAAAGTGCAGTGATCAATTTAAGGGAAATACTGCTGGGGGATTAA +ACGTTGACACTGTCAACTTGATAGTGTTAATTTGATCAATGCTTTGTACT +TGTTTTTTGCAGGAAAAATTCTTTGAAAATTGCAAGACTATAGTATAGTC +TGAAAATTTGATGAAATCTAAAACTTTAATAAATACCGTTAATTATCATT +ATTTTTATGAAATTTTAATATAATCTACTTAAAATTTAATTTTCTAGGCA +TCTGTGAGCTTAAATTTTCGATTATCCCTCGCCAAATTAAAGATTTGCAG +ACTTTTAAGCTTATAAGCTTATTTCAGTGAGAGAGAGCATTGTGAGAAGA +GAAACGCAGACGAGGCTGGCGCGTTTCTCGTGCTCTGACGCGAGTCTCGA +TTTGCTTGCGCATTTCTCGTGCGCGCGACGTTCATTTTATTTTTCCGATT +TTTTTTCAGTTTTTCTGAACTTTTCTTCTTGAATTTGTGATTTTTTATTT +ATTTTTCACGTCTTGTTGCAAAATAACTGATTTTTCACACAAAATATTCG +AATTTTCAGGTCGGCAACATGGGATTCTTGTGGAAAACTGCAAAACTCGG +AATTAAAGTTGGACTCGTCGCCGGCGCCGTCAAACTGTCAATCGATAACG +ATATTTGGTCGACTAACAATGTTAAAGTATTAGTTTTCCCCCGTTTTTCC +ACGAAAATTTTCATTTTCAGGGCAGCGAACTGTATCAAAAGCTTAAAAAG +TACATTCTTCCCGGCACCGTCGTCTTTCCAGAGCAGGTTTTATCGATTTT +TTCTCTGAAAATGCAATTTTTTTCTTATTTTCAGCTCCCAACAGTCGAAG +ATGTGCAGCTGAAGGCCGGTGGAAAGTGGAACAGTGCTGTTGACTCGGTA +GGCTGCATTTTTGAGAGGAAACACGGAAAAATTGCTGCGAAAGTTGGAAA +AAAGTCCTGGAAAATGGAGATTTGGCGCCGAACGTTCAAGAAATTCCATA +ATTTCAAAAATCTATCTGAAAAATTGAAAAATATAAATTTTCAGTCATTT +TTAGTCAAATTTCAGAGCTAAACGTCGAAAAAAGTGCCTAAAACTAGGAA +AAATATGACAAAAAGACACAAATTTTGGAGAAAATTGCTGAAAATAATGC +AAAACACTTTTTTTCACAAAAAGCGAAATTCAAAAAAAATTCCACGTCAA +AAATTAACCAAATGCAAGTACGCTCCATCGCACTTTTTTCCAATTTCTGA +CTCGAAAAGTCTTAAACTTCTGGTTTTGTAGGAAAGATAATGCAAAACGC +AATTTTTAGTCCAAAATTCAGAGAAAATTCTGAAATTGTTTTTAAAAAAA +ATGCAAGCACACTCCATGGCACTTTTTTTTCAAAATTTCGCGTAGAAAAT +TACAGACATTAGCAAAATTCCGACGATTTCGCTGCGAGACCAGAAATATT 
+GTAGGAAATTCAAACATTATCCGAAAATAATGCAAACGCGCTCTATCGCA +CTTTTCTCCAAGTTCCCACTCAAAAAGCATAGAAAATACCTGAAATTGCC +GTTTTTTTTCACCGGAATTTCAAATTTACGCGATTTCGCTGCGAGACCCA +AGCTAGTGCAGTGGAGCGCATTTCCAACAATTTCAAACTCAATTTTTTTC +AGGTATTCACCACAATCGAGAACGTCCCATCAAGCGTGAACACAGTGGCC +AATCGACTCATCAATAATAAATAAATTCCCCATAAAAATTATCGATTTAT +CGATTTTCCCCCCGAATCCTCGTGTATTCCTTCGTTAGAATGTACTTTTT +CCCTCCTCATTTTCTAGAAGTATTTGTGTGTGTGCTCTATGAAAAAATTT +GCAATTTTTGACTTTAAACGGACGGTGCATTATATTATTTTATCAGTTCC +CATCGTTGATCAATTTTCTCCACTGATTATCAATTCACTGATAAGCTCAC +ACTGCCTGCCCAACTCAATTCATCGCAACTTCTTCTCCCTGCTTTTTTTT +GCCATGAATTCGACGGTGGCTTCCCGTTTCTCATGGGGACTCATTCTGGC +GTCGATCGATTTTTTGGCGTGCCTACTGTTCGCCTGCCTGCATGATGGAA +CTTTCAAATTCGCGAATTTTACTTCGCAATTTGGCGATTTTTCGTTTTTT +ACGAGGTTCGATTGATTGGTGGCCGAGTTAATTTTAGGCCACGAAAAATG +AAAATTGAAGCCGAGTGTCATTTTCGCTGGAATTCTAATTTTCAGTGTGT +AAAAATATAGAAAACTGCAAAAAATTTTGAATTCCCGCCAAAAACTAACG +TTACGAGACCCAAGTACAGTGGAGCGCAGTTGCAAAAAAAAATTTTTAAT +GATTTTTTGCAACTACGCTCCACAGTCCTTGGGTCTCACAACGTTGAAAC +TAAAATTTTGGAATTTCTCCCATAACTTCTTATAATAACGATATTCGGTG +GTCTCGACACGACATATTTTCATTAAATAAAACGGGTGTGCGCCTTTAAA +GAGTACTGCAGTTTCAAACTTTAGTTGCTAAGCGTATTTTCAACAAAGCA +ACGAAAATTTGAACTTACAGTACTCTTTAAAGGCGCACGCCCGTTTTTAT +TAAAACAAAAACTTGTCGTGTCGAGACCGGGCACCGTAGTTTTGACGCTA +AAAATAAGATTTATGAAAAAGCGGGTTTCCAGAAAATTTCACTTTTCAAA +CTTTGATGAAAATTTGAAGTTTCCAGCCAAAATCGCTGCGAGACAGTGGA +ACGAGTGCAGTAGAGCGAAATTGCTGTAAAATTTTGAAAAAAAAGTGAGA +AATTTAAAAACACGGATTTCTGGCCTCCCCCGTAAATTGAAATGGAAGAG +TTTGCCGAACTAGGCCACTTTGGCTCGCCCATATCTGGGGTAGATTTACG +GCGCGTTGCGTGTCGCGACGCGATTTTAGTTGGAAAACTAAATGTTTTTG +CCCGTGTGGAGTACACGACTTTCCCACGTGTTGTCCTCTAGGTGATTGTC +AATGGAGCGCAAAAAATCCAATGAGGAAGGCCAGAACCCCGTGAAAAAGT +ACGCGTTTTTGAAGCAATTCATCGAAAGAAGACAGTAAAAAAAAAGTCAA +AAATTCAAATTTCCAATAGAAAAACCACCAAACCCAACAATTTTTCCAGC +ACAATCGACCTATTCTTGCTGCAATTTTTCCGTTTTGCACTATGGATGGT +GCCGGCGGCAATCCACGTGGCCAACAAGGCGGACACGTTGACAATGTGGA +AAGAGGTCGGTGGCCGCGTGGCCTAGAAATCCCGAAAATTTCAAATTTTC +AGCCAATTTTCTGCTCGGCACTGCTCATCTGCGCGGCGAGCCCCACAAAA 
+TTGCTCCTCCTCACAGAAAAGCTGAAGCCCGATGAGTTCCTCACTTTCGG +CGACACTGCCTTCCTTGTCTGGAATTTCATTTCCGCGATCATTTTGAACT +CCTCGTGGACAAGATACTTCTCGCGAACTCCGTCTTCATATATAATTCTG +GAGGATGAGGATGTAAGTGCGCTCCGCTGAGAAAATGTTTTGATCACCCC +CACGATTTTCAGCTCGAGGTGGCTCCAAAACAAACGTTTGAGCTTATTTT +CCGCCTGCTCCAATACTGCAAACGTGAATGGCTCTGGCATATTTCCGGAT +TCTCGTGGCTCTTTATTTATTCGATCAGTAAGAGAATATCCATCGGGGGT +TTTTTGTTTCGAATTTAAAAAAAAACACAAGTGGAAGAGTTTTTAAGATT +TCTAGGCCACGCACCCACTCCAAAACCCCAATTTTCAGCCCGTATCTTCG +TTCCCTATTACACGGGACAAGTCATCGCTACCGTGGTCGCCACAAAATCG +TACCCAGCCCTCTCCAACGCTGTCTACATCATGACAATCATCTCGTTGGT +GAGCGCTGTCGCCGCCGGATTCCGTGGAGGATCCTTCGAGTACGCCTACG +CAAGGATCCAGAGGGCAATCCGCTACGATTTGTTCCACGGACTCGTGAAA +CAGGATGTCGCTTTCTATGATGCTCACAAGACCGGAGAGGTGACTTCTCG +TCTGGCTGCTGATTGTCAGACTATGTCGGATACAGTTGCGTTGAATGTTA +ACGTTTTCCTCAGGTAGGTGTGGGTCCGTGACCGAAGTTCCCAGATCACA +TGACCTAATTTTCCCAAATTCTTCCAGAAACTGCGTTATGCTCCTTGGAT +CGATGATCTTCATGATGAAGCTTTCGTGGCGCCTCTCCCTCGTCACATTC +ATCCTCGTCCCAATTATCTTCGTCGCCTCCAAAATCTTCGGAACATATTA +TGATGTAAGTGCGCTCCAGCGAAAACGTTTTTCTAAAGTTTTGACCCCAA +AATTCTGTCAAAGTGCCCCATTTTGGTTTGATCTTCGGAAAATGCGGGAG +AAGAGGCGCAGGCTTTGCAACTGATATCACATGGTTAAGATTGTGCTGAC +GGCACATGGAAAAATTCCCACATTTTTTTGTAGATCAAACCGCACCTGAC +ACCACGTGAAATTTTTGACAAGTCAAGAAAAATTTCGGCAATTGTGAAAT +AATCGAATTACCGCCGCACGTGGTGCCAGGCTGTCCCATTACGGCTTGAT +CTACAAAAAGTGCGGGATGTTTTTGCCCAACAAATTTGACGTCAGCACGT +TCTTAGTCATGCGAAATTAGTTGAGAACTCTGCGTCTCTTCTCCCTCATT +TCTTGTTGTTCTACGTAGATCAAGTCGAAATGACCCACTCTGACATCACG +TGCCGCCTCCCACCCCTGCTCCGACCAGGAATCAGCGGATCTACTTAGTC +TCAACTTGTTTTATTTCAGCTCCTCTCCGAAAGAACACAGGACACAATTG +CCGAGTCCAATGACGTTGCCGAGGAAGTTCTCTCCACAATGCGTACAGTC +CGATCCTTTTCCTGCGAGAACGTCGAAGCCGATCGCTTCTACGGGAAGCT +CACCCACACACTTGATGTCACCAGAACCAAGGCGATCGCCTATATCGGAT +TCCTCTGGGTCTCCGAACTTTTCCAGTCGTTCATCATTGTGTCAGTCCTC +TGGTATGGAGGCCATCTAGTGCTCACCCAGAAAATGAAGGGAGATCTTCT +AGTGTCATTTTTGTTGTATCAAATGCAACTCGGAGATAATCTTCGTCAAA +TGGGAGAAGTCTGGACAGGATTGATGCAATCTGTTGGAGCAAGTCGTAAA +GTGTTCGAGTACATTGATCGAGAGCCCCAGATTCAGCACAACGGGGAGTA 
+TATGCCGGAGAATGTTGTTGGAAAGATCGAGTTCAGAAATGTTCACTTCA +GTTATCCAACTCGTTCCGATCAGCCAATCCTTAAGGATCTCTCGTTTACC +GTTGAACCAGGAGAGACTGTCGCATTGGTTGGACCATCTGGCTCTGGAAA +GTCTTCGTGCATTTCTTTGCTTGAGAACTTCTACGTCCCGAATGCTGGAC +AGGTTTTGGTGGATGGAGTTCCGTTGGAAGAATTCGAGCATCATTATATT +CATAAGAAGGTACGTTGGCGTCAATTTCGGTTCGACAAATCGTTTGCCGG +TTTGCCGGCAATTTTAATTTTTGGCAAATTGCCGATTTGCCGGAAGTGTT +TAGAGTGCTTTTTTATAAGACGGAAACACTTGAAACTGTGTCCGTTTTTA +AATTTTTGTGCCGGTTGGCGATTTGCATGATATTTTCAATTCCGGCAATT +TGCCGATTTGCCGGAAATTTTCCATTATGGCAAGTTGCCGATATGCCGGA +AATTTTGATTTCGAAAAATTGCCGATATGCCGGAAGTTTCAATTCCGGCA +ACTCGCCGATTTGCCGGAAATTTCAATTCCGGTAATTTGCCGATTGGCAG +GAACAAATCGTTTGCTCGCAGTCTATTCGGATAAAAATTGCGCCAACTGT +TCATCCGTTTGCACAGAAAAAGCATTTCCGCGCTTCGCTATTGGCCAATT +TCAAAATAGTGGGCGTGACCAACCAATCACGGGTGTTTCTTGCTTTCCAT +TGCTGAAACTGTGATTGGTTGGTCACGCCCAATTTTTTGAAATTGACCAA +TTGCGAAGCGATGACTTTGGACGCAAGAAACTTCCGTTTTCTCCTTTAGC +TGTCGATCGAGATTCTCTCCTTTAAAAACACATTGCCGATCTTTTCAAAG +CCCCTGTTTCAAACGACACAGCTAGTAACAACAAAAAACCTCGAAAACTT +CCAATTTTTCAGATCGCCCTTGTCGGCCAGGAGCCCGTCCTGTTCGCTCG +TTCTGTGATGGAAAATGTCAGATACGGTGTCGAAGTGGCGGATACGGTAA +GCACGATTAAATGCACCATGAGCCAAACAAAAAAACTTGTTCAGGAAATC +ATTCGCTCTTGTGAAATGGCGAATGCTCACGGATTCATTATGCAAACCAC +GTTGAAATACGAAACAAACGTCGGAGAGAAGGGAACACAAATGTCCGGTG +GTCAGAAGCAACGTATCGCAATTGCTCGTGCTCTTGTTCGTGAGCCAGCG +ATTCTCTTGTTGGATGAAGCCACGTCGGCGTTGGACACCGAGTCCGAACA +TCTTGTTCAGGAAGCGATTTATAAGAATTTGGATGGAAAGAGTGTCATTT +TGATTGCACATCGATTATCGACTGTTGAGAAGGCTGATAAGATTGTGGTT +ATTAATAAGGTGAGTTGCAATTGCGCTCGACCGAGATTTGGTCGCAAACC +TGACACGGGGTTCTGGCCTTCCTCATTGAAATTTCCGCGCTCTATTGACA +ATCGCCTGCCGGACAACGCGTGGGAAAGTCGTGTACTCCACACGGACAAA +TACATTTAGTTTTACAACTAAAATCGAACCGCGACGCGACACGCAACGCG +CCGTAAATCTACCCCAGATATGACAGAGCCAAAATGGCCTAGTTCAGCAA +AAACTCTTCCATTTCAATTTATGAGGGAATCCAGAAATCCGTGCGCCCGT +GGTGTATTACTCGTGGATAACTGCTCATATTTCAGAATTGAGAATTTCGC +TAAAAACTGTAGAATCGCGTTAAAAATGAGACCTCACCAAAACAGGATTC +ACGAAACAAGAATTCACAGAAATAGTATTCAGTGGGCGCAGTGTGCGCAG +TGTGCGCGGGGGTTGCGAAAGCACACTTTTCGGTCCCGCCCCCTAAATCG 
+TGTTTTTCGTGAATACTGAATCTGGGAAATTTGAAAACTCGGCGAGTTTT +TCTGTGCGGCACACCCACGTGGTGTCAGGCTGTCTCATGGCCCGACAAAG +GGTACATCAGTGTAAATGCGCTCTACCGAACAGAACGCATAACCCACGTG +GTGCCAGGCTGTCCCATTACGGCTTGATCTACAAAAAGTGCGGGATGTTT +TTGCCCAACAAATTTGACGTCAGCACGTTCTTAGTCATGCGAAATCAGTT +GAGAACTCTATGTCTCTTCTCCCGCATTTTTTGTTGATCTACGTAGATCA +AGCCAAAATGAGACACTCTGACATCACGTGATAACCTGGTTGATACTACG +CGTACGGCTTGCCGGGCCGTGTTTCTCATTGCGGTTTGATCTACAAAAAT +GCGGGAATTTTTCCCCCAGGAAAGTGTGACGTCAGCACACTCAGTCTGCG +TCTCTTCTCCCGCATTTTTCGAAGATCAAACCAAAATGAGACATTTTGAC +ACCAACACACACACACACACACACACACACACACACACACACACACACAC +ACACACACACACACACACACACACACACACACACACACACACATCATATT +TGACGAGCAAAATCGGCCCCATAATTACAGGGAAGAGTCGAACAAATCGG +AAATCACGAAACCCTACTGAAGGACACCAACGGAACCTACGCGAAGCTCG +TTCAACGCCAAATGATGGGAGATCAGAAGCCACGTAAACGGCCGGCGGTC +GCCAGATCGGGACCTCAGCCAGCCGCGTCAATCAACGTGGCAGGACCATC +GCAGGGTAATGCAATGTCCCTGTTGTCGACGTCTTTCAGCCAATCTGCCA +GTAGTGTTACTTCTCATTAATTGTTTTTTCTTCTTGTTAATTGGCAAAAT +ATTATGATGAACTTGTTGGGTTGATTAATTTGTAAGAATAAAGTATACAA +TTATTCATTCATTATTAAAATTTGGGCACATAGAAAAAATTGTGGAAAAA +CTGTAAAAAATCTGCAAAAACGTGGAAATTATTCAAATCAACTCCAGGTA +GTCGTTGTCGACATCGTCTCGCTGCCGCATCGTGTTCTGAAAATCCATTT +TTCACTTTTTTTAAAGCAGATTCCCAGCTAACCGTCGACCGGATCGTGTT +GTTCCATCCGCGGATTTGAGAGTAGGAGCTTACGGTGCCACGTGTCTCGT +TGGCAGCTTGCCAATTATTCACAGACAGCCTGTACGAGTCGCTCAACAGT +GGACCGGCGTTTAGAGTCTGAAAATATATAACGGGAACACTAAAATTTTA +GAATGCGTATTGTGCAACACATTTGACGCGCAAAATATCCAGTAGCGAAA +ACTACAGTAATCCTTTAAGTGACTGCTGCACGTGGTGCCCAGCAGGGCTG +GGAATTTTTTGGTTTTTTGGTTTTTTTGGTTTTTTGGGACCAAAAATCCA +AAAAATCAAAAAAAAAAACAAGTTACCGTGTCTAGTCTCGACTCGAGACT +ATTCTGTATTAAAATACATCGAAACATGTATTTTAACACAGTTGTGACGT +CATAATGTAATACATTTGGCAACATTACTTGAATAACCCCATTAAAAATC +ACCTAAAGCATAAAACATTTTTTGGTTTTTTTTTGGTTTTTTTGGTTTTT +CCAAAATTTCAATTTTTTTGTTTTTTGGTTTTTTTGGTTTTTCAAAAACT +TCAAATTTTTGTTTTTTGGTTTTTTGGTCCAAAATTTTTTTTTGGTCCCA +GCCCTGGTGCCCAGCTGTCCCATTACGGTTTGATCTACAAAAAATGCGGA +AATTTTTTGCCCGAAAACATGTGACGTCAGCACGTTTTTAACCATGCAAA +ATCCATGCTTCTCCCGTATTGTTTGTAGATCAACGTAGATCAAGCCGAAA 
+TGAGACACACTGTGACCGCGTGCTACTGTAGTGCTCGTGTCGATTTACGG +GATCAATTTTCGAAATGATTATACATATAATTATCGATAGAATAATAAAA +TTAAGCAAAAAAAAATCGGAAAATAATACGAAAAAACAAAGGAAAATTGA +AAATCGCTGTCAAAATTCGAAAAAAATTAATTTCGAAAATTGAGTTCCCG +TAAATCGACACAAGCGCTACAGTAGTCATTTAAAAAGGATTACTGTAGTT +TTCGCTACGAGATATTTTGCGCGTCAAATATGTTGCGCAATACGCATTCT +CAAAATTTTGAACTCTCCGCCTCTCACAGCTGGGTCTCGTTAGGTATGGG +GGCGGATCCTTTTCAATTGGCGGTGGAGCGCGATTGAAAAAATTTTCGAT +TTTTTTTCGATTTTTCAGAAAAAATATCGATTTTTTCCGATTTTTCAGAA +AAAAAATTCGATTTTTTTTTGATTTTTCACAAAAATTATCGATTTTTTAG +ATTTTTCAGAAAAAATATCGATTTTTTTTTCAGATTTTCAGAAATTATTT +TGATTTTTTTTTAATGTTTTGTCCCTTTTTCTGAAATTCTACCTCATAAT +GATTGCTTCTTCCACTTCCGCCTCGCTGCCCGTAGTTCAGTGTCGAGTAA +TTCGCATTCGATACTGTGTAAATCGACAGTGGAATAACCGGGTAGATCAT +AATGTAGTACAATGTGGCAAAGTGGATCACTAACTGAAAATTATTGGATT +TTTCTAATAATAAGGTGGGGAAATCCGAAATATGTGAAAAATATTTTTAA +TAAACAAATTTTCCCCTGATTCCGAATATCTATATGAAAAAATTCAAAAA +AAAATTTTCCAGATTTTATATTTAAGCTTGAAATCGCTTGAGCCCGCATC +ACTATTTTCAAATGCTTGCCCAAAGAAATTTGCATTAGAGCGCGCTTGCA +TAGTTTGATTTTCTTCATTCAAATATTGTATTTATTACCGCCAAATTCAA +TTTTTTAACCGGTTTCATTCATTTTTGTCGAAAAAATATATTTTTCAGAA +AAAAACCGGTGAAAAAAAACCAAAATCGACAAAAACTAATGAAGCTGGCT +GATAAAAATTGAATTCGGCGAAAATAAATAAAATATTTAAATGAAGAAAA +TTCTTAGGCGCAAACGAAATTCGCCATTTCAAGCTTAAATATAAAATCAG +GGATTTTTTATTTATTTTTTTTTCACATTGATTTTCGAAATCAGGGAAAA +ATTTGGGGTTAATTATAAATATTTCCCAGATTTCGGTAAGAAAAATATGA +TTATTTTTTTACTAAAAAATAACCCCCACCAGTAAAGCGTAGAAAATTGA +GATGATAAATGCGAAAATGATGAACGCGGAAGAAAAAAAGTCGGGATTCT +TTTGGGCAGTGCTGGAGTAGTTGAAGATCGAGATGATTGTAGAATAGTAG +GACAGGAATGAAACAATCATCTGTAATTTATTTTTTTAAAGCTTCCAGGT +CAATACACGTGGTGTCAGAGTGTCTCATTTCGGTTTGATCTACTGTGTAG +ATCTACAAAAAATGCGGGAGAAGAGACGCAGAGTTCTCAACTGATTTCGC +ATGGTTAAGAACGTGCTGACGTCACATTTTACTGGGAGAAAAATTTCCGC +ATTTTTTGTAGATCAAGCTGTGATGGGACAGCCTGGCACCATGTGGGTGC +CGAGGGTATTGCTTTTTGGTATTTTTTGGAATTTCGAAAAAAACTTTCCG +ATTTTTTTGAAAATAGCTCTTTTTATAGAAGAAAATAGTTTTTTCTCATC +AACACAAATTTTAGAACAATTTTTTCGATTATCCGGAAAATTGAAAACTT +TCGTTTTTCTATGTTCCATTTTCGATTTTATAAATTTGGGAAAACCTAAA 
+ATTTCGATTTTTTTTGAAAAAAAAACGAGTATTCAATAATTTATATATTA +TAGGCATTCTTTTTCTCTAATTTCTTCTGAAAAACATTAAAAAAAAATTT +TTTTTTTTTTAAACCAATTTTCAGTAATTTTTTTCTGTAAAAAATACGAT +TTTTTTCCAATAAATTTCTAGTTTTTTCTCTCTTTTCGTACATTTGGACT +GTTTCTAATTTTTGCTTGCTTTTTTCTATATTTTTTCTGCGATTTCCTAT +GAAAAGTACCAAAAAATCGGAAATTGTTTTTTTTTTTCGATTTTTTAGGA +AAAAAAACGATATTTCCTATAAATGTTTTTTTCATAAGTAAATACTTTTC +TCCTTTTCCTTTTGATCTACAATATTTCGAATTTATTTTGAAAAAAATTT +TTTAAACGTTTTTTTCTGAAACAAAGACCCAAATTTTCAATAATTCAATA +ATTTTTCTGGATTGTTCAACCGAAATATACACATTTTTCAATAAATTTAT +AGTGTTCTCCTCTATTTTTGTTGATTTTTACTACTATAATTTTTTCAAAA +TTTTCGAAAAAAAAAATTTTTTTTTTGGAAAATTTTAATTTCAATTCGAT +TTAAAAAATATAAATGTTCATATTTTTTCATCGAAATATATATTTTTTGA +TTTTTCAACCTTTAAATTGTTTCATTTTCTCTGAAAAAATCGGAAAAATC +AACAGAAAAATCGAATTTTTTAAAAATATTTTCCGATTCCGACCGTAATT +TTTATGAAAAAAAATCGGTTTTCTCGTTATTTTCTACAACAAAACCTCAA +AAAACATGCTGGGCAACGTCAAAATCGCTCGATATCGAATCTTCAGCTCA +GTCTCATGTTTCACCAAATACACCGCATACAGAAACATCGTGCAGCTGAT +GAAGAAGAAAATGAGCAGATATGCACACTTTTCCACGAAAAATTCGCCCG +AAATCATGAAACGTGGCAAATTGAGCACAATTCGATGGAGCCAATAGCAG +GTCGATGAGCTGCAGCGGAAGCTTTCATTGGGATCTGTGCAGTACGTTGG +AACCGATAAATGTTCCTGCATTGATCGGATACAGTAGACGAGCAGGCAAA +TGTATTTTACAAGTGAGATATTCTGAAAATTTTTGAATTTTTTTTCCGAA +TTTAAAATTTTAGTTTCTCGATTGTTTTGAAGTAAAAAAATTGATTTTTT +AGAAAATTATATGTTTTGTTTAATTTATTTAAAAATCTTTAAAGGTGGAG +TAGCGCTAGTGGGGAAATTGCTTTAAAACATGCCTATGATACCACAATGA +CCGAATATCATGATAAAAAAATTCAAAAAAATTTTCTAAATTTTATAGGT +TTTTTTTTGAAAATTGAAAAATCTCAGTTTTTGCCTAATTCCAATTTGAA +TTACCGCTAATTGGATTTGTTCGGTGGAGCGCGCTTGCACGTTTTTAAAT +TTTTTTATTTTATTTTATGTTATTTTCCACCGATTTTTAATGTTTTCGGT +GTATTTTTGCTCGAATTTTAGAGAAAAAGTCAAGATAAATGCAAATTTTC +AATTAAAAAGCACGCTTACAGGCGTAAAAATGACAAAGTACCGATTTTAA +ACGATTTCCAACCTGAATTAATTAATTTCACTGATTTACGCCTGTAATCG +TCTTTTTTAATTGAAAATTTGCATTTATCTTGGCTTTTTCTCTAAAATTC +GAGCAAAAATACACCGAAAACATTAAAAATCGGTGGAAAATAACAAAAAA +TAAAATAAATAAATTTAAAAACGTGCAAGCGCGCTCCATCGAACAAATCC +AATTGGCGGTAATTCAAATAGTAATTAACAAAAACTGAGATTTTTTTCAA +TTTTCAAAAAAATCGTATAAAATTTAGAAAATTTTTAAAAAATTTTTTTA 
+TCATGATATTCGGTCATTGTGGTACCGTGTTTTAAAGCAATTTCCCCACT +AGCGCTACTCCACCTTTAAAGTGCTAATGTAAACTTTTTGTACACGTGGT +GTCAGAAAGTCCCATTTCGGTTTGATCTACAAAAAATGCGGGAGAAGAGA +CGCAGATTTCTCAACTGATTTCGCATGGTTAAGAACGTGCTGACGTCACA +TTTTTTTGGGAGAAAAATTCCCGCATTTTTTGTAGATCAAACCGTAATGG +GACAGCCTGGCACCACGTGTTTGTACAATTACACATTATCCTGGAACTTT +CCAGAAAATTTGAATTTCCCGCCAAAACTTATGGGTCTCATCACGATGAG +TCACTAACCTCAAACCGATATATTTTCTAAAAACAAAAACTCAAATTTTC +TCAAACTACAGTAATCCTACAGTAATCCTACAGTACCCCTACAGTACTAC +TACAGTACCTTGGCATTGTCCCCCACCAATATACAACCCAATATACCTTC +AAAAAACAAACACGTAATTTTTCCCAAACTACAGTAATCCTACAGTACTC +CCACAGTACCCCTGCAGTACTACTACAGTACCTTTTTAGAACATTTTTCT +GTTTTTCGATTTTTTCGCAAAAAAAATTCAGTTTTTTCAGCAAAAAAATT +GATTTAAAAATATTTCAAGTTACTTTTTAAAAGCTTTTCAAAAGTTTTTC +AATCCGTAAAAATAAAATTTTACGAATTCTTGTATTTTTTAAATTATCTG +AAATTTTTTTTTCTCAAAAAAAAAACTTTTTTTTTCAAGTTTTCAATTAA +TTTCGGCAAATTTCAAAAATTTTAGGTTTAAAAAAATTCAATTTTTATGC +TTAACATTTTATCGACATAAAAATCATTAACAACTACGAACAATTTGTGG +AAAACGTTTTTGGCAGTGGAGCGCACTTGCGCCTCGTATTCAAAAAGCCA +CACAAAATGGTGAAATATTCCCAAAAATCAAATAATTGTTTCGCTACGAG +ACCCAAAATACCGCAAAATAGTAATTTTTACCATAGTAATTTTCAAAAGT +GGCACGTAGACCTGATGAAACGTGTCGTTCACCTCCTCCGAGCTTCTGCC +ACGTGAGCTGGTGCTCCGGCATGGCTGATACATTTGTGAAGTTCGCTAAA +ACAGAAAAATATTATAAATAAGCGGAAAATTTCGTGGTCTCGACACGACA +AAAAATAATAAAAATTATTCGTGGTGAGACCCGAGTTGTTTTGGAGCAAA +ATTCTCAAAATTGGGGTATCACGTGGTGTCAGAGAGTCCCATTTCGGTTT +GATCTACAAAAAATGCGGGAGAAGAGATGCAGAGTTTGCTGACGTCACAT +TAAAAAAAAATTCCCGCATTTTTTGTAGATCAAACCATAATAGGACAGCC +TGGCACCACGTGCGGGGATTAAAACCCGCGCCGCACAGAAAAAAGGCGGA +GTGTTATTTCGCAACACCGCGGCACGGTTTTTTGTTGATCTTTTTCAATA +GTCTTGATATTACTTGCGATTTTTGCAAATTCAAAAAATCAACAATAAAC +CGACCGTGCCGCAGGGTTGCAAACTCCGCCTCTTTTTCTGTGCGATGCTG +ATTTTTTGCGAAATTCACGAATCAAAAAGCACACGTGGTGTCAGTGTCTC +TCATTTCGGCTTGATCTACGTAGATCTACAAAAAATGCGGGAGAAGAGAC +GCAGAGGTCTCAACTGATTTCGCATGGCGCCACACATTTTTGGACAAAAA +ATTCCCGCATTTTTTGTAGATCAAACCATAATGGGACAGCCTGGCGGGGA +TTAGAAACCGCGCCGCACAGAAAAAAGGCGGACCGTAGTTTCGCAACACC +GCGGAACGGTTTTTGATATTACTTGCGATTTTTGCAAATTCAAAAAATCA 
+ACAAAAAACCGTGCCGCAGGGTTGCGAATTTACGCTCCGCCTCTTTTTCT +GTGCGATGCTGCTTTTTTTGCGAAATTCACGAAAAAAAAAAAGAAAATAA +AAGTACATTTACCGATAAAAGTGACGATAGACCGCCGATATACATGGAGA +GAATAGAGAGAGAAATGTGAAGTTTATTAGAAAACAATTTTGTAAAATTA +TGATGATGATGAAGATTTGCGGAATCTGGTAATCGGTTTTAATTGCCTTT +TCTATTATATTATATTAATCACTTGATGATGTACGGCTTTGCGCTTCCTC +GTCCACGGTCTCCAGATGACGGAATCCACGTGGCGATACTGATCGATTGT +GTTTTGGAGTGGTGGTGCTGCTGGTGCTGGTGGAGGGAATTGAGGTGGTT +GCAGTGGATATTACGCTGAAAAATTGAAAGGATTGGTGGAGGAAATGAGA +GAAATTTGAGAGCATAAATTTTTAAAAATCGAAAAAAAATTTTTTTTTGC +TATTTTTGGGCAGAAAATGGCGAAAACGGTGTTTTTTTGGGTAAAAAATA +ATAAAAATTTACAATTTTTTTTGACAGAAAAATGCGGAAAAGCTGTTTTT +AGGGTTGAAAAATCAAAAAAAAAATTTTTTTTTGCTATTTTTGGGCAGAA +AATGGCGAAAAACGGTTTTTTCAAGTTTAAAACATCAAAACAATTTTTTT +GCAATTTTTTTACAGAAAAATGAGGATAAGCGGTTTTAAAGGTTTAAAAA +ATTTTTTAAAATTTTTTTTGCGAATGGAAATTTTCTGAAAAATCGTGCAA +ATGCCTAATTTGTGCAAAAATCTTATCTGGTCAATGATTTTTAAGAGAAA +ACACCGGTTTTAGAAGGGAAAAAGGCAAAAAACTGGATTTCCAGTGAATT +TTATACTAAAAATGGAAAAAATTCAGAAATTTTTTAAGGAAAAATCAGAA +TTTTCGCAATTTCTACACGAAATATATAGAAATTGAGGAGAACCTCGGAT +TTTTTGCGGAGTTTAAACAATAATTTTTCGGCTAAAATTAACGATTTTCA +GAGAAAAAACTTAAAAAAAACAGAATTTCAGTGAATTTTACACGAAAAAC +GTAGCAAAAAGAGGTTTTTAGGGAATTTCCAAGTCAAATTTCCGAAATTC +TTGCGATTTCAACCCTGAAAAATAGCGAAATTGAGGAAAATCTCGGATTT +TTGCAGATAAATAATATTTTTTCCAATGATTTCCGTCTCCAAAACACAGA +ATAATTTACCAATTCGACGATGGTGAGGAGGGAGAGCCCAGCTCGTTGGT +GACAGCCGTGAGATCCGAGAACAAATTGACGCTGGCTGAAAAAATGGAAT +AAACGCTAAATTGATGATTTCCGCTATTAAAGTGCATATAGAAATACGAA +AAAGCAAAAAAAAAAATTTTTTTTAACCTTTTCCCTGGAATCGAAGAGCA +GTTTGCTTGCGGATTGGACGTCGAAAAGGGCCTAAAATTCTCATTTTTCG +GCCTGAAATTCCTCGAAAAACCCACTATTATTGGTAGTCGGAGATGGCTG +ATCGCCTCGTCGCTGGTAATATAGCACGCCTAGCCAGAATCCGTAGAGAA +AATAAATATAGACTGCCCACACTAGGATGATGGGAAGAAACGACAATCGG +CAGGCGTCTGGAGTGAAGAAATCAAGATTTTTCAGATTAAAAATTAAAAA +TCTTTAAAGGTGGACTACACCCTTGTTGGGAAATGGCTTTCAAACATGCC +TATGGTACCACAATGACCAAATATCATGATTAAAAAATTCAAAAAATTTT +TCTAGATTTTATATGATTTTTTGAAAATCGAAAAAATCTCAGTTTTTGCC +TAATTCCTATTTGAATTACCGCCAATTGGATTTGTTCGATGGAGCGCGCT 
+TGCACGTTTTTAAATTTATTTATTTTATTTATTGTTATTTTCCACCGATT +TTCAATGTTTTCGGTGTATTTTTGCTTGAATTTTAGAGAAAAAGTTAAAA +TAAATGCAAATTTCCGATTAAAAAATCACTGAAAATGCGTAAAACTGAAA +AATATGCTAGTTTCAGGTTTGTTGTCGTCGGAACTCATTATTTTACAGTT +TTACGTATTTTTCACATTTTTAAACACTTTTTAGCAAACATTCTGCATTA +ATCTTGCTTTTTTACTTCTCTTTCCGATAAAAATACACAACTAATGAAGA +AAATCAGTGACATAATAATAATAATAATAATAATAATAATAATAATAATT +TATTACGCTCGCTGGGAGACGTGAAGGAATACAGAATACAAAAGTTATCA +TTGTTCGCTTGAACTAGAATTGGTGCAGTCGGGCAATCGTTTCCATTCCA +TTAAAATGTTGTCCTTGTCCTTGCGTTGTCCTTGCGTAGACTCCCCCATA +GTGGGCGAGCGATTCCCGAGTGAAAATAATCACGGAATGAAAAAATAGAA +GAAAAATAGAATAAATTAATAAAAAAAATAATGCAAGCGCGCTCCAACGA +ACGAGTTCAATTGGCGGTAATTCAAATAGGAATTAGGCAAAAACTTTTTC +AGATTTTTTCAATTTTCAAAAAATCATATAAAATTTATAAATTTTTTTTG +AAGTTTTTTAACGTGATATTTGGTCATTGTGGTACCATAGGCGTGTTTTA +AAGCAATTTCCCCACAGGGTGTAGTCCACATTTAAAAATTAAAAATTAAA +AATTCTACCTAACAAAATCGAAAATTTCAGATTCTCCTTGATTAAAAGCT +TTATTATTGAGTAAAGAGTCCGCAGCGTCCACACGGAGAGCGGGTACAAG +ATGGAAATCTGAAAATATTTAGGATTTTCAAGGGAAAATATCGATTTTTT +TTAGCAAAATACCACAGAAATTGCGGATGGAATGAGTAGAAGAGCCGGGT +TGACTTTCCGGGGAAAGTTTCGACGTGTGATGCTCGTCTCGGAGAGCCTC +ATTCCCAAGAATCCGGTTGCGATGCTGAAGAAAATCGATGGAAAAAGTCT +AAAATATCCCCTGAAACCCTACCTAAAAATCAGAAAATACGAGAAAATAT +TGATTAAATTCCGAGCCGGCACATAATTCGGGCGGATCAGCACTCCATTG +AAGTTTCGAAACGTTTCGCCGTTCCACTGAAGTCCATCGAAGCCGGGAAC +GGATGGCGGCTCGAAATGCTCGTCGACACTCATTTGACTGGCCCTGTAGA +GCCCGATGATGGCAATGGCGAGCATTCCCACTTCCACATGGAATATCAGG +CTCTGAAAATTAAAATTTAGCTTTTAGACGGAAAAAATCGATTACTTTGA +CATGAACTTGATCGAAAAGCCAGCGATACGGGTATTTTCGGTGCAGGCGG +CCTGATTCCGGACGTGCTCCTCGAGATATATTGGAATTTGTCGAGCGGAT +ACTAATATCTGCCATTTTTTCGCTGAAAAATCAGAAAAAATAAGTTTTAA +AACAGAATTTTAATGGAAAAATAAACGGAAAAATTCGAAACTTTTTTTTC +CGCCATTGAACACCAATGTGCCTTTAAGGTACGGTAGCTCTTCGTGGTGA +GACCACTGCGCCGCGCATCTGATTTCTCGTGGGAAATTTGTTTCGAATCC +TACGTTTTTTACAGATTTTTCTTTTTTTTCGTTTCGCTCATCATTTTTTT +GTTGCAAAAAATCCAATTTTCAGACAAAAATGTCAGAACGCCAATCCAAA +TATTTTGACTATCAAGGTATCGTCATTTCCTCCACTGGACAAGATAACCA +AGATTCGGAGACGGACCTCGTTTATCTCATTCAAGCTCATGTACGTGAAT 
+TCCCTCCGTTTTCATATTTAAAATAATAAGCTCGTTTTCAGGGAAAAGCG +GCCCCGAAGAACATCATGTACGGTGTCTCCAAGTGCGCATTTGTTCCAAC +AAACTTGGAGAGGAATTTCGATAATATCGAAGAGGCAAAAAATCTTGAGT +CAGTATTTATTTGATTTGAGCCCATTTTTAATCTGTTAACACTTGAATTT +TTAGACGTCGATCCAAAATCCCGCTAAAGTTCGGTGAAGTCATCCTATGG +AACGAATCGGACTGTGATCACGATAAGAGGATAATCCTTCATATCAAACG +AGAAAAACCAATTTATGAAGCGTCTTCCAGCAGAAATGGACTTATTTTGA +AAGTCGGTGGCGTCATCCAGCCCACCTCAACGACCTCGTTCTGGACGCCA +TTATGCACCGTCACCATGCGTATGTTTCTTCTTGATTTTCGTCTCTCTAA +CCATATATTTATTTCAGCAGAGACAGAGGCGACCCGTGCGGAGCCAGACG +TTTGGCTATACGCCTGGATTCGATTCGAAACTACCATGAAGAGTGGTCTA +GATCCTTTCAATATGACTGCAACATTCGAATCGTTTGACAGTTGCGATCC +ATCCGACCAAGCACGCGTCTGTGAAGCGGTATTTGCTTTTTAAAAATTTT +TTACGGGATTTTGAACAAATAACTTTCAGCCATGGAACGCAGGCTCTCCT +GACTCTAAATTCGGTGTTTGGCGCCCGGATCCAAAGCCTGCTGATAGCGA +CGACGAAATTGATATCGAGCCGAGAGAAGGCTGGCACTTGCCAGAAGACA +AATGGGCCGAGGTTAAGGTTGCTTTGTGGATTCTCGTTGACAATTTGATT +TTGCAGGTCATCAAGATGCAGCTTGGCCTGTATGTCGGCGAACGCCTACT +GATTTGCAAAGAACTCTCGCAGTTCGACTTTATTATCCCACTGCAGAAGC +CATTTTCCCGAGGAACGGACAAGACTTTGATCTACCCAGCCGTCGGAGAA +TACTTTCATTTCAGTGCTATTTGGTCGATGCAGCACAACGGTTTTCTGAT +CTATGAGCTTCAGCCAGTTCCACTTCTCAGACAGCATGTAACCTCTGTCA +ACGGAAATGTAAGTTATAATATTGATTTTTTTGTCCAGAACATAATTTTT +ATTTCAGCTCCTTACCCGCGTCGTTCCTGCCAGTATCAGAGGTCTCTTCG +TTGACAAGGAGGGCACTCTTGGATTGATTGATGACCCACACCATCTTCTT +TCATTCTTCGAGTTTCATCCAGCTGGTTATGAATTCTTGAAGGCTATGGT +AATCATGAGTGGGTCTTATTTTGAAACAAACGCTTTCAATTTTCAGGCCG +AAGTTCGTGCTGTTCGAACTTCAGAAAACAAATCTGTCCGTTACCGCATT +GTTCGTACATCGGGAATGTCCATTTTTGAAAACTGGCTTCGTGATACTCA +GGTCTCTAAAGTTAAATTAAACTCGTCCTTTTCACCCCTGAATTTTTCAG +TTCGTTGTTGGTCCAGTCAAAGGAATTCGAATCAATGAAGACACTGTCAT +TTGTGCCAAACATCCGAATGTCTACTTCAAAATCCCCAATAACTTGAAGG +AAGGAATCCCCATTGGCGGAGGAGTTCAATTCGTTGGAAAGCGACAAGCT +GGAGTCGACAGTGAAATTATGATCACCGAATGTTCTCCATGTCCAGCGTT +CACTTGTAAAAATTACTCGGTCTCTGGTGATACGGTTAGTCTTCAATTAT +TTAATAGTTTTTATTCTATGAGGAAAATATTCAGCGCTTGTTCCAAGTGT +ACTTGAAGCCAAATTGCGATCATGAGCAGTTGGCGGAGAGTGATTCGATG +GGATTTGTGGATTTCAGAGAGTTGGAGACTCCATGTAGAGGAAAGTGAGT 
+GTGTCAGGATTTGTAAAAATAATTGTAACTGAATTTTAAGGTTCCTGGCA +TGGGTCCGTGAATCCATTACTGTCAATGATTGTCGCCGTGCTGCTACCAT +CATGGAAGTATGCTCGACAGCCATCTGTCCTCCGTTGATTGCAATGAGCG +CGAATTCGAGCAGAGCAACATCGGCCAGAACTACGCCAGCTGGAAGTTCC +ATTGGATCCAGATCATCAATTCAATCAAGAGCATCTGCAGCTACTTCAGT +GAGCTCAAACGTAAGTTTATTTTTAAGGTTGGAAGATGATAAAAACAAGT +TTTTCAGCGATTCGTTGGCCCTTCAAGCCGCAGAACTCCATCTGGTACTC +CACAAAGCTCAACATCTTCCAGGGTCTAATTAAATATATACGCATCCCGT +TTTCCCCCGTATTTGTGTTTCAAATGTTCTGCTCATTTTTAATCTACCCG +TTATCCAACCAAACATTTATTTATTTAAACTTCAAGTTCCACCACCATCC +CATTCCGCACAACTTGTCTCTCGTAATTTTCTCGTGCTCTAATGAAGTTG +TGGTAGGTTATTCTAGTGAATACACCCATGTTAAGCAGCGAATACATCCA +GAAAAGTCCCAACAAGCCGGCAGCGAGATACTCCCCCCAAAATGATGGTT +GATCGGTACATCCAAGATAGGGTAGCATCGCGATAGACGTGAGCAAGTTC +GTTATGCACTTGAAAAACTGAAATTTCAGAAAAGATTAGGTGTATCAGAA +GGTTAAAAACAAACAAATGACATATAGATGAACGAAGCCAGTGATGGAAT +CGGATTCCGAATACGAGAAGCCATTCCCGGAGATACTGTGATAAGAATCT +GTGTGATCGAGAACACAATGAGCACACCCACCGGGAACATGAATTCCATG +GAAACACGGAGGCAAAGGTCACAGTACTGCAAAAAATGCGAAATTTTGGC +GATTTTTGCTCTAAAAAATGCACTGGCCGGGTCTCACCACGACAAATTTT +TGTTGTCAGTTGAATGCCAATAGAGTGTGCGCCTTTAAGGAGTACTGTAG +TTTCAAACTTTTGTTGCAATCGATTTTACAGTTTTCCCGAGAAACAAAAG +TTTGAAACTACAGTACTCCTTAAAGGCGCACACACTCTTTCCGCATTCAA +CTTAACAAAAGGTTGTCGTGTCGAGACCCGGCCACTGCATTTTTGTAGCA +AAAAAAATTTGAAATTTTCAAGTCTGGATAATAATATATAATATTTTTAC +CTCAACTATGGGAATTTGCATTTCCAAACTATCCAAACAATCGCTGAACC +CGAAAGCATCACAAATAATCGATCGATATTGTCGCGGAATGTCACGTATC +GATTGGTAGATCATGAATATGTTGCCAAGTAACGCGAAAATCTTGAAAAA +TGGAGGATTTTGCTTTTAAAAAATCCAAAAAAAAAAACTTAAATTCGACA +ATTTCCTGTTTGAATTCCCTCCAAAATGAGAACTGCAGGACCCATCAGCG +ATTTGAGTGGGCGGAGTCCGAGGGCGCTGATTGGTCGTGCAGTTCTAATT +TTGGAGGGAATTCAGCCAAATTTGAAAAATCTCGTTTTTTTTGCAATTCT +TTTGTGGTATTTTCGACATGGGGTTCTTTCTGGCTTTCCTCATCAAGGGT +GTCAATGTCCCGTAAAAATTTCGAAAACGGGACAACGGGACGTCCCGTTT +TTTTGAAAACACTCAAAAAACGGGACAAACTGGGCGGTTAAAAAAAAGAG +TGCATAGTTGGCGTAAATTTTAGTGTATTCGAGGCAAATTATAAAAAAAA +CTGCATGTAAGTCATTAATCTGCATGAATTTAAGTCTTGAAACAAAACTC +AAAAACAGAACTTTAAAATGTGCAACAAAATCATGAGAGAAATTGTCCCG 
+TTCCCGTGATAATTTTGAAAACGGGACAGCGGGACGTCCCGTTCCCGTGA +AAACACTCAAAAAACGGGACAACGGGACAAACGGGACACGGGACTTGACA +CCCTTGTTCCTCATTAAAATTTTCGCGCTCCATTGATAATCGCCCTGGGA +AAGTCGTGTACTCCACACGGACAAATCACATTTAGTTTTACAACTAAAAT +CGAGAAAATTGGGAGAGTTTTTGCCGCGAACCTGGCCTAGTTCGCGGCAA +AAACTCTTCCATTTCAAAATATGGGCGAAGCCAGAAATCCGTGATATTTT +CAGCATATTTTTTTGATTCGAAGCAAATCTCAGTTTCTCACCACAACGAA +CAACTTCACATTCGGATGCCGCCCGTCTTCGAAACGTGGAATTCCAACTG +AATTCACCGATTCGACACTTGAACCGGGTGACATCATCATATTTTCAACC +AGCTCATACTCGAACTCCGGAATATCCGGCACCACATCAATCGGTGCTGG +TTCTGGCGGTGGCGGCTCAACTGACACTTTTCCTTGATCTAAAATTTGTT +TTTTTCTGAAAATTTGTCACTGTACCAAATCAAAAAAAAAACCTTACCAG +AACTCATAATTTTTGGATTTTAATTTGTAAATTGCAAAACAGAAAATGAA +TTGGAATTTTTTCACTTTTTATTTATGGGAGGGGGAAATTCTTTGTTGTT +GCAATAAAGTAAAACAATCACAGTAAATGCCAGTACAATAGTTTTTCACG +TGTAATATATTAACCAATTACTATATTCAATAACTCTGTCGTCTAAATAC +ATAGGGTAGGGGGGAAGGTGAGGGAAAATGGAAATAATAAAGTGAAATGT +TTTGTGTACTTGCGATAAAACAATCAATAAATGGGAACAAGCCGATGATA +AACTGATAAACCGAACGTGAATATCGAGAGAGGGAGGAAGAAAGCGTATA +CCCGTTAATGAAATCAAGTTAATGGAATTATTTGGATTGAAACAAAACAA +CGAAAATATCAATGTTTGAGCATAAGGAATGAGGGAATGGCACTGGTGGT +GGTGGATAAATGAATCGGGGGGAACGGAATCTATGGAAGTTTCGAATACA +TCGCATCGAGTTGCTTCGAGTAGTGAGCCTTCAGCTTCGGACGCTTGCTC +TTCAACGTTGGCGTGAGAAGATCATTCTCGACGCTGAATTGCTCGGCGGA +GAGACTGATATCGCGCACTTGCTCGAACGAGAAGAGCCCGGCTTTCTTGC +CAACAGCGACCATGTCATCGAGGATCGCCTTTTTGACGGCGTCGTTGTTG +CACAGCTCTTCGAATGTTCCCTTGATTCCTTGGTCGGCCATCGCTGGGAC +GAGAACCTAGAAAAATTGAAAATTACATATATATATATATATATACACGC +TCTCCTCAAAGCCGAATAATTATTAGAGCGCGCTTACTGCGCTTCGCGTT +TCTGCATCCGCGGCATTTTGACTTTACACTGAAAAAAGCAGATATTTCAG +ATTTATCGAATTTTTAGCTTTTAAAATTTTTTTCTGCATTTTTCGAACAA +ACCTTTTGTTAAACAGTGAAAATTGAATTTAAAATGACTAAAATGAACTT +TTTTCGTTCACTGGTTGTGAAATGGTTTGAATTTGAAGAAATCAAAGGGA +TTTTTCTGAATTTTTTAATAATTTTCTATTAAAAATCGGTTTTAAACCAT +TTTTTGACCCTAATGTTAAGTCACAAATGTATTGAAACCGGTTTTTAATA +GAAAATATTTAGAAAATAAGAAAAATCCCGTTGATTTCTTCAAATTCAAA +CCATTTCACAACCAGTGGACGAAAAAAGTTCATTTTAGTCATTTTAAATT +CGATTTTCACAGTTTAACAAAACGTTTGTTCGAAAAATGCAGAAAAAAAA 
+TTTTAAAAGCTAAAAAATCGATAAATCTGAAATATCTGCTTTTTTCAGTG +GAAAGTCAAAATACCGAGGATGCAGAAATGCGAAGCGCAGCAAGCGCGCT +CTAATAATTATTCGGCTTTGAGGAGAGCGTGATATATATATATTTTTTTT +AATTTTGAAAATATTTAAAACCAAAAAACCAACCTCAGCATCAGGTACAA +CAATAGCAATGAGACACGTCTTCAGTGATTCTCCATGAACAAATGATTGA +GCGACATATTTCGAGCGGACATAGATGTTCTCAATCTTCTCCGGTGCCAC +GTATTCTCCCTGCGAGAGCTTGAAAATGTGCTTCTTGCGATCCACAATTT +TCAGCGTTCCCTCAGGAGTCCATCGTCCAATGTCTCCAGTGTGCAACCAT +CCATCCTCATCGATCGTCTCCTTCGTTTGTTGCTCATTCTTATAGTATCC +CTTGAAGATGATGTGTCCCTTGACACACACCTCGCCGGCTTGATCCTTCG +CGTAATAGTTCAACTCGGGCACGTCGACCAACTTGATCTGACACGATGGG +ATCACCATTCCGACGTGTCCGGCTAGCGAATCTCCTTCCATCGAGACAGT +GCAAGCGGCGACACATTCCGTTTGACCGTAACCCTCGACGACGACACATC +CCATAGCGGCACGAACGAAGGTGAGCACATTCGTCGAAAGTGGAGCCGAT +CCGGTGATCATCAGACGCACACGCCCTCCGAATCCTTCGCGAATCTTTTT +GAATACCAAATTGTCGAAGAATCCGTCATTTCTGATATTGAAGCTGGAAA +AAAAAAGAAGATATTAAATACTATCAAATCATTCAGACCACTCACTTTGC +CATATCTCTGGCCTTGTATGAGATAGCGAAATCAAACAACATCTTCTTCA +AAGTCGACTTGTTCACTTCAGACATCACTTTATCGTAAAGCCGATTCAAG +ACACGTGGCACCACTGGCACCACAGTCGGCTTCAACTCTTTGATATCCTC +GGCCAACACACGTATGTCACCACGATAGAATCCGACTTTCGCGCCGACAC +AGAAGCACACACTCTCGATGACTCTCTCGAGCATGTGAGCCAACGGAAGG +AAGCTGATCATCGAATCGGTCGCCGCGATGCCACTGTGCTTGAAGAAGTC +CATACAGACACCGTCGGCGATCACGTTCGCGTGAGTGAGCATCACACCTT +TCGGTGTTCCAGTCGTTCCTGACGTGTAGCATATGGTCGCAAGATCCTCT +GGTGTTGGTGGAATGTGTGCTGGACGAGTCTTGGCGTTCTTTCCAATCTT +CTCGAGATCCTCGAACGTGAGCACCTCGACTCCGAGCGATGACGCAGTCG +TTTTGAGCTCATCGGTGACTGGTTCCATCACAACAAGTGTGCTCAAACTT +GGGCATTGCTCCTTGAACTTTAAGAGACCTGAAATCAAATTTCCTTTTGT +TTTTAATAATATTCCTTATCAAACTCACCAGTGGCCTTTGAAATATCATC +ACAGACAACAATCTTGATCTCTGCCTGATTCAAAATGAATATCGACGCTT +CTGATCCAAGGGTCTCGTAGATCGGCACAATCACATTCGAATAGTTGTAG +GTGGCGAATTCGGTGATGATCCATTCGGCGCGATTCTTCGAGTAGATCCC +AATGTTCTCCGCGTTGCCAGTCGGAACGCCGAGCTCACGGAACGCGACGG +ACACGTTGTCGGCTCGCTCGAGAATCGTATTGTACGACTCCCACACGTAC +GGGATTGAACCATCCGACTGTTTGACTCGACGTCCGAGCATCGGCCCGTT +GTTCGAGAGACGGGCTCCACGGCGAACACCCTGGTAGAGGGTGCGCGCGT +CTTCGAACAGGAAAGCCTGGAGTTCGTCGTCCTTCAGGTAGGCGCTGATG 
+CGGGAACCATCCTGGAAATTAACATTTTAGTCGGGGGGTTTTTTTGGAAT +TTTTTTTTCGCTCGTTTTCTATTGTAAACCGAGCCACGACGCGACACGCC +GTAAATCGACAAGCCCAAATGGCCTAATTCGGCAAAATCTTACATTTAAA +AATATGAGGGAAGACAGAACTCCGTGATTGGAGAAAATTAAAGGAGGACT +AACGGTTCGGACGATTTTGAACGTATAGACCCAAAATGAGCTCAAATGAA +TGAATTTCGTAATGAAACTGCTCAAAAATTTTTATGGCGGTTCAAAATGT +TGAAAAAATTACACTGATTTTGGCTAAAATCACGAATTTTCCCATTTTTC +CGTGTCACATCTGTTCGAAGTTGGATTTTTTGGAATTATCGTTTTTTATT +ACATATATTGGTAGTTTATCTCATTTAATTTCGTCGATTAAAGTACATTT +AAAGCCGATAGGTAACCAATTTCGATAATTTTTGGTCACCTATCGGCTTT +AAATGTACTTTAATCGACGAAATTAAATGAGATTAATTACCAATATATGT +AATAAAGGACGATAATTTCAAAAAATCCAATTTCGAACAGATGTGACACG +GAAAAATGAGAAAAATTCGTGATTTTCGCCAAAAACAGTGTAATTTTTTC +AAAATTTTGAACCGCCATAAAAAATTTTTGAAAATTTTTTGAGCAGCTTC +ATTACGAAATTCGTTCATTTGAGCTCATTTTTGGTCTATACGTTCAAAAT +CGTCCGAACCGTTAGTCCTCCTTTAAGTTTAAAAAAAAAACGGCATATTT +TGCCCAGTTTCTCTCCAAAACTCACCGGCAAAATGCGACTCTGATTCATC +GGATCCACCAGTGGCTTGACATTTCCCGAATAGGTGACCCCACTGAACGA +GTAGGATCCCGACGTCGATGACGAGCTCTTCGACAAGTAGTATCCGAGGC +CCATGGTGACTGCGGCGGCGGCTCCGATTTGAACGTACGGTGGAAGGGAA +GAGTTCGAGTTTGGTGCCGAAATGAAATCGACAATACTTCTGTAACAATT +GAAATTATATATAGAGAGGACGGGTGATGATGATGAGTCATCCAGGTTGT +TGTTGTCGCGCGCGCGCGCCCGCACTGTTCCCCACAAAACAAACAACTTA +TTTGAAATTATGAAATCGAAGTTTCTTTGTTATTTTATTTTATTTCACAA +GATGGTTTTCATGTTTTATATACCATTCGCAAAAACAATACCTAGTTTTT +GCTCGTTTGGACTCAGAAATTTTGAGACTTTTCTGAAAAGTTTAATGGCA +GATCAAAATTTCGGAAAAAGGTGAATTTTAGCGAAAATTCGCCGTGGTTT +TTCGAAATTTTGATCTGCCATTAAAGGTGGTGTAGTCGAATTTTTTTATT +GCTTTATTAGACTCAAAATTGTCTGAAAACACCGAATTTCATAATAAAAC +TTAAATGGCCTAAAATTAGCTAAAATTTGAAATTTGACCGACTTGTCAAG +CGGCTGGAAACGTTTTTTTTTTGAAATAACCGTCAAATTTTGAATATACA +GTGCAATTATCTTGCGTTTTCAACTTTATTTAGGTACTTTAAAGTCGATG +AACGGCGAGATTTGTTTTTAATTTTTCACGAAATCTCGCCGTCCATCGAC +TGTAAAATACCTAAATGAAGTTGAAAACGCAAGATAATTGCACAGTATAC +TCAAAATTCGACGGTGTTTTCAAAAAAAAAAAAACTGTTTCCAGCTGCGA +CGTCGGTCAAATTTCAAATTTTAACTAATTTTAGGCCATTTTTTGAGCCG +TCATAACTTTTTTTGAGGAGTTTTCAAGAAGTTTTATTATGAAATTCGGT +GTTTTCAGACAATTTTGAGTCTAATAAAGCAATAAAAAAATTCGACTACA 
+CCACCTGTAATGGCAGATCAAAATTTCGAAAAACCACGGCGAATTTTCGC +TAAAATTCACTTTTTTTCCGAAATTTTGATCTGCCATTAAACTTTTCGGA +AAAGTCTCAAACATTTTGAGTATAAACGAGCAAAAACTAGTCATTGTTTT +TGCCAATGGTATATAAAACATGAAAACCATCCTGTGAAAAAAAAACAACA +AAAACACTTGAAATTCGTAATTTCAAAGTTCTCCAAAAAAAACATACACA +CACCTCGATTGAGACTGTACCCACGCAGCCGTGTCTGTGATAACCATTCT +GTGTGTTTCTGTGTTTAAGTGGCCTATTTATAGTCTTCTGCCAAAAGACG +AAACCGCTTCTAAAATGAGATTAAATTGAGGTGGAAACGACGACGACGAA +GAGGTGGAAGAAGTGAGGGTGAAAATCGAAAAAACAAAGAAGAGATCAAA +GTTATTCCATCTTTAGAACTTTCTCCACTGCCAGCAATTGATAATAATAA +TAAGATTTTTATGTTGATAACTCTTTCGGTATCAATACAAAAAGGGGTGA +AAACCCGCCTCGACAAGCCTTTCGATCAGGCCATATATTTCTTTATCACA +CTCTCCGCACATTTCGGAACAACTTTGACCTCCCCGGCAAGGGGGCAAAA +GTTTCGAGAAGTTGTGTTGCGCGCGAAGGAGATGGAGAAGATGGAGAAGA +TAGGCAAACACGTACACCTCCTCAAACAGTAGTAGTAGTGATAATTGATT +TGATTAGTGATTCGGGTGGGCAAAACGAGAGAGATAGAAAGAGAAAAAGC +GAAGAGGACAACGAACACGTGTAATCAACCGTGTGCCAAAGTTCTCGCCC +TTTGGACTAATTGCAGTTTTTTTGGAGGAAATTCAATTAACACTGAAAAC +TAGTTCGTTTAAGTGTTTACGCTGAATGGGCCAGGTTTTGAGGGAAAACG +ATTTTCGAAGAACTTTAAATGTGGGGTACCGAAATCTGAAAAATATTTTA +AATTGACTCCAAATTTTCCCCTGACTCCGAATATCGATGCGAAAAAATTC +AAAAAAAATTTTCCTGATTTTATATTTAAGCTTGAAATCGCCGAATTTCA +TTTAGAAATTCGCATTGGAGCGCGCTTGCATCGTTCGATTTTTTTTTCGT +TTATTTTATTTATTTTCGCCGAATTCAATGTTTTTAAAGCCAGTTTCATT +CATTTTTGTCGAGTTCTCATCGAAAATTTTTTTGACAAAAATTAATGAAA +CTGATTAAAAAATTAAATTCGGCGAAAATAAGTAAAATAAACGAAGAAAA +TCAAACGATGCAAGCGCGCTCCAATGCGAATTTCTTTGGGCGCGAAGTTG +AAAAAATGGCCCGGGGCACAAATGAAATTCGGCGATTTCAAGCTTATATA +TAAAATCAGGGAAATTTTTTTGAATTCTTTCGCATCGATATTCGGAACCA +GGGAAAAATTTGGAGTCAATTAAAAATATTTTTCAGGTTTCGGTACCCCG +CCTTTAACGGATTTTTTGGCTTAAAATTGGAAACTCTTGGTATAAAACTT +GTAGAAACACCGAGTAAACTGTACACGTTTTAGACGATAATTCAAAAGTA +TCGAGGGAAGTATCCAGAAGTTGAAAAATTGCGAAAAACAGGTAAAATAA +GGAATTCAACGCGTGAATTTTGCATTTTTAGACTGAAAATAGTGTGGTAT +TTCTTCAAATATAGACGAAAATATCGACTCGCACTGGTTTTCGATCGATA +TTTATAGGTCGAGTTCATTGCTATGCACAACATTCCGGGAAAACACGAGA +TTTGAGGAATTTCTGAGCAACTTCTGAGTAAAGCCAACAAAACATTTTGT +ACAATTTTCTTGTTGGTCTTGCTCAGAAAATTTTCAATCAAAAACCAATT 
+GGAAATCGATATTTTCTGCTAGTTTTGAAAAAACACATGATTTTTAATCA +AAAATTGTTAAATTCATGCAATTTTCCAAAGTTTTACACGATTTTTTCTC +TGGAATTCACATGCTTTTTAAAGCAAACGGCCGTTTCTCGTTGTTGGCTT +CCACGCGATTCCATAGAAACACTGGGGCACCAGAGAAACCGGGGTCGAAG +CAAATGACCAACAAACAACAAACGGATGTGTGCTTTTTCTTCGTTTTCGT +CTTCACGAGGAACCACCACGCAAAATGGAACTACAACTACGATTGATAAC +GAAAAAAAAAAAGAAGAGATGATGATAGTTTTTGCAGCAATAAAAAGAGT +GATAACAAGATACACTCTATCTCACCCACACCTTTCCGTTCAAAGATAAG +GAAATAAGATTCGGGGGGTTCAAAGTGTTGGAGCCAAAAACGGAAAACGG +AGGGAGCAATTGTTCGTTGAACGAACAATAAAAAACCATCATCTGCGTCT +CAAAAATACACGTGTAAGTGAGCGAGTGAGTGAGTGAGTGCCGGGGAATA +AAACGAGTTATATGACCAACACAACGAGCGACGTTGCTGCTGCGTTTCCT +CACTCTCTCCTAGTGAGCGTGTGTGCGCACCACACTCACATGAGCGCATG +CAAGGGTGTATGCGCAAGTGGTACATCGACTCGGTATGAGAAGGACAGAG +CAGTAGTAAGGAGCACACGTTATGTTGCTCCACTACTCACTGTTGCGTAC +CGTGTAGCCTCTGCGGGCCACCGCGGTGCAAAGTACAGTGGAAGGAGGAG +AGTTGAAAAATTGGTGTTTATTTTACTTTTAAACTCGTTTTCCTCCGGAT +TTTGAAGAGAATTGGTTGACTTTTTGAATTTTAGGCGAAAAGCGCGGGAA +AAATCGAAAAAAGATGTGAAAACGAGTGAATTTTAGGGCAAAAGTGAACC +GACAATTGTTATGAGCAATCGGGCTTCTCGGTCGGAAAACAGATCACAGA +AAAATCGGAAAATCCAGCAAAATACTCGAAAATTATGCAATTTTCTAGAA +TTCCAAGCGAAATTTTCTAGAATTTGAGGCAAAATGATACGGAAACCAAC +GAGAAATCCAGAAAAAAAAACTATTTTCATCGAATTCTAGGTGAAAAGCT +CTGAATTATTCGATTCTCCGTCGGGGTAGACTCAAAAATTATGCAATTTT +ATAGAATTCTAGGCCAAACATCGCAGGATCGCCTACACAATTTCCGGAGA +ACGATGAAAATCGGCAAATCTGAAGCAAATAGGAGTGAAGGAAGAGGCTT +CGGATCAGTTTTTCTGAAGAAAATACAAAACTTTCGCTTTCGCAAAACTT +TCGGGAAAATTCTCAGAAAAGGCGTCAAAAGCTCAAAAAATCGACGTTGC +ACGATTTTCGACGGAAAAATAGAGTAAAATATCCGAATTTCTGCTTTAAA +ACTCGAAAAATTGCCCAAAAATCCGTTTGTTTTCGCCAAATTTCTCCTAT +TTGGGGGTCACAAGGCGAGCGGGAGCCGCCGACGACGAGATGAAGAGCTC +CACAGAGCCCCTGAGCCAACAACACAACACTCACCCAAAAATATTGGAAT +TCTCCGGAAGGAAATCGTCGTACGACGACGGCGGCGGCGACGGTGCCATT +TTTCATGGGGAAAATTAGAACCTTGTTCGAATTTGTGATTTATTATCGAT +TTTCTATGTGCCACGTGTGCCCTATTGCGTGACCCACCTGATTAGTTGCG +CGAGTATTTTGTCAGTGCGGTCGACGATCACCCGAAGAATGGAGAAATTG +TCTGAAAAATGCGAAAATTGGCTATAAAACGACGGAAAAAGAAATAAAAA +CAAGTGAAATGAGGCCGAGAGGCAGGTGAATTGTTTATGCAATTTTAGAA 
+ATGTGATATTTTGGCGGTGTAGATCGAAAAAAATGCGAAAAACACGAAAA +CAGGTCGAGAAATTCGCAAATTTTCGACGACTAAAATTCAAATGCACAAC +GTTCGCGCGCGCGCCGAAGTACGGTAGGTCCCGCATTTTTGCGCGCGCGA +AATTCAAATTTTAATTTTAATATTTTTTCTTTCTTCTTTTCTTTGATTTT +TTAAGAATATTGTGCAAAAAATTCGTTCATTTTTCGAATAAAATTGTTAA +AAAAGACGAAAAAAAAAAGAAAAAATAAGACAATTTCTCGGTGGCATACA +AATAAATACAAGTTATTGTTCGGGAAAAAAATCACAATTTTACATTAAAT +CCAGGTTTGTAATAAAATCAATTATTATTTTATTTTCTTCCAAATCCGGC +ACATGGATTTGGGCATCCGCTGGTCATGCAGCAGCCACCTCCACTCCCTC +CGAGACACGCCAACGACTCGCCGGTTGAGATGGCGAGAAGCACGAAGCAA +ACGACCATGACCTGGAAGAAAATGTGTGAATGTTAAGGTCTCTTAATATT +CCAATAAAATTATTTGGTTTTTTTTTACCGGTGTTCTTGCCAGAAAAGTG +CAAAAATTAATTAAAACTCTAATCGTGGCGAGACCCATCTTAGAAACGAG +AGTATGCGCCTTTAAAATTGGGCTACCGTAATAATCGAATTTTTGCAATC +TTGCAATTGTTGATTTCCGAAGAGCGTTAACAAGAAAACATTTGATAGAA +ATGACACATGAATTAGGTAAAATTTGGTTTTTTTTTAGATTCTTATAATT +CAGATTTCTATTTTGGACACGGAGTTCTGGCCTTCCTCATTGAATTTTTC +GCGCTCCATTGACAATCGCCTGCCGGACAACGAGTGGGAAAGTTGTGTAC +TCCACACGGACAAATACATTTAGTTTAACAACAAAAATCGAGACGCGACG +CGACACGCAACGCGCCGTAAATCTACCACAGATATGGCCGAGCCATAATG +GCCTAGTTCGGCAAACTCTTCCATTTCAATTTATCGATGTCAGAGTGTCC +CATTTCAGCTTGATCTACCTAGATCTACAAAAAATGCGGGAATAATACGT +AGAGTTCTCAACTGATTTCGCATGGTTAAGTACGTGCTGACGTCACATTT +TTTGGGCTAAAAATTCCCGATTTTTTTGTAGATCAAAAAACTGTGATGGG +ACAACCTGACACCACGTGATTTATGAGGAAAGCCAGAAATCCGTGATTTG +GAGAGCACAACAAAAAAAGAAATTCGGCCACTGATTCGCGGCCACGAGCC +ACTGTGCCTTCTCAAGATATTAGTTAATAATTGACGCTGTGAGACCCACA +GTTAAAGGCGCATGCTCAGCATGACTGGCCAATCTAATAGCATCTACAAT +GATAATTGAAAAAAAAACTCACAGTTTGGCAAAGTTTAGCGACGAGTGAC +ATAATTCTCCGAAAGCTTCAGCTGAACAATTTCAGGGATTACTTGCTGTT +TCCCATCGCTTCACCGTCTTATATAGTGCCATCGCCGATTGGCAGAATGG +GATACACCTGCCCCCAACTGCGCAGTTGAATACGATCCGTGTCTCTCTGC +GTCTAGCGGTTCTCCACGATGTTTCTTTGGCGGCGGTGAAGCCGTGTGTG +TGTGTGTGGCTCCTCCATTCTGATTGAATGTGTAACACTGCGGGCAACCG +ATTGGTTTTGGCGGCGGCGGGAAAACGAATCCGGATATACTACATACACA +AACACACACTCACACACACCGAAGGAACTTTTCAGAGAAACCTGAACTGG +CTTTTTATGATTTAAGTGCTTCTGCTGCCAGCATGATGTGGTGGTTGTGT +GTTGTCAATAGGATTTATGCTAATTCCTGACACATTCAAGAGATGAAACA 
+TCAAACCGGGTAGGAACTTAGTCCACCCAACACTGTGAGAGATTAATAGA +ACCTTTAAAAGTGGCACTTCTAGGCAGAATTCAATAGAATGGTGGGCGCC +TTGGAGCCTAGTCGACCTGCGATGTGAGTGGCAAGGTTGGCAGAGCTAGG +ATAAGCTTCTGGAACTGTAGGACTGTAACAAGCAAGGATCCCGCTCTGGT +ACTGTAGTGAACTTTGAGATCAAATTAGGTTTTTCAATATTTAACTAGCA +CTGTTTCCGGCCCTGATCTTAGAAAGAAAAGAAAAAAAGGATGTGAACCA +GGCGTGGGCGGCAAACGATTTTTTTCCGGCAAATCGGCAAATCACTAAGT +TGCCGGAATTTAAAATTTCCGGGAAATCGGCAAACTGACACTTTGCCAAT +TTGCCGAACGGCAATTGCCGCCCACACCTGATGTATACATATATATGCTT +TCACTCATGTGGTGCCAGAATGTCTCACTGCGGTTTGATCTACGAAAAAT +GCGGGAATATTTTTCCAGAAAAATTGCGGCGTCAGCACGCTCCTAACCAT +GCAAAATCAGATGAGATGTATGCGTCTCTTCTCCCGCATTTTTCGAAGAT +CACAGCAAAATAGAACTTTCTAACTCCACGTGTCTACTGAGAAAAGAGCC +GAAAGCTTATTTTCCACGTATCAGGGATCGGAATCCCACCTAGCTAGTTC +ATCCTATTGCATCATTGAAGCATCTTCCGGATTCTCCACTTTGTTTGCTT +TTCGAAATCGTTTCAAACTGAAAGCCGCGCTTTCAAGGCCGTCTTCTTCC +GATTTTCCGATCTACCTTTTGTTGATCATCACGTGCCTGGCGATCACAAC +ACTGTTCAACCGGAATATCTGAAGATGTTCCACCAAATGGATCGTTTTTC +TCAGGACGGAAGAATACTAATTTGCAAAAAGTCTGTTCTGTTCTAAAAGT +TCTGAATCTTCTGAAAAAATTTCAGACTTAGATCCTGTCATTACATGGGC +AAGCAGTGGAGTCGGAGATTTTTTATCAAAATATATGGATAGGTAATCAG +GGAGAGACTGGCTCCTTTGTTACAATATATTAGGTTTTGTTCTTTAAAAT +GTTCAAATGAAGCTGAAGAGTTCCGGCTAAAAGTATATGATTTTTCAATG +ATTTTTCAAAAATTTCAAGGTATAACTCTGAAACTAAAATTTAAAAAAGA +AACTAAATTGGATTTTAATGTTAGTTAGTTTATTAACAGAAAGAGCAGTA +TTTTTTGATAATTGTAACCTGAAAACTCTATTTTTCTAGTTTCGTGAAGT +AAAAATGTTTTATTATGTGACATTTATTGAAATTTAATTATAATACACTG +TTAGAAATATTTTATTATCTAAAAAAGTAAATTGGATTATAATGTTAATT +ATTTTACTATCAAAAAGATCAGTATTTTGTGATAGTTTTAACCTAAAAAC +ATTTCTGAAGATCGTTTTTTGTGAGAAAAAATTATTTTAATATGTGTCAT +TTAAAGCAACTTTTAAAAAATTGCGGTTTAGAAATGTTCAGCTATCAAAA +AAAAGTACATTGGATTTTGGTGCCAGTCAATTTATCAACAGAAAAAGTGG +TCCTAAAATAATTTCCAAACTTATTAATGGTGGTAGCATTAAAAAACACT +TTTTTTTGGATTTTTTGAAATGAATAGTGTATTTACAGAGACAATGGTAG +ATCAACCTAAAAGAACAAAAAATTTGCAATTTTTCCAGGTAGGACACTTT +TTTGCTACTCGAATATAACTAACCTAAAATAATTTCCAAAAGTTTCAATG +GTGGTAGCACAAAAAAAAACAATTTTTTTGATTTTTGAAATGAATAGTGT +AGTTACAAAGACGGTGGTACATTGACCTAAAAAAACGAAAAATTTACTAC 
+TTTTTCAGGTAGGACACTTTTTTGCTACTCGAATATAACTAACCTAAAAT +AATTTCCAAACGTTTTAATGGTGGTAGAATGTAAAATAACTTGTTTTTTT +TTGGTTTTTTGAAATGAATAGTGTAGTTACAGAGACAATGGTACATTGGC +CTGAAAGAACAAAAAATTTGCTATTTTTCCATTTTGACCAAAACTATTGG +TTTTGTCCCCAAAAAATTAGTAAAATGACCCACAATCCAGTTATTTTGCT +GTTTAAGCAGACACACTACACGGAATTATTTTCAGAAACCAGATGTATGT +TAAGATTTTAGTAGTTTCGGTGCTGCAAAAACCATCAAAAAATACCAAAT +TTTTCGGCGTTTGTGAAGCACGGCAATTTTTGAGATTTGCCGCACACCCC +AGGTACAGAGACAATTTTATATCGACGTGAACGAACAAAAAATTTGCTAT +TTTCTCAGGTAGGCCACTTTTTTGCTGCTCTAATTTCACACACCTAAAAT +AATTTTCAAATGTTCATCCTACCCTTATATGTAGTAAGAAGATTTCAAAT +TCAAATTTTCATGCATCAACTCGTCCCCACCGTTTTTGTTTCCCTGGCTG +ACTCATCTGACCTTGTCACCCCACGTGTGTTTGTCTTTGTGCCCCCTCTC +GTGCTCACCTTGTTCCAAATTCTGTGAAAACTCATCTCGAAGGCCATTTT +CTTCGTTTATGAGTTTTGAAAAAAAAACGTTCAGACAACTGCAAATGACC +TCCCCGGCGTCGAGAATCGGTGATAACCTTGTACTTCTTAGATGAGACCA +GATACAAAGTGCACCAAAAAATTCAAATTCGCTTGAGAAGCGCTGGCGAG +AGATTCGAAGCGAAAACGAGTTGATTGACAATCGGAACATATACACACAC +CAGAAGACAAAGTGGGGGAAGAAAGGAAACAAAAAAGATAGAGAGGAAGG +AGATTTTGAAAAACGACAATCTCTCTGTGTGTGTGGGTGACAGGTGAAAA +AGTGAGAGTGAGAAAAACGTGGTGGGTACTGTATTGATGAACGATCAGAC +CAAGTTGCATTCAAATTTCTTTGATTGGATTGTTTCCTGATCGGTTTGGT +AGAAGGTGAAGTTGGATTTTGTTCACTGAAGTTATTGGATTTCGAGACAT +GAGCAAGCTAGGTTCAGGATGACTGTAGTGGCCGGCTAACTTTCCGTACA +CCTTTGGCCGACTAGTGAGTGCAACCAGGGCTAGTAAGGAGGTACATCCT +GGTTACCGTAAGCTTATTAAAATTCTGAAAACACCCTACAAAAACTGATA +TTTGTAAAGTTCTCCAAAACAGGTGCTAAAACTTCTGAGGCAAGAGTAGG +CACTTGGAGTCAGAATGTCTTATTTCGATTTGATCTACAAAAAACGCGGG +AGTTGAGACGCAGAAATCTCATCTGATTTCGCATGTTTAAGAGCGTGCTG +ACGTCACAATTTTTCTGGAAAAATATTCCCGCATTGTTTGTAGATCAAAC +TGTAATGAGACAGCCTGGCACCACGTGGGTAGGTGTCGGGATGCACGTGG +TGTCAGAGTGTCTCATTTCAGCTTGATCTACGTAGATCAACAAAAAAAAA +TGCGGGAGAAGAGACGCAGAGTTCTCAACTGATTTTGCATAGTTAAGCTC +GCGCTGACGTCACATTTTTTTTGCAAAAAATTCCCGCATTTTTTGTAGAT +CAAACCGTAATGGGACACCCTGGCACCACGTGGGGATGCATTAGTATGAT +TACTGTAGATAAACCTTGAACAATATATATACATTAATTTCGTAATGCTT +GAAACTTGGACCCAAAAACTATCTGTAACTAGAAAATCTGCAAAAATGAG +TTTGCGACTTAAAGCTGGAAGTATACAAGTTGCGCAAAGACCTTTCCAGT 
+GGCTTCAGCTAGCAACAAAAAAATTTGTAAATTTGCCGGTCCCCACACCA +CATCAAACGTGCAAGACGGGCAACTTGGTACATCCAGTTCACCTGCACTC +CAACTACTTCCCAGAACGCCACATAGTTGAATTATTACCCAACTTGGTAC +ATTATTTTGTCGACTCGACACACCATCACCCCACCATAAACCCCTGCAAA +TAAGGAAGCCGATCGCTATCTACTCATCTTTAGGGACCTTTTTTGCCGCT +TTTTATTGATTTGAACAAAATCTCAGCAGAGCAGTCAAAAATTGCCAATA +TTCCAATTTTTCAGTTGCCAAAGATTTTAGAGTTTCAATACCTACTAAAT +CCTCCCGTTCCAGCCATCCGTTCTTGATATTCTCTCCATAAATTTTCGCT +TATCTCCTCTCTTGTAAAACTAACGTTCACTTATCCACATAATAAATACA +ATAAATGTATTATATACATATATATTATATATATATATATATATATATAT +ATATATATATATATATATATATATATATATGTATATATATATCTCAAAAA +ATCAAACAGGATCTCTCTTTTCCTCCAGGAGAAAAAGAAGATGCCGTTTT +GCAGCCAACAATACTCTCACTCTCAATACACGACGCGACGACGTGGCATC +CCTCCCCCCATCCTGCCTTTCTTCCCCGTTTCTCCTGTGTCATCGAAATT +TCTAGGGCAGCAGCACACCGGATTGGTTGGCATGTGAAGCCGACGTGGAT +CGTGATGGTGATGATGTGGCGGATTGTGATAGATCGTGTTTTTTATCCGG +CGATTACTGCGAGAATTGTCTGAAAGAGTGGCAGGGCCTGGCGCCTGTCT +TTCGGCTCTTTTCAGCATTTTGACGTGAATTTCCAAATTTCTAATTTTCT +CATTTCTATAAATTTGCTGAAAATCAATCAAAGAAATAAAAAAAAAGTTC +ACTCATGTTTTAGCATACGGCGCGAGAGGCAGGCGGAAATCGCCTTAAGG +TCAGGCAGGCAGGCAGGCGTTTAACGCCTACATGGAAGCCCTATAGCACA +CATCTATCTGAAACTGGGAATGTTGTTCCGAAGTGCATTGGAGTGCCTGA +AATTATACAATGTTTGTTCTAATTTATCATTTACTGTATATAAAGCGCTT +ATTCCGTGTGTCCATAGTTGGTAGTCTATGTAGTCTGAAGTTTTGGCTTC +TGGAAGGATAGTGAGTTGGGGTTAGTGTAGGGATATGGTCGGGGAACTGT +AGTGGTACAATAGCGGTACGGTAGCAGTACTGTAAGATTATGGTACTTTC +AGAAAAAAAAGTTTTCAGCCCCAGAAGTCGGGGTCCGTGCCGGAGGAGCG +GACAACGGCTGGTTATCAATAAAGAATACCAACATTTTATCACAGATCCG +GTTACCGCTAGACAAGAAGGTAAAAATTCGCATTCTGAGAAGGACTACTG +TAGCAGGTGTTTAGCGTGTGTCCGAGTAGATGTGGCGACTCTTAATTTAA +GTTGTAGTTTCCTGGTGTCCCCTAATCGCGAACTTCACGATAATTGTAAA +TATTTTAGTGTGGCCTATGTTTACAGGTTTTACGGACAGCCTAGCCCGAG +AAAAACTCTGGAAAAAAATCCCAAAGCTCTTATCCAAAACTACCAAAGTA +TTCTATGCAATTCCCTTTTTGCATTTCCTCACCCCGCCTAATAAGGCATG +TTGTTTACCGCCGACTTTCCGCAATAATCTATTCTCGCAATCATGTGATT +TACCTGACGAATCGAAAGTATACGGTAACCTTCACATTTGGAAGAGGAGA +AAGGAATTCGTTGGAGCCTTGACCCATTCTCCCCACAGGCATAATTCAAA +AAAATATTCACATCAAGAATGCGGATTTGCCACAACCCAACGGGTTCTCC 
+CATCCAGCATCTCTACCTTTGCCGAGGCAGAAACTGAGGACCGATCTCGA +TCTGCCGCCGCGTTCCGACAAATGTAATAGCAGCCCCCCGCGCGATAGCT +TTTCACTTTTTCTTCTCTTTCTGGCTACTGCTGCTGCCTCCTCCTGCGCA +AGGATGATGCTGCTAGGATGATACCCTACGAGATTACGGAACTATCCGAT +AAATTAGCTTGATGGAGACTAGAGAGATAGTCCCGAAGCTCTGAAGGAAC +CTGATCTTGTTACAGTCTTGGAACCGAACAGCTGATTGCGTCGTCGTATT +ATCGTTATTCATTTCGTAAATAGATTTTCTCTAGGATTGATTCCCGGTTG +AGCCACGTCACTGTCCGAGAACATTCCGGAATGTTATGCATATGTTCTTG +TAAATAACCTTTCTCCTTCCTCCGGCCGGCCAACTACTTTCATTTCTTGA +TGCGTCGAGATACGATATCCAAGGCCATCTTTCAAAATCGCAAGTTATCC +ATCTTTTTTCGCTGATGACTCTCTCTCTCTCTACTCCTTCGCCCATCTCG +TCTTCAGAATCACCTACCGCAAAACAATTGAATAAGTGAGTGTGGAAGTC +ACGTCATGAGCGCGACGAAAAGAGAGAAAGAGAGAGAGAGAGAGAGAAGT +GCCGAAGACGTCGGAAGAGAAAAGCTGGAGGAACCAGCGGCTGGATCGTC +GTTTGTTGATGATGCCTCCTGCCTGCCATTGTTATTCGATATTCCGATAT +TCGATTCTTTTTTGCCGCCGCCGCCCAAGAGGAGAGGAGAATCAGAGAAG +CTTGCGCGCTCCCTATCAAAAAAAAAAGATTATCAAATTTGAAGCAGCAG +AAGCTGGGAAATGGAAATAATAGCGGATAAACAAAGAGAGCGAAATGTTT +TGGGCAATAAATTTGTAGTTTGATGATAAGAGTGGCATTTTGATATGAGG +AAGCTTGCAGAATCTAGTGGTGAACAGGAAGCTTTCGCTATGGAGCAAAA +TGGATTCCGTCATTTTTCTTAAAGAATCGGAGTCGAGATGAAACACCCAG +CCGGAGCTGAAAAATTATTCTGCTGAAAAATCGAGGTGCTTGAAGTACCA +GGGCGTTATACAAAAGATCCAGGGATCCTAGTTCTGCAAAAAAAATTTCT +GCATAATGAAATGGAGAACAGGGCGGTGCGGCAACCAAGATTTTCGGCAA +CCGGCATTTGCCGAAGTTGCGGAACCCAAAACATTTTGGTAACCGGCAAT +TGCCGAAGTTGCCGAATTCAAAAAATTTCGGCAACCGGCATTTGCCGAAG +TTTCCGAATTCAAAAAATTTCGGTAACCGGTTGCCGCACAGCTCTGATGG +AGAACCCGGTGGTCAGCTCAGAGTTTGGATCACCACAACCCAGAAATACC +ATCTTTCAGTGCCTCATACGTAGAAACACTCTAGCTCAAAGTGCCCCTGA +GGCCCTGGGATCTATCCTTACCTGGAAAACTGTGTAAAATTTCGAATATT +CACAGATCCTAGGTTCGTAGTTTCACAAAATGTTCCCACTCTGCCTTCGG +ATCCCCAGGACGCATTGCTACAATATTTTGCACTTGGTACAGTACTCAGC +TTTCCCTTTTTGCACCAAAAAACATTATTGATCCTTACGATTAGCGTCCT +AGATAGGCTACAAGGCCATCGAGATCTCAAATATGCAAATTCTTAGGAAA +GAAAGTGATGTTTCATCATCTTCTTGGATTAAAAATTTCCGAGGAAAAAC +ATGGCAAAAATCAGGTTGATCGTGAAACTCGTCGGGGGCTCCTCTCACTT +CTTCCCCATCTTCATCTCTCCTTCTCTTTCCCGATCCGATCTTTTTTCCT +TTCTTCCATTTTTTCTCGTTTTTCCTTTCCATTTTTTCTTCTCCTTCTTG 
+TGTTTGATTATCCATCCTCCTTGCCCTTTCTCCTTGTTTCATTTTTCTTT +ATTCATTCGGATGGAATAGAGATATGGTTCCGTAAATTCATTCCAATACT +AGAAGAATTAGTAAGACATTCGTAGGGGGGAAAATTCCGCGCAGAAAGCG +ACAGGAGGTTCGGGATTGGGTAATCGTCGGAAAAATGAAAAATCGCGCGC +GCCCGCCCGCGCAGATGGGATATATACTAAATCACATGGGCAGCGACATG +ATATACGATGGATTGCGTGATCGATAGGCTTTTCAAGGAGGTACATTAGC +ACTGGTGGGCGTCGGGATCCTTGAAGGAACAACGATCTTTGAAGGAGATG +ACGAGGATGTTGTGAAATTTAATTGTGATATCATTCTATTATGTAATCCC +TGGCCTAGAATATACATCATCTTTTAGAATTCTGGTCGGCTAGATGTTTT +CGGGAAGTGCTTCTAAAGGATCGGCGCTGCCATGTATGTAGGAAGTGCTT +ATGCCAACCCCAATCATCTTCTCAATCTACTCCTCAGAAGTTCAGTTCTG +CCAATCCGCACGCTCCACCTTCGTCGCCTATCCGTTTCTCATGCGAGTCT +CGGGTCAAGGCAAGTGTGCGTGTCCTTCAGTCGTGAAGTCCTATTCATTT +ATTTTTGCTGCCAAAGTCCGAACAGTTTTTTCTAGCCGTGTCGGCAAAAG +GGTTCTTATTGCGAATTCACCTCCTCCCCACTCGAGAAATCGCCCCCCCG +CCGTCCTTGGTAAAACTGACCTAGAAAAATAAAAACAACACAAAGCGCCT +CCCGCATCATCACGCGTATATATGTATATATATAAGATGGTAGGCGGGAA +GAGGAATCGACGCAATATCGACTGAAGGTTGCGCAATCGAGATGGCTCTT +CCCGGCACGGATACCTCCTCCTCGCGTACCAGGCACCCGCATATAGCTAA +AGTTCTCCGCCCATTTCGTTACTTTTTTGTGCTTCCCATATCATCGCAGC +AGCGCTTATGCCATTGCGCCCGACCCGATCCAACTTCTTATTCTTGTTCC +TTAGCCTTCTCCCCCTCCCATGTCCTCCCGATATCCAAAAAGTGGGAGGA +GCATGCCATAGTGAGAGGGCGGGGCCGGGACACACACAAGGCTGGCCCCG +AGAAAACGAACAACACTTATGCCGAGGACAGTCTCGACAGTTGGTGGTCA +CCCCACCTGCGCCTTGATTACCCCGCCCGTCCGCGCCCGCCGGGCTAAAG +TATTTTGTTTGCCATTCATGTTCCTCATTCGACGTTCATCATGCTTCTGT +TATGCTTCTTGGTGTTGGTCCGGTTCCTTTCTCTCTCTCTCTCTTTCTTC +CTTGTGGATCAGATGCCATTCCGCCTTTTTGCTAGATAAGCGCATATTCA +TCTCACTCTTCTTTGTTTTCAATTCAGCATTCTTTATATATCCAATATCC +TACATCCTATATCCTCGTTCTCTCCTCATTTTCATTCATATGCCGATCCA +TTTGGTACTGGTGGTGTCGTTCTTCTTAGTTTCGCTTTCCAATTCTTTTC +TAGACAATTCCTTCTATTTCCAGAAACTTTTTTCTACACAATGCTCCCAG +ACAGCCCATCCCCACGCGCTGCCCTTGCTCTCTCCGATCTTCAGCCACAA +ACCGGCGCCTCAACGTTCTCAATTGGCAGTGGATCAGGCAACTGGTAAGT +CGAGTTCTTATGTAACTAGTAGGATGGTAGACATCGAGATTAACGATGTC +TGGCATTCGTTACGTAACGAACGGGTTCTCCCGCCGACCATCGAAGCGCT +TAAGCGCCGAGATTCTCGGCTTCATCGCACCCCGGTCTAGCAAGTTTTCC +AATCCATCCCAAATTACAGGTCGACAAGAGGTGTTGGTGGAGTTAATGGT 
+GATTACATGGACAAATCCGGCGGAGGAATGTTCGGAAACAGCAGAAACAA +CGTGAGTTATAAAAATATCTGGGGGGGGGGGGGGGGGGGGAAGTGTTGTT +CTAGGCCAGGGACATTGAGAACCGCGTGATGAAGAGTACTTTTATCCGAA +TTAAATCATAGGCAACGAAAACAAGAATCAGGTTCTTGTGCTTTGCACAC +CATGTCGGAATATCCGTCTCAATCAAAACGAAAAAAGCCTAAGCTGGACC +AAGTGTTAAATGCTTAAATGCCAAAAATGAACAATCTTTCAAATCACTTT +CATCTTCCTGTAAATCAGAGAATCCTGAAGCGCTGGCGCGCACACTTACG +AAAAGTGAAAGCCGATATATCAAGTTTATCACCTAGCGGGACTTCCATTG +TCTCATTGTCTCTTGGCTATCCGCCCCACTTTCCCCCCGGATCATCCGCC +CCCTTTAATCCAAGACTCTCGAACAGTTTCGCTTCATTCCAGGACCGATT +CGCATTCGGCAACAACAGCTCGTCGTCCTCATCAGCAGGCTGCTTTTCAT +CGAACAATTCGTCAGGCGGACTTTTTTCCAGTAAATAAGTTAATATGTGA +GTTTTTTTTGTGTACATTTTCTTCTTTTCAAGATCCATTAACATTTCAGA +ATGCGATCGGCAGCCGGTATTCTTATTCTAATCTCAATAATTATTCCCAG +TGTATATAGTGAACCGAAGTTGCCCGATTGCGAGCAAATACCAAAAGTCT +TGTGCTGTACCCAACGTGTCTTAGATAAATGTATGTCTGGTTGTATCGAT +TATGTCACTGAGAAATGCCCCCACAAATTGGAAAAATACGAGACGATCGA +TGAAGAGCCATCGACGAGAGCTCCAAAGAAGCAGGTGAAGCCAGCTAAGG +CAACTAGCAACAGAGTTGTCGGAGCAGTTGAAGTAGACCATGAAGCCAAG +GAGCAGTTCATCGACTCCAAGGATATCAGAAGAGCACCACGAGTCGGTGA +TGCCAAGCTTCTCAGCCAGGAGTACCCGATCACCGAGGTCACCGACGCCG +ATCTCTCCTCCGAGTGCGGAACCGAGAAGTCTCAGCCACCATTCTCACCA +TGCTTGTCCCGCAAGTCGGCCGACGACGTCTTCCTCTCCTGCTGTCGCCA +ACAGGTTCCATCCAACTGTCACAGCCTCTGCACCTACGAGCATCGTGAGC +ATGTCGCCGCTGAAACTCTGATCCAGGCGATCCAGCAAGAGCACTGCGAT +ATGAAGGTAAGAATAGTTCAATCTTCCTATTGAGTTTTTTTTTTGAGTTT +TTCCAAAATTCATTCTTTCAGTACTTCTCCAAACTGCTCTACTGTGCCAA +CCAAAACCGCGACAACCGCGCGTGCTGCTCCAACCTCGGCATGTCAAACG +CGGAACTCGGCGTCGGCGATCGTTGCCTACGTATGTGCAACATCTCCCCG +TCGGGAGACCGTGTAAGCTCTATGGAGAAGGAGGATCTCGTCTGCCTGTC +CAACTGGAACGTCATCATGTACTGTGCCAGAGGTGGTCTTCGTACCATCA +ACTAGACCATCATCCCCGTTCTCTTCTCCCGTAATAAGCTGGCTCAGGTT +GTTAATAATAATTCGCCGCTCGTCTTAATAATTTTCCATTTTCCAATCTC +GTCCCCCCTTTTCAACACTACCACATTTCTCAGTGATATTCATCTGAACC +GCCCGACATACATCAGTTCCTCATAGTACCTAACAGGGTTCAACAAACCC +ATCCACACACCGTCCCCCCACGTAACACCCCCCCCCCCCCCACTTAAGGA +CCAAGTCCTAGTTACTGAATGTGCAACATTATGGCCAGAATGCAATACTT +GTCCCATCTGAATCCATGTTATTTGATAATCTATGACAGTATTATACAAC 
+CCTTTTTTTTCAAAACACAAGCCCCCTTGAAAACGAACTCATTGTATGTC +AAACGGTATCCCTTATTACACGATTAATTGTGGAGTTTGAAGCGAATAAA +TCAATCAATTTCTCGTTGGGTCCCACAGCGAAATGACTATTACCGGTACA +GAGAGTGTGGATAGTTAGAGAGTGACAGACATCCGGGACCCAATGGGGCG +GGGCGCGCGGAAGAGACGATTTGTGTCGATTTACGAAATGATGACAACGA +GGAAAATTTCGTAAATCGACACAAATCGTCTCTTCCGCGCGCCCCGCCCC +ATCGGGTCCCGGATGTCTGTCGCTCTCTAACTATCTACACTCTCTGTACC +GGTAATACTATTGCAGTAATCGCAGGTTATTTCATAGCGATTTTTCATAG +CTTCACGGATTTCTGGATTCCCTCATAAATTGAAATGGAAGAGTTTTTGC +TGAACTAGGCCATTTTGGCTCTGTCATATCTGGGGTAGATTTACGGCGCG +TTGCGGTTCGATTTTAGTTGTAAAACTAAATGTATTTGTCCGTGTGGAGT +ACACGACTTTCTCACGCGTGGTCCGGCAGGCGATTATCAATAGAGCGCGA +AAAATTCAATGAGGAAGGCCAGAACTCCGTGCAGCTTTACTCGCGAATTC +AGGTGTGTGTTTTAATTGTCTTTATTCAGTTTTCTGATAATGAATATACT +TTTTTACAGAAAAACTGGAAAACTGATAAAAAAGTCCCCAGAAACTAGAT +ATTGAAATTACAGTACACTTTAAAGGCGCACACATTTTCACAGTAAACAA +AAAATTTGTCGTGCCGAGATATTTCATCCAGATTATCAGTTGTTAAAATT +TGACAACACTCATTTTGAATATTTAATGAATTCGAATGGAAAATAGAAGA +AAGCTGCCAAAAAGTGAAATTAAATTCTCAAAAACTTGATTTCCAGACTT +TTTTGGCGATTTTCGACCAATTTCAGCAGAATTTTTAGCAGAATTTTACA +AATTTTTTTATTTTAAAGCTTATTTTAATAATTTTTCTCTGAAATATATG +AATATCTTTGCTTTCGATGGATCTTCCTCAAAAGTAGACACACATTTTTA +TTTTTCTACAAAACTGTGCAAAGTTACGTCGTGGTGTTTGCGTACCTCGG +CGGGAATTTAAAGAGAAATGGCATTTTTCACTTTTCTCGCAGTTTTTCTG +ATTTTTTTTTCGTTTTTTCGTAGTTTCCTAATAAAAAATCGTTCCAATTG +TTTCCAGGTAATGGCTGGATTTGAAATTCAAAGAGTCGAGTCCAAATTCT +TCGACTTGGACGACATAATCGCCAAGTCAGACTCCACGTCTTGCACATTT +GAGATTGGAGATTTGAATCCAGGTCTGAAATCGCATCGAAAACCTAAAAA +TTCTAGTGATTTTTGTTCAGATTTCTTCCAGGAAATGCTGGGAGTTTCGA +AGCCAACTCAAAATGCAGACGGATACGGCGTTGATGCTCCATACTGGCTG +CTGGAATCCGTTAGAAGTTCATTTTCCGTGAGAATTTAGTTAGATTTGCC +CCGAATTTTAATTGAGAATTCCAGATTCAACTGCCAAAAGCTTATAGTGT +GAACATGCAAAATGTGCTGAACGCTGACAGCAAGAAATTGAATCTCTCCG +GCTTGCAGCAACACTTTTACGGCAACGGGATGCAGTTATGCCGTCTGATG +AAGGGCGAAAATCCGGACGGCGCGCTGAGCCTCGCACGATGCCTTGTCTC +CACACTTACACAGCGATTAGGAGGTATTAATTAATATAATAAAGCACTTA +TAAAATACAATTTCTGGTTTTTTTTTGTTGCAGAAATTGTGTCAACTGCA +ACCCATCTGCAAAGCAAAGGCGAGAAATTTGACAGCCTCGAGACAAAAGT 
+GTTCCTGGAGGGTAAACGGTGCAAAGAAGACATTGATACTTGGCTGAGAC +AAGACAATAAGTGCTCGAGCAAGAAGCGTAAGAGACTTTCGTTATAAACT +TCAAATTTCTTATGCTTGAATCTAATTTTATTTTTTTTAATTCGTCTGTA +AATATAATTCAAACCCACCATTCTGTAATAATATTTTATTTTCTATTTTT +TCACACTCATTTTTGTGTTTTATTGGTGTAATAAACCTGGTTTCATGACT +TTTTTGCAAGGAATTTTGTTGAAAAGGCTGCTGCAAAAACTTTGCCAGCA +AAAAATTTTGTGCAAGCACGCTCCATCGCACAGTTTGAACGTCGCGGGTC +GAAGTTAGATGTGCAGGTCATATAGCAAAAAGAGAAACACCCCTTTTTTT +CTCGAGAAGTTTCACCAATTTTGCGCGATTTTTCAACTATTTTTTTGCTT +TTTTGGCTTAATTTTGGCTCAGATTTTCCTCAAAAACATGAAAATCCAAT +CTAGAATAAGTAGTAATGGGTATATTCTAAGATTGTGCAAAAGTTAGCTT +GAATTTCCTCGATTAAAGCTTTCCTACCAAGAAAAATGTGTGGATATTTT +GAATTTACAAGTTTTTCATCTTTTTTTTGTAATATTCTCTTTGAAACTCC +TGTTTCTCTCAAATTTGTAAACTTTCATAAACGTTTTTTTCAGGGTTACC +ACATTAAACAATGACCGGAAGCACCGAAACTCGCCAGAAGGAAGTCAAGG +AGGTTGGTTGTTCAAAGTGACGTCTAAAATATTTAAATTTCTATATTTCA +GCCACAAGTTGACGTTTCGGATGATTCCGACAACGAGGCCGTCGAGCAAG +AGCTCACCGAGGAGCAAAGACGTGTGGCCGAGGCTGCTGGACTTGGAGAT +CACATCGACAAGCAGGCCAAGCAAAGCCGCTCCGAGAAGAAGGCCCGCAA +GCTCTTCTCCAAGCTCGGACTCAAGCAAGTGACTGGTGTCTCCCGTGTCT +GCATTCGCAAGTCGAAGAACATCCTCTTCGTCATAAACAAGCCAGACGTG +TTCAAGAGCCCAGGATCTGACACCTACATCATCTTCGGAGAAGCCAAGAT +CGAGGATCTCACCCAACACGCCCAGATGTCTGCTATTGAGAACTTGAAGC +CAACTCGTGAGGCCCCACAACTCAAGACTGTCGAAGAGGACGAGAATGAG +GATGTTGAGGTAATTCAGTAACTTAATCGGATTTATTACATTAATTGTAC +GGTTTAAGGAGGATTCCACCGGCATTGAGGAGAAGGACATCGAGCTTGTC +ATTTCCCAAGCCAACACCACCCGCAACAAGGCCATCAGGGCGCTTAAGGA +AGCTGACAATGACATCGTCAATGCCATCATGAGCCTTACCATGTAGCTTG +TTTCCTGATGACCTTGCAGATACTCTTGTTATCGTTGTATCTCTTGCTTA +TCCCGTTTTCCGTTCCAAGTAAACGTTTATCAGTCTTTTTTAACTTTTTT +GTTATGTTTAAAAAACAATTGCATCTTCGAATTGACCTACCTTTTACAGA +AAAGAACAATTAAATCACTGTTTGTGTAAAACACCCCTCTATTGATCGAG +AACTTTCACTTCTTCCAATTCTTTACTCGTATGACTGTGATTCATTTCAC +CTGCTATCACTTTATCATGACATTCCAGTTTATATCAACAGCAAATACCA +ATTGTGTGTATGTGAAAAATGCTATTGAATAGCAAGCGCGCCCCAACAAA +CACGTTTGAATGCGCGGGCGAAACGGCGTTAGATGTGCAGGTCATATGAC +AAAACGCATCGACCAGCAGCCAATGGCAAGTTACTGCGAGCTGTTATAGA +TTTTTTCACATGGTTTCCAGTGTTATCAGTTGGAAATCGATCGATAATAC 
+TGACTAGTAGTACTTAACATCTTCATTCCCACCGTTTCGCTGTCATTTGT +GATACGTGGATTTATTTTTCCTTTCTCGCCGACTTTTCATTTCTAAATCT +TTTCTGGAATTGCGAAATTTTCAGCCAGTTTCATGATCGAATGGCTCGTA +ATGAACCATCATCTCAGCAGCCGAGTAGTTCCGGGAGTAATGGAACTCCA +GCACAACAAAACGGGTCGGCAAAACCATCAAAAGTCACAGTAAAAGGTAA +CAATTCGTAGATTTTTTGCATCTGTTCTAATAATATTTCCCCCTCTTAAT +CATAGTCGTCAACGCGTCGTTCACCAAGGCAGCCGACTGCTATGTCGAAA +TCACGAGTGACACGTCATCGGCGGCGCCAAAAAAGACGACTGTAAAGAAG +AAGACGATGGCGCCCGAGTGGAATGAACATCTCAACGTTCATGCAAATGA +ATCTTCGACAATTTCGTTTCGTCTATTGCAAAAAGCCAAGCTATTCGATG +ACACGTGTCTAGGGATGGCGAAGCTGAAGCTTTCGAGTCTCACAAGAAAT +GAGAATGGAGAGTGTAAGTGGTTTTATTGAAATTTTCTCAAAAAATCTAT +TATTTCAGTCAAAAACGACATCAACAATATATCTTTGCTGGCCAAAGACT +CCTCGAAAATCGGAACTCTCAACATAATTTTCTCCGGATATCCAGAGCGG +AAACGAAGAAGCGCAGGAGTACGAGCTGAAACCGCCGCGTCTGCAAGTTC +AGAGGCATCCACGTCAAATGGCGTTGCCACGTCATCCTCGGCGCGGAGAC +CGGCGACAGCGAAGCGTGACACTTTGGCGGCTCCAACGGTAATCGTGACG +AGACCCATTTTGAATTATTTAATTATTCATGAAACGCTCTACTGGAGTGT +TCATAATTATTAGAGCGCGCTCGCGCGGCACCTCACGAACTCCAGAATTC +GCGAGGACAGCAGTTTTTGGCTATTTTGGCTTCTATGGAAATTATTTTTA +ATTTTTTGATTTAAAATATTGTTGAGAAGCATCATCCAAATTGAAAATGT +GTAAAATTATGAAATTTGACCGAAAACTGAAGGTTTTACAGAGAGGAGTC +ACCACGGCTTTCTTTAAAAAGTATCAGTACCGTCAGTATTTCGATTTCAG +AGTCGTAAAATATGTCATTTTGTAGAGAAACGTCTGTTTCACAAAAATCC +ATGTTTGATATTTTAATTGTTTAATTTCTAATATAAAAATATTCAAAAAA +TCAACAAGTTTACGTTAAAATCGAATTTTTCAATCGTTCAGTTTTTAATA +AGAACAATTTCATAATTTCAAACATGGATTTTTTTGAAACAGACTTTTCT +CTACAAAATGACATATTTTACGACTCTGAAATCGAAAAAACTGACGGTAC +TGATACATTTTAAAGAAAGTCGTGGTGACTCCTCTCTGTAAACCTTCAGT +TTTCGTTAAAAATAATTTCCATAGAAGCCAAAAATAGCCAAAAACTGCTG +TCCCCGCGAAACGAAATCCCGCGAATTCTGGAGTTCGTGAGGTGCCGCGC +GAGCGCGCTCTAATAATTATGAACACTAGAGGAGAGCGTGAATGAGCGTT +TTTTATTCTTTTCTCTGCATTTTTAATGCTACAGAATCATACACAAAATG +TTGTCAAATACATGCATTACTCTACTACTCTTAATGTTTACTTTTCATTT +GCAGAGTACCGCAGCGGCGGCAGCAGCAGCAACAGCAGGCGGCACCCCGG +CCGCCGGAGCAGAAGAACAGCTTCCCGATGGATGGGAGATGCGTTTCGAT +CAATACGGACGCAAGTACTACGTGGATCACACCACCAAGAGCACCACGTG +GGAACGCCCGTCTACTCAGCCATTGCCACAGGGATGGGAAATGCGAAGAG 
+ATCCGAGAGGAAGAGTGAGTTTTCCGGCGGAAAAAGCAGATTTAGCACGA +GAAAAGCGTTTCTGATTATATTTTTGATGCGAAAATCATCAAAATTAAAA +ATATCTACCGTAGCCGCGGATACCTGGACCAAAATGCCGAAAAAAGTGAC +CTGATAAATTATTTTAAGGCTAAAATCTTGATTTTTACATAATTAGTGTC +TTATTTTTGTTGATTTCTTGTCAAATTTTTGTAATTGCATCGAAATAGGC +CAAAAATAAAGTAAAAATCAACTTCCAACACAGAAAAACTGACAAAATTG +AAGATTTTTGCTGAATTCAGGCAATTTTTCGTAATTTTGATCCTCCATAA +TTTTTTTTTTCGAGAAAATTTTGAAATTTTTTCTTTAAATACGGCCTGAA +AATTAGGAAATTATAACGACGGATAAAAAATTCTGATTTTCTTTATTTTA +ACCAAGGTCTCCAATTATTTGATAATCGAAAAAAAAAAAACTTTCCAGTG +ATTTTTACTTGAAAAATGTCTAAAAATTCACGCCAGGTGCACAATCCATT +TTCAGGTGTATTACGTGGATCACAACACGCGCACGACCACCTGGCAACGA +CCGACAGCCGATATGCTTGAAGCACACGAACAATGGCAATCGGGAAGAGA +TCAGGCGATGCTTCAATGGGAACAACGTTTCCTTCTCCAGCAGAACAACT +TTAGCGCCGACGATCCACTCGGACCATTGCCCGAAGGATGGGAGAAGCGT +CAGGATCCGAATACGTCGAGAATGTACTTTGTGAATCATGTAAATAGAAC +GACACAATGGGAAGATCCGAGAACACAGGGGTATGTGAAAATTTTCAATT +TTCCAATAATATTTCTATCAAAAATCACAGTTTTAATGTTTTAAAAGTTT +CAGAGGCTCCGACCAACCTCTTCCGGATGGTTGGGAAATGCGATTCACCG +AGCAAGGCGTTCCATTCTTCATCGATCACCAGTCTAAAACCACCACCTAT +AATGATCCAAGAACCGGAAAACCCGTCGGCCCGCTCGGCGTCGTCGGTGT +TCAAATGGCCATGGAGAAGAGTTTCCGGTGGAAAATTGCACAATTCAGAT +ATTTATGCTTGGTAAGGGAGGGCGAGAAATTCTTTGATTTTAGCTATAAA +TTGATGTAAATTTCAGTCAAACAGTGTGCCTAATCATGTCAAAATCACAG +TATCCCGTAATAACGTGTTCGAAGACTCATTCCAAGAAATTATGCGTAAA +AATGCAGTCGATCTACGCCGGCGGCTGTACATTCAATTCCGAGGCGAAGA +GGGTCTCGACTATGGAGGTGTCGCCAGAGAATGGTTCTTCCTGCTGTCGC +ACGAAGTGTTGAATCCAATGTATTGCCTATTCATGTATGCTGGTAATAAC +AATTATAGTCTTCAAATCAATCCAGCTTCATTTGTTAACCCGGATCATCT +TAAGTATTTCGAGTATATTGGACGATTCATTGCCATGGTGAGTGTTTTTT +TTTAATTGAAAATCAGTTGAAAATTGGATGAAAAATTGCGAAATCGAGAG +AAAATCGAGTTTTCCGCTTTTTACAATTGAATTTTAAAATTTTAAACGTT +AAATTTTGCAGAAAATTCCAAAAAAAAGCACTTGGATCTAAAATTTTTGT +TTTTTGATGAAGGAAATCCTTAAATATCGATTTTTTTTATTTTCAGCTCA +AAAAACCTGGAAATTTCGATGATCTCAAATTTTCAGATCAATATATTTCA +TTTTCTCCTAAAAATGTTATTTTTATTTAAACAAATAGTTTTTCTTAGAA +AATTCCTTTATTTTCAAATTTCCAGGCGCTATTCCACGGGAAATTCATCT +ACAGCGGTTTCACGATGCCATTCTACAAAAAGATGCTCAACAAGAAGATT 
+GTTTTAAAGGACATTGAACAAGTCGATTCGGAAATTTATAATTCATTGAT +GTGGATCAAGGATAACAATATCGATGAATGCGATATGGAGCTCTATTTTG +TTGCCGATTACGAGCTGCTCGGCGAGCTCAAGACTTATGAGCTTAAGGAG +GGCGGTACAGAGATTGCTGTTACCGAGGAGAATAAGGTGAGATTTTGGAT +TAGCTAGAGCTTATAAAAATAATTTTTAAATAAAATAATTTAATTTAAAA +AATCCCAATTTTCCAGCTTGAATACATCGAACTGCTCGTTGAGTGGCGCT +TCAATCGCGGTGTCGAACAACAGACAAAAGCCTTCTTCACCGGCTTCAAC +TCGGTCTTCCCGTTGGAATGGATGCAGTATTTCGATGAAAGAGAGCTCGA +GCTGTTGCTCTGCGGAATGCAGGACGTTGATGTGGACGATTGGCAGAGAA +ATACTGTCTACAGACATTATGCTCCACAGAGCAAGCAGGTAGGGGAGAAC +ATTTGATTAAAACTTCTTTTTTTTTAATTAAAAAATACATTTATAGGTAA +CCTGGTTCTGGCAATGGGTTCGAAGTCTGGACCAAGAAAAACGTGCCCGG +CTCCTACAATTCGTCACAGGAACGTGCCGTGTGCCAGTCGGTGGATTTTC +CGAGCTGATGGGCTCGACGGGACCACAACTATTCTGTATCGAGCGTGTCG +GCAAGGAGAACTGGCTCCCACGGTCGCATACGTGCTTCAATCGACTCGAC +TTGCCGCCATACAGAAGCTACGATCAGCTCGTCGAGAAGTTGAGCATGGC +GATCGAGATGACGGAAGGATTTGGAAACGAGTAGTCTCCCGTTGTTTTTT +TTTGGTTTTGCTGGCCGCCTAATTATTGTATAACCCTCATATTTCTTCTC +ACCCACACCCCATTGATTTTCCTTTTTTTTCTCTCCATCAATTAATGTTT +CTTTTTTTCGAATTTGTGTGATAATTTTCGTGTTGTGATACCACTTGCTT +AGGTTCTCTCTCTCTCTTTCCTTTCCTTCTGGGTATGTACCTATATTTAG +AATATAGACAAAGCATCTAGTTTTTTTGGATTTAATTTCCAAATTTCCCG +TTTAATTCCAACAACAAAAAAACAATTCCACCACTTTTCAAATATTTTTT +TTGTGTCCATTGCTTCATTGCTTTTCTTCTTCTCAACCATTTCAACCACC +ACCACCCAATCTGCGCCTTATTCTTACTTGGTCCTTGTCATTCTTAGTTC +CAGTTTTTTTCACGGAAGCAGCTCTCGATTATTAATTCAAATGCATCCCC +CCAACCAATCCGTCACCCCCCGATTAGAACAATTTCTACTTTTTTTTTTG +AAAACAAAAAAAAACACAATTCCTCGATTTGGTTGTAATGAATTTAATTT +TTTAAATTTTGAGCCTTTTTTGTTGTTTTAGGTTTTCTTAACTTTCAGAA +TTTCCATGCGGAACACACGGAGAGCAGAAGATCAATATTTTGAACCCTGC +AGGAGAAGGACGGGACCCTTTGGTCACTCATGTCTCGGCTGTCTGCGTCT +CTCCTCTCTCTCTGCACCAGCGGAGAGGATGGATAGCCTCATTCGTTTTA +TTTCGGACTTTCGATTTTTCTTATTGCTCCACGTTTTAGCTTTTATCAGA +TTCGGGGAGCTCTTGCTTTTTTCTTGATTTTTTCGTAAAGTGTTCTTGGG +TCCCGCAGCGAAAATTAACTTTAAAATGATTTTTTAAAAACTTTCTAAAA +ATTCATTTTTTTTTCCATTTTACGACTAGAAATTTCGAAATTTGAAAAAA +AAAACCGTGTTGCAAATGCGCTCCAGCGTACAAAACAAAACCGCCACCAA +AAAATTTAAATTACTGAAAAAAACGATGAAATTTTGAATTTGTTTTGCCA 
+TTTTCCGGCAATGATTTTTTAAATAAAAAAAACTGTACAACAAATGCGCT +CCAGCGAACTAACCTTCAAATTACCGAAAAAAATTATAAAAATTTGAATA +TTTCCGATTAACCCAAGAAAAGAAAAATTCAATGATTTTCTTGAAAAAAG +TTATTTTTTTCTCAATTTTTCGTCAATAATACTCAAAATTTGAAGAAAAA +TTGTGTAGCAAGCGCGCTCTAGCGAACCAAACAAAATGTCTCCATTGAAG +CGTTTCACGAGGTTCTGGCCTACCTCATTGAATTTTTCGCGCTCCATTGA +CAATCGCCCGCCGATAACGCGTGGGAAAGTCGTGTACTCCACACGGACAA +ATACATTTAGTTTTACTACAAATATCGAGCCGCGACGCGACACGCAACGC +GCCGTAAATCTACCCCAGATATGGCCGAGCCAAAATGGCCTAGTTCGGCA +AAAACTCTTCCATTTCAATTCATGAGGGAAGCCAGAAATCCGTGATCCAA +GTAAAAAAAATCCCAAAAATTTCAATTTGCAAGGGCGCTCCATTGAAAAC +TGTGGCGCCGATCGCAACTCACGGTCGAATTTGAATTTTTAGTTGAAAAA +AAAACACCAATTTGAACTGAAAATTTGAATTTTCAATTTTTTCCCTAAAA +TAAATTGTTCAGAGTTTCGCTGTCGAATTGTTGATACCCGCTCGTCAGAA +TTGGAAATTGAGAAATTGAGAGAGAATTAGAGAAGGAGAGGAAGAGGAAC +TGCTCACTGGTCCCCTCACTCCATCCGAAATTAGGCACTTTTTGCTCCGG +CGGCACTAGAAGACGAAGAAAAAACGACGAGCAGAACACAATCAATTTCG +TCTTCTTTTTTATTTTTTTTTGGTTTTTTCTTGTTGGTACCACGGATTTC +TGGCTTCCCTCATAAATTGAAATGGAAGAGTTTTTGCCGAAATAGGCCAT +TTTGGCTCGGCCATATCTGGGGTAGACTAACGGCGCGTTGCGTGTCGCGT +CGCGGCTCGATTTTAGTTGTAAAACTAAATGTGATTTGTCCGTGTGGAGT +ACACGACTTTCCCACGCGTTGTCCGGCGGCCGATTGTCAAAGGAGCGCGA +AAAATTAAATGAGGAAGGCCAGAACCCCGTGGTAAAACATGGTGCATCAA +CGCAAAAATTTTTCATTTTTTGGAAAAACTCAATTGTTTTTTTTTCGCGT +CGGGACCACCAAACCAGAGCTCGTTTACTAAATTGAATTTCCAAAAAAAA +AAAAATTTCGCTCTGTTCAAAATTCGAAATTCAGTTTTCTTTCCCTAAAA +CCTAATAAAATGATCTTGGCACAATGCTCACTGTGAGCTTTGGGTCTAGA +ACGATGGCCTAGAAATGTTGAGATTTCCTCTTCCAGCGCCCTTGCCAATA +TGACCTAGAAATTTCAAATTAGTATAACTCGGCCATTTTCCATCCGTTTT +CGCCAAAACTACACCATTATAACATTTAAAACTTACTTTTCAATATGATG +ATAATACTTTTCCAGTGGAAGAGTTTTCCGCATTTCTAAGCCACCAAAAC +CTCTCCTCCCGACGTTTTATCAAACAAATGATATCACTGTGATTTGTGTT +GTAAGTTGCGCGCATGATTGCACTTGCGCAATTAATTAAGACAATTACTC +AAAATTTTGATCGAATTAACCCCATGTACATATAAAATACAAGATTTTTG +TGTATTGTTCTTCAAAGTTTTTTGTCGTATTTATATAGAAGCAGTGTACA +CCTAAATTTGGCTTATTACGACCTTTTTTTCCAAGGAAAATACTCATTTT +TAGGGTTATAAACCCCTTGATTGATACGTGGCCTAGAAAACTGCAAGCGC +TTTTTCCTGGAAGTTTTAGGCCACCAGTTAGAGTCTTGCAAAGACGGACT 
+AGTTCTCACATGACCCAGAAAGTCTTAGTCCATCACGTATTTCTTACGAC +GCCTAGAAATGCAAATCTTCGGCCACCAATTTTCATAGACATTCTAGGCC +ACCCTTTTTGACGTGGCCTAGAATCTTTAACAAAATTTGTAATGAAAATC +CTAGGCCACCATTCAGTTAGCTCTCATTTTTGATACTTCGCCTAGATTTT +TAACACGTGGCCTAGATTTCTATTTTAACACGGGGCTTAGATTTTTATTT +TAACAAGTGGCCTAGGTTTTTGCTACTAGGCCTAGATTTTTTACATGTGA +CCTAGATTTTTATTATAGCACCTGACCTAGATTGTTGGCATTTGGCCTAG +ATTTCTGTCATGTGACCTAGATTTTTTAATGCACCAAAAATTCTCCATAC +AAATTTAACTCCTACACTCTAATTTGTAATCAGAACCTTCGATAAGAAGA +ACATGGAGCTACTGCGGTGCTCAAATTCCGAACAAAAAACGGCTGAAAAA +CTGAGGTATACGTGGTTTTTTTTCGAAACTGATAAAAGTCAGCATAAATC +ACGATAAACTGAACGTCCTTTGGCCAAGTTTAGAGAAACCTAGGCCACGG +GTAGTGGTGGCCGAATTTTAAAAAAACTGGGCCACGAGGTTTACGTGGTG +TCAAATTGCCCCATTTTGGTTTGATCTTCGAAAAATGCGGGAGACGAGAC +GCAGACTTTTCAACTGATTTCGCATGGTTAAGAACGTGCTGATGTCATTT +TTTTCCGTGGAAAAATTCCCGCATTTTTTGTATATCAAACCACAACGAGA +CACTCTAACACCACCGTCCGGCCGTTCAAACGAAAATTTAGGCCACGGCC +ACTAGGGTGAAGTGGAATGCCCTACCCTTCTCTACAAAATTCATTGGAGA +AACCTAGGCCATGCTCAAAGAAACAGTGGCCGAGAAAGGAAAAACTAGGC +CGCGTTCGCTTTGTACCTCTCTTATAAAACACTGTCTGTTAGTTCATAAA +TCAGAGTCTGCCCCCTATCACTACTCTCTCATTTACCTGCCCCTCTTAAC +AAACAATCAACCCCCCCCCCCCCTCTAAATCGACTACTTGTTCCCCGTTG +TACCTGTCTGCGTCTAGCTATTTCTGTTAAATTTTTGGAACGAAAAAGTG +GTATGTGAATGTACGAAACAAAAATAATAATCAATTGAATACAATTTTTG +CCCCCCCCCCCCCCCCCCCTCCACCCCGCTTGCCTGTCTCTAACCGGTTT +CCCCTTTTTCACCTCAATCAGCCCCCCCCCCCCCCCCCCCCCCCCAATTT +CATTTGACTGTACATTTTCCTTTACCTATCTGCGTATCTACACAATTTTT +GTATTTTTTCCTTTTATTTTTCTAATTATAAACCCTGCAATTTGCAAGAA +AAAATCTCTAAAACGCTTGGCCCCTTACCCGGAGTGCGACCGTGGCCTAG +AAATTTATATTTCTTATAAGACCCCCCGGCTCTTCCAATTTTTCATTTTT +TTTCTTTGAAAGCCTAAATTTTCATTTTTTCTTAGAGATCAATATATTAT +GGGTTAGTAAAAAATTTCAAAAACAAGCTCCCCGTGGGCGTGGAAGAGTT +TTCCAAATTTCTAGGCCATACTTTTCGAAGGCGTTGGGGTACGCTCGCAC +CCTAGTACTAAAAATTGTAAATTATAAAATTATCATAAAATTGGCTCAAA +GCATTTGAAAATAATAAAAAAAATTTTAGAAAAAAAAACACGAAAAAAAC +CCCTATTTTGAACATGGTTCATCGACCGGGACCTATATACTTTGAAAGCT +TATATCTCGCTTCATGTTTGTTTTATCAAAAATTTTTCAACCAACAAAGT +GTTTATTATTTCTTTTTCTATATTTTTGTAATTAAAAACTTTTTGATAGC 
+TTCAAAAGTAACTGAGATACATGTTGTCAAACAGAGCAAACTTATAGCTT +AGCCCAATTATTTATTTTTTGCTGTGTAACATTTCTTGTATAAACTTTTA +AAGTTTACGGTTTTTTTAAATTTCTAAAAAAAATTATTGACACTTTAAAG +GGAGGTCATGTGGAGATAGCCTGTACAAATAAAACTGATGAACTAGGACG +AACCGAAATTCATAAACCATTATTTACACAGGTGATTCTTGCCTTTTTTG +GAGGGTTAAAAAGGTTAAAAATTATTTAAAGCTTGGAATGAGCATTTGTG +CGGCCGAGTTTTTATTTTCTAGGCCATAATTTTTAAAACAATGGCCTAGA +AGCACAAATTTCTAAGTAATTTGAATTTCCCGCCAAAAATTTTTTTTTCC +ATAAAATTTAAATTTCCCGTCGAAAATTTTTTTCGGAAAATTTGTATTTC +CCGCCAAAAGATTTTTTGTCTCAAAAAATTTGAATTTCCCGCCAAAATGT +TTTCTCAGAAAATTTAAATTTCCCGCCAAAATATTTTTTGTCTCAAAAAA +TTTGAATTTCCCACCAAAAATTTTATCCCAAATTTTATCGAACATTTGGT +GGCCGAATTTCCATTTTCTAGGCCACCCTCAGTTAAATCGTCAAATAAGT +TGACCTAATTAAATTAAAAATTAACAATTTGTAATGTCTCCGACCAAAGT +TTCATAATTGTATAGATCATCATGGTGGGAGGCGGGGTAGCACTTATGAG +TGAACTTACCGGTTGACCGCCCGTCTATACATAGTAGCCAACACCCGACC +ATTTTCTGCCAAATACCTATAATATCGCTTCGAGACCCAACTATTGCACA +GTTGTGGTTTATTTCGGTTGAAGACCCAATATTAATTTACCTACAGCTGC +TAAAGCGTTTTAAGACCTAAAAAATTAAGCAAGGCCCAAAAATCCAAACT +TAATACTATAGTACTTCTCGTTTCGAGACCCAAAATCCAAAGCTAGGCCA +TATTCTTGGTGAGACCCACCTGTAATAAATATCACTTGAGTGAAAGTGAA +CTCTTACTTTTTTCTTTCGACAGCCAAAAAAAATTGAGTCAATTAGCAAA +TAATTCACGTCTCCCCCTTTCAATGTTTACATACACATTACGAACGAGAA +AAAAAAACGAGAGAAAAATTGAGTGAAATGGAATCAAAATGTGTTCATTT +TTACTTTTTATGAAAATTTGGATAAGCACATGGCGTCAGAGTGTCCCATT +TCGGCTTGATCTACGTAGATCTACAAAAAATGCGGGTGAAGAGACGCAGA +GTTTTCAACTGATTTCGCTTTTTTTTGGGCGAAAATTTCCGCATTTCTTG +TAGATCAAACCGCAACAGTGGATAAGCGCGCTCTATCGACAAACCCGGTG +TGTACTACTCGCGGACAACCAGTATAGTTTTTTTTTTGAATTTTTGTTCG +AAATTGAAATTGAAATAAGATGCCAAATTTAAATTTAAAAAATAACAATT +TTTGAAATTAAATTTTTAAAAAAATTCCCCGTAAATCTACACTTAGTCGC +GTAAATCGACAAGGCTATTTTTTTCAATTTTTGCCCATCGCACTTTTTTT +GCCCAATCAATCATCGTCAATTGATCCATTGATCTTCTTCTTCTTCTCTT +TCTCTTTTCTCACCACCGAAAATTTCATTTTATTTTTTGACACTGTCTGC +GTCTCTCTGTACCCCCTATCTCTCGCCTCTCAAGGACTATTACAACGAAA +AATTTGGGGAGAAGCCGTGATTTTCATTTTAATTTTGATGAAAAATTTTA +ATGAAAATGAAAAAAAAAATTGTTTTTCAAAAAACAATCCCCTCGTAAAT +CCACACCAAATTCAATATTCACCTTATTACAAATTGATTATTTTTCCCAA 
+AAAACACACGACATCGGGCGCCCATCTATTGTTTTTTATTCCAAAAAATT +TCTGCAACTGTCTGGGTCTCCGGTTAACACTGAGAAACAACAAATTTCAT +TTTTCGATTCATTACATTCATTTTGAAGGTTTTTGGTTACAAATTCGACA +CGGATTTCTGGCTTCCCTCATAAATTGAAATGGAAGAGTTTGCTGAACTA +GGCCAACTTGGCTCGGCCATATTTGGCATAGATTTACGGCGCGTTGCGTG +CCGCGTCGCGCCTCGATTTTAGTTGTAAAACTAAATGTGAAGTGTCCGTG +TGGAGTACACGACTTTCCCACGCGTTGCCCGGTGAGAGATTATCAATGGA +GCGCGAAAAATTCAATGAGGTAGGTCAGAACCCCTTGAAAATTGAGAAAA +AAAAATCTCACAACGATACTCCGCAAAGTGTGCGTGTTGTTTATTAAATG +ATAAATTCAAAAGCAAATAGCGATTTTGTTTCAAAAATATGCCGAAAAAA +AAGAAAAAAAATTAAAACAAAAGTTCAGCCACCAATTTTATTTCTCGGCC +ATGGTTGTGTCGTTGTTTATTTTTTTGCGATCGTATTTTTTCTATAATAA +GACTTAAAATGAAAAAACATTAATAAAAATCAGTTTTCCATGGCCACTGT +CAAAAGTCGCAACGACACTCCCCTTCCAGGCTCCTCCCCCCCGCGTGTAT +TTTAGCATGTAGTGTAAATTACGCGATTTTGTGTATAAAACCCTAATTTT +TTAGAAGATTTGAGAAAAAAAACTTTTAAAAAATGGGAAAAAATCCCCTT +TCCCTTTTTTGAAATACTTTCCGACAAAGTTTAGCCAAAAGTCCAAAAAA +AACCAATATGGCCTAGAAAAAATCAAAAACTCGGCCATCCAAAAAACTAG +GCTACACGTTCTTTCGACCTCTGCTCCCCACACCTATGCTGCCCATATCA +AATGTCTCCCGGACCCCTCATTCCGTCACACATTTCTCAAAAAAATGTCT +CTTTTTCTCAAATTCTCTGTCGAAAATCCCATATCTCTTCAGTGTTCAGA +ACTGACTGTTTTATTTCAATTCTCATGATCTCATGTCTCCCGTATGACTC +ATTCCTTCGATATATTTTTCTTTTTTCAAAAATCATTTTTCCTATATATA +GACTTTGTTCGGTGGGAATTTCTCATTTCAAAATATTTATTGAGGTCGCT +TGTTCCCTTTTTATATGGTCGTGGCCTAGGTTTCTAAAACTCGGCTACCA +TTTCCAAGTTGTAGCTTCGAGCTCTTTGTCTCGGAGTAGAAAAAATTCAC +GAAATAAATAAAAGTTCACGTGGCCGTGGCCTAGATTTCCAAAACTCGGC +CACCAATTTTTTTTCTCTTATCAGTAAACCACGTAGAACTCCTTTTTTTC +TAGCCATTTTTTAACTGAATTTCCAACATATTCGGTGGCTCCATCAATTT +TACTGATAGTAATTTCCAATAAAAAAAGGTATTTCTGTCGATAGGAACCC +ATTTTCATCTACTTTTTGTTTCTACTTTATGGGTTTTGCCACTTTCCCGC +ATGCCTACGTGCCTACCTATCTGCCGACCTAATATACCTCGTAGGCAGAC +CTCGGCCGCCTTGTGGTGAGATGCCAGTCGCTTTGTAGGCAGGCGTAGTT +TGTCTTGCTGGTATGAGTAGGCCTGTCAGCCGCCCAGTAGGCAAGCGTAG +GCATGATGCAGGCGTAGCAGGCTTCTTAACTAGGCATCGGCTGCCTTGTA +AGCCGGCGCAGGTAAGTGAAGGTTTCTTCGTGGCTGGCATCCTTGCAGTC +AGGCACAAGTCGTCGGCTTGAAGGCAGGCAGATGCCGCCTAGTAGGCAGG +CATATTTTACTTATAGGCAGGTATTAATGATCCTCTAGGCTTGCGTTGGT 
+CGGTTTGTAAGCTGTCGTAGGCTGCCTTGTAGGCAAATTGAACTCTCAAA +CCAAACAACACAAAAATTCCAATATTTTTTGTACCTTCTGATTTTGATCT +CTGACTCATGATCTCTTTATCAATCTCATATCTTCCATTTTTAGAAATAA +ACAATTTTTCCCTTCTATTCATTGACTTTAATCCCTTTCAATATTCATTC +AATACACCTATAATACGAAAAACTCTTATCTTTTTTCATTTTATCACACA +TGGTGCATAATATTATAGTTAGTGGCTATTGAAAAAGCACATCATTTTAT +CACTTTTTGTCACTTTTCCACGATTCTCAACGTCTTTTTTTTAAACACAA +AACGTTTCCCGTCATTTTCTACAGTACTCCCGCATTTTCATTTTCCAGGT +GAGACCTGAGATGGAGGGCGACGAGTTGGTTCATCGTCCGACGGTCCTCT +CGACGACGGCTTCTGCCACGTCACGGCCGCGACACAAGACTTTTGAGCTG +TTGGGTCCGGTGAATAATATCTCGTTGGCTGATATGTATAAAGTGTGCGG +GAAGATAATTGGCAGGTTCCCGATTGCATTTATTATTGCGACGGTGATCA +TGTGCAGTGGTGGATGTTAGTTTTTTTTGACGTAAAAAATACAGTTTGAG +AAAATCTTTTTTTAGATCAAAAAAGAAGAAAAATATGGAAAAAACAACAT +TAAAATATTTTTTAGAAATCAAAAAATGTAAAAACTGTATGATTTTACTT +AGGGCCTCCATGGTAGACAGGCACGGTTTTAGGGCCTGACGCCTGCCTGA +AACCCACGTGGAGTCAGAAAGTCCCATTTCGCTTTGATCTTCAAAAAATG +CGGGAGAAGAGACGCAGATATCTCATCTATTTCGCATGGTTAAGAGCGTG +CTGACGTCACAATTTTTCTGGAAAAATAATCCCGCATTTTTCGTAGATCA +AACCGCAATGAGACAGCCTGACACTATTAAGCCGCAAAGGCAAAAGTGTT +TGCCGTCTGAATTTTGAAACAAAAGGTTTAACCGGTGAAAATTGAAAAAA +AGTATAATAATAATAATAATAAAAGTAGAACTATTTTCAGTATACTAAGG +CAAAATGATTGTTTTATAAAAAAACTTAATTTTAAAATTAGATCTCCTAA +ATGTTTCAAGCAGCACAACTTTCATGTGATGTCAGGCTGTCTCATTACAG +TTTGATCTACAGAAAACGCGGGAATTTTTCTCCCAGGAAAATGTGACGTC +AGCACACAATGCATAATCAGATGAGATTTCAGCGCCTCAACTCCCGCATT +TTTTGTAGATCAAACTAAAACAAGACTCTCTGACACCACGTGAATTTTCC +AACCATCAAACTATCACAAAAAAATTGGCGCCAAAAAAACTTCTTTAAAA +ACTTCTCATTTCCAGTAGGAACAATCGGTCTCGAACTGAAGGATAATGTG +CGCGACGGGTACACGCCGAAAAACTCGCTATCCCGACTGGAAAATCGCCT +TTATCGAGAGTTCCTGAGTTCCGAAGGTGATCCGGTGATGACTACAGTTC +TCATGACTGCCAAAGACGGAGGATCCATGCATCGGATCAAGTATTTGGAA +GAAGCACAGAGGGTGAGTGGCATAACATATCGGATACCGTATTTCCTCTA +TTAGTAAGGCATGCAAAACTGATTTTCGGACACCTAATTTGATGCAAAAC +TAATAGAGGGTGCAAAACTAATTTTCGAACAGGTTTTTTCTCATTAAGTT +ATATTATAATATCATCAATTTCAATAACAACTTCTGAACCAAAATGGGCG +AATTTTACGACTGATACGCAAAAATTGTCCGAGTTGTACTCATATTTTGC +CAGTTTTGACTTGTTATACCAAGTCTGTAAGAGTTTTCCTAATTTTCAGA 
+ACGATTTTATAATGCAAATTTTGAATTCCTAAACATAGGGAACAAATTAA +GGGGTGCAAAACTATTAGAGGTGCAAAACTAATAGAGAGTGGAAAACTAA +TTTTCGATTAGTGATTTTTGATGCAAAACTAATGGAGGTGCAAAACTAAT +AGAGGTGCCTAACTAATAGAGGAAATACGGTATATTTTTTTGAATAAAAA +ATTTCAGCAATGGCTCTACATCTCCAAAAACCTATCCGCAGACGTTGGAA +ACGGCGAGTTCATGAAATTCGGAGATTTCTGTGGCCACTACTGCCAAGCC +AACGACATCATAGGATACTTCCTGGACGCCTACAAAACCAAGTCAGCTGA +CCCAAAAATGGATGGCTACCAACTCAATTACCCAATTACCACAGTAATGG +GTTATCAGCTTCATCTCGAGCGTAACTTTTTCGGGGTTACTGTGAATCAG +TCGGATCCGGTCACTAATATTCAGAGTATGAAGGTTCTGACGCTTCCTGT +GTTGTCAGAGGTACGGACGTTTGAGGATACGGATAAGCTGAACAAGTGGG +AGCTGGCGGTTTATAACTATGCTACAGGATACTCTGCACTCGAGGGTGAC +GATCATTTGGTGGAGATTAATGTGAGTGAATAAATTGGACCATTTCAAAA +ATATTGGGCCAGCCCTAAAAAGACAGCGGGCCGGCCCATATCGACCCACT +TAACAGGCTCAAAAAAGTTTAGTTCAGTTGTGTGCCGCAAATTTTGAAAT +TTGCCGAGCTCGGCAAAATCGGCAAATTTGGTAAATCGCATAATTTTCGA +GCTCGGCAAATTCGACAAGGCTAAATGTTTGAATTTTGCCGTTTTCGGCA +AACGGCAAATTTGGCAAATTTGCCTTACACCCCTGGTTTAGGGGTTTCGT +GTTTAATTTTCAAAGCTTAGATAAACCTCTTTTCCAGGTGATCGGTGCTG +AAGTCGTCGACACCGAAATGAACAAGGACGCTCAGAAAATGGTGCCCTAC +TTCATCGTCGGAATCGTCTCGATGATCATCTTTATCTTCTGCACCGTATC +CATCTCTGCCTCCTACTACGGATACTTCTCCTGGCGAATCGGACTCATCG +CCCTCGCCTGCCTCTTGGTTCCGATTCTCGCAATTCTCACTGCATTCGGA +ATCAATAATATGCTGGGAAATCGAACGAACTCTCCGATGATGATCATGCC +GTTCTTGATCAATGGAATCGGAGTCAACGATGCGTTTTTGACACTTCAGA +ATTGGTTGCAGCATAGCCCGGATCTTCCGTCAGGGAAGCGGTTGGGGTAT +ATGTTGGCTGAAGCTGGTCCATCGATCACTACGACCACTTTGACAAATGT +CATTGTTTTTTTGATCGGGTGGATGAACCCAACGGAAGGTAAGGTTCAGT +GGTGGACGGCAAACTACTTTTTCCGGCAAATCGGTAAATCGGTAAATTGC +CGGAATTGAAATTTCTGGCAAATCGGAAAACCGGCAATTTGTCGATTTGC +CGAATTTGCCGGAAAAAAAATTGCCGAACGGCAACTGCGCCCTGGTAAGG +TGCATAGCTTTGGCCTCTAACATAAAAAAATTTGTCTATTCATCCTTTAT +TCCAAAACGACATGTATTCCAGAGATGTCAATCTTCTGCCTTGGATGTGC +AATCTCCCTACTCCTCGCCTACGTCTACACCCTCACGTTCTTCTGTCCGG +TGCTAGTTCTTCTGCTTAGCGAACGCGTCAATGAGCCCAGCAAGCTCGAG +GGCACTTTCAACAAGGTTCTCGGATTCTACGCAAAAATCATATGCAGCCG +ATGGACATTTGCACTTCTCATCATCGGAAGTCTTGTCTACTGGGGTTTCG +GTATCTATGGAACACTGGGGATCCGTGCCGTGTTGAACACTGCAAAGATC 
+CTTCCCCTGGAGACGCCGATCCGCAAACCGAACAGGATTATCGAGGAGAC +CGTGTGGAAAGAGTTCTACCCAGTCACCGTGATTGTGAACAGCCCAGTTG +ATATAAGCAATGCAAATGCACTTCGAGCATTTGACAACCTCGTTCACGAC +TTTGAATCGATGGAAAAGTGCCGGGGATCCGAGTACACCATATCTCCAAT +CAGAGACTATCAGACCTATTTTTACGGTGTAGGAGCCGAGGACTTTGATT +ATGAGGAAGAAACCGTGAAGAACACCTCACATTCTTTAGATTTCAGTGTA +AGTTAGCAGTCCACGCGTTCCTATTAAAATCTACATATATGCCTATAACG +TGTTTATCCACAGAAGAGAAAAATCGCCATATCTGCCGCGCAAATGAATC +CGCGGGAAGAGACAAAACTACTGTAGTTTTTAACCAATTTGTGTAGATTT +ACGAGCTATTGCGTCATCGTTGTATTTAATTTTGAGCCGCAATTTTTAAT +TTTCAGGCGTTTCACACGTTTTTATATTGAAATTTATCTATTTATTGAAT +AAATCTTAAAAGAAAACACAAAAAAATTAGAAAAATTCCGAAAAACGCCT +GAAAATTAAAAATTGCGGCTCAAAATTAAATTCAACGATGACGCAACAGC +TCGTAAATCTACACAAATTGGTTAAAAACTACAGTAGTTTTGTCTCTTCC +CGCGGTTTCATTTGCGCGGCAGATATGGCGATTTTTCTCATCTCTGGATA +AACACGTAATACCTACATATTTTGTGTGCCTAAAAACATGCCTAGCAGTG +AAAGTAGACAAATGTAAGCACACAAACGTGAGTAGGGCTTAAATGGTATG +CAGGCACTGTTTTAGGGCCTGACGCCTGTCCAGGCCTGTCTTGTACGTTT +CATCAATTCAAGCCGTATATTTCCAGAAGCTCACCGGATTCCTTGCCAAC +CCTATCTACAAGCATCACAAGGGAGGACTAAAACTTAACTTTTCAAATCC +GTTAGTTCCTATCTGCCACCTTACCCTAAATCGAGTACCATTTTCAGTGT +TCCAATCCGAAAAGTGCAACTGATATTCGCTTATGAAAACGTAACCTCGT +GGGATGAACGGATTCAGATTATGCAGGACTGGCGGACCATCGCAAGTTCC +CACGTGGCCCTAAATGTTTCAGTGTGGAATGTGAACGCAATGTTCGTAGA +TCAAATGCTGAGCCTGAAGGGATTGGCGATCTCGGTAAGATCTCAATGGT +TCCAGCGCCCTATAAATACCGCTTATTCTTTCAGAATGCCCTAGTCACCT +TGGGATGTATGGCTGCCGTCTGTGTAGTCTTCATCCGCAACCCCCTCTCA +GTTGGCCTAGCGACCGCTTCGATTTTATCCATCTCCATTGGAGTAACTGG +ATACCTGTGCTTCTGGGACTTGGATCTGGACCCTGTGACCCTGTGCGCAG +TAATTGTTTCAATTGGAATGTCCGTTGACTTTGTTGCCCATGTGGCCTGT +CACTATCAGGTCAGGTATAAAGAGTTCGAGGAGAAAGGAGTATTGAAGAG +GATTGAGATGAAAACTCCCGAATCTCGAGTCGTCAACTCGCTGTCCAATG +TGCTCTGGCCAATGGTCCAGTCGGCTTCTTCAACTCTTCTATGTGTACTT +CCACTTGGTATTCTACAGGTACCTACCAATACCTATACTTTACCGATAAC +CCTACCCCTCAAACATTCCAGAACTACCTCCCAATGGTTTTCGTGAAAAC +CATCCTGCTCGTCGTGATCTGGGGAATGTTCCACGGTCTTGTGCTGCTCC +CGTGCATTCTTGCACAGTTTCCCCTGTCTGTGTTCAACAAGACGTTCGCC +GACTTGTTGTTCGGCAGAACATCCTCTTCGTCTTGCTCTTCGGAGTCCGA 
+TTCGGAAACCGACACCGGTGATGCTCAGGAGATGGTTCCGCTCGCTGGAA +CCGAGAAGGCTTAAGATCTGAAATTAGTTGTTGTTTGTTATTATGTACAG +TTGGGATACAAAGTGTATTATGTAGTCTGTAGTGTCGCAGTGCTACAAAC +TACAAATTCAAAGCTACAAACTATAAACTCATTGAGATCTGGCAGAGCTT +TATAGTTTGTAGTTTTATGTAGCAGCTACAAACTACAAATTTCGATTGTT +GTAGTTTGTAGTATGGCGCACAAACTACAATGTACATTTTGCAAGTTTTC +TAGATTTTTTTCTCACTCACACACACACACACACACCGTTTTGCTACGTG +GCTGCGAAAATGGAAAACTCGGCCACGTCGCAATTCACACATCGCCAGAA +ACTTCTAGCTTTACCAGTAGCTTTTCATGATCTACTCTTCTTTTTTTTCC +CCCCTCAATTTCTATTTGAAATTTCAGAATTCCCTCAATTAGTCTTCTGT +CACCGTGTCCTCCTCTTTTCCGCAAGCGGGTCCCCTTTTCCCCCCACAGA +TCGGCTTCTCAGGGTTCCTATAGCACTTTTTTCTTCGATTCTTGTCACAC +AAAAAATTGTACTTCCATCGCTCGACGTGTCCATTTTTTCTTGTATTTTT +TAAATGCATTTTTTGATAAATTTGATTTTTTTTATCATCCAGAAAATGGT +CAAAAATTCAGATTTTCTTGCTGGGATTTTGCTGGAATTTTTTCAAGCAT +CAGACATGCAAAAACTTATGCAAAACTACACAACACTATCGATGGGGACT +GTTCGACTCTTCACCCCAGTCAAACTTTTTTTCACTATTTTTTTTAACTT +CTGTTTTTTTGGTACTTTTCTGGCCGTTTTTATGGAATTTTTTAACCCGA +CATTGGAGTTTACCTTTTTTTGTTCAGGATTTTAGGAATTTCTGCGACCT +TCTCACTCATGTCCTCCAGCCCCGCCTAAGCCTATGCCTTAACTCAAGCC +TAAGCCTAAGCCTAAGCCTAACCTAAATCGCGTCAGAGATAACGTTCGCC +ACTGACGCCAAGCCTCAACCTAAGCCTAAGCCTAAGCCTAACACTAAGCC +TAAGTATAAGCCTAAATCTAGGCCTCAGCCTAATACTAAGCCTAAGCCTA +AGCCTAAACCCAAGCCATAAGAAGACACTCTAAAAATGTTCAGATTATTA +AAAAATTACATTCAAAGTTTTCTTCAAAATTTTGCTTTCGACTTTCCAAG +TAATCAATACCATGTAGAATTCCAAAATCTTCATACAAATCCCTCCCCTC +TTCACATCTTTCCCGACAGCTTGTACTATTTCTCCTCCACCCACTAGGCC +AGGTCAACGCTGCGTCTCTCGTGTTTCATATGCTATCTCCCTTTTGTGGA +ATCCGTTCATCGTTTCGGATCGCTTACTTCTTCCGCGCGCGTCGCATTTC +ACTTTGAAATAAACTTCAGTTTTCCTTTCCAAAATTTAATTGGTGGAAGC +GTTTGAACATGCGAAATTTGTTAATTATCGTGTGTATTCATTTAATATTA +CTACTTGTACATTTTACCGATGCATGTAAGTTTTGGATGTATCAAAAAAT +TAAATAAATTTCTTAAAGGTGGAGTAGCGCTAGTGGGGAAATTGCTTTAA +AACATGCCTATAGTACCACAATGACCGAATATCATGATAAAAAAATTCAA +AAAATTTTTTAAAATTTTATATGATTTTTTGAAAATTGGAAAAATCTCAG +TTTTTGCCTAATTCCAATTTGAATTACCGCTAATTGGATTTGTTCGATGG +AGCGCGCTTGCACGTTTTTAAATTTATTTATTTTATTTTTTGTTATTTTC +CACCGATTTTTAATGTTTTCGGTGTATTTTTGCTCTAATTTTAGAGAAAA 
+AGTCAAGATGAATGCAGATTTTCGATTAAAAAGCACGCTCACAGGCGTAA +AAATGACAAAGTAACGATTTTAAACGGTTTCGAACCTGAATTAATTAATT +TCACTGATTTACGACTGTAAGCGTGCTTTTTAATCGAAAATTTGCATTCA +TCTTGACTTTTTCTCTAAAATTCGAGCAAAAATACACCGAAAACATTAAA +AATTGGTGGAAAATAACAAAAAATAAAATAAATAAATTTAAAAACGTGCA +AGCGCGCTCCATCGAACAAATCCAATTAGCGGTAATTCAAATTGGAATTA +GGCAAAAACTGAGATTTTTCCATTTTTCAAAAAATCATATAAAATTTAGA +ATTTTTTAAAGAATTTTTTTATCATGATATTCGGTCATTGTGGTACTATA +GGCATGTTTTAAAGCAATTTCCCCACTGGCGCTACTCCTCCTTTAAAGTT +TGAAGTTTTAGTAAAAAAAGAGTGTCGGTTTTTTAGTTAATAATATTAAT +TTCCAGTCGCCGGAACAGTTCGACCATTTTTCACCACTCTTCCAGTACCT +GGAAGAAATGTTCCTATTATGAGATTATTCGAGAACTATGCTGCATCGTG +CCGACCAAAAAGTAAAAAGGACTTCAATGTTGATCAGCTGGCAAATGTAG +GTTTTTTCCCGCAAAAAGTCGTATTTCGTGCAAAACCGGCAAAAGCAAAA +ATTTTGAATACCGGCAATTGTCGGAGTTGCCGATTGCCGGAAATCAAAAT +TGCGGGAAATAAAATTTTGATACTTTTTTGTAGATTTAGGAGCCTAAAAA +TGCATTTTAATGAACAATTTTTAATTTTCAAGCTCAAAATGATTTAATCC +TTTAAGGATTGACCGTTTTCTTTAAAAAAGACTAACCGAATACTATAAAA +AAAGGTGAAATTTTTTCAATTTCCAAAATATAATTAATTTTGGCAACTGC +CGATTGCCGGAAATTGCCGATTGCCGGAACTTCACAATACCGGCAATTGC +CGAAATTGCCGATTGCCGGAAATTTCCAAAACCGGTAATTGCCGGAATTG +CCGATCGCCGGAAATTCCCGATTGCCGCGCACCCTTGTATTAAAGGATAC +GGCAAACGATTTTTTCCGGCAAGCGGCAATATCGGCAATTGCCGAAATAA +AATATTTTCGGCAAACGGCAAACGGCAGTCAGCAACATGCCACTTCTCCA +ATTTTCGGCAATTTCGGCAAACGGTCAATTTGCCAGAAATCATCGGAAAA +TTGTGATCTTGCACATTTTTTCTGGAAATTTCAGAATTTCAATTTTAAAC +AACAACATTGTACACATCCTATGAATGACACATATCTTAAAAAGCCAGTA +AATTCTATAAAAAATATCTAGAGAAACTGGCAAAATAATTAAAAAAGGCA +CGGATTCATGTGATTTTGATTATTGAAAATATCTTTGAAAACTTTCGGAA +AATTGATATCCGGCAAATCGGCAAATCGGCAACTTGCCGATTTCCCGAAT +TTGCCGAGCAATGAACTACACTGTTCCAAATCGAATTCGTAACCGATATT +ATACTTAGGAATGATATTTGTAACTCGTATAAATATTAAATAATATATAT +ATATTCCAGCTTCTCCAATCTCTCCAAATGGATGAAGTAGCCACAAAAGC +CTACAACTCCCTATTCTTCTCAATGGCTGATATGCAAATCGAAAAGTTCA +TGGGTAAATGGTACACAGTTGTTGATTCGAAAGAAGTTCATAAAGAGGAT +TGTTCAATTTTCTATTGTAAGTTTATTTTCATTCTGAAAATGAGATGCAA +ATTTTCAGTCGATATGGTACTTCAAACTCCATACACCGCAACATTCACCT +CAAAGCAGTACGGAGTGATTAATAATGATGTCGTCACCAATGAGGGGTAC 
+GGTAGTATGGTTGGTCCAGAGCCCGGAGCCGTTCTTATTACAACTGGTAG +TGAAAGGGATCAGTGTCCATGTAAGTATTTTAAATAACTTTGGTCCGGTG +ATATTAAGATACAAGTTTAAATACTTCCCGTTTCAAAAACTTCGAATTTC +CCGCCAAAACTTTTCTCTGAAAATTTTAATTTCTTTCCAAATTTTTTTCT +TTGAAAATTTGAACTTCCCGCCAAAATTTTGTAGTCAAAAAATTTGAATT +TCTCTCCAAGAATGTTTTCTCAGAAAATTTGAATTTCCCGCCAAATTGTT +AATGTCAGAAATTTTGAATTTCTCGCCAAAATTTATTTTCTGAAAATTTG +GATTTCCCGCCAAAATTTTTTTCTCAATAAATTTAAATTTCCCGCCAAAA +TTTTTCTCTGAAAATTTTAATTTCTTTCCAAATTTTTTTCTTTGAAAATT +TGAATTTCCCGCTAAAATTTTATTGTCAAAAAATTTGAATTTCTCTCCAA +GACTGTTTTCTCAGAAAATTTGAATTTCCCGCCAAATTGTTAATGTCAGA +AAATTTTAATTTCTCGCCAAAATTTATTTTCTGAAAATTTGGATTTCCCG +CCAAAATTTTTTTCTCAATAAATTTAAATTTCCCGCCAAAATTTTGTTAT +CAAAAAATTTGGATTTCTCGCCAAAATTTTATTGTCAGAAAACTTGAATT +TCTCGCCAAAGTTTTTTCTCAAAAAATTTGAATTTCCTGTCAAAACTTTG +TAATCAAAAAATTTGAATTTCCCGCCAATTTTTTTTCTCCGAAAATTTAA +ATTTCCCGCCAAAACTTTGTTATCAAAAATTTGAATTTCTCGCCGAAATT +TTATTGTCAGAAAATTTGAATTTCCCGAAATCTCTATGAATAAAACATTT +GCAGTTTTCCCTGTACGAATCGGTGGCTTGAGTGACGAAGGAGAATATCA +ATACATGATCTTAAGTACACCACTGAAGTATCCAACGATGGTCTTGACAA +GGTAAGTTTGCCTTAAAATTTGTTTTCTAATTATTCCAATCTAATATTCA +TGTGATGTTGAAAAATGCAAACATGTATTTTAATACATTTTCCCCCTATT +GATGCGTACCGAAAAATGTAGAAAAATGGAATCATTTTTTTGCATTTTTC +GACATTTTCGACATTTTTGACATTTTCCGACATTTTTCGACGTTATTCGA +CATTTTCACGCATTTTTCAGAGTTTGCCGAACTAGGCCATTTTGGCTCGG +CCATATCTGGGGTAGATTTACGGCGCGTTGCGTGTCGCGTCGCGGCTTGA +TTTTAGTTGTAAAACTAAATGTATTTGTCCGTGTGGAGTACACGACTTTC +CCAGGCAATTGTCAATGGAGCGCGAAAAATTCAATGAGGAAGGACAGAAC +CCCGTGTCCACAGGAAGAATTTTCTCTGCGTCTCTTGTCTTTAGCATTTA +AAGTGGGGCGACGCCGATGTAGAAAAATGCTGAAAAATTTCGAAAAATGT +CGAAAAATGCACAAAATGATGGCATTTTTCTACATTTTTCTGCATTTTTC +GGCACGCAAAAAACATCACATGAATAACCCCATAAAAACTCCAATTTTTC +CAGAGACCTAACCCTATTCGAAACGAAATGGAAGCGAGAAGTGTATGATT +TTGTTGAGAAGAATGGTTTCATGAGTCCAATGGCAGCATTGAACACTCGT +CTCCATTTCACAGATGTCGATGTTTGCAGAAAAGTGAATAAGTTATATGA +GAATGGAAATGTTTGAGTTGAAGATTATCTTTTTTCTGATGATACGGCAG +AGTTTTTGACGGGTTGTGCAGTGTTTATTTTTAACCTTCATGTGTATTTA +AAAATAAAGATTAATTTAATTGAATCTGTGTTTAATATGAACTTTTAAAA 
+CACAAAATTTCAGATTTTGGTTTAAAAACTGTAGACACTTGATACTTTGT +TGAGTGTACCTAATTTGAATTTCGCGAAAAAATGACTGTCATATGATTCT +TGACCGTGTACACCTCTCGGGCAATACATTAATATTTTAATATTATGGAT +TATAAGCATTTTCAGTAGACTTTCCATATTTTCTCATGATTATTACTCAT +TTTTATGACGAAAAGCATTGTTTGGAACTCAAAATTATCAACTTTAAATG +TTAAAAAATATTCAGGAAAACATAATATTTGCCTATTAAAAAATTTCCCA +TTAATTTTCCACAAAAAAGTTTCTTTTTTCTTTTTTTAATTTTTTCTCAG +AAAAATCAAATCTCCCAACAAAGTTTTTCTCAGAAATTTTGAATCCACCG +CCAAAAATTTTTCTCAAAAAATTTGAATTTCCCACCAAAAAAATTTTTCT +CACAACTTGAAAATTTGAATTTCCCACCAAAAATGTTGTCTCAAAAATAG +TAAGTTTCCCGCCAAAACTTTTTTTAAAAGAAAATTTGAATTTACCGCCA +AAAATTACTTACCAGAAAATTTCAATTTCCCGCCAAAAATGTTTTCTCAG +AAATTTTTAATTTTTGGTCAAAATTAAAATACTACTACGGTACCTTAATC +CTACACCTCCACCCAAAAAATCATTTTTCAAAAACTACTACAGTACTACT +CCTACAGTACTCCTACCGTACTCCTAAAGTACTCCTACCGTACTCCTACA +GTACTCCGACCGTACTCCTACAGTATCACTACAGTACTCAAACCATATCC +ACCACTAACACCAATCCAATATCCCTTCAAACGACAATAACTTTTTTATT +CAAAAATACAGTAATCTTACTGTAGACAATCTTTTAAATTATAGTCTTCA +TCCAAAACTATAACGGACTCCATCCCATAAAAAGTAAACGACAAGTCATA +AAAATTGGAATTCGCGTAGTTTGCTCTTCCGTGAAGGCAAACACACGTGC +TCAGCTATATTGATAAGAGATGAAAAACGAGAGGAATCAGTCGAGGTGTC +TGATCTACTTCCAGGATGGTGACCGCTCTTCTACTCCTATTGGCTCTTGC +AGCCACCTCTTTGGCGGCTCTTCCAGATTTGGGATATCCCGGATGGCAGT +GCGATGCATCGCTTTATCAGAAGTAGGTGGCTTACTTTAATTACTAAAGT +TTGAAATTTTCCTCGCTTTCAGGAGCAAAAATACCCCGACTTCTGCCCAC +TCCGTCCGATTCACCGACATAAAAGTTTTGGGAGCTCTCGGAGACTCCTT +GACCGCCGCCAATGGAGCCGGAGCACCAAAGGGAGACCCTCTGGCTGTGA +TCCTTCAGTACAGAGGACTAGCCTTCCAGTGTGGAGGTGACCACTCTCTC +GACGAGCATGTCACTGTTGCAAGTAAGCCATTTTTCTGGGGAATTGAGAA +AACTGAGTTGTTGTAGATGTGCTGAAAAAGTTCAGCCCTAACCTAATGGG +ATACTCCACTGGAATCGGAAGTGCCAACGTTTGGGAGGTCTCAAAACTGA +ACCAAGCAGTTCCAGGAGCTGAAGCAATCGATATCATCACTCAGGCCAGA +GCTCTGGTGCAAATTATCCAAAGCCACAAGGAGGTAGCCAAGTCCAAACT +AAACATCAATTCCGATGCATTTCCAGATTGATTACAAAACTGATTGGAAG +CTTATCAACGTATTCATTGGAGCAAACGACATGTGCGCCTATTGCAACGA +CCGAGAAAATGTAAGATAGTCAAAGATCATTCCACACCCGTTTTATTGCA +TTTTTATAGGGCCCACACTCAAAAGCAATCTGGAAGCAAAATGTGATCAC +TGCCATTCAAATTTTAAAGGATAACTTACCAAGGTAGGTAAACTATAGGG 
+ACATGACATTCTAAAGTAAATAACTATAATATTATGAAATTGTCACATAC +CTATACCTACCTATAAATACCTACATGGTCCAGAGCTGTGTGGCAAATTT +TGAAACCAGCAAATTCGGCAAATTTTCCGGGGTCGTCATACTTGGTAAAT +CGCAACATTTTTTGGAGCTCAAAAAGTGCCAAAATTAACAGAAAATCTAA +GTTGTTAAACATTTGTGGCAAATTCCAAATTTGTCCAAATTTTTGAAATT +TGCCTTGCTCGGGCAAATTCGGCAAATTTGCCGTGGTTGTCGGACTCGGT +AAAGCGCATCATTTTTTGGTGCTCAAAAAGTACCGAGTTGATGGAAAAAC +TAAGTTGTTAAACATTTGTGGCAAATTACAAATTATGTCCAAATTTTTGA +AATTTGCCTTGCTCGGAAACCGGCAAATTCGGGAAATTTGGCACACCCTT +GATCCCATAGCAGTGGCGCAAAAAAATCTTTAAAATTGTGGCCGTTTCCG +TGCCAATTATAAATTAAAGTTAAACATATTATCGAAAAAATACGATCGCA +AATTTTTCTCGTAACTTATTTTTGATCTACCTTGTTGACTAGGCTCCTCC +CCTTCCTTCGACAAGACAGCCACAACAAAATAACAACGGCAACAAGATTA +TGGGCAAACACGTGGTGCCAGGCTGTCCCATTACAGTTTGATCTACAATA +AATGCGGGAATTTTTCGCCCAAAAAAAAAGTGACGTCGGCACGTTCTTAA +CCATACAAAATCAGTTGAGAACTCTGCGTCTCTTCTCCCGCATTTTTTGT +AGATCAACGTAATTCAAGCCAAAATGAGACACTCTGACACCACGTGGGCA +GAGTCAACTTCCAAAGGTAGATCAAAAATTAATTATGAAAAAAATTTGCG +ATCGTATTTTTCGATAATAAGAGCCAACATTTTTCGGCTTTTTATGGAAA +TTTTTTTTTCAAAAAGAGGAGTATGGTCCATAACTATGCCATCAATACCA +AGTCCTTCCAGAACCATCGTATCCATGACCGGAATGTTCGACATGGCCAT +GCTTCGCCAAATCGACCACGACAAGTATTTCTGCGACGGCCTCCATGTGT +TCGAGTGCCCATGCGAGAAGAACAAGGACTTCCCGAATACCGACATCTCG +GCTGCCTGCCATCTTTACATGGATGCTCAGCAGGAGATTCAAGACTCGGG +AATCTTCGACTCAACAGATGACTTTACATTTGTCGTTCAACCATTCTTCA +ATGGAATCACAGTTCCACCACTCAAGCCCGACGGAGAAGTCAATCTGGAT +TGGTTTGCACCGGATTGCTTCCATTTCTCAAAACTAGGCCACGCCAATGT +TGCCAAGCACTTGTGGAACAATATCGTTCAACCAGTCGGATCCAAGAACC +ATCAAGTCAATCTGTCAGATCCTACCATTCCACTAAACTGCCCAGACACC +AAGTGCCCATTCTTCAGAACAACTAAGAACAGTGTTGACTGCTCGAAATA +TATGACTGCCTAGACTGTAGGAAATTGTGTTTGTGTGTGTAATTTGACGT +ACTTATCGGGTCTGCAATAAAATGTTTTATCGTGTAATCTTTAATTTTTA +ACGTGTGAGACATAAAAGAAAGAGCACCCTCAATAAAAATATCCACGTGG +AGTACACAATATAGCTATCAATAGAGCGAGTAAATATTTTTGCAATAATT +ATTTATTAGAAGAATGGAGATCAAGGTAAAGGAAGAGATCTTGATTTTGA +AAAAAAGATACTGGAAATACTAATTGTTCACGGGATTTCATAGAAGGGGA +TATATCACGACTTTTTCTTTGAAAAATAAAAACAACATAAAATTATTGCG +GCTTCACAATCTGATCATCCGGTATTTCTCCAGATCTTTCCTTTTCCGAT 
+TCGATCTTCTTCCACTTGGTACGCCGGTTTTGGAACCAGATTTTCACCTG +GAATTTGAGAAATTTCAGCTATGCGTTGAGTGCCGCGTTACGTGCTGCGT +TGCGTGCTGCGTTGCGTGCCGCGTTGCGTGCCGCGTTGCGTGCTGCGTTG +CGGGCTGCGTTGCGTGCCGCGTTGCGTGCCGCGCTGCGTGCCTACCTGCG +TCTCCGTGACATCCAATCGTTTTGCAAGCTCACTTCTGTCACTACTTGAC +AAATACTTTTTCGCCTCAAACTGCTTCTCCAGTTCGAATACTTGTTTCCC +GGAAAATGTCGTCCGCGCCTTCTTTCTCTTTGTTTTTTGACATTTTTCTA +TAGGAGACTCTTTGGCTTGGTCGAGGCTGGATTCTGAAAAAATATTCTTA +ACGGCTGATTATTTTTCTACGTGTAGCATTTTTCCGAAATTCATTGAACA +AAATTAGTAAATTTCCAAAAAATTTTTTTTCCAAAATTTCGAAAACATTT +TAATTTTTTTCAGATTTTCTAATATTTTAAAACAAAAATTGAGAGTAAAG +GAAATAAAAATTGTGTCTTGTAATTTTTGTAATTTTTACTCAAAAAAAGG +CCAGTGAAATACTATTACAGAAAATACGAATCGATGCACCATGTCAAAAA +GAATAGTATTTCACCTCAAATGCACCATGCCCAAAACTAGTCTATTTTTC +AACTACAAAACTTCATACACCTAATCCAAAGAAATCAACTTACTGCATCT +TCCAAATTCATCAATTTCTCCATTAACAGTTGGTAAGTCAAGTATATCCT +TTATCAGGTGACTAGTTCTTCTCTGAGGCTTCACATCAGACGTCAACAGT +GCAAAATAGGGTTGAAGTAGTTGGAAGAGCAAGTCAGTTTCCATTTTGCA +CGAATGAAAAGAACTTCTGAGAGACTTCTCTATACAGTAGGTCTCGAGGT +CCCCGCCTTCTCTTCAATCAAATAATTGAAATTCTGGACTCTCGGGGGAC +TTTTGACGGTTGACGACGACGACAATTTGCCAAAGTGGTCGACAATGATT +GCGCAATTGGTGTGGAAAGAAGAAGAAGATGGGTGAGTTCTGAGATGATG +ACAAAAAGAGCATGAGCCAGCAAATTGAATAAGAGACCGGATTTCGAGAA +GGAGGAGGGGGAGAGGAGGATTGGTGGTAAGGGATGATACTTGATGCAAT +TGAGGGAATGAGTTTCTCATACTTGACTATTAAATACTTTTCTTGAACTT +AAATATTTGAATTGAAATTTTGGAGTAGCGGAGGCACATGTGATATTATT +GTGCTAATTTGTAATACTTGGACTTTCAAATATAACCTCAATTATTTGAA +AAAAACCTTTTGAATAGACCAGCAAGGTTGATCTACAATTTTTCATGGAA +ATCGAATTAATTTTGTAAGGATGTCTACTGAGTATAAGCTTATTGTTAGG +CTTAGGCTTAGGTCTAGACTTAAGGCTACACTTAAATTTAGGCCTAGGCT +TAGGCTTAGGCTTAGGCTTAGGCTGAGGCTTAGGCTGAGGCTTAGGCTTA +TGCTCAGACTTAGGCTTAGGCTTACGCTTAGGCTTAGGCTTAGGCTTAGG +TGTTGGCTTCAGAATAAGCTCAGGCGGGGGTGAGGGAAGGGTAGAAAAAA +AAGAAAGAGAAATTTCCAGAAATTCACAAAATCCAGAAAAAAACATATCT +TTCACAAGTTTATATTTTTATAGCCTAAAAAATTACTTTTAAAATGAGGA +TATGCAACCCGCACGCAAAAAAACCTTTTTTTTTTTGAGCAATTTCACGG +CTCTTTTTTTTTTAATTTAAAAATACAATAAAAAAACCAAAAAAATTTTC +CGAGCCCAAATTGCTCCCAAATGCTCTAAATTTCTTTTGAAGTTTGATAT 
+ATTCCTATTAGAACTTGAGGAAAATGTAAAACTAGAGAAACATAATACTA +CGCATGTTAAATGTAAATTACTTACTTTTAAAAAGTCGTACGTTATTTTG +GAAAGTTTGAGGTTTTTGCATTTTTTCCGTAATATCACTGAACACTTTTA +TTTAAATATCATCAATTATCGGAAGTTTATAGGTTGCAGGAGAAAAAATA +TATTCTAGCGGGAAAATGAAATGAGACGGGGGGACATTTTTAACCAAAAA +AAAATCAGAAAACAGTTGGTAATAAAATTATTCCAAGAAAGTATAAATTC +ATGACTTCCATCTTGGGAACATTCTTAAAAAATGTCAACGAATAAATCTT +TGTGGCAAACAAACTTTTTCCATAAATGTTTCAGACCTTATATAGCGGAC +ATGAAAGTTGATTGCTTATTTGATATGTCTTGAGTTTATTTTTTTTTATT +TCAAGCGAGGGGAGATTTATTTGCCTAGATATATATATAATTATTACTGG +AACGAACTGGAAAAAAACTTGGGATCAAAAAAATCAAAAATTCAAATTCA +AAAATTCAATTTTTCGAAACTTTTTCTACAGCTTTGTCGAGAAGCAACAG +ACAAATACTTATAGCTAAAATTTTATTAATATTCTAGTTTTAGTCATGGT +GAATCGAGAATTTGTCAATTTCCATGTGGTATCTTAAAAATAAACCGATA +CACGTGGTGTCAGGCTGTCTCATTGCGGTTTGATCTACGAAAAATGCGGG +AATATTTTTCCAGAAAAATTGTGACGTCAGCACGCAAAAATTAATAAGGG +TCTAACCAAAAAAACTTGAATCTTGGAAATATTTTTAATTCACCCTAAAT +TTTCCCCTGATTCCGAATATCTATGTGAAAAAATTCAAAAAAAATTTCCT +GAATTTATATTTAAGCTTGAAATCTTTCATAAAAATTTTTTTTTTTTAAA +TTTCATTTGTACTCGCATCACTTTTTTCAAACTCGCGCCCAAATAAATTA +GCGCGCGCGTGCATCGTTTGACTTTTTTTCGTTTAGTTTTTTTTATTTTA +GCCGAATTCAATTTTTTTAAAACCAGTTTCATTCATTTTTGTCGGTTTTT +TTTTTCTGAAAAAAATGTAAAAAAACGAAAAAAAATCAAACAATGCAAGC +GCGCTCCAATGCTAATTTATTTGAGCGCGTATTTAAAAAAGAGACCGGGA +TACAAATGAAATTCGGCGATTTCAAGCTTAAATATAAAATCTGGCAAACT +TGTTTTGAATTTTGAATTTTTTCACATAGATACTCGGAATCAGGGGAAAA +TTAGGATTCATTCAAAAATATTTCCCAGATTTCGGTACCCCACCTTAAAA +ATTTTTTTTTGGGAAAACAAGGAAAATTTCTAGCTTGAACTTATTTTATC +AATACCTAAAATGCTGAACATTAAAACACACACACCCGTATGAGTATTAT +ATACTTCCAAGGAAACATTTGAAAAATAAAGAGTTCTCCGCTTTGATTAT +ACACTTGTGATATTCTCACGCCTGGCACTTGGCACCAGCTCAAAGTGCTC +ATTTTGGCTATAATTGTTTGAAGGTATATGAAAAAGTGGAAATTGGGAGG +AAAAATGGCATGTTTTGGCTTTCTCTACCTGGCCTATGTGTTAGAAAACT +CTTCTATGAAAATTCAGTTTAATTTTTTAATTTTTCTTAATTTTTAAAAT +TCTAAAAATGGCTAAACTTTCAAAAATATAATTTTTAAAAAGCCAAATAC +TATAAAATTTCTTTGTTAATTTCTTTATAACCAAAATTAACGATTTTCAA +AAGTCAAATTTTTCAAAATTACAAAAAATTTAAAATTACTGTAAGATGCG +CAAAAATTACAGAGTTTTCAAAATATAAAACACTCTTGTGGCCGAAAACT 
+TCCAAAGTTCAGGAAATAAAAATTCCAAAAAAAAATCAAGAAAAAAATGT +ATCTCGAAAAATTACCGCTATTAGAAAATCGCCTTCTCTAAAATTTCCCA +AATTTTGAAATTATGGAAATTCCCAGATTTTTCAAAAGCGATCTAGTCGG +AGTTGTCAGAAAAAAAGTTCAAGAAAACTTTCTCTAGAAACTCAAAAGTT +GGCCTGATCAGAACTTGTTGGATTTTTTCAATTTCCCAAAAAATTTTCAA +AGTTGCCGAAAATTACCGAAACTCCAGGGCTCACTTTTAGACCTTAAAAT +GAAACTTTTGCCATTTGAAAGAGGCTCTTTGAAAAAGGTCTCTGCAAAAA +ATTTAGCGGCAGACGAGCACTTTGAACTAGAAAAATGTCGCTTTGAAATT +GCTATCATTGAAATTTTTCACTACTATAGAGGCCGGTAAAATTGCTCCAA +ATCATAATTTAAAATATTTTTTCATGATTTTTTTTTATTTTTTCATGATT +TTCGAGGTAGGCAAATATTCTGGGAGCTTTTTTGGATATATTTATTTTTT +TGGAAAAAAAAATCTGAAAAATCCCACATTTCCAATAAAAAAATTCCAAA +AAGCTTTCAGAATACTTGTCTACCTCGAAATATCCTAAAATTCATAACAA +TTTCAAAAAAAATATAATTTGGAGCAAATTAACGGGCTATTTCAAAGTCT +AAAAGTGAGCTATAATGCATACATTCCAGAAAATGGAATAAAAAATAAAC +TGAAGTCAATTTAATCTAATCTCATGATACCTCCTCCTCCTTCCCCCAAC +AAATAATGCTCAATAGCCCCCAATCAACTCGTTCCAATTTGAATAGTGTA +TTATTTAACAAGCTTCATCATCTCTTCCTCCTATTTTTTTTTGTCATTTT +TATCGTCCCCTCGGCCGTTTGTATTATCCTCTCTTCCTTCTTCTTCTTCT +TCTTGAAAACGAATTGAATTTCTTCATCCAATTCACTCAAATCACATCCT +CCACGGACACATTCAATTAATCCAAACGTCCTTTTCAATTGAACACAACA +ACAATCACTATTTCATAAAAATTGTTTTAAATGTACATATATCAAAAATT +GCGACGGCTGGAAAAATTAAATTTTTACACCAGAAGAAAATTTGTAACCA +TAGTTATGGTAGCTATAACTCAAAAAAGTAGGAAAATTATGTGACATGTT +CTCAGGACTGCAGTGATCGCAATAAAAATAAATTCTGCTGATCCAATACT +CTTGAAATAATAAATCGTTTCTGATTGATTCCATCATTCTCAGACAAATG +GAGCTTGGTTGAGTGCTCAATTGGTCAATGTCGCTGTCATGGCACCAGGG +TTGTGCGGCATCCGGATTTTCTGGCAATCGGAAATTGCCTTTTGCCGAAC +TCTAATTTTTTTTCGGCGATCGGCATTTGACGGTTGCCGAAAAACTCGTT +TTTTCGTTTTTTCGCCATTTTTCGGCATTTTTCGGCGTGCTTAAACTCTT +TGAGGTTTTTTTTTTATTTTTTCTTGTATTTTATAAATCTAAATAATTCA +ATTCCAAAGTTTGATAAGGAGTGTCTTGGTTTAAATTTTCTAGCACAGAT +CAATTTCATTTATTAACGATCAGACTGGTAGGATAGTCAACGACCCCAGA +GGCAAGAATGATTCAAAATAATTGCACACATGGACAGCAATAGACAAAAT +CTTTGAGAGTTTTATGGTGGGGTGGCGATGTGTCGACCAAAAGAGTATAT +CCACTATATGGCTAATAATTCAAATATGTTGTATTTTGGAAAGTGTTAGA +ACACAAGTGACTTGAGTGTTTTCGGATACAAGAGACACAGAGATGTCCCT +TGATTGTTCACAGAAAGCGATTTTTAAGGGTTTGACTACAATATTTGCGG 
+ATTTATGTTTCAACGAATAAATGTAAAAAACTCGAATATCATTTACTATA +TATAAAGCGCTTATTCCGTTTTTCCATAGTTTGTAGTCTATGTAGTCTTT +GTAGTTTGCGTAGTTTTAGCTTCTGGAGGGATAGTGAGTTGGGGTTAGTG +TAGGGATATAGTCGGCGTACTGTAGTGGTACAATGGTGGTACGGTAGGAG +TACTGTATGATTACGGTAGTCTCAGAAAAATTGGTTTTCAGCTCCAGAAG +TCGGGGACCGCGCCGGAGGTGCGGTCCACGGCTGGTATATACATATGTAT +ATGTATGTATATATGTATACATATATTCAATTAAATTTAATTTAAAAAAT +AAAAAAAACATTTCTCACAAATGCCGAAAACACCGAAAAATGGCGAACAT +TTTTAGGCATTCGCCGATATTTTGACCAACTATTTTTTTTTGGCGAACGG +CATTCGCCGTTTGCCGCACAGTCCTGCATGGCACCCCGACCGGTTTCGTG +ACAATAGTTTTGACCTGAACGCACTAGGAGTCAGGAAGTTATAGGGGTCA +TGGGTGCTTTAATGACCCCCTGTAAAAAGCACATATATTGACAACAAAAT +TTGCTCGCTGTTTTTTGTATTAATTTTTTGTCTCCCCTCGATGCGCATTT +CTAGTAATGATGCACATATTTATTTTTTTTCATGTACATATCTAACAAAA +TCATAAAACTGAAAATTGCGCTTTTGTTCAAGTTAGCAAGACAACCATTA +TAAAGCTTTCCAAATAGCAAATTAGCAAAATAAAGTGAGAAGCTAGAAGA +TAATGGGGAGTCCCTATAATCTTTCGGGCACACCTTCAGACTTTGTGACT +ATAATAGCCCAAACCCACAGTTTACTGAAAACTAGGGCTTCATTTGCTAT +ATGGTACCCCTAAACTCTTTCAGTAATCGAATCCCAAGAAACCTTCACAT +CTGTAACCTGGACAAAAAACACACAATTTCAAAAATATAGAATTCTTTAT +TCAAGAATCATCATCATCATTTTTTTCAATTTGTACAGACAAATGCCGAC +CATATTTTTTGTCAGGGGAATGGCAACGGCGGGAACCCAAGAAAATCAAA +CAAAAATATGGAGAGAAAAATGAGCCAGAGAGCCTGAGCAACCATGCATA +AATAATGAGGAGTTGGTGAGATGAAGAAAAAACAGATAAGTTACGACGCG +AGGAACTATTTGAAAAATAAACTGCTAAATACTATGTGCTTAATCCTCCC +AGGTAAGCGAGTGACGGTTCTCGTATCTCAGCTGGCAGTTTCTCTTTCCG +TACCAGTTGACGTAGGCGAACTCGGCGTCGGAGACGATGGCGTTGAAGCG +TCCTCCGAACTTGGCCGAGGCGTCTCCCTCGATCTTTCTGGCGGCCTCGA +GGTTGTCCTGGAAGGTATCAGTGATTGGATTTGATTTATAGTTCTTCTTT +TTTTTCTTTAATTTCTTGCTTGTAATTCTAAAGCAACTAGAAAAAAAAGT +TTCGGAACGCATCTATCATGCCAATTTACATAACCGGTATGGGTTTGCGC +GCGCGGGGGGCTTGCAACATTTTCTTAAATGACATTAAGGTATGAGAGGG +GGGGGGGGTGTTGTAAATAGGTATCCAGTTCTTGTCTGTCCCCGTTATCA +AGTTTCTCTGGTGACGCAACGCAAGAAGGCAGAAAACAAACCTTCTCGTT +CTTCAAAGCGGCCTCAACGATGGCGCGGAGCTCTGGATCGTCGCAGTTTC +CCTCGTCTCCTTCCTCATCGGCGTACTCGACAGCTTCGTTGGTCTCCTGG +AAATTTAGAAATTAGATTTTTCTCGAAATTTGGCCGAATATTAAAAAAGT +AATACTAAGAATTTCCCCACGAAAGGCTGAAAAATTGAGGTTTCCTGGGA 
+AAAGTGAAATCCCGGTCTCAAAATTTTTTTTGATTTTTGAGCTCCGTTGA +ATACCGACAACAAAAATGGCTCTGGAATTTGAAAATTACGGGGAACTGAA +ACGCAACTTCTACTGCTGCCAGTATGTCTCTCCCGCATTTTTCGGAGGCT +CAATCGTAGATCAAAGTTTTCCGAATAAGTTACTATTAGGTTTTTATCGA +GAATTGGTACTAGACTATGTTGACAAATACCAAATATCAAATATTTACCC +AGATGCAAGTTATGGTGCTTTGAACTAGAATTCCTGGGAACTGATTGTAA +GTGTCAAAACCTCAGCCAACCGACCTAAGCCTAAGCCTAAAAAGGTGTAT +CGAACTTAAATTTCGACATGGCATGGCATTCAAATGGGTATACCGATGAT +TTTGTTTCAGCAAAATTTTTAGGGTCGAATTTTTAAGGTAGAGCAAAATT +GGGATAATTTTAAAGACTATTAATTGTCCAAAAATCCAACAAATCCAACT +TCAAATGAGCTTAAACTTTAGATCACCTTCCCAAAAACCCACCTCAACAG +TATCAACTTGCTCGGTGGCGGTCTCGGTGGCGGTGGATGGCTGCTCCTCA +ACAATATCATCATAGGTGCTCTCCTCCTTGGCAGCGGCAACTGGAGCCTG +AGTCTCCTCGACAACTGGAGCTGGAGTCTCTTCAACGACAACTGGCGCCT +CGGTCTCCTCGATCACAACTGGAGCCTCGGTCTCAACGACGGCTGGGGCT +TCGGTCTCTGCAACGACGACTGGTGCTTCAGTCTCAGCGACGATTTCTGG +AACGATTTCCTCTTGGACAGTCTCGACTACGGTTGGCTCTGGTGCTTGCT +GTGGTTCTGGTGCTTGTTCTGGTTCTGGTGCTTGCTGTGGCTCGACAACA +GTCTCGACGACCTGCTCGACTGGTGTTGGGACTGGCCCCTGGGTGTATGT +GGCGACTGGTGCGACTTGCTCGACAACTGAAGTGTGGGTGAGGGTGAATG +TGTGGGTGGGAGTGGTGCGAATCTGACTAAAAACTGTAAAAACTAACCTT +GAACAGCCTGGTGCTCTGGTGGAGCTTGGTAGTTTCCTCCCTGAGAGACT +TGGGTCACCTGGATAGAGCTGACTGGTCCCGATTGGTATCCTCCTTGTTG +AACTTGCTCGACTGGGGCAGATGGTCCGGCGTTGTAGCCTCCTTGTGGGG +CGACTGGGGCTGGTTGGGAAGCTCCAGCGTATGAGGGTCCTCCTCCGATT +GGTCCTTGGGATCTAGCGTATGAGGATGCTCCGACAGCTTGGGAAGCTCC +AGCGTATGATGGTCCAGCTTGCGATCCAGCGTGCGAAGATGCTCCTCCTC +CGATTGGTCCTGGACCAGCTTGGTAGGCACCTCCACCGACTTGTCCTCCG +AATCCTTGTGGTCCCTGGGAGTACGAGGCTCCTCCGACTGGTCCAGGTCC +TCCGAATGGAGCGGCATATCCGGCTCCTCCACCGATTGGTGCCGGTGGTG +GTGGTGGTGGGGGTGGAAGGGCAGGCGCGGCGGCATAGCTTCCGCCTCCG +CCTCCGGTTGCGTAACTGTAAATGAACACATTGGGAATAGTATGGAAGAG +GTGGCTAAAGGGGGAAGCAGGTTTTAGTTTTGGCGGCCAGCAGCAGGAAA +AAGGACATTTTGCATGAATGGGGTGGCAAGTAGAATGGACTTTGTCGCTC +CTTAAGGAAGACATCAAAAGAACTTCAGAAAGAAAGAAATTATTTTTGGA +ATCAGGAAAAAGCATTTGAATAGCAGTAGAAAAAATATAACAATATAGTC +ACAAAAATACAAAAAAAAAGGCAAAATCTTACCTCGAGCTAGAGCATGGT +GGAGCTCCTCCTCCGCACGATGGCTGGGATGGTGGGCAACATCCACCGGC 
+GCTTCCGGCACCTCCGATACACGCTAACGCAGGCGACACGGCCAATAAGA +CGGCTACCAAGACTCTCATCGTTTTTGAGAAGGCAGAAATCGCAATGGAA +TGCTGAGTAACTGTCGTCGAATGGCAACGTAGGACGTGCCTTTTCTACCT +GCCACCCCAAACTAACTCCGCCCATGGCCGTCTGGAGGGGGCACGCGCGC +GCACGAAGCCTTCCGAAAGACGCCCGATCGCGTTTTATCGATGATGATGA +TGATGATGATGAGATGAAGCCTCTCTCTCTCCGTCGGAGGGCACCTTGCG +ACATTCACCTTGTGTGCGTGTGTGTGTTGCAGAATAGGGGGAAGAAGAAG +CGATCGCGTACAAATGGGGTTCGTGTTCATTTCCGAATGATCCAATGATC +AGCTGGAATATTCCTATTAATGTGACTCGTGCCACGTCATTGTTAATGAT +CTACTATGTTTGAGTGATTGGATATGTGTTGTTTCCTTTTCTAGTTAGAT +TCCTGAACAGTTCACCTTTACCTAATCCGTGATCGATGTTCCAATGCTTC +TTCCTTTCACTTCCACTGCGGCACCTAATCAGGCGGAATAACATGCTTTC +ACTCTGCCGGTAAAGTGTGAAAATCCTAAAGTTTTAGATTTCAAAGATTT +CAAAAACGATCGTTGATCCTACCGCGCTTCCGCATCAACTGATATCAGGT +TTTTTTTAATGATAAGATTGTGGAGGAATGAACTGGTGAACTTTTAGGCT +GATTTAGAGCCACGTGGAAACTTCCAGCTTTTACCTAGGTGACTACAAAC +TACGGAAGATCAAATTACATACATTTTGTAAAAACATACATCACTTTCAA +AAATTTCGGTTTAGTTCAAAAAATCACATTTTTACTGTTTTGAGATTTTT +TTTTTCCCAAAAATGTCGATGCAATTTTGATGTCGATGCACCACGGGCGA +TTTAAAGGAAATTATCAGTTTTCTGAATATTTTTGTTTTGTATGTCTTGA +TTATGGGTTTTGTAGGCTTCGATGGCAGGCAGGCGAGGTTTCGGGTCCTG +GCGCCCGTCTGAAACCTGCCCGCTTCACGCCGGCCTTTTGCCTCTTTTTT +GCATTTTTACGTGGTGCCAGGCTGTCCCATCGCAGTTTGATCTACAAAAA +ATACGGGAATTTTTCGCCCAAAAATATGTGACGTCAGCACGTCTTTAACC +ATACGAAATCAGTTGAGTCTCTTCTCCCGCATTTTTTAGTAGATCTGTGT +AGATCAAGCCGAAATGAAACACGCTGACACCACGTGTGTTTCTCATTGGA +AATTGGAGGCCTAGAAAATCAAAAGTTAGGCCATCAACAGATTTTTGTTT +CAGCAAAAGGCGCGGATTGAGGCGAGGTCGCTTTAAAGTCAGGCAGGCAG +GCGGTTAACGCCTACAGGAAAACTTTAGGTTTTAGGTTATCTGTATGTTT +CCCTTTTGTGTGTTTTTTAGACGTCCAAAGAATCGTATCAAAATGAATAG +GTATTTTCTCGTTTTATGCAGATAGGTAAAGGTCTGTAACCTGAAAATCC +AGAGCCTTATAATCCACACCTACATAATCATCAATTAAAATATTCAGATA +ACTGATAATTTCCTGAAAATAAGATATCTGTAAATTGAATCAACGCCAAT +TTTAGACATGGTGCATCAAATGCAACATTGTATTGATATATAAACTTTGG +CAGTCAATACATGTTTTCTTATGCATTGTTCTCAGTGCACCTTGATCAAT +TGAGTAGACTTCATCTATTCTTGTCCACTTCCTAAACATGAAATCAATCA +CGGCACCCCATCAAAAAGCACTAAAAAAGTACATTTAAAAGAGTACATTA +TTATTGCCCGTTCAATCCTCGTAATGTATCTTTGCAAGTTCCTTCTTCCT 
+CTGACATTTCCCGTTCCAATTCTCCGCCATACCTGATATGCTCGACTTTT +TCGATCATAGACGCGAATTGCGCAACGCGCGAAAATATCTTGCGCAATGA +ACAAGGAGAAGAAGAAGAAGAAGAAGTAGGAGAAGCCAAATAAAAGATAG +GTGATAACCGAGATGTTAAGCCAAAGAGAAGGAGTGTAATAGCTGAACAA +GGCCTATTTTTGTGTAACTCCCCCCTCTGACAGCTTCTTCACTTTCCCTC +CAACCGTTTTTACTCTGTATGGTAATTGTTGATGGATCGCCATGGAAAAG +CCCTAATGGTCATGAATAAGCTGTAAAGCAGCGGGGGTGAGCTTGAAGGA +CGATCGGACAAATGAGATATGAAGCGTGAAAAACGGAAAGCATGCTTAGA +GGGGGAGATATATGTAGATTTGGGGAAAAGCTACAAGGTCGATTTTTATC +CGGTGTCAGGATTATAGAGGTGTTTTGAAGTAGAAGCAAGGCACTGAAAT +TCACCACTTCCTCTATTAGAATTGCACATTTTTTAAAACTATGTACCTGT +TTTCTTAAGAATTGCTTGAAAAGTTGAATTTGAACTAGTAGTTCTTGCAA +AAAGGCCAATTGAACTATCTGGTTTAGTTGCGTCCTTATTAATATTGCAC +TTTTTAATTCCGGCAAAACGCCGGTTTACCGATTTGCCGGGGATTTTCAT +TTTTGGCAAATCGCCGATTTGCCGTTTGCCGAAAAAATTTTTAACAGACG +GAAAAACTTAAAACTGTGTCATTTTGAATTTATTACCGTTTTCTTTAGAT +ATTTTCATAGAATTTGCTAACTTTTGAAAATAGATGAAGGAACATTTATA +TTATTTGAAATTGAAATTCTGAAACTTCCAAAAAAATAAGTTGCAAAACC +ACAATTTGCCGAAATGCCGTTTTCCCGGCAAATTCGGCAAATCGGCAATT +CGCCGATTTGCCGAAAATTTCAATTCCGGCAATTTGCCGACAAAGCAATG +TTTCATAATACATAAAATGTTTCTAATTTTTAAGAATGATTTTTTTAATT +TTGACAAAGTTTTTTTATAAAAGAGCCGAATTATTTTAAATTCCCGCGCA +AATAAGTGACGCCATTTTTTACATTATTTGTATTTTCTGGCTAAATTAAT +TGGTTTATGTATGTTTTTTCTTTAGTTTATAACTTGAGTAAAATTCACAG +TAAAAATTAATTTTGAAAAATCGATTTTTAGTTTAATTTCAATTTCAGAG +TTTCGGCAAATGTATTCTCAATTAACAAGTTTTCCAAACGGAAAAAAATC +TGAAAATTGACAAACCACGGTGTAGATCAAGCCGAAATGAGACACCCTGA +CACCACGTGTGTTTCCCCTTGGAGGCCTAGAAAATCAAAAGTTGGGCCAC +CATCAGGTTTTTGTTCGACTCAATTGGTAAACTTTGCAGACTAGGCTGAA +ACGTGGTATTTGCAAAAATACAATTTTACACATTTATTTTCCACAAACCG +AATAAAATCCTTATCTCAACAGTGTTATATAATTTTTTTTTTCAGATCAC +TATCACGACATTTTAAAATATATTACAGCACTCTCTTCTTTAGGTTTTGA +ACGTTGATGCTTCGAGGATAAACATGAAAACCAGATCAGCTTTTCCGTGA +CCTCACATACTCATTTTCTCAAATTCTCAAAAATGTTCTGCCTTCTCCAA +CGGTCTCTAAAAAAGCTTGGAAAACCCACAAAAGGTGTCTTGGGACACTT +CTGTCTCTCTCTTTATCGATGACCGGATCGCCCGGATAAACGGGTTTTGC +GGACTGACACCAGCAGGTGGAAATGGGCACACTTGTAAAATAAAACGAAA +AATAATAATAAAAATAACACGGAATGCGGAGAGAAGTGGCAAGATCGGAA 
+GAATGGTGGGGGGGGGGGGGGGGGGGATTACATAAAGATGGCGGCATAAA +GTGCGTTTCATTAGGGGGATATTGGGGTTTTTATAGGGGGAAACTTTGAT +TTTTTTGAAAAAAAAATTAAGAAAGAAGCGACAACAACCAACAACAAGGT +CATTTTTAAAAGGAAAATTTGGCATGAAATTTACATTTTGGGATGAATTA +TGAGGATATACTCATATAACCTAATTTTGTCACAAAAAAAACGTCGAAAT +TTTTAAAAATGACTACATTCTGCAATTTTGAAAAATTTGGTGGTCCTACC +ACGAAAAATCCATGGTTTTTTGGTATTTTGACGAAAAACTGATTTTTTGT +GTTTTTTTTCTGGTATTTTGAAGCCGCTGTACTTCAAACCCTTTTCATAT +TCTAAAAATATCTTTTTGCAAAATTCGGTTTTTTCTAAAGTTCAAAATAA +ACCACTATTTTCAATTTTTAACGGAAAATCTGCAAATAAAATTTCGGAAA +ATTCCGGCAATTTGCCGATTTGCCATAAATTTCAAGTTCCGGCAACTTGC +CGATTTGCCGATTTGCCGTAAATTTCAAATTCCGGCAAATTTGCGAATTG +CCGTCCTACCATGAAAAATCCTTTTTTTTTCTGGTATTTTGAGACCGCCG +TACTTCAAACAATTTTCATATTTTACAAATATCTTTGAGCGAGCGGTTTT +TTTTGAATTTCAAAATAAACCAAAAGTCTACTTTTTTTGGTTTGTTTTTG +TAATTTTTTTTTTAGAAATTTCAATTTTTATCGGCAAATTGGCAATTTGC +CGGTTTGCCGGAAAATTTCAAATCTGGCGATTTGCTGCTTTGCCGATTTG +CAGGAAATACCTATCCCTGATGTAAATTACCAGAATTAGAAGTTTGGTAC +ACGATTGGCACACAATTTTTTGGTACAAAACACGCTAAAAATTAAATGTT +TTTGAAAAAAAAAATTCTTTTGAAATTGTTTTAGTGTGAATTTCCATGTA +TTCAATTTTTATCATTTTTAGCCTGTTGCTCCAAACTAGGTTTTCTCCCA +CGTGGCATCTCTCATTACACATAAAAATACCTGCTAACTCAAAACATTAA +TCAGAGATCCATATCCATGTCTTCCTCACTTTCCAACTTTTTCCCTCTTA +GCTTATCTAGATATATGTAGGTATATCTATATCTACAAACAACTTTCTCT +TTTCAAACATCCAGTTTATCTTCTGACCTTTGCAACATTTTTCCTTTATT +CCAAACTTCCGCCAGATTGCATCTTATTCAATTATTACATTCAAAGAGTG +TCTAAAGTTTTGCTTAGAAGAAACAAAAGGTAAAATGTCAGATATGTTTA +GAGAAGGTTGTAAAATTTGGCAATTTATTGTAAAAATTCGCAATTTTGAA +ACTCCTTTAAAATGGTTAATTTTGTAGTTAGAGGACTCAAAATTAATGCC +CGAGAACTAAAAAGTTGTCATTTTTCATTTATCAATGATTTTACTTAATT +TCAAAAAAAAAATTTTGCCCAAAAAAGGACAGTCATTAGAAAAAAAAACG +GAAAAATGATATGGAGTGATTCTTTTTTATTTTATAAAATTAATCAGCAT +AGTCAAATGTACCAGAAAAAAGCAAATAAAATGTACTAGATTGACCGGAA +GATTATTTGAAAAATTGCAGAAAATGCACCAAAAGCTGCTTTTTTCAAAA +ATTCAAAAGTACGGTAAAATCATACGGAGCTATTCTTTTTTATTTCATAA +AACTCTTCAGCATAGTCAATAATACCAGAATATGTCAATCAAAGTATAAT +AGCTTGTACGGAAGTATCTTTTTAAAAATTGATAAAAATATATAAAAGCT +GTTTTTTTCAAAAATTCAAAAGTATGGGAAAATCATAAGGAGTCATTCTT 
+TTTTATTTCATAAAACTCTTCAGCATAGTCAAAAATACCAAAATATGTCA +ATCAAAGTATAATAGCTTGTACGGAAGTATTTTTTTAAAAATTGATAAAA +ATATATAAAAGCTGTTTTTTTCAAAAATTCAAAAGTATGGGAAAATCATA +TGGAGTCATTCTTTTTTATTTCATAAAACTCTTCAGCATAGTCAAAAATA +CCAGAATATATCAATCAAAGTATAATAGCTTGTACGGAAGTATTTTTTTA +AAAATTGATAAAAATATATAAAAGCTGATTTTTTCAAAAATTCAAAAGTA +CGGGAAAATCATAAGGAGTCATTCTTTTTTATTTCATAAAACTCTTCAGC +ATAGTCAAAAATACCAGAATATGTCAATCAAAGTATTATAGCTTGTACGG +AAGTATTTTTTTAAAAATTGATAAAAACAAATAAAAGCTGATTTTTTCAA +AAATTCAAAAATTCAAAAGTATAGGAAAATCATATGGAGTCATTCTTTTT +CATTTTATAAAATTCTTTAGCATAGTCAAAAATACCAGAATATGCCAATC +AAAAGATAATAGCTTGTACGGAAGTATTTTTTTTTTAATTGATAAAATAT +ATAAAAGCTGATTTTTTCAAAAATTCAAAAGTACGGAAAACTATATGGAG +CGATTCTTTTTTCTGTTATAAAACTGTTCATCATTGCCACGTATACCTGA +AACTACCAGAAAAAGTATGCTAGCATGTACGGAAGATTATTTGAAAAATT +GCAGAAAATGCACCAAAAGCTACTTTTTTCAAAAATTCAAAAGTATGGAA +AAATCATATGGAGCGATTCTTTTTTCTTTTATAAAACTGTTCAGCATAGT +AAAATGTACCAGAAAATACCAAAAAAAAGTATGCTAGATGTTCGAAAAAT +TGTTTTTATAATTTTTGAGAGGAGTTTCAAAATTGTGAATATTTACAAAA +AATGGACATTCTTGCCACTTTTTACTAATTTTTGATGGGCTAAACCTAGA +TTTTCTAAATTTTACATTTATGAATTACCCGTTTTCGACGAATTCAGGCA +AATTTTTATTTTTTCCTATTTTTTTCAGCCATTTAATCAGCCATAATAAT +GAATGTCCTTTTTTTTTGGGCAAAAAAAAATTTTTTAAGGCTCGAAGACC +AATTTTAAGTTCTCTAGCTACAAAATGAACCATTTTAGAGGAGTTTCAAA +ATTGTGAATTTTTACAAAAATTGCCCAAATTTGCCACTTTGTAATGGTTT +TTGGTGGGTTAAACCTAGATTTTCTGTATTCTGCATATATGAATTACCCG +TTTTCAACAAATTTTGACAAATTTGTATTTTTTCCAAAATTTTTTTTCAG +CCATCTATTCCTTTTTTTTGTCTGTTTTTTTTTCGGGCAAAAAAACCTTT +CCCCACTGGTGCTATACCACCTTTAACGGTAAATTTTACTGGTCTAAAAA +TCGGCGAAAAAATGAAACTTCAAGTAATATAATGGTAATTGAGTTTTTTT +TTCTATTTTCTTTGTTCAAAATATCAATTATGACAAAAAACGAGAAAATT +ATATTGACACAGTTAAGCGGATATAAATAATAAAACATCACAAGCACATT +TAATTTTTCAAACCCCCTCAGGAGTCATGAAGAAATCCTTCTCTGCACAT +CTACCTTTCTCCACATCCAACTCCTGATTTATTTTCTCCGCAAACTCCGT +GTAACTTTTAATTGCAACATTCAAAAGTTTTGTATGATGCTTCAAGTGAC +CCAAGTCGATTACACAATCCCCGCAAAGAGCTTCGCTGCGAATTTTGTCA +ATTCCCTCTTCTAAATTCACTTGTTCTCCAAACTTCACTGATTTCCTTAG +GATCCCAGTGCTCAGGCAACAGTTTGTGCAAATTCTCAAGGTTTTTGATT 
+TCAGTTGGCAAGCGGTGCAGATAGGGCTCTGGAATTTTAAATTATTGATT +TTTTTAAAAATTATTAACTCACCTCGTCGCTTTCATGTGCTCTTCTTGGA +TTATGGTCATATGGAACTAAGAATCTCGAAAGTTGTATTAAATCCCACAA +AGATCGATTCACATTTTTCCTTTCAAACGCATTTTCTCGTTTACAGTGTG +GACATCTCTTGTTCAGTAGTTGGAGTCGGCATCTGTCGCAGATAGTGTGA +CCGCAAGTGCCTATACAAGGGATTTGGGTATCATGATCTGGAAAATTTGG +TGTGTAGGGCTTCCATGGTAGGCAGGCGCGGGGCCTGGCCCGCCTCACAC +CGGTGAATTTACAAATTTTTGATTTTCTCGTTTTTGTCATTTTGATGAAA +AACATTGAGGCAGGCGGAGGTCGCCTCAAGTTCACTCGGCGTTTGCCGCC +AGCTTCCTACCCAAAGCTTTGCAACTTCTTACCCAAGCCTTGCCCAAAAA +ATAAAAAGCTTAAATATCAAAATTACTCAATTTTGTGTCATTCTACTAAT +TTTCTGAGAAAATTTGAATTCCCGCCAAAAATTTTATTGGAAAATTTGAA +CTACTGGCCAAAATTTTTTTTGGCTGCGCCAAAGATGATCTTAGAAATTT +GAATTCTCGCCAAAATTTTTCCCAGAAATTTGAACTTCTTGCCAAAAATT +TTTGTTGGATAATTTGAATTTCCGGCAAAAATTATCTTATAGTGAGGACA +CAATCAATAGTTTTGGTCAAAATTGTATTGTCAAATTTTTGATGTGTGTT +GGCAATTTTGGCAAATTTGCCGAGCTCGGCAAATTCGGCAAATTTGCCGA +GCTCGGCAAATTCGGCAAATCTACTTTTTGAAATTCGCCAAATTTGGCAA +ACGGCAGAAAAAAGTACAATTGAGATTTGAGGTACACAAGACTTACATCG +AAATTAATCTAATTAAAAGAATATCACTTACCATATTCATCATAGCATAT +ACTGCATTCAGTAACCTTCATCGCAAAAATAGTCTAGACACTGACCAAAC +ACTACACACTTGATGCTATTTATATACTTTCTATGCTGAGCAATACAGAA +CATGCGATGTGAGGAATCTTAGGTAGCAGAAGTACAAAAAGATTGACATG +TGATTAAATTCCTAATTTGTTCCTTATCACATTTCCCATATGTCAATTAA +TTCGTAGATAGATGCTATTGATAAAAGTTTTTCGAGAATTTGTAAGATAT +GAAAATAATATTCCTCATTTTTAATTTAATGATACAGTACTTTGAAAATT +TCAACAATTTCAACAATTTTTAACACCTATGTTATCATTCATATACAACA +ATTTGTTTAATACAGTAATACATTTCCACCCACGTGGTATTAATACAGTG +TTAAGCTTGTTATTTTTTAAGGAAAAATTTAGAAAATTTTTCTTCCTCAT +TTTGCTTACTTTTCAAAAAAGCGTGCAACTCCCTCATTTAAAAAAGAAGT +GTCGTCGCTGAGGAAAATGTTAAAAATTTCTCTCATGGTGCTAAAATTAC +TAACTAAAAACAGTAAGTATCGTAGCGAAACATTTCAAAAATTTTCAAAA +TTTTTTATTTATAGTCAACAGTTTTGGAAGTCAATTAAAAACATTTTTTA +CTATTTTTAATATTTAAAAACGTTTCAGTTGCTTGTAACTTTATAAGTTT +GCCTACCGTACATTTACCGTAAAACAACCTTAAAAATGTAAGCTTTTCTC +CCGATAAAAATCTTTACTCAGGAGTTCGCGACTCTTCTCTTACATTAATT +TTTTCTCTAATATATTGAGCTCAAACCTAGCTGTAAAATTGAACATACAT +ATAACTTTGAGTGCAATTTAAATTTCAGAAAACTTTCACAAACCCAAAAA 
+TTGAGAATGCTCAGGACAGTTTGACCTTTCAAGAAAATTGCTTCGAGAAA +ATTCCAAGCTTCACAAAATTGCTCATTGCCAAGTTATTTCCTTTAGTTTG +TGGCCAACGCCTTAAAAAATACAAATTAAAAAAAAAACTCTGACACCACG +TGATGATGCAATCAGACTCACGAACTCACGGGGTTCTGCCCTTCCTCATT +GAATTTTTCGCGCTCCATTGACAATCACCCGCCGTACAACACGTGGGAAA +GTCGTGCACTCCACAAGGACAAATACATTTAGTTTTACAACTAAAATCGA +GCCGCGACGCAACACGCAACGCGCCGTAAATCTACCCCAGATATGGCCAA +GCCGAAATGGCCTAGTTCGACAAACTCTTCCATTTCAATTTATGAGGGAA +GCCAGAAATCCGTGCGAACTGCGAGCCTAACGCTCTTTTTCAAAATATCG +TATAACTCGGAAAACAAGCCATATTCCCAGTTCCTGTTTTATTTCGAAAA +ACAGATAATACAAAAGAAATATCTGAAAATTAAGAATAACATGTTTTTAT +TAAATACTTCTTTGTCTATATTCCATATAAAAAGAACCCCATGCGAATGA +AACACTCACTATACATTCAAAATGAATTTCGCCATCTACCTTCTCGCTTT +CATCGCCTGTCTCGCTAACTTTGGATTTTCACAGGGTAATCCAGGACTCT +CATTTGATCCAGAAGAAGATTCAGTAAATATTGTAAGTTTTCGGATTAAT +GGTTACAACATCTATAAGATTGTTTAGAATGGCAAAGTGCCAGTGGGAGC +AGTGAAGACGTTGGTTAATGATCTTGTGAGCCCTACTATTGCAATTGTGG +AGAAAACATTATCCTCATTGTCAGTTTAAGGAGCTACGGGAGGGCTCCAT +CCATCTCAAATGTTTCAAATAACTCATATTGTTCTTTTATCTGGGAGTAA +ATAAATTACATATTGGGAATCCTTTTCTTTTTGTTGTTAACCAAGTTGCC +GGTTGCCAAATTTTTTTGGGTTCGGCAACTTCGGCAACTGCCGGTATCAA +TGTCAAACATTTTGAAGCTTAGAAAAAAGCATTCAGAGAGCCTGAAAAAT +TCTATAATTTCACTTTAAACGAAAAAAATACCATACTTTTTTACCAAAAC +GCTCAAAAATATAATTTCAACAAAAAAACATTGAAACCTGGTTGCCGAAT +TTTTTTGGGTTCGGCAATTGCCGGTTGCCGTTAACCTCGGTTGCCGCACA +GCCCTGATGTGGAACTCTGCAGGCCTAGCGGCATCCATGTGAAGGAAGCG +TATTGTATTTATTTTTAAACATAAATTAAAAAATCGATAAAACCTTCATA +TATATAAATTGTTTAGACCGGTGAATAAAATGAGACGATGCCACGCGAGT +TGGCGAACGCTTTTCTCCGAAGATCCAAGTCAGGACGTCGTCGATGGGCG +GGACCCGGTGGGCGGAGCTTAGTCGGATAGCGCAGCAGACAAAATACGGG +CGGTGCGTCGAGATCAAGTGCGTACTCCTCGACGGAAAGGGAGCTTGACG +CGTGGATTGCGTTCTCGTCGGATGAGGAAAGATCTTCTGAGGAGGCGACC +GGAGCCAGTGGTGGGAGCTCTTGTAAAAGGCGTTCTCTGAAACAAATAGT +GGTGTTAAGGGGAAATGCGATGGGAAATCCGAGTAGTTGATGTCCGCGGA +ATTAATTTTCTCGGGGTTTCATGGAAATTTAATTCCGCGGATTTCAACTT +GCCGAGTTTTTATTCGATTTTATGAAAAAAATTACACGAGAAATTGTTTT +TTTTAATTAAAGTATCCGAAATCCCCGGAAACAGATTTCCATAGAGAGAG +GTCACTAGAGGTTTCATGGAAATATGTATTCAAGGTCTTAAACTACCCAA 
+TTTTTTTACCTCGATTTTAACAAATTTCAAGTAAATTTGAATAATTCTAA +CCAGTTTTGTTGTTTTACACAAAAATTCGGGTTCTTTAATCCGCGGAAGT +AGCTTTCCAGAGGTTTCATGGAAATATGTTTCCGGAGATTTCAACTACCC +GAGTTTTTTATCTCGATTTCACACATAATCGGCATAAGAAACTACAATTA +TTTGTTTTCGGGGAAACTTTAAAAAACCCCAGAAAGTTGAATTTCTAGAA +ATTAGCTTTCCCGAGGTTTCATGGAAATTTGTTTTCGCGGATTTCAACTA +CCCGAGTGTTTTACATCAATTTCATCATAATTGACATAAGAAACTACAAT +TATTTGTTTTGGGCGAAAGCTAAAAAATCCCCGGAAATCTGGATTTCTAG +AAACTAGCTTTCCCGAGGTTCATGGAAATTTGTTTCCGCGGATTTCAACT +ACCCGAAATTTTAATTCAATCTTTAAAAAACATCCAGTTTTTGTGACATG +TTCTTCAGTTAAAAAAAATCAGAGTGTTGGAATCCTAAATTTCTAAATTT +TCATAGTTTCCGCGGATCGAATTTCTAGAAATTACCTTTCCCGAGGTTTC +ATGGAAATTAGTTTCCGCGGATTTCAACTTCCTGAAGTCTTCATTGGATT +TCAATATAATTCGCACAATAAAAAAAGCCAAAACGCGAGGATCCCTGGTA +GTTGAAATCCGCAAGAATAGTTTTCTCTAGTTTGAAAAATTTACTGAAAC +CCTTACTTTTGCTTTCCCAAGAATTCGAGTTGTTCCTCGAACAATTGCTT +TGCAGTCCCAATATTCGGCGAAAATTTCGATGGTTCACTATTCCGACGGT +GCTCTTCAATTTTTCGCAAGTTTGCTGATACCTGAAAAATCGATATTTCA +GACTATCTGTGCCAAAAACTAATTTATTAATTTACCTTCTGACTAGCATC +CTCCAGCTCAACAATGTTATGAATGGTGGCTTTATACTGTTTCAGCTTCT +CCTCAATTAAAGTCTGAACACAACCCGTTTGCCGTGCCACGTCATTCAAC +TGCTCCTGAGCATGAGCACTATCTTGTCGGCGGAGCACACATGGCCCGAA +GATCAGAGCCAAGTTGTTGCAGCCCATTTTGTTGACGGCTTCCTGGTCGG +CGACACGTGCCAAATGATACATTAGGCGATCGAGCACGGCGCGGTTCGGT +TTTGGGAGAAGCTCGATCATTACGGAAAGACAACGGACACGTTCACCCAT +ATCCTCCACTTCTGTAATTTTTTTTTTTGGACGAAAAAAAACAACAAAAA +ATATCATTTTGACAAAATGTTAACTTTCAGAAAATTTGAAAATTGATTTA +TTTTTTTTGTTTCAATTTTTAGGTTTATGAATGAATCCGATTTTTTTTTG +TCGAAATTTTTAATAAGAGCCCTAGTAGGCATGTGGTGTGTGTGTGTCTC +ACTTCGGCTTGATCTACGTAGATCTACAAAAAAATGCGGGAGAAGAGACT +CAGAGTTCTTAATGGATTTCGCATGGTTAACAATATGCTGACGTCACTTT +TCTGGGGAAACAATTTCCGCAATTTTTTGTAGATCAAACCGTATTGAGAC +AGCATCTCTGCCTGGCTGCGGACGCCTCGTTTTTCCCTTGGAATTTTTTT +GGCGCGAAATTCAACTTTTCAAGTAAGTGCGAATATTTTCAAGTGAGACT +GTTCAAAATTATTTTGCTGCCAAATTTGAGTTTGCAAATAAATTATTGAG +CAAAATTCAAATGTTCAAATATTTTCTTTCGAAACGTAATGAAAAAACTC +AAAGTTACAAATAATTTGGCGCGAAATTTGAGTTTTCAAATATTTCTAGT +GCAAAATTTGAATTTTTCGAATAATTCTGGTCCAGAATTCAAAAATTAAA 
+AAAAAATTAGTTTGAAGCGATGCTCAGGTTTTCATAGAGCTTCCATGTAG +GCGTTAAAACGCCTGCCTGCCTGCCTTATGCCGCGCCTAATGCTTGAATA +TACGTAAACTTTTTTTCATTTCTTAATTTGATTTTCAACAAGTTGATAGA +ATTGAGAAAATTATAAATTTGTAAATTTACTTAAAAATGCAAAAAAATAG +GCAGGGTTCAGGCAGGTGCCAGGCCCTGAAACAGCGCCTGCCTACCATGA +AAGCCTATCGTAAAAAATCCAAATTTAATTTTTAATTTTACTAAAATTTC +GATGGCTCGTTTTTTTTCAAACATCACAAACCTGAAACATTCAAAAAATT +TTCATAGAGATCAAAAATGATAATTGGTTCGGCGAGTTCTCGGAAGAACG +CCTTGACAAGTGTAGTTAGCACATGAACTCCAATATCCTCAAGATTCACA +GAATCCGCATCTGAAACTCCGAATTTGATTTTTTTTTTTTCAATTTTTCT +GCAGTTCAACAATTACCTGCAGTCGATTCAATCACTTTTCTGATACTTCG +AACTTGGGGTAGAGATCCACTTTTTCTGAAATTACGGGCGCAAAAATTTC +AATTTCAAAGACCTTCAGGTATTTCCGTACCTATAAACTCCTTCCACAAA +CAGGGCACGTGTCTCAATTGCAAAAAAGAGTCGGTCCAAGAGTGTTGGCA +CAGTATGATCATCATCGACAATTGATACTAAAGAGGCGCCAAAGAATCTT +CCACCATTTGTCTGAAATTCAATATTTATAATAGCATTATAATAGCATAT +TTGGCTGTAGTTTGTAGCAGGGGTCAGGCAAATTGCCGATGTTGCAAATT +TCCGGCAAATCGGTAAACCGGCAAACTGCCGGTTTGCCGAATTTGCCTAA +AAATTTTTGGCAACTTTTTTTTGGAATTTTCAGAATTTCCATTTTAATCG +GCAAAATTTTACGCATCGTATGAATTTTCTTACATTTATTTTGAAAAGTA +AGCAAATTCTATGAAAACATCTGAAGTAAACGAGAAAAAAAAATTCAAAA +AGGCACAGTTTTAAGTGTTTCCGTCTTATAAAAAATCCCTCTAAACATTT +CCGGCAAATCTGATATCCGGCAAATTGCCGGAATTGAAAATTTCCGGCAA +ATTTTGATTCTGAATTTTTCAAAAAAACTGTTGGCAAAATACAAATTTTA +TTTTATTTTCAAATATGTTTTTTTTTTCAGAAAAGTAAATTTTTTGCCGG +GAATCAAATAATTCTTGAAAATGTTTCAAGATTTTTTTGCCACGAAATCA +GACGTTTTCAGAACAGTTTTAGCGGAAAGTTAAAAAAAAATTTCAAAATT +TTTGGCAGGAAGTAAATATTTTTCAGAACATTTATCCGAACAGTTTTGGC +TGTAAATTCAAATGTTTAGTTTTCAACAACTCTTAGCGGAGAATCCAATA +TTTTTCAGAATTTTGGAAATTAAAAATTTATCGAAAATTAATTGCAATTT +CTAAGAAAGCAGTTGTAATTTTTTTTTTCAAAAAGCTACCGAAAAAATCT +GGCAAATTTGGCAAATTTGCCGAGCTGAGCAAACCCGGAAAAACGCAATC +ATTTTCTATACTTTTTGGATCATCAAAATTACTGAATTCCTACAATTAGT +ACACGTCAGATTTTTCTATAAGCTCCGTTTGGTCTACTTGTATAAAGCTC +GAAATATCTCAATGTTATGTTATTTTACTAGATTTTTTTGGAACAAAATC +AATAATTTCGGTTAAATTATATTGTTCATTTTGTGGTGTGTGCGTGAAAT +TTCAAAATTTGCCGAGCTCGGCAAATTCGGCAAATTGCCGCACACCCCTG +TATTACTCACCTTCGGATCAATCGCCTTTCCAGTCATCTGACACGGATGT 
+GTGACCTTCGGCTGACACTTTTTATGACACGATATCCGGCACGCAACACA +AGTGTACAGCTTCTCATGGTGCCATATTAACTGATTACACACTTCACAAT +AAGTCGGCACATGGACTGCTTCGGCACGGAATCGGTGACCAGCATGTACC +TGGAATATTGGAATTGCGTGGGCAGGGGAAACTAGTTTCATTGAAAATTT +TTTTTTAACTGCTGAAATTAGTCCAAAATGACATGTTATATGTACTTTTC +TCAAAATGTGTGAAAATTGCACACTGAATTAATAAATAATATTATTTTTG +TAAAAACATAAAAAGGGGAAAAGTCAGCGGTAAAAAAGTAAAAACACTCA +GAGTTAGATCTTTTGACCTCCAAAATCCATGAAAATTTGCTTCATTTTTT +TCATAGCTGTGACGTCGTGGGCGGCAATTTCCGTTCGGTAAATTTATTTT +CGGCAAATACGGCAAATTGCCGATTTGCAGTTTGCCGGATATCAATTTGC +CGGAAGTTTTAAAAGGGATTTTTTAAAAATTCCAAAACACTTAAAACTAT +ACTTTTTTGAAATGTATTTTTACTATTTTCTCTAGATATTTTCATAGAAT +TTACTTACTGTTCAAGCCCACAGCTTGCCGAAAATTTCCGGCAAACTGTC +GGTTTGCCAAAATTGCCGAAAATTTAAGAAACGGCAATTTGTCGGTTTTG +CCGTTTTCTAAAAAAATTTAAATTTCGGCAATTACTGTATTTGCCGTTTG +CCGAAAAACTAAATTTGCCGCCCATCGTCACACAATACAAAAAAATTGCG +CGGCTTCTGGAGATCAAATCTGACAGCTTACTGTATTTATAACTACAAAT +TCACGCTGTGTTTTTCTCTAATTCACGTGGTGTCAGGGTGTCTCACTTCG +GCTTGATCTACGTATATCTACAAAAAATGCGGGTGAAGAGACGCAAGAAT +GTGCTAACGTCACTTTTTTTTTGAAAAAAAAAAAATTCCCGCATTTTTTG +TAGATCAAACCGAACCGTAATGGGACATCCTGGCGCCACGTGCTAATAGT +AAGTCACTACATTCTTACCGCCGCTACATCTGATTTTACACGCTTCTTTC +CAACTTTCTGAATACCAATATTTTACACAATTTTCTCGGATTTTGATATA +TAATACTTTCGCGGGCTTCTGGCTTCCCTCATATTTTCAAATGTAAGAGT +TTACCGAATTAGGTCATTTGGGCTAGGCTACGGCGCGTTGCGGCTCGGTT +TTCATTGCAAAACTAAATGTATCTGTCCGATTGTCATTGGAGCGCGAAAG +GAAAGCTAGAAGCCCATGTATTAAAAAATCGACTTACCTTTATCATTGAT +GACTTCTCCTTCCCGCGCTTCTTTTTCGATTGCACATGAACGTATTCGTT +CAGGAAACCTCTGGAATTGAAAAAAATTGTTTTGTTACAAACGCGCCCTA +ATAACAATTGATCGTGTATTCCACGTTGGAAATTATACAAAATACAATGG +AGCACTTTTGCTCGAAGCCTAACATTTTCTAAATGTTTTCTCACCTAAAA +GCGTTAACGCCAAGTGTTGTCGGAAAGGACACACTCTCCTCGAGACAGAC +TTTCGTTAAAAGTCCCTCGAACATTGTGATGACGTCACGGTATTTAAGCA +CGGATTGCTCGACCTGAAATGATATGTTGCTAATTACACTTGCGTGGCCG +TGAACCCCAAAAACTAGGCCACCTTTTGCGCCCAATTACCGATAAAACCG +CCTCATATCCCAACAGCTCCATATGGAAAGCTCTTAGCGACTTTTTGAAG +ACCGCATCCACTACAGTATCCCTTTTGGCATTACTCTCGTTTAGATGACG +TGTCTTGTTGAAAATAAAAATACTGAATTGTCGGAGATCGTCGGTGGATT 
+CTACAAGCATGTTTCCAGGGTCGATCTGTAAGTATTGTTTAAGTACAAAG +TACTACTGCTTCAGTAAGCATTGTAGTTTATAGTTTGTGGTTTGTGTTTT +GTAGTTTGTGGAGTTTTTAGTATGTAGTGTGAAGCTTGATATTCATTTTG +CAGTACATTGACTTGGATTGGGGTGTGCTAAGATATAATATCAAAGTACA +AACAACGAGTGACAAATTAAACATCTCAATCCGAAAACTACAAACTAAAC +CCTACAGACTACAAACTACAAACTATATCCTACAGACTACAAACTACAAA +CTACAAATCACAAACTACAACCACGCAAAAGAACGTTACCTACCTGTTGC +TTGGTTTTTGTTAAACTGAACGTGCCGGTCGATGGAATTGGTGAGGAGGA +TTGCCGCTGCTTGTGGGGCCTGGAGTTTCAATTTTTTTTTGTTTCAATTA +AGATTTTCCTCAAATGTTCCTTACATCAAATCCCATGGCCGTTTCGGTGA +ATGCAGCCGAGTTGGTGATCGAACTGGTGACGTGGGATTGCACGGGATGG +AGGAGGCGAAGACGAAGCATTCCTTGTGCAGGCACATTTTGCATCCTGGA +AGAGTTCAAATTACGGCTTCTAGGTCATGGTGGCCGTGGCCGAGGCTTGA +AGGGATGCTAGGTCAAGTCTGACGTCACATGTGGTTTTTCTAGGACACGT +TACAAAGATTTCGAAGCGCCCAATTTTCGGAGAGGTTTCTAGGCCATGAA +AATGTTCTAAATTTCAGGTTTCTAGGCCAGTAGAGAATTTTGAGGTTTCT +AGGCCAGTAGAGAATTTTAGGCCATATCTGACGTCACACTTTTTGGGACG +CGCCTAATAACGATGGTGGACCTCTATTCTAGGAGGAGCAACACATTTTT +TACGAATTTCTAGAAGATGCAACTTCTGCTACTAGGCCACTCATGTGGTG +CGAGGCTGTCCCATTACGGTTTGATCTACAAAAATGCGGGAACTTTTTGC +CCAAAAAAATGTGACGCCGGCACGTTCTTAAACTTGCAAAATCAGTTGAA +AACTCTGCGTCTCTTCTCCCGCATTTTTTGTAGATACGTAGATCAAACCA +AAATGGGACCACACACACACACAAAACCCACTCTGACACTTGTGCGCCTG +TGAGATGAAGTTCGTCAGGTGCCGATTGCAAAGAGCACATATCTCCTGCC +GATGAAGTCGTGACATCTTGAATTGATGGTCTTTTGCGACGAACACGTCC +GGCGTCAGTTCACTGGCAAAACTTGGTCCCGGAGACGGTTCATCCTCGTC +CTCGTTCCTGCCAAAAAGTGCCTTCAAATTCTTTTTTGCCTTCTGGAATC +CCAACTTTCCAGCGCCTCCTTTCTTTTTGGAATGTGTTGGGCTTAGCTGA +AAATTCAAAAGTTTTTAATATTATAAGTAGAACCCTTACCGTGAACGAGT +CATCACATCTTGACATTGAGAAATTGTTGGATTCGGTACTATTAGCCCTC +CGTAACATTTTTAATTTGGTGGTTGAAGCCGTAGGGGCCAGAGATTGCCG +TCTATGATGGGATAGATCAGCTCCTTCAATGAGCTTCAACTTTGTGTCTT +CCAGGATAAATGTGGCGTCCACGTCGACGGCGTCATCGCTGAAAAAAGGT +TTGCGTGTTACGGTAAGCGAGAATTATGGCAGAGGTGGGCGGAAATTGCC +GTTCGGGAAATTTTTTGTCGAAAAATTGCCGGTTTGCCGAAAATTTGAAT +TCCGACACTTTGTCGGTTTGCCAGAAATTTTCAATTCCGGCAATCTGCCG +ATTTGCCGGAAAATTTCAATTTCAGCAAGTTGTCGATTTACCGGAAATTT +AAATTCCGACAATTTGCCAATTTGCCGGAAATTTCAATTCAGGCCATTTG 
+CCGATTTGTCGGAAATTTCAATTCCGGCAATTTGCCGATTTTCCGGAAAT +TTCATATCCGGCAATTTGCCGATTTGCTTGGAAAATATCAATTCCGGCAA +TTTGCGAATTTTCCAGAAAATTTCAATTCCGGCAATTTGCAGATTTGCCT +GGAAATTTTCAATTCCGGCAATTTGCCGATTTACCGAAAATTTCAATTCG +GCAATTTGTGAATTTTCCAGAAAATTTCAATTCCGGCAATTTGCCGATTT +GCCTGGAAAATTTCAATTCCGGCAATTTACCGATTTACCGAAAATTTCAA +TTCGGCAATTTGCGAATTTTCCAGAAAATTTCAATTCCGGCAATTTGCAG +ATTTGCCTGGAAATTTTCAATTCCGGCAATTTGCCGATTTACCGAAAATT +TCAATTCGGCAATTTGTGAATTTTCCAGAAAATTTCAATTCCGGCAATTT +GCCGATTTGCCTGGAAAATTTCAATTCCGGCAATTTGCCGATTTACCGGA +AATCTCAATCCGGCAATTTGCGAATTTTCCAGAAAATTTCAATTCCGGCA +ATTTGCCGATTTACAGGAAATTTCAATTAAGGCAATTTGCCGCTTTACCG +GAAATTTCAATCCGGCAATTTGTGAATTTTCCAGAAAATTTCAATTCCGG +CAATTTGCCGATTTGCCTGGAAAATTTCAATTCCGGCAATTTGCCGATTT +GCCTGGAAAATTTAAATTCCGGCAATTTGCCGATTTGCCGAAAAATTGCA +ATTCTGGTACATTGCCGATTTCCCAGAAAATTTCAATTCCGGCAATTTGC +CGCTTTACCGGAAATTTAAATTCCAACAATTCTGGCACATTGCCGATTTG +CCAGAAAATTTCAATTCTGGCAATTTGCCGCTTTACCGGAAATTTAAATT +CCGGCCATTTGCCGATTTGCCAGAAAATTTCAATTCTGGCAATTTGCCGC +TTTACCGGAAATTTAAATTTCGGCAATTTGCCGATTTGCCGAAAATTTAA +ATTCCTAAGAAACAGTGGCCGAGATCAGTTTTGACGTCACTTCTCTTCCA +GAAAATGCACTAAAGTGATGTGACGTCATGACTGAGTTTCTAGGCCACGT +TTCGTTTGATAAAATTGGCGGAGGACCTAATTTCCAATGGAAAATAAATT +CAATTTTTTTAAATTTCATAATTTTCTGAAAAACTCACTCCTGAAAAGCA +TCACTCTCTTCCGAAATATCATCATCAAGCCCATCCTGCGTAGACTTATC +ACTATCCGAATCATCATCCGAAAGAGCAAAAGCCGCCAGAGAGCTCGGAT +CACTCAAGTCGAACTTTGTAAGATTCACAGCATGAACCTTCCGAACTCTA +AACGGTGACTTCTTAACGTTCAAGGCTCCATTGCCCTTTGGAATCTCGCC +GACTCGCTTGCGAACCGATGCTCCGCGATACGCAGACTGCAGAGCTATCA +CCGATTCTCTTATCGATAAATATTTCTGTCGTTGCTTGTACTTCTTCCAA +TTACACTGTATCGTTTGAGCTGCTAAGGCTTTTTTCCGAACTTCGGCTCT +TGCAAATGATCCTCGAATCAATGCTTGGATCTTGATAATACCCTCTTTCA +TTCTCAAATATCGCTTTCTAGCCAACATTGTCCTGAACCATCGTTGAAGT +GTATCGATTGATTGCATAATTGTCCGATGCAAATGATCATCGAGTATCAA +CTTTTCAGCGTCTCTCATAAAAATCTTATTGGTTCCATATTGAATATTAT +CATTATCAATCGATGCATGGGAATGTATGTACTCTTTCACATCGTCCACA +GTACTATCCCTTCCGTTTCGCAAAAGAATTCGGTATTGCTGGACAAAGCT +TGGATACTCAATTCTCACAGAGTACCCGGCTCTCCGAATTCGAACGGTCT 
+CGAGCATTCCGGTGTAGCGGAGCTGGCGGAGGATTATGTTGTCGTCGAAG +TGGTTGGCGATCTATAAAATTTTAGGTGAGTGGTTTTTGTCAATAATTTC +TACTTATATTCAGTCATATAAATAAAAACACATATAAGAAAAGTACCAAA +AATTTCGATCACCTAATTTTTTTAATTTTTTCTATACTCTTACTAGTATG +AAAGAGCAGGATACAAAAATAACTCCACAAGTGATAAACCACCTTTAATT +GGATTAGTGTGTATCACTTTGAGTAAGCTAATATTGAAGGTTTAAATGTG +GACTACCTAGCCTAGCAAGAGAGGTCCTGTAAGTAGACTTCAAAAATATA +GATTATCTTATGAACATATGAACTTATAAACAAACGTTGGTCAAAAAGTA +GGTGTTCAAAGATTCCACCAAAGTTTACAAACTCACCTTATCATTATTAC +TCTTAATACACCTTATAAAATACGGCGTCGCGTTGGCCAGTGTGCTCATC +AACCTCGTCAACGAGTACTCGAACTGCTTTGAAACCGACGATGGCTTCTT +GGCAAGCCTCCGTCCTCCGATAGCCTCCTTCAGAATTTGCAGCGATTTCA +GCAAGCTCATTTTGTCGTCAGACTTTCCCGCCGGGGTACGCCGTGCTTGA +TTTCTAATCGTGTTGAACATGCTGGTATCACAAAAGTCTGGAACTTCACA +GCGAAGTTCGCCACGTAGAAATGCGGAGAGAGCAGAGTCCGAGCCACGAC +GGGGAGACGTTACGATGGAGTCCATTACTCGGAGATGGCCGGCGGATTCT +GGAAATGTTGAATAATTATATTAAGGGCAGGATAGCTCAGTCGGTAGTGG +TGGCCGCTAGCAATCTGGAGGTCACGAGTTCAAGTCCGCCTTCAACCCCT +AGGTTCACCAAGCCTCTATTGGAAAATGGAGCAATCCACGACTGGATTAT +AAGCCACAGCCTTCGGCTAGGACGTGGCTTAAATTGTAGCCCAGTGGGAG +CACCACCAGGTAGTGTACCTGACTCCCAGATCCGCAGATACACTCAAAAT +AAAAAAACTGCGTGGCGTGTACTGCAGAAAACCTAATATTTAGGCCCCGC +CTTTTTCTCGTCCACTCACGGAGAAAAGGCAACAATTTTGAACCCAACCA +ATATCAGGCCGCCGACATCTCACGGGTTCCGCGCGCCGCTGCGTTTAACT +CGCTGTGGGCGTGGCGAGCTGTCTCCGCTTGCTGCGAGTTAAACATTGCG +GCGCGCGGAACCCGTGAGATGTCGGCGGCCTGATATTGGTTGTGGTCAAA +ATTTTTTGCCTTTTTTCCGTGAGTGGACGAGAAAAAGGCGGTGCCTAAAT +ATTAGGTTTTCTGCAGTACACGCCACGCAGTTTTATTATTTTGAGTGTAC +TTGAAGAACGGATCGTCCTTTAATCCTTTAATTATACATTGCATTTTTTT +TCCACTTCTACGACTTCGAAGGGGCGCATTTACGCGCGATGGTCCCGCCA +CCCACCCCAAAAATCAATGGGTGGCGCGTGCCGAGACCATCGCTCGTAAA +TGCGCCCTCCTTTAAAGTCGTAGAAGTGGAAAAAATGCACTGTAGAAATG +AAGAATACCATACTTTTTGTGTTAATTTTTTCTATAATATCGTAGTTGTC +CATTTATTTAAATCTTCCCACTTATTTAACTAAGGACTTCTAACGCTTAT +TCCAGTTATTGGTTCCGATTAAATCTACGAAAACTTGCCACTTTTCTGCA +GTTTCCTTGTCGATTGCTTGAACGCGTTCATCGCCCGAAACACACTTCTC +AGCACATTCCACCTGTGCACCGCCACCGGATCGATTCCCAGCAGGGTCTT +CATCACTGAGCTCGTACTGGACTTCAGAGCGTTCAACACATCCTGTCTCA 
+TTAGGTCCTTATTCTTTTCTCGGAAACCCTGAAATTTAGTTTTGACGTGA +TGATGGAAGACGTTTTTTTTGGAAGTTGTGTGTTTACCGTAATTTGATAC +TTGACTTTTCCAGCATAATGAGCCACGATGAATGCCGGCTCCTTCTTCTG +TGGGGTCTCATAGTATTCGTTGTTCTGGAAATTTAATCATGGAACTAGTC +ATGTAAAAACATTTGAAGGAGTTAAGGTATGTCCCGGAGGAGTGAACATC +TTAAGCCTGACCTAGAAGTTGATCCAAGTGGCAAGCCGACTGGCCTGAAT +CCTATTAACAGGCCTACATGTGAATATTAAAGGTGGATTAGCGCCAGTGG +GAATTTTGTCTAAATACACTTATAACGATGCCAAACGATCGAATATCATA +ATAAAACACTCCAAAAACTTTTAGATTTTTCATAATTTCCGGTCAAAGTT +TTGGAACATTGACAAAATTTTGACAAATTTGAGCTTTTGAGGACATCAAA +ATCAATGTAACATTTTTTTGGACCCCTACAATGTCTTTTTACGAAAAATT +AAAACAAAATTATAGTATAACAAATAGAGAAAAAAATTTTTTTTAGTCGA +ATTTGTGTCACTTTTTGACTGTAAATAAAAAAATTTTGAAAAATTTTTGA +AAAGTTTTATCATGAGATTTGGTCATTTTGGGAGCATAGGAGTGGTTTTT +AACAATTTCCACCACCGGCGCTACTCCACCTTTAATCGATAATCGATTGC +AATTTTTTCATACCTTCAAAAACTGATTTAACTTCGCCAGCATTGAGTCA +TCTGTTCCATTATTGATATTACTCTCCTCATCGACTAGCCTGAGAATTCC +ATATGGTTTACTCTGAAAATTCATCGGAATTTATATATATGGATATATAT +ATATATATATATATATATATATATATATATGTATATATATACATGTATAT +ATATATATATATACAGTGTGGGAAAGTTCTATAGGACCCCCCCTAATTTG +AAGGTTTGAGGAACTTCCGAAAATTTTTTTGAAAAACTGCTAATGCCATT +CGTTTTTAAATTGAAAAAAACCTATATACATTTTTTTCCAGAAGTTTATC +TCAAAAACTGAGGTCGCGCTGGAAAAAACGTCAAAATCCAGTGTGAAACT +TCTATAGGACCCCCCGTTTTTTTTCACGATTTTTACTAAAATCAACAGAT +TTTGGAATTTTTGACAAAGCTCAAATCAAGTTTGAGTTAGAAATGAGTTC +AGATAAGCAGTTTTGACTTTAAAAAATTAATACGAAATGTTCTCGTGGGA +TCTCCAGACTGGTTCTGATTCTTCCGAACTTTGATGTTCAAGTCTGTTTC +AAGCTTCCTGGTGCTCTCGGTAATGCCAAAACTTGATAAACTCTCTTTAA +CAAGTTCCTACTAAAATTCCTAGCACACACACCATAAAAATTTTTACGCC +ATCCCCAAGAAACCAGTCAGAAACAGCGTATTAACAAGTTGCAGTTATTT +TTGATCAACAACAGAACATTCATATACTAAAATCAAGAAAGGATCAATAG +TTAATCGGGTTTCCTTGTGTGCGGATGATCTCAAACAGTCTGTCCTCCAT +TGATCTGACCAAACTTTTCAGCTGGTTGTCCGGAATAGACTTCCAAGCGT +CGAGAATTCCTTGCTTCAACGATGCAACTGTTGGGTAAGTCTTGTTCTGA +GCATACACGATACGGACAAGAATCCCCCACAAATTTTCGATTGGATTGAG +ATCAGGACTTCGAGCTGGCCAATCAAGAAGGTTGATCTTCTTGAGCTTGA +AATAGTCGCGGGTTGAGTTGCTCACATGGATTGTCGCATTATCCTGCTGA +AATCTAAAGTCTTTTCTGGAGTAGTGACGAAGATATTTGGAGAGCTCCAG 
+TTCCAAGACGTTCTGATAGTCAGTGCTGTTCATCTTGCTACTGACGAACT +GTATCTCAAGCTTCTTCTTCTCCGTGAACGCTCCCCAAACCATCACCGTT +CCTCCTCCAAAATTACGTCTCGAAAAAACCATTGGTTCCTTGCGCAAATC +GCGCCAATAGTAGCGGCAACCGTCAGGCCCATCGAGATTGAATTTCTTTT +CATCGGAGAAGACAACCTAAAACAATGATCCTAATTATTCACTCTTGCTT +TTTTAAATTCTCACTTTACTCCAATTCGTTCCCATATTGTTCTTAGCAAA +TTCCAATCGCTTGAGTTTATGGTCTGCAGAGAGTAACGGAGCAGGGCGAA +GTTTCTGACGAACGATTACACCAGATCGTTTGATGACATTGAGGATGGTC +CTTTTTGAAGCAGACAATTGAAGCTCATTGCGAATATCTCTTGCCGTCTT +ACAGGAGTTGGAGGCAGCACGAATCACATTTCGTTCGTCACGCACGGAGA +GAGCTTTGCGACGAGGAGCTCTTTTAGATGTACCGTAGCTCACCGGATCC +TTCAGATACTCGCGAATACAGTGTCGAGAACGGGAAATTTTCCTACTCAT +TTCATGCAGGGACACATTGAGCAATTTCATAACATCCAGCTGAGCGCGTT +CAGTGTCCGAAAGGGCAGATCCTCGAGGCATTGCAAGTTAGACTGCTTTC +GAAGTAAGCTTTCCAGCCTCTATATGTGTGCCACAACACATGCCACAATT +CCACATTTAATAATTCACGCAAAAAATAGTAAATAACATCTGTGAGGGAC +AATTTAACTTGAAATATTGGTCCCATGGAACCTTGTAATCAAAGAAAAAC +GATTTGATTCCTGATAAGCCTTCCATTGTTTCCTGCTGCATATTTTGCCA +AATCAGCTTGACTACACAGTCGAAACATCTAAAGTGCGTGCTAGGAATTT +TAGTAGGAACTTGTTAAAGAGAGTTTATCAAGTTTTGGCATTACCGAGAG +CACCAGGAAGCTTGAAACAGACTTGAACATCAAAGATCGGAAGAATCAGA +ACCAGTCTGGAGATCCCACGAGAACATTTCGTATTAATTTTTTAAAGTCA +AAACTGCTTATCTGAACTCATTTCTAACTCAAACTTGATTTGAGCTTTGT +CAAAAATTCCAAAATCTGTTGATTTTAGTAAAAATCGTGAAAAAAAACGG +GGGGTCCTATAGAAGTTTCACACTGGATTTTGACGTTTTTTCCAGCGCGA +CCTCAGTTTTTGAGATAAACTTCTGGAAAAAAATGTATATAGGTTTTTTT +CAATTTAAAAACGAATGGCATTAGCAGTTTTTCAAAAAAATTTTCGGAAG +TTCCTCAAACCTTCAAATTAGGGGGGGTCCTATAGAACTTTCCCACACTG +TATATATATATATATATATATATATATATATATATATACATTTGAAACCC +AAAAAGGACTTATTGGACTTACCTGAAACAGCTGCACACACTCCGTATTA +TCAGTATACTCGATATTGGTCCAAGAAATGCCCTCCTTCAAGTACTCCTC +TTGTTCAAATTGGAAAATATGTTGATTGAAATAGGACTGAAGTTTCTCAT +TGGCATAGTTGATACATAACTGTTCAAAGCTGTTGCATTGAGACCCAACG +TCTTCAAATCCGAATATATCCAAGATACCTGAAAATTACTTCAATTTTTA +ACAATTGATAATTAATCTAACCTATATAATATCCCTTCCCAGCTGAGAAG +TCCTTCTTCAGCAATGCCTGATTAATCCTGAGGACTATATAGTGAAACAA +AGAGTTGTATATGCACTTTGCCATGGCATCTCGGGTGTTGGTGGCCTGAA +ATTAATTTTTTTATCTTAAATTCTTAATTTCACTTCTGTATATATATATA 
+TATTACCTCAGAGACACTATATCGCAAAACTACAGTTTCTGTCTTCATAA +CATGTCGTTTCATAGTGAGAGCCTGCATGAGAGTGTCGGTTTTGATGTGA +AGCAGATTTGCGACTAAATCCACGACTTCTTCATTTTCAATATACCCACT +TTCATCGCTATGGTAGCCATGTCTCTGGAAAATTATAGGAAAAATAAAAA +AAATTTGATGATGTTATTATATTAGGAATCCTCTAAACAAGGGTTTTCAG +GTTTTTTGGTTTTACGATTATTCAAATTTTTATGGAAATTATTATAATTC +AAATAATTTTCGTCACTATTATAGCCATGTTGTTGAAATAATATGCTATT +TTTAAAGGTGGAGCACCGAAATCTGGAAACTATTTTTAAATGAGTCCAAA +TTTTCCCCTGATTCCGAATATCTATGCGAAAAAATTTTAAAAAATTCATT +TATCAAATTTTTCAAATTTTTCAAACGCCGAATTTTCGCGCCAGAAACGC +CGTGTGCACGTGGTATCAGAGTGTCTCATTTTGGCTTGATCTACGCAGAT +CTACAAAAAATGCGGGAGAAGAGACGCAGAGTTCTCAACTGAGTTCGCAT +GGTTAAAAATGTGCTGACATCACATTTTTTTGGCATTTTTTGTAGATCAA +ACCGTATTGGGAGAGCCTGGCACCACGTGAGTAGGCAGGCATTTTTTGCC +TGACTGCTTGCGGGTGCCTCGTTTTTTCCTCAGAATTTTTTGGCGCGAAA +TTCAAATTTTCAAATAATTTTGAATATTTTCGAGTGAAGCTTTAAAAAAT +TGTTTGTGCCAAATTTGAGTTTGCAAATAAATTATCAAAAAATGTGCTGA +CGTCACGTTTTTGAGCAAAAAAATCCCGCATTGTTTGTAGATCAAACCGT +AATAGGACAGCCTGGCACCACGTGCGTGTGTCGATTTACGAGATTTGTGT +ATATTTACGAAATTTTTTATCTTTATCAGACTCTGATTTCCGTTTCTCAA +CGAGTTTTCTTCATTTTTGTATGTGATTTTTTTAAGGTCCATTAAATATT +TTATCATCAAAACCCTAGCAAAATTTTTGGTTTTCAGTCGGTAAAATCAT +ATAGGTAATTGGATTTTCCGTCAAGTTGTGACCTAATAGTAAGCATTGGA +AATTTTGCCAGTTTTAAATAATTAATTGACCCTAAAACACACATTTCAAA +AAAAAACCGATAAAAATTAAAAATTCCGTAAATATACACAAATCTCGTAA +ATCGACACATGGCTCTAGCGCGAAAATTCAAAAACTCTTGGGTGTACAAA +TAGCGATTTCAAGCTCAAATATAAAATCGTGGAAATTTTTTAAAAATTTT +TTTCACATAGGTATTCGGAGTCAGGGGAAATTTGGAGTCAATTAAAAATA +TTTTCCGGATTTCGGTGCTCCGTAGAACTCTCTGAATAAAATTTTAAAAC +TATGGCAATAGGCAATTATTCGGTAGTTAACTTCTGAGTTGTATTTCTTT +TTGAATCTAAACCGTCCCAATCATTTACCTTGATATATGTGATATTGCCA +AGAAGCAACACTGCCGAAATAATCCCGAAGATGGTCTGCTGGGTCTTTGC +ACAAAATCCGACGGAGCTCATTGCATGCCTCAACCTATCAAACTCATTCC +GCTCGTTCACACCTTCCAGTGCAAATGGCTCGTTCTGGAAATTTTTAATT +TTTTTTTAATTTTTTAAAAAATCGCTTTGAGGTCAGCGAATGTATCCCAT +ATTCGCTCCAAACTTTTCCCAGCTCACCTGATTCAAATACTTATAATCAT +GTGGCTTCAACAAGAAATATTTCTTCCTTTCTTCCTCGTCTGCACCCTCC +AGAAGGTAGTAGAATACGTGGTAGTTTCTGAAAAAAATATTAAAGTCCAA 
+GTGAGACACCAAGGGAGTTTATGTACCGTTCTCCTTTAGTTTGAAATATG +ATGCGTGATTTTTCCAGCAGGTAGATCTCCACGTTTGCTCTGAAAAGTAC +AAGTATAAGGCATTTTTTGTGGTATAGACATCAGGAGAAGGCTTCAAATT +TTACGTTCAGGTCAATTTAGGTCATAGTGCCTAGTAAGTATGGTACCTAT +AAGCTTGATTACTGCCAAAGTAAAAATTTTCCAATTCCGGCAATTTGCCG +ATTTGCCAGGCATTCCAATTCCGGCAATTTGCCGATTTGCCAGGCATTCC +AATTCCGGCAATTTGCCGGTTTGCCGGATTGCCGGAAATTTAAATTCCGG +CAATTTGCCAAATTTTTTTTCATTTTCGGCAATATACTGGATTGCCAAAA +TGTTACGGCAACTTGCCAATTTGCAGGAACTTTTCATTTTCGGCAAATTG +CCGGTTTGCCGATATCCCAGAAATTTTCAGGTCCGGCAAGTTGTTGATTT +GCCGATTTTCCCCGGAAATTTCAGTTCCGGCAACTTGCCGATTTGCCGGA +AATTTTCAAATCCGGCAATCGCGGGAAATTTCAATTCCGTCAAATTACCA +ACATGCCGGATATTCCAATTCCGGCAATTTGCCAATTTGCCGAGCATTTA +AATTCCGGCAATTTGCCGAATTGCTGAAAATATCAATTCTGTCAAATTGG +CAATTCGCCGGACATTCCATATCCGACAATTTGCCGATTTGCCGATTTCC +CAGAAGTTTTCAATTCCGTCAAATTTCTAATTTGCCGGAAATTTCAATTT +TACTAAGTCGCCTGCTAGCCTCCTAATCTCACCCGGAAACCATTCCATTC +TCCCGATAGTTGATTTTGATGAATTTTCCAAATCTGCTACTATTATTGTT +CGTTAATGTGACAGCATTTCCAAAAGCCTCTAGAACAGGACCAGCTGACA +GAAGTGTCTGCTCAGTACTGCAGCCTGTTGAACCTTTCTGTGATAGCGAT +ATCAAATGGGACATCAGATGATTCGTGGATTCCGTTTTTCCCGAACCACT +TTCCCCGGAGATTACCACGCATTGATTTTCCTTGATACGGAGCATGCTGA +AAAAATTGTTTTTTTAAAAATTAGTAGTGACATTTAGTTAATCCAGCATC +AGAATGTATATCTCTGAAATTTTTTTTTTTTGAAATCAAAAGTGCTTTGA +GGGTCTTGAAACGAACACATAACTGCAAAAAAATTGGTGGCCGAGTTTAT +AATCGGCCACTTGGCAAATATACAAGCCAATACACAAAGTGAGCACCTAC +GTGGCCGAGTGAAGAGAAAACTCGGCCACCGATTTTTTTTGGTCACTTGA +AATTTCAAAAAATAGTAACTGGGTTCCGGTAAGAACTTAGAACTCACTTG +TGGTAACAGACATCGGCGATTGCAAAAATGTGCGGTGGTAGTGAGCCGAG +CCGCTTACTTTGAAAGTATAGCCTAGCATATTTTGGGTTGTAGATCGGAA +AAAAGCAAAACGGGTTCACGGCTACTAGAATTGGACCGATGTACGTGTAA +ATGTGCCCATTCGCGAATCTTTCCCGCAAATTGTCCAAGAGGGTTTGCTC +CGTGAGCTCTGGCAAGTTACAGAGATCGGCGTACTCGCGGTCCTGAAAAA +TTTTAAGGTTTAAAATTTTTTTAAATATAAAAATATTTAAAGGTGGAGTA +CAGGAAATCTTTTTTAATTACTCCAAATTTTCCACTGATTCCGAATATCT +AAGTAAAAAATTTTGAAAAAATTTCCCTGATTTTGTATTTAAGCTTGAAA +TTACGATTTTCATTTGTGCCCGCACCACTTTTTTCAAATACGCCCAAAGA +AATTCGCATTGGAGCGCGTTTGCATCGTTTGATTTTCGTCGTTTATTTTA 
+TTTATTTTCCACGGGGTTCTGGCCTTCTTCATTGAATTTTCGCGCTTCAT +TGACAATCGCCTGCCGGACAACACATGGAAAAGTGTCGTGTACTCCACAC +GGACAAATACATCAGTTTTACAACTAAAGACAAGCCGCGACGCGACACGC +AATGCGCCGTAAGTCTACACAAAATCTCTCCGACCCAAAATGGCCTAGTT +CGGCAAACTCTGCCATTTCGATTTATGAGGGAAGCCAGAATTTCGTGATT +TTCGCCGAATTCAATTTTCTTGAACCAGTTTCAATCATTTTTGTCGAGTT +TTTGCTAGTTTTTTTTTTGACAAAAAGGAATGAAACTGGTTAAAAAAACT +GAATTTGGCAAAAAATAAATAAAATCTTTAAATGAAGGAAATCAAAGCGC +GCTCCAATGCGAATTTATTTGGGTGCGCATTTGACAAAAACACGTGGTGT +CAGGCAAGGGTGTCAATGTCCCGTAAAAATTTCGAAAACGGGACAACGGG +AATTCCCGTTCCCGTGAAAATTTTAAAAACGGGAAAACGGGAATTCCCGT +TCCCGTGAAAACTTCAAAAACGGGACAACGGGAATTCCCGTTTTTTTGAA +AACACTCAAAAAACGGGACAAACTGGGCGGTTTAAAAAAAAGAGTGCATA +GTTGGCGTAAATTTTAGTGTATTCGAGGCAAATTATAAAAAAAAACTGCA +TTATAAACATTTTTATCGATTTTTTCAATATTAAACAGATATTTAAAAAG +CCAAAAAATGAGGGAAAATGTCCCGTTCCCGTGAAAATTTTAAAAACGGG +ACAACGGGAATTCCCGTTCCCGTGAAAACACCCCAAAAACGGGACAACGG +GAAAAACGGGAAACGGGAATTGACACCCTTGGTGTCAGGCTGTCTTATTT +CATTTTTATCTACAAAAACAGCGGGAATAGTTTTCCAGAAAAATTCTAAC +GTCTCTTAACCAGGCGAAATAAAATGAGAAGTCTGCGTCTCTTCACCCGC +ATTTTTTGAAAATCAAAAAAAGAGACCGGGCACAAATTAAAATCAGGGAA +AATTTTTTTAATTTTTTCACATCTAAATTCGTAATCGGGGGGCAAATTTG +GAGTGATTTAAAAATATTTCCCAGATTTCGGTACCCCACCTCAAAAAAAA +ATAATTAAAGAATTGCCTTACCTGTGGCTGAACCAAGAATTTCGTAAGAA +AAGCGTCAATCGATGAGGCGACCTCTGCACTTCCAAACCGAGTGCCGCCG +GTTGCCGCCCGATAACCGCGGTGTCGAAAAACAAATCGATTTTTTGGAGT +GGCACTGTCCACCACTACAGGCAGTCTTGACCAGATGGCCTGTACCGCGA +CTGGGTATTCCCCGGGGTCTAGGCGGCGTTCTTTATAGGTTTGACCGTCG +GGGGTGCCCATCATTTCGAAGAGGTCAAAGTCTTGTACCGACTGGCCTTC +TAGTTCTGGAAATTTTGACTTTAGAAAAGAAATCATTTTTTTCCTTTTTT +TTTTAAATTAGCCCGTGGCTTTTTTCAAAATTAAAAGAAAATTTGAAAAA +ACATTGCATTAGAGCGCGTTTGCATTCACTCCATGCAGTCCAATTTTGCT +GGGAGCTTGAGTGTCTCATTCGACTTGATCTACACAGATCTACTAAAAAT +GCGGGAGAATAAAATCCACTGATATTGTATGTTTAAGAATGTGCTGACGT +CACATATTTTTGTGCAAAAAATTCCCGCATTTTTTGTAGATCAAACCATG +ATGGGACAGCCTGGCACCACGTGAGATTGTGTCGACAAATTTTTGTTGAA +GATGTATGGCCAAAAACGGCACATTTAAGTAATTTATCAGTAGAGCGCGT +TTGAAATTTTTTTAAAAATTCTTTTTAAAAAGAGTTCCCACTACCCCAAG 
+TCCTCCGATTTTTCTAAAAACTAAGAGAAGGGAATCTACGACCCTGAGCC +TTTAAAACCTTACCAGCTCTCCCGGCGAGCACTTTTTCGATGAGCTCCTC +CGTCGTGCACCGCTTATGAACCTCCAAGTGTACCGTATCATTTTCACAGT +CGGGATTGAAGCTGTGCATATAGACGGCTACCGTATGTGTGTGTGCATCG +TCTGTCGCGTTGATTGTGTGAGCACGTCGGGGAAAACATCCCGTCGCTCC +AGCTGATATTGAGTCAAATGACATTTAGTTAATCCAGCATCGGGATGTAT +ACGTCTGTAAAAAAAAATTTTTTTTGAAAGTAAAAGTGCCTGGAGGATCA +TGCAACGAACACAAAAATTTCACAAGACCGCGAAAAAAATTGGTGGCCGA +GTTTTCTCTTTCGCGGTCACTTAGGAAATACTAAAATCGCACGGATTTCT +GGCTTCTCTCATAAATTGAAATGGAAGAGTTTGCCGAACTAGGCCAGTAT +AGCTCGGCCATATCTGGGGTATGTTTACGACTTTCCCACGCGTTGGGCGA +TTGTCAATGGAGCGCGAAAAACTCAATGAGAAAGGCCTGAACCCCGTGAG +ATCGAAATGAAAATTTGCTAAATGGCCGAAAATTGAAAACTCGGCCGCCA +ATTTTTCCACACGGCCACAGGAAAAATCGTAACAAAAATTGAGAGAAATT +TATTAGAAAAAATCTGAAAGTAATTTTTATTGCACCGTGGCCGCAAAATG +TAAAAACACGGCCACCGATTTTTTTAATGTGGGGAAAAAGTTGGTGGCCG +AGTTTTATATTTTGCGGCCACTTAGAAAATATTTAGATCGAAGGGAAATT +TTGTTGAAAATACGTTTAACGAAAACCAGTCAATTTTTGTTACATGACCG +AAAATTGAAAACTCGACCACGGATTTGTTATGCTTTATAAATTTTTGAAA +CTATTATTTTTTGATTCAGAAATTTTTGGAATTGAAAAAAACTCGGCCAC +ATTTTTTTTCTCACGGTCACAATTTTCTTTGTGATCGTAAAAATGGAAGC +TCGGCCATGGTGGCCGAGTTTTTTTCGCTGCCACTGGTAACACGTGGTTG +GGTGAAGAGAAAACTCGACCACCGGTTTTTTTTGGCCACTGAAATTTTTA +AATTTCAAAATATTTTAAATTTTGCTGCAATCTTTCACCACAACCGCGAA +AAAATGAAAACTCGGCCACCAATTTATTTATTTTCACTGGGAGAAAAAAT +ATTGTTAGCCAAGTTTTCTTTTAGGCAACAGAAAATCAAAAAATGAGCGA +TTTTTGTTACGTGGCCGTGAAAAGAGAAAACTCGGCCACCAATTATTTTT +AATTTGAGGCACTGTTGTAAAATCTTATAAAAAAACAGGTTTAGTACATT +GAACTCAAAAATAAGGAAGATTTTTGGAAAAACAAAAATTGCTGGCCGAC +AGCTGGAAAAAACTCGGCCACCAACTTTTTCCGGTCACACAGCAAAATCT +CTCGATTTTCGATGTTTGAATATTGAAATCTGGGTTGGATTTTTTAGTAT +CATTTCATTTCCGGTGGAGAAACGAGGGAGAGAAAAAGAGAAAAAAGATT +TGTAGTGTCCCGGGAAGCTCTCTTTCTCATGTGGGAAGAAAGTGGAAGAA +GCTTCTGCAGCAGCAGCAGCAGTGGTTCACGAAAGAGAGAGAGAGACATG +TTGGACATGAAAATGATGAAAAATAAGAAGATGAAGAAGAGAATTACACA +CTACCACCACAACACAGATTATTTTTATCACGTTTTGAGGGGGCGCTTCA +CACAATTTTCATGGCCTAGAAATTTAAATTTGGTGGCCTAGAAAATACTA +TATTAATTATATTAATTATTCTACGAAATTTCAATAATTTAAACGGGAAA 
+ATTTATTTTAAAAAAGCCTAGAAGAGGTTTTGAAATTGGTGGACTAGGAA +ATGAAAAATTGTGTTTTTGGAATTTTCTAGGCCATTTGAACTTCCTGACG +TCCACAATTTTCATGACTTCGAAATTTGAATTTGGTGGCCTAGAAAATCA +TAAAAACTAAGAAATCGAGAAAAAAGCATGGATGAAATTTTGAATTTTAT +AGGTCATTTGAAATTTGTGGCCTAAAAAATGAAAAATGATTTTTTTTTTT +TTGAATTTTCTAGGCCACTTGAATTTCCTGACGTCCACAATTTTCATAGC +CTAGAAATTTAAATGTGGTGGCCTAGAAAATAATTAAAACTAAGAAATCG +AGAAGAAAAGCGTGGAAGAAGTTTTGTATTTTCTAGGCCATTTGAATTTG +GTGGCCTAGGAAATGAAAAAAAGATTTTTTAGGAAATTTCTAGGCCACTT +GAAGTTGGTGGCCTAGGAAATTAAAATTTGTGTTTTTGGCATTTTCTAGG +CCATTTGGATTAGGTGGCCTAGGAATTGAAAAGTAGTTGTTTTGGAATTT +TCTAGGCCATTTGAATTTGGTGGCCTAGGAAATGAAAAAATATTTTTTAA +AGGAAATTTCTAGGCCATTGAAGTAGATGGCTTAGGAAATAAAAAAAGGT +TTTTTAAAGGAAATTTCTAGGCCATTGAATTAGGTGGCCTAGGAAATGAA +AAATGATTTTCTTAAAATTTTTTAAGCCACTTGAATTTGACGGCCTAGGA +AATGAAAAATGGTTTTTTGGTAATTTCTAGGCCACTTGAATTTGGTGACC +TGGGAAATGAAAAATGATTTTCTTGGAATTTTTTAGGCCACTTGAATTTG +ACGGCCTAGGAAATGAAAAATAGTTTTTTGGTAATTTCTAGGCCACTTGA +ATTTGGTGGCCTGGGAAATGAAAAATGATTTTCTTGGAATTTTTTAGGCC +ACTTGAATTTGACGGCCTAGGAAATGAAAAACAGTTGTTTTGGAATTTTT +TAGGCCACTTGAATTTGACGGCCTAGGAAATGAAAAATAGTTTTTTGGTA +ATTTCGAGGCCACTTGAATTTGGTGGCCTGGGAAATGAAAAATGATTTTC +TTGGAATTTCTAGGCCATTGAATTAGGTGGCCTAGGAAATGAAAAATGAT +TTTCTTAAAATTTTTTAAGCCACTTGAATTTGACGGCCTAGGAAATGAAA +AATGGTTTTTTGGTAATTTCTAGGCCACTTGAATTTGGTGACCTGGGAAA +TGAAAAATGATTTTCTTGGAATTTTTTAAGCCACTTGAATTTGACGGCCT +AGGAAATGAAAAATAGTTTTTTGGTAATTTCGAGGCCACTTGAATTTGGT +GGCCTGGGAAATGAAAAATGATTTTCTTGGAATTTTTTAGGCCACTTGAA +TTTGACGGCCTAGGAAATGAAAAACAGTTGTTTTGGAATTTTTTAGGCCA +CTTGAATTTGACGGCCTAGGAAATGAAAAATAGTTTTTTGGTAATTTCGA +GGCCACTTGAATTTGGTGGCCTGGGAAATGAAAAATGATTTTCTTGGAAT +TTTTTAGGCCACTTGAATTTGACGGCCTAGGAAATGAAAAATAGTTTTTT +GGTAATTTCGAGGCCACTTGAATTTGGTGGCCTGGGAAATGAAAAATGAT +TTTCTTGGAATTTTTTAGGCCACTTGAATTTGACGGCCTAGGAAATGAAA +AATAGTTTTTTGGTAATTTCGAGGCCACTTGAATTTGGTGGCCTGGGAAA +TGAAAAATGATTTTCTTGGAATTTTTTAGACCACTTGAATTTGACGGCCT +AGGAAATGAAAAATAGTTTTTTGGTAATTTCTAGACCACTTGAATTTGAC +GGCCTAGGAAATGAAAAATAGTTTTTTGGTAATTTCTAGGCCACTTGAAT 
+TTGACGGCCTAGGAAATGAAAAATAGTTTTTTGGTAATTTCTAGGCCACT +TGAATTTGGTGGCCTGGGAAATGAAAAATGATTTTCTTGGAATTTTTTAG +GCCACTTGAATTTGACGGCCTAGGAAATGAAAAATAGTTGTTTTGGAATT +TTTTAGGCCACTTGAATTTGGTGGCCTAGGAAATGAAAAATGATTTTCTT +GGAATTTTCTAGGCCACTTCAATTTGGTGGTCTGGGAAATGAAAAATGAT +TTTCTTGGAATTTTTTAGGCCGCTTGATTTAAGTGGCCTAAGAAGTTAAA +AATAGTTGTTACGGAATTTTCTAGGACATTTGAACTTGGTGGCCAAGGAA +ATGAAAAACTTTTTTTTTTGGAATTTTCTAGGTTTCTGTAATATTAATTT +ATTCATTTAAAAAATTAAAAAACATTAAATTTTTTTTAAATTTCGTGTTT +CAAAAAAAAAATCAAATTTGCCCAAAACTTCAAAAATAAGCTAAAAAATT +GTAAAACGATACAGAACGGCTAATAAAACTTGAAAATAATAAGCGTGATG +AGTCGGAAAGAGCCGCCGGCTGGGACCTTCTTTTTCTCTTTTTTTCTCCT +CTCCCATCCCGAGCAGGAGCAACACAACGAGAGAGCACAACAAGAAGAAA +AAGCAGATAGATATATAGATATATGTTGTCTTCTCTTCATATTCTTTCTT +TTATCTTTTCATTCGAAACAACACTTGGTTCATTTCTCCCGTTGGACGGG +GGGGGGGGGGGGGTGACGCGGGAGACGAAAAAAAAAAGAAAAATTGGGAT +GGGAGTTGGAGAATGTTTGGAAAAAGATTGATTGATCAATCAATTTGCCA +GCCGAGTCTCGGCTATTTTCGGAAATCGGGGCTATGCAAATGCGCCCTAC +TGATAAAATGGGACGTGTCATTTAAACTCCTTGGTTTTTTAAAAAGAAAC +GCAAAATCGGCAAATTTCCAGTTTGCCGATTTGCCAGAAATTTTCAGGTA +CAGCAATTTTCCGATTTGCCGATTTGGCGGAAATTTCAATTTCGACAAAT +TGCCGGCTTGCCGAAATGCCAAAAATGTCCAGGTCCGGCAATTTGCCAAT +GTTGATTTTTGGCAAGTTGCTGGTTTGCCGGAAGTTTTCATTTTCGGCAA +ATTACCGATTTGCCAAAACGCCAGAAATGCTCAGGTTCTCTAAACCGCTG +GTTTTCCGGAAATTTTTCATTTTCGGCAAATTGCCGGCTTGCCAAAATGC +CGGAATTTTTCAGGTCCGGCAATTTGCTGATTTGCCGGAATTTTTCATTT +TCGACAAATAGCCGGTTTGCCGAAACGCAAGAAATGCTCAGGACCGGCAA +TTTGCCGATTTGCCGGATATTTTCATTTTCGGCAAATTGCCGGTTTGCCG +ATATGCCAGAAATGTTCAGGACCGGCAATTTGCCGAATCGCCGGAATTTT +TTATTTTTGGCAAATTGCCGATTTGCCGAAAGGCCAGAAATGTTGCGGTC +CGGCAATTTGCCGAAATGCCATAAATGTTCAATTTCGGCAATTTGCTGAT +ATGCCGGAGTTATAAATTTCGGCAAATTGCCGGCTGGTCGATATGCCGGA +AATTTTCAGGTCCTGCAATTTGATGATTTGCCGAAAAATTTATTTAAAAA +AAAAATTAATTTTCGCCAGACAGGAAGTCTCTAAAATCTCATTTAATAAG +AAGAAACAAAAGAAACAAATAATGGCCTAACAAGTTTCGAGTTACAGAAA +CCTAGGCCACTTTGAACACCGGCGTGGCCTAGAAACCCAGTTCCACAAAA +ATAAGAAAAAATTAAAACTCGGCCACATAATCTAGGCCAGGAGCATACAA +ACACTCCACGAATATTAAAGACTCTTTCTAGGTTAGTGGCCGTTAACGAT 
+GGAAAACTCGGCCATCTGCCAATTTTGTTGCTTAAAATTGGAAAAATAAA +GTTTTTTTAACTTCAAAAAAAAAAGATCAAAAAGTGAAAAGCGGGGATTT +TTCTCACACAATTTAATAAATCTGTAGCCGAGTAGCGGAGAGCTCTCGGA +GCTCAACGTGAATTTCTATTTGGACGGTAAACATACTCAGCCACCCCACC +CCAGCCCACCGTTACTGTCTTTTTTTTTCTTTTTTATTATCAACCAGCAC +TTATCACCTTTATCTTTGTGTGATCCCGCGCCGCGCAAGAACGAGAGAGA +GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA +GAGAGAGAGAGAGAGAGAGAGAGAGAGGGATATTGGTGAGACGCAGACAA +TTAGAGTCACTCGTGGGCTCTTTCACACATGTGATATTGAACGAGAAATT +GCGCACCTAGGCCACAAAAAAAACAGTGTATTCGATTTCATGATAGGGGA +GAAGCTGGCACGGTGCCAAGTTTCAGAAAAAATATGGAATTTTTGCTTGA +AGCATGGTGAATCAGACGTGCTTACGTCACAATTTTTCGGGATAAATATT +CCCGCATTTTTTGTAGATCAAACCGCAATGAGACATCCTGATACCACGTG +AGGTTAATTAAAAGTATTGATCGGGATTTTGAGCGTCAAATATGGTGTAT +TTACCGGCAACCCTGATGCGCCAGATTTGACGCGCAAATCCTTCATTTTA +ATAAAGTTGGGCGTCAAATGTGATGCCTGAATTTTCAATTTTTTGAAATG +TTTTGATTTCGTGAAAAATAACGGCGAATTTCAGAAAATTTATAGAAAAT +TTTCATTTGTTGCGACCAAAAAATCTTAAAAACGACCAAAATTCTCAGGA +AATTCACATTTTTCGCCGAAAAAACCATTTTGCGCGTAAATTCTGATGTA +TTTTGCCGTAGGCTATGTGTGAAAACACTGCCGGTAATACGTCATATTTG +ACGCGCAAACCACAAAAATCCTCGAAATTATTCCGGGTGCACAAAATTCT +GAGAATGCGTACAGCGCAACATATCTGACGCGCAAAATATCTCGAAGTGA +AAAATACAGTAATCCTTTTCAAATGACTACTGTAGCGCTTGTGTCGATTT +ACGGGATCTCGACTTTCGAAATGGATTAAAATATATTATTTATCAATAGA +ATATTACAAAAAGAAGTTAATTTCGTAAATCGAGCCCGTAAATCGACACA +AGTGCTATAGTAGTAATTTTAAGAATTACTGTAGTTTTCGCTACGAGATA +TTTTGCGCGTCAAATATGTTGTGCAGTACGCATTCCCAGAATTTTTCGTT +CCCTTTCACAAAAATCTTCAAACTTGCGCGTAAAATTTGGTGCATTGGTG +CCTGTGAGGGGTTCACCTATACAAGTGGTGCCAGGGGCTGTCCCATTACG +GGTTGATCTACAAAAAATGCGGGATTTTTTTGCCCAAACCATGCGAAATC +AGTTGAGAACTCTGCGTCTCTTCTCCCGCATATTTTGTAGATCAACGTAG +ATCAAACACTCTGACACCACGTGGGCCGATGATGTAAGTGCACCAGATTA +CACGCGCAAAAACTGAAAATCTGAAAAAAAAAAAAGCGAAAACTGTCAAA +TTTGAGCGAGTTTTTTATTCCACGAATCACAAGAAAAATCAATAAAAAAT +CGATAATTAAAAAGTGAGTATCATATTATTCGACAACAGAATTTCCAGTT +TCATCAGTGAGCATTGTAAGTATACGATGAGCCATCGACGGTCCAACACG +TCTCCCGTTCTCTGCCACAAGTTCGCCAATCTCTTGAATAGCGAGCGAGT +AGTCCATTTTCGAGTATTTATCGATTGCGGCAATCGGATCAGGGATTAGT 
+TCGAGAATTGCACGTCGCTGAGCATCCGACAGCCGATCGATTGTTGTGAG +CATTTTCGACCACCAATCCGTGACGATTTCCGAGCGACTTCCGATCACAA +TTCCCTGGAAAAAAATCATCGGGGTTTCATGCTTTTGATCTATTTGATCT +ACAAAAAATGCGGGAATTTTTTGCCCAAAAATATGTGACATCGTCAGCAC +GTTTTTAACCATGTGAAATCAGTTGAGAACTCTGCGTCTCTTCTCCCGCA +TTTTTTGTAGATCTACGTAGATCAAGCCGAAATGAGACATTCTGAGTCTT +GACACGACAAATTCCCGTTAATTACATACGGATGTGAGCCTTTGTTGGAT +GTGAGAGAGTTTCCAGGTTTTTCTCGAAAGCTTTTCTTAAAATTTCATTT +AAAAAATAATTTTTTAACGAAAAACTACAAAAAATCATCCGAAAAACCTG +GAAAATCGATGAAAAACTCTGTGACAAATACAGTACTGTTTAAAGGCGCA +CGCCAGTTTGTGTTCTGAGAATGCGTATTGCACAACATATTTGACGCGCA +AAATATCTAAACTACAGTAATTCTTCAAATGACTACTGTAGCGCGCTAGT +GTCGATTTACGGGTTCGGCTTTCGACAAATTTCGAAAATCGAGCTCGTAA +ATCGACACAACCGCTACAGTAGTAATTTAAAGAATTACTGTAGTTTAGAT +ATTTTGCGCGTCAAATTTGTTGTGCAGTACGCATTCTCAGAACTTTGCGT +TTTCGTGATTTTTTTTTTCAAATTTTAACTGCAGTAAAAAAATAAAACAA +CATAAATTTCATTTAAAAAAAACGATTTTAGCGCTCTTTAAAGGCGCACA +CCCGTTTGTGTTTTAAAAAAAAAAAATGTCGCGTCGAGACCAGATTTCTG +CAGATTTCGCGCCAAGACCCAACCTTATCCCCCTGATACTGTAATTTGTG +GCCTCCTGCCCCTCCTGCACCACCACCACCGCCCCCATCACTTCCAGCGT +CGAGCTTCTTCTTCTCCTGTCTCGCCAAACTTCTCAAGTACTGAGCAGTG +AATAGTGCCAGCTCGGCGATCGTTTCGATTTGCACAATTTGTGCACGATG +TTGCTCGTAGATTTGCAGCGATAAATTGTGCAATTTCTTCTTTTTCGCAA +TTTCTGCCCTGCCGAATGAGACGATGAGCATTGTGCAGCGGCCACCGTTT +TGAAATCCGGATTTTTGCTGGAAAAATAAAATTTAAATGAGATTTTTTTT +AGGTCTCGCAGCGATTTCTCTGGCTGTTTCAATTACGGTTTGATCTACAA +AAAATGCGGGAATTATAAAAAATGTTTGCCCGGTAAAATGTGACGTCAGC +AAAGTTTCTTAACGCTGCGAAATCAGTTGAGAACTCTGCGTCTTTTCTCC +CGCATTTTTTGTAGATCTACGTAGATCAAGCTGAAATGAGACACACTGTG +TGTGCAAACACTATCCACAATACATCATATTTGACGCGCAAACTCTAATT +TTTTTTTCGCTGCGAGACCTAAAAAATCTCAAACTTTCCAGTGAAAATCC +GGATTTTCTTCATTTTAAAAAAGTTGCGCGTCAAATGTGATGTGCCTGAA +TTTTCGCTTTTTTGAAATTTTTTATTTCGTTAAAAATGACGACGAATTTC +TGAACATTTATAGAAATTTTAAAAATTTTCATTTTTTGGGTTAAAAAAAT +CTTAAAAACAACCATAATTCTCAGGAAACTCAAATTTTTCGCCGAAAAAC +CCATTTTGCGCGTGAAATCTGGTGTATTTTACCGTGCCTAGGCTGTGTGA +GAACACTGCCGGTCATACATTATATTTGACGCGCAAACTGCAAAAATCGT +CGGAATTATTTCGGGAACACAAAATTCTGAAAATGCGTATCGCGCAACAT 
+ATTTGACGCGCAAAATATCTCGTAGCGAAAACTACAGTAATTATGTAAAT +GACTACTGTAGCGCTTGTGTGTCGATTTAAGGGCTCAATTTTCAAAAAAT +CGAGATCCCGTAAATCGACATAAGTGCTATAGTTGTTATTTAAAGAATTA +CTGTAGTTTTCGCTACGAGATATTTTGCGCGTCAAATATGTTGTGCAATG +CGCATTTTCAGAATTTTGTGTTCCCGGAATAATTCCGACGATTTTTGCAG +TTTGCGCGTCAAATATGATGTATGACCGGCAGTGTTTGCCCACATCGCCT +AAGCACGGTAAAATACACCAGATTTTTCGCGCAAAATGGGATTTTCGGCG +AAAAATTCGAATTTCCTGAGAATTTTGGTCGTTTTTACTATTTTTTAATC +CAAAAAAAAATGAAAATTTTTAAATTTTCTATAAATTTTCTGAAATTCAC +CGTCGTTTTTCACGAAATAAAAAAATTTCAAAAAATTGAAAATTCCGCCA +CATCACATTTGACGCGCAACTTTTTTAAAATGAAGAAAATCCGGATTTTT +CTGGGAAATTTGAGATTTATTTAGGTCTCGCAGTGAAAAAAAAATTAGAG +TTTGCGCGTCAAATATGATGTATGACCGGCAGTGTTTGCCCACATCGCCT +AAGCACGGTAAAATACACCAGATTTTTCGCGCAAAATGGGATTTTCGGCG +AAAAATTCGAATTTCCTGAGAATTTTGGTCGTTTTTACTATTTTTTAATC +CAAAAAAAAATGAAAATTTTTAAATTTTCTATAAATTTTCTGAAATTCAC +CGTCGTTTTTCACGAAATAAAAAAATTGATAAAATACACGATAAAATACA +TCAGATTTTACGCGCAAACTGGCAAAAAAATTGATTTTTTGGCTTTTTCG +GACTAAACATTAGAAAAAAGACTATACTCCAACAATTTTTTTCAGTTTTT +TTTTTGAAATTTTTTTTTTTTCAGTTTAAAATGTTTTCATCGCTTTGTTC +GTTTATTTTTGTCTGTTTATTGTGATTTTGTGCGAAATTAATGTAAAAAT +TTCCATATCCAACACTGTTGTGTGATTAATTCGGATAGATAAAAAATGTT +CAGCATGTTGAGTTTAGTTTTCGTCGTCCAATTTAGCTTTTTTTTGATTT +TCAGTCAAGATTTTCTTTCGCCTTCCGCTTTTTGACTTTTCTATGCAGTA +ATGACCAGTCTTGATTTCTAGCTTTTAGTGTTTGGTTTACCATTCTGCTA +TTACATGAGTCACACCTTATCACTAATATAAATGCAGCATAATTTCAGAG +TGAGCAAGCCGCGAGTATTAAACCAGCGCTGTTTTAAACTGATAATGATT +GCAGTTTCCTTCATTCTCACCGAATATCAATTTTTATTCATTCCAGAGTC +TCTCGAACTATGGAAAAACAACTAAAAGCTATGTCCGTCTCGGACAAACC +TGCTGCCCCAGCTGCCCAAAAGCTTGGTACCGCTCCGCTCGCTGCAAAAA +AGACGAGAAATGAGGAGTGGGGAACCAAGGTCAATATCGATACCAACATT +CGCAAATTGAGTAAGTTTTTATTTTACAATTACCAGTCGTTCGAGCTTAT +ATATCAATATTTTTTAGCGATCAAACCGAATCAGCCAATTTACAAGTACG +CTGTGCAAGTGAACTACGTCTTCCGGAAACCTGATGGAACTGAGGCGACA +ATCGAAATGTCCAAATCAGCCAAAAAGGGAACGGAGCACGACAACGACAA +AACACGCTGCCAGAACGTTTACAATGAGGCGATCAAGCGTTACGATGAGC +TGAAAACCGGAGGACCGTTTTTCTACGATCGTCAAGCCTCTTTGTACACT +CTGACCAAATTGAAGAATGAGGTGAATTCGATTTTTTGCTTAAAGTTTAA 
+TATTTTGCGTTTCAGAGCATCTCTTTCGTTGTTACTGACAAGATTTGCAA +GCGACAAAACTTCAAAGAGGCACAGTTTGTTCTCAAAAAGGTGGATCAAT +CGTTCCAGTCAACATCGAATGACGTCATCAAGACAACCAACTCGTGCCCA +GCCAATGCCGACAAAACTTTGCTTGAGGCAATGAACATCATTGTCTCGGG +ACCAGCGTTCGAAAAGTATGCCGCTTTCAGAGATAATCAAGCTATTTCAT +CTCGTTTTCAGCAAAAATGTTATCACCGTTGGAGCATGCGTTCATTACCT +CATCGACCCAACTGGAGTCGAGTAAGAAACTTTAAGAAATTATCATATTA +GTATTTAATTGTTTTTTCCAGCGTCGCGTACAAGGAATACCCTGAAGGAC +AACTCTACTCAGGAGTTGGTGTTTCAAAGTCGGTGAAGACATTGGAAGGA +ACGGACAAGAAAGTTCCATCACTCTTCATGACAACTGAAAGTTAGTTTTT +CAGAAAATCTAGATTTTTTTCAAAATGTTATAAAATTGTTAATTTTCAGT +GAAGACAACATTGTTCCATCCGGATTATGCCCCACTTGTGGAACTTTTGC +AAACGTTCAGAGGTTTCAGCACAACTCTCAAAGCGAATTCTCCAGCCGCG +CAGAGAATTGAGAAAGCCTTTGTTGGACTGGATGTTGTCTTGAATTACGG +TGTGCACAAGGGTCTCGGAGAGGATGGTGTCGTTATGAAGATCCGTCGAT +TCCACACGTCAGCTAAGGAGACATGTTTTGAAGTTGAGAAGTCAACTCGT +GAATTCACGAACGTCTTTGACTACTTCAAAAAGAAGTATGGAATCACTTT +GAAGTATCCCGATTTATTCACCATTGAAGCGAAAGGGAAACAAGGAAAAA +TTCATTTCCCTGCGGAAGTTCTCCTTCTCTGTCCGAACCAGACGGTCACG +AATGATCAAATGATCAACAATGAGCAGGCGGACATGATTAAGGTAATTTT +ATATTTTAAAATCTGAATAATATCCCGAATCCTTTTTCAGATGTCAGCCG +CACAACCACATATCAGAAAGACGACAACTGATACTATCGTGAGAAACGTC +GGATTGGCTTCCAACAATATCTATGGCTTCATCAAAGTTGAAGACCCAGT +CAACCTTGAAGGAATGGTTCTTCCAAAACCTAAGATTGCGTTTGCCGGTA +ACCGACTCGCTGATTTGGCAAATCCGAAGTCTAGATTCCCCACCGACTTC +AATCGTGCTGGACAATATTACGATGCCAAGGAATTGACGAAATGGGAACT +GGTCTTTGTTCAAAACGAAGAAGTCCAGTAAGTTTCAGTTTGTAATTTTT +CATTTTATTGACTCAATTTCAGAGGACTTGCTAAGCAGCTCGCCGATGAA +ATGGTGAATAATGGTATGAAATGCAGCAACCCAACGATGAGTTTCATCAT +TAGAGGTGATTTGGAACCAATCTTCAAGAAAGCGAAGGCTGCTGGAACGC +AACTTCTCTTCTTCGTTGTCAAATCTCGTTACAACTATCATCAGCAAATC +AAGGCGTTGGAGCAAAAGTATGACGTGCTCACTCAGGAGATTCGCGCTGA +AACCGCCGAGAAAGTCTTCCGTCAACCACAAACTCGTCTTAACATTATCA +ATAAGACGAACATGAAACTCGGAGGACTGAACTATGCTATTGGGAGTGAA +GCGTTCAACAAGCCAAATAGATTGATCGTTGGATTTGTTACTTCTCAACG +AGTTGGTGGAAATCCAGATGTGAGTCATGTGTTACAGCTGCATAATCATA +TTTCATTTTTTCCAGTATCCAATATCAGTTGGATTTGCTGCAAATATGCT +CAAGCATCATCAAAAGTTTGCTGGTGGATACGTGTATGTTCATCGCGATA 
+GGGATGTTTTCGGATCCATCATCAAGGATACTCTCTTGACAATCTTCAAA +ACATGCACTGAGCAGCGCGGAAGACCAGATGATATCCTTTTGTATTTCAA +TGGAGTTTCCGAAGGTCAATTCTCAATGATCAACGAGGAATTCAGTGCCC +GAGTGAAGGAGGCGTGCATGGCATTCCAAAAGGAGGGAACCCCGCCATTC +AGACCACACATCACCATCATCGCCTCATCAAAGGCTCACAACGAGCGTCT +GTACAAATCCGACAAGGGACGTATTGTTAATTTGGAGCCGGGTACGGTCG +TGGACCATACCATTGTGAGCAATGTCTACACTGAGTGGTATCATGCCTCA +GCTGTTGCTCGCCAAGGAACTGCAAAGGCTACCAAGTTCACTCTCATCTT +CACCACTAAAGCAGGCCCACAAGCTGAGCCATTGTGGCATCTCGAGCAAT +TGACCAATGATCTTTGCTACGATCATCAGATTGTCTTCCATCCGGTCGGA +CTTCCCGTCCCACTGTACATTGCTGATCGTTACAGTCAACGTGGAGCAAT +GGTTCTTGCCGCCAATCAAGGGTAAGTGACAGTAAAATGTATACGACATC +TCATTTATTATTTTCAGTCCAATCTACAATGAAGGACAAATCGATCTCGC +GGCTACCAACAGTGCATATGGCTACGGAGAGAAGAAGCTCTTCACCACTC +GTTTCAATGCATAATTATCTGTACTCCTCCGTTTCTTTTACTTGGCTGTA +TAATGTTTTATTTCCGTCAACCCAGAATACTTTCGCTATGATTGTATATT +TTTTAGCATTTGGTCATTGTAATTTGTAATTATTGTCGGAATGGATAAAA +TGATTTAAGAATCAAATGCATCGCTTCTTGGACAATTCCTGTTGATAATC +TCATTCTGATATTTTCAAAAGTTGTGGTATTGATTTTAAAAATTAAAATA +TTTGCAGCTGCCGCTGTGACTGGAGAAGTTGATGTTTTATTTCTTCCAAT +TCAGCTGTTTGTACCTGTTTTGTAATGTTATTACTTTCTTTCGAACAAAA +TGTCATTCTTTTTCTGGCAGAAAATTCCGCTTTTTAAAAATTATTTTTAC +AACATTACAGATGGATCCTTCTGTTGTGTCTCGATTGGAGAATGTTGCGA +ATCGAATGGAGAATATATTGCTGAAATATGACTCGAACAAAAAAGGCAAT +TAAATAATTGTTCTCGAAATTATAAATTTCAAAAAAATCGCTTTTTAGAA +ACTCCGGTCGGCGCGACGCCTCAAATCATTAATCTTTATGACGATGCGAT +CTGTGAGAATCTCGTCTCGTTTTATGATTTATCTGCAAAAATTGGAGGAG +ATTTGAATCGCCTTGGATGCATGGTAAGGAAATATATAATATTTAATTCA +AATTTTATCGAAAATTATGTTTCAGACTAAGAATCTATTTTTCACGCTTT +TTTCGATGTTTTTTGTGGATTGCGTGTGGGCGCAAAAAAGCGGACAACGA +CGAGTTCGCGACTCTTGTGAACGATTTGACGACGGAAATTGTTGCATTTT +CCGATTTCAAGGAGAAAAATCGAAAATCCGAATTCTATAATCATATTTGT +GGACTTGAAGCTGCGGTTGGAGGTGAAAATTTGCCCCTGATTCCGAATAT +CTATGTGAAAAACTCTTCATTATTTTATATATTTCAGCTTGAAATCGCTT +GTTCATCCATTACTTTTTTCAAGCACACGCCATATTCTCATTGGAGCGCG +CCGTGTTGTGTCGATTTACGAAATTTTCCAATTTTTTCAATTTCAGTTTT +TAAACGAGTTTTTATTTTTGTGGGCTTTATTTTTGTGGGTTTTGCTTTTT +GGGTTAAAAAAAATCTTAAAAACAATCATAATTCTCAGGAAACTCAAATT 
+TTTCGCCGAAAAACCTATTTTGCGCGTGAAATCTGGTGTATTTTACCGTG +CCTAGACTATGTGTGAGAACACTGCCGGTCATACATCATATTTGACGCGC +AAACTGCAAAAATCGTCGGAATTATTTCGGGAACACAAAATTCTGAAAAT +GCGTATCGCGCAACATATTTGACGCGCAAAATATCTCGTAGCGAAAACTA +CAGTAATTATGTAAATGACTACTGTAGCGCTTGTGTGTCGATTTAAGGGC +TCAATTTTCAAAAAATCGAGATCCCGTAAATCGACACAAGTGCTATAGTT +GTTATTTGAAGAATTACTGTAGTTTTCGCTACGAGATATTTTGCGCGTCA +AATATGTTGTGCAATGCGCATTTTCAGAATTTTGTGTAAAAATTTTTGTG +TTTAGTGATTTTTGCAGTTTGCGCGTCAAATATGATGTATGACCGGCAGT +GTTTGCCCACATCGCCTAAGCACGGTAAAATACACCAGATTTTTCGCGCA +AAATGGGATTTTCGGCGAAAAATTCGAATTTCCTGAGAATTTTGGTCGTT +TTTACTATTTTTTAATGCAAAAAATGAAAATTTTTAAATTTTCTGAAATT +CACCGTCGTTTTTCACGAAATAAAAAAATTTCAAAAAATTGAAAATTCCG +CCACATCACATTTGACGCTCAACTTTTTTTAAATGAACAAAATCCGGATT +TTTCTGGGAAATTTGAGATTTATTTAGGTCTCGCAGCGAAAAAAAAATTA +GAGTTTGCGCGTCAAATATGATGTATGACCGGCAGTGTTTGCACACACAT +AGCCTAGACACGATAAAATACATCAGATTTTACGCGCAAACTGGCAAAAA +AATTGATTTTTTGGCTTTTTCGGACTAAACATTAGAAAAAAAGACTATAC +TCCAACAATTTTTTTCAGTTTTTTTTTGAAAAATTCCATATTTTCTGAAG +TTCAAATAAATATATATTTTTTTAAAGTTCAAAAAATTCAAATTAGCGCG +TTAAAAACGACGTATTCAGGCATCACATTTGACGCGCAATTTTTTTTTTA +AAGAAGGATTTGCGCGTCAAATCTGGCGAATTAGGTTTGTCGGCGGAATA +CACCGAATTTCACGCGCAAAATTTCAAACCTCAATGATAAAATCGGAGAG +CGTATTCGAGGAAATTACGTCTTTCAACGTCTCCGCCGGAACCACGACAG +CGAACAAGTTTTGTGTTGACTGAAAAAAAAATTAAATTTTTCGACAAAAA +AAATTAAATTTTTCAACGAAAAAAAATTAAATTTTTCAACGAAAAAAATT +AAATTTTTCAACAAAAAAAAATTAAATTTTTCAACAAAAAAAAATTAAAT +TTTTCAACAAAAAAAAATTTAATTTTTCAACAAAAAAAAATTAAATTTTT +CAACGAAAAAAAATTAAATTTTTCAACGAAAAAAATTAAATTTTTCAACA +AAAAAAAATTAAATTTTTCAACAAAAAAAAATTAAATTTTTCAACAAAAA +AAAATTTAATTTTTCAACAAAAAAAAATTAAATTTTCCAACAAAAAAAAA +TTAAATTTTTCAACAAAAAAAAATTAAATTTTTCAACAAAAAAAAATTAA +ATTTTTCAACAAAAAAAATTAAATTTTTCAACAAAAAAAAATTAAAATTT +TCAACGAAAAAAAAATAAATTTTTCATCAAAAAAAAATTCAAATTTCCGC +GGAAAGTGCAAAAAATTTACAGAATATTCAAATCGTTCACTTCTCCCGCT +ATCATCTTCACGCAATTCGACACATTTTCGTCGCCATTCGATGCGGGTTC +CCAGTGATGTATCGATTTTCAGTTGATTATCGATTTTTCGCTCCACGTAC +AGTACACTGAGTTCCGCTTCGAGGCCATCTGAAATTTTTTTTTTAATTTT 
+TTTAACGATTTTTTTTTCAGAAAATCAATAAATCTATTCAGCTGTTTTTT +TTTAACTTTTACACAATTTTCCGTTTGAAAATGTCAAAAAAATGTGTTTT +TGAGCCACATTTTATTCAAAAAATTTTGAAAAATTCCCTGCATTTTCGCT +GTGGGACCCAAGAAGTGCGTGTGCCTTTAAGAATATTTCAATTTTTTTAA +AACTTAAAACAAAGATTTAAAAAATTCTGCTTTCTAAAAATAAATAATTA +TTTATTTTTTTGTGTATTTGTTTGGAAAAAATCAATATTTATCGATTTTT +GTGAATTTTTTTGAAAAAAATCAATATTTATCGATTTTTGTGAAATTTTT +TGGGAAAAATCAATTATTATCGATTTGTGTGAATTTTTAAGAAAAAAATC +AATAATTATCGATTTTTTGTGATTTTTTCGCAAAAAAATCAATAATAATC +GATTTTTGTAAATTTTTTAATCGAAATTCGGCTTTTTCCGGTAAAAACAT +CAAAATTTCGCAAAATTAATCTGATTTTTTCTGCGAAAAATTTAAATTTC +TCATAAAACTTTCGAAAAATCGAATATTTTAAAAATCAATAATTTCGATT +TTTGGTTAATTTTTTTGGAAAAAATCAATAATTATCGACTTTTTGCGATT +TTTTTTTTGAAAAAAATCAATAATTATCGTTTTTTTTTAATTTATTGGAA +AAAATCAATAATTATCGAATTTCAGTGAAATTTTTGGAAAAATCAATAAT +AATCGTTTTTGTGTGATTTTTTTCGGAAAAAATTAAAAATTATCGTTTTT +TGTGAATTTTTTGAAAAAATTAATAATTATCGACTTTTTTTGAATTTTTT +TTTGGAAAAAATCAATAATCATCGATTTTTTTGTGAATTTCTTGGAAAAA +AATCAATAATTATCGATTTTTCCGATTTTTTCCAAAAAAATCGATAAATC +AATAAATTTTCCTTACGCCACGTGTCAATTACAGTCTTCCCCACGTGGCA +AAACGTGTAGAGCTCGCATTTCGAATTGGTTGCCGCCGAAATTTCGCGTT +CGATTTTTCGAGTTTCCTTGTCTTTCTGAAATTTTTTTTTTCTAAATTTT +CTGAAAAATGTTCTTCTGTAAAACCTCTTTTTCCTCTCTTCTCCGCGCCA +TTTCGCTCTTTTTTTTATCCTTTTCCAGCGTTTTTCTCTCTTTTTCAGCT +TCCCGAGCTCGTTTCGCATTTTCTCGGAGTTCCTGAATAAGTTTTAAATT +TTTAAGGGGAAATTTGCTATTTGGAGAGTAAATATTTTCGCTGTGAGACC +CGTGGACCTGAAAATTTTGATTTTCCGCTTAAAATCAACTGAATTTCGCT +TAATTGTGATGTTTTAAGCTGAAAAAGCTGAATTTTGATTTTAAAAAATT +GAAAAAAATTAATATTTTTAAAGGCGCATTTATTGATTTTTTTTCCAAAA +AAAATTTACAAAAAAATTGATAATTATTGATTTTTTCCAAAAAAATTAAC +AAAAATCGATAATTTTTTTTCCAAAAAATTCACTGAAAAATCGATGATTA +TTGATTTTTCCAAAAAAAATTCACTGAAAAATCGATAATTATTGATTTTT +CCAAAAAAAAAAATCACAAAAGTCGATAATTACTGATTTTTTCCAAGAAA +ATTTTAAAAAAAAACGACAATTATTGATTTTTTCCGAAAAAATTACAAAA +AAATCGACAAGTACTGATTTTTTCTGAAAAAATTACAAAAAATCGATAAT +TATTGATATTTTACAAAAAATCGATAATTATTGATTTTTTACAAAAAATT +CACAAAAAAATCGATAATTTTTAATTTTTTTCAAAAAAAAAATCACAAAA +AATCGATAATTATTGATTTTTACCAAAAAAAATCACAAAAATCGATAAAT 
+ATTGATTTTTTCCAAAAAAATTAACAAAAATCGATAATTATTGATTTTCC +CAGAAAAAATTCACTGAAAAATCGATAATTATTGATTTTTTCCGAAAAAA +TTCACAAAAAAATTGATAATTTTTTAATTTAAAAAAAAATCACAAAAAAT +CGATAATTATTGATATTTTACAAAAAAAAATTACAAAAAATCGATAATTA +TTGATTTTTTACAAAAAATTCACAAAAAATCGATAATTTTTAATTTTTTT +CAAAAAAAAAATCACAAAAATCGATAATTATTGATTTTTTCCAAAAAAAA +TCACAAAAATCGATAATTATTGATTTTTAGGCGGAAAAGCCGAATTTTTA +TCAATTTTTCAAAAAAACAATCGATAGTTTTAAAGGCGCATGCGCTCTCC +TGGGTCTCACAGCGAAAAGTGGATTTTTCAAATTTTTTGGATAAAATTTC +GCTGAAAAACGTATTTTTTTGACAGTTTTAAAGGGAAAATTGTCTAAAAT +CAAAAAAAAATTGTTTTTTCAACAATTTTTTCAGTTTTCGTAAAAATTTC +AGTTTTTTTTTCGCGAGAAAAAAACCAATTCTCGATTGAAAAATCAGAAA +AAATATTTTGGTATAAAAATATTTTTGGTATTACGATATTTTGGGTCCCA +CCTCGAAAATTTAGAGGATTTTTCAAATTTTTAAGATTTTCTTCAACTTT +TAACAATTTTTTTCGGAATAAAAATCGATTTTCGCTGCGAGACCCAAAAA +AAATAGATTTTCTCGAATTTTGCGATGGAACTTTTGATTTTTCGGTGAAA +TTTTCCAAAAAAATCGCAAAATGTATCAAAAAATCAATACTTTCTGCGTC +TCCTTCTCATCGTCGGTTAGTTTCCGTTTTTCGGGTCTCGCCACGACATT +TATGTCAGTATTCTCTTGTGAGCTTGAGCACAACGAGTATGAGCTCGTCT +GATTCTGACGGATCATTTCGTCAAGATTGAGCATTCCCTCATCCAGCCAT +GTTTTTGCCGCAGCTGGAATTGCATCCTTATTCACTGCGTCGACGATGGA +AAAGCTCGCGGAGTACCTGGAAATTATCGATTTTTTATTGATTTTTTATC +GGTTTTTATCGATTTTTTTGATGGGGAAAAATCGGAAAAATCGATAAATT +TCGATTGGAAAATTCAAAAAACTTCGGGTCTCACAACGAAAATTGATTTT +TTTTCTTGGATTTTCAAATTTTTAAACTTTTGTTGTAACAACAGTGGAAA +TTTTTCGAAAAAAAATCAATAATTATCGATTTTTTGAAAAAAGGCAATAA +TTTTCGATTTTTTGAAAAAAAAAATCAATAATTATCGATTTTTTGTGAAT +TTTTGAAAAAAATCAATAATTTTCGATTTTTTGTGATTTTTTTTCGAAAA +AATCAAAAATTATCGATTTTTTGTGGATTTTTTGAAAAAAATCAATAATT +ATCGATTTTTTGTGGATTTTTTGTAAAAAATCAATAGTTTTCGATTTTTT +TTGGAAAAAATCAATAATTATCGATTTTTTGTGGATTTTTTGAAAAAAAT +CAATAATTATCGATTTTTTTTTGAATTTTTTGAAAAAAATCAATAATTTT +CGATTTTTTGTGATTTTTTTTCGGAAAAATCAAAAATTATCGATTTTTTG +TGAATTTTTTTTTGGAAAAATCAATAATTATCGATTTTTGTGATTTTTTG +TAAAATATCAATAATTATCGATTTTTTGTGATTTTTTTCGGAAAAATCAA +AAATTATCAATTTTTTGTGATTTTTTTTTTGGAAAAATCAATAATTATCG +ATTTTTGTGATTTTTTTTGGAAAAAATCAATAATTATCGATTTTTGTGAA +TTTTTTGTAAAATATCAATAATTATCGATTTTTTTGTAATTTTTTCGGAA 
+AAAATCAATAATTTTCGATTTTTTGTGAATTGATTTTTTTTGGATTTTCA +AAATTTTAAACTTTTAAAATAAATTTTTCCATAGTTTTTTGTTGAAAAAT +TCCTGAAATTTTGAAATTCAATTAATTTTTTCTTTAAAAAAATCGTTGAA +AAATTAGTTTTTTGGAAAAAAAATCAGCGAAAATAGATTTCGCTGTGAGA +CCCAATTGGGCTCCTCAAAGTCCCTGCGCCTTTAAAATATCGTTTTTTTT +TTGTTAAATTTTCCCGCGAAATTAATCAGATTTTTTCTGCGAAAAAGTGA +AATTTCTCAAAGAAACTTTTGAAAAATCAATTTTTTTAAAAAGAAAACCC +CTAATTTTTCCGTTTTTCCCGCTAAAAAAACATCTAAAAAATTTGAAAAA +TCCACAGAATTATCGCTGTGGGACCCAAAGTGCATGCGCCTTTAAGATTA +TCGATTTTTTTCAAATTTTTCAATCGAAATCAGGTTTTTTTTTTCAGCAA +AAATCATAATTTTGCGAAATTAATCTGATTTTTTGTGCAAAAAATTCAAA +TTTCTCTTTTTTTACGTTTTTTCGCCAAAAAAACCATCTAAAAAATTCGA +AAAATCCAGAGCATTTTCGCTGTGAGACCCAAGTCCGTGTGGCTTTAAAA +ATATCGATTTTTTGTTAAATTTTCCAATCGAAATTTGGCTTTTTTTCCGG +TAAAAAATCATAATTTTGCGAAATTAGTCTGATTTTTTGTGCGAAAAAGC +GAAATTTCTCATAAAACTTTTGAAAAATTGATTTTTTTTTTCTGGAAAAA +CCTATTTTTGTCGTTTTTTCGCTAAAAAACATCTAAAAAATTCGAAAAAT +CCCCAAAATTTTCGCTGTGAGACCCAAGGCTCCTGAAAGTCCCTGCGCCT +TTAAAAATATCGATTTTTTGTTAAATTTTTAATCGAAATTCGGCTTTTTC +CGGTAAAAACATCAAAATTTCGCAAAATTAATCTGATTTTTTCTGCGAAA +AATTCAAATTTCTCATAAAACTTTCGAAAAATCGATTATTTTCCGCTAAA +AAATCATCTAAAAACTCGAAAAATCCAGACAACTTTCGCTGCGAGACCCA +AGTCCATGCGCCTTTAAAAATATCGATTTTCTGTGAAATTTCTCACAATT +TCTCCAAATTTCCCGCCGTCACATTCGTCGTTTCCTCAATCATAATACAG +TCCTCATCATCATCATCATCCGAGAGGACTACAATCGCCTCGTCCATCTG +AATAGTGGGAAAATATCGATTTTTTGTAGAAAATCATATATTTAAATTGA +TTTTTTTACAATAAAAAAAATTTTGGGGATTTTTTTGGAAATTTCGGGTA +AATCGTGTAAATCGTGTAATTTAAATTCATTTTTGTTTCAAAAAGTCAGG +GGGGGGGGGGGAAAGAAAAAATTTTTGACAAAGCGTGGAGCGAAAAAAGG +AGTAAATATACAGTCGCGAAAGAACGATCGCTCCGCCAATTTTGCATGCG +GCAAAGGGGCGTGGTTTATTGGGGGCGGGATTCCGGCGCAACCCTGCGGC +ACGCTTTTTTCTCGCTTTTTTCGTGCGGTAATTTTCAGTTATTTTTATTC +GTTTTCTGTTCGAAATTTCACGATTTCGCTCGATTTTGTTCGTTTTTTCG +GATAAAAAAGTGTCCAACTATTTTTTAAATGAAAAATAACCAAGTTTTCA +CAGATTTTACCCGAAAAATGCTTTGTTGGTCTCTCTTTCTGTAAATTACG +ATTGTTAAGAGAGAAAATGGGGGAAAAAAAGGAAAATGTGAAGAGAAAGC +GTTTTTTTAACGTTTTGCTTCGTATATTACAGAAAAAGAGACAAACGAAG +CATTTTTTCAAGTAAAAACTGATAAAACTTGGTTATTTTTCATTTAAAAA 
+TAGACACTTTTTTATCCGAAAAAACGAACAAAATCGAGCGAAATCGTGAA +ATTTCGAACAGAAAACGAATAAAAATAACTGAAAATTACCGCATGAAAAA +AGCGAGAAAAAAGCGTGCCGCAGGGTTGCGCCGGAATCCCGCCCCCAATA +AACCACGCCCCTTTGCCGCATGCAAAATTGGCGGAGCGATCGTTCTGTCG +CGACAGTATGTAACATGTAAGGTAACAAGGTGGGTTGGTTTTCATGAGAA +AAGGGGGATTTTTTTCCGATTTTTCAGGTGCAGAGCCCACGCTCTCCTCA +AAGCCGAATAATTATTAGAGCGCGCTTGCAGCGACTCGCGTTTCTGCATC +CGCGGTATTTTGACTTTCCACTGAAGAAAGCAGATATTTCAGATTTATCG +AATTTTTAGGTTTAAAATTTTTTTTTTCTGTATTTTTCGAACAAACCTTT +TGTCAAACAGTAAAAATCGAAATTAAAATGACTAAAATGAACTTTTTTTG +TCCACTGGTTGTGAAATGGTTTGAATTTGAAGAAATCAACGGGGTTTTTC +GTATTTTCTGAATATTGTTCTATTAAAAATTGGTTTTAATACATTTTTGA +CTTAACATTAGGGTCAAAAAATGGTTTAAAACCGATTTTTAATAGAAAAA +TATTCAGAAAATACGAAAAATCCCGCTGATTTCTTCAAATTCGAACCATT +TCGCAACCAGTGGACGAAAAAAGTTCATTTTAGTCATTTTAATTTCGATT +TTTACTGTTTAACAAAAGGTTTGTTCGAAAAAAACAGAAAAAAAAATTTA +AAAGCTAAAAATTCGATAAATCTAAAAAAGCTGCTCATTTCAGTGGAAAG +GCAAAATACCGCGGATGCAGAAACGCGAGACGCTGCAAGCGCGCTCTAAT +AATTATTCGGCTTTGAGGAGAGCGTGGTGCAGAGCCCCAATTTTAAACAA +AAAATAGCGGAAAAAAATATTTAAAAAAACGAGGATTAAATAAAAATTAA +ATAATGTCAGCAATGTTCATTGGCATCTCGTCGATTTGTGTCGAGTAGTA +CTGCTCAATGTCGCGGAGAATTCGTACGTCGTCCTGTTTGACGAAATTGA +TGGCGACTCCCTTGCGGCCGAAACGTCCCGAACGGCCGATACGATGGATG +TAGAGCTCACGGTTGTTCGGCAAATCGTAGTTGATTACCTGGAAAATTTT +TTGATGAAAAGCTGATTTTTGCGATTTTCGCCATTTTTTGACCTAAAAAA +TTGGCGAAATTCGGAATTTTTAAACAAATTTTGACGTAAAGTAGGTTAAA +ATTCCAAAATTTTACGGTAAATAGCCTGAAATGCTTTAAATTGATCCTTT +TTTGCACAATTTTAACAATGAAAACCTGTATTTTTCGCTACTTTTAGTTG +AAAAAAGCTTCCAAAACGAGTAAAATTGGCAAATTTTATCGATTTTCGCA +AATTTTTCACCTAAAAATTGAAGAAATCGGTATTTTTAAACAAATTTTGG +CATAAAGTAGGTTAAAATACCAAAATTTTACGGTTTTTAGCAATTTTTGA +GTCAATTTTCAAAATGTTCCAGTATTTTTGCAACTATTTGGTTTTTATTC +ATCTATGTTCTTTGAAAATTCTTTTTTTTAAGGTGAATTAGGTTAGAAAG +GGGTCGATTTTGCAGAATTTTGACAGATTTCGCATGAAAATTTGAATTTT +CAGGCGAAAATCATCAAAAATATGATTTTTTGCTACTTTTAGTTGAAAAA +AGTGTACAAAACGAGTAAAATTGACAAATTTTTATCGATTTTCGCCATTT +TTTCACCTAAAATTTAATTTTTTTTTTCTAGTGAAATAAGTTAAATTCAG +TGTTAAAATGTATCTATTTTCGCTCAAAATTTGAAAATTTTCAATTTTTC 
+ACTGAAATCCATGAGAAACCAGTGAAAAAATTTTTTCAATTTGAAAATTT +GCAGTGAATTTTATTTTTTCTGCTAATTTTTCGGTGAAAAATGCACTTTT +TCACGAGAATTAGCGGGATTTTTGTATTTTCAAAGTTTTTAGGTCAATTT +TCAAAATGTTTCTCGCTGAAAATTGATTTTTTGTCAATTTTTCACATATT +TTCTATGAAAAATTCCACCTTTTTGCAGAATTTTGACTAAGAAATACGAT +TGTCGCCTAAAAGTTTGAATTTTTCAATTTTTTTTTGCTGAAAATCATCA +AAAATACGATTTTCAACAGAAAATGATGAAATTTTCGCTATTTTTCGCGG +AAAATCATTGATAACCTGATTTTTTCGCTACTTTTAGTTGAAAATAGTTT +ACAAAACGAGTGAAATTGGCAAATTTTATCGATTTTCGCAAGTTTTTAAC +CTAAAATTTAGTTTTTTTCTAGTAAAGTAGGGAAATTCAGTGAGATTTTT +TAATTTTTCACTGAAATCCATGAGAAATTAGTGAAAAAAAAATGTTTTTG +GCCAACTTTTCACATATTTGATATGAAAAATGCCGTTCTGAACAAAAAAA +GCTGTGGAAATTCAACTTATTGTCAATTTTCCGCTGAAAAATTCATTTTT +TTTTCTCAAGAATTCGCATTAAAACTGCGGAATTTCTTGTATTTTCAGCA +GAAAATGATGAAATTTTCGCTATTCTTCGCGGAAAATCATTGAAAACCTG +ATTTTTTCGCTACTTATAGTTTAAAATAGTTTACAAAACGAGTGAAATTG +GCAAATTTTATCGATTTTCGCAAGTTTTTAACCTAAAATTTAGTTTTTTT +TTCTAGTAAAGTAGGGAAATTCAGTGTCAAAATTTACTTATTGGGTAAAT +GGAAATTGTGAATTTCCATTGTAAAAATTTCTGCTGGGAAATTCATTTTT +CCCAAATTTTTCATATAATTAGGCTGAAAAATGCAATTTTTCACAAGAAT +TCGCATTAAAACAGCGGGATTTTTTTTGTATTTTCAACGGAAAATGCTAT +TTTTTGCGGAAAATCACTGCAACTTTTTCCTCACGAGGGCCAAGGAAAAG +TGGTTTCTAGGCCATGGCCGAGGGGCCGACAAGTTTCAGCGGCCATTTAT +CTTGCTTTGTTTTTCGCCTGTTTTCTTTCGTTTTTCACAGCTTTTTCCCG +TTTTTTCTTATTAAAACTGATAAATAAATATTTTTTGCAGATGCTAAAAC +AATTTCCAAGTAAAAAAAATTATGTATTCAGTCGGCAAGCAGCGGTGAAA +GTGGGCAATGTAAAATGATGGATTACGGGAATACAAAACCTGAAATTTTT +CTGAAACATGATACATATGCTGCTTAGATGCTGATACTACCTGATTTTCA +TAACGAGACCGCTGAAAAGTTTTGAGGTTTCCACAATTCAACTTTTTTGG +TGAAAAAATCGAGATTTTCGCACAAAAAGTTGAATTTTGAAAACCTCAAA +ACTTTTTCAGCGGTCTTGATATGAAAATCAGGTAATTTCAGCATCGAAGC +ATCATATGTATCATGTTTCAGAAAAAGTTTAGGTTTTGTATTCCCGTAAT +CCATCATATTGCATTGACCACTTTCACCGCTGCTTGCCGACTGAATACAT +AATTTTTTTACTTGGAAATTGTTTTAGCATCTGCAAAAAATATTTATTTA +TCAGTTTTAATGAGAAAAAACGGGAAAAAGCTGTGAAAAACGAAAGAAAA +CAGGCGGAAAACAAAGCAAGATAAATGGCCGCTGAAACTTGTCGGCCCCT +CGGCCATGGCCTAGAAACCACTTTTCCTCGTCCCTTGTGAGGAAAAAGTT +GCAGTGGGAAAATGCTATTTTTCGCGGAAAATCGAAAATGTACCAAGGAC 
+ACTTGCGGAACATCAAGTCCTCTCGCCCAAACATCAGTAGAAATGAGGAC +ACGAGTGGTTCCAGCTCTAAACTCCTTCATAACCTCATCACGATCCTTCT +GCTCCATATCTCCATGCATCGATGAGACGGTAAAGTTGGCCTCCTTCATC +TTATCAGTCAACCAGTCCACCTTTCTACGTGTATTACAGAACAACACCGC +CTGAGTGATGGTTAAGGTGTCGTAGAGATCGATAAGCGTGTCGAACTTCC +ACTCCTCCCGATCAACTGCGACGAAGAACTGCTTGATGCCTTCCAGTGTC +AACTCGTCACGCTTCACAAGGATCCGAATCGGATCCGTCATGAATTTACT +CGTCATCTCCAGAATCTCATGAGGAAGCGTCGCGGAGAGCAGCACCACCT +GGGCTCCGGGCGGTAGATAGCGATAGATATCGTAAAGCTGCTCCTTGAAC +CCCTTGTTGAGCATCTCGTCGGCTTCATCGAGCACGAGAAGCTTGATGGC +GCGGGTGCGCAGGTTTCGACGGCGAATCATGTCGAAAACGCGTCCCGGAG +TACCGGAAACCACGTGCTGGCCGTAGTCGAGCTTTCGGATGTCCTCGCCG +AGATTGGTGCCTCCGATACAGGCGTGGCACTGGACATTCATATAGTCGCC +GAGCGCCAGCACGACTTTTTGAATTTGAACTGCCAGCTCTCGTGTCGGCG +AGAGAATCAGAGCTTGGGTCTCGCGGACCTGGGTGTCTAGCGATTGGAGT +ACGGAGATCGAGAATGTCGCCGTTTTTCCTGTTCCGGATTGAGCTTGAGC +GATGACGTCACGAGCCTTGAGAATTGCGGGGATGGCGCGTTGCTGGATGG +CAGATGGCTTTTCGAAGCCGTACGCGTAGATTCCACGAAGCAAGTCTTCT +CTGGAAATCGAAAAAAATTTTGAATTTAGTTTTATATTTAAAGGTGGTGT +AGTCGAATTATTTATTTCTTTATTAGACTCAAAATTGTCTGAAAAAAAAG +TGTACAAAACGAGTAAAATTGGCGACTTTTATCAATTTTCGCCATTTTTC +ATCTAAAAAAATCGACGAAATTCGGAATTTTAAACAAATTTTGACGTAAA +GTAGGTTAAAATTCCAAAATTTTACGGTTTTTAACAATTTTTCCACCTTA +AAGTTGGTGTAGTCGAATTATTTTTTTCTTTATTAGACTCAAAATTGTCT +GAAAACACCGAATTTCATAATGAAACTTCTTGAAAACTGAAAACTTTTCA +AAAAAAAGTTAAGGCCTCTTGAAAAAAGGCCTAAAATTAGTGAAAATTTG +AAATTTGACCAACTTGTCTGTCAAGCGGCTGGAAACAATTTTCTTTGAAA +TTGTCGTCTAATTTTGGGTATACAGGTCGATTACCTTGCGTTTTCAGCTT +TATTTAGGTATTTAAAAGTCGATGGACGAAGAGATTTGTCAAATTTTTTT +CACCAACTATCTTCGTCCATCGACTTTTAATACCTTAATAAAGCTGAAAA +CGCAAGATACGACCTGTATACCCAAAATTAGACGACAATTTCAAAAAAAA +TTGTTTCCAGCCGCTTGACAGACAAGTTGGTCAAATTTCAAATTTTCACT +AATTATAGGCCATTTTTCGAGCCGCCATAACTTTTTTTTGAAAATTTTTC +AAGAAGTTTCATTATGAAATTCGGTGTTTTCAGACAATTTTGAGTCTAAA +AAAGCAATAAAAAAAATTCGACTACACCAACTTTAAGGTGGAAAATTTTT +TAAAAACCGTAAAATTCTGGAATTTTAACCTACTTTACGTCAAAATTTGT +TTAAAATTCCGAATTTCGTCAATATTTTAGATGAAAAAATGGCGAAAATC +GATAAAATTTGCCAATTTTACTCGTTTTGTACACTTTTTTCAACTAAAAG 
+TAGCGAAAAATCATGTTTTTGATGATTTTCAGCTGAAAATTCAAATTTCA +GGCAAAATTCTGCAAAAAAAGGGTAATTTTTCAGAGAAAATATGGGAAGA +ATTGAAAAAAAAAACTGTTTTCAGCAAGAAAAATGTTTGAAAATTTACCT +TTAAATTTTTAAAAACCGTAAAATTTTGGAATTTTAACCTACTTTACGTC +AAAATTTGTTTAAAATTCCGAATTCCGTCGATTTTTTTAGATGGAAAAAT +GGCGAAAATCGATAAAAGTCGCCAATTTTACTCGTTTTGTACACTTTTTT +CAACTAAAAGTAGCAAAAAAATAATGTTTTGAGGCGAAAATTTGTTTTCA +GCGACAAAATTCTGCAGAAAAAGTTCAATTTGAAGCTTTTCTGGTTATTT +ATAGAGAATTTTCCGTTAAAATCGATAATTCAATACCTCAAGCCCATTTT +GTCAAAAGTTGGGATAATTGAGACCTCCTCAGACGACTCGAACTCCACAG +TCGCCATATCGTCGTTTTTCTTTTTATTTTCCGCCATTTTTGAAGGAAGA +ACTCTGTAAAATCGATAAAAATGAATATTTTCGAGCATTTTTGCAGAGAA +ATTCAGCATTTTTAGGTGGAAATTATCAAAATCAATGAATTTACATCGAA +AAAATCAAGAAAAACGTGTAAAAACGATAAAATTGATTATTTTCGGGCAT +TTTGCAGAGAAATTCAAGATTTTTAGGAGGAAATTAACAAAATAAGTGAA +TTTACATCGAAAAAATTAAGAAAAACCTGTAAAATCGATAAAAAACTAGT +AAAAGAGCAAGATTTCGAATGTTTTCGAAAATACCCTGTTTTTCGAGCAT +TTTTGCAGAGAAATTCAGCATTTTCAGGTGGAAATTTACAAAATAAACGA +ATTTACATCGAAAAAATCAAGAAAAATGGTGATTTATCGGCTATTTTCGC +TCATAATTCGTAGGATTCGATTCGAAACTAAAATTCGAAATTCGAAAAAC +ATGGCCGTGGCCTAGCGTTCCCCTCCTAGTCCACGGCCGCGCACGATTTA +CGGAGAGCACGATTATTGATCGATGACACATGGTTTCCGACTGTTTTTCC +TGTTAAATTTACCCTTAAATCACAATTTTCCATAGAAAATGGGTGGAATT +TTCTCGAAAAAAGAGAAATCACCAAAATCGGCGCCGGTTTCCGATCAGGA +TAATGCGATTTTGGTTTGTTTAATGAGGTTTTTCACATGGAAATTATTAT +TTTTTTGGTAATTCCAGGCGCTCAAAACACAACGGGACAAAATGAAGCAA +ATGATCAAGCGAAAAGAGAATTGTTTGGAGAAGGAACGACAATTGGCAAA +GCAGCTTATAAAGGATGGCCGGAAAGAGTAAGAAATTTCAGAAAATTCAG +TTTTTTTTATTAAAAAAAAAAAATATATATATAAATTTTTAACTAATTTC +AGCCGTGCTTTACTGCTTCTGAAGAAGAAACGCTACCAAGAGAAGATTAT +CGATCAAACCTTGAACCATCTTAGCAAAATCGAGCAAATGGTATTGAAAA +ATCAGAAAAAAATAAAAATTTTCCAGCAAAAAAATCAATAAATTTCCAGG +TAAATGACCTAGAATTCGCAGAAGTTCAACAGCGAGTAACCGATGGCCTT +CGACAGGGAAATGAAGCGCTGAAAAAGATGAATCAACTATTCGATATCGA +TGAAATCGACAGGATTATGGAAGAAACCAAAGAGGCGGCGGAATATCAGG +AAGAAATCTCGAATATGCTGTCCGGCCAGCTTTCCAACACGGATGTCTCG +GACGTTGAGAAGGAATTGGAGGATCTGTTGGCGGCGGAATGGGGCACAGT +TCAACTTCCAGAGGCTCCGAGCCATGAGCTGCCTGAAGCAGAGCGGGAAC 
+GGCAAAAAGGTTGAGATTTGCCGAGAAAATTCCTAAATTTTCCCTAAATA +AATAATTTTTTTTCAGAAAAAGAGAAGCCACGTCGCGAGAAGATTGCTCT +GGAAGCCTAAATATTGCTTGTTACTGTGAATAAAATAAATTCATTATATT +ATTATATTTTTTATACAATTTTGTCTTACTGGCGTTGTTCTGAACGAAAA +ATTCGTTTAAATAACGCTGTAACAAAAAATCTCATTTTGCGCGTAAAATA +AAATGCCAGTGACGCAATCACCACGACGAAAAATCGCACAGTTTTGGAAA +ATTGCGGTTTTTCAAACTTTTTTTCGCTTTTTTCGGATGCTATTCAAGCG +GCTATAGACTGTTCAAATAGAGATAGAACGGTCAGAAAATGAGAGACATA +GACAAATAAAGAGACAATTTATTGACTTTTTAGGCTATTCTTTGTTGGAA +ACCAACAAATTTTGTGTTCCCAGGCTTTTCTTTGCAAATTCTGAAGGGTT +CTTCTTTGCAGATGCGTCGACGTGTAGCTCTGGACAATGACGAGCATGAT +GGTAAGTTAAAAAATTGGAAAAAAAAACTTTGGTTTCTATTAAATTTTAT +TATACTTATTCCCTTTTTATAATTAAAGGCGTCTAGCTCATTCAAGCTCA +TTCACGACGTTAGTTGACGCATTCCTCCTTGCACGCATGGCCTAGTGGGT +TAAGGCGCTGAACATCACTCAGAAGTCCACAAGTTCAAACCCAACGAGGC +TCCCCCGCTTTTCTAACCCTAGCTTGAGCAATCAAATCGGAACGCGTCGG +AGCTGTCTCGGAGCCAGAAGCCTCATCAAATATTTTGATTGCTTCCAAAC +AATCTATCGCACGTTTGTGCTCCTCATTTCCGTAATGACGTGTGCATGCG +TAAGCTTGATCTTTCCAGTTTTCTAGGTGACTCATTAGAGCAATCGAGTG +AGCAACTCGTTCATTCGGTATTTCATGAGCCAATTGCTATGCGGGACGCG +GGGCTTCTGGAAAAAAATTAAACAAATATTTTTCGTTTTTTTTTCCACAT +GTGTGGATCTCAATTTATTTCTTCTTTTTTTGTTTACCTCATTTTATAAA +ATTCTCCTTTTTTTCTCATTAATTTTTCTGTGGCTTTTTCAAAGATCTTT +TTCAGATTTTCATCTATTTGTTTTAATGTGGAGCAAAATTGAAATTTGTC +TGGTAGATACGGTAGCTTTAAAGGCGCATACTAAATTTAAAGTGACAAAA +TAAATATTTAGTTCCATATGGAATTTCAATTTTTGCTGCCAGGCTGTCCC +ATTACGGTTTGATCTACAAAAAATGCGGGATTTTTTTTGGCCCAAATAAT +GTGACGTCAGTGCGTTCTTAACCATGCGAAATCAGATGAGAACTCTGCGT +CTCAACTCCCGCATTTTTTGTAGATCTACGTAGATCAAACCAAAATGGGA +CATTCTGACACCAGGTATTCGCCGCGGAACCAGTTTTATACCGTATTTCG +TATGCGCCTTTAATTCTACCGTACCCGCTTCTGGAAGTTTGGAAGTTTGA +AAAACTATTAAATAGAATTTATTAATTTTTGCATATTACTTTGTGCAGAA +TACCTCTAAAAATATTAAAATCCAGATAAACATGGTTTTTAAAATGTTTT +GCACAAAAAGCGATGATTTCGCTTCGAAGCCACTTTTATAAAACGCCTTG +TGCGCCTTTAATTCCACCGTACTCGCCAAAATTAAACTGAATGAGCGAGT +TTTGAACTAATTTTTACGAAAAAATCACGTTCTGATCGGTTCTGGTCTTC +CTCTAAAATCGGTTCTGGTCTTCCTCTTTGAATTTTCGCGCTCCATTGAC +AATCGCCTGCCGGACTGGGAAAGCCGTGTACTCCACACGGACAAGTACAT 
+TTAGTTTTACAACTAAAATCGAGCCGCGACGCGACACGCAACGCGCCGTA +AATCTACACCAGATATGACCGAGCCAAAATGGCCTAGTTCGGCAAATTCT +TCCATTTCAATTTATGAGGGAAGCCAGAAATTCGTGTATTTTTACAAAGA +AATGTGAAAAAGAAGCTAAAATTTCAATGAAATAGAGATTTTTCCCGGAA +TCTCACAGTTTCGCAAAATACTATGGATTAAAAAACGCTGAAACCCAAAT +TTGCGCGTAAAATTCAATGTATAGCGAACATTCACGGATTTCTGGCTTTT +CTCATATATTGAGATGGAAGAGTTTGCCGAACTAGGCCATATCTGGGGTA +GATTTACGGCGCGTTGCGTGTCGCGTCACGGCTCGATTTTAGTTGTGAAA +CTAAATGCATTTGTCCGTGTGGAGTACACGCGTTGTCCGGCGGGCGATTG +TCAATGGAGCGCGAAAAATTCAATATGGAAGGCCAGCACCCCGTGTTAAC +ATTTAAAAATTACAAGATTTGCCCCCTCAAACATCCCTGGAAAAACTAGA +AAAACCTCGTGTGCTTGCCTGACAACAGCTGTTCACCGACAAAAAGATGA +CGGGGAGGGCTTCAACAGAGCTTGTGTTTCCTCGTCAACTCTTTCGAGAA +CTGACAGCACTTTTCATATAACTCATCACTAACTTTCGCTGCTCGTGGCG +GTTTTTTTTAGTTCCCCAGTAATGAGTCAATTTTGTCTGTTGCTCAGAAC +TCTCAATTACCGATGCTATTAGCGGAAGGACTTTTGGAGCGCCGGAGCCA +GAGCAAAATTCATTTTTATTTGAAAAATGAGTGATGAGTTTTTCTCTCTG +TTTTTCCATGTTTTTCTCGTTATTAGGCTGTAACTTTCCACATACACTTG +GTGTCAGAGAGTCCCATCACGGTTTGATCTACAATAAAAGTGTGACGTCA +GCACGTTCTTAACCTTGTGACAATAGTTGAAAAATCTGCGTCTCGTTTCC +CGCATTTTTGTAGATCACACCGTGATGGGACTCTCTAACACCACGTACAC +ATGAAAATTTCGCTGAGAACAAATTTTTGCGCGTCAAATCTGATGTATCC +GGACTGTACCGATATCAATACATCGAATTTGACGCGCAAACTCCGTTTTC +ATGGGGAATTGTTGAAAAAATTGTTTAACTTCCGGAAAATTAACATTTTT +CAGAGAAAATCAAAATTTTTCTGTTTTTCCTTAAATTTCTAGGAAAATTG +TAGCAGATTTTGTTGAAAAAATTCAAATTTCCACTGAAAAAAGTGAAAAA +AGAAGACGATGTGTAAGCCTAAGCCTAAGCCTAAGACTAGGTCTAAGCCT +AAGCCTGAGCCTATGCCTCAGCCTGAGAATAAGCGTGAGCCTAAGCGTGA +GCCTAAGTCTAAACCTAAGCCTAAGCCTAAGCCTGATCCCAAGCCCACAT +GGTGCCAGGCTGACCCATAACGGTTTGATCTACAAAAAATGCGCGAAATT +TTTTGCCCAAAAAATTTGACGTCAGCGCTTTCTTAACCATGCGAAATCAG +TTGAGAACTCTGCGTCTCTTCTCCCGCATTTTTTGTAGGTCTACGTAGAT +CAAGCCTAAATGGGACACTCTGACACCACGTGAAGCCTAATCCTGAGTCT +AAGCCTTATCCAATTTCCGAAAAATCAATAAAAATGCAAATAACATAGAT +ATTCCCGCCTAAAGTTCAATAAATTATTTTTTTTTTGAAACTTTTTTCAA +AACTTGCGAATGCATAGTACCAAAAAACGCCATATTTGACACGCAATTTT +TGGATTGACTTTTTTTTGAATTGATATGTCATGAGCATCACTCAAGTGGT +TCCAAAAAGGCGTCAAGTACTTGAGCCCTCCCTGAGCCACCACCGTCTCC 
+AGAGAGCAGCCGAAACCAAAAACAAATAGGGGGAAAATGAACAGTTCTCT +CTGTTTCAAAATCTATATATTCCTGTTCTTTGTATTATCTTCCGTGCGGC +TTCAGCCCTCTACTGCTGCTCCAATTCTTTTCCATGTTATTTACCGTACC +GATTAGACGAATGAACAGGACCCCCTTTTACCGTTCAACCGATGAGCCAA +TGTGCTCTTTGTTGGCTAATTTGGGTGTTTTGCTCATTTGGAGGAACACT +AACTTCCCTCTCTAACTACACACTACGTGGAAGAGTTTTTTCAGATTTCT +AGGCCTCGCAACTTTCCTTGGTGGCCCAGAAAAACGCTTAAAACTTCAAG +GTCAAAACGTTCATTTTTGTTGAAAAAAATCGGTTTTTACTTGAATTTTT +GTAAGTGGAAGAGTTTCCTAAATTTCTAGGCCATGTGCTTTTTCCTGCTT +ATCTGAAAATTTTGTTCAAATTTTCTTTTTTCACTTAAATATTCCCCGAA +AATCTTGTCCACAAAAAGAAATTTTTGAACATTCAGATCATTTTTTTAGA +ATGGAAGAGTTTTCCAGATTTCTAGGCCACATGGCTTTTCCTGGTGGCCT +AGAAAATTTTCTGTTGGAAGAATCAGGTTTTTACTTGAATTTTCGTAAGT +GGAAGAGTTTTTTACATTTCTAGGCCACGTGATTTTTTCCTGATGGTCTA +GGAAACTTGTTTTCTCTAATTTTCCCCGATTTTGGATAATATTTGTTTAG +TGGAAGAGTTTTCAGAAATTCTAGGCCACGTGGTTTTTTTTGGAAAAGAA +ATATTTAGATCACATTAACATGTACTTTTATCAGATTTTTTGCCAAAATA +CCAATTTTACAAGCTATTTTTTTGTTATATGTAAATTTTGTGGAAGAGTT +TTTAAAATTTCTAGGCCACCTCTAGATGCACGTGGTGTCAGAGTGTCTCA +TTTCGGTTTGATCTACGTGGATCTACAAAAAATGCGGGAGACGAGACGCA +GAGTTCTCACCTGATTTTACATGGTTAAAAACGTGATGACGTCACATTTT +TTGGGCAAAAAATGCCCGCATTTTTTGTAGATCAAACCGTAATGGGACAG +CCTGACACTACGTGTACGTGCAGATGGCCTAGAAATGCTAAAATCTGTTT +TTTAAATGTAACTTTGAAAGTGAAAGAAACAATTAGAAACAAGTTGGCTG +TCGCCCCCAAAAAGTCTGTTCTGGCTATGTGGTCGCGAAAAGAGAAAACT +CGGCCACCAATTTTTTTCCACGGCCAGATCTATGACGCTATGACGTCACA +TTATCAGCTTGTTTCCGATTTACACACACATACACGTACACTTTCTCCGT +ATCACTTTCACTGATAGCTTGAAACGAGATTAATCATCATTCCAGCTATT +CTTTTTTATTTATTAGGTTAATGCCACTACTACTATTATTTTTTCATATT +AAAAATACAATGAACTATGTAGACATAAATTCAAATGAGATGCCAAAAAG +TGAGTTCGAACTTGTGGAAAATGTGTGTTTTTTAATAAATTTTTCAAAAA +TATAGTAAATTTCTGAGAAATTTTGATTTGAATTCCCGCCAAGCATTTTT +TTGAAAATTTTGAATTTCCGCCAACCATTTTTCCTTTTTTTTCAGAAAAT +GTGAAAAAATTTTCTAAGCAAATTTGAATTCCCGCCAAGATTTTTTTTGA +AAAAATTTCGAACTCGCGCTAAAGTATTTTCTAATAGCAGGATTATTGGA +ATCCCCGCCAAAACATATTCTCAGAAAATTTGAATTTCCGCCAAAACTTT +TTCTGATAAAATTTAAATTCCCGCCAACCATTTTTTCCTGAGAAAATTGG +AAGAACATATTTTCAGTGAGAAAATTTGAAAATATTTTTTTCAGAAAGTT 
+TGAATTCCTGCCAAAACATTTTCTAAGAAAATTTGAATTCCCGGCAAAAC +TTTTTCTGATAATTTTTTGAAATTCCCCGCAAAAATTATAGTAAATGAAG +ATGATTTGAGCAAAGTTATGACGTGGCCGCGGAAAAAATTGATGGCCTAG +TTTTCTCTTTTCGCGGCCACGTAGCCAGAACAGACTTTTGGGACCGTACA +ACCAACAAATATTTCTTCCCCTTTCAAAAATACATAAAAACCAGATTTTG +ACATTTCTAGGCCATCTAGAGGTGCCTCGTGGCCTAGAAATTTGAAAAAC +TCTTCCAGAAAATTTGTTCTGTATTTCTTGAAAGTGCTCCAACAAATCTG +TGCAAATTTTAAAAACGTTTCAAAATATTTATTGAATAAATTCTTTTGTT +GCACAAAGAGGTATCAAATCAGACCACCAGATTCGACCGTTGATCATTGT +TTCAGTTGATAGGTAGGCAGGCGTGGTTGCCTGAAACCTGCCGGCCTCAC +GCTGGGCAAGAGGCAGGCCGCCTTAATGTCAGGCAGGCAATGCCTACATG +CAAGCCCTACTTCTACGATTATCTCGAAATTTAGCGAGTTTTTATTTGTC +CAAAAAAAATCGCAACTGAACAACATGAAAAAGCCGCCAAAAACTTTCAT +AAAATTCTGTCCGTAGTTTGCCGTTTCTTGCGTCTCATGCTTAATCCCTA +CTGTAATTATGCTGAGATTTTCGATCTTTTTCACTTTTTGAAAATTTTTG +GCAATTCTTTTTAAGTTTGTTGAGATTGTAACTTATGACAAACAGTAACT +AAAAAGCGTGGCCGCCAAATTTTCTAGGCCATCAATATCAGTTGCGTCAT +TTCTTCACCATACTGGACCGAGAAATATTCTAGGACATGTGTTTGCGTCA +TTCCTCTTCCATTCCTAGTCCCAAAAATTCCAAGACCACTGGTAAGTTTG +GCCGAGAAATATTCTAGGCCATCGATGTTAATGACGCCATTTCATACCCG +AATTGGCCACTAAAATTCTTAGGTCATCAGTGACGTCACACCTCTTCCAA +GCCTTGGAATGAGCATTTCTAGGCCCTCTAATCTAGGCTTGGCCGTCAAA +TTTCCTAGGCCAGGTGACGCCATCCCTCTTCCATGCTCGACCACTAGGTC +AAATGACGTCACTTCTCTTCCACCTACCGCACTTTGATTTCCTAGGCCAC +CATGTGACGTCACTTCTCTTCCATCTAGGTTTCCCATGCCCTTCTCCTCT +CACGTGGTATCAGAGTGTCTCATTTTGGCTTGAATACGTTGATCTACAAA +AAATGCGGGAGAAGAGACGCAGAGTTCTCAACTGATTTCGCTTGGTTACG +CACGTGCTGCCGTCACATGTTTTTGGGCAAAAAATTCCCGCATATTTTGT +AGATCATACCGTAATGGGGTGTCCTCTTGTTAAATATCGAAATTTCACTC +AAAAATTTTGAACGTTGAGCTTTTTGTCTCTGGTCAATTGGTCAGCCGCG +CAGTGTTCCGGTGTTTGTGCTCATGTTTTCGTGTGCACATTTGTGCTGCT +GCTAAGGCGTATTTTGTTCCGGGCGGGGACAGGGGGGAGAAGCAAAAATG +TGTCAAATTTTGTTTGTTTTGCCATTGATATTGTCCTTCTGCTCAGAGGA +TTTCCTCATTTTACGCCGTATGTGCAAAAGCATAATCACCACAATAATTT +TGAATATAAGTTTGCAAACGCGCCCTATTGATAATCCTCGCCCGTGTAGT +CTGCGTGCACGTGGTGTCAAAGTGTCCCATTTTGGTTTGATCTCCGAGAA +ATGCGGGAGAAGAGGTGCAGACTTTTCAACTGATTTTGCATGGTTAAGAG +TGTGCTGGCGTCACGTTTTTCTGGGGGCAAAAATTCCCGCATTTTTTGTA 
+GATCAAATTGCAAAGCGCCCTATTGATAATCTTAGCCCGTGTAGTCTCTT +GTTTAATTTTCAATTTTTCTGGTTCGGCACCTAAACTGTAACTACGACAC +TCGGCAAAGTTAGACTGTTAAACTCCGCCCACTTAATGGCACCTTTTTTG +CAGAAATGTACCCAACCGACAAAGAGCAGACACGGACCACATTTGAGACG +AGAAGTCGGCGGAAACGGCTGGCGCAGGTCGCCGCCACAATTGCTAGCGT +GATATTACTCTTGTTGGCAGGTAAGTAAAAAACTACGACACTCCGAGACC +AAGAAAAAAATCGATAATCGAAATATTGAAAATTGCAAAATTTATTTCGC +CGGATCATTTTCCATATCGATTTTCTGAATGGTATGGTGATAGCAATCCT +CATCGTAGGCACATCCCTGGTGAACCACGTACTCTTGACCTGAAAAATAG +GTTTCGTTCGGTGGAGCACATTTGCATACACTGACCTTTAATCGGGTGGC +GACACGCTTTACACTTGAAACATGCCATATGCCAGTGGACATTGAACGCC +GACACGCATCTCTCAAAAGTCGGCAATTTGCACGCGTGGCATTTTGGTGA +CTTGGCCGCCCGGTGACAGTCCGCGCAACGGAGCGCGTTACCGTACGGCG +CGGGCATTGGATAACACGGAGTCATGCCAATGACTGTCTGGAATTTTGGA +GAAATTTATTGATTTTTTGACGGAATTTAATGAAATAAGGAAAAATCAAT +AATCGACATATTTATTTCGCAAAAAAAAAAAAACATTATTGATTTTCACG +GATTTCTGTCTTCCTCATAAATTGAAATGGAAGAGTTTGCCAAACTAGGC +CATTTTGGCTCGGCCATATCTGGGGTAGATTTACGGTGCGTTGCGTGTCG +CGTCGCGGCTCGGTTTTTGTAGTAAAACTAAATGTATTTGTCCGTGCATG +GAATTCTGGCTTCCCTCATAAATCGAAATGGCAGAGTTTGCCGAACTAGG +CCATTTTGGGTCGGAGAGATTTTGTGTAGATTTACGGCGCGTTGCGTGTC +GCGTCGCGGCTCGTTTTCAATTGTAAAACTGCTGTATTTGTCCGTGTGGC +GTACACAACACTTTTCCACGCGCTGTCCGGCAGGCAATTGTCAGTGGAGC +GCAAAAATTTAATTAGGAAGGCCAGAACCCCGTGTCCGTGTGGAGTGCAC +GACTTTCCCGCGCGTTGTCCGGCGGGCGATTGTCAATGGAGCGCGAAAAA +TTCAATGAGGAATGCTTTTTGCAAAAACAAATTACATAGGCTTCTGGCCT +TCCTTATTGCATTTTTCGCGCTCCATTGACAATCGCCTTAAAGCAGGGGA +GTGTGTGTGTGTGTACTCCACACGGACAACTTAAAATGAAAACCGAGCCG +CGACGCGACACGCAACGCGCCGTAAATCTACTCCGGCCGTGGCCGAGTCA +AAATGGCTTAATTCGGCAAATTCTTACATTTCAAAATATGAGGGAAGCCA +GAAACGTGTGTCACATGTTATAAAATTAATTTTGAAAAAAAATTATTAAA +TTTCGCAAAAATAAATTATCGCTTTTTGTAAGAATTTATCGAATGAATTG +ATTAAAAATTTATCGAATTTTAGCGAATCAGAAAATATCGATTTTCACTA +AGAATTTATCGAATTTTTCGATTTTCGCTAAAATTATCGATTATTGCAAT +GTTTTACCTGACAAACCGAGCATTTCAGGCATGAAATATGATACATCTCG +TCTTGAATTTTGATCACTTTTCCCTGCAAAAATCGATATTTTCAATTGAA +TATATTTCCAAAAATGTGCTCACAGTTTGAGCTTCTTGACGAGTCAGCGT +TGTTGTGCAGCCTTTGCAATGGATATTATATGGAAGAACAATCTTTTGGA 
+GACGTTTATTGTTCGACGAGAACAACATTTTTCGAATTTTTGAAATGATT +AATTTAAAAATTTAAAAAGGAGAAAATTGAAGAGAAACGTGGGAAAACGT +ATTGATTTTTCAAAATTTTATTGATTTTATAATCACGGGGTTCTGGTCTT +CCTCATTGAACTTTTCGCGCTCCATTGACAATCGCCTTCCGGACAACACG +TGGGAAAGTCGTGCACTCCACACGGATAAATACATTTAGTTTTACAACTA +AAATCGAGCCGCGACGCGACACGCAACGCGCCGTAAATTTACCCCAGATA +TGGCCGAGCCAAAATGGCCTAGTTCGGCAAACTCTTCCATATGAGGGAAG +CCATAAATCCGTGTATAATCAAACGCGCAATTTAAAAAAATCGGTAGTTG +AAATTTATCGATTTTCCCTCGATCTTATCGATTTTTTAAAAGACTTATCG +ATTTTTATCCAAATTTATTATTTGAAAAATCAACGTTTATTGATTTTTGT +CATGATTTATCGATTTTCTCAAGAGATTATCGATTTTTGCTGTCGATGCA +CCATGTGCAAACTTCGCCACGTTTTTTCGCGGCGAGACCCAAATTTTTTG +AATTTTCAGTTCTCGGTCTGGTATACGGTAGCTTCTACCTCTACCGTCGA +TATGTGACAGATGCAAATGATAAACGGGATAATGATGAATATTTGCGAAA +ACTCGTTCGTCAAGTCAATGACTCGCCGGAGACCACGTGGAAAGCCAAAT +TCAATAAGTTTGGTGTCAAAAATCGATCTTACGGATTCAAATATACGAGA +AATCAGACGGCGGTCGAGGAATACGTGGAACAAATTCGAAAATTTTTCGA +GTCTGATGCGATGAAGCGGCATTTGGAGTGAGTTAACTCAAATTTGAGCG +GGAAACAACGATGCTCCGCGCTCTCGCCGCATGTAGTTTAGCGCGTAAGC +TGTGATGCATACGATAGTCCTGCAATGCATCATATTTTACACGCAAATGC +AGAAATTACCAAATTTCGAGAAAAATCTATATTTTCCTTTTAAAAATTAT +CGATTTTTTAAACAAAACTTCTGAATAATTGGATTTTTCCAGAAGTTTTA +AAAAGTTTTGGGTACAATTTTAAAAAGACAGCAAAATGTTCGTAAAACAC +CAGGAAAGAATTTTGCGCTTAATATCTGATGCTTCGTAAACTCCACGGGC +TTCCGGCCTTAAAGCCGAGCTACGCGCCGTAAATCTACCCCGGCCGCAAC +GCGCCGTAAATCTACCCCGGCCGTGGCCGAGCTCAAGTGGCCTAGTTCGG +CAAAATCTTCCATTTCAAAATATGAGTGAAGCCAGAGGCGCGTGAATGAT +GACTTCCACACGGTTCGGTAGAGCGCATTTACACTTATGTACCCTTTGCC +GGGCTGCGTTCAAAACCTAAAAATTTGAAATTTTGCGCGTAAGATCTGGT +GCATTGGGTTCTGAATTATTTCGACAAAACATGGTGCATCATCCCAAAAA +TTGCAATTTTGCAGTGAACTTGAGAACTTTAACAGCTCCGATGTTCCGAA +AAACTTCGACGCTCGTCAGAAATGGCCGAATTGCCCGTCAATCTCGAATG +TTCCAAATCAGGGAGGATGCGGGAGTTGCTTCGCGGTCGCCGCCGCCGGT +GTCGCCTCCGACCGTGCCTGTATTCACTCGAATGGAACTTTTAAGTCACT +TTTGTCCGAGGAGGATATTATTGGATGCTGCTCGGTGTGCGGAAATTGTT +ACGGCGGAGATCCACTGAAGGCCTTGACCTATTGGGTTAATCAGGGACTT +GTTACAGGTAAGCGGTGACGTCATAGGTGGCCTAGGAACCTTTGATCTAC +AATGTGATTGAATTTTATATCACGCAATTTGTAATAATTTCTAAAAAAAA 
+TTATCGATTTTTCTGAAAATTAACTTGTTCCAAATTTTGAACGAATTTTC +TAGAAGATTTTTGCAATTTATCGATTTTTGCAAGCTATCGATTTTTCCCG +CAAACTATCGATTTTTCTAGAAAATTATCGATTTTTCTAGAAAATTATTG +ATTTTTCTAGAAAATTAACGATTTTTCTAGAAAATTTTCGATTTTTCTAA +AAAAAATTATCGACTTTTTTCTCAAAAGTATCGATTTTTGCAATTTATCG +ATTTTTTGAAATTTATCGATTTTCCCTGCAAATTATCGACTTTTTTGAAA +ATAAGCTTTCCCCAAAAATTTGAACGAATTTTCTAGAAATTATCAACTTT +TGCAATTTATCGATTTTTTTTTTGCAATTTATCGATTTTTTGCAAATTAT +CGATTTTTTGCAAATTATCGATTTTTCCAAACAATAACGATTCTTCTAAG +CCCGTGGAAGAGTTTTTCGAAAACCTAGGCCATCTCTATTTCCAGGTGGC +CGTGACGGTTGCCGTCCCTACTCGTTTGACCTGTCGTGTGGAGTCCCGTG +CTCGCCGGCCACGTTCTTTGAGGCCGAGGAGAAGCGAACATGTATGAAGA +GGTGTCAGAATATCTATTACCAGCAGAAATATGAGGAGGATAAACATTTT +GGTTAGAGATATTTTTTTTTAGAAAAATAGAGAAAAATTTCTGGGATCTA +TTCAAAAATGGCAAACTTTTTGGAAATTATCGATTTTTTCCGGTAAATAA +TTATCAATTTTTTTCTCGGAAAATATCGATTTTTTCGGAAATTGACAATT +TTTTCGAAAATTATCGATTTTTTCCGTTAATGATCGATTTATTCGGAAAT +TATTGATTTTTCCGATAATTATCGATTTTTCGGATAATTATCGATTTTTT +CCGATAATTATCGATTTTCCGGTAATTATCGATTTTTTCCGATAATTATC +GATTTTTCGGATAATTATCGATTTTTTCCGATAATTATCGATTTTCCCGA +TAATTATCGATTTTTCCGGTAATTATCGATTTTTTCCGATAATTATCGAT +TTTTCCGATAATAATCGACTTTTCCGATAGTTATCGATTTTTCCGATAAT +TATCGATTTTTTTTCGATAATTATCGACTTTTCGGGTAATTATCGATTTT +CCCGATAATTATCGATTTTTCCGGTAATTATCGATTTTTTCCGATAATTA +TCGATTTTTCCGATAATAATCGACTTTTCCGATAGTTATCGATTTTTCCG +ATAATTATCGATTTTTTTCGATAATTATCGACTTTTCGGGTAATTATCGA +TTTTTTTCCGGAAATCATGAATTTTTCCAATAATTATCAATTTTTTCCGA +TAATTATCAATTTTTCCGATAATTATCGATTTTTCGGATAATTATCGATT +TTTTCCGATAATTATCGATTTTCCCGATAATTATCGATTTTTCCGGTAAT +TATCGATTTTTTCCGATAATTATCGATTTTTCCGATAATAATCGACTTTT +CCGATAGTTATCGATTTTTCCGATAATTATCGATTTTTTTTCGATAATTA +TCGACTTTTCGGGTAATTATCGATTTTCCCGATAATTATCGATTTTTCCG +GTAATTATCGATTTTTTCCGATAATTATCGATTTTTCCGATAATAATCGA +CTTTTCCGATAGTTATCGATTTTTCCGATAATTATCGATTTTTTTCGATA +ATTATCGACTTTTCGGGTAATTATCGATTTTTTCCGATAATTATCGATTT +TTCCGATAATAATCGACTTTTCCGATAGTTATCGATTTTTCCGATAATTA +TCGATTTTTTTTCGATAATTATCGACTTTTCGGGTAATTATCGATTTTCC +CGATAATTATCGATTTTTCCGGTAATTATCGATTTTTTCCGATAATTATC 
+GATTTTTCCGATAATAATCGACTTTTCCGATAGTTATCGATTTTTCCGAT +AATTATCGATTTTTTTCGATAATTATCGACTTTTCGGGTAATTATCGATT +TTTTTCCGGAAATCATGAATTTTTCCAATAATTATCAATTTTTTCCGATA +ATTATCAATTTTTCCGATAATTATCGATTTTTCGGATAATTATCGATTTT +TTCCGATAATTATCGATTTTCCCGATAATTATCGATTTTTCCGGTAATTA +TCGATTTTTTCCGATAATTATCGATTTTTCCGATAATAATCGACTTTTCC +GATAGTTATCGATTTTTCCGATAATTATCGATTTTTTTTCGATAATTATC +GACTTTTCGGGTAATTATCGATTTTCCCGATAATTATCGATTTTTCCGGT +AATTATCGATTTTTTCCGATAATTATCGATTTTTCCGATAATAATCGACT +TTTCCGATAGTTATCGATTTTTCCGATAATTATCGATTTTTTTCGATAAT +TATCGACTTTTCGGGTAATTATCGATTTTTTTCCGGAAATCATGAATTTT +TCCAATAATTATCAATTTTTTCCGATAATTATCAATTTTTCCGATAATTA +TCGTTTTTCCCGATAATTATCGATTTTTCCGATAATTATCGATTTTTCGG +ATAATTATCGATTTTTCCGATAATTATCGATTTTTCCGATAATTATCGAT +TTTTCCGATATTTTTTTTCGGAAATGGCTGCCCCATCGCGGTTTGATCTA +CAAAAAATGCGGGAATTTTTAGCCCAAAAACATGTGACGTCAGCACGTTC +TTAACCATACGAAATCAGTTGAGAACTCTGCCGCATTTTTTGTAGATCTA +CGTAGATCAAGCCGCAATGAGACACTCTGGCACCAAGTGTCTGTAATTGA +CTTCACGGAAGAAAATATATTTTTATTTAATGCGCAAAACTAAAATTCAA +AATTTCGTTTCGACCAATACACCATATTTTAAGCGCAAATTTCAATTCAA +ATTTTTGCAGCAACCTTTGCCTATTCAATGTATCCCCGCTCAATGACAGT +ATCCCCCGATGGAAAGGAACGAGTAAAGGTGCCAACAATCATCGGTCACT +TCAACGATAAGAAAACCGAGAAGCTGAACGTGACCGAGTACCGTGACATC +ATCAAAAAAGAAATTCTACTTTATGGTCCCACCACGATGGCGTTCCCAGT +TCCCGAAGAATTCCTTCATTATTCGAGCGGAGTGTTCCGCCCGTATCCAA +CTGATGGTTTCGACGATCGAATTGTGTATTGGCACGTGGTTCGATTGATC +GGATGGGGAGAATCAGATGATGGAACACATTATTGGTTGGCTGTTAATAG +TTTTGGTAACCATTGGGGTGATAATGGTGAGTTTTCGTGCAAATTGCGCA +CGGCCCGGCAAATGGTACATCCATACAAATGCGCTCCACTGATAATTTGA +GTTTAGCCAGGTTTGGGCGCGTTTTAAGGAAAACGCCTTGGTCCAAAAAA +TTTAGAATTTGATTTCGGACATTTTTTATATGCATCACAAAAATGTTGGA +CGAGCCGTTTTTGAGAAAAAGCCCAGGTATACGGTAAACAAATTGCGTAC +AGGTGCCACTTCCCGGGCCGTGATGCGCGTTAAATATAGTGCTTTTTAAG +AATGCGTGTTGCGCAACATATTTGACGCGCAAAATATCTCGTAGCGAAAA +CTACAGTAATTTTTCAAATGACTACTGCAGCGTTTATGTCGATTTACGGG +CTCGAAAAAAAAGTTACAGCGATATTCCATTTTTTCTCATTTTTTGCTTA +TTTTTATTTAGTATTCTCACGGTGTGTCATTCTGTCCCATAACGGTTTGA +TCTACAAAAAATGCGGGAATTTTTCGCCCAGAAAAATGTGACGTCAGCAC 
+GCCCCTAACCATACGAAATTTGTTGAGAAATCTGCGTATAAATTCCCGCA +TTTTTTGTAGATCACGCCGAAGTGAGACACTTTGACACCACGTGTATTCT +ATCTAAAAATAAATTATTTCAAAAATAGAGCCCGAAAAAAAAAATCGACA +AAAGCGCTACAGTAGTCATTTAAAGAATTACTGTAGTTTTCGCTACGAGA +TATATTGATATTTTGCGCGTCAAATATGTTGCGCAGTACGCATTCTTAGA +ATTTTGTGTTGCCGTAAATAAATTTGGACGTCAAATACAGTGCATATTGC +GGTCGATCGGCTACAATATATTTTGCGAGCAAATTTGGAAGTTTTGCGCG +TCAAATATGATGTAGCCGATCGGTCATGTTACAAAACACCATATTTGACG +CGCAAAACTTTAATTTTGGACAAAAAGTCAGAGAATTAGTATTGCGTAAA +AAACTGGCAATTTTTTTCAGAACATTTCTGAAATTTTCAATATATTTTCA +AACTTTTCGCGTCAAATATGGTGCATCAAGTCGCGAACCAAATACACTCT +AACACGTAGTGCCAGGCTGTCCCATTATAGTTTGACCTACAAAAACGCGG +GAATTTTTCGCCCAAAAAAATGTGACGTCAGCACGTTCTTAACCATGCAA +AATCAGTTGAGAACTCTGCGTCTCCTCTTCTCCCGCATTTTTTGTAGATC +AACGTGATTCAGGCCGAAATGTGACACTCTGACACCACGTGGCCCTAATT +AACTCGTAAAAATCACCGTTTTTTTTCCAGGACTCTTCAAAATCAACACA +GATGACATGGAAAAATATGGTCTAGAGTATGAAACAGCAGTCGTCTAATT +ATAATTCGGCAAAAATTTTTTGTAAATAGTTAAATATTGTCCACCTATTT +CATCCCCACCCGATTGCATCTCAACTTGTCCCATTTCCGAGAAAAATCAA +TTATTTATTTTTATTCGGCCATCATTTATTAGCTCCCTCATTCCATAGTT +TTTTTTTAATTAGATTTTTTTTGGATGCTAAAATTTGCCATTCTCATAGA +CATTTTCAATATTTGTGTAAAATAAATCATTTTTCTGGTCTTCAATTGAT +GGAAAAAAACAAAAAAACAATGCGCGTCAGGTTTGGTGTATTGTGTGCAA +ACACCGAGCGCATATTTTGAAATTTATTTTTTTTTCGAATTTATTAAAGT +TGCTGATTTTCTTTAATTTCAGGCTTAAAAATGTCGGATTTTGTGCCAGA +AACCGAGGAAAATGAAGAAGAATCTCAAAAAATCGATGAAATCCATGAAA +TCCCAGCTCAAGCAATTATCATCGACCAAAGTGAGGAAAATGCTGACTTT +ATCAATTTTCAATTACTTTTTGCATTTTTTCTAGGTTTTTCTCGACGATT +TCTCAACAATAAATGGTACGGAAAGTACGGCGAAGATGGGGATCTTCTGG +AGCACATCGACGAGGACTGGGAGCCCGTAGGCCTCGACGAAGTGGAATTT +CTGAGCCAACTGTGGTTTGAGCAGGAGGATCAGGAGAAAAAAGCTCATAG +GCACTATGATTGGGATGAAGAAAAGAAGGAATGGGTGCCGAAGGCCAAGC +AGGAGGAGGTCAATGAGGATTTTATCGCAGAATATCAAGCGAATTACGGG +GTTCAGTATGATGATATTTATAAAAAAATGGACGAAGAATTGCAAGAAAA +AGCGGCAAAAGCTCAAAAAGAAGATGAAGAGAAGAAGGAGAAGAAGCGGA +AGAAAAAAGTTGGCTTGGGAGCCGGGGAAGACGCTAAAGAGGGCTGGCTT +GACCTAGGAGATAAGGTTCATGCGGTTTATGTGTCAAATCTTCCGGAAGA +TATTACAGACGAGGAATTCCAGAAATTTATGTCAAAATGTGGAGTTATTC 
+AGCCGGATATTCGAACGAATAAGCCGAAATGCAAGCTTTACCGGGAGGAA +AACGGCAAACTCAAAGGCGACGGAAGATGTTGTTATATTAAGAAGGAATC +TGTCGAGCTTGCCTGTAACATTCTCGATGGAGCGAACTTAAACGGGCGGG +AAGTGAAGGTTGAGGAAGCTCGATTTGAGATGAAGGGCGATTTTGATCCG +GCGAGGAAGCGGCGGAAATTGACTGCGGCGCAGAAAAAACGATATATGGA +GCAGCAGAATAAGTAATAAAAAATATTTGGTGAAATTTGAAATAGCTCGT +AAATCGACATTAAGCTGAAAAAATGAAAAAAAACCAAAAAAAATCGTATT +ACGGGAACACAAAATCCTGAGAATGCGTACTGCACAACATTTTTTTTTTT +TTTGACGCGCAAAATATCTCGTAGCGAAAACTAAGACTACTGTAGCACTT +GTGTCGATTTACGGGCTGCTCGATTTTCGAAATGAATTAAAAGAATTTAT +TTATGTGATATTGAAATTGGGCAAAAAATAGGAAAATAATACAAAAAAAT +CTAAAAAATTTATTTCAATAATCGAGCCCCCGTAAATCGACACAAGCGCT +ACAGTAGTCATTTAAAGAGTAGTTTTCGCTACGAGATATTTTGCGCGTCA +AATATGTTGTGCAATACGCATTCTCAGAATATTGTGTTCCCATAATATAT +CGACGAAAAAATTGGCCCAAATCGTGGATTTTTGTCCGAATTTGATGACA +AATGGCTTTGAAAATCACAAAATTTCAGAGAAAAATTTCGATTTTTGGTT +TAAAAAAACAAAAATTGAAACCCAACAAAATTGTTAACTTGAAAACCTGC +TGAAAATCTTAGAATTTTCAGATTTCACCCAAAAAATACCAGATTTAACT +CGAAAATTGCGAATTTTAGGCCAAATTCCTTGCATTTTTCATAGAAAAGA +TAAACTAACGACACAAAATCAAATGAAATTGACTAAAAAACTATTGAATT +TTGTAGAAAAATCCCGATTTTTGCCTAAATTTGCAAAAAAAATATTTTAT +TTTAGCATTTATTTTTCTTAATTTTCACACTAAAAATTACGAAAAAAGAC +AAAAAAAATTTTTTTCATTCAACAATTTTTTGGGCTAAAATGTGTAATTT +TTGCGGGAAGAATTCGAAAAATTATTAAAAATCGTAGAGACTAACTGAAA +ATCAATTTTTTAGGGACCAAAAAGCCAACTTTTGCCGATTTTACTCAAAT +TTTTCCGTGAAAAATCCGGAATTTTTAAATATTATTTAATTGCGAAATTT +TACAATTTTTACACCTAAAATAAGGGTAATATGGTAGCTGGTCTCGACAC +GACAATTTTTTTGTTAAATTCTAAAAAAGGTGTGCGCCTTTAAAGAGTAC +TGTAATTTCAAACTTTATGGAATTTTTTTTTGATTTTTCAAATAAACTAC +GAAAAATCGAAGAAAATTCCACAAAGTTTGAAATTACAGTACTATTTAAA +GGCGCATACCTTCTAGAATTTTACAAAAAATTTGTCGTGTCGTGACCGGC +GACCGTATTACCCTAATTTTTGGTGTAAAAATCGTAAACTGTCCGGATTT +TTCACTACGAAATTCAAATTCGAGGAAAATCGAAAAAAATTGGTTGTTTT +GGTTCCTAAAAAATATATTTTCAGTTATTTTCTACCATTTTTTAATAATT +TCTCGAATTTTTCCCTCAAAAATTACAAATTTCAACCAAAAATTCTGATT +TTTCCTTTCCAGAATCTTCGAATGGACGCCCGATAAGCCACGTAACTACC +GTCCAAAGTCGGACTGTACGGTTATTGTCAAAAATCTGTTCACTCAGGAA +ATGATGAACAAGAATGCGGCGTTGATGTTGGATTTGAAAGAGGAAATGAC 
+ACAAAGTTGTCAGAAATATGGAATTGTTAAGAAAGTCGTCGTTTATGCGG +TTGGTATTGAGGAAATTTGTGGAAATTTCGATTTTTGTAAAAAAAATTTA +TTCTAAAAAAATAAATTAAAAAAAAAAGATTTAAAAAATTTAAAGCTAAT +TTTCGAAAAATTGAAGCTAATTTTGGAAAAAAAACCGAAAATTTTTTTTT +TTAGAAATGTCGATATTCATAAAAAAATATTCTAAAAACGGGATCAAAAA +TTTTTGAAGCTATTTGTCGGAAAAATTTTTAAAAATGTAAAGCTAATTTT +CGAAAAAAAACCCAAAATTATTGGATTTAACAAATTTTCAAAAAAATTGT +TCTAAAAAAGATAACCTTTAACATTAAAAAAATTATGGCTATTTTTCGAA +AAATTAAAAAAAAACAATTTTTAGCTAATTTTCGAAAAAAAACCGAAAAT +TTGTGGAAATTTCGATTTTCAAAAAAAATTTATTCTAAAAAAAAATAAAT +TTTTAACATTAAAAAATGGCTATTTTTCGAAAAAACCGAAAATTTTTGGA +AATTTCGATTTTCGAAAAAAAAGTATTTTAAAAAAACAATGAGAAATGGT +TATAAATTGTTCTTTCAAAAATTTTTTACAGCTTTTTTTTTCGAGAAATT +TAAAAAAAAAACAATTTTTAGCTAATTTTAAAAAAAGTAAAGCTAATTTT +CAAAAAGAACCCCAAAATTGATGCATTTATCGATTTTTAAAATAAATTGT +TCAAAAAAAGATAAATTTAAAAAAAAAGTTATTGGATTTTTTAAAGTTAT +TGGATTTTTCAATTTTCAAAAAAAAATCATCAAAATGTATAAATAAATAA +AAACTATGGATTTCGAAAAATTAAAAAAAACAATTTTTAGCTAATTTACG +GAATTAAAACAACAAAAAATTTCGGAAAATCGAAAATTGTTGGTTTTTTT +TTTCGATTAAAAAAATTCAAAATCTTCTGAAAGACATTTTTAAAATTTCC +AAAAAAAAATTATGAGTATTTTTCAAAAAACAAATCGAATTTTGATTAAC +TTCCGATTTTTTTGACATTTTTTGAAATTTGAAAATCATTAATTCAAAAA +TTTTTATTGAAATAATTCAAAAATTTTTATTGAAAAAAAACGAAAATTTT +TGATTTTCGCTTGAATATTTCGGAAAAAACAATCGATTAAAAAAAAAATA +GGCTTCTAGTTATTTTTCAAGAATGCTTTTTTTTTGACAAACTGCTGATT +TTTTGTCAGTTTCGTCGATTTTCGTTGTAAAAAATCAAAAATTCAAAATT +TCTTCTATTACGAGTGCACAAAATGCTGAGAATGCGTATTGCACAACATA +TTTGACGCGCAAAATATCTCGTAGTGAAAACTACAGTAATTCTTTAAGTG +TCTACTGTAGCGTTTTCGGAAAAATTCCACTTTTTTTATTTAAAAAAAAA +AACGAAATATTTTGATTTTTTTTTATTAAAAAAAACGAAAAATTTTGATT +TTTTATTAAAAAAAACGAAATATTTTGATTTTTTATTTAAAAATGAAAAA +TTTTGATTTTCCAAAAAAAAAGGCTCCTAATTAAAAAAAAAAAATTTTTT +TGAAAAAGTGCAGATTTTTGTCTTAAAATTTAAATCAAAAATTTTTAAAT +AGACTTTTTCCATCAAATTTCCCCAATTTTCAGAACCACCCGGACGGCGT +CGTATCGGTAACCTTTCCAACGACAGAAGAATCGGATATGGCTGTAAAAT +ATCTTCACGGACGAGTAGTTGACGGTCGGAAATTGACAGCCGAGCTCTGG +GATGGCCGGACAAAGTTTAAGGTCGAGGAGACGGAAGAGGATGAGGAGAA +GCGTCGGAAAGAGTACGAGAAGTATATTGAGGGTGGAAGTAGTGAGACGA 
+AGGAGGAGAGCGACGACGATGATGACGATGAAGCTGAAGATAATTGATTG +ATCTAAATTTTTTCTGAAAAATTATTTTTAGTTTATACATTTTTCCGAGA +GGAGCAACCGATCAATTTTTACCAAAACTATGTTCAAAATCGCCCAAAAA +TATCAAATTTAGTGAATTTTCGTTTTCATGGATTTTTCGAAAATTAGCTT +TAAAAATGTTTGCCAAATTTTTCGACGATATCGCGATATTTTAAAAATTT +CGAACAAAAATTGCCTAATTTCAATTAAAATCCCTTTTTTCGAAATTTTC +TAATTTTCTGGGCCAATTGGGTCGTTTAAAAAGAAAAATGGCCAAAATTT +GAGAGTTTTAAAAAAATTCTAAAATTTCAATCCTACGATTTTCCGGGAAA +AAAATTTTCTTGGAAAATTGCTTTTTTTTTGAAAAATTATTTTTAGTTTA +TACATTTTTTTTCCGAAAATAGGAACAACCTATCAATTGAAATTTTCAAA +AAAAAATTAATTAAATTTGGAAGCATTTTCCGAAAAATTGCATGTTTTCG +GTTTTCTAGAAAAAAAAAACTAAAAGAACGAAAAATTTCACCAAAAATCA +AATATGAAAATACTTTCCTAAAAATCTGAAAAAACCAGAAATTTTCAATT +TTCCCTATTCAAAAAAATTCAAAAAAAAAACCAAATTTCAGTTAAAATCC +CTTTTTTCGAAAATTTTAAAATTTTCTGCGCCATTGTGTCGTTGATTTGA +AATTTTTTTCTAAAAATCCGTGAATTTTTGAAATTAAACAAAAAATTTGA +AAAATCAAAAATTATCGATTTTTCAACGAAAAAAATTCAAAAATTTCAAT +CTTACGATTTTCCTAACTAAAAAAAAATTTGAAGTAACGAAAATGTTCAA +TTTTTTCGAAAATTTTCGAGGTCATTTAGAGTTTTTTCTGTGAAAATTGC +ATTTTATATACTCATTTAAATTAATTTTTATATATTCCTTTCCCGAAAAC +TTCAAAAAAAAAACCCAGTGATACAATTAAAATCGAGATAATCACAGACA +AAGTAGGCAGTAGGCAGGCAAAAAACAGGCACATATTAAGGTAATACAAA +ATTCTGATAATGCGTACATCGCAACACATTTGACGCGCAAAATATCTCGT +AGCGAAAACTACAGTAATTCGTTAAATGGATACTGTAGCGCGCTAGTGTC +GATTTACGGGAATCTTAAAGAACCTAAAGAATTGTAGGAAAAATGGAGAT +CCCGTAAATCGACACTAGCACGCTACAGTATCCATTTAAAGAATTACTGT +AGTTTTCGCTACGAGATATTTTGCGCGTCAAATATGTTGTTCAATACGCA +ATTTTAAAATTTCGTGGTCTCATAATAATACACAATTAACTAGTAAATCT +ACAAAATCTACAAAATTCTCTTCATCAACTCTTCCACAGCTCGATCGACC +TGTGCATGAGTCTCCTCGTACGCCCACAGAGAGCACAACGCCACGTTGCT +ATTGTCGAGACGTTCCTTATTCGAAATCTGTGACGACATGCGATCCTTGG +CCTGATCTTCAGTCAGGTTATCCCTCGCAACGACTCGACGGACGGCTTCA +TCTGCTGGCACGAAGACGGTCCACGTTTCGGCGAGCGTTTTGTGCCATCC +GGCTTCGATTAGTGCGGCCGCTTCGATGACTGAATGAATTTTTTTTTCAA +CTTTTGCAGAAAAAATTATGATTTTGCAAAATTAACGGGGAAAATTGGCT +GTTTAAAAAATATTTATTATTTTCGCGTTTTTTTAAAATAAAAATCGATG +AAAATTCTGCAAAGTTTGAAGCTTCAGTACCCTTCAAAGGTGCACACGTT +TTAGAATTTAGCAAAAAATTGGCGTGTTCACGATTTTTTGGTTAAATTCA 
+AGAAAGTGTGCGCCTTTAAGGAGTACTGTAATTTCAAACTTGCTGAATTT +TCATCGATTTTTCACTTTTTTTCCACGATTAGTAAAATTTGTGTTATTGT +AGATTTTTTAAATTTAATACCAAATTTTCCAAAAAATCGAAAAAAAAAAG +AAAAAATGTAATTTCGTTATTTTCAAAAAAATAAATTGAAAAATTGAAAA +AAAATCCAAATATTCGATTATATTTTTTCAGAAAATAAAAATGAAAATTG +GAAAAAAACATATTTTTGGGTTTTTCGATTTTCCAAAAAATTTAAACATT +TAAAAAATCTAAATTTTCGATTTTTTTCAAAAAAAAAAAATTAAAAATCG +CACAAAAACCAAAAAAAAACATTTTTGGCCGATTTTCCAAAAAAAAAAAG +AAAAATAAACAAAAAACAAATATTTCAAAAAAAAAAACAATTTTTTCCGA +TTTTCCAGAAAAACAACTGAAAATCGGAACAATAATTTGTTTTTTCTATT +TTTCAAAAAATATTGAAAAATAAAAGAAAAAACGGAAATTTTCGATTTTT +TTCCGATTTTCCAAAAAAAAAAAATTCAAAATCTCACAAAAACCAAAAAA +AAAATGAAAAATCAAAGAAAAACGAAAATTTCGTTATTTTCAAAAAAATA +TCGAAAAAACTTTTTTTTCTTCCGATTTTTCTGAAAAACACTGAAAATTG +TGGAACAATAATTTTTTTCTATTTTTTTGGTCTCCACGATTTTTCGCGAA +AAATTTCAAAAATTAAAATTTTTTTTTAAATCCAAATTTTCACTTCCCCC +CCCCCCCCCCCCTCACCAACAACTTTCGCCGTCGACTTCTTCACAATCTC +CATAGCTTTCTCTTTCACCTCCGGCCAAACAAGCTCACTCAGCTCGCGGA +GCTTCACCTTATCCGAAAACACAATCGTTCCAAGCTTCCGGCGGTCTACA +ACTCCATCGACAACGACATCCCCTCCAAAATGTTCGCCGATTTTTCGATT +CAACGAGCTTCCCCGTTCATAGCACGTGTGTGCGAGCTTATCACAATCGA +TAACATCGAAATTATGCGTTTCACGGAGATATTTCCCGATATGACTTTTT +CCCGACGCAATTCCTCCGGCGAGCCCAATGATGTACGGGGCGCCCGTTTC +TCTTGGAACCGCCCGAGCTGGACGGAGCAAGTGACCCAGATCCTCTCGTC +GTCTGGAGCTTGAGCTGATCTTTGTCTCTTTCAGGATCTCATCGGATCCT +TCCACCAGCTCGACAATTATCACATCCAGCTGCGACATTCCTTGCTCATT +TCTCTTTTTGTTCACCGCATCGGCACCTTTAATGGTCTCCCGGGACACTA +CTATCGCTTCGAGGTCTTTTATTCGTGTTGAAGGACCGAATGGGTCGATT +ATTGGCTCCGCGAGACATTTTGCCTGGAAAATTATGGGGGTTCTATTGAG +CAAAAATGACCATTTTTGGAAAATTAGAAAATCGGAAAAAAAAATTTCCC +GAAATTTTTTATTTAGAAAAGTTTTTAAAAAACAGAAATTATATATTTTT +TGAGTTTTCCAAAACAAAAAATCGAAAAAAAATATTTCGATTTTTCGGAT +TTTCCAAAAGAAAAAATTTAAAATATCGAAGTTTTTAGATTTGCAATAGA +AAAATTGAAAAAAAAAACGGAAATTGTATTACGGGAACAGAAAATTCTGA +GAATGCGTATTGTACAGCATATTTGACGCGCAAAATATCTCGTAGCGAAA +ACTACAGTAATTCTTTAAATGACTACTGTATCGATCAAGCCCGTAAAGCG +CTACAGTAGTAATTTAAAGAATAACTGTAGTTTTCGCTACGAGATATTTT +GCGCGTCAAATATGTTGTGCAATACGCATTCTCAGAATTTTGTGTTGCCT 
+TAATGCAATTTCCGTTTTTTTTTCAATTTTTTCTTTTGAAAATCTGAAAA +TTTCGATTTTTTCCAATATTTTCTTTTGGAAAATCCGAAAAATCGAAACA +AAAATTTTTTTCTATTTTTTAAATTTTTTTTTTTGGAAAATCGGAAAAAA +ACCAAAATTTTAGTTTTTTAAAATATTTTATGGACCAAAAATATCGATTT +TTCCTAAAGTTTTATAGAAAACTGAAAAATTTAAATATTTATTGTTTTTA +ATTTTTAAATTTTTATGGAAAAATTTCGATTTTTTTTTTTGAAAAAAAAG +CTTTTTTTAAATTTTTCTTTATTTTTGGAAAATCGAACATTTGCTATTTT +TTTCGATTTATTAATTTTTTTGTTGGAATTTCTAAAAATCGATGTTTTTT +TTTCAAAAATCAGCGGTTTTTCCGAAAAATGGCAAAAATTTTTTTAAAAA +ACATTTATTTCTTTTCCTATTTTTCGAAAAATAAATAATTTTTTCTGGCA +ATTTTTAATGAATTTTTGATCGGTAAACTCTTACAGTCCCGGAAATATCC +TCTACAAAATCGACAACTTTCTTAATCCGAAACTCCACAGGCTCAATCAT +CTCGAATAATGATTTTTCTGGAAAAAAACCAATTTGTTAAAAGTTTTATT +TCCAAGCTATAATCTAACTAATAATCATATCCTTATCGGTGACCCCCACG +ACAATAACATCAGAAGCGAGCTCGGCAGCCTTGTTGAGCAACACTTTGTG +CCCATTGTGAAGCCGATCGAATGTGCCGCCCAACACGACTTTTTTGTACT +TTTTCGCCGCCGCCAATTTTTCCGCGTTCTCCAGCAGCTTTTCCTCGCCG +ATTAGCACATAATTCTCGGGATTTCCGCGGATTTCCGACACAACTAGACG +GACGTCGAGCTTCGGGCATGAGCTCGCCGATGTTAGGTAGATTTTCGAGA +CTAGCTCGTCGGTTCGCGGCTCGTCCGGGTTTAATCTGCAAGGAATCCTT +GTTAACCTTGATTTTCGAGCCGGGAAAAAGCCCAAAGCACTTTACCTCAC +ATAAAGCTTCGAGCTTAACGCGGCGGCCGCAGACGTGAGCAATTGCTTCA +GTTTTTGCACATTTTTGCTATTTAACACGAGAAGTCCGTATTTCGAGCTC +ATTGAGAATGATGAGAAATATCAGGGAATATTCCTCAAATTTTAAATAGA +AAAATTCGCTGCCACGTTCGCTGCTCTTATCGGCTGCGCGTCTCTATTTC +ACACACTGTCTCATCTCTCGTCTCGCGTCTCTCGTCTCTAACGCGTGACG +GTGGAGCGCACTTCCCTCCTTTTTGCTGTAAAATCTTTCAGACATGCCGG +AAACAAGAAAAACAAGAAATTATTGAACTGAAAATAATGTAAACGTACTC +ATCGTATTTTTAGCTTCAAAATAAGATATCTTAGAGCATATAATATTGCT +TCTAAACGAAAACCAAAACATCGCTGAAAATGTTGTGAATTTTATGAAAA +TCTAGGATCATTTTTCCCATTTTTATGAAAAACAACAATAAAATGAGAAA +AATTACATGGGAAAACCGAAGAAACTTAAATTTCGCGGTCAAAATTCGAA +TAGAATTACTGTATTTTTCGCTCAGAGATATTTTGCGCGTCAAATATGCT +GCGCGTGCGCATTTTCAGCGTTTTTTATTTAATAAAACAGTAATAAAATG +ATTAAATGCATTTTGTGTGATTTTCAGCCCAAAATGTTTGAAATAAATTG +TTGTTGTTCAAGCCAGCATTTTTAAAAACTTAAATATATGGGAAAACCAT +AAAAATAACACAAAAGTTTGGCAAAATTTTGGAACTACAGTACTCTTTAA +AGGCGCAAGCCCTTTAAGACAAATTTGTCGTGTCGAGACCGGACAAATAT 
+TCTAAATTTCTTTTATACATACGCAGATTTAAAGTACAGAAGAGCGCATT +TGCACATTGCGCAATTTCTAAATTTTTTCTAACAATAAAAATTAAAACCA +AAATACTTTATTCAATACCACAAATACCACAAGTTCATTTATCTTATTCA +ACCTCTTCCGCCGACGTCGTCTTGTTGTTGCTCTTCCCCTTCTTCTTAGA +CTTCTGAAATTTTCTCATATATGCAAACGCGCTCCGATCTAACAAAAAAC +CTTCTTCTTCTTTTTCTTCTTGTCTACATCTTCCTTCTTTGGCTGCTCTC +CAGATTCTCCCCCGGCTCCCGGATCGCTCTTCCCAAAGTCCATCTCCGGC +TTCAGCCCAACCTCAGGCGGCGCGATCATCAGTACCTGCTGGTATCCAGG +ACCCTTGTAATCCGTCATACAATCCCAATCTGGTGGCTCCTGGGTATTTG +ACTTAGACTTCTCAAACAGCTTCTGGATCCCATCAGCGATACGATCGTAC +TCCACAAACTCAGCGACATCCTTGGGCTCACGGATGATGGTCAGAAGATC +AAAGAACCCAGCAGTGGTCTCCGGAGTGGTTCTCCAAACCGACGCGACCA +TCGGTGCAGTGATGCGACGTTTCAAGTGGAGCACGTCCTGTTCGCTTTCC +TTGTTGAGCAGTGCCCATGGGAGAGCTCCGAGGAAGAACTCGAGCACCAT +GTACCACCAGGACTCCATGTCGTCCTTGTAGCCGAGATTTTTCTGGGAAG +AAAAAAGTTGTTTTAAAGAAATTGTGAACGGTAAGGTTACCCCCTGGTGA +GCATGAACGCTCGCGTACTTGACGACTCCTCTGTAGTGAATCGCATTCTT +TCTCATCTGCTCCTCAGCTTCGTCGTTCTTCTTGACGAATCTTAATTTTA +AAAAAAGTTTTCGGGGTAGATTTACGACGCGTTGCGTGTCGCGTCGCGGT +TCGATTTTAGTTGTAAAACTAAATGTATTTGTCCGTGTGGAGTACGCGAC +TTTCCCACTCAAAATAGGATGGAATTGAATTCGGAGGTGTCACTGTTGCA +AATTCGGATTGTCTGAATAAATATAAAGTGAGATTTTGAATTTTGCTCCA +TTGATAATGGTGTTTTTTTTTGAAATTCTGCAAACATTAATTTCTAGAAA +ATTTGAGTTTCCTCCAAAATTTTTCACAGAAAATTTGAATTCCCGCCAAA +ATTTTTTCTCTGAAAATTTGAATTCCCGCCAAAATTTTTTCTCTGAAAAT +TTGAATTCCCGCCAAAATTTTTTTCAGAAAATTTTGAACCCCGCTTAAAT +TTTTCTGAACTAGGCTATTTTGTCTCGGCCATATCTGGGGTTGTGGTTTA +CGGCGCGTTGCGTGTCGCGTCGCGGTTCGATTTTAGTTGTAAAACTAAAT +GTATTTGTCCGTGTGGAGTACGCGACTTTCCCACGCGTTGTTCGTCAGGC +AATTGTCAATGGAGCGCAAAAACTGCCGACCGCTCCAAATAACTACTTTT +TGCACTTCATTGCATACACACAGCAGCTCGCGGCGTCGGCATGAGGCCCG +CATTTCGCGCCTCACTGGAAGCCCCAGATGCCAAAAAATCTACCTCGGTC +TCTTGCACAACCCGAAATCCAACAGGTAAACCTGATGGTGCTGTCCGTCA +ATCTCTCTTCCAATACAGAAATGCGATGGCTTGATGTCGCGATGAATGAA +TCCATTCCGGTGGAGGTCACGAATCGCGGCCAGCGTCTGCTCGGCCAAGT +GAAGACCCGTCGCCATGCTGAACTTGCGATCCTTGTTCGTGTTCATCATC +AGCTCGTATAGGTTCTCGCCATACTGAAATATGGAATTTGGCTGAAGTTA +GGCAACGAAAAATGGTACCAATGGCATCACAATGAAGAGGAAGCGATTTG 
+GAACCGTGCCACGTTCCTCGATATTCGTGAAGAATGGAGATGGGTGGTGG +ATCGACGCGAGGACAACTGACATTTGACGAAGAAGCTGGAATTTTTTTTG +CTCTCTAGAAACGGTTTTCACGGCGGCCGACAATTTCCGAGTTTGGAAAC +TCACTGCACTTAGTCATGTAATTTTATAATGAGTGGCTAAACTCGGAAAT +TGTCGGCCGCCGTTAAACCACAAGACAGTTGGTTCCTTCGCCGCGACCGA +CACTTTCCGGGTTCCGCGCCGCACTATACAGAAGGCTGGCTGCGCGAGGC +AGGCTAGACCGCGGCAGGTGTTTTGGCGCCTACATGGACTTGCTAGCTTG +TCAACAAGGCAGCCTAAGCGTGCCTGCAAAAGGTAGGCAGGCACGCTTCT +AAAACCGTGCCAACACACCATAAATAACTCACCGAGCTCTTCATGTTCAA +CGCCTCGCATTTGACCGCAAAAAACTTCCCATCACTCTCGCGTTTTACCT +TGTAAATCGAGCCGAACGTGCCCGTCGCCAGCGGTTTCGTGACTTCATAG +GTGCCGTTTTGTGCTCGAATGAGCTCCTTTTGCGGTTGTCCAGGTCCGAC +TGACGTCATTGATTTAACACCGAAATGTCACGATTATTCGGAAGTGAATC +GATAATCGAAAATCGGAGACTCGAAACTTTTCGAAATAGATTTTATTATC +TTTGTAGGGTTTTTGGAGTTTATGATGATAATGAGGAGGGGGGATTTTAT +GTGGATTAAGAGGCTACGAGGAAAATATTTATTTTTTAAATATTTTATTG +TCATTCTTTTCTTTTCTAGATTTTCACGTTTCTTATTATTAAAAAAAAGT +TTTTCAGAATTTATTTCAAAATCCCGAACATAATCTGAAATTCAGAAATA +CTACAAAAGAAAAACGAAAAAAAAACTGAGAAAAATGGTCCAGATTCTCC +AGTATTGTTAGGCGTACCCCCCCCCCCCCCCTACCTACCTACCTTCCTTC +CTCGCCTACATGCATACCTGCCTACCTACACGCCTGAGTGCCTGCCTACC +TTCCTGCCTACGTTTATGTCTACATTTTTTGCCAGCCTTTTGCCAGTATA +CTTTACGTGTGTTCTTAATTACGTGCCTATCTACTTACCTTTCTTCCTCG +CCTACATGTATACCTGCCTACCTACACGCCTGCGTGCCCGCCTCCTGCCT +ACCGACTCACCTACCTGTTTGCATACATGCCTGCCTACCTTCTTGACCAT +GCCGAGGCGCCTATCTTACTACTTGCCTACCTACACGCCTGTCTATTGCC +TACCTGTTTGGTTACATGTGCCTGTGCCGTACCTGTGCCTCAAAAACATC +TTTATTTTGTAATAAATAATTTTAATAAAACTTTATAAAACAAAAAAAAT +ACAAAATTGAAAATAACCTAACAAAAAGTCTTTTCAAACCCCTTCGAACT +GCTCAACTGATAAAAATTCGTGTTATACTTCAACAATTTGAACTGCAAAT +CCGATTTGACACAGCTAAACTTGTATTTTGTCGCTTTATTCCTGCACAAA +TGTGCTCCTGGGCAATTGAGACTATTCGAATCGGTGTAGTAGGTGGTGAA +GCTCTTTGAGCATCTTATTGTAGAGTGATCTTCTCGAAGACGGCCCAGAA +TTGCGGTGATTGAGGGATTTGTGATGGTTCTGGGGATTTGATGTAATAAA +TTTAAATTGTGTGGTTTAGATGCAGAAAAAAAAACTAAGAATTCAGACTA +AGACTTGAAAAAGAGCCAAGAAAAGGGCGGAGCCTATTATCGATTTTTCC +ACTTGCCGTTTATGAAGTTTATCTACACTTATATAGTCTATTATCAATAC +TAAGTTAAACAGTATATGGTATAATAAATACTATGACTGCAAGAAAAATC 
+AATAATAGGCTCCGCCTCTATATTTGTTTTTTCTCGTTTTAAATCCAAAC +TTATTCTACGCCGAACCTATATTCTTTTATATTTTTTGAAATTATATATC +CTTCTATTTATAAGATTTTTCCAGCCAAATTTTTTTTTCTCAGAAAAATT +GAGTTTCCCGCCAAAATGGGTTTCTCAAAAAATTTGAATTTCCCGCCAAA +ATTTTTTTTCTCAGAAATTTAAAATTTTCCGCAAAATTTTTCTCAGAAAA +TTTGTATTCCCCGCCAAAAATGTTTTCTCATAAAACATGAATTTCCGTCA +AAACTTTCTCATAAAATTTGAATTTCCCGCCAAAATTTTACGAGAAAATT +TGAATTTCCAGCCAATCTTTTAATGAGAAAGTTTGAATTCCCACCAAAAT +GTTTTTTTTTCTCAGAAAATTTGAATATTCCGCAAAAATGTTCTGAGGAA +AATTGAATCTCTAGCCAATTTTTTCTCATAAAATTTGAATTTCCGGTCAG +CTTTTTTCTGAAAAAGTTTGAATTTCGCGCAAATTTTTTTTCTCAGAAAT +TTTAAACCTCTGGCTTTATTTTTCTCAAAAAAATTAAAATGTCCTGCCAT +TTTTTTAAAAAAAAAATTCACAGAAAATTTGAATTTTTAATTTTAAAATT +CACAGAAACTGACTTCCTTATCGACATGCCAATTGTCACACGATCCGCTG +ATTTCAGAGCTTCACCGCCCGTAAAATTGCCACGTATCATTGAATCCACG +TCAGCAGAGTCGGGAATTTCAAAAATTTTACTATCATCCATTTGTTTGCT +GGAAGCGTTAATTATCGATTTTTATTTTTTTATTCGTTATATTAATTTTA +TTCATATTTTTTGAAAAATAAAATCAATTTCACTCTCGGATTTCTGGCTT +CCCTCATAAAATGAAATGAAAGAGTTTGCCGAACTAGGCCATTTTGGCTC +GGACATATCTGGGGCAGATTTACGGCGCGTTGCGTCGCGGCTCGTTAAAA +ATAGATTTATTTGTCCGTGTGGAGTACACGACCTTCTCACGCGTTGTCCG +GCAGGCAATTGTCAATGGAGCGCGAAAAATTCAATGAGGAAGGCCAGAAT +CCCGTGTTCACTGATTTTTTGAATTTACAATTTTTAAAGTATCGAATAAA +AATTTTAATTGAAAAAATCGATTTTCAAAAAGTTTTAAAATTTTTTTAAA +ATTTGAATTTAAAAATCGATTTTAAAAATTATTGATTTTCGAGCAAACCT +TTTCAATTTGTACGGATGTTCCACGGGCTCATTGAGCCAAACACTATCGA +TTAAAGATGGGCGTGTCACCACTTTTGCCACGCCCCTTTCATTCCGATAT +TCCAATAACCCGACGAATATCTCTGATTCGTAATAGTTGGCGAACTCGGA +AAAATAGGAATCGAACGATTTCGGCACGAGAAGATCAGACCACTCGAGAA +ATGTGATGAAGCTCCCCGAGCTCTGAAAGTTTGGATAAATAGAGTTGTGA +GCCTAGTGCAAGTTGAAAAAATCGATAAATTATCGATTTTCTGATTGTAC +GAATTTAGGCGGGAATTCAAATTTTCTGAGAAAAATTTAGGCGAGAATTC +AAATTTTCTGAGAAAAATTTAGGCGAGAATTCAAATTTTCAGAGAAAAAA +TTTTGGCGGGAATTCAAATATTCTGAAAAAAAATTTGCCGGGAATTCAAA +ATTTCTGAAAATAAATTTAGGCGAGAATTCAAATTTTCAGAGAAAAATTT +TGGCGGGAATTCAAATTTTCTGAAAAAAATTTAGGCGAGAATTCAAATTT +TCTGAGAAAAATTTAGGCGAGAATTCAAATTTTCAGAGAAAAAATTTTGG +CGGGAATTCAAATTTTCTAAGAAAAATTTTGGCGGGAATTCACATTTTCT 
+GAGTAAAACTTTGGCGGGAATTCAAATTTTCTGAGAAAAATTTAGGCGAG +AATTCAAATTTTCAGAGAAAAAATTTTGGCGGGAATTCAAATATTCTGAA +AAAAAATTTGCCGGGAATTCAAAATTTCTGAAAATAAATTTAGGCGAGAA +TTCAAATTTTCAGAGAAAAATTTTGGCGGGAATTCAAATTTTCTAAGAAA +AATTTTGGCGGGAATTCAAATTTTCTGAGTAAAACTCTCTGGCGGGAATT +CAAATTTTCTGAGAAAAATTTAGGCGAGAATTCAAATTTTCAGAGAAAAA +TTTTGGCGGGAATTCAAATTTTCAGAGAAAAAATTTTGGCGGGAATTCAA +ATTTTCTGAAAAAAAATTTCGGCGGGAATTCAAATTTTCAGAAAAAAAAT +TTTGGCGGGAATCCAAATTTTCTGAAAAAAAATTGGCCGGGAATTCAAAA +TTTTTGAAAAACATTTTCGGCGAGACTTCAGGGGTTCTCATAAGAATTCA +AATTTTCTGCAAAAAATTTTGGCGGGAATTCAAAATTTCTGAAAAAAAAT +TTGCCGGGAATTCAAAATTTCTGAAAATAAATTTTGGCGGGAATTCAAAT +TTTCTGCAAAAAATTTTGGCGGGAATTCAAATTTTCTGCAAAAAATTTTG +GCGGGAATTCAAATTTTCTGAGAAAAATTTTGGCGGGAATTCAAATTTTC +TGAGAAAAATTTTGGCGGGAATTCAAATTTTCTGAGAAAAATTTAGGCGA +GAATTCAAATTTTCAGAGAAAAAATTTTGGCGGGAATTCAAATATTCTGA +AAAAAAATTTAGGCGAGAATTCAAATTTTCTGAGAAAAATTTAGGCGAGA +ATTCAAATTTTCAGAGAAAAATTTTTGGCGGGAATTCAAATATTCTGAAA +AAAAATTTTGGCGAGAATTCAAATTTTCAGAGAAAAATTCTGGCGGGAAT +TCAAATTTTCTAAGAAAAATTTTGGCGGGAATTCAAATTTTCTGAGTAAA +ACTTTGGCGGGAATTCAAATTTTCTGAGAAAAATTTAGGCGAGAATTCAA +ATTTTCAGAGAAAAAATTTTGGCGGGAATTCAAATATTCTGAAAAAAAAT +TTGCCGGGAATTCAAAATTTCTGAAAATAAATTTAGGCGAGAATTCAAAT +TTTCAGAGAAAAATTTTGGCGGGAATTCAAATTTTCTAAGAAAAATTTTG +GCGGGAATTCACATTTTCTGAGAAAAATTTTGGCGGGAATTCAAATTTTC +TGAGAAAAATTTAGGCGAGAATTCAAATTTTCTGAGAAAAATTTTGGCGG +GAATTCAAAATTTCTGAAAATAAATTTTGGCGGGAATTCAAATTTTCTGC +AAAAAATTTTGGCGGGAATTCAAAATTTCTGAAAAAAAATTTGCCGGGAA +TTCAAAATTTCTGAAAATAAATTTTGGCGGGAATTCAAATTTTCTGCAAA +AAATTTTGGCGGGAATTCAAATTTTCTGCAAAAAATTTTGGCGGGAATTC +AAATTTTCTGAGAAAAATTTTGGCGGGAATTCAAATTTTCTGAGAAAAAT +TTTGGCGGGAATTCTAATTTTCTGAGAACATTTTTGGCGGAAATTCAAAT +TTTCTGAGAACATTTTTGGCGGGAATTCAAATTTTCTGAGAAAAATTGTG +GCGGGAATTCAAATTTTCTGAGAAAAATTTAGGCGAGAATTCAAATTTTC +AGAGAAAAAATTTTGGCGGGAATTCAAATTTTCTAGAAATCAATTTTTTC +AGAATTTCAGAAAACACCACCCCGGCTATCAATGGAGCGCATTTTCAAAG +TCTCACTTTATATTTATTCAGGCAATCCGAATTTGCAACAGTGACACCTC +CGAATTCAATTCCATCCAATTTTGATTTCCCATAAAAATCTGGAAACGAG 
+TCAACTGATATTCCTTTGAGACGCTTCAACTCTTCTAGAAATTCGAAAAA +CGGGCGACTTGTAGTTGCCACGTATGTTTGGAGATGGCCGCCGAAACTGG +AAAAAAAATTTAAAATAAACTGACAAGTCTATATATTTTTTCTTTAAATT +TTTTTAATCGAAAAGCACCCCTATTATATAAATTCTTTTTTAACCTACCA +TTTTTAATAATTGATGGCCGAGTTTTCTTTTTTAGGCCATGTTCAAAAAG +CCGGTCGGTACCGGTTTTTGGTGACCGAATTTTCAAATTTTCTAGGCCAC +GTAGAAAAACCAGGTCTTCTTTTCACACATTGATTGTTTAATTTTCTCAA +AGAACCCCCTCTCCTATAAAATCACGGTTAATAATGGGCGATAAGTCGAT +GGCCGAGTTTTTTTTATATATTTCTAGGCCATGTAGCAAAAACTGACCCT +TTTTAACCCCAGAATGTTCTCAAAAATCGTATTTTTTAATTGTGGAAAAG +CCCAGAAAATTATGTATGCTGCAAAATCACAAGTCGGCGGCCGAGTTTTC +TCTCTTCCTAGGCCACGCTGCAAAAACTAACCTTCCGTTTACATATTGAC +TTTTTCAAATAATTTCAAATAAAAATTATTCACTTATAAAATCATTTACT +TCATGATTGAAAGCTTTTGCAATTGGTTAAAACCGGAAATAAGTCGATGG +CCGAGTTTTCTCTTTTCTAGGCCACGTACCAAAAGTGTCCCTTTTTTACC +CCAGAATGTTGTCAAAAATCGTACTTTTTATTTGTGGAAAAGCTCGGAAA +ATTATGTTGTAAAAAATGAGTCAAGTCGGTGGCCGAGTTTTCTCGTTTTC +TAGGCCACGCTGCTTAAACTGACCTTCCATTTACATTTTGGCTTTTAAAT +TCAAATAAAAATTATTCCCTCATAAAATCGTTTACTGTCATTGTGAGAGC +GGTTAAAAACAGGAAAAAAAATGGTGGCCGAGTTTTTTTTTCTAGGCCAC +ATAGCAAAAAACTCACCTAGTATACACATGAGCAGCTAACAACACATTCT +GCCATCTAACATTATTATAAATAGGCGGAGTACATACAACCAAATCCTTC +GGCTTATTATCCGCCGGCAAATATCGAATTTTCGCGGTATGCCCATTCAC +AAGAAGCTCCAGCTCCGTGAGCTCTTCCTCAAGCACTATAGTACCCACAA +GTGTCACGTATTTGCATAAATTATGCGGAGTTGCTCTGTGAATTTGAAGT +GGTTCGAGTACGGTTTTTGTGTCGTCACTTCGGATTGCGATAATCTTACG +ACGTTCCCAATGTGCATCCCGAGGCGCGATAAAGGTTAGTGTTACTTGGT +TTTTGTAGAGACTGAAAAATTTCAAAAAACTTAAAAACTAACGTAGTTTA +AAACATTATTCTTCTTTTTTTATTCTGAGATATTTTGATTTTGAAAAAAA +AAAATTTTTTTTACGAAAAAAAAATTGGCAAAATACCTCAAAAACTGTGA +GTTTGTTGAAATTTAGTTTTTACAACCACGGGAATTAATTTTTTAAATTA +AAAATATTTTACGTGTAGTCCTCTCGAATAATTTTGAAAAATATATGAAG +TGTTGAATTCTGCCGAGTTTTTTTGTCCGAGTGGACTACAAAAGTGCAAA +GAATTTGGAATTTTTCAAACTTTATATTTAACAGCAATCATCAACAACTT +AAAAAAATTTAGATTAAAAAATTTTTTTTGGAAGTTTTCGAAAAAAATTT +TGGAAAAATTTTTAATTTTCAGAAAAAAAAACCAGTTTGATTTTTTTCTT +AAATTCTTTTTTGAAAATTTGTTGAAACTTTTTTTTAAATGCTAGACTTT +TTTTTTGAATTTTCACGCGGTTCTGGTTTCCCTCATATTTTGAGATAGAA 
+GAGTTTGCCGAACTAGGCCATTTTGGCTCGGCCATATCTGGGGTAGATTT +ACGGCGCGTTGCTTGTCGCGTCGCGGCTCGATTTTAGTTGTAAAACTAAA +TGTATTTGTCCGTGTGGAGTACACGACTTTCCCACGCGTTAGGCGATTAT +CAATGGAGCGCGAAAAATGCAATGAGGAAGGCCAGAACCCCGTGAAAAAT +ACATTTTAAAAAATACTTGAAAAATGTTTAAATTTTCTTTTTAATTTTAA +ATTTGAAATTTTGTAACTTTACAAAAAAATAATTTTTCGAAATTTTTTTC +AAATATTTTTTTTCGAATTCTTTTTGTTGAAATTTCAATTCTCTGTTCTA +AAAATCAATATTCCTTACCTGTATCCCACTCCATAATAGTACGCAGAAAC +AAAAAATAGGTCTTTCTCGTCGTTATCCAATTCTTTAATCTTATTAGTGT +ATGAATTTCGAACACGTCTTAATGCAGGCAAGGGATGTATTGAAATATGT +ACAAATGCTAATAATATTACAAATAGATATAGGGTCCTGCAACGAATTAT +CAACTGCTCGTAGAGTACGGAATAGGGTTTGAATTGAAAATTGTTCAGTT +TCGTGTCGGAATTCATTTTTATTATGGGGAGGAAAACAATCTAGTGTGGT +GGTAGTGGTAATGACAATCGCTATTTGAAATCATGTTGAATAGCTGCGAT +AAGTATCGGGCGCCAGCGTGGCCTAGGATTTGGGAAAACTCTTCCACCAG +AAGCTAGTCGTGGCCCAGGCTTTGAAATTTGATTATTTGATCAAGAAATT +TACAACTAAATTTAAAATCAACAGTTAAGACATTTTTCATAGACTTTTCG +AAAAATTTCTGATTTTTCCATATCAAATAACCTCGGTACGCATCTAACCG +TCAGTTGCCTCGTGGCCTAGAACTTTTGAAAACTCTTCCACGAGAAGATC +ACCGTAGCCTAGAAAGTTTTGAAACTTGTGTAGTTCGCTTGGAATTTTTT +TTTGGTTTTTTTGTCAACTAACTAGGTTCAATAGCTATTAAAAAGTTGCA +AAATTGGGCAATTTTTGTAAAAATTCACAATTTTGAAACTTCTCTAAAAT +TATTAATTTTGTAGTAAGAGGACTCAAAATTGATCTTCAAACACTAAAAA +ATAGCCCCTTTTTAGAATATAGTGGTTTCGAATATTTTTTGAAAAATTAT +TTTTTTTTACCCAAAAAAAAGGACAGTCATGGCTGAAAAAAAATTTGGAA +AAAATAAAAAAATTGTCTAAATTTGTTGAAAACGGGTAATTCATGCATTC +TGAATTCAGAAAATCTAGGTTTAACCCATCAAAAACCATTAAAAGCGGCA +AAAATGGGTATTTCATTGTAAAAATAATGCATATACAATAAACAATGTAG +AGGAATGTTCAAAATCTGTAGTTTTTTTGAAATTTTTTTTTTATTTTTTG +GTTATTTTTTTTGCAAATTTTCTCGTATATTTAATTAGTTAGTTTGCATG +TTACATGCTAATGAAAAGGTTTTTTTTGCATCCTGATACATATAAAAACC +GAGAAATTCACAATTTCACAATCAGTAAGGTATTTAATTTTTGTTAACTT +TAGTCTTCTGCTCCTCTAAAGCTTCTGGAAGAGGACACTTTCTAAATTTT +CAAAATATTTTTTGCGCCACTTTCCTGTCAAATTTCTGGCATTATTACAT +CCAAAACCCAATAAAAATCGGAATTCCATAGCTACGACAACGTCAGCAAA +AGCGGAAAGAAATTTTGTTTGCCTGTCTAACGAGTCACAAAGCATACACT +GACGTTTCTATCAGAAAAGACAGGATTTCGTATCAAAACAAAAATTGGGT +GCGATTGCTCGATGCACCATGTGATATTTTGCTTTTTTTAAAAATTATTT 
+TGCTCGTAATGTATGGTGCACTTATTCCGTGGCTGCAGAATTTGCACGCG +GTGTCATAATGCCCATTTCGACTTGATCTACGTAGATCTACAAAAAAATG +CGGGAGTTGAGACGCAGAGGTAGGGCTCAACTGATTTCGTTTGGTTAAGA +GCGTGCTGACGTCACATTTTTATGGGCAAAAAATTCCCGCATTTTTTATA +GATCAAACCGTAATGGGACAGCCTGACACCATGTGAATGCAAACTTTACG +CGAAAATTTTTAGATTTTTAATAATCTTAGAATTAGCTATGGTTTTATAG +AATAATATCAAAATTATCCAAAAAAAAAAACACATTTTTTCAAAATTTTA +GATTTCCAGTTTCTATTTGAACTTTAAAAGACTAATCCAAATTTTGTTTT +GTTTCAACGATTTTTCATTGATCTCTCCAAAGAATTCATTGAAACACATC +ATTCACAAAACTAAAAACTCCCCGACTTGGCTTTCATTCCGAGATGACAA +ACTTTTTGGTGGCAAAGGTTCTTTGGGGGCATAGTATCCAAGGAGACAAA +GTATCCAGTAGACAAACTGGGGTGTATCTTGGGGGGCATATCTTTGGTGA +CAACCTTTTTTTCGACTAGATTTTTATATGTATTTTTCAACTAATTTTTG +TTCACATTTTTCTGGAATAAGTTTTTTAATGCAATTTTCAATCGATTATC +GGTTATAATTCCCTCACAATTGAATGTATGAACGATGGTCTTTTGGGAAA +AGCTGTAAAACGTCCAACTAACTTGGAATATCGTGAGGCTCTTCCTGATG +GTCTGATCCTTCAGCTCCGAAGGATCACATGGGTACCTTCTGATGTTCTG +ATCCTTCAGATAAGAAGGATCGTAAGGGTACCTCCTGATGGTCTGATCCT +TCAGATCCAAAGGATCACGAGGCTCTTCCTGATGTTCTGATCCTTCAGCT +CCGAAGGATCACATGGGCACCTTCTGATGATCTGATCCTTCAGATAAGAA +GGATCGTAAGGGTACCACTTGATGGTCTGATCCTTCAGCTCCAAAGGATC +ACGAGGCTCTTCCTGATGTTCTGATCCTTCAGCTTCGACAGATCATATGG +ATTTTTGACAAAATTTGAAAAAAAAAATTTTGAAAAAAAACTCGAAATTT +TTTTTTGTTATTCTTCATTCTTTAAAGAATAGTTCAAATTTATCATGATA +GGACCGAAAACTTTCAAGAAACAGTATAACTATACATGATAATCAGCTTC +TACCAAATAATGATAAATTCTCCGCGATGACAAACTTTTCGGTGACAAAG +TATCTTGGTGGACAAACAAAAATTACCGAAAACTGATGTAAGGAATAGTG +AAATAGAGTCCTATGGACTATTAAACATGTTCAGTAGGTGTATTCAGGAC +TGTCCGTCAAAATAAAAAAAAGTTTGTCAGACGAAGTTCGAACCTGGGAC +CTGTAGGATGCAAAGTGCGCTCACTACCACTACACCAGCTATGCGAAAGT +CGGCGAGCCTCATCGAAGGCTATTATAAAACTTAGTTCGCACGAGTATGA +TCGACATTCAACAAACAGTAATATCTCTCAACAAGAATTTCTTCATGGAA +TTGAGGTCATTTGACTATTTTTATCGGTTTTTCAAGTTGAGCATAGGGTC +TTTTAATTTTTTGAGCATAGAAAATCATGAAAGCTGCCTGTTCCTTGTAT +CCTGGATCGAAATAGACGTATCTGGCCTAAAATATTTCCTGAACAGTGAT +CATTTCATGTCCATTGTGTGTTTCTCTGTATTTTGAACCAGAAAGTTGAA +CAAAAATGATAATATTATATCGAAAAATGGAACAAATACAAATAATTTTT +AGGCCTAATCAAATTTCCTCCGGATATTGTTTTTTTTGTCATGATTATAT 
+GTGTCTAAATTTTTTATAATGTGTTTTATAACAAAATTCGTCATTATTTC +CTTCATTTCAAGCAAAATTTAACAGTTCGAGCTTAAAAATAGATGTTGCA +AAAAATTTAATTCGTTTCGATGAATACGGTATACGGTCGGTGTTTGCGTA +CTTTGGCGTTTGCGTATGAAGCATCCTATTTGACGCACGAAACTTTCAAT +GAAATTTAATGCAAATTCTGCATCGAAAAGGACATTACGATGGAAATAAT +GACGAATTTAATTATAAAACACATTATAAAAAATTTAAAAACATATAATC +ATGACAAAAAAAACAATGTCCGGAAAAAATTTGATTAGGCCTAAAATTTA +TTTTTTATTTTTTTCCATTTATCGATTCAATATCAACATTTTTGTTCGAC +TTTCTAGCTCAAAATTCATAGAAACTCACGATGGACATGAAATGATCACT +TTTCAGGAAATATTTTAGGCCAGATACGTCTATTTCGATCCAGGATACAA +GGAACAGGCAGCTTTCATGATTTTCTATGATCAAAAAATTAAAAGACCCT +ATGCTCAACTTGAAAAACCGATAAAAATAGTCAAATGACCTCAATTCCAT +GAAGAAATTCTTGTTGAGAGATATTACTGTTTGTTGAATGTCGATCATAC +TCGTGCGAACTAAGTTTTATAATAGCCTTCGATGAGGCTCGCCGACTTTC +GCATAGCTGGTGTAGTGGTAGTGAGCACACTTTGCATCCTACAGGTCCCA +GGTTCGAACTTCGTCTGACAAACTTTTTTTTATTTTGACGGACAGTCCTG +AATACACCTACTGAACATGTTTAATAGTCCATAGGACTCTATTTCACTAT +TCCTTACATCAGTTTTCGGTAATTTTTGTTTGTCCACCAAGATACTTTGT +CACCGAAAAGTTTGTCATCGCGGAGAATTTATCATTATTTGGTAGAAGCT +GATTATCATGTATAGTTATACTGTTTCTTGAAAGTTTTCGGTCCTATCAT +GATAAATTTGAACTATTCTTTAAAGAATGAAGAATAACAAAAAAAAATTT +CGAGTTTTTTTTCAAAATTTTTTTTTTCAAATTTTGTCAAAAATCCATAT +GATCCGTCGAAGCTGAAGGATCAGAACATCAGGAGGTACCCATGTGATCA +TTCGGCTCTGAAAGATCAGATCATCAGGCAGAGCCTCATGTTCTTTTGGA +GCTGAAGGATCAGAACATCAGGAAGAGCCTCATGATCCTTTGGATCTGAA +GGATCAGACCATCAGGAGGTACCCTTACGATCCTTCTTATCTGAAAGATC +AGATCATCAGAAGGTACCATGTGATCCTTCGGATCGGAAGGATCAGATCA +TCAGGCAGAGCCTCATGTTCTTTTGGAGCTGAAGGATCAGAACATCAGGA +AGAGCCTCGTGATCCTTTGGAGCTAAAGGATCAGACCATCAGGAGGTACC +CTTACGATCCTTCTTATCTGAAGGATCAGAACATCAGAAGGTACCCATGT +GATCCTTCGGAGCTGAAGGATCAGACCATCAGGAAGAGCCTCACGATATT +CCAAGTTAGTTGGACGTTTTACAGCTTTTCCCAAAAGACCATCGTTCATA +CATTCAATTGTGAGGGAATTATAACCGAAAATCGATTGAAAATTGCATTA +AAAAACTTATTCCAGAAAAATGTGAACAAAAATTAGTTGAAAAATACATA +TAAAAATCTAGTTGAAAAAAAGGTTGTCACCAAAGATATGCCCCCCAAGA +TACACCCCAGTTTGTCTACTGGATACTTTGTCTCCTTGGATACTATGCCC +CCAAAGAACCTTTGCCACCAAAAAGTTTGTCATCTCGGAAAGAAGGCCCC +CCGACTTTGCGCGTAAGATATGGTGAGACCCTAATACATCATATTTGACG 
+CGCAAAATATCTCGTAGCGAAAACTACAGTAATTTTTTAACCGTCTACTG +TAGCGCTTTTGTAGATTTACGGGATCTCGATTTTTTTAATGATTAAAATT +AAACAAAAAATGAGAAAAGAACACGAAGAAACGAAGAAAAATGAAATATC +GATGTCACTTTTCGGAAAAAAAATTAATTTCAAAAATTGAGCCCGTAAAT +TTACACATGCGCTACGGTAGGCATTTAAAGAATTACTGTAGTTTTCGCTA +CGAGAAATTTTGCGCGTCAAATATATCGCACAATACGCATTCTCAGAATT +TTGTGTTCCCGGAATAATTTATTTATTGGTTTTTTTGGTTGTATTGGTCC +CAAAAATCAACAAAAAAAAATTTTTATGAAAAATTTCTGTTTTCTTCCGT +TTAAAATCAAAAGTTGGTTTTTTTTGTTGCTTTTTTTGATAAATTTTTGC +AACCACCCTGATTTATATTCCCATAAATATTTAATGTCCCGCTTATGATA +TCTACTGCTTCTTTGCAAAATTCCTACCTGCGGACCAAATACACCATACT +TGACGCGCACTTTTTTTTTGATTTTCAGCTGGACAAACACATTTTCAGCA +TGATTCTGACTTATTTATTTAACTCTTATCACTCTTATCGTTAAAAAACG +CCCTTATAATCATCAACCTATTTTCATTGTCATTCAAAAAAAAAAACTAT +TTCGTACTCGTCGATCGTACTAATTTGTCATCGTTCCGTTCCGTCGAGCA +AAAATCAATGTAGTTTTACGTTTTCCGTTGAAAAAAAATCCAAAAAAAAA +TCCAAAAAAAATCCAAAAAAAAATTCCTTGTTAATTAAAACCGTGAATAA +AAAATTCAAATAAAAAGTTCAAACGTTTCGTTATAAAAATGTACACCGGT +AAGCTCAAAAATATACATTTCAGATTCTAACAATGTGCGACGATACGCTC +GCATTTTGTAGTCCAGCTCCATTCAGTGATGATGTAGAGGTTATTAAACA +ACGTGAGGCGATCGATTATTCCACACAAATCACTCTGGAAGATGCGATTT +CTGGGAACGGTAAGTGCGCTCCGTTGTACTCATAGTTCGCTGAATGAAGG +CATGTATAAGTTCTCAAATTTTCAAATGAAAATTGCCAAAAAAGGTTTTT +AGTTAAAATTTAAAAAATTATCGATTTTTAAATTTATGAAAATTTTTTCG +AAAAATTTGTGTTTTTTGTTTTTGTTTTATTGATTTATTGATTTTTTGGC +GAAAATGTGAAAAATCGAAAAATTATGGATTTTTTCTTGGAATTAAAAAA +AAAAAACTTTATCGGCTAAGCGAAACTTGCTTTTTCGGTTTATTGATTTA +TTGATTTACTTGAAAAAGTACTTGAAAAAATCAATACTTTTATCACGGGG +ATCTGGTCTTCCACATTAAACTTTTCGCGCTCCATTGAGAATCGCCCGCC +GGACAACGCATGGGAAAGCCGTGTACTCCACACGGACAAATACAATTAGT +TTTACAACTAAAATCGAGCCGCGACGCGACACGCAACGCGCCGTAAATCT +ACCCCAGATATGGTCTGGTCTAGTTCGGCAAAAACTCTTCCATTTCAATT +TATGAGGGAAGCCAGAAATCTTTGTTTTTATTTTATTTTGAAATCTCTAA +ATTATCGATTTTTTGGAAAAAAAGTTGTTTGAAAATTCGATAATTTTTTA +ATGATTTTTTGGAAAAATTAAAATTTTCGCTTTTCAAAAAAATCAAAAAT +TATTGATTTTTGAAAAAAAAAACACGGCATGCAATTCCTAGACCACCAAT +CAAAAACCTCTTCCAGTGACTCGTCCGGTCCGTGTCTACGCTGACGGTGT +CTACGATATGTTCCACTACGGCCACGCGAATCAATTCCTGCAGATCAAAC 
+AAACTCTTCCGAATGTCTACCTTATTGTTGGCGGTGAGTTCGGCCCGCGA +AATTCAAAATTTTTAATTTAAAAAAAAAACGATTTTTTCCAGTATGTTCC +GACGAAGAGACTATGAAGAATAAGGGGAGAACTGTGCAGGGCGAAGAAGA +GCGGTACGAGGCGATTCGGCATTGCCGGTACGTCGACGAGGTGTATAAGG +CGTCGCCGTGGACATGTCCGATACCGTTTTTGAAGGAGTTGAAGGTAAAT +TGCGCGTGAGATTCGGTGTTTGCACAGTTTTATCTCCAAAAACGAAGGAA +ATTTTATACCTAAATTTCGGAAAAAAATCAATAAATTATTGCCTTTTTTA +AACGTCACAAAATCGATAATAATTTGAAAATCACAATTTTTTTTTTATTT +TCAAAATTTCGTTGTTCTATCACGGGAACGCAAATTTCTGAGAATGCGTA +CTACACAACACATTTGACGCGCAAAATATCTCGTAGCGAAAACTACAGTA +ATTTGTTAAATGACTACTGTAGCGCGCTAGTGTCGATTTACGGGCTCTAT +TTTAAGCTCAATTTTAATTTCTATCGATAAATATATGATACAAAATATTG +ATCCCGTAAACCGACACTAGCGCGCTACAGTAGTCATTTAATGAGTCACT +GTAGTTTTCGCTACAAGATATTTTGCGCGTCAGATATGTTGCGCAATACG +CATTCTCAGAATTTTGTGTACCCCGTAATAAAAATTCGAGTTTTTCTAAA +CAAAAAGTTTAATTTAAAAAACCTCTAATATTCGAGATATTCGATTGGCG +ATTTATTGATCTATTGATTTTTCGATAAATATTGATTTTTCAATTTTTAC +AATATTCGAAACAATTATCGAAATTTCAAAAAACTAACAAAAAATATTTG +CGGAGGAAAATCGATAACTTTTCAATGTATATTTTCTGAAATTCCATAAA +ATTGATTTTTCTAAATTTAATTTGTTCGATTTTCGGGTTCTCGGTTTTCC +TAATTTCTGTAATTTTGAGTCGATAATTTTTTGTTTTTTTTTTTCAATTG +AAACTTTTATTGATCAAGGATTTCTGGCTTCCCTCATAAATTGAAATAAG +AAGAGTTTGCCGAACTAGGCCATTCTGGCTCGGCCATATCTGGGGTAGAA +TTACGGCGCGTTTCGTGTCGCGTCGCGCCTCGATTTTAGTTGTAAAACTA +AATGTATTTGTCCGTGTGGAGTACACGACTTTCCCACGCGTTGTCCGGCA +GGTGATTGTCAATGGAGCGCGAAAAATGCAATGAGGAAAGCCAGAACCCC +GTGATTGATTATTGAGTTTTGGAAAACTATTTTAAAAAACCTGATTTGAA +AGAAAATATCGGTAATTTTTCGGATTTTTAACAAGAAAAAGGTTTTTATT +TCCGGAAACTTTCATTTAATAATTATTTATTTATTGATTTCTCGGAAAAA +TTTGCCGAAATCTATCGAACTATAGTCGAACAATTGGACGAAAAGTCGAA +AAAAAAATTCTAGGCCACGAATTTCCTCTTCCAGGTTGACTTTATGTCAC +ACGACGCTCTGCCATACCAAGGTCCCGCCGGAGAAGACATCTACGAGAAG +CACCGCACTGCTGGCATGTTCCTGGAGACGCAGAGAACCGAAGGAATATC +GACGAGCGATTCGATTTGTCGAATTATCCGCGACTACGACACGTATGTCA +GAAGAAATTTGCAGAGAGGATATTCGGCCACCGATTTGAACGTTGGCTTT +TTTACAGTGAGCCTGGCCTAGAAAGGGGTGGCCTAGAAAGAGGTGGCCTA +GAAAAACAAAACTCTTCCATTTTTTTCAGACCAGCAAATATCGTCTGCAA +GACACCGTAGTGGGAATAAAAGAAATGGGCCGTGGCCTGCTGCAAACGTG 
+GAAAACGAATGCGGATTACTTGATTGAAGGATTCTTGACGACATTTGCCA +TCAGTGATCCGAAACCGTTGCCGAACAACAAAAGTGCCGATGAAAATGTG +GAAAATAGGGAGAATATTGAGAATTTTTAGAAAAAATTCGTGTAAATTAT +GGGAATAAAAATTTGAATACTCGTGAATGTTTACTGAATTTATGCAATTT +ATTTTTTGAAATTTTCAATTTTTTTGGAAAATTTTTTTTTCTTAGGCTTA +GGCTTAGTCTTTGGCTTTGTTTTAGGATTAGGCTCAGGCTTAGGCTTAGG +CTTAGACAAAAACTTAAGCTTAGGCTTAGGCTTTTGATTAGGCTTAGGAT +TAGTCTGTCTTTCACTTAGGTTTGGCGTCATTGGCGAGCGTTAGCTCGCT +ATTTAGGCTTGGGTTAATTAGGCTTAGGCTTAGGCCTAGCTTTAAACTTA +GGCTTAGGCTTAGGCTTAGAATCAGGCGGGCTAGGGAAAAAAAGAGAAAT +TCCAAAAATTCCAGAAAAAAGGAAAAAAGGTACATCCCGAAGTCGGGTCA +AAAAATAAAGTGAAGAAATGAAATTTTCACGAAAATGAATGAATATTTTT +GAGTCTCGCGGATTTATTTTCATAATCATATTCTGAACGTATTATTGACT +CGTCGTGTGGGACATTTCAATGGATTTTCCGTAAAAATCTCTGTAGAATT +CCGCATCGGTCAACGATTCTTCACAAGAGCCAAACTCGGCGAGTCGGTCA +AACAAATGTTTGAGCTCCTGAAGAGTAGTTCAAAATTATATATGAATTGA +TTTATCTTGCAAACCTTTCTCCGTTTGTCGGTAAAAAGTCCTTCGGGGCA +GATTTCCGTATCAATCAGACGATTTGCAATGTATACAATCCACATGAGAT +TTGTTCGCCTTGAGAATTTCTCCCAGTTGCCCCTGGAACGTTAATTTTTC +TGAATTTCTAGAAAGGATATAGAAATAGCAACAAAAAAAAAAATTTATCC +AAATAATGAGCTATTTAATGTAGTTTATATTACTACAAATACTTTTCTCA +TAAGAACGTCTTGTTCTTGATAAATGTGTGCTTTAAGAAAAAAAAAAGTC +AAAAATAGAGTCCTGAGATGTCGGGCGTTGTACCTATTAGGGGTGTCAAT +AAGTTCCGGGTCAAAATTCATAACTTTTTTCGTAGCAAATCGATTTTCTT +GAAAATGTGGGAATTTATGTTATCAAACATGGTCTTTCATTTGACGGCAC +TTTCAAAAAGTTTTGACCACTCCATGTACCCTAGCTCGGATCCACTTTTT +TCAGGCGTATGCCCGATCTCGCTTCTTTGTAACTTTCAATTGAGACTTAT +GTGCGGATTTTGATTTATTAAGTATACAATGTAAGAATACAATAAAAATT +TGAGAAAAAATTCGTTCAAAAAAACAATTTTTTTGATCGGCAAAAAACCC +TCGAAAAAAATTTTGTCGAAAACTCTTGATTTTTTGTACAGGAATGATGC +AACCAATTCTGAACAATTTTTTAACACATAAAAGTTTTGAATTTAAGGCG +TCACACACAAGCGGCAATGGAAAAAATGATTTTTTAGTGAATTTTTTGAA +ATTTTTGAGAAATTCTCGAAATCTAAATATTAAATTCAAATGTTTTGTGT +GTTGAAAAATTGTTCAGAATTGGTTGCATCACTCCTGTACAAAAAATCCA +GAATTTCCGACAAAAATTTTTCCGAGGATTTTTTTGACGAACCAAAAAAA +TTTGTTTTTTTGAAGGAATTTTTTCTCAAATTTTTATTGTATTCTTACAT +TGTATACTTAATAATTCAAAATCCGCACATAAGTATCAATTTAAAGTTTC +AAAGAAGCAAGATCGGGCATACGCCTGAAAAAAGTAGATCCGAGCTAGGG 
+TACATGGAGTGGTCAAAACTTTTTGAAAGTGCCGTCAAATGAAAGACCAT +GTTTGATAACATAAATTCCCACACTTTTAGAAAAATCGATCAGCCACGAA +AAAAGTTATGAATTTTGACCGGTAACTTATTGTGCTACATGCTTGTTGCG +AAATATACCTATTTACCTATACCTATACCTATACCTGCAATTTTCTCTCA +TCTCTCGATATACTTCAAATTGTGGAGCATCTGGTCCTTCAAAAATCGTC +GTATCCTCTTCCAAATCCCAGTAGATAGTTGTCGAATCTAAAATTATATA +TTTTTAATTATAAATTGTATATTTTTTTAATTTAAAAAATAACAAACCTT +TCGAAATGCGGCTAAGTGTGAAATCAATAATATTGACCTTAATTCCATGG +GTCCTCAGTGGAATTTTCTGCCCATGCACAGTGTAGGAGAGCTCTTCCAC +TCCGTTCCGATCGATCAGCACATTACTAAGATTTAAGTCCCTGTGCTCGA +ATTCAAGAGCTGCTTCAGCTGCAACCATTGAGAGAACGAGCTGGTGAATG +ATGGAAAATAGTTCATCTTCACTTTCAAGGACAAAATCTGCCAGCGAGAT +TCCACCATTTGCCGAGACGAAAAGGATAAAGTTTTGATGTTCTGAAGAAT +AAACATCTGGCCTCAAATGTTCGGATTCATTGAGCTTATCGTAGGCATCC +CAAGCTCTCAGTAATCCCTTAGGATATTCCCCCATGACTATTTGGGCTGA +AATCATTTCGATAAAATTCGGCGTAGAATTCCAAGAATTTGGGTCTCTCA +ACGCCGACAGTTCCTTCATCACAATCACCTCTGACAAAACGGCGTGGGTT +GTTGGAATTTCCACGGATTGATTTCCATAGAAACATCCGTTGTTTTCGAA +TGGCAAAACTTTGATAGCAACCGGTCTACCATTCCAGACAGTTGAGAAGA +CTTCACCGTACGCTCCTTCGCCGAGCTTCTTCACTTTTCGAGCATCTAGC +GCCGACGTTGGTAGGCTACCCCATGGTTTTGCTTCTTTTTGGCCAACAAC +ACGGAGAAGTTGTGCCATGGACGGATCTTTATTGTGAAATGTGATACTGC +CAATGGGGATATTTCTAGAATCAGGATGAGCTGACCGAGCACTTGGAGCT +CTTTTGTTCAGTTCAGCAAGCATGCACAGTTCGAAAGCATGCACAGTGCG +AAAATGACTTTTCTGAAATTGTCATCAAAAGTTCATTTTGAAAATTCGTT +TCGGCAAAAAATGCAAATGCATTTCTGAAACTCACCAGTTGTCTCTGAAA +TCGTCGTTCGTTGGTCATGTCGAGTGCAAGAACGGTGGTGGTGACCACTG +CCGTCTTCGTGAACCGCATCCAAGACATGTTTCGGGAGATAACTATAGAA +ATGAAGTTTTGAAAAAAAAACTCAAGACCACAATTATTGAAACACGTTAT +TAGGCCAATAACTTCACACGTGAATTAATACATTATTCATGTATTTTTCT +TTGACTGTTATCACGGGTTTCTGGCTCACGGTAGTTCAGCCACGGTAGCA +CAGAAAATAGATGACCACGTTCATGATAATCAGTCTGAAACCTATATTGG +CTGCTATCTCGAAAACTCTCTAAAAACGCGTTTTGAGAAATTATTGATTT +TTTGGAGAAAATCAGAAATTATTGATTTTTTGGAGAAAATGGGAAATTTT +TCATTTTTATTGGGGAAAATCAGAAATTATTGATTTTTGGAGAAAATCAA +AAATTATTGGTAATTTGAAGCAGGGGTGGGCAGCAATTGCCGTTCGGCAA +ATACGCAAATTACCGGTGTGCCGATTTTCCGGGAATTTTCATTTTCGGCA +GTTTGCCGGTTTGCCGATTTGCCCGACATTTTTCAATTTCGGCAATTTGC 
+CGGTTTGGCGATTTGCCCGAAATTTTTCAATTTCGGCAATTTGCCGGTTT +GCCGATTTGCCCGACATTTTTCATTTTCGGCAATTTGCCGGTTTGCCGAT +TTGCCCGAAATTTTTCAATTTCGGCAATTTGCCGGTGTGCCGATTTGCCA +AAAATTTTCATTTTCGGCAATTTGCCGGTTTGCCGATTTGCCGGAAATTT +TCATTTTCGGCAATTCGCCGGTTTGCCGATTTGCCCGAAATTTTTCAATT +TCGGCAATTTGCCGGTTTGCCGATTTGCCGGAATATTTCATTTTCGGCAA +TTTGCCGGTTTGCCGATTTGCCCGAAGTTTTTCAATTTCGGCAATTTGCC +GGTTTGCCGATTTGTCGAAATTTTTCATTTTCGGCAGTTTGCCGGTTTGC +CGATTTGCCCGAAATTTTTCAATTTCGGCAGTTTGCCGGTTTGCCGATTA +GCCCGAAATTTTTAAATTTCGGCAATTTGCCGGTTTGCTGATTTGCCAGA +AATTTTTCAATTTCAGCAATTTGTCGATTTGCCATTTGCCGGAAGTGTTC +AGAAGGATTTTTTTATAATAAGGAACCACTTAAAACTGTGCCGTTTTGAA +ACTTTTCCCCGTTTTTTTAAGATGATTTCATAGAATTTGCTTACTTGGTT +TGCCGAATTGCCAGAAGTTTTTCAATTCCTACAAGTTGCCAATTTGCCGG +AAACTTTTTAATTATGGCAATTTGCCGGTTTGCCGATTTGCCAGAAATTT +TTCAAATTCGGCAATTTGCCGGTTTTCCGATTTGCCGGAAATTTTTCAAT +TTCGGCAATTTGTCGATTTGCCGGAAAATTTTTATTTATGGCAATTTGCC +GATTTGCCGGAAAAAAAATCGTTTCCCGCCCACCCATCAGTTGATGAGTT +CATCTTTTTTTTGTGTCTCTCCCATCAATCTTAAACTCGACCCGGACTGG +CAAACGCATATCTACAGTTTTCTCTTCTTAAAATGGTGGTCTTAAGTTTC +GAAGACTATGCGTAATTTCTAGCGAAGTGGCGAATTTATGATGATTTTGC +AATTTATCAATGAAAAAAACAAAAAAAGCATTTCCGCGTGGCAAATTATC +ACTGTTGTGCCGCGAATAGTACTGATATAATTTTTTTTTAAATTGAATCA +GTTTGGAATTTCGAATTTTTGAAACAACCGCCGGCAAAATGAATAATATA +CTGTGAGTTTTCAAAGATTGTTCCCCGGTTTTCTTGTAGATTTTTGTGCA +ATTTCAAAATTTCGACTTTCAGCGGAAACAAAAATACAAAAAGGTCGAAG +AAGAAGGACGGAAAGTTTACGAGCAACACTGCTACTGATCTTGTTGAGGC +CCCAAAACCAGGTGGAGCAAAAGGATCCAAAAAACCAAAACCATCGAAAG +GAAATGACGACAGGGGACGCGGCGAACCGGAAATGACTGCACAATCCAAA +AAAACCAAGAAAGCAAAAAAAGTGAGACAGAAGAGAGATACAAAGGCACC +GGTGAGTTCAATTTTATAGTTTTATAGATATAATTGTATCAGGGGTGGGC +GGCAATTGCCGTTCGGCAAATTGATTTGCCGCCAAATTCGGCAAATCGGC +AAATTGCCGGTTTGTCGATTTGCCGGAAATTTTCAATTCCGGCAATTTGC +CGGTTTGCCGATTTGCCGGAAATTTTCATTTTCGGCAAATCAGTTTTGCC +GGAAATTATAAGACGGAAACACTGTCTTTTTGAATTGTTCCCGTTTTTTC +TACATATTTTCATAGAAATTCATAGAGACAATTTGCCGAAATTTTTCGGC +AAATTGCCGTTTTTCCGGCAATTTACCGATTTGCCGGAAATTTCCATTCC +GACAATTTGCCGATATGCCAATTTGCCGGAAAAAATCGTTTGCCGCCCAC 
+CCCTGAATTGTATCCCTTTTTAGTCTCCTGCAATGTCACCTCGATCACGT +GAAATGCTGTCAGACAACAAAAAGACGACCGTTGAGGATGATCCAGCAGT +TCCAATAACAACAAGTGTCGCTGGAGCAGTCACTTCTACGATTCCTGCTC +TCATGTCCACTGAAAACGCGTTTCAGCCACCCAAAGACCTTATCTCGGGA +AGAAAGCCAAAAGGTGCTCCGAACTTGTCTGGCAGCGTTCCACTTTCACA +CAAGAGCTCACTGACTGGAGGTGCAGATAAGTGGGTTGGAGAAGCCGTGG +CAAAGAATTGGATGGATTCGATCGGTTAGCATCCTTTTTTTTTTGATTGG +AGGCCCCGGCGGCGTAAAAATCGTGCCAGAGTTGCAAGAGTGGGCGTTTT +TTTTTGTTTTTCTGTTGTTGGTGGGGCCTTATTATTATTTGGTCGATGGT +GATTTTACTTTACTGGTGGAAATCCGCTAATAAAACTAAAAACATCGGGA +TATAAATAAAAACCCAACTTTAAAAAGCGAACCAATCAGCAGTTTGCTCC +GCCCACTATTCAACCAATCCAATGATTGGTTCGAAATTGGGTACTCATTT +TAGAGGGAATTCAAACGGAAGATTAGCAAGTTCGGTTATTGTTTCATTAG +AATATCATTGCTTATATATATATTTAAAAAATAGTGTACGTCATCAATGG +GCGGAGCCTCTCCCATCGTGGTGAGACCCATCGTGGTGAGACACATCGTG +GTGAGACCCATCGTGGTGAGACCCTTCGTGGTGAGACCCTTCGTGGTGAG +ACCCATAGTGGTGAGACCCATCGAGGTGAGACCCTTCGTGGTGAGACCCA +TCGTGGTGAGACCCATCGAGGTGAGACCCTTCGTGGTGAGACCCATCGTG +GTGAGACCCCCATTAATTTTGGCGTGAAATTCAAATTTTCTAAGAACAAT +TTTTTGGCTTTTTTTTTAAAGAAAAATCTGGAAAATTCTAGCATTTTTTT +AGAATAATCTGGAAATTTCTTGAACTTTCTAGCTAAATCTGGAAACTTCT +AGAATTTCCTAAAATAATCTGGAGACTTCTAAAAAAGATCAGGAAATTTC +TAGAACTTTTTTAGAATCTTCTGAAAACATCTTTCTAGAGTATACTAAAA +ACTTCTAGAATTTTCATAAAATTCCGCGAAGCTCGAGGAAGAGATTTGTT +CAAACAATTTAATGTGGAGCCAGTCGGAGCACGCGCTTTGCGCGTGCGAA +CGGCTGGTTTTAGATATTTTGTAGTATTTTTCAAATCTTAAATTTTCAGA +CACAGCAGAGGTGAAAAAGGAATACGATAAACTGCAATCACAGAAAATCG +ATGTGGAGAAGGACTGTAAAACCTGGAAATCCAACCCAAAATTTAACCAA +TCGGAAGACTTTCCTGCCTTGGACTCCAATCTCGTGAAACTTGGCAAAGA +CTATGTGAACATCAGTCTTGTAGATGTTCCATTGGGAAGAAATGTTCATC +TCGGTCAAGCTCCGGTCACGAATACGGAAGAAACATTCTGGAAAGCTGTG +TTCGACAAGCGAATCACTCACATTGATCTACTCGTCGGCGATGAAACCAT +CGAATTCTTCCCTAAAAAGGCAGAAGATTATACAAATTATGGTCAGATGT +GGATCAATAACCGCCGTGTCGAGTATGTCAATGACGATGTGTACCGATTT +GCAATTGAAGTAGTGCCCCACGGATGCTCCAACTCGATCATCTGCAATGT +GACTGTTATCAGCAATTGGAAAGTGGACACTGTGCCTTTGAAACAAGCAA +TCGCCATCAAAGAGGCACTTGGTCTAAACTACTTCTTGCTGAAAGCACCC +GCTGACGAGCATGCGATGATCGTCTCACCACGTGGAGCTGGACGAGCTGG 
+CTACTTTTTGGCACTTGCGGTTGCAGTCAACACAATTGATACAAAGCTTG +CCGAACCATGTATTGCGGATATTGTCAAATCGATTCGCTCCCAGAGACCA +AGAGCCGTGGATTCATTTTGTCAGTACTGCTCCCTGTACATTTCGTTGCT +CTACTTCATCAAGGTATTGGTTTTTGAATTCTTGTAAAATGAGAGAATAA +TTCTCAAATTTACAGAAGAAAGTGACAAAACCAGCAGAAGGGGATAAAAA +GCCAACACTGAGCAACAAATACATCTACAAGAAGTCGGTTGAGTTGACCA +AGCAATTCACCGTTCTCCTTCTCGAAGCGAGTCAGCAAAGTGTGATGGGA +CCTGGATAGTTCTTCTCCATTTTGAATATGGAAAAGCATTTGAAGTGATC +TGGGTTTGAAAATGAAGAATTTTATAAACCCTGGATTATTTCCGAAATCT +TGCTTCCTGCCTGCTGTCTCATTGCGGCTTGATCTGCAAAAAAAGCGGTA +ATATCTCCCCCAGGAAAATGTGACGTCAGCACACTCTTAACCATGCGAAC +TTAGTTGAAACGTCTGCGTCTCTTCTCCCGCATTTTTCGAAGATCACACC +AAAATGGGATACTTTGACAACACGTGCCTGCCATCGCTTCCTGACCGGGG +ATTTTGTTAGAAAATTCGTGGAAAATAGGAAGAAACATTTCGAGAAAATG +GAATTATTGATTTTCTGATTTTCTCAAACAAAAAAAAATTAAAATCGATT +GATTTCTGGCTTACCTTACCTAAATTGAAATGGAAGAGTTTGCCGAACTA +GGCCATTTTGGCTCGGCTATATCTGGGGTATATTTACGGCGCGTTGCGTG +TCGCGTCGCGGCTCGATTTTAGTTGTAAAACTAAATGTGATTTGTCCGTG +TGGAGTACACGACTTTCACATGCGTTGTGCGGCGGGCGATTGTCAATGGA +GCGCGAAAAATACAATGAGGAAGGCCAAAACCCCGTGTATTGATTTTTCG +AAAATTTCGGAAAAATCTCTTTGTTTTATTCATTATATTTCTCCTAATTT +CTTTGAAATGTTCAAAAAATCTGATTTTCTTTTTTTTCCCGAATTTTTGA +AAAATCGATATTTGCTATCCATATTTCTCGAAAAGAAAATCGATTTATTA +ATTTTTTTCCGGTTTCCTGGAAGCTAATCAATTCAGAAAAAAGAATTAAG +CTATGAATATATACTTATAAAAGTCGTAAAAGGTATGGAAATATAAATTA +TTTTAAAACAGCGATTTCTGACTGAAGCATCTCAACTACCAGGGCCTCAA +GTTCTTCTTATCCGACAAATGAGACGAGTCGGAGCTATAATTGTGCCGAT +ATTTTCCGGGCGCCGTCTCTTCGAACTCGGATACGGTAGCCTGCCCGACG +TATTTGAAATCGCCCATTAGCACTTCGTGGCAAGCGCCGATTGATGCTCC +GTGGCCGACGAGTAGGAGGTTTCCTGAAAGAAAATCGATAATTTGCGGCG +TGTTTTTATTGATTTTTATGTGAAAATTGGCAAATTATTGATTTTTCGGC +TGAAATCATAAATTACTGATTTTCTGTTAAAATCAGTAATTTATTGATTT +CTTCGGCTAAAATCAGAAATTATCGATTTTTCTGTGAAAATCTGAAATTA +TTGATTTTTCTGCGAAAATCAGAAATTTACGTAGTTTCGCAAAAATTACC +TTCATATTTCTCGAAAATCGCCCGCAGGGTCTTTCCGACACGTGGCACAC +ACGCGTCATCTCCACATCCTTCGCGGGGTAGTGTGTACTGAAAAAAAAGT +TCGGTAATTTTTCTAGGTCTCGCAGCGATTGCCGCCACCGGTATATTTTC +ATTTTTCCAACTATAGCGCCAGCACCGGCGCCGGCATCTTTTTATTTTTC 
+CACGTAGCACGTCGGCGCCGGCAGCCGCGCAAACTATTTTCGCCGCCGCC +GGCATCTTTTTATTTTCCACATAGCGTGCCAGCGGCGCCGCCGGCATATT +TTTATTTTTCCACGTAGCACGTCGGCGCCGGCAGCCGCGCAAACTATTTT +CGCCGCCGCCGGCATTTTTTACTTTTCCACGTAGCGCGCCAGCGGCGCCG +CCGGCATGTTTTTATTTTTCCACGTAGCGCGCTAGCGCCGCCCCCGGCAT +CTTTTCATTTTTCCACGTAGCGCGCCAGCGCCGGCAGCCGCGCAAACTAT +TTTTCGCCGCCGCCGGCATCTTTTTATTTTCCACATAGCGTGCCAGCGGC +GCCGCCGGCATATTTTTATTTTTCCACGTAGCGCGCTAGCGCCGCCGCCG +GAATTCATTCATTTTTCCACATAGCGTGCCAGCGGCGCCGCCGGCATCTT +TTTATTTTTCCACGTAGGGCTCTAGCGCCGGCGGCATTTTTCATCTTTTC +ATGCAGTGCGCCAGCGCCGCCGCCGGCATCTTCTCATTTTCCACGTAGCG +CGCTAGCGCCGGCGGCAGCGCAAACCATTTTTCGCCGCCGCCGGCGCCTT +TTCATTTTTTAATTAGTGCGCCAGCGCCGCCGCCGGCATCTTTTGATTTT +TCCACGCAGTGCGCCAGCGCCGGCAACGGCGCAATTCTAATCTCGGTCTT +TTTTGAGCGATACCTTCGAGAAAACAGGAATATAGTCCAAATCAACAAGC +GGAAATTTCTCAGCTAGTTTGTCGGTTTCCCAGAATCCAGGAGGCTTATC +ACAAAGATAAAGTGCCTCACAAAGGCCACCGTCCGCCTTCACTTTCATTC +CTTTATCCTCAATAATCGTCGAAGCTGTCTCAATTGTCCGATCGAACGGC +GAGGCAAACGTATGCGAGATTTGTGCATTTTCGAACCGATTTTTGCACTC +TTTCGCCTGTTGCCGACCGCGCACGCTGAGCATCGAATTGTCCGACGTGA +GACCGTCGGCTCCGTCGAGTTTACGCCAATTTCGATTGATATTGTCCTCG +CTGCAATGCAAAAAATCAATAATTTCTGATTTTCACCGTAAAATCAATAA +TTTCTGATTTTTGTTGAAAAATCATTACTACCGATTTTAACTGAAAAATC +AATAAATTCAGATTTTCATTTTTTTTTTCGAAAAGTCGATAATTTCCAGA +TTTTCATCGAAAAATCAATAATTTGTGATTTTCACAAAAAATCAATAATT +TCAGATTTTTACCGAAAAATTGATAATTTCAGATTTTCACCGAAAAATCA +GAAATTATTGACTTTGGATAAAAATCAATAATTTACTGATTTTCATCGAA +AAATCAATAATTTCAACCTTTCGCAGAAAAATCGATAATTTTTAATTTTC +ACCGTAAAATCAATAATTTCTGATTTTCACCAAAAATTAATAATTTTAAT +TTTTTTTTCGAAAAATCAATATTTTCTAATTTTTATCGGAAAATCAATAA +TTTCCGATTTTCACCAAAAAATCAATAATTTCAGATTTTTATCGAAAAGT +CAATAATTTACGAATTTGACAAGAAAAAATCAATAATTTGCTGATTTTAA +CAGAAAATCAATAATTTTTGTTTTTCATTGTTTTTTTTTTCGAAAAATCA +GTACTTTTTTAATTTCCCCAAAAAATCAATAATTTTTGATTTTATTTTTC +GAAAAATCAATAATTTCAGATTTTAACCGAAAAAATCAATAAATTCAAAA +AATTTCATTTTTTTTTTGAATTCTTAAAAGTACCGTTCCGCGTGTCGAAC +AATCCAAATTTTCCGTGGATAAGTGATCGGCATCCTATCGAGATCGGTTT +TAGCTGAAAAAAAACCTTAAAAAATTGGAAAATTGTGTACTCCTCTCGGA 
+CACGTGGTGTCAGGCTCTCCCATTACGGCTTGATCTACAAAAAATGCGGG +ATTTTTCGTTCTTAACCATACGAAATAAGTTGAGAACTCTGCGTCTCTTT +TCCCGCATTTTTTGTAGATCAACGTAGATCAGACCAAAATGAGGCACACT +GAATGAAGACACCAGGATTTTTCGAAAAACTCTAAAGAAATTCTGCAAAA +AGTGACGTCATGAGGTATATAGGTATGAAATCGAATACTTGGTATCAAAT +GACGGAAAAATAAACGAAAGTTGCAAAATTTTTTGATGGAATTTATTAGA +CTTTGGCGGGAAAACAGATCAAATTTACATTAAAAAATACGGGAAAAATA +CGGGGAAAAAAACCAAATTATTGCCTAAAAAATTCAGGATTAAATTGCGA +GTTGTACTTTTTCTTCTTTCCACGCTTCTCCTCCGAATCCTCGGCGGCTT +TTCGTTTCTGCAGCAGCTCAATTTCCTCGGGATGTAGCAAAGCATGGGCT +CTCGCTTTCTCGACTCGTTCCCGTTTCAGACGCTCGTCTCGCATGCTCGC +CAGCTTCTGCCGTCGTTCTCGATCGTATTCCTCTTCGAGTTGACGTTCTT +CAGAGCTATCACGGTGGCGGTGCTTTTCTGGAAGAAAAATTGGGGGGTTT +TTTTGTAGTTTGTAGTGGTGAGCACCCTATATTGGAGGCGCGAAAAAAGA +GATTTTTCGAATTTTTTTGTAGATCAACGTATGGTTAAGAACGTACTGAC +GTCATATAATTTTGGGCGAAAAATTCCCGCATTTTTGGTAGATCAAACCG +TAGTTGGACAAAAAGCAGATTTTTCGAAAAAAAATTACGAATTTCTGGCT +TTCCTCATAAATTGAAATGGAAGAGTTTTTGCCGAACTAGGCCATTTTGG +CTCGGCCATATCTGGGGTAGATTTACGGCGCGTTGCGTGTCGCGTCGCGG +CTCGATTTTAGTTGTAAAACTAAATGTGATTTGTCCGTGTGGAGTACACG +ACTTTCCCACGCGTTGTCCGGCAGGCGATTGTCAATGGAGCGCGAAAAAT +TCAATGAGGAAGGCCAGAACCCCGTGGTGGTTCGATGCACAAAACTGATT +TTTTTTTTGAATTTTTGGGATTGCGCAAGAAATTTCGCCGAAAAAGTCGA +TAACTCAAAAAATTCTAAAAAATTTGGATAATCTGGAGAAAAAACCCAAA +ATTTGCTGAAAATTTCGAAAAATTTAGCTTTAAATTTTTGCGCATGGCCG +CCGGAAAAGAAAAAAACTCGGCCACCAATTTTTTGGCGGCCATGATGCAA +GACACCATATTTGACGCGCAAAAAAAGGTGTTTTTGGATGTTTTTTTCTT +TTTCCGCCAGAATTTTTAATAAATTTCCAGGGACAAAAAATCATAACCTG +TTGCAAAAATGTTTTTTTTTTCGAAAAATTCAATTTTGCGCGTCAAATGT +GATGCTTTAAGCTCTGTGGCCGCCGAAAAAGAGAAAACTCGGCCACCAAT +TTTTCACTCTCCATTGACAATAGCCTGCTGGACAACGCGTGGAAAAGTGT +CGTGTACTCCACACGGACAAATACATTTAGTTTTACAACTAAAATCGAGC +CGCGACGCGACACGCAACGCGCCGTAAATCTACCCCAGATATGGCCTGGC +CTAGTTCGGCAGAAACTCTTCCATTTCAATTTATGAGGGAAGCCAGAAAT +CCGTGGCCGAAAAAGAGAAAACTCGTCCACCGATTTTTTTTTCTGACGGC +CATGACAAGTTTGCGCGTCTATAATACATCATATTTGACGGGCAAAAAAT +TTTTTAAAGTTTTTTCGAATTTTTGTGATTTTCGAGCAGAAAAAAAAATT +GTCTGAAAATTTATGTGGCCGCGGTAAACTTCGGCCACCGATTTTCGACG 
+ATTTTGCCCGTGTTTTTGACGGAAAAAGCACCATATTTGACGCGCAAATT +TTCAAAAAAAAAAAAGCCCAAAAATCGCACTTTTCTTCTTCTTCTTGTCT +TTTTTATGCCGTTTCTTCGCCTTTTCTTCACTATCACTATCCTCTCGTTT +CCGCTTCCTCTCCTCTTCCAGCTCTTTCCGCTTCAAACACGGCAGAGCAT +CGCGCGCTTTTCGTCGATCAAGTGGCTCGGCGGCTCTTCGCAGCGGCAGC +TCCTCGTACCACTCTTTTTTCTTGTTTAAATCGTTCGTATTATCCGCGAA +ATACACCTGAATTCCCATTTTCGACTCCCATTCCTTCTTCTCGGCAGCTT +TTTCCGCTTCGTAGTCGCGGTTTCCGGATCCATAGTTTTTTCGTTCTTCA +CGTTCGAGATCCTGCAAAATATCGATTTTTTTTGGAGAAAAATATAGTTT +TTTTTTGGAGAAAATTATCGATTTTTTGGAAAAAATTATCGATTTTTCGG +GGAAAATTAACGATTATTTTAATAGAAAATTATCGATTTTATTTGGGAAA +AAAATCACTGATTTTTTGGAAAAAATTATCGATTTTTTTCGGAGAAAAAA +TATTAAATTTTTTGCAGAAAATTATCGTTTTTTTTTTTGAAAAAAATTAC +CGATTTTTCAGAGAAAAATATCGATTTTTTAGAATAAAATTATGGATTTT +TAAATATAAAATTATCGATTTTTAGGGAGAAAATTATCGATTTTTTTTGG +AGAAAAATATTGTTTTTTTTTGGAGAAAAATATAGATTTTTTTGAAGACA +TTTATCGATTTTTTGTTGATAAAATTATCGATTCTTCGGGGAAAAATATA +GCTTTTTTTGGATGAAATTATCGATTTTTTGGGAGAAAAAATACCGATTT +TTTTGGATAAAATTATCGATTTTTTTGAAGAAAAAGTATCAAATTTTTTG +AAGAAAATTGTCAATTTTTTTGGAAATAAATATAGTTGTTTTGGATAAAA +TTATCGATTTTTCAGCTAAAAATATAGATTTTTTTAGATAAAATTATTGA +ATTTTAAAAAGAAAATTATCGATTTTATGGGTAAAAAATAACGATTTTTT +TGGAGAAAATTATTGATTTCTTCGGGGATAAATATTGATTTTTTCGGGGA +AAATTATCGATTTTTGGATAAAATTATCGATTTTTTGGAAAAAATTATGG +ATTTTTTTTGAAGAAAAATATTGATTTTATTGGAGAATAATATCGAATTT +TTTCGGGAAAATATCGATTTTTTGGAGAAAATTATCGATTTTTTTTGAGA +AAATTATCGATCTTTTGGGTAAAATTATCGATTTTTTGAAGCAATTCTGA +GCCAAATTTTGAATTTTCGAGCGGCAATTTTTGCACAAAGCATCATATTT +GACGCACAAAATTCGAATTTTTCAGAATTAGTTTCACAAAAAATCCATCT +ATCACCTGAAACAAGTTCACATGTCCCGTTTCATCTGAAATGCTGACATC +TTTTGCTGCTCCAGCTGAGCTCGAACCAGCTCCTCCACTAAACATCGTCT +CCATTCGTTGATCGGCTCGGGCTCTCAACTGCATCAGGCGCCGCTCGTTT +TCCGCCTGAATTTGCCTGTCGAGCACACGCTGCTCATCTTCGGCGGCTTT +TCGCTCGTCTCGGCGAACCCGTTCCATGTTGGATTTTGTCCGAACGTGCC +ATGATTTGTGATGGAGAATGTTCATCTGGAAGCGGAAATTATAGAGAAAA +AATTGATAATTGTGCTCGAAATTTGGCTCTGGAAGAAGCGCAAAAAATCA +ATAATTTCGTCCAAAAATCTATTTTTTTCAGAAAAACCAAAAAGCGTTTT +TTTTTCCAAAAATAAAGATAATTTTCGTCCAAAAATCGATAATTTACTTC 
+TAAAAGTCTCAATTTTTCCAACAAAAAAAACAATTTTCGTCGAAAAACCG +ATGTTTTCAGAAAAATCAAAAATCAAAAAAAAAATTAAAAAATTAAAAAA +AAATCAATAATTTTTTCTAAAATCTTGCAATTTTTTTCTAAAAATCGTTT +AATTTTCTTCTAAAATCGATATTTTTCCGGATTTTTTCGTGAAAAAAATC +GTCAATTTTATTGAAAATTTTGCTAAATTTCGAAAAAAAATCTTGAAATT +TCGATGCACCATGATTTTGAAAATTCTGCTCCGGATCAATTTTTCGAGAA +AAATTATCGATTTTTGGAAACTATCATACAAAAATCGATAATTGTGCTCG +AAATTTGGCTCTGGAAGGAGGAAAAATCAATAATTTTCTTCTAAAAATCG +ATTTTTTTTCCAAAAAATCAATAATTTCGTCGAAAAACCGATGTTTTCAG +AATAATCAAAAAAAAAATTAAAAAATTAAAAAAAAATCAATAATTTTTTC +TAAAATTTTGCAATTTTTTTCTAAAAATCGTTTAATTTTCTTCTAAAATC +GATATTTTTCCGGATTTTTTCGTGAAAAAAATCGTCAATTTTATTGAAAA +TTTTGCTAAATTTCGAAAAAAAATCTTGAAATTTCGATGCACCATGATTT +TGAAAATTCTGCTCCGGATCAATTTTTCGAGAAAAATTATCGATTTTTGG +AAACTATCATACAAAAATCGATAATTGTGCTCGAAATTTGGCTCTGGAAG +GAGGAAAAATCAATAATTTTTTTCTAAAAATCGATTTTTTTTCCAAAAAA +TCAATAATTTCGTCGAAAAACCGATGTTTTCAGAATAATCAAAACAAATT +TCAAAAATAATAAAAAAATCAATAATTTTTTCTAAAATTTTGCAATTTTT +TTCTAAAAATCGTTTAATTTTCTTCTAAAATCGATATTTTTCCGGATTTT +TTCGTGAAAAAAATCGTCAATTTTATTGAAAATTTTGCTAAATTTCGAAA +AAAAATCTTGAAATTTCGATGCACCATGATTTTGAAAATTCTGCTCCGGA +TCAATTTTTCGAGAAAAATTATCGATTTTTGGAAACTATCATACAAAAAT +CGATAATTGTGCTCGAAATTTGGCTCTGGAAGGAGGAAAAATCAATAATT +TTTTTCTAAAAATCGATTTTTTTTCCAAAAAATCAATAATTTCGTCGAAA +AACCGATGTTTTCAGAATAATCAAAACAAATTTCAAAAATAATAAAAAAA +TCAATAATTTTTTCTAAAATTTTGCAATTCTTTTTTCTAAAAATCGATAT +TTTTCCGGATTTTTTTCGTGAAAAAAATCATCAATTTTATTGAAAATTTT +GCTAAATTTCGATAAAAAAACGATTATTTTGAAATTTCGATGCACCATGA +TTTCGAAAATTCTGCTCCGGATCAATTTTTCTAAGAAAATTATCGATTTT +TGGAAATCATCACACAAAAACCGATAATTTTGTGAAAAAATTCGATTTCG +ACGCACCATGATTTCGAAAATTCTCAGAATTTTAGAAAATTTATCGATTT +TGTGTAGAAAAAATTCGAATTCCAGTGGAAAAATTTTCTTTTCTCAAAAA +ATGTTTGATTTAAAAAAAATCGATTTTTTCAGAAATTCAGGCATTTTTTT +GCAAAGTAAATTGGCAAAAATCGATGATTTCACTGTGAGAATGGGAATTT +TTGTCAAGAAAAATATAAAAAATTGCCTTAAACAATTAAAGTGGTTGTCA +AAAAATTGAAATTTTGGAAAAAAAAATATATTTACAGATGAATTTATTAG +AAAAATTCCACAAAAAAAGAGGTTCCATCTAATATTATTATGTCGCATTG +GGGGTTATATCACATAATTTTTTTTTGTTTCGGTTGCCGTGGAGAAGGGG 
+GAAAATATCACATTTTTCAGAGGAAAATCACAATAAATATAAGGAAAATT +TACAAAAAAAGGCGCGGGGGGAAATTTCTTCACAAAATTATCGATTTTTG +GGTGATAATTTCCAAAAATCGATAATTTTTATAGCAAAATTGATCCGGAG +CAGACTTGGGATAAAGCAATTTTTGGAGGGGGAAAATGAACAAATTAATA +AAATATAATATATTCGGGGCAAAAAATGGGAAATAATTAAATTGTCGGAA +CATATAAATGATTAATTGATTGAGCCAATGGCATTGGTTTCATGTGAGCC +AACGCTTCATTACAGAACACTTCCTCCCGATGACGTAGCGGACGAGAATG +CTCGACACGAGCAAGACGGTAGGCTCTAGAGGCGTTTATCTGGAAATTTT +TAGTGAAAAAATGAAGAAAAAAAGCTTTTTTCGCTGAAAATTCCAAGATT +TTCGGTTTTTTTTTTGCTAATTTTTTTTAAATTTGCCTTGAAATAAATTT +CCAGATGTTCAGTAGTTTTTTTTTCCTTCAAAAATCGATTTTTCAATTAA +TCAAACATTTTTCAGAGTATTAATAATTATTTTTCCACCAGAATTTGAAT +ATTTTATCCAAATTCTGAGAACCGGCTCACAGAGCGGCTTTTACTACGTG +GCCTAGAAAAATCAAAAACTCGGCCAATGATTTATCTGGGATTTTTGACG +TGGGATGTTTCTGAAACTTGACGAGAATGTTCTCGAATGGGGGTTCTATA +GTTCCACCGTGGCGATTTTTGAGAAAATGTTCCGTATCCATGTTATGAAG +GTGGCCGAGTTTTCTTATTTTACGGCCACGTAATAAAAACCGCTCTGTGG +GCTCCTGAGCAGAATTTTCGAAATCATGGTGCATCGAAATTTCAAAATTA +TCGATTTTTTTTTTGAAATTTAGCAAAATTTTCAATAAAATAAGAAATCA +TGGTGCATCGACAAACCAATTTTTTCCATTTTTTCGGAATTTAAAATAAA +ATTTAAAAAAAAGTAAAATTCTGGAATTTTACTTTTTAAAAATATTTTTT +TCAAATTCTGGCAGTTTTAAAATAATAAAAAAACGTTTTTTTTCCATTTT +TTTCAAAAATTTTTTTCTGAAATTTAGTACTACAAATGTAAAATTTACAG +GGTTTTCACAGGAATTTTCAATTTTTTTGATTCGCTTTTCTTCAAAAATT +CTAAATTTTCAGGTTTTTTTTTCCAATAATTTTTAATTTTTGTGAAAAAA +AAATGTTTTTAATTTTTTCAGACTTAAAAAATTGTGACAACTTTTTTCAA +TTTTTTCCGCAAAAATAAATTTTCGGAAATTTCAAATTTCCTACAAAAAA +CCCCTTACCACAGTAAGCCTAACCAACTGTGAGAGCTCGCTCATCGTGAC +TAGGGCTCCGTCGAAAAGCGGCCCAAACTCCAGAGCCGACGCCGTATCGA +CGCGCACTCGAACCATCTGAAAAACCAAATAATGGAAGAGTTTTTTTTCG +GCCAAGCCAGGCTGCCCCATTTGATCTACGTAGATCTACAAAAATTGCGG +GAGAAGAAAAAAGAGACGCAGAGTTCTCAACTGATTTCGCATGATTAGGA +GTGTGTTGACGTCACATTCGCAAAAAATTCCCGCATTTTTTGTAGATCAA +ACTGTAATGGGACAGCCTGGGACCACGTGCAGTGTTAGGCTGTACCATTA +CGGTTTGATCTACAAAAAATGCGGGAATTTTTTGCAAATGTGACGTCAAC +ACACTCCTAATCATGCGAAATCAGTTGAGAACTCTGCGTCTCTTTTTCTT +CTCCTGCATTTTTGTAGATCAACGTAGATCAAGCCTCTCTCTCTCTCTCA +CCTTATCACCGACTTGCTCCAACACAATCAGAACATCACAAAACTTGGTG 
+GCAATCGTTTCGCGGGTGTACACTTTCCGTGTGTTCTCAGTCCACACCAC +ATGCACCTCATCGTTTCCGATATGCTTCCACTTCTGCTGCACATCGCCGT +TGAGCATCGTGCTCACATGGAATATCACCTCAGCCTCGGCGTCTGCAAAG +TACGGAGCCCGTGTCTCTACGGGCAGGCCACCGGTGTAACCATCGTGTCC +TCGACCCACTTTGACCTCCCATCCGAGCTCCGACGTGAAGCTGTCAAATT +GCGCGGAGGCGGAGGCGGTGGTGTTGGACAGGATCGATGCTCGATCTTCC +TGCGATTCTCCTACATAGATCACAGCGACTTTGTGGACTTCACGAGAAGA +AGTCTGATCTAAATGCTTCAAGTCACGCGGGAAATTCGCGTGAACCTGAG +ACACAAGTGGGACGAATCCCAGGGAAGCCGACAGCGATCTCCATTCGAGA +AGCTTCGCGTTCGGAGTGAACGCTGGCAAATGATCGTGAGCCGCCGACGA +TGTGCCAAGTGGCTGTGGCTGCCGTCTAGACGTCTCAATCATCGATGTAA +ACTGTGCGGCACCATCGACAGGCGACGGGCTCCCACGTGACGGGGGGTAG +TCCGGTAGATCATCGAACGGATCATCCATCGCTCCGAGGATCCCCTGGCT +TTCCCGACCGGGACGTTTTCCCTTGAGAGCCTCTTTGCGTAGCCAATTGG +TCACGTTGGTGGCAGGCTTGCGGACATCCTCATGGGTATCTAGATCCCAG +ATATGCCGGCCAACTACCGTTCGGGAAGTGATTTCGCAGGGGGATTTGGC +GTGGAAATCCTTTCGGGACACCGAGATAATCGCCGATTGCCGATTAATAT +ACACGGTTTTATCCTCTCCTTGTAGTGGCGGTGCTCCTGCAGAGCTCCAT +CTCTGAACCTCAAAACCTCGCACCGGATACTGACCACCGAGCGTGACAAG +CTCTCCATGGAGCACCATATGCCTATCGGCCAAGATACCGCGAATGACGA +GTGGCATTTGAGGTTGTGGCCATTGCATACAAAGCTTCACCATCTGCCAC +TCGATCTCGGTTCTCGTACGCTCATCGCGTAGGAGCGAGGCGGCACGGAG +CAGCGCCTGGAGCAGTGTGGTGCTCGAGAACTGGGCGAGCGAGGCGATCG +ACGAGCAGAAGAGCGGAATGAGCCGCATCGCGAACTGATGGGTTTCGATG +GTTTTCAACGCGGTGAGCACCAAGTCGATTTGGGCTTTTTGCACGATGAG +GATGGCTAGCGAGTTCACGATTACTACGGCGTTCTCCGATACGTCTGTAT +TGACAAGCTGATCGAGAAGGATTTGCTCGGCTCCAGGATACGATGAAGCT +AGCATTGAGAACAACTTCAACGAGTTCGAGGACACCTTCGTGTCCTTCGA +GCTCTTCGCCGCCTGCAACATTTTCGGCAAATGCTCCAGAGCTCGCGGAG +CAACAATTGATAGTTCGTGTGCAGAGAGCCCACAAATCGCCGGAACCACT +AGTTGCTCCTTGGATTGCATAAGACGGATGAAGCAGAGAAGAATGTTGGC +CGAGAGCAGTGGCGGTGGCCGACGGATCGTTGATTCGTTGAGAATTGCAC +TTAGAGCCGGAACACAGCGGGGCAGCAGGATTGGGGGTACGGTAACGAGG +CGGCACGCGAGCCAGTGGGCCAGTGGGTGCAGGTTTACCTGGAAAGCAAA +ATTTTTTTTGGCTTTTTTTCAACTCGAATTTTTTTTCTAAATTTTTTTGT +AAATTTTCTAAAAAATAATTGTTCGATTTCAGAGTGCCTCATTTCGTGCG +TGATCTACGTTGATCAACAAAAAATGCGGGACTGATTCTGCATGGTTAAG +AACGTGCCGACGTCCTATTTTTTGGGCAAAAAATTCCCGCATTTTTTGTA 
+GATCAAACCGTAATGAGACAGCCTCACGTCGTGCCAGAAAGTCCCATTTC +GTGCTTGATCTACGCTGATCTACAAAAAATGAGGGGCTGATTGTGCATGA +TTAAGAACGTGCTGACGTCACATTTCGTTTGGCAAAAATTCCCCCATTTT +TTGTAGATCAAACCGTAATGGGACAGCCTGACACCACGTGATCTTCAAAT +TCCCGCCAGCCTGAAACCCACCAAAAGCAAGTTCTCAATCGTCAATGACA +TGCAATCGATTGCCACGTGGGCACTCGAGTTGGTATACGGATCCACGAGT +GAGATGACACGACGCCACGTTTGAAGCCACGCCAATTTGTCGCCAGACCA +AGACTGAAAAAGGGAAAAAGTGTGATATGCGCCTTTAAGAAAGGGTTACT +ATAGTTGTCAACAACAGAAAAAAATGCTGAAAAGGCATTTTTCAGGGTCT +ATTTTCACAATGAGTTTGGGTATAAATTGGAGAGTTTTCCATAGATAATG +CGTACTGCGCAACTAATTTGACGCGCAAAATATCTCGTAGCGAAAACTAC +AGTAATTTTTTAAATTACTACTGTAGCGCGCTGGTGTCGATTTACGGAAA +TTAATTAAAATAATTGATAAACAAAACAGAAACTATGTTCAAAAATCGAG +ATCCCGTAAATCGACACAATCGCTACAGTAGTAATTTAAAAATTACTGTA +GTTTTCGCTACGAGATATTTTGCGCGTCAAGTATGCTAAAAAATATGTAG +TTGTGGGAGCCTTGTGAATTTTTAGAAGGTTTTTTGAAAAATAACTCGCC +ACTATTAAAAAAAATATATATTTTGAGGCATTTTCAAAGGGTTAAAGTAA +TTTTTTACTTTTAAAATCGTTTTTTTTTATAAAATAAATTTTAAGCGTTT +TTAGTACAATTTTCGTAAGTTTTTCGTTTTTTTTTTGGTCATATTTTGTA +GTTTTTTACACTATTTTCCAGAAATCGTCAGAAAAAGCACTTAGAAACGG +GCTAGAAAAACGGGGTTTCGGCTTGCTGCGAAACTTTTTTTTTTGAAATT +ACCGCGCAAAAATAAATTGTCATTCAAGTAATGTTGCAAAATGTATTAAA +ATATAGGTTTTTAAAAATGTATTTTAATACAGTTGTGACGTAATTTTTCT +ATTTCAATTTTTGCAAAATAAGAAAAAAAATGAATGTAAAAAAGTTAGAA +AGTTTTTAAAACACATTTTATACAGGTCATTACGCTCTATTTTCTGCCAT +TTAAAGCGAGAATGTTTTTTGACACTACATATATAATATTAGGTCTCCAA +ATAAGATCCGGGTCAAAAATCATAACTTTGTTCGCTGTGTATCGATTTTT +ATGAAATTGTGGGAATTTGTGTTATCAACCATGATCTTTCATTTGACAAT +AGTCACAAAATTTTTTGGCCGTCCGAAGTGCCCGTACTCGGAGCCAATTT +TTTCAGACATTTTTCAGATCTCGCTTCTTTTCAGGTTTCAACTGAGGTTT +GTGTGCGGATATTGCTTAGTTTAGTACACAATGTAAGAAAACAAAAAAGT +TTGGAAAAAAATCCGTCCAAAAAAAATTTTTTTGTCGCTCGTCAAAAAAT +CTACAAAAAAAATTTTGTCGAAAATTCTTGAATTTTTATACAAAAATGAT +GTAACCGTGTGCAAACTAATTTTAAACATACAAAACATTTGAGTATGAAA +TTTGGATCTCGAGAAATACTCCAAAAACTCGAAAATAGTTCGAAAAAGCT +GTGTTTTTTGTTATTTTTTTTAGTGTGACGCACCAAATTGAAATTTTTTG +TATGTGTAAAAATAGTTTGCACATGGTTACATCATTTTTGTATAAAAAAT +CGAGAATTTTCGAAAAAAAATTTTTTTGAAGATTTTTTGACAACCGACAA 
+AAAAAATTTTGTTTGGACGGATTTTTTTCCAAACTTTTTTGTTTTCTTAC +ATTGTGTACTAAACTAAGCACAATCCGCACACAAAGCTCAATTGAAAACT +GAAAAGAAGCGAGATCTGAAAAATGACTGAAAAAACTGGCTCCGAGTTAG +GGCACTTCGGACGGCCAAAAAATTTTGTGACTATTGTCAAATGAAAGATC +ATGGTTGATAACATAAATTCCCAAAGTTTCATAAAAATCGATACACAGCG +AACAAAGTTATGATTTTTGACCCGGATCCTATTTGGAGACCTAATATATA +TATATATATAAAGATACCTACCGTAACCGCATTTGTAATATCATCAGTGT +CATGAAAAACGTTATCCTCTGCAATTGTGTTCGACGATGACATACTATCT +CCAGTCTCTGACGAATCATTTCTCGGGTCTTCCATCATTGCCGATTCGAT +TGCCTGTCGTCGACTGTTCTGTGAGACTTCTGTCGCTTTTCGGAGAGCTT +CTTCACGGGAGAATATGTCGATATGGGCGAGATGGAGGATTAATGCTCGC +GTTACACTGTTCACAATCTGAAGATTTCGTTTTATTTTTTTGATTTTGGC +TGGAATTTCATAAAGCTATGCTTTTGTTGAAATTTTAGTTTAAAAAAATA +AGGTTTTTGAACTGAAAATTGGGATAAGAATTAGAAAAAAATTTGTTTTT +TCTCTAAAACTGCTGCATTTTATTCAATTTTGTAAGATTTTCTGTACAAA +ACATAATAATTTTTGAATTTTTTTTCCAAATTTTTTGAAATTTCAGCAAG +AACTGGACAAAAAGCACTACTTTTACTAAAATTGGCAGCATTTAGGAATT +TCTGAAAAAAACAATTAATTTTCATTATTATTGTCATATTACAGGAACAC +ACTATTCTGAGAATGCGTATTACACAACATATTTGACGCGCAAAATATCT +CGTAGCGAAAATTACAGTAATTCTTTAAAAATGACTACTGTAGCGATTGT +GTCGATTTGCGGGCACGATTTTTTGAAATGAATTTTAATCATATTTTGAG +CAAAAAATGGGTCAAAAATCAAGCCCGTAAATCGACACAATCGCTACAGT +AGTAATTTAAAGAATTACTGTAGTTTTCGCTACGAGATATTTTGCGCGTC +AAATATGTTGTGTAATACGCATTCTCAGAATTTTGTGACTTTTCAAAAAA +AAAAATCGTGATCAAAAAAAATTTTTAATTGTTTTTTAAGATGAAATTAC +GATTTTTTTCGTTCTCTATAAATTTTGATCAAATTTATTTCAAAAAAAAA +AAAAATTCTTTCTATATTTTTTTTTTCGAGTTTTTTTTTCTATCTTCTGT +ACAAAACACAGCAATTTAAAAAAAAGGCAAAATTTTAAGATTTTTCTAAA +TCTAGATTTCTAATTTTCCTCGGGGTTCTGGCCATCATCCTCATAAACAG +AAATGGAAGAGTTTTTGCCGAACTAGGCCATTTTGAAACTCTTCCATTTC +AATTTATGAGGAAGGCCAGAGCCTCGTGGAAAATTAGAAATTAGATTTCG +AAAAATCTTAAAAGTTTGCCAATTTCTTATTAGATTGATGTGTTTTGTAC +AGGAAATTGAGAAAAAAATCGGCAATTTTATACAATTTAATTTTAAAAAA +TATAGTTAAAAATAGAAAAATTCAATGAAACTGGAAAAAAATGTTTACTT +TGAAAAGTTAAAAAAAATGAAAAAAAAACTCAAAAATTGAATAAAATGCG +GCAATTTTTGAAAAAAAAGCTTTTTTCCATAGTTTCTGTCAATTTTCGGC +TAAGATTAATTTTTTTTCATCAAAATTTCGATTTTCATACTCTCTTACCG +AAGCCCATTGCTCAGCCATCGGTATCCAAACACCCTTCTTCATGACGTCA 
+TGCACCTCATCCCACACCTCATCGTCAATTTCGATGCGTCTGACAGCCTT +AATCACGACGACATTCGAGATGAGTGTCGACGTAAAAGCAGCCGACACAT +TCTGGGAAAACGCGTCCGATTGGCGACAGATTTTCGAGGCGCATGTGCAG +AGTCCCGCCATTAATTCTGACCAGAATTTTTGGGGAAGCGGTTTTGGGAG +TTGAATTCGATGGGAAACCAGCTGCTTGCACGATGTTAGAATCGCAAATG +CTATGGAGATTGCCGAGGAGTGACTGGAAAAATATTTTTTTTTAATTTTT +AAAGGGATTTTGGGAAATCGAAAAATTTAAAAACTCGGATTTATGAAGTT +TTTGGCAAACCGGCAACTTCTGGTTTTTGGAATTTCGCCACTTTTTAACA +ACCGGCAATTTGGCGATTTGCAAATTTTTGGAAAACCGGCAATTTATGGT +TTTTTTTTCGGAATTTTGCTAATATTTTTAAAACCGGCAATTTGCCGATT +TGCAAATTTTTGGAAAACCGGCAATTTGTGGTTTTTGGAACTTGGCCACT +TTTTATAAACCTACAATTTGCCGATTTGCAAATTTTTGGAAAACCGGCAA +TTTCTGGTTTTTTTTTGGAATTTTTCTAAAATTTTAATAAACCGGCAATT +TGCCGATTTGCAAATTTTTGGCAAACCGGCAATTTGTGGTTTTTTTGGAA +TTTTTCTAAAATTTTAATAAACCGGCAATTTGGCGATTTGCAAATTTTTG +GAAAACCGGCAATTTTTGATTTTTGGAATTTTGCTAAAATTTTTAAAAAC +CGGCAATTTGCAAATTTTTGGAAAACCGGCAATTTGTGGTTTTTGGAATT +TTGCCACTTTTTATAAACCGACAATTTGCCGATTTTCAAATTTTTCGGCA +AATTGCAGGTTAATAAAAAGTGGCAAAATTCCAAAAAAAAACCAGAAATT +GCCGGTTTTCCAAAAATTTGAATATCGGCAAATTGTAGGTTTATAAAAAG +TGGCAAAATTCCAAAAAACCACAAAATGCCGGTTTTCCAACTCTTTCAAC +GAGAGTATCCAATTTTTTAAATAGAAAAATTGCCGTATTCCCTACCGTTC +TCCACACATTTGCCGATTTTCAAATTTTTGGAAAACCGGCAATTTTTGAT +TTTTGGAAGTTTGTAAAAATTTTTTAAAGCGGCAATTAGTGGTTTTTGGA +TTTCTGTCACTTTTTATAAACTTACAACTTGCCGATTTCAAATTTTTGGA +AAACCGGCAATTTGTGGTTTTCAAGTTTTCTGCTAATTGGCAAACTGGCA +AAATGCCTTATTTTGGAAGTTTAGGTAAATTCTCAGAACACCGGAATTTT +GACATTTTTCAAAATTCCAAAAACCACAAATTGCCGGTTCGCCAAAAATT +TGCAAATCGGCAAATTGCCGGTTTTTAAAAATTTTTGCAAAATTCCAAAA +AACCACAAATTGGCGGTTTGCCAAATTTGCGAAATTCCTAAAACTGGAAA +TTGACGGTTTTCAAAAAAAAAAAAAGAAGGGAAATCGGCAAATTACAGAC +TTACAAAAAGTGGCAAAGTTGCAAAAGCCAGAAATTGCCGGTGTGCCAAA +TTTGCGAACATTTGAAAATACCACAAATCAAATTGCCGGTTTGCCGAAAA +TGTGCAAATCGGCAAATTGCCGGTTTTTGAAAATTTTTGCAAAACTCCCA +AATTTACAGAAAAAATTCCAAAAATCAAAAATCCGAATTATTCCAGAATT +TTTTCTTCGGAAAAATTAAAAATTGTTCAATTTTCCAACATTTTCAACAA +GCGTATTATCGAAAAAATACAATCGCACAAATTTCTCGTAATTTATTTTT +GATCTACCTTGTTGACTAGGCTCCGCCCCTAATCTTGTTGCTGTTGTTAT 
+TGTTGTTGTGGCTGTCTAGTTGAAGGAAGGGGCGGAGCCTATTCAACGAG +GTAGATCAAAAATAAATTATGAGAAATTTGTGCGATTGTATTTTTTTTCG +ATAAAATCCAATTTTTTAATGGAAAATTGCCAATTCCCTACCGTTCTCCA +CATTGAGCCAAATACGGACTGTGAAAGAAGCCCAACAAGATATTCGAGAG +CAACAGCGAGCATCGTTCAATCGATACTTCTCCAGACGCTACGAATGGTG +GAATTTCGTACTGCAAAAGCCATGCGGAGAGCACATTTACCACTTTATGA +GTGACATTTGAGCATGCCAATGGGAGTTTCATTGCTTCACGCATTAGTGT +TAATGCTACGTTTGTTGCCTGGAAAAGGACAAAAAAAACTATTTTTTTAA +ATGTAAGTAATTAATTTATTTTTTGAAATCTCTTTAAATGTGGTGTAGTC +GAATATTTTTTCAATTGCTTAATTACCCTCAAAATTGTCTGAAAACACCG +AATTTCATAATGAACTTCTTGAAAACTTCTCAGAAAAAAGTTATGACGCC +TCAAAAAATGGCCTAAAATTGGTTAAAATTTGAAATTTGACCTACTTGCC +AAGCGGCTGGAAACTAGCTTTTTTTTGAAATCACTGTCAAATTTTGAGTA +TTCAATTTAATTATCTTGCGTTTTCAACTCGATTCAGGTATTTTAAAATC +GATAAACGAAGAGATTTTTAAAAATTATTTACCAAATCTCTTCTTCCATC +GATTTAAAAATACATAAATTTAGTTGAAAACGCAAAATAATTAAATTGAA +TACCCAAAACTTGACTGTGATTTCAAAAAAAAGTTAGTTTCCAGCCGCTT +GACAAGTCGGTCAAATTTCAAATTTTAACTAATTTTAGGCCATTTTTTAA +GCCGTCATAACTTTTTTTTTTTGAGAAGTTTTTCAGGAAGTTTCATTATG +AAATTCGGGTGTTTTCAGATAATTTTTAGTATAATAAAGCAATAAAAAAA +ATTCGACTACACCAACTTCAAAAATGTAAGCAATTTTATTTTGAAGGCGG +TTTTCTTTTACTTTTCTAAAAAAAAAATTTATTCAATTTTACGATTTTTT +GCGTAAAAAAACACGGTCAAATTTTTGTTAAATTCGAAAAGGCGTGTCCA +TTACGGTTTGATCTACAAAGAATGCGTGAATTTTTAGCCCAAAAAGTGTG +ACGTCAGCACGAAAATTCTGCGTCTCTTCTCCCGCATTTTTTGTAGACCT +ACGTAGATCAAGCCGAAATGGGAGAGCCTGACACCACGCGCGTGTGCGCC +TTTAAAGTGAGTACTGTAATTTCAAAAATTCCACAACATCGAGAGTTTGA +AACTACAGTACTACTATTTAAAGGCGCACACACTTTTTCGAATTTAACAA +TAAATTGTCGTGTTGAGACCGTATTTAGGGCTCAAAAAAAAAATAATTAT +CTCGTTAGAATATTCGGGAAAGTTGCAGTTTCACTGAAAATTTGAATTTC +CCGCCAAAACGAATTTTCTCCGAAAAATTTGAATTTCCGCCAAAAAATTT +TTTTAAATCAGAAATTTGAATTTCCCGCCAAAATCGTATTTCTCAGAAAA +TTTGAATTTTCGCCAAAAATAAAAAAAAAAATTTTGGCGCGAAATTCAAA +TTTTCTGAAAAAAAAATATTTCGGCGGGAAATTCAAATTTTCTGAGAAAA +TATTTTTTGGCGCAATTTTTCATAGAAATTTAGTTCTTTTGATGTGTAAA +TTTCCAAAAATTTCAACAAAAAATCGCATTTTTCTTATTTTTTCCCCAAA +ATTTTCAAATTTTCTCCTCCAAAACCACGGAACTTTATGCGACGAAAAAA +GCGCATTTCGATACAAAATCAACGATGCATGTGTAGTTTGTAGTGTTGGT 
+TGTCCTCCAGCCGTTGTGTGAGTTGTTGCCGATGCAATATTATTAATCCA +TCGAATTAGCCAATATCTTGCAATTACCACCGGATCAGCTGTATCCAATG +GTTCACCATTCTCATCGGCTCCTTCCCAGCCACCGAATACATCGACTCCG +TTCGTTTCCAGATCGGTGAAGCATTCGCCCATGTATAGCTTTATTACTCT +GAAAATTGAAGAGTTTTGTACTCCTCTCGGACAATTGGAGGTGATTTTTT +TTTTCGAATTCTTTAAAACAAAAATTTCCCGAAATTGAGCTTTTAAAATT +TTAAAATTTCAAAATTTCAAAATCAAAAAAAAAAAAAAAAACTTTTGTAC +AAAATTTAAAGTGGAGAATTTTTGTATTTTAGACAAATTTTTTAAACATT +TCTAGCAGAGTTGAAAATTTCAGGCAAATCGGCAATTCGCCGAAATTGAA +AATTTCTCATAAATCGGAAATTGCCGAAAATGAAAAATTCCGGCAAGTCG +GCATATTGCCGGAATTGAAAATTCCTTACAAATCGGCAATTTGCCGGAAT +TGAAAATATCCGGCAAATCGGCAATTTGCCGGAGTTGAAAATTTCCGCCA +AATCGGGAATTTGCCGAAATTGAAAATTTCGTATAAATCGCCGATTTGCC +GGAGTTGAAAAATTCTGGCAAATTTGAAAATCGGCAATTTGCCAAAGTTG +AAAAATTCCCGGCAAATCGCCAATTTGCAGATTTTTCGACCGAAATTTGC +CTACCGGCAATTCCTGCCGACCCCTTTTTTCCGAGAAGAATTAACTTTCC +ATTTCTAAAAAATCTGTAATTTGCCGGAGTTGAAAATTTCCGGCAAATGG +GCATTATGCCGAAAATGAAACATTCCGGTAAATCGGCAAATTGCCGAAAA +TGAAAAATTCCGGCAAATCGGCAATTTGCCGAAGTTGATAATTTCCGCCA +AATCGGGAATTTGCCGAAATTGAAAATTTCGTATAAATCGCCGATTTGCC +GGAGTTGAAAAATTCTGGCAAATTTGCAAATCGGCAATTTGCCAAAGTTG +AAAAATTCCCGGCAAATCGCCAATTTTTTGCCTGTTGTGCATATTATTTT +CACGACTAAAAATCGTAATAAATTAAATTAAATTAAATTTGCCGATTTTT +CGACCGAAATTTGCCTACCGGCAATTCCTGCCGACTCCTTTTTGCCGAGA +AGAATTAAAATTTCCATTTCTAAAAAATCTGTAATTTGCCGGAGTTGAAA +ATTTCTGGCAAATTTGCAAATCGGTACTTTGCCGGAGTTGAAAAATTCTG +GAAACCGGCAACCCGGCAAACCGCCAATTTGCCGATTTTTCGACCGAAAT +TTTTTTTTCCTGTCGACCCCTTTTTCCGAGAACAGTTAAATTTCCAAAAT +TCTCTAAAAACCACAAACTTCTCAATAATAAAATTCGCACACTCCTGTCG +CTTCCCCTCATCATTCCACTCAATCCGCACAGTTTCTCGAGTACAGTACT +CCAAAAACTTGTCCAAATAAACTTGGAGCATTTGAGCCCGTTCTTTCGAA +GTCTGCGACGCCGACGCTGTCGACTTAATCGGCCCATTCCTCAGTCGTTC +ACACCAATCACCGGCTCCCGACGATTGACAGTACTCGTTGAGCACATCTT +CCGCATTTTCACCACTGGTAAGCGGGAAGAATGGTAGAAGACACTGAAAG +ACACGATCCAGGTCTGGTGTCGCTTTTCCGTACACTCCGAGACATTGATA +GAACACTAGGAACAAGCGAATCGCAATTTTTCGCACTGCCATCAAATTTT +TCGGGTAGACTGCTTTTTTGAGAAGCGATTCGATTGCATGACGTTGCCAG +CCTGAGAGAAATTGGGAAAATTTTTAATTTTTATTTAAAGGTGGGGTAGC 
+GCTAGTGGGGAAATTGCTTTAAAACATGCCTATGGTACCACAATGACCGA +ATATCATGATATAACAATTCAAAAAAATTTTCTAAATTTTATATGATTTT +TTGAAAATTGAAAAAATCTCAGTTTTTGTCTAATTCCAATTTGAATTACC +GCCAATTGAATTTGTTCTATGGAGCGCGCTTGCACGTTTTTAAATTTATC +TATTTTATTTTTTGTTATTTTTTGTTATTTTTCCACCAATTTTTAATGTT +TTCGGTGTATTTTTGCTCGAATTTTAGAGAAAAAGTCAAAATAAATGCAA +ATTTTCGATTAAAAAGCACGCTTACAGTCGTAAATCAGTGAAATTAATTA +ATTCAGGTTTGAAATCGTTTAAAATCGTTACTTTGTCATTTTTACGCCTG +TAAGCGTGCTTTTTAATTGAAAATTTGCATTTATCTTGACTTTTTCTCTA +AAATTCGAGCAAAAATACACCGAAAACATTAAAACTCGGTGGAAAAAACA +ACAAAAAATAAAATAAAATAAATTAAAAAACGTGCAAGCGCGCTCCATCG +AACAAATTCAGTTGGCGGTAATTCCAATAGAAATTAGGGGGGAAAACTGA +GATTTTTTCAATTTTCAAAAAATCATATAAAATCAGGAAAATTTTTTTGG +ATTTTTTATCATGATATTGGGTCATTGTGGTACCATAGGCGTGTTTTAAA +GCAATTTCCCCACTGGCGCTACTCCACCTTTAATTTCTGACGGTTTTTTT +TCGGTTTTCCTTGAAAAATCCTCTAAAAATCGATAATTTGTAAAAATTGC +GTTGTTTTTCCGGGATTTTTTTCGGTTTTCCCACGGGGTTCTGGCCTTCC +TCACTGAATTTTTCGCGCTCCATTGACAATCGCCCGTGTACTCCACACGG +ACAAATCACATTTAGTTTTACAACTAAAATCGAGCCGAGACGCGGCAGCC +AACGCGCCGTAAATCTACCCCAGATATGGCCTGGCCTAGTTCGGCGAAAA +CTCTTCCATTTCAATGTATGAGGGAAGCCAGAAATCCGTGTTTTCCTCAA +AAAAATCCTCTAAAAATCGATAGTTTGTAAAAATTGCGTTGTTTCTTCGG +TTTTTTTTGCAATTTGAGGGTTTTTTGTCGATTTAACACGGATTTTTGGC +TTCCCTCATATATTGAAATGGAAGAGTTTCTGCCGAACTAGGCCAGGCCA +TAACTGGGGTAGATTTACGGCGCGTTGCGTGTCGCGTCGCGGCTCGATTT +TAGTTGTAAAACTAAATGGACTTTGTCCGTGTGGAGTACACGGGCGATTG +TCAATGGAGCGCGAAAAATGCAATGAGGAAGGCCAGAAGCCCGTGCGGAA +AGACGGGGAATCTCCGAAAAACGGGGAAAATCTACAAAAAAATGAGTTTA +AAAAAGACTTCCTCAAAAAAATTCAAAAATTGTGGTTTTTTTTTTAATTT +TTTGTATTTTGATAAATTTTTTGCGACATCAAAAAATGGAAGAACTTTTT +TTTTTCGATTTTTGAATTTTTTGTTGGTGAAAAAGAAGAAAATTTCGAAA +ATTCGTTAGGGAATGGATAAATTTTAATCAAAAATCGATTTTTTAAAATT +ATTTTTTGCAGTTTTGCATAAAAAATCCAGATTTTTTCGCATTTCGCGCG +TAATTTTCATTTTTGTCGTTTTTTTTTTCTGAATTTTCCGAAATTTCTGG +AAATTTTTTCTTTTCTTGTTCTGAAGCTTATGCCTAAGCCTAAGCCTAAG +CCTGAGCCTAAACTCCAAAAACGCACCACTTCCAACAAGCTCCGGAGCCA +AACAGAGCACCTGCTCCAGTGTCCAAAGCCCGGATTCAGCCTCAATTGCA +CTATCGATTTGACTGCTGACATCGTTGACGAGCAGAAGCTCGTCAACGAG 
+ATGAAACGTCTCGAAACTGTGCTCATCGAGCAGTTGACGCTTTTCCTGAA +GAAATGTTGTGGTCGAATTAATGCGAAATATGGGAAAATTCGGGTTAAAA +TCGATGAAAAATCGTGGAAAAACGAACATTTGTGGTTAATGTGTCGAGCA +GAATCTTCAAATGTTTCACACGGCTCACACAATCCCTGTTCAAGTCGGTG +AATCTTGAAATGCTCGCCTGCACATCTGACGATTTGCCCTTTCGGGCAAA +CATTTTCCTGTGATTTTAGGCGATTTTCTGCAAATTTTCGCTGTTCAAAC +GTTTAAAATCGACGGAAAATGCGTATATTTTACTAAATATGCGAGAAATA +GTGATTTAATTCGAAAAAATCAATATAAAAATGGAAAAAAACTACAGTAG +TAATTTAAAGGCGCATACCTCGAAAGTTAAAATCGTGCCAGGACCCGCCG +CAATTTAGAATCGATTTTTAGATGAAAACTTTTTTTTTCTTACTAAAAAT +TAAAATTTTTCACTGAAAATTTCGGTTAAACTCTGACTTTTAACACGAAT +CTTATCAAAACTATGACAATTTCTAGTGAAAATCGCCAATTTTGTGTGTA +AAATCAATTTTTTCAGTGAAAAATGTTTTTTTTTGAGTTAAAACTAAATT +TCGAGCTTGAAACTAGAAAATGTCAAGTAAAAAATTCATTTTTAAGCGAA +AAATTAACGTTTTTTTCCAAATTTTCGCCTATAATTCACACAAAAAATAC +TGAGTCAGCAAACAATGTGGGAGCATCCCGAAAATGGTGCAGAATGGTAG +AGCAAAAACGAAAAATCGATGAATTATTGTGGAGAGAGAGGAAATTTTAT +TCAATTTTTGAGGAATGGAGGTTAAAAAAAAGAGTAGAAACATTGAAAAG +TGGCAAAGAAATCCAGCTTGAAACCGGAAAAACTCCCAGAAAACGAAGCA +AATAAGAAAATCCCACAAAAAATCCGAATTTAATTGCAGTTTTCGACCGA +AATTCAGCCAACCAGTGAGTGGTTCAATTATTAAAAAGCACATATATACA +TATAACTTTATTCAAAGGACATAATCCATATAAAGTCTGTCAAAACGGAA +AAGGTTCTTTCCAAACACACAATTGGCCCCCTCTGTCCAAAAGAGAGAGC +ATGGGAATCGGAGAGGGCGGTGAGAGAGACGCAGACATCGAGATGACACT +TTTTGACAGCAACACTGTGTGTGTGTGTGTGTGTGTGTGAGTCTCTGTGA +TTGAGTGAAAGCACTTTTGGGAATATATACTGGTAGAAATTTAATTTAAA +ATGATAAAAAATTTCTTGGGATTTTTTTTTTTGAGTACTGTAGCCACAAA +AGTACGTAATTTTCTTGAAAATGCGCCCATGGGGTCCCAATGACGTAATA +TCATGATAAAAAATTTTTGAAAATTGGAAAAATCTCAGTTTCCCCTCCCC +CCCCCCCCTAATTCCAATTTGAATTTCCGCCAATTGAATTCGTTCGGCGG +AGCGCGCTTGCATTATTTTTATTAATTTATTTAATTTTCTCTGTTGTTAT +TTCACTGATTTTCTTCATTTTTTGGGGATTTTTAATTGGGAAAAGAGAGA +AAAATGCAAGATAAATGCAAATTGTTCATTAAAAAATCACTGAAAATGGG +TAAAACTGTGAAATATGCTAATTTCAGGCTTGGTGTCGTCGGAACTCATA +ATTTCGCAGTTTTACCCATTGTCTATGATTTTTTAATGAACATTCTGCAT +TTATCTTTTTTTTTTAAATTCAATTTCTATTAAAAATCCCCAAAAAATGA +AGAAAATCAGTGAAATAATTAGAAAAAATAAAATAAATTTATAAAAATAA +TGCAAGTGCGCTCCACCGAACGAATCCAATTGGCGGGAGTTCAAATAGGA 
+ATTAGAGGGAAAACTGAGATTTTTTTCAATTTTCAAAACAAAAAATCATA +AAAAATAAGCATTTTAGCTTAAAAACTCGAAAAATCATACAAAAATTAAT +TTCAAGTTGTCCACGAGTAGTACGCGACGCCTGGTTGCAAAAATGGCGTA +GATTTCGATTATTAAAAGCTTAAAAATCATTTTTTTTTACCAATTTCCAA +TTAAATATCCTAATTTTAATCTCAATTTTCTTGAAAGACGTGCAAATATA +GATACATCTAACATAAAAATTCTTCGCTGCGAGACCCAGGACCCCATAAA +TCGCCTGCGCCTTTAAATTTATATTTATTTTCTCTCTCAAACAGCGATGA +AAATCACGTTTTTGAACCAAATTTTCCAAAAAATAAGACAAATTTCTATT +TTAAAAACCACTTTATTCGGTCATTGGTCGCTAAATAGCAAATTTTGAGG +CAAAAAAAAAACGAACAGACAACGGAAAATCGAGTGAAAACAAGTGAGAA +CACAGCAAAACTAGTCCTAATTAATTAAAGGCGCATCACGTGGAAAAGAA +AACTGTTGTGGAAGAATAAAAAATGGAACATGAGGGGAAATATTACAAAA +ATAGTAAAAGATGCATCAAGATTAAGATTATTTTTCGAAAATCGAGTCCT +CCGCCCAGGAATTCATCATAATTTTCGACTTTTTTTCGGAGATTGCTGCA +CATTGTCGGCTGCTCCGGTGGTTTTTGGAAGGTTTTGAGAGCTGAAAGAT +TTGAATTTTTTGGAATTTCGAGAAATTTCTTTTTTTTTACCCGAAGCGCT +TTGAATCTGGCGAGCTGCATTTCCAGCTTGCATTGTGCGTTCAGTGACTT +TGATTCCTTGAAGTTTTCTGAAAAAAAAACACGAGTTTTGACTTGAATTT +TCTGAATTTTTAACACGGATTTCTGGTTTCCCTCAAAGAATTGAAATGGA +AGAGTTTTTGCCGAACTAGGCCAGGCCATATCTGGGGTAGATTTACGGCG +CGTTGCGTGTCGCGTCGCGGCTCGATTTTAGTTGTAAAACTAAATGTGAT +TTGTCCGTGTGGAGTACACGACTTTCCCACGCGTTGTCCGGCAGGCGATT +GTCAATGGAGCGCGAAAAATTCAATGAGGAAGGCCAGAACTCCGTGATTT +TTCTTTTACTCGGCGTAGATTTCGCAGTTTCAGACTGTTTTTCAGGTCTA +ATCAGCGGAATTTTCGTTGATCCCGCCAATCTCGGTGATTTTCTGAGCCT +TTTTCGCTCAGGTTCTGGGATTTCAGCTTTTAGAGCAATCTTTGAAATTT +CGACAAGTTTCTTGAAGTCCAACGCAATTTTTTCAGTAGAATTCGCAGTT +TCCGGCAATTTTTCTGAGCAAATCTTCGAAATTTCGACAAGTTTTCTGAT +TTTCGGCGTAGATTTCGCAAATTCTGACGGTTTTTCTGCAAATCCCGGTG +ATTTTCGGCTCGGGGAGCTCCCAGCTGCTCTATTGTTCAGCATTGTCTGC +TCAGTAGCGGGGGCGTCTTGATTTTCAGTAGAATTCGCAGTTTTCGGCAA +TTTTTCTGAGCAAATCTTCGAAATTTTGACAAGTTTTCTGATTTTCGGCG +TAGATTTCGCAATTTCTGACGGTTTTTCTGCAAATCTCGGTGATTTTGGG +CTCGGGGAGCTTCCAGCTGCTCTATTGTTCAGAATTGTCTGCTCAGTAGC +GGGGGCGTCTTGATTTTCAGTAGAATTCGCAGTTTCCGGCAATTTTTCTG +AGCTAATCTTTGGAATTTCGACAGGTTTTCTGACTTTTGGCGTAGATTTT +GCAGTTTCTGGTGATTTTCGGCACGATGAGCTCCCAGCTTCTTCATTATT +CAGCCTTTTCAGCTGGGCTTCTTGAGAGCCTGTAATCACGTCGCCGTCGT 
+CATCCTCTGAATCATCATCCGACCAAATTTCGATTATTTCCTGAAAAAAT +CGATTTTTTTTGCTTTTTTTTGCTCTTTTCTCATTGTCAGCTTTTCCCAC +TTTCCAGCAATTTCTCCACGGCGTCGGAACGTATCCTGGTCGCCGATCGA +ACTGGATAACTGGCCGGCTGGCGATAAATCGGCGAAGATCTGCTACTATC +TCGTCTGAAAACGTTTTTCTGCTAAAACCTGCTAAAAATCTGGCTAAAAT +CACCTGATAACATTACAATATCATCATTTCCGGGCATTTTTGACGAAAAA +AAAGCGGAAAATATTTAGAATTTCGAAAATGAAGAGAAGCGTTACGGTAC +TAAAGGCACATGGCGTAAAATATTCCGCAGGACTCGCCGCGATTTATGAT +TTTCACTATTTTTTTTGGGCAAAAGTTGACATTTTTCAGAATAAAATTCA +AAATTGTGTTGATAAATTGTTTATTTGATTTTTTTTTCAAATTTAAATAA +ACTGTAAATTTTAAATTTTCCCCCAAATATTCGCAAATTCCCTTCAAACC +AAGTTTTAATAGCACAGTAATCCCCGAGCAATCGCGCTCCACCGGACTAA +CCTACACACCGCGAATTTTGAATTACATCCCTCATAATTAATTGTTTTTG +CCATTTTTTCGCTATTTCCAGTGGATTTTAATGAGTAAAAGCCTACAAAC +CGACGAAAATGGCGGTGTCAGTGAAAGTCCATCAAATTGCACATATTGCT +ACACGCTGGAATGCTCTCTACGCATCGAATCCACCTCATCAATCAAGAAA +AAGACTCCAATCTCCTCGAAAAGCGCTATAATGACTGTCGGCAGAAATGC +GCAGAGGTTTTAGTCGAATAAATGCCTGTTTTAAGAGCAAGAAATTCCAG +AAAAATTCACCTTCAAATCGAGCTAAAGACCACCGCTACCGGACAGCCCG +CTGTAGTGTGCTATGACGTCACAGATGCAGTTGTACACTTGCAAAGCGTT +GCAAATGGGAAGTGTACTGTAGAAATTCCTTCGCTGTAAGACCTAAAAGA +CCAGAAAAATGGAAAATATCTGAAAACCCCAATTTCAGCTCGTTAATGTT +CCAAATGTTCAATTGCGCGCCGCGAAAGCTCAACGTCTTCATGAAATCTC +TCCAAGCAAAGTTGGATATTATGAAAATGGAGAAAAGCCCAATTTCAGCA +GTGTAAAATCGATAAATTATCGAAAAATCAATAAAAATCCCTTCAGACCC +CGGCAATTCTCACGTCCGCCGGCAGTTTTCAGCGTTCTGAGCCCGCTGAC +GATCAGCGAAATGCGAAAAGTGAAGAAGCTACGCGAACCGTCGGCGCTGG +CGAGACCTTCGAAAGAGGCGACCACACCGAAGCGGCGGTAATTTGAAATT +TTCACATTAAAAAAAAATCGCGAATTTCAGCACTTCCTCAATGAATTTGC +TCGCCGGCGGCTTGGAAAATCGAATTATGAATCGATCGATTGGGCTGAAA +AGGACGACCAGTTTTGCTAGAGATGATCGTGAAAAAGCCGAGGTTCTGCA +GCTAAAAATCCGTCAAAAATCGATAAAATTTCGTTTTTTTTCCGTGAAAT +TCCAGGTTTTTTAGTCCAAAAGCACGGATTTCTGGCTTCCCTCATAAATT +GTAATGGAAGAGTTTTTGCCGAACTAGGCCAGGCCATATCTGGGGTAGAT +TTACCGCGCGTTGCGGCTCGATTTTAGTTGTAAAACTAAATGTATTTGTC +CGTGTGAAGTACACCACTTTCCCATTGCCCGGCGGGTGATTGTCAATGGA +GCGCGAAAAATGCAATTAGGAAGCCCAGAAACCCGTGAAAAAGTGTCACG +CGGTTTTCAAAGTAGAGGTGCAAGCGCGCTCCACCGCTCCACGGTGCTTG 
+GCGGCAAAACCAGAATTTTCGCTGATTTCAAGCATTTTCCGTCGTTTTTC +ATGATTTTTCATGTCGAAATAGTGTTTAAAAATGTCTTTAATGCTGAAAA +GTGAATATTTCAAAAATTTTGACATGAAAAACTGAAAAAATTATCGGAAA +AGTGTATTTTTTGAGTTTTTACTTATTTTTCGGTTTTTAAAAAAATTATT +TCTACATAAAAATTGATGAAAACAGCATATGAAGTGCAAAAAATAAAGAA +GACTTACATAAATTTTGACAAAAAATCACGAAAAACAATAAAAAAAGTCG +AAAAATGATTGGAAACGGAGAAATTTTCCCTTTTTATGTAGAAATTTTGA +ATTTTTTCAAAATTTTCTCAATTTTGAAACACAAAACATGAAAAATGATT +GAAAATATTTGAATTTTTAGTTTTCGCGCCAATACCTAACGAGACCCATC +GGTGTGACCATGGAGCGCGCTTGCATCTCGATTTTAAAATTCGTATGAAT +TTTTCGCGCTCCATTGACAATCGCCTGCCGGACAACGCGTGGGAAAGTGT +CGTGTACTCCACCCGGACAAATCACATTTAGTTTTACAACTAAAATCGAG +CCGCGACGCGACACGCAACGCGCCGTAAGTCTACTGAGCCAAAATGGCCT +AGTTCGGCAAAAACTCTTCCATTTCAATTCATGAGGGAAGCCAGAAATCC +GTGTTTTAACCTAGAAAATCAAGAATTTTTTTGGGAAAAAATGCCAAAAA +TCCCAGATTTTCCCTAATTTTCAGCTAATTTTTCCCCGAAACAGACAATT +TTCAGACCCTTGTCTCCCTGAAATCATTCAAAGATGCTCCTGCAATTTCC +GAACGGATTCAGCTGTCAGATGAGCAAAAATCGGTTGTCCGATGTGTGAT +AAATGTAATTTTTCGTTCGAAAAAAAAAATTATCGAAAATTTGCAGTCTC +GAACGAGTGTCTTCTTCACGGGATCCGCTGGAACCGGAAAATCTGTGATT +CTTCGGAGAATCATTGAAATGTTGCCCGCCGGGAACACCTATATCACCGC +AGCGACAGGTAGAGGATCACTTTGAAATATTTGCAAAAAATCGTGTCAAG +ACCTCCCCCAAAAAATTTTCATGTACCTTTAAAAAATAGAACCGACTTTT +TTTAATTAAAAAATCATTAAAAATTAGAGGAAAATTTGGGAAAAAAAATC +ATTAAAAATTCTTTAAATAATAATTGTTTATTTGTCACAAGAAATAGCTA +CACCGGAAGATAAAGAAAAAAAGTAAATAAAAAAGATGTGTGGACAAATA +AGTAAAATGACCAATTGGACCAGCTATAGCCCGGGAGGTTAAGTTGGGGG +GAAGAGGGTGTCAAATGGGATAGAGTCTAGGCGCGACGCAAATGCATTTG +GAGTTAACAGTTTAGGGAAACGTTTTGCTATTCGATTCCACAACGACAAG +TTAACATGTAGAAAACATTTAGACGGAACACCACAGACAACAAGTAGATA +GGGGTGACGAGTAGAGCGTGAAGCTCGAACGAACGATGATAAGGACGGGA +AGTGATACTCGCTTGAAATAATTTTATGGAAGGTTCGGAGGATTTGAAGA +ACCCGTCTATGGTGGGTAGACAATAAATTAAATTGGGAAAGCCTACTACT +GTATGACGAGTAAGATAAATTGCACCTTTGAAAGACACACTTTGAGAAAA +ACCGGAGGGGAGATTCTAGTTTTTTGGCAAGTTCGGTGGAGTTGGGCGGG +AAGAGCTCGCAGCCATATTCGAGTACGGGGCGGATGTAAACATTGAACAG +TTTAAAATAGAATTCGGGACTTTTAGAGCGGAATGAACGAAGGATTTGGC +GACACTTAAGGAGGGCACTATTAGAAGTCTGATTAATATGATTAACAAAT 
+GATAATTTGGTATCGACAATGATTCCAAGATCTCTGATAGAATCACGCGG +TTTAATTTCAACACTATTTACAAAGTATTTATGACGGGGGTTCTTTTTTC +CAAAATGTAATACGGCAGTTTTGTGCTCAGCAAGATTTAGACGCCATTTT +TTACACCAATCAGCGACAATATTGATGCTTGTTTGGATAGAGGTGGGGTC +CGATCCGAGTAATTTTAGATCGTCGGCAAAGGCTGTAACATGGACATCAG +GGGGGAACAAATCTAATAAGCCATTAATATACAAAAGAAAGAGGAATGGT +AAAATTGTGGAAAAAACCGTAAAAAATCAACAAATTTCGAAAAAAAATCG +TAAAAAATTGAGACATTTGAAAAAAAAAATCTAGCAAAATTTGAAAAAAA +AATCGTTTAAAATTCAGCAAATTTATTTTAAAAAATCATTAAAAATTCAG +CAAATTTGGAAAAAAAAACAAATAAATCTGTAAAATTAAAAAAAAAACCA +GTAGAATTTGAAAAAAAATTGTTTAAAAAATTCAGCAAATTTCTAAAAAA +AAATCATAATTAATTGATAAAAATTTTTTAAAAACGTAAAAAAATATTTA +AAATTCTGCAAAATTTGAAAAAAATCGTTTAAAATTCAGCCAAATTTCGA +AAAAAAAAGCATTAAAAAAGAGAAAAATTGGAAAAATTCTGCACAATTTT +TTAAAAATTATTATTCTGAAAAAATTTAAAAATTCATGGAAAAATCTGCA +AAATTATTTTTTAAAAATCGTTAAAAATTCAGCAAAAATTGGAAAAAAAT +CGTTTAAAAATCAGCAAATTTCGAAAAAATAAAACATTAAAAATTCAACA +AAATTAAAATTAGAAAATCATTAAAAAATACAGCCAAACCGTAAAAAATC +AGCAAATTTCGAAAAAAATTCGTTAAAACTTCAGCAAACTTCTAAAAAAA +ATCATTTAAAAATGCAGGAAATACGAAAAAGAAACATCAAAAATTGAGAC +AATTGGAAAAAAAATTCGTTAAAAATTAACCCAAATAAATTTGCTGAATT +TTTTAGATTTTTTTTTTTATAAATTTGCTGAATTTTTGAAAAAAATTCAG +CAAATTTATTTTAAAAAAATCTAAAAAATTCAGCAAATTTCTTTTTTAAA +AATCGTTAAAAATTCGGGAAAACTTGGAAAAATTGTGCAAAATTCAGAAC +AAAAAAATTTCAAAATTTTCCCAATTTTTAAGTTTTTATTTAGTTTTCAG +AATTGTTTAATAGTGAAAAAATCATAAAAAATTCAACAAAATTTAAAATT +CAAAATTTTCCAGAATTTTCATGGGTCCCGCCACGATCCACTCCAACCTT +CAAAAAATCATGTAATTTCCAGGCGTTGCGGCTTCCCAAATCGGCGGAAT +CACACTTCACGCGTTTTGCGGTTTTCGCTACGAAAATTCGACGCCTGAAC +AGTGCCTAAAACAGGTTTTACGCCAAAATCACATGGTCCGACAGTGGAAA +CAATGCTCACACTTGATAATTGACGAGATTTCCATGATTGATCGCGACTT +TTTTGAAGCTCTCGAATATGTGAGCTCATTGCGTTTTGATCTACAAAAAA +TGCGGGAGTTTTCCCAGCATTGCTCATTTGATCTACAAAAAATGCGGGAA +TTTTCCCAGCATTGCTCATTTGATCTACAAAAAATGCGGGAATTTTCCCA +GCATTGCTCATTTGATCTACAAAAAATGCGGGAATTCTCCCAGCATTGCT +CATTTGATCTACAAAAAATGCGGGAATTTTTCAAGTGTGACGTCAGCACA +CTCATGCGAAATCTGCTGAAAAGTCTGCGTCTCTTCTCCCGCATTTTTCG +GAGATCAAACCAAAATGGGACCCGAAAATTCCAAAAAAAAATCGATAATT 
+TCCCGTCCCAGGTCGCCCGTACCGTCCGTAATAACGATAAGCCGTTCGGT +GGAATTCAGCTCATTATCACTGGAGATTTCTTTCAATTACCGCCCGTCTC +GAAGGATGAACCAGTTTTTTGTTTTGAGGTGTGCGGAATTTCGGAGCATC +GTTTGGACCGTCTAACTCTTAAAAAAATTTTTTTGCAGAGCGAAGCCTGG +AGCCGATGTATCCAAAAAACGATTGTCCTGAAAAATGTGAAACGACAAAA +TGACAATGTTTTTGTGAAAATTTTGAATAACGTTAGAGTTGGAAAGTACG +TAAACTACTGGAAAAATTTAAAAATTTCGAATTTTTCAGGGTTTTTTACT +AGAAATTTTGATTTTTTTCAGTGAAATTCCAATAAAAGCTGGAAATTTTC +AGAAAAAAGGAAAAAATATGAAAATTTGTAATTAATTTCGAAAAATGCTG +AAAATCTCCAAAACCTTCCAGAAAAGCTGGAAATTTCATAAAAATTTGGG +AAAATCTGAAATTTTCAGAAAATTCAAAAAAAGCAAAAAATCTCGAAAAC +CTTCCAGAGAAGCTGGAAAATTTCAGAAATTTCCAAAAAAATAAATTCTG +AAAATTTCATAAAAATCACTGATTTCTGTCTTCCCTCATAAGTTGAAATG +GAAGAGTTTTTGCCGAACTAGGCCATTTTGGTTCGGCCAGATCTTGGGTA +GATTTACGGCGCGTTGCGTGTCGCGTCGCGGCTCGATTTTAGTTGTAAAA +TTAAATGTATTAGTCCGTGTGGAGTACACGACACTTTCCCACGTGTTGTC +CGGCAGGTGATTGTCAATGGAGCGCGAAAAATTCAACGAGGAAGGCCAGA +ACCCCGTGATAGACCGGCCAACACGGGGTTCTGGCCTTCCTCATTTCATT +TTTCGCGCTCCATTGACAATCACCTGCCGGACAACACGTGGGAAAGTGTC +GTGTACTCCACACGGACAAATACATTTAGTTTTACAACTACTAATTGAGC +CGCGACGCGACACGCAACGCGCCGTAAATCTACCCCAGAAATGGCCGAGC +CAAAATGGCCTAGTTCGGCAAAAACTCTTCCATTTCAATTTATGAGGGAA +GCCAGAAATCCGTGTGAAATTTCCAAAAAGAAGCTCCAAAAAAAAATTCC +AAAAAAATCTCGATAGCCTTCCAGAAACGCTGGAAATTTCAGAAAATCTC +CTGAAATTGATAAAAATTCGAAAAAAATCTCAAAATTGTTAGAAAATCAG +TGAAAATTTGGAAAATTCAGAAATTAAAAAAAAAATTTTTTTTTGGAATT +TAGAATTTAGAATTCTTCAAATTGTCGAAAAATGCTCCAGAAAACTTGTA +AAAATTTAATTTTTTTTTTTTTGAAATTTCCAAAAAATTTATTTATTTTT +CAGATGCGACTTCAAATCAGCGGATATTCTAAAGGAATCCTCGAAAAATC +AATTCCCATCCAGCGTAATTCCAACCAAACTGTGCACACATTCAGATGAC +GCCGATCGAATTAACAGCTCAAGCATCGAGACAACACAAGGCGACGCGAA +AACCTTCCACGCCTACGATGACGAGAGTTTTGACACGCACGCCAAGGCCC +GAACGTTGGCACAGAAGAAGCTTGTGCTGAAAGTTGGAGCTCAGGTGATG +CTCATCAAGAATATCGATGTGATCAAGGGACTTTGTAATGGGTCACGTGG +ATTTGTGGAGAAATTCTCTGAAAACGGGAATCCTATGATTCGATTTGTAT +CGCAAGCCGATGCTTCCATTGAGGCACGTTTTACAGAAATGACCATTTTA +TGGGCGTGGCTTTTTTTTTGGAAAATTTTTTTTTTCCAAAATTTCTGGAT +TTTTCTCTAAAATGGAAAAAACCCACGAAAATTGTAAAAAATTTGAAATT 
+TTTTAAATCGGAAAAAAATAAAATTTTCTTTCTTAAAATTAGGCTTAGGC +TTAAGCTTAGGCCTGGGCTTAGTCTTAGGTTTATGCTTAAATTTGAAAAA +AAAAATTTCTAATTTTTTCCAGATTTTTCCGTTTTTTTTTTCAGAGAAAA +TCTAAAATTTTCGATTTCTACCTGTTTTTCAAAAAAAGTTAAAAAAAACA +TTTTCGCAAATTTTGTAGAAATTTTTCTTTTTTTTTTCTTAAAATTAGGC +TTAGGCTTAGGCTTATGCTTAGGCTTAGGCTTAGGCTTATTCGTAGGACT +ATGCTTTGGCTTAGACTTAGACTTAAGCCTAAGCCTAAGCCTAACCCTCT +TAAAAAGTTACAAGAAGGTTTTTCCTTGCGCTTGGAGCGCAAAAGAAAAG +AAAAAGAGCTATTCAGACTTAGGGTGCCCAACTGGAATAAAACATTGGAA +ATCCTTATGACACACTTAAGCCTAAAGGCCCGAAAAACATACTAGGATGC +CCAACTGGAATAAAATATTGGAAATCCTTATGACACACCGGCGGTATGGC +GCGGCTTAAGCCTAAATAGCCACTTTTATCAAAATACATTTGAGCTCGTC +TTGCGTTTTACTTTGACTTCTCAGGCAACTCAAAAGTAATCTGTGGATAT +TTTTCAGTAATCTAAATGAAGACTATAGATTACTAAGAAACTTGGAGATT +TCATAATATTTGGGGGGATGCGAGCATCCATTGGAGATTTGCCATTTGAT +AGAACTTTTAGCGGCAAAAGTCCAAAACAAAGCTCACAGTGGGCTCTCAA +AGATCATAAAATAGCACTGTAACGAAGAACTTTAACGATCTAACGAAGCA +ATTTTACAAATTCACTTTGGTAGCTCATATCTCCGTGGATAAAATTTTTA +CAGAAAAGTCATCAACTGATAAGTTGTTGATATTGTTGTAAAGAACAAGT +TTGTAGTTGAAAGTTTTTTTTACCAAAAAATTTTTGTTTGAGAGAAAAGC +ATTAGAAACGGAATAGCATCATAAAAATAACAACAGCAGTTGCCGCACTT +CACGCGGTTCTATCTCAAACAAAAGCGGAGATATGAGCTACCAAAGTGAA +ATTGGAAAATTGGCCCTCCAATGCTTCGTTAAAGTGCTATTTTCAGGATC +TTTGAGAGCCCGCCGTGAGCTTGGTTCTGGAGTTATATTGATCAAATTGA +CCCTCCAGTAAAGGAGGACCTTTGATGAATATAATCACTCTGATGGTATT +TAATTCCGATGAGTAATCCATTTTTCTTTTTCTCACATTTGTGAACCAAA +AATAAGTTTTAAATTAAGGCGGGATATTCTAAGGTGTGATAACATATGTT +ATTTATTTTTAAATTTAAATAAAGTTTTTTTTTAATTTTTGCTAAAAAAC +GAATAGTTTACAACCGCCTCGCTCAAATGTATTTTGATAAAAGTGGCTAT +TTAGGCTTAAGCCGCGCCATACCGCCGGTGTGTCATAAGGATTTCCAATA +TTTTATTCCAGTTGGGCATCCTAGTATGTTTTTCGGGCCTTTAGGCTTAA +GTGTGTCATAAGGATTTCCAATATTTTATTCCAGTTGGGCATCCTAGTTT +TTTTTTTTCGGGCCCTTAGGCTTAAGTGTGTCATAAGGATTTCCAATGTT +TTATTCCAGTTGGGCACCCTAAGTCTAAATAGCTCTTTTTCTTTTCTTTT +GCGCTCCAAGCGCAAGGAAAAACCTTCATGTAACTTTTTAAGAGGGTTTC +ATATATTTTATTAAAATCGGGGCGAAGCCCTGATTTTAAATCCATATTGT +TTTTGTTTTTGTCTTCCACTATCCCTGCAAATAGGAAAGAGAATGTGTTC +TTTCTGATGAAGTAAAAATCATCATAAAATCTTGAAAACTGAGAGCAGGA 
+GGTAATATTTGAATATATTGGGTTGTAAATGTGTGTCTCCCTGTGGGTGG +GGTGGCGATGTGTTGGCAGCCAATCCTTCAACGAACTGTATCTCCCGCCT +GTATCTCCCTTCAAAGTGAGAATTGGGTTACAAAAATTTGAGGGAATATG +AAAAAAGGTGTGAGGATTTCAAAAATATTATTGTTGAAACACCAGACCAA +ACCACTTTTTCTGGGCAAGAGACAGAAAATTAATTTTTTGAAAAATTTCA +AACTGGCACAAAATTTTTTCAAAAACAAATTTTCACAAATTGTTAAAAGA +TGCCATTTTTAATCAATATTGTTCATTGAACACAGAAAAGAAAACGAAGA +TTCATCAAAAAATGAGTGAAAAATCGCAAAAATTCGAAAAAATCCGTGCT +GAAAAACTCGAGTTTTTGGCGGTGCTGAAAAAAATTTTCACTAAAATTTT +TTTGAAACTTAGTTTTTCGGATTTAGCGTCAAATTTTGAATCTATATAAA +AAAAAAAATTAAAATTGATCTCAGATTGAGTGAATAATAAACGCTCAAAG +TTGAAAAATGAACAACGCAAAAACGGCAGTAACTTGCTTCAAGGTCGGTT +GTCTCAGTGAGTTTTCACTCAATTTTCGAAATTTTTTTGCTCTATCGCTT +TAGAAATATTTGTAATTTCATTTTTTTTCCTCAAAATCAAAATATCTCAA +ACGACCGCCATCCTACGAGAAGGGAAAAAAAAAGTTTTTGGAAAAAAAAT +CAAAAATTTTTTTTCTGCCTCGATTTTCAAAATGAAAAAATCACTTTTTC +GGAATAAACTTTTTCACAAATGTATTTTGATAAAAGTGGCTATTTAGGCT +TAAGCCGCGCCATACCGCCGGTGTGTCATAAGGATTTCCAATATTTTATT +CCAGTTGGGCATCCTAGTATGTTTTTCGGGCCTTTAGGCTTAAGTGTGTC +ATAAGGATTTCCAATATTTTATTCCAGTTGGGCATCCTAGTTTTTTTTTC +GGGCCCTTAGGCTTAAGTGTGTCATAAGGATTTCCAATGTTTTATTCCAG +TTGGGCACCCTAAGTCTGAATAGCTCTTTTTCTTTTCTTTTGCGCTCCAA +GCGCAAGGAAAAACCTTCTTGTAACTTTTTAAGAGGGTTAGGCTTATGCT +TAAGCTTAGGGTTAGGGTTAGGGTTAGGCGTAGGCGTAGGCTTAGGTTTA +GGCTTCGGCTTCAGGAATATTTAAAAAAAAAATCGAAAAATAGATTTTGC +GAAAAAAAATCGTGATAAAGCCTGCTAAAATTTTCAAAAAAATTAAATTT +TTCAAATTTTTCAAATTCCCAAGATTTTGGTCTCGCAGCGAAAACATCCA +ACTACAGTAACCCTCGCCCTCTCCAATTTTCAGATCCGCCGCTCCAAGTT +CTCAGTCCGCATTCCCGGGAGTGATGCTCCGTTGATTCGCCGTCAACTCC +CACTCCAACTCGCCTGGGCCATCTCCATTCACAAATCGCAGGGAATGACG +CTCGACTGTGCGGAAATTTCGTTGGAGCGCGTTTTCGCCGACGGACAGGC +GTATGTGGCGTTGTCGCGCGCACGATCACTGGCGGCAATTCGAATTATCG +GCTTCGACGCGTCGTGTGTCCGAGCGAATTCAAAGGTGAACAGGGGATTT +TCGAGATGGAAATCTATGAAAAATGGATGGATGGTGTTAGATATCTCCTT +TTCGTGGGTTCTCTCACGTGGTGTCAGAGTGCCTCATTTCGGCTTGATCT +ACTGTGTAGATCTACAAAAAATGCGGGAGATGAGGAGACTCAGACTTCTC +AACTGGTTTCGCATGGTTTAGAACGTGCTGACGTCACTTTTCTCTGGGCA +TAAAATTCCCGCATTTTTTGTAGATCAACCGGTAAGGGATCTCTCAGCTA 
+CGTGGAAAATTATCGATTTTTTATCGAATTTTTTGCAGGTTATCGATTTT +TACAAATCCATCGAGGCAGAATGCGACGACGAACAGGACTGGGAAGCCCC +TGCTGCTGGACCAAGGCTCAAGCGAGTTCGATCGATTTAATTTTTTTTTT +TAAATTTTTTTGTGATCTCCAAATGTTCTGTCTTTCTGTCTTCCCCCCCC +CCCCGCCAAAATTCATTAACTGTTTTATTTCACAACACTCTGTATCATCA +TAATTTTCACATAAAATTGGCTATTTCAATGTTGACGTCGAATTTTTAGT +CCTATCAGCGTCATCGGATCTGAAAAAATGGAAAACTTGAACAAAAAAGC +AGGGGGTGGCCTAGAAAGCACAAAAAAAACTCGGACACAGTTCAACGGAG +GCAGTTTTTGGAAAAAAAAAAACGTCATTTTCGTATAGCCACACTGAAAT +TGAAAGAATTCTGGCCGAGTTTTCCAAATTCTAGGCCACCATGCAAACCG +ACATTTTGGGCGGAAAAGTGCCGGTGGCCGTGGAAGAAGGAAAAACTCGG +CCACTCTTCTATGTACTCCTCTCGGAGAATTGTGTTTTCCATGGCCGAGT +TTTTGTGTTTTCTAGGTCATGTCGGCATTTTTCTTAGTCAGTTTCAAAGG +AAATACGATACCTGGTCTCGACGCGAAAATTTTTTAATTAAATCCAAAAA +GACGTGCGCCTTTAAAAAGTACTGTAATTCCAAACTTTCGTTGTTGGGAA +ATTTTTATCGATTTTTCGTAGTTTTCAAAAAAAATAATCAAATGTACACA +GGGTTCTGTTCTTCCTTATTGATTTTTTCGCGCTCCATTGACAATCGCCC +GCAGGATAACGCGTGGGAAAGTCGTGTACTCCACACGGACATCGAGACGC +GACGCGACACGCAACGCGCCGTGAATCTACCCCAAATATGGCCGAGCCAA +AATGGCCTAGTTCGGCAAACTCTTCCATTTCGATTTATGAGGGAAACCAG +AAATCCGTGAGTAGTGAGAAAAACAACGAAAAAATCGATAAAATTTCCCA +ACAACGAAAGTTTGAGATTACAGTAGTCTTTAAAGGCGCACACTTTTTCG +CATTTAACATAAATTTGTCGTGGAGAGAGTATTTTTGGCGCAAAACTTCG +CATTCGGGTAATGATAATAGAAGAATTGAAACTCACAAATTCTCCGCCGT +CTCCTCAAGCAGCAGCGGCACAATAAACATGAAATACACGGCCAACACGA +AATGAATCGGCACCATCGACACAATGAACAACGACATTCGGAGCATGTGC +TCCAAAACTGTCGGCCGCAACTTTTTCGACGGATTCTCAAAGACGAGATA +CACTTGCATCGGGCTGTGCACATCTTGAACCGACTTCTTCTTCATTACAG +CGCTCCCTTCTCTGGGCAGCAGGTAGCGTGTCCACGTGGTGTAGGGTGAC +TCAGTGGTTGTGTGATTTATCGGCGTGAGCTGGAGCATTTTTGCAGAGTC +GCAGGCCAAGGTAGCGGAGGTGTTGGCTCTGAAAGTAGATTTCATACAGG +ATGGTAATTTCGGGGAGTACTGTAGTGGTGTTATAGTGGTACTGTAGGAG +TACACTGTAGGGGAACTGTAGGGGTGCTGTAGCAGTACTGAGGGGGGTAC +TGTAGGAGTACTGTAAGGGTGCTGTGGGGGTACTGTAGGAGTAATGTGGG +GGTACTGTAGGAGTACTGTAGGAGCACTGTAGGGGAACTGTAAGGATACT +GTAGGAGTAATGTAGATGTACTGTAAGGGTACTGTAGGGGTGCGCTAGTG +ATACTGTAGGGATCATGTAGGAGTACTGTAGAAGTCCTGTAAGGGTACTG +TAAGGGTACTGTAGAGGTGCGCTAGTTGTACTGTAAGGGTACTGTAGGGG 
+TACTGTAGGTGTACTGTAGGAGTAAAGTAGATTAACTGTAAGGGTACTAT +AGGGGTACGCTAGTGGTACTGTAGGAGCACTGTAGGCGTACTGTAAGGGT +ACTGTAGATGTACTGAAGTAGTTCTGTAAAAGGTACTGTAGGGGTACTGT +AAGAGCACTCTAGGGGCATTGTAGGGGTACTGTAGGTGTACTGTAAGGCT +ACTGTAGATGTACTGTAAGGGTACTGTAGTTGTTCTGTAAAGGGTACTGT +TGGGGTACTGTAGGTTTACTGTAAGGGTACTGCAGGGGTGCTGTAGGGAT +ACTGTAGGAGTACTGTAGTGGTTCTATAGGAAGACAAACTTTTTTTTGAA +TTTACTCATCGGAGAATCTAATATTTTTCACCAAATCGATACACCATAAA +AATTTTAGGACCCAAAGCTAAAAAAAAAAACCAACTTCTCCAATATCATC +TCCATCAATTCCAGTTGTTCTGTGCAGTTCGCCATCCCTTCCTTCTTCAA +TTCCTTCGCCCGATTCTCCAAATCGTCCACCATCTTCACACCCCGCCGAC +ACGCCGTCTCCACATACCGTTCCGTACCAGCTTCCGCAATTTTCATAAGC +TTCTCGACAGACTCGTTAAACTTGCGTGTTGGCTTGAACAAATGGGAACC +TCGAATCGCCTCGGGAACCGGATTTCCACCTCCTTCCACCAGGAGGCTGA +ACAGTGTCGTTTTCTGCCTGACGGCCTTAGCCAGGACGTGCCCATAGTTG +ATCATGTAGATTTGCCCGTTTTCAGATATGTTGAATGTTTCAGCAGATGA +GAACATTTGAAAATTATTTGATTGTGCGTGTTCATGCACCACAGTAATGA +ATGCGAGGCGGCAAATTAAAACTCTGAAAGTTTAGATAATTGTACTTTAG +TTGAAATCTACTTTGGTTGACAGGTGCATCGACTAGACTAAAAAAAAAAG +CAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTA +GGTAGGTAGGTAAGTAGGTAAAGCGAGACACGGTGCATCGATCTGACTTT +AAAAATCTCAATCTCGTATCTTAAGCGTCTTTGGTTCTGTCAGGGAAGCA +GGTAGGGGGGAGGTAGGTGTGATCAGGCAGAGCACCGGAGTTAATATCTG +GTGGATTGTGGGCTGCTAATAGGTAGGTAGGCAGGTAAGCAAGCAAGCAA +GCAGGCAGGTAGGTAGGTAGGTAGGTAGGCAGGCAGGTATCAGTTGACAA +CTTTTTGATGAAACTAAACGGAGCCGAGTTATAAGGTGCCAAAGTTGCAC +TAGACATGGTGCATCGACCTAACTTGAACAACTCGTATCTCAAGTGTCTT +TGGTTCTATTTGGGAGGCAGGTAGGTAGGCAGGTAGGTAGGTATATCTTA +GGTTGTTTCAAAACTATTATACATTAAAAATCAATAAAAATATTTTGCAC +GAATTTTATCAGTTGACAACTTTTTGATAAAACTAAAAAGAGCCGAGATA +TTGTAAGTTGCAAAAGTTGAACGAGACATGGTGCATGGAAGGTAGGTAGG +TAGGTAGGTATTTAAGTAGGTAGGTAGGTAGGTAAGCATTCCGTATTTCT +CTCTACTTTGACAGCTTATATCCTGGTTGTTTTGTGTTGTATCAAAAAAT +GTTTAACAAAGATAACATAGAAAAATATTTAGCACGTAGCAAATTTAAAA +AACTAGGTTGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGAAAG +GCAGGCATGTAAGTAGTAGGTAGGTAAAAGTACTCACGTGGAGAACATCA +AGCAGAAGATCGCTGAGCAAATTGAAAGTGGTACTGGAAGTTGAGCAAGT +ATTTTGGAGTATTTTTCTGGAAAAAAATAAGTATTCAAAGTAACTAAAAT 
+AAAATACCAACTCCGAAACTCAAACTTCATAATAAACAAAATCGTCAGAA +TCCCACTGGAGAGGACTGCAAAAGTCACTGTCGTGTTGGCAGACGTCTGA +TAAAAATCGAGGGTGTGTCCGGATGGTAGGAGAAGCACATTATCGTGATA +CTTTGACACAAATTCCTCAATAGACGCCAACGCTTTTCCGAAAACTGGAA +TCGAAATGTTCAAAGCATTGAGAATTATTTTCTGGTACTCTTCCACCAGG +ATGTTGTACTCCGAGTTTAGATCCGCTGAAAAATTTAAATTTGTATTAAA +TTGGGCAATACTCGTACTTGCCTACCTGCCTACTGGTAGGTAAGCAGCCT +ACTTATTTTCAACTTACAAAACTCCATATACTTTGTAACGTTTTCCAATT +TCGCAAAAATCGATTTCTCGAAATATTTGCACTTCCCCTCGACAAATGAT +TTCTTGGCGGGAATCTTCAGGAACAGCTGAAAATTTTTTTTTCCGGTGGC +CGAGTTTTTTCACGGCCACGACGTACCTTATTAAACATGGTGAAGTTGGC +GTAGTCATGCAAGAACCAATTTATTCGGCGGCGTTCGTTTTCGGTGACGG +CGAAGTCTCGTTTGAACAAGCACGGTATGTAGGACAGCTTACGGTGGAGA +CGCTCTTTGATGGCTGGAAGTTATGACGTCATGAATAATAAGGGATGAGA +TTTCTAGGCCACGTGTGACGTCACAGCAATCCCCGAAACCTTTTTGGATG +GCCGAGTTTTACCGATTTCTAGGTCATCGAGCGAAACTTTTGTGATACTT +TGGCAATGGTCATCAGGTAGACAAACAACGTGATAATTTCTAGGCCACGC +TTGACGTCACGTGGCCGAGATTTCGCGCCAGGACCTAGAACTAGACATGG +TGCATCGACCTAACTTAAACAACTCGTATCTCAGGTGCCAGGTAGGTAGG +TAGGCAGTCAAGCAGATAGGTAGGTAGGCAGGTATTTTAATTTCATTTCT +GCGGCTACAACACAATTTCTCTTCCAATCAAGGACGTAAATTTCTAGGCC +ACACATGCAGAGTGCGGACTAATAATTAAAGTGTTTTACGACATGGCCGA +GAAAAGAGAAACCTAGGCCATGGTAGTTAGGTGTGACGTAACGCCACTAT +AAAACTATAAAACTCACGCATAAAACTGTGATCATTGGCGGTTTCGAGCA +TCGTCCTTCCATAGACCGCATCCTGAGTACACTCTCGTAAATTGGCGGCC +AAAATTGTTAGACCAAGGATACAAAATATGGGGATAATGGTGAATCCGGA +GCGATTATCATCCAACCAATACACGATTACTTCGATAACCTCGAAGAGCA +GCAATTTCCAGACGACGAGGAATATCAGCAGAGCTGTGATCAGGCAGAGC +ACCGAGATTATTATCTGGCGGATTGTGGGCTGAAAATAGGTAGGTAGGTA +GGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAGGTAAG +TAGGTGGTAGGTAGGGAGGCAGGTATCAGCTGCCAACTTTTCGATAAATA +TTAAACGGAGCCGAAATATAAGTTGCCAAAGTTGTACTAGACATGGTGCA +TCGACCTTACTTAAACAACTAGGTAGGTAGGTAGGTAGGCAGGCAGCTAG +GCAAGTAGGTAAACTCACCAAATGCCTGAAAACAATTGCAAGAACATCGG +AATAACCCGAGCTCCTCCCGTTCAACGTCGCGACCACAAGCGCGTGCACA +CTTTTCCGCAGCTTCTCATCATCGCGATCAGCGCAAAAATCCTCGATTAT +CTCGAATCGATGGCTCTTTTTTGTGTGCGCCGGGTCTAATTTATAGCTGA +TCGGCTCGGGAAACCAATCGTCACACTGCAGCTTCGCATCCTCCAAGCTG 
+AAGCCGTGAGAAAACTGTGCACTGCATAGGAATAGAAGTAATTTCAGTTT +CATTTTGTTCTGGAAGAAGGCATTTTTATGGATGGAGTTTGAAAATAATA +ATTAAAAAAAAAACTTACAGCTGGAACTTTGTGAAATCGATAATGAAACA +ATAAATCTAAAAAAAGCTCAAAAAATTTTTGACAAAATTTAAAACTGTAA +CGTGTATGAAGGTCACGTGAAAACTGAAAACTCACCAAAAATGTCGAAAA +ATAGCAAGAAACTAGGCACACGTAGACTTATGGTAGGCAGGAAGGCGTAG +GCGTTGATCCAGGCAGGGTCACGTCTTGAAGGCAGACTGTTAGGTATAGG +TAGGCATAGGTAGCCATGTAGGCGTAAAGGCAGGAAGGCATCTCTAGATC +CTTTTTGTGGCAGATTGGCCGCTTTGCAGGCAGGCAGGTGTGGGTGGCCT +AGTAGACCGAAAAACAAGCAGGTAAAGTTCCTATAGTAGTAGGTAGGCGA +GCCAAACCTTTTTTGTAGATGATGAAGTAGGTGTAGGCCGTCTTTTGAAG +GCAGTGAGGCAGACAAGGTTAGGTCGTCGCAAAGGTTGGCTGGAAGGTAG +GCACGTGGGTCAGCAGATAAGCGTAGGCAGGCATAGGCAGGAAGGCATAG +GCAGGTAGACGTAGGCAAGTAGACGTAGACAGGATAAGATTCAAATATGA +AAATCGTGAAAAGGGGAAAATTTCTTATTCTTCGAGTGCGCGACAGTATG +CGTAGGCAGGTAGGCGTTGGCAGGTAGGCGTAGATAGGTAGGCGTATGCA +GGTAGGCGTAGGCAGGTAGGCGTAGTCAGGTAGATAAGCAGGTAGTCGTA +GACCGGGTAGGCGTAGGCAGGTAGGCGCATGCAGATAGTCCTAGGCAGGT +AGGCGTAGGTACGTATGCGTTGGCAGGTAGGTGTTGGCAGGTAGGCGCAG +GCAGGTAGACGTAAGCAGGTAGGCGTAGGCAGGTAGGCGTAGGTAGCTAG +GCGAAGGCACGTAGGCGTAGGTAGGCAGGCATAGGCAGGTAGGAGTAGGC +GTAGACATGCGCACATGAAAACCTTATAAATGTTTCAACCCCATCATCAT +CATCCCATTTTTCTCTCGATCAAAACGAAAAAATGTGCGCAAACACAAAT +TGCATCCATTTCTCTCTCTTTTTTACTCACTTATATATCTCTCATCACTT +TATCCCCCGTTTTTCAATCCCTTCCAAACACTTCCAAAACTCTCCAGAAC +TCCTAGAGAGTAACACGAGAGAAAATATTTGAACTTTTGTCTCGACCAAG +TTTTTTTTTTTGGGATTTTTTCTTTGGATTTCTACAAATTTGATGTATTT +TTTCCATTTTTCGTGCAAAATATGGTAATGGACTAGGTTTTTGCTACGTG +GCCTAGAAAAATCTCGGCCACCGATTCTGAGCTGTTGCAGCGGCCGCGGC +CGAGGGTTTTTCTCTTCAATCGATACCAATCCCTCTTTTACGCAAGGAAT +CACGTGGAGTCAGAGTGTCTCATTTCGTCTTGATCTACGTAGATCTACAA +AAAATGCGGGAACTGATTTCGTATCGTTAAGAACATGCTGACGTCACATT +TTTTTATCATAAAATTCCCGCATTTTTTGTAGATCAAACCGTAATGGGAC +AGCCTGGCACCACGTGCTCTTCCAACCTTTATATTTTTTTTGACAAAATC +TTGAGAATTTCTAGAATTTCAACGAAAATCCAATTTTATACCTAAACTTA +AAAAAAAATTGCTAAATTGTGTTCGACTAGCAAAAAATGCACCCAGTTTT +ACAAAAAATCTTTTTTTCTGAAAAAAAATTCAGTGGAAAATGTCTATAAT +TTTCAGTAATTCATACTAAAAAAAATCCAATTTTTACAACAAGTTTTTGT 
+GGCCGCGGCCGAGGTTTTTCCTCTTCCACGTGGTGTCAGAGTGTCTCATT +TCGGCTTGATCTACTTAGATCTACAAAAAACGCGGAAACTGATTTCTCAT +GGTTAAGAACGCGCTGACGTCACATTTTTTTGATCGTAAAATTCCCGCAT +TTTTTGTAGATCAAACCGTGATGGGACAGCCTGGCATTACGTGAAAACAA +ACAATGCAAGCGCGCTCTAATGCGAATTTATTTGCTCTACCACCAGATAA +ATTCCACTTCTTCACAATAAACCTAAATATTTTTTGTGTCAAAATCTTGA +GAATTCTTAGAATTCCAGAATTTCAGCGAAAACCTAATTTCCGATTTTGT +TATTGTTCAGGGGAACAAGTGAGCCACACAGCTTCCGCTTCTGCTCCGAA +TTCGATTCCCCTGAGGCATTTTTGTTCGAAAAAAGCCTCATTTGTCATAC +TTGTCGGCAACTTTCTTCGAAGTTTTTGTTCTTGTTTCAAATAATTTTTC +TTCAATACTTGTTCCTTCCGTCCCAGCAGATGTTCTTTTGCGGGGGGGGG +GGGGGAGGGGAGAAAAAAAATAAGAGAAGAAGCAGAAACTCGACACTTTT +GTGTCCGCATTTCTCGTAAAGTTTTCAGATTTTTGAGCTTTTCAAGCTAT +AGCTGAACCACCTAGCAAATTTCGCCGCAGAGTTGATTTCTCCGGGAGTG +GGCTTTCTACTGGAGACCTCACTTTTTTTGCCTTTTACTAGAGCTTTTTA +ATTTTTTAATAATTTATTGCAGCTCTATGGATTTTCCAAAAAAAACCACC +GAAAAGCCCGAAAAATCAAAATTTTCGTTTTTAAAACCCTAAATTCCATA +CTCCGTCCAAAGAACACTCACATAAAAATGAAGAGATTTCGCAAAAAAAG +TGAAATCCAATTCAATTTATTTCAATTTTGAAGAGTGCGTTCGTCAGCTT +TTTTTTCGACAAAAAAAAAAGAAAATGTGTAGTGTAGTTTTCAACAGATT +TATTGTTTTATTAATATAATAGAAATCAAACCTTCTTTCTGCCTACCTAC +ATGGCTACCTACATGCCTACTTGCAAGTAGACCTGTCTATCTAGGTACTT +ATCTCACACCTCTTTACCTACATACCTACCTTCCTACTCACCTGCCTACT +CATGTAGACATAAGTGATCGTCAAAAAAGAAGCTGATGGATCAAAAGTTT +TTGGAAATTTATTTCGATTTTCAGGTTTTAAAAATTGCGGAAGATGTTTA +ATTTTTGACGATTAGTTTAGAATTTTTTTTAATTAAGAAAAAAAAATTAT +TATTTTATTTTTAAATTACTCCAAATTTTCTTCTGATTCCGAATATCTAT +GTGAAACTTTTTTTAATTCCTTGGTTTTATATCTAAGCTTAAAATCGCTA +ATTTCATTTGTGCACCCAAGAGATTTCGAATTTTCACGGGGTTCTGGCCT +TCCTCCTTGAATTTTTCGCGCTCCATATAATATTGACAACCGGGCAACGC +GTGGGAAAGTCGTGTACTCCACACGGACAAATACATTTAGTTTTACAACT +AAAATCGAGCCGCAACGCGACACGCAACGCGCCGTAAATCTACCCCAGAT +ATGGCCGAGCCAAAATGGCCTAGTTCGGCCAAAACTCTTCCATTTCAATT +TATGAGGGAAGCCAGAAATCCCGTGTAATTTTCGCGCCAGAGACGCCATG +TGTCGATTTACGAGATTGGTGTATATTTACAAAATGCGTAATATTTATAG +AATGCTGATTTCCGTTTTTTTTTTTTGAAAAATGTCATGTGTGCACAAAT +TAAATTCGGCGATTTTAAGCTTAAATATAAAATCAGGGAAATTTTTTAAA +TTTTTTTCACGTAGATATCCGGAATCAGGGGAAAATTTGGAGGCAATTAA 
+AAATGTCTCCCTGATCATTTAAATTCTTTAAAAGTACTCTTCAGAATCCT +TTTTTCCTAACCAAATAAAATAATTTCTGACGCCTTTTTTTTCTCTTTTC +AATAAGAAGCAAAAAACAGACGAACCTATTCCAATTTCCCACCACACCAA +TTCGTTCTTTCCGCATTCTTTTTCTTCCATTATTTATCTCCCTTTTCCTC +CATTCTCTGTGCTCTTATATATATCCATCATCCCATTTTCTTTCTTGTTG +TGTGGTATGCTCTTTTTCATCCATTATTCGAAAATTCTCATTTTCCAAAG +TTTTTTGTCAAATACATTTTAAAAATTTTTTAAACCAAATCTCGCCGTCC +ATCGATTTTAAAATACCTTTTTTGAGAAGTTTTCAAGAAATTTCATTATG +AAATTCGGTGTTTTCAGACAATTTTGAGTCTAATAAAACAATTTAAAATT +TTGACTACATCACCTTTAACTTGTAGAATGCCGTTTAAACACACTTATCA +CTTTTTCAGAATCCCATATGCAATGGTATCACTTGCTCCTGCTAATTCTC +ATGATCCTGGTCCTAATCCCATGTTTCTGGCAAGCTTGGCTACGATTTTC +CCGTCGATTCTCCGATTCCTATGATCTATCAAGAAATGCGCAAAAGCGTG +ATGAGATTTCAAAGGAGTTTTATGTATAGCTTACTGACAGGATTTCAAAA +AACAGTTACGGTAAGATTTTTCGAAAAATTTCACGGATTTCTGGCTTCTT +GCCGAACTAGGCCATTTTGGCTCGGCCATATCTGGGGTAGATTTACGGCG +CGTCGCGTGTCGCGTCGCGGCTCGATTTTATTTGTAAAACTAAAGGTATT +TGTCCGTGTGGAGTACACGACTTTCCCACGCGTTGTCCGGCAGGCGATTG +TCAATGGAGCGCGAAAAATTCAATGAGGAAGGCCAGAACCCCGTGCCTGT +ATAGGATATTTTGAATCTCAAAAGAATTTGAAGATTTGAGTTTTTTGAGT +TTGAGGTGGAAAAAATCGTAAAAAATTCCTTTCCGATAAATTTTCGGATG +CAAAAATGTGATCCATGCTGTAAAAAATCCAAGAAAGCCAAGAAATGTGC +GGAGCCTATTTTCCGATTTTATATTACTTTTTATTTCAAAACTTGATTCA +AAAATATCAAATTCGGAAAAAAAACCGCAACTTGGTAAAATTTCCTCGAA +AAATATTCTCCGCTTAGGCTCCGCCCCTCTCTTGGCATTTCGAATTTTTT +CGTTAAATTTTTGGAAACTGAAATTAAGTCCAAAAAGAACCAAGAAACGG +GCGGAGCCTATTATGGGATTGAAAAGACATGGTGCATCGATATGGAAACG +GCTTTTAATTTGGTTTTTTTTAAATAAGAAACTAGTAAAATTCAAGTGGC +CTAGCTTTCAACTCTTCCATAATTCCAGGGTACATTCCAACTGGTCATTG +CTCATTGCTCTCTCTTTATGCTTCTCATCACCAACGAATCCACTCAATTT +TCACTCAAGCTGTCATCTCCAGCAGTGTCTCTCAATTTAATTATGCTTGA +AATTGTTTTATAAAATGAAACAACACATACTCTGTAAAAGCAATTTATTA +TTAAAAAGTATATATATACACGTATTCCTGGGTGGGTATTGAGACTAACG +GAAGCGACATGGAGGAGAATTTGAATTTCATTAAAAAAAAAAAACAAAGA +GGTAGGTTACTGTAGCATCCAATCTGGAGGCTGCACAAAGAAGACTCCAC +CGTCAGTGGCGCAATATGTTGGACACACCTGGAACGTTGAAACATTATTT +ATTTCTTTAAAATTTTCTTTGTCTAAGCCTGAGCCTAAGCCTGAGCCTAA +GCCTAAGTCTAAGCCTAAGCCTAAGACTAAGCCTAAGCCTGAACCTAACC 
+CTAGGCCTAAGCCTAAGAAAAAGCCTAAGCTTGAGCCTAAGCCTAAGCCT +AAGCTAAAGCCAAAACCTAAGCCTAAGCCTAAGCCTAAACCTAAACCTAA +ACCTAAGCCCGAGCTTAAACCTGAGCCGATACCTAAGCCTAAGCCTAAGC +CTATTCCTAAGCTTAAATATAAACCTAAACCTAAGCCTAAGCCTAAGCCA +AGCGCTGAGCGTAACTTAGAGCCTAAGCTGAGCCTGAGCCTAAAACTAAG +CCTAAGCCTAAGCTTGATCTTAAGCTTAAACCTAAGCCCTGAAAATTTTT +TTTTGAACTCCACAACAAAATTTGTAGAATGTTTTTAAAAATTGGGATTC +TGAAATTCCCCGCCAAAAAGTTTAAAAACTTCACATTTTAATTTATCTGT +AAAATTTATATAAATCCCTCTTAGATTTTGAATTTCGCGGCAAAACTTGT +AGAACATTTCAAAAATTTACTTTCGCGCCAAAGATTGTCGCGCGAAATTT +ACAATTTTTCTATAAAAAATGTATTTCATAGAATTTCAAAAAATCGAAAA +TCGCGCCAAAAATTTTTAAAAATTTTTTTCAGTATTGTGACGTCATAGAC +TACAAACTAACCCCTCTATTGCCTTCCTCCCCGTGCTCTCCAATGAGTCC +TTCGGCTCCAGCGGCTCCATCATTCCCTGGGAACCCGGGCTTTCCTTTTT +TTCCTCGTGGGCCTTGTGCACCAATCGGGCCGTCGATACCAGGCATACCA +GACGGTCCTGGCGGCCCGAATTCTCCTTCCTCTCCGATCGGACCTGCTGG +TCCCGGGATGAACGGTTTCGGGGTGGCGTCCGCACCCGGGTCGCCCATTT +CTCCATCCAGTCCTGGGATTCCCTGAGGGCCTTGTAGGCCTTTTGGTCCC +ATTTTTCCATTTTCCGCGCGAAGTCCATTCTTTCCGGGTAGTCCTGGCTT +ACCAGTGTCTCCTTTCACTCCCTGAAAATTTAGTTTAGTGGTATAGGCCT +ATCAAATAGGCTTACCGTAATCCCAGTGGTTCCTTGTACTCCTCTCGGAC +ACGGATCCGCACACAAAGACTTATAATTCACAGGAGGCTCACATGGTAGG +CGCGCTGGAAGTCCAGGCGCACCTTCCGATCCTTTCGTTCCAGGAAGACC +GTCTCTTCCAGGGATCCCCGGGGGTCCTTGTGGATGACGGCACTCACATA +CTCGGGATCTTGCGGTTTGGCCGGTTTCGTGTAGGAAATCGTCGATTTCA +TCTCTTTTCTGGCGGGTTATGTTTTGGAAATCGTTGGCGACATCGGATAG +CATGCTTTTCATCTCGAGGGCGCTTTCCTGGAAATTTAAATTTGCTTGAA +ATTTTCAAACTTGTTATATTATAATTTACAAAATACTGACTGCCTTTGTA +AGTTTTTTATTACAGGAACACAAAATTCTAAGAATGCGTACTGCACACTA +TATTTGACGTGCAAGATATCTCGTAGCAAAATCTACAGTAACTCTTAAAA +TGACTACTGTAGCGCTTGTGTCGATTTACGGGCTCGATTTTCGCAACTTT +TTTTCTTTCGGATTTCCTTCGTTTCCTCGTATTGTTTTCTAACTTTTTAT +TTCAAAATTTCAATATTCTATCGATAAATAAATCATTTTAATTTATTTCG +AAAATCGAGCCCGTAAATCGACACAAGCGCTACACGAGTCATTTAAAGGA +TTACTGTAGTTTTCGCTACGAGATATTTTGCGCGTCAAACATGTTGTGCA +ATACGCATTCTCAGAATTTTGTGTTTCTCTAATACTGTGTTTTTAGGATT +TTGAATGAAAGGAAAAGCCCGAAACTTTTAAACAAATTTTTCGGTGCATT +CTGTATCAAAAAAGCTGAATAAAATTGTACAAAAAAATGCCACATTTTAT 
+TGAAAAATTGATAATAAAATAATTTGTTTTCAAATAATTTTTGAATTTCT +CAATTTTTTGGAATTTTTGGATTTTTTCGGGCCAGAAAGTAGGCATTCTC +ATGAACTGATGTTTTCTTCATAATAATATATTCAAAAAAACGGTCTCCCA +CAAAAAAAGGTCCGAGAGGCGAACAAGGTTGTAAAACGTTGCTATTGTTT +ATTTACAACCGCCATTATACCCCTATAGGGGTTGTAACACCACATAACAA +TTTGATAAGATGCACACAGTAGGGAGAGGGACCTAATTTAAAAATATAAG +AATTTTCTCATTAAATACAATTTTTCTCTATTTCTTTCAGGAATATCGAG +AGGAAAATCAGCGAAATTTTTGGAAATTTTTGGTTTTTTTCTAACGAAAC +AAAAAAAATTTGAAAAATTTCAAGAAATTCTGTGTATTTATCGATAAAAT +TTCAGAAATCAAGGGGGTCGTATAGAAGTTAGACGGTCGGTTTTTCGACT +TTCAGCTAACTTTATGAAATTTCATTTGTTCAGGTTTTCTGACATTTAAA +ATCAAATGGCATTAGCAGTTTTTCATGAAAAGTTTCAGAAAGTCTCAAAA +CTCCAAATTTTAGGAGGGTCGTATAGAAGTTGGACGCACTAAAAAAAAAG +GAAAAATGTGGTTCTAATTATTACGGAAACACTAAATTCTGAGAATGCGT +ATTGCACAAAATATTTGATGCGCAAAACATCTCGTAGCGAAGACTACAGT +AATTCTTTAAATGTCTACTATTGTAGCGATTGTGTCGACACGTGGTGCCA +GATTGTCTCATTTCGGCTTGATCTACGTAGATCTACAAAAAATGCGGGAG +AAGAGACGCAGACTTCTCAACTGATTTCTCATGGTTAGGAACGTGCTGAC +GTCACATGTTTTCGGGCCGAAATTTCCCGCATTTTTTGTAGATCAAACCG +TGATGGGACAGCCTGGCACTATGTGTGTCGATTTACGGGTTTGATTTTTG +AAATTGATTAAAATAATTTAGTTATCGATATTATATTGGAATTAAACAAA +AAGTGAGAAAATAAATCGAGCCCGTAAATCGACACTACAATAGTCATTTA +AAGAATTACTGTAGTTTTCGCCACGAAATATGTTTGCGCGTCAAATATAT +TGCGCAGTACGCATTCTCAGAATCCGTAATCCTCAAACTTCTCACCTGAC +AGTACTCGGCATCGTTGACAGTATTCTGAATAGCCGGTCGAAGTATTGAG +GCAAGGGGTCCTACAGAAACAATAAACGACGACATCGTGGATAGCGAGAT +AAATAGTGAAACTATGATTAACGTGGCGTATGCCGTTTCTCGAGCATTCT +CGTTCATTTTCCAGGAGAAGCCTCCAAGAGAAGCCTATGGAGGATCCAAC +GAAAAGTGAGTAAAGAAATGATGCGCTGGCGCGGAAAAACGAGAGCGATT +ATGTATGGGAGTGACCCAAAAACACGGAAAAGTAACGGATAACAAGAAGA +AGAAGGGGGCAACTTTCTCTTTCTTATCATCATCACCATCATCATTATCT +CCTTTCTCGGGATTCCGTCATGAGAATGAATCCATTTACTCGTTTTTTTT +GTGTCCATTTAAGTTTGTCTATAAATATGGCTAATTTCAACTTTAAAATC +AAATAATGAAACTAGTAATTCTACTCATCTCTCACGCAATTCCTTTCTTT +TATGTGATATATAACTTTTATTCCATATTTAAAGAAATTTCAAGTTTAGA +GTCTAACTTGAAAATTGCCAGTGATGAATATAAAATTAGAATGAAGAATA +TTTCCAGAGAAGTTGGAAGTGTAGTCTTGGAGGAGAAGAGAGGAAAGAGG +TGAGGAAAAATTACGTTTTTTTGAAGTTTTTTCTTTAATTTTCGAAGGAT 
+TAGGCTTAGGCTTGGGCTTAAGTTTAGGCTTAGGCTTAGGTATAGGCTTA +GGCTTAGGTTCAGGCTTAGGCTTAGGCTTAGGCTTAGGCTCAGGCTTTTG +TTCAGGCTTAGGTTTAGGCTTAGGCTTAGGCTTAAGCTTATGCTTAGGTC +AAAGTATAGGCTAAGAGGCCTAGGCTTAGACTTAGGCATAGGCTTAGGCT +TAGGCTTAGGCTTAATCTTAGGCTTAGGATTAGGCTTAGGCTTAGGCTCA +GGCTGAGGCGTAGGCTTAGACCAAGGCGTAGGCTTAGGCTTAATCTAAGG +CTTAGGTGTCGGCTTAGGCTTTAGCTTAGGCTTAGTCTTAGGCTTAGGTT +AATCTTAGGCTTAGGATTAGGCTCAGGCTTAGGCTGCGGCCTGGGTTTCG +GCTTAGGTATAGGCTTAGACTGAAGCTTAGACTTAGGTTTGGGCCTAGAA +TTAGGCTTAGGCTTAGGCTTAGGCTTGGGCGGGGCTGGGCGGATGAGAAA +AAGAGAAAAATTCCAGGAAAATCCAAATAAAATCCAGAAAAAATAATAAA +AAAAAGGAAAAAACGGGAAAAATGGGGCATTCCGCGTCTGCCGCGTCAAA +AACTTTTCCTTAAAAAACGGACAGAGAAGGACCAAAAAAGACAAAAGTTT +AACAAAAAATAGTTAAACATTTTTTTTGACGGGGTTGAGCTCACCCAGCT +GAGCTCACTCCATAATTGCACACAAAATTCTCAAAAAATTAATTTTAGAG +ACTACACAGAGGCTAGCAAATGCACATGCTCTTCAACGATTTCTCAGAAA +TGCCCGCCAGGCTCCAAAGGTGCTCCAGGAGCCGATGGTCTGGACGGGAT +CCCCGGCTCCAGAGGTCTGAATGGTATGGATGGAGAGGCTGGGCACAGTA +ATATGGATGTTTGTGAGTTGCCCAAGATCTTTTATTTTTTGTAAATTCAT +ATTCCAGTCATGTCACCAACAGGATGCATCCGTTGCCCAGCGGGTCCTCC +AGGTTCTCCTGGTGATCATGGTTCTCCTGGTGAACCAGGGTACCCGGGAC +CTTCAGGACCTCCAGGGGAACCCGGTTACGCAACAATTGGGCTTATCGGG +CCACCTGGACTCGCAGGCCAAGATGGTCTAGATGGAGCTCCTGGCGAGCC +GGGAGTAGGTGGAATGGATGCAGTGAAGATTGTTTCAGTTGATGGAAAAA +TTGGAAGAAGAGGAGAAGCGGGGAACAAAGGAAGCAAAGGATCTCGAGGA +GGAGTTGGAAAAGCTGGAGGGCCTGGGGTGGCTGGAGAAATGGGAGAAAA +AGGAGCAAGAGGTGAAGATGGAAAGCCTGGAGAGTCTGGAAAGGTGAATT +CTCAGAAGTTGATCGATGCACCAGGGCGACAAATTAAGCGAATCGGCAAA +TTGCCGATTTGCCGATTTGCCGGAAACTTTCAATTCCGGCAATTTGCCGG +TTTGCCGATTTGTCGGATATCAAATTTGCCGGAAATGTTTAGAGGGATTT +TTTAAAAGACGAAAACACTTAAAACTGTGTCTTTTTGAATTTTTTCCCGT +TTTCCTAACTAGGGAATATTGTTAATAGGGGAATTGCTAGGAAGAAAAAA +CAAACTTTAAATGACAGATGAGGTCTTTGGCTACAAAAATCATTTTTTTC +ATTATTTTCATTTTCATAGAATTTGCTCACTTTTCAAAATAGACGTAGGA +ACATTCATAGGATGCGTTTAATTTTGCCATTTGAAATTTAAATTCTGAAA +TTTCCAAAAAAAATCGGCAATTTGCCGGTTTGCAGATTTGCCGGAAATTT +TCCATTCTGGCAATTTGCCGATTTGCCGATTTGCCGATTTGCCGGAAACT +TTCAATTCCTGCAATTTGCCGGTTTGCCGATTTTCCGGAAATTTTAATTC 
+CGGCAATTTGCGGATTGGCCGATTTGCCGGAAATTTCAATTCTGGCAATT +TGCCGGTTTGCCGATTTGCCGGAAATTTCAATTCCGGCAATTTGCCGGTT +TGCCGATTTGCCAAAAATTTTAATTCCGGCAATTTGCCGGTTTGCCGATT +TGCCAAAAATTTTAATTCCGGCAATTTGTCGATTTGCCGATTTGCCGGAA +ATTTCAATTCCGGAAATTTGCCGATTTGCCGGAAATTTAAAATCCGGCAA +TTTTCCAAAACTGTATAATTTTTCAGCCCGGCGCACCTGGAATCCCTGGA +AAAGATGGTACATACTGCCGATGTCCCGATCGAAATCGTTACGATATTTA +TCCATATAAATCGAAAATCTGATAATAGTAATAAAGATAATAATACAATA +AATACAGACAGGAAACGAAAAGCGAGCACTGACGATTGTATGATACCCGA +AACATTAAGGAGGAGCACAAAAAACACAATGAAAAGACACAGAAAAATTG +AAATTTGAAAAACTGGTAACGAGACTAGAATCGTCATGATGATCTTGTTG +AAAAAATTGGAAAAAAATTAAGAATCTAGAAATTGATTAGGCGCAGACTT +GACACGATCTTGGGCACCGAATCCGCATTTCAGGCCAATTTTCGCATGCT +CCGCCTTGAGCCCATGACGGGCAGAGGGAGTCATGATCGATGCATCTGAA +ATGGGGAATATTATTTTTTATTTTAAGCTTCAGGTTTAGGCCTATCGACT +TTGGATCTCAGGCTCAGGTCCTTAAATTGTGTATCTCAAGCTTAGGCTCT +CTCGCTCAAGCCTTTAGATTTCAAATCTTAGCCTTAAGCTCTCCGACTGA +TTTAGGATATCAGACTAAGGGCCTAGGACTTTGGGCCTGAGGCTTAGGCT +AGTATTCAATCTCAAGCTTAGGATCTCACGCTTAAGCTCTCTGACTCAGG +CCCTTAGATTGCAAAAACCTTTGCCTTAGGCTCCCCGACTGATTTAGGAT +ATCAGACTTAGGGCCTAGGACTTCGGGCCTGAAGCTTAGGCTCTCAGATT +CAGGCCCTCAGACTTTGAGTCTAAGGCCGTTAGACTACGATTTTCAAGCT +TACAATTAGGCTTTTAGGCTTAAAATCAAAATCCAATTTAGTTTTTTAAA +AATAATCAAACATTTCTAAACCGCAATTTTTTAAAAATTACCCTAAATAT +TTCAAATTTTAGGCTTAGACTGTCAGACTTAAACCTTTAAACTTTTGGTC +TCAAATTTAGCCTACAAGTCTGTGGACCTAAGACTTTGGACCTCGAACTG +CGGCCCTTAGATCTCATGCTTGGGCTCTAATATCCTGGCCCTCTAGTCCT +AAATCTTTTGATCGTAGTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCT +TTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTT +CAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCA +GGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGG +CTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAAGCTTTCAGGGT +TTCAGGGTTTCAGGCTTTTAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTT +CAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCA +GGCTTTCAGGCTTTCAAACTTTCAGGCTTTCAGGCTTTCAGACTTTCAGG +CTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGGCT +TTCAGGCTTTCAGGCTTTCAAGCTTTCAGGCTTTCAGGCTTTCAGGCTTT +CAGGCTTTCAGGCTTTCAGGCTTTCAAACTTTCAGGCTTTCAAACTTTCA 
+GGCTTTCAGGCTTTCAGACTTTCAGGCTTTCAGGCTTTCAGGCTTTCAGG +CTTTCAGACCACAACTAGACCGGCAACACACGTGTTGTCTAATTACTCAC +CTCTTTGCCGGCGCTCTATGCATATTTGATGGTAACTCGATTGTCTTTGA +AGCTATCACTTTCACAGCTTCCCAAGTTTCCCGAGCTGTTGGAATGATCT +GAAACATCAAAAATCATCATCAAAAATCGTGGTGAGACCCAAAATTACCT +GATTCTCAGCCAGCAAGAATCCATCCCAAACTTGCTCTTCGGGTCTCAAC +TCGAACAGGAATGAGTACTTGACGTGGGCTTTTCCCTTTGCCCAATCTTC +TGAGCCACCGGATGCGGGGTCTGGAAATATTTTGAAATTTTATACATAGG +AGCCTTAAGAAATAAGAGCTTACAAAGTGTATCTGCACCGGTTCCAACTT +TGTACTGCGTGTTGTAGACACTATTCAAAGCCTGAGCTGCTGAGAGGGCA +GTGGATCTCTGAAATGTTTAGAAAATATGACCATGACTTGGGCAAACAAT +TCCCGCATTTTTGTAGATCACACCGTAATGGGATAGCAAAACACGTGGTG +CCAGAGTGTCGCATTTCGTTTTGATCTACGTAGATCTACAAAAAATGCGG +GAACTGATTTCTCATGGTTAAGAACGTGCTGACGACAAATTTTTTTGAGC +GAAAAATTCCCGCATTTTTTTGTAGATCAAACCGTAATGGGACAGCCAGG +TACCAAGTAAAAAATTTACTCACCAAATCATTCAAATCATTTGAATAAGT +TCTGACTTGATGGCCGAACGGATACATCAAAATTTGAGAGTACGAGTGGA +AGGTGAGGAATGTCGAAATACGGTGTCGCTGCACGAAATCTCTAACCGCC +GCAGTCTCGGGCTCCGAGAAGGCGTAGGCTCCCTGATAGATTTCCGAGCA +AGGATCAGTTGATGAGCCGACTTGTCCGAAGAACCAGTCAAAGTTTCGAT +TGAGGTCGACACCTTGACAGCAGGTTGTTGTGGGAGGTTGGAAGAGTCCT +GAAAGGTTTTTATTTTTGAGATTTTTAACTTTCACGGAGAATTGCCAAAA +TCTTAGTTTTCATAGCCTGTGTGACATTATTTGGGCGTAACTAAAGTCCA +GAAAGCTTATGCCTTGTGCTGATGAATTTTTGCAAAAATTGTGCCCAGTT +TTGCCACTTTTTAGTGGTTTTTGATGGGTTAAACCTAAATTTTCTGAATT +CAAGTTTTTATTTTTTCCAAATGTTTTTTCAGCCATCGAATGGCTGTCCT +TTTTTTCGGGCAAAAAAATAATTTTTCTGAAACTATTTGAAACAATTATA +TTCTAAAAAAAAGGCAATTTTTTAAATTTTTTCTAGTAAATTTTGAGTCC +TCTAGCTACAAAATAAACCATTTTAGATGAGTTTCAAAAATGTGCATTTC +TACAAAAATTGCCCAATTTTTGCCACTTTTTGGTGGTTTTTGATGGGTTA +AACCTAGATTTTCTGAATTCTGCGTATATAAATTACACGTTTTCAACAAA +TTTAGACATTTTTTTTATTTTTGCCCAATTTTTTTCCAGCCATCTAATGA +ATATCCTTGTTTTTAAGCAAAAAAAAATTTCTGAAACACAATTATTATGA +TCCAAAACAGGGGTGTGCGGCAAATCTCAAAATTTGCCGAGCACGGCGAA +TTTGGCAAACGGCAAATTTGGCAAATTCAGCAAATTTGCTGAGCTTTACA +AACTCCGCAAAAATTTGATACTTTTTGAAGCACCTAAACTACTAATTTCT +GAACAAAAGTTCAAAATAACTTGATTTTGTGCCATTTTTCTAAATTTTTG +GAAATCAATAATTTTGTCAATAATTTTGGTTAAAATTGTATTGTCAAATT 
+TTTGACGCGGGCGGCAAATTTCAAAAATTTCTGAGCTCGGCAAACGGCAA +ATTCGGCAAATTTGTCAAATTTGCCGCACATTTTTTTTTGGTTCAAAACA +TAATAAAACACTCCAAAACATTTTAGATTTTTCATAATTTCCGGTCGAAG +TTTTTGCAAATTGCTAAAATTTTGAAAAATATGAGTATTTGAGGAAATCC +AAAGCAATGCCGCATGTTCCGACCCCTACAATGTTTTAATACAAATGATT +AAAACAAAATTACAGTAGACAAAATGTAGAAAAAAAATTATTTTTGGTCG +ACTTCCAAAATTATGAGTGGCAAAAACTTTGTAATTGTCACTTTTTGACA +GTAAATAAAAAATTTTAAGAATTTTTTTGAAAAGTTTTATTATGATATTC +GGTCATTTTGGGACCAAATGAGTGGTTTTTAACAATTCCGGCACTGGCGC +TAGTCCAGCAATTGCCGTAATCTTAGTTTTCATAGCCGACATTATGCGGG +CCTAACTGAAGCCCAGAAAGCTTGTTTACAAAAATGAGCAAGCAGAAAAA +GATCAATCTCCCCAACAAACCTGTTGCCTGTTGAATACACTTTGGTGGTG +ATCGATTCTTTCTCCACAGTCTGATCTCCGGATCATTGCTACTTCTACTG +TACTCATAACCATCCGGATTCAGCAGTGGAACAATATACCATTCGAGTTG +ATCTACGAATTGCTTGATTTGAACATCTTTGTCGTACTGGGTGACCAGTT +GGTGAATGAAATAGAGTACTGTCGATGGTGACACCCATTCACGAGCATGG +ATTCCTCCGTCCACCCAGATGCCACGTTTCGTACCTCCGTTACGCTTGTT +GGTGATCTGGAATTGAGGAAAATTGGGGTGAGATGGGATCACTAAATTCT +GAATCTAGTTTCTGTGCTACTGTGTGTTACGTTTTTTGCGCTCCATTGGC +AATCGCCTGCCCATCAAAAACCGTCAAAAAGTGGCAAAACTGGGCAATTT +TTGTAAAAATTCACAATTTTGAAACTCCTCTAAAATGGTTTAGTTCGTAG +CTAGAGGACTTAAAACTGATTTTTGAACCCTAAAAATTGTCTAAATTTGT +TGAAAACGGGTAATTCTTGTATGCTGAATTCAGAAAATCTAGGTTTAACC +CATCAAAAACCATCAAAAAGTGGCAAAAGTGGGCAATTTTTGTAAAAATT +CACAATTTTGAAACTCCTCTAAAATGGGCAATTTTGTAGCTAGAGGACTC +AAAATTAACTTCCAAACTCTAAAGAATTGTCCTTTTTCAGAATATAGTGG +TTTCAATAATTTTCGAACAGAATTTTTTTTGCCCAAAAAAGGATGGCTGA +AAAAAATTTTGGCCGAAAAAAAAACAGTTTTACACTGAAAGCCAAACCGC +GACGCGACACGCAAAGCTCGGTAAATCGACCCCAGCCGTGGCCGAGTTCA +AATAGCCAAATTCGGCAAACTCTCACATTTCAAGCTATGTATTTCAAGCC +AGAAGCGCGTGCGATTTCTCAAAAACCGGCAGCTCAGTTGCAGGAGGACT +TTACGAAAATTTTGAAAATTCCAAGGTATAATTGGTAGGTTTTTCGGCAT +GTTGCATGGTACTTAAAATGTTTTTTTAACTTTCAAAAGTGGCTTTTATT +TTGGTTCTTATTGCAAGCCTAATAATAAAAATACTATCCACGTTTTAAAT +GCTTCCATGGTAGGCAGGCGCGGTTTCAGGGCCGACATGGAAGCCCTACA +TGTCACGGATTTCTGGCTTTCCTCAGAAATTGAAATGGACTAGTTTTTGC +CGAACTAGGCCAGGCCATATCTGGGGTAGATTTACGGCGCGTTGCATGTC +GCGTCGCGGCTCGATTTTAGTTGAACAACTAAATGCATTTGTCCGAGTGG 
+AGTACACGACTTTCCCACGCGGGCGATTGCCAATGGAGCGCGAAAAATTC +AATGAGGAGGGCCAGAATCCCGTGATCGTCCGAAGCAAAGTTAAAACAAC +AAACCTTAATCAACGGGATCTGCCGTCCTTCGTGAGTCGTTCCGATTGGC +TGCACAGACACAAGCTCCGGATAAGTGATGGCTAATGAGTTCAGGTAGTT +GATAACATCAGCAAACGAGTGATATTGTGCAAGGTTAAATTGAACCTGCA +ATAAGTTATCATTGAACTATGTATCATTGAACGTGGTGTCAGAGTGCCTC +ATTTTGGATTGATCTACGCTGATCTACAAAAAATGCGGGAGAAGAGACTG +ATTTCGAATGGTTAAAAACGTGCTGACGTCACATTCTTTTGGGCTAAAAA +TTCCCGCATTTTTTGTAGATCAAACCCTAATGTGACAGCCTGACACCACA +TGTATTCACTCACTTTCTGCTCCCTCCAATCATGCAGTCGAACTTGTTTC +CGAAGCTTATCCCGCTTCTCCTTCTGCTCGACAATCACTCTGAAATCGAT +GAAAAGATTGTAAGTTGGGCTTAATAGTGTCCCCCCACGACACCCATCAC +GCGTAACGAGCCGTGACGCGTTTGAATAAAGAGGGTATTTGGACTTGTGA +TCCCCGCGAAAACCTGTATCGCACGTGATGCCAGGGTGTCTCATCACGGG +TTGATCTACAAAAAATGCGGGAATTTTTTGCCCAAAAATGTGACGTCAGC +ACGTTCTTAACCATGCCGAATCAGTTCCCGCATTTTTTGTAGATCTACGT +AGATCAAGCCGAAATGACACACTCTAACACCACGTGGTATCGCCGAAAAA +CTCACTTCTGAACGTCATCAATCATAACGGAATGCTTAATCGAATGATCA +TCGAGCTCTTTCATAAACTGCTGAGTTTTCTCATCGGATATCATCACATG +AACCTCCTTGCCAGCTTCCGACGGGGATTTCCAAAAATCTAGCTGAAAAT +TTTAGTGTGTCTGAAGCACCCTTATCAGTTCATATTATATTTCAAAATTT +TTCTCTAACCCCTCAAAACCGTACTCTACATCGCCGTTCTGGCCGAGTGG +TCTAAGGCGCTGCGTTCAGGTCGCAGTCCTCTCCGGAGGGCGCAGGTTCG +AATCCTGCGGACGGCAGATTTCTTTTTTGAATACTACTCTTCAAACAATG +CATGATGAAGAAGAAGAAGAAAAAGAGAAGAACGTATTGGCTTTTCCTTT +TACACTTTTGAAATCCCTAGGAACAGGTTATAAACAGCGATGTAACATGA +AAAGCAGCAGTGAGAGTGATCGAAAACGAGCTGATTATGATTTGTTCAAT +TATATGACACCGCTATTGTAATGGTTGGTTTGTTGCGTGGAACATCATAC +CTTTACATCGTTTAAATACAAATTATGAAGAATGTCTACTTGATCTTGGG +TCGTCGGCTGCACCCTCCATACTGTGAATTTTCTGAAAAATCGAGAGTTT +ACCGGCTTTTGGGGGAGATTTTACCATTGACTTTTGACTTTTTGGAGAGA +TCTAAATCTAGTAGATCATGCGGCGGTTAGGTCTATTTGAAATAAAAAAT +AATTGATCTAGATATGAAACATGGAGATCTAAGGGTTTTGGTAAAGTTGA +GTAGAATTTTTAATTATTTTTTTGAAAATTTGTTTGCAACATTTTACAAA +AAAAGACGAATAAATATTGACCAAAAAAATTTTTTTCAATAAAAAATGTT +TAAAAATTAACCACCTTTTAGCTTTTAAAGGTGGAGTAGCGCCAGTGTGG +ATTTTGTCTAAATACACTTATTATGAACCAAAATGGTCAAATATCATAAT +AAAACACTTCAAAAAATTTCTTTATTTCAGATTTTTCATAATTTCTGGCC 
+AAACTTTTTGCAAATTTCCAAAATTTTGAATGTCGCAGTTTCCGACCCCT +ACAATGTTTTAATACAAATAATTGAAACAAAATTACAGAATAAAAAATGT +AGAAAAAAAATTTTTTTTTTGGTCGACTTCCAAAATTATGAGTGGCAAAA +ACTTTGTAATTGTCACTTTTTGACAGTAAATAAAAAATTTTAAGAATTTT +TTTGAAAAGTTTTATTATGATATTCGGTCATTTTGGGACCAAATGAGTGG +TTTTTAACAATTTTCCCACTGGCGCTACTCCCCGTTTAAAGTTCTATGCT +TTTTAAAACTCACGTTTTTTTTTATGAAGTCTATTTTAAAAAATCTGGAC +TCTCAGTGGACAGTTCAGCATTTTTCGAAATTTCAGAAATTTAAAACATA +AGTCGGAAGTGGTCCTAAAACTTCAAAAAAATTCTCACAATATGTTTTTT +TGATGAAAACAAACTTATATAGTGCCAAAATGACCAAAATTTTTTTAAAA +ATAATATTGAAAAATCTTTAAAAAATCTCTTCTGGTTTTTATTTATTTTT +AAAAAATTCATATTGAAATATTATAGTGCTTTAGATTGCTCGGAAAAAAT +TCCACAAATTTTTTAATTTTTCATAAGTTTTATAATGATATTTGGTCATT +TTAGCTCTCTCGGGTAATATTGCAAACCCACGTGGTGCCAGGATTTCCCA +TTAAGGTTTGATCTACAAAAAATGCGGGACTTTTGCTTAAAAAGATGTGA +CGTCAGCTCATTTTTAACCATGCGAAATCAGTTGAAAACTCTGCCGCATT +TTTTGTAGATCTACGTAGATCAAACCGAAATGAGACATTCTGACACCGTG +TCCACCTTCAAAGTTAGTAATTGCATGAACAAAAAATTCTGAGAATGCGT +ATTGCACAACATATTTGACGCGCAAAATATCTCGTTGCGAAAACTACAGT +AATTCTTTAAATTACTACTGTAGGTCGATTTGCAGGCTGGATTTTTTGAA +AAAAAAATGAATTAAAATTTAATGGAATATTTCTATCTCTATGTATTAGA +AAATAAGGCCCGTAATGAGACGAGCGGGCCTGCCTACCTGCCTGACTATA +GAAGGCCGCCTTAAAATCAGGCAGGTAGGTTTTAACGCCTAGCCTACAAT +AAACATAAAGAGAAATGCCTAAAATAACAATAACTTTCCATTATCCTTTC +TTATCGTCCTCCCCGGCGATCGCCTAAGATCATAGATAACAACTAGCCGC +TTTCGGTCATGCAATTATTATATTATAATTGATGGATGGATGGGGGATAA +TCCAGAGAAAGGGGAAAAAGAAACTTAGAATCCTAGAAAACAGGTTCCCC +TGACTCAAACAAAAAAACCTACTTTTGATGTGCCAACACGGATGGCACGA +CGAGACCGATGAGGACGATGATGTTCATCATCATCATCTTCATCGTAGTA +AAACTTGGGGGGTGATGAGGGGCAAAAGGATGCCCTCCGCGCGTCAAGTG +TTCGTTGAACAATCCGACGCAGACGGTTAAACAAAGCAACACAGTGGTGT +CTCCGCCCAACTTCCGCGCGTCCGCCGCCGCCCTGTGTACTGTCTTGTCC +TATCCTATCTCTCTTTTCTCCACCCCGGCTTGTGTGTGAGATTCTTCTCA +TTACTAGAAGAAGGTCTCTAGTAGAAGAAGAAGAAGAAGAAGAAGACGAC +GATGGAGAAGAAGCTGAAGGCACGACGCACTGTTTGCGCAACGATGTTTG +TTTTTGCGCGCGCGAGCGCCGCATTTAAAACGTGTAAATCGGTTTCAAAA +GCGCGCGCGCGCGGTCTTCTTTCTCCGCTTGCGCGCACGCATTGACAAAA +AACCGAAAGGTGCAAAAGTGAAGAGGCCAACTACTTTCGTCCTCTTTGAG 
+CACCTAATTGTGGAAAGCTTTTGCTCTGATGTGAAATGAAGTTGGGTAAG +CTATTCCTGTCTGGGTGATGTTTTCAACTTCCGGGGGCATTGTTGGGAAA +GTGAGGACCAAAATAGGCCATAAAAAATGAAGATATACATGCTTAGGCTT +AGGCTTAGGCTAGGCTTAGGCTCAGATTCAGGCTTAGGCTTAGGCTTAGG +CTTAGGCTTGGAATTAGGCTTGGCGTCAGTGGCGAGCGTGAGCTCGCCAC +TGACGCTATTTAAGCTTGGGTTTATAGCGTTTTTTTAATTTTTCAAAGGG +TTCCCGTCTGCCGATCAAAGCGAAATTTAACAGAGAGTCGTATAGGCCGC +CAAGAAGCTGAAGTTTCAAATTGATTTCATTAAAACTCGCTTAGGCTTAG +GCTTAGGCTTAGGCTTAGGCTTACGTTTTAGCCATATGCTAGCTGATCCG +GAACATATTTTTAAAAATTGCAATATTTGACCAAAAGCTGTATTTTTCAA +AAATTCAAAAGTACGGTGAAATCATATGAAGTGCTTCTTTTTCTTTCATT +AAACTGTTCAGAATTGTCATTTTTACTAGAAAATACCAAAAAAATATGCT +AGCTTGACCGGAACATATTTTAAAAAATTAGAATTTTTGACCAAAAGCTG +TTTTTTCAAAAATTGAAAAGTACTGGAAAATCATATGAAGTGCTTCTTTT +TTCTTTCGTAAAACTGTTCAGAATTGTCATTTATACCAGAATATATTAAA +CAAAGTATGCTACCTAATCCGGAACATTTGTTTCCAAAATATTTTAAAAA +AAAGATTTAGCGGCTCTGACTAAATTGTTTGTTCGTTTTTTCTGGACCCT +TTTTTCCCCTTTTTTTGGGTGAAAAATTTTGTAAAAAATTTGACCGAGGT +GTAGAGTTGAATTCATCAATTTTTGACATTTTTTGAGTACGGGCACATGA +AAACGAAATTTCCAAAAACGCTTATTTCTCGGGATATAGTTCAATTCTAC +CAAATTTTTCTTTGCAAATCTGCTAATTCTACAAATCTACTAACCGAGGT +GTAGAGTTGAACTCATTTCCAAAAAGAAGTAGCAGTCCTGAAAATGTTTT +AGTGAGTCTTCTCATTACTCTAGAGCCCGAGCCAGTAAAATTGCTCCAGA +AGTAGTAATTTATAAATTTTTGATGATTTTATGCTGTCCAATGATAGAGA +AATTGTTAAAATTTTCAGATCAAATTGGAATTTTCACAGCCAAATATTTT +GGAAAATTTTCCACAAAAAATTGGGCGGAAAATTGGAATATTAATTCGAT +TTAATTCGATTTGGAGCAGTGTATAAATTCAAAGTTTCCTCCCTTAACAG +CCATTCAAATGCACTTTTTTCTTCGTTTAGTTTTTCTTATATATGGCCTC +CAGGCTCTCACCAAGTTCCGTTTTACCGGAGAGTTTAAGTGCAAACATAA +ATTCCTGAGATCCCATGTTGTAGTTTATGAAGACGATGAGGTGTTGTGAG +TAGAAATGAAAAATGAGAAATGTTGTATTTAATATTATAAATTGTTTCAG +CGACAACGTTATCAGTAAACACCATTACGTGTTCCATACAAATGCACCAC +ATATGTATCTCGTTGAGGCTGAAGATACCAATGACGGATTGCCAAAGTTT +TTGGATGTATTACTTTTTAATTTCCCGCTTTTTCTCTAGTATTTCTCAGA +AAATTTGAATTTCCCGCCAAAATTTTTTATCAGAAAATTTGAATTTCCCA +CCAAAATTTTTTATCAGAAGATTTGAATTTCCCACCAAAATTTTTTCTCA +GAAAATTTGAATTTCCCACCAAAATTTTTTCTCAGAAAACTTGAAATTCC +CGCCAAAAACTTTTTCTCAGAATTTTGAAATTCCCGCTAAAAAATGTTTC 
+TCAGAAAATTTGAAATTCCCGCTAAAAAATTTTTCTCAGAAAATTAAAAA +TCCCCGCCAAAACTTTTTCTCAGAGAATTTGAATTTGCCGCCAAAAAATT +GTTTTCTCATAAAATCTGAATTTCCCACCAAAATTTTTTCTCAGAAAATT +TGAAATTCCCGCTAAAAAATTTTTCTCAGAAAATTTGAATTTCCCGCCAA +AAAATTGTTTTCTCATAAAATCTGAATTTCCCACCAAAATTTTTTCTCAG +AAAATTTGAAATTCCCGCCAAAAAGTGAACATTCTAGGAGTTTGGCAGTT +TACCAAAACCTTGACTGACCATTTTGGAAAATCGAATAACCTCTTAAATA +AAAACTCTAACTTGTAAAAATTTAAAAATTTAAATATATAGCTTTGCAGT +CCTATTTCGAGTTATACATGATCATCGTCCACGATTGCACGGATAATGGG +AAATACAAGTTGTTGACAGTTGACTTGGGATCTTATTATATCAAGGATGG +GTAGGTACTTTCTACAGGGCAGGCTAGGGCTTCCATGGGAAGCAGGAGCG +GTTTGCCGGCTTCTCGCCTGATTCCTGCACTTTGGCAAAAAGTCAAAGCC +TCGATGCTCACACATAGGGCTGGCATATTGGACGTCCGGTTTTTTGAAAT +TTTAAATTTTTCCGAGAGGTAGTTTTTTTTGTACGGCGGCCGACAATTTC +CGAGTTTGGCCACTCATGATAAAATTATATGATTAAGTATAGTGAGTGGC +CAAACTCGGAAGTTGTCGGCCGCCGTAAAAAAAACTACCTCTCGGAAAAT +TTTAACATTTCAAAAAACCGGACGACCGGACGCCCAATATGTCAGCCCTA +CTCACACCAAATAGTACTATAAGCGGTTTTTTTTGCCGCAGCCGACACTT +TACGGTTTCCACGCCGCACTGTACAGAAGGCGCGGCCCGAGGCAGTTGTC +AGGCGTTTTGGCGCCTATATGCAAGCTCTAGGGCAGGCATTTCTGTTTTC +AAAAAAATGAATATTTTCAGGCAGTACGATGAAACACGCAACATTGATCT +GAACAATGCCGGTGAATGGACAAACGAAGTAATCGGAACAAAGGCGATGG +TGATGTCCAAGATATTTATTTTCAATGTTTGATAAAATTGTTCATCAATA +TTCGAGCCTACCTGCCTACCTGCCTACAAGGTAGCCTACATATATTATTT +TTCCTAATTAACAAAAATATAATTAAAACATGATTAAGATTTTCTGAAAA +TGAGCAAAAAGTGTTTTTAACTTCCACTTTTTGCCCAACAGGTTTAAGAT +AGGCATGTGACAAAGCCTACATTCAAGGCAACCTACGCCTGCCTCGGTAC +ATGGTGCATCGACCAATATTTACCTCTTAGCTTCGAGAGCTTATATCTCG +GTTACATTTTGCTCTATCAAAAAATGGCCAACTGAAAAACTGTTCTCCAA +TGCTTTTTCTACAATTTTGTGGTTAAAATTTTTTTAATATCTTGAAAACT +AGCGAAAATATAAGCCACCAAGTCGCGCGCCGCCCCCCGAGAAAGCAATT +CCAACCCAAGTTTATTGCAATAAAAACCAGTAGAAACGAAAGAAATGAGG +TTGATTTTGGAGACTTTTCAGGTGACACGGGGAGCTTTTACATTGTTGGC +ATTATTCAGATTCGATTGAGAAGGAGAAGTCGTGGGGAGGCTGAAAAATG +TTCTAAAGTTGTTGAAAAAAAAAACAAATCTGCATTTTTTAAAAACCTGA +TTCTTAGACTTCCATCCATTGCCATAAGTGCACAGAACAGTCTGTTGCTC +TTGCTCTTTGTGCTCCGTGTCCTCGGAGCCATCATCAACCTTGTCTGGAA +CTGTCGGTGTTGGCTCAGTTGACGGAGCAAATGAGGAGGATGGTGTCGCA 
+GATTCTTCAGTTGGTTTGGGAGATGATGGCGGAGCTGGCTGAGGTGGAAT +TGGAGATGCTCTTTTTTTTGGGGTTACTGCAGACTGTTTGCTCTTCAAAC +TCTTGCTCCGCGATTTTGTGCTCTTTTTTGTTTTCTTCTTTGGCTGCTTT +TGTTGCTTCTTGCTTTTTATCTGAAAATTAGAGTTTCTAGGCAATCAAGC +GATTTTCTAGGCCACCAGACATATATTTAGCCATGTCGGAATTTTCTAGG +CCATCACAGACAGTTCTAGGCCATATAGAAATTTTCCTAGGCCACCTAAA +ACTTTTCTAGGCCACCAACAGTTTTCTATGACCGCGATTTTTGCTAGGCC +ACCAACAAAAGCAAATAACACATTTTCTAGGCCACCAACAGAATTTCAAA +TTTTTCTAGGCCTTTAACTCACATATTTCACAATGGAAGCGGCGGCGAAC +ACGATGAAGGCTACAATGAGAATGCCAATAATTATGAATATCACTACTTT +GGTCCATGGCGTCGATGATGCTGCAAAAATGGAGGTACAAATATGCAAAC +GCGCTCTACCGCACTACCAGTGCGGTAGAGCGCGTTTCAAACTAACAGCC +AGAAGTTGGCGCCGGTGGTCGCTGAGTGGAGGTGGACATTATCAGGAAAT +GAATGAGAAAATGGAGAAATTTGGGAAAATTGATTAGAGACGTGGGATTT +TTTTTTAATTTTTATAATATTTTTGAGTAGGGGGAAGGGGTGAGGGTCAC +GGAAAATACCAAAAATTTGTGATTTTTCCATTAAATTGGAGCAAAATTGA +CACCATACCTCGTGGCCTAGAAAACTGCAGAATTTCTAGGCCACACACGT +TACGTCATAATTTACCTGTTAAACCAGGTAACCAACAATCAAATCAAGAG +TAATTAATCATCTCCTATCTTCTTTTATTTCTCCGTTCCTACTCCCAATG +TTTTTTTTTTGTAATTTGTGATCACAAATTGTTGCATTTTGTAGTACGCC +ATGTACGTCTGTGTACTACTTTTTTCTGAAAAGAATACAAAAATGTTCGC +TTTCAGGAAAAAATAGTGAGAGGTAGAAGAAAATTACGGGGACTAGAAAT +ATGTAAAAGTTCGACAACGTAGGCAGGTAGGCAGGTAGGCAAGTAGGCAG +TTAGGCAGGTAGGCAGGTAGGTAGGCAGGATAGTAGGCTGCCAGGCATAA +TAAAGATGCAGGCACGTAGGCGGATGTTGCAGGCATGAGAAGTCACGGGG +GTAGGCAGGCATGTGGGTATGTAAGCAGATGTAGGTATGTTGCAGTCATG +ATGAAGGCACGTGGGTAGGTAGGTAACTAGTTAGGCTGGTAGGGAGGTAG +ACAGGTAGGCAGGCAGGCAGACGCGTAGGAAGAGTTAGCCAGGATGCAGG +CACGTAGGCAGATGTAGGTATGTTGCAGGCATGCTGAAGTCACGTGGGTA +GGCAGATTGGTAGATGTGTACGCAGAGTAGGTATGTTGCGGGTATTATGA +AGGCACGTGAGTAGGTAAGCAGGCAGGTAGGTAGGCAGGCAGGTAGGTAG +ACATGTAAATATGCAGGCATATACGTAGGGAGAACAAGCCATAATTCAGG +CACGTAGGCAGATTTAGGTATGTTGTAGGCATGATGACGTAACGTGGGTA +GGCAGATAGGCAGGTAGGCAGGCTCGTAAGCTGGCAGGCATAATAAAGAC +ACATAAGCAGGCATGTAAATCTGCAGGCAGATACGTAGGAAGAGCAAGCC +AGGATGCAGGCACGTAGGCAGATGTAGGTATGTTACAGGCATGATGAAGG +CACGTGGGTAGGCAGGTAGGTAGGCAGCCAGGCAGGATCGGAGGTTGGTA +AGCATAAAAAAGACAGGTAGGCGGGTAGGCAAGCATGTAAATGTGCAGGC 
+AGATACGTAGAAAGAGCAAGCCAGGATGCAGGCACGTAGGCAGATGTAGG +CATGTTGCAGGCATGATGAAGGCACGTGAGTAGGTATGCAGGGAGGTAGG +TAGGCAGGCATGCAGGCACATAGGCAGATGTAGGTATGATTCAGGTAGGC +AGGCAGGTAGGAAGACTGAATGCAGGCGTGTTGCTAGGCAATAGCGCCCT +TTCCTTGTTATGACAATCTCGATAATCTAATAATAAGCTATTCAATCATA +TCACTGCGCCGATACACAAACTACCTGTTTTTTTGAGTATGCGTCTCGTC +CCGTCTGCCCAAAAGCTTCTACAGAGTGATTAGACGGTATAAGGGGGAGA +GATAGGATAGATTTCAATCATTCTCACTTTTTTCGTCCCTTTCTCTGTCT +TCCTTCTCCCTTTTTTTACAGGGCGCTTTTCCCTTTTTCGTGTGTGTGAA +GAGAGAGAGTGCATACAGGTGTAGAGGACGCCCAGACATCGAGACGGAAC +AGGATTCTTCATCGTCTTCAGCTATTTGGAAGAAGGAGTAGAAGGTGACG +GTGTGTGTTTGGAGACGTGTAATCATCATGTCTTATAACACTTCCTTGCT +CATTTTTCTCTGCTTCTCTGCCGTCTCTCTATCCACACAGTCTTTCGCCG +TTTATCGATTATTACCCAAATCACAGACGGATTTTCAAGCTATACAGAGG +TTATACAAAAATGCGACGGATCATGATGTGAGTTTTGTTTTCGCATTTTT +GAGCAGATACAATTTCTAGGCCACGAAAAATTTTTCTAGGCCGTTGAAAA +AATCACTAAGTTTTCTAGGTCACAATTTTGAGCAAGTTCTAGGTTATTAG +ATTATTACGCCATGGTGCATTTTTCTAGGCCATCAGGAAATTTTGCAGGC +CACCATGAGTTTTTGATGCAGTTGAACTAAGTTAGTGGCCTAGAAAAACC +TTTGTTAGCCTAGAAATATTTTCGCCGGCCTAAACACATTTTTGGTGGCC +TAGAAACACTAACTTTTGGTAGCCTAAAAATTTTCTGTGACCTGGAAACA +TTGTTGGTGGCCTGAAAACATTTTTGGAGGCTTAGAAATATTCTTGGCAA +TTTGAAAACATTTTTGGTGGCCTAGAAATATTTCTGATGGCCTAGAATGT +TTTTGTGGTCTATAAATAATTTTGGCGGCCTAAAAAAATTTGGAATGGCT +TAGCAAAATTTTTTTGGTGGTCTACAGCCATTTTTAACGACCAAGAAACA +TTTTTGGTGGCCTGAAAACATTTTTGGTGGCCTAGAAACATTCTTGGTGG +CCTGAAAAAACATTTTCGGTGGCCTAGAACCATTTTTGGAGACCTGGAAA +TGTTTTGGTGGCCTAGAATATTTTTGGTGGCCTAGAATATTTTTGTTGGT +CTAGAAATATTTCTGATGGTCTAGAATGTTTTTGTGGTCTATAAATAATT +TTGGTGGCCTAGAAATATTCTTGGAGACCTGATAACATTTTTGGTGACCT +ATAAATATTTTTGACAGCCTAAAAATTTTCGGTGGCCTAGAAATATTTCT +GATGGCCTATAATGTTTTTGTGGTCTATAAATATTTTTAATGACCTAGAA +ACATTTTTGGTGGCCTTGAAACATTCTTGGTGGCCTAGAAATGTTTTTGG +TTTCCTAGAAACATTCTTGGAGACCTGAAAACATTTTTGGTGGCCTAGAG +ACATTTTTAATGGCCTAGACACATTTTTGGTGGCCTAGAAATGTTGCTGA +TGGCCTATAATGTTTTTGTGGTCTATAAGTATTTTCGGTGGCCTAAAATA +TTTTTGGTGACCTAAAACATTTTTGGAGACCTGGAAATGTTTTGATGGCC +TAGAAATATTTGTGCTGTGCTGAAACATTGTCGGTGGCCTAGAAATATTC 
+TTGGAGACCTGAAAACATTTTTGGTGGCCTAGAAAAATTTTTAGTGACCT +AAAAACTTTGGGAATTACCTGGAACCAATTTTGGCGACCTAGAAAAATTT +TCCGTGGCCTAAAAATTCCCAATAATTAACAAATTTTTAGTTAAACTTCT +GGAAAACGGGCAAGGACAAGCACGGGTTTTGGGATGTGATGGTGGATATG +AAGAATTCGAAATATTTTTTGGACTTTCTACAAGTAAATGATATTTCTTA +CATAAAGACGATTGATGACGTGGAGGGGTGAGCTTTTTCTGAAATTTTGA +AAATTACATAAAGAATATGTGCTCTATTGATAAACTGTTAGGATCACCGT +AAATCGACAAGAATCTCCGTAAATCCACAACGGCCCCATCTCAAAAATTC +CTAGGCCACACGTGTGACGTCATTTTTCTTCCAGATTAATAACAAAGCAC +GAGAACAACAAGACCCTCTCGAACATGTTCCCCCGTCTGTGGGATGACTC +CTCGTCTGCTCACTATGACTTCCACACCTATGGATCCTATCAGCGAATGA +CTGATTGGATGAAGCAGCTGGTGGTGAAGTATCCAAAGATGGTTCAGTAT +ATATCGATTGGAAAAACTACGGAGGGCAGGAATATCGATGGAGTTGAGGT +AGGTCTAGAAGGCTGCCTTAGGTCTAGGTCTAAGTCTAGGTTTAGGTCTA +GGTCTAAGTCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTCTAGG +TCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTC +TAGGTCTAGGTCTAGGTCTAGGTTTAGGTCTAGGTCTAGGTCTAGGTCTA +GGTCTAGGTCTAGGTCTAGGTCTAGGTCTATGTCTAGGTCTAGGTCTAGG +TCTAGGTCTAGGTCTAGGTCTAGGTCTAGATCTAGGTCTAGGTCTAGGTC +TAGGTCTAGGTCTAGGTCTATGTCTAGGTCTAGGTCTAGGTCTAGGTCTA +GGTCTAGGCCGATTTCGATTTCTATGTGTATGGCCTATAGTGATTTTCAG +ATCGGCGGAGACTCCCGCACCAAAAAGATTTTCTGGATCGATGGAGGTAT +TCATGCTCGAGAATGGGCGGCCCCGCACACGGCTCTCTTTTTCATCCATC +AGGTATGTGATCCCGCCCCGTCAGAAAAAAGTGTGTTCCGATTAGTTCCG +CTTGGATTACACAAATGTTGGCAGAGACAGAAAAAGGGAAAACTTGATAA +CAAATGGGTTGGAAAAAACGTAATCGGATGATGCAATCGCGCTCTATTGC +ACTACCTCAACAAAACATTTTTTTGCAGCTTACCTCCCGCGCAAACGAGC +CCGGAATCAAGAAATTGCTGAACGAGATCACATTTGTAGTGGTTCCCTGC +TTGAATCCAGATGGCTATGAGTTCACGAGATCCTCCACAAATCCACATGT +AGGTTGGGGTAAGGTTACTGTACCCCCCCCCCCCCCCCTCCCCAACTCAA +AGGCGCAAGCACTCCCAAATTTTTTGCAGGTTCGTCTGTGGCGTAAGAAT +CGCTCGAAAATGCAATGCCGTAAGGACATTTGGGGACGCAATCGCTGCTG +TCGCGGTGTCGATTTGAATCGTAATTTCGATTTCCACTTCAGAGGTGAGA +GAGAGAGGGTGAAGTGGTTTTTCTCAAAGTGGACTTGAAAATTAAGGGTC +AAAGTTTTAAAAGAAAACTTTGAAAAAAACTTGGGAATTTGTTCACGAAT +ATACGAAGTTGCTGAAAATTTGTAAAACGTTTTGGAACATTCTAAAACTA +TTTGAATTTAAATAATTTTTTAAACATACCAAAAAATCATGGATGTTCTC +AAAATTCACGAAAGTTTCGAAAATTCCCTACAGTACTCCTGAACTAGCCC 
+TACAGTATCCTTAAAGTACCCTACAGTACCTATACAGAACTCCTGTAGTA +CCCTTACAGTATCCCTAAAGTATCCCTACAGTACCCCTACAGTTTCCCTA +TAGTACCCCTACAGTACTCTCGCAGTATCCATACAGTAGCCCTACAGTAT +CCCTACAGTACCCCTACAGTATTCCTACAGTACCCCTACAGTAGCCCTAC +AGTACCCCAGTAGCACCCTTACAGCATCCCTAAATTATCCCTAAATTATC +CCTACAGTACCCCTACAGTACCCCTACAGTACCCCTACAGCATCTCCATA +GTACCCCTATAGTAGCCCTACAGTATCCCTACAGTTCCCACACAGCCCTT +ATAGTGCCTATACAGAACCCCTATAATACCCCATACAATACACCTACAGT +ACCCCTACAGTATCCGTAAATTACCCCTATAGTACCCCTACAGTACCCTG +ACGGTACTCCTACAGCACCCCTACAGGACCCCTATAACATCCCTACAGTA +CCCCTGCAGTACCCATACGGTTTCCTTACAGTATCCCGTGAGTATCCCAA +CAATACTCTCACAGTACTGCTACAGTACCCCAAAAGTACCCCTACAAAAC +CCCTACAGTACCCCTACAGAACCCTCACGGTACCCTACCGTATCCCTACA +GTACCTGAAGCTCCAAAAATTTCCCCCACTTCCAGAAAGCGGTACAAGCG +ACGACCCGTGCTCCGAGATCTACCAGGGACCCTCTCCATTCAGTGAGCCC +GAGGCAAAAGCCGTACGGGACGCGTTGTTGTCGCAGAGATATAAGGGAAG +AACCGACGCCTATATCACACTACACACTTATTCACAGGTACTGTTAAGCA +GTTGAAAACTTTTTCTCGGCCACCGATATTTTCTCGGCCATCGATATTTT +CTCGGCCATCGTTATTTTCTCGGCCACCGAAATTTTTCGCGGTCATCGAT +ATACTCTCGTTCACGGATATTTTTCCCAGCAACAGATTTTTTTCTCGGTC +ACCGATATTCTCTCGGCCACCAAAATTTTGTTTTTCGTGGCCATCTATAT +TTTCATCGGCCATCTATATTTTCTAGGCCACCAATTTTTTCTCGGTCTCC +GATACTTTCTCGGCTACCAACTTTTTCTCGGCCACCAACTTTTTCTCGGC +CACAGATTTTTTTCTCGGCCATTAATTTTTGACGGCCACCGAAATTTTTT +CGGTCACCGATATTTTTCACGGCCATCGATATTTTTTGAAAATTTGATTT +GAGCAAAATATTTTCAGATTTGGATTCATCCATATGGTCACAAGAAGGAT +GCATATCCAGGAGATATTAAGGATTTGGTGAGTGATGACGTCATTCTTCT +TGAAACGTTGAAACTCAAATTTCCAGTACGAAGTTGGCAAAAAAGCTGCT +CAGGCTCTGAAACGGGTTTACGGCACAAAATACGTAGTCGGAAGCGGTGC +GGACACGCTCTACCCGGCGTCTGGAGGATCTGAAGATTGGGCAAAGCACG +AGGCGAAGGTCAAATTTGTGTACCTTCTAGAATTGAGGCCCGACGAGAAA +AGTAAAAAAAAAACGTTCAAAGTTGCTGAAAAAAGTTGGGCGATAATTTC +AGACTGGGATGGGTTCATACTGGACGAGAAGGAGCTGATCCCGACGGCTC +GGGAAACTTGGGAAGGTGTCCGAGTTGTGGCGGAAGCTGTGCTCGATCGG +ATAGTTGCCGGCAAGTCGACGGCTCCAAGAGGTAGGCAAACTTTGATGTG +CCTGCCTGCCTTGTGCCTACAGTTTTATTACGAGTAGGCACGGAGGTAGG +CAAGTAGGCAGGCAGAAATGTGGACCGATAGGCGTAAGGTTAAGGGCTTG +CGAAGCTTTTGCGTAGGATTGCGAAGAAAAAGAAAAATTTTTAAATTACA 
+GTGCGCACGGCCTAAAGGAGATGGCTTCTGAGGTAGTCACTACGATGCTC +CGCTTGCTCTGACCACGCCCCTTTCCTGGTTTCTCGATTTTTGTTTTAGA +AATTACACTTGTTTATTTTATGATTGTTGATAAATGTTTTTTTAAACATA +ATTTTTCTTGAAAAAACAACTTACAATTAGAGAAAATAGCAGCGAGCGGT +CGTGGCCGCGAAATGAAAAACTCGGCCACCAGCTATAAAATTAAAACGTA +TAATAATTTGGTGGTTCAGTTTTGCAGAAAAAAAATGGCTGGCCGAGTTT +TCACGTTTCTAGGCCATCTAGCAAAGTTGCTCTGAAGCGCTTTCGCGAAT +TCGTTTCCGAAAAAACTGTAGGTGAAAACTCGACCACGCTTTCTTAAAGC +CACGAAATTTTGAAATAATTTCTCGGTCACCAATTTTTTCTCGGCCACCG +ATATTTTCTCGGCCACCGATATTTTCTCGGCCATCAAAATTTTTCTAGGC +CACCAACTTTTTCTCGGCCATCTATATTTTCCCGGCCTCCGACATTTTTT +CTCGGCCACCGATATTTTTCGCAGCCGTCTATATTTTCTTGGCTACCAAT +TTTTTCTCGGTCTCCGATACTTTCTCGGCTACCAACTTTTTCTCGGTCAT +AGATATTTTTCGCAGTCATCTATATTTTCTCGGTCAGCGATATTTTCTTG +ACAACCCATGTTTTTCTCGGCCACCGATATTTTTCTCGACCATCTATATT +TTCTCGGCCACCAAAATTCATTCTCGGCCACTTATATTTTTCTCGGCCAC +CAATATTTTTCTCGGCCACCTATATTTTCTCGGCCACAGATTTTTTTCTC +GGCCATTAATTTTTGACGGCCACCGACATTTTTTTCGGTCACCGATTTTT +TCTCCGCCATCTATATTATCTCGGTCACCAATTTTTTTCGGTCATTGATA +TTTTCTCGGCCACCAATATTCTTCTCAGCCATCAATATTTTATCGTCCAC +CGATATTTTATCGGCCACCGAAATTTTTCGCGGTCATCGATATACTCTCG +TTCACGGATATTTTTCCCAGCAACAGATTTTTTTCTCGGTCACCGATATT +CTCTCGGCCACCAAAATTTTTCTCGGCCACCGATTTTTTTTCCTGGCCAC +AGATATTTTTCTCGGCCTCCGATATTTTTCTCGGCCACCGATATTTTTTC +TCGGCCACCAATTTTTTTCTCGGCCTCCGAAGTTTTTCGTGGCCATCTAT +ATTTTCATCGGCCATCTATATTTTCTAGGCCACCAATTTTTTCTCGGTCT +CCGATACTTTCTCGGCTACCAACTTTTTCTCGGCCACCAACTTTTTCTCG +GCCACAGATTTTTTTCTCGGCCATTAATTTTTGACGGCCACCGAAATTTT +TTCGGTCACCGATATTTTTCACGGCCATTGATATTTTTTTCGGTCACCGA +TACTTTCTCGGCCACAAATATTTTCTCGGCCACCGAAATTTTTCGCGGTC +ATCTATATTTTCTCGGCCTCAGCCATCGATATTTTTCTGGACCGTCTAGT +TTTTTTTTGGCGGCCGTGACCTCCAAATAGAATTACAAAAATAAACGATA +CATCTCATATTTCTGTGCAAATCCTCAAGTACCCCCCCCCCCCCCCCTAT +ATTTATTGTTCCAGAAGCCCCAAAAGCTCGCGGATTCCGATTCGGCGACG +GCACCGAAGGATCATGTTTCGATGTCCGACACGCGTGCAAACGATGGGTT +CAGGAACGGGAGGAGCTGTGCAGGACGGTGCCAATTTTCATGCGCGAGAA +CTGTGCCTACTCTTGCAATTTCTGTTGATTTTTTTGGTGTGCCACTTCTT +TTTTTCTGTACATATATCCATTTTGTCTTCTACATACATATATATATGTA 
+TATGAAATGTTTAAAAACCTAAAACCTAAAAAAAAACCAACCTAATACAG +TGTTCACGAGCGCCCCCCGCCGCCGGCGGCTCCTTATTTGCCTTGTGCTC +ACCGTGAGGTGGCTCCGCGCGGGAGCCCCAGTGATGAGCATCTCTTTTGT +CCACACGAATAATACAATAACACGGGCTCCTCTTCAAATATGACAAAATG +TCAAATTTTGGCGTCTCTCCTGACTTGGAGCTTCTGGGTCTTCAGACACG +TTCTTGTGTGTGAGGCGAGATGCGATTACCCTCTATATTATTGATTTTTG +GAGCATTTTTTGGTTTTCTAGGGGCTATTCATAATACAGATGATAATGCC +GGCTATCATAAGTAAGTCCCCAAAATACCCATATGGCCTAGAAATTCGGA +AAACTCTTCCATTTCACTTTTGGTAGGGACATTTGGTTTCTGTGCTAGCT +TTTATGCTGAACATGGTAATTTAAATTTCCTAGGCCAGTGTATATGTTTT +TCTAGGCTACGACATACAATTTTTTAGGCCACAAAATATTTTTTTTTAAT +TGCGGCTTATAAAAATGTTCCATAAATTATCTATTTTTGAATTACAGTCT +GAAAATTTCCATTTTTCAAGGGCACCTTTCTAGGTCACAAAAACGGATTT +TTTCAACATAGGCATTTTTCAACTTTTTCTAGGCCACGACAGATGGAAGA +GCTTTTCGTAAAAAAAGCTCGGCCACCACCCCACCTAACCAAAAAACTTC +CAGCTTCAAACTAATCCGAATCAATCCGGAAACTGAAGGAAGTGTGAAAT +ATCTTCGAAGTCTGTACGAGGATCCCTCTCCGTACGAGCTGGATTTCTGG +CAGCCTCCTACGAATATCGGTTAGTGTGACTTACCCACGTGATGTTAGCG +TGCTCCATTTTAGTTTGATCTACGTAGATCTACCAAAAATGCGGGAGAAG +AGACGCAAAGTTCTCAACTGATTCTGCATGGTTAAGAACGCACTGACGTC +ACATTTTTTGGGAAAAAAATTTCCGCATTTTTTGTAGATCAAACCGTGAT +GGGACAGTTTGCCACCACGTGCTTACCCAGGAAAATCTGAAATTTTTCGA +ATTTTCAGGAGCTATCGTAGACTTGACAGTTGCTCCGGCGGATGCTCCAA +GATTTGTGAAGGATTTGGAATCGAAGAAGATCAGCTACATCGTGGCGGTT +AATGATTTGTCAAAGTTCGTTTTTTTTTCTAGGCCACGGCCATCTGTGAC +GTCACTAAACTAGTTTCCAGAGCAATCGAAAACGAGCGCGGAAGCGACAA +ATTCTACAACCCGGTCGCTGGTTTTGCATACGACAAGTACAACAGTTTGG +AGGAGATTCAAACAGAAATGAAGCGGCTTAAGAAGGAATACCCGACCATG +ATCACGCTTATCGATATCGGACAAAGTCACGAGAATCGTACGCTTTTGGT +GATGAAGGTAAGAGCGAACGCGCTCTATTGAGTGGGCGTGGTCACGTCCA +CGTTTTGCAAAGTGGGCGCACGGTTCCTTTACAGCGGTCCCAGCTTTTGA +AGTTTTGAGTAATTTTTATATGGGAATTGCTAAAATTCATTTAAAAAAAA +CCCAAAAATACTTTTAAAGTCGTAACGAAAAAAAACAACCGTGCGCACGT +CTTGCAAAAAGTGGGCGTCCCAAAATTTAAAAACGACGGAATTTCATTAT +TTCAAACGCTCCAAACTCATTTTCACTACAAAGACATCACGCGCCTCCTG +GGAAAGCCCTTACTCATTTGCTGGATATATGACGTCACTAAATTAGTAGA +TGTGTGAAACCGCGCTCCATTGTACTACGCAACACCGATCCCACGTGGTG +TCAAAGTGTCCCATTTCGGTGTGATCTACCATAAATGCGGAAATATTGAC 
+GCGGATTTCTAAACAGATTTCGCATGGTTAAGAGCGTGCTGACGTCACAA +ACTTTTCTCTGCAAGAAATTCCCACATTTTTTGTAGATCACACTAAAATG +GGACCGTTTGACGCCACCTGCGCTCTCACTTTCAACACTCCCATTAAGCA +TTTCATGTACGCGCCAAAACGCACGGATTTCTGGCTCCCCTCATAAATTG +AAATGGAGTTTGAAAAAGAGTTTGCCGAACTAGGTCATTTTGGCTCGGCC +ATATTTGGGGTAGATTTTCGCCGCGTTGCGTGTCGCGTCGCGGCTCAATT +TAAGTTGTAAATCTAAATGTGTTTGTCCGTGTGGAGTACCCGACTTTTCC +ACGCGTTGCCCGGAATGCTATTGTCGATGGAGCGCGAAAAATTCAATAAG +GAAGGCCAGAACCCCGTGAAAACGCCTGCCGCGCCCCCGCCTGCCTTGCA +GCGCGGAACCCAAAAAGTGTCGGCCGCGGCGAAAGGCCCTCGCACTATGT +GGTGTTAATACCTAGCTATTGACTTATCGCCATAGTGTAAAAAAACCCAA +ACTCCCAAATTTTCAGATAACCGGAAAACGAAACCCCCTCGGCTCGAAAA +TCTCCATGTGGATTGACGCTGGAATTCATGCTCGAGAATGGATTGCTCCG +GCGACCGCAATGTACATTGCTCATGAGCTTCTCCTCGGGTATGAAAACGA +CGCGACGGTGGCTAAGCTGATGGACCATATCGATTTTTATATTCTACCTG +TGATGAATCCAGATGGATATGAGTATTCCAGAGAAAAGGTTGGTTTTTGA +ACCTTAAAAAAGCCTTAAAACTAGTGCTCTATTTCAGAACCGCATGTGGA +GGAAAAACCGGAGCCCCGCAAAATGCGCCCGGCAAACTTTCAGTACGGTC +TGCTGCTCTGGAGTCGACCTCAACAGGAACTTCGATTGGTTCTGGGCCTG +TAAGTCACGTGGTGTGAGAAAGTCTCATTGAAGTTTGATCTACAAAAAAT +GCGGGAATTTTTTGTAGATCAAAGCGAACTGGGACTTTCTGACTCTACGT +AGTTAGCTCAATATTAGACCTTAAAAAATAACTTAATCTTAAAAATTTCC +AGCCACCGGTTCGTCTTCGGATCCGTGCCACGACACGTATCACGGCTCGG +CGGCCTTCTCGGAGCCTGAATCACAAGCTGTCAGGGATTTCCTGGAGCAA +AACACACCCGAGGCGTTCATCTCGCTGCATTCCTACTCCCAAATGTGGTT +GATACCATATGGACACAGAAAACAGAGTTATCCACAGGACTACCATACTG +GACTGCGGCCATTGGCTCTGCGAGCAACTAAGGCGTTGTATGAGTTGTAT +GGGACGAAATATCAAGTCGGGACCGGCGCTGATTTGATGTGTAAGTTAAT +ATCCTTTTTTTCGAATTTTTAAATTTCTAAGGATTTCAAGAAATTTTAAA +ATTTTGAAAATTAAAAAAACAATTTTTTTCGGAATGTTTAAAAATTAAAA +AAAAAATTTTTTTAATAGATTTTTTAAAGCTTTTTTTTGGAAAATTTTGG +ATTTCTGAATAATTTTTTTTTCAGAACTGGAATTTTTGAGTTTATTAAAG +ATTTTTTTAGGACTTTTTAATTTAGACATTTCCTGAAAAATTTTTAAATT +ATTAAAATTTCGAACTTTTTCGATATTTTCGGATTTTTTTGCAACCTGAA +TTTTATTTTTTTTTTGGATTTATTGGAATCAGGGGTGGGCAGCAAGATTT +TTTCCGGCAAATCGGCAAATCGGCAATTTGCCGGAATTGAAAATTTCCGC +CAAATCGACAATTGCCGGAATTGAAAATTTCTGGCAAAACGGCAAATCGG +CAAACTGCCGGAATTGAAAATTTCTGGCAAACCGGCAAACAGGCTAATTG 
+CCGATTTGCCGAATTTATCGATAAATCAGTTTGCCAAACGGCAATTTCCC +CCACCCCTGATTGGGATCTTTGAAATAAAACAGGAAGAAATTTTAAATTG +TTGGAAATTTTGTAATTTTCCGGAATTTTAATAAAAAAATGCTGAAAATT +CTAAAAGTTTTGGAACTATGTAAAATTATTTGTGATTTAAAAAAAAACAT +TTTTTGGACTTTTCTGAAATTTCACGGATTTCTGGCTTCTCTCATAAATT +GAAATGGAAGAGTTTTTGCCGAACTTGGCCATTTTGGCTCTGCCATATCT +GGGATGGATTTACGGCGCGCTTCATGTCGCGTCGCGGCTCGTAAAACTAA +ATGCACTTGTCCGTGTGGAGTGAGGAGAAAATTCAATGAGGAAGGCCAGA +ACCCCGGATTTTTGAAATTTTGAAGTTCTGAGAAACCTGTGACTTCTACA +CTAACCCTGAAGAATTTCAGACGAAGCTTCTGGAGGATCACATGATTGGG +CTAAGGGGCAGCTTAAGGTCCCCTATGCCTACCTGATTGAGCTCCGACCA +AAGAACACCATGATGGGGTAAGCAGGACGATGGATCCTATCAAAATTAAA +ATTTCATTCCAGCCACGGCTTCCTTCTCCCCGAACGAGAAATTGTGCCCA +CTGGACTGGAAACCTTCGAATCCATCAAAGTGGTTGCCGACGAGCTTGTG +GCGCAATTTGTGGAACCTGTGATTCGAGCCAAGCTGACCACAACCACAAA +GCCCGCAATCCCACCGTATCGGCGTGGGTACTCTATAATCGACACCACAA +CTATGGAGCCGGTCGATGAGATTACACAGAAGCCTACAGAAGCTCCAACC +ACAGAGGAGCCCACCACCACTACTACGGAGGAACCTACAACTACCACAAC +GACCGAGGAGCCTACCGAAGCCCCAACAGAACCCAGTCCGACTACTGTCG +AGGCCACCGAGGCTTCTACAACACCAGAAGCTTCCACATCCTCTGAGACA +AGTACCACAGAGAATTCTGAACCCAACCAGAAGACATCCACACCAGTGGC +ACCTGAAGCAAGCACCGTTGAGGAGGTTGGTTTTTGGCTTAGTCTCAGGC +TTAGGCCTAGGCTTAGGCTTAAACTTAGCCTTAGTCTTAGACTGAGGTTT +AGGCTTAAGCTTAGGCTTCAGCTTAGGCTTAAACTTAGTTTTATCTTAGG +CTGTGGCTTAGTCTTAGACTTAGTTTTAGGCTTAAGCTTAGGCTCAGACT +TCGGCTTAGGCTTAGGCTTAGAATTAGGCTTAGTTTTAGGCTTAGACTTA +GTCTTGGGCTTAAGCTTCGGCTTAGGCTTAGTCTTAAGCTTAGGCTTAGG +CTTGCACTTGGGCTTGGCGTCAGTGGCGAGCGTGAGCTATTTAGGGTTTT +ATGCTTAGCTTTAGACTTAGGCTGAGGTTTAGGCTTAGGCTTCGGCTTAA +GCTTAGGCTTATGCTTAAGCTTAGGCTTGGACTTAGGCTCATACTTAGGC +TTAGGCTTAGGCTTGCACTTGGGCTTAGGCTTAGGCTTATGCTTAGTTTT +AGGCTTAGATTTAGGTTCAGGCTTAGGCTTATACCTAAGCAAATACATTT +TTGGAACTATTTGGATATTTGCCAGCGACCTATTTTATCAACTTCGAATT +TCCCAAACCATTCCAGGAAACAATCACACCGATCAAATGCGTCGACTACG +GTGACTACTGTCGTCTTTGGGGAGTCTTACAGCTCTGCTATCGGGATCAG +GTCTCCAAGTTGTGCCCCAAGACTTGCGACTCGAGGTGCTCTTTTTCGGT +GTGAGGCGCACGCTGGAAACTATCGTATGACAAATTGTCAGCTTATGTAT +GTATAAATGATTTCAAACTTTTGTAATTGTGGACTAAAAACTACATTTTT 
+CATGCCTGACTGACTGCCTGGCAGCGTGTCAGCGCCCGACATTTTCTGAG +TTCCACGACGGGATCCTGGCAGGCGTAGGCGTGCAATTTTTAACTCATCA +AATTGCTTGGAATAATTAAGCACTGCTACAAATGATGTCACTAGCTACAA +ACTACAAACTAGGAATTTCACACTACAAACTACAAACTACAATCTTAGCT +CTTCAAGCTATGAAATACAAACTAAGAAATTTTTATTTTTCAATATTTTT +TAAACGAGGTGTAATTATGGGCGTATATAGTAAAAACTGGAACGACAAAA +TGCAAATCACAAACTACAAACTAGAAACTAAAAACTAGAAACTGCAAACC +ACAAACTACAAACCACAAACTACAAACTACAAACTACAAACTACAAATTA +CAAACTACAAGCTACAAACTACTGAACTCTGAATAAATTTGAACGTGTAA +ATTTTGCGTTTTTGTGCAATGATTGGACGTATGAATTGCGAAACCATAAG +TGTTTAATCACATACTACAAACTACAAACTACAAACTACAAACTACAACC +TAGAAACTACAAACTACAAACTACAAAATATAAATTACAACCTTCAAGAA +ATTTGTATTTTTCAATATTTGTCGAACAAGTTTTAATTATTCGACGAATA +AAGTCAAAGCTGAAATCATAAAATGCTAACCACAAACTACAAACTACAAA +CTACAAACGGCAAACTACAAGCTACAAATTACTGAACTCTAATAAAATTT +GAAAGTGTACTCATGGTTTTGTGCAATTACTGGACGTATGAATTACGAAA +CCATAAGTTTTTAGTCACACACTACAAACTATAAATTACAAACTACAAAC +TACAAATGAAAACTACAAACTACAAACTGGAAACTACAACAGCAAACTAC +AAACTACAAACTACAAGCTACAAACGCTTATATAATTCGATTTTAGTTAT +TTTCTGTTTTTTATTTTTCCAAATTCTCAAACCTGAAATAAAATTACCTA +CACACAAATGTTTCTCATTATGCATCGAATGACTAATATTTTCCAATAAA +CGTTTTGAAAAAGTATGGAAGAAACAAAAAATGGAAAACAAAGGCGGAAA +TATGCATTTTTCTTCTCATTTTCTTGAAATTCGTGTGATTGTACTTATTG +ATTTTTTGTTGTTGTTAAAAACGTGGTAGGCAGGCATTCATGCCTACGTG +CCTGCCTACCAGTCGAATTCGAACCCGCAAGATGTCGGCCGCTCCCTTCG +ATTTTGGAAGTTTTTACTTATTTTCCTCTTCTGCTAACACATTAGACAAT +TATTATTCAACCCGTGTACACAATAGGGCGGCTAATAATTAGGTTGGCAG +GTAGAGGTGTACAGGAAACGTTTATAAGCTCTTTATTTACTACTGAGCTA +CCACTTATTTGGAGCCAATGCATTTTGTTTCTCAACAAGTTGGAGATTCC +AGAACAACCAAGATTTGGGCGGGGCTTATTTTGAGGCAATTTTTCAACTG +TACAGTAGATTCATATAATTTAAGTTTTGAAAACATTTAGGCGGGAATTC +AAACATTTATTTTTAAAACCATCTTGGCGGGAATTCAAATTCTAGTTTTT +CGAAAACACTTTGGCGGGAATTCAAAATGTTATTTCTTAACAACTTCCTG +AAATGCTCTAGAACCTTCTGGAATATTTGAGAAAACTCTAGAATGTTCTA +GAACCTTCTGAAAAATTCGAAAAAAGTCTAGAATGTTCTAGAGCCTTTTG +GAAAATTCGAAAAAAATCTGGAATATTCTAGAACCTTTTGGAAATTTTGA +GAAAATTCTGGAATGTTTTGGAACCTTCTGGAAAATTCGAGAAAATTCTG +GAATGTTCTAGAACCTTCTGAAAAATTTGAGAAAATTCTGGAATGTTCTA 
+GACCCTTCTGGAAATCCGAGAAAATTCTGGAATGTTCTGGAACCTTCTGG +GAAATTTTTAGAAAAATCCTGGAATTCTCTAGGACCTTCTGGAAAATTTG +AGAAAATTCTTGTCGCCAAAGTTTTGTGAAAAAATTTAGCTGGAAACTAA +ATAATTTTGTGAGAATTCAAACTTTAATTTTTCCAATTTTTTCGGATTTT +TTTTTTAGCTTTTAAGCTTTTTACATTTTCTATAAATTTTAGATTTCAAA +AAAAAATTGGCGAAAAATTTTGACCAAATTTTTTGGCTTTATAGCATAAT +TTCAAAAAGTTTAAAAAGTCCAAACTTTGCTCCAGTCCCCAAAAAAAAAT +TTGGTGGAAAATTCAAATCATGTTTTTTTTTCAAAAAATTTCATGGCCTA +GAAATTTCAGCAAAGCAGTAAGGCCGCCTACCTCCCTTCAATCCGAAAAA +TACCTAAAAATCAATCCCAAGCAATTAGTCTCATCAAGAAAGAAAAGAGC +TCTGCTTGTGCCCACTCCTCATCTCTTCTCATGTATTTGCTTTCAATTAA +ACATCTTACTCAGACAGCATAACGAAGGAAACAAAAAACGAAAGAATAAC +GAAATGAGCATATTTCCTTTATTTATTCCTTATTTATGTATTCAGGTAGA +ACAAAAAACCGGCTCGGCGGGTTTATTTGTCTGTAAATCTTATGCCTACA +CGCCTACCTACAGCGGCCCTGACTGCCTATTTGCCTAGTGTTTTTTGTCC +TTTTCGGTTGGTTTTTTTGAAAATTTTTCCTGATTTTTTTTGTCGTTTTT +TTTCCATTTTTCTGAATTTTTAGGAATTTATGGAAGTTTCTCTTTTTTCT +CCCCCCCTCGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC +TAAGCCTAAGCCTAAGCCTAAGCTTAAGCCTAAGCCTAAGCCTAAGCCTA +AGCCTAGGCCTAAGCCTAAGTCTAAGCCTAAGCCTGAGCTTAAGCTTAAG +CCAAAGCCTAAGCCTAAGCCTACTCCTATGCCTGAGCCTAAGCCTAAGCT +TAAGCCTAGGCCTAGGCTAAAATAAGCTCGGCCCCTTTCTGGGCCAAAAT +TTCATGTACCTCCTAAAATTGTTTCATCCTAAATAGCCAAAAAACTTGGA +AAAATCTATTTTTCAAATTTTGTAAAAATGTCAAAGAAAAATTCTCAGAA +GTGCGGGAAAGTTTGACCCAGAAAGGGGCGGAGCCTATTTTTGGATAGGC +TCCACCTCTTTCTAGGTTTTTTAAAATCAAATTTAGGTGTTTCCTGGTAA +GTTTTAGTGGTTTCATTTTTTGTATTCATCACATTTCTTTTTCATTTTTT +ACATTTTTCTCATAATTAATTTTTGATCTACCTGTTGAACTTGACCCCGC +CCCCCAATCTTGGTGCCTCAAAGGAATCGGAGGAGTCTAGTCAACAAGGT +AGATCAAAAATAGAATATGAGAAAAATTTGCCATCGTGTTTTTCGATAAT +ACTACAGTTTCATACAATAATCCCACAACTTAAAATTCAGCAAATAACCA +GGAAATGGGCGTGGCTTAGATTTTCAAGGTTTCGTTGTTTTTTCCAGGTT +GTACTTCTGATCTGAACGGGCTCCGACAAAAAATGCACAAAATTCGTAGT +AGTGATGTGTGACGTCTGTTCTGATGACTCGAATTCATATTATAAAAGTC +GAAAAATTCCTTCCGAACGAACCCCCCATCATCCTAACATCATCCAGATT +TCTGCACCCGCCCGCTCATATAATCTGCTTTTCTTCACCCATAATCTTTT +TGCTTATTTCTCAATCTCCGTCTTTCTTGCGTTTTTAATTAATTCAAGAA +ATCAAGCAGTTGTGGTTAATGGCGTTCTACGAATTGCTTTTTTCTTGTAT 
+TATGATATTTTAATTAATTTTATTTATTTAGTATCTAGGCATGTACATGT +GCAAGTTGAGTTGAATAACCTGAAAATTTTGTGCCAAGTTATGGGCGGGG +TCTTTCATACATTTTTTAAATTGTTTTTTGGCTAAATTTGTATGTATTTG +CTTTGAAAGTTGTGTTTTTTTTCAATTTTCTTTAAAAAAAAACCAAAAAA +AAAAATTGGTATGTAGTAACGTAGGTAGGCAAGAAGTAGGTGTGTAGGTA +GGGACTTGAAAATAGCAGCTATGAATAGGTGCGGTATTTGAAGTTTAGGC +AGGTTCGTAGGCAGATAGGTGAGTAGAAAAATCATTCAGAAGACATTCAG +GTAGGCTCACTGGTAAACAGGCATAATGTAGGTACGTAGGTTAACACTTA +CGTACGAGGCAGTAGGCATCAGGGCTTTGTGGCAACCGGCAACCGGCAAC +CAAAAACCAGGTTGCCGGTTACCGAAAAATTGCCGAAATTTCAGCAACCA +AAAGTCGCCAGAATTTTCTCGAATTTTCCAAAAGGTTCTAGAACATTCCA +GAATTTTCTCGAATTTTCCAAAAAGTTCTAGAACATTCCAAACTTTTCTC +AAAGTTTCCAAAAGACTCTAGAACATTCCAGAATATTCTCGAATTTTTCA +GAAGGTTCTAGAACATTCCTGAATTTTCTCGAATTTTCCAAAAGGTTCTA +GAACATTTCAGAATTTTCTCAAATTTTCCAAAAGGTTCTAGAACATTCCA +GAATTTTCTCGAATTTCCCAAAAGATTCTAGAACATTCCAGAATATTCTC +GAATTTTTCAGAAGGCTCTAGAACATTCCAGAATTTTCTCGAATTTTCCA +AAAAGTTCTAGAACAAGTTGCAGAAATTTTCAGCGTACGGCAACTTCAGC +AATTGCGGGTTGGCATGTAGGCAGGCAGGCATGTTAGTAGGCAGTTTTGA +CAGTTTTGAAGTTAACAATCCTACCTGATGCACAAGAAAATGCGCGGCAA +TTGCGGACCGGCAATTCTACAAAATGAGACGGTTGGGCATGTTCTTCCGA +TATTTTTTATAAAAATTTAATGATAAAGTATAGAAAAATATTTGTTTTAT +TTGAAAATAAGTTTTATTTGGCTAGGAACAACCGAAAAAGTGCTCAAAAA +TTGTCGGAATCTTGAAAATTGCCGTGAAAATTTCCAACATTTCGACTATT +TCTGGAGATTTTTACAATTTTGTCTATTAAAAAAAAACAGTTACTTTCAA +ATAAAGCAAATATTTTTCTATATTGTGTCATAAAATTTTAATGAAAAATA +TCGGGGGAACATGCCCAACCGTCTCATTTTGTAGAATTGCCGCGCACCTT +GTTAGTTTATCCCCGTAAACCTCCAATCAATAATTGGCAGGAGTCCTTCC +GTCCGTCCAAAACGTCATAAAAACTGGAGATGGCAAAATTGGAGATGTGC +CAAGTTTGCTGGAGAGGTGGGGGGAAGGAGACAATCATGTTGTCTGCGTA +TCTCCAGACTCGTCTGCTATCTCTCTTACCCGGCAAATGGGACCTCCCCA +GAACGGTGATTTTGTCCTTTCAACACCTTTTTTGTGTTTTTGTTTGTTTG +ACACCTTTTTTTCACTACTTTGCGGGAATTTAGACTGATTTCTCATGGTT +AGAAACGTGCTGACGTCACATTTTTCGCGGGAAAAAAAACCCGCATTTTT +TGTAGATCAAACCGTAATGGGATAGCCATTCTAACAATTCACTGTTTTTT +TTCTGAACAAGTCTGAAGTTTTGAGTTCAGCACCCGCAATTCTGTCTCTG +ATAAGTTTGAAAGTTTCAAATTGAAACCTCTCAATTGTCGGACCAGCGCG +CTACTTCATCATACCCCGCAAATTGAACTGCTCGCGGCAAAATTTAAAAA 
+TTGATGCTTCTCCAGAAACATCCCACCAGAAACTACCTAAGACTGAACTT +CATTAACTTCTTTTAAACTTAAACAACGATCATAATACCTTAAAAAAAAC +GTTTGTACGTGGTGTCAGAGTGTCTCATTTCATTTTGATCTACATAGATC +TACAAAAAATGCGGGAGAAGAGACGCAGAGTTTTCAACTAATTTCGCATG +GTTAAGAACGTGCTGACGTCACATTTTTTAAGACAAAAAATTCCCGCATT +TTTTGTAGATCAAACCAGCCTGGCACCACGTGCGTTTGTAATCGTTCAGT +AGGTAGGTAGACACAGGTCAGTAGGCATAGGCAGGCGGTAGGTCTGAGGT +AGGTGTTGTTGACCACCACGTAGCAAAACATGCTTGACGACTAGCCTCAA +ACGGATAATCACAAAGTTCTACGTAGTTATGATTTCAAGCCGCCCAATGA +GCACATTACGCTGACTAAGCCACAGCTATAAGACCCTCCTCCGAGTATAT +TCATCACCAAACATCCAGTTTCCAGTTTTGCAGTCTTTTTTTCTGTTGGA +AAATAAAGAGAGTACATCTCATTAGACATTAATGAACAATTTCAAGTTCA +AAACTATAAATATAAATAAAGCTAGTAATTGAGCCAGTCCAACCAAATTT +CCCACAAGTTCATTATAAACCTTCCAATTTCCCAATCCCATGTACTCCTA +CTAATCAGTTTCTTTCCTTCCGCCCTACTTTCCAAATTCATTTATCCGCT +TTGCATACTTTTCGGTAATCTTGACGATTTTCAGAGGTTTCTAGAATTTT +TTATTTTTTCAAATTATGCAAAAATAAATTTTTTTTTGTATTCTCAAAAA +ATTGCTAAAATTGTCCAACTTCCCATTTCTCGGAAATTGACGTAATTGCA +GTTTTTTGAATTTTTTTGAAATTACTGATTATCAAATTTTGTTGGAAAGT +TTCCTTTCACCTACACACCTACCATATGCCTATCTGCCTAGGTGCTTACC +TACACGCCTGCTTCGTGCCTACCCGCCTACCTGCCTGGTTTATTTTCAAA +AAGTGCAAAGAGCCAAAAGTTTCGAATTGTCAAAATTGCTCGGAAATTTT +TTGAAATTTTGGAAAATGCCAAAGTTATCAAATTCCGCATAATTCCAAAA +TTGGCAGCCCGACATCATGCCTGCCTACATTCCTGTCTGCATATAGGCCT +GACAAAATGTCCATCAATGTATGTACCTATTTCATACCTATCTACCTGTA +TAAACATAGTTCATCTCTACCTCAAACAGCATGTTACAGGCTTTCTTCTT +ACCTACCTACTTCATTAACTGCTGTTTATTGACCTGTCTACGTGCCTACC +TCATGCCTGGCATGCCTACATACCTACCTCATTTATCAGATCCTCATGAA +GCCTATTTACAAGCCTACTTCCAGCCTACCTATTGTGTTGATTACGATCA +CCGTCTCTAGGTTGCCTAACCTGCCTAATTGTGTCTACCTACTTAGCTAT +CTCATGCCTGCCTACTAACCTGCTCAAGTACCTACTTCATGCCTACCGGC +CTACGTTCCTACCAACGTGCCTACCTGCCTGCTCACTAGCCTAAGTGTCT +GCCTACATAAATTGTCCGTTAGCACATCAAACTTTTCATATATACAAAAA +ATTGCATAAAAAGAAGATAATCATTAATAACAAGTGGGTAACTCAAAAAG +GAAATTGGTTTATCTCCCGGCTCCGAGAGCACCAGCACTTTGCTTTTCCG +TCACTACAACCTCTAATTTTGTTTTGTTCGAAGGACTTTTTCCGATACAT +ACTAATTTTTTCGGATCCGGTTGGCTCTGGATTTCAAGTTTTATAATAAC +TTGGGCTCCCAGTGACGTCACCAAAGGTGCAATTAGCCTTTAGTAACTAA 
+ACAGCTTGCTCCTAGTTTACTGCTCCGTCTTCTCAAATGAAACAGAGAGA +GAGAGACCCCTCATCGAAAATTCAATTAAACGCTAAAACGTAGGTAGGAA +AGAGAGAGAGAGGGAGACTCCTCCCAAAAAAAAAAGACAGGGGGGCTCTC +TCTCTCTCTCTCTCTCTTCGAGCGGCAAATCGGCTTCCGAATTCATTCAA +AAACAAATTATGTGCTCAACCACCACCACTACCACCGGCTCCAGGCGGGT +AATTACACTTGAAAACCAAGTACTAGGTTTCGTGGCCGGGCAAACTTTTT +CTTATTAGACCCCGGTGGAATTTTGTAGGTCTATTTAAAATTGCAATTGC +GCTATTTTGTACTGTTTTTTTTTTGACAAACTTTGAAGCTTGATGTCAAG +CTGGAAAAGCGCTCTAATGACAATTTTGACTGCAAACTCGCTCTGTTGAT +AAATGGTCCGTGTACTCCACGGGGGCAATTAAACTGGATTTTTTTCGATT +AAATAATATCTTGATTTTTTGCAGTTTTGGCTTAGGCTTAGGCGTAGGCT +TAGGTTTAGTCTCGGGCTTAGGCTTATGGTCAGGGTCAGGCTTAGGCATA +GGCTTGGGCTTAGGCTTCGGCTTAGACTTAGGCTTAGGCTTAGGCTTATG +CTTAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGGCT +TAGCCTAAGGCTTAGGTTTAGGTTTAGACTTGGGTTTAAACAGGCTCAGG +CTTAGGCTTAGGCTTAGGCTCATGCTCAGGCTTAGGCTCAGACTTAGGCT +TAGACTTAGGCTTAGGTTTTGGCTTAGTCTTAGGTTTGGACTTAGTCTTA +GGCTTAGTCTCGGACTTAGGCTTAGGCTTAGGCTTAAGTTCAGGCTTAGG +TTTAGGCTTAGTCTGAATATTTGGCTTAGCCTTGGGCTTAGGTTTAGGGC +TTAGGCTTAGGATTAGGCTTTGGCTTGCCGGCCACCTGGGATATTGTGGT +TTGACAGGGGTAGGCTATCCAAATTTTAAAAAAATCGGCTACAAAATTAA +TATTTAGATGGGCCTATAAACCATTTCTCTAGGCCATCCTGCAAAATTTC +CAAGACAAAAAATTTCAGCCGTGAATAAAGTTTCCGTTTTCCTAGGCCAC +CTCCAAATTTTCCACGTCATATGTAAGGTATCGGAGGGTACTGTAGAAAA +TTATCAAAAATCAGCAAAGAATAATAAGCATTAAGAAAGTTTTTTGTGTG +TGTGTGTTTTTTGTGGGAGAAAACATTTCCCCTCTCCTTTTTTCCTGCCT +GCTCATTTGGATACCACCGCTAACCCAACTTGACTTTGATTACCCATTTC +CGTCCCCCTTCTTCGTCTTCTTCTACTCCGGGATAGAGAGAGAGCTCTAT +ATAATTACTTTCGTCTTCTTGATCCGTTACCGCCCATTGGCAATTGGTTT +GAGCTGCCGCCGGAGAATATATACATGCATGTCTAATTTTTTACTGGAAA +ATCTAGGAATTTTTACGTTGAAAATTTGGTTAAAAGCGTACAGATCGGCC +CATGCTGGTGATTGTACGAAATTTTAATTTCAAAGTATCTGCAGAGGAAG +CTAGGCCACAAGCCCCTACGTGGCCGTCAAAATTGAAAACTAGGCCACCG +ATTTTTTCTCGGACTTTTTATCTACTTTTGCTATTAGGCCCTGTCTTCTT +TTTTGCAACGTTTTTCTACTAGGTCACCAAATTTTTCCTAGGCCAAGCTG +CCTAAAACTCGGCCACCGATTTTTATTTGCGTCGGTCTGTAACATTCAGG +CTCCGACCAGCTCGACTTTTGCTACGTGGTCGCCAATTTTTCCTAAGCCA +CATCGAAAATGACTCGCTGTGACATTTGTACTTTTTTTCACACGGCCTAG 
+AAATTTTGAAAAAAACTCGGCCACTGATTTTATCGGCCTTGGAATCCTAT +GTAACTTTGTGGCATGATTTTTTATTTATTGTCCATCCAGTTTTTCGTCT +GAAAATTACAAAATGTTTTAGTGACCGTTGGAGAAAACTCGGCCACCAAT +TTTTTTCTCGGCCAAGTTACCAAATTACGGGATACCGTGTTCTCTATCAT +ATTTCTGGCTCAAAACATACTCTACTTCCGTGCTGCCTACGTGGCCGCTC +AATTAGAAAACTCGGCCATCATTTATTTTCTAGGCCATGTCGAAAATAGG +TAATGTAAATAGGCAAAGAAATGCGGTGGTTCAAGTAATTTTGGGAAATG +GATTTTAAAAAGTTGAAAAAATAAATCAAATGAATAAGTCTGAAACACGC +AAACCTTAAAGCTAGGCCATCAATTTTTTTCTCGGCCAGGTTGCCAATCT +ACGGGAAACCGTCATGTTTCTTGCTCAAAATTACACCCTATTCTCGAAAA +ATTTTTTTTACGTTGAGTATGCTCTCCTGCCTAGGTGGCCGCCCAATTAG +AAAACTCGGCCACCCATTTTTTCTAGGTCATGTCGAAAATAGGCGATGTA +ATGGGGTAGTTCAAGGAATTTTGCGTAATGAATAACCCCAAAAATGTTTT +CAATATATGGCCGCCAAAAAAGAGAAAACTCGGCCACCAATTTTTTCTCG +GCCAGGTAGGCTACCAATCTACGGGATACCGTGCGCTCCATCCTATCTCA +GGCTCAAAATTACGCCTTTTTTCTCGAAAAATTTTTGCATACTCTCCTCC +CGTGCGGTTCTTAATTAATGAAAGACGTTTTTTTTTCTCCTTGTCTAACG +AGTGTATATGCAGAATTCAGGATATGCGTCTTCTTTTCCGAGTTCTTGTC +TTCAAAGCGAAAAAAAAAGTGTGTGTGTGTGTGGACATAGGAAGACTCGT +CTTCGAAGAAGACGCTCATTAGCTCATTTCATCCACTCGCTAATTTCATT +AATTTTGTGCTTCTTCGCCTGCCTTCCGACGTGTTTACCCACACCACCAC +CACCACCACTCAGTACCTCCTCACTCACCGTGGAGAAAGATGTTCTGGAT +TTTTTTAAGCTTTTGAGTTTTGTGTCCTACCTCACTTTTTAGGCTCCACC +CACTTCTTGGCCTGTTATTTCGGTTTATTAGAATTTGTGTGGTGTACTAC +AAATTTCGATCCGAAAAATGGCAAAAAAAATTCAGCTGACTGAATTTTTT +TGAACTTTCAAAGGTTCAGTTTTTGCAACCTAAACCTCTCTTAACTTTAA +CTTTTCAAACCCCGCCCATATCTTGGCTTTTCCAGAAATCCGAGTAATTT +TTTGGGAAATTTATGGTACAACTAGCTATCAGAACAATTTTTACGGAAGT +TTTTTTAATTTTATTTTAATTTAAGTCTACTGAAATTGACCTAAAATATA +TTTTGAAAAATGCTTTGATCCCGAAATTGGCCAAAAAACGTTGATCGAAT +TTTTTTGAACTTTCAAAGGTTCCGTTTTTGCAACCTTTAAGCCCCGCCCA +TTTCTTGGCTGTATTGTATTCAAAGTTAAAAGTTGTTCATGTTGTACTAA +AATATGCAAGTATCATAAAAAATTAACTTTTTAAGCTCCACCCATTTCTT +GGTTTTTCTGGAAATCCGAGTAATTTTCTGGGACTGTTATGGTACAATTA +GGTATCAGAAAAAAAATCAAGGAAATTTTTTACTTGGATTTATGTCCACT +GAAATTGACCTGAAAATGTTTTGAGATTTTAAAAAACTTAAATATTTTGA +TAAAGGTAGAATTTGAATTCCCCGCCCATTTCTTGGCTGCAGTTCCCAAC +TGTCCAACTTTTTGAATATTTTCTCAAACTTCATGGCCTAAAATTCCAAT 
+TCTAGGCGATCTACCTTGATAAGAACTTACTCCTGAGTTATGTTTGAAAA +ACATAGTCCATCCGTGGCCTAGAAATCCAAAACTAGGCCATCAGATTTTA +GGTTCCTAGGCCCAATCACAAAGCCAGCAGTTTTAAGTTGTCAAATTTCA +GTGGCATAACCTCATAATTTAGACCTATTCTATTAGGTCAGCCTATTACA +GTTTCTGGCCTAGAAATTTTCCTTGAGTTTGAGCCCACTACCCCCACACC +CACATGGCCTAGAAACCCAAGCCTAGGCCATGTGTCCCCGCATTCTCACT +CACTTTTAATTTCTTAATCCACTTACACAATTCTCTCCCCCTTTTCCAAT +TTCACATCGAACCCGGGAAAGCTCATTAGCTCATTTGTTGTGTGTGTGTG +AGTTGAAGAAGCGGGGGGAAGGGGAGCCCCCCTTTTGAGTGGACGGGGAG +CTAGAATGGAAAAAGAGAGAAGGAGGAGAGCCATTTTGAATGGGTATGCA +ATTAGCAATCGATGCAAATTAATTAAACAAACAACGAAATGAAAGGGGGG +GGGAGGGCAATCGCCTGCCTTCACTCACATGTCCCCTGCTGGAAATGTGG +AAATGTAAATTAAAATTAGAAAACTAGCAAAAAAGTGCGTTTTTTATTCC +TACTTGTTAATTAGTTTAAATTTCTATATATGTATGGCTTTGACATGCCA +ATTTTGGCGTCTAAGGGTAGGTATAGGTGGGCGATGCACCATGTTAAATG +GTCGATGCATCATGCCAAATAGTAAATGCACCATTACACATTGAAAATTT +AGCATTATACACTCCATATAACTGAAATTCGGGGCCCAATCAATATCATG +CCGCCGACATCTCACGGACTCCGCGCGCCGCTATGTTTAACTCGCAGCGG +GCGGAGACAGCTGGCCGCGCCCACAACGAGTTAAACACAGCGGCGCGCGG +AACTCGTCAGATGTCGGCGGCCTGATATTGGTTGGGTCCCGAATTTCAGT +TTTATTATTTGGAGTGTATAATGCTAAATTTTCAGTGCACCGTGTTAAAT +GGTCGATGCACCATGATAATGCTAAATAGTCCATGCACCTTGTCTAGAAA +ACTCGTTGAGTACTGTACTTATGAAACAGTTTAGCAAAAAAAAATGTCAG +CCTGTTTGGCTCCGCCCCGAAAGTGGGCGGAGTCTAATTTCTTGAATTTT +TTTTTTTCAAAAATTGTTTTAAAAATAGCTCTGTGAATTCCACCTCAAGC +AACTATTTTTAGTGGAAAGCAAATTTTTTCAAAATTTTGCGCAAATGGTT +CTGAGGCTCCGCCTTGAAATTGGATGCTCTTAATTAAGAAAGAAGTGTTA +GTCCCGCCCCTTTATTGGAGGAACTCAAAACTGGGAGGAGCTTAAGAAGG +TTATAAAATTTTGCTAATTAAAACACCCAGCTCCGCCCACTTATATATTA +GTTGACTCCGCCCCACCCATTAAAAGTGGGCGGAGCTTAAAAATATTGAC +CACGCCCCTTTCTTGGGTAGTTTTAGCGTTTTTCATAGAGTCAATTTTCA +CGGCGGACCCCGGATCGATGCACCATGATTTGACGCGCAACCCAGGTAGT +ATGACGTCACTCGTGGCCGAACTCGCGGGGGAAATTTGTACTTACAGCAA +TAAATTTCGGTGTCTCACGGAGCACCTGCACACGTACCGATCATGAGTGA +TGCTTCCCACCACAAGACATACACATACACACACAAACCTACTAATTAAG +ACAAATTATAGAACAAGTGGGGGGAGAAGGGTAGAAATAAAAGTGAGCAG +TGCGAGAGAACGCGTTAGACGGAGAAGCTGCTCTGGGAGACGTGTGAGAG +GCATATAAGTTGGATTAGTTTACTGCTCTTTTCATGGGAAAATCAAGTGA 
+GCTCCAAATATTTAATCTGACTTTGATTTGGTTTGATTCTGAAATTTTTT +TGGGATTTGCAGAAAAAATACGTTTTTTTTTTGTATTTTGGAATCGCACC +AATACGCTGCGTTGCCCTCCTACAGTGCAACTGAGCCACATTTCTCTTCC +ATAACTTTTTTCTTAATCTCAAAGATTAAAACTCTGCAAAAGCTCAATAT +TTTATAAATGATAAGGATTAGCAAAGTGTGGGTGATCTTCCAGAGGGGGG +ATTACCATAATTCATTACTCATTTTTCAAGTTTCATCTGTGTTTTGTTTG +ATAACCTAGGTGACCTACTAGTTTACCTACAGGGCTGGGACCAAAAAAAA +AATTTGGACCAAAAAACAAAAAAACAAAAAATTGAAGTTTTTGAAAAACC +AAAAAAACCAAAAAAAAACAAAAAAAAACAAAAAAAACCAAAAAATTTCT +TATGCTTAAGTTGATTTTTAATGGGGTTATTCAAGTAATGTTGCAAAATG +TATTAAAATACATTATGACGTCACAACTGTGTTAAAATACATGTTTTAAT +GTATTTTAATACATAATAGTCTCGAGTCGAGACTAGACATGGTAAACACT +TTTTTGGATTTTTTTGGATTTTTTGGTCCCAAAAAACCAAAAAAAACCAA +AAAATCGATTTTTCGTCAAAATACCAAAAAAAACCAAAAAACAAAAAAAC +CAAAAAATTCTCAGCCCTGGTTACCTGTCAGCAAATAATTTTCCCCTATA +TACCACCACTTCAGGAGTAATCAACACCCCAAAACGGCAATTATTTATTA +TTCTCCTCACCTTCTCAACACTCCATAGCGCCAGCCAGCCTAATTTGATT +GTTTAATTCGATTCAGGGCACCCCACCACACACAAATTTATTAATTTTTG +TCTTTCTCGCCATTTTCCAATAATTTTCCCGGCTTGTGTTAGCTTTATTA +TAGAACTTTTTAAAGCGATTTCTTAGGAATTTATGTGTGACGTCATATGT +GGCCTAGAAATTTGACGATCTCGGCCGTCGGGGTCGATGAACCATGTTGT +GATGTATAGGTGGCCTAGAAAATTTTAGGCCACCATGATAGTCGTGGCCT +AGAAATTTGGGCGCCTTCAGGTGGCCTAGAATGTGAAAAGTGACGTGTGA +CCTAGTTCTTTTGACGGTCAGGCCAATTTTCCAATTCGATGCTTTTTTTT +TAATTAAGGTCCATATTCTGCAATTTTAGGCTCCGCCCATATCTTGGTTA +AACATTTTTTTTTTGCTTTTTTCTGCAATATTCATTCAATCCATCATTGT +TCTCAAAATTTGATGCACCATGATATGGCAGTGGCCTAGAAAGTGCGACG +TCACTTATGGCCTAGAAAATTCGAGGATCTTGAAATGGGAGAAAGTAAAA +AAGTAATCAATGCCCCCTGACGGTCGTGGCCTAGAAAGTTAGAAGATCTA +GAGGAGGGGAGTAGGAGATCGATGCACCATGTACAGCTACAGTGACGACT +CACGTGGCCTAGCTTTTTCGCAGGCCACGGCCATAAAAAAATGTATGAGA +TCGATGCACCATGTTTGTCGCGGACTAGAAATCGAGAATGTGGCGCACGT +CACGCATGGCCTAGTGACTTTTACGGTCAGGCCGAAACACAGGACGTGGC +CTAGAAGTCATTCTGTTAGTCTAAGACTCTGCAAAAACTCTCAAAAAAGG +TGAGCCTATGTGCCTGCCTACTTGCCTACATTCTTGCCTACCTGTTACTG +CTTTCTAATCTGTACCATGTTGCACCTACAGTACCCCCTTCCGCCCCCGT +AAAGACTTTTTCAAGCTCAAAAGTGCAGAAAACACAAAAAAACCAAGCCC +AATCTACAAAAGCTCATCTATTCGTGCTTGGATTAAAATGCCATTAAAAT 
+GATTAATTAAGAGATCGAGACTGGTTGAAAACTGATCGGATTAAAAAGCG +ACTGAGGTCCGCGAATGAGCTTTTCATGCAACTGATGAGCACATTTCCTG +TGTATGTACACTGATGATAAATCTCCTTATCTTTTCCGAGACATAACAAT +CTCAATGTTCTTCATTAGTTTCTTCATTTCCTTCTGAAATTTGTTGCTTT +GATTTATTAAAATCTGTCATTATGTAAGATCCATAATTCGTGATTCGAAA +TTTTTTACAGTTTTTTTTAAATATAGAAATTTGAATAACTTTCCTACCAG +GCTGCCCCAATACAGTTTGATCTACAAAAAATGCAGGAATTTTTTGCCCA +AAAATATGTGACGTCGGCACGTTCTTAACCATACAAAATCAGTTGAGAAC +TCTGCGTCTCTTCTCCCGCATTTTTTGTAGATCACCGTAATACAATGCAA +AAATGAGAGAAAAAAATTTGAAAAAAAACCTATCCAAAAATTTGGCATTT +TCAGATTTTTTTTTTCGAGCAGACCTAAAAATGTTTCGATAAATTCTAGA +TTTTTTGAAAAACTGTCTGAATTTTTTTTTTTGAGAACAATGATTATATT +TTAAAGATCAGTGTTGACTTGAATCATGTAGCGTAAAAAAGGTTAAAAAA +AAACCAAGATATGGGCGGAGCCTAAAATTACAGAAATTGGACCGTAAATT +TTTCAAAAAATTTTTTTTTTTTCAAAAACCGAAATTTTATTTTAATTTCA +AAATTTCCGCTTCGGGGTTTCTTGATTTTTTCAATTTCAATTTCAATTTT +TTAGATTTTTTCCCGTAAAAAACCCAAAATAAATTTACTGAAGCTTCACT +ATTGGATTAAAAATTTATAAGGTTCTTTGCGGGAAAATTAGTAAAAAATC +GAAAATATATTTTAAAAGAAAAATCAAAATTAATACAAAAAATTAAGGCT +TCCATGTAGGTTCGCGTGGTGCCAGGCTATCCCATTATTTTTTCTTTTTT +TTTTTTTTTCCCCAGGAAAGTGTGACGTCAGCACGTTCTTAACCATGCTA +AATCAGTTGAAAAGTCTGCCTCTCTTCTCCCGCATTTTTCGAAGATCATA +CCAAAATGGGGCACTTTGACGCCACGTGTAGACTGTAGGCCATAAACCGC +CTGCCTGCCTGATTTTCAGGCTACCTACGCCTGCCTGTCGCCCCAATCCA +CGCCTTATGCTAAAACACATTCATTTCTTAATTTGATTTTCACCAAAGTG +ATAAAAAAGATAAATTAAAAATTAGTACATTCTCGTCAAAATGCAAAAAG +AGTCGTAAGGCAGGCAGGTTTCAGGCAGGCCCTGGCTCACGCCTGCCTAC +GATGCCGGTTACTAAATTTCCAAAATACCTTATTTACTCTACAAATCCTC +TACCCCGACATTACATCTCAAAACTCATCCTATTAATTTCTGCCTTAGGC +GATAGCACTACTAATTGATAGGAAAAGACGACTTGTGTGTGTGTGTGTTG +AATTATTATACTAATTTGATTTTAGGCAGCCTCTTTCGTCAAAAAAAAAT +AACTTGTGTCACCACACCACACCTCCCACGTTTTCGGTGAGTGAAGCTTC +TGCTCGCTGTTAAGCAGATATTCATAGGGAAATCATCCGTAAATCCATTC +AAGTCCTCTTTATTTTCTTCTTCTTCGACCAAATTAGTATACAGATGTCA +TCAACTTTTTTGTTTCTTTTTGAGGGCCCAGGCTGTTCATTACAGTTTGA +TCTACAAAAAATGCGGGAAATTTTTTTTTGCTTTTTGCCCAAAAGAATGT +GATGTCAGCACGTTCTTAACCATGCAAAATCAGTTGAGAACTCTGCGTCT +CTTCTCCCGCGTTTTTTGTAGATCTACGTAGATCAAAACGAAGTGGGAAA 
+CTCTGACACCACGTGCTTTGAGAAAAAATTTGAAAAAAACCTTTTTAAAA +ATTTGGCATTTTCAGTTTTTTTTCTTTACAAAATTTTTTTGAACGAATCT +AAAAATGTTTCGATAAATTCTAGATTTTTTGAAAAACTGTCTGAAATTTT +TTTTGAGAACAATGATGACTTGAATATTGTAGAAAATCGTAAAAATAAGT +TTTAAAAAACCAAAAAAAAATGGGCGAAGCCTAAAATTACAGAAATTGGA +CCATAAAAGTTTCCAATTTTTTCTTCACAACTTTTTTCAAAATTGCTGAA +ATAAAAAAAAACTATTATTTAAATTTAAATTTCAGAAATCCTAATTTTGA +CGCCGATTTTTAAAATTATAAGGATTTGTTTTTCCTGCAAATTCTTTTTT +AGAAACTTACAGTTTGTCGGTTTGTCGATAGAGCGTGCTTATTCACCAGG +CTGTCCATTACAGTTTGATCTACAACAGTGCGGGAACTTTTTTTTTGCTT +TTTGTCCAAAAGACTGTGATATCAGCACGTTCTTAACCATGCAAAATCAG +TTCCGCATTTTTTGTAGATCAACGTAGATCAAACCGAAATGGGGCACTCA +GACACCATTTGGGGGGCCTAGAAACCGGTCCCTTTCTCATTCAAAGTGCT +CTCTGTTATCAAGTCTCCCACGCTTTATGCATTCAACTTTGTACAGTTCA +CACGAGCACCCACGACTTGATTATTCAACATTATACAGAACTAACTATTC +GAATCTCAACATGTCTTCTTGCCGAATTTAATCAACTTTGACTTCTTCTT +CTTCTTCGACTTCTTTATACAACTACTCATCTTCTAGTTGCCCCTCTAGT +TGCCTCACACAAGCAAAATTTCTTTTGAAATGAAATTTAAACGAATTGAC +TAGTCAAAATTTCATCAATTTTCTACTAGAGAGAAGCGACGAGATTGACT +TTTGGCAAATATAGAAGAGAGAGGCACCGAAAATGAATTAGATTTAACAC +AACAACGAGTAGATGAGTTGTGTGTGTCAACGATGATGATGATGATGAAA +AGAGCAGAGGAGCAAGCATGAGTGATTTCTGCGGCGGAGGTCGCGGCGGA +TGAGGAGCATGAAGAAGGCTAACGAGCTGTATGGCGATGCGGCTAAAAAT +TGATGATTACTGTAGATTTGGAGCTTTACACCGTTTAACTGGCTACGATT +ACATACAGACTTGTTTTAGAATATTCTGTATTTGAACCTAATAATACAAA +TTGCCGGTGTGCCGATTTGTCGGAAATTTTCAATTCCGGCAACTTGCCGG +TTTGCCGATTTGCCGTAAATTTTCAATTCCGGCAACTTACCCGTTTGCCG +ATTTGTCGGAAATTTTCAATTTTGGCAAACTGCCGGTTTGCCGATTTGCC +GGAAATTTTCAGTTCCGGCATTGTGCCGGTTTTCCGATTTGCCGGAAATT +TTCAATTCCGGCAATTTTCCGGTTTGCCGGTTTTCCGTTTGCCGGATAAC +AAAGTGTTTAGAGGGATTTTTTTATAAGAAGGAAACTCTTAAAACTGTGA +CTTTTTGAAATTGTTTCCCGTTTTCTTTCGATATTTTCATAGATTTTGCA +AGAATGCGTGCAGTTTTGCCAGTTTAGACGTAATTGAATTTCTGAAATTT +CAAAAAAAAAAGAGCAAAATCATAAATTTTTGACATTTGTCATTTTTTCC +GGTAATTTGGCGATTTGCCGGAAAAAAATCATTTACCGCCAACTCATGGT +ACGTCTAACCGCAAAAATTTTTTGGGAGCCTTTTCTGGCTTTCTTTCAAG +CTATCGTAATGTGACCACACACCTTTCTATAATTTTCAGTATTTACGCCA +ACCGAGAAGATCACAGTAGTTTAAGGTTATGGTGCAGTTTCTGGTCAATG 
+GATCAGGTCTCTTACCGCGAAAATTTTAATACCGTGAATGTGAAGCTTTG +AAAATTTTCTGCCGTAAATTGTCAATTTTCTGCCGCACTTTGTCAATTAT +CTCATAAAGGTCACTTAAGTTAGCCAAACCGTGAGCCAAAACTGCCCCTT +ATTCCTCATGCCGTCTTGGAGGAAATCTAGAGCAAACCCTGGTCCATATT +CGGACAAGGTCCCCTTTCTTCCCCCAAGCGTCTATCTAACTTTTCCTAAC +TACATATAACGGAATTAATAATCCTATCCCCAATTTAACGTGTCCTTAGT +TCTCTCCCTCCTCACCGTCTTCTAGTTTCATCCATGTCTCGCCTCATTTC +ACCCGATTGACGATCATCTCGGCTCCACACTTTGACACCATAGCGCGAAT +GAAAAAAAAGAGTGAGAAAAGGGGAAAAAAGTTGCTGCCCTATTATTAGG +AGTCAAATAGACACACACACACACACACTCAATTTTAATGGATTACGCAC +GATTTGACACCATCGACACCAATTTGGAAATGGGGGCACATAGATACATA +CACAAAAGGAGTTGTACTACTAGTTTTGGAACTAACATCATTCTAACATC +TTTTCATTTATGATTTGATTTATGCAAATGTCCTTGATTCGTGACTTCTT +GTATAGTAGTTGGGATAAGTATAATCATGGTAGTTGCTCAAGACTAAGCT +CTTTAGCAGTTTTAAGGTATGCCCCAGTTCGGGATTCCGGCTTGCTTTAA +GATTGTGTCAGGTCCGGCCCAGGTTGTCTAAAAAATACAGGTCTGCTCCA +GGACGGTATACCATCACTATATATAAAGCACGTGTCGTTCCGTCACTTTG +TAGTTTGATCTTTGATCTTTGGTCTTTGAACTCTGTAGTTTGTAGTCTCA +TCACACAAGAGAGGTCACATAGGCCCGGCCCCTTTTGTGACGTCATCACA +AATTGGCGGGAAATTCAAATTTTCTGAGAAAATATTTTGGCGGGAATTTA +AAATTTAATTTTTTGAAAACATTTTGGCGGGAATTCAAAATTTAATTTTT +CAAAAACATTTTGGCGGGAATTCAAAATTTAATTAGACTGTTCTCGGTTC +GGCCCAGATATATCCAAAGTCTTTCTCAAGACCACTCTAGATCTGTTCTA +CTGTTACCCAAGACTGCCGCTCCACACCCTGTATAAGTTGGCCCTAAGGC +TCCACTGAGTTTGATCTAAATCTTGCCAAGCCTGTATCATACCTACCTAG +GCCCACCACAGGTGCATCTCTCACAGATCTTCCACAGGTGTTTCTAAGTC +TGCATCAGATCTGCACCAGGTCTGTTTTTAGTTTTGCCTGGGTGCAGCCC +AGTAGTCTACCTCACTATCAAGCAGGTCCTGACACCAGTTCCTAGGTCTC +ACATTCAGGCTTCTAGGCGGATAGTCTACCTAGCTAAAGCTTCAACCTCC +ACTGTTTTAAAGGCTTCCCCTTTCTCAAACTTCAACTAACATTAATACAT +ACATATGTTATTAATTCGCGACTCGGCGACTAATAACAATAATTCAAAAC +CTCTTTCCTGGCAATGCTTCCGGTACTCAACACGGAGAGAGAGGTGGCAG +GTATCCGGTAATTTATTAACTCGAGCCGCGCACCCTGGCTCCCAGAGCAC +ACAGCCCCGATGATCGCTCATGCACGAGTACTCACGAGGTGGTGGTGAGA +GGTAGGTTGGAGAAAGCGGTTCTACCTTCACAAATTTCAATTTCAATTTC +CTCTCTCTCCGTTTTACTCGGGCCCCCCCCCCCCCCTTCTTTCAATTTTT +CGCCCCAAATTTGTGCTCCTTTAATGGAAAGTTTTTGAGTGTGTGAAAAA +GAGAGAGAGAGAGACACCTTCGTGTCACGCCAATTGCACTAATATGTGCT 
+CTCCCCACCTAGAGCTACCAAAATGTTCAATTCACAATTTCCGAACTAAT +TCGTGTCAAGTTTTTAGTTGGTCCTAACTCTTTGGCCTAGAAATTTAGAT +TTCTAGGTCATCCATCTAGATGCGGTGGCCTAGAATCCTAAGCTTTAGAT +GCCCAGGCCATCAGGCCTAGATTCTGAAATGTTGTGATTTTTCTAGGCCA +TCAACTTTGACCCAAAAGCCCTCTAATTTTTAGGCCATGAATCTAGGAAC +CTATCTAGAGGCCTGATCCTTTGCTTTCTTGCCAAAAACTAGAGATCAAT +GCCCTAGAAGCTCACAAAACTTTGGCCTAATAATTTCTAGGCCACCAATT +TTTTAAAGATAGCCTAAGTCATCAACCTAAAATTCACGGCTTAGAAATCA +TGAAATTCGCTTTTAAGGCCTCTGACTTCATAGCAGTGGCCTAGAACTCC +AACTGTCAGTAGTCTGTAAAAATGAGCTTTTTGGGCCTCGAACATAACAT +GGGACATGGAAGCCTAACATTTTTAGGCCACTGACTTTAAAGCAATGGCC +TAGAACCCCGTTTTTCTGTAGCCTAGACTATCATCTAGAAATCAATGGCC +TAGAATCCTAAAAATAATATTTTTCGGCCACCAAAATTTTAGGAATGGCC +TAGAAATCTATTTTGTTGTATTCTAGGCCACCATCGACAAAGGAATGGCC +TAGAATCCCTAAGAATGTGATTTTTAAGCTACCATCTATGTACCCTAGAC +CAATATTTAAAATTCAATGGCCTAGAAACTAGAATATTTGATTTCTAGGC +TACTAACTGTTGTGTGCTGTATCCTAGACTATCATCTAGACCCCAATGGC +CTAGATTCCTGAAAATGTGTTTTTTCGGCCTGAAATAGGCCTAACATTTT +CCAGTCCACCAACTCTTTTAAGATGACCTATAAGTTTAATCCTAGCTTGC +CAACTATAAATAATTGGCCTAGAAGCTCCTAGGCCACCATCTAAAAATCG +ATGGCCTAGAAACCTCCCATTTTTTGTATCCTAGGTCACCAACAACCACT +GATGCCCAAGACCCCGGCTTCTTATTTCATAATGAGCAGCAAAATGAGCT +ACCGTACCCCTTCTACACACTTCCTCCCTCTCCACACCCATTTCCCCTCA +TTTTCGTCTATTTTAGCGGCGGCGGCGCGGGATAAATTTGTGAGAGGTGA +GCTGCTCACAATAATGGGCTTGTTGTAATAGGCGGCAGTGGTAGTGGTGT +ATGGATAATTGTGAGTTCCCAAACACACAAAACTTTTTCGCCGCAATTGT +TATGCAACCAACATAATAATAATAATAATAATAAGTGAATGTGCATAAAT +GTGCTCTTTTTCGCGTCTCCACACCCGTCCTGATGGGGACGACGCTTCGT +TGAATAGTTGGAATAATAATAATAAATTGGAAAAATTTGGGCGCCCCCCT +CCTAACACCCTAACACACACGAAATTTCATTTCGCGCGCGCTTCGAAAAC +AAATTGAAGACAAGTTTTTCGCGAGACAAGTTATGATAATTTGTGGGATG +TGGTCCCCGCCGGCAGCTTCCATCAATTTGGAGCAAATTTGTGCTCTCGC +AAACCTTCTTAAAGAAGGGTTACACTAACAAAGTTTGTCGGAATCGATAT +TTTTGTAAGTGGGCCTGAACCCCCGGTCTTTAAATTGATAGGCATTCTAG +GGCTTCCATGGGAGACAGGCGCGGTTTTAGGGCCTGACGTCTGCCTCTCG +CCTCATTTCTGCACTATGGCGAGAAGCCAAAGACTTGGTGTTCACACTTT +GTGCCAGAGTGTCTCATTTCGGCTTGATCTACGTAGATCTACAAAAAATG +CGGGAGAAGAGAAGCAGAGTTCTCAACTGATTTTACATGGTAACCAGAGC 
+GTGCTGACGTCAAATTTTTTTTGGACGAAAAACTCCCGCATTTTTTGTAG +ATCAAAAACCGTAATGGGACAGCAGGGCTTTCCACCAGAATAATTCCACT +TCTTCACAATAAACCTATATATTTTTTGTCAAAATCTTGAGAATTTCCAG +AATTTCAACGAAAACCACTGCTAAAAGTGGTTCTTTCGCCGCGGCCGACA +CTTCTCGGGTTCCGCGCCGCACTATAAAGGAGGCGCGGCGCGGCAGGCGG +CGCGAGGCAGGCGTTTTGGCGCCTGCACGGATGCCCTACACCAAACTGCA +AACATTTCTAGTTCCGTTTTTCGGACTCTACGCCTTATTTCGGTCTTTTT +GTCACAACTTCGCTACACTGTGTTTTTCTCCACTTCTACGACTTTAAAGG +AGGGCGCATTTATGCGACTCACGTGATGTCAGGCAGTCTCATTGCGGTTT +GATCTACAAAAAATGCGGGAAACTTTCCACCAGGAAATTGTGACGTCAGC +ACACTCTTGCGAGATCAGTTGAAAAGTCTGCGTCTCTTCCCCCGCATTTT +TCGAAGATCAAACCAAAATGGGACATTCTGACACCACGTGGTGACTTAGG +ATCAATGTAGATCGTTTTATAGGCCATCCATTGATTTTTAGAGCTCGTGG +CGGGACCATTTTAAATGAATTTCCAAAATTTCCAAAAACCTAATTTTTTT +GTCAATTCACAAGTCCCCCGGACCCCCTCCATTGTCCCCCCGTTCACAAC +TTCCTCCCTCGTTAACCACCCATTTAGCGGGACAAAATTAGCGAATCATT +GACTCATAATCAAGTAAATCGGTGAATGCTCGACGAGCAAGAGGGGGGGG +GGGGGGTCTAATCGAATTACGGGGGAAGGGACCCCTCCGCGCGACACCTC +GCTGCCAATTAACAAGTCAACTACTTGGGGGGGGGGGGGGGCCTCCTCAC +AACAATGTTTGTATTATATTTCTTGTTATTATGATTATTATTATTATTAT +TATTAGAAATTAGTTAGTATGAATATATAATATCAACTAAAAACAAAATG +ACAGTTAATAATAATTGAAGTCATCCAATAGGTGCCAATTAGTCAAAGAG +AGGGGTCTGTGAAAACAGAAATAGAAGAGAGAAGGCGGGGGAGAGGGGCG +CTCTCAAATAATTAATCTGGGCTCCGTGGGCTCGTAAAATTCCGATAAAA +TGGCTTGTTTGTAATTAGCTAATTCACGGAATCTTTTTTTTTTGTTGGGG +TTTTACTATGTTTTTGAACAACTTCCTGTTATAACATGTTTCAAAAAAAC +TATTGACCTCAATGGGGACTTGAACCCTGGTGTTGAGATTGGTAGTCTGT +TAACCAGTACACCAAAAATTGCAATTTTTGATTTTTGTTTTGTCTATTTT +TTTGAATACAACGATAAGTAAGGATCATTTAGATTTAGAAAAAAAAAATC +AAATTTTGTAAAAATTTTCTAAAAAGTTATGAACATTCCACTTTCACAGA +TTTCTGGCTTCCCTCATAAATTGAAATGGAAGAGTTTTTGGCGAACAAGG +CCAGATGCGTTGCGTGTCGCGTCGCGGCTCGATTTTAGTTGTAATGCATT +TGTCCGTGTGGAGTCCACGACTTCCCCCGTTGTCCGCCAGACGATTGTAA +ATGGAGCGCGAAAAATTCAATGAGAAAGGCCAGAACCCCGTGCACTTTTT +TGTTTCTAAAACTGAAAATTTTGAATTTTGAAATTTTCCTTGGTTTTACA +GTATACAAAATCTTAGTTAGAGTTCGCAAAAATTATTGACACTGGTGGGG +GCTCGAACCCCTGAGGTGGCATCCCTGTTAGCCACTGCGCCAAAATTGCT +AAAACTTCAAAAAAAAAATTTTAGACCGTTTTTAAGGTTTTTAGACTTCC 
+GACTCTTCAAAAATTCCTATACAAAGTCTTCGTAAAACTAATGACTGGGG +GGCTCGAACCCCGGTCATCATATCAGTAGTCATCCCTGTTAGCCACTACA +CCACTTGCCCAGACGTGTTTCTGGTCTGTTTTACGGTGCAGCGTGGCCTA +AAAAAAACTAGTCAGCCACCAATTCGAAAAGTCCATAGACCAAGTTAATA +TAGAAATTCTTCAAATTCGAGCCAACCAACTAATATATTCACTAACCTAT +TTTCGCCGTGTTCTTCCTGTGTGTGTGTGTGCCCCTAGTTAGGCCTAGCC +CCCCCCCTTATTTTACTTGCTTCACAATTCAATTGGCCCCCACACAGTTA +CAGTAGTCATAATCATTTTGTGTCGGGTTTTCGACACATCGTTTAAACGT +GTTACCTCTCTCGGGGCCCCCTCTTCTCCTACTTCTTATTTGCTCAAAAT +TTGCGCGTCGAAATTTCTTCGTCGTGCGCTGCCCCTTCCGAATAATCAGA +TTCACCAATGTATAAAATAGGCCACGCCTCACTTTTTCCACACCCATCAT +CTTCCCTCAACTTTCCTCTTCTCAAATGCCTAATCAGCCAGTTATAACGA +GGGTGCTCTTCGTGTGGCGCCCCGCCCATTTTATTGCTCCTCTCACGCTT +CATTCTCACTTTTTCTGGGGATCCTTTTTTTTGCAGACCTAAAAATTTGA +GCTCAGCCATATTTTTTTTGTAAATATAATCAAAAAAAAATAGACATTAG +TCAAAAAAGTTGTTTTAAGCCTAAAAAAATTTTTCAGATAAAAAACGTTT +TTCTTTAACTTTTTTGAACAGTTTTTCAGTGAAATATTTTTAAATTTTTA +TTTTAAAAAAATGTTTGTTTATCTAAGCCTAAGCCTGAGCCTAAGCCTAA +TAACCCTAAATAGCGTCAGTGTGGGAGCTCACGCTCCCCCACTGACGCCA +AGCCTAAGCCTACGCCTAAGCCAAAGCCTAGGCCTAAGCCTGGGCCGAAG +CTGAGACCTAAGCTTAAGCCTAAGCCTAAGACTAAGACTGAGTTCAAGCC +TAAGCCTAAGACTAAGCCTGAGTCTCAGCCTAAGCCTAAGCCTAAGCCTA +AGCCATTTTTAAAAAGCTTAAAAATTTTTGCAATTACTATTTTTTCTGAA +AATCTGTAAAAATATCGAATTTTTAGAGTTGCTATAAAATTTAAAATAAT +ATTCAAAAAATAATCCGAAAAAATTAAATTTTTTTTCTCAAAACGTAGAT +CATACAGAATCAGCTTTAAAAAATTCAAACCATTTTTCGTTTCAAAAATG +TTCAGAAAATTTGTAAAACTTGAGTTTTGAAGAAAGTTTTCATAAACTAG +CTCTTGAATAGGGTTTTTAAGTAGGCGTTAAAACGCTTGCCTGCCTGACC +TTCAGGCGACTTTCGCCTGCCTGCTTGACTTTCAGACGTTCTCTGCCTGC +CTGCCTGACTTTAAGGCGACCTCCGCCTGCCTCTCGCCTCAATCCGAGCC +TTATCTCTTAAAATGTTTTTCATCAAATTGATAAAAATACGGAAATTACA +AAATTTTGTAAGTTCATGCCAAAATGCAGGCATCAGGCCCTGAGGCCACG +CCTGCCTAGGCCTCCGACTTTCGTTCTACAATTGTCGTTGCCGTTTCCTT +TATCCCATTTTCATACCCCAAAGTTATGATTTTTCTTACACCCCTTGCGC +TGCTGCTGCTTCCGCTCCCGTGATCTTATGAATCAGTAGTCAGAACTGCA +GGTGTGAATCTCCACAATGATCACCCAATAAACGCGAAACACCATTAGAA +ACAAAGTGTCCCCCGTTTTTTTTTTCTGAGAGTGTCGGCACCCCCCTAAC +ATATGCGGGGAACATTGTCTCAAGTTTTGTGTGCTCCTCCAGCTCCTCCA 
+TATGGTTACCGTAGTCCCCTGTCAGTGTTGTGTAATTGTTGTTGTAACTA +TCCCCCGAATACCTCTCCTCACATTCATTCACAACACAGGTGTCAACTCA +ACTCTCAGTGCTCATTTGAAGTTTATGAAGTTTAGCTTCCGGGGATGGCT +CTCCTGGGCTCCCCGCTCCCTCGAAAATTTAATTAATTGCAAGCACATGT +GGTGGTGGTGGGGCGAGCTCCGCGCTTCCCCCCCCGCCCCGATTCTCTCT +CTCTCATTTGCGACTCATTAGATTTTAAATCACTCTCAAACTTTCCACTT +TTCCAACTTTCGCGTTTTTTAAATTTTTTTTTCACTTTTTCCTCACAAAT +TCATCCAAAAAAATTTCCAGAGTTGCCCCGCCCATATATTGGCTAAATGG +TAGAATGGACGTGGCACTTTTGACAATGGATGACAACAACAAACCGCCGA +CAAGTTCAGAGTCTACGGAGAGTAGGGGTGAGTTTGGCCGGGGGACCGAT +AATTTTTAGGCCAAGACTTCTTTTTTTTTAAATCATTTTCAAAGGTACGT +TACCTTCACGTGGTGCCAGGTTGTCCCATTGCTTTTTGATCTACAGAAAA +TGCGGGATTTTTTCCCCAAAAAATGTGACATAAGCAAGTTTTTAAATCTT +TTCTCCCGCATTTTTTTGTAGATCTACGTAGATCATACTCTGACACCACG +TGTACCTTGAACCTCCTGGCTTTCTGAAACTAGAGTTCGTCGCTAGAGCG +CATTTGCATTGTCGTGGTATTTAATTTACAGTACCGCCAGAATCATGTCG +CGCCGCTTGTAGTCTAGTGGTTAACACGCTTCAGCTCTAAACAATAGGTC +CGGGGTTCGATTCCTTGCAAGATGAAATTTCTTTCTCAAGATGGAGGTAC +ATATGTGCTTTAAATGTGCACTAGAGATAGCGAAACTGATACCATGGTCC +GACATGTACAGGGTTCCGCGCGCGCCTAGGGCCTCCAAAACGCCTGCTGC +TCCAGTTTTGCGGCGACCTCCGCCTAACTCGCGGCGCGCCTTCTGTATAG +TCACGGCCTTCTGTCTTCCCTCATAAATTGAAATGGAAGAGTTTTTGCCG +AACTAGGCCATCTTGGCTCGGTCACATCTGGGCTCGATTTTAGTTGTAAA +ACTAAATGTATTTGTCCGTGTGGAGTGCACGACTTGCCCACGCGTTGACC +GGCGGGCGATTGCCAATGGAGCGCGAAAAATTCAATAAGGAAGGCCAGAA +CCCCGTGTATAGTGCCGCGCGTGGAGCCCGAAAAGTGTCAGCCGCGGCGG +AAGAACCACATTCCATAGGACTAAAAAACTAAAGTTCTCTCTTTCAGAAA +CATCACCACCAATGATGCCAGATTCTGCGGAGATAATGCGACTTCTTACG +GATCCTTCGACGGCTCAGATGTTTGGTAGGTCATAGATCAAATTTTTTCC +GCAATTTCCCGAAAAATTTCAGCCAACGAAAACACAAAATGTCAGCTGGG +ACGGATTCTCGCCGCGTCAGGCTTCGACGAGGCGAGCCTCTCCTCCTCGT +TTCCGTTCGATCCGACGCTCGGCGCGTTCGCCGACATTTCCCAATTCTCA +TCACTCCGAAATTCGTCGAAAACGCTGAAATGTCCAAAGTGTAATTGGCA +CTACAAGTACCAGGAGACCCTTGAAATTCATATGAAGGAGAAGCACAACG +ATGTGGATGTCAAGTGCATGTTTTGTGCCGAGAACCGTCCCCACCCGAAG +CTGGCTCGCGGTGAGACGTACTCGTGCGGTTATAAGCCATATCGATGCGA +TTTGTGTCGGTACAGTACGACGACAAAGGGCAATTTGAGTATTCATATGC +AAAGTGATAAGCATCTTCATGCAGTGCAAGAGCTCCCGAATAGTATAGGT 
+AGGTGGGGGGGGGAGGCGAAGGGTACTGTAGTTTTCGTGGTGGGACCCAA +GAATTTGAATTTCACAAGAATTTAACTGTTGAACTCGAACTTTTCGATTC +TAGGGTCTCGCCACGCAAAAACACTTACGCCAACTTTTTTGAACCCTAAA +AAATGATACTCAAATTAGAGTTCTCAGTGAGCTAAATTCGATTTTACAGT +ATTTTTTTAATTTGACGTTCTGTGGGTCTTACACTCAAAGGGTACTGTAG +TTTTCGTGGTGGGACCCACTTGTCCAAAAATGGACGTGATTATATTTGGA +TTAAAGCCCCTAACTTTTCTGATTTTTTTAATAAATTGGTTTTATTTTGA +AAAGACTGAGAAAATTGAGTTTAGATCGAAATCAATAAAAATCGCGTCGA +GACCCAAATTTCCCGGGTCTCGGCACGAAAATGGGGGAGAGGGTACTGTA +GTTTTCTGGCTTCTCTCATAAATTGAAATGGAAGAGTTTTTGCCGAACTA +GGCCGGCCATATCTGGGGTAGATTTACGGCGCGTTGCGTGTCGCGGCTCA +ATTTTTGTTTTAAAACATGAGGAAGACCAGAATCCCGTGTTTTCGTGGTG +GGACCCAAATGGGTCATGATTATACTTATTTGAAAGGCCTTAAGCTGAAT +TCAGATTTAAGATTTTTATTATTTTAATTCAAAAATATCCTGAATTTTCC +AGAATTCGCAGCCAACTTCGCGTGTGGTGCTCCAGTTAGCCGAAGTAGTC +CAATCGAAGAGTCCGACGGATCCCTGGTCTGCTTGATCTGCGGTATCTTC +GCCACCGAATCTATCGCCGAAATGATGGAACACGTGGAACAAGACCGTTC +CAGGACGTTCCAAGGAGACGTTACCGTCCTAAACGGGAGCTTCCGGTGCC +ACTTGTGCCCGTATAATACCACATTGAAAGCCAATTTTCAACTACACACA +AGGACGGATAAGCATCTGCAAAAGGTTCAAATTGTGAGTTTTGTATTCCC +CTCTCATCATCTTCTTATTTTTATTGTGTCGCCCTTTTCCCATTTTTGTG +TGTCTGTGTGTGTATGCGCACCGATTTGCATATACACATAAATTCATGAT +TTAATAGCGTAATGAGTAATATTTGCTTTGCCAAGGCTAACAGTTTTCGT +ATTTTAAAGCCTTTTTTCTACAATTTTTGAAAAAAAAATTCAAAAATTTG +AACATTTCAATATATCCCTAGAAAGCTCAAAATTGTAGTGAAAATTTTGA +ATTCAGTACGCTGAGACCTTTAAAATAAGTATAATCATTACTAGGTTTTG +GTTCTGACGCGAAAGTGTTCGTGGCGGAGCCCAAGCAAGGCGTAATTGAT +TATAGGGTCTTTGCACGCAAAAAATTTAAGGTCCGGTAGGCGTGGCGGGG +AGTTTTTCTTGAATTATTTTAGTTTTTCATAAATTTTCAATTTTTTGGAA +TTTATTTTTCTCTCCCTTCCCCCGCGTAGTCTGAGCCTAAACCTAAGCCT +GAGTCTAGGCCTAAGCCGAAGCCTAGGCCTAAGCCTGAACTTAAGTTTAA +GCCAAAGCCTGAGCCTAAGCCTAAGCCTAACCCTAAGCCTCAGCCTCAGC +CTAAGCCTAAGCCTAAGCATAAGGCTAAGCCTAAGCCTAAGCCTGAGCCT +AAGCTTTAGTCTAAGCGTAAGCCTAGGCCTAACCCTAACCCTAAGCTTAA +GCCTAAGCCTAACCCTAAGCCTAACCCTAAGCCTGAGCCTAAGCCTAAGC +CTAAGCCTAAAGTCTTCGTGTCGAGACCCAATTATACTCAATTGAAAGCT +CACAATGAGCTGAATTCAAATTTCTCAATGAAAATGTGAAATACTTTGAT +TTTACGGTCGGTGGCTTTACTCCCCCCGAACTTTGGAAAAAACTCTGAAA 
+TTTTTTTTTCATTGAATTCCAACCGGTTTCTACTCTCTCCGGCTATTAGT +GTATGTGTCTTTCACTTCTTTGTTCTCGTCGGGTTGTTCCCAATTTCCAA +CCCTCGTTTTACTCATTCCCCCTCTCATTTTCCCAATCCAATTAGTTATA +CCTATGGGGGGAGGTGACGGTGATGATTCTCGCCATTAAGACCCCCCCCC +TCTCCTGGGCCGATTGAGATGGAAACAAAACAAAACTGTCAATTTGAATA +AATTCGCAATTTGACGCGGGCGGCGCAGCGGGTTGCCGAGAAGGATATAA +ACTGGGGAACTCTACTTAGCCCGTAAGGTGTCGGCTGCTACTATTTTATC +ACATCTCAATTTTTACGGAATCCCGTAAGGCGTCGGCTGCTTCTCTGTTT +TTGCATGTTTTTACAATTTCGGCCCGTCCGATGTCGGCTCCTTCCAATTG +TTTTGCAAAACAATTTAAAATTTGAGCCAGCAAGGTGTCTGCTGCTCCGG +CTAGATTTCCAATATTTAAATTTTGCACAATCCTGTAAGATGTCGGGTGC +TTCTCAGTTTTTACAAAGTTTTTTCTCAAATACGGCCCGCGCAGTGTCGG +CTGCCACCTCGTATGTTTCATCAAACTTGAACGATTCGAGCCCGTTAGCT +GCCCGCTGCGTCTTTTTTTTTCTCAAAATTCCAGCCCGTAACTTTTCGGT +TGCTGAAATAAACAAAATTTTTACGAAATTTTAACCCTTGAAGTGTCGGC +TGCAAATTAAGATACCTACTCATTCAGATTCCCGTCTCTCCCTCTCCCTC +TCTCTTCATTGGGGAGTCCTTCTCCCGAGAGAAAATGAATAAAACTGTCA +ATTTGAATAAATGCACACAGCGGGAGATGTTTTTCTGTGAAGACTCCACC +GTCTTCTTCGTCTTCTGCCTATTAAGCGATGATCCTTTCACTTAACTGTC +GTTCCCTCCCCCTCTCCACATACCAATAAACCTTGCCAAAAGGTCGAGTA +TGCTTTGCCCGCCATCGTGCCGGAGGTCTTTTTTCTAGACTATGATGTTT +TTTTTTGGAAAAACATTTTTCTATGGATCGATGCACCATGATGCCGAGTT +GCCGAGTTTTCAATTTAATCAATTTTGAGATTTCAATTTTAGTCCCTTGT +ACATATTAATTTTTTTGGAATTAACATTAAAATTTGATAAACTGTAGCTA +CAACTATTTATTTTTGTGCCAAATTATACTATCCAATTTGACTTGTCTCT +TGATGCACCATGACTAACTTTCTAAGATCTCATATTTTGGCCTAAAATTA +TTCTCAAATATTTAAATTTAAACTTTAATCCTATTTTTTTTAATTCCTCG +GAAAAAACCCCGTTAACAGAATTCCCAATTTGGCCTCCAGGAGCTTTTGT +CGATGCACCATGTTCAACATTTTTTCAAAATTTTGCTCATTTTGACCTGA +AATTTTGTCATTTTTTTATGTTGCAGGGTTTTTTATTTTGTGATTTTTGC +GATGAAAACCACATTATGTGGATGAAAAAAGTTTGGAATCTAGTTTTCCC +GTTAGGCTGCCATATTGCTATGTAGTTGATGCACCATGTCCAGCTTTCTG +AAATTTTACTTTTTACATTTTTTGCTCAGTTTGACTTTAAAATTTTGTTA +GTTTGACCTTAAAATTTTGTCAGTTTGACATTAAAAATTTGTCAGTTTGA +CCTTAAAATTTTGTCAGACATATAATTTTTTTGTGTTTTTTGGTTAAAAA +ATTGAAGGTTTTTCGCGATATTTAACACATTACATGGAAAAATTCTTTTG +ATCGAGTTTTCTAATTTGTTCGTGTAGCAAGTCGATGCACCATGTCCAAT +TTTTTAAAATTTTACTTTTTTTAAATTATTTGCCCAGTTTGACCTTAAAA 
+TTTTGTCAATTTTACCTTAAAATTTTGTCAGTTTTACCTTAAAATTTTGT +CAGTTTGACCTTAAAATTTTGTCGGTTTTACCTTAAAATTTAATCAGACA +TATAAACTTTTCTGTTTTTTGGGTTTAAAAATTGATTTTTTTTGCGTTAT +TTAACACATTAAATGGCAAAAAATCATGCACCATGTCTAACTTTTTCCAA +AAACACTTGATTTTTCCCTAAAAGTTTGTCAGAATTATACATTCCTTAAG +TTCAAAACCGTACGGAAAAATTAGAACCCATGTCGATGCACCATAAGCTT +TGGCTCGCTGAGCTTCGTTGCTGCTCCTGCATATCTCTTACCCCCTCTCG +CCGCTCTCAGAACATTGACCAATTACTTGGGCGGAGCGCCGTTTTGAAGT +CGATTGGCTGATGAGTGTCCCGCCCCGCCCCCCGTCCCGCAAATTTTCGG +AGGAACTTTGCACTGCACACACACACACACAAATACACAAATACACGCCA +ATTATTGTCGATTTTTCTTGAAAATTTCTTCATTATTGTATATTGACTTT +TTTTGTCGTCCTCTAGTCTTCCGCCATCCTCGGCGTCTTCATTCGTTTCA +TTATTTCATACACCTGTTACCACTTAATTCGCCTATTATCCTCCTCATTA +CCTTCATTTTTCGGTCGGCGTTGACGTCGACATTGGCGCCCGAAATGAGA +GGAATAGGGGGGGGGGGGGGGGGGTATAGACGACGGCGACTACGACAACC +ATGATGACGATCATTTTCGTTCTATTCATCTAATCCTGCTAACCGTCGAT +TTGTCCCCCTCCGTTTTTCAACTTACCAAAACTTTTCGGTTACCTTGGGA +ATTGGTCCCCTTTTATAGGCGCGTAAATTGACAACATATTTTGCTCACTT +TTTCGAAGTTTCACAATTTTTAGTTAGTCCGTTTTTTCAATCTGGCCGAG +TTTTCAATTTTCCCCGGCCAGGGAATTCTAGTAGTATTCACATAATTTTT +CTTATTTTTGGTTTTCTAGGCTACGATAAAACTCCCTATGCCCGGCAAAA +CTTCTAGGTCCCCCATTTTACACCTGACTTCCTACCTATCTGAAATGCCT +ACTTGCCTTTCCACCTGACTAAGATGCCTACCTACCTAAAACCTACCTAC +CGAACTATAAGATGACTCCTTACCTACATAAGACTTGCCGATCTGAAATA +CCTACCTACTAAGACATGCTACCTAGCCTACCTATCTAAGATACCTACTT +ACATATAATGTACCGACACAAGACTTGCCTACCTACGCAAGGCATGATTT +TTTAGACCTACCTACCTGTACTTAAGATTTAAGACCTAGATCTTATTACA +TAAGAATTTTCATCTTAGATACCTACTCAAGATCCTACCAAAGATGCATG +CCTACCTATGACCTACTTACTTAAGATACCTGCCTACCCAAACCATACTA +CCTAGCGCTAGCGCCCACCTACCTAAGATACCTACTTACCTATAATTTAC +CTACACAAGACTTGCCTACCTACGCAAGGCATGATTTCTTATTGTTAGAC +CTACCTACCTAACTATAAGTACTTAAGATTTAAGACCTAGACGTTATTGC +ATAAGACTTTTTGTCTTAGATACCTACTCAAGATCCTCTTACCCAAGATA +TACCTAGATGAACTTCCGACCTAAGATGCCTACCTACCTAAGCCCTGTCT +ACCTAAGACCTACCTATAAGAACTTCCTACCTAAAAACACCTACCTAAAA +TAAGCCTTCAAGCTCAAAAATTTTTAGAGCATCTGCATGAGATAAGGCTG +AAAAATATTAACGAATAAGTGGAGAGCGATTAAGTTCCTGCCCTCTTAAA +GTTCGGAAGTAGGCGCCGCAAAACCCCATTACTATTGCTCATCGAGCCAT 
+TTCCGGTACTCAGTACTTTTTCATCCCCCACTCACTTTTTTCTCTCTTCT +CGATTTGATTTGCATTCTTTTTCCCCTCGAATTTCGAGCTGAGCCCGGTT +GATTTCCCGCACTTTCTCAATTCTTTCCTTAAGCCACCCCACTTGACCAG +GAGCAGTTCATTCAAACTCGCACTTAATCATATTATGACCCCCCCCCCCC +ACCCCTCTTAATTTCCTAATCCCCCTCCCCCTCCTCCTCATAATGACCAA +AAGACAATTGCAGGCGAACCATCTTCGTGAGGGAACCCGTCCGCACACGG +CAATCTACCGTCTGTCGAGCACAAAGACGAGCGTTCAAGTGTTGTGTCGA +CAGTGTCAGGAGGTGATTTCCAGTGGCGAATCGCTCCGCGAGCATCGACA +CTTTCACGCCCTGCTTGCCGCCAAGCAACGCTTGTGGCGATGCAAGTTGT +GTAGGCTGGAGTTCGACTCGGTTCTTCTTGCAGCTGGTGAGTCTTGATAG +TTTTTTTTAGGAAGAAATTATATCTGTAAGTCCCCCAAATATAAGGCCCC +CATGTCCATCTTAATGAAGCTTCTGACTTAATTTTTACTATAGATTTTGA +ATTTTGTCTATCGGAAGAAAAATTAGGTTCACCATGATTTTTCGATTTTT +TCGATGCACCATGATTTGTTGCTAGAATAGAAAACAATTCGTGGCCAAGT +TTTTTAATTATTTCTTTTTGTCGGCCAAGTTGCAAAAGTCACCTTTTGTG +TATGGATGCACCATGACCCTTTTCAGTGGATTTTGGCCCAAAAATCGTCA +AAATTTTGAATTTTTAAAAATCAAACTCTAAATTACCTTTTTTTTTCAAA +TTTTCTTAACAAATTGGAGCATTTTAGAAAGTTCCGATGCACCATGATTT +TTGCTAGAATTGAAAAAAAAATTGTCGGGCAAGTTGCAAAACTCACATTT +TATGTATCGATGCACCATGACCATTTTCGAAATTTTGCGGTATCTATTCA +AATAGTGCACATAGCCGTAAAACATTGGTGGTGGCCTCGTTTTCTCATGC +TTCGGCCACGAAAAAAAAAATTCTACAGTTTTTTGGATCGATGCACCATG +ATTATTTTTCAGTGAAAGCCTGTTTTTGCTTAGCTCAAAGGCCGGCTTAA +TGTTCCCACAAAACGTAATCCAAAAATTTAAATTTTACCGTCAAACTTTT +AGCCTGGCGTACACTAAAAATTGTAAAGTTTCAATATATCAAAAATATTT +CTGGCCTATAAAATTAAACCTGAAAGCTTAGAAATCCAAGATATTTTTTA +AAAATTGATGTCCAATACATGGGTGTGCAGCAAATCTTAAAGTTTGCCGT +GCTTAACAAACTCCGAAAAAGTGTGATATTTTTTTATGCTTTTTGGAGCA +CCAAAACTACTGAATTCGAGCTTAGCAAACGGCAAATTCGGCATATTTGC +CGCACACCCCTGGTCCACTAGTTAAGCAAAACCCACCGCTGAAAACTAAT +CAATTGGTGTTTGAAACGGTGTGTGCTACTACTAGAATTTCGCTGTTCAA +AATCTCTTTATTAAACACTTCTTTGTGTCTATTCTGCCCTTTCCTCCTCC +CCGCGCAAAATCCACTTGTACCTCTAATTACGGTCAACAAGTTTGCGGGT +TTAGTTTAGTGAATGAATTGTGAAATGAATGAATGAATGAATGGATAGGT +AGGTGTGTATGTATGTGGAAAAGTGCGACACACACAACCAGCCGTCCACC +ACCGCCACTACCACCAGCAGTAGCAACAGCCATCAAATATAATCCTTGCA +TTTCATTATTTCATAGTGTCATAGATAGTAGCAGGCCCGAGGTCCGAAAG +GCCTTGACAGCGGTTTATATTCATTTCATTTGACACCCCCCCCCCCCCCC 
+CCCCCTCCTACTATTCTTCTTCTCTACCACATACATTCACATAGTCAGGC +CATTCATGTTTTTCACACAATTTTGTTAATAAAGCCGCTCTGGCGCCACC +TCTCAGTAGTCCTTCTTCTTCATCATCATCACTCCCTCTCCCTTACCAAT +AACCAAAAACAGTCATTGATGCGTATCGGAATTATAATCGCGCATTTATT +GTAGGACACATGACTTGCCACGAGCCGTCCACTTGGCCCAACGAATCACT +TGGTGAGTCTCTTTACAGTGAATCTTGAAATTTGTGTTGTTTTGTTGTGA +AATATTGTTAATCGTTTTTTTATTAGTATCTCCAATTAGGCCAAAACTAT +AATCGTGAAAGAATAGTTTCTGCACATGGTGCATCGACCTGATATATCTC +ACTTTTTTTGGGCTTCATCAAAAACTTGTTAGCTAATTATAGTTAAGCAT +CACATCTATTGATATGTTCAATTTGAACAGAATATGGTGCATTGCATTGT +ATTGCAACTGAAACAATTTTATTGGCGCATAACATTCAACTATGTAGTTT +ATGTACATCGATCTTAATTACCGTATTTCCTCTATTAGTATTGCACCCGA +CTAATTAACTTTGAAACGTTATATCTCGGTTCATATTGAAGATATCAATA +TATAATTAACTATGAAATAATAGCAAAAGAACTAACAAATATTTTGTTAG +TTGACCATTTTTGAATAGGACAAACGATTACGGAGATATAAGCTGTCAAA +AGTGGACAATGGGGTGCAATACTAATTGTGTAAATACCTTATATCTCGGT +TAGCTTTGGTTTTAACAAAAATTTTAAACTGACAAAATATAAGTCATCTA +AATGTCTATAGTTTTGTAGTTAACAATTTTTTGATATCTTTAAAATGAAC +TAGCGATTTTCGACATGGTGCATCAACCTCAGTTACCTAGTCTAACTTTG +CTGGCTTATTTCTTAGTTTTAGTATTAACCGTTCATTTTAATATACTTTT +ATAGATAGTTAACAATTTTTTGATACCTTCAAACTGAACCGGAGCGATTT +TAGACATGGAGCTTTGATCCTATCTTGCCTTAATTTTCAAGCTTATCTTT +TTGTTGTAAAAAAAATTTAGACATGGTGCATTGACCTGAACTACCTGATA +TATCCTACTTGATATATCTTCAATTTTAGCAAGTTTTGGAAAAAAAAAAT +TTTAATGAAATCGATTTTTTTTTTCAATTTTCAACCTTCCGGCCAAAATT +TTCAATTGAGTTTTGACCTTTTTTTTTAACCCGGCAAAAATCTAAAAACT +CCCAAGAACAATCTCCCACAAAAAACTAAAAACTTGTGAATCTAGATTAG +TCAAAAATTTGCACCTTCTTCCCATGGGATGGCTCAATCGAACCCAGTTG +ATGAATCGGCCTGTGGGCCGCGAGCAGTCGCCGCTCTAATTTGGAGCATT +TGTATATATATAGCGAAGAATTTCACGTGACCAATGAAACCAATAGAAAT +CCAATCATATTTTTGTTTTGTCTCTAAATCATCTCTTCCCTCTCTCTCTC +TTGCCCCTCCTTAACCAATCAGAGACGCACACACCGCACCGCCCGTCCGT +CATCGTCATCGAGTTCCGCAATTAGTCGTCTATTCTCTCCGTTTTTGATT +GGAGATGGAGCGAAAATGAAAGAGAAAGAACGAGAGAGAGAGAGAGAGAT +TAGCTCTTAGATTCATTCAACTAATCTTGAAATAACACACACACACACAT +ACACAAATGAATTATCTTCATTTCAAATCATGGAAATCAGCAAATTCGCT +AGAAATTTCATGGAAGGGGAAGCTTGAATTAGGAGCACTACTTGATACCG +AGTTGGAATTGTAGTCTTTTAATATTTGAAGAAATATCACAATTTTTCAT 
+TTTAAACTTCTAAAAATATGCAAGTTACTTGATGCATCATGCCTCTTTAG +ATAACGTTTTTTTTAGGCTTAGGCTTAGGTTTAGGCTTAGACTTAAGCTT +AGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAGACTTAGGCTTAAGCTTAT +TCTTGAGCTTAGGCTTAAGCTTAGGCTTAAGCTTAGGCTTCGGCTTGGGC +TTAGGCTTAGGCTTAGGCTTAGGCTTAAGCTTAGGATTAGGCTTAGGCTT +AGGCTTAGACTTCTAGATACTTTCTCAAACCACCAAATTACTGTATATTC +TCTATCATAGATACTACTTCCATTTGGCAGCTTATAACTCAAGTCTTTTG +TTAGAGATATCACTAGCTTTAATTACAACATTATAGGTTAATAAATACAC +CATATTTTGTTAGTTTAAATTTTTTGATTAACCTAACGAGAACCGAAATA +TGAGTAGTCAAAGATTATCGATGCACCATGGTACTACACTTTACTTTGCC +GGCTCATAACTCGGTTTATTTTCAGGATATCAAAAAGGTTTTTACTACAA +ACCTATAGAGAAACATATAATAAAAATATTACTATTTTGCAAAATTTTGG +TAAAACTAATAAGAATTGAGATATAGGCCGTTGAAGTTGCATGATGCAGT +TCAAAGCCTACGGTTTTAGACTTATAGCTTTAAAAGGAGGTACCGTATTT +CCTCTATTAGTATTGCACCCATTGTTCAACTGTGACATCTTATATCTCAG +TTACTGTTTGTTCTAATCAGAAATTGTCAACTAACAAAATATTTCTTAGT +TCTTTTTCTATCATTTCTTAGTTGACCATATATTGATACCTTTAAAATAA +ACCGAGATAAAACGTTTCAAAGTTAAGTAGTCGGGTACAATACTGATGGA +CGAAATACGGTATTGAATATTTTGATCTACGTAACTATTTGATCTAAAAT +CTCGATATCATTTTAATTTCTGATCCATAGCTTTGCGTTCAATTATCCTG +CCTAAAAAATCTCTAAAAATAACGTCTACGAGAGCGGGTGAAACAATAGG +TGGTTAGATGTAAGTTGATACCCCTGAAGGGCATCCGGGATCCCCGCGCA +AAAACAGTACAAACATGAGATGTACAATCCCCCATTTTTTTGTGGTCGCT +CTAAGTTGAAATCGATGCAAAGACATTGTCAATATAGCCGTGACGCGACG +GGCGGGTGGGCTGGCGCGCTCGATGAATCAAGATCGTTTTGGGAAGAGAG +GGACTGAAAATATTAAAACAACATTTGCATTTTGTAATTGTTTGAATCAT +CATCATCATCATCATCATCAATCATATCATAGAGGTGGCGAAACTTTTCC +TGCTGGGAATGTGTCTCCTTTTTAGTACAGTCAATAAATGTGTCACTTTT +GAACGGGGGAAAAGCTTTTCTGCTTTTCTTCGCTCTCCACCTCCTTCCCT +CCTCGCTCATAAATTTCCATTTTTCTCTTTACAAACATTGATTGATTTCC +TTATTCTATTGACTTTCTGATATTAAATTGATCATCTACTAAAAGGTATA +TATATATATAAGGTATATCAACTCTTGGAGCCCATATCTTTAATGCTCAT +CAAACTCCCAGTTAGGACTGGGAGTGAAGTACTGGGTGGTGGGATAAGAA +AGTTAGGATGTGACGGGTTTCATGCTCATCTCGAGCTGCCAATCTTGCAC +ATTAAGCAATTGGCTCATTGGCATGCTCTTGTCCTCTGGGCCTGCCGCGG +AGCCATCTCCCGTTTGATATCTGCTCCCGGGGGACGTTCCCAACTTGGCC +GGAGAGAGGCGCCGCGAGCAGCACAAGCTCAACCAAAACTTTTCTAATGT +AATGGGCCGGAATTAGGGGAAGAACAAACGAAGGAGAGGGTGGGGTGGAT 
+GAGGAAGAGAAAAGAGCACCACCACCAACTGATTTGATGGTTGGAGAGGA +GCTCGTTTTGTTTTCTTGTCGTCGTAAGAAGAAGAAGAGAAGCGCGCTAG +TTAGTGGTTAAACGAATCAAGACCCGAATGTGATAGATGTGCCGCCAATT +TAGCCAGTAGGATTTTTGCTGATTTTGCACACATTCATTTTTCGCTTGCC +TAGACATTACATCCCATCTTATTCCGCTTCATTCATTGTCGTGATTTTAA +GATTTGGGGGACCTAGGAGCCTCTACAGTATTATTTAAATTTCACTTTTG +TTTGGATTTTGAAAAGAGATGCGTCATACATATTCATTTTAAGGCACCTA +TGTATACAGTTCAAAATTTTGGAACATTCCTGGAATAGCGATCTTAAAGC +GGGAAATATAATGACATTATCTGACAATTTTTTCGAACACCTAAAAAACT +TGAATAAAGCCAATTGAATCGTATCCCTAATGGTTTGCAAAGTTGGGCCA +TGCTCTGAATAGCTTAGAGTTCCATCCTGACCTGGAGCTCGCCAGAAGCT +GAACGCACTGAACTTCCAGTCAAGCCTCTTTTGAATAAGTCTAGTTGCAG +ACTAGAAGCCTTAACTACCTGATGAGATTTTCACCCTTCTCTCATTAGTC +TATTTCATTAAGCATGAACTTACACGAGAATTCCAATTACATTACCATGT +AACATTACTTAAGATCCCATTTCCACTTAATTTGTGAAAATCGATACGGT +TTTATAAGAAGTGTTTTGTCCTTTTGCCTATAGCCTAGAGCAAAAATACC +CAAATTATTTGGCAGATCATGTCTCCGACAATATTCTGATCGATTAAGAA +ACTTTTGCAAATCTGCTCGGTTTTTTAGTCTACATTCGCCCCCAGATAAC +TGTTCTCGGCAAGTTGCCGATTTGCCGATTTGCCGGAAATTTTTCTTTTC +GCCAACTTACTGATTTGCCGGAAATGTTTAAGGGATTTTTTTGAAGACGG +AAAATTTTTTCATTAGATATGTTCATAGAATTAGCTTGCATTACAAAATA +GGTGTATGAACATATTCAAAGGATGCGTACAATTTTGCCGATTGAAATTG +AAATTCTGAAATTTCCAAAAATAAATGTGCAAAACCACAATTTGCCGTTT +TCCGGCCAATTCGGCAAATCGGCAATTTTCTGGGTTGCAAATTTGCCGGA +AATTTTCAATTCCCTTTATTTGCCGGGTTGCAAATTTGCCAGAAATTTTC +AATTCCGCAAATTTGCCGTTTTTCCGGCCAATTCGGAAAATCGGCAATTT +TCCGGGTTGCAAATTTGCCGGAAATTTTCAATTCCCTTTATTTGCCGGGT +TGCAAATTTGCCAGAAATTTTCAATTCCGGCAATTTGTCAGTTTGCCATA +AATTTTCAATTCCGGCAATTTGTCGATTTACCGGAAAAAATCGTTTGCAC +CCACCCTTGATGTGAACGCTAGCACTACCTCCTAGTATTAAGGCTCCAGC +TAATTTCTATATCATGGCTCCCAACTATACGTTCCTCCCTCCTCCCCCTT +CCCAATTTCGAATGGGCGCCAATTATTGCTCAATTCCCATCAGATTGGGG +GGGGGGGGGGGGGGGGCATTCGTACAGTGTCAACACGAATAATAATTATG +AGCCCTCTTCTATGCCCAAAGCGCGGCGTCTTCTTCTTCACCCCTCTAGG +AAGTTCTCATAATTAGCATTTTGTAAGACTCGGTGTCCCCCTTTCAAGTA +TCTCTGGATGATTCCCCCCTCTCGCACACACTCTTCCCATTTTTTTTTCA +CAATAATCATAATCATCAAGTTGGACACCAAAAAAGCCATAAATTCGATT +CCGGTCGAATCGAGAGAATTGGAAGAGAGAGAGAGAGAGAGAGAGAAAGA 
+GGAAATGAGACAGAGGGGTGAGAGATGGAAAACGAACAAGTGTGATGGTC +GTCCCCCCCCCCCCCCCCCACGGGGCCGCTCCTCATTCCTATATTTATTT +ACAGCTCTCTCTCTCTTTTTCTCTTCAATTTCCATCTCTCTCTCCTCATC +TACAGTAATCGGGGAGGGGCTAGTGATAAGCCTCTCCCCGCCCCCCCCCC +CCCGGCGGTTAGGTGTCAGAAATTAATCATAGAGGCCACGCCTCTAATGT +TTTCGGAGCATCATCCTTAATTCTTTAATTCATTAACCTTAATTCATTAG +TCTTACACTGAATTTCATAACTAAACTTGTTGAAAAAACTTCTCAAAAAA +AAAAGTTTTGGCGGCTTAAGAAATGGCCTAAAATTAGTTCGACTTTTCAA +GCGGCTGGAAACTAACTTTTTTTGAAATCCCCCCTAATTATGGGTCTACA +ACGTAAATAAAAAAATTTGGTGACCAAATTTATTTAGGCCAAATCTGGTG +TTCAAATTTTTTAGGCCAAATCTGGTGACTAAATTTTTTAGGGCCCAATA +TGGTGAAAAAATTTTTTAAGGCCAAATCTGGTAACCACAATTTTTTAAGG +CCAAACTTTTTAAAGACCAAATTTTTAGAGCCAAATCTGGTGACCAAATG +TTTTTAGGGCCGAATCTAATGACCAAATTTTTTTAAGGCCAGACCTTTTA +AAGCCCTAATTTTTTAGGGCCAAATCTGGTGACCAAATTTTTTAGGCCAA +GTCTTGTGATCAATTTTTTAGGCCAAATCTGATGACCAAATTTTTTAGGC +CAAATCTAATGACCAAATTTATTTAGGGGCCAAATCTGGTGACCAAATGT +TTTTAGGGCCAAATCTGGTGACCAAAATTTTTTAAGGCCAAATCTAGTTA +CCAAATTTTTTAAGGCCAAATCTGGACACCGAATTTTTTAGGCCAAGTCT +TGTGATCAAATTTTTTTAAAAATTTTTTTCAAATCTGGTGACCAATGTTT +TTTTAGGGCCAAATTTGGTGGCAAAATTTTTAAGGCCAAACTTTTAGGAC +AAATCTGGTGACCAAATTTTTAAGGCCAGAATCCACAACTTTTTTTTGAG +AAATTTTGAAGAAGTTTCTAATTCACCTAATTCATTACTCTTTTTGAATT +TAATCTTCGATATTCCAGATTTCAAAACGATGGAGGATGCGGCGTACGCG +TGTGCTGGCTGCGAATTCACCACAAGTGAGTTTTTTTTGTTGTTTCCTTA +TCAGCATCAGCTCTATGCCTTCTCCCCACCCCACCCCGCTCCTCCTGTAA +CCTTCAATTGCCATTCATTTCACACAGTAATAGTACCACACAACACCCTT +CCATGCCTTCAATTTGACTCATTAATAGCCCTTTTCACCATGTCTTTTTT +CCCCCTCTTTCTCTCTAATACTCAACCTTTACCTACCTACCCGCAATTTA +ATTGGCAACTAATTCGGATACATTCGGACGGCTCAAACGATGCGCGTCCG +TCGTCGGGTGCCCGAGACTGGGACCGGTTTTCAGAATGTCAGTGTATGTG +TGTGTGTGTCTACCGGCTGGCAGGCAGCCACACCACTGCGATTCGTTAAT +TTATTGTGAGATGATGACTGTACATTATTTTGAGTGCTGCTGGTGCTGGT +GGTGTGCATCATATAATTATAGCCTAGTTGATAGAGATATATCGACACAC +ACACACATACACATTTACTTTCAATTGCTTCTACCCAATCTTTTTTCTTC +CATTCTCCAATTCCTCTCCTTGCTCCCATTTCGCCCTTTGATATTCACCG +CGTTTCGCGTTTCGCACTAATTTCGCGCCCGGACCGGTGTGCTCGGCGCG +GCACGTCTTTCATCTTTTTGCCTCAATTTCATTATGATAATAATAATCAT 
+AATAATTATGCGAGACACGGCAAAGACGCGCTCAAGAAACTGATAAGTGT +GTATAGGTCGGGTCCTTTTGAGAGAGAGAGAGAGCGAGGTGTGATAAGCG +CGTCTAATGAGCATGGGCAATCATAACATTCCTATCAGCATATCAGCATT +AAAACATTTTTTTCAGAAGACGTGTCGGACTTTGAAGAGCACGCAAAAGG +ACACGAAGAGGAGGCGGGAGCCTCTTCACAGAAGGCGTGTGCGCTGTGCC +TTGAGCCCACCGACGACCTCGAAAAGCACCTTGTCGAGCAGCATCGCATC +GCCGAGAGCGCCATCGAAAAGTTCCTGCTTACTGAGAGAACAGCCAAGTG +AGTGTTTTTTTTTGTCTTCAACTCAAATATCTAGTAGTAAGTCTTATACT +TGGCTTATATTTCAGCGGTTTCAAAATATTTTTGATAGCTTTAAAAACCA +TAGAATAACAATTAATTTGACAACAATTTTGTCGTTTGAATTTTTTGACA +AAAATTTTGTCGTTTGAATTTTTAAAATATGATCAAAGACAACCGAGATA +TAAACGGTCAAAGTCCAGTGAGAGGGGCCATACTAATAGGGATTATACGG +TAGGTAACGAAATTTTGATAAGACTTATAAGGACAAAGATAATGGAGCTA +GAAAGTTTAAACTAGTCCAATTTGAACAATAAAAATATATATAACGTACT +TTTTAGTATAAATATTTTAAAAGTAGAGTAATCTGAGATACAGTAAATTT +TTAACAATTTCTAGTCGATGCACCATGTGCAAAAACTAGTCCAGTCCTAA +AATTAAATAGCACGTACTTTTTAGTATAGATATTTCAAAAGTTAAGCCAT +CTGATATACAGTGAATTTTACCAATTCCTAGTCGATGCACCATGTGCCCA +CTTAACTTTCACGGCTCACTTCCCGGCTAATTTCAAACATTTCAAAATAT +TTTCAACTATTCAACTATAGAGGGGTTTTTAATAAACATTTTCTCAGTTG +ACGACTTTTTTATAGAATCAAAGACTACCTAGATATAGCTAGTTAAAGTC +GAGATGCAAGATTTTTCGGAAAAATCACGTGGTGTTTTCGGTTTGATCTA +CGTAGATCAACGAAAAATGCGGGAGAAGAGACGCAGAGTTATCAACTGAT +TTCGTATGGTTAAGAACGTGCTGACGATGTCACATATTTTTGAACAAAAA +ATTCCCGCATTTTTTGTAGATCAAACCGTAATGGGGCAGCCTCGCACCAC +GTGAAAAATTAGTTTTTGAGGCTTTTCTAGTCTAGAACTAAAGTTTTTTT +TTTGATTGGCCTTGTTTTTTCTAGTCTTAAACACCAGGTAGATCAAAACA +AATTTTTTTTCAGAGTTTCGATTTTTATGAATTTAAAACTTTCAAATTGT +CGCACTTCCTGTCCCTTTCAACCCCAAAAACGTTACGATTTTGTGCTATC +ATAATCATTAAACCATGCTTGCTTGTGTCAAGTTGCACAGGGTGCGCCGG +GGGGGGAATTGGCATTACTGTAAATTTCCTGCGTCTCCATCCTTGCCCCC +AGTGTCCCCATCGGCAATTAGAGAGTACGATCCGATTTTGCATATTTTAT +TCACGTCGCCTCTTGTTTACTCGCTCTAAATTTCAATCCGTCGCCAACCC +CCCCCCCCTGCCTGCCTCTTCCATTCATCATGAGTATTAGTGTATTGATT +AATTTTCGAATTCACGAATGAATTCATTGCTGGCGCCCTGCTACTAGTAA +TCAGTGAGGTCATAGCGTTGTCCTTTCGATAGGGCTAATTAAATGTTTCT +CTAGGAGCGGGAGGATAGATTTGAAATCAGGTGGGCGTCGGTTTGATCTA +CGTAGATCTACGAAAATCGCGGGAATTTAGACGCAGACTTCTCAACTGAT 
+TTTGCATGGTTAAAAACGTGCTGACGTCATATTTTTAGGGGAAAATATCC +CGCACTTTTTGTAGATCAAACCGTAATGGGACAGCCTGACACCATGTGCT +CAGTTACAATTGGAGGGTTTTGTCGGAAAAAAATCAATTTTTTAAACTGA +CAACACATTTTGCTCATTCTCCAAAACTGAGCCAAATGTGTTGTCAAACT +TTTCTTGAAATTTTTTTGAATCGAAATTATGTTGAGGTGAATTTTCAAAT +TTCAAAAAATGCCCCATTTCTATCAATTTGTCTTCACCTATGTTTTCGAA +TAAGGGCGAGGCGAGAGGCAGGCGGAGGTCGCCTTAAGATCATAAGATAG +CCTTATGTGATGACGAAAAATGCCAAAAAATGTATTTTTATATAGTTTCC +CCGTGTGTAGAAAAATGTAAAAAGAGCATGCATTTTGTGCATTTTTTGAC +ATTTTTCTGCATTTTTTTTTGACATTTTTTTTTCATTTTTCCACACAGAT +GAATAGCGGAAAAACGGAAAAAAATACATTTTTGCATTTTTCATGATCAC +ATGATCAACCCCATAAGGTTTTTTAAGGCATAATATTGATATTTTTTATT +AGTCTACTTTGCATAATGCCTACTATTGAAGGCAAATCAAATCGACATCA +CTTTTTGCTCATTTTCAAAACTGAGAGGCACGGCTTCTATGTTGACTACT +AAACCAGTAATAACACTTATAAAATGTTTGGAAAACACTCATACACATTG +CCACGTCATTTGGATGGAAGGGACCCCATTTGTTGAATCATAATTGATTG +TTAAACTATCATCACTCTCCCCCCGCCCTCCTCATTGGACCCCACGGCTC +CAAACTGTCTCCAATTTGCTCCCCGTCCTCTTCTTATCACCTTGACGCGT +AATTGAATTTTTGCGGATTTCTTCCTCCCCACCGCTTCCTTTGCTCCTTG +GGGGAAAAGTATGGAGAGTGTTTATGTGTGTGATTGAATTGAAGAAGGTG +GCTGACTCATTCTTTCATTTTCCTCGCTTTCTTTGAAAGAACAACTTGAC +AGTTTCCCCCCCCCCCCCCCATCTTATAATAATAATTACTTGCAGAGAAG +CGTCGTGTTCATCGTCGTCGGAGCCGAGCTCAGAATTCGCGCATCGCTGC +TCGAGGTGCTCGATGGCGTTCCGAAGCGAGTCGCAGCTGCAGACGCATTC +CCTTCAGCACGTGTTCAACACGTTCCACAAGTGCCCCACGTGTGGCGATT +CGTTTGATGAGAATACTATTGTGGTGAGTGAATTCACAACACTTTTGGCT +CACAGTCACGCTTGATCTACGTGTTTTTAGACAATTTTGAGTCTACACGT +GGTGTCAAAGTGTCTCATTTCGGCTTGATCTGCGTTGATCTACAGAAAAA +TGCGGGAGAAGAGTTTTGGGCAAAAAATTCCCGCATTTTTTGTAGATCAA +ACCGTAATGGGACAGCCTGGCACCACGTGCTCAGTTAAAGTAAGAGGGTT +TTGTCACTAGATTTATTTTTTGTAAACTGACAACACATTTTGCTCATTCT +CCAAAACTGAGCTAAAAGGGTTGTCAAACTTTTTTTGAAATTTTTTTAAA +CGAAATTAGTTTTGCTAGTTTTCCTAAAAGGAGCAAAAAGTGTTGTCCAA +TTTTTATAAGAAGGTTTTTGTCGCCTAATCGATTTTTGTAAACTGACAAC +CCTTTTCGCTCTTTTTTCAAAATTAATTTTTTTCTTTTGGCATTAATCCC +ATTTTTTGTAAATTAACAAAAAAATTTCAAAAAATCTTCAAGTATTTCTA +CAGGGTGGCCTAGATTCTCTATAGGGTGGCCTACATTTTTACACTGGTTG +TCTAGATTCTTAAACAGGGTGGCCTCGATTCTGTACAGGGTGGCCTAGAT 
+TTTCTACACGGTGGTCTAGATTTTCACTGGTGGCCTAGATTCTCACACTA +GGTGGCCTAGATTTTTCCACTAATAGCCTAGATTCTTTACAGGGTGTCCT +AGATTTTCACACTGGTGTCCTAGATTCTACACAGGGTGACCTAGATTTTT +TACACTGGTGGCCTAGATTCTCTACAGGGTGGCCTAAATTCCTTATAGGA +TGACCTAGATTCTTAAACAGGATGACCTAGATCTTGACACTAGTGGCCTT +GATTTTTTACAGGGTGGCCTAGATTTTCTACAATGGTCTAGATTTTTTGC +AGGGTGGCTTGGATTCTTAAACAAGGTGGCCTAGATTTTTCACAGGGTGG +CCTAGATTTTATACAGGGTGGTCTAGATTTTTACACTGGTGGCCTAGATT +CTTTACAGGTTGGCCTAGATTTTCTATAGGATGGCCTAGATTCTTAAACA +GGGTGGCCTAGATTCTTTACAGGGTAGCCTAGATTCTTCACAGGGTAGCC +TAGATTTTTACACTGGTGGCCTAGATTTTCTAAAGGGTGGTCTAGATTTT +TGCACTGGTGGCCTAGATTCTTAAACAGGCTGGCCTAAATTCCTTAGGGG +ATGCCCTAAATTCTTAAACAGGATGACCTAGATTTTATACAGGATGGCCT +AGAACTACTTTTTGTAAATCGACAGCACTTTTTGCTCGTTTTCCAAATCG +TTTTTTTTTCAGACCCATATGTTGGAGCACACCAAAGAAGAATGCGAAAT +GTGCTCTGAAACATTTGCCACAAAAGAGGCATTCCTCTCCCACCTGAACT +CAGCCCGACACCTACAACAGGCAAAGAAGCAGCTGGAAAACTCACTGGTC +GACTTAAACTCTCAGGTGAGCCGCCGCCGTGCCTCCTGCCGCCTGGCTAC +CGTAATCACCGTACCCATTATTTAATTGATTTGCTTTTAGCCACGCCGCT +GCTGCTCGGAGCGCAGAGGTGACAGAAAAAGCCGACAAAAAACAACATTA +AAATTATTACACTTTTTTATGATTGAATGCGAGAGAGATGTATGTGTGTG +GAGAGATGGAGAGGCGCATGAAATGGTGTTCGGGTTCCATAGCAAGCTCA +TTGAGCACAATGATTTTTTTTTAAATATATTTTATTGGAAATGACAGAAG +CTGAAGCTTGAAAGGAACGTCCAGTTTTAAATTAAAGTATTGATTTAGCA +CGTAGTTTCAGCATGTCCCATCACGGTTTGATCTACAAAAAATGCGGGAA +TTTTCTTCCCAAAAAATTTGACGTCAGAACGTTCTTAACCATGCGAAATA +AATTGAGAACTCTGCGTCTCAACTCCCGCATTTTTTGTAGATCTACGTAG +ATCAAACCGAAATGGGACACTTTGACACCATGTGATATTTAAAGGGTCTA +GAAAGAACTTAAAATAGCCTAGCCAAGAAATGGGCGGAGCTTTGGTAGGA +ATTTTCATAAGTCGAGGACTCCGCCCATTTCTTGGCCAATTTGGGTTTTT +GGCCTTCAATTTTTAGTGTTCACTAATTTCCAATAAGTTTAGGATTGAAA +AAGTTTTTGAGGTGAAAATTGATCCTAGCATAGGCTCCGCCCATTTCTTG +GTTTTTTTTTTGTTTTTTTTTCTTTTAAACTAAAATTCCACTGAACTTGT +CTAATTTCAGTTTCCAAAAATTTAACGAAAAAATTCGAAATGCCAAGAAA +GGGGCGGAGCCTAAGCGAAGCCTATTTTTCGATGAAATTTGGCCAAGTTG +CGATGTTTTTGAATTTCATATTTTTGAATCAAGTTTTGAAATAAAAAGTA +ATATAAAATTGGAAAATAGGCTCCGCCCATTTATTGGCTTTGTTGGATTT +TTTACTGCATAGATCACATTTTTGCATCCGAAAATTTATCGGAAAGGAAT 
+TTTTTTTCCCAATTTTTTTCCGTAAACTGTATCATCAAATTCTTTTGAGA +TTCAAAATATCCAACATAAGCACGGGGTTCTGGCCTTCCTCATTGAATTT +TTCGCGCTCCATTGACAATCGCCTGCCGGACAACGCGTGGGAAAGTCGTG +TACTCCACGCGGACAAATACATTCAGTTTTACGCGCCGTAAATCTACCCC +AGATATGGCCGAGCCAAAATGGCCTAGTTCGGCAAACTCTTTCATTTCAA +TTTATGAGGGAAGCCAGAACTCCGTACATAGGCGCAATAAAAGGTGAAAT +AGGCTCCGCCCATATCTTGGATCGGTTCCAATAATGTATCCAAATTGACA +TGTGTTAGTTACACTTGTTCCTAATCCAAAATTCTATCCGAATTTCAATT +TCCCAAAGTCAAAAAGTCAACAAGTTCTGTTCTTATATGTGTAAGGCGCG +CGCGAGCGCGATCGTTTGTCTCTAGTATTTGCTCATCTCTCTCTCTCTCA +ATTACCGTACCCATTATCATTCCGCCCATGGATGATGATGATGATGAAAA +TGAGCGGTGGGCCCCCTCTTCCCCTGCCTCTTTTCGCGCATTCATCATGT +TTGTACAAAAGGCGGCGGTTTGAGAAAAGAAGACAGTGAGAGGGAGGGAG +AGAGAGAGGTGATGAAGGTAGTAAACGTGTGTCGACAAACACATATAGAG +AACGATTCGTGTGAAATAGATGAGGGCAAATAGGACGAATTTATTTAAGA +GAAGAATAAGATGCTTTGGCCCAGATGAGGGGGGGGGGGGGGTATCATGA +AGGTGTGATGACGAACCATATTCCTTCAATGTTTGTTGCTCGCTTAACCG +CCGTCGCCGCAATTTAAATTGTTTCGACTGGGTGATCAGACAAATAAGAA +GACGTAGACGGTTTTTCTTGGGGGGGACGTTCAGCTTTGATTCCTTTCAA +ATAATCGATGCGGCCTAGCTTTCTGATCTCCAGAAACCGCGGGCCTTCTT +TGCTAAATTTTAAGCGATTTTACTTCCTCCCCCCCCCACTTTGATCTACT +TTAACAGCTTATATCTCGGTTGTCTTTAGCTTCATCAAAAAGTTATCAAC +TAACAAAGTGCGTGCCAAATATACTTCTACAATTCGGTAGTTAGTATTTT +TTGGATAAAAGTTAAGACAACCGAGATATAAGCTGTTGAAGTAAATTTCT +AGTTTGACTTACTCCCCCAATTTTGATCTACTTTTACAGCTTATATCTCG +GTTGTTTTTGGTAACAGCAAAAAGTTGCCAACTAACATATTACGTGCCAA +ATATTTTTCTAAAATTTGGTAGTTGAAAATTTTTGAATAAAAGTTAAGAC +AACCGAGATATAAGCTGTTGAAGTAGATTTCTAGTTTGACTTACTCCCCA +AATTTCGATCTACTTTGACAGCTTATATCTCGGTTATTTTTGGCAATATT +AAAAATTTGTCAACTAACAAATTACGACCCAAATATTTATCTACAATTTG +GTAGTTGAACATTTATGTTAAAAGTTAATACAGCCGAGATATAAGCTGTT +GAAGTAAAGCAGTGGTGGTGCTTAGCTCGACCTATATTTTTGAATTGCAT +AACAGTTATAACTAACTTATTTTATGTGAAACCCTATTCTAGCTAGTTGA +CTTACAAAATCTCCAGAAACTTTTGCCTGCCTACCACCTTTAGCAGCATA +AGCGCCTATAGGCACACCTACTGCCTACACCTATGCCACCTACTGTACAC +CTACTGCCCATACGCCTACTGCCTTTTGCCTACTTTCTACATAACTTTGA +ACTGCCCGTTTCTAACCAATAATCTTTTACAGGACGTCGAAAAGCAGCGT +ATTTATGTGTGCAACGTCTGCAAACAATCCTATCCACAAGCGGCTAATCT 
+CGACGTGCACATGAGGTCAATGACACATCAGAGCCGGATGAGCCGGCTGT +CCGAACTTGTGGCAACCGGGGAGCTTAATGGGGAGAAGGCCGTGTTTGAG +CAACCAGGAATACCGGCACCGACGATAAACAGCTTCATTGAGACGGTAGG +TGAGGGGTTTGAGCAGGCATAATCCTGAAAATCATCTGTTTGGTTTCTGT +GAAAAACTTTTGTGCGGAAAATAGTTTATTTATAGTTCAATTTGAAAAAA +ACTGGTTTCCGATTTTCAATTTTCAATTTTCAATGTTCGATTCTCGATTA +ATAAAATTTCCAATTTCCAATTTCCAGACAACCAACCAATCCTCACTGAA +CGACCTGATGTCACTGCTCACACGAAGCGAGTCGGATGAGATTCGAGAAG +AAGTAAACGGCCTTCAAGTCATGACACAGATCAAAGTCTACGGTGAATCC +AAGATCACTAGCCTAGTCCCAGAGTTGGCCGGGAAAATCGATAACATCGC +GCTCTTCGATGACTCCCGTGCGGCAGAGGTGTCGAAGATCGATTGCTCGG +CTTGTGGGCAGCAGATCTCAGGGATCCTTGCTTTGAACCTCCATTATGAA +GAGTCCCACTCATCGAAGATCCCGAGTGACGTGTTAAGAAAGTTCGGAGA +GCGGTTACTTGCCGCATTAGAGGACGGGTTGAGCAGAGAGAATTCCGTGA +AGAACGGATCGCAAAGCCCTCTGAGCAATGACGATGAGCCCATGGAGAAG +AAGATGCGGCTGGAGAATATGCTTCCTGAAATGGACAAGAACGCGGCAGC +GTCGCAATTCGCAATGTTCCAACAAATGATGAACTGCTTCCCGTTCATGG +CACCACCAGGCACGTCTAGTGGCAACTTTGGCTTCTCGCCAGAAATGATC +AATCAGCTCATGAATCCTGCGGCGGCGGCGGCGGCGGCGACTGCAGCAGC +AGCAGCCCACGCTGCCAACAACTCGCCGGCGAAGAGAGCAAGAACACGGA +TCACTGATGATCAGCTCAAAGTTTTACGGTAAGTCGTCTTGTGTGTGATT +ACTGTAAGCTACGGCGAACAGGAGAGACCCGAGGGGGGCGGTGGAAAATG +AATTGCAAACAATAGGTGGGAAATGGTCGGCGCCGTGATCAAAATGAATG +AGAAATGAGAATGAAGAACAAACATTGTGTGTGAGTGTGTGTGTGTGAGA +CGAGACGAAAATCAATCAATTTCTGGAAAAAACAACACATATATTTTCCC +GGCGAACTTTCTGTTTGAGGGACCGTACGGAACAGGACCCCCCTGAAAAA +AAAAATCCGGAAAATCCTAAGCCTAAATAGCGAACGCTCGCCACTGACGC +CAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAGCCCAAGCCTAA +ACAGGAACCCCCTGAAAAATCAAAAAATCCGGAAAATCCTAAGCCTAAAT +AGCGAACGCTCGCCACTGACGCCAAGCCTAAGTCTAAGCCTAAGCCTAAG +CCTAAGCCCAGCCCAAGCCTAAACAGGAACCCCCTGAAAAATCAAAAAAT +CCGGAAAATCCTAAGCCTAAATAGCGAACGCTCGCCACTGACGCCAAGCC +TAAGCCTAAGCCTAAGCCTTGGCCTAAGCCCAGCCCAAACCTAAGCCCAA +GCCTAAGCCTTTCACCTTCCTAATTTTCAGGCAGGCCCTAAAAACCGCGC +CTGCCTACCATGGAAGCCCTAATCTGTGCGCTTTAGATTCTGAACCCTTA +AATATGTTGCACGGCCGGGCAAAGGGCACATCAGTGTAAACGCGCTCTAC +TGATAATTCGAGTTTAGCCAGGTTTGGGCGCGTTTCAAGGAAAAAAACTT +TGGCTCAAAAAATTGTGAATTTATTTTCGAACATTTTTATATGCATCACA 
+AAAATGTTAGACCACCCGTTTTTGAGAAAAACGCGCCCAAACGTCCAGGT +ATACGGTAGACAAATTGCGTACAGGTACCACTGGGGGGTCGTGTTTGGGC +TTTAGTTTAGCTAAGGGAATCTGTAGCGGCCCAACGGCCTAACTGCAAGC +CTCAGCTTATTCGCCATAGGCTTGTCTGCCGTATTTCAAGCCGATTCCAA +CAAAAAACTTCCAGCCAATACTTCAACATCAACAACTCGCCGTCAGAAGC +GCAAATCAAGGAGATGTCACAAAAAGCGAGCCTTCCGGAGAAAGTCATCA +AACACTGGTTCCGCAACACACTTTTCAAGGTTTGTTTATTTATTTTACAA +AACTATTCATATTGTTATTAGGCTCGGAAATGAGCGAGGTGAGAACATGA +GTGTGGTAATTAGTCATCGGATGAATACCCACACAAGAGGGAGAGCAAAC +AAGAGAGCAATTGAGACGAGGGAGGGAGAGAGGGAGAAAATAAGAAAATG +TTAATGTCAGTTGGGTGCCAGGCGCTCGGCCAGGACCACAGGCCGGGAAC +CATTTAGTTTTGATTACGATTTCCGTCGTGACTCATGATGGGAATTGATA +TTTGTGCCTGTGGTGATAGAATAGTTAGAGCAACTATCAGGAAGGGGACA +AGTCGCAGTGCGAGACTATTAGAACCTGCAAGGTTTATTCTGATAGATTT +GGGACTAATAAGGACTAATTTGGTAGAGACTAATTGGCTCTCTTGATATA +TACCAGCTTAATAATTTCTACAAAAAAAACTGTAAGAACGCTTCGAGATT +CGAGCTTCGGGAAAGCTTAAAGGATGGGTACCGAAAAATTTTTTTTCCCT +GATTCCGAATATCGATGTGGAAAAATTCAAAAAAAATTCCCTGATTTTAT +ATTTGAGCTTGAAATCACGATTTTCATTTGTGCCCACCTGGTGTCAGAAT +GTCTCATTTTGGATTGATCTACGTTGATCTACAAAAAATGCGGGAGAAGA +GACGCAGAGTTCTCAACTGATTTTGCATGGTTAAGAACGTGCTGACGTCA +CACTTTTTTGGGCAAAAGCAAAAAATTCCCGCATTTTTTGTAGATCAGAC +TGTAATGAGACAGCCAGGCACCATGTGTGTGCCCCGGTTCAATTTTCAAT +TTTTTAAACCAGTTTCTTTTTTTTTAGTCGATAGGTTTTTTTCTGAAAAA +TATTTTTTTTGCCAAAAATGAATGTAACTGGCTAAAATAAACTAAAAATA +AAACGATGCAAGCGCGCTCAAATGCGAATTTATTTGGGCGCGAATTTGAA +AAAAGTGATGCGGGCACAAATGAAAATCGGCGATTTTAAGCCTAAATATA +AAATCAGGGAAATTTTTTTGAATTTTTTCACATTGATATTCGTAATCAGG +GGAGAATTTAAAGCCAATTAAAAATATTTTCCAGATTTCGGTGCCCCACC +CTTAATGAGATAAAAGAGCAAAAAAGGTGTTTTGTAACTTTTTAAGTTTT +TTCTTTTTCAGGAACGCCAACGCGACAAGGATTCCCCGTACAACTTCAGC +ATCCCTCCCCAAATGGGAATCGACTTGGACATCTACGAAAAGACCGGCGA +GACAAAAGTACTGTCGCTTAGCAATGAAGCACCAAAGTCTGAGTTGAGCT +CTGCGCGCGCCACACCGACAATTCCGACTCCGATACCGCTCATCGTTGAA +GAGAAGAAACCGGAGCCAAAGAGCCAGACACCATCGTCTTCTTCCTCTCA +GCTCAACCTGCAGGCTATGTTATCTCAGATGCAGGGAAGTTTTTTCGACG +CGTCAAACTTCATGTCAGCATCAGTGAATCCGATGACACCGAGCACTCCT +TCCTGCAACACGTCGTCCAGTGGACGACGAGCCAATCGCACGAGATTCAC 
+AGATTTCCAGCTGAGAACTCTACAGCAATTTTTCGACAAGCAAGCCTACC +CGAAGGATGATGATCTTGAAGCGCTGAGCAAGAAGCTTCAGCTGAGTCCA +CGTGTGATTGTCGTCTGGTTCCAGAATGCACGGCAGAAGGCCAGAAAGAT +CTATGAGAATCAGCCGAATCACGAGAGTTCCGATCGATTTGTGAGAACGC +CGGGATCGAACTTTCAGTGCAAAAGGTTGGTTTTAGTGTTGGAAATTAAC +ATTTAAATTTAAATTTTCCTAAATTCCAGGTGCAGCCAAGTATTCCAACG +CTACTATGAACTCATCCAACATCAACAGAAAAAGTGCTACAAAGACGATG +TAGCAGCCCTGGCAAGTGACAACAAGAGTGTCGAAGAATCACTGACGGAA +GAAGAGAAATCCCAGCTTCTGGCTCAACAACAGGTCGCCCAACTAGCCAG +CACTCTGGAGCTCCCAAAGTTCCAGCCGGCAGAGCTTTTGAAAATGATCG +GAGCTAATTCGGTGACTCCATCCTCGTCGGCTGCAAGTCAGAAGTCTAGC +AACGACGTTCTGCTGAAGATGTGTGAATCGATTGTTGGTGGGAGCACACC +TAGTACATCATCGTTCCACAAACTTTGTATATTCTGCGCTCAAGACTTCA +AGGATCGGAGTGCGATGAGTGAGCACATGGCTCAGAAGCATCCCCAGCAT +ATGCTTCTTCCAAACTTCGATCTCGACATGATGCCAGACGCTGGTGGTGC +AGAGCTTCTCATGGATCTTAAGGACTCTGCGTTGGATCTTTCGGGCTCTT +CAGTCGACTACCGGGATTCTATCTCAACATCTCCATCTCGATCCGAAGAT +GATGTTCTCACAGAGGCTCTGGATGATTCTGCATTTGCCGCGTTCGGACT +ACAGCTTGCAAATTCCACAAGCGGCTCGGAATGCAGATCTCCGGCGAGCA +ACAAGAGATTCCGAACTCATCTCACACCGATGCAAGTGCAAATGATGAAG +AGCGTGTTCAACGAGTACAAGACCCCATCCATGGCAGAATGTGAGCTTCT +CGGCAAGGAAGTCGGCCTTCACAAGCGTGTCGTTCAAGTCTGGTTTCAAA +ATGCGCGAGCCAAAGAGCGAAAGACGCGCGGTGCAGTTGACGAGGACTCT +CGATCCGGAGAACTTCACTGTGAAATTTGTGATAAGACGTTCGCGACGAG +GCTTTCGCTGCAAGACCATCTGTTTGCCGAGCAGCACATTGAATTTTTAA +GGAATAATCTGAAACGGGAAGGGGTCTCCGAGTTGACGACCGCTTCGGTG +ACCGAGTGCTCTCCAGAGAAAAAGTAAGAAATTTAATTTTTAATTTAATT +TTACATATATCATTACCCTATATAACCTTAAAAATTTTCCAGAGCAAAGC +TTCCGAACCCGCTGGATCTTGCCAGTTTCCCGTTCCTCAACACGTTCGAC +ATTCAAATTTACGGAACACCCATCGCCTTTCTGCAGGTGCCTGATGAGAT +CAAAAAGCAGATCACTGATGATATCACGGCTGGAAAATCGCGAACCACGT +TCTCCCAGGACGGTTTTCCGCTCGACAAGTTGAAAGAGTCGCTGCCCGAG +GATGAGAAATCAAATTTGACAGTAACCCAGAAAGATGTGAGCTTTTTTTT +TTTGGGAAAAATTTTGAACAATTTGGCAATTTCAGGTCGGATGGGCGTGC +CCGGCATGTACATTTGTCTTCCAAGAAGAAAAGAAGCTCCGTGAGCATCA +AAAAGCCATGTGCCAAAGTGATAAGGTGGGCAAGAAATTAGGAAACGGGC +TTGAAGTTTAGCTTATAGTTAGGCAAGTGGTGTCAGACTGTCTTTGATCT +ACAAAACATGCGGGAATTGGAATTTTTCCCAGAAAAATTGTGAAGTCAGC 
+ACGTTTTTAACCATGCAAAATCAGTTGAGATGTCTGCGTCATTTCTCCCG +CATTTTTTGTAGATCTACGTAGATCAAACCGAAATGAGGCACTTTCTGAA +TCCACGAGCTAGGCTTAAGCTTAGGCTTAAGCTTAGGCCTTTTCTCAGGC +TTAGGCTTAGGCTTATGCTCAGGCTTAGGCCTTTTCTTAGGCTTATGCTT +GGGCTTAGGCTTAGGCGTAGGCTTAGGCTTAGGCTTAGGCTTATGCTTAG +ACTTAGTCTCACTATCAGTCTTAGGCTTAGGCTTAGACTTAGGCTTAAGC +TTAGGCTTAAGCTTAGACTTAGGCTTAGGCTTAGGCTTAGGCTTAGGCTT +AGGTTTGGGCTTAGGCTTAGGCTTAACCTCAGGCTTAGGCTTAGGTTTAG +GCTCAGGCTTAAACTTAGGCTTAGGCCCAGGCTTAGGCTTAGGCTCACAC +GTTCTCACGTTCTTAACCATACAAAATCAGCTAAGAACTCTGCGTCTTTT +CTCCCGCATTTTTTGTAGATCTACGTAGATCAAACCGAAATGAGGCACTT +TCTGAATCCACGAGCTAGGCTTAAGCTTAGGCTTAAGCTTAGGCCTTTTC +TCAGGCTTAGGCTTAGGCTTATGCCCAGGCTTAGGCCTTTTCTTAGGCTT +ATGCTTGGGCTTAGGCTTAGGCGTAGGCTTAGGCTTAGGCTTAGGCTTAT +GCTTAGACTTAGTCTCACTATCAGTCTTAGGCTTAGGCTTAGACTTAGGC +TTAAGCTTAGGCTTAAGCTTAGACTTAGGCTTAGGCTTAGGCTTAGGCTT +AGGCTTAGGTTTGGGCTTAGGCTTAGGCTTAACCTCAGGCTTAGGCTTAG +GTTTAGGCTCAGGCTTAAACTTAGGCTTAGGCCCAGGCTTAGGCTTAGGC +TCACACGTTCTCACGTTCTTAACCATACAAAATCAGCTAAGAACTCTGCG +TCTTTTCTCTCGCATTTTTTGTAGATCAACGAAGATCGAGCCGAAATGAG +GCACTCTGACATCACGTGGGCTAAGACTTAGGCTTAGGCTTAGGCTTAGG +CTTAGGCTTAGGCTTAGACGTAGGCTTAGGCTAGTTATTTGTAACTTTTT +TAAATCTCCCCCGAGTCAAAAAAAAAAACAATTCAATTCCAGACGCTAAC +CCTGGTGCAAACCCACTACTCGTGCAAAAGCTGTCAAAGTGATTTCTGTC +TTCAATCCGAGTATCAATTCCATCTCTCAATGCCTCCTCATACTACTACT +ACTACCACGACGACGCCATAACAAAACCCCCCAAATAGTCACCTCATGTC +ATTTCATTTGCCGTCTTCACTCAAGGACCTATTTCCGTCTCTCTCTCTCT +CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTTCTCCTCTCG +CCTTAAAAAAACCACAACTCCTTATCCCGATCTGAGCTCACTTCCTATCC +CCCAAAGTAATTTCTTTGTAATATCTATATCCACTTTTTTTTCGACACAC +TACACCACATCCCGCCGCCTATCCCCTCCCCCCCCACCGCTTTATGACAC +AAAAACTGGTTTCGCTTTTGTCTTCAATTTCACCCCAACCGCCTGCAGCT +TCTCATAATAATTAATAATTTTTCTTACCATGCACAATGATTTTGATAAA +TATATATATATATAGTCATCAGGATTGCCCCCTTTCTAGGATATGTTTTT +TTTTGTCTTCACACAATTTTCCCATTTTCTATGATAAGAATTATTTCTTT +GTCCGGTTGACACATGTAGATGTTCCCCTTCCAATTTTTAATAGATTTCT +ATTCTAGTTTTTTGCAGGCCTCACTATTCTATTGCAATAATCACTTGGAT +TTCATGTATGGAACGCAATAATAATTCAATAAAATATTGGTACTTCTTTT 
+TTGAAACACACTAATTCAAACTCACGCGGTTCCAGGCTGTCCCATTACGG +TTTGATCTACTAAAAATGCGGGAATGTTTACTCCCAACAAAATGTGACAT +CATCAGCACGTTTTTAACCATGCGATATCGGTTGAGTACTCTGCGTCTCT +TCTCCCGCATTTTTTGTAGATCAAGCCAAAATGAGACACTATGACACCAC +GTGTAGACTTAAAATTGACTGAAACCACCGAATTTCATATTGAAACTTCT +TGAAATCTATTCAAAAAAAAAGTTATGAAGGCTCAAAAAATGGCCTAAAA +TTTGTTAATTTAAACAAAAGACACTTAAAACGGTGTCTTTTTGAATTTTT +CCGTTTTTTTAAACATATTTTCATAAAATTTACTTATTTTTCAAAATAGA +TGTTGGAACATTTATAGGATGCGTTCAATTTTGCCATTTGAAATTGAAAT +TCGGAAATTTCAAAAAAAAAAAAAAAAAGTGCGAAACCAAAATTTTCCGA +AAATTTTCGGCAATTTGCCGGTTTGCCGGAAATTTCAATTCCAGCGATTT +GCCAATTTGCCGAATGGCCGGAAATTTCAATTCCAGCAATTTGGCGATTT +GCCGATTTGCCGGAACCTGCTAACCTACCTACTAACTTGCTAATCTTCAA +GGTTTTTAATGAGTTTTCAATAGAAAATTCGGTGTTTTCAGGCAATTTCG +AGAAAGCAATACAAACGTTTAACAAAAAAAAGACGCTTTTTCCTCTTATC +CGATTCTCAATTCTCAATTTTGCCTTATCAATTTATCCTCTCCTCACAAG +GTGACACACACACACACACACACACACACACTCGTTTCTCGACAAAAAGC +TATGAGCTAACAAGCGCCTTCTCGCGTAAGAATACATCCACTGATAAGAC +ATCTGATAAGAGTGGAACCCGTCCGCCATTGTCTCGAGAAATACACGCCT +CCGCCCGGGAGGACAGGTTATCCCACCTCGTCGCTTCACTTCATTGTTTC +AAACGATTCCATCGGCTTGCGCTTGGATTTTCTCTTATGCAACTTGCCAT +TCTTCTTTTATTGCTTAACATCATGGTTATTTATTAGGGAAATTGTTCTA +CCCTGACTTGAAATGACTGAAGACGGCCTATTTTACTTTAATTTGAAGAC +TCAGAAGACTACTATAAGCGGTCATTCTTGTTGCGAAAATTTCTGCATTT +AAAGGTGGAGTAGCGCCAGTGGGAAAATTGCTTTAAAACACGCCTATGAT +ACCACAATGACCGAATATCATGATAAAAAAATTCAAAAAAATTTTCTAAA +TTTTATATGATTTTTTGAAAATTGAAAAAATCTAAGGTTTTCACTTAATT +CATATTTGAATTACCGCCAATTGGATTTGCTCGATGGAGCGCACTTGCAC +GTTTTTAAATTTATTTATTTTATTTTTTGTTATTTTCCACCGATTTTTAA +TGTTTTCGGTGTATTTTTGCTTGAATTTTAGAGAAAAAGTCAAAATAAAT +GCAAATTTTCGATTAAAAAGCAAGCGTACAGGTAAATCAGTGAAATTAAT +CAATTCAGGTTCATTTTTACGCCTGTAAGCGTGCTTTTTAATCGAAAATT +TGCATAAAAAACATTTAAAATGGGTGGAAAATAACAAAACATAAAATAAA +TAAACTTAAAAACGTGCAAGCGCGCTCCATCGAACAAATCCAATTGGCGG +TAATTCAAATAGGAATTAGGCAAAAACTGAGATTCTTTCAATTTTCAAAA +AATCATATAAAATTTAGAAAATTTTTTTGAATTTTTTATGGTGCGTCCAT +AGAAAAACCGGAGTTCCTCGCTTTTTTTTCCTCGAAATTCAAAAAAGTAG +GCGTGGCCAACCAATCAGCTGTTGTTTCTTGTTTTCTCATTGCTCAGCTT 
+AAAATTTTACAGCCTCTAATTGGTTGAACACGCCCACTATTTTGAAATTG +ACCAATAACAAAGCGAGAAACTTCTTGTTTTTTCCGCAGACTATGGACGC +TTCCGCACGCGGGAAATTCCCGTTTTCCGCAGACTATGGACGCACCATTA +TCATGATATTGGGTCATTGTGGTACCATAGGCATGTTTTAAAGCAATTTC +CCCACTGGCGCTACTCCACCTTTAATGCATTCTGCGTCTATTTTATACAG +TTCTGACCATAAAAGCACACAAATTTTTGTCTCAAAGTTTAGGAATATAT +AGATTTAAGGAAGAAAGTTTTAGTAAGAAAAAATATAGTCTTCTTCATGT +TTTCTTTTTTTTTACAATATCCATTTGCCATCCGTTTTTTTTTCTGCAAC +CTTCGATAAATGTTTGCTTGGATAACTATGTAAGGTGCTTTGAACACTCG +GCATTTGGGCTTCTTGCCCAAGTTTAGCCCAAAAATATTAACTTGGATCA +GAAGTTGGGCAATACTTTGGCAAAACTTGGATTCAGGCAATATCAAAGTT +TAACCCAAGTTTCACCCAACAAACTTTGGCCTAAACTTTTTTTGTTTCGA +ACTTGCGTAGAAGTTTTTGCCCAATTTTCGCCCAACTTTTGCCCAACTTT +TGCCCAAGTTTAGCCAACTTCTGTTCCAAGTTAATATTTTTGGGCCAAAC +TTGGGCATGAAGCTAGAGCCGAATGCCGAGCTAATTTTATTTGACTCTAT +CCGAATTCTGCTAATTCTCAAAAATCCCCCAATTGCCAACCATTTCAGAA +TGCATTGGAGTAGTTGGGCGATAGCGCTCAACGTGCTCGTGCTGGCTTTG +GCCGACAGTGCTCCTGAAAGATTCCCAGAGGATCATGTGGACCTGGTACT +TCCTATGAAAAGTATCCACCATTTCACCTAAACGTTTCCTAATCATCTTG +ATTACAGTGAAATACGACAGTCATCTTCGACAAGCTGATCTTCCACCGCA +CTTCATTGGTGCCAATGAGACGAACTTGGTTCCGCTCACCTTGAGATTGG +AGACTAGGAGGAAGAGATGCTCGTGTGGATGTTCGGGATGTGATCTTTTC +CCGAATAGATCTTGTTGTTCGAGCTGTAAGTTTGCGATTCTATAAGACTT +GTCAACGTGGTGTCAGGCTGACCCATTACGGTTTGATCTATAAAAAATGC +GGGAAGTTTATGACCAGAAAAAAATGACGTCATCACATTCTTAACCATAC +GAAATCAGTTGAGAAATCTGCGTCTCCCATTTCTTCTCCCGCATTTTTTG +TAGATCTACGTAGATCAAGCCGAAATGAGACAATTGAAATTCCAGCTTGC +TGCTCCTCTCAAAAACCAATCCCACTTGCCTGCTGTCCGCCACCTCCCCC +ACCAAAACCGTGTTGCCAGCCAGCCTTCGGCCCTTGCTGCCCGGCAACCC +CAAACTGTTGCCCGAAACCCTGCTGCCGAGGTCGTCGCCCCGAATACGAA +GAGTACGAGGACGAGGAAGGCAACCCTGGAGGCGTCCCAGCACCACCAAA +CCCACCAAGAACTTGCTGTCCTCCACCAACACCTGCTGCTCCACCACCGC +CACCACCTCCACCACCACCGGCTCCGGAAGCTCCAACGCAATGCTGTGGA +TCACAGCCTTATGGAAGAACACCGTGCAGGTCCGGATGTCCGAATGGAGA +TTGTGGATGCGGAAGACCTTGCTGTTACTATCAGAACCCCACATGTTGTA +ATCAAGGGCAAAAGGCTTGCTGCCCACCTGAACAGCCTTGCTGTCCTGAG +TTGAAGCTGGATAACTGCCTCGCTTCGGTCCCACCTTGCCTTCGCGCCTG +TCCATCGTGCCCTTGCAGAAAACGTCTGATGCTTGGGAAGAGAACTAAGA 
+GAGACGCACCGGGGCTTCATTGCCAGCCAAGTAAGCTATAAGTTTAGATT +TTCACGTAGTGTCAGGCTGTCTCATAGTGGTTTGATCTACAAAAAATGCG +GGAATTTCTCGCCCAGGAAAATGTAACGTCAGCATCAGGGCTGGGACCAA +AAAAAAAATTTTTGGACCAAAGAAGTTTTTGAAAAACCAAAAAAACCAAA +AAAAAAACAAAAAAAAAACCAAAAAATTTTTGATATGCTTAAGTTGATTT +TTAATGGGGTTATTCAAGTAATGTTGCAAAATGTATTAAAATACATTTAT +GACGTCACAACTGTGTTAAAATACATGTTTTAATGTATTTTAATACAGAA +TAGTCTCGAGTCGACACTAGACACGGTAAACTTTTTTTTTAGATTTTTCG +TCAAAATACCAAAAAAACCAAAAAATTCCCAACAGCATGTTCTTAACCAT +GCGAAATCAGTTGAAAAGTCTGCGTCTTTTCTCCCGCATTTTTCGAAGAT +CAAACCAAAATGAGACACTTTGACACCACGTGGATTTTCAAAACGCTGAA +AATAAGTAAAATAAATAAAAAATTTTCAGTCGGACTCCTTGGTCAACAAT +CCCCACCGACTCTGATTTCAAAGCCGGTAAAGACCATAATAAAATCAAAA +TCCCGCGTTGCTGGAACCAAGACATCTCAAGTGTCCGTCACCAAAAAGCT +CATCGAACAATCCTCTGACCACGTGGAATCGCCTCCAACAGCCGGAAGAC +TCTACGACTTCCGAAGAGCCCACGTTCGAGTCAAGAGAAACATGAACTTT +GGAAATGGAGCATGCCAGCTGTGTCTGAACGGAACTCCGCTCAAGAGAAC +CAAGCGGTCTCTCGATTGTGTTCCGTGTACCTACCTACAACCACAATACT +CTGATTGGAACCCGTTCCTCGGAGATCAAACGCCACGCGGATCGCAGTCG +CCAGTTGGAACTCCACTCGCCGGGCACAGAACTAAGAGAGCTGGCGTGAG +TTTGACTCATAGAAATTAACGAAGCTAAATTTATTTTTGCAGTGCCTTCC +TCATCCACAATGTACCCTGCATGTCCGTCGCTATAAGAGAAACCTGATCG +GATCCCAATATTGTGAGCCATGCAATGGACACTACGGTAGAAAGAAGCGG +GAAGCCGAGAGAGATCAGTGCTTGAAAAGAGAGAAAAGATATGCAGATGA +ACAGTGTGATAACGATGAGTTCAGTATCAATGAGCGAAGCAAACGACAAG +CCTACAATCCAAAAGGAATTTTGGATATCGTGAAGCTCCTATCCAAGGCC +TCCTCAGGTGGCAACAATCCAGGAGGATGCATGAAGTTCCCAGCCTGTGT +TCTGGCTCAGAAGAAGAGAAGAAAGAGAAATGCAGATCGGCTCGACACGT +ACTACAAAGCTGTGGAGGAGCACAAGAAGCTGGTGGAAGAGTACGAGATG +GCGATGGAGGAGCACAAGAGAGTCAAGCGGCAGTTCTTCGCTCCGGATAA +TGCTGCATCCTGTGTTCCGTGTCCGGCATGGGTGACACTTGCCTTGGCTA +GCAGAAAGAAGAGAGAAGTCGAGAAGGTGGAGAAGCATATGACGATGAGC +GAAGCTATCGCGGATATCAGAGCAAAGAAGGGGTATAAGGAAGGATTTGA +TGATGATGATGAGGTGAGGACTAGACCTGAAATTTTAGATCCTGTAAGAA +CTTTTTGAAAATTCCAAAAAAAAAACCCCCTAATTGAATCACGCGCGCAC +GCGCCACTGAGAAGGATACTGTATTCGCGGCGAGACCACCTGCACATCTA +ACTCCCAAAAGTGACGAATTTGGCCTAAAATGGACTTTCTTCATGGGATT +TCCACAGTTTTCACCATTTTTTTGACACAGTTCATGACTTTTTACACTCA 
+AAAAATTATTGGCGTGGTCATTTCACAATGATTTCAACAATTTACTGGAT +TTTACGCGTGTACTTCATCGTGGCACACCAAAAATTGATCTGGTTCATGT +GGTGTCAGAGTGTCCCATTTTGGTTTGATCTACGTAGATCTACAAAAAAT +GCGGGAGTTGAGAGACGCAGAGATCTCAACTGGCGTCACATCAATGCGGA +TTTTAAAATTTTCATTTCAGGACGACGAATCCTCTGAGGAAACCATCGAG +ACCCGCAGAAAGCAACGCCGCTCCTGCCAACAAAGTGACGATTGCCTGAA +CAACGTCGAGTACGCAGTCTTCCAAAAAGTCTATGCAGACAAGAGAACCA +AACGAGAAGCCGTGTTCCGTCGTAAGAAGTGCTCGAGGTGTGGAGTCTCT +GGGCTCACGCCACATCGTGTCAAGAGAAACTTCGGGCAGCCGAATATCAA +CGTCTCGGAGCAGAATTGCATGGCATTCCCACAGTGTCGGCATAGAGTGA +AGAGAAACTTTTTGGGAGAAGACTGTAACATCTGTACACAGGATACTGGT +CTGAAGCGGAGGAAGAGAGACTTCGGAACAGCGGTAGGTGTTGGTTTAAT +GAGCTTCCTTATCTTAAAGTTTTCAGCAATGCTACCCATGCCCAGGAACA +CGTTCATAACTTGAAGCGACTCCCTCCGATTCTCACATATCACATAAATA +CCACCTCGTAATAATAATTCAACATGACTAATAAAACAAATAAGTACATA +CATACATTTATATTAGCTTTTAGCACCCGTAGCAATTTGGAGCAAGAAAA +TATGTGTAAAGAAATTGGAGATGTAGTAGATTTGGTCGTTATAGTGATCG +CAGATCTTTCAGCTCTTTGAGCCGGAATATTGACTGGCTTGGAGCCAGGA +GATGAACCTCGCGGGGGAATGAGGCAAAGTAGGGGTGAAGCATTGCACTG +GCTGCGGAGACACGGCTTTCCGGGCGGAGCTGTAATTTTGAGGGTTACAA +GATAGGTGTAGCTCGATGTCGATTCATTTAGACTAGCATTGCAGCCAGGG +CTTAAGCTTTCGGCTTGAGCTTGCACTCAAAGATTGCTGCTCAATATAGC +TCGCGAAGACTGATTTGTCAAAGATTAGACTAGTAGGCCTGGACTGACTT +AAAGGTGGAGTAGCGCCAGTGAAAATTTTGTCAAAATACATACATAATGA +TCCGAAAAAAAAACCAAATTTCATAATAAAACAATCCAAAAAATTAGATT +TTTCACAATTTCAGGTCAAATTTTTGGCAAACTGCTAAAATTTTGAAAAA +TGAGCAATTGAGGAAATCTAGAGCAATGTCGCATGTTCCGACCCCTACAA +TATTTTAATACAAATAATTAAAACACAATTACAGTATAAAAATGTAGGAA +AAAAAATTTTTTTTTTGTTGGTCGACTTCCAAAATTATGAGTGGTAAAAT +TACAGTAAATAAAAAATTTTCAAAATTTTTTTGAAACGTTTTATTATGAT +ATTCGGTCTTTTCGGGACTAAAGGAGTGGTTTTTACAAGTCTACTTACCT +GAAGAAGCATCCCCAGAAGTTCCTGACCAGTCTTCAGAATCTTCGTGAAC +ATCGGGTTCACCGCGATGAACGACAACTCCCGATACCTGGGGAACAGTTC +AGGGGTGTAGCCTGGTAATGTCTTTACCTGAAACAAACATCTTTATCTCT +AGATTTTAGATTCCCTCACCTCCGGCCACTTTTTCTCGTCTGGAGTACCC +CGAATGCTGAAGATCATATCCAACTGGTCCTTGGTGCCGGGGTAGTGAGA +GTCCTTGGATCCCGGGAACAAAGCGGCGCCCGTGCAAATTTCAGCGAATA +TGCAACCGACTCCCCTGCAAAGTTAGTAGATATAGTTGGTACCTTTTATA 
+AAATAATATCCTCCTACCACATATCCAGGGAAGTTGAATAGTCGGTACTT +CCCATGAGAACATCAGGCGGGCGGTACCATAAAGTGACCACCTCGTGAGA +ATATGTCCTACTCGGCACAGATTTCGCCCTGGCTAGACCAAAGTCTGCGA +GTTTTAAAACACCGTCTTCGTCGAGAAGCAAGTTTTGTGGTTTTAAATCT +CTGAAAATCAGTTTTTATCAAGAGTTATGGCCGCAACGGCGCCTCCGCCG +ACCCCAGCGGTCGCCGCGCCGGCCTCCGCGGAACCCCGAAAATGTCCGCC +GCTCCAAACAACCACTTTTTTGCACTACGTTGCGCACACACCAGGCTACT +CATTTCACGCCAAGCTGCGGAACACCGAACGTGTCCGCCGCTCCAAATAA +CTCCCTTTCGCAATACGTTGAGCACACACCAAGCTGCGGAACTCCCAACG +TGTCGGCCGCTCCGAACAACCACTTCTACGCACTTCATTGCGCACACACC +AGGTTACTCATTTCACGCCAAGCTGCGGAACCCTGAACGTGTCCGCCGCT +CCAAACAACTCCCTTTCGCAATACGTTGAGCACACACCAAGCTGCGGAAC +CCCCAACGTGTCGGCCGCTCCGAACAACCACTTCTACGCACTTCATTGCG +CACACACCAGGCTACTCATTTCACGCCAAGCTGCGGAACCCCGAACGTGT +CGGCCGCTCCAAATGACCACCTTTCGCGCTTCAACGGCGTCGGCGTGAGG +CCCGAATTTCGCGCCTCACTCTGCTGGGAGCCCTATCAACACTAGGGAGC +AGTTTGAAAACTAACCTATGAAGAATCTTCTTCCTATGACAAAAATCGAG +TCCGCGGAGAAGTTGAAAGAGGAGAAGCTTGATGTCAATGCTATCCAACC +CATAGACGTTCTGCTCCAGGTACTTGCTCAAATCCATTTTCATGTACTCG +AACACAAAAGTTAGCTGGTGGTGTTGATAGAAGATGTCGTGCAAAGAGAC +GATATTCGCATGACGGAGGTTTCTGAGAAGCGAGGCTTCACGGATAGCGG +TGAATGGGAGACCTTCTTGAAATTGAAGTTTGATCTCTTTCAAGGCGACT +ATGGATCCGTCAAGTCTGAAAATATAATTTAGTTTGGTAATATCAGCTGT +TAAAATATCTAACTTCGATTCGCACTTATACACCGTTGCATAAGATCCTT +CCCCCAGCTTGTCAATTCGTTTGTACAGATCATTCACATCCTGCGGTGTC +ACTTCCTCCTTCGCAGGAAGCGTTATCGTATTTGGATCGTAGTGTGCTGA +TCGTGGTCGACGTCTTCGAAGATGTGTGAAGTCGGCCACAGTGTTGCTAG +ATGGTGGTGGTGGCATTGATGGAGAGTTCTTGTTTAGGGGCACGGTTGAC +ATGGTTTCAGATGGATTGTTGGAGCGGGAACGTGATTGGCGGCTGAAATG +GTATTACTAGGGTTTGATTCGGAAAATTTTCTTAGAAAAACAGTTTGGCG +GGAAGTTTAAATTTTCTGATTAAAAATGTTGGCGTAAGTGGTGTCGAAGT +GCCCCATTTTGGTTTGATCTACAAAAAATGCGGGAGTTGAGACAAAGACT +TTTCAACTGATTTTGCAGGATTAAGCTGACGTCACATTTTCCTGGGGGAA +AAATCCCCGCATTTTTTGTAGATCAAGCCGCAATGAGACAGCCTGACACC +ACGTGTTGACGGGATATTTAAATTTTCTGAGAAAAACATTTTGGCGGGAA +GTTCAAATTTTCTGAGAAAAACATTTTGGCGGGAAATTGAAATTTTCTGA +CAAAAAAATTTTGGCGGGAAATTGAAATTTTCTGACAAAAAAATTTTGGC +GGGAAATTGAAATTTTCTGACCAAAAAATTTTGGCGGGAATTCAAATTTT 
+CTGGGAATTAACTTTGGGGAGAAGTTCAAATTTTCTGTAAAAATTTTGGC +GGGAAATTGAAATTTTCTGACAAAAATATTTTGGCGGGAAAATCAAATTT +TCTGAGAAAAAAAAATTTTGGCGGGAAATTGAAATTATCTGACAAAAATA +TTTTGGCGGGAAAATCAAATTTTCTGAGAAAAAAAAATTTTGGCGGGAAA +TTCAAATTTTTTTTTTTTGAAAAGAAAATTCTACGGACCGGAAACGGCCC +AACTACGGCTCAAGAGCCGTCTCGGTCGGTAAATGTGTTTTTTGCTCTAA +ATAATGCATTTAACGACGTAAAAATGCTTAAATTAGCCAACTGGTATCAT +AGGTATTAAAAAAATTCGTTTTCTTAAAATCCATTTTTTCCTCTTAATGG +CCATTTTTTTACATTCCCCGTAGTTTGCCCGTAATTGGTCCGTCCCGCTT +TTCATATCCGTAGTTCACCCGTTAATGGTCCGTTCCCGGTCCAAGATCCG +TAAACGGACCGTACCGGCTTTGCCTGTGGTCAGATATAACTAGTAAGTCA +AGTGGTGTCAGGCTGCCCCATTATGGTTTGATCTACTAAAAATGCGGGAA +TTTTTCCCCCAGAAAAACGTGACGTCAGCACACTCTTAACCATGCGATAC +CAGTTGAAAAGTCTGCGTCTCTTCTCCCGCATTTCTCGGAGATCAAACCA +AAATGAGGCACTTTGACACCACGTGGTAAGTTATAACAAACATAACAAGG +TGTCGAAACTACTGTAATTATAGTCTTACTTCAATATACAACCTAATAAG +AATCTCCACCACTTACCTCAAAAAGCTGAAAATATTCTTATTATGATGTT +CTGGTGTGCTTCCACTTCCGGACCCCGATGATGCCCCATTCACATGATTG +GAGTGCCATGATTGTGTCATTTCATTTGTTCCGTTTTTCGAGATCGGAGA +TTTGGTGGAGCCTCCTGAAAAAACAGAAATAAATGTGGAATTATATGGGG +GCTCATATGTTTTCGGAACTAACCTTTTTGGGAGAAGACTCTGGAGAGCA +TTTTTGTGTAAACTGACTAACTTGTGCCACCCCCCGCACCCCATAGAATA +AATTGGCTAAAATTTCGAAAAAAAAAACTCGCCACCGCCGTTATTCTTGT +ATTTCTTCTCCTTATTCCGACTCCGACGCAAAGTGAAAATTGACGAGGAC +GGCTGGAATAATGTTTTGTGTTTGTGTCTTGTGGAAGAGATGCAGAATCG +CCGGAAGTTTAGAGGGGGTGGCGTGACTTACTGACCTTCCGGGTGCTCGG +CGAGCCGTCCACGTCCTGAATTCGTTGCGGCGACATTGTGGTGATCTGGA +ATATGTGGAGCGTAATCGAAATATTTAGGCTTAGGCTTAGGCTTAGACTT +AGGCTTAGGCTTAGTCTTCGGCTTAGGCTTAGGCTTAGGCTTAGGCTTAG +GCTTAGGCTTAGGTTTAGGCTTAGGCTTAGGCTTAGGCTTGGGACTGGGC +TTAGGCTTGAGCTTTGGCCTAAGCTTAGGCTTAGGCTTACGCTCAGGCGT +AGGATTAGGCTCTGGCTTTGGCTTTGGCTTAGGCTTAGACTTAGGCCTAG +GTTCACGCTTAGGCCTAGGCTTACAAGAAATATCCTCATTTACCGACCGA +CTGTGGCCTAGGATTTCAAAAAATGACCCTTTTTGAAAAATCGAAAAATC +GAAATATCGACGAATTTTCGTTTTTGAGGCGTTTTTTTAATTTCAATTTT +TATAATTTTTTTCGGTTTTTCGATTTTTCTTGAAAATACCAAACTCACCC +TATGCAAGTTCTCCGACGACTTCGAACTCCTAACAGCGGTGAGCTGTGGA +TGATGTATCATAGACAGTGAGCACGATGCTCTCATAGGCCTCTCGGCCTC 
+CTCCAGCCCAAACCATACTCCAATATGCGAGGTTATTTTTCCCCACATTT +GATTGAAAGATGGTATCTGCAAGTGAAATTTTTGGGTGGGGAAAAAGAAC +GTGAACGAATAATTTGGAGCAGCGGGGGGGGGGGTGGAAAAATGAAGAGC +TGCTCGGAGGAGGGAGCCGTACTACGAAATGAGAAGAGGAGAAGCCGGTG +TGACGGCGGGGCGTTAATGGGTTACTAAAGCCTCCACATTCCCATTTTCA +GCCGTTTCGAAACAAAGAAAATAGAAGAAAAAAAAGACAGATGAAAAATG +ACTTGGGTGGGTTCGGTGAAAAACAGGAAGCAGGAAGAGTGAGGAAATTG +GATGGTGGAAGAGTTTTGGCATTTCTAGGTTACCGGGAATCTTTAGAGAC +ACCTGGGCTGTCCCATTACAGTTTGATCTACAAAAAATGCGGGAATTTTT +CGCCCAAAAAATTTTCAACAAATTTAGATATTTTTTACATTTTTTCCCCA +ATTTTTTCAGCCATATTTCAGCCATGACTGTCCTTTTTTTCGGGCAAAAA +AAAATTTTCTCTGAAAATGTTCGAAACTACTAAATTATGCAAGAAGACAA +TTTTTAAGGTTCGGAGATCAATTTTGAGTCCTCTAGCTTCAAAATTATCC +ATTTTAGAAGAGTTTTAAAATTGTAATTTTTTACAAAAATTGCTCAATTT +TGCCACTTTTTAATAGTTTTCTAAACCTAGATTTTCTGAATTCTGCATAT +ATGAATTACCCGTTTTCAACAAATTTAGACAATTTTTTTCATTGTTTCCA +AAATTTTTTTCAGCCATTTTTCAGCCATTACTGTCCTTTTTTTGGGCAAA +AAAAATAATTTTTTGAAATTACATTACACACCTTCCATGTTTTCTTCGTA +GATTTAAGGTCCATGTCCACTGCTTGGTGGAAGAGTTTTCCGAGTTTTTA +GGTCACCGGAGAATTTGAGTTGGAAGAGTTTTCGCATTACTCCCCAACTG +TGATAGAACTTCAAAAATGAATATGCAGGGACATCACTCAAATTTGACAT +GTCAGTTTAAAGGAACACGCTGATTTTCCGAGTGGGTCTCGCCACGATCC +AGGTGCTAAATTTTCACAGTTTTTAGTATGTACCTAGTGGCTTATTTCTA +AACAACACGCGCATAACATCGGAGTGTCGTTGCATAGGCCACTCTAAAAC +ATAAACAAATGTGTGAAGAAAAATTAAACACAACTAGCACGGATTTCTGA +CGTCCCTCAGAAGTTGAAATGGAAGAGTTTTTGCCGAACTAGGCCAGGCC +ATATTCGGGGTAGATTTACGGCGCGTCGCGGCTCGATTTTAGTTGTAAAA +CTAAATGTATTTGTCCGTGTGGAGTACACGACTTTCCCAAGCGTTGTCCG +GCAGGCGATTGTCAATGGAGCGCGAAAATTCAATGAGGAAGGCCAGAACC +CCGTGGGATTTTGAAATTTTTTTCAGATATGACAAAAAATGACAATGCCA +AATTTAAAATGCATTAATAGCGGACAGAGGAGAGAATAAGTAGGGAAAAA +AGAAGAAAAAAAAAGACGGGCGGCGCTTCTCTCATAAAATAGTAATTGGT +CATTCTAAGTGTGCCTATTGTTCCTTATTTTTCGCTTTTTCTTGGCTCAT +TTTAGCTCAAAAACCAAATGTCGAAGAGTTTCTAGAGAACTAGGCCACGA +GAGGGAAAAACTAGGTCACTGGAATTTCTACGCTAAAAATTTGGCCCAGC +ATATTTACCTACGTGGCCGTAAAAGAGAAAGTTCGGCCACTAACTTTTTC +GGGCTATCAAAAAAGGTTTACCAAAACTTTATAAAAAAGAAACCACTTGT +GGAAAATGGCCGTGCATACTAAAGTTCGGCCACAAACTTTTTTAAAACTT 
+TTTTGCTCAAATTGAAGTACAGGACCACTTCGATGGGCGGTTTTCACTAC +TTGGCCGTGTAAGAGAAAATTCGGCCACCAACTTTTATTCAAGGCCACAC +CTCCAAATCCTTCAAAGCTTCAAAAATTTTCTACGTGGCCGTGTACGAAA +AAATTCGGCCATCATCTTTTTTTTGGGTCACGTCGCTAAATTCTTCAGAG +CTTCAGAGATTTTCTACGTGGCCGCGGGAGAGAAAATTCGGCCACCAAAT +TATTTTTTAAGGCCAAACCTCCAAATCCTTCAGAGCTCATATTTTCTACG +TGGCCGTGTACGAGAAAACTCGGCCACCAAATTAATTTTGAAGGCCCCAC +CTCCAATTTCTTCAGAGATCATATTTTCTACGTGGCGTGGCCGTGTACGA +GAAAATTCGGCCACCAAATTCTTTTTTAAGGCCAAACCTCCAAATCCTTC +AGAGCTCCTATTCCCTTCGTGGCCGTAGGAGAGAAAGTTCGGCCACCAAA +TTAATTTTTTAAGGCCACGTCGCCAAATCCGTCATATTATGAGGAAAAAA +GTAAAAAATAATACTAATAACAATTGAAGAAAAATTGAAAAATATAAAAA +TATAAGAGACTAGTAAATGAAGAGGAATGAAGAGCGAATAGGAGGCCAGA +GAGAGAGAGGGGAGGAAATTGTGCTGTTGAAGACCTTGAAGAGAGAGGCA +CAGCAATAGATAAATTGAATTACAATGGGGTGAGGAGCCGGGTCTGTCCC +CCCCCCCCCCCCCCCCCAAAAAGTGCGAACAAATGTGATGAAGCGCAGAG +AAAATCGGGAGGGGGGGGGGGGGGGGGGGCTGTGCTCCAGGTTGATTGAG +CCAAGTGTTTCGACTTTTTGCCGAACACGTTGAGCTCTCCGATGAGCCGG +GCTCACCGGAGAGCCCATGGAGCCCCATGAAGCAGAGGGTCACAAGGTTT +GAGACTTCCGGGAGATAGACTGCACAACGTACAAAGACTAAAACTTTTTT +TTTTTGCAGGTGAGGCATGAAATTTAGAGCTTTTTTGCAACAAAATCCAA +CATTTTTTTTGTAAAATAAGGTCAAATGACGAGTTTTTTCTATAATCTGG +CCGAGAAAACCTGAAAATTGATCTACCTTGATAAACAGGGCGCGCGGGTG +TAAAGCGGAGCGTCGTTGTGAAGCTGAAATATTGTTGAACGAATTCTGGA +ATTCTGGCTTCCCTCATAAATTGAAATGGAAGAGTTTTTGCCAAACTAGG +CCAGGCCATATCTGGGGTAGATTTACGGCGCGCTTTTTTTTGGGCATAAA +ATTCCCGCATTTTTGTAGATCAAACCGTACAGCCTGACATCACGTGTTTT +TGGCCCCGCTAAACAGGGCGCGGGTGTAAACATTCAAATATTATGTGAAA +AAAACGCATGAATTTGAACAATTTTGCGTAGAATTTTTGAACAAAAAAAT +ATATGAATAACTAATGCAAAACGAACAGAACTAAATTTTCTGAAATTGAA +GAGAGGGGAAAAAAATTGGGAAATTTTGGACATTTTTCCCAACGACGCAT +TGAGCCACCGGACCCAGATGAGCAAATAAACCGGGGGAGCAGGAGGAAAA +TGGAAATGACAAAAAAGGCGGAGGAGGAGAAGCTCACGTATTTAGTATCC +CATAGAAAGAGTCAATTTTCATAAACTGCGACAATTGACTACCAGGGAGC +AGTTGCGCTCTACCGACAAAATCAAGCAAGGTCCACGTAGTGCCAGTCTG +TCCCATTACGGTTTGGTCTACAAAAAAATGCGGGAAAATTTGGCTCAAAA +AAATGTGACGTCAGCACACTCTTAACCATGCGAAATCAGTTCCCGCATTT +TTTGTAGATCTACGTAGATCAAGCCGAAATGAGACACTGTGGCACCACGT 
+AAAGGTCCTTCTTACAGAGTGCGCTAGAGCGCGATTGCCAGTTTTCTAAA +AAAAAAATCTAACATGAAAAAATGTGCGTAAAAGCGGAGTTTCGTTGCGA +TGTAAAATTTGGAGAAAAATTGAAAACTGAGGAGGAAGAAAAAGTTAAAT +GCCTCTTCGCAAGACCGGGATTAAATAGTATTTTACTTCTTTTTTTTCCT +TCAACTTTTTGAAATTTTTGGCGGAATGGGGAAAAGGAAAAAAAAACGAA +ATCATCGATAAAAACAACACTAGGCCACCCAGAAGTAAACCTAGGCCACC +TTATAGAAATTCTAGGCCACCCTGTAAGAATTTTAGGTCATCGCGTAGAA +AAGCTAGGCCACCCTGTTGAACTGCTAGGCCACGCTGTTGTACACTTAGG +CCACTCTGAAAAAAATCTAGACCTCCCTGTAAAAAATCTAGGCCACCTTG +AAAGAATTTTAGGTCATCGTGTAGAAATTCTAGGCTACGTTTTCGAAATT +CTAAACCACTCTGTAGAAAACTTGGGTCATCTACCGAAATTCTAGGCCAC +CCTGCAGAAAACTTAGGCCATCTACCAAAATCCTAGGCCACCCTGTAGAA +AACTTAGGTAATCTACCAAAATTCTATGCCACTCTGTAGAAAACTTAGGC +TACCTCGTTAAAGTTCTAGGCCACCGTGTAGAAATCTTAGGCTACCTCGT +TAAAATTCTAGGCCACCCTGTAGAAAAATTAGGTGATCTACCAAAATTCT +ATGCCACCCAGAAGTAAACCTAGGCCACCTTGTATAAATTCTAGACCACC +CTGTAGAAAACTTGGGCCATCTACCAAAACTCTAGGCCACCCTGTAGTAA +ACTTAGGTCATCTACCAGAAAACCTAGGCTGTTCCGTTACGCCGCATGCT +AAAAAATCCAGACCGCCCTGTAGAAAATCTAGGTCACTCCCTACAAAACC +TAGGCCACCCTGTGAAAATTCTAGGCCACCTTGTGAAAATTCTAGGTCCT +TCCCTAGAAAAACTAGGCCACCCTGTTAAACTCTTAGGCCACTCTCTAGA +AAAACTAGACCACCCTGTAAAAAAACCTCGATGGCCTAGAAAAAAGCTTG +GGAAATTTTGTGGAAAAAGTTAAAAAGGAAAATCTAGGCCACCCACAAAA +AAGCTAGGCCACGTGGCAAGAAACTTCAAAAGTGACGAATGATGAGACGA +GAGGCGTATGTTTGAGGTCAAAAAGGGTGAAAAAGAAGAAAAAAAGACGA +GGCGGAATGTTTCTTTTTTCTTTTCAAACTGAGCATCTCCCCGTCTCTCG +GCCACCAATACTACTACTACTACAACTATTCATGTTTTTAGAGAGCCCCC +CCCCCCCCCCCCCCCGGCCACTTGTCAGCTTTTTTGACGGAAGATGCCAA +CACACAATCGAAGGGGAAACCATAAAAACCCGCCAACGACACTCCGTTTT +AAGGCCCGGATGCCCAGATGTATGAAAAAATGGAGAAACTGAAGGCTTCT +CCCCTCCCCCCCCCCTCCCTTAAAAAATGATGAATTACTAATGGTGCCGC +CCATGTTGTGTTTGTTGCTTTCCCCCCGTGCTCCATCATTGGGGGAACAT +GACAGCCACCTTGAGTTACATGTATACAAAGTGACAGGAGGGAAAGATAG +GGGTAAAAGTGCAAAAAGGGAGTCGCGGGTTCGAACCAGTGAGGGTTTGC +AAAATTTGGGCTGTGCGCGGCGCCTTAGACTACTGCGCCACGCGTGCGAA +CTGTAAATAGAACTGTCAGGCTAAATACGAACGTTCGGTTTTTAAACTCG +ATTGGCAAAAATGAAATGAATGAATAGACAGGAATGACTCATATTTTTTG +CATAAAGGGGCCTGGGTCTGGGAACTAGGAACTAAACTAAATGAAGGAAA 
+ATTGAGGCATCTCCCGCCAATTGAGTAGAAAAGTGATGAGAGCGGCAAAA +AGAGATAGGGGGGGGGGGGGGGACCCATTCATTTTACACTGGACACCACA +CTCCCCACTCTCTCTTTGATGACGAAGGACATGAGTACGAACTCGCGAGC +ACAGAAATACGACACGTTTTCTTCTCATTTTTTTTTTGCAAAAGGTGATG +ATGATAATCGCTTCTAAACGAGGGGAAGTGTACTAAATAATAAAATTGCG +AGTGGATATTGGATTTTTTCGTTATTTTCCGCTCTGAAAAACCTGAAAAT +CAGTCGGAAATTCGAGTTTTGGCTAACTTTTTGTAAATTTTGTTTAAAAA +ATCACTTTTCGCTGCGAAACCACCTGAAAGGATATAGTGGGCATGCGCCT +TTGAGCGCTACAGTGGAGGGAATGCAGTATCTCTTGGAAGTCAGATTTGC +CGATTGTTTTAACGCAAAACCTGAAGTATAGAAAAGAAAACATGAAAATT +TATTGAAAATATAATAAATAAATTAAGCAAATTACATATCAACTGAGAAA +ATCACCTGCTGGAGAGAAAAATTGAAAAAAAACCAGAAAATCGGCAGGAA +AGCAATAATCGAGCAAGATCCTTGGAATTTTTAGGTGCAGATATCGGTTC +AAAAGGAAAACACTATTTTAGCTTTTTTTTGTCTTAAGTTCAATGTTATA +ACACAAGAAACGATATTTCTATGGCATAATTTTTGGGTCCTAGAAAGCAG +ATTTTCACGTAGATTGGTCTGAAAATAATATTGGAGTAATTTTTATTATT +TTTCCTTGCATTATAAAATTATTTCTCTTTTATTTCTCAATTCTAAAACT +ATTTTCCCGTTCTTTTTCCCTATATCTCGCGCTTCTCCGGGATGTTGCAT +CCCATTCGACGTGAGGAGTCTCGGAGTCTCTCCGCTGCAAACGCGCTCCA +CTGGACACAGAAAATAAAAAAAATAAATTGTTCGAATTCATGCAATTTTC +TTCTAATTTTCTCCGTTTTTCGTCAATTTTTCTGCAAAAAAGACAAAATT +TACAGGTAGCAATGTATGGGGGATATCAAGGAGCATATGGGTAAGGCTAA +AAACTAGGAAAAACTAGGAAAAACCCATGGAAAACTAAAAAACTCTGAAA +TTTCAAATTTCAGAGCCTACGGGAACCAGCAGGCGGCTTCACAACCATAT +CAACAGCAAGGACAACAGAATTATTACCCGTAACTCGAAAAAAATTCAAA +AAATTCGAAAAAAATCAATTAAAAAGATCAAAATTTTCAATTTTTTCAGC +GGAGTGAACCCGTACGCGGCGAATTATGGGTACGCGGCAGCACAGCTTCC +ACCACCACCTCCACCACCGCCAGTATGCCTTAAAATTGAAGAAAATTTGA +TTAAAAAATTGAAAAAATGCAAATTTTCAGGTAGCCGATCCATACGCCGG +CCAATTTCAGACATTTCCCTCTCAAGCCACCAAAATAGCGCCGAATCCCT +ACTTTAAAAAGCCACAAAATCAGCAGCAACAGGGTATTTTCGCCGTGTTT +TCTGGCTGCAAAAAAATTAAATTATCGATTTTTTTAAGGCTACGACGCGG +CGGTCTACAATTACGCTCAGCAGAATACTCCGAAAAATTGGAAACACGGC +GGCGGCGGCCGGCAGGGGAGACAGGGATCTGGCGATAATAAACAGTACTA +TTGTGAGGTAAAAAACGTGGAAAAAATCGAGTAAAACACAGTTTTTTATT +CAAAAATCTGAGAATTTCCGTAAAAAATCCTGTTTTTTGACGTTTTTAAG +CAAAAAAATCGGGTTTACAGCACTTTTAGAGATAAAAAAGTGGAAAAAAT +CGAATTTTGGACCTGATTTTGGGCTTGAAAGCATGATTTTTATCAGAAAA 
+CACAGTTTTTTTTGTCAAAAATCTGAGAATTTCTATGAAAAGTCTTGTTT +TTGGATGTTTTTAAGCAAAAAAATCGGGTTTACTTCATTTTTAAGGGTAC +AAAAGTGGCGAAAAGTTAATTTCTTGTAAATTTTCACATGCAAAATATTG +AATTTATTAGGTTTTTTCCAGAAAAAGTAGTTTTTTTGCTACATTTACAG +GCTTTTAAAACTGAAAACATGTTAAATTTCGAGGAAAATCGACAAAAAAA +CCCATCTTTTGGCCAAATTTAAATTTTTGAGCAAAAAAATCGGGTTTGCA +GCACTTTTAAAGATAAAGAACTGAAAAAATTCAATTTCTTGGAGAAAAAA +GCAATTTCAACACCAAAAATAGCAAAAAATTCGGGAAAATTGTCAATTTT +TTTGAAAAAAAACTTGAAAAACCGGATTTCCAGTTGTTTTTTCATGATTT +TTTCTCAAAAATCTCATAAAAATTGGAAAAAATTCCGAATTTTTTACTGA +AAATTCGAGAAAAATTGAAATTTTTTCAATTCCGATTTAAAAAACATTTT +TCGCTAAAAAATTGAATATTTTTAGGCTTTAAAGTTCGGAAAAGATGCTG +AAAATTCGGAATTTTCTGTTTAAAGAAATTTTTTTTGTGCTGAAAAATTG +ATTATTTTTATGCTCCAAAAACTGGAATTTTTGAAAAATTCGGACAAAAA +AATTTTTTTATTAGTTTTTAAATTTTTTTAATTTTTCAACCCGCTTTTTG +CCCAAATTTAGATTTTTTATTTAAAAAATCGGGTTTACAGAACTTTTAAA +GGTAGAAAAACTGCAAAAAGTTAATTTCTTGGATATTTTCAGGCTTTAAA +GTCCAAATAAACATGTTGAAAATTGAAATGTCTTCAATTTCGATTAAAAA +CATTTTTTTCGCTAAAAAATTGAATATTTTTAGGCTTTAAAGTCCAAAAA +ACAAGCTGAAACTTGAATTTTTTTCATTTTCAATTTAAAAAAATTTTGCT +CAATAGATTTTCAGGCTCCAAAACACTGGAATTTTGGACTTAATCGAAAA +TTCGGACAAAAAAATATTTTTATTAGTTTTTAATTTTTTTAATTAAAAAA +AATCCCAATTTCCAGGTGTGCAAAATCTCATGCATTTTTAGGGTTAAAAT +TGGAAAAAATTATCAAAAATTTTCGAATAAAAATGGAATAAATCTGATTT +TTATAAATTTTTCGAAAAAAAAATGATTTGTTGGCAAAAATTAAATATTT +CAGGCTTTGGAAGTCCAAAAACCATGTAGAAAATTCGGAAAAAATCGAAA +AATTGGAAGAAAAAAATTTTTAATTTCAAAAAATTTCAAAAAAATATTTA +AAGCTGGTGTAGTCGACTTTATTTATTGCTTAAGTAGACTCTAAATTGCC +TGGAAACACCGAATTTCATAATGAAACTTCTTGAAAACTTCTCAAAAAAA +AAAGTTATGACGGCTCAAAAAATGGACTAAAATTAGTTAAAATTTGAAAT +TTGACCGACTTTTCAAGCGGCTGGAAACATTTTTTCTTTGAAATCACCGT +CAAATTTTGAGTATACTGTGCAATTATGTTGCGTTTTCAACTTCATTTAG +GTATTTTAAAGTCAAAGGGCGGCGAGATTTTTAATTTTTTGAACCAAATC +TCGCCGACCATCGACTTTAAAATATCTAAATCGAGTTCAAAACTCAAGAT +AATTACATTATATAATCAAAATTTGACGGTGATTTCAAAAAAAAGGTAGT +TTCTAGCCGCTTGAAAAGTCGGTCAAATTTCAAATTTTAACCAATTTCAG +GCCATTTTTTTAGTCGTCATAACTTTTTTTCGAGAAGTTTTCAAGAAGTT +TCATTATGAAATTCGTTGTTTTCAGACAATTTTGAGTCTGATAAAGCAAT 
+AAATAAATTCGACTACACCGCCTTTAAATATTTTTTTGAATTTTTTTAAA +ATTAAAATTTTTTTTTCCAATTTTTCGATTTTTTCCGAATTCTCCACGTG +GTTTTTGAACTTCCAAAGCCTGAAATATTCAATTTTTGCCAACAAACTAT +TTTTTTTTCGAAAAATTTATAAAAATCAGATTTATTCCATTTTTATTCGA +AAATTTTTGATAATTTTTTCCAATTTTAACCCTAAAAATGCATGAGATTT +TACACACCTGGAAATTTGGAATTTATTTGTAGAATTTTTGGATTTTTTAA +TTAAAAAAAAGTTTTTTCCGATTTTTTTTCCGAATTTTCAACATGGTTTT +TGAACTTCCAAAGCCTGAAAATGATTTTTTGAATAACTTTTTGATTTTTT +CGGTTATTTTTGGTTTTTTTTTTTAATTTAAAAAAATTAAAAAATTAATA +AAAATATTTTTTGTCTGATTTTTCGATTAAAGTCCTGAAAAAAAAATCTA +TTTTATAGCAAAAAATCCTAATTTTCAGCCTGTTTTTTTGCCTTGATATT +CAATTTTTACAAAAAAATTTTTTTTGGAGTTTTTTGCCTAAAAAATCGAA +AAAAAAAATCATCTTTTCTCGAATTTTTGGAAGAAAAATTAAAAATTTCA +AAAATAATTCAATTTTTTGGTGTTTTTCGGGTTTTTTACATAATTATTTT +TTAATTTAAAAAGTTTGTAAATAACCATAAAAATTTATAATTTTTTAAAT +TAAAAAATAATTATTAAAAAAAACTCGAAAAACACCAAAAAATTGAATTA +TTTTTGAAATTTTTAATTTTTCTTCCGAAAATTCGAGAAAAGATGATTTT +TTTTCGATTTTTTAGGCAAAAAACTCCAAAAAAAAATTGTTTTGTAAAAA +TTTAATATCAAGGCAAAAAACAGGCTGAAAATTCGGATTTAAAAAAATAA +TTTCTTGCTATAAAATAGATTTTTTTCAGGACTTTAATCGAAAAATCAGA +CAAAAAATATTTTTATTAGTTTTTTAATTTTTTTAAATTAAAAAAAAAAT +CAAAAATATCCGAAAAAATCGAAAAATTATTCAAAAAATCATTTTCAGGC +TTTGGAAGTTCAAAAACCATGTTGAAAATTCGGATAAATAAAAATATTTT +TTTGTCCGAATTTTCGATCAAAGTCCAAAAACTTCCAGTTTTTGGAGCCT +AAAAATAATCAACTATTGAGCAAAATTTTTTTAAATTGAAAATGAAAAAA +ATTCAAGTTTCAGCTTGTTTTTTGGACTTTAAAGCCTAAAAATATTCAAT +TTTTTAGCGAAAAAAATGTTTTTAATCGAAATTGAAGACATTTCAATTTT +CAACATGTTTATTTGGACTTTAAAGCCTGAAAATATCCAAGAAATTAACT +TTTTGCAGTTTTTCTACCTTTAAAAGTTCTGTAAACCCGATTTTTTAAAT +AAAAAATCTAAATTTGGGCAAAAAGCGGGTTGAAAAATTAAAAAAATTTA +AAAACTAATAAAAAAATTTTTTTGTCCGAATTTTTCAAAAATTCCAGTTT +TTGGAGCATAAAAATAATCAATTTTTCAGCACAAAAAAAAAATTTTTAAA +CAGAAAATTCCGAATTTTCAGCATCTTTTCCGAACTTTAAAGCCTAAAAA +TATTCAATTTTTTAGCGAAAAATGTTTTTTAAATCGGAATTGAAAAAATT +TCAATTTTTCTCGAATTTTCAGTAAAAAATTCGGAATTTTTTTCCAATTT +TTATGAGATTTTTGAGAAAAAATCATGAAAAACCAACTGGAAATCCGGTT +TTTCGAGTTTTTTTTCAAAAAAATTGACAATTTTCCCGAATTTTTCGCTA +TTTTTGGTGTTGAAATTGCTTTTTTCTCCAAGAAATTGAATTTTTTCAGT 
+TCTTTATCTTTAAAAGTGCTGCAAAAAACTACTTTTTCTGGAAAAAACCT +AATAAATTCAATATTTTGCTTTTTTTTTTCCTTTTTTTGTGAAAATTTAC +AAGAAATTAACTTTTCGCCACTTTTGTACCCTTAAAAGTGCAGTAAACCC +GATTTTTTTGCTTAAAAACTTCCAAAAACAGGAATTTTCGGGTTGAAAAA +TTAAAAAATTTTAAAAACTAATAAAAATATTTTTTTGTCCGAATTTTTCC +AAAATTTCAGTTTTTGGAGCCTAAAAATATTCAATTCTTTAGCGAAAATT +GTTTTTTTTTCAAAAACTTCCTACGACTACACCACCTTTAAAAAACCAAA +TTAAATTAGAGAAATTCTTCAAAAAATCCCCAAATTTCCAGGTTTGCAAA +ATCTCGTGCGCCGGCGGAATCACATACAAAGAGCATCTGGAAGGTCAACG +GCACAAGAAAAAAGAAGCTATGGCAAAACAGGGCATACCGAGCACTTCCC +TCGCTAAAAATAAGCTTTCCTATCGTTGTGACCTATGTGACGTCACTTGC +ACCGGTCAGGACACGTATTCGGCTCACGTTCGTGGCGGAAAACACTTGAA +AACTGCACAATTGCACAAAAAATTGGGAAAACCGGTGCCTGAAGACGTTC +CGACGATTATTGCTCCGGGAGCCGATGGGCCGACGGAGACGAAAGCCAAA +CCGAAATGGCATCAACAAGCTCTGCCTGGCGGGAAAATGTGCGGAATTTG +AAAAAAAAATTTTGGAAAAAAACGAGAAATTGCACATTTTTAGGCTCAAA +AATCTGAAAATTTAGCAAAAAAATTTAGAAAAATTTGGATTTTTAGAAAA +AAATTAGCGTTTTCTCGCCAAAAATGCTCAATTTTCGATAAAAATGTACA +TTTTTTTTTTTGGATTTTTAAAAAAATATTTTAGGGCTCAAAAATCTGTA +AATTCTGCAAAAAAAATTCTAAAAAATTTTGAAATTTTGAGATTAAATCT +TTCGCCAAAAAAACGAAAAATTTGAATTTTACAACTTAAAATGCTCAATT +TTCGAGGAAAATGTAAAATTTTTTTTCAATCTGAAATCTGTAATTTCTTT +TAAAAAATCGATTTTTTAAAAAATTTCTACTGTAAAATTTTGAAAAAGTT +GAACGACGTCAAAAAGAAGGTAAAAATCCGAAAAAAAGCAACAAAGAACT +AGGAATTTTAAAATTTCCAAAAATGGTCTGAAATTTTGAGAAATCAAAAA +AAACAAGTTTCTTTCAGTTTTTTTTAGAAAAAAAAATCGTATTTCAGAAT +TTAAATTTAAAGCATATTTTAAACTTTTAAACGCAAAAACATTATATTTT +TCTTTAAAAAAAAAACAATTTAAACGCATTTATTGCTTTATTAGACTCAA +AATTGTCTGAAAACACCGAATTTCATAATGAAACTTCTTGAAAAAAAAAA +GTTCTCAAAAAAACAAGTTATGATTGCTCAAAAAATGGCCTCAAATTAGC +GGAAATTTGAAATTTGACCGACTTGTCAAGCGGCTGGAAACTAGCTTTCT +TTTTTGAAATCACCGTCAAATTTTGAGTATAAAATGTAATTATCTTGCGT +TATCAACTTGATTTAGGTATTTTAAAGTCGATGGACGGCGAGACTTTTAA +AGGTGGAATACCGAAATTTAAGACTTTGCTTTTTTAGACCGAAATTTTGG +CAACTTATCGGTGCGACACTGCAAAATTGCCAAAGTTTGAGATTTTAGCT +AAAAATAGGCCGTTTTTTCCAAAAACTTTGATCGGCCATAACTTTTTTTT +GGAAAATTTTCAGAACGTCTCATTACGAAATTCGGCAGTTTTGGGCCATT +TTGGGTCTAAAAAAGCAAAGTCTTAAATTTCGGTACTCCACCTTTAAAAA 
+TCTCTCCGTCCATCGACTTTAAAATACCTAAATCAAGTTGATAACGCAAG +ATAATTACATTTTATACTCAAAATTTGACGGTGATTTCAAAAAAAAAAAG +TTTCCAGCCGCTTGGCAAGTCGTTGAAATTTCAAATTTCCGCTAATTTGA +GGCCATTTTTTGAGCAATCATAACTTGTTTTTTTGAGAACTTTTTTTTTC +AAGAAGTTTCATTATGAAATTCGGTGTTTTCAGACAATTTTAAGCCTAAT +AAAGCAATAAAAAATTAGAATATACCACTTGAGAAAAATTAAAGATTTTT +TTAAGTTTAAAATAAAAAAATATGTTTCTTTCCGATTTTTTGATAGTGAG +ATAAAAGTTTTGGCGGGAAATTCAAATTTTCTAGAAAAAAAATTGAAAAA +ACTCATAGTGTATAACTGCACAGAACGGCTGGTTATGAGTATAGGTGTCA +AAGTGTCTCATTTCGGTTTGATCTACAAAAAAAGCGGGAATTTTTCCCAA +AAAAAGTTACGTCAAGATGCTCTTTTTTATTAAATTTAATTAAAAAAGAG +AGCATGCTGACGTAACTTTTGAGGAAAAATTCCCGCTTTTTTTTGTAGAT +CAAGCCGAAATGAGACACTTTGACACCACCACGTTCAGACAAAATCCCCC +ACTAAATATTTGGTAAAACTAGTTCAAAACCCGACTGTAGTACATTTTTG +GCAGAAATTTGAAAATTTCAACTTAAAACCGCTCAATTTTTCCAAAATTT +CACAGTATGCATTTCAAAAATTATGGTTTTGTTTTTTTTTAATCGAAAAA +TATATTTTTAACAAAAATGGACCATTTTTAGTTGTAAAATTCAATTTTTT +GGCGCTTTCAAATTTTTAGGCAAAAAATGTGCAATTTTCTCCAAAAATTT +TCAGATTTTTTAACCCTAAAATTAAAAAAAAATCTAAAATATTAGAATTT +CCCGCTTTTTTTTTCAAATATTTCAATTTTTCCAATTAAAACACAAAAAT +TGAGCATTTCTAGTTGGCACATTCAAATTTTTAGGCGAAAAATTTGATTT +CAAAATTTCAAAATTTTTCAGAATTTTTTTTTGCTAAATGTTCAGATTTT +TGTGCGCAAAAATATTTGAAAAATCCAAAAAAAAAACTCGAAAATTGAGC +ATTTTCAGTTGTAAAATTCAACTTTTTAGGCGGAAAAATGTGCAATTTTT +CCCCAAAAAATTCAAATTTTCCAACTTTTCTCTCAAATAGCACAGAAACA +GAAAATTTGTTATACCTAATGTGGAAAAACTGAATTAAAACAGATTTTTT +GAAAAAATGCACTTTTTTTTTTGCCAAAAAATAGTTCAAACCCCGACTTT +TTGTTCCAGAGTTTCCAGGTCATCGGTATCATCGCATAAATAAATTTTGC +GGGAAAAATTCCCGCTTTTTTTGTAGATCAAACCGAAATGAGACACTTTG +ACACCCCACGTTCAGACAAAACCCCGACTTTTTTCCAGAGTCATCGGCAT +CAACACGGTAAACTTCGTCGGCGGTACGAAGCTCAACTCCACCGGACAAC +TCGAGGAGAAGAAGCGCGAAGTTGCTGCCGCCGTGAGCTCAGTCGGCCGG +AAAACCGGTGGTGCTGCTGCAACGACGACGATTGAGGTGGAAGACGAGAA +GCTCCGAGCAATGATCGCCGCCGAGGAGGTACAACCTGTCGGCGAGGAAC +ATGTCACTGAAGAACGTGATGCTACCGGAAAACTGGTGCAATTCCATTGT +AAACTGTGCGATTGCAAGTTCTCCGATCCGAATGCCAAGGAGATTCATAT +CAAAGGTCGTCGACATCGTGTCAGCTATCGGCAGAAGATTGATCCGACGT +TGGTGGTGGATGTGAAGCCATCGAACAAAAGATCACAGGAGAAGCGGAAG 
+AATCAGCTTCCAGCAGTTCATGGTGTGTGGATTTTTTAACAAAAAATCAA +CAACAACACAAAAAACATGTTCCAATAAAATTGGAAAAAAAAATTTAAAA +AATCCCAAATTTCCAGGTGTCTAAAAATCTCATGCATTTTTAGGGTTAAA +ATTGGAAAAAATTATCAATTTTTCGAATAAAAATGGAACAAATCTGATTT +TTATAGAAAAAATAATAATTTGTTGGCAAAAATTGAATATTTCAGGCTTT +GGAAGTTCAAAAACTATGTAGAAAATTCAGAAAAAATCGAAAAATTGGAA +GAAAAAAATTTTTAATTTGAAAAAATTCAAAAAAATTTAAATTTTTTTAA +AAATTGAATTTTTGAGTAAATAATTTTTTGCTATAAAATTCAATATTTTC +AGGCTTTCAAGTCCAAAAACCGTGTTGAAAATTCCGAAAAAATCGAAAAA +TTGGAAAAAAAATTTTAATTTAAAGGTGGACTACGCTCAGTGGGGGGAAA +TTGCTTTAAAACATGCCTATGGTACCACAATGCCCGAATAGCATAATAAA +AAAATTTTAAAAAATATTCTAGATTTTATATGATTTTTTGAAAATTGGAA +AAATCACAGTTTTTGCCTAATTCCTATTTGAATTACCGCCAATTGGATTT +GTTCGGTGGAGCGCGCTTGCATGTTTTTAAATTTATTTATTTTATTTATT +GTTATTTTCCACCGATTTTTAATGTTTTCGATGTATTTTTGTTTGAATTT +TAGAGAAAAAGTCAAAATAAATACTAATTATCGATTAAAAAGCACGCTTA +CAGGCGTAAATCAGTGAAATTAATTAACTCAGGTTCGAAATCGTTTAAAA +TCGTTACTTTTTCATTTTTACGCCTGTAAGCGTGCTTTTTAGTCGAAAAT +TTGCATTTATTTTCACTTTTTCTTTAAAATTCAAGCAAAAATACACCGAA +AACATGAAAAATCGGTGGAAAATAACAAAAAATAAAATAAATAAATTTAA +AAACGTGCAAGCGCGCTCCATCGAACAAATCCAATTGGCGGAAATTTAAA +TGGGAATTAGGCAAAAACTGAGATTTTTCCAATTTTCAAAAAATCATATA +AAATCAGAAAAATTTTTGTTTTTTTTTTAATTTTTTTAAACATGACATTC +GGTCATTGTGACCCCATAGGCATGTTTTAAAGGTGGTGTAGTCGAATTTT +TTTTATTTCTTTATTAGACTCAAAATTGTCTGAAACCCCGAATTTCATAA +TGAAACTTCTTGAAAAATTTTCAAAAAAAAGTTATGGCGGCTCGAAAAAT +GGCCTAAAATTAGTGAAAATTTGAAATTTGACCAACTTATCATTGTCGCA +TCGGCTGGAAACAATTTTTTTTTTGAAATTGTCGTTCAATTTTGGGTGTA +CAGGTCGATTATCTTGCGTTTTCAGCTTTATTAAGGTATTTAAAAGTCGA +TGGACAAATGGATCTGTCAAATTTTTTACCAAATCTTTTCGTCCATCGAC +TTTTAAATACCTAAATAAAGCTGAAAACGCAAGATAATCGACCTGTATAC +CCAAAATTGAACGACAATTTCAAAAAAAAATTGTTTCCAGCCGCCGCAAC +AATGATAAGTTGGTCAAATTTCAAATTTTCACTAATTTTAGGCCATTTTT +CGAGCCGCCATAACTTTTTTTTTTGGAAAAGTTTTCAAGAAGTTTCATTA +TGAAATTCGGTGTTTTCAGACAATTTTGAGTCTAATTAAGCAATAAAAAA +AATTCGACTACACCACCTTTAAAGCAATTTCCCCACTGGGCGCTACTCGA +CCTTTAAAGGGGGAGTAGAGTTTGTGGGTATTTTGCTTAAATAGACTAAA +ATTGGTCCAAAACCACCGAATTTCATAATGAGACTTCACAAAAAATCTCC 
+AAAAATTTTTTATGGCCGGTCAAAATTTCGAAAAAAAAAGATGCAACTTT +TGCTAAAATCTGAAATTTCGCACACTTTTCTTTGTCTAGCAGCCGCTGGA +TTTTAGTTTTTCTAAAATTATCACCCTCTAATCCTTCTTTTTGGTAATTT +ATCTCGCGAAAATCCGTTGATTGAGACAACTTTTAGGCCGATAGGCATCC +AAAAATGATCAATCTAGGGATGCCTATCGGCCTAAAAGTTGTCTCAATCA +ACGAATTTTCGCGAGATAAATTATTAAAATAAGGATTAAAGGGTGATAAT +TTCAGAAAAACTAAAATCCAGCTGCTGTGACAAAGAAAACTGTGCGAAAT +TTCAGATTTTAGCAAAAGTTGCATCTTTTTTTTCGAAATTTTTTATTTTG +ACCGGCCATAAAAAATTTTTGGAGATTTTTTGTGAAGTCTCATTATCAAA +AATTCGGTGGTTTTGAACCAGTTTTAGTCTATTTAAGCAAAATACCCACA +AACTCTACTCCCCCTTTAAAGGAATGAAAATAATTATTGGAACGTTTGTT +TTTTAAAATGATTTTTTGTTAAAAAATCCACACCGACGAAAAATCCACAG +TTTCAGAGCCTCCGAGCTTCATGAAAACGCCGTGGTTCGCCCCACCGGCT +CCAGAAGGCCGGGAATTCAATATAGTAGATGATAGAACTATTAACGAAAA +GTACGCCGGGCTTAATCCGGGCGTCGAGTTCATTTCCAATGTGGATCGCC +TTATTTCGGATATCAATGAATCGCTGAAGTATGTGTCGGATAAGATCGAG +CGAGACGTTCGGAAGATCCCTGAAGACGTGGTGGAGCTTCCCACCACCAC +CACCACAACGGAGCAGCCGCCGCGGACAGTGTTGGGATGCTCGAGAGTTG +GAATTATTGCGAAGGGAACGTTCATCAAAGGGGATCGGTGTGCGGAGGTG +GTTCTGACGTGTACGCCGGTGCCGACTTCGGGGCTTGTTGAGCAGATTAG +GCGGTTGTTTGGAGAGTCGACGGTACGGGCCGGCCTTTTTTTTTTGCCGG +AAATTTGGGGATTTACCGGTTTGCCGGAATGTTTTTTTTTCCGGCAAATT +GCCGGTTTTCGCCGGAAATTTGAAAAACGGCAAACTGGCAATTTGCCGGA +AATTTTCAGTTTCGGCAAACTGCCGCGATTTGCCGGATAACAATCGTTTA +TTCCGGCAAATTCGGTAAGTTGCCGGTTTGCCGATTTGCCGGTTTGTTGA +ATTTAGCGGAAATACGGCAAACGGCAAGTTGCCGGTTTCCCGACTTACCG +GAAATTTGAAAAACGGCAATTCGCCGGTTTGCCGGAAATGTGAAAAACGG +CAAATTTCAGAATTCGGCAAGTTGCCGGTTTTTGCCGGAAATTTTCATCT +TAGGAAAATTGCCGGTTTTTGCCTGAAATTTTCATTTCAGGAAAATTGCC +GGTTTGCCGGTTTGTCGAATTTTCCGGAAATGTGGAAAACGGCAAGTTGC +CGGTTTGCTGATTTGCCGGAATTTTTAATTTTCGGCAATTTACCGATTAG +CCGGAAATTTTTTAACGGGATTTTTGATAAGACGGATTTTTTTCCGGCAA +ATTCGGCAAGTTGCCGGTTTGCCGGAATTTTTCATTTTTGACAAATTGCC +GATTCGCCGAATAATAATTATGCCAGAAATTTGAAAAACGACAAGTCGCC +GGTTTGCCGATTTGCCGGAAATTTCAAAAAACCGGCAATTTGCAGAATTT +TGCCGGAAATTTTCATATTCGGCAAAGCTACCGATTTGTCGGATTTTTTA +AACCGGTTTTTTCATAAGATAGATTTTTTTTCCGGCAAATTTCCGGCAAG +TTTCCGGTTTGCCGGAAATTTCAAAACCGGCAATTTTCCGAATTCTGCCG 
+ATTTCTTGTTTGACAATCTCAATAATTACACAGACATCTCTAACAATTGA +GCCGGATCCCGAGTCCCCATCAAGCCTTATCGTCACAGCTAACTACTTTC +CCAATATGAAATGTCGTATTCTAATCACTTCAGCTGTAGTTAGAAAAGAC +GATGATAGTATTGTAACTGGATGTGCTGCTGATAAGGATTTGTGCATTTA +TGCGCTCGCTTCTATTCGGAATACAAAGTGGTATGATGTGAGTTTTTTTT +AAATTTTACACATATTTTAAATTACTTTATCATTGGAAAATCTAATAAAT +TAGAGCCCAGTTTACAAAAAAATCTATATTTTTCCAAAAAAAAAATTGAA +AAAAATCGATAATTTCTCAAAAAATGTTTACAAAATCGAACTTTTTCCAA +AAAACTTTGAAAAAATCGATAATTTTAAATAACTGCTGACAATTAAAAAA +AATCGAAAATTTCTCAAAAATTGAAAAAATTAATATTTTTCCAGAAAAAA +TTGAAAAAAATCGATAATTTCAAATAATAGCTGAAGATTTTTTAAAAAAA +TCGAAAATTCGAACCTTTCTCAAAAAATTTGAAAAAAATCGATTTTTTTT +CCAAACTTTTTTTCATCAAAAATTTCTCAAAAATTTGGAAAAAATTAGAA +AAAATCAATATTTTTCCGAAAAAATTTGAAAAAAAAATCGATAATTTCAA +ATAATAGTTGCAATTTTTAATTTAAAAGCGAATTTTTCAAAGATTTTTCA +AATTTTTCACGTTTTTTTCTCTGGTTTTTTTAAATTAAAAAGCGTAATTT +TTACCGAATTTATCATTAAATTACATGAATTTTTCTGAAAGAGCGAGAAA +ATTAAAATAATTCGGAAAAAAAACAATTTTTTTCCAAATTTTCCAAAAAA +TCCCACAGAAACCACCAAACTTTACTTTTTTTTTTTAACAAAAAAAAGTT +TTAAAAAATATCAATTTCCGGCTTTTTTTCTGCCAAAAACTTTGGAAAAT +GTGGGTAATTTTGGCTTTAAAAGTTTAAAAATTGAAAAAATCAGGAAAAC +TTTTTGAATTTTCAGAGTCACTGCCAATACTTGAACTCGTGCCAATCAGT +TATTCGACTGCTCCGAGATTTGCGAAATAAATATCCTGAAGTTGCGTGTC +TTGATGATTATGTAAGTTTGAGAGTTTGGAAAAATATTCGAAATTTTTTG +TGAAAAATTTGGTAAAAGTTCAAAATAATGTATAATTCTTTAATTTTTTT +GCGAAAAAATTAATTTTTTGAAAAAAATTCTCTATATTTTTTGCTAAAAA +GTTTAAAAATCTAAATAGTATGAAAAAATATTTTAGCAGAATCAACAAAA +ATTGTATAAATTCATAATTTTTTGAAAAAAATGAAATAAATTTTTTTTTG +CGAAAAAAATATGCGAAATAGGTTTTTTTAATGCAGGAAATAGTTTTATA +GCTGAAAAAATGAGAAAAAACGGAGAAAATTGAAAAATTACGTACATTTT +GCAGATTTTTTGCTCACAATTTTGTAAAAATTTCGGCAAAAATATCACCT +TATTCAAAAAAAAATTGCTAAAAAAAATTTTTATAATACAAAAAAAATAT +GAAATCTATCTATTTTTTTCAAAAAATCTACCTAAAAATCTATTAATAAT +TTTCAATTTTTTTGTGAAAAATTTTGAAAAAATTCAAAATAATGTATAAT +TCTATAATATTTGGCGAAAAAAATTAATTTTTGAACTATAATTTTTTTTT +TGAAAACTAGTTTCAAAAATCGATATTTCACCTAAAAAAAACCCTAAAAA +TAGGCGGCAAACTATTTTTCCGGCAAATCGGTGGTAATTGCCGAAATTTT +TGGCAAATTGTGGCTTTGCAATTTTTTTTTTGTTGGAAATTTCAGATTTT 
+CAAATTTAATCGGCAAGAAAACTGAAAAATCCCGTTTAAAAAATTCCGGC +AAATCGGCAAACCGGCAAATTGCCGAATATAATATTAGGTCTCCTAATAA +GTTCCGGGTCAAAAATCATAACCTTGTTCGCTGTGTATCGATTTTTATGA +AACTTTGGGAGTTTATGTTATCAACCATGATCTTTCAATTGACAATAGTC +ACAAAATTTTTTGACCACCCCAAGTGCCCGTACTCGGAGCCAATTTTTTC +AGGCATTTTTCTGATCTCGCTTCTTTTTATCTTTGAATTGAGGTTTGTGT +GCGGATTTTGCTTTGTTTAGAATACATCATTAGAAAACGAGAAAAGTTTG +GAAAAAAATCTGTCCAAAAAATTTTTTTTGGTTGATCGTCAAAAAAGCTT +CAAAAAGATTTTTGTCGAAAATTCTAGATTTTTCATACAAAAATGATGTA +ACCAAGTGTAAATTATTTTTATACATACAAAACATATCAATTTAGTTCGA +TACACTAAAATGATAATAAAAAATATAATTTTTTCGGATAATTTTTGAGT +TTTTTGAATATTTCTTAAGTTTCAAATTTCAAACTAAAAGGATTTGTGTG +TGTAAAAATAATTTACACTTGGTTACATCATTTTTGTATCAAAAATCTAG +AATTTTCGACAAAAATCTTTTTGAAGCTTTTTTGACGATCAACCAAAAAA +AATTTTTTGGACGGATTTTTTTCCAAACTTTTCTCGTTTTCTAATGATGT +ATTCTAAACAAAGCAAAATCCGCACACAAACCTCAATTCAAAGATAAAAA +GAAGCGAGATCAGAAAAATGCCTGAAAAAATTGGCTCCGAGTACGGGCAC +TTGGGGTGGTCAAAACATTTTGTGACTATTGTCAAATGAAAGATCATGGT +TGATAACATAGATTCCCAAAGTTTCATAAAAATCGATTTGCAGCGAACAA +AGTTATGATTTTTGACCCGGAACTTATTTGGAGACCTTATACAATTTTTC +AACATTTTCCAGAAAATGGAGCTAATCGTCTCGAACATTATCGATTCATC +TCCAATGTCACTTGGACTATCCGATGCGTTTAAGCGAATTGTCGAAGCAC +TTGCATCTGGATATTTGTATTCTGGTAGGGGGGTTAACATTTAATTTTTT +TAAAATATTGAAAAATGAGCATATAAAAAGTTTTAAAATAATTCAAATTT +TTACAAAAAAAAAATTTTTTTTTTCAATTATTTTGCGTTTTTGTATGAAA +ATTGTGTTTTGAGAACGGAAACTCTTTTTTTTTAATTAAAATTTTGAAAA +AAAATCCTGGAAAAATGCTGAATTTTTCGGGATTTTTCGGAGCACATTTC +CAAGCTCTAAAAATTGCTGAAAATTGAATTTTTCAGAATTTTTTTGAATT +TTGCACAAAAATGACTATTTTAGGAACAAAAAAAACATTTTTCAATCACC +GAAAAAATGTATTAATTTTATATTTTAATGTTAAAAATACGGTATTTAAA +TTTTTTTTAAACGTTTTTTTTACACAAAAAGTTGAAATTAGGTTGAACCC +GGATAAAATTTAGAAAAATTAAAATTTCAATTTTTTTTGCGTTAAAATAT +GTTTTTTAAGGTTGAAAAAAATATGTTTATTCAAATAAACACATTTCTTG +AAAAGTTCTTTTGATTTTTAATATTAAAAAATTTATTTTTCAAAAAAAAA +TTTATTTATTTAATTTTAAAATTTTAAAAACTTTAAATGTGGTGTAGTCG +ATTTTTTAAATTGTTCTATTAGACTCAAAATTGACTGAAAACACCGAATT +TCATAATGAAACTTCTTGAAAACTTCTCAAAAAAAAGATATGACTGCTCA +AAAATGGCCTAAAGTTAGTTAAAATTTGAAATTTGATCTACTTGTCAAGC 
+GGCTGGAAACTATTCTTTTTTTTGAAATCACCGTCAAATTTTGAGTATAA +AATGTAATTATCTTGCGTTATCAACTTGATTTAGGTATTTTAAAGTCGAT +GGACGGAGAGATTTTTAAAGGTGGAGTACCGAAATTTAAGACTTTGCTTT +TTTAGACCCAAAATGGCCCAAAACTACCGAATTTCGTAATGAGACGTTCT +GAAAATTTTCCAAAAAAAAGTTATGGCCGATCAAAGTGTTTGGAAAAAAC +GGCCTATTTTTAGCTAAAATCTCAAACTTTGGCAATTTTGCAGTGTCGCA +CCGATAAGTTGCCAAAATTTCGGTCTAAAAAAGCAAAGTCTTAAATTTCG +GTATTCCACCTTTAAAAGTCTCGCCGTCCATCGACTTTAAAATACCTAAA +TCAAGTTGATAACGCAAGATAATTACATTTTATACTCAAAATTTGACGGT +GATTTCAAAAAAGAAAGCTAGTTTCCAGCCGCTTGACAAGTCGGTCAAAT +TTCAAATTTCCGCTAATATGAGGCCATTTTTTGAGCAATCATAACTTGTT +TTTTTGAGAACTTTTTTTTTTCAAGAAGTTTCATTATGAAATTCGGTGTT +TTCAGACAATTTTGAGTCTAATAAAGCAATCAAAAAAATATTCGACTACA +CCACTTTTTGGCCTATTTTTTTGGCCTAAAATTGAAATTTTTCCAGCAAT +CCTAAGCGATCCGTGTGAGACGTCTCGTCCGAACGTTCTCGATGCTTTAA +CCGACGAGCAGAAGCATTCGTTGACCGCCCTGGCACAGAATTTTGTTCGG +CAAATTGCATTCAATCAGATACATGAGGTATGCGGGGATTTTTCCTGTGC +AGAAAAAATGTTTAGTAAATTAGGGAAAAAAACCTTGAGTTTTTAAAAAA +ATCTGTGGAAAATTTGAAAATTTTCGCAAAAAAAAAATTATAATAAAATT +AATTTCAAATTTGCAAAAATTTCAATTTTTCAAATAAATTTTGAATTTGC +ATTCCAAAGTCACTAAAATGGAAAAAATTGCTAAAAATTGAATTAAAAAT +ATTTCTTTCCTGAAAATTTTTTATTTTCCGAGAATTCTGAAATTTCTTTT +AAAAAAAATTAAATTCGCGAAAATTTGAAATTTTTTGGAATATTGCAAAA +AATTGCTAAAAATTGAATTTTCCTATATTTCGATTTTTTAGATTTTTTAA +AAAAGTTTTTTTTCAGCCATTTCTGGCCGAAAATTGAGAAAAAATAGTCA +GAAAATTCAAAAAATTGCTTTTTTTTTTCAATTTTCCGAAATTTTGAATA +TTGATCGTCCGAAAATTTAAAAAATAATCGCTAAAAAGTTGAAAAAAAAA +TTATTGAAAACAATTTTTTTAAAATAATTTCTAGCAGATTCCATGAAAAA +AGGCTGAAATTTACGAAACAATTTTTTTTTCAAATTCAATTCAGAAAAAA +TTGAATTTTCTCTGATTTTGGGCCGATATTTATAAGATTTTTCAGAATTT +TCCAAAAATTCTTGAGAAAAATTGAAATTTTTCTGCCGTATTCCTATGAA +AACAGCGGAAAATTGAATTTTCCAAGTTATTTTTATAGGGAAAATTATAG +TGACTCTATAGAATTCATGAAAAATTCTGACAAAAAACCAAACATTTTGG +CCCAAAATCAGAGAAATTTCACAAAAAAAAATGCTGAAAAATTTAATTTT +CAGCAATTTTTTTTTCAAATTTTTTTAAAAAATAAATTTTCGTAAATTTC +ATAATTTTCAGTTTTTTTCCATTTTTTTAAAGAAAAATTATTTTTCCAAA +AATGTGCAGAAATTTCAGAATTTTCGGAAATTAAAAAAATTTTTAGCAAA +AAAAAAAATTTTTTAATCCAATTTTTATGATTTTTTCACTTTTTCGGTGG 
+TATTAGGACTGCAAATTCAAAGTTTCTTGGAAAAACATAAATTTTCAGAA +AATTTAAAATTAATCTGATTATAATTCGGAGCATAAAATGTGCTTTTATC +AACAACATTTTTTTTTTCGAAAAGTCAAATTTTTCCACATTTTTAAAAAT +TCCTTTTTTTTGTATATATTCGGATTTAAATTTTTTTTTTAACGGGTTTA +AGCTATTTTCTGAGCAAAAAAATTGAATTTTTCTAAAATCACCATTTTTC +TTCCAGATTCTCGGAATCGATCGTCTTCAAGACACTATTGATCTACCAGA +GGATGCTCCGATGCTCAAAAGACCACTGGAGAGCAATGAAAATGCTGAAA +ACGCAGAAAATCTCGACGATTCGCCAGTTTCCAAAAAGGAGAAGCTCGAC +GAGGAGCCCGCCGATATTTGAGCATTTTTATTTTAAATTTGGTTTAAATT +ATTTAATTTTCCTCTAATTTTCCTCATCTCTGTGTTTTTTTCTAATGTTT +TTGGGTCCCGTAGCGAAAAAACGAATGGCTCTCTTCGCGGTGAGACCTTG +AAATTTTTATTTTTCTACTATTACCTCCCCCTGCAAACGCGCTCTATTGA +CGACAATTCGCAATGGAGCGCGATTGCATATTTTGTTACTACATTTTCCA +CACTCATGTTCGATCTTCCACTAGGGGAGGGTCTCGCAGAAGGATTTTTT +GAAAAATTTAGATTTTTTTTTGAAAAATATTCGAAACAATTTTACGTTTT +TCGAAAAAAGTCCTAATTTAGCCTAAAAAATCAAAAATTGATAGGATTTT +TGAAAAAAATTCAAAAAATCAAAAAAAATTGCAAAAATTTTTATTTTTTT +TTTTGAAGAAAATTTTTTGTAAAAATTTGATTTTTTTGAAAAAGTTTGAA +TTTTGAATTTTTTGAAAAATTCTAAAAATTTTGAATTTTTTGAAAAAAAC +TCAAAATTTTTGGAAAAATTTTGAATTTTTTTGAATAAATTCAGAACTTT +TTGAAAAAATTCGAATTTTTCTGAAAAAAACTCAAAATTTTTGGAAATAA +TTGAATTTTTTCGAAAAAATTCGAGAAAATTCCTAATTTAGTCGGAAAAA +AATATTAAAAATTGATACGATTTTTGAAAAATTTTCCCAAAAATTCTATA +GAACTGTCCTGAATTTCTAAAAAAAATTCAAAAAAAAAACAAAGTTTTTT +AGAAATTTGCAAATTTCTTTAAAATTTTGGAAAAAAATCTATTTTTTGTT +GAAAAGTCTGCAATTTCTAAAATTTTCCTATTTTTTCTGCTAAAACCTGC +AAAAAAGGACCCTCCCCCTAACTATCTTGTTCCTCCAAAAACCGACAATT +CTTCTGTTTTTGTGTTATAATCGTCCTTTTTCTTGTTCAACAAAACAAAA +ATTAAAATAAATTAGAAGAAACGGTAAATTTAGATACGTTTTTGCCACGT +GGCCGAGGTTTTTCACGGCCACGTGGCAAAGGCCGGCAAGAAATGCGCTG +TGGTGGGAGAGGAAATTGATAGGGAAGAAGGAAGAAGAAAAAAAAAAGAA +AAAATCAATATTTTAAAGCCAAAAAAAATTTTTCTGAAAAGACTAGCAAA +ACAGGAATGGTTATGGGTAAAGCCGAGGTTTTGAAGGAGAATTTTTGGGA +AAAAAATTGCAAAAAAAAACGAATCGGTAGGGACGGACCGGCAAGATTGA +TTGAAAAATTGACGAAATTATCGATTTTTTGGCGGCGCACGGGGTTCTGG +CCTTCCCTCATAAATGGAAGAGTTTTTGCCGAACTAGGCCACCAGGTCAT +AACTGGGGTAGATTTACGGCGCGTTGCGTGTCGCGTCGCGGCTCAATTTT +AGTTGTAAAACTAAATGTATTTGTCCGTGTGAAGTACACGACTTTCCCGG 
+CGCGCGATTGTCAATGGAGCGCGAAAAATTAAGGGCCAGAAGTCCTCAGA +ACCCCGTGCAAAAATCGATAATCCCGTTTAGAATCGGTGAGGGCGTGGAA +AGAAAGAGATGATGAAACGGGAATGAGCAAAATTGCAAAAAAAAACAACA +AGAAATTCCAGAAAATTCCATAGAAAACGGGGTCTAAAGCACCAGATTAG +TGTTGTGCGTGGGCGTCCACTGTGACGTCATCACCTTCCACTTTTCGTAG +TTCGACTCGAGCTGATCGGAGAGCTCGCCGAGCTTGGCGAACTCGGTGAA +TGTGGCGAATGCTTCGCGGGCGAACATGTCTGTAAAATCAGAAATCAGGA +GAAAATTCAATTTTGCGCGTCAACTTTGATGTTTTTTTGTGTTGGTGAAC +TTCAGAAAATTGAATTCGCCAAATTTTACGAGCAAAATTGGGTTCTTTTG +GTATTTTTCGTGGAAATTTAACCAAAAATCGAGTTTTCTACGGAAATTTA +ACGTATTTTATCGATTTTTCATCAGAAATCAGTAGAAAATTCCGTTTTGC +GCGTCAATTTTGATGTATTCGGGTCGGTGAACTGCACCAAACTGGATTCG +CCAAATTTTACGAGCAAAATGGGGGTTTTTGCAAATTTTTCTTGGAAACT +CAGTCAAATATTGAATTTTTCGTGGAAATTTAACCAAAAATCGGGCTTGC +TACGGGAATTTAACAAATTTTATCGAATTTTATTTTGCGCGTCAAATTTG +ATGTTTTTTGTGTCGGTGAACTTCAGAAAACTGTATTCGCCAAATCTTAC +GAGCAAAATTGGATTTTTTGGGTATTTTTCGTCCGAAATTGATTTGTCCA +GTGAAATTAAACGCGATTTATCGATTTTTTATCGGAAATCAGTAGAAAAT +TCAATTTTGCGCGTCAAATTTGATGTTTTTTGTGTTGGTGAACCTCAAAA +AACCGTATTCGCCAAATCTTACGAGCAAAATTGTATTTTTTGCGTCTTTT +TCGTGGAAATTTTTACAAAAATCGTTCTTTTAAAAAAAAAATTCCACACA +AAAGTACAATTTTCACACAAAAACTGCAATTTGCGCGTAAAATATGGCGC +AATCGGTTGAGTGCGGCACGCCGACTCAAAAACACCATATTTTACGCGCA +AATATTAATTGAAGCGCCTTCACGTGGTGTCAGGTTGTGTCATCACGGTT +TGATCTACAAAAAAATGGGGGAATTTTTTGCCCAATAATATGTGACGTCA +GCATGTTCTTAGCCATGCAGAATCCGTCTAAAATCCCGCATTTTTTGTAG +ATCAACGTAGATCAAGCCGAAATGAGACACTCTGGGCTTCAAAACACTCA +CCAATAAAACCGCATTGTGTGATCGGAACGTTACACGTGTTCCGATCGAA +TACCTCCATCGTCACAGGCAGCCCTTTCTCTTTTTCTTCTCGAGTCTGGA +AAATATTGATTTTTATTTTATTCTTGGAATTTTAAATTTCAAACAGTAAA +AAACCAAAAATTATCAGTTTTAACTTATCCAAGTTAAAGAACTACAGTGA +AGAGAACTACAGAAATATTAAAAAATATATCGATGCACCAAGAGCCCAAG +AAATCTCTGAGAGCCCATATCTCGGCTCATTTCGAAGTTACAAATAAACT +GCAAACTATAAAAATATAGAAAATTAAATTTCAATTATTTTGTTAGTTAA +CAAAATTTGGCTAGAATTGAAATGAACTGGGATATGGGCTGGCAAAGTTC +ACTGAATTTTTTCTTTTTTTTTCTTTTGTTTATTTTGATCTCTACACCAG +GGGTGAGCAGCAAACGATTTTTTCCGGCAAATCGGCAAATCGTTAGCTTG +CCGGAATAGAAAACTTCCGGCAAATCGGCAAACCGGCAAATTACCGATGT 
+GCCGAGTTTGCCGGCAACCGTATGTTCCCTATGAGTATGGCCACCCATCA +ATCGACTTTGTCGGCTTATATTTCAGCGGTTTCAGAAGTTTTTTGATATT +TTCAAAAACCAGAGGATAACGATAGATTTGAAAAATATTTTGTCGTTTGA +ATTTTTAAAATATGATAAAAGACAACCGAGATATAAACGGTCAAAGTCAA +GGGGGGCCATACAAAAATTTTTTTTTGGAAATTTAGAAAAAAATGTGTTT +CCGTCTTATTAAAAAAATCTCTCTAAGCATTTCCGGCAAACCGGCAATTG +CCGCCCTCCCCCGACCTACGCACCTGTTCAAAGTACTCCTCAACAATCCG +ATGCGCCCATCGCTGACACAATCCCCACTCTCTCGCCGGATTGGAGATGT +CGGCGCACTTGACAAGCATATCACATATCGTAAGTGAATTCGTGTCACGT +TGCTCCTCCGGCACATCTGTCACATGCATCTTGTTGAACTTGGCGAGGTA +CTCAAAGTGTCGACTGATGTCCGTAGCCAACACCATCTCCACCATCGCAT +GTCGCATTTGGATGAACTCCTCGCGCGACAGACTCGAGAAGATGTTCACG +TTGGCGTTGTGCTGAAGGGTTAGCTGAAAGGCTAGTGCGATGTGATGATT +CTCCAGGATCGAGTTATCATTGTAAAGTATTGCAAGCGACTGTCGCGTAT +TGATCAGGTACGCGTTTCCTCTTCCAGGATGGTCTAGATCATGCACCGCC +GCTGCGAGAAGAGCCGCCACCGCATGACTTTCATTCACGTGCACGGCGAC +CGACGGGGAGTCCAGGAAGAATGAGGTGGCCTGTAGGACGTCGGCTGCGT +GAGTTGCGTTGTGATACGTGTTTCCCGCATGGTAATGAGCCTCGATGGAT +AGAATCCAACGGTGGAGGAGATCGTCGGAGCAGCCGAGCACATCGCAAAC +CTTCCACCTCTCGAAGACTTTCATTCCGACTTGGGAAAGAGCGTGGTGAT +CCGACACCTTTTCCAGATGAAGGATGTCGAATTTCCAGCAATTGTCGTTT +TCCAGCGCGTTCTTCACGTCAGCAGATACCCGCCGACGTTCTCCATGGGA +TCCTCGCTTCTCTCGATGCGCGTCGACGACGCTTCGCTTTCGCTGCCTAG +CTGGATGATGGAGGCGAATGAGACCATCGTAATATTGAGTTGCTATCCGA +TCGGCGTCGCGGAATCGATTGATCGACGGCGCGTACAGCTCGTGCGACGA +CAGCACCTTCATCGCGTCCTTTATCGTTTGTGCCGGTTCTCCGTCGACCC +GTGCCGACACGTCTCGGAGCATTGTGAGCACCTCGGAGATGGGCGCCTCG +ATGCCGCCCGTCTGCGATTTCAGGCTTTTCAGGGAGACGTCGCGGAAGAT +TGCGGCGGTGTCACCGGTGGTGTTTGAGCGTTTCATATACACGAATTGAC +TACTGCAAAATTCATAGAATTTTTTGATTAATTGGTGGCCGAGTTTTTTA +AAATGGTGGCCGAGATCTTTAAGTTGATGGCAGAGTTTTATAGGTTGATG +GCCTAGTTTTTTAAAATGGTGACCGAGTTTCTATGCAATTTTGTGGCCGT +GTTTTTGTATAAGTTGGTGGCCGAATTTTATAAATTGACGGCCGAGTTTT +ATAGGTTGGTGGTCGAGTTTTATGAATTGGTGGACGAGTTTCTATACAAT +TTTGTGGCCGAGTTTTTGTATAAGATGGTGGCCGAGTTTTTTCAAAATGA +TGGCCGCGATCTTTAAGTTGATGGTCGAGTTTTATAAGTTGGTGGCCGAG +TTTTATAGGTTGATGGCCGAGTTTTTATGTAATTTGGTGGCCGAGTTTTA +CAAGTTGATGGTCGAGTTTTATGAATTGGTGGCCGAGTTGCTATACAATT 
+TTGTGGCCGTGTTTTTATATAATTTGGTGGACGAGTTTTATAAATTGGTG +GCCGAGTTCTAACGGATTAAGCAAAGGAAACTAGAACAGAATTTTATTGA +AGAGAAAACTAGGCCATCAGCTTTTCAAGAAAAACTCGGCCACCAAATTT +TCCAGCAAAAAACTAGGTCGCCAGTTTTTCCAGCAAAAAAACTCGGCCAC +CAAATTTTCCAGCAAAAAAAACTGGGCCACCAAATTTTCCAGCAAAAAAA +AAATCGGCCACCAATTTTTCCACGGAAAACTAGGTCATTTCATTGTTCGT +TTGAGAGAAAGCTAGGTCACGTACATTTTCAATAAGAAAACTCGGCCACC +AATTTCTTGAGCATAAAAAATTCGGTCATCAATTTTTATAGAGAAAGTTC +GGCCATCAAGGTTGAAAAATAAGCTAGGCCACCGATTTTTTTTGTTGAAA +AAAACTCGAACACCTAACCTATTTTCCTTTTTTTTTCAAGCCTAGAAAGC +TCGGTTATCGAGTTTTTAGGCCACCGATGCATTTTTTTAATCGAAAATCG +AAAAAAAAAATCGGTGGCCGACTTTTTCATTTACGGCCACCAATTAAAAA +AAACTCACTTGTTAGCAAACGGGACCCGTATAAATTTCCAGTCGCAACTC +CGCCGCCGCTCCTCTTCGCCACGTGCCCTCGGCAGGGACTTTCGTCGCAT +TTCCGATTCGGGTTGTCCGATCACCTCGGATCGTATGCATCCCGTCACGT +TCTCGTAGGCGCGGTTAACGTACTGCACTACTCGTTGCTCATCACATATT +TCCACTGCTTGATCCGTCTGAAAATACGCAAAATTTGAAGGTTTGTAGGC +ACACGTGGCGCCAGTCTATCCCATTACGGTTTGATCTACAAAAAATGCGG +GAATTTTTCACCCAAAAATATATGACGTCAGCTAGTTCTTAACGATTCTC +AACTCCCGCATTTTTTGTAGATCTACGTAGATCAAGCCGAAATAAGAGAC +ACTCTGACACCGCGTGTAGGCAGGTAGGCGTTCAGCCAGGCGATTAGGCA +GGCAATTAGGCAGGCGATTAGGCAGACTAGTAGGAGGTAGGCAAGTAGGC +ATGGAGGCCAAAAGCTCACCTCATCCAAGACGGCGAAAAGGGCGGGCAGC +GCGCGGATCCGATTCGCGAGCCGCCCGACGTAGTCGAAGAGCACCACGTC +ACGCGTATTCCACGTCACATGCTGAAAAGAAACGAATGAATGGGAATCAG +TGTGAATGAAATATTAATTAGAATTGAAAATGAGAAAGTGAGAAAACGGA +TGACTTACATGAATGATGTTGGACTGCGCAAGAGCTCTCCGTCGCTTTTC +GCTGATGTGTCTGGAATATGTGATTGTTAGATTGACAATTCTGGCAGAGT +GGTCTAGAAAATTGGGGAAAACTCGGCCACCGATATGTTTTCTTTGGTTG +TGGTTGGAAAATTTTGGCGACCGAGTTTTTCAACGTATCCAAATTTATGT +GGCCGTGGCCGACTAAGGCCACATAGAAAAGAAAAATACCGAAACAATTG +GTGGCCGAGTTTTATCAACGTAGCCTAAATTAGGTGTACTTGGCCGAACT +TTTCTTCTGACGGCCACGCAGCAAGGAAAAAATAATGAAAAAAAAGTTGG +TGGCCGAATTTTAAGCCTACACAAAAAGTTGGTGGCCAAATTTTAAGCCA +ACAAAAATTGGGGGCCGAGTTTTTGTTTTTTGAAAAAATTCGTGGCCGAG +AAAATGGGAAAACGCGGCCACCGATATTTTTTCTTTGGTTTCTGGCTGAA +TAATTTTGGTGGCCGAGTTTTTGTTTTGGATAGGTTGGTGGCCGATTTTT +ATCATTTTAACCTAATTTACGTGTTCTTGGCCGAACTTTTTTCTGACGGC 
+CACATAGCAAGGAAAAAATAACGAAAAAAAAGTTGGTGGCCGAATTTTAA +ATTCACAAGAAAAGTTGGTGGCCGAATTTTAAGCCTACAAAAAGTGGCGG +CGCTTTTTCTACAAAAAATTGCTGTCATTATTTTAATTGGAAAAACTTGT +GGCAGTGATTTTGTTTTGAAAAAAATCGTGGCCGAGTTTATCTGTAAAAT +TGGTGGCCGAGTTTTATTCACGTAACCTAATTTACGTGGCCGTGGCCGAC +TAAGGCCACATGGCAAAGAAAAATACCGAAACAATTGGTGGCCGAATTTT +TAGTCTAAAAAAATGGCGGCCGAGTTTTTGTTTAAAAAACCGTATTTAGT +ATATCTGGGACGAAAAATGGTGGCCGAGGTTTTTCAATGTATCCAAACTT +ATGTGGCCTTGGCCTATAGCCTAGCTTTCCTAAACCATTTGTTTAAAAAG +TTGGTGGCCTAGTTTTCCAGCATAATCCAACTCACTTCTCGGCGATCGAC +ACAATCAACACCTCCTCTGTGCTCACCGATGAGACGTCTCTGAAAAAACG +TTTCGTTAGTAGATCATGGCTGACGATCAACGCTGCTCCCTTACTTGGCG +ATTTGCGATGGATGAGGTACTCGAAGATCTATGAATACAGCTAAAGGGCA +TATTGATTCAACGTCGGATGTTGCCTGCAAGAAAGTGTACATATTCAAAA +ATTTCCAAAATTCAATTTCCTTTTTCCCGCCCGTGTCCGAATTTCCGGAA +CTGACCACGTTTCAGTCATGGTCTTCGTAATTCCCAATTTCCTTTCTCCC +TTATCTTTTTACGTCAATCATGATAGCGGTGATACGTGTTTCGAGGGTAT +ACATGCTCACACAACAATGCACTTAATCAGCTAATCTCTCCCACACCTCC +ACAGAGAGAGTGTATGAGCAAGAGAAAGAGCACCTGCTTCTTCCAGATTA +GTGAGCGAGCCTGTTGTCATCTGACTTCTGCTGACGGAGGGAGCTTGAAA +AGCGGAGCAGGAAGAGGAGAAGAAAAAGAGCATTCAGAGATAATTACGCG +CGAGTCGTAGTAGTATGTATACACTGACTTCTTCTTCTTCTTCTTCCTCT +TTTTCCCCGTCTTCTGCTCGTTGATGTCTGCCATTATATTGATCATTAAC +GAGGGCCTAAGAATCCTGGAAGAGTTGTGAGGTCACACTGATGGCCTAGG +AATCTCAAAGGCAGGCCATATCATATTGCAGGCCATATCAGGCCCCATAG +CCTGGCCATAGTAGGTCACCACAAGCTATGGACTGCCCTCAAGATTTTGG +CTCAATTTGGATAAGCTTCTTCGGAGCCCCTGGGCTTAACAGACTGACGT +CATAGGTGGTATATCTCGGCTAGGCGGAAACTCGGAAATGTGAATTTTTT +GACGCAAACCAAAGATCTTATAGATCCTTGAAATCTTCCACAGGACTTAG +CAGACCGGAGGCCTGGGAACTATGCAGTTAGAGGTAATCGCCAAGTCCAG +GGCTCCGAAGAGAGAGGGCCCAGGGCTCCGAAAAAAGCTTAAATTTTGGG +CCAAAATTTCCCAAATTGGGCCAAAATTTCAAAAGTATGTAGTAGGTTTC +AACCCTCATATTTCCCGGGATCATGATTTTGTGGACTATTATTTTCAGGC +TGTGGTGGCCTGCTATACCCAGACTATGTACTGGGCCAAGCTCTGGCCAG +GTTATGGCCTTTAATAGCCTTGAATTTTGGAGTCAAAGTTCTTGCAGAGC +CTAGGAGACGCTTACCACTTCGGATCAAAGGCCTGATCTAAAACTCAATG +GATCATGACCCTAGAACCTCAATTTTCATAAGCCTAATACGCAGCTACTG +GTCTAAGCTCTGACCAGGCTATATGGCCTGATATGGCCTGAACACTTCGG 
+TGGCCTAGTCTAGGTTTTTCCAAGCCCCAGCGGGCCTCAATTTTCATAAG +CCTAAAGACGCAGCTACTGGGCCAAGCTCTGGCCTTGAATTTTGGAGTCA +AAGTTATTGCAGAGCCATCATGCCTAGGAGGCCTAATCTAGATTTTTTCA +AGCATTGAGGCCCCCACGGGCCTCAATTTTCATAAGCCTAAAAACTCAGC +TGCAGCCTTACCTGTGTTGGAAATGATACTGTACAAATCCATCCGGATGC +CCGTAACGAGTCGAGGAGCACACTCTCGCCGGTGCCTTCCGTCACGATCA +CAGCCTGCAAAAAACGCATTATTTATAAGATCTTACAGGGGATTTTCAGG +ATTTTCAGGAGGATTTTGTCGTGTGCCAGACCTCAAAGCTCGTCGGGATA +AAATGCTGATATTGATTGACATGCATATATATAGATATATATGAAGTTGG +AAGAGCGGAAGCAGCAGCAGTGGCAGCAGAACTTGTGTGTGACGGGGATT +TGTGGGAACAAGGCGCCAAAGAAGACGAAGCAGAAGAAGAAGAAGAAGCC +GGGGGCAAGCAATGAATGCAATGTATGTAGGAGTATGTAGGAGTATGTGT +GTGTGTGTGTGTGTGTGCGAAAGTCTCTCTATGTGTGTGTGTGTGTGTCA +CAGGTTATATATTGCGTGTGCACAAAAAATGTATGAGGTCTCCTGATGTG +TGTGTGAGTCTGTGAGTCTGTGAGTGCAGTGTACAACACCTTTTGCGGAG +TCTTATCACATACTGATACGAAAGTGTTTGTATTTTTATATTTCTTTGCC +ACGTCACACTTTATTCAATTTTTCGCGTGTGCCTCTCGCCCCGTCATTGG +CACGCAGTAGAAAAAGAAGTAGAAGAAGATGCGTCTTCTTGTTGCCATAT +CCCATCCAACTTTGTTACTTACGGAATTCGCGCGCGCGCGCGGGGAGGAG +AAGCAGAAGACGACGACGACGAGCTCGAAAGAAGACGAGTCCGCCTGCGT +CTCTGCAGTGATCTCGTGTCCCTCGGCAGCGGCTGCGGCGCGCGCTTCAA +TGAAGACGAGAGCTCCCCCCCCCGGCGGCGCCGAATACGCGATTTGTCAT +CGAGATGAGCGCTTTGCTGACAGAATAGAATGGGCAAAAACAGGAATACG +GAATACAGTAGTCATGTTTTGTTGCAATGTTATGGGAAAGCTGGCTTGGC +GTGGTCGTCGAATTTTTCGCGCTTCATTGACAAAGAACAAGGGAAAGTCG +TGTTCTCCACACGGACAATCACACTAAGTTTTAGGACTAAAATCGAGCCG +AGCCAAAATGGCCTAGTTCGGCAGAAACTCTTCCATTTCAATTTGTGAGG +AAAGCCAGAGTTCTGTGAACCTCGGACCTGCGATCGCCGACCTGCGATCT +CCTAGACAAATGCTCACCCCAGCGCGGGACTCGGGAAAGTTAGCGCCAGG +AGAATAATTTTTGAATTTGAAGAAAAAAAAAGAGGAACTTACAGTTTTCA +GCATCTTCACTTGCATCGGCCCAAACGTGTCATGTGCCGTGGAGCTGCTC +GTTGCGGCGACAGTTAGCAGAGGTGTTGGTGGAGCACCACGCGATCCACC +TCCACCACCACCATTCTCAGAATACGACCCACCGCAGCACCATCTCATTG +CAAACACTGGCCTGGATGCTGCTGCTGCTGTTGCTGCTGGTGCTCTCTGT +GCGCCTCTTCTAGCTGCGGGCGTGTGCTGCTGCCTCCTGCCAGCTTGACT +GTGTGCGCTAGCCCCGAGCACACTCTTTTCACCCTCTTCTCCCCCCACCC +CGCTTCTATCGCCCATTCTCTCAATTCTTCTCTGCCTCCTCCTCCTCCTC +CTCCTCCTCCCAATTCTCATGTCATCGGGGGGCCCGTCGTGTCGTCGAGC 
+GCCCTCTCGCGTCTGCAATGTCTCGGCGCCGCCGCCGTCGTCGTCGTCGT +CGTCGTCGTCTTCGGGGGGTTGTCATCTGCTGTGTTCACACTGATGCCGC +CGCCTTCACCGCCCATCTCATCGTCTCCTCCTCTGTGTGGACCCGCCGAC +CGGCCGGCGCGCCTTCTCCCATCAGTTCTATCTCTCTCTCTCACTCACTT +ACTCTCAAACCGCGTCAATTGATCTCTCTGGCACGCTCTTCGCTCGGCGA +GCTGTATTCGGTGTCGGTTTTCAGTCTGCGTCTCCGCGCCCCGGTGCAAT +GCATATGTTCCATCACGTTCTGCTCCTCTTGTTGCTCCGTGTGCTGCCGT +GTTTTACGCAATTTCAGGTAAAATTGATACAATTGATACAATTGATAATT +ATCGAAGTGTCCTCTATGTGTACTCACGCTGTTAATTTAATGGCCTAACC +ACAGAATTATGCTTCTTTAATTTTTGAACTCGTAGCTCCGTATGTTAAAT +CCACTATGAGATTCCTTGCCATGTTTTTGCAATATTTTTTTGTTTGTTAT +GAGAAAGTGTATGTGTTCTTTGTTTGGTCACATGGGAACCGCTGGCCTGA +GAATGGAAGGTTTTCTGGGACAGAGCTTTTGAATCCGATAGCCTACAATA +GGGCCCCAGTGCAATAACGGTAGGCAGGCCCGAGGCCTAATTTAGTCCAA +ATAGTCCTTATTCAGTCTAGATGATTCAAAGCTAAAATCAAAGGGCTGTT +CCATTACGTTGTGATCTACAAAAAATGCGGGAATTTTTCGCCCTAAAAAT +GTGACGTCATCACGTTCCATGCGTTATTATTTATGAAGTCTGCGTCTCAT +TCTCCCGCATTTTTTGAAGATCTACGTAGATCAACCAAGCGCCTTACCCC +TAGCCTCTAAGCTCCTAGGGCCCTCTAGGACCATTTAGGCTTTTTGGACT +GTCTAAGACCAATTGGGCTTTTAGGCTCCTAGGTCCCTCAAGGACCGTTA +GGCCTCTTGGACTATCTGAAACCATCTGGGCGTCTAGGCTCCTAGGGCCC +TATAGGACTATTTAGGCCTCTTGGACTATCTAAGACCATCTAGACTTCTA +GGCCTCTAGGGCTCTCTAGGACCATTTTGACTTGTTCTATAGGACCTTGT +TCGACTAGCTAAGACCATCTGGGCTTCTAGGCTTCTAGGGCTCTCTAGGA +CCATTTAGGCCTCTTGGACTATCTAAGACTAATTGGGCTTCGAAGCTCCT +AGAGCCCTCTAGGACCATTTAGGCCTCTTGGACGATCTAAGATCATCTGG +GCTTCTAGGCTTCTAGGCCCCTCTAGGACCTTCTAAGACCCCAGTCTTTA +TTCCCTAGCCTCTAGGCTCATAGGCCTCGATTCCGCAATTCTAATGGCTT +TGGGAGCCCAGTTCTAAGGCCCATGGACTTCCACAGTAGCTTTTCAGTCA +ACCATTTCCAAGTTAACCAAGGACCATTTAAGCCTTTTGGGCTATCTAAG +ACCATCTGGGCGTTTACTTTCTAGGCCTTTTCCTGCCTATTGGGCCCTCT +TGGACCTTCTAAGACCCTGGCCTTTAATCCCTAGCCTCTAATCCTCTAGG +CCTCGATTCCGCAAATACAATCCCAGTTCTAGACTCTCAGGGCCCTTGGG +CTTGTAGGCTTTCAGTTAACCATTTCAAACAGGTGCCGTTCCGCTCCATC +TCTTCTTGCTCTCTCGTGCTCCTCTCCTACACCATCTTCTCTCATTGTTC +TGACGGATGGATGAGCTGTCATCATCATCATCAAAGCTCCAAAGACAATT +GCCATCAATTTGTGCCGCCGCTGTGCTAAGTGGTCTTATTCCGCCGGGAC +AAAAAAAAAGGGAAAATTCTCTGGGAATTTGGACTAACACCACCTACTAT 
+TACTGTAAAAAAACATTTGGAGAAGGGGGAAACGAACTAAAAAAAAACAA +ATTAAAATCGGCGTATCAAATTGAAATCATATGATTCGTTGCCGTCGTCT +TCTCTGCAATGATCGTTTGTTCCGTCGTGTCAAACTAACCTACTCAATGT +CATAAAGAACAAAGAATGAATTCAATAAAACAAGACGAAAAAATAAATAC +ATTCAGGTACTGTACAGAGAGAATCAGGATCACATCTGGAATAGCTCGCA +GCCTGTTCGATTCTGGGAGCTTGTTGAGTATAGGCCGACCACTACGCCGC +CGCCGACCACTACGACAGAGTATATACCGCCACCGACGACTACGTATGTT +CCCTGAACTTTAAAGTTGCATTTTTTTTTAAAGGATTAGGGGTTTTAATT +GAAATTTCAGAATTTTTGGTAAATTTTTTAAAATTAGTTTTATTTTTTTA +CGGGCTTTTGCGATTATTACTGGTAGAATGAAAGTATGTACATTTCTTGT +AGATCAGAAAATTTAATTTTTGATATCCGATAAATGTTAAAAACTCGATC +GAAAAAAAAATAAATTTTTCCAAAAACTAAAAAGTTGCAAAATTTATTTT +TTTTACAATTCAAAGTTTGTAATTTTTGACAAAATACATTCAGAAATTTT +TTTGGAATAGGGGTTTTAATTAGGCTTGGGCCTAGGTTTGGGCTTGGGCT +TGGGCTCAGGCTTAGGCTTATGGTTAGGCTTAGGCTTAGGATTGGGCTTA +GGCTTAGGCTTGTGCTGAAGCTTGGGCTTGGGCTTAGGCTTCAGCTTAGG +CTCAGGCTTAGGCTTGTGCTGAGGCTTGGGCTTAGGCTTAGGCTCTCAGG +CTTAGGCTTGGGCTTGGGCTTGGGTATGGACTTAAAATTTTTTTTCAAAA +ATTTTATTTTTCGAAAACTTATGGTTTTACCGGTATAAAGTGCCTTTTTT +TGTACATAAAAAATAATTTTTAAATTTTTTAACGTTCCTAAAAACTAAAC +AAACTGTAAAGAAAATTTTGTTTAAAAAAAAAATCAATAAAACATATTTT +CAGTAATTTCCAAAATTTGATTTCCCGGTAAAAATCAACAATTTTTTATC +GGGGCCACAAAAAAAATTCGCTGAAAAAAATTCAATTGGATGTATTTTCT +GCAAATTCGAGTCAATTTTCTGAAAATAGTTGTTTATTTTCCCTGATTTT +TTGGCGTTTTTTCCTTTTTTAAAGGAATTTTAGGACTTTCTGGGATTTAG +AAGGGTTTAGCCTAGGGAGTGATTAAGCCTAGGCTTAAGCCTGAACCTGA +GCCTAAGCCTAAACCTAAGCCTAAGCCTAAGCCTAAGCCTAACATCAAAA +AAACCGGAAAATCCTAAGCCTAAATAGCGTCAGTAGCAAGCTGACGCTCG +CCACTGACACCAAGCCTAAGCCTCAGCCTTTAACGAAAAACAATTTTGGC +ACAAAAATTGCAAAATTTCCGCTTCTCCGCTTCTCTTAAGACATTGAATT +TCCAAATTTAAAAATTTTTAGAAAAAAAAAAACCTCGGTCCAAAATGATC +TGAAAATTCTTATTTAGGCTTTAAAAAATTTCACTTTTTTCTGCGACACT +TTCCATAACACCACAAATTCGGCGAATCGACCTGTTGATGATCTTGTCTC +GACCCGCTCTCTCTCTAAGTGATTATAACAGAGATACACCCACACACCCA +CACACGTCATCTATTATGTTCTAGAACTTTATACTTTATGATCAAAACCG +AATGCTACAAACTAAAAAAACATCACCACTTTTTCCAAATTTTCGTCACA +TTTTTGCAGTCCGACCACAACTACAACCACCAAAGATCCATGTGAATCAT +CGCATCCGTTGGATCTGACGCGTCGGCTACGGGACACTGCCGGCTACAAT 
+CGTATCTATATGGCCAGCGACTTTTCGTCGGCTCGCAATGAGTTTCCTGA +TCTCAGGGGTTTGGCTCACAAAGCTTCAACGGACTATCATTGCGGGAAGA +GGTGCCAACGGTGAGCTCTCAATGCCTACCTGCCTACTTGTTTGCAGTAT +CAAACATATAATGCCTACAAGCCTACGCGCCTACCTTGGTCTACGAGCCT +GAACTTGCACATTTTCTGTGCCTAACATGCACGCTATACTTTTGTCTGCC +GTTACGCGCCCCCCCCCCCCGCCTATCTGCCTACAAGTCTCCCTTTTGTC +TTTATGCCTACGCGCCTATCACATGCCTACCTGCCTATTTGCTTGCGGTC +GAGTCGATACATATCATGCCTACGAGCCTACGCGCCTACCTTGGGCTACT +AGCCTGAATTCGCATTTTTCCAGTGCCTAGCTTGCGCGCTATACCTATGC +CTGCATTTACGCGCCTTCTCCCCGCCTAGCTGCCTACAATCGCACTTTCT +GCCTACCTGCATGCTTTCCTACCTATTCGCATACTTAGGCGTACCTACCA +CAAGCTTAGCTACCTACAAAACTGCCTTCTGCCAACCTGCATGACTGCGT +ACCTTTTCGCCTGCTTAGGTGCCTACCACAAGCCTCCCTTTTGTCGATAT +TCCTACGCGCCTATGGCATACCTACATGCCTACTTGTTTGAGTCGATACA +TATCATGCCTACGCGCCTACCTTGGCCTACCTTGGCCTACAATCCTGAAA +TTGCACTTTTCCTGTGCCTAGCTTGTACACTATACTTATGCCTGCCTTTA +CGCGCCTTCTCCGCGCTTATCTGCCTACAAGCATACCTTCTGCCTCCTTG +CATGCCTACATACCTATTAGCCTGCTTACGCGAACCTACCACAAGCCTAG +CTGCCTACAAACCTCCCTTCTGCCTATATGCCTACGCACCTAACTCCTGC +CTGCCTATACGTCTACCTCATGCTGAAAACAGACCTGCCTTGTACCTACC +TATGCGCCTACCTAAGATTTGCCTCCTACGTGCCTGGCTGCCTATTTTGC +CTATAGGCATTACTGTCAGTGCGTCTACCCAGAGCCTACCTTTAGGTAGA +CGTTTGCCTACCTAAAGCAACCGCCTACAAGTCTATTCGCCTGCCTCCTT +GCCTACCTACACACCTCCCTTATCCCGGCTTTTCCACCTGCCTATGCGCC +TATCTCATGCCTACCCAAGACCTGCAAACGTGCCTAGCCGTATGCCTACC +TAACTTCGCGCCTACATATGCCTCCCTTTCAGATCGAGTCAACTAATTCG +AAAAATCATAAAAACCGGCCAACCGGCAGAAGCCGAACGTGCTTGGTGGG +ACGAGGAAGAGTGGGAGGAGGGCGAGAGACGCAGGAAACGAGCCATCGAA +TGGAAGGATACAGATGATCGTCTGCTAATGCGAAATACCGGAACATCACG +TCATATTTTGGGTAGGTAGGTGGGTGAACACAGCAAGTGCGCTCCATCGC +GCGGGACACTTTTCCCGCGCTTTTTAGGAGGTCAAACACGGAATTTTTCC +AGGAATGACTGTGGCGGTGGAATGCGATGATGAGACTGGAGAATTTAATA +GTGGTGGATACTCGTTGTGTACTACGTGCCGTGCTGTTCGACACTTGCCA +TCCACGTATTTTCCACGTGTCATTAATGAGCTGATTTGCTCGCAGAAAGC +GTGTTTGCGAGGAGAAGGAAAATGCATACAACGGGTTATGCCGATGAAGG +TATGTAATTTTTTTGTTTTTGCAATGGAGCGCGTTTGTACTCACTCCACG +AGAATCACGTGGTGTCAGAGTGTCTCATTTCGGATTGATCTACAAAAAAT +GCGGGAGAAGAGACACAGAGTTCTCAACTGATTTTGGATGATTAAGAACG 
+TGCTGACGACACATTTTTATGGGCAAAAAATTCCCGCATTTTTTGTAGAT +CAAATCACTTGGAGAATATTATTTTTATTTGAATATGATCCTTTAAAGAA +AAAATAAAATTTGACTGAATTTCCCGCCAAAAAATTTTCGCAGAAAATTT +AAATTTCCCGGCAAAATATTCACGGATATCTGGCTTCCCTCATAAATTGA +AATGGAAGAGTTTTTGCCGAACTAGGCCAGGCCATATCTGGGGTAGATTT +ACGGCGCGTTGCGTGTCGCGTCGCGGCTCGATTTTAGTTGTAAAACTAAA +TATATTTGTCCGTGTGGAGTACACGACTTTCCCACGCGTTGGTGATTGTC +AATAGAGTGCGAAAAATTCAATGAGGAAGGCCAGAACCCCGTGAATATTT +TCTCAAACATTTTTTTTGGAATTAATGTGCAATCGCGCTCTAACGCAGTA +TCCAAAACTTGGGTCTTTTGGCGGGAAACCCGAAAATTCAAAGTTTTTTG +CTTTAAAATATCGTGAAACAGACGAAACTAGTCGCAAAATTTCAAATTTA +CGGTCTTGCCGCGCCAGTGCGGTGGATCACGATTGCACTTAAGAAAAATG +GCGAAAATCAAAAATTTGGTTTATTTTTGGCAACTATTTTTCATTCTCTA +TCAAATTCATACTGTACACGTGGAAAATCATTGATTTTCCCGAAAATTTG +AATAAATGGGATGCAAGATGCGAGCGCGCCCTATTGTGATTCGTTAAAAT +TCGTTAGAAGCGTCACTGAAATCGAAATTTTTTCTCGAAAAATTGCAGTG +TGCCACGTTGTAAAATATCGATTTTTTGGAAAATTTAATATTTAGAAGTG +CGGATGCATCCTATTGTGCTCCAATAAAATTACACGGGCTTCTGGCATTC +CGCATTGAATTTTTCGCGCTCCATTGACAATCGCCCGTGTACTCCACACG +GACAAGTGCATTTAGTTTTAAAACTAAAATCGAGCCGCGACGCGACACGC +AACGCGCCGTAAATCTACCCCAGATATGGCCTGGCCTAGTTCGGCAAAAA +CTCTTCCATTTCAAAATATGAGGAAGGCCAGAACCTCGTGGAAAATATCG +ATTTTTCCATTTCCACGTGGCCAAAGGAATTGCAAGCGCGCTCCAGTGAA +CTCGCCCAATTTCCATTTTTTTTCAGGTTCTCCGAAACGTGGGCACCCGC +GAGTGTGCTCGCTGGCAAATGAGCCAAATCGACGTGCGAACGTGTTGTGA +TTGTATGCTCAATCCGTCATCACCACTGGTCACCTATTTATGAGCGAAAA +AACCGGGGAAAAAAACACACAAAACAACTCAAAACTCAACAAAAAAAAAT +AATTTTACAAGATTCAACGAAACAACAAAAAAAAGAAACAATGGAAATTA +AATACGGATGCTGCTGCCAAATATGCTGCCAAATTCAAATTCTCGCCTTG +TACACACGATCTTTATTGAAAACGGTGATCAACGGGCACCCTCCACTCTG +ACCTTTTATCTGAAAATAATTGTTTTAAAGATTTATTTGATTAAAAACCT +CGGCCACGGCCGCGACGCAGCCGCTCCGCACGCAGCTGCGTCGCGGTCGG +GTGGATCCTAACCTGAACTTCTCCCTCCAGCTCGTTATCGTCGTCGACCA +CCTCGAACAGTTTGCTAGCCTCGAATAGAAATCTGAAAAAAATAAATTGG +TTTTTTTTGAAAATATATAATGTTTTATGTTTGGGCTTCTGGCCTTCCTC +ATTGCATTTTTCGCGCGCTCCATTGACAATCGCCCCCCTGTGTACTCCAC +ACGGACAAATCACATTTAGTTTCACAACGAACGACACGCAACGCGCCTGG +CCTAGTTCGGCAGAAAAACTCTTCCATTTCAATTTATGAGGGAAGCCAGA 
+AATCCGCGTGTGTTTTATTGATTTTATTGATTTTTTTACTTTCGAAAATC +GCGTTCCGGCTGGTAGAAGCACAAGGTTTTGCGCGTGTCGGCGATTCGAT +CCCTCGACCATTCTCTGCAAAAAAAATATTTTTTTTTGTGTAAAATTCCC +AATCTGCGAGAACGGGATCTACGAAAAATTTTTTTTTTAATATTTTAATG +ATTTTTTCAGAGATCACATTCTTGCAGATCATCTTTTCGTGGGGCCTCCT +AATTTTTCAGTGCTGCACATTTATTTAACATTCAAAAGTGCTTTTTTCAT +AAAAATTTGAATTTTTTGCCCAAAAATCGAGAAAAACGCCGCTCCGCCTC +TTTTTCCGTGCGGCACTGTGCCTTTTGCAAGCTGGGCACAGTTTTGTAGG +CGTGAGCATGGTCTTACTTGAGCGCTTGAGTGATCTACGAAAAAAAAAAA +GCTTTTTTTAAAATTTATGTCGGAGATCACATTCTTGCAGATCATCTTTT +CGTGGGGCCTGCTTATTTTTCAGTGCCGCTCGGAAAAAGAGGCGGAGCGT +CGTGAGGCACGGTTTTTTGTCACGATTTTACAGTTTTTTGACTAAAATTC +TTGATTTTGGATCATTTTCGTTGATTTTTTTCGGAAAAAGCTGATTTTGG +CAAAAAAAAAATTTTCGCTTTTTTTTGGACAAAGAAGAGCCAAAAAAAAA +AATGAAAAAATCATAAATTTTTTAGTTTTTACATTTTTTTTTTTGAATTG +AAAAATGTTTTTTTGTGTTTTTGGTGAAAAAAACTACTTTTTTTTGCGAA +AAAAAAATAGCAAAAATGACCAAAAATCGGAAAAAAAAAACGCCGCTCCG +CCTCTTTTTCCGTGCGGCACTTCGCCATTTGCAAGCTGGGCACGGTTTTG +TAGGCGTTGTCATGTTGTCATTACTTGAGCGCTTTGAGGCAAAGACTTAT +GGGAGAACGGTGAAATCCAAAATTGGTGGCGTTCGGGAAGAGACGGGGGT +GCTTCTGAAAAAAATTGTTGAAAAATGCAAAAAATCTGAAATTTTAGAGC +ATTTTTCTGAAAAAAATGCAAAAAATCCGAAATTTTTGAGCATTTTTCTG +AAAAAAACGCTAAAAATCTGAAATTTTAGAGCATTTTTCTGAAAAAAACG +CTAAAAATCTTAAATTTCTGAGCATTTTTCCCGGAAAAAAAAAGATTTTT +ATGCACCTCTGCGAAAACTTCATCCATCATGGTAATTGAAAAATCACAAC +TGTCCAAAAGCACCGAATTCACAGTGTCGAGCTCGAAAACGCCGACATCT +TCGACCTAAAATGCTCAATTTTAATGGGGAAAAGCTCTAAAATCATGGGC +TTCTCGCATTCCTCGTTGAATTGTTCGCGCTCCATTGACAATCGCCCCCG +TGTACTCCACACGGACAAGTGCATTTAGTTTTAAAACTAAAATCGAGCCG +CGACGCGACACGCAACGCGCCGTAAATCTACGAAAAAAAAAAGGAATTTT +TGCTGAAAAATCGATAAAACACGTTAAATTTAACCAGAAAAATCAATTTT +CGCACAAAAATTCGCAAAAAAAAAAAATCAAATTTTGCTCATAAAATTTG +GCGATTTCAGTTTTCTGAGGTTCACCGACCCAAAAAACGTCAAATTTGAC +GCGCAAAACTTGAATTTTCTATTGATTTTTGATGAAAAATCGATAAAATA +CGGTCAATTTAACCAGAAAAATCAATTTTTAACGAGAAAAGCCAGAAATC +CGTTGAAAAAGCTCTAAAAATCAAAATACAAACATTCGAGAGAGTCAGAT +GACGGAGACAGACGCATTTTTTCAGAATTTTCTGCAAATTTAGGTCCAAA +CTGACGAAATAGAAGACACGGAGCACACTGGAAATGAAAAAAAATTAATT 
+AAATGCTAGATTTTTAGCATTTTTTCAGAAAAAATGCACTAAAATTTCAG +ATTTTTAGCATTTTTTTCCGGAAAAATACACTGAAATTTCAGATTTTTAT +TTATTTTTTTTTGAAAAAATGCTACAGACCGAATAGTTTGCCAGGCATTT +CTGCCGCGGAGGAAATCGGCGATGAACTCTGCCACGTAGCGGCATTCGAT +GGGCGGTGGCTGAAAATTTGCAATTTTTTGGCCTAAAATTTGGTTTTTTC +TGGAAAATTCTCTTACCAACGGGAAGGTTTTCGTCTTCGGAATGCCTAAA +TTAACAATACAAGCCCTCGGAGCAATTCTCGCAAGAATTGGAGCCAGCTC +CTTGGCATAAGTGAGATCATAGATGAAGAAGACGAGCCGCTTCACCATTA +CGATGCTCTTGCAGCGTTCACGGCGTTCTGCGTATTTTTCCAGGATACTC +TGAAAAAAAATCCGTTAATTTTAGCCAAAAAATCCGTTAAATTTAGCCAA +AAAATCCGTTAATTTTAGCCAAAAAATCCGTTAATTTTAGCCAAAAAATC +CGTTAATTTTAGCCAAAAAATCCGTTAACTTTAGCCAAAAAATCCGTTAA +TTTTAGCCAAAAAATCCGTTAATTTTAGCCAAAAAATCCGTTATTTTAAG +TCAAAAAATCCGTTATTTTAGCCAAAAAATTCGTTAATTTTAGCCAAAAA +TTCGCTAGTTTCAGCCAAAAAAATCCGTTAATTTTAGCCAAAAAAGAACC +GTTATTTTTAGCCAAAAAATCCTTTAATTTTAGCCAAAAATTCGATATTT +TTAGCAAAAAAAATCCGTTAATTTTAGGCAAAGAACCGTTAATTTTAGCC +AAAAAATCCGTTAATTTAAGTCAAAAAATCCGTTAATTTTAGCCAAAAAA +ATCCGTTAATTTAAGTCAAAAAATCCGTTATTTTAGCCAAAAAATTCGTT +AATTTTAGCCAAAAATTCGCTAGTTTCAGCCAAAAAAATCCGTTAATTTT +AGCCAAAAAAGAACCGTTATTTTTAGCCAAAAAATCCGTTAATTTTAGCC +AAAAATTCGATATTTTTAGCAAAAAGAATCCGTTAATTTTAGCCAAAAAT +CCGTTAATTTTAGCCAAAAATCCGTTAATTTTAGCCAAAAATTCGATATT +TTTAGCAAAAAAAATCCGTTAATTTTAGGCAAAGAACCGTTAATTTTAGC +CAAAAAATCCGTTAATTTTAGCCAAAAAAATCCGTTAATTTAAGTCAAAA +AATCCGTTAATTTTAGCCAAAAAAATCCGTTAATTTAAGTCAAAAAATCC +GTTAATTTTAGCCAAAAAAGAACCGTTATTTTTAGCCAAAAAATCCGTTA +ATTTTAGCCAAAAATTCGATATTTTTAGCCAAAAATCCGTTAATTTTAGC +CAAAAATCCGTTAATTTTAGCCAAAAATTCGATATTTTTAGCAAAAAAAA +TCCGTTAATTTTAGGCAAAGAACCGTTAATTTTAGCCAAAAAATCCGTTA +ATTTTAGCCAAAAAAATCCGTTAATTTAAGTCAAAAAATCCGTTAATTTT +AGCCAAAAAAATCCGTTAATTTAAGTCAAAAAATCCGTTAATTTAAGTCA +AAAAATCCGTTAATTTTAGCCAAAAAAGAACCGTTATTTTTAGCCAAAAA +ATCCGTTAATTTTAGCCAAAAATTCGATATTTTTAGCAAAAAGAATCCGT +TAATTTTAGCCAAAAATCCGTTAATTTTAGCCAAAAATCCGTTAATTTTA +GCCAAAAATTCGATATTTTTAGCAAAAAAAATCCGTTAATTTTAGGCAAA +GAACCGTTAATTTTAGCCAAAAATTCGATATTTTTAGCCAAAAAATCCAT +TAATTTTAGCCAAAAATTGCTAATTTTAGCAAAAAAATTCGCTATTTTTA 
+GCCAAAAAATCCGTTAATTTTAGCCAAAAAATTCGCTATTTTTAGCCAAA +AAACTGTTAATTTTAGCCAAAAATCCGTTGATTTTAGCCAAACAATTAGT +TATTTTTAGCCAAAAAATCCGTAAATTTTAGCCAAAAAATCTGTTATTTT +TAGTCAAAAAATTCGTGAATTTTAACCGGAAAATCCGTTATTTTTACTAC +AAAAATAGCGAATTTTCGCCAATTTTAACCAAAATCTCTCGATTTTTTGT +CAAAAATGCACCAAATGTGCGCTTTTTTTCGTTTTTTTTTGGCATTTTCC +CTACTTCAATAGCGAGTTTGCAGGTCGTCGTGCAAATTTTGACTTCGATT +TCAACAGAATCCAGCAGAATTCGATCATCAGTCATACATTCCAAACAATT +TTCAGCATCCTTTATGGGCACTTCGATGAAATCTGAGAGAGCAAACACTT +TAGAATTGCCAGGGTTTACAGCTCTTTTAATCACCATCCAACTGGCACGC +GACGTCCATTTTTGGCTGCCCATTCACCCGCTCGCCGCCACCCTTGCAAA +TGTTGTGAAATTTGATGACAAACGGTTCAGAGTACTCGAAATTCTCCAGA +TCGCCGTGCTCGACGAGCTCGTTGTTCGATGGTGCTCTTTCCGGCGTTCT +TTTCCATCTGAAACTGCTGTTTTTCCTTTTTTTTTTTGTCGCAAAATCTT +TTTTTTCACCGAATTTTTCCACCAACCTGAACAGCGAATCTTCGAAAAGA +GACGAAGAATTCGATTTTGGGCTTTCCAAATTTGAAAAATTCAAAGAAGT +TGTCACAGAACTAGCCATTTCGAAGTTGAAATGATTAAAAATCAAAAACA +AAAAATTTCGATAAAAATCAATGTCCGAGAGGAGTACACGCTTGGGAACC +ATGCGCGGGTCTCGCACGGAAGAGACGAATTGAAGTACGGTAGCTCGTAA +ATCCTGTTCCACGATTGATTTTTTTTTAATTTTTGATCAATTTTTTCGAT +TTTATCCCAGAAGTTCTTGAAAATTATCCGTTTTTTTGCAATTTTTTCGG +AAAAATGCTCTAAAATTTAAGATTTTTAGCATTTTTTTCCGAAAAAAATG +TTTTAAAATTGCAGATTTTTAGCATTTTTTCACAAAAAATGCACTAAAAT +TAAGATTCTTAGCATTTTTGCGGAAAAAATGCTAAAAATCTTGCAAAGAC +TGAAAAAATTCGAATTTGTTGTAATATTGCATGTTTTCCGAGGTTTTACA +TCAAATATGTGTAGTTTTCCTTGAAATTTGACGTTTTTCACTGAAAAATT +CTACAAAAATAAGCATTTCTGAGCTCTTTTGTCGCAAAAGTTGATGAAAA +TCGCCAATTTTTCAATAAATATTGCACAGAAACCGTCAGATTTTTAGCAT +TTTTTCGGAAAAATGCACTAAAATTTTAGATTATTAGCAATTTTTCAGAA +AAATGCTCTAAAATTTAAGATTTTTAGCCCTTTTTTTCAGAAAAATTCAC +TAAAATGCAAGATTTTAATCATTTTTTTGCAAGAAAAATGCTCTAAAATC +TAAGTTTTTTAGCATTTTTTTCGGAAAAATGTTTGAAAATCCCTTCTTTA +ACGCCTTTTTCGCACTAAAAGATGCTCAAACTAGTTTAAACTTTCAGAGT +TTTCCCATGCTTTTGCCCCCTCCACTTCACAAGAAACAAGAAACAAATAC +GGCGGGAAAAAAGCGAAAATTCGCGGAAAACATCATTTTCAATGCATTTT +TCATCGCGAAAATTGCCGAATTTCATTAAAAATGGCCAATTTTTGTGCCA +GTTTTCCCAATTAATCATGTATTTTCGATTAACTCGAAGCCCCTGCGCAG +AGAAAAACACAAAAATGTTGCAATTGTTTGTGTAAAGCACTTCTCTGCGC 
+GTCTGCGGCATCCGATATGACGAGCAATATATATCTTTTTTCTGGGAGCC +TTTTATTTTAAATTTATCATTTTTCATTTAGTTTTTGAGAAAAAAATTCC +CCCAAAATTAACCTAAAAGCCTTGAAAATGTGAATTTTGAGTGAAATTTG +ACGTTTTTCAGCGAAAAATTCTCAATTTTTAGTGCATTTTTGCCGGAAAA +ATGCTAATAATCTTGAATTTTAGTGCTTAGCGGAAAAAAAATTCAAAATG +AAAATGTCGAATTTGAAATAAAATGCTTTAAAATTTAAGATTTTTAGCAT +TTTTCCCGAGAAAAGCTAAAAATCTTAATTTTTATAGCTTTTTTGTTGAA +AAAATGCTAAAAATCTTAAATTTTAGATCATTTTATTTCAAATTCGACAT +TTTCATTTTGTATTTAAGAAAAAATAACCTCAAAGTCTGAAAAATATCGA +ATTTTGAGTGAAATTTGACATTTTTCAGCGAAAAATTCTCATTTTTCCGG +AAAAAATGCTAAAAATCTTGAATTTTAGTGCATTTTTCCAGAAAAAATGC +TAAAAATCTCAATTTTTAGTGCATTTTCCCGGAAAAATGCTAAAAATCTT +AAATTTTAGAGCATTTTCCAGAAAAAATGCTTAAAATCTTGAATTTCAGA +GCATTCTTAATGAAAAATGCTAAAAATCTTAAATTTTAGAGCATTTTTCT +CGAAAAAATGCTTAAAATCTTGAATTTCAGAGCATTTTTTTCGGAAAAAT +GCTAAAAATCTCAATTTTTAGTGCATTTTTCTGAAAAAATACTAAAAATC +TTGAATTTTTAGAGCATTTTCGCTGAAAAATAATAAAAATTTTAAATTTT +AGAGCATTTTTCCGAAAAAAAAAACGCTAAAAATCTTGTAATAACTGACA +AATTTCGAATTTTGAGTAAAATGGCATGTTTTTTGAGGTTTTTCCTTGAA +ATTTGACGTTTTTCACCTAAAAATTCGACAAAAATGAGCATTTCTGAGCT +CTTATATTGCAAAAATTGACAAAAATCGTCAATTTTTCAATAAAAATTGC +CCAAAAACCTTAAAATATTCTGAAAAAAGCCGAAAAAACCCCATTTTTTA +GCCATTTTCTACAGAATTTTACATTTTTCTCAACTTTCTTCACCTAAAAT +CGCCGATATTATTCACATAAAATTCCCTTTTTTTTCAGCGCAATGGGGTG +TATTATGAGCCAGGAGGATGAAGCAGCGAAACGAAGATCCAAGAAGATTG +ATCGATTGGTAAAAATTGGGGATTTTTCGTTGAAAAAACTCGATTTTTTG +TGAAAAAATATAGAAAAATCATCAAAACTGCTTCAAAAACCGAAAAATTT +CACTAAAAACTAGAATTTTTGCTCGAAAACCTCAAAAAATCGCAAAATTT +GCAGTGAAAACTAGATTTTTATTTTTAAAAAATTCAAAAAGTTCATAAAA +ATCAATAAAAAAATCGGATTTTTTTTCTTAAAAAATCTGGAAAACTGTCT +AAAAATCTACTCAAAATCAGTTTTTTTTGTAGAAAATCTGAAAAATAGTC +TAAAAAATTAATAAAAATCGATTTTTTGGGAGAAAATTTGAAAAATTACG +AAAAAAATCAATAAAAAATCGGTTTTTTGGGAAAAACCCTGAAAAATTGT +CTAAAAATCTATAAAAAATTGGATTTTTTAAATAGAAAATCTGAAAAACT +GCCAAAAAATCAACAAAAAATCGATTTTTGTGTGAGAAAACCTGAAAAAT +TATCAAAAAATTGGTTTTTATCTAGAAAATCTGAAAAATTACCTAAAAAG +CAATAAAAAATCGGTTTTTTTTCTGGAAAAACTGTAAAATAGCCTAAAAA +TCAATTTAAAAAAAGTTTTTTTTTTTGTAGAAAACCTGAAAAATTATCAA 
+TAAAAACCACGAAAAACAGTTTTTCGCTTATAAAAAGTCCAAAAATCCCA +AAATTTGCATTAAAAACTAGATTTTTATTTCAAAAAAATTCAAAAATTGC +CTGAAAATCAATAAAATTGTTGTTTTTTTTTGGAAAAAAAAACCCTGAAA +AATTGGCTAAAAATCCCAAATTTTTTTGCAGCTCAAAGAAGACGGCGAGA +ACTCAATGCGAACCATCAAGCTTCTGCTCCTGGGAGCCGGCGAGTCTGGA +AAATCGACGATTTTAAAGCAGATGCGTATCATTCACGACGTCGGATATAC +GACTGAAGAGAGAAAAGTGTTCAGAGGCGTCGTTTATGGAAATATTATTT +TGGTCAGTGCTCCGATGCACTGAAAAATGACAGGAAATTGGCAATTTTGA +CCAAAATTCGTGGATTTTAACCAAAAAATCCGTTAATTTTAATCAAAAAT +CGGCTAATTTTAGCCAAAAAATTCGCTATTTTAACCAAAAAATCCGTTGA +TTTTAGTCAAAAAATTCGCTATTTTTAGCCAAAAAATCGCTATTTTTAGC +CAAAAAATCCGTTAATTTTAGCCAAAAATTTCGCTATTTTTAGCCAAAAA +ATCCGTTAATTTTAGGCAAAAAATCCGTTAATTTTAGTCAAAAAATCCGC +TATTTTAACCAAAAAATCCGCTATTTTAACCAAAAAATCCGTTAATTTTA +GCCAAAAAATTCGCTATTTTTAGCCAAAAAATCCGTTAATTTTAGCCAAA +AAATTCGCTATTTTTAGCCAAAAAATCCGTTAATTTTAGCCAAAAAATTC +GCTATTTTTAGCCAAAAAATCCGTTAATTTTAGCCAAAAAATTCGCTATT +TTTAGCCAAAAAATTCGCTATTTTTAGCCAAAAAATCCGTTAATTTTAGC +CAAAAAATTCGCTATTTTTAGCCAAAAAATTCACCAATTTTCGTTTAAGA +TTCTCCAATTTTTGACAAAAAAATTCACCGAATTTGCTCAAAATTCTCCC +TTTTTTGGCTAAAAAATGCATAAATTACACAATTAAACCAAAAATTACCC +AATTTCGCCCAAAAACTCTAATTTTTACCAAAAATTCTCCTATTTTTTGT +TGAAAATTCACCAATTTTTTTGCCAAATTTTAACAGAAAAATCTATAAAA +TATCAGGCGAAAATAACCTAATTTTGGTCAAAAATTCACCGATTTTTGTT +CAAAATTCTTCAATTTTACCCAATATCCACCAAATTTTAACCAAAAAAGT +CTAAAAAATTATTCAAAAAATCCCTAATTTTGGCCCAAAATTCTCCATTT +TTACTCAAAAATTCTCTCTTTTTAACCAAAAAATCTAGAATATTAGGTGA +AAAATGCCTAATTTTGGCTAAAAATTCACCGATTTTTGACAAAAAAAACC +TCCAATTTCAGCTTAAAACTCTCTGATTTTTACCAAAAAAATCTAAAATA +TTAGGCGAAAAATCACTAATTTCGGCTAAAAAATCACCAATTTTTGACAA +AAGAAACTCCAATTCCAACAAAAAATTCACAAAATTTACTCAAAAATTGT +CTGTTTTTCACAAAAAAAAAACCTACAATATTAGGAGAAAATTCACAAAT +TTTGGCCAAAAAATCCCTAATTTTGGCCGAAAAATTTGCCAATTTTGTTC +AAAATTCTCCAATTTTCGACTAAAAAAACCTGCCATATTCAGCAAAAAGT +TCACCAAATTTACTCAAAAATTCAAAAAAAATCTAGAATATTAGGCGAAA +AATCCCTAATTTTGTCCAAAAATTCACCGATTTTTGACAAAAAAAACTCT +AATTTCAGCAGAAACTTCACCAAATCATAGCCAAAAATTCACTGTTTTTT +ACCAAAAAAATAGAATATTAAGAGAAAATCCCTAAATTTGTCCAAAAAAT 
+TAAACAAATTATAGCCAAAAATGCTCTGTTTTTTACCAAAAATTCTAGAA +TATTAGGTAAAAAATCCCTAATTTTTCCCAAAAAATTCACCGAATTTACT +CCAAAATGCTCTGTTTTTGACAAAAAAAAACTCTAATTCCAGCACAAAAT +TTACCAAATTATAGCCAAAAATGCTCTTTTTTTTCCAAAAAAAATCTAAA +ATATTAGGCGAAAAATCCCTAATTTTGTCCAAAAATTCACCGATTTTTGA +CAAAAAAAACTCTAATTTCAGCAGAAACTTCACCAAATCATAGCCAAAAA +TTCACTGTTTTTTACCAAAAAAATAGAATATTAAGAGAAAATCCCTAAAT +TTGTCCAAAAAATTAAACAAATTATAGCCAAAAATGCTCTGTTTTTTACC +AAAAATTCTAGAATATTAGGTAAAAAATCCCTAATTTTTCCCAAAAAATT +CACCGAATTTACTCCAAAATGCTCTGTTTTTGACAAAAAAAAACTCTAAT +TCCAGCACAAAATTTACCAAATTATAGCCAAAAATGCTCTTTTTTTTCCA +AAAAAAATCTAAAATATTAGGCGAAAAATCCCTAATTTTGGCCAAAAAAA +TTACACCAATTTTCCCCAATTTCAGCTTAAAATGTTCAATTTCTAACCAA +AAAACCCCCAAATTTTAACCCAAAAATTCCCTCGAATTTCACCCCAAAAC +CGTCCAATTTTCCAGTCTCTCAACGCCATCATCCACGCTATGGAGCAGTT +AAAAATCAGCTTCACCACACTCGATCACGAATCAGATGCCCGAAAGCTTC +TAATGTTCAGCACAACAGGCGAAGAGGATGAGCTGCCCGAAGAGCTCGTC +GTGCTCATGAAGAGTGTTTGGTCCGATTCGGGAATTCAGAAAGCGCTCGA +AAGGTGGCGAAATTTCGCTGAAAATTGAGAAAATTCGAGGGATTTTCGCT +GAAAATCGAGAAATTTCGGTGGAAATTAGCTTATTTCCATGAAAAAATCG +TTAAAATCCCAGAAAATTGCGATTTTGGACTATTTTTGAGCATTTTTCGC +GTTTTTTCCATGAAAAATTCAATGAAATCGATTTTTTAGGTCATATTCGC +TGAAAATTACCGCTTTTTTCGATGAAAAATTCGTTGATATTCGTTGAAAA +TTATCTATTAGAGCTAATTAAAATTTAAAATTCCAAAAAAAAAAATATTT +TAAAATAATCAATCGAATTATTTTTTGCTCACACTTTCAAAAACCGCTAA +AAATTCTAAAAAAAAAATTTTGGAGCCCCTTTTCCGCTAGAAAATAGCTT +TTTTTCCCTCAAAATCCGGGAAAATTCAGAAATATTTAATTTTTTGGCTA +TTTCTGACTCTTATTCCCACACAAAAATAGTTTTACCAAAAAAAAACAAT +TTTTGTCAAAAATTCGAAAAAAAAATTTCTGGGAAATATTTTTAAATGAC +TCTAAATTTTCCCCTGTACCCGAATATCGATGTGAAAAAATTCAGAAAAA +TTTTCCGCGATTTTATATGATTTTTTGAAAATTGGACAAATTTCAGTTTT +CCCCCCTAATTCCTATTTGAGTTACCGCCAATTTGATTTGTTCGATGGAC +TTGCACATTTTTGAATTAATCTATTTTATTTTTTGTTGTTTTTTCCACCG +ATTTTTAATGTTTTCGGTGTATTTTTGCTTGAATTTTAGGGGAAAAGTCA +AAATAAATGCAATTTTCGATTAAAAAGCACGCTGACCGGCGTAAAAATGA +AAAAGTAACGATTTTAAACGATTTCAAACCTGAATTAATTAATTTCACTG +ATTTACGCCTGTACGCGTGCGTAGATCAGTGAAATTAATTAATTTAGGTT +CGAAATCGTTTAAAAGCGTTACTTTTTCATTTTTACGCCTGTAAGCGTGC 
+TTTTTAATCAGAAATTTGCAATTATTTTGACTTTTTCTCTAAAATTCAAG +CAAAAATACACCGAAAACATCAAAAATCGGTGGAAAATAACAAAAAATAA +AATAAATAAATTTAAAATTGTGCAAGCGCGCTCCATCGAACAAATCCAAT +TGGCGGTAATTCAAATAGGAATTAGGCAAAAACTGAGATTTTTGAGGCAC +CACGTTTTGAAGATCTGTTCAAAAAGAAGATCTACGTTTTCAGGATCTGG +CACCGTGCCAACTGCGGTTTTCTCGATGAAAAACGTAACAACGATGCTCC +GATGTTACGCGTCGCGTGTTGTTTTGCGTTCAGAAAGATATTTTTTGAAA +TTTTCTTCTTGAAGAAACGCTTAACGACACGCAACGCGTAACATCGGAGC +ATCGTTGTTACGTTTTTCATCGAGAAAACCGCAGTTGGCACGGTGTCAGA +TCCTGAAAACGTAGATCTTCTTTTCGTAGATTTTCAGAATGTAGATCTTC +TTTTGGTAAATCTTCTTTTCGTAGATCTTCTTTTCGTCGATCTTCAAAAT +GTAGATCTTCTTTTGGTAGATCTTCTTTTCGTAGATCTTCTTTTCGTAGA +TCTTCTTTTTCGTAGATCTTCTTTTTGAACAGATCTTCAAAACGTGGTGC +CTCTGATTTTTCCAAATTTCAAAAAATCATATAAAATTTAGAAAATTTTT +TTGAATTTGTTTATCAGCATATTTGGTCATTTTGGTACCATATTTTCCCC +TGATTCCGAATATCAATGTGAAAAAATTCAAAAAAAAAATCCCTGATTTT +ATATTTAAGCTTGAAATCGCCGAATGAGATTTTTCAAATACGCGCGCACA +AATAAATTCTCCTTGGAGCGCGTTTGCCTCATTTGATTTTCTCCATATTT +ATTTTTACTTTTTTTTCAGTTTTTCACAGCTATTTTCATTCATTTTTATT +GTATTTTATAGGTTTTTTTTTTTGAAAAATAAATTTTTCCGTGTGAATTG +AACATTTTATTTGCATAAAAATGAATGAAAATAGCTGAAAAACTGAATAA +AGTAAAAATAAATATGGAGAAAATTAAATGAGGCAAACGTGCACCAAGGA +TAATTTATTTGGGCGCGTATTTGAAAAATCTCATGTAGGAGGCAAATGAA +ATTTAGGCGATTTCTAGCTTAAATATAATATCAGGGAAATTTTTTTTTTG +ATTTTTTCACATCGATATTCGAATCAGGGGAAAATTTGGAGCCACCTGTA +ACAAAATTTTATGAAAAAAAAAACTGTATAATTTCTAACAACTTTTTTTT +TGTAGATCACGCGAATATCAGCTCAACGATTCGGCCGGCTACTATCTGAG +CCAACTCGACAGAATTTGTGCTCCTAATTACATTCCCACACAGGTATTTT +TCACTGGAAAATTCACAAATTTTTCCACTACTAAAAATTCAAATTTCCTT +TGCAGGATGACATTCTCCGCACAAGAATCAAGACAACAGGAATCGTGGAG +ACGCAGTTTGTCTACAAAGATCGCCTATTTTTGTAGGGGGAAAATTAATT +TTAAAAAATTGAAAAAATCGTCGCCGAAATTCAGGGTTTTCGACGTTGGC +GGACAGCGATCCGAACGAAAAAAGTGGATTCATTGCTTCGAAGACGTGAC +GGCACTCATTTTCTGCGTTGCACTGTCAGAATATGATATGGTTCTTGTCG +AAGATTGTCAGACGGTGCGATTTTCGAGTTTTTTGCTTTTTTTTCGTTTT +TTTCCCAAGTTTAGGGTTTTTCGGTGAAAATCTCGGTTTTCTTGGTTAAA +ATGGTTTTTTTTTTGGCAAAAAACGGCAAAAATTGAGGATTTTAGCGTAT +TTTGGCGAAAAATTGAGTTAATTACATGGAAAATTTCAGAATTTGAGTTA 
+AAATTGTAGTTTAAGGATTTTTTAGTGATTTTCTCGATTTTTGAACCGAA +AAACGTTCGAATTTCATTTATTACGCGCGACCACAAAATGCTGAGAACGC +GTATTGCACAACATATTTGACGCGCAAAATATCTCGTAGCGAAAACTAAA +GTAATTCTATAAATGACTACTGTAGCGCTCTTGTGTCGATTTACGGAAAT +CGTGTATTAATCGATAAAGTATTTTTTTTAGAAACACAAAAATGACAAAA +AAAATACGAAAGAAAATTGAATTGTTTCGAAAATCGAGTCCTCCCGTAAA +TCGACACAGTTGCCATTAAATTAAATAAAATTAATGTTAACTGTGTCGAT +TTACGGGAGGACTCTCGATTTTCGAAACAATTCAATTTTCTTTCGTTTTT +TTTTGTCATTTTTGTGTTTCTAAAAAAAATACTTTATCGATTAATACACG +ATTTCCGTAAATCGACCCACAAGAGCTACAGTAGTCATTTTCAGAACTAC +TGTAGTTTTCGCTACGAGATATAATTGCGCATCAAATATGTTGTGCATGA +CGGATTCTCAGCATTTTGTGGTCGCGCGTAATGAAAAGCGAAAAAATCGA +TATTTTCTGATTTTTGCGCCGAATTTTGTTAATTTTTCTTTTTTTTTTTT +CTGCGAAAACCACATTTTCCTGACTAAATTCAGGTTCAGCCATTTTTTTT +CGCAAAAATCGACGAAAATTGAGAATTTTAGCGTACTTTTAATTTTTTTA +AATTTTTTTCGCTCTGAAAATTATGAAAAAATCAAAATTTTCTCGATTTT +TGAACCGAAAAACGTTCAAATTTCATTTAAAAATCGAAAAAATCGGGAAA +AAATTTTCCGATTTTTGCACCGATGATTTTCGCTAATTTTTCACCACATT +TTTTGCGTTTCAGAATCGAATGCGAGAATCGCTAAAACTGTTCGACTCGA +TTTGCAACAATAAATGGTTTGTCGAGACGTCGATTATTCTATTTCTCAAC +AAAAAAGACTTATTCGAAGAGAAAATCGTTCGATCTCCACTCACACACTG +CTTTCCGGAATATACGGGCGCCAATAATTACGAAGAAGCTTCTGCGTACA +TTCAACAACAGTTTGAGGATATGAATAAGAGGACTACTGGAGAGAAAGTT +GGTGGTTTTTTTGAATTCACTGCAACTTTTTCCTCACGAGGGACGAGGAA +AAGTGGTTTCTAGGTCATGGCCGAGGGGCCGACAAGTTTCAGCGGCCATT +TATCTTGCTTTGTTTTCCGCCTGTTTTCTTTCGTTTTTCATCGATTTTTT +TCGTTTTTTCGTAATAAAACTGATAAATAAATATTTTTTGCAGATGCTAA +AACAATTTCCAAGTAAAAAAATCATGTATTCAGTCGGCAAGCAGCGATGA +AAGTGGGCATTGTAATATGATGGATTACGGGAATACAAAACCTAAACTTT +TTCTGAAACATGAAACATATGATGCTTAGATGCTGAAATTACCTGATTTT +CATAACGAGACCGCTGAAAAGTTTTGAGGTTTCCACAATTCAACTTTTTG +TGCGAAAATCTCGACTTTTTCACCAAAAAAGTTGAATTTTGGAAACCTCA +AAACTTTTCAGCGGTCTCGTTATGAAAATCAGGTAGTTTCAGCATTTAAG +CAGCATATGTATCATGTTTCAGAAAAAGTTTAGGTTTTGTATTCCCGTAA +TCCATCATATTGCATTGCCCTTGTTTCACCGCTGCTTGCCCACTGAATGC +ATAATTTTTTTACTTGGAAATTGATTTAGCATCTGCAAAAAATATTTATT +CATCAGTTTTATTAAGAAAAACGAAAAAAATCGATGAAAAACGAAAGAAA +ACAGGCGGAAAACAAAGCAAGATAAATGGCCGCTGAAACTTGTCGGCCCC 
+TCGGCCATGGCCTAGAAACCACTTTTCCTCGTCCCTCGTGAGGAAAAAGT +TGCAGTGAGAAAACTCAATTTTTGAATTTTTTTTTCATGAAAATCGGAAA +ATCTTCAATTTTTCCTAATCAATTCCATCTCTACAGAATCAAGAAATCTA +CACCCAATTCACATGTGCCACCGACACTAACAACATTCGATTCGTTTTCG +ATGCCGTCACCGACATTATCATTCGAGATAACCTCCGCACGTGCGGGCTC +TACTAAATTTTTCCGCTAAATTTTAAATTCCGTTTTTTCCCCCCAAAATC +CCCTCCCAGACATTTTCCGGTATTTATGTATTGACCACACACAACTCTTC +TCTCCCGTCTCCGCTGTTCAATGCTACGGTTATTCTCTTTTTTTTTCTTT +TTTTTTCCTGAAATTCCAAGGGTGTCGTTGAAAATCGAAAAATTCGAAAA +ATTTGGGTTTTTTAGAGGAAAAGCCAAAAATTCAATTTTTTTTTCGATTT +TTATTCCGGAAAATTCGAAAAATTTCGATATTTTTTAAATTTTTTACAAA +AAATTCGAAAAATTTGGATTTTTTCAGGAAAAAGCCAAAAAATTACAATT +TGTGTTTAAAAAAACGAAAAAAAAAACCAATTTTTCTGATTTTAATTCCG +GAAAAATCAAAAAATCTCGATTATTTTCAGAAAAATTCGAAAAATTTGGG +GCTTTTTCCAAAAAAGAAATCGTAAAATTTCGATTTTGTCCAGGAAAAAT +CGAAAATTTAATGTTTTCTTTTTTGGCTTCTTCCGGAAAATCCAAAAATT +TTGAATTTTTTGGATTTGTTTTCAGAAAAATTGGAAAAATTTGGATTTTT +CAGGAAAAAGCCTAACAATTTCAAATTTTGCTTTAAAAAAACTTTAAAAA +ATTGATTTTTGAGGAAAAAACGAAAAATTTCGACATTTTTTGATTTTTTT +TTTACCGAAAAAAAAATTGATTTTTTCCAGGAAAAAGCCAAAAATTTTCA +ATTTTTAAGGAAAAATTCGAACAATTTTGATTTATTGATTTTTTATTACG +GAAAAATCGAAAAATTTGGGTTTTTTAGAGTGAAAAGCCAAAAGTTCAAT +TTATTTTTCTGGAGAAATCCAATTATTTCAATTTTTGAATTTTTTCAAGA +AAAATAAAAAAATGTCGATTTTTTCCAGAAAAATCGAAATTTTTCGTTTT +TGAATGAAAAAAAATTTTTCCTTCAATTTTTCCTTTAAAAAAAATAAATA +AAAATGTTCGAATTTCCATGAAAAAAAGTCAAAAAAATTCAAAATTTTCC +ATTTTCCCTGACAAAAAAATCAAAATTTTCCGATTTTCCTAAACCAAAAT +CCCAAAATTTTCTGATTTTCCAATTTTCTCTGAAAAATCGAATTTTTTGC +CGCACACCCCTGGTTTCACGTGGTGGTCAGGTTGTCTCATTGCGGTTTGT +GATCTACAAAAAATGAGGGAATCTTTCCTCCCCCGGGAAAATCTGACGTC +AGCGCACTCGTGTAACCATGCGAAATCCTCTGCTGAAAAGTCTGCGTCTC +TTCTCCCGCATTTTTTGTAGATCAACGTGTAGATCAATCCAAAATGACAC +CTTGACACCACGTGCCTAAGCCCAAGCCTAAACCAATCCATTTTACCGGT +ATATCTCTACAACCTTTCACTGTCTGAAATATCGCGTCATTTTATATTAT +TATTATTATTGTATTCGCTTAAACGACAACAATTCCCCCATTTGATTCAA +AAAGTAAACATTTCCGAACTTTTTAAAAATTTGAAAAATATTTTTAAAAA +ATTTTAGCGTGAAAAACTTGTCCCCCCACCCTGCCTCTCCAAAAAAAATT +AATTTCGGAAGAATTCAGTCCTTGTTTTTGTAAATTGAGAACGTAATGCG 
+CTTTTTGGTGGTTTTTGCATATTTATCGACACTTGCGTACGCGAGGTATC +GATTCAGATTCCGTGAGTTTTTTACAACAAAATATCGCTAAGCAATCGCG +CTCCAGCGCGAAACTTCAAAAAAAGGCCAAAATTTTGCGTCGCTTTCTAT +GATTTTAAGACGAAAAGCGAGGAAAAATCAATAAAAACAGCGAAAACTTT +GAAATTTACGGGTTTGCCGCTTTTTTTGCGCTAGAGCGCGATTGCTCACT +GAGTTCAAGCGCCCTTCTTATTTTGAAATGGAAGAGTTTGCCGAATTAGG +CTATTTTGGTGCGGCCACGGCTGGGGTCAATTTACGGCGCGTTGCGTGCC +GCGTCGCGGTTAGGTGGACCACGCCTTTCCCACGCGTAGACGATTGTCAA +TAGAGCGCCGAAAATGCAAGGCCAGAAGCCCGTATGAGTGAAGTTTTTGT +TCATTTTCACTTTTTTTTCGTCGTTTTTAACGGTTTTTATGTGAAAATCA +ATAAAAACGGCGAAAATAATTAAAAATTTTCGATATTCCGGGTTTGGCGC +TTGCTTTGCGCTAGAGCGCGATTGGTCACCGAGTTCAAACGGCGCACTCT +GGCTTCCCTCTTAATTTGAAATGGAAGAGTTTGCCGAATTAGGCCATTTT +GGGTCGGCGGGTAGATTTACGGCGCGTCGCGTGTCGCCTCGCGGCTCGAT +TTTGATTGTAAAACTAAATGTATTTGTCCGTGTGGAGTACACGCGACTTT +CCCACGCGTTGTCCAGCAGGTTATTGCCAATGGAGCGCCGAAAATGCAAG +GCCAGAAGCCCGCCGTATAAGTAGCCCATATGAGTGAAGTTTTTGTACAT +TTTCACTTTTTGTTTCGTCGTTTTTATGGTTTTTATGTGAAAATCCACAG +TCTCCTCGTCATCGAAAGCGCACAAAAAACTGCTTACTGGCGCGTTTCGC +GTTTTTGCTAGCGCATTTTGTGGATTTCTCCGAAATTCAGAAATTACGCG +GAAAATAGGCTAGAATTCCAAAATTACAGCCCCGCCTCTTTTGATAATTA +TCTTCAGGATGAAGAAGATGATCGCGAAGGTGGGTGACTTGAAATATACA +AGAAAAATATGATTTCTTCAGAGAAACACCATTTTTACCGTTGATTTTTT +AATATCCCTACTCCTCCTTCAGTTTTTGACCAAATTTTCTCTTTTAATGG +TCTTTCATTTCATTTTGTGCTAATAAACAAATGTTAAGTTCGAAATCTGC +GAAAAAATTCGTTTCAACGCTAAAAAACGACGAAGTTTATTTTTCAACAC +TAAAAAGATTCACTCCCTCCCCCGGCTGTGCCAATGTGCAAGTGCGCCCC +AGCCCAATTCGACGCCGAGGAGACTGTGAAATCATTAAAAACAGTGAAAA +TAACGGAAAATTTCTGAAATTCCGGGTTTGCCGTTTTTTTTTGCGCTGGA +GCGCGATTGCTCACCGATTTCAAATAGCGCACGCGCTTCTGGTTTCCCCC +ATAAATTGAAATGGAAGAGTTTTCGCCGAACTAGGTCATTTTGGCGCGGC +CACGACCGGGGTAGATTTACGGCGCGCTACGTGTCGCGTCGCGGATCGAT +TTTAGTTGTAAAACTAAATGTAGTTGTCCGTGTGGAGTACACGACTCTCC +CGTCCGGCAGGCGATTGCCAATGGAGCGCGAAAATGCGAGGCCAGAAGCC +CGTGCATTTTTTTGCGGGTCCCGCCACGACATTTACCCTTTCAGACACGT +GTATCAAAGAGGACCAGGCGATTTTGGAGAAGGAAAATGTGAACTCGCCA +CGCCAATTTCTTTTTATCGATGACGGACAATCGCCAAAGTGGAGGGAAAA +TGATCTCGAAGGAAAAGGAATTCTTATGAGCTTCGGAAATCGATTGAAGA 
+ATCTGACAAACGGATATGAGACTGTGAGACTTTTTTTACGGGTCTCGACG +CGATTTCCCACGGATTTCGGGCTTCCCTCATAAATTGAAATGGAAGAGTT +TTTGCCGAACTAGGCCCCGGCCATATCTGGGGTAGATTTACGGCGCGTTG +CGTGTCGCGTCGCGGCTCGATTTTAGTTGTAAAACTGAATGTATTTGTCC +GTGTGGAGTACACGGGCGATTGCCAATGGAGCGCGAAAAATTCAATTTGT +GTCTCGTAACGATTTTATCGATTCTTTTTCAGGGTGCCGAACAAGCTCTG +CGTGTCCTTGAATTCAACAATACGGAACACTTTCTTGTGATCAATTTTTG +GATGAGAAGCTACTATTTCATGAAAAACGGTGATAAATATCAGCTGTCAT +ACACCGGAACCACCATCAAACCCGATTTGAATCACTTTATTCGGTTAGCA +GGCTTGGAGATGCAATATCGCTCCATCGCAACCATTTTTTCAGCGTATAC +TTCATCTGCGCTCCAGCGAACAGTCAGAGTGCGTTGATCGTGTTTGGGGA +TCAGTCGAAACCGCGAGTTTTAAAGTCGAAAAAATCGAAGAGGGAAGCAG +CGCTGATTGATGAGAATTCGGGGAAGAAGCTGAAAAAGTGCCAGAAAATT +CGGACGAAAAGAGCGGCGGCGGTGAATAACTCGACGGAGTGGACACAGAC +GCTGAAGGATACTCGGAGACTTAAACAACGTAGCGAGTGAGTTTGTGCCA +AACGATACTCCGATGTTCCGCTTCCGCTGAGGACACCGCGTCGTTTGATC +TCTGAAAATTGCGAGAGTCAGCCTCGTGTAAGTTTACCGCAAATTTGGAG +GTCAAGCAGCGAGGTCCTCATATTTTGAAATGGAAGAGCTTGCCGAATTC +GGAGTCGATTTACGGCGCGTTGCGTTGCGTGTCGCGTCGCGGCTCGTGAT +TTTCGCATATTTATCGACACTTGCGTACGCCAGGTATCGATTCAGATTCC +GTGAGTTTTTGCAACAAAAAAATCGCTGAGCAATCGCGCTCCACTGGAAA +ACACCCGAAAAGTTTTGAAATTCCGGGGTTGCCGCTTGTTTTGCGCGCTA +GAGCGCGATTGCTCGCCGAGTTCAAACGGCGCTCTCTGGTTTCCCTCTTG +TACGAGACGAGCGTCGCGGTTCGGTCCAGCAGGCGATTGTCAATGGAGCG +CGAAAAACACGTGGTGTCAAAGTCTCCAATTATGGTTTGATCTTTGATAA +ATGCGGGAGACGAGAGACACCGACTTCTCAACTGATTTCGCATGGTTAAA +AGTGTTCTGACGTCACATTTTTCTGGAAGAAAAACTCCCGCATTTTTTGT +AAATCAAACCGGATTAGGACAGCCGGACACCACGTGGAAAAACAAGATGA +GGGAAGCCACAGAAGCCCGTGCAGAGTCTCGGTTTTATCAGTTTTCGAGT +AGTTTCGCACCACAAAAAGCGTGTGACGTCACATTTTTCCGGCCGCAAAA +TTCCCGCGTTTTTTGAAGATCAAACCGAAATGCATTGTTTCAGAATCTAC +TCTTGGGCACGTCTCTCGTTCATTCACCTCGGTTTCGGTATGGTAGTGAT +CGTCAACGCCATCGTTTTTTTGTTCTTTTTCAAGTATTTCAAGAAGTTCA +ACTCGACGGTGAAGAATGGAGATCAGAAGCAGGAATCGGATGAGACTGAT +GAGAAGACTGAGAAGACTGAGAAGCAGGCGAAATAGAACTTTTTAGTCTA +TCTGTGTTGAAATAAAGGAATTGAAAAAAAAAAAAAAAAAAATTTTTTTG +CATATATGCATGTGGTGTCAGAGTGGATTATTTCGGTTTGATCTACGTTG +ATCTACAAAAAATGCGGGAACTGATTTTGCATGGTTAAGAACGTGCTGAC 
+GTCACATATTTTTTGGGCGAAAATTCCGGCATTTTTTGTAGATCAAACCG +TGATGGGACGTCTGGCATCACGTGTACATGTAGAAATGAGATACAACATG +TTATGTACCGTTATCCTACAGTACTATAGGTGAACCCCTCAATTTTTGAC +CTCCAAAAATTGCGGGAATCTTATAGGTCAACTTGCAGGGGGTCGTGGAC +ATGAATAAGCCAAATTCAAATTATTCAAGCCAAAAAGATGATAGCGCAGA +CACGTGCCCCCAATTTCGGTTTGATCTACTGTGTAGATCTACAAAAAATG +CGGGATTTAGGACGCTGAGTTCTCCACTGATTTTGCAACAAATTCCCGCT +TTTTTGAAGATCAAACCGGACAACCTGACACCACGTGAAGGAATAAACCA +CGCTCTTTTGCCAAAATCTTAAGAGTTTCTACAATTTCAGTGAAAATCCA +GTATTAGATTTGATATTTGAAAAAAAAAAGTTAAATAATGGACGCGGCCT +AGTCTTTCCTCTTCTACTAGCAAAACGCCCTGTTTTACAAAAAAAATCTA +TAGTTTCTGAAAAAAAAATCAGTGGCAAATGTCTACAATTTTCAATATTT +CATACTAAAAAAATCACAAAAAATTATTTTATGGCCGCGGTCGAGTTTTT +TACTCTTCCACGTGGTTTCAGAGTGTCCAATTTCGGTTTGATCTACCGTG +TAGATCTACAAAAAATTCGGGAGTCGAGAACTGATTTTGCATGATTGCTG +ACGTCACATTGTTTTGGGTAAAAAATTCCCGCATTTTTTGTAGATCAAAC +CGGACAGCCTGGCACCGCGTGCGCAGAGAAGAAAAAGCTAATGAGCACGC +GGTGTCAGGCACGGCTTGATCTACAAAAATTGCGGGAATTTTCAACCCAA +AAATATGTGACGTCAGCGCGTGCTTAACCATGCAGAATCAGTTCTCAACT +CCCGCATTTTTTGTAGATCTACGTAGATCAAACCGAAATGGGACACTCTG +ACACCACGTGAATGAGCCAATTGAGCAAAAATCGTACTGAAAGAGATCGT +TTTATTCGCGCAGAGGTTAGTTACACGAATTGAATAGAAAAAATAATGTT +TTGCAAAAAAAAAGTAATGTACATACTCATGGAAATAAATTTATTATGGG +GGAGCTTGATTATTACAAGTCGACGAAGAGCAGCACTCCTTCTTCTCCTC +CTTTTCCTTCTCTTTTTCCTTTGGAAGCTGCGGAGAATTCGACTCGGATT +TCGACATTAGACGGGAGCTGGAGCATCTGAAAAAAAAACGCGAAAAATGG +TAGATCACACGTTTTCCAACCTATTACCTTCTCAAACTAAGACTGGAGAA +GAGCGGTGTACACTTGAGCATGTCCTTCAACTGTATCGACTCGTAGTTTT +CGATCGATTCCTGGAACTTTTCGATCCACAGCTCCTTGTCCGCCTTGTCG +GCGGCTTGTAGGGTGTAGATGCCGACGACTTGGTAGTAGCGGGTTAGGTG +AATTATGACGAACGCGAATTTTAGGGACACTGGAAGAGGAACACACTTAT +AGTGACACTACAGTAGTCCTAGAAGGTATTGGTAAGGTACTGGTAAGGTA +CTGACTGCGCTAATTTTTGTACGTTACACATTTTCCTGGAGTTTTGCAGA +CCTTCAAGAAAATTCTAGCACTTTCCAGATTTTCTCCGAAAAATTCCTGC +ACTTTCCAGATTTTTCCAGAGAATTCCTGCACTTTACAGATTTTTCTAGA +AAATTTTGTTTTTCTCAGAGTTTTTCCTATAACTACAGTAATCCTACAGT +ACCCCGACCATATCGCCCCTACCAACATACAACCCAATATTCCATCAAAA +GACAAAAACTCAAATTTTCCCGAACTACAGTAACCCTACAGTAACTCTAC 
+CGTATACCTACCGGGCCCCTACAGTACTGCTGCAGTACCTTGACATTATC +CCCTACCAACATACAACCCAATACCTCTTCGAAAGCTGGGAACTCAAATT +TTCTATAACTACAGTAATCCTACAGTACTTCTACAGTACCTCTACAGTAC +TACTACAGTACACCGACCATATCCCCCACTAACCCTAAACCAATATTCCT +TCAAAAGACAAAAACTCAAATTTTCCCAAACTACAGTAAGCCTACAGTAC +TCCTACAGTCCCCCGACCATATCCCCCTACCAACATACAGCCCAATATTC +CATCAAAAAACAGAAAAAACTCAAATTTTCCAAAACTACAGTAACCCTAC +AGTACTCCTACAGTACTCCTACAGTACCTCTACAGTACTCCTACAGTACC +TGGACATCATCCCTCACCAACTTCCAATCTAATACCTCTTCAAAAGCTGA +AAATTCAAATTTCCTATAACTACAGTAGTTCTACAGTACTCCTACACTAC +CTCTACAGTACCACTACAGTACCCCTACAGTACCCCGACCATATCCCCCC +ATTGACTCTAAACCAATATCCCTTCATAAGCACACTACAAAGACTACGTG +GACTACAAACTATGTATGGTCAGACGGATAGTTTTTTTTAATATAAGAAA +TGATGATGAACTATATATACTACTATATATACTGACACTGATTCACCAAA +ATTAGCGACTCTGGCAATTCACCAAAGATAGGGTACGGTACTGGTATGGT +ACTGGTACGATACTAGACCCCAGTTCTTTAATACGATTCCCTAAAATTGC +TCACTCGGCGTTGCCTGGTCGTTGGAGTCCGCATCACAGAACACGCAAGA +ATCCAACGGTACCGGCTGTCGTTGGACGATGTAGTGCTCGGATTTCCCGA +TTGGGACTCCTTTCTGCAACTCTTTCAGTAATAAAACAAGTAATAAAACT +TCGCGGGACTCCTCACCACTTTCAGCTTCGAAGCGCACTTTTTAGTTTTA +GTCAGCACAAACATGTCGTTGAATAGAAACGCGTACATATCAACGGTCCG +CCCGTTTTCCACAAGCTCGAGGGGGCCTTCGTGGATAAGCTTTCGACGCG +GATGAGCCAGAAGGTTCTCGCAGAATTGACGGGATAAGGCGACGCGGAGG +AACTGGAATTGTAAAAAATCAAGGGAAAATGTTGCAGAGAAAACAAATGT +TTCAACTTACATCAGGGACATAAGACTTGGGCTCCAAGTCGCTTATATTC +GGCCAGACAACCTGTGTCTGCAACTGCTGAAGCCTCTCAAAATTATGAAG +CCACTGCACACTATCATCAATCGATCGGAGTGATTCCGTCATTGTGTCGA +TCACCTTCTCCACCTTTCCCTTACTCTCCTCTTCCTCCGATCTTTTGTGA +ATCTCGCGGAGAAGGATCGGAAGCCGAGTGATTCGTTGAAGCGGTGCGAT +CAAAAGATCCTCGAGCTGTAGACGGAAGCATCGTTCATCGGCCAGACATA +TCCGCTCGAATTCCGTGAATCGTTCCTCTTTCTGTCGAATTGAGCCGAGA +TATTCCATGGTTGCCTTGTAGTTGATGCAGTAGGCTTGGTAGGCCGAAAT +TGTGGAGGGGCCTTTGGAGAACTGGAAGTTTAGAGTTATAACAAGGCATA +GTCTCAAGCTATTTTCCAGCCTATCCAAGCTGTGGTCCTGGTGCTTAACA +GTCTTGATTTTTTAGAGCTGGAAAACCGGACTAATCCGGACTACGGAAGC +CTGGTTTTTGTCTAAACGACAACATTGTGCAACAAGGTACGCTTTTTCAT +ACTACTTTTTTTTAAACTTTTGTTTTTTTTTGTTCTTTTTCGGCCAGCTT +TTAGGAACTTTTTTGACCCAACTTCGAAATGTCCCTTTTTTTTGCAATTT 
+TAGGAATTTCTAGAAATTTCCCATTTTCTCTTTTTCCAGCCCCTTCTAAA +CCTAGGCCTAGGCCTACGCATAAACCTAAGCCTACGCCTAAACCTAAGCC +TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA +AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCTTAAGCCTGAGCCCAAG +CCTAAGCCTAGGCCTAAGACTAAGCCTATGCCTAAGTCCCAGCCTTAACC +CAAGCCTAGGGCTAGTTGAAAGCCTAAGCCTAACCCAAACCCTAAGCCTA +AGCCTAAGCCTAAGTCCAAACTTAGGCCTAAGCATATGTCTAAGCCTAAG +CCGAAAATGTCCCTTTTTTTTGCAATTTTAGGAATTTCTAGAAATTTCCC +ATTTTCTCTTTTTCCAGCCCCTTCTAAACCTAGGCCTAGGCCTACGCATA +AACCTAAGCCTACGCCTAAACCTAAGCCTAAGCCTAAGCCTAAGCCTAAG +CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCT +TAAGCCTGAGCCCAAGCCTAAGCCTAGGCCTAAGACTAAGCCTATGCCTA +AGTCCCAGCCTTAACCCAAGCCTAGGGCTAGTTGAAAGCCTAAGCCTAAC +CCAAACCCTAAGCCTAAGCCTAAGCCTAAGTCTAAACTTAGGCCTAAGCA +TATGTCTAAGCCTAAGTCTAAACCGTAGCCTAAGCCGAATCCCAAGCCTA +AGCCTAAGCCTAAGCCGACGGAGCACGAGGAGAAAGTCCAAAACTTCACA +AAAAAACAAACATTTCTCTCAATCTCGGTCAGTTTTCAAAAATATTTCGA +GTAATTTTTCAGCACAAATAACAAAAACCCACCCTTTCAAACAAGTCCAC +AACTAGCTGTGTGCAGTCCCAATTCTCCTTGTCTACCGTACTGAGCAACG +AGAGGAACGATTGACAGAAGGTTCGAGATATTCTGACAAAATAAAAATGT +GTGTGTGTGTGTGAAATGGCACGTCAGATTAAAGGCAAATAGAAAAAAAA +AACCAATTAAAAACTGAACGAACCTGCAGAGTTGATCCAAATTTCCGAAA +AGGAGGTCCGGCTCGACAAGCAGTAGACATCCCTCGACTTGACATCTCTT +CAGCGGCTCCTTGTAAACATCGCGGAGCACGAAGAGCTGACGATAGAGGA +AGACGAGCTCCGTGTGGAACAGCTCCCATACCGCTTGAAGGTATTTGTGT +TTGGATGAGGGAATTGTCGTGGTCTCGTCTCCCATGAACATTTCCTCAAA +GGATACTGGCTGAAAGTTGGATTTTTTTTTGGTATTGGAGCTCGTCTCTG +GGCTCGTTTTGCAGTTTGTACCCTTAAAAAGACATTAAAAAGACATCAGC +AACAAACTCCAAAATCCCAAGTAGTAAACAGATTTTTTCTAAAACGGCAA +AATTGAGTGAGGAATTTTTATTCGATGCACCATGTTTCTCACTCAAATTT +GACTACTCATAACTTAGTTAAACCTAATCCAATACTTCCTTAAAAGCTAA +AAATTCACATATTCCCAAACTACAGTAAACTACAGTACCCTACCGTACCC +CTACAGTACCCCGACAGTACCCCTACAGTACTTTGACAATATCCCCCATC +AACTCCCTACAAAATCATCAAAAACAAAAAATTCACAATTTTCAAATCTA +CAGTAACCCTAGAGTGATCCTACAGTATTCCTACAGTACCACTACATTAC +CTTGACATATCCCCCACCAATTTCCAACCAAATACCTCTTCAAAAACTAA +AATTCACATTTTCCAAAACTACAGTAACCCTACCGTATACCTGCAGTACC +ACTACAGTACCATTAAAGTACCTTGACATTATCCCCCACCAACTTCCAAT 
+TAAATACCCCTTCAAAAGCTAAAAATTCAAATTTTCCCGAACTACAGTAA +CCCTACCGTATATCTACAGTACCCCTACAGTGCCACTACAGTACCTTGAC +ATTATCCCCCACCAACTTCCAACCCAATACTCAATCAAAAGCTAAAAATT +AACATTTTCCCAAACTACAGTAACCCTACCGTATTCCTACAGTACCCCTA +TAGTTTCACCTACCAACAACTTTTCCAGCTCATCATCCTCAATCTCATGA +TCCTTAGAAGTCTCCAACGCGGCCTGCACATGCTTAATAATCTCCCTCCG +TTTTGCTCTGCCCGACCACTCGTAAAGTCTCTTCAGAATCCAGGGCGATC +GATTTTTCTGCGGCTCGGAAGCTTCTGCATAGAAGGCTTGAGAGGCTATT +GTGGCACGTTGAAGATCTGAAAATGAAGAATCGGTGGGGATCAGTGGTGA +CAGCGAGGGAGAAGATCTATTTGGTGGGGTGGCACATATAGAAGTGGGTA +GTAGCGGTTTTTTGTGTGATCTTTGTGTAAATAGACATGATATTAGGGCT +TCCGTGGTAGGCAGGTGCGGTTTTAGGGCCTGGCGCTTGCCTGACGCCTG +CCCGTCTCACGCCTGCCTGGCGCCTTTTATTCTGCATTTTGAAGTGAGTT +TTCAAATTTCAAATTTTCTCATTTTTATCATTTTGTTGAAAATCAAATTA +AGAAATGAAAAAAAGTTTAAGTAGGTTTCAGCATCAGGCGTGTATTTAGG +CGAGAGGCAGGCGGGGGTCGCCTCAAAGTCAGGCAGGCAGGCCTTCTTAT +GCCTACACGGAAGGTGTTCAAAAATCGGTTGCGTGTGTCCAGTAATGACA +AGAATCATACAAAGCATTCGACTTTTACACAAAACTAGTCTGTATTCCAC +CTGTACAAAAACCTACCGATCGATGTGATTTTATTTGAATCTGAAAAAAA +AATTTCTCAGAAAATTTGAATTCCCGCCAAAATGTTTCTTTGAAAGTTTG +AATTCCCGCCAAAAACATTCTCAGAAAATTCAAGTTCCCGCCAAAAATGT +TTCTTTGAAAATTTGAATTCCCGCCAAAATTTTTCTTTGAAAATTTGAAC +TCTCGCCAAAATGTTTTTTTTTTGAAAATTTGAATTCCGATAAAAAACTT +TTTACAGAGAATTTGAATTTCCCCCTACAATTTTCACAGATCTACTTATA +TATATATATATATATATATATATAAGTTGAACTTTAGTTTTCAAAAAAAA +ACCGGTCATTTATTAAAGCACGTGGTGTCAAAAAATATTTTTTCGGCTTG +ATCTACGCCGATCTACAAAAAATGCGGGAGAAAAGACGCAGAGTTCTCAG +TTGATTTCTCATGGTTAAGAAAATTCCCGCATTTTTTTGTAGATCAAACC +GTAATGGGACAGCCTGGCACCACGTGCTAAAGTTATAGTTAATTTACAAG +TCCTGGTTGGACCTGCAGGATGTCAGGCGCTGAAACGTGTTTCAGGATAA +AGTTGGTAAACTTTTATAACCAAGAGGAAAGTGTTATCGATATTTTGCTA +GTTGAACATTTTTTTTTGCTAAAAAGTAAGAGTATTTGAGCCAGGCGTGC +GCGGCGATCGGTAATTTCCGAAAATCGGCAATTTCGGCAATTGCCGGTTT +TGTGAATTTTTGGCAATCGGCAATTTTGGCAATTGCCGAAATTGCCAAAA +ATTCGATTCCGCACACTCCTGCTTTGAACTGTGCATTTCCAAAATCCAAT +AGAAATTCTGCCGACACTGTAAAATCCCTACCTGCCAACAATCCTCCGCC +GGAGCTGATTGCCCTATTGAACAGGTGACTCTCGTTAGGGGATATTTGAA +CATGTGGGACAATGATCACATTGTCCGACGAGCAGCACCCTTGGGATGAG 
+GTCTCCTGATTCTCACTTGGCTGATCAGGAATGATTGCATGGCGACGGGA +AAGTTGCTCCTTGTTAGGGTGAATTGTTTGGGGCTCTCCTTCTTCTGGCT +GGAAAAATACAATTTTTTGAAGTTTTTGAAAGAAAGCTTGTCATTTTTTT +GTTGCTATTTTTTACCCTAAAAATGTGGTTTCAAAATTTTTTTACCTCTG +AATATCTAATCAACGGGGTGAACAACTGCAGATCGACCATACTCGAGCTT +CTAAAAGTTTGAAGGAAGCTTCTCGTGAAGCACGAAAAAAAAAGAAACAC +GAAATAGTTACCCTTACAATTATCAGAATAGGATGGGATGTACAGAGAAG +TAAATAGGAAGAGAAATGAGAAGAGTTGCACTAGAGAAGTATGTACAATT +AATAATTTATGAGCATCTGAGAAAATGAGAGCGACTTGAAAATCTGAAGA +GCAAGGAAGATAAAACTCGGTCTCTTATCAGAAAATTTGAATTCCCGCCA +AAATGTTTATTGTTTAATTTGAACTCCCGCCAAAACTTTTCACAACGAAT +TTGAATTTTTCGCCAAAATTTTTCTCAGAGATTTTGAATTTTCCCGCCAA +AAGTTTTACCAGAAAATTAAAATTCCCGCCAAAACTTTTCACATCGAATT +TGAATAACCCGGCAATTTTTTTATCAGAAAGTTTGAAATCCCGCCAAAAT +TTTTCACATGAAATTTGAATCTTTCGCCAAAGATTAAAATCGTTGGATAA +TTTTAATTCGCGGCTGAACGTTTCCAATTCGAATTAAGACCGGCCAAAGT +TTTTCGGGGTTTTTTTTAAAACTTACTACAAAAAAGCATACATTTATTTT +CCTAACTAGACGCCTTACTAGGTAAAACCCGCTTTTCACATCAAAAACTT +CAACAATGCGTCACCGCCAAAAGAGATTCTTGCACTTCACCACGAATAAT +AGATCAGAAGGAAAAGCGTGTAGTGAGTTGAGAATTATAACTAATTCATA +TCCCCCGAAAAAAAAAACGCTGATGTCTATATTCTATACATGGTGAGAGC +TCACCGAAGAATAGACGGGTAATGAGATATATATGTGATACATAAATGTG +TGCGCCCGGGTATCCGAGAGCCGAAAACAGAAGCATGGAATGGAGCAACT +AGGGAAAATTGTGTTATCACGAGGCACGGCGGGAGACACAATAATATGAT +TTTTTTTCGAAAATTTTCCATTTTCACGTGGTTGTCCCATTACGATTTGA +TCTACCAAAAACGCGGGAATTTTTCGCCTAAAAAAATGTAACGTCAGCAC +GTTGTTTACCATGAGAAATCAGTTGAGAAATTAGCGTCTCTTCTCCCGCA +TTTTTTGTAGATCTAACTTGATCAAGCCGAAATGAGACGCTCTGACACCA +CGTGACGAAAAAAAGTAGATCAAAATTTGGCAAGCCCTTATAAAAGTCCC +TTTTTCAGTTCTAGGCGGAGCTCAGTTTGACTGATGACGTAAACGCAAAT +CTTGAAATTTCGAATTCTCTATGGAAATTAGAACTTCCCGCCGCCGCCAG +ACTTTGTAAAGTTCAACAAAAAGATGAAATATAGAAAAAGTTCAGACTAT +GATAAGGAACTTATGACTAACGTCTCTTGAGGTGAATGAGTATGATGTGA +TTAGTGATGCAAGACGACTAGACCGAGAGAGAGAGAGAGAGAGGCTCAGA +GAGGGGGGAGGGAAGGTAGGTTTGTAGGTAGGCAAGCAGGTAGGCAAATA +GGGGGTCGTAAGTAGAAATTATGAAGGAACCTAGGTCGGCAGCCATAAGG +TAGGCATAAAGTAGGTCAGCATCCAAGTAAGTTAGACAGGTACGCATAAG +ATAGGAAGGTAAGTAGGCGCGGTGGCAAGGAGACTCAAGGCAAAGTAGGA 
+CAGGAAGTAGGCAGTAGGTAGGCACGATGAAGGTAGGTGGGTAAGCTTGT +AAGAAGGCAGCCATAAAGTATGAACGTCGACAGTAGGCATGCATTTAGTA +GGTAGGTATGAAATAAGAATTAAGGTATGCATAAGGAAGTAAAGTATGAA +TGATGAAGCCATGTAGATACAGAAATATCAGGCAAGTAGACAGGCAGGCA +TAATATAGGAAAGTTAGCAGTAGGTAAGCATGTAGTAGGTACGTGGGTAG +GCATGTAGGTAGGCACGTGGTAGGCTTATGGTAAGCAGAACACAAGCACA +GAAATATGCATATTGGCAAGTATGTAGGCATGCATAAGATACAAAAGTCG +ACCAGGCATGTAAGTAGGCACATAGGTAGGCAGGCTTGAGGCAGCAATGT +AGGTGATCTCGTAAGTAGGTATGAGTAAGGAACCTAGGTAAGTCACAGGT +ATTTGGCACAACCTGTGTTCTTCTCAATGGTAATCAGGCGAACGTGCCTG +CCTGCCTACCTAGAAATTCGGTAAAAATTGTGGAAAAAAAACTAGGGGAA +CCTCTTCCCAATGGAATGCGTAATGTGAGTGGGAGGCGGCAAAAAAAGAG +CGGACGTAAACACTTTTTCCTAATTCTCTTTTTCTCTCGCTCGTGGAAAA +TGAATGAATATTTGATACAATTTTGTGGGTACAGCCAGCAGGAGAAGAAG +AAGCTGATGTCCCGACGCGCGCGCGCGAAAAAAACTTTCAATTCCCTCCC +CTAATTATCCGCCTAACGCCTAATGTGTGTCAAATTTACCATGTATTTTT +TTCTCCTCTCGGCTATGTTTTTTTTTCTTTTTCAGCAAAAAAAAGTACGG +GCACATGAGGTCGGGATGGATAATTAGGGATGTGTGCCAATTTGTTGGAT +AATAGACAAAAAAAGTACGTGAGATTTTTAGAAAATGCTGAGAAAAATAT +CACGAGGCGTTGGGGACGTGGCCGCGAATGAGAAAACTAGGCCACCTTCA +CAGGGCCCTGGCCTTCCTCATTGAACTTTTCGCGCTCTACTGACAAACGC +CCGTGTACTCCACACGGACAAATGCATTTAGTTTTGCAGCTAGAATCGAT +CAGCGACGCGACACGCAACGCGCCGTAAATCTACCCCAGATATGGCCGAG +CCAAAATGGCCTTGTTCGGCAAACTCTTCCATTTCAAATTTTTTATTCAG +TTTAATTTTCTAGGATACGCCTACGAGTGATCTAGGCCAGGCAGTAGGTA +GGCAGCCAGTGTAGGTAGCCCCCTTTTGGTGTCAGGCTGTCCCATCGCGG +TTTGATCTATCAAAAATATGGGAACTTTTCTCCCAGTAAAATGTGACGTC +AGCACGTTCTTAATCAGGCGAAATCAGTTGAAAAGTCTGCGTCTCCTCTC +CCGCATTTTTTGTAGATCAACGTAGATAAATACGAAATCAGACACTCACG +TGGTGCCAGGCTGTCCCATCGCGGTTTGATCTACAAAAAATGCGGGAACT +TGTGCCCAAAATTATGAGACGTCAGCACGTTCTTAATCAGGCGAAATCAG +TTGAAAAGTCTGCGTCTCTTCTCCCGCATTTTTTGTAGATCAAAGTAGAT +CAATCCACATTAAGACCTTCTGACACCATGCCCGAGAAGAGTTCAAAATA +AATTTCGTAGAACATAAATTTTTCCACGTGGATTTTACAGCACAATTGCT +TATAGAGCGCGGTTGCACCCCAAATTTTACAGGAAAAATAGGAAAATTTT +ACCAGATTTCCCGCGCAAGACACGAAACAAGGTCTTAAAAAGTGGGAGAG +CATGAAAAATCCTAAGAAAGTAAGAAACTTCTGGTTTACCACATAACTCA +AATTGGAGATAGTTCGACCTTAAAAATTTTGCACCAAAAAGGGCGGAGCC 
+AAAAACACCAAAACTTACGTGCAAATTTGTACATGAAAAAACTAAAAAGG +ACACCTAAAACCGGCGAAATATGAAGAAAAACGCTGAAAATGAGCAATGT +GTATCAATTTTTCCCCCGTTGCTTAATTTAATACATGTCTTCCTCTCCCC +CTTCCTTCCTGCTTCACACTCTTTCGGGCGGCGCCTCCTTTTGATTTGAG +ACTTGAGACTTTTTTTTTTGTGATGTGCGTTGAGTAGTGTGGGATGCATA +TATTATGAATCAAAAATCTGACATCAAAAATATCGCTGAAAAAATAGTTA +AAATTTGCTTTAAAACTGCCGTTTTTGATCTACAGGGTGCTTTGGCGCGT +TGCGGTCGCGTCGCGGCTTGGAGTTCTAGGCCACGGCCATTCAATTTGAC +ACTACTACAATCAGAAATATTTTGTAATTCTTAGGCCATCAAAAAATTTT +TAAGCCAGAAAACAAATCCGGAAATTTCTAGGCCATCAAGAATTTCTAGG +CCACATCAGACTACTTTGAAAATTTCTAGGCCACAATGGGAACTAATTTC +TGAACCTAATTTCTGAAATTTCTTGGCCACGATCTGAAATTTCGAAGCCA +TAATTTGAAATTTCTAGGCCACGATCTGAAATTTCTAGACCATCAACAAT +TTCTAGGCCACGATTTGAAATTTCTAGGCCACGATTTGAAATTTGTAGGT +CAAGGTTTGAAATTTCTAAGCCACGATTTGAAATTTCTAGGCCACGATCT +GAAATTTTTAGGCCACGATTTGAAATTTCTAGGCCACGATTTGAAATTTT +TAGGCCACGATCTGAAGTTTTTAGGTCATCAAAAATTTCTAGGCCACGAT +TTGAAATTTCTAGGCCACGATCTGAAATTTCTAGGCCACGATTTGAAATT +TCTAGGCCACGATTTGAAATTTTTAGGCCACGATCTGAAATTTCTAGGCC +ATCAAAAATTTATAGGCCACGATTTGAAATTTCTAGGCCACGATTTGAAA +TTTCTGGGCCACGATCTGAAATTTCTAGGTCATCAAAAATTTCTAGGCCA +CGATTTGAAATTTCTAGGCCACGATTTGAAATTTCTAGGCCGCGATCTGA +AATTTCTAAGCCATCAAAAATTTCTAGGCCACGATTTGAAATTTCTAGGC +CGCGATCTGAAATTTCTAAGCCATCAAAAATTTCTAGGCCACGTTTTAAA +ATTTTTAAAATGTTTAAAATTATAGGTTTTCAAAAAAATTCTAGGCCATC +AAAAATTTCTAGGCCACGATTTGAAATTTCTAGGCCACGATCTGAAATTT +CTAGGCCACGATTTGAAATTTCTAGGCCACGATTTGAAATTTTTAGACCA +CGATCTGAAATTTCTAGGCCATCAAAAATTTATAGGCCACGATTTGAAAT +TTCTAGGCCACGATTTGAAATTTCTGGGCCACGATCTGAAATTTCTAGGT +CATCAAAAATTTCTAGGCCACGATTTGAAATTTCTAGGCCACGATTTGAA +ATTTCTAGGCCGCGATCTGAAATTTCTAAGCCATCAAAAATTTCTAGGCC +ACGTTTTAAAATTTTTAAAATGTTTAAAATTATAGGTTTTCAAAAAAATT +CTAGGCCATCAAAAATTTCTAGGCCACGATTTGAAATTTCTAGGCCACGA +TCTGAACTTTCTAGGCCACGATTTGAAATTTCTAGGCCACGATTTGAAAT +TTTTAGGCCACGATCTGAAATTTCTAGGCCATCAAAAATTTATAGGCCAC +GATTTGAAATTTCTAGGCCACGATTTGAAATTTTTAGGCCACGATCTGAA +ATTTCTAGGTCATCAAAAATTTATAGGCCACGATTTGAAATTTCTAGGCC +ACGATTTGAAATTTCTAGGCCACGATTTGAAATTTCTGGGCCACGATCTG 
+AAATTTCTAGGTCATCAAAAATTTCTAGGCCACGATTTGAAATTTCTAGG +CCGCGATTTGAAATTTCTAGGCCGCGATCTGAAATTTCTAAGCCATCAAA +AATTTCTAGGCCACGTTTTAAAATTTTTAAAATGTTCAAAATTATAGGTT +TTCAAAAAAATTCTAGGCCATCAAAAATTTCTAGGCCACGATTTGAAATT +TCTAGGCCACGATTTGAAATTTTTAGGCCACGATCTGAAATTTCTAGGCC +ATCAAAAATTTATAGGCCACGATTTGAAATTTCTAGGCCACGATTTGAAA +TTTCTGGGCCACGATCTGAAATTTATAGGTCATCAAAAATTTCTAGGCCA +CGATTTGAAATTTCTAGGCCACGATTTGAAATTTCTAGGCCGCGATCTGA +AATTTCTAAGCCATCAAAAATTTCTAGGCCACGATTTGAAATTTCTAGGC +CACGATTTGAAATTTCTAGGTCACGATTTGAAATTTCTAGGCCACGATTT +GAAATTTATAGGCCGTGATCTGAAAGTTCTAGGTCATCAAAAATTTCTAG +GCCACGATTTAAAATTTCTAGTCCATCAATAATTTCTAGGCCACCAGAAA +ATTTGTGTTCACTTTTTTTTTTGGTTTTTGGTATGTATTTTTCAGAAATT +ATTCAAAATTTGTGGGAGCAACAATATGCTTTTTTAGTTTCTAGGCCACA +AAATTCTTTGGCCATTTTCACATTTTTTCCCGCCGCCAATGGTTCCACTG +GATAATTGAATGTATAGAAAAAAGGGAAGAGTTCAGCTGGCGCAAAAGAG +GGTAATTCGTTGGAAGGGAAAAAAATAAATGCAAATCTTCGCCAAAAACC +AATTTTCGGTCGGAGCTCGGCGCGGGCCGACACACACAAAAATGGAGCAG +AAGAAGACGTCAGAATCTTCTTTTTCAGAAGGCTTTCTCACTTTGAAAAT +GGGGTGTCCAATTTCTTGTTGCTAATTGTTGGATGGGGAAAGAATGATAT +TTTGTTTCAGGTTTCACACAACATCAGCCTAGACATTTTTTTTTTGTTTT +CTCCAAGAGGAGTACACACACTATAAATTGTTGTAAAAATCGAAAAACAC +GTGGAGCCAGAATTTCTTATTTCGATTTGATCTACAAAAAATGCGGGAGT +TAAAACGCAGACATCTCACCTGATTTCGCAAGAGCGTGCTGACGTCACAA +TTTTTTTGGAAAAATATTTCCGCATTTTTTGTAGATCAAACTGCAATAAG +ACAGCCTGGCACCACGTGAAAAATAGTATTCCTCTTGAAAAAGATTTCCC +GAATCAAGTTCCTTGAAAGGAGTACGCAAAATTTTGCAAAAAAAATCGGT +GGCCGAGTTTACTCTTCTGGCGGCCACGTAACAAAATTAACAAGGAAGGA +TAAAAAGGAAAAAAAAATTTTACTCGTAAAAAGGTACCAGGAAGCAATTT +GAGGAAGGAAGGAAATGTGAGTGTCTACTCTAATGATCTACCAGTTTTGG +TGGCCGCGGAACAGAGAAAGCTCGGCCCCCAAGTATTTTTTCAAATATCA +CGGATTTCTGGTTTCCCTTATAAGTTGAAATGGAAGAGTTTTTAAAGAAC +GAACTAGGCCATTTTAGCTCGGCTGGGGCAGATTCACGGCGCGTTGCGTG +TCGCGTCGCGGCTCGATTTTAGTTGTAAAACTAAATATATTTGTCCGTGT +GGAGTACACGACTTTCCCACGAGTTTTCCGACAGGCGATTGTCAATGGAG +CGCGAAAAATTCAATGAGGAAGGCCAGAAACCCGTGAGATATTTAAAAAA +TATTCCAAAACTATTTTTATATTCAAAATTGGAAATTATACATACTATAA +GTATTTTCAACTACATACATGGCCGCGAAAAAAAAAAACTCGGCCACCAA 
+TTTTCGCGGTCACTTACCGCAGATCTCCATCGATCCTGAAGTCTTGCCGT +ATACAATAAGGGCATTTTTCGGATCATAGTGAATGAATTATTTCAGGAAT +ATTATTTCAAAGAACACACACCACACAAGAAATGTGAAGGGAAGGAAACG +GGGAAAAAGAGACAGGTGGAAAAATACATTTTCTCTATTATTTCGCTCTG +TTTCTGACTAGGAAATGAATCAGAATCAGTGATTAAGAAACTGGGAATTT +ACGAGGTGTTTAGGGAGATGGTGACCGCGAGGATGAGGAAAAACTCGGCC +ACCGCAAGACTTACCGGCTTAATCAATTGATTTCCTCGTATGTGATGGAA +CAATGTGAGAGCCGGAGCTTCACTCGTCATAGCTTTGTGTCTAGGTGGCT +CTCAAGTACTGCTGGACCTGAAAAACTGCAAATTTAATTTGAAATTTGGG +ATGCAAAAAAGTAGCAGCCGACACCTGCCGGGTTCCCGCAAGATGTCGGA +TGCTTCAATTGACCTAGTAGAATCGTTAGTGATCTACCGAGCAAAAACAA +GTATGCTTGCAAGAAAAAAAGGCGTGTGGCTCAAGTTGACAAATTCGACA +CTTTGGACACACGAAATTGGATGAGCCACACCACACCGGCTAGCTCAGAA +GAGCCCCTCCGACGTCAAGGACCCCATGTGCGCGCGCGAGCCGAGATTCT +GGCAACATGAGAAGCGGAGAAGGCGAAGAGAAGAGTGCTAATTTGAACTT +GTTGCTGCAGGAAAAATATCTAGGCTTAGGCTTAAGCTTGGGCTTAGGCT +TAGACTTATGCTCAGGCTTAGGCTTAGGATCAGGCTTAGGCTCAGGCTTA +GGCTCAGGCTTAGGCTCAGGCTTAGGCTCAGGCTTAGGCTCAGGCTTAGA +ATTGGGCTTCGGCTTCGGCTTTGGCTTGGCTTAAGCTTAGGCTTAGGCTC +AGGCTTAGAATTGGGCTTAGGCTCAGGCTTAGAATTGGGCTTAGGCTCAG +GCTTAGAATTGGGCTTAGGCTTTGGCGTAGGCTCAGGCTTAAGCTCAGCC +TTAGGCTTTGGCGTAGGCTTAGGCGTAGGCTTAGATTTAGGTTTAGGCTT +AGGCCTTGGCCTAGGCACAGGCCAAGGTTTCAGGTCACTATCACTTGCTC +CTAGGTGTGGCTGGGGAAATGAAATTCTAGGCCAGTGACGTCAAGATTCT +GGAAGAGTTCCGTTGACGCCACCAAAAGCAACAAAAACTTCAAATCTGAG +AAGATCAGAACAAAATGTACTGAAAGTACAAAAAGCCCGCACTTTTTTCT +CTATTAATACTACTCATTTGAAGAGTGGGGAAAAGAAAAATGTGTTGCAA +GAAATAATTCCGTGCGAAAAAAGTTTGAGAGGAATGAATGGAGCGGAACT +TTTTTTTTTGTTGGAAAATCGTAGGTGTGACGTCACTTCTCTTCCAACCC +CCAGGTTTCTAGGCCACTTAAGAATTTTGCAAAAAAATCACAGAGCAAAA +CTACAAAAAGGAAAGAAGAAATGTGCTCTTGGAAAATAATAAATATTGGT +TACGAAATAAAAGAGAAAAGAGAACGAAAAAAATAAAAAAGAAAATGGTA +ACCAAGGAGGAATGTGGAGAAGAGGCTCTTGTGAGCCCAGATATACCCAG +CAATTAGGGAGCGAGTACTACTGCCGGGAGATGACGTTTCGAGGATATTT +CTCACTGGAATGGAGGGTGGTGGAAGAGTTTTGTAAAGTTCTAGGCCACG +GGTTTTTTTGGCCAATGAGCATGGTGCATCGATGAAAAGCTATTGGGTTT +CTCGGCCACAATAAATTACAAAAACCTAGTACGATGGAGCACACTTGCAA +TAGAATTGCAAAAATTTATTTGAAAATTGAGAAGAAAGAAAATCAGGGGG 
+GCATAAATGCTCAACAGTGCAATCAAATTGTAGAAGAGCTTTCGCCCAAA +AAGCTAGGCCACGGAGGAGACCGCTGGAGAAAGTGACGTGTATATGGTGG +AAGAGTTTTCTTGTTTTTCTAGGCAATCTTGGGCGATTGCAAAAATTGAA +TTTATGAAAAGATGGCTTGGAAGAGCTTTCGCATAGAAAATCTAGGCCAC +GGCTAGCAATTTAATAGGGGATAGAAAACCGCGTCGGAAGATTATTTAAC +TTTTCCATAAAAATTAGCAAATTTAAGAGAGTTCAGATACTGATCTGGAA +GAGTTTTTGAGCAAAAAACCTTGGTCACGGCCAGCAACTAAAGTAGTAGG +GGAGATAGGGTACCCCGCTAATTTTTTATTTATTTTTCAAAGAAAGTTTT +AAGGTTTCGGGAAAAATTTTGATGAAATGGCTTGGAAGAGCTTTTGCCGA +AAAAACTTAGGCCACGGCCAGCAATCTAATGAGAGGGGGAGGGTTCCCCG +CTAAATTTTTTTTTTGATTTTTAAACAAAGTTTCATGGGTTCGGGGGAAA +ATTTGATTAATTGACTTGGAAGAGTTTTTAAGCAAAAAAAAACCTAGGCC +ACGGCCAGCACCCTAATGAGAGGGGGGAACATAAGGTACCCCGCTATTTT +TTTTTGATTTTTCAAAGAAAGTTTTAAGGTTTTGGAGGAAATTTGATTAC +ATGACTTGGAAGAGCTTTTGCAGAAAAAAACTTAGGCCACGGCCAGCAAC +CTAAAGTAGTATGGGAGATAGGGTACCCCGCGGAAAATTTTTAAATTTTG +GTAAACAACAATGGAAGAATTTTCCGGTTTTCTAAGCCATTGCATTTTTT +CAGCGTCACGGATTTCTGGCTTCCCTCATAAATTGAAATGGAAGAATTTG +CCGAACTAGGCCATATCTGGGGTAGATTTACGGTGCGTTGCGTGTCGCGT +CGCGGCTCGATTTTAGTTGCAAAACTAAATGTATTTGTCCGTGTGGAGTA +CACGACTTTCCCAGGAAGGTGATTGTCAATGAAGCGCAAAAAATTCTCAA +AAAAAATCCAGATTCCCGTGCAGCGTCGCAGGAGATCATTTTCAAGAAAT +CTGGCTCAAGAAGGATTCAGTAAAACAAGAAAAGAAATAAAAAAGGAATG +AAAGGTAAGGGTGGACAAGTCGGCGGGACAACTTGTCACTTGCCATCATC +ATCATTTTTCTTCTCCTCCTCATCAGCTTCTGCTGCTCCGCGAGCTCGCG +ATGACTCTTCTGAAATTCTAAACTTGGCCAACACAACGTCGGCAACGAGT +TTCGAATTTACACACACACACACATACGAAAAAAGCTCATTAATTGGAGC +AGGAGACGGAAGAGAGGGAGGAGAGGCTGCTCAATGAGTTAGAGAAGAGA +GAGAGAGAGAGAGACTGAAAGATGAAGTATAAGCAGAAGTCGTCGGGATG +ACAGAAGAAGGAGGAGAAGAAAACGTGAGAAGGGGGGATGTGGAGACAGA +AGGATATGTAAATATATTAGATTCAAGGATTACAGCAATTTTAAGGAGGC +AAAAAAAAACAACGATGCTCCGATATTTTTGGGGTTACTGTAGTTTTTGT +AGATACTTCCAGAAGTTATAAACAAATCCGAGTTTTTTAAAATTTGCCAG +AAGCACGGGTTCATTGAATTTTTCACGCTCCATTGACAATCGCCTGCCGG +ACAACGCGTGGGAAAGTCGTGTACTCCGCACGGACAAATACATTTAGTTT +TACAACTAAAATCGAGCCGCGACGCGACACGCAACGCGTCGTAAATCTAC +ATATATGGCCGAGCCAAAATGGCCTAGTTCGGCAAAAACTCTTCCATTTC +AATTTATGATGGAAGCCAGAAATCCGTGCAGAAGTTTCTCGGAAAAAAAA 
+CAGAAAGTTTGCGAAATGGCAACCTCTAATTCATTAACATAAATACATCA +AATATGACGCGCAAAAATGAATTAATTTACTAGTGAAACATGGTGCATTG +AAGTCGGTGTTTGCACAATTTGTACAGATCTGAGGTTCACTAACACCAAT +GCACGGCCCGAGAAGTGGTACCTGTACGCAATTTGTCTACCGTATACCTG +GACGTTTGGGCGCGTGTATCTCAAAAACGGTTGGTCCAGTTTTTTTGTGA +TGCATATAAAAAATGTCCGAAATTAAATTCTAAATTTTTTGGACCAAAGC +TTTTTTCGTTATCACGCGCCCAAACCTGGTCTACACTCAAATTATCAGTA +GAGCGCATTTGCATGGATGTACCACTTGCCGGGCCGTGTTCAAAGGGGTG +CAACGATTGCGCGTCAAATCGAATGCAACTACAGTAATCCCTAGTAGTTG +CGCTTCAAATTTGATGCATTGCACGGCCCGGCAAGTGGTACATCCATGCA +AATGCGCTCTACTGATAATTTGAGTGTAGACCAGGTTTGGGCGCGTGATA +ACGAAAAAAGCTTTGGTCCAAAAAATTTAGAATTTAATTTCGGACATTTT +TTATATGCATCACAAAAAAACTGGACCAACCGTTTTTGAGATACACGCGC +CCAAACGTCCAGGTATACGGTAGACAAATTGCGTACAGGTACCACTTCTC +GGGCCGTGTTGAATGTTTGAGAAATGTTAAAATTTTGAGTTATATGTGCT +GGAAAATTGACATGAACATGGTTTTTTTCATTATTTGCGCGTGAATTATG +GTGCATTGAAGTCGGTGTTTGCACAATTTTTTTCAGATCTGAGGTCAACT +GACACCAAAGCAACAGAGTTGACGCGCAAATTTTAAAATGCGCTTAACAT +TTGGCACGCTTTGCAGTCGGTGTTTGCACATTTTCGATTGTTGTGTGAGG +TTCACCGACACACCAGATTTGACGCGCAAAATTGTACGAAAACAGTTAGT +TAGAAAACTAATAAAAACTATTAATAAAAGTATAGTGCATCCTGAAAAAT +TATTTCTGCAAATGTATCTATAGAATTTAAAACAAAATATACAAAAAAAA +ATTATGAAAACCACGACGAAAAACCCGGAAATGTTTTTTGGGTAGCTTGT +CGATTCCGGAGGGTATTGAATTTCGCGCTCGAATAATATTGTTTCCTATT +CATTTTCCCCAGAATACCTACAGAATTTGGGGGAAAAATATGAACTGGTA +CCTAACATTAACCTACACGTTTTTTAAATAAGTTTAGGCCACGCCCAGAA +AGTAGCTGGGCGGGGTTGCAGATTTTTCTAGCTTTCCGAAATATAAAAAG +TCGGAAAAAAAGTTGTAGTCTGTAGTTTGTAGTTTGTAGTTTGTATCAGG +TTGTCCCATAAGTTTTTGTACTTTTTTTCAAATATTTTTCCAAAACTTCT +AGAAAGTTTTAAAATTTTTTCATCGTAGGTCGTGTCAAGGTCGGGTCGTC +CCCTTTCAGAAAAGATTCATTTCATCCATTTCTACTTTGCCACGATGACA +ATCATCAAACTTGAACGTCGAGACGTTAGATTGCTTCTTCTTTATGAATT +TCGTCTTGGTCATTCAGCAATGGAAGCGGAACGAAACATATGCGGTGCGA +TGGGTGAGGGAGCACTCTCTTATAATACAGCAAAGAGTTGGTTTCAAAAG +TTCAAGAACGGCGACTTCAGTCTCGAAGAAATAGAACGTTCTGGGCGACC +GGTAGAGTTAAATGAAGAAGACCTAGTGAAGCTGGTGGAGGAAGAGCCTC +GTCTTAGTCTTCGTGAAATGGAAGAGAAGCTTGAGTGTTGTCATAGCACA +ATTGCACGTCACTTGGGTCGCCTTGGTTTTACTTCAAAACTTGGAACTTG 
+GGTGCCTCATGAACTTTCGGCATCACAGAAGCTCACTCGGGTCAACGTTT +GTACTCAACTTCTAACTTTTCGTCGAAAGTTCGATTGGCTGAACAATCTG +GTTACTGGAGATGAGAAGTGGGTGCTCTATGTTAACCATTCCAGAAAACG +TCAATGGCTTCCGATCGGTGAGAAAGGAATACCGACGCCAAAGCCTGATC +TTCACCCAAAAAAGATTATGATCTGTGTCTGGTGGGGTGTTCAAGGACCC +GTGCACTGGGAATTGTTGCCAACTAATAAAACTATCACTGCTGATTACTA +TTGTGCCCAATTGGACCGAGTTGCAGAAAAGACCAACGGAAAATATGAAA +AACTATATTTTCTTCACGATAATGCTAGGCCTCATGTCGCCAAGAAGACT +TTCCAAAAGCTGCAAGATCTTGGTTGGACTGTTTTACCGCATCCACCATA +TTCTCCAGATCTTGCACCAACCGACTACCATTTGTTCTTGTCTCTCAGTG +ACTACATGCGCGACAAGCAATTCGACGACGAAGAGCATCTCAAAACTGAA +CTCTCCACTTTCTTCTCATCGCGTTCGCCGGATTTCTTCTCCCGTGGCAT +CATGATGTTACCTAGTAAATGGCAACAAGTGGTGGACACTAATGGTGAAT +ACTTGTGTGAATAGTACTACTTGTCGCTTGAGAGAAATAAATTTTTTTCA +AAAAAAAAATAGTACAAAAACTTATGGGACAACCTGATAGTTTTAGTTCG +TTATTTGCAAATTGTTAACGTTAGTATTAGGAGAGCCGAAATAAATAAAT +TTTAGAAAAGAAAACGAAATTATATACATAGTTCATTAAAATGTGGTAGT +TTGTAGTTTGTAGTCTATGTATTATGTCTATTCAAATTGTATTCAACATC +AAAAATTAAACAGGAAACTTATATTTAAAAAAAAAACGAATACTGAAAAA +AGGCGGCTGCATAGGAAAAAACAATGATTCTCCTCCAAAAAATAGAATTC +CGCATTTTTTCAGCGGCTATTTTCACGATGATGAGAGGAGACAACAAAAA +CATTTGAGATGAGAAATGAGGGGAATATTGCACAAAAATTGGGAAATGAT +TTTTTTTACTTTATACACAGTTAAAATGCGATGCGCGCATAGTGTTTTTG +GCGTGGATCGCGAGTGGGAGAAAAAGGAACCGGAAATGATGCGCATTGTG +CGTCCATCGCGAATTTGAGATGCATTGTGCGAGCATCGCGAACATAAATA +ATGGGCACATTGTGGATTCTCCTTTCTGATAATATTTTACTCTCTATGGC +TTCACCAATTATCTTTCTCTCTGTGGCTTCCCACTATATTTTACTCTCTG +TGGCTTCACCAATTATTTTACTCTCTGTGGCTTCCCACTATATTTTACTC +TCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCACCAATTATTTT +ACTCTCTGTGGCTTCCCACTATATTTTACTCTCTATGGCTTCCCACTATA +TTTTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCCCAC +TATATTTTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTC +ACCAACTATTTTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGG +CTTCCCACTATATTTTACTCTCTGTGGCTTCACCAACTATTTTACTCTCT +GTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCCCACTATATTTTACT +CTCTGTGGCTTCACCAACTATTTTACTCTCTGTGGCTTCCCACTATATTT +TACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCCCACTAT +ATTTTACTCTCTGTGGCTTCACCAACTATTTTACTCTCTGTGGCTTCCCA 
+CTATATTTTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTT +CACCAACTATTTTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTG +GCTTCACCAACTATTTTACTCTCTGTGGCTTCCCACTATATTTTACTCTC +TGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCACCAACTATTTTAC +TCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCCCACTATATT +TTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCACCAAC +TATTTTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCCC +ACTATATTTTACTCTCTGTGGCTTCACCAACTATTTTACTCTCTGTGGCT +TCCCACTATATTTTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGT +GGCTTCCCACTATATTTTACTCTCTGTGGCTTCACCAACTATTTTACTCT +CTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCCCACTATATTTTA +CTCTCTGTGGCTTCACCAACTATTTTACTCTCTGTGGCTTCCCACTATAT +TTTACTCTCTGTGGCTTCCCACTATATTTTACTCTCTGTGGCTTCCCACT +ATATTTTGCTCTCTGTGGCTTCCCTCTATATTTTACTCTCTGGCTTCACA +GTATATTTTATTCTCTGGCATCACAATATATTTTACTCTTTGGCTTCGCA +GAATATTTTACACTCTGGCTTCACAGAATATTTTACTCTCTGGCTTCGCA +GAATATTTTACTCTCTGGCTTCGCAGAATATTTTACTTTTTGGCTTCACA +GAATATTTTACTATCTATTAATCTATTTCTTCGTATAACAATCTATTTTT +TCGTATAACAATCTATTTTTTGTATAACCAACTTCAAACTCAACTTTTCC +AAAACTACAGTAACCCTACAGTAATTCTACAGTACTCCTAAAACTCCCCG +CAACCTGCAGTTCAATGACACCTCGTTTTCTCACGCGTGACTCATGACTC +ATTAGCTTACATTTCCTTCATCCATCGGTGGTGGGGCGCTGTGTAATATA +CAAGAAGAGACACCACCACACGCTGCTATTTCTGCTGCTGGTCTGTCTTC +GTTTACAGCCACTTATGACTCAGCACTGCCATCAATGACTACTTCCTTCT +TTTTTATCTTTTCGGCTTCATCTCGAGTAGCAAATTTAACAAAATTCAAT +AGGTGTGACGTCATCAAATGCCTTCGTGGCCTAGAAATCCAAGATTTCTC +TCGAAAGGATCAATGTGATGTCATGGCCTAGAAAACTCCAGTGTGAAACC +TAGGCTATGTTAAATAGTCTTGAAAACTCTAAACTGAAGCACATAAGGCT +ATGATGTCATCGCCTAGAAATCCCAATTCTATGACGTCATGAAAGACCTG +AACTGCACCGAACCTAGGAATACCCTCAAAAGGGGTGCTGTGAAGTCATG +TCCTAGAAAACATGAGTGCGAAAACTAGGCCGTAATCTACCATGTGGTAG +ATCATGCGGCCCTATTGATGGCCTAGAAAACTTACAGCTTGAAGCTATGA +CGTCACGACCTAGAAACTCAATAGTTGTGACGTCATCAATGACCTAGGTG +TATCTCGAATGACGCAGTGTCTTCTAGAATTTTCTGAATATCACACAAAA +GTATAACATCTTCTTGAAACCAAATAATTTTGTTGTAACACCCACACAGT +ATTGTAACAAATGATATAATAGCGATCGGAACATTCGAGAAAACTGGAAA +TTTGAGGTCTGACCTTGTTCGGGAAACTTACCAGTGATTATGAGGTGTGT +CGAAAAATGTGGCCTAGAAAATGGGAAAACTCTTCCTTTTTTTTTTTAAG 
+TCCAGAATTACGTTTTTGAGAACGAGAAGTGCATTTCATTTTTGTTAGGT +TGGAAATTTAAAATTTAAATATTCGTCTCCTAGTTACAGCCGTGGAAGAC +TTTTTCAGAAACTTTAGCCACGTGTCGAAGTCTGGCGGTGCCCACGATAT +TAGAGAATGTTGATATTTCCTATTTACTTATCTCCTCGAAATTTTAAGAT +TAAAGTTACTACCGTTAAGTTACACTGGTGGAAGAGTTTTTAAGATTTCT +AGGCCACGGCAAAGTTTGGGTGTGCCAGTGACCCTAACACTAAATTTTTA +AATTTTTTTCAGCCAAATCATCATATGGTGGCCTAGAAAATTTAACACCT +GAAAACTCTTCCATATTTTTTTAATTTCTGAAATCCAACAAAATAATTTT +CAAACTCCTCTTCCTTTTTTCCAGAAAAATAAATTAATGTAAATCAAGTT +CATCTCCGATGAGTTGTAACGGTGGAAGAGTTTTTCAGATTCCTAGGCCA +CGTCAAAGTTTGAGGGCTTCCCGTGACCCTAACACTAAATTTTCCAATTT +TTTCAGCCAACCCAAATCAGAAGCTTTCTCTTTGTGTAATAACATGACAA +TGGGCTATTATCATATGAAATTTTTTAGGTTAAATTTCTGAGGTCATTTG +CAAAGGTGTCATGACTAGTTTGGAGGCTTTTTTGTGACGGTAGGGGTATG +AAACAGGTTGTTACTTCTCGAAAAGGTATTATGTGGGGGAGAGGAGAAGG +AAAGAAGGAAGGCAGATTAGCCAACAGAGATACAGCAAATTTTGTGTGAG +TGGTGGCAGTTTTGAGAAGAAGCCATGCGCCAAGGTCGGTTTGCCTTCTG +TTTTCTCATTTTTCGGTTTTTTTGTGAATTTTTGTAAATTAAAAATATTT +TTTTATAGAAATAAAAATTCAGAAAAAAATACTGGAAGTTTCGTTTGCCT +TCAAACATTTTTTAAAAATTCACAGTTTTTCAAATAGATGTACTTACACG +TGGTGTCAGAGTATCTCATTTCGGCTTGATCTACAAAAAATGCGGGAATC +TTTTGCCCAAAAAAATGTGACGTCAGCACGCTCTTAACCATGCGAAATCA +GTTGAGAACTCTGCGTCTCTCCTCCCGCATTTTCTGTAGATCAGCGTAGA +TCAAGCCGAAACGAGACACTCTGGCACCATTTTATTTTTTTGGGCAAAAA +ATTCCCGCATTTTTTGTAGATCAAGCCGTAATGGGGCAGCCTAGCCCCAC +GTGCACTTATTCCTTATGCCGAACGACACTCCGATATCCCGTTTTTACAG +TATGACTGTGGGTTACTGTACCATTTTTGGTGCTTTTGTAATATTGTTTT +CTTTTTCAAAAACCTAGGAAAAATTTGAACAATTGTTTTTTTTTAATATT +TGAATTTTTTAGGCTCCCACACATTTTTGCGGGATATCGGAGTATCGATG +TAAACGTTTTTTTTTTGGTTTTTTGTATTAAAAAGGGTTTAAGTGTTGTA +ATAAGACATTTTGAATAAAAAATAGCTAGCGCCGCCACGACTTACTGGCT +TCCCTCCTAAATAAAAATGGAAGAGTTTTTGCCGAACTAGGCCATTTTGG +CTCGGCCGTATCTGGGGTAGATTTACGGCGCGTTGCGTGCCGCGTCGCGG +CTCGATTTTAGTTGTAAAACCTAATGTATTTGTTGTCCGTGTGGAGTACA +CGACTTTTCCACGCGTTGTCCGGCAGGCGATTGTCAATGGAGCGCGAAAA +ATTCAATGAGGAAGGCCAGGACACCGATTCAAAGATATAACGGCCCACAT +TTCAACAGCGGTATATCGGAGTGTCGTTATTTTAAGCTTGCACACGGGCT +TCTGGCCTTTTTCATTGCTTTTTCGCGCTCCATTGACAATCGCCTGCCGG 
+GCAACGCGTGGGAAAGTCGTGTACTACACACGGACAATTTTTTTTAAGTT +TTATAATGAAAACCGCGACGCGACACGCAACGCGCCGTAAGTCTACCCGG +GCCGAGCCGACACGTGGTGTCAGAGTGTCACATTTCGGCTTGATCTACGT +AGATCTACAAAGAATTTTGCATGATTAAGAACGTGCTGACGTCACACTTT +TTTGGGCAAAAAATTCCCGCATTTTTTGTAGATCAAACCGTAATGGGACA +GTCAAAATGGCCTAATTCGGCAAACTCTTACATTTCAAAATATGAGGGTA +GGCAAAAGCATGGGGAATTTGTGATTAAATTTTTGAAAAAGGAAACAATT +TTTTCCAAATTTCTTCTGAAATTTTTTCAAAAATAAATGTTTTTTTTCCC +TAAAATCTTCAAATTTCTAGTTTCAATCTCTCACTACCGTGACCAAAAAT +TCCTATGATTTTTGACCTCCCGCCACACAGTTTTACCACCAACTTCTTCC +CCCCCCACCCACCCACTTACTTCTCGTATCTGAAACTCCGCCCCATTTTT +CGTGTACTTTACTACTACTTGAATGATGATGAGGTAGAACAAATCTGTGT +TTATTGGCAGAAGCAGTGGTAGCAACAGGAGAAGAAGAAGAAGCATCACT +GTGTGGAACCAGCCAGCTGTGTTGTTGTGGTTTTAGTCGTGGAGAGAGAG +AGGACGACGTCGTGTGTCTTCAATAAAATGAGGATCTCATCGCTCTTATT +CCTGACTTTCTTGGCTGGAATTGTTCAGGTAAGCTGGGTTTTAAGTATTG +ATATTGGTGCTATTTGAAAATTATTTCAAAAATTTTTGGAAAATTTTTTT +TTTTTGGCCCAAAAATTTTTTTTCTAGAACTTTATCATTCCTAGGCATAC +TCTAAATATGTCTGCAAATATTATTTTTTCTGGAAAATTTTGAAAATGTA +TATATGGATATATGCAAGTTTGGAAGTTTTTGTAGTATTATTTTGAAAAA +TTATAAATTTTTTTAAAAAAAATTTTTTATAAATCTAGCAAGAAACGTTT +TTAAAAACAATTTTAGTCAATTTTCTTTTTTTTTTTTTGGAAATTTCACC +AAATATCGGCCCAAAATCTATGCAATACTATTTTTTCACAAAGAAATTTT +CAAAATTTGACATTATACAGATCTAGGGCTTCCATGGTAGGCAGGCGCGG +TTTCAGGGACCTGCCTGAAACCTACCTGGTTGCCGCCGGCCTTAATTTTT +ACGGGAATTTAGAAAAATTTCTAATTTTCTCATTTCTATCAATTTGATGA +AAACCAAAATAAATTGGCGAGAGGCAGGCAAAGGTTGCCTTAAGGTCAGG +CAGGTAGGCGTTTTAACACCTACCAGCTTTACAAAAGCACTATACAAGTC +TGTTTTTGAAATCTCTAGCTTTAAAAATCTTTAAAACAAGACATGGTGCA +AGACAAGACAAAGACATCGACAAATTTTCGACAAAATTAATTTTTTTCGA +AAAACTTTTAGTTTCTTCCAAATTCGCCAAAGTGAAAAAAAAGTTCCTAA +ACGTGAGATTTTGTATGTCTTCTACACATACTAGCCAATATCAATTACAA +TATCTGCCAAGAAAATCTCGAACCAAACACCAAAACGGCGTTTGGTAACC +GGTTTTTTTTCCAAAAAAACTATACATAGTTTCTAGTAAACAAATACGAA +TGGAAAATTCAAGGTGACAGAGAAGAAAAAAAGATTAGAAAAAAAATCGT +TGGTGGTGGCCTAAGAATTGCATGTAACGTCACACCTCGGCCAGGGGTCA +TGGTACATGCTACATGATTACCCATGGCAAAAGAGTGACGTCACTCTCGG +CCATGATTTTTAGGTTTAGGCTTAGGTTTAGGCTAAGGCTTCGGCTTAGG 
+CTTAGGCTTAGGCTTAGGCTTAGGCTTAGGCTCAAGCTTAAGCTTAGGCT +TAGGCTTACCTTATGAGCCTATAAGCTTTCTGAAAAAGACAAATATTAAA +AAAAAACCAAAAAAAAAAAAAAAAAAAGACAAACAAACAAATAAAAACAA +ACAAACAAATAAAAACAAACAAAATATTTGAATTTTTCTAAGAAGACAAA +TACGTCACTCTCGGTCACGATTTTTGGGCTTTAGGTTTAGGTATATGTTT +AGGCTTAGGCTTAGGCTTAGCCTTAAGCTTAAGCTTAGGCTTAGGCTTAG +GCTCAAGCTTAAGCTTAGGCTTAGGCTTACCTTATGAGCCTATAAGCTTT +CTGAAAAAGACAAATATTTAAAAAAAACCAAAACAAAAAAAAAAAAAAGA +CAAACAAACAAATAATTTTTCAGGCCCAAGACTTCCTTGCAATGTTCAAG +CCGTTCCTCGGAGGTGGCGGTGGTGGCGGTAATCCGTTTGCAAATCCACA +GGCGATCGGCGTAAGTTTTCCGACAGGCAAACAGACAAACAGACAGACAA +TCGAAAGTGGTCCAGGCGCTGAATGTGTTACGCAATTGGTTTTGGGCGCC +ACGCGCGCATATTATTTGCATATATACATACATCATTCAGATAAATGGCA +TTGAAAACGAACAAAAAGTGAAGATTTAGTGGGTTGGCAAATGCATTTTA +TGTTTCTTTTCCAGGGTCTCTTCCAACAATTCGCCGGTGGAAATGGTGGA +GGATTCGGGCAGCTTCTTGCTGGAGCAATGGCACCGGTATGTGTTGCGAC +GTTTAAAACGGAGTGTCGTTGTGAAATTTTCCGCTTTGGAAAACATTGAG +AATATTCGAAAATTTTTGTAAAATTTTGAGATTTTCTTTTTCCAAGCTTT +GGGCTTAGGCTTAGGCTTAGGCTTAGGCTTATGCTTAGGCCTATGCTTAG +GCATAGGCTTAGGCTTAGGCTAAAGCTCATGACTAGACTTATTCTTAGAT +TTAGGCTTAAGCTTAGGCATGGCCTTAGGCTTAGGCTTTTGCTTAGGCTT +TTGCTTAGGCTTTGGCTAAGGCTTAGGCATAAAAAAAGAGAAATTCCCAA +AGTAAAAAAAAAGACAACTTTTGACAAATTTGAAAGTGAAATGTTATATT +TTAAGCCTATTTTTTCCCTAAAAAGAAAAAAATTACTTTTATAGTTTGAA +AACGATTTTTTGGCAGTGTTTCAGCAGTCAAAAATATGGCTGAAAAAAAA +AGTTGATTTCAAAATATTTTTCATCTCAATTAATTCTTTTAAATTTGCTC +TACCAAATTTGAAAAAAATATTGAAAATTATTTTAAAAAATTTAAATTAT +TAAATTTCAAGTAAATTCGAAAATTTTGTTTATTTATTTTATGTTCATTC +TTACAATATAAATTATAATCCTCAAACATTCCAGAAACCAGCACCAGCCG +CAGCTGGACCCCGCTCAGCTCCAGCTCCAACCAACGAGGACTACAACACC +GACATCGACGTTCCAGCTCCAAAGGCAAAAGCCCGTGCAGCTCCAACTCC +ACGAAGAGCACAAGGTGATGATGATGATAATATTGATATGTCTCTGACAC +GTCTTGGCTAACTTGGCTTGGAAAAGCACAATTCCTTTAATACAAAAAAC +TTTTCTCAAAGATTCTCAAAATTTTGCACTTTAATTTTTAAGTTTAATTT +TTAAAAAGTTGTCTCACGTTTTTTAACCAATTTATATGCAAATATGATCC +TTATTACTAACAATATTTAGTAATCTAAACATAAATTGGAATATTTGGTC +TTTTTCCCGCCGAAAATGATCAAACATTCCGAAGTTAGCAACAGAAAATT +GCAGTGAATTGTGTTGGTTTAATTTTACCGTCGTTTTTCATATTTTTTGC 
+CCGATAGCTTTGCTGTGTTTTGTGCCATCCTTTTCTCTGTGATATACTGT +TACTTACTTCAATAAATGATTCTTTTAAAAGTTTTCGCACAAAAAATTTA +GAGTTGCTGGCAAAAATACACGCCGACACGTGGTGCCAGAATGTCTCATT +TCAGCTTGATCTACGTTGATCTACAAAAAATGCGGGAGAATGGAGACGCA +GAGTTTTCTATTGATTTTGCATGGTTAAAAACGTGCTGACGTCACATTTT +TTTAGGCAACAAATTCCCGCACTTTTTGTAGATCAAGCCGTGGTGGGACA +TCCTATCCCCACTTGCGCTGGGGGGACAAAACCGACATAACTTTTGAAAC +ATATTACACATATACATTTAATATATGTTTTCTGAGAGAAAACGTTTTAG +AAATTTTAGAGAATTTCAGCAAAAAAATTCTGATAGCTTTTTGGAAATTT +ATATTATAAAATTTTTTTTTTTTTTTAATTCTGATTTTTTCAAAAAAGAA +ATTTTGCAAACCGCTTCCAGAATATTTGCCTGTCTCAAAATACCCTAAAA +AACATTAAAATGTTTCAAAAATTATCTTTTGGAACAATTTTACGGGTCAC +GCGAAATACTTTCTTTGTAAGTTTTTTTTCTTTGAAATTTAGATTTATTC +CCAATATACGTAAATTTTTCACTGAAAGTTAGAAAATTATACTTGTTTTT +TTCCAGAAATTGAAATGTACATGATACAAGGCTGCTACATTCCAGCAAAA +TCTGATAAAATTTTAGAAACAATTTTTTGTCAGAGTCTTGAATATTTGAG +CTATAAGTTTTTTTTTTTCAAAAAATTAATCCAGTTTGGCCTAAAAACTT +AAGGCTAAGATTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCTAA +GCTTAAGCCTAAGCCTAAGCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGC +CTAAACTTAAGTCTAATCCTAAGTCTATTCACTAAAATTTTGACGTAAAA +CTTGGGACTAAAAATTTTGGACACAAATTTTTTACCTAATTTTTTGGTCT +AAGCCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAA +GTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGT +CTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCT +AAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAA +GTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGT +CTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCTAAGTCT +AAGTCTAAGCCTAATAAGCCTAAATAGGGTCTAACGCTTGCCACTGACGC +CGCTAACGCTCGCCACTGAAGCCACGCCTAAGCCTAAGCATAAATCAATT +TGAAACTTCAGACTTAAATTTCTTGGTGGCCTATACGACTCTCTGTTAAA +TTTCGCTTTGATCAGCAAACGGGAACCCTTTGAAAAATTAAAAAACTAAC +GCTCGCCACTGACGCCAAGTCTAAACCTAAGGGTAATACTAAGCCTAAGC +CTAAGCCTAAGCCTAAGCGTAAGCTTAAGCGTAAGCGTAAGCGTAAGCGT +AAGCCTAAGCCTAAGCCTAAGTGTAAGCGTAAGCGTAAGCCTAAGCTTAA +GCCTAAATCTTTCGACAAAAATTTTGACCTAAAACTTTGACCTAAAATTC +GAGACTAAATATTTTGGACTCAAATTTCGGCCTGAATTTTTGGACTAATT +CTGGGAACTATTCCAGAGTAATTTTAACAAATTGAAAACTTTTTATAGTA +TTCGTAAAACAAAAAATAAATGCAATGTTTTCAGTAATAAATTTGAAATT 
+TTTGAAAAATTAAATATTTCTTCGAAACTTCGAAACTTCCTTTTCTCTGG +CATAACCCCAAATTCCAGCCGATGCCCCGCCAGTATACCGCCAGCCTCGC +ACAAAAGCCGAAAAAATCGAAAGATTCCGGAATATCGCTCGGACATTCTC +GCCGTTCGTGTACGAGGTGAACACCACACCAGCTCCTCACTTTGACAACT +TCATCTGGCAACAAAATGCACCAGCTGTCACACCGGAGCCATTCACATTT +GCTCCATTCTCCTTCCCAACTCTTGCCACCGTCGCACCACCAGCTCCAGG +ACCCGGCGGCCCGACTCTTGAGCCATTTTTACCGACGACAGCTAGTCCGA +AGCTGTTAGCTCATAACACTGCTAGGGTATCAAAAAATTTGAATACTGAA +TACCTCCCTATAACTTTTTCAAATTCCAGATGATCCGAGAAATCGCTTCA +TTTTCCGACGGAGGACGCTCCAGGGATCAGGATTTTGGCGCAGTTCAAAC +GCTCATGCAAGCCTTCTTCGAGGCAGTCTCCAGTGGAAATAATGGTGGAG +CAGGAGCTGCCGCCGGAGCCGGAACAGCTTTAGGAGTAGGTTAAGCTCAC +TAAAGAGGTCGTGAAGTTTTACTATTTTTTAGGATGCTCCAATGCTTCAA +GCCCATCGAGATGGTACCGAGTTGGGAGCCAATCGCGCGCTGACCAACAA +GCTTTTCGAGTCGGATATGGTGTTGACGGTCAAGCAGATGAAGGCGTGAG +TGTGGGGTACTGTGCAATAAGATTATTGTAGGGGTACTGTAAGATCTCTG +TAGGGATACTGTAACCCTACCGTGATATTACTAAAGGGGTGCTGTAAGAT +TACTGTAGGGGTACTATAGGATTACTGTAGGGGTACTGTAAGATTACTAA +AAATATACTGTGAGATTACTGTAGGGGATGCTGTAAAATTTCTCTAGGGA +TATTGTAGAATTACTCTAGGGATACTGTAGAATTACTGTAGGGAACTGTA +AGAGTACTGTAGGATTACTAAAGAAGTACTGAAGGATTACTGTAGGGGTA +CTGTAAAATTACTATAGAGTACTGTAAGATTTCTGTAGGGGTACTGTAGG +ATTACTGTAGTATTACTAAACAAGTACTGAAAGATTATTGTAGGGGTACT +GTAAAACTACTATAGAGTACTTTAAGAGTACTGTAGGATTACTAAAGAAG +TACTGAAAGATTACTGTAGGGGTACTGTAGGATTACTGTAGTATTACTAA +ACAAGTACTGAAAGATTACTGTAGGGGTACTGTAAAATTACTATAGAGTA +CTGTAAGATTGCTGTAGGGATACTAGAAGATTATCGTAGAGATACTGTAA +GATTATTGTAGGGGTTCTGTAAGATTTCTGTAGGGTTACTGTAGAATTAC +TGTAGGGTACTGTAAAATTACTGTAGGGGCACTCTAAGATTACTGTAGGG +ATACTGTAGAATTACTGTAGGGTTACTATAAGTTTACTATAATATTTCTG +TAGGGTACTGTAAGATCACTGTAGAGGCTACTTTAAGATTCCTGTGGGGG +CACTGTAACACTACTGTAAGATAACTAAAACGGTACTGCAGGGTACTATA +AAGCGACCACCCTAGATTCATGGTTATTTCAGAATTGTGCTCGCCGCCCA +AGAGGCCCGTAACCCTCACGGACGTAAAAAGCGAAAGGTGATTACGGGAT +CCGTGTACCGATGGAAGAGTGTGATTCCGTTCCGATTCAAAGGTGGCGAC +GCGAAATGGAAGAAGCTGATCCGTGAAGGCCTAGGCCTATGGGAGAAGGA +GACGTGTGTGCGATGGAGCGAGAATGGCCCCGGCAAGGATTATGTGATCT +TCTTCAGAGGATCTGGGTGAGTTGACATGCCCATGGTTTTCGGATAGTTT 
+GGGTTTTTTTTCAGATGCTACTCATCAGTCGGAAGAACCGGAGGATCTCA +ACTTATTTCGATTGGGTATGGCTGTGAAGATGTAAGTTTTAGTATTTCTA +CTATACTATTTATTTTAAATTCAAATTTTCTAGAAAGGAATCGTTGCTCA +CGAAGTCGGTCACTCCCTAGGCTTTTGGCACGAGCAGTCCCGTCCGGATC +GGGATGACTACATCCATTTAAGAAAAGACTGGATTATTAAGGGAACCGAT +GGAAACTTCGAGAAACGAAGTTGGGAAGAAATCGAGGATATGGGCGTTCC +GTATGATGTCGGAAGTGTCATGCATTATGGGTCGAATGTAATTTTAAATT +TTGAGTCGGTAGTTTTTAGATACTGAAAATGATTTTCAGGCTTTTACAAA +GGACTGGGACCAAATTACTATTGAGACCAAAGATAGTAGATATCAGGGAA +CGATTGGTCAACGCCAGAAGCTTTCGTTTATCGATGTTAAGCAGGTTAAC +AGGCTTTATTGCAATTGTGAGTTGTTATGCAAAAATGAAAAAAATTTGAC +CACTGAGCGGATCGAACGCCCAACCTTTCGATCTGGAGTCGAACGCGCTA +CCATTGCGCCAAGCAGTCACATTTTGTTGCAGGAAACGGTGTATAGAGGA +GAAAAGAAAAAAATAGACAACCCTACGTTTTATCAGTGGAGCGCGGTTGC +AATTTAACATTCCAATTTTCAGCTGTTTGCCCGGTTGCCTTACCCTGCAT +GCATGGAGGATACCCCGACCCCAATAACTGTGCGGTTTGTAAATGTCCAG +ACGGGCTCGGAGGAAAGTTGTGTGGACGTGCCGCAAAGGGCACAGATCAT +GGTAAAACTCCACTGGAATTTATGGGAAAAAACCGCCCTAAAATTTTCAG +ACAAATGCGGCGGTGAGCTAACAGCGACCGCCGAATGGCAGGAGATGGTC +TACAAGGGGAAGCGGACGTGCAATTGGAAGGTGAAGTCGCCGAGCGGTGG +GCGGGTTCGGTTGGTGCTCACCGAGCTCAGGTACCAGTGTGCACCGGCGT +GTAAGGCCTACATCGAGATCAAGCATAATACCGACTTCCAGCAGACCGGA +TTCAGAGTTTGGTGAGAACCCTAGATTCCCAAAAAATTCTACACTTTTTT +CAAAAAAAAATCGAAATTCGACCCATTTTGGCGGGAATTAAAATTCTTTT +AAAAATGACAAAAAAAATATTTTTCTGCAGAAAATTTTTCGACTTTTTAG +AAAGCAAGCTTTCACGCGGTTTTGGCTTCCCTCATATATTGAAATGGAAG +AGTTTTTGCCGAACTAGGCCATTTTGGCTCGGCCATATCTGGGGTAGATT +TACGGCGCGTCGCGGCTCGATTTTAGTTGTAAAACTAAATTTAATTGTCC +GTGTGGAGTACACGACTTTCCCACGCGTTGTCCGGCAGGCGATTGTCAAT +GGAGCGCGAAAAATTCAATGAGAATCGAGCCGCGAAGCGACACGCTACGC +GCCGTAAATCTACCCCAGATATCGCCGAGCCAAAATGGCCTAGTTCGGCA +AGCTCTTCCATTTCAATTTATGAGGGAAGCCAGAAATCCGTGTAAATTTT +TAATTTTTTGTTGCTTAAAAAAAAAAACTAAACAACTATTTTCCAATTCA +AATTTCTAGCTGCTTCAACAAAACATATGACGTCATCTCCGACCAATCTG +AAGCCCTGATCCTCTCCAACGCGAACATCGTGGACTACGAGGTCTCCTAC +AAGCTGCAATGGATTCAAGGTACCCCACCCCCCCCCCCTTTTCTATCCAT +AGAGCGCACTTTCTTCCAGACAACGGAAAAGCTCTTCCACCACCGAAACC +CACGTCGACCTGGGTGCCCGGAAAAGAGAATCGACCATTCCGCGGAGTGG 
+AGAACTCGGGCGGCACCATCGAAAAGTTCATCCTGCAAGCGATCCCAAAG +ATCCGAGACTCGCATCGGCCACTGGAGAGTATTACTAGTATTGTTGCGGA +ATATGGTCTCGCAACGCTACTTGGTATATCGCATAATGGAAAGTAGTCCG +TTTGGAACGATTTTTGCAATCGTCAGAGGAATATATATATATATTTGTAA +ATTTTGTACCATCCCAAAAATCCAATTTTTCTTTCACAATTTTCAAAATT +TGCTCATCTTTCACAATTACTCGGGTTAACACATTTAATGCCTTTCAATA +AATAACAGTTCATATTTAAAGTGATTATTATATTACAGGAACACATATAT +CTGAGAATGCGTATCGCACAACATATTTGACGCGCAAATATCTCGTAGCG +AAAACTACTTTAAAAGACTACTGTAGCGCTTGTGTCGATTTACGGGCTCC +TTTTTTGAAATTTACACGGTTTTCTGGCTTCCCCTCAAAAATTGATATGG +AAGAGTTTGCCGCGAACTAGGCTATTTTGGCTCGGCCATATCGTTCAAAA +ACTACAGTAATTCTTTAAATGACTACTGTAGCGCTTGTGTCGATTTACGG +GCTCCATTTTTGAGATTAATTTTCTTTCGAGTTCCCTTCGTTTTTTTTGT +AATTTTTTGCTTAATTTGAATTTTTTTTTGATAAATTAAAAAAAATGATT +TTACACGGTCTCCTGGAGTGTTCATAATTATTGGAGCGCACTTGCTGATC +CTTGCCGCGCTTATTTACTTTTTCACCGTTAGAAAATGAGTAAAATCAAC +GATTTTGGTCAAGAAATTTAAAATAAAATTATTAATTGATTTCAAAACCG +AGTCCACGTGGTGCCAGACTGCCCCATCACGGTTTGATCTACAAAAAATG +CGGGAATTTTTAGCCCAAAAATGTGTGACGTCAGCACGTTCTTAACCATA +CGTTGAGAGTTGAGAATTCTGCGTCTCTTCTCCCGCATTTTTTGTAGATC +TACGTAGATCAAGCCGAAATGAGACACTTTGACACCACGTGCGAGTCCGT +AAATTGACACCAGTGCTACAGTAGTCATTTAAAAAATTACTGCATGTCAA +AATTAAAATTTCCTGTTCCCGTAATATCGGATTCAGCACGCCACCCTATG +AAAATACCAAAATTGGAAAAAAATCAATAAAAATTTATTTTCACGGAAAA +AAAATTGAGACCACCTTTTTGTTTTCTATCTATATAAACCCGTTGTTTTT +CACAAAAAATTATTGAAAATCCTTCAAATCATATTATTAGTTCGAAACGT +CGACGCGTAGGTGTTCGATCGTACACTGCCGGGAATTGCCGGTTGCGGGA +GTTTCGGCACCGCCGGTTGATATCCTTCTTTGCTAACTTTTGATCTGAAA +ACATTGACTTTTGCTACGTGGCCGTGGAAGAGAAAACTCGGCCACTATGT +TTGTTTTTGCGACAAGGGTATATGCGCAAACCTACAAAATGATCTTCAAA +AAATGCGCGAATTTTTTGCCCAACAAAATGTGACGTCAGCACGTTCTGAA +CCGTGCGAAATCAGTCCCGCATTTTTGTAGATCTGCGTAGATCAAGCCGG +ACCACAAAAATATCAAAGTTTTGAATTTTTCAAAAAAAAAGCACTTACGA +GGATCTAGTCAGCTTGAACAGCAAGTAAATAAGACAAAACGCGCACATGA +TGAAGCTCAAAAGTAGATCAAACGAGAACTTGGTCGCCGAGACCGCATTG +AAACTTCTGTCAAGTGAGAGAAACGGTTGGGTGGCATCGTTCACAAATGA +AAGCATGTAGAATAGTCCAGCAATCGGATAGACTGCCGCAATGACTCCGG +CAAACCCATTGACATCCTGCTCATCAGAGCCCAGAAGCCGGCAGCAAGCA 
+GACGAAAGAGCAATGGAGCTGATGAGGAATAGAATGAGCTCCCGGCAGAA +GTTGAAGATCCAGTCGGTGCCCGGGCTTGCTGCTCCGATGAAGCAGAGTC +CGGTGAGGGGATTGGCGGCGACGGCTTTTGCGAGTAGCGCGAGCATTAGT +GGAGCCAGCGGGATTCCCAGGATGAGCATGATCACGCGGGTTCTGGAGTC +GTCCTGGAAGATCATTTTTTAAAATTTAATTTTTGAAAATTTATCATGTC +TGGAAAAGGTACAGGCTGACTAGACCTATTTCACGTGGTGTCAGAGTGTC +TCATTTCGTCTTGATCTACCATGATCTACAAAAAATGCGGGAGAAGAGAC +GCAGACTTCTCAACTCTCAACGTATGGTTAAGAACGTGCTGACGTCACAT +TTTTAATGTACAAAAAATTCCCGCATTTTTTGTAGATCAAACTGTAATGG +GACAGCACAAAATTCAAAGCCCTCACCAATATATGCGATGTCCTTGTCGC +CTTATTCCACGACACACAGATCAAGAGCCACCAGAGACGTGAGCAAGTTG +CCGTGTAGTAAATCAGTGAGGCCACTGAAGAACATGGAACATGGGAAAGC +CCTCCGACGACGAAGAGCAGGTGATGGGTGTAGTCGGTGCACGAGATTTG +GAACTTCAAAATTCTTTTTGAATGGCTCAAAAATATTGATTTTGAGCCAA +GTTATGACCATTTGAAGTTGGTGGCCTAGGATTTTCCTAGGAGTATTCTA +GGCCGCCAACTTTGGACAGCCATAACTTGGCTGAAAATCAAAATTTTTGA +AATCTAATTTCACAGTTGAATTTAGCAGACCATGAGCTTCAATTTGACAT +ATTTTTGTGCCTTGCCACGAAAACTCGCAGACCTACAGTAATCCGGCCAA +AATTAATGTTAACTGAAAGCTCAGAGTCTGCTAAATTTAAATACAAAGTT +CGATTTCCAAAATATCGATTTTCAGAAAAATTATGAAGACTCTAAGTTGA +TGGCCTAGCATTTTCCTAAGAGTATTCTAGGCCGCCAACTTCAAACGCTC +ATAACTTGGCTCAGAATTAATATTTTCGAGCAAAATAAAAAGAATTTAAC +ATAAAATCTTGTGATCTACCGGAAACTAGATGAGCGCAATTTTAAATATC +TAGGCCACCAAGTGAAAACCGTGACTCGGCTCAAAACCATTATTTCTGCT +TCAACCAAAAACCAATTCATTTATTTTAATTGAAAGCCCGGTGTCGGCTC +AATTCAAATACCAAGTTTATTTTCAAAATACCAATTTTCAAAAAAGCTAT +GAGCGTTCTAAGTTGGTGGCCTAGGATTTTCATAGATTTTCTAAGCCACC +AACTTCAAACGCTCATAACGTGGCTTAAAATCAATATTTTCGAGCAAAAT +AAAAAGGATTTAATATAAAATCTTGTGATCTATCGGAGAATGCATTTTAA +ACTTTCTAGGTCACTAAGTAAAATATTCGGCCACCAAGCCTCGGCCGAAA +ATTATTATTTCTGCTGAACCAACAACCAAATTCTCCAAAAACTGCTTACC +TGATCTTTAAACGAAATGCTCACAATATAAATAACCGATGTCGCACAAAA +CGAGATGCACGAGAAGAACATGGCAGTTTCCGTAAGTGAGTGGAGCATCT +CGAGACGTCTAAAGTAGATCCTGATTTGTTTTTGGAAAATTAGGCCACAC +ATCACACAATCCAAAACTTACGATAAGCCGACTATGAAGACTGATAGAAT +TGTTAGTATTACAGAGCTCAGCGAGAGAAACAACAAGAGTCGGGTGGCGG +ATTCACGGTCAGTCTGGAAATGTACTCATTTGTTACTGAACTAGGTCTAC +TTAAGCCTAAGCCTAAAGTAAGCCTACCTGCCCAACCTTCTGTGGGTTCG 
+AGCACAAAGGCACACACTTTGAGGATCGATTCAGGAACACCTCATCCTGT +GGGCATTTTCCGTTGAGGCGGTCGACCTCCCGTTGAATATCTTCTACTCG +ATCATTACCGTCGTCCTCCTGGAAAAGAAAGATGATTGATTTGAAGCTGT +GTATCTAATTACCCCCGTTTGATGATATAGGACCGGAGAGATTACAATCA +GATGTTCTCTGTTTTTCTCTGTTTTTTGGTAGCTAGTATAGAAAACTATA +GAGTACTGTAATTCCTATTTGGGAGCTTGGTATTGTGCTATTATCAAAAA +AATACAATTGCACAAATTTCTCATTGATCTACCTGTTGAACTTGACTCCG +CCCCCAATCTTGTTACTGTTATTATTAAGGTAGATCAAAAACACAAGGAA +GATCAAAAATAAATTACGAGAAAATTGTGCGATTGTATTTTTTTCTATAA +TATCAAACATTTATTGATTTTAGGACATTCTGTGATTAACTTTTATAGTT +TCACGGATTTCTGGCTTCCCTCAAAAAATTGAAATGGAAGAGTTTTCGCT +GAACTAGGCCATGTTGGCTCGGCCATGCGTGGCGCGTCGCGGCTCGATTT +TAGTTGTAAAACTAAATATTTCCCACGCGTTGTCCGACAGGCGACTGTCA +ATGGAGCGCAAAAAATTCATTGAGGGAGGTTTCTATTTGGCAATGCGCTT +TTGGTTTTTCAGTGATCTTTCAAAAAATTTTTCATAATTTTCAAATGATC +ATAACTCTGAAACTAAAATTTTTGGCAAAAAAAGTCGTAATATTGTAAAA +TGTTGCTCATAATTTCTTCTATACGATAAAATTTTTTCAAAATTTTACAT +TAAACAGGCCAATAGGTACAGCGCCCGACAACTCGCGGGTCCAAAAATTT +TTCAAAAATCGTCAAAATTCGACTCGCGAGATGTCGAGCGCTAAATCTAT +TGACTTTATTTTTTTGAATTTTTGAAAAATTTGTTATCCAATAGAAAAAT +TTATGAGCAACATCTTATTAGTTAAGCGTTTCTTGCCAAACATTTTATAT +TCAGAGCTATACTCATTTGAAATTTTTGAAAAATCACTGGAAAACCAAAA +ACGCGGCAAACGATTTTTTCCGGCAAATCGGCAAATTGGCAAACTGCCGG +AATTAAAAATTTCCGGCAAATCGGCAAACCGGCAAATTGTCGAATTTGCC +GAAAATTTCGGCAAATTGTGGTTTTGCACTTTTTTTGGAAATTTCAGAAT +TCCAATTTCAAATTGCAAAATTAAACTCATCCTATGAATGTTTCTACATC +TATTTTGAAAAGTAAGTAAATTCTATGAAAATATATAAGGAAAACGAGAA +AAAATTCAAAAAGACATAGTTTTAAGTGTTTCCGTCTTATTAAAAAATCC +CTCTAAACTTTTCCGGCAAATTGATATTCGGCAAACGGGAAATTGGCGAT +TTGCCGGAATTGAAAAACCCGATACTTACACTTTCTTTCGCTGCAAACTT +TGCCCTCTCATCTTGAATTGCTCCTTGCTCATTTGGCCCTTTCATGCACA +TTTTCTCTCGATTGTTCTCCAACGGGAACTTATCACAACGAATCACATCG +GGCCACTTGAAACCGAAACTTTCCAGCACTGGAAGACACTTTTCCTGGAC +GGAAAGACACAATGGACGGCATGGACCAATTGGGTTGGCTAGCTGAAATT +TTTTTGAACGATGAAATGATAAAAATGTGCGATTCTTCTCTGATTGAACT +CACTTTCTCGTTGCACATCGGGAAGTAGACGGAGCACAGGAAGAATTTGA +GCTGCTCGGAGCAGACAACGGAGAGCAGGGGCTTGTAGGTGAGGATGGAT +TCGGAGGCGTCTTTCCATGATTCCTCGTCGACGAGATTTGGGAAGCTGGA 
+ATAAGGGCTTGTAAAGTTTGAGAAACTTGAGATTTACAGGGTCAAAAATT +GCTCACGAGCTCTCCCGGTTGCCGAAAAGTTGCCGAAATTTGAGCAACCA +AAAGTTGCCGATTGCCAAAAATTGTAGGGTACGGCAACTTCGGCAATCGC +CGGTTACCGAAGATTTTAGAGTTCGGCAACTTCGGCAATTGGCGGTTGCC +ATTGCTCTCACGAAATTTCCCGCTATTTTTGGAGATTAAACAGCAAGTGC +ACGTGTGGGTGTCTCAAACAGGAAAGAAATCCTGGCAGGAGCCGACTTGC +CGGAAATTTTCAATTCCGGCAAATTGTCGGTGTGCCGATTTGCCGTTTGC +CGGTTCTCAGATTTGCCGGAAGTGTGTAGAGGGATTTTTATAAGACGGGA +ACACTGAAAACTGTGCCTTTTTGAATTTTTTCCCGTTTTCTTTGGATATT +TTCATAGAATTTGCTTACTTGTCAAAATAAATGTAGGAACATTCATAGGA +TGCGTTTAATTTTGCTATTTGAAATTGAAATTCTGAAATTTCCAAAAAAA +TGAGTACAAAACCACAATTTGCCGGAATTTTTCAATTCCAGCAATTTGCC +GATTTGTCGTTCTGCTGGTTTGCCGGAAATTTTCAATTCCGGCATCTTGC +CGATTTGCCGATTTGCCGGATTGCCGAAAATTTTCAATTCGTTAGCCGTC +CACCCTGGATCCTGCCATCCAAAAAAAAACGGAAATTTTTGGCAAATTGC +CGTTTTTCCGCCGGCAAATTCGGCAAATCGACAATGTGCTGTTTTGCCGT +TTTGCCGGTTTGCCGGAAATTTTCAATTCCGGCAATTTGCCGATTTGCCG +ATTTGCCGGTTTGCCGGAAATTTTCAATTCCGGCAATTTGCCGATTTGCC +GATTTGTCGGATTGCCGAAAATTTTCAATTCCGGCAATTTGCCGATTTGC +CGATTTGCCGGTTTGCCGGAAATTTTCAATTTCGGCAATTTGCCAATTTG +CCGTTTTGCCAAAAATTTTAGGGTTCGGCAAATTCGGCATTTGCCGTTTT +GCCAAAAATTTTAGGGTTCGGCAACTTCGGCAGTTGCCGGTTTCCGCACA +TCCCTGTCCCACACGCCCCAGGTGGAAGATGGTGGAAGAGTTTCTAGCGG +GTCCATCAAGGATGGGAATCATCAGTCAACACAATCCTTAGCCGACCTTC +CCAAATTCATCATTCCTTCCTTCCAAAACGGTTTCCGGTCCTTCCAACCG +ATTAATACCAGCCACCACCACACCACCAAACCCTCTCCTTCGAAATAGAG +AACAAAGTTGCATACCCCCCTCATTTCTTTGTTTTGGAGAAAGGGGCGTT +TTGTGGTGCTCGGAGAGCCTGAGGACGTGACAAGTGATATATAGACGGGC +TTATACTATCTGCTCGTCCATATTTCACGCGCCTAGGCAGAGTGCACGGC +GGGAGGTGGAGACGCAGGTTCCCGTCCAACTGCCGCAGAGCATCCCCTAT +CTGCTCCTATCAATTGTTCGGAGAGACTTTTTTGTGAATTTTGCTTTTTG +CAAGAGGTCTGAGATTTGGGATTTGGGCTTTAGGCTTTAGGCTTTGGGCC +TTGCTTTGCGCCAGAGAATGGAAATTTGTCCGTATTTTCCCGGCTTATAT +CAAAAAATGATCAACTATAAAAATATAGAAAAAGATTTTTCAAGTTTTTT +GTCAGTTAGTATTTTTTGATAAAACAAAAGAGAGCCGAGATATAAGCTGT +CAAAGTAGAGCAAGGGTGGTTTGATCTACAAAAAATGTGGGGATTTTTTT +TCAAACTTGCTCTACTGATAAATTAATAAATTACTCCCACAAAATCCAAT +AAGCCTCAAACTTAAGATCCTCCAATCTCATCCCCCTCATAAAACAATAA 
+TACTAAATCACTAGATCTCCGGTAGTATATATCAGAAGAACAGTTGCTCT +CCCTCTCGAACCGACTTACACTTGAGAAAAATAAATTGTGGAAGAGAAGT +GACGTCATCGTAACCTACCACTTCACAAGTTTATATATATATATATATAT +AAGTACTAGTTAGGACTAGCTAAGACCTAATTAGAACATAGGTATGGCCT +GTAAGTTAGGACCTAATTAGAACTCACTAGGACTATATATATATATATAT +ATATATATATATATGTAAACAAGAACCCGGGCGGGTAACCCCTTACGGTC +AGAGGGGTGTCCTACACCAAGGGTATGTAAACTGTGTCCCCCAGATGGCG +AAGTTGAGAGGGAAAAAGTGAAAAAGCGCGCCACATGGCCTCGCGACCGT +ATGTTGCAATTTCAAGCAACGTTTTTGATTTATTGGAGGAGAAATTTTGA +AAAAGTGAAAAAAAAGTGTGACCCCATATGAGATGCGTAATAAAATTCCC +TACAAGACCTTAAGAAAACGTGGCTGGCTGGCAAAAAACAGGTGGCAATA +ATAACGGAATGTGTATGTGTGTGTGTGTGTGTGTGTGTGCTCGAAGAGCA +TAGGCTCGTAAATCGTTATAATTGATGCTGGAGGAGAAGGAGACGATGGG +CCTCCAAAGTGTTACACCTGGTATTTGTTGGGAAAAATTTGGGGGAAATA +GGCATTAAACTACCTAACCTAATTGAGTTTTTTCAAAGCTTATATTATTA +TAACTAAGAGGTGTGATAATAAATAATTAAAGAACTTTAAAATTATCAAA +AACGCTTTCAAATCTGTAAAGCTGATATTTTAAGGATTTAGTACTAGTTA +AAATCTAGTTGGGACCTAGGTAAGACTATCTAGAAAGTTCGGACGTAACT +ATTTAGGAACAGTTAAGTCTTAAGGCCTGCTTGGGTACAGGTAAAAATGT +TTAGAAAGTTAGGAACTAAATAGGTGAAGTAAGTACTAGTTAGGACTAGT +TAAGACCTAATTAGAACCTAGGTAAGGCCTGTAAGTCAGGACCTAAATAG +GACTTACTAGGACTTAGGTAAGACTAGTTAATGTCTTGTTAGGACTAGTT +AGGACTAGCTAAGACCTAATTAGAACCTAGGTACGGCCTGTTAGGGAAGA +TCTGATTAGGACTTACTAGGATTTAGGTAGGACTAGTTAAGATATAGTTA +GGACTAGTTAGGACTAGTTAAAATTTAATTAGAATCTAGGTATAGCCTGT +AAGTTAGGACCTAATTAAGACATACTAGGGCTTAAGTAGGACTAGTTAAT +ACTAGTTAAGAACTAATTAGAACATAGGCACGGTCTGTTAGTTAAGGCCT +AATTAGGACTCACTAGGACTTAGGCAGGGCTAGTTTAGTTATTGTTAGGA +CTAGTTAGGACTAGTTATGTACCTAATTTGGACTCACTAGGACTTAGGAG +GACTAGTTATAATCTTGTTAGTAATATTTAGGACTAGTTAAAATCTAATT +AGAACCTAGATATATCCTGTAAGTTAGGACCTAATTAAGACTTGATTTGC +TAGGATTTAGACAGGACTAGTTAAGATATTGTTTGGACTAGTTGACTAGT +TAAGACCTAATTAGATCTTAGGTACGGCCTGTAAGTTAGGGCCTAATTAG +GACTGAATAGGACTAAATTTCTTGTTACGAATAGTTAGGACTAGTTAGGA +CTAGTTAAAACTTAATTAGCACCTAGGTACGGTCTGTTAGGGAATACCTA +CTTACCAGGCCTTAGGTAGGACTAGTCATGACCTAATTAGAACCTAATTA +GAACCTGGGCACGGCCTGTTAGGGAAGACCTAATAAGGACTTACTAGGAC +TTTGGTAGGACTAGTTAGGACAAGTTAAGACCTAATTAGAACCTTGGTAT 
+GGCCTGTAACTTAGGACCTATTTAGGACTTACTAGGACTTAGGTAGGACT +AGTTAGGACTAGTTGGAAGTTGGGACCTAACTAGTTAGACCCTTAATTTT +GTACACATGCTTATTAATATCAATAAAAATTTTAAAAAAAAACGACTTTT +TTAAGTTTAACTATATAAAACTATAATTTGGAAAATTTGCTACTTTGCTT +AAAAATCGAGAAAAAACCTAGCGTTTCTATAAAAAACCATCTTGAAGTAA +ATTTTGAAAAAACCCCGTCATCCAACAAAAAAAAATTCACTCACCTCGTT +AAGTTATACGGCAAATCGTTGCACATCTCATGATCCACCTTTTGACACCT +CTGCCCGTCCACACCAATCACAAAGAGCAACAGGAGCCAGGTTACTCCGA +GGTAACCACGAAATGGTCCCATTAATGATAATGGAATTGGGGGCTCACAG +CCTGGGTGGGGGAAAATATTGAGAGTAAAAGAAACAATGAGAAGAAAGAC +GGAGGATAGGCGGGGGGGGGGGGGGGGGCACACCAGTGGCAGCACCACCA +CGGCCTGTCCGTTGACAAATGACCCAAACGGACGGGGGGCGGGAAGAGTG +TCTGTTGTGTGTGTTGGGAGATGGCTGGCGGTTATCAGATATTTGTATGA +ATGGTATAGAGAAACAGAGACCAAGAGAGCTGAAAGATGGAGAGGCGAAA +GAGGAAGAGAGGGGGGGGGGTGGAAGTTTTTACGAGCCCTGTAATAAATA +TATCGAAGCAGCAAATCATTTGATGATAACGGAGGAGCGCGGGGACGGAG +ACGGCAATCATCATCATTATTAGGACCAGGGGTGTGCGGCGAATTTTTGC +CGAATTTGCTGTTTGCCGAGCTCGGCAATTTACCGAATTTGCCAAGCTCG +GCATATTTCAAAAAAGTAGATTTGCCAAATTTGCCGAACTTGGCATATTT +CAAAAAAGTAGATTTGCCGAATTTGCCGAGCTCGGCATATTTCAAAAAAG +TAGATTTGCTGAATTTGCCGAATTTGCCGAGTTTGCCGAGTTCGGCAAAT +TTGCCTATTTAGCCGAGCACGGCATATTTCAAAACAGTAGATTTGCCGAA +TTTGCCGAATTTCCCGAGCTTGGCATATTTCAAAAAAGTAGATTTGCTGA +ACTTGCCGAGCTCGGCAAATTTGCCTATTTAGCCGAGCACGGCATAGTTC +ACAAAAGTAGATTTGCCGAATTTCCCGAGCTTGGCATATTTCAAAAAAGT +AGATTTGCTGAACTTGCCGAGCTCGGCATTTTTCAAAAAAGTAGATTTGC +TGAATTTGCCGAGCTCGGAATATTTCAAAAAAGTAGATTTGCTGAACTTG +CCGAGCTCGGCATTTTTCAAAAAAGTAGATTTGCTGAATTTGCCGAGCTC +GGAATATTTCAAAAAAAGTAGATTTGCCGAATTTGCCAATTTTGCCGTTT +GCCGAATTTGCCGAGTTTGCCGAGCACGGCATATTTCAAAAAAGGAGATT +TGCCGAATTTGCCGAATTTGCCGAATGTGCCGATGCCGAGCTCCTGTTGA +TTTTTTTCCAAAAATTCAGTAAAACGACACAAATTTGTTTTAAGAATTCA +TTAGTTTCGGTGCTCCAAACAACATCAAAAAATATCAAATTTTTCCGAAT +TTGTTAAGCACGGCAAATTTGCCAAATTTGCCGAATTAGCCGAGCACGGC +TAATTTTGAGATTTGTCGCACACCCCTGATTAGGACCGAGAAAAGGGCCT +AACATGATTTCACAATTTGAAAAATGAAATTCAATTTTTCTTGGGAAAAA +AGTATGAATCTCTTCCTCCGCCCGTTTCCGGCAGAAACAATTGAACCAAA +CGAGCCAAAAGTGCACATTGTGTAACTTTGTTGGGGAGTGTAGATAACCA 
+GAAGAAACAAGATGGAACTTTCTTGCGATGAACAAATTTTGGAACACTGG +AGGGAAATGATTGGTGTTTTTTAAGCTTTTTAGTCAATTTTTAAAACTGT +TCTACAATAGGCCCAAATAGGCCTAATAGGCAAATAGGCCAAGTAGATTA +AATATGCCAAATAGGCAGTATAGGTTGAATAGGTAAACAGGCCAAATAGG +TCAAATAAGCTAAATAGGCGAGACTGGTGCTCAAATACTTTTTGAACTGG +TAATGTTTTTTCTTCTACACAAAAACAAAAAAGTCCCTGCTTAGTTCCGC +AAAATTGTGATTTTTTTCAAAAGTTACCCATTTTTGCCACTTTTCAATAG +TTTTTGATAGGTTAAATCTAGATTTTCTGAATTCAGCATATATGAATTAC +CCGTTTTCAACAAATTTAGGCAAATTTTTATTTTTGCCCAATTTTTTTAG +CCATCTAATGACTGATTTTGGGCAAAAAAAATTTTCCTGAAATTGTACAA +AACCATTAGAAATTGAAAGGCTCCTCAGTTTAAAGGCACATGAGGTTTTT +CGGAGTGGGTGGCAATTGTCGTTCGGCAAATTTTTTGGTCGACAAATTCG +ACAAATTGCTGGTTTGCCGATTTGCCGGAAAGTATTTTCTTACTTTTCAA +AATATATGTACGAACATTCAGAGGATGCTTACAATTTTGCCGGATAAAAT +TTAAATTCTGAAATTTCCAAAAAAAATGTGCAAAACCATAAATTGCCAGA +AATTTTTAATTCCGGCAATTTGCCGATTTGCCGATTTGCCGTAAATATCA +ATTCTGGCAAATTGAAGATTTGCCTTATTTGCCGACTTGCCGCATTCCCT +GATGTAGGCATCAAAACGACTGAACACCAAGTTTTTTTGAAATTTTTTAA +GTTTACCTTTATAATTTTTAGTGCTCAAAAAGTTCCAAAATTAAATAAAT +TTTGAAATTTGCCGAGCTCGGCAAACGGCAAATTTGCCGCACACTCCTGT +TTCAAATGTTCACCGCATATTTAATTATTCCGGAAATCCCCTCAAACATC +CATCAAAAGTGTTTTCCCTGATTTGTTGACTCTGACACCGACGGTGGTTG +TTCCCCCCCCCCCCCACTCTCTCAATTGGCATGTAAATGAGCCCGTATGA +ATGAAGTGCCTTTTGGCGGCAAAATGCATATCTCCAAGTACCATTGAATT +AATCTTCCATCCCCCATTGTCTCCTTCTATATAGGGGGGAGGATGCGTGA +TTCTTGAAATATGCTCCAAGTGACAAATTGAGCGTGTGTGATGTATCGTG +GCAGATGGCTGGCTGACAACGGATCAGTTCCAAAAACTGTATCAAACGGG +GGGAAGAGAGGGGAGATGAAGGATGGGGGCAGATGGAGTCATCGATGCAC +CATGTTTTTTGGAGCTGTTCACTCAGATATTAGGGTATTTTAATCCCCTT +TTTAAGGTCTATTTCTGATCGTAAAACTTGGTATTTTCAACTCATTTTTA +ACTCAAAAATTTCAAAAGTTTGAAAAATTACACAAAAACCTAAATTTTGA +TCTGGTTTCAGGCATTTTAGTCCCTGTTAAGCGTCTTCGATGCACCATGT +TGTTGTTCAACTTGTTTAAACTTTTTAGTTCCGCATCGAGGAAAATGGAA +AATAAAAAATTATTTATGTATTCAAAAACCATACTTTGATTCTCCGTAAC +AACGCTTTCATGGGAGGCAGGCCCGGTTTCAGGGCCTGACGCCTGCCTCA +AACCTGCCGGCCTCACGCCGGCCTCACGCTTCATTTCTGCATTTTGGCGA +AAAATCAAATACTTGGACATAGTGAAAAGGTGATTTTTTCGACGCGGCCG +ACACTTTTCGGGTTCCGCGCCGCACTATACAGGAGGCGAGGCTCGCGGCG 
+CGAGGCAGGCGGAGGTCGCCGCAAGGCAGGCGCGGCAAGCATTTTTGTGC +TTACATGGATACCCTAGTCTACACAAAGCACGAATTTCTGGCTTCCCTCA +TAAATTGAAATGGAAGAGTTTTTGCCGAACTAGGCCATTTTGGCTCGGCC +ATATCTGGGGTAGATTTGCCGTCGCGTTGCGTGTCGCGTCGCGGCTCGAT +TTTAGTTGTAAAACTAAATTTATTTGTCCGTGTGGAGTACACGACTTTCC +CATGCGTTGTCCGGCGGGCGATTGTCAATGGAGCGCAAAAAATTCAATGA +GAAAGGCCAGAACCCAGTGAAAAAGATAATTTGACAATTACAAATAATGT +TTTAAAATTTTTTTTTTGTTTTTGATTTTTGGTTCTCCTTGTGTTTTAAG +AACATGATGCATCACCGCTGGAAATTGCTGGATATGTAAAAAAATTATTT +ATGTACATATTTAAAAATCAAATCTTGATTCTCCGGGATAGAAGCATAAA +TCAAATTTATTACTGTAAATTTTCACTTTCAGTACATTTAATTGAAATTT +TTAAGCTTTGAAGTTCAGGGTATCATAGTTTTGCCTCCAAAATCCACTTG +CCCAAAAAACAAGCCTGGGAATTCCCGGCGGGGGACGATGGTTCGATAGC +ATCAATCATATCTGTGTGTGTGTGTGTGTGTGCAAGGCGGGGGATGAGCC +GGCGCGCGAAACGAATAAATCAAGAAACGTCTCTCTTTCTGCGAGCACGG +ATTTTGTTGTGAGCCCGCGCGCGCGCGGTCTATAGGAGGAGAGGGGTATC +ATGCACTTCTAATACAGGGCATACTTCTTTCATTTCCATGTTTTTGTACA +TGTAATAAAATTGTTAATTAGCCAAAAGTGGTTCACATGTTCACGTCGTA +AACGTCGGTTTTCAGGGTGTACTGATAATGAATGTGCTCTGTGTGGAGAC +AAAGAGGCAGCAGAGAGCAAGAATCAGTTAGGTTAGGCAGGTGACTTTTT +TCAATTGTGACTAGGAAAAAGTATTTTGGGAATTTTTTTTTATTTTTTGG +CTTCTGCCCAGATTTTCAGGATTTTTCAAATATTTACAGAAATTTTCGTG +ATTCTACAATAATATTTTTGTAATTTTCTCAATTTTCAAAAAATTGCCAA +TTAATTTTGAAGTTTCGAATGAAGCAATTGCCGAAGTTGCCGACTCCCGG +AAATTTCGAAAACCGGCAATTGCCGAAATTGCCCTTTGCCGGAAATTTCG +AAAACCGGCAATTGCCGAAATTGCCCATTGCCGGAAATTTCGAAAACCGG +CAATTGCCGAAGTTGCCGACTCCCGGAAATTTTGAAAACCGGCAATTGCC +GAAATTGCCGACTCCCGGAAATTTTGAAAACCGGCAATTGCCGAAGTTGC +CGACTCCCGGAAATTTTGAAAACCGGCAATTGCCGAAATTGCCGATTGCC +GGAAATTTCGAAAACCGGCAATTGCCGAAATTGCCGCTTGCCGGAAATTT +CGAAAACCGGCAATTGCCGAAATTGCCGATTGCCGGAAATTTTGAAAACC +GGCAATTGCCGAAATTGCCCATTGCCGGAAATTTCGAAAACCGGCAATTG +CCGAAATTGCCGCTTGCCGGAAATTTTGAAAACCGGCAATTGCCGAAACT +GCCCTTTGCCGGAAATTTCGAAAACCGGCAATTGCCGAAGTTGCCGACTC +CCGGAAATTTCGAAAACCGGCAATTGCCAAAATTGCCGATTGCCGGAAAT +TTTGAAAACCGGCCACTGCCGAAATTGCCGACTCCCGGAAATTTCGAAAA +CCGGCCACTGCCGAAATTGCCGACTCCCGGAAATTTCGAAAACCGGCAAT +TGCCGAAATTGTCGATTGCCGGAAATTTCCGATCCCTGAGTTTAGCCTTT 
+TTCAGCAAATCAAACAGTTTATGATTATTTTTTCAAAAATTGTTCTGCAG +TGCTCTTGCATCACTATTGTGGCCCTCCTTTAAATACAGTACTTTCTACA +GTACCACTACATCAATGATCCTTCCAGTAATGCCAACACATTGTACCACT +ACAGTAATCCTACAGTACCCTATACAGTACCGTCGCAATACAATTACCGT +AACACTATGGAAATCTTACAATAGACTACAGTATACAAAAACTACAGTAA +CCCCCTTGAAGTTAGTGTAAAACTTGCAAATGTTCAACTCTTTTTTCTGA +TTTTTTTTTGTTAAGTTCAGAAATTGTAGAGTGTTTTTGGCTTCTGCTCA +GATTTTCAGAAATTTTCGTGATTCTATAGTAGTCTTTTTGTAATTTTCTC +AATTTTCAGAAGTCTGCAAATTCAATTTGAAATTTCGAATAGCTTTAAAA +AACTCAAACAGTTGGCACAAATTTTCGTAGATTCATGTAGAATTTCAGCC +TTTTTCAGCAAATCAAACAGTTTTTTATTTTTTTTTGTATTTTTTCAGAA +ATTGTTCTCACCCTCCAAAGTACTATCACGTCCTAACAGTAATATTAGCC +TACCGTATCACTACAGTAATCCTCCAGTACTTCTGTGCATGCTACAGTAC +CCTTGCATCACCATAGTGGTGATCCTTTAAACCTACAGTACTTCTACAGT +AGCACTACAGTACCACTTCAACAGAATCTTTCCAGTAATATCAGCGTATC +GTACCACTACCGTAATCCTACAGTACCGCCACAATACTATTACCGTAACA +CTATGAAAATCTTACAATACACTACAGTATACAGAAACTACAGTAACCCC +CTTGAAGTTGGTGTAAAACTTGCAAATGTCTACCGTAATCCTACAGTACC +CCAATACAGTACCGCCGCAATACAGTTGGCATAACACTATGAAATCTTAC +AGTACACTACAGTATGCAAAAACTACAGTAACCCCCTTGAAGTTGGTGTA +AAACTTGCAAATGTTCAACTTTCTACAAATTTGCCATTAACTTGTAAGTT +GGGGGAGCGCATCGAAGTGTATACATATATGTACATGCCAACCAATTATT +TTCTCAAACAATTTATTAATAGTCGAAGACACCTGAAGCAACGATTAGGA +GGAAGAAGAGGAAGGAGAGATTGGCAGAAGAAGAAGAAGAAGACGAATTA +GTTGAAGAATCTGTTGAAACGGAAGACGTTGTTTGTGATGTCTCTAGCCG +TGCGACAGCTTCCCGTTTCAGACGGAACGATGTTCGCACGGCGATCGGTG +GCGAGATGTCGGACACGTCGATTGTCGACGAGAAGGGATCAGAGACGGTT +GAAGAATGCTGAGGAGGAGGATTATGTCGACGTTCCCGTCCACGACACGC +GGCCTGGGTACACGTGACACAGACGTGAAGGTTACAGAAGATGTTCACCT +CGTTGCTGGAATAATAGTCGTTGAGAGGTGGGTGGTGAACATGAACAAAG +AACTTAAGAGGCTTACCTCGTTGGGAATGCCCAGGCCTTTATGTGGTTGT +AGAAGATGGTCTTACCATTGCGTCGAGTCACTTCAAAGTCTCCCCACACC +TGGAACAATTTTCTATGGAAGTCTCAAATAATATGCAGTTGGTCGATGCA +CCATGATGAATTTTGTGCCAAACATGGTGCATCGACGCTAAATAATTTTC +TTTTGAAATTTAAAGATGTTTTGCTAGACTTTCCCTAAACATCTTTTTAG +TTTTCCAATGTTCTCTAATATCATCATGGTGCATCGAGAAAAAATGTTAT +AGGAGGAGTAAAGAAATTTGAGACTTTACTTTTTTAGACCAAAAATGACC +CAAAACTACCGAATTGTTTAGTAAGACGTTTTGAAAATTTTTCAAAACAA 
+AGTTATGGCGGTTCAAAGTTCTGGAAAATAGCATATTTTCAGCTAAAATC +TAATGTTTTGGGGGCTGGGGGAGGAATAAAAATGTAATTTTTTTTGTGCC +CCACTGCCCGAAAATGCACTTTAATCGACGAATAAACGGCTTATAAAGGG +CCACAATATTTATAAAAGAGTGATATTATAGTTATAAATAAAAATTGAGT +TCCCGACCGAGAAAAAAATTGGCGAAAATTTTGATTTTAGCTGAAAATGG +GACTTTTTTTCCAAAACTTTGAGCGGCCATAACTCTTTTTTGAGAATTTT +TCAAAACGTCTCATTTAGAAATTCGGTAGTTTTGGGTCATTTTTGATCTA +AAAAAGTAAAGTCTCAGATTTTGATACTTTCTCGATGCACCATGATGAAG +AGAAAAAAAGAGAAAATTTAGAAAAAAAAAAAAAGAACCCTTCCGATGTT +TTGGGAAAGTTTAGCAAAACAATAGATTTGTAAAAAATTCAATTTCATTA +GGTTCCTATTAAATTCTCAATTTTTATCAATGCACCATACATTTTCAGCG +TGAAATTTTGTAAAATGCACAGGAATGAAAATTGATCAGGTGTGTATAAA +ATAAGAAATTGAAGCATTTTGTTTTTTTTTTTGTCGATACACCATGATGA +ATTTCACGTCAAACATGGTGCATCCTGGAAAGGAGAGAGTTTTAAAAATG +TATCATTTTTTTCTCGATGCACCATGAAGATTTATATTAGTAGAACTTAA +ACCGCACTTAGCATAAGTGGCATAAAAATAAAGGTGGAGTAGCGCCAGTG +GGGATTTTGTCTAAATACACTTATTATGATCCAAAACGATCGAATATCTT +AATGAAACACTCCAAAATATTTAAGATTTTTCAAAATTTCCGGTCAAAGT +TTTGGCAAATTGCCAAAATATTGAAAAAAAATATGAGCTTTTGAGGAAAT +CCAAAGCAATGTCGCGTGTTTCGACCCACACAATGTTTTTATACAATGTA +GAAAAAAATTTTTTTTTGGTCGACTTTTAAAATTATGAGTGGCAAAAACT +GAGTAATTGCCAATTTGACAGTAAATAAAAAATTTTCAAAAACATTTTGA +AAAGTTTTATTATGATATTCGATTATTCTGGCACCATTTGAGTGGTTTGT +AGTAATTTCCCCACTGGCGCTACTCCACCTTTAAAATTAAAAAAAAAATC +ATAATGGGTATTTAATATCAATGAAAATTATCGATTTTTCCAAATGTATC +AAAATGAGTGTTCAGCGCGAAATTTTGTGCAGGACTTTTTCTTTCAATAA +TATATCCCGAATGATGCATTAGTTATGGGCTACACGATTTTTAGAAAAGC +ACCTGGCCATGCTTAAAGGTGGAGAACCGAAATTTGATACTCTGTCTTTT +TAGATCCAAAACGGTCCAAGACTACCAATATTCGTATTAACCTGTTCTGA +AAAATTTTCAAAAAAAAAAGTTATGGCCGTTCAAAGTTTTGGAAAAATGG +TCGATTTTTAGCTAAAATCTCAAATTTTGGCAACTTTTCAATGTCACAGC +TGGAGCTTAATTTTAGAAACAACACGTGATAAATTGCAAAAACATGCATA +AAAATATGTTCCAACCGCTCCGACACTAAAAAGGTGCCAAAATTTGAGAT +TTTAGCTAAAAATCGACCATTTTTCCAAAACTTTGAGAGGTCATAATAAC +TTTTTTTTTTGAAAAATTTTCAGAACGTCTTAATACGAAATTTGGTAGTT +TTTGGACCATTTTGGGTCTAAAAAGGCAAAGTATCAAATTTTGGTACTCC +ACTCACTATCGAAGCAGAGCCCCAAAAGTTTGACCGACCCAATTTTGTAA +GTTTCTTCGATGCACCATGATGATTTTGAGCAAAATGTTAGTTTTTTTTT 
+TTGAAAAACTGGTGGCAGGACCCAGTTTTCGGCAGTTTTAAATGTCGATG +CACTATGTTTTTTTTAATGAAACCGTACCTCCCCAGAAGTCTGCAAAGAG +CATCCGTGTGAGTCCGACAGTGGTAGCTCGGATATCCCGTCATTCGCCCA +ACACCTTCCGATTCGTCCATTCTGATCATCGCTCATTTCAACTGTGTACG +TCTACAATTTACAAATAGTTTCTTTTGATTTATGGAGTTCCTGACTTACA +ACTCTATCACCAACTGCTAACGGACGTTGTAGCTGCTTTGAATCCAAACT +GTCGCCGTCTAGAACACGAACTCGGTAGTCGACTGTCTGAAACATTATAT +GGGTCTCGCAGCGATAATTTTGAAGCCTTACCTTATTATCCGATAGGCTG +CTTGCTGTGGTGGCATCCGAGTTGATAGTGATTCCACCAAATGATACGGT +CACCTGAGATTCTTTAGGAACAGGGGCAGCAGGTATACAGGTCAGAAGGA +ATCGTTTGTCGGTTTCTGCATTGAATCTTAAAAAAAAACGATGAGGAAGT +TATTTTTTATTTTTGTCTATTTTTGTTCGTATATGGTTTTTTAGGTACCA +TGGAATAAAAAAAATGCTCCTAAAAAAATTAGTTTCAATAACAAATATTA +CTGGTACAGAGAGTGTAGATAGTTAGAGAGTGACTGACATCCGGGACCCA +ATGGGGCGGGGCGCGCGGAAGAGACGATTCGTGTCGATTTACGAAATGAT +GACAACGAGGAAAATTTCGTAAATCGACACAAATCGTCTCTTCCGCGCGC +CCCGCCCCATTGGGTCCCGGATGTCTGTCACTCTCTAACTATCTACACTC +TCTGTACCGGTAATACCATAATATGATCATCATTCTACATTAGAAAAAAA +TCGATCTGAAATCACAGATTTTTGGCCTCCCTCATAAATTGAAATGGAAG +AGGTTGCCAAACTAGGCCATTTTGGCTCGGCCATATCTGGGGTAGATTTA +CGGCGCGTTGCGTGTCGCGTCGCGGCTCGATTTTAGTTGTAAAACTAAAT +GTAGTTGGCCGTGTGGAGTACACGACTTTCCCACGCGTTGCCCGACGGGC +GATTGCCAATGGAGCGCGAGAATCTCAATGAGGAAGGCCATGGCGCGCCA +GAACCCCATGATTTTTAGGGTATTTCGAGGTTGGCAAATATTCTGGAATC +TTTTTGCAAAATGTTTTTTTTTTTTTGAAAAAAAAATTCCAAAAAATATC +CTGATCGATGAAAATTGTTGGGTGAAAAAAGTTGCAGATTTTAGGAGAAA +TTTGAGAAATTAGAAAAATTAGAAATTAGGAGAAATCAGAAAATTTCCAA +ACTGCTTTCAGAATACTTGTATATATTGAAATAGCCTAAAAATCATGAAA +TTTTTCAAAAATCTCTAAACTAGCCTCCAAATTTGGTAGTAAAAATTCAC +TGGCAGCAACTTCAGAGCGAGATTTTTCTAGTTTAAAGTGCTCGTATGTC +GCTGAATTTTTTGTAGGTACCTATTTGAAAGCTCTTTTTTCAAAATTCAA +AAATTTAATTTTCAAGTTTAAATTTTGAGTAGCGCCAGTGGGGGATTTTG +TCTAAATACACTTATTATGATCCAAAACTACCTGAGTAATTGCCACTTTT +TGACAGTAAATAAAACATTTACAAAAAAAAAAAGTTTTATTATGATATTC +GGTCACTTTGGGACCAAATGAATGGTTTTTCATAATTTCCCCACTGGCGC +TACTCCACCTTTAAAACTGAGCGGAAATACTTTCCTTGTGAGCATAGCGG +CAAAAAACCTAGTACCACATGGCAAAAAAAAACCCACTCATTGTTAGCCA +TGACAACTTCGATCTCATTCTCCAAATTGCCTTCTCCGTTCTCCTTGGTC 
+GAGCAGCCAGTCAGCGGAATCTAAGCAAATAATGCCCAAAAATTCAATAC +ATTCAATAGGCAAACCTTCAACTGATATTTGGTGTCCGATTGAAGCGTGC +CATTTGCATAGATACACTTTGGATGGCCCGCGCGACTTTGTATCGTGCCA +CGGAAGTTTTCGTCAAACTTGATTTTCAATAGGAAATCTGTCTGCAATAT +TGAAGACGTTTTTTTCCTTTTGGGTGGTGACCTTCAAGGTTTTGGCCTGG +GTTTTTAATCTTCCATTAAATGCCCTTCAAATTCGCTTTTTCGAGAGTTT +AAGAGAGTTTCTAGGCCGAGGAACTTCTTGCAGCCAAATTTCTAAACTAT +TGTGTTTTTGCCAAAAACAATGAAACATTTTAGGTCATTTGTGACGTCAT +ATTTTTTCCATGCAACTTTTTATTGGCTCCACACATCTTCCAAAAAGCCT +AAGCCTGAGCCTAATCCAATGTCTAAGCTTAAGCCTAAGGTTCAGCCTAA +GCGTACGCCTAAGCCCAAACCAAAGCCTAAGCCTAAGCCTAAGCTCAAGC +CTAAGCCTAAGCCTAAAAAAAACCGGAAAATCCTAAGCCTAAATAGCGTC +AGTAGCAAGCTAACGCTCGCCACTGACGCCAAACCTAAGTCTCGGCTTAA +GCCAAAGCCTAAGCCTAAGCCTAAACCCATGATTAAGCCCAATCCTAAGC +CTACGCCTAAGCTTGAACCTAAGCCTAAGCCTAAGCCTAAGCCTAAACCC +AAGCCTAAAATTTCTAGGCCACCACTAATATTTTCAAAACTCGATTCTCA +AATACTTCATTTTTTCAGGATATGAAAGTAGGACAGAAATGGGCAGGGTA +TATGGTATGGAGCAAATATCACAAGATGGCAGAAACATAGATACGGGGTG +GGAGTGTTGGACTAGGTGACTTTTCTAGGCCATCCCTACCTTATCACACG +CAATAGAAGTGTCCACAATGGCCACCGATCGAATCAGCGGTGGCAGCACC +ACAACCACCGACCAAATGATCCAGTGCAGAATCATAAATTTGTGGAGGGA +AAAATCGATAACAGTAGTGAGGGTAATAGTAGTAATAATACAAATATATA +TGGAAAAATGAATACAAAAAGATGTGTGCGCTGGTTGGGGTGGTGGCGTA +AGAATACAGGTGGGTGGGGGGTATAATTGGAGCGTGGCCGAGGAGGAGAA +TCGCAAAAAAAAGAAGGAAAGAAAGAGGATAGTAATGACATGTTACGGTA +GAAGGACAACTACCTAATTATATGAAAGAGGGGGGGGGGGCGACGAGAAG +AGGAAAAAGTCTTACGCAATTTGCTACTCGACTTTCAATTTCTGCTAGAA +ATCGAAAAAAAAATAGGAAACAAATTCCGACTCTATTCGATATCATCACT +TCCGGGGCACGGCACAGGTCGGATCAGATTCAACGACGTCTCGCGATCCC +GACCCTCTGATCTTACGGCCATCCCATTGTTCACATGATAGTCGATGAGC +TCGGAGATGTTGCTGAACGTGCGATCGCGGGTTCGGACTCTCTGGAATTT +TGAGAGGTTTGAAGGTTTGGCGGGGGGGGGGGGGGAGGGTCTTACGTTGT +GATTGTCCAGTAGGATGAGATGTTTGTGCTCGTTTTCAGCGGTTCGGCCA +CTCAGCACGTACTTGCCGGGCGTGTGGTCGCTTTGTCTGCAAGGAACTGG +ACAAGTTTGAGCGGCCGACAGCTTGCGGGTCTTTTTTTAGAGAAGGGGGA +CTGTGAGGTGTCCGCTGCTTCAGACAATCTTAGCATCATGCAAACCTTGC +ATCTTTTGTATTTCAGGAAAAAAAACGCAGAATTTTTTTTGAACAAAAAT +TTCAGCCTAAAACAAAAATCTAAAAAAATTTTGGGAGGATGAACTTATTA 
+TACCAAATTTTTCCCTGGTCTTTCAAGAATTATTTAAAGTCCCGTGGCTT +CTGAACATATTATCAAACAAATTACAGATATGAAATGTGACTGAAGCTTT +AAGTCAGAAATATATAAAAAAAGGATTTTTCATACTATTTTTTCAGTCAA +ATTTGATTTTTTTAGACTTTCAGATTTTTCTACGAAGTCTGTTAAAATAG +GCTTATATTAAAAAGCTATCCGATTTACTATTAGTAAAAAATTATAGCGA +CCGACATCTCGCAGGTTGTTAAAGAAAGGTGCATTGACCGGCTCACGCAA +CTTTGGCAGTTTATATCTCGGCTGGTTTTGGTTTTATTTAAAAAATTTCA +ATTGACGAAATATTTATCACTTTTTCTACTAAAAGTTTGCAGTTGAACAC +TTTTTGCTAGCAGCGATATTAACCGAGATATTAGCGGAAAAAGTTACATG +AGAAACATGGTGCATCGACAAGAAATATCTCACAAAACTTTGGCAGGTTA +TATCTCGGCTGGTTGTGGTTTTATTAAAAAATTGTTAATTAACAAAATGT +TTATCCTTTTTTCTACTACAACTTTGCAGTTCAACACTTTTTGCTAGCAG +CGATATAAACTGAGATATGATTGGAAAAAGTTACATGAGAAACATGGTGC +ATCGACAAGAAATATCTCACAAAACTTTGGCAGGTTATATCTCGGCTGGT +TGTGGTTTTATTGAAAAATTGTTAATTGACAAAATATTTATCCTTTTTTC +TTCTACAACTTTGCAGATGAACACTTTTTGCTAGCAGCGATATAAACCGA +GATATGATTGGAAAAAGTTACATGGAAGACATGATGCATTGATCAGCTAT +CATAGCTCATATTTTACTTATCTCAGGTTTTACCAAAAAATTGCCAATTA +TTAAAACATTAGCCTTTTTCTTCGAAAATCAAATTTTGCCCGAATTGCCG +AAAATATAGATTTGATATTTTTTTGTAGATTTAGGAGCCTAAACATTTTT +TAATTTTTTAAGTATTGACCGTTTGCTTTTAAAAATGACTAATCGAATAC +TGAAAACACAGTTAACTTTTTTTTCAATTTTCGAATTATAATTAGGTTTG +AAAATTGCCGAAAATGTCGGCATTCAGTTGGAATTTTGGCAATTGCCGAA +AATTTCCGATTCCCACGCGCCCCTGGTGTGAACCTAGTGAGCATGAAGCC +AAACGTTTTAGAAATTCAGCAAAATTCTAGCTTTTTTCTATAAAAAATCT +AAAAAACCTTACCGAACCAAAAAATCGCCTTCCGTCTTCAACAAAGCCTG +TGCATCCTCTCTGGATAAATTTCCATGATACCAAACTTTCCCCACAACAT +CCTCAGTTACAGTAGGTGCTCGGCAATTCGACACAAATGACTGTCGATGC +AGGGAACTTCGATGTCTCATCGGCGGCGCCGGCACCGTAAATCCATCGTT +AGAAGTTGAAGCATTCGCCTGTGCATCAAAAGTGTTCACATTGATCGCAG +TGGTAAGGACGTCGATGAGCTTGTGCACATCCTCGATGAACACCACGTGG +CATCGACGATCCGTTCCATCCTTGTTTTTTGCAATGTAGCCGAACATTAG +CTCCGTGTCCGTACCCTTTTGGCCTTGGGCTCCGAATGAGAAGAATGAGA +AGGGATGACGTTGAATTAGACGACTTTGTTTTATGATTTTGATCATTTTC +GAGGAAATGTTGATGTCTACGGGGAAGTTTTCTTTTTTCACTTCACCGAT +TACCTGAAAATAAGGATGGTTTACCTGGGCTTTTTCATCGACCCGTGAAG +TGTCGGCTGCAATAATTTTAGTCGAAATTAGTGATAATAACTTTTCTATG +AATAGCGGCTGACATCATTTAGCGCGATTTTTGATCGACCTGTAAGATGT 
+CGGTTGCTTTAGTACATTCTATAGTGGCAGGCTTTTGGCTGAATTTTACT +GGATATATAAGCTATCAAAATTTGAGCATGTCGATGCACCATGTAATTAT +CTCAGTTTATTTCGCTGCTATCAAAAAGTGTTAAAATGCAAAGTTGTAGT +AGAAAAAACGATAAATATTTTGTCAATTGACACTTTTTCAAAAAAAAACC +AAAACCAGCAGAGATATAAGTAGTCAAAGTTGAGTCAGATATTTCCAGTC +GATGCACCATGTAACCTTTCTCTAATAACCTGTGAGATGTCGGTTGCTAT +AATTTTTTAGTCATTAATTTGGACAGCTTGTTAATTTGAAATAATTCTTG +ATGATATCGTAGCGCGGCCGACATTTCACGGGTCCTTTTTTTTAAACATT +TTGCAAAGCTGGTCATGCATTTTTGGTAACACTTAATATCAAATTAATGA +GAAAATGTTTTGTAAATTTTTACCATTAGCCATGCCCAGAAAATGGCAAG +TTTTTTATTTTATTTTCTTTTGTTTTTTTAATTCACAAAAAAAAACTCAC +CCTACTAACAACTGGATTGATTTCTCGAGCCGCAGTCACGCCAACAGTAG +CTGCAACGTGTTGTATACATTCTCTGAAAATTGAACAGTTACTTTATTAC +GTAGATTTTTTATATATTTGAGCTTGCAGATGTTACCTGCCTTCATGCCT +GCCTACCGCCTGATTTCTAAATTTTTGAATAAGATTTTTTTTTGATTTTA +AGCCATAAGAAAAACTAACGAAACAACTTGAACTCTCATCTCGGAAACCA +TCACGTTAATTGACTCGACTACTGGAACACTTCCGAGGTACTGAAAGACA +ATTTTATATTTTCGGGGAAAAAACTAATAAAAAACTCACAGTAGCCGACA +GAGAAACCCCGCTGCTCCGCAGCTCCTCTGCAAATGAGGGCTCCACGTTG +AGCATCGCCGCAAGTTGATTTTTCGACGGAATTTTTGATTATTTTTAGAA +AAAATCTAACTCGAAATTCGTTACAACCGAAATTTTGCAAGATTGAGTGA +ATGAGGAATGACCGGGAGAGAAACGGAGACGCAGGCGGTGAGCGGAGTGG +GAGAGAGAGCGCGCGAGACGGAGAGGCGCAGGCAGTGGTAGGGAGGAGAC +AGGGAAAGATTAGATTGGTGTTGACTGGGTGAGTCAGCATTCTAATAAGT +TAAGTTGACTCATAACTTTGTTTTTTGATCAGAGGGGGGGGGGGGGGGCC +GTGGCCTAGAAATATCATTGCCTACTTATTACGTCACTAATGGTCTAGGA +AACCTTGATAGCTTTTTTGGAGAAGATATGACGTCATTGGTGGACTAGAA +ATCTTATTTTAGTTCTGGAAGAGAAATATTTTGACCGTGGAAGACAAATT +AATGATGTGACCGGTGGCCTAGAAGTTTCAAATTATACCTGGGGAAATAA +ATGACGCCACTGGTGGCCTAGAAAACTTAAACTATGCCTGAAGACGTCAC +CGGTGACCTAGAAATTTAATTCTGGCGACGTCATAGAAATTTGAATTTTG +ATGAGAAATATAATTTTATTGGTTTTCTGAAAATTGAATTTTATTGATTT +TTTGGCAACCGGGAAATTGGAAAATTTAATAAATTGAACAAATGCAATAA +AATAAGCAGTTTTTCAACAGTCATTACTTTTTTCCGTGAACCGTTTTGAT +TTTAATTTGTAAATCTAGGTCGGAAAAGTCACACACACCGAGGAGCTAGG +CTTCCCTCATCCTCCCTGGATCAGTCGGTAACTGCTTCGACTGGGGAGTG +GGAGTGGGAGGGGTGGGATGAGTTCAATTCTTCTGGAATTCCATAATTTT +TATTTTTGTTTTTTTGCGCCTACATATACCTGTGAAAATTGGAACTCGAC 
+ACAAAAACCTCTCTAGCGCGGCTTAAATTTACTTCAATTCTTCCGGAATT +CCATAATTTTTATTTTTGTTTTTTTGCGCCTACATATACCTGTGAAAATT +GGAACTCGACACAAAAACCTCTCTAGCGCGGCTTAAATTTACTTCAATTC +TTCCGGAATTCCATAATTTTTATTTTTGTTTTTTTGCGCCTACATATACC +TGTGAAAATTGGAACTCGACACAAAAACCTCTCTAGCGCGGCTTAAATTT +ACTTTCCTGTCCCAAAAACTCATCTATAATATTTTTAAAAAATCTAGAAT +CCAAATCTATCAGATTTTTTTGAATTTTTGATTTTTTTCACATCGAGATT +CGGAATCAGCGGAAAATTTGGAGTCAATTAAAAATATTTTCCAGATTTCG +ATACTCCACATTTAAAGGAGATCACCTGAAACGCCAGAATATGATACTTC +ATACTCTCCATCTTCTCGCTTTTGAAAATAATCAAAAAAAATAGTCGCCG +TATTTATTGATATAGAAAAAATACCAACAAAATTGTAACTATTGATTAGC +CATACAGGGTCTTTAAAGTCAACATTCATAGTGGGAGCTGGAAATAAAAA +TATATAATAACTTGTTAAAAGAATTTATTGTGGAGTTATAACACCTATAG +ACAATTAAAATCACCTGCTGCCTAAAGAGTATTTACCAGAAGCAAAGGTT +TTCTGAATTTAGTGGAGTTTGTCCATTTACTTTTTTGCATGTTATTGAAT +TGAAATGATACATAGTGAAACTAGAGGTAAAATTATTTACTAGGTTTAGG +CTTAGGCTCAGGATTAGGCTTAAGCTTAGGCTTAGTTTTAGGTTTAAGCT +TAGGCTTAGAATTACCGATAACTGATAACTTTTGCAAATTTAGTTTCTGT +AATATCGGCAATAGCCAAAATTGCCGGAAATTTCTGCGATTCGAATAAAT +AAATTGAAATAAAAATTAGTTCCTAAGATACATGTTATCTTTTTGCAGGA +AAGTAGTAAACTCATGAAATGAGAAATTGCGATGAGGAAGGAAACATTCC +TATGGTTCCTTAGACAATTTCTCACCTTACATTAGTATATAATATATAAA +TGATGAATTTGACTTTTTGAAATTCTCTTCAATAAATACATTTAGTTGTT +TTTTTTTTAATGAGTAAAGTTGCAGAGTGTGGAATTTGCTATGAGGAATA +TGGTGAATATTGTTGGAAATTTTTCTGAAAGTTTTCATTTTTTTTGGAAA +TTTAGAGAGTTGGGTGTAATTACTGAAAATTATGTTTTAAAAAAATTATT +TTAATGATTTTTCAAATTTTACAAAAAACTCATTTTTAAATCATTAGAGG +TGAAGTAGCGCCAGTGGGAAGATTGTTAAGATCCACTCCTTTAATCCGAA +AAGTACCGAATATCATAATAAAACTTTTCAAAAGTTTTTAGAAACTTTTT +TATTTACTGTCAAAAAGTGACAACCACTCAGTTTTTGCCACTCATAATTT +TGGAAGTCGACCAAAAAAAAAATTTGTTTTCCTACATTTTTTATACTTCA +ATTTTGTTTTAATTATTTGTATTGAAAAATTGTAGGGGTCGAAACATGCG +ACAATCCTGTCAGTTTTCAAAAATCTCTACTTACAAAATATAGGACACGC +TCCAAAAATGGATGTTTTTACAGACAACGCCATCCGAATCCCTTGCATCG +GCACATGCGGTCATACGTTATGTGACAGTTGTCGTGAGAAAATTAGAGAT +AAAAAATGTCCTCAGTGTAATCAGAAAGGCGCATTTGTTGTGAAAAATGT +GAATAAACAATTTTGGGATTTAATTGTTTTTTCGCAATTTTTAAAACCCC +AGGAAGTGGAAGTTGAGGAGACTGAAGAGGTTGGTTAGAACTTTTTTTTT 
+GGATTTTTCGGTATTTCCGGGAGGTTATCCTTTGGGCCTAAAACTAGGCC +TAAGAGTAGGCCTAAGCCTAAGCCTAAAACTAAGCCTTAGCCTAAAACTA +AGCATGAGCCTGAGCCTAAGCCTAAGTCAAGCCTAAGCCTAAGTCTGCTG +CTATCAAAAAGTGTTAAAATGCAAAGTTGTAGTAGAAAAAACGATAAATA +TTTTGTCAATTGACACTTTTTCAAAAAAAAACCAAAACCAGCAGAGATAT +AAGTAGTCAAAGTTGAGTCAGATATTTCCGGTCGATGCACCATGTAACCT +TTCTCTAACAACCTGTGAGATGTCGGTTGCTATAATTTTTTAGTCATTAA +TTTGGACAGCTTGTTAATTTGAAATAATTCTTGAAGATATCGTAGCGCGG +CCGACATTTCACGGGTCCTTTTTTTTTAACATTTTGCAAAGCTGGTCATG +CATTTTTGGTAACACTTAATATCAAATTAATGAGAAAATGTTTTGTAAAT +TTTTACCATTAGCCATGCCCAGAAAATGGCAAGTTTTTTATTTTATTTTC +TTTTGTTTTTTTAATTCACAAAAAAAAACTCACCCTACTAACAACTGGAT +TGATTTCTCGAGCCGCAGTCACGCCAACAGTAGCTGCAACGTGTTGTATA +CATTCTCTGAAAATTGAACAGTTACTTTATTACGTAGATTTTTTATATAT +TTGAGCTTGCGATTGTGTAATCAAAAATAAACATTTGAAGCATGATACGA +AATATTTGAACGATTTTTTTTCGGAAAATCAATAAATGAAAATGGAGTTT +TTTTTTTGGATTTTAGCAAACTTCCCAAATGACGGAAAGATCATATGAAG +CGATTATTTTTTCTTTTATAAACCTGTTCAGGATTTGCATTATTATGAGA +AAATCCTAGGCCAATTAGGCTAGCATGTACGGAAGTATTTTTTAAAAATT +TATAAAAGTAATGAAAAGCTGATATTTTCAAAATTCCAGAATGACGGAAA +AATCATATGAAATGCTTTTTTTTTCTTTTTTAAACCTATTTAGAATTGTC +AAAACTACCAGAAAATACTAAACAAAATAGCTTAGCTTGACCGGAAATAT +TTTTAAAAAATTGATAAAAATAATGAAAAGCTGATATTTTCAAAATTCCA +GAATGACGGAAAAATCATATGAAGCGATTCTTTTTTCTTTTATAAACCTC +TTTAGAATTGTCAAAACTACCAGAAAATACTAAACAAAGTAGGTTAGCTT +AACCGGAAGTAATTTTTACAAATTGATAAAAATAATAAAAAGATGATTTT +TTCAAAATTCCAAAATGACTGAAAAATCATATGAAGCGATTCGTTTTTCT +TTCATAAAATAGTTTAGAATTGGAATAATGATCAGAAAATACTAAACAAA +GTAGGTTAGCTTGACCGGAAGTATTTTTAAAAAATTGATTAAAAAAACTG +ATTTTTAAAAAATTCCAGAATGACGGAAAAATCATATGAAGCGATTCTTT +TTTCTTTTATAAAGTAGTTCAGAATTAGCATTATTATGAGAAAATCCTAG +GCGAATAAGGCTAGCATGTACGAAAGTATTATTTAAAAATTTATAAAAGT +AATGAAAAGCTGATTTTTTAAAAATTCCAGAATGACGGAAAAATCATATG +AAGCGATTCTTTTTTCTTTTATAAACCTGTTCAGAATTGTCAAAACTATT +AGAAAATACTGAACAAAATAGGTTAGTCCTAGGAAAGACTAGTTAGGAAA +GAATTCTTAAAATAGGAAGTTGAACAGAAAATCTGGAAGATCTGAAATCT +GGAAAAAGGAACTCTCAAAAATTCAAATTTTGATTTTAGGAATATATGGC +TGTGTAGTCTTCATAGACAATCAAAATAAATTATTTTAATTTTCAACCAT 
+CTTTATGCACGGGCTTCTGTCCTTCCTCATTGAATTTTTCGCGCTCCATT +GACAATCGCCTGCCGGACAACGCGTGGGAAAGTCGTGTACTGCACACGGA +CAAATACATTTAGTTTTACAACTGAAATCGAGCCGCGACGCGACACGCAA +CGCGCCGTAAATCTACCCCAGATAAAGCCGAGCGAAATGACCTAGTTCGG +CAAACTCTTCCATTTCAATTTATGAGGAAAGCCAGAAACCCGTTTTTATG +TAAAAGTTCATACTGTGCAGTTAATTTAAACAAATCAAAATTTGGAAAAT +GATTTTTGGCTTTGTGCAAACTGTAGCGTTTTAGGCGTTAAAAATCTGCC +AATGAATAAAAAATTTCTTATAAAATTTTTAAAAAAACAGTTATTTGCCC +ATTCCGTTTTTCAACATTTATCAATTACGGTTCCAGTGATCAAATGCTTA +CACGGAAGTCACTGATGACGTTGACTCCATGTTTGGTTGCGATGTGAAAA +AGGTCGTTTTTTGCCCAAATCGAGAAAGGTCACACACACACATAGACATA +CACAAAACCTTATATAATTTGTAAGATATTTTGTTTGATTCGAGAAAAAA +AAGAGAACAGGTATTCCTGCGGCGTCGTTTTTCTAAACGGCAGGAAAAAT +TGAGAGAAAGAAAGAGAAAGAAAAAAGATGGAAAAAAACAGTTTAAATAT +AATTATTAGGTTTAATTATTAGGGGTGCACCATATTGTCGTAGGGGCCCT +TCTCCAAAGTTTTAAGTTTATCGATAGAGCGTGAATACATAAAGAGAGAA +ATTTGACCTGCAAAAAAAATATATGGTTGGTTCCCATGCCATAAAAAATC +CATGCAACAAATGAAAACAACACCTGCCAAGTGTCCTCCGCCAGTCTAAC +CAAGTTTTTCCCTCCAAATGTTCCGTGACATTGTTGTTTAAGGCTTTGTA +CTTTCCGCAGCCGTCTTATTCTTCCCGAAGTGTTCTCCCTAGATAATCGG +GTTTTTTTGTTGAATCGGCTGGGGGTAATGATCGGAGAAGATATATATGA +AATAAATGAATAAAAAAGGAACACCTTTTGGAGAGGCAATGAAAAAGCAA +GCTATGCAAAGTATTGATCCGTTGTTATGAATGGGCTTGCTTAGGCTTAG +GTTTTTTTTAGGTTTAGGCTTAGGCTTAGGCTTAGGCTTAAGCTTAGGCT +TAGGCTTAGGCTTAGGCTTAGGCTCAGGTTTAGGCTTAGGCTTAGGCTTA +GGCTAAATCTAGGCTTTGGCTTAGGCTCAGGCTTGTGCTTAGGCTTAGAC +TTAGGCTTAAGCGTAGGCTTAGGCTTAGGTTAGGCTTAAGCTTAGACTTA +GACTTAGACTTAGGCTTAGGCTTAGGCTTAGGCTTAGGTTTAGGCTTAAG +CATATACCTAGGCTTAGCCTTAGGCTTAGTCTTAGGCTTAGGCATATGTT +TAGGCTTATGCTTTGGCTTAGGCTTAGACCGTGTGTATATCCGAATGTGT +GTTTTATTATGTTCCTGATTCTGAAATCATTTGTTAGTTTAATATCTACA +AACATTCCAAACAGTCTGTATGTGCTGCCAGGTGGCATTTCCTTAATCAT +CTCAAATATATTTGGTTTCATTTCCCCCCATCGCAAAATCCCTTTCTTTA +CCTGTCATTTCGCCTTTACGTTCAACTTATTTGATTTTGTTGTTTTCAAG +TGATAATCACGATTAAACAACGAATGAAAAAAATAGGTATGGAATTACTA +CGGTATTCTTTTTTGCCTATCCTTCTTTCTCTCATTTTCCTTTTGTCTTC +TAACAAGTTCTGCACTTTTGAGGTACATTTCTTCTCCGATTTGATCAGTC +AAAGCTATTCGGTTTAAACCATCTTCTATTCAATTCTGATGATTACTGTT 
+AGGAACGAGTGGTCACTTAATTCAAAAGGTCGAACTGTTGTACTAATTGA +ATTGGAAAATTTGTAGGTTATTTGACGTTTATCAGAAATTGAAGATTTGA +TATAAAATTTGGTAAAAATGAAAAATAAAGTTTCATTTAGAGTAGTATTC +TGGCTTCCCTTATAAATTGAAATGGAAGAGTTGGCCATATTGGTTCGGCC +ATATCTGGGGTAGATTTACGGTGCGTTGCGTGTCGCGTCGCGGCTCGATT +TTAGTTGTAAAACTAAATGTATTTGTCCGTGCGGAGTACACGACTTTCCC +ACGCGTGGTCCGGAAGGCGATTGTCAATGGAGCGCGAAAAATTCAATGAA +GAAGGCCAGAACTTCGTGTAGTATTAGCAGACCAGGTTTTCACATAAATT +TGAGCGGTGAAACTAAAATTATGACATTTTTACGCAACTTTATGGGCCAG +AAAATTTTTTTTGGAATTTTTTTTTGAAAAAAATTCAAAAATTTTGATAA +AAATAATTGCCAAAAAGACAAATTTTGAGTAATTTTACTGCCTCTAAGAG +TAATGAACAAATGTTTCCAACTTTTTGCAAATTACTTTCTTAGTTCCCTT +GCAAATCTATATAGATTGAATTTGGCCTAAAACCTAACAATTTAGCTGCA +ATATTGTTACAAAAATTTGGGTACAATAGTGTGTTTATTGAAACATGTGT +TCTTGAAGTTGGGTAAGAACAATTGATGAAGGTTGTACCAAATGAACAAA +AAATGTTTTTAGAGAAAGTCTGAAACATTACCTTATTTCTGATGGTCGTT +AATCAAATTTTACTGTAACTGTTATGAAACCTTGGGCATCGTATTTTCTC +TATTAGTGCAGCCGACCACTTTACTTTGAGACGTTATATCTTGTTTATTT +TAAAGATCTCAATATTTTAATAACTACAAAATCATAGAAAAAGAACCAAC +AAAAATTTTGTTAGTTGACAATTTTTAAATAGGACAAACGATAACTGAGC +TATGAGCTGTCAAAGTTGAACAATGGTGGTGCAATAGAAGGAACTAATAG +AAAAAATACGGTAACCTAAATTTTTATGTTAAAATAAGAAGTAGCGGGTT +GATTCCAGAAGTTTTTTTTATAAGGTAACCATCCGCTGACCGCGCCTCCG +GTATCCCTTCAAAAGGCAAAAAGTAAACTTTTCTAAAACTACAGTAATCC +TACAGTAGTCCTACAGTACACCTACAGTACCTCTACAGTATCTTGACGTG +ATCCTCTACCTACTCCCAACCCAGTACCTCTTCAAAGGCTAAAAAGTCAA +AATTTCACAAACTACAGTAATCTTACCGTACTCCTACAGTAACCCTACTG +AACCATTACAGTAATTTGACATTATTCGCCACCAACTCCAAACCCAATAC +CTCTTCAAAAGCTAAAAAGTCAAAAGTTCACAAACTACAGTAATCCTACC +GTACTCCTACAGTACTCATACAGTACTACTACAGCACCCCGAACATATCC +CTCCACTAACCCCAAGTCAATATCACTTTAAATGACGAAAAGTCAATTTT +TCCAAAACTACAGTAATCCTACAGAACTCCTACAGTACCCCTACAGTACT +ACTACAGTACCCTGACTATGTCTTCCACTAACTCCAAACCAATATCCCTT +CAAAAGACGAAAAGTTAATTTTTTCAAAACTACAGTTATCCTACAGTACT +CCTACAGTACCCCTACAGTACTACTACAGTACCCTGACTATGTCCTCCAC +TAACTTCAAACCAATATCCCTTCAAAAGACGAAAAGTCAATTTTTCTTTT +TTTTAATTGAAATTGTCCGTTATTCCCTATTATGTAATTTTCCAAAAATG +CTGCCAACCTCAAAAACTGTTATGTTTTTGTTCATACACATTTAGCCACC 
+ACCTGCATATGACCCATAAAAAACCATACTTGACACCTGACATACTTGGT +TTCGATACAAAAATCGAGAACATTTGGCAAACCTGAGTTGTCATTCTATT +GGCTATTCACCATGTGCTCTAATAAAGTAGAACTTGGGAATATGTGTCAA +GGCTGTAAATGGGGGAGTAGTATTGTTAACATATGTACTGTGCAATATTC +CAGATGCCAAGTATACAGATTATCTAATGACAATAGGTAATTTGATTTTT +CAGGTGCTATAGTGCTATATTGTTCCAAATACATGATTTTTTTTTAGAAA +ATTGAGGTGTGGTGTCGAATGTCCCATAATTTTGGATAATCAATGGTCTT +TAGCGAAATCCTAATTTGAAGCCGTTTAAAGCTCGAAAGTGCAATACATT +TACAACTGCATTAGCGTTTTATTTTGCTCTGAAAATGACAAATACACAAA +ATTTTGGATTAAACCGCCTGGTTTTAAAACCGAGCTTAAACAATTTTAAT +TTTACAAATTAAAAAGTAAAAATTTTCTAAATTCCCTACACTAAAATTTA +GCGTCCACATCACTTTCTACCCCTTAATTAATTTTTTACTACATTTAACA +AAAATATTTTGAACCGGCAATTGTCAAAATTGCCTATTCCCAAAAATTTC +CAAAACCGGCAATTGCCAAAAATTTTCAAAGCCGGCAATTGTCAAAATGG +CAGCAAACTTCCAAAACCGGCAATTGCAGAAATTGCCTATTGCCAAAAAT +TTCCAAAACCGGCAGTTGCCGAAATTGCCGAAACTGCCGCAAACTCCCAA +GACCGGCAATTGCCGAAATTGCCGGTTGCCGAAAATTTTCGAAACCGGCA +GTTGCCAAAATTGCCGCAAACTTCCAAAACCGGCAATAGCCTAAATTGCC +GATTGCCAAAAACTTCCAAAACCGGCAATTGCCGAAATTGCCGATTGCCA +AAAATTTTCAAAAAAATTTCAGAAATTGATACATTTTTTAATTGTTGTGG +GAATTTTTTTTAATAAATTTGAAACATATCACATCCTCCTGTTTTCAAGA +AAATTAATTCCTTCAGTTTCTGACGACTCAAATCCTCCTCAAATGCCAAA +TTTATTTTGGGAAACTATACCAATTCTCAACAGAATTTTAAAGGTGGAGT +ATTGCCATTTTGTCTAAGTACACTTATAATAATCCAAAACAACCGAATAT +CATAGAAAAACACTCGAAAAAAAATTTTTTTGGTCGACTTCCAAAATTAT +GAGTGGCAAAAACTGTGTAATTGCCACTTTTTGACAGTAAATTAAAAAAT +TTCAAAAAAATTTTGGAACGTTTTATTATGATATTCGGTTATTTTGGGAC +CAAGTAAGTGGTTTACTCCACCTTTAAAAATTTAGCCGTTATACAAACTC +CAAATATATCCATTATTCTTTCCCCTATACCCTACCCATAACAAAACAGA +AAAATTTCAAATTCGCATCGTCGCAACACCACAGCATGTCCATTTGGAGT +GATGTTCCGAGAGAAAGGTCAAATCGTCTACCTCCTTATTCTAACCATCT +CTATGCCTATATATGAGGCTTTAATGAACGAGCGCCGCCGCCGCGCGCCT +AGCGCCTCCGAGTGTCGCAAAGTGCAACTTGTTTGCCACTTAACCTGAAT +TCTACTGTGTTGTATTATGTGAAGAATCGGATAGATGTAATGTGTAACAA +TAAAAAACCACGACGACGACTGAATATGGAATCGGCTAGGTGGTGCAAAA +GTTTCGGAAAGTTTTGGTACCATGGTACCTGTGTAAGCTCTGTGAATTTT +TGAGCCACGATGGGTCTTGCAACGATATTTCGTCGAAACTTTATTCTCGA +AAGTTTGTTCAATTTAAATTTTCCGCAAAAAACATTTTTTGAAAATATTT 
+TTAAAAATTTGAATTTCTCAGCATTTTTTTTTTAATATGAAGTTTCCAAT +CCCTCAAAAAAAAATTTCAGAGAACATTTGAATTTCCCGGAAACCCCAAT +TTCTGAGAATTTTTCAATTTCCCTTTAAAATAAAATTTCGGAGAAAATTT +GAAATTCCCGCCAAAAATTTATTTTGAAATATTTGAATTTCCCGGCAAAT +ATTTTTTTTTTTGAAAACTTTGATTTAACGGAAAACCCATTTTCTGAGAA +TTTTTGAATTTCCAGTTTAAAGAAATTTCCACGGGTTTCTGGCTTCCCTC +ATAATTTGAAATGGAAGAGTTTGCCAAACTAGGAAATTTTGGTTCGGCCA +TATCTGGGGTAGATTTACGGCGCGTTGCGTGTCGCGTCGCGGCTCGTTTT +TAGTTTTAAAACTAAATGTATTTGTCCGTGTGGAGTACACGACTTTCCCA +CGCGTTGTCCGGCAGGCGATTATCAATGGAGCGCGAAAAATTCAATGAGA +GGAGGCCAGAACCCCGTGAATTCCAGCAAAAAACTTTTTGAAACTATTTG +AAATTCCCACAAAAAAACAGTTTTGCTGAGAATGTTTGAATTTCCCCCAA +AAATTTGACGGAATACTTGAAATTCAAAAAATGTTGTGAGAATATTTGAA +TTTCACGGAAAACCTGTTTTCTGAGAGTTTTTGAAAATCCCTCAAAAAAA +ATTTCGGAGAAAATTTGAATTTCCCTCAAAAAATTTCGAGAAATATTTTA +AATTACCGGCATATTTGACATTTTGAACTCCCCACAAAAAAATTTTTAAA +AATATTCGAAATCCCGGAATAATTTTTTTCACTATAGTCAGAAAAGATCA +ACTGACTAACTTTTCACAAACTACAGTACCCCGAGCATAACCCCCATTAG +TTTCCGATAGCTACAGTAATCCTACAGTACTCCTACAGTCCCCCTTCATA +ACGTTACCCTGATTACTAAAAAATGCCCTTTTAATGCGTATTATCACATC +TCACATGATTAAGACCAAAAGGGGTGTCCTTCAGATAAATACCCGTTCAA +TTTTTTTTACTGCAAACCAGTAGACTTATCTTTTAGGTTATGAAGAGAAG +ACTAAGTGTAAACAAGGTCGTTGGAAGTGATATGTTAAGAAGTTAACGGC +AAAATCGATGCCAAAACTTTCTGAGCTTTGACTTCTACTGCTCAGCTCAT +TCCATAACCATGAGCTCATCAACTTGCCCTCCCCCCCCCCCCACTATCTT +CCACCACACTTTCATTCCTCATTGTCGCAACAAACCCATCTCCGGATTCG +TCCTTTATATGTGTCACAACACAGGAGTACTCCTTAGCGCGCCAACTTGT +AATACCCCCAGCGACTTGGCCCAGAAACTTTCCTCTAATCGCATCCACAA +TATGGTCTGCAAGTACAAGATTAGCACCCCCCATTGTATGTCAATTTGTC +TAGTTTTTTATTCCATTGAACTATTCCACAATGTAGAAAATTGGGGTTAT +GGTACATCGAAACATGGTGCATCACATAATGGTGCGCCCAAATCATGGTG +CATCGTCAAAAATAAGAATCCGAGTCCTCCCAGACATCCATGTAGGTATG +CACTTTCCCGCCTGTTCCGCATCTAACCAACGATTAACTGAATCTTTACG +ACCCTCTCTCCTCCGAATGATGTTGAAAAATGTCGCGCCAGCACCTTTGG +AGGGGGGGGGGGGGGGGTTAAAAAGATTCCAATCTACCACCTATCTTCTC +CATTTATCTTCAATTTTGCCCCCCGCGCTCTTAGATTAAATGAACTTTTT +TGGCTAAATCACAGCACTTTTTATTAAATCTAGGAACTGCATCTAAACAA +TGCATGTGGTTACAGTTACCGAAAAATGTTGAAAATTTGGGAAAAGTGCC 
+AAAATGTTGAATCAAGTCAGAGAGATTCGAAGACGCTTCTAGCAATTTTT +TGACAGTTCCTGTCCCCTTTCGGTAGATTTGCGGCAATTCTCGGCAGATT +TACACCCGCTTTTGGCGGTTTTTCGGCAACTTTTAGCAATTTTGGCAATA +CTTGAAAACTTCTATTTAGCAACTTTTAGTCAATTTTCTGTAATTTCTGA +GAACTTCCGATTTTCTTTTACATTTTTTTCCGATGCACCATGTCCCGAAC +GCTCGCGCCACGACTTTTTTATTCATTATAGCAGGTCCTGTTTTGATGCA +CCATTTCCCAAAGAGCCCTGCAATGCTGGCTCATGTCACGATGTACCATG +TCCTGAAATGTCCCACCACGTTCCGCAACGAGGTTTTACTGCCAAATTTT +ATGCAATTCCAAAATATTTTTCGACAATTTTGTTAGCTTTTCCCTTTCAA +AAAAGTTCAAAATTCTGTTCCAAAACCACACCAAAAAGTCGAAGATCCGC +AAAATCTGCACTTGAAACACTATTTGACAACCCCCGTCTCAGGCTATCAA +AAATGCATGCAGTCATATATATGTGCAGCTTGTGAAACAAAAAACGAGAG +GGATCAGGGGCTCAGTGCTCAAGGTAGATGTGCAACTCTATGAAATTGAT +CGGTTATGCATGGCAAGTAAGAGTGACAGGGAGCGAAGTATGATGATGAT +GATGATGATGGTGTGGAGAGAGCGCAACGTATGAGTTATTGAATTTTCGA +GAAAGGTTATCTTTGATGGGGGGGGGGGGGGGGGGGGGCAGTTCATGGTG +CATCGAAATATAAAAACTTTCGTGGAGATACGCTTTTTGAAGAATTTTGT +GACACCTTCTTGGTGCATCGAGACAGTTTTCGTGGCAAGACCTGATTTGG +TTGAAACTCTGAAGCCAAATCGCGCTCTACGATTTTGTGACCAGGCCAAG +GTTTTGGTGGCATGGTGCATCATAGTAAAAATTCAAATCCTGGGAAAAAA +AGCTCTATATATAATATCCAGCCATGGACCGTACCTCCGGCGTGGCCCCC +TTCTGGAGCTAAAAACTAATTTTTCTAAAACTACCGTAACCCTACAGTAT +TCCTACCGTACCACTATTGTACCACTACAGTACCCCGACTATATCCGTAC +ACTAACCCCAACTCACTATTTATCCAGAAGCCAAAATTTCACAGACTACA +AAGACTACATAGACTACAAACTATGAACACACTGAATAAGCGCTTCATAT +ATATAGTGAATGATGGTGAGATCCATGTGACATGGTGCATTGTGACAGGC +ATTTTGGGAAGGGGGCTTTGAGCACAAAACAAAATCTTTAGTTATTTTTC +CGACAATTTCCAATAAAGTGAAGTTTAACAAGGTCGTGGCGTACTTGTTT +CCAGTTTCCTCACGAGTGTTGCATATTATACCGCCAGTAATCATTGCGTT +ATCACATAGTTGACAACGGTTTGATAATGTCACAAAGTGTGGGGGTTTTG +GAAGTGGCATCAAACCAGCATACGATTATTAGGACGTGATGAACTTATTT +TACCTGAACATATTATTAGGTGGTGGAGCTGGGAGCTTAGTATTTGGGTT +TTGAAACAATGCACCATGTCCTAAAAGATGCTTTATTGCTAGGTAGGGTG +CCGCGACTTTTGTATTCATTATAACAGGTTCAGTTCCGATGCATCTGCAT +TTTTTTAATACTGTTCTTATTCTTAGCTCAATAATTCAGGCTGAGGGCCG +CAGGCCCGAACACTTTTGGAAACTGTGGGCCAGCTTTAATCCTATTCCAA +GCAAATTGTTATTCGCTCAAGCCAACTAACCAGATTTCCTCAAACTTTAA +TCGCTCATATCTCAGCTCAAAACCATAGCAGAGACTTCGTTTTTTAATCA 
+ATCGATCCCAAATTTTAAGGAAATCTCGAATATCCCTATCTCAGGTTTGA +GGCGAAGCCGAGAACCTGCGCAAGCTACCTTTTTGGGCATCCCCATTCCC +GAAAGGCTGGCGCGGCCCTGCCGCGCTAGTCATTTGCCAAAACGTCAAAT +TGGGTCATGTCACGATCCATAATGCCTTGAAGGGTTCTTCTGCAACAGTC +CTAAAAGAAACTTAGCACTCTGGTTGAGGTATAGATGCACCATGTCCCAA +AGGGTCCTGCCATGTTGGGACATGTCACGATGCACCATGTCCTGAAAGAT +CCACGCTCGAAAACTACATTGTTCCGAGTGTTTACCTCTATTTCAATACG +AAATTTCAGCAAATTTTCATCCAAAATCATGTCATTCTAACAAGAGCCTA +TCATGTTGACTATAGGGTCGTGACGACCTGCTCGAGTTCGCCTCGCTCTC +CTGTTATCAGAATAGCTATGCGTTAAGTACGCGATGTATTGACAGTCACG +ATTTAACGCTTCAGTTTTGTAGTTTTGCTGGAAATTTTTGAGTTTTTCGT +TTTTTCTTGGCTCAAATCGAGTTTTGCTTTTTTGACGATTAAATAATTTA +TTCAAACTTAGCGCATTTTTTGACTGATTGTGAGACATGATGCATCGTGA +CAGGGCTTAATCGTGGCGAGACCCTCTAGTACAAAATTCTTTTAAATATC +AGTAAAAGCATTATAATCCATGATTCTATACTTATTTTATGGAATTTTCA +GCCTTAAAAAAATCTGAAATTTACGATGCTAAAATCTACTCAAATTATAG +TTTCAATCCAAAATTTCTCAGAAAAACTAAAATTAGCATAAAAATGTTTT +GTCTGAAATAGCCCTAGATTCAAATATCCCTAACACCCCAAAAAACACCA +ATGTACATCTCCTCTCCGTAAATATGTAGTTTTACAACCGCCTCTCTGTT +GACTGGGAGCCATAAATGTAGATATATTATCTTTCCAGATCGCCTGCCGG +CCCCTCCCACCTGATTGTAAAACTGCTCTTTGCTGTGGCGTTTTATCGAC +TTGTTTTTCTTATATATTCCATTCTACATACCTTAACACATTTTCTTGTA +TATTTTCTATACACTTTTTTAACTATAAAAAAATGCAACTCCTTTTTCTT +CGATTTCTTTTTATGCAAATTTTTTGGCTTAAAGGTGGAGTAGCACCAGT +GGGGAAATTGTTAAAAACCACTCCTTTGGTCCGAAAAGTACCGAATATCA +TAATAAAACTTTTCAATATTTTTTTGAAATTTTTTTATTCACAGTCAAAA +AGTGACAATTACTCAGTTTTTGCCACTCATAATTTTGGAAGTCGACCAAA +ATGTTTTTTCTAGATTTTTTATAATGTAATTTTGTTTTAATTATTTGTAT +TAAAACATTGTAGGACAAGACAGGGCCGCAACATAAGATATTGCTTTGAA +TTTCCTCAAAAGCTCATATTTTTCAGAATTTTGGCAATTTGCCAAAACTT +GGTATTTAAAAATTGTTAGAGATAACTGTATTTAGACAAAACATAGAACA +TTTGTATTTCAAAATAGTTCAAAATTCTAGTTTTTGAAATTTTGCGGCCA +GACTGACCCCCACGATGCACCATGACAAATTGCAATTTTTTTTTCAAAAA +TTGTTTTCCAAAGGGGCGTGTTCCAAACATCTGACCCAAGTTTTCCCGTA +GTTCTTCTCTCCCCCAGCGGGGTAAACACTGCACTTTACGACTCCGCCCA +GTGCTTTTTTTTTCGCAGAAACTGAGAGAGAAAGAAACATCTGCGTCTCT +CTCCCCCAGTACGCTCGTTCCCGCCCCCCACCTCGATCTGCCAGACCACA +CCGACCTGATTCGAGATTCCTTTTCTTATATGTATATGTTTTTGGAGAGG 
+TGATCTTGTTTTTTTTGTTTCTCCTTTGGTTTTGTTCTCTATGCTCTGAA +CTTTGGTCTCCTACTTTTCTACAGTTTTTTTTTAAATATCCAAATTTTTG +AAATAGAGGGTTGATCTACATGACCCCCATGCTTAGTCTACTATAATGCT +ATATCTGATGTTCCTCCGAATGGCCTGGTAGGTGAAGGCTTTTAGCCCAG +TGAGCCGGCTTACCAAGTCTAGGACCATGGTTCTTACAACTTAAGGCCCA +AAAAAAGCAGGCATCTGGCCTAACTTCCCCCTCACTAAATCTGTAGTAAT +ATATAATTTCTATCATACTCCATCTTTTTTTTCTACCTCTTATCATGTTG +TTATTGAGCAGCAACGCCCTTATATTGTTTGGTATATATATTTGATATAA +AACCTCGTAGTAAATTTTATTTTACACAACAACATTTATTATTTACTTTT +TTCATACTATTTTTTTTTTGCAGAACGAAAAAAAAATGCTATTCACAATT +GAGCAACTGGAGCTCATCAAAAAGCTGCAGCACACCGGGATGTCATCCGA +TCAGTTGCTCAAGGCTTTCGGCGAGCTGGAAGTTCCAGAGCAGCTTCAGA +ATAACAACACAATTGCCGCAGCCCTGTACAGCCCACTCCTGGTCCAACAT +CTCACAACACCCAAATCAGAGACTCCAGTCAAACTCACCGTGCAGACAGT +GCCGACACCTGTGAAGTCAGAGCCACAATCTTCAAATTGCTCATCACCAT +TTGAGCATCCGATATGCTCGAATGCTCCGCGGCCGATTAGGAGTCAGCGA +ACGCCGATGAAGGAGATTACCACACTGGACGATCCGAATGAGCTGGAGGA +GTTTATGAAGCAGGGCGAGGAGGCGTGCATTTTGGATATGAAGACGTTCA +TCACACAGTACTCGTTGCGGCAGACTACGGTAGCAATGATGACAGGTTAG +TTCCCAAAAAAGTGTAGTGGAAGAGTTTTTTTTCTAAAATCTAGGCCGTG +ACCAGAAATAAATCAGAATACATCCCCGGCGCCTAGATTCCTAATCTCTG +CTGTTAGCCACAGTGCAATTGAGTATTACTGTAGTTGATTCCTTCTATGA +CGTCATATGAAATAAAGTTGGCTAAAGGGAAAAGGGGACTCTGGATCTTA +GGCGAGCAATTCTCTGCAGCCGGCACCAGAATCGATCCCCCGGTGCCTGG +ATATCTATCCTTCGCTGTTAGCCACTGCGATATGCCCCACCCGCGCTGAG +ATCGAACCGAGGCGTCTGAATTCCTAGGCGAAGTTGCTACCCACTGCGCC +ATGTCAATCTTCTAAATTTCCAGGAGTAAGCCAGCCGTACATTTCAAAAT +TGCTCAACGGCAACCACCGTGAACTATCGCTCCGTTGCCGTAAGAACATC +TACTGTTGGTACCTGAACTGCCGACGTCATCCCAACAAACTCGCTGCCTT +CCTCGCCGATCCGACAACTCGCCTCGAGACGAATGGAGATGGCGAGCTGA +TCCCACAACGTCGAGAACGCTACGTGTTTCGTCCAATTCTGATCCGAATG +CTCGAATCGTTCTTCACACAAACGCCATTCCCAGATCTTCCACGGCGAGT +CGAAATAGCGAATGCTTGCAATCACGTTCTGAAGATGGATAAGAAAGGTG +TTGGGTTGATGCCAAAGGAGGTGGTCAGCCCTCAAGTCGTTTCGAATTGG +TTTGCAAATAAGCGGAAGGAGTTGAGAAGAAGATCCGCTGAAGCTTCGGC +CGCCTCCACCTCGTCCGCTTCGTCTTCTGCGTCATCGACTGCTAATCATG +ATTCTGTCAGTGTGTCCAGCATGAGCCCTCGCGATGAAGAGGTGAGAGTT +TTAATTTCTCGGCCACGCTGTACTCTGGGTGGAAGAATTTTTTCTAGGTC 
+TTATAAGGAATCGCCCTCCGGTTTTCCCACAAGTTAAGTCATAAACATGT +ACCATCTAGAGGTTAAATTTCTGGAGTTTTATTTATTTGAGAGTGATGGT +AAACCTGTCAGTCCTTGGCAAACATGTGTCTGTTTGGTCTCAAGGTCTAG +GTCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTCTAGGTCTTGGT +CTAGGTCTAGGTCTAGGTCTAGGTCTTGGTCTAGGTCTAGTTCTAGGTCT +AGGTCTAGGTTTAGATCTAGGTCTAAGTCTAGACACAAATTTCTAGGCCA +CGAAAATTTCTGCCGTGGCCTAGGATTTTTTAGGCCATATTCTAGGCCAT +GATCCAAAAAAAACTTTCCCTGTCTCCTCTAACCCATTTTTTTTGCAGAC +AAGCTCCCGTAACACAACCCCGGAGACAGCAATCTCCCCGTCACCAGCAG +TGTCCACTTTTGAAGTGTCCCGCCCGTCTGCCATCATCTCTGCCACGTCA +TCAACAACCTCGCCAATCTCAATTCCAGCAACAATCATTCCATCAGTATC +GCCGTCAGCTCTCGAACTTTTCGCCATGGCCCAACAGCTTGGAGTTCAGC +TTCCGGTTCCATTTCCAACATTGCCAACACACTTTTTCCCGTTTCAAATG +GCTCCGTTCTACGGGAACCCGGCTTCTATTTTGAAGTCTGAGTGAAGCAT +TTCTAATTTTCTTTTCTTCTTTTCTACACCAATATCTTTTTTCCATTTAT +TCAACTTATTCTTCATCCCATGACACGAAAACCTCAATTCTTTCATATTA +CAAACGATGCTCCGTTTCTTACGCCCTGCCCCACACACGCCCCGTCGTCC +AAAAATCAATCAATAATCGATTTTTCATAATATTTAATTTGTTTTTTTTG +TGTTTCACTGCTGTTTTAATATTTGAAAAGTACGAGTTTCTTCAAAAACA +TTTTTTGAATAATATTTGCTTCACACTCCACGCGTTTCGTAATGTATCGC +TTGCCCTGAAATTTTGTTGGAAATTCCATTTTTTGTGCAATTTTTGTTAT +TTTTCAAGCGAGAATAAATATTTTCTAGTTCTAGAATTCAATGAAATACT +TTGCAAACGCGCCCCATTGCACTTGACCGAGGGGTGAAACAAGTTCAGTG +TTTTCAAGATATTTTTTTTTAAAAACTTGAATTTGGAACAATACAAAAAT +CTGTAAAATGCAACAGTTTTCCAAATCTTGTCCACGTGGAGTACACAATA +ATTTTTCATCTATACGGAAAGATTTCAAAATTGGAATGTTAAATTATTTT +GTGGTTTCTAGGCCATGGGAAAACTAGGCCATCAAAAAAATTCCAGAGTG +ACCTGGAAGTTCTAAAACTTTGTTGAAACTTGTCCACGCGGAGTACACTG +GTAGTTTATCAATAGAGCGCAAGTAATAATCCCTTGGAATTTTCCTTTCA +CCACCCCCTCTCCATTTTATATGAGAGTTTTTTCCCTCGCAATTTCCCAC +GCTCAAGAGGGTGGCCGCCATTTTTTTTTTGAGGGGGGTCGTATCTAGAT +TTGAAACACGTCATTTATGCTCAAAATGTGCCAATTTTTATTGTTTTTGA +GTTTCTAGGCCATTTTGGTAAGTTAGGCCGCATACTGACTTTGCGACTTC +CAAGGATATGGCCTAGCTTTTTTTTCTAGGCCATATAATTTTTTGAGTTG +CAACTGTGGCCAATGTTTTTTTTGGGTTTCTATGCCACTTGTGAAAAGTT +AGGTCATAGAATAACCTTACGGTAAACGGGTCCTCACAATCAGGAATTGT +GGCCTAGTTATTTTCCTAGACCACGTTTGATTTTTGTCTTTCTAAGCCAG +GTTTGGGTCTAGCTCTAGACCTAGGTCTAGTTATAGGTTTAGGTCTATGT 
+CTAGGTCTAGATCTAGTTCTCTCACGTGAGAAAATTAGGCCACCTTTCCC +CCTAAACTCTATGATCTCAAAACTAAAACTACTTTCTAGAAAACTCTTCG +AAAATCATAAAACCAATTTTTTATCACTATTTTCATGCTAAAATCCAATC +AGAAATTGATCAGAAACACGCATATAAATATTACGACTATAAATTCATTT +CTTTTTTTCATTTTTCCCTCCAGAAGGCTCTTGGCGAGAAAAATCTGCAT +ATTTTTTCTACTTATATGTGTCGAGACATGTGATATTCCGGCCGGCGGTG +GCTCTGAATGAATATTCAATTGAACGAGAAATATATATTCTCTCCAGAAG +GCGTTTATATTATGTGTTGCTCCTGTTTCTGCTGTATTCGGCGTGGATTT +GGTATGCTTCTCAGTGGAGTGCAGTTGCAGCATTTTTTTTCTTCAAATTT +TGCAATACTCTCGAACTTTTTGCACTTTCAAGCCTTTTTTTGGGTTGCCT +AGAACACTAGAATTTCTGGACTATTTTGAGATTTCTAGGCCATTCTTGAG +ATTTCTAGGTCATATTGTGATTTCTAGTGCATTTTTGGGTTTCAAGGCCG +AATTGGAAGTTTTAGGCCATTTTTCGGTCACATTTTTAGGCCACTTTGAG +GATCTTGTGCCAAACTGGGATTTCTAGGCCATCATGGGATTTTTAGGCCA +ATTTCGAGTTTTCCAATCATAATATAATTTCTAGGCTATCTTGAAATTCC +TAAGCCAGTTTGGAATAATTAGGCCATCTTGGGATTTCTACGCCATTCTC +GGATTTCTAGGTTAAACTGGAAATTTTAGAATTTTGGGGTTTTAGGTCAT +ACTGCGAATTTTAGTCCACGGTTGGATTTATAGGCTATCGTGGGATATCT +AGGCCGATTATGGAATCCTAGGCCAACTTAAAATTTCTAGGCCATTTCGG +GATTTCTAGGCCATCACGCCTCCTAAAAGGCACAGACAAAGTTCAAAACC +CACCAGAAAAAAGTGTGGTGAATTAGTCAGCAACACCCCATTTTTCTTTT +CCAAAAACTAAAAAAAAAGATACCCCTGACAGATATGGGCAGAGATGGGG +GTGCGAATCCTGAAATTTCCGAATTCCAGTGGGAGCTTTTTGTGCCAGAA +AAAGTAGTGGGGGCAAGTGAGAATGTGTGTCGCCTTCCACTTTTCAAACA +CCATTTTGTCCGTTTCGCCGGCCCGAAAATAGGCTTGAAAAGCTGTGGCC +TAGAATTTTTTCTCAAAATTTTTGACGTGGAAGAATTTTAAGATTTTCGA +GGCCAAGAGGTGGCCTAGAATTCAACATTTATTGGCAATTCTTATCTAGA +TCCCGAACCCCGACCAGTCAGTTTTTAATTTCCGGTGTCTGAATTTTTAA +TTAAAAATTATTTTTGGGAGGAGCTCCTGTTTTTTTTTCGTCAAAAAATG +AAGGTGGCCTAGCTTTCTCATTTTAAGGCCACGAATTCTAGTTAATTTTT +TGTTGGCTCATTTTTTGTTTCAAAAAACGAAAGTTAGTCAAAAACTTTTC +TGCATATCAAATGCTCTTTTTGAGGTTTTTTCCTGCAAAATTTTTCTAAG +AAAATTTGAATTCCCGCCAAAATTTTTCTCAGAAAATTTGAATCGCCGCC +AAAATTTTTCTCTGAAAATTTGAATCGCCGCCAAAATTTTTCTCAGAAAA +TTTGAATTCCTGCCAAAGTTTTCACAATGGGTCTCACCACGAATGGTGAC +AGACCCCGCCCACAATTCTCAGAGACATTGGGTGTGTTTTAGACCAAAAA +AATAGTTTTTTTGACAAAAACAATTTTCCAGCAAATACTTGATCAGTTGT +CATTTGTCTGTTGAAAGTGGAAAAAATAAAACATCTTTCCTTTTTTCTGC 
+TTCTCCCCCTCCTCGTCCTTTTTGTGCTCAAAACTCAAATGGCTCATCTC +AATTGCATGACAAATTAGCAAAAGATGAGAGCAATTTTTCCCTGTTTCTC +TTTTTTTTCCTCCGTTTGGCTCCTCCTCATGTCTTTTTTAGTAGTTTTCA +GAGTTTTACTATTGAATGTTCAAACTGGCTAAAAAATTTTGATTTTTTTT +TCTTTTTAAACTCTTACGAAAATTTTGATATAGGTTAGAAGCCTAAAATC +GACGGCCGACAGCGTTTACATTTGTGGCCTAGACTTCCAAGCCTTGGCCA +TGGCAAAAACTTGTTTAGCCTACCTTTTCAAATAGTGGCCTAAAAACCCT +ATACTATTTCTGAACTAACTGTTTACTCAAAAGAGAAAGTTAGTGGCCTA +ACTTTCTGAATGGTGGCCTAGAAAACTCAATTTTGGCACTTTCATGACCG +GTCATCAACCTGACATTGATGGCCTAAGTTTTAATGAGTGGCCTAGAAAA +CCAAAATATGGCCATGCGCTCAGCATGATCTAACTTTCCAACTCTGCTAT +AAATTCCTTGGGGGAAACCTCGGCCATCACGGGTGGCCTAGAAAACTGAA +ATTCGGCCAAGACCCCGTAATTCATGGCCTAACCTTTTAAAAGGACAGCG +TTTGGCTTTGGTGGCCTAGTTTTTTGTGGCCTAAGTTTACAGAAAAAATC +CTCAGCCACAACCTTTACCCTATACATGAAATCACATAAAAATGTCCAAA +TATTTTCCAATTAAATTTCCAATTCTCCACCCTTAATTCTTCATTTTCTT +TTCTTCTTTTATTCCTTTTTCACTCTCTCATTCTGAAACTAATCTTTTCA +ATTCTCAGCTTCTCTTTTCTGGTGTGGCTCATTTTCACTTTGCTCCCGCC +ATTTTCAATAGGATTATCCGTGCACACACACACATGGCTCAGTGAGCAAC +ACCAACAAAAAAGAGCATATATAGATATACAACGATATCTAACTAGTTAC +ACAGGAGCTTCTGTTGTTCTTCTGTGCCCCTCCGAGCCGAGACAAACTTT +TGGAAAAGAGCAACGCTTCTGGAGCCGACATATTCGAGAAATCCCGGAGA +CAATTGAGACCACTATGAGGAAATGATGGTCGAGGAGGAAATGAGAGATG +GCGTTGCTCAAAGTGCGCGACGAATGCACCAAGTTGGTCGGTTATCAGTG +AGTTTTTGTTTTTTGTTGGCCGAAGTAGAGATAACTAGGCCACCTAGGTA +GGCTGACGATCTTCAGTTTGCAAATCCTCTTCCACCGCACTTTTTTGAGC +GATCTAGGTTTGCAAAAGGTAGGCCGCCTGGTCCACCGTACCTGTCATGA +TGGCAGAGGTCTGGGAAAGCTAGGCCACCTGTCTGAGCTGATGGTCTAAG +GTCTAAAGTTTACCAAAAACTCTTCCACCGAAACTTCTAGGGCGACCAAG +GCTTGCAGAAGCTAGGCCACCAGACTTGGCTGGTGGCCTAGAATTTTCAA +GAACTCTTCCATCGTAAATTATTAGAAAGCCGCGATAGGAAAAAGCTAGG +CCATCAAATTACCTTAGTGGCCCAGAATCTATAAAAACTCTCCCATCGTA +CTTGCAAGGACGGCTGAGCTTGGAGGAAGCTAGGCCACCAGAATAGGCCA +GTGGCCTAGAATCAACAAAAACTCTTCCACCGTACCTCCTACAATGGCCG +AGGTAGGGAAAGATGGGCGCCAGGCTAATCTAACTTTCCTAGACTGACCT +AAATGGGAGAAAACTAGGCCACCGGGTTTACGGAGTGGCCGAAAATTTAC +AGAGACATTGCCCTGACTATCTGACTGTTTTTTTAGCGAAAGAACTTTCA +TATTTTCTAGGCCAGGGGCCCCCTACGCCCCAACACAATCCAATTTTTTT 
+TTGCAGATTTCTAAACTCGCACATTTGTCTCTGCATATCACTCATCCAAC +TTCTCGTCTGCTGCTGGGCCGTTGCTCAGCACGTCAACTCCTACATGACG +CACAGTAAGGTGAGCCAGAGTTCATTGGAGCGAATTTGTAACCAGAGAAA +ATTCAAAACTTAAAATTCAGATCTTAAAATGTGATTTTCTGGAGGGGAGC +CTCCCGCTGGAGGCCGTGGACGCTGTAATTTTCGATATCCGACTTTTCCA +TTACCTATGGGGCATTCGAGGATGTGTGGCAGAGTATTTGGATGGTGGTG +GGTACTTTTAAGTTTCCCACGAATCTTTACATTTTTCAGGCTTCGGGCGA +CTTTTGTGGTGCGTTTCCCACTGCTTAACCCTGGTTTTCTCCATTCCGGT +CACCTTCATCTCCCATCCGAAACCCTGTCTCTTTTGGCCGCTGCTTTTTC +AGGTGAGTTGGCTGAAAAAATTTGGAAAAAAACTTTTTTTAAATTTTTTC +CGAAAGGCGCAGTGGGATTTTCCACGACTACCACTCACAACAGCATGGTT +CGAGTCCGCGTGGGTGTGAATTTTTTTTCTAGTTTTTTTTTGAAGTACGG +AAATTGTATTTAGCCTTGAAAAATTCTAAAATAAGCAAACAAAAAAATCT +CCGATCAAAAACCAAGTAATCCGTCTCCAAAAAGCGCAGCAACGTCAAAT +GGTCCCACGGGGGGCCTGAAACGACAAATTTCCTGACATTTGACGGGTTA +TCCATCCGCCACCCTCTCTTTTGAAGGAACATTGACTTTTTCCGTTGAAA +TGGATAAAAAAATTGAAAAGAATTAGATAGATTCGGGAGAATGAATATGG +TGGCCTAACGTGGTGCTTGACCCATGGCCTAGAAAACCAAAACTCGGTCA +TCACCCTGATTGAGTGCCCCTTAAGAGATTAGGCCACGTACGACGCAATC +CTAGGCCACCAATAGTTTTCTCGGCCACAGCCACCCCAAACAAAAATCTG +AACTTTCTGATCCTTTACATCTGTACCCACTGAACCGTTTGTTTATCTTT +TTACTCGCTCCACGGGAGCAGAACTTTTTTTTTGAAATTCCACAAAAGAA +TGGGACAGACACTAGTGTATTTTGTTTTTTATTCAGACATGAGCCAGACC +AGAAATAGGATTGAATCTCGGATTTCAGAGCCTGGAAAAAATCGGAAAAA +ATCGGAAAACTGTGGGAGAACTGAGTTGGGTGGTAAATTTGGCAAATTGC +CAAAATTTGGGGAAATTAACTTTTTCGGTAATCTGTAACTACAGTTTTCC +TAAAAATGTATGAACCTTTTAGTGATTCTGCATAGTTTTTCAGGTCTTTA +AAATTGAACCTACAGTACCCCGTACAGTTCAGAAGATTGCAAAATTCCAA +GTTGACCACTTTTAAAGCGTGTATCTCGGCTGATTGGGAAAGTATCGGAA +TATTTTAAAATACAAATTTATTGTTGGAAAATTTTTTTTTTACTTAAAAT +TAATTAAAATTCAGCTTTCAAAAATGAGCAATGTATTTATAGATCCCATG +CAACTTTGAAATTTTGTATATCCGTTCAGTGTAATGCTATCAAAAAATTT +TCAACTACAAAATTATAGTAAAATTTAAAACAAAGAAATTGTTAGTTGAC +AACTTTTTAATGAAATAAAGTACAACCGAGATATGAGTTCTCAAAGTTGA +GCTACTTTATCGGCAACCACAATATAAGTCGCCCCCGTACAATGCACCAT +GTGGTAGAAAATTTCGGTTTGATCTACTTAGATCTACAAAAAATGCGGGA +GAAGAGACGCAGAGTTTTCTTTGCATAGCTAAGAACGTGCTGACGTCACA +TTTTGTTGGGCAAAAAATTCCCGCACTTTTTTGTAGATCAAACCGTCGTG 
+GGACAAACTGGTACCTCTTCGTTTTGTTGGTATGAAAATTTTGAAACTCA +CTTCGTTTTGTTGGTATGAAAATTTTAAACTAGAAAAGCCAAAGTTGAGC +TAGGCGCTCAAATTTGGTAGCATTTACTTTTTTCAACAAAAAAATAGCTG +GCCGAGCTTTCCATTTTTACGGCCACGTTGCAATAAAAATCCAGGAGGTC +CATGTCATCAACAAAAGTGCAAGGATGTAAAACAATTACACCGAAACGTT +TGTCGGGAACTGCTTTTTCAAAAAAAAAAGGATCTCTCCGAACACATGTC +ACCCCCAGGGTTCTAGCGTCTCCCTGACCTCGCGAGCTATTTCCTTGAGA +CATATCTCTCCGAAACCTTTTGTAATGGTTGTTTCATGTTCTTCTTCATC +TTCTTCTTATTCTTCTTGCTGGGATAAGCAGTTGAAGAAAACGAAAAGAA +CACATAAAACACAGGCACACACACACACACATACACACCGTAATATCATA +TAAAAGTGCGTCTGCGTCTCCCAGAGCGCCCTGGGGGTAAGGAACGCGGG +CGCCCGCAAGGAAAAAGTGCAAAGAAATTAGAGAAAAAAGACCGGTCTCT +CACTCACTTCTCGATCCCTTTTCTTCTCTAACTATAAAAAATGTGTGGCC +TAACGAACGAAAGCGAAAAAAATCATCGAGAGAAGTAGTGAGAGAGAGAG +AGAAGAAAGAGTTTTTTTGGACGCCGCCGCAGCTGCCAAGCATCAATAAT +GACATGCTGGAGAAAAAAATTCCAAAGTTTAAATGTATTGGGGATCAGAA +GCCTAGACCATAAGCCCCTTGTAGAGTTTTCTCGGCCATAGCCTTTTTGG +TACGGTACATATACATAGAGACTTCAGATTGGGAATTGCGAATCTGTTAG +AGTAAGAATAGGCCTAAAACATTATTGGGCCTAACGGTGACGGAGACTAG +GCCTTGTTGGGAGCTTAAACTATGACATTGCCTAGAACGTTGTTCTATCT +TGGTTATGTGGTGTCGGGCTTTCATGATCAGCTCCTCTACTTTTGTCCAG +AGAAGTTTTGGACTTTTTCTATGGGTGCTTTGGGCTCTATTGCACAAATA +GGTGATCTTGCGCACAACACAACGTAGATCAAATTCTTGTCTAGGCTCTG +CAGGCTCCTGAGAGGTGTTTTGTACAAGCCCAAAGCAAAAGGATCTCAAA +ATGTTTACAGGACTTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTT +TGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAG +GCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTAC +CATAATACTATTCTTACCGCCGCGCCTGATCAAACCAAAGAGTAGGTCAA +CAACCAATATTGACTTGCACTTTTTGCACGGTTAACTATATTACGCATAT +GGCCAATTTTGTTAGGGAGCATTGATTCATGGAAAAAATTTCGGAATTTA +TGAATGTTCAATTAAACTGGACTACGACAATTATTGGGTTCAAACATTTG +AAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTA +GTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTT +CTGATTTTTTTTCAATTGTTTTTAGATCCCCCAAGCCTAAGCCTAAGCCT +AAGTTTGAGCATAAGCATAAGCCTAAGCCTATGCCTAAGCCTACGCCTCA +GCCTAAGCCAAATGCGGAGCCTAAGCCTAAGCCTAAGTCTAAGTCTAAGT +CTAAGTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTGCCGGAGCCTAA +GTATAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTATGCCTAAGC 
+CTAAGCCGATGCCTAAGCCTAAGCAAGTTCCGACATTTTCGACAATTATC +TAAAAATCCTCTGCTTGAAGTCCTGAAAATCTTTAGGGATCCTCAAGTTT +CGAGTTTGTACGGAACGGCCCAAACGCCTCTTGGGAGCCTGCAGAGCCTA +GACAAGAATTTGATCTATGTTGTGTTGTGTACAAGGTCACCGGTTTGTTG +TGCATCAGAGCCCAGAGCACCCACAGAAAAAAGTCCAAGAGTGTATAGCC +TACGGTAGTCCTTGTTTAGGAGACATAGACAGTCTCTTTCTCTAACGGTT +TTTGTTTATTCTTTTTTTTTCTCCCCCTACTTTCCGACAGACATTTTCAT +CTTCATCTCCTTCTGCTGCGCGAATTGAATGGTCTATAAGTGTCTGCGTC +TCGCCGATTCGTACAGTCTATCGCTTTGAAATCGTCACCCCCAGGCAAAC +AGGACACCCGAACACACACACAACTATCCTCATTCTCAAAAAAATGGCTT +CACTGCATAAAAGTGCAGAAATTAGAGAGTATGCGGAGATTCGGAGAATC +CAGAGAGAACACACAAACACACTCGGAAACTCATATAGTTAGACCAGAAT +GATGGCTTTTTTGGAAATTTGAATTGGAAAGGGGTTTTTGAAAATTGGTT +GGAAGCATTTCAGATAGATTTACATTGAAAAAAATTAGCAGGAGCACCTT +TTCCATTTTAAAAAAATTTGTTGGCTTTAACTTTTTTCACGGATTTCCGG +CTTCCCTCATAAATTGAAATGGAAGAGTTTTTGCCGAACTAGGCCATTTT +GGCTCAGCCATATCTGGGGTAGATTTACGGCGCGTTGCGTGTCGCGTCGC +GGCTCGTTTTTAGTTGTAAAACTAAATTTATTTGTCCGTGTGGAGTACAC +AACTTTCCCACGCGTTGTCCATCTGAAGATTGTCAATGGAGCGCGAAAAA +TTCAGTGAGGAAGGCAAGAACTCCGTTTTTTTTCAATACTGAAAAGTCTA +ATATTGAATTAATAAATTATTCAAACATTTAAAAACAATCATTTTTAGTG +TTTTGAATTTTTTTTTAATTTCAAATTTTTCTTTAAAACATTAAAATTTT +AAAAACATTTCCAAATTGTTCCAAATAACATTATTGAACTCGTAAAACAA +ATTTAAAGGTGAATTTTCATTTATTTCGTTAAAAAATTACCTTTTAAAAT +TTATTTTTTCAAAAAAATCTGGATTTTTTTTCTGCGATAATATTCGAGCA +GTTTTTTTCGATTTTTTTTCTGAAATTCTGTGAAATTATTAACATTATTT +GAATTAATTATTCAAACAATTTCAAACAGAAATACAACGAGTTTTTCCAT +TTTTTATCATTTTCCAAAGTTTCAGAAAAAAATTTCGCAAAAAAAATTTA +GATTTTTCCAAAAAAATTAAATTTTTGAAAAATTCTCAAACGCACGGATT +TCTGGCTTCCTTCATAAATTGAAATGGAAGAGTTTGCCGAACTAGGCCAC +TTTGGCTCTGCCATATCTGGGGTAGATTTACGGTGCGTTGCGTGTCGCGT +CGCGGCTCGATTTTAGTTGTAAAACTGAATGTATTTGTCCGTGTGGAGTA +TACGACTTCCCCTCCGACAGGCGATTATCAATGAAGCGCGAAAAATTCAG +TGAAGAAGGCCAGAACCCAAAGTTTCACCGAAAAATTCGAAATAAAAAAT +CCTGAAAAATTTTTTAATTTTTCCATAAAAATTAAAAATTTTTAAAAAAT +TCCCAAATTGTTCCCAAGACACATTTCAGAGCAGGTTTCAAAAAATCATA +TAAATTATTTATAAGGGTAACAACACAAAATTTGGGTAACAAGAGCAACA +ATTTTTAATTAAATGTTTCATTTTTCATCAAAATTACTCAATTTTCCTGT 
+TTCAGCAATCGGCCTATGGTATTTGCCTCCTGGTTCTTCTTCTAGCAGCT +CTTCCAAGAATTGTTGTAGTACTTGCGTCTCCACAAGCGGCTCCGTTGAT +TCCTATTCTGTTTTATCTTGTTGGCACCGCGCTCAATTTTTTTCATGTGA +GTACTTGCTCACTTTTTTATGGTCTAAAAATTGGCGGGAAATTCAAAAAG +TTAAGAGTTTCAAAAAGATCGGCATAAAATTTAAATTGAACATTTTTGGG +GATTTTTTTGGAAAAAAAAACAAATTCGAAATTCGACCCATTTTGGCGGG +AATTCAAATTCTTTTAAAAATTTCGAAAAACATTTTTCTGCAAAAAAAAA +TTTTCATTTAAAAAAAAAACATTTTAAAAAGCAAGCTTTGCCGGGAAGAA +TTCAAAAAATGTGGCTGGGATTTTGGCGCGAAATTCAAATTATTTTAAAA +AATTTTTGAGAATTTCTTGAAAAAAAAACTGAAAATAATTTTGCGGAAAT +TTGAATTCTTTAAAAAATGCCAAAAAAAAAAGATGTTTGTGCAGAAAAAA +TTTTAATTTTTAAAAAGCAATTTTTCACGGATTTCTGGCTTCCCTCATAA +ATTGAAATGGAAGAGTCTGCCGAACTAGGCCATTTTGGCTCTGCCATATC +TGGGGTAGATTTACGGCGCGTTGCGTGTCGCGTCGCGGCTAGATTTCAGT +TGTAAAACTAAATATATTTGTCCGTGTGGAGTACACGACTTTCCCTCCGA +CAGGCGATTATCAATGAAGCGCGAAAAATGCGAAGAGGAATCTGTTCTAA +AATTAGGCGGGAAATTCAAAAAGTTCAGGGTTTCAAAAAGATCGGCGAGT +TTCAAAAATGTCAAAAATATTTTTCTGCAGAAAAATTGTCGACTTTTTAA +TAAACTTTGCCGGAAAGAACACGTGGTGTCAGAGTGTCCGGTTTGATCTA +CGTAGATCTACAAAAAATGCGGGAGTTGAGAAGCAGAGTTTTCAACTGAT +TTCGAATGGTTAAGAACGTGCTGACGTCACATTTTTTGGGCGAAAAATTC +CCGCATTTTTCGTAGATCAAACTGTAATAGGACAGCCTGACACCAAGTGG +AAGAATTCAAAAAATGTCGCTGGAATTTTGGCGCCAAATTCAAATTACCA +AACGGTATTTAATACATAGAGCTTCTCACTCAACATCTGAGCTACCGGAA +ATTAACAACCTTCACATTTTTTGGTCATTACTCACACACATGTGGTTTCT +AAGCAAGTTTCAGATACTTTTTTTTTTGTTTTTTTTTGAATTAATACTAT +TTTTTGCAAAAGTCGCTTCGGGACCCAAAAAAAAAATTCAAATTCTTTTC +AGTTATACGTGTACTGGCACTGGTATTGGCACGTGAGTGCCATGTGGAAT +TCGGTGGTCCGTGTAAAGTTCGGACAGCGGCTCGAAAACGCGAATAAGCG +AAGTCGACGGGATAAGCCGATTGTGCCGAAGGAGGAGATCGATGATGCCG +TTTTGCATTACGTGCCAGCGAATAAGCCAACTGAACAGCAGAATCATGTT +CAGCACAATCATATCAGTCAAAACGGGATTCAGCAACCAAAGGTAGGTCT +ACCTACCTGCCTATTTTCCTGCCTACTCGCCTAAGTTTGTCTAAAATCCA +ACTGCTTGTCTGTTCTGCATACCAATATGCCTAGACATACCCACATTTCT +GCTCATCTTCTAACATGCCTGCTCGCCTAACTTAATGCTGTATGCCTATG +TACTAGGTTACTGCCAGCCTGCCTACTTGCCTACCTACACAAATGCCTAT +CTGACTACATCTATATCTATGCTTCTCTATGCATAAATGCCTGCCAAAAT +GGCTAACTGCGTGTCAAAGTTCATGCATGCCTGCCTACCTGCCTACGTGC 
+GTACATGCCTATTTGCTTACCTAAATGATTACCTGCTTACCTACCTGTCT +ACCTGCTTAAGAGCCCTACTTATTTGCCTACCCACCTGACTGCGTGTCTC +CTCTCTTTCTGCCAATCTAGGCCTACATGTCTATCTAGGCCTATATAAAT +TCCTACCTGCCTACCTGCCTAAATACCTACTTGCCTGGCTATGTCTTCTT +ACCAACCGTTTTTTTATGCCTACGTACCTGCGCGCATGCCTACCTACGTG +CCTACCAACACCTACAAATACCTGTCCGCCTTCATCACATTTTTCACTTT +CAGGTAATCCTTCCACCACCGAACCCCTACAACACCGCGCCCACACATTA +TCGACGCCGATCCTCCACAGAAGCCCACCGCTACCACCCACCTCGGCAAC +CGAGAAGCTCGGCGCCAACCCACCACCGAGTACCGGCCGACTATCCGAGT +GACGAGGAAGACGACTATGATGATACGGAAGGAGACGACGCGGATATTGA +TGATCTACCCACACCGCCACCGCCGATTTACGCCGTCCGCCTGACCTCCG +ATTCGTGGGAGAATCAGATGAGCCGCCCGTCGGGTAGACGGCGGTTACCG +GCGACTCCGAATTTGCCGAAACATGGAGAATTGCCGCAGATATTTAATAT +TCCTCATGCTAATGTTTAATCAAATTTTTGGGGTTTTATGGGTTTTGGAT +TTTCGGAGCGGAGCTTCATTTATCGCTTTAATATTGAATCAGGGGTGGTT +TTCAATTCTGGCAATTTGCCGGTTTGCCGATTTGCCGGAAAATTTAATTT +TCGGCAAATTGCCGATTTGCTGTTTGCGGGATATCAGATTTTCCGAAAAT +GTTTAGAGGGATTTTTTATAAGACGAAAACACTAAAAACTGTGTCTTTTT +GAATTTTTTCCACTTTTTCTACATATTTTCATAGAATTTGCTTACTTTTC +AAATTGCCGATTTGCCGGAAATTTTTAATTCCGCCAATTTGCCGATTTGC +CGTAAAAAATCGTTTGCCGCCCACCCCTGTATTGAATTATCCGGAACACA +GCAACACTCTGAAATTCGAGCATTAGTTAAATGTGATATTTGAAAAGTTT +TGCAAACAAAATAATTTTCCCCAGCTTTCATCATTTTTTCTTTCAACAAA +AAAAAACCGAGTGATATTATGATTTCCAATTTGTGCTATTTTGAGTTTTA +TTTTATTTTATTTTGTTGTTTTCAATAAATAAACGAATATTTTTTGGTCA +TTTTTTGGCGCTATGAAACTAAAAAATTTCAGGTAACTTAGCGGGAAGTT +CGTTTTTTTTTCATTTTGGCAGGAAATTTCATTTTATAAAATGGCAAAAA +CTTTTCGTTAAAAGTTTTTAAATTTTGAGAAAATTGACGAGAAATTAAAA +ATTTCAAATTTTTTGGCTCTAAATTAGGATTATAATTTTCCGAAAAAAAA +TTTCCGAATTAAAATTTGGCGTCTCAGTGTGTTATTCAATTAGAAAAACC +TTCAAAATATTAAGACATTTCGGAGGCACGTGGTGTCAGAGTGTCTATGC +TCCTTTAAATGATGCATTCAAAGGCGCTTACACACCTGCACGTAATCTCT +TTTTTGTCTCAAAACAAAAGGAGCTTCAGACTCTCATATCTCTGCAACTC +TGTGTGAATAACAATTGGAGCCCCAGAGCTCCCGGCTCCACACTGCCCGT +TTCCATGGAGACACTCCGATTATTATGGGAGCCACCACTGCTCGCCGAGC +AGCAGCAGCAGCAGCCCATTGTTAACCCGATGACCAATATCTATATTTGG +GTCCGGACCGCCCGTTCCCTTCAGTTTTTTTATCCGACTATCGATGTGGA +GGCCGATGGGTTCGGTTGCTTCTATGAGATTTGTTTGGAAATTGTTGGCT 
+TGTTGGAGGAGTGCGATAGGATAAGGTAAGCGTGCTCTATTGATAACACG +AATTTTTGTGATTTTCGGCAACATGGGATCGTGGTTGCCCAAAGGGAAGG +AAACGCGCCCCATTGATAAAATCATTTCGCAATAGAGCGCATTTTCATCA +TTTTTTTGCAGTTTTGACGAATTCGAGACCCTCTGCGAGAATAGAATATC +GGAAGTTTATGTGGAAGCAATTTTTCAAAATGCTGTGGAACAAGGAAATG +TGAGTTTTTTTGTGATTCTGATGCTAATTTAGCAAGTGCGCTCTATCGAC +AATTATGACTTTGTGCACGTTCAAGTATTTTCTCAAAGTTTAACACAGGG +TTCTGGCCTTCCTCATTGAATTTTTCGCGCTCCGTTGACAACGGACAACG +CGTGGGAAAGTCGTGTACTCCACACGGACAAATCACATTTAGTTTTACAA +CTAAAATCGAGCCGCGACGCGACACGCAAGGCACCGTAAATCTACCCCAG +ATATGGCCTGGCCTAGTTCGGCAAAAACTCTTCCATTTCAATTTATGAGG +GAAGCCAGAAATCCGTGAAGTTTAACAAATTGTGAGGTGTAGCAAACACG +CTCAAGTGACAATTTTTACTTTATGCGTCATAAATTTTTTTCTGAAACTT +AGAAAACGCGCTCTATTGCTACTTTGAACGGAAAACGCGCTCTAATGTCA +ATTTTATTTCCAGATAAACGCCCGCCTAAAGCTTCTCACCTTCTCACTTC +TCTTCGCAGGTCTCCAACAAAAAACTAAAGATCTCACAAATTCCATCAGG +TCGGACGTGGTCTTGACACGAATTTCAAAGTATTTTGGTAAATTTGAAGA +ATAATATTTGTTATTTTTTTGTTTTCCGAACTTTACAGGCCAAATATTCA +AACTTTTGTTCCAATTAGACGTGGGCCGCCGGTTACAGGACAGAGAAATT +GTGCACGCCATTGGAAATGGGCAGGTGAGAAGTTCGGCTACAAAAACGTG +ACGCGTGGCCTAGAAAGATTTGTTTTTCCACAGCTTTTTCCACAGCTTCC +ACAGCTAACGGGATGGCCTAAAAAACTCCACGGTCGTTTAGCGATAGAGC +GCACTTGCACATTTTGGGAATCTTCTGGAAGAAAGTAAACGTCACAAAAG +GCCTAGAAATCTTTATCGTGTGTGGTTTAAGAAACAATTCTCGGCTTTGA +AATTATGGTTGCTCGGGGAAAAATGATTGTTAGAAGCCGCACGCTTGCAC +GCTTGCACAGCTTGGGAATTTTCTGGAAAAGAAGTGACGTCACTGGTGGC +CTGTAAATTTTCGCCATGTGTGAATTAACAAAACAATTATGGGCTTTGAA +ATTATCGATTTTCACGGATTTTTGGCTTCCCTCATAAATTGAAATGGAAG +AGTTTTTGCCGAACTGGGCCATTTTGGCTCGGCCATATCTGGGGTAGATT +TACGGCGCGTTGCGTGTCGCGTCGCGGCTCGATTTTAGTTGTAAAACTAA +ATTTATTTGTCCGTGTGGAGTACACAACTTTCCCACGCGTTGTCCGGCGG +GCGATTGTCAATGGAGCGCAGAAAATGCAATGAGGAAGGCCAGACCACGT +GAAAATCAATAAGTTTTTCAGCAAAAAATGAAAAATGAGGAAATTTCGTT +AATTTTAGTTTTGTTAGCAGTAGTGCGCGCTCACATAGTTCAAGAATTTT +TTTTTGAAGAAAAGTGACGTCACAGATGGCCTAGGAATCTCATCTATGAC +GTCACTTCTCTTCCACTTTATCCTATTTTCAGGGCTATGAATTCCTGGAC +ACCCTATTTGAGGGAACAGTAAAGCACAGACATGGAAACTCGAACCATCA +CGGCGTGCGCCCCCTTTCCCATATAATCCCCCAAATTCTCGAAAAAGCGA 
+GCAGTATATCCCTACCATTTTTGTGCAATGAAATCCGCCGACACCTTATC +AAAGACCCGTATCATATTGATGAGTTCTCAACTGACAAACCTGCTCAAAT +ATTTTCGACGGCTCGCCGAACGACACTGGCACATAATAGTCGACGGGTGG +TACACTTGCATTATATCAATCGAATTGAAGTGCTGCGGAAGGATGAATTT +GCGAATGTTCACCTGAGAATTTGTAATGATAAGATTAAGGCGTCGTTCGT +TTTTGTTCCGTTGAGGTGAGATTTTTTGGCCGAACTTTTTTTTGTTGCTT +TTTTTGAAAAATTTTTAAGAGATTTCATTTTTTTTTACAATAATAGAAAA +ATCAGAAAAAATATTCACGAAACCAATAAGTGGGCGGAGCATCAAACTGG +CACGCTCATTTTCCGTAGATCGTTTCTGCCAAAATCGAAAATTTCTTGAG +CTTTCTTGAAAAATAACGCTAAATTTAATTGTGAATTTATTATTATCTCG +AAAAAAATTATCAAACAGTCGAAAAGGGGCGGGCTTACATTTTTTAGCCC +CGCCCACTTTTAGATTTGTTTTGTTGAAAAATGTCCAAAACTGAGTTTTT +CTCGGAAAATTTGAATTCCCGCCAAATTTTTTTCCAGAAAATTTGAATTC +CCGCCAAATTTTTTCCAGAAAATTTGAATTCCCTCCAAATGTTTTTATTT +TCAGAAAATTTGAATTTCCCGTCAAAATTTTTTTCCCAGAAAATTTGAAT +TTCCCGCCAAAATTTTCCCAGAAAATTTGAATTCCCGCCAAAATGTTTTC +CAGAAAATTTGAATTCCCGCCAAAAAAAATTTTCTCAGAAAATTTGAGTT +TCCCGCCAAAAAATTTTTTTTAAAAGAAAATTTAAATTCCCGCCAAAATG +TTTTCCAGAAAATTTGAATTCCCGCCAAAATTTTTTCCAGAAAATTTGAG +TTCCCGCCAAAAATTGTTTTACTGAGAAAATTTGAATCCCCCCCCCCCCC +CCCCCCCCAAAAAATTGTTTTACTCAGAAAATTTGAATTCCCGCCAAAAA +TGTTTTTTCTAAGAAAATTTTAATTCCCGCCAAATTTTTTTTTAGAGAAA +ATTTGAATTCCAGCCAAAATGTTTTCCAGAGAATTTGAATTTCCCTCCAA +ATTTTTTTTTCTCAGAAAATTTGAATTCCCGAATTTTATTTTTGAAAATT +TGTGTGAAATTAAATTTTTGGGAATATTCATTGTGGCCTAAGTTGCTCCA +AATGGAAGAGTTTTTTAAAAATTAATTTCTCGTCCACGCGTCAATTGTAT +GCAAGAAATTCGAATATTTTCCAGATGTGAAACCGTATTCCTCGACAGAC +TTATGTACACAAAATGGATAGTTCTTGGCGCAGTACGCGGCATTGTAATT +GTTAAAAATTGTCAATCGACCCGAATTTCGGTGAGCTGCGACCAGCTGAT +TGTGCTCGATTCGAAAAATATCGAAATCTACGCCATGTCGCCGAAGAAAC +CAATAATTTTCAACAGCTCTGCGGTAACTTTTGCCCCATTTAACACGATT +TACGAGGTGAGTTCGGTGGGGCGCGTTTGCATTTATGAATTCGAAAATTT +>CHROMOSOME_II +CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC +TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA +AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG +CCTAAGCCTAAGCCTAAGCCTAAAATAGTGACTCTGGCAGTTCTCTAAAA +TAAGTGACTCTGGCAGTTCACCAAAAATTGTGACTCTGACCGTTCACCAA +AAATAGTGACTCTGACCGTTCACCAAAAATAGTGACTCTGACCGTTCACA 
+AAAAATAGTGACTCTGACCGTTCACCAAATATAGTGACTCTGACCGTTCA +CCAAAAATTGTGACAATGACCGTTCACCAAAAATTGTGACTCTGACCGTC +ACTATTTTTATTGAACTGCCAGAGTCACTATTTTTAGTGAACTTCCAGAG +TCACAATTTTTAGTGAACTGCCAGAGTCACTATTTTTAGTGAACTGCCAG +AGTCACTTATTTTGGTGCACTGGGGTGGGTCACGCCCCCAGTTCTCAGTT +ATGGGTACTCTGATCCACTCGGGACCCACTTTATCGTGTTCCCCGTGCCT +CATTTACCCTAGAGCTTCCTCCTTTACCTCTCCTCTCGCTATCTCTAACA +TTCCAATGGAAACTCCTATTTGAATTACCGCCACCGATGTGCCCGACGCG +ACTTACTGTTAGCCCTTGTTTTGCACAAATCTGTTGGCTTCCATATTTAA +AAGTTAATTAATGACCCAATGTTCTTTTTTTCTCTAAATCTCCACAAGAT +GTTCTGTTTTCCCTACTGGACACTATCGTTCACTGCGTCTCACCAATTCA +CATTGTCTCTACTTTACCTTTTTTGTCATAGTACACGTTCGCCAACGGTG +TCGACGGCCAAATGCTTTGGGCAGCGTTTGCTTTTTTTATAATTAGTTTT +ATTTTATTAAAACAATAGCTCTAAAGTTTACAAGTCATTTGTTATAGGCT +AAATGAGTTATGTCTAATAAGTAATTTGAACTAGATACTTCCGTGTAAGT +GACAATGTATCGGAAAAGTCCTCAAAGTGCGATGTAGAAGTTCACATGTA +CTTTGTTTGGCATGTTAGTAAAAGAGCCAGTATGCTGATTCATTTTATAT +TCTATATACTCATGTAATATGCCCATGTAAGGTTTAATTCCAAAAATATG +AGCGTGTTCTATTTTATAATATTTTACTAAAATACCTTTCAGTTAATTGC +ACTCAAATTTGTTGTTCTTCATTCTCTCGTTATGATTTAATCTTATTGCG +TCAAGGTCATTATTTTAGGTCCATTAGTTATCGATCTGAAACATGTTGTT +GTATTTTTCTATTCTTGTGAGCTCAGGACACCTCATACAACTCCAGAGAA +AATGTGTCTCATTATTCTTGTCTTTTTTCAAGATCTAATCAATTTTCTAC +ATTAACGACGTTTTTGTCGTTCTGCTTCTTTTTTTCGTTCGTTTGTCTCG +TCCATCAGCTGTCCACTCATTTCTCTCCCACTCACTAGGCAGTGCTTTGT +TTGGTTCCGATTGGCAGCTGGCTGCAGGGCCTGCATCTCTTCTATGTCTC +TCATTTACTTGCATTCTTTTCTTCGTTAATTTTTGTTATGATATTTAAAC +GGGAAGAAGAGTTTGTGGTTCTTCTTTTTATAATCACTAAAACTTTTGGA +TAAGTAACAATTTTCTGATAAAAATATTTTCACGGCGAAGAAAAAAGAAA +AAGAAGAGTAGTTTTTGCACGTTTTCATATAATTATTTTCGTTGATCAAA +TGTTCTTCTGGAGTTTTCTAATAAATTTCTTATCGACTTTTTTTCAGAAA +TTTTTCTCAACTTGTCATGTCAATGGTAAGAAATGTATCAAATCAGAGCG +AAAAATTGGAAGTAAGTTCTTTATAATTTCATTTATATACTATAAGTTTT +CTCGATCACAGGAGAAACAAAAACAACAGACAACACAAAAAACAATAAAA +CAATATTGCTCTAGTAATCAATAGTGTTGTAAAGAGGGAAGAAAATTGTT +ATCTGTGTAGCAGTCAACGTTGATTGAGATGTTGTGTTTGACTATAGAGT +TGAAAATAATAACTTCAAACTTGCAAGTCATGACTTATCAAACACTGCCG +GAACTTATTCTGGATCAAAGGAAAGTTGTCCAACTGTAGAGTCATGTTTT 
+TCAAAAGAAAACACAATTTTTAAGTATAAATATTTTGAAAAAGTATGTTT +TAGAAGTATGTCAAATTAAAAAAAAAATCCTTGGTTAAAAAATGATTTTT +TTGGATATATGTGTATTTTTAACTAAAAATATATACTTTACATATATATT +TTGGCGCAGTTATTTGATCTATAAATCAAACTTTTTGATAGACATTTTTT +TATATTTACAACAACTAGGGTTGTTATGAAAACGCCTATTATTCTACAAA +CTAAATTATTTTAATCATACATTCCCCACTATCTAAAAACTAATGCAATT +TTCAGATTTTGTCATGTAAATGGGTAGGATGTCTCAAATCAACAGAAGTG +TTCAAAACGGTTGAAAAGTTATTAGATCATGTTACGGCTGATCATATTCC +AGAAGTTATTGTAAACGATGACGGGTCGGAGGAAGTCGTTTGTCAGTGGG +ATTGCTGCGAAATGGGTGCCAGTCGTGGAAATCTTCAAAAAAAGGTATTT +TTAATTTAATGTGCATTTTATAATATAAATTCTTCAGAAAGAGTGGATGG +AGAATCACTTCAAAACACGTCATGTTCGCAAAGCAAAAATATTCAAATGC +TTAATTGAGGATTGCCCTGTGGTAAAGTCAAGTAGTCAGGAAATTGAAAC +CCATCTCAGAATAAGTCATCCAATAAATCCGAAAAAAGGTATTCACAATT +TGCATGATATTGTTATAATCTAATTTTCAGAGAGACTGAAAGAGTTTAAA +AGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAAT +TGTGAACGGAGAGGTTCAATGGAAGACTCCACCGCGGTAAGTGTGTTTCT +TTAAAAATTACTTCCTTTTTTCAATTGTTTGAAATTAACAAGAAACCTGT +TGGAGCGTATTTCTGAACTTTTAAATCGAAAATATCATTTGCAAAAAAAC +TTGAAAATTGAGAAACTTTTTTAAAAGTGGAGTAGCGTCTGCGGGTTTTT +TTGCCCTAAATGACAGAATACATACCCAATATACCGAATATAACCGTAAT +AAAATTATGCGATTTTTATTTTTATTTTTCATGAATGTTAGGGGCAAAAA +ACCCACATGCGCTACTCCGCCTTAAGAAGAATCAGCTGTGAGCACTATCC +ACTATACATTGGAAATTTACAAATAAAATAGAGATTAAGTAATATAATTT +TTAAGGGTTAAAAAAAAGACTGTGATATACTATGATGATGGGCCGAGGTA +TGTATTTCCAACGGGATGTGCGAGATGCAACTATGATAGTGACGAATCAG +AACTGGAATCAGATGAGTTTTGGTCAGCCACAGAGATGTCAGATAATGAA +GAGTACGTTGTTTTGCAAATTGATTAAAAGTGGAGTAGCGTCAGTTAAAA +ACTCTAACATGTCTTAGGTTTTTCAAAAGTTTGGTCAAAGTTTTGGCAAA +CTGCCAACTTCTTGAAAACTTCGTTAAAAAAATTCTTGAAATGATTTGAA +AATTTGTATTATGTTATTCTCTTATTTCTGCACTATTCTATATGGCGCTA +CTATACTTTTAATTGATTTCTTGAAAGCAGTTCAATAATAATTAATTTTA +GAGTATATGTGAACTTCCGTGGAATGAACTGTATCTCAACAGGAAAGTCG +GCCAGTATGGTCCCGAGCAAACGAAGAAATTGGCCAAAAAGAGTGAAGAA +AAGGCTATCGACACAAAGAAACAATCAGAAAACTATTCGACCACCAGAGC +TGAATAAAAATAATATAGAGATAAAAGATATGAAGTAAGTCGAAATTGAC +AAACAGTGGTTTTTGTTTAAGTTTATTGCGAAATATTCAAAATTAGACAT +GTTAAAATTTTGCGAGATAATCTAAAGATTAGGTATACAGATTTTTTCAT 
+GTAAAGTTACATTCATCAAAATTTTTGTGTTCACCAAATTAGACAAAAAA +TGTTAGTTACACAGTATATTTATTTTTTATATCAATAAAACCTTTTTCAG +CTCAAATAACCTTGAAGAACGCAACAGAGAAGAATGCATTCAGCCTGTTT +CTGTTGAAAAGAACATCCTGCATTTTGAAAAATTCAAATCAAATCAAATT +TGCATTGTTCGGGAAAACAATAAATTTAGAGAAGGAACGAGAAGACGCAG +AAAGAATTCTGGTGAATCGGAAGACTTGAAAATTCATGAAAACTTTACTG +AAAAACGAAGACCCATTCGATCATGCAAACAAAATATAAGTTTCTATGAA +ATGGACGGGGATATAGAAGAATTTGAAGTGTTTTTCGATACTCCCACAAA +AAGCAAAAAAGTACTTCTGGATATCTACAGTGCGAAGAAAATGCCAAAAA +TTGAGGTTGAAGATTCATTAGTTAATAAGTTTCATTCAAAACGTCCATCA +AGAGCATGTCGAGTTCTTGGAAGTATGGAAGAAGTACCATTTGATGTGGA +AATAGGATATTGATTTTATAACGTGTAATTGAGTTTTGGCCAAAAAGGTA +TGGAAAGGTGGCTGTTTAGTTATATATTTTTCTATTATTTATTTGAAACA +TGCAAAATTGAAGTGAACAATAAGTGATGTTCATGGAAATTTAAACTGTT +TTATGATACTTTTTTGAGAAATTGAAAAATCTGTTCATTTTAGAAACAAT +GTCCACATGGTTCTAAGAGCTAAAATTTTTATTTTCATCCATTTAGAGTA +CTTTCTCTTTTAGAGTACGGCCCCAGAGCGATGTTAGAAACCTGAGATCG +GTCAACACAGACCGTTAATTTTGGGAAGTTGAGAAATTCGCTAGTTTCTG +>CHROMOSOME_III +CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC +TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA +AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAAAGCGCGAAATCCTATGAA +ATTTCAAGGATTTATAACTTTTTGAGTCCGAAAAGAGTCGGAAATCAGAT +AAAAATAAAATCGGGCTTTTCGACCAAAATTAGGCCTCGAAAAGTCTGAG +AAAGCACGGAATCTATGGATTCACGGCTTTTTGATTCCGAAAAGGTTCGG +AAAGAGAAGAAAATCGATAAGAATCGATAAATTTGCAGAAGTATTGCAAA +TAAATTCCGATTTTCACTAGAATTGCGTCATTTGGGCGCAGAAAATCGGG +CTTTTCGACCAAAATTAGGCCTAGAAAAGCATGAAAGGGGTCCAAATTCT +GAGATTTCGCAACAGTTTTTGGACAAGGTTATCCCGGAAATTAGCGGGAA +GCACTGAAGATCGAAAAACTGAAGATCTTGTCGTTGAGAATCGTTCAACA +GCGTCAGTTACTACTGAAAAACTGATTTTTGAGCAGAAAATCTGCCGAAA +TGTCCGAAATCGGACGAAAATCAGTGGTTTTTTCGCCAGGAACTCGTAAA +CGAGCGGGAAAAAACACTAAAATAAAATACATAATCGACGACGGCGTCGA +AAAATGGACTTTGGAGGCGATTTTGAGATAAAAATCGCCTCTCTGAGACC +GCGTCACCGCAACTCGTGCCTCCATCTGACTCAGGATCCGAATCCGGCTA +ACGAGTATACTCGAGCCGGTTCGGTCGATGCACCAATGTTGACACCCATG +TTCTTCGGGGAGTCAGACCCGATAGGTAACAAACGTGCGGCTTACTTAAT +GACGCGCGGAGGATACGGAGCTAGTACGAGCGGTGCGACTGATGATGCAT +AGGGAATAACAGTACACAAGCTTGAATTAATCCAAGTTTATTATCGAATA 
+AAAGGATTAATCAAAGAAAAAAGGAGGAAAGGGCGAGCTGCATTGGGAGA +CGACCTTCCTCGACGAGAGTTGTTGAAGAATTGAATGGATCGTAGGCCAG +AGGCCTATAATAATTATTGTGTGGATTTACGGGCAACGACACTCCGGAGT +AACGCTAGTTAGCGAAGGAGCGCGGTTGCATCTGAATAGCGCATATTGGC +ACGTAATTCAAATTAGAAAAATTACTAAAATGATTTAATTAATATTTTTA +ATAGAACGAATGGAATATAATCAGCAAATGGTACAAGCGGACGTCACAAC +ACATTGCTTAATAATAAATTTCCAACTGAAACCAATAGATTCAGCCTTGA +CACAACTGATCAGAAACTTGTTATACATGTTAAAATACTCGAATTCATAT +TCATCGCCACCAGAATAATTGGGTTTGAACACTCTTGCAACTTCAACTGA +GATGGTACCGTAGATCCAGAAGATGCATTGTTGGAATGTACTTCTTCCCA +AAAGGTTCTGAGATTTAATTGATTTTTAACCTGGGCCTAAAAGTCTCACT +TACGTACATTTCTAATATTTAAGGCATTTAGTGCTGAGAAGTATTCGACA +GTGGCCTCGATGTTTTCAAAGTGGAATAAATAGTGAATTGGAATAATGTT +GAAATAATGACCAAAAACTTTACATTCTTTTGATTTTTTCCACTGATCCA +TCCGAGTAAGCTGCTCAAAGTCATCGATTGTTTCAAGGCACCACAATTCA +ATACTTTTCAACTCTTGAGAATCGAAATAGGGCAGCGTAGATATAATATC +ATTGGGTGACACAAAAATAATCGATAATTCCTTCACATGAATGCATTGCT +TAGATCTGAGGATGTTCAGAAAATATGTGGTAACGTTAAGGTCTTGTTGA +GTTTGTACAACAAATTCGAATTTCCTGACACGTCCCAGTAATATTCTCAA +ATCGTTGAAAGCTAGTTGCATGAAGTTATCTCTTCTGATTACTCTCTTTC +TTCTGTGAGCCACAATCGAATTCCCGGCATTGGTGTAAATGATTTTCATT +TTATCAATCCGCATTTCAACTCGACCATTGCATACCCAAAACGAGATAGA +ATCAAAATTAATTCCATTTTGAACAACGGCAGCTCTCAGACTTTTACAAA +TTTTGAGGTGAGTGAGTCTGAAGAATAATAGCTTGGTTATGATTTTTCAA +AGTTAGGGTACCGAAATCTTGGAAATATTTTTAATTGACTCCAAATTTTC +CCCTGATTCCGAAAATCGATGTGAAAAAAATCAAAAAAAAAATTCCTGAT +TTTATATTTAAGCTTGAAATCACGCATTTCATTTGCGCCCCGGTCTCTTT +TTCAAATTCGCGCCCTAATAAATTTGCATTGGAGCGCACTTGCATCGTGT +CATTTTCTTCGTTTTTTTCATTTATTTTAGCAGAATTCAATGTTTTAAAC +CAGTTTTATTCATTTGTGGAGAGTTTTTATCGGTTTTTTTTTGACTAAAA +TGAAAAAAGGGTCCAAATGAAAATCGAGATGAGCTAAGTCAAGCTTAAAT +ATAAACCAGGGAAATTTTTTGATTTTTTTTTTCACTTTGATATTCGGAAT +CATGGGAAAGTTGGGAGTCAATTAAAAATATTTCCCAGATTTTGGTACCC +CACCTTTAAATCAAAATAAAATCAACTAACTGATCCACTGGCTTTAGCTT +TTCCAATACTTGACCAAGGATCTCTGTGGGCATATCGGAAAGAGTAGGGG +AAATGTGATCCTGGCTGTAACAATCAAATTGCCATTAACCCAGTATTTCA +ACAAAAAAAAACTCACAAATTCATATATCGCACAAAATTAATTGCCCACC +ATTTGTCAAGTTCAGCTCCACGTTCTTCAGTCAGAGCACGTCGGGCTTCT 
+CGGCTGATTGTTACCATTTTCAAAAGATATAATTTCTTTAACACAGAAGT +TAAATAAAATATGTCCGATAGCAAAAAGTAGATGTGAATGAATTGAAGAA +GAACTGAGAAATAGCTAAATATCTATGTCTTAATTTTCATAGAGTTTCAT +TGTCAGATACTACAATGTTACTATTGAGAAAAATGTCAAACTGATAAGAA +TAGCAGCCAGCTTAAAATGAAAATTTGATAAGAAATTGCTGTCTTAATAT +TAATTATATTGTCGTCAACAAACAGAATTTTCACTTGCTAGTTGTTAGTT +TAAATTATATTTTGATACCGAAAAGTGTTTGAAAATGGGGGGAAACTAGA +AAATAAAGTTTTCTACATATTCCATATTTTCAGAATTGATTTCAAGATAT +GTCGGAAACAAGCCAAGAAGCTCGACGTGCACTGACGGAAGAACGGGGAA +AGGTGCTTGACAAATGGTGGGCAATTAATAATTTGAGATTCATGAAATTG +TGAGTTTTTTGGAAATACTGGCTTATTGGCAATTTGATCGTTACAGTCAG +GATCCCATCTCCCCCACGCTTTCCGATATGCCCTCAGAGATTCTTGTTCA +AGTTTTTGAAAAATTTGATTCTATAGAACATTAAGTTGCTTTGACTTTTA +CAACAAAAATATTCGGTATAAAAATGTTCTGACTTATTTGTCGAAAAGTT +TGTCGAAGTCTGAAGCCCGCCGTGGAAAAAATTGAAATTCATTTTGATTC +TATTTCGTTTGAAGTATTTAATAAAAATGTTATTATGAAATTGGATGGAT +TAAAAATTCTCTACAATGATGTCGCAAATGGTGCTTCTATAGTGGCTTAC +AATAACAAGGAAGCAATGATCAAAGGAGAAAACTTCCTGAAACTAGCTTT +CAATGATTTGGGAATACTACTGGAACGTGTGAGGAAATTCAAATTTAAAA +TCTATGGAGACGATGTACATGCCATTGCCACATTTCTTTTAAATGCTCTC +AAAGCTGATCAGTGTGTTCACGTAAAGAAAGTAGTGCTTTTTTACGTGCC +ACTCATTGATATTTGCTCAATTCTCTCGTATTTTGATTCTGAAATGTTGG +AAAGAATTGACTTGCGTTGTAATGATACAAACGCTCATTTTGAACAACTT +GCTCAAAAAAAATTCCAACGAACAAAATGTGTAGAACTTTACAAACTTTT +TAATAAAGATTTCCAAATTTTTATACAGATTCACTAAATTTCCAGAGTTT +TCCCACAAACCGACACACAGACCGACCTGGTAGTGGAGAGGACAACATCT +TTATTCAATCCAGTCAGTGAAGTGGAATTAAACTGGGCTTCAAGGAAGAA +ATGGCACAACGGAAAAGGTCGGGAGAAAAACCAAAAGTCGGAAAAATATT +TTTTATTGAGATATAATTTTTTTTGGTAGAAACATTTTTCTTCGAATTTA +AATTATATTTCGCGCCAAAAATAAGATTAAAAACCAAAAAAAAATCCGAG +TGTGGAGAAGTATTTTCGAAAATATTCCTCTAATAATACCCTTTAACAAG +GGCTATAATAATGGAATTTCGGGTGCTTGAAAATGAGTTTTTTGAGATTA +GAAGTATGAATTTTTGTGTCTGGTGTTAAGTTATTTAAGTTATATTGAAT +GGAAATTTTAAAAAAATTCTGAAAAAATATTTATAATTGTTATTTGAAGC +TACTATTTCACCTGGAAACTTCTAGAATTTCCAATTTTTTTAAAAATTTT +TTAAAAATTTTTTTATTTTCAATATTTTTCACATATGATACAATTTATTA +TGAGATTTTCGGAATTTTTTAAGTGAAGTTTTTCCCTAAAGTTTTCAAAA +TTGACTCGAATTTTCTGGAATTTTTTTTTTCAAAAGAAACACAGCTTTTG 
+TTTAATTTCCAGTTAATTTTTAGCTTTAACATTAAAAAAAATTTCTTTCA +TTTTTAACAAAAATTCCCCTTCTTATTTTAGAATTCAAAGCCAAAAACCT +TCAAATTCAATTTTATCTAAAACCTCCTCAAATTCGAAAAAGTGTGAGAG +AGATACGCGAGCTACGAAAAAGAGCAAAAATCTGAGTTTTTGAGCACTTC +>CHROMOSOME_IV +CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC +TAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA +AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG +AAGAGACCAAACCGAAAAATCAATTTTTTAAACGAAAAATCAATGTTTTA +AACGAAAAATCTATTTTTTAAACGAAAAATCAATTTTTTAAACGAAAAAT +CCATAAAAAATTGTTATAAAAATTCTGGAAAAAAAACTAAAAACTTAATT +TTTGCTTTGAACATTCATCTTTCGCGAAAAAAATTTCAGCAAATAAAACC +GAAAAATACAATTTTGCAAATAAAAAATCTAAATCAACAGTAAAAAACAA +AAAAGTCAAATTTCAATAGAAAAAAAAATCAAAAATCAATTTTTACTCCA +AAAATTCAACTTTTGCCAAAAACTAACCCAGAAAAATCAATTCACAAAAA +AAAACAAATAAAACTGCAACGATGTTTGTGAAAAAAATTTTCTTGATATT +TTTAACGAATTTTTGTTTAAAAAAAATGCATTTTTGGCCATAAAATTTTT +TTTTTGATTTTTTCAAACGAATTTCCCCTCAATTTTCTTGCAGATCGACG +TGGATATTGACGACCTAGATGACGATTTTCTCTCCACAAACGGAATGGAA +CGACACGGTTCAGAATTACAAGGAGAAGTGGTTTTCACTACGACGAGTAC +TGCGAATAACAATGTGAAACGAGCCGTCGAATATTTCGAAGATGATAATC +AAGATGATGCACTAACATCCACTTCATCCGGCAACAGTACACAGAAGGAA +TCGTCTCCGTTCACAGATTTTGATGATGTTCCGCCGCCACCAGTGGCTCC +AGAAACTCCGGCTCCAGCCCAAAACCGCCGAGAATCTGCGTCCCCCGAAC +GTCAATTCCTTGATGAGAGTCACCTCGGCGGAATTGGATCTCCGTTATCA +CAGAGTACAAGGCTCGATGAGACGTTTATTGAGGAATACTCTATTGAGTT +GGACACCTCTGGGAAGTATGTGTTGGGAAAAAAAATCCGAAAATTGATTG +AAAAATTCGAAAAAAAATCCAAAAAACCTTTTTTTAATGAGATTTTTTCT +GTGCAAAAATTTAAATTTAGAGCTGAAGAATGTGCACTTCTAGAAAAACT +TATTTTAAAAAATTCACATCCTTTTATCAAATTTATTAGCATATTTATAC +AAATTGATGGATAAAAATTCCAAAAAAGTTCCAGAAACAACATTTCCTCC +GCAGCTTCACCTGGTCCCAAGTCTCCATTTGACGATGATTTTACTGATAC +AGCTGCCCCGGTAGCCCCGCCACCAGCACCTACAAAAGCTGCCGAGGAAT +ATCGCCGGCAACCACATCAGAATCCGTTTGATGAGGAAGAGGAAGAAGAA +TCACAGTTTGGAGGGGGAACTCTGTCCGGTAGAGACCCATTTGATGAAGA +TGTACGTTTTTCGTAAAGTTCGGAATTTTTAGGGTATAAATTGAAAAACA +AATGGAGAAAATATGAATACCTAGATTTTTTTTTTCAAAAAATCCCAACC +CTAATATCCACAATTTTTTTTCAGTCTGGAAACTCTAACGAAAACCAGCT +CCGCGAGAAGAAGCTCCACAAAAAAGAGCAACTAGCTCACCGGCTCTCCT 
+CCTCTTCCGAAGAAATTGTGGAAGCCTCAATTCACGAGGATGAGCCAATT +GTGATGGCTCAAATTCCAGAGGAGAAGCCCAAACCGAAGGCTATCCCGGC +TTTTGATAACGCCTATGACGCCGACTTTGACAATTCCCCACCACTTCATC +ATTACTCGGCGGTTCATTTGGAAACTGGGCTCTCACCGCTTGAGGAGGCT +CAGAGAGCTCTCCGAGCCAACCGAGCAAGGCATAAGCCGTCGAATGTATC +GTTGGCAGAGGAGGCGAAGCTTGCTGCCAGACAGAGATACTCGAATGCAT +CGGATATTAGAAGAGAAGAGGAGGAAGAAGTGGTGGAAGAAGATCCAGCA +GTGGTAGTTCCAGTTCTACGGAAGGATCTGGAAGTTGAGGAGGCTCCGAA +ATCGGTACGACCGCCTAGATATCGGAAGTCAAGGGAAATTGAGGAACCGG +TGGTTGTTGATCGGTTTGTTGAGGAAGAAGTTGATGAGAAGGAGGATATT +GATGCGATTTTTGAGAAATATCGAAAGGTTCGTAAAGATATTTTCGAGAA +AAACGGCGTTGAAAATTTGAAAAATTTGAACTTTTTTCGAGTTAAAAAAT +TTAAACATATAGAAAAGATATGAATCTGACAAAATTTAGAGAAAATAAAA +TGTTATTCACCAAATCGCCAATTTTTCTGCATTTTTTTGTCCGTTTTACT +TTGGTAATTCTGGTAATACAAAACTAGTTTTTTTTCAAAAAAGAATTGCA +GGAAAAATATTTTAAATGTTTTTTGAGCACTTGAGCGCATGAGATAGAAA +AACGTTATTATTATGCATTCCCTCATATAAAAGTTTGTTCAATTTTTTTA +ATTAAACCCCGCTCTTTGAGAAAACTCTGGACTCTACATGGAGACTACCT +TAATACAAATTAACTCATAAATATCGGTGATTTTCAACTAGTTATTCATA +TGATGCAATTACTGATTTAATAACTTATAATTATGTGTCAATTTCAGACG +TCTGTATCCGCCGATCCAAAATCTCACACGCCGATTTTGATGGCCGACGA +GTACAAAGAACCACAGAAGCAAGGTGAGTTTAGTTCGAAAAAAAATCATA +ACACACACCAGTAGGTTTAAAAACATTGTTAGAAAATTTGAAAAAAAAAC +CAAAAACCGTCGTAAAACTACACTTTTTCTATTGAAAATAGAACAAACAA +ATCAAATATCCTACATGTAAAATTTAAATCGAGTGGAATTAAATTCTTCA +AACAAAAAGAAATCTGCATTTAATATCGATTTCTCATTCAATTGTTATGA +ATCATAAAACATGATTTTTTACGACCTCCTCTGAATTATCTACATTGTTC +TCTTTTTCATACTTTCCATATTCTAGTATCGATAGGACTAACACAATTTT +AATAGTGAGTTCGTTTTTCAAACAATTGGGATTAATTTTCAACTGAACTA +GACTGTTTCTGAACGCTCAAAACCTTCAAAATCAATGCTTCTTCAAAACG +AAAAAATTGCACACTATAAACCTCTCACTTGAAGTGGCAGCGCTCGTTTT +ATATTGAGAAAATATATATTCAAAAAAATGCTGCTGCTTCTTCAGCTTCT +CGCCGGGCGCCGCCGCCAATAGTGAGTTCGTTTTTCAAACAATTGGGATT +AATTACAAAAAAACTTAACTGCGCAATTTTTAATTGGGAAACTTTCAAAT +TTTAACCATAACTATCAATACAGCGGTATACGAAATTTTTCTTCAAAATC +GCACCGATACTGCATTTTTCCGGAAAAGGACAAAGTCGGCTAATTGCCGT +TTTTTATAATTTTTGCTCATTAAGCTTATTCCATTAAAAATACAATTCTT +GTTTAAAATTGCTGAACGGCGAATTTTGTCTACCCGTGGTGGAAATTGCC 
+GGTTTGTTGATTTGCCGGAAAATTTCATTTTCGGCGTGCCGATTTGCCGT +TTGCCGGATATCAATTTGCCTGAAACGTTTAGAGGAATACAACGGAAACA +TTTGAAACTGTGCCTTTTTGAAATTTTTTCCCTTTTTTTTACAATATTTT +CATAGAATTAGCTTACTTTTCAAAATAGATGTAGGAAAATTCATATGCGT +ACAATTTTGCCAATTAAAATTTGAATTCTGAAATTTACAAAAAAATGTGC +AAAACTACTATTCGCCGAAAATTTTTGGCAAGTTGCCGATTTGCCCGAAA +TTTTTATTTTCGGCAAATTGGCGCTTTGCAGTTTGCCGGAAAAAATTACA +TACACCTGTTTTCAGCCTAACTTAAAATATGATTTACTTTGAAAAATAGG +GAATTAAATTTAAATAAAATTTTCAACTGAACTAGACTGTTTCTGAACGC +TCAAAACCTTCAAAATCAATGCTTCTTCAAAACGAAAAAATTGCACACTA +TAAACCTCTCACTTGAAGTGGCAGCGCTCGTTTTATATTGAGAAAATATA +TATTCAAAAAAATGCTGCTGCTTCTTCAGCTTCTCGCCGGGCGCCGCCGC +CACATTTACCGGCCAACCCGTTCTCTGAGGTTGTCTTGTAAGGGCGTTTT +CATGTATATTCAGATTATTATGCATGAATTTCATTAATAACGGTATCCAC +CCACGTATATTTCTCTTCTTCTCTCTTTCTGTTGCGACGGGGGATGTCGA +TTGATTACGCCACGGGCGCTAGCCTATTCCGAATGCATTGAGAAATTTGA +GACCAGATTCTGGAGTTTTTGGAAGACGAATCGTCGGCAGCGGAGCTCGA +TTTTAATTTAATTTCGAGAGCACCCGCGTGCTCTCGCCTCGACACCCTCA +AACTTCTTCTTCTTCTTCTTTTCGTCGTCTTTCACTTTTTTCCTTTTTCC +TGTCCTAACCACGGCGTCTTCCTCCAATTTTCAGTTACCACGACGTCATT +AGCTATTTTTAGTCCGAGTGTGGAGCTTGAGAATTTGAGCAATTCTGATG +TGACATTCCTGTCGTTTTTAGCGCTAATTTTCCTAAAACCTTCAAAAATT +TAGAAATTCTCTATCGTCTTTCAAGAGCCTAAGTTTTTTTTTTCGAAATG +AGTTTTTTAAAATTGATCGAAATGTTTTTAAATACTGAATATTTGGGATT +CTAGATTTTTACTACAATATTTCCTTTTCAATTTTTAATTGGACAAAATT +TCAAAATGTTAAAAAAATTCTCCGAATTGAATGCTATTTTTCTAAAAAAT +TTTTTTAGTGCCTGCACCAGTTGTCGTTGCTCAGGAGTCGCCGATCTTAA +AACGAAGAAACTCTCTAGTACCATCAAGGATCAGTGGAAGGCAGTCGACG +>CHROMOSOME_V +GAATTCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT +AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA +GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAAGC +CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAAGCCTAAGCCT +AAGCCTAAGCCTAAGCCTAAGCCTAAGCATAAGCCTAAGCCTAAGCCTAA +GCCTAAGCATAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACA +TAAGCCTAAGCCTAAGCCTAAGCCTAAAACATTTGGAAATTATTTTTGAT +CGTTTATATTCAAGTAGCAAGAAAGTGTCCTAGCGTTAAAAATAGGAAAT +TTTTCGCTCTTTCAGAACAGTATACTCATGTCTCTGTACCGACGATATTC +ATTTCAAAAATCGCAAAAAAAGTTTTTTTCAAAATACCACTACCATTAAA 
+ACATTTGGAAATTATTTTTGATTGTTTATATTCAAGTAGCAAGAAAGTCT +CCTAGCCTTAAAAATAGGAAATTTTTTGCTCTTTCAGACCAGTATACTCA +TGTCTCTGTACCGACGATATTCATTTCAAAAATCGCAAAAAAAGTTTTTT +TCAAAATACCACTACCATTAAAACATTTGGAAATTATTTTTGATTGTTTA +TATTCAAGTAGCAAGAAAGTCTCCTAGCCTAAAAATAGGAAATTTTTTGC +TCTTTCAGATCAGTATACTAAACATTCACGTGTGGATTAAAATTAAAAAG +GTAGTGTTAAATAATTAAAAATCATTTAGGGGGGAACGAAACCCATAAGT +ACTGTCATTTTTATCTTTTACCTTTTTTTTTACGTACGACGTTTTTCGCT +GTTTTCCCTGATACAAAATGCGTTGTTCTTGTTTATTTATGTTTTTCTTG +AAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAA +CGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAA +AAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTTGGTA +CATGGTTTTTTTTGTGATTTTTCGTTGAAAAATCCACACCGTGTCAGAGA +GAGCTGAAAATGTTCATTGAAAGCGCTAAAAAACATTTGAAAAAATGCCA +ATTTTTCCGTAAGATAATTCGATTTGAAAGTTTGAAAACTGAATAGTAAT +TGTGTACTTGCAAAGAACCTTGTTTGCTTTTTAGTTTATTTGATCTACCA +AAGATTGGGTTCACTGTCTGACCATTTGGGTCACTAATCTCATAAACCGT +CAAAACAATCCAACGTGGTACAAAGTACATCAAGTTTTTCACAAGTCCTA +TAGTTAGCTGCGCATCCCTAATAGGTATCCCCATTAATTTTTTTTTCAAG +AAGAATGGCAACACAAACACCCCTCTCTGATGACTCGTGTGTCAAAAACG +AGCTCACGGAACTCTCCTCTAGTGAAACGGTCTCGAGGATTGACGAGGCC +CCGAAATTTCATGTCAAAGCTGTCGTCGGGCGTGTTGTGATAAAACATTT +TGGTGGCCGGGATTGTGTTTGTGTTCAGTCAGAAAGACGCTATTAGGACC +TCCTCCCATCTTATTAGCTGTTTATCCTTCATCACAAGACATCTCAATAT +TGGTTCCATAGGTGTAGCTTCTTCTGCTTCTTCTTCTTCTTTTGTACCTA +CGTCATCATACATTGGTTACTGTCCTACCGAGGTTTGTTTCAAATTTGCA +CATTTAAAATGGTATACACCTTTAAAATAAACTTAGAAGTAAGGCCTTTG +GTCCCACTACAAAAAATTTACAGTAGTTTGGTTCAGACTTTTCTAAATTT +AATTAAGGTTATTTTTTATTGACAAAAAATATTTTAGTTCAAGAAATATG +TTATGTTCAGTGGGTTTCCTGTCATATACTTCTACTTACCGTATTTCCTC +TATTAGTCTTGCATGCAAGACTAATTTTCAATTGACCCGTAGGGGTGCAA +GACAAATAGGGGTGCAAGACTAATAGAGGCTGCAAGACTAATAGAGGAAA +TACGGTAGTTATACTGGCAAAAGCGATAGCATTTCGAGAGAAAAATATTC +TTAAATGTGCTCAATATGGCTGATACAGATAGGTTTTTCATTACAGACAG +GTTTTTCATTTCAATTTTAAAAAAATACCTGACAGTTTATCAAGAAAATC +CAGCACATTCCAGAGTTATTAGAGTATTTAGAAGTTTGCAAGTTCCCAAT +TTTGTCAAGGGATTTTTCAAAGTTTCTCAAACTTTCCAAAAAATGCAAAA +AAAAACATTCCAAAAGTACCAATTTTTCCAAAAAATTCCATAAATATATT 
+CAGTCCACCGAACGTTTTTAGATCTAACTGAGAGTTCTCCTGAAGTTTTT +CAGAGTTTGAAGGAATTTATAGAAGTTTCTGATGTTTTACAGGATTTTAC +TAAAGTTTCTCAAATTATCTATAAATGCCAAATCTTTTCAGAAAGTTTTC +AGAAAATTTCCGGAAAACCTTCAATTTTTCAAGTTTTCCAGAAGATTCTC +GATTTTTCCAAAATCGTTTCAGAATTTTCCTAATTATTCCCATATTTCCA +TCGACTTCTTATAGTTAATTTGATTTTCATCAAATTGATACAAATGAGAA +AATTTGAAATTTGATAACTCACGTTTCGATGCAAAAAGAGACGAAAGGCG +GCGTGATGCGGGCAGGATTTAGGCAGGCGGTAGGCCCTGAAACCGCGCCT +GCCTACCATTGAACCTCTAAGCTATTCAGTTGTATTTTTTGATATTCCAG +ATGCGTACCGACGTGGGTTCTATTCCATTAATGTGTATTCTGTCTACACT +AACGACTGGTGTGTTCGGATATCTTCCGACAGAAGATCTCACAAATTCGA +TAATTTCTGGAAACGGTGGTTATCCAGTTCCTCCGGACACTATTATACCG +GCCGATGGAGATGAGGAAGGTAAGTGGATCAGAAATTAATAATTATAAAT +TTTTTAGAGTAGGGTTTTCTTCTGAAATCTGATATTTTGTGAATTTTGCA +AAAAATTGGAAAAATCGACAACGAAATGCAAATTGTTTCGTTAAATGCAA +AATGGATGCGCACCTTTAAAGAGTAAAGTCTATGAAAATGCTTTTATTTG +GTATTACAGTTCTTTTCAAAGGCACACATATTTTTCCCATTTAACAAAAA +ATCGTCGTGACCTTATATCGTATTTTCGGCGCAAAAATCGGAACATTTCG +CGTCTGTATAATACCCCATTTAAAGGTTTACGACGCAATAATACAACACC +GGGTCTCGACGTGGCAAACGATTGTTTATTGATTTTCACGGGATTCTCGC +CTTTCTCATTGAATTTTCGCGCTCCATTGGCAATCGCCTGCCGGACAACG +CGTGGAAAAGTGTAGTGTACTCCACACGGACAAATCCATCGGTTTTACAA +CTAAAAACGAGCCGCGACGCGACACGCAACGCGCCGTAAACCTACACAAA +ATCTCGCAGACTCAAAATGGCCTAGTTCGGCAAACTCTGCCATTTCGATT +TATGAGGGAGGCCAGAATTCCGTGATTTTCCCATTTCTCACGGCCTCTCT +AAATTTCACGTTTTTCAAAATTCATCTTTTTCTCTCTGCTCTCTCACTCT +CTCACGTGAAATATTACTTTCTGAATAATAATTTGTGTTTAATTAAATCA +AATTTTATTGAAATAGAATTCATGATACACGATCTTGAGAAGAGCTCTGT +CCTCCAGGCTGCGAACTGGAGCGTAGAAAGTTTTCATATTGTTAAAAACT +GCAAAGAAATGTTCGATTCCTTGTTCTGACGTGTGATGCTTTCTCAAATA +TGGAATTAAATGACAGACAAGGAGATGAAGTTTCGGAGTGACTCATTATT +TCATCAGTTTCATTAGGAAAAACACTCTTTATTTATTCAAGGAACTGTTA +AAGCAAAACTTCAATTTTGTCAATCTCTTGGTCTAAATACGTTCTATCGT +TAGATAAATCTAACTATCTAACTGAAACGAGTACAACTATATTGTAACGC +AAAAAAGCTAAGCGCCATAAAACACAAGTAAAACAGGCTCCAGTAAGCCT +AAACTGTTTTTACTATAAAAAACCATTGTCATTCACCTACCTACCAAATG +CCAATATAATTGGTTGAAAAAAATTAGTTTAATCTCCTCGCAAGATTTTT +TTCTCCGAAATTTAAGATTGGGCGCTTAGTGATATCATAACTCTGCAGCC 
+ATCAGAAAAACTGAAATGTATTTAAACTGTAAGTTTTAATCGTTTTATGA +GGATTATTTCTTTAGTTGACAGTTTTTTGATAGCTTCTTTAGTTATTGAG +CTACGGGCTTCCAAACAAAACCAATCAAAAACCACTATAAACCACAATGT +CCTAAAAGTGAAATTACTCAGTCAATTTTTAACCAAATCACGGCATTAAT +GTCTTGATATCTTCATTGAACAATTTGTAAAAATCGTAAGGCGGTAGTTT +TTTGATAAATGTCATCGTTTTTTAGTAAAATTGCGTTAACCATTCAAAAA +TGGCCCATTTTTGGTGTTTTCAGGCCTAATTCTTGTTTTTGAAATTCCGG +AGCCAGTTTTTATCGAAAATTTTTGTGCAATGTCAAAAAATGACCGGAAA +AAAATTCTCTACAATCGTAAGGCGGTACTTTTTAAAATTTCGTTTCCATT +CTTCCCCAGGGATCCCGTAAAAAACCAATTTTTCGACTAGGTCTCACATT +TTGCCCTACAAGAGCATAGTTAATGTTATAAATTTGGATTTTTTGGTCGA +ATTTGATGAGGTAATATTCAGTTGTTTGTTTATTGTTGGAAAATCTGAAG +ATTTATCAGCGAATCGCCCGATTTCGCCAATTATGACCAAAAAACCCCTA +GTTTAGGATTTTCACGAATGGATGAGTTCATTGATACGCTTCCACGCTGT +>CHROMOSOME_X +CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT +AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA +GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC +CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT +AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA +GCCTAAGCCTAATCTGTGCTCCAAAGCCTTCGAACTGACGGACTTGTGTC +CCGAAGTCGAACTCTTGAAGTTGTCACCGAGTTGGCTAATAAGAAAGACG +CTACGTTGTCTTTTTAGTAAGTTTTCGTCGAAATTCTGGAAAAAAAATCT +GAAAAAAATGAGAAAAATTGAAGTTTCTTGTGAAAACGTGCAGCAGCCGA +CAACTAACGGGACTGGGAAAATTGAAATTTTTAACACAGTTAAAATGCGA +TGTGCGCATAGTGGGTGAAAGGCCATAATGCGCGCACCGCGTGTGGCGAC +CACCTATTATGCACCTATCGCGAACGGTTCGCGATGGGTCCATAATGTAT +CCATGATGCTTCCACTCACGAAACAAACAATTCGAAATATATATTGTTTT +TCCGCCTATTTTTTTAACATTTTATTGCAATTTTTATTGAATTTTTGATT +TTCACTGGTTTTTTTTCTTGAAATTTTGTTTCGTATGTGGTATTTTGTAA +TAATCATAATATTTTTTTCTAAATTGTGTGTCTTTTAGAGAAAACACTGC +TCTAACCAGTTGAGGCATTTAAAAACATCATTTAAACAACATGGCAAGCA +TTATTTCGTTCCTAATAAATGGAAGCATAGTCAGGATTACTGTAGTTTCA +CATGTATTTAGCCATTAAAATTGTAATCTGAGTTTTCATCGGTATTTGCG +CATACTCAGAAAATTTTGTTAAATATCTTGTCTGTTCCTGTATTTTCAAG +AGCGAAATATCGTCAAAATCAGATTATTTCGTTGAATCTGGAAATGCCCA +CTGTTCTTCATGCCTACTCGTGTCTATTTCTCATCAGCGTTATAACTCAC +CTTTTAAATAATATTGTAGAAACAAATTTTTTGCAGCTTCTCTTCGAGCA +GCAGACTTCACAAAAACTTCATCAACAAGAAAAAACGTGAAAATCTTCAA 
+CATCGGAATGTCTAGGTGAAGGATGTCGGAACGGGCAAGGTTTGGAGCAC +GGCTCAGCTTTTTTAGATTTTAGATTTTAATTTAGAAACACACCACCGAA +GAGGAGGAGATAAAGATGGCTCGGCTCCAGAATGCATTGAAAGTTGTTGG +ATGGTCGAATGGATTGAAAGCAATTGTAAAGAAGGAGTTACCCGGAAGCC +CTGCCAAAAAGCAAAAACCGGCATCGGTTGCGCCGAAAATTTTGGACTCC +TCGATCGGAAGAGTGAGTTTGCTGATTTTCAAAAAATAGGATGAAAGGCT +GAATAATTTGAGCTCTCAATTGCTAAAAATATACGGATGGACAAAACTAC +GATCATCATTGGTCACAAAGTTGCAGCTCCGAAAAGAGAATTCGTCTGGA +ATTCTGGAAGTTCACAAGAATGGTGAGTACTTACCTTTCAAGTGAAAATT +GAGGAAAAATTGAGAAAGAAATGTGTTGAAAGATGCAAAAAAATGACAAA +AATTCAGCTTATCCTGCTAAAATTACGCGTGAAAAAGCATATTTTGTGCT +TGAAAATCGCAAAAAAAATGCATTAGAAAAACCGATTTTATTTAGTCCTC +CTAACAAATCGATTTTTGTAGACCTGGAGTCATCCTAGCAAACGAGGCAA +CACTGGAAGAGGACGAAAAACGTTAAGATTCCGTATGAAGATGTCAGAAA +ACCGAGTTAGGAGGAACTTGGAATTAAATTTTTGAGTTTTCTTTTTTCTA +TAATTTTTTAAGCTCACTTTTTATTCTTTTTGTTGTTCTCCCACTTAGGC +CATGTTAATAAATGTGTTTTTGTTAAGAAAACTGTCTTAAATGCTCAGGA +TCAGGTTTTATTATTAAAAAAACAAACCCACAAAGCGCACATAATTTATG +TTCGCGATGCTCGCAAAATGCATCTCAAGTTCGCGATGGACGCACAATGC +GCGCGCAATGGGCATCATTTCCGGTTCCTTTTTAACACACGCGCGATCCA +CTCCGAAAGCACTATGCGCACATCGCATTTTAACTGTGAATGAAAATTTG +AAAAAAAAAACTAAATTTGAGAATTTTTGAGTTAAAAAATTCCAAATTGC +AGAGAAATAAAAAATAAAATTTTTAGAGAATATATCGAAAGAAATTCAAA +TTTTTAAGAAAAAATCCCAAAAATAAATCATTTTTCTAGAAAAAAAAAGC +AAAAAGTTCCCATTTTGCGATAAAAATCGAAAAAATCCCGATTTTTTTAC +AATTTTCTAAAAAAAAACTGAAAAAAATCCACATTTTTCAGAAAAAAACG +CTAAAATTTTAATTTTTTCTAGAAAAAAACAAAATTTTCAATAAAAAATC +CAAAAATAAAACCAAAATTAAAAAATTATAATAATTACGGGTAAAATCGT +ATGAAATCCCGTTTTTTTTTTAATCGTAATTATTTTCAATTTTTTGGTTT +TCTCCTCAAAAAAAATTGTTTTTTTTCCCATTTTTCTTTAAAAATTGAAA +GTTTTCCAATTTTTCTCGAAATATTGACTCAAAATTAGCTTTTTCTTCGA +ATTTTTAATAAAAATTTTCACTTTCGCGACCCGTTAGTTGTCGGCTGCAC +GGTTTTCTGAAAATTGAGAAATAAAATTAGAAAAGTTGCAAAATTGAAAA +AAAAAACCGCAAAAAACTGAAATTTTCTAGGCAAAAAAAAATAAAAGAAA +TCCACAAATTTTTCGATAAAAATTGAAAAAATCCAAATTTTCCTGAAAAA +AATCCCAAAAATCCAATTTTTCTACAAAAAAAACCCAAAAATCCACTCCT +TTTCGATAAAAATTTGAAAAAATTTCGATTTTTCAAGAGAAAATTCCGAT +TTTCTAGAAAAAAAACCCCAAAAAACTATAATTTTCCTTTAAAAAAAACC 
+CCAAAAATCCAAATTTTTGTACAAAAAATACCAAAAAATCCACATTTTTC +AATAAAAATTAGAAAAAAAAAACAATTTTTGCTGAAAAAAAACTCAAAAA +TCCAATTTTTCTACAAAAAATACAAAAAAAATCACGATTTTCCTGAAAAA +AACCCAAGAAACTATAATTTTCCTGAAAAAAAAATCCAAAAAAAAACCGG +GAAAAAAACAAAAGAAATCCAAATTTTTCGATAAAAAAATTGAAAAAATT +CCGATTTTTCAGGAATAAATTCGGGAAAAAATTCAATTTTTCTAGTAAAA +AACCCAAAAAAAACCCAATTTTTTTGCAGTTTCCGTCACTTCTTCTACCA +CCGTTTCCACAGTAATCTCATCGAAGGCTCAGGTCGCCTACTCGACAAAA +TTAATGATACTACTGCTGCCCAACAACAACTACAAACTACAAACGAACTC +GACGGCGATGGCCCAGAAATTGCAATCGCCGCGGCCAAAGCATTCCATCC +GACTGAAAAATTCGATTTGGAAATGAATGTGGACGACGATGAAATTCTGA +AAGTATTTGATATACAAGAACAAGAGCAAATTGTGATGAAAAAAATCCGA +CATTTGGAGAATTCTGACTTACATTTCGAGCGAATTCATGTGAATTTGGC +GAAAAAAGAGCGATTTTTTGACGACGTTCAGAGCTCGGAAAATGACTCAA +GCGACGAAAAACTCGCCGAAATTTGCGATTTTTTGAGCCAAAATTGCCGG +GAAATCTCGAATGCTGCGAAGCCTCCTCGAGCAACACTTGACGAATGGAT +TCAGACTGGAAAATTCGAAATACAACGTCGTCGAGAGATCCGGGCGGTTT +GCCACGTCATCAAGGCCATCGGTCAGATGGGAATTACACTTGGCGAGCTG +TGTGGAAAATTGAAAATTGCCATAGAGCAAATTGCCGGAATTTTGGCCGA +TTTGAGCGGAGACAAGAAATGGTGTCCGAGACCTTGGATTGCGCCGGAAG +GTTTTCTTTTTTTTTTAAGGGATTTTTGGGGCGAAAAATTTGGATTTGGC +TCACCACGCGCGAGCTTTGATTTAAAGGTGTTTTCGCGGTGAGACCCGGA +AATTTTATTTTTCCTAAATCTCTCTGAACCCGGTCATGATTATACTTAAA +TGAAAGCTCACGGCGAGCTGAATTCGAATACTAAGTGTATGGTTGTTTAG +CTACAGTAACCTGACACTGAATTTCGCGGCGAGACCCGAAAATCCCCAAA +AACTCTCAAAAACCGTCTTAGAATCGATCAAAATATGTCAAGATTATACT +TAAAGGAGGACTAACGGTTCGGACGATTTTGAACGTATAGACCAAAAATA +AGCTCAAATGAACGAAATTCGTAATGAAACTGCTCAAAAATTTTTCAAAA +ATTTTTTATGGCGGTTCAAAATTTTGAAAAAAATTACACTGATTTTGGCT +AAAATCACGATTTTTTCTCATTTTTCCGTGTCACATTTGTCCGAAGTTGG +TTTTTTTAGAATTATCGTCCTTTATTACATATTTTGGTAATAAATCTCAT +TTAATTTCGTCGATTAAAGTGCATTTAAAGCCGATAAATAACCAGTTTCG +ATGATTTTTGGTTACCTATCGGCTTTAAATGTACCTTAATCGATGAAATT +AAATGAGATAATCTACCAATATATGTAATAAAGGACGATACTTCCAAAAA +AGCCAACTTCGGTCAGATGTGACACGGAAAAATGGGAAAAATTCGTGATT +TTAGCCAAAATCAGTGTAATTTTTTTCAAAATTTTGAACCGCCATAAAAG +ATTTTTGAAAAATTTTTGAGCAGTTTCATTACGAAATTCATTTAAGTATA +ATCATGAGCGGGTTCAGAGAGATTTAGGAAAAATAAAATTTCCGGGTCTC +>CHROMOSOME_MtDNA 
+CAGTAAATAGTTTAATAAAAATATAGCATTTGGGTTGCTAAGATATTATT +ACTGATAGAATTTTTAGTTTAATTTAGAATGTATCACTTACAATGATGGG +GTTTAAAATTCTATAGTAAAAGTGTTTTTTGTTTTAGCTGTTTTAAGTAG +GATTATTAGATATATTAATATTGACCCTATAAAAAGAAGTTTTTTTCTTA +TCTTTTCACTACTTTTTAGTATACCAGTTATTTCAATGAGAATACATATT +TGGTTTTCTTACTTTATTTGTTTATTATTTTTAAGTGGTATTTTTGTTAT +TTTGGTATATTTTTCTAGTTTATCTAAAATTAATGTAGTGAAAAGTTATA +TAGCTGTGTTTTTACTTTTGTTAAGAATGTTATATTTTTCTCCCACAGTA +TTAACTTATAGAAGATATTTAGGTTTAAGAGGTTTTTATTATAGTATTTA +CTGGTTTATTTTTTGTTTTATTTTAGTATGTTTATTATTTTTTATAAATT +TTAGTAGTTATTTTTTAAATTTTTCAGGTGCTTTACGTAAAGTTTAAAAT +TATGTTTTTATTTGTTAGATTATTTATATTTATTTTTAAATGACAACGTT +TAATTTTTATTCTAATTTCTTTAGAATTTATAATGTTGAGATTATTTTTA +AAATTTTCTTATGTTTTAGGGGAAATAATGTTTTTTTATTTTATGTGTTT +TTCTGTTATTTCAAGAATCCTGGGTATGGTAGTTATAGTAGGTAATATAA +AATTTTTTGGTAGTGATAATTGTATTTTTTAGTAACAGATATAAGTTAAG +TTTAAACTATTGATCTTCAAAATCAAAAATTTATTTCTGTAGAGATAATA +GTATAAATAAGTATGTTTCTTTTTCGCAGAAATGGTTTTTTATCTTATAA +AGTTTTCTTTCAGGGAATTAAAATTTGATCATGGTTTAAGATGATTTAAA +ATGGTATTATCTAAATTTGATTTACAGAGTAGGCAATAAAAATTTACCTC +GGCAATTTATCGCTTGTAAAATACTTGTTCCAGAATAATCGGCTAGACTT +GTTAAAGCTTGTACTTTAATTGATGTTAATTATGAAATTATTATATTTTC +TTTTAGATCTATGGTAGAATTTGGATTTATATTAGTGAATTTTCATAATT +TTAAGATTTGTTGAACAAAGCAGATTAGTACCTGGTTAGACAAAAATTAA +AAGAGCAGGAGTAAAGTTGTATTTAAACTGAAAAGATATTGGCAGACATT +CTAAATTATCTTTGGAGGCTGAGTAGTAACTGAGAACCCTCATTAACTAC +TTAATTTTTTGACTCGTGTATGATCGTTTATTTTATTCTTAAGGATTATA +ATAAAAAATTTTTAATTTATTAAAATAGATATATACCCGGTTTATGATTT +AAGAAACATTTGGCCTACAATATTTTATATTATGGATTTTAGTTTTAGTT +AACTAAATGAAATTGTAAAAGACAGTAAAAAATTCTTAATGTATTTTTGA +AGATTATCTAGAAGTGGTACAAATCATCCATCAATTGCCCAAAGGGGAGT +AAGTTGTAGTAAAGTAGATTTAGGGGAACCTGAATCTAGTAATAAAACTA +TTTTTAAATATGTTTTGAAAACATGTTTTGAGGTAACTCGTAGTTTTTAA +GAGTTAGTTTAATATAGAATTGTTGACTGTTAATCAAAAGGTGTACCTCT +TAATATAAGAGTTTAGTTTAAGTTAAAACGTTAGATTGTAAATCTAAAGA +TTATTGCTCTTGATAATTTTAGTTTTACTTATAGTTATTTTAATGATGAT +TTTTATTGTTCAAAGAATCGCTTTTATTACTCTATATGAGCGTCATTTAT +TGGGAAGAAGACAAAATCGTCTAGGGCCCACCAAGGTTACATTTATGGGA 
+TTAGCACAAGCTTTATTGGATGGGGTTAAACTTTTAAAAAAAGAACAAAT +AACACCCTTAAATTCCTCTGAAGTATCATTTTTACTTGTACCAGGAATTT +CTTTTGTTGTAATATATTTAGAATGATTTACGTTACCATATTTTTTTGAT +TTTATTAGTTTTGAGTATTCAGTTTTATTTTTTTTATGTTTAATTGGATT +TTCTGTTTATACAACTTTAATTAGCGGTATCGTAAGAAAATCAAAATATG +GTATAATTGGGGCCATCCGTGCTAGAAGACAAAGAATTTCTTATGAAATT +GCTTTTTCTTTATATGTTTTGTGTATTATTATTCATAATAATGTTTTTAA +TTTTGTTTCAAAATTTAATTTGAGACTTTTAATTATTTACATCCCATTTT +TAATTATAGTAATTGCTGAACTTAACCGGGCGCCATTTGATTTTTCTGAA +GGTGAAAGGGAGTTAGTTAGAGGATTTAATGTGGAGTTTGCCAGAGTAGC +TTTTGTTTTATTATTTTTAAGGGAATATGGAAGATTAATTTTTTTTAGGG +TACTTTCTTCTGCTATATTTTTTAAATTTTCAATTTTTATAGCATTTAGT +ATTTTTTCATTATTAATTTTTATTCGTAGTTCATACCCTCGTTATCGTTA +TGATTTAATAATAAGTTTATTTTGATTTAAACTTTTACCAATCTCTTTAA +TTATATTGTGTTTTTACGCAGTTATTTTTTATTATTAATCAAGTTTATTT +TTTAGACATTTTTATATTTGTTTTTGTTTTACAATTTTTGTTTTATTTTA +AAGAAAGTATATTAAATACTTTAGTGAAAAAATTTCTTAATAGGTTAGTA +GGAGTATTTAGATATACAAATACTTTACCATTAAGGTCAGTAATTTCTAT +TTTTACTTTTATTGTTCTTTTAACTTGTTGTTTTGGAGGTTATTTTACTT +ACTCTTTTTGTCCTTGTGGAATGGTTGAATTTACTTTTGTTTATGCTGCT +GTAGCGTGATTAAGTACTTTGTTAACTTTTATTTCAAGAGAAAAATTTTC +AGTTTATATAAGAAAACCAGGAGACACATATTTGAAAACTCTTAGAATGC +TATTAATTGAAATCGTTAGAGAATTTTCTCGTCCACTTGCTTTAACAGTG +CGTTTAACAGTTAATATTACTGTTGGTCATTTAGTTAGAATAATGCTTTA +TCAAGGATTAGAATTAAGAATAGGTGATCAGTATATTTGATTATCAATTT +TAGCCATTATAATAGAATGTTTTGTTTTCTTCATTCAAAGTTATATTTTC +TCTCGTTTAATTTTTTTATATCTTAATGAGTAATAAAAAAAAAAAGATGT +TAACTTAAGTTTTAAAGTGCCAAACTTTTAATTTGGAAATGGTGGACCAC +ATCTTAGTTGATATAGCATAAGAAGTGCATTTGTTTTAAGCGCAAAAGAT +ATCCGTCAACTAACGAGTTCATAAAGCAAGTCTTCTAAATTTGTTCTAGG +TTAAATCCTGCTCGTTTTTGATTGTTTTTATTTCTTTATTTACCTTGTTT +TTAACATTATTAAGAATTTTGACTAATAACGTTATTGTTTGATGAAGAAT +TTTTTTATTGATAACTGTAGTTTTTATTCTATTAAATAAAAGCAGCAAGA +GATATACCAGAATTTTTAATTATTTTGTTATTCAAGAGTCTTTAGGTTTA +TTATTTCTTCTTTGTAGAGGAGGTCTATTACAATTTTTTATTATTTTATT +GAAAATTGGTGTAGCACCGCTCCACTTTTGAATTTTTAATGTAACAAATA +ACATTTTTAATTATGGGCTAATGTGGTTTTTAACATTTCAAAAATTACCA +TTTTTAACTATTTTATTACAAATTTTTTGGTTAAGATCCGTGTATATTTT 
+GTTATTTGGTTTATTGATTTGTTATGTTCAAATTTTTGTCATAAAAAGTT +ATAAAAATTTGTTAATTATTTCATCCACAGAGTCTTTTAATTGGATTGTT +TTGGGAGTATTTTTTTCAATGTTTAATACATTTTATTTATTTATTTATTA +CTTTGTATTAATAGTTTTATTAATTTCTAAGTTTTCTAAAACTAGGGGTT +ATAATTTTATTAATTGAGAAACAACATTAGTATTTTTAAATATTCCATTT +AGAGTTTCATTTTTTGTAAAAATTTTCTCATTGAGGGAAATTTTTAAATA +TGATAGATTCTTTACTCTATTTTTGCTTTTTACAATATTTTTATCTGTAT +TGGCATTTAGATTTTGATTAATTAACTTGAGAATGAAAAATAATGAAGAA +ACTTCAAATAATAATAAAATAAATTATTTTATTATTTTTCCGTTAATAGT +TATTTCTATTATTTAATTACTTTTCTAGTAAAATATATTATATTATCTTG +ATAAGGTAAAGTTCCAGTTGGGAGAAGTAAGATGTAAAATAGATATTACT +ATGTTTGGTTACGGTCCAAAAAGATGCACATCTTTGCGATCTAGTTTAGA +AAAAATATTTGTTTTTGGTGCAAAAGAGTTTGATTGCATTTAGTTTACTC +TTTTAGTTTATAATTAAAATATGGCCCTGAAGAGGCTAAGAATATTAGGA +GTATTGAAAATTAATAATAGATTATTAAATTTTGTTAATGGGATGTTGGT +GACATTGCCATCTAGAAAAACTTTAACATTAAGATGAAATTTTGGTAGTA +TATTGGGTATAGTTTTAATCTTTCAGATTTTAACAGGTACATTTTTAGCA +TTTTATTATACGCCCGATAGGTTAATAGCATTTTCAACAGTGCAGTATAT +TATGTATGAGGTAAATTTTGGATGAGTATTTCGAATTTTTCATTTTAATG +GGGCCAGGTTATTTTTTATTTTTTTGTATTTACATATTTTTAAAGGGTTA +TTTTTTATAAGATATCGTTTAAAAAAAGTATGAATGTCTGGTTTAACAAT +TTATTTATTAGTAATAATAGAAGCTTTTATAGGTTATGTTTTAGTTTGAG +CTCAAATAAGATTTTGAGCAGCAGTAGTTATTACTAGACTTTTAAGAGTT +ATTCCAATTTGAGGGCCAACTATTGTTACTTGAATTTGAAGAGGTTTTGG diff --git a/src/htslib-1.21/test/ce.fa.fai b/src/htslib-1.21/test/ce.fa.fai new file mode 100644 index 0000000..2ad2e7f --- /dev/null +++ b/src/htslib-1.21/test/ce.fa.fai @@ -0,0 +1,7 @@ +CHROMOSOME_I 1009800 14 50 51 +CHROMOSOME_II 5000 1030025 50 51 +CHROMOSOME_III 5000 1035141 50 51 +CHROMOSOME_IV 5000 1040256 50 51 +CHROMOSOME_V 5000 1045370 50 51 +CHROMOSOME_X 5000 1050484 50 51 +CHROMOSOME_MtDNA 5000 1055602 50 51 diff --git a/src/htslib-1.21/test/colons.bam b/src/htslib-1.21/test/colons.bam new file mode 100644 index 0000000..53b6031 Binary files /dev/null and b/src/htslib-1.21/test/colons.bam differ diff --git a/src/htslib-1.21/test/colons.bam.bai b/src/htslib-1.21/test/colons.bam.bai new file mode 100644 index 0000000..71dbd1a 
Binary files /dev/null and b/src/htslib-1.21/test/colons.bam.bai differ diff --git a/src/htslib-1.21/test/compare_sam.pl b/src/htslib-1.21/test/compare_sam.pl new file mode 100755 index 0000000..499cb23 --- /dev/null +++ b/src/htslib-1.21/test/compare_sam.pl @@ -0,0 +1,214 @@ +#!/usr/bin/env perl +# +# Copyright (C) 2013-2018 Genome Research Ltd. +# +# Author: James Bonfield +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Compares two SAM files to report differences. +# Optionally can skip header or ignore specific types of diff. 
+ +use strict; +use warnings; +use Getopt::Long; + +my %opts; +GetOptions(\%opts, 'noqual', 'noaux', 'notemplate', 'unknownrg', 'nomd', 'partialmd=i', 'template-1', 'noflag', 'Baux'); + +my ($fn1, $fn2) = @ARGV; +open(my $fd1, "<", $fn1) || die $!; +open(my $fd2, "<", $fn2) || die $!; + +# Headers +my ($c1,$c2)=(1,1); +my (@hd1, @hd2, $ln1, $ln2); +while (<$fd1>) { + if (/^@/) { + push(@hd1, $_); + } else { + $ln1 = $_; + last; + } + $c1++; +} + +while (<$fd2>) { + if (/^@/) { + push(@hd2, $_); + } else { + $ln2 = $_; + last; + } + $c2++; +} + +# FIXME: to do +#print "@hd1\n"; +#print "@hd2\n"; + +# Compare lines +while ($ln1 && $ln2) { + $ln1 =~ s/\015?\012/\n/; + $ln2 =~ s/\015?\012/\n/; + chomp($ln1); + chomp($ln2); + + # Java CRAM adds RG:Z:UNKNOWN when the read-group is absent + if (exists $opts{unknownrg}) { + $ln1 =~ s/\tRG:Z:UNKNOWN//; + $ln2 =~ s/\tRG:Z:UNKNOWN//; + } + + if (exists $opts{nomd}) { + $ln1 =~ s/\tMD:Z:[A-Z0-9^]*//; + $ln2 =~ s/\tMD:Z:[A-Z0-9^]*//; + $ln1 =~ s/\tNM:i:\d+//; + $ln2 =~ s/\tNM:i:\d+//; + } + + # Validate MD and NM only if partialmd & 'file' set, otherwise + # discard it. Ie: + # + # 1: if file 1 has NM/MD keep in file 2, otherwise discard from file2 + # 2: if file 2 has NM/MD keep in file 1, otherwise discard from file1 + # 3: if file 1 and file 2 both have NM/MD keep, otherwise discard. 
+ if (exists $opts{partialmd}) { + if ($opts{partialmd} & 2) { + $ln1 =~ s/\tNM:i:\d+// unless ($ln2 =~ /\tNM:i:\d+/); + $ln1 =~ s/\tMD:Z:[A-Z0-9^]*// unless ($ln2 =~ /\tMD:Z:[A-Z0-9^]*/); + } + if ($opts{partialmd} & 1) { + $ln2 =~ s/\tNM:i:\d+// unless ($ln1 =~ /\tNM:i:\d+/); + $ln2 =~ s/\tMD:Z:[A-Z0-9^]*// unless ($ln1 =~ /\tMD:Z:[A-Z0-9^]*/); + } + } + + my @ln1 = split("\t", $ln1); + my @ln2 = split("\t", $ln2); + + # Fix BWA bug: unmapped data should have no alignments + if ($ln1[1] & 4) { $ln1[4] = 0; $ln1[5] = "*"; } + if ($ln2[1] & 4) { $ln2[4] = 0; $ln2[5] = "*"; } + + # Canonicalise floating point numbers + map {s/^(..):f:(.*)/{"$1:f:".($2+0)}/e} @ln1[11..$#ln1]; + map {s/^(..):f:(.*)/{"$1:f:".($2+0)}/e} @ln2[11..$#ln2]; + + + if (exists $opts{Baux}) { + # Turn ??:H: into ??:B:c, so we can compare + # Cramtools.jar vs htslib encodings. Probably doable with (un)pack + map {s/^(..):H:(.*)/{join(",", "$1:B:C", map {hex $_} $2=~m:..:g)}/e} @ln1[11..$#ln1]; + map {s/^(..):H:(.*)/{join(",", "$1:B:C", map {hex $_} $2=~m:..:g)}/e} @ln2[11..$#ln2]; + + # Canonicalise ??:B:? data series to be unsigned + map {s/^(..):B:c(,?)(.*)/{"$1:B:C$2".join(",",map {($_+256)&255} split(",",$3))}/e} @ln1[11..$#ln1]; + map {s/^(..):B:c(,?)(.*)/{"$1:B:C$2".join(",",map {($_+256)&255} split(",",$3))}/e} @ln2[11..$#ln2]; + + map {s/^(..):B:s(,?)(.*)/{"$1:B:S$2".join(",",map {($_+65536)&65535} split(",",$3))}/e} @ln1[11..$#ln1]; + map {s/^(..):B:s(,?)(.*)/{"$1:B:S$2".join(",",map {($_+65536)&65535} split(",",$3))}/e} @ln2[11..$#ln2]; + + map {s/^(..):B:i(,?)(.*)/{"$1:B:I$2".join(",",map {$_<0? ($_+4294967296) : $_} split(",",$3))}/e} @ln1[11..$#ln1]; + map {s/^(..):B:i(,?)(.*)/{"$1:B:I$2".join(",",map {$_<0? 
($_+4294967296) : $_} split(",",$3))}/e} @ln2[11..$#ln2]; + } + + # Rationalise order of auxiliary fields + if (exists $opts{noaux}) { + @ln1 = @ln1[0..10]; + @ln2 = @ln2[0..10]; + } else { + #my @a=@ln1[11..$#ln1];print "<<<@a>>>\n"; + @ln1[11..$#ln1] = sort @ln1[11..$#ln1]; + @ln2[11..$#ln2] = sort @ln2[11..$#ln2]; + } + + if (exists $opts{noqual}) { + $ln1[10] = "*"; + $ln2[10] = "*"; + } + + if (exists $opts{notemplate}) { + @ln1[6..8] = qw/* 0 0/; + @ln2[6..8] = qw/* 0 0/; + } + + if (exists $opts{noflag}) { + $ln1[1] = 0; $ln2[1] = 0; + } + + if (exists $opts{'template-1'}) { + if (abs($ln1[8] - $ln2[8]) == 1) { + $ln1[8] = $ln2[8]; + } + } + + # Cram doesn't uppercase the reference + $ln1[9] = uc($ln1[9]); + $ln2[9] = uc($ln2[9]); + + # Cram will populate a sequence string that starts as "*" + $ln2[9] = "*" if ($ln1[9] eq "*"); + + # Fix 0 cigar fields + $ln1[5] =~ s/(\D|^)0\D/$1/g; + $ln1[5] =~ s/^$/*/g; + $ln2[5] =~ s/(\D|^)0\D/$1/g; + $ln2[5] =~ s/^$/*/g; + + # Fix 10M10M cigar to 20M + $ln1[5] =~ s/(\d+)(\D)(\d+)(\2)/$1+$3.$2/e; + $ln2[5] =~ s/(\d+)(\D)(\d+)(\2)/$1+$3.$2/e; + + if ("@ln1" ne "@ln2") { + print "Diff at lines $fn1:$c1, $fn2:$c2\n"; + my @s1 = split("","@ln1"); + my @s2 = split("","@ln2"); + my $ptr = ""; + for (my $i=0; $i < $#s1; $i++) { + if ($s1[$i] eq $s2[$i]) { + $ptr .= "-"; + } else { + last; + } + } + print "1\t@ln1\n2\t@ln2\n\t$ptr^\n\n"; + exit(1); + } + + $ln1 = <$fd1>; + $ln2 = <$fd2>; + + $c1++; $c2++; +} + +if (defined($ln1)) { + print "EOF on $fn1\n"; + exit(1); +} + +if (defined($ln2)) { + print "EOF on $fn2\n"; + exit(1); +} + +close($fd1); +close($fd2); + +exit(0); diff --git a/src/htslib-1.21/test/cross_validate.sh b/src/htslib-1.21/test/cross_validate.sh new file mode 100755 index 0000000..04704f6 --- /dev/null +++ b/src/htslib-1.21/test/cross_validate.sh @@ -0,0 +1,145 @@ +#!/bin/sh + +# Copyright (C) 2015, 2018 Genome Research Ltd. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +# ----------------------------------------------------------------------------- +# Author: James Bonfield. +# +# This cross validation script is designed to run the htslib test_view +# and cramtools.jar CRAM implementations to test compatibility between +# implementations. +# +# The test set may contain many dubious and ambiguous SAM cases, such as +# single base reads (is that quality "*" really meaning Q9 or no quality?). +# Some of these may fail one or the other implementations and be acceptable +# in the short-term, so to spot more important regressions we can tag +# specific cases as being known-pass or known-fail. 
+# ----------------------------------------------------------------------------- +# + +vers=3.0 + +cramtools_jar=$HOME/work/cram/cramtools/cramtools-$vers.jar + +test_view="./test_view -o VERSION=$vers" + +cramtools="/software/bin/java -Xmx4000m -jar $cramtools_jar" +cramtools="/software/bin/java -Xmx4000m -jar $cramtools_jar" + +run_out() { + out=$1; shift + echo "$@ > $out" + $@ > $out +} + +run() { + echo "$@" + $@ +} + + +sam_to_Ccram() { + run_out _tmp.cram $test_view -C -t $1 $2 + #run_out _tmp.cram $HOME/io_lib/trunk/build.seq3/progs/scramble -r $1 -O CRAM $2 + if [ $? != 0 ] + then + crash=`expr $crash + 1` + false + fi +} + +Ccram_to_sam() { + run_out _tmp.sam $test_view -i REFERENCE=$1 _tmp.cram + #run_out _tmp.sam $HOME/io_lib/trunk/build.seq3/progs/scramble -r $1 _tmp.cram + + if [ $? != 0 ] + then + crash=`expr $crash + 1` + false + fi +} + +sam_to_Jcram() { + run $cramtools cram -R $1 -I $2 -O _tmp.cram -n -Q --capture-all-tags + if [ $? != 0 ] + then + crash=`expr $crash + 1` + false + fi +} + +Jcram_to_sam() { + run $cramtools bam -R $1 -I _tmp.cram -O _tmp.sam + + if [ $? != 0 ] + then + crash=`expr $crash + 1` + false + fi +} + +compare_sam() { + #run ./compare_sam.pl $i _tmp.sam -nomd -notemplate -unknownrg -Baux + run ./compare_sam.pl $i _tmp.sam -nomd -Baux + if [ $? 
!= 0 ] + then + fails=`expr $fails + 1` + false + fi +} + +trials=0 +fails=0 +crash=0 + +files=`ls -1 *#*.sam` + +# Restrict to known workers from SAM->CRAM->CRAM in cramtools +#files="auxf#values.sam c1#bounds.sam c1#noseq.sam c1#pad1.sam c1#pad2.sam c1#pad3.sam c1#unknown.sam ce#1.sam ce#2.sam ce#5b.sam ce#large_seq.sam ce#tag_depadded.sam ce#tag_padded.sam ce#unmap.sam ce#unmap1.sam ce#unmap2.sam xx#large_aux.sam xx#large_aux2.sam xx#pair.sam xx#rg.sam xx#unsorted.sam" + +for i in $files +do + r=`echo $i | sed 's/#.*/.fa/'` + echo "=== $i" + + # C to C + trials=`expr $trials + 1` + sam_to_Ccram $r $i && Ccram_to_sam $r && compare_sam $i _tmp.sam + + # Java to Java + trials=`expr $trials + 1` + sam_to_Jcram $r $i && Jcram_to_sam $r && compare_sam $i _tmp.sam + + # C to Java + trials=`expr $trials + 1` + sam_to_Ccram $r $i && Jcram_to_sam $r && compare_sam $i _tmp.sam + + # Java to C + trials=`expr $trials + 1` + sam_to_Jcram $r $i && Ccram_to_sam $r && compare_sam $i _tmp.sam +done + +# Overcounts failures as an early fail can lead to 1 or 2 more fails. +echo "" +echo ============ +echo No. tests: $trials +echo No. diffs: $fails +echo No. 
crash: $crash diff --git a/src/htslib-1.21/test/emptyfile b/src/htslib-1.21/test/emptyfile new file mode 100644 index 0000000..e69de29 diff --git a/src/htslib-1.21/test/faidx/ce.1.expected.fa b/src/htslib-1.21/test/faidx/ce.1.expected.fa new file mode 100644 index 0000000..d606105 --- /dev/null +++ b/src/htslib-1.21/test/faidx/ce.1.expected.fa @@ -0,0 +1,8 @@ +>CHROMOSOME_I:5001-5125 length: 125 +AACTGGTTCAAAAACAAAAATTTTTTAAACTGTACAAACTGTCCAAAAAT +TCGTCGTAAATCGACACACCCTTCTCATTTTTTCAAAATTTTAATTGTTT +TCGAATGTTTTTTTTGCAGAATAAT +>CHROMOSOME_X:101-225 length: 125 +GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC +CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT +AAGCCTAAGCCTAAGCCTAAGCCTA diff --git a/src/htslib-1.21/test/faidx/faidx.1.expected.fa b/src/htslib-1.21/test/faidx/faidx.1.expected.fa new file mode 100644 index 0000000..d14656e --- /dev/null +++ b/src/htslib-1.21/test/faidx/faidx.1.expected.fa @@ -0,0 +1,6 @@ +>trailingblank2:28-33 length: 6 +GGGCCC +>trailingblank3:4-5 length: 2 +TA +>bar:4-5 length: 2 +TA diff --git a/src/htslib-1.21/test/faidx/faidx.fa b/src/htslib-1.21/test/faidx/faidx.fa new file mode 100644 index 0000000..0c936ef --- /dev/null +++ b/src/htslib-1.21/test/faidx/faidx.fa @@ -0,0 +1,21 @@ +> +ATGC +>trailingblank1 +AAATTTGGGCCC +TTTGGGCCCAAA +GGGCCCAAA + +>trailingblank2 with last dna line the same length +AAATTTGGGCCCAAATTTGGGCCC +TTTGGGCCCAAATTTGGGCCCAAA +GGGCCCAAATTTGGGCCCAAATTT + +>trailingblank3 with cr-lf style line endings +ACGT +A + +> foo +TGCATG +CA +> bar description +TTTTAAAA diff --git a/src/htslib-1.21/test/faidx/faidx.fa.expected.fai b/src/htslib-1.21/test/faidx/faidx.fa.expected.fai new file mode 100644 index 0000000..b4d1aff --- /dev/null +++ b/src/htslib-1.21/test/faidx/faidx.fa.expected.fai @@ -0,0 +1,6 @@ + 4 2 4 5 +trailingblank1 33 23 12 13 +trailingblank2 72 111 24 25 +trailingblank3 5 234 4 6 +foo 8 252 6 7 +bar 8 280 8 9 diff --git a/src/htslib-1.21/test/faidx/faidx.tst b/src/htslib-1.21/test/faidx/faidx.tst new 
file mode 100644 index 0000000..b6bd7ca --- /dev/null +++ b/src/htslib-1.21/test/faidx/faidx.tst @@ -0,0 +1,74 @@ +# Copyright (C) 2022 Genome Research Ltd. +# +# Author: Robert Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# First field: +# INIT = initialisation, not counted in testing +# P = expected to pass (zero return; expected output matches, if present) +# N = expected to return non-zero +# F = expected to fail +# +# Second field (P/N/F only): +# Filename of expected output. If '.', output is not checked +# +# Rest: +# Command to execute. $bgzip and $test_faidx are replaced with the path to +# bgzip and test_faidx. + +# Index fasta +P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -e faidx.fa.expected.fai + +# Test various functions on the fasta index +P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t fai_line_length -e 24 trailingblank2 +P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_has_seq -e 1 foo +P . 
$test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_has_seq -e 0 absent +P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_iseq -e trailingblank3 3 +P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_seq_len -e 33 trailingblank1 +P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_seq_len64 -e 72 trailingblank2 + +# Index fastq +P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -e fastqs.fq.expected.fai + +# Test various functions on the fastq index +P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q -t fai_line_length -e 63 FAKE0005_3 +P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q -t fai_line_length -e 144 SRR014849.203935_3 +P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_has_seq -e 1 SRR014849.203935_3 +P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_has_seq -e 0 absent +P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_iseq -e FAKE0005_1 0 +P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_seq_len -e 453 FSRRS4401CM938_1 +P . 
$test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_seq_len64 -e 309 FSRRS4401AOV6A_4 + +# Fasta retrieval tests +P faidx.1.expected.fa $test_faidx -i faidx.fa -f faidx.fa.tmp.fai trailingblank2:28-33 trailingblank3:4-5 bar:4-5 +P faidx.1.expected.fa $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t fai_fetch trailingblank2:28-33 trailingblank3:4-5 bar:4-5 +P faidx.1.expected.fa $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_fetch_seq64 trailingblank2:28-33 trailingblank3:4-5 bar:4-5 +P faidx.1.expected.fa $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t fai_adjust_region trailingblank2:28-33 trailingblank3:4-5 bar:4-5 + +# Fastq retrieval tests +P fastqs.1.expected.fq $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q FAKE0006_1:4-12 FSRRS4401BE7HA_1:81-120 FAKE0010_2 SRR014849.50939_3:71-90 +P fastqs.1.expected.fq $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q -t fai_fetch FAKE0006_1:4-12 FSRRS4401BE7HA_1:81-120 FAKE0010_2 SRR014849.50939_3:71-90 +P fastqs.1.expected.fq $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q -t faidx_fetch_seq64 FAKE0006_1:4-12 FSRRS4401BE7HA_1:81-120 FAKE0010_2 SRR014849.50939_3:71-90 +P fastqs.2.expected.fa $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai FAKE0006_1:4-12 FSRRS4401BE7HA_1:81-120 FAKE0010_2 SRR014849.50939_3:71-90 + +# Indexing and retrieval on bgzip compressed fasta +INIT $bgzip -c < ../ce.fa > ce.fa.tmp.gz +P . 
$test_faidx -i ce.fa.tmp.gz -f ce.fa.tmp.gz.fai -g ce.fa.tmp.gz.gzi -e ../ce.fa.fai +P ce.1.expected.fa $test_faidx -i ce.fa.tmp.gz -f ce.fa.tmp.gz.fai -g ce.fa.tmp.gz.gzi CHROMOSOME_I:5001-5125 CHROMOSOME_X:101-225 diff --git a/src/htslib-1.21/test/faidx/fastqs.1.expected.fq b/src/htslib-1.21/test/faidx/fastqs.1.expected.fq new file mode 100644 index 0000000..7293938 --- /dev/null +++ b/src/htslib-1.21/test/faidx/fastqs.1.expected.fq @@ -0,0 +1,16 @@ +@FAKE0006_1:4-12 length: 9 +TGCATGCAT ++ +{zyxwvuts +@FSRRS4401BE7HA_1:81-120 length: 40 +GCCCGTTTGTCGATATTTGtatttaaagtaatccgtcaca ++ +c^^^YRPOSNVU\YTMMMSMRKKKRUUNNNNS[`aa```\ +@FAKE0010_2 length: 30 +gatcrywsmkhbvdnGATCRYWSMKHBVDN ++ +I?5+I?5+I?5+I?5+I?5+I?5+I?5+I? +@SRR014849.50939_3:71-90 length: 20 +CAATAAATCAATACATAAAA ++ +\aZ\d`OY[aY[[\[[e`WP diff --git a/src/htslib-1.21/test/faidx/fastqs.2.expected.fa b/src/htslib-1.21/test/faidx/fastqs.2.expected.fa new file mode 100644 index 0000000..9b67d15 --- /dev/null +++ b/src/htslib-1.21/test/faidx/fastqs.2.expected.fa @@ -0,0 +1,8 @@ +>FAKE0006_1:4-12 length: 9 +TGCATGCAT +>FSRRS4401BE7HA_1:81-120 length: 40 +GCCCGTTTGTCGATATTTGtatttaaagtaatccgtcaca +>FAKE0010_2 length: 30 +gatcrywsmkhbvdnGATCRYWSMKHBVDN +>SRR014849.50939_3:71-90 length: 20 +CAATAAATCAATACATAAAA diff --git a/src/htslib-1.21/test/faidx/fastqs.fq b/src/htslib-1.21/test/faidx/fastqs.fq new file mode 100644 index 0000000..b151e73 --- /dev/null +++ b/src/htslib-1.21/test/faidx/fastqs.fq @@ -0,0 +1,500 @@ +@FAKE0005_1 Original version has PHRED scores from 0 to 62 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +@FAKE0006_1 Original version has PHRED scores from 62 to 0 inclusive (in that order) +GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ +@FAKE0005_2 Original version has PHRED scores from 0 to 62 inclusive (in 
that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +@FAKE0006_2 Original version has PHRED scores from 62 to 0 inclusive (in that order) +GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! +@FAKE0005_3 Original version has PHRED scores from 0 to 62 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG ++ +;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +@FAKE0006_3 Original version has PHRED scores from 62 to 0 inclusive (in that order) +GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJHGFECB@>;; +@FAKE0005_4 Original version has PHRED scores from 0 to 62 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +@FAKE0006_4 Original version has PHRED scores from 62 to 0 inclusive (in that order) +GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ +@FSRRS4401BE7HA_1 [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] +tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtcatttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn ++ 
+eeeccccccc`UUU^UWWeegffhhhhhhhhhhhhhhhhhhggghhhhhhhhhfgfeeeee\\\\ceeeeeeeeeeeeeec^^^YRPOSNVU\YTMMMSMRKKKRUUNNNNS[`aa```\bbeccccccccYUUUbceeee\[`a`\ZYRRRPPP[\\\XXZaWWXeeeeeeccacaccc\WWSSQRPMMKKKLKKKKKKKKPPRRMMLLLPVPPPKKKKKQQTTTPRPPQPMLLMKRRRPPKMKKRLLKKMKKLLKRTPPPQRMMLL@KKKKLLKLLLLXKKKKW\KKLKKKLKKKKLLLQUYXYTLMMPKKKKPPNNKKKK@KKPXPVLLKKKKLRMKLLKKPVKKKKLLLJPPPPRMOROOOOKKKOSSSOOORUZXUUUQMNNZV][Z@@@ +@FSRRS4401BRRTC_1 [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] +tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn ++ +eeeeeeeeecccceeeefecccca`````\[SSSS__a\TTTYaaaaa__^WYW[^[WXWXW[WSSSQZ\\RKKKTPSKKKPPKKKMKKQPVVVTTTTPRKMMLLPPPTVTWMNNRSSWW][[ZZZZXXSSN@NSKKKTVWTT@@ +@FSRRS4401B64ST_1 [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] +tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTGCCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn ++ +hhhhbbbbh^^UUUhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUh`hhhhh^^^hhhhbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUURRRdhbdYYRRW\NLLLLKW\]]^^YQLNNNNV]bddhdhggghhhhhhhhhdZZXXPPPXXa^^^habghhhhhhggghhggghhhhhhhhhhhhhhhhhhaabbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhc^\\\chhhggghhhhhhhhhggghhhhhhhhhhggghggghhhhhhhhhhhhhhhhhhhhhh^]ZXXWW\\TLLLLM__`dfhhhhhhhhhgg^^^^dhhheeXXXZdhhaa@@ +@FSRRS4401EJ0YH_1 [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] 
+tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCATTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn ++ +hhhh^^^^^hhhhhhhhhhhhhhggghhhhhhhhhhhhhggghhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhggghhhhhhhhhhh````hh]]]]hhhhhhhhhhhhhhhhhhhhhhhhhhddhddZRRRRRcVhhhhhhhhhhhhhhhhhhhhhbb__gghhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhaaaahgbcbghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhggbbchhhhhhggghhbbbg\bbhhhhhhhhhfffhhhhhhgggggghhhhhhhhhhhhhhhggghhggd^^]]^dedd^NNNNNZYWOLL@@ +@FSRRS4401BK0IB_1 [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] +tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttancaacaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaaggggggggaagtaggngnnnnnnnnnnnn ++ +eee`__eeeeeeeeeeggaYYY_aeeeeffghghhhhhhhhhhhhhhhhhhhhhhheeeeeeeee^\a`_PPPWWOPP[[WWZ^``accb^^^cc````c`UUUc^ccc\\\\\``]^]][[[\[PPPWW[[^^^``^XTTT\`aaa__^\]]^__PPPSQYYcc`^^^ceeeeeeeeeeeeea````[[OOOOMQQ\NNNNWKLLPPPPPP@QRLLNQS@RVYUUUU[ZWQQNMMS@SUTQPPVVTTRMLMQR@QRPPQPPPQKKLKKQPP\\TLLLLLLKPQKKKKKKLKKKLPKKKKLKKPTTLLKKKKPRPPPMKKKKKKKKJJPPPMMPPMMPKKKKKKKKKJRKKKKKLLQQLLLLLNNLLLLTTNNIINLLQQLLIIKKKKIIKKKKKKMPMKIKKKKIIIKKKKKKKKKKKKKKKKKKKKKKKHKKLKKKKKKHKKKKKIINNMHKKKNNNKKKKKKKKKKKMHHRRLLLKKKKKKKKKKGOKKK@M@@@@@@@@@@@@ +@FSRRS4401ARCCB_1 [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] 
+tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnnnnnnnnnnnnnnnnnnnn ++ +eee[WYY_ceeeeeeeffecb`````a__OOOOSU[ZUURQQRUUVUQQSRRSW[[\^^SSSTYY]`a```_[[\\a\YTTTYaac^^\acccceeebbbbbbbeebccceeeeeca``\\WWWWOOOS][[[XXUWWZWWX[WWX^aaaa`\^^^ccaaa__^^WWWWXLLLQRVVVPKKKKKKKKLLPPTQ[[OOPTW`_][[[[[SRQVVVPPKKKLLRV\\\VTKLLLLRSUUU@@@@@@@@@@@@@@@@@@@@ +@FSRRS4401CM938_1 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] +tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGACCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn ++ +hhhhhbb]]UUUhhhhbbbhhhhhhhhggghhhhhfUUUhhhhhhhhhhggghhhhhhhhbbbhhhhhhhhhhhhhhhhhh____hhhhhhhhhhhhhggghhhh^^^\ZhhddhYYNNNNNVTSSY^haaVQQSSdWddbdab\_______gghhhhhhhhhhaaahhhhhhhhhggghhhhhhhhhhhhhbbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUcdhhgda^^c_VVVVVQQQQcWXddhhhhhhhggghhhhhhhhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhh\\^\\hhhhh^^^\ZhURcccWQLLKKKRW\\YYLLLLKKLLLJJJRROUUZ_URWOOOWNYWWX[Yafhhhhhhhhhed[^eTTOOLLLLLTYZZZY]^_b[[VXXXdddddd____ddddd@ +@FSRRS4401EQLIK_1 [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] 
+tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATCTCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn ++ +hhh^UUU^^ggghhhhhhhhhfffhhhhhhhhhhhfffggghhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhhggghhh____hhhhdhdPPPPOOLLLLQQ^\WLLLYLLLLLLLKKKKRRLLLTYRKLLLLYPaadddghhhhhhhhhhha^^`PQQOOOMMMY^\OQSfhhhhhhhhhhhhhhhhhhdbbgga\NNLKKQP^^[TLOOQ\Ueaa^YX[\PPNSSSSNNLNNVV^^fdhddgh`bbhhhggghhhhhhhbbb`hhhgggggghhhhhhhhhhhhhhhhhhhhhhddPNNLLWQQZLLLLMVVV_dhhhhhh^^^hhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhhXXSQQVVVTTTT`dZhdddddhhhhh^^XVTT]_\\YRKKKKKRRRRU@@ +@FSRRS4401AOV6A_1 [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] +tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATTTTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn ++ +hhhhbbbbhZZZbbbbhhh^^^ggghhhhggghhhhhhhhhhggghhhggghhhhhhh____hehbbbhb``ZZZZdc^a__cUUSSTTTT[[[fhh]]``hhhhhhhhZZZYYhhh^^^bbbhhhZZZZheehhhhhbbbahahddcbSSSS^Saaad^dhhhbgghhZZZghhhhhhggZZZgghhhhhZZZhhhhggghhhhhh]]^^]hddaffYYPPPPNSUeaeaa^\Z\`^XVVVPPPXYd```ccacVVVV\NPPPPQQc`__aUWZZZhWgghhhhhZZZZ^]hdbbbaNNNNNZVST\@ +@FSRRS4401EG0ZW_1 [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] 
+tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTATTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggataggn ++ +hh`XSSSTddhh\\\]hhhhhhhhhbbbbhhghhhbbZZZZhhhhhhhhhhhhhhhhhhhhhhhhheZZUUUcchhhhhhhhhhhhhhhhhhhddXSSSQQSS__UUUbb[[acc`\LLLLLQ[KKKKUTXNNOO\\\WbhhhZ]]\\ggZZhhhhhhbb__^^^hhh____hb^UUUghccbh^a^^bb[ddPPPPPaSaccbaZ\_aVVV]NNNNL\RQR^SQRKKKN\PKKKKLYSdZ^^dhhhhhbbbbh]ZZZhhhhhhh[[__^\NNNNV\`XXXWW[[SSTThdddhhhhhhhhhhhhh[XXXghhhhhhhhhhh^^^^^hhhhhhhhhhhb`bZTTTRXdhhhhhhhhhhhhhhhhggXXXgggh`\`ddee_\MMMMM`c___ccddddehhhZZZXVVeebbb_QSSSX^ecc@ +@FSRRS4401BE7HA_2 [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] +tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtcatttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn ++ +FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFFD???:3104/76=:5...4.3,,,366////4<;!!! +@FSRRS4401BRRTC_2 [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] +tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn ++ +FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<<;;;;9944/!/4,,,57855!! 
+@FSRRS4401B64ST_2 [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] +tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTGCCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn ++ +IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIIIIIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EICE::338=/----,8=>>??:2-////7>CEEIEIHHHIIIIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHIIIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIIIIIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIHH????EIIIFF999;EIIBB!! +@FSRRS4401EJ0YH_2 [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] +tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCATTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn ++ +IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>>IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7IIIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIBBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIIIGGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE??>>?EFEE?/////;:80--!! 
+@FSRRS4401BK0IB_2 [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] +tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttancaacaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaaggggggggaagtaggngnnnnnnnnnnnn ++ +FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<<88;?AABDDC???DDAAAADA666D?DDD=====AA>?>><<<=<11188<>?@@11142::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=////8,--111111!23--/24!37:6666<;822/..4!46521177553.-.23!231121112,,-,,211==5------,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,,,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22-----//----55//**/--22--**,,,,**,,,,,,.1.,*,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,,,),,,,,**//.),,,///,,,,,,,,,,,.))33---,,,,,,,,,,(0,,,!.!!!!!!!!!!!! +@FSRRS4401ARCCB_2 [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] +tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnnnnnnnnnnnnnnnnnnnn ++ +FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;66322366762243348<<=??4445::>ABAAA@<<==B=:555:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA==88880004><<<99688;889<889?BBBBA=???DDBBB@@??88889---237771,,,,,,,,--1152<<00158A@><<<<<43277711,,,--37===75,----34666!!!!!!!!!!!!!!!!!!!! 
+@FSRRS4401CM938_2 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] +tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGACCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn ++ +IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666IIIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIIII@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI:://///7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIIIIIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIIIIIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@777772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIII==?==IIIII???=;I63DDD82--,,,38==::----,,---+++33066;@6380008/:889<:BGIIIIIIIIIFE?@C<<7999EEEEEE@@@@EEEEE! +@FSRRS4401EQLIK_2 [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] +tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATCTCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn ++ +III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHHHIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII@@@@IIIIEIE111100----22?=8---:-------,,,,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=//-,,21??<5-002=6FBB?:9<=11/4444//-//77??GEIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIIIIIIIIIIIIIIIIIIIIEE1//--822;----.777@EIIIIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIII994227775555AE;IEEEEEIIIII??9755>@==:3,,,,,33336!! 
+@FSRRS4401AOV6A_2 [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] +tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATTTTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn ++ +IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIIIIIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B@@D66445555<<>AAIIIIIIII;;;::III???CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE?EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHHHIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97771119:EAAADDBD7777=/111122DA@@B68;;;I8HHIIIII;;;;?>IECCCB/////;745=! +@FSRRS4401EG0ZW_2 [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] +tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTATTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggataggn ++ +IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;;;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIIIIIIIIIIIIIIIIEE94442244@@666CC<>==HH;;IIIIIICC@@???III@@@@IC?666HIDDCI?B??CC////-=323?423,,,/=1,,,,-:4E;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIIIII?????IIIIIIIIIIICAC;55539EIIIIIIIIIIIIIIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFIII;;;977FFCCC@24449?FDD! 
+@FSRRS4401BE7HA_3 [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] +tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtcatttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn ++ +eeeccccccc`UUU^UWWeegffhhhhhhhhhhhhhhhhhhggghhhhhhhhhfgfeeeee\\\\ceeeeeeeeeeeeeec^^^YRPOSNVU\YTMMMSMRKKKRUUNNNNS[`aa```\bbeccccccccYUUUbceeee\[`a`\ZYRRRPPP[\\\XXZaWWXeeeeeeccacaccc\WWSSQRPMMKKKLKKKKKKKKPPRRMMLLLPVPPPKKKKKQQTTTPRPPQPMLLMKRRRPPKMKKRLLKKMKKLLKRTPPPQRMMLL;KKKKLLKLLLLXKKKKW\KKLKKKLKKKKLLLQUYXYTLMMPKKKKPPNNKKKK;KKPXPVLLKKKKLRMKLLKKPVKKKKLLLJPPPPRMOROOOOKKKOSSSOOORUZXUUUQMNNZV][Z;;; +@FSRRS4401BRRTC_3 [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] +tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn ++ +eeeeeeeeecccceeeefecccca`````\[SSSS__a\TTTYaaaaa__^WYW[^[WXWXW[WSSSQZ\\RKKKTPSKKKPPKKKMKKQPVVVTTTTPRKMMLLPPPTVTWMNNRSSWW][[ZZZZXXSSN;NSKKKTVWTT;; +@FSRRS4401B64ST_3 [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] +tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTGCCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn ++ 
+hhhhbbbbh^^UUUhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUh`hhhhh^^^hhhhbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUURRRdhbdYYRRW\NLLLLKW\]]^^YQLNNNNV]bddhdhggghhhhhhhhhdZZXXPPPXXa^^^habghhhhhhggghhggghhhhhhhhhhhhhhhhhhaabbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhc^\\\chhhggghhhhhhhhhggghhhhhhhhhhggghggghhhhhhhhhhhhhhhhhhhhhh^]ZXXWW\\TLLLLM__`dfhhhhhhhhhgg^^^^dhhheeXXXZdhhaa;; +@FSRRS4401EJ0YH_3 [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] +tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCATTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn ++ +hhhh^^^^^hhhhhhhhhhhhhhggghhhhhhhhhhhhhggghhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhggghhhhhhhhhhh````hh]]]]hhhhhhhhhhhhhhhhhhhhhhhhhhddhddZRRRRRcVhhhhhhhhhhhhhhhhhhhhhbb__gghhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhaaaahgbcbghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhggbbchhhhhhggghhbbbg\bbhhhhhhhhhfffhhhhhhgggggghhhhhhhhhhhhhhhggghhggd^^]]^dedd^NNNNNZYWOLL;; +@FSRRS4401BK0IB_3 [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] +tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttancaacaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaaggggggggaagtaggngnnnnnnnnnnnn ++ 
+eee`__eeeeeeeeeeggaYYY_aeeeeffghghhhhhhhhhhhhhhhhhhhhhhheeeeeeeee^\a`_PPPWWOPP[[WWZ^``accb^^^cc````c`UUUc^ccc\\\\\``]^]][[[\[PPPWW[[^^^``^XTTT\`aaa__^\]]^__PPPSQYYcc`^^^ceeeeeeeeeeeeea````[[OOOOMQQ\NNNNWKLLPPPPPP;QRLLNQS;RVYUUUU[ZWQQNMMS;SUTQPPVVTTRMLMQR;QRPPQPPPQKKLKKQPP\\TLLLLLLKPQKKKKKKLKKKLPKKKKLKKPTTLLKKKKPRPPPMKKKKKKKKJJPPPMMPPMMPKKKKKKKKKJRKKKKKLLQQLLLLLNNLLLLTTNNHHNLLQQLLHHKKKKHHKKKKKKMPMKHKKKKHHHKKKKKKKKKKKKKKKKKKKKKKKGKKLKKKKKKGKKKKKHHNNMGKKKNNNKKKKKKKKKKKMGGRRLLLKKKKKKKKKKFOKKK;M;;;;;;;;;;;; +@FSRRS4401ARCCB_3 [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] +tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnnnnnnnnnnnnnnnnnnnn ++ +eee[WYY_ceeeeeeeffecb`````a__OOOOSU[ZUURQQRUUVUQQSRRSW[[\^^SSSTYY]`a```_[[\\a\YTTTYaac^^\acccceeebbbbbbbeebccceeeeeca``\\WWWWOOOS][[[XXUWWZWWX[WWX^aaaa`\^^^ccaaa__^^WWWWXLLLQRVVVPKKKKKKKKLLPPTQ[[OOPTW`_][[[[[SRQVVVPPKKKLLRV\\\VTKLLLLRSUUU;;;;;;;;;;;;;;;;;;;; +@FSRRS4401CM938_3 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] +tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGACCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn ++ 
+hhhhhbb]]UUUhhhhbbbhhhhhhhhggghhhhhfUUUhhhhhhhhhhggghhhhhhhhbbbhhhhhhhhhhhhhhhhhh____hhhhhhhhhhhhhggghhhh^^^\ZhhddhYYNNNNNVTSSY^haaVQQSSdWddbdab\_______gghhhhhhhhhhaaahhhhhhhhhggghhhhhhhhhhhhhbbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUcdhhgda^^c_VVVVVQQQQcWXddhhhhhhhggghhhhhhhhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhh\\^\\hhhhh^^^\ZhURcccWQLLKKKRW\\YYLLLLKKLLLJJJRROUUZ_URWOOOWNYWWX[Yafhhhhhhhhhed[^eTTOOLLLLLTYZZZY]^_b[[VXXXdddddd____ddddd; +@FSRRS4401EQLIK_3 [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] +tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATCTCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn ++ +hhh^UUU^^ggghhhhhhhhhfffhhhhhhhhhhhfffggghhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhhggghhh____hhhhdhdPPPPOOLLLLQQ^\WLLLYLLLLLLLKKKKRRLLLTYRKLLLLYPaadddghhhhhhhhhhha^^`PQQOOOMMMY^\OQSfhhhhhhhhhhhhhhhhhhdbbgga\NNLKKQP^^[TLOOQ\Ueaa^YX[\PPNSSSSNNLNNVV^^fdhddgh`bbhhhggghhhhhhhbbb`hhhgggggghhhhhhhhhhhhhhhhhhhhhhddPNNLLWQQZLLLLMVVV_dhhhhhh^^^hhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhhXXSQQVVVTTTT`dZhdddddhhhhh^^XVTT]_\\YRKKKKKRRRRU;; +@FSRRS4401AOV6A_3 [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] +tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATTTTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn ++ 
+hhhhbbbbhZZZbbbbhhh^^^ggghhhhggghhhhhhhhhhggghhhggghhhhhhh____hehbbbhb``ZZZZdc^a__cUUSSTTTT[[[fhh]]``hhhhhhhhZZZYYhhh^^^bbbhhhZZZZheehhhhhbbbahahddcbSSSS^Saaad^dhhhbgghhZZZghhhhhhggZZZgghhhhhZZZhhhhggghhhhhh]]^^]hddaffYYPPPPNSUeaeaa^\Z\`^XVVVPPPXYd```ccacVVVV\NPPPPQQc`__aUWZZZhWgghhhhhZZZZ^]hdbbbaNNNNNZVST\; +@FSRRS4401EG0ZW_3 [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] +tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTATTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggataggn ++ +hh`XSSSTddhh\\\]hhhhhhhhhbbbbhhghhhbbZZZZhhhhhhhhhhhhhhhhhhhhhhhhheZZUUUcchhhhhhhhhhhhhhhhhhhddXSSSQQSS__UUUbb[[acc`\LLLLLQ[KKKKUTXNNOO\\\WbhhhZ]]\\ggZZhhhhhhbb__^^^hhh____hb^UUUghccbh^a^^bb[ddPPPPPaSaccbaZ\_aVVV]NNNNL\RQR^SQRKKKN\PKKKKLYSdZ^^dhhhhhbbbbh]ZZZhhhhhhh[[__^\NNNNV\`XXXWW[[SSTThdddhhhhhhhhhhhhh[XXXghhhhhhhhhhh^^^^^hhhhhhhhhhhb`bZTTTRXdhhhhhhhhhhhhhhhhggXXXgggh`\`ddee_\MMMMM`c___ccddddehhhZZZXVVeebbb_QSSSX^ecc; +@FSRRS4401BE7HA_4 [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] +tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTAT +GCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaact +ttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtca +tttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaatta +aaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn ++ +FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF +D???:3104/76=:5...4.3,,,366////4<;!!! 
+@FSRRS4401BRRTC_4 [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] +tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtt +taatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn ++ +FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<<;;;;9944/!/4,,,57855!! +@FSRRS4401B64ST_4 [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] +tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTT +GGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATA +GGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTG +CCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTG +TTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn ++ +IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIIIIIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIII +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EICE::338=/----,8=>>??:2-////7>CEEIEIHHHII +IIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIII +IIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHIIIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIII +IIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIHH????EIIIFF999;EIIBB!! 
+@FSRRS4401EJ0YH_4 [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] +tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGA +CGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTG +CTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCA +TTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGC +TTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn ++ +IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +IIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>>IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7I +IIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIII +BBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIII +GGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE??>>?EFEE?/////;:80--!! +@FSRRS4401BK0IB_4 [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] +tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATT +CCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTA +TACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttanca +acaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgtt +ttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaa +cgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaagg +ggggggaagtaggngnnnnnnnnnnnn ++ +FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<< +88;?AABDDC???DDAAAADA666D?DDD=====AA>?>><<<=<11188<>?@@1114 +2::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=////8,--111111!23--/24!37:6666<;822/..4!46 +521177553.-.23!231121112,,-,,211==5------,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,, +,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22-----//----55//**/--22--**,,,,**,,,,,,.1., +*,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,,,),,,,,**//.),,,///,,,,,,,,,,,.))33---,, 
+,,,,,,,,(0,,,!.!!!!!!!!!!!! +@FSRRS4401ARCCB_4 [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] +tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACT +TCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATG +AGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnn +nnnnnnnnnnnnnnnnnn ++ +FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;66322366762243348<<=??4445::>ABAAA@<<==B=:5 +55:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA==88880004><<<99688;889<889?BBBBA=???DDBB +B@@??88889---237771,,,,,,,,--1152<<00158A@><<<<<43277711,,,--37===75,----34666!! +!!!!!!!!!!!!!!!!!! +@FSRRS4401CM938_4 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] +tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGT +GAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCA +GAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGA +CCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATA +GTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTC +AGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn ++ +IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666IIIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIII +I@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI:://///7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIII +IIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIIIIIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@7 +77772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIII +IIIIIIIII==?==IIIII???=;I63DDD82--,,,38==::----,,---+++33066;@6380008/:889<:BGII +IIIIIIIFE?@C<<7999EEEEEE@@@@EEEEE! 
+@FSRRS4401EQLIK_4 [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] +tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGA +TTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAA +ATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATC +TCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAG +CACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacag +ggggataggnn ++ +III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHHHIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII +@@@@IIIIEIE111100----22?=8---:-------,,,,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122 +000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=//-,,21??<5-002=6FBB?:9<=11/4444//-//77??G +EIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIIIIIIIIIIIIIIIIIIIIEE1//--822;----.777@EII +IIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIII994227775555AE;IEEEEEIIIII??9755>@==:3, +,,,,33336!! +@FSRRS4401AOV6A_4 [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] +tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAAT +TTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATA +TTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATT +TTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn ++ +IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIIIIIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B +@@D66445555<<>AAIIIIIIII;;;::III???CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE? +EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHHHIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97 +771119:EAAADDBD7777=/111122DA@@B68;;;I8HHIIIII;;;;?>IECCCB/////;745=! 
+@FSRRS4401EG0ZW_4 [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] +tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTAC +ATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATT +TTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTA +TTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACA +TATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagac +tgccaaggcacacaggggataggn ++ +IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;;;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIII +IIIIIIIIIIIIIEE94442244@@666CC<>==HH;;IIIIIICC +@@???III@@@@IC?666HIDDCI?B??CC////-=323?423,,,/=1,,,,-:4E +;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIII +II?????IIIIIIIIIIICAC;55539EIIIIIIIIIIIIIIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFI +II;;;977FFCCC@24449?FDD! +@FAKE0007_1 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0008_1 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0009_1 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0010_1 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) +gatcrywsmkhbvdnGATCRYWSMKHBVDN ++ +h^TJh^TJh^TJh^TJh^TJh^TJh^TJh^ +@FAKE0007_2 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0008_2 Original version has mixed case unambiguous DNA with PHRED scores 
from 0 to 40 inclusive (in that order) +gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0009_2 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0010_2 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) +gatcrywsmkhbvdnGATCRYWSMKHBVDN ++ +I?5+I?5+I?5+I?5+I?5+I?5+I?5+I? +@FAKE0007_3 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA ++ +;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0008_3 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg ++ +;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0009_3 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt ++ +;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0010_3 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) +gatcrywsmkhbvdnGATCRYWSMKHBVDN ++ +h^TJh^TJh^TJh^TJh^TJh^TJh^TJh^ +@FAKE0007_4 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0008_4 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0009_4 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) +tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0010_4 Original version has mixed case ambiguous DNA and PHRED scores of 40, 
30, 20, 10 (cycled) +gatcrywsmkhbvdnGATCRYWSMKHBVDN ++ +I?5+I?5+I?5+I?5+I?5+I?5+I?5+I? +@FAKE0011_1 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0012_1 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0013_1 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +ucagucagucagucagucagucagucagucagucagucagu ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0014_1 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) +gaucrywsmkhbvdnGAUCRYWSMKHBVDN ++ +cdefghcdefghcdefghcdefghcdefgh +@FAKE0011_2 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0012_2 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0013_2 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +ucagucagucagucagucagucagucagucagucagucagu ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0014_2 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) +gaucrywsmkhbvdnGAUCRYWSMKHBVDN ++ +DEFGHIDEFGHIDEFGHIDEFGHIDEFGHI +@FAKE0011_3 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA ++ +;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0012_3 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in 
that order) +gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg ++ +;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0013_3 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +ucagucagucagucagucagucagucagucagucagucagu ++ +;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +@FAKE0014_3 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) +gaucrywsmkhbvdnGAUCRYWSMKHBVDN ++ +cdefghcdefghcdefghcdefghcdefgh +@FAKE0011_4 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0012_4 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0013_4 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) +ucagucagucagucagucagucagucagucagucagucagu ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@FAKE0014_4 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) +gaucrywsmkhbvdnGAUCRYWSMKHBVDN ++ +DEFGHIDEFGHIDEFGHIDEFGHIDEFGHI +@FAKE0001_1 Original version has PHRED scores from 0 to 93 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ++ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@FAKE0002_1 Original version has PHRED scores from 93 to 0 inclusive (in that order) +CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ +@FAKE0001_2 Original version has PHRED scores from 0 to 93 inclusive (in that order) 
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +@FAKE0002_2 Original version has PHRED scores from 93 to 0 inclusive (in that order) +CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! +@FAKE0001_3 Original version has PHRED scores from 0 to 93 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ++ +;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@FAKE0002_3 Original version has PHRED scores from 93 to 0 inclusive (in that order) +CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJHGFECB@>;; +@FAKE0001_4 Original version has PHRED scores from 0 to 93 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +@FAKE0002_4 Original version has PHRED scores from 93 to 0 inclusive (in that order) +CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! 
+@FAKE0003_1 Original version has Solexa scores from -5 to 62 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT ++ +AABBCCDDEEFGHIJJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +@FAKE0004_1 Original version has Solexa scores from 62 to -5 inclusive (in that order) +TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJJIHGFEEDDCCBBAA +@FAKE0003_2 Original version has Solexa scores from -5 to 62 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT ++ +""##$$%%&&'()*++,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +@FAKE0004_2 Original version has Solexa scores from 62 to -5 inclusive (in that order) +TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,++*)('&&%%$$##"" +@FAKE0003_3 Original version has Solexa scores from -5 to 62 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT ++ +;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +@FAKE0004_3 Original version has Solexa scores from 62 to -5 inclusive (in that order) +TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<; +@FAKE0003_4 Original version has Solexa scores from -5 to 62 inclusive (in that order) +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT ++ +;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +@FAKE0004_4 Original version has Solexa scores from 62 to -5 inclusive (in that order) +TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA ++ +~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<; +@SRR014849.50939_1 EIXKN4201BA2EC length=135 
+GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG ++ +Zb^Ld`N\[d`NaZ[aZc]UOKHDA[\YT[_W[aZ\aZ[Zd`SF_WeaUI[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJC^UZ[`X\[R]T_V_W[`[Ga\I`\H[[Q^TVa\Ia\Ic^LY\S +@SRR014849.110027_1 EIXKN4201APUB0 length=131 +CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG ++ +\aYY_[FY\T`X^Vd`OY\[[^U_V[R^T[_ZDc^La\HYYO\S[c^Ld`Nc_QAZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUG[\[[d`OXTUZ[Q\\`W\\\Y_W\ +@SRR014849.203935_1 EIXKN4201B4HU6 length=144 +AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT ++ +`Z_ZDVT^YB[[Xd`PZ\d`RDaZaZ`ZaZ_ZDXd`Pd`Pd`RD[aZ`ZWd`Oc_RCd`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJEAc^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RD[Z\WLXM`\HYa\I +@SRR014849.50939_2 EIXKN4201BA2EC length=135 +GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG ++ +;C?-EA/=60,)%"<=:5<@85@7@8+8<<3;=4:DA3%<;=8-9.A=):B=* +@SRR014849.50939_3 EIXKN4201BA2EC length=135 +GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG ++ +Zb^Ld`N\[d`NaZ[aZc]UOKGB;[\YT[_W[aZ\aZ[Zd`SE_WeaUH[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJ@^UZ[`X\[R]T_V_W[`[Fa\H`\G[[Q^TVa\Ha\Hc^LY\S +@SRR014849.110027_3 EIXKN4201APUB0 length=131 +CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG ++ +\aYY_[EY\T`X^Vd`OY\[[^U_V[R^T[_ZBc^La\GYYO\S[c^Ld`Nc_Q;ZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUF[\[[d`OXTUZ[Q\\`W\\\Y_W\ +@SRR014849.203935_3 EIXKN4201B4HU6 length=144 
+AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT ++ +`Z_ZBVT^Y>[[Xd`PZ\d`RBaZaZ`ZaZ_ZBXd`Pd`Pd`RB[aZ`ZWd`Oc_R@d`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJC;c^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RB[Z\WLXM`\GYa\H diff --git a/src/htslib-1.21/test/faidx/fastqs.fq.expected.fai b/src/htslib-1.21/test/faidx/fastqs.fq.expected.fai new file mode 100644 index 0000000..77ba04a --- /dev/null +++ b/src/htslib-1.21/test/faidx/fastqs.fq.expected.fai @@ -0,0 +1,105 @@ +FAKE0005_1 63 85 63 64 151 +FAKE0006_1 63 300 63 64 366 +FAKE0005_2 63 515 63 64 581 +FAKE0006_2 63 730 63 64 796 +FAKE0005_3 63 945 63 64 1011 +FAKE0006_3 63 1160 63 64 1226 +FAKE0005_4 63 1375 63 64 1441 +FAKE0006_4 63 1590 63 64 1656 +FSRRS4401BE7HA_1 395 1823 395 396 2221 +FSRRS4401BRRTC_1 145 2720 145 146 2868 +FSRRS4401B64ST_1 382 3118 382 383 3503 +FSRRS4401EJ0YH_1 381 3990 381 382 4374 +FSRRS4401BK0IB_1 507 4860 507 508 5370 +FSRRS4401ARCCB_1 258 5982 258 259 6243 +FSRRS4401CM938_1 453 6606 453 454 7062 +FSRRS4401EQLIK_1 411 7620 411 412 8034 +FSRRS4401AOV6A_1 309 8550 309 310 8862 +FSRRS4401EG0ZW_1 424 9276 424 425 9703 +FSRRS4401BE7HA_2 395 10231 395 396 10629 +FSRRS4401BRRTC_2 145 11128 145 146 11276 +FSRRS4401B64ST_2 382 11526 382 383 11911 +FSRRS4401EJ0YH_2 381 12398 381 382 12782 +FSRRS4401BK0IB_2 507 13268 507 508 13778 +FSRRS4401ARCCB_2 258 14390 258 259 14651 +FSRRS4401CM938_2 453 15014 453 454 15470 +FSRRS4401EQLIK_2 411 16028 411 412 16442 +FSRRS4401AOV6A_2 309 16958 309 310 17270 +FSRRS4401EG0ZW_2 424 17684 424 425 18111 +FSRRS4401BE7HA_3 395 18639 395 396 19037 +FSRRS4401BRRTC_3 145 19536 145 146 19684 +FSRRS4401B64ST_3 382 19934 382 383 20319 +FSRRS4401EJ0YH_3 381 20806 381 382 21190 +FSRRS4401BK0IB_3 507 21676 507 508 22186 +FSRRS4401ARCCB_3 258 22798 258 259 23059 +FSRRS4401CM938_3 453 23422 453 454 23878 +FSRRS4401EQLIK_3 411 24436 411 412 24850 +FSRRS4401AOV6A_3 309 25366 309 310 25678 +FSRRS4401EG0ZW_3 424 
26092 424 425 26519 +FSRRS4401BE7HA_4 395 27047 80 81 27449 +FSRRS4401BRRTC_4 145 27952 80 81 28101 +FSRRS4401B64ST_4 382 28352 80 81 28741 +FSRRS4401EJ0YH_4 381 29232 80 81 29620 +FSRRS4401BK0IB_4 507 30110 80 81 30626 +FSRRS4401ARCCB_4 258 31244 80 81 31508 +FSRRS4401CM938_4 453 31874 80 81 32335 +FSRRS4401EQLIK_4 411 32898 80 81 33317 +FSRRS4401AOV6A_4 309 33838 80 81 34153 +FSRRS4401EG0ZW_4 424 34570 80 81 35002 +FAKE0007_1 41 35549 41 42 35593 +FAKE0008_1 41 35752 41 42 35796 +FAKE0009_1 41 35955 41 42 35999 +FAKE0010_1 30 36143 30 31 36176 +FAKE0007_2 41 36324 41 42 36368 +FAKE0008_2 41 36527 41 42 36571 +FAKE0009_2 41 36730 41 42 36774 +FAKE0010_2 30 36918 30 31 36951 +FAKE0007_3 41 37099 41 42 37143 +FAKE0008_3 41 37302 41 42 37346 +FAKE0009_3 41 37505 41 42 37549 +FAKE0010_3 30 37693 30 31 37726 +FAKE0007_4 41 37874 41 42 37918 +FAKE0008_4 41 38077 41 42 38121 +FAKE0009_4 41 38280 41 42 38324 +FAKE0010_4 30 38468 30 31 38501 +FAKE0011_1 41 38649 41 42 38693 +FAKE0012_1 41 38852 41 42 38896 +FAKE0013_1 41 39055 41 42 39099 +FAKE0014_1 30 39250 30 31 39283 +FAKE0011_2 41 39431 41 42 39475 +FAKE0012_2 41 39634 41 42 39678 +FAKE0013_2 41 39837 41 42 39881 +FAKE0014_2 30 40032 30 31 40065 +FAKE0011_3 41 40213 41 42 40257 +FAKE0012_3 41 40416 41 42 40460 +FAKE0013_3 41 40619 41 42 40663 +FAKE0014_3 30 40814 30 31 40847 +FAKE0011_4 41 40995 41 42 41039 +FAKE0012_4 41 41198 41 42 41242 +FAKE0013_4 41 41401 41 42 41445 +FAKE0014_4 30 41596 30 31 41629 +FAKE0001_1 94 41745 94 95 41842 +FAKE0002_1 94 42022 94 95 42119 +FAKE0001_2 94 42299 94 95 42396 +FAKE0002_2 94 42576 94 95 42673 +FAKE0001_3 94 42853 94 95 42950 +FAKE0002_3 94 43130 94 95 43227 +FAKE0001_4 94 43407 94 95 43504 +FAKE0002_4 94 43684 94 95 43781 +FAKE0003_1 68 43963 68 69 44034 +FAKE0004_1 68 44190 68 69 44261 +FAKE0003_2 68 44417 68 69 44488 +FAKE0004_2 68 44644 68 69 44715 +FAKE0003_3 68 44871 68 69 44942 +FAKE0004_3 68 45098 68 69 45169 +FAKE0003_4 68 45325 68 69 45396 +FAKE0004_4 68 45552 68 69 
45623 +SRR014849.50939_1 135 45737 135 136 45875 +SRR014849.110027_1 131 46057 131 132 46191 +SRR014849.203935_1 144 46369 144 145 46516 +SRR014849.50939_2 135 46706 135 136 46844 +SRR014849.110027_2 131 47026 131 132 47160 +SRR014849.203935_2 144 47338 144 145 47485 +SRR014849.50939_3 135 47675 135 136 47813 +SRR014849.110027_3 131 47995 131 132 48129 +SRR014849.203935_3 144 48307 144 145 48454 diff --git a/src/htslib-1.21/test/faidx/test-faidx.sh b/src/htslib-1.21/test/faidx/test-faidx.sh new file mode 100755 index 0000000..ae501e0 --- /dev/null +++ b/src/htslib-1.21/test/faidx/test-faidx.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# +# Copyright (C) 2022 Genome Research Ltd. +# +# Author: Robert Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Load in the test driver +. ../simple_test_driver.sh + +echo "Testing faidx..." + +bgzip="../../bgzip" +test_faidx="../test_faidx" + +test_driver $@ + +exit $? 
diff --git a/src/htslib-1.21/test/fastq/fastq.tst b/src/htslib-1.21/test/fastq/fastq.tst new file mode 100644 index 0000000..3b5fd9f --- /dev/null +++ b/src/htslib-1.21/test/fastq/fastq.tst @@ -0,0 +1,116 @@ +# Copyright (C) 2020 Genome Research Ltd. +# +# Author: James Bonfield +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# First field: +# INIT = initialisation, not counted in testing +# P = expected to pass (zero return; expected output matches, if present) +# N = expected to return non-zero +# F = expected to fail +# +# Second field (P/N/F only): +# Filename of expected output. If '.', output is not checked +# +# Rest: +# Command to execute. 
$pileup is replaced with the path to the pileup test +# program + +# -------------------- +# Reading + +# Minimal +P minimal.sam $tview minimal.fq +P minimal-q.sam $tview minimal.fa + +# Multi-line FASTQ +P multiline.sam $tview multiline.fq +P multiline-q.sam $tview multiline.fa + +# FASTQ with a very long header line +P longline.sam $tview -i fastq_aux longline.fq + +# Single file, unpaired data, with / without aux tags +P single_noaux.sam $tview single.fq +P single_noaux-q.sam $tview single.fa +P single_aux.sam $tview -i fastq_aux single.fq +P single_aux-q.sam $tview -i fastq_aux single.fa + +# Single file, interleaved paired data, no aux +P inter_noaux.sam $tview interleaved.fq +P inter_noaux-q.sam $tview interleaved.fa + +# Single file, interleaved paired data, with aux +P inter_aux.sam $tview -i fastq_aux interleaved.fq +P inter_aux-q.sam $tview -i fastq_aux interleaved.fa + +# Single file, interleaved paired data, using CASAVA +P inter_casava.sam $tview -i fastq_casava interleaved_casava.fq +P inter_casavaOX.sam $tview -i fastq_barcode=OX -i fastq_casava interleaved_casava.fq +P inter_casava-q.sam $tview -i fastq_casava interleaved_casava.fa +P inter_casavaOX-q.sam $tview -i fastq_barcode=OX -i fastq_casava interleaved_casava.fa + +# CASAVA with filtering +P filter_casava.sam $tview -i fastq_casava filter_casava.fq +P filter_casava-q.sam $tview -i fastq_casava filter_casava.fa + +# Paired data is mainly tested by the Samtools test harness. +# Basically though it's just reading two files and relying on either +# this code or explicit overloading of READ1/READ2. +# We simply test here we can read r1 and r2 as separate files +P r1.sam $tview -i fastq_aux r1.fq +P r2.sam $tview -i fastq_aux r2.fq +P r1-q.sam $tview -i fastq_aux r1.fa +P r2-q.sam $tview -i fastq_aux r2.fa + +# Simple tests for the FASTQ_NAME2 option. 
+P name2.sam $tview -i fastq_name2 name2.fq +P name2-q.sam $tview -i fastq_name2 name2.fa + +# -------------------- +# Writing + +# Minimal +P minimal.fq $tview -f minimal.sam +P minimal.fa $tview -F minimal.sam + +# Single file with unpaired data plus aux tags +P single.fq $tview -f -o fastq_aux single_aux.sam +P single.fa $tview -F -o fastq_aux single_aux.sam + +# Single file, interleaved paired data, with aux and /rnum +P interleaved.fq $tview -f -o fastq_aux -o fastq_rnum inter_aux.sam +P interleaved.fa $tview -F -o fastq_aux -o fastq_rnum inter_aux.sam + +# CASAVA with interleaved data +P interleaved_casava.fq $tview -f -o fastq_casava inter_casava.sam +P interleaved_casava.fq $tview -f -o fastq_barcode=OX -o fastq_casava inter_casavaOX.sam +P interleaved_casava.fa $tview -F -o fastq_casava inter_casava.sam +P interleaved_casava.fa $tview -F -o fastq_barcode=OX -o fastq_casava inter_casavaOX.sam + +# CASAVA with filtering +P filter_casava.fq $tview -f -o fastq_casava filter_casava.sam +P filter_casava.fa $tview -F -o fastq_casava filter_casava.sam + +# Paired data +P r1.fq $tview -f -o fastq_aux -o fastq_rnum r1.sam +P r2.fq $tview -f -o fastq_aux -o fastq_rnum r2.sam +P r1.fa $tview -F -o fastq_aux -o fastq_rnum r1.sam +P r2.fa $tview -F -o fastq_aux -o fastq_rnum r2.sam diff --git a/src/htslib-1.21/test/fastq/filter_casava-q.sam b/src/htslib-1.21/test/fastq/filter_casava-q.sam new file mode 100644 index 0000000..6d7a2ea --- /dev/null +++ b/src/htslib-1.21/test/fastq/filter_casava-q.sam @@ -0,0 +1,4 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG * BC:Z:NGTCTATC +HS25_09827:2:1201:1505:59795#49 141 * 0 0 * * 0 0 AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT * BC:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 589 * 0 0 * * 0 0 
TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA * BC:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 653 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC * BC:Z:NGTCTATC diff --git a/src/htslib-1.21/test/fastq/filter_casava.fa b/src/htslib-1.21/test/fastq/filter_casava.fa new file mode 100644 index 0000000..ba58034 --- /dev/null +++ b/src/htslib-1.21/test/fastq/filter_casava.fa @@ -0,0 +1,8 @@ +>HS25_09827:2:1201:1505:59795#49 1:N:0:NGTCTATC +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG +>HS25_09827:2:1201:1505:59795#49 2:N:0:NGTCTATC +AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT +>HS25_09827:2:1201:1559:70726#49 1:Y:0:NGTCTATC +TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA +>HS25_09827:2:1201:1559:70726#49 2:Y:0:NGTCTATC +TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC diff --git a/src/htslib-1.21/test/fastq/filter_casava.fq b/src/htslib-1.21/test/fastq/filter_casava.fq new file mode 100644 index 0000000..adb1913 --- /dev/null +++ b/src/htslib-1.21/test/fastq/filter_casava.fq @@ -0,0 +1,16 @@ +@HS25_09827:2:1201:1505:59795#49 1:N:0:NGTCTATC +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG ++ +CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGDCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHEFCFHGGGHEJEAJE +@HS25_09827:2:1201:1559:70726#49 2:Y:0:NGTCTATC +TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC ++ +;CBCEFDHDGFGHDGDIGEF@EJIIGEEIECGFHGFHGGGHHHHGGKIFFEHGEGHFIEFFHHGDHHGJEGF?FBHFFGCHHFFII>GCFCFFGGCEBF? 
diff --git a/src/htslib-1.21/test/fastq/filter_casava.sam b/src/htslib-1.21/test/fastq/filter_casava.sam new file mode 100644 index 0000000..9a27853 --- /dev/null +++ b/src/htslib-1.21/test/fastq/filter_casava.sam @@ -0,0 +1,4 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGDCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHEFCFHGGGHEJEAJE BC:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 653 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC ;CBCEFDHDGFGHDGDIGEF@EJIIGEEIECGFHGFHGGGHHHHGGKIFFEHGEGHFIEFFHHGDHHGJEGF?FBHFFGCHHFFII>GCFCFFGGCEBF? BC:Z:NGTCTATC diff --git a/src/htslib-1.21/test/fastq/inter_aux-q.sam b/src/htslib-1.21/test/fastq/inter_aux-q.sam new file mode 100644 index 0000000..cd5fbc4 --- /dev/null +++ b/src/htslib-1.21/test/fastq/inter_aux-q.sam @@ -0,0 +1,10 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=BDDDF +HS25_09827:2:1201:1505:59795#49 141 * 0 0 * * 0 0 AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT * RG:Z:1#49 +HS25_09827:2:1201:1559:70726#49 77 * 0 0 * * 0 0 TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DBDFD +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC * RG:Z:1#49 +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DDDFE 
+HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT * RG:Z:1#49 +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DDFFD +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA * RG:Z:1#49 +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DD?DB +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC * RG:Z:1#49 diff --git a/src/htslib-1.21/test/fastq/inter_aux.sam b/src/htslib-1.21/test/fastq/inter_aux.sam new file mode 100644 index 0000000..91e4aca --- /dev/null +++ b/src/htslib-1.21/test/fastq/inter_aux.sam @@ -0,0 +1,10 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGDCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHEFCFHGGGHEJEAJE RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DBDFD +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC ;CBCEFDHDGFGHDGDIGEF@EJIIGEEIECGFHGFHGGGHHHHGGKIFFEHGEGHFIEFFHHGDHHGJEGF?FBHFFGCHHFFII>GCFCFFGGCEBF? 
RG:Z:1#49 +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DDDFE +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT ;CACCDEBDEFGAGGGF5EGFFFEIDDIICGGF>H?GGG@DGAHD@HIDIFGG@IEEFHFGFEBGIHHDE5GHEIABFG>CB=FCF4GGHLBBJGFEHEG RG:Z:1#49 +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DDFFD +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA ;CBABFEEFJFGEFGCCDIGIFFIHDCGIEGHGHHEHGIDBGHHGFEIJHHGG@GFCHGFDHKFFEGHDIEFHHFHEIFCHFGCFBHG@>IBF5*DBHHG RG:Z:1#49 +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DD?DB +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC ;CBCDDDHDHFGGEGGICEGFGJF@GEIFECIFHJCDAI>HDAH?@EIDICDGEIFEB@7DHFFE?CCD4EFIEBHFC?FFBFFG3HC@AGFDFGCBHEG RG:Z:1#49 diff --git a/src/htslib-1.21/test/fastq/inter_casava-q.sam b/src/htslib-1.21/test/fastq/inter_casava-q.sam new file mode 100644 index 0000000..31f8e8b --- /dev/null +++ b/src/htslib-1.21/test/fastq/inter_casava-q.sam @@ -0,0 +1,10 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 
CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG * BC:Z:NGTCTATC +HS25_09827:2:1201:1505:59795#49 141 * 0 0 * * 0 0 AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT * BC:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 77 * 0 0 * * 0 0 TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA * BC:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC * BC:Z:NGTCTATC +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA * BC:Z:NGTCTATC +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT * BC:Z:NGTCTATC +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA * BC:Z:NGTCTATC +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA * BC:Z:NGTCTATC +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA * BC:Z:NGTCTATC +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC * BC:Z:NGTCTATC diff --git a/src/htslib-1.21/test/fastq/inter_casava.sam b/src/htslib-1.21/test/fastq/inter_casava.sam new file mode 100644 index 0000000..823d78b --- /dev/null +++ b/src/htslib-1.21/test/fastq/inter_casava.sam @@ -0,0 +1,10 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 
CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGDCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHEFCFHGGGHEJEAJE BC:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC ;CBCEFDHDGFGHDGDIGEF@EJIIGEEIECGFHGFHGGGHHHHGGKIFFEHGEGHFIEFFHHGDHHGJEGF?FBHFFGCHHFFII>GCFCFFGGCEBF? BC:Z:NGTCTATC +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E BC:Z:NGTCTATC +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT ;CACCDEBDEFGAGGGF5EGFFFEIDDIICGGF>H?GGG@DGAHD@HIDIFGG@IEEFHFGFEBGIHHDE5GHEIABFG>CB=FCF4GGHLBBJGFEHEG BC:Z:NGTCTATC +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE BC:Z:NGTCTATC +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA ;CBABFEEFJFGEFGCCDIGIFFIHDCGIEGHGHHEHGIDBGHHGFEIJHHGG@GFCHGFDHKFFEGHDIEFHHFHEIFCHFGCFBHG@>IBF5*DBHHG BC:Z:NGTCTATC +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF BC:Z:NGTCTATC +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 
TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC ;CBCDDDHDHFGGEGGICEGFGJF@GEIFECIFHJCDAI>HDAH?@EIDICDGEIFEB@7DHFFE?CCD4EFIEBHFC?FFBFFG3HC@AGFDFGCBHEG BC:Z:NGTCTATC diff --git a/src/htslib-1.21/test/fastq/inter_casavaOX-q.sam b/src/htslib-1.21/test/fastq/inter_casavaOX-q.sam new file mode 100644 index 0000000..1273184 --- /dev/null +++ b/src/htslib-1.21/test/fastq/inter_casavaOX-q.sam @@ -0,0 +1,10 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG * OX:Z:NGTCTATC +HS25_09827:2:1201:1505:59795#49 141 * 0 0 * * 0 0 AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT * OX:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 77 * 0 0 * * 0 0 TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA * OX:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC * OX:Z:NGTCTATC +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA * OX:Z:NGTCTATC +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT * OX:Z:NGTCTATC +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA * OX:Z:NGTCTATC +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA * OX:Z:NGTCTATC +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA * OX:Z:NGTCTATC +HS25_09827:2:1201:1624:69925#49 141 
* 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC * OX:Z:NGTCTATC diff --git a/src/htslib-1.21/test/fastq/inter_casavaOX.sam b/src/htslib-1.21/test/fastq/inter_casavaOX.sam new file mode 100644 index 0000000..85fbbdf --- /dev/null +++ b/src/htslib-1.21/test/fastq/inter_casavaOX.sam @@ -0,0 +1,10 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGDCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHEFCFHGGGHEJEAJE OX:Z:NGTCTATC +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC ;CBCEFDHDGFGHDGDIGEF@EJIIGEEIECGFHGFHGGGHHHHGGKIFFEHGEGHFIEFFHHGDHHGJEGF?FBHFFGCHHFFII>GCFCFFGGCEBF? OX:Z:NGTCTATC +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E OX:Z:NGTCTATC +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT ;CACCDEBDEFGAGGGF5EGFFFEIDDIICGGF>H?GGG@DGAHD@HIDIFGG@IEEFHFGFEBGIHHDE5GHEIABFG>CB=FCF4GGHLBBJGFEHEG OX:Z:NGTCTATC +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE OX:Z:NGTCTATC +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA 
;CBABFEEFJFGEFGCCDIGIFFIHDCGIEGHGHHEHGIDBGHHGFEIJHHGG@GFCHGFDHKFFEGHDIEFHHFHEIFCHFGCFBHG@>IBF5*DBHHG OX:Z:NGTCTATC +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF OX:Z:NGTCTATC +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC ;CBCDDDHDHFGGEGGICEGFGJF@GEIFECIFHJCDAI>HDAH?@EIDICDGEIFEB@7DHFFE?CCD4EFIEBHFC?FFBFFG3HC@AGFDFGCBHEG OX:Z:NGTCTATC diff --git a/src/htslib-1.21/test/fastq/inter_noaux-q.sam b/src/htslib-1.21/test/fastq/inter_noaux-q.sam new file mode 100644 index 0000000..107f31a --- /dev/null +++ b/src/htslib-1.21/test/fastq/inter_noaux-q.sam @@ -0,0 +1,10 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG * +HS25_09827:2:1201:1505:59795#49 141 * 0 0 * * 0 0 AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT * +HS25_09827:2:1201:1559:70726#49 77 * 0 0 * * 0 0 TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA * +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC * +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA * +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT * +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA * +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 
0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA * +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA * +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC * diff --git a/src/htslib-1.21/test/fastq/inter_noaux.sam b/src/htslib-1.21/test/fastq/inter_noaux.sam new file mode 100644 index 0000000..bc7edff --- /dev/null +++ b/src/htslib-1.21/test/fastq/inter_noaux.sam @@ -0,0 +1,10 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGDCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHEFCFHGGGHEJEAJE +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC ;CBCEFDHDGFGHDGDIGEF@EJIIGEEIECGFHGFHGGGHHHHGGKIFFEHGEGHFIEFFHHGDHHGJEGF?FBHFFGCHHFFII>GCFCFFGGCEBF? 
+HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT ;CACCDEBDEFGAGGGF5EGFFFEIDDIICGGF>H?GGG@DGAHD@HIDIFGG@IEEFHFGFEBGIHHDE5GHEIABFG>CB=FCF4GGHLBBJGFEHEG +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA ;CBABFEEFJFGEFGCCDIGIFFIHDCGIEGHGHHEHGIDBGHHGFEIJHHGG@GFCHGFDHKFFEGHDIEFHHFHEIFCHFGCFBHG@>IBF5*DBHHG +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC ;CBCDDDHDHFGGEGGICEGFGJF@GEIFECIFHJCDAI>HDAH?@EIDICDGEIFEB@7DHFFE?CCD4EFIEBHFC?FFBFFG3HC@AGFDFGCBHEG diff --git a/src/htslib-1.21/test/fastq/interleaved.fa b/src/htslib-1.21/test/fastq/interleaved.fa new file mode 100644 index 0000000..bd5ca44 --- /dev/null +++ b/src/htslib-1.21/test/fastq/interleaved.fa @@ -0,0 +1,20 @@ +>HS25_09827:2:1201:1505:59795#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=BDDDF +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG +>HS25_09827:2:1201:1505:59795#49/2 RG:Z:1#49 
+AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT +>HS25_09827:2:1201:1559:70726#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DBDFD +TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA +>HS25_09827:2:1201:1559:70726#49/2 RG:Z:1#49 +TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC +>HS25_09827:2:1201:1564:39627#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DDDFE +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA +>HS25_09827:2:1201:1564:39627#49/2 RG:Z:1#49 +AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT +>HS25_09827:2:1201:1565:91731#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DDFFD +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA +>HS25_09827:2:1201:1565:91731#49/2 RG:Z:1#49 +AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA +>HS25_09827:2:1201:1624:69925#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DD?DB +GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA +>HS25_09827:2:1201:1624:69925#49/2 RG:Z:1#49 +TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC diff --git a/src/htslib-1.21/test/fastq/interleaved.fq b/src/htslib-1.21/test/fastq/interleaved.fq new file mode 100644 index 0000000..5aa88e1 --- /dev/null +++ b/src/htslib-1.21/test/fastq/interleaved.fq @@ -0,0 +1,40 @@ +@HS25_09827:2:1201:1505:59795#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=BDDDF +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG ++ +CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGDCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHEFCFHGGGHEJEAJE 
+@HS25_09827:2:1201:1559:70726#49/2 RG:Z:1#49 +TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC ++ +;CBCEFDHDGFGHDGDIGEF@EJIIGEEIECGFHGFHGGGHHHHGGKIFFEHGEGHFIEFFHHGDHHGJEGF?FBHFFGCHHFFII>GCFCFFGGCEBF? +@HS25_09827:2:1201:1564:39627#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DDDFE +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA ++ +BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E +@HS25_09827:2:1201:1564:39627#49/2 RG:Z:1#49 +AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT ++ +;CACCDEBDEFGAGGGF5EGFFFEIDDIICGGF>H?GGG@DGAHD@HIDIFGG@IEEFHFGFEBGIHHDE5GHEIABFG>CB=FCF4GGHLBBJGFEHEG +@HS25_09827:2:1201:1565:91731#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DDFFD +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA ++ +CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE +@HS25_09827:2:1201:1565:91731#49/2 RG:Z:1#49 +AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA ++ +;CBABFEEFJFGEFGCCDIGIFFIHDCGIEGHGHHEHGIDBGHHGFEIJHHGG@GFCHGFDHKFFEGHDIEFHHFHEIFCHFGCFBHG@>IBF5*DBHHG +@HS25_09827:2:1201:1624:69925#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DD?DB +GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA ++ +CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF +@HS25_09827:2:1201:1624:69925#49/2 RG:Z:1#49 +TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC ++ +;CBCDDDHDHFGGEGGICEGFGJF@GEIFECIFHJCDAI>HDAH?@EIDICDGEIFEB@7DHFFE?CCD4EFIEBHFC?FFBFFG3HC@AGFDFGCBHEG diff --git a/src/htslib-1.21/test/fastq/interleaved_casava.fa b/src/htslib-1.21/test/fastq/interleaved_casava.fa new file mode 100644 index 
0000000..8ea7447 --- /dev/null +++ b/src/htslib-1.21/test/fastq/interleaved_casava.fa @@ -0,0 +1,20 @@ +>HS25_09827:2:1201:1505:59795#49 1:N:0:NGTCTATC +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG +>HS25_09827:2:1201:1505:59795#49 2:N:0:NGTCTATC +AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT +>HS25_09827:2:1201:1559:70726#49 1:N:0:NGTCTATC +TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA +>HS25_09827:2:1201:1559:70726#49 2:N:0:NGTCTATC +TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC +>HS25_09827:2:1201:1564:39627#49 1:N:0:NGTCTATC +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA +>HS25_09827:2:1201:1564:39627#49 2:N:0:NGTCTATC +AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT +>HS25_09827:2:1201:1565:91731#49 1:N:0:NGTCTATC +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA +>HS25_09827:2:1201:1565:91731#49 2:N:0:NGTCTATC +AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA +>HS25_09827:2:1201:1624:69925#49 1:N:0:NGTCTATC +GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA +>HS25_09827:2:1201:1624:69925#49 2:N:0:NGTCTATC +TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC diff --git a/src/htslib-1.21/test/fastq/interleaved_casava.fq b/src/htslib-1.21/test/fastq/interleaved_casava.fq new file mode 100644 index 0000000..39a1945 --- /dev/null +++ b/src/htslib-1.21/test/fastq/interleaved_casava.fq @@ -0,0 +1,40 @@ +@HS25_09827:2:1201:1505:59795#49 1:N:0:NGTCTATC 
+CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG ++ +CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGDCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHEFCFHGGGHEJEAJE +@HS25_09827:2:1201:1559:70726#49 2:N:0:NGTCTATC +TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC ++ +;CBCEFDHDGFGHDGDIGEF@EJIIGEEIECGFHGFHGGGHHHHGGKIFFEHGEGHFIEFFHHGDHHGJEGF?FBHFFGCHHFFII>GCFCFFGGCEBF? +@HS25_09827:2:1201:1564:39627#49 1:N:0:NGTCTATC +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA ++ +BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E +@HS25_09827:2:1201:1564:39627#49 2:N:0:NGTCTATC +AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT ++ +;CACCDEBDEFGAGGGF5EGFFFEIDDIICGGF>H?GGG@DGAHD@HIDIFGG@IEEFHFGFEBGIHHDE5GHEIABFG>CB=FCF4GGHLBBJGFEHEG +@HS25_09827:2:1201:1565:91731#49 1:N:0:NGTCTATC +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA ++ +CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE +@HS25_09827:2:1201:1565:91731#49 2:N:0:NGTCTATC +AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA ++ +;CBABFEEFJFGEFGCCDIGIFFIHDCGIEGHGHHEHGIDBGHHGFEIJHHGG@GFCHGFDHKFFEGHDIEFHHFHEIFCHFGCFBHG@>IBF5*DBHHG +@HS25_09827:2:1201:1624:69925#49 1:N:0:NGTCTATC +GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA ++ +CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF +@HS25_09827:2:1201:1624:69925#49 2:N:0:NGTCTATC +TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC ++ 
+;CBCDDDHDHFGGEGGICEGFGJF@GEIFECIFHJCDAI>HDAH?@EIDICDGEIFEB@7DHFFE?CCD4EFIEBHFC?FFBFFG3HC@AGFDFGCBHEG diff --git a/src/htslib-1.21/test/fastq/longline.fq b/src/htslib-1.21/test/fastq/longline.fq new file mode 100644 index 0000000..09cabd1 --- /dev/null +++ b/src/htslib-1.21/test/fastq/longline.fq @@ -0,0 +1,4 @@ +@readname XX:Z:baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab +ATGC ++ +qqqq diff --git a/src/htslib-1.21/test/fastq/longline.sam b/src/htslib-1.21/test/fastq/longline.sam new file mode 100644 index 0000000..4dc5e82 --- /dev/null +++ b/src/htslib-1.21/test/fastq/longline.sam @@ -0,0 +1 @@ +readname 4 * 0 0 * * 0 0 ATGC qqqq 
XX:Z:baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab diff --git a/src/htslib-1.21/test/fastq/minimal-q.sam b/src/htslib-1.21/test/fastq/minimal-q.sam new file mode 100644 index 0000000..d2daaf4 --- /dev/null +++ b/src/htslib-1.21/test/fastq/minimal-q.sam @@ -0,0 +1 @@ +x 4 * 0 0 * * 0 0 A * diff --git a/src/htslib-1.21/test/fastq/minimal.fa b/src/htslib-1.21/test/fastq/minimal.fa new file mode 100644 index 0000000..4f65d8c --- /dev/null +++ b/src/htslib-1.21/test/fastq/minimal.fa @@ -0,0 +1,2 @@ +>x +A diff --git a/src/htslib-1.21/test/fastq/minimal.fq b/src/htslib-1.21/test/fastq/minimal.fq new file mode 100644 index 0000000..6f13b94 --- /dev/null +++ b/src/htslib-1.21/test/fastq/minimal.fq @@ -0,0 +1,4 @@ +@x +A ++ ++ diff --git a/src/htslib-1.21/test/fastq/minimal.sam b/src/htslib-1.21/test/fastq/minimal.sam new file mode 100644 index 0000000..e12f4a6 --- /dev/null +++ b/src/htslib-1.21/test/fastq/minimal.sam @@ -0,0 +1 @@ +x 4 * 0 0 * * 0 0 A + diff --git a/src/htslib-1.21/test/fastq/multiline-q.sam 
b/src/htslib-1.21/test/fastq/multiline-q.sam new file mode 100644 index 0000000..d31407b --- /dev/null +++ b/src/htslib-1.21/test/fastq/multiline-q.sam @@ -0,0 +1,2 @@ +seq1 4 * 0 0 * * 0 0 NAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTN * +seq2 4 * 0 0 * * 0 0 RAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTY * diff --git a/src/htslib-1.21/test/fastq/multiline.fa b/src/htslib-1.21/test/fastq/multiline.fa new file mode 100644 index 0000000..2917e06 --- /dev/null +++ b/src/htslib-1.21/test/fastq/multiline.fa @@ -0,0 +1,10 @@ +>seq1 +NAAAAAAAAA +CCCCCCCCCC +GGGGGGGGGG +TTTTTTTTN +>seq2 +RAAAAAAAAA +CCCCCCCCCC +GGGGGGGGGG +TTTTTTTTY diff --git a/src/htslib-1.21/test/fastq/multiline.fq b/src/htslib-1.21/test/fastq/multiline.fq new file mode 100644 index 0000000..0c94aec --- /dev/null +++ b/src/htslib-1.21/test/fastq/multiline.fq @@ -0,0 +1,20 @@ +@seq1 +NAAAAAAAAA +CCCCCCCCCC +GGGGGGGGGG +TTTTTTTTN ++ +++++++++++ +@@@@@@@@@@ +########## +........> +@seq2 +RAAAAAAAAA +CCCCCCCCCC +GGGGGGGGGG +TTTTTTTTY ++ +<<<<<<<<<< +!!!!!!!!!! 
+########## +@@@@@@@@@ diff --git a/src/htslib-1.21/test/fastq/multiline.sam b/src/htslib-1.21/test/fastq/multiline.sam new file mode 100644 index 0000000..70442ef --- /dev/null +++ b/src/htslib-1.21/test/fastq/multiline.sam @@ -0,0 +1,2 @@ +seq1 4 * 0 0 * * 0 0 NAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTN ++++++++++@@@@@@@@@@##########........> +seq2 4 * 0 0 * * 0 0 RAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTY <<<<<<<<<ignore_001 name_001 +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG +>ignore_002 name_002 +TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA +>ignore_003 name_003 +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA +>name_004 +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA diff --git a/src/htslib-1.21/test/fastq/name2.fq b/src/htslib-1.21/test/fastq/name2.fq new file mode 100644 index 0000000..6ac19c9 --- /dev/null +++ b/src/htslib-1.21/test/fastq/name2.fq @@ -0,0 +1,16 @@ +@ignore_001 name_001 +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG ++ +CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGEFCFHGGGHEJEAJE +@ignore_003 name_003 +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA ++ +BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E +@name_004 +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA ++ +CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE diff --git a/src/htslib-1.21/test/fastq/name2.sam b/src/htslib-1.21/test/fastq/name2.sam new file mode 100644 index 0000000..a79a058 --- /dev/null +++ b/src/htslib-1.21/test/fastq/name2.sam @@ -0,0 +1,4 @@ +name_001 4 * 
0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGEFCFHGGGHEJEAJE +name_003 4 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E +name_004 4 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE diff --git a/src/htslib-1.21/test/fastq/r1-q.sam b/src/htslib-1.21/test/fastq/r1-q.sam new file mode 100644 index 0000000..c87f07f --- /dev/null +++ b/src/htslib-1.21/test/fastq/r1-q.sam @@ -0,0 +1,5 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=BDDDF +HS25_09827:2:1201:1559:70726#49 77 * 0 0 * * 0 0 TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DBDFD +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DDDFE +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DDFFD +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA * RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DD?DB diff --git a/src/htslib-1.21/test/fastq/r1.fa b/src/htslib-1.21/test/fastq/r1.fa new file mode 100644 index 0000000..4b7af5c --- /dev/null +++ 
b/src/htslib-1.21/test/fastq/r1.fa @@ -0,0 +1,10 @@ +>HS25_09827:2:1201:1505:59795#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=BDDDF +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG +>HS25_09827:2:1201:1559:70726#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DBDFD +TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA +>HS25_09827:2:1201:1564:39627#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DDDFE +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA +>HS25_09827:2:1201:1565:91731#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DDFFD +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA +>HS25_09827:2:1201:1624:69925#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DD?DB +GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA diff --git a/src/htslib-1.21/test/fastq/r1.fq b/src/htslib-1.21/test/fastq/r1.fq new file mode 100644 index 0000000..3728b9a --- /dev/null +++ b/src/htslib-1.21/test/fastq/r1.fq @@ -0,0 +1,20 @@ +@HS25_09827:2:1201:1505:59795#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=BDDDF +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG ++ +CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGEFCFHGGGHEJEAJE +@HS25_09827:2:1201:1564:39627#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DDDFE +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA ++ +BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E +@HS25_09827:2:1201:1565:91731#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DDFFD +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA ++ +CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE 
+@HS25_09827:2:1201:1624:69925#49/1 RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DD?DB +GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA ++ +CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF diff --git a/src/htslib-1.21/test/fastq/r1.sam b/src/htslib-1.21/test/fastq/r1.sam new file mode 100644 index 0000000..ffc0619 --- /dev/null +++ b/src/htslib-1.21/test/fastq/r1.sam @@ -0,0 +1,5 @@ +HS25_09827:2:1201:1505:59795#49 77 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGEFCFHGGGHEJEAJE RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DBDFD +HS25_09827:2:1201:1564:39627#49 77 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1:DDDFE +HS25_09827:2:1201:1565:91731#49 77 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DDFFD +HS25_09827:2:1201:1624:69925#49 77 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF RG:Z:1#49 BC:Z:NGTCTATC QT:Z:!1=DD?DB diff --git a/src/htslib-1.21/test/fastq/r2-q.sam b/src/htslib-1.21/test/fastq/r2-q.sam new file mode 100644 index 0000000..033fa98 --- /dev/null +++ b/src/htslib-1.21/test/fastq/r2-q.sam @@ -0,0 +1,5 @@ +HS25_09827:2:1201:1505:59795#49 141 * 0 0 * * 0 0 AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT * 
RG:Z:1#49 +HS25_09827:2:1201:1559:70726#49 141 * 0 0 * * 0 0 TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC * RG:Z:1#49 +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT * RG:Z:1#49 +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA * RG:Z:1#49 +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC * RG:Z:1#49 diff --git a/src/htslib-1.21/test/fastq/r2.fa b/src/htslib-1.21/test/fastq/r2.fa new file mode 100644 index 0000000..4d6f08e --- /dev/null +++ b/src/htslib-1.21/test/fastq/r2.fa @@ -0,0 +1,10 @@ +>HS25_09827:2:1201:1505:59795#49/2 RG:Z:1#49 +AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT +>HS25_09827:2:1201:1559:70726#49/2 RG:Z:1#49 +TTTTCTTTTATTAATTTTATACTTACATTTAAGTCTTTATTCCATTTTGAGTCAATGTTTGTATATGATGAGAGATAGGGGTCTAGTTTCATACTTCTAC +>HS25_09827:2:1201:1564:39627#49/2 RG:Z:1#49 +AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT +>HS25_09827:2:1201:1565:91731#49/2 RG:Z:1#49 +AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA +>HS25_09827:2:1201:1624:69925#49/2 RG:Z:1#49 +TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC diff --git a/src/htslib-1.21/test/fastq/r2.fq b/src/htslib-1.21/test/fastq/r2.fq new file mode 100644 index 0000000..a766bf9 --- /dev/null +++ b/src/htslib-1.21/test/fastq/r2.fq @@ -0,0 +1,20 @@ +@HS25_09827:2:1201:1505:59795#49/2 RG:Z:1#49 +AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT ++ 
+DCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHGCFCFFGGCEBF? +@HS25_09827:2:1201:1564:39627#49/2 RG:Z:1#49 +AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT ++ +;CACCDEBDEFGAGGGF5EGFFFEIDDIICGGF>H?GGG@DGAHD@HIDIFGG@IEEFHFGFEBGIHHDE5GHEIABFG>CB=FCF4GGHLBBJGFEHEG +@HS25_09827:2:1201:1565:91731#49/2 RG:Z:1#49 +AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA ++ +;CBABFEEFJFGEFGCCDIGIFFIHDCGIEGHGHHEHGIDBGHHGFEIJHHGG@GFCHGFDHKFFEGHDIEFHHFHEIFCHFGCFBHG@>IBF5*DBHHG +@HS25_09827:2:1201:1624:69925#49/2 RG:Z:1#49 +TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC ++ +;CBCDDDHDHFGGEGGICEGFGJF@GEIFECIFHJCDAI>HDAH?@EIDICDGEIFEB@7DHFFE?CCD4EFIEBHFC?FFBFFG3HC@AGFDFGCBHEG diff --git a/src/htslib-1.21/test/fastq/r2.sam b/src/htslib-1.21/test/fastq/r2.sam new file mode 100644 index 0000000..f75edc9 --- /dev/null +++ b/src/htslib-1.21/test/fastq/r2.sam @@ -0,0 +1,5 @@ +HS25_09827:2:1201:1505:59795#49 141 * 0 0 * * 0 0 AAGGAAAGAAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAAGTAGGAAGAATTCATCTACCCAATT DCHFEFBHAGCGACF7CJI8HBIIEFGFEBG?DCGA?ACFGGI=BEDG?EFEHFFFEHFD?HG+DFH>FFHGFBFE4F@I3HF@>A5F?GFHGCFCFFGGCEBF? 
RG:Z:1#49 +HS25_09827:2:1201:1564:39627#49 141 * 0 0 * * 0 0 AATTCCTTGTTTGAAAACACTCCTAGCACCATGAGTCTCTGCTGCTTTATCTAAAGCATCTCTGGGGCTCATGTGTTTGTGAAGAGCTTGCCTAGCTTTT ;CACCDEBDEFGAGGGF5EGFFFEIDDIICGGF>H?GGG@DGAHD@HIDIFGG@IEEFHFGFEBGIHHDE5GHEIABFG>CB=FCF4GGHLBBJGFEHEG RG:Z:1#49 +HS25_09827:2:1201:1565:91731#49 141 * 0 0 * * 0 0 AGCACAGATTCATGCTAGCACATGGATGACCCTCAACAGCATTATCCCCAGCGATAGAAGCCAGACACAAAAGAACACATATGACAGGATTCCATTGATA ;CBABFEEFJFGEFGCCDIGIFFIHDCGIEGHGHHEHGIDBGHHGFEIJHHGG@GFCHGFDHKFFEGHDIEFHHFHEIFCHFGCFBHG@>IBF5*DBHHG RG:Z:1#49 +HS25_09827:2:1201:1624:69925#49 141 * 0 0 * * 0 0 TCCTTCCATTCCATTTTTGTGTTTGTTTTGTTCTTTTCTGTCACTGATCCGTATTACCACTTTTGGAAAAAAATAAATAAATAAATAAATAAAAGGCAGC ;CBCDDDHDHFGGEGGICEGFGJF@GEIFECIFHJCDAI>HDAH?@EIDICDGEIFEB@7DHFFE?CCD4EFIEBHFC?FFBFFG3HC@AGFDFGCBHEG RG:Z:1#49 diff --git a/src/htslib-1.21/test/fastq/single.fa b/src/htslib-1.21/test/fastq/single.fa new file mode 100644 index 0000000..cdd58d8 --- /dev/null +++ b/src/htslib-1.21/test/fastq/single.fa @@ -0,0 +1,10 @@ +>HS25_09827:2:1201:1505:59795#49 RG:Z:1#49 +CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG +>HS25_09827:2:1201:1559:70726#49 RG:Z:1#49 +TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA +>HS25_09827:2:1201:1564:39627#49 RG:Z:1#49 +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA +>HS25_09827:2:1201:1565:91731#49 RG:Z:1#49 +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA +>HS25_09827:2:1201:1624:69925#49 RG:Z:1#49 +GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA diff --git a/src/htslib-1.21/test/fastq/single.fq b/src/htslib-1.21/test/fastq/single.fq new file mode 100644 index 0000000..c6ad15d --- /dev/null +++ b/src/htslib-1.21/test/fastq/single.fq @@ -0,0 +1,20 @@ +@HS25_09827:2:1201:1505:59795#49 RG:Z:1#49 
+CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG ++ +CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGEFCFHGGGHEJEAJE +@HS25_09827:2:1201:1564:39627#49 RG:Z:1#49 +ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA ++ +BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E +@HS25_09827:2:1201:1565:91731#49 RG:Z:1#49 +GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA ++ +CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE +@HS25_09827:2:1201:1624:69925#49 RG:Z:1#49 +GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA ++ +CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF diff --git a/src/htslib-1.21/test/fastq/single_aux-q.sam b/src/htslib-1.21/test/fastq/single_aux-q.sam new file mode 100644 index 0000000..68af8bc --- /dev/null +++ b/src/htslib-1.21/test/fastq/single_aux-q.sam @@ -0,0 +1,5 @@ +HS25_09827:2:1201:1505:59795#49 4 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG * RG:Z:1#49 +HS25_09827:2:1201:1559:70726#49 4 * 0 0 * * 0 0 TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA * RG:Z:1#49 +HS25_09827:2:1201:1564:39627#49 4 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA * RG:Z:1#49 +HS25_09827:2:1201:1565:91731#49 4 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA * RG:Z:1#49 +HS25_09827:2:1201:1624:69925#49 4 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA * RG:Z:1#49 
diff --git a/src/htslib-1.21/test/fastq/single_aux.sam b/src/htslib-1.21/test/fastq/single_aux.sam new file mode 100644 index 0000000..bfe355e --- /dev/null +++ b/src/htslib-1.21/test/fastq/single_aux.sam @@ -0,0 +1,5 @@ +HS25_09827:2:1201:1505:59795#49 4 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGEFCFHGGGHEJEAJE RG:Z:1#49 +HS25_09827:2:1201:1564:39627#49 4 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E RG:Z:1#49 +HS25_09827:2:1201:1565:91731#49 4 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE RG:Z:1#49 +HS25_09827:2:1201:1624:69925#49 4 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF RG:Z:1#49 diff --git a/src/htslib-1.21/test/fastq/single_noaux-q.sam b/src/htslib-1.21/test/fastq/single_noaux-q.sam new file mode 100644 index 0000000..24542c6 --- /dev/null +++ b/src/htslib-1.21/test/fastq/single_noaux-q.sam @@ -0,0 +1,5 @@ +HS25_09827:2:1201:1505:59795#49 4 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG * +HS25_09827:2:1201:1559:70726#49 4 * 0 0 * * 0 0 TTGTTAAAATGACCATACCCAAAGTGATCTACAGACTCAATACAATTTCTATTGAAATACCAATCACACTCTTCACAGAACTAGAAAAACAGTTCTAAAA * +HS25_09827:2:1201:1564:39627#49 4 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA * +HS25_09827:2:1201:1565:91731#49 4 * 0 0 * 
* 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA * +HS25_09827:2:1201:1624:69925#49 4 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA * diff --git a/src/htslib-1.21/test/fastq/single_noaux.sam b/src/htslib-1.21/test/fastq/single_noaux.sam new file mode 100644 index 0000000..9a98989 --- /dev/null +++ b/src/htslib-1.21/test/fastq/single_noaux.sam @@ -0,0 +1,5 @@ +HS25_09827:2:1201:1505:59795#49 4 * 0 0 * * 0 0 CCGTTAGAGCATTTGTTGAAAATGCTTTCCTTGCTCCATGTGATGACTCTGGTGCCCTTGTCAAAAGCCAGCTGGGCCTATTCGTGTGGGTCTGTTTCTG CABCFGDEEFFEFHGHGGFFGDIGIJFIFHHGHEIFGHBCGHDIFBE9GIAICGGICFIBFGGHGDGGGHE?GIGDFGGHEGIEJG>;FGEFCFHGGGHEJEAJE +HS25_09827:2:1201:1564:39627#49 4 * 0 0 * * 0 0 ACGCGGCAATCCAATGTGTGAGTTGAGAAGCGGTGAGGAGGGAATCCTAATTTTATGAGCAGGTCAGGACCGTGGGAGATACCTGACACCTGAGATGGTA BACCFGBFGFHGGJGHGGFEGHIGIJHFEH:HHEHGHHBGGH9IAGHGFHIFJFFAFGIFDIGHKEIGF,CGD66?7EFI5EEG>EGGGGD5=HH6E +HS25_09827:2:1201:1565:91731#49 4 * 0 0 * * 0 0 GACATGCCATAACATTCATGTTTTATGTGTACAAGTCAATGAATTTTAGTATATTTACAGAGTTGTATGACTGTCTCCACAATCTAATTTTAGGTTTCCA CABFFGFFJFHEGEGJGGDG?FIGHHHBGHHHGIIGHGHGGHDGHFHIDFCIKEGIFHGGII9HFFGGGEEIGGEEHGGEEGDEHFH>FGGGGHAFAHGE +HS25_09827:2:1201:1624:69925#49 4 * 0 0 * * 0 0 GCCAGCCTCCTTCTCAATGGTCTTTTTAAACATTATATGAAAACCAGACATTTACATTTGATTTCTTTTTCAATACTATACAGTTCTAAGAGAAAAAACA CABEFGFGIFGGGJGHGGFH?FDHGHDHGHEHHJCGHHFHDHDHFGHIGHIFFHGHFGGGI9GHF@IGGH;FICGEFEIHGGIEEFC:DEGGGBDJHHFF diff --git a/src/htslib-1.21/test/fastq/test-fastq.sh b/src/htslib-1.21/test/fastq/test-fastq.sh new file mode 100755 index 0000000..d1f7719 --- /dev/null +++ b/src/htslib-1.21/test/fastq/test-fastq.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# +# Copyright (C) 2020 Genome Research Ltd. 
+# +# Author: James Bonfield +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +. ../simple_test_driver.sh + +echo "Testing fastq support" + +# Eg set TEST_PRECMD=valgrind +tview="${TEST_PRECMD} ../test_view" +test_driver fastq.tst +exit $? diff --git a/src/htslib-1.21/test/fastqs_README.txt b/src/htslib-1.21/test/fastqs_README.txt new file mode 100644 index 0000000..f0c6c26 --- /dev/null +++ b/src/htslib-1.21/test/fastqs_README.txt @@ -0,0 +1,109 @@ +This README file describes the FASTQ example files provided as supplementary +information to the open-access publication: + +P.J.A. Cock, C.J. Fields, N. Goto, M.L. Heuer and P.M. Rice (2009). The Sanger +FASTQ file format for sequences with quality scores, and the Solexa/Illumina +FASTQ variants. + +These files are provided freely and we encourage anyone writing a FASTQ parser +to use them as part of your test suite. Permission is granted to freely +distribute and modify the files. 
We request (but do not insist) that this +README file is included, or at least a reference to the above paper. Please +cite the above paper if appropriate. We also request (but do not insist) that +the example files are not modified, in order that they may serve as a common +reference. + +Invalid FASTQ files +=================== + +The archive contains the following sample FASTQ files with names of the form +error_NAME.fastq, which all contain errors and should be rejected (if parsed +as any of the three FASTQ variants): + +error_diff_ids.fastq +error_double_qual.fastq +error_double_seq.fastq +error_long_qual.fastq +error_no_qual.fastq +error_qual_del.fastq +error_qual_escape.fastq +error_qual_null.fastq +error_qual_space.fastq +error_qual_tab.fastq +error_qual_unit_sep.fastq +error_qual_vtab.fastq +error_short_qual.fastq +error_spaces.fastq +error_tabs.fastq +error_trunc_at_seq.fastq +error_trunc_at_plus.fastq +error_trunc_at_qual.fastq +error_trunc_in_title.fastq +error_trunc_in_seq.fastq +error_trunc_in_plus.fastq +error_trunc_in_qual.fastq + +Of these, those with names error_qual_XXX.fastq would be valid except for the +inclusion of spaces or non-printing ASCII characters outside the range allowed +in the quality string. The files named error_trunc_XXX.fastq would be valid +but for being truncated (e.g. simulating a partial copy over the network). + +The special cases of FASTQ files which would be valid as one variant, but not +another, are covered below. 
+ +Valid FASTQ +=========== + +The archive contains the following valid sample FASTQ input files for testing: + +longreads_original_sanger.fastq +wrapping_original_sanger.fastq +illumina_full_range_original_illumina.fastq +sanger_full_range_original_sanger.fastq +solexa_full_range_original_solexa.fastq +misc_dna_original_sanger.fastq +misc_rna_original_sanger.fastq + +These all have the form NAME_original_FORMAT.fastq, where NAME is a prefix for +that example, and FORMAT is one of sanger, solexa or illumina indicating which +FASTQ variant that example is using. There are three matching files called +NAME_as_FORMAT.fastq showing how the original file should be converted into +each of the three FASTQ variants. These converted files are standardised not +to use line wrapping (so each record has exactly four lines), and omit the +optional repetition of the read titles on the plus line. + +The file longreads_original_sanger.fastq is based on real Roche 454 reads from +the Sanger Institute for the potato cyst nematodes Globodera pallida. Ten +of the reads have been presented as FASTQ records, wrapping the sequence and +the quality lines at 80 characters. This means some of the quality lines start +with "@" or "+" characters, which may cause problems with naive parsers. Also +note that the sequence is mixed case (with upper case denoting the trimmed +region), and furthermore the free format title lines are over 100 characters +and encode assorted read information (and are repeated on the "+" lines). + +The wrapping_original_sanger.fastq is based on three real reads from the NCBI +Short Read Archive, but has been carefully edited to use line wrapping for the +quality lines (but not the sequence lines) such that, due to the occurrence +of "@" and "+" on alternating lines, the file may be misinterpreted by a +simplistic parser. While this is therefore a very artificial example, it +remains a valid FASTQ file, and is useful for testing purposes. 
+ +The sanger_full_range_original_sanger.fastq file uses PHRED scores from 0 to +93 inclusive, covering ASCII characters from 33 (!) to 126 (~). This means it +cannot be treated as a Solexa or Illumina 1.3+ FASTQ file, and attempting to +parse it as such should raise an error. + +The solexa_full_range_original_solexa.fastq file uses Solexa scores from -5 to +62 inclusive, covering ASCII characters from 59 (;) to 126 (~). This means it +cannot be treated as an Illumina 1.3+ FASTQ file, and attempting to parse it as +such should raise an error. On the basis of the quality characters, the file +would also qualify as a valid Sanger FASTQ file. + +The illumina_full_range_original_illumina.fastq file uses PHRED scores from 0 +to 62 inclusive, covering ASCII characters from 64 (@) to 126 (~). On the +basis of the quality characters, the file would also qualify as a valid Sanger +or Solexa FASTQ file. + +The misc_dna_original_sanger.fastq and misc_rna_original_sanger.fastq files +are artificial reads using the full range of IUPAC DNA or RNA letters, +including ambiguous character codes, and both cases. diff --git a/src/htslib-1.21/test/fieldarith.c b/src/htslib-1.21/test/fieldarith.c new file mode 100644 index 0000000..5a46b3e --- /dev/null +++ b/src/htslib-1.21/test/fieldarith.c @@ -0,0 +1,74 @@ +/* test/fieldarith.c -- CIGAR field arithmetic test suite. + + Copyright (C) 2013-2014 Genome Research Ltd. 
+ + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include <config.h> /* NOTE(review): header name restored from htslib convention -- confirm against upstream */ + +#include <stdio.h> + +#include "../htslib/sam.h" + +int ntests = 0; +int nfailures = 0; + +void check(const bam1_t *aln, const char *testname, const char *tag, int value) +{ /* Compare `value` with the integer aux field `tag` of `aln`: records without the tag are skipped, mismatches are reported on stderr and counted in nfailures. */ + int32_t refvalue; + uint8_t *aux = bam_aux_get(aln, tag); + if (!aux) return; + ntests++; + refvalue = bam_aux2i(aux); + if (value != refvalue) { + fprintf(stderr, "%s FAIL for %s: computed %d != %d expected\n", + testname, bam_get_qname(aln), value, refvalue); + nfailures++; + } +} + +int main(int argc, char **argv) +{ /* Run the XQ/XR/XE checks on every record of each SAM file named on the command line; returns 1 if a file cannot be opened or any check failed, else 0. */ + sam_hdr_t *header; + bam1_t *aln = bam_init1(); + int i; + + for (i = 1; i < argc; i++) { + samFile *in = sam_open(argv[i], "r"); + if (in == NULL) { perror(argv[i]); return 1; } /* report the file that actually failed, not always the first argument */ + + header = sam_hdr_read(in); + while (sam_read1(in, header, aln) >= 0) { + check(aln, "cigar2qlen", "XQ", + bam_cigar2qlen(aln->core.n_cigar, bam_get_cigar(aln))); + check(aln, "cigar2rlen", "XR", + bam_cigar2rlen(aln->core.n_cigar, bam_get_cigar(aln))); + check(aln, "endpos", "XE", bam_endpos(aln)); + } + + sam_hdr_destroy(header); + sam_close(in); + } + + bam_destroy1(aln); + + return (nfailures > 0); +} diff --git a/src/htslib-1.21/test/fieldarith.sam b/src/htslib-1.21/test/fieldarith.sam new file mode 100644 index 0000000..60eeca8 --- /dev/null +++ b/src/htslib-1.21/test/fieldarith.sam @@ -0,0 +1,15 @@ +@SQ SN:one LN:1000 +@SQ SN:two LN:500 +@CO For each SAM record that has each listed aux field, performs these tests: +@CO XQ is the expected result for bam_cigar2qlen() +@CO XR is the expected result for bam_cigar2rlen() +@CO XE is the expected result for bam_endpos() +@CO (Note that these are all zero-based, while POS is one-based in SAM) +r1 0 one 50 20 8M * 0 0 ATGCATGC qqqqqqqq XQ:i:8 XR:i:8 XE:i:57 +r2 0 one 100 20 50M * 0 0 ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq XQ:i:50 XR:i:50 XE:i:149 +unmapped 5 two 200 0 * = 200 0 ATGCATGC qqqqqqqq XQ:i:0 XR:i:0 XE:i:200 +hascigar 5 two 200 0 6M2S = 200 0 ATGCATGC qqqqqqqq XQ:i:8 
XR:i:6 XE:i:200 +s1 0 one 300 20 2M * 0 0 AT qq XQ:i:2 XR:i:2 XE:i:301 +su1 4 * 0 0 * * 0 0 AT qq XQ:i:0 XR:i:0 XE:i:0 +su2 5 two 400 0 * = 400 0 AT qq XQ:i:0 XR:i:0 XE:i:400 +su3 4 one 500 0 2M * 0 0 AT qq XQ:i:2 XR:i:2 XE:i:500 diff --git a/src/htslib-1.21/test/formatcols.vcf b/src/htslib-1.21/test/formatcols.vcf new file mode 100644 index 0000000..6bbdb3a --- /dev/null +++ b/src/htslib-1.21/test/formatcols.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.3 +##FILTER= +##contig= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S² S3 +1 100 a A T . . . S a bbbbbbb ccccccccc diff --git a/src/htslib-1.21/test/formatmissing-out.vcf b/src/htslib-1.21/test/formatmissing-out.vcf new file mode 100644 index 0000000..dd8d5bb --- /dev/null +++ b/src/htslib-1.21/test/formatmissing-out.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.3 +##FILTER= +##contig= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3 +1 100 a A T . . . . . . . diff --git a/src/htslib-1.21/test/formatmissing.vcf b/src/htslib-1.21/test/formatmissing.vcf new file mode 100644 index 0000000..dd8d5bb --- /dev/null +++ b/src/htslib-1.21/test/formatmissing.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.3 +##FILTER= +##contig= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3 +1 100 a A T . . . . . . . diff --git a/src/htslib-1.21/test/fuzz/hts_open_fuzzer.c b/src/htslib-1.21/test/fuzz/hts_open_fuzzer.c new file mode 100644 index 0000000..2818eb1 --- /dev/null +++ b/src/htslib-1.21/test/fuzz/hts_open_fuzzer.c @@ -0,0 +1,213 @@ +/* test/fuzz/hts_open_fuzzer.c -- Fuzz driver for hts_open. + + Copyright (C) 2018 Google LLC. + Copyright (C) 2019-2020, 2023 Genome Research Ltd. 
+ + Author: Markus Kusano + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "../../htslib/hfile.h" +#include "../../htslib/hts.h" +#include "../../htslib/sam.h" +#include "../../htslib/vcf.h" + +static void hts_close_or_abort(htsFile* file) { + if (hts_close(file) != 0) { + abort(); + } +} + +static void view_sam(const uint8_t *data, size_t size, char *mode, + int close_abort) { + uint8_t *copy = malloc(size); + if (copy == NULL) { + abort(); + } + memcpy(copy, data, size); + + hFILE *memfile = hopen("mem:", "rb:", copy, size); + if (memfile == NULL) { + free(copy); + return; + } + + htsFile *in = hts_hopen(memfile, "data", "rb"); + if (in == NULL) { + if (hclose(memfile) != 0) + abort(); + return; + } + + samFile *out = sam_open("/dev/null", mode); + if (!out) + abort(); + +#ifdef FUZZ_FAI + // Not critical if this doesn't work, but can test more if + // we're in the right location. 
+ // + // We can't rely on what the pwd is for the OSS-fuzz so we don't enable + // this by default. + if (hts_set_fai_filename(out, "../c2.fa") < 0) { + static int warned = 0; + if (!warned) { + warned = 1; + fprintf(stderr, "Warning couldn't find the c2.fa file\n"); + } + } +#endif + + sam_hdr_t *hdr = sam_hdr_read(in); + if (hdr == NULL) { + if (close_abort) + hts_close_or_abort(out); + else + hts_close(out); + hts_close(in); + return; + } + + // This will force the header to be parsed. + (void) sam_hdr_count_lines(hdr, "SQ"); + + if (sam_hdr_write(out, hdr) != 0) + goto err; + + bam1_t *b = bam_init1(); + if (b == NULL) + goto err; + + while (sam_read1(in, hdr, b) >= 0) { + if (sam_write1(out, hdr, b) < 0) + break; + } + bam_destroy1(b); + + err: + sam_hdr_destroy(hdr); + if (close_abort) + hts_close_or_abort(out); + else + hts_close(out); + hts_close(in); +} + +static void view_vcf(const uint8_t *data, size_t size, char *mode) { + uint8_t *copy = malloc(size); + if (copy == NULL) { + abort(); + } + memcpy(copy, data, size); + + hFILE *memfile = hopen("mem:", "rb:", copy, size); + if (memfile == NULL) { + free(copy); + return; + } + + htsFile *in = hts_hopen(memfile, "data", "rb"); + if (in == NULL) { + if (hclose(memfile) != 0) + abort(); + return; + } + + vcfFile *out = vcf_open("/dev/null", mode); + if (!out) + abort(); + + bcf_hdr_t *hdr = bcf_hdr_read(in); + if (hdr == NULL) { + hts_close_or_abort(out); + hts_close(in); + return; + } + + if (bcf_hdr_write(out, hdr) != 0) + goto err; + + bcf1_t *rec = bcf_init(); + if (rec == NULL) + goto err; + + while (bcf_read(in, hdr, rec) >= 0) { + if (bcf_write(out, hdr, rec) < 0) + break; + } + bcf_destroy(rec); + + err: + bcf_hdr_destroy(hdr); + hts_close_or_abort(out); + hts_close(in); +} + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + // Only data as a mem file purely for purposes of determining format + hFILE *memfile; + uint8_t *copy = malloc(size); + if (copy == NULL) { + abort(); + } + 
memcpy(copy, data, size); + // hopen does not take ownership of `copy`, but hts_hopen does. + memfile = hopen("mem:", "rb:", copy, size); + if (memfile == NULL) { + free(copy); + return 0; + } + + htsFile *ht_file = hts_hopen(memfile, "data", "rb"); + if (ht_file == NULL) { + if (hclose(memfile) != 0) { + abort(); + } + return 0; + } + int ftype = ht_file->format.category; + hts_close(ht_file); + + // Now repeat a read-write loop multiple times per input, testing + // encoding in all output formats. + // (Although we could just ignore ftype and do all 5 for all inputs) + switch (ftype) { + case sequence_data: + view_sam(data, size, "w", 1); // SAM + view_sam(data, size, "wb", 1); // BAM + view_sam(data, size, "wc", 0); // CRAM + break; + case variant_data: + view_vcf(data, size, "w"); // VCF + view_vcf(data, size, "wb"); // BCF + break; + default: + break; + } + return 0; +} diff --git a/src/htslib-1.21/test/header_syms.pl b/src/htslib-1.21/test/header_syms.pl new file mode 100755 index 0000000..a8d4a88 --- /dev/null +++ b/src/htslib-1.21/test/header_syms.pl @@ -0,0 +1,106 @@ +#!/usr/bin/env perl +# +# Copyright (C) 2019 Genome Research Ltd. +# +# Author: Rob Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Script to extract symbol names from HTSlib header files. Used to +# check the shared library for missing exports. + +# Instead of implementing a full C parser, this attempts to do the minimum +# amount it can get away with by scrubbing out most of the header text and +# then looking through the rest for function declarations. + +# Roughly equivalent Exuberant-ctags command is: +# ctags -f - -n -I HTS_RESULT_USED -I HTS_DEPRECATED+ -I HTS_FORMAT+ \ +# -I KS_ATTR_PRINTF+ +# Unfortunately this is not the default ctags on all platforms, hence this +# script. + +use strict; +use warnings; +use Getopt::Long; + +# Use this option to show the processed version of the header text +# instead of the function list. +my $show_processed = 0; + +GetOptions('show-processed' => \$show_processed); + +# List of functions to strip from the output. Currently empty, +# but this functionality is retained for potential future use. 
+my %ignore = map { $_ => 1 } qw( ); + +foreach my $file (@ARGV) { + extract_symbols($file, $show_processed, \%ignore); +} + +sub extract_symbols { + my ($file, $show_processed, $ignore) = @_; + + local $/ = undef; + + open(my $f, '<', $file) || die "Couldn't open $file : $!\n"; + my $text = <$f>; + $text =~ tr/\r//d; + close($f) || die "Error reading $file : $!\n"; + + # Get rid of comments + $text =~ s#/\*.*?\*/##sg; + $text =~ s#//.*$##mg; + + # Remove extern "C" brackets + $text =~ s/#ifdef\s+__cplusplus.*?#endif//sg; + + # Remove #if 0 sections + $text =~ s/^\s*#\s*if\s+0\s+.*?#\s*endif\s//msg; + + # Remove #defines + $text =~ s/\n\s*?#\s*?define\s+(?:[^\n]+\\\n)*[^\n]+//sg; + + # Remove content inside curly braces + $text =~ s/(\{(?:(?>[^{}]+)|(?1))*\})/{}/sg; + + # Get rid of typedefs + $text =~ s/typedef\s+[^;]+;//sg; + + # Get rid of some macros + $text =~ s/HTS_RESULT_USED//g; + $text =~ s/HTSLIB_EXPORT//g; + + $text =~ s/HTS_DEPRECATED\s*?\(\"[^"]+\"?\)//g; + $text =~ s/HTS_FORMAT\s*?\(.*?\)//g; + $text =~ s/KS_ATTR_PRINTF\s*?\(.*?\)//g; + + # Get rid of static inline functions + $text =~ s/static\s+inline\s+(?:\S+\s+)+?(\S+)\s*(\((?:(?>[^()]+)|(?-1))*\))\s*{}//g; + + if ($show_processed) { + print $text; + return; + } + + # Find functions and print them + while ($text =~ m/^\s+(?:\S+\s+)+?(?:\*+\s*)?(\S+)\s*(\((?:(?>[^()]+)|(?-1))*\))\s*;/msg) { + next if (exists($ignore->{$1})); + print "$1\n"; + } +} diff --git a/src/htslib-1.21/test/hfile.c b/src/htslib-1.21/test/hfile.c new file mode 100644 index 0000000..741cf7a --- /dev/null +++ b/src/htslib-1.21/test/hfile.c @@ -0,0 +1,312 @@ +/* test/hfile.c -- Test cases for low-level input/output streams. + + Copyright (C) 2013-2014, 2016, 2018 Genome Research Ltd. 
+ + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include +#include + +#include + +#include "../htslib/hfile.h" +#include "../htslib/hts_defs.h" +#include "../htslib/kstring.h" + +void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) HTS_NORETURN +fail(const char *format, ...) 
+{ + int err = errno; + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + if (err != 0) fprintf(stderr, ": %s", strerror(err)); + fprintf(stderr, "\n"); + exit(EXIT_FAILURE); +} + +void check_offset(hFILE *f, off_t off, const char *message) +{ + off_t ret = htell(f); + if (ret < 0) fail("htell(%s)", message); + if (ret == off) return; + + fprintf(stderr, "%s offset incorrect: expected %ld but got %ld\n", + message, (long)off, (long)ret); + exit(EXIT_FAILURE); +} + +char *slurp(const char *filename) +{ + char *text; + struct stat sbuf; + size_t filesize; + FILE *f = fopen(filename, "rb"); + if (f == NULL) fail("fopen(\"%s\", \"rb\")", filename); + if (fstat(fileno(f), &sbuf) != 0) fail("fstat(\"%s\")", filename); + filesize = sbuf.st_size; + + text = (char *) malloc(filesize + 1); + if (text == NULL) fail("malloc(text)"); + + if (fread(text, 1, filesize, f) != filesize) fail("fread"); + fclose(f); + + text[filesize] = '\0'; + return text; +} + +hFILE *fin = NULL; +hFILE *fout = NULL; + +void reopen(const char *infname, const char *outfname) +{ + if (fin) { if (hclose(fin) != 0) fail("hclose(input)"); } + if (fout) { if (hclose(fout) != 0) fail("hclose(output)"); } + + fin = hopen(infname, "r"); + if (fin == NULL) fail("hopen(\"%s\")", infname); + + fout = hopen(outfname, "w"); + if (fout == NULL) fail("hopen(\"%s\")", outfname); +} + +int main(void) +{ + static const int size[] = { 1, 13, 403, 999, 30000 }; + + char buffer[40000]; + char *original; + int c, i; + ssize_t n; + off_t off; + + reopen("vcf.c", "test/hfile1.tmp"); + while ((c = hgetc(fin)) != EOF) { + if (hputc(c, fout) == EOF) fail("hputc"); + } + if (herrno(fin)) { errno = herrno(fin); fail("hgetc"); } + + reopen("test/hfile1.tmp", "test/hfile2.tmp"); + if (hpeek(fin, buffer, 50) < 0) fail("hpeek"); + while ((n = hread(fin, buffer, 17)) > 0) { + if (hwrite(fout, buffer, n) != n) fail("hwrite"); + } + if (n < 0) fail("hread"); + + reopen("test/hfile2.tmp", 
"test/hfile3.tmp"); + while ((n = hread(fin, buffer, sizeof buffer)) > 0) { + if (hwrite(fout, buffer, n) != n) fail("hwrite"); + if (hpeek(fin, buffer, 700) < 0) fail("hpeek"); + } + if (n < 0) fail("hread"); + + reopen("test/hfile3.tmp", "test/hfile4.tmp"); + i = 0; + off = 0; + while ((n = hread(fin, buffer, size[i++ % 5])) > 0) { + off += n; + buffer[n] = '\0'; + check_offset(fin, off, "pre-peek"); + if (hputs(buffer, fout) == EOF) fail("hputs"); + if ((n = hpeek(fin, buffer, size[(i+3) % 5])) < 0) fail("hpeek"); + check_offset(fin, off, "post-peek"); + } + if (n < 0) fail("hread"); + + reopen("test/hfile4.tmp", "test/hfile5.tmp"); + while (hgets(buffer, 80, fin) != NULL) { + size_t l = strlen(buffer); + if (l > 79) fail("hgets read %zu bytes, should be < 80", l); + if (hwrite(fout, buffer, l) != l) fail("hwrite"); + } + if (herrno(fin)) fail("hgets"); + + reopen("test/hfile5.tmp", "test/hfile6.tmp"); + n = hread(fin, buffer, 200); + if (n < 0) fail("hread"); + else if (n != 200) fail("hread only got %d", (int)n); + if (hwrite(fout, buffer, 1000) != 1000) fail("hwrite"); + check_offset(fin, 200, "input/first200"); + check_offset(fout, 1000, "output/first200"); + + if (hseek(fin, 800, SEEK_CUR) < 0) fail("hseek/cur"); + check_offset(fin, 1000, "input/seek"); + for (off = 1000; (n = hread(fin, buffer, sizeof buffer)) > 0; off += n) + if (hwrite(fout, buffer, n) != n) fail("hwrite"); + if (n < 0) fail("hread"); + check_offset(fin, off, "input/eof"); + check_offset(fout, off, "output/eof"); + + if (hseek(fin, 200, SEEK_SET) < 0) fail("hseek/set"); + if (hseek(fout, 200, SEEK_SET) < 0) fail("hseek(output)"); + check_offset(fin, 200, "input/backto200"); + check_offset(fout, 200, "output/backto200"); + n = hread(fin, buffer, 800); + if (n < 0) fail("hread"); + else if (n != 800) fail("hread only got %d", (int)n); + if (hwrite(fout, buffer, 800) != 800) fail("hwrite"); + check_offset(fin, 1000, "input/wrote800"); + check_offset(fout, 1000, "output/wrote800"); + + if 
(hflush(fout) == EOF) fail("hflush"); + + original = slurp("vcf.c"); + for (i = 1; i <= 6; i++) { + char *text; + snprintf(buffer, sizeof(buffer), "test/hfile%d.tmp", i); + text = slurp(buffer); + if (strcmp(original, text) != 0) { + fprintf(stderr, "%s differs from vcf.c\n", buffer); + free(text); + free(original); + return EXIT_FAILURE; + } + free(text); + } + free(original); + + if (hclose(fin) != 0) fail("hclose(input)"); + if (hclose(fout) != 0) fail("hclose(output)"); + + fout = hopen("test/hfile_chars.tmp", "w"); + if (fout == NULL) fail("hopen(\"test/hfile_chars.tmp\")"); + for (i = 0; i < 256; i++) + if (hputc(i, fout) != i) fail("chars: hputc (%d)", i); + if (hclose(fout) != 0) fail("hclose(test/hfile_chars.tmp)"); + + fin = hopen("test/hfile_chars.tmp", "r"); + if (fin == NULL) fail("hopen(\"test/hfile_chars.tmp\") for reading"); + for (i = 0; i < 256; i++) + if ((c = hgetc(fin)) != i) + fail("chars: hgetc (%d = 0x%x) returned %d = 0x%x", i, i, c, c); + if ((c = hgetc(fin)) != EOF) fail("chars: hgetc (EOF) returned %d", c); + if (hclose(fin) != 0) fail("hclose(test/hfile_chars.tmp) for reading"); + + fin = hopen("preload:test/hfile_chars.tmp", "r"); + if (fin == NULL) fail("preloading \"test/hfile_chars.tmp\" for reading"); + for (i = 0; i < 256; i++) + if ((c = hgetc(fin)) != i) + fail("preloading chars: hgetc (%d = 0x%x) returned %d = 0x%x", i, i, c, c); + if ((c = hgetc(fin)) != EOF) fail("preloading chars: hgetc (EOF) returned %d", c); + if (hclose(fin) != 0) fail("preloading hclose(test/hfile_chars.tmp) for reading"); + + char* test_string = strdup("Test string"); + fin = hopen("mem:", "r:", test_string, 12); + if (fin == NULL) fail("hopen(\"mem:\", \"r:\", ...)"); + if (hread(fin, buffer, 12) != 12) + fail("hopen('mem:', 'r') failed read"); + if(strcmp(buffer, test_string) != 0) + fail("hopen('mem:', 'r') missread '%s' != '%s'", buffer, test_string); + char* internal_buf; + size_t interval_buf_len; + if((internal_buf = hfile_mem_get_buffer(fin, 
&interval_buf_len)) == NULL){ + fail("hopen('mem:', 'r') failed to get internal buffer"); + } + if (hclose(fin) != 0) fail("hclose mem for reading"); + + test_string = strdup("Test string"); + fin = hopen("mem:", "wr:", test_string, 12); + if (fin == NULL) fail("hopen(\"mem:\", \"w:\", ...)"); + if (hseek(fin, -1, SEEK_END) < 0) + fail("hopen('mem:', 'wr') failed seek"); + if (hwrite(fin, " extra", 7) != 7) + fail("hopen('mem:', 'wr') failed write"); + if (hseek(fin, 0, SEEK_SET) < 0) + fail("hopen('mem:', 'wr') failed seek"); + if (hread(fin, buffer, 18) != 18) + fail("hopen('mem:', 'wr') failed read"); + if (strcmp(buffer, "Test string extra") != 0) + fail("hopen('mem:', 'wr') misswrote '%s' != '%s'", buffer, "Test string extra"); + if((internal_buf = hfile_mem_steal_buffer(fin, &interval_buf_len)) == NULL){ + fail("hopen('mem:', 'wr') failed to get internal buffer"); + } + free(internal_buf); + if (hclose(fin) != 0) fail("hclose mem for writing"); + + fin = hopen("data:,hello, world!%0A", "r"); + if (fin == NULL) fail("hopen(\"data:...\")"); + n = hread(fin, buffer, 300); + if (n < 0) fail("hread"); + buffer[n] = '\0'; + if (strcmp(buffer, "hello, world!\x0A") != 0) fail("hread result"); + if (hclose(fin) != 0) fail("hclose(\"data:...\")"); + + fin = hopen("test/emptyfile", "r"); + if (fin == NULL) fail("hopen(\"test/emptyfile\") for reading"); + if (hread(fin, buffer, 100) != 0) fail("test/emptyfile is non-empty"); + if (hclose(fin) != 0) fail("hclose(\"test/emptyfile\") for reading"); + + fin = hopen("data:,", "r"); + if (fin == NULL) fail("hopen(\"data:\") for reading"); + if (hread(fin, buffer, 100) != 0) fail("empty data: URL is non-empty"); + if (hclose(fin) != 0) fail("hclose(\"data:\") for reading"); + + fin = hopen("data:;base64," +// Wikipedia's example quote from Thomas Hobbes' Leviathan +"TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz" +"IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg" 
+"dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu" +"dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo" +"ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=", "r"); + if (fin == NULL) fail("hopen(\"data:;base64,...\")"); + n = hread(fin, buffer, 300); + if (n < 0) fail("hread for base64"); + buffer[n] = '\0'; + if (strcmp(buffer, "Man is distinguished, not only by his reason, but by " +"this singular passion from other animals, which is a lust of the mind, that " +"by a perseverance of delight in the continued and indefatigable generation " +"of knowledge, exceeds the short vehemence of any carnal pleasure.") != 0) + fail("hread result for base64"); + if (hclose(fin) != 0) fail("hclose(\"data:;base64,...\")"); + + kstring_t kstr = { 0, 0, NULL }; + + if (strcmp(haddextension(&kstr, "foo/bar.bam", 0, ".bai"), + "foo/bar.bam.bai") != 0) fail("haddextension foo/bar.bam[.bai]"); + if (strcmp(haddextension(&kstr, "foo/bar.bam", 1, ".bai"), + "foo/bar.bai") != 0) fail("haddextension foo/bar[.bai]"); + if (strcmp(haddextension(&kstr, "foo.bar/baz", 1, ".bai"), + "foo.bar/baz.bai") != 0) fail("haddextension foo.bar/baz[.bai]"); + if (strcmp(haddextension(&kstr, "foo#bar.bam", 0, ".bai"), + "foo#bar.bam.bai") != 0) fail("haddextension foo#bar.bam[.bai]"); + if (strcmp(haddextension(&kstr, ".bam", 1, ".bai"), + ".bai") != 0) fail("haddextension [.bai]"); + if (strcmp(haddextension(&kstr, "foo", 1, ".csi"), + "foo.csi") != 0) fail("haddextension foo[.csi]"); + + if (strcmp(haddextension(&kstr, "http://host/bar.cram?a&b&c", 0, ".crai"), + "http://host/bar.cram.crai?a&b&c") != 0) + fail("haddextension http://host/bar.cram[.crai]?a&b&c"); + + if (strcmp(haddextension(&kstr, "http://host/bar.cram#frag", 1, ".crai"), + "http://host/bar.crai#frag") != 0) + fail("haddextension http://host/bar[.crai]#frag"); + + free(ks_release(&kstr)); + + return EXIT_SUCCESS; +} diff --git a/src/htslib-1.21/test/hts_endian.c 
b/src/htslib-1.21/test/hts_endian.c new file mode 100644 index 0000000..1b8e970 --- /dev/null +++ b/src/htslib-1.21/test/hts_endian.c @@ -0,0 +1,513 @@ +/* test/hts_endian.c -- hts_endian.h unit tests + + Copyright (C) 2017 Genome Research Ltd. + + Author: Rob Davies +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include + +#include +#include +#include +#include +#include + +#include "../htslib/hts_endian.h" + +typedef struct { + uint8_t u8[2]; + uint8_t u8_unaligned[3]; + int16_t i16; + uint16_t u16; +} Test16; + +typedef struct { + uint8_t u8[4]; + uint8_t u8_unaligned[5]; + int32_t i32; + uint32_t u32; +} Test32; + +typedef struct { + uint8_t u8[8]; + uint8_t u8_unaligned[9]; + int64_t i64; + uint64_t u64; +} Test64; + +typedef struct { + uint8_t u8[4]; + uint8_t u8_unaligned[5]; + float f; +} Test_float; + +typedef struct { + uint8_t u8[8]; + uint8_t u8_unaligned[9]; + double d; +} Test_double; + +#define T16(b0, b1, sgn, unsgn) { { b0, b1 }, { 0x00, b0, b1 }, sgn, unsgn } + +Test16 tests_16_bit[] = { + T16(0x00, 0x00, 0, 0), + T16(0x01, 0x00, 1, 1), + T16(0x00, 0x01, 256, 256), + T16(0xff, 0x7f, 32767, 32767), + T16(0x00, 0x80, -32768, 32768), + T16(0xff, 0xff, -1, 65535), +}; + +#define T32(b0, b1, b2, b3, sgn, unsgn) { \ + { b0, b1, b2, b3 }, \ + { 0x00, b0, b1, b2, b3 }, \ + sgn, unsgn \ +} + +Test32 tests_32_bit[] = { + T32(0x00, 0x00, 0x00, 0x00, 0, 0), + T32(0x01, 0x00, 0x00, 0x00, 1, 1), + T32(0x00, 0x01, 0x00, 0x00, 256, 256), + T32(0x00, 0x00, 0x01, 0x00, 65536, 65536), + T32(0xff, 0xff, 0xff, 0x7f, 2147483647, 2147483647), + // Odd coding of signed result below avoids a compiler warning + // as 2147483648 can't fit in a signed 32-bit number + T32(0x00, 0x00, 0x00, 0x80, -2147483647 - 1, 2147483648U), + T32(0xff, 0xff, 0xff, 0xff, -1, 4294967295U), +}; + +#define T64(b0, b1, b2, b3, b4, b5, b6, b7, sgn, unsgn) { \ + { b0, b1, b2, b3, b4, b5, b6, b7 }, \ + { 0x00, b0, b1, b2, b3, b4, b5, b6, b7 }, \ + sgn, unsgn \ +} + + +Test64 tests_64_bit[] = { + T64(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0, 0), + T64(0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 1, 1), + T64(0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 256, 256), + T64(0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 65536, 65536), + T64(0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 
4294967296LL, 4294967296ULL), + T64(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, + 9223372036854775807LL, 9223372036854775807ULL), + // Odd coding of signed result below avoids a compiler warning + // as 9223372036854775808 can't fit in a signed 64-bit number + T64(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, + -9223372036854775807LL - 1LL, 9223372036854775808ULL), + T64(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + -1, 18446744073709551615ULL), +}; + +#define TF(b0, b1, b2, b3, f) { { b0, b1, b2, b3 }, { 0x00, b0, b1, b2, b3}, f } + +Test_float tests_float[] = { + TF(0x00, 0x00, 0x00, 0x00, 0.0f), + TF(0x00, 0x00, 0x80, 0x3f, 1.0f), + TF(0x00, 0x00, 0x80, 0xbf, -1.0f), + TF(0x00, 0x00, 0x20, 0x41, 10.0f), + TF(0xd0, 0x0f, 0x49, 0x40, 3.14159f), + TF(0xa8, 0x0a, 0xff, 0x66, 6.022e23f), + TF(0xcd, 0x84, 0x03, 0x13, 1.66e-27f), +}; + +#define TD(b0, b1, b2, b3, b4, b5, b6, b7, d) { \ + { b0, b1, b2, b3, b4, b5, b6, b7 }, \ + { 0x00, b0, b1, b2, b3, b4, b5, b6, b7 }, \ + d \ +} + +Test_double tests_double[] = { + TD(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0.0), + TD(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, 1.0), + TD(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0xbf, -1.0), + TD(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 10.0), + TD(0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40, 3.141592653589793), + TD(0x2b, 0x08, 0x0c, 0xd3, 0x85, 0xe1, 0xdf, 0x44, 6.022140858e23), + TD(0x55, 0xfa, 0x81, 0x74, 0xf7, 0x71, 0x60, 0x3a, 1.66053904e-27), +}; + +#define NELE(x) (sizeof(x)/sizeof(x[0])) + +static char * to_hex(uint8_t *buf, int len) { + static char str[64]; + int i, o; + + for (i = 0, o = 0; i < len; i++, o += 3) { + snprintf(str + o, sizeof(str) - o, "%02x ", buf[i]); + } + return str; +} + +static int t16_bit(int verbose) { + uint8_t buf[9]; + size_t i; + int errors = 0; + + for (i = 0; i < NELE(tests_16_bit); i++) { + uint16_t u16; + int16_t i16; + + if (verbose) { + fprintf(stderr, "%s %6"PRId16" %6"PRId16"\n", + to_hex(tests_16_bit[i].u8, 
2), + tests_16_bit[i].i16, tests_16_bit[i].u16); + } + + u16 = le_to_u16(tests_16_bit[i].u8); + if (u16 != tests_16_bit[i].u16) { + fprintf(stderr, "Failed %s => %"PRIu16"; expected %"PRIu16"\n", + to_hex(tests_16_bit[i].u8, 2), u16, tests_16_bit[i].u16); + errors++; + } + + i16 = le_to_i16(tests_16_bit[i].u8); + if (i16 != tests_16_bit[i].i16) { + fprintf(stderr, "Failed %s => %"PRId16"; expected %"PRId16"\n", + to_hex(tests_16_bit[i].u8, 2), i16, tests_16_bit[i].i16); + errors++; + } + + u16 = le_to_u16(tests_16_bit[i].u8_unaligned + 1); + if (u16 != tests_16_bit[i].u16) { + fprintf(stderr, + "Failed unaligned %s => %"PRIu16"; expected %"PRIu16"\n", + to_hex(tests_16_bit[i].u8_unaligned + 1, 2), + u16, tests_16_bit[i].u16); + errors++; + } + + i16 = le_to_i16(tests_16_bit[i].u8_unaligned + 1); + if (i16 != tests_16_bit[i].i16) { + fprintf(stderr, + "Failed unaligned %s => %"PRId16"; expected %"PRId16"\n", + to_hex(tests_16_bit[i].u8_unaligned + 1, 2), + i16, tests_16_bit[i].i16); + errors++; + } + + u16_to_le(tests_16_bit[i].u16, buf); + if (memcmp(buf, tests_16_bit[i].u8, 2) != 0) { + fprintf(stderr, "Failed %"PRIu16" => %s; expected %s\n", + tests_16_bit[i].u16, + to_hex(buf, 2), to_hex(tests_16_bit[i].u8, 2)); + errors++; + } + + i16_to_le(tests_16_bit[i].i16, buf); + if (memcmp(buf, tests_16_bit[i].u8, 2) != 0) { + fprintf(stderr, "Failed %"PRId16" => %s; expected %s\n", + tests_16_bit[i].i16, + to_hex(buf, 2), to_hex(tests_16_bit[i].u8, 2)); + errors++; + } + + u16_to_le(tests_16_bit[i].u16, buf + 1); + if (memcmp(buf + 1, tests_16_bit[i].u8, 2) != 0) { + fprintf(stderr, "Failed unaligned %"PRIu16" => %s; expected %s\n", + tests_16_bit[i].u16, + to_hex(buf + 1, 2), to_hex(tests_16_bit[i].u8, 2)); + errors++; + } + + i16_to_le(tests_16_bit[i].i16, buf + 1); + if (memcmp(buf + 1, tests_16_bit[i].u8, 2) != 0) { + fprintf(stderr, "Failed unaligned %"PRId16" => %s; expected %s\n", + tests_16_bit[i].i16, + to_hex(buf + 1, 2), to_hex(tests_16_bit[i].u8, 2)); + 
errors++; + } + } + + return errors; +} + +static int t32_bit(int verbose) { + uint8_t buf[9]; + size_t i; + int errors = 0; + + for (i = 0; i < NELE(tests_32_bit); i++) { + uint32_t u32; + int32_t i32; + + if (verbose) { + fprintf(stderr, "%s %11"PRId32" %11"PRIu32"\n", + to_hex(tests_32_bit[i].u8, 4), + tests_32_bit[i].i32, tests_32_bit[i].u32); + } + + u32 = le_to_u32(tests_32_bit[i].u8); + if (u32 != tests_32_bit[i].u32) { + fprintf(stderr, "Failed %s => %"PRIu32"; expected %"PRIu32"\n", + to_hex(tests_32_bit[i].u8, 4), u32, tests_32_bit[i].u32); + errors++; + } + i32 = le_to_i32(tests_32_bit[i].u8); + if (i32 != tests_32_bit[i].i32) { + fprintf(stderr, "Failed %s => %"PRId32"; expected %"PRId32"\n", + to_hex(tests_32_bit[i].u8, 4), i32, tests_32_bit[i].i32); + errors++; + } + + u32 = le_to_u32(tests_32_bit[i].u8_unaligned + 1); + if (u32 != tests_32_bit[i].u32) { + fprintf(stderr, + "Failed unaligned %s => %"PRIu32"; expected %"PRIu32"\n", + to_hex(tests_32_bit[i].u8_unaligned + 1, 4), + u32, tests_32_bit[i].u32); + errors++; + } + i32 = le_to_i32(tests_32_bit[i].u8_unaligned + 1); + if (i32 != tests_32_bit[i].i32) { + fprintf(stderr, + "Failed unaligned %s => %"PRId32"; expected %"PRId32"\n", + to_hex(tests_32_bit[i].u8_unaligned + 1, 4), + i32, tests_32_bit[i].i32); + errors++; + } + + u32_to_le(tests_32_bit[i].u32, buf); + if (memcmp(buf, tests_32_bit[i].u8, 4) != 0) { + fprintf(stderr, "Failed %"PRIu32" => %s; expected %s\n", + tests_32_bit[i].u32, + to_hex(buf, 4), to_hex(tests_32_bit[i].u8, 4)); + errors++; + } + + i32_to_le(tests_32_bit[i].i32, buf); + if (memcmp(buf, tests_32_bit[i].u8, 4) != 0) { + fprintf(stderr, "Failed %"PRId32" => %s; expected %s\n", + tests_32_bit[i].i32, + to_hex(buf, 4), to_hex(tests_32_bit[i].u8, 4)); + errors++; + } + + u32_to_le(tests_32_bit[i].u32, buf + 1); + if (memcmp(buf + 1, tests_32_bit[i].u8, 4) != 0) { + fprintf(stderr, "Failed unaligned %"PRIu32" => %s; expected %s\n", + tests_32_bit[i].u32, + to_hex(buf + 1, 4), 
to_hex(tests_32_bit[i].u8, 4)); + errors++; + } + + i32_to_le(tests_32_bit[i].i32, buf + 1); + if (memcmp(buf + 1, tests_32_bit[i].u8, 4) != 0) { + fprintf(stderr, "Failed unaligned %"PRId32" => %s; expected %s\n", + tests_32_bit[i].i32, + to_hex(buf + 1, 4), to_hex(tests_32_bit[i].u8, 4)); + errors++; + } + } + + return errors; +} + +static int t64_bit(int verbose) { + uint8_t buf[9]; + size_t i; + int errors = 0; + + for (i = 0; i < NELE(tests_64_bit); i++) { + uint64_t u64; + int64_t i64; + + if (verbose) { + fprintf(stderr, "%s %20"PRId64" %20"PRIu64"\n", + to_hex(tests_64_bit[i].u8, 8), + tests_64_bit[i].i64, tests_64_bit[i].u64); + } + + u64 = le_to_u64(tests_64_bit[i].u8); + if (u64 != tests_64_bit[i].u64) { + fprintf(stderr, "Failed %s => %"PRIu64"; expected %"PRIu64"\n", + to_hex(tests_64_bit[i].u8, 8), u64, tests_64_bit[i].u64); + errors++; + } + + i64 = le_to_i64(tests_64_bit[i].u8); + if (i64 != tests_64_bit[i].i64) { + fprintf(stderr, "Failed %s => %"PRId64"; expected %"PRId64"\n", + to_hex(tests_64_bit[i].u8, 8), i64, tests_64_bit[i].i64); + errors++; + } + + u64 = le_to_u64(tests_64_bit[i].u8_unaligned + 1); + if (u64 != tests_64_bit[i].u64) { + fprintf(stderr, + "Failed unaligned %s => %"PRIu64"; expected %"PRIu64"\n", + to_hex(tests_64_bit[i].u8_unaligned + 1, 8), + u64, tests_64_bit[i].u64); + errors++; + } + + i64 = le_to_i64(tests_64_bit[i].u8_unaligned + 1); + if (i64 != tests_64_bit[i].i64) { + fprintf(stderr, + "Failed unaligned %s => %"PRId64"; expected %"PRId64"\n", + to_hex(tests_64_bit[i].u8_unaligned + 1, 8), + i64, tests_64_bit[i].i64); + errors++; + } + + u64_to_le(tests_64_bit[i].u64, buf); + if (memcmp(buf, tests_64_bit[i].u8, 8) != 0) { + fprintf(stderr, "Failed %"PRIu64" => %s; expected %s\n", + tests_64_bit[i].u64, + to_hex(buf, 8), to_hex(tests_64_bit[i].u8, 8)); + errors++; + } + + i64_to_le(tests_64_bit[i].i64, buf); + if (memcmp(buf, tests_64_bit[i].u8, 8) != 0) { + fprintf(stderr, "Failed %"PRId64" => %s; expected %s\n", + 
tests_64_bit[i].i64, + to_hex(buf, 8), to_hex(tests_64_bit[i].u8, 8)); + errors++; + } + + u64_to_le(tests_64_bit[i].u64, buf + 1); + if (memcmp(buf + 1, tests_64_bit[i].u8, 8) != 0) { + fprintf(stderr, "Failed unaligned %"PRIu64" => %s; expected %s\n", + tests_64_bit[i].u64, + to_hex(buf + 1, 8), to_hex(tests_64_bit[i].u8, 8)); + errors++; + } + + i64_to_le(tests_64_bit[i].i64, buf + 1); + if (memcmp(buf + 1, tests_64_bit[i].u8, 8) != 0) { + fprintf(stderr, "Failed unaligned %"PRId64" => %s; expected %s\n", + tests_64_bit[i].i64, + to_hex(buf + 1, 8), to_hex(tests_64_bit[i].u8, 8)); + errors++; + } + } + + return errors; +} + +int t_float(int verbose) { + uint8_t buf[9]; + size_t i; + int errors = 0; + + for (i = 0; i < NELE(tests_float); i++) { + float f; + + if (verbose) { + fprintf(stderr, "%s %g\n", + to_hex(tests_float[i].u8, 4), tests_float[i].f); + } + + f = le_to_float(tests_float[i].u8); + if (f != tests_float[i].f) { + fprintf(stderr, "Failed %s => %g; expected %g\n", + to_hex(tests_float[i].u8, 4), f, tests_float[i].f); + errors++; + } + + f = le_to_float(tests_float[i].u8_unaligned + 1); + if (f != tests_float[i].f) { + fprintf(stderr, "Failed unaligned %s => %g; expected %g\n", + to_hex(tests_float[i].u8_unaligned + 1, 4), + f, tests_float[i].f); + errors++; + } + + float_to_le(tests_float[i].f, buf); + if (memcmp(tests_float[i].u8, buf, 4) != 0) { + fprintf(stderr, "Failed %g => %s; expected %s\n", + tests_float[i].f, + to_hex(buf, 4), to_hex(tests_float[i].u8, 4)); + } + + float_to_le(tests_float[i].f, buf + 1); + if (memcmp(tests_float[i].u8, buf + 1, 4) != 0) { + fprintf(stderr, "Failed unaligned %g => %s; expected %s\n", + tests_float[i].f, + to_hex(buf + 1, 4), to_hex(tests_float[i].u8, 4)); + } + } + return errors; +} + +int t_double(int verbose) { + uint8_t buf[9]; + size_t i; + int errors = 0; + + for (i = 0; i < NELE(tests_double); i++) { + double f; + + if (verbose) { + fprintf(stderr, "%s %.15g\n", + to_hex(tests_double[i].u8, 8), 
tests_double[i].d); + } + + f = le_to_double(tests_double[i].u8); + if (f != tests_double[i].d) { + fprintf(stderr, "Failed %s => %.15g; expected %.15g\n", + to_hex(tests_double[i].u8, 8), f, tests_double[i].d); + errors++; + } + + f = le_to_double(tests_double[i].u8_unaligned + 1); + if (f != tests_double[i].d) { + fprintf(stderr, "Failed unaligned %s => %.15g; expected %.15g\n", + to_hex(tests_double[i].u8_unaligned + 1, 8), + f, tests_double[i].d); + errors++; + } + + double_to_le(tests_double[i].d, buf); + if (memcmp(tests_double[i].u8, buf, 8) != 0) { + fprintf(stderr, "Failed %.15g => %s; expected %s\n", + tests_double[i].d, + to_hex(buf, 8), to_hex(tests_double[i].u8, 8)); + } + + double_to_le(tests_double[i].d, buf + 1); + if (memcmp(tests_double[i].u8, buf + 1, 8) != 0) { + fprintf(stderr, "Failed unaligned %.15g => %s; expected %s\n", + tests_double[i].d, + to_hex(buf + 1, 8), to_hex(tests_double[i].u8, 8)); + } + } + return errors; +} + +int main(int argc, char **argv) { + int verbose = 0; + int errors = 0; + + if (argc > 1 && strcmp(argv[1], "-v") == 0) verbose = 1; + + errors += t16_bit(verbose); + errors += t32_bit(verbose); + errors += t64_bit(verbose); + errors += t_float(verbose); + errors += t_double(verbose); + if (errors) { + fprintf(stderr, "%d errors\n", errors); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/src/htslib-1.21/test/index.bam.bai b/src/htslib-1.21/test/index.bam.bai new file mode 100644 index 0000000..9d7f43d Binary files /dev/null and b/src/htslib-1.21/test/index.bam.bai differ diff --git a/src/htslib-1.21/test/index.bam.csi b/src/htslib-1.21/test/index.bam.csi new file mode 100644 index 0000000..a19a316 Binary files /dev/null and b/src/htslib-1.21/test/index.bam.csi differ diff --git a/src/htslib-1.21/test/index.bcf.csi b/src/htslib-1.21/test/index.bcf.csi new file mode 100644 index 0000000..13d0e9f Binary files /dev/null and b/src/htslib-1.21/test/index.bcf.csi differ diff --git 
a/src/htslib-1.21/test/index.cram.crai b/src/htslib-1.21/test/index.cram.crai new file mode 100644 index 0000000..acdfe67 Binary files /dev/null and b/src/htslib-1.21/test/index.cram.crai differ diff --git a/src/htslib-1.21/test/index.sam b/src/htslib-1.21/test/index.sam new file mode 100644 index 0000000..1368988 --- /dev/null +++ b/src/htslib-1.21/test/index.sam @@ -0,0 +1,190 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:CHROMOSOME_I LN:1009800 M5:8ede36131e0dbf3417807e48f77f3ebd +@SQ SN:CHROMOSOME_II LN:5000 M5:8e7993f7a93158587ee897d7287948ec +@SQ SN:CHROMOSOME_III LN:5000 M5:3adcb065e1cf74fafdbba1e8c352b323 +@SQ SN:CHROMOSOME_IV LN:5000 M5:251af66a69ee589c9f3757340ec2de6f +@SQ SN:CHROMOSOME_V LN:5000 M5:cf200a65fb754836dcc56b24b3170ee8 +@SQ SN:CHROMOSOME_X LN:5000 M5:6f9368fd2192c89c613718399d2d31fc +@SQ SN:CHROMOSOME_MtDNA LN:5000 M5:cd05857ece6411f40257a565ccfe15bb +@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-beta5 +SRR065390.17240207 16 CHROMOSOME_I 999901 42 100M * 0 0 ATGTTTACAGGACTTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAG CACAC?CBBAA@?@?BADDBBDBBAB>DDDBBDDABBBCCADDDDDCBCBCCCDBDDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.15493040 0 CHROMOSOME_I 999912 42 100M * 0 0 ACTTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBCCBDBCCBDDA@>DC?5@?@@??:><<>8>39<37 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.6144221 0 CHROMOSOME_I 999914 42 100M * 0 0 TTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCDCCCCBDCDDBBDDBDBDD@BBB@DBABDB AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.8057275 0 CHROMOSOME_I 999916 42 100M * 0 0 
CAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTT CCCCCCCBCCC@CCCCCCCCCCC>BBB>BB?4CCCCCC;>====ACCCA@CCCBBCCBC;>@==>BBBBA?<;@<@######################## AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.24679913 16 CHROMOSOME_I 999917 42 100M * 0 0 AAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTT ==56>??>AB?>D>?A?DBDABBB=BDBDACDBBCCDBBBBDDCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.25513175 0 CHROMOSOME_I 999934 42 100M * 0 0 ATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCBC@CADCDDAABA=B?=A=B.>AA?AADA########################## AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17492782 0 CHROMOSOME_I 999935 42 100M * 0 0 TGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCDCDCCDCCBDCDDBDDBDD@BBBBBBACBBAB=AB>BBBAB>?BA@CAAA? 
AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17146364 16 CHROMOSOME_I 999942 42 100M * 0 0 CAAAAATTGTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAG #######@/A@@<:BBBBB>ABBDADC@=DDBDDDCDCCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:8T91 YT:Z:UU +SRR065390.14459471 16 CHROMOSOME_I 999944 42 100M * 0 0 AAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGC @@@@=B@CCCBAABACCC@DCCCCCDCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.6968616 16 CHROMOSOME_I 999947 42 100M * 0 0 ATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTA BDB>B@DDDD@DDDDBCACB@DCBCCACCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.9052825 16 CHROMOSOME_I 999952 42 100M * 0 0 GGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACC ?B;DABDABDDBDDADCCCD@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.22926164 0 CHROMOSOME_I 999967 42 100M * 0 0 TTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBCCBCCCCCCCCDCCDCDDDDCCDACDCADBDDBBCBCBCCABBA@BABABCBABC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.27108093 16 CHROMOSOME_I 999969 42 100M * 0 0 AAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACN ##########AAAAA388333-533')''+AA8AAAAAAAAAA8AAAAAA67788AAAA888887AAA5AAAAAAAAAAAA8AAAAAAAA+*++)))))! 
AS:i:-1 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:99C0 YT:Z:UU +SRR065390.19145675 0 CHROMOSOME_I 999970 42 100M * 0 0 AGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCBCCCCCCCCCCADCBDBBCBBBBBDCBABBBABAABB??DDAACCAACC>AC?C?= AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.22660118 16 CHROMOSOME_I 999972 42 100M * 0 0 CTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCC B9ABABDB>DBBBD8CBDCDBCDBCDBCBCCBCCCCCCCCCCCCCCC>CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.1589310 0 CHROMOSOME_I 999973 42 100M * 0 0 NTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCG !++((22221AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7A8AAAAAAAA8AAAAAAAAAAAAA7A7AA768655 AS:i:-1 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:0T99 YT:Z:UU +SRR065390.32984687 0 CHROMOSOME_I 999978 42 100M * 0 0 GTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCT CCCCCCCCCCCBCCCCCCCCCCCCCCCCC@CC@CCCBCCCCCCBDACDCC>@B@CDBADB@BCBD@B=BBB@BD>C@BBCBACAABAB;D9<4:<66 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.28347129 16 CHROMOSOME_I 999978 42 100M * 0 0 GTTATGTTTAGGCGTAGGCTTAGACATACGCTTAGGTTTCGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCT ##############################################@B?BB@A@ABBBDABD@DDBBB@@B;C@BACBC@CC@CCCCCBCCCCCCCCCCC AS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 NM:i:5 MD:Z:23G1T2G2G7A60 YT:Z:UU +SRR065390.17964692 16 CHROMOSOME_I 999984 42 100M * 0 0 TTTGGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAA #####@<@=<53.830;>.?A5@@?ABAAADBDBC<@CB@D@BCB@CBCDCDBBDC=C@C@CAAC@C@ACCCCCCCCCCCCCCCCCCCCC AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:3A96 YT:Z:UU +SRR065390.16701032 0 
CHROMOSOME_I 999987 42 100M * 0 0 AGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCACCCCCCCCCCDCBCCCCCCDCCBAA@BBBBBC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.24060716 16 CHROMOSOME_I 999989 42 100M * 0 0 GCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAA @8>68BD?B??B@DB>ABB?BA@A=ADBCC@?AA@CCBBCBCCDBCDCCBCBCCC@CCCCBCCCCCCCACCCCCCCCACCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.24907628 16 CHROMOSOME_I 999989 42 100M * 0 0 GCGTAGGCTGAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAA ################################BDDBB?BB>?>BADABBBDBDBABDBDC;?>9=C?B>CC@CCCCDCCCCCCCCCCCCCCCCCCCCCCC AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:9T90 YT:Z:UU +SRR065390.21366278 16 CHROMOSOME_I 999991 42 100M * 0 0 GTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAAAG ######?9>A09=@?=>BBDBBBB8B>DBCDCCDCBCBCBDCCC@CCCCCCCBCCCCCCC@@CCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.27662957 0 CHROMOSOME_I 999995 42 100M * 0 0 GCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAAAGAGTA CCCCCCCCCCCCCCCCCCC@ACCCCCCCCCCCCCCADCCCBC?CDDDDAC=BA?@B@DBDB>?>>D?#################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.29477959 0 CHROMOSOME_I 999997 42 100M * 0 0 TTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAAAGAGTAGG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=AB?DAB@3=@8@=@?@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.13030274 16 CHROMOSOME_I 1000208 42 100M * 0 0 TCAATTAAACTGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTC 
955576>0@BBBBBBDBBD?DABDDDDCD@DCDDCCDCDDCACBACCCCCCBCCCCCCCCCCCCCCBCCCCCCCCCCBBCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.18054898 16 CHROMOSOME_I 1000209 42 100M * 0 0 CAATTAAACTGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCC CAC@CAA?BC?D??BCABB8=>@@?#### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.26866653 16 CHROMOSOME_I 1000217 42 100M * 0 0 CTGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACC ###########??????4D;AA?AAD?A>>?CABCBABBBBAA@AD>ADAAC@CCCCBCCBCCC?CCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCBCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.23714265 0 CHROMOSOME_I 1000218 0 78M2I20M * 0 0 TGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGAGATCGGAAGAGCGGTTCAGCAGGAA CCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCADDCCBBBBBDBBBB+=7=0?==>A#################### AS:i:-48 XN:i:0 XM:i:16 XO:i:1 XG:i:2 NM:i:18 MD:Z:75T0T1T0T0G0T2T0T0T3C0T0T0T0T0T1C0 YT:Z:UU +SRR065390.20744360 16 CHROMOSOME_I 1000218 42 100M * 0 0 TGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCG #####@ABBBBDBD@BA@DCDBABBBBBDA>@CBBDBBAD=BBDCBACBCCCCCCCBCBCCCCACCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCBCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.3611567 16 CHROMOSOME_I 1000225 42 100M * 0 0 CGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTT #####@<2@=BBBBAC=DBBB@BBACBBBB=C;BBCCBACC@CCACCCCBCCCCCCBCCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.10053218 16 CHROMOSOME_I 1000225 42 100M * 0 0 CGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTT @@=@6AA=AAC?CAC>BB>?A>>CBB@@CBAD>CC;>C@BC>A################################################# AS:i:-8 XN:i:0 XM:i:4 XO:i:0 
XG:i:0 NM:i:4 MD:Z:66A7A14C2A7 YT:Z:UU +SRR065390.21951837 0 CHROMOSOME_I 1000229 42 100M * 0 0 AATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCBCDCACCCCCCBCCB>AACCC@1/?@?CCC@@BABCB=?@@+:A?B###### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.21381202 0 CHROMOSOME_I 1000232 40 100M * 0 0 TATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGGTTTTTTTAGTTTTTTCTTTTTTCCCAATTTTTTTGGATA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDCDC?=8@';4@AA############################################# AS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 NM:i:8 MD:Z:60A5G8C6A2G7A0G1A3 YT:Z:UU +SRR065390.22184926 16 CHROMOSOME_I 1000235 42 100M * 0 0 TGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCT ??CAACCBAADD?DBB?@>BBB;BABBBBB@>CCCDBCDBACCCCAACACACCACCC@@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17603173 0 CHROMOSOME_I 1000236 42 100M * 0 0 GGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTG CCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCDCCCCCCCCB>CAB@ACCC################################################ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17587471 16 CHROMOSOME_I 1000250 42 100M * 0 0 GAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTT 10?8;;?;AA??:AA@BBBBB?BDDDDDBCDA>@DDDCCCDACCCDDCCDCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.635026 0 CHROMOSOME_I 1000255 42 100M * 0 0 TTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGA CCCCCCCCCCCCCCBCCCCCCCBD@CCCCB0:>8:=BBBBC6:=7@>?B?B43/+2>@@/@########## AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.33333470 0 CHROMOSOME_I 1000257 42 100M * 0 0 
TTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTTTGTTT CCCCBCCCCCCC?CCC?CCCCDBCADCCCCCA@@:;CCCC?7.)8;>???-3>>;A?3?6;/2;>?A:24775=4B<@@<4)+75:70(4@>::)9,B>BB?BBD:>BADDD=ABBBDDDBD@DBCCCDCCDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:0T99 YT:Z:UU +SRR065390.18670433 0 CHROMOSOME_I 1000260 40 100M * 0 0 TGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTTTGAATATCTGGGGATTTTTCGTTTTTTTTTTTTT CCCCCCCCCCCCC>CCCCBBC4A@ACCC8@;5/8;A?A/6,>==AAC6<@################################################## AS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 NM:i:7 MD:Z:65A0G11A4C7C1G0A5 YT:Z:UU +SRR065390.5800524 0 CHROMOSOME_I 1000261 42 100M * 0 0 GGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTTCCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTT CCCCCCCCCCCCCCCCCDDC*/,0/??/<<508BAA@@BCBCAC?BAADBCD@@@CBCCBA9CCCACCCCCCCCCDCCCCCC?CCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCBBBCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.1793614 16 CHROMOSOME_I 1000274 42 100M * 0 0 ACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTT A:CAADB=DBDD@CBACC>@CACCCCCCCDCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBCCCCCCCCBBBCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.20107270 0 CHROMOSOME_I 1000276 42 100M * 0 0 GTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAG CCCCCCCCCCCCCCCCCCCCCCC@@CCCCCCCCCCCCCCCCADDCCCCCDCC?ACACDCCCCC@CCCDCD@BCDCBB3>B@BCCC@@9=3BB?@B@>85; AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.8268806 16 CHROMOSOME_I 1000276 42 100M * 0 0 GTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAG ##########D?:BBA>;BBABBAABBBBBDDB>DDDDBDCDDCDCDDCCCDCCCDCCCCDCCCCCCCCCCCCCCCCC@BBCCCCCCCCBBBCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.19264263 16 
CHROMOSOME_I 1000280 42 100M * 0 0 CACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCC ##BB?>CBABBB?:BBBBABABABB@DBCBBDAABDCCCCCCBCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCBBBCCCCCCCCBBBCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.18391831 0 CHROMOSOME_I 1000283 42 100M * 0 0 TTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCC CCCCCCCCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?ACCCCCCCBCC@CC8BBCCCCCB@>A>CCCDDC@@@DBBBC?:CCDBAC;CDDDDCBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBCCCCCCCC@BBCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.24029537 16 CHROMOSOME_I 1000284 42 100M * 0 0 TTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCA DB>B8BB<9;?>ABDDAADB@DD@C@BBAABBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBCCCCCCCCBBBCCCCCCCCCCCCCDCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.28630205 0 CHROMOSOME_I 1000286 42 100M * 0 0 TTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCACG CCCCCCCCCCCCC@BCCCCCCCCCCCCCCCCCDBCCCCCDDBBBCBCDCDB@=?BBBBDBBABBBBBB@@CBBDB>>>A>BCBCCB:;:>=<9:@A#### AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:98A1 YT:Z:UU +SRR065390.15799530 0 CHROMOSOME_I 1000295 42 100M * 0 0 TTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCAACCCCCCCCCACCBCBCCCC?B@CCCCB@93=@B5>BB>>3/77:7:B>CDBDDB@>;B>BBBBDACAAB@D@<9<9<7 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.22494349 0 CHROMOSOME_I 1000297 42 100M * 0 0 TGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACDCCCCCD@@CCDCDCBBDCDDDBADDDDCD>B;@>DAABBB@>5A>BDBB?6??@D?9@####### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.12445253 0 
CHROMOSOME_I 1000298 42 100M * 0 0 GTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCAAGCCTAAGCATAAC CCCCCCCCCCCCCCCCCCCCACCCCCCCCC@DCCCCCCCC?BACCBC@CBDCCACB?BBBCDC@@;4BCBABDC@B56?B@96=4A>BAB;;5;:@19A;@;;;6?BBBBB3BBB??@@@>@BBB;@AA@9@AA9BABBBAA@@AABAABAB@BB:;??>:?DBAB?BBDDBBABB;ACBDB?BBB@CCCBDD@CD@CCDBCDDDCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.27194079 0 CHROMOSOME_II 2920 42 100M * 0 0 CTAATTTTCAGAGAGACTGAAAGAGTTTAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAA CCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC=BBBCB?BBBA?BBBDB?>BB=CBCCAACAC;DAB=ACAC?##### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.21775125 16 CHROMOSOME_II 2934 42 100M * 0 0 AACTGAAAGAGTTTAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACC #####ABA?=<<=5=@BBA?=@>:A:7.44?B?8B@@>BBB=@B?ADBBBCBBACBD9CBD?A9?=A?.AABADDABBB@BABDDBACBBCCDCBCCDCCCCDCCCCDCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.3790175 16 CHROMOSOME_II 2944 42 100M * 0 0 GTTTAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACCGCGGTAAGTG 8BDD@:=7)/>B>ABBB?BB?>?DB@B:BBB?BBADDC@BDCDDCDBCDCCCBADCCCCCCCBCCCCCCCCCCCCCCCCCCCDCCCCDCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.4091455 0 CHROMOSOME_II 2946 42 100M * 0 0 TTAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACCGCGGTAAGTGTG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCCCCCCCDCCCCBCCCDACBCDCACC@C@CA@CBAAD=BBAADD06@##### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.8676436 0 CHROMOSOME_II 2947 42 100M * 0 0 TAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCCCCGCGGTCCGTGTGC <:>:>/000/:<<:BAB?>8A?A;:A873;3?>?>A>>A8B############################################# AS:i:-8 XN:i:0 
XM:i:4 XO:i:0 XG:i:0 NM:i:4 MD:Z:84A7A0A5T0 YT:Z:UU +SRR065390.28734084 0 CHROMOSOME_II 2948 42 100M * 0 0 AAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACCGCGGTAAGTGTGTT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCBCBCDCBCCCBBDDDCADABADBBABB:BB=D?B<@B@>CA?CA>BACADAA########### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.10526869 0 CHROMOSOME_II 2956 40 100M * 0 0 TACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACGCCACCGCGGGGAGGGGGGTTGTTTTAT CCCCCCCCCCCCCCCCCCCCADCBBDDDDDDDBBB8BA@B>6<:>9=789=0>D>AA<@<8B>1>A9>;@5=@8C:48;*AAA=<>9>9>>:>>AB?D>BBDBCBDBBCCBABBB>@CDCCBCDCAACCCCCACCCCCCCCBCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.10879394 16 CHROMOSOME_V 938 42 100M * 0 0 TATGTTTTTCTTGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCA B;B:B>@B?>@>7BBDABADADBBCBDCCBACBCCBBB@CCCCCBCCACACCCCCC>CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.1520161 0 CHROMOSOME_V 941 42 100M * 0 0 GTTTTTCTTGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATG CCCCCCCBBBCCCCCCCCCCCCCCCCCCCCACCCCCCDCC@CCCCCCCCCCCCCCCCCCACCCBCCAD=D@BC?C?C?C##################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17468019 16 CHROMOSOME_V 943 42 100M * 0 0 TTTTCTTGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAA >ABBBABBDDDB=DBCD?DDBDBDADDADDBDCCCCCCC=CCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.12403970 0 CHROMOSOME_V 949 42 100M * 0 0 TGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCC@CBBCACBC@?144:>><@@DAB?:=9@<>/>9?;=927= AS:i:0 XN:i:0 XM:i:0 XO:i:0 
XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.16193993 16 CHROMOSOME_V 949 42 100M * 0 0 TGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTC ;;/67AAC@ADCCDBCDCCCCCCD@CCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCDC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.30032741 16 CHROMOSOME_V 950 42 100M * 0 0 GAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCA AACBBAB?BB>BABBCDBBDABDBADDDDBDDBBADDDDBACCDCBDDDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.32455256 0 CHROMOSOME_V 956 42 100M * 0 0 GTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGGTATAATACAGCGACTCAATGAAAAAATCAAAAAAA CCCCACCCCCBB=?ABB?BBA?BAABBBBBB@BBABBBBBBBBBBBBBAA@BBBBBBB>B######################################## AS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:64T25C9 YT:Z:UU +SRR065390.15571530 16 CHROMOSOME_V 966 42 100M * 0 0 CTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTT B;:B;B?D?@?BBBB5-=<:@@AA@BBA>BBADBBDDDDCDCDCDBBDCCCDCCCCCCCCCDCCCCCDCCCCCCCCCCCACCBBBCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.9595122 0 CHROMOSOME_V 967 42 100M * 0 0 TGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCC?@:AAA>C@CBB@@>?B=A?BBBBBCB>@/@>=>=>BB# AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.3600239 16 CHROMOSOME_V 969 42 100M * 0 0 ATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCA DB>DBBBBA@AAB?DCA@CB@ABB@BB=AAAA>@==>>6/>:>5:688/85A?AAA>>657==BBB<;;;9>>8>>BBBB> AS:i:-6 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:28T25T45 YT:Z:UU +SRR065390.31266674 0 CHROMOSOME_V 971 42 100M * 0 0 
AATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTTACTTTGCACG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCADDCCBC@CBC5<5<7?:83;+471/0<4=8;??BBD(.94;9?@?################ AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:89G10 YT:Z:UU +SRR065390.23187971 16 CHROMOSOME_V 972 42 100M * 0 0 ATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGC 647:0BBB?B==@?@@BDBDBBBDDDBDDBDBDDDCBCCCCBBCCCCCDCCBCCCCCCCCCCCBBBCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.28661392 16 CHROMOSOME_V 975 42 100M * 0 0 TGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTAT ACCACAA5BDABAA>BDBDBDCBCBA@DBDB>DBBBBBAABDBDBDDBCCCCDCCCCCDCCCCCCCCCCCCCCBBBCCCCCCCCCCDCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.1859967 0 CHROMOSOME_V 979 42 100M * 0 0 AAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGCAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCA>=>C<@@;:@A@A=53@?AB::?@CCACC=B/<;53;7BB:>B=::=A@?@?ACCC>C@CCCCCB:/&-7735@B7B>B?;@@CC@35A@@CCBC@######################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.12435485 16 CHROMOSOME_V 981 42 100M * 0 0 ATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAA B<=BB>B@>>BBBD@>?DABBBBBDDDDDDDDCADCDCCDCCCDCDBCCCCCDCCCCCCCCCCCCCCBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.7485987 0 CHROMOSOME_V 983 42 100M * 0 0 TATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGGTAAAA C@?C@CCCCCCCCCCCC@CCCCC@?C8CCC@BC?@CC############################################################### AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:94T5 YT:Z:UU +SRR065390.17264189 0 CHROMOSOME_V 983 42 100M * 0 0 
TATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACCCTATGGTTAAAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=C=/////=?5=;:@8???AA############################ AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:87G12 YT:Z:UU +SRR065390.6356855 0 CHROMOSOME_V 986 42 100M * 0 0 AATTTAAAACTTAAACGAAGCTAAAATGTGGCTGGTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAAAA CCBCCBCCCCCCCCCC@CC?@CCCCC@CCC>A=@.88/45+()/.=>2==BBCB659?9?'))10;9??############################### AS:i:-7 XN:i:0 XM:i:3 XO:i:0 XG:i:0 NM:i:3 MD:Z:34T63T0G0 YT:Z:UU +SRR065390.20107175 0 CHROMOSOME_V 989 42 100M * 0 0 TTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAAT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@C@?./..):3872A=@=A<=:;=B>B>>87777@>&@9A@@@8:@>88 AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:87T12 YT:Z:UU +SRR065390.6431660 16 CHROMOSOME_V 994 42 100M * 0 0 ACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATT AACBABABDC@@ADABBDDCDCDBCDDDCCDCDBCACCCBCCDCCCCCCCCBCC@@@CCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.4439503 16 CHROMOSOME_V 997 42 100M * 0 0 TAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTT ###########B>:AAAAA@C@=;937<ACCC8@@@AABCC>@+/662BBBC?B>BBB?BBBB#################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.11492188 0 CHROMOSOME_V 998 42 100M * 0 0 AAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTTG CCCCCCCCCCCCCCCCDCCCCC>A@AAAAAACA??B@@BBD>BACACC08;;AAACB==/*/1//:=@99BBABA@;<@;<:9>>B??>B??:?6B??B9 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.9605367 0 CHROMOSOME_V 999 42 100M * 0 0 AACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTTGG 
CCCCCCCCCCCCCCCDCCCCC@C<>>A9<4=9>=B###################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.29302896 0 CHROMOSOME_V 1000 42 100M * 0 0 ACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTTGGT CCCCCCCCCCCCCCDCCCCC6?:??AABCCCC8?C@BCCCC@@5;><9>>>B>>AB=<)6=4:):9>>@@################ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.13754 4 * 0 0 * * 0 0 TCGCTGCTGTGATGTTGCGTTTTTATCAGCACAAAGGCGGTCAGGCCGAGGCCTATTTTTTCCGGATCCAGCAGGGCGACTTTGCCGATAAGGATACCGT CCCCCCCCCCCCCCCCCCCCCCCCCBBCCCCCCC@CCCCCDCCCCCCCDCCCC@ACCCC@>>CCD?>>>@@@ YT:Z:UU +SRR065390.13765 4 * 0 0 * * 0 0 CGTGGTCGTGCCGGTTACAAGCCTGCCGTGAAAAGCCGTTTCAGTAAGTCAGCCAATAGCAAATTCTCCCATACTATCGCTTTTGCCTGATCCTGAACTT CCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCDCCACCCCCCCC@CCCCCCCCCACCCBB@?CBDABDDADB<=ABBB@B@BB@ YT:Z:UU +SRR065390.13778 4 * 0 0 * * 0 0 TTTTATACCAACAAAAAACGGAAAGCAGATAACCCAGCAGCCCGAGTAACAGTATCCGGGCATCCAGGCCAAAAGCTAACAGAGCCGCGATAAAATCCCA CCCCCCCCCCCCCCBBBBBCCCCCCCBCCCCCCCCCBCCCCCCCCCDCCCCCACCCCCCCCCDCCCCDCBCA@AC>@=@CC?B>CBBCC>=?8A8=?>66 YT:Z:UU +SRR065390.13779 4 * 0 0 * * 0 0 ATAATGGACAACTTTAATGGCAATCACTAAATCAACTCCGGCACCATTAACCGGTGGGACGTTATGGTGCGTCACTATTGCATTGTCATTAGCGACATTT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCC=CC?BCCBCCACBABCCDCDBDADBBCDCBD>DBBD==BAA:>5<> YT:Z:UU +SRR065390.13802 4 * 0 0 * * 0 0 AAGGCGTTTATTATATACACTCGCATGGCTTTTCTTCTGAAAATGTAGAATAATTGAGTAATTTTTAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG ??B:??????BBB>B99;;;>B>>>:BBBBB;;B=70///0-/01/BA>BABB>B>BBB@BB>>ABABA>BA>6BB88183,<8;<4>:@><>A>> YT:Z:UU +SRR065390.13808 4 * 0 0 * * 0 0 GTTTGCTGACTGGCCAGCCAGCTCAAGGCATCAAAAGCGTCTTTGAGAGGGAAAGGAATATCGATAACCCGAATGCCGGGTGGCGCCGGAATCTCTTGCG CCCCBCCCBCCC@CCC@CCCAA:A=BB?BBBB@>B>A#################### YT:Z:UU +SRR065390.13853 4 * 0 0 * * 0 0 CTGGTACGTCACCACACGCCGCGATGGCGTCATCCACCGACTTCACCCACGTTACGCGATCGTCCGTACCCGGGTGACCGTTGGGGATAATATTTTTGCG 
#################################################################################################### YT:Z:UU +SRR065390.13861 4 * 0 0 * * 0 0 TTCAGAAACTGGATGAACAGTGCGCAGCCATCTGCAAATATGAATTAGTTCAAGTCACTCAAAAGCTATTTATTTGAATGGAAGAAATTTTTGAACTATA CCCCCCCCCCCBCCB@@CCCBCCBCCCCCCCCCCCCC?CCCC@CCC@C@CCC@CACCBCC?BBBC@C7CBCBCB@@ABCCBBBC=BABCCBBBBAB@@CA YT:Z:UU +SRR065390.13907 4 * 0 0 * * 0 0 CATTACCATTCAGTTGTATTGTTTGCGCACCAGAAAAATGAGACTGCACAGAATAAATTATACTGACCAGAAATTGTAAAATTCGTATATTCTTATTCAT 8998;9:;9;>9:9>?BABBAAA2A@@@@@>:3'3A################################################################ YT:Z:UU +SRR065390.13946 4 * 0 0 * * 0 0 TTTCCTCGAGTTCTTGATGAAATGGTCCATTATTTGTCAACCATTTATTTTTCCATATTTTTTCCAGGTAAGGCATGAATTCTGCAAGTTCCGGCAAAGA CCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCBBBBCACCCCCCB@BCCCCBC?CCCCACCBCBCCCCC@BBDCDDBCA4@@A YT:Z:UU +SRR065390.13956 4 * 0 0 * * 0 0 CGGCGCAACAATACTCAGCAGTTAATTGCAAAGGTATCGCACACCATTAAAAGCATTAAGCCGGGAGTCGAATTTGGTGTTAGCCCGGCAGGCGTGTGGC CCCCCCCCCCCBCCCCCCCCCDCCCCCCCCCCCC>CCCCCCCCCCCDCCCCBDCCDDCBDCC@?@BA@B@B>BBABAABB6?BB>B@?B??2?=+>->60 YT:Z:UU +SRR065390.13964 4 * 0 0 * * 0 0 NTTGAGGTGCTCCAGTGGCTTCTGTTTCTATCAGCTGTCCCTCCTGTTCAGCTACTGACGGGGGGGTGCGCAACGGCAAAAGCACCGCCGGGCATCAGCG !))))++++*AAAAA8AAAA################################################################################ YT:Z:UU +SRR065390.13969 4 * 0 0 * * 0 0 CGGGCGATAGTCAAAAACTTATTTTCACAATTTTCGGCTAGGGAGTATATTTACAGTTAATTTGCGATGTGTTAGATCGGAAGAGCGGTTCAGCAGGAAT CCCCCCCCCCCCCCCCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCACCCCCCCCCCDCCCACCCCCDCCACBCCCCCCBCBBCDCBCC?BCBBCBCBC;A YT:Z:UU +SRR065390.13978 4 * 0 0 * * 0 0 AGACGGTAACTTTCAATTTGCACCCATGATTAAATTTTATGTTGATTAAAATAGAAGCAAAAATCATTACATTACACTACAAAATACGCCGAAATGTTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBACCCDCBCCCABCADCCAABC? 
YT:Z:UU +SRR065390.13985 4 * 0 0 * * 0 0 TAACCAAAAACTGGATTATGCAAATAACTAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATATCGTATGCCGTCTTCTGCTTGAAAAAAAAA CCCCCCCCCCCCCC?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCBCCDCDCCD@CBD5>@=:=><9A@3>=B?BB>CBACB?BBA YT:Z:UU +SRR065390.14000 4 * 0 0 * * 0 0 TAGGTGAGAAAAGCGTTATTGGTCCGGTATACCTGCGAAGCGACAAAGCAATAAGGCAACAATGGCAGGTAATGCTGCTCAAAAAAGCGTTTACTGATCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCBADCCCB?@B>B@BADAAABBD@C5;B9?:?;ACABAB YT:Z:UU +SRR065390.14032 4 * 0 0 * * 0 0 GAAGGTCCAAGTGCCTTGAAGATAGAAAATTATAGCATTTCTCTTTAATTTCAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATATCGCATGA CCCACCCCCC>AAAACBCCCCCCCBCC?CCCCCCCCCCCCCCCCCCCCCCCB@CCCCCCBAC@CDCA/@B<;8=?@B>BC>?>?BB=:A########### YT:Z:UU +SRR065390.14061 4 * 0 0 * * 0 0 TGAAGCCGACAATTTGAGGCCAAACATCTTACATTCGACAGTAAATATTTGGGGATTAAGACTTATGTTAGATCGGAAGAGCGGTTCAGCAGGAATGCCG CCCCCCCCCCCCCCCCCCCCCCCCCCC=CCCCCCCCCCADCBCCCC=CCCCCCCBCCC=CBCCCCCCCABCCCCCCBACBC@CCBB;@B;?A@A@=?99A YT:Z:UU +SRR065390.14072 4 * 0 0 * * 0 0 TGAGTGAGGCTCAGGATTTTGAGTGAGGCTCAGGATTATGAGTGAGGTTGAAGAATTTGAGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGAG @B=@@BB@B@<@BB>BB>>@BB@==2;:;8BBBBBB@B@@@:@?1B@B@B@3@@@>3;@;@<@?>;@B@@##### YT:Z:UU +SRR065390.14100 4 * 0 0 * * 0 0 AAGCCTGAGGGATAATTTTCGTCAAATTAAGGCAATTGCCGAGTGTTTCATCCCTGGCAAGCAGAACGGCTTTTTCGTTATTTATATCGGGAGAATTTAT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCBCDCDCDCCCCDBBCDADBBBABDDBBBBBBBBDCBCD?BABB>B>AB>BCABAA>C YT:Z:UU +SRR065390.14105 4 * 0 0 * * 0 0 AAATTGTCCCCAAATAAAACAATTCCAGTGATCTTCCGATTCTAGGTGCCAAATAACCCAAATAGTCACTGCATTAGTTTTTATCTCACTTTTCTCCCCC #################################################################################################### YT:Z:UU +SRR065390.14107 4 * 0 0 * * 0 0 TGAAATTTCAAGAAAAATGTTAATTACCACCGTATTAAAAAAAAAAAACTTAAAATCAAAGATCGGAAAAGGGGTCAGGCAGGAATGCCAAAACCGACAC CCCCBCCCCCCCBCB>>>ACCCBCACCC?CCCCCCCCCCC?B########################################################## YT:Z:UU +SRR065390.14137 4 * 0 0 * * 0 0 
CTGTGGCGTTTTTATCAAATTGGCAGAGCCACGTTCAGAGCTGAAAAAGCCACAGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTTGGA BCCC@CCCCC?CCC>>CCBCCC?>C@CCCB=6?AA>=>3?>@?@86;86.@A@==378::68829>B9B############################### YT:Z:UU +SRR065390.14141 4 * 0 0 * * 0 0 GGTCACCAATCATAAGAGGAACAGCGACTGCACCTGCGTACATGACAAGGACGTGTTGCAGACCGAGTATGATCAGCTTTCCTGGTGATAGTATGCGCTC AAA@A?AA8:>A######################################################################################## YT:Z:UU +SRR065390.14162 4 * 0 0 * * 0 0 ATACTTCACCGGATGGTGGAATTAACGAAAACAACAACTGGTGTCACATCCCGCAGGCAAAAGAGGCAGCGGCTAACTAAGCGGCCTGCTGACTTTCTCG CCCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCCBBCCCCCCDCDCCCCCCCCCC?C?CCCCCACD@CAD@AB<>@CB;6B#################### YT:Z:UU +SRR065390.14168 4 * 0 0 * * 0 0 TCGAGGGTGAGGGCGTCTGCCAGATCGGAAGAGCGGGTCAGCAGGAATGCCGAGACCGATATCGGATGCCGTCTCCTGCTGGACAAAAAATGAGAATGGG AACC@0@>@6:<>??>?BBBBB?+B6BBB>B?B=:?BBB=BBBBB>B######################################## YT:Z:UU +SRR065390.14173 4 * 0 0 * * 0 0 AAGAAACTCAACAAACCGGACTTGCAGGTGAAACTGATTCCGATTACCTCACAAAACCGTATTCCACTGCTGCAAAACGGCACTTTCGATTTTGAATGTG CCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=@CCCCCCCCAA?ABC@CCA=CCCABDCCAABDD?BB@BBA YT:Z:UU +SRR065390.14178 4 * 0 0 * * 0 0 GCGCTTTGTTTACCTGATACGGAATTTCGTGGACGATAATGGTTTCACGACCGGTTTTGGCGTCAACTTCCACTTCTGCGCGAGCGCGGATATACACCTT BCCCCCCCCC@@CCCCCCCCCCCCCCCCCCA=AA>AAA=ACCBCCCCC?CCCCAB@ACCC?A?<CB?=CAB9B@BA################### YT:Z:UU +SRR065390.14182 4 * 0 0 * * 0 0 ATTTACTCTAATGTTCTGAAAAATAATTTACTCTAATGTTCTGCCAAATAATTTACTCTAACGTTCTGCCAAATAATTTACTCTAATGTTCTGCCAAATA CCCCCCCCCBCCCCCCCBCC@CCB@@@BCCCCCCC@CCCCCCBBCCCCCCBCCCC@CCC?CCC>>CCBCCCCA@CCCC;CBCCBDCCB@CCBCAACB@BB;B?B0B=8??9>??BB>B?@?B>A>A########## YT:Z:UU +SRR065390.14197 4 * 0 0 * * 0 0 GTACCTCGCCGTTGTTCTCGACCTGTTCGCAAGAAAACCAGTGGGCTGGGCCATGTCGTTCTCGCCGGACAGCAGGCTCACCATGCAAGCGCTGGAAATG CCCCCCCCCCCCCDCBB=B@?BB@BBBBBB@@@B@B==BBB9B@@@@B@=BBB@BBB=@BBABBB@@@BB<@BA@BBB=B;B?BBACA YT:Z:UU +SRR065390.14284 4 * 0 0 * * 0 0 
CGGTGCATGATGCGGATTCCAGGAATCAACGTACAGCGTCGGGCTAAACCAGAACCAGCCAATAATGCACAGACCGACGACCGGAATAATAACCCCCCAC BCACCCCCCCCBCCBCCCCCCCBCC@CCCCCCCAC@CBACCACCCCC@CBCCCCCCCA8CC?A@9@AB@9CACC8=81B@CC9CCCCCCC,<8??CBC@BB?@C@ACBCB################################################## YT:Z:UU +SRR065390.14312 4 * 0 0 * * 0 0 ACAGTAACATTCAACGTTAAATATGTTAATAAGACGTTGCATTATTGTCCTGAAGTTGAAGATAGCAGGTATGGCGGTTGGATAGCACGGCGTTGGTTTA CCCCCCCCCCCCCCCCCCCCBCCCCCCBCCCCCCCCCCCCCCCCCCCBCCCCCDCCCCCCCCCACCBC@?CACBC######################### YT:Z:UU +SRR065390.14331 4 * 0 0 * * 0 0 GAATAATGAAGATGATGCGACGCGTCTGGCGCGTTTGAACGAACGCTTTAAACGCGAAGGTAAACCGGAGTTGAAGAAGCTGGATGATCTACCTAAAGAT CCCCCCCCCCCCCCBCCBCCCCCCBCCCC@CCCB@CCCCACCDDCBCC?CAC@B@DABA?BAB@@@?C?C@BC?9A::>=@@C;?############### YT:Z:UU +SRR065390.14335 4 * 0 0 * * 0 0 TCCATTTGATGAACCTGAAGTTTAAGTATTGACTTGAGAGGAAAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCT CCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCC@BBA>CCCCCCCCBBCCBC=CBCCCB################### YT:Z:UU +SRR065390.14342 4 * 0 0 * * 0 0 AAGTTCATGAATTAAAGCCGACTCAAACACTCTGTTTAAAAACTGGATAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGAGATCGTAGGCCGTC 0000079;9;AAAA?;;;>9>3>9BB8BBBB@############################ YT:Z:UU +SRR065390.14359 4 * 0 0 * * 0 0 GCATCAGTACGATAAAACGCGTACCGAACTACTGAATGATGTCGCAGGGGCGCTGGCTCTTGATGACAAACTCGGACGTAGCACCAATCAACTTTCCGGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCBABACCBB@BBBBBB>BDBBDBBB>B@@@>>?BCBAC?CBC?> YT:Z:UU +SRR065390.14364 4 * 0 0 * * 0 0 GGTCGCCGATCCGATTTGCACTTTAACCACTTTCGGTAAAGAAACCGTTGTTAGTGAAAGCGAAAAACGCACAACGACCACTGATGACCCGCTACAGGTG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@@CCCCCCCCCBCCCCCCCCCACCDACCBCACA@CACCAA=B=CBC=ACAAAC=)>? 
YT:Z:UU +SRR065390.14392 4 * 0 0 * * 0 0 GTTATCCTTTTCCGTGATATGTGCGGTACTGCAGCGTATGCCGGCAAGGGTTGCAAACGGTGGTAGTGTGCAGGTTGACTGTTGGTCGGATTCCTCCACC CCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCDCCCCCCCCCBCCCA@CCB@AACB?CB?BA=A8@BACB?:===@C@-A6==?@@<@@AA##### YT:Z:UU +SRR065390.14393 4 * 0 0 * * 0 0 AGAAATTTACTGGCTCGCCGCAGCCAACTCCTCTTCTGACACCCCGGTAAAGCGCATGATGTCTGTAAGAGGGGCCCCGGATTCAAGCATTATTTTGGCT CCCCCCCCCCCCCCCBA9::<4A>AAAA:?A#################################### YT:Z:UU +SRR065390.14434 4 * 0 0 * * 0 0 GGTAGATTCCCATAAAAATCGCCAGCGGAATGGTGAACGCAACGGTATACGTTCCCCACGGGCTATGAGTCAGGGCTTTCACCACGATCATCGCCAGTAC DCACCCBCCCCCCCC>CBBCCCCCCCCCCCCCC?CCCCCCCCCCCACACCC@BCCCCBCD=ABB@BCBD?@@B6BC8B@B>BABCBB@AB=@2C###### YT:Z:UU diff --git a/src/htslib-1.21/test/index.sam.gz.bai b/src/htslib-1.21/test/index.sam.gz.bai new file mode 100644 index 0000000..0d41e3c Binary files /dev/null and b/src/htslib-1.21/test/index.sam.gz.bai differ diff --git a/src/htslib-1.21/test/index.sam.gz.csi b/src/htslib-1.21/test/index.sam.gz.csi new file mode 100644 index 0000000..2992ed1 Binary files /dev/null and b/src/htslib-1.21/test/index.sam.gz.csi differ diff --git a/src/htslib-1.21/test/index.vcf b/src/htslib-1.21/test/index.vcf new file mode 100644 index 0000000..b32e2d2 --- /dev/null +++ b/src/htslib-1.21/test/index.vcf @@ -0,0 +1,728 @@ +##fileformat=VCFv4.2 +##FILTER= +##bcftoolsVersion=1.8-31-g9ba4024+htslib-1.8-32-g6e87a1e-dirty +##bcftoolsCommand=mpileup --fasta-ref /nfs/srpipe_references/references/Human/1000Genomes_hs37d5/all/fasta/hs37d5.fa test/index.bam +##reference=file:///nfs/srpipe_references/references/Human/1000Genomes_hs37d5/all/fasta/hs37d5.fa +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ERS220911 +1 9999919 . G <*> 0 . DP=1;I16=1,0,0,0,26,676,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,26 +1 9999920 . T <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,1,1,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +1 9999921 . A <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,2,4,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +1 9999922 . A <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,3,9,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +1 9999923 . T <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,4,16,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +1 9999924 . C <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,5,25,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +1 9999925 . C <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,6,36,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 9999926 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,7,49,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999927 . A <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,8,64,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 9999928 . G <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +1 9999929 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999930 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999931 . C <*> 0 . 
DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999932 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999933 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999934 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999935 . A <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999936 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999937 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999938 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999939 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999940 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999941 . C <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +1 9999942 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999943 . A <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999944 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999945 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999946 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999947 . C <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +1 9999948 . A <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +1 9999949 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999950 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999951 . C <*> 0 . 
DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999952 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999953 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999954 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999955 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999956 . C <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +1 9999957 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999958 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999959 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999960 . T <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +1 9999961 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999962 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999963 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999964 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999965 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999966 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999967 . A <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +1 9999968 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999969 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999970 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999971 . G <*> 0 . 
DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999972 . T <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 9999973 . T <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 9999974 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999975 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999976 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999977 . G <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 9999978 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999979 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999980 . C <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +1 9999981 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999982 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999983 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999984 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999985 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999986 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999987 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999988 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999989 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999990 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999991 . A <*> 0 . 
DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999992 . C <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 9999993 . A <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 9999994 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 9999995 . G <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +1 9999996 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 9999997 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 9999998 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 9999999 . A <*> 0 . DP=1;I16=1,0,0,0,31,961,0,0,60,3600,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,31 +1 10000000 . A <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +1 10000001 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10000002 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10000003 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10000004 . C <*> 0 . DP=1;I16=1,0,0,0,29,841,0,0,60,3600,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000005 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10000006 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10000007 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10000008 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10000009 . T <*> 0 . DP=1;I16=1,0,0,0,43,1849,0,0,60,3600,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,43 +1 10000010 . C <*> 0 . DP=2;I16=1,1,0,0,59,2105,0,0,89,4441,0,0,8,64,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,59 +1 10000011 . T <*> 0 . 
DP=2;I16=1,1,0,0,76,2888,0,0,89,4441,0,0,8,50,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +1 10000012 . A <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,89,4441,0,0,8,40,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +1 10000013 . C <*> 0 . DP=2;I16=1,1,0,0,66,2250,0,0,89,4441,0,0,8,34,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,66 +1 10000014 . A <*> 0 . DP=2;I16=1,1,0,0,67,2285,0,0,89,4441,0,0,8,32,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +1 10000015 . A <*> 0 . DP=2;I16=1,1,0,0,69,2385,0,0,89,4441,0,0,8,34,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,65 +1 10000016 . T <*> 0 . DP=2;I16=1,1,0,0,75,2817,0,0,89,4441,0,0,8,40,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,68 +1 10000017 . A <*> 0 . DP=2;I16=1,1,0,0,67,2285,0,0,89,4441,0,0,8,50,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,58 +1 10000018 . A <*> 0 . DP=2;I16=1,1,0,0,64,2120,0,0,89,4441,0,0,8,64,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,55 +1 10000019 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000020 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000021 . T <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000022 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000023 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000024 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000025 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000026 . T <*> 0 . DP=1;I16=0,1,0,0,29,841,0,0,29,841,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000027 . A <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000028 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000029 . T <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000030 . A <*> 0 . 
DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000031 . G <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000032 . C <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000033 . T <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000034 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000035 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000036 . G <*> 0 . DP=1;I16=0,1,0,0,42,1764,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000037 . C <*> 0 . DP=1;I16=0,1,0,0,34,1156,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000038 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000039 . T <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000040 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000041 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000042 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000043 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000044 . G <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000045 . T <*> 0 . DP=1;I16=0,1,0,0,42,1764,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000046 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000047 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000048 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000049 . T <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000050 . G <*> 0 . 
DP=1;I16=0,1,0,0,31,961,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000051 . C <*> 0 . DP=1;I16=0,1,0,0,16,256,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,16 +1 10000052 . T <*> 0 . DP=1;I16=0,1,0,0,31,961,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000053 . T <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000054 . G <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000055 . T <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000056 . A <*> 0 . DP=1;I16=0,1,0,0,22,484,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,22 +1 10000057 . G <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000058 . T <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000059 . C <*> 0 . DP=1;I16=0,1,0,0,34,1156,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000060 . C <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000061 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000062 . A <*> 0 . DP=1;I16=0,1,0,0,34,1156,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000063 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000064 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000065 . T <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000066 . A <*> 0 . DP=1;I16=0,1,0,0,32,1024,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000067 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000068 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000069 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000070 . G <*> 0 . 
DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000071 . G <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000072 . C <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000073 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000074 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000075 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000076 . C <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000077 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000078 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000079 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000080 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000081 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000082 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000083 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000084 . G <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000085 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000086 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000087 . G <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000088 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000089 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000090 . T <*> 0 . 
DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000091 . C <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000092 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000093 . T <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000094 . C <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000095 . C <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000096 . A <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000097 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000098 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000099 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000100 . C <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000101 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,8,64,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000102 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,7,49,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000103 . G <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,6,36,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000104 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,5,25,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000105 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,4,16,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000106 . G <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,3,9,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000107 . G <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,2,4,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000108 . C <*> 0 . DP=1;I16=0,1,0,0,32,1024,0,0,29,841,0,0,1,1,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10000109 . A <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,29,841,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +2 4999907 . C <*> 0 . 
DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +2 4999908 . C <*> 0 . DP=1;I16=1,0,0,0,32,1024,0,0,60,3600,0,0,1,1,0,0;QS=1,0;MQ0F=0 PL 0,3,32 +2 4999909 . A <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,2,4,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +2 4999910 . G <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,3,9,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +2 4999911 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,4,16,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 4999912 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,5,25,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999913 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,6,36,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 4999914 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,7,49,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 4999915 . T <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,8,64,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +2 4999916 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 4999917 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999918 . A <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +2 4999919 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999920 . G <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +2 4999921 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999922 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999923 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999924 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999925 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999926 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 4999927 . T <*> 0 . 
DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 4999928 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999929 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 4999930 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999931 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999932 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999933 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999934 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999935 . T <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +2 4999936 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999937 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999938 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999939 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999940 . A <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 4999941 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999942 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999943 . T <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +2 4999944 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999945 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999946 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999947 . G <*> 0 . 
DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 4999948 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999949 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 4999950 . A <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 4999951 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 4999952 . A <*> 0 . DP=2;I16=1,1,0,0,64,2120,0,0,120,7200,0,0,25,625,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,64 +2 4999953 . G <*> 0 . DP=2;I16=1,1,0,0,60,1962,0,0,120,7200,0,0,26,626,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,60 +2 4999954 . T <*> 0 . DP=2;I16=1,1,0,0,67,2257,0,0,120,7200,0,0,27,629,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +2 4999955 . C <*> 0 . DP=2;I16=1,1,0,0,57,1805,0,0,120,7200,0,0,28,634,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,57 +2 4999956 . T <*> 0 . DP=2;I16=1,1,0,0,77,2969,0,0,120,7200,0,0,29,641,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 4999957 . C <*> 0 . DP=2;I16=1,1,0,0,60,2000,0,0,120,7200,0,0,30,650,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,60 +2 4999958 . A <*> 0 . DP=2;I16=1,1,0,0,71,2561,0,0,120,7200,0,0,31,661,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,71 +2 4999959 . C <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,32,674,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 4999960 . A <*> 0 . DP=2;I16=1,1,0,0,71,2561,0,0,120,7200,0,0,33,689,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,71 +2 4999961 . A <*> 0 . DP=2;I16=1,1,0,0,71,2521,0,0,120,7200,0,0,34,706,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,71 +2 4999962 . G <*> 0 . DP=2;I16=1,1,0,0,79,3121,0,0,120,7200,0,0,35,725,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,79 +2 4999963 . C <*> 0 . DP=2;I16=1,1,0,0,70,2452,0,0,120,7200,0,0,36,746,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,70 +2 4999964 . C <*> 0 . DP=2;I16=1,1,0,0,78,3042,0,0,120,7200,0,0,37,769,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +2 4999965 . T <*> 0 . DP=2;I16=1,1,0,0,79,3121,0,0,120,7200,0,0,38,794,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,79 +2 4999966 . T <*> 0 . 
DP=2;I16=1,1,0,0,76,2888,0,0,120,7200,0,0,39,821,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 4999967 . A <*> 0 . DP=2;I16=1,1,0,0,79,3125,0,0,120,7200,0,0,40,850,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,79 +2 4999968 . A <*> 0 . DP=2;I16=1,1,0,0,73,2669,0,0,120,7200,0,0,41,881,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +2 4999969 . T <*> 0 . DP=2;I16=1,1,0,0,73,2669,0,0,120,7200,0,0,42,914,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +2 4999970 . T <*> 0 . DP=2;I16=1,1,0,0,78,3042,0,0,120,7200,0,0,43,949,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +2 4999971 . C <*> 0 . DP=2;I16=1,1,0,0,80,3208,0,0,120,7200,0,0,44,986,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,80 +2 4999972 . T <*> 0 . DP=2;I16=1,1,0,0,75,2817,0,0,120,7200,0,0,45,1025,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 4999973 . T <*> 0 . DP=2;I16=1,1,0,0,78,3044,0,0,120,7200,0,0,46,1066,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +2 4999974 . G <*> 0 . DP=2;I16=1,1,0,0,76,2888,0,0,120,7200,0,0,47,1109,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 4999975 . C <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,48,1154,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 4999976 . A <*> 0 . DP=2;I16=1,1,0,0,72,2594,0,0,120,7200,0,0,49,1201,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,72 +2 4999977 . T <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 4999978 . T <*> 0 . DP=2;I16=1,1,0,0,80,3200,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,80 +2 4999979 . C <*> 0 . DP=2;I16=1,1,0,0,78,3044,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +2 4999980 . T <*> 0 . DP=2;I16=1,1,0,0,81,3281,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,81 +2 4999981 . G <*> 0 . DP=2;I16=1,1,0,0,75,2813,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 4999982 . T <*> 0 . DP=2;I16=1,1,0,0,72,2592,0,0,120,7200,0,0,49,1201,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,72 +2 4999983 . G <*> 0 . DP=2;I16=1,1,0,0,75,2813,0,0,120,7200,0,0,48,1154,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 4999984 . C <*> 0 . 
DP=2;I16=1,1,0,0,76,2888,0,0,120,7200,0,0,47,1109,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 4999985 . A <*> 0 . DP=2;I16=1,1,0,0,76,2890,0,0,120,7200,0,0,46,1066,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 4999986 . C <*> 0 . DP=2;I16=1,1,0,0,76,2890,0,0,120,7200,0,0,45,1025,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 4999987 . C <*> 0 . DP=2;I16=1,1,0,0,81,3281,0,0,120,7200,0,0,44,986,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,81 +2 4999988 . T <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,43,949,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 4999989 . G <*> 0 . DP=2;I16=1,1,0,0,78,3044,0,0,120,7200,0,0,42,914,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +2 4999990 . C <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,41,881,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 4999991 . A <*> 0 . DP=2;I16=1,1,0,0,82,3362,0,0,120,7200,0,0,40,850,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,82 +2 4999992 . G <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,39,821,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 4999993 . G <*> 0 . DP=2;I16=1,1,0,0,77,2969,0,0,120,7200,0,0,38,794,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 4999994 . T <*> 0 . DP=2;I16=1,1,0,0,76,2890,0,0,120,7200,0,0,37,769,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 4999995 . T <*> 0 . DP=2;I16=1,1,0,0,74,2738,0,0,120,7200,0,0,36,746,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,74 +2 4999996 . T <*> 0 . DP=2;I16=1,1,0,0,79,3121,0,0,120,7200,0,0,35,725,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,79 +2 4999997 . A <*> 0 . DP=2;I16=1,1,0,0,76,2888,0,0,120,7200,0,0,34,706,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 4999998 . A <*> 0 . DP=2;I16=1,1,0,0,75,2813,0,0,120,7200,0,0,33,689,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 4999999 . T C,<*> 0 . DP=2;I16=0,1,1,0,39,1521,38,1444,60,3600,60,3600,25,625,7,49;QS=0.506494,0.493506,0;SGB=-0.379885;RPB=1;MQB=1;MQSB=1;BQB=1;MQ0F=0 PL 32,0,33,35,36,68 +2 5000000 . A <*> 0 . DP=2;I16=1,1,0,0,76,2890,0,0,120,7200,0,0,31,661,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 5000001 . T <*> 0 . DP=2;I16=1,1,0,0,76,2896,0,0,120,7200,0,0,30,650,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 5000002 . T <*> 0 . 
DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,29,641,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 5000003 . A <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,28,634,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 5000004 . C <*> 0 . DP=2;I16=1,1,0,0,79,3121,0,0,120,7200,0,0,27,629,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,79 +2 5000005 . A <*> 0 . DP=2;I16=1,1,0,0,71,2525,0,0,120,7200,0,0,26,626,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,71 +2 5000006 . T <*> 0 . DP=2;I16=1,1,0,0,70,2458,0,0,120,7200,0,0,25,625,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,70 +2 5000007 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000008 . G <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000009 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000010 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000011 . G <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +2 5000012 . C <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000013 . C <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +2 5000014 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000015 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000016 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000017 . G <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +2 5000018 . C <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000019 . T <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000020 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000021 . A <*> 0 . 
DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000022 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000023 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000024 . G <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +2 5000025 . C <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000026 . T <*> 0 . DP=2;I16=1,1,0,0,67,2357,0,0,120,7200,0,0,25,625,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +2 5000027 . T <*> 0 . DP=2;I16=1,1,0,0,72,2600,0,0,120,7200,0,0,25,577,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,72 +2 5000028 . G <*> 0 . DP=2;I16=1,1,0,0,72,2610,0,0,120,7200,0,0,25,533,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,72 +2 5000029 . T <*> 0 . DP=2;I16=1,1,0,0,75,2825,0,0,120,7200,0,0,25,493,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 5000030 . G <*> 0 . DP=2;I16=1,1,0,0,72,2594,0,0,120,7200,0,0,25,457,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,72 +2 5000031 . C <*> 0 . DP=2;I16=1,1,0,0,75,2813,0,0,120,7200,0,0,25,425,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 5000032 . T <*> 0 . DP=2;I16=1,1,0,0,71,2525,0,0,120,7200,0,0,25,397,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,71 +2 5000033 . C <*> 0 . DP=2;I16=1,1,0,0,74,2738,0,0,120,7200,0,0,25,373,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,74 +2 5000034 . T <*> 0 . DP=2;I16=1,1,0,0,75,2813,0,0,120,7200,0,0,25,353,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 5000035 . C <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,25,337,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 5000036 . C <*> 0 . DP=2;I16=1,1,0,0,70,2452,0,0,120,7200,0,0,25,325,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,70 +2 5000037 . A <*> 0 . DP=2;I16=1,1,0,0,74,2740,0,0,120,7200,0,0,25,317,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,74 +2 5000038 . A <*> 0 . DP=2;I16=1,1,0,0,84,3530,0,0,120,7200,0,0,25,313,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,84 +2 5000039 . A <*> 0 . DP=2;I16=1,1,0,0,76,2888,0,0,120,7200,0,0,25,313,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 5000040 . G <*> 0 . 
DP=2;I16=1,1,0,0,76,2888,0,0,120,7200,0,0,25,317,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +2 5000041 . C <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,25,325,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 5000042 . A <*> 0 . DP=2;I16=1,1,0,0,73,2665,0,0,120,7200,0,0,25,337,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +2 5000043 . G <*> 0 . DP=2;I16=1,1,0,0,75,2813,0,0,120,7200,0,0,25,353,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 5000044 . C <*> 0 . DP=2;I16=1,1,0,0,77,2969,0,0,120,7200,0,0,25,373,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +2 5000045 . A <*> 0 . DP=2;I16=1,1,0,0,72,2594,0,0,120,7200,0,0,25,397,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,72 +2 5000046 . G <*> 0 . DP=2;I16=1,1,0,0,75,2813,0,0,120,7200,0,0,25,425,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +2 5000047 . T <*> 0 . DP=2;I16=1,1,0,0,73,2665,0,0,120,7200,0,0,25,457,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +2 5000048 . G <*> 0 . DP=2;I16=1,1,0,0,73,2665,0,0,120,7200,0,0,25,493,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +2 5000049 . A <*> 0 . DP=2;I16=1,1,0,0,73,2689,0,0,120,7200,0,0,25,533,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +2 5000050 . T <*> 0 . DP=2;I16=1,1,0,0,71,2545,0,0,120,7200,0,0,25,577,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,71 +2 5000051 . A <*> 0 . DP=2;I16=1,1,0,0,72,2610,0,0,120,7200,0,0,25,625,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,72 +2 5000052 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000053 . G <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +2 5000054 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000055 . T <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +2 5000056 . T <*> 0 . DP=1;I16=1,0,0,0,32,1024,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,32 +2 5000057 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000058 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000059 . A <*> 0 . 
DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000060 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000061 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000062 . G <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +2 5000063 . T <*> 0 . DP=1;I16=1,0,0,0,21,441,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,21 +2 5000064 . G <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +2 5000065 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000066 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000067 . C <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +2 5000068 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000069 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000070 . A <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +2 5000071 . C <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +2 5000072 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000073 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000074 . A <*> 0 . DP=1;I16=1,0,0,0,20,400,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,20 +2 5000075 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000076 . A <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +2 5000077 . T <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +2 5000078 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000079 . T <*> 0 . 
DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +2 5000080 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000081 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000082 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000083 . C <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +2 5000084 . T <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000085 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000086 . G <*> 0 . DP=1;I16=1,0,0,0,32,1024,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,32 +2 5000087 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000088 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000089 . T <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000090 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000091 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000092 . T <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000093 . A <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000094 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000095 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000096 . T <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +2 5000097 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000098 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000099 . C <*> 0 . 
DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000100 . A <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000101 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000102 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000103 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000104 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000105 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000106 . C <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +2 5000107 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000108 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000109 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000110 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000111 . G <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +2 5000112 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000113 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +2 5000114 . T <*> 0 . DP=1;I16=1,0,0,0,26,676,0,0,60,3600,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,26 +2 5000115 . T <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000116 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000117 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,8,64,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +2 5000118 . A <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,7,49,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +2 5000119 . G <*> 0 . 
DP=1;I16=1,0,0,0,30,900,0,0,60,3600,0,0,6,36,0,0;QS=1,0;MQ0F=0 PL 0,3,30 +2 5000120 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,5,25,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000121 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,4,16,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +2 5000122 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,3,9,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000123 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,2,4,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +2 5000124 . A <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,1,1,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +2 5000125 . A <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +10 2999980 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +10 2999981 . C <*> 0 . DP=2;I16=1,1,0,0,63,2045,0,0,120,7200,0,0,1,1,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,63 +10 2999982 . A <*> 0 . DP=2;I16=1,1,0,0,73,2677,0,0,120,7200,0,0,3,5,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +10 2999983 . A <*> 0 . DP=2;I16=1,1,0,0,69,2393,0,0,120,7200,0,0,5,13,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,69 +10 2999984 . C <*> 0 . DP=2;I16=1,1,0,0,71,2525,0,0,120,7200,0,0,7,25,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,71 +10 2999985 . A <*> 0 . DP=2;I16=1,1,0,0,68,2314,0,0,120,7200,0,0,9,41,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,68 +10 2999986 . A <*> 0 . DP=2;I16=1,1,0,0,73,2669,0,0,120,7200,0,0,11,61,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +10 2999987 . A <*> 0 . DP=2;I16=1,1,0,0,61,1945,0,0,120,7200,0,0,13,85,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,61 +10 2999988 . G <*> 0 . DP=3;I16=1,2,0,0,108,3896,0,0,180,10800,0,0,15,113,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,102 +10 2999989 . T <*> 0 . DP=3;I16=1,2,0,0,112,4190,0,0,180,10800,0,0,18,146,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 2999990 . G <*> 0 . DP=3;I16=1,2,0,0,115,4409,0,0,180,10800,0,0,21,185,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,109 +10 2999991 . A <*> 0 . DP=3;I16=1,2,0,0,110,4082,0,0,180,10800,0,0,24,230,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,104 +10 2999992 . G <*> 0 . 
DP=3;I16=1,2,0,0,117,4569,0,0,180,10800,0,0,27,281,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,111 +10 2999993 . A <*> 0 . DP=3;I16=1,2,0,0,115,4411,0,0,180,10800,0,0,30,338,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,108 +10 2999994 . C <*> 0 . DP=3;I16=1,2,0,0,111,4145,0,0,180,10800,0,0,33,401,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 2999995 . C <*> 0 . DP=3;I16=1,2,0,0,108,3944,0,0,180,10800,0,0,36,470,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,102 +10 2999996 . T <*> 0 . DP=3;I16=1,2,0,0,113,4257,0,0,180,10800,0,0,39,545,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 2999997 . C <*> 0 . DP=3;I16=1,2,0,0,111,4109,0,0,180,10800,0,0,42,626,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 2999998 . A <*> 0 . DP=3;I16=1,2,0,0,111,4121,0,0,180,10800,0,0,45,713,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 2999999 . T <*> 0 . DP=3;I16=1,2,0,0,109,3961,0,0,180,10800,0,0,48,806,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,103 +10 3000000 . C <*> 0 . DP=3;I16=1,2,0,0,109,3979,0,0,180,10800,0,0,51,905,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,103 +10 3000001 . T <*> 0 . DP=3;I16=1,2,0,0,109,3969,0,0,180,10800,0,0,54,1010,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,103 +10 3000002 . C <*> 0 . DP=3;I16=1,2,0,0,109,3961,0,0,180,10800,0,0,57,1121,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,103 +10 3000003 . T <*> 0 . DP=3;I16=1,2,0,0,116,4494,0,0,180,10800,0,0,60,1238,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,110 +10 3000004 . A <*> 0 . DP=3;I16=1,2,0,0,118,4642,0,0,180,10800,0,0,63,1361,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,111 +10 3000005 . C <*> 0 . DP=3;I16=1,2,0,0,117,4569,0,0,180,10800,0,0,66,1490,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,110 +10 3000006 . A <*> 0 . DP=3;I16=1,2,0,0,112,4190,0,0,180,10800,0,0,68,1574,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000007 . A <*> 0 . DP=3;I16=1,2,0,0,111,4113,0,0,180,10800,0,0,69,1611,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000008 . A <*> 0 . DP=3;I16=1,2,0,0,115,4417,0,0,180,10800,0,0,70,1650,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,108 +10 3000009 . A <*> 0 . DP=3;I16=1,2,0,0,115,4411,0,0,180,10800,0,0,71,1691,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,109 +10 3000010 . 
A <*> 0 . DP=3;I16=1,2,0,0,117,4569,0,0,180,10800,0,0,72,1734,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,110 +10 3000011 . A <*> 0 . DP=3;I16=1,2,0,0,116,4494,0,0,180,10800,0,0,73,1779,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,109 +10 3000012 . T <*> 0 . DP=3;I16=1,2,0,0,112,4190,0,0,180,10800,0,0,74,1826,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000013 . G <*> 0 . DP=3;I16=1,2,0,0,114,4334,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,108 +10 3000014 . A <*> 0 . DP=3;I16=1,2,0,0,117,4577,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,110 +10 3000015 . A <*> 0 . DP=3;I16=1,2,0,0,112,4182,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000016 . A <*> 0 . DP=3;I16=1,2,0,0,116,4490,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,110 +10 3000017 . A <*> 0 . DP=3;I16=1,2,0,0,113,4259,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000018 . G <*> 0 . DP=3;I16=1,2,0,0,110,4042,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,104 +10 3000019 . G <*> 0 . DP=3;I16=1,2,0,0,116,4488,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,109 +10 3000020 . T <*> 0 . DP=3;I16=1,2,0,0,115,4409,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,109 +10 3000021 . T <*> 0 . DP=3;I16=1,2,0,0,110,4046,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,104 +10 3000022 . A <*> 0 . DP=3;I16=1,2,0,0,105,3713,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,99 +10 3000023 . G <*> 0 . DP=3;I16=1,2,0,0,114,4334,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,107 +10 3000024 . C <*> 0 . DP=3;I16=1,2,0,0,113,4267,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,107 +10 3000025 . T <*> 0 . DP=3;I16=1,2,0,0,112,4182,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000026 . T <*> 0 . DP=3;I16=1,2,0,0,119,4725,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,112 +10 3000027 . G <*> 0 . 
DP=3;I16=1,2,0,0,96,3464,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,93 +10 3000028 . G <*> 0 . DP=3;I16=1,2,0,0,113,4277,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,107 +10 3000029 . T <*> 0 . DP=4;I16=1,2,0,0,116,4490,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,110 +10 3000030 . G <*> 0 . DP=4;I16=1,3,0,0,152,5778,0,0,240,14400,0,0,76,1876,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,134 +10 3000031 . T <*> 0 . DP=4;I16=1,3,0,0,140,5100,0,0,240,14400,0,0,77,1879,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,122 +10 3000032 . A <*> 0 . DP=4;I16=1,3,0,0,137,4965,0,0,240,14400,0,0,78,1884,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,123 +10 3000033 . G <*> 0 . DP=4;I16=1,3,0,0,153,5853,0,0,240,14400,0,0,79,1891,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,135 +10 3000034 . T <*> 0 . DP=4;I16=1,3,0,0,140,4998,0,0,240,14400,0,0,80,1900,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,124 +10 3000035 . G <*> 0 . DP=4;I16=1,3,0,0,152,5794,0,0,240,14400,0,0,81,1911,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,133 +10 3000036 . G <*> 0 . DP=4;I16=1,3,0,0,150,5628,0,0,240,14400,0,0,82,1924,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,132 +10 3000037 . C <*> 0 . DP=4;I16=1,3,0,0,157,6165,0,0,240,14400,0,0,83,1939,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,138 +10 3000038 . A <*> 0 . DP=4;I16=1,2,0,0,110,4034,0,0,180,10800,0,0,75,1875,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,103 +10 3000039 . C <*> 0 . DP=4;I16=1,3,0,0,149,5553,0,0,240,14400,0,0,85,1975,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,131 +10 3000040 . A <*> 0 . DP=4;I16=1,3,0,0,137,4767,0,0,240,14400,0,0,86,1996,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,121 +10 3000041 . T <*> 0 . DP=4;I16=1,3,0,0,148,5514,0,0,240,14400,0,0,87,2019,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,130 +10 3000042 . A <*> 0 . DP=4;I16=1,3,0,0,135,4599,0,0,240,14400,0,0,88,2044,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,119 +10 3000043 . T <*> 0 . DP=4;I16=1,3,0,0,155,6011,0,0,240,14400,0,0,89,2071,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,136 +10 3000044 . C <*> 0 . 
DP=4;I16=1,3,0,0,150,5646,0,0,240,14400,0,0,90,2100,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,132 +10 3000045 . T <*> 0 . DP=4;I16=1,3,0,0,133,4559,0,0,240,14400,0,0,91,2131,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,118 +10 3000046 . G <*> 0 . DP=4;I16=1,3,0,0,146,5362,0,0,240,14400,0,0,92,2164,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,128 +10 3000047 . T <*> 0 . DP=4;I16=1,3,0,0,150,5630,0,0,240,14400,0,0,93,2199,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,131 +10 3000048 . G <*> 0 . DP=4;I16=1,3,0,0,155,6007,0,0,240,14400,0,0,94,2236,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,137 +10 3000049 . G <*> 0 . DP=4;I16=1,3,0,0,152,5814,0,0,240,14400,0,0,95,2275,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,135 +10 3000050 . T <*> 0 . DP=4;I16=1,3,0,0,147,5465,0,0,240,14400,0,0,96,2316,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,130 +10 3000051 . C <*> 0 . DP=4;I16=1,3,0,0,152,5782,0,0,240,14400,0,0,97,2359,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,133 +10 3000052 . C <*> 0 . DP=4;I16=1,3,0,0,153,5861,0,0,240,14400,0,0,98,2404,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,134 +10 3000053 . C <*> 0 . DP=4;I16=1,3,0,0,149,5595,0,0,240,14400,0,0,99,2451,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,131 +10 3000054 . A <*> 0 . DP=4;I16=1,3,0,0,144,5202,0,0,240,14400,0,0,100,2500,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,127 +10 3000055 . G <*> 0 . DP=4;I16=1,3,0,0,153,5859,0,0,240,14400,0,0,99,2451,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,135 +10 3000056 . C <*> 0 . DP=4;I16=1,3,0,0,157,6163,0,0,240,14400,0,0,97,2355,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,139 +10 3000057 . T <*> 0 . DP=5;I16=2,3,0,0,166,5624,0,0,300,18000,0,0,95,2263,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,145 +10 3000058 . A <*> 0 . DP=5;I16=2,3,0,0,187,7011,0,0,300,18000,0,0,94,2176,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,162 +10 3000059 . T <*> 0 . DP=5;I16=2,3,0,0,177,6285,0,0,300,18000,0,0,93,2095,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,154 +10 3000060 . T <*> 0 . DP=5;I16=2,3,0,0,182,6644,0,0,300,18000,0,0,92,2020,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,159 +10 3000061 . T <*> 0 . 
DP=5;I16=2,3,0,0,187,7011,0,0,300,18000,0,0,91,1951,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,162 +10 3000062 . G <*> 0 . DP=5;I16=2,3,0,0,188,7084,0,0,300,18000,0,0,90,1888,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,164 +10 3000063 . G <*> 0 . DP=5;I16=2,3,0,0,184,6802,0,0,300,18000,0,0,88,1782,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,160 +10 3000064 . G <*> 0 . DP=5;I16=2,3,0,0,187,6999,0,0,300,18000,0,0,86,1684,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,162 +10 3000065 . A <*> 0 . DP=5;I16=2,3,0,0,185,6865,0,0,300,18000,0,0,84,1594,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,161 +10 3000066 . G A,<*> 0 . DP=5;I16=1,1,1,2,71,2525,112,4186,120,7200,180,10800,39,821,43,691;QS=0.387978,0.612022,0;VDB=0.946712;SGB=-0.511536;RPB=1;MQB=1;MQSB=1;BQB=0.5;MQ0F=0 PL 91,0,56,97,65,152 +10 3000067 . G <*> 0 . DP=5;I16=2,3,0,0,190,7224,0,0,300,18000,0,0,80,1438,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,164 +10 3000068 . C <*> 0 . DP=5;I16=2,3,0,0,185,6877,0,0,300,18000,0,0,78,1372,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,161 +10 3000069 . T <*> 0 . DP=5;I16=2,3,0,0,183,6715,0,0,300,18000,0,0,76,1314,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,158 +10 3000070 . G <*> 0 . DP=5;I16=2,3,0,0,186,6922,0,0,300,18000,0,0,74,1264,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,161 +10 3000071 . A <*> 0 . DP=5;I16=2,3,0,0,183,6707,0,0,300,18000,0,0,72,1222,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,159 +10 3000072 . G <*> 0 . DP=5;I16=2,3,0,0,188,7080,0,0,300,18000,0,0,70,1188,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,164 +10 3000073 . A <*> 0 . DP=5;I16=2,3,0,0,179,6411,0,0,300,18000,0,0,68,1162,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,155 +10 3000074 . T <*> 0 . DP=5;I16=2,3,0,0,175,6187,0,0,300,18000,0,0,66,1144,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,153 +10 3000075 . A <*> 0 . DP=5;I16=2,3,0,0,172,5958,0,0,300,18000,0,0,64,1134,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,149 +10 3000076 . G <*> 0 . DP=5;I16=2,3,0,0,184,6776,0,0,300,18000,0,0,62,1132,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,159 +10 3000077 . G <*> 0 . DP=5;I16=2,3,0,0,182,6654,0,0,300,18000,0,0,60,1138,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,158 +10 3000078 . 
A <*> 0 . DP=5;I16=2,3,0,0,184,6784,0,0,300,18000,0,0,58,1152,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,159 +10 3000079 . G <*> 0 . DP=5;I16=2,3,0,0,181,6697,0,0,300,18000,0,0,56,1174,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,15,158 +10 3000080 . G <*> 0 . DP=4;I16=2,2,0,0,147,5435,0,0,240,14400,0,0,55,1203,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,12,135 +10 3000081 . A <*> 0 . DP=3;I16=1,2,0,0,114,4334,0,0,180,10800,0,0,55,1237,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,107 +10 3000082 . T <*> 0 . DP=3;I16=1,2,0,0,107,3841,0,0,180,10800,0,0,55,1275,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,101 +10 3000083 . C <*> 0 . DP=3;I16=1,2,0,0,111,4109,0,0,180,10800,0,0,54,1266,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000084 . A <*> 0 . DP=3;I16=1,2,0,0,94,3054,0,0,180,10800,0,0,53,1259,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,88 +10 3000085 . C <*> 0 . DP=3;I16=1,2,0,0,112,4210,0,0,180,10800,0,0,52,1254,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000086 . T <*> 0 . DP=3;I16=1,2,0,0,111,4145,0,0,180,10800,0,0,51,1251,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000087 . T <*> 0 . DP=3;I16=1,2,0,0,112,4214,0,0,180,10800,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000088 . G <*> 0 . DP=2;I16=1,1,0,0,70,2450,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,70 +10 3000089 . A <*> 0 . DP=2;I16=1,1,0,0,75,2813,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +10 3000090 . G <*> 0 . DP=2;I16=1,1,0,0,69,2385,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,69 +10 3000091 . C <*> 0 . DP=3;I16=1,2,0,0,109,4001,0,0,180,10800,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,103 +10 3000092 . C <*> 0 . DP=3;I16=1,2,0,0,110,4058,0,0,180,10800,0,0,51,1251,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,104 +10 3000093 . C <*> 0 . DP=3;I16=1,2,0,0,107,3817,0,0,180,10800,0,0,52,1254,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,101 +10 3000094 . A <*> 0 . DP=3;I16=1,2,0,0,92,3026,0,0,180,10800,0,0,53,1259,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,88 +10 3000095 . A <*> 0 . 
DP=3;I16=1,2,0,0,102,3518,0,0,180,10800,0,0,54,1266,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,96 +10 3000096 . G <*> 0 . DP=3;I16=1,2,0,0,114,4332,0,0,180,10800,0,0,55,1275,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,108 +10 3000097 . A <*> 0 . DP=3;I16=1,2,0,0,110,4038,0,0,180,10800,0,0,56,1286,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,104 +10 3000098 . G <*> 0 . DP=3;I16=1,2,0,0,111,4109,0,0,180,10800,0,0,57,1299,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000099 . G <*> 0 . DP=3;I16=1,2,0,0,113,4261,0,0,180,10800,0,0,58,1314,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000100 . T <*> 0 . DP=3;I16=1,2,0,0,112,4230,0,0,180,10800,0,0,59,1331,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000101 . C <*> 0 . DP=3;I16=1,2,0,0,111,4109,0,0,180,10800,0,0,60,1350,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,104 +10 3000102 . A <*> 0 . DP=3;I16=1,2,0,0,116,4488,0,0,180,10800,0,0,61,1371,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,110 +10 3000103 . A <*> 0 . DP=3;I16=1,2,0,0,114,4332,0,0,180,10800,0,0,62,1394,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,108 +10 3000104 . G <*> 0 . DP=3;I16=1,2,0,0,117,4565,0,0,180,10800,0,0,62,1370,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,111 +10 3000105 . G <*> 0 . DP=3;I16=1,2,0,0,118,4650,0,0,180,10800,0,0,62,1350,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,111 +10 3000106 . C <*> 0 . DP=3;I16=1,2,0,0,114,4370,0,0,180,10800,0,0,62,1334,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,107 +10 3000107 . T <*> 0 . DP=3;I16=1,2,0,0,111,4109,0,0,180,10800,0,0,62,1322,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000108 . G <*> 0 . DP=3;I16=1,2,0,0,119,4721,0,0,180,10800,0,0,62,1314,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,112 +10 3000109 . C <*> 0 . DP=3;I16=1,2,0,0,110,4054,0,0,180,10800,0,0,62,1310,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,104 +10 3000110 . A <*> 0 . DP=3;I16=1,2,0,0,107,3821,0,0,180,10800,0,0,62,1310,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,101 +10 3000111 . G <*> 0 . DP=3;I16=1,2,0,0,121,4893,0,0,180,10800,0,0,62,1314,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,114 +10 3000112 . T <*> 0 . 
DP=3;I16=1,2,0,0,110,4034,0,0,180,10800,0,0,62,1322,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,104 +10 3000113 . G <*> 0 . DP=3;I16=1,2,0,0,112,4186,0,0,180,10800,0,0,62,1334,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000114 . A <*> 0 . DP=3;I16=1,2,0,0,115,4409,0,0,180,10800,0,0,62,1350,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,108 +10 3000115 . G <*> 0 . DP=3;I16=1,2,0,0,113,4261,0,0,180,10800,0,0,62,1370,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,107 +10 3000116 . C <*> 0 . DP=3;I16=1,2,0,0,115,4417,0,0,180,10800,0,0,62,1394,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,108 +10 3000117 . C <*> 0 . DP=3;I16=1,2,0,0,115,4409,0,0,180,10800,0,0,61,1371,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,109 +10 3000118 . A <*> 0 . DP=3;I16=1,2,0,0,118,4642,0,0,180,10800,0,0,60,1350,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,112 +10 3000119 . T <*> 0 . DP=3;I16=1,2,0,0,108,3890,0,0,180,10800,0,0,59,1331,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,102 +10 3000120 . G <*> 0 . DP=3;I16=1,2,0,0,113,4259,0,0,180,10800,0,0,58,1314,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,107 +10 3000121 . A <*> 0 . DP=3;I16=1,2,0,0,106,3750,0,0,180,10800,0,0,57,1299,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,100 +10 3000122 . T <*> 0 . DP=3;I16=1,2,0,0,111,4109,0,0,180,10800,0,0,56,1286,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000123 . T <*> 0 . DP=3;I16=1,2,0,0,115,4411,0,0,180,10800,0,0,55,1275,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,108 +10 3000124 . G <*> 0 . DP=3;I16=1,2,0,0,108,3890,0,0,180,10800,0,0,54,1266,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,101 +10 3000125 . C <*> 0 . DP=3;I16=1,2,0,0,113,4275,0,0,180,10800,0,0,53,1259,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,106 +10 3000126 . A <*> 0 . DP=3;I16=1,2,0,0,109,3977,0,0,180,10800,0,0,52,1254,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,103 +10 3000127 . T <*> 0 . DP=3;I16=1,2,0,0,111,4121,0,0,180,10800,0,0,51,1251,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,105 +10 3000128 . C <*> 0 . DP=3;I16=1,2,0,0,101,3489,0,0,180,10800,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,9,96 +10 3000129 . C <*> 0 . 
DP=2;I16=1,1,0,0,78,3042,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +10 3000130 . C <*> 0 . DP=2;I16=1,1,0,0,76,2888,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +10 3000131 . T <*> 0 . DP=2;I16=1,1,0,0,76,2890,0,0,120,7200,0,0,50,1250,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,76 +10 3000132 . G <*> 0 . DP=2;I16=1,1,0,0,80,3202,0,0,120,7200,0,0,49,1201,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,80 +10 3000133 . C <*> 0 . DP=2;I16=1,1,0,0,74,2738,0,0,120,7200,0,0,48,1154,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,74 +10 3000134 . A <*> 0 . DP=2;I16=1,1,0,0,75,2817,0,0,120,7200,0,0,47,1109,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +10 3000135 . C <*> 0 . DP=2;I16=1,1,0,0,74,2738,0,0,120,7200,0,0,46,1066,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,74 +10 3000136 . T <*> 0 . DP=2;I16=1,1,0,0,74,2756,0,0,120,7200,0,0,45,1025,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,74 +10 3000137 . C <*> 0 . DP=2;I16=1,1,0,0,75,2817,0,0,120,7200,0,0,44,986,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +10 3000138 . C <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,43,949,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +10 3000139 . A <*> 0 . DP=2;I16=1,1,0,0,78,3044,0,0,120,7200,0,0,42,914,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +10 3000140 . A <*> 0 . DP=2;I16=1,1,0,0,73,2677,0,0,120,7200,0,0,41,881,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +10 3000141 . C <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,40,850,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +10 3000142 . C <*> 0 . DP=2;I16=1,1,0,0,81,3281,0,0,120,7200,0,0,39,821,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,81 +10 3000143 . T <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,38,794,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +10 3000144 . G <*> 0 . DP=2;I16=1,1,0,0,80,3208,0,0,120,7200,0,0,37,769,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,80 +10 3000145 . G <*> 0 . DP=2;I16=1,1,0,0,78,3044,0,0,120,7200,0,0,36,746,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +10 3000146 . G <*> 0 . DP=2;I16=1,1,0,0,78,3044,0,0,120,7200,0,0,35,725,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,78 +10 3000147 . T <*> 0 . 
DP=2;I16=1,1,0,0,73,2677,0,0,120,7200,0,0,34,706,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,73 +10 3000148 . G <*> 0 . DP=2;I16=1,1,0,0,79,3121,0,0,120,7200,0,0,33,689,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,79 +10 3000149 . A <*> 0 . DP=2;I16=1,1,0,0,71,2545,0,0,120,7200,0,0,32,674,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,71 +10 3000150 . C <*> 0 . DP=2;I16=1,1,0,0,74,2746,0,0,120,7200,0,0,31,661,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,74 +10 3000151 . A <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,30,650,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +10 3000152 . G <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,120,7200,0,0,29,641,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,77 +10 3000153 . A <*> 0 . DP=2;I16=1,1,0,0,72,2594,0,0,120,7200,0,0,28,634,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,72 +10 3000154 . G <*> 0 . DP=2;I16=1,1,0,0,75,2817,0,0,120,7200,0,0,27,629,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,75 +10 3000155 . C <*> 0 . DP=2;I16=1,1,0,0,56,1730,0,0,120,7200,0,0,26,626,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,56 +10 3000156 . A <*> 0 . DP=2;I16=1,1,0,0,67,2285,0,0,120,7200,0,0,25,625,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +10 3000157 . A <*> 0 . DP=1;I16=0,1,0,0,31,961,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,31 +10 3000158 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +10 3000159 . A <*> 0 . DP=1;I16=0,1,0,0,29,841,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +10 3000160 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +10 3000161 . C <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +10 3000162 . C <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +10 3000163 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +10 3000164 . G <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +10 3000165 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +10 3000166 . T <*> 0 . 
DP=1;I16=0,1,0,0,40,1600,0,0,60,3600,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +10 3000167 . A <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,60,3600,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +10 3000168 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,60,3600,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +10 3000169 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,60,3600,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +10 3000170 . A <*> 0 . DP=1;I16=0,1,0,0,34,1156,0,0,60,3600,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +10 3000171 . T <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,60,3600,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +10 3000172 . T <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,60,3600,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +10 3000173 . T <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,60,3600,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +10 3000174 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,60,3600,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +10 3000175 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,60,3600,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +10 3000176 . C <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,60,3600,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +10 3000177 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,60,3600,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +10 3000178 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +10 3000179 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,60,3600,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +10 3000180 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,60,3600,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +10 3000181 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +10 3000182 . C <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,60,3600,0,0,8,64,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +10 3000183 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,60,3600,0,0,7,49,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +10 3000184 . A <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,60,3600,0,0,6,36,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +10 3000185 . C <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,60,3600,0,0,5,25,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +10 3000186 . C <*> 0 . 
DP=1;I16=0,1,0,0,35,1225,0,0,60,3600,0,0,4,16,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +10 3000187 . A <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,60,3600,0,0,3,9,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +10 3000188 . G <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,60,3600,0,0,2,4,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +10 3000189 . A <*> 0 . DP=1;I16=0,1,0,0,29,841,0,0,60,3600,0,0,1,1,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +10 3000190 . A <*> 0 . DP=1;I16=0,1,0,0,26,676,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,26 diff --git a/src/htslib-1.21/test/index.vcf.gz.csi b/src/htslib-1.21/test/index.vcf.gz.csi new file mode 100644 index 0000000..2503396 Binary files /dev/null and b/src/htslib-1.21/test/index.vcf.gz.csi differ diff --git a/src/htslib-1.21/test/index2.sam b/src/htslib-1.21/test/index2.sam new file mode 100644 index 0000000..97d39e6 --- /dev/null +++ b/src/htslib-1.21/test/index2.sam @@ -0,0 +1,11 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:1 LN:249250621 M5:1b22b98cdeb4a9304cb5d48026a85128 +@SQ SN:2 LN:243199373 M5:a0d9851da00400dec1098a9255ac712e +um1 69 1 1000000 0 * * 0 0 AAAAAAAAAA * +um1 137 1 1000000 44 10M * 0 0 AAAAAAAAAA * +um2 69 1 2000000 0 * * 0 0 AAAAAAAAAA * +um2 137 1 2000000 44 10M * 0 0 AAAAAAAAAA * +mu1 137 2 1000000 44 10M * 0 0 AAAAAAAAAA * +mu1 69 2 1000000 0 * * 0 0 AAAAAAAAAA * +mu2 137 2 2000000 44 10M * 0 0 AAAAAAAAAA * +mu2 69 2 2000000 0 * * 0 0 AAAAAAAAAA * diff --git a/src/htslib-1.21/test/index_dos.sam b/src/htslib-1.21/test/index_dos.sam new file mode 100644 index 0000000..b006aa7 --- /dev/null +++ b/src/htslib-1.21/test/index_dos.sam @@ -0,0 +1,190 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:CHROMOSOME_I LN:1009800 M5:8ede36131e0dbf3417807e48f77f3ebd +@SQ SN:CHROMOSOME_II LN:5000 M5:8e7993f7a93158587ee897d7287948ec +@SQ SN:CHROMOSOME_III LN:5000 M5:3adcb065e1cf74fafdbba1e8c352b323 +@SQ SN:CHROMOSOME_IV LN:5000 M5:251af66a69ee589c9f3757340ec2de6f +@SQ SN:CHROMOSOME_V LN:5000 M5:cf200a65fb754836dcc56b24b3170ee8 +@SQ SN:CHROMOSOME_X LN:5000 M5:6f9368fd2192c89c613718399d2d31fc +@SQ SN:CHROMOSOME_MtDNA 
LN:5000 M5:cd05857ece6411f40257a565ccfe15bb +@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-beta5 +SRR065390.17240207 16 CHROMOSOME_I 999901 42 100M * 0 0 ATGTTTACAGGACTTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAG CACAC?CBBAA@?@?BADDBBDBBAB>DDDBBDDABBBCCADDDDDCBCBCCCDBDDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.15493040 0 CHROMOSOME_I 999912 42 100M * 0 0 ACTTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBCCBDBCCBDDA@>DC?5@?@@??:><<>8>39<37 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.6144221 0 CHROMOSOME_I 999914 42 100M * 0 0 TTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCDCCCCBDCDDBBDDBDBDD@BBB@DBABDB AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.8057275 0 CHROMOSOME_I 999916 42 100M * 0 0 CAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTT CCCCCCCBCCC@CCCCCCCCCCC>BBB>BB?4CCCCCC;>====ACCCA@CCCBBCCBC;>@==>BBBBA?<;@<@######################## AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.24679913 16 CHROMOSOME_I 999917 42 100M * 0 0 AAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTT ==56>??>AB?>D>?A?DBDABBB=BDBDACDBBCCDBBBBDDCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.25513175 0 CHROMOSOME_I 999934 42 100M * 0 0 ATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCBC@CADCDDAABA=B?=A=B.>AA?AADA########################## AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU 
+SRR065390.17492782 0 CHROMOSOME_I 999935 42 100M * 0 0 TGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCDCDCCDCCBDCDDBDDBDD@BBBBBBACBBAB=AB>BBBAB>?BA@CAAA? AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17146364 16 CHROMOSOME_I 999942 42 100M * 0 0 CAAAAATTGTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAG #######@/A@@<:BBBBB>ABBDADC@=DDBDDDCDCCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-3 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:8T91 YT:Z:UU +SRR065390.14459471 16 CHROMOSOME_I 999944 42 100M * 0 0 AAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGC @@@@=B@CCCBAABACCC@DCCCCCDCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.6968616 16 CHROMOSOME_I 999947 42 100M * 0 0 ATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTA BDB>B@DDDD@DDDDBCACB@DCBCCACCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.9052825 16 CHROMOSOME_I 999952 42 100M * 0 0 GGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACC ?B;DABDABDDBDDADCCCD@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.22926164 0 CHROMOSOME_I 999967 42 100M * 0 0 TTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBCCBCCCCCCCCDCCDCDDDDCCDACDCADBDDBBCBCBCCABBA@BABABCBABC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.27108093 16 CHROMOSOME_I 999969 42 100M * 0 0 
AAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACN ##########AAAAA388333-533')''+AA8AAAAAAAAAA8AAAAAA67788AAAA888887AAA5AAAAAAAAAAAA8AAAAAAAA+*++)))))! AS:i:-1 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:99C0 YT:Z:UU +SRR065390.19145675 0 CHROMOSOME_I 999970 42 100M * 0 0 AGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCBCCCCCCCCCCADCBDBBCBBBBBDCBABBBABAABB??DDAACCAACC>AC?C?= AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.22660118 16 CHROMOSOME_I 999972 42 100M * 0 0 CTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCC B9ABABDB>DBBBD8CBDCDBCDBCDBCBCCBCCCCCCCCCCCCCCC>CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.1589310 0 CHROMOSOME_I 999973 42 100M * 0 0 NTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCG !++((22221AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7A8AAAAAAAA8AAAAAAAAAAAAA7A7AA768655 AS:i:-1 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:0T99 YT:Z:UU +SRR065390.32984687 0 CHROMOSOME_I 999978 42 100M * 0 0 GTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCT CCCCCCCCCCCBCCCCCCCCCCCCCCCCC@CC@CCCBCCCCCCBDACDCC>@B@CDBADB@BCBD@B=BBB@BD>C@BBCBACAABAB;D9<4:<66 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.28347129 16 CHROMOSOME_I 999978 42 100M * 0 0 GTTATGTTTAGGCGTAGGCTTAGACATACGCTTAGGTTTCGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCT ##############################################@B?BB@A@ABBBDABD@DDBBB@@B;C@BACBC@CC@CCCCCBCCCCCCCCCCC AS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 NM:i:5 MD:Z:23G1T2G2G7A60 YT:Z:UU +SRR065390.17964692 16 CHROMOSOME_I 999984 42 100M * 0 0 
TTTGGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAA #####@<@=<53.830;>.?A5@@?ABAAADBDBC<@CB@D@BCB@CBCDCDBBDC=C@C@CAAC@C@ACCCCCCCCCCCCCCCCCCCCC AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:3A96 YT:Z:UU +SRR065390.16701032 0 CHROMOSOME_I 999987 42 100M * 0 0 AGGCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCACCCCCCCCCCDCBCCCCCCDCCBAA@BBBBBC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.24060716 16 CHROMOSOME_I 999989 42 100M * 0 0 GCGTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAA @8>68BD?B??B@DB>ABB?BA@A=ADBCC@?AA@CCBBCBCCDBCDCCBCBCCC@CCCCBCCCCCCCACCCCCCCCACCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.24907628 16 CHROMOSOME_I 999989 42 100M * 0 0 GCGTAGGCTGAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAA ################################BDDBB?BB>?>BADABBBDBDBABDBDC;?>9=C?B>CC@CCCCDCCCCCCCCCCCCCCCCCCCCCCC AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:9T90 YT:Z:UU +SRR065390.21366278 16 CHROMOSOME_I 999991 42 100M * 0 0 GTAGGCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAAAG ######?9>A09=@?=>BBDBBBB8B>DBCDCCDCBCBCBDCCC@CCCCCCCBCCCCCCC@@CCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.27662957 0 CHROMOSOME_I 999995 42 100M * 0 0 GCTTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAAAGAGTA CCCCCCCCCCCCCCCCCCC@ACCCCCCCCCCCCCCADCCCBC?CDDDDAC=BA?@B@DBDB>?>>D?#################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.29477959 0 CHROMOSOME_I 999997 42 100M * 0 0 TTAGGCTTAGGCGTAGGTTTAGGCTTTGGCTTAGGCCTATGCTAGGCCTAGTACCATAATACTATTCTTACCGCCGCGCCTGATCAAACCAAAGAGTAGG 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=AB?DAB@3=@8@=@?@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.13030274 16 CHROMOSOME_I 1000208 42 100M * 0 0 TCAATTAAACTGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTC 955576>0@BBBBBBDBBD?DABDDDDCD@DCDDCCDCDDCACBACCCCCCBCCCCCCCCCCCCCCBCCCCCCCCCCBBCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.18054898 16 CHROMOSOME_I 1000209 42 100M * 0 0 CAATTAAACTGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCC CAC@CAA?BC?D??BCABB8=>@@?#### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.26866653 16 CHROMOSOME_I 1000217 42 100M * 0 0 CTGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACC ###########??????4D;AA?AAD?A>>?CABCBABBBBAA@AD>ADAAC@CCCCBCCBCCC?CCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCBCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.23714265 0 CHROMOSOME_I 1000218 0 78M2I20M * 0 0 TGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGAGATCGGAAGAGCGGTTCAGCAGGAA CCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCADDCCBBBBBDBBBB+=7=0?==>A#################### AS:i:-48 XN:i:0 XM:i:16 XO:i:1 XG:i:2 NM:i:18 MD:Z:75T0T1T0T0G0T2T0T0T3C0T0T0T0T0T1C0 YT:Z:UU +SRR065390.20744360 16 CHROMOSOME_I 1000218 42 100M * 0 0 TGGACTACGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCG #####@ABBBBDBD@BA@DCDBABBBBBDA>@CBBDBBAD=BBDCBACBCCCCCCCBCBCCCCACCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCBCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.3611567 16 CHROMOSOME_I 1000225 42 100M * 0 0 CGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTT #####@<2@=BBBBAC=DBBB@BBACBBBB=C;BBCCBACC@CCACCCCBCCCCCCBCCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU 
+SRR065390.10053218 16 CHROMOSOME_I 1000225 42 100M * 0 0 CGACAATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTT @@=@6AA=AAC?CAC>BB>?A>>CBB@@CBAD>CC;>C@BC>A################################################# AS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 NM:i:4 MD:Z:66A7A14C2A7 YT:Z:UU +SRR065390.21951837 0 CHROMOSOME_I 1000229 42 100M * 0 0 AATTATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCBCDCACCCCCCBCCB>AACCC@1/?@?CCC@@BABCB=?@@+:A?B###### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.21381202 0 CHROMOSOME_I 1000232 40 100M * 0 0 TATTGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGGTTTTTTTAGTTTTTTCTTTTTTCCCAATTTTTTTGGATA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDCDC?=8@';4@AA############################################# AS:i:-16 XN:i:0 XM:i:8 XO:i:0 XG:i:0 NM:i:8 MD:Z:60A5G8C6A2G7A0G1A3 YT:Z:UU +SRR065390.22184926 16 CHROMOSOME_I 1000235 42 100M * 0 0 TGGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCT ??CAACCBAADD?DBB?@>BBB;BABBBBB@>CCCDBCDBACCCCAACACACCACCC@@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17603173 0 CHROMOSOME_I 1000236 42 100M * 0 0 GGGTTCAAACATTTGAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTG CCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCDCCCCCCCCB>CAB@ACCC################################################ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17587471 16 CHROMOSOME_I 1000250 42 100M * 0 0 GAAAATTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTT 10?8;;?;AA??:AA@BBBBB?BDDDDDBCDA>@DDDCCCDACCCDDCCDCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.635026 0 CHROMOSOME_I 1000255 42 100M * 0 0 
TTTTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGA CCCCCCCCCCCCCCBCCCCCCCBD@CCCCB0:>8:=BBBBC6:=7@>?B?B43/+2>@@/@########## AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.33333470 0 CHROMOSOME_I 1000257 42 100M * 0 0 TTTTGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTTTGTTT CCCCBCCCCCCC?CCC?CCCCDBCADCCCCCA@@:;CCCC?7.)8;>???-3>>;A?3?6;/2;>?A:24775=4B<@@<4)+75:70(4@>::)9,B>BB?BBD:>BADDD=ABBBDDDBD@DBCCCDCCDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:0T99 YT:Z:UU +SRR065390.18670433 0 CHROMOSOME_I 1000260 40 100M * 0 0 TGGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTTTGAATATCTGGGGATTTTTCGTTTTTTTTTTTTT CCCCCCCCCCCCC>CCCCBBC4A@ACCC8@;5/8;A?A/6,>==AAC6<@################################################## AS:i:-14 XN:i:0 XM:i:7 XO:i:0 XG:i:0 NM:i:7 MD:Z:65A0G11A4C7C1G0A5 YT:Z:UU +SRR065390.5800524 0 CHROMOSOME_I 1000261 42 100M * 0 0 GGCCGACGTCGGAACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTTCCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTT CCCCCCCCCCCCCCCCCDDC*/,0/??/<<508BAA@@BCBCAC?BAADBCD@@@CBCCBA9CCCACCCCCCCCCDCCCCCC?CCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCBBBCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.1793614 16 CHROMOSOME_I 1000274 42 100M * 0 0 ACGTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTT A:CAADB=DBDD@CBACC>@CACCCCCCCDCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBCCCCCCCCBBBCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.20107270 0 CHROMOSOME_I 1000276 42 100M * 0 0 GTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAG CCCCCCCCCCCCCCCCCCCCCCC@@CCCCCCCCCCCCCCCCADDCCCCCDCC?ACACDCCCCC@CCCDCD@BCDCBB3>B@BCCC@@9=3BB?@B@>85; AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.8268806 16 CHROMOSOME_I 1000276 42 100M * 0 0 
GTCTCACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAG ##########D?:BBA>;BBABBAABBBBBDDB>DDDDBDCDDCDCDDCCCDCCCDCCCCDCCCCCCCCCCCCCCCCC@BBCCCCCCCCBBBCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.19264263 16 CHROMOSOME_I 1000280 42 100M * 0 0 CACTTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCC ##BB?>CBABBB?:BBBBABABABB@DBCBBDAABDCCCCCCBCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCBBBCCCCCCCCBBBCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.18391831 0 CHROMOSOME_I 1000283 42 100M * 0 0 TTTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCC CCCCCCCCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?ACCCCCCCBCC@CC8BBCCCCCB@>A>CCCDDC@@@DBBBC?:CCDBAC;CDDDDCBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBCCCCCCCC@BBCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.24029537 16 CHROMOSOME_I 1000284 42 100M * 0 0 TTTTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCA DB>B8BB<9;?>ABDDAADB@DD@C@BBAABBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBBBCCCCCCCCBBBCCCCCCCCCCCCCDCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.28630205 0 CHROMOSOME_I 1000286 42 100M * 0 0 TTCCTGATTTTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCACG CCCCCCCCCCCCC@BCCCCCCCCCCCCCCCCCDBCCCCCDDBBBCBCDCDB@=?BBBBDBBABBBBBB@@CBBDB>>>A>BCBCCB:;:>=<9:@A#### AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:98A1 YT:Z:UU +SRR065390.15799530 0 CHROMOSOME_I 1000295 42 100M * 0 0 TTTGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCAAGCCTAAGCCT CCCCCCCCCCCCCCCCCCCCCCAACCCCCCCCCACCBCBCCCC?B@CCCCB@93=@B5>BB>>3/77:7:B>CDBDDB@>;B>BBBBDACAAB@D@<9<9<7 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.22494349 0 CHROMOSOME_I 1000297 42 100M * 0 0 
TGTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCAAGCCTAAGCCTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACDCCCCCD@@CCDCDCBBDCDDDBADDDDCD>B;@>DAABBB@>5A>BDBB?6??@D?9@####### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.12445253 0 CHROMOSOME_I 1000298 42 100M * 0 0 GTAGTTTTTCCTTTTTACCGAATTTTTAGGAATATCTGGGAATTTCTCGTTTTCTGATTTTTTTTCAATTGTTTTTAGATCCCCCAAGCCTAAGCATAAC CCCCCCCCCCCCCCCCCCCCACCCCCCCCC@DCCCCCCCC?BACCBC@CBDCCACB?BBBCDC@@;4BCBABDC@B56?B@96=4A>BAB;;5;:@19A;@;;;6?BBBBB3BBB??@@@>@BBB;@AA@9@AA9BABBBAA@@AABAABAB@BB:;??>:?DBAB?BBDDBBABB;ACBDB?BBB@CCCBDD@CD@CCDBCDDDCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.27194079 0 CHROMOSOME_II 2920 42 100M * 0 0 CTAATTTTCAGAGAGACTGAAAGAGTTTAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAA CCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC=BBBCB?BBBA?BBBDB?>BB=CBCCAACAC;DAB=ACAC?##### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.21775125 16 CHROMOSOME_II 2934 42 100M * 0 0 AACTGAAAGAGTTTAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACC #####ABA?=<<=5=@BBA?=@>:A:7.44?B?8B@@>BBB=@B?ADBBBCBBACBD9CBD?A9?=A?.AABADDABBB@BABDDBACBBCCDCBCCDCCCCDCCCCDCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.3790175 16 CHROMOSOME_II 2944 42 100M * 0 0 GTTTAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACCGCGGTAAGTG 8BDD@:=7)/>B>ABBB?BB?>?DB@B:BBB?BBADDC@BDCDDCDBCDCCCBADCCCCCCCBCCCCCCCCCCCCCCCCCCCDCCCCDCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.4091455 0 CHROMOSOME_II 2946 42 100M * 0 0 TTAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACCGCGGTAAGTGTG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC?CCCCCCCCCCDCCCCBCCCDACBCDCACC@C@CA@CBAAD=BBAADD06@##### AS:i:0 XN:i:0 XM:i:0 XO:i:0 
XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.8676436 0 CHROMOSOME_II 2947 42 100M * 0 0 TAAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCCCCGCGGTCCGTGTGC <:>:>/000/:<<:BAB?>8A?A;:A873;3?>?>A>>A8B############################################# AS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 NM:i:4 MD:Z:84A7A0A5T0 YT:Z:UU +SRR065390.28734084 0 CHROMOSOME_II 2948 42 100M * 0 0 AAAAGTTCTACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACCGCGGTAAGTGTGTT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCBCBCDCBCCCBBDDDCADABADBBABB:BB=D?B<@B@>CA?CA>BACADAA########### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.10526869 0 CHROMOSOME_II 2956 40 100M * 0 0 TACCGACCACATCGAACCTACTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACGCCACCGCGGGGAGGGGGGTTGTTTTAT CCCCCCCCCCCCCCCCCCCCADCBBDDDDDDDBBB8BA@B>6<:>9=789=0>D>AA<@<8B>1>A9>;@5=@8C:48;*AAA=<>9>9>>:>>AB?D>BBDBCBDBBCCBABBB>@CDCCBCDCAACCCCCACCCCCCCCBCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.10879394 16 CHROMOSOME_V 938 42 100M * 0 0 TATGTTTTTCTTGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCA B;B:B>@B?>@>7BBDABADADBBCBDCCBACBCCBBB@CCCCCBCCACACCCCCC>CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.1520161 0 CHROMOSOME_V 941 42 100M * 0 0 GTTTTTCTTGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATG CCCCCCCBBBCCCCCCCCCCCCCCCCCCCCACCCCCCDCC@CCCCCCCCCCCCCCCCCCACCCBCCAD=D@BC?C?C?C##################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.17468019 16 CHROMOSOME_V 943 42 100M * 0 0 TTTTCTTGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAA >ABBBABBDDDB=DBCD?DDBDBDADDADDBDCCCCCCC=CCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 
YT:Z:UU +SRR065390.12403970 0 CHROMOSOME_V 949 42 100M * 0 0 TGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCC@CBBCACBC@?144:>><@@DAB?:=9@<>/>9?;=927= AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.16193993 16 CHROMOSOME_V 949 42 100M * 0 0 TGAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTC ;;/67AAC@ADCCDBCDCCCCCCD@CCCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCDC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.30032741 16 CHROMOSOME_V 950 42 100M * 0 0 GAAAATGTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCA AACBBAB?BB>BABBCDBBDABDBADDDDBDDBBADDDDBACCDCBDDDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.32455256 0 CHROMOSOME_V 956 42 100M * 0 0 GTTATCAACACTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGGTATAATACAGCGACTCAATGAAAAAATCAAAAAAA CCCCACCCCCBB=?ABB?BBA?BAABBBBBB@BBABBBBBBBBBBBBBAA@BBBBBBB>B######################################## AS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:64T25C9 YT:Z:UU +SRR065390.15571530 16 CHROMOSOME_V 966 42 100M * 0 0 CTGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTT B;:B;B?D?@?BBBB5-=<:@@AA@BBA>BBADBBDDDDCDCDCDBBDCCCDCCCCCCCCCDCCCCCDCCCCCCCCCCCACCBBBCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.9595122 0 CHROMOSOME_V 967 42 100M * 0 0 TGATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCC?@:AAA>C@CBB@@>?B=A?BBBBBCB>@/@>=>=>BB# AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.3600239 16 CHROMOSOME_V 969 42 100M * 0 0 
ATAATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCA DB>DBBBBA@AAB?DCA@CB@ABB@BB=AAAA>@==>>6/>:>5:688/85A?AAA>>657==BBB<;;;9>>8>>BBBB> AS:i:-6 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:28T25T45 YT:Z:UU +SRR065390.31266674 0 CHROMOSOME_V 971 42 100M * 0 0 AATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTTACTTTGCACG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCADDCCBC@CBC5<5<7?:83;+471/0<4=8;??BBD(.94;9?@?################ AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:89G10 YT:Z:UU +SRR065390.23187971 16 CHROMOSOME_V 972 42 100M * 0 0 ATCTGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGC 647:0BBB?B==@?@@BDBDBBBDDDBDDBDBDDDCBCCCCBBCCCCCDCCBCCCCCCCCCCCBBBCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.28661392 16 CHROMOSOME_V 975 42 100M * 0 0 TGAAAAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTAT ACCACAA5BDABAA>BDBDBDCBCBA@DBDB>DBBBBBAABDBDBDDBCCCCDCCCCCDCCCCCCCCCCCCCCBBBCCCCCCCCCCDCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.1859967 0 CHROMOSOME_V 979 42 100M * 0 0 AAATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGCAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCA>=>C<@@;:@A@A=53@?AB::?@CCACC=B/<;53;7BB:>B=::=A@?@?ACCC>C@CCCCCB:/&-7735@B7B>B?;@@CC@35A@@CCBC@######################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.12435485 16 CHROMOSOME_V 981 42 100M * 0 0 ATTATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAA B<=BB>B@>>BBBD@>?DABBBBBDDDDDDDDCADCDCCDCCCDCDBCCCCCDCCCCCCCCCCCCCCBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.7485987 0 CHROMOSOME_V 983 42 100M * 0 0 
TATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGGTAAAA C@?C@CCCCCCCCCCCC@CCCCC@?C8CCC@BC?@CC############################################################### AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:94T5 YT:Z:UU +SRR065390.17264189 0 CHROMOSOME_V 983 42 100M * 0 0 TATAATTTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACCCTATGGTTAAAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=C=/////=?5=;:@8???AA############################ AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:87G12 YT:Z:UU +SRR065390.6356855 0 CHROMOSOME_V 986 42 100M * 0 0 AATTTAAAACTTAAACGAAGCTAAAATGTGGCTGGTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAAAA CCBCCBCCCCCCCCCC@CC?@CCCCC@CCC>A=@.88/45+()/.=>2==BBCB659?9?'))10;9??############################### AS:i:-7 XN:i:0 XM:i:3 XO:i:0 XG:i:0 NM:i:3 MD:Z:34T63T0G0 YT:Z:UU +SRR065390.20107175 0 CHROMOSOME_V 989 42 100M * 0 0 TTAAAACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAAT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@C@?./..):3872A=@=A<=:;=B>B>>87777@>&@9A@@@8:@>88 AS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 NM:i:1 MD:Z:87T12 YT:Z:UU +SRR065390.6431660 16 CHROMOSOME_V 994 42 100M * 0 0 ACTTAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATT AACBABABDC@@ADABBDDCDCDBCDDDCCDCDBCACCCBCCDCCCCCCCCBCC@@@CCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.4439503 16 CHROMOSOME_V 997 42 100M * 0 0 TAAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTT ###########B>:AAAAA@C@=;937<ACCC8@@@AABCC>@+/662BBBC?B>BBB?BBBB#################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.11492188 0 CHROMOSOME_V 998 42 100M * 0 0 AAACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTTG 
CCCCCCCCCCCCCCCCDCCCCC>A@AAAAAACA??B@@BBD>BACACC08;;AAACB==/*/1//:=@99BBABA@;<@;<:9>>B??>B??:?6B??B9 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.9605367 0 CHROMOSOME_V 999 42 100M * 0 0 AACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTTGG CCCCCCCCCCCCCCCDCCCCC@C<>>A9<4=9>=B###################### AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.29302896 0 CHROMOSOME_V 1000 42 100M * 0 0 ACGAAGCTAAAATGTGGCTGTTATAATACAGCGACTCAATGAAAAACTCAAAAAAAAGTTGACTTTGCACGCTATGGTTAAAAATGAATGAATTCTTGGT CCCCCCCCCCCCCCDCCCCC6?:??AABCCCC8?C@BCCCC@@5;><9>>>B>>AB=<)6=4:):9>>@@################ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU +SRR065390.13754 4 * 0 0 * * 0 0 TCGCTGCTGTGATGTTGCGTTTTTATCAGCACAAAGGCGGTCAGGCCGAGGCCTATTTTTTCCGGATCCAGCAGGGCGACTTTGCCGATAAGGATACCGT CCCCCCCCCCCCCCCCCCCCCCCCCBBCCCCCCC@CCCCCDCCCCCCCDCCCC@ACCCC@>>CCD?>>>@@@ YT:Z:UU +SRR065390.13765 4 * 0 0 * * 0 0 CGTGGTCGTGCCGGTTACAAGCCTGCCGTGAAAAGCCGTTTCAGTAAGTCAGCCAATAGCAAATTCTCCCATACTATCGCTTTTGCCTGATCCTGAACTT CCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCDCCACCCCCCCC@CCCCCCCCCACCCBB@?CBDABDDADB<=ABBB@B@BB@ YT:Z:UU +SRR065390.13778 4 * 0 0 * * 0 0 TTTTATACCAACAAAAAACGGAAAGCAGATAACCCAGCAGCCCGAGTAACAGTATCCGGGCATCCAGGCCAAAAGCTAACAGAGCCGCGATAAAATCCCA CCCCCCCCCCCCCCBBBBBCCCCCCCBCCCCCCCCCBCCCCCCCCCDCCCCCACCCCCCCCCDCCCCDCBCA@AC>@=@CC?B>CBBCC>=?8A8=?>66 YT:Z:UU +SRR065390.13779 4 * 0 0 * * 0 0 ATAATGGACAACTTTAATGGCAATCACTAAATCAACTCCGGCACCATTAACCGGTGGGACGTTATGGTGCGTCACTATTGCATTGTCATTAGCGACATTT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCC=CC?BCCBCCACBABCCDCDBDADBBCDCBD>DBBD==BAA:>5<> YT:Z:UU +SRR065390.13802 4 * 0 0 * * 0 0 AAGGCGTTTATTATATACACTCGCATGGCTTTTCTTCTGAAAATGTAGAATAATTGAGTAATTTTTAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG ??B:??????BBB>B99;;;>B>>>:BBBBB;;B=70///0-/01/BA>BABB>B>BBB@BB>>ABABA>BA>6BB88183,<8;<4>:@><>A>> YT:Z:UU +SRR065390.13808 4 * 0 0 * * 0 0 
GTTTGCTGACTGGCCAGCCAGCTCAAGGCATCAAAAGCGTCTTTGAGAGGGAAAGGAATATCGATAACCCGAATGCCGGGTGGCGCCGGAATCTCTTGCG CCCCBCCCBCCC@CCC@CCCAA:A=BB?BBBB@>B>A#################### YT:Z:UU +SRR065390.13853 4 * 0 0 * * 0 0 CTGGTACGTCACCACACGCCGCGATGGCGTCATCCACCGACTTCACCCACGTTACGCGATCGTCCGTACCCGGGTGACCGTTGGGGATAATATTTTTGCG #################################################################################################### YT:Z:UU +SRR065390.13861 4 * 0 0 * * 0 0 TTCAGAAACTGGATGAACAGTGCGCAGCCATCTGCAAATATGAATTAGTTCAAGTCACTCAAAAGCTATTTATTTGAATGGAAGAAATTTTTGAACTATA CCCCCCCCCCCBCCB@@CCCBCCBCCCCCCCCCCCCC?CCCC@CCC@C@CCC@CACCBCC?BBBC@C7CBCBCB@@ABCCBBBC=BABCCBBBBAB@@CA YT:Z:UU +SRR065390.13907 4 * 0 0 * * 0 0 CATTACCATTCAGTTGTATTGTTTGCGCACCAGAAAAATGAGACTGCACAGAATAAATTATACTGACCAGAAATTGTAAAATTCGTATATTCTTATTCAT 8998;9:;9;>9:9>?BABBAAA2A@@@@@>:3'3A################################################################ YT:Z:UU +SRR065390.13946 4 * 0 0 * * 0 0 TTTCCTCGAGTTCTTGATGAAATGGTCCATTATTTGTCAACCATTTATTTTTCCATATTTTTTCCAGGTAAGGCATGAATTCTGCAAGTTCCGGCAAAGA CCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCBBBBCACCCCCCB@BCCCCBC?CCCCACCBCBCCCCC@BBDCDDBCA4@@A YT:Z:UU +SRR065390.13956 4 * 0 0 * * 0 0 CGGCGCAACAATACTCAGCAGTTAATTGCAAAGGTATCGCACACCATTAAAAGCATTAAGCCGGGAGTCGAATTTGGTGTTAGCCCGGCAGGCGTGTGGC CCCCCCCCCCCBCCCCCCCCCDCCCCCCCCCCCC>CCCCCCCCCCCDCCCCBDCCDDCBDCC@?@BA@B@B>BBABAABB6?BB>B@?B??2?=+>->60 YT:Z:UU +SRR065390.13964 4 * 0 0 * * 0 0 NTTGAGGTGCTCCAGTGGCTTCTGTTTCTATCAGCTGTCCCTCCTGTTCAGCTACTGACGGGGGGGTGCGCAACGGCAAAAGCACCGCCGGGCATCAGCG !))))++++*AAAAA8AAAA################################################################################ YT:Z:UU +SRR065390.13969 4 * 0 0 * * 0 0 CGGGCGATAGTCAAAAACTTATTTTCACAATTTTCGGCTAGGGAGTATATTTACAGTTAATTTGCGATGTGTTAGATCGGAAGAGCGGTTCAGCAGGAAT CCCCCCCCCCCCCCCCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCACCCCCCCCCCDCCCACCCCCDCCACBCCCCCCBCBBCDCBCC?BCBBCBCBC;A YT:Z:UU +SRR065390.13978 4 * 0 0 * * 0 0 
AGACGGTAACTTTCAATTTGCACCCATGATTAAATTTTATGTTGATTAAAATAGAAGCAAAAATCATTACATTACACTACAAAATACGCCGAAATGTTAA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBACCCDCBCCCABCADCCAABC? YT:Z:UU +SRR065390.13985 4 * 0 0 * * 0 0 TAACCAAAAACTGGATTATGCAAATAACTAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATATCGTATGCCGTCTTCTGCTTGAAAAAAAAA CCCCCCCCCCCCCC?CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCBCCDCDCCD@CBD5>@=:=><9A@3>=B?BB>CBACB?BBA YT:Z:UU +SRR065390.14000 4 * 0 0 * * 0 0 TAGGTGAGAAAAGCGTTATTGGTCCGGTATACCTGCGAAGCGACAAAGCAATAAGGCAACAATGGCAGGTAATGCTGCTCAAAAAAGCGTTTACTGATCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCBADCCCB?@B>B@BADAAABBD@C5;B9?:?;ACABAB YT:Z:UU +SRR065390.14032 4 * 0 0 * * 0 0 GAAGGTCCAAGTGCCTTGAAGATAGAAAATTATAGCATTTCTCTTTAATTTCAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATATCGCATGA CCCACCCCCC>AAAACBCCCCCCCBCC?CCCCCCCCCCCCCCCCCCCCCCCB@CCCCCCBAC@CDCA/@B<;8=?@B>BC>?>?BB=:A########### YT:Z:UU +SRR065390.14061 4 * 0 0 * * 0 0 TGAAGCCGACAATTTGAGGCCAAACATCTTACATTCGACAGTAAATATTTGGGGATTAAGACTTATGTTAGATCGGAAGAGCGGTTCAGCAGGAATGCCG CCCCCCCCCCCCCCCCCCCCCCCCCCC=CCCCCCCCCCADCBCCCC=CCCCCCCBCCC=CBCCCCCCCABCCCCCCBACBC@CCBB;@B;?A@A@=?99A YT:Z:UU +SRR065390.14072 4 * 0 0 * * 0 0 TGAGTGAGGCTCAGGATTTTGAGTGAGGCTCAGGATTATGAGTGAGGTTGAAGAATTTGAGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGAG @B=@@BB@B@<@BB>BB>>@BB@==2;:;8BBBBBB@B@@@:@?1B@B@B@3@@@>3;@;@<@?>;@B@@##### YT:Z:UU +SRR065390.14100 4 * 0 0 * * 0 0 AAGCCTGAGGGATAATTTTCGTCAAATTAAGGCAATTGCCGAGTGTTTCATCCCTGGCAAGCAGAACGGCTTTTTCGTTATTTATATCGGGAGAATTTAT CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCBCDCDCDCCCCDBBCDADBBBABDDBBBBBBBBDCBCD?BABB>B>AB>BCABAA>C YT:Z:UU +SRR065390.14105 4 * 0 0 * * 0 0 AAATTGTCCCCAAATAAAACAATTCCAGTGATCTTCCGATTCTAGGTGCCAAATAACCCAAATAGTCACTGCATTAGTTTTTATCTCACTTTTCTCCCCC #################################################################################################### YT:Z:UU +SRR065390.14107 4 * 0 0 * * 0 0 
TGAAATTTCAAGAAAAATGTTAATTACCACCGTATTAAAAAAAAAAAACTTAAAATCAAAGATCGGAAAAGGGGTCAGGCAGGAATGCCAAAACCGACAC CCCCBCCCCCCCBCB>>>ACCCBCACCC?CCCCCCCCCCC?B########################################################## YT:Z:UU +SRR065390.14137 4 * 0 0 * * 0 0 CTGTGGCGTTTTTATCAAATTGGCAGAGCCACGTTCAGAGCTGAAAAAGCCACAGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTTGGA BCCC@CCCCC?CCC>>CCBCCC?>C@CCCB=6?AA>=>3?>@?@86;86.@A@==378::68829>B9B############################### YT:Z:UU +SRR065390.14141 4 * 0 0 * * 0 0 GGTCACCAATCATAAGAGGAACAGCGACTGCACCTGCGTACATGACAAGGACGTGTTGCAGACCGAGTATGATCAGCTTTCCTGGTGATAGTATGCGCTC AAA@A?AA8:>A######################################################################################## YT:Z:UU +SRR065390.14162 4 * 0 0 * * 0 0 ATACTTCACCGGATGGTGGAATTAACGAAAACAACAACTGGTGTCACATCCCGCAGGCAAAAGAGGCAGCGGCTAACTAAGCGGCCTGCTGACTTTCTCG CCCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCCBBCCCCCCDCDCCCCCCCCCC?C?CCCCCACD@CAD@AB<>@CB;6B#################### YT:Z:UU +SRR065390.14168 4 * 0 0 * * 0 0 TCGAGGGTGAGGGCGTCTGCCAGATCGGAAGAGCGGGTCAGCAGGAATGCCGAGACCGATATCGGATGCCGTCTCCTGCTGGACAAAAAATGAGAATGGG AACC@0@>@6:<>??>?BBBBB?+B6BBB>B?B=:?BBB=BBBBB>B######################################## YT:Z:UU +SRR065390.14173 4 * 0 0 * * 0 0 AAGAAACTCAACAAACCGGACTTGCAGGTGAAACTGATTCCGATTACCTCACAAAACCGTATTCCACTGCTGCAAAACGGCACTTTCGATTTTGAATGTG CCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC=@CCCCCCCCAA?ABC@CCA=CCCABDCCAABDD?BB@BBA YT:Z:UU +SRR065390.14178 4 * 0 0 * * 0 0 GCGCTTTGTTTACCTGATACGGAATTTCGTGGACGATAATGGTTTCACGACCGGTTTTGGCGTCAACTTCCACTTCTGCGCGAGCGCGGATATACACCTT BCCCCCCCCC@@CCCCCCCCCCCCCCCCCCA=AA>AAA=ACCBCCCCC?CCCCAB@ACCC?A?<CB?=CAB9B@BA################### YT:Z:UU +SRR065390.14182 4 * 0 0 * * 0 0 ATTTACTCTAATGTTCTGAAAAATAATTTACTCTAATGTTCTGCCAAATAATTTACTCTAACGTTCTGCCAAATAATTTACTCTAATGTTCTGCCAAATA CCCCCCCCCBCCCCCCCBCC@CCB@@@BCCCCCCC@CCCCCCBBCCCCCCBCCCC@CCC?CCC>>CCBCCCCA@CCCC;CBCCBDCCB@CCBCAACB@BB;B?B0B=8??9>??BB>B?@?B>A>A########## YT:Z:UU +SRR065390.14197 4 * 0 0 * * 0 0 
GTACCTCGCCGTTGTTCTCGACCTGTTCGCAAGAAAACCAGTGGGCTGGGCCATGTCGTTCTCGCCGGACAGCAGGCTCACCATGCAAGCGCTGGAAATG CCCCCCCCCCCCCDCBB=B@?BB@BBBBBB@@@B@B==BBB9B@@@@B@=BBB@BBB=@BBABBB@@@BB<@BA@BBB=B;B?BBACA YT:Z:UU +SRR065390.14284 4 * 0 0 * * 0 0 CGGTGCATGATGCGGATTCCAGGAATCAACGTACAGCGTCGGGCTAAACCAGAACCAGCCAATAATGCACAGACCGACGACCGGAATAATAACCCCCCAC BCACCCCCCCCBCCBCCCCCCCBCC@CCCCCCCAC@CBACCACCCCC@CBCCCCCCCA8CC?A@9@AB@9CACC8=81B@CC9CCCCCCC,<8??CBC@BB?@C@ACBCB################################################## YT:Z:UU +SRR065390.14312 4 * 0 0 * * 0 0 ACAGTAACATTCAACGTTAAATATGTTAATAAGACGTTGCATTATTGTCCTGAAGTTGAAGATAGCAGGTATGGCGGTTGGATAGCACGGCGTTGGTTTA CCCCCCCCCCCCCCCCCCCCBCCCCCCBCCCCCCCCCCCCCCCCCCCBCCCCCDCCCCCCCCCACCBC@?CACBC######################### YT:Z:UU +SRR065390.14331 4 * 0 0 * * 0 0 GAATAATGAAGATGATGCGACGCGTCTGGCGCGTTTGAACGAACGCTTTAAACGCGAAGGTAAACCGGAGTTGAAGAAGCTGGATGATCTACCTAAAGAT CCCCCCCCCCCCCCBCCBCCCCCCBCCCC@CCCB@CCCCACCDDCBCC?CAC@B@DABA?BAB@@@?C?C@BC?9A::>=@@C;?############### YT:Z:UU +SRR065390.14335 4 * 0 0 * * 0 0 TCCATTTGATGAACCTGAAGTTTAAGTATTGACTTGAGAGGAAAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCT CCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCC@BBA>CCCCCCCCBBCCBC=CBCCCB################### YT:Z:UU +SRR065390.14342 4 * 0 0 * * 0 0 AAGTTCATGAATTAAAGCCGACTCAAACACTCTGTTTAAAAACTGGATAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGAGATCGTAGGCCGTC 0000079;9;AAAA?;;;>9>3>9BB8BBBB@############################ YT:Z:UU +SRR065390.14359 4 * 0 0 * * 0 0 GCATCAGTACGATAAAACGCGTACCGAACTACTGAATGATGTCGCAGGGGCGCTGGCTCTTGATGACAAACTCGGACGTAGCACCAATCAACTTTCCGGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCBABACCBB@BBBBBB>BDBBDBBB>B@@@>>?BCBAC?CBC?> YT:Z:UU +SRR065390.14364 4 * 0 0 * * 0 0 GGTCGCCGATCCGATTTGCACTTTAACCACTTTCGGTAAAGAAACCGTTGTTAGTGAAAGCGAAAAACGCACAACGACCACTGATGACCCGCTACAGGTG CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@@CCCCCCCCCBCCCCCCCCCACCDACCBCACA@CACCAA=B=CBC=ACAAAC=)>? 
YT:Z:UU +SRR065390.14392 4 * 0 0 * * 0 0 GTTATCCTTTTCCGTGATATGTGCGGTACTGCAGCGTATGCCGGCAAGGGTTGCAAACGGTGGTAGTGTGCAGGTTGACTGTTGGTCGGATTCCTCCACC CCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCDCCCCCCCCCBCCCA@CCB@AACB?CB?BA=A8@BACB?:===@C@-A6==?@@<@@AA##### YT:Z:UU +SRR065390.14393 4 * 0 0 * * 0 0 AGAAATTTACTGGCTCGCCGCAGCCAACTCCTCTTCTGACACCCCGGTAAAGCGCATGATGTCTGTAAGAGGGGCCCCGGATTCAAGCATTATTTTGGCT CCCCCCCCCCCCCCCBA9::<4A>AAAA:?A#################################### YT:Z:UU +SRR065390.14434 4 * 0 0 * * 0 0 GGTAGATTCCCATAAAAATCGCCAGCGGAATGGTGAACGCAACGGTATACGTTCCCCACGGGCTATGAGTCAGGGCTTTCACCACGATCATCGCCAGTAC DCACCCBCCCCCCCC>CBBCCCCCCCCCCCCCC?CCCCCCCCCCCACACCC@BCCCCBCD=ABB@BCBD?@@B6BC8B@B>BABCBB@AB=@2C###### YT:Z:UU diff --git a/src/htslib-1.21/test/longrefs/index.expected1.vcf b/src/htslib-1.21/test/longrefs/index.expected1.vcf new file mode 100644 index 0000000..e0e7f91 --- /dev/null +++ b/src/htslib-1.21/test/longrefs/index.expected1.vcf @@ -0,0 +1,6 @@ +1 10010000100 . C <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000101 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,8,64,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000102 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,7,49,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000103 . G <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,6,36,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000104 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,5,25,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000105 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,4,16,0,0;QS=1,0;MQ0F=0 PL 0,3,29 diff --git a/src/htslib-1.21/test/longrefs/index.expected2.vcf b/src/htslib-1.21/test/longrefs/index.expected2.vcf new file mode 100644 index 0000000..fed1109 --- /dev/null +++ b/src/htslib-1.21/test/longrefs/index.expected2.vcf @@ -0,0 +1 @@ +1 10010000110 . G 0 . 
SVTYPE=DEL;SVLEN=-890;END=10010001000;QS=1,0 PL 0,1,45 diff --git a/src/htslib-1.21/test/longrefs/index.vcf b/src/htslib-1.21/test/longrefs/index.vcf new file mode 100644 index 0000000..e861ed1 --- /dev/null +++ b/src/htslib-1.21/test/longrefs/index.vcf @@ -0,0 +1,216 @@ +##fileformat=VCFv4.2 +##FILTER= +##reference=file:10_gig_at_front.fa +##contig= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##ALT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ERS220911 +1 10009999919 . G <*> 0 . DP=1;I16=1,0,0,0,26,676,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,26 +1 10009999920 . T <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,1,1,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +1 10009999921 . A <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,2,4,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +1 10009999922 . A <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,3,9,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +1 10009999923 . T <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,4,16,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +1 10009999924 . C <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,5,25,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +1 10009999925 . C <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,6,36,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 10009999926 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,7,49,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999927 . A <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,8,64,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 10009999928 . G <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +1 10009999929 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999930 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999931 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999932 . T <*> 0 . 
DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999933 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999934 . T <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999935 . A <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999936 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999937 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999938 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999939 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999940 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999941 . C <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +1 10009999942 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999943 . A <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999944 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999945 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999946 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999947 . C <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +1 10009999948 . A <*> 0 . DP=1;I16=1,0,0,0,34,1156,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,34 +1 10009999949 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999950 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999951 . C <*> 0 . 
DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999952 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999953 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999954 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999955 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999956 . C <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +1 10009999957 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999958 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999959 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999960 . T <*> 0 . DP=1;I16=1,0,0,0,35,1225,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,35 +1 10009999961 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999962 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999963 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999964 . A <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999965 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999966 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999967 . A <*> 0 . DP=1;I16=1,0,0,0,41,1681,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,41 +1 10009999968 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999969 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999970 . A <*> 0 . 
DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999971 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999972 . T <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 10009999973 . T <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 10009999974 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999975 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999976 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999977 . G <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 10009999978 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999979 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999980 . C <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +1 10009999981 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999982 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999983 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999984 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999985 . T <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999986 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999987 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999988 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999989 . C <*> 0 . 
DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999990 . G <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999991 . A <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999992 . C <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 10009999993 . A <*> 0 . DP=1;I16=1,0,0,0,36,1296,0,0,60,3600,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,36 +1 10009999994 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10009999995 . G <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +1 10009999996 . G <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10009999997 . C <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10009999998 . G <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10009999999 . A <*> 0 . DP=1;I16=1,0,0,0,31,961,0,0,60,3600,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,31 +1 10010000000 . A <*> 0 . DP=1;I16=1,0,0,0,33,1089,0,0,60,3600,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,33 +1 10010000001 . A <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10010000002 . C <*> 0 . DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10010000003 . C <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10010000004 . C <*> 0 . DP=1;I16=1,0,0,0,29,841,0,0,60,3600,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000005 . C <*> 0 . DP=1;I16=1,0,0,0,39,1521,0,0,60,3600,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,39 +1 10010000006 . G <*> 0 . DP=1;I16=1,0,0,0,38,1444,0,0,60,3600,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,38 +1 10010000007 . T <*> 0 . DP=1;I16=1,0,0,0,40,1600,0,0,60,3600,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,40 +1 10010000008 . C <*> 0 . 
DP=1;I16=1,0,0,0,37,1369,0,0,60,3600,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,37 +1 10010000009 . T <*> 0 . DP=1;I16=1,0,0,0,43,1849,0,0,60,3600,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,43 +1 10010000010 . C <*> 0 . DP=2;I16=1,1,0,0,59,2105,0,0,89,4441,0,0,8,64,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,59 +1 10010000011 . T <*> 0 . DP=2;I16=1,1,0,0,76,2888,0,0,89,4441,0,0,8,50,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +1 10010000012 . A <*> 0 . DP=2;I16=1,1,0,0,77,2965,0,0,89,4441,0,0,8,40,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +1 10010000013 . C <*> 0 . DP=2;I16=1,1,0,0,66,2250,0,0,89,4441,0,0,8,34,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,66 +1 10010000014 . A <*> 0 . DP=2;I16=1,1,0,0,67,2285,0,0,89,4441,0,0,8,32,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,67 +1 10010000015 . A <*> 0 . DP=2;I16=1,1,0,0,69,2385,0,0,89,4441,0,0,8,34,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,65 +1 10010000016 . T <*> 0 . DP=2;I16=1,1,0,0,75,2817,0,0,89,4441,0,0,8,40,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,68 +1 10010000017 . A <*> 0 . DP=2;I16=1,1,0,0,67,2285,0,0,89,4441,0,0,8,50,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,58 +1 10010000018 . A <*> 0 . DP=2;I16=1,1,0,0,64,2120,0,0,89,4441,0,0,8,64,0,0;QS=1,0;MQSB=1;MQ0F=0 PL 0,6,55 +1 10010000019 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000020 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000021 . T <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000022 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000023 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000024 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000025 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000026 . T <*> 0 . DP=1;I16=0,1,0,0,29,841,0,0,29,841,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000027 . A <*> 0 . 
DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000028 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000029 . T <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000030 . A <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000031 . G <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000032 . C <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000033 . T <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000034 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000035 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000036 . G <*> 0 . DP=1;I16=0,1,0,0,42,1764,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000037 . C <*> 0 . DP=1;I16=0,1,0,0,34,1156,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000038 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000039 . T <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000040 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000041 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000042 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000043 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000044 . G <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000045 . T <*> 0 . DP=1;I16=0,1,0,0,42,1764,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000046 . G <*> 0 . 
DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000047 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000048 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000049 . T <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000050 . G <*> 0 . DP=1;I16=0,1,0,0,31,961,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000051 . C <*> 0 . DP=1;I16=0,1,0,0,16,256,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,16 +1 10010000052 . T <*> 0 . DP=1;I16=0,1,0,0,31,961,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000053 . T <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000054 . G <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000055 . T <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000056 . A <*> 0 . DP=1;I16=0,1,0,0,22,484,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,22 +1 10010000057 . G <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000058 . T <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000059 . C <*> 0 . DP=1;I16=0,1,0,0,34,1156,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000060 . C <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000061 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000062 . A <*> 0 . DP=1;I16=0,1,0,0,34,1156,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000063 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000064 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000065 . T <*> 0 . 
DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000066 . A <*> 0 . DP=1;I16=0,1,0,0,32,1024,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000067 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000068 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000069 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000070 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000071 . G <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000072 . C <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000073 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000074 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000075 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000076 . C <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000077 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000078 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000079 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000080 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000081 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000082 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000083 . G <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000084 . G <*> 0 . 
DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000085 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,24,576,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000086 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,23,529,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000087 . G <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,22,484,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000088 . A <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,21,441,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000089 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,20,400,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000090 . T <*> 0 . DP=1;I16=0,1,0,0,39,1521,0,0,29,841,0,0,19,361,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000091 . C <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,18,324,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000092 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,17,289,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000093 . T <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,16,256,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000094 . C <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,15,225,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000095 . C <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,14,196,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000096 . A <*> 0 . DP=1;I16=0,1,0,0,41,1681,0,0,29,841,0,0,13,169,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000097 . A <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,12,144,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000098 . G <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,11,121,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000099 . C <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,10,100,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000100 . C <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,9,81,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000101 . T <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,8,64,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000102 . T <*> 0 . DP=1;I16=0,1,0,0,36,1296,0,0,29,841,0,0,7,49,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000103 . G <*> 0 . 
DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,6,36,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000104 . G <*> 0 . DP=1;I16=0,1,0,0,38,1444,0,0,29,841,0,0,5,25,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000105 . A <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,4,16,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000106 . G <*> 0 . DP=1;I16=0,1,0,0,37,1369,0,0,29,841,0,0,3,9,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000107 . G <*> 0 . DP=1;I16=0,1,0,0,33,1089,0,0,29,841,0,0,2,4,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000108 . C <*> 0 . DP=1;I16=0,1,0,0,32,1024,0,0,29,841,0,0,1,1,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000109 . A <*> 0 . DP=1;I16=0,1,0,0,35,1225,0,0,29,841,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL 0,3,29 +1 10010000110 . G 0 . SVTYPE=DEL;SVLEN=-890;END=10010001000;QS=1,0 PL 0,1,45 diff --git a/src/htslib-1.21/test/longrefs/longref.sam b/src/htslib-1.21/test/longrefs/longref.sam new file mode 100644 index 0000000..a2611f6 --- /dev/null +++ b/src/htslib-1.21/test/longrefs/longref.sam @@ -0,0 +1,96 @@ +@SQ SN:CHROMOSOME_I LN:10001009800 +SRR065390.14978392 16 CHROMOSOME_I 10000000002 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-18 XS:i:-18 XN:i:0 XM:i:5 XO:i:1 XG:i:1 YT:Z:UU MD:Z:4A0G5G5G5G3^A73 NM:i:6 +SRR065390.921023 16 CHROMOSOME_I 10000000003 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 AS:i:-6 XS:i:-13 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10G0C10G77 NM:i:3 +SRR065390.1871511 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 0:BB@>B<=B@???@=8@B>BB@CA@DACDCBBCCCA@CCCCACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU 
MD:Z:100 NM:i:0 +SRR065390.3743423 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################?6@:7<=@3=@ABAAB>BDBBABADABDDDBDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4251890 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########@BB=BCBBC?B>B;>B@@ADBBB@DBBBBDCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5238868 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA @,=@@D8D;?BBB>;?BBB==BB@D;>D>BBB>BBDDBA@@BCCB@=BACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8289592 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A?@C9@@BC=AABDD@A@DC@CB=@BA?6@CCAAC@+CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14497557 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######@:@@.>=><;;B>AB>>BB?B=>B=BD>BDADDD>CCDDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15617929 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA D?;;D>?C>CBAAACD@BB?B>BBDB>@BBDDBDC@CBDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 
+SRR065390.16049575 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #######################@??BB8BBB@@:AB@BDBCCDCBDCCCCACCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17838261 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################@>=?B@DCBDB>@D>DBADCDDD>CC@DCCCCBCCACCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22711273 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################B<@=<:6/0307==72@@=?788==;AAA:@CCAACCC?CCAACCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22922978 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########################B=B>A@BBBC??=@=A@AC<><<8>C6CCCCC8CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23087186 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############@:73???@6;D?B>:>BBA?B<>B@B>@B>@>BCDCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23506653 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############A/=A5::87@:=>6@AA>@CDBA@ABCB?BC>CD>DDBDC@CCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23791575 16 
CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCCCACCCCAACCCTTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############################B4;:=B@>A@BCB@@ABCCBB@BCC@CCDCCDCCDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:7T0A1G2T2G3A79 NM:i:6 +SRR065390.25911768 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############@8B@B?9=:A?=@DDB>;B6?DDBCABABB@DDCCBDBDCCDACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26055380 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################DAA><0=>=B;?BACDBDABCBBC@CACACACACCACCCCCCCCCCCCCCCCCCCCCCBCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26121674 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################?:AA::@DAAA>B??@A4@=BBBBDDBDBDCCBDDBCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30352568 16 CHROMOSOME_I 10000000003 7 100M * 0 0 CTAGGGCTAACCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################################################A>>5A?CCC@CCCCCCCCCC?CC:C@A@==@A@A@ AS:i:-10 XS:i:-19 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A1C4G3A37G47 NM:i:5 +SRR065390.31341126 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################?AD?D@BCAABBBD@=DBCDBAACCDCAABCDCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33653624 
16 CHROMOSOME_I 10000000003 17 100M * 0 0 CTAATCCTAGGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################??8?000-+0000,@ABBBB@B:B@B>BB????>>>@@?::?6?>>;>>@ACCCCBCCBACCCC AS:i:-6 XS:i:-19 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4G4A9T80 NM:i:3 +SRR065390.28043279 16 CHROMOSOME_I 10000000004 0 9M1I90M * 0 0 TCTTCCGATCTCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####A>=7A6DD=@AA?>AAABC@CAABDBCBBABDADBADCABBBDCDCDCACDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCC AS:i:-26 XS:i:-26 XN:i:0 XM:i:6 XO:i:1 XG:i:1 YT:Z:UU MD:Z:1A0A0G2T1A0G89 NM:i:7 +SRR065390.29270443 16 CHROMOSOME_I 10000000006 1 100M * 0 0 AGCCTAAGCCGAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ###################################@:88@@>B>C>CCCCA@CCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10T2G86 NM:i:2 +SRR065390.1364843 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ####################@=A=8@:>@;@@=>>B8?C6CCCCCCCCCCACCCCBBCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10190875 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##################@@@@@@;>BBB?>A6BAB?BB=BAB@?:A.<===@7:4::>8D@BABBACCCCAB@CCCDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13556211 0 CHROMOSOME_I 10000000011 0 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGATTGGAAGAGCGGCTAAGCAGGAACGCCGGGCTCGATCTCAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCBCDCCB>BBBBB########################################### AS:i:-50 XS:i:-50 XN:i:0 XM:i:25 XO:i:0 XG:i:0 YT:Z:UU 
MD:Z:57C0C1A0A0G0C0C0T0A0A1C6C0T0A1G1C0T0A0A1C2A0A0G0C2A3 NM:i:25 +SRR065390.20524775 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ############################?9<8B=?@C8A<@?@C8CBDCCC=CCCCC??@CCDCCCCCCCCCCCCCCCCCCCCDCCCCCCCDCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20580336 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ############################?:>@?@=>@=0<:CB>@B=DCADB@CCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22573273 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##################################A9;?@CBBDBA>BB;ABDB>AAA;=>=0943@########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20870114 0 CHROMOSOME_I 10000000012 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCABCCCC=BBBCA@B>B?D;B=>9?############################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3863623 16 CHROMOSOME_I 10000000012 1 100M * 0 0 CGCCTACGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##############################?@BB>B@BCABBB?DC@DADC@DCDCACDCBCCCCCCCCCCC@CCCCCCCCCCCCCCC1CCCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A5A5A87 NM:i:3 +SRR065390.1659845 0 CHROMOSOME_I 10000000013 0 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAACCTAAGCCTAAGCCCAACCCTAAGACCGAGACCGAGACC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCAB@CCC######################################### AS:i:-22 XS:i:-22 XN:i:0 XM:i:11 XO:i:0 XG:i:0 YT:Z:UU 
MD:Z:60G14T2G6C1T0A2C1T0A2C1T0 NM:i:11 +SRR065390.1567418 16 CHROMOSOME_I 10000000015 1 100M * 0 0 CACAGCCTACGTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################################?:8A@<@>>BBB8>BBB@BBBB>@:??::87688:?:::?@<@@97866@?>@@;;>:< AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T0A6A1C88 NM:i:4 +SRR065390.4996386 16 CHROMOSOME_I 10000000015 17 100M * 0 0 CCAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###################################@@@@A=BB@C>>DCCACCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-22 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T5T92 NM:i:2 +SRR065390.14822977 16 CHROMOSOME_I 10000000015 1 100M * 0 0 CGAAGCCAGAGCCTAGGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################B:B?:==2>6@B@@C>?>A@CB5@??@28C@CCCBC@CC?CC?A@CC:CBCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T5T0A6A84 NM:i:4 +SRR065390.15148736 16 CHROMOSOME_I 10000000015 17 100M * 0 0 CTGAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################CCBC<=C;9??<;==C@BCCCCC=CCCCACACACCBBCCCCCCCCCCCCCCCCCBCCCCCCCCCCCBCA?CCC AS:i:-4 XS:i:-21 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2A4T92 NM:i:2 +SRR065390.18089757 16 CHROMOSOME_I 10000000015 1 100M * 0 0 CTGAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################A212.0:?.>8?BB?B<@@C?CCBCB;DCCCACDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2A5A91 NM:i:2 +SRR065390.25601994 16 CHROMOSOME_I 10000000015 17 100M * 0 0 ATAAGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####################???DD?BD?BDBB>ACBDBDDBDDDBDBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-21 XN:i:0 
XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C9G89 NM:i:2 +SRR065390.29400981 16 CHROMOSOME_I 10000000015 17 100M * 0 0 CGAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A<:?C>>BCABABC?AD>BDADDDBDBBDBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-18 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T2G95 NM:i:2 +SRR065390.29022479 0 CHROMOSOME_I 10000000167 0 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAATCCTATGCATAAACCTAAACAGAATCAAAAGAAAAATCCAATCT CCCCCCCCCACCCCCBCCCC?CCCCCCCD;?D?D################################ AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:94C0T0A3 NM:i:3 +SRR065390.23298396 16 CHROMOSOME_I 10000000167 1 100M * 0 0 AAGCCTCGGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC #####################A@><>B==BC@CCBB?BA'@>>;>>DADDDBDBADB?B6@7=;;7DBD?B<8=AA:4-9<@@1:@A################################ AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:98C1 NM:i:1 +SRR065390.23263331 0 CHROMOSOME_I 10000000168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCBCCCDCCDCDDDBBDA=B@BB@B>B>AB?@?BB>;;ACC>CAA@;9<5@############## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.1428659 16 CHROMOSOME_I 10000000168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC #######?DB@;>BBB::>:D=>D?BDDBBBBCCAC@DCCBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.9270489 16 CHROMOSOME_I 10000000168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##########?4=>@BAA>BB>AA@====3BBBBB;B?@C==CCC?@CCC?CCC?ACCCCCBCCCBCCCCBCCCCCCCCCCCCCC=BCCCCCACCCDCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU 
MD:Z:100 NM:i:0 +SRR065390.9538669 16 CHROMOSOME_I 10000000168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##########@=?6??@B;BA@@@?.@?@@;D>A;DB@DBBBD>@DDDBADCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15525407 16 CHROMOSOME_I 10000000168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ####################@37:0BC@@C@ACCAB?@CCACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18387934 16 CHROMOSOME_I 10000000168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##########################@@A@4BDDBB@ACABB@8BCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.27778447 16 CHROMOSOME_I 10000000168 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ###############@@B=;>89<>/8?<8@>=ABDCCDCC@CCACB@@C@9ACCCC;CCCC@CCAAB@@CCCCCBCCCCCCCBCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4767844 0 CHROMOSOME_I 10000000170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTAAGCCTAACCCCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCDCC=CCBA=BCCACCBCC<@@@A@>A?D<5/772AA####################### AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80T14G2T1 NM:i:3 +SRR065390.6036148 0 CHROMOSOME_I 10000000170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCTCAGACCA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCBCCC=C########################################## AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:86T6A2C1T1 
NM:i:4 +SRR065390.7523697 0 CHROMOSOME_I 10000000170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTATACCTATGCATA 8773399<;8BBB>BAA<A################################### AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:84C3A4A5A0 NM:i:4 +SRR065390.21777229 0 CHROMOSOME_I 10000000170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCCAAGCCCAAGACCAAGCCAAGACCCC CCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCCCBDABAA@48@############################################# AS:i:-18 XS:i:-18 XN:i:0 XM:i:9 XO:i:0 XG:i:0 YT:Z:UU MD:Z:74T5T3C1T5T1A0G2T0A0 NM:i:9 +SRR065390.22082412 0 CHROMOSOME_I 10000000170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTTGGCCGCAGCCTCAGCCTGAACAGA CCCCACACCCCCCCC??:??@CCCC@9A>9?AA@AC>@CA@B-73>8=53@=:=A?><=>49778?################ AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:88A5A5 NM:i:2 +SRR065390.32243033 0 CHROMOSOME_I 10000000170 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTACGACTA CCCCCCCCCCCACC@CCACCCCCCCCCCCCCCCCC@CADCCBBD@BB>=?A@9C@?C>A88?>8A?:@CCCCCCCCC:?>;:CCC?BCCCCACCCCCCCCCC AS:i:-39 XS:i:-39 XN:i:0 XM:i:18 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C0T0A0A2C0T2G1C0T0A0A0G0C1T0A1G1C64C10 NM:i:18 +SRR065390.28296401 16 CHROMOSOME_I 10000000171 1 100M * 0 0 CTAAGCCTAAGCCTAAGGCTAAGCCTAAACCCACGCCTAGGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################################################BAADDDBBDDCCDCCCCCACDCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:17C10G2T1A5A3T56 NM:i:6 +SRR065390.1242089 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC A=@@?=?=8A3BB>>B@B>BAB@B@B77//8<;>5:@@@B6ABA@BA<@BB5):5;*83736?;;;@@=;6B>??##################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 
NM:i:0 +SRR065390.3872193 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCBCCCC@DCACD=ABCB@BCDDA@BA=BBB@C??@;:0A>?>B>?)?#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14566073 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCBCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCAB=?CCCA6?AACABCCAC=1B@A@;B<@A@@;>?@>8BB?B#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18391952 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCDCCCAADCCB?CBABD=A>?BB5:??:B;>?@AA?>3?;@(8>=>>/(5500;+@@6 AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.18719419 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCAC@@C@@B@DBBDBB################################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23668023 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCC@@ABDB@@BBB>DBABB@D@BDBAABAB>B>AA@??9:8>>A:255@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23826980 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCDCDBCDDBDDDABBBBDDBBBBBBB>D?#################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.28024258 0 
CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCBACDDBC>DDBDB>BBBBB;?@BBB3@???=0<=>@@:@################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30039772 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCDACDBBDDDDDBBBDBBD>BBAADAABAAC??B??######################### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5345749 16 CHROMOSOME_I 10000000173 1 100M * 0 0 GACCCAGACCCCGCGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##########################################@BA=>AAA@;AAAA@AA9AAAA@BAA@:=@@@4A=?A@AAAAA:B@@BBBBB@>>>>> AS:i:-18 XS:i:-18 XN:i:0 XM:i:9 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A1G2T0A1G2T0A0A9T76 NM:i:9 +SRR065390.16932911 16 CHROMOSOME_I 10000000173 1 100M * 0 0 AACCCTAAACCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ###############B?BAA;;9>0A1BAAA@=CA*@CCCCACCCC@@?CAAB>AC=C?CCCCBCCBBCBCCCABCCBCA@CCCCCCBCCCCC?BCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2G5G5G85 NM:i:3 +SRR065390.17106354 0 CHROMOSOME_I 10000000173 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCACC?CCADCCAC@BB@CBB@C?@A@@A>=B?BAABBABB6A>BBB:BBA=?DD??;D/<71; AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22716808 0 CHROMOSOME_I 10000000174 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCGAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCABBBBB?################################### AS:i:-2 XS:i:-2 XN:i:0 XM:i:1 XO:i:0 XG:i:0 YT:Z:UU MD:Z:94T5 NM:i:1 +SRR065390.12986460 0 
CHROMOSOME_I 10000000176 1 100M * 0 0 CCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCATAATCGTAAGACTAAGAGCAAGCCTCAGCATA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCA?CCA############################### AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:92T2G4 NM:i:2 +SRR065390.14729559 16 CHROMOSOME_I 10000000176 1 100M * 0 0 CCTACGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA #########################?(4<=B@;BBBBCB?>BCCA?DCCACCCCCC@C;BBB??B<;9=C@BCAACBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2T0A0A6G88 NM:i:4 +SRR065390.26023345 0 CHROMOSOME_I 10000000177 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTCAGCCGAA CCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCA?CDADABDBDDBDDBAB>>BBBB@;>@BBB?A>CBBB<>>B@@4@?>>?0ABD@@###### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6149508 0 CHROMOSOME_I 10000000179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCDDCCBD=CCDB@@DABAB=ABB??>>@BB=BCBAB>>D;A?><>AA>?A==+@A AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.6618950 0 CHROMOSOME_I 10000000179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCDCCCCCDCCBCAACBBCBB@DADABBDAB?CBB@B;?BB=B>>>?:? 
AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.7246333 0 CHROMOSOME_I 10000000179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCACCCDCCCCCCCCCCCDCCBCD@CBBDCADADADBDABBDBDABDBCBBA>BAB>>AC9A################## AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8266146 0 CHROMOSOME_I 10000000179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTCAGCCGAGGCCTACGC CDCCCCCCCCCCCCCCCCCBCCCCCCDCCCCCCACDCCCCCDACBDCABCB@A=ABBB@BBD@DB?B################################# AS:i:-10 XS:i:-10 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:80G3A4T1A5A2 NM:i:5 +SRR065390.8986893 0 CHROMOSOME_I 10000000179 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCCCCCCCCCA@CCCCD=CCCDAABBDB>BDDBDB;BB@@B=@BDB:.A>>BB:@################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 diff --git a/src/htslib-1.21/test/longrefs/longref_itr.expected.sam b/src/htslib-1.21/test/longrefs/longref_itr.expected.sam new file mode 100644 index 0000000..6aca067 --- /dev/null +++ b/src/htslib-1.21/test/longrefs/longref_itr.expected.sam @@ -0,0 +1,26 @@ +@SQ SN:CHROMOSOME_I LN:10001009800 +SRR065390.14978392 16 CHROMOSOME_I 10000000002 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-18 XS:i:-18 XN:i:0 XM:i:5 XO:i:1 XG:i:1 YT:Z:UU MD:Z:4A0G5G5G5G3^A73 NM:i:6 +SRR065390.921023 16 CHROMOSOME_I 10000000003 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 AS:i:-6 XS:i:-13 XN:i:0 XM:i:3 
XO:i:0 XG:i:0 YT:Z:UU MD:Z:10G0C10G77 NM:i:3 +SRR065390.1871511 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 0:BB@>B<=B@???@=8@B>BB@CA@DACDCBBCCCA@CCCCACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3743423 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################?6@:7<=@3=@ABAAB>BDBBABADABDDDBDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4251890 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########@BB=BCBBC?B>B;>B@@ADBBB@DBBBBDCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5238868 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA @,=@@D8D;?BBB>;?BBB==BB@D;>D>BBB>BBDDBA@@BCCB@=BACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8289592 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A?@C9@@BC=AABDD@A@DC@CB=@BA?6@CCAAC@+CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14497557 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######@:@@.>=><;;B>AB>>BB?B=>B=BD>BDADDD>CCDDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU 
MD:Z:100 NM:i:0 +SRR065390.15617929 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA D?;;D>?C>CBAAACD@BB?B>BBDB>@BBDDBDC@CBDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16049575 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #######################@??BB8BBB@@:AB@BDBCCDCBDCCCCACCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17838261 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################@>=?B@DCBDB>@D>DBADCDDD>CC@DCCCCBCCACCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22711273 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################B<@=<:6/0307==72@@=?788==;AAA:@CCAACCC?CCAACCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22922978 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########################B=B>A@BBBC??=@=A@AC<><<8>C6CCCCC8CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23087186 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############@:73???@6;D?B>:>BBA?B<>B@B>@B>@>BCDCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 
+SRR065390.23506653 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############A/=A5::87@:=>6@AA>@CDBA@ABCB?BC>CD>DDBDC@CCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23791575 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCCCACCCCAACCCTTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############################B4;:=B@>A@BCB@@ABCCBB@BCC@CCDCCDCCDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:7T0A1G2T2G3A79 NM:i:6 +SRR065390.25911768 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############@8B@B?9=:A?=@DDB>;B6?DDBCABABB@DDCCBDBDCCDACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26055380 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################DAA><0=>=B;?BACDBDABCBBC@CACACACACCACCCCCCCCCCCCCCCCCCCCCCBCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26121674 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################?:AA::@DAAA>B??@A4@=BBBBDDBDBDCCBDDBCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30352568 16 CHROMOSOME_I 10000000003 7 100M * 0 0 CTAGGGCTAACCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################################################A>>5A?CCC@CCCCCCCCCC?CC:C@A@==@A@A@ AS:i:-10 XS:i:-19 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A1C4G3A37G47 
NM:i:5 +SRR065390.31341126 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################?AD?D@BCAABBBD@=DBCDBAACCDCAABCDCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33653624 16 CHROMOSOME_I 10000000003 17 100M * 0 0 CTAATCCTAGGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################??8?000-+0000,@ABBBB@B:B@B>BB????>>>@@?::?6?>>;>>@ACCCCBCCBACCCC AS:i:-6 XS:i:-19 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4G4A9T80 NM:i:3 diff --git a/src/htslib-1.21/test/longrefs/longref_multi.expected.sam b/src/htslib-1.21/test/longrefs/longref_multi.expected.sam new file mode 100644 index 0000000..997ead5 --- /dev/null +++ b/src/htslib-1.21/test/longrefs/longref_multi.expected.sam @@ -0,0 +1,46 @@ +@SQ SN:CHROMOSOME_I LN:10001009800 +SRR065390.14978392 16 CHROMOSOME_I 10000000002 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-18 XS:i:-18 XN:i:0 XM:i:5 XO:i:1 XG:i:1 YT:Z:UU MD:Z:4A0G5G5G5G3^A73 NM:i:6 +SRR065390.921023 16 CHROMOSOME_I 10000000003 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 AS:i:-6 XS:i:-13 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10G0C10G77 NM:i:3 +SRR065390.1871511 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 0:BB@>B<=B@???@=8@B>BB@CA@DACDCBBCCCA@CCCCACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 
+SRR065390.3743423 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################?6@:7<=@3=@ABAAB>BDBBABADABDDDBDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.4251890 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########@BB=BCBBC?B>B;>B@@ADBBB@DBBBBDCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.5238868 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA @,=@@D8D;?BBB>;?BBB==BB@D;>D>BBB>BBDDBA@@BCCB@=BACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.8289592 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A?@C9@@BC=AABDD@A@DC@CB=@BA?6@CCAAC@+CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.14497557 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ######@:@@.>=><;;B>AB>>BB?B=>B=BD>BDADDD>CCDDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.15617929 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA D?;;D>?C>CBAAACD@BB?B>BBDB>@BBDDBDC@CBDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.16049575 16 
CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #######################@??BB8BBB@@:AB@BDBCCDCBDCCCCACCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.17838261 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################@>=?B@DCBDB>@D>DBADCDDD>CC@DCCCCBCCACCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22711273 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################B<@=<:6/0307==72@@=?788==;AAA:@CCAACCC?CCAACCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22922978 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##########################B=B>A@BBBC??=@=A@AC<><<8>C6CCCCC8CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23087186 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ############@:73???@6;D?B>:>BBA?B<>B@B>@B>@>BCDCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23506653 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############A/=A5::87@:=>6@AA>@CDBA@ABCB?BC>CD>DDBDC@CCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.23791575 16 CHROMOSOME_I 10000000003 
1 100M * 0 0 CTAAGCCCCACCCCAACCCTTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############################B4;:=B@>A@BCB@@ABCCBB@BCC@CCDCCDCCDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-12 XS:i:-12 XN:i:0 XM:i:6 XO:i:0 XG:i:0 YT:Z:UU MD:Z:7T0A1G2T2G3A79 NM:i:6 +SRR065390.25911768 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##############@8B@B?9=:A?=@DDB>;B6?DDBCABABB@DDCCBDBDCCDACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26055380 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################DAA><0=>=B;?BACDBDABCBBC@CACACACACCACCCCCCCCCCCCCCCCCCCCCCBCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.26121674 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################?:AA::@DAAA>B??@A4@=BBBBDDBDBDCCBDDBCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.30352568 16 CHROMOSOME_I 10000000003 7 100M * 0 0 CTAGGGCTAACCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #################################################################A>>5A?CCC@CCCCCCCCCC?CC:C@A@==@A@A@ AS:i:-10 XS:i:-19 XN:i:0 XM:i:5 XO:i:0 XG:i:0 YT:Z:UU MD:Z:3A1C4G3A37G47 NM:i:5 +SRR065390.31341126 16 CHROMOSOME_I 10000000003 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################?AD?D@BCAABBBD@=DBCDBAACCDCAABCDCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.33653624 16 CHROMOSOME_I 
10000000003 17 100M * 0 0 CTAATCCTAGGCCTAAGCCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################??8?000-+0000,@ABBBB@B:B@B>BB????>>>@@?::?6?>>;>>@ACCCCBCCBACCCC AS:i:-6 XS:i:-19 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:4G4A9T80 NM:i:3 +SRR065390.28043279 16 CHROMOSOME_I 10000000004 0 9M1I90M * 0 0 TCTTCCGATCTCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####A>=7A6DD=@AA?>AAABC@CAABDBCBBABDADBADCABBBDCDCDCACDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCBCCCC AS:i:-26 XS:i:-26 XN:i:0 XM:i:6 XO:i:1 XG:i:1 YT:Z:UU MD:Z:1A0A0G2T1A0G89 NM:i:7 +SRR065390.29270443 16 CHROMOSOME_I 10000000006 1 100M * 0 0 AGCCTAAGCCGAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ###################################@:88@@>B>C>CCCCA@CCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:10T2G86 NM:i:2 +SRR065390.1364843 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ####################@=A=8@:>@;@@=>>B8?C6CCCCCCCCCCACCCCBBCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.10190875 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##################@@@@@@;>BBB?>A6BAB?BB=BAB@?:A.<===@7:4::>8D@BABBACCCCAB@CCCDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.13556211 0 CHROMOSOME_I 10000000011 0 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGATTGGAAGAGCGGCTAAGCAGGAACGCCGGGCTCGATCTCAGC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCBCDCCB>BBBBB########################################### AS:i:-50 XS:i:-50 XN:i:0 XM:i:25 XO:i:0 XG:i:0 YT:Z:UU 
MD:Z:57C0C1A0A0G0C0C0T0A0A1C6C0T0A1G1C0T0A0A1C2A0A0G0C2A3 NM:i:25 +SRR065390.20524775 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ############################?9<8B=?@C8A<@?@C8CBDCCC=CCCCC??@CCDCCCCCCCCCCCCCCCCCCCCDCCCCCCCDCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20580336 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ############################?:>@?@=>@=0<:CB>@B=DCADB@CCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.22573273 16 CHROMOSOME_I 10000000011 1 100M * 0 0 AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC ##################################A9;?@CBBDBA>BB;ABDB>AAA;=>=0943@########### AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.20870114 0 CHROMOSOME_I 10000000012 1 100M * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCABCCCC=BBBCA@B>B?D;B=>9?############################ AS:i:0 XS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 YT:Z:UU MD:Z:100 NM:i:0 +SRR065390.3863623 16 CHROMOSOME_I 10000000012 1 100M * 0 0 CGCCTACGCCTACGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC ##############################?@BB>B@BCABBB?DC@DADC@DCDCACDCBCCCCCCCCCCC@CCCCCCCCCCCCCCC1CCCCCCCCCCC AS:i:-6 XS:i:-6 XN:i:0 XM:i:3 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0A5A5A87 NM:i:3 +SRR065390.1659845 0 CHROMOSOME_I 10000000013 0 100M * 0 0 GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAACCTAAGCCTAAGCCCAACCCTAAGACCGAGACCGAGACC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCAB@CCC######################################### AS:i:-22 XS:i:-22 XN:i:0 XM:i:11 XO:i:0 XG:i:0 YT:Z:UU 
MD:Z:60G14T2G6C1T0A2C1T0A2C1T0 NM:i:11 +SRR065390.1567418 16 CHROMOSOME_I 10000000015 1 100M * 0 0 CACAGCCTACGTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #########################################?:8A@<@>>BBB8>BBB@BBBB>@:??::87688:?:::?@<@@97866@?>@@;;>:< AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T0A6A1C88 NM:i:4 +SRR065390.4996386 16 CHROMOSOME_I 10000000015 17 100M * 0 0 CCAAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###################################@@@@A=BB@C>>DCCACCCCCCC@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-22 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T5T92 NM:i:2 +SRR065390.14822977 16 CHROMOSOME_I 10000000015 1 100M * 0 0 CGAAGCCAGAGCCTAGGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ####################################B:B?:==2>6@B@@C>?>A@CB5@??@28C@CCCBC@CC?CC?A@CC:CBCCCCCCCCCCCCCC AS:i:-8 XS:i:-8 XN:i:0 XM:i:4 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T5T0A6A84 NM:i:4 +SRR065390.15148736 16 CHROMOSOME_I 10000000015 17 100M * 0 0 CTGAGCCGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################CCBC<=C;9??<;==C@BCCCCC=CCCCACACACCBBCCCCCCCCCCCCCCCCCBCCCCCCCCCCCBCA?CCC AS:i:-4 XS:i:-21 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2A4T92 NM:i:2 +SRR065390.18089757 16 CHROMOSOME_I 10000000015 1 100M * 0 0 CTGAGCCTGAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ########################A212.0:?.>8?BB?B<@@C?CCBCB;DCCCACDCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-4 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:2A5A91 NM:i:2 +SRR065390.25601994 16 CHROMOSOME_I 10000000015 17 100M * 0 0 ATAAGCCTAATCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #####################???DD?BD?BDBB>ACBDBDDBDDDBDBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-21 XN:i:0 
XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C9G89 NM:i:2 +SRR065390.29400981 16 CHROMOSOME_I 10000000015 17 100M * 0 0 CGAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A<:?C>>BCABABC?AD>BDADDDBDBBDBDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-4 XS:i:-18 XN:i:0 XM:i:2 XO:i:0 XG:i:0 YT:Z:UU MD:Z:1T2G95 NM:i:2 diff --git a/src/htslib-1.21/test/maintainer/check_copyright.pl b/src/htslib-1.21/test/maintainer/check_copyright.pl new file mode 100755 index 0000000..22556df --- /dev/null +++ b/src/htslib-1.21/test/maintainer/check_copyright.pl @@ -0,0 +1,99 @@ +#!/usr/bin/env perl +# check_copyright.pl : Basic source file checks for copyright boilerplate +# +# Author : Rob Davies +# +# Copyright (C) 2018 Genome Research Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +use strict; +use warnings; +use File::Find; +use Getopt::Long; + +my $verbose = 0; +GetOptions('v' => \$verbose); + +my ($root) = @ARGV; +if (!$root) { + die "Usage: $0 [-v] \n"; +} +my $errors = 0; +find({ wanted => \&check, no_chdir=>1}, $root); +exit($errors ? 1 : 0); + +sub check { + # Only check C, perl and shell files + return unless (/(?:\.[ch]|\.pl|\.sh)$/); + + # Exclude htscodecs submodule + return if (/\/htscodecs\//); + + # Exclusions: + my %exclude = map { ("$root/$_", 1) } ( +'config.h', # Auto-generated +'config_vars.h', # Auto-generated +'version.h', # Auto-generated +'cram/rANS_byte.h', # "Public domain" +'os/lzma_stub.h', # "Public domain" +'os/rand.c'); # drand48 license + return if exists($exclude{$_}); + + my $remove_left = /\.[ch]$/ ? qr/\s*\*?\s*/ : qr/\s*#\s*/; + + return unless (-f $_); # Only check plain files + my $in; + if (!open($in, '<', $_)) { + print STDERR "Couldn't open $_ : $!\n"; + $errors++; + return; + } + my $count = 0; + my $copyright_found = 0; + my $license_found = ""; + my $line; + while ($count < 100 && ($line = <$in>)) { + $count++; + $line =~ s/^$remove_left//; + $line =~ s/\s+/ /g; + if ($line =~ /^Copyright\s+\([cC]\)\s+(?:19|20)\d\d[-, ]/) { + $copyright_found = 1; + } elsif ($line =~ /^Redistribution and use in source and binary forms/) { + $license_found = "BSD"; + } elsif ($line =~ /^Permission is hereby granted, free of charge/) { + $license_found = "MIT"; + } + last if ($copyright_found && $license_found); + } + if (!close($in)) { + print STDERR "Error on closing $_ : $!\n"; + $errors++; + return; + } + my $failed = (!$copyright_found || !$license_found); + if ($verbose || $failed) { + printf("$_ : %s%s\n", + $license_found ? $license_found : "no_license", + $copyright_found ? 
"" : " no_copyright_line"); + } + if ($failed) { + $errors++; + } +} diff --git a/src/htslib-1.21/test/maintainer/check_spaces.pl b/src/htslib-1.21/test/maintainer/check_spaces.pl new file mode 100755 index 0000000..e48518f --- /dev/null +++ b/src/htslib-1.21/test/maintainer/check_spaces.pl @@ -0,0 +1,93 @@ +#!/usr/bin/env perl +# check_spaces.pl : Check source files for tabs and trailing spaces +# +# Author : Rob Davies +# +# Copyright (C) 2018 Genome Research Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +use strict; +use warnings; +use File::Find; +use Getopt::Long; + +my $verbose = 0; +GetOptions('v' => \$verbose); + +my ($root) = @ARGV; +if (!$root) { + die "Usage: $0 [-v] \n"; +} +my $errors = 0; +find({ wanted => \&check, no_chdir=>1}, $root); +exit($errors ? 
1 : 0); + +sub check { + # Only check C, perl and shell files + return unless (/(?:\.[ch]|\.pl|\.sh)$/); + + # Exclude htscodecs submodule + return if (/\/htscodecs\//); + + my %allow_tabs = map { ("$root/$_", 1) } ( +'kfunc.c', +'kstring.c', +'md5.c', +'htslib/khash.h', +'htslib/kseq.h', +'htslib/ksort.h', +'htslib/kstring.h', +'htslib/klist.h', +'htslib/kbitset.h', +'os/rand.c', +); + + my $check_tabs = !exists($allow_tabs{$_}); + + my $in; + if (!open($in, '<', $_)) { + print STDERR "Couldn't open $_ : $!\n"; + $errors++; + return; + } + my $tab = 0; + my $trailing = 0; + while (my $line = <$in>) { + chomp($line); + if ($check_tabs && $line =~ /\t/) { $tab = 1; } + if ($line =~ /\s$/) { $trailing = 1; } + } + if (!close($in)) { + print STDERR "Error on closing $_ : $!\n"; + $errors++; + return; + } + my $failed = ($tab || $trailing); + if ($verbose || $failed) { + my $msg = ($failed ? join(" ", + $tab ? ("includes_tabs") : (), + $trailing ? "trailing_spaces" : ()) + : "ok"); + print "$_ : $msg\n"; + } + if ($failed) { + $errors++; + } +} diff --git a/src/htslib-1.21/test/md#1.sam b/src/htslib-1.21/test/md#1.sam new file mode 100644 index 0000000..7e2c0c0 --- /dev/null +++ b/src/htslib-1.21/test/md#1.sam @@ -0,0 +1,12 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:a LN:40 +x 0 a 1 255 40M * 0 0 AAAAAAAAAACCCCCCCCYNNRGGGGGGGGTTTTTTTTTT * NM:i:2 MD:Z:19N0N19 +x* 0 a 1 255 40M * 0 0 CAAAAAAAANNCCCCCCCYNNRGGGGGGGGTTTTTTTTTC * NM:i:6 MD:Z:0A8A0C8N0N18T0 +xIP 0 a 1 255 1I10M1I10M2I10M2P3I2P10M1I * 0 0 NAAAAAAAAAAGCCCCCCCCCCAAGGGGGGGGGGCCCTTTTTTTTTTN * NM:i:12 MD:Z:18Y0N0N0R18 +xIP* 0 a 1 255 1I10M1I10M2I10M2P3I2P10M1I * 0 0 NCAAAAAAAAAGCCCCCCCCCCAAGGGGGGGGGACCCATTTTTTTTCN * NM:i:16 MD:Z:0A17Y0N0N0R7G0T8T0 +xD 0 a 1 255 9M1D8M2D7M3D6M * 0 0 AAAAAAAAACCCCCCYNNRGGGGGTTTTTT * NM:i:9 MD:Z:9^A6C0C0^YN0N6^GGG6 +xD* 0 a 1 255 9M1D8M2D7M3D6M * 0 0 CAAAAAAANNCCCCCCTTGGGGGGTTTTTA * NM:i:13 MD:Z:0A7A0^A0C6C0^YN0N0R5^GGG5T0 +xN 0 a 1 255 10M20N10M * 0 0 AAAAAAAAAATTTTTTTTTT * NM:i:0 MD:Z:20 
+xN* 0 a 1 255 10M20N10M * 0 0 CAAAAAAACCAATTTTTTTA * NM:i:6 MD:Z:0A7A0A0T0T7T0 +xS 0 a 11 255 5H10S20M10S5H * 0 0 AAAAAAAAAACCCCCCCCYNNRGGGGGGGGTTTTTTTTTT * NM:i:2 MD:Z:9N0N9 +xS* 0 a 11 255 5H10S20M10S5H * 0 0 CAAAATAAAACCCCCCCCYNNRGGGGGGGGTTTTATTTTC * NM:i:2 MD:Z:9N0N9 diff --git a/src/htslib-1.21/test/md.fa b/src/htslib-1.21/test/md.fa new file mode 100644 index 0000000..3723483 --- /dev/null +++ b/src/htslib-1.21/test/md.fa @@ -0,0 +1,3 @@ +>a +AAAAAAAAAACCCCCCCCYNNRGGGGGGGGTTTTTTTTTT + diff --git a/src/htslib-1.21/test/md.fa.fai b/src/htslib-1.21/test/md.fa.fai new file mode 100644 index 0000000..87f5585 --- /dev/null +++ b/src/htslib-1.21/test/md.fa.fai @@ -0,0 +1 @@ +a 40 3 40 41 diff --git a/src/htslib-1.21/test/modhdr.expected.vcf b/src/htslib-1.21/test/modhdr.expected.vcf new file mode 100644 index 0000000..bad663c --- /dev/null +++ b/src/htslib-1.21/test/modhdr.expected.vcf @@ -0,0 +1,4 @@ +##fileformat=VCFv4.3 +##FILTER= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO diff --git a/src/htslib-1.21/test/modhdr.vcf.gz b/src/htslib-1.21/test/modhdr.vcf.gz new file mode 100644 index 0000000..f97e06a Binary files /dev/null and b/src/htslib-1.21/test/modhdr.vcf.gz differ diff --git a/src/htslib-1.21/test/modhdr.vcf.gz.csi b/src/htslib-1.21/test/modhdr.vcf.gz.csi new file mode 100644 index 0000000..61b60e7 Binary files /dev/null and b/src/htslib-1.21/test/modhdr.vcf.gz.csi differ diff --git a/src/htslib-1.21/test/mpileup/c1#pad1.out b/src/htslib-1.21/test/mpileup/c1#pad1.out new file mode 100644 index 0000000..eda8600 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/c1#pad1.out @@ -0,0 +1,10 @@ +c1 1 9 ^!A^!A^!A^!A^!A^!A^!A^!A^!A ~~~~~~~~~ +c1 2 9 AAAAAAAAA-3() ~~~~~~~~~ +c1 3 9 CCCCCCCC* ~~~~~~~~~ +c1 4 9 CCCCCCCC-1()* ~~~~~~~~~ +c1 5 9 GGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3() ~~~~~~~~~ +c1 6 9 CCCCCCC** ~~~~~~~~~ +c1 7 9 GGGGGGGG* ~~~~~~~~~ +c1 8 9 GGGGGGGG* ~~~~~~~~~ +c1 9 9 TTTTTTTTT ~~~~~~~~~ +c1 10 9 
T$T$T$T$T$T$T$T$T$ ~~~~~~~~~ diff --git a/src/htslib-1.21/test/mpileup/c1#pad1.sam b/src/htslib-1.21/test/mpileup/c1#pad1.sam new file mode 100644 index 0000000..93d88d1 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/c1#pad1.sam @@ -0,0 +1,47 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:c1 LN:10 +@CO +@CO Copyright (c) 2014,2018 Genome Research Ltd. +@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+@CO +@CO Ref AACCG******CGGTT +@CO 12345 67890 Depadded base numbering +@CO 1 +@CO +@CO s0a AACCG CGGTT 10M +@CO s0b AACCG CGGTT 10M +@CO s0c AACCG CGGTT 10M +@CO s1 AACCGGTTAACCGGTT 5M 6I 5M +@CO s2 AACCG*TTAA*CGGTT 5M 1P 4I 1P 5M +@CO s3 AACCGGTT***CGGTT 5M 3I 3P 5M +@CO s4 AACCG***AACCGGTT 5M 3P 3I 5M +@CO s5 AACC***TA***GGTT 4M 1D 2P 2I 2P 1D 4M +@CO s6 AA***GTTAAC***TT 2M 3D 6I 3D 2M +@CO +s0a 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0b 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0c 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s1 0 c1 1 0 5M6I5M * 0 0 AACCGGTTAACCGGTT * +s2 0 c1 1 0 5M1P4I1P5M * 0 0 AACCGTTAACGGTT * +s3 0 c1 1 0 5M3I3P5M * 0 0 AACCGGTTCGGTT * +s4 0 c1 1 0 5M3P3I5M * 0 0 AACCGAACCGGTT * +s5 0 c1 1 0 4M1D2P2I2P1D4M * 0 0 AACCTAGGTT * +s6 0 c1 1 0 2M3D6I3D2M * 0 0 AAGTTAACTT * diff --git a/src/htslib-1.21/test/mpileup/c1#pad2.out b/src/htslib-1.21/test/mpileup/c1#pad2.out new file mode 100644 index 0000000..7bab80e --- /dev/null +++ b/src/htslib-1.21/test/mpileup/c1#pad2.out @@ -0,0 +1,10 @@ +c1 1 12 ^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!* ~~~~~~~~~~~~ +c1 2 12 AAAAAAAAAA-3()A* ~~~~~~~~~~~~ +c1 3 12 CCCCCCCCC*C* ~~~~~~~~~~~~ +c1 4 12 CCCCCCCCC-1()*C-2()* ~~~~~~~~~~~~ +c1 5 13 GGGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3()**+6(**TA**)-5()^!G+6(**TA**)$ ~~~~~~~~~~~~~ +c1 6 12 CCCCCCCC**** ~~~~~~~~~~~~ +c1 7 12 GGGGGGGGG*G* ~~~~~~~~~~~~ +c1 8 12 GGGGGGGGG*G* ~~~~~~~~~~~~ +c1 9 12 TTTTTTTTTTT* ~~~~~~~~~~~~ +c1 10 12 T$T$T$T$T$T$T$T$T$T$T$*$ ~~~~~~~~~~~~ diff --git a/src/htslib-1.21/test/mpileup/c1#pad2.sam b/src/htslib-1.21/test/mpileup/c1#pad2.sam new file mode 100644 index 0000000..bbbdd11 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/c1#pad2.sam @@ -0,0 +1,55 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:c1 LN:10 +@CO +@CO Copyright (c) 2014,2018 Genome Research Ltd. 
+@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+@CO +@CO Ref AACCG******CGGTT +@CO 12345 67890 Depadded base numbering +@CO 1 +@CO +@CO s0a AACCG CGGTT 10M +@CO s0b AACCG CGGTT 10M +@CO s0c AACCG CGGTT 10M +@CO s0d AACCG CGGTT 10M +@CO s1 AACCGGTTAACCGGTT 5M 6I 5M +@CO s2 AACCG*TTAA*CGGTT 5M 1P 4I 1P 5M +@CO s3 AACCGGTT***CGGTT 5M 3I 3P 5M +@CO s4 AACCG***AACCGGTT 5M 3P 3I 5M +@CO s5 AACC***TA***GGTT 4M 1D 2P 2I 2P 1D 4M +@CO s6 AA***GTTAAC***TT 2M 3D 6I 3D 2M +@CO s7 AACC* *GGTT 4M 2D 4M +@CO s8 *******TA******* 5D 2P 2I 2P 5D +@CO s9 G**TA** 1M 2P 2I 2P +@CO +s0a 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0b 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0c 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0d 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s1 0 c1 1 0 5M6I5M * 0 0 AACCGGTTAACCGGTT * +s2 0 c1 1 0 5M1P4I1P5M * 0 0 AACCGTTAACGGTT * +s3 0 c1 1 0 5M3I3P5M * 0 0 AACCGGTTCGGTT * +s4 0 c1 1 0 5M3P3I5M * 0 0 AACCGAACCGGTT * +s5 0 c1 1 0 4M1D2P2I2P1D4M * 0 0 AACCTAGGTT * +s6 0 c1 1 0 2M3D6I3D2M * 0 0 AAGTTAACTT * +s7 0 c1 1 0 4M2D4M * 0 0 AACCGGTT * +s8 0 c1 1 0 5D2P2I2P5D * 0 0 TA * +s9 0 c1 5 0 1M2P2I2P * 0 0 GTA * diff --git a/src/htslib-1.21/test/mpileup/c1#pad3.out b/src/htslib-1.21/test/mpileup/c1#pad3.out new file mode 100644 index 0000000..e0ce418 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/c1#pad3.out @@ -0,0 +1,5 @@ +c1 6 11 ^!C^!C^!C^!C^!C^!C^!C^!C^!*^!*^!* ~~~~~~~~~~~ +c1 7 11 GGGGGGGGG*G ~~~~~~~~~~~ +c1 8 11 GGGGGGGGG*G ~~~~~~~~~~~ +c1 9 11 TTTTTTTTTTT ~~~~~~~~~~~ +c1 10 11 T$T$T$T$T$T$T$T$T$T$T$ ~~~~~~~~~~~ diff --git a/src/htslib-1.21/test/mpileup/c1#pad3.sam b/src/htslib-1.21/test/mpileup/c1#pad3.sam new file mode 100644 index 0000000..c5c0438 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/c1#pad3.sam @@ -0,0 +1,53 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:c1 LN:16 +@RG ID:p.sam SM:unknown LB:p.sam +@CO +@CO Copyright (c) 2014,2018 Genome Research Ltd. 
+@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+@CO +@CO Ref ***********CGGTT +@CO 12345 Depadded base numbering +@CO +@CO s0a AACCG******CGGTT 5I 6P 5M +@CO s0b AACCG******CGGTT 5I 6P 5M +@CO s0c AACCG******CGGTT 5I 6P 5M +@CO s0d AACCG******CGGTT 5I 6P 5M +@CO s1 AACCGGTTAACCGGTT 11I 5M +@CO s2 AACCG*TTAA*CGGTT 5I 1P 4I 1P 5M +@CO s3 AACCGGTT***CGGTT 8I 3P 5M +@CO s4 AACCG***AACCGGTT 5I 3P 3I 5M +@CO s5 AACC***TA***GGTT 4I 3P 2I 2P 1D 4M +@CO s6 AA***GTTAAC***TT 2I 3P 6I 3D 2M +@CO s7 AACC********GGTT 4I 7P 1D 4M +@CO s8 *******TA** 7P 2I 2P +@CO +s0a 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0b 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0c 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0d 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s1 0 c1 6 0 11I5M * 0 0 AACCGGTTAACCGGTT * RG:Z:p.sam +s2 0 c1 6 0 5I1P4I1P5M * 0 0 AACCGTTAACGGTT * RG:Z:p.sam +s3 0 c1 6 0 8I3P5M * 0 0 AACCGGTTCGGTT * RG:Z:p.sam +s4 0 c1 6 0 5I3P3I5M * 0 0 AACCGAACCGGTT * RG:Z:p.sam +s5 0 c1 6 0 4I3P2I2P1D4M * 0 0 AACCTAGGTT * RG:Z:p.sam +s6 0 c1 6 0 2I3P6I3D2M * 0 0 AAGTTAACTT * RG:Z:p.sam +s7 0 c1 6 0 4I7P1D4M * 0 0 AACCGGTT * RG:Z:p.sam +s8 0 c1 6 0 7P2I2P * 0 0 TA !! RG:Z:p.sam diff --git a/src/htslib-1.21/test/mpileup/mp_D.out b/src/htslib-1.21/test/mpileup/mp_D.out new file mode 100644 index 0000000..72f1c43 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_D.out @@ -0,0 +1,11 @@ +z 2 3 ^!A^!A^!* 002 +z 3 3 GG* 112 +z 4 3 CCC 222 +z 5 3 TT-3()T 333 +z 6 3 T*T 474 +z 7 3 A*A 575 +z 8 3 G*G 676 +z 9 3 CCC 777 +z 10 3 AAA-2() 888 +z 11 3 GG* 99~ +z 12 3 G$G$*$ 00~ diff --git a/src/htslib-1.21/test/mpileup/mp_D.sam b/src/htslib-1.21/test/mpileup/mp_D.sam new file mode 100644 index 0000000..73b9599 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_D.sam @@ -0,0 +1,36 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:z LN:13 +@CO +@CO Copyright (c) 2018 Genome Research Ltd. 
+@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+@CO +@CO 1 +@CO 1234567890123 Depadded base numbering +@CO ref TAGCTTAGCAGGT +@CO +@CO s1 AGCTTAGCAGG 11M +@CO s2 AGCT***CAGG 4M 3D 4M +@CO s3 **CTTAGCA** 2D 7M 2D +@CO qual 01234567890 +@CO +s1 0 z 2 0 11M * 0 0 AGCTTAGCAGG 01234567890 +s2 0 z 2 0 4M3D4M * 0 0 AGCTCAGG 01237890 +s3 0 z 2 0 2D7M2D * 0 0 CTTAGCA 2345678 diff --git a/src/htslib-1.21/test/mpileup/mp_DI.out b/src/htslib-1.21/test/mpileup/mp_DI.out new file mode 100644 index 0000000..5818c16 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_DI.out @@ -0,0 +1,12 @@ +z 2 5 ^!A^!A^!A^!*^!* 000AB +z 3 5 GGG*+2(AA)*+2(*A) 111AB +z 4 5 CCCCC 22222 +z 5 5 TTTTT 33333 +z 6 5 TTTTT 44444 +z 7 5 AAAAA 55555 +z 8 5 G-2()G-2()G-2()GG 66666 +z 9 5 ***CC AAB77 +z 10 5 *+2(TT)*+2(TT)$*+2(*T)$AA AAB88 +z 11 3 GGG 999 +z 12 3 GG$G$ 000 +z 13 1 C$ 1 diff --git a/src/htslib-1.21/test/mpileup/mp_DI.sam b/src/htslib-1.21/test/mpileup/mp_DI.sam new file mode 100644 index 0000000..dffd95c --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_DI.sam @@ -0,0 +1,40 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:z LN:13 +@CO +@CO Copyright (c) 2018 Genome Research Ltd. +@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@CO +@CO 1 +@CO 123 4567890 123 Depadded base numbering +@CO ref TAG CTTAGCA GGT +@CO +@CO s1 AG CTTAG**TTGGC 7M 2D 2I 3M +@CO s2 AG CTTAG**TT 7M 2D 2I +@CO s4 AG CTTAG***T 7M 2D 1P 1I +@CO s3 **AACTTAGCA GG 2D 2I 9M +@CO s5 ***ACTTAGCA GG 2D 1P 1I 9M +@CO 01AB2345678AB901 +@CO +s1 0 z 2 0 7M2D2I3M * 0 0 AGCTTAGTTGGC 0123456AB901 +s2 0 z 2 0 7M2D2I * 0 0 AGCTTAGTT 0123456AB +s4 0 z 2 0 7M2D1P1I * 0 0 AGCTTAGT 0123456B +s3 0 z 2 0 2D2I9M * 0 0 AACTTAGCAGG AB234567890 +s5 0 z 2 0 2D1P1I9M * 0 0 ACTTAGCAGG B234567890 diff --git a/src/htslib-1.21/test/mpileup/mp_I.out b/src/htslib-1.21/test/mpileup/mp_I.out new file mode 100644 index 0000000..d62e992 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_I.out @@ -0,0 +1,11 @@ +z 2 3 ^!A^!A^!A 000 +z 3 3 GGG 111 +z 4 3 CCC 222 +z 5 3 TT+3(CCC)T 333 +z 6 3 TTT 444 +z 7 3 AAA 555 +z 8 3 GGG 666 +z 9 3 CCC 777 +z 10 3 AAA 888 +z 11 3 GGG 999 +z 12 3 G$G$G+2(=A)$ 000 diff --git a/src/htslib-1.21/test/mpileup/mp_I.sam b/src/htslib-1.21/test/mpileup/mp_I.sam new file mode 100644 index 0000000..9f48e21 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_I.sam @@ -0,0 +1,41 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:z LN:13 +@CO +@CO Copyright (c) 2018 Genome Research Ltd. 
+@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@CO +@CO Various I combinations +@CO Starting with I isn't handled due to the pileup ordering. +@CO The record only appears in the pileup list when the first base +@CO is placed against the reference, which is too late. 
+@CO +@CO 1 +@CO 1 2345 6789012 3 Depadded base numbering +@CO ref T AGCT TAGCAGG T +@CO +@CO s1 AGCT TAGCAGG 11M +@CO s2 AGCTCCCTAGCAGG 4M 3I 7M +@CO s3 A=AGCT TAGCAGG=A 2I 11M 2I +@CO AB0123ABC4567890CD +@CO +s1 0 z 2 0 11M * 0 0 AGCTTAGCAGG 01234567890 +s2 0 z 2 0 4M3I7M * 0 0 AGCTCCCTAGCAGG 0123ABC4567890 +s2 0 z 2 0 2I11M2I * 0 0 A=AGCTTAGCAGG=A AB01234567890CD diff --git a/src/htslib-1.21/test/mpileup/mp_ID.out b/src/htslib-1.21/test/mpileup/mp_ID.out new file mode 100644 index 0000000..4f83ef4 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_ID.out @@ -0,0 +1,12 @@ +z 2 3 ^!A^!A^!A 000 +z 3 3 GGG 111 +z 4 5 CCC^!*^!* 22244 +z 5 5 TTT** 33344 +z 6 5 TTTTT 44444 +z 7 5 AAAAA 55555 +z 8 5 G+2(TT)-2()G+2(TT)-2()G+2(T*)-2()GG 66666 +z 9 5 ***CC 9~~77 +z 10 5 **$*$AA 9~~88 +z 11 3 GGG 999 +z 12 3 GG$G$ 000 +z 13 1 C$ 1 diff --git a/src/htslib-1.21/test/mpileup/mp_ID.sam b/src/htslib-1.21/test/mpileup/mp_ID.sam new file mode 100644 index 0000000..1b21976 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_ID.sam @@ -0,0 +1,45 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:z LN:13 +@CO +@CO Copyright (c) 2018 Genome Research Ltd. +@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@CO +@CO Various I combinations +@CO Starting with I isn't handled due to the pileup ordering. +@CO The record only appears in the pileup list when the first base +@CO is placed against the reference, which is too late. +@CO +@CO 1 +@CO 123 45678 90123 Depadded base numbering +@CO ref TAG CTTAG CAGGT +@CO +@CO s1 AG CTTAGTT**GGC 7M 2I 2D 3M +@CO s2 AG CTTAGTT** 7M 2I 2D +@CO s3 AG CTTAGT*** 7M 1I 1P 2D +@CO s4 AA**TAG CAGG 2I 2D 7M +@CO s5 *A**TAG CAGG 1P 1I 2D 7M +@CO qual 01AB23456CD78901 +@CO +s1 0 z 2 0 7M2I2D3M * 0 0 AGCTTAGTTGGC 0123456CD901 +s2 0 z 2 0 7M2I2D * 0 0 AGCTTAGTT 0123456CD +s3 0 z 2 0 7M1I1P2D * 0 0 AGCTTAGT 0123456D +s4 0 z 4 0 2I2D7M * 0 0 AATAGCAGG AB4567890 +s5 0 z 4 0 1P1I2D7M * 0 0 ATAGCAGG B4567890 diff --git a/src/htslib-1.21/test/mpileup/mp_N.out b/src/htslib-1.21/test/mpileup/mp_N.out new file mode 100644 index 0000000..0b8dede --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_N.out @@ -0,0 +1,11 @@ +z 2 3 ^!A^!A^!> 002 +z 3 3 GG> 112 +z 4 3 CCC 222 +z 5 3 TTT 333 +z 6 3 T>T 474 +z 7 3 A>A 575 +z 8 3 G>G 676 +z 9 3 CCC 777 +z 10 3 AAA 888 +z 11 3 GG> 99~ +z 12 3 G$G$>$ 00~ diff --git a/src/htslib-1.21/test/mpileup/mp_N.sam b/src/htslib-1.21/test/mpileup/mp_N.sam new file mode 100644 index 0000000..cc9ab77 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_N.sam @@ -0,0 +1,40 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:z LN:13 +@CO +@CO Copyright (c) 2018 Genome Research Ltd. 
+@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@CO +@CO Various I combinations +@CO Starting with I isn't handled due to the pileup ordering. +@CO The record only appears in the pileup list when the first base +@CO is placed against the reference, which is too late. 
+@CO +@CO 1 +@CO 1234567890123 Depadded base numbering +@CO ref TAGCTTAGCAGGT +@CO s1 AGCTTAGCAGG 11M +@CO s2 AGCT>>>CAGG 4M 3N 4M +@CO s3 >>CTTAGCA>> 2N 7M 2N +@CO qual 01234567890 +@CO +s1 0 z 2 0 11M * 0 0 AGCTTAGCAGG 01234567890 +s2 0 z 2 0 4M3N4M * 0 0 AGCTCAGG 01237890 +s3 0 z 2 0 2N7M2N * 0 0 CTTAGCA 2345678 diff --git a/src/htslib-1.21/test/mpileup/mp_N2.out b/src/htslib-1.21/test/mpileup/mp_N2.out new file mode 100644 index 0000000..5ade66a --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_N2.out @@ -0,0 +1,13 @@ +z 1 6 ^!T^!T^!T^!T^!T^!T AAAAAA +z 2 6 AAAAAA BBBBBB +z 3 6 GGGGGG CCCCCC +z 4 6 C+2(AA)-5()C+2(A*)-5()C+2(*A)-5()C+2(AA)C+2(A*)C+2(*A) DDDDDD +z 5 6 ***>>> GHGGHG +z 6 6 ***>>> GHGGHG +z 7 6 ***>>> GHGGHG +z 8 6 ***>>> GHGGHG +z 9 6 *+2(TT)*+2(*T)*+2(T*)>+2(TT)>+2(*T)>+2(T*) GHGGHG +z 10 6 AAAAAA IIIIII +z 11 6 GGGGGG JJJJJJ +z 12 6 GGGGGG KKKKKK +z 13 6 T$T$T$T$T$T$ LLLLLL diff --git a/src/htslib-1.21/test/mpileup/mp_N2.sam b/src/htslib-1.21/test/mpileup/mp_N2.sam new file mode 100644 index 0000000..8ea072d --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_N2.sam @@ -0,0 +1,46 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:z LN:13 +@CO +@CO Copyright (c) 2018 Genome Research Ltd. +@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. 
+@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +@CO +@CO Various I combinations +@CO Starting with I isn't handled due to the pileup ordering. +@CO The record only appears in the pileup list when the first base +@CO is placed against the reference, which is too late. +@CO +@CO 1 +@CO 1234 56789 0123 Depadded base numbering +@CO ref TAGC TTAGC AGGT +@CO sD1 TAGCAA*****TTAGGT 4M 2I 5D 2I 4M +@CO sD2 TAGCA*******TAGGT 4M 1I 1P 5D 1P 1I 4M +@CO sD3 TAGC*A*****T*AGGT 4M 1P 1II 5D 1I 1P 4M +@CO sN1 TAGCAA>>>>>TTAGGT 4M 2I 5N 2I 4M +@CO sN2 TAGCA*>>>>>*TAGGT 4M 1I 1P 5N 1P 1I 4M +@CO sN3 TAGC*A>>>>>T*AGGT 4M 1P 1I 5N 1I 1P 4M +@CO qual ABCDEF GHIJKL +@CO +sD1 0 z 1 0 4M2I5D2I4M * 0 0 TAGCAATTAGGT ABCDEFGHIJKL +sD2 0 z 1 0 4M1I1P5D1P1I4M * 0 0 TAGCATAGGT ABCDEHIJKL +sD3 0 z 1 0 4M1P1I5D1I1P4M * 0 0 TAGCATAGGT ABCDFGIJKL +sN1 0 z 1 0 4M2I5N2I4M * 0 0 TAGCAATTAGGT ABCDEFGHIJKL +sN2 0 z 1 0 4M1I1P5N1P1I4M * 0 0 TAGCATAGGT ABCDEHIJKL +sN3 0 z 1 0 4M1P1I5N1I1P4M * 0 0 TAGCATAGGT ABCDFGIJKL diff --git a/src/htslib-1.21/test/mpileup/mp_P.out b/src/htslib-1.21/test/mpileup/mp_P.out new file mode 100644 index 0000000..003bed0 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_P.out @@ -0,0 +1,10 @@ +z 2 5 ^!A^!A^!A^!A^!A 00000 +z 3 5 GGGGG 11111 +z 4 5 CCCCC 22222 +z 5 5 TTTTT 33333 +z 6 5 TT+4(GGCC)T+4(GG**)T+4(*GC*)T+4(**CC) 44444 +z 7 5 AAAAA 55555 +z 8 5 GGGGG 66666 +z 9 5 CCCCC 77777 +z 10 5 AAAAA 88888 +z 11 5 G$G$G$G$G$ 99999 diff --git a/src/htslib-1.21/test/mpileup/mp_P.sam b/src/htslib-1.21/test/mpileup/mp_P.sam new file mode 
100644 index 0000000..9a8c9d6 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_P.sam @@ -0,0 +1,41 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:z LN:13 +@CO +@CO Copyright (c) 2018 Genome Research Ltd. +@CO +@CO Permission is hereby granted, free of charge, to any person obtaining +@CO a copy of this software and associated documentation files (the +@CO "Software"), to deal in the Software without restriction, including +@CO without limitation the rights to use, copy, modify, merge, publish, +@CO distribute, sublicense, and/or sell copies of the Software, and to +@CO permit persons to whom the Software is furnished to do so, subject +@CO to the following conditions: +@CO +@CO The above copyright notice and this permission notice shall be included +@CO in all copies or substantial portions of the Software. +@CO +@CO THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +@CO OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +@CO MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +@CO IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +@CO CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +@CO TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +@CO SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+@CO +@CO Various I combinations with (P)ads +@CO +@CO 1 +@CO 12345 67890 Depadded base numbering +@CO ref AGCTT AGCAG +@CO +@CO s1 AGCTT AGCAG 10M +@CO s2 AGCTTGGCCAGCAG 5M 4I 5M +@CO s3 AGCTTGG**AGCAG 5M 2I 2P 5M +@CO s4 AGCTT*GC*AGCAG 5M 1P 2I 1P 5M +@CO s5 AGCTT**CCAGCAG 5M 2P 2I 5M +@CO qual 01234ABCD56789 +s1 0 z 2 0 10M * 0 0 AGCTTAGCAG 0123456789 +s2 0 z 2 0 5M4I5M * 0 0 AGCTTGGCCAGCAG 01234ABCD56789 +s3 0 z 2 0 5M2I2P5M * 0 0 AGCTTGGAGCAG 01234AB56789 +s4 0 z 2 0 5M1P2I1P5M * 0 0 AGCTTGCAGCAG 01234BC56789 +s5 0 z 2 0 5M2P2I5M * 0 0 AGCTTCCAGCAG 01234CD56789 diff --git a/src/htslib-1.21/test/mpileup/mp_overlap1.out b/src/htslib-1.21/test/mpileup/mp_overlap1.out new file mode 100644 index 0000000..56d70b0 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_overlap1.out @@ -0,0 +1,12 @@ +1 100003 2 ^St^+T {! +1 100004 2 aA-5() {! +1 100005 2 a* N! +1 100006 2 g* N! +1 100007 2 c* N! +1 100008 2 a* N! +1 100009 2 c* N! +1 100010 2 aA {! +1 100011 2 cC {! +1 100012 2 aA {! +1 100013 2 gG {! +1 100014 2 a$A$ {! diff --git a/src/htslib-1.21/test/mpileup/mp_overlap1.sam b/src/htslib-1.21/test/mpileup/mp_overlap1.sam new file mode 100644 index 0000000..0e3d14b --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_overlap1.sam @@ -0,0 +1,4 @@ +@HD VN:1.5 SO:coordinate +@SQ SN:1 LN:249250621 +r1 147 1 100003 50 12M * 0 0 TAAGCACACAGA ZZZZZZZZZZZZ +r1 99 1 100003 10 2M5D5M * 0 0 TAACAGA BBBBBBB diff --git a/src/htslib-1.21/test/mpileup/mp_overlap2.out b/src/htslib-1.21/test/mpileup/mp_overlap2.out new file mode 100644 index 0000000..7e5af6d --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_overlap2.out @@ -0,0 +1,12 @@ +1 100003 2 ^+T^St {! +1 100004 2 A-5()a {! +1 100005 2 *a {! +1 100006 2 *g {! +1 100007 2 *c {! +1 100008 2 *a {! +1 100009 2 *c {! +1 100010 2 Aa {! +1 100011 2 Cc {! +1 100012 2 Aa {! +1 100013 2 Gg {! +1 100014 2 A$a$ {! 
diff --git a/src/htslib-1.21/test/mpileup/mp_overlap2.sam b/src/htslib-1.21/test/mpileup/mp_overlap2.sam new file mode 100644 index 0000000..ba9b517 --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mp_overlap2.sam @@ -0,0 +1,4 @@ +@HD VN:1.5 SO:coordinate +@SQ SN:1 LN:249250621 +r1 99 1 100003 10 2M5D5M * 0 0 TAACAGA BBBBBBB +r1 147 1 100003 50 12M * 0 0 TAAGCACACAGA ZZZZZZZZZZZZ diff --git a/src/htslib-1.21/test/mpileup/mpileup.tst b/src/htslib-1.21/test/mpileup/mpileup.tst new file mode 100644 index 0000000..534383e --- /dev/null +++ b/src/htslib-1.21/test/mpileup/mpileup.tst @@ -0,0 +1,78 @@ +# Copyright (C) 2017-2018 Genome Research Ltd. +# +# Author: Robert Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# First field: +# INIT = initialisation, not counted in testing +# P = expected to pass (zero return; expected output matches, if present) +# N = expected to return non-zero +# F = expected to fail +# +# Second field (P/N/F only): +# Filename of expected output. If '.', output is not checked +# +# Rest: +# Command to execute. $pileup is replaced with the path to the pileup test +# program + +# Deletions +P mp_D.out $pileup mp_D.sam +P mp_D.out $pileup -m mp_D.sam + +# Deletions followed by insertions +P mp_DI.out $pileup mp_DI.sam +P mp_DI.out $pileup -m mp_DI.sam + +# NB: pileup currently cannot return leading insertions. +# Test output reflects this. +# Insertions +P mp_I.out $pileup mp_I.sam +P mp_I.out $pileup -m mp_I.sam +P mp_P.out $pileup mp_P.sam +P mp_P.out $pileup -m mp_P.sam + +# Insertions followed by deletions +P mp_ID.out $pileup mp_ID.sam +P mp_ID.out $pileup -m mp_ID.sam + +# Ref skips +P mp_N.out $pileup mp_N.sam +P mp_N.out $pileup -m mp_N.sam + +# Ref skips and deletions +P mp_N2.out $pileup mp_N2.sam +P mp_N2.out $pileup -m mp_N2.sam + +# Various combinations of insertions, deletions and pads +P c1#pad1.out $pileup c1#pad1.sam +P c1#pad1.out $pileup -m c1#pad1.sam +P c1#pad2.out $pileup c1#pad2.sam +P c1#pad2.out $pileup -m c1#pad2.sam +P c1#pad3.out $pileup c1#pad3.sam +P c1#pad3.out $pileup -m c1#pad3.sam + +# Issue #852. Problem caused by alignments with entirely S/I ops in CIGAR. 
+P small.out $pileup -m small.bam + +# Overlap removal and the effect on quality values +P mp_overlap1.out $pileup -m mp_overlap1.sam +P mp_overlap2.out $pileup -m mp_overlap2.sam + diff --git a/src/htslib-1.21/test/mpileup/small.bam b/src/htslib-1.21/test/mpileup/small.bam new file mode 100644 index 0000000..94e4797 Binary files /dev/null and b/src/htslib-1.21/test/mpileup/small.bam differ diff --git a/src/htslib-1.21/test/mpileup/small.out b/src/htslib-1.21/test/mpileup/small.out new file mode 100644 index 0000000..13f943a --- /dev/null +++ b/src/htslib-1.21/test/mpileup/small.out @@ -0,0 +1,322 @@ +2 1 1 ^]T A +2 2 1 G E +2 3 1 G D +2 4 1 A @ +2 5 1 G ? +2 6 1 A ? +2 7 1 G E +2 8 1 C C +2 9 1 A B +2 10 1 C J +2 11 1 A B +2 12 1 T C +2 13 1 A A +2 14 1 A D +2 15 1 C J +2 16 1 T A +2 17 1 T A +2 18 1 G J +2 19 1 G I +2 20 1 G D +2 21 1 T @ +2 22 1 G I +2 23 1 A < +2 24 1 G J +2 25 1 A A +2 26 1 T < +2 27 1 G I +2 28 1 A ? +2 29 1 T @ +2 30 1 G K +2 31 2 A^]A BA +2 32 2 AA E< +2 33 2 AA E@ +2 34 2 TT A@ +2 35 2 GG KF +2 36 2 AA C; +2 37 2 GG K? +2 38 2 CC HF +2 39 2 AA DA +2 40 2 CC J= +2 41 2 TT FE +2 42 2 GG II +2 43 2 GG K; +2 44 2 CC I= +2 45 2 TT GE +2 46 2 TT CA +2 47 2 TT AB +2 48 2 GG 1H +2 49 2 GG :9 +2 50 2 AA =9 +2 51 2 GG FJ +2 52 2 TT AA +2 53 2 CC HH +2 54 2 AA CC +2 55 2 CC HG +2 56 2 AA C@ +2 57 2 CC HE +2 58 2 AA D> +2 59 2 GG AI +2 60 2 AA A@ +2 61 2 CC IG +2 62 2 CC 7( +2 63 2 AA +2 80 3 Aat !3) +2 81 3 Ccc !HU +2 82 3 Ccc !JP +2 83 3 Aaa !@Z +2 84 3 Ttt !3S +2 85 3 Aaa !@T +2 86 3 Aaa !=S +2 87 3 Ccc !HI +2 88 3 Acc !F9 +2 89 3 Ccc !EY +2 90 3 Ttt !<] +2 91 3 Ccc !5N +2 92 3 Tgt !-N +2 93 3 Aaa !C_ +2 94 3 Ggg !CT +2 95 3 Ttt ! 
+2 110 2 tt B> +2 111 2 cc JD +2 112 2 tt B@ +2 113 2 cc K; +2 114 2 aa ?C +2 115 2 gg B= +2 116 2 aa ?8 +2 117 2 cc J& +2 118 2 cc AF +2 119 2 tt B@ +2 120 2 cc J: +2 121 2 cc J> +2 122 2 cc IF +2 123 2 aa DC +2 124 2 gg H@ +2 125 2 cc I7 +2 126 2 cc IG +2 127 2 aa C7 +2 128 2 gg BB +2 129 2 aa >7 +2 130 2 aa <> +2 131 2 aa C> +2 132 2 gg F6 +2 133 2 gg F, +2 134 2 ag >0 +2 135 2 aa ?? +2 136 2 tt ?? +2 137 2 cc EB +2 138 2 t$t$ ?> +2 495 1 ^Ft E +2 496 1 t E +2 497 1 t D +2 498 1 g J +2 499 1 g L +2 500 1 c N +2 501 1 a D +2 502 1 a D +2 503 1 t F +2 504 1 t F +2 505 1 t C +2 506 1 a B +2 507 1 c N +2 508 1 a B +2 509 1 c N +2 510 1 t D +2 511 1 g L +2 512 1 t D +2 513 1 g L +2 514 1 t F +2 515 1 t B +2 516 1 a D +2 517 1 t C +2 518 1 a G +2 519 1 g J +2 520 1 c M +2 521 1 a C +2 522 1 a C +2 523 1 t B +2 524 1 a C +2 525 1 t B +2 526 1 a H +2 527 1 g L +2 528 1 t D +2 529 1 g M +2 530 1 a D +2 531 1 a D +2 532 1 a D +2 533 1 a H +2 534 1 g J +2 535 1 g J +2 536 1 g L +2 537 1 t D +2 538 1 g M +2 539 1 a C +2 540 1 t C +2 541 1 c M +2 542 1 a C +2 543 1 t D +2 544 1 t A +2 545 1 a @ +2 546 1 c L +2 547 1 c M +2 548 1 t C +2 549 1 c M +2 550 1 a C +2 551 1 a G +2 552 1 g K +2 553 1 a @ +2 554 1 c M +2 555 1 t C +2 556 1 g K +2 557 1 t E +2 558 1 t B +2 559 1 c K +2 560 1 a @ +2 561 1 c K +2 562 1 a A +2 563 1 a A +2 564 1 a @ +2 565 1 c I +2 566 1 a ? +2 567 1 c I +2 568 1 a > +2 569 1 t$ @ +2 648 1 ^gA ? +2 649 1 C F +2 650 1 G 0 +2 651 1 C D +2 652 1 A < +2 653 1 C < +2 654 1 C G +2 655 1 C > +2 656 1 T D +2 657 1 C H +2 658 1 T @ +2 659 1 A A +2 660 1 T > +2 661 1 C E +2 662 1 C : +2 663 1 C = +2 664 1 C H +2 665 1 A A +2 666 1 C F +2 667 1 A C +2 668 1 T < +2 669 1 A ? +2 670 1 A @ +2 671 1 A ? +2 672 1 T / +2 673 1 C : +2 674 1 T @ +2 675 1 A + +2 676 1 T > +2 677 1 A ? +2 678 1 C D +2 679 1 A B +2 680 1 A @ +2 681 1 C 5 +2 682 2 A^>a +2 687 2 Cc GH +2 688 2 Cc 2H +2 689 2 Cc :F +2 690 2 Tt E? +2 691 2 Cc GG +2 692 2 Tt C@ +2 693 2 Aa +? 
+2 694 2 Cc =G +2 695 2 Aa B@ +2 696 2 Cc FH +2 697 2 Cc J +2 701 2 Aa AA +2 702 2 Tt ?? +2 703 2 Aa 3A +2 704 2 Cc CM +2 705 2 Aa CC +2 706 2 Tt BB +2 707 2 Cc HL +2 708 2 Tt EB +2 709 2 Aa =C +2 710 2 Tt ?? +2 711 2 Aa @A +2 712 2 Cc CK +2 713 2 Aa =C +2 714 2 Aa ;A +2 715 2 Cc >K +2 716 2 Aa @@ +2 717 2 Cc 5B +2 718 2 Gg 8I +2 719 2 C$c (G +2 720 1 a = +2 721 1 c J +2 722 1 c K +2 723 1 c J +2 724 1 t A +2 725 1 c H +2 726 1 t > +2 727 1 a ? +2 728 1 c J +2 729 1 c L +2 730 1 c J +2 731 1 c L +2 732 1 a @ +2 733 1 c L +2 734 1 a B +2 735 1 t A +2 736 1 a ? +2 737 1 c C +2 738 1 g J +2 739 1 t B +2 740 1 c I +2 741 1 t @ +2 742 1 a @ +2 743 1 c J +2 744 1 a @ +2 745 1 c I +2 746 1 a B +2 747 1 a @ +2 748 1 c K +2 749 1 a @ +2 750 1 t B +2 751 1 g H +2 752 1 c H +2 753 1 a > +2 754 1 c B +2 755 1 g F +2 756 1 c$ ? diff --git a/src/htslib-1.21/test/mpileup/test-pileup.sh b/src/htslib-1.21/test/mpileup/test-pileup.sh new file mode 100755 index 0000000..8a83cca --- /dev/null +++ b/src/htslib-1.21/test/mpileup/test-pileup.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# +# Copyright (C) 2017-2018 Genome Research Ltd. +# +# Author: Robert Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Load in the test driver +. ../simple_test_driver.sh + +echo "Testing (m)pileup..." + +pileup="../pileup" + +test_driver $@ + +exit $? diff --git a/src/htslib-1.21/test/no_hdr_sq_1.bam b/src/htslib-1.21/test/no_hdr_sq_1.bam new file mode 100644 index 0000000..93d57a2 Binary files /dev/null and b/src/htslib-1.21/test/no_hdr_sq_1.bam differ diff --git a/src/htslib-1.21/test/no_hdr_sq_1.bam.csi b/src/htslib-1.21/test/no_hdr_sq_1.bam.csi new file mode 100644 index 0000000..e37fc63 Binary files /dev/null and b/src/htslib-1.21/test/no_hdr_sq_1.bam.csi differ diff --git a/src/htslib-1.21/test/no_hdr_sq_1.expected.sam b/src/htslib-1.21/test/no_hdr_sq_1.expected.sam new file mode 100644 index 0000000..ae05bb1 --- /dev/null +++ b/src/htslib-1.21/test/no_hdr_sq_1.expected.sam @@ -0,0 +1,16 @@ +@CO SN:CHROMOSOME_I LN:1009800 +@CO SN:CHROMOSOME_II LN:5000 +@CO SN:CHROMOSOME_III LN:5000 +@CO SN:CHROMOSOME_IV LN:5000 +@CO SN:CHROMOSOME_V LN:5000 +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +II.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +III 16 CHROMOSOME_I 2 1 
27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +IV 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +V 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +VI 2048 CHROMOSOME_I 2 1 27M100000D73M * 0 0 ACTAAGCCTAAGCCTAAGCCTAAGCCAATTATCGATTTCTGAAAAAATTATCGAATTTTCTAGAAATTTTGCAAATTTTTTCATAAAATTATCGATTTTA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC diff --git a/src/htslib-1.21/test/noroundtrip-out.vcf b/src/htslib-1.21/test/noroundtrip-out.vcf new file mode 100644 index 0000000..21fa160 --- /dev/null +++ b/src/htslib-1.21/test/noroundtrip-out.vcf @@ -0,0 +1,11 @@ +##fileformat=VCFv4.3 +##FILTER= +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 +3 50 . A T 0 PASS . GT 0/1 +3 60 . T C 0 PASS . GT 0/1 +3 70 . G A 0 PASS . GT 0/1 +3 80 . C G 0 PASS . GT 0/1 +3 90 . A G 0 PASS . GT:S 0/1:. diff --git a/src/htslib-1.21/test/noroundtrip.vcf b/src/htslib-1.21/test/noroundtrip.vcf new file mode 100644 index 0000000..61206bf --- /dev/null +++ b/src/htslib-1.21/test/noroundtrip.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.3 +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 +3 50 . A T 0 PASS . GT:GT 0/1 +3 60 . T C 0 PASS . GT 0/1 +3 70 . G A 0 PASS . GT:GT 0/1:. 
+3 80 . C G 0 PASS . GT:GT 0/1:0/1 +3 90 . A G 0 PASS . GT:S:S 0/1 diff --git a/src/htslib-1.21/test/pileup.c b/src/htslib-1.21/test/pileup.c new file mode 100644 index 0000000..757b2ae --- /dev/null +++ b/src/htslib-1.21/test/pileup.c @@ -0,0 +1,275 @@ +/* test/pileup.c -- simple pileup tester + + Copyright (C) 2014,2018-2019, 2024 Genome Research Ltd. + + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +/* + The output from this program isn't quite the same as that from + `samtools mpileup`. It doesn't print the reference base column, + it puts brackets around insertion sequences to make them easier to spot + and it writes empty brackets after a reported deletion. 
+ + The output from `samtools mpileup` can be converted to the same format like + this: + +samtools mpileup -B -Q 0 in.bam | perl -lane \ + 'pop(@F); + splice(@F, 2, 1); + $F[3] =~ s/\+(\d+)([ACGTN]+)/sprintf("+%d(%s)%s",$1,substr($2,0,$1),substr($2,$1))/ieg; + $F[3] =~ s/\-(\d+)([ACGTN]+)/sprintf("-%d()%s",$1,substr($2,$1))/ieg; + print join("\t", @F);' + + */ + +#include + +#include +#include +#include +#include +#include + +#include "../htslib/sam.h" +#include "../htslib/kstring.h" + +#define MIN(a,b) ((a)<(b)?(a):(b)) + +typedef struct ptest_t { + const char *fname; + samFile *fp; + sam_hdr_t *fp_hdr; +} ptest_t; + +static int readaln(void *data, bam1_t *b) { + ptest_t *g = (ptest_t*)data; + int ret; + + while (1) { + ret = sam_read1(g->fp, g->fp_hdr, b); + if (ret < 0) break; + if ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) ) continue; + break; + } + + return ret; +} + +static int print_pileup_seq(const bam_pileup1_t *p, int n) { + kstring_t ks = { 0, 0, NULL }; + int i; + + for (i = 0; i < n; i++, p++) { + uint8_t *seq = bam_get_seq(p->b); + int del_len, is_rev = bam_is_rev(p->b); + + if (p->is_head) + putchar('^'), putchar('!'+MIN(p->b->core.qual,93)); + + if (p->is_del) + putchar(p->is_refskip ? (is_rev ? '<' : '>') : '*'); + else { + unsigned char c = seq_nt16_str[bam_seqi(seq, p->qpos)]; + putchar(is_rev ? tolower(c) : toupper(c)); + } + + del_len = -p->indel; + if (p->indel > 0) { + int j, len = bam_plp_insertion(p, &ks, &del_len); + if (len < 0) { + perror("bam_plp_insertion"); + goto fail; + } + printf("%+d(", len); + for (j = 0; j < len; j++) + putchar(is_rev ? 
+ tolower((uint8_t) ks.s[j]) : + toupper((uint8_t) ks.s[j])); + putchar(')'); + } + if (del_len > 0) { + printf("-%d()", del_len); + } + if (p->is_tail) + putchar('$'); + } + free(ks.s); + return 0; + + fail: + free(ks.s); + return -1; +} + +static void print_pileup_qual(const bam_pileup1_t *p, int n) { + int i; + + for (i = 0; i < n; i++, p++) { + uint8_t *qual = bam_get_qual(p->b); + uint8_t q = '~'; + if (p->qpos < p->b->core.l_qseq && + qual[p->qpos]+33 < '~') + q = qual[p->qpos]+33; + putchar(q); + } +} + +static int test_pileup(ptest_t *input) { + bam_plp_t plp = NULL; + const bam_pileup1_t *p; + int tid, pos, n = 0; + + plp = bam_plp_init(readaln, input); + if (!plp) { + perror("bam_plp_init"); + goto fail; + } + while ((p = bam_plp_auto(plp, &tid, &pos, &n)) != 0) { + if (tid < 0) break; + if (tid >= input->fp_hdr->n_targets) { + fprintf(stderr, + "bam_plp_auto returned tid %d >= header n_targets %d\n", + tid, input->fp_hdr->n_targets); + goto fail; + } + + printf("%s\t%d\t%d\t", input->fp_hdr->target_name[tid], pos+1, n); + + if (print_pileup_seq(p, n) < 0) + goto fail; + + putchar('\t'); + print_pileup_qual(p, n); + + putchar('\n'); + } + if (n < 0) { + fprintf(stderr, "bam_plp_auto failed for \"%s\"\n", input->fname); + goto fail; + } + + bam_plp_destroy(plp); + return 0; + + fail: + bam_plp_destroy(plp); + return -1; +} + +static int test_mpileup(ptest_t *input) { + bam_mplp_t iter = NULL; + const bam_pileup1_t *pileups[1] = { NULL }; + int n_plp[1] = { 0 }; + int tid, pos, n = 0; + + iter = bam_mplp_init(1, readaln, (void **) &input); + if (!iter) { + perror("bam_plp_init"); + goto fail; + } + if (bam_mplp_init_overlaps(iter) < 0) { + perror("bam_mplp_init_overlaps"); + goto fail; + } + + while ((n = bam_mplp_auto(iter, &tid, &pos, n_plp, pileups)) > 0) { + if (tid < 0) break; + if (tid >= input->fp_hdr->n_targets) { + fprintf(stderr, + "bam_mplp_auto returned tid %d >= header n_targets %d\n", + tid, input->fp_hdr->n_targets); + goto fail; + } + + 
printf("%s\t%d\t%d\t", input->fp_hdr->target_name[tid], pos+1, n_plp[0]); + + if (print_pileup_seq(pileups[0], n_plp[0]) < 0) + goto fail; + + putchar('\t'); + print_pileup_qual(pileups[0], n_plp[0]); + + putchar('\n'); + } + if (n < 0) { + fprintf(stderr, "bam_plp_auto failed for \"%s\"\n", input->fname); + goto fail; + } + + bam_mplp_destroy(iter); + return 0; + + fail: + bam_mplp_destroy(iter); + return -1; +} + +int main(int argc, char **argv) { + ptest_t g = { NULL, NULL, NULL }; + int use_mpileup = 0, opt; + + while ((opt = getopt(argc, argv, "m")) != -1) { + switch (opt) { + case 'm': + use_mpileup = 1; + break; + default: + fprintf(stderr, "Usage: %s [-m] \n", argv[0]); + return EXIT_FAILURE; + } + } + + if (optind >= argc) { + fprintf(stderr, "Usage: %s [-m] \n", argv[0]); + return EXIT_FAILURE; + } + + g.fname = argv[optind]; + g.fp = sam_open(g.fname, "r"); + if (!g.fp) { + fprintf(stderr, "Couldn't open \"%s\" : %s", g.fname, strerror(errno)); + goto fail; + } + g.fp_hdr = sam_hdr_read(g.fp); + if (!g.fp_hdr) { + fprintf(stderr, "Couldn't read header from \"%s\" : %s", + g.fname, strerror(errno)); + goto fail; + } + + if (use_mpileup) { + if (test_mpileup(&g) < 0) + goto fail; + } else { + if (test_pileup(&g) < 0) + goto fail; + } + + sam_hdr_destroy(g.fp_hdr); + sam_close(g.fp); + + return EXIT_SUCCESS; + + fail: + if (g.fp_hdr) sam_hdr_destroy(g.fp_hdr); + if (g.fp) sam_close(g.fp); + return EXIT_FAILURE; +} diff --git a/src/htslib-1.21/test/pileup_mod.c b/src/htslib-1.21/test/pileup_mod.c new file mode 100644 index 0000000..d725a09 --- /dev/null +++ b/src/htslib-1.21/test/pileup_mod.c @@ -0,0 +1,228 @@ +/* test/pileup_mod.c -- simple pileup tester with base modifications + + Copyright (C) 2020 Genome Research Ltd. 
+ + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include +#include "../htslib/sam.h" + +typedef struct { + samFile *fp; + sam_hdr_t *h; +} plp_dat; + +static int readaln(void *data, bam1_t *b) { + plp_dat *dat = (plp_dat *)data; + return sam_read1(dat->fp, dat->h, b); +} + +#ifndef MIN +# define MIN(a,b) ((a)<(b)?(a):(b)) +#endif + +// No modification reporting. +// This is just a simple base-line for comparison against mod_pileup1 for +// performance testing. 
+void process_pileup(sam_hdr_t *h, const bam_pileup1_t *p, + int tid, int pos, int n) { + kstring_t s = {0,0}; + printf("%s\t%d\t", sam_hdr_tid2name(h, tid), pos); + int i; + for (i = 0; i < n; i++, p++) { + if (p->is_del) { + putchar('*'); + continue; + } + + uint8_t *seq = bam_get_seq(p->b); + uint8_t *qual = bam_get_qual(p->b); + unsigned char c = seq_nt16_str[bam_seqi(seq, p->qpos)]; + putchar(c); + kputc(MIN('~','!'+qual[p->qpos]), &s); + } + putchar('\t'); + puts(s.l ? s.s : ""); + + free(s.s); +} + +// Initialise and destroy the base modifier state data. This is called +// as each new read is added or removed from the pileups. +int pileup_cd_create(void *data, const bam1_t *b, bam_pileup_cd *cd) { + hts_base_mod_state *m = hts_base_mod_state_alloc(); + if (bam_parse_basemod(b, m) < 0) { + hts_base_mod_state_free(m); + return -1; + } + cd->p = m; + return 0; +} + +int pileup_cd_destroy(void *data, const bam1_t *b, bam_pileup_cd *cd) { + hts_base_mod_state_free(cd->p); + return 0; +} + +// Report a line of pileup, including base modifications inline with +// the sequence (including qualities), as [...] 
+void process_mod_pileup1(sam_hdr_t *h, const bam_pileup1_t *p, + int tid, int pos, int n) { + kstring_t s = {0,0}; + printf("%s\t%d\t", sam_hdr_tid2name(h, tid), pos); + int i; + for (i = 0; i < n; i++, p++) { + if (p->is_del) { + putchar('*'); + continue; + } + + uint8_t *seq = bam_get_seq(p->b); + uint8_t *qual = bam_get_qual(p->b); + unsigned char c = seq_nt16_str[bam_seqi(seq, p->qpos)]; + putchar(c); + kputc(MIN('~','!'+qual[p->qpos]), &s); + + // Simple mod detection; assumes at most 5 mods + hts_base_mod_state *m = p->cd.p; + hts_base_mod mod[5]; + int nm; + if ((nm = bam_mods_at_qpos(p->b, p->qpos, m, mod, 5)) > 0) { + int j; + putchar('['); + for (j = 0; j < nm && j < 5; j++) { + if (mod[j].modified_base < 0) + // ChEBI + printf("%c(%d)%d", "+-"[mod[j].strand], + -mod[j].modified_base, mod[j].qual); + else + printf("%c%c%d", "+-"[mod[j].strand], + mod[j].modified_base, mod[j].qual); + } + putchar(']'); + } + } + putchar('\t'); + puts(s.l ? s.s : ""); + + free(s.s); +} + +// Report a line of pileup, including base modifications. +// This replaces the base with the mod call (NB this can be confusing +// as both C and G can map to m depending on orientation). +// It also reports qualities in the QUAl column, remapped to +// phred scale as only one single mod is supported and hence extreme +// unlikely probabilities shouldn't be reported (although we don't +// scan to pick the highest). 
+void process_mod_pileup2(sam_hdr_t *h, const bam_pileup1_t *p, + int tid, int pos, int n) { + kstring_t s = {0,0}; + printf("%s\t%d\t%d\t", sam_hdr_tid2name(h, tid), pos, n); + int i; + for (i = 0; i < n; i++, p++) { + if (p->is_del) { + putchar('*'); + continue; + } + + uint8_t *seq = bam_get_seq(p->b); + uint8_t *qual = bam_get_qual(p->b); + unsigned char c = seq_nt16_str[bam_seqi(seq, p->qpos)]; + + // Simple mod detection; assumes at most 2 non-ChEBI mods + hts_base_mod_state *m = p->cd.p; + int n, is_rev = bam_is_rev(p->b); + hts_base_mod mod; + char base; + uint8_t q = qual[p->qpos]; + if ((n = bam_mods_at_qpos(p->b, p->qpos, m, &mod, 1)) > 0) { + base = mod.modified_base; + // base mod as phred scale + q = -10 * log10(1-((mod.qual+0.5)/256)) + 0.5; + } else { + base = c; + } + + // Case is inappropriate here as some mods (eg "a") are lc. + // So we dim/bold them instead using ANSI escape codes. + // It's a test script, so I'm not going to care about curses. + if (is_rev) { + printf("\033[2m%c\033[0m", base); + } else { + printf("\033[1m%c\033[0m", base); + } + kputc(MIN('~','!'+q), &s); + } + putchar('\t'); + puts(s.l ? s.s : ""); + + free(s.s); +} + +int main(int argc, char **argv) { + int compact = 0; + while (argc > 1 && strcmp(argv[1], "-c") == 0) { + compact++; + argc--; + argv++; + } + + samFile *in = sam_open(argc > 1 ? 
argv[1] : "-", "r"); + bam1_t *b = bam_init1(); + sam_hdr_t *h = sam_hdr_read(in); + + // Pileup iterator with constructor/destructor to parse base mod tags + plp_dat dat = { + .fp = in, + .h = h, + }; + bam_plp_t iter = bam_plp_init(readaln, &dat); + bam_plp_constructor(iter, pileup_cd_create); + bam_plp_destructor(iter, pileup_cd_destroy); + + const bam_pileup1_t *p; + int tid, pos, n = 0; + while ((p = bam_plp_auto(iter, &tid, &pos, &n)) != 0) { + switch (compact) { + case 0: + process_mod_pileup1(h, p, tid, pos, n); + break; + case 1: + process_mod_pileup2(h, p, tid, pos, n); + break; + default: + process_pileup(h, p, tid, pos, n); + break; + } + } + bam_plp_destroy(iter); + + sam_close(in); + bam_destroy1(b); + sam_hdr_destroy(h); + + return n != 0; +} diff --git a/src/htslib-1.21/test/plugins-dlhts.c b/src/htslib-1.21/test/plugins-dlhts.c new file mode 100644 index 0000000..33f432f --- /dev/null +++ b/src/htslib-1.21/test/plugins-dlhts.c @@ -0,0 +1,186 @@ +/* test/plugins-dlhts.c -- Test plugins with dynamically loaded libhts. + + Copyright (C) 2020 University of Glasgow. + + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include + +#if defined _WIN32 || defined __CYGWIN__ || defined __MSYS__ +#define SKIP "running on Windows" +#elif !defined ENABLE_PLUGINS +#define SKIP "plugins being disabled" +#endif + +#ifndef SKIP + +#include +#include +#include +#include +#include + +#ifndef EPROTONOSUPPORT +#define EPROTONOSUPPORT ENOSYS +#endif + +void *sym(void *htslib, const char *name) +{ + void *ptr = dlsym(htslib, name); + if (ptr == NULL) { + fprintf(stderr, "Can't find symbol \"%s\": %s\n", name, dlerror()); + exit(EXIT_FAILURE); + } + return ptr; +} + +typedef void void_func(void); +void_func *func(void *htslib, const char *name) { + void_func *fptr; + *(void **) &fptr = sym(htslib, name); + return fptr; +} + +int errors = 0; +int verbose = 0; + +struct hFILE; +typedef struct hFILE *hopen_func(const char *fname, const char *mode, ...); +typedef void hclose_abruptly_func(struct hFILE *fp); + +hopen_func *hopen_p; +hclose_abruptly_func *hclose_abruptly_p; + +void test_hopen(const char *fname, int expected) +{ + struct hFILE *fp = hopen_p(fname, "r"); + if (fp) { + hclose_abruptly_p(fp); + fprintf(stderr, "Opening \"%s\" actually succeeded\n", fname); + errors++; + return; + } + + int supported = (errno != EPROTONOSUPPORT); + if (supported != expected) { + fprintf(stderr, "Opening \"%s\" failed badly: %s\n", fname, strerror(errno)); + errors++; + } + else if (verbose) + printf("Opening \"%s\" produces %s\n", fname, strerror(errno)); +} + +void verbose_log(const char *message) +{ + fflush(stderr); + if (verbose) puts(message); + fflush(stdout); +} + +int main(int argc, char **argv) +{ + int dlflags = RTLD_NOW; + int skip = 0; + int c; + + while ((c = getopt(argc, argv, "glv")) >= 0) + switch (c) { + 
case 'g': dlflags |= RTLD_GLOBAL; break; + case 'l': dlflags |= RTLD_LOCAL; break; + case 'v': verbose++; break; + } + + if (optind >= argc) { + fprintf(stderr, "Usage: plugins-dlhts [-glv] LIBHTSFILE\n"); + return EXIT_FAILURE; + } + + void *htslib = dlopen(argv[optind], dlflags); + if (htslib == NULL) { + fprintf(stderr, "Can't dlopen \"%s\": %s\n", argv[optind], dlerror()); + return EXIT_FAILURE; + } + + if (verbose) { + int *hts_verbosep = sym(htslib, "hts_verbose"); + *hts_verbosep += verbose; + + typedef const char *cstr_func(void); + printf("Loaded HTSlib %s\n", ((cstr_func *) func(htslib, "hts_version"))()); + } + + hopen_p = (hopen_func *) func(htslib, "hopen"); + hclose_abruptly_p = (hclose_abruptly_func *) func(htslib, "hclose_abruptly"); + + test_hopen("bad-scheme:unsupported", 0); + +#ifdef __APPLE__ + /* Skip -l tests as we don't link plugins back to libhts on macOS, as this + would conflict with a statically linked libhts.a on this platform. */ + skip = (dlflags & RTLD_LOCAL) != 0; +#endif + + if (! 
skip) { +#ifdef HAVE_LIBCURL + test_hopen("https://localhost:99999/invalid_port", 1); +#endif +#ifdef ENABLE_GCS + test_hopen("gs:invalid", 1); +#endif +#ifdef ENABLE_S3 + test_hopen("s3:invalid", 1); +#endif + } + else + verbose_log("Skipping most tests"); + + verbose_log("Calling hts_lib_shutdown()"); + (func(htslib, "hts_lib_shutdown"))(); + + verbose_log("Calling dlclose(htslib)"); + if (dlclose(htslib) < 0) { + fprintf(stderr, "Can't dlclose \"%s\": %s\n", argv[optind], dlerror()); + errors++; + } + + verbose_log("Returning from main()"); + + if (errors > 0) { + printf("FAILED: %d errors\n", errors); + return EXIT_FAILURE; + } + + if (verbose) printf("All tests passed\n"); + return EXIT_SUCCESS; +} + +#else + +int main(void) +{ + printf("Tests skipped due to " SKIP "\n"); + return EXIT_SUCCESS; +} + +#endif diff --git a/src/htslib-1.21/test/range.bam b/src/htslib-1.21/test/range.bam new file mode 100644 index 0000000..5cb4270 Binary files /dev/null and b/src/htslib-1.21/test/range.bam differ diff --git a/src/htslib-1.21/test/range.bam.bai b/src/htslib-1.21/test/range.bam.bai new file mode 100644 index 0000000..b0b7432 Binary files /dev/null and b/src/htslib-1.21/test/range.bam.bai differ diff --git a/src/htslib-1.21/test/range.cram b/src/htslib-1.21/test/range.cram new file mode 100644 index 0000000..5be551c Binary files /dev/null and b/src/htslib-1.21/test/range.cram differ diff --git a/src/htslib-1.21/test/range.cram.crai b/src/htslib-1.21/test/range.cram.crai new file mode 100644 index 0000000..6164b7c Binary files /dev/null and b/src/htslib-1.21/test/range.cram.crai differ diff --git a/src/htslib-1.21/test/range.out b/src/htslib-1.21/test/range.out new file mode 100644 index 0000000..8733ed8 --- /dev/null +++ b/src/htslib-1.21/test/range.out @@ -0,0 +1,17 @@ +@HD VN:1.4 SO:coordinate +@RG ID:1 PL:ILLUMINA PU:130410_HS18_09653_A_C1JT2ACXX_4 LB:7053878 DT:2013-04-10T00:00:00+0100 SM:ERS225193 CN:SC +@SQ SN:CHROMOSOME_I LN:1009800 
M5:8ede36131e0dbf3417807e48f77f3ebd UR:/ +@SQ SN:CHROMOSOME_II LN:5000 M5:8e7993f7a93158587ee897d7287948ec UR:/ +@SQ SN:CHROMOSOME_III LN:5000 M5:3adcb065e1cf74fafdbba1e8c352b323 UR:/ +@SQ SN:CHROMOSOME_IV LN:5000 M5:251af66a69ee589c9f3757340ec2de6f UR:/ +@SQ SN:CHROMOSOME_V LN:5000 M5:cf200a65fb754836dcc56b24b3170ee8 UR:/ +@SQ SN:CHROMOSOME_X LN:5000 M5:6f9368fd2192c89c613718399d2d31fc UR:/ +@SQ SN:CHROMOSOME_MtDNA LN:5000 M5:cd05857ece6411f40257a565ccfe15bb UR:/ +@PG ID:scramble PN:scramble VN:1.14.7 CL:scramble -M -I sam -s 50 -r /tmp/ce.fa - /tmp/ERR304769_subset.cram +HS18_09653:4:2112:13048:11874 99 CHROMOSOME_II 2976 60 100M = 3206 330 CTCAAGCTAATAGAGTATGGACAATTGTGAACGGAGAGGTTCAATGGAAGACTCCACCGCGGTAAGTGTGTTTCTTTAAAAATTACTTCCTTTTTTCAAT DCEFDEGGFFGGGGGGFGGGGGGHDGHI?FGGGJGGHHGECGIH?HFGEHGHHGHEHHHHGHGGGGCGHHGHGHFGGGGHHGHGGGGIIGGGIGHHHG:G X0:i:1 X1:i:0 BC:Z:GTGTGCGG XG:i:0 AM:i:37 SM:i:37 XM:i:0 XO:i:0 QT:Z::=+4AD04 XT:A:U MD:Z:100 NM:i:0 RG:Z:1 +HS18_09653:4:1204:2530:59238 83 CHROMOSOME_IV 1422 0 100M = 922 -600 ATCCGTTTGATGAGGAAGAGGAAGAAGAATCACAGTTTGGAGGGGGAACTCTGTCCGGTAGAGACCCATTTGATGAAGATGTACGTTTTTCGTAAAGTTC GHCFGHFFFGGCCFHGGFGHEHGBJGGGGFGHFFH?@GHHEGGFAGE@BEGGFBDE?F X0:i:2 X1:i:0 XA:Z:CHROMOSOME_IV,+46358,100M,0; BC:Z:GTNTGCNG XG:i:0 AM:i:0 SM:i:0 XM:i:0 XO:i:0 QT:Z:<CDDGEEFFFDHFFGDIJHEIJDGFFFIEIGIFFFFHGHFGF?BHHGGGGFGIGGHHGGFGHGFFIHGGHHEEHEDGHJHGGHBGGGFFGGEHHFHFGGG X0:i:2 X1:i:11 XG:i:1 AM:i:0 SM:i:0 XM:i:3 XO:i:1 XT:A:R MD:Z:28G29^A13C21G6 NM:i:4 RG:Z:1 +HS18_09653:4:2314:14991:85680 83 CHROMOSOME_I 1020 10 28M72S = 853 -195 TGAATATCATAGCTATAGAAACGGTAGTATTTTACTCTCTGTGGCTTCACAGTATATTTTACTCTCTGTGGCTTCACAGTATTTTTTACTCTCTGTGGCT GGCFGHHGGGDHHGEGGHGH;FGGEGEGHFHHFBHBFGCJEHD<3BGH;GEGHFEGIFEIEFFHGG8GIEFFDHFIHFFGEHBFGDGGEGBFG5GEECCD BC:Z:GTNTGCAG XG:i:0 AM:i:10 SM:i:10 XM:i:3 XO:i:0 QT:Z:==!2@A+4 XT:A:M MD:Z:8G6C9T2 NM:i:3 RG:Z:1 diff --git a/src/htslib-1.21/test/range.out2 b/src/htslib-1.21/test/range.out2 new file mode 100644 index 0000000..22e6fd5 --- /dev/null 
+++ b/src/htslib-1.21/test/range.out2 @@ -0,0 +1,21 @@ +@HD VN:1.4 SO:coordinate +@RG ID:1 PL:ILLUMINA PU:130410_HS18_09653_A_C1JT2ACXX_4 LB:7053878 DT:2013-04-10T00:00:00+0100 SM:ERS225193 CN:SC +@SQ SN:CHROMOSOME_I LN:1009800 M5:8ede36131e0dbf3417807e48f77f3ebd UR:/ +@SQ SN:CHROMOSOME_II LN:5000 M5:8e7993f7a93158587ee897d7287948ec UR:/ +@SQ SN:CHROMOSOME_III LN:5000 M5:3adcb065e1cf74fafdbba1e8c352b323 UR:/ +@SQ SN:CHROMOSOME_IV LN:5000 M5:251af66a69ee589c9f3757340ec2de6f UR:/ +@SQ SN:CHROMOSOME_V LN:5000 M5:cf200a65fb754836dcc56b24b3170ee8 UR:/ +@SQ SN:CHROMOSOME_X LN:5000 M5:6f9368fd2192c89c613718399d2d31fc UR:/ +@SQ SN:CHROMOSOME_MtDNA LN:5000 M5:cd05857ece6411f40257a565ccfe15bb UR:/ +@PG ID:scramble PN:scramble VN:1.14.7 CL:scramble -M -I sam -s 50 -r /tmp/ce.fa - /tmp/ERR304769_subset.cram +HS18_09653:4:2108:14085:93656 147 CHROMOSOME_I 1122 60 100M = 756 -466 AATTTGCAAGAAAATTCGCAAGAAATTTGTATTAAAAACTGTTCAAAATTTTTGGAAATTAGTTTAAAAATCTCACATTTTTTTTAGAAAAATTATTTTT GEFGHHFHEGGIFEFHFHFECDDE? 
X0:i:1 X1:i:0 XG:i:0 AM:i:37 SM:i:37 XM:i:0 XO:i:0 XT:A:U MD:Z:100 NM:i:0 RG:Z:1 +HS18_09653:4:2314:21094:58611 99 CHROMOSOME_II 1353 60 100M = 1775 522 ATTTTTCTATTCTTGTGAGCTCAGGACACCTCATACAACTCCAGAGAAAATGTGTCTCATTATTCTTGTCTTTTTTCAAGATCTAATCAATTTTCTACAT D;?FBD9CDBGBGG?GF8DFGFFHDACDGFGGD/HGHHGFFEFGD=FGIG0D.GH7HHFFGFDGGFF:HFDGGHGGGGE;F:@GGEGGCFGFGHHB@FHG X0:i:1 X1:i:0 BC:Z:GTNTGCCG XG:i:0 AM:i:37 SM:i:37 XM:i:0 XO:i:0 QT:Z:=?!4AD22 XT:A:U MD:Z:100 NM:i:0 RG:Z:1 +HS18_09653:4:2108:10782:59721 83 CHROMOSOME_II 1366 60 100M = 1241 -225 TGTGAGCTCAGGACACCTCATACAACTCCAGAGAAAATGTGTCTCATTATTCTTGTCTTTTTTCAAGATCTAATCAATTTTCTACATTAACGACGTTTTT IFGHDHHFFGHIIEGGGHEHHJGGGFGGHFHHGFGGGGGGHHDHFHGIF=IFIFHIGIHGHF=HGJGGGFGGGHEEHGFGGFGEGGGGEGFFGGGFEBCD X0:i:1 X1:i:0 BC:Z:GTNTGCCG XG:i:0 AM:i:37 SM:i:37 XM:i:0 XO:i:0 QT:Z:=?!4AD+2 XT:A:U MD:Z:100 NM:i:0 RG:Z:1 +HS18_09653:4:2111:5602:28724 99 CHROMOSOME_II 1416 60 100M = 1881 565 TCTTGTCTTTTTTCAAGATCTAATCAATTTTCTACATTAACGACGTTTTTGTCGTTCTGCTTCTTTTTTTCGTTCGTTTGTCTCGTCCATCAGCTGTCCA ECE>EGGGGFGGGGDGFEFGGGFHEGHGIIFGFEJGHHFGGGHFGEFHIHGFFGGECGFHHGGFGHIHHHGEGGHBGBGHHEHGEBGGFFGFFHHGCGFF X0:i:1 X1:i:0 BC:Z:GTNTGCCG XG:i:0 AM:i:37 SM:i:37 XM:i:0 XO:i:0 QT:Z:=@!4AD24 XT:A:U MD:Z:100 NM:i:0 RG:Z:1 +HS18_09653:4:2103:6720:15025 99 CHROMOSOME_II 1459 60 100M = 1617 258 CGTTTTTGTCGTTCTGCTTCTTTTTTTCGTTCGTTTGTCTCGTCCATCAGCTGTCCACTCATTTCTCTCCCACTCACTAGGCAGTGCTTTGTTTGGTTCC ECEFFGGGEHGEGGGGGGHFGGGHIGHIGGGG?HFGHGEGFBFGGGFGIHGDGGDEDFFFGGAEFGGGDDGEGGFGGEEEGEIFGFG@E>EFGDG?HCFCF>DGGHDFFCHF>=G;CFBEHG000000F 57093-57778 +CAGACAAACATACACCATCAGACAGCAGCACCATATTCTTTTTTTCTGCTAATTTGCTAA +TTACACAAACACAGACACTAAGAAATATTCTTCCCCTTTGGAAAATGTTTTCCACATTGA +ATCATGGTTGATTTCATTCATATTTTTTACGCTAATTAAAAAAATAAAGACTCTTTCATG +TGAAAACAGAGCTTCACAAAGTACTCTCATTCTCACCGCTAGCAGGCTGACAATCAGCAG +CAGACAGAGCATGATGTGTCTGATGGTCTGGTTGTCATCTGTCTGGGATTGAAGGCCAGG +TGGCGGCAGATCTAATGGCTGCTCAGGAGGCAGAAGAACTCTGTTGCTGCCCATCACACA +GCGCCCCACTGTGGTCTGAAGTAAAAAAGTTCATGTACTCGCATGAGAGTTATGTGTAAA 
+ACAGAGCTCAATTTTATTTTGTATTTATTGTGCAGTTAGAAAATAATATGATCAAATCTG +TTTGTTTACCTGTGTTGTTTGTGTGCTCCCTCTGCGTGCTGGCTATCATGTCAGGTACTG +GTCTGGACCGGAGAGAAACTGTGGATGTCTGTCAACACAACAGAAAAAGTGACTGATTAG +TTAGATGTAACTAATTTCAACTATCTAGTCTTCCTCTAAAGACCCCTACTTTAACACAAT +ACAAATTACATCTAGATATTTGTTAT diff --git a/src/htslib-1.21/test/realn01.fa.fai b/src/htslib-1.21/test/realn01.fa.fai new file mode 100644 index 0000000..d6bd5ca --- /dev/null +++ b/src/htslib-1.21/test/realn01.fa.fai @@ -0,0 +1 @@ +000000F 686 21 60 61 diff --git a/src/htslib-1.21/test/realn01.sam b/src/htslib-1.21/test/realn01.sam new file mode 100644 index 0000000..1f961d1 --- /dev/null +++ b/src/htslib-1.21/test/realn01.sam @@ -0,0 +1,8 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:000000F LN:686 +@RG ID:rg SM:sample +@CO The alignment on the first read overhangs the reference by two bases +ST-E00128:308:HHVVLALXX:8:1217:16001:6565 147 000000F 532 60 6M1D117M5D28M = 195 -494 CAGGTATGGTCTGGACAGGAGAGAAACTGTGGATGTCTGTCAACACAACAGAAAAAGTGACTGATTAGTTAGATGTAACTAATTTCAACTATCTAGTCTTCCTCTAAAGACCCCTACTTTAACACAAATTACATCTAGATATTTGTTATTT 7AFAAF<)7<<7<7FA$!! 
MD:Z:6^C10C106^ACAAT28 PG:Z:MarkDuplicates NM:i:7 AS:i:129 XS:i:20 RG:Z:rg ZQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@QTieabT@@@@@@@@@@@@@@@@@FFHb`` +ST-E00128:308:HHVVLALXX:8:1217:16001:6566 147 000000F 532 60 6M1D117M5D26M2S = 195 -494 CAGGTATGGTCTGGACAGGAGAGAAACTGTGGATGTCTGTCAACACAACAGAAAAAGTGACTGATTAGTTAGATGTAACTAATTTCAACTATCTAGTCTTCCTCTAAAGACCCCTACTTTAACACAAATTACATCTAGATATTTGTTATTT 7AFAAF<)7<<7<7FA$AA PG:Z:MarkDuplicates RG:Z:rg ZQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@QTieabT@@@@@@@@@@@@@@@@@FFHb@@ +ST-E00128:308:HHVVLALXX:8:1217:16001:6567 147 000000F 532 60 151I = 195 -494 CAGGTATGGTCTGGACAGGAGAGAAACTGTGGATGTCTGTCAACACAACAGAAAAAGTGACTGATTAGTTAGATGTAACTAATTTCAACTATCTAGTCTTCCTCTAAAGACCCCTACTTTAACACAAATTACATCTAGATATTTGTTATTT 7AFAAF<)7<<7<7FA MD:Z:86 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz +ERR156632.12704932 163 17 1 29 36S64M = 195 293 TGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTG BFAFGFEIGFEFHHEIDKJGHHHJIIE=@KKGGKJGIBLLMFKMDIIHJKKHFELLLKFIHMHIHHIHLKJFCHFJIJAID=JHKFGHJIHKKCH:@HD? 
MD:Z:64 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz +ERR156632.9601178 99 17 1 29 62S38M = 279 377 CTATGACAGGGAGGTCATGTGCAGGCTGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGA DEEEIIHHKIJILKHLHIKEKHHMKLKKJGKKKKLKLFIHEKIKL=KLJLKIILHKMH9LJJJJLHLHJJKJJKMLKJD>MJKLEHIGHIH=FFCHF>BE MD:Z:38 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz +ERR162872.21706338 99 17 1 29 10S90M = 246 344 CTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCACCCA BHBFHDBC:CE>A8C>C>7DBA=BEDDB4=9;:@=;@D@@=B@E.3?972<>6@8=>?1$0:95%5%*1=8;0%4<228% X0:i:1 X1:i:0 XC:i:88 MD:Z:88 RG:Z:rg AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U BQ:Z:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz +ERR162875.26247502 83 17 2858 60 100M = 2581 -376 ACCCAGGGTGTCTGAAACAGATGTGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCTTTGAAAGGCCACGTGAC /;,FBHLI?CKH=BJ?FEE==DA MD:Z:4^C6C0A2T1T36G8G36 RG:Z:rg AM:i:29 NM:i:7 SM:i:29 MQ:i:29 XT:A:M BQ:Z:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz +ERR243091.881299 83 17 3491 60 100M = 3174 -416 CTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCCTGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAAAAA C5;=CAAEFDFEEEEBCI;JFIKHGHGHIIKGEKGJIGGIJG=GDIKGFIHIJEIHJFFFFDDDAI>IDJDFGFEIFGHGIFGFJFIDFGEHGGDB@A@? 
X0:i:1 X1:i:0 MD:Z:96G3 RG:Z:rg AM:i:37 NM:i:1 SM:i:37 MQ:i:60 XT:A:U BQ:Z:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz +ERR013140.23480670 133 17 3771 0 35M73S = 3771 0 TTCTCATCAATCCCTCATCTCTTATAACCATTTCGGTCCTTTCGGCCCTACAGCCACCTTGTTTATACTTGGTAAGACCCACACCACTCGCCAACTTACTCTACTCCC 8+7?5>09:),/%81,$,7<+?)+1+*+),3%5+)#%(4B%$&'%'/*@,)*%%&,%(/0%-&$$*$-,$3*.%/$:%$+.$*%&+.,.%%,%(%7(-.-',1*6%&$ XC:i:35 RG:Z:rg diff --git a/src/htslib-1.21/test/realn02.fa b/src/htslib-1.21/test/realn02.fa new file mode 100644 index 0000000..7c2ec2a --- /dev/null +++ b/src/htslib-1.21/test/realn02.fa @@ -0,0 +1,71 @@ +>17 17:1-4200 +AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAA +TGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGA +GCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAA +GACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACAC +GCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACA +GTGCCTGCGACAAAGCTGAATGCTATCATTTAAAAACTCCTTGCTGGTTTGAGAGGCAGA +AAATGATATCTCATAGTTGCTTTACTTTGCATATTTTAAAATTGTGACTTTCATGGCATA +AATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAG +GAGAGAGAAATGAAGACATATGTCCACACAAAAACCTGTTCATTGCAGCTTTCTACCATC +ACCAAAAATTGCAAACAACCACACGCCCTTCAACTGGGGAACTCATCAACAACAAACTTG +TGGTTTACCCACACAATGGAAGACCACTTAGCAACAAAAAGGACCAAACTCCTGGTACAT +GCAACTGACAGATGAATCTCAAACGCATTCCTCCGTGTGAAAGAAGCCGGACTCACAGGG +CAACACACTATCTGACTGTTTCATGGGAAAGTCTGGAAACGGCAACACCATTGAGACAGA +AAACAGGTGAGTGGTTGCCTGGGGCCAGGGAACTTTCTGGGGTCATATTCTCTGTGTTGA +TTCTGGTGGTGGAAACAAGACTGTCCCAGCCTGGGTGATACAGCGAGACCCCATCTCTAC +CAAAAAATTAAAAATTAGCTGGGCATGGTGGTGCATGCCTGTAGTCCCAGCTATTCACAG +TGCTGAGGTGGGAAGATGCTTGAGCCCAGGAGTTCAAGGCTGCAATGAGCTATGATTGCG +CCACTGCACTTTGGCCTGGACAACAGAGCAAAACCCTGTCTCTAAAAAAAGAAAAGAAAA +GAAAAACTCACTGGATATGAATGATACAGGTTGAGGATCCATTATCTGAAATGCTTGGAC +CAGATGTTTTGAATTTTGGATTTTTTCATATTTTGTAATCTTTGCAGTATATTTACCAGT +TCAGCATCCCTAACTCAAAAATTCAAAAATCTGAAATCCCAAACGCGCCAATAAGCATTC 
+CCTTTGAGCGTCATGTCGGTGCTTGGAATGTTTGGGGTTTTGGATTTACAGCTTTGGGAC +GCTCAACCTGTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCC +CGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTA +GGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGA +GCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTT +TGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGC +CACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAG +CGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCA +CTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCC +TGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGG +TCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACA +GTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAAACCCCTTCTGGTTATACATAAGACAGC +CAGAGAAGGGAGTTGCCCAGGGTGGCACAGCACGTTGCTGCCAGTTACTGCCATTTTCAC +GGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGC +AGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGC +ACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCG +GCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAG +CTGGGCATGGTGGCTTGCACCTGTAATCCCAGCACTTTGGGAGGCCGAGCTAGGAGGATC +GTTTGAGTCCAGCAGTTTGAGACCAGCCTGGCCAATACGGCAAAACCCAGTCTCTACAAA +AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGG +AGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGAGTTTGAGGCTGCTGTGAGCTGTGATCG +CATCACTGCATTCCAGCCCGGTGACAGAGTGAGTCACTGTCTCAAAAAAGAAAGGAAGAA +ATAAAGAAAACAAATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGG +CCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCT +CTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTAT +TTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGG +ACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATG +TGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCT +TTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAG +CAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTG +CCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAG +ACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGG 
+GCATTGAAACTGGTTTAAAAATGTCACACCATAGGCCGGGCACAGTGGCTCACGCCTGTA +ATCCCAGCCCTTTGGGAGGCCAGGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGACCA +GCCTGGCCAACATGGTGAAACCCCGTCTACTAAAAATACAAAAATTAGCCTGGCGTGGTG +GCGCATGCCTGTAATCCCAGCTACTTGGGAAGCTGAGGGATGAGAACTGCTTGAACCTGG +GAGGCAGACGTTGCAGTGAGCTGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGT +AAGACTCTGTCTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCC +TGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATAT +ATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTG +TTATAATAATTCCTCTAGTTCAAATTTATTCATTTTTAACTTCATAGTACCACATTCTAC +ACACTGCCCATGTCCCCTCAAGCTTCCCCTGGCTCCTGCAACCACAAATCTACTCTCTGC +CTCTGTGGGTTGACCTATTCTGGACACGTCATAGAAATAGAGTCCTGCAACACGTGGCCG +TCTGTGTCTGGCTTCTCTCGCTTAGCATCTTGTTTCCAAGGTCCTCCCACAGTGTAGCAT +GCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAT +GGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCT +ACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATAT +TCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTT +CTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCAC diff --git a/src/htslib-1.21/test/realn02.fa.fai b/src/htslib-1.21/test/realn02.fa.fai new file mode 100644 index 0000000..c211266 --- /dev/null +++ b/src/htslib-1.21/test/realn02.fa.fai @@ -0,0 +1 @@ +17 4200 14 60 61 diff --git a/src/htslib-1.21/test/realn02.sam b/src/htslib-1.21/test/realn02.sam new file mode 100644 index 0000000..42ef1f8 --- /dev/null +++ b/src/htslib-1.21/test/realn02.sam @@ -0,0 +1,12 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:17 LN:4200 SP:Human +@RG ID:rg SM:sample +ERR013140.3521432 99 17 1 29 22S86M = 226 313 AGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCA @AEDGBHIIIIIFJGIKHGHIJJJEJKHJKJKGKLLIFHKLLCJJIDEFFHKHEHHJIIIDJEEEJEIKGJIHCGKHFKFE9BBDIAJAHF4?DE@I:DD48(86D=> MD:Z:86 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M +ERR156632.12704932 163 17 1 29 36S64M = 195 293 
TGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTG BFAFGFEIGFEFHHEIDKJGHHHJIIE=@KKGGKJGIBLLMFKMDIIHJKKHFELLLKFIHMHIHHIHLKJFCHFJIJAID=JHKFGHJIHKKCH:@HD? MD:Z:64 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M +ERR156632.9601178 99 17 1 29 62S38M = 279 377 CTATGACAGGGAGGTCATGTGCAGGCTGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGA DEEEIIHHKIJILKHLHIKEKHHMKLKKJGKKKKLKLFIHEKIKL=KLJLKIILHKMH9LJJJJLHLHJJKJJKMLKJD>MJKLEHIGHIH=FFCHF>BE MD:Z:38 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M +ERR162872.21706338 99 17 1 29 10S90M = 246 344 CTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCACCCA BHBFHDBC:CE>A8C>C>7DBA=BEDDB4=9;:@=;@D@@=B@E.3?972<>6@8=>?1$0:95%5%*1=8;0%4<228% X0:i:1 X1:i:0 XC:i:88 MD:Z:88 RG:Z:rg AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U +ERR162875.26247502 83 17 2858 60 100M = 2581 -376 ACCCAGGGTGTCTGAAACAGATGTGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCTTTGAAAGGCCACGTGAC /;,FBHLI?CKH=BJ?FEE==DA MD:Z:4^C6C0A2T1T36G8G36 RG:Z:rg AM:i:29 NM:i:7 SM:i:29 MQ:i:29 XT:A:M +ERR243091.881299 83 17 3491 60 100M = 3174 -416 CTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCCTGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAAAAA C5;=CAAEFDFEEEEBCI;JFIKHGHGHIIKGEKGJIGGIJG=GDIKGFIHIJEIHJFFFFDDDAI>IDJDFGFEIFGHGIFGFJFIDFGEHGGDB@A@? 
X0:i:1 X1:i:0 MD:Z:96G3 RG:Z:rg AM:i:37 NM:i:1 SM:i:37 MQ:i:60 XT:A:U +ERR013140.23480670 133 17 3771 0 35M73S = 3771 0 TTCTCATCAATCCCTCATCTCTTATAACCATTTCGGTCCTTTCGGCCCTACAGCCACCTTGTTTATACTTGGTAAGACCCACACCACTCGCCAACTTACTCTACTCCC 8+7?5>09:),/%81,$,7<+?)+1+*+),3%5+)#%(4B%$&'%'/*@,)*%%&,%(/0%-&$$*$-,$3*.%/$:%$+.$*%&+.,.%%,%(%7(-.-',1*6%&$ XC:i:35 RG:Z:rg diff --git a/src/htslib-1.21/test/realn02_exp-a.sam b/src/htslib-1.21/test/realn02_exp-a.sam new file mode 100644 index 0000000..4cc897f --- /dev/null +++ b/src/htslib-1.21/test/realn02_exp-a.sam @@ -0,0 +1,12 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:17 LN:4200 SP:Human +@RG ID:rg SM:sample +ERR013140.3521432 99 17 1 29 22S86M = 226 313 AGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCA @AEDGBHIIIIIFJGIKHGHIJ!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! MD:Z:86 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M ZQ:Z:@@@@@@@@@@@@@@@@@@@@@@iidijgijijfjkkhegjkkbiihcdeegjgdggihhhcidddidhjfihgbfjgejedXaach`i`geS^cd_hYccSWGWUc\] +ERR156632.12704932 163 17 1 29 36S64M = 195 293 TGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTG BFAFGFEIGFEFHHEIDKJGHHHJIIE=@KKGGKJG!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! MD:Z:64 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M ZQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@hakklejlchhgijjgedkkkjehglghgghgkjiebgeihi`hc\igjefgihgjjbgY_gc^ +ERR156632.9601178 99 17 1 29 62S38M = 279 377 CTATGACAGGGAGGTCATGTGCAGGCTGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGA DEEEIIHHKIJILKHLHIKEKHHMKLKKJGKKKKLKLFIHEKIKL=KLJLKIILHKMH9LJJ!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
MD:Z:38 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M ZQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@iikgkgiijiijlkjic]lijkdghfghg\eebge]ad +ERR162872.21706338 99 17 1 29 10S90M = 246 344 CTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCACCCA BHBFHBEEIHHHICFJIFKGHIKJHII>DBC:CE>A8C>C>7DBA=BEDDB4=9;:@=;@D@@=B@E.3?972<>6@8=>?1$0:95%5%*1=8;0%4<228% X0:i:1 X1:i:0 XC:i:88 MD:Z:88 RG:Z:rg AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U ZQ:Z:EHG@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +ERR162875.26247502 83 17 2858 60 100M = 2581 -376 ACCCAGGGTGTCTGAAACAGATGTGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCTTTGAAAGGCCACGTGAC X0:i:1 X1:i:0 MD:Z:100 RG:Z:rg AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U ZQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@C +ERR156632.1508798 83 17 3006 29 1S4M1D95M = 2587 -518 CCCCAAGCAGCAGCCGGGGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTCCCCAGATGCCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCT /;,FBHLI?CKH=BJ?FEE==DA MD:Z:4^C6C0A2T1T36G8G36 RG:Z:rg AM:i:29 NM:i:7 SM:i:29 MQ:i:29 XT:A:M ZQ:Z:@FEIG@@@@@@A@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +ERR243091.881299 83 17 3491 60 100M = 3174 -416 CTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCCTGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAAAAA C5;=CAAEFDFEEEEBCI;JFIKHGHGHIIKGEKGJIGGIJG=GDIKGFIHIJEIHJFFFFDDDAI>IDJDFGFEIFGHGIFGFJFIDFGEH742+!!!! 
X0:i:1 X1:i:0 MD:Z:96G3 RG:Z:rg AM:i:37 NM:i:1 SM:i:37 MQ:i:60 XT:A:U ZQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@PSRW_`_^ +ERR013140.23480670 133 17 3771 0 35M73S = 3771 0 TTCTCATCAATCCCTCATCTCTTATAACCATTTCGGTCCTTTCGGCCCTACAGCCACCTTGTTTATACTTGGTAAGACCCACACCACTCGCCAACTTACTCTACTCCC 8+7?5>09:),/%81,$,7<+?)+1+*+),3%5+)#%(4B%$&'%'/*@,)*%%&,%(/0%-&$$*$-,$3*.%/$:%$+.$*%&+.,.%%,%(%7(-.-',1*6%&$ XC:i:35 RG:Z:rg diff --git a/src/htslib-1.21/test/realn02_exp-e.sam b/src/htslib-1.21/test/realn02_exp-e.sam new file mode 100644 index 0000000..2425019 --- /dev/null +++ b/src/htslib-1.21/test/realn02_exp-e.sam @@ -0,0 +1,12 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:17 LN:4200 SP:Human +@RG ID:rg SM:sample +ERR013140.3521432 99 17 1 29 22S86M = 226 313 AGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCA @AEDGBHIIIIIFJGIKHGHIJJJEJKHJKJKGKLLIFHKLLCJJIDEFFHKHEHHJIIIDJEEEJEIKGJIHCGKHFKFE9BBDIAJAHF4?DE@I:DD48(86D=> MD:Z:86 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:@@@@@@@@@@@@@@@@@@@@@@iidijgijijfjkkhegjkkbiihcdeegjgdggihhhcidddidhjfihgbfjgejedXaach`i`geS^cd_hYccSWGWUc\] +ERR156632.12704932 163 17 1 29 36S64M = 195 293 TGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTG BFAFGFEIGFEFHHEIDKJGHHHJIIE=@KKGGKJGIBLLMFKMDIIHJKKHFELLLKFIHMHIHHIHLKJFCHFJIJAID=JHKFGHJIHKKCH:@HD? 
MD:Z:64 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@hakklejlchhgijjgedkkkjehglghgghgkjiebgeihi`hc\igjefgihgjjbgY_gc^ +ERR156632.9601178 99 17 1 29 62S38M = 279 377 CTATGACAGGGAGGTCATGTGCAGGCTGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGA DEEEIIHHKIJILKHLHIKEKHHMKLKKJGKKKKLKLFIHEKIKL=KLJLKIILHKMH9LJJJJLHLHJJKJJKMLKJD>MJKLEHIGHIH=FFCHF>BE MD:Z:38 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@iikgkgiijiijlkjic]lijkdghfghg\eebge]ad +ERR162872.21706338 99 17 1 29 10S90M = 246 344 CTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCACCCA BHBFHDBC:CE>A8C>C>7DBA=BEDDB4=9;:@=;@D@@=B@E.3?972<>6@8=>?1$0:95%5%*1=8;0%4<228% X0:i:1 X1:i:0 XC:i:88 MD:Z:88 RG:Z:rg AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U BQ:Z:EHG@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +ERR162875.26247502 83 17 2858 60 100M = 2581 -376 ACCCAGGGTGTCTGAAACAGATGTGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCTTTGAAAGGCCACGTGAC /;,FBHLI?CKH=BJ?FEE==DA MD:Z:4^C6C0A2T1T36G8G36 RG:Z:rg AM:i:29 NM:i:7 SM:i:29 MQ:i:29 XT:A:M BQ:Z:@FEIG@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +ERR243091.881299 83 17 3491 60 100M = 3174 -416 CTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCCTGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAAAAA C5;=CAAEFDFEEEEBCI;JFIKHGHGHIIKGEKGJIGGIJG=GDIKGFIHIJEIHJFFFFDDDAI>IDJDFGFEIFGHGIFGFJFIDFGEHGGDB@A@? 
X0:i:1 X1:i:0 MD:Z:96G3 RG:Z:rg AM:i:37 NM:i:1 SM:i:37 MQ:i:60 XT:A:U BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@PSRW_`_^ +ERR013140.23480670 133 17 3771 0 35M73S = 3771 0 TTCTCATCAATCCCTCATCTCTTATAACCATTTCGGTCCTTTCGGCCCTACAGCCACCTTGTTTATACTTGGTAAGACCCACACCACTCGCCAACTTACTCTACTCCC 8+7?5>09:),/%81,$,7<+?)+1+*+),3%5+)#%(4B%$&'%'/*@,)*%%&,%(/0%-&$$*$-,$3*.%/$:%$+.$*%&+.,.%%,%(%7(-.-',1*6%&$ XC:i:35 RG:Z:rg diff --git a/src/htslib-1.21/test/realn02_exp.sam b/src/htslib-1.21/test/realn02_exp.sam new file mode 100644 index 0000000..7caca46 --- /dev/null +++ b/src/htslib-1.21/test/realn02_exp.sam @@ -0,0 +1,12 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:17 LN:4200 SP:Human +@RG ID:rg SM:sample +ERR013140.3521432 99 17 1 29 22S86M = 226 313 AGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCA @AEDGBHIIIIIFJGIKHGHIJJJEJKHJKJKGKLLIFHKLLCJJIDEFFHKHEHHJIIIDJEEEJEIKGJIHCGKHFKFE9BBDIAJAHF4?DE@I:DD48(86D=> MD:Z:86 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:@@@@@@@@@@@@@@@@@@@@@@iidijgijijfjkkhegjkkbiihcdeegjgdggihhhcidddidhjfihgbfjgejedXaach`i`geS^cd_hYccSWGWUc\] +ERR156632.12704932 163 17 1 29 36S64M = 195 293 TGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTG BFAFGFEIGFEFHHEIDKJGHHHJIIE=@KKGGKJGIBLLMFKMDIIHJKKHFELLLKFIHMHIHHIHLKJFCHFJIJAID=JHKFGHJIHKKCH:@HD? 
MD:Z:64 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@hakklejlchhgijjgedkkkjehglghgghgkjiebgeihi`hc\igjefgihgjjbgY_gc^ +ERR156632.9601178 99 17 1 29 62S38M = 279 377 CTATGACAGGGAGGTCATGTGCAGGCTGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGA DEEEIIHHKIJILKHLHIKEKHHMKLKKJGKKKKLKLFIHEKIKL=KLJLKIILHKMH9LJJJJLHLHJJKJJKMLKJD>MJKLEHIGHIH=FFCHF>BE MD:Z:38 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@iikgkgiijiijlkjic]lijkdghfghg\eebge]ad +ERR162872.21706338 99 17 1 29 10S90M = 246 344 CTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCACCCA BHBFHDBC:CE>A8C>C>7DBA=BEDDB4=9;:@=;@D@@=B@E.3?972<>6@8=>?1$0:95%5%*1=8;0%4<228% X0:i:1 X1:i:0 XC:i:88 MD:Z:88 RG:Z:rg AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U BQ:Z:EHG@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +ERR162875.26247502 83 17 2858 60 100M = 2581 -376 ACCCAGGGTGTCTGAAACAGATGTGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCTTTGAAAGGCCACGTGAC /;,FBHLI?CKH=BJ?FEE==DA MD:Z:4^C6C0A2T1T36G8G36 RG:Z:rg AM:i:29 NM:i:7 SM:i:29 MQ:i:29 XT:A:M BQ:Z:@FEIG@@@@@@A@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +ERR243091.881299 83 17 3491 60 100M = 3174 -416 CTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCCTGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAAAAA C5;=CAAEFDFEEEEBCI;JFIKHGHGHIIKGEKGJIGGIJG=GDIKGFIHIJEIHJFFFFDDDAI>IDJDFGFEIFGHGIFGFJFIDFGEHGGDB@A@? 
X0:i:1 X1:i:0 MD:Z:96G3 RG:Z:rg AM:i:37 NM:i:1 SM:i:37 MQ:i:60 XT:A:U BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@PSRW_`_^ +ERR013140.23480670 133 17 3771 0 35M73S = 3771 0 TTCTCATCAATCCCTCATCTCTTATAACCATTTCGGTCCTTTCGGCCCTACAGCCACCTTGTTTATACTTGGTAAGACCCACACCACTCGCCAACTTACTCTACTCCC 8+7?5>09:),/%81,$,7<+?)+1+*+),3%5+)#%(4B%$&'%'/*@,)*%%&,%(/0%-&$$*$-,$3*.%/$:%$+.$*%&+.,.%%,%(%7(-.-',1*6%&$ XC:i:35 RG:Z:rg diff --git a/src/htslib-1.21/test/realn03.fa b/src/htslib-1.21/test/realn03.fa new file mode 100644 index 0000000..9ac86c1 --- /dev/null +++ b/src/htslib-1.21/test/realn03.fa @@ -0,0 +1,2 @@ +>MX +CGTCTACTACG diff --git a/src/htslib-1.21/test/realn03.fa.fai b/src/htslib-1.21/test/realn03.fa.fai new file mode 100644 index 0000000..dfaa593 --- /dev/null +++ b/src/htslib-1.21/test/realn03.fa.fai @@ -0,0 +1 @@ +MX 11 4 11 12 diff --git a/src/htslib-1.21/test/realn03.sam b/src/htslib-1.21/test/realn03.sam new file mode 100644 index 0000000..5026624 --- /dev/null +++ b/src/htslib-1.21/test/realn03.sam @@ -0,0 +1,4 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:MX LN:11 +M 64 MX 1 60 11M * 0 0 CGTCTCCTACG IIIIIIIIIII +X 64 MX 1 60 5=1X5= * 0 0 CGTCTCCTACG IIIIIIIIIII diff --git a/src/htslib-1.21/test/realn03_exp.sam b/src/htslib-1.21/test/realn03_exp.sam new file mode 100644 index 0000000..3b608d0 --- /dev/null +++ b/src/htslib-1.21/test/realn03_exp.sam @@ -0,0 +1,4 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:MX LN:11 +M 64 MX 1 60 11M * 0 0 CGTCTCCTACG IIIIIIIIIII BQ:Z:D@@@@@@@@@D +X 64 MX 1 60 5=1X5= * 0 0 CGTCTCCTACG IIIIIIIIIII BQ:Z:D@@@@@@@@@D diff --git a/src/htslib-1.21/test/sam.c b/src/htslib-1.21/test/sam.c new file mode 100644 index 0000000..74591fc --- /dev/null +++ b/src/htslib-1.21/test/sam.c @@ -0,0 +1,2370 @@ +/* test/sam.c -- SAM/BAM/CRAM API test cases. + + Copyright (C) 2014-2020, 2022-2024 Genome Research Ltd. 
+ + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Suppress message for faidx_fetch_nseq(), which we're intentionally testing +#include "../htslib/hts_defs.h" +#undef HTS_DEPRECATED +#define HTS_DEPRECATED(message) + +#include "../htslib/sam.h" +#include "../htslib/faidx.h" +#include "../htslib/khash.h" +#include "../htslib/hts_log.h" + +KHASH_SET_INIT_STR(keep) +typedef khash_t(keep) *keephash_t; + +#ifndef HTS_VERSION +#error HTS_VERSION not defined +#endif +#if HTS_VERSION < 100900 +#error HTS_VERSION comparison incorrect +#endif + +int status; + +static void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) fail(const char *fmt, ...) 
+{ + va_list args; + + fprintf(stderr, "Failed: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fprintf(stderr, "\n"); + + status = EXIT_FAILURE; +} + +#define VERIFY(test, message) if (!(test)) { \ + fail("%s: %s", __func__, (message)); \ + goto cleanup; \ +} + +uint8_t *check_bam_aux_get(const bam1_t *aln, const char *tag, char type) +{ + uint8_t *p = bam_aux_get(aln, tag); + if (p) { + if (*p == type) return p; + else fail("%s field of type '%c', expected '%c'", tag, *p, type); + } + else fail("can't find %s field", tag); + + return NULL; +} + +static void check_aux_count(const bam1_t *aln, int expected, const char *what) +{ + const uint8_t *itr; + int n = 0; + for (itr = bam_aux_first(aln); itr; itr = bam_aux_next(aln, itr)) n++; + if (n != expected) + fail("%s has %d aux fields, expected %d", what, n, expected); +} + +static void check_int_B_array(bam1_t *aln, char *tag, + uint32_t nvals, int64_t *vals) { + uint8_t *p; + if ((p = check_bam_aux_get(aln, tag, 'B')) != NULL) { + uint32_t i; + + if (bam_auxB_len(p) != nvals) + fail("Wrong length reported for %s field, got %u, expected %u", + tag, bam_auxB_len(p), nvals); + + for (i = 0; i < nvals; i++) { + if (bam_auxB2i(p, i) != vals[i]) { + fail("Wrong value from bam_auxB2i for %s field index %u, " + "got %"PRId64" expected %"PRId64, + tag, i, bam_auxB2i(p, i), vals[i]); + } + if (bam_auxB2f(p, i) != (double) vals[i]) { + fail("Wrong value from bam_auxB2f for %s field index %u, " + "got %f expected %f", + tag, i, bam_auxB2f(p, i), (double) vals[i]); + } + } + } +} + +#define PI 3.141592653589793 +#define E 2.718281828459045 +#define HELLO "Hello, world!" 
+#define NEW_HELLO "Yo, dude" +#define NEW_HELLO2 "Bonjour, tout le monde" +#define BEEF "DEADBEEF" + +#define str(x) #x +#define xstr(x) str(x) + +#define NELE(x) (sizeof(x)/sizeof(x[0])) + +static int test_update_int(bam1_t *aln, + const char target_id[2], int64_t target_val, + char expected_type, + const char next_id[2], int64_t next_val, + char next_type) { + uint8_t *p; + + // Try updating target + if (bam_aux_update_int(aln, target_id, target_val) < 0) { + fail("update %.2s tag", target_id); + return -1; + } + + // Check it's there and has the right type and value + p = bam_aux_get(aln, target_id); + if (!p) { + fail("find %.2s tag", target_id); + return -1; + } + if (*p != expected_type || bam_aux2i(p) != target_val) { + fail("%.2s field is %c:%"PRId64"; expected %c:%"PRId64, + target_id, *p, bam_aux2i(p), expected_type, target_val); + return -1; + } + + // If given, check that the next tag hasn't been clobbered by the + // update above. + if (!*next_id) return 0; + p = bam_aux_get(aln, next_id); + if (!p) { + fail("find %.2s tag after updating %.2s", next_id, target_id); + return -1; + } + if (*p != next_type || bam_aux2i(p) != next_val) { + fail("after updating %.2s to %"PRId64":" + " %.2s field is %c:%"PRId64"; expected %c:%"PRId64, + target_id, target_val, + next_id, *p, bam_aux2i(p), next_type, next_val); + return -1; + } + return 0; +} + +#define CHECK_ARRAY_VALS(T, GET_VAL, FMT1, FMT2) do { \ + T * vals = (T *) data; \ + uint32_t i; \ + for (i = 0; i < nitems; i++) { \ + if (GET_VAL(p, i) != vals[i]) { \ + fail("Wrong value from %s for %.2s field index %u, " \ + "got %" FMT1 " expected %" FMT2, \ + xstr(GET_VAL), target_id, i, GET_VAL(p, i), vals[i]); \ + return -1; \ + } \ + } \ +} while (0) + +static int test_update_array(bam1_t *aln, const char target_id[2], + uint8_t type, uint32_t nitems, void *data, + const char next_id[2], int64_t next_val, + char next_type) +{ + uint8_t *p; + + // Try updating target + if (bam_aux_update_array(aln, target_id, 
type, nitems, data) < 0) { + fail("update %2.s tag", target_id); + return -1; + } + + // Check values + p = bam_aux_get(aln, target_id); + if (!p) { + fail("find %.2s tag", target_id); + return -1; + } + switch (type) { + case 'c': + CHECK_ARRAY_VALS(int8_t, bam_auxB2i, PRId64, PRId8); break; + case 'C': + CHECK_ARRAY_VALS(uint8_t, bam_auxB2i, PRId64, PRIu8); break; + case 's': + CHECK_ARRAY_VALS(int16_t, bam_auxB2i, PRId64, PRId16); break; + case 'S': + CHECK_ARRAY_VALS(uint16_t, bam_auxB2i, PRId64, PRIu16); break; + case 'i': + CHECK_ARRAY_VALS(int32_t, bam_auxB2i, PRId64, PRId32); break; + case 'I': + CHECK_ARRAY_VALS(uint32_t, bam_auxB2i, PRId64, PRIu32); break; + case 'f': + CHECK_ARRAY_VALS(float, bam_auxB2f, "e", "e"); break; + } + + // If given, check that the next tag hasn't been clobbered by the + // update above. + if (!*next_id) return 0; + p = bam_aux_get(aln, next_id); + if (!p) { + fail("find %.2s tag after updating %.2s", next_id, target_id); + return -1; + } + if (*p != next_type || bam_aux2i(p) != next_val) { + fail("after updating %.2s:" + " %.2s field is %c:%"PRId64"; expected %c:%"PRId64, + target_id, next_id, *p, bam_aux2i(p), next_type, next_val); + return -1; + } + + return 0; +} + +// This function uses bam_hdr_t etc as a check ensuring the legacy typedef +// and functions continue to compile successfully. 
+static int aux_fields1(void) +{ + static const char sam[] = "data:," +"@SQ\tSN:one\tLN:1000\n" +"@SQ\tSN:two\tLN:500\n" +"r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:" xstr(PI) "\tXd:d:" xstr(E) "\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,+2\tB0:B:i,-2147483648,-1,0,1,2147483647\tB1:B:I,0,1,2147483648,4294967295\tB2:B:s,-32768,-1,0,1,32767\tB3:B:S,0,1,32768,65535\tB4:B:c,-128,-1,0,1,127\tB5:B:C,0,1,127,255\tBf:B:f,-3.14159,2.71828\tZZ:i:1000000\tF2:d:2.46801\tY1:i:-2147483648\tY2:i:-2147483647\tY3:i:-1\tY4:i:0\tY5:i:1\tY6:i:2147483647\tY7:i:2147483648\tY8:i:4294967295\n" +"r2\t0x8D\t*\t0\t0\t*\t*\t0\t0\tATGC\tqqqq\n" +; + + // Canonical form of the alignment records above, as output by sam_format1() + static const char r1[] = "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXi:i:37\tXf:f:3.14159\tXd:d:2.71828\tXZ:Z:" NEW_HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,2\tB0:B:i,-2147483648,-1,0,1,2147483647\tB1:B:I,0,1,2147483648,4294967295\tB2:B:s,-32768,-1,0,1,32767\tB3:B:S,0,1,32768,65535\tB4:B:c,-128,-1,0,1,127\tB5:B:C,0,1,127,255\tBf:B:f,-3.14159,2.71828\tZZ:i:1000000\tF2:f:9.8765\tY1:i:-2147483648\tY2:i:-2147483647\tY3:i:-1\tY4:i:0\tY5:i:1\tY6:i:2147483647\tY7:i:2147483648\tY8:i:4294967295\tN0:i:-1234\tN1:i:1234\tN2:i:-2\tN3:i:3\tF1:f:4.5678\tN4:B:S,65535,32768,1,0\tN5:i:4242\tZa:Z:" HELLO "\tZb:Z:" NEW_HELLO2; + static const char r2[] = "r2\t141\t*\t0\t0\t*\t*\t0\t0\tATGC\tqqqq"; + + samFile *in = sam_open(sam, "r"); + bam_hdr_t *header = sam_hdr_read(in); + bam1_t *aln = bam_init1(); + uint8_t *p; + kstring_t ks = { 0, 0, NULL }; + int64_t b0vals[5] = { -2147483648LL,-1,0,1,2147483647LL }; // i + int64_t b1vals[4] = { 0,1,2147483648LL,4294967295LL }; // I + int64_t b2vals[5] = { -32768,-1,0,1,32767 }; // s + int64_t b3vals[4] = { 0,1,32768,65535 }; // S + int64_t b4vals[5] = { -128,-1,0,1,127 }; // c + int64_t b5vals[4] = { 0,1,127,255 }; // C + // NB: Floats not doubles below! 
+ // See https://randomascii.wordpress.com/2012/06/26/doubles-are-not-floats-so-dont-compare-them/ + float bfvals[2] = { -3.14159f, 2.71828f }; + + int8_t n4v1[] = { -128, -64, -32, -16, -8, -4, -2, -1, + 0, 1, 2, 4, 8, 16, 32, 64, 127 }; + uint32_t n4v2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1234, 5678, 1U << 31, 0 }; + int16_t n4v3[] = { -32768, -1, 0, 1, 32767 }; + float n4v4[] = { 0, 1, 2, 10, 20, 30, 1.5, -2.5 }; + uint8_t n4v5[] = { 0, 255 }; + int32_t n4v6[] = { -2147483647 - 1, 10, -1, 0, 1, 2147483647 }; + uint16_t n4v7[] = { 65535, 32768, 1, 0 }; + + int32_t ival = -1234; + uint32_t uval = 1234; + float f1 = 4.5678; + float f2 = 9.8765; + const char *hose = "OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO"; + + size_t nvals, i; + + if (sam_read1(in, header, aln) >= 0) { + if ((p = check_bam_aux_get(aln, "XA", 'A')) && bam_aux2A(p) != 'k') + fail("XA field is '%c', expected 'k'", bam_aux2A(p)); + + check_aux_count(aln, 24, "Original record"); + + bam_aux_del(aln,p); + if (bam_aux_get(aln,"XA")) + fail("XA field was not deleted"); + + check_aux_count(aln, 23, "Record post-XA-deletion"); + + p = bam_aux_get(aln, "Y2"); + if (p == NULL || strncmp(bam_aux_tag(p), "Y2", 2) != 0 || bam_aux_type(p) != 'i') + fail("bam_aux_get() missed Y2 field"); + + p = bam_aux_next(aln, p); + if (p == NULL || strncmp(bam_aux_tag(p), "Y3", 2) != 0 || bam_aux_type(p) != 'c') + fail("bam_aux_next() missed Y3 field"); + + p = bam_aux_get(aln, "Y8"); + if (p == NULL || strncmp(bam_aux_tag(p), "Y8", 2) != 0 || bam_aux_type(p) != 'I') + fail("bam_aux_get() missed Y8 field"); + + p = bam_aux_next(aln, p); + if (p != NULL || errno != ENOENT) + fail("bam_aux_next missed the end of fields"); + + if ((p = check_bam_aux_get(aln, "Xi", 'C')) && bam_aux2i(p) != 37) + fail("Xi field is %"PRId64", expected 37", bam_aux2i(p)); + + if ((p = check_bam_aux_get(aln, "Xf", 'f')) && fabs(bam_aux2f(p) - PI) > 1E-6) + fail("Xf field is %.12f, expected pi", bam_aux2f(p)); + + 
if ((p = check_bam_aux_get(aln, "Xd", 'd')) && fabs(bam_aux2f(p) - E) > 1E-6) + fail("Xf field is %.12f, expected e", bam_aux2f(p)); + + if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), HELLO) != 0) + fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), HELLO); + + bam_aux_update_str(aln,"XZ",strlen(NEW_HELLO)+1,NEW_HELLO); + if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), NEW_HELLO) != 0) + fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), NEW_HELLO); + if (!check_bam_aux_get(aln, "XH", 'H')) + fail("bam_aux_update_str(,,strlen(NEW_HELLO)+1,NEW_HELLO) corrupted XH tag"); + + bam_aux_update_str(aln,"XZ",strlen(NEW_HELLO2), NEW_HELLO2); + if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), NEW_HELLO2) != 0) + fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), NEW_HELLO2); + if (!check_bam_aux_get(aln, "XH", 'H')) + fail("bam_aux_update_str(,,strlen(NEW_HELLO2),NEW_HELLO2) corrupted XH tag"); + + bam_aux_update_str(aln,"XZ",-1,NEW_HELLO); + if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), NEW_HELLO) != 0) + fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), NEW_HELLO); + if (!check_bam_aux_get(aln, "XH", 'H')) + fail("bam_aux_update_str(,,-1,NEW_HELLO) corrupted XH tag"); + + if ((p = check_bam_aux_get(aln, "XH", 'H')) && strcmp(bam_aux2Z(p), BEEF) != 0) + fail("XH field is \"%s\", expected \"%s\"", bam_aux2Z(p), BEEF); + + if ((p = check_bam_aux_get(aln, "XB", 'B')) + && ! 
(memcmp(p, "Bc", 2) == 0 + && memcmp(p + 2, "\x03\x00\x00\x00\xfe\x00\x02", 7) == 0)) + fail("XB field is %c,..., expected c,-2,0,+2", p[1]); + + check_int_B_array(aln, "B0", NELE(b0vals), b0vals); + check_int_B_array(aln, "B1", NELE(b1vals), b1vals); + check_int_B_array(aln, "B2", NELE(b2vals), b2vals); + check_int_B_array(aln, "B3", NELE(b3vals), b3vals); + check_int_B_array(aln, "B4", NELE(b4vals), b4vals); + check_int_B_array(aln, "B5", NELE(b5vals), b5vals); + + nvals = NELE(bfvals); + if ((p = check_bam_aux_get(aln, "Bf", 'B')) != NULL) { + if (bam_auxB_len(p) != nvals) + fail("Wrong length reported for Bf field, got %d, expected %zd", + bam_auxB_len(p), nvals); + + for (i = 0; i < nvals; i++) { + if (bam_auxB2f(p, i) != bfvals[i]) { + fail("Wrong value from bam_auxB2f for Bf field index %zd, " + "got %f expected %f", + i, bam_auxB2f(p, i), bfvals[i]); + } + } + } + + if ((p = check_bam_aux_get(aln, "ZZ", 'I')) && bam_aux2i(p) != 1000000) + fail("ZZ field is %"PRId64", expected 1000000", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y1")) && bam_aux2i(p) != -2147483647-1) + fail("Y1 field is %"PRId64", expected -2^31", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y2")) && bam_aux2i(p) != -2147483647) + fail("Y2 field is %"PRId64", expected -2^31+1", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y3")) && bam_aux2i(p) != -1) + fail("Y3 field is %"PRId64", expected -1", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y4")) && bam_aux2i(p) != 0) + fail("Y4 field is %"PRId64", expected 0", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y5")) && bam_aux2i(p) != 1) + fail("Y5 field is %"PRId64", expected 1", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y6")) && bam_aux2i(p) != 2147483647) + fail("Y6 field is %"PRId64", expected 2^31-1", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y7")) && bam_aux2i(p) != 2147483648LL) + fail("Y7 field is %"PRId64", expected 2^31", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y8")) && bam_aux2i(p) != 4294967295LL) + fail("Y8 field 
is %"PRId64", expected 2^32-1", bam_aux2i(p)); + + // Try appending some new tags + if (bam_aux_append(aln, "N0", 'i', sizeof(ival), (uint8_t *) &ival) != 0) + fail("Failed to append N0:i tag"); + + if ((p = bam_aux_get(aln, "N0")) && bam_aux2i(p) != ival) + fail("N0 field is %"PRId64", expected %d", bam_aux2i(p), ival); + + if (bam_aux_append(aln, "N1", 'I', sizeof(uval), (uint8_t *) &uval) != 0) + fail("failed to append N1:I tag"); + + if ((p = bam_aux_get(aln, "N1")) && bam_aux2i(p) != uval) + fail("N1 field is %"PRId64", expected %u", bam_aux2i(p), uval); + + // Append tags with bam_aux_update_int() + if (bam_aux_update_int(aln, "N2", -2) < 0) + fail("failed to append N2:c tag"); + + if (bam_aux_update_int(aln, "N3", 3) < 0) + fail("failed to append N3:C tag"); + + p = bam_aux_get(aln, "N2"); + if (!p) + fail("failed to retrieve N2 tag"); + else if (*p != 'c' || bam_aux2i(p) != -2) + fail("N2 field is %c:%"PRId64", expected c:-2", *p, bam_aux2i(p)); + + p = bam_aux_get(aln, "N3"); + if (!p) + fail("failed to retrieve N3 tag"); + else if (*p != 'C' || bam_aux2i(p) != 3) + fail("N3 field is %c:%"PRId64", expected C:3", *p, bam_aux2i(p)); + + // Try changing values with bam_aux_update_int() + i = test_update_int(aln, "N2", 2, 'C', "N3", 3, 'C'); + if (i == 0) test_update_int(aln, "N2", 1234, 'S', "N3", 3, 'C'); + if (i == 0) test_update_int(aln, "N2", -1, 's', "N3", 3, 'C'); + if (i == 0) test_update_int(aln, "N2", 4294967295U, 'I', "N3", 3, 'C'); + if (i == 0) test_update_int(aln, "N2", -2, 'i', "N3", 3, 'C'); + + // Append a value with bam_aux_update_float() + if (bam_aux_update_float(aln, "F1", f1) < 0) + fail("append F1:f tag"); + + p = bam_aux_get(aln, "F1"); + if (!p) + fail("retrieve F1 tag"); + else if (*p != 'f' || bam_aux2f(p) != f1) + fail("F1 field is %c:%e, expected f:%e", *p, bam_aux2f(p), f1); + + // Change a double tag to a float + if (bam_aux_update_float(aln, "F2", f2) < 0) + fail("update F2 tag"); + + p = bam_aux_get(aln, "F2"); + if (!p) + 
fail("retrieve F2 tag"); + else if (*p != 'f' || bam_aux2f(p) != f2) + fail("F2 field is %c:%e, expected f:%e", *p, bam_aux2f(p), f2); + + // Check the next one is intact too + p = bam_aux_get(aln, "Y1"); + if (!p) + fail("retrieve Y1 tag"); + else if (*p != 'i' && bam_aux2i(p) != -2147483647-1) + fail("Y1 field is %"PRId64", expected -2^31", bam_aux2i(p)); + + // bam_aux_update_array tests + // append a new array + i = test_update_array(aln, "N4", 'c', NELE(n4v1), n4v1, "\0\0", 0, 0); + + // Add a sentinel to check resizes work + if (i == 0) i = test_update_int(aln, "N5", 4242, 'S', "\0\0", 0, 0); + + // alter the array tag a few times + if (i == 0) + i = test_update_array(aln, "N4", 'I', NELE(n4v2), n4v2, + "N5", 4242, 'S'); + if (i == 0) + i = test_update_array(aln, "N4", 's', NELE(n4v3), n4v3, + "N5", 4242, 'S'); + if (i == 0) + i = test_update_array(aln, "N4", 'f', NELE(n4v4), n4v4, + "N5", 4242, 'S'); + if (i == 0) + i = test_update_array(aln, "N4", 'c', NELE(n4v5), n4v5, + "N5", 4242, 'S'); + if (i == 0) + i = test_update_array(aln, "N4", 'i', NELE(n4v6), n4v6, + "N5", 4242, 'S'); + if (i == 0) + i = test_update_array(aln, "N4", 'S', NELE(n4v7), n4v7, + "N5", 4242, 'S'); + + // Append a couple of strings + // First add and remove some data so that failure to NUL-terminate will + // be spotted + bam_aux_update_str(aln,"oo",strlen(hose) + 1,hose); + if ((p = check_bam_aux_get(aln, "oo", 'Z')) && strcmp(bam_aux2Z(p), hose) != 0) + fail("oo field is \"%s\", expected \"%s\"", bam_aux2Z(p), hose); + if (p) bam_aux_del(aln, p); + if (bam_aux_get(aln, "oo")) + fail("oo field wasn't deleted correctly"); + + bam_aux_update_str(aln,"Za",strlen(HELLO),HELLO); + if ((p = check_bam_aux_get(aln, "Za", 'Z')) && strcmp(bam_aux2Z(p), HELLO) != 0) + fail("Za field is \"%s\", expected \"%s\"", bam_aux2Z(p), HELLO); + + bam_aux_update_str(aln,"Zb",strlen(NEW_HELLO2)+1,NEW_HELLO2); + if ((p = check_bam_aux_get(aln, "Zb", 'Z')) && strcmp(bam_aux2Z(p), NEW_HELLO2) != 0) + fail("Zb 
field is \"%s\", expected \"%s\"", bam_aux2Z(p), NEW_HELLO2); + + if (sam_format1(header, aln, &ks) < 0) + fail("can't format record"); + + if (strcmp(ks.s, r1) != 0) + fail("record formatted incorrectly: \"%s\"", ks.s); + + // Test field removal APIs -- after the strcmp(..., r1) check so that + // can also check the formatting of the to-be-removed fields. + + p = bam_aux_remove(aln, check_bam_aux_get(aln, "XH", 'H')); + if (bam_aux_get(aln, "XH")) + fail("XH field was not removed"); + check_aux_count(aln, 31, "Record post-XH-removal"); + if (strncmp(bam_aux_tag(p), "XB", 2) != 0 || bam_aux_type(p) != 'B') + fail("bam_aux_remove() missed XB field"); + } + else fail("can't read record"); + + if (sam_read1(in, header, aln) >= 0) { + if (sam_format1(header, aln, &ks) < 0) + fail("can't format record r2"); + + if (aln->core.flag != 0x8D) + fail("r2 flag value is 0x%X, expected 0x8D", aln->core.flag); + + if (strcmp(ks.s, r2) != 0) + fail("record r2 formatted incorrectly: \"%s\"", ks.s); + } + else fail("can't read record r2"); + + bam_destroy1(aln); + bam_hdr_destroy(header); + sam_close(in); + free(ks.s); + + return 1; +} + +static void set_qname(void) +{ + static const char sam[] = "data:," +"@SQ\tSN:one\tLN:1000\n" +"@SQ\tSN:two\tLN:500\n" +"r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:" xstr(PI) "\tXd:d:" xstr(E) "\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,+2\tB0:B:i,-2147483648,-1,0,1,2147483647\tB1:B:I,0,1,2147483648,4294967295\tB2:B:s,-32768,-1,0,1,32767\tB3:B:S,0,1,32768,65535\tB4:B:c,-128,-1,0,1,127\tB5:B:C,0,1,127,255\tBf:B:f,-3.14159,2.71828\tZZ:i:1000000\tF2:d:2.46801\tY1:i:-2147483648\tY2:i:-2147483647\tY3:i:-1\tY4:i:0\tY5:i:1\tY6:i:2147483647\tY7:i:2147483648\tY8:i:4294967295\n" +"r22\t0x8D\t*\t0\t0\t*\t*\t0\t0\tATGC\tqqqq\n" +"r12345678\t0x8D\t*\t0\t0\t*\t*\t0\t0\tATGC\tqqqq\n" +; + + // Canonical form of the alignment records above, as output by sam_format1() + static const char r1[] = 
"r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:3.14159\tXd:d:2.71828\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,2\tB0:B:i,-2147483648,-1,0,1,2147483647\tB1:B:I,0,1,2147483648,4294967295\tB2:B:s,-32768,-1,0,1,32767\tB3:B:S,0,1,32768,65535\tB4:B:c,-128,-1,0,1,127\tB5:B:C,0,1,127,255\tBf:B:f,-3.14159,2.71828\tZZ:i:1000000\tF2:d:2.46801\tY1:i:-2147483648\tY2:i:-2147483647\tY3:i:-1\tY4:i:0\tY5:i:1\tY6:i:2147483647\tY7:i:2147483648\tY8:i:4294967295"; + static const char r2[] = "r234\t141\t*\t0\t0\t*\t*\t0\t0\tATGC\tqqqq"; + static const char r3[] = "xyz\t141\t*\t0\t0\t*\t*\t0\t0\tATGC\tqqqq"; + + samFile *in = sam_open(sam, "r"); + bam_hdr_t *header = sam_hdr_read(in); + bam1_t *aln = bam_init1(); + kstring_t ks = { 0, 0, NULL }; + + if (sam_read1(in, header, aln) >= 0) { + bam_set_qname(aln, "r1"); + if (sam_format1(header, aln, &ks) < 0) fail("can't format record"); + if (strcmp(ks.s, r1) != 0) fail("record formatted incorrectly:\nGot: \"%s\"\nExp: \"%s\"\n", ks.s, r1); + } + else fail("can't read record"); + + if (sam_read1(in, header, aln) >= 0) { + bam_set_qname(aln, "r234"); + if (sam_format1(header, aln, &ks) < 0) fail("can't format record"); + if (strcmp(ks.s, r2) != 0) fail("record formatted incorrectly:\nGot: \"%s\"\nExp: \"%s\"\n", ks.s, r2); + } + else fail("can't read record"); + + if (sam_read1(in, header, aln) >= 0) { + bam_set_qname(aln, "xyz"); + if (sam_format1(header, aln, &ks) < 0) fail("can't format record"); + if (strcmp(ks.s, r3) != 0) fail("record formatted incorrectly:\nGot: \"%s\"\nExp: \"%s\"\n", ks.s, r3); + } + else fail("can't read record"); + + bam_destroy1(aln); + bam_hdr_destroy(header); + sam_close(in); + free(ks.s); +} + +static void iterators1(void) +{ + hts_itr_destroy(sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0)); + hts_itr_destroy(sam_itr_queryi(NULL, HTS_IDX_NONE, 0, 0)); +} + +// This function uses bam_hdr_t etc as a check ensuring the legacy typedef +// and functions continue to compile successfully. 
+static void copy_check_alignment(const char *infname, const char *informat, + const char *outfname, const char *outmode, const char *outref) +{ + samFile *in = sam_open(infname, "r"); + samFile *out = sam_open(outfname, outmode); + bam1_t *aln = bam_init1(); + bam_hdr_t *header = NULL; + int res; + + if (!in) { + fail("couldn't open %s", infname); + goto err; + } + if (!out) { + fail("couldn't open %s with mode %s", outfname, outmode); + goto err; + } + if (!aln) { + fail("bam_init1() failed"); + goto err; + } + + if (outref) { + if (hts_set_opt(out, CRAM_OPT_REFERENCE, outref) < 0) { + fail("setting reference %s for %s", outref, outfname); + goto err; + } + } + + header = sam_hdr_read(in); + if (!header) { + fail("reading header from %s", infname); + goto err; + } + if (sam_hdr_write(out, header) < 0) fail("writing headers to %s", outfname); + + while ((res = sam_read1(in, header, aln)) >= 0) { + int mod4 = ((intptr_t) bam_get_cigar(aln)) % 4; + if (mod4 != 0) + fail("%s CIGAR not 4-byte aligned; offset is 4k+%d for \"%s\"", + informat, mod4, bam_get_qname(aln)); + + if (sam_write1(out, header, aln) < 0) fail("writing to %s", outfname); + } + if (res < -1) { + fail("failed to read alignment from %s", infname); + } + + err: + bam_destroy1(aln); + aln = NULL; + bam_hdr_destroy(header); + header = NULL; + if (in) sam_close(in); + if (out) sam_close(out); +} + +static int check_target_names(sam_hdr_t *header, int expected_n_targets, + const char **expected_targets, + const int *expected_lengths) { + int i; + + // Check consistency of target_names array + if (!header->target_name) { + fail("target_name is NULL"); + return -1; + } + if (!header->target_len) { + fail("target_len is NULL"); + return -1; + } + if (header->n_targets != expected_n_targets) { + fail("header->n_targets (%d) != expected_n_targets (%d)", + header->n_targets, expected_n_targets); + return -1; + } + for (i = 0; i < expected_n_targets; i++) { + if (!header->target_name[i] + || 
strcmp(header->target_name[i], expected_targets[i]) != 0) { + fail("header->target_name[%d] (%s) != \"%s\"", + i, header->target_name[i] ? header->target_name[i] : "NULL", + expected_targets[i]); + return -1; + } + if (header->target_len[i] != expected_lengths[i]) { + fail("header->target_len[%d] (%d) != %d", + i, header->target_len[i], expected_lengths[i]); + return -1; + } + } + return 0; +} + +static void use_header_api(void) { + static const char header_text[] = "data:," + "@HD\tVN:1.4\tGO:group\tSS:coordinate:queryname\n" + "@SQ\tSN:ref0\tLN:100\n" + "@CO\tThis line below will be updated\n" + "@SQ\tSN:ref1\tLN:5001\tM5:983dalu9ue2\n" + "@SQ\tSN:ref1.5\tLN:5001\n" + "@CO\tThis line is good\n" + "@SQ\tSN:ref2\tLN:5002\n"; + + static const char rg_line[] = + { '@', 'R', 'G', '\t', 'I', 'D', ':', 'r', 'u', 'n', '1' }; + + static const char expected[] = + "@HD\tVN:1.5\tSO:coordinate\n" + "@CO\tThis line below will be updated\n" + "@SQ\tSN:ref1\tLN:5001\tM5:kja8u34a2q3\n" + "@CO\tThis line is good\n" + "@SQ\tSN:ref2\tLN:5002\n" + "@SQ\tSN:ref3\tLN:5003\n" + "@PG\tID:samtools\tPN:samtools\tVN:1.9\n" + "@RG\tID:run1\n" + "@RG\tID:run4\n"; + + static const char *expected_targets[] = { "ref1", "ref2", "ref3" }; + static const int expected_lengths[] = { 5001, 5002, 5003 }; + const int expected_n_targets = sizeof(expected_targets) / sizeof(char *); + + const char outfname[] = "test/sam_header.tmp.sam_"; + const char outmode[] = "w"; + FILE *inf = NULL; + char buffer[sizeof(expected) + 1024]; + + samFile *in = sam_open(header_text, "r"); + samFile *out = sam_open(outfname, outmode); + sam_hdr_t *header = NULL; + kstring_t ks = { 0, 0, NULL }; + size_t bytes; + int r; + const char *name; + + if (!in) { + fail("couldn't open file"); + goto err; + } + if (!out) { + fail("couldn't open %s with mode %s", outfname, outmode); + goto err; + } + + header = sam_hdr_read(in); + if (!header) { + fail("reading header from file"); + goto err; + } + r = sam_hdr_remove_tag_id(header, 
"HD", NULL, NULL, "GO"); + if (r != 1) { fail("sam_hdr_remove_tag_id"); goto err; } + + r = sam_hdr_update_hd(header, "VN", "1.5"); + if (r != 0) { fail("sam_hdr_update_hd"); goto err; } + + r = sam_hdr_add_line(header, "SQ", "SN", "ref3", "LN", "5003", NULL); + if (r < 0) { fail("sam_hdr_add_line"); goto err; } + + r = sam_hdr_update_line(header, "SQ", "SN", "ref1", + "M5", "kja8u34a2q3", NULL); + if (r != 0) { fail("sam_hdr_update_line SQ"); goto err; } + + r = sam_hdr_add_pg(header, "samtools", "VN", "1.9", NULL); + if (r != 0) { fail("sam_hdr_add_pg"); goto err; } + + // Test addition with no newline or trailing NUL + r = sam_hdr_add_lines(header, rg_line, sizeof(rg_line)); + if (r != 0) { fail("sam_hdr_add_lines rg_line"); goto err; } + + // Test header line removal + r = sam_hdr_add_line(header, "RG", "ID", "run2", NULL); + if (r < 0) { fail("sam_hdr_add_line"); goto err; } + + r = sam_hdr_add_line(header, "RG", "ID", "run3", NULL); + if (r < 0) { fail("sam_hdr_add_line"); goto err; } + + r = sam_hdr_add_line(header, "RG", "ID", "run4", NULL); + if (r < 0) { fail("sam_hdr_add_line"); goto err; } + + r = sam_hdr_line_index(header, "RG", "run4"); + if (r != 3) { fail("sam_hdr_line_index - run4~3"); goto err; } + + r = sam_hdr_line_index(header, "RG", "run5"); + if (r != -1) { fail("sam_hdr_line_index - run5~-1"); goto err; } + + name = sam_hdr_line_name(header, "RG", 2); + if (!name || strcmp(name, "run3")) { fail("sam_hdr_line_name - 2~run3"); goto err; } + + name = sam_hdr_line_name(header, "RG", 10); + if (name) { fail("sam_hdr_line_name - 10~NULL"); goto err; } + + r = sam_hdr_remove_line_id(header, "RG", "ID", "run2"); + if (r < 0) { fail("sam_hdr_remove_line_id"); goto err; } + + r = sam_hdr_find_tag_id(header, "RG", "ID", "run3", "ID", &ks); + if (r < 0 || !ks.s || strcmp(ks.s, "run3") != 0) { + fail("sam_hdr_find_tag_id() expected \"run3\" got \"%s\"", + r == 0 && ks.s ? 
ks.s : "NULL"); + goto err; + } + + r = sam_hdr_remove_line_pos(header, "RG", 1); // Removes run3 + if (r < 0) { fail("sam_hdr_remove_line_pos"); goto err; } + + r = sam_hdr_remove_line_id(header, "SQ", "SN", "ref0"); + if (r < 0) { fail("sam_hdr_remove_line_id"); goto err; } + + r = sam_hdr_remove_line_pos(header, "SQ", 1); // Removes ref1.5 + if (r < 0) { fail("sam_hdr_remove_line_pos"); goto err; } + + r = sam_hdr_find_tag_id(header, "SQ", "SN", "ref1", "M5", &ks); + if (r < 0 || !ks.s || strcmp(ks.s, "kja8u34a2q3") != 0) { + fail("sam_hdr_find_tag_id() expected \"kja8u34a2q3\" got \"%s\"", + r == 0 && ks.s ? ks.s : "NULL"); + goto err; + } + + r = sam_hdr_line_index(header, "RG", "run4"); + if (r != 1) { fail("sam_hdr_line_index - run4~1"); goto err; } + + name = sam_hdr_line_name(header, "RG", 2); + if (name) { fail("sam_hdr_line_name - 2~NULL"); goto err; } + + r = sam_hdr_remove_tag_hd(header, "SS"); + if (r < 0) { + fail("sam_hdr_remove_tag_hd"); + } + + r = sam_hdr_find_hd(header, &ks); + if (r < 0 || !ks.s || strcmp(ks.s, "@HD\tVN:1.5") != 0) { + fail("sam_hdr_find_hd() expected \"@HD\tVN:1.5\" got \"%s\"", + r == 0 && ks.s ? ks.s : "NULL"); + } + + r = sam_hdr_find_tag_hd(header, "VN", &ks); + if (r < 0 || !ks.s || strcmp(ks.s, "1.5") != 0) { + fail("sam_hdr_find_tag_hd() expected \"1.5\" got \"%s\"", + r == 0 && ks.s ? 
ks.s : "NULL"); + } + + r = sam_hdr_update_hd(header, "SO", "coordinate"); + if (r < 0) { + fail("sam_hdr_update_hd"); + } + + if (check_target_names(header, expected_n_targets, expected_targets, + expected_lengths) < 0) { + goto err; + } + + if ((r = sam_hdr_count_lines(header, "HD")) != 1) { + fail("incorrect HD line count - expected 1, got %d", r); + goto err; + } + if ((r = sam_hdr_count_lines(header, "SQ")) != 3) { + fail("incorrect SQ line count - expected 3, got %d", r); + goto err; + } + if ((r = sam_hdr_count_lines(header, "PG")) != 1) { + fail("incorrect PG line count - expected 1, got %d", r); + goto err; + } + if ((r = sam_hdr_count_lines(header, "RG")) != 2) { + fail("incorrect RG line count - expected 2, got %d", r); + goto err; + } + if ((r = sam_hdr_count_lines(header, "CO")) != 2) { + fail("incorrect CO line count - expected 2, got %d", r); + goto err; + } + + if (sam_hdr_write(out, header) < 0) { + fail("writing headers to \"%s\"", outfname); + goto err; + } + r = sam_close(out); + out = NULL; + if (r < 0) { + fail("close \"%s\"", outfname); + goto err; + } + + inf = fopen(outfname, "r"); + if (!inf) { + fail("Opening written header \"%s\"", outfname); + goto err; + } + bytes = fread(buffer, 1, sizeof(buffer), inf); + if (bytes != sizeof(expected) - 1 || memcmp(buffer, expected, bytes) != 0) { + fail("edited header does not match expected version"); + fprintf(stderr, + "---------- Expected:\n%.*s\n" + "++++++++++ Got:\n%.*s\n" + "====================\n", + (int) sizeof(expected), expected, + (int) bytes, buffer); + goto err; + } + + free(ks_release(&ks)); + + err: + sam_hdr_destroy(header); + header = NULL; + if (in) sam_close(in); + if (out) sam_close(out); + if (inf) fclose(inf); + free(ks_release(&ks)); +} + +static void test_header_pg_lines(void) { + static const char header_text[] = "data:," + "@HD\tVN:1.5\n" + "@PG\tID:prog1\tPN:prog1\n" + "@PG\tID:prog2\tPN:prog2\tPP:prog1\n"; + + static const char expected[] = + "@HD\tVN:1.5\n" + 
"@PG\tID:prog1\tPN:prog1\n" + "@PG\tID:prog2\tPN:prog2\tPP:prog1\n" + "@PG\tID:prog3\tPN:prog3\tPP:prog2\n" + "@PG\tID:prog4\tPN:prog4\tPP:prog1\n" + "@PG\tID:prog5\tPN:prog5\tPP:prog2\n" + "@PG\tID:prog6\tPN:prog6\tPP:prog3\n" + "@PG\tID:prog6.1\tPN:prog6\tPP:prog4\n" + "@PG\tID:prog6.2\tPN:prog6\tPP:prog5\n" + "@PG\tPN:prog7\tID:my_id\tPP:prog6\n"; + + samFile *in = sam_open(header_text, "r"); + sam_hdr_t *header = NULL; + const char *text = NULL; + enum htsLogLevel old_log_level; + int r; + + if (!in) { + fail("couldn't open file"); + goto err; + } + + header = sam_hdr_read(in); + if (!header) { + fail("reading header from file"); + goto err; + } + + r = sam_hdr_add_pg(header, "prog3", NULL); + if (r != 0) { fail("sam_hdr_add_pg prog3"); goto err; } + + + r = sam_hdr_add_pg(header, "prog4", "PP", "prog1", NULL); + if (r != 0) { fail("sam_hdr_add_pg prog4"); goto err; } + + r = sam_hdr_add_line(header, "PG", "ID", + "prog5", "PN", "prog5", "PP", "prog2", NULL); + if (r != 0) { fail("sam_hdr_add_line @PG ID:prog5"); goto err; } + + r = sam_hdr_add_pg(header, "prog6", NULL); + if (r != 0) { fail("sam_hdr_add_pg prog6"); goto err; } + + r = sam_hdr_add_pg(header, "prog7", "ID", "my_id", "PP", "prog6", NULL); + if (r != 0) { fail("sam_hdr_add_pg prog7"); goto err; } + + text = sam_hdr_str(header); + if (!text) { fail("sam_hdr_str"); goto err; } + + // These should fail + old_log_level = hts_get_log_level(); + hts_set_log_level(HTS_LOG_OFF); + + r = sam_hdr_add_pg(header, "prog8", "ID", "my_id", NULL); + if (r == 0) { fail("sam_hdr_add_pg prog8 (unexpected success)"); goto err; } + + r = sam_hdr_add_pg(header, "prog9", "PP", "non-existent", NULL); + if (r == 0) { fail("sam_hdr_add_pg prog9 (unexpected success)"); goto err; } + + hts_set_log_level(old_log_level); + // End failing tests + + text = sam_hdr_str(header); + if (!text || strcmp(text, expected) != 0) { + fail("edited header does not match expected version"); + fprintf(stderr, + "---------- Expected:\n%s\n" + 
"++++++++++ Got:\n%s\n" + "====================\n", + expected, text); + goto err; + } + + err: + sam_hdr_destroy(header); + header = NULL; + if (in) sam_close(in); + return; +} + +// Test handling of @PG PP loops +static void test_header_pg_loops(void) { + static const char *header_texts[2] = { + // Loop to self + "data:," + "@HD\tVN:1.5\n" + "@PG\tID:loop1\tPN:prog1\tPP:loop1\n", + + // circuit + "data:," + "@HD\tVN:1.5\n" + "@PG\tID:loop1\tPN:prog1\tPP:loop2\n" + "@PG\tID:loop2\tPN:prog2\tPP:loop1\n" + }; + + static const char *expected[2] = { + "@HD\tVN:1.5\n" + "@PG\tID:loop1\tPN:prog1\tPP:loop1\n" + "@PG\tID:new_prog\tPN:new_prog\tPP:loop1\n", + + "@HD\tVN:1.5\n" + "@PG\tID:loop1\tPN:prog1\tPP:loop2\n" + "@PG\tID:loop2\tPN:prog2\tPP:loop1\n" + "@PG\tID:new_prog\tPN:new_prog\n" + }; + + int i, r; + samFile *in = NULL; + sam_hdr_t *header = NULL; + const char *text = NULL; + enum htsLogLevel old_log_level = hts_get_log_level(); + + // Silence header loop warning + hts_set_log_level(HTS_LOG_OFF); + + for (i = 0; i < 2; i++) { + in = sam_open(header_texts[i], "r"); + if (!in) { + fail("couldn't open file for PG loop test %d", i); + goto err; + } + + header = sam_hdr_read(in); + if (!header) { + fail("reading header for PG loop test %d", i); + goto err; + } + + r = sam_hdr_add_pg(header, "new_prog", NULL); + if (r != 0) { + fail("sam_hdr_add_pg new_prog for PG loop test %d", i); + goto err; + } + + text = sam_hdr_str(header); + if (!text || strcmp(text, expected[i]) != 0) { + fail("edited header does not match expected version for PG loop test %d", i); + fprintf(stderr, + "---------- Expected:\n%s\n" + "++++++++++ Got:\n%s\n" + "====================\n", + expected[i], text); + goto err; + } + sam_hdr_destroy(header); + header = NULL; + if (in) sam_close(in); + in = NULL; + } + hts_set_log_level(old_log_level); + return; + + err: + sam_hdr_destroy(header); + header = NULL; + if (in) sam_close(in); + hts_set_log_level(old_log_level); + return; +} + +static void 
test_header_updates(void) { + static const char header_text[] = + "@HD\tVN:1.4\n" + "@SQ\tSN:chr1\tLN:100\n" + "@SQ\tSN:chr2\tLN:200\n" + "@SQ\tSN:chr3\tLN:300\n" + "@RG\tID:run1\n" + "@RG\tID:run2\n" + "@RG\tID:run3\n" + "@PG\tID:prog1\tPN:prog1\n"; + + static const char expected[] = + "@HD\tVN:1.4\n" + "@SQ\tSN:1\tLN:100\n" + "@SQ\tSN:chr2\tLN:2000\n" + "@SQ\tSN:chr3\tLN:300\n" + "@RG\tID:run1\tDS:hello\n" + "@RG\tID:aliquot2\n" + "@RG\tID:run3\n" + "@PG\tID:prog1\tPN:prog1\n"; + + static const char *expected_targets[] = { "1", "chr2", "chr3" }; + static const int expected_lengths[] = { 100, 2000, 300 }; + const int expected_n_targets = sizeof(expected_targets) / sizeof(char *); + + sam_hdr_t *header = sam_hdr_parse(sizeof(header_text) - 1, header_text); + const char *hdr_str; + int r, i, old_log_level; + + if (!header) { + fail("creating sam header"); + goto err; + } + + if (sam_hdr_name2tid(header, "chr1") != 0) { // Should now be unknown + fail("sam_hdr_name2tid(\"chr1\") != 0"); + goto err; + } + + r = sam_hdr_update_line(header, "SQ", "SN", "chr2", "LN", "2000", NULL); + if (r != 0) { fail("sam_hdr_update_line SQ SN chr2 LN 2000"); goto err; } + r = sam_hdr_update_line(header, "SQ", "SN", "chr1", "SN", "1", NULL); + if (r != 0) { fail("sam_hdr_update_line SQ SN chr1 SN 1"); goto err; } + r = sam_hdr_update_line(header, "RG", "ID", "run1", "DS", "hello", NULL); + if (r != 0) { fail("sam_hdr_update_line RG ID run1 DS hello"); goto err; } + r = sam_hdr_update_line(header, "RG", "ID", "run2", "ID", "aliquot2", NULL); + if (r != 0) { fail("sam_hdr_update_line RG ID run2 ID aliquot2"); goto err; } + + // These should fail + old_log_level = hts_get_log_level(); + hts_set_log_level(HTS_LOG_OFF); + + r = sam_hdr_update_line(header, "PG", "ID", "prog1", "ID", "prog2", NULL); + if (r == 0) { fail("sam_hdr_update_line PG ID prog1 ID prog2"); goto err; } + + r = sam_hdr_update_line(header, "SQ", "SN", "chr3", "SN", "chr2", NULL); + if (r == 0) { fail("sam_hdr_update_line 
SQ SN chr3 SN chr2"); goto err; } + + r = sam_hdr_update_line(header, "RG", "ID", "run3", "ID", "run1", NULL); + if (r == 0) { fail("sam_hdr_update_line RG ID run3 ID run1"); goto err; } + + hts_set_log_level(old_log_level); + // End failing tests + + if (check_target_names(header, expected_n_targets, expected_targets, + expected_lengths) < 0) { + goto err; + } + + for (i = 0; i < expected_n_targets; i++) { + if (sam_hdr_name2tid(header, expected_targets[i]) != i) { + fail("sam_hdr_name2tid unexpected result"); + goto err; + } + } + if (sam_hdr_name2tid(header, "chr1") != -1) { // Should now be unknown + fail("sam_hdr_name2tid(\"chr1\") != -1"); + goto err; + } + + hdr_str = sam_hdr_str(header); + if (!hdr_str || strcmp(hdr_str, expected) != 0) { + fail("edited header does not match expected version"); + fprintf(stderr, + "---------- Expected:\n%s\n" + "++++++++++ Got:\n%s\n" + "====================\n", + expected, hdr_str ? hdr_str : ""); + goto err; + } + + err: + sam_hdr_destroy(header); +} + +static void test_header_remove_lines(void) { + static const char header_text[] = + "@HD\tVN:1.4\n" + "@SQ\tSN:chr1\tLN:100\n" + "@SQ\tSN:chr2\tLN:200\n" + "@SQ\tSN:chr3\tLN:300\n" + "@RG\tID:run1\n" + "@RG\tID:run2\n" + "@RG\tID:run3\n" + "@PG\tID:prog1\tPN:prog1\n"; + + static const char expected[] = + "@HD\tVN:1.4\n" + "@SQ\tSN:chr1\tLN:100\n" + "@SQ\tSN:chr3\tLN:300\n" + "@PG\tID:prog1\tPN:prog1\n"; + + sam_hdr_t *header = sam_hdr_parse(sizeof(header_text) - 1, header_text); + keephash_t rh = kh_init(keep); + khint_t k; + const char *hdr_str; + int r = 0; + + if (!header) { + fail("creating sam header"); + goto err; + } + if (!rh) { + fail("creating keep hash table"); + goto err; + } + + kh_put(keep, rh, strdup("chr3"), &r); + if (r < 0) { fail("adding chr3 to hash table"); goto err; } + kh_put(keep, rh, strdup("chr1"), &r); + if (r < 0) { fail("adding chr1 to hash table"); goto err; } + + r = sam_hdr_remove_lines(header, "SQ", "SN", rh); + if (r != 0) { 
fail("sam_hdr_remove_lines SQ SN rh"); goto err; } + + r = sam_hdr_remove_lines(header, "RG", "ID", NULL); + if (r != 0) { fail("sam_hdr_remove_lines RG ID NULL"); goto err; } + + hdr_str = sam_hdr_str(header); + if (!hdr_str || strcmp(hdr_str, expected) != 0) { + fail("edited header does not match expected version"); + fprintf(stderr, + "---------- Expected:\n%s\n" + "++++++++++ Got:\n%s\n" + "====================\n", + expected, hdr_str ? hdr_str : ""); + goto err; + } + + err: + if (rh) { + for (k = 0; k < kh_end(rh); ++k) + if (kh_exist(rh, k)) free((char*)kh_key(rh, k)); + kh_destroy(keep, rh); + } + if (header) sam_hdr_destroy(header); +} + +static void check_ref_lookup(sam_hdr_t *header, const char *msg, ...) { + const char *name; + va_list args; + va_start(args, msg); + while ((name = va_arg(args, const char *)) != NULL) { + int exp = va_arg(args, int); + int tid = sam_hdr_name2tid(header, name); + if (tid != exp) + fail("%s: altname \"%s\" => %d (expected %d)", msg, name, tid, exp); + } + va_end(args); +} + +static void test_header_ref_altnames(void) { + static const char initial_header[] = + "@SQ\tSN:1\tLN:100\tAN:chr1\n" + "@SQ\tSN:chr2\tAN:2\tLN:200\n" + "@SQ\tSN:3\tLN:300\n" + "@SQ\tSN:chrMT\tLN:16569\tAN:MT,chrM,M\n"; + + sam_hdr_t *header = sam_hdr_init(); + if (header == NULL) { fail("sam_hdr_init"); return; } + + if (sam_hdr_add_lines(header, initial_header, 0) < 0) + fail("sam_hdr_add_lines() for altnames"); + + check_ref_lookup(header, "initial", + "1", 0, "chr1", 0, "2", 1, "chr2", 1, "3", 2, + "chrMT", 3, "chrM", 3, "M", 3, "fred", -1, "barney", -1, + NULL); + + if (sam_hdr_add_line(header, "SQ", "AN", "fred", "LN", "500", "SN", "barney", NULL) < 0) + fail("sam_hdr_add_line() for altnames"); + + check_ref_lookup(header, "barney added", + "1", 0, "chr1", 0, "2", 1, "chr2", 1, "3", 2, + "chrMT", 3, "chrM", 3, "M", 3, "fred", 4, "barney", 4, + NULL); + + if (sam_hdr_remove_line_id(header, "SQ", "SN", "chr2") < 0) + fail("sam_hdr_remove_line_id() 
for altnames"); + + check_ref_lookup(header, "chr2 removed", + "1", 0, "chr1", 0, "2", -1, "chr2", -1, "3", 1, + "chrMT", 2, "chrM", 2, "M", 2, "fred", 3, "barney", 3, + NULL); + + if (sam_hdr_remove_tag_id(header, "SQ", "SN", "1", "AN") < 0) + fail("sam_hdr_remove_tag_id() for altnames"); + + check_ref_lookup(header, "1's AN removed", + "1", 0, "chr1", -1, "CM000663", -1, "2", -1, "chr2", -1, "3", 1, + "chrMT", 2, "chrM", 2, "M", 2, "fred", 3, "barney", 3, + NULL); + + sam_hdr_destroy(header); + + static const char initial_header_duplicates[] = + "@SQ\tSN:1\tLN:100\tAN:foo,2\n" + "@SQ\tSN:2\tLN:200\tAN:bar\n" + "@SQ\tSN:3\tLN:300\tAN:baz,3\n"; + + header = sam_hdr_init(); + if (header == NULL) { fail("sam_hdr_init"); return; } + + int old_log_level = hts_get_log_level(); + hts_set_log_level(HTS_LOG_ERROR); // Silence "Duplicate entry AN:2" warning + + if (sam_hdr_add_lines(header, initial_header_duplicates, 0) < 0) + fail("sam_hdr_add_lines() for altnames with duplicates"); + + hts_set_log_level(old_log_level); + + // Check "2" is SN:2 and not AN:2 + check_ref_lookup(header, "initial_header_duplicates", + "1", 0, "foo", 0, + "2", 1, "bar", 1, + "3", 2, "baz", 2, NULL); + + if (sam_hdr_remove_tag_id(header, "SQ", "SN", "1", "AN") < 0) + fail("sam_hdr_remove_tag_id() for duplicate altnames SN:1"); + + // Check "2" still works and "foo" does not + check_ref_lookup(header, "initial_header_duplicates", + "1", 0, "foo", -1, + "2", 1, "bar", 1, + "3", 2, "baz", 2, NULL); + + if (sam_hdr_remove_tag_id(header, "SQ", "SN", "3", "AN") < 0) + fail("sam_hdr_remove_tag_id() for duplicate altnames SN:3"); + + // Check "3" still works and "baz" does not + check_ref_lookup(header, "initial_header_duplicates", + "1", 0, "foo", -1, + "2", 1, "bar", 1, + "3", 2, "baz", -1, NULL); + + sam_hdr_destroy(header); +} + +#define ABC50 "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxy" +#define ABC250 ABC50 ABC50 ABC50 ABC50 ABC50 + +static void samrecord_layout(void) +{ + static const 
char qnames[] = "data:," +"@SQ\tSN:CHROMOSOME_II\tLN:5000\n" + "a\t0\tCHROMOSOME_II\t100\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" + "bc\t0\tCHROMOSOME_II\t200\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" + "def\t0\tCHROMOSOME_II\t300\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" + "ghij\t0\tCHROMOSOME_II\t400\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" +"klmno\t0\tCHROMOSOME_II\t500\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" + ABC250 "\t0\tCHROMOSOME_II\t600\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" + ABC250 "1\t0\tCHROMOSOME_II\t650\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" + ABC250 "12\t0\tCHROMOSOME_II\t700\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" + ABC250 "123\t0\tCHROMOSOME_II\t750\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" + ABC250 "1234\t0\tCHROMOSOME_II\t800\t10\t4M\t*\t0\t0\tATGC\tqqqq\n" +; + + size_t bam1_t_size, bam1_t_size2; + + assert(sizeof(hts_pos_t) == 8 || sizeof(hts_pos_t) == 4); + int core_size = sizeof(hts_pos_t) == 8 ? 48 : 36; + bam1_t_size = (core_size + sizeof(int) + sizeof(char *) + sizeof(uint64_t) + + 2 * sizeof(uint32_t)); + bam1_t_size2 = bam1_t_size + 4; // Account for padding on some platforms + + if (sizeof (bam1_core_t) != core_size) + fail("sizeof bam1_core_t is %zu, expected %d", + sizeof (bam1_core_t), core_size); + + if (sizeof (bam1_t) != bam1_t_size && sizeof (bam1_t) != bam1_t_size2) + fail("sizeof bam1_t is %zu, expected either %zu or %zu", + sizeof(bam1_t), bam1_t_size, bam1_t_size2); + + copy_check_alignment(qnames, "SAM", + "test/sam_alignment.tmp.bam", "wb", NULL); + copy_check_alignment("test/sam_alignment.tmp.bam", "BAM", + "test/sam_alignment.tmp.cram", "wc", "test/ce.fa"); + copy_check_alignment("test/sam_alignment.tmp.cram", "CRAM", + "test/sam_alignment.tmp.sam_", "w", NULL); +} + +static int check_ref_lengths(const sam_hdr_t *header, + const hts_pos_t *expected_lengths, + int num_refs, const char *hdr_name) +{ + int i; + for (i = 0; i < num_refs; i++) { + hts_pos_t ln = sam_hdr_tid2len(header, i); + if (ln != expected_lengths[i]) { + fail("Wrong length for %s ref %d : " + "expected %"PRIhts_pos" got 
%"PRIhts_pos"\n", + hdr_name, i, expected_lengths[i], ln); + return -1; + } + } + return 0; +} + +static void check_big_ref(int parse_header) +{ + static const char sam_text[] = "data:," + "@HD\tVN:1.4\n" + "@SQ\tSN:large#1\tLN:5000000000\n" + "@SQ\tSN:small#1\tLN:100\n" + "@SQ\tSN:large#2\tLN:4611686018427387904\n" + "@SQ\tSN:small#2\tLN:1\n" + "r1\t0\tlarge#1\t4999999000\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n" + "r2\t0\tsmall#1\t1\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n" + "r3\t0\tlarge#2\t4611686018427387000\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n" + "p1\t99\tlarge#2\t1\t50\t8M\t=\t4611686018427387895\t4611686018427387903\tACGTACGT\tabcdefgh\n" + "p1\t147\tlarge#2\t4611686018427387895\t50\t8M\t=\t1\t-4611686018427387903\tACGTACGT\tabcdefgh\n" + "r4\t0\tsmall#2\t2\t50\t8M\t*\t0\t0\tACGTACGT\tabcdefgh\n"; + const hts_pos_t expected_lengths[] = { + 5000000000LL, 100LL, 4611686018427387904LL, 1LL + }; + const int expected_tids[] = { + 0, 1, 2, 2, 2, 3 + }; + const int expected_mtid[] = { + -1, -1, -1, 2, 2, -1 + }; + const hts_pos_t expected_positions[] = { + 4999999000LL - 1, 1LL - 1, 4611686018427387000LL - 1, 1LL - 1, + 4611686018427387895LL - 1, 2LL - 1 + }; + const hts_pos_t expected_mpos[] = { + -1, -1, -1, 4611686018427387895LL - 1, 1LL - 1, -1 + }; + samFile *in = NULL, *out = NULL; + sam_hdr_t *header = NULL, *dup_header = NULL; + bam1_t *aln = bam_init1(); + const int num_refs = sizeof(expected_lengths) / sizeof(expected_lengths[0]); + const int num_align = sizeof(expected_tids) / sizeof(expected_tids[0]); + const char *outfname = "test/sam_big_ref.tmp.sam_"; + int i, r; + char buffer[sizeof(sam_text) + 1024]; + FILE *inf = NULL; + size_t bytes; + + if (!aln) { + fail("Out of memory"); + goto cleanup; + } + + in = sam_open(sam_text, "r"); + if (!in) { + fail("Opening SAM file"); + goto cleanup; + } + out = sam_open(outfname, "w"); + if (!out) { + fail("Opening output SAM file \"%s\"", outfname); + goto cleanup; + } + header = sam_hdr_read(in); + if (!header) 
{ + fail("Reading SAM header"); + goto cleanup; + } + if (parse_header) { + // This will force the header to be parsed + if (sam_hdr_count_lines(header, "SQ") != num_refs) { + fail("Wrong number of SQ lines in header"); + goto cleanup; + } + } + if (check_ref_lengths(header, expected_lengths, num_refs, "header") < 0) + goto cleanup; + + dup_header = sam_hdr_dup(header); + if (!dup_header) { + fail("Failed to duplicate header"); + } + + if (check_ref_lengths(dup_header, expected_lengths, + num_refs, "duplicate header") < 0) + goto cleanup; + + if (sam_hdr_count_lines(dup_header, "SQ") != num_refs) { + fail("Wrong number of SQ lines in duplicate header"); + goto cleanup; + } + + if (check_ref_lengths(dup_header, expected_lengths, + num_refs, "parsed duplicate header") < 0) + goto cleanup; + + if (sam_hdr_write(out, header) < 0) { + fail("Failed to write SAM header"); + goto cleanup; + } + i = 0; + while ((r = sam_read1(in, header, aln)) >= 0) { + if (i >= num_align) { + fail("Too many alignment records.\n"); + goto cleanup; + } + if (aln->core.tid != expected_tids[i]) { + fail("Wrong tid for record %d : expected %d got %d\n", + i, expected_tids[i], aln->core.tid); + goto cleanup; + } + if (aln->core.mtid != expected_mtid[i]) { + fail("Wrong mate tid for record %d : expected %d got %d\n", + i, expected_mtid[i], aln->core.mtid); + goto cleanup; + } + if (aln->core.pos != expected_positions[i]) { + fail("Wrong position for record %d : " + "expected %"PRIhts_pos" got %"PRIhts_pos"\n", + i, expected_positions[i], aln->core.pos); + } + if (aln->core.mpos != expected_mpos[i]) { + fail("Wrong mate position for record %d : " + "expected %"PRIhts_pos" got %"PRIhts_pos"\n", + i, expected_mpos[i], aln->core.mpos); + } + if (sam_write1(out, header, aln) < 0) { + fail("Failed to write alignment record %d\n", i); + goto cleanup; + } + i++; + } + if (r < -1) { + fail("Error reading SAM alignment\n"); + goto cleanup; + } + if (i < num_align) { + fail("Not enough alignment 
records\n"); + goto cleanup; + } + r = sam_close(in); in = NULL; + if (r < 0) { + fail("sam_close(in)"); + goto cleanup; + } + r = sam_close(out); out = NULL; + if (r < 0) { + fail("sam_close(out)"); + goto cleanup; + } + + inf = fopen(outfname, "r"); + if (!inf) { + fail("Opening \"%s\"", outfname); + goto cleanup; + } + bytes = fread(buffer, 1, sizeof(buffer), inf); + if (bytes != sizeof(sam_text) - 7 + || memcmp(buffer, sam_text + 6, bytes - 7) != 0) { + fail("Output file does not match original version"); + fprintf(stderr, + "---------- Expected:\n%.*s\n" + "++++++++++ Got:\n%.*s\n" + "====================\n", + (int) sizeof(sam_text) - 7, sam_text + 6, + (int) bytes, buffer); + goto cleanup; + } + + cleanup: + bam_destroy1(aln); + sam_hdr_destroy(header); + sam_hdr_destroy(dup_header); + if (in) sam_close(in); + if (out) sam_close(out); + if (inf) fclose(inf); + unlink(outfname); + return; +} + +static void faidx1(const char *filename) +{ + int n, n_exp = 0, n_fq_exp = 0; + char tmpfilename[FILENAME_MAX], line[500]; + FILE *fin, *fout; + faidx_t *fai; + + fin = fopen(filename, "rb"); + if (fin == NULL) fail("can't open %s", filename); + snprintf(tmpfilename, sizeof(tmpfilename), "%s.tmp", filename); + fout = fopen(tmpfilename, "wb"); + if (fout == NULL) fail("can't create temporary %s", tmpfilename); + while (fgets(line, sizeof line, fin)) { + if (line[0] == '>') n_exp++; + if (line[0] == '+' && line[1] == '\n') n_fq_exp++; + fputs(line, fout); + } + fclose(fin); + fclose(fout); + + if (n_exp == 0 && n_fq_exp != 0) { + // probably a fastq file + n_exp = n_fq_exp; + } + + if (fai_build(tmpfilename) < 0) fail("can't index %s", tmpfilename); + fai = fai_load(tmpfilename); + if (fai == NULL) { fail("can't load faidx file %s", tmpfilename); return; } + + n = faidx_fetch_nseq(fai); + if (n != n_exp) + fail("%s: faidx_fetch_nseq returned %d, expected %d", filename, n, n_exp); + + n = faidx_nseq(fai); + if (n != n_exp) + fail("%s: faidx_nseq returned %d, expected %d", 
filename, n, n_exp); + + fai_destroy(fai); +} + +static void test_empty_sam_file(const char *filename) +{ + samFile *in = sam_open(filename, "r"); + if (in) { + enum htsExactFormat format = hts_get_format(in)->format; + bam1_t *aln = bam_init1(); + sam_hdr_t *header = sam_hdr_read(in); + int ret = sam_read1(in, header, aln); + + if (format != empty_format) + fail("detected %s as %d (expected empty_format)", filename, format); + if (header) + fail("sam_hdr_read() from %s should fail", filename); + if (ret >= -1) + fail("sam_read1() from %s returned %d but should fail", filename, ret); + + bam_destroy1(aln); + sam_hdr_destroy(header); + sam_close(in); + } + else fail("can't open %s to read as SAM", filename); +} + +static void test_text_file(const char *filename, int nexp) +{ + htsFile *in = hts_open(filename, "r"); + if (in) { + kstring_t str = KS_INITIALIZE; + int ret, n = 0; + while ((ret = hts_getline(in, '\n', &str)) >= 0) { + size_t len = strlen(str.s); + n++; + if (ret != len) fail("hts_getline read length %d (expected %zu)", ret, len); + } + if (ret != -1) fail("hts_getline got an error from %s", filename); + if (n != nexp) fail("hts_getline read %d lines from %s (expected %d)", n, filename, nexp); + + hts_close(in); + free(str.s); + } + else fail("can't open %s to read as text", filename); +} + +static void check_enum1(void) +{ + // bgzf_compression() returns int, but enjoys this correspondence + if (no_compression != 0) fail("no_compression is %d", no_compression); + if (gzip != 1) fail("gzip is %d", gzip); + if (bgzf != 2) fail("bgzf is %d", bgzf); +} + +static void check_cigar_tab(void) +{ + int i, n_neg = 0; + + for (i = 0; i < 256; ++i) + if (bam_cigar_table[i] < 0) n_neg++; + + if (n_neg + strlen(BAM_CIGAR_STR) != 256) + fail("bam_cigar_table has %d unset entries", n_neg); + + for (i = 0; BAM_CIGAR_STR[i]; ++i) + if (bam_cigar_table[(unsigned char) BAM_CIGAR_STR[i]] != i) + fail("bam_cigar_table['%c'] is not %d", BAM_CIGAR_STR[i], i); +} + +#define 
MAX_RECS 1000 +#define SEQ_LEN 100 +#define REC_LENGTH 150 // Undersized so some won't fit. + +static int generator(const char *name) +{ + FILE *f = fopen(name, "w"); + char *ref = NULL; + char qual[101]; + size_t i; + uint32_t lfsr = 0xbadcafe; + int res = -1; + + if (!f) { + fail("Couldn't open \"%s\"", name); + return -1; + } + + ref = malloc(MAX_RECS + SEQ_LEN + 1); + if (!ref) goto cleanup; + for (i = 0; i < MAX_RECS + SEQ_LEN; i++) { + // Linear-feedback shift register to make random reference + lfsr ^= lfsr << 13; + lfsr ^= lfsr >> 17; + lfsr ^= lfsr << 5; + ref[i] = "ACGT"[lfsr & 3]; + } + ref[MAX_RECS + SEQ_LEN] = '\0'; + for (i = 0; i < SEQ_LEN; i++) { + qual[i] = 'A' + (i & 0xf); + } + + if (fputs("@HD\tVN:1.4\n", f) < 0) goto cleanup; + if (fprintf(f, "@SQ\tSN:ref1\tLN:%d\n", MAX_RECS + SEQ_LEN) < 0) + goto cleanup; + for (i = 0; i < MAX_RECS; i++) { + if (fprintf(f, "read%zu\t0\tref1\t%zu\t64\t100M\t*\t0\t0\t%.*s\t%.*s\n", + i + 1, i + 1, SEQ_LEN, ref + i, SEQ_LEN, qual) < 0) + goto cleanup; + } + + if (fclose(f) == 0) + res = 0; + f = NULL; + + cleanup: + if (f) fclose(f); + free(ref); + return res; +} + +static int read_data_block(const char *in_name, samFile *fp_in, + const char *out_name, samFile *fp_out, + sam_hdr_t *header, bam1_t *recs, size_t max_recs, + uint8_t *buffer, size_t bufsz, size_t *nrecs_out) { + size_t buff_used = 0, nrecs; + uint32_t new_m_data; + int ret = -1, res = -1; + + for (nrecs = 0; nrecs < max_recs; nrecs++) { + bam_set_mempolicy(&recs[nrecs], + BAM_USER_OWNS_STRUCT|BAM_USER_OWNS_DATA); + + recs[nrecs].data = &buffer[buff_used]; + recs[nrecs].m_data = bufsz - buff_used; + + res = sam_read1(fp_in, header, &recs[nrecs]); + if (res < 0) break; // EOF or error + + if (fp_out) { + if (sam_write1(fp_out, header, &recs[nrecs]) < 0) { + nrecs++; // To return correct count + fail("sam_write1() to \"%s\"", out_name); + goto out; + } + } + + if ((bam_get_mempolicy(&recs[nrecs]) & BAM_USER_OWNS_DATA) == 0) { + continue; // Data not 
put in buffer + } + + new_m_data = ((uint32_t) recs[nrecs].l_data + 7) & (~7U); + if (new_m_data < recs[nrecs].m_data) recs[nrecs].m_data = new_m_data; + + buff_used += recs[nrecs].m_data; + } + if (res < -1) { + fail("sam_read1() from \"%s\" failed", in_name); + } else { + ret = 0; + } + + out: + *nrecs_out = nrecs; + return ret; +} + +static void test_parse_decimal1(long long exp, const char *str, size_t exp_consumed, int flags, const char *warning) +{ + if (warning) fprintf(stderr, "(Expect %s message for \"%s\")\n", warning, str); + + long long val = hts_parse_decimal(str, NULL, flags); + if (val != exp) fail("hts_parse_decimal(\"%s\", NULL, %d) returned %lld, expected %lld", str, flags, val, exp); + + char *end; + val = hts_parse_decimal(str, &end, flags); + if (val != exp) fail("hts_parse_decimal(\"%s\", ..., %d) returned %lld, expected %lld", str, flags, val, exp); + size_t consumed = end - str; + if (consumed != exp_consumed) fail("hts_parse_decimal(\"%s\", ..., %d) consumed %zu chars, expected %zu", str, flags, consumed, exp_consumed); +} + +static void test_parse_decimal(void) +{ + test_parse_decimal1(37, "+37", 3, 0, NULL); + test_parse_decimal1(-1001, " \t -1,001x", 9, HTS_PARSE_THOUSANDS_SEP, "trailing 'x'"); + test_parse_decimal1(LLONG_MAX, "+9223372036854775807", 20, 0, NULL); + test_parse_decimal1(LLONG_MIN, "-9,223,372,036,854,775,808", 26, HTS_PARSE_THOUSANDS_SEP, NULL); + test_parse_decimal1(1500, "1.5e3", 5, 0, NULL); + test_parse_decimal1(1500, "1.5e+3k", 6, 0, "trailing 'k'"); + test_parse_decimal1(1500000000, "1.5G", 4, 0, NULL); + test_parse_decimal1(12345, "12.345k", 7, 0, NULL); + test_parse_decimal1(12345, "12.3456k", 8, 0, "dropped fraction"); + test_parse_decimal1(0, "A", 0, 0, "invalid numeric"); + test_parse_decimal1(0, "G", 0, 0, "invalid numeric"); + test_parse_decimal1(0, " +/-", 0, 0, "invalid numeric"); + test_parse_decimal1(0, " \t -.e+9999", 0, 0, "invalid numeric"); +} + +static void test_mempolicy(void) +{ + size_t bufsz = 
MAX_RECS * REC_LENGTH, nrecs = 0, i; + bam1_t *recs = calloc(MAX_RECS, sizeof(bam1_t)); + uint8_t *buffer = malloc(bufsz); + const char *fname = "test/sam_alignment.tmp.sam"; + const char *bam_name = "test/sam_alignment.tmp.bam"; + const char *cram_name = "test/sam_alignment.tmp.cram"; + const char tag_text[] = + "lengthy text ... lengthy text ... lengthy text ... lengthy text ... " + "lengthy text ... lengthy text ... lengthy text ... lengthy text ... " + "lengthy text ... lengthy text ... lengthy text ... lengthy text ... " + "lengthy text ... lengthy text ... lengthy text ... lengthy text ... " + "lengthy text ... lengthy text ... lengthy text ... lengthy text ... "; + int res = 0; + samFile *fp = NULL, *bam_fp = NULL, *cram_fp = NULL; + htsFormat cram_fmt; + sam_hdr_t *header = NULL; + + if (!recs || !buffer) { + fail("Allocating buffer"); + goto cleanup; + } + + memset(&cram_fmt, 0, sizeof(cram_fmt)); + + // Make test file + if (generator(fname) < 0) + goto cleanup; + + // Open and read header + fp = sam_open(fname, "r"); + if (!fp) { + fail("sam_open(\"%s\")", fname); + goto cleanup; + } + + bam_fp = sam_open(bam_name, "wb"); + if (!fp) { + fail("sam_open(\"%s\")", bam_name); + goto cleanup; + } + + header = sam_hdr_read(fp); + if (!header) { + fail("read header from \"%s\"", fname); + goto cleanup; + } + + if (sam_hdr_write(bam_fp, header) < 0) { + fail("sam_hdr_write() to \"%s\"", bam_name); + goto cleanup; + } + + if (read_data_block(fname, fp, bam_name, bam_fp, header, recs, + MAX_RECS, buffer, bufsz, &nrecs) < 0) + goto cleanup; + + res = sam_close(bam_fp); + bam_fp = NULL; + if (res < 0) { + fail("sam_close(\"%s\")", bam_name); + goto cleanup; + } + + // Add a big tag to some records so they no longer fit in the allocated + // buffer space. + for (i = 0; i < MAX_RECS; i += 11) { + if (bam_aux_update_str(&recs[i], "ZZ", + sizeof(tag_text) - 1, tag_text) < 0) { + fail("bam_aux_update_str()"); + goto cleanup; + } + } + + // Delete all the records. 
bam_destroy1() should free the data + // for the ones that were expanded. + for (i = 0; i < nrecs; i++) { + bam_destroy1(&recs[i]); + } + + res = sam_close(fp); + fp = NULL; + if (res < 0) { + fail("sam_close(\"%s\")", fname); + goto cleanup; + } + + // Same test but reading BAM, writing CRAM + nrecs = 0; + sam_hdr_destroy(header); + header = NULL; + + bam_fp = sam_open(bam_name, "r"); + if (!bam_fp) { + fail("sam_open(\"%s\", \"r\")", bam_name); + goto cleanup; + } + + if (hts_parse_format(&cram_fmt, "cram,no_ref") < 0) { + fail("hts_parse_format"); + goto cleanup; + } + cram_fp = hts_open_format(cram_name, "wc", &cram_fmt); + if (!cram_fp) { + fail("hts_open_format(\"%s\", \"wc\")", cram_name); + goto cleanup; + } + + header = sam_hdr_read(bam_fp); + if (!header) { + fail("read header from \"%s\"", bam_name); + goto cleanup; + } + + if (sam_hdr_write(cram_fp, header) < 0) { + fail("sam_hdr_write() to \"%s\"", cram_name); + goto cleanup; + } + + if (read_data_block(bam_name, bam_fp, cram_name, cram_fp, header, recs, + MAX_RECS, buffer, bufsz, &nrecs) < 0) + goto cleanup; + + res = sam_close(cram_fp); + cram_fp = NULL; + if (res < 0) { + fail("sam_close(\"%s\")", cram_name); + goto cleanup; + } + + for (i = 0; i < MAX_RECS; i += 11) { + if (bam_aux_update_str(&recs[i], "ZZ", + sizeof(tag_text) - 1, tag_text) < 0) { + fail("bam_aux_update_str()"); + goto cleanup; + } + } + + for (i = 0; i < nrecs; i++) { + bam_destroy1(&recs[i]); + } + + // Now try reading the cram file + nrecs = 0; + sam_hdr_destroy(header); + header = NULL; + + cram_fp = sam_open(cram_name, "r"); + if (!cram_fp) { + fail("sam_open(\"%s\", \"r\")", cram_name); + goto cleanup; + } + + header = sam_hdr_read(cram_fp); + if (!header) { + fail("read header from \"%s\"", cram_name); + goto cleanup; + } + + if (read_data_block(cram_name, cram_fp, NULL, NULL, header, recs, + MAX_RECS, buffer, bufsz, &nrecs) < 0) + goto cleanup; + + for (i = 0; i < MAX_RECS; i += 11) { + if (bam_aux_update_str(&recs[i], 
"ZZ", + sizeof(tag_text) - 1, tag_text) < 0) { + fail("bam_aux_update_str()"); + goto cleanup; + } + } + + cleanup: + sam_hdr_destroy(header); + if (fp) sam_close(fp); + if (bam_fp) sam_close(bam_fp); + if (cram_fp) sam_close(cram_fp); + + for (i = 0; i < nrecs; i++) { + bam_destroy1(&recs[i]); + } + free(buffer); + free(recs); + if (cram_fmt.specific) { + hts_opt_free(cram_fmt.specific); + } +} + +static void test_bam_set1_minimal(void) +{ + int r; + bam1_t *bam = NULL; + bam = bam_init1(); + VERIFY(bam != NULL, "failed to initialize BAM struct."); + + r = bam_set1(bam, 0, NULL, BAM_FUNMAP, -1, 0, 0xff, 0, NULL, -1, 0, 0, 0, NULL, NULL, 0); + // expected number of bytes written is qname: 4, cigar: 0, sequence: 0, qual: 0, aux: 0. + VERIFY(r == 4, "call to bam_set1() failed or did not write the correct number of bytes."); + + VERIFY(bam->core.l_qname == 4, "l_qname should include terminating null and be padded to the nearest 32-bit boundary."); + VERIFY(bam->core.l_extranul == 2, "l_extranul not set correctly"); + VERIFY(strcmp(bam_get_qname(bam), "*") == 0, "qname not set correctly."); + + VERIFY(bam->core.pos == 0, "pos not set correctly."); + VERIFY(bam->core.tid == -1, "tid not set correctly."); + VERIFY(bam->core.bin == hts_reg2bin(0, 1, 14, 5), "bin not set correctly."); + VERIFY(bam->core.qual == 0xff, "mapq not set correctly."); + VERIFY(bam->core.flag == BAM_FUNMAP, "flag not set correctly."); + VERIFY(bam->core.n_cigar == 0, "n_cigar not set correctly."); + VERIFY(bam->core.mtid == -1, "mtid not set correctly."); + VERIFY(bam->core.mpos == 0, "mpos not set correctly."); + VERIFY(bam->core.isize == 0, "isize not set correctly."); + VERIFY(bam->core.l_qseq == 0, "l_seq not set correctly."); + VERIFY(bam_get_l_aux(bam) == 0, "l_aux not set correctly."); + +cleanup: + if (bam != NULL) bam_destroy1(bam); +} + +static void test_bam_set1_full(void) +{ + const char *qname = "!??AAA~~~~"; + const uint32_t cigar[] = { 6 << BAM_CIGAR_SHIFT | BAM_CMATCH, 2 << 
BAM_CIGAR_SHIFT | BAM_CINS, 2 << BAM_CIGAR_SHIFT | BAM_CMATCH }; + const char *seq = "TGGACTACGA"; + const char *qual = "DBBBB+=7=0"; + + int r, i; + bam1_t *bam = NULL; + bam = bam_init1(); + VERIFY(bam != NULL, "failed to initialize BAM struct."); + + r = bam_set1(bam, strlen(qname), qname, + BAM_FREVERSE, 1, 1000, 42, + sizeof(cigar) / 4, cigar, 2, 2000, 3000, + strlen(seq), seq, qual, 64); + // expected number of bytes written is qname: 12, cigar: 12, sequence: 5, qual: 10, aux: 0. + VERIFY(r == 39, "call to bam_set1() failed or did not write the correct number of bytes."); + + VERIFY(bam->core.l_qname == 12, "l_qname should include terminating null and be padded to the nearest 32-bit boundary."); + VERIFY(bam->core.l_extranul == 1, "l_extranul not set correctly"); + VERIFY(strcmp(bam_get_qname(bam), qname) == 0, "qname not set correctly."); + + VERIFY(bam->core.n_cigar == sizeof(cigar) / 4, "n_cigar not set correctly."); + VERIFY(memcmp(bam_get_cigar(bam), cigar, sizeof(cigar)) == 0, "cigar not set correctly."); + + VERIFY(bam->core.l_qseq == strlen(seq), "l_seq not set correctly."); + for (i = 0; i < strlen(seq); i++) { + VERIFY(bam_seqi(bam_get_seq(bam), i) == seq_nt16_table[(uint8_t)seq[i]], "seq not set correctly."); + } + VERIFY(memcmp(bam_get_qual(bam), qual, strlen(seq)) == 0, "qual not set correctly."); + + VERIFY(bam->core.pos == 1000, "pos not set correctly."); + VERIFY(bam->core.tid == 1, "tid not set correctly."); + VERIFY(bam->core.bin == hts_reg2bin(1000, 1010, 14, 5), "bin not set correctly."); + VERIFY(bam->core.qual == 42, "mapq not set correctly."); + VERIFY(bam->core.flag == BAM_FREVERSE, "flag not set correctly."); + VERIFY(bam->core.mtid == 2, "mtid not set correctly."); + VERIFY(bam->core.mpos == 2000, "mpos not set correctly."); + VERIFY(bam->core.isize == 3000, "isize not set correctly."); + VERIFY(bam_get_l_aux(bam) == 0, "l_aux not set correctly."); + VERIFY(bam->m_data - bam->l_data >= 64, "not enough memory allocated for aux 
data."); + +cleanup: + if (bam != NULL) bam_destroy1(bam); +} + +static void test_bam_set1_even_and_odd_seq_len(void) +{ + const char *seq_even = "TGGACTACGA"; + const char *seq_odd = "TGGACTACGAC"; + + int r, i; + bam1_t *bam = NULL; + bam = bam_init1(); + VERIFY(bam != NULL, "failed to initialize BAM struct."); + + r = bam_set1(bam, 0, NULL, BAM_FUNMAP, 0, 0, 0, 0, NULL, 0, 0, 0, + strlen(seq_even), seq_even, NULL, 0); + VERIFY(r >= 0, "call to bam_set1() failed."); + VERIFY(bam->core.l_qseq == strlen(seq_even), "l_seq not set correctly."); + for (i = 0; i < strlen(seq_even); i++) { + VERIFY(bam_seqi(bam_get_seq(bam), i) == seq_nt16_table[(uint8_t)seq_even[i]], "seq not set correctly."); + } + + r = bam_set1(bam, 0, NULL, BAM_FUNMAP, 0, 0, 0, 0, NULL, 0, 0, 0, + strlen(seq_odd), seq_odd, NULL, 0); + VERIFY(r >= 0, "call to bam_set1() failed."); + VERIFY(bam->core.l_qseq == strlen(seq_odd), "l_seq not set correctly."); + for (i = 0; i < strlen(seq_odd); i++) { + VERIFY(bam_seqi(bam_get_seq(bam), i) == seq_nt16_table[(uint8_t)seq_odd[i]], "seq not set correctly."); + } + +cleanup: + if (bam != NULL) bam_destroy1(bam); +} + +static void test_bam_set1_with_seq_but_no_qual(void) +{ + const char *seq = "TGGACTACGA"; + + int r, i; + bam1_t *bam = NULL; + bam = bam_init1(); + VERIFY(bam != NULL, "failed to initialize BAM struct."); + + r = bam_set1(bam, 0, NULL, + BAM_FUNMAP, 0, 0, 0, + 0, NULL, 0, 0, 0, + strlen(seq), seq, NULL, 0); + VERIFY(r >= 0, "call to bam_set1() failed."); + VERIFY(bam->core.l_qseq == strlen(seq), "l_seq not set correctly."); + for (i = 0; i < strlen(seq); i++) { + VERIFY(bam_seqi(bam_get_seq(bam), i) == seq_nt16_table[(uint8_t)seq[i]], "seq not set correctly."); + VERIFY(bam_get_qual(bam)[i] == 0xff, "qual not set correctly"); + } + +cleanup: + if (bam != NULL) bam_destroy1(bam); +} + +static void test_bam_set1_validate_qname(void) +{ + int r; + bam1_t *bam = NULL; + bam = bam_init1(); + VERIFY(bam != NULL, "failed to initialize BAM struct."); + 
+ // qname too long + const char too_long[255] = { 'A' }; + r = bam_set1(bam, sizeof(too_long), too_long, BAM_FUNMAP, -1, 0, 0xff, 0, NULL, -1, 0, 0, 0, NULL, NULL, 0); + VERIFY(r < 0, "call to bam_set1() should have failed."); + VERIFY(errno == EINVAL, "errno should be set."); + +cleanup: + if (bam != NULL) bam_destroy1(bam); +} + +static void test_bam_set1_validate_seq(void) +{ + int r; + bam1_t *bam = NULL; + bam = bam_init1(); + VERIFY(bam != NULL, "failed to initialize BAM struct."); + + // seq too long + const char *sequence = "C"; + r = bam_set1(bam, 0, NULL, BAM_FUNMAP, -1, 0, 0xff, 0, NULL, -1, 0, 0, (size_t)INT32_MAX + 1, sequence, NULL, 0); + VERIFY(r < 0, "call to bam_set1() should have failed."); + VERIFY(errno == EINVAL, "errno should be set."); + +cleanup: + if (bam != NULL) bam_destroy1(bam); +} + +static void test_bam_set1_validate_cigar(void) +{ + const uint32_t cigar[] = { 20 << BAM_CIGAR_SHIFT | BAM_CMATCH }; + const char *seq = "TGGACTACGA"; + + int r; + bam1_t *bam = NULL; + bam = bam_init1(); + VERIFY(bam != NULL, "failed to initialize BAM struct."); + + // mapped query must have a CIGAR + r = bam_set1(bam, 0, NULL, 0, -1, 0, 0xff, 0, NULL, -1, 0, 0, strlen(seq), seq, NULL, 0); + VERIFY(r < 0, "call to bam_set1() should have failed."); + VERIFY(errno == EINVAL, "errno should be set."); + + // pos + ref len from CIGAR should be <= HTS_POS_MAX + r = bam_set1(bam, 0, NULL, 0, -1, HTS_POS_MAX - 10, 0xff, sizeof(cigar) / 4, cigar, -1, 0, 0, 0, NULL, NULL, 0); + VERIFY(r < 0, "call to bam_set1() should have failed."); + VERIFY(errno == EINVAL, "errno should be set."); + + // query len from CIGAR should match the sequence length + r = bam_set1(bam, 0, NULL, 0, -1, 0, 0xff, sizeof(cigar) / 4, cigar, -1, 0, 0, strlen(seq), seq, NULL, 0); + VERIFY(r < 0, "call to bam_set1() should have failed."); + VERIFY(errno == EINVAL, "errno should be set."); + +cleanup: + if (bam != NULL) bam_destroy1(bam); +} + +static void 
test_bam_set1_validate_size_limits(void) +{ + const uint32_t cigar[] = { 20 << BAM_CIGAR_SHIFT | BAM_CMATCH }; + const char *seq = "TGGACTACGA"; + + int r; + bam1_t *bam = NULL; + bam = bam_init1(); + VERIFY(bam != NULL, "failed to initialize BAM struct."); + + // very long sequence. each base counts for 1/2 byte of sequence data and + // 1 byte of sequence quality data. the sum of all components may not exceed + // INT32_MAX, which is the maximum possible value that can be stored in l_data. + // In this case the 4 bytes of qname will cause it to overflow. + r = bam_set1(bam, 0, NULL, BAM_FUNMAP, -1, 0, 0xff, 0, NULL, -1, 0, 0, 2 * (size_t)INT32_MAX / 3, seq, NULL, 0); + VERIFY(r < 0, "call to bam_set1() should have failed."); + VERIFY(errno == EINVAL, "errno should be set."); + + // very long CIGAR + r = bam_set1(bam, 0, NULL, BAM_FUNMAP, -1, 0, 0xff, (size_t)INT32_MAX / 4, cigar, -1, 0, 0, 0, NULL, NULL, 0); + VERIFY(r < 0, "call to bam_set1() should have failed."); + VERIFY(errno == EINVAL, "errno should be set."); + + // very long aux + r = bam_set1(bam, 0, NULL, BAM_FUNMAP, -1, 0, 0xff, 0, NULL, -1, 0, 0, 0, NULL, NULL, INT32_MAX); + VERIFY(r < 0, "call to bam_set1() should have failed."); + VERIFY(errno == EINVAL, "errno should be set."); + +cleanup: + if (bam != NULL) bam_destroy1(bam); +} + +static void test_bam_set1_write_and_read_back(void) +{ + const char *qname = "q1"; + const uint32_t cigar[] = { 6 << BAM_CIGAR_SHIFT | BAM_CMATCH, 2 << BAM_CIGAR_SHIFT | BAM_CINS, 2 << BAM_CIGAR_SHIFT | BAM_CMATCH }; + const char *seq = "TGGACTACGA"; + const char *qual = "DBBBB+=7=0"; + const char *temp_fname = "test/test_bam_set1_write_and_read_back.tmp.bam"; + + int r; + htsFile *writer = NULL, *reader = NULL; + sam_hdr_t *w_header = NULL, *r_header = NULL; + bam1_t *w_bam = NULL, *r_bam = NULL; + kstring_t ks = KS_INITIALIZE; + + // open file for writing + writer = hts_open(temp_fname, "wb"); + VERIFY(writer != NULL, "failed to open bam file for writing."); + + // 
write header + w_header = bam_hdr_init(); + VERIFY(w_header != NULL, "failed to initialize bam header."); + r = sam_hdr_add_line(w_header, "SQ", "SN", "t1", "LN", "5000", NULL); + VERIFY(r == 0, "failed to add SQ header line."); + r = sam_hdr_write(writer, w_header); + VERIFY(r == 0, "failed to write bam header."); + + // write alignments + w_bam = bam_init1(); + VERIFY(w_bam != NULL, "failed to initialize BAM struct."); + r = bam_set1(w_bam, strlen(qname), qname, + BAM_FREVERSE, 0, 1000, 42, + sizeof(cigar) / 4, cigar, 0, 2000, 3000, + strlen(seq), seq, qual, 64); + VERIFY(r >= 0, "call to bam_set1() failed."); + r = sam_write1(writer, w_header, w_bam); + VERIFY(r >= 0, "failed to write alignment."); + bam_destroy1(w_bam); + + // close file + r = hts_close(writer); + VERIFY(r == 0, "failed to close bam file for writing."); + sam_hdr_destroy(w_header); + + // open file for reading + reader = hts_open(temp_fname, "rb"); + VERIFY(reader != NULL, "failed to open bam file for reading."); + + // read header + r_header = sam_hdr_read(reader); + VERIFY(r_header != NULL, "failed to read bam header."); + r = sam_hdr_find_tag_id(r_header, "SQ", NULL, NULL, "SN", &ks); + VERIFY(r == 0, "failed to read SQ/SN value"); + VERIFY(strcmp(ks_c_str(&ks), "t1") == 0, "expected reference sequence name in the header == 't1'"); + VERIFY(r_header->n_targets == 1, "expected number of reference sequences == 1"); + VERIFY(strcmp(r_header->target_name[0], "t1") == 0, "expected reference sequence name == 't1'"); + VERIFY(r_header->target_len[0] == 5000, "expected reference sequence length == 5000"); + + // read alignments + r_bam = bam_init1(); + VERIFY(r_bam != NULL, "failed to initialize BAM struct."); + r = sam_read1(reader, r_header, r_bam); + VERIFY(r >= 0, "failed to read alignment."); + VERIFY(strcmp(bam_get_qname(r_bam), qname) == 0, "qname does not match."); + VERIFY(r_bam->core.n_cigar == sizeof(cigar) / 4, "cigar length does not match."); + VERIFY(memcmp(bam_get_cigar(r_bam), cigar, 
sizeof(cigar)) == 0, "cigar data does not match."); + VERIFY(r_bam->core.l_qseq == strlen(seq), "sequence length does not match."); + + r = sam_read1(reader, r_header, r_bam); + VERIFY(r < 0, "expected no more alignments."); + bam_destroy1(r_bam); + + // close file + r = hts_close(reader); + VERIFY(r == 0, "failed to close bam file for reading."); + sam_hdr_destroy(r_header); + +cleanup: + ks_free(&ks); +} + +static void test_cigar_api(void) +{ + uint32_t *buf = NULL; + char *cig = "*"; + char *end; + size_t m = 0; + int n; + n = sam_parse_cigar(cig, &end, &buf, &m); + VERIFY(n == 0 && m == 0 && (end-cig) == 1, "failed to parse undefined CIGAR"); + cig = "2M3X1I10M5D"; + n = sam_parse_cigar(cig, &end, &buf, &m); + VERIFY(n == 5 && m > 0 && (end-cig) == 11, "failed to parse CIGAR string: 2M3X1I10M5D"); + n = sam_parse_cigar("722M15D187217376188323783284M67I", NULL, &buf, &m); + VERIFY(n == -1, "failed to flag CIGAR string with long op length: 722M15D187217376188323783284M67I"); + n = sam_parse_cigar("53I722MD8X", NULL, &buf, &m); + VERIFY(n == -1, "failed to flag CIGAR string with no op length: 53I722MD8X"); + +cleanup: + free(buf); +} + +int main(int argc, char **argv) +{ + int i; + + status = EXIT_SUCCESS; + + aux_fields1(); + iterators1(); + samrecord_layout(); + use_header_api(); + test_header_pg_lines(); + test_header_pg_loops(); + test_header_updates(); + test_header_remove_lines(); + test_header_ref_altnames(); + test_empty_sam_file("test/emptyfile"); + test_text_file("test/emptyfile", 0); + test_text_file("test/xx#pair.sam", 7); + test_text_file("test/xx.fa", 7); + test_text_file("test/faidx/fastqs.fq", 500); + check_enum1(); + check_cigar_tab(); + check_big_ref(0); + check_big_ref(1); + test_parse_decimal(); + test_mempolicy(); + set_qname(); + for (i = 1; i < argc; i++) faidx1(argv[i]); + + hts_set_log_level(HTS_LOG_OFF); + test_bam_set1_minimal(); + test_bam_set1_full(); + test_bam_set1_even_and_odd_seq_len(); + test_bam_set1_with_seq_but_no_qual(); + 
test_bam_set1_validate_qname(); + test_bam_set1_validate_seq(); + test_bam_set1_validate_cigar(); + test_bam_set1_validate_size_limits(); + test_bam_set1_write_and_read_back(); + test_cigar_api(); + + return status; +} diff --git a/src/htslib-1.21/test/sam_filter/filter.sh b/src/htslib-1.21/test/sam_filter/filter.sh new file mode 100755 index 0000000..575cb13 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/filter.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# +# Copyright (C) 2020 Genome Research Ltd. +# +# Author: James Bonfield +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Load in the test driver +. ../simple_test_driver.sh + +echo "Testing sam_filter..." + +tv="../test_view" + +test_driver $@ + +exit $? 
diff --git a/src/htslib-1.21/test/sam_filter/filter.tst b/src/htslib-1.21/test/sam_filter/filter.tst new file mode 100644 index 0000000..e74c307 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/filter.tst @@ -0,0 +1,59 @@ +# Copyright (C) 2020, 2022 Genome Research Ltd. +# +# Author: James Bonfield +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# First field: +# INIT = initialisation, not counted in testing +# P = expected to pass (zero return; expected output matches, if present) +# N = expected to return non-zero +# F = expected to fail +# +# Second field (P/N/F only): +# Filename of expected output. If '.', output is not checked +# +# Rest: +# Command to execute. 
$tv is replaced with the path to test_view + +# String matches +P string1.out $tv -i 'filter=qname =~ "\.1" && cigar =~ "D"' ../ce#1000.sam +P string2.out $tv -i 'filter=rname=="CHROMOSOME_II"' ../ce#5b.sam +P string3.out $tv -i 'filter=rname=~"CHROMOSOME_II"' ../ce#5b.sam +P string4.out $tv -i 'filter=cigar=~"D"' ../ce#1000.sam +P string5.out $tv -i 'filter=seq =~ "(AT){2}"' ../ce#1000.sam +P string6.out $tv -i 'filter=library=="x"' ../xx#rg.sam +P string7.out $tv -i 'filter=library!="x"' ../xx#rg.sam + +# Integer ops +P int1.out $tv -i 'filter=pos % 23 == 11' ../ce#1000.sam | grep -E -cv '^@' +P int2.out $tv -i 'filter=qlen/(flag*mapq+pos)>5' ../ce#1000.sam | grep -E -cv '^@' + +# Aux tags +P int3.out $tv -i 'filter=[NM]>=10 || [MD]=~"A.*A.*A"' -t4 ../ce#1000.sam | grep -E -cv '^@' + +# Functions. +P func1.out $tv -i 'filter=length(seq) != qlen' ../ce#5b.sam | grep -E -cv '^@' +P func2.out $tv -i 'filter=min(qual) >= 20' ../ce#1000.sam | grep -E -cv '^@' +P func3.out $tv -i 'filter=max(qual) <= 20' ../ce#1000.sam | grep -E -cv '^@' +P func4.out $tv -i 'filter=avg(qual) >= 20 && avg(qual) <= 30' ../ce#1000.sam | grep -E -cv '^@' +P func5.out $tv -i 'filter=sclen>=20' ../realn02.sam | grep -E -v '^@' +P func6.out $tv -i 'filter=rlen<50' ../realn02.sam | grep -E -v '^@' +P func7.out $tv -i 'filter=qlen>100' ../realn02.sam | grep -E -v '^@' +P func8.out $tv -i 'filter=hclen>=4' ../c1#clip.sam | grep -E -v '^@' diff --git a/src/htslib-1.21/test/sam_filter/func1.out b/src/htslib-1.21/test/sam_filter/func1.out new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/func1.out @@ -0,0 +1 @@ +1 diff --git a/src/htslib-1.21/test/sam_filter/func2.out b/src/htslib-1.21/test/sam_filter/func2.out new file mode 100644 index 0000000..d81cc07 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/func2.out @@ -0,0 +1 @@ +42 diff --git a/src/htslib-1.21/test/sam_filter/func3.out b/src/htslib-1.21/test/sam_filter/func3.out new file mode 100644 
index 0000000..0cfbf08 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/func3.out @@ -0,0 +1 @@ +2 diff --git a/src/htslib-1.21/test/sam_filter/func4.out b/src/htslib-1.21/test/sam_filter/func4.out new file mode 100644 index 0000000..103a99d --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/func4.out @@ -0,0 +1 @@ +604 diff --git a/src/htslib-1.21/test/sam_filter/func5.out b/src/htslib-1.21/test/sam_filter/func5.out new file mode 100644 index 0000000..6c2e2bc --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/func5.out @@ -0,0 +1,5 @@ +ERR013140.3521432 99 17 1 29 22S86M = 226 313 AGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCA @AEDGBHIIIIIFJGIKHGHIJJJEJKHJKJKGKLLIFHKLLCJJIDEFFHKHEHHJIIIDJEEEJEIKGJIHCGKHFKFE9BBDIAJAHF4?DE@I:DD48(86D=> MD:Z:86 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M +ERR156632.12704932 163 17 1 29 36S64M = 195 293 TGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTG BFAFGFEIGFEFHHEIDKJGHHHJIIE=@KKGGKJGIBLLMFKMDIIHJKKHFELLLKFIHMHIHHIHLKJFCHFJIJAID=JHKFGHJIHKKCH:@HD? 
MD:Z:64 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M +ERR156632.9601178 99 17 1 29 62S38M = 279 377 CTATGACAGGGAGGTCATGTGCAGGCTGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGA DEEEIIHHKIJILKHLHIKEKHHMKLKKJGKKKKLKLFIHEKIKL=KLJLKIILHKMH9LJJJJLHLHJJKJJKMLKJD>MJKLEHIGHIH=FFCHF>BE MD:Z:38 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M +ERR013140.13475139 99 17 2401 60 88M20S = 2680 386 AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGGAGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGCGTTTGAGGCTGCTGT @CEBEEIHHHICFJIFKGHIKJHII>DBC:CE>A8C>C>7DBA=BEDDB4=9;:@=;@D@@=B@E.3?972<>6@8=>?1$0:95%5%*1=8;0%4<228% X0:i:1 X1:i:0 XC:i:88 MD:Z:88 RG:Z:rg AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U +ERR013140.23480670 133 17 3771 0 35M73S = 3771 0 TTCTCATCAATCCCTCATCTCTTATAACCATTTCGGTCCTTTCGGCCCTACAGCCACCTTGTTTATACTTGGTAAGACCCACACCACTCGCCAACTTACTCTACTCCC 8+7?5>09:),/%81,$,7<+?)+1+*+),3%5+)#%(4B%$&'%'/*@,)*%%&,%(/0%-&$$*$-,$3*.%/$:%$+.$*%&+.,.%%,%(%7(-.-',1*6%&$ XC:i:35 RG:Z:rg diff --git a/src/htslib-1.21/test/sam_filter/func6.out b/src/htslib-1.21/test/sam_filter/func6.out new file mode 100644 index 0000000..de091ed --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/func6.out @@ -0,0 +1,2 @@ +ERR156632.9601178 99 17 1 29 62S38M = 279 377 CTATGACAGGGAGGTCATGTGCAGGCTGGAGAAGGGGACAAGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGA DEEEIIHHKIJILKHLHIKEKHHMKLKKJGKKKKLKLFIHEKIKL=KLJLKIILHKMH9LJJJJLHLHJJKJJKMLKJD>MJKLEHIGHIH=FFCHF>BE MD:Z:38 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M +ERR013140.23480670 133 17 3771 0 35M73S = 3771 0 TTCTCATCAATCCCTCATCTCTTATAACCATTTCGGTCCTTTCGGCCCTACAGCCACCTTGTTTATACTTGGTAAGACCCACACCACTCGCCAACTTACTCTACTCCC 8+7?5>09:),/%81,$,7<+?)+1+*+),3%5+)#%(4B%$&'%'/*@,)*%%&,%(/0%-&$$*$-,$3*.%/$:%$+.$*%&+.,.%%,%(%7(-.-',1*6%&$ XC:i:35 RG:Z:rg diff --git a/src/htslib-1.21/test/sam_filter/func7.out b/src/htslib-1.21/test/sam_filter/func7.out new file mode 100644 index 0000000..1fe2500 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/func7.out @@ 
-0,0 +1,3 @@ +ERR013140.3521432 99 17 1 29 22S86M = 226 313 AGAGGTCCCCAACTTCTTTGCAAAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCA @AEDGBHIIIIIFJGIKHGHIJJJEJKHJKJKGKLLIFHKLLCJJIDEFFHKHEHHJIIIDJEEEJEIKGJIHCGKHFKFE9BBDIAJAHF4?DE@I:DD48(86D=> MD:Z:86 RG:Z:rg AM:i:29 NM:i:0 SM:i:29 MQ:i:29 XT:A:M +ERR013140.13475139 99 17 2401 60 88M20S = 2680 386 AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGGAGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGCGTTTGAGGCTGCTGT @CEBEEIHHHICFJIFKGHIKJHII>DBC:CE>A8C>C>7DBA=BEDDB4=9;:@=;@D@@=B@E.3?972<>6@8=>?1$0:95%5%*1=8;0%4<228% X0:i:1 X1:i:0 XC:i:88 MD:Z:88 RG:Z:rg AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U +ERR013140.23480670 133 17 3771 0 35M73S = 3771 0 TTCTCATCAATCCCTCATCTCTTATAACCATTTCGGTCCTTTCGGCCCTACAGCCACCTTGTTTATACTTGGTAAGACCCACACCACTCGCCAACTTACTCTACTCCC 8+7?5>09:),/%81,$,7<+?)+1+*+),3%5+)#%(4B%$&'%'/*@,)*%%&,%(/0%-&$$*$-,$3*.%/$:%$+.$*%&+.,.%%,%(%7(-.-',1*6%&$ XC:i:35 RG:Z:rg diff --git a/src/htslib-1.21/test/sam_filter/func8.out b/src/htslib-1.21/test/sam_filter/func8.out new file mode 100644 index 0000000..8553866 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/func8.out @@ -0,0 +1 @@ +s0C 0 c1 3 0 2H6M2H * 0 0 CCGCGG ****** diff --git a/src/htslib-1.21/test/sam_filter/int1.out b/src/htslib-1.21/test/sam_filter/int1.out new file mode 100644 index 0000000..6529ff8 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/int1.out @@ -0,0 +1 @@ +98 diff --git a/src/htslib-1.21/test/sam_filter/int2.out b/src/htslib-1.21/test/sam_filter/int2.out new file mode 100644 index 0000000..e522732 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/int2.out @@ -0,0 +1 @@ +38 diff --git a/src/htslib-1.21/test/sam_filter/int3.out b/src/htslib-1.21/test/sam_filter/int3.out new file mode 100644 index 0000000..ea70ce0 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/int3.out @@ -0,0 +1 @@ +72 diff --git a/src/htslib-1.21/test/sam_filter/string1.out b/src/htslib-1.21/test/sam_filter/string1.out new file mode 100644 
index 0000000..7ba8527 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/string1.out @@ -0,0 +1,6 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-18 XS:i:-18 XN:i:0 XM:i:5 XO:i:1 XG:i:1 YT:Z:UU MD:Z:4A0G5G5G5G3^A73 NM:i:6 diff --git a/src/htslib-1.21/test/sam_filter/string2.out b/src/htslib-1.21/test/sam_filter/string2.out new file mode 100644 index 0000000..be94b07 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/string2.out @@ -0,0 +1,6 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +II.14978392 16 CHROMOSOME_II 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU diff --git a/src/htslib-1.21/test/sam_filter/string3.out b/src/htslib-1.21/test/sam_filter/string3.out new file mode 100644 index 0000000..2424115 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/string3.out @@ -0,0 +1,7 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +II.14978392 16 CHROMOSOME_II 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +III 16 CHROMOSOME_III 2 1 27M1D73M * 
0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU diff --git a/src/htslib-1.21/test/sam_filter/string4.out b/src/htslib-1.21/test/sam_filter/string4.out new file mode 100644 index 0000000..386300c --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/string4.out @@ -0,0 +1,8 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:-18 XS:i:-18 XN:i:0 XM:i:5 XO:i:1 XG:i:1 YT:Z:UU MD:Z:4A0G5G5G5G3^A73 NM:i:6 +SRR065390.32874267 0 CHROMOSOME_I 75 1 13M1D87M * 0 0 CTAAGCCTAAGCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAG DCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCC@CCCCCCCCCCCCCCA>/=;=9>:/5AA############# AS:i:-8 XS:i:-8 XN:i:0 XM:i:0 XO:i:1 XG:i:1 YT:Z:UU MD:Z:13^T87 NM:i:1 +SRR065390.723611 0 CHROMOSOME_I 155 1 5M1D95M * 0 0 AAGCCAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCDCCBCABCDADBACDDDBABBDB?AB@@<>;B>B?DB?=@@?@: AS:i:-8 XS:i:-8 XN:i:0 XM:i:0 XO:i:1 XG:i:1 YT:Z:UU MD:Z:5^T95 NM:i:1 diff --git a/src/htslib-1.21/test/sam_filter/string5.out b/src/htslib-1.21/test/sam_filter/string5.out new file mode 100644 index 0000000..4be4621 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/string5.out @@ -0,0 +1,6 @@ +@SQ SN:CHROMOSOME_I LN:1009800 +@SQ SN:CHROMOSOME_II LN:5000 +@SQ SN:CHROMOSOME_III LN:5000 +@SQ SN:CHROMOSOME_IV LN:5000 +@SQ SN:CHROMOSOME_V LN:5000 
+SRR065390.9154510 16 CHROMOSOME_I 56 0 100M * 0 0 TTCATATGGGCAGGGAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTA ##############################@B;@?>>B1?BCBBC@>CDB>B@CA@CCAC=AA>>AC;CCACCCCCCC=CCCCCCCCCCCCBCCCCCCCC AS:i:-28 XS:i:-28 XN:i:0 XM:i:14 XO:i:0 XG:i:0 YT:Z:UU MD:Z:0C0C0T1A0G0C0C0T0A0A0G0C0C0T85 NM:i:14 diff --git a/src/htslib-1.21/test/sam_filter/string6.out b/src/htslib-1.21/test/sam_filter/string6.out new file mode 100644 index 0000000..c2766bd --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/string6.out @@ -0,0 +1,9 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:xx LN:20 AS:? SP:? UR:? M5:bbf4de6d8497a119dda6e074521643dc +@RG ID:x1 SM:x1 +@RG ID:x2 SM:x2 LB:x PG:foo:bar PI:1111 +@PG ID:emacs PN:emacs VN:23.1.1 +@CO also test +@CO other headers +b1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** RG:Z:x2 +b2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** RG:Z:x2 diff --git a/src/htslib-1.21/test/sam_filter/string7.out b/src/htslib-1.21/test/sam_filter/string7.out new file mode 100644 index 0000000..8efe886 --- /dev/null +++ b/src/htslib-1.21/test/sam_filter/string7.out @@ -0,0 +1,11 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:xx LN:20 AS:? SP:? UR:? M5:bbf4de6d8497a119dda6e074521643dc +@RG ID:x1 SM:x1 +@RG ID:x2 SM:x2 LB:x PG:foo:bar PI:1111 +@PG ID:emacs PN:emacs VN:23.1.1 +@CO also test +@CO other headers +a1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** RG:Z:x1 +c1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** +a2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** RG:Z:x1 +c2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** diff --git a/src/htslib-1.21/test/simple_test_driver.sh b/src/htslib-1.21/test/simple_test_driver.sh new file mode 100644 index 0000000..5bc020e --- /dev/null +++ b/src/htslib-1.21/test/simple_test_driver.sh @@ -0,0 +1,194 @@ +#!/bin/sh +# simple_test_driver.sh -- shell functions for test scripts +# +# Copyright (C) 2017-2018 Genome Research Ltd. 
+# +# Author: James Bonfield +# Robert Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Executes a single test and compares against the expected output +run_test() { + # Expected result: pass (P) / fail (F) / nonzero exit (N) + p="$1"; shift + # File with expected output (empty or '.' if none) + e="$1"; shift + # Test result + r="P" + # Why the test failed + y="" + if [ "x$test_iter" = "x" ] + then + test_iter=1 + else + test_iter=`expr $test_iter + 1` + fi + result=`eval ${@+"$@"} 2>_err.tmp > _out.tmp` + if [ $? != 0 ] + then + if [ "$p" != "N" ] + then + # Expected zero exit code, got non-zero + r="F" + y="exit_code" + else + # Expected non-zero exit code and got it + r="P" + rm -f _out.tmp _err.tmp + fi + elif [ "$p" = "N" ] + then + # Expected non-zero exit code, but got zero + r="F" + y="exit_code" + elif [ "x$e" != "x" -a "$e" != "." 
] + then + sed -n 's/.*/&/p' _out.tmp > _out.tmp2 + if cmp -s _out.tmp2 "$e" + then + # Output was as expected + r="P" + rm -f _out.tmp _out.tmp2 _err.tmp + else + # Output differed + r="F" + y="output" + rm -f _out.tmp2 + fi + else + # Expected zero exit code and got it. + r="P" + rm -f _out.tmp _err.tmp + fi + + if [ "$r" = "F" ] + then + # Test failed + case "$p" in + [PN]) + echo "FAIL : $@" + if [ "x$e" != "x" -a "$e" != "." ] + then + keep_output="FAIL-$e.${test_iter}" + else + keep_output="FAIL.${test_iter}" + fi + mv _out.tmp "${keep_output}.out" + mv _err.tmp "${keep_output}.err" + nufail=`expr $nufail + 1` + if [ "$y" = "exit_code" ] + then + if [ "$p" != "N" ] + then + echo "Got non-zero exit code" + else + echo "Got unexpected zero exit code" + fi + echo "See ${keep_output}.{out,err} for output" + else + echo "Output differed from expected result" + echo "Compare $e ${keep_output}.out" + fi + ;; + *) + echo "XFAIL: $@" + rm -f _out.tmp _err.tmp + nefail=`expr $nefail + 1` + ;; + esac + else + # Test passed + case "$p" in + "P") + echo "PASS : $@" + nepass=`expr $nepass + 1` + ;; + "N") + echo "PASS : $@ (must exit non-zero)" + nepass=`expr $nepass + 1` + ;; + *) + echo "XPASS: $@" + nupass=`expr $nupass + 1` + ;; + esac + fi +} + +# Reads in a file containing a list of tests and executes them. +# The format for the file is: +# First field: +# INIT = initialisation, not counted in testing +# P = expected to pass (zero return; expected output matches, if present) +# N = expected to return non-zero +# F = expected to fail +# +# Second field (P/N/F only): +# Filename of expected output. If '.', output is not checked +# +# Rest: +# Shell command to execute. The command is executed using `eval` so +# all normal shell substitutions will be done first. 
+ +test_driver() { + nupass=0; nepass=0 + nufail=0; nefail=0 + + exec 9<"$1" + while read -r line <&9 + do + set -- $line + case $1 in + "#"*) # skip comments + ;; + "") # skip blank lines too + ;; + + "INIT") + shift + eval ${@+"$@"} > /dev/null + if [ $? != 0 ] + then + echo "INIT FAIL: $@" + return 1 + fi + ;; + + *) + p=$1;shift + o=$1;shift + run_test "$p" "$o" ${@+"$@"} + ;; + esac + done + exec 9<&- + + echo "" + echo "Expected passes: $nepass" + echo "Unexpected passes: $nupass" + echo "Expected failures: $nefail" + echo "Unexpected failures: $nufail" + if [ "$nupass" -gt 0 -o "$nufail" -gt 0 ] + then + return 1 + else + return 0 + fi +} diff --git a/src/htslib-1.21/test/tabix.out b/src/htslib-1.21/test/tabix.out new file mode 100644 index 0000000..0e61ac7 --- /dev/null +++ b/src/htslib-1.21/test/tabix.out @@ -0,0 +1 @@ +1 10000060 . C <*> 0 . DP=1;I16=0,1,0,0,40,1600,0,0,29,841,0,0,25,625,0,0;QS=1,0;MQ0F=0 PL 0,3,29 diff --git a/src/htslib-1.21/test/tabix/bed_file.Y.100200.out b/src/htslib-1.21/test/tabix/bed_file.Y.100200.out new file mode 100644 index 0000000..623b6eb --- /dev/null +++ b/src/htslib-1.21/test/tabix/bed_file.Y.100200.out @@ -0,0 +1 @@ +Y 100000 100900 Y1 600 + 100000 100900 255,0,0 diff --git a/src/htslib-1.21/test/tabix/bed_file.bed b/src/htslib-1.21/test/tabix/bed_file.bed new file mode 100644 index 0000000..185ebf5 --- /dev/null +++ b/src/htslib-1.21/test/tabix/bed_file.bed @@ -0,0 +1,39 @@ +# The MIT License +# +# Copyright (c) 2017 Genome Research Ltd. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# Bed format information: https://genome.ucsc.edu/FAQ/FAQformat.html +# +X 1000 1100 X1 500 + 1000 1100 255,0,0 +X 1200 1300 X2 500 + 1200 1300 255,0,0 +X 1400 1500 X3 500 + 1400 1500 255,0,0 +X 1600 1700 X4 500 + 1600 1700 255,0,0 +X 1800 1900 X5 500 + 1800 1900 255,0,0 +Y 100000 100900 Y1 600 + 100000 100900 255,0,0 +Y 100200 100700 Y2 600 + 100200 100700 255,0,0 +Y 100400 100500 Y3 600 + 100400 100500 255,0,0 +Y 100600 100700 Y4 600 + 100600 100700 255,0,0 +Y 100800 100900 Y5 600 + 100800 100900 255,0,0 +Z 100000 100001 Z1 600 + 100000 100001 255,0,0 +Z 100002 100003 Z2 600 + 100002 100003 255,0,0 +Z 100004 100005 Z3 600 + 100004 100005 255,0,0 +Z 100006 100007 Z4 600 + 100006 100007 255,0,0 +Z 100008 100009 Z5 600 + 100008 100009 255,0,0 diff --git a/src/htslib-1.21/test/tabix/bed_file.separate.out b/src/htslib-1.21/test/tabix/bed_file.separate.out new file mode 100644 index 0000000..28f8f3f --- /dev/null +++ b/src/htslib-1.21/test/tabix/bed_file.separate.out @@ -0,0 +1,11 @@ +#X:1100-1400 +X 1000 1100 X1 500 + 1000 1100 255,0,0 +X 1200 1300 X2 500 + 1200 1300 255,0,0 +#Y:100000-100550 +Y 100000 100900 Y1 600 + 100000 100900 255,0,0 +Y 100200 100700 Y2 600 + 100200 100700 255,0,0 +Y 100400 100500 Y3 600 + 100400 100500 255,0,0 +#Z:100000-100005 +Z 100000 100001 Z1 600 + 100000 100001 255,0,0 +Z 100002 100003 Z2 600 + 100002 100003 255,0,0 +Z 100004 100005 Z3 600 + 100004 100005 255,0,0 diff --git a/src/htslib-1.21/test/tabix/gff_file.X.2934832.2935190.out b/src/htslib-1.21/test/tabix/gff_file.X.2934832.2935190.out new file mode 100644 index 0000000..c22931b --- /dev/null +++ b/src/htslib-1.21/test/tabix/gff_file.X.2934832.2935190.out @@ -0,0 +1,4 @@ +X Vega exon 2934816 2935190 . - . Name=OTTHUME00001604789;Parent=OTTHUMT00000055643 +X Vega gene 2934816 2964270 . - . ID=OTTHUMG00000137358;Name=OTTHUMG00000137358;biotype=protein_coding +X Vega transcript 2934816 2964270 . - . 
ID=OTTHUMT00000055643;Name=OTTHUMT00000055643;Parent=OTTHUMG00000137358;biotype=protein_coding +X Vega CDS 2934832 2935190 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 diff --git a/src/htslib-1.21/test/tabix/gff_file.gff b/src/htslib-1.21/test/tabix/gff_file.gff new file mode 100644 index 0000000..eb86185 --- /dev/null +++ b/src/htslib-1.21/test/tabix/gff_file.gff @@ -0,0 +1,70 @@ +##gff-version 3 +##sequence-region X 1 156040895 +# arylsulfatase E transcript ENST00000540563, Vega annotations downloaded +# from EnsEMBL and sorted into ascending chromosome order. +# +# GFF3 specification: +# https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md +# +X Vega exon 2934816 2935190 . - . Name=OTTHUME00001604789;Parent=OTTHUMT00000055643 +X Vega gene 2934816 2964270 . - . ID=OTTHUMG00000137358;Name=OTTHUMG00000137358;biotype=protein_coding +X Vega transcript 2934816 2964270 . - . ID=OTTHUMT00000055643;Name=OTTHUMT00000055643;Parent=OTTHUMG00000137358;biotype=protein_coding +X Vega CDS 2934832 2935190 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega intron 2935191 2936741 . - . Name=intron00049;Parent=OTTHUMT00000055643 +X Vega CDS 2936742 2936863 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega exon 2936742 2936863 . - 1 Name=OTTHUME00001604793;Parent=OTTHUMT00000055643 +X Vega intron 2936864 2938094 . - . Name=intron00048;Parent=OTTHUMT00000055643 +X Vega CDS 2938095 2938257 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega exon 2938095 2938257 . - 2 Name=OTTHUME00001604790;Parent=OTTHUMT00000055643 +X Vega intron 2938258 2943064 . - . Name=intron00047;Parent=OTTHUMT00000055643 +X Vega CDS 2943065 2943199 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega exon 2943065 2943199 . - 1 Name=OTTHUME00001604796;Parent=OTTHUMT00000055643 +X Vega intron 2943200 2945997 . - . Name=intron00046;Parent=OTTHUMT00000055643 +X Vega CDS 2945998 2946134 . - . 
Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega exon 2945998 2946134 . - 1 Name=OTTHUME00001604791;Parent=OTTHUMT00000055643 +X Vega intron 2946135 2949303 . - . Name=intron00045;Parent=OTTHUMT00000055643 +X Vega CDS 2949304 2949727 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega exon 2949304 2949727 . - 2 Name=OTTHUME00001604792;Parent=OTTHUMT00000055643 +X Vega CDS 2949623 2949727 . - . Name=OTTHUMP00000022852;Parent=OTTHUMT00000055644 +X Vega exon 2949623 2949727 . - 1 Name=OTTHUME00002367917;Parent=OTTHUMT00000055644 +X Vega transcript 2949623 2964264 . - . ID=OTTHUMT00000055644;Name=OTTHUMT00000055644;Parent=OTTHUMG00000137358;biotype=protein_coding +X Vega intron 2949728 2953142 . - . Name=intron00044;Parent=OTTHUMT00000055643 +X Vega intron 2949728 2953142 . - . Name=intron00054;Parent=OTTHUMT00000055644 +X Vega exon 2952831 2952992 . - . Name=OTTHUME00000241561;Parent=OTTHUMT00000055642 +X Vega transcript 2952831 2953228 . - . ID=OTTHUMT00000055642;Name=OTTHUMT00000055642;Parent=OTTHUMG00000137358;biotype=processed_transcript +X Vega intron 2952993 2953142 . - . Name=intron00055;Parent=OTTHUMT00000055642 +X Vega CDS 2953143 2953265 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega CDS 2953143 2953265 . - . Name=OTTHUMP00000022852;Parent=OTTHUMT00000055644 +X Vega exon 2953143 2953228 . - . Name=OTTHUME00000241571;Parent=OTTHUMT00000055642 +X Vega exon 2953143 2953265 . - 1 Name=OTTHUME00001604788;Parent=OTTHUMT00000055643 +X Vega exon 2953143 2953265 . - 1 Name=OTTHUME00001604788;Parent=OTTHUMT00000055644 +X Vega intron 2953266 2955415 . - . Name=intron00043;Parent=OTTHUMT00000055643 +X Vega intron 2953266 2955415 . - . Name=intron00053;Parent=OTTHUMT00000055644 +X Vega CDS 2955416 2955537 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega CDS 2955416 2955537 . - . Name=OTTHUMP00000022852;Parent=OTTHUMT00000055644 +X Vega exon 2955416 2955537 . 
- 1 Name=OTTHUME00001604794;Parent=OTTHUMT00000055643 +X Vega exon 2955416 2955537 . - 1 Name=OTTHUME00001604794;Parent=OTTHUMT00000055644 +X Vega intron 2955538 2958273 . - . Name=intron00042;Parent=OTTHUMT00000055643 +X Vega intron 2955538 2958273 . - . Name=intron00052;Parent=OTTHUMT00000055644 +X Vega CDS 2958274 2958435 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega CDS 2958274 2958435 . - . Name=OTTHUMP00000022852;Parent=OTTHUMT00000055644 +X Vega exon 2958274 2958435 . - 2 Name=OTTHUME00001604795;Parent=OTTHUMT00000055643 +X Vega exon 2958274 2958435 . - 2 Name=OTTHUME00001604795;Parent=OTTHUMT00000055644 +X Vega exon 2958411 2958435 . - . Name=OTTHUME00000241567;Parent=OTTHUMT00000055641 +X Vega transcript 2958411 2964264 . - . ID=OTTHUMT00000055641;Name=OTTHUMT00000055641;Parent=OTTHUMG00000137358;biotype=processed_transcript +X Vega intron 2958436 2959602 . - . Name=intron00058;Parent=OTTHUMT00000055641 +X Vega intron 2958436 2960377 . - . Name=intron00041;Parent=OTTHUMT00000055643 +X Vega intron 2958436 2960377 . - . Name=intron00051;Parent=OTTHUMT00000055644 +X Vega exon 2959603 2959888 . - . Name=OTTHUME00000241555;Parent=OTTHUMT00000055641 +X Vega intron 2959889 2960377 . - . Name=intron00057;Parent=OTTHUMT00000055641 +X Vega CDS 2960378 2960400 . - . Name=OTTHUMP00000022851;Parent=OTTHUMT00000055643 +X Vega CDS 2960378 2960400 . - . Name=OTTHUMP00000022852;Parent=OTTHUMT00000055644 +X Vega exon 2960378 2960420 . - 2 Name=OTTHUME00001604787;Parent=OTTHUMT00000055643 +X Vega exon 2960378 2960420 . - . Name=OTTHUME00000241553;Parent=OTTHUMT00000055641 +X Vega exon 2960378 2960468 . - 2 Name=OTTHUME00000241569;Parent=OTTHUMT00000055644 +X Vega intron 2960421 2964223 . - . Name=intron00040;Parent=OTTHUMT00000055643 +X Vega intron 2960421 2964223 . - . Name=intron00056;Parent=OTTHUMT00000055641 +X Vega intron 2960469 2964223 . - . Name=intron00050;Parent=OTTHUMT00000055644 +X Vega exon 2964224 2964264 . - . 
Name=OTTHUME00000241556;Parent=OTTHUMT00000055641 +X Vega exon 2964224 2964264 . - . Name=OTTHUME00000241556;Parent=OTTHUMT00000055644 +X Vega exon 2964224 2964270 . - . Name=OTTHUME00001604786;Parent=OTTHUMT00000055643 diff --git a/src/htslib-1.21/test/tabix/large_chr.20.1.2147483647.out b/src/htslib-1.21/test/tabix/large_chr.20.1.2147483647.out new file mode 100644 index 0000000..637e62c --- /dev/null +++ b/src/htslib-1.21/test/tabix/large_chr.20.1.2147483647.out @@ -0,0 +1,9 @@ +chr20 76962 . T C 999 PASS . +chr20 126310 . ACC A 999 PASS . +chr20 138125 . G T 999 PASS . +chr20 138148 . C T 999 PASS . +chr20 271225 . T TTTA,TA 999 PASS . +chr20 304568 . C T 999 PASS . +chr20 620255100 . AG T 999 PASS . +chr20 630255200 . G C 999 PASS . +chr20 2147483647 . A T 999 PASS . diff --git a/src/htslib-1.21/test/tabix/large_chr.vcf b/src/htslib-1.21/test/tabix/large_chr.vcf new file mode 100644 index 0000000..f8a3661 --- /dev/null +++ b/src/htslib-1.21/test/tabix/large_chr.vcf @@ -0,0 +1,18 @@ +##fileformat=VCFv4.2 +##reference=file:///seq/references/long_chrom.fasta +##FILTER= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr11 2343543 . A . 999 PASS . +chr11 5464562 . C T 999 PASS . +chr11 116870911 . C G 999 PASS . +chr20 76962 . T C 999 PASS . +chr20 126310 . ACC A 999 PASS . +chr20 138125 . G T 999 PASS . +chr20 138148 . C T 999 PASS . +chr20 271225 . T TTTA,TA 999 PASS . +chr20 304568 . C T 999 PASS . +chr20 620255100 . AG T 999 PASS . +chr20 630255200 . G C 999 PASS . +chr20 2147483647 . A T 999 PASS . diff --git a/src/htslib-1.21/test/tabix/tabix.tst b/src/htslib-1.21/test/tabix/tabix.tst new file mode 100644 index 0000000..316c26f --- /dev/null +++ b/src/htslib-1.21/test/tabix/tabix.tst @@ -0,0 +1,72 @@ +# Copyright (C) 2017, 2024 Genome Research Ltd. 
+# +# Author: Robert Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# First field: +# INIT = initialisation, not counted in testing +# P = expected to pass (zero return; expected output matches, if present) +# N = expected to return non-zero +# F = expected to fail +# +# Second field (P/N/F only): +# Filename of expected output. If '.', output is not checked +# +# Rest: +# Command to execute. $bgzip and $tabix are replaced with the path to +# bgzip and tabix. + +# TBI index on VCF +INIT $bgzip -c vcf_file.vcf > vcf_file.tbi.tmp.vcf.gz +P . $tabix -f -p vcf vcf_file.tbi.tmp.vcf.gz +P vcf_file.1.3000151.out $tabix vcf_file.tbi.tmp.vcf.gz 1:3000151-3000151 +P vcf_file.2.3199812.out $tabix vcf_file.tbi.tmp.vcf.gz 2:3199812-3199812 + +# CSI index on VCF +INIT $bgzip -c vcf_file.vcf > vcf_file.csi.tmp.vcf.gz +P . 
$tabix -f -C -p vcf vcf_file.csi.tmp.vcf.gz +P vcf_file.1.3000151.out $tabix vcf_file.csi.tmp.vcf.gz 1:3000151-3000151 +P vcf_file.2.3199812.out $tabix vcf_file.csi.tmp.vcf.gz 2:3199812-3199812 + +# VCF file with chromosome > 2^29-1 bases long +# TBI cannot index this file, so building the index should fail +INIT $bgzip -c large_chr.vcf > large_chr.tmp.vcf.gz +N . $tabix -f -p vcf large_chr.tmp.vcf.gz + +# CSI can handle positions > 2^29-1, so building should work +P . $tabix -f -C -p vcf large_chr.tmp.vcf.gz +P large_chr.20.1.2147483647.out $tabix large_chr.tmp.vcf.gz chr20:1-2147483647 + +# TBI index on BED +INIT $bgzip -c bed_file.bed > bed_file.tbi.tmp.bed.gz +P . $tabix -f -p bed bed_file.tbi.tmp.bed.gz +P bed_file.Y.100200.out $tabix bed_file.tbi.tmp.bed.gz Y:100200-100200 + +# TBI index on GFF3 +INIT $bgzip -c gff_file.gff > gff_file.tbi.tmp.gff.gz +P . $tabix -f -p gff gff_file.tbi.tmp.gff.gz +P gff_file.X.2934832.2935190.out $tabix gff_file.tbi.tmp.gff.gz X:2934832-2935190 + +# tabix with --separate-regions +P bed_file.separate.out $tabix --separate-regions bed_file.tbi.tmp.bed.gz X:1100-1400 Y:100000-100550 Z:100000-100005 + +# Using threads with tabix +P . $tabix -f -p bed bed_file.tbi.tmp.bed.gz -@ 2 +P vcf_file.1.3000151.out $tabix vcf_file.tbi.tmp.vcf.gz 1:3000151-3000151 --threads 2 diff --git a/src/htslib-1.21/test/tabix/test-tabix.sh b/src/htslib-1.21/test/tabix/test-tabix.sh new file mode 100755 index 0000000..e9a5a9c --- /dev/null +++ b/src/htslib-1.21/test/tabix/test-tabix.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# +# Copyright (C) 2017-2018 Genome Research Ltd. 
+# +# Author: Robert Davies +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Load in the test driver +. ../simple_test_driver.sh + +echo "Testing tabix..." + +bgzip="../../bgzip" +tabix="../../tabix" + +test_driver $@ + +exit $? diff --git a/src/htslib-1.21/test/tabix/vcf_file.1.3000151.out b/src/htslib-1.21/test/tabix/vcf_file.1.3000151.out new file mode 100644 index 0000000..e9d5c87 --- /dev/null +++ b/src/htslib-1.21/test/tabix/vcf_file.1.3000151.out @@ -0,0 +1 @@ +1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245 diff --git a/src/htslib-1.21/test/tabix/vcf_file.2.3199812.out b/src/htslib-1.21/test/tabix/vcf_file.2.3199812.out new file mode 100644 index 0000000..be2f66e --- /dev/null +++ b/src/htslib-1.21/test/tabix/vcf_file.2.3199812.out @@ -0,0 +1 @@ +2 3199812 . 
G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26 diff --git a/src/htslib-1.21/test/tabix/vcf_file.bcf b/src/htslib-1.21/test/tabix/vcf_file.bcf new file mode 100644 index 0000000..a4aafec Binary files /dev/null and b/src/htslib-1.21/test/tabix/vcf_file.bcf differ diff --git a/src/htslib-1.21/test/tabix/vcf_file.vcf b/src/htslib-1.21/test/tabix/vcf_file.vcf new file mode 100644 index 0000000..d3cf30f --- /dev/null +++ b/src/htslib-1.21/test/tabix/vcf_file.vcf @@ -0,0 +1,38 @@ +##fileformat=VCFv4.1 +##FILTER= +##INFO= +##FORMAT= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FILTER= +##FILTER= +##contig= +##contig= +##contig= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##readme=AAAAAA +##readme=BBBBBB +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B +1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245 +1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245 +1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20 +1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20 +1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32 +1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32 +1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21 +1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22 +1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30 +1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30 +1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10 +2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26 +3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26 +4 3258448 . TACACACAC T . PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31 +4 3258501 . 
C A,T,G,CA,CT,CG,CC,CAA,CAT,CAG,CAC,CTA,CTT,CTG,CTC,CGA,CGT,CGG,CGC,CCA,CCT,CCG,CCC,CAAA,CAAT,CAAG,CAAC,CATA,CATT,CATG,CATC,CAGA,CAGT,CAGG,CAGC,CACA,CACT,CACG,CACC,CTAA,CTAT,CTAG,CTAC,CTTA,CTTT,CTTG,CTTC,CTGA,CTGT,CTGG,CTGC,CTCA,CTCT,CTCG,CTCC,CGAA,CGAT,CGAG,CGAC,CGTA,CGTT,CGTG,CGTC,CGGA,CGGT,CGGG,CGGC,CGCA,CGCT,CGCG,CGCC,CCAA,CCAT,CCAG,CCAC,CCTA,CCTT,CCTG,CCTC,CCGA,CCGT,CCGG,CCGC,CCCA,CCCT,CCCG,CCCC,CAAAA,CAAAT,CAAAG,CAAAC,CAATA,CAATT,CAATG,CAATC,CAAGA,CAAGT,CAAGG,CAAGC,CAACA,CAACT,CAACG,CAACC,CATAA,CATAT,CATAG,CATAC,CATTA,CATTT,CATTG,CATTC,CATGA,CATGT,CATGG,CATGC,CATCA,CATCT,CATCG,CATCC,CAGAA,CAGAT,CAGAG,CAGAC,CAGTA,CAGTT,CAGTG,CAGTC,CAGGA,CAGGT,CAGGG,CAGGC,CAGCA,CAGCT,CAGCG,CAGCC,CACAA,CACAT,CACAG,CACAC,CACTA,CACTT,CACTG,CACTC,CACGA,CACGT,CACGG,CACGC,CACCA,CACCT,CACCG,CACCC,CTAAA,CTAAT,CTAAG,CTAAC,CTATA,CTATT,CTATG,CTATC,CTAGA,CTAGT,CTAGG,CTAGC,CTACA,CTACT,CTACG,CTACC,CTTAA,CTTAT,CTTAG,CTTAC,CTTTA,CTTTT,CTTTG,CTTTC,CTTGA,CTTGT,CTTGG,CTTGC,CTTCA,CTTCT,CTTCG,CTTCC,CTGAA,CTGAT,CTGAG,CTGAC,CTGTA,CTGTT,CTGTG,CTGTC,CTGGA,CTGGT,CTGGG,CTGGC,CTGCA,CTGCT,CTGCG,CTGCC,CTCAA,CTCAT,CTCAG,CTCAC,CTCTA,CTCTT,CTCTG,CTCTC,CTCGA,CTCGT,CTCGG,CTCGC,CTCCA,CTCCT,CTCCG,CTCCC,CGAAA,CGAAT,CGAAG,CGAAC,CGATA,CGATT,CGATG,CGATC,CGAGA,CGAGT,CGAGG,CGAGC,CGACA,CGACT,CGACG,CGACC,CGTAA,CGTAT,CGTAG,CGTAC,CGTTA,CGTTT,CGTTG,CGTTC,CGTGA,CGTGT,CGTGG,CGTGC,CGTCA,CGTCT,CGTCG,CGTCC,CGGAA,CGGAT,CGGAG,CGGAC,CGGTA,CGGTT,CGGTG,CGGTC,CGGGA,CGGGT,CGGGG,CGGGC,CGGCA,CGGCT,CGGCG,CGGCC,CGCAA,CGCAT,CGCAG,CGCAC,CGCTA,CGCTT,CGCTG,CGCTC,CGCGA,CGCGT,CGCGG,CGCGC,CGCCA,CGCCT,CGCCG,CGCCC,CCAAA,CCAAT,CCAAG,CCAAC,CCATA,CCATT,CCATG,CCATC,CCAGA,CCAGT,CCAGG,CCAGC,CCACA,CCACT,CCACG,CCACC,CCTAA,CCTAT,CCTAG,CCTAC,CCTTA,CCTTT,CCTTG,CCTTC,CCTGA,CCTGT 45 PASS AN=4;AC=2 GT 0/300 240/260 diff --git a/src/htslib-1.21/test/test-bcf-sr.c b/src/htslib-1.21/test/test-bcf-sr.c new file mode 100644 index 0000000..0fb5990 --- /dev/null +++ b/src/htslib-1.21/test/test-bcf-sr.c @@ -0,0 +1,270 @@ +/* + Copyright (C) 2017, 2020, 2023 Genome 
Research Ltd. + + Author: Petr Danecek + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +/* + Test bcf synced reader allele pairing +*/ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../htslib/hts_defs.h" +#include "../htslib/synced_bcf_reader.h" +#include "../htslib/hts.h" +#include "../htslib/vcf.h" + +void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) HTS_NORETURN +error(const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +void HTS_NORETURN usage(int exit_code) +{ + fprintf(stderr, "Usage: test-bcf-sr [OPTIONS] vcf-list.txt\n"); + fprintf(stderr, " test-bcf-sr [OPTIONS] -args file1.bcf [...]\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " --args pass filenames directly in argument list\n"); + fprintf(stderr, " --no-index allow streaming\n"); + fprintf(stderr, " -o, --output output file (stdout if not set)\n"); + fprintf(stderr, " -O, --output-fmt fmt: vcf,bcf,summary\n"); + fprintf(stderr, " -p, --pair logic: snps,indels,both,snps+ref,indels+ref,both+ref,exact,some,all\n"); + fprintf(stderr, " -r, --regions comma-separated list of regions\n"); + fprintf(stderr, " -t, --targets comma-separated list of targets\n"); + fprintf(stderr, "\n"); + exit(exit_code); +} + +void write_summary_format(bcf_srs_t *sr, FILE *out) +{ + int n, i, j; + while ((n = bcf_sr_next_line(sr)) > 0) { + for (i=0; inreaders; i++) + { + if ( !bcf_sr_has_line(sr,i) ) continue; + bcf1_t *rec = bcf_sr_get_line(sr, i); + if (!rec) error("bcf_sr_get_line() unexpectedly returned NULL\n"); + fprintf(out, "%s:%"PRIhts_pos, + bcf_seqname_safe(bcf_sr_get_header(sr,i),rec),rec->pos+1); + break; + } + + for (i=0; inreaders; i++) + { + fprintf(out, "\t"); + + if ( !bcf_sr_has_line(sr,i) ) + { + fprintf(out, "%s","-"); + continue; + } + + bcf1_t *rec = bcf_sr_get_line(sr, i); + if (!rec) error("bcf_sr_get_line() unexpectedly returned NULL\n"); + fprintf(out, "%s", rec->n_allele > 1 ? 
rec->d.allele[1] : "."); + for (j=2; jn_allele; j++) + { + fprintf(out, ",%s", rec->d.allele[j]); + } + } + fprintf(out, "\n"); + } +} + +void write_vcf_bcf_format(bcf_srs_t *sr, bcf_hdr_t *hdr, vcfFile *vcf_out, + const char *fmt_type) +{ + int i, n; + if (bcf_hdr_write(vcf_out, hdr) != 0) + error("Couldn't write %s header\n", fmt_type); + + while ((n = bcf_sr_next_line(sr)) > 0) { + for (i=0; inreaders; i++) + { + if ( !bcf_sr_has_line(sr,i) ) continue; + bcf1_t *rec = bcf_sr_get_line(sr, i); + if (!rec) error("bcf_sr_get_line() unexpectedly returned NULL\n"); + if (vcf_write(vcf_out, hdr, rec) < 0) + error("vcf_write() failed\n"); + } + } +} + +int main(int argc, char *argv[]) +{ + static struct option loptions[] = + { + {"help",no_argument,NULL,'h'}, + {"output-fmt",required_argument,NULL,'O'}, + {"pair",required_argument,NULL,'p'}, + {"regions",required_argument,NULL,'r'}, + {"targets",required_argument,NULL,'t'}, + {"no-index",no_argument,NULL,1000}, + {"args",no_argument,NULL,1001}, + {NULL,0,NULL,0} + }; + + int c, pair = 0, use_index = 1, use_fofn = 1; + enum htsExactFormat out_fmt = text_format; // for original pos + alleles + const char *out_fn = NULL, *regions = NULL, *targets = NULL; + while ((c = getopt_long(argc, argv, "o:O:p:r:t:h", loptions, NULL)) >= 0) + { + switch (c) + { + case 'o': + out_fn = optarg; + break; + case 'O': + if (!strcasecmp(optarg, "vcf")) out_fmt = vcf; + else if (!strcasecmp(optarg, "bcf")) out_fmt = bcf; + else if (!strcasecmp(optarg, "summary")) out_fmt = text_format; + else error("Unknown output format \"%s\"\n", optarg); + break; + case 'p': + if ( !strcmp(optarg,"snps") ) pair |= BCF_SR_PAIR_SNPS; + else if ( !strcmp(optarg,"snp+ref") ) pair |= BCF_SR_PAIR_SNPS|BCF_SR_PAIR_SNP_REF; + else if ( !strcmp(optarg,"snps+ref") ) pair |= BCF_SR_PAIR_SNPS|BCF_SR_PAIR_SNP_REF; + else if ( !strcmp(optarg,"indels") ) pair |= BCF_SR_PAIR_INDELS; + else if ( !strcmp(optarg,"indel+ref") ) pair |= 
BCF_SR_PAIR_INDELS|BCF_SR_PAIR_INDEL_REF; + else if ( !strcmp(optarg,"indels+ref") ) pair |= BCF_SR_PAIR_INDELS|BCF_SR_PAIR_INDEL_REF; + else if ( !strcmp(optarg,"both") ) pair |= BCF_SR_PAIR_BOTH; + else if ( !strcmp(optarg,"both+ref") ) pair |= BCF_SR_PAIR_BOTH_REF; + else if ( !strcmp(optarg,"any") ) pair |= BCF_SR_PAIR_ANY; + else if ( !strcmp(optarg,"all") ) pair |= BCF_SR_PAIR_ANY; + else if ( !strcmp(optarg,"some") ) pair |= BCF_SR_PAIR_SOME; + else if ( !strcmp(optarg,"exact") ) pair = BCF_SR_PAIR_EXACT; + else error("The --pair logic \"%s\" not recognised.\n", optarg); + break; + case 'r': + regions = optarg; + break; + case 't': + targets = optarg; + break; + case 1000: + use_index = 0; + break; + case 1001: + use_fofn = 0; + break; + case 'h': + usage(EXIT_SUCCESS); + default: usage(EXIT_FAILURE); + } + } + if ( !pair ) pair = BCF_SR_PAIR_EXACT; + if ( optind == argc ) usage(EXIT_FAILURE); + + int i, nvcf; + char **vcfs = NULL; + if (use_fofn) { + vcfs = hts_readlist(argv[optind], 1, &nvcf); + if ( !vcfs ) error("Could not parse %s\n", argv[optind]); + } else { + vcfs = &argv[optind]; + nvcf = argc - optind; + } + + bcf_srs_t *sr = bcf_sr_init(); + if (!sr) error("bcf_sr_init() failed\n"); + bcf_sr_set_opt(sr, BCF_SR_PAIR_LOGIC, pair); + if (use_index) { + bcf_sr_set_opt(sr, BCF_SR_REQUIRE_IDX); + } else { + bcf_sr_set_opt(sr, BCF_SR_ALLOW_NO_IDX); + } + + if (regions) + { + if (bcf_sr_set_regions(sr, regions, 0) != 0) + error("Failed to set regions\n"); + } + + if (targets) + { + if (bcf_sr_set_targets(sr, targets, 0, 0) != 0) + error("Failed to set targets\n"); + } + + for (i=0; ierrnum)); + + if (!sr->readers || sr->nreaders < 1) + error("No readers set, even though one was added\n"); + + if (out_fmt == text_format) { + FILE *out = stdout; + if (out_fn) + { + out = fopen(out_fn, "w"); + if (!out) error("Couldn't open \"%s\" for writing: %s\n", + out_fn, strerror(errno)); + } + write_summary_format(sr, out); + if (out_fn) + { + if (fclose(out) != 0) + 
error("Error on closing %s : %s\n", + out_fn, strerror(errno)); + } + } else { + const char *fmt_type = out_fmt == vcf ? "VCF" : "BCF"; + + bcf_hdr_t *hdr = bcf_sr_get_header(sr, 0); + if (!hdr) error("%s output, but don't have a header\n", fmt_type); + + if (!out_fn) { out_fn = "-"; } + vcfFile *vcf_out = vcf_open(out_fn, out_fmt == vcf ? "w" : "wb"); + if (!vcf_out) error("Couldn't open \"%s\" for writing: %s\n", + out_fn, strerror(errno)); + write_vcf_bcf_format(sr, hdr, vcf_out, fmt_type); + if (vcf_close(vcf_out) != 0) + error("Error on closing \"%s\"\n", out_fn); + } + + if (sr->errnum) error("Synced reader error: %s\n", + bcf_sr_strerror(sr->errnum)); + + bcf_sr_destroy(sr); + if (use_fofn) + { + for (i=0; i Random seed\n", + " -t, --temp-dir When given, temporary files will not be removed\n", + " -v, --verbose \n", + " -h, -?, --help This help message\n", + "\n"; + exit 1; +} +sub parse_params +{ + my $opts = {}; + while (defined(my $arg=shift(@ARGV))) + { + if ( $arg eq '-t' || $arg eq '--temp-dir' ) { $$opts{keep_files}=shift(@ARGV); next } + if ( $arg eq '-v' || $arg eq '--verbose' ) { $$opts{verbose}=1; next } + if ( $arg eq '-s' || $arg eq '--seed' ) { $$opts{seed}=shift(@ARGV); next } + if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); } + error("Unknown parameter \"$arg\". Run -h for help.\n"); + } + $$opts{tmp} = exists($$opts{keep_files}) ? 
$$opts{keep_files} : tempdir(CLEANUP=>1); + if ($^O =~ /^msys/) { + $$opts{tmp} = cygpath($$opts{tmp}); + } + if ( $$opts{keep_files} ) { cmd("mkdir -p $$opts{keep_files}"); } + if ( !exists($$opts{seed}) ) + { + $$opts{seed} = time(); + print STDERR "Random seed is $$opts{seed}\n"; + } + srand($$opts{seed}); + return $opts; +} + +sub _cmd +{ + my ($cmd) = @_; + my $kid_io; + my @out; + my $pid = open($kid_io, "-|"); + if ( !defined $pid ) { error("Cannot fork: $!"); } + if ($pid) + { + # parent + @out = <$kid_io>; + close($kid_io); + } + else + { + # child + exec('bash', '-o','pipefail','-c', $cmd) or error("Cannot execute the command [/bin/sh -o pipefail -c $cmd]: $!"); + } + return ($? >> 8, join('',@out)); +} +sub cmd +{ + my ($cmd) = @_; + my ($ret,$out) = _cmd($cmd); + if ( $ret ) { error("The command failed [$ret]: $cmd\n", $out); } + return $out; +} + +sub save_vcf +{ + my ($opts,$vars,$fname) = @_; + open(my $fh,"| $FindBin::Bin/../bgzip -c > $fname") or error("$FindBin::Bin/../bgzip -c > $fname: !"); + print $fh qq[##fileformat=VCFv4.3\n]; + print $fh qq[##FILTER=\n]; + print $fh qq[##contig=\n]; + print $fh qq[##contig=\n]; + print $fh '#'. 
join("\t", qw(CHROM POS ID REF ALT QUAL FILTER INFO))."\n"; + for my $var (@$vars) + { + my @als = split(/,/,$var); + my @alts = (); + my $ref; + for my $al (@als) + { + my ($xref,$alt) = split(/>/,$al); + $ref = $xref; + push @alts,$alt; + } + print $fh join("\t", (1,100,'.',$ref,join(',',@alts),'.','.','.'))."\n"; + } + for my $var (@$vars) + { + my @als = split(/,/,$var); + my @alts = (); + my $ref; + for my $al (@als) + { + my ($xref,$alt) = split(/>/,$al); + $ref = $xref; + push @alts,$alt; + } + print $fh join("\t", (1,300,'.',$ref,join(',',@alts),'.','.','.'))."\n"; + } + for my $var (@$vars) + { + my @als = split(/,/,$var); + my @alts = (); + my $ref; + for my $al (@als) + { + my ($xref,$alt) = split(/>/,$al); + $ref = $xref; + push @alts,$alt; + } + print $fh join("\t", (2,100,'.',$ref,join(',',@alts),'.','.','.'))."\n"; + } + close($fh) or error("close failed: bgzip -c > $fname"); + cmd("$FindBin::Bin/../tabix -f $fname"); +} + +sub random_alt +{ + my ($ref,$is_snp) = @_; + my @acgt = qw(A C G T); + my $alt = $acgt[rand @acgt]; + if ( $ref eq $alt ) { return '.'; } # ref + if ( !$is_snp ) { $alt = $ref.$alt; } + return $alt; +} + +sub check_outputs +{ + my ($fname_bin,$fname_perl) = @_; + my %out = (); + open(my $fh,'<',$fname_bin) or error("$fname_bin: $!"); + while (my $line=<$fh>) + { + my ($pos,@vals) = split(/\t/,$line); + chomp($vals[-1]); + $vals[-1] =~ s/\r$//; + push @{$out{$pos}},join("\t",@vals); + } + close($fh) or error("close failed: $fname_bin"); + if ( keys %out != 3 ) { error("Expected 3 positions, found ",scalar keys %out,": $fname_bin\n"); } + my $n; + for my $pos (keys %out) + { + if ( !defined $n ) { $n = scalar @{$out{$pos}}; } + if ( @{$out{$pos}} != $n ) { error("Expected $n positions, found ",scalar keys %{$out{$pos}},"\n"); } + } + my @blines = @{$out{(keys %out)[0]}}; + + my @plines = (); + open($fh,'<',$fname_perl) or error("$fname_perl: $!"); + while (my $line=<$fh>) + { + chomp($line); + $line =~ s/\r$//; + push 
@plines,$line; + } + close($fh) or error("close failed: $fname_perl"); + if ( @blines != @plines ) { error("Different number of lines: ",scalar @blines," vs ",scalar @plines," in $fname_bin vs $fname_perl\n"); } + @blines = sort @blines; + @plines = sort @plines; + for (my $i=0; $i<@plines; $i++) + { + if ( $blines[$i] ne $plines[$i] ) + { + #error("Different lines in $fname_bin vs $fname_perl:\n\t$blines[$i].\nvs\n\t$plines[$i].\n"); + error("Different lines in $fname_bin vs $fname_perl:\n\t".join("\n\t",@blines)."\nvs\n\t".join("\n\t",@plines)."\n"); + } + } +} + +sub run_test +{ + my ($opts) = @_; + my @acgt = qw(A C G T); + my $ref = $acgt[rand @acgt]; + my @vcfs = (); + my $nvcf = 1 + int(rand(10)); + for (my $i=0; $i<$nvcf; $i++) + { + my %vars = (); + my $nvars = 1 + int(rand(6)); + for (my $j=0; $j<$nvars; $j++) + { + my $snp = int(rand(2)); + my $alt = random_alt($ref,$snp); + my $var = "$ref>$alt"; + if ( $alt ne '.' && !int(rand(5)) ) # create multiallelic site + { + my $alt2 = random_alt($ref,$snp); + if ( $alt2 ne '.' 
&& $alt ne $alt2 ) + { + $var .= ",$ref>$alt2"; + } + } + $vars{$var} = 1; + } + my $ndup = 1 + int(rand(4)); + for (my $j=0; $j<$ndup; $j++) + { + my @keys = shuffle keys %vars; + push @vcfs, \@keys; + } + } + @vcfs = shuffle @vcfs; + open(my $fh,'>',"$$opts{tmp}/list.txt") or error("$$opts{tmp}/list.txt: $!"); + my %groups = (); + my @group_list = (); + for (my $i=0; $i<@vcfs; $i++) + { + my $vcf = $vcfs[$i]; + my $key = join(';',sort @$vcf); + if ( !exists($groups{$key}) ) + { + push @group_list,$key; + $groups{$key}{vars} = [@$vcf]; + $groups{$key}{key} = $key; + } + push @{$groups{$key}{vcfs}},$i; + save_vcf($opts,$vcf,"$$opts{tmp}/$i.vcf.gz"); + print $fh "$$opts{tmp}/$i.vcf.gz\n"; + } + close($fh); + + my @groups = (); + for my $group (@group_list) { push @groups, $groups{$group}; } + for my $logic (qw(snps indels both snps+ref indels+ref both+ref exact some all)) + #for my $logic (qw(snps)) + { + print STDERR "$FindBin::Bin/test-bcf-sr $$opts{tmp}/list.txt -p $logic > $$opts{tmp}/rmme.bin.out\n" unless !$$opts{verbose}; + cmd("$FindBin::Bin/test-bcf-sr $$opts{tmp}/list.txt -p $logic > $$opts{tmp}/rmme.bin.out"); + + open(my $fh,'>',"$$opts{tmp}/rmme.perl.out") or error("$$opts{tmp}/rmme.perl.out: $!"); + $$opts{fh} = $fh; + $$opts{logic} = $logic; + pair_lines($opts,\@groups); + close($fh) or error("close failed: $$opts{tmp}/rmme.perl.out"); + + check_outputs("$$opts{tmp}/rmme.bin.out","$$opts{tmp}/rmme.perl.out"); + } +} + +sub pair_lines +{ + my ($opts,$groups) = @_; + + #print 'groups: '.Dumper($groups); + + # get a list of all unique variants and their groups + my %vars = (); + my @var_list = (); + for (my $igrp=0; $igrp<@$groups; $igrp++) + { + my $grp = $$groups[$igrp]; + for (my $ivar=0; $ivar<@{$$grp{vars}}; $ivar++) + { + my $var = $$grp{vars}[$ivar]; + if ( !exists($vars{$var}) ) { push @var_list,$var; } # just to keep the order + push @{$vars{$var}}, { igrp=>$igrp, ivar=>$ivar, cnt=>scalar @{$$grp{vcfs}} }; + } + } + + # each variant has a list 
of groups that it is present in + my @vars = (); + for my $var (@var_list) { push @vars, $vars{$var}; } + + #print STDERR 'unique variants: '.Dumper(\@var_list); + # for (my $i=0; $i<@vars; $i++) + # { + # my $igrp = $vars[$i][0]{igrp}; + # my $jvar = $vars[$i][0]{ivar}; + # my $var = $$groups[$igrp]{vars}[$jvar]; + # print STDERR "$i: $var\n"; + # } + + # initialize variant sets - combinations of compatible variants across multiple reader groups + my @var_sets = (); + for (my $i=0; $i<@vars; $i++) { push @var_sets,[$i]; } + + my @bitmask = (); + my @pmatrix = (); + for (my $iset=0; $iset<@var_sets; $iset++) + { + $pmatrix[$iset] = [(0) x (scalar @$groups)]; + $bitmask[$iset] = 0; + } + my @max; + for (my $iset=0; $iset<@var_sets; $iset++) + { + my $tmp_max = 0; + for my $ivar (@{$var_sets[$iset]}) + { + my $var = $vars[$ivar]; + for my $grp (@$var) + { + my $igrp = $$grp{igrp}; + $pmatrix[$iset][$igrp] += $$grp{cnt}; + if ( $bitmask[$iset] & (1<<$igrp) ) { error("Uh!"); } + $bitmask[$iset] |= 1<<$igrp; + $tmp_max += $$grp{cnt}; + } + } + push @max, $tmp_max; + } + + # pair the lines + while ( @var_sets ) + { + my $imax = 0; + for (my $iset=1; $iset<@var_sets; $iset++) + { + if ( $max[$iset] > $max[$imax] ) { $imax = $iset; } + } + # if ( @var_sets == @vars ) { dump_pmatrix($groups,\@vars,\@var_sets,\@pmatrix,\@bitmask); } + + my $ipair = undef; + my $max_score = 0; + for (my $iset=0; $iset<@var_sets; $iset++) + { + if ( $bitmask[$imax] & $bitmask[$iset] ) { next; } # cannot merge + my $score = pairing_score($opts,$groups,\@vars,$var_sets[$imax],$var_sets[$iset]); + if ( $max_score < $score ) { $max_score = $score; $ipair = $iset; } + } + + # merge rows thus creating a new variant set + if ( defined $ipair && $ipair != $imax ) + { + $imax = merge_rows($groups,\@vars,\@var_sets,\@pmatrix,\@bitmask,\@max,$imax,$ipair); + next; + } + + output_row($opts,$groups,\@vars,\@var_sets,\@pmatrix,\@bitmask,\@max,$imax); + # 
dump_pmatrix($groups,\@vars,\@var_sets,\@pmatrix,\@bitmask); + } +} + +sub merge_rows +{ + my ($grps,$vars,$var_sets,$pmat,$bitmask,$max,$ivset,$jvset) = @_; + if ( $ivset > $jvset ) { my $tmp = $ivset; $ivset = $jvset; $jvset = $tmp; } + push @{$$var_sets[$ivset]}, @{$$var_sets[$jvset]}; + for (my $igrp=0; $igrp<@{$$pmat[$ivset]}; $igrp++) + { + $$pmat[$ivset][$igrp] += $$pmat[$jvset][$igrp]; + } + $$max[$ivset] += $$max[$jvset]; + $$bitmask[$ivset] |= $$bitmask[$jvset]; + splice(@$var_sets,$jvset,1); + splice(@$pmat,$jvset,1); + splice(@$bitmask,$jvset,1); + splice(@$max,$jvset,1); + return $ivset; +} + +sub output_row +{ + my ($opts,$grps,$vars,$var_sets,$pmat,$bitmask,$max,$ivset) = @_; + my $varset = $$var_sets[$ivset]; + my @tmp = (); + for my $grp (@$grps) + { + for my $vcf (@{$$grp{vcfs}}) { push @tmp, '-'; } + } + for my $idx (@$varset) + { + for my $var (@{$$vars[$idx]}) + { + my $igrp = $$var{igrp}; + my $jvar = $$var{ivar}; + my $str = $$grps[$igrp]{vars}[$jvar]; + $str =~ s/[^>]>//g; + for my $ivcf (@{$$grps[$igrp]{vcfs}}) { $tmp[$ivcf] = $str; } + } + } + print {$$opts{fh}} join("\t",@tmp)."\n"; + splice(@$var_sets,$ivset,1); + splice(@$pmat,$ivset,1); + splice(@$bitmask,$ivset,1); + splice(@$max,$ivset,1); +} + +sub dump_pmatrix +{ + my ($grps,$vars,$var_sets,$pmat,$bitmask) = @_; + for (my $ivset=0; $ivset<@$var_sets; $ivset++) + { + my $varset = $$var_sets[$ivset]; + my @tmp = (); + for my $ivar (@$varset) + { + my $igrp = $$vars[$ivar][0]{igrp}; + my $jvar = $$vars[$ivar][0]{ivar}; + push @tmp, $$grps[$igrp]{vars}[$jvar]; + } + printf STDERR "%-10s",join(',',@tmp); + for (my $igrp=0; $igrp<@{$$pmat[0]}; $igrp++) + { + print STDERR "\t$$pmat[$ivset][$igrp]"; + } + print STDERR "\n"; + } + print STDERR "\n"; +} + +sub var_type +{ + my ($vars) = @_; + my %type = (); + for my $var (split(/,/,$vars)) + { + my ($ref,$alt) = split(/>/,$var); + if ( $ref eq $alt or $alt eq '.' 
) { $type{ref} = 1; } + elsif ( length($ref)==length($alt) && length($ref)==1 ) { $type{snp} = 1; } + else { $type{indel} = 1; } + } + return keys %type; +} +sub multi_is_subset +{ + my ($avar,$bvar) = @_; + my %avars = (); + my %bvars = (); + for my $var (split(/,/,$avar)) { $avars{$var} = 1; } + for my $var (split(/,/,$bvar)) { $bvars{$var} = 1; } + for my $var (keys %avars) + { + if ( exists($bvars{$var}) ) { return 1; } + } + for my $var (keys %bvars) + { + if ( exists($avars{$var}) ) { return 1; } + } + return 0; +} +sub multi_is_exact +{ + my ($avar,$bvar) = @_; + my %avars = (); + my %bvars = (); + for my $var (split(/,/,$avar)) { $avars{$var} = 1; } + for my $var (split(/,/,$bvar)) { $bvars{$var} = 1; } + for my $var (keys %avars) + { + if ( !exists($bvars{$var}) ) { return 0; } + } + for my $var (keys %bvars) + { + if ( !exists($avars{$var}) ) { return 0; } + } + return 1; +} +sub pairing_score +{ + my ($opts,$grps,$vars,$avset,$bvset) = @_; + + my $score = {}; + if ( $$opts{logic}=~/both/ or $$opts{logic}=~/snps/ or $$opts{logic}=~/all/ ) + { + $$score{snp}{snp} = 3; + if ( $$opts{logic}=~/ref/ or $$opts{logic}=~/all/ ) { $$score{snp}{ref} = 2; } + } + if ( $$opts{logic}=~/both/ or $$opts{logic}=~/indels/ or $$opts{logic}=~/all/ ) + { + $$score{indel}{indel} = 3; + if ( $$opts{logic}=~/ref/ or $$opts{logic}=~/all/ ) { $$score{indel}{ref} = 2; } + } + if ( $$opts{logic}=~/all/ ) + { + $$score{snp}{indel} = 1; + $$score{indel}{snp} = 1; + } + for my $a (keys %$score) + { + for my $b (keys %{$$score{$a}}) + { + $$score{$b}{$a} = $$score{$a}{$b}; + } + } + + my $max_int = 0xFFFFFFFF; + my $min = $max_int; + for my $ia (@$avset) + { + for my $ib (@$bvset) + { + my $avar = $$grps[ $$vars[$ia][0]{igrp} ]{vars}[ $$vars[$ia][0]{ivar} ]; + my $bvar = $$grps[ $$vars[$ib][0]{igrp} ]{vars}[ $$vars[$ib][0]{ivar} ]; + + if ( $avar eq $bvar ) { return $max_int; } + if ( $$opts{logic} eq 'exact' ) + { + if ( multi_is_exact($avar,$bvar) ) { return $max_int; } + next; + } + 
elsif ( multi_is_subset($avar,$bvar) ) { return $max_int; } + + my @atype = var_type($avar); + my @btype = var_type($bvar); + my $max = 0; + for my $a (@atype) + { + for my $b (@btype) + { + if ( !exists($$score{$a}{$b}) ) { next; } + if ( $max < $$score{$a}{$b} ) { $max = $$score{$a}{$b}; } + } + } + if ( !$max ) { return 0; } # some of the variants in the two groups are not compatible + if ( $min > $max ) { $min = $max; } + } + } + if ( $$opts{logic} eq 'exact' ) { return 0; } + + my $cnt = 0; + for my $ivar (@$avset,@$bvset) + { + my $var = $$vars[$ivar]; + for my $grp (@$var) + { + $cnt += $$grp{cnt}; + } + } + return (1<<(28+$min)) + $cnt; +} diff --git a/src/htslib-1.21/test/test-bcf-translate.c b/src/htslib-1.21/test/test-bcf-translate.c new file mode 100644 index 0000000..263e71e --- /dev/null +++ b/src/htslib-1.21/test/test-bcf-translate.c @@ -0,0 +1,203 @@ +/* test/test-bcf-translate.c + + Copyright (C) 2017-2018 Genome Research Ltd. + + Author: Petr Danecek + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#include + +#include + +#include "../htslib/vcf.h" + +void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) error(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + exit(-1); +} + +int main(int argc, char **argv) +{ + char *fname = argc>1 ? argv[1] : "/dev/null"; + htsFile *fp = hts_open(fname, "w"); + bcf_hdr_t *hdr1, *hdr2; + + hdr1 = bcf_hdr_init("w"); + hdr2 = bcf_hdr_init("w"); + + // Add two shared and two private annotations + bcf_hdr_append(hdr1, "##contig="); + bcf_hdr_append(hdr1, "##contig="); + bcf_hdr_append(hdr2, "##contig="); + bcf_hdr_append(hdr2, "##contig="); + bcf_hdr_append(hdr1, "##FILTER="); + bcf_hdr_append(hdr1, "##FILTER="); + bcf_hdr_append(hdr1, "##FILTER="); + bcf_hdr_append(hdr2, "##FILTER="); + bcf_hdr_append(hdr2, "##FILTER="); + bcf_hdr_append(hdr2, "##FILTER="); + bcf_hdr_append(hdr1, "##INFO="); + bcf_hdr_append(hdr1, "##INFO="); + bcf_hdr_append(hdr1, "##INFO="); + bcf_hdr_append(hdr2, "##INFO="); + bcf_hdr_append(hdr2, "##INFO="); + bcf_hdr_append(hdr2, "##INFO="); + bcf_hdr_append(hdr1, "##FORMAT="); + bcf_hdr_append(hdr1, "##FORMAT="); + bcf_hdr_append(hdr1, "##FORMAT="); + bcf_hdr_append(hdr2, "##FORMAT="); + bcf_hdr_append(hdr2, "##FORMAT="); + bcf_hdr_append(hdr2, "##FORMAT="); + bcf_hdr_add_sample(hdr1,"SMPL1"); + bcf_hdr_add_sample(hdr1,"SMPL2"); + bcf_hdr_add_sample(hdr2,"SMPL1"); + bcf_hdr_add_sample(hdr2,"SMPL2"); + if (bcf_hdr_sync(hdr1) < 0) error("bcf_hdr_sync(hdr1)"); + if (bcf_hdr_sync(hdr2) < 0) error("bcf_hdr_sync(hdr2)"); + + hdr2 = bcf_hdr_merge(hdr2,hdr1); + if (bcf_hdr_sync(hdr2) < 0) error("bcf_hdr_sync(hdr2) after merge"); + if ( bcf_hdr_write(fp, hdr2)!=0 ) error("Failed to write to %s\n", 
fname); + + bcf1_t *rec = bcf_init1(); + rec->rid = bcf_hdr_name2id(hdr1, "1"); + rec->pos = 0; + bcf_update_alleles_str(hdr1, rec, "G,A"); + int32_t tmpi[3]; + tmpi[0] = bcf_hdr_id2int(hdr1, BCF_DT_ID, "FLT1"); + tmpi[1] = bcf_hdr_id2int(hdr1, BCF_DT_ID, "FLT2"); + tmpi[2] = bcf_hdr_id2int(hdr1, BCF_DT_ID, "FLT3"); + bcf_update_filter(hdr1, rec, tmpi, 3); + tmpi[0] = 1; bcf_update_info_int32(hdr1, rec, "INF1", tmpi, 1); + tmpi[0] = 2; bcf_update_info_int32(hdr1, rec, "INF2", tmpi, 1); + tmpi[0] = 3; bcf_update_info_int32(hdr1, rec, "INF3", tmpi, 1); + tmpi[0] = tmpi[1] = 1; bcf_update_format_int32(hdr1, rec, "FMT1", tmpi, 2); + tmpi[0] = tmpi[1] = 2; bcf_update_format_int32(hdr1, rec, "FMT2", tmpi, 2); + tmpi[0] = tmpi[1] = 3; bcf_update_format_int32(hdr1, rec, "FMT3", tmpi, 2); + + bcf_remove_filter(hdr1, rec, bcf_hdr_id2int(hdr1, BCF_DT_ID, "FLT2"), 0); + bcf_update_info_int32(hdr1, rec, "INF2", NULL, 0); + bcf_update_format_int32(hdr1, rec, "FMT2", NULL, 0); + + bcf_translate(hdr2, hdr1, rec); + if ( bcf_write(fp, hdr2, rec)!=0 ) error("Failed to write to %s\n", fname); + + // Clean + bcf_destroy1(rec); + bcf_hdr_destroy(hdr1); + bcf_hdr_destroy(hdr2); + int ret; + if ( (ret=hts_close(fp)) ) + { + fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret); + exit(ret); + } + return 0; +} + + + + // // Create VCF header + // kstring_t str = {0,0,0}; + // bcf_hdr_add_sample(hdr, "NA00003"); + // bcf_hdr_add_sample(hdr, NULL); // to update internal structures + // bcf_hdr_write(fp, hdr); + // // Add a record + // // 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. + // // .. CHROM + // rec->rid = bcf_hdr_name2id(hdr, "20"); + // // .. POS + // rec->pos = 14369; + // // .. ID + // bcf_update_id(hdr, rec, "rs6054257"); + // // .. REF and ALT + // bcf_update_alleles_str(hdr, rec, "G,A"); + // // .. QUAL + // rec->qual = 29; + // // .. 
FILTER + // int32_t tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS"); + // bcf_update_filter(hdr, rec, &tmpi, 1); + // // .. INFO + // tmpi = 3; + // bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1); + // tmpi = 14; + // bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1); + // float tmpf = 0.5; + // bcf_update_info_float(hdr, rec, "AF", &tmpf, 1); + // bcf_update_info_flag(hdr, rec, "DB", NULL, 1); + // bcf_update_info_flag(hdr, rec, "H2", NULL, 1); + // // .. FORMAT + // int32_t *tmpia = (int*)malloc(bcf_hdr_nsamples(hdr)*2*sizeof(int)); + // tmpia[0] = bcf_gt_phased(0); + // tmpia[1] = bcf_gt_phased(0); + // tmpia[2] = bcf_gt_phased(1); + // tmpia[3] = bcf_gt_phased(0); + // tmpia[4] = bcf_gt_unphased(1); + // tmpia[5] = bcf_gt_unphased(1); + // bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2); + // tmpia[0] = 48; + // tmpia[1] = 48; + // tmpia[2] = 43; + // bcf_update_format_int32(hdr, rec, "GQ", tmpia, bcf_hdr_nsamples(hdr)); + // tmpia[0] = 1; + // tmpia[1] = 8; + // tmpia[2] = 5; + // bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr)); + // tmpia[0] = 51; + // tmpia[1] = 51; + // tmpia[2] = 51; + // tmpia[3] = 51; + // tmpia[4] = bcf_int32_missing; + // tmpia[5] = bcf_int32_missing; + // bcf_update_format_int32(hdr, rec, "HQ", tmpia, bcf_hdr_nsamples(hdr)*2); + // char *tmp_str[] = {"String1","SomeOtherString2","YetAnotherString3"}; + // bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3); + // bcf_write1(fp, hdr, rec); + // // 20 1110696 . A G,T 67 . NS=2;DP=10;AF=0.333,.;AA=T;DB GT 2 1 ./. 
+ // bcf_clear1(rec); + // rec->rid = bcf_hdr_name2id(hdr, "20"); + // rec->pos = 1110695; + // bcf_update_alleles_str(hdr, rec, "A,G,T"); + // rec->qual = 67; + // tmpi = 2; + // bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1); + // tmpi = 10; + // bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1); + // float *tmpfa = (float*)malloc(2*sizeof(float)); + // tmpfa[0] = 0.333; + // bcf_float_set_missing(tmpfa[1]); + // bcf_update_info_float(hdr, rec, "AF", tmpfa, 2); + // bcf_update_info_string(hdr, rec, "AA", "T"); + // bcf_update_info_flag(hdr, rec, "DB", NULL, 1); + // tmpia[0] = bcf_gt_phased(2); + // tmpia[1] = bcf_int32_vector_end; + // tmpia[2] = bcf_gt_phased(1); + // tmpia[3] = bcf_int32_vector_end; + // tmpia[4] = bcf_gt_missing; + // tmpia[5] = bcf_gt_missing; + // bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2); + // bcf_write1(fp, hdr, rec); + // free(tmpia); + // free(tmpfa); diff --git a/src/htslib-1.21/test/test-bcf-translate.out b/src/htslib-1.21/test/test-bcf-translate.out new file mode 100644 index 0000000..e021f43 --- /dev/null +++ b/src/htslib-1.21/test/test-bcf-translate.out @@ -0,0 +1,18 @@ +##fileformat=VCFv4.2 +##FILTER= +##contig= +##contig= +##FILTER= +##FILTER= +##FILTER= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FILTER= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SMPL1 SMPL2 +1 1 . G A 0 FLT1;FLT3 INF1=1;INF3=3 FMT1:FMT3 1:3 1:3 diff --git a/src/htslib-1.21/test/test-bcf_set_variant_type.c b/src/htslib-1.21/test/test-bcf_set_variant_type.c new file mode 100644 index 0000000..eb12ecd --- /dev/null +++ b/src/htslib-1.21/test/test-bcf_set_variant_type.c @@ -0,0 +1,135 @@ +/* test/test-bcf_set_variant_type.c -- bcf_set_variant_type test harness. + + Copyright (C) 2022 Genome Research Ltd. 
+ + Author: Martin Pollard + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include + +#include "../htslib/hts.h" +#include "../vcf.c" + +void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) error(const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + if (strrchr(format, '\n') == NULL) fputc('\n', stderr); + exit(-1); +} + +static void test_bcf_set_variant_type(void) +{ + // Test SNVs + bcf_variant_t var1; + bcf_set_variant_type("A", "T", &var1); + if ( var1.type != VCF_SNP) + { + error("A -> T was not detected as a SNP"); + } + + // Test INDEL + bcf_variant_t var2a; + bcf_set_variant_type("A", "AA", &var2a); + if ( var2a.type != (VCF_INDEL|VCF_INS) ) + { + error("A -> AA was not detected as an INDEL"); + } + bcf_variant_t var2b; + bcf_set_variant_type("AA", "A", &var2b); + if ( var2b.type != (VCF_INDEL|VCF_DEL) ) + { + error("AA -> A was not detected as a INDEL"); + } + + // Test breakends + bcf_variant_t var3a; + bcf_set_variant_type("N", "N]16:33625444]", &var3a); + if ( var3a.type != VCF_BND) + { + error("N]16:33625444] was not detected as a breakend"); + } + + bcf_variant_t var3b; + bcf_set_variant_type("N", "N[16:33625444[", &var3b); + if (var3b.type != VCF_BND) + { + error("N[16:33625444[ was not detected as a breakend"); + } + + bcf_variant_t var3c; + bcf_set_variant_type("N", "]16:33625444]N", &var3c); + if ( var3c.type != VCF_BND) + { + error("]16:33625444]N was not detected as a breakend"); + } + + bcf_variant_t var3d; + bcf_set_variant_type("N", "[16:33625444[N", &var3d); + if ( var3d.type != VCF_BND) + { + error("[16:33625444[N was not detected as a breakend"); + } + // Test special reference alleles + bcf_variant_t var4a; + bcf_set_variant_type("A", "", &var4a); + if ( var4a.type != VCF_REF) + { + error(" was not detected as a special reference allele"); + } + bcf_variant_t var4b; + bcf_set_variant_type("A", "<*>", &var4b); + if ( var4b.type != VCF_REF) + { + error("<*> was not detected as a special reference allele"); + } + // Test MNP + bcf_variant_t var5; + bcf_set_variant_type("AA", "TT", &var5); + if ( var5.type != VCF_MNP) + { + error("AA->TT was not detected as a MNP"); + } + // Test Overlapping allele + 
bcf_variant_t var6; + bcf_set_variant_type("A", "*", &var6); + if ( var6.type != VCF_OVERLAP) + { + error("A->* was not detected as an overlap"); + } + // Test . + bcf_variant_t var7; + bcf_set_variant_type("A", ".", &var7); + if ( var7.type != VCF_REF) + { + error("A->. was not detected as a special reference allele"); + } +} + +int main(int argc, char **argv) +{ + test_bcf_set_variant_type(); + return 0; +} + diff --git a/src/htslib-1.21/test/test-logging.pl b/src/htslib-1.21/test/test-logging.pl new file mode 100755 index 0000000..2f22560 --- /dev/null +++ b/src/htslib-1.21/test/test-logging.pl @@ -0,0 +1,96 @@ +#!/usr/bin/env perl +# +# Copyright (C) 2017 Genome Research Ltd. +# +# Author: Anders Kaplan +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +use strict; + +my $log_message_count = 0; +my $file_count = 0; +my $failure_count = 0; + +sub check_log_message +{ + my ($message, $filename, $line_num) = @_; + $log_message_count++; + + unless ($message =~ /^\"([A-Z!-@]|%s)/) + { + print "$filename line $line_num:\n"; + print "Log message should begin with a capital letter: $message.\n"; + $failure_count++; + } + + if ($message =~ /\\n\"$/) + { + print "$filename line $line_num:\n"; + print "Log message should NOT end with a newline: $message.\n"; + $failure_count++; + } + + if ($message =~ /\.\"$/) + { + print "$filename line $line_num:\n"; + print "Log message should NOT end with a full stop: $message.\n"; + $failure_count++; + } +} + +sub check_file +{ + my ($filename) = @_; + $file_count++; + + open(my $fh, '<', $filename) or die "Could not open $filename."; + my $line_num = 1; + my $line = <$fh>; + while ($line) + { + if ($line =~ /hts_log_\w+\s*\(\s*(\"[^\"]*\")/) + { + unless ($line =~ /\\n\"\s*$/) # string constant continues on next line + { + check_log_message($1, $filename, $line_num); + } + } + + $line_num++; + $line = <$fh>; + } +} + +sub check_dir +{ + my ($path) = @_; + foreach my $filename (glob("$path/*.c")) + { + check_file($filename); + } +} + +check_dir(".."); +check_dir("../cram"); + +print "$file_count files scanned\n"; +print "$log_message_count log messages checked\n"; +print "$failure_count errors found\n"; +exit($failure_count > 0); diff --git a/src/htslib-1.21/test/test-parse-reg.c b/src/htslib-1.21/test/test-parse-reg.c new file mode 100644 index 0000000..1703723 --- /dev/null +++ b/src/htslib-1.21/test/test-parse-reg.c @@ -0,0 +1,204 @@ +/* + Copyright (C) 2018-2019 Genome Research Ltd. 
+ + Author: James Bonfield + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +/* + Test region description parser. + Usage: test-parse-reg [-c] file.bam region + test-parse-reg [-c] -m file.bam region,region... + test-parse-reg -t + + -c is chr:pos is a single base coordinate, ie chr:pos-pos, + otherwise it is chr:pos- + -m is multi-region list. 
+ -t runs built-in tests + + ./test/test-parse-reg -c -m test/colons.bam "{chr1:100-200},{chr1}:100-200,{chr1:100-200}:100,{chr1,chr3},chr1:" +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "../htslib/hts.h" +#include "../htslib/sam.h" + +void reg_expected(sam_hdr_t *hdr, const char *reg, int flags, + char *reg_exp, int tid_exp, hts_pos_t beg_exp, hts_pos_t end_exp) { + const char *reg_out; + int tid_out = -1; + hts_pos_t beg_out = -1, end_out = -1; + + reg_out = sam_parse_region(hdr, reg, &tid_out, &beg_out, &end_out, flags); + + if ((reg_out != NULL) != (reg_exp != NULL) || + (reg_out && reg_exp && strcmp(reg_out, reg_exp) != 0) || + (reg_exp && tid_out != tid_exp) || + (reg_exp && beg_out != beg_exp) || + (reg_exp && end_out != end_exp)) { + fprintf(stderr, "Parsing \"%s\" expected return \"%s\", %d:%"PRIhts_pos"-%"PRIhts_pos", " + "but got \"%s\", %d:%"PRIhts_pos"-%"PRIhts_pos"\n", + reg, + reg_exp?reg_exp:"(null)", tid_exp, beg_exp, end_exp, + reg_out?reg_out:"(null)", tid_out, beg_out, end_out); + exit(1); + } +} + +int reg_test(char *fn) { + samFile *fp; + sam_hdr_t *hdr; + + if (!(fp = sam_open(fn, "r"))) + return 1; + + if (!(hdr = sam_hdr_read(fp))) + return 1; + + // 0 chr1 + // 1 chr1:100 + // 2 chr1:100-200 + // 3 chr2:100-200 + // 4 chr3 + // 5 chr1,chr3 + + // Check range extensions. 
+ reg_expected(hdr, "chr1", 0, "", 0, 0, HTS_POS_MAX); + reg_expected(hdr, "chr1:50", 0, "", 0, 49, HTS_POS_MAX); + reg_expected(hdr, "chr1:50", HTS_PARSE_ONE_COORD, "", 0, 49, 50); + reg_expected(hdr, "chr1:50-100", 0, "", 0, 49, 100); + reg_expected(hdr, "chr1:50-", 0, "", 0, 49, HTS_POS_MAX); + reg_expected(hdr, "chr1:-50", 0, "", 0, 0, 50); + + // Check quoting + fprintf(stderr, "Expected error: "); + reg_expected(hdr, "chr1:100-200", 0, NULL, 0, 0, 0); // ambiguous + reg_expected(hdr, "{chr1}:100-200", 0, "", 0, 99, 200); + reg_expected(hdr, "{chr1:100-200}", 0, "", 2, 0, HTS_POS_MAX); + reg_expected(hdr, "{chr1:100-200}:100-200", 0, "", 2, 99, 200); + reg_expected(hdr, "{chr2:100-200}:100-200", 0, "", 3, 99, 200); + reg_expected(hdr, "chr2:100-200:100-200", 0, "", 3, 99, 200); + reg_expected(hdr, "chr2:100-200", 0, "", 3, 0, HTS_POS_MAX); + + // Check numerics + reg_expected(hdr, "chr3", 0, "", 4, 0, HTS_POS_MAX); + reg_expected(hdr, "chr3:", 0, "", 4, 0, HTS_POS_MAX); + reg_expected(hdr, "chr3:1000-1500", 0, "", 4, 999, 1500); + reg_expected(hdr, "chr3:1,000-1,500", 0, "", 4, 999, 1500); + reg_expected(hdr, "chr3:1k-1.5K", 0, "", 4, 999, 1500); + reg_expected(hdr, "chr3:1e3-1.5e3", 0, "", 4, 999, 1500); + reg_expected(hdr, "chr3:1e3-15e2", 0, "", 4, 999, 1500); + + // Check list mode + reg_expected(hdr, "chr1,chr3", HTS_PARSE_LIST, "chr3", 0, 0, HTS_POS_MAX); + fprintf(stderr, "Expected error: "); + reg_expected(hdr, "chr1:100-200,chr3", HTS_PARSE_LIST, NULL, 0, 0, 0); // ambiguous + reg_expected(hdr, "{chr1,chr3}", HTS_PARSE_LIST, "", 5, 0, HTS_POS_MAX); + reg_expected(hdr, "{chr1,chr3},chr1", HTS_PARSE_LIST, "chr1", 5, 0, HTS_POS_MAX); + // incorrect usage; first reg is valid (but not what user expects). 
+ reg_expected(hdr, "chr3:1,000-1,500", HTS_PARSE_LIST | HTS_PARSE_ONE_COORD, "000-1,500", 4, 0, 1); + + // More expected failures + reg_expected(hdr, "chr2", 0, NULL, 0, 0, 0); + reg_expected(hdr, "chr1,", 0, NULL, 0, 0, 0); + fprintf(stderr, "Expected error: "); + reg_expected(hdr, "{chr1", 0, NULL, 0, 0, 0); + reg_expected(hdr, "chr1:10-10", 0, "", 0, 9, 10); // OK + reg_expected(hdr, "chr1:10-9", 0, NULL, 0, 0, 0); // Issue#353 + fprintf(stderr, "Expected error: "); + reg_expected(hdr, "chr1:x", 0, NULL, 0, 0, 0); + fprintf(stderr, "Expected error: "); + reg_expected(hdr, "chr1:1-y", 0, NULL, 0, 0, 0); + fprintf(stderr, "Expected error: "); + reg_expected(hdr, "chr1:1,chr3", 0, NULL, 0, 0, 0); + + sam_hdr_destroy(hdr); + sam_close(fp); + + exit(0); +} + +int main(int argc, char **argv) { + sam_hdr_t *hdr; + samFile *fp; + int flags = 0; + + while (argc > 1) { + if (strcmp(argv[1], "-m") == 0) { + flags |= HTS_PARSE_LIST; + argc--; argv++; + continue; + } + + if (strcmp(argv[1], "-c") == 0) { + flags |= HTS_PARSE_ONE_COORD; + argc--; argv++; + continue; + } + + // Automatic mode for test harness + if (strcmp(argv[1], "-t") == 0) + reg_test(argv[2]); + + break; + } + + // Interactive mode for debugging + if (argc != 3) { + fprintf(stderr, "Usage: test-parse-reg [-m] [-c] region[,region]...\n"); + exit(1); + } + + if (!(fp = sam_open(argv[1], "r"))) { + perror(argv[1]); + exit(1); + } + + if (!(hdr = sam_hdr_read(fp))) { + fprintf(stderr, "Couldn't read header\n"); + exit(1); + } + + const char *reg = argv[2]; + while (*reg) { + int tid; + hts_pos_t beg, end; + reg = sam_parse_region(hdr, reg, &tid, &beg, &end, flags); + if (!reg) { + fprintf(stderr, "Failed to parse region\n"); + exit(1); + } + printf("%-20s %12"PRIhts_pos" %12"PRIhts_pos"\n", + tid == -1 ? 
"*" : hdr->target_name[tid], + beg, end); + } + + sam_hdr_destroy(hdr); + sam_close(fp); + + return 0; +} diff --git a/src/htslib-1.21/test/test-regidx.c b/src/htslib-1.21/test/test-regidx.c new file mode 100644 index 0000000..4ba6237 --- /dev/null +++ b/src/htslib-1.21/test/test-regidx.c @@ -0,0 +1,473 @@ +/* test/test-regidx.c -- Regions index test harness. + + gcc -g -Wall -O0 -I. -I../htslib/ -L../htslib regidx.c -o test-regidx test-regidx.c -lhts + + Copyright (C) 2014,2016,2018, 2020, 2023 Genome Research Ltd. + + Author: Petr Danecek + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../htslib/kstring.h" +#include "../htslib/regidx.h" +#include "../htslib/hts_defs.h" +#include "../textutils_internal.h" + +static int verbose = 0; + +HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) +static void debug(const char *format, ...) 
+{ + if ( verbose<2 ) return; + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +} + +HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) +static void info(const char *format, ...) +{ + if ( verbose<1 ) return; + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +} + +HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) +static void error(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + exit(-1); +} + +int custom_parse(const char *line, char **chr_beg, char **chr_end, hts_pos_t *beg, hts_pos_t *end, void *payload, void *usr) +{ + // Use the standard parser for CHROM,FROM,TO + int i, ret = regidx_parse_tab(line,chr_beg,chr_end,beg,end,NULL,NULL); + if ( ret!=0 ) return ret; + + // Skip the fields that were parsed above + char *ss = (char*) line; + while ( *ss && isspace_c(*ss) ) ss++; + for (i=0; i<3; i++) + { + while ( *ss && !isspace_c(*ss) ) ss++; + if ( !*ss ) return -2; // wrong number of fields + while ( *ss && isspace_c(*ss) ) ss++; + } + if ( !*ss ) return -2; + + // Parse the payload + char *se = ss; + while ( *se && !isspace_c(*se) ) se++; + char **dat = (char**) payload; + *dat = (char*) malloc(se-ss+1); + memcpy(*dat,ss,se-ss+1); + (*dat)[se-ss] = 0; + return 0; +} +void custom_free(void *payload) +{ + char **dat = (char**)payload; + free(*dat); +} + +void test_sequential_access(void) +{ + // Init index with no file name, we will insert the regions manually + regidx_t *idx = regidx_init(NULL,custom_parse,custom_free,sizeof(char*),NULL); + if ( !idx ) error("init failed\n"); + + // Insert regions + kstring_t str = {0,0,0}; + int i, n = 10; + for (i=0; ibeg!=itr->end || itr->beg+1!=10*(i+1) ) error("listing failed, expected %d, found %"PRIhts_pos"\n",10*(i+1),itr->beg+1); + str.l = 0; + ksprintf(&str,"%"PRIhts_pos, itr->beg+1); + if ( strcmp(regitr_payload(itr,char*),str.s) ) error("listing failed, expected payload \"%s\", found 
\"%s\"\n",str.s,regitr_payload(itr,char*)); + i++; + } + if ( i!=n ) error("Expected %d regions, listed %d\n", n,i); + debug("ok: listed %d regions\n", n); + + // Clean up + regitr_destroy(itr); + regidx_destroy(idx); + free(str.s); +} + +void test_custom_payload(void) +{ + // Init index with no file name, we will insert the regions manually + regidx_t *idx = regidx_init(NULL,custom_parse,custom_free,sizeof(char*),NULL); + if ( !idx ) error("init failed\n"); + + // Insert regions + char *line; + line = "1 10000000 10000000 1:10000000-10000000"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + line = "1 20000000 20000001 1:20000000-20000001"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + line = "1 20000002 20000002 1:20000002-20000002"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + line = "1 30000000 30000000 1:30000000-30000000"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + line = "1 8000000000 8000000000 1:8000000000-8000000000"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + + // Test + regitr_t *itr = regitr_init(idx); + hts_pos_t from, to; + + from = to = 10000000; + if ( !regidx_overlap(idx,"1",from-1,to-1,itr) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from,to); + if ( strcmp("1:10000000-10000000",regitr_payload(itr,char*)) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos" vs %s\n", from,to,regitr_payload(itr,char*)); + if ( !regidx_overlap(idx,"1",from-2,to-1,itr) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from-1,to); + if ( !regidx_overlap(idx,"1",from-2,to+3,itr) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from-1,to+2); + if ( regidx_overlap(idx,"1",from-2,to-2,itr) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from-1,to-1); + + from = to = 20000000; + if ( !regidx_overlap(idx,"1",from-1,to-1,itr) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from,to); + + from = to = 
20000002; + if ( !regidx_overlap(idx,"1",from-1,to-1,itr) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from,to); + + from = to = 30000000; + if ( !regidx_overlap(idx,"1",from-1,to-1,itr) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from,to); + + from = to = 8000000000; + if ( !regidx_overlap(idx,"1",from-1,to-1,itr) ) error("query failed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from,to); + + // This shouldn't bring anything back + from &= 0xffffffffU; + to &= 0xffffffffU; + if ( regidx_overlap(idx,"1",from-1,to-1,itr) ) error("query should not succeed: 1:%"PRIhts_pos"-%"PRIhts_pos"\n",from,to); + + // Clean up + regitr_destroy(itr); + regidx_destroy(idx); +} + +void get_random_region(uint32_t min, uint32_t max, uint32_t *beg, uint32_t *end) +{ + uint64_t b = rand(), e = rand(); + *beg = min + (b * (max-min)) / RAND_MAX; + *end = *beg + (e * (max-*beg)) / RAND_MAX; +} + +void test_random(int nregs, uint32_t min, uint32_t max) +{ + min--; + max--; + + // Init index with no file name, we will insert the regions manually + regidx_t *idx = regidx_init(NULL,custom_parse,custom_free,sizeof(char*),NULL); + if ( !idx ) error("init failed\n"); + + // Test region + uint32_t beg,end; + get_random_region(min,max,&beg,&end); + + // Insert regions + int i, nexp = 0; + kstring_t str = {0,0,0}; + for (i=0; i=beg && b<=end ) nexp++; + } + + // Test + regitr_t *itr = regitr_init(idx); + int nhit = 0, ret = regidx_overlap(idx,"1",beg,end,itr); + if ( nexp && !ret ) error("query failed, expected %d overlap(s), found none: %d-%d\n", nexp,beg+1,end+1); + if ( !nexp && ret ) error("query failed, expected no overlaps, found some: %d-%d\n", beg+1,end+1); + while ( ret && regitr_overlap(itr) ) + { + str.l = 0; + ksprintf(&str,"1:%"PRIhts_pos"-%"PRIhts_pos"",itr->beg+1,itr->end+1); + if ( strcmp(str.s,regitr_payload(itr,char*)) ) + error("query failed, incorrect payload: %s vs %s (%d-%d)\n",str.s,regitr_payload(itr,char*),beg+1,end+1); + if ( itr->beg > end || itr->end < beg ) 
+ error("query failed, incorrect hit: %d-%d vs %"PRIhts_pos"-%"PRIhts_pos", payload %s\n", beg+1,end+1,itr->beg+1,itr->end+1,regitr_payload(itr,char*)); + nhit++; + } + if ( nexp!=nhit ) error("query failed, expected %d overlap(s), found %d: %d-%d\n",nexp,nhit,beg+1,end+1); + debug("ok: found %d overlaps\n", nexp); + + // Clean up + regitr_destroy(itr); + regidx_destroy(idx); + free(str.s); +} +void test_explicit(char *tgt, char *qry, char *exp) +{ + regidx_t *idx = regidx_init(NULL,regidx_parse_reg,NULL,0,NULL); + + char *beg = tgt, *end, *exp_ori = exp; + kstring_t str = {0,0,0}; + while ( *beg ) + { + end = tgt; + while ( *end && *end!=';' ) end++; + str.l = 0; + kputsn(beg, end-beg, &str); + debug("insert: %s\n", str.s); + if ( regidx_insert(idx,str.s)!=0 ) error("insert failed: %s\n", str.s); + beg = *end ? end + 1 : end; + } + + beg = qry; + while ( *beg ) + { + end = qry; + while ( *end && *end!=';' ) end++; + str.l = 0; + kputsn(beg, end-beg, &str); + beg = *end ? end + 1 : end; + + char *chr_beg, *chr_end; + hts_pos_t reg_beg, reg_end; + if ( regidx_parse_reg(str.s, &chr_beg, &chr_end, ®_beg, ®_end, NULL, NULL)!=0 ) error("could not parse: %s in %s\n", str.s, qry); + chr_end[1] = 0; + int hit = regidx_overlap(idx,chr_beg,reg_beg,reg_end,NULL); + if ( *exp=='1' ) + { + if ( !hit ) + { + error("query failed, there should be a hit .. %s:%"PRIhts_pos"-%"PRIhts_pos"\n",chr_beg, reg_beg+1, reg_end+1); + } + else + { + debug("ok: overlap found for %s:%"PRIhts_pos"-%"PRIhts_pos"\n",chr_beg,reg_beg+1,reg_end+1); + } + } + else if ( *exp=='0' ) + { + if ( hit ) + { + error("query failed, there should be no hit .. 
%s:%"PRIhts_pos"-%"PRIhts_pos"\n",chr_beg,reg_beg+1,reg_end+1); + } + else + { + debug("ok: no overlap found for %s:%"PRIhts_pos"-%"PRIhts_pos"\n",chr_beg,reg_beg+1,reg_end+1); + } + } + else error("could not parse: %s\n", exp_ori); + exp++; + } + + free(str.s); + regidx_destroy(idx); +} + +void create_line_bed(char *line, size_t size, char *chr, int start, int end) +{ + snprintf(line,size,"%s\t%d\t%d\n",chr,start-1,end); +} +void create_line_tab(char *line, size_t size, char *chr, int start, int end) +{ + snprintf(line,size,"%s\t%d\t%d\n",chr,start,end); +} +void create_line_reg(char *line, size_t size, char *chr, int start, int end) +{ + snprintf(line,size,"%s:%d-%d\n",chr,start,end); +} + +typedef void (*set_line_f)(char *line, size_t size, char *chr, int start, int end); + +void test(set_line_f set_line, regidx_parse_f parse) +{ + regidx_t *idx = regidx_init(NULL,parse,NULL,0,NULL); + if ( !idx ) error("init failed\n"); + + char line[250], *chr = "1"; + int i, n = 10, start, end, nhit; + for (i=1; ibeg > end-1 || itr->end < start-1 ) error("query failed, incorrect region: %"PRIhts_pos"-%"PRIhts_pos" for %d-%d\n",itr->beg+1,itr->end+1,start,end); + debug("\t %"PRIhts_pos"-%"PRIhts_pos"\n",itr->beg+1,itr->end+1); + nhit++; + } + if ( nhit!=1 ) error("query failed, expected one hit, found %d: %s:%d-%d\n",nhit,chr,start,end); + + + // one hit + start = end = 10*i+1; + if ( !regidx_overlap(idx,chr,start-1,end-1,itr) ) error("query failed, there should be a hit: %s:%d-%d\n",chr,start,end); + debug("ok: overlap(s) found for %s:%d-%d\n",chr,start,end); + nhit = 0; + while ( regitr_overlap(itr) ) + { + if ( itr->beg > end-1 || itr->end < start-1 ) error("query failed, incorrect region: %"PRIhts_pos"-%"PRIhts_pos" for %d-%d\n",itr->beg+1,itr->end+1,start,end); + debug("\t %"PRIhts_pos"-%"PRIhts_pos"\n",itr->beg+1,itr->end+1); + nhit++; + } + if ( nhit!=1 ) error("query failed, expected one hit, found %d: %s:%d-%d\n",nhit,chr,start,end); + + + // two hits + start = 10*i; 
end = start+1; + if ( !regidx_overlap(idx,chr,start-1,end-1,itr) ) error("query failed, there should be a hit: %s:%d-%d\n",chr,start,end); + debug("ok: overlap(s) found for %s:%d-%d\n",chr,start,end); + nhit = 0; + while ( regitr_overlap(itr) ) + { + if ( itr->beg > end-1 || itr->end < start-1 ) error("query failed, incorrect region: %"PRIhts_pos"-%"PRIhts_pos" for %d-%d\n",itr->beg+1,itr->end+1,start,end); + debug("\t %"PRIhts_pos"-%"PRIhts_pos"\n",itr->beg+1,itr->end+1); + nhit++; + } + if ( nhit!=2 ) error("query failed, expected two hits, found %d: %s:%d-%d\n",nhit,chr,start,end); + + // fully contained interval, one hit + start = 20000*i - 5000; end = 20000*i + 3000; + set_line(line,sizeof(line),chr,start,end); + if ( !regidx_overlap(idx,chr,start-1,end-1,itr) ) error("query failed, there should be a hit: %s:%d-%d\n",chr,start,end); + debug("ok: overlap(s) found for %s:%d-%d\n",chr,start,end); + nhit = 0; + while ( regitr_overlap(itr) ) + { + if ( itr->beg > end-1 || itr->end < start-1 ) error("query failed, incorrect region: %"PRIhts_pos"-%"PRIhts_pos" for %d-%d\n",itr->beg+1,itr->end+1,start,end); + debug("\t %"PRIhts_pos"-%"PRIhts_pos"\n",itr->beg+1,itr->end+1); + nhit++; + } + if ( nhit!=1 ) error("query failed, expected one hit, found %d: %s:%d-%d\n",nhit,chr,start,end); + } + regitr_destroy(itr); + regidx_destroy(idx); +} + +static void usage(void) +{ + fprintf(stderr, "Usage: test-regidx [OPTIONS]\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -h, --help this help message\n"); + fprintf(stderr, " -s, --seed random seed\n"); + fprintf(stderr, " -v, --verbose increase verbosity by giving multiple times\n"); + + exit(1); +} + +int main(int argc, char **argv) +{ + static struct option loptions[] = + { + {"help",0,0,'h'}, + {"verbose",0,0,'v'}, + {"seed",1,0,'s'}, + {0,0,0,0} + }; + int c; + int seed = (int)time(NULL); + while ((c = getopt_long(argc, argv, "hvs:",loptions,NULL)) >= 0) + { + switch (c) + { + case 's': seed = atoi(optarg); break; + 
case 'v': verbose++; break; + default: usage(); break; + } + } + + info("Testing sequential access\n"); + test_sequential_access(); + + info("Testing TAB\n"); + test(create_line_tab,regidx_parse_tab); + + info("Testing REG\n"); + test(create_line_reg,regidx_parse_reg); + + info("Testing BED\n"); + test(create_line_bed,regidx_parse_bed); + + info("Testing custom payload\n"); + test_custom_payload(); + + info("Testing cases encountered in past\n"); + test_explicit("12:2064519-2064763","12:2064488-2067434","1"); + + int i, ntest = 1000, nreg = 50; + srand(seed); + info("%d randomized tests, %d regions per test. Random seed is %d\n", ntest,nreg,seed); + for (i=0; i + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include + +#include "../htslib/hts.h" +#include "../htslib/vcf.h" +#include "../htslib/kstring.h" +#include "../htslib/kseq.h" + +void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) error(const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + if (strrchr(format, '\n') == NULL) fputc('\n', stderr); + exit(-1); +} + +#define STRINGIFY(x) #x +#define check0(x) ((x) == 0 ? (void) 0 : error("Failed: %s", STRINGIFY(x))) + +static int check_alleles(bcf1_t *rec, const char **alleles, int num) { + int i; + if (rec->n_allele != num) { + fprintf(stderr, "Wrong number of alleles - expected %d, got %d\n", + num, rec->n_allele); + return -1; + } + if (bcf_unpack(rec, BCF_UN_STR) != 0) + return -1; + for (i = 0; i < num; i++) { + if (0 != strcmp(alleles[i], rec->d.allele[i])) { + fprintf(stderr, + "Mismatch for allele %d : expected '%s' got '%s'\n", + i, alleles[i], rec->d.allele[i]); + return -1; + } + } + return 0; +} + +static void test_update_alleles(bcf_hdr_t *hdr, bcf1_t *rec) +{ + // Exercise bcf_update_alleles() a bit + const char *alleles1[2] = { "G", "A" }; + const char *alleles2[3] = { "C", "TGCA", "CATG" }; +#define rep10(x) x x x x x x x x x x + const char *alleles3[3] = { rep10("ATTCTAGATC"), "TGCA", + rep10("CTATTATCTCTAATGACATG") }; +#undef rep10 + const char *alleles4[3] = { alleles3[2], NULL, alleles3[0] }; + // Add some alleles + check0(bcf_update_alleles(hdr, rec, alleles1, 2)); + check0(check_alleles(rec, alleles1, 2)); + // Erase them + check0(bcf_update_alleles(hdr, rec, NULL, 0)); + check0(check_alleles(rec, NULL, 0)); + // Expand to three + check0(bcf_update_alleles(hdr, rec, alleles2, 3)); + check0(check_alleles(rec, alleles2, 3)); + // Now try some bigger ones (should force a realloc) + check0(bcf_update_alleles(hdr, rec, alleles3, 3)); + check0(check_alleles(rec, alleles3, 3)); + // Ensure it works even if one of the alleles points into the + // existing structure + alleles4[1] = rec->d.allele[1]; + check0(bcf_update_alleles(hdr, rec, alleles4, 3)); + alleles4[1] = alleles3[1]; // Will have been clobbered by the update + check0(check_alleles(rec, alleles4, 3)); + // Ensure it works when the alleles 
point into the existing data, + // rec->d.allele is used to define the input array and the + // order of the entries is changed. The result of this should + // be the same as alleles2. + char *tmp = rec->d.allele[0] + strlen(rec->d.allele[0]) - 4; + rec->d.allele[0] = rec->d.allele[2] + strlen(rec->d.allele[2]) - 1; + rec->d.allele[2] = tmp; + check0(bcf_update_alleles(hdr, rec, (const char **) rec->d.allele, 3)); + check0(check_alleles(rec, alleles2, 3)); +} + +void write_bcf(char *fname) +{ + // Init + htsFile *fp = hts_open(fname,"wb"); + if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno)); + bcf_hdr_t *hdr = bcf_hdr_init("w"); + if (!hdr) error("bcf_hdr_init : %s", strerror(errno)); + bcf1_t *rec = bcf_init1(); + if (!rec) error("bcf_init1 : %s", strerror(errno)); + + // Check no-op on fresh bcf1_t + check0(bcf_update_alleles(hdr, rec, NULL, 0)); + + // Create VCF header + kstring_t str = {0,0,0}; + check0(bcf_hdr_append(hdr, "##fileDate=20090805")); + check0(bcf_hdr_append(hdr, "##FORMAT=")); + check0(bcf_hdr_append(hdr, "##INFO=")); + check0(bcf_hdr_append(hdr, "##FILTER=")); + check0(bcf_hdr_append(hdr, "##unused=")); + check0(bcf_hdr_append(hdr, "##unused=")); + check0(bcf_hdr_append(hdr, "##unused=unformatted text 1")); + check0(bcf_hdr_append(hdr, "##unused=unformatted text 2")); + check0(bcf_hdr_append(hdr, "##contig=")); + check0(bcf_hdr_append(hdr, "##source=myImputationProgramV3.1")); + check0(bcf_hdr_append(hdr, "##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta")); + check0(bcf_hdr_append(hdr, "##contig=")); + check0(bcf_hdr_append(hdr, "##phasing=partial")); + check0(bcf_hdr_append(hdr, "##INFO=")); + check0(bcf_hdr_append(hdr, "##INFO=")); + check0(bcf_hdr_append(hdr, "##INFO=")); + check0(bcf_hdr_append(hdr, "##INFO=")); + check0(bcf_hdr_append(hdr, "##INFO=")); + check0(bcf_hdr_append(hdr, "##INFO=")); + check0(bcf_hdr_append(hdr, "##INFO=")); + check0(bcf_hdr_append(hdr, "##FILTER=")); + 
check0(bcf_hdr_append(hdr, "##FILTER=")); + check0(bcf_hdr_append(hdr, "##FORMAT=")); + check0(bcf_hdr_append(hdr, "##FORMAT=")); + check0(bcf_hdr_append(hdr, "##FORMAT=")); + check0(bcf_hdr_append(hdr, "##FORMAT=")); + check0(bcf_hdr_append(hdr, "##FORMAT=")); + + // Try a few header modifications + bcf_hdr_remove(hdr, BCF_HL_CTG, "Unused"); + check0(bcf_hdr_append(hdr, "##contig=")); + bcf_hdr_remove(hdr, BCF_HL_FMT, "TS"); + check0(bcf_hdr_append(hdr, "##FORMAT=")); + bcf_hdr_remove(hdr, BCF_HL_INFO, "NEG"); + check0(bcf_hdr_append(hdr, "##INFO=")); + bcf_hdr_remove(hdr, BCF_HL_FLT, "s50"); + check0(bcf_hdr_append(hdr, "##FILTER=")); + + check0(bcf_hdr_add_sample(hdr, "NA00001")); + check0(bcf_hdr_add_sample(hdr, "NA00002")); + check0(bcf_hdr_add_sample(hdr, "NA00003")); + check0(bcf_hdr_add_sample(hdr, NULL)); // to update internal structures + if ( bcf_hdr_write(fp, hdr)!=0 ) error("Failed to write to %s\n", fname); + + + // Add a record + // 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;NEG=-127;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. + // .. CHROM + rec->rid = bcf_hdr_name2id(hdr, "20"); + // .. POS + rec->pos = 14369; + // .. ID + check0(bcf_update_id(hdr, rec, "rs6054257")); + // .. REF and ALT + test_update_alleles(hdr, rec); + const char *alleles[2] = { "G", "A" }; + check0(bcf_update_alleles_str(hdr, rec, "G,A")); + check0(check_alleles(rec, alleles, 2)); + // .. QUAL + rec->qual = 29; + // .. FILTER + int32_t tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS"); + check0(bcf_update_filter(hdr, rec, &tmpi, 1)); + // .. 
INFO + tmpi = 3; + check0(bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1)); + tmpi = 500; + check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1)); + tmpi = 100000; + check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1)); + tmpi = 14; + check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1)); + tmpi = -127; + check0(bcf_update_info_int32(hdr, rec, "NEG", &tmpi, 1)); + float tmpf = 0.5; + check0(bcf_update_info_float(hdr, rec, "AF", &tmpf, 1)); + check0(bcf_update_info_flag(hdr, rec, "DB", NULL, 1)); + check0(bcf_update_info_flag(hdr, rec, "H2", NULL, 1)); + // .. FORMAT + int32_t *tmpia = (int*)malloc(bcf_hdr_nsamples(hdr)*2*sizeof(int)); + tmpia[0] = bcf_gt_phased(0); + tmpia[1] = bcf_gt_phased(0); + tmpia[2] = bcf_gt_phased(1); + tmpia[3] = bcf_gt_phased(0); + tmpia[4] = bcf_gt_unphased(1); + tmpia[5] = bcf_gt_unphased(1); + check0(bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2)); + tmpia[0] = 48; + tmpia[1] = 48; + tmpia[2] = 43; + check0(bcf_update_format_int32(hdr, rec, "GQ", tmpia, bcf_hdr_nsamples(hdr))); + tmpia[0] = 0; + tmpia[1] = 0; + tmpia[2] = 1; + check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr))); + tmpia[0] = 1; + tmpia[1] = 100000; + tmpia[2] = 1; + check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr))); + tmpia[0] = 1; + tmpia[1] = 8; + tmpia[2] = 5; + check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr))); + tmpia[0] = 51; + tmpia[1] = 51; + tmpia[2] = 51; + tmpia[3] = 51; + tmpia[4] = bcf_int32_missing; + tmpia[5] = bcf_int32_missing; + check0(bcf_update_format_int32(hdr, rec, "HQ", tmpia, bcf_hdr_nsamples(hdr)*2)); + char *tmp_str[] = {"String1","SomeOtherString2","YetAnotherString3"}; + check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3)); + tmp_str[0] = "LongerStringRequiringBufferReallocation"; + check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3)); + tmp_str[0] = "String1"; + 
check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3)); + if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname); + + // 20 1110696 . A G,T 67 . NS=2;DP=10;NEG=-128;AF=0.333,.;AA=T;DB GT 2 1 ./. + bcf_clear1(rec); + rec->rid = bcf_hdr_name2id(hdr, "20"); + rec->pos = 1110695; + check0(bcf_update_alleles_str(hdr, rec, "A,G,T")); + rec->qual = 67; + tmpi = 2; + check0(bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1)); + tmpi = 10; + check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1)); + tmpi = -128; + check0(bcf_update_info_int32(hdr, rec, "NEG", &tmpi, 1)); + float *tmpfa = (float*)malloc(2*sizeof(float)); + tmpfa[0] = 0.333; + bcf_float_set_missing(tmpfa[1]); + check0(bcf_update_info_float(hdr, rec, "AF", tmpfa, 2)); + check0(bcf_update_info_string(hdr, rec, "AA", "SHORT")); + check0(bcf_update_info_string(hdr, rec, "AA", "LONGSTRING")); + check0(bcf_update_info_string(hdr, rec, "AA", "T")); + check0(bcf_update_info_flag(hdr, rec, "DB", NULL, 1)); + tmpia[0] = bcf_gt_phased(2); + tmpia[1] = bcf_int32_vector_end; + tmpia[2] = bcf_gt_phased(1); + tmpia[3] = bcf_int32_vector_end; + tmpia[4] = bcf_gt_missing; + tmpia[5] = bcf_gt_missing; + check0(bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2)); + if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname); + + free(tmpia); + free(tmpfa); + + // Clean + free(str.s); + bcf_destroy1(rec); + bcf_hdr_destroy(hdr); + int ret; + if ( (ret=hts_close(fp)) ) + { + fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret); + exit(ret); + } +} + +void bcf_to_vcf(char *fname) +{ + htsFile *fp = hts_open(fname,"rb"); + if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno)); + bcf_hdr_t *hdr = bcf_hdr_read(fp); + if (!hdr) error("bcf_hdr_read : %s", strerror(errno)); + bcf1_t *rec = bcf_init1(); + if (!rec) error("bcf_init1 : %s", strerror(errno)); + + char *gz_fname = (char*) malloc(strlen(fname)+4); + if (!gz_fname) error("malloc : %s", 
strerror(errno)); + snprintf(gz_fname,strlen(fname)+4,"%s.gz",fname); + htsFile *out = hts_open(gz_fname,"wg"); + if (!out) error("Couldn't open \"%s\" : %s\n", gz_fname, strerror(errno)); + + bcf_hdr_t *hdr_out = bcf_hdr_dup(hdr); + if (!bcf_hdr_get_hrec(hdr_out, BCF_HL_STR,"ID","BB","unused")) + error("Missing header ##unused="); + bcf_hdr_remove(hdr_out,BCF_HL_STR,"BB"); + if (bcf_hdr_get_hrec(hdr_out, BCF_HL_STR,"ID","BB","unused")) + error("Got pointer to deleted header ##unused="); + + if (!bcf_hdr_get_hrec(hdr_out,BCF_HL_GEN,"unused","unformatted text 1",NULL)) + error("Missing header ##unused=unformatted text 1"); + bcf_hdr_remove(hdr_out,BCF_HL_GEN,"unused"); + if (bcf_hdr_get_hrec(hdr_out,BCF_HL_GEN,"unused","unformatted text 1",NULL)) + error("Got pointer to deleted header ##unused=unformatted text 1"); + + if (!bcf_hdr_get_hrec(hdr_out,BCF_HL_FLT,"ID","Flt",NULL)) + error("Missing header ##FILTER="); + bcf_hdr_remove(hdr_out,BCF_HL_FLT,"Flt"); + if (bcf_hdr_get_hrec(hdr_out,BCF_HL_FLT,"ID","Flt",NULL)) + error("Got pointer to deleted header ##FILTER="); + + if (!bcf_hdr_get_hrec(hdr_out,BCF_HL_INFO,"ID","UI",NULL)) + error("Missing header ##INFO="); + bcf_hdr_remove(hdr_out,BCF_HL_INFO,"UI"); + if (bcf_hdr_get_hrec(hdr_out,BCF_HL_INFO,"ID","UI",NULL)) + error("Got pointer to deleted header ##INFO="); + + if (!bcf_hdr_get_hrec(hdr_out,BCF_HL_FMT,"ID","UF",NULL)) + error("Missing header ##INFO="); + bcf_hdr_remove(hdr_out,BCF_HL_FMT,"UF"); + if (bcf_hdr_get_hrec(hdr_out,BCF_HL_FMT,"ID","UF",NULL)) + error("Got pointer to deleted header ##INFO="); + + if (!bcf_hdr_get_hrec(hdr_out,BCF_HL_CTG,"ID","Unused",NULL)) + error("Missing header ##contig="); + bcf_hdr_remove(hdr_out,BCF_HL_CTG,"Unused"); + if (bcf_hdr_get_hrec(hdr_out,BCF_HL_FMT,"ID","Unused",NULL)) + error("Got pointer to header ##contig="); + + if ( bcf_hdr_write(out, hdr_out)!=0 ) error("Failed to write to %s\n", fname); + int r; + while ((r = bcf_read1(fp, hdr, rec)) >= 0) + { + if ( 
bcf_write1(out, hdr_out, rec)!=0 ) error("Failed to write to %s\n", fname); + + // Test problems caused by bcf1_sync: the data block + // may be realloced, also the unpacked structures must + // get updated. + check0(bcf_unpack(rec, BCF_UN_STR)); + check0(bcf_update_id(hdr, rec, 0)); + check0(bcf_update_format_int32(hdr, rec, "GQ", NULL, 0)); + + bcf1_t *dup = bcf_dup(rec); // force bcf1_sync call + if ( bcf_write1(out, hdr_out, dup)!=0 ) error("Failed to write to %s\n", fname); + bcf_destroy1(dup); + + check0(bcf_update_alleles_str(hdr_out, rec, "G,A")); + int32_t tmpi = 99; + check0(bcf_update_info_int32(hdr_out, rec, "DP", &tmpi, 1)); + int32_t tmpia[] = {9,9,9}; + check0(bcf_update_format_int32(hdr_out, rec, "DP", tmpia, 3)); + + if ( bcf_write1(out, hdr_out, rec)!=0 ) error("Failed to write to %s\n", fname); + } + if (r < -1) error("bcf_read1"); + + bcf_destroy1(rec); + bcf_hdr_destroy(hdr); + bcf_hdr_destroy(hdr_out); + int ret; + if ( (ret=hts_close(fp)) ) + { + fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret); + exit(ret); + } + if ( (ret=hts_close(out)) ) + { + fprintf(stderr,"hts_close(%s): non-zero status %d\n",gz_fname,ret); + exit(ret); + } + + + // read gzip, write stdout + htsFile *gz_in = hts_open(gz_fname, "r"); + if ( !gz_in ) + { + fprintf(stderr,"Could not read: %s\n", gz_fname); + exit(1); + } + + kstring_t line = {0,0,0}; + while ( hts_getline(gz_in, KS_SEP_LINE, &line)>0 ) + { + kputc('\n',&line); + fwrite(line.s,1,line.l,stdout); + } + + if ( (ret=hts_close(gz_in)) ) + { + fprintf(stderr,"hts_close(%s): non-zero status %d\n",gz_fname,ret); + exit(ret); + } + free(line.s); + free(gz_fname); +} + +void iterator(const char *fname) +{ + htsFile *fp = hts_open(fname, "r"); + if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno)); + bcf_hdr_t *hdr = bcf_hdr_read(fp); + if (!hdr) error("bcf_hdr_read : %s", strerror(errno)); + hts_idx_t *idx; + hts_itr_t *iter; + + bcf_index_build(fname, 0); + idx = 
bcf_index_load(fname); + + iter = bcf_itr_queryi(idx, bcf_hdr_name2id(hdr, "20"), 1110600, 1110800); + bcf_itr_destroy(iter); + + iter = bcf_itr_querys(idx, hdr, "20:1110600-1110800"); + bcf_itr_destroy(iter); + + hts_idx_destroy(idx); + bcf_hdr_destroy(hdr); + int ret; + if ( (ret=hts_close(fp)) ) + { + fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret); + exit(ret); + } +} + +void test_get_info_values(const char *fname) +{ + htsFile *fp = hts_open(fname, "r"); + if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno)); + bcf_hdr_t *hdr = bcf_hdr_read(fp); + if (!hdr) error("bcf_hdr_read : %s", strerror(errno)); + bcf1_t *line = bcf_init(); + if (!line) error("bcf_init : %s", strerror(errno)); + int r; + while ((r = bcf_read(fp, hdr, line)) == 0) + { + float *afs = 0; + int32_t *negs = NULL; + int count = 0; + int ret = bcf_get_info_float(hdr, line, "AF", &afs, &count); + + if (line->pos == 14369) + { + if (ret != 1 || afs[0] != 0.5f) + { + fprintf(stderr, "AF on position 14370 should be 0.5\n"); + exit(-1); + } + } + else + { + if (ret != 2 || afs[0] != 0.333f || !bcf_float_is_missing(afs[1])) + { + fprintf(stderr, "AF on position 1110696 should be 0.333, missing\n"); + exit(-1); + } + } + + free(afs); + + int32_t expected = (line->pos == 14369)? 
-127 : -128; + count = 0; + ret = bcf_get_info_int32(hdr, line, "NEG", &negs, &count); + if (ret != 1 || negs[0] != expected) + { + if (ret < 0) + fprintf(stderr, "NEG should be %d, got error ret=%d\n", expected, ret); + else if (ret == 0) + fprintf(stderr, "NEG should be %d, got no entries\n", expected); + else + fprintf(stderr, "NEG should be %d, got %d entries (first is %d)\n", expected, ret, negs[0]); + exit(1); + } + free(negs); + } + if (r < -1) error("bcf_read"); + + bcf_destroy(line); + bcf_hdr_destroy(hdr); + hts_close(fp); +} + +void write_format_values(const char *fname) +{ + // Init + htsFile *fp = hts_open(fname, "wb"); + if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno)); + bcf_hdr_t *hdr = bcf_hdr_init("w"); + if (!hdr) error("bcf_hdr_init : %s", strerror(errno)); + bcf1_t *rec = bcf_init1(); + if (!rec) error("bcf_init1 : %s", strerror(errno)); + + // Create VCF header + check0(bcf_hdr_append(hdr, "##contig=")); + check0(bcf_hdr_append(hdr, "##FORMAT=")); + check0(bcf_hdr_add_sample(hdr, "S")); + check0(bcf_hdr_add_sample(hdr, NULL)); // to update internal structures + if ( bcf_hdr_write(fp, hdr)!=0 ) error("Failed to write to %s\n", fname); + + // Add a record + // .. 
FORMAT + float test[4]; + bcf_float_set_missing(test[0]); + test[1] = 47.11f; + bcf_float_set_vector_end(test[2]); + test[3] = -1.2e-13; + check0(bcf_update_format_float(hdr, rec, "TF", test, 4)); + if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname); + + bcf_destroy1(rec); + bcf_hdr_destroy(hdr); + int ret; + if ((ret = hts_close(fp))) + { + fprintf(stderr, "hts_close(%s): non-zero status %d\n", fname, ret); + exit(ret); + } +} + +void check_format_values(const char *fname) +{ + htsFile *fp = hts_open(fname, "r"); + bcf_hdr_t *hdr = bcf_hdr_read(fp); + bcf1_t *line = bcf_init(); + + while (bcf_read(fp, hdr, line) == 0) + { + float *values = 0; + int count = 0; + int ret = bcf_get_format_float(hdr, line, "TF", &values, &count); + + // NOTE the return value from bcf_get_format_float is different from + // bcf_get_info_float in the sense that vector-end markers also count. + if (ret != 4 || + count < ret || + !bcf_float_is_missing(values[0]) || + values[1] != 47.11f || + !bcf_float_is_vector_end(values[2]) || + !bcf_float_is_vector_end(values[3])) + { + fprintf(stderr, "bcf_get_format_float didn't produce the expected output.\n"); + exit(-1); + } + + free(values); + } + + bcf_destroy(line); + bcf_hdr_destroy(hdr); + hts_close(fp); +} + +void test_get_format_values(const char *fname) +{ + write_format_values(fname); + check_format_values(fname); +} + +void test_invalid_end_tag(void) +{ + static const char vcf_data[] = "data:," + "##fileformat=VCFv4.1\n" + "##contig=\n" + "##INFO=\n" + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n" + "X\t86470037\trs59780433a\tTTTCA\tTGGTT,T\t.\t.\tEND=85725113\n" + "X\t86470038\trs59780433b\tT\tTGGTT,T\t.\t.\tEND=86470047\n"; + + htsFile *fp; + bcf_hdr_t *hdr; + bcf1_t *rec; + int ret; + int32_t tmpi; + enum htsLogLevel logging = hts_get_log_level(); + + // Silence warning messages + hts_set_log_level(HTS_LOG_ERROR); + + fp = hts_open(vcf_data, "r"); + if (!fp) error("Failed to open vcf data : %s", 
strerror(errno)); + rec = bcf_init1(); + if (!rec) error("Failed to allocate BCF record : %s", strerror(errno)); + + hdr = bcf_hdr_read(fp); + if (!hdr) error("Failed to read BCF header : %s", strerror(errno)); + + check0(bcf_read(fp, hdr, rec)); + // rec->rlen should ignore the bogus END tag value on the first read + if (rec->rlen != 5) { + error("Incorrect rlen - expected 5 got %"PRIhts_pos"\n", rec->rlen); + } + + check0(bcf_read(fp, hdr, rec)); + // While on the second it should use it + if (rec->rlen != 10) { + error("Incorrect rlen - expected 10 got %"PRIhts_pos"\n", rec->rlen); + } + + // Try to break it - will change rlen + tmpi = 85725113; + check0(bcf_update_info_int32(hdr, rec, "END", &tmpi, 1)); + + if (rec->rlen != 1) { + error("Incorrect rlen - expected 1 got %"PRIhts_pos"\n", rec->rlen); + } + + ret = bcf_read(fp, hdr, rec); + if (ret != -1) { + error("Unexpected return code %d from bcf_read at EOF", ret); + } + + bcf_destroy1(rec); + bcf_hdr_destroy(hdr); + ret = hts_close(fp); + if (ret != 0) { + error("Unexpected return code %d from hts_close", ret); + } + + hts_set_log_level(logging); +} + +void test_open_format(void) { + char mode[5]; + int ret; + strcpy(mode, "r"); + ret = vcf_open_mode(mode+1, "mode1.bcf", NULL); + if (strncmp(mode, "rb", 2) || ret) + error("Mode '%s' does not match the expected value '%s'", mode, "rb"); + mode[1] = 0; + ret = vcf_open_mode(mode+1, "mode1.vcf", NULL); + if (strncmp(mode, "r", 1) || ret) + error("Mode '%s' does not match the expected value '%s'", mode, "r"); + mode[1] = 0; + ret = vcf_open_mode(mode+1, "mode1.vcf.gz", NULL); + if (strncmp(mode, "rz", 2) || ret) + error("Mode '%s' does not match the expected value '%s'", mode, "rz"); + mode[1] = 0; + ret = vcf_open_mode(mode+1, "mode1.vcf.bgz", NULL); + if (strncmp(mode, "rz", 2) || ret) + error("Mode '%s' does not match the expected value '%s'", mode, "rz"); + mode[1] = 0; + ret = vcf_open_mode(mode+1, "mode1.xcf", NULL); + if (!ret) + error("Expected failure 
for wrong extension 'xcf'"); + mode[1] = 0; + ret = vcf_open_mode(mode+1, "mode1.vcf.gbz", NULL); + if (!ret) + error("Expected failure for wrong extension 'vcf.gbz'"); + mode[1] = 0; + ret = vcf_open_mode(mode+1, "mode1.bvcf.bgz", NULL); + if (!ret) + error("Expected failure for wrong extension 'vcf.bvcf.bgz'"); +} + +int main(int argc, char **argv) +{ + char *fname = argc>1 ? argv[1] : "rmme.bcf"; + + // format test. quiet unless there's a failure + test_get_format_values(fname); + + // main test. writes to stdout + write_bcf(fname); + bcf_to_vcf(fname); + iterator(fname); + + // additional tests. quiet unless there's a failure. + test_get_info_values(fname); + test_invalid_end_tag(); + test_open_format(); + return 0; +} diff --git a/src/htslib-1.21/test/test-vcf-api.out b/src/htslib-1.21/test/test-vcf-api.out new file mode 100644 index 0000000..dd2f4f9 --- /dev/null +++ b/src/htslib-1.21/test/test-vcf-api.out @@ -0,0 +1,29 @@ +##fileformat=VCFv4.2 +##FILTER= +##fileDate=20090805 +##unused= +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;NEG=-127;AF=0.5;DB;H2 GT:GQ:DP:HQ:TS 0|0:48:1:51,51:String1 1|0:48:8:51,51:SomeOtherString2 1/1:43:5:.,.:YetAnotherString3 +20 14370 . G A 29 PASS NS=3;DP=14;NEG=-127;AF=0.5;DB;H2 GT:DP:HQ:TS 0|0:1:51,51:String1 1|0:8:51,51:SomeOtherString2 1/1:5:.,.:YetAnotherString3 +20 14370 . G A 29 PASS NS=3;DP=99;NEG=-127;AF=0.5;DB;H2 GT:DP:HQ:TS 0|0:9:51,51:String1 1|0:9:51,51:SomeOtherString2 1/1:9:.,.:YetAnotherString3 +20 1110696 . A G,T 67 . NS=2;DP=10;NEG=-128;AF=0.333,.;AA=T;DB GT 2 1 ./. +20 1110696 . A G,T 67 . NS=2;DP=10;NEG=-128;AF=0.333,.;AA=T;DB GT 2 1 ./. +20 1110696 . G A 67 . 
NS=2;DP=99;NEG=-128;AF=0.333,.;AA=T;DB GT:DP 2:9 1:9 ./.:9 diff --git a/src/htslib-1.21/test/test-vcf-hdr-in.vcf b/src/htslib-1.21/test/test-vcf-hdr-in.vcf new file mode 100644 index 0000000..1aba1f4 --- /dev/null +++ b/src/htslib-1.21/test/test-vcf-hdr-in.vcf @@ -0,0 +1,25 @@ +##fileformat=VCFv4.1 +##fileDate=20150126 +##reference=hs37d5 +##phasing=partial +##FILTER= 200.0"> +##FILTER= +##FILTER= +##FILTER= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 +1 12065947 PTV001 C T,A 29 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:19 +1 109817590 PTV002 G T 77 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:20 +1 153791300 PTV003 CTG C 81 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:21 +1 156104666 PTV004 TTGAGAGCCGGCTGGCGGAT TCC 30 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:22 +1 156108541 PTV005 G GG 31 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:23 +1 161279695 PTV006 T C,A 32 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:24 +1 169519049 PTV007 T . 35 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:24 +1 226125468 PTV097 G A 99 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:109 +16 2103394 PTV056 C T 68 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:72 +4 31789170 PTV021 G . 77 PASS . GT:GATK:AD:DP:GQ 0/1:0/1:3,2:5:38 diff --git a/src/htslib-1.21/test/test-vcf-hdr.out b/src/htslib-1.21/test/test-vcf-hdr.out new file mode 100644 index 0000000..578ef39 --- /dev/null +++ b/src/htslib-1.21/test/test-vcf-hdr.out @@ -0,0 +1,16 @@ +##fileformat=VCFv4.1 +##FILTER= +##fileDate=20150126 +##reference=hs37d5 +##phasing=partial +##FILTER= 200.0"> +##FILTER= +##FILTER= +##FILTER= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 diff --git a/src/htslib-1.21/test/test-vcf-sweep.c b/src/htslib-1.21/test/test-vcf-sweep.c new file mode 100644 index 0000000..40ee4e4 --- /dev/null +++ b/src/htslib-1.21/test/test-vcf-sweep.c @@ -0,0 +1,115 @@ +/* test/test-vcf-sweep.c -- VCF test harness. 
+ + Copyright (C) 2013-2014 Genome Research Ltd. + + Author: Petr Danecek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include + +#include "../htslib/vcf_sweep.h" + +int main(int argc, char **argv) +{ + if ( argc!=2 ) + { + fprintf(stderr,"Usage: test-vcf-sweep \n"); + return 1; + } + + // Init variables. The checksum is just for this test program to output + // something and verify that all sites are read in both passes - fwd and + // bwd. + bcf_sweep_t *sw = bcf_sweep_init(argv[1]); + bcf_hdr_t *hdr = bcf_sweep_hdr(sw); + int chksum = 0; + + // First we must sweep forward and read the whole file to build an index. + // If this is undesirable, we can require the presence of a .gzi index + // which can be created with `bgzip -r` from the samtools/htslib package + bcf1_t *rec; + while ( (rec = bcf_sweep_fwd(sw)) ) chksum += rec->pos+1; + printf("fwd position chksum: %d\n", chksum); + + // Now sweep backward. 
+ chksum = 0; + while ( (rec = bcf_sweep_bwd(sw)) ) chksum += rec->pos+1; + printf("bwd position chksum: %d\n", chksum); + + // And forward and backward again, this time summing the PL vectors + int i,j, mPLs = 0, nPLs; + int32_t *PLs = NULL; + chksum = 0; + while ( (rec = bcf_sweep_fwd(sw)) ) + { + // get copy of the PL vectors + nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs); + if ( nPLs <= 0 ) continue; // PL not present + + // how many values are there per sample + int nvals = nPLs / bcf_hdr_nsamples(hdr); + + int32_t *ptr = PLs; + for (i=0; i +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +use strict; +use warnings; +use Carp; +use FindBin; +use lib "$FindBin::Bin"; +use Getopt::Long; +use File::Temp qw/ tempfile tempdir /; +use IO::Handle; + +my $opts = parse_params(); +srand($$opts{seed}); + +run_test('test_bgzip',$opts, 0); +run_test('test_bgzip',$opts, 4); + +run_test('ce_fa_to_md5_cache',$opts,needed_by=>'test_index'); +run_test('test_index',$opts, 0); +run_test('test_index',$opts, 4); + +run_test('test_multi_ref',$opts,0); +run_test('test_multi_ref',$opts,4); + +run_test('test_view',$opts,0); +run_test('test_view',$opts,4); + +run_test('test_MD',$opts); + +run_test('test_vcf_api',$opts,out=>'test-vcf-api.out',needed_by=>'test_vcf_sweep'); +run_test('test_bcf2vcf',$opts); +run_test('test_vcf_sweep',$opts,out=>'test-vcf-sweep.out'); +run_test('test_vcf_various',$opts); +run_test('test_bcf_sr_sort',$opts); +run_test('test_bcf_sr_no_index',$opts); +run_test('test_bcf_sr_range', $opts); +run_test('test_command',$opts,cmd=>'test-bcf-translate -',out=>'test-bcf-translate.out'); +run_test('test_convert_padded_header',$opts); +run_test('test_rebgzip',$opts); +run_test('test_logging',$opts); +run_test('test_plugin_loading',$opts); +run_test('test_realn',$opts); +run_test('test_bcf_set_variant_type',$opts); +run_test('test_annot_tsv',$opts); + +print "\nNumber of tests:\n"; +printf " total .. %d\n", $$opts{nok}+$$opts{nfailed}; +printf " passed .. %d\n", $$opts{nok}; +printf " failed .. %d\n", $$opts{nfailed}; +print "\n"; + +exit ($$opts{nfailed} > 0); + +#-------------------- + +sub error +{ + my (@msg) = @_; + if ( scalar @msg ) { confess @msg; } + print + "About: samtools/htslib consistency test script\n", + "Usage: test.pl [OPTIONS]\n", + "Options:\n", + " -f, --fail-fast Fail-fast mode: exit as soon as a test fails.\n", + " -F, --function LIST Run only the listed tests (e.g. 
'annot_tsv')\n", + " -r, --redo-outputs Recreate expected output files.\n", + " -s, --random-seed Initialise rand() with a different seed.\n", + " -t, --temp-dir When given, temporary files will not be removed.\n", + " -h, -?, --help This help message.\n", + "\n"; + exit 1; +} + +sub cygpath { + my ($path) = @_; + $path = `cygpath -m $path`; + $path =~ s/\r?\n//; + return $path +} + +sub safe_tempdir +{ + my $dir = tempdir(CLEANUP=>1); + if ($^O =~ /^msys/) { + $dir = cygpath($dir); + } + return $dir; +} + +sub parse_params +{ + my $opts = { keep_files=>0, nok=>0, nfailed=>0, seed=>42 }; + my $help; + Getopt::Long::Configure('bundling'); + my $ret = GetOptions ( + 't|temp-dir:s' => \$$opts{keep_files}, + 'r|redo-outputs' => \$$opts{redo_outputs}, + 's|random-seed=i' => \$$opts{seed}, + 'f|fail-fast' => \$$opts{fail_fast}, + 'F|function:s' => \$$opts{function}, + 'h|?|help' => \$help + ); + if ( !$ret or $help ) { error(); } + $$opts{tmp} = $$opts{keep_files} ? $$opts{keep_files} : safe_tempdir(); + if ( $$opts{keep_files} ) { cmd("mkdir -p $$opts{keep_files}"); } + $$opts{path} = $FindBin::RealBin; + $$opts{bin} = $FindBin::RealBin; + $$opts{bin} =~ s{/test/?$}{}; + if ($^O =~ /^msys/) { + $$opts{path} = cygpath($$opts{path}); + $$opts{bin} = cygpath($$opts{bin}); + } + if ($$opts{function}) { + $$opts{run_function} = { map {$_=>1} split(/,/,$$opts{function}) }; + } + + return $opts; +} +sub run_test +{ + my ($name,$opts,@args) = @_; + if ( $$opts{run_function} ) + { + my $run = 0; + if ( exists($$opts{run_function}{$name}) ) { $run = 1; } + if ( !$run ) + { + my %args; + if (!(scalar @args % 2)) # check that a hash was passed + { + %args = @args; + } + for my $func (keys %{$$opts{run_function}}) + { + if ((exists($args{cmd}) && $args{cmd}=~/$func/) || + (exists($args{needed_by}) && $args{needed_by}=~/$func/)) { + $run = 1; + } elsif ( $name=~/$func/ ) { + $$opts{run_function}{$name} = 1; + $run = 1; + } + last if ($run); + } + } + if ( !$run ) { return; } + } + my 
$sym = ${main::}{$name}; # Symbol table look-up, works with "use strict" + &$sym($opts,@args); +} +sub _cmd +{ + my ($cmd) = @_; + my $kid_io; + my $out; + my $pid = open($kid_io, "-|"); + if ( !defined $pid ) { error("Cannot fork: $!"); } + if ($pid) + { + # parent + local $/; # Read entire input + $out = <$kid_io>; + close($kid_io); + } + else + { + # Example of how to embed Valgrind into the testing framework. + # TEST_PRECMD="valgrind --leak-check=full --suppressions=$ENV{HOME}/valgrind.supp" make check + $cmd = "$ENV{TEST_PRECMD} $cmd" if exists $ENV{TEST_PRECMD}; + + # child + exec('bash', '-o','pipefail','-c', $cmd) or error("Cannot execute the command [/bin/sh -o pipefail -c $cmd]: $!"); + } + return ($? >> 8, $out); +} +sub _cmd3 +{ + my ($cmd) = @_; + + $cmd = "$ENV{TEST_PRECMD} $cmd" if exists $ENV{TEST_PRECMD}; + + my $tmp = "$$opts{tmp}/tmp"; + system('bash', '-o','pipefail','-c', "($cmd) 2>$tmp.e >$tmp.o"); + + my $status = $? >> 8; + + my ($out,$err); + local $/; # Read entire input + if ( open(my $fh,'<',"$tmp.o") ) + { + $out = <$fh>; + close($fh) or error("Failed to close $tmp.o"); + } + if ( open(my $fh,'<',"$tmp.e") ) + { + $err = <$fh>; + close($fh) or error("Failed to close $tmp.e"); + } + unlink("$tmp.o"); + unlink("$tmp.e"); + + return ($status,$out,$err); +} +sub cmd +{ + my ($cmd) = @_; + my ($ret,$out) = _cmd($cmd); + if ( $ret ) { error("The command failed [$ret]: $cmd\n", $out); } + return $out; +} +sub test_cmd +{ + my ($opts,%args) = @_; + if ( !exists($args{out}) ) + { + if ( !exists($args{in}) ) { error("FIXME: expected out or in key\n"); } + $args{out} = "$args{in}.out"; + } + my ($package, $filename, $line, $test)=caller(1); + $test =~ s/^.+:://; + + print "$test:\n"; + print "\t$args{cmd}\n"; + + my ($ret,$out,$err) = _cmd3("$args{cmd}"); + if ( $err ) { $err =~ s/^/\t\t/mg; $err .= '\n'; } + if ( $ret ) { failed($opts,$test,"Non-zero status $ret\n$err"); return; } + if ( $$opts{redo_outputs} && -e "$$opts{path}/$args{out}" ) + { 
+ rename("$$opts{path}/$args{out}","$$opts{path}/$args{out}.old"); + open(my $fh,'>',"$$opts{path}/$args{out}") or error("$$opts{path}/$args{out}: $!"); + print $fh $out; + close($fh); + my ($ret,$out) = _cmd("cmp $$opts{path}/$args{out} $$opts{path}/$args{out}.old"); + if ( !$ret && $out eq '' ) { unlink("$$opts{path}/$args{out}.old"); } + else + { + print "\tthe expected output changed, saving:\n"; + print "\t old .. $$opts{path}/$args{out}.old\n"; + print "\t new .. $$opts{path}/$args{out}\n"; + } + } + my $exp = ''; + if ( open(my $fh,'<',"$$opts{path}/$args{out}") ) + { + local $/; # Read entire file + $exp = <$fh>; + $exp =~ s/\015?\012/\n/g; + close($fh); + } + elsif ( !$$opts{redo_outputs} ) { failed($opts,$test,"$$opts{path}/$args{out}: $!"); return; } + + (my $out_lf = $out) =~ s/\015?\012/\n/g; + if ( $exp ne $out_lf ) + { + open(my $fh,'>',"$$opts{path}/$args{out}.new") or error("$$opts{path}/$args{out}.new"); + print $fh $out; + close($fh); + if ( !-e "$$opts{path}/$args{out}" ) + { + rename("$$opts{path}/$args{out}.new","$$opts{path}/$args{out}") or error("rename $$opts{path}/$args{out}.new $$opts{path}/$args{out}: $!"); + print "\tthe file with expected output does not exist, creating new one:\n"; + print "\t\t$$opts{path}/$args{out}\n"; + } + else + { + my $diff = `diff $$opts{path}/$args{out} $$opts{path}/$args{out}.new`; + $diff =~ s/^/\t\t/mg; + chomp($diff); + failed($opts,$test,"${err}The outputs differ:\n\t\t$$opts{path}/$args{out}\n\t\t$$opts{path}/$args{out}.new\n$diff\n"); + } + return; + } + passed($opts,$test); +} + +# Run cmd, producing file out, and compare contents against exp +sub test_compare +{ + my ($opts,$cmd,$exp_fn,$out_fn, %args) = @_; + my ($package, $filename, $line, $test)=caller(1); + $test =~ s/^.+:://; + + print "$test:\n\t$cmd\n"; + + my ($ret,$stdout) = _cmd($cmd); + if ( $ret ) { failed($opts,$test); return; } + + local $/; + my ($exp,$out) = ("",""); + if ( exists($args{"gz"}) ) { + if ( open(my 
$fh,'-|',"$$opts{bin}/bgzip -d < $exp_fn") ) { + $exp = <$fh>; + close($fh); + } else { + failed($opts,$test,"bgzip -d < $exp_fn $!"); return; + } + } else { + if ( open(my $fh,'<',$exp_fn) ) { + $exp = <$fh>; + close($fh); + } else { + failed($opts,$test,"$exp_fn $!"); return; + } + } + + if ( exists($args{"gz"}) ) { + if ( open(my $fh,'-|',"$$opts{bin}/bgzip -d < $out_fn") ) { + $out = <$fh>; + close($fh); + } else { + failed($opts,$test,"bgzip -d < $out_fn $!"); return; + } + } else { + if ( open(my $fh,'<',$out_fn) ) { + $out = <$fh>; + close($fh); + } else { + failed($opts,$test,"$out_fn $!"); return; + } + } + + if (exists($args{fix_newlines})) { + $exp =~ s/\015\012/\n/g; + $out =~ s/\015\012/\n/g; + } + + if ( $exp ne $out ) + { + failed($opts,$test,"The outputs differ:\n\t\t$exp_fn\n\t\t$out_fn"); + return; + } + passed($opts,$test); +} +sub failed +{ + my ($opts,$test,$reason) = @_; + $$opts{nfailed}++; + print "\n"; + STDOUT->flush(); + if ( defined $reason ) { print STDERR "\t$reason\n"; } + print STDERR ".. failed ...\n\n"; + STDERR->flush(); + if ($$opts{fail_fast}) { + die "\n"; + } +} +sub passed +{ + my ($opts,$test) = @_; + $$opts{nok}++; + print ".. 
ok\n\n"; +} +sub is_file_newer +{ + my ($afile,$bfile) = @_; + my (@astat) = stat($afile) or return 0; + my (@bstat) = stat($bfile) or return 0; + if ( $astat[9]>$bstat[9] ) { return 1 } + return 0; +} + +sub ce_fa_to_md5_cache { + my ($opts) = @_; + + # These should really be worked out from the file contents, but + # pre-calculating them avoids a dependency on Digest::MD5 + my %csums = (CHROMOSOME_I => '8ede36131e0dbf3417807e48f77f3ebd', + CHROMOSOME_II => '8e7993f7a93158587ee897d7287948ec', + CHROMOSOME_III => '3adcb065e1cf74fafdbba1e8c352b323', + CHROMOSOME_IV => '251af66a69ee589c9f3757340ec2de6f', + CHROMOSOME_V => 'cf200a65fb754836dcc56b24b3170ee8', + CHROMOSOME_X => '6f9368fd2192c89c613718399d2d31fc', + CHROMOSOME_MtDNA => 'cd05857ece6411f40257a565ccfe15bb'); + + my $m5_dir = "$$opts{tmp}/md5"; + if (!-d $m5_dir) { + mkdir($m5_dir) || die "Couldn't make directory $m5_dir\n"; + } + my $out; + open(my $fa, '<', "$$opts{path}/ce.fa") + || die "Couldn't open $$opts{path}/ce.fa : $!\n"; + my $name = ''; + while (<$fa>) { + chomp; + if (/^>(\S+)/) { + if ($out) { + close($out) || die "Error closing $m5_dir/$csums{$name} : $!\n"; + } + $name = $1; + if (!exists($csums{$name})) { + die "Unexpected fasta entry : $name\n"; + } + open($out, '>', "$m5_dir/$csums{$name}") + } else { + if (!$out) { + die "$$opts{path}/ce.fa : Got data before fasta header\n"; + } + $_ = uc($_); + s/\s+//g; + print $out $_; + } + } + if ($out) { + close($out) || die "Error closing $m5_dir/$csums{$name} : $!\n"; + } + close($fa) || die "Error reading $$opts{path}/ce.fa : $!\n"; + $$opts{m5_dir} = $m5_dir; +} + + +# The tests -------------------------- + +sub test_bgzip { + my ($opts, $threads) = @_; + + my $at = $threads ? 
"-@ $threads" : ''; + my $data = "$$opts{path}/ce.fa"; + my $compressed = "$$opts{tmp}/ce.fa.$threads.gz"; + my $compressed_copy = "$$opts{tmp}/ce.fa.$threads.copy.gz"; + my $uncompressed = "$$opts{tmp}/ce.fa.$threads.uncomp"; + my $offset = 1055584; # Start of MT in ce.fa + my $uncompressed_part = "$$opts{tmp}/ce.fa.$threads.part"; + my $uncompressed_part2 = "$$opts{tmp}/ce.fa.$threads.part2"; + my $expected_part = "$$opts{tmp}/ce.fa.$threads.tail"; + my $index = "${compressed}.gzi"; + my $test = sprintf('%s %2s threads', 'bgzip round-trip', + $threads ? $threads : 'no'); + my $uncompressed1 = "$$opts{tmp}/ce.fa.$threads"; + my $uncompressed1_copy = "$$opts{tmp}/ce.fa.$threads.copy"; + + # Round-trip test + print "$test: "; + my $c = "$$opts{bin}/bgzip $at -i -I '$index' < '$data' > '$compressed'"; + my ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "$$opts{bin}/bgzip $at -d < '$compressed' > '$uncompressed'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "cmp '$data' '$uncompressed'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, $out ? $out : "'$data' '$uncompressed' differ"); + return; + } + passed($opts,$test); + + # Round-trip test of text in binary mode + $test = sprintf('%s %2s threads', 'bgzip text mode round-trip', + $threads ? $threads : 'no'); + print "$test: "; + $c = "$$opts{bin}/bgzip $at --binary -i -I '$index' < '$data' > '$compressed'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "$$opts{bin}/bgzip $at -d < '$compressed' > '$uncompressed'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "cmp '$data' '$uncompressed'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, $out ? 
$out : "'$data' '$uncompressed' differ"); + return; + } + passed($opts,$test); + + # Extract from an offset + $test = sprintf('%s %2s threads', 'bgzip -b', + $threads ? $threads : 'no'); + print "$test: "; + $c = sprintf("tail -c +%d '%s' > '%s'", $offset + 1, $data, $expected_part); + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "$$opts{bin}/bgzip $at -b $offset -d '$compressed' > $uncompressed_part"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "cmp '$expected_part' '$uncompressed_part'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, + $out ? $out : "'$expected_part' '$uncompressed_part' differ"); + return; + } + passed($opts,$test); + + # Extract from an offset with named index + $test = sprintf('%s %2s threads', 'bgzip -b -I', + $threads ? $threads : 'no'); + print "$test: "; + $c = "cp '$compressed' '$compressed_copy'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "$$opts{bin}/bgzip $at -b $offset -d -I '$index' '$compressed_copy' > $uncompressed_part2"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "cmp '$expected_part' '$uncompressed_part2'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, + $out ? $out : "'$expected_part' '$uncompressed_part2' differ"); + return; + } + passed($opts,$test); + + # multi file test, expects compressed files from previous tests + # bgzip should return failure if both inputs not present + $test = sprintf('%s %2s threads', 'bgzip multifile', + $threads ? 
$threads : 'no'); + print "$test: "; + + #decompress and remove + $c = "$$opts{bin}/bgzip $at -d '$compressed' '$compressed_copy'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + #check both files present and matches or not + $c = "cmp '$data' '$uncompressed1'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, + $out ? $out : "'$data' '$uncompressed1' differ"); + return; + } + $c = "cmp '$data' '$uncompressed1_copy'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, + $out ? $out : "'$data' '$uncompressed1_copy' differ"); + return; + } + #compress and remove + $c = "$$opts{bin}/bgzip $at '$uncompressed1' '$uncompressed1_copy'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + #decompress again to ensure successful compression + $c = "$$opts{bin}/bgzip $at -d '$compressed' '$compressed_copy'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + passed($opts,$test); + + # try writing to an explicit file name, round trip test + $test = sprintf('%s %2s threads', 'bgzip --output', + $threads ? $threads : 'no'); + print "$test: "; + + my $compressed_op = "$$opts{tmp}/arbitrary.$threads.gz"; + my $uncompressed_op = "$$opts{tmp}/arbitrary.$threads.txt"; + + $c = "$$opts{bin}/bgzip $at '$data' -o '$compressed_op'"; + + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "$$opts{bin}/bgzip $at -d $compressed_op --output '$uncompressed_op'"; + + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, "non-zero exit from $c"); + return; + } + $c = "cmp '$data' '$uncompressed_op'"; + ($ret, $out) = _cmd($c); + if ($ret) { + failed($opts, $test, $out ? 
$out : "'$data' '$uncompressed_op' differ"); + return; + } + passed($opts,$test); +} + +my $test_view_failures; +sub testv { + my ($opts, $cmd) = @_; + print " $cmd\n"; + my ($ret, $out) = _cmd($cmd); + if ($ret != 0) { + STDOUT->flush(); + print STDERR "FAILED\n$out\n"; + STDERR->flush(); + $test_view_failures++; + if ($$opts{fail_fast}) { + die "\n"; + } + } +} + +sub fake_multi_ref_data +{ + open(SAM, ">multi_ref.tmp.sam") || die; + for (my $r=0;$r<1000;$r++) { + print SAM "\@SQ\tSN:c$r\tLN:10000\n"; + } + + # Single ref + my $rnum=0; + for (my $p=1;$p<1000;$p++) { + print SAM "X\t0\tc$rnum\t$p\t40\t10M\t*\t0\t0\tCCTAGCCCTA\tB?8B?BACCD\n"; + } + + # Multi ref; 1 seq per ref + for (my $r=1;$r<300;$r++) { + print SAM "X\t0\tc$rnum\t1\t40\t10M\t*\t0\t0\tCCTAGCCCTA\tB?8B?BACCD\n"; + $rnum++; + } + + # Single ref again + for (my $p=1;$p<1000;$p++) { + print SAM "X\t0\tc$rnum\t$p\t40\t10M\t*\t0\t0\tCCTAGCCCTA\tB?8B?BACCD\n"; + } + + # Multi ref; 1 seq per ref + for (my $r=1;$r<300;$r++) { + print SAM "X\t0\tc$rnum\t1\t40\t10M\t*\t0\t0\tCCTAGCCCTA\tB?8B?BACCD\n"; + $rnum++; + } + close(SAM); +} + +sub test_multi_ref +{ + my ($opts, $nthreads) = @_; + my $tv_args = $nthreads ? "-\@$nthreads" : ""; + + fake_multi_ref_data; + print "test_view testing multi-ref CRAM modes:\n"; + $test_view_failures = 0; + + for (my $mf = -1; $mf <= 1; $mf++) { + testv $opts, "./test_view $tv_args -o seqs_per_slice=100 -o no_ref=1 -o multi_seq_per_slice=$mf -S -C multi_ref.tmp.sam > multi_ref.tmp.cram"; + testv $opts, "./test_view $tv_args multi_ref.tmp.cram > multi_ref.tmp.sam_"; + testv $opts, "./compare_sam.pl multi_ref.tmp.sam multi_ref.tmp.sam_"; + } + + if ($test_view_failures == 0) { + passed($opts, "multi-ref conversions"); + } else { + failed($opts, "multi-ref conversions", "$test_view_failures subtests failed"); + } +} + +sub test_view +{ + my ($opts, $nthreads) = @_; + my $tv_args = $nthreads ? 
"-\@$nthreads" : ""; + + # Files appropriate for CRAM V3.1 and V4.0 testing + my %cram31 = ("auxf#values.sam" => 1, + "c1#pad3.sam" => 1, + "ce#5.sam" => 1, + "ce#1000.sam", => 1, + "ce#large_seq.sam", => 1, + "ce#supp.sam", => 1, + "xx#MD.sam", => 1, + "xx#blank.sam", => 1, + "xx#large_aux.sam", => 1, + "xx#pair.sam", => 1, + "xx#tlen.sam" => 1); + + # Files appropriate for CRAM multi-ref containers + my %cram_ms = ("ce#1000.sam" => 1, + "ce#5.sam" => 1, + "ce#5b.sam" => 1, + "ce#unmap.sam" => 1, + "ce#unmap1.sam" => 1, + "ce#unmap2.sam" => 1, + "xx#blank.sam" => 1, + "xx#minimal.sam" => 1, + "xx#tlen.sam" => 1, + "xx#tlen2.sam" => 1, + "xx#triplet.sam" => 1); + + foreach my $sam (glob("*#*.sam")) { + my ($base, $ref) = ($sam =~ /((.*)#.*)\.sam/); + $ref .= ".fa"; + + my $bam = "$base.tmp.bam"; + my $cram = "$base.tmp.cram"; + + my $md = "-nomd"; + if ($sam =~ /^md/) { + $md = ""; + } + + print "test_view testing $sam, ref $ref:\n"; + $test_view_failures = 0; + + # SAM -> BAM -> SAM + if ($sam eq "ce#1000.sam") { + testv $opts, "./test_view $tv_args -S -b $sam > $bam"; + testv $opts, "./test_view $tv_args $bam > $bam.sam_"; + testv $opts, "./compare_sam.pl $sam $bam.sam_"; + } + + # SAM -> BAMu -> SAM + testv $opts, "./test_view $tv_args -S -l0 -b $sam > $bam"; + testv $opts, "./test_view $tv_args $bam > $bam.sam_"; + testv $opts, "./compare_sam.pl $sam $bam.sam_"; + + # SAM -> CRAM2 -> SAM + testv $opts, "./test_view $tv_args -t $ref -S -C -o VERSION=2.1 $sam > $cram"; + testv $opts, "./test_view $tv_args -D $cram > $cram.sam_"; + testv $opts, "./compare_sam.pl $md $sam $cram.sam_"; + + # SAM -> CRAM3u -> SAM + if ($sam eq "ce#1000.sam") { + $cram = "$base.tmp.cram"; + testv $opts, "./test_view $tv_args -t $ref -S -l0 -C -o VERSION=3.0 $sam > $cram"; + testv $opts, "./test_view $tv_args -D $cram > $cram.sam_"; + testv $opts, "./compare_sam.pl $md $sam $cram.sam_"; + } + + # BAM -> CRAM3 -> SAM + $cram = "$bam.cram"; + testv $opts, "./test_view $tv_args -t $ref -C 
-o VERSION=3.0 $bam > $cram"; + testv $opts, "./test_view $tv_args $cram > $cram.bam.sam_"; + testv $opts, "./compare_sam.pl $md $sam $cram.bam.sam_"; + + # CRAM3 -> CRAM3 + multi-slice + if (exists($cram_ms{$sam}) && $nthreads > 0) { + testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=3.0 -o seqs_per_slice=7 -o slices_per_container=5 $cram > $cram.ms"; + testv $opts, "./test_view $tv_args $cram.ms > $cram.sam_"; + testv $opts, "./compare_sam.pl $md $sam $cram.sam_"; + } + + if (exists($cram31{$sam}) && $nthreads > 0) { + ## Experimental CRAM 3.1 support. + # SAM -> CRAM31 -> SAM + my @p = $sam eq "ce#1000.sam" + ? (qw/fast normal small archive/) + : (qw/archive/); + foreach my $profile (@p) { + $cram = "$base.tmp.cram"; + testv $opts, "./test_view $tv_args -t $ref -S -l7 -C -o VERSION=3.1 -o $profile $sam > $cram"; + testv $opts, "./test_view $tv_args -D $cram > $cram.sam_"; + testv $opts, "./compare_sam.pl $md $sam $cram.sam_"; + } + + ## Experimental CRAM 4.0 support. + # SAM -> CRAM40 -> SAM + @p = $sam eq "ce#large_seq.sam" || $sam eq "xx#large_aux.sam" + ? 
(qw/fast normal small archive/) + : (qw/archive/); + foreach my $profile (@p) { + $cram = "$base.tmp.cram"; + testv $opts, "./test_view $tv_args -t $ref -S -l7 -C -o VERSION=4.0 -o $profile $sam > $cram"; + testv $opts, "./test_view $tv_args -D $cram > $cram.sam_"; + testv $opts, "./compare_sam.pl $md $sam $cram.sam_"; + } + } + + # Java pre-made CRAM -> SAM + my $jcram = "${base}_java.cram"; + if (-e $jcram) { + my $jsam = "${base}_java.tmp.sam_"; + testv $opts, "./test_view $tv_args -i reference=$ref $jcram > $jsam"; + testv $opts, "./compare_sam.pl -Baux $md $sam $jsam"; + } + + if ($test_view_failures == 0) + { + passed($opts, "$sam conversions"); + } + else + { + failed($opts, "$sam conversions", "$test_view_failures subtests failed"); + } + } + + # BAM files with alignment records that span BGZF blocks + # HTSlib starts a new block if an alignment is likely to overflow the + # current one, so for its own data this will only happen for records + # longer than 64kbytes. As other implementations may not do this, + # check that reading works correctly on some BAM files where records + # have been deliberately split between BGZF blocks. + print "test_view testing BAM records in multiple BGZF blocks:\n"; + $test_view_failures = 0; + my $src_sam = "ce#1.sam"; + foreach my $test_bam (qw(bgzf_boundaries/bgzf_boundaries1.bam + bgzf_boundaries/bgzf_boundaries2.bam + bgzf_boundaries/bgzf_boundaries3.bam)) { + testv $opts, "./test_view $tv_args -p $test_bam.tmp.sam $test_bam"; + testv $opts, "./compare_sam.pl $test_bam.tmp.sam $src_sam"; + } + + # Test a file with a long alignment record. Boundaries hit in the middle of + # the CIGAR data, and in the sequence. Generate the test file here as it's + # big, but with fairly simple contents. 
+ $src_sam = "bgzf_boundaries/large_rec.tmp.sam"; + open(my $test_sam, '>', $src_sam) || die "Couldn't open $src_sam : $!\n"; + print $test_sam "\@HD\tVN:1.6\tSO:coordinate\n"; + print $test_sam "\@SQ\tSN:ref\tLN:100000\n"; + print $test_sam "read\t0\tref\t1\t60\t", "1M1I" x 16000, "\t*\t0\t0\t", "A" x 32000, "\t", "Q" x 32000, "\n"; + close($test_sam) || die "Error on closing $src_sam : $!\n"; + + testv $opts, "./test_view $tv_args -b -l 0 -p $src_sam.bam $src_sam"; + testv $opts, "./test_view $tv_args -p $src_sam.bam.sam $src_sam.bam"; + testv $opts, "./compare_sam.pl $src_sam $src_sam.bam.sam"; + + if ($test_view_failures == 0) { + passed($opts, "BAM records spanning multiple BGZF block tests"); + } else { + failed($opts, "BAM records spanning multiple BGZF block tests", + "$test_view_failures subtests failed"); + } + + # embed_ref=2 mode + print "test_view testing embed_ref=2:\n"; + $test_view_failures = 0; + my $ersam = "ce#1000.sam"; + my $ercram = "ce#1000_er.tmp.cram"; + my $ersam2 = "${ercram}.sam"; + testv $opts, "./test_view $tv_args -C -p $ercram $ersam"; + testv $opts, "./test_view $tv_args -p $ersam2 $ercram"; + testv $opts, "./compare_sam.pl $ersam $ersam2"; + if ($test_view_failures == 0) { + passed($opts, "embed_ref=2 tests"); + } else { + failed($opts, "embed_ref=2 tests", "$test_view_failures subtests failed"); + } + + # BAM and CRAM range queries on prebuilt BAM and CRAM + # The cram file has @SQ UR: set to point to an invalid location to + # force the reference to be reloaded from the one given on the + # command line and nowhere else. REF_PATH should also point to nowhere + # (currently done by the Makefile). This is to test the refseq reference + # counting and reload (Issue #654). 
+ print "test_view testing region queries:\n"; + $test_view_failures = 0; + + my $regions = "CHROMOSOME_II:2980-2980 CHROMOSOME_IV:1500-1500 CHROMOSOME_II:2980-2980 CHROMOSOME_I:1000-1100"; + testv $opts, "./test_view $tv_args -i reference=ce.fa range.cram $regions > range.tmp"; + testv $opts, "./compare_sam.pl range.tmp range.out"; + + testv $opts, "./test_view $tv_args range.bam $regions > range.tmp"; + testv $opts, "./compare_sam.pl range.tmp range.out"; + + # Regression check for out-of-bounds read on regions list (see + # samtools#2063). As reg_insert() allocates at least four slots + # for chromosome regions, we need more than that many in the second + # chr. requested to ensure it has a bigger array. + + $regions = "CHROMOSOME_I:1122-1122 CHROMOSOME_II:1136-1136 CHROMOSOME_II:1241-1241 CHROMOSOME_II:1267-1267 CHROMOSOME_II:1326-1326 CHROMOSOME_II:1345-1345 CHROMOSOME_II:1353-1353 CHROMOSOME_II:1366-1366 CHROMOSOME_II:1416-1416 CHROMOSOME_II:1459-1459 CHROMOSOME_II:1536-1536"; + testv $opts, "./test_view $tv_args -i reference=ce.fa -M range.cram $regions > range.tmp"; + testv $opts, "./compare_sam.pl range.tmp range.out2"; + + testv $opts, "./test_view $tv_args -M range.bam $regions > range.tmp"; + testv $opts, "./compare_sam.pl range.tmp range.out2"; + + if ($test_view_failures == 0) { + passed($opts, "range.cram tests"); + } else { + failed($opts, "range.cram tests", "$test_view_failures subtests failed"); + } + + # Test BAM files with references in targets list but no corresponding @SQ + # lines in the text header. 
+ print "test_view testing BAM files with absent \@SQ lines:\n"; + $test_view_failures = 0; + testv $opts, "./test_view $tv_args -p no_hdr_sq_1.tmp.sam no_hdr_sq_1.bam"; + testv $opts, "./compare_sam.pl no_hdr_sq_1.tmp.sam no_hdr_sq_1.expected.sam"; + + # Try a range query to ensure id <-> name mapping works + # Input only has reads from CHROMOSOME_I, so same "expected" file is used + testv $opts, "./test_view $tv_args -p no_hdr_sq_1.chr1.tmp.sam no_hdr_sq_1.bam CHROMOSOME_I"; + testv $opts, "./compare_sam.pl no_hdr_sq_1.chr1.tmp.sam no_hdr_sq_1.expected.sam"; + if ($test_view_failures == 0) { + passed($opts, "no_hdr_sq tests"); + } else { + failed($opts, "no_hdr_sq tests", "$test_view_failures subtests failed"); + } + + # File with large (> 2Gbases) positions + # Only works for SAM at the moment, but we can still round-trip it. + print "test_view testing large (> 2Gbases) positions:\n"; + $test_view_failures = 0; + testv $opts, "./test_view $tv_args -z -p longrefs/longref.tmp.sam.gz -x longrefs/longref.tmp.sam.gz.csi.otf -m 14 longrefs/longref.sam"; + testv $opts, "./test_view $tv_args -p longrefs/longref.tmp.sam_ longrefs/longref.tmp.sam.gz"; + testv $opts, "./compare_sam.pl longrefs/longref.sam longrefs/longref.tmp.sam_"; + + # CRAM disabled for now as the positions cannot be 32-bit. (These tests are useful for + # checking SQ headers only.) + # testv $opts, "./test_view $tv_args -C -o no_ref -p longrefs/longref.tmp.cram longrefs/longref.sam"; + # testv $opts, "./test_view $tv_args -p longrefs/longref.tmp.sam_ longrefs/longref.tmp.cram"; + # testv $opts, "./compare_sam.pl longrefs/longref.sam longrefs/longref.tmp.sam_"; + + # Build index and compare with on-the-fly one made earlier. 
+ test_compare $opts, "$$opts{path}/test_index -c longrefs/longref.tmp.sam.gz", "longrefs/longref.tmp.sam.gz.csi.otf", "longrefs/longref.tmp.sam.gz.csi", gz=>1; + + # Large position iterator tests + testv $opts, "./test_view $tv_args -p longrefs/longref_itr.tmp.sam longrefs/longref.tmp.sam.gz CHROMOSOME_I:10000000000-10000000003"; + testv $opts, "./compare_sam.pl longrefs/longref_itr.expected.sam longrefs/longref_itr.tmp.sam"; + testv $opts, "./test_view $tv_args -M -p longrefs/longref_multi.tmp.sam longrefs/longref.tmp.sam.gz CHROMOSOME_I:10000000000-10000000003 CHROMOSOME_I:10000000100-10000000110"; + testv $opts, "./compare_sam.pl longrefs/longref_multi.expected.sam longrefs/longref_multi.tmp.sam"; + + # 64-bit positions are currently not compiled in by default for VCF + # # VCF round trip + # unlink("longrefs/index.tmp.vcf.gz.csi"); # To stop vcf_hdr_read from reading a stale index + # testv $opts, "./test_view $tv_args -z -p longrefs/index.tmp.vcf.gz -x longrefs/index.tmp.vcf.gz.csi.otf -m 14 longrefs/index.vcf"; + # testv $opts, "./test_view $tv_args -p longrefs/index.tmp.vcf_ longrefs/index.tmp.vcf.gz"; + # testv $opts, "cmp longrefs/index.vcf longrefs/index.tmp.vcf_"; + # + # # Build index and compare with on-the-fly one made earlier. 
+ # test_compare $opts, "$$opts{path}/test_index -c longrefs/index.tmp.vcf.gz", "longrefs/index.tmp.vcf.gz.csi.otf", "longrefs/index.tmp.vcf.gz.csi", gz=>1; + # + # # test_view can't do indexed look-ups on vcf, but we can use tabix + # test_compare $opts, "$$opts{bin}/tabix longrefs/index.tmp.vcf.gz 1:10010000100-10010000105 > longrefs/index.tmp.tabix1.vcf", "longrefs/index.expected1.vcf", "longrefs/index.tmp.tabix1.vcf", fix_newlines => 1; + # test_compare $opts, "$$opts{bin}/tabix longrefs/index.tmp.vcf.gz 1:10010000120-10010000130 > longrefs/index.tmp.tabix2.vcf", "longrefs/index.expected2.vcf", "longrefs/index.tmp.tabix2.vcf", fix_newlines => 1; + + if ($test_view_failures == 0) { + passed($opts, "large position tests"); + } else { + failed($opts, "large position tests", "$test_view_failures subtests failed"); + } +} + +# Tests CRAM's ability to correctly preserve MD and NM, irrespective of whether +# they are correct. +sub test_MD +{ + my ($opts) = @_; + + foreach my $sam (glob("*#MD*.sam")) { + my ($base, $ref) = ($sam =~ /((.*)#.*)\.sam/); + $ref .= ".fa"; + + my $bam = "$base.tmp.bam"; + my $cram = "$base.tmp.cram"; + + print "\ntest_MD testing $sam, ref $ref:\n"; + $test_view_failures = 0; + $cram = "$base.tmp.cram"; + + # Forcibly store MD and NM and don't auto-generate. + # ALL NM/MD should match and be present only when originally present + testv $opts, "./test_view -o store_nm=1 -o store_md=1 -t $ref -C $sam > $cram"; + testv $opts, "./test_view -i decode_md=0 -D $cram > $cram.sam_"; + testv $opts, "./compare_sam.pl $sam $cram.sam_"; + + # Skip auto-MD generation; check MD iff in output file. + # (NB this does not check that all erroneous values are stored.) + testv $opts, "./test_view -t $ref -C $sam > $cram"; + testv $opts, "./test_view -i decode_md=0 -D $cram > $cram.sam_"; + testv $opts, "./compare_sam.pl -partialmd=2 $sam $cram.sam_"; + + # Also check we haven't added NM or MD needlessly for xx#MD.sam. 
+ # This file has no errors so without auto-generation there must be + # no NM or MD records. + if ($sam eq "xx#MD.sam") { + print " Checking for MD/NM in $sam\n"; + open(my $fh, "<$cram.sam_") || die; + while (<$fh>) { + if (/(MD|NM):/) { + print STDERR "Failed\nLine contains MD/NM:\n$_"; + $test_view_failures++; + last; + } + } + close($fh); + } + + # Force auto-MD generation; check MD iff in input file. + # This will ensure any erroneous values have been round-tripped. + testv $opts, "./test_view -t $ref -C $sam > $cram"; + testv $opts, "./test_view -i decode_md=1 -D $cram > $cram.sam_"; + testv $opts, "./compare_sam.pl -partialmd=1 $sam $cram.sam_"; + + if ($test_view_failures == 0) { + passed($opts, "$sam MD tests"); + } else { + failed($opts, "$sam MD tests", "$test_view_failures subtests failed"); + } + } +} + +sub test_index +{ + my ($opts, $nthreads) = @_; + $nthreads = $nthreads ? "-\@$nthreads" : ""; + + # BAM + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -b -m 14 -x $$opts{tmp}/index.bam.csi $$opts{path}/index.sam > $$opts{tmp}/index.bam", "$$opts{tmp}/index.bam.csi", "$$opts{path}/index.bam.csi", gz=>1); + unlink("$$opts{tmp}/index.bam.csi"); + test_compare($opts,"$$opts{path}/test_index -c $$opts{tmp}/index.bam", "$$opts{tmp}/index.bam.csi", "$$opts{path}/index.bam.csi", gz=>1); + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -b -m 0 -x $$opts{tmp}/index.bam.bai $$opts{path}/index.sam > $$opts{tmp}/index.bam", "$$opts{tmp}/index.bam.bai", "$$opts{path}/index.bam.bai"); + unlink("$$opts{tmp}/index.bam.bai"); + test_compare($opts,"$$opts{path}/test_index -b $$opts{tmp}/index.bam", "$$opts{tmp}/index.bam.bai", "$$opts{path}/index.bam.bai"); + + # SAM + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -z -m 14 -x $$opts{tmp}/index.sam.gz.csi $$opts{path}/index.sam > $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.csi", "$$opts{path}/index.sam.gz.csi", gz=>1); + unlink("$$opts{tmp}/index.bam.bai"); + 
test_compare($opts,"$$opts{path}/test_index -c $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.csi", "$$opts{path}/index.sam.gz.csi", gz=>1); + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -z -m 0 -x $$opts{tmp}/index.sam.gz.bai $$opts{path}/index.sam > $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.bai", "$$opts{path}/index.sam.gz.bai"); + unlink("$$opts{tmp}/index.sam.gz.bai"); + test_compare($opts,"$$opts{path}/test_index -b $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.bai", "$$opts{path}/index.sam.gz.bai"); + + # SAM DOS LINE ENDINGS (\r\n) + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -z -m 14 -x $$opts{tmp}/index.sam.gz.csi $$opts{path}/index_dos.sam > $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.csi", "$$opts{path}/index.sam.gz.csi", gz=>1); + unlink("$$opts{tmp}/index.bam.bai"); + test_compare($opts,"$$opts{path}/test_index -c $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.csi", "$$opts{path}/index.sam.gz.csi", gz=>1); + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -z -m 0 -x $$opts{tmp}/index.sam.gz.bai $$opts{path}/index_dos.sam > $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.bai", "$$opts{path}/index.sam.gz.bai"); + unlink("$$opts{tmp}/index.sam.gz.bai"); + test_compare($opts,"$$opts{path}/test_index -b $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.bai", "$$opts{path}/index.sam.gz.bai"); + + # CRAM + local $ENV{REF_PATH} = $$opts{m5_dir}; + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -C -x $$opts{tmp}/index.cram.crai $$opts{path}/index.sam > $$opts{tmp}/index.cram", "$$opts{tmp}/index.cram.crai", "$$opts{path}/index.cram.crai", gz=>1); + unlink("$$opts{tmp}/index.cram.crai"); + test_compare($opts,"$$opts{path}/test_index $$opts{tmp}/index.cram", "$$opts{tmp}/index.cram.crai", "$$opts{path}/index.cram.crai", gz=>1); + + # BCF + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -b -m 14 -x $$opts{tmp}/index.bcf.csi $$opts{path}/index.vcf > 
$$opts{tmp}/index.bcf", "$$opts{tmp}/index.bcf.csi", "$$opts{path}/index.bcf.csi", gz=>1); + unlink("$$opts{tmp}/index.bcf.csi"); + test_compare($opts,"$$opts{path}/test_index -c $$opts{tmp}/index.bcf", "$$opts{tmp}/index.bcf.csi", "$$opts{path}/index.bcf.csi", gz=>1); + + # VCF + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -z -m 14 -x $$opts{tmp}/index.vcf.gz.csi $$opts{path}/index.vcf > $$opts{tmp}/index.vcf.gz", "$$opts{tmp}/index.vcf.gz.csi", "$$opts{path}/index.vcf.gz.csi", gz=>1); + unlink("$$opts{tmp}/index.vcf.gz.csi"); + test_compare($opts,"$$opts{path}/test_index -c $$opts{tmp}/index.vcf.gz", "$$opts{tmp}/index.vcf.gz.csi", "$$opts{path}/index.vcf.gz.csi", gz=>1); + test_compare($opts,"$$opts{path}/test_view $nthreads -l 0 -z -m 0 -x $$opts{tmp}/index.vcf.gz.tbi $$opts{path}/index.vcf > $$opts{tmp}/index.vcf.gz", "$$opts{tmp}/index.vcf.gz.tbi", "$$opts{path}/index.vcf.gz.tbi", gz=>1); + unlink("$$opts{tmp}/index.vcf.gz.tbi"); + test_compare($opts,"$$opts{path}/test_index -t $$opts{tmp}/index.vcf.gz", "$$opts{tmp}/index.vcf.gz.tbi", "$$opts{path}/index.vcf.gz.tbi", gz=>1); + + # Tabix and custom index names + _cmd("$$opts{bin}/tabix -fp vcf $$opts{tmp}/index.vcf.gz"); + my $wtmp = $$opts{tmp}; + if ($^O =~ /^msys/) { + $wtmp =~ s/\//\\\\/g; + } + test_cmd($opts,out=>'tabix.out',cmd=>"$$opts{bin}/tabix $wtmp/index.vcf.gz##idx##$wtmp/index.vcf.gz.tbi 1:10000060-10000060"); + + cmd("$$opts{path}/test_view -b -p $$opts{tmp}/index2.bam -x $$opts{tmp}/index2.bam.bai $$opts{path}/index2.sam"); + for (my $tid = 1; $tid <= 2; $tid++) { + for (my $pos = 1; $pos <= 2; $pos++) { + # All queries should return exactly two sequences. + # The input data consists of mapped/unmapped and unmapped/mapped + # in both orders. + # Done verbatim as test_cmd cannot return $out for us to check. 
+ my $test = "$$opts{path}/test_view $$opts{tmp}/index2.bam $tid:${pos}000000-${pos}000000"; + print "test_index:\n\t$test\n"; + my ($ret, $out) = _cmd($test); + if ($ret ne 0) { + failed($opts, $test); + } else { + my $rnum = ($out =~ s/^[^@].*\n//gm); + if ($rnum ne 2) { + failed($opts, $test); + } else { + passed($opts, $test); + } + } + } + } + unlink("$$opts{tmp}/index2.bam"); + unlink("$$opts{tmp}/index2.bam.bai"); +} + +sub test_bcf2vcf +{ + my ($opts) = @_; + test_cmd($opts, + out => "tabix/vcf_file.vcf", + cmd => "$$opts{path}/test_view $$opts{path}/tabix/vcf_file.bcf"); +} + +sub test_vcf_api +{ + my ($opts,%args) = @_; + test_cmd($opts,%args,cmd=>"$$opts{path}/test-vcf-api $$opts{tmp}/test-vcf-api.bcf"); +} + +sub test_vcf_sweep +{ + my ($opts,%args) = @_; + test_cmd($opts,%args,cmd=>"$$opts{path}/test-vcf-sweep $$opts{tmp}/test-vcf-api.bcf"); +} + +sub test_vcf_various +{ + my ($opts, %args) = @_; + + # Excess spaces in header lines + test_cmd($opts, %args, out => "test-vcf-hdr.out", + cmd => "$$opts{bin}/htsfile -ch $$opts{path}/test-vcf-hdr-in.vcf"); + + # Various VCF parsing issues + test_cmd($opts, %args, out => "formatcols.vcf", + cmd => "$$opts{bin}/htsfile -c $$opts{path}/formatcols.vcf"); + test_cmd($opts, %args, out => "noroundtrip-out.vcf", + cmd => "$$opts{bin}/htsfile -c $$opts{path}/noroundtrip.vcf"); + test_cmd($opts, %args, out => "formatmissing-out.vcf", + cmd => "$$opts{bin}/htsfile -c $$opts{path}/formatmissing.vcf"); + test_cmd($opts, %args, out => "vcf_meta_meta.vcf", + cmd => "$$opts{bin}/htsfile -c $$opts{path}/vcf_meta_meta.vcf"); + + # VCF file with contig IDX=1, simulating an edited BCF file + # See htslib issue 1534 + test_cmd($opts, %args, out => "modhdr.expected.vcf", + cmd => "$$opts{path}/test_view $$opts{path}/modhdr.vcf.gz chr22:1-2"); +} + +sub write_multiblock_bgzf { + my ($name, $frags) = @_; + + my $tmp = "$name.tmp"; + open(my $out, '>', $name) || die "Couldn't open $name $!\n"; + for (my $i = 0; $i < @$frags; $i++) 
{ + local $/; + open(my $f, '>', $tmp) || die "Couldn't open $tmp : $!\n"; + print $f $frags->[$i]; + close($f) || die "Error writing to $tmp: $!\n"; + open(my $bgz, '-|', "$$opts{bin}/bgzip -c $tmp") + || die "Couldn't open pipe to bgzip: $!\n"; + my $compressed = <$bgz>; + close($bgz) || die "Error running bgzip\n"; + if ($i < $#$frags) { + # Strip EOF block + $compressed =~ s/\x1f\x8b\x08\x04\x00{5}\xff\x06\x00\x42\x43\x02\x00\x1b\x00\x03\x00{9}$//; + } + print $out $compressed; + } + close($out) || die "Error writing to $name: $!\n"; + unlink($tmp); +} + +sub test_rebgzip +{ + my ($opts, %args) = @_; + + # Write a file that should match the one we ship + my @frags = qw(1 22 333 4444 55555); + my $mb = "$$opts{path}/bgziptest.txt.tmp.gz"; + write_multiblock_bgzf($mb, \@frags); + + # See if it really does match + my ($ret, $out) = _cmd("cmp $mb $$opts{path}/bgziptest.txt.gz"); + + if (!$ret && $out eq '') { # If it does, use the original + test_cmd($opts, %args, out => "bgziptest.txt.gz", + cmd => "$$opts{bin}/bgzip -I $$opts{path}/bgziptest.txt.gz.gzi -c -g $$opts{path}/bgziptest.txt"); + } else { + # Otherwise index the one we just made and test that + print "test_rebgzip: Alternate zlib/deflate library detected\n"; + cmd("$$opts{bin}/bgzip -I $mb.gzi -r $mb"); + test_cmd($opts, %args, out => "bgziptest.txt.tmp.gz", + cmd => "$$opts{bin}/bgzip -I $mb.gzi -c -g $$opts{path}/bgziptest.txt"); + } +} + +sub test_convert_padded_header +{ + my ($opts, %args) = @_; + + $args{out} = "headernul.tmp.cram"; + cmd("$$opts{path}/test_view -t ce.fa -C ce#1.sam > $args{out}"); + + foreach my $nuls (0, 1, 678) { + my $nulsbam = "$$opts{tmp}/headernul$nuls.bam"; + cmd("$$opts{path}/test_view -b -Z $nuls ce#1.sam > $nulsbam"); + test_cmd($opts, %args, + cmd => "$$opts{path}/test_view -t ce.fa -C $nulsbam"); + } +} + +sub test_bcf_sr_sort +{ + my ($opts, %args) = @_; + for (my $i=0; $i<10; $i++) + { + my $seed = int(rand(100000000)); + my $test = 'test-bcf-sr'; + my $cmd = 
"$$opts{path}/test-bcf-sr.pl -t $$opts{tmp} -s $seed"; + print "$test:\n"; + print "\t$cmd\n"; + my ($ret,$out) = _cmd($cmd); + if ( $ret ) { failed($opts,$test); } + else { passed($opts,$test); } + } +} + +sub test_bcf_sr_no_index { + my ($opts) = @_; + + my $test = "test_bcf_sr_no_index"; + + my $vcfdir = "$$opts{path}/bcf-sr"; + + # Positive test + test_cmd($opts, out => "bcf-sr/merge.noidx.abc.expected.out", + cmd => "$$opts{path}/test-bcf-sr --no-index -p all --args $vcfdir/merge.noidx.a.vcf $vcfdir/merge.noidx.b.vcf $vcfdir/merge.noidx.c.vcf 2> $$opts{tmp}/no_index_1.err"); + + # Check bad input detection + + my @bad_file_tests = (["out-of-order header", + ["merge.noidx.a.vcf", "merge.noidx.hdr_order.vcf"]], + ["out-of-order records", + ["merge.noidx.a.vcf", "merge.noidx.rec_order.vcf"]], + ["out-of-order records", + ["merge.noidx.rec_order.vcf", "merge.noidx.a.vcf"]]); + my $count = 2; + foreach my $test_params (@bad_file_tests) { + my ($badness, $inputs) = @$test_params; + my @ins = map { "$vcfdir/$_" } @$inputs; + + my $cmd = "$$opts{path}/test-bcf-sr --no-index -p all --args @ins > $$opts{tmp}/no_index_$count.out 2> $$opts{tmp}/no_index_$count.err"; + print "$test:\n\t$cmd (expected fail)\n"; + my ($ret) = _cmd($cmd); + if ($ret == 0) { + failed($opts, $test, "Failed to detect $badness: $cmd\n"); + } else { + passed($opts, $test); + } + $count++; + } +} + +sub test_bcf_sr_range { + my ($opts) = @_; + + my $test = "test_bcf_sr_range"; + + my $vcfdir = "$$opts{path}/bcf-sr"; + + my @tests = (['r', '1', 'weird-chr-names.vcf', 'weird-chr-names.1.out'], + ['r', '1:1-2', 'weird-chr-names.vcf', 'weird-chr-names.1.out'], + ['r', '1:1,1:2', 'weird-chr-names.vcf', 'weird-chr-names.1.out'], + ['r', '1:1-1', 'weird-chr-names.vcf', 'weird-chr-names.2.out'], + ['r', '{1:1}', 'weird-chr-names.vcf', 'weird-chr-names.3.out'], + ['r', '{1:1}:1-2', 'weird-chr-names.vcf', 'weird-chr-names.3.out'], + ['r', '{1:1}:1,{1:1}:2', 'weird-chr-names.vcf', 'weird-chr-names.3.out'], + 
['r', '{1:1}:1-1', 'weird-chr-names.vcf', 'weird-chr-names.4.out'], + ['r', '{1:1-1}', 'weird-chr-names.vcf', 'weird-chr-names.5.out'], + ['r', '{1:1-1}:1-2', 'weird-chr-names.vcf', 'weird-chr-names.5.out'], + ['r', '{1:1-1}:1,{1:1-1}:2', 'weird-chr-names.vcf', 'weird-chr-names.5.out'], + ['r', '{1:1-1}:1-1', 'weird-chr-names.vcf', 'weird-chr-names.6.out'], + ['r', '{1:1-1}-2', 'weird-chr-names.vcf', undef], # Expected failure + ['t', '1', 'weird-chr-names.vcf', 'weird-chr-names.1.out'], + ['t', '1:1-2', 'weird-chr-names.vcf', 'weird-chr-names.1.out'], + ['t', '1:1,1:2', 'weird-chr-names.vcf', 'weird-chr-names.1.out'], + ['t', '1:1-1', 'weird-chr-names.vcf', 'weird-chr-names.2.out'], + ['t', '{1:1}', 'weird-chr-names.vcf', 'weird-chr-names.3.out'], + ['t', '{1:1}:1-2', 'weird-chr-names.vcf', 'weird-chr-names.3.out'], + ['t', '{1:1}:1,{1:1}:2', 'weird-chr-names.vcf', 'weird-chr-names.3.out'], + ['t', '{1:1}:1-1', 'weird-chr-names.vcf', 'weird-chr-names.4.out'], + ['t', '{1:1-1}', 'weird-chr-names.vcf', 'weird-chr-names.5.out'], + ['t', '{1:1-1}:1-2', 'weird-chr-names.vcf', 'weird-chr-names.5.out'], + ['t', '{1:1-1}:1,{1:1-1}:2', 'weird-chr-names.vcf', 'weird-chr-names.5.out'], + ['t', '{1:1-1}:1-1', 'weird-chr-names.vcf', 'weird-chr-names.6.out'], + ['t', '{1:1-1}-2', 'weird-chr-names.vcf', undef] # Expected failure + ); + + my $count = 0; + my %converted; + foreach my $tst (@tests) { + my ($option, $range, $in, $exp_out) = @$tst; + $count++; + if (!$converted{$in}) { + my $cmd = "$$opts{path}/test_view -b -p $$opts{tmp}/$in.bcf -x $$opts{tmp}/$in.bcf.csi $vcfdir/$in"; + print "$test:\n\t$cmd\n"; + my ($ret) = _cmd($cmd); + if ($ret) { + failed($opts, $test); + $converted{$in} = 'fail'; + next; + } else { + passed($opts, $test); + $converted{$in} = "$$opts{tmp}/$in.bcf"; + } + } + next if ($converted{$in} eq 'fail'); + my $cmd = "$$opts{path}/test-bcf-sr -O vcf -o $$opts{tmp}/range_test_$count.out.vcf -$option '$range' --args $converted{$in}"; + if ($exp_out) { + 
test_compare($opts, $cmd, "$vcfdir/$exp_out", + "$$opts{tmp}/range_test_$count.out.vcf", + fix_newlines => 1); + } else { + print "$test:\n\t$cmd (expected fail)\n"; + my ($ret) = _cmd($cmd); + if ($ret) { + passed($opts, $test); + } else { + failed($opts, $test); + } + } + } +} + +sub test_command +{ + my ($opts, %args) = @_; + my $cmd = "$$opts{path}/$args{cmd}"; + test_cmd($opts, %args, cmd=>$cmd); +} + +sub test_logging +{ + my ($opts) = @_; + my $test = 'test-logging'; + my $cmd = "$$opts{path}/test-logging.pl"; + print "$test:\n"; + print "\t$cmd\n"; + my ($ret,$out) = _cmd($cmd); + if ( $ret ) { + print $out; + failed($opts,$test); + } + else { passed($opts,$test); } +} + +sub test_plugin_loading { + my ($opts) = @_; + + my $test = "test_plugin_loading"; + + unless (-e "$$opts{bin}/hfile_libcurl.so" || -e "$$opts{bin}/hfile_libcurl.bundle") { + print "$test: .. skipping\n\n"; + return; + } + + # Test that plugins can be loaded from an executable statically linked to libhts.a + my $url = "https://localhost:99999/invalid_port"; + my $cmd = "HTS_PATH=$$opts{bin} $$opts{path}/with-shlib.sh $$opts{bin}/htsfile $url"; + print "$test:\n\t$cmd\n"; + my ($ret, $out) = _cmd("$cmd 2>&1"); + if ($ret == 0) { failed($opts, $test, "successful exit status"); } + elsif ($out =~ /couldn't register/i || $out =~ /not supported/i) { failed($opts, $test, $out); } + else { passed($opts, $test); } +} + +sub test_realn { + my ($opts) = @_; + + my $test_realn = "$$opts{path}/test_realn"; + # Calculate BAQ + test_cmd($opts, cmd => "$test_realn -f $$opts{path}/realn01.fa -i $$opts{path}/realn01.sam -o -", out => "realn01_exp.sam"); + test_cmd($opts, cmd => "$test_realn -f $$opts{path}/realn02.fa -i $$opts{path}/realn02.sam -o -", out => "realn02_exp.sam"); + + # Calculate and apply BAQ + test_cmd($opts, cmd => "$test_realn -a -f $$opts{path}/realn01.fa -i $$opts{path}/realn01.sam -o -", out => "realn01_exp-a.sam"); + test_cmd($opts, cmd => "$test_realn -a -f $$opts{path}/realn02.fa -i 
$$opts{path}/realn02.sam -o -", out => "realn02_exp-a.sam"); + + # Calculate extended BAQ + test_cmd($opts, cmd => "$test_realn -e -f $$opts{path}/realn01.fa -i $$opts{path}/realn01.sam -o -", out => "realn01_exp-e.sam"); + test_cmd($opts, cmd => "$test_realn -e -f $$opts{path}/realn02.fa -i $$opts{path}/realn02.sam -o -", out => "realn02_exp-e.sam"); + + # Recalculate BAQ + test_cmd($opts, cmd => "$test_realn -r -f $$opts{path}/realn02.fa -i $$opts{path}/realn02-r.sam -o -", out => "realn02_exp.sam"); + + # Apply from existing BQ tags + test_cmd($opts, cmd => "$test_realn -a -f $$opts{path}/realn02.fa -i $$opts{path}/realn02_exp.sam -o -", out => "realn02_exp-a.sam"); + + # Revert quality values (using data in ZQ tags) + test_cmd($opts, cmd => "$test_realn -f $$opts{path}/realn02.fa -i $$opts{path}/realn02_exp-a.sam -o -", out => "realn02_exp.sam"); + + # Make sure multiple matches are treated the same way as a single match of the same length. + test_cmd($opts, cmd => "$test_realn -f $$opts{path}/realn03.fa -e -i $$opts{path}/realn03.sam -o -", out => "realn03_exp.sam"); +} + +sub test_bcf_set_variant_type +{ + my ($opts) = @_; + my $test = 'test-bcf_set_variant_type'; + my $cmd = "$$opts{path}/test-bcf_set_variant_type"; + print "$test:\n"; + print "\t$cmd\n"; + my ($ret,$out) = _cmd($cmd); + if ( $ret ) { + print $out; + failed($opts,$test); + } else { passed($opts,$test); } +} + +sub run_annot_tsv +{ + my ($opts,%args) = @_; + my $exe = "$$opts{bin}/annot-tsv"; + my $dat = "$$opts{path}/annot-tsv"; + my $args = exists($args{args}) ? 
$args{args} : ''; + $args{out} = "annot-tsv/$args{out}"; + test_cmd($opts,%args,cmd=>"$exe $args -s $dat/$args{src} -t $dat/$args{dst}"); + test_cmd($opts,%args,cmd=>"cat $dat/$args{dst} | $exe $args -s $dat/$args{src}"); + test_cmd($opts,%args,cmd=>"cat $dat/$args{src} | $exe $args -t $dat/$args{dst}"); +} + +sub test_annot_tsv +{ + my ($opts) = @_; + run_annot_tsv($opts,src=>'src.1.txt',dst=>'dst.1.txt',out=>'out.1.1.txt',args=>'-f smpl:overlap --allow-dups'); + run_annot_tsv($opts,src=>'src.1.txt',dst=>'dst.1.txt',out=>'out.1.2.txt',args=>'-f smpl:overlap'); + run_annot_tsv($opts,src=>'src.1.txt',dst=>'dst.1.txt',out=>'out.1.2.txt',args=>'-f smpl:overlap -c chr,beg,end'); + run_annot_tsv($opts,src=>'src.1.txt',dst=>'dst.1.txt',out=>'out.1.3.txt',args=>'-f smpl,value:overlap,value'); + run_annot_tsv($opts,src=>'src.1.txt',dst=>'dst.1.txt',out=>'out.1.4.txt',args=>'-f smpl:overlap -O 0.5'); + run_annot_tsv($opts,src=>'src.1.txt',dst=>'dst.1.txt',out=>'out.1.5.txt',args=>'-f smpl:overlap -rO 0.5'); + run_annot_tsv($opts,src=>'src.1.txt',dst=>'dst.1.txt',out=>'out.1.6.txt',args=>'-f smpl:overlap --allow-dups --max-annots 2'); + run_annot_tsv($opts,src=>'src.2.txt',dst=>'dst.2.txt',out=>'out.2.1.txt',args=>'-c 1,2,3:1,2,3 -f 4:5 --allow-dups'); + run_annot_tsv($opts,src=>'src.2.txt',dst=>'dst.2.txt',out=>'out.2.2.txt',args=>'-c 1,2,3:1,2,3 -f 4:5'); + run_annot_tsv($opts,src=>'src.2.txt',dst=>'dst.2.txt',out=>'out.2.3.txt',args=>'-c 1,2,3:1,2,3 -f 4,value:5,value'); + run_annot_tsv($opts,src=>'src.2.txt',dst=>'dst.2.txt',out=>'out.2.4.txt',args=>'-c 1,2,3:1,2,3 -f value,4:value,5'); + run_annot_tsv($opts,src=>'src.2.txt',dst=>'dst.2.txt',out=>'out.2.5.txt',args=>'-c 1,2,3:1,2,3 -f value,4:value,5 -a nbp,frac'); + run_annot_tsv($opts,src=>'src.2.txt',dst=>'dst.2.txt',out=>'out.2.6.txt',args=>'-c 1,2,3:1,2,3 -f 4:5 --allow-dups --max-annots 2'); + run_annot_tsv($opts,src=>'src.3.txt',dst=>'dst.3.txt',out=>'out.3.1.txt',args=>'-f smpl:overlap -a nbp,frac'); + 
run_annot_tsv($opts,src=>'src.4.txt',dst=>'dst.4.txt',out=>'out.4.1.txt',args=>'-c 2,3,4:2,3,4 -m 1:1 -f 1:1 -a nbp,frac'); + run_annot_tsv($opts,src=>'src.5.txt',dst=>'dst.5.txt',out=>'out.5.1.txt',args=>'-c 2,3,4:2,3,4 -a nbp,frac'); + run_annot_tsv($opts,src=>'src.6.txt',dst=>'dst.6.txt',out=>'out.6.1.txt',args=>'-c 1,2,2:1,2,2 -a nbp'); + run_annot_tsv($opts,src=>'src.7.txt',dst=>'dst.7.txt',out=>'out.7.1.txt',args=>'-c 1,2,2:1,2,2 -f overlap -H'); + run_annot_tsv($opts,src=>'src.8.txt',dst=>'dst.8.txt',out=>'out.8.1.txt',args=>'-c chr,beg,end:chr,start,end -m sample -f is_tp'); + run_annot_tsv($opts,src=>'src.9.txt',dst=>'dst.9.txt',out=>'out.9.1.txt',args=>'-c 1,2,3:chr,beg,end -a nbp,frac,cnt'); + run_annot_tsv($opts,src=>'src.10.txt',dst=>'dst.10.txt',out=>'out.10.1.txt',args=>'-f smpl'); + run_annot_tsv($opts,src=>'src.10.txt',dst=>'dst.10.txt',out=>'out.10.2.txt',args=>''); + run_annot_tsv($opts,src=>'src.10.txt',dst=>'dst.10.txt',out=>'out.10.3.txt',args=>'-x'); + run_annot_tsv($opts,src=>'src.10.txt',dst=>'dst.10.txt',out=>'out.10.4.txt',args=>'-m smpl -f smpl'); + run_annot_tsv($opts,src=>'src.10.txt',dst=>'dst.10.txt',out=>'out.10.5.txt',args=>'-m smpl '); + run_annot_tsv($opts,src=>'src.10.txt',dst=>'dst.10.txt',out=>'out.10.6.txt',args=>'-m smpl -x'); + run_annot_tsv($opts,src=>'src.11.txt',dst=>'dst.11.txt',out=>'out.11.1.txt',args=>'-c 1,2,3:1,2,3 -f 4:5 -h 0:0'); + run_annot_tsv($opts,src=>'src.11.txt',dst=>'dst.11.txt',out=>'out.11.1.txt',args=>'-c chr1,beg1,end1:chr,beg,end -f smpl1:src_smpl -h 2:2 -II'); + run_annot_tsv($opts,src=>'src.11.txt',dst=>'dst.11.txt',out=>'out.11.1.txt',args=>'-c chr1,beg1,end1:chr,beg,end -f smpl1:src_smpl -h 2:-1 -II'); + run_annot_tsv($opts,src=>'src.11.txt',dst=>'dst.11.txt',out=>'out.11.2.txt',args=>'-c chr1,beg1,end1:chr,beg,end -f smpl1:src_smpl -h 2:2'); + run_annot_tsv($opts,src=>'src.11.txt',dst=>'dst.11.txt',out=>'out.11.2.txt',args=>'-c chr2,beg2,end2:chr,beg,end -f smpl2:src_smpl -h 3:2'); + 
run_annot_tsv($opts,src=>'src.11.txt',dst=>'dst.11.txt',out=>'out.11.3.txt',args=>'-c chr1,beg1,end1:chr,beg,end -f smpl1:src_smpl -h 2:2 -I'); + run_annot_tsv($opts,src=>'src.11.txt',dst=>'dst.11.txt',out=>'out.11.3.txt',args=>'-c chr2,beg2,end2:chr,beg,end -f smpl2:src_smpl -h 3:2 -I'); + run_annot_tsv($opts,src=>'src.12.txt',dst=>'dst.12.txt',out=>'out.12.1.txt',args=>'-c 1,2,3:1,2,3 -f 4:5 -h 0:0 -d ,'); + run_annot_tsv($opts,src=>'src.12.txt',dst=>'dst.11.txt',out=>'out.11.1.txt',args=>q[-c 1,2,3:1,2,3 -f 4:5 -h 0:0 -d $',:\t']); + run_annot_tsv($opts,src=>'src.13.txt',dst=>'src.13.txt',out=>'out.13.1.txt',args=>q[-c 1,2,3 -f 4:5]); + run_annot_tsv($opts,src=>'src.13.txt',dst=>'src.13.txt',out=>'out.13.1.txt',args=>q[-c 1,2,3 -f 4:5 -O 0.5]); + run_annot_tsv($opts,src=>'src.13.txt',dst=>'src.13.txt',out=>'out.13.2.txt',args=>q[-c 1,2,3 -f 4:5 -O 0.5 -r]); + run_annot_tsv($opts,src=>'src.13.txt',dst=>'src.13.txt',out=>'out.13.2.txt',args=>q[-c 1,2,3 -f 4:5 -O 0.5,0.5]); + run_annot_tsv($opts,src=>'src.13.txt',dst=>'src.13.txt',out=>'out.13.3.txt',args=>q[-c 1,2,3 -f 4:5 -O 0,1]); + run_annot_tsv($opts,src=>'src.13.txt',dst=>'src.13.txt',out=>'out.13.4.txt',args=>q[-c 1,2,3 -f 4:5 -O 1,0]); +} diff --git a/src/htslib-1.21/test/test_bgzf.c b/src/htslib-1.21/test/test_bgzf.c new file mode 100644 index 0000000..bda68d1 --- /dev/null +++ b/src/htslib-1.21/test/test_bgzf.c @@ -0,0 +1,1162 @@ +/* test/test_bgzf.c -- bgzf unit tests + + Copyright (C) 2017, 2019, 2022-2024 Genome Research Ltd + + Author: Robert Davies + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above 
copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../htslib/bgzf.h" +#include "../htslib/hfile.h" +#include "../htslib/hts_log.h" +#include "../hfile_internal.h" + +const char *bgzf_suffix = ".gz"; +const char *idx_suffix = ".gzi"; +const char *tmp_suffix = ".tmp"; + +#define BUFSZ 32768 + +typedef struct { + char *src_plain; + char *src_bgzf; + char *src_idx; + char *tmp_bgzf; + char *tmp_idx; + FILE *f_plain; + FILE *f_bgzf; + FILE *f_idx; + const unsigned char *text; + size_t ltext; +} Files; + +typedef enum { + USE_BGZF_OPEN, + USE_BGZF_DOPEN, + USE_BGZF_HOPEN +} Open_method; + +static FILE * try_fopen(const char *name, const char *mode) { + FILE *f = fopen(name, mode); + if (!f) { + fprintf(stderr, "Couldn't open %s : %s\n", name, strerror(errno)); + return NULL; + } + return f; +} + +static int try_fclose(FILE **file, const char *name, const char *func) { + FILE *to_close = *file; + *file = NULL; + if (fclose(to_close) != 0) { + fprintf(stderr, "%s : Error on closing %s : %s\n", + func, name, strerror(errno)); + return -1; + } + + return 0; +} + +static ssize_t try_fread(FILE *in, void *buf, size_t len, + const char *func, const char *fname) { + size_t got = fread(buf, 1, len, in); + if (got == 0 && ferror(in)) { + fprintf(stderr, "%s : Error reading from %s : %s\n", + func, fname, strerror(errno)); + return -1; 
+ } + return got; +} + +static int try_fseek_start(FILE *f, const char *name, const char *func) { + if (0 != fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "%s : Couldn't seek on %s : %s\n", + func, name, strerror(errno)); + return -1; + } + return 0; +} + +static BGZF * try_bgzf_open(const char *name, const char *mode, + const char *func) { + BGZF * bgz = bgzf_open(name, mode); + if (!bgz) { + fprintf(stderr, "%s : Couldn't bgzf_open %s with mode %s : %s\n", + func, name, mode, strerror(errno)); + return NULL; + } + return bgz; +} + +static BGZF * try_bgzf_dopen(const char *name, const char *mode, + const char *func) { + BGZF *bgz = NULL; + int fd = open(name, hfile_oflags(mode), 0666); + if (fd < 0) { + fprintf(stderr, "%s : Failed to open %s with mode %s : %s\n", + func, name, mode, strerror(errno)); + return NULL; + } + + bgz = bgzf_dopen(fd, mode); + if (!bgz) { + fprintf(stderr, "%s : bgzf_dopen failed on %s mode %s : %s\n", + func, name, mode, strerror(errno)); + close(fd); + return NULL; + } + + return bgz; +} + +static BGZF * try_bgzf_hopen(const char *name, const char *mode, + const char *func) { + hFILE *hfp = hopen(name, mode); + BGZF *bgz = NULL; + + if (!hfp) { + fprintf(stderr, "%s : hopen failed on %s mode %s : %s\n", + func, name, mode, strerror(errno)); + return NULL; + } + + bgz = bgzf_hopen(hfp, mode); + if (!bgz) { + fprintf(stderr, "%s : bgzf_hopen failed on %s mode %s : %s\n", + func, name, mode, strerror(errno)); + hclose_abruptly(hfp); + return NULL; + } + + return bgz; +} + +static int try_bgzf_close(BGZF **bgz, const char *name, const char *func, int expected_fail) { + BGZF *to_close = *bgz; + *bgz = NULL; + if (bgzf_close(to_close) != 0) { + if (!expected_fail) + fprintf(stderr, "%s : bgzf_close failed on %s%s%s\n", + func, name, + errno ? " : " : "", + errno ? 
strerror(errno) : ""); + return -1; + } else if (expected_fail) { + fprintf(stderr, "%s : bgzf_close worked on %s, but expected failure\n", + func, name); + } + return 0; +} + +static ssize_t try_bgzf_read(BGZF *fp, void *data, size_t length, + const char *name, const char *func) { + ssize_t got = bgzf_read_small(fp, data, length); + if (got < 0) { + fprintf(stderr, "%s : Error from bgzf_read %s : %s\n", + func, name, strerror(errno)); + } + return got; +} + +static ssize_t try_bgzf_write(BGZF *fp, const void *data, size_t length, + const char *name, const char *func) { + ssize_t put = bgzf_write_small(fp, data, length); + if (put < (ssize_t) length) { + fprintf(stderr, "%s : %s %s : %s\n", + func, put < 0 ? "Error writing to" : "Short write on", + name, strerror(errno)); + return -1; + } + + return put; +} + +static int try_bgzf_compression(BGZF *fp, int expect, + const char *name, const char *func) { + int res = bgzf_compression(fp); + if (res != expect) { + fprintf(stderr, + "%s : Unexpected result %d from bgzf_compression on %s; " + "expected %d\n", + func, res, name, expect); + return -1; + } + return 0; +} + +static int try_bgzf_mt(BGZF *bgz, int nthreads, const char *func) { + if (bgzf_mt(bgz, nthreads, 64) != 0) { + fprintf(stderr, "%s : Error from bgzf_mt : %s\n", + func, strerror(errno)); + return -1; + } + return 0; +} + +static int try_bgzf_index_build_init(BGZF *bgz, + const char *name, const char *func) { + if (bgzf_index_build_init(bgz) != 0) { + fprintf(stderr, "%s : Error from bgzf_index_build_init on %s : %s\n", + func, name, strerror(errno)); + return -1; + } + return 0; +} + +static int try_bgzf_index_load(BGZF *fp, const char *bname, const char *suffix, + const char *func) { + if (bgzf_index_load(fp, bname, suffix) != 0) { + fprintf(stderr, "%s : Couldn't bgzf_index_load %s%s : %s\n", + func, bname, suffix ? 
suffix : "", strerror(errno)); + return -1; + } + return 0; +} + +static int try_bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix, + const char *func) { + if (bgzf_index_dump(fp, bname, suffix) != 0) { + fprintf(stderr, "%s : Couldn't bgzf_index_dump %s%s : %s\n", + func, bname, suffix ? suffix : "", strerror(errno)); + return -1; + } + return 0; +} + +static int64_t try_bgzf_tell(BGZF *fp, const char *name, const char *func) { + int64_t told = bgzf_tell(fp); + if (told < 0) { + fprintf(stderr, "%s : %s %s : %s\n", + func, "Error telling in", + name, strerror(errno)); + return -1; + } + + return told; +} + +static int64_t try_bgzf_tell_expect(BGZF *fp, int64_t expected, const char *name, const char *func) { + int64_t told = try_bgzf_tell(fp, name, func); + if (told != expected) { + fprintf(stderr, "%s : Unexpected value (%" PRId64 ") from bgzf_tell on %s; " + "expected %" PRId64 "\n", + func, told, name, expected); + return -1; + } + return told; +} + +static int try_bgzf_seek(BGZF *fp, int64_t pos, int whence, + const char *name, const char *func) { + if (bgzf_seek(fp, pos, whence) < 0) { + fprintf(stderr, "%s : Error from bgzf_seek(%s, %" PRId64 ", %d) : %s\n", + func, name, pos, whence, strerror(errno)); + return -1; + } + return 0; +} + +static int try_bgzf_useek(BGZF *fp, long uoffset, int where, + const char *name, const char *func) { + if (bgzf_useek(fp, uoffset, where) < 0) { + fprintf(stderr, "%s : Error from bgzf_useek(%s, %ld, %d) : %s\n", + func, name, uoffset, where, strerror(errno)); + return -1; + } + return 0; +} + +static int try_bgzf_getc(BGZF *fp, size_t pos, int expected, + const char *name, const char *func) { + int c = bgzf_getc(fp); + if (c != expected) { + fprintf(stderr, + "%s : Unexpected value (%d) from bgzf_getc on %s pos %zu; " + "expected %d\n", + func, c, name, pos, expected); + return -1; + } + return c; +} + +static int try_skip(BGZF *fp, size_t count, + const char *name, const char *func) { + size_t i; + int c; + for 
(i = 0; i < count; i++) { + c = bgzf_getc(fp); + if (c < 0) { + fprintf(stderr, + "%s : Error from bgzf_getc on %s\n", + func, name); + return -1; + } + } + return 0; +} + +static int compare_buffers(const unsigned char *b1, const unsigned char *b2, + size_t l1, size_t l2, + const char *name1, const char *name2, + const char *func) { + if (l1 != l2) { + fprintf(stderr, "%s : EOF on %s\n", func, l1 < l2 ? name1 : name2); + return -1; + } + if (memcmp(b1, b2, l1) != 0) { + fprintf(stderr, "%s : difference between %s and %s\n", + func, name1, name2); + return -1; + } + + return 0; +} + +static void cleanup(Files *f, int retval) { + /* Remove temp files if successful. If not, leave them for inspection */ + if (retval == EXIT_SUCCESS) { + unlink(f->tmp_bgzf); + unlink(f->tmp_idx); + } + if (f->f_plain) fclose(f->f_plain); + if (f->f_bgzf) fclose(f->f_bgzf); + if (f->f_idx) fclose(f->f_idx); + free(f->src_plain); + free((unsigned char *) f->text); +} + +static int setup(const char *src, Files *f) { + size_t len = (strlen(src) + strlen(bgzf_suffix) + strlen(idx_suffix) + + strlen(tmp_suffix) + 8); + char *mem, *text; + const unsigned int max = 50000; + unsigned int i; + size_t text_sz = max * 8 + 1; + + mem = calloc(5, len); + if (mem == NULL) { + perror(__func__); + return -1; + } + + snprintf(mem, len, "%s", src); + snprintf(mem + len * 1, len, "%s%s", src, bgzf_suffix); + snprintf(mem + len * 2, len, "%s%s%s", src, bgzf_suffix, idx_suffix); + snprintf(mem + len * 3, len, "%s%s%s", src, tmp_suffix, bgzf_suffix); + snprintf(mem + len * 4, len, "%s%s%s%s", + src, tmp_suffix, bgzf_suffix, idx_suffix); + + f->src_plain = mem; + f->src_bgzf = mem + len * 1; + f->src_idx = mem + len * 2; + f->tmp_bgzf = mem + len * 3; + f->tmp_idx = mem + len * 4; + + text = malloc(text_sz); + if (!text) { + perror(__func__); + goto fail; + } + for (i = 0; i < max; i++) snprintf(text + i*8, text_sz - i*8, "%07u\n", i); + f->text = (unsigned char *) text; + f->ltext = text_sz - 1; + + if 
((f->f_plain = try_fopen(f->src_plain, "rb")) == NULL) goto fail; + if ((f->f_bgzf = try_fopen(f->src_bgzf, "rb")) == NULL) goto fail; + if ((f->f_idx = try_fopen(f->src_idx, "rb")) == NULL) goto fail; + + return 0; + + fail: + return -1; +} + +static int test_read(Files *f) { + BGZF* bgz; + ssize_t bg_got, f_got; + unsigned char bg_buf[BUFSZ], f_buf[BUFSZ]; + + errno = 0; + bgz = try_bgzf_open(f->src_bgzf, "r", __func__); + if (!bgz) return -1; + + do { + bg_got = try_bgzf_read(bgz, bg_buf, BUFSZ, f->src_bgzf, __func__); + if (bg_got < 0) goto fail; + + f_got = try_fread(f->f_plain, f_buf, BUFSZ, __func__, f->src_plain); + if (f_got < 0) goto fail; + + if (compare_buffers(f_buf, bg_buf, f_got, bg_got, + f->src_plain, f->src_bgzf, __func__) != 0) { + goto fail; + } + } while (bg_got > 0 && f_got > 0); + + if (try_bgzf_close(&bgz, f->src_bgzf, __func__, 0) != 0) return -1; + if (try_fseek_start(f->f_plain, f->src_plain, __func__) != 0) return -1; + + return 0; + + fail: + if (bgz) bgzf_close(bgz); + return -1; +} + +static int test_write_read(Files *f, const char *mode, Open_method method, + int nthreads, int expected_compression) { + BGZF* bgz = NULL; + ssize_t bg_put, bg_got; + size_t pos = 0; + unsigned char bg_buf[BUFSZ]; + + switch (method) { + case USE_BGZF_DOPEN: + bgz = try_bgzf_dopen(f->tmp_bgzf, mode, __func__); + break; + case USE_BGZF_HOPEN: + bgz = try_bgzf_hopen(f->tmp_bgzf, mode, __func__); + break; + default: + bgz = try_bgzf_open(f->tmp_bgzf, mode, __func__); + break; + } + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + bg_put = try_bgzf_write(bgz, f->text, f->ltext, f->tmp_bgzf, __func__); + if (bg_put < 0) goto fail; + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + switch (method) { + case USE_BGZF_DOPEN: + bgz = try_bgzf_dopen(f->tmp_bgzf, "r", __func__); + break; + case USE_BGZF_HOPEN: + bgz = try_bgzf_hopen(f->tmp_bgzf, "r", __func__); + break; + default: + bgz = 
try_bgzf_open(f->tmp_bgzf, "r", __func__); + break; + } + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + if (try_bgzf_compression(bgz, expected_compression, + f->tmp_bgzf, __func__) != 0) { + goto fail; + } + + do { + bg_got = try_bgzf_read(bgz, bg_buf, BUFSZ, f->tmp_bgzf, __func__); + if (bg_got < 0) goto fail; + + if (pos < f->ltext && + memcmp(f->text + pos, bg_buf, + pos + bg_got < f->ltext ? bg_got : f->ltext - pos) != 0) { + fprintf(stderr, "%s : Got wrong data from %s, pos %zu\n", + __func__, f->tmp_bgzf, pos); + goto fail; + } + pos += bg_got; + } while (bg_got > 0); + + if (pos != bg_put) { + fprintf(stderr, "%s : bgzf_read got %zd bytes; expected %zd\n", + __func__, pos, bg_put); + goto fail; + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + return 0; + + fail: + if (bgz) bgzf_close(bgz); + return -1; +} + +static int test_embed_eof(Files *f, const char *mode, int nthreads) { + BGZF* bgz = NULL; + ssize_t bg_put, bg_got; + size_t pos = 0, half = BUFSZ < f->ltext ? BUFSZ : f->ltext / 2; + char append_mode[16]; + unsigned char bg_buf[BUFSZ]; + + for (pos = 0; pos < sizeof(append_mode) - 1 && mode[pos] != 0; pos++) { + append_mode[pos] = mode[pos] == 'w' ? 'a' : mode[pos]; + } + append_mode[pos] ='\0'; + + // Write first half + bgz = try_bgzf_open(f->tmp_bgzf, mode, __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + bg_put = try_bgzf_write(bgz, f->text, half, f->tmp_bgzf, __func__); + if (bg_put < 0) goto fail; + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + + // Write second half. Append mode, so an EOF block should be in the + // middle. 
+ bgz = try_bgzf_open(f->tmp_bgzf, append_mode, __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + bg_put = try_bgzf_write(bgz, f->text + half, f->ltext - half, f->tmp_bgzf, + __func__); + if (bg_put < 0) goto fail; + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + // Try reading + pos = 0; + bgz = try_bgzf_open(f->tmp_bgzf, "r", __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + do { + bg_got = try_bgzf_read(bgz, bg_buf, BUFSZ, f->tmp_bgzf, __func__); + if (bg_got < 0) goto fail; + + if (pos < f->ltext && + memcmp(f->text + pos, bg_buf, + pos + bg_got < f->ltext ? bg_got : f->ltext - pos) != 0) { + fprintf(stderr, "%s : Got wrong data from %s, pos %zu\n", + __func__, f->tmp_bgzf, pos); + goto fail; + } + pos += bg_got; + } while (bg_got > 0); + + if (pos != f->ltext) { + fprintf(stderr, "%s : bgzf_read got %zd bytes; expected %zd\n", + __func__, pos, f->ltext); + goto fail; + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + return 0; + + fail: + if (bgz) bgzf_close(bgz); + return -1; +} + +static int test_index_load_dump(Files *f) { + BGZF* bgz = NULL; + FILE *fdest = NULL; + unsigned char buf_src[BUFSZ], buf_dest[BUFSZ]; + ssize_t got_src, got_dest; + + bgz = try_bgzf_open(f->src_bgzf, "r", __func__); + if (!bgz) return -1; + + if (try_bgzf_index_load(bgz, f->src_bgzf, idx_suffix, __func__) != 0) { + goto fail; + } + + if (try_bgzf_index_dump(bgz, f->tmp_bgzf, idx_suffix, __func__) != 0) { + goto fail; + } + + fdest = try_fopen(f->tmp_idx, "r"); + do { + got_src = try_fread(f->f_idx, buf_src, BUFSZ, __func__, f->src_idx); + if (got_src < 0) goto fail; + got_dest = try_fread(fdest, buf_dest, BUFSZ, __func__, f->tmp_idx); + if (got_dest < 0) goto fail; + if (compare_buffers(buf_src, buf_dest, got_src, got_dest, + f->src_idx, f->tmp_idx, __func__) != 0) goto fail; + } while (got_src > 
0 && got_dest > 0); + if (try_fclose(&fdest, f->tmp_idx, __func__) != 0) goto fail; + + if (try_bgzf_close(&bgz, f->src_bgzf, __func__, 0) != 0) goto fail; + + return 0; + + fail: + if (fdest) fclose(fdest); + if (bgz) bgzf_close(bgz); + return -1; +} + +static int test_check_EOF(char *name, int expected) { + BGZF *bgz = try_bgzf_open(name, "r", __func__); + int eof; + if (!bgz) return -1; + eof = bgzf_check_EOF(bgz); + if (eof != expected) { + fprintf(stderr, "%s : Unexpected result %d from bgzf_check_EOF on %s; " + "expected %d\n", + __func__, eof, name, expected); + bgzf_close(bgz); + return -1; + } + + return try_bgzf_close(&bgz, name, __func__, 0); +} + +static int test_index_useek_getc(Files *f, const char *mode, + int cache_size, int nthreads) { + BGZF* bgz = NULL; + ssize_t bg_put; + size_t i, j, k, iskip = f->ltext / 10; + int is_uncompressed = strchr(mode, 'u') != NULL; + size_t offsets[3] = { 0, 100, 50 }; + + bgz = try_bgzf_open(f->tmp_bgzf, mode, __func__); + if (!bgz) goto fail; + + if (try_bgzf_index_build_init(bgz, f->tmp_bgzf, __func__) != 0) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + bg_put = try_bgzf_write(bgz, f->text, f->ltext, f->tmp_bgzf, __func__); + if (bg_put < 0) goto fail; + + if (!is_uncompressed) { + if (try_bgzf_index_dump(bgz, f->tmp_idx, NULL, __func__) != 0) { + goto fail; + } + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + bgz = try_bgzf_open(f->tmp_bgzf, "r", __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + if (!is_uncompressed) { + if (try_bgzf_index_load(bgz, f->tmp_bgzf, idx_suffix, __func__) != 0) { + goto fail; + } + } + + for (i = 0; i < f->ltext; i += iskip) { + for (k = 0; k < sizeof(offsets) / sizeof(offsets[0]); k++) { + size_t o = offsets[k]; + if (try_bgzf_useek(bgz, i + o, SEEK_SET, f->tmp_bgzf, __func__) != 0) { + goto fail; + } + + for (j = 0; j < 16 && i + o + j < 
f->ltext; j++) { + if (try_bgzf_getc(bgz, i + o + j, f->text[i + o + j], + f->tmp_bgzf, __func__) < 0) goto fail; + } + } + } + + if (try_bgzf_useek(bgz, 0, SEEK_SET, f->tmp_bgzf, __func__) != 0) { + goto fail; + } + for (j = 0; j < 70000 && j < f->ltext; j++) { // Should force a block load + if (try_bgzf_getc(bgz, j, f->text[j], + f->tmp_bgzf, __func__) < 0) goto fail; + } + + if (cache_size > 0) { + size_t mid = f->ltext / 2; + bgzf_set_cache_size(bgz, cache_size); + + for (i = 0; i < 10; i++) { + if (try_bgzf_useek(bgz, 0, SEEK_SET, f->tmp_bgzf, __func__) != 0) { + goto fail; + } + for (j = 0; j < 64 && j < f->ltext; j++) { + if (try_bgzf_getc(bgz, j, f->text[j], + f->tmp_bgzf, __func__) < 0) goto fail; + } + + if (try_bgzf_useek(bgz, mid, SEEK_SET, + f->tmp_bgzf, __func__) != 0) { + goto fail; + } + for (j = 0; j < 64 && j + mid < f->ltext; j++) { + if (try_bgzf_getc(bgz, j + mid, f->text[j + mid], + f->tmp_bgzf, __func__) < 0) goto fail; + } + } + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + return 0; + + fail: + if (bgz) bgzf_close(bgz); + return -1; +} + +static int test_tell_seek_getc(Files *f, const char *mode, + int cache_size, int nthreads) { + + BGZF* bgz = NULL; + ssize_t bg_put; + size_t num_points = 10; + size_t i, j, k, iskip = f->ltext / num_points; + size_t offsets[3] = { 0, 100, 50 }; + size_t points[num_points]; + int64_t point_vos[num_points]; + + bgz = try_bgzf_open(f->tmp_bgzf, mode, __func__); + if (!bgz) goto fail; + + for (i = 0; i < num_points; i++) { + point_vos[i] = try_bgzf_tell(bgz, f->tmp_bgzf, __func__); + if (point_vos[i] < 0) goto fail; + points[i] = i * iskip; + bg_put = try_bgzf_write(bgz, f->text + i * iskip, iskip, f->tmp_bgzf, __func__); + if (bg_put < 0) goto fail; + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + bgz = try_bgzf_open(f->tmp_bgzf, "r", __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + 
+ for (i = 0; i < f->ltext; i += iskip) { + for (k = 0; k < sizeof(offsets) / sizeof(offsets[0]); k++) { + size_t o = offsets[k]; + + if (try_bgzf_seek(bgz, point_vos[i/iskip], SEEK_SET, f->tmp_bgzf, __func__) != 0) { + goto fail; + } + if (try_bgzf_tell_expect(bgz, point_vos[i/iskip], f->tmp_bgzf, __func__) < 0) { + goto fail; + } + + if (try_skip(bgz, o, f->tmp_bgzf, __func__) != 0) { + goto fail; + } + for (j = 0; j < 16 && i + o + j < f->ltext; j++) { + if (try_bgzf_getc(bgz, i + o + j, f->text[i + o + j], + f->tmp_bgzf, __func__) < 0) goto fail; + } + } + } + + if (try_bgzf_seek(bgz, 0, SEEK_SET, f->tmp_bgzf, __func__) != 0) { + goto fail; + } + if (try_bgzf_tell_expect(bgz, 0, f->tmp_bgzf, __func__) < 0) { + goto fail; + } + for (j = 0; j < 70000 && j < f->ltext; j++) { // Should force a block load + if (try_bgzf_getc(bgz, j, f->text[j], + f->tmp_bgzf, __func__) < 0) goto fail; + } + + if (cache_size > 0) { + size_t mid = points[num_points / 2]; + int64_t mid_vo = point_vos[num_points / 2]; + bgzf_set_cache_size(bgz, cache_size); + + for (i = 0; i < 10; i++) { + if (try_bgzf_seek(bgz, 0, SEEK_SET, f->tmp_bgzf, __func__) != 0) { + goto fail; + } + if (try_bgzf_tell_expect(bgz, 0, f->tmp_bgzf, __func__) < 0) { + goto fail; + } + for (j = 0; j < 64 && j < f->ltext; j++) { + if (try_bgzf_getc(bgz, j, f->text[j], + f->tmp_bgzf, __func__) < 0) goto fail; + } + + if (try_bgzf_seek(bgz, mid_vo, SEEK_SET, + f->tmp_bgzf, __func__) != 0) { + goto fail; + } + if (try_bgzf_tell_expect(bgz, mid_vo, f->tmp_bgzf, __func__) < 0) { + goto fail; + } + for (j = 0; j < 64 && j + mid < f->ltext; j++) { + if (try_bgzf_getc(bgz, j + mid, f->text[j + mid], + f->tmp_bgzf, __func__) < 0) goto fail; + } + } + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + return 0; + + fail: + if (bgz) bgzf_close(bgz); + return -1; +} + +static int test_tell_read(Files *f, const char *mode) { + + BGZF* bgz = NULL; + ssize_t bg_put; + size_t num_points = 10; + size_t i, 
iskip = f->ltext / num_points; + int64_t point_vos[num_points]; + + unsigned char *bg_buf = calloc(iskip+1,1); + if (!bg_buf) return -1; + + bgz = try_bgzf_open(f->tmp_bgzf, mode, __func__); + if (!bgz) goto fail; + + for (i = 0; i < num_points; i++) { + point_vos[i] = try_bgzf_tell(bgz, f->tmp_bgzf, __func__); + if (point_vos[i] < 0) goto fail; + bg_put = try_bgzf_write(bgz, f->text + i * iskip, iskip, f->tmp_bgzf, __func__); + if (bg_put < 0) goto fail; + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + bgz = try_bgzf_open(f->tmp_bgzf, "r", __func__); + if (!bgz) goto fail; + + for (i = 0; i < f->ltext; i += iskip) { + if (try_bgzf_tell_expect(bgz, point_vos[i/iskip], f->tmp_bgzf, __func__) < 0) { + goto fail; + } + if (try_bgzf_read(bgz, bg_buf, iskip, f->tmp_bgzf, __func__) < 0) { + goto fail; + } + if (compare_buffers(f->text+i, bg_buf, iskip, iskip, + f->tmp_bgzf, f->tmp_bgzf, __func__) != 0) { + goto fail; + } + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + free(bg_buf); + return 0; + + fail: + fprintf(stderr, "%s: failed\n", __func__); + if (bgz) bgzf_close(bgz); + free(bg_buf); + return -1; +} + +static int test_useek_read_small(Files *f, const char *mode) { + + BGZF* bgz = NULL; + char bg_buf[99]; + + bgz = try_bgzf_open(f->tmp_bgzf, mode, __func__); + if (!bgz) goto fail; + + + if (try_bgzf_write(bgz, "#>Hello, World!\n", 16, + f->tmp_bgzf, __func__) != 16) + goto fail; + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + bgz = try_bgzf_open(f->tmp_bgzf, "r", __func__); + if (!bgz) goto fail; + + if (try_bgzf_getc(bgz, 0, '#', f->tmp_bgzf, __func__) < 0 || + try_bgzf_getc(bgz, 1, '>', f->tmp_bgzf, __func__) < 0) + goto fail; + + if (try_bgzf_read(bgz, bg_buf, 5, f->tmp_bgzf, __func__) != 5) + goto fail; + if (memcmp(bg_buf, "Hello", 5) != 0) + goto fail; + + if (try_bgzf_useek(bgz, 9, SEEK_SET, f->tmp_bgzf, __func__) < 0) + goto fail; + + if (try_bgzf_read(bgz, bg_buf, 5, 
f->tmp_bgzf, __func__) != 5) + goto fail; + if (memcmp(bg_buf, "World", 5) != 0) + goto fail; + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + return 0; + + fail: + fprintf(stderr, "%s: failed\n", __func__); + if (bgz) bgzf_close(bgz); + return -1; +} + +static int test_bgzf_getline(Files *f, const char *mode, int nthreads) { + BGZF* bgz = NULL; + ssize_t bg_put; + size_t pos; + kstring_t str = { 0, 0, NULL }; + const char *text = (const char *) f->text; + + bgz = try_bgzf_open(f->tmp_bgzf, mode, __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + bg_put = try_bgzf_write(bgz, f->text, f->ltext, f->tmp_bgzf, __func__); + if (bg_put < 0) goto fail; + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + bgz = try_bgzf_open(f->tmp_bgzf, "r", __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + for (pos = 0; pos < f->ltext; ) { + const char *end = strchr(text + pos, '\n'); + size_t l = end ? end - (text + pos) : f->ltext - pos; + int res; + + if ((res = bgzf_getline(bgz, '\n', &str)) < 0) { + fprintf(stderr, "%s : %s from bgzf_getline on %s : %s\n", + __func__, res < -1 ? "Error" : "Unexpected EOF", + f->tmp_bgzf, res < -1 ? 
strerror(errno) : "EOF"); + goto fail; + } + + if (str.l != l || memcmp(text + pos, str.s, l) != 0) { + fprintf(stderr, + "%s : Unexpected data from bgzf_getline on %s\n" + "Expected : %.*s\n" + "Got : %.*s\n", + __func__, f->tmp_bgzf, (int) l, (char *) f->text + pos, + (int) str.l, str.s); + goto fail; + } + + pos += l + 1; + } + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + free(ks_release(&str)); + return 0; + + fail: + if (bgz) bgzf_close(bgz); + free(ks_release(&str)); + return -1; +} + +static int test_bgzf_getline_on_truncated_file(Files *f, const char *mode, int nthreads) { + BGZF* bgz = NULL; + ssize_t bg_put; + size_t pos; + kstring_t str = { 0, 0, NULL }; + const char *text = (const char *) f->text; + + // Turn off bgzf errors as they're expected. + enum htsLogLevel lvl = hts_get_log_level(); + hts_set_log_level(HTS_LOG_OFF); + + bgz = try_bgzf_open(f->tmp_bgzf, mode, __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + const char *text_line2 = strchr(text, '\n') + 1; + bg_put = try_bgzf_write(bgz, text, text_line2 - text, f->tmp_bgzf, __func__); + if (bg_put < 0) goto fail; + if (bgzf_flush(bgz) < 0) goto fail; + int64_t block2_start = bgz->block_address; + + const char *text_line3 = strchr(text_line2, '\n') + 1; + bg_put = try_bgzf_write(bgz, text_line2, text_line3 - text_line2, f->tmp_bgzf, __func__); + if (bg_put < 0) goto fail; + if (bgzf_flush(bgz) < 0) goto fail; + int64_t block3_start = bgz->block_address; + + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 0) != 0) goto fail; + + int64_t newsize; + for(newsize = block3_start - 1; newsize > block2_start; newsize--) { + //fprintf(stderr, "test_bgzf_getline_on_truncated_file : size truncated to %" PRId64 " with threads %d\n", newsize, nthreads); + + if (truncate(f->tmp_bgzf, newsize) != 0) goto fail; + + bgz = try_bgzf_open(f->tmp_bgzf, "r", __func__); + if (!bgz) goto fail; + + if (nthreads > 0 && 
try_bgzf_mt(bgz, nthreads, __func__) != 0) goto fail; + + for (pos = 0; pos < f->ltext; ) { + const char *end = strchr(text + pos, '\n'); + size_t l = end ? end - (text + pos) : f->ltext - pos; + + int res = bgzf_getline(bgz, '\n', &str); + if (res < -1) { + // ok, we expect error from truncated file + break; + } else if (res == -1) { + // truncated file should never return EOF since we do not truncate at block boundary + fprintf(stderr, "%s : %s from bgzf_getline on %s\n", + __func__, "Unexpected EOF", + f->tmp_bgzf); + goto fail; + } + + if (str.l != l || memcmp(text + pos, str.s, l) != 0) { + fprintf(stderr, + "%s : Unexpected data from bgzf_getline on %s\n" + "Expected : %.*s\n" + "Got : %.*s\n", + __func__, f->tmp_bgzf, (int) l, (char *) f->text + pos, + (int) str.l, str.s); + goto fail; + } + pos += l + 1; + } + + // verify error is persistent + int k; + for(k = 0; k < 3; k++) { + int res = bgzf_getline(bgz, '\n', &str); + if (res > -2) { + fprintf(stderr, "%s : unexpected bgzf_getline result %d\n", __func__, res); + goto fail; + } + } + // closing a stream with error returns error + if (try_bgzf_close(&bgz, f->tmp_bgzf, __func__, 1) == 0) goto fail; + } + free(ks_release(&str)); + hts_set_log_level(lvl); + return 0; + + fail: + hts_set_log_level(lvl); + if (bgz) bgzf_close(bgz); + free(ks_release(&str)); + return -1; +} + +int main(int argc, char **argv) { + Files f = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0 }; + int retval = EXIT_FAILURE; + + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + + if (setup(argv[1], &f) != 0) goto out; + + // Try reading an existing file + if (test_check_EOF(f.src_bgzf, 1) != 0) goto out; + if (test_read(&f) != 0) goto out; + + // Try writing some data and reading it back + if (test_write_read(&f, "wu", USE_BGZF_OPEN, 0, 0) != 0) goto out; + if (test_check_EOF(f.tmp_bgzf, 0) != 0) goto out; + if (test_write_read(&f, "w", USE_BGZF_OPEN, 0, 2) != 0) goto out; + if 
(test_check_EOF(f.tmp_bgzf, 1) != 0) goto out; + if (test_write_read(&f, "w0", USE_BGZF_OPEN, 0, 2) != 0) goto out; + if (test_check_EOF(f.tmp_bgzf, 1) != 0) goto out; + if (test_write_read(&f, "w1", USE_BGZF_DOPEN, 0, 2) != 0) goto out; + if (test_check_EOF(f.tmp_bgzf, 1) != 0) goto out; + if (test_write_read(&f, "w9", USE_BGZF_HOPEN, 0, 2) != 0) goto out; + if (test_check_EOF(f.tmp_bgzf, 1) != 0) goto out; + if (test_write_read(&f, "wg", USE_BGZF_OPEN, 0, 1) != 0) goto out; + if (test_check_EOF(f.tmp_bgzf, 0) != 0) goto out; + + // Try writing and reading with threads + if (test_write_read(&f, "w", USE_BGZF_OPEN, 1, 2) != 0) goto out; + if (test_check_EOF(f.tmp_bgzf, 1) != 0) goto out; + if (test_write_read(&f, "w", USE_BGZF_OPEN, 2, 2) != 0) goto out; + if (test_check_EOF(f.tmp_bgzf, 1) != 0) goto out; + + // Embedded EOF block + if (test_embed_eof(&f, "w", 0) != 0) goto out; + if (test_embed_eof(&f, "w", 1) != 0) goto out; + if (test_embed_eof(&f, "w", 2) != 0) goto out; + + // Index load and dump + if (test_index_load_dump(&f) != 0) goto out; + + // Index building on the fly and bgzf_useek + if (test_index_useek_getc(&f, "w", 1000000, 0) != 0) goto out; + + // Index building on the fly and bgzf_useek, with threads + if (test_index_useek_getc(&f, "w", 1000000, 1) != 0) goto out; + if (test_index_useek_getc(&f, "w", 1000000, 2) != 0) goto out; + + // bgzf_useek on an uncompressed file + if (test_index_useek_getc(&f, "wu", 0, 0) != 0) goto out; + + // bgzf_tell and bgzf_seek + if (test_tell_seek_getc(&f, "w", 0, 0) != 0) goto out; + if (test_tell_seek_getc(&f, "wu", 0, 0) != 0) goto out; + if (test_tell_seek_getc(&f, "w", 1000000, 0) != 0) goto out; + if (test_tell_seek_getc(&f, "wu", 1000000, 0) != 0) goto out; + if (test_tell_seek_getc(&f, "w", 0, 1) != 0) goto out; + if (test_tell_seek_getc(&f, "w", 0, 2) != 0) goto out; + if (test_tell_seek_getc(&f, "wu", 0, 1) != 0) goto out; + if (test_tell_seek_getc(&f, "wu", 0, 2) != 0) goto out; + if 
(test_tell_seek_getc(&f, "w", 1000000, 1) != 0) goto out; + if (test_tell_seek_getc(&f, "w", 1000000, 2) != 0) goto out; + if (test_tell_seek_getc(&f, "wu", 1000000, 1) != 0) goto out; + if (test_tell_seek_getc(&f, "wu", 1000000, 2) != 0) goto out; + + // bgzf_tell and bgzf_read + if (test_tell_read(&f, "w") != 0) goto out; + if (test_tell_read(&f, "wu") != 0) goto out; + + // bgzf_useek and bgzf_read_small + if (test_useek_read_small(&f, "w") != 0) goto out; + if (test_useek_read_small(&f, "wu") != 0) goto out; + + // getline + if (test_bgzf_getline(&f, "w", 0) != 0) goto out; + if (test_bgzf_getline(&f, "w", 1) != 0) goto out; + if (test_bgzf_getline(&f, "w", 2) != 0) goto out; + + if (test_bgzf_getline_on_truncated_file(&f, "w", 0) != 0) goto out; + if (test_bgzf_getline_on_truncated_file(&f, "w", 1) != 0) goto out; + if (test_bgzf_getline_on_truncated_file(&f, "w", 2) != 0) goto out; + + retval = EXIT_SUCCESS; + + out: + cleanup(&f, retval); + return retval; +} diff --git a/src/htslib-1.21/test/test_expr.c b/src/htslib-1.21/test/test_expr.c new file mode 100644 index 0000000..ecd1232 --- /dev/null +++ b/src/htslib-1.21/test/test_expr.c @@ -0,0 +1,366 @@ +/* test-expr.c -- Testing: filter expression parsing and processing. + + Copyright (C) 2020, 2022 Genome Research Ltd. + + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notices and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include "../htslib/hts_expr.h" + +int lookup(void *data, char *str, char **end, hts_expr_val_t *res) { + int foo = 15551; // my favourite palindromic prime + int a = 1; + int b = 2; + int c = 3; + res->is_str = 0; + if (strncmp(str, "foo", 3) == 0) { + *end = str+3; + res->d = foo; + } else if (*str == 'a') { + *end = str+1; + res->d = a; + } else if (*str == 'b') { + *end = str+1; + res->d = b; + } else if (*str == 'c') { + *end = str+1; + res->d = c; + } else if (strncmp(str, "magic", 5) == 0) { + // non-empty string + *end = str+5; + res->is_str = 1; + kputs("plugh", ks_clear(&res->s)); + } else if (strncmp(str, "empty-but-true", 14) == 0) { + // empty string + *end = str+14; + res->is_true = 1; + res->is_str = 1; + kputs("", ks_clear(&res->s)); + } else if (strncmp(str, "empty", 5) == 0) { + // empty string + *end = str+5; + res->is_str = 1; + kputs("", ks_clear(&res->s)); + } else if (strncmp(str, "zero-but-true", 13) == 0) { + *end = str+13; + res->d = 0; + res->is_true = 1; + } else if (strncmp(str, "null-but-true", 13) == 0) { + *end = str+13; + hts_expr_val_undef(res); + res->is_true = 1; + } else if (strncmp(str, "null", 4) == 0) { + // null string (eg aux:Z tag is absent) + *end = str+4; + hts_expr_val_undef(res); + } else if (strncmp(str, "nan", 3) == 0) { + // sqrt(-1), 0/0 and similar + // Semantically the same operations as null. 
+ *end = str+3; + hts_expr_val_undef(res); + + } else { + return -1; + } + + return 0; +} + +typedef struct { + int truth_val; + double dval; + char *sval; + char *str; +} test_ev; + +static inline int strcmpnull(const char *a, const char *b) { + if (!a && !b) return 0; + if (!a && b) return -1; + if (a && !b) return 1; + return strcmp(a, b); +} + +// Compare NAN as equal, for testing we returned the correct values +static inline int cmpfloat(double d1, double d2) { + // If needs be, can use DBL_EPSILON in comparisons here. + return d1 == d2 || (isnan(d1) && isnan(d2)); +} + +int test(void) { + // These are all valid expressions that should work + test_ev tests[] = { + { 1, 1, NULL, "1"}, + { 1, 1, NULL, "+1"}, + { 1, -1, NULL, "-1"}, + { 0, 0, NULL, "!7"}, + { 1, 1, NULL, "!0"}, + { 1, 1, NULL, "!(!7)"}, + { 1, 1, NULL, "!!7"}, + + { 1, 5, NULL, "2+3"}, + { 1, -1, NULL, "2+-3"}, + { 1, 6, NULL, "1+2+3"}, + { 1, 1, NULL, "-2+3"}, + { 0, NAN, NULL, "1+null" }, + { 0, NAN, NULL, "null-1" }, + { 0, NAN, NULL, "-null" }, + + { 1, 6, NULL, "2*3"}, + { 1, 6, NULL, "1*2*3"}, + { 0, 0, NULL, "2*0"}, + + { 1, 7, NULL, "(7)"}, + { 1, 7, NULL, "((7))"}, + { 1, 21, NULL, "(1+2)*(3+4)"}, + { 1, 14, NULL, "(4*5)-(-2*-3)"}, + + { 0, NAN, NULL, "2*null"}, + { 0, NAN, NULL, "null/2"}, + { 0, NAN, NULL, "0/0"}, + + { 1, 1, NULL, "(1+2)*3==9"}, + { 1, 1, NULL, "(1+2)*3!=8"}, + { 0, 0, NULL, "(1+2)*3!=9"}, + { 0, 0, NULL, "(1+2)*3==8"}, + + { 0, 0, NULL, "1>2"}, + { 1, 1, NULL, "1<2"}, + { 0, 0, NULL, "3<3"}, + { 0, 0, NULL, "3>3"}, + { 1, 1, NULL, "9<=9"}, + { 1, 1, NULL, "9>=9"}, + { 1, 1, NULL, "2*4==8"}, + { 1, 1, NULL, "16==0x10"}, + { 1, 1, NULL, "15<0x10"}, + { 1, 1, NULL, "17>0x10"}, + { 0, 0, NULL, "2*4!=8"}, + { 1, 1, NULL, "4+2<3+4"}, + { 0, 0, NULL, "4*2<3+4"}, + { 1, 8, NULL, "4*(2<3)+4"}, // boolean; 4*(1)+4 + + { 1, 1, NULL, "(1<2) == (3>2)"}, + { 1, 1, NULL, "1<2 == 3>2"}, + + { 0, NAN, NULL, "null <= 0" }, + { 0, NAN, NULL, "null >= 0" }, + { 0, NAN, NULL, "null < 0" 
}, + { 0, NAN, NULL, "null > 0" }, + { 0, NAN, NULL, "null == null" }, + { 0, NAN, NULL, "null != null" }, + { 0, NAN, NULL, "null < 10" }, + { 0, NAN, NULL, "10 > null" }, + + { 1, 1, NULL, "2 && 1"}, + { 0, 0, NULL, "2 && 0"}, + { 0, 0, NULL, "0 && 2"}, + { 1, 1, NULL, "2 || 1"}, + { 1, 1, NULL, "2 || 0"}, + { 1, 1, NULL, "0 || 2"}, + { 1, 1, NULL, "1 || 2 && 3"}, + { 1, 1, NULL, "2 && 3 || 1"}, + { 1, 1, NULL, "0 && 3 || 2"}, + { 0, 0, NULL, "0 && 3 || 0"}, + { 0, 0, NULL, " 5 - 5 && 1"}, + { 0, 0, NULL, "+5 - 5 && 1"}, + { 0, 0, NULL, "null && 1"}, // null && x == null + { 0, 0, NULL, "1 && null"}, + { 1, 1, NULL, "!null && 1"}, + { 1, 1, NULL, "1 && !null"}, + { 1, 1, NULL, "1 && null-but-true"}, + { 0, 0, NULL, "null || 0"}, // null || 0 == null + { 0, 0, NULL, "0 || null"}, + { 1, 1, NULL, "!null || 0"}, + { 1, 1, NULL, "0 || !null"}, + { 1, 1, NULL, "0 || null-but-true"}, + { 1, 1, NULL, "null || 1"}, // null || 1 == 1 + { 1, 1, NULL, "1 || null"}, + + { 1, 1, NULL, "3 & 1"}, + { 1, 2, NULL, "3 & 2"}, + { 1, 3, NULL, "1 | 2"}, + { 1, 3, NULL, "1 | 3"}, + { 1, 7, NULL, "1 | 6"}, + { 1, 2, NULL, "1 ^ 3"}, + { 0, NAN, NULL, "1 | null"}, + { 0, NAN, NULL, "null | 1"}, + { 0, NAN, NULL, "1 & null"}, + { 0, NAN, NULL, "null & 1"}, + { 0, NAN, NULL, "0 ^ null"}, + { 0, NAN, NULL, "null ^ 0"}, + { 0, NAN, NULL, "1 ^ null"}, + { 0, NAN, NULL, "null ^ 1"}, + + { 1, 1, NULL, "(1^0)&(4^3)"}, + { 1, 2, NULL, "1 ^(0&4)^ 3"}, + { 1, 2, NULL, "1 ^ 0&4 ^ 3"}, // precedence, & before ^ + + { 1, 6, NULL, "(1|0)^(4|3)"}, + { 1, 7, NULL, "1 |(0^4)| 3"}, + { 1, 7, NULL, "1 | 0^4 | 3"}, // precedence, ^ before | + + { 1, 1, NULL, "4 & 2 || 1"}, + { 1, 1, NULL, "(4 & 2) || 1"}, + { 0, 0, NULL, "4 & (2 || 1)"}, + { 1, 1, NULL, "1 || 4 & 2"}, + { 1, 1, NULL, "1 || (4 & 2)"}, + { 0, 0, NULL, "(1 || 4) & 2"}, + + { 1, 1, NULL, " (2*3)&7 > 4"}, + { 0, 0, NULL, " (2*3)&(7 > 4)"}, // C precedence equiv + { 1, 1, NULL, "((2*3)&7) > 4"}, // Python precedence equiv + { 1, 1, NULL, 
"((2*3)&7) > 4 && 2*2 <= 4"}, + + { 1, 1, "plugh", "magic"}, + { 1, 1, "", "empty"}, + { 1, 1, NULL, "magic == \"plugh\""}, + { 1, 1, NULL, "magic != \"xyzzy\""}, + + { 1, 1, NULL, "\"abc\" < \"def\""}, + { 1, 1, NULL, "\"abc\" <= \"abc\""}, + { 0, 0, NULL, "\"abc\" < \"ab\""}, + { 0, 0, NULL, "\"abc\" <= \"ab\""}, + + { 0, 0, NULL, "\"abc\" > \"def\""}, + { 1, 1, NULL, "\"abc\" >= \"abc\""}, + { 1, 1, NULL, "\"abc\" > \"ab\""}, + { 1, 1, NULL, "\"abc\" >= \"ab\""}, + + { 0, NAN, NULL, "null == \"x\"" }, + { 0, NAN, NULL, "null != \"x\"" }, + { 0, NAN, NULL, "null < \"x\"" }, + { 0, NAN, NULL, "null > \"x\"" }, + + { 1, 1, NULL, "\"abbc\" =~ \"^a+b+c+$\""}, + { 0, 0, NULL, "\"aBBc\" =~ \"^a+b+c+$\""}, + { 1, 1, NULL, "\"aBBc\" !~ \"^a+b+c+$\""}, + { 1, 1, NULL, "\"xyzzy plugh abracadabra\" =~ magic"}, + + { 1, 1, "", "empty-but-true" }, + { 0, 0, NULL, "!empty-but-true" }, + { 1, 1, NULL, "!!empty-but-true" }, + { 1, 1, NULL, "1 && empty-but-true && 1" }, + { 0, 0, NULL, "1 && empty-but-true && 0" }, + + { 0, NAN, NULL, "null" }, + { 1, 1, NULL, "!null" }, + { 0, 0, NULL, "!!null", }, + { 0, 0, NULL, "!\"foo\"" }, + { 1, 1, NULL, "!!\"foo\"" }, + + { 1, NAN, NULL, "null-but-true" }, + { 0, 0, NULL, "!null-but-true" }, + { 1, 1, NULL, "!!null-but-true" }, + { 1, 0, NULL, "zero-but-true" }, + { 0, 0, NULL, "!zero-but-true" }, + { 1, 1, NULL, "!!zero-but-true" }, + + { 1, log(2), NULL, "log(2)"}, + { 1, exp(9), NULL, "exp(9)"}, + { 1, 9, NULL, "log(exp(9))"}, + { 1, 8, NULL, "pow(2,3)"}, + { 1, 3, NULL, "sqrt(9)"}, + { 0, NAN, NULL, "sqrt(-9)"}, + + { 1, 2, NULL, "default(2,3)"}, + { 1, 3, NULL, "default(null,3)"}, + { 0, 0, NULL, "default(null,0)"}, + { 1, NAN, NULL, "default(null-but-true,0)"}, + { 1, NAN, NULL, "default(null-but-true,null)"}, + { 1, NAN, NULL, "default(null,null-but-true)"}, + + { 1, 1, NULL, "exists(\"foo\")"}, + { 1, 1, NULL, "exists(12)"}, + { 1, 1, NULL, "exists(\"\")"}, + { 1, 1, NULL, "exists(0)"}, + { 0, 0, NULL, "exists(null)"}, + { 1, 1, 
NULL, "exists(null-but-true)"}, + }; + + int i, res = 0; + hts_expr_val_t r = HTS_EXPR_VAL_INIT; + for (i = 0; i < sizeof(tests) / sizeof(*tests); i++) { + hts_filter_t *filt = hts_filter_init(tests[i].str); + if (!filt) + return 1; + if (hts_filter_eval2(filt, NULL, lookup, &r)) { + fprintf(stderr, "Failed to parse filter string %s\n", + tests[i].str); + res = 1; + hts_filter_free(filt); + continue; + } + + if (!hts_expr_val_exists(&r)) { + if (r.is_true != tests[i].truth_val || + !cmpfloat(r.d, tests[i].dval)) { + fprintf(stderr, + "Failed test: \"%s\" == \"%f\", got %s, \"%s\", %f\n", + tests[i].str, tests[i].dval, + r.is_true ? "true" : "false", r.s.s, r.d); + res = 1; + } + } else if (r.is_str && (strcmpnull(r.s.s, tests[i].sval) != 0 + || !cmpfloat(r.d, tests[i].dval) + || r.is_true != tests[i].truth_val)) { + fprintf(stderr, + "Failed test: \"%s\" == \"%s\", got %s, \"%s\", %f\n", + tests[i].str, tests[i].sval, + r.is_true ? "true" : "false", r.s.s, r.d); + res = 1; + } else if (!r.is_str && (!cmpfloat(r.d, tests[i].dval) + || r.is_true != tests[i].truth_val)) { + fprintf(stderr, "Failed test: %s == %f, got %s, %f\n", + tests[i].str, tests[i].dval, + r.is_true ? "true" : "false", r.d); + res = 1; + } + + hts_expr_val_free(&r); + hts_filter_free(filt); + } + + return res; +} + +int main(int argc, char **argv) { + if (argc > 1) { + hts_expr_val_t v = HTS_EXPR_VAL_INIT; + hts_filter_t *filt = hts_filter_init(argv[1]); + if (hts_filter_eval2(filt, NULL, lookup, &v)) + return 1; + + printf("%s\t", v.is_true ? "true":"false"); + + if (v.is_str) + puts(v.s.s); + else + printf("%g\n", v.d); + + hts_expr_val_free(&v); + hts_filter_free(filt); + return 0; + } + + return test(); +} diff --git a/src/htslib-1.21/test/test_faidx.c b/src/htslib-1.21/test/test_faidx.c new file mode 100644 index 0000000..f73f973 --- /dev/null +++ b/src/htslib-1.21/test/test_faidx.c @@ -0,0 +1,516 @@ +/* test/test_fadix.c -- Test faidx interfaces + + Copyright (C) 2022 Genome Research Ltd. 
+ + Author: Rob Davies + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include + +#include +#include +#include + +#include "../htslib/faidx.h" + +int file_compare(const char *file1, const char *file2) { + FILE *f1 = NULL; + FILE *f2 = NULL; + unsigned int lno = 1; + size_t got1, got2, i; + char buf1[1024], buf2[1024]; + int ret = -1; + + f1 = fopen(file1, "rb"); + if (!f1) { + perror(file1); + goto out; + } + f2 = fopen(file2, "rb"); + if (!f2) { + perror(file2); + goto out; + } + + do { + got1 = fread(buf1, 1, sizeof(buf1), f1); + got2 = fread(buf2, 1, sizeof(buf2), f2); + + for (i = 0; i < got1 && i < got2 && buf1[i] == buf2[i]; i++) + lno += (buf1[i] == '\n'); + if (i < got1 || i < got2) { + fprintf(stderr, "%s and %s differ at line %u\n", + file1, file2, lno); + goto out; + } + } while (got1 > 0 && got2 > 0); + + if (ferror(f1)) { + perror(file1); + goto out; + } + if (ferror(f2)) { + perror(file2); + goto out; + } + + if (got1 > 0 || got2 > 0) { + fprintf(stderr, "EOF on %s at line %u\n", + got1 ? file2 : file1, lno); + goto out; + } + + ret = 0; + out: + if (f1) fclose(f1); + if (f2) fclose(f2); + return ret; +} + +faidx_t * load_index(const char *fn, const char *fnfai, const char *fngzi, + int flags, enum fai_format_options format) { + faidx_t *fai = fai_load3_format(fn, fnfai, fngzi, flags, format); + if (!fai) { + fprintf(stderr, "Failed: fai_load3(%s, %s, %s, %d, %d)\n", + fn, fnfai ? fnfai : "NULL", fngzi ? 
fngzi : "NULL", flags, + (int) format); + return NULL; + } + return fai; +} + +int do_retrieval(const char *fn, const char *fnfai, const char *fngzi, + int flags, enum fai_format_options format, const char *fnout, + const char *interface, int nreg, char **regions) { + int i, use_64bit = 1, use_parse_reg = 0, use_adjust_reg = 0; + faidx_t *fai = NULL; + FILE *out = stdout; + + if (interface) { + if (strcmp(interface, "fai_fetch") == 0) { + use_64bit = 0; + } else if (strcmp(interface, "faidx_fetch_seq") == 0) { + use_64bit = 0; + use_parse_reg = 1; + } else if (strcmp(interface, "faidx_fetch_seq64") == 0 + || strcmp(interface, "fai_parse_region") == 0) { + use_parse_reg = 1; + } else if (strcmp(interface, "fai_adjust_region") == 0) { + use_parse_reg = 1; + use_adjust_reg = 1; + } + } + + if (fnout) { + out = fopen(fnout, "wb"); + if (!out) { + perror(fnout); + return -1; + } + } + + fai = load_index(fn, fnfai, fngzi, flags, format); + if (!fai) + goto fail; + + for (i = 0; i < nreg; i++) { + hts_pos_t len = 0, pos, beg = 0, end = 0; + int tid = 0; + char *seq = NULL; + size_t l; + + if (use_parse_reg) { + const char *e = fai_parse_region(fai, regions[i], + &tid, &beg, &end, 0); + if (e == NULL) { + fprintf(stderr, "Failed: " + "fai_parse_region(fai, %s, &tid, &beg, &end, 0)\n", + regions[i]); + goto fail; + } + if (use_adjust_reg) { + hts_pos_t orig_beg = beg, orig_end = end; + int r = fai_adjust_region(fai, tid, &beg, &end); + if (r < 0 + || (((r & 1) != 0) ^ (beg != orig_beg)) + || (((r & 2) != 0) ^ (end != orig_end))) { + fprintf(stderr, "Failed: fai_adjust_region(fai, %d, " + "%"PRIhts_pos", %"PRIhts_pos") returned %d\n" + "After: beg = %"PRIhts_pos" end = %"PRIhts_pos"\n", + tid, orig_beg, orig_end, r, beg, end); + goto fail; + } + } + if (use_64bit) { + seq = faidx_fetch_seq64(fai, faidx_iseq(fai, tid), + beg, end - 1, &len); + } else { + int ilen = 0; + seq = faidx_fetch_seq(fai, faidx_iseq(fai, tid), + beg, end - 1, &ilen); + len = ilen; + } + if (!seq) { + 
fprintf(stderr, "Failed: faidx_fetch_seq%s(fai, %s, " + "%"PRIhts_pos", %"PRIhts_pos", &len)\n", + use_64bit ? "64" : "", faidx_iseq(fai, tid), beg, end); + goto fail; + } + } else { + if (use_64bit) { + seq = fai_fetch64(fai, regions[i], &len); + } else { + int ilen = 0; + seq = fai_fetch(fai, regions[i], &ilen); + len = ilen; + } + if (!seq) { + fprintf(stderr, "Failed: fai_fetch%s(fai, %s, &len)\n", + use_64bit ? "64" : "", regions[i]); + goto fail; + } + } + + l = strlen(seq); + fprintf(out, "%c%s length: %"PRIhts_pos"\n", + format == FAI_FASTQ ? '@' : '>', regions[i], len); + for (pos = 0; pos < l; pos += 50) { + fprintf(out, "%.*s\n", 50, seq + pos); + } + free(seq); + if (format == FAI_FASTQ) { + hts_pos_t qual_len = 0; + char *qual; + if (use_parse_reg) { + if (use_64bit) { + qual = faidx_fetch_qual64(fai, faidx_iseq(fai, tid), + beg, end - 1, &qual_len); + } else { + int ilen = 0; + qual = faidx_fetch_qual(fai, faidx_iseq(fai, tid), + beg, end - 1, &ilen); + qual_len = ilen; + } + } else { + if (use_64bit) { + qual = fai_fetchqual64(fai, regions[i], &qual_len); + } else { + int ilen = 0; + qual = fai_fetchqual(fai, regions[i], &ilen); + qual_len = ilen; + } + if (!qual) { + fprintf(stderr, "Failed: fai_fetchqual64(fai, %s, &len)\n", + regions[i]); + goto fail; + } + } + if (qual_len != len) { + fprintf(stderr, + "Sequence and quality lengths differ for %s %s\n", + fn, regions[i]); + free(qual); + goto fail; + } + fprintf(out, "+\n"); + l = strlen(qual); + for (pos = 0; pos < l; pos+=50) { + fprintf(out, "%.*s\n", 50, qual + pos); + } + free(qual); + } + } + + fai_destroy(fai); + + if (fnout) { + if (fclose(out) != 0) { + perror(fnout); + return -1; + } + } + return 0; + + fail: + if (fai) + fai_destroy(fai); + if (fnout) + fclose(out); + + return -1; +} + +int test_fai_line_length(const char *fn, const char *fnfai, const char *fngzi, + enum fai_format_options format, const char *expected, + const char *reg) { + hts_pos_t found_len; + faidx_t *fai = NULL; + 
+ fai = load_index(fn, fnfai, fngzi, 0, format); + if (!fai) + return -1; + + found_len = fai_line_length(fai, reg); + fai_destroy(fai); + if (expected) { + long long exp_len = strtoll(expected, NULL, 10); + if (found_len != exp_len) { + fprintf(stderr, "Unexpected result %"PRIhts_pos" from " + "fai_line_length, expected %s\n", found_len, expected); + return -1; + } + } else { + printf("%"PRIhts_pos"\n", found_len); + } + return 0; +} + +int test_faidx_has_seq(const char *fn, const char *fnfai, const char *fngzi, + enum fai_format_options format, const char *expected, + const char *seq) { + int res; + faidx_t *fai = NULL; + + fai = load_index(fn, fnfai, fngzi, 0, format); + if (!fai) + return -1; + + res = faidx_has_seq(fai, seq); + fai_destroy(fai); + if (expected) { + long exp_res = strtol(expected, NULL, 10); + if (res != exp_res) { + fprintf(stderr, "Unexpected result %d from faidx_has_seq(%s) " + "expected %s\n", res, seq, expected); + return -1; + } + } else { + printf("%d\n", res); + } + return 0; +} + +int test_faidx_iseq(const char *fn, const char *fnfai, const char *fngzi, + enum fai_format_options format, const char *expected, + const char *index) { + const char *found_name = NULL; + int idx = atoi(index); + faidx_t *fai = NULL; + + fai = load_index(fn, fnfai, fngzi, 0, format); + if (!fai) + return -1; + + found_name = faidx_iseq(fai, idx); + + if (expected) { + if (!found_name || strcmp(found_name, expected) != 0) { + fprintf(stderr, "Unexpected result %s from faidx_iseq(fai, %d), " + "expected %s\n", found_name ? found_name : "(null)", + idx, expected); + fai_destroy(fai); + return -1; + } + } else { + printf("%s\n", found_name ? 
found_name : "(null)"); + } + + fai_destroy(fai); + return 0; +} + +int test_faidx_seq_len(const char *fn, const char *fnfai, const char *fngzi, + enum fai_format_options format, const char *expected, + const char *seq) { + int found_len; + faidx_t *fai = NULL; + + fai = load_index(fn, fnfai, fngzi, 0, format); + if (!fai) + return -1; + + found_len = faidx_seq_len(fai, seq); + fai_destroy(fai); + + if (expected) { + int exp_len = atoi(expected); + if (found_len != exp_len) { + fprintf(stderr, "Unexpected result %d from faidx_seq_len(fai, %s) " + "expected %s\n", found_len, seq, expected); + return -1; + } + } else { + printf("%d\n", found_len); + } + + return 0; +} + +int test_faidx_seq_len64(const char *fn, const char *fnfai, const char *fngzi, + enum fai_format_options format, const char *expected, + const char *seq) { + hts_pos_t found_len; + faidx_t *fai = NULL; + + fai = load_index(fn, fnfai, fngzi, 0, format); + if (!fai) + return -1; + + found_len = faidx_seq_len(fai, seq); + fai_destroy(fai); + + if (expected) { + long long exp_len = strtoll(expected, NULL, 10); + if (found_len != exp_len) { + fprintf(stderr, "Unexpected result %"PRIhts_pos + " from fai_seq_len64(fai, %s) expected %s\n", + found_len, seq, expected); + return -1; + } + } else { + printf("%"PRIhts_pos"\n", found_len); + } + + return 0; +} + +void usage(FILE *out, const char *arg0) { + fprintf(out, + "Usage: %s [-c] -i fasta/q [-f fai_file] [-g gzi_file] [-e expected_fai]\n" + " %s [-cQ] -i fasta/q [-f fai_file] [-g gzi_file] [region]\n" + " %s -t FUNC -i fasta/q [-f fai_file] [-g gzi_file] [-e expected] \n" + " %s -h\n", + arg0, arg0, arg0, arg0); +} + +void help(FILE *out, const char *arg0) { + usage(out, arg0); + fprintf(out, + "Options:\n" + " -i FILE Input file\n" + " -f FILE Fasta/q index file name\n" + " -g FILE Bgzip index file name\n" + " -o FILE Output file name\n" + " -e FILE|STR Expected output\n" + " -c Set FAI_CREATE flag\n" + " -Q Output fastq format\n" + " -t FUNC Test 
function\n" + " -h Print this help\n" + "\n" + "Expected output is compared to the FAI file in indexing mode;" + " the output file\n" + "in retrieval mode; " + "expected output for various -t function tests.\n" + "\n" + "Unit tests (-t option):\n" + " fai_line_length, faidx_has_seq, faidx_iseq, faidx_seq_len, faidx_seq_len64\n" + "In retrieval mode, -t can change the functions used to fetch data:\n" + " fai_fetch, fai_fetch64, faidx_fetch_seq, faidx_fetch_seq64,\n" + " fai_parse_region, fai_adjust_region\n" + "\n"); +} + +int main(int argc, char **argv) { + int opt; + const char *fn = NULL; + const char *fnout = NULL; + const char *fnfai = NULL; + const char *fngzi = NULL; + const char *expected = NULL; + const char *func = ""; + int flags = 0; + enum fai_format_options format = FAI_FASTA; + int res; + + while ((opt = getopt(argc, argv, "i:f:g:o:e:t:cQh")) > 0) { + switch (opt) { + case 'i': + fn = optarg; + break; + case 'f': + fnfai = optarg; + break; + case 'g': + fngzi = optarg; + break; + case 'o': + fnout = optarg; + break; + case 'e': + expected = optarg; + break; + case 'c': + flags |= FAI_CREATE; + break; + case 'Q': + format = FAI_FASTQ; + break; + case 't': + func = optarg; + break; + case 'h': + help(stdout, argv[0]); + return EXIT_SUCCESS; + default: + usage(stderr, argv[0]); + return EXIT_FAILURE; + } + } + + if (!fn) { + usage(stderr, argv[0]); + return EXIT_FAILURE; + } + + if (optind == argc) { + // Index building mode + res = fai_build3(fn, fnfai, fngzi); + if (res) { + fprintf(stderr, "Failed: fai_build3(%s, %s, %s)\n", + fn, fnfai ? fnfai : "NULL", fngzi ? 
fngzi : "NULL"); + } else if (expected) { + res = file_compare(fnfai, expected); + } + } else { + if (strcmp(func, "fai_line_length") == 0) { + res = test_fai_line_length(fn, fnfai, fngzi, format, expected, + argv[optind]); + } else if (strcmp(func, "faidx_has_seq") == 0) { + res = test_faidx_has_seq(fn, fnfai, fngzi, format, expected, + argv[optind]); + } else if (strcmp(func, "faidx_iseq") == 0) { + res = test_faidx_iseq(fn, fnfai, fngzi, format, expected, + argv[optind]); + } else if (strcmp(func, "faidx_seq_len") == 0) { + res = test_faidx_seq_len(fn, fnfai, fngzi, format, expected, + argv[optind]); + } else if (strcmp(func, "faidx_seq_len64") == 0) { + res = test_faidx_seq_len64(fn, fnfai, fngzi, format, expected, + argv[optind]); + } else { + res = do_retrieval(fn, fnfai, fngzi, flags, format, fnout, + func, argc - optind, &argv[optind]); + if (res == 0 && fnout && expected) { + res = file_compare(fnout, expected); + } + } + } + return res == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/htslib-1.21/test/test_index.c b/src/htslib-1.21/test/test_index.c new file mode 100644 index 0000000..cc90f51 --- /dev/null +++ b/src/htslib-1.21/test/test_index.c @@ -0,0 +1,83 @@ +/* test/test_index.c -- simple tool to build an index, for the test harness. + + Copyright (C) 2018 Genome Research Ltd. + + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include +#include +#include + +#include "../htslib/sam.h" +#include "../htslib/vcf.h" + +void HTS_NORETURN usage(FILE *fp) { + fprintf(fp, "Usage: test_index [opts] in.{sam.gz,bam,cram}|in.{vcf.gz,bcf}\n\n"); + fprintf(fp, " -b Use BAI index (BAM, SAM)\n"); + fprintf(fp, " -c Use CSI index (BAM, SAM, VCF, BCF)\n"); + fprintf(fp, " -t Use TBI index (VCF) \n"); + fprintf(fp, " -m bits Adjust min_shift; implies CSI\n"); + fprintf(fp, "\nThe default index format is CSI for sam/bam/vcf/bcf and CRAI for crams\n"); + exit(fp == stderr ? 
1 : 0); +} + +int main(int argc, char **argv) { + int c, min_shift = 14; + + while ((c = getopt(argc, argv, "bctm:")) >= 0) { + switch (c) { + case 't': case 'b': min_shift = 0; break; + case 'c': min_shift = 14; break; + case 'm': min_shift = atoi(optarg); break; + case 'h': usage(stdout); + default: usage(stderr); + } + } + + if (optind >= argc) usage(stderr); + + htsFile *in = hts_open(argv[optind], "r"); + if (!in) { + fprintf(stderr, "Error opening \"%s\"\n", argv[optind]); + exit(1); + } + + int ret; + if (in->format.format == sam || + in->format.format == bam || + in->format.format == cram) { + ret = sam_index_build(argv[optind], min_shift); + } else { + ret = bcf_index_build(argv[optind], min_shift); + } + + if (ret < 0) { + fprintf(stderr, "Failed to build index for \"%s\"\n", argv[optind]); + exit(1); + } + + if (hts_close(in) < 0) { + fprintf(stderr, "Error closing \"%s\"\n", argv[optind]); + exit(1); + } + + return 0; +} diff --git a/src/htslib-1.21/test/test_introspection.c b/src/htslib-1.21/test/test_introspection.c new file mode 100644 index 0000000..843e45e --- /dev/null +++ b/src/htslib-1.21/test/test_introspection.c @@ -0,0 +1,87 @@ +/* test/test_introspection.c -- demonstration of introspection function usage + + Copyright (C) 2020-2021 Genome Research Ltd. + + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include +#include + +#include "../htslib/hts.h" +#include "../htslib/hfile.h" + +int main(void) { + printf("Version string: %s\n", hts_version()); + printf("Version number: %d\n", HTS_VERSION); + printf("\nhtscodecs version: %s\n", + hts_test_feature(HTS_FEATURE_HTSCODECS)); + + printf("\nCC: %s\n", hts_test_feature(HTS_FEATURE_CC)); + printf("CPPFLAGS: %s\n", hts_test_feature(HTS_FEATURE_CPPFLAGS)); + printf("CFLAGS: %s\n", hts_test_feature(HTS_FEATURE_CFLAGS)); + printf("LDFLAGS: %s\n", hts_test_feature(HTS_FEATURE_LDFLAGS)); + + unsigned int feat = hts_features(); + printf("\nFeature number: 0x%x\n", feat); + if (feat & HTS_FEATURE_CONFIGURE) + printf(" HTS_FEATURE_CONFIGURE\n"); + if (feat & HTS_FEATURE_PLUGINS) + printf(" HTS_FEATURE_PLUGINS\n"); + if (feat & HTS_FEATURE_LIBCURL) + printf(" HTS_FEATURE_LIBCURL\n"); + if (feat & HTS_FEATURE_S3) + printf(" HTS_FEATURE_S3\n"); + if (feat & HTS_FEATURE_GCS) + printf(" HTS_FEATURE_GCS\n"); + if (feat & HTS_FEATURE_LIBDEFLATE) + printf(" HTS_FEATURE_LIBDEFLATE\n"); + if (feat & HTS_FEATURE_LZMA) + printf(" HTS_FEATURE_LZMA\n"); + if (feat & HTS_FEATURE_BZIP2) + printf(" HTS_FEATURE_BZIP2\n"); + if (feat & HTS_FEATURE_HTSCODECS) + printf(" HTS_FEATURE_HTSCODECS\n"); + + printf("\nFeature string: %s\n", hts_feature_string()); + + + // Plugins and schemes + printf("\nPlugins present:\n"); + const char *plugins[100]; + int np = 100, i, j; + + if (hfile_list_plugins(plugins, &np) < 0) + return 1; + + for (i = 0; i < np; i++) { + const char 
*sc_list[100]; + int nschemes = 100; + if (hfile_list_schemes(plugins[i], sc_list, &nschemes) < 0) + return 1; + + printf(" %s:\n", plugins[i]); + for (j = 0; j < nschemes; j++) + printf("\t%s\n", sc_list[j]); + puts(""); + } + + return 0; +} diff --git a/src/htslib-1.21/test/test_kfunc.c b/src/htslib-1.21/test/test_kfunc.c new file mode 100644 index 0000000..30da877 --- /dev/null +++ b/src/htslib-1.21/test/test_kfunc.c @@ -0,0 +1,88 @@ +/* test_kfunc.c -- kt_fisher_exact() unit tests + + Copyright (C) 2020 University of Glasgow. + + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include + +#include +#include +#include + +#include "../htslib/kfunc.h" + +int differ(double obs, double expected) +{ + return fabs(obs - expected) > 1e-8; +} + +int nfailed = 0; + +void fail(const char *test, double obs, double expected, + int n11, int n12, int n21, int n22) +{ + fprintf(stderr, "[%d %d | %d %d] %s: %g (expected %g)\n", + n11, n12, n21, n22, test, obs, expected); + nfailed++; +} + +void test_fisher(int n11, int n12, int n21, int n22, + double eleft, double eright, double etwo, double eprob) +{ + double prob, left, right, two; + prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two); + if (differ(left, eleft)) fail("LEFT", left, eleft, n11, n12, n21, n22); + if (differ(right, eright)) fail("RIGHT", right, eright, n11, n12, n21, n22); + if (differ(two, etwo)) fail("TWO-TAIL", two, etwo, n11, n12, n21, n22); + if (differ(prob, eprob)) fail("RESULT", prob, eprob, n11, n12, n21, n22); +} + +int main(int argc, char **argv) +{ + test_fisher(2, 1, 0, 31, 1.0, 0.005347593583, 0.005347593583, 0.005347593583); + test_fisher(2, 1, 0, 1, 1.0, 0.5, 1.0, 0.5); + test_fisher(3, 1, 0, 0, 1.0, 1.0, 1.0, 1.0); + test_fisher(3, 15, 37, 45, 0.021479750169, 0.995659202564, 0.033161943699, 0.017138952733); + test_fisher(12, 5, 29, 2, 0.044554737835, 0.994525206022, 0.080268552074, 0.039079943857); + + test_fisher(781, 23171, 4963, 2455001, 1.0, 0.0, 0.0, 0.0); + test_fisher(333, 381, 801722, 7664285, 1.0, 0.0, 0.0, 0.0); + test_fisher(4155, 4903, 805463, 8507517, 1.0, 0.0, 0.0, 0.0); + test_fisher(4455, 4903, 805463, 8507517, 1.0, 0.0, 0.0, 0.0); + test_fisher(5455, 4903, 805463, 8507517, 1.0, 0.0, 0.0, 0.0); + + test_fisher(1, 1, 100000, 1000000, 0.991735477166, 0.173555146661, 0.173555146661, 0.165290623827); + test_fisher(1000, 1000, 100000, 1000000, 1.0, 0.0, 0.0, 0.0); + test_fisher(1000, 1000, 1000000, 100000, 0.0, 1.0, 0.0, 0.0); + + test_fisher(49999, 10001, 90001, 49999, 1.0, 0.0, 0.0, 0.0); + test_fisher(50000, 10000, 90000, 50000, 1.0, 0.0, 
0.0, 0.0); + test_fisher(50001, 9999, 89999, 50001, 1.0, 0.0, 0.0, 0.0); + test_fisher(10000, 50000, 130000, 10000, 0.0, 1.0, 0.0, 0.0); + + if (nfailed > 0) { + const char *plural = (nfailed == 1)? "" : "s"; + fprintf(stderr, "Failed %d test case%s\n", nfailed, plural); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/src/htslib-1.21/test/test_khash.c b/src/htslib-1.21/test/test_khash.c new file mode 100644 index 0000000..a2e80b5 --- /dev/null +++ b/src/htslib-1.21/test/test_khash.c @@ -0,0 +1,502 @@ +/* test_khash.c -- khash unit tests + + Copyright (C) 2024 Genome Research Ltd. + Copyright (C) 2024 Centre for Population Genomics. + + Author: Rob Davies + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include + +#include +#include +#include +#include +#include +#include +#ifdef HAVE_CLOCK_GETTIME_CPUTIME +#include +#else +#include +#endif +#include +#include +#include + +#include +#include + +#define MAX_ENTRIES 99999999 + +KHASH_MAP_INIT_STR(str2int, int) + +static void write_stats_str2int(khash_t(str2int) *h) { + khint_t empty = 0, deleted = 0, hist_size = 0, *hist = NULL; + + if (kh_stats(str2int, h, &empty, &deleted, &hist_size, &hist) == 0) { + khint_t i; + printf("n_buckets = %u\n", + kh_n_buckets(h)); + printf("empty = %u\n", empty); + printf("deleted = %u\n", deleted); + for (i = 0; i < hist_size; i++) { + printf("dist[ %8u ] = %u\n", i, hist[i]); + } + free(hist); + } +} + +char * make_keys(size_t num, size_t kl) { + size_t i; + char *keys; + + if (num > MAX_ENTRIES) return NULL; + keys = malloc(kl * num); + if (!keys) { + perror(NULL); + return NULL; + } + for (i = 0; i < num; i++) { + if (snprintf(keys + kl * i, kl, "test%zu", i) >= kl) { + free(keys); + return NULL; + } + } + + return keys; +} + +static int add_str2int_entry(khash_t(str2int) *h, char *key, khint_t val) { + int ret = 0; + khint_t k = kh_put(str2int, h, key, &ret); + + if (ret != 1 && ret != 2) { + fprintf(stderr, "Unexpected return from kh_put(%s) : %d\n", key, ret); + return -1; + } + kh_val(h, k) = val; + return 0; +} + +static int check_str2int_entry(khash_t(str2int) *h, char *key, khint_t val, + uint8_t is_deleted) { + khint_t k = kh_get(str2int, h, key); + if (is_deleted) { + if (k < kh_end(h)) { + fprintf(stderr, "Found deleted entry %s in hash table\n", key); + return -1; + } else { + return 0; + } + } + + if (k >= kh_end(h)) { + fprintf(stderr, "Couldn't find %s in hash table\n", key); + return -1; + } + if (strcmp(kh_key(h, k), key) != 0) { + fprintf(stderr, "Wrong key in hash table, expected %s got %s\n", + key, kh_key(h, k)); + return -1; + } + if (kh_val(h, k) != val) { + fprintf(stderr, "Wrong value in hash table, expected %u got %u\n", + val, kh_val(h, k)); + 
return -1; + } + return 0; +} + +static int del_str2int_entry(khash_t(str2int) *h, char *key) { + khint_t k = kh_get(str2int, h, key); + if (k >= kh_end(h)) { + fprintf(stderr, "Couldn't find %s to delete from hash table\n", key); + return -1; + } + kh_del(str2int, h, k); + return 0; +} + +static int test_str2int(size_t max, size_t to_del, int show_stats) { + const size_t kl = 16; + size_t mask = max; + char *keys = make_keys(max, kl); + uint8_t *flags = NULL; + khash_t(str2int) *h; + khint_t i; + uint32_t r = 0x533d; + + if (!keys) return -1; + + h = kh_init(str2int); + if (!h) goto memfail; + + // Add some entries + for (i = 0; i < max; i++) { + if (add_str2int_entry(h, keys + i * kl, i) != 0) + goto fail; + } + + // Check they exist + for (i = 0; i < max; i++) { + if (check_str2int_entry(h, keys + i * kl, i, 0) != 0) + goto fail; + } + + if (show_stats) { + printf("Initial fill:\n"); + write_stats_str2int(h); + } + + // Delete a random selection + flags = calloc(max, sizeof(*flags)); + if (!flags) { + perror(""); + goto fail; + } + + kroundup_size_t(mask); + --mask; + + // Note that this method may become slow for a high %age removed + // as it searches for the last available entries. Despite this, it + // seems to be acceptable for the number of entries allowed. 
+ for (i = 0; i < to_del; i++) { + khint_t victim; + // LFSR, see http://users.ece.cmu.edu/~koopman/lfsr/index.html + do { + r = (r >> 1) ^ ((r & 1) * 0x80000057U); + victim = (r & mask) - 1; + } while (victim >= max || flags[victim]); + if (del_str2int_entry(h, keys + victim * kl) != 0) + goto fail; + flags[victim] = 1; + } + + // Check correct entries are present + for (i = 0; i < max; i++) { + if (check_str2int_entry(h, keys + i * kl, i, flags[i]) != 0) + goto fail; + } + + if (show_stats) { + printf("\nAfter deletion:\n"); + write_stats_str2int(h); + } + + // Re-insert deleted entries + for (i = 0; i < max; i++) { + if (flags[i] && add_str2int_entry(h, keys + i * kl, i) != 0) + goto fail; + } + + // Ensure they're all back + for (i = 0; i < max; i++) { + if (check_str2int_entry(h, keys + i * kl, i, 0) != 0) + goto fail; + } + + if (show_stats) { + printf("\nAfter re-insert:\n"); + write_stats_str2int(h); + } + + kh_destroy(str2int, h); + free(keys); + free(flags); + + return 0; + + memfail: + perror(NULL); + fail: + kh_destroy(str2int, h); + free(keys); + free(flags); + return -1; +} + +static size_t read_keys(const char *keys_file, char **keys_out, + char ***key_locations_out) { + FILE *in = fopen(keys_file, "r"); + char *keys = NULL, *key, *end; + size_t keys_size = 1000000; + size_t keys_used = 0; + size_t avail, got, nkeys = 0; + char **key_locations = NULL; + struct stat fileinfo = { 0 }; + + if (!in) + return 0; + + // Slurp entire file + if (fstat(fileno(in), &fileinfo) < 0) { + if (fileinfo.st_size > keys_size) + keys_size = (size_t) fileinfo.st_size; + } + + keys = malloc(keys_size + 1); + if (!keys) + goto fail; + + do { + avail = keys_size - keys_used; + if (avail == 0) { + size_t new_size = keys_size + 1000000; + char *new_keys = realloc(keys, new_size + 1); + if (!new_keys) + goto fail; + keys = new_keys; + keys_size = new_size; + avail = keys_size - keys_used; + } + got = fread(keys + keys_used, 1, avail, in); + keys_used += got; + } while (got == 
avail); + keys[keys_used] = '\0'; + + if (ferror(in)) + goto fail; + if (fclose(in) < 0) + goto fail; + in = NULL; + + // Split by line + end = keys + keys_used; + for (key = keys; key != NULL; key = memchr(key, '\n', end - key)) { + while (*key == '\n') key++; + if (key < end) nkeys++; + } + + key_locations = malloc(nkeys * sizeof(*key_locations)); + if (!key_locations) + goto fail; + + nkeys = 0; + for (key = keys; key != NULL; key = memchr(key, '\n', end - key)) { + while (*key == '\n') *key++ = '\0'; + if (key < end) { + key_locations[nkeys++] = key; + } + } + *keys_out = keys; + *key_locations_out = key_locations; + return nkeys; + + fail: + if (in) + fclose(in); + free(keys); + *keys_out = NULL; + *key_locations_out = NULL; + return 0; +} + +static long long get_time(void) { +#ifdef HAVE_CLOCK_GETTIME_CPUTIME + struct timespec ts; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) < 0) { + perror("clock_gettime"); + return -1; + } + return ts.tv_sec * 1000000000LL + ts.tv_nsec; +#else + struct timeval tv; + if (gettimeofday(&tv, NULL) < 0) { + perror("gettimeofday"); + return -1; + } + return tv.tv_sec * 1000000LL + tv.tv_usec; +#endif +} + +static char * fmt_time(long long elapsed) { + static char buf[64]; +#ifdef HAVE_CLOCK_GETTIME_CPUTIME + long long sec = elapsed / 1000000000; + long long nsec = elapsed % 1000000000; + snprintf(buf, sizeof(buf), "%lld.%09lld processor seconds", sec, nsec); +#else + long long sec = elapsed / 1000000; + long long usec = elapsed % 1000000; + snprintf(buf, sizeof(buf), "%lld.%06lld wall-time seconds", sec, usec); +#endif + return buf; +} + +static int benchmark(const char *keys_file) { + const size_t kl = 16; + size_t max = 50000000; + size_t i; + char *keys = NULL; + char **key_locations = NULL; + khash_t(str2int) *h; + long long start, end; + + if (keys_file) { + max = read_keys(keys_file, &keys, &key_locations); + } else { + keys = make_keys(max, kl); + } + + if (!keys) return -1; + + h = kh_init(str2int); + if (!h) goto 
fail; + + if ((start = get_time()) < 0) + goto fail; + + if (keys_file) { + for (i = 0; i < max; i++) { + int ret; + khint_t k = kh_put(str2int, h, key_locations[i], &ret); + if (ret < 0) { + fprintf(stderr, "Unexpected return from kh_put(%s) : %d\n", + key_locations[i], ret); + goto fail; + } + kh_val(h, k) = i; + } + } else { + for (i = 0; i < max; i++) { + int ret; + khint_t k = kh_put(str2int, h, keys + i * kl, &ret); + if (ret <= 0) { + fprintf(stderr, "Unexpected return from kh_put(%s) : %d\n", + keys + i * kl, ret); + goto fail; + } + kh_val(h, k) = i; + } + } + + if ((end = get_time()) < 0) + goto fail; + + printf("Insert %zu %s\n", max, fmt_time(end - start)); + + if ((start = get_time()) < 0) + goto fail; + + if (keys_file) { + for (i = 0; i < max; i++) { + khint_t k = kh_get(str2int, h, key_locations[i]); + if (k >= kh_end(h)) { + fprintf(stderr, "Couldn't find %s in hash table\n", + key_locations[i]); + goto fail; + } + } + } else { + for (i = 0; i < max; i++) { + khint_t k = kh_get(str2int, h, keys + i * kl); + if (k >= kh_end(h)) { + fprintf(stderr, "Couldn't find %s in hash table\n", + keys + i * kl); + goto fail; + } + } + } + + if ((end = get_time()) < 0) + goto fail; + + printf("Lookup %zu %s\n", max, fmt_time(end - start)); + + write_stats_str2int(h); + + kh_destroy(str2int, h); + free(keys); + free(key_locations); + + return 0; + fail: + kh_destroy(str2int, h); + free(keys); + return -1; +} + +static void show_usage(FILE *out, char *prog) { + fprintf(out, "Usage : %s [-t ] [-i ]\n", prog); + fprintf(out, " Options:\n"); + fprintf(out, " -t Test to run (str2int, benchmark)\n"); + fprintf(out, " -i Optional input file for benchmark\n"); + fprintf(out, " -n Number of items to add\n"); + fprintf(out, " -f Fraction to delete and re-insert\n"); + fprintf(out, " -d Dump hash table stats\n"); + fprintf(out, " -h Show this help\n"); +} + +int main(int argc, char **argv) { + int opt, res = EXIT_SUCCESS; + char *test = NULL; + char *input_file = NULL; + 
size_t max = 1000; + double del_frac = 0.25; + int show_stats = 0; + + while ((opt = getopt(argc, argv, "df:hi:n:t:")) != -1) { + switch (opt) { + case 'd': + show_stats = 1; + break; + case 'f': + del_frac = strtod(optarg, NULL); + if (del_frac < 0 || del_frac > 1.0) { + fprintf(stderr, "Error: -d must be between 0.0 and 1.0\n"); + return EXIT_FAILURE; + } + break; + case 'h': + show_usage(stdout, argv[0]); + return EXIT_SUCCESS; + case 'i': + input_file = optarg; + break; + case 'n': + max = strtoul(optarg, NULL, 0); + if (max == 0 || max > 99999999) { + fprintf(stderr, "Error: -n must be between 1 and %u\n", + MAX_ENTRIES); + return EXIT_FAILURE; + } + break; + case 't': + test = optarg; + break; + default: + show_usage(stderr, argv[0]); + return EXIT_FAILURE; + } + } + + if (!test || strcmp(test, "str2int") == 0) { + if (test_str2int(max, (size_t) (max * del_frac), show_stats) != 0) + res = EXIT_FAILURE; + } + + if (test && strcmp(test, "benchmark") == 0) { + if (benchmark(input_file) != 0) + res = EXIT_FAILURE; + } + + return res; +} diff --git a/src/htslib-1.21/test/test_kstring.c b/src/htslib-1.21/test/test_kstring.c new file mode 100644 index 0000000..8b6188b --- /dev/null +++ b/src/htslib-1.21/test/test_kstring.c @@ -0,0 +1,504 @@ +/* test_kstring.c -- kstring unit tests + + Copyright (C) 2018, 2020, 2024 Genome Research Ltd. + + Author: Rob Davies + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "../htslib/kstring.h" + +static inline void clamp(int64_t *val, int64_t min, int64_t max) { + if (*val < min) *val = min; + if (*val > max) *val = max; +} + +static int test_kroundup_size_t(int verbose) { + size_t val, exp; + int ret = 0; + + val = 0; + kroundup_size_t(val); + if (verbose) { + printf("kroundup_size_t(0) = 0x%zx\n", val); + } + if (val != 0) { + fprintf(stderr, "kroundup_size_t(0) produced 0x%zx, expected 0\n", val); + ret = -1; + } + + for (exp = 0; exp < sizeof(val) * 8; exp++) { + size_t expected = ((size_t) 1) << exp; + ssize_t delta; + for (delta = exp > 1 ? -1 : 0; delta <= (exp < 2 ? 0 : 1); delta++) { + size_t val_in = expected + delta; + val = val_in; + kroundup_size_t(val); + if (verbose) { + printf("kroundup_size_t(0x%zx) = 0x%zx\n", val_in, val); + } + if (delta <= 0) { + if (val != expected) { + fprintf(stderr, "kroundup_size_t(0x%zx) produced 0x%zx, " + "expected 0x%zx\n", + val_in, val, expected); + ret = -1; + } + } else { + expected *= 2; + if (!expected) --expected; + if (val != expected) { + fprintf(stderr, "kroundup_size_t(0x%zx) produced 0x%zx, " + "expected 0x%zx\n", + val_in, val, expected); + ret = -1; + } + } + } + } + return ret; +} + +static int test_kroundup_signed(int verbose) { + int32_t val, ret = 0; + size_t exp; + for (exp = 0; exp < sizeof(val) * 8 - 1; exp++) { + uint32_t expected = ((uint32_t) 1) << exp; + ssize_t delta; + for (delta = exp > 1 ? 
-1 : 0; delta <= (exp < 2 ? 0 : 1); delta++) { + int32_t val_in = expected + delta; + val = val_in; + kroundup32(val); + if (verbose) { + printf("kroundup32(%d) = %d\n", val_in, val); + } + if (delta <= 0) { + if ((uint32_t) val != expected) { + fprintf(stderr, "kroundup32(%d) produced %d, expected %u\n", + val_in, val, expected); + ret = -1; + } + } else { + if (exp < sizeof(val) * 8 - 2) { + expected *= 2; + } else { + expected = ((expected - 1) << 1 | 1); + } + if ((uint32_t) val != expected) { + fprintf(stderr, "kroundup32(%d) produced %d, expected %u\n", + val_in, val, expected); + ret = -1; + } + } + } + } + return ret; +} + +static int test_kputuw_from_to(kstring_t *str, unsigned int s, unsigned int e) { + unsigned int i = s; + + for (;;) { + str->l = 0; + memset(str->s, 0xff, str->m); + if (kputuw(i, str) < 0 || !str->s) { + perror("kputuw"); + return -1; + } + if (str->l >= str->m || str->s[str->l] != '\0') { + fprintf(stderr, "No NUL termination on string from kputuw\n"); + return -1; + } + if (i != strtoul(str->s, NULL, 10)) { + fprintf(stderr, + "kputuw wrote the wrong value, expected %u, got %s\n", + i, str->s); + return -1; + } + if (i >= e) break; + i++; + } + return 0; +} + +static int test_kputuw(int64_t start, int64_t end) { + kstring_t str = { 0, 0, NULL }; + int64_t val; + + str.s = malloc(2); + if (!str.s) { + perror("malloc"); + return -1; + } + str.m = 2; + + for (val = 0; val < UINT_MAX; val = val == 0 ? 1 : val * 10) { + unsigned int s = val == 0 ? 
0 : val - 5; + unsigned int e = val + 5; + + if (test_kputuw_from_to(&str, s, e) < 0) { + free(ks_release(&str)); + return -1; + } + } + + if (test_kputuw_from_to(&str, UINT_MAX - 5, UINT_MAX) < 0) { + free(ks_release(&str)); + return -1; + } + + str.m = 1; // Force a resize + clamp(&start, 0, UINT_MAX); + clamp(&end, 0, UINT_MAX); + + if (test_kputuw_from_to(&str, start, end) < 0) { + free(ks_release(&str)); + return -1; + } + + free(ks_release(&str)); + + return 0; +} + +static int test_kputw_from_to(kstring_t *str, int s, int e) { + int i = s; + + for (;;) { + str->l = 0; + memset(str->s, 0xff, str->m); + if (kputw(i, str) < 0 || !str->s) { + perror("kputw"); + return -1; + } + if (str->l >= str->m || str->s[str->l] != '\0') { + fprintf(stderr, "No NUL termination on string from kputw\n"); + return -1; + } + if (i != strtol(str->s, NULL, 10)) { + fprintf(stderr, + "kputw wrote the wrong value, expected %d, got %s\n", + i, str->s); + return -1; + } + if (i >= e) break; + i++; + } + return 0; +} + +static int test_kputw(int64_t start, int64_t end) { + kstring_t str = { 0, 0, NULL }; + int64_t val; + + str.s = malloc(2); + if (!str.s) { + perror("malloc"); + return -1; + } + str.m = 2; + + for (val = 1; val < INT_MAX; val *= 10) { + if (test_kputw_from_to(&str, val > 5 ? val - 5 : 0, val + 5) < 0) { + free(ks_release(&str)); + return -1; + } + } + + for (val = -1; val > INT_MIN; val *= 10) { + if (test_kputw_from_to(&str, val - 5, val < -5 ? 
val + 5 : 0) < 0) { + free(ks_release(&str)); + return -1; + } + } + + if (test_kputw_from_to(&str, INT_MAX - 5, INT_MAX) < 0) { + free(ks_release(&str)); + return -1; + } + + if (test_kputw_from_to(&str, INT_MIN, INT_MIN + 5) < 0) { + free(ks_release(&str)); + return -1; + } + + str.m = 1; // Force a resize + clamp(&start, INT_MIN, INT_MAX); + clamp(&end, INT_MIN, INT_MAX); + + if (test_kputw_from_to(&str, start, end) < 0) { + free(ks_release(&str)); + return -1; + } + + free(ks_release(&str)); + + return 0; +} + +static int test_kputll_from_to(kstring_t *str, long long s, long long e) { + long long i = s; + + for (;;) { + str->l = 0; + memset(str->s, 0xff, str->m); + if (kputll(i, str) < 0 || !str->s) { + perror("kputll"); + return -1; + } + if (str->l >= str->m || str->s[str->l] != '\0') { + fprintf(stderr, "No NUL termination on string from kputll\n"); + return -1; + } + if (i != strtoll(str->s, NULL, 10)) { + fprintf(stderr, + "kputll wrote the wrong value, expected %lld, got %s\n", + i, str->s); + return -1; + } + if (i >= e) break; + i++; + } + return 0; +} + +static int test_kputll(long long start, long long end) { + kstring_t str = { 0, 0, NULL }; + unsigned long long val; + + str.s = malloc(2); + if (!str.s) { + perror("malloc"); + return -1; + } + str.m = 2; + + for (val = 1; val < INT64_MAX-5; val *= 10) { + if (test_kputll_from_to(&str, val >= 5 ? val - 5 : val, val) < 0) { + free(ks_release(&str)); + return -1; + } + } + + for (val = 1; val < INT64_MAX-5; val *= 10) { + long long valm = -val; + if (test_kputll_from_to(&str, valm >= 5 ? 
valm - 5 : valm, valm) < 0) { + free(ks_release(&str)); + return -1; + } + } + + if (test_kputll_from_to(&str, INT64_MAX - 5, INT64_MAX) < 0) { + free(ks_release(&str)); + return -1; + } + + if (test_kputll_from_to(&str, INT64_MIN, INT64_MIN + 5) < 0) { + free(ks_release(&str)); + return -1; + } + + str.m = 1; // Force a resize + int64_t start2 = (int64_t)start; // no larger on our platforms + int64_t end2 = (int64_t)end; + clamp(&start2, INT64_MIN, INT64_MAX); + clamp(&end2, INT64_MIN, INT64_MAX); + + if (test_kputll_from_to(&str, start, end) < 0) { + free(ks_release(&str)); + return -1; + } + + free(ks_release(&str)); + + return 0; +} + +// callback used by test_kgetline +static char *mock_fgets(char *str, int num, void *p) { + int *mock_state = (int*)p; + (*mock_state)++; + switch (*mock_state) { + case 1: + case 4: + case 7: + // a few characters, no endline + strcpy(str, "ABCD"); + break; + case 2: + case 3: + // \n endline + strcpy(str, "\n"); + break; + case 5: + case 6: + // \r\n endline + strcpy(str, "\r\n"); + break; + default: + // eof + return 0; + } + + return str; +} + +static int test_kgetline(void) { + kstring_t s = KS_INITIALIZE; + int mock_state = 0; + + // normal line, \n terminated, called with non-empty s + kputs("_", &s); + if (0 != kgetline(&s, mock_fgets, &mock_state) || 0 != strcmp("_ABCD", s.s) || 5 != s.l) return -1; + s.l = 0; + // empty line, \n terminated + if (0 != kgetline(&s, mock_fgets, &mock_state) || 0 != strcmp("", s.s) || 0 != s.l) return -1; + s.l = 0; + // normal line, \r\n terminated + if (0 != kgetline(&s, mock_fgets, &mock_state) || 0 != strcmp("ABCD", s.s) || 4 != s.l) return -1; + s.l = 0; + // empty line, \r\n terminated + if (0 != kgetline(&s, mock_fgets, &mock_state) || 0 != strcmp("", s.s) || 0 != s.l) return -1; + s.l = 0; + // line terminated by EOF + if (0 != kgetline(&s, mock_fgets, &mock_state) || 0 != strcmp("ABCD", s.s) || 4 != s.l) return -1; + s.l = 0; + // EOF + if (EOF != kgetline(&s, mock_fgets, 
&mock_state) || 0 != s.l) return -1; + + ks_free(&s); + return EXIT_SUCCESS; +} + +// callback used by test_kgetline2 +static ssize_t mock_fgets2(char *str, size_t num, void *p) { + int *mock_state = (int*)p; + (*mock_state)++; + switch (*mock_state) { + case 1: + case 4: + case 7: + // a few characters, no endline + strcpy(str, "ABCD"); + break; + case 2: + case 3: + // \n endline + strcpy(str, "\n"); + break; + case 5: + case 6: + // \r\n endline + strcpy(str, "\r\n"); + break; + default: + // eof + return 0; + } + + return strlen(str); +} + +static int test_kgetline2(void) { + kstring_t s = KS_INITIALIZE; + int mock_state = 0; + + // normal line, \n terminated, called with non-empty s + kputs("_", &s); + if (0 != kgetline2(&s, mock_fgets2, &mock_state) || 0 != strcmp("_ABCD", s.s) || 5 != s.l) return -1; + s.l = 0; + // empty line, \n terminated + if (0 != kgetline2(&s, mock_fgets2, &mock_state) || 0 != strcmp("", s.s) || 0 != s.l) return -1; + s.l = 0; + // normal line, \r\n terminated + if (0 != kgetline2(&s, mock_fgets2, &mock_state) || 0 != strcmp("ABCD", s.s) || 4 != s.l) return -1; + s.l = 0; + // empty line, \r\n terminated + if (0 != kgetline2(&s, mock_fgets2, &mock_state) || 0 != strcmp("", s.s) || 0 != s.l) return -1; + s.l = 0; + // line terminated by EOF + if (0 != kgetline2(&s, mock_fgets2, &mock_state) || 0 != strcmp("ABCD", s.s) || 4 != s.l) return -1; + s.l = 0; + // EOF + if (EOF != kgetline2(&s, mock_fgets2, &mock_state) || 0 != s.l) return -1; + + ks_free(&s); + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) { + int opt, res = EXIT_SUCCESS; + int64_t start = 0; + int64_t end = 0; + char *test = NULL; + int verbose = 0; + + while ((opt = getopt(argc, argv, "e:s:t:v")) != -1) { + switch (opt) { + case 's': + start = strtoll(optarg, NULL, 0); + break; + case 'e': + end = strtoll(optarg, NULL, 0); + break; + case 't': + test = optarg; + break; + case 'v': + verbose++; + break; + default: + fprintf(stderr, "Usage : %s [-s ] [-e ] [-t 
]\n", + argv[0]); + return EXIT_FAILURE; + } + } + + if (!test || strcmp(test, "kroundup_size_t") == 0) + if (test_kroundup_size_t(verbose) != 0) res = EXIT_FAILURE; + + if (!test || strcmp(test, "kroundup_signed") == 0) + if (test_kroundup_signed(verbose) != 0) res = EXIT_FAILURE; + + if (!test || strcmp(test, "kputuw") == 0) + if (test_kputuw(start, end) != 0) res = EXIT_FAILURE; + + if (!test || strcmp(test, "kputw") == 0) + if (test_kputw(start, end) != 0) res = EXIT_FAILURE; + + if (!test || strcmp(test, "kputll") == 0) + if (test_kputll(start, end) != 0) res = EXIT_FAILURE; + + if (!test || strcmp(test, "kgetline") == 0) + if (test_kgetline() != 0) res = EXIT_FAILURE; + + if (!test || strcmp(test, "kgetline2") == 0) + if (test_kgetline2() != 0) res = EXIT_FAILURE; + + return res; +} diff --git a/src/htslib-1.21/test/test_mod.c b/src/htslib-1.21/test/test_mod.c new file mode 100644 index 0000000..ebe9b2a --- /dev/null +++ b/src/htslib-1.21/test/test_mod.c @@ -0,0 +1,230 @@ +/* test/test_mod.c -- testing of base modification functions + + Copyright (C) 2020-2021, 2023 Genome Research Ltd. + + Author: James Bonfield + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +/* +This tests multiple APIs. The simplest is to parse the MM/ML tags with +bam_parse_basemod and then call bam_mods_at_next_pos once for each base in +the bam sequence to check for modifications. + +Ie: + + hts_base_mod_state *m = hts_base_mod_state_alloc(); + bam_parse_basemod(b, m); // b=bam1_t pointer + hts_base_mod mods[5]; + for (i = 0; i < b->core.l_qseq; i++) { + n = bam_mods_at_next_pos(b, m, mods, 5); + for (j = 0; j < n && j < 5; j++) { + // Report 'n'th mod at seq pos 'i'. + // mods[j].modified_base holds the base mod itself, with + // mods[j].canonical_base, mods[j].strand and mods[j].qual + // also present in hts_base_mod struct. + // ... + } + } + hts_base_mod_state_free(m); + +The extended mode has the same loop above, but calls bam_mods_query_type +to return additional meta-data including the strand, canonical base and +whether the base modification is recorded implicitly or explicitly: + + int ret = bam_mods_query_type(m, mods[j].modified_base, + &m_strand, &m_implicit, + &m_canonical); + +Looping over every base in the sequence is not particularly efficient +however unless this fits your natural processing order. 
The alternative +is to call bam_next_basemod to iterate only over modified locations: + + hts_base_mod_state *m = hts_base_mod_state_alloc(); + bam_parse_basemod(b, m); // b=bam1_t pointer + hts_base_mod mods[5]; + while ((n=bam_next_basemod(b, m, mods, 5, &pos)) > 0) { + for (j = 0; j < n && j < 5; j++) { + // Report 'n'th mod at sequence position 'pos' + } + } + hts_base_mod_state_free(m); + +*/ + +#include +#include + +#include "../htslib/sam.h" + +static char *code(int id) { + static char code[20]; + if (id > 0) { + code[0] = id; + code[1] = 0; + } else { + snprintf(code, sizeof(code), "(%d)", -id); + } + + return code; +} + +int main(int argc, char **argv) { + int extended = 0; + uint32_t flags = 0; + + if (argc > 1 && strcmp(argv[1], "-x") == 0) { + extended = 1; + argv++; + argc--; + } + + if (argc > 2 && strcmp(argv[1], "-f") == 0) { + flags = atoi(argv[2]); + argv+=2; + argc-=2; + } + + if (argc < 2) + return 1; + + samFile *in = sam_open(argv[1], "r"); + if (!in) + return 1; + + bam1_t *b = bam_init1(); + sam_hdr_t *h = sam_hdr_read(in); + hts_base_mod_state *m = hts_base_mod_state_alloc(); + if (!h || !b || !m) + goto err; + + int r; + while ((r = sam_read1(in, h, b)) >= 0) { + if (bam_parse_basemod2(b, m, flags) < 0) { + fprintf(stderr, "Failed to parse MM/ML aux tags\n"); + goto err; + } + + // per-base iterator + int i, j, n; + hts_base_mod mods[5]; + for (i = 0; i < b->core.l_qseq; i++) { + char sp = '\t'; + n = bam_mods_at_next_pos(b, m, mods, 5); + printf("%d\t%c", i, seq_nt16_str[bam_seqi(bam_get_seq(b), i)]); + for (j = 0; j < n && j < 5; j++) { + char qstr[10]; + if (mods[j].qual == HTS_MOD_UNCHECKED) + qstr[0] = '#', qstr[1] = 0; + else if (mods[j].qual == HTS_MOD_UNKNOWN) + qstr[0] = '.', qstr[1] = 0; + else + snprintf(qstr, 10, "%d", mods[j].qual); + + if (extended) { + int m_strand, m_implicit; + char m_canonical; + int ret = bam_mods_query_type(m, mods[j].modified_base, + &m_strand, &m_implicit, + &m_canonical); + if (ret < 0 || + 
m_canonical != mods[j].canonical_base || + m_strand != mods[j].strand) + goto err; + printf("%c%c%c%s%c%s", + sp, mods[j].canonical_base, + "+-"[mods[j].strand], + code(mods[j].modified_base), + "?."[m_implicit], + qstr); + } else { + printf("%c%c%c%s%s", + sp, mods[j].canonical_base, + "+-"[mods[j].strand], + code(mods[j].modified_base), + qstr); + } + sp = ' '; + } + putchar('\n'); + } + + puts("---"); + + bam_parse_basemod2(b, m, flags); + + // List possible mod choices. + int *all_mods; + int all_mods_n = 0; + all_mods = bam_mods_recorded(m, &all_mods_n); + printf("Present:"); + for (i = 0; i < all_mods_n; i++) { + int m_strand, m_implicit; + char m_canonical; + bam_mods_queryi(m, i, &m_strand, &m_implicit, &m_canonical); + printf(all_mods[i] > 0 ? " %c" : " #%d", all_mods[i]); + putchar("?."[m_implicit]); + } + putchar('\n'); + + int pos; + while ((n=bam_next_basemod(b, m, mods, 5, &pos)) > 0) { + char sp = '\t'; + printf("%d\t%c", pos, + seq_nt16_str[bam_seqi(bam_get_seq(b), pos)]); + for (j = 0; j < n && j < 5; j++) { + char qstr[10]; + if (mods[j].qual == HTS_MOD_UNCHECKED) + qstr[0] = '#', qstr[1] = 0; + else if (mods[j].qual == HTS_MOD_UNKNOWN) + qstr[0] = '.', qstr[1] = 0; + else + snprintf(qstr, 10, "%d", mods[j].qual); + + printf("%c%c%c%s%s", + sp, mods[j].canonical_base, + "+-"[mods[j].strand], + code(mods[j].modified_base), + qstr); + sp = ' '; + } + putchar('\n'); + } + + if (n < 0) + goto err; + + puts("\n===\n"); + } + fflush(stdout); + int ret = 0; + if (sam_close(in) != 0 || r < -1) + ret = 1; + + bam_destroy1(b); + sam_hdr_destroy(h); + hts_base_mod_state_free(m); + return ret; + + err: + bam_destroy1(b); + sam_hdr_destroy(h); + hts_base_mod_state_free(m); + return sam_close(in) != 0 ? 
1 : 2; +} diff --git a/src/htslib-1.21/test/test_nibbles.c b/src/htslib-1.21/test/test_nibbles.c new file mode 100644 index 0000000..1ef3456 --- /dev/null +++ b/src/htslib-1.21/test/test_nibbles.c @@ -0,0 +1,164 @@ +/* test/test_nibbles.c -- Test SIMD optimised function implementations. + + Copyright (C) 2024 Centre for Population Genomics. + + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include + +#include +#include +#include +#include + +#ifdef HAVE_CLOCK_GETTIME_CPUTIME +#include +#else +#include +#endif + +#include "../htslib/sam.h" +#include "../sam_internal.h" + +long long gettime(void) { +#ifdef HAVE_CLOCK_GETTIME_CPUTIME + struct timespec ts; + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); + return ts.tv_sec * 1000000000LL + ts.tv_nsec; +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec * 1000000LL + tv.tv_usec; +#endif +} + +char *fmttime(long long elapsed) { + static char buf[64]; + +#ifdef HAVE_CLOCK_GETTIME_CPUTIME + long long sec = elapsed / 1000000000; + long long nsec = elapsed % 1000000000; + sprintf(buf, "%lld.%09lld processor seconds", sec, nsec); +#else + long long sec = elapsed / 1000000; + long long usec = elapsed % 1000000; + sprintf(buf, "%lld.%06lld wall-time seconds", sec, usec); +#endif + + return buf; +} + +void nibble2base_single(uint8_t *nib, char *seq, int len) { + int i; + for (i = 0; i < len; i++) + seq[i] = seq_nt16_str[bam_seqi(nib, i)]; +} + +unsigned char nibble[5000]; +char buf[10000]; + +int validate_nibble2base(void) { + char defbuf[500]; + int i, start, len; + unsigned long long total = 0, failed = 0; + + for (i = 0; i < sizeof nibble; i++) + nibble[i] = i % 256; + + for (start = 0; start < 80; start++) + for (len = 0; len < 400; len++) { + memset(defbuf, '\0', sizeof defbuf); + nibble2base_single(&nibble[start], defbuf, len); + + memset(buf, '\0', sizeof defbuf); + nibble2base(&nibble[start], buf, len); + + total++; + if (strcmp(defbuf, buf) != 0) { + printf("%s expected\n%s FAIL\n\n", defbuf, buf); + failed++; + } + } + + if (failed > 0) { + fprintf(stderr, "Failures: %llu (out of %llu tests)\n", failed, total); + return 1; + } + + return 0; +} + +int time_nibble2base(int length, unsigned long count) { + unsigned long i, total = 0; + + for (i = 0; i < length; i++) + nibble[i] = i % 256; + + printf("Timing %lu nibble2base iterations with read length %d...\n", count, length); + 
long long start = gettime(); + + for (i = 0; i < count; i++) { + nibble2base(nibble, buf, length); + total += buf[i % length]; + } + + long long stop = gettime(); + printf("%s (summing to %lu)\n", fmttime(stop - start), total); + return 0; +} + +int main(int argc, char **argv) { + int readlen = 5000; + unsigned long count = 1000000; + int status = 0; + int c; + + if (argc == 1) + printf( +"Usage: test_nibbles [-c NUM] [-r NUM] [-n|-v]...\n" +"Options:\n" +" -c NUM Specify number of iterations [%lu]\n" +" -n Run nibble2base speed tests\n" +" -r NUM Specify read length [%d]\n" +" -v Run all validation tests\n" +"", count, readlen); + + while ((c = getopt(argc, argv, "c:nr:v")) >= 0) + switch (c) { + case 'c': + count = strtoul(optarg, NULL, 0); + break; + + case 'n': + status += time_nibble2base(readlen, count); + break; + + case 'r': + readlen = atoi(optarg); + break; + + case 'v': + status += validate_nibble2base(); + break; + } + + return status; +} diff --git a/src/htslib-1.21/test/test_realn.c b/src/htslib-1.21/test/test_realn.c new file mode 100644 index 0000000..3f51170 --- /dev/null +++ b/src/htslib-1.21/test/test_realn.c @@ -0,0 +1,170 @@ +/* test/test_realn.c -- test sam_prob_realn() function + + Copyright (C) 2018 Genome Research Ltd. + + Author: Rob Davies + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "../htslib/sam.h" +#include "../htslib/hts.h" +#include "../htslib/faidx.h" + +void usage(const char *prog) { + fprintf(stderr, "Usage: %s -i -o -f \n", prog); +} + +int main(int argc, char **argv) { + htsFile *in = NULL; + htsFile *out = NULL; + char *in_name = "-"; + char *out_name = "-"; + char *ref_name = NULL; + char *ref_seq = NULL; + char modew[8] = "w"; + faidx_t *fai = NULL; + sam_hdr_t *hdr = NULL; + bam1_t *rec = NULL; + int c, res, last_ref = -1, ref_len = 0; + int adjust = 0, extended = 0, recalc = 0, flags = 0; + + while ((c = getopt(argc, argv, "aef:hi:o:r")) >= 0) { + switch (c) { + case 'a': adjust = 1; break; + case 'e': extended = 1; break; + case 'f': ref_name = optarg; break; + case 'h': usage(argv[0]); return EXIT_SUCCESS; + case 'i': in_name = optarg; break; + case 'o': out_name = optarg; break; + case 'r': recalc = 1; break; + default: usage(argv[0]); return EXIT_FAILURE; + } + } + + if (!ref_name) { + usage(argv[0]); + return EXIT_FAILURE; + } + + flags = (adjust ? 1 : 0) | (extended ? 2 : 0) | (recalc ? 
4 : 0); + + fai = fai_load(ref_name); + if (!fai) { + fprintf(stderr, "Couldn't load reference %s\n", ref_name); + goto fail; + } + + rec = bam_init1(); + if (!rec) { + perror(NULL); + goto fail; + } + + in = hts_open(in_name, "r"); + if (!in) { + fprintf(stderr, "Couldn't open %s : %s\n", in_name, strerror(errno)); + goto fail; + } + + hdr = sam_hdr_read(in); + if (!hdr) { + fprintf(stderr, "Couldn't read header for %s\n", in_name); + goto fail; + } + + out = hts_open(out_name, modew); + if (!out) { + fprintf(stderr, "Couldn't open %s : %s\n", out_name, strerror(errno)); + goto fail; + } + + if (sam_hdr_write(out, hdr) < 0) { + fprintf(stderr, "Couldn't write header to %s : %s\n", + out_name, strerror(errno)); + goto fail; + } + + while ((res = sam_read1(in, hdr, rec)) >= 0) { + if (rec->core.tid >= hdr->n_targets) { + fprintf(stderr, "Invalid BAM reference id %d\n", rec->core.tid); + goto fail; + } + if (last_ref != rec->core.tid && rec->core.tid >= 0) { + free(ref_seq); + ref_seq = faidx_fetch_seq(fai, hdr->target_name[rec->core.tid], + 0, INT_MAX, &ref_len); + if (!ref_seq) { + fprintf(stderr, "Couldn't get reference %s\n", + hdr->target_name[rec->core.tid]); + goto fail; + } + last_ref = rec->core.tid; + } + if (rec->core.tid >= 0) { + res = sam_prob_realn(rec, ref_seq, ref_len, flags); + if (res <= -4) { + fprintf(stderr, "Error running sam_prob_realn : %s\n", + strerror(errno)); + goto fail; + } + } + if (sam_write1(out, hdr, rec) < 0) { + fprintf(stderr, "Error writing to %s\n", out_name); + goto fail; + } + } + res = hts_close(in); + in = NULL; + if (res < 0) { + fprintf(stderr, "Error closing %s\n", in_name); + goto fail; + } + + res = hts_close(out); + out = NULL; + if (res < 0) { + fprintf(stderr, "Error closing %s\n", out_name); + goto fail; + } + + sam_hdr_destroy(hdr); + bam_destroy1(rec); + free(ref_seq); + fai_destroy(fai); + + return EXIT_SUCCESS; + + fail: + if (hdr) sam_hdr_destroy(hdr); + if (rec) bam_destroy1(rec); + if (in) hts_close(in); + 
if (out) hts_close(out); + free(ref_seq); + fai_destroy(fai); + return EXIT_FAILURE; +} diff --git a/src/htslib-1.21/test/test_str2int.c b/src/htslib-1.21/test/test_str2int.c new file mode 100644 index 0000000..70e7991 --- /dev/null +++ b/src/htslib-1.21/test/test_str2int.c @@ -0,0 +1,225 @@ +/* test/test_str2int.c -- Test integer string conversion (and safe printing) + + Copyright (C) 2019-2020 Genome Research Ltd. + + Author: Rob Davies + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + + +#include +#include +#include +#include +#include +#include +#include + +#include "../textutils_internal.h" + +// Test hts_str2int() and hts_str2uint() on various values around the +// maximum (or minimum for negative numbers) allowed for the given +// number of bits. Ensures that the failed flag is set when the output +// isn't going to fit, that the correct value is returned and that +// 'end' points to the character following the number. 
+static int check_str2int(int verbose) { + char buffer[64], *end; + int64_t val; + uint64_t num, uval; + int failed = 0, efail, i, offset; + const char sentinel = '#'; + + // Positive value (unsigned) + for (i = 1; i < 64; i++) { + num = (1ULL << i) - 1; + for (offset = i < 5 ? -(1LL << (i - 1)) : -16; offset <= 30; offset++) { + efail = (offset > 0); + snprintf(buffer, sizeof(buffer), "%" PRIu64 "%c", + num + offset, sentinel); + + uval = hts_str2uint(buffer, &end, i, &failed); + if (failed != efail || uval != (!efail ? num + offset : num) + || *end != sentinel) { + fprintf(stderr, "hts_str2uint failed: %d bit " + "%s %"PRIu64" '%c' %d (%d)\n", + i, buffer, uval, *end, failed, efail); + return -1; + } else if (verbose) { + fprintf(stderr, "hts_str2uint OK: %d bit " + "%s %"PRIu64" '%c' %d (%d)\n", + i, buffer, uval, *end, failed, efail); + } + failed = 0; + } + + // Positive value (signed) + for (offset = i < 5 ? -(1LL << (i - 1)) : -16; offset <= 30; offset++) { + efail = (offset > 0); + snprintf(buffer, sizeof(buffer), "%" PRIu64 "%c", + num + offset, sentinel); + + val = hts_str2int(buffer, &end, i + 1, &failed); + if (failed != efail || val != (!efail ? num + offset : num) + || *end != sentinel) { + fprintf(stderr, + "hts_str2int failed: %d bit " + "%s %"PRId64" '%c' %d (%d)\n", + i + 1, buffer, val, *end, failed, efail); + return -1; + } else if (verbose) { + fprintf(stderr, "hts_str2int OK: %d bit " + "%s %"PRId64" '%c' %d (%d)\n", + i + 1, buffer, val, *end, failed, efail); + } + failed = 0; + } + + // Negative value (signed) + for (offset = i < 5 ? -(1LL << (i - 1)) : -16; offset <= 30; offset++) { + efail = (offset > 0); + snprintf(buffer, sizeof(buffer), "-%" PRIu64 "%c", + num + offset + 1, sentinel); + + val = hts_str2int(buffer, &end, i + 1, &failed); + // Cast of val to unsigned in this comparison avoids undefined + // behaviour when checking INT64_MIN. + if (failed != efail + || -((uint64_t) val) != (!efail ? 
num + offset + 1 : num + 1) + || *end != sentinel) { + fprintf(stderr, + "hts_str2int failed: %d bit " + "%s %"PRId64" '%c' %d (%d)\n", + i + 1, buffer, val, *end, failed, efail); + return -1; + } else if (verbose) { + fprintf(stderr, "hts_str2int OK: %d bit " + "%s %"PRId64" '%c' %d (%d)\n", + i + 1, buffer, val, *end, failed, efail); + } + failed = 0; + } + } + + // Special case for UINT64_MAX + for (offset = 0; offset <= 999; offset++) { + efail = offset > 615; + snprintf(buffer, sizeof(buffer), "18446744073709551%03d%c", + offset, sentinel); + uval = hts_str2uint(buffer, &end, 64, &failed); + if (failed != efail + || uval != (efail ? UINT64_MAX : 18446744073709551000ULL + offset) + || *end != sentinel) { + fprintf(stderr, "hts_str2uint failed: 64 bit %s " + "%"PRIu64" '%c' %d (%d)\n", + buffer, uval, *end, failed, efail); + return -1; + } else if (verbose) { + fprintf(stderr, "hts_str2uint OK: 64 bit " + "%s %"PRIu64" '%c' %d (%d)\n", + buffer, uval, *end, failed, efail); + } + } + return 0; +} + +static int +check_strprint2(int verbose, const char *str, size_t len, size_t destlen, + char quote, const char *expect) { + char buf[100]; + hts_strprint(buf, destlen, quote, str, len); + if (strcmp(buf, expect) != 0) { + fprintf(stderr, "hts_strprint failed: length %zu: got \"%.*s\", " + "expected \"%s\"\n", destlen, (int) destlen, buf, expect); + return -1; + } + else if (verbose) { + fprintf(stderr, "hts_strprint OK: length %zu: got \"%s\"\n", + destlen, expect); + } + return 0; +} + +static int +check_strprint1(int v, const char *str, size_t destlen, const char *expect) { + return check_strprint2(v, str, SIZE_MAX, destlen, '\0', expect); +} + +static int +check_strprintq(int v, const char *str, size_t destlen, char quote, + const char *expect) { + return check_strprint2(v, str, SIZE_MAX, destlen, quote, expect); +} + +static int check_strprint(int v) { + int res = 0; + + res |= check_strprint1(v, "chr10", 9, "chr10"); + res |= check_strprint1(v, "chr10", 6, 
"chr10"); + res |= check_strprint1(v, "chr10", 5, "c..."); + res |= check_strprint1(v, "chr10", 4, "..."); + res |= check_strprint1(v, "tab\twxyz",10, "tab\\twxyz"); + res |= check_strprint1(v, "tab\twxyz", 9, "tab\\t..."); + res |= check_strprint1(v, "tab\twxyz", 8, "tab\\..."); + res |= check_strprint1(v, "tab\twxyz", 7, "tab..."); + res |= check_strprint1(v, "tab\twxyz", 6, "ta..."); + res |= check_strprint1(v, "\xab", 5, "\\xAB"); + res |= check_strprint1(v, "\xab", 4, "..."); + res |= check_strprint1(v, "hello\xff", 40, "hello\\xFF"); + res |= check_strprint1(v, "hello\xff", 10, "hello\\xFF"); + res |= check_strprint1(v, "hello\xff", 9, "hello..."); + res |= check_strprint1(v, "hello\t", 40, "hello\\t"); + res |= check_strprint1(v, "hello\t", 8, "hello\\t"); + res |= check_strprint1(v, "hello\t", 7, "hel..."); + res |= check_strprint1(v, "\t", 40, "\\t"); + res |= check_strprint1(v, "", 40, ""); + + res |= check_strprintq(v, "chr10", 9, '\'', "'chr10'"); + res |= check_strprintq(v, "chr10", 8, '\'', "'chr10'"); + res |= check_strprintq(v, "chr10", 7, '\'', "'c'..."); + res |= check_strprintq(v, "chr10", 6, '\'', "''..."); + res |= check_strprintq(v, "quo'wxyz",12, '\'', "'quo\\'wxyz'"); + res |= check_strprintq(v, "quo'wxyz",11, '\'', "'quo\\''..."); + res |= check_strprintq(v, "quo'wxyz",10, '\'', "'quo\\'..."); + + res |= check_strprint2(v, "foo\0bar", SIZE_MAX, 10, '\0', "foo"); + res |= check_strprint2(v, "foo\0bar", 7,10, '\0', "foo\\0bar"); + res |= check_strprint2(v, "foo\0bar", 7, 9, '\0', "foo\\0bar"); + res |= check_strprint2(v, "foo\0bar", 7, 8, '\0', "foo\\..."); + + return res; +} + +int main(int argc, char **argv) { + int verbose = 0, opt, res; + + while ((opt = getopt(argc, argv, "v")) != -1) { + switch (opt) { + case 'v': + verbose = 1; + break; + default: + fprintf(stderr, "Usage: %s [-v]\n", argv[0]); + return EXIT_FAILURE; + } + } + + res = check_str2int(verbose); + res |= check_strprint(verbose); + return res ? 
EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/htslib-1.21/test/test_time_funcs.c b/src/htslib-1.21/test/test_time_funcs.c new file mode 100644 index 0000000..0e05129 --- /dev/null +++ b/src/htslib-1.21/test/test_time_funcs.c @@ -0,0 +1,125 @@ +/* test/test_time_funcs.c -- Test time functions + + Copyright (C) 2022 Genome Research Ltd. + + Author: Rob Davies + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../hts_time_funcs.h" + +int test_normalised(time_t start, time_t end, time_t incr) { + time_t i, j; + struct tm *utc; + + for (i = start; i < end; i += incr) { + utc = gmtime(&i); + j = hts_time_gm(utc); + if (i != j) { + fprintf(stderr, + "hts_time_gm() failed, got %"PRId64" expected %"PRId64"\n", + (int64_t) j, (int64_t) i); + return 1; + } + } + return 0; +} + +int test_specific(int year, int mon, int mday, int hour, int min, int sec, + time_t expected) { + struct tm utc = { sec, min, hour, mday, mon - 1, year - 1900, 0, 0, 0 }; + time_t res = hts_time_gm(&utc); + if (res != expected) { + fprintf(stderr, + "hts_time_gm() failed for %4d/%02d/%02d %02d:%02d:%02d :" + " got %"PRId64" expected %"PRId64"\n", + year, mon, mday, hour, min, sec, + (int64_t) res, (int64_t) expected); + return 1; + } + return 0; +} + +int main(int argc, char **argv) { + int res = 0; + + if (test_normalised(0, INT_MAX - 1000, 1000) != 0) + return EXIT_FAILURE; + if (sizeof(time_t) >= 8) { + if (test_normalised(INT_MAX - 1000, + (time_t)((int64_t) INT_MAX * 2), 1000) != 0) + return EXIT_FAILURE; + } + + // 2022-06-14 12:32:10 + res |= test_specific(2022, 6, 14, 12, 32, 10, 1655209930); + // 2022-06-14 12:32:10 + res |= test_specific(1993, 9, 10514, 12, 32, 10, 1655209930); + // 2020-02-28 12:00:00 + res |= test_specific(2020, 2, 28, 12, 0, 0, 1582891200); + // 2020-02-29 12:00:00 + res |= test_specific(2020, 2, 29, 12, 0, 0, 1582977600); + // 2020-03-01 12:00:00 + res |= test_specific(2020, 2, 30, 12, 0, 0, 1583064000); + // 2020-02-29 12:00:00 + res |= test_specific(2020, 3, 0, 12, 0, 0, 1582977600); + // 2020-02-01 12:00:00 + res |= test_specific(2019, 14, 1, 12, 0, 0, 1580558400); + // 2020-03-01 12:00:00 + res |= test_specific(2019, 15, 1, 12, 0, 0, 1583064000); + // 2021-03-01 12:00:00 + res |= test_specific(2019, 27, 1, 12, 0, 0, 1614600000); + // 2024-02-01 12:00:00 + res |= 
test_specific(2019, 62, 1, 12, 0, 0, 1706788800); + // 2024-03-01 12:00:00 + res |= test_specific(2019, 63, 1, 12, 0, 0, 1709294400); + // 2020-12-31 23:59:59 + res |= test_specific(2021, 0, 31, 23, 59, 59, 1609459199); + // 2020-03-01 12:00:00 + res |= test_specific(2021, -9, 1, 12, 0, 0, 1583064000); + // 2020-02-01 12:00:00 + res |= test_specific(2021, -10, 1, 12, 0, 0, 1580558400); + // 2019-02-01 12:00:00 + res |= test_specific(2021, -22, 1, 12, 0, 0, 1549022400); + // 1970-01-01 00:00:00 + res |= test_specific(1970, 1, 1, 0, 0, 0, 0); + // 2038-01-19 03:14:07 + res |= test_specific(1970, 1, 1, 0, 0, INT_MAX, INT_MAX); + // 2038-01-19 03:14:07 + res |= test_specific(2038, 1, 19, 3, 14, 7, INT_MAX); + if (sizeof(time_t) < 8) { + // 2038-01-19 03:14:08 + res |= test_specific(2038, 1, 19, 3, 14, 8, (time_t) -1); + } else { + // 2038-01-19 03:14:08 + res |= test_specific(2038, 1, 19, 3, 14, 8, + (time_t)((int64_t) INT_MAX + 1)); + } + + return res == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/htslib-1.21/test/test_view.c b/src/htslib-1.21/test/test_view.c new file mode 100644 index 0000000..c899ff9 --- /dev/null +++ b/src/htslib-1.21/test/test_view.c @@ -0,0 +1,440 @@ +/* test/test_view.c -- simple view tool, purely for use in a test harness. + + Copyright (C) 2012 Broad Institute. + Copyright (C) 2013-2020 Genome Research Ltd. + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "../cram/cram.h" +#include "../htslib/sam.h" +#include "../htslib/vcf.h" +#include "../htslib/hts_log.h" + +struct opts { + char *fn_ref; + int flag; + int clevel; + int ignore_sam_err; + int nreads; + int extra_hdr_nuls; + int benchmark; + int nthreads; + int multi_reg; + char *index; + int min_shift; +}; + +enum test_op { + READ_COMPRESSED = 1, + WRITE_BINARY_COMP = 2, // eg bam, bcf + READ_CRAM = 4, + WRITE_CRAM = 8, + WRITE_UNCOMPRESSED = 16, + WRITE_COMPRESSED = 32, // eg vcf.gz, sam.gz, fastq.gz + WRITE_FASTQ = 64, + WRITE_FASTA = 128, +}; + +int sam_loop(int argc, char **argv, int optind, struct opts *opts, htsFile *in, htsFile *out) { + int r = 0; + sam_hdr_t *h = NULL; + hts_idx_t *idx = NULL; + bam1_t *b = NULL; + + h = sam_hdr_read(in); + if (h == NULL) { + fprintf(stderr, "Couldn't read header for \"%s\"\n", argv[optind]); + return EXIT_FAILURE; + } + h->ignore_sam_err = opts->ignore_sam_err; + if (opts->extra_hdr_nuls > 0) { + char *new_text = realloc(h->text, h->l_text + opts->extra_hdr_nuls); + if (new_text == NULL) { + fprintf(stderr, "Error reallocing header text\n"); + goto fail; + } + h->text = new_text; + memset(&h->text[h->l_text], 0, opts->extra_hdr_nuls); + h->l_text += opts->extra_hdr_nuls; + } + + b = bam_init1(); + if (b == NULL) { + fprintf(stderr, "Out of memory allocating BAM struct\n"); + goto fail; + } + + /* CRAM output */ + if ((opts->flag & WRITE_CRAM) && opts->fn_ref) { + // Create 
CRAM references arrays + int ret = hts_set_fai_filename(out, opts->fn_ref); + + if (ret != 0) + goto fail; + } + + if (!opts->benchmark && sam_hdr_write(out, h) < 0) { + fprintf(stderr, "Error writing output header.\n"); + goto fail; + } + + if (opts->index) { + if (sam_idx_init(out, h, opts->min_shift, opts->index) < 0) { + fprintf(stderr, "Failed to initialise index\n"); + goto fail; + } + } + + if (optind + 1 < argc && !(opts->flag & READ_COMPRESSED)) { // BAM input and has a region + int i; + if ((idx = sam_index_load(in, argv[optind])) == 0) { + fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__); + goto fail; + } + if (opts->multi_reg) { + hts_itr_t *iter = sam_itr_regarray(idx, h, &argv[optind + 1], argc - optind-1); + if (!iter) + goto fail; + while ((r = sam_itr_next(in, iter, b)) >= 0) { + if (!opts->benchmark && sam_write1(out, h, b) < 0) { + fprintf(stderr, "Error writing output.\n"); + hts_itr_destroy(iter); + goto fail; + } + if (opts->nreads && --opts->nreads == 0) + break; + } + hts_itr_destroy(iter); + if (r < -1) { + fprintf(stderr, "Error reading input.\n"); + goto fail; + } + } else { + for (i = optind + 1; i < argc; ++i) { + hts_itr_t *iter; + if ((iter = sam_itr_querys(idx, h, argv[i])) == 0) { + fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]); + goto fail; + } + while ((r = sam_itr_next(in, iter, b)) >= 0) { + if (!opts->benchmark && sam_write1(out, h, b) < 0) { + fprintf(stderr, "Error writing output.\n"); + hts_itr_destroy(iter); + goto fail; + } + if (opts->nreads && --opts->nreads == 0) + break; + } + hts_itr_destroy(iter); + if (r < -1) { + fprintf(stderr, "Error reading input.\n"); + goto fail; + } + } + } + hts_idx_destroy(idx); idx = NULL; + } else while ((r = sam_read1(in, h, b)) >= 0) { + if (!opts->benchmark && sam_write1(out, h, b) < 0) { + fprintf(stderr, "Error writing output.\n"); + goto fail; + } + if (opts->nreads && --opts->nreads == 0) + break; + } + + if (r < -1) { + fprintf(stderr, 
"Error parsing input.\n"); + goto fail; + } + + if (opts->index) { + if (sam_idx_save(out) < 0) { + fprintf(stderr, "Error saving index\n"); + goto fail; + } + } + + bam_destroy1(b); + sam_hdr_destroy(h); + + return 0; + fail: + if (b) bam_destroy1(b); + if (h) sam_hdr_destroy(h); + if (idx) hts_idx_destroy(idx); + + return 1; +} + +int vcf_loop(int argc, char **argv, int optind, struct opts *opts, htsFile *in, htsFile *out) { + bcf_hdr_t *h = bcf_hdr_read(in); + bcf1_t *b = bcf_init1(); + hts_idx_t *idx; + int i, exit_code = 0, r = 0; + + if (!h) + return 1; + if (!b) + return 1; + + if (!opts->benchmark && bcf_hdr_write(out, h) < 0) + return 1; + + if (opts->index) { + if (bcf_idx_init(out, h, opts->min_shift, opts->index) < 0) { + fprintf(stderr, "Failed to initialise index\n"); + return 1; + } + } + + if (optind + 1 < argc) { + // A series of regions. + if ((idx = bcf_index_load(argv[optind])) == 0) { + fprintf(stderr, "[E::%s] fail to load the BVCF index\n", __func__); + return 1; + } + + for (i = optind + 1; i < argc; i++) { + hts_itr_t *iter; + if ((iter = bcf_itr_querys(idx, h, argv[i])) == 0) { + fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]); + exit_code = 1; + break; + } + while ((r = bcf_itr_next(in, iter, b)) >= 0) { + if (!opts->benchmark && bcf_write1(out, h, b) < 0) { + fprintf(stderr, "Error writing output.\n"); + exit_code = 1; + break; + } + if (opts->nreads && --opts->nreads == 0) + break; + } + if (r < -1) { + fprintf(stderr, "Error reading input.\n"); + exit_code = 1; + } + hts_itr_destroy(iter); + if (exit_code != 0) break; + } + + hts_idx_destroy(idx); + + } else { + // Whole file + while ((r = bcf_read1(in, h, b)) >= 0) { + if (!opts->benchmark && bcf_write1(out, h, b) < 0) { + fprintf(stderr, "Error writing output.\n"); + exit_code = 1; + break; + } + if (opts->nreads && --opts->nreads == 0) + break; + } + if (r < -1) { + fprintf(stderr, "Error reading input.\n"); + exit_code = 1; + } + } + + if (exit_code == 0 
&& opts->index) { + if (bcf_idx_save(out) < 0) { + fprintf(stderr, "Error saving index\n"); + exit_code = 1; + } + } + + bcf_destroy1(b); + bcf_hdr_destroy(h); + return exit_code; +} + +int main(int argc, char *argv[]) +{ + htsFile *in, *out; + char moder[8]; + char modew[800]; + int c, exit_code = EXIT_SUCCESS; + hts_opt *in_opts = NULL, *out_opts = NULL; + char *out_fn = "-"; + + struct opts opts; + opts.fn_ref = NULL; + opts.flag = 0; + opts.clevel = -1; + opts.ignore_sam_err = 0; + opts.nreads = 0; + opts.extra_hdr_nuls = 0; + opts.benchmark = 0; + opts.nthreads = 0; // shared pool + opts.multi_reg = 0; + opts.index = NULL; + opts.min_shift = 0; + + while ((c = getopt(argc, argv, "DSIt:i:bzCfFul:o:N:BZ:@:Mx:m:p:v")) >= 0) { + switch (c) { + case 'D': opts.flag |= READ_CRAM; break; + case 'S': opts.flag |= READ_COMPRESSED; break; + case 'I': opts.ignore_sam_err = 1; break; + case 't': opts.fn_ref = optarg; break; + case 'i': if (hts_opt_add(&in_opts, optarg)) return 1; break; + case 'b': opts.flag |= WRITE_BINARY_COMP; break; + case 'z': opts.flag |= WRITE_COMPRESSED; break; + case 'C': opts.flag |= WRITE_CRAM; break; + case 'f': opts.flag |= WRITE_FASTQ; break; + case 'F': opts.flag |= WRITE_FASTA; break; + case 'u': opts.flag |= WRITE_UNCOMPRESSED; break; // eg u-BAM not SAM + case 'l': opts.clevel = atoi(optarg); break; + case 'o': if (hts_opt_add(&out_opts, optarg)) return 1; break; + case 'N': opts.nreads = atoi(optarg); break; + case 'B': opts.benchmark = 1; break; + case 'Z': opts.extra_hdr_nuls = atoi(optarg); break; + case 'M': opts.multi_reg = 1; break; + case '@': opts.nthreads = atoi(optarg); break; + case 'x': opts.index = optarg; break; + case 'm': opts.min_shift = atoi(optarg); break; + case 'p': out_fn = optarg; break; + case 'v': hts_verbose++; break; + } + } + if (argc == optind) { + fprintf(stderr, "Usage: test_view [-DSI] [-t fn_ref] [-i option=value] [-bC] [-l level] [-o option=value] [-N num_reads] [-B] [-Z hdr_nuls] [-@ num_threads] [-x 
index_fn] [-m min_shift] [-p out] [-v] || [region]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "-D: read CRAM format (mode 'c')\n"); + fprintf(stderr, "-S: read compressed BCF, BAM, FAI (mode 'b')\n"); + fprintf(stderr, "-I: ignore SAM parsing errors\n"); + fprintf(stderr, "-t: fn_ref: load CRAM references from the specified fasta file instead of @SQ headers when writing a CRAM file\n"); + fprintf(stderr, "-i: option=value: set an option for CRAM input\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "-b: write binary compressed BCF, BAM, FAI (mode 'b')\n"); + fprintf(stderr, "-z: write text compressed VCF.gz, SAM.gz or FASTQ.gz (mode 'z')\n"); + fprintf(stderr, "-C: write CRAM format (mode 'c')\n"); + fprintf(stderr, "-f: write FASTQ format (mode 'f')\n"); + fprintf(stderr, "-l 0-9: set zlib compression level\n"); + fprintf(stderr, "-o option=value: set an option for CRAM output\n"); + fprintf(stderr, "-N: num_reads: limit the output to the first num_reads reads\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "-B: enable benchmarking\n"); + fprintf(stderr, "-M: use hts_itr_multi iterator\n"); + fprintf(stderr, "-Z hdr_nuls: append specified number of null bytes to the SAM header\n"); + fprintf(stderr, "-@ num_threads: use thread pool with specified number of threads\n\n"); + fprintf(stderr, "-x fn: write index to fn\n"); + fprintf(stderr, "-m min_shift: specifies BAI/CSI bin size; 0 is BAI(BAM) or TBI(VCF), 14 is CSI default\n"); + fprintf(stderr, "-p out_fn: output to out_fn instead of stdout\n"); + fprintf(stderr, "-v: increase verbosity\n"); + fprintf(stderr, "The region list entries should be specified as 'reg:beg-end', with intervals of a region being disjunct and sorted by the starting coordinate.\n"); + return 1; + } + strcpy(moder, "r"); + if (opts.flag & READ_CRAM) strcat(moder, "c"); + else if ((opts.flag & READ_COMPRESSED) == 0) strcat(moder, "b"); + + in = hts_open(argv[optind], moder); + if (in == NULL) { + fprintf(stderr, "Error opening \"%s\"\n", 
argv[optind]); + return EXIT_FAILURE; + } + + strcpy(modew, "w"); + if (opts.clevel >= 0 && opts.clevel <= 9) + snprintf(modew + 1, sizeof(modew) - 1, "%d", opts.clevel); + if (opts.flag & WRITE_CRAM) strcat(modew, "c"); + else if (opts.flag & WRITE_BINARY_COMP) strcat(modew, "b"); + else if (opts.flag & WRITE_COMPRESSED) strcat(modew, "z"); + else if (opts.flag & WRITE_UNCOMPRESSED) strcat(modew, "bu"); + if (opts.flag & WRITE_FASTQ) strcat(modew, "f"); + else if (opts.flag & WRITE_FASTA) strcat(modew, "F"); + out = hts_open(out_fn, modew); + if (out == NULL) { + fprintf(stderr, "Error opening standard output\n"); + return EXIT_FAILURE; + } + + // Process any options; currently cram only. + if (hts_opt_apply(in, in_opts)) + return EXIT_FAILURE; + hts_opt_free(in_opts); + + if (hts_opt_apply(out, out_opts)) + return EXIT_FAILURE; + hts_opt_free(out_opts); + + // Create and share the thread pool + htsThreadPool p = {NULL, 0}; + if (opts.nthreads > 0) { + p.pool = hts_tpool_init(opts.nthreads); + if (!p.pool) { + fprintf(stderr, "Error creating thread pool\n"); + exit_code = 1; + } else { + hts_set_opt(in, HTS_OPT_THREAD_POOL, &p); + hts_set_opt(out, HTS_OPT_THREAD_POOL, &p); + } + } + + int ret; + switch (hts_get_format(in)->category) { + case sequence_data: + ret = sam_loop(argc, argv, optind, &opts, in, out); + break; + + case variant_data: + ret = vcf_loop(argc, argv, optind, &opts, in, out); + break; + + default: + fprintf(stderr, "Unsupported or unknown category of data in input file\n"); + return EXIT_FAILURE; + } + + if (ret != 0) + exit_code = EXIT_FAILURE; + + ret = hts_close(out); + if (ret < 0) { + fprintf(stderr, "Error closing output.\n"); + exit_code = EXIT_FAILURE; + } + ret = hts_close(in); + if (ret < 0) { + fprintf(stderr, "Error closing input.\n"); + exit_code = EXIT_FAILURE; + } + + if (p.pool) + hts_tpool_destroy(p.pool); + + if (fclose(stdout) != 0 && errno != EBADF) { + fprintf(stderr, "Error closing standard output.\n"); + exit_code = 
EXIT_FAILURE; + } + + return exit_code; +} diff --git a/src/htslib-1.21/test/thrash_threads1.c b/src/htslib-1.21/test/thrash_threads1.c new file mode 100644 index 0000000..aeff903 --- /dev/null +++ b/src/htslib-1.21/test/thrash_threads1.c @@ -0,0 +1,48 @@ +/* The MIT/Expat License + +Copyright (C) 2017 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + */ +// Test extreme rapid turnover of readers, to check for +// race conditions between reader thread launching and file close. 
+ +#include + +#include +#include +#include + +#include "../htslib/bgzf.h" + +int main(int argc, char *argv[]) { + if (argc <= 1) { + fprintf(stderr, "Usage: thrash_threads1 input.bam\n"); + exit(1); + } + + int i; + for (i = 0; i < 10000; i++) { + printf("i=%d\n", i); + BGZF *fpin = bgzf_open(argv[1], "r"); + bgzf_mt(fpin, 2, 256); + if (bgzf_close(fpin) < 0) abort(); + } + return 0; +} diff --git a/src/htslib-1.21/test/thrash_threads2.c b/src/htslib-1.21/test/thrash_threads2.c new file mode 100644 index 0000000..09fb436 --- /dev/null +++ b/src/htslib-1.21/test/thrash_threads2.c @@ -0,0 +1,46 @@ +/* The MIT/Expat License + +Copyright (C) 2017 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + */ +// Test extreme rapid turnover of writers, to check for +// race conditions between reader thread launching and file close. 
+ +#include + +#include +#include +#include + +#include "../htslib/bgzf.h" +#include "../htslib/thread_pool.h" + +int main(int argc, char *argv[]) { + int i; + for (i = 0; i < 1000; i++) { + printf("i=%d\n", i); + BGZF *fp = bgzf_open("/dev/null", "w"); + bgzf_mt(fp, 8, 256); + if (bgzf_close(fp)) + abort(); + } + + return 0; +} diff --git a/src/htslib-1.21/test/thrash_threads3.c b/src/htslib-1.21/test/thrash_threads3.c new file mode 100644 index 0000000..446a313 --- /dev/null +++ b/src/htslib-1.21/test/thrash_threads3.c @@ -0,0 +1,51 @@ +/* The MIT/Expat License + +Copyright (C) 2017 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + */ +// Simple open,read,close thrash. 
+ +#include + +#include +#include +#include + +#include "../htslib/bgzf.h" + +int main(int argc, char *argv[]) { + char buf[1000000]; + int i; + + if (argc <= 1) { + fprintf(stderr, "Usage: thrash_threads3 input.bam\n"); + exit(1); + } + + for (i = 0; i < 10000; i++) { + printf("i=%d\n", i); + BGZF *fpin = bgzf_open(argv[1], "r"); + if (bgzf_read(fpin, buf, i*10) < 0) abort(); + bgzf_mt(fpin, 8, 256); + if (bgzf_read(fpin, buf, i*10) < 0) abort(); + if (bgzf_close(fpin) < 0) abort(); + } + return 0; +} diff --git a/src/htslib-1.21/test/thrash_threads4.c b/src/htslib-1.21/test/thrash_threads4.c new file mode 100644 index 0000000..de86333 --- /dev/null +++ b/src/htslib-1.21/test/thrash_threads4.c @@ -0,0 +1,69 @@ +/* The MIT/Expat License + +Copyright (C) 2017 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+ */ +// Spam seeks +#include + +#include +#include +#include + +#include "../htslib/bgzf.h" +#include "../htslib/thread_pool.h" + +int main(int argc, char *argv[]) { + if (argc <= 1) { + fprintf(stderr, "Usage: thrash_threads4 input.bam\n"); + exit(1); + } + + // Find a valid seek location ~64M into the file + int i; + BGZF *fpin = bgzf_open(argv[1], "r"); + char buf[65536]; + for (i = 0; i < 1000; i++) + if (bgzf_read(fpin, buf, 65536) < 0) + abort(); + int64_t pos = bgzf_tell(fpin); + bgzf_close(fpin); + +#define N 1000 + + // Spam seeks + for (i = 0; i < 1000; i++) { + printf("i=%d\n", i); + fpin = bgzf_open(argv[1], "r"); + bgzf_mt(fpin, 8, 256); + if (bgzf_seek(fpin, pos, SEEK_SET) < 0) puts("!");//abort(); + usleep(N); + //if (bgzf_read(fpin, buf, 65536) < 0) abort(); + //write(1, buf, 65536); + if (bgzf_seek(fpin, 0LL, SEEK_SET) < 0) puts("!");//abort(); + usleep(N); + //if (bgzf_read(fpin, buf, 65536) < 0) abort(); + //write(1, buf, 65536); + if (bgzf_close(fpin)) + abort(); + } + + return 0; +} diff --git a/src/htslib-1.21/test/thrash_threads5.c b/src/htslib-1.21/test/thrash_threads5.c new file mode 100644 index 0000000..ce5df6e --- /dev/null +++ b/src/htslib-1.21/test/thrash_threads5.c @@ -0,0 +1,67 @@ +/* The MIT/Expat License + +Copyright (C) 2017 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + */ +// A basic 'zcat filename [N-threads]' + +#include + +#include +#include +#include + +#include "../htslib/bgzf.h" +#include "../htslib/thread_pool.h" + +#define N 1000 +int main(int argc, char *argv[]) { + char buf[N]; + ssize_t l, t = 0; + + if (argc < 2 || isatty(STDOUT_FILENO)) { + fprintf(stderr, + "Usage: thrash_threads5 input.bam num_threads | md5sum\n"); + exit(1); + } + + BGZF *fpin = bgzf_open(argv[1], "r"); + hts_tpool *p = NULL; + if (argc > 2) { + p = hts_tpool_init(atoi(argv[2])); + bgzf_thread_pool(fpin, p, 0); + } + int n = rand()%(N-1)+1; + while ((l = bgzf_read(fpin, buf, n)) > 0) { + if (l != write(STDOUT_FILENO, buf, l)) abort(); + t += l; + if (l != n) { + fprintf(stderr, "expected %d bytes, got %d\n", n, (int)l); + break; + } + n = rand()%(N-1)+1; + } + fprintf(stderr, "close=%d\n", (int)bgzf_close(fpin)); + if (p) hts_tpool_destroy(p); + + fprintf(stderr, "wrote %d bytes\n", (int)t); + + return 0; +} diff --git a/src/htslib-1.21/test/thrash_threads6.c b/src/htslib-1.21/test/thrash_threads6.c new file mode 100644 index 0000000..f7b68d2 --- /dev/null +++ b/src/htslib-1.21/test/thrash_threads6.c @@ -0,0 +1,109 @@ +/* The MIT/Expat License + +Copyright (C) 2017 Genome Research Ltd. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+ */ +// Spam seeks +#include + +#include +#include +#include + +#include "../htslib/bgzf.h" +#include "../htslib/thread_pool.h" + +int main(int argc, char *argv[]) { + if (argc <= 1) { + fprintf(stderr, "Usage: thrash_threads4 input.bam\n"); + exit(1); + } + + // Find a valid seek location ~64M into the file + int i; + ssize_t got; + BGZF *fpin = bgzf_open(argv[1], "r"); + uint64_t upos = 0, uend = 0; + char buf[100000]; + for (i = 0; i < 100; i++) { + if ((got = bgzf_read(fpin, buf, 65536)) < 0) + abort(); + upos += got; + } + int64_t pos = bgzf_tell(fpin); + while ((got = bgzf_read(fpin, buf, 65536)) > 0) { + uend += got; + } + if (got < 0) abort(); + int64_t end = bgzf_tell(fpin); + bgzf_close(fpin); + + // Ensure input is big enough to avoid case 3,4 below going off the end + // of the file + if (uend < upos + 10000000) { + fprintf(stderr, "Please supply a bigger input file\n"); + exit(1); + } + +#define N 1000 + + // Spam random seeks & reads + for (i = 0; i < 1000; i++) { + printf("i=%d\t", i); + fpin = bgzf_open(argv[1], "r"); + int j, eof = 0, mt = 0; + for (j = 0; j < 80; j++) { + int n = rand() % 7; + putchar('0'+n); fflush(stdout); + switch (n) { + case 0: // start + if (bgzf_seek(fpin, 0LL, SEEK_SET) < 0) puts("!");//abort(); + eof = 0; + break; + case 1: // mid + if (bgzf_seek(fpin, pos, SEEK_SET) < 0) puts("!");//abort(); + eof = 0; + break; + case 2: // eof + if (bgzf_seek(fpin, end, SEEK_SET) < 0) puts("!");//abort(); + eof = 1; + break; + case 3: case 4: { + int l = rand()%(n==3?100000:100); + if (bgzf_read(fpin, buf, l) != l*(1-eof)) abort(); + break; + } + case 5: + usleep(N); + break; + case 6: + if (!mt) + bgzf_mt(fpin, 8, 256); + mt = 1; + break; + } + } + printf("\n"); + if (bgzf_close(fpin)) + abort(); + } + + return 0; +} diff --git a/src/htslib-1.21/test/thrash_threads7.c b/src/htslib-1.21/test/thrash_threads7.c new file mode 100644 index 0000000..8536c73 --- /dev/null +++ b/src/htslib-1.21/test/thrash_threads7.c @@ -0,0 +1,120 @@ +/* The 
MIT/Expat License + +Copyright (C) 2017-2018 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + */ +/* + * Test for thread lock-ups caused by a race condition on the queue list + * where the process tpool_worker is working on could get detached just + * after it finished running a job. This would result on the pointer + * to the next process to be searched for work being set to NULL, which + * stopped all the workers from finding anything to do. 
+ */ + + +#include + +#include +#include +#include +#include +#include + +#include "../htslib/thread_pool.h" + + +void *job(void *v) { + unsigned int *usecs = (unsigned int *) v; + usleep(*usecs); + return NULL; +} + +int main(int argc, char *argv[]) { + int run_for_secs = 120; + int num_threads = 8; + int num_jobs = 8, count = 0, n_proc = 8, i; + struct timeval end, now; + hts_tpool *p = NULL; + hts_tpool_process *q[n_proc]; + + p = hts_tpool_init(num_threads); + if (!p) { + perror("hts_tpool_init"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < n_proc; i++) { + q[i] = hts_tpool_process_init(p, 10, 1); + if (!q[i]) { + perror("hts_tpool_process_init"); + exit(EXIT_FAILURE); + } + } + + if (gettimeofday(&end, NULL) != 0) { + perror("gettimeofday"); + exit(EXIT_FAILURE); + } + + end.tv_sec += run_for_secs; + + do { + unsigned int *t; + int qnum = rand() % n_proc; + t = malloc(num_jobs * sizeof(*t)); + if (!t) { + perror("malloc"); + exit(EXIT_FAILURE); + } + if ((count++ & 15) == 0) { + fprintf(stderr, "\r%d ", count); + alarm(10); + } + for (i = 0; i < num_jobs; i++) { + t[i] = 1000; + if (hts_tpool_dispatch(p, q[qnum], job, &t[i]) < 0) { + perror("hts_tpool_dispatch"); + exit(EXIT_FAILURE); + } + } + hts_tpool_process_flush(q[qnum]); + hts_tpool_process_destroy(q[qnum]); + free(t); + q[qnum] = hts_tpool_process_init(p, 10, 1); + if (!q[qnum]) { + perror("hts_tpool_process_init"); + exit(EXIT_FAILURE); + } + + if (gettimeofday(&now, NULL) != 0) { + perror("gettimeofday"); + exit(EXIT_FAILURE); + } + } while (now.tv_sec < end.tv_sec + || (now.tv_sec == end.tv_sec && now.tv_usec < end.tv_usec)); + for (i = 0; i < n_proc; i++) { + hts_tpool_process_flush(q[i]); + hts_tpool_process_destroy(q[i]); + } + hts_tpool_destroy(p); + fprintf(stderr, "\n"); + + return EXIT_SUCCESS; +} diff --git a/src/htslib-1.21/test/thread_pool.md b/src/htslib-1.21/test/thread_pool.md new file mode 100644 index 0000000..1a596c9 --- /dev/null +++ b/src/htslib-1.21/test/thread_pool.md @@ -0,0 
+1,58 @@ +Thread pool tests +================= +The thread_pool.c file has a built-in test program which is enabled when compiling with TEST_MAIN defined. The test program can be run in four different modes by giving a command-line parameter: unordered, ordered1, ordered2, and pipe. The modes and their expected outputs are described below. + +unordered +--------- +Dispatches TASK_SIZE (=1000) jobs to the thread pool and waits for them to finish. The job index (0..TASK_SIZE-1) is passed as a parameter. The job function is doit_square_u, which sleeps for a while and then prints the square of its input parameter to stdout. + +Expected output when n = 1: +``` +RESULT: 0 +... +RESULT: 998001 +``` + +Expected output when n > 1: same, but in jumbled up order. + +ordered1 +-------- +Dispatches TASK_SIZE (=1000) jobs to the thread pool in non-blocking mode. Results are returned on the result queue and are pulled in order. The job index (0..TASK_SIZE-1) is passed as a parameter. The job function is doit_square, which sleeps for a while and then returns the square of its input parameter as a result. Some of the jobs take way longer than the others to finish. + +The expected output is the results printed in order, regardless of n. + +ordered2 +-------- +Starts a dispatcher thread which dispatches jobs to the thread pool. After all regular jobs have been dispatched, a sentinel job follows where the input parameter is set to -1, which receives special handling in doit_square to return the -1 as the result. + +Results are consumed on the main thread using hts_tpool_next_result_wait, until the end-of-job marker is found. + +The expected output is the results printed in order, regardless of n. + +pipe +---- +This program uses one thread pool (hts_tpool) and three queues (hts_tpool_process) shared across threads using a pipe_opt struct. There are four threads: input, stage1to2, stage2to3, and output. 
+ +The input thread (pipe_input_thread procedure) dispatches jobs to the thread pool with the job number (1..TASK_SIZE) and an end-of-job flag as parameters. The jobs are executed by the pipe_stage1 procedure, which multiplies by 256 and sleeps for a short while. + +The stage1to2 thread (pipe_stage1to2 procedure) pulls results from the first queue (q1) and passes them to new jobs in the thread pool. These jobs are executed by the pipe_stage2 procedure, which does the same as pipe_stage1, only slower. + +The stage2to3 thread is similar to the stage1to2 thread. It pulls from the second queue and dispatches new jobs to be executed by the pipe_stage3 procedure. pipe_stage3 is similar to pipe_stage1. + +The output thread pulls from the third queue. + +Expected output: +``` +I 00000001 +1 00000100 +2 00010000 +O 01000000 +... +I 000003e8 +1 0003e800 +2 03e80000 +O e8000000 +``` +...but not in order, because the input queues might be served in any order. + +However, if only the lines from the output thread are printed, they should be in order regardless of the number of threads. diff --git a/src/htslib-1.21/test/usepublic.cpp b/src/htslib-1.21/test/usepublic.cpp new file mode 100644 index 0000000..5dcd861 --- /dev/null +++ b/src/htslib-1.21/test/usepublic.cpp @@ -0,0 +1,75 @@ +/* test/usepublic.cpp -- Test compiling public headers with a C++ compiler. + + Copyright (C) 2023 Centre for Population Genomics. 
+ + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +// Include *all* the public HTSlib headers. 
+ +#include "../htslib/bgzf.h" +#include "../htslib/cram.h" +#include "../htslib/faidx.h" +#include "../htslib/hfile.h" +#include "../htslib/hts.h" +#include "../htslib/hts_defs.h" +#include "../htslib/hts_endian.h" +#include "../htslib/hts_expr.h" +#include "../htslib/hts_log.h" +#include "../htslib/hts_os.h" +#include "../htslib/kbitset.h" +#include "../htslib/kfunc.h" +#include "../htslib/khash.h" +#include "../htslib/khash_str2int.h" +#include "../htslib/klist.h" +#include "../htslib/knetfile.h" +#include "../htslib/kroundup.h" +#include "../htslib/kseq.h" +#include "../htslib/ksort.h" +#include "../htslib/kstring.h" +#include "../htslib/regidx.h" +#include "../htslib/sam.h" +#include "../htslib/synced_bcf_reader.h" +#include "../htslib/tbx.h" +#include "../htslib/thread_pool.h" +#include "../htslib/vcf.h" +#include "../htslib/vcf_sweep.h" +#include "../htslib/vcfutils.h" + +// Instantiate macro-based klib facilities so the resulting function +// definitions are seen by the C++ compiler. + +KHASH_SET_INIT_STR(strhash) + +#define noop_free(ptr) +KLIST_INIT(intlist, int, noop_free) + +KSORT_INIT_STR + +struct myFILE; +extern int myread(struct myFILE *, void *, int); +KSEQ_INIT2(, struct myFILE *, myread) + +int main() +{ + return 0; +} diff --git a/src/htslib-1.21/test/vcf_meta_meta.vcf b/src/htslib-1.21/test/vcf_meta_meta.vcf new file mode 100644 index 0000000..43aba73 --- /dev/null +++ b/src/htslib-1.21/test/vcf_meta_meta.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.3 +##FILTER= +##META= +##META= +##META= +##META= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 123 . TC T . . . diff --git a/src/htslib-1.21/test/with-shlib.sh b/src/htslib-1.21/test/with-shlib.sh new file mode 100755 index 0000000..a5fd7e2 --- /dev/null +++ b/src/htslib-1.21/test/with-shlib.sh @@ -0,0 +1,65 @@ +#!/bin/sh -e +# test/with-shlib.sh -- make shared libhts available via $LD_LIBRARY_PATH etc. +# +# Copyright (C) 2020 University of Glasgow. 
+# +# Author: John Marshall +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +libdir=${0%/*}/libdir-$$.tmp +case $libdir in +/*) abslibdir=$libdir ;; +*) abslibdir=$PWD/$libdir ;; +esac + +# Create a directory containing *only* the shared libhts, and add it +# to the platform-appropriate $LD_LIBRARY_PATH environment variable. + +mkdir $libdir + +case `uname -s` in +Darwin) + (cd $libdir; ln -s ../../libhts.*.dylib .) + export DYLD_LIBRARY_PATH=$abslibdir${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} + ;; + +*CYGWIN*) + (cd $libdir; ln -s ../../cyghts-*.dll .) + export PATH="$abslibdir${PATH:+;$PATH}" + ;; + +*MSYS*|*MINGW*) + (cd $libdir; cp -p ../../hts-*.dll .) + export PATH="$abslibdir${PATH:+;$PATH}" + ;; + +*) + (cd $libdir; ln -s ../../libhts.so.* .) + export LD_LIBRARY_PATH=$abslibdir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} + ;; +esac + +status=0 +"$@" || status=$? 
+ +rm $libdir/*hts* +rmdir $libdir + +exit $status diff --git a/src/htslib-1.21/test/xx#MD.sam b/src/htslib-1.21/test/xx#MD.sam new file mode 100644 index 0000000..7f70f37 --- /dev/null +++ b/src/htslib-1.21/test/xx#MD.sam @@ -0,0 +1,22 @@ +@SQ SN:zz LN:30 +@CO All MD and NM should match the stored values +a1 0 zz 6 1 10M * 0 0 AAAAATTTTT * co:Z:no fields +a2 0 zz 6 1 10M * 0 0 AAAAGGTTTT * +a3 0 zz 6 1 10M * 0 0 GAAAATTTTG * +i1 0 zz 6 1 5M1I5M * 0 0 AAAAAGTTTTT * +i2 0 zz 6 1 5M3I5M * 0 0 AAAAAGGGTTTTT * +i3 0 zz 6 1 10M2I * 0 0 AAAAATTTTTCC * +i4 0 zz 6 1 10M2P2I * 0 0 AAAAATTTTTCC * +d1 0 zz 6 1 5M10D5M * 0 0 AAAAACCCCC * +d2 0 zz 6 1 5M10N5M * 0 0 AAAAACCCCC * +sid 0 zz 6 1 1S4M10D5I4M1S * 0 0 AAAAAGGGGGCCCCC * +A1 0 zz 6 1 10M * 0 0 AAAAATTTTT * MD:Z:10 NM:i:0 co:Z:correct fields +A2 0 zz 6 1 10M * 0 0 AAAAGGTTTT * MD:Z:4A0T4 NM:i:2 +A3 0 zz 6 1 10M * 0 0 GAAAATTTTG * MD:Z:0A8T0 NM:i:2 +I1 0 zz 6 1 5M1I5M * 0 0 AAAAAGTTTTT * MD:Z:10 NM:i:1 +I2 0 zz 6 1 5M3I5M * 0 0 AAAAAGGGTTTTT * MD:Z:10 NM:i:3 +I3 0 zz 6 1 10M2I * 0 0 AAAAATTTTTCC * MD:Z:10 NM:i:2 +I4 0 zz 6 1 10M2P2I * 0 0 AAAAATTTTTCC * MD:Z:10 NM:i:2 +D1 0 zz 6 1 5M10D5M * 0 0 AAAAACCCCC * MD:Z:5^TTTTTTTTTT5 NM:i:10 +D2 0 zz 6 1 5M10N5M * 0 0 AAAAACCCCC * MD:Z:10 NM:i:0 +SID 0 zz 6 1 1S4M10D5I4M1S * 0 0 AAAAAGGGGGCCCCC * MD:Z:4^ATTTTTTTTT0T3 NM:i:16 diff --git a/src/htslib-1.21/test/xx#MD2.sam b/src/htslib-1.21/test/xx#MD2.sam new file mode 100644 index 0000000..b586b35 --- /dev/null +++ b/src/htslib-1.21/test/xx#MD2.sam @@ -0,0 +1,20 @@ +@SQ SN:zz LN:30 +@CO All MD and/or NM should differ to the stored values +a1 0 zz 6 1 10M * 0 0 AAAAATTTTT * MD:Z:9 NM:i:0 co:Z:MD incorrect fields +a2 0 zz 6 1 10M * 0 0 AAAAGGTTTT * MD:Z:4A0A4 NM:i:2 +a3 0 zz 6 1 10M * 0 0 GAAAATTTTG * MD:Z:0G8T0 NM:i:2 +i1 0 zz 6 1 5M1I5M * 0 0 AAAAAGTTTTT * MD:Z:11 NM:i:1 +i2 0 zz 6 1 5M3I5M * 0 0 AAAAAGGGTTTTT * MD:Z:1A1 NM:i:3 +i3 0 zz 6 1 10M2I * 0 0 AAAAATTTTTCC * MD:Z:12 NM:i:2 +d1 0 zz 6 1 5M10D5M * 0 0 AAAAACCCCC * 
MD:Z:5^CTTTTTTTTT5 NM:i:10 +d2 0 zz 6 1 5M10N5M * 0 0 AAAAACCCCC * MD:Z:9 NM:i:0 +sid 0 zz 6 1 1S4M10D5I4M1S * 0 0 AAAAAGGGGGCCCCC * MD:Z:4^TTTTTTTTT0T3 NM:i:16 +A1 0 zz 6 1 10M * 0 0 AAAAATTTTT * MD:Z:10 NM:i:1 co:Z:NM incorrect fields +A2 0 zz 6 1 10M * 0 0 AAAAGGTTTT * MD:Z:4A0T4 NM:i:0 +A3 0 zz 6 1 10M * 0 0 GAAAATTTTG * MD:Z:0A8T0 NM:i:0 +I1 0 zz 6 1 5M1I5M * 0 0 AAAAAGTTTTT * MD:Z:10 NM:i:0 +I2 0 zz 6 1 5M3I5M * 0 0 AAAAAGGGTTTTT * MD:Z:10 NM:i:0 +I3 0 zz 6 1 10M2I * 0 0 AAAAATTTTTCC * MD:Z:10 NM:i:0 +D1 0 zz 6 1 5M10D5M * 0 0 AAAAACCCCC * MD:Z:5^TTTTTTTTTT5 NM:i:11 +D2 0 zz 6 1 5M10N5M * 0 0 AAAAACCCCC * MD:Z:10 NM:i:1 +SID 0 zz 6 1 1S4M10D5I4M1S * 0 0 AAAAAGGGGGCCCCC * MD:Z:4^ATTTTTTTTT0T3 NM:i:1 diff --git a/src/htslib-1.21/test/xx#blank.sam b/src/htslib-1.21/test/xx#blank.sam new file mode 100644 index 0000000..df02675 --- /dev/null +++ b/src/htslib-1.21/test/xx#blank.sam @@ -0,0 +1 @@ +@CO No useful headers or records (0-length file is not considered SAM) diff --git a/src/htslib-1.21/test/xx#large_aux.sam b/src/htslib-1.21/test/xx#large_aux.sam new file mode 100644 index 0000000..93fb8cf --- /dev/null +++ b/src/htslib-1.21/test/xx#large_aux.sam @@ -0,0 +1,4 @@ +@SQ SN:xx LN:20 +a1 16 xx 1 1 10M * 0 0 AAAAAAAAAA * aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 ah:i:1 ai:i:1 aj:i:1 ak:i:1 al:i:1 am:i:1 an:i:1 ao:i:1 ap:i:1 aq:i:1 ar:i:1 as:i:1 at:i:1 au:i:1 av:i:1 aw:i:1 ax:i:1 ay:i:1 az:i:1 ba:i:1 bb:i:1 bc:i:1 bd:i:1 be:i:1 bf:i:1 bg:i:1 bh:i:1 bi:i:1 bj:i:1 bk:i:1 bl:i:1 bm:i:1 bn:i:1 bo:i:1 bp:i:1 bq:i:1 br:i:1 bs:i:1 bt:i:1 bu:i:1 bv:i:1 bw:i:1 bx:i:1 by:i:1 bz:i:1 ca:i:1 cb:i:1 cc:i:1 cd:i:1 ce:i:1 cf:i:1 cg:i:1 ch:i:1 ci:i:1 cj:i:1 ck:i:1 cl:i:1 cm:i:1 cn:i:1 co:i:1 cp:i:1 cq:i:1 cr:i:1 cs:i:1 ct:i:1 cu:i:1 cv:i:1 cw:i:1 cx:i:1 cy:i:1 cz:i:1 da:i:1 db:i:1 dc:i:1 dd:i:1 de:i:1 df:i:1 dg:i:1 dh:i:1 di:i:1 dj:i:1 dk:i:1 dl:i:1 dm:i:1 dn:i:1 do:i:1 dp:i:1 dq:i:1 dr:i:1 ds:i:1 dt:i:1 du:i:1 dv:i:1 dw:i:1 dx:i:1 dy:i:1 dz:i:1 ea:i:1 eb:i:1 ec:i:1 ed:i:1 
ee:i:1 ef:i:1 eg:i:1 eh:i:1 ei:i:1 ej:i:1 ek:i:1 el:i:1 em:i:1 en:i:1 eo:i:1 ep:i:1 eq:i:1 er:i:1 es:i:1 et:i:1 eu:i:1 ev:i:1 ew:i:1 ex:i:1 ey:i:1 ez:i:1 fa:i:1 fb:i:1 fc:i:1 fd:i:1 fe:i:1 ff:i:1 fg:i:1 fh:i:1 fi:i:1 fj:i:1 fk:i:1 fl:i:1 fm:i:1 fn:i:1 fo:i:1 fp:i:1 fq:i:1 fr:i:1 fs:i:1 ft:i:1 fu:i:1 fv:i:1 fw:i:1 fx:i:1 fy:i:1 fz:i:1 ga:i:1 gb:i:1 gc:i:1 gd:i:1 ge:i:1 gf:i:1 gg:i:1 gh:i:1 gi:i:1 gj:i:1 gk:i:1 gl:i:1 gm:i:1 gn:i:1 go:i:1 gp:i:1 gq:i:1 gr:i:1 gs:i:1 gt:i:1 gu:i:1 gv:i:1 gw:i:1 gx:i:1 gy:i:1 gz:i:1 ha:i:1 hb:i:1 hc:i:1 hd:i:1 he:i:1 hf:i:1 hg:i:1 hh:i:1 hi:i:1 hj:i:1 hk:i:1 hl:i:1 hm:i:1 hn:i:1 ho:i:1 hp:i:1 hq:i:1 hr:i:1 hs:i:1 ht:i:1 hu:i:1 hv:i:1 hw:i:1 hx:i:1 hy:i:1 hz:i:1 ia:i:1 ib:i:1 ic:i:1 id:i:1 ie:i:1 if:i:1 ig:i:1 ih:i:1 ii:i:1 ij:i:1 ik:i:1 il:i:1 im:i:1 in:i:1 io:i:1 ip:i:1 iq:i:1 ir:i:1 is:i:1 it:i:1 iu:i:1 iv:i:1 iw:i:1 ix:i:1 iy:i:1 iz:i:1 ja:i:1 jb:i:1 jc:i:1 jd:i:1 je:i:1 jf:i:1 jg:i:1 jh:i:1 ji:i:1 jj:i:1 jk:i:1 jl:i:1 jm:i:1 jn:i:1 jo:i:1 jp:i:1 jq:i:1 jr:i:1 js:i:1 jt:i:1 ju:i:1 +a2 16 xx 1 1 10M * 0 0 AAAAAAAAAA * aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 ah:i:1 ai:i:1 aj:i:1 ak:i:1 al:i:1 am:i:1 an:i:1 ao:i:1 ap:i:1 aq:i:1 ar:i:1 as:i:1 at:i:1 au:i:1 av:i:1 aw:i:1 ax:i:1 ay:i:1 az:i:1 ba:i:1 bb:i:1 bc:i:1 bd:i:1 be:i:1 bf:i:1 bg:i:1 bh:i:1 bi:i:1 bj:i:1 bk:i:1 bl:i:1 bm:i:1 bn:i:1 bo:i:1 bp:i:1 bq:i:1 br:i:1 bs:i:1 bt:i:1 bu:i:1 bv:i:1 bw:i:1 bx:i:1 by:i:1 bz:i:1 ca:i:1 cb:i:1 cc:i:1 cd:i:1 ce:i:1 cf:i:1 cg:i:1 ch:i:1 ci:i:1 cj:i:1 ck:i:1 cl:i:1 cm:i:1 cn:i:1 co:i:1 cp:i:1 cq:i:1 cr:i:1 cs:i:1 ct:i:1 cu:i:1 cv:i:1 cw:i:1 cx:i:1 cy:i:1 cz:i:1 da:i:1 db:i:1 dc:i:1 dd:i:1 de:i:1 df:i:1 dg:i:1 dh:i:1 di:i:1 dj:i:1 dk:i:1 dl:i:1 dm:i:1 dn:i:1 do:i:1 dp:i:1 dq:i:1 dr:i:1 ds:i:1 dt:i:1 du:i:1 dv:i:1 dw:i:1 dx:i:1 dy:i:1 dz:i:1 ea:i:1 eb:i:1 ec:i:1 ed:i:1 ee:i:1 ef:i:1 eg:i:1 eh:i:1 ei:i:1 ej:i:1 ek:i:1 el:i:1 em:i:1 en:i:1 eo:i:1 ep:i:1 eq:i:1 er:i:1 es:i:1 et:i:1 eu:i:1 ev:i:1 ew:i:1 ex:i:1 ey:i:1 ez:i:1 fa:i:1 fb:i:1 fc:i:1 
fd:i:1 fe:i:1 ff:i:1 fg:i:1 fh:i:1 fi:i:1 fj:i:1 fk:i:1 fl:i:1 fm:i:1 fn:i:1 fo:i:1 fp:i:1 fq:i:1 fr:i:1 fs:i:1 ft:i:1 fu:i:1 fv:i:1 fw:i:1 fx:i:1 fy:i:1 fz:i:1 ga:i:1 gb:i:1 gc:i:1 gd:i:1 ge:i:1 gf:i:1 gg:i:1 gh:i:1 gi:i:1 gj:i:1 gk:i:1 gl:i:1 gm:i:1 gn:i:1 go:i:1 gp:i:1 gq:i:1 gr:i:1 gs:i:1 gt:i:1 gu:i:1 gv:i:1 gw:i:1 gx:i:1 gy:i:1 gz:i:1 ha:i:1 hb:i:1 hc:i:1 hd:i:1 he:i:1 hf:i:1 hg:i:1 hh:i:1 hi:i:1 hj:i:1 hk:i:1 hl:i:1 hm:i:1 hn:i:1 ho:i:1 hp:i:1 hq:i:1 hr:i:1 hs:i:1 ht:i:1 hu:i:1 hv:i:1 hw:i:1 hx:i:1 hy:i:1 hz:i:1 ia:i:1 ib:i:1 ic:i:1 id:i:1 ie:i:1 if:i:1 ig:i:1 ih:i:1 ii:i:1 ij:i:1 ik:i:1 il:i:1 im:i:1 in:i:1 io:i:1 ip:i:1 iq:i:1 ir:i:1 is:i:1 it:i:1 iu:i:1 iv:i:1 iw:i:1 ix:i:1 iy:i:1 iz:i:1 ja:i:1 jb:i:1 jc:i:1 jd:i:1 je:i:1 jf:i:1 jg:i:1 jh:i:1 ji:i:1 jj:i:1 jk:i:1 jl:i:1 jm:i:1 jn:i:1 jo:i:1 jp:i:1 jq:i:1 jr:i:1 js:i:1 jt:i:1 ju:i:1 Aa:i:1 Ab:i:1 Ac:i:1 Ad:i:1 Ae:i:1 Af:i:1 Ag:i:1 Ah:i:1 Ai:i:1 Aj:i:1 Ak:i:1 Al:i:1 Am:i:1 An:i:1 Ao:i:1 Ap:i:1 Aq:i:1 Ar:i:1 As:i:1 At:i:1 Au:i:1 Av:i:1 Aw:i:1 Ax:i:1 Ay:i:1 Az:i:1 Ba:i:1 Bb:i:1 Bc:i:1 Bd:i:1 Be:i:1 Bf:i:1 Bg:i:1 Bh:i:1 Bi:i:1 Bj:i:1 Bk:i:1 Bl:i:1 Bm:i:1 Bn:i:1 Bo:i:1 Bp:i:1 Bq:i:1 Br:i:1 Bs:i:1 Bt:i:1 Bu:i:1 Bv:i:1 Bw:i:1 Bx:i:1 By:i:1 Bz:i:1 Ca:i:1 Cb:i:1 Cc:i:1 Cd:i:1 Ce:i:1 Cf:i:1 Cg:i:1 Ch:i:1 Ci:i:1 Cj:i:1 Ck:i:1 Cl:i:1 Cm:i:1 Cn:i:1 Co:i:1 Cp:i:1 Cq:i:1 Cr:i:1 Cs:i:1 Ct:i:1 Cu:i:1 Cv:i:1 Cw:i:1 Cx:i:1 Cy:i:1 Cz:i:1 Da:i:1 Db:i:1 Dc:i:1 Dd:i:1 De:i:1 Df:i:1 Dg:i:1 Dh:i:1 Di:i:1 Dj:i:1 Dk:i:1 Dl:i:1 Dm:i:1 Dn:i:1 Do:i:1 Dp:i:1 Dq:i:1 Dr:i:1 Ds:i:1 Dt:i:1 Du:i:1 Dv:i:1 Dw:i:1 Dx:i:1 Dy:i:1 Dz:i:1 Ea:i:1 Eb:i:1 Ec:i:1 Ed:i:1 Ee:i:1 Ef:i:1 Eg:i:1 Eh:i:1 Ei:i:1 Ej:i:1 Ek:i:1 El:i:1 Em:i:1 En:i:1 Eo:i:1 Ep:i:1 Eq:i:1 Er:i:1 Es:i:1 Et:i:1 Eu:i:1 Ev:i:1 Ew:i:1 Ex:i:1 Ey:i:1 Ez:i:1 Fa:i:1 Fb:i:1 Fc:i:1 Fd:i:1 Fe:i:1 Ff:i:1 Fg:i:1 Fh:i:1 Fi:i:1 Fj:i:1 Fk:i:1 Fl:i:1 Fm:i:1 Fn:i:1 Fo:i:1 Fp:i:1 Fq:i:1 Fr:i:1 Fs:i:1 Ft:i:1 Fu:i:1 Fv:i:1 Fw:i:1 Fx:i:1 Fy:i:1 Fz:i:1 Ga:i:1 Gb:i:1 Gc:i:1 Gd:i:1 Ge:i:1 Gf:i:1 Gg:i:1 
Gh:i:1 Gi:i:1 Gj:i:1 Gk:i:1 Gl:i:1 Gm:i:1 Gn:i:1 Go:i:1 Gp:i:1 Gq:i:1 Gr:i:1 Gs:i:1 Gt:i:1 Gu:i:1 Gv:i:1 Gw:i:1 Gx:i:1 Gy:i:1 Gz:i:1 Ha:i:1 Hb:i:1 Hc:i:1 Hd:i:1 He:i:1 Hf:i:1 Hg:i:1 Hh:i:1 Hi:i:1 Hj:i:1 Hk:i:1 Hl:i:1 Hm:i:1 Hn:i:1 Ho:i:1 Hp:i:1 Hq:i:1 Hr:i:1 Hs:i:1 Ht:i:1 Hu:i:1 Hv:i:1 Hw:i:1 Hx:i:1 Hy:i:1 Hz:i:1 Ia:i:1 Ib:i:1 Ic:i:1 Id:i:1 Ie:i:1 If:i:1 Ig:i:1 Ih:i:1 Ii:i:1 Ij:i:1 Ik:i:1 Il:i:1 Im:i:1 In:i:1 Io:i:1 Ip:i:1 Iq:i:1 Ir:i:1 Is:i:1 It:i:1 Iu:i:1 Iv:i:1 Iw:i:1 Ix:i:1 Iy:i:1 Iz:i:1 Ja:i:1 Jb:i:1 Jc:i:1 Jd:i:1 Je:i:1 Jf:i:1 Jg:i:1 Jh:i:1 Ji:i:1 Jj:i:1 Jk:i:1 Jl:i:1 Jm:i:1 Jn:i:1 Jo:i:1 Jp:i:1 Jq:i:1 Jr:i:1 Js:i:1 Jt:i:1 Ju:i:1 +b1 16 xx````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&'''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccc
cccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppp
pppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooo
oooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffggggggggggggggggggggggggggggggggggggggggggggggg
ggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz```````````````````````````````````````````````
`````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttt
tttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffffffffffffffffffffffffffffffffffffffffffff
fffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrsssssssssssssssssssssssssssssssssssssssssssssss
sssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllllllllllllllllllllllllllllllllllllllllllllll
lllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccc
cccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppp
pppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooo
oooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffggggggggggggggggggggggggggggggggggggggggggggggg
ggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz```````````````````````````````````````````````
`````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttt
tttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffffffffffffffffffffffffffffffffffffffffffff
fffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333344444444444444444444444444444444444444444444444
4444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppp
pppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooo
oooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffggggggggggggggggggggggggggggggggggggggggggggggg
ggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz```````````````````````````````````````````````
`````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttt
tttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&'''''''''''''''''''''''''''''''''''''''''''''''
'''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccc
cccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppp
pppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////00000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllllllllllllllllllllllllllllllllllllllllllllll
lllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccc
cccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppp
pppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooo
oooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffggggggggggggggggggggggggggggggggggggggggggggggg
ggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555
555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccc
cccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011111111111111111111111111111111111111111111111
1111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffffffffffffffffffffffffffffffffffffffffffff
fffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrsssssssssssssssssssssssssssssssssssssssssssssss
sssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllllllllllllllllllllllllllllllllllllllllllllll
lllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222233333333333333333333333333333333333333333333333
33333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooo
oooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"""""""""""""""""""""""""""""""""""""""""""""""
"""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222233333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333334444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555566666666666666666666666666666666666666666666666
66666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ
JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccc
cccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppp
pppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooo
oooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<===============================================
=====================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ
QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""####################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))****************************************************************************************************++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------------------------------------------------------------------------------------------------....................................................................................................////////////////////////////////////////////////////////////////////////////////////////////////////0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555556666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777788888888888888888888888888888888888888888888888
888888888888888888888888888888888888888888888888888889999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<====================================================================================================>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>????????????????????????????????????????????????????????????????????????????????????????????????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________```````````````````````````````````````````````
`````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttt
tttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffffffffffffffffffffffffffffffffffffffffffff
fffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrsssssssssssssssssssssssssssssssssssssssssssssss
sssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllllllllllllllllllllllllllllllllllllllllllllll
lllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddd
dddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz````````````````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz diff --git a/src/htslib-1.21/test/xx#large_aux2.sam b/src/htslib-1.21/test/xx#large_aux2.sam new file mode 100644 index 0000000..9e338ed --- /dev/null +++ b/src/htslib-1.21/test/xx#large_aux2.sam @@ -0,0 +1,11 @@ +@SQ SN:xx LN:20 +a1 0 xx 1 1 1M * 0 0 A # aa:i:1 +a2 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 +a3 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 
ac:i:1 +a4 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 +a5 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 +a6 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 +a7 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 +a8 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 ah:i:1 +a9 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 ah:i:1 ai:i:1 +aA 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 ah:i:1 ai:i:1 aj:i:1 diff --git a/src/htslib-1.21/test/xx#large_aux_java.cram b/src/htslib-1.21/test/xx#large_aux_java.cram new file mode 100644 index 0000000..ceafd2f Binary files /dev/null and b/src/htslib-1.21/test/xx#large_aux_java.cram differ diff --git a/src/htslib-1.21/test/xx#minimal.sam b/src/htslib-1.21/test/xx#minimal.sam new file mode 100644 index 0000000..eb72140 --- /dev/null +++ b/src/htslib-1.21/test/xx#minimal.sam @@ -0,0 +1,10 @@ +@SQ SN:xx LN:20 +@SQ SN:yy LN:20 +a0 16 xx 4 1 10H * 0 0 * * +a1 16 xx 4 1 5H0M5H * 0 0 * * +a2 16 xx 4 1 5H0I10M0D5H * 0 0 * * +A0 16 yy 4 1 0H * 0 0 * * +A1 16 yy 4 1 0I * 0 0 * * +A2 16 yy 4 1 0D * 0 0 * * +A3 16 yy 4 1 0M * 0 0 * * +A4 16 yy 4 1 0P * 0 0 * * diff --git a/src/htslib-1.21/test/xx#pair.sam b/src/htslib-1.21/test/xx#pair.sam new file mode 100644 index 0000000..aa8c77b --- /dev/null +++ b/src/htslib-1.21/test/xx#pair.sam @@ -0,0 +1,7 @@ +@SQ SN:xx LN:20 +a1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +b1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +c1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +a1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +b1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +c1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** diff --git a/src/htslib-1.21/test/xx#repeated.sam b/src/htslib-1.21/test/xx#repeated.sam new file mode 100644 index 0000000..88fc3d6 --- /dev/null +++ b/src/htslib-1.21/test/xx#repeated.sam @@ -0,0 +1,7 @@ +@SQ SN:xx LN:20 +S 67 xx 1 1 10M = 11 20 AAAAAAAAAA 
********** +S 131 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +S 67 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +S 131 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +S 67 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +S 131 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** diff --git a/src/htslib-1.21/test/xx#rg.sam b/src/htslib-1.21/test/xx#rg.sam new file mode 100644 index 0000000..2d7efbc --- /dev/null +++ b/src/htslib-1.21/test/xx#rg.sam @@ -0,0 +1,13 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:xx LN:20 AS:? SP:? UR:? M5:bbf4de6d8497a119dda6e074521643dc +@RG ID:x1 SM:x1 +@RG ID:x2 SM:x2 LB:x PG:foo:bar PI:1111 +@PG ID:emacs PN:emacs VN:23.1.1 +@CO also test +@CO other headers +a1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** RG:Z:x1 +b1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** RG:Z:x2 +c1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** +a2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** RG:Z:x1 +b2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** RG:Z:x2 +c2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** diff --git a/src/htslib-1.21/test/xx#tlen.sam b/src/htslib-1.21/test/xx#tlen.sam new file mode 100644 index 0000000..4b2f70e --- /dev/null +++ b/src/htslib-1.21/test/xx#tlen.sam @@ -0,0 +1,34 @@ +@CO xx has been encoded using the SAM spec; leftmost to rightmost +@CO yy has been encoded using bwa/picard methods; 5' to 3' +@CO +@CO 00000000011111111112 +@CO 12345678901234567890 +@CO AAAAAAAAAATTTTTTTTTT +@CO 1>>>> <<<<2 x1 +@CO +@CO 1>>>> x2 +@CO <<<<2 +@CO +@CO 1>>>> x3 (7..15 vs 10..11) +@CO <<<<2 +@CO +@CO <<<<2 1>>>> x4 (1..20 vs 16..5) +@CO +@SQ SN:xx LN:20 +@SQ SN:yy LN:20 +x1 99 xx 1 1 5M = 16 20 AAAAA ***** +x1 147 xx 16 1 5M = 1 -20 TTTTT ***** +x2 99 xx 7 1 5M = 10 8 AAAAT ***** +x2 147 xx 10 1 5M = 7 -8 ATTTT ***** +x3 147 xx 7 1 5M = 10 8 AAAAT ***** +x3 99 xx 10 1 5M = 7 -8 ATTTT ***** +x4 147 xx 1 1 5M = 16 20 AAAAA ***** +x4 99 xx 16 1 5M = 1 -20 TTTTT ***** +y1 99 yy 1 1 5M = 16 20 AAAAA ***** +y1 147 yy 16 1 5M = 1 -20 TTTTT ***** +y2 99 yy 7 1 5M = 10 8 AAAAT ***** +y2 147 yy 10 1 5M = 7 -8 ATTTT 
***** +y3 147 yy 7 1 5M = 10 -2 AAAAT ***** +y3 99 yy 10 1 5M = 7 2 ATTTT ***** +y4 147 yy 1 1 5M = 16 10 AAAAA ***** +y4 99 yy 16 1 5M = 1 -10 TTTTT ***** diff --git a/src/htslib-1.21/test/xx#tlen2.sam b/src/htslib-1.21/test/xx#tlen2.sam new file mode 100644 index 0000000..b02989e --- /dev/null +++ b/src/htslib-1.21/test/xx#tlen2.sam @@ -0,0 +1,36 @@ +@CO As per xx#tlen.sam but every read is unsorted with partner +@CO +@CO xx has been encoded using the SAM spec; leftmost to rightmost +@CO yy has been encoded using bwa/picard methods; 5' to 3' +@CO +@CO 00000000011111111112 +@CO 12345678901234567890 +@CO AAAAAAAAAATTTTTTTTTT +@CO 1>>>> <<<<2 x1 +@CO +@CO 1>>>> x2 +@CO <<<<2 +@CO +@CO 1>>>> x3 (7..15 vs 10..11) +@CO <<<<2 +@CO +@CO <<<<2 1>>>> x4 (1..20 vs 16..5) +@CO +@SQ SN:xx LN:20 +@SQ SN:yy LN:20 +x1 147 xx 16 1 5M = 1 -20 TTTTT ***** +x1 99 xx 1 1 5M = 16 20 AAAAA ***** +x2 147 xx 10 1 5M = 7 -8 ATTTT ***** +x2 99 xx 7 1 5M = 10 8 AAAAT ***** +x3 99 xx 10 1 5M = 7 -8 ATTTT ***** +x3 147 xx 7 1 5M = 10 8 AAAAT ***** +x4 99 xx 16 1 5M = 1 -20 TTTTT ***** +x4 147 xx 1 1 5M = 16 20 AAAAA ***** +y1 147 yy 16 1 5M = 1 -20 TTTTT ***** +y1 99 yy 1 1 5M = 16 20 AAAAA ***** +y2 147 yy 10 1 5M = 7 -8 ATTTT ***** +y2 99 yy 7 1 5M = 10 8 AAAAT ***** +y3 99 yy 10 1 5M = 7 2 ATTTT ***** +y3 147 yy 7 1 5M = 10 -2 AAAAT ***** +y4 99 yy 16 1 5M = 1 -10 TTTTT ***** +y4 147 yy 1 1 5M = 16 10 AAAAA ***** diff --git a/src/htslib-1.21/test/xx#triplet.sam b/src/htslib-1.21/test/xx#triplet.sam new file mode 100644 index 0000000..1255725 --- /dev/null +++ b/src/htslib-1.21/test/xx#triplet.sam @@ -0,0 +1,7 @@ +@SQ SN:xx LN:20 +@SQ SN:yy LN:20 +a1 67 xx 1 1 10M = 6 20 AAAAAAAAAA ********** +a1 35 xx 6 1 10M = 11 -20 AAAAATTTTT ********** +a1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +a1 67 yy 1 1 10M = 6 15 AAAAAAAAAA ********** +a1 3 yy 6 1 10M = 1 -15 AAAAATTTTT ********** diff --git a/src/htslib-1.21/test/xx#unsorted.sam b/src/htslib-1.21/test/xx#unsorted.sam new file mode 100644 
index 0000000..05887a3 --- /dev/null +++ b/src/htslib-1.21/test/xx#unsorted.sam @@ -0,0 +1,8 @@ +@SQ SN:xx LN:20 +@SQ SN:yy LN:20 +b1 147 yy 11 1 10M = 1 -20 TTTTTTTTTT ********** +a1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +a1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +b1 99 yy 1 1 10M = 11 20 AAAAAAAAAA ********** +c1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +c1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** diff --git a/src/htslib-1.21/test/xx.fa b/src/htslib-1.21/test/xx.fa new file mode 100644 index 0000000..faa1fb0 --- /dev/null +++ b/src/htslib-1.21/test/xx.fa @@ -0,0 +1,7 @@ +>xx +AAAAAAAAAATTTTTTTTTT +>yy +AAAAAAAAAATTTTTTTTTT +>zz +AAAAAAAAAATTTTTTTTTTCCCCCCCCCC + diff --git a/src/htslib-1.21/test/xx.fa.fai b/src/htslib-1.21/test/xx.fa.fai new file mode 100644 index 0000000..279eeec --- /dev/null +++ b/src/htslib-1.21/test/xx.fa.fai @@ -0,0 +1,3 @@ +xx 20 4 20 21 +yy 20 29 20 21 +zz 30 54 30 31 diff --git a/src/htslib-1.18/textutils.c b/src/htslib-1.21/textutils.c similarity index 100% rename from src/htslib-1.18/textutils.c rename to src/htslib-1.21/textutils.c diff --git a/src/htslib-1.21/textutils_internal.h b/src/htslib-1.21/textutils_internal.h new file mode 100644 index 0000000..faa1d4d --- /dev/null +++ b/src/htslib-1.21/textutils_internal.h @@ -0,0 +1,426 @@ +/* textutils_internal.h -- non-bioinformatics utility routines for text etc. + + Copyright (C) 2016,2018-2020, 2024 Genome Research Ltd. 
+ + Author: John Marshall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_TEXTUTILS_INTERNAL_H +#define HTSLIB_TEXTUTILS_INTERNAL_H + +/* N.B. These interfaces may be used by plug-ins */ + +#include +#include +#include "htslib/kstring.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/// Decode percent-encoded (URL-encoded) text +/** On input, _dest_ should be a buffer at least the same size as _s_, + and may be equal to _s_ to decode in place. On output, _dest_ will be + NUL-terminated and the number of characters written (not including the + NUL) is stored in _destlen_. +*/ +int hts_decode_percent(char *dest, size_t *destlen, const char *s); + +/// Return decoded data length given length of base64-encoded text +/** This gives an upper bound, as it overestimates by a byte or two when + the encoded text ends with (possibly omitted) `=` padding characters. 
+*/ +size_t hts_base64_decoded_length(size_t len); + +/// Decode base64-encoded data +/** On input, _dest_ should be a sufficient buffer (see `hts_base64_length()`), + and may be equal to _s_ to decode in place. On output, the number of + bytes written is stored in _destlen_. +*/ +int hts_decode_base64(char *dest, size_t *destlen, const char *s); + +/// Token structure returned by JSON lexing functions +/** Structure is defined in hts_internal.h + */ + +typedef struct hts_json_token hts_json_token; + +/// Allocate an empty JSON token structure, for use with hts_json_* functions +/** @return An empty token on success; NULL on failure + */ +HTSLIB_EXPORT +hts_json_token *hts_json_alloc_token(void); + +/// Free a JSON token +HTSLIB_EXPORT +void hts_json_free_token(hts_json_token *token); + +/// Accessor function to get JSON token type +/** @param token Pointer to JSON token + @return Character indicating the token type + +Token types correspond to scalar JSON values and selected punctuation +as follows: + - `s` string + - `n` number + - `b` boolean literal + - `.` null literal + - `{`, `}`, `[`, `]` object and array delimiters + - `?` lexing error + - `!` other errors (e.g. out of memory) + - `\0` terminator at end of input +*/ +HTSLIB_EXPORT +char hts_json_token_type(hts_json_token *token); + +/// Accessor function to get JSON token in string form +/** @param token Pointer to JSON token + @return String representation of the JSON token; NULL if unset + +If the token was parsed from a string using hts_json_snext(), the return value +will point into the string passed as the first parameter to hts_json_snext(). +If the token was parsed from a file using hts_json_fnext(), the return value +will point at the kstring_t buffer passed as the third parameter to +hts_json_fnext(). In that case, the value will only be valid until the +next call to hts_json_fnext(). 
+ */ +HTSLIB_EXPORT +char *hts_json_token_str(hts_json_token *token); + +/// Read one JSON token from a string +/** @param str The input C string + @param state The input string state + @param token On return, filled in with the token read + @return The type of the token read + +On return, `token->str` points into the supplied input string, which +is modified by having token-terminating characters overwritten as NULs. +The `state` argument records the current position within `str` after each +`hts_json_snext()` call, and should be set to 0 before the first call. +*/ +HTSLIB_EXPORT +char hts_json_snext(char *str, size_t *state, hts_json_token *token); + +/// Read and discard a complete JSON value from a string +/** @param str The input C string + @param state The input string state, as per `hts_json_snext()` + @param type If the first token of the value to be discarded has already + been read, provide its type; otherwise `'\0'` + @return One of `v` (success), `\0` (end of string), and `?` (lexing error) + +Skips a complete JSON value, which may be a single token or an entire object +or array. +*/ +HTSLIB_EXPORT +char hts_json_sskip_value(char *str, size_t *state, char type); + +struct hFILE; + +/// Read one JSON token from a file +/** @param fp The file stream + @param token On return, filled in with the token read + @param kstr Buffer used to store the token string returned + @return The type of the token read + +The `kstr` buffer is used to store the string value of the token read, +so `token->str` is only valid until the next time `hts_json_fnext()` is +called with the same `kstr` argument. 
+*/ +HTSLIB_EXPORT +char hts_json_fnext(struct hFILE *fp, hts_json_token *token, kstring_t *kstr); + +/// Read and discard a complete JSON value from a file +/** @param fp The file stream + @param type If the first token of the value to be discarded has already + been read, provide its type; otherwise `'\0'` + @return One of `v` (success), `\0` (EOF), and `?` (lexing error) + +Skips a complete JSON value, which may be a single token or an entire object +or array. +*/ +HTSLIB_EXPORT +char hts_json_fskip_value(struct hFILE *fp, char type); + +// The functions operate on ints such as are returned by fgetc(), +// i.e., characters represented as unsigned-char-valued ints, or EOF. +// To operate on plain chars (and to avoid warnings on some platforms), +// technically one must cast to unsigned char everywhere (see CERT STR37-C) +// or less painfully use these *_c() functions that operate on plain chars +// (but not EOF, which must be considered separately where it is applicable). +// TODO We may eventually wish to implement these functions directly without +// using their equivalents, and thus make them immune to locales. 
+static inline int isalnum_c(char c) { return isalnum((unsigned char) c); } +static inline int isalpha_c(char c) { return isalpha((unsigned char) c); } +static inline int isdigit_c(char c) { return isdigit((unsigned char) c); } +static inline int isgraph_c(char c) { return isgraph((unsigned char) c); } +static inline int islower_c(char c) { return islower((unsigned char) c); } +static inline int isprint_c(char c) { return isprint((unsigned char) c); } +static inline int ispunct_c(char c) { return ispunct((unsigned char) c); } +static inline int isspace_c(char c) { return isspace((unsigned char) c); } +static inline int isupper_c(char c) { return isupper((unsigned char) c); } +static inline int isxdigit_c(char c) { return isxdigit((unsigned char) c); } +static inline char tolower_c(char c) { return tolower((unsigned char) c); } +static inline char toupper_c(char c) { return toupper((unsigned char) c); } + +/// Copy possibly malicious text data to a buffer +/** @param buf Destination buffer + @param buflen Size of the destination buffer (>= 4; >= 6 when quotes used) + @param quote Quote character (or '\0' for no quoting of the output) + @param s String to be copied + @param len Length of the input string, or SIZE_MAX to copy until '\0' + @return The destination buffer, @a buf. + +Copies the source text string (escaping any unprintable characters) to the +destination buffer. The destination buffer will always be NUL-terminated; +the text will be truncated (and "..." appended) if necessary to make it fit. + */ +const char *hts_strprint(char *buf, size_t buflen, char quote, + const char *s, size_t len); + +// Faster replacements for strtol, for use when parsing lots of numbers. 
+// Note that these only handle base 10 and do not skip leading whitespace + +/// Convert a string to a signed integer, with overflow detection +/** @param[in] in Input string + @param[out] end Returned end pointer + @param[in] bits Bits available for the converted value + @param[out] failed Location of overflow flag + @return String value converted to an int64_t + +Converts a signed decimal string to an int64_t. The string should +consist of an optional '+' or '-' sign followed by one or more of +the digits 0 to 9. The output value will be limited to fit in the +given number of bits (including the sign bit). If the value is too big, +the largest possible value will be returned and *failed will be set to 1. + +The address of the first character following the converted number will +be stored in *end. + +Both end and failed must be non-NULL. + */ +static inline int64_t hts_str2int(const char *in, char **end, int bits, + int *failed) { + uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1; + uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322 + const unsigned char *v = (const unsigned char *) in; + const unsigned int ascii_zero = '0'; // Prevents conversion to signed + unsigned int d; + + int neg; + switch(*v) { + case '-': + limit++; + neg=1; + v++; + // See "dup" comment below + while (--fast && *v>='0' && *v<='9') + n = n*10 + *v++ - ascii_zero; + break; + + case '+': + v++; + // fall through + + default: + neg = 0; + // dup of above. This is somewhat unstable and mainly for code + // size cheats to prevent instruction cache lines spanning 32-byte + // blocks in the sam_parse_B_vals calling code. It's been tested + // on gcc7, gcc13, clang10 and clang16 with -O2 and -O3. While + // not exhaustive, this code duplication gives stable fast results + // while a single copy does not. 
+ // (NB: system was "seq4d", so quite old) + while (--fast && *v>='0' && *v<='9') + n = n*10 + *v++ - ascii_zero; + break; + } + + // NB gcc7 is slow with (unsigned)(*v - ascii_zero) < 10, + // while gcc13 prefers it. + if (*v>='0' && !fast) { // rejects ',' and tab + uint64_t limit_d_10 = limit / 10; + uint64_t limit_m_10 = limit - 10 * limit_d_10; + while ((d = *v - ascii_zero) < 10) { + if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) { + n = n*10 + d; + v++; + } else { + do { v++; } while (*v - ascii_zero < 10); + n = limit; + *failed = 1; + break; + } + } + } + + *end = (char *)v; + + return neg ? (int64_t)-n : (int64_t)n; +} + +/// Convert a string to an unsigned integer, with overflow detection +/** @param[in] in Input string + @param[out] end Returned end pointer + @param[in] bits Bits available for the converted value + @param[out] failed Location of overflow flag + @return String value converted to a uint64_t + +Converts an unsigned decimal string to a uint64_t. The string should +consist of an optional '+' sign followed by one or more of the digits 0 +to 9. The output value will be limited to fit in the given number of bits. +If the value is too big, the largest possible value will be returned +and *failed will be set to 1. + +The address of the first character following the converted number will +be stored in *end. + +Both end and failed must be non-NULL. + */ + +static inline uint64_t hts_str2uint(const char *in, char **end, int bits, + int *failed) { + uint64_t n = 0, limit = (bits < 64 ? 
(1ULL << bits) : 0) - 1; + const unsigned char *v = (const unsigned char *) in; + const unsigned int ascii_zero = '0'; // Prevents conversion to signed + uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322 + unsigned int d; + + if (*v == '+') + v++; + + while (--fast && *v>='0' && *v<='9') + n = n*10 + *v++ - ascii_zero; + + if ((unsigned)(*v - ascii_zero) < 10 && !fast) { + uint64_t limit_d_10 = limit / 10; + uint64_t limit_m_10 = limit - 10 * limit_d_10; + while ((d = *v - ascii_zero) < 10) { + if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) { + n = n*10 + d; + v++; + } else { + do { v++; } while (*v - ascii_zero < 10); + n = limit; + *failed = 1; + break; + } + } + } + + *end = (char *)v; + return n; +} + +/// Convert a string to a double, with overflow detection +/** @param[in] in Input string + @param[out] end Returned end pointer + @param[out] failed Location of overflow flag + @return String value converted to a double + +Converts a floating point value string to a double. The string should +have the format [+-]?[0-9]*[.]?[0-9]* with at least one and no more than 15 +digits. Strings that do not match (inf, nan, values with exponents) will +be passed on to strtod() for processing. + +If the value is too big, the largest possible value will be returned; +if it is too small to be represented in a double zero will be returned. +In both cases errno will be set to ERANGE. + +If no characters could be converted, *failed will be set to 1. + +The address of the first character following the converted number will +be stored in *end. + +Both end and failed must be non-NULL. 
+ */ + +static inline double hts_str2dbl(const char *in, char **end, int *failed) { + uint64_t n = 0; + int max_len = 15; + const unsigned char *v = (const unsigned char *) in; + const unsigned int ascii_zero = '0'; // Prevents conversion to signed + int neg = 0, point = -1; + double d; + static double D[] = {1,1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, + 1e8, 1e9, 1e10,1e11,1e12,1e13,1e14,1e15, + 1e16,1e17,1e18,1e19,1e20}; + + while (isspace(*v)) + v++; + + if (*v == '-') { + neg = 1; + v++; + } else if (*v == '+') { + v++; + } + + switch(*v) { + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + + case '0': + if (v[1] != 'x' && v[1] != 'X') break; + // else fall through - hex number + + default: + // Non numbers, like NaN, Inf + d = strtod(in, end); + if (*end == in) + *failed = 1; + return d; + } + + while (*v == '0') ++v; + + const unsigned char *start = v; + + while (--max_len && *v>='0' && *v<='9') + n = n*10 + *v++ - ascii_zero; + if (max_len && *v == '.') { + point = v - start; + v++; + while (--max_len && *v>='0' && *v<='9') + n = n*10 + *v++ - ascii_zero; + } + if (point < 0) + point = v - start; + + // Outside the scope of this quick and dirty parser. + if (!max_len || *v == 'e' || *v == 'E') { + d = strtod(in, end); + if (*end == in) + *failed = 1; + return d; + } + + *end = (char *)v; + d = n / D[v - start - point]; + + return neg ? 
-d : d; +} + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/htslib-1.18/thread_pool.c b/src/htslib-1.21/thread_pool.c similarity index 100% rename from src/htslib-1.18/thread_pool.c rename to src/htslib-1.21/thread_pool.c diff --git a/src/htslib-1.18/thread_pool_internal.h b/src/htslib-1.21/thread_pool_internal.h similarity index 100% rename from src/htslib-1.18/thread_pool_internal.h rename to src/htslib-1.21/thread_pool_internal.h diff --git a/src/htslib-1.18/vcf.5 b/src/htslib-1.21/vcf.5 similarity index 100% rename from src/htslib-1.18/vcf.5 rename to src/htslib-1.21/vcf.5 diff --git a/src/htslib-1.21/vcf.c b/src/htslib-1.21/vcf.c new file mode 100644 index 0000000..105c753 --- /dev/null +++ b/src/htslib-1.21/vcf.c @@ -0,0 +1,5952 @@ +/* vcf.c -- VCF/BCF API functions. + + Copyright (C) 2012, 2013 Broad Institute. + Copyright (C) 2012-2024 Genome Research Ltd. + Portions copyright (C) 2014 Intel Corporation. + + Author: Heng Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +#include "fuzz_settings.h" +#endif + +#include "htslib/vcf.h" +#include "htslib/bgzf.h" +#include "htslib/tbx.h" +#include "htslib/hfile.h" +#include "hts_internal.h" +#include "htslib/hts_endian.h" +#include "htslib/khash_str2int.h" +#include "htslib/kstring.h" +#include "htslib/sam.h" +#include "htslib/khash.h" + +#if 0 +// This helps on Intel a bit, often 6-7% faster VCF parsing. +// Conversely sometimes harms AMD Zen4 as ~9% slower. +// Possibly related to IPC differences. However for now it's just a +// curiousity we ignore and stick with the simpler code. +// +// Left here as a hint for future explorers. +static inline int xstreq(const char *a, const char *b) { + while (*a && *a == *b) + a++, b++; + return *a == *b; +} + +#define KHASH_MAP_INIT_XSTR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, xstreq) + +KHASH_MAP_INIT_XSTR(vdict, bcf_idinfo_t) +#else +KHASH_MAP_INIT_STR(vdict, bcf_idinfo_t) +#endif + +typedef khash_t(vdict) vdict_t; + +KHASH_MAP_INIT_STR(hdict, bcf_hrec_t*) +typedef khash_t(hdict) hdict_t; + + +#include "htslib/kseq.h" +HTSLIB_EXPORT +uint32_t bcf_float_missing = 0x7F800001; + +HTSLIB_EXPORT +uint32_t bcf_float_vector_end = 0x7F800002; + +HTSLIB_EXPORT +uint8_t bcf_type_shift[] = { 0, 0, 1, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + +static bcf_idinfo_t bcf_idinfo_def = { .info = { 15, 15, 15 }, .hrec = { NULL, NULL, NULL}, .id = -1 }; + +/* + Partial support for 64-bit POS and Number=1 INFO tags. 
+ Notes: + - the support for 64-bit values is motivated by POS and INFO/END for large genomes + - the use of 64-bit values does not conform to the specification + - cannot output 64-bit BCF and if it does, it is not compatible with anything + - experimental, use at your risk +*/ +#ifdef VCF_ALLOW_INT64 + #define BCF_MAX_BT_INT64 (0x7fffffffffffffff) /* INT64_MAX, for internal use only */ + #define BCF_MIN_BT_INT64 -9223372036854775800LL /* INT64_MIN + 8, for internal use only */ +#endif + +#define BCF_IS_64BIT (1<<30) + + +// Opaque structure with auxilary data which allows to extend bcf_hdr_t without breaking ABI. +// Note that this preserving API and ABI requires that the first element is vdict_t struct +// rather than a pointer, as user programs may (and in some cases do) access the dictionary +// directly as (vdict_t*)hdr->dict. +typedef struct +{ + vdict_t dict; // bcf_hdr_t.dict[0] vdict_t dictionary which keeps bcf_idinfo_t for BCF_HL_FLT,BCF_HL_INFO,BCF_HL_FMT + hdict_t *gen; // hdict_t dictionary which keeps bcf_hrec_t* pointers for generic and structured fields + size_t *key_len;// length of h->id[BCF_DT_ID] strings +} +bcf_hdr_aux_t; + +static inline bcf_hdr_aux_t *get_hdr_aux(const bcf_hdr_t *hdr) +{ + return (bcf_hdr_aux_t *)hdr->dict[0]; +} + +static char *find_chrom_header_line(char *s) +{ + char *nl; + if (strncmp(s, "#CHROM\t", 7) == 0) return s; + else if ((nl = strstr(s, "\n#CHROM\t")) != NULL) return nl+1; + else return NULL; +} + +/************************* + *** VCF header parser *** + *************************/ + +static int bcf_hdr_add_sample_len(bcf_hdr_t *h, const char *s, size_t len) +{ + const char *ss = s; + while ( *ss && isspace_c(*ss) && ss - s < len) ss++; + if ( !*ss || ss - s == len) + { + hts_log_error("Empty sample name: trailing spaces/tabs in the header line?"); + return -1; + } + + vdict_t *d = (vdict_t*)h->dict[BCF_DT_SAMPLE]; + int ret; + char *sdup = malloc(len + 1); + if (!sdup) return -1; + memcpy(sdup, s, len); + 
sdup[len] = 0; + + // Ensure space is available in h->samples + size_t n = kh_size(d); + char **new_samples = realloc(h->samples, sizeof(char*) * (n + 1)); + if (!new_samples) { + free(sdup); + return -1; + } + h->samples = new_samples; + + int k = kh_put(vdict, d, sdup, &ret); + if (ret < 0) { + free(sdup); + return -1; + } + if (ret) { // absent + kh_val(d, k) = bcf_idinfo_def; + kh_val(d, k).id = n; + } else { + hts_log_error("Duplicated sample name '%s'", sdup); + free(sdup); + return -1; + } + h->samples[n] = sdup; + h->dirty = 1; + return 0; +} + +int bcf_hdr_add_sample(bcf_hdr_t *h, const char *s) +{ + if (!s) { + // Allowed for backwards-compatibility, calling with s == NULL + // used to trigger bcf_hdr_sync(h); + return 0; + } + return bcf_hdr_add_sample_len(h, s, strlen(s)); +} + +int HTS_RESULT_USED bcf_hdr_parse_sample_line(bcf_hdr_t *hdr, const char *str) +{ + const char *mandatory = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"; + if ( strncmp(str,mandatory,strlen(mandatory)) ) + { + hts_log_error("Could not parse the \"#CHROM..\" line, either the fields are incorrect or spaces are present instead of tabs:\n\t%s",str); + return -1; + } + + const char *beg = str + strlen(mandatory), *end; + if ( !*beg || *beg=='\n' ) return 0; + if ( strncmp(beg,"\tFORMAT\t",8) ) + { + hts_log_error("Could not parse the \"#CHROM..\" line, either FORMAT is missing or spaces are present instead of tabs:\n\t%s",str); + return -1; + } + beg += 8; + + int ret = 0; + while ( *beg ) + { + end = beg; + while ( *end && *end!='\t' && *end!='\n' ) end++; + if ( bcf_hdr_add_sample_len(hdr, beg, end-beg) < 0 ) ret = -1; + if ( !*end || *end=='\n' || ret<0 ) break; + beg = end + 1; + } + return ret; +} + +int bcf_hdr_sync(bcf_hdr_t *h) +{ + int i; + for (i = 0; i < 3; i++) + { + vdict_t *d = (vdict_t*)h->dict[i]; + khint_t k; + if ( h->n[i] < kh_size(d) ) + { + bcf_idpair_t *new_idpair; + // this should be true only for i=2, BCF_DT_SAMPLE + new_idpair = (bcf_idpair_t*) 
realloc(h->id[i], kh_size(d)*sizeof(bcf_idpair_t)); + if (!new_idpair) return -1; + h->n[i] = kh_size(d); + h->id[i] = new_idpair; + } + for (k=kh_begin(d); kid[i][kh_val(d,k).id].key = kh_key(d,k); + h->id[i][kh_val(d,k).id].val = &kh_val(d,k); + } + } + + // Invalidate key length cache + bcf_hdr_aux_t *aux = get_hdr_aux(h); + if (aux && aux->key_len) { + free(aux->key_len); + aux->key_len = NULL; + } + + h->dirty = 0; + return 0; +} + +void bcf_hrec_destroy(bcf_hrec_t *hrec) +{ + if (!hrec) return; + free(hrec->key); + if ( hrec->value ) free(hrec->value); + int i; + for (i=0; inkeys; i++) + { + free(hrec->keys[i]); + free(hrec->vals[i]); + } + free(hrec->keys); + free(hrec->vals); + free(hrec); +} + +// Copies all fields except IDX. +bcf_hrec_t *bcf_hrec_dup(bcf_hrec_t *hrec) +{ + int save_errno; + bcf_hrec_t *out = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t)); + if (!out) return NULL; + + out->type = hrec->type; + if ( hrec->key ) { + out->key = strdup(hrec->key); + if (!out->key) goto fail; + } + if ( hrec->value ) { + out->value = strdup(hrec->value); + if (!out->value) goto fail; + } + out->nkeys = hrec->nkeys; + out->keys = (char**) malloc(sizeof(char*)*hrec->nkeys); + if (!out->keys) goto fail; + out->vals = (char**) malloc(sizeof(char*)*hrec->nkeys); + if (!out->vals) goto fail; + int i, j = 0; + for (i=0; inkeys; i++) + { + if ( hrec->keys[i] && !strcmp("IDX",hrec->keys[i]) ) continue; + if ( hrec->keys[i] ) { + out->keys[j] = strdup(hrec->keys[i]); + if (!out->keys[j]) goto fail; + } + if ( hrec->vals[i] ) { + out->vals[j] = strdup(hrec->vals[i]); + if (!out->vals[j]) goto fail; + } + j++; + } + if ( i!=j ) out->nkeys -= i-j; // IDX was omitted + return out; + + fail: + save_errno = errno; + hts_log_error("%s", strerror(errno)); + bcf_hrec_destroy(out); + errno = save_errno; + return NULL; +} + +void bcf_hrec_debug(FILE *fp, bcf_hrec_t *hrec) +{ + fprintf(fp, "key=[%s] value=[%s]", hrec->key, hrec->value?hrec->value:""); + int i; + for (i=0; inkeys; i++) 
+ fprintf(fp, "\t[%s]=[%s]", hrec->keys[i],hrec->vals[i]); + fprintf(fp, "\n"); +} + +void bcf_header_debug(bcf_hdr_t *hdr) +{ + int i, j; + for (i=0; inhrec; i++) + { + if ( !hdr->hrec[i]->value ) + { + fprintf(stderr, "##%s=<", hdr->hrec[i]->key); + fprintf(stderr,"%s=%s", hdr->hrec[i]->keys[0], hdr->hrec[i]->vals[0]); + for (j=1; jhrec[i]->nkeys; j++) + fprintf(stderr,",%s=%s", hdr->hrec[i]->keys[j], hdr->hrec[i]->vals[j]); + fprintf(stderr,">\n"); + } + else + fprintf(stderr,"##%s=%s\n", hdr->hrec[i]->key,hdr->hrec[i]->value); + } +} + +int bcf_hrec_add_key(bcf_hrec_t *hrec, const char *str, size_t len) +{ + char **tmp; + size_t n = hrec->nkeys + 1; + assert(len > 0 && len < SIZE_MAX); + tmp = realloc(hrec->keys, sizeof(char*)*n); + if (!tmp) return -1; + hrec->keys = tmp; + tmp = realloc(hrec->vals, sizeof(char*)*n); + if (!tmp) return -1; + hrec->vals = tmp; + + hrec->keys[hrec->nkeys] = (char*) malloc((len+1)*sizeof(char)); + if (!hrec->keys[hrec->nkeys]) return -1; + memcpy(hrec->keys[hrec->nkeys],str,len); + hrec->keys[hrec->nkeys][len] = 0; + hrec->vals[hrec->nkeys] = NULL; + hrec->nkeys = n; + return 0; +} + +int bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const char *str, size_t len, int is_quoted) +{ + if ( hrec->vals[i] ) { + free(hrec->vals[i]); + hrec->vals[i] = NULL; + } + if ( !str ) return 0; + if ( is_quoted ) + { + if (len >= SIZE_MAX - 3) { + errno = ENOMEM; + return -1; + } + hrec->vals[i] = (char*) malloc((len+3)*sizeof(char)); + if (!hrec->vals[i]) return -1; + hrec->vals[i][0] = '"'; + memcpy(&hrec->vals[i][1],str,len); + hrec->vals[i][len+1] = '"'; + hrec->vals[i][len+2] = 0; + } + else + { + if (len == SIZE_MAX) { + errno = ENOMEM; + return -1; + } + hrec->vals[i] = (char*) malloc((len+1)*sizeof(char)); + if (!hrec->vals[i]) return -1; + memcpy(hrec->vals[i],str,len); + hrec->vals[i][len] = 0; + } + return 0; +} + +int hrec_add_idx(bcf_hrec_t *hrec, int idx) +{ + int n = hrec->nkeys + 1; + char **tmp = (char**) realloc(hrec->keys, 
sizeof(char*)*n); + if (!tmp) return -1; + hrec->keys = tmp; + + tmp = (char**) realloc(hrec->vals, sizeof(char*)*n); + if (!tmp) return -1; + hrec->vals = tmp; + + hrec->keys[hrec->nkeys] = strdup("IDX"); + if (!hrec->keys[hrec->nkeys]) return -1; + + kstring_t str = {0,0,0}; + if (kputw(idx, &str) < 0) { + free(hrec->keys[hrec->nkeys]); + return -1; + } + hrec->vals[hrec->nkeys] = str.s; + hrec->nkeys = n; + return 0; +} + +int bcf_hrec_find_key(bcf_hrec_t *hrec, const char *key) +{ + int i; + for (i=0; inkeys; i++) + if ( !strcasecmp(key,hrec->keys[i]) ) return i; + return -1; +} + +static void bcf_hrec_set_type(bcf_hrec_t *hrec) +{ + if ( !strcmp(hrec->key, "contig") ) hrec->type = BCF_HL_CTG; + else if ( !strcmp(hrec->key, "INFO") ) hrec->type = BCF_HL_INFO; + else if ( !strcmp(hrec->key, "FILTER") ) hrec->type = BCF_HL_FLT; + else if ( !strcmp(hrec->key, "FORMAT") ) hrec->type = BCF_HL_FMT; + else if ( hrec->nkeys>0 ) hrec->type = BCF_HL_STR; + else hrec->type = BCF_HL_GEN; +} + + +/** + The arrays were generated with + + valid_ctg: + perl -le '@v = (split(//,q[!#$%&*+./:;=?@^_|~-]),"a"..."z","A"..."Z","0"..."9"); @a = (0) x 256; foreach $c (@v) { $a[ord($c)] = 1; } print join(", ",@a)' | fold -w 48 + + valid_tag: + perl -le '@v = (split(//,q[_.]),"a"..."z","A"..."Z","0"..."9"); @a = (0) x 256; foreach $c (@v) { $a[ord($c)] = 1; } print join(", ",@a)' | fold -w 48 +*/ +static const uint8_t valid_ctg[256] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +static const uint8_t valid_tag[256] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/** + bcf_hrec_check() - check the validity of structured header lines + + Returns 0 on success or negative value on error. + + Currently the return status is not checked by the caller + and only a warning is printed on stderr. This should be improved + to propagate the error all the way up to the caller and let it + decide what to do: throw an error or proceed anyway. 
+ */ +static int bcf_hrec_check(bcf_hrec_t *hrec) +{ + int i; + bcf_hrec_set_type(hrec); + + if ( hrec->type==BCF_HL_CTG ) + { + i = bcf_hrec_find_key(hrec,"ID"); + if ( i<0 ) goto err_missing_id; + char *val = hrec->vals[i]; + if ( val[0]=='*' || val[0]=='=' || !valid_ctg[(uint8_t)val[0]] ) goto err_invalid_ctg; + while ( *(++val) ) + if ( !valid_ctg[(uint8_t)*val] ) goto err_invalid_ctg; + return 0; + } + if ( hrec->type==BCF_HL_INFO ) + { + i = bcf_hrec_find_key(hrec,"ID"); + if ( i<0 ) goto err_missing_id; + char *val = hrec->vals[i]; + if ( !strcmp(val,"1000G") ) return 0; + if ( val[0]=='.' || (val[0]>='0' && val[0]<='9') || !valid_tag[(uint8_t)val[0]] ) goto err_invalid_tag; + while ( *(++val) ) + if ( !valid_tag[(uint8_t)*val] ) goto err_invalid_tag; + return 0; + } + if ( hrec->type==BCF_HL_FMT ) + { + i = bcf_hrec_find_key(hrec,"ID"); + if ( i<0 ) goto err_missing_id; + char *val = hrec->vals[i]; + if ( val[0]=='.' || (val[0]>='0' && val[0]<='9') || !valid_tag[(uint8_t)val[0]] ) goto err_invalid_tag; + while ( *(++val) ) + if ( !valid_tag[(uint8_t)*val] ) goto err_invalid_tag; + return 0; + } + return 0; + + err_missing_id: + hts_log_warning("Missing ID attribute in one or more header lines"); + return -1; + + err_invalid_ctg: + hts_log_warning("Invalid contig name: \"%s\"", hrec->vals[i]); + return -1; + + err_invalid_tag: + hts_log_warning("Invalid tag name: \"%s\"", hrec->vals[i]); + return -1; +} + +static inline int is_escaped(const char *min, const char *str) +{ + int n = 0; + while ( --str>=min && *str=='\\' ) n++; + return n%2; +} + +bcf_hrec_t *bcf_hdr_parse_line(const bcf_hdr_t *h, const char *line, int *len) +{ + bcf_hrec_t *hrec = NULL; + const char *p = line; + if (p[0] != '#' || p[1] != '#') { *len = 0; return NULL; } + p += 2; + + const char *q = p; + while ( *q && *q!='=' && *q != '\n' ) q++; + ptrdiff_t n = q-p; + if ( *q!='=' || !n ) // wrong format + goto malformed_line; + + hrec = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t)); + if 
(!hrec) { *len = -1; return NULL; } + hrec->key = (char*) malloc(sizeof(char)*(n+1)); + if (!hrec->key) goto fail; + memcpy(hrec->key,p,n); + hrec->key[n] = 0; + hrec->type = -1; + + p = ++q; + if ( *p!='<' ) // generic field, e.g. ##samtoolsVersion=0.1.18-r579 + { + while ( *q && *q!='\n' ) q++; + hrec->value = (char*) malloc((q-p+1)*sizeof(char)); + if (!hrec->value) goto fail; + memcpy(hrec->value, p, q-p); + hrec->value[q-p] = 0; + *len = q - line + (*q ? 1 : 0); // Skip \n but not \0 + return hrec; + } + + // structured line, e.g. + // ##INFO= + // ##PEDIGREE= + int nopen = 1; + while ( *q && *q!='\n' && nopen>0 ) + { + p = ++q; + while ( *q && *q==' ' ) { p++; q++; } + // ^[A-Za-z_][0-9A-Za-z_.]*$ + if (p==q && *q && (isalpha_c(*q) || *q=='_')) + { + q++; + while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; + } + n = q-p; + int m = 0; + while ( *q && *q==' ' ) { q++; m++; } + if ( *q!='=' || !n ) + goto malformed_line; + + if (bcf_hrec_add_key(hrec, p, q-p-m) < 0) goto fail; + p = ++q; + while ( *q && *q==' ' ) { p++; q++; } + + int quoted = 0; + char ending = '\0'; + switch (*p) { + case '"': + quoted = 1; + ending = '"'; + p++; + break; + case '[': + quoted = 1; + ending = ']'; + break; + } + if ( quoted ) q++; + while ( *q && *q != '\n' ) + { + if ( quoted ) { if ( *q==ending && !is_escaped(p,q) ) break; } + else + { + if ( *q=='<' ) nopen++; + if ( *q=='>' ) nopen--; + if ( !nopen ) break; + if ( *q==',' && nopen==1 ) break; + } + q++; + } + const char *r = q; + if (quoted && ending == ']') { + if (*q == ending) { + r++; + q++; + quoted = 0; + } else { + char buffer[320]; + hts_log_error("Missing ']' in header line %s", + hts_strprint(buffer, sizeof(buffer), '"', + line, q-line)); + goto fail; + } + } + while ( r > p && r[-1] == ' ' ) r--; + if (bcf_hrec_set_val(hrec, hrec->nkeys-1, p, r-p, quoted) < 0) + goto fail; + if ( quoted && *q==ending ) q++; + if ( *q=='>' ) + { + if (nopen) nopen--; // this can happen with nested angle brackets <> + q++; 
+ } + } + if ( nopen ) + hts_log_warning("Incomplete header line, trying to proceed anyway:\n\t[%s]\n\t[%d]",line,q[0]); + + // Skip to end of line + int nonspace = 0; + p = q; + while ( *q && *q!='\n' ) { nonspace |= !isspace_c(*q); q++; } + if (nonspace) { + char buffer[320]; + hts_log_warning("Dropped trailing junk from header line '%s'", + hts_strprint(buffer, sizeof(buffer), + '"', line, q - line)); + } + + *len = q - line + (*q ? 1 : 0); + return hrec; + + fail: + *len = -1; + bcf_hrec_destroy(hrec); + return NULL; + + malformed_line: + { + char buffer[320]; + while ( *q && *q!='\n' ) q++; // Ensure *len includes full line + hts_log_error("Could not parse the header line: %s", + hts_strprint(buffer, sizeof(buffer), + '"', line, q - line)); + *len = q - line + (*q ? 1 : 0); + bcf_hrec_destroy(hrec); + return NULL; + } +} + +static int bcf_hdr_set_idx(bcf_hdr_t *hdr, int dict_type, const char *tag, bcf_idinfo_t *idinfo) +{ + size_t new_n; + + // If available, preserve existing IDX + if ( idinfo->id==-1 ) + idinfo->id = hdr->n[dict_type]; + else if ( idinfo->id < hdr->n[dict_type] && hdr->id[dict_type][idinfo->id].key ) + { + hts_log_error("Conflicting IDX=%d lines in the header dictionary, the new tag is %s", + idinfo->id, tag); + errno = EINVAL; + return -1; + } + + new_n = idinfo->id >= hdr->n[dict_type] ? idinfo->id+1 : hdr->n[dict_type]; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // hts_resize() can attempt to allocate up to 2 * requested items + if (new_n > FUZZ_ALLOC_LIMIT/(2 * sizeof(bcf_idpair_t))) + return -1; +#endif + if (hts_resize(bcf_idpair_t, new_n, &hdr->m[dict_type], + &hdr->id[dict_type], HTS_RESIZE_CLEAR)) { + return -1; + } + hdr->n[dict_type] = new_n; + + // NB: the next kh_put call can invalidate the idinfo pointer, therefore + // we leave it unassigned here. It must be set explicitly in bcf_hdr_sync. 
+ hdr->id[dict_type][idinfo->id].key = tag; + + return 0; +} + +// returns: 1 when hdr needs to be synced, -1 on error, 0 otherwise +static int bcf_hdr_register_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) +{ + // contig + int i, ret, replacing = 0; + khint_t k; + char *str = NULL; + + bcf_hrec_set_type(hrec); + + if ( hrec->type==BCF_HL_CTG ) + { + hts_pos_t len = 0; + + // Get the contig ID ($str) and length ($j) + i = bcf_hrec_find_key(hrec,"length"); + if ( i<0 ) len = 0; + else { + char *end = hrec->vals[i]; + len = strtoll(hrec->vals[i], &end, 10); + if (end == hrec->vals[i] || len < 0) return 0; + } + + i = bcf_hrec_find_key(hrec,"ID"); + if ( i<0 ) return 0; + str = strdup(hrec->vals[i]); + if (!str) return -1; + + // Register in the dictionary + vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_CTG]; + khint_t k = kh_get(vdict, d, str); + if ( k != kh_end(d) ) { // already present + free(str); str=NULL; + if (kh_val(d, k).hrec[0] != NULL) // and not removed + return 0; + replacing = 1; + } else { + k = kh_put(vdict, d, str, &ret); + if (ret < 0) { free(str); return -1; } + } + + int idx = bcf_hrec_find_key(hrec,"IDX"); + if ( idx!=-1 ) + { + char *tmp = hrec->vals[idx]; + idx = strtol(hrec->vals[idx], &tmp, 10); + if ( *tmp || idx < 0 || idx >= INT_MAX - 1) + { + if (!replacing) { + kh_del(vdict, d, k); + free(str); + } + hts_log_warning("Error parsing the IDX tag, skipping"); + return 0; + } + } + + kh_val(d, k) = bcf_idinfo_def; + kh_val(d, k).id = idx; + kh_val(d, k).info[0] = len; + kh_val(d, k).hrec[0] = hrec; + if (bcf_hdr_set_idx(hdr, BCF_DT_CTG, kh_key(d,k), &kh_val(d,k)) < 0) { + if (!replacing) { + kh_del(vdict, d, k); + free(str); + } + return -1; + } + if ( idx==-1 ) { + if (hrec_add_idx(hrec, kh_val(d,k).id) < 0) { + return -1; + } + } + + return 1; + } + + if ( hrec->type==BCF_HL_STR ) return 1; + if ( hrec->type!=BCF_HL_INFO && hrec->type!=BCF_HL_FLT && hrec->type!=BCF_HL_FMT ) return 0; + + // INFO/FILTER/FORMAT + char *id = NULL; + uint32_t type = 
UINT32_MAX, var = UINT32_MAX; + int num = -1, idx = -1; + for (i=0; inkeys; i++) + { + if ( !strcmp(hrec->keys[i], "ID") ) id = hrec->vals[i]; + else if ( !strcmp(hrec->keys[i], "IDX") ) + { + char *tmp = hrec->vals[i]; + idx = strtol(hrec->vals[i], &tmp, 10); + if ( *tmp || idx < 0 || idx >= INT_MAX - 1) + { + hts_log_warning("Error parsing the IDX tag, skipping"); + return 0; + } + } + else if ( !strcmp(hrec->keys[i], "Type") ) + { + if ( !strcmp(hrec->vals[i], "Integer") ) type = BCF_HT_INT; + else if ( !strcmp(hrec->vals[i], "Float") ) type = BCF_HT_REAL; + else if ( !strcmp(hrec->vals[i], "String") ) type = BCF_HT_STR; + else if ( !strcmp(hrec->vals[i], "Character") ) type = BCF_HT_STR; + else if ( !strcmp(hrec->vals[i], "Flag") ) type = BCF_HT_FLAG; + else + { + hts_log_warning("The type \"%s\" is not supported, assuming \"String\"", hrec->vals[i]); + type = BCF_HT_STR; + } + } + else if ( !strcmp(hrec->keys[i], "Number") ) + { + if ( !strcmp(hrec->vals[i],"A") ) var = BCF_VL_A; + else if ( !strcmp(hrec->vals[i],"R") ) var = BCF_VL_R; + else if ( !strcmp(hrec->vals[i],"G") ) var = BCF_VL_G; + else if ( !strcmp(hrec->vals[i],".") ) var = BCF_VL_VAR; + else + { + sscanf(hrec->vals[i],"%d",&num); + var = BCF_VL_FIXED; + } + if (var != BCF_VL_FIXED) num = 0xfffff; + } + } + if (hrec->type == BCF_HL_INFO || hrec->type == BCF_HL_FMT) { + if (type == -1) { + hts_log_warning("%s %s field has no Type defined. Assuming String", + *hrec->key == 'I' ? "An" : "A", hrec->key); + type = BCF_HT_STR; + } + if (var == -1) { + hts_log_warning("%s %s field has no Number defined. Assuming '.'", + *hrec->key == 'I' ? 
"An" : "A", hrec->key); + var = BCF_VL_VAR; + } + if ( type==BCF_HT_FLAG && (var!=BCF_VL_FIXED || num!=0) ) + { + hts_log_warning("The definition of Flag \"%s/%s\" is invalid, forcing Number=0", hrec->key,id); + var = BCF_VL_FIXED; + num = 0; + } + } + uint32_t info = ((((uint32_t)num) & 0xfffff)<<12 | + (var & 0xf) << 8 | + (type & 0xf) << 4 | + (((uint32_t) hrec->type) & 0xf)); + + if ( !id ) return 0; + str = strdup(id); + if (!str) return -1; + + vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_ID]; + k = kh_get(vdict, d, str); + if ( k != kh_end(d) ) + { + // already present + free(str); + if ( kh_val(d, k).hrec[info&0xf] ) return 0; + kh_val(d, k).info[info&0xf] = info; + kh_val(d, k).hrec[info&0xf] = hrec; + if ( idx==-1 ) { + if (hrec_add_idx(hrec, kh_val(d, k).id) < 0) { + return -1; + } + } + return 1; + } + k = kh_put(vdict, d, str, &ret); + if (ret < 0) { + free(str); + return -1; + } + kh_val(d, k) = bcf_idinfo_def; + kh_val(d, k).info[info&0xf] = info; + kh_val(d, k).hrec[info&0xf] = hrec; + kh_val(d, k).id = idx; + if (bcf_hdr_set_idx(hdr, BCF_DT_ID, kh_key(d,k), &kh_val(d,k)) < 0) { + kh_del(vdict, d, k); + free(str); + return -1; + } + if ( idx==-1 ) { + if (hrec_add_idx(hrec, kh_val(d,k).id) < 0) { + return -1; + } + } + + return 1; +} + +static void bcf_hdr_unregister_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) +{ + if (hrec->type == BCF_HL_FLT || + hrec->type == BCF_HL_INFO || + hrec->type == BCF_HL_FMT || + hrec->type == BCF_HL_CTG) { + int id = bcf_hrec_find_key(hrec, "ID"); + if (id < 0 || !hrec->vals[id]) + return; + vdict_t *dict = (hrec->type == BCF_HL_CTG + ? (vdict_t*)hdr->dict[BCF_DT_CTG] + : (vdict_t*)hdr->dict[BCF_DT_ID]); + khint_t k = kh_get(vdict, dict, hrec->vals[id]); + if (k != kh_end(dict)) + kh_val(dict, k).hrec[hrec->type==BCF_HL_CTG ? 
0 : hrec->type] = NULL; + } +} + +static void bcf_hdr_remove_from_hdict(bcf_hdr_t *hdr, bcf_hrec_t *hrec) +{ + kstring_t str = KS_INITIALIZE; + bcf_hdr_aux_t *aux = get_hdr_aux(hdr); + khint_t k; + int id; + + switch (hrec->type) { + case BCF_HL_GEN: + if (ksprintf(&str, "##%s=%s", hrec->key,hrec->value) < 0) + str.l = 0; + break; + case BCF_HL_STR: + id = bcf_hrec_find_key(hrec, "ID"); + if (id < 0) + return; + if (!hrec->vals[id] || + ksprintf(&str, "##%s=", hrec->key, hrec->vals[id]) < 0) + str.l = 0; + break; + default: + return; + } + if (str.l) { + k = kh_get(hdict, aux->gen, str.s); + } else { + // Couldn't get a string for some reason, so try the hard way... + for (k = kh_begin(aux->gen); k < kh_end(aux->gen); k++) { + if (kh_exist(aux->gen, k) && kh_val(aux->gen, k) == hrec) + break; + } + } + if (k != kh_end(aux->gen) && kh_val(aux->gen, k) == hrec) { + kh_val(aux->gen, k) = NULL; + free((char *) kh_key(aux->gen, k)); + kh_key(aux->gen, k) = NULL; + kh_del(hdict, aux->gen, k); + } + free(str.s); +} + +int bcf_hdr_update_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec, const bcf_hrec_t *tmp) +{ + // currently only for bcf_hdr_set_version + assert( hrec->type==BCF_HL_GEN ); + int ret; + khint_t k; + bcf_hdr_aux_t *aux = get_hdr_aux(hdr); + for (k=kh_begin(aux->gen); kgen); k++) + { + if ( !kh_exist(aux->gen,k) ) continue; + if ( hrec!=(bcf_hrec_t*)kh_val(aux->gen,k) ) continue; + break; + } + assert( kgen) ); // something went wrong, should never happen + free((char*)kh_key(aux->gen,k)); + kh_del(hdict,aux->gen,k); + kstring_t str = {0,0,0}; + if ( ksprintf(&str, "##%s=%s", tmp->key,tmp->value) < 0 ) + { + free(str.s); + return -1; + } + k = kh_put(hdict, aux->gen, str.s, &ret); + if ( ret<0 ) + { + free(str.s); + return -1; + } + free(hrec->value); + hrec->value = strdup(tmp->value); + if ( !hrec->value ) return -1; + return 0; +} + +int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec) +{ + kstring_t str = {0,0,0}; + bcf_hdr_aux_t *aux = get_hdr_aux(hdr); + + int 
res; + if ( !hrec ) return 0; + + bcf_hrec_check(hrec); // todo: check return status and propagate errors up + + res = bcf_hdr_register_hrec(hdr,hrec); + if (res < 0) return -1; + if ( !res ) + { + // If one of the hashed field, then it is already present + if ( hrec->type != BCF_HL_GEN ) + { + bcf_hrec_destroy(hrec); + return 0; + } + + // Is one of the generic fields and already present? + if ( ksprintf(&str, "##%s=%s", hrec->key,hrec->value) < 0 ) + { + free(str.s); + return -1; + } + khint_t k = kh_get(hdict, aux->gen, str.s); + if ( k != kh_end(aux->gen) ) + { + // duplicate record + bcf_hrec_destroy(hrec); + free(str.s); + return 0; + } + } + + int i; + if ( hrec->type==BCF_HL_STR && (i=bcf_hrec_find_key(hrec,"ID"))>=0 ) + { + if ( ksprintf(&str, "##%s=", hrec->key,hrec->vals[i]) < 0 ) + { + free(str.s); + return -1; + } + khint_t k = kh_get(hdict, aux->gen, str.s); + if ( k != kh_end(aux->gen) ) + { + // duplicate record + bcf_hrec_destroy(hrec); + free(str.s); + return 0; + } + } + + // New record, needs to be added + int n = hdr->nhrec + 1; + bcf_hrec_t **new_hrec = realloc(hdr->hrec, n*sizeof(bcf_hrec_t*)); + if (!new_hrec) { + free(str.s); + bcf_hdr_unregister_hrec(hdr, hrec); + return -1; + } + hdr->hrec = new_hrec; + + if ( str.s ) + { + khint_t k = kh_put(hdict, aux->gen, str.s, &res); + if ( res<0 ) + { + free(str.s); + return -1; + } + kh_val(aux->gen,k) = hrec; + } + + hdr->hrec[hdr->nhrec] = hrec; + hdr->dirty = 1; + hdr->nhrec = n; + + return hrec->type==BCF_HL_GEN ? 0 : 1; +} + +bcf_hrec_t *bcf_hdr_get_hrec(const bcf_hdr_t *hdr, int type, const char *key, const char *value, const char *str_class) +{ + int i; + if ( type==BCF_HL_GEN ) + { + // e.g. 
##fileformat=VCFv4.2 + // ##source=GenomicsDBImport + // ##bcftools_viewVersion=1.16-80-gdfdb0923+htslib-1.16-34-g215d364 + if ( value ) + { + kstring_t str = {0,0,0}; + ksprintf(&str, "##%s=%s", key,value); + bcf_hdr_aux_t *aux = get_hdr_aux(hdr); + khint_t k = kh_get(hdict, aux->gen, str.s); + free(str.s); + if ( k == kh_end(aux->gen) ) return NULL; + return kh_val(aux->gen, k); + } + for (i=0; inhrec; i++) + { + if ( hdr->hrec[i]->type!=type ) continue; + if ( strcmp(hdr->hrec[i]->key,key) ) continue; + return hdr->hrec[i]; + } + return NULL; + } + else if ( type==BCF_HL_STR ) + { + // e.g. ##GATKCommandLine= + // ##ALT= + if (!str_class) return NULL; + if ( !strcmp("ID",key) ) + { + kstring_t str = {0,0,0}; + ksprintf(&str, "##%s=<%s=%s>",str_class,key,value); + bcf_hdr_aux_t *aux = get_hdr_aux(hdr); + khint_t k = kh_get(hdict, aux->gen, str.s); + free(str.s); + if ( k == kh_end(aux->gen) ) return NULL; + return kh_val(aux->gen, k); + } + for (i=0; inhrec; i++) + { + if ( hdr->hrec[i]->type!=type ) continue; + if ( strcmp(hdr->hrec[i]->key,str_class) ) continue; + int j = bcf_hrec_find_key(hdr->hrec[i],key); + if ( j>=0 && !strcmp(hdr->hrec[i]->vals[j],value) ) return hdr->hrec[i]; + } + return NULL; + } + vdict_t *d = type==BCF_HL_CTG ? 
(vdict_t*)hdr->dict[BCF_DT_CTG] : (vdict_t*)hdr->dict[BCF_DT_ID]; + khint_t k = kh_get(vdict, d, value); + if ( k == kh_end(d) ) return NULL; + return kh_val(d, k).hrec[type==BCF_HL_CTG?0:type]; +} + +void bcf_hdr_check_sanity(bcf_hdr_t *hdr) +{ + static int PL_warned = 0, GL_warned = 0; + + if ( !PL_warned ) + { + int id = bcf_hdr_id2int(hdr, BCF_DT_ID, "PL"); + if ( bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) && bcf_hdr_id2length(hdr,BCF_HL_FMT,id)!=BCF_VL_G ) + { + hts_log_warning("PL should be declared as Number=G"); + PL_warned = 1; + } + } + if ( !GL_warned ) + { + int id = bcf_hdr_id2int(hdr, BCF_DT_ID, "GL"); + if ( bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) && bcf_hdr_id2length(hdr,BCF_HL_FMT,id)!=BCF_VL_G ) + { + hts_log_warning("GL should be declared as Number=G"); + GL_warned = 1; + } + } +} + +int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt) +{ + int len, done = 0; + char *p = htxt; + + // Check sanity: "fileformat" string must come as first + bcf_hrec_t *hrec = bcf_hdr_parse_line(hdr,p,&len); + if ( !hrec || !hrec->key || strcasecmp(hrec->key,"fileformat") ) + hts_log_warning("The first line should be ##fileformat; is the VCF/BCF header broken?"); + if (bcf_hdr_add_hrec(hdr, hrec) < 0) { + bcf_hrec_destroy(hrec); + return -1; + } + + // The filter PASS must appear first in the dictionary + hrec = bcf_hdr_parse_line(hdr,"##FILTER=",&len); + if (!hrec || bcf_hdr_add_hrec(hdr, hrec) < 0) { + bcf_hrec_destroy(hrec); + return -1; + } + + // Parse the whole header + do { + while (NULL != (hrec = bcf_hdr_parse_line(hdr, p, &len))) { + if (bcf_hdr_add_hrec(hdr, hrec) < 0) { + bcf_hrec_destroy(hrec); + return -1; + } + p += len; + } + assert(hrec == NULL); + if (len < 0) { + // len < 0 indicates out-of-memory, or similar error + hts_log_error("Could not parse header line: %s", strerror(errno)); + return -1; + } else if (len > 0) { + // Bad header line. bcf_hdr_parse_line() will have logged it. 
+ // Skip and try again on the next line (p + len will be the start + // of the next one). + p += len; + continue; + } + + // Next should be the sample line. If not, it was a malformed + // header, in which case print a warning and skip (many VCF + // operations do not really care about a few malformed lines). + // In the future we may want to add a strict mode that errors in + // this case. + if ( strncmp("#CHROM\t",p,7) && strncmp("#CHROM ",p,7) ) { + char *eol = strchr(p, '\n'); + if (*p != '\0') { + char buffer[320]; + hts_log_warning("Could not parse header line: %s", + hts_strprint(buffer, sizeof(buffer), + '"', p, + eol ? (eol - p) : SIZE_MAX)); + } + if (eol) { + p = eol + 1; // Try from the next line. + } else { + done = -1; // No more lines left, give up. + } + } else { + done = 1; // Sample line found + } + } while (!done); + + if (done < 0) { + // No sample line is fatal. + hts_log_error("Could not parse the header, sample line not found"); + return -1; + } + + if (bcf_hdr_parse_sample_line(hdr,p) < 0) + return -1; + if (bcf_hdr_sync(hdr) < 0) + return -1; + bcf_hdr_check_sanity(hdr); + return 0; +} + +int bcf_hdr_append(bcf_hdr_t *hdr, const char *line) +{ + int len; + bcf_hrec_t *hrec = bcf_hdr_parse_line(hdr, (char*) line, &len); + if ( !hrec ) return -1; + if (bcf_hdr_add_hrec(hdr, hrec) < 0) + return -1; + return 0; +} + +void bcf_hdr_remove(bcf_hdr_t *hdr, int type, const char *key) +{ + int i = 0; + bcf_hrec_t *hrec; + if ( !key ) + { + // no key, remove all entries of this type + while ( inhrec ) + { + if ( hdr->hrec[i]->type!=type ) { i++; continue; } + hrec = hdr->hrec[i]; + bcf_hdr_unregister_hrec(hdr, hrec); + bcf_hdr_remove_from_hdict(hdr, hrec); + hdr->dirty = 1; + hdr->nhrec--; + if ( i < hdr->nhrec ) + memmove(&hdr->hrec[i],&hdr->hrec[i+1],(hdr->nhrec-i)*sizeof(bcf_hrec_t*)); + bcf_hrec_destroy(hrec); + } + return; + } + while (1) + { + if ( type==BCF_HL_FLT || type==BCF_HL_INFO || type==BCF_HL_FMT || type== BCF_HL_CTG ) + { + hrec = 
bcf_hdr_get_hrec(hdr, type, "ID", key, NULL); + if ( !hrec ) return; + + for (i=0; inhrec; i++) + if ( hdr->hrec[i]==hrec ) break; + assert( inhrec ); + + vdict_t *d = type==BCF_HL_CTG ? (vdict_t*)hdr->dict[BCF_DT_CTG] : (vdict_t*)hdr->dict[BCF_DT_ID]; + khint_t k = kh_get(vdict, d, key); + kh_val(d, k).hrec[type==BCF_HL_CTG?0:type] = NULL; + } + else + { + for (i=0; inhrec; i++) + { + if ( hdr->hrec[i]->type!=type ) continue; + if ( type==BCF_HL_GEN ) + { + if ( !strcmp(hdr->hrec[i]->key,key) ) break; + } + else + { + // not all structured lines have ID, we could be more sophisticated as in bcf_hdr_get_hrec() + int j = bcf_hrec_find_key(hdr->hrec[i], "ID"); + if ( j>=0 && !strcmp(hdr->hrec[i]->vals[j],key) ) break; + } + } + if ( i==hdr->nhrec ) return; + hrec = hdr->hrec[i]; + bcf_hdr_remove_from_hdict(hdr, hrec); + } + + hdr->nhrec--; + if ( i < hdr->nhrec ) + memmove(&hdr->hrec[i],&hdr->hrec[i+1],(hdr->nhrec-i)*sizeof(bcf_hrec_t*)); + bcf_hrec_destroy(hrec); + hdr->dirty = 1; + } +} + +int bcf_hdr_printf(bcf_hdr_t *hdr, const char *fmt, ...) 
+{ + char tmp[256], *line = tmp; + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(line, sizeof(tmp), fmt, ap); + va_end(ap); + + if (n >= sizeof(tmp)) { + n++; // For trailing NUL + line = (char*)malloc(n); + if (!line) + return -1; + + va_start(ap, fmt); + vsnprintf(line, n, fmt, ap); + va_end(ap); + } + + int ret = bcf_hdr_append(hdr, line); + + if (line != tmp) free(line); + return ret; +} + + +/********************** + *** BCF header I/O *** + **********************/ + +const char *bcf_hdr_get_version(const bcf_hdr_t *hdr) +{ + bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_GEN, "fileformat", NULL, NULL); + if ( !hrec ) + { + hts_log_warning("No version string found, assuming VCFv4.2"); + return "VCFv4.2"; + } + return hrec->value; +} + +int bcf_hdr_set_version(bcf_hdr_t *hdr, const char *version) +{ + bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_GEN, "fileformat", NULL, NULL); + if ( !hrec ) + { + int len; + kstring_t str = {0,0,0}; + if ( ksprintf(&str,"##fileformat=%s", version) < 0 ) return -1; + hrec = bcf_hdr_parse_line(hdr, str.s, &len); + free(str.s); + } + else + { + bcf_hrec_t *tmp = bcf_hrec_dup(hrec); + if ( !tmp ) return -1; + free(tmp->value); + tmp->value = strdup(version); + if ( !tmp->value ) return -1; + bcf_hdr_update_hrec(hdr, hrec, tmp); + bcf_hrec_destroy(tmp); + } + hdr->dirty = 1; + return 0; // FIXME: check for errs in this function (return < 0 if so) +} + +bcf_hdr_t *bcf_hdr_init(const char *mode) +{ + int i; + bcf_hdr_t *h; + h = (bcf_hdr_t*)calloc(1, sizeof(bcf_hdr_t)); + if (!h) return NULL; + for (i = 0; i < 3; ++i) { + if ((h->dict[i] = kh_init(vdict)) == NULL) goto fail; + // Supersize the hash to make collisions very unlikely + static int dsize[3] = {16384,16384,2048}; // info, contig, format + if (kh_resize(vdict, h->dict[i], dsize[i]) < 0) goto fail; + } + + bcf_hdr_aux_t *aux = (bcf_hdr_aux_t*)calloc(1,sizeof(bcf_hdr_aux_t)); + if ( !aux ) goto fail; + if ( (aux->gen = kh_init(hdict))==NULL ) { free(aux); goto fail; 
} + aux->key_len = NULL; + aux->dict = *((vdict_t*)h->dict[0]); + free(h->dict[0]); + h->dict[0] = aux; + + if ( strchr(mode,'w') ) + { + bcf_hdr_append(h, "##fileformat=VCFv4.2"); + // The filter PASS must appear first in the dictionary + bcf_hdr_append(h, "##FILTER="); + } + return h; + + fail: + for (i = 0; i < 3; ++i) + kh_destroy(vdict, h->dict[i]); + free(h); + return NULL; +} + +void bcf_hdr_destroy(bcf_hdr_t *h) +{ + int i; + khint_t k; + if (!h) return; + for (i = 0; i < 3; ++i) { + vdict_t *d = (vdict_t*)h->dict[i]; + if (d == 0) continue; + for (k = kh_begin(d); k != kh_end(d); ++k) + if (kh_exist(d, k)) free((char*)kh_key(d, k)); + if ( i==0 ) + { + bcf_hdr_aux_t *aux = get_hdr_aux(h); + for (k=kh_begin(aux->gen); kgen); k++) + if ( kh_exist(aux->gen,k) ) free((char*)kh_key(aux->gen,k)); + kh_destroy(hdict, aux->gen); + free(aux->key_len); // may exist for dict[0] only + } + kh_destroy(vdict, d); + free(h->id[i]); + } + for (i=0; inhrec; i++) + bcf_hrec_destroy(h->hrec[i]); + if (h->nhrec) free(h->hrec); + if (h->samples) free(h->samples); + free(h->keep_samples); + free(h->transl[0]); free(h->transl[1]); + free(h->mem.s); + free(h); +} + +bcf_hdr_t *bcf_hdr_read(htsFile *hfp) +{ + if (hfp->format.format == vcf) + return vcf_hdr_read(hfp); + if (hfp->format.format != bcf) { + hts_log_error("Input is not detected as bcf or vcf format"); + return NULL; + } + + assert(hfp->is_bgzf); + + BGZF *fp = hfp->fp.bgzf; + uint8_t magic[5]; + bcf_hdr_t *h; + h = bcf_hdr_init("r"); + if (!h) { + hts_log_error("Failed to allocate bcf header"); + return NULL; + } + if (bgzf_read(fp, magic, 5) != 5) + { + hts_log_error("Failed to read the header (reading BCF in text mode?)"); + bcf_hdr_destroy(h); + return NULL; + } + if (strncmp((char*)magic, "BCF\2\2", 5) != 0) + { + if (!strncmp((char*)magic, "BCF", 3)) + hts_log_error("Invalid BCF2 magic string: only BCFv2.2 is supported"); + else + hts_log_error("Invalid BCF2 magic string"); + bcf_hdr_destroy(h); + return NULL; + } 
+ uint8_t buf[4]; + size_t hlen; + char *htxt = NULL; + if (bgzf_read(fp, buf, 4) != 4) goto fail; + hlen = buf[0] | (buf[1] << 8) | (buf[2] << 16) | ((size_t) buf[3] << 24); + if (hlen >= SIZE_MAX) { errno = ENOMEM; goto fail; } +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (hlen > FUZZ_ALLOC_LIMIT/2) { errno = ENOMEM; goto fail; } +#endif + htxt = (char*)malloc(hlen + 1); + if (!htxt) goto fail; + if (bgzf_read(fp, htxt, hlen) != hlen) goto fail; + htxt[hlen] = '\0'; // Ensure htxt is terminated + if ( bcf_hdr_parse(h, htxt) < 0 ) goto fail; + free(htxt); + return h; + fail: + hts_log_error("Failed to read BCF header"); + free(htxt); + bcf_hdr_destroy(h); + return NULL; +} + +int bcf_hdr_write(htsFile *hfp, bcf_hdr_t *h) +{ + if (!h) { + errno = EINVAL; + return -1; + } + if ( h->dirty ) { + if (bcf_hdr_sync(h) < 0) return -1; + } + hfp->format.category = variant_data; + if (hfp->format.format == vcf || hfp->format.format == text_format) { + hfp->format.format = vcf; + return vcf_hdr_write(hfp, h); + } + + if (hfp->format.format == binary_format) + hfp->format.format = bcf; + + kstring_t htxt = {0,0,0}; + if (bcf_hdr_format(h, 1, &htxt) < 0) { + free(htxt.s); + return -1; + } + kputc('\0', &htxt); // include the \0 byte + + BGZF *fp = hfp->fp.bgzf; + if ( bgzf_write(fp, "BCF\2\2", 5) !=5 ) return -1; + uint8_t hlen[4]; + u32_to_le(htxt.l, hlen); + if ( bgzf_write(fp, hlen, 4) !=4 ) return -1; + if ( bgzf_write(fp, htxt.s, htxt.l) != htxt.l ) return -1; + if ( bgzf_flush(fp) < 0) return -1; + + free(htxt.s); + return 0; +} + +/******************** + *** BCF site I/O *** + ********************/ + +bcf1_t *bcf_init(void) +{ + bcf1_t *v; + v = (bcf1_t*)calloc(1, sizeof(bcf1_t)); + return v; +} + +void bcf_clear(bcf1_t *v) +{ + int i; + for (i=0; id.m_info; i++) + { + if ( v->d.info[i].vptr_free ) + { + free(v->d.info[i].vptr - v->d.info[i].vptr_off); + v->d.info[i].vptr_free = 0; + } + } + for (i=0; id.m_fmt; i++) + { + if ( v->d.fmt[i].p_free ) + { + 
free(v->d.fmt[i].p - v->d.fmt[i].p_off); + v->d.fmt[i].p_free = 0; + } + } + v->rid = v->pos = v->rlen = v->unpacked = 0; + bcf_float_set_missing(v->qual); + v->n_info = v->n_allele = v->n_fmt = v->n_sample = 0; + v->shared.l = v->indiv.l = 0; + v->d.var_type = -1; + v->d.shared_dirty = 0; + v->d.indiv_dirty = 0; + v->d.n_flt = 0; + v->errcode = 0; + if (v->d.m_als) v->d.als[0] = 0; + if (v->d.m_id) v->d.id[0] = 0; +} + +void bcf_empty(bcf1_t *v) +{ + bcf_clear1(v); + free(v->d.id); + free(v->d.als); + free(v->d.allele); free(v->d.flt); free(v->d.info); free(v->d.fmt); + if (v->d.var ) free(v->d.var); + free(v->shared.s); free(v->indiv.s); + memset(&v->d,0,sizeof(v->d)); + memset(&v->shared,0,sizeof(v->shared)); + memset(&v->indiv,0,sizeof(v->indiv)); +} + +void bcf_destroy(bcf1_t *v) +{ + if (!v) return; + bcf_empty1(v); + free(v); +} + +static inline int bcf_read1_core(BGZF *fp, bcf1_t *v) +{ + uint8_t x[32]; + ssize_t ret; + uint32_t shared_len, indiv_len; + if ((ret = bgzf_read(fp, x, 32)) != 32) { + if (ret == 0) return -1; + return -2; + } + bcf_clear1(v); + shared_len = le_to_u32(x); + if (shared_len < 24) return -2; + shared_len -= 24; // to exclude six 32-bit integers + indiv_len = le_to_u32(x + 4); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // ks_resize() normally allocates 1.5 * requested size to allow for growth + if ((uint64_t) shared_len + indiv_len > FUZZ_ALLOC_LIMIT / 3 * 2) return -2; +#endif + if (ks_resize(&v->shared, shared_len ? shared_len : 1) != 0) return -2; + if (ks_resize(&v->indiv, indiv_len ? indiv_len : 1) != 0) return -2; + v->rid = le_to_i32(x + 8); + v->pos = le_to_u32(x + 12); + if ( v->pos==UINT32_MAX ) v->pos = -1; // this is for telomere coordinate, e.g. 
MT:0 + v->rlen = le_to_i32(x + 16); + v->qual = le_to_float(x + 20); + v->n_info = le_to_u16(x + 24); + v->n_allele = le_to_u16(x + 26); + v->n_sample = le_to_u32(x + 28) & 0xffffff; + v->n_fmt = x[31]; + v->shared.l = shared_len; + v->indiv.l = indiv_len; + // silent fix of broken BCFs produced by earlier versions of bcf_subset, prior to and including bd6ed8b4 + if ( (!v->indiv.l || !v->n_sample) && v->n_fmt ) v->n_fmt = 0; + + if (bgzf_read(fp, v->shared.s, v->shared.l) != v->shared.l) return -2; + if (bgzf_read(fp, v->indiv.s, v->indiv.l) != v->indiv.l) return -2; + return 0; +} + +#define bit_array_size(n) ((n)/8+1) +#define bit_array_set(a,i) ((a)[(i)/8] |= 1 << ((i)%8)) +#define bit_array_clear(a,i) ((a)[(i)/8] &= ~(1 << ((i)%8))) +#define bit_array_test(a,i) ((a)[(i)/8] & (1 << ((i)%8))) + +static int bcf_dec_typed_int1_safe(uint8_t *p, uint8_t *end, uint8_t **q, + int32_t *val) { + uint32_t t; + if (end - p < 2) return -1; + t = *p++ & 0xf; + /* Use if .. else if ... else instead of switch to force order. Assumption + is that small integers are more frequent than big ones. */ + if (t == BCF_BT_INT8) { + *val = *(int8_t *) p++; + } else { + if (end - p < (1<= end) return -1; + *type = *p & 0xf; + if (*p>>4 != 15) { + *q = p + 1; + *num = *p >> 4; + return 0; + } + r = bcf_dec_typed_int1_safe(p + 1, end, q, num); + if (r) return r; + return *num >= 0 ? 0 : -1; +} + +static const char *get_type_name(int type) { + const char *types[9] = { + "null", "int (8-bit)", "int (16 bit)", "int (32 bit)", + "unknown", "float", "unknown", "char", "unknown" + }; + int t = (type >= 0 && type < 8) ? 
type : 8; + return types[t]; +} + +static void bcf_record_check_err(const bcf_hdr_t *hdr, bcf1_t *rec, + char *type, uint32_t *reports, int i) { + if (*reports == 0 || hts_verbose >= HTS_LOG_DEBUG) + hts_log_warning("Bad BCF record at %s:%"PRIhts_pos + ": Invalid FORMAT %s %d", + bcf_seqname_safe(hdr,rec), rec->pos+1, type, i); + (*reports)++; +} + +static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) { + uint8_t *ptr, *end; + size_t bytes; + uint32_t err = 0; + int type = 0; + int num = 0; + int reflen = 0; + uint32_t i, reports; + const uint32_t is_integer = ((1 << BCF_BT_INT8) | + (1 << BCF_BT_INT16) | +#ifdef VCF_ALLOW_INT64 + (1 << BCF_BT_INT64) | +#endif + (1 << BCF_BT_INT32)); + const uint32_t is_valid_type = (is_integer | + (1 << BCF_BT_NULL) | + (1 << BCF_BT_FLOAT) | + (1 << BCF_BT_CHAR)); + int32_t max_id = hdr ? hdr->n[BCF_DT_ID] : 0; + + // Check for valid contig ID + if (rec->rid < 0 + || (hdr && (rec->rid >= hdr->n[BCF_DT_CTG] + || hdr->id[BCF_DT_CTG][rec->rid].key == NULL))) { + hts_log_warning("Bad BCF record at %"PRIhts_pos": Invalid %s id %d", rec->pos+1, "CONTIG", rec->rid); + err |= BCF_ERR_CTG_INVALID; + } + + // Check ID + ptr = (uint8_t *) rec->shared.s; + end = ptr + rec->shared.l; + if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; + if (type != BCF_BT_CHAR) { + hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "ID", type, get_type_name(type)); + err |= BCF_ERR_TAG_INVALID; + } + bytes = (size_t) num << bcf_type_shift[type]; + if (end - ptr < bytes) goto bad_shared; + ptr += bytes; + + // Check REF and ALT + if (rec->n_allele < 1) { + hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": No REF allele", + bcf_seqname_safe(hdr,rec), rec->pos+1); + err |= BCF_ERR_TAG_UNDEF; + } + + reports = 0; + for (i = 0; i < rec->n_allele; i++) { + if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; + if (type != BCF_BT_CHAR) { + if 
(!reports++ || hts_verbose >= HTS_LOG_DEBUG) + hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "REF/ALT", type, get_type_name(type)); + err |= BCF_ERR_CHAR; + } + if (i == 0) reflen = num; + bytes = (size_t) num << bcf_type_shift[type]; + if (end - ptr < bytes) goto bad_shared; + ptr += bytes; + } + + // Check FILTER + reports = 0; + if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; + if (num > 0) { + bytes = (size_t) num << bcf_type_shift[type]; + if (((1 << type) & is_integer) == 0) { + hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "FILTER", type, get_type_name(type)); + err |= BCF_ERR_TAG_INVALID; + if (end - ptr < bytes) goto bad_shared; + ptr += bytes; + } else { + if (end - ptr < bytes) goto bad_shared; + for (i = 0; i < num; i++) { + int32_t key = bcf_dec_int1(ptr, type, &ptr); + if (key < 0 + || (hdr && (key >= max_id + || hdr->id[BCF_DT_ID][key].key == NULL))) { + if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) + hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s id %d", bcf_seqname_safe(hdr,rec), rec->pos+1, "FILTER", key); + err |= BCF_ERR_TAG_UNDEF; + } + } + } + } + + // Check INFO + reports = 0; + bcf_idpair_t *id_tmp = hdr ? 
hdr->id[BCF_DT_ID] : NULL; + for (i = 0; i < rec->n_info; i++) { + int32_t key = -1; + if (bcf_dec_typed_int1_safe(ptr, end, &ptr, &key) != 0) goto bad_shared; + if (key < 0 || (hdr && (key >= max_id + || id_tmp[key].key == NULL))) { + if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) + hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s id %d", bcf_seqname_safe(hdr,rec), rec->pos+1, "INFO", key); + err |= BCF_ERR_TAG_UNDEF; + } + if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_shared; + if (((1 << type) & is_valid_type) == 0 + || (type == BCF_BT_NULL && num > 0)) { + if (!reports++ || hts_verbose >= HTS_LOG_DEBUG) + hts_log_warning("Bad BCF record at %s:%"PRIhts_pos": Invalid %s type %d (%s)", bcf_seqname_safe(hdr,rec), rec->pos+1, "INFO", type, get_type_name(type)); + err |= BCF_ERR_TAG_INVALID; + } + bytes = (size_t) num << bcf_type_shift[type]; + if (end - ptr < bytes) goto bad_shared; + ptr += bytes; + } + + // Check FORMAT and individual information + ptr = (uint8_t *) rec->indiv.s; + end = ptr + rec->indiv.l; + reports = 0; + for (i = 0; i < rec->n_fmt; i++) { + int32_t key = -1; + if (bcf_dec_typed_int1_safe(ptr, end, &ptr, &key) != 0) goto bad_indiv; + if (key < 0 + || (hdr && (key >= max_id + || id_tmp[key].key == NULL))) { + bcf_record_check_err(hdr, rec, "id", &reports, key); + err |= BCF_ERR_TAG_UNDEF; + } + if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) goto bad_indiv; + if (((1 << type) & is_valid_type) == 0 + || (type == BCF_BT_NULL && num > 0)) { + bcf_record_check_err(hdr, rec, "type", &reports, type); + err |= BCF_ERR_TAG_INVALID; + } + bytes = ((size_t) num << bcf_type_shift[type]) * rec->n_sample; + if (end - ptr < bytes) goto bad_indiv; + ptr += bytes; + } + + if (!err && rec->rlen < 0) { + // Treat bad rlen as a warning instead of an error, and try to + // fix up by using the length of the stored REF allele. 
+ static int warned = 0; + if (!warned) { + hts_log_warning("BCF record at %s:%"PRIhts_pos" has invalid RLEN (%"PRIhts_pos"). " + "Only one invalid RLEN will be reported.", + bcf_seqname_safe(hdr,rec), rec->pos+1, rec->rlen); + warned = 1; + } + rec->rlen = reflen >= 0 ? reflen : 0; + } + + rec->errcode |= err; + + return err ? -2 : 0; // Return -2 so bcf_read() reports an error + + bad_shared: + hts_log_error("Bad BCF record at %s:%"PRIhts_pos" - shared section malformed or too short", bcf_seqname_safe(hdr,rec), rec->pos+1); + return -2; + + bad_indiv: + hts_log_error("Bad BCF record at %s:%"PRIhts_pos" - individuals section malformed or too short", bcf_seqname_safe(hdr,rec), rec->pos+1); + return -2; +} + +static inline uint8_t *bcf_unpack_fmt_core1(uint8_t *ptr, int n_sample, bcf_fmt_t *fmt); +int bcf_subset_format(const bcf_hdr_t *hdr, bcf1_t *rec) +{ + if ( !hdr->keep_samples ) return 0; + if ( !bcf_hdr_nsamples(hdr) ) + { + rec->indiv.l = rec->n_sample = 0; + return 0; + } + + int i, j; + uint8_t *ptr = (uint8_t*)rec->indiv.s, *dst = NULL, *src; + bcf_dec_t *dec = &rec->d; + hts_expand(bcf_fmt_t, rec->n_fmt, dec->m_fmt, dec->fmt); + for (i=0; im_fmt; ++i) dec->fmt[i].p_free = 0; + + for (i=0; in_fmt; i++) + { + ptr = bcf_unpack_fmt_core1(ptr, rec->n_sample, &dec->fmt[i]); + src = dec->fmt[i].p - dec->fmt[i].size; + if ( dst ) + { + memmove(dec->fmt[i-1].p + dec->fmt[i-1].p_len, dec->fmt[i].p - dec->fmt[i].p_off, dec->fmt[i].p_off); + dec->fmt[i].p = dec->fmt[i-1].p + dec->fmt[i-1].p_len + dec->fmt[i].p_off; + } + dst = dec->fmt[i].p; + for (j=0; jnsamples_ori; j++) + { + src += dec->fmt[i].size; + if ( !bit_array_test(hdr->keep_samples,j) ) continue; + memmove(dst, src, dec->fmt[i].size); + dst += dec->fmt[i].size; + } + rec->indiv.l -= dec->fmt[i].p_len - (dst - dec->fmt[i].p); + dec->fmt[i].p_len = dst - dec->fmt[i].p; + } + rec->unpacked |= BCF_UN_FMT; + + rec->n_sample = bcf_hdr_nsamples(hdr); + return 0; +} + +int bcf_read(htsFile *fp, const bcf_hdr_t 
*h, bcf1_t *v) +{ + if (fp->format.format == vcf) return vcf_read(fp,h,v); + int ret = bcf_read1_core(fp->fp.bgzf, v); + if (ret == 0) ret = bcf_record_check(h, v); + if ( ret!=0 || !h->keep_samples ) return ret; + return bcf_subset_format(h,v); +} + +int bcf_readrec(BGZF *fp, void *null, void *vv, int *tid, hts_pos_t *beg, hts_pos_t *end) +{ + bcf1_t *v = (bcf1_t *) vv; + int ret = bcf_read1_core(fp, v); + if (ret == 0) ret = bcf_record_check(NULL, v); + if (ret >= 0) + *tid = v->rid, *beg = v->pos, *end = v->pos + v->rlen; + return ret; +} + +static inline int bcf1_sync_id(bcf1_t *line, kstring_t *str) +{ + // single typed string + if ( line->d.id && strcmp(line->d.id, ".") ) { + return bcf_enc_vchar(str, strlen(line->d.id), line->d.id); + } else { + return bcf_enc_size(str, 0, BCF_BT_CHAR); + } +} +static inline int bcf1_sync_alleles(bcf1_t *line, kstring_t *str) +{ + // list of typed strings + int i; + for (i=0; in_allele; i++) { + if (bcf_enc_vchar(str, strlen(line->d.allele[i]), line->d.allele[i]) < 0) + return -1; + } + if ( !line->rlen && line->n_allele ) line->rlen = strlen(line->d.allele[0]); + return 0; +} +static inline int bcf1_sync_filter(bcf1_t *line, kstring_t *str) +{ + // typed vector of integers + if ( line->d.n_flt ) { + return bcf_enc_vint(str, line->d.n_flt, line->d.flt, -1); + } else { + return bcf_enc_vint(str, 0, 0, -1); + } +} + +static inline int bcf1_sync_info(bcf1_t *line, kstring_t *str) +{ + // pairs of typed vectors + int i, irm = -1, e = 0; + for (i=0; in_info; i++) + { + bcf_info_t *info = &line->d.info[i]; + if ( !info->vptr ) + { + // marked for removal + if ( irm < 0 ) irm = i; + continue; + } + e |= kputsn_(info->vptr - info->vptr_off, info->vptr_len + info->vptr_off, str) < 0; + if ( irm >=0 ) + { + bcf_info_t tmp = line->d.info[irm]; line->d.info[irm] = line->d.info[i]; line->d.info[i] = tmp; + while ( irm<=i && line->d.info[irm].vptr ) irm++; + } + } + if ( irm>=0 ) line->n_info = irm; + return e == 0 ? 
0 : -1; +} + +static int bcf1_sync(bcf1_t *line) +{ + char *shared_ori = line->shared.s; + size_t prev_len; + + kstring_t tmp = {0,0,0}; + if ( !line->shared.l ) + { + // New line created via API, BCF data blocks do not exist. Get it ready for BCF output + tmp = line->shared; + bcf1_sync_id(line, &tmp); + line->unpack_size[0] = tmp.l; prev_len = tmp.l; + + bcf1_sync_alleles(line, &tmp); + line->unpack_size[1] = tmp.l - prev_len; prev_len = tmp.l; + + bcf1_sync_filter(line, &tmp); + line->unpack_size[2] = tmp.l - prev_len; + + bcf1_sync_info(line, &tmp); + line->shared = tmp; + } + else if ( line->d.shared_dirty ) + { + // The line was edited, update the BCF data block. + + if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line,BCF_UN_STR); + + // ptr_ori points to the original unchanged BCF data. + uint8_t *ptr_ori = (uint8_t *) line->shared.s; + + // ID: single typed string + if ( line->d.shared_dirty & BCF1_DIRTY_ID ) + bcf1_sync_id(line, &tmp); + else + kputsn_(ptr_ori, line->unpack_size[0], &tmp); + ptr_ori += line->unpack_size[0]; + line->unpack_size[0] = tmp.l; prev_len = tmp.l; + + // REF+ALT: list of typed strings + if ( line->d.shared_dirty & BCF1_DIRTY_ALS ) + bcf1_sync_alleles(line, &tmp); + else + { + kputsn_(ptr_ori, line->unpack_size[1], &tmp); + if ( !line->rlen && line->n_allele ) line->rlen = strlen(line->d.allele[0]); + } + ptr_ori += line->unpack_size[1]; + line->unpack_size[1] = tmp.l - prev_len; prev_len = tmp.l; + + if ( line->unpacked & BCF_UN_FLT ) + { + // FILTER: typed vector of integers + if ( line->d.shared_dirty & BCF1_DIRTY_FLT ) + bcf1_sync_filter(line, &tmp); + else if ( line->d.n_flt ) + kputsn_(ptr_ori, line->unpack_size[2], &tmp); + else + bcf_enc_vint(&tmp, 0, 0, -1); + ptr_ori += line->unpack_size[2]; + line->unpack_size[2] = tmp.l - prev_len; + + if ( line->unpacked & BCF_UN_INFO ) + { + // INFO: pairs of typed vectors + if ( line->d.shared_dirty & BCF1_DIRTY_INF ) + { + bcf1_sync_info(line, &tmp); + ptr_ori = 
(uint8_t*)line->shared.s + line->shared.l; + } + } + } + + int size = line->shared.l - (size_t)ptr_ori + (size_t)line->shared.s; + if ( size ) kputsn_(ptr_ori, size, &tmp); + + free(line->shared.s); + line->shared = tmp; + } + if ( line->shared.s != shared_ori && line->unpacked & BCF_UN_INFO ) + { + // Reallocated line->shared.s block invalidated line->d.info[].vptr pointers + size_t off_new = line->unpack_size[0] + line->unpack_size[1] + line->unpack_size[2]; + int i; + for (i=0; in_info; i++) + { + uint8_t *vptr_free = line->d.info[i].vptr_free ? line->d.info[i].vptr - line->d.info[i].vptr_off : NULL; + line->d.info[i].vptr = (uint8_t*) line->shared.s + off_new + line->d.info[i].vptr_off; + off_new += line->d.info[i].vptr_len + line->d.info[i].vptr_off; + if ( vptr_free ) + { + free(vptr_free); + line->d.info[i].vptr_free = 0; + } + } + } + + if ( line->n_sample && line->n_fmt && (!line->indiv.l || line->d.indiv_dirty) ) + { + // The genotype fields changed or are not present + tmp.l = tmp.m = 0; tmp.s = NULL; + int i, irm = -1; + for (i=0; in_fmt; i++) + { + bcf_fmt_t *fmt = &line->d.fmt[i]; + if ( !fmt->p ) + { + // marked for removal + if ( irm < 0 ) irm = i; + continue; + } + kputsn_(fmt->p - fmt->p_off, fmt->p_len + fmt->p_off, &tmp); + if ( irm >=0 ) + { + bcf_fmt_t tfmt = line->d.fmt[irm]; line->d.fmt[irm] = line->d.fmt[i]; line->d.fmt[i] = tfmt; + while ( irm<=i && line->d.fmt[irm].p ) irm++; + } + + } + if ( irm>=0 ) line->n_fmt = irm; + free(line->indiv.s); + line->indiv = tmp; + + // Reallocated line->indiv.s block invalidated line->d.fmt[].p pointers + size_t off_new = 0; + for (i=0; in_fmt; i++) + { + uint8_t *p_free = line->d.fmt[i].p_free ? 
line->d.fmt[i].p - line->d.fmt[i].p_off : NULL; + line->d.fmt[i].p = (uint8_t*) line->indiv.s + off_new + line->d.fmt[i].p_off; + off_new += line->d.fmt[i].p_len + line->d.fmt[i].p_off; + if ( p_free ) + { + free(p_free); + line->d.fmt[i].p_free = 0; + } + } + } + if ( !line->n_sample ) line->n_fmt = 0; + line->d.shared_dirty = line->d.indiv_dirty = 0; + return 0; +} + +bcf1_t *bcf_copy(bcf1_t *dst, bcf1_t *src) +{ + bcf1_sync(src); + + bcf_clear(dst); + dst->rid = src->rid; + dst->pos = src->pos; + dst->rlen = src->rlen; + dst->qual = src->qual; + dst->n_info = src->n_info; dst->n_allele = src->n_allele; + dst->n_fmt = src->n_fmt; dst->n_sample = src->n_sample; + + if ( dst->shared.m < src->shared.l ) + { + dst->shared.s = (char*) realloc(dst->shared.s, src->shared.l); + dst->shared.m = src->shared.l; + } + dst->shared.l = src->shared.l; + memcpy(dst->shared.s,src->shared.s,dst->shared.l); + + if ( dst->indiv.m < src->indiv.l ) + { + dst->indiv.s = (char*) realloc(dst->indiv.s, src->indiv.l); + dst->indiv.m = src->indiv.l; + } + dst->indiv.l = src->indiv.l; + memcpy(dst->indiv.s,src->indiv.s,dst->indiv.l); + + return dst; +} +bcf1_t *bcf_dup(bcf1_t *src) +{ + bcf1_t *out = bcf_init1(); + return bcf_copy(out, src); +} + +int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v) +{ + if ( h->dirty ) { + if (bcf_hdr_sync(h) < 0) return -1; + } + if ( bcf_hdr_nsamples(h)!=v->n_sample ) + { + hts_log_error("Broken VCF record, the number of columns at %s:%"PRIhts_pos" does not match the number of samples (%d vs %d)", + bcf_seqname_safe(h,v), v->pos+1, v->n_sample, bcf_hdr_nsamples(h)); + return -1; + } + + if ( hfp->format.format == vcf || hfp->format.format == text_format ) + return vcf_write(hfp,h,v); + + if ( v->errcode & ~BCF_ERR_LIMITS ) // todo: unsure about the other BCF_ERR_LIMITS branches in vcf_parse_format_alloc4() + { + // vcf_parse1() encountered a new contig or tag, undeclared in the + // header. 
At this point, the header must have been printed, + // proceeding would lead to a broken BCF file. Errors must be checked + // and cleared by the caller before we can proceed. + char errdescription[1024] = ""; + hts_log_error("Unchecked error (%d %s) at %s:%"PRIhts_pos, v->errcode, bcf_strerror(v->errcode, errdescription, sizeof(errdescription)), bcf_seqname_safe(h,v), v->pos+1); + return -1; + } + bcf1_sync(v); // check if the BCF record was modified + + if ( v->unpacked & BCF_IS_64BIT ) + { + hts_log_error("Data at %s:%"PRIhts_pos" contains 64-bit values not representable in BCF. Please use VCF instead", bcf_seqname_safe(h,v), v->pos+1); + return -1; + } + + BGZF *fp = hfp->fp.bgzf; + uint8_t x[32]; + u32_to_le(v->shared.l + 24, x); // to include six 32-bit integers + u32_to_le(v->indiv.l, x + 4); + i32_to_le(v->rid, x + 8); + u32_to_le(v->pos, x + 12); + u32_to_le(v->rlen, x + 16); + float_to_le(v->qual, x + 20); + u16_to_le(v->n_info, x + 24); + u16_to_le(v->n_allele, x + 26); + u32_to_le((uint32_t)v->n_fmt<<24 | (v->n_sample & 0xffffff), x + 28); + if ( bgzf_write(fp, x, 32) != 32 ) return -1; + if ( bgzf_write(fp, v->shared.s, v->shared.l) != v->shared.l ) return -1; + if ( bgzf_write(fp, v->indiv.s, v->indiv.l) != v->indiv.l ) return -1; + + if (hfp->idx) { + if (bgzf_idx_push(fp, hfp->idx, v->rid, v->pos, v->pos + v->rlen, + bgzf_tell(fp), 1) < 0) + return -1; + } + + return 0; +} + +/********************** + *** VCF header I/O *** + **********************/ + +static int add_missing_contig_hrec(bcf_hdr_t *h, const char *name) { + bcf_hrec_t *hrec = calloc(1, sizeof(bcf_hrec_t)); + int save_errno; + if (!hrec) goto fail; + + hrec->key = strdup("contig"); + if (!hrec->key) goto fail; + + if (bcf_hrec_add_key(hrec, "ID", strlen("ID")) < 0) goto fail; + if (bcf_hrec_set_val(hrec, hrec->nkeys-1, name, strlen(name), 0) < 0) + goto fail; + if (bcf_hdr_add_hrec(h, hrec) < 0) + goto fail; + return 0; + + fail: + save_errno = errno; + hts_log_error("%s", 
strerror(errno)); + if (hrec) bcf_hrec_destroy(hrec); + errno = save_errno; + return -1; +} + +bcf_hdr_t *vcf_hdr_read(htsFile *fp) +{ + kstring_t txt, *s = &fp->line; + int ret; + bcf_hdr_t *h; + tbx_t *idx = NULL; + const char **names = NULL; + h = bcf_hdr_init("r"); + if (!h) { + hts_log_error("Failed to allocate bcf header"); + return NULL; + } + txt.l = txt.m = 0; txt.s = 0; + while ((ret = hts_getline(fp, KS_SEP_LINE, s)) >= 0) { + int e = 0; + if (s->l == 0) continue; + if (s->s[0] != '#') { + hts_log_error("No sample line"); + goto error; + } + if (s->s[1] != '#' && fp->fn_aux) { // insert contigs here + kstring_t tmp = { 0, 0, NULL }; + hFILE *f = hopen(fp->fn_aux, "r"); + if (f == NULL) { + hts_log_error("Couldn't open \"%s\"", fp->fn_aux); + goto error; + } + while (tmp.l = 0, kgetline(&tmp, (kgets_func *) hgets, f) >= 0) { + char *tab = strchr(tmp.s, '\t'); + if (tab == NULL) continue; + e |= (kputs("##contig=\n", 2, &txt) < 0); + } + free(tmp.s); + if (hclose(f) != 0) { + hts_log_error("Error on closing %s", fp->fn_aux); + goto error; + } + if (e) goto error; + } + if (kputsn(s->s, s->l, &txt) < 0) goto error; + if (kputc('\n', &txt) < 0) goto error; + if (s->s[1] != '#') break; + } + if ( ret < -1 ) goto error; + if ( !txt.s ) + { + hts_log_error("Could not read the header"); + goto error; + } + if ( bcf_hdr_parse(h, txt.s) < 0 ) goto error; + + // check tabix index, are all contigs listed in the header? 
add the missing ones + idx = tbx_index_load3(fp->fn, NULL, HTS_IDX_SILENT_FAIL); + if ( idx ) + { + int i, n, need_sync = 0; + names = tbx_seqnames(idx, &n); + if (!names) goto error; + for (i=0; ivalue ) + { + int j, nout = 0; + e |= ksprintf(str, "##%s=<", hrec->key) < 0; + for (j=0; jnkeys; j++) + { + // do not output IDX if output is VCF + if ( !is_bcf && !strcmp("IDX",hrec->keys[j]) ) continue; + if ( nout ) e |= kputc(',',str) < 0; + e |= ksprintf(str,"%s=%s", hrec->keys[j], hrec->vals[j]) < 0; + nout++; + } + e |= ksprintf(str,">\n") < 0; + } + else + e |= ksprintf(str,"##%s=%s\n", hrec->key,hrec->value) < 0; + + return e == 0 ? 0 : -1; +} + +int bcf_hrec_format(const bcf_hrec_t *hrec, kstring_t *str) +{ + return _bcf_hrec_format(hrec,0,str); +} + +int bcf_hdr_format(const bcf_hdr_t *hdr, int is_bcf, kstring_t *str) +{ + int i, r = 0; + for (i=0; inhrec; i++) + r |= _bcf_hrec_format(hdr->hrec[i], is_bcf, str) < 0; + + r |= ksprintf(str, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO") < 0; + if ( bcf_hdr_nsamples(hdr) ) + { + r |= ksprintf(str, "\tFORMAT") < 0; + for (i=0; isamples[i]) < 0; + } + r |= ksprintf(str, "\n") < 0; + + return r ? 
-1 : 0; +} + +char *bcf_hdr_fmt_text(const bcf_hdr_t *hdr, int is_bcf, int *len) +{ + kstring_t txt = {0,0,0}; + if (bcf_hdr_format(hdr, is_bcf, &txt) < 0) + return NULL; + if ( len ) *len = txt.l; + return txt.s; +} + +const char **bcf_hdr_seqnames(const bcf_hdr_t *h, int *n) +{ + vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG]; + int i, tid, m = kh_size(d); + const char **names = (const char**) calloc(m,sizeof(const char*)); + if ( !names ) + { + hts_log_error("Failed to allocate memory"); + *n = 0; + return NULL; + } + khint_t k; + for (k=kh_begin(d); k= m ) + { + // This can happen after a contig has been removed from BCF header via bcf_hdr_remove() + if ( hts_resize(const char*, tid + 1, &m, &names, HTS_RESIZE_CLEAR)<0 ) + { + hts_log_error("Failed to allocate memory"); + *n = 0; + free(names); + return NULL; + } + m = tid + 1; + } + names[tid] = kh_key(d,k); + } + // ensure there are no gaps + for (i=0,tid=0; tidformat.compression!=no_compression ) { + ret = bgzf_write(fp->fp.bgzf, htxt.s, htxt.l); + if (bgzf_flush(fp->fp.bgzf) != 0) return -1; + } else { + ret = hwrite(fp->fp.hfile, htxt.s, htxt.l); + } + free(htxt.s); + return ret<0 ? -1 : 0; +} + +/*********************** + *** Typed value I/O *** + ***********************/ + +int bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize) +{ + int32_t max = INT32_MIN, min = INT32_MAX; + int i; + if (n <= 0) { + return bcf_enc_size(s, 0, BCF_BT_NULL); + } else if (n == 1) { + return bcf_enc_int1(s, a[0]); + } else { + if (wsize <= 0) wsize = n; + + // Equivalent to: + // for (i = 0; i < n; ++i) { + // if (a[i] == bcf_int32_missing || a[i] == bcf_int32_vector_end ) + // continue; + // if (max < a[i]) max = a[i]; + // if (min > a[i]) min = a[i]; + // } + int max4[4] = {INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN}; + int min4[4] = {INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX}; + for (i = 0; i < (n&~3); i+=4) { + // bcf_int32_missing == INT32_MIN and + // bcf_int32_vector_end == INT32_MIN+1. 
+ // We skip these, but can mostly avoid explicit checking + if (max4[0] < a[i+0]) max4[0] = a[i+0]; + if (max4[1] < a[i+1]) max4[1] = a[i+1]; + if (max4[2] < a[i+2]) max4[2] = a[i+2]; + if (max4[3] < a[i+3]) max4[3] = a[i+3]; + if (min4[0] > a[i+0] && a[i+0] > INT32_MIN+1) min4[0] = a[i+0]; + if (min4[1] > a[i+1] && a[i+1] > INT32_MIN+1) min4[1] = a[i+1]; + if (min4[2] > a[i+2] && a[i+2] > INT32_MIN+1) min4[2] = a[i+2]; + if (min4[3] > a[i+3] && a[i+3] > INT32_MIN+1) min4[3] = a[i+3]; + } + min = min4[0]; + if (min > min4[1]) min = min4[1]; + if (min > min4[2]) min = min4[2]; + if (min > min4[3]) min = min4[3]; + max = max4[0]; + if (max < max4[1]) max = max4[1]; + if (max < max4[2]) max = max4[2]; + if (max < max4[3]) max = max4[3]; + for (; i < n; ++i) { + if (max < a[i]) max = a[i]; + if (min > a[i] && a[i] > INT32_MIN+1) min = a[i]; + } + + if (max <= BCF_MAX_BT_INT8 && min >= BCF_MIN_BT_INT8) { + if (bcf_enc_size(s, wsize, BCF_BT_INT8) < 0 || + ks_resize(s, s->l + n) < 0) + return -1; + uint8_t *p = (uint8_t *) s->s + s->l; + for (i = 0; i < n; ++i, p++) { + if ( a[i]==bcf_int32_vector_end ) *p = bcf_int8_vector_end; + else if ( a[i]==bcf_int32_missing ) *p = bcf_int8_missing; + else *p = a[i]; + } + s->l += n; + } else if (max <= BCF_MAX_BT_INT16 && min >= BCF_MIN_BT_INT16) { + uint8_t *p; + if (bcf_enc_size(s, wsize, BCF_BT_INT16) < 0 || + ks_resize(s, s->l + n * sizeof(int16_t)) < 0) + return -1; + p = (uint8_t *) s->s + s->l; + for (i = 0; i < n; ++i) + { + int16_t x; + if ( a[i]==bcf_int32_vector_end ) x = bcf_int16_vector_end; + else if ( a[i]==bcf_int32_missing ) x = bcf_int16_missing; + else x = a[i]; + i16_to_le(x, p); + p += sizeof(int16_t); + } + s->l += n * sizeof(int16_t); + } else { + uint8_t *p; + if (bcf_enc_size(s, wsize, BCF_BT_INT32) < 0 || + ks_resize(s, s->l + n * sizeof(int32_t)) < 0) + return -1; + p = (uint8_t *) s->s + s->l; + for (i = 0; i < n; ++i) { + i32_to_le(a[i], p); + p += sizeof(int32_t); + } + s->l += n * sizeof(int32_t); + 
} + } + + return 0; +} + +#ifdef VCF_ALLOW_INT64 +static int bcf_enc_long1(kstring_t *s, int64_t x) { + uint32_t e = 0; + if (x <= BCF_MAX_BT_INT32 && x >= BCF_MIN_BT_INT32) + return bcf_enc_int1(s, x); + if (x == bcf_int64_vector_end) { + e |= bcf_enc_size(s, 1, BCF_BT_INT8); + e |= kputc(bcf_int8_vector_end, s) < 0; + } else if (x == bcf_int64_missing) { + e |= bcf_enc_size(s, 1, BCF_BT_INT8); + e |= kputc(bcf_int8_missing, s) < 0; + } else { + e |= bcf_enc_size(s, 1, BCF_BT_INT64); + e |= ks_expand(s, 8); + if (e == 0) { u64_to_le(x, (uint8_t *) s->s + s->l); s->l += 8; } + } + return e == 0 ? 0 : -1; +} +#endif + +static inline int serialize_float_array(kstring_t *s, size_t n, const float *a) { + uint8_t *p; + size_t i; + size_t bytes = n * sizeof(float); + + if (bytes / sizeof(float) != n) return -1; + if (ks_resize(s, s->l + bytes) < 0) return -1; + + p = (uint8_t *) s->s + s->l; + for (i = 0; i < n; i++) { + float_to_le(a[i], p); + p += sizeof(float); + } + s->l += bytes; + + return 0; +} + +int bcf_enc_vfloat(kstring_t *s, int n, float *a) +{ + assert(n >= 0); + bcf_enc_size(s, n, BCF_BT_FLOAT); + serialize_float_array(s, n, a); + return 0; // FIXME: check for errs in this function +} + +int bcf_enc_vchar(kstring_t *s, int l, const char *a) +{ + bcf_enc_size(s, l, BCF_BT_CHAR); + kputsn(a, l, s); + return 0; // FIXME: check for errs in this function +} + +// Special case of n==1 as it also occurs quite often in FORMAT data. +// This version is also small enough to get inlined. +static inline int bcf_fmt_array1(kstring_t *s, int type, void *data) { + uint32_t e = 0; + uint8_t *p = (uint8_t *)data; + int32_t v; + + // helps gcc more than clang here. In billions of cycles: + // bcf_fmt_array1 bcf_fmt_array + // gcc7: 23.2 24.3 + // gcc13: 21.6 23.0 + // clang13: 27.1 27.8 + switch (type) { + case BCF_BT_CHAR: + e |= kputc_(*p == bcf_str_missing ? '.' 
: *p, s) < 0; + break; + + case BCF_BT_INT8: + if (*(int8_t *)p != bcf_int8_vector_end) { + e |= ((*(int8_t *)p == bcf_int8_missing) + ? kputc_('.', s) + : kputw(*(int8_t *)p, s)) < 0; + } + break; + case BCF_BT_INT16: + v = le_to_i16(p); + if (v != bcf_int16_vector_end) { + e |= (v == bcf_int16_missing + ? kputc_('.', s) + : kputw(v, s)) < 0; + } + break; + + case BCF_BT_INT32: + v = le_to_i32(p); + if (v != bcf_int32_vector_end) { + e |= (v == bcf_int32_missing + ? kputc_('.', s) + : kputw(v, s)) < 0; + } + break; + + case BCF_BT_FLOAT: + v = le_to_u32(p); + if (v != bcf_float_vector_end) { + e |= (v == bcf_float_missing + ? kputc_('.', s) + : kputd(le_to_float(p), s)) < 0; + } + break; + + default: + hts_log_error("Unexpected type %d", type); + return -1; + } + + return e == 0 ? 0 : -1; +} + +int bcf_fmt_array(kstring_t *s, int n, int type, void *data) +{ + int j = 0; + uint32_t e = 0; + if (n == 0) { + return kputc_('.', s) >= 0 ? 0 : -1; + } + + if (type == BCF_BT_CHAR) + { + char *p = (char *)data; + + // Note bcf_str_missing is already accounted for in n==0 above. + if (n >= 8) { + char *p_end = memchr(p, 0, n); + e |= kputsn(p, p_end ? p_end-p : n, s) < 0; + } else { + for (j = 0; j < n && *p; ++j, ++p) + e |= kputc(*p, s) < 0; + } + } + else + { + #define BRANCH(type_t, convert, is_missing, is_vector_end, kprint) { \ + uint8_t *p = (uint8_t *) data; \ + for (j=0; jid[BCF_DT_ID][key] vdict + int max_m; // number of elements in field array (ie commas) + int size; // field size (max_l or max_g*4 if is_gt) + int offset; // offset of buf into h->mem + uint32_t is_gt:1, // is genotype + max_g:31; // maximum number of genotypes + uint32_t max_l; // length of field + uint32_t y; // h->id[0][fmt[j].key].val->info[BCF_HL_FMT] + uint8_t *buf; // Pointer into h->mem +} fmt_aux_t; + +// fmt_aux_t field notes: +// max_* are biggest sizes of the various FORMAT fields across all samples. 
+// We use these after pivoting the data to ensure easy random access +// of a specific sample. +// +// max_m is only used for type BCF_HT_REAL or BCF_HT_INT +// max_g is only used for is_gt == 1 (will be BCF_HT_STR) +// max_l is only used for is_gt == 0 (will be BCF_HT_STR) +// +// These are computed in vcf_parse_format_max3 and used in +// vcf_parse_format_alloc4 to get the size. +// +// size is computed from max_g, max_l, max_m and is_gt. Once computed +// the max values are never accessed again. +// +// In theory all 4 vars could be coalesced into a single variable, but this +// significantly harms speed (even if done via a union). It's about 25-30% +// slower. + +static inline int align_mem(kstring_t *s) +{ + int e = 0; + if (s->l&7) { + uint64_t zero = 0; + e = kputsn((char*)&zero, 8 - (s->l&7), s) < 0; + } + return e == 0 ? 0 : -1; +} + +#define MAX_N_FMT 255 /* Limited by size of bcf1_t n_fmt field */ + +// detect FORMAT "." +static int vcf_parse_format_empty1(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, + const char *p, const char *q) { + const char *end = s->s + s->l; + if ( q>=end ) + { + hts_log_error("FORMAT column with no sample columns starting at %s:%"PRIhts_pos"", bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_NCOLS; + return -1; + } + + v->n_fmt = 0; + if ( p[0]=='.' && p[1]==0 ) // FORMAT field is empty "." 
+ { + v->n_sample = bcf_hdr_nsamples(h); + return 1; + } + + return 0; +} + +// get format information from the dictionary +static int vcf_parse_format_dict2(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, + const char *p, const char *q, fmt_aux_t *fmt) { + const vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; + char *t; + int j; + ks_tokaux_t aux1; + + for (j = 0, t = kstrtok(p, ":", &aux1); t; t = kstrtok(0, 0, &aux1), ++j) { + if (j >= MAX_N_FMT) { + v->errcode |= BCF_ERR_LIMITS; + hts_log_error("FORMAT column at %s:%"PRIhts_pos" lists more identifiers than htslib can handle", + bcf_seqname_safe(h,v), v->pos+1); + return -1; + } + + *(char*)aux1.p = 0; + khint_t k = kh_get(vdict, d, t); + if (k == kh_end(d) || kh_val(d, k).info[BCF_HL_FMT] == 15) { + if ( t[0]=='.' && t[1]==0 ) + { + hts_log_error("Invalid FORMAT tag name '.' at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_TAG_INVALID; + return -1; + } + hts_log_warning("FORMAT '%s' at %s:%"PRIhts_pos" is not defined in the header, assuming Type=String", t, bcf_seqname_safe(h,v), v->pos+1); + kstring_t tmp = {0,0,0}; + int l; + ksprintf(&tmp, "##FORMAT=", t); + bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); + free(tmp.s); + int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; + if (res < 0) bcf_hrec_destroy(hrec); + if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); + + k = kh_get(vdict, d, t); + v->errcode |= BCF_ERR_TAG_UNDEF; + if (res || k == kh_end(d)) { + hts_log_error("Could not add dummy header for FORMAT '%s' at %s:%"PRIhts_pos, t, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_TAG_INVALID; + return -1; + } + } + fmt[j].max_l = fmt[j].max_m = fmt[j].max_g = 0; + fmt[j].key = kh_val(d, k).id; + fmt[j].is_gt = (t[0] == 'G' && t[1] == 'T' && !t[2]); + fmt[j].y = h->id[0][fmt[j].key].val->info[BCF_HL_FMT]; + v->n_fmt++; + } + return 0; +} + +// compute max +static int vcf_parse_format_max3(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, + char *p, char *q, fmt_aux_t *fmt) { + int n_sample_ori = -1; + char *r = q + 1; // r: position in the format string + int l = 0, m = 1, g = 1, j; + v->n_sample = 0; // m: max vector size, l: max field len, g: max number of alleles + const char *end = s->s + s->l; + + while ( rkeep_samples ) + { + n_sample_ori++; + if ( !bit_array_test(h->keep_samples,n_sample_ori) ) + { + while ( *r!='\t' && ris_gt) g++; + break; + + case '\t': + *r = 0; // fall through + + default: // valid due to while loop above. 
+ case '\0': + case ':': + l = r - r_start; r_start = r; + if (f->max_m < m) f->max_m = m; + if (f->max_l < l) f->max_l = l; + if (f->is_gt && f->max_g < g) f->max_g = g; + l = 0, m = g = 1; + if ( *r==':' ) { + j++; f++; + if ( j>=v->n_fmt ) { + hts_log_error("Incorrect number of FORMAT fields at %s:%"PRIhts_pos"", + h->id[BCF_DT_CTG][v->rid].key, v->pos+1); + v->errcode |= BCF_ERR_NCOLS; + return -1; + } + } else goto end_for; + break; + } + if ( r>=end ) break; + r++; + } + end_for: + v->n_sample++; + if ( v->n_sample == bcf_hdr_nsamples(h) ) break; + r++; + } + + return 0; +} + +// allocate memory for arrays +static int vcf_parse_format_alloc4(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, + const char *p, const char *q, + fmt_aux_t *fmt) { + kstring_t *mem = (kstring_t*)&h->mem; + + int j; + for (j = 0; j < v->n_fmt; ++j) { + fmt_aux_t *f = &fmt[j]; + if ( !f->max_m ) f->max_m = 1; // omitted trailing format field + + if ((f->y>>4&0xf) == BCF_HT_STR) { + f->size = f->is_gt? f->max_g << 2 : f->max_l; + } else if ((f->y>>4&0xf) == BCF_HT_REAL || (f->y>>4&0xf) == BCF_HT_INT) { + f->size = f->max_m << 2; + } else { + hts_log_error("The format type %d at %s:%"PRIhts_pos" is currently not supported", f->y>>4&0xf, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_TAG_INVALID; + return -1; + } + + if (align_mem(mem) < 0) { + hts_log_error("Memory allocation failure at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_LIMITS; + return -1; + } + + // Limit the total memory to ~2Gb per VCF row. This should mean + // malformed VCF data is less likely to take excessive memory and/or + // time. 
+ if ((uint64_t) mem->l + v->n_sample * (uint64_t)f->size > INT_MAX) { + static int warned = 0; + if ( !warned ) hts_log_warning("Excessive memory required by FORMAT fields at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + warned = 1; + v->errcode |= BCF_ERR_LIMITS; + f->size = -1; + f->offset = 0; + continue; + } + + f->offset = mem->l; + if (ks_resize(mem, mem->l + v->n_sample * (size_t)f->size) < 0) { + hts_log_error("Memory allocation failure at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_LIMITS; + return -1; + } + mem->l += v->n_sample * f->size; + } + + { + int j; + for (j = 0; j < v->n_fmt; ++j) + fmt[j].buf = (uint8_t*)mem->s + fmt[j].offset; + } + + // check for duplicate tags + int i; + for (i=1; in_fmt; i++) + { + fmt_aux_t *ifmt = &fmt[i]; + if ( ifmt->size==-1 ) continue; // already marked for removal + for (j=0; jsize==-1 ) continue; // already marked for removal + if ( ifmt->key!=jfmt->key ) continue; + static int warned = 0; + if ( !warned ) hts_log_warning("Duplicate FORMAT tag %s at %s:%"PRIhts_pos, bcf_hdr_int2id(h,BCF_DT_ID,ifmt->key), bcf_seqname_safe(h,v), v->pos+1); + warned = 1; + v->errcode |= BCF_ERR_TAG_INVALID; + ifmt->size = -1; + ifmt->offset = 0; + break; + } + } + return 0; +} + +// Fill the sample fields +static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, + const char *p, const char *q, fmt_aux_t *fmt) { + static int extreme_val_warned = 0; + int n_sample_ori = -1; + // At beginning of the loop t points to the first char of a format + const char *t = q + 1; + int m = 0; // m: sample id + const int nsamples = bcf_hdr_nsamples(h); + + const char *end = s->s + s->l; + while ( tkeep_samples ) + { + n_sample_ori++; + if ( !bit_array_test(h->keep_samples,n_sample_ori) ) + { + while ( *t && ty>>4&0xf; + if (!z->buf) { + hts_log_error("Memory allocation failure for FORMAT field type %d at %s:%"PRIhts_pos, + z->y>>4&0xf, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= 
BCF_ERR_LIMITS; + return -1; + } + + if ( z->size==-1 ) + { + // this field is to be ignored, it's either too big or a duplicate + while ( *t != ':' && *t ) t++; + } + else if (htype == BCF_HT_STR) { + int l; + if (z->is_gt) { + // Genotypes. + // ([|/])+... where is [0-9]+ or ".". + int32_t is_phased = 0; + uint32_t *x = (uint32_t*)(z->buf + z->size * (size_t)m); + uint32_t unreadable = 0; + uint32_t max = 0; + int overflow = 0; + for (l = 0;; ++t) { + if (*t == '.') { + ++t, x[l++] = is_phased; + } else { + const char *tt = t; + uint32_t val; + // Or "v->n_allele < 10", but it doesn't + // seem to be any faster and this feels safer. + if (*t >= '0' && *t <= '9' && + !(t[1] >= '0' && t[1] <= '9')) { + val = *t++ - '0'; + } else { + val = hts_str2uint(t, (char **)&t, + sizeof(val) * CHAR_MAX - 2, + &overflow); + unreadable |= tt == t; + } + if (max < val) max = val; + x[l++] = (val + 1) << 1 | is_phased; + } + is_phased = (*t == '|'); + if (*t != '|' && *t != '/') break; + } + // Possibly check max against v->n_allele instead? + if (overflow || max > (INT32_MAX >> 1) - 1) { + hts_log_error("Couldn't read GT data: value too large at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + return -1; + } + if (unreadable) { + hts_log_error("Couldn't read GT data: value not a number or '.' at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + return -1; + } + if ( !l ) x[l++] = 0; // An empty field, insert missing value + for (; l < z->size>>2; ++l) + x[l] = bcf_int32_vector_end; + + } else { + // Otherwise arbitrary strings + char *x = (char*)z->buf + z->size * (size_t)m; + for (l = 0; *t != ':' && *t; ++t) + x[l++] = *t; + if (z->size > l) + memset(&x[l], 0, (z->size-l) * sizeof(*x)); + } + + } else if (htype == BCF_HT_INT) { + // One or more integers in an array + int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); + int l; + for (l = 0;; ++t) { + if (*t == '.') { + x[l++] = bcf_int32_missing, ++t; // ++t to skip "." 
+ } else { + int overflow = 0; + char *te; + long int tmp_val = hts_str2int(t, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); + if ( te==t || overflow || tmp_valBCF_MAX_BT_INT32 ) + { + if ( !extreme_val_warned ) + { + hts_log_warning("Extreme FORMAT/%s value encountered and set to missing at %s:%"PRIhts_pos, + h->id[BCF_DT_ID][fmt[j-1].key].key, bcf_seqname_safe(h,v), v->pos+1); + extreme_val_warned = 1; + } + tmp_val = bcf_int32_missing; + } + x[l++] = tmp_val; + t = te; + } + if (*t != ',') break; + } + if ( !l ) + x[l++] = bcf_int32_missing; + for (; l < z->size>>2; ++l) + x[l] = bcf_int32_vector_end; + + } else if (htype == BCF_HT_REAL) { + // One of more floating point values in an array + float *x = (float*)(z->buf + z->size * (size_t)m); + int l; + for (l = 0;; ++t) { + if (*t == '.' && !isdigit_c(t[1])) { + bcf_float_set_missing(x[l++]), ++t; // ++t to skip "." + } else { + int overflow = 0; + char *te; + float tmp_val = hts_str2dbl(t, &te, &overflow); + if ( (te==t || overflow) && !extreme_val_warned ) + { + hts_log_warning("Extreme FORMAT/%s value encountered at %s:%"PRIhts_pos, h->id[BCF_DT_ID][fmt[j-1].key].key, bcf_seqname(h,v), v->pos+1); + extreme_val_warned = 1; + } + x[l++] = tmp_val; + t = te; + } + if (*t != ',') break; + } + if ( !l ) + // An empty field, insert missing value + bcf_float_set_missing(x[l++]); + for (; l < z->size>>2; ++l) + bcf_float_set_vector_end(x[l]); + } else { + hts_log_error("Unknown FORMAT field type %d at %s:%"PRIhts_pos, htype, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_TAG_INVALID; + return -1; + } + + if (*t == '\0') { + break; + } + else if (*t == ':') { + t++; + } + else { + char buffer[8]; + hts_log_error("Invalid character %s in '%s' FORMAT field at %s:%"PRIhts_pos"", + hts_strprint(buffer, sizeof buffer, '\'', t, 1), + h->id[BCF_DT_ID][z->key].key, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_CHAR; + return -1; + } + } + + // fill end-of-vector values + for (; j < v->n_fmt; ++j) { + 
fmt_aux_t *z = &fmt[j]; + const int htype = z->y>>4&0xf; + int l; + + if (z->size == -1) // this field is to be ignored + continue; + + if (htype == BCF_HT_STR) { + if (z->is_gt) { + int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); + if (z->size) x[0] = bcf_int32_missing; + for (l = 1; l < z->size>>2; ++l) x[l] = bcf_int32_vector_end; + } else { + char *x = (char*)z->buf + z->size * (size_t)m; + if ( z->size ) { + x[0] = '.'; + memset(&x[1], 0, (z->size-1) * sizeof(*x)); + } + } + } else if (htype == BCF_HT_INT) { + int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m); + x[0] = bcf_int32_missing; + for (l = 1; l < z->size>>2; ++l) x[l] = bcf_int32_vector_end; + } else if (htype == BCF_HT_REAL) { + float *x = (float*)(z->buf + z->size * (size_t)m); + bcf_float_set_missing(x[0]); + for (l = 1; l < z->size>>2; ++l) bcf_float_set_vector_end(x[l]); + } + } + + m++; t++; + } + + return 0; +} + +// write individual genotype information +static int vcf_parse_format_gt6(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, + const char *p, const char *q, fmt_aux_t *fmt) { + kstring_t *str = &v->indiv; + int i, need_downsize = 0; + if (v->n_sample > 0) { + for (i = 0; i < v->n_fmt; ++i) { + fmt_aux_t *z = &fmt[i]; + if ( z->size==-1 ) { + need_downsize = 1; + continue; + } + bcf_enc_int1(str, z->key); + if ((z->y>>4&0xf) == BCF_HT_STR && !z->is_gt) { + bcf_enc_size(str, z->size, BCF_BT_CHAR); + kputsn((char*)z->buf, z->size * (size_t)v->n_sample, str); + } else if ((z->y>>4&0xf) == BCF_HT_INT || z->is_gt) { + bcf_enc_vint(str, (z->size>>2) * v->n_sample, (int32_t*)z->buf, z->size>>2); + } else { + bcf_enc_size(str, z->size>>2, BCF_BT_FLOAT); + if (serialize_float_array(str, (z->size>>2) * (size_t)v->n_sample, + (float *) z->buf) != 0) { + v->errcode |= BCF_ERR_LIMITS; + hts_log_error("Out of memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + return -1; + } + } + } + + } + if ( need_downsize ) { + i = 0; + while ( i < v->n_fmt ) { + if ( fmt[i].size==-1 ) + { + 
v->n_fmt--; + if ( i < v->n_fmt ) memmove(&fmt[i],&fmt[i+1],sizeof(*fmt)*(v->n_fmt-i)); + } + else + i++; + } + } + return 0; +} + +// validity checking +static int vcf_parse_format_check7(const bcf_hdr_t *h, bcf1_t *v) { + if ( v->n_sample!=bcf_hdr_nsamples(h) ) + { + hts_log_error("Number of columns at %s:%"PRIhts_pos" does not match the number of samples (%d vs %d)", + bcf_seqname_safe(h,v), v->pos+1, v->n_sample, bcf_hdr_nsamples(h)); + v->errcode |= BCF_ERR_NCOLS; + return -1; + } + if ( v->indiv.l > 0xffffffff ) + { + hts_log_error("The FORMAT at %s:%"PRIhts_pos" is too long", bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_LIMITS; + + // Error recovery: return -1 if this is a critical error or 0 if we want to ignore the FORMAT and proceed + v->n_fmt = 0; + return -1; + } + + return 0; +} + +// p,q is the start and the end of the FORMAT field +static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, + char *p, char *q) +{ + if ( !bcf_hdr_nsamples(h) ) return 0; + kstring_t *mem = (kstring_t*)&h->mem; + mem->l = 0; + + fmt_aux_t fmt[MAX_N_FMT]; + + // detect FORMAT "." + int ret; // +ve = ok, -ve = err + if ((ret = vcf_parse_format_empty1(s, h, v, p, q))) + return ret ? 0 : -1; + + // get format information from the dictionary + if (vcf_parse_format_dict2(s, h, v, p, q, fmt) < 0) + return -1; + + // FORMAT data is per-sample A:B:C A:B:C A:B:C ... but in memory it is + // stored as per-type arrays AAA... BBB... CCC... This is basically + // a data rotation or pivot. + + // The size of elements in the array grow to their maximum needed, + // permitting fast random access. This means however we have to first + // scan the whole FORMAT line to find the maximum of each type, and + // then scan it again to find the store the data. + // We break this down into compute-max, allocate, fill-out-buffers + + // TODO: ? 
+ // The alternative would be to pivot on the first pass, with fixed + // size entries for numerics and concatenated strings otherwise, also + // tracking maximum sizes. Then on a second pass we reallocate and + // copy the data again to a uniformly sized array. Two passes through + // memory, but without doubling string parsing. + + // compute max + if (vcf_parse_format_max3(s, h, v, p, q, fmt) < 0) + return -1; + + // allocate memory for arrays + if (vcf_parse_format_alloc4(s, h, v, p, q, fmt) < 0) + return -1; + + // fill the sample fields; at beginning of the loop + if (vcf_parse_format_fill5(s, h, v, p, q, fmt) < 0) + return -1; + + // write individual genotype information + if (vcf_parse_format_gt6(s, h, v, p, q, fmt) < 0) + return -1; + + // validity checking + if (vcf_parse_format_check7(h, v) < 0) + return -1; + + return 0; +} + +static khint_t fix_chromosome(const bcf_hdr_t *h, vdict_t *d, const char *p) { + // Simple error recovery for chromosomes not defined in the header. It will not help when VCF header has + // been already printed, but will enable tools like vcfcheck to proceed. + + kstring_t tmp = {0,0,0}; + khint_t k; + int l; + if (ksprintf(&tmp, "##contig=", p) < 0) + return kh_end(d); + bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); + free(tmp.s); + int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; + if (res < 0) bcf_hrec_destroy(hrec); + if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); + k = kh_get(vdict, d, p); + + return k; +} + +static int vcf_parse_filter(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p, char *q) { + int i, n_flt = 1, max_n_flt = 0; + char *r, *t; + int32_t *a_flt = NULL; + ks_tokaux_t aux1; + khint_t k; + vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; + // count the number of filters + if (*(q-1) == ';') *(q-1) = 0; + for (r = p; *r; ++r) + if (*r == ';') ++n_flt; + if (n_flt > max_n_flt) { + a_flt = malloc(n_flt * sizeof(*a_flt)); + if (!a_flt) { + hts_log_error("Could not allocate memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_LIMITS; // No appropriate code? + return -1; + } + max_n_flt = n_flt; + } + // add filters + for (t = kstrtok(p, ";", &aux1), i = 0; t; t = kstrtok(0, 0, &aux1)) { + *(char*)aux1.p = 0; + k = kh_get(vdict, d, t); + if (k == kh_end(d)) + { + // Simple error recovery for FILTERs not defined in the header. It will not help when VCF header has + // been already printed, but will enable tools like vcfcheck to proceed. + hts_log_warning("FILTER '%s' is not defined in the header", t); + kstring_t tmp = {0,0,0}; + int l; + ksprintf(&tmp, "##FILTER=", t); + bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); + free(tmp.s); + int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; + if (res < 0) bcf_hrec_destroy(hrec); + if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); + k = kh_get(vdict, d, t); + v->errcode |= BCF_ERR_TAG_UNDEF; + if (res || k == kh_end(d)) { + hts_log_error("Could not add dummy header for FILTER '%s' at %s:%"PRIhts_pos, t, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_TAG_INVALID; + free(a_flt); + return -1; + } + } + a_flt[i++] = kh_val(d, k).id; + } + + bcf_enc_vint(str, n_flt, a_flt, -1); + free(a_flt); + + return 0; +} + +static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p, char *q) { + static int extreme_int_warned = 0, negative_rlen_warned = 0; + int max_n_val = 0, overflow = 0; + char *r, *key; + khint_t k; + vdict_t *d = (vdict_t*)h->dict[BCF_DT_ID]; + int32_t *a_val = NULL; + + v->n_info = 0; + if (*(q-1) == ';') *(q-1) = 0; + for (r = key = p;; ++r) { + int c; + char *val, *end; + while (*r > '=' || (*r != ';' && *r != '=' && *r != 0)) r++; + if (v->n_info == UINT16_MAX) { + hts_log_error("Too many INFO entries at %s:%"PRIhts_pos, + bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_LIMITS; + goto fail; + } + val = end = NULL; + c = *r; *r = 0; + if (c == '=') { + val = r + 1; + + for (end = val; *end != ';' && *end != 0; ++end); + c = *end; *end = 0; + } else end = r; + if ( !*key ) { if (c==0) break; r = end; key = r + 1; continue; } // faulty VCF, ";;" in the INFO + k = kh_get(vdict, d, key); + if (k == kh_end(d) || kh_val(d, k).info[BCF_HL_INFO] == 15) + { + hts_log_warning("INFO '%s' is not defined in the header, assuming Type=String", key); + kstring_t tmp = {0,0,0}; + int l; + ksprintf(&tmp, "##INFO=", key); + bcf_hrec_t *hrec = bcf_hdr_parse_line(h,tmp.s,&l); + free(tmp.s); + int res = hrec ? 
bcf_hdr_add_hrec((bcf_hdr_t*)h, hrec) : -1; + if (res < 0) bcf_hrec_destroy(hrec); + if (res > 0) res = bcf_hdr_sync((bcf_hdr_t*)h); + k = kh_get(vdict, d, key); + v->errcode |= BCF_ERR_TAG_UNDEF; + if (res || k == kh_end(d)) { + hts_log_error("Could not add dummy header for INFO '%s' at %s:%"PRIhts_pos, key, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_TAG_INVALID; + goto fail; + } + } + uint32_t y = kh_val(d, k).info[BCF_HL_INFO]; + ++v->n_info; + bcf_enc_int1(str, kh_val(d, k).id); + if (val == 0) { + bcf_enc_size(str, 0, BCF_BT_NULL); + } else if ((y>>4&0xf) == BCF_HT_FLAG || (y>>4&0xf) == BCF_HT_STR) { // if Flag has a value, treat it as a string + bcf_enc_vchar(str, end - val, val); + } else { // int/float value/array + int i, n_val; + char *t, *te; + for (t = val, n_val = 1; *t; ++t) // count the number of values + if (*t == ',') ++n_val; + // Check both int and float size in one step for simplicity + if (n_val > max_n_val) { + int32_t *a_tmp = (int32_t *)realloc(a_val, n_val * sizeof(*a_val)); + if (!a_tmp) { + hts_log_error("Could not allocate memory at %s:%"PRIhts_pos, bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_LIMITS; // No appropriate code? + goto fail; + } + a_val = a_tmp; + max_n_val = n_val; + } + if ((y>>4&0xf) == BCF_HT_INT) { + i = 0, t = val; + int64_t val1; + int is_int64 = 0; +#ifdef VCF_ALLOW_INT64 + if ( n_val==1 ) + { + overflow = 0; + long long int tmp_val = hts_str2int(val, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); + if ( te==val ) tmp_val = bcf_int32_missing; + else if ( overflow || tmp_valBCF_MAX_BT_INT64 ) + { + if ( !extreme_int_warned ) + { + hts_log_warning("Extreme INFO/%s value encountered and set to missing at %s:%"PRIhts_pos,key,bcf_seqname_safe(h,v), v->pos+1); + extreme_int_warned = 1; + } + tmp_val = bcf_int32_missing; + } + else + is_int64 = 1; + val1 = tmp_val; + t = te; + i = 1; // this is just to avoid adding another nested block... 
+ } +#endif + for (; i < n_val; ++i, ++t) + { + overflow = 0; + long int tmp_val = hts_str2int(t, &te, sizeof(tmp_val)*CHAR_BIT, &overflow); + if ( te==t ) tmp_val = bcf_int32_missing; + else if ( overflow || tmp_valBCF_MAX_BT_INT32 ) + { + if ( !extreme_int_warned ) + { + hts_log_warning("Extreme INFO/%s value encountered and set to missing at %s:%"PRIhts_pos,key,bcf_seqname_safe(h,v), v->pos+1); + extreme_int_warned = 1; + } + tmp_val = bcf_int32_missing; + } + a_val[i] = tmp_val; + for (t = te; *t && *t != ','; t++); + } + if (n_val == 1) { +#ifdef VCF_ALLOW_INT64 + if ( is_int64 ) + { + v->unpacked |= BCF_IS_64BIT; + bcf_enc_long1(str, val1); + } + else + bcf_enc_int1(str, (int32_t)val1); +#else + val1 = a_val[0]; + bcf_enc_int1(str, (int32_t)val1); +#endif + } else { + bcf_enc_vint(str, n_val, a_val, -1); + } + if (n_val==1 && (val1!=bcf_int32_missing || is_int64) + && memcmp(key, "END", 4) == 0) + { + if ( val1 <= v->pos ) + { + if ( !negative_rlen_warned ) + { + hts_log_warning("INFO/END=%"PRIhts_pos" is smaller than POS at %s:%"PRIhts_pos,val1,bcf_seqname_safe(h,v),v->pos+1); + negative_rlen_warned = 1; + } + } + else + v->rlen = val1 - v->pos; + } + } else if ((y>>4&0xf) == BCF_HT_REAL) { + float *val_f = (float *)a_val; + for (i = 0, t = val; i < n_val; ++i, ++t) + { + overflow = 0; + val_f[i] = hts_str2dbl(t, &te, &overflow); + if ( te==t || overflow ) // conversion failed + bcf_float_set_missing(val_f[i]); + for (t = te; *t && *t != ','; t++); + } + bcf_enc_vfloat(str, n_val, val_f); + } + } + if (c == 0) break; + r = end; + key = r + 1; + } + + free(a_val); + return 0; + + fail: + free(a_val); + return -1; +} + +int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v) +{ + int ret = -2, overflow = 0; + char *p, *q, *r, *t; + kstring_t *str; + khint_t k; + ks_tokaux_t aux; + +//#define NOT_DOT(p) strcmp((p), ".") +//#define NOT_DOT(p) (!(*p == '.' && !p[1])) +//#define NOT_DOT(p) ((*p) != '.' 
|| (p)[1]) +//#define NOT_DOT(p) (q-p != 1 || memcmp(p, ".\0", 2)) +#define NOT_DOT(p) (memcmp(p, ".\0", 2)) + + if (!s || !h || !v || !(s->s)) + return ret; + + // Assumed in lots of places, but we may as well spot this early + assert(sizeof(float) == sizeof(int32_t)); + + // Ensure string we parse has space to permit some over-flow when during + // parsing. Eg to do memcmp(key, "END", 4) in vcf_parse_info over + // the more straight forward looking strcmp, giving a speed advantage. + if (ks_resize(s, s->l+4) < 0) + return -1; + + // Force our memory to be initialised so we avoid the technicality of + // undefined behaviour in using a 4-byte memcmp. (The reality is this + // almost certainly is never detected by the compiler so has no impact, + // but equally so this code has minimal (often beneficial) impact on + // performance too.) + s->s[s->l+0] = 0; + s->s[s->l+1] = 0; + s->s[s->l+2] = 0; + s->s[s->l+3] = 0; + + bcf_clear1(v); + str = &v->shared; + memset(&aux, 0, sizeof(ks_tokaux_t)); + + // CHROM + if (!(p = kstrtok(s->s, "\t", &aux))) + goto err; + *(q = (char*)aux.p) = 0; + + vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG]; + k = kh_get(vdict, d, p); + if (k == kh_end(d)) { + hts_log_warning("Contig '%s' is not defined in the header. 
(Quick workaround: index the file with tabix.)", p); + v->errcode = BCF_ERR_CTG_UNDEF; + if ((k = fix_chromosome(h, d, p)) == kh_end(d)) { + hts_log_error("Could not add dummy header for contig '%s'", p); + v->errcode |= BCF_ERR_CTG_INVALID; + goto err; + } + } + v->rid = kh_val(d, k).id; + + // POS + if (!(p = kstrtok(0, 0, &aux))) + goto err; + *(q = (char*)aux.p) = 0; + + overflow = 0; + char *tmp = p; + v->pos = hts_str2uint(p, &p, 62, &overflow); + if (overflow) { + hts_log_error("Position value '%s' is too large", tmp); + goto err; + } else if ( *p ) { + hts_log_error("Could not parse the position '%s'", tmp); + goto err; + } else { + v->pos -= 1; + } + if (v->pos >= INT32_MAX) + v->unpacked |= BCF_IS_64BIT; + + // ID + if (!(p = kstrtok(0, 0, &aux))) + goto err; + *(q = (char*)aux.p) = 0; + + if (NOT_DOT(p)) bcf_enc_vchar(str, q - p, p); + else bcf_enc_size(str, 0, BCF_BT_CHAR); + + // REF + if (!(p = kstrtok(0, 0, &aux))) + goto err; + *(q = (char*)aux.p) = 0; + + bcf_enc_vchar(str, q - p, p); + v->n_allele = 1, v->rlen = q - p; + + // ALT + if (!(p = kstrtok(0, 0, &aux))) + goto err; + *(q = (char*)aux.p) = 0; + + if (NOT_DOT(p)) { + for (r = t = p;; ++r) { + if (*r == ',' || *r == 0) { + if (v->n_allele == UINT16_MAX) { + hts_log_error("Too many ALT alleles at %s:%"PRIhts_pos, + bcf_seqname_safe(h,v), v->pos+1); + v->errcode |= BCF_ERR_LIMITS; + goto err; + } + bcf_enc_vchar(str, r - t, t); + t = r + 1; + ++v->n_allele; + } + if (r == q) break; + } + } + + // QUAL + if (!(p = kstrtok(0, 0, &aux))) + goto err; + *(q = (char*)aux.p) = 0; + + if (NOT_DOT(p)) v->qual = atof(p); + else bcf_float_set_missing(v->qual); + if ( v->max_unpack && !(v->max_unpack>>1) ) goto end; // BCF_UN_STR + + // FILTER + if (!(p = kstrtok(0, 0, &aux))) + goto err; + *(q = (char*)aux.p) = 0; + + if (NOT_DOT(p)) { + if (vcf_parse_filter(str, h, v, p, q)) { + goto err; + } + } else bcf_enc_vint(str, 0, 0, -1); + if ( v->max_unpack && !(v->max_unpack>>2) ) goto end; // BCF_UN_FLT + + 
// INFO + if (!(p = kstrtok(0, 0, &aux))) + goto err; + *(q = (char*)aux.p) = 0; + + if (NOT_DOT(p)) { + if (vcf_parse_info(str, h, v, p, q)) { + goto err; + } + } + if ( v->max_unpack && !(v->max_unpack>>3) ) goto end; + + // FORMAT; optional + p = kstrtok(0, 0, &aux); + if (p) { + *(q = (char*)aux.p) = 0; + + return vcf_parse_format(s, h, v, p, q) == 0 ? 0 : -2; + } else { + return 0; + } + + end: + ret = 0; + + err: + return ret; +} + +int vcf_open_mode(char *mode, const char *fn, const char *format) +{ + if (format == NULL) { + // Try to pick a format based on the filename extension + char extension[HTS_MAX_EXT_LEN]; + if (find_file_extension(fn, extension) < 0) return -1; + return vcf_open_mode(mode, fn, extension); + } + else if (strcasecmp(format, "bcf") == 0) strcpy(mode, "b"); + else if (strcasecmp(format, "vcf") == 0) strcpy(mode, ""); + else if (strcasecmp(format, "vcf.gz") == 0 || strcasecmp(format, "vcf.bgz") == 0) strcpy(mode, "z"); + else return -1; + + return 0; +} + +int vcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) +{ + int ret; + ret = hts_getline(fp, KS_SEP_LINE, &fp->line); + if (ret < 0) return ret; + return vcf_parse1(&fp->line, h, v); +} + +static inline uint8_t *bcf_unpack_fmt_core1(uint8_t *ptr, int n_sample, bcf_fmt_t *fmt) +{ + uint8_t *ptr_start = ptr; + fmt->id = bcf_dec_typed_int1(ptr, &ptr); + fmt->n = bcf_dec_size(ptr, &ptr, &fmt->type); + fmt->size = fmt->n << bcf_type_shift[fmt->type]; + fmt->p = ptr; + fmt->p_off = ptr - ptr_start; + fmt->p_free = 0; + ptr += n_sample * fmt->size; + fmt->p_len = ptr - fmt->p; + return ptr; +} + +static inline uint8_t *bcf_unpack_info_core1(uint8_t *ptr, bcf_info_t *info) +{ + uint8_t *ptr_start = ptr; + int64_t len = 0; + info->key = bcf_dec_typed_int1(ptr, &ptr); + len = info->len = bcf_dec_size(ptr, &ptr, &info->type); + info->vptr = ptr; + info->vptr_off = ptr - ptr_start; + info->vptr_free = 0; + info->v1.i = 0; + if (info->len == 1) { + switch(info->type) { + case BCF_BT_INT8: + case 
BCF_BT_CHAR: + info->v1.i = *(int8_t*)ptr; + break; + case BCF_BT_INT16: + info->v1.i = le_to_i16(ptr); + len <<= 1; + break; + case BCF_BT_INT32: + info->v1.i = le_to_i32(ptr); + len <<= 2; + break; + case BCF_BT_FLOAT: + info->v1.f = le_to_float(ptr); + len <<= 2; + break; + case BCF_BT_INT64: + info->v1.i = le_to_i64(ptr); + len <<= 3; + break; + } + } else { + len <<= bcf_type_shift[info->type]; + } + ptr += len; + + info->vptr_len = ptr - info->vptr; + return ptr; +} + +int bcf_unpack(bcf1_t *b, int which) +{ + if ( !b->shared.l ) return 0; // Building a new BCF record from scratch + uint8_t *ptr = (uint8_t*)b->shared.s, *ptr_ori; + int i; + bcf_dec_t *d = &b->d; + if (which & BCF_UN_FLT) which |= BCF_UN_STR; + if (which & BCF_UN_INFO) which |= BCF_UN_SHR; + if ((which&BCF_UN_STR) && !(b->unpacked&BCF_UN_STR)) + { + kstring_t tmp; + + // ID + tmp.l = 0; tmp.s = d->id; tmp.m = d->m_id; + ptr_ori = ptr; + ptr = bcf_fmt_sized_array(&tmp, ptr); + b->unpack_size[0] = ptr - ptr_ori; + kputc_('\0', &tmp); + d->id = tmp.s; d->m_id = tmp.m; + + // REF and ALT are in a single block (d->als) and d->alleles are pointers into this block + hts_expand(char*, b->n_allele, d->m_allele, d->allele); // NM: hts_expand() is a macro + tmp.l = 0; tmp.s = d->als; tmp.m = d->m_als; + ptr_ori = ptr; + for (i = 0; i < b->n_allele; ++i) { + // Use offset within tmp.s as realloc may change pointer + d->allele[i] = (char *)(intptr_t)tmp.l; + ptr = bcf_fmt_sized_array(&tmp, ptr); + kputc_('\0', &tmp); + } + b->unpack_size[1] = ptr - ptr_ori; + d->als = tmp.s; d->m_als = tmp.m; + + // Convert our offsets within tmp.s back to pointers again + for (i = 0; i < b->n_allele; ++i) + d->allele[i] = d->als + (ptrdiff_t)d->allele[i]; + b->unpacked |= BCF_UN_STR; + } + if ((which&BCF_UN_FLT) && !(b->unpacked&BCF_UN_FLT)) { // FILTER + ptr = (uint8_t*)b->shared.s + b->unpack_size[0] + b->unpack_size[1]; + ptr_ori = ptr; + if (*ptr>>4) { + int type; + d->n_flt = bcf_dec_size(ptr, &ptr, &type); + 
hts_expand(int, d->n_flt, d->m_flt, d->flt); + for (i = 0; i < d->n_flt; ++i) + d->flt[i] = bcf_dec_int1(ptr, type, &ptr); + } else ++ptr, d->n_flt = 0; + b->unpack_size[2] = ptr - ptr_ori; + b->unpacked |= BCF_UN_FLT; + } + if ((which&BCF_UN_INFO) && !(b->unpacked&BCF_UN_INFO)) { // INFO + ptr = (uint8_t*)b->shared.s + b->unpack_size[0] + b->unpack_size[1] + b->unpack_size[2]; + hts_expand(bcf_info_t, b->n_info, d->m_info, d->info); + for (i = 0; i < d->m_info; ++i) d->info[i].vptr_free = 0; + for (i = 0; i < b->n_info; ++i) + ptr = bcf_unpack_info_core1(ptr, &d->info[i]); + b->unpacked |= BCF_UN_INFO; + } + if ((which&BCF_UN_FMT) && b->n_sample && !(b->unpacked&BCF_UN_FMT)) { // FORMAT + ptr = (uint8_t*)b->indiv.s; + hts_expand(bcf_fmt_t, b->n_fmt, d->m_fmt, d->fmt); + for (i = 0; i < d->m_fmt; ++i) d->fmt[i].p_free = 0; + for (i = 0; i < b->n_fmt; ++i) + ptr = bcf_unpack_fmt_core1(ptr, b->n_sample, &d->fmt[i]); + b->unpacked |= BCF_UN_FMT; + } + return 0; +} + +int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s) +{ + int i; + int32_t max_dt_id = h->n[BCF_DT_ID]; + const char *chrom = bcf_seqname(h, v); + if (!chrom) { + hts_log_error("Invalid BCF, CONTIG id=%d not present in the header", + v->rid); + errno = EINVAL; + return -1; + } + + bcf_unpack((bcf1_t*)v, BCF_UN_ALL & ~(BCF_UN_INFO|BCF_UN_FMT)); + + // Cache of key lengths so we don't keep repeatedly using them. + // This assumes we're not modifying the header between successive calls + // to vcf_format, but that would lead to many other forms of breakage + // so it feels like a valid assumption to make. + // + // We cannot just do this in bcf_hdr_sync as some code (eg bcftools + // annotate) manipulates the headers directly without calling sync to + // refresh the data structures. So we must do just-in-time length + // calculation during writes instead. 
+ bcf_hdr_aux_t *aux = get_hdr_aux(h); + if (!aux->key_len) { + if (!(aux->key_len = calloc(h->n[BCF_DT_ID]+1, sizeof(*aux->key_len)))) + return -1; + } + size_t *key_len = aux->key_len; + + kputs(chrom, s); // CHROM + kputc_('\t', s); kputll(v->pos + 1, s); // POS + kputc_('\t', s); kputs(v->d.id ? v->d.id : ".", s); // ID + kputc_('\t', s); // REF + if (v->n_allele > 0) kputs(v->d.allele[0], s); + else kputc_('.', s); + kputc_('\t', s); // ALT + if (v->n_allele > 1) { + for (i = 1; i < v->n_allele; ++i) { + if (i > 1) kputc_(',', s); + kputs(v->d.allele[i], s); + } + } else kputc_('.', s); + kputc_('\t', s); // QUAL + if ( bcf_float_is_missing(v->qual) ) kputc_('.', s); // QUAL + else kputd(v->qual, s); + kputc_('\t', s); // FILTER + if (v->d.n_flt) { + for (i = 0; i < v->d.n_flt; ++i) { + int32_t idx = v->d.flt[i]; + if (idx < 0 || idx >= max_dt_id + || h->id[BCF_DT_ID][idx].key == NULL) { + hts_log_error("Invalid BCF, the FILTER tag id=%d at %s:%"PRIhts_pos" not present in the header", + idx, bcf_seqname_safe(h, v), v->pos + 1); + errno = EINVAL; + return -1; + } + if (i) kputc_(';', s); + if (!key_len[idx]) + key_len[idx] = strlen(h->id[BCF_DT_ID][idx].key); + kputsn(h->id[BCF_DT_ID][idx].key, key_len[idx], s); + } + } else kputc_('.', s); + + kputc_('\t', s); // INFO + if (v->n_info) { + uint8_t *ptr = v->shared.s + ? (uint8_t *)v->shared.s + v->unpack_size[0] + + v->unpack_size[1] + v->unpack_size[2] + : NULL; + int first = 1; + bcf_info_t *info = v->d.info; + + // Note if we duplicate this code into custom packed and unpacked + // implementations then we gain a bit more speed, particularly with + // clang 13 (up to 5%). Not sure why this is, but code duplication + // isn't pleasant and it's still faster adding packed support than + // not so it's a win, just not as good as it should be. 
+ const int info_packed = !(v->unpacked & BCF_UN_INFO) && v->shared.l; + for (i = 0; i < v->n_info; ++i) { + bcf_info_t in, *z; + if (info_packed) { + // Use a local bcf_info_t when data is packed + z = ∈ + z->key = bcf_dec_typed_int1(ptr, &ptr); + z->len = bcf_dec_size(ptr, &ptr, &z->type); + z->vptr = ptr; + ptr += z->len << bcf_type_shift[z->type]; + } else { + // Else previously unpacked INFO struct + z = &info[i]; + + // Also potentially since deleted + if ( !z->vptr ) continue; + } + + bcf_idpair_t *id = z->key >= 0 && z->key < max_dt_id + ? &h->id[BCF_DT_ID][z->key] + : NULL; + + if (!id || !id->key) { + hts_log_error("Invalid BCF, the INFO tag id=%d is %s at %s:%"PRIhts_pos, + z->key, + z->key < 0 ? "negative" + : (z->key >= max_dt_id ? "too large" : "not present in the header"), + bcf_seqname_safe(h, v), v->pos+1); + errno = EINVAL; + return -1; + } + + // KEY + if (!key_len[z->key]) + key_len[z->key] = strlen(id->key); + size_t id_len = key_len[z->key]; + if (ks_resize(s, s->l + 3 + id_len) < 0) + return -1; + char *sptr = s->s + s->l; + if ( !first ) { + *sptr++ = ';'; + s->l++; + } + first = 0; + memcpy(sptr, id->key, id_len); + s->l += id_len; + + // VALUE + if (z->len <= 0) continue; + sptr[id_len] = '='; + s->l++; + + if (z->len != 1 || info_packed) { + bcf_fmt_array(s, z->len, z->type, z->vptr); + } else { + // Single length vectors are unpacked into their + // own info.v1 union and handled separately. 
+ if (z->type == BCF_BT_FLOAT) { + if ( bcf_float_is_missing(z->v1.f) ) + kputc_('.', s); + else + kputd(z->v1.f, s); + } else if (z->type == BCF_BT_CHAR) { + kputc_(z->v1.i, s); + } else if (z->type < BCF_BT_INT64) { + int64_t missing[] = { + 0, // BCF_BT_NULL + bcf_int8_missing, + bcf_int16_missing, + bcf_int32_missing, + }; + if (z->v1.i == missing[z->type]) + kputc_('.', s); + else + kputw(z->v1.i, s); + } else if (z->type == BCF_BT_INT64) { + if (z->v1.i == bcf_int64_missing) + kputc_('.', s); + else + kputll(z->v1.i, s); + } else { + hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, z->type, bcf_seqname_safe(h, v), v->pos+1); + errno = EINVAL; + return -1; + } + } + } + if ( first ) kputc_('.', s); + } else kputc_('.', s); + + // FORMAT and individual information + if (v->n_sample) { + int i,j; + if ( v->n_fmt) { + uint8_t *ptr = (uint8_t *)v->indiv.s; + int gt_i = -1; + bcf_fmt_t *fmt = v->d.fmt; + int first = 1; + int fmt_packed = !(v->unpacked & BCF_UN_FMT); + + if (fmt_packed) { + // Local fmt as we have an array of num FORMAT keys, + // each of which points to N.Sample values. + + // No real gain to be had in handling unpacked data here, + // but it doesn't cost us much in complexity either and + // it gives us flexibility. + fmt = malloc(v->n_fmt * sizeof(*fmt)); + if (!fmt) + return -1; + } + + // KEYS + for (i = 0; i < (int)v->n_fmt; ++i) { + bcf_fmt_t *z; + z = &fmt[i]; + if (fmt_packed) { + z->id = bcf_dec_typed_int1(ptr, &ptr); + z->n = bcf_dec_size(ptr, &ptr, &z->type); + z->p = ptr; + z->size = z->n << bcf_type_shift[z->type]; + ptr += v->n_sample * z->size; + } + if ( !z->p ) continue; + kputc_(!first ? ':' : '\t', s); first = 0; + + bcf_idpair_t *id = z->id >= 0 && z->id < max_dt_id + ? 
&h->id[BCF_DT_ID][z->id] + : NULL; + + if (!id || !id->key) { + hts_log_error("Invalid BCF, the FORMAT tag id=%d at %s:%"PRIhts_pos" not present in the header", z->id, bcf_seqname_safe(h, v), v->pos+1); + errno = EINVAL; + return -1; + } + + if (!key_len[z->id]) + key_len[z->id] = strlen(id->key); + size_t id_len = key_len[z->id]; + kputsn(id->key, id_len, s); + if (id_len == 2 && id->key[0] == 'G' && id->key[1] == 'T') + gt_i = i; + } + if ( first ) kputsn("\t.", 2, s); + + // VALUES per sample + for (j = 0; j < v->n_sample; ++j) { + kputc_('\t', s); + first = 1; + bcf_fmt_t *f = fmt; + for (i = 0; i < (int)v->n_fmt; i++, f++) { + if ( !f->p ) continue; + if (!first) kputc_(':', s); + first = 0; + if (gt_i == i) { + bcf_format_gt(f,j,s); + break; + } + else if (f->n == 1) + bcf_fmt_array1(s, f->type, f->p + j * (size_t)f->size); + else + bcf_fmt_array(s, f->n, f->type, f->p + j * (size_t)f->size); + } + + // Simpler loop post GT and at least 1 iteration + for (i++, f++; i < (int)v->n_fmt; i++, f++) { + if ( !f->p ) continue; + kputc_(':', s); + if (f->n == 1) + bcf_fmt_array1(s, f->type, f->p + j * (size_t)f->size); + else + bcf_fmt_array(s, f->n, f->type, f->p + j * (size_t)f->size); + } + if ( first ) kputc_('.', s); + } + if (fmt_packed) + free(fmt); + } + else + for (j=0; j<=v->n_sample; j++) + kputsn("\t.", 2, s); + } + kputc('\n', s); + return 0; +} + +int vcf_write_line(htsFile *fp, kstring_t *line) +{ + int ret; + if ( line->s[line->l-1]!='\n' ) kputc('\n',line); + if ( fp->format.compression!=no_compression ) + ret = bgzf_write(fp->fp.bgzf, line->s, line->l); + else + ret = hwrite(fp->fp.hfile, line->s, line->l); + return ret==line->l ? 
0 : -1; +} + +int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v) +{ + ssize_t ret; + fp->line.l = 0; + if (vcf_format1(h, v, &fp->line) != 0) + return -1; + if ( fp->format.compression!=no_compression ) { + if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0) + return -1; + if (fp->idx && !fp->fp.bgzf->mt) + hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf)); + ret = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l); + } else { + ret = hwrite(fp->fp.hfile, fp->line.s, fp->line.l); + } + + if (fp->idx && fp->format.compression == bgzf) { + int tid; + if ((tid = hts_idx_tbi_name(fp->idx, v->rid, bcf_seqname_safe(h, v))) < 0) + return -1; + + if (bgzf_idx_push(fp->fp.bgzf, fp->idx, + tid, v->pos, v->pos + v->rlen, + bgzf_tell(fp->fp.bgzf), 1) < 0) + return -1; + } + + return ret==fp->line.l ? 0 : -1; +} + +/************************ + * Data access routines * + ************************/ + +int bcf_hdr_id2int(const bcf_hdr_t *h, int which, const char *id) +{ + khint_t k; + vdict_t *d = (vdict_t*)h->dict[which]; + k = kh_get(vdict, d, id); + return k == kh_end(d)? -1 : kh_val(d, k).id; +} + + +/******************** + *** BCF indexing *** + ********************/ + +// Calculate number of index levels given min_shift and the header contig +// list. Also returns number of contigs in *nids_out. +static int idx_calc_n_lvls_ids(const bcf_hdr_t *h, int min_shift, + int starting_n_lvls, int *nids_out) +{ + int n_lvls, i, nids = 0; + int64_t max_len = 0, s; + + for (i = 0; i < h->n[BCF_DT_CTG]; ++i) + { + if ( !h->id[BCF_DT_CTG][i].val ) continue; + if ( max_len < h->id[BCF_DT_CTG][i].val->info[0] ) + max_len = h->id[BCF_DT_CTG][i].val->info[0]; + nids++; + } + if ( !max_len ) max_len = (1LL<<31) - 1; // In case contig line is broken. 
+ max_len += 256; + s = hts_bin_maxpos(min_shift, starting_n_lvls); + for (n_lvls = starting_n_lvls; max_len > s; ++n_lvls, s <<= 3); + + if (nids_out) *nids_out = nids; + return n_lvls; +} + +hts_idx_t *bcf_index(htsFile *fp, int min_shift) +{ + int n_lvls; + bcf1_t *b = NULL; + hts_idx_t *idx = NULL; + bcf_hdr_t *h; + int r; + h = bcf_hdr_read(fp); + if ( !h ) return NULL; + int nids = 0; + n_lvls = idx_calc_n_lvls_ids(h, min_shift, 0, &nids); + idx = hts_idx_init(nids, HTS_FMT_CSI, bgzf_tell(fp->fp.bgzf), min_shift, n_lvls); + if (!idx) goto fail; + b = bcf_init1(); + if (!b) goto fail; + while ((r = bcf_read1(fp,h, b)) >= 0) { + int ret; + ret = hts_idx_push(idx, b->rid, b->pos, b->pos + b->rlen, bgzf_tell(fp->fp.bgzf), 1); + if (ret < 0) goto fail; + } + if (r < -1) goto fail; + hts_idx_finish(idx, bgzf_tell(fp->fp.bgzf)); + bcf_destroy1(b); + bcf_hdr_destroy(h); + return idx; + + fail: + hts_idx_destroy(idx); + bcf_destroy1(b); + bcf_hdr_destroy(h); + return NULL; +} + +hts_idx_t *bcf_index_load2(const char *fn, const char *fnidx) +{ + return fnidx? 
hts_idx_load2(fn, fnidx) : bcf_index_load(fn); +} + +hts_idx_t *bcf_index_load3(const char *fn, const char *fnidx, int flags) +{ + return hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags); +} + +int bcf_index_build3(const char *fn, const char *fnidx, int min_shift, int n_threads) +{ + htsFile *fp; + hts_idx_t *idx; + tbx_t *tbx; + int ret; + if ((fp = hts_open(fn, "rb")) == 0) return -2; + if (n_threads) + hts_set_threads(fp, n_threads); + if ( fp->format.compression!=bgzf ) { hts_close(fp); return -3; } + switch (fp->format.format) { + case bcf: + if (!min_shift) { + hts_log_error("TBI indices for BCF files are not supported"); + ret = -1; + } else { + idx = bcf_index(fp, min_shift); + if (idx) { + ret = hts_idx_save_as(idx, fn, fnidx, HTS_FMT_CSI); + if (ret < 0) ret = -4; + hts_idx_destroy(idx); + } + else ret = -1; + } + break; + + case vcf: + tbx = tbx_index(hts_get_bgzfp(fp), min_shift, &tbx_conf_vcf); + if (tbx) { + ret = hts_idx_save_as(tbx->idx, fn, fnidx, min_shift > 0 ? HTS_FMT_CSI : HTS_FMT_TBI); + if (ret < 0) ret = -4; + tbx_destroy(tbx); + } + else ret = -1; + break; + + default: + ret = -3; + break; + } + hts_close(fp); + return ret; +} + +int bcf_index_build2(const char *fn, const char *fnidx, int min_shift) +{ + return bcf_index_build3(fn, fnidx, min_shift, 0); +} + +int bcf_index_build(const char *fn, int min_shift) +{ + return bcf_index_build3(fn, NULL, min_shift, 0); +} + +// Initialise fp->idx for the current format type. +// This must be called after the header has been written but no other data. 
+static int vcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx) {
+    // Tabix meta data (added even in CSI for VCF): seven 32-bit fields
+    // written with u32_to_le(), in the order
+    //   fmt, name col, beg col, end col, comment, n.skip, ref name len
+    const uint32_t tbx_fields[7] = { TBX_VCF, 1, 2, 0, '#', 0, 0 };
+    uint8_t meta[4*7];
+    int levels, idx_fmt, i;
+
+    if (min_shift != 0) {
+        // CSI: start from the level count that matches tbx_index() and
+        // let idx_calc_n_lvls_ids() increase it if necessary.
+        int base_levels = (TBX_MAX_SHIFT - min_shift + 2) / 3;
+        levels = idx_calc_n_lvls_ids(h, min_shift, base_levels, NULL);
+        idx_fmt = HTS_FMT_CSI;
+    } else {
+        // A zero min_shift selects TBI, with min_shift 14 and 5 levels.
+        min_shift = 14;
+        levels = 5;
+        idx_fmt = HTS_FMT_TBI;
+    }
+
+    fp->idx = hts_idx_init(0, idx_fmt, bgzf_tell(fp->fp.bgzf), min_shift, levels);
+    if (fp->idx == NULL)
+        return -1;
+
+    for (i = 0; i < 7; i++)
+        u32_to_le(tbx_fields[i], meta + 4*i);
+
+    if (hts_idx_set_meta(fp->idx, sizeof(meta), meta, 1) < 0) {
+        // Do not leave a half-initialised index attached to the file.
+        hts_idx_destroy(fp->idx);
+        fp->idx = NULL;
+        return -1;
+    }
+
+    fp->fnidx = fnidx;
+    return 0;
+}
+
+// Initialise fp->idx for the current format type.
+// This must be called after the header has been written but no other data.
+int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const char *fnidx) {
+    int contig_count = 0;
+
+    if (fp->format.compression != bgzf) {
+        hts_log_error("Indexing is only supported on BGZF-compressed files");
+        return -3; // Matches no-compression return for bcf_index_build3()
+    }
+
+    // VCF has its own initialiser; it receives the caller's min_shift
+    // unmodified (zero included).
+    if (fp->format.format == vcf)
+        return vcf_idx_init(fp, h, min_shift, fnidx);
+
+    // Everything else gets a CSI index; a zero min_shift falls back to 14.
+    const int shift = min_shift ? min_shift : 14;
+    const int levels = idx_calc_n_lvls_ids(h, shift, 0, &contig_count);
+
+    fp->idx = hts_idx_init(contig_count, HTS_FMT_CSI, bgzf_tell(fp->fp.bgzf), shift, levels);
+    if (fp->idx == NULL)
+        return -1;
+
+    fp->fnidx = fnidx;
+    return 0;
+}
+
+// Finishes an index. Call after the last record has been written.
+// Returns 0 on success, <0 on failure.
+//
+// NB: same format as SAM/BAM as it uses bgzf.
+int bcf_idx_save(htsFile *fp) { + return sam_idx_save(fp); +} + +/***************** + *** Utilities *** + *****************/ + +int bcf_hdr_combine(bcf_hdr_t *dst, const bcf_hdr_t *src) +{ + int i, ndst_ori = dst->nhrec, need_sync = 0, ret = 0, res; + for (i=0; inhrec; i++) + { + if ( src->hrec[i]->type==BCF_HL_GEN && src->hrec[i]->value ) + { + int j; + for (j=0; jhrec[j]->type!=BCF_HL_GEN ) continue; + + // Checking only the key part of generic lines, otherwise + // the VCFs are too verbose. Should we perhaps add a flag + // to bcf_hdr_combine() and make this optional? + if ( !strcmp(src->hrec[i]->key,dst->hrec[j]->key) ) break; + } + if ( j>=ndst_ori ) { + res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); + if (res < 0) return -1; + need_sync += res; + } + } + else if ( src->hrec[i]->type==BCF_HL_STR ) + { + // NB: we are ignoring fields without ID + int j = bcf_hrec_find_key(src->hrec[i],"ID"); + if ( j>=0 ) + { + bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], src->hrec[i]->key); + if ( !rec ) { + res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); + if (res < 0) return -1; + need_sync += res; + } + } + } + else + { + int j = bcf_hrec_find_key(src->hrec[i],"ID"); + assert( j>=0 ); // this should always be true for valid VCFs + + bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], NULL); + if ( !rec ) { + res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); + if (res < 0) return -1; + need_sync += res; + } else if ( src->hrec[i]->type==BCF_HL_INFO || src->hrec[i]->type==BCF_HL_FMT ) + { + // Check that both records are of the same type. The bcf_hdr_id2length + // macro cannot be used here because dst header is not synced yet. 
+ vdict_t *d_src = (vdict_t*)src->dict[BCF_DT_ID]; + vdict_t *d_dst = (vdict_t*)dst->dict[BCF_DT_ID]; + khint_t k_src = kh_get(vdict, d_src, src->hrec[i]->vals[0]); + khint_t k_dst = kh_get(vdict, d_dst, src->hrec[i]->vals[0]); + if ( (kh_val(d_src,k_src).info[rec->type]>>8 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>8 & 0xf) ) + { + hts_log_warning("Trying to combine \"%s\" tag definitions of different lengths", + src->hrec[i]->vals[0]); + ret |= 1; + } + if ( (kh_val(d_src,k_src).info[rec->type]>>4 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>4 & 0xf) ) + { + hts_log_warning("Trying to combine \"%s\" tag definitions of different types", + src->hrec[i]->vals[0]); + ret |= 1; + } + } + } + } + if ( need_sync ) { + if (bcf_hdr_sync(dst) < 0) return -1; + } + return ret; +} + +bcf_hdr_t *bcf_hdr_merge(bcf_hdr_t *dst, const bcf_hdr_t *src) +{ + if ( !dst ) + { + // this will effectively strip existing IDX attributes from src to become dst + dst = bcf_hdr_init("r"); + kstring_t htxt = {0,0,0}; + if (bcf_hdr_format(src, 0, &htxt) < 0) { + free(htxt.s); + return NULL; + } + if ( bcf_hdr_parse(dst, htxt.s) < 0 ) { + bcf_hdr_destroy(dst); + dst = NULL; + } + free(htxt.s); + return dst; + } + + int i, ndst_ori = dst->nhrec, need_sync = 0, res; + for (i=0; inhrec; i++) + { + if ( src->hrec[i]->type==BCF_HL_GEN && src->hrec[i]->value ) + { + int j; + for (j=0; jhrec[j]->type!=BCF_HL_GEN ) continue; + + // Checking only the key part of generic lines, otherwise + // the VCFs are too verbose. Should we perhaps add a flag + // to bcf_hdr_combine() and make this optional? 
+ if ( !strcmp(src->hrec[i]->key,dst->hrec[j]->key) ) break; + } + if ( j>=ndst_ori ) { + res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); + if (res < 0) return NULL; + need_sync += res; + } + } + else if ( src->hrec[i]->type==BCF_HL_STR ) + { + // NB: we are ignoring fields without ID + int j = bcf_hrec_find_key(src->hrec[i],"ID"); + if ( j>=0 ) + { + bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], src->hrec[i]->key); + if ( !rec ) { + res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); + if (res < 0) return NULL; + need_sync += res; + } + } + } + else + { + int j = bcf_hrec_find_key(src->hrec[i],"ID"); + assert( j>=0 ); // this should always be true for valid VCFs + + bcf_hrec_t *rec = bcf_hdr_get_hrec(dst, src->hrec[i]->type, "ID", src->hrec[i]->vals[j], NULL); + if ( !rec ) { + res = bcf_hdr_add_hrec(dst, bcf_hrec_dup(src->hrec[i])); + if (res < 0) return NULL; + need_sync += res; + } else if ( src->hrec[i]->type==BCF_HL_INFO || src->hrec[i]->type==BCF_HL_FMT ) + { + // Check that both records are of the same type. The bcf_hdr_id2length + // macro cannot be used here because dst header is not synced yet. 
+ vdict_t *d_src = (vdict_t*)src->dict[BCF_DT_ID]; + vdict_t *d_dst = (vdict_t*)dst->dict[BCF_DT_ID]; + khint_t k_src = kh_get(vdict, d_src, src->hrec[i]->vals[0]); + khint_t k_dst = kh_get(vdict, d_dst, src->hrec[i]->vals[0]); + if ( (kh_val(d_src,k_src).info[rec->type]>>8 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>8 & 0xf) ) + { + hts_log_warning("Trying to combine \"%s\" tag definitions of different lengths", + src->hrec[i]->vals[0]); + } + if ( (kh_val(d_src,k_src).info[rec->type]>>4 & 0xf) != (kh_val(d_dst,k_dst).info[rec->type]>>4 & 0xf) ) + { + hts_log_warning("Trying to combine \"%s\" tag definitions of different types", + src->hrec[i]->vals[0]); + } + } + } + } + if ( need_sync ) { + if (bcf_hdr_sync(dst) < 0) return NULL; + } + return dst; +} + +int bcf_translate(const bcf_hdr_t *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *line) +{ + int i; + if ( line->errcode ) + { + char errordescription[1024] = ""; + hts_log_error("Unchecked error (%d %s) at %s:%"PRIhts_pos", exiting", line->errcode, bcf_strerror(line->errcode, errordescription, sizeof(errordescription)), bcf_seqname_safe(src_hdr,line), line->pos+1); + exit(1); + } + if ( src_hdr->ntransl==-1 ) return 0; // no need to translate, all tags have the same id + if ( !src_hdr->ntransl ) // called for the first time, see what needs translating + { + int dict; + for (dict=0; dict<2; dict++) // BCF_DT_ID and BCF_DT_CTG + { + src_hdr->transl[dict] = (int*) malloc(src_hdr->n[dict]*sizeof(int)); + for (i=0; in[dict]; i++) + { + if ( !src_hdr->id[dict][i].key ) // gap left after removed BCF header lines + { + src_hdr->transl[dict][i] = -1; + continue; + } + src_hdr->transl[dict][i] = bcf_hdr_id2int(dst_hdr,dict,src_hdr->id[dict][i].key); + if ( src_hdr->transl[dict][i]!=-1 && i!=src_hdr->transl[dict][i] ) src_hdr->ntransl++; + } + } + if ( !src_hdr->ntransl ) + { + free(src_hdr->transl[0]); src_hdr->transl[0] = NULL; + free(src_hdr->transl[1]); src_hdr->transl[1] = NULL; + src_hdr->ntransl = -1; + } + if ( 
src_hdr->ntransl==-1 ) return 0; + } + bcf_unpack(line,BCF_UN_ALL); + + // CHROM + if ( src_hdr->transl[BCF_DT_CTG][line->rid] >=0 ) line->rid = src_hdr->transl[BCF_DT_CTG][line->rid]; + + // FILTER + for (i=0; id.n_flt; i++) + { + int src_id = line->d.flt[i]; + if ( src_hdr->transl[BCF_DT_ID][src_id] >=0 ) + line->d.flt[i] = src_hdr->transl[BCF_DT_ID][src_id]; + line->d.shared_dirty |= BCF1_DIRTY_FLT; + } + + // INFO + for (i=0; in_info; i++) + { + int src_id = line->d.info[i].key; + int dst_id = src_hdr->transl[BCF_DT_ID][src_id]; + if ( dst_id<0 ) continue; + line->d.info[i].key = dst_id; + if ( !line->d.info[i].vptr ) continue; // skip deleted + int src_size = src_id>>7 ? ( src_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; + int dst_size = dst_id>>7 ? ( dst_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; + if ( src_size==dst_size ) // can overwrite + { + uint8_t *vptr = line->d.info[i].vptr - line->d.info[i].vptr_off; + if ( dst_size==BCF_BT_INT8 ) { vptr[1] = (uint8_t)dst_id; } + else if ( dst_size==BCF_BT_INT16 ) { *(uint16_t*)vptr = (uint16_t)dst_id; } + else { *(uint32_t*)vptr = (uint32_t)dst_id; } + } + else // must realloc + { + bcf_info_t *info = &line->d.info[i]; + kstring_t str = {0,0,0}; + bcf_enc_int1(&str, dst_id); + bcf_enc_size(&str, info->len,info->type); + uint32_t vptr_off = str.l; + kputsn((char*)info->vptr, info->vptr_len, &str); + if( info->vptr_free ) free(info->vptr - info->vptr_off); + info->vptr_off = vptr_off; + info->vptr = (uint8_t*)str.s + info->vptr_off; + info->vptr_free = 1; + line->d.shared_dirty |= BCF1_DIRTY_INF; + } + } + + // FORMAT + for (i=0; in_fmt; i++) + { + int src_id = line->d.fmt[i].id; + int dst_id = src_hdr->transl[BCF_DT_ID][src_id]; + if ( dst_id<0 ) continue; + line->d.fmt[i].id = dst_id; + if( !line->d.fmt[i].p ) continue; // skip deleted + int src_size = src_id>>7 ? ( src_id>>15 ? BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; + int dst_size = dst_id>>7 ? ( dst_id>>15 ? 
BCF_BT_INT32 : BCF_BT_INT16) : BCF_BT_INT8; + if ( src_size==dst_size ) // can overwrite + { + uint8_t *p = line->d.fmt[i].p - line->d.fmt[i].p_off; // pointer to the vector size (4bits) and BT type (4bits) + if ( dst_size==BCF_BT_INT8 ) { p[1] = dst_id; } + else if ( dst_size==BCF_BT_INT16 ) { i16_to_le(dst_id, p + 1); } + else { i32_to_le(dst_id, p + 1); } + } + else // must realloc + { + bcf_fmt_t *fmt = &line->d.fmt[i]; + kstring_t str = {0,0,0}; + bcf_enc_int1(&str, dst_id); + bcf_enc_size(&str, fmt->n, fmt->type); + uint32_t p_off = str.l; + kputsn((char*)fmt->p, fmt->p_len, &str); + if( fmt->p_free ) free(fmt->p - fmt->p_off); + fmt->p_off = p_off; + fmt->p = (uint8_t*)str.s + fmt->p_off; + fmt->p_free = 1; + line->d.indiv_dirty = 1; + } + } + return 0; +} + +bcf_hdr_t *bcf_hdr_dup(const bcf_hdr_t *hdr) +{ + bcf_hdr_t *hout = bcf_hdr_init("r"); + if (!hout) { + hts_log_error("Failed to allocate bcf header"); + return NULL; + } + kstring_t htxt = {0,0,0}; + if (bcf_hdr_format(hdr, 1, &htxt) < 0) { + free(htxt.s); + return NULL; + } + if ( bcf_hdr_parse(hout, htxt.s) < 0 ) { + bcf_hdr_destroy(hout); + hout = NULL; + } + free(htxt.s); + return hout; +} + +bcf_hdr_t *bcf_hdr_subset(const bcf_hdr_t *h0, int n, char *const* samples, int *imap) +{ + void *names_hash = khash_str2int_init(); + kstring_t htxt = {0,0,0}; + kstring_t str = {0,0,0}; + bcf_hdr_t *h = bcf_hdr_init("w"); + int r = 0; + if (!h || !names_hash) { + hts_log_error("Failed to allocate bcf header"); + goto err; + } + if (bcf_hdr_format(h0, 1, &htxt) < 0) { + hts_log_error("Failed to get header text"); + goto err; + } + bcf_hdr_set_version(h,bcf_hdr_get_version(h0)); + int j; + for (j=0; j 0) { + char *p = find_chrom_header_line(htxt.s); + int i = 0, end = n? 
8 : 7; + while ((p = strchr(p, '\t')) != 0 && i < end) ++i, ++p; + if (i != end) { + hts_log_error("Wrong number of columns in header #CHROM line"); + goto err; + } + r |= kputsn(htxt.s, p - htxt.s, &str) < 0; + for (i = 0; i < n; ++i) { + if ( khash_str2int_has_key(names_hash,samples[i]) ) + { + hts_log_error("Duplicate sample name \"%s\"", samples[i]); + goto err; + } + imap[i] = bcf_hdr_id2int(h0, BCF_DT_SAMPLE, samples[i]); + if (imap[i] < 0) continue; + r |= kputc('\t', &str) < 0; + r |= kputs(samples[i], &str) < 0; + r |= khash_str2int_inc(names_hash,samples[i]) < 0; + } + } else r |= kputsn(htxt.s, htxt.l, &str) < 0; + while (str.l && (!str.s[str.l-1] || str.s[str.l-1]=='\n') ) str.l--; // kill trailing zeros and newlines + r |= kputc('\n',&str) < 0; + if (r) { + hts_log_error("%s", strerror(errno)); + goto err; + } + if ( bcf_hdr_parse(h, str.s) < 0 ) { + bcf_hdr_destroy(h); + h = NULL; + } + free(str.s); + free(htxt.s); + khash_str2int_destroy(names_hash); + return h; + + err: + ks_free(&str); + ks_free(&htxt); + khash_str2int_destroy(names_hash); + bcf_hdr_destroy(h); + return NULL; +} + +int bcf_hdr_set_samples(bcf_hdr_t *hdr, const char *samples, int is_file) +{ + if ( samples && !strcmp("-",samples) ) return 0; // keep all samples + + int i, narr = bit_array_size(bcf_hdr_nsamples(hdr)); + hdr->keep_samples = (uint8_t*) calloc(narr,1); + if (!hdr->keep_samples) return -1; + + hdr->nsamples_ori = bcf_hdr_nsamples(hdr); + if ( !samples ) + { + // exclude all samples + khint_t k; + vdict_t *d = (vdict_t*)hdr->dict[BCF_DT_SAMPLE], *new_dict; + new_dict = kh_init(vdict); + if (!new_dict) return -1; + + bcf_hdr_nsamples(hdr) = 0; + + for (k = kh_begin(d); k != kh_end(d); ++k) + if (kh_exist(d, k)) free((char*)kh_key(d, k)); + kh_destroy(vdict, d); + hdr->dict[BCF_DT_SAMPLE] = new_dict; + if (bcf_hdr_sync(hdr) < 0) return -1; + + return 0; + } + + if ( samples[0]=='^' ) + for (i=0; ikeep_samples,i); + + int idx, n, ret = 0; + char **smpls = 
hts_readlist(samples[0]=='^'?samples+1:samples, is_file, &n); + if ( !smpls ) return -1; + for (i=0; ikeep_samples, idx); + else + bit_array_set(hdr->keep_samples, idx); + } + for (i=0; insamples_ori; i++) + if ( bit_array_test(hdr->keep_samples,i) ) bcf_hdr_nsamples(hdr)++; + + if ( !bcf_hdr_nsamples(hdr) ) { free(hdr->keep_samples); hdr->keep_samples=NULL; } + else + { + // Make new list and dictionary with desired samples + char **samples = (char**) malloc(sizeof(char*)*bcf_hdr_nsamples(hdr)); + vdict_t *new_dict, *d; + int k, res; + if (!samples) return -1; + + new_dict = kh_init(vdict); + if (!new_dict) { + free(samples); + return -1; + } + idx = 0; + for (i=0; insamples_ori; i++) { + if ( bit_array_test(hdr->keep_samples,i) ) { + samples[idx] = hdr->samples[i]; + k = kh_put(vdict, new_dict, hdr->samples[i], &res); + if (res < 0) { + free(samples); + kh_destroy(vdict, new_dict); + return -1; + } + kh_val(new_dict, k) = bcf_idinfo_def; + kh_val(new_dict, k).id = idx; + idx++; + } + } + + // Delete desired samples from old dictionary, so we don't free them + d = (vdict_t*)hdr->dict[BCF_DT_SAMPLE]; + for (i=0; i < idx; i++) { + int k = kh_get(vdict, d, samples[i]); + if (k < kh_end(d)) kh_del(vdict, d, k); + } + + // Free everything else + for (k = kh_begin(d); k != kh_end(d); ++k) + if (kh_exist(d, k)) free((char*)kh_key(d, k)); + kh_destroy(vdict, d); + hdr->dict[BCF_DT_SAMPLE] = new_dict; + + free(hdr->samples); + hdr->samples = samples; + + if (bcf_hdr_sync(hdr) < 0) + return -1; + } + + return ret; +} + +int bcf_subset(const bcf_hdr_t *h, bcf1_t *v, int n, int *imap) +{ + kstring_t ind; + ind.s = 0; ind.l = ind.m = 0; + if (n) { + bcf_fmt_t fmt[MAX_N_FMT]; + int i, j; + uint8_t *ptr = (uint8_t*)v->indiv.s; + for (i = 0; i < v->n_fmt; ++i) + ptr = bcf_unpack_fmt_core1(ptr, v->n_sample, &fmt[i]); + for (i = 0; i < (int)v->n_fmt; ++i) { + bcf_fmt_t *f = &fmt[i]; + bcf_enc_int1(&ind, f->id); + bcf_enc_size(&ind, f->n, f->type); + for (j = 0; j < n; ++j) + if 
(imap[j] >= 0) kputsn((char*)(f->p + imap[j] * f->size), f->size, &ind); + } + for (i = j = 0; j < n; ++j) if (imap[j] >= 0) ++i; + v->n_sample = i; + } else v->n_sample = 0; + if ( !v->n_sample ) v->n_fmt = 0; + free(v->indiv.s); + v->indiv = ind; + v->unpacked &= ~BCF_UN_FMT; // only BCF is ready for output, VCF will need to unpack again + return 0; +} + +int bcf_is_snp(bcf1_t *v) +{ + int i; + bcf_unpack(v, BCF_UN_STR); + for (i = 0; i < v->n_allele; ++i) + { + if ( v->d.allele[i][1]==0 && v->d.allele[i][0]!='*' ) continue; + + // mpileup's allele, see also below. This is not completely satisfactory, + // a general library is here narrowly tailored to fit samtools. + if ( v->d.allele[i][0]=='<' && v->d.allele[i][1]=='X' && v->d.allele[i][2]=='>' ) continue; + if ( v->d.allele[i][0]=='<' && v->d.allele[i][1]=='*' && v->d.allele[i][2]=='>' ) continue; + + break; + } + return i == v->n_allele; +} + +static void bcf_set_variant_type(const char *ref, const char *alt, bcf_variant_t *var) +{ + if ( *alt == '*' && !alt[1] ) { var->n = 0; var->type = VCF_OVERLAP; return; } // overlapping variant + + // The most frequent case + if ( !ref[1] && !alt[1] ) + { + if ( *alt == '.' 
|| *ref==*alt ) { var->n = 0; var->type = VCF_REF; return; } + if ( *alt == 'X' ) { var->n = 0; var->type = VCF_REF; return; } // mpileup's X allele shouldn't be treated as variant + var->n = 1; var->type = VCF_SNP; return; + } + if ( alt[0]=='<' ) + { + if ( alt[1]=='X' && alt[2]=='>' ) { var->n = 0; var->type = VCF_REF; return; } // mpileup's X allele shouldn't be treated as variant + if ( alt[1]=='*' && alt[2]=='>' ) { var->n = 0; var->type = VCF_REF; return; } + if ( !strcmp("NON_REF>",alt+1) ) { var->n = 0; var->type = VCF_REF; return; } + var->type = VCF_OTHER; + return; + } + + // Catch "joined before" breakend case + if ( alt[0]==']' || alt[0] == '[' ) + { + var->type = VCF_BND; return; + } + + // Iterate through alt characters that match the reference + const char *r = ref, *a = alt; + while (*r && *a && toupper_c(*r)==toupper_c(*a) ) { r++; a++; } // unfortunately, matching REF,ALT case is not guaranteed + + if ( *a && !*r ) + { + if ( *a==']' || *a=='[' ) { var->type = VCF_BND; return; } // "joined after" breakend + while ( *a ) a++; + var->n = (a-alt)-(r-ref); var->type = VCF_INDEL | VCF_INS; return; + } + else if ( *r && !*a ) + { + while ( *r ) r++; + var->n = (a-alt)-(r-ref); var->type = VCF_INDEL | VCF_DEL; return; + } + else if ( !*r && !*a ) + { + var->n = 0; var->type = VCF_REF; return; + } + + const char *re = r, *ae = a; + while ( re[1] ) re++; + while ( ae[1] ) ae++; + while ( re>r && ae>a && toupper_c(*re)==toupper_c(*ae) ) { re--; ae--; } + if ( ae==a ) + { + if ( re==r ) { var->n = 1; var->type = VCF_SNP; return; } + var->n = -(re-r); + if ( toupper_c(*re)==toupper_c(*ae) ) { var->type = VCF_INDEL | VCF_DEL; return; } + var->type = VCF_OTHER; return; + } + else if ( re==r ) + { + var->n = ae-a; + if ( toupper_c(*re)==toupper_c(*ae) ) { var->type = VCF_INDEL | VCF_INS; return; } + var->type = VCF_OTHER; return; + } + + var->type = ( re-r == ae-a ) ? VCF_MNP : VCF_OTHER; + var->n = ( re-r > ae-a ) ? 
-(re-r+1) : ae-a+1; + + // should do also complex events, SVs, etc... +} + +static int bcf_set_variant_types(bcf1_t *b) +{ + if ( !(b->unpacked & BCF_UN_STR) ) bcf_unpack(b, BCF_UN_STR); + bcf_dec_t *d = &b->d; + if ( d->n_var < b->n_allele ) + { + bcf_variant_t *new_var = realloc(d->var, sizeof(bcf_variant_t)*b->n_allele); + if (!new_var) + return -1; + d->var = new_var; + d->n_var = b->n_allele; + } + int i; + b->d.var_type = 0; + d->var[0].type = VCF_REF; + d->var[0].n = 0; + for (i=1; in_allele; i++) + { + bcf_set_variant_type(d->allele[0],d->allele[i], &d->var[i]); + b->d.var_type |= d->var[i].type; + //fprintf(stderr,"[set_variant_type] %d %s %s -> %d %d .. %d\n", b->pos+1,d->allele[0],d->allele[i],d->var[i].type,d->var[i].n, b->d.var_type); + } + return 0; +} + +// bcf_get_variant_type/bcf_get_variant_types should only return the following, +// to be compatible with callers that are not expecting newer values +// like VCF_INS, VCF_DEL. The full set is available from the newer +// vcf_has_variant_type* interfaces. 
+#define ORIG_VAR_TYPES (VCF_SNP|VCF_MNP|VCF_INDEL|VCF_OTHER|VCF_BND|VCF_OVERLAP) +int bcf_get_variant_types(bcf1_t *rec) +{ + if ( rec->d.var_type==-1 ) { + if (bcf_set_variant_types(rec) != 0) { + hts_log_error("Couldn't get variant types: %s", strerror(errno)); + exit(1); // Due to legacy API having no way to report failures + } + } + return rec->d.var_type & ORIG_VAR_TYPES; +} + +int bcf_get_variant_type(bcf1_t *rec, int ith_allele) +{ + if ( rec->d.var_type==-1 ) { + if (bcf_set_variant_types(rec) != 0) { + hts_log_error("Couldn't get variant types: %s", strerror(errno)); + exit(1); // Due to legacy API having no way to report failures + } + } + if (ith_allele < 0 || ith_allele >= rec->n_allele) { + hts_log_error("Requested allele outside valid range"); + exit(1); + } + return rec->d.var[ith_allele].type & ORIG_VAR_TYPES; +} +#undef ORIG_VAR_TYPES + +int bcf_has_variant_type(bcf1_t *rec, int ith_allele, uint32_t bitmask) +{ + if ( rec->d.var_type==-1 ) { + if (bcf_set_variant_types(rec) != 0) return -1; + } + if (ith_allele < 0 || ith_allele >= rec->n_allele) return -1; + if (bitmask == VCF_REF) { // VCF_REF is 0, so handled as a special case + return rec->d.var[ith_allele].type == VCF_REF; + } + return bitmask & rec->d.var[ith_allele].type; +} + +int bcf_variant_length(bcf1_t *rec, int ith_allele) +{ + if ( rec->d.var_type==-1 ) { + if (bcf_set_variant_types(rec) != 0) return bcf_int32_missing; + } + if (ith_allele < 0 || ith_allele >= rec->n_allele) return bcf_int32_missing; + return rec->d.var[ith_allele].n; +} + +int bcf_has_variant_types(bcf1_t *rec, uint32_t bitmask, + enum bcf_variant_match mode) +{ + if ( rec->d.var_type==-1 ) { + if (bcf_set_variant_types(rec) != 0) return -1; + } + uint32_t type = rec->d.var_type; + if ( mode==bcf_match_overlap ) return bitmask & type; + + // VCF_INDEL is always set with VCF_INS and VCF_DEL by bcf_set_variant_type[s], but the bitmask may + // ask for say `VCF_INS` or `VCF_INDEL` only + if ( bitmask&(VCF_INS|VCF_DEL) 
&& !(bitmask&VCF_INDEL) ) type &= ~VCF_INDEL; + else if ( bitmask&VCF_INDEL && !(bitmask&(VCF_INS|VCF_DEL)) ) type &= ~(VCF_INS|VCF_DEL); + + if ( mode==bcf_match_subset ) + { + if ( ~bitmask & type ) return 0; + else return bitmask & type; + } + // mode == bcf_match_exact + return type==bitmask ? type : 0; +} + +int bcf_update_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const void *values, int n, int type) +{ + static int negative_rlen_warned = 0; + int is_end_tag; + + // Is the field already present? + int i, inf_id = bcf_hdr_id2int(hdr,BCF_DT_ID,key); + if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,inf_id) ) return -1; // No such INFO field in the header + if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); + + is_end_tag = strcmp(key, "END") == 0; + + for (i=0; in_info; i++) + if ( inf_id==line->d.info[i].key ) break; + bcf_info_t *inf = i==line->n_info ? NULL : &line->d.info[i]; + + if ( !n || (type==BCF_HT_STR && !values) ) + { + if ( n==0 && is_end_tag ) + line->rlen = line->n_allele ? 
strlen(line->d.allele[0]) : 0; + if ( inf ) + { + // Mark the tag for removal, free existing memory if necessary + if ( inf->vptr_free ) + { + free(inf->vptr - inf->vptr_off); + inf->vptr_free = 0; + } + line->d.shared_dirty |= BCF1_DIRTY_INF; + inf->vptr = NULL; + inf->vptr_off = inf->vptr_len = 0; + } + return 0; + } + + if (is_end_tag) + { + if (n != 1) + { + hts_log_error("END info tag should only have one value at %s:%"PRIhts_pos, bcf_seqname_safe(hdr,line), line->pos+1); + line->errcode |= BCF_ERR_TAG_INVALID; + return -1; + } + if (type != BCF_HT_INT && type != BCF_HT_LONG) + { + hts_log_error("Wrong type (%d) for END info tag at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); + line->errcode |= BCF_ERR_TAG_INVALID; + return -1; + } + } + + // Encode the values and determine the size required to accommodate the values + kstring_t str = {0,0,0}; + bcf_enc_int1(&str, inf_id); + if ( type==BCF_HT_INT ) + bcf_enc_vint(&str, n, (int32_t*)values, -1); + else if ( type==BCF_HT_REAL ) + bcf_enc_vfloat(&str, n, (float*)values); + else if ( type==BCF_HT_FLAG || type==BCF_HT_STR ) + { + if ( values==NULL ) + bcf_enc_size(&str, 0, BCF_BT_NULL); + else + bcf_enc_vchar(&str, strlen((char*)values), (char*)values); + } +#ifdef VCF_ALLOW_INT64 + else if ( type==BCF_HT_LONG ) + { + if (n != 1) { + hts_log_error("Only storing a single BCF_HT_LONG value is supported at %s:%"PRIhts_pos, bcf_seqname_safe(hdr,line), line->pos+1); + abort(); + } + bcf_enc_long1(&str, *(int64_t *) values); + } +#endif + else + { + hts_log_error("The type %d not implemented yet at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); + abort(); + } + + // Is the INFO tag already present + if ( inf ) + { + // Is it big enough to accommodate new block? 
+ if ( inf->vptr && str.l <= inf->vptr_len + inf->vptr_off ) + { + if ( str.l != inf->vptr_len + inf->vptr_off ) line->d.shared_dirty |= BCF1_DIRTY_INF; + uint8_t *ptr = inf->vptr - inf->vptr_off; + memcpy(ptr, str.s, str.l); + free(str.s); + int vptr_free = inf->vptr_free; + bcf_unpack_info_core1(ptr, inf); + inf->vptr_free = vptr_free; + } + else + { + if ( inf->vptr_free ) + free(inf->vptr - inf->vptr_off); + bcf_unpack_info_core1((uint8_t*)str.s, inf); + inf->vptr_free = 1; + line->d.shared_dirty |= BCF1_DIRTY_INF; + } + } + else + { + // The tag is not present, create new one + line->n_info++; + hts_expand0(bcf_info_t, line->n_info, line->d.m_info , line->d.info); + inf = &line->d.info[line->n_info-1]; + bcf_unpack_info_core1((uint8_t*)str.s, inf); + inf->vptr_free = 1; + line->d.shared_dirty |= BCF1_DIRTY_INF; + } + line->unpacked |= BCF_UN_INFO; + + if ( n==1 && is_end_tag) { + hts_pos_t end = type == BCF_HT_INT ? *(int32_t *) values : *(int64_t *) values; + if ( (type == BCF_HT_INT && end!=bcf_int32_missing) || (type == BCF_HT_LONG && end!=bcf_int64_missing) ) + { + if ( end <= line->pos ) + { + if ( !negative_rlen_warned ) + { + hts_log_warning("INFO/END=%"PRIhts_pos" is smaller than POS at %s:%"PRIhts_pos,end,bcf_seqname_safe(hdr,line),line->pos+1); + negative_rlen_warned = 1; + } + line->rlen = line->n_allele ? strlen(line->d.allele[0]) : 0; + } + else + line->rlen = end - line->pos; + } + } + return 0; +} + +int bcf_update_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *key, const char **values, int n) +{ + if ( !n ) + return bcf_update_format(hdr,line,key,NULL,0,BCF_HT_STR); + + int i, max_len = 0; + for (i=0; i max_len ) max_len = len; + } + char *out = (char*) malloc(max_len*n); + if ( !out ) return -2; + for (i=0; iunpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); + + for (i=0; in_fmt; i++) + if ( line->d.fmt[i].id==fmt_id ) break; + bcf_fmt_t *fmt = i==line->n_fmt ? 
NULL : &line->d.fmt[i]; + + if ( !n ) + { + if ( fmt ) + { + // Mark the tag for removal, free existing memory if necessary + if ( fmt->p_free ) + { + free(fmt->p - fmt->p_off); + fmt->p_free = 0; + } + line->d.indiv_dirty = 1; + fmt->p = NULL; + } + return 0; + } + + line->n_sample = bcf_hdr_nsamples(hdr); + int nps = n / line->n_sample; // number of values per sample + assert( nps && nps*line->n_sample==n ); // must be divisible by n_sample + + // Encode the values and determine the size required to accommodate the values + kstring_t str = {0,0,0}; + bcf_enc_int1(&str, fmt_id); + assert(values != NULL); + if ( type==BCF_HT_INT ) + bcf_enc_vint(&str, n, (int32_t*)values, nps); + else if ( type==BCF_HT_REAL ) + { + bcf_enc_size(&str, nps, BCF_BT_FLOAT); + serialize_float_array(&str, nps*line->n_sample, (float *) values); + } + else if ( type==BCF_HT_STR ) + { + bcf_enc_size(&str, nps, BCF_BT_CHAR); + kputsn((char*)values, nps*line->n_sample, &str); + } + else + { + hts_log_error("The type %d not implemented yet at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); + abort(); + } + + if ( !fmt ) + { + // Not present, new format field + line->n_fmt++; + hts_expand0(bcf_fmt_t, line->n_fmt, line->d.m_fmt, line->d.fmt); + + // Special case: VCF specification requires that GT is always first + if ( line->n_fmt > 1 && key[0]=='G' && key[1]=='T' && !key[2] ) + { + for (i=line->n_fmt-1; i>0; i--) + line->d.fmt[i] = line->d.fmt[i-1]; + fmt = &line->d.fmt[0]; + } + else + fmt = &line->d.fmt[line->n_fmt-1]; + bcf_unpack_fmt_core1((uint8_t*)str.s, line->n_sample, fmt); + line->d.indiv_dirty = 1; + fmt->p_free = 1; + } + else + { + // The tag is already present, check if it is big enough to accommodate the new block + if ( fmt->p && str.l <= fmt->p_len + fmt->p_off ) + { + // good, the block is big enough + if ( str.l != fmt->p_len + fmt->p_off ) line->d.indiv_dirty = 1; + uint8_t *ptr = fmt->p - fmt->p_off; + memcpy(ptr, str.s, str.l); + free(str.s); + int p_free 
= fmt->p_free; + bcf_unpack_fmt_core1(ptr, line->n_sample, fmt); + fmt->p_free = p_free; + } + else + { + if ( fmt->p_free ) + free(fmt->p - fmt->p_off); + bcf_unpack_fmt_core1((uint8_t*)str.s, line->n_sample, fmt); + fmt->p_free = 1; + line->d.indiv_dirty = 1; + } + } + line->unpacked |= BCF_UN_FMT; + return 0; +} + + +int bcf_update_filter(const bcf_hdr_t *hdr, bcf1_t *line, int *flt_ids, int n) +{ + if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); + line->d.shared_dirty |= BCF1_DIRTY_FLT; + line->d.n_flt = n; + if ( !n ) return 0; + hts_expand(int, line->d.n_flt, line->d.m_flt, line->d.flt); + int i; + for (i=0; id.flt[i] = flt_ids[i]; + return 0; +} + +int bcf_add_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id) +{ + if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); + int i; + for (i=0; id.n_flt; i++) + if ( flt_id==line->d.flt[i] ) break; + if ( id.n_flt ) return 0; // this filter is already set + line->d.shared_dirty |= BCF1_DIRTY_FLT; + if ( flt_id==0 ) // set to PASS + line->d.n_flt = 1; + else if ( line->d.n_flt==1 && line->d.flt[0]==0 ) + line->d.n_flt = 1; + else + line->d.n_flt++; + hts_expand(int, line->d.n_flt, line->d.m_flt, line->d.flt); + line->d.flt[line->d.n_flt-1] = flt_id; + return 1; +} +int bcf_remove_filter(const bcf_hdr_t *hdr, bcf1_t *line, int flt_id, int pass) +{ + if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); + int i; + for (i=0; id.n_flt; i++) + if ( flt_id==line->d.flt[i] ) break; + if ( i==line->d.n_flt ) return 0; // the filter is not present + line->d.shared_dirty |= BCF1_DIRTY_FLT; + if ( i!=line->d.n_flt-1 ) memmove(line->d.flt+i,line->d.flt+i+1,(line->d.n_flt-i-1)*sizeof(*line->d.flt)); + line->d.n_flt--; + if ( !line->d.n_flt && pass ) bcf_add_filter(hdr,line,0); + return 0; +} + +int bcf_has_filter(const bcf_hdr_t *hdr, bcf1_t *line, char *filter) +{ + if ( filter[0]=='.' 
&& !filter[1] ) filter = "PASS"; + int id = bcf_hdr_id2int(hdr, BCF_DT_ID, filter); + if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FLT,id) ) return -1; // not defined in the header + + if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT); + if ( id==0 && !line->d.n_flt) return 1; // PASS + + int i; + for (i=0; id.n_flt; i++) + if ( line->d.flt[i]==id ) return 1; + return 0; +} + +static inline int _bcf1_sync_alleles(const bcf_hdr_t *hdr, bcf1_t *line, int nals) +{ + line->d.shared_dirty |= BCF1_DIRTY_ALS; + + line->n_allele = nals; + hts_expand(char*, line->n_allele, line->d.m_allele, line->d.allele); + + char *als = line->d.als; + int n = 0; + while (nd.allele[n] = als; + while ( *als ) als++; + als++; + n++; + } + + // Update REF length. Note that END is 1-based while line->pos 0-based + bcf_info_t *end_info = bcf_get_info(hdr,line,"END"); + if ( end_info ) + { + if ( end_info->type==BCF_HT_INT && end_info->v1.i==bcf_int32_missing ) end_info = NULL; + else if ( end_info->type==BCF_HT_LONG && end_info->v1.i==bcf_int64_missing ) end_info = NULL; + } + if ( end_info && end_info->v1.i > line->pos ) + line->rlen = end_info->v1.i - line->pos; + else if ( nals > 0 ) + line->rlen = strlen(line->d.allele[0]); + else + line->rlen = 0; + + return 0; +} +int bcf_update_alleles(const bcf_hdr_t *hdr, bcf1_t *line, const char **alleles, int nals) +{ + if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); + char *free_old = NULL; + char buffer[256]; + size_t used = 0; + + // The pointers in alleles may point into the existing line->d.als memory, + // so care needs to be taken not to clobber them while updating. Usually + // they will be short so we can copy through an intermediate buffer. + // If they're longer, or won't fit in the existing allocation we + // can allocate a new buffer to write into. Note that in either case + // pointers to line->d.als memory in alleles may not be valid when we've + // finished. 
+ int i; + size_t avail = line->d.m_als < sizeof(buffer) ? line->d.m_als : sizeof(buffer); + for (i=0; id.m_als) // Don't shrink the buffer + needed = line->d.m_als; + if (needed > INT_MAX) { + hts_log_error("REF + alleles too long to fit in a BCF record"); + return -1; + } + new_als = malloc(needed); + if (!new_als) + return -1; + free_old = line->d.als; + line->d.als = new_als; + line->d.m_als = needed; + } + + // Copy from the temp buffer to the destination + if (used) { + assert(used <= line->d.m_als); + memcpy(line->d.als, buffer, used); + } + + // Add in any remaining entries - if this happens we will always be + // writing to a newly-allocated buffer. + for (; i < nals; i++) { + size_t sz = strlen(alleles[i]) + 1; + memcpy(line->d.als + used, alleles[i], sz); + used += sz; + } + + if (free_old) + free(free_old); + return _bcf1_sync_alleles(hdr,line,nals); +} + +int bcf_update_alleles_str(const bcf_hdr_t *hdr, bcf1_t *line, const char *alleles_string) +{ + if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); + kstring_t tmp; + tmp.l = 0; tmp.s = line->d.als; tmp.m = line->d.m_als; + kputs(alleles_string, &tmp); + line->d.als = tmp.s; line->d.m_als = tmp.m; + + int nals = 1; + char *t = line->d.als; + while (*t) + { + if ( *t==',' ) { *t = 0; nals++; } + t++; + } + return _bcf1_sync_alleles(hdr, line, nals); +} + +int bcf_update_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id) +{ + if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); + kstring_t tmp; + tmp.l = 0; tmp.s = line->d.id; tmp.m = line->d.m_id; + if ( id ) + kputs(id, &tmp); + else + kputs(".", &tmp); + line->d.id = tmp.s; line->d.m_id = tmp.m; + line->d.shared_dirty |= BCF1_DIRTY_ID; + return 0; +} + +int bcf_add_id(const bcf_hdr_t *hdr, bcf1_t *line, const char *id) +{ + if ( !id ) return 0; + if ( !(line->unpacked & BCF_UN_STR) ) bcf_unpack(line, BCF_UN_STR); + + kstring_t tmp; + tmp.l = 0; tmp.s = line->d.id; tmp.m = line->d.m_id; + + int len = strlen(id); + 
char *dst = line->d.id; + while ( *dst && (dst=strstr(dst,id)) ) + { + if ( dst[len]!=0 && dst[len]!=';' ) dst++; // a prefix, not a match + else if ( dst==line->d.id || dst[-1]==';' ) return 0; // already present + dst++; // a suffix, not a match + } + if ( line->d.id && (line->d.id[0]!='.' || line->d.id[1]) ) + { + tmp.l = strlen(line->d.id); + kputc(';',&tmp); + } + kputs(id,&tmp); + + line->d.id = tmp.s; line->d.m_id = tmp.m; + line->d.shared_dirty |= BCF1_DIRTY_ID; + return 0; + +} + +bcf_fmt_t *bcf_get_fmt(const bcf_hdr_t *hdr, bcf1_t *line, const char *key) +{ + int id = bcf_hdr_id2int(hdr, BCF_DT_ID, key); + if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,id) ) return NULL; // no such FMT field in the header + return bcf_get_fmt_id(line, id); +} + +bcf_info_t *bcf_get_info(const bcf_hdr_t *hdr, bcf1_t *line, const char *key) +{ + int id = bcf_hdr_id2int(hdr, BCF_DT_ID, key); + if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,id) ) return NULL; // no such INFO field in the header + return bcf_get_info_id(line, id); +} + +bcf_fmt_t *bcf_get_fmt_id(bcf1_t *line, const int id) +{ + int i; + if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); + for (i=0; in_fmt; i++) + { + if ( line->d.fmt[i].id==id ) return &line->d.fmt[i]; + } + return NULL; +} + +bcf_info_t *bcf_get_info_id(bcf1_t *line, const int id) +{ + int i; + if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); + for (i=0; in_info; i++) + { + if ( line->d.info[i].key==id ) return &line->d.info[i]; + } + return NULL; +} + + +int bcf_get_info_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type) +{ + int i, ret = -4, tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag); + if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_INFO,tag_id) ) return -1; // no such INFO field in the header + if ( bcf_hdr_id2type(hdr,BCF_HL_INFO,tag_id)!=(type & 0xff) ) return -2; // expected different type + + if ( !(line->unpacked & BCF_UN_INFO) ) bcf_unpack(line, BCF_UN_INFO); + + for 
(i=0; in_info; i++) + if ( line->d.info[i].key==tag_id ) break; + if ( i==line->n_info ) return ( type==BCF_HT_FLAG ) ? 0 : -3; // the tag is not present in this record + if ( type==BCF_HT_FLAG ) return 1; + + bcf_info_t *info = &line->d.info[i]; + if ( !info->vptr ) return -3; // the tag was marked for removal + if ( type==BCF_HT_STR ) + { + if ( *ndst < info->len+1 ) + { + *ndst = info->len + 1; + *dst = realloc(*dst, *ndst); + } + memcpy(*dst,info->vptr,info->len); + ((uint8_t*)*dst)[info->len] = 0; + return info->len; + } + + // Make sure the buffer is big enough + int size1; + switch (type) { + case BCF_HT_INT: size1 = sizeof(int32_t); break; + case BCF_HT_LONG: size1 = sizeof(int64_t); break; + case BCF_HT_REAL: size1 = sizeof(float); break; + default: + hts_log_error("Unexpected output type %d at %s:%"PRIhts_pos, type, bcf_seqname_safe(hdr,line), line->pos+1); + return -2; + } + if ( *ndst < info->len ) + { + *ndst = info->len; + *dst = realloc(*dst, *ndst * size1); + } + + #define BRANCH(type_t, convert, is_missing, is_vector_end, set_missing, set_regular, out_type_t) do { \ + out_type_t *tmp = (out_type_t *) *dst; \ + int j; \ + for (j=0; jlen; j++) \ + { \ + type_t p = convert(info->vptr + j * sizeof(type_t)); \ + if ( is_vector_end ) break; \ + if ( is_missing ) set_missing; \ + else set_regular; \ + tmp++; \ + } \ + ret = j; \ + } while (0) + switch (info->type) { + case BCF_BT_INT8: + if (type == BCF_HT_LONG) { + BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); + } else { + BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); + } + break; + case BCF_BT_INT16: + if (type == BCF_HT_LONG) { + BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); + } else { + BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); + 
} + break; + case BCF_BT_INT32: + if (type == BCF_HT_LONG) { + BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int64_missing, *tmp=p, int64_t); break; + } else { + BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int32_missing, *tmp=p, int32_t); break; + } + case BCF_BT_FLOAT: BRANCH(uint32_t, le_to_u32, p==bcf_float_missing, p==bcf_float_vector_end, bcf_float_set_missing(*tmp), bcf_float_set(tmp, p), float); break; + default: hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, info->type, bcf_seqname_safe(hdr,line), line->pos+1); return -2; + } + #undef BRANCH + return ret; // set by BRANCH +} + +int bcf_get_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, char ***dst, int *ndst) +{ + int i,tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag); + if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,tag_id) ) return -1; // no such FORMAT field in the header + if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=BCF_HT_STR ) return -2; // expected different type + + if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); + + for (i=0; in_fmt; i++) + if ( line->d.fmt[i].id==tag_id ) break; + if ( i==line->n_fmt ) return -3; // the tag is not present in this record + bcf_fmt_t *fmt = &line->d.fmt[i]; + if ( !fmt->p ) return -3; // the tag was marked for removal + + int nsmpl = bcf_hdr_nsamples(hdr); + if ( !*dst ) + { + *dst = (char**) malloc(sizeof(char*)*nsmpl); + if ( !*dst ) return -4; // could not alloc + (*dst)[0] = NULL; + } + int n = (fmt->n+1)*nsmpl; + if ( *ndst < n ) + { + (*dst)[0] = realloc((*dst)[0], n); + if ( !(*dst)[0] ) return -4; // could not alloc + *ndst = n; + } + for (i=0; ip + i*fmt->n; + uint8_t *tmp = (uint8_t*)(*dst)[0] + i*(fmt->n+1); + memcpy(tmp,src,fmt->n); + tmp[fmt->n] = 0; + (*dst)[i] = (char*) tmp; + } + return n; +} + +int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type) +{ + int i,j, tag_id = 
bcf_hdr_id2int(hdr, BCF_DT_ID, tag); + if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,tag_id) ) return -1; // no such FORMAT field in the header + if ( tag[0]=='G' && tag[1]=='T' && tag[2]==0 ) + { + // Ugly: GT field is considered to be a string by the VCF header but BCF represents it as INT. + if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=BCF_HT_STR ) return -2; + } + else if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=type ) return -2; // expected different type + + if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT); + + for (i=0; in_fmt; i++) + if ( line->d.fmt[i].id==tag_id ) break; + if ( i==line->n_fmt ) return -3; // the tag is not present in this record + bcf_fmt_t *fmt = &line->d.fmt[i]; + if ( !fmt->p ) return -3; // the tag was marked for removal + + if ( type==BCF_HT_STR ) + { + int n = fmt->n*bcf_hdr_nsamples(hdr); + if ( *ndst < n ) + { + *dst = realloc(*dst, n); + if ( !*dst ) return -4; // could not alloc + *ndst = n; + } + memcpy(*dst,fmt->p,n); + return n; + } + + // Make sure the buffer is big enough + int nsmpl = bcf_hdr_nsamples(hdr); + int size1 = type==BCF_HT_INT ? 
sizeof(int32_t) : sizeof(float); + if ( *ndst < fmt->n*nsmpl ) + { + *ndst = fmt->n*nsmpl; + *dst = realloc(*dst, *ndst*size1); + if ( !*dst ) return -4; // could not alloc + } + + #define BRANCH(type_t, convert, is_missing, is_vector_end, set_missing, set_vector_end, set_regular, out_type_t) { \ + out_type_t *tmp = (out_type_t *) *dst; \ + uint8_t *fmt_p = fmt->p; \ + for (i=0; in; j++) \ + { \ + type_t p = convert(fmt_p + j * sizeof(type_t)); \ + if ( is_missing ) set_missing; \ + else if ( is_vector_end ) { set_vector_end; break; } \ + else set_regular; \ + tmp++; \ + } \ + for (; jn; j++) { set_vector_end; tmp++; } \ + fmt_p += fmt->size; \ + } \ + } + switch (fmt->type) { + case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, p==bcf_int8_missing, p==bcf_int8_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; + case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, p==bcf_int16_missing, p==bcf_int16_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; + case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, p==bcf_int32_missing, p==bcf_int32_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, *tmp=p, int32_t); break; + case BCF_BT_FLOAT: BRANCH(uint32_t, le_to_u32, p==bcf_float_missing, p==bcf_float_vector_end, bcf_float_set_missing(*tmp), bcf_float_set_vector_end(*tmp), bcf_float_set(tmp, p), float); break; + default: hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, fmt->type, bcf_seqname_safe(hdr,line), line->pos+1); exit(1); + } + #undef BRANCH + return nsmpl*fmt->n; +} + +//error description structure definition +typedef struct err_desc { + int errorcode; + const char *description; +}err_desc; + +// error descriptions +static const err_desc errdesc_bcf[] = { + { BCF_ERR_CTG_UNDEF, "Contig not defined in header"}, + { BCF_ERR_TAG_UNDEF, "Tag not defined in header" }, + { BCF_ERR_NCOLS, "Incorrect number of columns" }, + { BCF_ERR_LIMITS, "Limits reached" }, + { BCF_ERR_CHAR, "Invalid character" }, + 
{ BCF_ERR_CTG_INVALID, "Invalid contig" }, + { BCF_ERR_TAG_INVALID, "Invalid tag" }, +}; + +/// append given description to buffer based on available size and add ... when not enough space + /** @param buffer buffer to which description to be appended + @param offset offset at which to be appended + @param maxbuffer maximum size of the buffer + @param description the description to be appended +on failure returns -1 - when buffer is not big enough; returns -1 on invalid params and on too small buffer which are improbable due to validation at caller site +on success returns 0 + */ +static int add_desc_to_buffer(char *buffer, size_t *offset, size_t maxbuffer, const char *description) { + + if (!description || !buffer || !offset || (maxbuffer < 4)) + return -1; + + size_t rembuffer = maxbuffer - *offset; + if (rembuffer > (strlen(description) + (rembuffer == maxbuffer ? 0 : 1))) { //add description with optionally required ',' + *offset += snprintf(buffer + *offset, rembuffer, "%s%s", (rembuffer == maxbuffer)? "": ",", description); + } else { //not enough space for description, put ... + size_t tmppos = (rembuffer <= 4) ? maxbuffer - 4 : *offset; + snprintf(buffer + tmppos, 4, "..."); //ignore offset update + return -1; + } + return 0; +} + +//get description for given error code. return NULL on error +const char *bcf_strerror(int errorcode, char *buffer, size_t maxbuffer) { + size_t usedup = 0; + int ret = 0; + int idx; + + if (!buffer || maxbuffer < 4) + return NULL; //invalid / insufficient buffer + + if (!errorcode) { + buffer[0] = '\0'; //no error, set null + return buffer; + } + + for (idx = 0; idx < sizeof(errdesc_bcf) / sizeof(err_desc); ++idx) { + if (errorcode & errdesc_bcf[idx].errorcode) { //error is set, add description + ret = add_desc_to_buffer(buffer, &usedup, maxbuffer, errdesc_bcf[idx].description); + if (ret < 0) + break; //not enough space, ... 
added, no need to continue + + errorcode &= ~errdesc_bcf[idx].errorcode; //reset the error + } + } + + if (errorcode && (ret >= 0)) { //undescribed error is present in error code and had enough buffer, try to add unkonwn error as well§ + add_desc_to_buffer(buffer, &usedup, maxbuffer, "Unknown error"); + } + return buffer; +} + diff --git a/src/htslib-1.18/vcf_sweep.c b/src/htslib-1.21/vcf_sweep.c similarity index 100% rename from src/htslib-1.18/vcf_sweep.c rename to src/htslib-1.21/vcf_sweep.c diff --git a/src/htslib-1.18/vcfutils.c b/src/htslib-1.21/vcfutils.c similarity index 100% rename from src/htslib-1.18/vcfutils.c rename to src/htslib-1.21/vcfutils.c diff --git a/src/htslib-1.21/version.sh b/src/htslib-1.21/version.sh new file mode 100755 index 0000000..f35234c --- /dev/null +++ b/src/htslib-1.21/version.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# version.sh -- Script to build the htslib version string +# +# Author : James Bonfield +# +# Copyright (C) 2017-2018, 2021 Genome Research Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Master version, for use in tarballs or non-git source copies +VERSION=1.21 + +# If we have a git clone, then check against the current tag +srcdir=${0%/version.sh} +if [ -e $srcdir/.git ] +then + # If we ever get to 10.x this will need to be more liberal + v=`cd $srcdir && git describe --always --match '[0-9].[0-9]*' --dirty` + case $v in + [0-9]*.[0-9]*) VERSION="$v" ;; + [0-9a-f][0-9a-f]*) VERSION="$VERSION-1-g$v" ;; + esac +fi + +# Numeric version is for use in .dylib or .so libraries +# +# Follows the same logic from the Makefile commit c2e93911 +# as non-numeric versions get bumped to patch level 255 to indicate +# an unknown value. +if [ "$1" = "numeric" ] +then + v1=`expr "$VERSION" : '\([0-9]*\)'` + v2=`expr "$VERSION" : '[0-9]*.\([0-9]*\)'` + v3=`expr "$VERSION" : '[0-9]*.[0-9]*.\([0-9]*\)'` + if [ -z "`expr "$VERSION" : '\([0-9.]*\)$'`" ] + then + VERSION="$v1.$v2.255" + else + VERSION="$v1.$v2${v3:+.}$v3" + fi +fi + +echo $VERSION